Commit b5f2457f authored by Betoule Marc's avatar Betoule Marc
Browse files

Merge branch 'master' of gitorious.org:pipelet/pipelet

parents 87b9d6eb c6013964
......@@ -5,12 +5,14 @@
\usepackage{multicol}
\usepackage{ulem}
\usepackage{color}
\usepackage{xspace}
\usepackage{listings}
\usepackage{wasysym}
\useoutertheme{infolines}
\usepackage{hangcaption}
\newcommand{\pipelet}{\textbf{\small{PIPELET}}\xspace}
\title[Pipelet]{The Pipelet software}
\title[Pipelet]{The \pipelet software}
\author[Betoule, Le Jeune]{Marc \textsc{Betoule}, Maude \textsc{Le Jeune}}
\institute[CNRS]{}
\date[2010/09/04]{September 4th, 2010}
......@@ -20,13 +22,60 @@
\begin{document}
\begin{frame}{Pipelet}
\begin{frame}{\pipelet}
\tableofcontents
\end{frame}
\section{From the user point of view}
\section{Context}
\begin{frame}
\tableofcontents[currentsection]
\end{frame}
\begin{frame}{Context and needs}
Usually in scientific data processing:
\begin{itemize}
\item Big data sets
\item Complex processing (multiple interdependent steps)
\item Optimal parameters unknown
\end{itemize}
\begin{centering} $\rightarrow$ Computational \textbf{and development} cost a lot.\\
\end{centering}
\begin{figure}
\includegraphics[width=0.50\textwidth]{pipelet_scheme_small2.pdf}
\end{figure}
The \pipelet software answers the 3 above items:
\begin{itemize}
\item Computational cost limited to its lower limit
\item Guarantee traceability
\item Offer comparison facilities
\end{itemize}
\end{frame}
\begin{frame}{The \pipelet software}
The main idea behind \pipelet is to:
\begin{itemize}
\item Cut the whole processing into \textbf{segments} (script files)
\item Save intermediate products on disk
\item Use a unique identifier wrt code, parameters and I/Os.
\end{itemize}
\begin{figure}
\includegraphics[width=0.50\textwidth]{pipelet_scheme_small3.pdf}
\end{figure}
\pipelet is written in Python:
\begin{itemize}
\item High level language offering lots of functionalities
\item Known as a glue language ideal for interfacing heterogeneous codes
\item Ease debugging and interactivity
\end{itemize}
\end{frame}
\begin{frame}{The pipelet big scheme}
\section{How it works}
\begin{frame}
\tableofcontents[currentsection]
\end{frame}
\begin{frame}{The \pipelet big scheme}
\begin{figure}
\includegraphics[width=0.90\textwidth]{pipelet_scheme.pdf}
\end{figure}
......@@ -38,6 +87,9 @@
\begin{verbatim}P = Pipeline(pipedot, codedir='./', prefix='/data/...')
\end{verbatim}
\begin{figure}
\includegraphics[width=0.5\textwidth]{pipelet_scheme_small.pdf}
\end{figure}
\begin{itemize}
\item \verb pipedot is the string description of the pipeline
\begin{verbatim}pipedot = """
......@@ -48,9 +100,7 @@
\item \verb codedir is the path of the processing code files (.py)
\item \verb prefix is the path of the processed data repository
\end{itemize}
\begin{figure}
\includegraphics[width=0.50\textwidth]{pipelet_scheme_small.pdf}
\end{figure}
\end{frame}
......@@ -58,16 +108,18 @@
\subsection{Writing segment scripts}
\begin{frame}[fragile]{Writing segment scripts}
The default segment environment provides utilities to:
\begin{itemize}
\item A segment is a python script (\verb .py file)
\item It benefits from an improved namespace to:
\begin{itemize}
\item control the pipe parallelization scheme;
\begin{figure}
\includegraphics[width=0.90\textwidth]{seg_scheme.pdf}
\includegraphics[width=0.98\textwidth]{seg_scheme.pdf}
\end{figure}
\item save and load I/O's and provide filenames;
\item save and load parameters;
\item execute or include sub processings
\item execute or include subprocess
\end{itemize}
\end{itemize}
\end{frame}
......@@ -82,8 +134,8 @@ process) into a \textcolor{blue}{task list}.
One can empty the \textcolor{blue}{task list} in different modes:
\begin{itemize}
\item the interactive mode (or debugging mode)
\item the process/thread mode
\item the batch mode
\item the process/thread mode (for smp machine)
\item the batch mode (for cluster)
\end{itemize}
\end{frame}
......@@ -91,46 +143,47 @@ One can empty the \textcolor{blue}{task list} in different modes:
\subsection{Browsing a pipeline}
\begin{frame}[fragile]{Browsing a pipeline : \href{http://localhost:8080}{http://localhost:8080}}
The web interface includes:
\begin{enumerate}
\item a pipe tree view
\begin{itemize}
\item filters on tags
\item filters on date
\item delete pipelines
\end{itemize}
\item a segment view
\begin{itemize}
\item highlight the dependencies
\item read the code and parameters
\end{itemize}
\item a product view
\begin{itemize}
\item download the product files
\item display figures
\item delete products
\end{itemize}
\item a log view
\end{enumerate}
From the web interface one can: \\
\vspace{0.5cm}
\begin{tabular}{ll}
$\bullet$ Filter/delete pipe instances & from the pipeline page\\
$\bullet$ Highlight dependencies & from the segment page\\
$\bullet$ Read code & from the segment page\\
$\bullet$ Read log files & from the log page\\
$\bullet$ Download/visualize/delete product files & from the product page\\
\end{tabular}
\end{frame}
\section{Getting started}
\begin{frame}
\tableofcontents[currentsection]
\end{frame}
\section{From the developer point of view}
\begin{frame}[fragile]{Getting \pipelet}
Download from \url{http://gitorious.org/pipelet}
\begin{itemize}
\item Git repository\\
\begin{centering}\verb!git clone git@gitorious.org:pipelet/pipelet.git!
\end{centering}
\item Open wiki including documentation
\end{itemize}
Features and bugs are tracked from the IN2P3 forge.
\end{frame}
\subsection{The pipelet actors}
\section{Going further}
\begin{frame}
\tableofcontents[currentsection]
\end{frame}
\begin{frame}{The pipelet actors}
\begin{frame}{The \pipelet actors}
\begin{figure}
\includegraphics[width=1\textwidth]{pipelet_actors.pdf}
\end{figure}
\end{frame}
\subsection{The pipeline object}
\begin{frame}[fragile]{The pipeline object}
The pipelet scheme is summarized by its segments' relations:
The \pipelet scheme is summarized by its segments' relations:
\begin{itemize}
\item a tree view (dot scheme)
\item a flat view
......@@ -152,7 +205,7 @@ For each segment, a unique hash key is computed from:
\textsl{removing blank lines and comments.}
\end{frame}
\subsection{The task object}
\begin{frame}[fragile]{The task object}
A task is the association of a \textcolor{blue}{segment} with its \textcolor{blue}{input} product, its
execution \textcolor{blue}{status} and its \textcolor{blue}{output} product(s). \\
......@@ -178,7 +231,7 @@ The task attributs:
\end{columns}
\end{frame}
\subsection{The scheduler object}
\begin{frame}[fragile]{The scheduler object}
......@@ -212,7 +265,7 @@ The tasks inputs are build from:
\subsection{The worker object}
\begin{frame}[fragile]{The worker object}
\begin{figure}
......@@ -229,7 +282,7 @@ The \verb worker.execute_task(task) function:
\end{frame}
\subsection{The tracker object}
\begin{frame}[fragile]{The tracker object}
\begin{figure}
......@@ -250,5 +303,23 @@ The \verb tracker.update_status(task) function:
\end{frame}
\begin{frame}{Adapt to DPC environment}
\begin{itemize}
\item put intermediate products into DMC
\begin{itemize}
\item need to provide filenames
\item including group name
\item and product identifier
\end{itemize}
\item link to the DPC modules and pipelines facilities ?
\begin{itemize}
\item build a pipelet module + parameter file from an existing pipeline ?
\item build as many modules + parameter files as segments ?
\item convert a pipeline into a DMC pipeline object ?
\end{itemize}
\item any more ambitious ideas ?
\end{itemize}
\end{frame}
\end{document}
\ No newline at end of file
......@@ -144,6 +144,7 @@ class Environment(EnvironmentBase):
self.logger = init_logger (self._get_data_fn(""), self._get_log_file(), level=[])
def get_data_fn(self, x):
""" Complete the filename with the path to the working
directory.
......
pipelet/static/home.png

1.03 KB | W: | H:

pipelet/static/home.png

721 Bytes | W: | H:

pipelet/static/home.png
pipelet/static/home.png
pipelet/static/home.png
pipelet/static/home.png
  • 2-up
  • Swipe
  • Onion skin
......@@ -48,12 +48,14 @@ li.button{
.icon.apply{
background-image: url(/static/apply.png)
}
icon.home{
background-image: url(/static/home.gif)
}
.icon.clear{
background-image: url(/static/clear.png)
background-image: url(/static/clear.png)
}
.icon.home{
background-image: url(/static/home.png)
}
.icon.tag{
background-image: url(/static/tag.gif)
......@@ -65,6 +67,10 @@ icon.home{
background-image: url(/static/log.png)
}
.icon.image{
background-image: url(/static/icon_jpg.gif)
}
.icon img{
width:10px;
height:10px;
......
......@@ -167,6 +167,32 @@ class Web:
raise cherrypy.HTTPRedirect('/'+self.name+'/',303)
def _get_thumbnail(self, segid):
""" Return the list of thumbnail for a given segment.
Parameters
----------
seg_id: segment identifier.
Returns
-------
string list
"""
conn = sqlite3.connect(self.db_file,check_same_thread=True)
with conn:
seg, currdir = conn.execute(
'select seg, curr_dir from segments where seg_id = ?'
,(segid,)).fetchone()
l = conn.execute('select str_input, task_id from tasks where seg_id=?',(segid,)).fetchall()
conn.close()
lstfile = []
for e in l:
directory = os.path.relpath(e[0],start=currdir)
directory = self.check_path (segid, directory)
lstfile = lstfile+(glob(os.path.join(directory,'*.png')))
return lstfile
@cherrypy.expose
@read_access
def filter (self, tag=None, date=None):
......@@ -202,7 +228,7 @@ class Web:
@cherrypy.expose
@read_access
def index(self, highlight=None):
def index(self, highlight=None, thumbnail=None):
""" Pipeline instances tree view
Print the pipeline instances trough a tree view.
......@@ -219,7 +245,7 @@ class Web:
l = conn.execute('select seg, curr_dir, seg_id, param from segments order by curr_dir').fetchall()
html = html_tmp
html += '<a class="icon home" href="/%s/"><span class="text"><small>Home</small></span></a>'%(self.name)
html += '<a class="icon home" href="/%s/"><small>Home</small></a>'%(self.name)
html += '<h1>Pipelines in %s </h1>'%self.name
## Filter fieldset
......@@ -266,6 +292,24 @@ class Web:
html += '<a class="icon delete" href="javascript:del_seg();"><small>Delete</small></a>'
html += '<a class="icon log" href="log?logdir=%s"><small>Browse log</small></a>'%(l[0][1].split("seg")[0]+"log")
html +='</p></fieldset>'
## Thumbnails
if thumbnail is not None:
html += '<fieldset id="filters"><legend><span class="text">Thumbnails</span></legend>'
html += '<div style=""width:100%;overflow:scroll;">'
html += '<table width="100%"><tr>'
lstimage = self._get_thumbnail (thumbnail)
for img in lstimage:
html += '<td><a href="download?segid=%s&filepath=%s"><img src="download?segid=%s&filepath=%s" width=100 border=no></a></td>'%(thumbnail, os.path.abspath(img),thumbnail, os.path.abspath(img))
html += '<tr></tr></table></div>'
html +='</fieldset>'
html += '<br>'
html +='<div class="list"><ul class="mktree" id="segtree">'
......@@ -285,9 +329,10 @@ class Web:
ss = s[3]
if ss is None:
ss = ""
print s
ss = '<a class="icon image" href="index?thumbnail=%d"></a>'%(s[2])+ss
for stat in e:
ss = '<a href="product?segid=%s&status=%s" class=%s>%d</a>, '%(s[2],stat[0], stat[0], stat[1]) + ss
ss = '<a href="product?segid=%s&status=%s" class=%s>%d</a>, '%(s[2],stat[0], stat[0], stat[1]) + ss
ss += '<INPUT type="checkbox" name="checkbox" id="%d"'%(s[2])
diff = s[1].count('/') - indent
if diff == 1:
......@@ -326,7 +371,7 @@ class Web:
,(segid,)).fetchone()
l = conn.execute('select str_input, task_id from tasks where seg_id=? and status=?',(segid, status)).fetchall()
conn.close()
html = html_tmp + '<a class="icon home" href="/%s/"><span class="text"><small>Home</small></span></a>'%(self.name)
html = html_tmp + '<a class="icon home" href="/%s/"><small>Home</small></a>'%(self.name)
html += '<h1> Data products for %s tasks in segment %s </h1>' % (status, seg)
html += '<div class="list"><p>Directory : %s</p> %d <span class="%s">%s</span> tasks<br><br> '%( currdir,len(l), status, status)
html += '<a class="icon delete" href="javascript:del_prod(%d)"><small>Delete</small></a><ul>'%(int(segid))
......@@ -569,17 +614,35 @@ class Web:
directory: string pipeline directory path.
"""
directory = self.check_path (segid, directory)
html = html_tmp + '<a class="icon home" href="/%s/"><span class="text"><small>Home</small></span></a>'%(self.name)
html = html_tmp + '<a class="icon home" href="/%s/"><small>Home</small></a>'%(self.name)
html += '<h1> Content of %s </h1> <div class="list"><ul>'%directory
for filename in sorted(glob(os.path.join(directory,'*'))):
absPath = os.path.abspath(filename)
if os.path.islink(absPath):
html += '<li><a href="pipedir?segid=%d&directory='%int(segid) + absPath + '">' + os.path.basename(filename)+"("+os.path.realpath(filename)+")" + "</a></li>"
elif os.path.isdir(absPath):
html += '<li><a href="pipedir?segid=%d&directory='%int(segid) + absPath + '">' + os.path.basename(filename) + "</a></li>"
lstfile = (glob(os.path.join(directory,'*')))
imglist = []
datlist = []
loglist = []
for filename in lstfile:
if (filename.split(".")[1] in ["png", "jpg", "eps", "pdf"]):
imglist.append(filename)
elif (filename.split(".")[1] in ["meta", "log", "err"]):
loglist.append(filename)
else:
html += '<li><a href="download?segid=%d&filepath='%int(segid) + absPath + '">' + os.path.basename(filename) + "</a> </li>"
datlist.append(filename)
biglist = [sorted(loglist), sorted(datlist), sorted(imglist)]
name = ['LOGS', 'DATA', 'FIGURES']
for L in biglist:
if L:
html += '<span class="text"><small>%s</small></span>'%name[biglist.index(L)]
for filename in L:
absPath = os.path.abspath(filename)
if os.path.islink(absPath):
html += '<li><a href="pipedir?segid=%d&directory='%int(segid) + absPath + '">' + os.path.basename(filename)+"("+os.path.realpath(filename)+")" + "</a></li>"
elif os.path.isdir(absPath):
html += '<li><a href="pipedir?segid=%d&directory='%int(segid) + absPath + '">' + os.path.basename(filename) + "</a></li>"
else:
html += '<li><a href="download?segid=%d&filepath='%int(segid) + absPath + '">' + os.path.basename(filename) + "</a> </li>"
html += """</ul></div></body></html>"""
return html
......@@ -592,7 +655,7 @@ class Web:
""" Print the content of the log directory.
"""
directory = logdir
html = html_tmp + '<a class="icon home" href="/%s/"><span class="text"><small>Home</small></span></a>'%(self.name)
html = html_tmp + '<a class="icon home" href="/%s/"><small>Home</small></a>'%(self.name)
html += '<h1> Content of %s </h1> <div class="list"><a class="icon delete" href="delete_log?logdir=%s"><small>Delete logs</small></a><ul>'%(directory,logdir)
for filename in sorted(glob(os.path.join(directory,'*')), reverse=True):
absPath = os.path.abspath(filename)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment