Commit 883aad10 authored by Maude Le Jeune's avatar Maude Le Jeune
Browse files

first attempt at git repository

parent 7c43b3f0
......@@ -169,12 +169,12 @@ Practically, the creation of a Pipeline object requires 3 arguments:
#+begin_src python
from pipelet.pipeline import Pipeline
P = Pipeline(pipedot, codedir="./", prefix="./")
P = Pipeline(pipedot, code_dir="./", prefix="./")
#+end_src
- =pipedot= is the string description of the pipeline
- =codedir= is the path where the segment scripts can be found
- =code_dir= is the path where the segment scripts can be found
- =prefix= is the path to the data repository (see below [[*Hierarchical%20data%20storage][Hierarchical data storage]])
It is possible to output the graphviz representation of the pipeline
......@@ -987,6 +987,46 @@ confusion.
The hook scripts are included into the hash key computation.
** Segment script repository
*** Local repository
By default, segment scripts are read from a local directory, specified
at the pipeline initialization with the parameter named =code_dir=:
#+begin_src python
from pipelet.pipeline import Pipeline
P = Pipeline(pipedot, code_dir="./", prefix="./")
#+end_src
The segment script contents are stored immediately, to prevent
any modification between the pipeline start time and the actual execution
of each segment.
It is generally a good idea to make this directory controlled by an
RCS, to ease the reproducibility of the pipeline (even if the pipelet
engine makes a copy of the segment script in the segment output
directory). This can be done manually or using one of the following
repositories.
*** Git repository
A Git repository is defined by:
+ its URL and the location (starting from Git repository) to the segment scripts
+ a revision string (heads or tags, default is 'HEAD')
See http://www.kernel.org/pub/software/scm/git/docs/gitrevisions.html
for more information.
#+begin_src python
from pipelet.pipeline import Pipeline
P = Pipeline(pipedot, code_dir = "./repo", git_url=("git://host.xz[:port]/path/to/repo.git/", "path/to/segments"), git_rev='HEAD', prefix="./")
#+end_src
At the pipeline initialization, a local copy of the Git repository
will be downloaded or updated to =code_dir= using the =git clone= and
=git checkout= commands.
** Writing custom environments
The Pipelet software provides a set of default utilities available
......
......@@ -93,7 +93,7 @@ class Pipeline:
in order to allow the execution in non-interactive environment.
"""
def __init__(self, seg_list, code_dir=None, prefix='./', sqlfile=None, matplotlib=False, matplotlib_interactive=False, env=Environment):
def __init__(self, seg_list, code_dir=None, prefix='./', sqlfile=None, matplotlib=False, matplotlib_interactive=False, env=Environment, git_url=(None,None),git_rev='HEAD'):
""" Initialize a pipeline.
Parameters
......@@ -108,6 +108,7 @@ class Pipeline:
in order to allow the execution in non-interactive environment.
matplotlib_interactive: same thing for interactive workers
env: extension of the EnvironmentBase class.
"""
if isinstance(seg_list, str):
self.from_dot(seg_list)
......@@ -116,8 +117,10 @@ class Pipeline:
## string, the location of the segment source code
self.repository = None
if code_dir:
print self._seg_list
self.repository = LocalRepository(self._seg_list, code_dir)
elif git_url:
self.repository = GitRepository(self._seg_list, git_url=git_url, code_dir=code_dir,git_rev=git_rev)
## string, indicates where to save the pipeline products.
self._prefix = path.realpath(prefix)
if not os.path.exists(prefix):
......
......@@ -14,7 +14,7 @@
## along with this program; if not, see http://www.gnu.org/licenses/gpl.html
from glob import glob
from os import path
from os import path, system
import pipelet
import sys
import re
......@@ -267,8 +267,8 @@ class LocalRepository(Repository):
Parameters
----------
src_path : where to find segment's source code files
lib_path : where to find segment's library
lst_seg : list of segment name
src_path : where to find segment's script
"""
## dict, code string corresponding to each segment
......@@ -312,13 +312,71 @@ class GitRepository(Repository):
A git repository bla bla bla.
"""
def __init__(self, giturl, revision):
##git_revision = subprocess.check_output("git", "rev-parse", "HEAD", cwd=mod_path)
pass
def __init__(self, lst_seg, git_url=(None,None), code_dir=None, git_rev='HEAD' ):
    """ Initialize a git repository.

    Clone the repository into code_dir unless code_dir already
    contains a '.git' directory, check out git_rev, then hand lst_seg
    to self._fill_dict.

    Parameters
    ----------
    lst_seg : list of segment name
    git_url : tuple (url, sub_dir). url is the git URL, sub_dir is
        the location of the segment scripts, relative to the root of
        the repository (None is treated as the root itself).
    code_dir : local directory of the working copy. When None, it
        is set to './<name>', <name> being the last path component
        found before '.git' in the URL.
    git_rev : revision string given to 'git checkout' (default 'HEAD')

    Raises
    ------
    RepositoryError if the URL is missing or rejected, or if the
    clone or the checkout fails.
    """
    # Local import: only this method needs subprocess.
    import subprocess

    def run_git(args, cwd=None):
        """ Run 'git <args>' without a shell and return its exit status. """
        try:
            return subprocess.call(["git"] + args, cwd=cwd)
        except OSError:
            # git executable not found, or cwd cannot be entered:
            # report it as a failed command.
            return -1

    ## dict, code string corresponding to each segment
    self._code = {}
    self._hook = {}
    self._deps = {}
    self._all_string = {}

    (self.git_url, sub_dir) = git_url
    # The default git_url is (None, None): reject it explicitly instead
    # of letting re.match fail with a TypeError.
    # NOTE(review): the '.' characters of the pattern are unescaped, so
    # any URL containing '<char>git/' is accepted; pattern kept as is to
    # avoid rejecting URLs that are accepted today.
    if self.git_url is None or not re.match(".*.git/", self.git_url):
        raise RepositoryError('Not a valid git url %s'%(self.git_url))
    self.git_rev = git_rev

    if code_dir is None:
        url_parts = self.git_url.split(".git")
        if len(url_parts) < 2:
            # No '.git' in the URL: no directory name can be derived.
            raise RepositoryError('Not a valid git url %s'%(self.git_url))
        code_dir = "./"+url_parts[-2].split('/')[-1]
    # Expand '~' and '$VAR' once, so that the '.git' test, the git
    # commands and src_path all refer to the same directory.
    code_dir = path.expanduser(path.expandvars(code_dir))

    if not path.isdir(path.join(code_dir, ".git")):
        if run_git(["clone", self.git_url, code_dir]) != 0:
            raise RepositoryError('Can t clone git repository %s'%(self.git_url))
    # cwd= makes the checkout fail when code_dir cannot be entered,
    # instead of running it in the current directory.
    if run_git(["checkout", git_rev], cwd=code_dir) != 0:
        raise RepositoryError('Can t checkout git revision %s'%(git_rev))

    self.src_path = path.expanduser(path.expandvars(path.join(code_dir, sub_dir or "")))
    self._fill_dict(lst_seg)
def get_code_source(self, filename):
    """ Read source code from file.

    Parameters
    ----------
    filename: string, path of the file to read

    Returns
    -------
    string, content of file
    """
    # 'with' closes the file even if read() raises.
    with open(filename, "r") as fid:
        return fid.read()
def get_fns(self):
    """ List the filename candidates.

    Returns
    -------
    list of string, one per '*.py' file matched by glob in
    self.src_path; each entry is prefixed with self.src_path.
    """
    pattern = path.join(self.src_path, '*.py')
    return glob(pattern)
class CVSRepository(Repository):
""" A CVS repository.
......@@ -328,9 +386,9 @@ class CVSRepository(Repository):
def __init__(self):
pass
def get_code_file(self, seg):
def get_code_source(self, filename):
pass
def get_hook_file(self, seg):
def get_fns(self):
pass
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment