harvest.py 4.39 KB
Newer Older
1 2 3
""" Harvest Controllers

"""
4
import traceback
5

6
from gluon.storage import Storage
7
from harvest_tools import get_harvester_tool, ToolException
8
from plugin_dbui import Selector
9 10


11
def free_run():
    """Run a free harvester.

    All harvester parameters are defined via the selector built from
    ``virtdb.free_harvester_selector``. Every field listed in ``fields``
    has to be set in the form.

    Returns:
        - a translated message when one of the required fields is empty
          or when the harvester tool raises a ``ToolException``;
        - an HTML string containing the formatted traceback when any
          other exception is raised;
        - otherwise the value of ``tool.report()``, rendered with the
          view ``harvest/layout.html``.

    """
    fields = ('collections',
              'controller',
              'host',
              'id_projects',
              'id_teams',
              'id_categories',
              'ratio')

    try:
        selector = Selector(virtdb.free_harvester_selector,
                            exclude_fields=('mode', 'year_start', 'year_end'))

        # every required field must carry a truthy value
        if not all(selector[el] for el in fields):
            return T('All fields of the form have to be defined !!!')

        tool_class = get_harvester_tool(selector.controller)
        tool = tool_class(selector, debug=False)

        # the harvester parameters are copied from the selector
        # into a Storage attached to the tool
        tool.harvester = Storage()
        for el in fields:
            tool.harvester[el] = selector[el]

        tool.process(db)

    except ToolException as e:
        return T(str(e))

    except BaseException:
        # deliberately broad: any other failure is shown to the user
        # as a formatted traceback instead of propagating
        msg = '<br><br><hr/>'
        msg += CODE(traceback.format_exc()).xml()
        msg += '<hr/>'
        return msg

    response.view = 'harvest/layout.html'
    return tool.report()
52 53


54 55
def insert_marcxml():
    """Insert a MarcXML record in the database.

    The harvester tool is selected via the ``controller`` field of the
    selector built from ``virtdb.marc12_selector``.

    Returns:
        - a translated message when no tool matches the selected
          controller or when the tool raises a ``ToolException``;
        - an HTML string containing the formatted traceback when any
          other exception is raised;
        - otherwise the value of ``tool.report()``, rendered with the
          view ``harvest/layout.html``.

    """
    try:
        selector = Selector(virtdb.marc12_selector)

        tool_class = get_harvester_tool(selector.controller)
        if not tool_class:
            return T('Select a controller.')

        tool = tool_class(selector, debug=False)
        tool.process(db)

    except ToolException as e:
        return T(str(e))

    except BaseException:
        # deliberately broad: any other failure is shown to the user
        # as a formatted traceback instead of propagating
        msg = '<br><br><hr/>'
        msg += CODE(traceback.format_exc()).xml()
        msg += '<hr/>'
        return msg

    response.view = 'harvest/layout.html'
    return tool.report()
79 80


81
def run():
    """Run a harvester.

    Scan the cds/invenio stores to find articles published during
    a given range of years and for a given team/project.
    Insert them in the database if they don't exist.

    The scanning is steered using the current request arguments as well as
    the harvest parameters associated to this action.

    Search arguments are defined via the harvester selector
    (``virtdb.harvester_selector``).

    Returns:
        - a translated message when no tool matches the selected
          controller or when the tool raises a ``ToolException``;
        - an HTML string containing the formatted traceback when any
          other exception is raised;
        - otherwise the value of ``tool.report()``, rendered with the
          view ``harvest/layout.html``.

    """
    try:
        selector = Selector(virtdb.harvester_selector,
                            exclude_fields=('year_start', 'year_end'))

        tool_class = get_harvester_tool(selector.controller)
        if not tool_class:
            return T('Select an harvester.')

        tool = tool_class(selector, debug=False)
        tool.process(db)

    except ToolException as e:
        return T(str(e))

    except BaseException:
        # deliberately broad: any other failure is shown to the user
        # as a formatted traceback instead of propagating
        msg = '<br><br><hr/>'
        msg += CODE(traceback.format_exc()).xml()
        msg += '<hr/>'
        return msg

    response.view = 'harvest/layout.html'
    return tool.report()
116 117


118 119 120 121
def run_all():
    """Run all harvesters in one go.

    Loop over every row of the ``harvesters`` table. For each row the
    controller, project and team are copied into the selector built from
    ``virtdb.run_all_harvesters_selector`` and the matching harvester
    tool is processed. Logs, counters and search URLs of all tools are
    accumulated.

    Returns:
        - a translated message when a tool raises a ``ToolException``;
        - an HTML string containing the formatted traceback when any
          other exception is raised;
        - otherwise a dictionary with the keys ``harvester``, ``logs``,
          ``nfound``, ``ninsert``, ``selector`` and ``urls``, rendered
          with the view ``harvest/layout.html``.

    """
    # accumulators filled while iterating over the harvesters
    collections = []
    logs = []
    nfound = 0
    ninsert = 0
    urls = []

    try:
        selector = Selector(virtdb.run_all_harvesters_selector,
                            exclude_fields=('year_start', 'year_end'))

        for row in db().select(db.harvesters.ALL):

            # steer the shared selector with the current harvester
            selector.controller = row.controller
            selector.id_projects = row.id_projects
            selector.id_teams = row.id_teams

            # collections are stored as a comma separated string
            collections.extend(row.collections.split(','))

            tool_class = get_harvester_tool(selector.controller)
            tool = tool_class(selector, debug=False)

            tool.process(db)

            logs.extend(tool.logs)
            nfound += tool.nfound
            ninsert += tool.ninsert
            urls.extend(tool.search_urls)

    except ToolException as e:
        return T(str(e))

    except BaseException:
        # deliberately broad: any other failure is shown to the user
        # as a formatted traceback instead of propagating
        msg = '<br><br><hr/>'
        msg += CODE(traceback.format_exc()).xml()
        msg += '<hr/>'
        return msg

    response.view = 'harvest/layout.html'
    return dict(harvester=Storage(controller='all harvesters',
                                  collections=','.join(collections)),
                logs=logs,
                nfound=nfound,
                ninsert=ninsert,
                selector=selector,
                urls=urls)