harvest.py 5.3 KB
Newer Older
1 2 3
""" Harvest Controllers

"""
4
import traceback
5

6
from gluon.storage import Storage
7
from harvest_tools import get_harvester_tool, ToolException
8
from plugin_dbui import Selector
9

10
# Translated message shown when the selection made by the user
# does not match any harvester (see run_all).
MSG_NO_HARVESTER = T("No harvesters for your selection !!!")

# HTML snippet opening an Ext JS alert box.
# The two placeholders are the title and the body of the alert. Both are
# inserted between single quotes, so they must not contain a bare "'".
INLINE_ALERT = "<script>Ext.Msg.alert('%s', '%s');</script>"


15
def free_run():
    """Run a free harvester.

    All harvester parameters are defined via the selector: every entry
    of ``fields`` has to be filled in the form.

    Returns:
        * an inline Ext JS alert (``INLINE_ALERT``) when a form field is
          empty or when no harvester tool is found for the controller;
        * the translated message of a ``ToolException``;
        * the HTML formatted traceback for any other error;
        * otherwise the value of ``tool.report()``, rendered with the
          ``harvest/layout.html`` view.

    """
    fields = ('collections',
              'controller',
              'host',
              'id_projects',
              'id_teams',
              'id_categories',
              'ratio')

    try:
        selector = Selector(virtdb.free_harvester_selector,
                            exclude_fields=('mode', 'year_start', 'year_end'))

        # all parameters are mandatory: stop at the first empty one
        for el in fields:
            if not selector[el]:
                msg = T('All fields of the form have to be defined !!!')
                msg += "<br>"
                msg += T('The field "%s" is missing.') % el
                return INLINE_ALERT % (T('Error'), msg)

        # same guard as in insert_marcxml / run: without it a missing tool
        # ends up as a TypeError traceback instead of an alert
        tool_class = get_harvester_tool(selector.controller)
        if not tool_class:
            return INLINE_ALERT % (T('Error'), T('Select a controller.'))

        tool = tool_class(db, selector, debug=False)

        # the harvester parameters are copied straight from the selector
        tool.harvester = Storage()
        for el in fields:
            tool.harvester[el] = selector[el]

        tool()

    # "except ... as" is valid for python 2.6+ and python 3
    except ToolException as e:
        return T(str(e))

    # last resort: show the traceback to the user instead of a ticket
    except BaseException:
        msg = '<br><br><hr/>'
        msg += CODE(traceback.format_exc()).xml()
        msg += '<hr/>'
        return msg

    response.view = 'harvest/layout.html'
    return tool.report()
59 60


61 62
def insert_marcxml():
    """Insert a MarcXML record in the database.

    The parameters are read from the ``marc12_selector`` virtual table.

    Returns:
        * an inline Ext JS alert (``INLINE_ALERT``) when no harvester tool
          is found for the selected controller;
        * the translated message of a ``ToolException``;
        * the HTML formatted traceback for any other error;
        * otherwise the value of ``tool.report()``, rendered with the
          ``harvest/layout.html`` view.

    """
    try:
        # NOTE: the trailing comma matters, ('mode') is the string 'mode'
        # and not a tuple containing one field name
        selector = Selector(virtdb.marc12_selector, exclude_fields=('mode',))

        tool_class = get_harvester_tool(selector.controller)
        if not tool_class:
            return INLINE_ALERT % (T('Error'), T('Select a controller.'))

        tool = tool_class(db, selector, debug=False)
        tool()

    # "except ... as" is valid for python 2.6+ and python 3
    except ToolException as e:
        return T(str(e))

    # last resort: show the traceback to the user instead of a ticket
    except BaseException:
        msg = '<br><br><hr/>'
        msg += CODE(traceback.format_exc()).xml()
        msg += '<hr/>'
        return msg

    response.view = 'harvest/layout.html'
    return tool.report()
86 87


88
def run():
    """Run a harvester.

    Scan the cds/invenio stores to find articles published during
    a given range of years and for a given team/project.
    Insert them in the database if they don't exist.

    The scanning is steered using the current request arguments as well as
    the harvest parameters associated to this action.

    Search arguments are defined via the harvester selector.

    Returns:
        * an inline Ext JS alert (``INLINE_ALERT``) when no harvester tool
          is found for the selected controller;
        * the translated message of a ``ToolException``;
        * the HTML formatted traceback for any other error;
        * otherwise the value of ``tool.report()``, rendered with the
          ``harvest/layout`` view matching the request extension.

    """
    try:
        selector = Selector(virtdb.harvester_selector,
                            exclude_fields=('mode', 'year_start', 'year_end'))

        tool_class = get_harvester_tool(selector.controller)
        if not tool_class:
            return INLINE_ALERT % (T('Error'), T('Select an harvester.'))

        tool = tool_class(db, selector, debug=False)
        tool()

    # "except ... as" is valid for python 2.6+ and python 3
    except ToolException as e:
        return T(str(e))

    # last resort: show the traceback to the user instead of a ticket
    except BaseException:
        msg = '<br><br><hr/>'
        msg += CODE(traceback.format_exc()).xml()
        msg += '<hr/>'
        return msg

    # the view depends on the format requested by the client
    response.view = 'harvest/layout.%s' % request.extension
    return tool.report()
123 124


125 126 127 128
def run_all():
    """Run all harvesters in one go.

    The harvesters are those of the ``harvesters`` table matching the
    team and / or the project of the selector. All of them are run when
    neither is selected. The logs of the individual runs are merged
    in a single report.

    Returns:
        * an inline Ext JS alert (``INLINE_ALERT``) when no harvester
          matches the selection;
        * the translated message of a ``ToolException``;
        * the HTML formatted traceback for any other error;
        * otherwise a dictionary with the keys ``collection_logs``,
          ``harvester``, ``logs`` and ``selector``, rendered with the
          ``harvest/layout`` view matching the request extension.

    """
    collection_logs = []
    collections = []
    logs = []

    try:
        selector = Selector(virtdb.run_all_harvesters_selector,
                            exclude_fields=('mode', 'year_start', 'year_end'))

        # restrict the harvesters to the selected team and / or project.
        # DAL objects overload the comparison operators (see the "==" below),
        # so the query is tested against None by identity.
        query = None
        for fieldname in ('id_teams', 'id_projects'):
            if selector[fieldname]:
                q = db.harvesters[fieldname] == selector[fieldname]
                if query is not None:
                    query = (query) & (q)
                else:
                    query = q

        harvesters = db(query).select(db.harvesters.ALL)
        if not len(harvesters):
            return INLINE_ALERT % (T('Error'), MSG_NO_HARVESTER)

        for harvester in harvesters:
            # steer the tool with the parameters of the current harvester
            selector.controller = harvester.controller
            selector.id_projects = harvester.id_projects
            selector.id_teams = harvester.id_teams

            collections.extend(harvester.collections.split(','))

            # NOTE(review): unlike in run(), a missing tool class is not
            # checked here and would end up in the traceback branch below.
            tool_class = get_harvester_tool(selector.controller)
            tool = tool_class(db, selector, debug=False)

            tool()

            collection_logs.extend(tool.collection_logs)
            logs.extend(tool.logs)

    # "except ... as" is valid for python 2.6+ and python 3
    except ToolException as e:
        return T(str(e))

    # last resort: show the traceback to the user instead of a ticket
    except BaseException:
        msg = '<br><br><hr/>'
        msg += CODE(traceback.format_exc()).xml()
        msg += '<hr/>'
        return msg

    # tune harvester / selector parameters used in the report title
    harvester = Storage(controller='all harvesters',
                        collections=','.join(collections))

    # no team / project selected: drop the project left by the last loop turn
    if query is None:
        selector.id_projects = None

    # delegate rendering to the report view
    response.view = 'harvest/layout.%s' % request.extension
    return dict(collection_logs=collection_logs,
                harvester=harvester,
                logs=logs,
                selector=selector)