harvest.py 4.91 KB
Newer Older
1 2 3
""" Harvest Controllers

"""
4
import traceback
5

6
from gluon.storage import Storage
7
from harvest_tools import get_harvester_tool, ToolException
8
from plugin_dbui import Selector
9

10
# Message returned by run_all() when the selection matches no harvester.
# NOTE(review): T is not imported in this file; presumably it is the web2py
# translator injected into the controller environment -- confirm.
MSG_NO_HARVESTER = T("No harvesters for your selection !!!")
11

12
def free_run():
    """Run a free harvester.

    All harvester parameters are read from the free harvester selector.
    Every field listed in ``fields`` must be set in the selector.

    Returns:
        The value of ``tool.report()`` on success; the view is then set to
        ``harvest/layout.html``. Otherwise a message is returned: a
        translated string when a field is missing, when no tool is found
        for the controller or when a ``ToolException`` is raised, and an
        HTML fragment containing the traceback for any other exception.

    """
    fields = ('collections',
              'controller',
              'host',
              'id_projects',
              'id_teams',
              'id_categories',
              'ratio')

    try:
        selector = Selector(virtdb.free_harvester_selector,
                            exclude_fields=('mode', 'year_start', 'year_end'))

        # refuse to run as soon as one parameter is not set
        for el in fields:
            if not selector[el]:
                return T('All fields of the form have to be defined !!!')

        # the other harvest controllers treat a falsy tool class as
        # "no tool for this controller"; do the same here instead of
        # failing on the call below
        tool_class = get_harvester_tool(selector.controller)
        if not tool_class:
            return T('Select a controller.')

        tool = tool_class(selector, debug=False)

        # copy the selector values into the harvester attribute of the tool
        tool.harvester = Storage()
        for el in fields:
            tool.harvester[el] = selector[el]

        tool.process(db)

    except ToolException as e:
        return T(str(e))

    except BaseException:
        # last resort: show the traceback in the page instead of raising
        msg = '<br><br><hr/>'
        msg += CODE(traceback.format_exc()).xml()
        msg += '<hr/>'
        return msg

    response.view = 'harvest/layout.html'
    return tool.report()
53 54


55 56
def insert_marcxml():
    """Insert a MarcXML record in the database.

    The harvester tool is selected from the ``controller`` value of the
    marc12 selector and run against ``db``.

    Returns:
        The value of ``tool.report()`` on success; the view is then set to
        ``harvest/layout.html``. Otherwise a message is returned: a
        translated string when no tool is found for the controller or when
        a ``ToolException`` is raised, and an HTML fragment containing the
        traceback for any other exception.

    """
    try:
        # NOTE: the trailing comma matters, ('mode') is the plain string
        # 'mode' and not a one-element tuple
        selector = Selector(virtdb.marc12_selector, exclude_fields=('mode',))

        tool_class = get_harvester_tool(selector.controller)
        if not tool_class:
            return T('Select a controller.')

        tool = tool_class(selector, debug=False)
        tool.process(db)

    except ToolException as e:
        return T(str(e))

    except BaseException:
        # last resort: show the traceback in the page instead of raising
        msg = '<br><br><hr/>'
        msg += CODE(traceback.format_exc()).xml()
        msg += '<hr/>'
        return msg

    response.view = 'harvest/layout.html'
    return tool.report()
80 81


82
def run():
    """Run a harvester.

    Scan the cds/invenio stores to find articles published during
    a given range of years and for a given team/project.
    Insert them in the database if they don't exist.

    The scanning is steered using the current request arguments as well as
    the harvest parameters associated to this action.

    Search arguments are defined via the harvester selector.

    Returns:
        The value of ``tool.report()`` on success; the view is then set to
        ``harvest/layout.<extension>`` where the extension is the one of
        the current request. Otherwise a message is returned: a translated
        string when no tool is found for the controller or when a
        ``ToolException`` is raised, and an HTML fragment containing the
        traceback for any other exception.

    """
    try:
        selector = Selector(virtdb.harvester_selector,
                            exclude_fields=('mode', 'year_start', 'year_end'))

        tool_class = get_harvester_tool(selector.controller)
        if not tool_class:
            return T('Select an harvester.')

        tool = tool_class(selector, debug=False)
        tool.process(db)

    except ToolException as e:
        return T(str(e))

    except BaseException:
        # last resort: show the traceback in the page instead of raising
        msg = '<br><br><hr/>'
        msg += CODE(traceback.format_exc()).xml()
        msg += '<hr/>'
        return msg

    response.view = 'harvest/layout.%s' % request.extension
    return tool.report()
117 118


119 120 121 122
def run_all():
    """Run all harvesters in one go.

    The rows of the ``harvesters`` table are filtered by the team and / or
    the project set in the selector. When neither is set, no filter is
    applied. One tool is run per row and the results are accumulated.

    Returns:
        A dictionary with the keys ``harvester``, ``logs``, ``nfound``,
        ``ninsert``, ``selector`` and ``urls`` on success; the view is then
        set to ``harvest/layout.<extension>`` where the extension is the
        one of the current request. Otherwise a message is returned:
        ``MSG_NO_HARVESTER`` when no row matches the selection, a
        translated string when a ``ToolException`` is raised, and an HTML
        fragment containing the traceback for any other exception.

    """
    collections = []
    logs = []
    nfound = 0
    ninsert = 0
    urls = []

    try:
        selector = Selector(virtdb.run_all_harvesters_selector,
                            exclude_fields=('mode', 'year_start', 'year_end'))

        # AND together one condition per field set in the selector
        query = None
        for fieldname in ('id_teams', 'id_projects'):
            if not selector[fieldname]:
                continue

            q = db.harvesters[fieldname] == selector[fieldname]

            # compare with None explicitly rather than relying on the
            # truthiness of a query object
            if query is not None:
                query = (query) & (q)
            else:
                query = q

        rows = db(query).select(db.harvesters.ALL)
        if not len(rows):
            return MSG_NO_HARVESTER

        for row in rows:
            # the same selector is reused and re-targeted for each row
            selector.controller = row.controller
            selector.id_projects = row.id_projects
            selector.id_teams = row.id_teams

            collections.extend(row.collections.split(','))

            tool_class = get_harvester_tool(selector.controller)
            tool = tool_class(selector, debug=False)

            tool.process(db)

            logs.extend(tool.logs)
            nfound += tool.nfound
            ninsert += tool.ninsert
            urls.extend(tool.search_urls)

    except ToolException as e:
        return T(str(e))

    except BaseException:
        # last resort: show the traceback in the page instead of raising
        msg = '<br><br><hr/>'
        msg += CODE(traceback.format_exc()).xml()
        msg += '<hr/>'
        return msg

    response.view = 'harvest/layout.%s' % request.extension
    return dict(harvester=Storage(controller='all harvesters',
                                  collections=','.join(collections)),
                logs=logs,
                nfound=nfound,
                ninsert=ninsert,
                selector=selector,
                urls=urls)