harvest.py 10.6 KB
Newer Older
1 2 3
""" Harvest Controllers

"""
4
import traceback
5

6
from gluon.storage import Storage
7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
from harvest_tools import (format_author_fr, 
                           family_name_fr, 
                           get_harvester_tool,
                           PublicationsTool, 
                           ToolException)
from invenio_tools import (CdsException,
                           CheckAndFix,
                           CheckException,
                           Marc12Exception,
                           InvenioStore, 
                           Marc12,
                           OAI_URL)
from plugin_dbui import (get_id, 
                         Selector, 
                         to_formPanel,
                         UNDEF_ID)
23

24
# Message, passed through the translator T, shown to the user when the
# selection does not match any harvester.
MSG_NO_HARVESTER = T("No harvesters for your selection !!!")
25

26 27 28
# Script snippet calling Ext.Msg.alert. The two %s placeholders are filled,
# in that order, with the title and the message of the alert.
# NOTE(review): both values are inserted inside single-quoted JavaScript
# strings without escaping -- a quote in the message would break the script.
INLINE_ALERT = "<script>Ext.Msg.alert('%s', '%s');</script>"


29
def free_run():
    """Run a free harvester.

    All harvester parameters are defined via the selector. Every field
    listed in C{fields} has to be set in the form, otherwise an alert
    naming the first missing field is returned and nothing is run.

    @return: the report of the harvester tool, rendered with the view
    C{harvest/layout.html}. When something goes wrong a string is returned
    instead: an inline alert, the translated message of the
    C{ToolException} or the formatted traceback.

    """
    table = virtdb.free_harvester_selector
    fields = ('collections',
              'controller',
              'host',
              'id_projects',
              'id_teams',
              'id_categories',
              'ratio')

    try:
        selector = Selector(table,
                            exclude_fields=('mode', 'year_start', 'year_end'))

        # all the fields are mandatory, stop at the first missing one
        for el in fields:
            if not selector[el]:
                msg = T('All fields of the form have to be defined !!!')
                msg += "<br>"
                msg += T('The field "%s" is missing ...') % T(table[el].label)
                return INLINE_ALERT % (T('Error'), msg)

        # same guard as in the other harvest actions: do not try to
        # instantiate a tool when none is associated to the controller
        tool_class = get_harvester_tool(selector.controller)
        if not tool_class:
            return INLINE_ALERT % (T('Error'), T('Select an harvester.'))

        tool = tool_class(db, selector, debug=False)

        # the harvester parameters come from the selector
        tool.harvester = Storage()
        for el in fields:
            tool.harvester[el] = selector[el]

        tool()

    except ToolException as e:
        return T(str(e))

    except BaseException:
        # catch-all on purpose: the traceback is displayed to the user
        msg = '<br><br><hr/>'
        msg += CODE(traceback.format_exc()).xml()
        msg += '<hr/>'
        return msg

    response.view = 'harvest/layout.html'
    return tool.report()
74 75


76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122
def edit_insert():
    """Edit an invenio record and insert it in the database.

    The record identified by the selector (host, record_id) is retrieved
    from the invenio store and decoded. Its content is used to preset the
    values of the form built from the table C{db.publications}.

    @note: Recovery procedures are applied to fix basic non-conformity, but
    no checks are run. The user is editing the record to fix problems.

    @return: a dictionary with the keys C{cfg} (form configuration) and
    C{values} (values to be loaded in the form). An inline alert (string)
    is returned instead when a field of the selector is missing or when
    an exception is raised.

    """
    fields = ('controller',
              'host',
              'id_projects',
              'id_teams',
              'id_categories',
              'record_id')

    table = virtdb.edit_insert_selector

    try:
        selector = Selector(table)

        # all the fields are mandatory, stop at the first missing one
        for el in fields:
            if not selector[el]:
                msg = T('All fields of the form have to be defined !!!')
                msg += "<br>"
                msg += T('The field "%s" is missing ...') % T(table[el].label)
                return INLINE_ALERT % (T('Error'), msg)

        # record
        store = InvenioStore(selector.host)
        xml = store.get_record(selector.record_id)
        decode = Marc12()
        record = decode(xml)[0]

        # form configuration
        cfg = to_formPanel(db.publications)

        # tools to extract values to be loaded in the form
        values = {}
        check = CheckAndFix()
        tool = PublicationsTool(db, selector)

        # title, preprint, URL, report number
        values['PublicationsTitle'] = record.title()
        values['PublicationsPreprint'] = record.preprint_number()
        values['PublicationsPublication_url'] = record.paper_url()
        values['PublicationsReport_numbers'] = record.report_number()

        # authors
        # a failing check is ignored on purpose: the user fixes the
        # remaining problems in the form
        try:
            check.authors(record)
            check.format_authors(record, format_author_fr)

            check.my_authors(record,
                             reference=tool._my_author_list(record),
                             cmpFct=family_name_fr)

        except CheckException:
            pass

        values['PublicationsFirst_author'] = record.first_author()
        values['PublicationsAuthors'] = record.authors()
        values['PublicationsAuthors_cppm'] = record.my_authors

        # collaboration
        id_collaboration = get_id(db.collaborations,
                                  collaboration=record.collaboration())
        values['PublicationsId_collaborations'] = \
            (int(id_collaboration) if id_collaboration else UNDEF_ID)

        # teams, project, categories, origin
        values['PublicationsId_categories'] = int(selector.id_categories)
        values['PublicationsId_projects'] = int(selector.id_projects)
        values['PublicationsId_teams'] = int(selector.id_teams)
        values['PublicationsOrigin'] = \
            OAI_URL % (selector.host, selector.record_id)

        # publishers
        if selector.controller in ('articles', 'proceedings'):

            check.format_editor(record)
            id_publisher = get_id(db.publishers,
                                  abbreviation=record.paper_editor())
            values['PublicationsId_publishers'] = \
                (int(id_publisher) if id_publisher else UNDEF_ID)
            values['PublicationsVolume'] = record.paper_volume()
            values['PublicationsPages'] = record.paper_pages()

        # conference
        if selector.controller in ('proceedings', 'talks'):

            try:
                check.conference(record)
            except CheckException:
                pass

            values['PublicationsConference_title'] = record.conference_title()
            values['PublicationsConference_url'] = record.conference_url()
            values['PublicationsConference_dates'] = record.conference_dates()
            values['PublicationsConference_town'] = record.conference_town()

            # identity test: None means that the country is not found
            id_country = get_id(db.countries,
                                country=record.conference_country())
            values['PublicationsId_countries'] = \
                (id_country if id_country is not None else UNDEF_ID)

            values['PublicationsConference_speaker'] = record.first_author()

        # thesis
        if selector.controller == 'theses':

            values['PublicationsUniversities'] = record.these_universities()
            values['PublicationsDirectors'] = record.these_directors()
            values['PublicationsDefense'] = record.these_defense()

        # submitted date and year
        try:
            check.submitted(record)
            check.year(record)
        except CheckException:
            pass

        values['PublicationsSubmitted'] = ', '.join(record.submitted())
        values['PublicationsYear'] = record.year()

    except (CdsException, Marc12Exception, ToolException) as e:
        return INLINE_ALERT % (T('Error'), T(str(e)))

    except BaseException as e:
        # for debug when web2py is in debug mode
        print(traceback.format_exc())
        return INLINE_ALERT % (T('Error'), T(str(e)))

    return dict(cfg=cfg, values=values)


203 204
def insert_marcxml():
    """Insert a MarcXML record in the database.

    The tool associated to the controller of the selector is instantiated
    and run.

    @return: the report of the tool, rendered with the view
    C{harvest/layout.html}. When something goes wrong a string is returned
    instead: an inline alert if no tool matches the controller, the
    translated message of the C{ToolException} or the formatted traceback.

    """
    try:
        # the trailing comma matters: ('mode') is the string 'mode',
        # not a tuple containing one field name
        selector = Selector(virtdb.marc12_selector, exclude_fields=('mode',))

        tool_class = get_harvester_tool(selector.controller)
        if not tool_class:
            return INLINE_ALERT % (T('Error'), T('Select a controller.'))

        tool = tool_class(db, selector, debug=False)
        tool()

    except ToolException as e:
        return T(str(e))

    except BaseException:
        # catch-all on purpose: the traceback is displayed to the user
        msg = '<br><br><hr/>'
        msg += CODE(traceback.format_exc()).xml()
        msg += '<hr/>'
        return msg

    response.view = 'harvest/layout.html'
    return tool.report()
228 229


230
def run():
    """Run a harvester.

    Scan the cds/invenio stores to find articles published during
    a given range of years and for a given team/project.
    Insert them in the database if they don't exist.

    The scanning is steered using the current request arguments as well as
    the harvest parameters associated to this action.
    Search arguments are defined via the harvester selector.

    @return: the report of the harvester tool, rendered with the layout
    view matching the extension of the request. A string is returned
    instead when no tool matches the controller (inline alert), when a
    C{ToolException} is raised (translated message) or when any other
    exception is raised (formatted traceback).

    """
    try:
        selector = Selector(virtdb.harvester_selector,
                            exclude_fields=('mode', 'year_start', 'year_end'))

        harvester_cls = get_harvester_tool(selector.controller)
        if not harvester_cls:
            return INLINE_ALERT % (T('Error'), T('Select an harvester.'))

        tool = harvester_cls(db, selector, debug=False)
        tool()

    except ToolException as e:
        return T(str(e))

    except BaseException:
        # show the traceback between two horizontal rules
        trace = CODE(traceback.format_exc()).xml()
        return '<br><br><hr/>' + trace + '<hr/>'

    response.view = 'harvest/layout.%s' % request.extension
    return tool.report()
265 266


267 268 269 270
def run_all():
    """Run all harvesters in one go.

    The harvesters are read from the table C{db.harvesters}. They are
    filtered on the team and / or the project when these fields are set
    in the selector. Each harvester is run in turn and its logs are
    accumulated.

    @return: a dictionary with the keys C{collection_logs}, C{harvester},
    C{logs} and C{selector}, rendered with the layout view matching the
    extension of the request. A string is returned instead when no
    harvester is found (inline alert), when a C{ToolException} is raised
    (translated message) or when any other exception is raised
    (formatted traceback).

    """
    collection_logs = []
    collections = []
    logs = []

    try:
        selector = Selector(virtdb.run_all_harvesters_selector,
                            exclude_fields=('mode', 'year_start', 'year_end'))

        # Build the filter on the team and / or the project.
        # The query is compared to None by identity since the comparison
        # operators of the DAL objects are overloaded.
        query = None
        for fieldname in ('id_teams', 'id_projects'):
            if selector[fieldname]:
                q = db.harvesters[fieldname] == selector[fieldname]
                if query is None:
                    query = q
                else:
                    query = (query) & (q)

        harvesters = db(query).select(db.harvesters.ALL)
        if not len(harvesters):
            return INLINE_ALERT % (T('Error'), MSG_NO_HARVESTER)

        for harvester in harvesters:
            # the selector is tuned for the current harvester
            selector.controller = harvester.controller
            selector.id_projects = harvester.id_projects
            selector.id_teams = harvester.id_teams

            collections.extend(harvester.collections.split(','))

            tool_class = get_harvester_tool(selector.controller)
            tool = tool_class(db, selector, debug=False)

            tool()

            collection_logs.extend(tool.collection_logs)
            logs.extend(tool.logs)

    except ToolException as e:
        return T(str(e))

    except BaseException:
        # catch-all on purpose: the traceback is displayed to the user
        msg = '<br><br><hr/>'
        msg += CODE(traceback.format_exc()).xml()
        msg += '<hr/>'
        return msg

    # tune harvester / selector parameters used in the report title
    harvester = Storage(controller='all harvesters',
                        collections=','.join(collections))

    # no filter was applied: drop the project set by the last harvester
    if query is None:
        selector.id_projects = None

    # delegate rendering to the report view
    response.view = 'harvest/layout.%s' % request.extension
    return dict(collection_logs=collection_logs,
                harvester=harvester,
                logs=logs,
                selector=selector)