# harvest.py -- web2py controller
""" Harvest Controllers

"""
4
import traceback
5

6
from gluon.storage import Storage
7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
from harvest_tools import (format_author_fr, 
                           family_name_fr, 
                           get_harvester_tool,
                           PublicationsTool, 
                           ToolException)
from invenio_tools import (CdsException,
                           CheckAndFix,
                           CheckException,
                           Marc12Exception,
                           InvenioStore, 
                           Marc12,
                           OAI_URL)
from plugin_dbui import (get_id, 
                         Selector, 
                         to_formPanel,
                         UNDEF_ID)
23

24
# Translated alert text returned by run_all() when the selection matches
# no row of db.harvesters.
MSG_NO_HARVESTER = T("No harvesters for your selection !!!")
25

26 27 28
# HTML snippet opening an Ext.Msg.alert box; it is filled with a
# (title, message) pair using the % operator.
# NOTE(review): both values are inserted unescaped between single quotes,
# so a message containing a quote or a newline would break the script.
INLINE_ALERT = "<script>Ext.Msg.alert('%s', '%s');</script>"


29
def free_run():
    """Run a free harvester.

    All harvester parameters are defined via the selector
    (virtdb.free_harvester_selector). Every field listed in C{fields}
    has to be filled, otherwise an inline alert naming the first missing
    field is returned.

    @return:
        - an inline alert (HTML string) when a required field is empty,
        - the translated message when the tool raises a ToolException,
        - the HTML formatted traceback for any other exception,
        - otherwise the report of the tool, rendered with the view
          harvest/layout.html.

    """
    table = virtdb.free_harvester_selector
    fields = ('collections',
              'controller',
              'host',
              'id_projects',
              'id_teams',
              'id_categories',
              'ratio')

    try:
        selector = Selector(table,
                            exclude_fields=('mode', 'year_start', 'year_end'))

        # all the harvester parameters are mandatory
        for el in fields:
            if not selector[el]:
                msg = T('All fields of the form have to be defined !!!')
                msg += "<br>"
                msg += T('The field "%s" is missing ...') % T(table[el].label)
                return INLINE_ALERT % (T('Error'), msg)

        tool_class = get_harvester_tool(selector.controller)
        tool = tool_class(db, selector, debug=False)

        # the harvester parameters come from the selector
        # (they are copied one by one into a fresh Storage)
        tool.harvester = Storage()
        for el in fields:
            tool.harvester[el] = selector[el]

        tool()

    except ToolException as e:
        return T(str(e))

    except BaseException:
        # last resort: show the traceback in the browser
        msg = '<br><br><hr/>'
        msg += CODE(traceback.format_exc()).xml()
        msg += '<hr/>'
        return msg

    response.view = 'harvest/layout.html'
    return tool.report()
74 75


76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124
def edit_insert():
    """Edit an invenio record and insert it in the database.

    The record identified by the selector (host, record_id) is retrieved
    from the invenio store and decoded. Its content is used to build the
    dictionary C{values}, whose keys are built as 'Publications' followed
    by the capitalised field name.

    @note: Recovery procedures are applied to fix basic non-conformity, but
    no checks are run. The user is editing the record to fix problems.

    @return:
        - an inline alert (HTML string) when a selector field is empty
          or when an exception is raised,
        - otherwise a dictionary with the keys C{cfg} (the configuration
          returned by to_formPanel for db.publications) and C{values}.

    """
    fields = ('controller',
              'host',
              'id_projects',
              'id_teams',
              'id_categories',
              'record_id')

    table = virtdb.edit_insert_selector

    try:
        selector = Selector(table)

        # all the selector fields are mandatory
        for el in fields:
            if not selector[el]:
                msg = T('All fields of the form have to be defined !!!')
                msg += "<br>"
                msg += T('The field "%s" is missing ...') % T(table[el].label)
                return INLINE_ALERT % (T('Error'), msg)

        # record
        store = InvenioStore(selector.host)
        xml = store.get_record(selector.record_id)
        decode = Marc12()
        record = decode(xml)[0]

        # form configuration
        cfg = to_formPanel(db.publications)

        # tools to extract values to be loaded in the form
        values = {}
        check = CheckAndFix()
        tool = PublicationsTool(db, selector)

        # title, preprint, URL, report number
        values['PublicationsTitle'] = record.title()
        values['PublicationsPreprint'] = record.preprint_number()
        values['PublicationsPublication_url'] = record.paper_url()
        values['PublicationsReport_numbers'] = record.report_number()

        # authors
        check.format_authors(record, format_author_fr)
        my_authors = tool._my_author_list(record)

        try:
            check.my_authors(record,
                             reference=my_authors,
                             cmpFct=family_name_fr)
        except CheckException:
            # best effort: the user fills the field by hand in the form
            record.my_authors = ''

        values['PublicationsFirst_author'] = record.first_author()
        values['PublicationsAuthors'] = record.authors()
        values['PublicationsAuthors_cppm'] = record.my_authors

        # collaboration
        id_collaboration = get_id(db.collaborations,
                                  collaboration=record.collaboration())
        values['PublicationsId_collaborations'] = \
            (int(id_collaboration) if id_collaboration else UNDEF_ID)

        # submitted date and year
        try:
            check.submitted(record)
            check.year(record)
        except CheckException:
            # best effort: the values are shown as they are in the record
            pass

        values['PublicationsSubmitted'] = ', '.join(record.submitted())
        values['PublicationsYear'] = record.year()

        # teams, project, categories, origin
        values['PublicationsId_categories'] = int(selector.id_categories)
        values['PublicationsId_projects'] = int(selector.id_projects)
        values['PublicationsId_teams'] = int(selector.id_teams)
        values['PublicationsOrigin'] = \
            OAI_URL % (selector.host, selector.record_id)

        # publishers
        if selector.controller in ('articles', 'proceedings'):

            check.format_editor(record)
            id_publisher = get_id(db.publishers,
                                  abbreviation=record.paper_editor())
            values['PublicationsId_publishers'] = \
                (int(id_publisher) if id_publisher else UNDEF_ID)
            values['PublicationsVolume'] = record.paper_volume()
            values['PublicationsPages'] = record.paper_pages()

        # conference
        if selector.controller in ('proceedings', 'talks'):

            values['PublicationsConference_title'] = record.conference_title()
            values['PublicationsConference_url'] = record.conference_url()
            values['PublicationsConference_dates'] = record.conference_dates()
            values['PublicationsConference_town'] = record.conference_town()

            # the key follows the Id_<table> pattern used above for the
            # collaborations and the publishers
            id_country = get_id(db.countries,
                                country=record.conference_country())
            values['PublicationsId_countries'] = \
                (int(id_country) if id_country else UNDEF_ID)

            values['PublicationsConference_speaker'] = record.first_author()

        # thesis
        if selector.controller == 'theses':

            values['PublicationsUniversities'] = record.these_universities()
            values['PublicationsDirectors'] = record.directors()
            values['PublicationsDefense'] = record.defense()

    except (CdsException, Marc12Exception, ToolException) as e:
        return INLINE_ALERT % (T('Error'), T(str(e)))

    except BaseException as e:
        # for debug when web2py is in debug mode
        print(traceback.format_exc())
        return INLINE_ALERT % (T('Error'), T(str(e)))

    return dict(cfg=cfg, values=values)


196 197
def insert_marcxml():
    """Insert a MarcXML record in the database.

    The harvester tool is selected via the controller defined in the
    selector (virtdb.marc12_selector).

    @return:
        - an inline alert (HTML string) when no tool is found for
          the selected controller,
        - the translated message when the tool raises a ToolException,
        - the HTML formatted traceback for any other exception,
        - otherwise the report of the tool, rendered with the view
          harvest/layout.html.

    """
    try:
        # exclude_fields is a sequence of field names:
        # the trailing comma makes it a one-element tuple
        selector = Selector(virtdb.marc12_selector, exclude_fields=('mode',))

        tool_class = get_harvester_tool(selector.controller)
        if not tool_class:
            return INLINE_ALERT % (T('Error'), T('Select a controller.'))

        tool = tool_class(db, selector, debug=False)
        tool()

    except ToolException as e:
        return T(str(e))

    except BaseException:
        # last resort: show the traceback in the browser
        msg = '<br><br><hr/>'
        msg += CODE(traceback.format_exc()).xml()
        msg += '<hr/>'
        return msg

    response.view = 'harvest/layout.html'
    return tool.report()
221 222


223
def run():
    """Run a harvester.

    Scan the cds/invenio stores to find articles published during
    a given range of years and for a given team/project.
    Insert them in the database if they don't exist.

    The scanning is steered using the current request arguments as well as
    the harvest parameters associated to this action.

    Search arguments are defined via the harvester selector.

    @return:
        - an inline alert (HTML string) when no tool is found for
          the selected controller,
        - the translated message when the tool raises a ToolException,
        - the HTML formatted traceback for any other exception,
        - otherwise the report of the tool, rendered with the view
          harvest/layout.<extension of the request>.

    """
    try:
        selector = Selector(virtdb.harvester_selector,
                            exclude_fields=('mode', 'year_start', 'year_end'))

        tool_class = get_harvester_tool(selector.controller)
        if not tool_class:
            return INLINE_ALERT % (T('Error'), T('Select an harvester.'))

        tool = tool_class(db, selector, debug=False)
        tool()

    except ToolException as e:
        return T(str(e))

    except BaseException:
        # last resort: show the traceback in the browser
        msg = '<br><br><hr/>'
        msg += CODE(traceback.format_exc()).xml()
        msg += '<hr/>'
        return msg

    response.view = 'harvest/layout.%s' % request.extension
    return tool.report()
258 259


260 261 262 263
def run_all():
    """Run all harvesters in one go.

    The harvesters are the rows of db.harvesters, optionally filtered
    by the team and / or the project defined in the selector
    (virtdb.run_all_harvesters_selector). The tool associated to each
    harvester is run in turn and its logs are accumulated.

    @return:
        - an inline alert (HTML string) when no harvester matches
          the selection,
        - the translated message when a tool raises a ToolException,
        - the HTML formatted traceback for any other exception,
        - otherwise a dictionary with the keys C{collection_logs},
          C{harvester}, C{logs} and C{selector}, rendered with the view
          harvest/layout.<extension of the request>.

    """
    collection_logs = []
    collections = []
    logs = []

    try:
        selector = Selector(virtdb.run_all_harvesters_selector,
                            exclude_fields=('mode', 'year_start', 'year_end'))

        # AND of the conditions on the team / project, when they are defined.
        # The query stays None when there is no condition at all.
        # Identity tests are used since DAL objects overload the operators.
        query = None
        for fieldname in ('id_teams', 'id_projects'):
            if selector[fieldname]:
                q = db.harvesters[fieldname] == selector[fieldname]
                if query is not None:
                    query = (query) & (q)
                else:
                    query = q

        harvesters = db(query).select(db.harvesters.ALL)
        if not len(harvesters):
            return INLINE_ALERT % (T('Error'), MSG_NO_HARVESTER)

        for harvester in harvesters:
            # the selector is updated in place with the harvester parameters
            # before being handed to the tool
            selector.controller = harvester.controller
            selector.id_projects = harvester.id_projects
            selector.id_teams = harvester.id_teams

            collections.extend(harvester.collections.split(','))

            tool_class = get_harvester_tool(selector.controller)
            tool = tool_class(db, selector, debug=False)

            tool()

            collection_logs.extend(tool.collection_logs)
            logs.extend(tool.logs)

    except ToolException as e:
        return T(str(e))

    except BaseException:
        # last resort: show the traceback in the browser
        msg = '<br><br><hr/>'
        msg += CODE(traceback.format_exc()).xml()
        msg += '<hr/>'
        return msg

    # tune harvester / selector parameters used in the report title
    harvester = Storage(controller='all harvesters',
                        collections=','.join(collections))

    # No team / project was selected: drop the project left in the selector
    # by the last harvester of the loop.
    # NOTE(review): id_teams keeps the value of the last harvester --
    # confirm that this is intended.
    if query is None:
        selector.id_projects = None

    # delegate rendering to the report view
    response.view = 'harvest/layout.%s' % request.extension
    return dict(collection_logs=collection_logs,
                harvester=harvester,
                logs=logs,
                selector=selector)