Commit ef1c6948 authored by LE GAC Renaud's avatar LE GAC Renaud
Browse files

Re-enforce the rule for collaboration and add the script fix-collaboration.

parent c79073ce
...@@ -112,6 +112,7 @@ ...@@ -112,6 +112,7 @@
'code': 'code', 'code': 'code',
'Code': 'Code', 'Code': 'Code',
'Collaboration': 'Collaboration', 'Collaboration': 'Collaboration',
'Collaboration(s) signing the publication: CMS Collaboration or CMS and LHCb Collaborations or ATLAS Collaboration, CMS Collaboration or Heavy Flavour Averaging Group or CTA Consortium.': 'Collaboration(s) qui signe la publication: CMS Collaboration ou CMS and LHCb Collaborations ou ATLAS Collaboration, CMS Collaboration ou Heavy Flavour Averaging Group ou CTA Consortium.',
'collaborations': 'collaborations', 'collaborations': 'collaborations',
'Collection': 'Collection', 'Collection': 'Collection',
'Collections': 'Collections', 'Collections': 'Collections',
...@@ -454,6 +455,7 @@ ...@@ -454,6 +455,7 @@
'Regular expression defining the name of our institute. ': 'Expression régulière definissant le nom de votre laboratoire.', 'Regular expression defining the name of our institute. ': 'Expression régulière definissant le nom de votre laboratoire.',
'Reject': 'Rejeter', 'Reject': 'Rejeter',
'Reject article is not published': "Rejeté l'article n'est pas publié", 'Reject article is not published': "Rejeté l'article n'est pas publié",
'Reject collaboration is not well formed': 'Rejeté la collaboration est mal formatté',
'Reject editor is not well formed': "Rejeté l'éditeur est mal formatté", 'Reject editor is not well formed': "Rejeté l'éditeur est mal formatté",
'Reject incomplete paper reference': 'Rejeté la référence du papier est incomplète', 'Reject incomplete paper reference': 'Rejeté la référence du papier est incomplète',
'Reject invalid country': 'Rejeté pays inconnu', 'Reject invalid country': 'Rejeté pays inconnu',
......
...@@ -15,6 +15,8 @@ from callbacks import (INHIBIT_CASCADE_DELETE, ...@@ -15,6 +15,8 @@ from callbacks import (INHIBIT_CASCADE_DELETE,
from datetime import datetime from datetime import datetime
from gluon.tools import PluginManager from gluon.tools import PluginManager
from regex import REG_COLLABORATION
#------------------------------------------------------------------------------- #-------------------------------------------------------------------------------
# #
# PLUGIN DBUI # PLUGIN DBUI
......
...@@ -2,12 +2,21 @@ ...@@ -2,12 +2,21 @@
""" collaborations """ collaborations
""" """
# Tooltip describing the accepted syntax of the collaboration field.
# The text is wrapped in T() so that it is looked up in the translation
# files; it is attached to the field through its `comment` argument.
tp_collaboration = \
T("Collaboration(s) signing the publication: "
"CMS Collaboration or CMS and LHCb Collaborations or "
"ATLAS Collaboration, CMS Collaboration or "
"Heavy Flavour Averaging Group or "
"CTA Consortium.")
db.define_table("collaborations", db.define_table("collaborations",
Field("collaboration", "string", length=255, notnull=True, unique=True), Field("collaboration", "string", length=255, comment=tp_collaboration, notnull=True, unique=True),
migrate="collaborations.table") migrate="collaborations.table")
db.collaborations._before_delete.append(INHIBIT_CASCADE_DELETE) db.collaborations._before_delete.append(INHIBIT_CASCADE_DELETE)
db.collaborations._before_delete.append(dbui.INHIBIT_DELETE_UNDEF) db.collaborations._before_delete.append(dbui.INHIBIT_DELETE_UNDEF)
db.collaborations._before_update.append(dbui.INHIBIT_UPDATE_UNDEF) db.collaborations._before_update.append(dbui.INHIBIT_UPDATE_UNDEF)
db.collaborations.collaboration.filter_in = filters.CLEAN_COLLABORATION db.collaborations.collaboration.filter_in = filters.CLEAN_COLLABORATION
\ No newline at end of file
db.collaborations.collaboration.requires = IS_MATCH(REG_COLLABORATION)
\ No newline at end of file
...@@ -660,6 +660,7 @@ class PublicationsTool(object): ...@@ -660,6 +660,7 @@ class PublicationsTool(object):
self.check.temporary_record(record) self.check.temporary_record(record)
self.check.authors(record) self.check.authors(record)
self.check.format_authors(record, format_author_fr) self.check.format_authors(record, format_author_fr)
self.check.collaboration(record)
except BaseException as e: except BaseException as e:
self.logs[-1].reject(e, record.year()) self.logs[-1].reject(e, record.year())
......
...@@ -10,6 +10,7 @@ import httplib ...@@ -10,6 +10,7 @@ import httplib
import json import json
import pprint import pprint
import re import re
import regex
import sys import sys
import time import time
import urllib import urllib
...@@ -73,6 +74,7 @@ MSG_TEMPORARY_RECORD = current.T('Temporary record', lazy=False) ...@@ -73,6 +74,7 @@ MSG_TEMPORARY_RECORD = current.T('Temporary record', lazy=False)
MSG_TO_MANY_DATE = current.T("Reject to many submit date", lazy=False) MSG_TO_MANY_DATE = current.T("Reject to many submit date", lazy=False)
MSG_TO_MANY_FAUTHOR = current.T("Reject to many first author", lazy=False) MSG_TO_MANY_FAUTHOR = current.T("Reject to many first author", lazy=False)
MSG_TO_MANY_YEAR = current.T("Reject to many year", lazy=False) MSG_TO_MANY_YEAR = current.T("Reject to many year", lazy=False)
MSG_WELL_FORMED_COLLABORATION = current.T("Reject collaboration is not well formed", lazy=False)
MSG_WELL_FORMED_DATE = current.T("Reject submission date is not well formed", lazy=False) MSG_WELL_FORMED_DATE = current.T("Reject submission date is not well formed", lazy=False)
MSG_WELL_FORMED_EDITOR = current.T("Reject editor is not well formed", lazy=False) MSG_WELL_FORMED_EDITOR = current.T("Reject editor is not well formed", lazy=False)
MSG_WELL_FORMED_OAI = current.T("Reject OAI is not well formed", lazy=False) MSG_WELL_FORMED_OAI = current.T("Reject OAI is not well formed", lazy=False)
...@@ -87,6 +89,7 @@ OAI_INVENIO = "oai:%s:%s" ...@@ -87,6 +89,7 @@ OAI_INVENIO = "oai:%s:%s"
OAI_URL = "http://%s/record/%s" OAI_URL = "http://%s/record/%s"
REG_BODY = re.compile("<body>(.*)</body>") REG_BODY = re.compile("<body>(.*)</body>")
REG_COLLABORATION = re.compile(regex.REG_COLLABORATION)
REG_CONF = re.compile("^C\d+-\d+-\d+(\.\d+)?$") REG_CONF = re.compile("^C\d+-\d+-\d+(\.\d+)?$")
REG_IDS_OK = re.compile("^\[[\d, ]*\]$") REG_IDS_OK = re.compile("^\[[\d, ]*\]$")
REG_INT = re.compile("^\d+$") REG_INT = re.compile("^\d+$")
...@@ -351,6 +354,20 @@ class CheckAndFix(object): ...@@ -351,6 +354,20 @@ class CheckAndFix(object):
record[k] = record[k][0] record[k] = record[k][0]
def collaboration(self, record):
    """Verify the syntax of the collaboration attached to the record.

    An empty or missing collaboration is accepted as it is.

    @type record: L{Record}
    @param record: the record whose collaboration is checked.

    @raise CheckException: when the collaboration is defined but does
    not match C{REG_COLLABORATION}.

    """
    name = record.collaboration()

    # nothing to verify when the record carries no collaboration
    if not name:
        return

    if REG_COLLABORATION.match(name) is None:
        raise CheckException(MSG_WELL_FORMED_COLLABORATION)
def conference(self, record): def conference(self, record):
"""Get the conference data associated to a talk/proceeding and """Get the conference data associated to a talk/proceeding and
push them in the record. push them in the record.
......
# *-* coding: utf-8 *-*
""" A collection of regular expressions used to apply rules
on field contents.
@author: R. Le Gac
"""
# Collaboration field:
# - CMS Collaboration
# - CMS and LHCb Collaborations
# - ATLAS Collaboration, CMS Collaboration
# - Heavy Flavour Averaging Group
# - CTA Consortium
# - any mixture of the above separated by a comma
#
# The pattern reads: one or more characters taken among letters, digits,
# '-', '/', ',' and space, followed by "Collaboration" / "collaboration",
# "Consortium" or "Group", with an optional trailing "s" ending the value.
# It is stored as a plain (uncompiled) raw string.
REG_COLLABORATION = r'^[A-Za-z0-9\-/, ]+([Cc]ollaboration|Consortium|Group)[s]?$'
\ No newline at end of file
# -*- coding: utf-8 -*-
""" NAME
fix-collaboration
SYNOPSIS
Fix the collaboration field of the publications.
DESCRIPTION
Before the track_publications version 0.8.8, no rules were
applied to the collaboration(s) signing the publications.
As a consequence, the database contains a mixture of syntaxes.
This script standardizes the naming convention.
OPTIONS
-h, --help
Display the help and exit.
EXAMPLE
> cd ...track_publications/scripts
> ./track_publications fix-collaboration
AUTHOR
R. Le Gac -- Dec 2014
"""
def destroy_collaboration(row):
    """Delete the collaboration entry when no publications are
    attached to it.

    @param row: a row of the collaborations table; its C{id} and
    C{collaboration} fields are used.

    @rtype: bool
    @return: C{True} when the entry has been deleted, C{False} when at
    least one publication still references it.

    """
    query = db.publications.id_collaborations == row.id

    # Only the existence of attached publications matters, so ask the
    # database for emptiness instead of loading every matching row.
    if not db(query).isempty():
        return False

    # Single-argument parenthesised print: same output as the print
    # statement on Python 2.
    print(" - No publications associated to '%s' → delete it" % row.collaboration)
    db(db.collaborations.id == row.id).delete()
    db.commit()
    return True
def _reassign_publications(old_id, new_id):
    """Point every publication attached to the collaboration old_id
    to the collaboration new_id, then commit the changes.

    """
    for el in db(db.publications.id_collaborations == old_id).select():
        print(" - %s, %s → %s" % (el.id, el.id_collaborations, new_id))
        db(db.publications.id == el.id).update(id_collaborations=new_id)
    db.commit()


if __name__ == "__main__":
    import re
    import regex
    import sys

    from argparse import ArgumentParser, FileType
    from invenio_tools import InvenioStore, Marc12

    REG_COLLABORATION = re.compile(regex.REG_COLLABORATION)

    # command line options (no option beside -h / --help)
    parser = ArgumentParser()
    args = parser.parse_args()

    # unlock the publications update when the status is OK
    db.publications._before_update.remove(INHIBIT_PUBLICATION_UPDATE_ON_OK)

    # scan the collaborations table
    for row in db(db.collaborations.id > 1).select():

        # well formed collaboration, nothing to do
        if REG_COLLABORATION.match(row.collaboration):
            continue

        # check the publications attached to it; an orphan entry is
        # simply deleted
        if destroy_collaboration(row):
            continue

        # replace by an existing value
        msg = "Replace '%s' by an existing collaboration id [skip CR]: " % row.collaboration
        id_collaboration = raw_input(msg).strip()

        if id_collaboration:
            # The answer is free text: only query the table with a
            # numeric id and cope with an id which does not exist,
            # instead of failing on a missing record.
            new = None
            if id_collaboration.isdigit():
                new = db.collaborations[int(id_collaboration)]

            if new is None:
                print(" - Unknown collaboration id '%s'" % id_collaboration)

            else:
                print("Replace '%s' by '%s': " % (row.collaboration, new.collaboration))
                _reassign_publications(row.id, new.id)
                destroy_collaboration(row)
                continue

        # ask for replacement
        rep = raw_input("Replace '%s' by [skip CR]: " % row.collaboration)
        if rep:
            new_id = db.collaborations.insert(collaboration=rep)
            if not new_id:
                continue

            _reassign_publications(row.id, new_id)
            destroy_collaboration(row)
            continue

    # close
    sys.exit(0)
...@@ -18,12 +18,13 @@ HEAD ...@@ -18,12 +18,13 @@ HEAD
- Use the new callbacks INHIBIT_DELETE_UNDEF and INHIBIT_UPDATE_UNDEF. - Use the new callbacks INHIBIT_DELETE_UNDEF and INHIBIT_UPDATE_UNDEF.
- Redesign the lists and metrics interfaces as well as metric view. - Redesign the lists and metrics interfaces as well as metric view.
- Remove obsolete controller toolbox. It is now replaced by standalone - Remove obsolete controller toolbox. It is now replaced by standalone
scripts: export-to-csv, import-from-csv, fix-conference-dates, fix-country, scripts: export-to-csv, import-from-csv, fix-conference-dates,
fix-defense, ifix-report-number, fix-submitted. fix-collaboration, fix-conference-url,fix-country, fix-defense,
fix-report-number, fix-submitted.
- The list of country is almost frozen by using the default list coming - The list of country is almost frozen by using the default list coming
from a geographical database (www.geonames.org). from a geographical database (www.geonames.org).
Harvester can not add country anymore. Harvester can not add country anymore.
- Re-enforce rule for publications fields: defense, conference_dates - Re-enforce rule for fields: collaborations, defense, conference_dates
and submitted. and submitted.
0.8.7.2 (Sep 2014) 0.8.7.2 (Sep 2014)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment