Commit 957bf16a authored by LE GAC Renaud's avatar LE GAC Renaud
Browse files

Remove obsolete scripts in archive_py27

parent 05509c13
# -*- coding: utf-8 -*-
""" NAME
change-status-submitted
SYNOPSIS
Change the status to undefined when the submitted date is not valid
DESCRIPTION
In the version 0.8.8 the rule was changed for the submitted date.
Only the format YYYY-MM and YYYY-MM-DD are allowed.
Unfortunately some publications have a submitted date equal
to YYYY and a status OK. This script changes their status
to undefined.
In such configuration all publications with a wrong submitted
dates can be found using the checkandValidate wizard.
OPTIONS
-h, --help
Display the help and exit.
EXAMPLE
> cd ...limbra/scripts
> ./run script change-status-submitted
AUTHOR
R. Le Gac -- Dec 2014
"""
if __name__ == "__main__":
import sys
from argparse import ArgumentParser, FileType
from plugin_dbui import UNDEF_ID
# command line options
parser = ArgumentParser()
args = parser.parse_args()
# unlock the publications update when the status is OK
db.publications._before_update.remove(INHIBIT_PUBLICATION_UPDATE_ON_OK)
# scan the publications table
i = 0
for row in db(db.publications.submitted.len() <= 4).select():
if row.id_status != UNDEF_ID:
i += 1
print "%s (%s) → status undefined" % (row.id, row.submitted)
db(db.publications.id==row.id).update(submitted=1)
# commit
rep = raw_input("Commit all changes (%i)? [y/N]:" % i)
if rep == 'y':
print "The database is modified."
db.commit()
# close
sys.exit(0)
# -*- coding: utf-8 -*-
""" NAME
fix_acti2com_cppm
SYNOPSIS
Copy ACTI to COM for the CPPM database.
DESCRIPTION
Up to the end of 2014 a talk to a conference (COM) was transformed
into a proceeding (ACTI) when the latter was published.
In 2015, the policy changed: COM and ACTI are kept as separate
publications.
This script implements the new policy for the publications registered
before 2015. It mainly copies the ACTI into COM for the years from
2009 up to 2014.
OPTIONS
-h, --help
Display the help and exit.
EXAMPLE
> cd ...limbra/scripts
> ./run -S test_limbra script fix_acti2com_cppm.py
> ./run -S limbra script fix_acti2com_cppm.py
AUTHOR
R. Le Gac -- Jan 2016
"""
import re
from harvest_tools import MONTHS
# Accepted conference dates, anchored at the start of the string:
#   reg1: "3 Oct 2013"
#   reg2: "3-7 Oct 2013"
#   reg3: "30 Nov - 4 Dec 2014"
# Each alternative captures three groups (first day, first month, year).
# REG_CONF_DATES therefore exposes nine groups, and only the triplet of
# the alternative which matched is not None.
reg1 = r'(\d{1,2}) ([A-Z][a-z]{2}) (\d{4})'
reg2 = r'(\d{1,2})-\d{1,2} ([A-Z][a-z]{2}) (\d{4})'
reg3 = r'(\d{1,2}) ([A-Z][a-z]{2}) - \d{1,2} [A-Z][a-z]{2} (\d{4})'
REG_CONF_DATES = re.compile(r'%s|%s|%s' % (reg1, reg2, reg3))

# Record URL: group 1 is the host, group 2 the record identifier.
# Raw string so that "\." and "\d" reach the regex engine untouched
# instead of relying on unknown string escapes being preserved.
REG_ORIGIN = re.compile(r"https?://([a-z\.]+)/record/(\d+)")
if __name__ == "__main__":
import sys
from argparse import ArgumentParser, FileType
from invenio_tools import load_record, OAI_URL
from plugin_dbui import CALLBACK_ERRORS, get_id
# command line options
parser = ArgumentParser()
args = parser.parse_args()
# get the ACTI / COM identifier
id_acti = get_id(db.categories, code="ACTI")
id_com = get_id(db.categories, code="COM")
# build the query
query = db.publications.id_categories == id_acti
query &= db.publications.year >= 2009
query &= db.publications.year <= 2014
# scan the publications table
for row in db(query).select():
data = row.as_dict()
# skip if the the speaker is not from CPPM
if data["conference_speaker"] not in data["authors_institute"]:
continue
# remove publisher information
data["id_publishers"] = 1
data["pages"] = ""
data["volume"] = ""
data["publication_url"] = ""
data["preprint"] = ""
# the year is the one of the conference
# submitted date is when the conference start
match = REG_CONF_DATES.match(data["conference_dates"])
if match:
offset = 0
for i in xrange(3):
if match.group(1+i*3) is not None:
offset = i*3
break
data["year"] = match.group(offset+3)
month = MONTHS[match.group(offset+2)]
data["submitted"] = \
"%s-%02i-%02i" % (match.group(offset+3), int(month), int(match.group(offset+1)))
else:
print "No conferences dates", data["id"]
# change the category and the status
data["id_categories"] = id_com
data["id_status"] = 1
# change the origin
val = data["origin"]
if val:
origin = val.split(',')[0].strip()
match = REG_ORIGIN.match(origin)
host, rec_id = match.group(1), match.group(2)
proceeding = load_record(host, rec_id)
talk_id = proceeding.reference_conference_talk()
data["origin"] = (OAI_URL % (host, talk_id) if talk_id else "")
# insert the new record in the database
id_rec = data["id"],
del data["id"]
id_new = db.publications.insert(**data)
print "Copy", id_rec, "→",
if id_new:
print id_new
elif CALLBACK_ERRORS in db.publications:
print " ".join(db.publications._callback_errors)
else:
print "???"
# commit change
rep = raw_input("Commit change in the database? [y/N]:")
if rep == 'y':
print "The database is modified."
db.commit()
# close
sys.exit(0)
# -*- coding: utf-8 -*-
""" NAME
fix_affiliation_keys_0960 -- create the affiliation_keys table
SYNOPSIS
fix_affiliation_keys_0960
DESCRIPTION
A new mechanism was introduced in version 0.9.6.0 in order to
deal with affiliation. It relies on a new database table
affiliation_keys. Preferences inspirehep_institute_id and
add_rules_reg_institute are obsolete. Remove them.
The aim of this script is to update an existing database.
OPTIONS
EXAMPLE
> cd ...limbra/scripts
> ./run script fix_affiliation_keys_0960.py
> ./run -S limbra_cppm script fix_affiliation_keys_0960.py
> ./run loop fix_affiliation_keys_0960
AUTHOR
R. Le Gac -- Sep 2016
"""
if __name__ == "__main__":
import sys
import os
# create the database table affiliation_keys from SQL statement
# the table is not create if it exist
print "\n\tCreate the table affiliation_keys"
fn = os.path.join(os.getcwd(),
"applications",
request.application,
"scripts",
"affiliation_keys_0960.sql")
with open(fn) as fi:
db.executesql(fi.read())
# remove preferences inspirehep_institute_id, add_rules_reg_institute
for pref in ("inspirehep_institute_id", "add_rules_reg_institute"):
print "\tRemove preference", pref
db(db.preferences.property == pref).delete()
db.commit()
# exit
print "\n\tEnd of script\n"
sys.exit(0)
\ No newline at end of file
# -*- coding: utf-8 -*-
""" NAME
fix-collaboration
SYNOPSIS
fix the publications field collaboration.
DESCRIPTION
Before the limbra version 0.8.8, no rules were
applied to the collaboration(s) signing the publications.
As a consequence, the database contains a mixture of syntaxes.
This script standardizes the naming convention.
OPTIONS
-h, --help
Display the help and exit.
EXAMPLE
> cd ...limbra/scripts
> ./run script fix-collaboration
AUTHOR
R. Le Gac -- Dec 2014
"""
def destroy_collaboration(row):
""" delete the collaboration entry when no publications are
attached to it.
"""
query = db.publications.id_collaborations == row.id
publications = db(query).select()
if len(publications) == 0:
print " - No publications associated to '%s' → delete it" % row.collaboration
db(db.collaborations.id==row.id).delete()
db.commit()
return True
return False
if __name__ == "__main__":
import re
import regex
import sys
from argparse import ArgumentParser, FileType
from invenio_tools import InvenioStore, Marc12
REG_COLLABORATION = re.compile(regex.REG_COLLABORATION)
# command line options
parser = ArgumentParser()
args = parser.parse_args()
# unlock the publications update when the status is OK
db.publications._before_update.remove(INHIBIT_PUBLICATION_UPDATE_ON_OK)
# scan the collaborations table
for row in db(db.collaborations.id > 1).select():
m = REG_COLLABORATION.match(row.collaboration)
if m:
destroy_collaboration(row)
continue
# check publications attach to it
if destroy_collaboration(row):
continue
# replace by an existing value
msg = "Replace '%s' by an existing collaboration id [skip CR]: " % row.collaboration
id_collaboration = raw_input(msg)
if id_collaboration:
new = db.collaborations[id_collaboration]
print "Replace '%s' by '%s': " % (row.collaboration, new.collaboration)
for el in db(db.publications.id_collaborations==row.id).select():
print " - %s, %s → %s" % (el.id, el.id_collaborations, new.id)
db(db.publications.id==el.id).update(id_collaborations=new.id)
db.commit()
destroy_collaboration(row)
continue
# ask for replacement
rep = raw_input("Replace '%s' by [skip CR]: " % row.collaboration)
if rep:
id = db.collaborations.insert(collaboration=rep)
if not id:
continue
for el in db(db.publications.id_collaborations==row.id).select():
print " - %s, %s → %s" % (el.id, el.id_collaborations, id)
db(db.publications.id==el.id).update(id_collaborations=id)
db.commit()
destroy_collaboration(row)
continue
# close
sys.exit(0)
# -*- coding: utf-8 -*-
""" NAME
fix-conference-dates
SYNOPSIS
fix the publications field conference_dates
DESCRIPTION
The syntax for the conference dates is a mixture of English or French.
In addition from time to time the month is encoded with 3 letters.
The latter can start with an upper case or not.
This script standardizes the conference dates using the English typographic convention
(see http://en.wikipedia.org/wiki/Wikipedia:Manual_of_Style/Dates_and_numbers#Months)
Good value will be:
3-7 Oct 2013
30 Nov - 4 Dec 2014
OPTIONS
-h, --help
Display the help and exit.
EXAMPLE
> cd ...limbra/scripts
> ./run script fix-conference-dates
AUTHOR
R. Le Gac -- Nov 2014
"""
# (prefixes, abbreviation) pairs covering English and French spellings.
# The prefixes are compared with the lower-cased month name.
_MONTH_PREFIXES = (
    (("jan",), 'Jan'),
    (("f",), 'Feb'),
    (("mar",), 'Mar'),
    (("apr", "avr"), 'Apr'),
    (("may", "mai"), 'May'),
    (("jun", "juin"), 'Jun'),
    (("jul", "juil"), 'Jul'),
    (("au", "ao"), 'Aug'),
    (("sep",), 'Sep'),
    (("oct",), 'Oct'),
    (("nov",), 'Nov'),
    (("d",), 'Dec'),
)


def fix_month(value):
    """Convert a month name into its three-letter English abbreviation.

    The comparison is case insensitive and only looks at the beginning
    of the name, therefore full names, abbreviations and abbreviations
    followed by a dot are all handled.

    Args:
        value (str): the month, in English or in French.

    Returns:
        str: the abbreviation (``Jan``, ``Feb``, ..., ``Dec``). When the
        name is not recognised the lower-cased input is returned. This
        is the case for ``jui``, which matches neither the June prefixes
        (``jun`` / ``juin``) nor the July ones (``jul`` / ``juil``).

    """
    value = value.lower()

    for prefixes, month in _MONTH_PREFIXES:
        # str.startswith accepts a tuple of alternatives
        if value.startswith(prefixes):
            return month

    return value
if __name__ == "__main__":
import re
import regex
import sys
from argparse import ArgumentParser, FileType
REG1 = re.compile("(\d+) *-? *(\d+) *([A-Za-zéû\.]+) *(\d{4})")
REG2 = re.compile("(\d+) *([A-Za-zéû\.]+) *-? *(\d+) *([A-Za-zéû\.]+) *(\d{4})")
REG_CONF_DATES = re.compile(regex.REG_CONF_DATES)
# command line options
parser = ArgumentParser()
args = parser.parse_args()
# unlock the publications update when the status is OK
db.publications._before_update.remove(INHIBIT_PUBLICATION_UPDATE_ON_OK)
# scan the conference_dates field
for row in db(db.publications.conference_dates.len() > 0).select():
if REG_CONF_DATES.match(row.conference_dates):
continue
m1 = REG1.match(row.conference_dates.strip())
m2 = REG2.match(row.conference_dates.strip())
# month equal to jui is ambiguous (juin or juillet ?)
# to be solve by hand
if m1 and m1.group(3).lower() == 'jui' or\
m2 and (m2.group(2).lower() == 'jui' or m2.group(4).lower() == 'jui'):
m1, m2 = False, False
# 4-5 Oct 2014
if m1:
li = list(m1.groups())
li[2] = fix_month(li[2])
val = "%s-%s %s %s" % tuple(li)
# 30 Oct - 2 Nov 2014
elif m2:
li = list(m2.groups())
li[1] = fix_month(li[1])
li[3] = fix_month(li[3])
val = "%s %s - %s %s %s" % tuple(li)
# ???
else:
print
print "\t", row.id
print "\t", row.title
print "\t", row.conference_title
print "\t", row.year, row.submitted,
val = raw_input("\n\tReplace %s by [skip CR]: " % row.conference_dates)
if val and row.conference_dates != val:
print " - %s, %s → %s" % (row.id, row.conference_dates, val)
db(db.publications.id==row.id).update(conference_dates=val)
db.commit()
# close
sys.exit(0)
# -*- coding: utf-8 -*-
""" NAME
fix-conference-url
SYNOPSIS
fix the publications field conference_url
DESCRIPTION
Check the field conference_url in the invenio store and update it.
From time to time, it has been forgotten.
OPTIONS
-h, --help
Display the help and exit.
EXAMPLE
> cd ...limbra/scripts
> ./run script fix-conference-url
AUTHOR
R. Le Gac -- Dec 2014
"""
if __name__ == "__main__":
import re
import sys
from argparse import ArgumentParser, FileType
from harvest_tools import CheckAndFix, CheckException
from invenio_tools import InvenioStore, Marc12
REG_ORIGIN = re.compile("http://([a-z\.]+)/record/(\d+)")
# command line options
parser = ArgumentParser()
args = parser.parse_args()
# unlock the publications update when the status is OK
db.publications._before_update.remove(INHIBIT_PUBLICATION_UPDATE_ON_OK)
# service
check = CheckAndFix()
decode = Marc12()
# scan the publications table
query = db.publications.origin.len() > 0
query &= db.publications.conference_url.len() == 0
query &= (db.publications.id_categories == 7) | (db.publications.id_categories == 9)
for row in db(query).select():
m = REG_ORIGIN.match(row.origin)
if not m:
continue
host, store_id = m.groups()
# retrieve the full record from the store
store = InvenioStore(host)
xml = store.get_record(store_id)
record = decode(xml)[0]
try:
check.conference(record)
except CheckException, e:
pass
val = record.conference_url()
if val:
print " - %s, conference url: %s" % (row.id, val)
db(db.publications.id==row.id).update(conference_url=val)
db.commit()
# close
sys.exit(0)
# -*- coding: utf-8 -*-
""" NAME
fix-country-0808 -- fix invalid country names
SYNOPSIS
fix-country [options]
DESCRIPTION
Before the limbra 0.8.8, the name of the country
for a conference is defined by the user or by harvesters.
As the result, the database contains a mixture of French and
English name for country. In addition, some value are wrong.