Commit ca18deac authored by LE GAC Renaud's avatar LE GAC Renaud
Browse files

Change the name in the script documentation.

parent 0334261e
# -*- coding: utf-8 -*-
""" NAME
change-status-submitted
SYNOPSIS
Change the status to undefined when the submitted date is not valid
DESCRIPTION
In the version 0.8.8 the rule was changed for the submitted date.
Only the format YYYY-MM and YYYY-MM-DD are allowed.
Unfortunately some publications have a submitted date equal
to YYYY and a status OK. This script changes their status
Unfortunately some publications have a submitted date equal
to YYYY and a status OK. This script changes their status
to undefined.
In such a configuration, all publications with a wrong submitted
date can be found using the checkandValidate wizard.
OPTIONS
-h, --help
Display the help and exit.
EXAMPLE
> cd ...track_publications/scripts
> ./track_publications change-status-submitted
> cd ...limbra/scripts
> ./run script change-status-submitted
AUTHOR
R. Le Gac -- Dec 2014
"""
"""
if __name__ == "__main__":
import sys
from argparse import ArgumentParser, FileType
from plugin_dbui import UNDEF_ID
# command line options
parser = ArgumentParser()
args = parser.parse_args()
# unlock the publications update when the status is OK
db.publications._before_update.remove(INHIBIT_PUBLICATION_UPDATE_ON_OK)
# scan the publications table
i = 0
for row in db(db.publications.submitted.len() <= 4).select():
......
#!/usr/bin/env python
# -*- coding: utf-8 -*-
""" NAME
export-to-csv -- export table content as CSV file
export-to-csv -- export table content as CSV file
SYNOPSIS
export-to-csv [options] table file
DESCRIPTION
Write the full content of the table to a CSV file.
OPTIONS
-h, --help
Display the help and exit.
EXAMPLE
> cd ...track_publications/scripts
> ./track_publications export-to-csv.py publications ~/mywap/track_publications/scripts/publications.csv
> cd ...limbra/scripts
> ./run script export-to-csv.py publications ~/mywap/limbra/scripts/publications.csv
AUTHOR
R. Le Gac
"""
if __name__ == "__main__":
import csv
import os
import sys
from argparse import ArgumentParser, FileType
# command line options
......@@ -39,20 +39,20 @@ if __name__ == "__main__":
parser.add_argument('file',
type=FileType('w'),
help='the absolute path of the CSV file.')
args = parser.parse_args()
# the CSV writer
writer = csv.DictWriter(args.file,
db[args.table].fields,
writer = csv.DictWriter(args.file,
db[args.table].fields,
quoting=csv.QUOTE_MINIMAL)
# write the header
headers = {}
for el in db[args.table].fields:
headers[el] = el
writer.writerow(headers)
# write table content
for row in db(db[args.table]).select():
writer.writerow(row.as_dict())
......
......@@ -23,9 +23,9 @@
EXAMPLE
> cd ...track_publications/scripts
> ./run -S test_publications script fix_acti2com_cppm.py
> ./run -S track_publications script fix_acti2com_cppm.py
> cd ...limbra/scripts
> ./run -S test_limbra script fix_acti2com_cppm.py
> ./run -S limbra script fix_acti2com_cppm.py
AUTHOR
......
# -*- coding: utf-8 -*-
""" NAME
fix-collaboration
SYNOPSIS
fix the publications field collaboration.
DESCRIPTION
Before the track_publications version 0.8.8, no rules have been
Before the limbra version 0.8.8, no rules have been
applied to the collaboration(s) signing the publications.
As a consequence, the database contains a mixture of syntaxes.
This script standardizes the naming convention.
OPTIONS
-h, --help
Display the help and exit.
EXAMPLE
> cd ...track_publications/scripts
> ./track_publications fix-collaboration
> cd ...limbra/scripts
> ./run script fix-collaboration
AUTHOR
R. Le Gac -- Dec 2014
"""
"""
def destroy_collaboration(row):
""" delete the collaboration entry when no publications are
""" delete the collaboration entry when no publications are
attached to it.
"""
query = db.publications.id_collaborations == row.id
publications = db(query).select()
......@@ -38,30 +38,30 @@ def destroy_collaboration(row):
db(db.collaborations.id==row.id).delete()
db.commit()
return True
return False
if __name__ == "__main__":
import re
import regex
import sys
from argparse import ArgumentParser, FileType
from invenio_tools import InvenioStore, Marc12
REG_COLLABORATION = re.compile(regex.REG_COLLABORATION)
# command line options
parser = ArgumentParser()
args = parser.parse_args()
# unlock the publications update when the status is OK
db.publications._before_update.remove(INHIBIT_PUBLICATION_UPDATE_ON_OK)
# scan the collaborations table
for row in db(db.collaborations.id > 1).select():
m = REG_COLLABORATION.match(row.collaboration)
if m:
destroy_collaboration(row)
......@@ -77,20 +77,20 @@ if __name__ == "__main__":
if id_collaboration:
new = db.collaborations[id_collaboration]
print "Replace '%s' by '%s': " % (row.collaboration, new.collaboration)
for el in db(db.publications.id_collaborations==row.id).select():
print " - %s, %s → %s" % (el.id, el.id_collaborations, new.id)
db(db.publications.id==el.id).update(id_collaborations=new.id)
db.commit()
destroy_collaboration(row)
continue
# ask for replacement
rep = raw_input("Replace '%s' by [skip CR]: " % row.collaboration)
if rep:
id = db.collaborations.insert(collaboration=rep)
if not id:
continue
......@@ -98,9 +98,9 @@ if __name__ == "__main__":
print " - %s, %s → %s" % (el.id, el.id_collaborations, id)
db(db.publications.id==el.id).update(id_collaborations=id)
db.commit()
destroy_collaboration(row)
continue
# close
sys.exit(0)
# -*- coding: utf-8 -*-
""" NAME
fix-conference-dates
SYNOPSIS
fix the publications field conference_dates
DESCRIPTION
The syntax for the conference dates is a mixture of English and French.
In addition, from time to time the month is encoded with 3 letters.
The latter can start with an upper case letter or not.
This script standardizes the conference dates using English typographic conventions
(see http://en.wikipedia.org/wiki/Wikipedia:Manual_of_Style/Dates_and_numbers#Months)
Valid values look like:
3-7 Oct 2013
30 Nov - 4 Dec 2014
OPTIONS
-h, --help
Display the help and exit.
EXAMPLE
> cd ...track_publications/scripts
> ./track_publications fix-conference-dates
> cd ...limbra/scripts
> ./run script fix-conference-dates
AUTHOR
R. Le Gac -- Nov 2014
"""
def fix_month(value):
value = value.lower()
if value.startswith("jan"):
value = 'Jan'
elif value.startswith("f"):
value = 'Feb'
elif value.startswith("mar"):
value = 'Mar'
......@@ -75,31 +75,31 @@ def fix_month(value):
if __name__ == "__main__":
import re
import regex
import sys
from argparse import ArgumentParser, FileType
REG1 = re.compile("(\d+) *-? *(\d+) *([A-Za-zéû\.]+) *(\d{4})")
REG2 = re.compile("(\d+) *([A-Za-zéû\.]+) *-? *(\d+) *([A-Za-zéû\.]+) *(\d{4})")
REG_CONF_DATES = re.compile(regex.REG_CONF_DATES)
# command line options
parser = ArgumentParser()
args = parser.parse_args()
# unlock the publications update when the status is OK
db.publications._before_update.remove(INHIBIT_PUBLICATION_UPDATE_ON_OK)
# scan the conference_dates field
for row in db(db.publications.conference_dates.len() > 0).select():
if REG_CONF_DATES.match(row.conference_dates):
continue
m1 = REG1.match(row.conference_dates.strip())
m2 = REG2.match(row.conference_dates.strip())
......@@ -108,33 +108,33 @@ if __name__ == "__main__":
if m1 and m1.group(3).lower() == 'jui' or\
m2 and (m2.group(2).lower() == 'jui' or m2.group(4).lower() == 'jui'):
m1, m2 = False, False
# 4-5 Oct 2014
if m1:
li = list(m1.groups())
li[2] = fix_month(li[2])
val = "%s-%s %s %s" % tuple(li)
# 30 Oct - 2 Nov 2014
elif m2:
li = list(m2.groups())
li[1] = fix_month(li[1])
li[3] = fix_month(li[3])
val = "%s %s - %s %s %s" % tuple(li)
# ???
else:
print
print "\t", row.id
print "\t", row.title
print "\t", row.conference_title
print "\t", row.year, row.submitted,
print "\t", row.year, row.submitted,
val = raw_input("\n\tReplace %s by [skip CR]: " % row.conference_dates)
if val and row.conference_dates != val:
print " - %s, %s → %s" % (row.id, row.conference_dates, val)
db(db.publications.id==row.id).update(conference_dates=val)
db(db.publications.id==row.id).update(conference_dates=val)
db.commit()
# close
sys.exit(0)
......@@ -16,8 +16,8 @@
EXAMPLE
> cd ...track_publications/scripts
> ./track_publications fix-conference-url
> cd ...limbra/scripts
> ./run script fix-conference-url
AUTHOR
R. Le Gac -- Dec 2014
......
......@@ -6,7 +6,7 @@
fix-country [options]
DESCRIPTION
Before the track_publications 0.8.8, the name of the country
Before the limbra 0.8.8, the name of the country
for a conference was defined by the user or by harvesters.
As a result, the database contains a mixture of French and
English country names. In addition, some values are wrong.
......@@ -26,9 +26,9 @@
EXAMPLE
> cd ... /track_publications/scripts
> cd ...limbra/scripts
> ./run script fix-country-0808.py
> ./run script -S track_publications_cppm fix-country-0808.py
> ./run script -S limbra_cppm fix-country-0808.py
AUTHOR
R. Le Gac -- Nov 2014
......
......@@ -21,9 +21,9 @@
EXAMPLE
> cd ... /track_publications/scripts
> cd ...limbra/scripts
> ./run script fix_country_0815.py
> ./run script -S track_publications_cppm fix_country_0815.py
> ./run script -S limbra_cppm fix_country_0815.py
AUTHOR
R. Le Gac -- Oct 2015
......
# -*- coding: utf-8 -*-
""" NAME
fix-defense
SYNOPSIS
fix the publications field defense
DESCRIPTION
Normalise the field publications.defense to DD MMM YYYY in
which the month is encoded using English.
OPTIONS
-h, --help
Display the help and exit.
EXAMPLE
> cd ...track_publications/scripts
> ./track_publications fix-defense
> cd ...limbra/scripts
> ./run script fix-defense
AUTHOR
R. Le Gac -- Nov 2014
"""
def fix_month(value):
value = value.lower()
if value.startswith("jan"):
value = 'Jan'
elif value.startswith("f"):
value = 'Feb'
elif value.startswith("mar"):
value = 'Mar'
......@@ -65,25 +65,25 @@ def fix_month(value):
return value
if __name__ == "__main__":
import datetime
import re
import sys
from argparse import ArgumentParser, FileType
REG_DD_MMM_YYYY = re.compile('(\d{2}) *([A-Z][a-z]{2}) *(\d{4})')
REG_DD_MONTH_YYYY = re.compile('(\d{1,2}) *([A-Za-zéû]+) *(\d{4})')
# command line options
parser = ArgumentParser()
args = parser.parse_args()
# unlock the publications update when the status is OK
db.publications._before_update.remove(INHIBIT_PUBLICATION_UPDATE_ON_OK)
# scan the publications table
for row in db(db.publications.defense.len() > 0).select():
......@@ -91,20 +91,20 @@ if __name__ == "__main__":
continue
val = None
m = REG_DD_MONTH_YYYY.search(row.defense)
if m:
val = "%02i %s %s" % (int(m.group(1)), fix_month(m.group(2)), m.group(3))
else:
print
print "\t", row.id
print "\t", row.title
val = raw_input("\n\tReplace %s by [skip CR]: " % row.defense)
print " - %s, %s → %s" % (row.id, row.defense, val)
db(db.publications.id==row.id).update(defense=val)
db(db.publications.id==row.id).update(defense=val)
db.commit()
# close
sys.exit(0)
......@@ -20,8 +20,8 @@
EXAMPLE
> cd ...track_publications/scripts
> ./run fix-institute-id
> cd ...limbra/scripts
> ./run script fix-institute-id
AUTHOR
R. Le Gac -- Sep 2015
......
......@@ -6,7 +6,7 @@
fix-origin-0815 [options]
DESCRIPTION
Before the track_publications 0.8.15, the name origin field
Before the limbra 0.8.15, the name origin field
contains the OAI identifier of the store.
The same publication can be found in the cds and in the inspirehep
......@@ -23,9 +23,9 @@
EXAMPLE
> cd ... /track_publications/scripts
> cd ...limbra/scripts
> ./run script fix-origin-0815.py
> ./run script -S track_publications_cppm fix-origin-0815.py
> ./run script -S limbra_cppm fix-origin-0815.py
AUTHOR
R. Le Gac -- Oct 2015
......
......@@ -18,8 +18,8 @@
EXAMPLE
> cd ...track_publications/scripts
> ./track_publications fix-page-volume
> cd ...limbra/scripts
> ./run script fix-page-volume
AUTHOR
R. Le Gac -- Dec 2014
......
......@@ -7,7 +7,7 @@
DESCRIPTION
The field publication_url is the URL of the pdf file.
This definition has been reinforced in track_publications 0.8.8.
Thi