Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
limbra
limbra
Commits
957bf16a
Commit
957bf16a
authored
Jan 05, 2021
by
LE GAC Renaud
Browse files
Remove obsolete scripts in archive_py27
parent
05509c13
Changes
19
Hide whitespace changes
Inline
Side-by-side
Showing
19 changed files
with
0 additions
and
1794 deletions
+0
-1794
scripts/archives_py27/change_status_submitted.py
scripts/archives_py27/change_status_submitted.py
+0
-62
scripts/archives_py27/fix_acti2com_cppm.py
scripts/archives_py27/fix_acti2com_cppm.py
+0
-143
scripts/archives_py27/fix_affiliation_keys_0960.py
scripts/archives_py27/fix_affiliation_keys_0960.py
+0
-54
scripts/archives_py27/fix_collaboration.py
scripts/archives_py27/fix_collaboration.py
+0
-106
scripts/archives_py27/fix_conference_dates.py
scripts/archives_py27/fix_conference_dates.py
+0
-140
scripts/archives_py27/fix_conference_url.py
scripts/archives_py27/fix_conference_url.py
+0
-81
scripts/archives_py27/fix_country_0808.py
scripts/archives_py27/fix_country_0808.py
+0
-81
scripts/archives_py27/fix_country_0900.py
scripts/archives_py27/fix_country_0900.py
+0
-53
scripts/archives_py27/fix_defense.py
scripts/archives_py27/fix_defense.py
+0
-110
scripts/archives_py27/fix_harvesters_collections.py
scripts/archives_py27/fix_harvesters_collections.py
+0
-64
scripts/archives_py27/fix_institute_id.py
scripts/archives_py27/fix_institute_id.py
+0
-60
scripts/archives_py27/fix_origin_0900.py
scripts/archives_py27/fix_origin_0900.py
+0
-75
scripts/archives_py27/fix_page_volume.py
scripts/archives_py27/fix_page_volume.py
+0
-111
scripts/archives_py27/fix_preference_0965.py
scripts/archives_py27/fix_preference_0965.py
+0
-55
scripts/archives_py27/fix_publications_url.py
scripts/archives_py27/fix_publications_url.py
+0
-163
scripts/archives_py27/fix_publisher_0900.py
scripts/archives_py27/fix_publisher_0900.py
+0
-145
scripts/archives_py27/fix_report_number.py
scripts/archives_py27/fix_report_number.py
+0
-76
scripts/archives_py27/fix_submitted.py
scripts/archives_py27/fix_submitted.py
+0
-124
scripts/archives_py27/fix_year.py
scripts/archives_py27/fix_year.py
+0
-91
No files found.
scripts/archives_py27/change_status_submitted.py
deleted
100644 → 0
View file @
05509c13
# -*- coding: utf-8 -*-
""" NAME
change-status-submitted
SYNOPSIS
Change the status to undefined when the submitted date is not valid
DESCRIPTION
In the version 0.8.8 the rule was changed for the submitted date.
Only the format YYYY-MM and YYYY-MM-DD are allowed.
Unfortunately some publications have a submitted date equal
to YYYY and a status OK. This script changes their status
to undefined.
In such a configuration, all publications with a wrong submitted
date can be found using the checkandValidate wizard.
OPTIONS
-h, --help
Display the help and exit.
EXAMPLE
> cd ...limbra/scripts
> ./run script change-status-submitted
AUTHOR
R. Le Gac -- Dec 2014
"""
if
__name__
==
"__main__"
:
import
sys
from
argparse
import
ArgumentParser
,
FileType
from
plugin_dbui
import
UNDEF_ID
# command line options
parser
=
ArgumentParser
()
args
=
parser
.
parse_args
()
# unlock the publications update when the status is OK
db
.
publications
.
_before_update
.
remove
(
INHIBIT_PUBLICATION_UPDATE_ON_OK
)
# scan the publications table
i
=
0
for
row
in
db
(
db
.
publications
.
submitted
.
len
()
<=
4
).
select
():
if
row
.
id_status
!=
UNDEF_ID
:
i
+=
1
print
"%s (%s) → status undefined"
%
(
row
.
id
,
row
.
submitted
)
db
(
db
.
publications
.
id
==
row
.
id
).
update
(
submitted
=
1
)
# commit
rep
=
raw_input
(
"Commit all changes (%i)? [y/N]:"
%
i
)
if
rep
==
'y'
:
print
"The database is modified."
db
.
commit
()
# close
sys
.
exit
(
0
)
scripts/archives_py27/fix_acti2com_cppm.py
deleted
100644 → 0
View file @
05509c13
# -*- coding: utf-8 -*-
""" NAME
fix_acti2com_cppm
SYNOPSIS
Copy ACTI to COM for the CPPM database.
DESCRIPTION
Up to the end of 2014, a talk to a conference (COM) was transformed
into a proceeding (ACTI) when the latter was published.
In 2015, the policy changed: COM and ACTI are kept as separate
publications.
This script implements the new policy for the publications registered
before 2015. It mainly copies the ACTI into COM for the years from
2009 up to 2014.
OPTIONS
-h, --help
Display the help and exit.
EXAMPLE
> cd ...limbra/scripts
> ./run -S test_limbra script fix_acti2com_cppm.py
> ./run -S limbra script fix_acti2com_cppm.py
AUTHOR
R. Le Gac -- Jan 2016
"""
import
re
from
harvest_tools
import
MONTHS
# Layouts accepted for the conference dates:
#   reg1: "3 Oct 2013"
#   reg2: "3-7 Oct 2013"
#   reg3: "30 Nov - 4 Dec 2014"
# Each alternative captures (first day, month of the first day, year).
# The combined pattern therefore exposes nine groups, of which only the
# triplet belonging to the matching alternative is set.
reg1 = r'(\d{1,2}) ([A-Z][a-z]{2}) (\d{4})'
reg2 = r'(\d{1,2})-\d{1,2} ([A-Z][a-z]{2}) (\d{4})'
reg3 = r'(\d{1,2}) ([A-Z][a-z]{2}) - \d{1,2} [A-Z][a-z]{2} (\d{4})'
REG_CONF_DATES = re.compile(r'%s|%s|%s' % (reg1, reg2, reg3))

# URL of a record in a store: captures the host and the record identifier.
# Raw string, so that the backslashes reach the regex engine untouched.
REG_ORIGIN = re.compile(r"https?://([a-z\.]+)/record/(\d+)")
if
__name__
==
"__main__"
:
import
sys
from
argparse
import
ArgumentParser
,
FileType
from
invenio_tools
import
load_record
,
OAI_URL
from
plugin_dbui
import
CALLBACK_ERRORS
,
get_id
# command line options
parser
=
ArgumentParser
()
args
=
parser
.
parse_args
()
# get the ACTI / COM identifier
id_acti
=
get_id
(
db
.
categories
,
code
=
"ACTI"
)
id_com
=
get_id
(
db
.
categories
,
code
=
"COM"
)
# build the query
query
=
db
.
publications
.
id_categories
==
id_acti
query
&=
db
.
publications
.
year
>=
2009
query
&=
db
.
publications
.
year
<=
2014
# scan the publications table
for
row
in
db
(
query
).
select
():
data
=
row
.
as_dict
()
# skip if the the speaker is not from CPPM
if
data
[
"conference_speaker"
]
not
in
data
[
"authors_institute"
]:
continue
# remove publisher information
data
[
"id_publishers"
]
=
1
data
[
"pages"
]
=
""
data
[
"volume"
]
=
""
data
[
"publication_url"
]
=
""
data
[
"preprint"
]
=
""
# the year is the one of the conference
# submitted date is when the conference start
match
=
REG_CONF_DATES
.
match
(
data
[
"conference_dates"
])
if
match
:
offset
=
0
for
i
in
xrange
(
3
):
if
match
.
group
(
1
+
i
*
3
)
is
not
None
:
offset
=
i
*
3
break
data
[
"year"
]
=
match
.
group
(
offset
+
3
)
month
=
MONTHS
[
match
.
group
(
offset
+
2
)]
data
[
"submitted"
]
=
\
"%s-%02i-%02i"
%
(
match
.
group
(
offset
+
3
),
int
(
month
),
int
(
match
.
group
(
offset
+
1
)))
else
:
print
"No conferences dates"
,
data
[
"id"
]
# change the category and the status
data
[
"id_categories"
]
=
id_com
data
[
"id_status"
]
=
1
# change the origin
val
=
data
[
"origin"
]
if
val
:
origin
=
val
.
split
(
','
)[
0
].
strip
()
match
=
REG_ORIGIN
.
match
(
origin
)
host
,
rec_id
=
match
.
group
(
1
),
match
.
group
(
2
)
proceeding
=
load_record
(
host
,
rec_id
)
talk_id
=
proceeding
.
reference_conference_talk
()
data
[
"origin"
]
=
(
OAI_URL
%
(
host
,
talk_id
)
if
talk_id
else
""
)
# insert the new record in the database
id_rec
=
data
[
"id"
],
del
data
[
"id"
]
id_new
=
db
.
publications
.
insert
(
**
data
)
print
"Copy"
,
id_rec
,
"→"
,
if
id_new
:
print
id_new
elif
CALLBACK_ERRORS
in
db
.
publications
:
print
" "
.
join
(
db
.
publications
.
_callback_errors
)
else
:
print
"???"
# commit change
rep
=
raw_input
(
"Commit change in the database? [y/N]:"
)
if
rep
==
'y'
:
print
"The database is modified."
db
.
commit
()
# close
sys
.
exit
(
0
)
scripts/archives_py27/fix_affiliation_keys_0960.py
deleted
100644 → 0
View file @
05509c13
# -*- coding: utf-8 -*-
""" NAME
fix_affiliation_keys_0960 -- create the affiliation_keys table
SYNOPSIS
fix_affiliation_keys_0960
DESCRIPTION
A new mechanism was introduced in version 0.9.6.0 in order to
deal with affiliation. It relies on a new database table
affiliation_keys. Preferences inspirehep_institute_id and
add_rules_reg_institute are obsolete. Remove them.
The aim of this script is to update an existing database.
OPTIONS
EXAMPLE
> cd ...limbra/scripts
> ./run script fix_affiliation_keys_0960.py
> ./run -S limbra_cppm script fix_affiliation_keys_0960.py
> ./run loop fix_affiliation_keys_0960
AUTHOR
R. Le Gac -- Sep 2016
"""
if
__name__
==
"__main__"
:
import
sys
import
os
# create the database table affiliation_keys from SQL statement
# the table is not create if it exist
print
"
\n\t
Create the table affiliation_keys"
fn
=
os
.
path
.
join
(
os
.
getcwd
(),
"applications"
,
request
.
application
,
"scripts"
,
"affiliation_keys_0960.sql"
)
with
open
(
fn
)
as
fi
:
db
.
executesql
(
fi
.
read
())
# remove preferences inspirehep_institute_id, add_rules_reg_institute
for
pref
in
(
"inspirehep_institute_id"
,
"add_rules_reg_institute"
):
print
"
\t
Remove preference"
,
pref
db
(
db
.
preferences
.
property
==
pref
).
delete
()
db
.
commit
()
# exit
print
"
\n\t
End of script
\n
"
sys
.
exit
(
0
)
\ No newline at end of file
scripts/archives_py27/fix_collaboration.py
deleted
100644 → 0
View file @
05509c13
# -*- coding: utf-8 -*-
""" NAME
fix-collaboration
SYNOPSIS
fix the publications field collaboration.
DESCRIPTION
Before the limbra version 0.8.8, no rules have been
applied on the collaboration(s) signing the publications.
As a consequence, the database contains a mixture of syntaxes.
This script standardizes the naming convention.
OPTIONS
-h, --help
Display the help and exit.
EXAMPLE
> cd ...limbra/scripts
> ./run script fix-collaboration
AUTHOR
R. Le Gac -- Dec 2014
"""
def
destroy_collaboration
(
row
):
""" delete the collaboration entry when no publications are
attached to it.
"""
query
=
db
.
publications
.
id_collaborations
==
row
.
id
publications
=
db
(
query
).
select
()
if
len
(
publications
)
==
0
:
print
" - No publications associated to '%s' → delete it"
%
row
.
collaboration
db
(
db
.
collaborations
.
id
==
row
.
id
).
delete
()
db
.
commit
()
return
True
return
False
if
__name__
==
"__main__"
:
import
re
import
regex
import
sys
from
argparse
import
ArgumentParser
,
FileType
from
invenio_tools
import
InvenioStore
,
Marc12
REG_COLLABORATION
=
re
.
compile
(
regex
.
REG_COLLABORATION
)
# command line options
parser
=
ArgumentParser
()
args
=
parser
.
parse_args
()
# unlock the publications update when the status is OK
db
.
publications
.
_before_update
.
remove
(
INHIBIT_PUBLICATION_UPDATE_ON_OK
)
# scan the collaborations table
for
row
in
db
(
db
.
collaborations
.
id
>
1
).
select
():
m
=
REG_COLLABORATION
.
match
(
row
.
collaboration
)
if
m
:
destroy_collaboration
(
row
)
continue
# check publications attach to it
if
destroy_collaboration
(
row
):
continue
# replace by an existing value
msg
=
"Replace '%s' by an existing collaboration id [skip CR]: "
%
row
.
collaboration
id_collaboration
=
raw_input
(
msg
)
if
id_collaboration
:
new
=
db
.
collaborations
[
id_collaboration
]
print
"Replace '%s' by '%s': "
%
(
row
.
collaboration
,
new
.
collaboration
)
for
el
in
db
(
db
.
publications
.
id_collaborations
==
row
.
id
).
select
():
print
" - %s, %s → %s"
%
(
el
.
id
,
el
.
id_collaborations
,
new
.
id
)
db
(
db
.
publications
.
id
==
el
.
id
).
update
(
id_collaborations
=
new
.
id
)
db
.
commit
()
destroy_collaboration
(
row
)
continue
# ask for replacement
rep
=
raw_input
(
"Replace '%s' by [skip CR]: "
%
row
.
collaboration
)
if
rep
:
id
=
db
.
collaborations
.
insert
(
collaboration
=
rep
)
if
not
id
:
continue
for
el
in
db
(
db
.
publications
.
id_collaborations
==
row
.
id
).
select
():
print
" - %s, %s → %s"
%
(
el
.
id
,
el
.
id_collaborations
,
id
)
db
(
db
.
publications
.
id
==
el
.
id
).
update
(
id_collaborations
=
id
)
db
.
commit
()
destroy_collaboration
(
row
)
continue
# close
sys
.
exit
(
0
)
scripts/archives_py27/fix_conference_dates.py
deleted
100644 → 0
View file @
05509c13
# -*- coding: utf-8 -*-
""" NAME
fix-conference-dates
SYNOPSIS
fix the publications field conference_dates
DESCRIPTION
The syntax for the conference dates is a mixture of English and French.
In addition, from time to time, the month is encoded with 3 letters.
The latter can start with an upper case letter or not.
This script standardizes the conference dates using English typographic conventions
(see http://en.wikipedia.org/wiki/Wikipedia:Manual_of_Style/Dates_and_numbers#Months)
Good value will be:
3-7 Oct 2013
30 Nov - 4 Dec 2014
OPTIONS
-h, --help
Display the help and exit.
EXAMPLE
> cd ...limbra/scripts
> ./run script fix-conference-dates
AUTHOR
R. Le Gac -- Nov 2014
"""
def fix_month(value):
    """Return the English three-letter abbreviation of a month name.

    The comparison is case insensitive and relies on the first letters
    of the name, which can be written in English or in French, in full or
    abbreviated (e.g. "janvier", "Feb.", "avr", "juillet").

    Args:
        value (str): the name of the month.

    Returns:
        str: one of 'Jan', 'Feb', ..., 'Dec', or the input converted to
        lower case when no prefix is recognised (e.g. the ambiguous "jui").

    """
    # (recognised prefixes, abbreviation), tried in this order
    table = (
        (("jan",), 'Jan'),
        (("f",), 'Feb'),
        (("mar",), 'Mar'),
        (("apr", "avr"), 'Apr'),
        (("may", "mai"), 'May'),
        (("jun", "juin"), 'Jun'),
        (("jul", "juil"), 'Jul'),
        (("au", "ao"), 'Aug'),
        (("sep",), 'Sep'),
        (("oct",), 'Oct'),
        (("nov",), 'Nov'),
        (("d",), 'Dec'),
    )

    lowered = value.lower()

    for prefixes, abbreviation in table:
        if lowered.startswith(prefixes):
            return abbreviation

    return lowered
if
__name__
==
"__main__"
:
import
re
import
regex
import
sys
from
argparse
import
ArgumentParser
,
FileType
REG1
=
re
.
compile
(
"(\d+) *-? *(\d+) *([A-Za-zéû\.]+) *(\d{4})"
)
REG2
=
re
.
compile
(
"(\d+) *([A-Za-zéû\.]+) *-? *(\d+) *([A-Za-zéû\.]+) *(\d{4})"
)
REG_CONF_DATES
=
re
.
compile
(
regex
.
REG_CONF_DATES
)
# command line options
parser
=
ArgumentParser
()
args
=
parser
.
parse_args
()
# unlock the publications update when the status is OK
db
.
publications
.
_before_update
.
remove
(
INHIBIT_PUBLICATION_UPDATE_ON_OK
)
# scan the conference_dates field
for
row
in
db
(
db
.
publications
.
conference_dates
.
len
()
>
0
).
select
():
if
REG_CONF_DATES
.
match
(
row
.
conference_dates
):
continue
m1
=
REG1
.
match
(
row
.
conference_dates
.
strip
())
m2
=
REG2
.
match
(
row
.
conference_dates
.
strip
())
# month equal to jui is ambiguous (juin or juillet ?)
# to be solve by hand
if
m1
and
m1
.
group
(
3
).
lower
()
==
'jui'
or
\
m2
and
(
m2
.
group
(
2
).
lower
()
==
'jui'
or
m2
.
group
(
4
).
lower
()
==
'jui'
):
m1
,
m2
=
False
,
False
# 4-5 Oct 2014
if
m1
:
li
=
list
(
m1
.
groups
())
li
[
2
]
=
fix_month
(
li
[
2
])
val
=
"%s-%s %s %s"
%
tuple
(
li
)
# 30 Oct - 2 Nov 2014
elif
m2
:
li
=
list
(
m2
.
groups
())
li
[
1
]
=
fix_month
(
li
[
1
])
li
[
3
]
=
fix_month
(
li
[
3
])
val
=
"%s %s - %s %s %s"
%
tuple
(
li
)
# ???
else
:
print
print
"
\t
"
,
row
.
id
print
"
\t
"
,
row
.
title
print
"
\t
"
,
row
.
conference_title
print
"
\t
"
,
row
.
year
,
row
.
submitted
,
val
=
raw_input
(
"
\n\t
Replace %s by [skip CR]: "
%
row
.
conference_dates
)
if
val
and
row
.
conference_dates
!=
val
:
print
" - %s, %s → %s"
%
(
row
.
id
,
row
.
conference_dates
,
val
)
db
(
db
.
publications
.
id
==
row
.
id
).
update
(
conference_dates
=
val
)
db
.
commit
()
# close
sys
.
exit
(
0
)
scripts/archives_py27/fix_conference_url.py
deleted
100644 → 0
View file @
05509c13
# -*- coding: utf-8 -*-
""" NAME
fix-conference-url
SYNOPSIS
fix the publications field conference_url
DESCRIPTION
Check the field conference_url in the invenio store and update it.
From time to time, it has been forgotten.
OPTIONS
-h, --help
Display the help and exit.
EXAMPLE
> cd ...limbra/scripts
> ./run script fix-conference-url
AUTHOR
R. Le Gac -- Dec 2014
"""
if
__name__
==
"__main__"
:
import
re
import
sys
from
argparse
import
ArgumentParser
,
FileType
from
harvest_tools
import
CheckAndFix
,
CheckException
from
invenio_tools
import
InvenioStore
,
Marc12
REG_ORIGIN
=
re
.
compile
(
"http://([a-z\.]+)/record/(\d+)"
)
# command line options
parser
=
ArgumentParser
()
args
=
parser
.
parse_args
()
# unlock the publications update when the status is OK
db
.
publications
.
_before_update
.
remove
(
INHIBIT_PUBLICATION_UPDATE_ON_OK
)
# service
check
=
CheckAndFix
()
decode
=
Marc12
()
# scan the publications table
query
=
db
.
publications
.
origin
.
len
()
>
0
query
&=
db
.
publications
.
conference_url
.
len
()
==
0
query
&=
(
db
.
publications
.
id_categories
==
7
)
|
(
db
.
publications
.
id_categories
==
9
)
for
row
in
db
(
query
).
select
():
m
=
REG_ORIGIN
.
match
(
row
.
origin
)
if
not
m
:
continue
host
,
store_id
=
m
.
groups
()
# retrieve the full record from the store
store
=
InvenioStore
(
host
)
xml
=
store
.
get_record
(
store_id
)
record
=
decode
(
xml
)[
0
]
try
:
check
.
conference
(
record
)
except
CheckException
,
e
:
pass
val
=
record
.
conference_url
()
if
val
:
print
" - %s, conference url: %s"
%
(
row
.
id
,
val
)
db
(
db
.
publications
.
id
==
row
.
id
).
update
(
conference_url
=
val
)
db
.
commit
()
# close
sys
.
exit
(
0
)
scripts/archives_py27/fix_country_0808.py
deleted
100644 → 0
View file @
05509c13
# -*- coding: utf-8 -*-
""" NAME
fix-country-0808 -- fix invalid country names
SYNOPSIS
fix-country [options]
DESCRIPTION
Before the limbra 0.8.8, the name of the country
for a conference is defined by the user or by harvesters.
As a result, the database contains a mixture of French and
English names for countries. In addition, some values are wrong.