Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
limbra
limbra
Commits
ca18deac
Commit
ca18deac
authored
Mar 18, 2016
by
LE GAC Renaud
Browse files
Change the name in the script documentation.
parent
0334261e
Changes
20
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
273 additions
and
273 deletions
+273
-273
scripts/change_status_submitted.py
scripts/change_status_submitted.py
+19
-19
scripts/export_to_csv.py
scripts/export_to_csv.py
+19
-19
scripts/fix_acti2com_cppm.py
scripts/fix_acti2com_cppm.py
+3
-3
scripts/fix_collaboration.py
scripts/fix_collaboration.py
+29
-29
scripts/fix_conference_dates.py
scripts/fix_conference_dates.py
+30
-30
scripts/fix_conference_url.py
scripts/fix_conference_url.py
+2
-2
scripts/fix_country_0808.py
scripts/fix_country_0808.py
+3
-3
scripts/fix_country_0900.py
scripts/fix_country_0900.py
+2
-2
scripts/fix_defense.py
scripts/fix_defense.py
+26
-26
scripts/fix_institute_id.py
scripts/fix_institute_id.py
+2
-2
scripts/fix_origin_0900.py
scripts/fix_origin_0900.py
+3
-3
scripts/fix_page_volume.py
scripts/fix_page_volume.py
+2
-2
scripts/fix_publications_url.py
scripts/fix_publications_url.py
+3
-3
scripts/fix_publisher_0900.py
scripts/fix_publisher_0900.py
+3
-3
scripts/fix_report_number.py
scripts/fix_report_number.py
+2
-2
scripts/fix_submitted.py
scripts/fix_submitted.py
+30
-30
scripts/fix_year.py
scripts/fix_year.py
+2
-2
scripts/import_from_csv.py
scripts/import_from_csv.py
+17
-17
scripts/run
scripts/run
+3
-3
scripts/statistics.py
scripts/statistics.py
+73
-73
No files found.
scripts/change_status_submitted.py
View file @
ca18deac
# -*- coding: utf-8 -*-
# -*- coding: utf-8 -*-
""" NAME
""" NAME
change-status-submitted
change-status-submitted
SYNOPSIS
SYNOPSIS
Change the status to undefined when the submitted date is not valid
Change the status to undefined when the submitted date is not valid
DESCRIPTION
DESCRIPTION
In the version 0.8.8 the rule was changed for the submitted date.
In the version 0.8.8 the rule was changed for the submitted date.
Only the format YYYY-MM and YYYY-MM-DD are allowed.
Only the format YYYY-MM and YYYY-MM-DD are allowed.
Unfortunately some publications have a submitted date equal
Unfortunately some publications have a submitted date equal
to YYYY and a status OK. This script changes their status
to YYYY and a status OK. This script changes their status
to undefined.
to undefined.
In such a configuration, all publications with a wrong submitted
In such a configuration, all publications with a wrong submitted
date can be found using the checkandValidate wizard.
date can be found using the checkandValidate wizard.
OPTIONS
OPTIONS
-h, --help
-h, --help
Display the help and exit.
Display the help and exit.
EXAMPLE
EXAMPLE
> cd ...
track_publications
/scripts
> cd ...
limbra
/scripts
> ./
track_publications
change-status-submitted
> ./
run script
change-status-submitted
AUTHOR
AUTHOR
R. Le Gac -- Dec 2014
R. Le Gac -- Dec 2014
"""
"""
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
import
sys
import
sys
from
argparse
import
ArgumentParser
,
FileType
from
argparse
import
ArgumentParser
,
FileType
from
plugin_dbui
import
UNDEF_ID
from
plugin_dbui
import
UNDEF_ID
# command line options
# command line options
parser
=
ArgumentParser
()
parser
=
ArgumentParser
()
args
=
parser
.
parse_args
()
args
=
parser
.
parse_args
()
# unlock the publications update when the status is OK
# unlock the publications update when the status is OK
db
.
publications
.
_before_update
.
remove
(
INHIBIT_PUBLICATION_UPDATE_ON_OK
)
db
.
publications
.
_before_update
.
remove
(
INHIBIT_PUBLICATION_UPDATE_ON_OK
)
# scan the publications table
# scan the publications table
i
=
0
i
=
0
for
row
in
db
(
db
.
publications
.
submitted
.
len
()
<=
4
).
select
():
for
row
in
db
(
db
.
publications
.
submitted
.
len
()
<=
4
).
select
():
...
...
scripts/export_to_csv.py
View file @
ca18deac
#!/usr/bin/env python
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# -*- coding: utf-8 -*-
""" NAME
""" NAME
export-to-csv -- export table content as CSV file
export-to-csv -- export table content as CSV file
SYNOPSIS
SYNOPSIS
export-to-csv [options] table file
export-to-csv [options] table file
DESCRIPTION
DESCRIPTION
Write the full content of the table to a CSV file.
Write the full content of the table to a CSV file.
OPTIONS
OPTIONS
-h, --help
-h, --help
Display the help and exit.
Display the help and exit.
EXAMPLE
EXAMPLE
> cd ...
track_publications
/scripts
> cd ...
limbra
/scripts
> ./
track_publications
export-to-csv.py publications ~/mywap/
track_publications
/scripts/publications.csv
> ./
run script
export-to-csv.py publications ~/mywap/
limbra
/scripts/publications.csv
AUTHOR
AUTHOR
R. Le Gac
R. Le Gac
"""
"""
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
import
csv
import
csv
import
os
import
os
import
sys
import
sys
from
argparse
import
ArgumentParser
,
FileType
from
argparse
import
ArgumentParser
,
FileType
# command line options
# command line options
...
@@ -39,20 +39,20 @@ if __name__ == "__main__":
...
@@ -39,20 +39,20 @@ if __name__ == "__main__":
parser
.
add_argument
(
'file'
,
parser
.
add_argument
(
'file'
,
type
=
FileType
(
'w'
),
type
=
FileType
(
'w'
),
help
=
'the absolute path of the CSV file.'
)
help
=
'the absolute path of the CSV file.'
)
args
=
parser
.
parse_args
()
args
=
parser
.
parse_args
()
# the CSV writer
# the CSV writer
writer
=
csv
.
DictWriter
(
args
.
file
,
writer
=
csv
.
DictWriter
(
args
.
file
,
db
[
args
.
table
].
fields
,
db
[
args
.
table
].
fields
,
quoting
=
csv
.
QUOTE_MINIMAL
)
quoting
=
csv
.
QUOTE_MINIMAL
)
# write the header
# write the header
headers
=
{}
headers
=
{}
for
el
in
db
[
args
.
table
].
fields
:
for
el
in
db
[
args
.
table
].
fields
:
headers
[
el
]
=
el
headers
[
el
]
=
el
writer
.
writerow
(
headers
)
writer
.
writerow
(
headers
)
# write table content
# write table content
for
row
in
db
(
db
[
args
.
table
]).
select
():
for
row
in
db
(
db
[
args
.
table
]).
select
():
writer
.
writerow
(
row
.
as_dict
())
writer
.
writerow
(
row
.
as_dict
())
...
...
scripts/fix_acti2com_cppm.py
View file @
ca18deac
...
@@ -23,9 +23,9 @@
...
@@ -23,9 +23,9 @@
EXAMPLE
EXAMPLE
> cd ...
track_publications
/scripts
> cd ...
limbra
/scripts
> ./run -S test_
publications
script fix_acti2com_cppm.py
> ./run -S test_
limbra
script fix_acti2com_cppm.py
> ./run -S
track_publications
script fix_acti2com_cppm.py
> ./run -S
limbra
script fix_acti2com_cppm.py
AUTHOR
AUTHOR
...
...
scripts/fix_collaboration.py
View file @
ca18deac
# -*- coding: utf-8 -*-
# -*- coding: utf-8 -*-
""" NAME
""" NAME
fix-collaboration
fix-collaboration
SYNOPSIS
SYNOPSIS
fix the publications field collaboration.
fix the publications field collaboration.
DESCRIPTION
DESCRIPTION
Before the
track_publications
version 0.8.8, no rules have been
Before the
limbra
version 0.8.8, no rules have been
applied on the collaboration(s) signing the publications.
applied on the collaboration(s) signing the publications.
As a consequence, the database contains a mixture of syntaxes.
As a consequence, the database contains a mixture of syntaxes.
This script standardizes the naming convention.
This script standardizes the naming convention.
OPTIONS
OPTIONS
-h, --help
-h, --help
Display the help and exit.
Display the help and exit.
EXAMPLE
EXAMPLE
> cd ...
track_publications
/scripts
> cd ...
limbra
/scripts
> ./
track_publications
fix-collaboration
> ./
run script
fix-collaboration
AUTHOR
AUTHOR
R. Le Gac -- Dec 2014
R. Le Gac -- Dec 2014
"""
"""
def
destroy_collaboration
(
row
):
def
destroy_collaboration
(
row
):
""" delete the collaboration entry when no publications are
""" delete the collaboration entry when no publications are
attached to it.
attached to it.
"""
"""
query
=
db
.
publications
.
id_collaborations
==
row
.
id
query
=
db
.
publications
.
id_collaborations
==
row
.
id
publications
=
db
(
query
).
select
()
publications
=
db
(
query
).
select
()
...
@@ -38,30 +38,30 @@ def destroy_collaboration(row):
...
@@ -38,30 +38,30 @@ def destroy_collaboration(row):
db
(
db
.
collaborations
.
id
==
row
.
id
).
delete
()
db
(
db
.
collaborations
.
id
==
row
.
id
).
delete
()
db
.
commit
()
db
.
commit
()
return
True
return
True
return
False
return
False
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
import
re
import
re
import
regex
import
regex
import
sys
import
sys
from
argparse
import
ArgumentParser
,
FileType
from
argparse
import
ArgumentParser
,
FileType
from
invenio_tools
import
InvenioStore
,
Marc12
from
invenio_tools
import
InvenioStore
,
Marc12
REG_COLLABORATION
=
re
.
compile
(
regex
.
REG_COLLABORATION
)
REG_COLLABORATION
=
re
.
compile
(
regex
.
REG_COLLABORATION
)
# command line options
# command line options
parser
=
ArgumentParser
()
parser
=
ArgumentParser
()
args
=
parser
.
parse_args
()
args
=
parser
.
parse_args
()
# unlock the publications update when the status is OK
# unlock the publications update when the status is OK
db
.
publications
.
_before_update
.
remove
(
INHIBIT_PUBLICATION_UPDATE_ON_OK
)
db
.
publications
.
_before_update
.
remove
(
INHIBIT_PUBLICATION_UPDATE_ON_OK
)
# scan the collaborations table
# scan the collaborations table
for
row
in
db
(
db
.
collaborations
.
id
>
1
).
select
():
for
row
in
db
(
db
.
collaborations
.
id
>
1
).
select
():
m
=
REG_COLLABORATION
.
match
(
row
.
collaboration
)
m
=
REG_COLLABORATION
.
match
(
row
.
collaboration
)
if
m
:
if
m
:
destroy_collaboration
(
row
)
destroy_collaboration
(
row
)
...
@@ -77,20 +77,20 @@ if __name__ == "__main__":
...
@@ -77,20 +77,20 @@ if __name__ == "__main__":
if
id_collaboration
:
if
id_collaboration
:
new
=
db
.
collaborations
[
id_collaboration
]
new
=
db
.
collaborations
[
id_collaboration
]
print
"Replace '%s' by '%s': "
%
(
row
.
collaboration
,
new
.
collaboration
)
print
"Replace '%s' by '%s': "
%
(
row
.
collaboration
,
new
.
collaboration
)
for
el
in
db
(
db
.
publications
.
id_collaborations
==
row
.
id
).
select
():
for
el
in
db
(
db
.
publications
.
id_collaborations
==
row
.
id
).
select
():
print
" - %s, %s → %s"
%
(
el
.
id
,
el
.
id_collaborations
,
new
.
id
)
print
" - %s, %s → %s"
%
(
el
.
id
,
el
.
id_collaborations
,
new
.
id
)
db
(
db
.
publications
.
id
==
el
.
id
).
update
(
id_collaborations
=
new
.
id
)
db
(
db
.
publications
.
id
==
el
.
id
).
update
(
id_collaborations
=
new
.
id
)
db
.
commit
()
db
.
commit
()
destroy_collaboration
(
row
)
destroy_collaboration
(
row
)
continue
continue
# ask for replacement
# ask for replacement
rep
=
raw_input
(
"Replace '%s' by [skip CR]: "
%
row
.
collaboration
)
rep
=
raw_input
(
"Replace '%s' by [skip CR]: "
%
row
.
collaboration
)
if
rep
:
if
rep
:
id
=
db
.
collaborations
.
insert
(
collaboration
=
rep
)
id
=
db
.
collaborations
.
insert
(
collaboration
=
rep
)
if
not
id
:
if
not
id
:
continue
continue
...
@@ -98,9 +98,9 @@ if __name__ == "__main__":
...
@@ -98,9 +98,9 @@ if __name__ == "__main__":
print
" - %s, %s → %s"
%
(
el
.
id
,
el
.
id_collaborations
,
id
)
print
" - %s, %s → %s"
%
(
el
.
id
,
el
.
id_collaborations
,
id
)
db
(
db
.
publications
.
id
==
el
.
id
).
update
(
id_collaborations
=
id
)
db
(
db
.
publications
.
id
==
el
.
id
).
update
(
id_collaborations
=
id
)
db
.
commit
()
db
.
commit
()
destroy_collaboration
(
row
)
destroy_collaboration
(
row
)
continue
continue
# close
# close
sys
.
exit
(
0
)
sys
.
exit
(
0
)
scripts/fix_conference_dates.py
View file @
ca18deac
# -*- coding: utf-8 -*-
# -*- coding: utf-8 -*-
""" NAME
""" NAME
fix-conference-dates
fix-conference-dates
SYNOPSIS
SYNOPSIS
fix the publications field conference_dates
fix the publications field conference_dates
DESCRIPTION
DESCRIPTION
The syntax for the conference dates is a mixture of English or French.
The syntax for the conference dates is a mixture of English or French.
In addition from time to time the month is encoded with 3 letters.
In addition from time to time the month is encoded with 3 letters.
The latter can start with an upper case or not.
The latter can start with an upper case or not.
This script standardizes the conference dates using English typographic conventions
This script standardizes the conference dates using English typographic conventions
(see http://en.wikipedia.org/wiki/Wikipedia:Manual_of_Style/Dates_and_numbers#Months)
(see http://en.wikipedia.org/wiki/Wikipedia:Manual_of_Style/Dates_and_numbers#Months)
Good value will be:
Good value will be:
3-7 Oct 2013
3-7 Oct 2013
30 Nov - 4 Dec 2014
30 Nov - 4 Dec 2014
OPTIONS
OPTIONS
-h, --help
-h, --help
Display the help and exit.
Display the help and exit.
EXAMPLE
EXAMPLE
> cd ...
track_publications
/scripts
> cd ...
limbra
/scripts
> ./
track_publications
fix-conference-dates
> ./
run script
fix-conference-dates
AUTHOR
AUTHOR
R. Le Gac -- Nov 2014
R. Le Gac -- Nov 2014
"""
"""
def
fix_month
(
value
):
def
fix_month
(
value
):
value
=
value
.
lower
()
value
=
value
.
lower
()
if
value
.
startswith
(
"jan"
):
if
value
.
startswith
(
"jan"
):
value
=
'Jan'
value
=
'Jan'
elif
value
.
startswith
(
"f"
):
elif
value
.
startswith
(
"f"
):
value
=
'Feb'
value
=
'Feb'
elif
value
.
startswith
(
"mar"
):
elif
value
.
startswith
(
"mar"
):
value
=
'Mar'
value
=
'Mar'
...
@@ -75,31 +75,31 @@ def fix_month(value):
...
@@ -75,31 +75,31 @@ def fix_month(value):
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
import
re
import
re
import
regex
import
regex
import
sys
import
sys
from
argparse
import
ArgumentParser
,
FileType
from
argparse
import
ArgumentParser
,
FileType
REG1
=
re
.
compile
(
"(\d+) *-? *(\d+) *([A-Za-zéû\.]+) *(\d{4})"
)
REG1
=
re
.
compile
(
"(\d+) *-? *(\d+) *([A-Za-zéû\.]+) *(\d{4})"
)
REG2
=
re
.
compile
(
"(\d+) *([A-Za-zéû\.]+) *-? *(\d+) *([A-Za-zéû\.]+) *(\d{4})"
)
REG2
=
re
.
compile
(
"(\d+) *([A-Za-zéû\.]+) *-? *(\d+) *([A-Za-zéû\.]+) *(\d{4})"
)
REG_CONF_DATES
=
re
.
compile
(
regex
.
REG_CONF_DATES
)
REG_CONF_DATES
=
re
.
compile
(
regex
.
REG_CONF_DATES
)
# command line options
# command line options
parser
=
ArgumentParser
()
parser
=
ArgumentParser
()
args
=
parser
.
parse_args
()
args
=
parser
.
parse_args
()
# unlock the publications update when the status is OK
# unlock the publications update when the status is OK
db
.
publications
.
_before_update
.
remove
(
INHIBIT_PUBLICATION_UPDATE_ON_OK
)
db
.
publications
.
_before_update
.
remove
(
INHIBIT_PUBLICATION_UPDATE_ON_OK
)
# scan the conference_dates field
# scan the conference_dates field
for
row
in
db
(
db
.
publications
.
conference_dates
.
len
()
>
0
).
select
():
for
row
in
db
(
db
.
publications
.
conference_dates
.
len
()
>
0
).
select
():
if
REG_CONF_DATES
.
match
(
row
.
conference_dates
):
if
REG_CONF_DATES
.
match
(
row
.
conference_dates
):
continue
continue
m1
=
REG1
.
match
(
row
.
conference_dates
.
strip
())
m1
=
REG1
.
match
(
row
.
conference_dates
.
strip
())
m2
=
REG2
.
match
(
row
.
conference_dates
.
strip
())
m2
=
REG2
.
match
(
row
.
conference_dates
.
strip
())
...
@@ -108,33 +108,33 @@ if __name__ == "__main__":
...
@@ -108,33 +108,33 @@ if __name__ == "__main__":
if
m1
and
m1
.
group
(
3
).
lower
()
==
'jui'
or
\
if
m1
and
m1
.
group
(
3
).
lower
()
==
'jui'
or
\
m2
and
(
m2
.
group
(
2
).
lower
()
==
'jui'
or
m2
.
group
(
4
).
lower
()
==
'jui'
):
m2
and
(
m2
.
group
(
2
).
lower
()
==
'jui'
or
m2
.
group
(
4
).
lower
()
==
'jui'
):
m1
,
m2
=
False
,
False
m1
,
m2
=
False
,
False
# 4-5 Oct 2014
# 4-5 Oct 2014
if
m1
:
if
m1
:
li
=
list
(
m1
.
groups
())
li
=
list
(
m1
.
groups
())
li
[
2
]
=
fix_month
(
li
[
2
])
li
[
2
]
=
fix_month
(
li
[
2
])
val
=
"%s-%s %s %s"
%
tuple
(
li
)
val
=
"%s-%s %s %s"
%
tuple
(
li
)
# 30 Oct - 2 Nov 2014
# 30 Oct - 2 Nov 2014
elif
m2
:
elif
m2
:
li
=
list
(
m2
.
groups
())
li
=
list
(
m2
.
groups
())
li
[
1
]
=
fix_month
(
li
[
1
])
li
[
1
]
=
fix_month
(
li
[
1
])
li
[
3
]
=
fix_month
(
li
[
3
])
li
[
3
]
=
fix_month
(
li
[
3
])
val
=
"%s %s - %s %s %s"
%
tuple
(
li
)
val
=
"%s %s - %s %s %s"
%
tuple
(
li
)
# ???
# ???
else
:
else
:
print
print
print
"
\t
"
,
row
.
id
print
"
\t
"
,
row
.
id
print
"
\t
"
,
row
.
title
print
"
\t
"
,
row
.
title
print
"
\t
"
,
row
.
conference_title
print
"
\t
"
,
row
.
conference_title
print
"
\t
"
,
row
.
year
,
row
.
submitted
,
print
"
\t
"
,
row
.
year
,
row
.
submitted
,
val
=
raw_input
(
"
\n\t
Replace %s by [skip CR]: "
%
row
.
conference_dates
)
val
=
raw_input
(
"
\n\t
Replace %s by [skip CR]: "
%
row
.
conference_dates
)
if
val
and
row
.
conference_dates
!=
val
:
if
val
and
row
.
conference_dates
!=
val
:
print
" - %s, %s → %s"
%
(
row
.
id
,
row
.
conference_dates
,
val
)
print
" - %s, %s → %s"
%
(
row
.
id
,
row
.
conference_dates
,
val
)
db
(
db
.
publications
.
id
==
row
.
id
).
update
(
conference_dates
=
val
)
db
(
db
.
publications
.
id
==
row
.
id
).
update
(
conference_dates
=
val
)
db
.
commit
()
db
.
commit
()
# close
# close
sys
.
exit
(
0
)
sys
.
exit
(
0
)
scripts/fix_conference_url.py
View file @
ca18deac
...
@@ -16,8 +16,8 @@
...
@@ -16,8 +16,8 @@
EXAMPLE
EXAMPLE
> cd ...
track_publications
/scripts
> cd ...
limbra
/scripts
> ./
track_publications
fix-conference-url
> ./
run script
fix-conference-url
AUTHOR
AUTHOR
R. Le Gac -- Dec 2014
R. Le Gac -- Dec 2014
...
...
scripts/fix_country_0808.py
View file @
ca18deac
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
fix-country [options]
fix-country [options]
DESCRIPTION
DESCRIPTION
Before the
track_publications
0.8.8, the name of the country
Before the
limbra
0.8.8, the name of the country
for a conference is defined by the user or by harvesters.
for a conference is defined by the user or by harvesters.
As a result, the database contains a mixture of French and
As a result, the database contains a mixture of French and
English names for countries. In addition, some values are wrong.
English names for countries. In addition, some values are wrong.
...
@@ -26,9 +26,9 @@
...
@@ -26,9 +26,9 @@
EXAMPLE
EXAMPLE
> cd ...
/track_publications
/scripts
> cd ...
limbra
/scripts
> ./run script fix-country-0808.py
> ./run script fix-country-0808.py