Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
limbra
limbra
Commits
21971181
Commit
21971181
authored
Jan 15, 2021
by
LE GAC Renaud
Browse files
Migrate check and fix method to all automatons
parent
75ff496f
Changes
13
Hide whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
213 additions
and
73 deletions
+213
-73
modules/harvest_tools/articles.py
modules/harvest_tools/articles.py
+3
-5
modules/harvest_tools/base.py
modules/harvest_tools/base.py
+3
-0
modules/harvest_tools/notes.py
modules/harvest_tools/notes.py
+21
-9
modules/harvest_tools/preprints.py
modules/harvest_tools/preprints.py
+6
-6
modules/harvest_tools/proceedings.py
modules/harvest_tools/proceedings.py
+14
-5
modules/harvest_tools/reports.py
modules/harvest_tools/reports.py
+17
-10
modules/harvest_tools/talks.py
modules/harvest_tools/talks.py
+30
-14
modules/harvest_tools/thesis.py
modules/harvest_tools/thesis.py
+18
-12
modules/store_tools/base.py
modules/store_tools/base.py
+5
-0
modules/store_tools/recordcdsthesis.py
modules/store_tools/recordcdsthesis.py
+37
-1
modules/store_tools/recordhepthesis.py
modules/store_tools/recordhepthesis.py
+36
-0
tests/basis/test_08_RecordCdsThesis.py
tests/basis/test_08_RecordCdsThesis.py
+17
-11
tests/basis/test_09_RecordHepThesis.py
tests/basis/test_09_RecordHepThesis.py
+6
-0
No files found.
modules/harvest_tools/articles.py
View file @
21971181
...
...
@@ -6,7 +6,9 @@ from .base import (learn_my_authors,
MSG_CRASH
,
MSG_FIX_ORIGIN
,
MSG_IN_DB
,
MSG_LOAD
)
MSG_LOAD
,
T4
,
T6
)
from
plugin_dbui
import
get_id
,
UNDEF_ID
from
store_tools
import
CheckException
...
...
@@ -14,9 +16,6 @@ MSG_NO_EDITOR = "Reject article is not published"
MSG_NOT_ARTICLE
=
"Reject publication is not and article"
MSG_TRANSFORM_PREPRINT
=
"Transform the preprint into an article"
T4
=
" "
*
4
T6
=
" "
*
6
class
Articles
(
Automaton
):
"""Automaton for articles.
...
...
@@ -61,7 +60,6 @@ class Articles(Automaton):
return
False
try
:
# is with authors form my institute
# standardise name of collaboration
# format authors according to my format
...
...
modules/harvest_tools/base.py
View file @
21971181
...
...
@@ -8,6 +8,9 @@ MSG_FIX_ORIGIN = "Fixed the origin field"
MSG_IN_DB
=
"Already in the database"
MSG_LOAD
=
"Load in the database"
T4
=
" "
*
4
T6
=
" "
*
6
def
family_name_fr
(
full_name
):
"""Extract the family name when the full name is encoded as ``J. Doe``.
...
...
modules/harvest_tools/notes.py
View file @
21971181
...
...
@@ -2,11 +2,11 @@
"""
from
.automaton
import
Automaton
from
.base
import
MSG_CRASH
,
MSG_LOAD
from
.base
import
MSG_CRASH
,
MSG_LOAD
,
T4
from
.checkandfix
import
CheckException
from
plugin_dbui
import
UNDEF_ID
T4
=
" "
*
4
MSG_NOT_NOTE
=
"Reject publication is not a note"
class
Notes
(
Automaton
):
...
...
@@ -17,6 +17,12 @@ class Notes(Automaton):
def
check_record
(
self
,
record
):
"""Check the content of the note in order to fix non conformities.
* is with authors form my institute
* standardise name of collaboration
* format authors according to my format
* extract authors form my institute signing the publication
* is submitted date well formed
Args:
record (RecordPubli):
record describing a note
...
...
@@ -27,16 +33,22 @@ class Notes(Automaton):
corrected.
"""
if
not
Automaton
.
check_record
(
self
,
record
):
return
False
self
.
logger
.
debug
(
f
"
{
T4
}
check and fix record (note)"
)
self
.
logger
.
debug
(
f
"
{
T4
}
check record (note)"
)
if
record
.
subtype
()
==
"note"
:
self
.
logs
[
-
1
].
reject
(
MSG_NOT_NOTE
,
record
)
return
False
try
:
self
.
check
.
submitted
(
record
)
self
.
check
.
format_authors
(
record
,
fmt
=
"F. Last"
)
self
.
check
.
get_my_authors
(
record
,
sort
=
True
)
# is with authors form my institute
# standardise name of collaboration
# format authors according to my format
# extract authors form my institute signing the publication
# is submitted date well formed
record
.
check_and_fix
(
self
.
rex_institute
,
fmt_author
=
"F. Last"
,
sep_author
=
", "
,
sort_author
=
True
)
except
CheckException
as
e
:
self
.
logs
[
-
1
].
reject
(
e
,
record
=
record
)
...
...
modules/harvest_tools/preprints.py
View file @
21971181
...
...
@@ -5,12 +5,12 @@ from .automaton import Automaton
from
.base
import
MSG_CRASH
,
MSG_LOAD
,
T4
from
.checkandfix
import
CheckException
from
plugin_dbui
import
get_id
,
UNDEF_ID
from
.recordcdsconfpaper
import
RecordCdsConfPaper
from
.recordhepconfpaper
import
RecordHepConfPaper
from
.recordcdsthesis
import
RecordCdsThesis
from
.recordhepthesis
import
RecordHepThesis
from
store_tools
import
(
RecordCdsConfPaper
,
RecordHepConfPaper
,
RecordCdsThesis
,
RecordHepThesis
)
MSG_NOT_
ARTICLE
=
"Reject publication is not a preprint"
MSG_NOT_
PREPRINT
=
"Reject publication is not a preprint"
MSG_PREPRINT_IS_PAPER
=
"Reject preprint is a published paper"
MSG_PREPRINT_IS_CONFERENCE
=
"Reject preprint is a conference"
MSG_PREPRINT_IS_THESIS
=
"Reject preprint is a thesis"
...
...
@@ -44,7 +44,7 @@ class Preprints(Automaton):
self
.
logger
.
debug
(
f
"
{
T4
}
check and fix record (preprint)"
)
if
record
.
subtype
()
==
"preprint"
:
self
.
logs
[
-
1
].
reject
(
MSG_NOT_
ARTICLE
,
record
)
self
.
logs
[
-
1
].
reject
(
MSG_NOT_
PREPRINT
,
record
)
return
False
if
record
.
is_published
():
...
...
modules/harvest_tools/proceedings.py
View file @
21971181
...
...
@@ -2,14 +2,12 @@
"""
from
.automaton
import
Automaton
from
.base
import
MSG_CRASH
,
MSG_LOAD
from
.base
import
MSG_CRASH
,
MSG_LOAD
,
T4
from
.checkandfix
import
CheckException
from
plugin_dbui
import
get_id
,
UNDEF_ID
MSG_NOT_PROCEEDING
=
"Reject publication is not a proceeding"
T4
=
" "
*
4
class
Proceedings
(
Automaton
):
"""Automaton for conference proceedings.
...
...
@@ -19,6 +17,18 @@ class Proceedings(Automaton):
def
check_record
(
self
,
record
):
"""Check the content of the proceeding in order to fix non conformities.
* is with authors form my institute
* standardise name of collaboration
* format authors according to my format
* extract authors form my institute signing the publication
* is submitted date well formed
* check conference country
* check conference dates
* format editor according to my criteria
* resolve published synonym
* check reference paper
Args:
record (RecordConf):
record describing a proceeding.
...
...
@@ -29,14 +39,13 @@ class Proceedings(Automaton):
corrected.
"""
self
.
logger
.
debug
(
f
"
{
T4
}
check nd fix record (proceeding)"
)
self
.
logger
.
debug
(
f
"
{
T4
}
check
a
nd fix record (proceeding)"
)
if
record
.
subtype
()
==
"proceeding"
:
self
.
logs
[
-
1
].
reject
(
MSG_NOT_PROCEEDING
,
record
)
return
False
try
:
# is with authors form my institute
# standardise name of collaboration
# format authors according to my format
...
...
modules/harvest_tools/reports.py
View file @
21971181
...
...
@@ -2,12 +2,11 @@
"""
from
.automaton
import
Automaton
from
.base
import
MSG_CRASH
,
MSG_LOAD
from
.base
import
MSG_CRASH
,
MSG_LOAD
,
T4
from
.checkandfix
import
CheckException
from
plugin_dbui
import
get_id
,
UNDEF_ID
,
UNKNOWN
MSG_REPORT_NO_NUMBER
=
"Reject no report number"
T4
=
" "
*
4
class
Reports
(
Automaton
):
...
...
@@ -18,6 +17,12 @@ class Reports(Automaton):
def
check_record
(
self
,
record
):
"""Check the content of the report in order to fix non conformities.
* is with authors form my institute
* standardise name of collaboration
* format authors according to my format
* extract authors form my institute signing the publication
* is submitted date well formed
Args:
record (RecordPubli):
record describing a report.
...
...
@@ -28,20 +33,22 @@ class Reports(Automaton):
corrected.
"""
if
not
Automaton
.
check_record
(
self
,
record
):
return
False
self
.
logger
.
debug
(
f
"
{
T4
}
check record (report)"
)
self
.
logger
.
debug
(
f
"
{
T4
}
check and fix record (report)"
)
if
not
record
.
report_number
():
self
.
logs
[
-
1
].
reject
(
MSG_REPORT_NO_NUMBER
,
record
=
record
)
return
False
try
:
self
.
check
.
submitted
(
record
)
self
.
check
.
format_authors
(
record
,
fmt
=
"F. Last"
)
self
.
check
.
get_my_authors
(
record
,
sort
=
True
)
# is with authors form my institute
# standardise name of collaboration
# format authors according to my format
# extract authors form my institute signing the publication
# is submitted date well formed
record
.
check_and_fix
(
self
.
rex_institute
,
fmt_author
=
"F. Last"
,
sep_author
=
", "
,
sort_author
=
True
)
except
CheckException
as
e
:
self
.
logs
[
-
1
].
reject
(
e
,
record
=
record
)
...
...
modules/harvest_tools/talks.py
View file @
21971181
...
...
@@ -2,21 +2,32 @@
"""
from
.automaton
import
Automaton
from
.base
import
MSG_CRASH
,
MSG_LOAD
from
.base
import
MSG_CRASH
,
MSG_LOAD
,
T4
from
.checkandfix
import
CheckException
from
plugin_dbui
import
get_id
,
UNDEF_ID
from
store_tools
import
RecordCdsConfPaper
,
RecordHepConfPaper
T4
=
" "
*
4
MSG_NOT_TALK
=
"Reject publication is not a talk"
class
Talks
(
Automaton
):
"""Automaton for conference talks.
"""
def
check_record
(
self
,
record
):
"""Check the content of the talk in order to fix non conformities.
* is conference
* is with authors form my institute
* standardise name of collaboration
* format authors according to my format
* extract authors form my institute signing the publication
* is submitted date well formed
* check conference country
* check conference dates
Args:
record (RecordConf):
record describing a conference.
...
...
@@ -27,20 +38,25 @@ class Talks(Automaton):
corrected.
"""
if
not
Automaton
.
check_record
(
self
,
record
):
return
False
self
.
logger
.
debug
(
f
"
{
T4
}
check and fix record (talk)"
)
self
.
logger
.
debug
(
f
"
{
T4
}
check record (talk)"
)
if
not
isinstance
(
record
,
(
RecordCdsConfPaper
,
RecordHepConfPaper
)):
self
.
logs
[
-
1
].
reject
(
MSG_NOT_TALK
,
record
)
return
False
try
:
self
.
check
.
is_conference
(
record
)
self
.
check
.
country
(
record
)
self
.
check
.
conference_date
(
record
)
self
.
check
.
submitted
(
record
)
self
.
check
.
format_authors
(
record
,
fmt
=
"F. Last"
)
self
.
check
.
get_my_authors
(
record
,
sort
=
True
)
# is with authors form my institute
# standardise name of collaboration
# format authors according to my format
# extract authors form my institute signing the publication
# is submitted date well formed
record
.
check_and_fix
(
self
.
rex_institute
,
fmt_author
=
"F. Last"
,
sep_author
=
", "
,
sort_author
=
True
)
record
.
check_country
()
record
.
check_conference_date
()
except
CheckException
as
e
:
self
.
logs
[
-
1
].
reject
(
e
,
record
=
record
)
...
...
modules/harvest_tools/thesis.py
View file @
21971181
...
...
@@ -4,11 +4,12 @@
import
re
from
.automaton
import
Automaton
from
.base
import
MSG_CRASH
,
MSG_LOAD
from
.base
import
MSG_CRASH
,
MSG_LOAD
,
T4
from
.checkandfix
import
CheckException
from
plugin_dbui
import
get_id
,
UNDEF_ID
from
store_tools
import
RecordCdsThesis
,
RecordHepThesis
T4
=
" "
*
4
MSG_NOT_THESIS
=
"Reject publication is not a thesis"
class
Thesis
(
Automaton
):
...
...
@@ -29,19 +30,24 @@ class Thesis(Automaton):
corrected.
"""
if
not
Automaton
.
check_record
(
self
,
record
):
return
False
self
.
logger
.
debug
(
f
"
{
T4
}
check and fix record (thesis)"
)
self
.
logger
.
debug
(
f
"
{
T4
}
check record (thesis)"
)
if
not
isinstance
(
record
,
(
RecordCdsThesis
,
RecordHepThesis
)):
self
.
logs
[
-
1
].
reject
(
MSG_NOT_THESIS
,
record
)
return
False
try
:
self
.
check
.
is_thesis
(
record
)
self
.
check
.
submitted
(
record
)
self
.
check
.
format_universities
(
record
)
self
.
check
.
format_authors
(
record
,
fmt
=
"F. Last"
)
self
.
check
.
get_my_authors
(
record
,
sort
=
True
)
# is with authors form my institute
# standardise name of collaboration
# format authors according to my format
# extract authors form my institute signing the publication
# is submitted date well formed
record
.
check_and_fix
(
self
.
rex_institute
,
fmt_author
=
"F. Last"
,
sep_author
=
", "
,
sort_author
=
True
)
record
.
format_universities
()
except
CheckException
as
e
:
self
.
logs
[
-
1
].
reject
(
e
,
record
=
record
)
...
...
modules/store_tools/base.py
View file @
21971181
...
...
@@ -6,6 +6,7 @@ import re
from
.exception
import
ToolException
from
plugin_dbui
import
get_id
,
UNDEF_ID
AFF_CPPM
=
"Marseille, CPPM"
ARXIV
=
"arXiv"
ARXIV_PDF
=
"http://arxiv.org/pdf/"
...
...
@@ -53,6 +54,10 @@ T2, T4, T6 = " "*2, " "*4, " "*6
THESIS_DIR
=
"dir."
UNIV_AIX_MARSEILLE
=
"Aix Marseille Université"
UNIV_MARSEILLE
=
"Université de la Méditerrannée Aix-Marseille II"
UNIVERSITY
=
"University"
def
is_conference
(
recjson
):
"""True when the record describes a publication related to a conference.
...
...
modules/store_tools/recordcdsthesis.py
View file @
21971181
""" store_tools.recordcdsthesis
"""
from
.base
import
THESIS_DIR
from
.base
import
(
AFF_CPPM
,
REG_YEAR
,
THESIS_DIR
,
UNIVERSITY
,
UNIV_AIX_MARSEILLE
,
UNIV_MARSEILLE
,
T6
)
from
filters
import
CLEAN_THESIS_DEFENSE
from
gluon
import
current
from
store_tools.recordcdspubli
import
RecordCdsPubli
...
...
@@ -56,6 +63,35 @@ class RecordCdsThesis(RecordCdsPubli):
return
li
def format_universities(self):
    """Normalise the university names of the thesis.

    * When the CPPM affiliation is listed and a year can be read from
      the defence date, the affiliation is replaced by the name of the
      Marseille university, which differs before and from 2012.
    * Every ``U.`` is replaced by the value of ``current.T(UNIVERSITY)``.

    Nothing is modified when the record has no university. Otherwise the
    result is written to ``self["dissertation_note"]["university"]``.

    """
    self.logger.debug(f"{T6}format university")

    names = self.these_universities()
    if not len(names):
        return

    # the defence date is only inspected when the CPPM affiliation is present
    found = None
    if AFF_CPPM in names:
        found = REG_YEAR.search(self.these_defense())

    if found:
        if int(found.group(1)) < 2012:
            marseille = UNIV_MARSEILLE
        else:
            marseille = UNIV_AIX_MARSEILLE
        names = names.replace(AFF_CPPM, marseille)

    # other universities -- expand the abbreviation U.
    translated = current.T(UNIVERSITY).decode("utf8")
    self["dissertation_note"]["university"] = names.replace('U.', translated)
def
these_defense
(
self
):
"""The defence date for a master/phd thesis.
...
...
modules/store_tools/recordhepthesis.py
View file @
21971181
""" store_tools.recordhepthesis
"""
from
.base
import
(
AFF_CPPM
,
REG_YEAR
,
UNIVERSITY
,
UNIV_AIX_MARSEILLE
,
UNIV_MARSEILLE
,
T6
)
from
gluon
import
current
from
filters
import
CLEAN_THESIS_DEFENSE
from
.recordheppubli
import
RecordHepPubli
...
...
@@ -51,6 +58,35 @@ class RecordHepThesis(RecordHepPubli):
return
li
def format_universities(self):
    """Normalise the university names of the thesis.

    * When the CPPM affiliation is listed and a year can be read from
      the defence date, the affiliation is replaced by the name of the
      Marseille university, which differs before and from 2012.
    * Every ``U.`` is replaced by the value of ``current.T(UNIVERSITY)``.

    Nothing is modified when the record has no university. Otherwise
    ``self["thesis_info"]["institutions"]`` is overwritten with a single
    entry whose ``name`` is the formatted string.

    """
    self.logger.debug(f"{T6}format university")

    names = self.these_universities()
    if not len(names):
        return

    # the defence date is only inspected when the CPPM affiliation is present
    found = None
    if AFF_CPPM in names:
        found = REG_YEAR.search(self.these_defense())

    if found:
        if int(found.group(1)) < 2012:
            marseille = UNIV_MARSEILLE
        else:
            marseille = UNIV_AIX_MARSEILLE
        names = names.replace(AFF_CPPM, marseille)

    # other universities -- expand the abbreviation U.
    translated = current.T(UNIVERSITY).decode("utf8")
    institution = {"name": names.replace('U.', translated)}
    self["thesis_info"]["institutions"] = [institution]
def
these_defense
(
self
):
"""The defence date for a master/phd thesis.
...
...
tests/basis/test_08_RecordCdsThesis.py
View file @
21971181
...
...
@@ -25,25 +25,31 @@ from store_tools import load_record
@
pytest
.
fixture
(
scope
=
"module"
)
def
rec
cds
():
def
rec
ord
():
return
load_record
(
"cds.cern.ch"
,
1632177
)
def
test_authors_as_list_cds_08001
(
rec
cds
):
assert
rec
cds
.
authors_as_list
()
==
[
u
'Chen, Liming'
]
def
test_authors_as_list_cds_08001
(
rec
ord
):
assert
rec
ord
.
authors_as_list
()
==
[
u
'Chen, Liming'
]
def
test_these_defense_cds_08002
(
rec
cds
):
assert
rec
cds
.
these_defense
()
==
"2013-12-10"
def
test_these_defense_cds_08002
(
rec
ord
):
assert
rec
ord
.
these_defense
()
==
"2013-12-10"
def
test_these_level_cds_08003
(
rec
cds
):
assert
rec
cds
.
these_level
()
==
"PhD"
def
test_these_level_cds_08003
(
rec
ord
):
assert
rec
ord
.
these_level
()
==
"PhD"
def
test_these_directors_cds_08004
(
rec
cds
):
assert
rec
cds
.
these_directors
(
sep
=
u
"|"
)
==
""
def
test_these_directors_cds_08004
(
rec
ord
):
assert
rec
ord
.
these_directors
(
sep
=
u
"|"
)
==
""
def
test_these_universities_cds_08005
(
reccds
):
assert
reccds
.
these_universities
()
==
"Shandong U. & Marseille, CPPM"
def
test_these_universities_cds_08005
(
record
):
assert
record
.
these_universities
()
==
"Shandong U. & Marseille, CPPM"
def test_format_universities_cds_08006(record):
    """The universities read back after formatting match the expected names."""
    expected = "Shandong Université & Aix Marseille Université"

    # format_universities modifies the record in place
    record.format_universities()

    assert record.these_universities() == expected
tests/basis/test_09_RecordHepThesis.py
View file @
21971181
...
...
@@ -48,3 +48,9 @@ def test_these_directors_ins_09004(record):
def test_these_universities_ins_09005(record):
    """The universities are returned as a single ``&``-separated string."""
    expected = "Shandong U. & Marseille, CPPM"
    assert record.these_universities() == expected
def test_format_universities_cds_09006(record):
    """The universities read back after formatting match the expected names."""
    expected = "Shandong Université & Aix Marseille Université"

    # format_universities modifies the record in place
    record.format_universities()

    assert record.these_universities() == expected
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment