Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
limbra
limbra
Commits
7799693c
Commit
7799693c
authored
Jan 15, 2021
by
LE GAC Renaud
Browse files
Polish and improve check and fix for RecordCdsThesis
parent
21971181
Changes
13
Hide whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
177 additions
and
74 deletions
+177
-74
modules/harvest_tools/thesis.py
modules/harvest_tools/thesis.py
+1
-1
modules/store_tools/recordcdspubli.py
modules/store_tools/recordcdspubli.py
+5
-2
modules/store_tools/recordcdsthesis.py
modules/store_tools/recordcdsthesis.py
+37
-0
modules/store_tools/recordheppubli.py
modules/store_tools/recordheppubli.py
+1
-1
modules/store_tools/recordhepthesis.py
modules/store_tools/recordhepthesis.py
+28
-0
tests/basis/test_08_RecordCdsThesis.py
tests/basis/test_08_RecordCdsThesis.py
+7
-0
tests/basis/test_09_RecordHepThesis.py
tests/basis/test_09_RecordHepThesis.py
+9
-1
tests/basis/test_13_check_and_fix_article_cds.py
tests/basis/test_13_check_and_fix_article_cds.py
+2
-2
tests/basis/test_14_check_fix_article_ins.py
tests/basis/test_14_check_fix_article_ins.py
+2
-2
tests/basis/test_15_check_and_fix_proceeding_cds.py
tests/basis/test_15_check_and_fix_proceeding_cds.py
+3
-3
tests/basis/test_16_check_and_fix_proceeding_ins.py
tests/basis/test_16_check_and_fix_proceeding_ins.py
+2
-2
tests/basis/test_17_check_and_fix_thesis.py
tests/basis/test_17_check_and_fix_thesis.py
+0
-60
tests/basis/test_17_check_and_fix_thesis_cds.py
tests/basis/test_17_check_and_fix_thesis_cds.py
+80
-0
No files found.
modules/harvest_tools/thesis.py
View file @
7799693c
...
...
@@ -32,7 +32,7 @@ class Thesis(Automaton):
"""
self
.
logger
.
debug
(
f
"
{
T4
}
check and fix record (thesis)"
)
if
not
isinstance
(
record
,
(
RecordCdsThesis
,
RecordHepT
hesis
))
:
if
record
.
subtype
()
==
"t
hesis
"
:
self
.
logs
[
-
1
].
reject
(
MSG_NOT_THESIS
,
record
)
return
False
...
...
modules/store_tools/recordcdspubli.py
View file @
7799693c
...
...
@@ -833,7 +833,7 @@ class RecordCdsPubli(dict, AuthorsMixin, PublicationInfoMixin):
Returns:
str:
*
"
articles
"
,
"
preprint
"
,
"
proceeding
"
,
"
note
" or "report"
* articles, preprint, proceeding, note
, report or thesis
* empty string when it is not defined
"""
...
...
@@ -841,13 +841,16 @@ class RecordCdsPubli(dict, AuthorsMixin, PublicationInfoMixin):
if
collection
is
None
:
return
""
if
not
isinstance
(
collection
,
list
):
collection
=
[
collection
]
lst
=
[
dct
.
get
(
"primary"
,
""
).
lower
()
for
dct
in
collection
]
if
"conferencepaper"
in
lst
:
return
"proceeding"
# order matters since we have (preprint+note)
for
val
in
(
"article"
,
"note"
,
"report"
,
"preprint"
):
for
val
in
(
"article"
,
"note"
,
"report"
,
"thesis"
,
"preprint"
):
if
val
in
lst
:
return
val
...
...
modules/store_tools/recordcdsthesis.py
View file @
7799693c
...
...
@@ -2,12 +2,15 @@
"""
from
.base
import
(
AFF_CPPM
,
MSG_WELL_FORMED_DATE
,
REG_YEAR
,
REG_DATE_YYYYMM
,
THESIS_DIR
,
UNIVERSITY
,
UNIV_AIX_MARSEILLE
,
UNIV_MARSEILLE
,
T6
)
from
.exception
import
CheckException
from
filters
import
CLEAN_THESIS_DEFENSE
from
gluon
import
current
from
store_tools.recordcdspubli
import
RecordCdsPubli
...
...
@@ -63,6 +66,37 @@ class RecordCdsThesis(RecordCdsPubli):
return
li
def check_submitted_date(self):
    """Validate the submitted date, falling back on the defense date.

    The value returned by ``submitted()`` is tested against
    ``REG_DATE_YYYYMM``. When it does not match, the value returned by
    ``these_defense()`` is tested in turn and, if it matches, it is stored
    as the ``date`` of the ``prepublication`` entry:

        * the first element is updated when the entry is a list
        * the entry itself is updated when it is a single mapping
        * the entry is created when it is missing

    Raises:
        CheckException::

            * neither the submitted date nor the defense date matches
              ``REG_DATE_YYYYMM``

    """
    self.logger.debug(f"{T6}check submitted date")

    # nothing to do when the submitted date is already well formed
    if REG_DATE_YYYYMM.match(self.submitted()):
        return

    # recover by using the defense date
    defense = self.these_defense()
    if not REG_DATE_YYYYMM.match(defense):
        raise CheckException(MSG_WELL_FORMED_DATE)

    if "prepublication" not in self:
        self["prepublication"] = {"date": defense}
        return

    entry = self["prepublication"]
    target = entry[0] if isinstance(entry, list) else entry
    target["date"] = defense
def
format_universities
(
self
):
"""Format the name of the university for PhD:
...
...
@@ -86,6 +120,9 @@ class RecordCdsThesis(RecordCdsPubli):
val
=
(
UNIV_MARSEILLE
if
year
<
2012
else
UNIV_AIX_MARSEILLE
)
values
=
values
.
replace
(
AFF_CPPM
,
val
)
if
"Marseille U., Luminy"
in
values
:
values
=
values
.
replace
(
"Marseille U., Luminy"
,
UNIV_MARSEILLE
)
# Other -- replace U. by University
university
=
current
.
T
(
UNIVERSITY
).
decode
(
"utf8"
)
values
=
values
.
replace
(
'U.'
,
university
)
...
...
modules/store_tools/recordheppubli.py
View file @
7799693c
...
...
@@ -530,7 +530,7 @@ class RecordHepPubli(dict, AuthorsMixin, PublicationInfoMixin):
Returns:
str:
*
"
articles
"
,
"
preprint
"
,
"
proceeding
"
,
"
note
" or "report"
* articles, preprint, proceeding, note
, report or thesis
* empty string when it is not defined
"""
...
...
modules/store_tools/recordhepthesis.py
View file @
7799693c
...
...
@@ -2,11 +2,14 @@
"""
from
.base
import
(
AFF_CPPM
,
MSG_WELL_FORMED_DATE
,
REG_DATE_YYYYMM
,
REG_YEAR
,
UNIVERSITY
,
UNIV_AIX_MARSEILLE
,
UNIV_MARSEILLE
,
T6
)
from
.exception
import
CheckException
from
gluon
import
current
from
filters
import
CLEAN_THESIS_DEFENSE
from
.recordheppubli
import
RecordHepPubli
...
...
@@ -58,6 +61,28 @@ class RecordHepThesis(RecordHepPubli):
return
li
def check_submitted_date(self):
    """Validate the submitted date, falling back on the defense date.

    The value returned by ``submitted()`` is tested against
    ``REG_DATE_YYYYMM``. When it does not match, the value returned by
    ``these_defense()`` is tested in turn and, if it matches, it is stored
    under ``thesis_info`` / ``defense_date``.

    Raises:
        CheckException::

            * neither the submitted date nor the defense date matches
              ``REG_DATE_YYYYMM``

    """
    self.logger.debug(f"{T6}check submitted date")

    # nothing to do when the submitted date is already well formed
    if REG_DATE_YYYYMM.match(self.submitted()):
        return

    # recover by using the defense date
    defense = self.these_defense()
    if not REG_DATE_YYYYMM.match(defense):
        raise CheckException(MSG_WELL_FORMED_DATE)

    # NOTE(review): the value is written back as the defense date; whether
    # submitted() picks it up afterwards cannot be told from here -- confirm
    self["thesis_info"]["defense_date"] = defense
def
format_universities
(
self
):
"""Format the name of the university for PhD:
...
...
@@ -81,6 +106,9 @@ class RecordHepThesis(RecordHepPubli):
val
=
(
UNIV_MARSEILLE
if
year
<
2012
else
UNIV_AIX_MARSEILLE
)
values
=
values
.
replace
(
AFF_CPPM
,
val
)
if
"Marseille U., Luminy"
in
values
:
values
=
values
.
replace
(
"Marseille U., Luminy"
,
UNIV_MARSEILLE
)
# Other -- replace U. by University
university
=
current
.
T
(
UNIVERSITY
).
decode
(
"utf8"
)
values
=
values
.
replace
(
'U.'
,
university
)
...
...
tests/basis/test_08_RecordCdsThesis.py
View file @
7799693c
...
...
@@ -53,3 +53,10 @@ def test_format_universities_cds_08006(record):
record
.
format_universities
()
assert
record
.
these_universities
()
==
\
"Shandong Université & Aix Marseille Université"
def test_format_universities_cds_08007():
    """Format the university of the cds.cern.ch record 1394605."""
    expected = "Université de la Méditerrannée Aix-Marseille II"

    thesis = load_record("cds.cern.ch", 1394605)
    thesis.format_universities()

    assert thesis.these_universities() == expected
tests/basis/test_09_RecordHepThesis.py
View file @
7799693c
...
...
@@ -50,7 +50,15 @@ def test_these_universities_ins_09005(record):
assert
record
.
these_universities
()
==
"Shandong U. & Marseille, CPPM"
def
test_format_universities_
cd
s_09006
(
record
):
def
test_format_universities_
in
s_09006
(
record
):
record
.
format_universities
()
assert
record
.
these_universities
()
==
\
"Shandong Université & Aix Marseille Université"
def test_format_universities_ins_08007():
    """Format the university of the inspirehep.net record 1088032."""
    expected = "Université de la Méditerrannée Aix-Marseille II"

    thesis = load_record("inspirehep.net", 1088032, shelf="literature")
    thesis.format_universities()

    assert thesis.these_universities() == expected
tests/basis/test_13_check_and_fix_article_cds.py
View file @
7799693c
"""test_13_check_and_fix_article_cds
cds.cern.ch
2242641: Phys. Rev. D 95 (2017) 052005
(same as inspirehep.net
article
15009922)
https://
cds.cern.ch
/record/2242641
(same as
https://
inspirehep.net
/api/literature/
15009922)
Test individual method of check and fix process for article:
...
...
tests/basis/test_14_check_fix_article_ins.py
View file @
7799693c
"""test_14_check_and_fix_article_ins
inspirehep.net article
15009922: Phys. Rev. D 95 (2017) 052005
(same as cds.cern.ch
2242641)
https://
inspirehep.net article
/api/literature/15009922
(same as
https://
cds.cern.ch
/record/
2242641)
Test individual method of check and fix process for article:
...
...
tests/basis/test_15_check_and_fix_proceeding_cds.py
View file @
7799693c
"""test_15_check_and_fix_proceeding_cds
cds.cern.ch
1411352
(same as inspirehep.net
1089237)
https://
cds.cern.ch
/record/
1411352
(same as
https://
inspirehep.net
/api/literature/
1089237)
Test individual method of check and fix process for
article
:
Test individual method of check and fix process for
proceeding
:
* publication is a published proceeding
* is with authors from my institute
...
...
tests/basis/test_16_check_and_fix_proceeding_ins.py
View file @
7799693c
"""test_16_check_and_fix_proceeding_ins
inspirehep.net
1089237
(same as cds.cern.ch
1411352)
https://
inspirehep.net
/api/literature/
1089237
(same as
https://
cds.cern.ch
/record/
1411352)
Test individual method of check and fix process for proceeding:
...
...
tests/basis/test_17_check_and_fix_thesis.py
deleted
100644 → 0
View file @
21971181
"""test_16_CheckAndFix_thesis
* Test CheckAndFix methods for thesis.
Use the one talk in cds.cern.ch
- is_thesis
- submitted
- format_universities
- format_authors (already test with article)
- get_my_authors (already test with article)
"""
import
pytest
from
harvest_tools.checkandfix
import
CheckAndFix
from
store_tools
import
load_record
@pytest.fixture(scope="module")
def reccds():
    """Load the cds.cern.ch record 1394605 once for the whole module."""
    host, record_id = "cds.cern.ch", 1394605
    return load_record(host, record_id)
@pytest.fixture(scope="module")
def svc():
    """Build one ``CheckAndFix`` instance shared by the whole module."""
    service = CheckAndFix()
    return service
def test_is_thesis_cds_17001(svc, reccds):
    """``is_thesis`` returns ``None`` for the record."""
    outcome = svc.is_thesis(reccds)
    assert outcome is None
def test_submitted_cds_17002(svc, reccds):
    """``submitted`` goes from empty to ``2011-11`` after the fix."""
    # state before the fix
    defense = reccds.these_defense()
    assert defense == "2011"
    before = reccds.submitted()
    assert before == ""

    svc.submitted(reccds)

    # state after the fix
    after = reccds.submitted()
    assert after == "2011-11"
def test_format_universities_cds_17003(svc, reccds):
    """The university name is rewritten by ``format_universities``."""
    # Khanji in 2011 (Université de la Méditerrannée)
    raw_2011 = "Marseille U., Luminy"
    fixed_2011 = "Université de la Méditerrannée Aix-Marseille II"

    assert reccds["dissertation_note"]["university"] == raw_2011
    svc.format_universities(reccds)
    assert reccds["dissertation_note"]["university"] == fixed_2011

    # Chen in 2013 (Aix Marseille Université)
    raw_2013 = "Shandong U. & Marseille, CPPM"
    fixed_2013 = "Shandong Université & Aix Marseille Université"

    reccds2 = load_record("cds.cern.ch", 1632177)
    assert reccds2["dissertation_note"]["university"] == raw_2013
    svc.format_universities(reccds2)
    assert reccds2["dissertation_note"]["university"] == fixed_2013
tests/basis/test_17_check_and_fix_thesis_cds.py
0 → 100644
View file @
7799693c
"""test_17_check_and_fix_thesis_cds
https://cds.cern.ch/record/1394605
(same as https://inspirehep.net/api/literature/1088032)
Test individual method of check and fix process for thesis:
* is with authors from my institute
* standardise name of collaboration
* format authors according to my format
* extract authors from my institute signing the publication
* is submitted date well formed
"""
import
pytest
from
gluon
import
current
from
harvest_tools
import
get_rex_institute
from
store_tools
import
CheckException
,
load_record
@pytest.fixture(scope="module")
def record():
    """Load the cds.cern.ch record 1394605 once for the whole module."""
    host, record_id = "cds.cern.ch", 1394605
    return load_record(host, record_id)
def test_subtype_17001(record):
    """The record subtype is ``thesis``."""
    subtype = record.subtype()
    assert subtype == "thesis"
# ............................................................................
#
# Check and fix methods available on RecordCdsThesis
#
def test_check_authors_17010(record):
    """``check_authors`` returns ``None`` for the record."""
    # raise CheckException in case of problem
    outcome = record.check_authors()
    assert outcome is None
def test_check_my_affiliation_17011(record):
    """``check_my_affiliation`` returns ``None`` for the record."""
    # raise CheckException in case of problem
    rex = get_rex_institute(current.db, current.app)
    outcome = record.check_my_affiliation(rex)
    assert outcome is None
def test_check_collaboration_17012(record):
    """The collaboration is empty before and after the check."""
    before = record.collaboration()
    assert before == ""

    record.check_collaboration(current.db)

    after = record.collaboration()
    assert after == ""
def test_check_format_authors_17013(record):
    """The single author is reformatted with the ``F. Last`` pattern."""
    # state before the formatting
    raw = record.authors_as_list()
    assert len(raw) == 1
    assert raw[0] == "Khanji, Basem"

    record.check_format_authors(fmt="F. Last")

    # state after the formatting
    formatted = record.authors_as_list()
    assert formatted[0] == "B. Khanji"
def test_extract_my_authors_17014(record):
    """``my_authors`` is filled by ``extract_my_authors``."""
    # raise CheckException in case of problem
    rex = get_rex_institute(current.db, current.app)

    # nothing extracted yet
    assert record.my_authors is None

    record.check_format_authors(fmt="F. Last")
    outcome = record.extract_my_authors(rex, sep="|", sort=True)
    assert outcome is None

    extracted = record.my_authors
    assert extracted == "B. Khanji"
def test_check_submitted_date_17015(record):
    """``check_submitted_date`` raises when the submitted date is empty."""
    # raise CheckException in case of problem
    submitted = record.submitted()
    assert submitted == ""

    with pytest.raises(CheckException):
        record.check_submitted_date()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment