Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
limbra
limbra
Commits
0274d5c2
Commit
0274d5c2
authored
Jan 19, 2016
by
LE GAC Renaud
Browse files
Fix a bug in fix_acti2com_cppm (submitted date).
parent
5341f1ae
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
22 additions
and
6 deletions
+22
-6
modules/harvest_tools/__init__.py
modules/harvest_tools/__init__.py
+1
-1
scripts/fix_acti2com_cppm.py
scripts/fix_acti2com_cppm.py
+21
-5
No files found.
modules/harvest_tools/__init__.py
View file @
0274d5c2
...
...
@@ -16,7 +16,7 @@ from base import (DRY_RUN,
from
automaton
import
Automaton
from
articles
import
Articles
from
checkandfix
import
CheckAndFix
from
checkandfix
import
CheckAndFix
,
MONTHS
from
exception
import
CheckException
,
ToolException
from
factory
import
build_harvester_tool
,
get_harvester_tool
from
msg
import
Msg
...
...
scripts/fix_acti2com_cppm.py
View file @
0274d5c2
...
...
@@ -35,8 +35,17 @@
import
re
from
harvest_tools
import
MONTHS
reg1
=
r
'(\d{1,2}) ([A-Z][a-z]{2}) (\d{4})'
reg2
=
r
'(\d{1,2})-\d{1,2} ([A-Z][a-z]{2}) (\d{4})'
reg3
=
r
'(\d{1,2}) ([A-Z][a-z]{2}) - \d{1,2} [A-Z][a-z]{2} (\d{4})'
REG_CONF_DATES
=
re
.
compile
(
r
'%s|%s|%s'
%
(
reg1
,
reg2
,
reg3
))
REG_ORIGIN
=
re
.
compile
(
"https?://([a-z\.]+)/record/(\d+)"
)
if
__name__
==
"__main__"
:
import
sys
...
...
@@ -70,15 +79,22 @@ if __name__ == "__main__":
data
[
"id_publishers"
]
=
1
data
[
"pages"
]
=
""
data
[
"volume"
]
=
""
data
[
"submitted"
]
=
""
data
[
"publication_url"
]
=
""
data
[
"preprint"
]
=
""
# the year is the one of the conference
conference_dates
=
data
[
"conference_dates"
]
if
conference_dates
:
data
[
"year"
]
=
int
(
data
[
"conference_dates"
][
-
4
:])
# submitted date is when the conference starts
match
=
REG_CONF_DATES
.
match
(
data
[
"conference_dates"
])
if
match
:
offset
=
0
for
i
in
xrange
(
3
):
if
match
.
group
(
1
+
i
*
3
)
is
not
None
:
offset
=
i
*
3
break
data
[
"year"
]
=
match
.
group
(
offset
+
3
)
month
=
MONTHS
[
match
.
group
(
offset
+
2
)]
data
[
"submitted"
]
=
"%s-%s-%s"
%
(
match
.
group
(
offset
+
3
),
month
,
match
.
group
(
offset
+
1
))
else
:
print
"No conferences dates"
,
data
[
"id"
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment