Commit 0274d5c2 authored by LE GAC Renaud's avatar LE GAC Renaud
Browse files

Fix a bug in fix_acti2com_cppm (submitted date).

parent 5341f1ae
......@@ -16,7 +16,7 @@ from base import (DRY_RUN,
from automaton import Automaton
from articles import Articles
from checkandfix import CheckAndFix
from checkandfix import CheckAndFix, MONTHS
from exception import CheckException, ToolException
from factory import build_harvester_tool, get_harvester_tool
from msg import Msg
......
......@@ -35,8 +35,17 @@
import re
from harvest_tools import MONTHS
# Conference date layouts recognised by REG_CONF_DATES. Each alternative
# captures exactly three groups, in the order (start day, month abbreviation,
# year), so a successful match fills groups 1-3, 4-6 or 7-9 depending on
# which alternative matched; the groups of the other alternatives are None.
#
#   reg1: single day                  e.g. "6 Jun 2016"
#   reg2: day range within one month  e.g. "6-10 Jun 2016"
#   reg3: range spanning two months   e.g. "29 Jun - 3 Jul 2016"
#
# The month is one capital letter followed by two lowercase letters.
reg1 = r'(\d{1,2}) ([A-Z][a-z]{2}) (\d{4})'
reg2 = r'(\d{1,2})-\d{1,2} ([A-Z][a-z]{2}) (\d{4})'
reg3 = r'(\d{1,2}) ([A-Z][a-z]{2}) - \d{1,2} [A-Z][a-z]{2} (\d{4})'
REG_CONF_DATES = re.compile(r'%s|%s|%s' % (reg1, reg2, reg3))

# Record URL of the form "http(s)://<host>/record/<number>":
# group 1 is the host (lowercase letters and dots), group 2 the record number.
# Raw string: "\." and "\d" are not valid escapes in a plain string literal.
REG_ORIGIN = re.compile(r"https?://([a-z\.]+)/record/(\d+)")
if __name__ == "__main__":
import sys
......@@ -70,15 +79,22 @@ if __name__ == "__main__":
data["id_publishers"] = 1
data["pages"] = ""
data["volume"] = ""
data["submitted"] = ""
data["publication_url"] = ""
data["preprint"] = ""
# the year is the one of the conference
conference_dates = data["conference_dates"]
if conference_dates:
data["year"] = int(data["conference_dates"][-4:])
# submitted date is when the conference start
match = REG_CONF_DATES.match(data["conference_dates"])
if match:
offset = 0
for i in xrange(3):
if match.group(1+i*3) is not None:
offset = i*3
break
data["year"] = match.group(offset+3)
month = MONTHS[match.group(offset+2)]
data["submitted"] = "%s-%s-%s" % (match.group(offset+3), month, match.group(offset+1))
else:
print "No conferences dates", data["id"]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment