Commit 11bee814 authored by LE GAC Renaud
Browse files

Update controllers and modules to replace u'key' by 'key'.

parent efd0d1e7
......@@ -54,18 +54,18 @@ def affiliation_institute():
# extract keys defining the affiliation
# subfields are identifier and future_identifier
# they are not part of the standard JSON record but are added by the factory
if u"corporate_note" not in record:
if "corporate_note" not in record:
raise HTTP(500, T(MSG_NO_KEYS))
di = record[u"corporate_note"]
di = record["corporate_note"]
keys = [di[k] for k in di]
# sometimes the name of the institute is used (ATLAS Note, ...)
obj = record[u"corporate_name"]
obj = record["corporate_name"]
di = (obj[0] if isinstance(obj, list) else obj)
if u"name" in di:
keys.append(di[u"name"])
if "name" in di:
keys.append(di["name"])
keys = (dict(key_u=key, key_v="") for key in keys)
......@@ -105,7 +105,7 @@ def affiliation_publication():
raise HTTP(500, T(MSG_NO_PUBLICATION))
# find the author and its affiliation
df = record[u"authors"]
df = record["authors"]
query = \
(df.last_name.str.lower() == family_name.lower()) & \
......
......@@ -25,7 +25,7 @@ MSG_NO_TEAM = 'Select a "team" !!!'
MSG_INSERT_FAIL = "Fail to insert the new record in the database."
OAI = u"oai:%s:%i"
OAI = "oai:%s:%i"
# search collection when using inspirehep
# required for "Hal Hidden"
......@@ -394,7 +394,7 @@ class Automaton(object):
# fix record with a missing OAI
if not self.check.is_oai(record):
oai = OAI % (self.harvester.host, record.id())
record[u"oai"] = {u"value": oai}
record["oai"] = {"value": oai}
if self.check.is_bad_oai_used(record):
self.logs[-1].idle(MSG_IN_DB, record.submitted())
......
......@@ -111,27 +111,27 @@ class CheckAndFix(object):
no conference date found.
"""
if u"meeting_name" not in record:
if "meeting_name" not in record:
raise ToolException(MSG_NO_CONF_DATE)
meeting = record[u"meeting_name"]
meeting = record["meeting_name"]
meeting = (meeting[0] if isinstance(meeting, list) else meeting)
# CDS has the opening and closing dates encoded as 20141231
if u"opening_date" in meeting and u"closing_date" in meeting:
if "opening_date" in meeting and "closing_date" in meeting:
fmt = "%Y%m%d"
val = meeting[u"opening_date"]
val = meeting["opening_date"]
opening = datetime.strptime(val, fmt)
val = meeting[u"closing_date"]
val = meeting["closing_date"]
closing = datetime.strptime(val, fmt)
return (opening, closing)
# both CDS and INSPIRE have the dates subfield
val = meeting[u"date"]
val = meeting["date"]
# date is encoded as 12 - 15 Mar 2014
m = REG_CONF_DATES_1.match(val)
......@@ -139,10 +139,10 @@ class CheckAndFix(object):
fmt = "%d-%b-%Y"
val = u"%s-%s-%s" % (m.group(1), m.group(3), m.group(4))
val = "%s-%s-%s" % (m.group(1), m.group(3), m.group(4))
opening = datetime.strptime(val, fmt)
val = u"%s-%s-%s" % (m.group(2), m.group(3), m.group(4))
val = "%s-%s-%s" % (m.group(2), m.group(3), m.group(4))
closing = datetime.strptime(val, fmt)
return (opening, closing)
......@@ -154,10 +154,10 @@ class CheckAndFix(object):
fmt = "%d-%b-%Y"
val = u"%s-%s-%s" % (m.group(1), m.group(2), m.group(5))
val = "%s-%s-%s" % (m.group(1), m.group(2), m.group(5))
opening = datetime.strptime(val, fmt)
val = u"%s-%s-%s" % (m.group(3), m.group(4), m.group(5))
val = "%s-%s-%s" % (m.group(3), m.group(4), m.group(5))
closing = datetime.strptime(val, fmt)
return (opening, closing)
......@@ -226,16 +226,16 @@ class CheckAndFix(object):
# try to recover year when not defined
if not year:
# published article, proceeding
if record[u"publication_info"].year.iloc[0] != "":
year = record[u"publication_info"].year.iloc[0]
if record["publication_info"].year.iloc[0] != "":
year = record["publication_info"].year.iloc[0]
# start date of a conference
elif record._get(u"meeting_name", u"opening_date") != u"":
year = record._get(u"meeting_name", u"opening_date")
elif record._get("meeting_name", "opening_date") != "":
year = record._get("meeting_name", "opening_date")
# end date of a conference
elif record._get(u"meeting_name", u"closing_date") != u"":
year = record._get(u"meeting_name", u"closing_date")
elif record._get("meeting_name", "closing_date") != "":
year = record._get("meeting_name", "closing_date")
else:
return []
......@@ -304,7 +304,7 @@ class CheckAndFix(object):
empty when procedure failed
"""
val = u""
val = ""
if isinstance(record, RecordConf):
opening = self._get_conference_dates(record)[0]
......@@ -321,8 +321,8 @@ class CheckAndFix(object):
val = "20%s-%s" % (m_arxiv.group(1), m_arxiv.group(2))
# last resort: use the creation date of the record
if val == u"" or len(val) < 7:
val = record[u"creation_date"][0:7]
if val == "" or len(val) < 7:
val = record["creation_date"][0:7]
return val
......@@ -380,14 +380,14 @@ class CheckAndFix(object):
if collaboration != val:
# one collaboration
if isinstance(record[u"corporate_name"], dict):
record[u"corporate_name"][u"collaboration"] = collaboration
if isinstance(record["corporate_name"], dict):
record["corporate_name"]["collaboration"] = collaboration
# several collaborations
# replace the list of dictionaries by a single one
else:
record[u"corporate_name"] = \
{u"collaboration": collaboration}
record["corporate_name"] = \
{"collaboration": collaboration}
except ToolException as e:
raise CheckException(*e.args)
......@@ -423,19 +423,19 @@ class CheckAndFix(object):
country = db.countries[dbid].country
if country != val:
obj = record[u"meeting_name"]
obj = record["meeting_name"]
if isinstance(obj, dict):
location = obj[u"location"].replace(val, country)
record[u"meeting_name"][u"location"] = location
location = obj["location"].replace(val, country)
record["meeting_name"]["location"] = location
else:
for di in obj:
if u"location" in di:
di[u"location"] = \
di[u"location"].replace(val, country)
if "location" in di:
di["location"] = \
di["location"].replace(val, country)
record[u"meeting_name"] = obj
record["meeting_name"] = obj
except ToolException as e:
raise CheckException(*e.args)
......@@ -485,9 +485,9 @@ class CheckAndFix(object):
closing.strftime("%b"),
opening.year)
meeting = record[u"meeting_name"]
meeting = record["meeting_name"]
meeting = (meeting[0] if isinstance(meeting, list) else meeting)
meeting[u"date"] = val
meeting["date"] = val
def is_bad_oai_used(self, record):
"""Bad OAI is when the ``id`` in the OAI field is different from
......@@ -534,8 +534,8 @@ class CheckAndFix(object):
print "\t\tCheck is oai"
# field / subfield depends on the store
test = (u"oai" in record and u"value" in record[u"oai"]) or \
(u"FIXME_OAI" in record and u"id" in record[u"FIXME_OAI"])
test = ("oai" in record and "value" in record["oai"]) or \
("FIXME_OAI" in record and "id" in record["FIXME_OAI"])
return test
......@@ -581,7 +581,7 @@ class CheckAndFix(object):
if not record.is_published():
return
df = record[u"publication_info"].iloc[0]
df = record["publication_info"].iloc[0]
editor = df.title
volume = df.volume
......@@ -618,7 +618,7 @@ class CheckAndFix(object):
if not isinstance(record, RecordThesis):
return
values = record[u"dissertation_note"][u"university"]
values = record["dissertation_note"]["university"]
# CPPM -- fix the name of Aix-Marseille university
affiliations = record.first_author_institutes()
......@@ -630,7 +630,7 @@ class CheckAndFix(object):
if int(year) < 2012:
university = \
u"Université de la Méditerrannée Aix-Marseille II"
"Université de la Méditerrannée Aix-Marseille II"
else:
university = "Aix Marseille Université"
......@@ -649,7 +649,7 @@ class CheckAndFix(object):
university = current.T(UNIVERSITY).decode("utf8")
values = values.replace('U.', university)
record[u"dissertation_note"][u"university"] = values
record["dissertation_note"]["university"] = values
def get_my_authors(self, record, sep=", ", sort=False):
"""Get authors of my institutes signing the record.
......@@ -714,7 +714,7 @@ class CheckAndFix(object):
if not isinstance(record, RecordConf):
raise CheckException(MSG_NO_CONF)
if u"meeting_name" not in record:
if "meeting_name" not in record:
raise CheckException(MSG_NO_CONF)
def is_thesis(self, record):
......@@ -794,12 +794,12 @@ class CheckAndFix(object):
record.reformat_authors(fmt_rescue)
if sort:
authors = (record[u"authors"][["last_name", "fmt_name"]]
authors = (record["authors"][["last_name", "fmt_name"]]
.sort_values(by="last_name")
.fmt_name)
else:
authors = (record[u"authors"].fmt_name
authors = (record["authors"].fmt_name
.sort_index())
# go back to the origin formatting
......@@ -837,7 +837,7 @@ class CheckAndFix(object):
# paper reference can be incomplete or missing
# is the paper published ? In that case the doi is defined
if u"doi" not in record:
if "doi" not in record:
return
# what information is missing ?
......@@ -848,7 +848,7 @@ class CheckAndFix(object):
# * the row contains empty string when the record is not published.
# * iloc[0] returns a Series whose index holds the column names
#
columns = (record[u"publication_info"].iloc[0]
columns = (record["publication_info"].iloc[0]
.replace("", np.nan)
.dropna()
.index)
......@@ -857,7 +857,7 @@ class CheckAndFix(object):
# try to recover from the doi when it has the form
# xx.yyyy/Publisher.Volume.Page
m = REG_DOI.match(record[u"doi"])
m = REG_DOI.match(record["doi"])
if not m:
raise ToolException(MSG_NO_REF + str(list(missing)))
......@@ -867,13 +867,13 @@ class CheckAndFix(object):
# transform PhysRevD in Phys. Rev. D
li = re.split(r"([A-Z][a-z]+)", m.group(1))
title = ". ".join([el for el in li if len(el) > 0])
record[u"publication_info"].loc[0, u"title"] = title
record["publication_info"].loc[0, "title"] = title
elif subfield == "volume":
record[u"publication_info"].loc[0, u"volume"] = m.group(2)
record["publication_info"].loc[0, "volume"] = m.group(2)
elif subfield == "pagination":
record[u"publication_info"].loc[0, u"pagination"] = m.group(3)
record["publication_info"].loc[0, "pagination"] = m.group(3)
elif subfield == "year":
raise ToolException(MSG_NO_REF + "[year]")
......@@ -908,7 +908,7 @@ class CheckAndFix(object):
abbreviation = db.publishers[dbid].abbreviation
if abbreviation != val:
record[u"publication_info"].loc[0, "title"] = abbreviation
record["publication_info"].loc[0, "title"] = abbreviation
# convert ToolException to CheckException
except ToolException as e:
......@@ -960,16 +960,16 @@ class CheckAndFix(object):
# in some cases we have to deal with a list (see cds 2234042)
# in some cases it is not defined (e.g. phd thesis)
if u"prepublication" in record:
if "prepublication" in record:
prepublication = record[u"prepublication"]
prepublication = record["prepublication"]
if isinstance(prepublication, list):
prepublication[0][u"date"] = date
prepublication[0]["date"] = date
else:
prepublication[u"date"] = date
prepublication["date"] = date
else:
record[u"prepublication"] = {u"date": date}
record["prepublication"] = {"date": date}
def temporary_record(self, record):
"""Some records are marked temporary.
......@@ -993,6 +993,6 @@ class CheckAndFix(object):
# or the corresponding JSON field:
# http://inspirehep.net/comment="*Temporary record*"
#
if u"comment" in record:
if record[u"comment"] == u"*Temporary record*":
if "comment" in record:
if record["comment"] == "*Temporary record*":
raise CheckException(MSG_TEMPORARY_RECORD)
......@@ -67,7 +67,7 @@ def is_conference(recjson):
("aleph_linking_page" in recjson) or \
("subject" in recjson
and "term" in recjson["subject"]
and recjson["subject"]["term"] == u"Talk")
and recjson["subject"]["term"] == "Talk")
if found:
return True
......
......@@ -42,7 +42,7 @@ def add_affiliation_keys(recjson):
recjson (dict): record data (MarcJSON)
"""
url = "https://inspirehep.net/record/%i" % recjson[u"recid"]
url = "https://inspirehep.net/record/%i" % recjson["recid"]
rep = requests.get(url, params={"ot": "110", "of": "txt"})
# decode the string: '000recid 110__ $$aXXX$$bYYY$$tZZZ\n'
......@@ -55,8 +55,8 @@ def add_affiliation_keys(recjson):
continue
di[el[0:1]] = el[1:]
recjson[u"corporate_note"] = {u"identifier": di["u"],
u"future_identifier": di["t"]}
recjson["corporate_note"] = {"identifier": di["u"],
"future_identifier": di["t"]}
def add_conference_data(recjson):
......@@ -89,19 +89,19 @@ def add_conference_data(recjson):
#
conf_id, conf_key, host = None, None, None
if u"aleph_linking_page" in recjson:
di = recjson[u"aleph_linking_page"]
conf_id = di[u"sysno"]
conf_key = di[u"up_link"]
if "aleph_linking_page" in recjson:
di = recjson["aleph_linking_page"]
conf_id = di["sysno"]
conf_key = di["up_link"]
host = "cds.cern.ch"
elif u"publication_info" in recjson:
data = recjson[u"publication_info"]
elif "publication_info" in recjson:
data = recjson["publication_info"]
data = (data if isinstance(data, list) else [data])
for di in data:
if u"cnum" in di:
conf_key = di[u"cnum"]
if "cnum" in di:
conf_key = di["cnum"]
host = "inspirehep.net"
break
......@@ -129,17 +129,17 @@ def add_conference_data(recjson):
# home page while the second one is for the proceeding (cds 2270940)
# - in other cases the url is not defined (cds 2258914)
confurl = u""
if u"url" in confjson:
obj = confjson[u"url"]
confurl = (obj[u"url"] if isinstance(obj, dict) else obj[0][u"url"])
confurl = ""
if "url" in confjson:
obj = confjson["url"]
confurl = (obj["url"] if isinstance(obj, dict) else obj[0]["url"])
# ........................................................................
#
# Add conference data to the recjson
#
recjson[u"meeting_name"] = confjson[u"meeting_name"]
recjson[u"meeting_note"] = {u"recid": confjson[u"recid"], u"url": confurl}
recjson["meeting_name"] = confjson["meeting_name"]
recjson["meeting_note"] = {"recid": confjson["recid"], "url": confurl}
def build_record(recjson):
......@@ -259,10 +259,10 @@ def match_conference_key(recjson, conf_key):
bool:
"""
if u"meeting_name" in recjson:
for di in recjson[u"meeting_name"]:
if "meeting_name" in recjson:
for di in recjson["meeting_name"]:
subfield = u"coference_code"
subfield = "coference_code"
if subfield in di and di[subfield] == conf_key:
return True
......
......@@ -223,7 +223,7 @@ class Record(dict):
the unique id of the record in the store
"""
return self[u"recid"]
return self["recid"]
def oai(self):
"""The Open Archive Initiative identifier(s).
......@@ -278,11 +278,11 @@ class Record(dict):
"""
# the location of the OAI information depends on the store
if u"oai" in self:
field, subfield = u"oai", u"value"
if "oai" in self:
field, subfield = "oai", "value"
elif u"FIXME_OAI" in self:
field, subfield = u"FIXME_OAI", u"id"
elif "FIXME_OAI" in self:
field, subfield = "FIXME_OAI", "id"
else:
return ""
......@@ -338,23 +338,23 @@ class Record(dict):
It is an empty string when not defined
"""
if u"system_control_number" not in self:
return u""
if "system_control_number" not in self:
return ""
data = self[u"system_control_number"]
data = self["system_control_number"]
data = (data if isinstance(data, list) else [data])
# data is a list of dictionaries
# keys are `institute`, `value` or `canceled`
for di in data:
institute = di[u"institute"]
institute = di["institute"]
if institute == "CDS":
if u"value" in di:
return OAI % (u"cds.cern.ch", di[u"value"])
if "value" in di:
return OAI % ("cds.cern.ch", di["value"])
elif institute == "Inspire":
if u"value" in di:
return OAI % (u"inspirehep.net", di[u"value"])
if "value" in di:
return OAI % ("inspirehep.net", di["value"])
return ""
......
......@@ -45,7 +45,7 @@ class RecordConf(RecordPubli):
location = self.conference_location()
if len(location) == 0:
return u""
return ""
return CLEAN_SPACES(location.split(",")[-1])
......@@ -60,7 +60,7 @@ class RecordConf(RecordPubli):
"""
# for list assume that the first item is the correct one
val = self._get(u"meeting_name", u"date")
val = self._get("meeting_name", "date")
val = (val[0] if isinstance(val, list) and len(val) > 0 else val)
return val
......@@ -71,7 +71,7 @@ class RecordConf(RecordPubli):
int:
"""
return self[u"meeting_note"][u"recid"]
return self["meeting_note"]["recid"]
def conference_key(self):
"""The conference key used in the store.
......@@ -83,19 +83,19 @@ class RecordConf(RecordPubli):
"""
# algorithm depends on the store
# CDS
if u"aleph_linking_page" in self:
value = self[u"aleph_linking_page"][u"up_link"]
if "aleph_linking_page" in self:
value = self["aleph_linking_page"]["up_link"]
# INSPIRE
elif u"publication_info" in self:
df = self[u"publication_info"]
elif "publication_info" in self:
df = self["publication_info"]
cnums = df[df.cnum.str.match(REG_CONF.pattern) == True].cnum
if len(cnums) == 1:
value = cnums.iloc[0]
else:
value = u""
value = ""
return value
......@@ -109,8 +109,8 @@ class RecordConf(RecordPubli):
- empty string when not defined
"""
location = self._get(u"meeting_name", u"location", force_list=True)
location = (location[0] if len(location) == 1 else u"")
location = self._get("meeting_name", "location", force_list=True)
location = (location[0] if len(location) == 1 else "")
return CLEAN_SPACES(location)
......@@ -122,7 +122,7 @@ class RecordConf(RecordPubli):
"""
# for list assume that the first item is the correct one
value = self._get(u"meeting_name", u"meeting")
value = self._get("meeting_name", "meeting")
value = (value[0] if isinstance(value, list) else value)
return CLEAN_SPACES(value)
......@@ -137,7 +137,7 @@ class RecordConf(RecordPubli):
location = self.conference_location()
if len(location) == 0:
return u""
return ""
return CLEAN_SPACES(location.split(",")[0])
......@@ -151,7 +151,7 @@ class RecordConf(RecordPubli):
is not defined.
"""
return self[u"meeting_note"][u"url"]
return self["meeting_note"]["url"]
def conference_year(self):
"""The year of the conference.
......@@ -166,4 +166,4 @@ class RecordConf(RecordPubli):
if match:
return match.group(1)
return u""
return ""
......@@ -82,7 +82,7 @@ class RecordInst(Record):
if the identifier is not defined.
"""
return self._get(u"corporate_note", u"future_identifier")
return self._get("corporate_note", "future_identifier")
def identifier(self):
"""Identifier of the institute.
......@@ -93,7 +93,7 @@ class RecordInst(Record):