Commit 09e09e72 authored by LE GAC Renaud
Browse files

Merge branch '52-request-instead-urlib' into 'master'

Resolve "Use the Python module requests instead of urllib"

Closes #11 and #52

See merge request !87
parents 7d155814 80ec86f1
......@@ -13,6 +13,7 @@ databases
databases*/
errors/
private/
prof/
sessions/
uploads/
models/plugin_dbui.py
......
......@@ -530,7 +530,9 @@
'Registration successful': 'Registration successful',
'Regular expression defining the name of our institute.': 'Expression régulière definissant le nom de votre laboratoire.',
'Reject': 'Rejeter',
'Reject %s is not defined': "Rejeté %s n'est pas défini",
'Reject article is not published': "Rejeté l'article n'est pas publié",
'Reject collaboration is not defined': "Rejeté la collaboration n'est pas définie",
'Reject collaboration is not well formed': 'Rejeté la collaboration est mal formatté',
'Reject collaborations is not defined': "Rejeté la collaboration n'est pas définie",
'Reject conference dates is not well formed': 'Rejecté les dates de la conférence dates sont mal formatté',
......@@ -543,6 +545,7 @@
'Reject no author(s)': "Rejeté pas d'autheur(s)",
'Reject no authors': "Rejeté pas d'auteurs",
'Reject no authors of my institute': "Rejeté pas d'auteurs de mon laboratoire",
'Reject no conference date': 'Rejeté pas de dates pour la conférence',
'Reject no conference information': "Rejeté pas d'information sur la conférence",
'Reject no CPPM authors': "Rejeté pas d'auteurs du CPPM",
'Reject no OAI identifier': "Rejeté pas d'identifiant OAI",
......@@ -562,6 +565,7 @@
'Reject to many first author': 'Rejeté trop de premier autheur',
'Reject to many submit date': 'Rejeté plusieurs date de soumission',
'Reject to many year': 'Rejeté plusieurs année',
'Reject too many %s synonyms': 'Rejeté trop de synonymes %s',
'Reject too many collaborations synonyms': 'Rejeté synonyme de collaboration défini plusieurs fois',
'Reject too many countries synonyms': 'Rejeté synonyme de pays défini plusieurs fois',
'Reject too many publishers synonyms': 'Rejeté synonyme de revue défini plusieurs fois',
......
......@@ -5,8 +5,8 @@
import httplib
import json
import re
import requests
import time
import urllib
from exception import CdsException
......@@ -50,11 +50,19 @@ class InvenioStore(object):
self._url = None
self._try = 0
def interogate(self, url):
# start a session, a persistent connection with the server
self._session = requests.Session()
def __del__(self):
# close the session
self._session.close()
def interogate(self, url, params=None):
"""Interrogate the store using the *URL*.
Args:
url (unicode): URL string
params (dict): parameters to be sent with the URL
Returns:
unicode: the HTTP response
......@@ -67,10 +75,9 @@ class InvenioStore(object):
self._url = url
self._try += 1
fi = urllib.urlopen(url)
code = fi.getcode()
data = fi.read()
r = self._session.get(url, params=params)
code = r.status_code
data = r.content
# the server is busy or returned an error: wait one minute and retry.
# the number of trials is limited to 5
......@@ -166,10 +173,9 @@ class InvenioStore(object):
self._try = 0
kwargs["jrec"] += N_IDS
params = urllib.urlencode(kwargs)
url = "http://%s/search?%s" % (self._host, params)
url = "http://%s/search" % self._host
rep = self.interogate(url)
rep = self.interogate(url, params=kwargs)
# check that the list of ids is well formed
# [1291068, 1352722, 1376692, 1454870, 1492807] or [1493820] or []
......@@ -397,10 +403,9 @@ class InvenioStore(object):
kwargs["action_search"] = "Search"
params = urllib.urlencode(kwargs)
url = "http://%s/search?%s" % (self._host, params)
url = "http://%s/search" % self._host
return self.interogate(url)
return self.interogate(url, params=kwargs)
def search_year(self, collection, year, of="xm", rg=10, so="d"):
"""Search records for given *collection* and for a given *year*.
......
# -*- coding: utf-8 -*-
"""test_all_harvester
"""
import pytest
from gluon import current
from harvest_tools.articles import (
MSG_NO_EDITOR,
MSG_TRANSFORM_PREPRINT)
from harvest_tools.base import (
MSG_FIX_ORIGIN,
MSG_IN_DB,
MSG_LOAD,
MSG_NO_ENTRY,
MSG_TOOMANY_SYNONYM)
from harvest_tools.checkandfix import (
MSG_NO_AUTHOR,
MSG_NO_CONF_DATE,
MSG_NO_DATE,
MSG_NO_MY_AUTHOR,
MSG_NO_REF,
MSG_NO_YEAR,
MSG_TEMPORARY_RECORD,
MSG_TO_MANY_DATE,
MSG_TO_MANY_FAUTHOR,
MSG_TO_MANY_YEAR,
MSG_WELL_FORMED_CONF_DATES,
MSG_WELL_FORMED_DATE,
MSG_WELL_FORMED_EDITOR)
from harvest_tools.factory import build_harvester_tool
from harvest_tools.preprints import (
MSG_PREPRINT_IS_PAPER,
MSG_PREPRINT_IS_CONFERENCE,
MSG_PREPRINT_IS_THESIS,
MSG_PREPRINT_NO_NUMBER)
from harvest_tools.reports import MSG_REPORT_NO_NUMBER
from harvest_tools.thesis import MSG_NO_THESIS
from invenio_tools.base import MSG_NO_CONF, MSG_NO_PUBLISHER
@pytest.fixture(scope="module")
def messages():
    """Return the set of translated messages a harvester log may contain.

    Each message constant imported by this module is passed through the
    translator ``current.T``; the resulting objects are gathered in a set
    which the tests use as the reference of allowed log messages.

    Returns:
        set: the translated messages

    """
    translate = current.T

    # NOTE(review): MSG_NO_ENTRY is filled with a table name before being
    # translated whereas MSG_TOOMANY_SYNONYM is translated as is -- confirm
    # that the latter does not expect a "%s" substitution as well.
    raw_messages = (
        MSG_NO_EDITOR,
        MSG_TRANSFORM_PREPRINT,
        MSG_FIX_ORIGIN,
        MSG_IN_DB,
        MSG_LOAD,
        MSG_NO_ENTRY % "collaborations",
        MSG_NO_ENTRY % "countries",
        MSG_NO_ENTRY % "publishers",
        MSG_TOOMANY_SYNONYM,
        MSG_NO_AUTHOR,
        MSG_NO_CONF,
        MSG_NO_CONF_DATE,
        MSG_NO_DATE,
        MSG_NO_MY_AUTHOR,
        MSG_NO_PUBLISHER,
        MSG_NO_REF,
        MSG_NO_THESIS,
        MSG_NO_YEAR,
        MSG_PREPRINT_IS_PAPER,
        MSG_PREPRINT_IS_CONFERENCE,
        MSG_PREPRINT_IS_THESIS,
        MSG_PREPRINT_NO_NUMBER,
        MSG_REPORT_NO_NUMBER,
        MSG_TEMPORARY_RECORD,
        MSG_TO_MANY_DATE,
        MSG_TO_MANY_FAUTHOR,
        MSG_TO_MANY_YEAR,
        MSG_WELL_FORMED_CONF_DATES,
        MSG_WELL_FORMED_DATE,
        MSG_WELL_FORMED_EDITOR,
    )

    return {translate(raw) for raw in raw_messages}
def test_astro_gamma(messages):
    """Run every harvester registered for the astro-gamma team.

    Args:
        messages (set): the translated log messages which are allowed

    """
    db = current.db
    team_id = 2

    # harvest the current year so that different cases are met
    this_year = str(current.request.now.year)

    # the harvesters configured for the team
    team_harvesters = db(db.harvesters.id_teams == team_id)

    for row in team_harvesters.iterselect():

        tool = build_harvester_tool(
            db,
            row.id_teams,
            row.id_projects,
            row.controller,
            row.id_categories,
            year_start=this_year,
            year_end="",
            dry_run=True,
            debug=False)

        tool.process_url(row.host, row.collections)

        # The number of articles evolves within a year and cannot be
        # checked: only verify that no unexpected message was logged.
        logged = {entry.txt for entry in tool.logs}
        assert logged.issubset(messages)
def test_atlas_all(messages):
    """Run every harvester registered for the Atlas team.

    Args:
        messages (set): the translated log messages which are allowed

    """
    db = current.db
    team_id = 3

    # harvest the current year so that different cases are met
    this_year = str(current.request.now.year)

    # the harvesters configured for the team
    team_harvesters = db(db.harvesters.id_teams == team_id)

    for row in team_harvesters.iterselect():

        tool = build_harvester_tool(
            db,
            row.id_teams,
            row.id_projects,
            row.controller,
            row.id_categories,
            year_start=this_year,
            year_end="",
            dry_run=True,
            debug=False)

        tool.process_url(row.host, row.collections)

        # The number of articles evolves within a year and cannot be
        # checked: only verify that no unexpected message was logged.
        logged = {entry.txt for entry in tool.logs}
        assert logged.issubset(messages)
def test_lhcb_all(messages):
    """Run every harvester registered for the LHCb team.

    Args:
        messages (set): the translated log messages which are allowed

    """
    db = current.db
    team_id = 7

    # harvest the current year so that different cases are met
    this_year = str(current.request.now.year)

    # the harvesters configured for the team
    team_harvesters = db(db.harvesters.id_teams == team_id)

    for row in team_harvesters.iterselect():

        tool = build_harvester_tool(
            db,
            row.id_teams,
            row.id_projects,
            row.controller,
            row.id_categories,
            year_start=this_year,
            year_end="",
            dry_run=True,
            debug=False)

        tool.process_url(row.host, row.collections)

        # The number of articles evolves within a year and cannot be
        # checked: only verify that no unexpected message was logged.
        logged = {entry.txt for entry in tool.logs}
        assert logged.issubset(messages)
# -*- coding: utf-8 -*-
"""test_single_harvester
"""
from gluon import current
import pytest
from harvest_tools.articles import (
Articles,
MSG_NO_EDITOR,
MSG_TRANSFORM_PREPRINT)
from harvest_tools.base import (
MSG_FIX_ORIGIN,
MSG_IN_DB,
MSG_LOAD,
MSG_NO_ENTRY,
MSG_TOOMANY_SYNONYM)
from harvest_tools.checkandfix import (
MSG_NO_AUTHOR,
MSG_NO_CONF_DATE,
MSG_NO_DATE,
MSG_NO_MY_AUTHOR,
MSG_NO_REF,
MSG_NO_YEAR,
MSG_TEMPORARY_RECORD,
MSG_TO_MANY_DATE,
MSG_TO_MANY_FAUTHOR,
MSG_TO_MANY_YEAR,
MSG_WELL_FORMED_CONF_DATES,
MSG_WELL_FORMED_DATE,
MSG_WELL_FORMED_EDITOR)
from harvest_tools.factory import build_harvester_tool
from harvest_tools.preprints import (
MSG_PREPRINT_IS_PAPER,
MSG_PREPRINT_IS_CONFERENCE,
MSG_PREPRINT_IS_THESIS,
MSG_PREPRINT_NO_NUMBER)
from harvest_tools.reports import MSG_REPORT_NO_NUMBER
from harvest_tools.thesis import MSG_NO_THESIS
from invenio_tools.base import MSG_NO_CONF, MSG_NO_PUBLISHER
@pytest.fixture(scope="module")
def messages():
    """Return the set of translated messages a harvester log may contain.

    Each message constant imported by this module is passed through the
    translator ``current.T``; the resulting objects are gathered in a set
    which the test uses as the reference of allowed log messages.

    Returns:
        set: the translated messages

    """
    translate = current.T

    # NOTE(review): MSG_NO_ENTRY is filled with a table name before being
    # translated whereas MSG_TOOMANY_SYNONYM is translated as is -- confirm
    # that the latter does not expect a "%s" substitution as well.
    raw_messages = (
        MSG_NO_EDITOR,
        MSG_TRANSFORM_PREPRINT,
        MSG_FIX_ORIGIN,
        MSG_IN_DB,
        MSG_LOAD,
        MSG_NO_ENTRY % "collaborations",
        MSG_NO_ENTRY % "countries",
        MSG_NO_ENTRY % "publishers",
        MSG_TOOMANY_SYNONYM,
        MSG_NO_AUTHOR,
        MSG_NO_CONF,
        MSG_NO_CONF_DATE,
        MSG_NO_DATE,
        MSG_NO_MY_AUTHOR,
        MSG_NO_PUBLISHER,
        MSG_NO_REF,
        MSG_NO_THESIS,
        MSG_NO_YEAR,
        MSG_PREPRINT_IS_PAPER,
        MSG_PREPRINT_IS_CONFERENCE,
        MSG_PREPRINT_IS_THESIS,
        MSG_PREPRINT_NO_NUMBER,
        MSG_REPORT_NO_NUMBER,
        MSG_TEMPORARY_RECORD,
        MSG_TO_MANY_DATE,
        MSG_TO_MANY_FAUTHOR,
        MSG_TO_MANY_YEAR,
        MSG_WELL_FORMED_CONF_DATES,
        MSG_WELL_FORMED_DATE,
        MSG_WELL_FORMED_EDITOR,
    )

    return {translate(raw) for raw in raw_messages}
def test_lhcb_acl(messages):
    """Harvest the LHCb articles of the current year with a single tool.

    Handy in order to:

        * debug a harvester;
        * profile it and see where the time is spent;
        * compare implementations and measure improvements;
        * ...

    Args:
        messages (set): the translated log messages which are allowed

    """
    db = current.db

    # These identifiers only matter when records are inserted in the database
    lhcb_team, lhcb_project, acl_category = 7, 8, 2

    # harvest the current year so that different cases are met
    this_year = str(current.request.now.year)

    # build the harvester
    tool = build_harvester_tool(
        db,
        lhcb_team,
        lhcb_project,
        "articles",
        acl_category,
        year_start=this_year,
        year_end="",
        dry_run=True,
        debug=False)

    assert isinstance(tool, Articles)

    # run the harvester
    tool.process_url("cds.cern.ch", "LHCb Papers")

    # The number of articles evolves within a year and cannot be checked:
    # only verify that no unexpected message was logged.
    logged = {entry.txt for entry in tool.logs}
    assert logged.issubset(messages)
# -*- coding: utf-8 -*-
"""test_inveniostore
"""
from invenio_tools.inveniostore import InvenioStore
def test_get_ids():
    """Compare the record ids of the 2015 LHCb articles with a reference list.

    The ids are requested from the store *cds.cern.ch* and sorted before
    being compared to the reference.

    """
    expected_ids = [
        1750838, 1755550, 1951383, 1951424, 1955544, 1966993, 1967222,
        1967422, 1968989, 1969197, 1970675, 1970690, 1972201, 1975522,
        1975714, 1978281, 1978798, 1981106, 1983198, 1987883, 1996441,
        2000543, 2002385, 2003252, 2003792, 2003793, 2003794, 2004586,
        2004591, 2005510, 2007377, 2011387, 2012165, 2012990, 2014715,
        2014733, 2014836, 2016239, 2016711, 2019534, 2019536, 2020686,
        2021262, 2029609, 2029820, 2030417, 2033887, 2033891, 2038937,
        2040342, 2045144, 2047219, 2048426, 2048427, 2048812, 2049870,
        2055598, 2057916, 2059561, 2060452]

    # search criteria forwarded to the store as keyword arguments
    criteria = dict(
        f1="year",
        p1="2015",
        cc="LHCb Papers",
        m1="r",
        so="d",
        sf="year")

    found_ids = InvenioStore("cds.cern.ch").get_ids(**criteria)

    # sort in place so that the comparison does not depend on the order
    # in which the store returned the ids
    found_ids.sort()

    assert len(found_ids) == 60
    assert found_ids == expected_ids
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment