Commit c88a8953 authored by LE GAC Renaud's avatar LE GAC Renaud
Browse files

New script to update the origin field in existing databases.

parent f21f3064
# -*- coding: utf-8 -*-
""" NAME
fix-origin-0815 -- OAI identifier for multiple stores
SYNOPSIS
fix-origin-0815 [options]
DESCRIPTION
Before track_publications 0.8.15, the origin field
contained the OAI identifier of the store.
The same publication can be found in the cds and in the inspirehep
store. In each case, the record has an OAI identifier which depends
on the store.
Up to version 0.8.14, the origin field contains the OAI identifier
of the store. Starting with version 0.8.15, the origin field
contains the OAI identifier for both stores.
This script reconstructs the origin field.
OPTIONS
EXAMPLE
> cd ... /track_publications/scripts
> ./run script fix-origin-0815.py
> ./run script -S track_publications_cppm fix-origin-0815.py
AUTHOR
R. Le Gac -- Oct 2015
"""
if __name__ == "__main__":
    # Script body: rebuild the ``origin`` field of every publication that
    # already has one, then ask the operator whether to commit the changes.
    #
    # NOTE(review): ``db`` is not defined in this file; it is presumably
    # injected by the script runner (see the EXAMPLE section) -- confirm.

    import sys
    import re

    from callbacks import INHIBIT_PUBLICATION_UPDATE_ON_OK
    from invenio_tools import load_record, Marc12Exception
    from plugin_dbui import get_id, UNDEF_ID

    # group 1 is the host of the store, group 2 the numeric record id
    REG_OAI_URL = re.compile(r"http[s]?://([\w\.]+)/record/(\d+)")

    # unlock the publications update when the status is OK
    db.publications._before_update.remove(INHIBIT_PUBLICATION_UPDATE_ON_OK)

    # scan the publications table, keeping only rows with a non-empty origin
    query = db.publications.origin.len() > 0
    for row in db(query).select():

        # A non-empty origin is not necessarily a record URL. Without this
        # guard, match.group() raises AttributeError on None and the whole
        # scan aborts on the first unexpected value.
        match = REG_OAI_URL.match(row.origin)
        if match is None:
            print("%s %s --> skipped (not a record URL)" % (row.id, row.origin))
            continue

        host, rec_id = match.groups()

        # fetch the record from its store; records which cannot be
        # loaded are left untouched
        try:
            record = load_record(host, rec_id)
        except Marc12Exception:
            continue

        # presumably the new-style origin covering both stores -- see the
        # module docstring; nothing to do when it is empty
        val = record.oai_url()
        if not val:
            continue

        # single formatted string: same output as the former print statement,
        # and valid syntax for both python 2 and python 3
        print("%s %s --> %s" % (row.id, row.origin, val))
        db(db.publications.id == row.id).update(origin=val)

    # commit change only on an explicit 'y' (raw_input: python 2 builtin)
    rep = raw_input("Commit database changes [y/N]: ")
    if rep == 'y':
        db.commit()

    # exit gently
    sys.exit(0)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment