""" harvest_tools.base

"""
from .exception import ToolException
from plugin_dbui import get_id, UNDEF_ID


# Label for the dry-run mode.
DRY_RUN = "dry run"

# Message templates. Those containing ``%s`` are meant to be completed
# with the ``%`` operator (e.g. ``MSG_NO_ENTRY % tablename``).
MSG_CRASH = "Crash: %s"
MSG_FIX_ORIGIN = "Fixed the origin field"
MSG_IN_DB = "Already in the database"
MSG_LOAD = "Load in the database"
MSG_NO_ENTRY = "Reject %s is not defined"
MSG_TOOMANY_SYNONYM = "Reject too many %s synonyms"


def family_name_fr(full_name):
    """Extract the family name when the full name is encoded as ``J. Doe``.

    The family name is everything located after the *first* space, so
    ``R. Le Gac`` gives ``Le Gac``. Only the first token is removed:
    ``J. M. Doe`` gives ``M. Doe``.

    Args:
        full_name (str):
            author name encoded according to French typographic rules.

    Returns:
        str:
            family name, or ``full_name`` unchanged when it does not
            contain any space.

    """
    _, space, family_name = full_name.partition(' ')

    # without a space there are no initials to strip
    return family_name if space else full_name


def learn_my_authors(db,
                     authors=None,
                     id_project=None,
                     id_team=None,
                     year=None):
    """Train the rescue list of the authors of my institute.
    Authors which are not in the rescue list, are added.
    The rescue list is defined by the project, the team identifier and
    by the year.

    Warning:
        all keyword arguments have to be defined.

    Args:
        db (gluon.dal.DAL):
            database connection.

        authors (str):
            authors names separated by ``', '``, e.g. ``'J. Doe, A. Foo'``.

        id_project (int):
            the identifier of the project in the database.

        id_team (int):
            the identifier of the team in the database.

        year (int):
            the year

    Note:
        * When the rescue list does not exist yet, it is created with
          ``authors`` as it is.
        * When it exists, an author is added only if his family name is not
          already the family name of a stored author. In that way ``J. Foo``
          and ``J. M. Foo`` are considered as the same person.
        * Nothing is done when the rescue list exists and ``authors``
          is empty.

    """
    # get the list of authors stored in the database
    row = db.my_authors(id_projects=id_project,
                        id_teams=id_team,
                        year=year)

    # no entry in the database
    if row is None:
        db.my_authors[None] = dict(authors=authors,
                                   id_projects=id_project,
                                   id_teams=id_team,
                                   year=year)
        return

    # nothing to learn
    if not authors:
        return

    # empty items are dropped to avoid dangling separators when joining
    database_authors = [el for el in (row.authors or '').split(', ') if el]

    # compare with the input list
    # and extract authors which are not in the db
    candidates = {el for el in authors.split(', ') if el}
    candidates.difference_update(database_authors)

    if not candidates:
        return

    # Compare family names exactly. A substring search in the stored string
    # would reject "B. Martin" when "A. Martinez" is stored.
    known_families = {_author_family_name(el) for el in database_authors}

    # sorted() keeps the result reproducible from one run to the next
    additions = [el for el in sorted(candidates)
                 if _author_family_name(el) not in known_families]

    # update the database
    database_authors.extend(additions)
    database_authors.sort(key=family_name_fr)
    db.my_authors[row.id] = dict(authors=', '.join(database_authors))


def _author_family_name(name):
    """Return the part of *name* located after the last ``'. '``.

    ``J. M. Foo`` and ``J. Foo`` both give ``Foo``. The name is returned
    unchanged when it does not contain ``'. '``.

    """
    idx = name.rfind('. ')
    return name[idx + 2:] if idx >= 0 else name


def search_synonym(table, fieldname, value, create=False):
    """Get the database identifier for the record having the database field
    or the synonyms field matching the value.

    The search is done in two steps: first on the field ``fieldname``,
    then, when nothing is found, on the field *synonyms*.

    Note:
        The database table must have a field name *synonyms*.
        It contains a list of strings.

    Args:
        table (gluon.DAL.Table):
            database table.

        fieldname (str):
            field of the database table identified by its name.

        value (str):
            value to be matched.

        create (bool):
            create a new entry in the database table, with ``fieldname``
            set to ``value``, when it is ``True`` and nothing matches.

    Returns:
        int:
            * the id of the database record.
            * UNDEF_ID if value is not defined.

    Raises:
        ToolException:
            * no synonym found and creation of a new entry is not allowed.
            * more than one synonym is found.

    """
    if not value:
        return UNDEF_ID

    criteria = {fieldname: value}

    # direct match on the field
    id_rec = get_id(table, **criteria)
    if id_rec is not None:
        return id_rec

    # nothing found, have a look to the synonyms field
    db = table._db
    setrows = db(table.synonyms.contains(value))
    ncount = setrows.count()

    # one synonym found
    if ncount == 1:
        return setrows.select(table.id).first().id

    # more than one synonym - don't know what to choose
    if ncount > 1:
        raise ToolException(MSG_TOOMANY_SYNONYM % table._tablename)

    # no synonym found, create the entry when it is allowed
    if create:
        return table.insert(**criteria)

    raise ToolException(MSG_NO_ENTRY % table._tablename)