Commit c2aaa063 authored by LE GAC Renaud's avatar LE GAC Renaud
Browse files

Add the script fix-country.py to correct invalid country names.

parent eaa3d6dd
......@@ -3,268 +3,8 @@
List of countries extracted from the geographical database www.geonames.org:
1) get the file countryInfo.txt
from http://download.geonames.org/export/dump/
2) open the file with LibreOffice Calc
and remove all columns but the country names
Extracted in Nov. 2014
"""
from countries import COUNTRIES
# Populate the countries table from the shared COUNTRIES list imported
# from the countries module. The local copy of the list that used to live
# here was an entry-for-entry duplicate of COUNTRIES, so it is dropped to
# keep a single source of truth.
#
# NOTE(review): dbui and db are not defined in this file; presumably they
# are provided by the web2py environment -- confirm.
# NOTE(review): get_create_id presumably returns the id of the existing
# row, or creates it when missing -- confirm against plugin_dbui.
for el in COUNTRIES:
    dbui.get_create_id(db.countries, country=el)
# -*- coding: utf-8 -*-
""" countries
List of countries extracted from the geographical database www.geonames.org:
1) get the file countryInfo.txt
from http://download.geonames.org/export/dump/
2) open the file with LibreOffice Calc
and remove all columns but the country names
Extracted in Nov. 2014
"""
# Reference list of country names, in English.
# The entries are kept in the order in which they were extracted
# (this is not an alphabetical ordering by name).
COUNTRIES = [
    'Andorra',
    'United Arab Emirates',
    'Afghanistan',
    'Antigua and Barbuda',
    'Anguilla',
    'Albania',
    'Armenia',
    'Angola',
    'Antarctica',
    'Argentina',
    'American Samoa',
    'Austria',
    'Australia',
    'Aruba',
    'Aland Islands',
    'Azerbaijan',
    'Bosnia and Herzegovina',
    'Barbados',
    'Bangladesh',
    'Belgium',
    'Burkina Faso',
    'Bulgaria',
    'Bahrain',
    'Burundi',
    'Benin',
    'Saint Barthelemy',
    'Bermuda',
    'Brunei',
    'Bolivia',
    'Bonaire',
    'Brazil',
    'Bahamas',
    'Bhutan',
    'Bouvet Island',
    'Botswana',
    'Belarus',
    'Belize',
    'Canada',
    'Cocos Islands',
    'Democratic Republic of the Congo',
    'Central African Republic',
    'Republic of the Congo',
    'Switzerland',
    'Ivory Coast',
    'Cook Islands',
    'Chile',
    'Cameroon',
    'China',
    'Colombia',
    'Costa Rica',
    'Cuba',
    'Cape Verde',
    'Curacao',
    'Christmas Island',
    'Cyprus',
    'Czech Republic',
    'Germany',
    'Djibouti',
    'Denmark',
    'Dominica',
    'Dominican Republic',
    'Algeria',
    'Ecuador',
    'Estonia',
    'Egypt',
    'Western Sahara',
    'Eritrea',
    'Spain',
    'Ethiopia',
    'Finland',
    'Fiji',
    'Falkland Islands',
    'Micronesia',
    'Faroe Islands',
    'France',
    'Gabon',
    'United Kingdom',
    'Grenada',
    'Georgia',
    'French Guiana',
    'Guernsey',
    'Ghana',
    'Gibraltar',
    'Greenland',
    'Gambia',
    'Guinea',
    'Guadeloupe',
    'Equatorial Guinea',
    'Greece',
    'South Georgia and the South Sandwich Islands',
    'Guatemala',
    'Guam',
    'Guinea-Bissau',
    'Guyana',
    'Hong Kong',
    'Heard Island and McDonald Islands',
    'Honduras',
    'Croatia',
    'Haiti',
    'Hungary',
    'Indonesia',
    'Ireland',
    'Israel',
    'Isle of Man',
    'India',
    'British Indian Ocean Territory',
    'Iraq',
    'Iran',
    'Iceland',
    'Italy',
    'Jersey',
    'Jamaica',
    'Jordan',
    'Japan',
    'Kenya',
    'Kyrgyzstan',
    'Cambodia',
    'Kiribati',
    'Comoros',
    'Saint Kitts and Nevis',
    'North Korea',
    'South Korea',
    'Kosovo',
    'Kuwait',
    'Cayman Islands',
    'Kazakhstan',
    'Laos',
    'Lebanon',
    'Saint Lucia',
    'Liechtenstein',
    'Sri Lanka',
    'Liberia',
    'Lesotho',
    'Lithuania',
    'Luxembourg',
    'Latvia',
    'Libya',
    'Morocco',
    'Monaco',
    'Moldova',
    'Montenegro',
    'Saint Martin',
    'Madagascar',
    'Marshall Islands',
    'Macedonia',
    'Mali',
    'Myanmar',
    'Mongolia',
    'Macao',
    'Northern Mariana Islands',
    'Martinique',
    'Mauritania',
    'Montserrat',
    'Malta',
    'Mauritius',
    'Maldives',
    'Malawi',
    'Mexico',
    'Malaysia',
    'Mozambique',
    'Namibia',
    'New Caledonia',
    'Niger',
    'Norfolk Island',
    'Nigeria',
    'Nicaragua',
    'Netherlands',
    'Norway',
    'Nepal',
    'Nauru',
    'Niue',
    'New Zealand',
    'Oman',
    'Panama',
    'Peru',
    'French Polynesia',
    'Papua New Guinea',
    'Philippines',
    'Pakistan',
    'Poland',
    'Saint Pierre and Miquelon',
    'Pitcairn',
    'Puerto Rico',
    'Palestinian Territory',
    'Portugal',
    'Palau',
    'Paraguay',
    'Qatar',
    'Reunion',
    'Romania',
    'Serbia',
    'Russia',
    'Rwanda',
    'Saudi Arabia',
    'Solomon Islands',
    'Seychelles',
    'Sudan',
    'South Sudan',
    'Sweden',
    'Singapore',
    'Saint Helena',
    'Slovenia',
    'Svalbard and Jan Mayen',
    'Slovakia',
    'Sierra Leone',
    'San Marino',
    'Senegal',
    'Somalia',
    'Suriname',
    'Sao Tome and Principe',
    'El Salvador',
    'Sint Maarten',
    'Syria',
    'Swaziland',
    'Turks and Caicos Islands',
    'Chad',
    'French Southern Territories',
    'Togo',
    'Thailand',
    'Tajikistan',
    'Tokelau',
    'East Timor',
    'Turkmenistan',
    'Tunisia',
    'Tonga',
    'Turkey',
    'Trinidad and Tobago',
    'Tuvalu',
    'Taiwan',
    'Tanzania',
    'Ukraine',
    'Uganda',
    'United States Minor Outlying Islands',
    'United States',
    'Uruguay',
    'Uzbekistan',
    'Vatican',
    'Saint Vincent and the Grenadines',
    'Venezuela',
    'British Virgin Islands',
    'U.S. Virgin Islands',
    'Vietnam',
    'Vanuatu',
    'Wallis and Futuna',
    'Samoa',
    'Yemen',
    'Mayotte',
    'South Africa',
    'Zambia',
    'Zimbabwe',
    'Serbia and Montenegro',
    'Netherlands Antilles',
]
# -*- coding: utf-8 -*-
""" NAME
fix-country -- fix invalid country names
SYNOPSIS
fix-country [options]
DESCRIPTION
Before track_publications 0.8.8, the name of the country
for a conference was defined by the user or by the harvesters.
As a result, the database contains a mixture of French and
English names for countries. In addition, some values are wrong.
Starting with version 0.8.8, the database is populated with
a list of countries, in English, coming from a geographical
database. A standard user or a harvester can no longer add
a country to the database.
The aim of this script is to replace invalid names by the
correct ones and to remove the bad ones.
At the end of this process, the database should only contain
official country names.
OPTIONS
EXAMPLE
> cd web2py
> python web2py.py --shell track_publications -M --run fix-country.py
AUTHOR
R. Le Gac
"""
from callbacks import INHIBIT_PUBLICATION_UPDATE_ON_OK
from countries import COUNTRIES
from plugin_dbui import get_id, UNDEF_ID
# Unlock the publications update when the status is OK: the callback
# INHIBIT_PUBLICATION_UPDATE_ON_OK is taken out of the _before_update list
# of the publications table so that the updates below are not inhibited.
# NOTE(review): db is not defined in this file; presumably it is provided
# by the web2py shell started with the -M option (see EXAMPLE) -- confirm.
db.publications._before_update.remove(INHIBIT_PUBLICATION_UPDATE_ON_OK)

# Scan the countries table to find the invalid names. For each of them the
# operator is asked for a replacement which has to exist in the table.
for country_row in db(db.countries).select():

    # official names and the undefined entry are left untouched
    if country_row.country in COUNTRIES or country_row.id == UNDEF_ID:
        continue

    # Use the id of the row being processed rather than a second lookup
    # by name, which would be ambiguous if the same name is stored twice.
    old_country = country_row.country
    id_old = country_row.id

    # replacement value for the country (an empty answer skips the entry)
    new_country = raw_input("\nReplacement for '%s' [to skip CR]: " % old_country)
    if not new_country:
        continue

    # is the new value valid ? It has to be a row of the countries table.
    id_new = get_id(db.countries, country=new_country)
    if not id_new:
        print("'%s' is not in the countries table, '%s' is left unchanged"
              % (new_country, old_country))
        continue

    # Replacing an entry by itself would delete a country which is still
    # referenced by publications: refuse it.
    if id_new == id_old:
        print("'%s' cannot be replaced by itself, skipped" % old_country)
        continue

    print("%s will be replaced by %s" % (old_country, new_country))
    rep = raw_input("Ok to continue [y/N]: ")
    if rep != "y":
        continue

    # modify the publications pointing to the old country
    for publication in db(db.publications.id_countries == id_old).select():
        print(" -  %s %s" % (publication.id, publication.title))
        db(db.publications.id == publication.id).update(id_countries=id_new)

    # delete the old value, then commit the whole replacement at once so
    # that the database never keeps a half-applied change
    db(db.countries.id == id_old).delete()
    db.commit()
\ No newline at end of file
......@@ -2,7 +2,7 @@
HEAD
- Require plugin_dbui 0.6.1.12 or later version.
- Require plugin_dbui 0.6.1.13 or later version.
Backward compatibility is broken (report interfaces, country default, ...).
The table axes, lists and metrics have to be recreated.
- Consolidate harvesters software.
......@@ -16,6 +16,8 @@ HEAD
- Refactor the ux files to ease the building of a new database from scratch.
- Use the new callbacks INHIBIT_DELETE_UNDEF and INHIBIT_UPDATE_UNDEF.
- Redesign the lists and metrics interfaces.
- The list of countries is almost frozen. Add the script fix-country.py
to correct invalid country names in the existing database.
0.8.7.2 (Sep 2014)
- Migrate to plugin_dbui 0.6.1.7.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment