| 1 |
#!/usr/bin/python2.5 |
|---|
| 2 |
# encoding=UTF-8 |
|---|
| 3 |
''' |
|---|
| 4 |
Usage: terc.py <TERC.xml |
|---|
| 5 |
|
|---|
| 6 |
TERC.xml can be found at http://www.stat.gov.pl/ |
|---|
| 7 |
''' |
|---|
| 8 |
# Copyright © 2008 |
|---|
| 9 |
# Piotr Lewandowski <piotr.lewandowski+django@gmail.com>, |
|---|
| 10 |
# |
|---|
| 11 |
# This program is free software; you can redistribute it and/or modify it |
|---|
| 12 |
# under the terms of the GNU General Public License, version 2, as |
|---|
| 13 |
# published by the Free Software Foundation. |
|---|
| 14 |
# |
|---|
| 15 |
# This program is distributed in the hope that it will be useful, |
|---|
| 16 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|---|
| 17 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|---|
| 18 |
# GNU General Public License for more details. |
|---|
| 19 |
|
|---|
| 20 |
import re |
|---|
| 21 |
import sys |
|---|
| 22 |
import xml.etree.cElementTree as etree |
|---|
| 23 |
|
|---|
| 24 |
def parse_TERC(stream):
    '''
    Iterate over the <row> elements of a TERC XML document.

    stream -- the XML source handed to etree.iterparse (the script
              passes sys.stdin).

    Yields (code, name) pairs.  code is the concatenation of the texts of
    the row's WOJ, POW, GMI and RODZ columns, where an empty column
    contributes nothing; name is the text of the NAZWA column with
    surrounding whitespace stripped.

    Raises KeyError when a row lacks one of these columns and
    AttributeError when its NAZWA column has no text.
    '''
    for event, element in etree.iterparse(stream):
        if element.tag != 'row':
            continue
        # Every child of a row stores the column name in its "name"
        # attribute and the column value as its text.
        item = dict((child.get('name'), child.text) for child in element)
        code = ''.join([item[n] or '' for n in ('WOJ', 'POW', 'GMI', 'RODZ')])
        name = item['NAZWA'].strip()
        # iterparse keeps every parsed element attached to the tree; the
        # values we need are plain strings by now, so drop the row's
        # children to keep memory use flat on large files.
        element.clear()
        yield code, name
|---|
| 30 |
|
|---|
| 31 |
# Pattern fragment (a plain string, not a compiled regex) matching the
# "M." / "m." abbreviation, optionally followed by " st.".
CITY_PREFIX_RE = r'[Mm][.]( st[.])?'
# Leading "WOJ. " marker of a province name.
PROVINCE_RE = re.compile(r'^WOJ[.] ')
# Leading "Powiat " marker of a county name, with an optional city prefix.
COUNTY_RE = re.compile(r'^Powiat( %s)? ' % CITY_PREFIX_RE)
# Leading city prefix of a commune name.
COMMUNE_RE = re.compile(r'^%s ' % CITY_PREFIX_RE)


def _province_literal(code, name):
    '''Return a lazily translated literal of the lower-cased province name.'''
    bare_name = PROVINCE_RE.sub('', name)
    return "ugettext_lazy(u'%s')" % bare_name.lower()


def _county_literal(code, name):
    '''Return a unicode literal of the county name without its prefix.'''
    bare_name = COUNTY_RE.sub('', name)
    return "u'%s'" % bare_name


def _commune_literal(code, name):
    '''
    Return a unicode literal of the commune name without its city prefix,
    or None when the last digit of the code is not 1, 2 or 3.
    '''
    if int(code[-1]) not in (1, 2, 3):
        return None
    return "u'%s'" % COMMUNE_RE.sub('', name)


# Maps the length of a code to a triple of:
#   - the dataset name,
#   - the dictionary that collects code -> source literal for that dataset,
#   - a function (code, name) -> source literal, or a false value to skip
#     the entry.
DATASETS = {
    2: ('provinces', {}, _province_literal),
    4: ('counties', {}, _county_literal),
    7: ('communes', {}, _commune_literal),
}
|---|
| 47 |
|
|---|
| 48 |
if __name__ == '__main__': |
|---|
| 49 |
for code, name in parse_TERC(sys.stdin): |
|---|
| 50 |
index = len(code) |
|---|
| 51 |
_, dict_, clean_name = DATASETS[index] |
|---|
| 52 |
name = clean_name(code, name.replace("'", "\\'")) |
|---|
| 53 |
if name: |
|---|
| 54 |
dict_[code] = name |
|---|
| 55 |
|
|---|
| 56 |
for _, (dict_name, dict_, _) in sorted(DATASETS.iteritems()): |
|---|
| 57 |
print '%s = {' % ('PL_' + dict_name.upper()) |
|---|
| 58 |
for code, name in sorted(dict_.iteritems()): |
|---|
| 59 |
print " %r: %s," % (code, name.encode('UTF-8')) |
|---|
| 60 |
print '}\n' |
|---|
| 61 |
|
|---|
| 62 |
# vim:et ts=4 sw=4 |
|---|