Django

Code

PolishLocalflavor: terc.py

File terc.py, 2.0 kB (added by Piotr Lewandowski <django@icomputing.pl>, 5 months ago)

+ terc.py

Line 
1 #!/usr/bin/python2.5
2 # encoding=UTF-8
3 '''
4     Usage: terc.py <TERC.xml
5
6     TERC.xml can be found at http://www.stat.gov.pl/
7 '''
8 # Copyright © 2008
9 #   Piotr Lewandowski <piotr.lewandowski+django@gmail.com>,
10 #
11 # This program is free software; you can redistribute it and/or modify it
12 # under the terms of the GNU General Public License, version 2, as
13 # published by the Free Software Foundation.
14 #
15 # This program is distributed in the hope that it will be useful,
16 # but WITHOUT ANY WARRANTY; without even the implied warranty of
17 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 # GNU General Public License for more details.
19
20 import re
21 import sys
22 import xml.etree.cElementTree as etree
23
def parse_TERC(stream):
    '''
    Iterate over the <row> elements of a TERC XML document.

    stream is handed straight to etree.iterparse (the script passes
    sys.stdin).

    Yields (code, name) pairs, where code is the concatenated text of the
    WOJ, POW, GMI and RODZ columns (an empty column contributes nothing)
    and name is the text of the NAZWA column without surrounding
    whitespace.

    Raises KeyError when a row lacks one of these columns.
    '''
    for _event, element in etree.iterparse(stream):
        if element.tag != 'row':
            continue
        # Every child of a row carries its column name in the "name"
        # attribute and its value as element text.
        item = dict((child.get('name'), child.text) for child in element)
        code = ''.join([item[n] or '' for n in ('WOJ', 'POW', 'GMI', 'RODZ')])
        name = item['NAZWA'].strip()
        # iterparse keeps the tree it has built so far; drop this row's
        # children now that their values have been copied out, so memory
        # does not grow with every processed row.
        element.clear()
        yield code, name


# Pattern fragment (not compiled on its own) for the "M." / "m." prefix,
# optionally followed by " st."; it is embedded in the county and commune
# patterns below.
CITY_PREFIX_RE = r'[Mm][.]( st[.])?'
# Leading "WOJ. " of a province name.
PROVINCE_RE = re.compile(r'^WOJ[.] ')
# Leading "Powiat ", optionally followed by the city prefix, of a county name.
COUNTY_RE = re.compile(r'^Powiat( %s)? ' % CITY_PREFIX_RE)
# Leading city prefix of a commune name.
COMMUNE_RE = re.compile(r'^%s ' % CITY_PREFIX_RE)


def _province_literal(code, name):
    '''Return a ugettext_lazy(u'...') expression for the lower-cased name
    with its "WOJ. " prefix removed.'''
    return "ugettext_lazy(u'%s')" % PROVINCE_RE.sub('', name).lower()


def _county_literal(code, name):
    '''Return a u'...' literal for the name with its "Powiat" prefix removed.'''
    return "u'%s'" % COUNTY_RE.sub('', name)


def _commune_literal(code, name):
    '''Return a u'...' literal for the name with its city prefix removed,
    or None when the last digit of code is not 1, 2 or 3.'''
    # NOTE(review): the last character is presumably the RODZ (commune
    # type) digit -- confirm against the TERC documentation.
    if int(code[-1]) in (1, 2, 3):
        return "u'%s'" % COMMUNE_RE.sub('', name)
    return None


# Maps the length of a TERC code to a triple:
#   (dataset name, dict collecting code -> source literal,
#    function(code, name) returning the literal, or None to skip the entry).
DATASETS = {
    2: ('provinces', {}, _province_literal),
    4: ('counties', {}, _county_literal),
    7: ('communes', {}, _commune_literal),
}


def _write_line(text):
    '''Write text followed by a newline to standard output.

    Text that is not a native str (a Python 2 unicode object) is encoded
    as UTF-8 first.
    '''
    if not isinstance(text, str):
        text = text.encode('UTF-8')
    sys.stdout.write(text + '\n')


def main():
    '''
    Read TERC XML from standard input and print one Python dictionary
    (PL_<DATASET NAME>) per entry of DATASETS to standard output.

    Raises KeyError when the length of a code matches no DATASETS entry.
    '''
    for code, name in parse_TERC(sys.stdin):
        # The length of the code selects the dataset the row belongs to.
        _, entries, clean_name = DATASETS[len(code)]
        # The name ends up inside a single-quoted literal: escape
        # backslashes first, then the quotes, so neither can break or
        # alter the generated string.
        escaped = name.replace('\\', '\\\\').replace("'", "\\'")
        literal = clean_name(code, escaped)
        if literal:
            entries[code] = literal

    for index in sorted(DATASETS):
        dict_name, entries, _ = DATASETS[index]
        _write_line('%s = {' % ('PL_' + dict_name.upper()))
        for code in sorted(entries):
            _write_line("    %r: %s," % (code, entries[code]))
        # The embedded newline leaves a blank line after each dictionary.
        _write_line('}\n')


if __name__ == '__main__':
    main()

# vim:et ts=4 sw=4