1
2 """ATC/DDD handling code.
3
4 http://who.no
5
6 license: GPL v2 or later
7 """
8
9 __version__ = "$Revision: 1.7 $"
10 __author__ = "K.Hilbert <Karsten.Hilbert@gmx.net>"
11
12 import sys, codecs, logging, csv, re as regex, os.path
13
14
15 if __name__ == '__main__':
16 sys.path.insert(0, '../../')
17 from Gnumed.pycommon import gmPG2, gmTools, gmCfg2
18
19
# Module-level logger; the module revision string is logged once when the
# module is loaded.
20 _log = logging.getLogger('gm.atc')
21 _log.info(__version__)
22
# Shared configuration accessor; further down it is used to read the settings
# file that describes an ATC data file to import.
23 _cfg = gmCfg2.gmCfgData()
24
26
# Fragment: these statements are the body of a function whose `def` line is
# not part of this listing. The body reads the names `substance` and `atc`
# and returns `atc`.
#
# What the visible code does: make sure an ATC code is known for `substance`
# and store that code on matching rows which do not carry one yet.
27 _log.debug('substance <%s>, ATC <%s>', substance, atc)
28
# An empty or whitespace-only ATC is treated the same as "no ATC given".
29 if atc is not None:
30 if atc.strip() == u'':
31 atc = None
32
# No ATC supplied: run an exact (non-fuzzy) lookup on the substance name.
# Processing continues only when exactly one code is found; with zero or
# several matches the function returns `atc`, which is None at that point.
33 if atc is None:
34 atcs = text2atc(text = substance, fuzzy = False)
35 if len(atcs) == 0:
36 _log.debug(u'no ATC found, aborting')
37 return atc
38 if len(atcs) > 1:
39 _log.debug(u'non-unique ATC mapping, aborting')
40 return atc
# First column of the single row is the ATC code (text2atc selects it first).
41 atc = atcs[0][0].strip()
42
# Write the code to consumable substances and branded drugs whose description
# equals the substance name case-insensitively and whose atc_code is still
# NULL; rows that already have a code are left alone.
43 args = {'atc': atc, 'term': substance.strip()}
44 queries = [
45 {'cmd': u"UPDATE ref.consumable_substance SET atc_code = %(atc)s WHERE lower(description) = lower(%(term)s) AND atc_code IS NULL",
46 'args': args},
47 {'cmd': u"UPDATE ref.branded_drug SET atc_code = %(atc)s WHERE lower(description) = lower(%(term)s) AND atc_code IS NULL",
48 'args': args}
49 ]
50 gmPG2.run_rw_queries(queries = queries)
51
# Hand back the ATC code that was used (given by the caller or looked up).
52 return atc
53
def text2atc(text=None, fuzzy=False):
    """Return the ATC codes whose term or description matches *text*.

    Three sources are searched and merged: ``ref.v_atc`` (column ``term``)
    plus ``ref.consumable_substance`` and ``ref.branded_drug`` (column
    ``description``). Only rows carrying a non-NULL ATC code take part and
    at most one row per ATC code is returned, ordered by code.

    Args:
        text: the search term; surrounding whitespace is ignored. ``None``
            (the default) yields an empty list instead of raising.
        fuzzy: if true, match case-insensitively anywhere inside the
            term/description (``ILIKE '%text%'``); otherwise require a
            case-insensitive exact match.

    Returns:
        The rows delivered by ``gmPG2.run_ro_queries()``. Each row holds
        ``atc_code``, ``is_group_code`` and ``pk_data_source``; the latter
        two are NULL for rows that do not come from ``ref.v_atc``.
    """
    # Without a term there is nothing to look up (and nothing to .strip()).
    if text is None:
        return []

    text = text.strip()

    # Both modes run the same three-way UNION and differ only in how a column
    # is compared against the term. "%%(term)s" survives the formatting below
    # as the "%(term)s" placeholder which the database layer fills from `args`.
    if fuzzy:
        # Substring search; LIKE wildcards contained in `text` stay active.
        args = {'term': u'%%%s%%' % text}
        match = u'%s ILIKE %%(term)s'
    else:
        args = {'term': text.lower()}
        match = u'lower(%s) = lower(%%(term)s)'

    cmd = u"""
        SELECT DISTINCT ON (atc_code) *
        FROM (
            SELECT atc AS atc_code, is_group_code, pk_data_source
            FROM ref.v_atc
            WHERE %(on_term)s AND atc IS NOT NULL
                UNION
            SELECT atc_code, null, null
            FROM ref.consumable_substance
            WHERE %(on_description)s AND atc_code IS NOT NULL
                UNION
            SELECT atc_code, null, null
            FROM ref.branded_drug
            WHERE %(on_description)s AND atc_code IS NOT NULL
        ) AS tmp
        ORDER BY atc_code
    """ % {
        'on_term': match % u'term',
        'on_description': match % u'description',
    }

    # The column index is not requested (get_col_idx = False) and not needed.
    rows, _idx = gmPG2.run_ro_queries(queries = [{'cmd': cmd, 'args': args}], get_col_idx = False)

    _log.debug(u'term: %s => ATCs: %s (fuzzy: %s)', text, rows, fuzzy)

    return rows
102
def atc2ddd(atc=None):
    """Return the DDD value and unit stored for an ATC code.

    NOTE(review): the ``def`` line of this function was missing from the
    reviewed listing. The name and the ``atc`` keyword were taken from the
    call ``atc2ddd(atc = ...)`` in the self-test section; the ``None``
    default is an assumption -- confirm against the upstream signature.

    Args:
        atc: the ATC code to look up; surrounding whitespace is ignored.
            ``None`` yields an empty list instead of raising.

    Returns:
        The rows delivered by ``gmPG2.run_ro_queries()``, each holding
        ``ddd`` and ``unit`` from ``ref.atc``. Rows without a DDD are
        excluded and at most one row per code is returned.
    """
    # Without a code there is nothing to look up (and nothing to .strip()).
    if atc is None:
        return []

    cmd = u"""
        SELECT DISTINCT ON (code) ddd, unit
        FROM ref.atc
        WHERE
            code = %(atc)s
                AND
            ddd IS NOT NULL
    """
    args = {'atc': atc.strip()}
    # The column index is not requested (get_col_idx = False) and not needed.
    rows, _idx = gmPG2.run_ro_queries(queries = [{'cmd': cmd, 'args': args}], get_col_idx = False)

    _log.debug(u'ATC: %s => DDD: %s', atc, rows)

    return rows
118
# Fragment: body of a function whose `def` line is not part of this listing;
# `order_by` is expected to be provided by that missing header.
#
# Returns every row of ref.v_atc, sorted by the given ORDER BY expression.
# NOTE(review): `order_by` is interpolated into the SQL text as-is (it is not
# passed as a query argument), so it must never come from untrusted input.
120 cmd = u'SELECT * FROM ref.v_atc ORDER BY %s' % order_by
121 rows, idx = gmPG2.run_ro_queries(queries = [{'cmd': cmd}], get_col_idx = False)
122 return rows
123
125
126
# Fragment: body of a function whose `def` line is not part of this listing.
# The body reads `cfg_fname` (path of a settings file) and `conn` (a database
# connection providing .cursor() and .commit()) and returns True at the end.
#
# Steps visible below:
#   1. read the import settings from the [atc] group of `cfg_fname`
#   2. (re)create the matching ref.data_source record
#   3. load the CSV data file into ref.atc_staging
#   4. copy the staged rows into ref.atc and empty the staging table again

# Step 1: register the settings file as temporary config source 'atc'.
127 _cfg.add_file_source(source = 'atc', file = cfg_fname, encoding = 'utf8')
128
# The data file name is resolved relative to the directory of the settings file.
129 data_fname = os.path.join (
130 os.path.dirname(cfg_fname),
131 _cfg.get(group = 'atc', option = 'data file', source_order = [('atc', 'return')])
132 )
133 version = _cfg.get(group = 'atc', option = 'version', source_order = [('atc', 'return')])
134 lang = _cfg.get(group = 'atc', option = 'language', source_order = [('atc', 'return')])
135 desc = _cfg.get(group = 'atc', option = 'description', source_order = [('atc', 'return')])
136 url = _cfg.get(group = 'atc', option = 'url', source_order = [('atc', 'return')])
137 name_long = _cfg.get(group = 'atc', option = 'long name', source_order = [('atc', 'return')])
138 name_short = _cfg.get(group = 'atc', option = 'short name', source_order = [('atc', 'return')])
139
# All options have been read, so the temporary source is dropped again.
140 _cfg.remove_source(source = 'atc')
141
142 _log.debug('importing ATC version [%s] (%s) from [%s]', version, lang, data_fname)
143
144 args = {'ver': version, 'desc': desc, 'url': url, 'name_long': name_long, 'name_short': name_short, 'lang': lang}
145
146
# Step 2: delete any data source record with the same short name and version,
# insert a fresh one and read back its primary key.
# NOTE(review): unlike the statements further down, this call is not given
# `link_obj`, so it does not explicitly run on `conn` -- confirm this is intended.
147 queries = [
148 {
149 'cmd': u"""delete from ref.data_source where name_short = %(name_short)s and version = %(ver)s""",
150 'args': args
151 }, {
152 'cmd': u"""
153 insert into ref.data_source (name_long, name_short, version, description, lang, source) values (
154 %(name_long)s,
155 %(name_short)s,
156 %(ver)s,
157 %(desc)s,
158 %(lang)s,
159 %(url)s
160 )""",
161 'args': args
162 }, {
163 'cmd': u"""select pk from ref.data_source where name_short = %(name_short)s and version = %(ver)s""",
164 'args': args
165 }
166 ]
167 rows, idx = gmPG2.run_rw_queries(queries = queries, return_data = True)
# First column of the first returned row is the pk selected by the last query.
168 data_src_pk = rows[0][0]
169 _log.debug('ATC data source record created, pk is #%s', data_src_pk)
170
171
# Step 3: open the data file as UTF-8 (undecodable bytes are replaced) and
# wrap it in a CSV reader using comma as delimiter and '"' as quote character.
# NOTE(review): the file is closed only on the success path further down; an
# exception while loading leaves it open.
172 csv_file = codecs.open(data_fname, 'rU', 'utf8', 'replace')
173 atc_reader = gmTools.unicode_csv_reader(csv_file, delimiter = ",", quotechar = '"')
174
175
# Start from an empty staging table.
176 curs = conn.cursor()
177 cmd = u"""delete from ref.atc_staging"""
178 gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd}])
179 curs.close()
180 conn.commit()
181 _log.debug('ATC staging table emptied')
182
183
# Load the CSV rows into the staging table, one INSERT per row.
184 curs = conn.cursor()
185 cmd = u"""insert into ref.atc_staging values (%s, %s, %s, %s, %s, %s)"""
# `first` records whether the first row has been seen already; that row is
# skipped (presumably the column header line -- confirm against the data file).
186 first = False
187 for atc_line in atc_reader:
188
189 if not first:
190 first = True
191 continue
192
# Skip rows whose first five columns are all empty.
193 if atc_line[0] + atc_line[1] + atc_line[2] + atc_line[3] + atc_line[4] == u'':
194 continue
195
196 comment = u''
197 ddd_val = u''
198 unit = u''
199 adro = u''
200
201
# Column 4 is parsed into "<value> <unit> <route(s)> [<comment>]":
#   value  : up to 3 digits, optionally followed by a decimal comma and digits
#   unit   : 1-2 characters
#   routes : single characters separated by commas
# Case 1: decimal-comma value, unit and route(s), nothing else.
202 if regex.match('\d{,3},\d{,3}\s.{1,2}\s.(,.)*$', atc_line[4]):
203 ddd_val, unit, adro = regex.split('\s', atc_line[4])
204
# Case 2: as case 1, followed by free text which becomes the comment
# (split at most 3 times so that the comment keeps its whitespace).
205 elif regex.match('\d{,3},\d{,3}\s.{1,2}\s.(,.)*\s.+$', atc_line[4]):
206 ddd_val, unit, adro, comment = regex.split('\s', atc_line[4], 3)
207
# Case 3: value without decimal comma, unit and route(s), nothing else.
208 elif regex.match('\d{,3}\s.{1,2}\s.(,.)*$', atc_line[4]):
209 ddd_val, unit, adro = regex.split('\s', atc_line[4])
210
# Case 4: as case 3, followed by free text which becomes the comment.
211 elif regex.match('\d{,3}\s.{1,2}\s.(,.)*\s.+$', atc_line[4]):
212 ddd_val, unit, adro, comment = regex.split('\s', atc_line[4], 3)
213
# Anything else: the whole column is kept as comment; value, unit and
# route stay empty.
214 else:
215 comment = atc_line[4]
216
# Values for the six staging columns: CSV column 0 (stripped), CSV column 2,
# the DDD value with decimal comma turned into a decimal point (it is cast
# to numeric when transferred below), unit, route(s) and comment.
217 args = [
218 atc_line[0].strip(),
219 atc_line[2],
220 ddd_val.replace(',', '.'),
221 unit,
222 adro,
223 comment
224 ]
225
226 gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd, 'args': args}])
227
228 curs.close()
229 conn.commit()
230 csv_file.close()
231 _log.debug('ATC staging table loaded')
232
233
# Step 4: copy the staged rows into ref.atc, linking them to the data source
# created above; empty strings become NULL and the DDD is cast to numeric.
234 curs = conn.cursor()
235 args = {'src_pk': data_src_pk}
236 cmd = u"""
237 insert into ref.atc (
238 fk_data_source,
239 code,
240 term,
241 comment,
242 ddd,
243 unit,
244 administration_route
245 ) select
246 %(src_pk)s,
247 atc,
248 name,
249 nullif(comment, ''),
250 nullif(ddd, '')::numeric,
251 nullif(unit, ''),
252 nullif(adro, '')
253
254 from
255 ref.atc_staging
256 """
257
258 gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd, 'args': args}])
259
260 curs.close()
261 conn.commit()
262 _log.debug('transfer from ATC staging table to real ATC table done')
263
264
# The staging rows are no longer needed once they have been transferred.
265 curs = conn.cursor()
266 cmd = u"""delete from ref.atc_staging"""
267 gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd}])
268 curs.close()
269 conn.commit()
270 _log.debug('ATC staging table emptied')
271
# Every step above completed without raising.
272 return True
273
274
275
# Manual self-test, run only when the module is executed as a script.
# Usage as far as visible: first argument must be the literal 'test', the
# second argument is the term / ATC code passed to the lookups below.
276 if __name__ == "__main__":
277
# Exit quietly unless called as "<script> test ...".
278 if len(sys.argv) == 1:
279 sys.exit()
280
281 if sys.argv[1] != 'test':
282 sys.exit()
283
284 from Gnumed.pycommon import gmLog2
285 from Gnumed.pycommon import gmI18N
286
287 gmI18N.activate_locale()
288
289
290
293
# NOTE(review): the embedded numbering skips 294 and 299, so the `def` lines
# of the helpers that own the following print statements are missing from
# this listing. The statements use Python 2 `print` syntax and read
# sys.argv[2] without checking that it was supplied.
# Exact lookup first, then fuzzy lookup, for the given term.
295 print 'searching ATC code for:', sys.argv[2]
296 print ' ', text2atc(sys.argv[2])
297 print ' ', text2atc(sys.argv[2], True)
298
# DDD lookup for the given ATC code; presumably the body of test_atc2ddd(),
# which is the only helper invoked below -- confirm against the full file.
300 print "searching for DDD on ATC:", sys.argv[2]
301 print atc2ddd(atc = sys.argv[2])
302
307
308
309
310 test_atc2ddd()
311
312
313
314