Package Gnumed :: Package business :: Module gmATC
[frames | no frames]

Source Code for Module Gnumed.business.gmATC

  1  # -*- coding: utf8 -*- 
  2  """ATC/DDD handling code. 
  3   
  4  http://who.no 
  5   
  6  license: GPL v2 or later 
  7  """ 
  8  #============================================================ 
  9  __version__ = "$Revision: 1.7 $" 
 10  __author__ = "K.Hilbert <Karsten.Hilbert@gmx.net>" 
 11   
 12  import sys, codecs, logging, csv, re as regex, os.path 
 13   
 14   
 15  if __name__ == '__main__': 
 16          sys.path.insert(0, '../../') 
 17  from Gnumed.pycommon import gmPG2, gmTools, gmCfg2 
 18   
 19   
 20  _log = logging.getLogger('gm.atc') 
 21  _log.info(__version__) 
 22   
 23  _cfg = gmCfg2.gmCfgData() 
 24  #============================================================ 
def propagate_atc(substance=None, atc=None):
    """Store an ATC code on substances/drugs which do not have one yet.

    substance: name matched case-insensitively against the
        "description" column of ref.consumable_substance and
        ref.branded_drug
    atc: the ATC code to store; when None or blank it is looked
        up from <substance> via text2atc() (exact match only)

    Returns the ATC code that was written, or None when no
    code was given and the lookup found either nothing or more
    than one candidate (in which case nothing is updated).
    """
    _log.debug('substance <%s>, ATC <%s>', substance, atc)

    # a whitespace-only code counts as "no code given"
    if (atc is not None) and (atc.strip() == u''):
        atc = None

    if atc is None:
        candidates = text2atc(text = substance, fuzzy = False)
        candidate_count = len(candidates)
        if candidate_count != 1:
            if candidate_count == 0:
                _log.debug(u'no ATC found, aborting')
            else:
                _log.debug(u'non-unique ATC mapping, aborting')
            return None
        # first column of the single hit is the ATC code
        atc = candidates[0][0].strip()

    params = {'atc': atc, 'term': substance.strip()}
    update_substances = {
        'cmd': u"UPDATE ref.consumable_substance SET atc_code = %(atc)s WHERE lower(description) = lower(%(term)s) AND atc_code IS NULL",
        'args': params
    }
    update_brands = {
        'cmd': u"UPDATE ref.branded_drug SET atc_code = %(atc)s WHERE lower(description) = lower(%(term)s) AND atc_code IS NULL",
        'args': params
    }
    # only rows still lacking a code are touched (atc_code IS NULL)
    gmPG2.run_rw_queries(queries = [update_substances, update_brands])

    return atc
53 #============================================================
def text2atc(text=None, fuzzy=False):
    """Look up the ATC code(s) known for a term.

    text: the term to search for (stripped before use)
    fuzzy: False -> case-insensitive equality match,
        True -> case-insensitive substring match (ILIKE %text%)

    Searched are ref.v_atc (column "term") as well as
    ref.consumable_substance and ref.branded_drug (column
    "description").

    Returns the rows handed back by gmPG2.run_ro_queries(), one
    per distinct ATC code, with the columns atc_code,
    is_group_code, pk_data_source (the latter two being NULL for
    hits from the substance/drug tables).
    """
    text = text.strip()

    if not fuzzy:
        params = {'term': text.lower()}
        sql = u"""
            SELECT DISTINCT ON (atc_code) *
            FROM (
                SELECT atc as atc_code, is_group_code, pk_data_source
                FROM ref.v_atc
                WHERE lower(term) = lower(%(term)s) AND atc IS NOT NULL
                    UNION
                SELECT atc_code, null, null
                FROM ref.consumable_substance
                WHERE lower(description) = lower(%(term)s) AND atc_code IS NOT NULL
                    UNION
                SELECT atc_code, null, null
                FROM ref.branded_drug
                WHERE lower(description) = lower(%(term)s) AND atc_code IS NOT NULL
            ) as tmp
            ORDER BY atc_code
        """
    else:
        # wrap the term in SQL wildcards for substring matching
        params = {'term': u'%%%s%%' % text}
        sql = u"""
            SELECT DISTINCT ON (atc_code) *
            FROM (
                SELECT atc as atc_code, is_group_code, pk_data_source
                FROM ref.v_atc
                WHERE term ilike %(term)s AND atc IS NOT NULL
                    UNION
                SELECT atc_code, null, null
                FROM ref.consumable_substance
                WHERE description ilike %(term)s AND atc_code IS NOT NULL
                    UNION
                SELECT atc_code, null, null
                FROM ref.branded_drug
                WHERE description ilike %(term)s AND atc_code IS NOT NULL
            ) as tmp
            ORDER BY atc_code
        """

    query = {'cmd': sql, 'args': params}
    rows, idx = gmPG2.run_ro_queries(queries = [query], get_col_idx = False)

    _log.debug(u'term: %s => ATCs: %s (fuzzy: %s)', text, rows, fuzzy)

    return rows
102 #============================================================
def atc2ddd(atc=None):
    """Look up the DDD value and unit recorded for an ATC code.

    atc: the ATC code (stripped before being used in the query)

    Returns the rows handed back by gmPG2.run_ro_queries() with
    the columns ddd, unit. Only ref.atc entries having a
    non-NULL ddd are considered.
    """
    params = {'atc': atc.strip()}
    sql = u"""
        SELECT DISTINCT ON (code) ddd, unit
        FROM ref.atc
        WHERE
            code = %(atc)s
                AND
            ddd IS NOT NULL
    """
    query = {'cmd': sql, 'args': params}
    rows, idx = gmPG2.run_ro_queries(queries = [query], get_col_idx = False)

    _log.debug(u'ATC: %s => DDD: %s', atc, rows)

    return rows
118 #============================================================
def get_reference_atcs(order_by=u'atc, term, lang'):
    """Return all rows of ref.v_atc.

    order_by: column list put after ORDER BY

    NOTE(review): <order_by> is interpolated into the SQL text
    as-is rather than passed as a query argument - callers
    presumably only ever pass trusted column names; confirm.
    """
    sql = u'SELECT * FROM ref.v_atc ORDER BY %s' % order_by
    rows, idx = gmPG2.run_ro_queries(
        queries = [{'cmd': sql}],
        get_col_idx = False
    )
    return rows
123 #============================================================
def atc_import(cfg_fname=None, conn=None):
    """Import an ATC data file into ref.atc.

    cfg_fname: name of a config file whose group "atc" names the
        data file (looked for in the directory of the config
        file) and carries the meta data (version, language,
        description, url, long name, short name) stored in
        ref.data_source
    conn: database connection on which the staging table is
        emptied, loaded, and transferred; it is committed after
        each of those steps

    The data file is read as CSV ("," delimited, '"' quoted).
    The first row is skipped as a header, rows whose first five
    fields are all empty are skipped, too.

    Returns True. Errors raised by the config/database/file
    layers are not caught here.
    """
    # read meta data
    _cfg.add_file_source(source = 'atc', file = cfg_fname, encoding = 'utf8')

    data_fname = os.path.join (
        os.path.dirname(cfg_fname),
        _cfg.get(group = 'atc', option = 'data file', source_order = [('atc', 'return')])
    )    # must be in same dir as conf file
    version = _cfg.get(group = 'atc', option = 'version', source_order = [('atc', 'return')])
    lang = _cfg.get(group = 'atc', option = 'language', source_order = [('atc', 'return')])
    desc = _cfg.get(group = 'atc', option = 'description', source_order = [('atc', 'return')])
    url = _cfg.get(group = 'atc', option = 'url', source_order = [('atc', 'return')])
    name_long = _cfg.get(group = 'atc', option = 'long name', source_order = [('atc', 'return')])
    name_short = _cfg.get(group = 'atc', option = 'short name', source_order = [('atc', 'return')])

    _cfg.remove_source(source = 'atc')

    _log.debug('importing ATC version [%s] (%s) from [%s]', version, lang, data_fname)

    args = {'ver': version, 'desc': desc, 'url': url, 'name_long': name_long, 'name_short': name_short, 'lang': lang}

    # create data source record (replacing one of the same name/version)
    queries = [
        {
        'cmd': u"""delete from ref.data_source where name_short = %(name_short)s and version = %(ver)s""",
        'args': args
        }, {
        'cmd': u"""
insert into ref.data_source (name_long, name_short, version, description, lang, source) values (
    %(name_long)s,
    %(name_short)s,
    %(ver)s,
    %(desc)s,
    %(lang)s,
    %(url)s
)""",
        'args': args
        }, {
        'cmd': u"""select pk from ref.data_source where name_short = %(name_short)s and version = %(ver)s""",
        'args': args
        }
    ]
    rows, idx = gmPG2.run_rw_queries(queries = queries, return_data = True)
    data_src_pk = rows[0][0]
    _log.debug('ATC data source record created, pk is #%s', data_src_pk)

    # patterns for the DDD field (5th column), compiled once
    # rather than re-parsed for every row
    # "1,1 mg O,P,R,..."
    re_decimal = regex.compile(r'\d{,3},\d{,3}\s.{1,2}\s.(,.)*$')
    # "1,1 mg O,P,R bezogen auf ..."
    re_decimal_with_comment = regex.compile(r'\d{,3},\d{,3}\s.{1,2}\s.(,.)*\s.+$')
    # "20 mg O"
    re_integer = regex.compile(r'\d{,3}\s.{1,2}\s.(,.)*$')
    # "20 mg O bezogen auf ..."
    re_integer_with_comment = regex.compile(r'\d{,3}\s.{1,2}\s.(,.)*\s.+$')
    re_whitespace = regex.compile(r'\s')

    # import data
    csv_file = codecs.open(data_fname, 'rU', 'utf8', 'replace')
    # make sure the file gets closed even if loading the staging table fails
    try:
        atc_reader = gmTools.unicode_csv_reader(csv_file, delimiter = ",", quotechar = '"')

        # clean out staging area
        curs = conn.cursor()
        cmd = u"""delete from ref.atc_staging"""
        gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd}])
        curs.close()
        conn.commit()
        _log.debug('ATC staging table emptied')

        # from file into staging table
        curs = conn.cursor()
        cmd = u"""insert into ref.atc_staging values (%s, %s, %s, %s, %s, %s)"""
        header_skipped = False
        for atc_line in atc_reader:
            # skip first
            if not header_skipped:
                header_skipped = True
                continue
            # skip blanks, slicing (rather than indexing) so that an
            # entirely empty row does not raise IndexError
            if u''.join(atc_line[:5]) == u'':
                continue

            comment = u''
            ddd_val = u''
            unit = u''
            adro = u''

            ddd_field = atc_line[4]
            # "1,1 mg O,P,R,..."
            if re_decimal.match(ddd_field):
                ddd_val, unit, adro = re_whitespace.split(ddd_field)
            # "1,1 mg O,P,R bezogen auf ..."
            elif re_decimal_with_comment.match(ddd_field):
                ddd_val, unit, adro, comment = re_whitespace.split(ddd_field, 3)
            # "20 mg O"
            elif re_integer.match(ddd_field):
                ddd_val, unit, adro = re_whitespace.split(ddd_field)
            # "20 mg O bezogen auf ..."
            elif re_integer_with_comment.match(ddd_field):
                ddd_val, unit, adro, comment = re_whitespace.split(ddd_field, 3)
            # "Standarddosis: 1 Tablette oder 30 ml Mixtur"
            else:
                comment = ddd_field

            args = [
                atc_line[0].strip(),
                atc_line[2],
                # decimal comma -> decimal point so the value can be cast to numeric
                ddd_val.replace(',', '.'),
                unit,
                adro,
                comment
            ]

            gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd, 'args': args}])

        curs.close()
        conn.commit()
    finally:
        csv_file.close()
    _log.debug('ATC staging table loaded')

    # from staging table to real table
    curs = conn.cursor()
    args = {'src_pk': data_src_pk}
    cmd = u"""
insert into ref.atc (
    fk_data_source,
    code,
    term,
    comment,
    ddd,
    unit,
    administration_route
) select
    %(src_pk)s,
    atc,
    name,
    nullif(comment, ''),
    nullif(ddd, '')::numeric,
    nullif(unit, ''),
    nullif(adro, '')

from
    ref.atc_staging
"""

    gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd, 'args': args}])

    curs.close()
    conn.commit()
    _log.debug('transfer from ATC staging table to real ATC table done')

    # clean out staging area
    curs = conn.cursor()
    cmd = u"""delete from ref.atc_staging"""
    gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd}])
    curs.close()
    conn.commit()
    _log.debug('ATC staging table emptied')

    return True
273 #============================================================ 274 # main 275 #------------------------------------------------------------ 276 if __name__ == "__main__": 277 278 if len(sys.argv) == 1: 279 sys.exit() 280 281 if sys.argv[1] != 'test': 282 sys.exit() 283 284 from Gnumed.pycommon import gmLog2 285 from Gnumed.pycommon import gmI18N 286 287 gmI18N.activate_locale() 288 # gmDateTime.init() 289 290 #--------------------------------------------------------
291 - def test_atc_import():
292 atc_import(cfg_fname = sys.argv[2], conn = gmPG2.get_connection(readonly = False))
293 #--------------------------------------------------------
294 - def test_text2atc():
295 print 'searching ATC code for:', sys.argv[2] 296 print ' ', text2atc(sys.argv[2]) 297 print ' ', text2atc(sys.argv[2], True)
298 #--------------------------------------------------------
299 - def test_atc2ddd():
300 print "searching for DDD on ATC:", sys.argv[2] 301 print atc2ddd(atc = sys.argv[2])
302 #--------------------------------------------------------
303 - def test_get_reference_atcs():
304 print "reference_of_atc_codes:" 305 for atc in get_reference_atcs(): 306 print atc
307 #-------------------------------------------------------- 308 #test_atc_import() 309 #test_text2atc() 310 test_atc2ddd() 311 #test_get_reference_atcs() 312 313 #============================================================ 314