Package Gnumed :: Package business :: Module gmLOINC
[frames | no frames]

Source Code for Module Gnumed.business.gmLOINC

  1  # -*- coding: utf8 -*- 
  2  """LOINC handling code. 
  3   
  4  http://loinc.org 
  5   
  6  license: GPL v2 or later 
  7  """ 
  8  #============================================================ 
  9  __author__ = "K.Hilbert <Karsten.Hilbert@gmx.net>" 
 10   
 11  import sys 
 12  import codecs 
 13  import logging 
 14  import csv 
 15  import re as regex 
 16   
 17   
 18  if __name__ == '__main__': 
 19          sys.path.insert(0, '../../') 
 20  from Gnumed.pycommon import gmPG2 
 21  from Gnumed.pycommon import gmTools 
 22  from Gnumed.pycommon import gmMatchProvider 
 23   
 24   
# module-level logger used by the functions in this module
_log = logging.getLogger('gm.loinc')


# stored as "source" of the LOINC record in ref.data_source by loinc_import()
origin_url = u'http://loinc.org'
# encoding split_LOINCDBTXT() uses for reading the raw LOINC file
file_encoding = 'latin1'                        # encoding is empirical
# split_LOINCDBTXT() switches from license output to data output at a line containing this text
license_delimiter = u'Clip Here for Data'
# get_version() returns the remainder of the line starting with this text
version_tag = u'LOINC(R) Database Version'
# names under which loinc_import() registers LOINC in ref.data_source
name_long = u'LOINC® (Logical Observation Identifiers Names and Codes)'
name_short = u'LOINC'

# field names of a LOINC data row; within this file only the NUMBER of
# fields is used (loinc_import() derives the count of INSERT placeholders from it)
loinc_fields = u"LOINC_NUM COMPONENT PROPERTY TIME_ASPCT SYSTEM SCALE_TYP METHOD_TYP RELAT_NMS CLASS SOURCE DT_LAST_CH CHNG_TYPE COMMENTS ANSWERLIST STATUS MAP_TO SCOPE NORM_RANGE IPCC_UNITS REFERENCE EXACT_CMP_SY MOLAR_MASS CLASSTYPE FORMULA SPECIES EXMPL_ANSWERS ACSSYM BASE_NAME FINAL NAACCR_ID CODE_TABLE SETROOT PANELELEMENTS SURVEY_QUEST_TEXT SURVEY_QUEST_SRC UNITSREQUIRED SUBMITTED_UNITS RELATEDNAMES2 SHORTNAME ORDER_OBS CDISC_COMMON_TESTS HL7_FIELD_SUBFIELD_ID EXTERNAL_COPYRIGHT_NOTICE EXAMPLE_UNITS INPC_PERCENTAGE LONG_COMMON_NAME".split()

#============================================================

# NOTE(review): going by their names these are presumably the LOINC codes
# denoting GFR, body height and body weight measurements - they are not
# referenced anywhere else in this file, confirm against the callers
LOINC_gfr_quantity = ['33914-3', '45066-8', '48642-3', '48643-1', '50044-7', '50210-4', '50384-7', '62238-1', '69405-9', '70969-1']
LOINC_height = ['3137-7', '3138-5', '8301-4', '8302-2', '8305-5', '8306-3', '8307-1', '8308-9']
LOINC_weight = ['18833-4', '29463-7', '3141-9', '3142-7', '8335-2', '8339-4', '8344-4', '8346-9', '8351-9']
 42   
 43  #============================================================ 
def loinc2term(loinc=None):
    """Look up the term ref.v_coded_terms holds for a LOINC code.

    The query prefers a term in the current database language
    (i18n.get_curr_lang()), falls back to 'en_EN' and finally
    to a term in whatever language.

    Args:
        loinc: the LOINC code to look up

    Returns:
        A list holding the first column of each row returned,
        or [] if no term was found.
    """
    # NOTE: will return [NULL] on no-match due to the coalesce()
    # NOTE(review): each sub-SELECT is used as a scalar expression, so this
    # presumably relies on the view yielding at most one row per sub-SELECT - confirm
    cmd = u"""
SELECT coalesce (
    (SELECT term
    FROM ref.v_coded_terms
    WHERE
        coding_system = 'LOINC'
            AND
        code = %(loinc)s
            AND
        lang = i18n.get_curr_lang()
    ),
    (SELECT term
    FROM ref.v_coded_terms
    WHERE
        coding_system = 'LOINC'
            AND
        code = %(loinc)s
            AND
        lang = 'en_EN'
    ),
    (SELECT term
    FROM ref.v_coded_terms
    WHERE
        coding_system = 'LOINC'
            AND
        code = %(loinc)s
    )
)"""
    query = {'cmd': cmd, 'args': {'loinc': loinc}}
    rows, col_idx = gmPG2.run_ro_queries(queries = [query], get_col_idx = False)

    # the coalesce() always produces a row, NULL meaning "no term known"
    best_term = rows[0][0]
    if best_term is None:
        return []

    return [ row[0] for row in rows ]
82 #============================================================
def split_LOINCDBTXT(input_fname=None, data_fname=None, license_fname=None):
    """Split a raw LOINC text file into a license file and a data file.

    Everything up to and including the FIRST line containing
    <license_delimiter> goes into the license file, everything
    after that line goes into the data file. Input is decoded as
    <file_encoding>, both output files are written as utf8.

    Args:
        input_fname: name of the raw LOINC file to split
        data_fname: name of the data file to write, a unique
            filename is generated if None
        license_fname: name of the license file to write, a unique
            filename is generated if None

    Returns:
        The tuple (data_fname, license_fname) - note the order.
        If the delimiter was not found no data file is written.
    """
    _log.debug('splitting LOINC source file [%s]', input_fname)

    if license_fname is None:
        license_fname = gmTools.get_unique_filename(prefix = 'loinc_license-', suffix = '.txt')
    _log.debug('LOINC header: %s', license_fname)

    if data_fname is None:
        data_fname = gmTools.get_unique_filename(prefix = 'loinc_data-', suffix = '.csv')
    _log.debug('LOINC data: %s', data_fname)

    # plain 'r': codecs.open() opens in binary mode regardless and
    # Python 3.11+ rejects the 'U' flag outright
    loinc_file = codecs.open(input_fname, 'r', encoding = file_encoding, errors = 'replace')
    try:
        delimiter_seen = False
        out_file = codecs.open(license_fname, 'w', encoding = 'utf8', errors = 'replace')
        try:
            for line in loinc_file:
                # the delimiter line itself still belongs to the license
                out_file.write(line)
                if delimiter_seen:
                    continue
                if license_delimiter not in line:
                    continue
                # switch output exactly once: re-opening the data file on a
                # later line which happens to contain the delimiter text
                # would truncate the data written so far
                delimiter_seen = True
                out_file.close()
                out_file = codecs.open(data_fname, 'w', encoding = 'utf8', errors = 'replace')
        finally:
            out_file.close()
    finally:
        loinc_file.close()

    if not delimiter_seen:
        _log.warning('delimiter [%s] not found in [%s], no data file written', license_delimiter, input_fname)

    return data_fname, license_fname
111 #============================================================
def map_field_names(data_fname='loinc_data.csv'):
    """Unfinished stub: sniff the first line of a LOINC data file for a header.

    The first line of the utf8 encoded file is handed to
    csv.Sniffer().has_header() but the result is not acted upon yet.

    Args:
        data_fname: name of the LOINC data file

    Returns:
        None, always.
    """
    # plain 'r': codecs.open() opens in binary mode regardless and
    # Python 3.11+ rejects the 'U' flag outright
    csv_file = codecs.open(data_fname, 'r', 'utf8', 'replace')
    try:
        first_line = csv_file.readline()
    finally:
        # do not leak the file handle
        csv_file.close()

    sniffer = csv.Sniffer()
    if sniffer.has_header(first_line):
        # TODO: the actual mapping of field names is not implemented
        pass
119 #============================================================
def get_version(license_fname='loinc_license.txt'):
    """Extract the LOINC version from a LOINC license file.

    The version is whatever follows <version_tag> on the first
    line starting with that tag, stripped of surrounding whitespace.

    Args:
        license_fname: name of the utf8 encoded license file

    Returns:
        The version string, or None if no line starts with <version_tag>.
    """
    # plain 'r': codecs.open() opens in binary mode regardless and
    # Python 3.11+ rejects the 'U' flag outright
    in_file = codecs.open(license_fname, 'r', encoding = 'utf8', errors = 'replace')
    try:
        for line in in_file:
            if line.startswith(version_tag):
                return line[len(version_tag):].strip()
    finally:
        # also runs on the early return and on read errors
        in_file.close()

    return None
132 #============================================================
def _empty_loinc_staging(conn):
    """Delete all rows from ref.loinc_staging and commit on <conn>."""
    curs = conn.cursor()
    cmd = u"""DELETE FROM ref.loinc_staging"""
    gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd}])
    curs.close()
    conn.commit()
    _log.debug('staging table emptied')

#------------------------------------------------------------
def loinc_import(data_fname=None, license_fname=None, version=None, conn=None, lang='en_EN'):
    """Import LOINC data into the database.

    Steps, in order:
        - empty ref.loinc_staging
        - load the data file row by row into ref.loinc_staging,
          skipping the first (header) row
        - create or update the matching ref.data_source record
        - insert into ref.loinc those staged codes not yet known for
          this data source, then update the details of all matching rows
        - empty ref.loinc_staging again

    Work is committed on <conn> after emptying the staging table,
    after loading it, and after the transfer into ref.loinc.

    Args:
        data_fname: name of the utf8 encoded, tab separated data file
        license_fname: name of the utf8 encoded license file, its
            content becomes the description of the data source
        version: LOINC version, detected from the license file
            by get_version() if None
        conn: the database connection to work on
        lang: language to record for the data source

    Returns:
        True

    Raises:
        ValueError: if no version was given and none can be detected
    """
    if version is None:
        version = get_version(license_fname = license_fname)

    if version is None:
        raise ValueError('cannot detect LOINC version')

    _log.debug('importing LOINC version [%s]', version)

    # clean out staging area
    _empty_loinc_staging(conn)

    # import data from csv file into staging table
    # (plain 'r': codecs.open() opens in binary mode regardless
    #  and Python 3.11+ rejects the 'U' flag outright)
    csv_file = codecs.open(data_fname, 'r', 'utf8', 'replace')
    try:
        loinc_reader = gmTools.unicode_csv_reader(csv_file, delimiter = "\t", quotechar = '"')
        curs = conn.cursor()
        # one "%s" placeholder per LOINC field
        cmd = u"""INSERT INTO ref.loinc_staging values (%s%%s)""" % (u'%s, ' * (len(loinc_fields) - 1))
        header_skipped = False
        for loinc_line in loinc_reader:
            # the first row holds the field names rather than data
            if not header_skipped:
                header_skipped = True
                continue
            gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd, 'args': loinc_line}])
        curs.close()
        conn.commit()
    finally:
        # do not leak the file handle if loading fails
        csv_file.close()
    _log.debug('staging table loaded')

    # create data source record
    in_file = codecs.open(license_fname, 'r', encoding = 'utf8', errors = 'replace')
    try:
        desc = in_file.read()
    finally:
        in_file.close()
    args = {'ver': version, 'desc': desc, 'url': origin_url, 'name_long': name_long, 'name_short': name_short, 'lang': lang}
    queries = [
        # insert if not existing
        {'args': args, 'cmd': u"""
            INSERT INTO ref.data_source (name_long, name_short, version) SELECT
                %(name_long)s,
                %(name_short)s,
                %(ver)s
            WHERE NOT EXISTS (
                SELECT 1 FROM ref.data_source WHERE
                    name_long = %(name_long)s
                        AND
                    name_short = %(name_short)s
                        AND
                    version = %(ver)s
            )"""
        },
        # update non-unique fields
        {'args': args, 'cmd': u"""
            UPDATE ref.data_source SET
                description = %(desc)s,
                source = %(url)s,
                lang = %(lang)s
            WHERE
                name_long = %(name_long)s
                    AND
                name_short = %(name_short)s
                    AND
                version = %(ver)s
            """
        },
        # retrieve PK of data source
        {'args': args, 'cmd': u"""SELECT pk FROM ref.data_source WHERE name_short = %(name_short)s AND version = %(ver)s"""}
    ]
    curs = conn.cursor()
    rows, idx = gmPG2.run_rw_queries(link_obj = curs, queries = queries, return_data = True)
    data_src_pk = rows[0][0]
    curs.close()
    _log.debug('data source record created or updated, pk is #%s', data_src_pk)

    # import from staging table to real table
    args = {'src_pk': data_src_pk}
    queries = []
    # 1) add codes not yet known for this data source, the term being
    #    the long common name or else the ":"-joined LOINC name parts
    queries.append ({
        'args': args,
        'cmd': u"""
            INSERT INTO ref.loinc (
                fk_data_source, term, code
            )
            SELECT
                %(src_pk)s,
                coalesce (
                    nullif(long_common_name, ''),
                    (
                        coalesce(nullif(component, '') || ':', '') ||
                        coalesce(nullif(property, '') || ':', '') ||
                        coalesce(nullif(time_aspect, '') || ':', '') ||
                        coalesce(nullif(system, '') || ':', '') ||
                        coalesce(nullif(scale_type, '') || ':', '') ||
                        coalesce(nullif(method_type, '') || ':', '')
                    )
                ),
                nullif(loinc_num, '')
            FROM
                ref.loinc_staging r_ls
            WHERE NOT EXISTS (
                SELECT 1 FROM ref.loinc r_l WHERE
                    r_l.fk_data_source = %(src_pk)s
                        AND
                    r_l.code = nullif(r_ls.loinc_num, '')
                        AND
                    r_l.term = coalesce (
                        nullif(r_ls.long_common_name, ''),
                        (
                            coalesce(nullif(r_ls.component, '') || ':', '') ||
                            coalesce(nullif(r_ls.property, '') || ':', '') ||
                            coalesce(nullif(r_ls.time_aspect, '') || ':', '') ||
                            coalesce(nullif(r_ls.system, '') || ':', '') ||
                            coalesce(nullif(r_ls.scale_type, '') || ':', '') ||
                            coalesce(nullif(r_ls.method_type, '') || ':', '')
                        )
                    )
            )"""
    })
    # 2) fill in the details of all rows matching a staged row, empty strings becoming NULL
    queries.append ({
        'args': args,
        'cmd': u"""
            UPDATE ref.loinc SET
                comment = nullif(r_ls.comments, ''),
                component = nullif(r_ls.component, ''),
                property = nullif(r_ls.property, ''),
                time_aspect = nullif(r_ls.time_aspect, ''),
                system = nullif(r_ls.system, ''),
                scale_type = nullif(r_ls.scale_type, ''),
                method_type = nullif(r_ls.method_type, ''),
                related_names_1_old = nullif(r_ls.related_names_1_old, ''),
                grouping_class = nullif(r_ls.class, ''),
                loinc_internal_source = nullif(r_ls.source, ''),
                dt_last_change = nullif(r_ls.dt_last_change, ''),
                change_type = nullif(r_ls.change_type, ''),
                answer_list = nullif(r_ls.answer_list, ''),
                code_status = nullif(r_ls.status, ''),
                maps_to = nullif(r_ls.map_to, ''),
                scope = nullif(r_ls.scope, ''),
                normal_range = nullif(r_ls.normal_range, ''),
                ipcc_units = nullif(r_ls.ipcc_units, ''),
                reference = nullif(r_ls.reference, ''),
                exact_component_synonym = nullif(r_ls.exact_component_synonym, ''),
                molar_mass = nullif(r_ls.molar_mass, ''),
                grouping_class_type = nullif(r_ls.class_type, '')::smallint,
                formula = nullif(r_ls.formula, ''),
                species = nullif(r_ls.species, ''),
                example_answers = nullif(r_ls.example_answers, ''),
                acs_synonyms = nullif(r_ls.acs_synonyms, ''),
                base_name = nullif(r_ls.base_name, ''),
                final = nullif(r_ls.final, ''),
                naa_ccr_id = nullif(r_ls.naa_ccr_id, ''),
                code_table = nullif(r_ls.code_table, ''),
                is_set_root = nullif(r_ls.is_set_root, '')::boolean,
                panel_elements = nullif(r_ls.panel_elements, ''),
                survey_question_text = nullif(r_ls.survey_question_text, ''),
                survey_question_source = nullif(r_ls.survey_question_source, ''),
                units_required = nullif(r_ls.units_required, ''),
                submitted_units = nullif(r_ls.submitted_units, ''),
                related_names_2 = nullif(r_ls.related_names_2, ''),
                short_name = nullif(r_ls.short_name, ''),
                order_obs = nullif(r_ls.order_obs, ''),
                cdisc_common_tests = nullif(r_ls.cdisc_common_tests, ''),
                hl7_field_subfield_id = nullif(r_ls.hl7_field_subfield_id, ''),
                external_copyright_notice = nullif(r_ls.external_copyright_notice, ''),
                example_units = nullif(r_ls.example_units, ''),
                inpc_percentage = nullif(r_ls.inpc_percentage, ''),
                long_common_name = nullif(r_ls.long_common_name, '')
            FROM
                ref.loinc_staging r_ls
            WHERE
                fk_data_source = %(src_pk)s
                    AND
                code = nullif(r_ls.loinc_num, '')
                    AND
                term = coalesce (
                    nullif(r_ls.long_common_name, ''),
                    (
                        coalesce(nullif(r_ls.component, '') || ':', '') ||
                        coalesce(nullif(r_ls.property, '') || ':', '') ||
                        coalesce(nullif(r_ls.time_aspect, '') || ':', '') ||
                        coalesce(nullif(r_ls.system, '') || ':', '') ||
                        coalesce(nullif(r_ls.scale_type, '') || ':', '') ||
                        coalesce(nullif(r_ls.method_type, '') || ':', '')
                    )
                )
        """
    })
    curs = conn.cursor()
    gmPG2.run_rw_queries(link_obj = curs, queries = queries)
    curs.close()
    conn.commit()
    _log.debug('transfer from staging table to real table done')

    # clean out staging area
    _empty_loinc_staging(conn)

    return True

#============================================================
# SQL fragments for cLOINCMatchProvider
#
# Each fragment selects the columns (data, field_label, list_label).
# The %(fragment_condition)s placeholder stands for "<operator> <value>":
# cLOINCMatchProvider either substitutes it itself (two-word input)
# or leaves it in the query it hands to its base class.
#
# Within this file _SQL_LOINC_from_i18n_coded_term and
# _SQL_LOINC_from_en_EN_coded_term are only referenced from
# commented-out code in cLOINCMatchProvider.
#------------------------------------------------------------
# LOINC codes already in use by test types
_SQL_LOINC_from_test_type = u"""
-- from test type
SELECT
    loinc AS data,
    loinc AS field_label,
    (loinc || ': ' || abbrev || ' (' || name || ')') AS list_label
FROM clin.test_type
WHERE loinc %(fragment_condition)s
"""

# LOINC codes/terms in the current database language
_SQL_LOINC_from_i18n_coded_term = u"""
-- from coded term, in user language
SELECT
    code AS data,
    code AS field_label,
    (code || ': ' || term) AS list_label
FROM ref.v_coded_terms
WHERE
    coding_system = 'LOINC'
        AND
    lang = i18n.get_curr_lang()
        AND
    (code %(fragment_condition)s
        OR
    term %(fragment_condition)s)
"""

# LOINC codes/terms in English
_SQL_LOINC_from_en_EN_coded_term = u"""
-- from coded term, in English
SELECT
    code AS data,
    code AS field_label,
    (code || ': ' || term) AS list_label
FROM ref.v_coded_terms
WHERE
    coding_system = 'LOINC'
        AND
    lang = 'en_EN'
        AND
    (code %(fragment_condition)s
        OR
    term %(fragment_condition)s)
"""

# LOINC codes/terms regardless of language
_SQL_LOINC_from_any_coded_term = u"""
-- from coded term, in any language
SELECT
    code AS data,
    code AS field_label,
    (code || ': ' || term) AS list_label
FROM ref.v_coded_terms
WHERE
    coding_system = 'LOINC'
        AND
    (code %(fragment_condition)s
        OR
    term %(fragment_condition)s)
"""

class cLOINCMatchProvider(gmMatchProvider.cMatchProvider_SQL2):
    """Match provider suggesting LOINC codes.

    Candidates are the LOINC codes already used by test types
    (clin.test_type) plus the LOINC terms in ref.v_coded_terms
    in any language. At most 75 matches, ordered by list label,
    are asked for.

    Input consisting of two digit-free words is special-cased by
    getMatchesByWord() and getMatchesBySubstr(): a candidate must
    then match BOTH words (INTERSECT of one query per word).
    """

    # digit-free text on both sides of some whitespace
    # (regex.LOCALE dropped: Python 3 rejects it on text patterns
    #  and in combination with regex.UNICODE)
    _pattern = regex.compile(r'^\D+\s+\D+$', regex.UNICODE)

    # the language specific fragments (_SQL_LOINC_from_i18n_coded_term,
    # _SQL_LOINC_from_en_EN_coded_term) are deliberately not part of the UNION
    _normal_query = u"""
SELECT DISTINCT ON (list_label)
    data,
    field_label,
    list_label
FROM (
    (%s) UNION ALL (
    %s)
) AS all_known_loinc""" % (
        _SQL_LOINC_from_test_type,
        _SQL_LOINC_from_any_coded_term
    )

    # appended to _normal_query for single-fragment searches
    _loinc_order_and_limit = u'\nORDER BY list_label\nLIMIT 75'

    # combines the two per-word queries of a two-word search
    _loinc_intersect_query = u"SELECT * FROM (\n(%s\n) INTERSECT (%s)\n) AS intersected_matches\nORDER BY list_label\nLIMIT 75"

    #--------------------------------------------------------
    def _loinc_split_two_words(self, aFragment):
        """Return [word1, rest] if aFragment qualifies for two-word matching, else None."""
        if not cLOINCMatchProvider._pattern.match(aFragment):
            return None
        # _pattern accepts ANY whitespace (tabs, runs of blanks, leading
        # blanks), so split on whitespace rather than on a single u' ' -
        # the latter fails to unpack on tab separated input and yields
        # an empty first word on leading blanks
        words = aFragment.split(None, 1)
        if len(words) != 2:
            # whitespace-only "words", fall back to normal matching
            return None
        return words

    #--------------------------------------------------------
    def _loinc_match_both_words(self, condition_a, value_a, condition_b, value_b):
        """Run the INTERSECT of one _normal_query per word.

        NOTE(review): the conditions carry a doubled "%%" and the
        fragment condition passed on is a dummy - presumably because
        the base class applies another round of %-substitution before
        the query is run with self._args; confirm in gmMatchProvider.
        """
        query_a = cLOINCMatchProvider._normal_query % {'fragment_condition': condition_a}
        query_b = cLOINCMatchProvider._normal_query % {'fragment_condition': condition_b}
        self._args['fragmentA'] = value_a
        self._args['fragmentB'] = value_b
        self._queries = [cLOINCMatchProvider._loinc_intersect_query % (query_a, query_b)]
        return self._find_matches(u'dummy')

    #--------------------------------------------------------
    def getMatchesByPhrase(self, aFragment):
        """Return matches for aFragment at start of phrases."""

        self._queries = [cLOINCMatchProvider._normal_query + cLOINCMatchProvider._loinc_order_and_limit]
        return gmMatchProvider.cMatchProvider_SQL2.getMatchesByPhrase(self, aFragment)

    #--------------------------------------------------------
    def getMatchesByWord(self, aFragment):
        """Return matches for aFragment at start of words inside phrases.

        Two digit-free words must both match, each at the start
        of the candidate or after a blank.
        """
        words = self._loinc_split_two_words(aFragment)
        if words is not None:
            fragmentA, fragmentB = words
            return self._loinc_match_both_words (
                u'~* %%(fragmentA)s',
                u"( %s)|(^%s)" % (fragmentA, fragmentA),
                u'~* %%(fragmentB)s',
                u"( %s)|(^%s)" % (fragmentB, fragmentB)
            )

        self._queries = [cLOINCMatchProvider._normal_query + cLOINCMatchProvider._loinc_order_and_limit]
        return gmMatchProvider.cMatchProvider_SQL2.getMatchesByWord(self, aFragment)

    #--------------------------------------------------------
    def getMatchesBySubstr(self, aFragment):
        """Return matches for aFragment as a true substring.

        Two digit-free words must both occur somewhere in the candidate.
        """
        words = self._loinc_split_two_words(aFragment)
        if words is not None:
            fragmentA, fragmentB = words
            return self._loinc_match_both_words (
                u"ILIKE %%(fragmentA)s",
                u'%%%s%%' % fragmentA,
                u"ILIKE %%(fragmentB)s",
                u'%%%s%%' % fragmentB
            )

        self._queries = [cLOINCMatchProvider._normal_query + cLOINCMatchProvider._loinc_order_and_limit]
        return gmMatchProvider.cMatchProvider_SQL2.getMatchesBySubstr(self, aFragment)
#============================================================
# main
#------------------------------------------------------------
if __name__ == "__main__":

    # self-tests only run when invoked as: <script> test <argument>
    if len(sys.argv) < 2:
        sys.exit()

    if sys.argv[1] != 'test':
        sys.exit()

    # NOTE(review): gmLog2 is not used by name, presumably it is
    # imported for its side effects - confirm
    from Gnumed.pycommon import gmLog2
    from Gnumed.pycommon import gmI18N

    gmI18N.activate_locale()
#    gmDateTime.init()

    #--------------------------------------------------------
    def _test_argument(description):
        """Return the command line argument following 'test', exit with a usage hint if missing."""
        if len(sys.argv) < 3:
            sys.exit('usage: %s test <%s>' % (sys.argv[0], description))
        return sys.argv[2]
    #--------------------------------------------------------
    def test_loinc_split():
        """Split the LOINC file named on the command line, print the names of the resulting files."""
        # single-argument print(...) gives the same output under Python 2 and 3
        print(split_LOINCDBTXT(input_fname = _test_argument('LOINC file')))
    #--------------------------------------------------------
    def test_loinc_import():
        """Import LOINC 2.26.

        NOTE(review): passes neither file names nor a connection,
        which loinc_import() needs - fix before enabling this test.
        """
        loinc_import(version = '2.26')
    #--------------------------------------------------------
    def test_loinc2term():
        """Print the term(s) known for the LOINC code given on the command line."""
        loinc = _test_argument('LOINC code')
        term = loinc2term(loinc)
        print('%s -> %s' % (loinc, term))
    #--------------------------------------------------------
    test_loinc_split()
    #test_loinc_import()
    #test_loinc2term()

#============================================================