Package Gnumed :: Package business :: Module gmLOINC
[frames] | no frames]

Source Code for Module Gnumed.business.gmLOINC

  1  # -*- coding: utf-8 -*- 
  2  """LOINC handling code. 
  3   
  4  http://loinc.org 
  5   
  6  license: GPL v2 or later 
  7  """ 
  8  #============================================================ 
  9  __author__ = "K.Hilbert <Karsten.Hilbert@gmx.net>" 
 10   
 11  import sys 
 12  import io 
 13  import logging 
 14  import csv 
 15  import re as regex 
 16   
 17   
 18  if __name__ == '__main__': 
 19          sys.path.insert(0, '../../') 
 20  from Gnumed.pycommon import gmPG2 
 21  from Gnumed.pycommon import gmTools 
 22  from Gnumed.pycommon import gmMatchProvider 
 23   
 24   
 25  _log = logging.getLogger('gm.loinc') 
 26   
 27   
 28  origin_url = 'http://loinc.org' 
 29  file_encoding = 'latin1'                        # encoding is empirical 
 30  license_delimiter = 'Clip Here for Data' 
 31  version_tag = 'LOINC(R) Database Version' 
 32  name_long = 'LOINC® (Logical Observation Identifiers Names and Codes)' 
 33  name_short = 'LOINC' 
 34   
 35  loinc_fields = "LOINC_NUM COMPONENT PROPERTY TIME_ASPCT SYSTEM SCALE_TYP METHOD_TYP RELAT_NMS CLASS SOURCE DT_LAST_CH CHNG_TYPE COMMENTS ANSWERLIST STATUS MAP_TO SCOPE NORM_RANGE IPCC_UNITS REFERENCE EXACT_CMP_SY MOLAR_MASS CLASSTYPE FORMULA SPECIES EXMPL_ANSWERS ACSSYM BASE_NAME FINAL NAACCR_ID CODE_TABLE SETROOT PANELELEMENTS SURVEY_QUEST_TEXT SURVEY_QUEST_SRC UNITSREQUIRED SUBMITTED_UNITS RELATEDNAMES2 SHORTNAME ORDER_OBS CDISC_COMMON_TESTS HL7_FIELD_SUBFIELD_ID EXTERNAL_COPYRIGHT_NOTICE EXAMPLE_UNITS INPC_PERCENTAGE LONG_COMMON_NAME".split() 
 36   
 37  #============================================================ 
 38   
 39  LOINC_creatinine_quantity = ['2160-0', '14682-9', '40264-4', '40248-7'] 
 40  LOINC_gfr_quantity = ['33914-3', '45066-8', '48642-3', '48643-1', '50044-7', '50210-4', '50384-7', '62238-1', '69405-9', '70969-1'] 
 41  LOINC_height = ['3137-7', '3138-5', '8301-4', '8302-2', '8305-5', '8306-3', '8307-1', '8308-9'] 
 42  LOINC_weight = ['18833-4', '29463-7', '3141-9', '3142-7', '8335-2', '8339-4', '8344-4', '8346-9', '8351-9'] 
 43  LOINC_rr_quantity = ['8478-0', '8448-3', '8449-1', '8456-6', '8457-4', '8458-2', '55284-4', '50403-5', '50402-7', '45372-0'] 
 44  LOINC_heart_rate_quantity = ['8867-4', '67129-7', '40443-4', '69000-8', '69001-6', '68999-2'] 
 45  LOINC_inr_quantity = ['34714-6', '46418-0', '6301-6', '38875-1'] 
 46   
 47  #============================================================ 
48 -def loinc2data(loinc):
49 cmd = 'SELECT * FROM ref.loinc WHERE code = %(loinc)s' 50 args = {'loinc': loinc} 51 rows, idx = gmPG2.run_ro_queries(queries = [{'cmd': cmd, 'args': args}], get_col_idx = False) 52 if len(rows) == 0: 53 return [] 54 return rows[0]
55 56 #============================================================
57 -def loinc2term(loinc=None):
58 59 # NOTE: will return [NULL] on no-match due to the coalesce() 60 cmd = """ 61 SELECT coalesce ( 62 (SELECT term 63 FROM ref.v_coded_terms 64 WHERE 65 coding_system = 'LOINC' 66 AND 67 code = %(loinc)s 68 AND 69 lang = i18n.get_curr_lang() 70 ), 71 (SELECT term 72 FROM ref.v_coded_terms 73 WHERE 74 coding_system = 'LOINC' 75 AND 76 code = %(loinc)s 77 AND 78 lang = 'en_EN' 79 ), 80 (SELECT term 81 FROM ref.v_coded_terms 82 WHERE 83 coding_system = 'LOINC' 84 AND 85 code = %(loinc)s 86 ) 87 )""" 88 args = {'loinc': loinc} 89 rows, idx = gmPG2.run_ro_queries(queries = [{'cmd': cmd, 'args': args}], get_col_idx = False) 90 91 if rows[0][0] is None: 92 return [] 93 94 return [ r[0] for r in rows ]
95 96 #============================================================
97 -def split_LOINCDBTXT(input_fname=None, data_fname=None, license_fname=None):
98 99 _log.debug('splitting LOINC source file [%s]', input_fname) 100 101 if license_fname is None: 102 license_fname = gmTools.get_unique_filename(prefix = 'loinc_license-', suffix = '.txt') 103 _log.debug('LOINC header: %s', license_fname) 104 105 if data_fname is None: 106 data_fname = gmTools.get_unique_filename(prefix = 'loinc_data-', suffix = '.csv') 107 _log.debug('LOINC data: %s', data_fname) 108 109 loinc_file = io.open(input_fname, mode = 'rt', encoding = file_encoding, errors = 'replace') 110 out_file = io.open(license_fname, mode = 'wt', encoding = 'utf8', errors = 'replace') 111 112 for line in loinc_file: 113 114 if license_delimiter in line: 115 out_file.write(line) 116 out_file.close() 117 out_file = io.open(data_fname, mode = 'wt', encoding = 'utf8', errors = 'replace') 118 continue 119 120 out_file.write(line) 121 122 out_file.close() 123 124 return data_fname, license_fname
125 126 #============================================================
127 -def map_field_names(data_fname='loinc_data.csv'):
128 129 csv_file = io.open(data_fname, mode = 'rt', encoding = 'utf8', errors = 'replace') 130 first_line = csv_file.readline() 131 sniffer = csv.Sniffer() 132 if sniffer.has_header(first_line): 133 pass
134 135 #============================================================
136 -def get_version(license_fname='loinc_license.txt'):
137 138 in_file = io.open(license_fname, mode = 'rt', encoding = 'utf8', errors = 'replace') 139 140 version = None 141 for line in in_file: 142 if line.startswith(version_tag): 143 version = line[len(version_tag):].strip() 144 break 145 146 in_file.close() 147 return version
148 149 #============================================================
150 -def loinc_import(data_fname=None, license_fname=None, version=None, conn=None, lang='en_EN'):
151 152 if version is None: 153 version = get_version(license_fname = license_fname) 154 155 if version is None: 156 raise ValueError('cannot detect LOINC version') 157 158 _log.debug('importing LOINC version [%s]', version) 159 160 # clean out staging area 161 curs = conn.cursor() 162 cmd = """DELETE FROM staging.loinc_staging""" 163 gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd}]) 164 curs.close() 165 conn.commit() 166 _log.debug('staging table emptied') 167 168 # import data from csv file into staging table 169 csv_file = io.open(data_fname, mode = 'rt', encoding = 'utf8', errors = 'replace') 170 loinc_reader = gmTools.unicode_csv_reader(csv_file, delimiter = "\t", quotechar = '"') 171 curs = conn.cursor() 172 cmd = """INSERT INTO staging.loinc_staging values (%s%%s)""" % ('%s, ' * (len(loinc_fields) - 1)) 173 first = False 174 for loinc_line in loinc_reader: 175 if not first: 176 first = True 177 continue 178 gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd, 'args': loinc_line}]) 179 curs.close() 180 conn.commit() 181 csv_file.close() 182 _log.debug('staging table loaded') 183 184 # create data source record 185 in_file = io.open(license_fname, mode = 'rt', encoding = 'utf8', errors = 'replace') 186 desc = in_file.read() 187 in_file.close() 188 args = {'ver': version, 'desc': desc, 'url': origin_url, 'name_long': name_long, 'name_short': name_short, 'lang': lang} 189 queries = [ 190 # insert if not existing 191 {'args': args, 'cmd': """ 192 INSERT INTO ref.data_source (name_long, name_short, version) SELECT 193 %(name_long)s, 194 %(name_short)s, 195 %(ver)s 196 WHERE NOT EXISTS ( 197 SELECT 1 FROM ref.data_source WHERE 198 name_long = %(name_long)s 199 AND 200 name_short = %(name_short)s 201 AND 202 version = %(ver)s 203 )""" 204 }, 205 # update non-unique fields 206 {'args': args, 'cmd': """ 207 UPDATE ref.data_source SET 208 description = %(desc)s, 209 source = %(url)s, 210 lang = %(lang)s 211 WHERE 212 name_long = %(name_long)s 213 AND 214 name_short = %(name_short)s 215 AND 216 version = %(ver)s 217 """ 218 }, 219 # retrieve PK of data source 220 {'args': args, 'cmd': """SELECT pk FROM ref.data_source WHERE name_short = %(name_short)s AND version = %(ver)s"""} 221 ] 222 curs = conn.cursor() 223 rows, idx = gmPG2.run_rw_queries(link_obj = curs, queries = queries, return_data = True) 224 data_src_pk = rows[0][0] 225 curs.close() 226 _log.debug('data source record created or updated, pk is #%s', data_src_pk) 227 228 # import from staging table to real table 229 args = {'src_pk': data_src_pk} 230 queries = [] 231 queries.append ({ 232 'args': args, 233 'cmd': """ 234 INSERT INTO ref.loinc ( 235 fk_data_source, term, code 236 ) 237 SELECT 238 %(src_pk)s, 239 coalesce ( 240 nullif(long_common_name, ''), 241 ( 242 coalesce(nullif(component, '') || ':', '') || 243 coalesce(nullif(property, '') || ':', '') || 244 coalesce(nullif(time_aspect, '') || ':', '') || 245 coalesce(nullif(system, '') || ':', '') || 246 coalesce(nullif(scale_type, '') || ':', '') || 247 coalesce(nullif(method_type, '') || ':', '') 248 ) 249 ), 250 nullif(loinc_num, '') 251 FROM 252 staging.loinc_staging st_ls 253 WHERE NOT EXISTS ( 254 SELECT 1 FROM ref.loinc r_l WHERE 255 r_l.fk_data_source = %(src_pk)s 256 AND 257 r_l.code = nullif(st_ls.loinc_num, '') 258 AND 259 r_l.term = coalesce ( 260 nullif(st_ls.long_common_name, ''), 261 ( 262 coalesce(nullif(st_ls.component, '') || ':', '') || 263 coalesce(nullif(st_ls.property, '') || ':', '') || 264 coalesce(nullif(st_ls.time_aspect, '') || ':', '') || 265 coalesce(nullif(st_ls.system, '') || ':', '') || 266 coalesce(nullif(st_ls.scale_type, '') || ':', '') || 267 coalesce(nullif(st_ls.method_type, '') || ':', '') 268 ) 269 ) 270 )""" 271 }) 272 queries.append ({ 273 'args': args, 274 'cmd': """ 275 UPDATE ref.loinc SET 276 comment = nullif(st_ls.comments, ''), 277 component = nullif(st_ls.component, ''), 278 property = nullif(st_ls.property, ''), 279 time_aspect = nullif(st_ls.time_aspect, ''), 280 system = nullif(st_ls.system, ''), 281 scale_type = nullif(st_ls.scale_type, ''), 282 method_type = nullif(st_ls.method_type, ''), 283 related_names_1_old = nullif(st_ls.related_names_1_old, ''), 284 grouping_class = nullif(st_ls.class, ''), 285 loinc_internal_source = nullif(st_ls.source, ''), 286 dt_last_change = nullif(st_ls.dt_last_change, ''), 287 change_type = nullif(st_ls.change_type, ''), 288 answer_list = nullif(st_ls.answer_list, ''), 289 code_status = nullif(st_ls.status, ''), 290 maps_to = nullif(st_ls.map_to, ''), 291 scope = nullif(st_ls.scope, ''), 292 normal_range = nullif(st_ls.normal_range, ''), 293 ipcc_units = nullif(st_ls.ipcc_units, ''), 294 reference = nullif(st_ls.reference, ''), 295 exact_component_synonym = nullif(st_ls.exact_component_synonym, ''), 296 molar_mass = nullif(st_ls.molar_mass, ''), 297 grouping_class_type = nullif(st_ls.class_type, '')::smallint, 298 formula = nullif(st_ls.formula, ''), 299 species = nullif(st_ls.species, ''), 300 example_answers = nullif(st_ls.example_answers, ''), 301 acs_synonyms = nullif(st_ls.acs_synonyms, ''), 302 base_name = nullif(st_ls.base_name, ''), 303 final = nullif(st_ls.final, ''), 304 naa_ccr_id = nullif(st_ls.naa_ccr_id, ''), 305 code_table = nullif(st_ls.code_table, ''), 306 is_set_root = nullif(st_ls.is_set_root, '')::boolean, 307 panel_elements = nullif(st_ls.panel_elements, ''), 308 survey_question_text = nullif(st_ls.survey_question_text, ''), 309 survey_question_source = nullif(st_ls.survey_question_source, ''), 310 units_required = nullif(st_ls.units_required, ''), 311 submitted_units = nullif(st_ls.submitted_units, ''), 312 related_names_2 = nullif(st_ls.related_names_2, ''), 313 short_name = nullif(st_ls.short_name, ''), 314 order_obs = nullif(st_ls.order_obs, ''), 315 cdisc_common_tests = nullif(st_ls.cdisc_common_tests, ''), 316 hl7_field_subfield_id = nullif(st_ls.hl7_field_subfield_id, ''), 317 external_copyright_notice = nullif(st_ls.external_copyright_notice, ''), 318 example_units = nullif(st_ls.example_units, ''), 319 inpc_percentage = nullif(st_ls.inpc_percentage, ''), 320 long_common_name = nullif(st_ls.long_common_name, '') 321 FROM 322 staging.loinc_staging st_ls 323 WHERE 324 fk_data_source = %(src_pk)s 325 AND 326 code = nullif(st_ls.loinc_num, '') 327 AND 328 term = coalesce ( 329 nullif(st_ls.long_common_name, ''), 330 ( 331 coalesce(nullif(st_ls.component, '') || ':', '') || 332 coalesce(nullif(st_ls.property, '') || ':', '') || 333 coalesce(nullif(st_ls.time_aspect, '') || ':', '') || 334 coalesce(nullif(st_ls.system, '') || ':', '') || 335 coalesce(nullif(st_ls.scale_type, '') || ':', '') || 336 coalesce(nullif(st_ls.method_type, '') || ':', '') 337 ) 338 ) 339 """ 340 }) 341 curs = conn.cursor() 342 gmPG2.run_rw_queries(link_obj = curs, queries = queries) 343 curs.close() 344 conn.commit() 345 _log.debug('transfer from staging table to real table done') 346 347 # clean out staging area 348 curs = conn.cursor() 349 cmd = """DELETE FROM staging.loinc_staging""" 350 gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd}]) 351 curs.close() 352 conn.commit() 353 _log.debug('staging table emptied') 354 355 return True
356 357 #============================================================ 358 _SQL_LOINC_from_test_type = """ 359 -- from test type 360 SELECT 361 loinc AS data, 362 loinc AS field_label, 363 (loinc || ': ' || abbrev || ' (' || name || ')') AS list_label 364 FROM clin.test_type 365 WHERE loinc %(fragment_condition)s 366 """ 367 368 _SQL_LOINC_from_i18n_coded_term = """ 369 -- from coded term, in user language 370 SELECT 371 code AS data, 372 code AS field_label, 373 (code || ': ' || term) AS list_label 374 FROM ref.v_coded_terms 375 WHERE 376 coding_system = 'LOINC' 377 AND 378 lang = i18n.get_curr_lang() 379 AND 380 (code %(fragment_condition)s 381 OR 382 term %(fragment_condition)s) 383 """ 384 385 _SQL_LOINC_from_en_EN_coded_term = """ 386 -- from coded term, in English 387 SELECT 388 code AS data, 389 code AS field_label, 390 (code || ': ' || term) AS list_label 391 FROM ref.v_coded_terms 392 WHERE 393 coding_system = 'LOINC' 394 AND 395 lang = 'en_EN' 396 AND 397 (code %(fragment_condition)s 398 OR 399 term %(fragment_condition)s) 400 """ 401 402 _SQL_LOINC_from_any_coded_term = """ 403 -- from coded term, in any language 404 SELECT 405 code AS data, 406 code AS field_label, 407 (code || ': ' || term) AS list_label 408 FROM ref.v_coded_terms 409 WHERE 410 coding_system = 'LOINC' 411 AND 412 (code %(fragment_condition)s 413 OR 414 term %(fragment_condition)s) 415 """ 416 417 #------------------------------------------------------------
418 -class cLOINCMatchProvider(gmMatchProvider.cMatchProvider_SQL2):
419 420 _pattern = regex.compile(r'^\D+\s+\D+$', regex.UNICODE) 421 422 _normal_query = """ 423 SELECT DISTINCT ON (list_label) 424 data, 425 field_label, 426 list_label 427 FROM ( 428 (%s) UNION ALL ( 429 %s) 430 ) AS all_known_loinc""" % ( 431 _SQL_LOINC_from_test_type, 432 _SQL_LOINC_from_any_coded_term 433 ) 434 #-- %s) UNION ALL ( 435 #-- %s) UNION ALL ( 436 # % 437 # _SQL_LOINC_from_i18n_coded_term, 438 # _SQL_LOINC_from_en_EN_coded_term, 439 440 #--------------------------------------------------------
441 - def getMatchesByPhrase(self, aFragment):
442 """Return matches for aFragment at start of phrases.""" 443 444 self._queries = [cLOINCMatchProvider._normal_query + '\nORDER BY list_label\nLIMIT 75'] 445 return gmMatchProvider.cMatchProvider_SQL2.getMatchesByPhrase(self, aFragment)
446 447 #--------------------------------------------------------
448 - def getMatchesByWord(self, aFragment):
449 """Return matches for aFragment at start of words inside phrases.""" 450 451 if cLOINCMatchProvider._pattern.match(aFragment): 452 fragmentA, fragmentB = aFragment.split(' ', 1) 453 query1 = cLOINCMatchProvider._normal_query % {'fragment_condition': '~* %%(fragmentA)s'} 454 self._args['fragmentA'] = "( %s)|(^%s)" % (fragmentA, fragmentA) 455 query2 = cLOINCMatchProvider._normal_query % {'fragment_condition': '~* %%(fragmentB)s'} 456 self._args['fragmentB'] = "( %s)|(^%s)" % (fragmentB, fragmentB) 457 self._queries = ["SELECT * FROM (\n(%s\n) INTERSECT (%s)\n) AS intersected_matches\nORDER BY list_label\nLIMIT 75" % (query1, query2)] 458 return self._find_matches('dummy') 459 460 self._queries = [cLOINCMatchProvider._normal_query + '\nORDER BY list_label\nLIMIT 75'] 461 return gmMatchProvider.cMatchProvider_SQL2.getMatchesByWord(self, aFragment)
462 463 #--------------------------------------------------------
464 - def getMatchesBySubstr(self, aFragment):
465 """Return matches for aFragment as a true substring.""" 466 467 if cLOINCMatchProvider._pattern.match(aFragment): 468 fragmentA, fragmentB = aFragment.split(' ', 1) 469 query1 = cLOINCMatchProvider._normal_query % {'fragment_condition': "ILIKE %%(fragmentA)s"} 470 self._args['fragmentA'] = '%%%s%%' % fragmentA 471 query2 = cLOINCMatchProvider._normal_query % {'fragment_condition': "ILIKE %%(fragmentB)s"} 472 self._args['fragmentB'] = '%%%s%%' % fragmentB 473 self._queries = ["SELECT * FROM (\n(%s\n) INTERSECT (%s)\n) AS intersected_matches\nORDER BY list_label\nLIMIT 75" % (query1, query2)] 474 return self._find_matches('dummy') 475 476 self._queries = [cLOINCMatchProvider._normal_query + '\nORDER BY list_label\nLIMIT 75'] 477 return gmMatchProvider.cMatchProvider_SQL2.getMatchesBySubstr(self, aFragment)
478 479 #============================================================ 480 # main 481 #------------------------------------------------------------ 482 if __name__ == "__main__": 483 484 if len(sys.argv) < 2: 485 sys.exit() 486 487 if sys.argv[1] != 'test': 488 sys.exit() 489 490 from Gnumed.pycommon import gmLog2 491 from Gnumed.pycommon import gmI18N 492 493 gmI18N.activate_locale() 494 # gmDateTime.init() 495 496 #--------------------------------------------------------
497 - def test_loinc_split():
498 print(split_LOINCDBTXT(input_fname = sys.argv[2]))
499 #--------------------------------------------------------
500 - def test_loinc_import():
501 loinc_import(version = '2.26')
502 #--------------------------------------------------------
503 - def test_loinc2term():
504 term = loinc2term(sys.argv[2]) 505 print(sys.argv[2], '->', term)
506 #-------------------------------------------------------- 507 test_loinc_split() 508 #test_loinc_import() 509 #test_loinc2term() 510 511 #============================================================ 512