1
2 """LOINC handling code.
3
4 http://loinc.org
5
6 license: GPL v2 or later
7 """
8
9 __author__ = "K.Hilbert <Karsten.Hilbert@gmx.net>"
10
11 import sys
12 import io
13 import logging
14 import csv
15 import re as regex
16
17
18 if __name__ == '__main__':
19 sys.path.insert(0, '../../')
20 from Gnumed.pycommon import gmPG2
21 from Gnumed.pycommon import gmTools
22 from Gnumed.pycommon import gmMatchProvider
23
24
25 _log = logging.getLogger('gm.loinc')
26
27
28 origin_url = 'http://loinc.org'
29 file_encoding = 'latin1'
30 license_delimiter = 'Clip Here for Data'
31 version_tag = 'LOINC(R) Database Version'
32 name_long = 'LOINC® (Logical Observation Identifiers Names and Codes)'
33 name_short = 'LOINC'
34
35 loinc_fields = "LOINC_NUM COMPONENT PROPERTY TIME_ASPCT SYSTEM SCALE_TYP METHOD_TYP RELAT_NMS CLASS SOURCE DT_LAST_CH CHNG_TYPE COMMENTS ANSWERLIST STATUS MAP_TO SCOPE NORM_RANGE IPCC_UNITS REFERENCE EXACT_CMP_SY MOLAR_MASS CLASSTYPE FORMULA SPECIES EXMPL_ANSWERS ACSSYM BASE_NAME FINAL NAACCR_ID CODE_TABLE SETROOT PANELELEMENTS SURVEY_QUEST_TEXT SURVEY_QUEST_SRC UNITSREQUIRED SUBMITTED_UNITS RELATEDNAMES2 SHORTNAME ORDER_OBS CDISC_COMMON_TESTS HL7_FIELD_SUBFIELD_ID EXTERNAL_COPYRIGHT_NOTICE EXAMPLE_UNITS INPC_PERCENTAGE LONG_COMMON_NAME".split()
36
37
38
39 LOINC_creatinine_quantity = ['2160-0', '14682-9', '40264-4', '40248-7']
40 LOINC_gfr_quantity = ['33914-3', '45066-8', '48642-3', '48643-1', '50044-7', '50210-4', '50384-7', '62238-1', '69405-9', '70969-1']
41 LOINC_height = ['3137-7', '3138-5', '8301-4', '8302-2', '8305-5', '8306-3', '8307-1', '8308-9']
42 LOINC_weight = ['18833-4', '29463-7', '3141-9', '3142-7', '8335-2', '8339-4', '8344-4', '8346-9', '8351-9']
43 LOINC_rr_quantity = ['8478-0', '8448-3', '8449-1', '8456-6', '8457-4', '8458-2', '55284-4', '50403-5', '50402-7', '45372-0']
44 LOINC_heart_rate_quantity = ['8867-4', '67129-7', '40443-4', '69000-8', '69001-6', '68999-2']
45 LOINC_inr_quantity = ['34714-6', '46418-0', '6301-6', '38875-1']
46
47
49 cmd = 'SELECT * FROM ref.loinc WHERE code = %(loinc)s'
50 args = {'loinc': loinc}
51 rows, idx = gmPG2.run_ro_queries(queries = [{'cmd': cmd, 'args': args}], get_col_idx = False)
52 if len(rows) == 0:
53 return []
54 return rows[0]
55
56
58
59
60 cmd = """
61 SELECT coalesce (
62 (SELECT term
63 FROM ref.v_coded_terms
64 WHERE
65 coding_system = 'LOINC'
66 AND
67 code = %(loinc)s
68 AND
69 lang = i18n.get_curr_lang()
70 ),
71 (SELECT term
72 FROM ref.v_coded_terms
73 WHERE
74 coding_system = 'LOINC'
75 AND
76 code = %(loinc)s
77 AND
78 lang = 'en_EN'
79 ),
80 (SELECT term
81 FROM ref.v_coded_terms
82 WHERE
83 coding_system = 'LOINC'
84 AND
85 code = %(loinc)s
86 )
87 )"""
88 args = {'loinc': loinc}
89 rows, idx = gmPG2.run_ro_queries(queries = [{'cmd': cmd, 'args': args}], get_col_idx = False)
90
91 if rows[0][0] is None:
92 return []
93
94 return [ r[0] for r in rows ]
95
96
98
99 _log.debug('splitting LOINC source file [%s]', input_fname)
100
101 if license_fname is None:
102 license_fname = gmTools.get_unique_filename(prefix = 'loinc_license-', suffix = '.txt')
103 _log.debug('LOINC header: %s', license_fname)
104
105 if data_fname is None:
106 data_fname = gmTools.get_unique_filename(prefix = 'loinc_data-', suffix = '.csv')
107 _log.debug('LOINC data: %s', data_fname)
108
109 loinc_file = io.open(input_fname, mode = 'rt', encoding = file_encoding, errors = 'replace')
110 out_file = io.open(license_fname, mode = 'wt', encoding = 'utf8', errors = 'replace')
111
112 for line in loinc_file:
113
114 if license_delimiter in line:
115 out_file.write(line)
116 out_file.close()
117 out_file = io.open(data_fname, mode = 'wt', encoding = 'utf8', errors = 'replace')
118 continue
119
120 out_file.write(line)
121
122 out_file.close()
123
124 return data_fname, license_fname
125
126
128
129 csv_file = io.open(data_fname, mode = 'rt', encoding = 'utf8', errors = 'replace')
130 first_line = csv_file.readline()
131 sniffer = csv.Sniffer()
132 if sniffer.has_header(first_line):
133 pass
134
135
137
138 in_file = io.open(license_fname, mode = 'rt', encoding = 'utf8', errors = 'replace')
139
140 version = None
141 for line in in_file:
142 if line.startswith(version_tag):
143 version = line[len(version_tag):].strip()
144 break
145
146 in_file.close()
147 return version
148
149
150 -def loinc_import(data_fname=None, license_fname=None, version=None, conn=None, lang='en_EN'):
151
152 if version is None:
153 version = get_version(license_fname = license_fname)
154
155 if version is None:
156 raise ValueError('cannot detect LOINC version')
157
158 _log.debug('importing LOINC version [%s]', version)
159
160
161 curs = conn.cursor()
162 cmd = """DELETE FROM staging.loinc_staging"""
163 gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd}])
164 curs.close()
165 conn.commit()
166 _log.debug('staging table emptied')
167
168
169 csv_file = io.open(data_fname, mode = 'rt', encoding = 'utf8', errors = 'replace')
170 loinc_reader = gmTools.unicode_csv_reader(csv_file, delimiter = "\t", quotechar = '"')
171 curs = conn.cursor()
172 cmd = """INSERT INTO staging.loinc_staging values (%s%%s)""" % ('%s, ' * (len(loinc_fields) - 1))
173 first = False
174 for loinc_line in loinc_reader:
175 if not first:
176 first = True
177 continue
178 gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd, 'args': loinc_line}])
179 curs.close()
180 conn.commit()
181 csv_file.close()
182 _log.debug('staging table loaded')
183
184
185 in_file = io.open(license_fname, mode = 'rt', encoding = 'utf8', errors = 'replace')
186 desc = in_file.read()
187 in_file.close()
188 args = {'ver': version, 'desc': desc, 'url': origin_url, 'name_long': name_long, 'name_short': name_short, 'lang': lang}
189 queries = [
190
191 {'args': args, 'cmd': """
192 INSERT INTO ref.data_source (name_long, name_short, version) SELECT
193 %(name_long)s,
194 %(name_short)s,
195 %(ver)s
196 WHERE NOT EXISTS (
197 SELECT 1 FROM ref.data_source WHERE
198 name_long = %(name_long)s
199 AND
200 name_short = %(name_short)s
201 AND
202 version = %(ver)s
203 )"""
204 },
205
206 {'args': args, 'cmd': """
207 UPDATE ref.data_source SET
208 description = %(desc)s,
209 source = %(url)s,
210 lang = %(lang)s
211 WHERE
212 name_long = %(name_long)s
213 AND
214 name_short = %(name_short)s
215 AND
216 version = %(ver)s
217 """
218 },
219
220 {'args': args, 'cmd': """SELECT pk FROM ref.data_source WHERE name_short = %(name_short)s AND version = %(ver)s"""}
221 ]
222 curs = conn.cursor()
223 rows, idx = gmPG2.run_rw_queries(link_obj = curs, queries = queries, return_data = True)
224 data_src_pk = rows[0][0]
225 curs.close()
226 _log.debug('data source record created or updated, pk is #%s', data_src_pk)
227
228
229 args = {'src_pk': data_src_pk}
230 queries = []
231 queries.append ({
232 'args': args,
233 'cmd': """
234 INSERT INTO ref.loinc (
235 fk_data_source, term, code
236 )
237 SELECT
238 %(src_pk)s,
239 coalesce (
240 nullif(long_common_name, ''),
241 (
242 coalesce(nullif(component, '') || ':', '') ||
243 coalesce(nullif(property, '') || ':', '') ||
244 coalesce(nullif(time_aspect, '') || ':', '') ||
245 coalesce(nullif(system, '') || ':', '') ||
246 coalesce(nullif(scale_type, '') || ':', '') ||
247 coalesce(nullif(method_type, '') || ':', '')
248 )
249 ),
250 nullif(loinc_num, '')
251 FROM
252 staging.loinc_staging st_ls
253 WHERE NOT EXISTS (
254 SELECT 1 FROM ref.loinc r_l WHERE
255 r_l.fk_data_source = %(src_pk)s
256 AND
257 r_l.code = nullif(st_ls.loinc_num, '')
258 AND
259 r_l.term = coalesce (
260 nullif(st_ls.long_common_name, ''),
261 (
262 coalesce(nullif(st_ls.component, '') || ':', '') ||
263 coalesce(nullif(st_ls.property, '') || ':', '') ||
264 coalesce(nullif(st_ls.time_aspect, '') || ':', '') ||
265 coalesce(nullif(st_ls.system, '') || ':', '') ||
266 coalesce(nullif(st_ls.scale_type, '') || ':', '') ||
267 coalesce(nullif(st_ls.method_type, '') || ':', '')
268 )
269 )
270 )"""
271 })
272 queries.append ({
273 'args': args,
274 'cmd': """
275 UPDATE ref.loinc SET
276 comment = nullif(st_ls.comments, ''),
277 component = nullif(st_ls.component, ''),
278 property = nullif(st_ls.property, ''),
279 time_aspect = nullif(st_ls.time_aspect, ''),
280 system = nullif(st_ls.system, ''),
281 scale_type = nullif(st_ls.scale_type, ''),
282 method_type = nullif(st_ls.method_type, ''),
283 related_names_1_old = nullif(st_ls.related_names_1_old, ''),
284 grouping_class = nullif(st_ls.class, ''),
285 loinc_internal_source = nullif(st_ls.source, ''),
286 dt_last_change = nullif(st_ls.dt_last_change, ''),
287 change_type = nullif(st_ls.change_type, ''),
288 answer_list = nullif(st_ls.answer_list, ''),
289 code_status = nullif(st_ls.status, ''),
290 maps_to = nullif(st_ls.map_to, ''),
291 scope = nullif(st_ls.scope, ''),
292 normal_range = nullif(st_ls.normal_range, ''),
293 ipcc_units = nullif(st_ls.ipcc_units, ''),
294 reference = nullif(st_ls.reference, ''),
295 exact_component_synonym = nullif(st_ls.exact_component_synonym, ''),
296 molar_mass = nullif(st_ls.molar_mass, ''),
297 grouping_class_type = nullif(st_ls.class_type, '')::smallint,
298 formula = nullif(st_ls.formula, ''),
299 species = nullif(st_ls.species, ''),
300 example_answers = nullif(st_ls.example_answers, ''),
301 acs_synonyms = nullif(st_ls.acs_synonyms, ''),
302 base_name = nullif(st_ls.base_name, ''),
303 final = nullif(st_ls.final, ''),
304 naa_ccr_id = nullif(st_ls.naa_ccr_id, ''),
305 code_table = nullif(st_ls.code_table, ''),
306 is_set_root = nullif(st_ls.is_set_root, '')::boolean,
307 panel_elements = nullif(st_ls.panel_elements, ''),
308 survey_question_text = nullif(st_ls.survey_question_text, ''),
309 survey_question_source = nullif(st_ls.survey_question_source, ''),
310 units_required = nullif(st_ls.units_required, ''),
311 submitted_units = nullif(st_ls.submitted_units, ''),
312 related_names_2 = nullif(st_ls.related_names_2, ''),
313 short_name = nullif(st_ls.short_name, ''),
314 order_obs = nullif(st_ls.order_obs, ''),
315 cdisc_common_tests = nullif(st_ls.cdisc_common_tests, ''),
316 hl7_field_subfield_id = nullif(st_ls.hl7_field_subfield_id, ''),
317 external_copyright_notice = nullif(st_ls.external_copyright_notice, ''),
318 example_units = nullif(st_ls.example_units, ''),
319 inpc_percentage = nullif(st_ls.inpc_percentage, ''),
320 long_common_name = nullif(st_ls.long_common_name, '')
321 FROM
322 staging.loinc_staging st_ls
323 WHERE
324 fk_data_source = %(src_pk)s
325 AND
326 code = nullif(st_ls.loinc_num, '')
327 AND
328 term = coalesce (
329 nullif(st_ls.long_common_name, ''),
330 (
331 coalesce(nullif(st_ls.component, '') || ':', '') ||
332 coalesce(nullif(st_ls.property, '') || ':', '') ||
333 coalesce(nullif(st_ls.time_aspect, '') || ':', '') ||
334 coalesce(nullif(st_ls.system, '') || ':', '') ||
335 coalesce(nullif(st_ls.scale_type, '') || ':', '') ||
336 coalesce(nullif(st_ls.method_type, '') || ':', '')
337 )
338 )
339 """
340 })
341 curs = conn.cursor()
342 gmPG2.run_rw_queries(link_obj = curs, queries = queries)
343 curs.close()
344 conn.commit()
345 _log.debug('transfer from staging table to real table done')
346
347
348 curs = conn.cursor()
349 cmd = """DELETE FROM staging.loinc_staging"""
350 gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd}])
351 curs.close()
352 conn.commit()
353 _log.debug('staging table emptied')
354
355 return True
356
357
358 _SQL_LOINC_from_test_type = """
359 -- from test type
360 SELECT
361 loinc AS data,
362 loinc AS field_label,
363 (loinc || ': ' || abbrev || ' (' || name || ')') AS list_label
364 FROM clin.test_type
365 WHERE loinc %(fragment_condition)s
366 """
367
368 _SQL_LOINC_from_i18n_coded_term = """
369 -- from coded term, in user language
370 SELECT
371 code AS data,
372 code AS field_label,
373 (code || ': ' || term) AS list_label
374 FROM ref.v_coded_terms
375 WHERE
376 coding_system = 'LOINC'
377 AND
378 lang = i18n.get_curr_lang()
379 AND
380 (code %(fragment_condition)s
381 OR
382 term %(fragment_condition)s)
383 """
384
385 _SQL_LOINC_from_en_EN_coded_term = """
386 -- from coded term, in English
387 SELECT
388 code AS data,
389 code AS field_label,
390 (code || ': ' || term) AS list_label
391 FROM ref.v_coded_terms
392 WHERE
393 coding_system = 'LOINC'
394 AND
395 lang = 'en_EN'
396 AND
397 (code %(fragment_condition)s
398 OR
399 term %(fragment_condition)s)
400 """
401
402 _SQL_LOINC_from_any_coded_term = """
403 -- from coded term, in any language
404 SELECT
405 code AS data,
406 code AS field_label,
407 (code || ': ' || term) AS list_label
408 FROM ref.v_coded_terms
409 WHERE
410 coding_system = 'LOINC'
411 AND
412 (code %(fragment_condition)s
413 OR
414 term %(fragment_condition)s)
415 """
416
417
478
479
480
481
482 if __name__ == "__main__":
483
484 if len(sys.argv) < 2:
485 sys.exit()
486
487 if sys.argv[1] != 'test':
488 sys.exit()
489
490 from Gnumed.pycommon import gmLog2
491 from Gnumed.pycommon import gmI18N
492
493 gmI18N.activate_locale()
494
495
496
499
502
504 term = loinc2term(sys.argv[2])
505 print(sys.argv[2], '->', term)
506
507 test_loinc_split()
508
509
510
511
512