1
2 """LOINC handling code.
3
4 http://loinc.org
5
6 license: GPL v2 or later
7 """
8
9 __author__ = "K.Hilbert <Karsten.Hilbert@gmx.net>"
10
11 import sys
12 import codecs
13 import logging
14 import csv
15 import re as regex
16
17
18 if __name__ == '__main__':
19 sys.path.insert(0, '../../')
20 from Gnumed.pycommon import gmPG2
21 from Gnumed.pycommon import gmTools
22 from Gnumed.pycommon import gmMatchProvider
23
24
# module-wide logger
_log = logging.getLogger('gm.loinc')


# URL stored as "source" of the LOINC data source record
origin_url = u'http://loinc.org'
# encoding used to decode the LOINC source file when it is split
file_encoding = 'latin1'
# a line containing this text separates the license header from the data
license_delimiter = u'Clip Here for Data'
# a license line starting with this text carries the version after it
version_tag = u'LOINC(R) Database Version'
# names under which the data source gets registered
name_long = u'LOINC® (Logical Observation Identifiers Names and Codes)'
name_short = u'LOINC'

# column names of the LOINC data file, in file order; the length of this
# list determines the number of placeholders in the staging table INSERT
loinc_fields = (
    u"LOINC_NUM COMPONENT PROPERTY TIME_ASPCT SYSTEM SCALE_TYP METHOD_TYP "
    u"RELAT_NMS CLASS SOURCE DT_LAST_CH CHNG_TYPE COMMENTS ANSWERLIST STATUS "
    u"MAP_TO SCOPE NORM_RANGE IPCC_UNITS REFERENCE EXACT_CMP_SY MOLAR_MASS "
    u"CLASSTYPE FORMULA SPECIES EXMPL_ANSWERS ACSSYM BASE_NAME FINAL "
    u"NAACCR_ID CODE_TABLE SETROOT PANELELEMENTS SURVEY_QUEST_TEXT "
    u"SURVEY_QUEST_SRC UNITSREQUIRED SUBMITTED_UNITS RELATEDNAMES2 SHORTNAME "
    u"ORDER_OBS CDISC_COMMON_TESTS HL7_FIELD_SUBFIELD_ID "
    u"EXTERNAL_COPYRIGHT_NOTICE EXAMPLE_UNITS INPC_PERCENTAGE LONG_COMMON_NAME"
).split()


# groups of LOINC codes
# NOTE(review): judging by the names these denote GFR, body height and
# body weight measurements - confirm against the LOINC database
LOINC_gfr_quantity = [
    '33914-3', '45066-8', '48642-3', '48643-1', '50044-7',
    '50210-4', '50384-7', '62238-1', '69405-9', '70969-1'
]
LOINC_height = [
    '3137-7', '3138-5', '8301-4', '8302-2',
    '8305-5', '8306-3', '8307-1', '8308-9'
]
LOINC_weight = [
    '18833-4', '29463-7', '3141-9', '3142-7', '8335-2',
    '8339-4', '8344-4', '8346-9', '8351-9'
]
42
43
45
46
def loinc2term(loinc=None):
    """Look up the term stored for a LOINC code.

    The query prefers the term in the current database language
    (i18n.get_curr_lang()), then the 'en_EN' term, then a term in
    any language.

    loinc: the LOINC code to look up

    Returns a list holding the first column of each result row,
    or an empty list if no term was found.
    """
    # NOTE(review): the def line is absent from the reviewed text; the
    # name and parameter were reconstructed from the call
    # loinc2term(sys.argv[2]) and from the body's use of "loinc", the
    # default of None is an assumption - confirm against the repository
    lookup_sql = u"""
        SELECT coalesce (
            (SELECT term
            FROM ref.v_coded_terms
            WHERE
                coding_system = 'LOINC'
                    AND
                code = %(loinc)s
                    AND
                lang = i18n.get_curr_lang()
            ),
            (SELECT term
            FROM ref.v_coded_terms
            WHERE
                coding_system = 'LOINC'
                    AND
                code = %(loinc)s
                    AND
                lang = 'en_EN'
            ),
            (SELECT term
            FROM ref.v_coded_terms
            WHERE
                coding_system = 'LOINC'
                    AND
                code = %(loinc)s
            )
        )"""
    query = {'cmd': lookup_sql, 'args': {'loinc': loinc}}
    rows, idx = gmPG2.run_ro_queries(queries = [query], get_col_idx = False)

    # coalesce() yields NULL when none of the three lookups matched
    if rows[0][0] is not None:
        return [ row[0] for row in rows ]

    return []
82
84
85 _log.debug('splitting LOINC source file [%s]', input_fname)
86
87 if license_fname is None:
88 license_fname = gmTools.get_unique_filename(prefix = 'loinc_license-', suffix = '.txt')
89 _log.debug('LOINC header: %s', license_fname)
90
91 if data_fname is None:
92 data_fname = gmTools.get_unique_filename(prefix = 'loinc_data-', suffix = '.csv')
93 _log.debug('LOINC data: %s', data_fname)
94
95 loinc_file = codecs.open(input_fname, 'rU', encoding = file_encoding, errors = 'replace')
96 out_file = codecs.open(license_fname, 'w', encoding = 'utf8', errors = 'replace')
97
98 for line in loinc_file:
99
100 if license_delimiter in line:
101 out_file.write(line)
102 out_file.close()
103 out_file = codecs.open(data_fname, 'w', encoding = 'utf8', errors = 'replace')
104 continue
105
106 out_file.write(line)
107
108 out_file.close()
109
110 return data_fname, license_fname
111
113
114 csv_file = codecs.open(data_fname, 'rU', 'utf8', 'replace')
115 first_line = csv_file.readline()
116 sniffer = csv.Sniffer()
117 if sniffer.has_header(first_line):
118 pass
119
121
def get_version(license_fname=None):
    """Extract the LOINC version from a license file.

    The file is read as UTF-8 (undecodable bytes are replaced) and
    scanned for the first line starting with the module-level
    version_tag.

    license_fname: name of the license file to scan

    Returns the stripped remainder of that line, or None if no
    line starts with version_tag.
    """
    # NOTE(review): the def line is absent from the reviewed text; the
    # name and keyword were reconstructed from the call
    # get_version(license_fname = license_fname), the default of None
    # is an assumption - confirm against the repository

    # the with-block closes the file even if reading raises
    with codecs.open(license_fname, 'rU', encoding = 'utf8', errors = 'replace') as in_file:
        for line in in_file:
            if line.startswith(version_tag):
                return line[len(version_tag):].strip()

    return None
132
def _empty_loinc_staging(conn):
    """Delete all rows from ref.loinc_staging and commit."""
    curs = conn.cursor()
    try:
        cmd = u"""DELETE FROM ref.loinc_staging"""
        gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd}])
    finally:
        # release the cursor even if the query failed
        curs.close()
    conn.commit()
    _log.debug('staging table emptied')


def _load_loinc_staging(conn, data_fname):
    """Insert the data rows of the tab-separated LOINC file into ref.loinc_staging and commit."""
    # one placeholder per column listed in loinc_fields
    cmd = u"""INSERT INTO ref.loinc_staging values (%s%%s)""" % (u'%s, ' * (len(loinc_fields) - 1))
    with codecs.open(data_fname, 'rU', 'utf8', 'replace') as csv_file:
        loinc_reader = gmTools.unicode_csv_reader(csv_file, delimiter = "\t", quotechar = '"')
        curs = conn.cursor()
        try:
            header_skipped = False
            for loinc_line in loinc_reader:
                # the first line of the file is not loaded
                if not header_skipped:
                    header_skipped = True
                    continue
                gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd, 'args': loinc_line}])
        finally:
            curs.close()
        conn.commit()
    _log.debug('staging table loaded')


def _upsert_loinc_data_source(conn, license_fname, version, lang):
    """Create or update the ref.data_source row for this LOINC version and return its pk (no commit)."""
    # the entire license text becomes the description of the data source
    with codecs.open(license_fname, 'rU', encoding = 'utf8', errors = 'replace') as in_file:
        desc = in_file.read()
    args = {'ver': version, 'desc': desc, 'url': origin_url, 'name_long': name_long, 'name_short': name_short, 'lang': lang}
    queries = [
        # insert if not existing
        {'args': args, 'cmd': u"""
            INSERT INTO ref.data_source (name_long, name_short, version) SELECT
                %(name_long)s,
                %(name_short)s,
                %(ver)s
            WHERE NOT EXISTS (
                SELECT 1 FROM ref.data_source WHERE
                    name_long = %(name_long)s
                        AND
                    name_short = %(name_short)s
                        AND
                    version = %(ver)s
            )"""
        },
        # update the non-unique fields
        {'args': args, 'cmd': u"""
            UPDATE ref.data_source SET
                description = %(desc)s,
                source = %(url)s,
                lang = %(lang)s
            WHERE
                name_long = %(name_long)s
                    AND
                name_short = %(name_short)s
                    AND
                version = %(ver)s
            """
        },
        # retrieve the primary key
        {'args': args, 'cmd': u"""SELECT pk FROM ref.data_source WHERE name_short = %(name_short)s AND version = %(ver)s"""}
    ]
    curs = conn.cursor()
    try:
        rows, idx = gmPG2.run_rw_queries(link_obj = curs, queries = queries, return_data = True)
        data_src_pk = rows[0][0]
    finally:
        curs.close()
    _log.debug('data source record created or updated, pk is #%s', data_src_pk)
    return data_src_pk


def _transfer_loinc_staging(conn, data_src_pk):
    """Copy the staged rows into ref.loinc for the given data source and commit."""
    args = {'src_pk': data_src_pk}
    queries = []
    # insert code/term pairs not yet known for this data source,
    # the term is long_common_name or else built from the name parts
    queries.append ({
        'args': args,
        'cmd': u"""
            INSERT INTO ref.loinc (
                fk_data_source, term, code
            )
            SELECT
                %(src_pk)s,
                coalesce (
                    nullif(long_common_name, ''),
                    (
                        coalesce(nullif(component, '') || ':', '') ||
                        coalesce(nullif(property, '') || ':', '') ||
                        coalesce(nullif(time_aspect, '') || ':', '') ||
                        coalesce(nullif(system, '') || ':', '') ||
                        coalesce(nullif(scale_type, '') || ':', '') ||
                        coalesce(nullif(method_type, '') || ':', '')
                    )
                ),
                nullif(loinc_num, '')
            FROM
                ref.loinc_staging r_ls
            WHERE NOT EXISTS (
                SELECT 1 FROM ref.loinc r_l WHERE
                    r_l.fk_data_source = %(src_pk)s
                        AND
                    r_l.code = nullif(r_ls.loinc_num, '')
                        AND
                    r_l.term = coalesce (
                        nullif(r_ls.long_common_name, ''),
                        (
                            coalesce(nullif(r_ls.component, '') || ':', '') ||
                            coalesce(nullif(r_ls.property, '') || ':', '') ||
                            coalesce(nullif(r_ls.time_aspect, '') || ':', '') ||
                            coalesce(nullif(r_ls.system, '') || ':', '') ||
                            coalesce(nullif(r_ls.scale_type, '') || ':', '') ||
                            coalesce(nullif(r_ls.method_type, '') || ':', '')
                        )
                    )
            )"""
    })
    # fill in the remaining columns of the matching rows,
    # empty strings in the staging table become NULL
    queries.append ({
        'args': args,
        'cmd': u"""
            UPDATE ref.loinc SET
                comment = nullif(r_ls.comments, ''),
                component = nullif(r_ls.component, ''),
                property = nullif(r_ls.property, ''),
                time_aspect = nullif(r_ls.time_aspect, ''),
                system = nullif(r_ls.system, ''),
                scale_type = nullif(r_ls.scale_type, ''),
                method_type = nullif(r_ls.method_type, ''),
                related_names_1_old = nullif(r_ls.related_names_1_old, ''),
                grouping_class = nullif(r_ls.class, ''),
                loinc_internal_source = nullif(r_ls.source, ''),
                dt_last_change = nullif(r_ls.dt_last_change, ''),
                change_type = nullif(r_ls.change_type, ''),
                answer_list = nullif(r_ls.answer_list, ''),
                code_status = nullif(r_ls.status, ''),
                maps_to = nullif(r_ls.map_to, ''),
                scope = nullif(r_ls.scope, ''),
                normal_range = nullif(r_ls.normal_range, ''),
                ipcc_units = nullif(r_ls.ipcc_units, ''),
                reference = nullif(r_ls.reference, ''),
                exact_component_synonym = nullif(r_ls.exact_component_synonym, ''),
                molar_mass = nullif(r_ls.molar_mass, ''),
                grouping_class_type = nullif(r_ls.class_type, '')::smallint,
                formula = nullif(r_ls.formula, ''),
                species = nullif(r_ls.species, ''),
                example_answers = nullif(r_ls.example_answers, ''),
                acs_synonyms = nullif(r_ls.acs_synonyms, ''),
                base_name = nullif(r_ls.base_name, ''),
                final = nullif(r_ls.final, ''),
                naa_ccr_id = nullif(r_ls.naa_ccr_id, ''),
                code_table = nullif(r_ls.code_table, ''),
                is_set_root = nullif(r_ls.is_set_root, '')::boolean,
                panel_elements = nullif(r_ls.panel_elements, ''),
                survey_question_text = nullif(r_ls.survey_question_text, ''),
                survey_question_source = nullif(r_ls.survey_question_source, ''),
                units_required = nullif(r_ls.units_required, ''),
                submitted_units = nullif(r_ls.submitted_units, ''),
                related_names_2 = nullif(r_ls.related_names_2, ''),
                short_name = nullif(r_ls.short_name, ''),
                order_obs = nullif(r_ls.order_obs, ''),
                cdisc_common_tests = nullif(r_ls.cdisc_common_tests, ''),
                hl7_field_subfield_id = nullif(r_ls.hl7_field_subfield_id, ''),
                external_copyright_notice = nullif(r_ls.external_copyright_notice, ''),
                example_units = nullif(r_ls.example_units, ''),
                inpc_percentage = nullif(r_ls.inpc_percentage, ''),
                long_common_name = nullif(r_ls.long_common_name, '')
            FROM
                ref.loinc_staging r_ls
            WHERE
                fk_data_source = %(src_pk)s
                    AND
                code = nullif(r_ls.loinc_num, '')
                    AND
                term = coalesce (
                    nullif(r_ls.long_common_name, ''),
                    (
                        coalesce(nullif(r_ls.component, '') || ':', '') ||
                        coalesce(nullif(r_ls.property, '') || ':', '') ||
                        coalesce(nullif(r_ls.time_aspect, '') || ':', '') ||
                        coalesce(nullif(r_ls.system, '') || ':', '') ||
                        coalesce(nullif(r_ls.scale_type, '') || ':', '') ||
                        coalesce(nullif(r_ls.method_type, '') || ':', '')
                    )
                )
            """
    })
    curs = conn.cursor()
    try:
        gmPG2.run_rw_queries(link_obj = curs, queries = queries)
    finally:
        curs.close()
    conn.commit()
    _log.debug('transfer from staging table to real table done')


def loinc_import(data_fname=None, license_fname=None, version=None, conn=None, lang='en_EN'):
    """Import a LOINC release into the database.

    Steps: empty ref.loinc_staging, load the data file into it,
    create or update the ref.data_source record, copy the staged
    rows into ref.loinc, empty the staging table again.

    data_fname: tab-separated LOINC data file (UTF-8), its first line is skipped
    license_fname: LOINC license file (UTF-8), stored as description
        of the data source and used to detect the version if needed
    version: LOINC version, detected via get_version() if None
    conn: database connection providing cursor() and commit(), it is
        used unconditionally so it must be given despite the default
    lang: language recorded with the data source

    Returns True.

    Raises ValueError if the version was neither given nor detected.
    """
    if version is None:
        version = get_version(license_fname = license_fname)

    if version is None:
        raise ValueError('cannot detect LOINC version')

    _log.debug('importing LOINC version [%s]', version)

    _empty_loinc_staging(conn)
    _load_loinc_staging(conn, data_fname)
    data_src_pk = _upsert_loinc_data_source(conn, license_fname, version, lang)
    _transfer_loinc_staging(conn, data_src_pk)
    # leave no stale rows behind for the next import
    _empty_loinc_staging(conn)

    return True
339
340
# SQL templates selecting LOINC codes, each returns the columns
# "data", "field_label" (both the code itself) and "list_label"
# (the code followed by a description).
# NOTE(review): %(fragment_condition)s is presumably replaced with a
# match condition by the code consuming these templates, which is
# not visible here - confirm.

# codes already assigned to test types, labelled with abbreviation and name
_SQL_LOINC_from_test_type = u"""
-- from test type
SELECT
loinc AS data,
loinc AS field_label,
(loinc || ': ' || abbrev || ' (' || name || ')') AS list_label
FROM clin.test_type
WHERE loinc %(fragment_condition)s
"""

# coded terms in the current database language, matched on code or term
_SQL_LOINC_from_i18n_coded_term = u"""
-- from coded term, in user language
SELECT
code AS data,
code AS field_label,
(code || ': ' || term) AS list_label
FROM ref.v_coded_terms
WHERE
coding_system = 'LOINC'
AND
lang = i18n.get_curr_lang()
AND
(code %(fragment_condition)s
OR
term %(fragment_condition)s)
"""

# coded terms in language 'en_EN', matched on code or term
_SQL_LOINC_from_en_EN_coded_term = u"""
-- from coded term, in English
SELECT
code AS data,
code AS field_label,
(code || ': ' || term) AS list_label
FROM ref.v_coded_terms
WHERE
coding_system = 'LOINC'
AND
lang = 'en_EN'
AND
(code %(fragment_condition)s
OR
term %(fragment_condition)s)
"""

# coded terms regardless of language, matched on code or term
_SQL_LOINC_from_any_coded_term = u"""
-- from coded term, in any language
SELECT
code AS data,
code AS field_label,
(code || ': ' || term) AS list_label
FROM ref.v_coded_terms
WHERE
coding_system = 'LOINC'
AND
(code %(fragment_condition)s
OR
term %(fragment_condition)s)
"""
399
457
458
459
460 if __name__ == "__main__":
461
462 if len(sys.argv) < 2:
463 sys.exit()
464
465 if sys.argv[1] != 'test':
466 sys.exit()
467
468 from Gnumed.pycommon import gmLog2
469 from Gnumed.pycommon import gmI18N
470
471 gmI18N.activate_locale()
472
473
474
477
480
482 term = loinc2term(sys.argv[2])
483 print sys.argv[2], '->', term
484
485 test_loinc_split()
486
487
488
489
490