Package Gnumed :: Package business :: Module gmXdtObjects
[frames | no frames]

Source Code for Module Gnumed.business.gmXdtObjects

  1  """GNUmed German XDT parsing objects. 
  2   
  3  This encapsulates some of the XDT data into 
  4  objects for easy access. 
  5  """ 
  6  #============================================================== 
  7  __version__ = "$Revision: 1.33 $" 
  8  __author__ = "K.Hilbert, S.Hilbert" 
  9  __license__ = "GPL" 
 10   
 11  import os.path, sys, linecache, codecs, re as regex, time, datetime as pyDT, logging 
 12   
 13   
 14  import mx.DateTime as mxDT 
 15   
 16   
 17  if __name__ == '__main__': 
 18          sys.path.insert(0, '../../') 
 19  from Gnumed.pycommon import gmDateTime, gmTools 
 20  from Gnumed.business import gmXdtMappings, gmPerson 
 21   
 22   
 23  _log = logging.getLogger('gm.xdt') 
 24  _log.info(__version__) 
 25   
 26  #============================================================== 
class cDTO_xdt_person(gmPerson.cDTO_person):
    """Person data transfer object for people read from xDT files.

    Adds nothing to gmPerson.cDTO_person except a store() that does nothing.
    """

    def store(self):
        """Do nothing and return None.

        NOTE(review): presumably replaces a store() inherited from the base
        class - confirm against gmPerson.cDTO_person.
        """
        return None
31 #==============================================================
def determine_xdt_encoding(filename=None, default_encoding=None):
    """Return the character set an xDT file declares for itself.

    The file is scanned line by line for the first field id listed in
    gmXdtMappings._charset_fields. The single character following that
    field id is translated through gmXdtMappings._map_field2charset.

    Args:
        filename: path of the xDT file to inspect.
        default_encoding: value handed back when no charset field is found.

    Returns:
        The encoding mapped from the charset field, or default_encoding
        when the file holds no such field.

    Raises:
        KeyError: the charset field carries a value the mapping does not know.
    """
    file_encoding = None

    # Decoding is lenient (errors='ignore') because the real encoding is
    # not known yet; only the field id and the charset code are inspected.
    # Plain 'r' is used: codecs.open() always reads the underlying file in
    # binary mode when an encoding is given, and newer Python versions
    # reject the 'U' flag altogether.
    f = codecs.open(filename=filename, mode='r', encoding='utf8', errors='ignore')
    try:
        for line in f:
            field = line[3:7]
            if field not in gmXdtMappings._charset_fields:
                continue
            _log.debug('found charset field [%s] in <%s>', field, filename)
            val = line[7:8]
            file_encoding = gmXdtMappings._map_field2charset[field][val]
            _log.debug('encoding in file is "%s" (%s)', file_encoding, val)
            break
    finally:
        # release the handle even when the mapping lookup above raises
        f.close()

    if file_encoding is None:
        _log.debug('no encoding found in <%s>, assuming [%s]', filename, default_encoding)
        return default_encoding

    return file_encoding
52 #==============================================================
def read_person_from_xdt(filename=None, encoding=None, dob_format=None):
    """Build a person DTO from the first patient data found in an xDT file.

    Lines are read until one of the fields of interest shows up a second
    time; only the first value of each field is kept.

    Args:
        filename: path of the xDT file.
        encoding: encoding used to decode the file; when None it is looked
            up with determine_xdt_encoding().
        dob_format: time.strptime() format of the date-of-birth field,
            '%d%m%Y' when None.

    Returns:
        A gmPerson.cDTO_person with firstnames, lastnames, dob, gender, zip,
        urb, street and source set. Everything but the names is None when
        the file does not provide it.

    Raises:
        ValueError: the file lacks the last name or the first name field, or
            (raised by time.strptime) the date of birth does not match the
            format in use.
    """
    _map_id2name = {
        '3101': 'lastnames',
        '3102': 'firstnames',
        '3103': 'dob',
        '3110': 'gender',
        '3106': 'zipurb',
        '3107': 'street',
        '3112': 'zip',
        '3113': 'urb',
        '8316': 'source'
    }

    needed_fields = (
        '3101',
        '3102'
    )

    data = {}

    # try to find encoding if not given
    if encoding is None:
        encoding = determine_xdt_encoding(filename=filename)

    # plain 'r': codecs.open() reads in binary mode whenever an encoding is
    # given, and the 'U' flag is rejected by newer Python versions
    xdt_file = codecs.open(filename=filename, mode='r', encoding=encoding)
    try:
        for line in xdt_file:
            line = line.replace('\015', '').replace('\012', '')

            # xDT line format: aaabbbbcccccccccccCRLF where aaa = length, bbbb = record type, cccc... = content
            field = line[3:7]
            if field not in _map_id2name:
                continue

            attribute = _map_id2name[field]
            if attribute in data:
                # a repeated field ends the scan - presumably it belongs to
                # the next patient in the file
                break
            data[attribute] = line[7:]
    finally:
        xdt_file.close()

    # found enough data ?  check the required fields themselves, counting
    # entries would accept e.g. a file carrying only dob and gender
    missing = [f for f in needed_fields if _map_id2name[f] not in data]
    if missing:
        raise ValueError('insufficient patient data in XDT file [%s], found only: %s' % (filename, data))

    dto = gmPerson.cDTO_person()

    dto.firstnames = data['firstnames']
    dto.lastnames = data['lastnames']

    # CAVE: different data orders are possible, so configuration may be needed
    # FIXME: detect xDT version and use default from the standard when dob_format is None
    if 'dob' in data:
        dob = time.strptime(data['dob'], gmTools.coalesce(dob_format, '%d%m%Y'))
        dto.dob = pyDT.datetime(dob.tm_year, dob.tm_mon, dob.tm_mday, tzinfo = gmDateTime.gmCurrentLocalTimezone)
    else:
        dto.dob = None

    # KeyError covers both "no gender field" and "gender code not mapped"
    try:
        dto.gender = gmXdtMappings.map_gender_xdt2gm[data['gender'].lower()]
    except KeyError:
        dto.gender = None

    zipurb = data.get('zipurb')

    # a dedicated zip field wins over the one embedded in the zip+urb field
    dto.zip = None
    if zipurb is not None:
        zip_match = regex.match(r'\d{5}', zipurb)
        # no leading 5-digit code: leave zip unset instead of crashing
        if zip_match is not None:
            dto.zip = zip_match.group()
    if 'zip' in data:
        dto.zip = data['zip']

    # likewise a dedicated urb field wins over the combined field
    dto.urb = None
    if zipurb is not None:
        dto.urb = regex.sub(r'\d{5} ', '', zipurb)
    if 'urb' in data:
        dto.urb = data['urb']

    dto.street = data.get('street')
    dto.source = data.get('source')

    return dto
153 #==============================================================
class cLDTFile(object):
    """Read access to the header, the tail and the reports of an LDT file.

    Lines follow the xDT layout: 3 characters length, 4 characters field id,
    then the content. A line with field id 8000 starts a record, and its
    content names the record type. Type 8202 is what the original comments
    call "LG-Bericht"; type 8221 starts the tail.
    """

    def __init__(self, filename=None, encoding=None, override_encoding=False):
        """Remember the file name and settle on the encoding to read it with.

        Args:
            filename: path of the LDT file.
            encoding: encoding to use when the file declares none, or
                always when override_encoding is true.
            override_encoding: prefer the encoding argument over the one
                declared in the file.

        Raises:
            ValueError: no usable encoding, i.e. the file declares none and
                no encoding was passed in, or overriding was requested
                without passing an encoding.
        """
        file_encoding = determine_xdt_encoding(filename=filename)
        if file_encoding is None:
            _log.warning('LDT file <%s> does not specify encoding', filename)
            if encoding is None:
                raise ValueError('no encoding specified in file <%s> or method call' % filename)

        if override_encoding:
            if encoding is None:
                raise ValueError('no encoding specified in method call for overriding encoding in file <%s>' % filename)
            self.encoding = encoding
        elif file_encoding is None:
            self.encoding = encoding
        else:
            self.encoding = file_encoding

        self.filename = filename

        # both are filled lazily on first access
        self.__header = None
        self.__tail = None
    #----------------------------------------------------------
    def _open_for_reading(self):
        """Open the LDT file for decoded, line-wise reading."""
        # plain 'r': codecs.open() reads in binary mode whenever an encoding
        # is given, and the 'U' flag is rejected by newer Python versions
        return codecs.open(filename = self.filename, mode = 'r', encoding = self.encoding)
    #----------------------------------------------------------
    @staticmethod
    def _split_line(line):
        """Return (field id, content without CR/LF) of one xDT line."""
        return line[3:7], line[7:].replace('\015', '').replace('\012', '')
    #----------------------------------------------------------
    def _get_header(self):
        """Return the lines preceding the first 8000/8202 record, as a list.

        The result is cached after the first successful read.
        """
        if self.__header is not None:
            return self.__header

        header = []
        ldt_file = self._open_for_reading()
        try:
            for line in ldt_file:
                field, content = self._split_line(line)
                # loop until found first LG-Bericht
                if field == u'8000' and content == u'8202':
                    break
                header.append(line)
        finally:
            ldt_file.close()

        # cache only complete results, a failed read must not leave a
        # partial header behind
        self.__header = header
        return self.__header

    # read-only
    header = property(_get_header)
    #----------------------------------------------------------
    def _get_tail(self):
        """Return the lines from the 8000/8221 record to the end, as a list.

        The result is cached after the first successful read.
        """
        if self.__tail is not None:
            return self.__tail

        tail = []
        in_tail = False
        ldt_file = self._open_for_reading()
        try:
            for line in ldt_file:
                if not in_tail:
                    field, content = self._split_line(line)
                    # loop until found tail
                    in_tail = (field == u'8000' and content == u'8221')
                if in_tail:
                    tail.append(line)
        finally:
            ldt_file.close()

        self.__tail = tail
        return self.__tail

    # read-only
    tail = property(_get_tail)
    #----------------------------------------------------------
    def split_by_patient(self, dir=None, file=None):
        """Write each 8000/8202 record into a file of its own.

        Every output file consists of the header, the lines of one record
        (from its 8000 line up to the line before the next 8000 line) and
        the tail. Records of any other type are not written anywhere.

        NOTE(review): the original code never opened an output file (the
        call was commented out), so the naming below is a proposal, not a
        recovered contract - confirm against the intended callers.

        Args:
            dir: directory receiving the output files, defaults to the
                directory of the LDT file.
            file: base name the output names are derived from, defaults to
                the name of the LDT file. "lab.ldt" yields "lab-0001.ldt",
                "lab-0002.ldt", ...

        Returns:
            The list of file names written, in file order. Existing files
            of the same name are overwritten.
        """
        header = u''.join(self.header)
        tail = u''.join(self.tail)

        if dir is None:
            dir = os.path.dirname(os.path.abspath(self.filename))
        if file is None:
            file = os.path.basename(self.filename)
        root, ext = os.path.splitext(file)

        written = []
        out_file = None
        ldt_file = self._open_for_reading()
        try:
            for line in ldt_file:
                field, content = self._split_line(line)

                # start of record
                if field == u'8000':
                    # whatever starts here, it ends the report being written
                    if out_file is not None:
                        out_file.write(tail)
                        out_file.close()
                        out_file = None
                    # start of LG-Bericht
                    if content == u'8202':
                        out_name = os.path.join(dir, '%s-%04d%s' % (root, len(written) + 1, ext))
                        out_file = codecs.open(filename = out_name, mode = 'w', encoding = self.encoding)
                        written.append(out_name)
                        out_file.write(header)

                # out_file is only set while inside an LG-Bericht
                if out_file is not None:
                    out_file.write(line)

            # input ended inside a report
            if out_file is not None:
                out_file.write(tail)
        finally:
            if out_file is not None:
                out_file.close()
            ldt_file.close()

        return written
262 #============================================================== 263 # FIXME: the following *should* get wrapped in class XdtFile ... 264 #--------------------------------------------------------------
def xdt_get_pats(aFile):
    """Collect the patients mentioned in an xDT file.

    A 3000 line sets the current patient id; a following 3101 line assigns
    its content as that patient's name.

    Args:
        aFile: whatever fileinput.input() accepts, typically a file name.

    Returns:
        A dict mapping patient id to patient name. Ids that are never
        followed by a name line are counted in the log message but do not
        appear in the dict.
    """
    # imported locally: the module's import block does not provide fileinput
    import fileinput

    seen_ids = set()
    pats = {}
    pat_id = None

    # xDT line format: aaabbbbcccccccccccCRLF where aaa = length, bbbb = record type, cccc... = content
    try:
        for line in fileinput.input(aFile):
            # remove trailing CR and/or LF
            line = line.replace('\015', '').replace('\012', '')
            field = line[3:7]

            # patient id
            if field == '3000':
                pat_id = line[7:]
                seen_ids.add(pat_id)
                continue

            # patient name
            if field == '3101':
                if pat_id is None:
                    # a name before any id cannot be attributed to anyone
                    continue
                # keyed by id only: distinct patients sharing a name must
                # not be collapsed into one entry
                pats[pat_id] = line[7:]
    finally:
        fileinput.close()

    _log.debug("patients found: %s" % len(seen_ids))
    return pats
294 #==============================================================
def get_pat_files(aFile, ID, name, patdir = None, patlst = None):
    """Fetch the "files" option stored for a patient in the patient list.

    The patient is addressed by the group name "<ID>:<name>". aFile is
    accepted but not used.

    Returns:
        A two-element list: patdir as passed in, and whatever patlst.get()
        returned for the patient.
    """
    _log.debug("getting files for patient [%s:%s]" % (ID, name))
    group = "%s:%s" % (ID, name)
    patient_files = patlst.get(aGroup = group, anOption = "files")
    _log.debug("%s => %s" % (patdir, patient_files))
    return [patdir, patient_files]
300 #==============================================================
def split_xdt_file(aFile,patlst,cfg):
    """Export every not yet known patient record of an xDT file.

    The file is scanned for record starts (field 8000). Each record whose
    third line carries a patient id (field 3000) is hashed. Records whose
    hash is not yet listed for that patient in patlst are written to the
    directory configured as "xdt-viewer" / "export-dir" and registered in
    patlst.

    Args:
        aFile: name of the xDT file.
        patlst: patient list object; used through check_for_previous_records(),
            add_file_to_patlst() and its store() method.
        cfg: configuration object providing get(section, option).

    Returns:
        Always 1.
    """
    # imported locally: the module's import block provides neither of them
    import fileinput
    import hashlib

    # xDT line format: aaabbbbcccccccccccCRLF where aaa = length, bbbb = record type, cccc... = content

    # find record starts, remembering 1-based line numbers
    record_start_lines = []
    total_lines = 0
    try:
        for line in fileinput.input(aFile):
            total_lines = fileinput.filelineno()
            strippedline = line.replace('\015', '').replace('\012', '')
            # do we care about this line ? (records start with 8000)
            if strippedline[3:7] == '8000':
                record_start_lines.append(total_lines)
    finally:
        fileinput.close()

    # loop over patient records
    for idx, startline in enumerate(record_start_lines):
        # the patient id is expected two lines below the record start -
        # presumably one line (e.g. the record length) sits in between, TODO confirm
        strippedline = linecache.getline(aFile, startline + 2).replace('\015', '').replace('\012', '')
        if strippedline[3:7] != '3000':
            # not a patient record
            continue
        ID = strippedline[7:]

        # the name is expected right after the id; never reuse the name of
        # a previous record when it is missing
        strippedline = linecache.getline(aFile, startline + 3).replace('\015', '').replace('\012', '')
        if strippedline[3:7] == '3101':
            name = strippedline[7:]
        else:
            name = ''

        # the record ends where the next one starts, the last one at end of file
        if idx + 1 < len(record_start_lines):
            endline = record_start_lines[idx + 1]
        else:
            endline = total_lines + 1

        _log.debug("reading from%s" % str(startline) + ' ' + str(endline))
        # start from scratch for every record so lines never carry over
        content = []
        for lineno in range(startline, endline):
            content.append(linecache.getline(aFile, lineno))
            _log.debug("reading %s" % lineno)

        hashes = check_for_previous_records(ID, name, patlst)

        # is this new content ?
        data_hash = hashlib.md5()
        for chunk in content:
            data_hash.update(chunk)
        digest = data_hash.hexdigest()
        if digest in hashes:
            continue

        pat_dir = cfg.get("xdt-viewer", "export-dir")
        fname = write_xdt_pat_data(content, pat_dir)
        add_file_to_patlst(ID, name, patlst, fname, digest)

    # cleanup
    patlst.store()
    return 1
360 #==============================================================
def get_rand_fname(aDir):
    """Return a fresh file name (without directory part) for use in aDir.

    The name comes from gmTools.get_unique_filename(), called with an empty
    prefix, aDir as the directory and the current local time formatted as
    ".YYYYMMDD-HHMMSS" as the suffix.
    """
    timestamp = time.strftime(".%Y%m%d-%H%M%S", time.localtime())
    full_name = gmTools.get_unique_filename(prefix='', suffix = timestamp, tmp_dir=aDir)
    return os.path.basename(full_name)
365 #==============================================================
def write_xdt_pat_data(data, aDir):
    """Write the record of one patient to a new file in aDir.

    Args:
        data: iterable of strings, written one after the other without
            any separator being added.
        aDir: directory to create the file in.

    Returns:
        The name of the file written, without the directory part.
    """
    # keep the generated name: it is the return value
    fname = get_rand_fname(aDir)
    pat_file = open(os.path.join(aDir, fname), "w")
    try:
        pat_file.writelines(data)
    finally:
        pat_file.close()
    return fname
372 #==============================================================
def check_for_previous_records(ID, name, patlst):
    """Return the hashes of all files already listed for a patient.

    The patient is addressed by the group name "<ID>:<name>". When patlst
    has no such group yet, it is created with an empty "files" option.

    Args:
        ID: patient id.
        name: patient name.
        patlst: patient list object providing getGroups(), get() and set().

    Returns:
        A list with the hash part of every "<file>:<hash>" entry stored
        under the patient's "files" option.

    Raises:
        ValueError: an entry holds no colon at all.
    """
    anIdentity = "%s:%s" % (ID, name)

    # patient not listed yet
    if anIdentity not in patlst.getGroups():
        _log.debug("identity not yet in list" )
        patlst.set(aGroup = anIdentity, anOption = 'files', aValue = [], aComment = '')

    # file already listed ?
    file_defs = patlst.get(aGroup = anIdentity, anOption = "files")
    hashes = []
    for line in file_defs:
        # split at the last colon only: the hash never holds one, the file
        # name might
        fname, ahash = line.rsplit(':', 1)
        hashes.append(ahash)

    return hashes
387 #==============================================================
def add_file_to_patlst(ID, name, patlst, new_file, ahash):
    """Register an exported file for a patient in the patient list.

    Appends "<new_file>:<ahash>" to the "files" option of the group
    "<ID>:<name>" and writes the option back through patlst.set().

    Args:
        ID: patient id.
        name: patient name.
        patlst: patient list object providing get() and set().
        new_file: name of the file to register.
        ahash: hash of the file content.
    """
    anIdentity = "%s:%s" % (ID, name)
    files = patlst.get(aGroup = anIdentity, anOption = "files")
    # exactly one file is passed in per call
    files.append("%s:%s" % (new_file, ahash))
    _log.debug("files now there : %s" % files)
    patlst.set(aGroup=anIdentity, anOption="files", aValue = files, aComment="")
#==============================================================
# main
#--------------------------------------------------------------
if __name__ == "__main__":
    # manual test: dump header and tail of the LDT file named on the command line
    from Gnumed.pycommon import gmI18N, gmLog2

    root_log = logging.getLogger()
    root_log.setLevel(logging.DEBUG)
    _log = logging.getLogger('gm.xdt')

    #from Gnumed.business import gmPerson
    gmI18N.activate_locale()
    gmI18N.install_domain()
    gmDateTime.init()

    ldt = cLDTFile(filename = sys.argv[1])
    # attributes are looked up by name so that each part is only read
    # after its label has been printed
    for label, part in (("header:", "header"), ("tail:", "tail")):
        print(label)
        for line in getattr(ldt, part):
            print(line.encode('utf8', 'replace'))

#   # test framework if run by itself
#   patfile = sys.argv[1]
#   dobformat = sys.argv[2]
#   encoding = sys.argv[3]
#   print "reading patient data from xDT file [%s]" % patfile

#   dto = read_person_from_xdt(patfile, dob_format=dobformat, encoding=encoding)
#   print "DTO:", dto
#   print "dto.dob:", dto.dob, type(dto.dob)
#   print "dto.dob.tz:", dto.dob.tzinfo
#   print "dto.zip: %s dto.urb: %s" % (dto.zip, dto.urb)
#   print "dto.street", dto.street
#   searcher = gmPersonSearch.cPatientSearcher_SQL()
#   ident = searcher.get_identities(dto=dto)[0]
#   print ident
##  print ident.get_medical_age()

#==============================================================