Home | Trees | Indices | Help |
|
---|
|
1 """This module encapsulates a document description stored in an XML file. 2 3 This is mainly used by GNUmed/Archive. 4 5 @copyright: GPL v2 or later 6 """ 7 #============================================================ 8 # $Source: /home/ncq/Projekte/cvs2git/vcs-mirror/gnumed/gnumed/client/business/gmXmlDocDesc.py,v $ 9 # $Id: gmXmlDocDesc.py,v 1.6 2008-02-25 17:31:41 ncq Exp $ 10 __version__ = "$Revision: 1.6 $" 11 __author__ = "Karsten Hilbert <Karsten.Hilbert@gmx.net>" 12 13 import sys, os.path, fileinput, types, string, logging 14 15 16 _log = logging.getLogger('gm.docs') 17 _log.info(__version__) 18 #============================================================20 # handlers for __getitem__() 21 _get_handler = {} 22 #--------------------------------------------------------233 #============================================================ 234 # main 235 #------------------------------------------------------------ 236 237 #============================================================ 238 # $Log: gmXmlDocDesc.py,v $ 239 # Revision 1.6 2008-02-25 17:31:41 ncq 240 # - logging cleanup 241 # 242 # Revision 1.5 2008/01/30 13:34:50 ncq 243 # - switch to std lib logging 244 # 245 # Revision 1.4 2004/03/19 17:07:20 shilbert 246 # - import statement fixed 247 # 248 # Revision 1.3 2004/02/25 09:46:20 ncq 249 # - import from pycommon now, not python-common 250 # 251 # Revision 1.2 2003/11/17 10:56:35 sjtan 252 # 253 # synced and commiting. 254 # 255 # Revision 1.1 2003/10/23 06:02:38 sjtan 256 # 257 # manual edit areas modelled after r.terry's specs. 258 # 259 # Revision 1.1 2003/04/20 15:42:27 ncq 260 # - first version 261 # 26224 # sanity checks 25 if aBaseDir is None: 26 raise ConstructorError, "need document path" 27 if not os.path.exists(os.path.abspath(aBaseDir)): 28 raise ConstructorError, "document path [%s] does not exist" % aBaseDir 29 self.__base_dir = aBaseDir 30 _log.debug("working from directory [%s]" % self.__base_dir) 31 32 if aCfg is None: 33 _log.warning('no config file specified') 34 import gmCfg 35 self.__cfg = gmCfg.gmDefCfgFile 36 else: 37 self.__cfg = aCfg 38 39 self.__group = str(aGroup) 40 41 tmp = self.__cfg.get(self.__group, "description") 42 self.__xml_file = os.path.join(self.__base_dir, tmp) 43 if not os.path.exists(self.__xml_file): 44 raise ConstructorError, "skipping [%s]: description file [%s] not found" % (self.__base_dir, tmp) 45 46 self.__data = {} 47 48 # if not self.__load_from_xml(): 49 # raise ConstructorError, "XML file [%s] cannot be parsed correctly" % anXmlFile 50 51 return None52 #--------------------------------------------------------54 """Load document metadata from XML file. 55 """ 56 # document type 57 tmp = self.__get_from_xml(aTag = self.cfg.get(self.__group, "type_tag"), anXMLfile = self.__xml_file) 58 if tmp is None: 59 _log.error("cannot load document type.") 60 return None 61 else: 62 self.__data['type'] = string.join(tmp) 63 # document comment 64 tmp = self.__get_from_xml(aTag = self.cfg.get(self.__group, "comment_tag"), anXMLfile = self.__xml_file) 65 if tmp is None: 66 _log.error("cannot load document comment") 67 return None 68 else: 69 self.__data['comment'] = string.join(tmp) 70 # document reference date 71 tmp = self.__get_from_xml(aTag = self.cfg.get(self.__group, "date_tag"), anXMLfile = self.__xml_file) 72 if tmp is None: 73 _log.error("cannot load document reference date.") 74 return None 75 else: 76 self.__data['date'] = string.join(tmp) 77 # external reference string 78 tmp = self.__get_from_xml(aTag = self.cfg.get(self.__group, "ref_tag"), anXMLfile = self.__xml_file) 79 if tmp is None: 80 _log.error("cannot load document reference string.") 81 return None 82 else: 83 self.__data['reference'] = string.join(tmp) 84 # document description 85 tmp = self.__get_from_xml(aTag = self.cfg.get(self.__group, "aux_comment_tag"), anXMLfile = self.__xml_file) 86 if tmp is None: 87 _log.error("cannot load long document description.") 88 else: 89 self.__data['description'] = string.join(tmp) 90 # list of data files 91 # if not self.__read_img_list(self.__xml_file, aBaseDir, self.__group): 92 # _log.error("Cannot retrieve list of document data files.") 93 # return None 94 95 _log.debug("long document description: " + str(self.__data['description'])) 96 _log.debug("document reference string: " + str(self.__data['reference'])) 97 _log.debug("document reference date: " + str(self.__data['date'])) 98 _log.debug("Document comment: " + str(self.__data['comment'])) 99 _log.debug("Document type: " + str(self.__data['type'])) 100 101 return 1102 #-------------------------------------------------------- 103 # attribute access 104 #--------------------------------------------------------106 try: 107 return self.__data[item] 108 except KeyError: 109 try: 110 return xmlDocDesc._get_handler[item](self) 111 except KeyError: 112 _log.exception('[%s] neither cached in self.__data nor get handler available' % item, sys.exc_info()) 113 return None114 #--------------------------------------------------------116 try: 117 return self.__data['objects'] 118 except KeyError: 119 self.__load_obj_list() 120 return self.__data['objects'] 121 return None122 #-------------------------------------------------------- 123 _get_handler['objects'] = _get_obj_list 124 #--------------------------------------------------------126 """Read list of image files from XML metadata file. 127 128 We assume the order of file names to correspond to the sequence of pages. 129 - don't use self.__get_from_xml, because we want to 130 scan lines sequentially here 131 """ 132 self.__data['objects'] = {} 133 tag_name = self.__cfg.get(self.__group, "obj_tag") 134 # now scan the xml file 135 idx = 0 136 for line in fileinput.input(self.__xml_file): 137 content = self.__extract_xml_content(line, tag_name) 138 if content is None: 139 continue 140 idx += 1 141 tmp = {} 142 tmp['file name'] = os.path.abspath(os.path.join(self.__base_dir, content)) 143 # this 'index' defines the order of objects in the document 144 tmp['index'] = idx 145 # we must use imaginary oid's since we are reading from a file, 146 # this OID defines the object ID in the data store, this 147 # has nothing to do with the semantic order of objects 148 self.__data['objects'][idx] = tmp 149 150 # cleanup 151 fileinput.close() 152 153 if idx == 0: 154 _log.warning("no files found for import") 155 return None 156 157 _log.debug("document data files to be processed: %s" % self.__data['objects']) 158 159 return 1160 #-------------------------------------------------------- 161 # public methods 162 #-------------------------------------------------------- 167 #--------------------------------------------------------169 # sanity 170 if not type(aTag) is types.StringType: 171 _log.error("Argument aTag (" + str(aTag) + ") is not a string.") 172 return None 173 174 TagStart = "<" + aTag + ">" 175 TagEnd = "</" + aTag + ">" 176 177 _log.info("Retrieving " + TagStart + "content" + TagEnd + ".") 178 179 inTag = 0 180 content = [] 181 182 for line in fileinput.input(self.__xml_file): 183 tmp = line 184 185 # this line starts a description 186 if string.find(tmp, TagStart) != -1: 187 inTag = 1 188 # strip junk left of <tag> 189 (junk, good_stuff) = string.split (tmp, TagStart, 1) 190 _log.debug("Found tag start in line: junk='%s' content='%s'" % (junk, good_stuff)) 191 tmp = good_stuff 192 193 # this line ends a description 194 if string.find(tmp, TagEnd) != -1: 195 # only if tag start has been found already 196 if inTag == 1: 197 # strip junk right of </tag> 198 (good_stuff, junk) = string.split (tmp, TagEnd, 1) 199 _log.debug("Found tag end in line: junk='%s' content='%s'" % (junk, good_stuff)) 200 content.append(good_stuff) 201 # shortcut out of for loop 202 break 203 204 # might be in-tag data line or line with start tag only 205 if inTag == 1: 206 content.append(tmp) 207 208 # cleanup 209 fileinput.close() 210 211 # looped over all lines 212 if len(content) > 0: 213 _log.debug("%s tag content successfully read: %s" % (TagStart, str(content))) 214 return content 215 else: 216 return None217 218 #--------------------------------------------------------220 # is this a line we care about ? 221 start_tag_pos = string.find(aLine,'<%s' % aTag) 222 if start_tag_pos == -1: 223 return None 224 # yes, so check for closing tag 225 end_tag_pos = string.find(aLine, '</%s>' % aTag) 226 if end_tag_pos == -1: 227 # but we don't do multiline tags 228 _log.error("Line [%s] is incomplete for tag [%s]. We don't do multiline tags here." % (aLine, aTag)) 229 return None 230 # actually extract content 231 content_start = string.find(aLine,'>', start_tag_pos, end_tag_pos) + 1 232 return aLine[content_start:end_tag_pos]
Home | Trees | Indices | Help |
|
---|
Generated by Epydoc 3.0.1 on Sat Aug 3 03:56:15 2013 | http://epydoc.sourceforge.net |