1 """
2 I-Sites fragment library parser.
3
4 @deprecated: legacy module.
5 """
6
7 import os
8 import re
9 import csb.io
10 import csb.bio.structure as structure
11 import csb.bio.fragments.isites as isites
34
36 """
37 Implements an I-Sites fragment library parser v.5.1+ (2008).
38
39 @param flatfile: input *.isl I-Sites library file name
40 @type flatfile: str
41 @param express: if True, speeds up the parser by ignoring the covariance tensors
42 @type express: bool
43
44 @raise IOError: when the source file cannot be found
45 """
46
def __init__(self, flatfile, express=False):
    """
    Bind the parser to one I-Sites library file.

    Stores the file name, the express flag (coerced to bool), an empty
    list of opened streams and an empty library slot on the instance.

    @param flatfile: input *.isl I-Sites library file name
    @type flatfile: str
    @param express: if True, speeds up the parser by ignoring the covariance tensors
    @type express: bool

    @raise IOError: when flatfile does not exist or is a directory
    """
    # A directory passes a plain existence check but can never be opened
    # as a flat file, so reject it here with the same error.
    if not os.path.exists(flatfile) or os.path.isdir(flatfile):
        raise IOError("Could not read file {0}".format(flatfile))

    self._flatfile = flatfile
    self.express = bool(express)
    self._streams = [ ]
    self._library = None
55
58
def __exit__(self, exc_type, exc_value, traceback):
    """
    Leave the context: close every stream recorded in C{self._streams}.

    Closing is best-effort: an ordinary error raised by one stream's
    C{close()} is ignored so the remaining streams still get closed.
    Nothing is returned, so an exception raised inside the C{with} body
    is not suppressed.

    @param exc_type: exception class raised in the with-body, or None (unused)
    @param exc_value: exception instance, or None (unused)
    @param traceback: traceback object, or None (unused)
    """
    for stream in self._streams:
        try:
            stream.close()
        except Exception:
            # Deliberately swallowed: cleanup must not mask the original
            # error. KeyboardInterrupt/SystemExit are not Exception
            # subclasses and still propagate.
            pass
65
67 for s in self._streams:
68 try:
69 s.close()
70 except:
71 pass
72
74
def _newstream(self):
    """
    Open the library flat file for reading and register the handle.

    The new file object is appended to C{self._streams} before it is
    handed back to the caller.

    @return: the newly opened file object
    """
    handle = open(self._flatfile, mode='r')
    self._streams.append(handle)
    return handle
78
79 @property
81 """
82 Return the general properties of the library.
83 Library Clusters are iterable, but read efficiently only on demand.
84 """
85 return self.parse()
86
87 @property
96
98 """
99 Parse the whole library to end.
100
101 @return: object representation of the library with all clusters pre-parsed
102 @rtype: L{Library}
103 """
104 library = self.parse()
105 library.clusters = list(self.clusters)
106 return library
107
def parse(self):
    """
    Parse I-sites library common/general properties. Clusters are not parsed,
    but can be fetched on demand while iterating over C{library.clusters}.

    Lines are read until end of file or until a REMARK line whose text is
    one of the two "start of library" / "start of clusters" markers.
    LIBRARY lines set the name and/or version, CENTROID lines add one
    per-residue row to C{library.centroids}, and any other REMARK line is
    appended to C{library.documentation}.

    @return: object representation of the library with a
             bound clusters generator
    @rtype: L{Library}

    @raise AssertionError: when a CENTROID index skips ahead of the next
                           free position in C{library.centroids}
    """
    residues = 'ACDEFGHIKLMNPQRSTVWY'
    start_markers = ('===== start of library =====', '====== start of clusters =======')

    library = isites.Library()
    library.centroids = [ ]
    library.documentation = ''
    library.clusters = self.clusters

    remarks = [ ]
    stream = self._newstream()

    # try/finally so the handle is released even when a malformed header
    # line raises (bad number, short CENTROID row, failed assertion).
    try:
        for line in stream:

            if line.startswith(Tags.REMARK) and line[len(Tags.REMARK):].strip() in start_markers:
                break

            elif line.startswith(Tags.LIBRARY):
                fields = line.split()[1:]
                if len(fields) > 1:
                    library.name, library.version = fields[0], fields[1]
                else:
                    library.version = fields[0]

            elif line.startswith(Tags.CENTROID):
                fields = line.split()
                index = int(fields[1])
                values = fields[2].split(',')

                # one value per residue, in the fixed residue order above
                matrow = dict((aa, float(values[cn])) for cn, aa in enumerate(residues))

                # a row whose index is already present is ignored
                if index > len(library.centroids) - 1:
                    assert index == len(library.centroids), "Centroid offset indices are consecutive numbers, starting from 0."
                    library.centroids.append(matrow)

            elif line.startswith(Tags.REMARK):
                # drop the tag and the single character that follows it
                remarks.append(line[len(Tags.REMARK)+1:])
    finally:
        stream.close()

    library.documentation = ''.join(remarks)
    return library
164
def parse_entry(self, entry):
    """
    Parse a single I-Sites entry.

    The entry is read line by line and the tag at the start of each line
    decides which cluster attribute is filled in. The ANGLES, PROFILE and
    COVARTENSOR sections span several lines; their rows are pulled from the
    same line iterator as the main loop, so the main loop resumes after the
    line that ended the section.

    When C{self.express} is true, parsing stops at the first covariance
    tensor row and C{cluster.covariance} is not assigned here.

    @param entry: text of one cluster record
    @type entry: str

    @return: object representation of the entry
    @rtype: L{Cluster}

    @raise AssertionError: when the pseudocount, row numbering, profile
                           length or tensor indices fail the checks below
    """
    residues = 'ACDEFGHIKLMNPQRSTVWY'

    cluster = isites.Cluster()
    lines = iter(entry.splitlines())

    in_profile = False
    in_tensor = False

    for line in lines:

        if line.startswith(Tags.CLUSTER):
            fields = line.split()[1:]
            cluster.id, cluster.motiflen, cluster.profilelen, cluster.overhang = map(int, fields)

        elif line.startswith(Tags.FROM):
            cluster.file = line.split()[1]

        elif line.startswith(Tags.CREATEDBY):
            cluster.program = line[len(Tags.CREATEDBY)+1:].strip()

        elif line.startswith(Tags.MDACUT):
            cluster.mda = float(line.split()[1])

        elif line.startswith(Tags.DMECUT):
            cluster.dme = float(line.split()[1])

        elif line.startswith(Tags.PSEUDOCOUNT):
            cluster.pseudocount = float(line.split()[1])
            assert cluster.pseudocount > 0

        elif line.startswith(Tags.LINEARFIT):
            fields = line.split()[1:]
            cluster.linearfit = tuple(map(float, fields))

        elif line.startswith(Tags.COVARWEIGHT):
            cluster.covarweight = float(line.split()[1])

        elif line.startswith(Tags.PARADIGM):
            fields = line.split()[1:]
            cluster.representative = isites.RepStructureFragment(fields[0], fields[1], int(fields[2]))
            # an underscore in the chain column is stored as an empty chain
            if fields[1] == '_':
                cluster.representative.chain = ''

        elif line.startswith(Tags.ANGLES):
            rn = -1
            # consume the angle rows that follow the ANGLES tag
            for subline in lines:
                if subline.startswith(Tags.PROFILE):
                    in_profile = True
                    break
                elif subline.startswith(Tags.END):
                    break

                rn += 1
                fields = subline.split()
                angles = tuple(map(float, fields[1:]))

                torsion = structure.TorsionAngles(angles[0], angles[1], angles[2], units=structure.AngleUnits.Degrees)
                # NOTE(review): the check below relies on append() returning
                # the 1-based position of the new item - confirm against the
                # angles collection type.
                j = cluster.representative.angles.append(torsion)

                assert rn == j-1 == int(fields[0]), "Angle offsets in a cluster are consecutive numbers, starting at 0."

        elif line.startswith(Tags.PROFILE):
            in_profile = True

        elif in_profile:
            # the current line is already the first profile row
            cluster.profile = isites.ProteinProfile(isites.ProteinProfile.BackgroundFreqs, alpha=cluster.pseudocount)
            rn = -1
            subline = line

            while True:
                if subline.startswith(Tags.CREATEDBY) or subline.startswith(Tags.END):
                    in_profile = False
                    break
                elif subline.startswith(Tags.COVARTENSOR):
                    in_tensor = True
                    in_profile = False
                    break

                rn += 1
                fields = subline.split()

                assert rn == int(fields[0]), "ProteinProfile rows in a cluster are consecutive numbers," \
                    + " starting from 0 (cluster {0}, profile row {1}/{2}).".format(cluster.id, rn, fields[0])

                # field 0 is the row number; fields 1..20 follow the residue order
                column = dict((aa, float(fields[cn+1])) for cn, aa in enumerate(residues))
                cluster.profile.add_column(**column)

                subline = next(lines, None)
                if subline is None:
                    in_profile = False
                    break

            assert cluster.profilelen == cluster.profile.length

        elif line.startswith(Tags.COVARTENSOR):
            in_tensor = True

        elif in_tensor:
            if self.express:
                # express mode: skip the tensor and everything after it
                break

            motiflen = cluster.motiflen

            # motiflen x motiflen grid of independent, initially empty lists
            cluster.covariance = [ ]
            for mi in range(0, motiflen):
                cluster.covariance.append([ [] for mj in range(0, motiflen) ])

            # NOTE(review): value rows seen before any "i j" header line are
            # appended at [-1][-1] because i and j start at -1 - confirm that
            # a header line always comes first.
            i = j = -1
            subline = line
            dimline = re.compile(r'^[0-9]+\s+[0-9]+\s*$')

            while True:
                if subline.startswith(Tags.END):
                    in_tensor = False
                    break

                fields = subline.split()

                if dimline.match(subline):
                    # header row: 1-based "i j" position of the next values
                    istr, jstr = fields
                    i, j = int(istr) - 1, int(jstr) - 1
                    assert 0 <= i < motiflen and 0 <= j < motiflen, "Covariance is a [motiflen x motiflen] matrix."
                else:
                    values = list(map(float, fields))
                    cluster.covariance[i][j].append(values)

                subline = next(lines, None)
                if subline is None:
                    in_tensor = False
                    break

        elif line.startswith(Tags.END):
            break

    return cluster
327