1 """
2 APIs for working with protein structure fragments and libraries.
3
4 This package contains the nuts and bolts of HHfrag. Everything here revolves
5 around the L{Target} class, which describes a protein structure prediction
6 target. One typically assigns fragments (L{Assignment}s) to the target and then
7 builds a fragment library with L{RosettaFragsetFactory}.
8
9 @note: Internal or legacy objects are intentionally left undocumented.
10 This typically indicates experimental code.
11 """
12
13 import os
14 import numpy
15
16 import csb.io
17 import csb.core
18 import csb.bio.utils
19 import csb.bio.structure
20 import csb.bio.sequence
21 from csb.bio.structure import SecondaryStructure
31
37
38 RANDOM_RMSD = { 5: 1.8749005857255376, 6: 2.4314283686276261, 7: 2.9021135267789608, 8: 3.2477716200172715, 9: 3.5469606556031708, 10: 3.8295465524456329,
39 11: 4.1343107114131783, 12: 4.3761697929053014, 13: 4.6707299668248394, 14: 4.9379016881069733, 15: 5.1809028645084911, 16: 5.4146957142595662,
40 17: 5.7135948448156988, 18: 5.9597935432566782, 19: 6.1337340535741962, 20: 6.3962825155503271, 21: 6.6107937773415166, 22: 6.8099096274123401,
41 23: 7.0435583846849639, 24: 7.2160956482560970, 25: 7.4547896324594962, 26: 7.6431870072434211, 27: 7.8727812194173836, 28: 8.0727393298443637,
42 29: 8.2551450998965326, 30: 8.4413583511786587, 31: 8.5958719774122052, 32: 8.7730435506242408, 33: 8.9970648837941649, 34: 9.1566521405105163,
43 35: 9.2828620878454728, 36: 9.4525824357923405, 37: 9.6322126445253300, 38: 9.7851684750961176, 39: 9.9891454649821476, 40: 10.124373939352028,
44 41: 10.284348528344765, 42: 10.390457305096271, 43: 10.565792044674239, 44: 10.676532740033737, 45: 10.789537132283652, 46: 11.004475543757550,
45 47: 11.064541647783571, 48: 11.231219875286985, 49: 11.319222637391441, 50: 11.485478165340824, 51: 11.607522494435521, 52: 11.700268836069840,
46 53: 11.831245255954073, 54: 11.918975893263905 }
49 """
50 Base class, representing a match between a fragment and its target.
51 """
52
53 - def __init__(self, id, qstart, qend, probability, rmsd, tm_score, qlength):
62
63 @property
66
67 @property
70
71 @property
74
75 @property
78
79 @property
82
83 @property
86
87 @property
89 return self._probability
90
91 @property
94
95 @property
97 raise NotImplementedError()
98
99 @property
101 raise NotImplementedError()
102
103 @property
105 raise NotImplementedError()
106
108
109 - def __init__(self, target, isites_prediction, hmm_prediction, combined_prediction):
110
111 self.target = target
112
113 self.isites = isites_prediction
114 self.hmm = hmm_prediction
115 self.combined = combined_prediction
116
118
119 - def __init__(self, alignment, coordinates):
123
125 """
126 Fragment-based phi/psi angles predictor.
127
128 @param target: target protein, containing fragment assignments
129 @type target: L{Target}
130 @param threshold: RMSD distance threshold for L{FragmentCluster}-based filtering
131 @type threshold: float
132 @param extend: pick alternative, longer cluster reps, if possible
133 @type extend: bool
134 @param init: populate all L{FragmentCluster}s on instantiation. If False, this step
135 will be performed on demand (the first time C{predictor.compute()} is invoked)
136
137 @note: if C{init} is False, the first call to C{predictor.compute()} might take a long
138 time. Subsequent calls will be very fast.
139 """
140
141 - def __init__(self, target, threshold=1.5, extend=False, init=False):
142
143 if not isinstance(target, Target):
144 raise TypeError(target)
145 if target.matches.length == 0:
146 raise ValueError('This target has no fragment assignments')
147
148 self._target = target
149 self._threshold = float(threshold)
150 self._extend = bool(extend)
151
152 self._initialized = False
153 self._reps = {}
154 self._clusters = {}
155
156 if init:
157 self.init()
158
159 @property
162
163 @property
165 return self._threshold
166
167 @property
170
191
207
215
238
271
273 """
274 Filter the current fragment map and create a new, completely flat,
275 non-overlapping map built from centroids, assigned iteratively by
276 decreasing confidence. Centroids with lower confidence which overlap
277 with previously assigned centroids will be trimmed to fill existing
278 gaps only.
279
280 @return: L{TorsionPredictionInfo} instances, one for each target residue
281 @rtype: tuple of L{TorsionPredictionInfo}
282 """
283
284 if not self._initialized:
285 self.init()
286
287 prediction = []
288 slots = set(range(1, self.target.length + 1))
289
290 reps = list(self._reps.values())
291 reps.sort(key=lambda i: i.confidence, reverse=True)
292
293 for rep in reps:
294
295 for rank in range(rep.centroid.qstart, rep.centroid.qend + 1):
296 if rank in slots:
297 torsion = rep.centroid.torsion_at(rank, rank)[0]
298 ss = rep.centroid.sec_structure_at(rank, rank)[0]
299 info = TorsionPredictionInfo(rank, rep.confidence, torsion, ss, primary=True)
300
301 prediction.append(info)
302 slots.remove(rank)
303
304 for rank in slots:
305 prediction.append(TorsionPredictionInfo(rank, 0, None))
306
307 prediction.sort(key=lambda i: i.rank)
308 return tuple(prediction)
309
311 """
312 Extract all torsion angles coming from all fragments, which had survived
313 the filtering and cover residue C{#rank}.
314
315 @param rank: target residue rank
316 @type rank: int
317
318 @return: all L{TorsionAngles} for a cluster at the specified residue
319 @rtype: tuple of L{TorsionAngles}
320 """
321
322 if not self._initialized:
323 self.init()
324 if rank not in self._clusters:
325 return tuple()
326
327 angles = []
328
329 for node in self._clusters[rank]:
330 fragment = node.fragment
331 torsion = fragment.torsion_at(rank, rank)[0]
332 angles.append(torsion)
333
334 return tuple(angles)
335
338 """
339 Struct container for a single torsion angle prediction.
340
341 @param rank: target residue rank
342 @type rank: int
343 @param confidence: confidence of prediction
344 @type confidence: float
345 @param torsion: assigned phi/psi/omega angles
346 @type torsion: L{TorsionAngles}
347 @param dssp: assigned secondary structure
348 @type dssp: L{SecondaryStructureElement}
349 @param primary: if True, designates that the assigned angles are extracted
350 from the L{ClusterRep} at residue C{#rank}; otherwise: the
351 angles are coming from another, overlapping L{ClusterRep}
352
353 """
354
355 - def __init__(self, rank, confidence, torsion, dssp, primary=False):
362
369
371 return '<TorsionPredictionInfo: {0.confidence:6.3f} at #{0.rank}>'.format(self)
372
375
387
388 -class Target(csb.core.AbstractNIContainer):
389 """
390 Represents a protein structure prediction target.
391
392 @param id: target sequence ID, in PDB accnC format
393 @type id: str
394 @param length: total target sequence length
395 @type length: int
396 @param residues: a list, containing target's residues. See also
397 L{Target.from_sequence}
398 @type residues: iterable of L{csb.bio.structure.ProteinResidue}s
399 """
400
420
421 @staticmethod
442
443 @staticmethod
456
457 @staticmethod
462
463 @property
465 return self._residues
466
467 @property
470
471 @property
474
475 @property
477 return self._accession
478
479 @property
481 return self._chain_id
482
483 @property
486
487 @property
490
491 @property
494
495 @property
497 return self._assignments
498
499 @property
501 return self._residues
502
503 @property
505 return self._segments
506
528
530 """
531 Assign a bunch of fragments at once.
532 @type fragments: iterable of L{Assignment}s
533 """
534 for frag in fragments:
535 self.assign(frag)
536
537 - def filter(self, threshold=1.5, extend=False):
538 """
539 Filter the current fragment map using a L{FragmentCluster}.
540
541 @param threshold: cluster RMSD threshold (see L{FragmentCluster})
542 @type threshold: float
543 @param extend: pick extended alternatives where possible (default=False)
544 @type extend: bool
545
546 @return: a new target, containing only cluster centroids/reps
547 @rtype: L{Target}
548 """
549
550 target = self.clone()
551
552 for residue in self.residues:
553 rep = residue.filter(threshold=threshold, extend=extend)
554
555 if rep is not None:
556 target.assign(rep.centroid)
557
558 return target
559
573
576 """
577 Wrapper around L{Target}'s native residues. Decorates them with additional,
578 fragment-related methods.
579
580 @type native_residue: L{csb.bio.structure.ProteinResidue}
581 """
582
588
589 @property
592
593 @property
596
597 @property
599 return self._assignments
600
601 - def assign(self, assignment_info):
602 self._assignments._append_item(assignment_info)
603
622
624 """
625 Filter all fragments, covering this position in the L{Target} using a
626 L{FragmentCluster}.
627
628 @param method: one of the L{Metrics} members (default=L{Metrics.RMSD})
629 @type method: str
630 @param threshold: cluster RMSD threshold (see L{FragmentCluster})
631 @type threshold: float
632 @param extend: pick extended alternative where possible (default=False)
633 @type extend: bool
634
635 @return: cluster's representative (if converged) or None
636 @rtype: L{ClusterRep} or None
637 """
638
639 try:
640 nodes = []
641 for ai in self.assignments:
642 node = ClusterNode.create(ai.fragment, method, extend)
643 nodes.append(node)
644
645 cluster = FragmentCluster(nodes, threshold=threshold)
646
647 center = cluster.shrink(minitems=0)
648 if center.has_alternative:
649 center.exchange()
650
651 return center
652
653 except (ClusterExhaustedError, ClusterDivergingError):
654 return None
655
668
670 """
671 @return: the residue-wise precision of the fragment library at the
672 current position (percentage).
673
674 @param threshold: true-positive RMSD cutoff (default=1.5)
675 @type threshold: float
676 @rtype: float
677 """
678
679 if self.assignments.length < 1:
680 return None
681 else:
682 positive = [a for a in self.assignments if a.fragment.rmsd <= threshold]
683 pos = len(positive) * 100.0 / self.assignments.length
684
685 return pos
686
823
854
856
857 binsize = float(binsize)
858 bins = numpy.ceil(numpy.array(data) / binsize)
859
860 hist = dict.fromkeys(bins, 0)
861 for bin in bins:
862 hist[bin] += (1.0 / len(bins))
863
864 freq = numpy.array(hist.values())
865 return - numpy.sum(freq * numpy.log(freq))
866
868
869 rmsds = self.pairwise_rmsd()
870 return self._entropy(rmsds, binsize)
871
876
878
879 rmsds = self.pairwise_rmsd()
880
881 if len(rmsds) < 1:
882 return None
883
884 return sum([1 for i in rmsds if i <= threshold]) / float(len(rmsds))
885
887
888 sa_rmsds = self.pairwise_sa_rmsd(profiles=profiles)
889
890 if len(sa_rmsds) < 1:
891 return None
892
893 return sum([1 for i in sa_rmsds if i <= threshold]) / float(len(sa_rmsds))
894
901
903
904 cons = self.rmsd_consistency()
905
906 if cons is None:
907 return 0
908 else:
909 return numpy.log10(self.count) * cons
910
912
921
922 @property
924 return self._assignment.backbone[self._relrank]
925
926 @property
928 return self._assignment
929
931 """
932 Represents a match between a fragment and its target.
933
934 @param source: source structure (must have torsion angles precomputed)
935 @type source: L{csb.bio.structure.Chain}
936 @param start: start position in C{source} (rank)
937 @type start: int
938 @param end: end position in C{source} (rank)
939 @type end: int
940 @param id: fragment ID
941 @type id: str
942 @param qstart: start position in target (rank)
943 @type qstart: int
944 @param qend: end position in target (rank)
945 @type qend: int
946 @param probability: probability of assignment
947 @type probability: float
948 @param rmsd: RMSD of the fragment, compared to target's native structure
949 @type rmsd: float
950 """
951
952 - def __init__(self, source, start, end, id, qstart, qend, probability, rmsd, tm_score=None,
953 score=None, neff=None, segment=None, internal_id=None):
954
955 assert source.has_torsion
956 sub = source.subregion(start, end, clone=True)
957 try:
958 calpha = [r.atoms['CA'].vector.copy() for r in sub.residues]
959 except csb.core.ItemNotFoundError:
960 raise csb.bio.structure.Broken3DStructureError()
961 torsion = [r.torsion.copy() for r in sub.residues]
962
963 self._calpha = csb.core.ReadOnlyCollectionContainer(items=calpha, type=numpy.ndarray)
964 self._torsion = torsion
965 self._sequence = sub.sequence
966
967 self._source_id = source.accession[:4] + source.id
968 self._start = start
969 self._end = end
970
971 self._score = score
972 self._neff = neff
973 self._ss = None
974
975 self._segment_start = segment
976 self.internal_id = internal_id
977
978 super(Assignment, self).__init__(id, qstart, qend, probability, rmsd, tm_score, None)
979
980 self._ss = SecondaryStructure('-' * self.length)
981
982 @staticmethod
1002
1003 @property
1006
1007 @property
1009 return self._sequence
1010
1011 @property
1013 return self._torsion
1014
1015 @property
1017 return self._source_id
1018
1019 @property
1022
1023 @property
1026
1027 @property
1030
1031 @property
1034
1035 @property
1037 return self._segment_start
1038
1039 @property
1042 @secondary_structure.setter
1051
1061
1066
1068 """
1069 @return: True if the fragment is centered around position=C{rank}.
1070 @rtype: bool
1071 """
1072
1073 if self.qstart < rank < self.qend:
1074 if (rank - self.qstart + 1) > 0.4 * (self.qend - self.qstart + 1):
1075 return True
1076
1077 return False
1078
1080 """
1081 @return: the CA coordinates of the fragment at the specified subregion.
1082 @rtype: list
1083 """
1084
1085 self._check_range(qstart, qend)
1086
1087 relstart = qstart - self.qstart
1088 relend = qend - self.qstart + 1
1089
1090 return self.backbone[relstart : relend]
1091
1093 """
1094 @return: the torsion angles of the fragment at the specified subregion.
1095 @rtype: list
1096 """
1097
1098 self._check_range(qstart, qend)
1099
1100 relstart = qstart - self.qstart
1101 relend = qend - self.qstart + 1
1102
1103 return self.torsion[relstart : relend]
1104
1106
1107 self._check_range(qstart, qend)
1108
1109 relstart = qstart - self.qstart
1110 relend = qend - self.qstart + 1
1111
1112 return sa_string[relstart : relend]
1113
1121
1133
1134 - def chain_at(self, source, qstart, qend):
1142
1144 """
1145 @type other: L{Assignment}
1146 @return: target positions, covered by both C{self} and C{other}
1147 @rtype: set of int
1148 """
1149
1150 qranks = set(range(self.qstart, self.qend + 1))
1151 sranks = set(range(other.qstart, other.qend + 1))
1152
1153 return qranks.intersection(sranks)
1154
1155 - def rmsd_to(self, other, min_overlap=5):
1156 """
1157 @return: the CA RMSD between C{self} and C{other}.
1158
1159 @param other: another fragment
1160 @type other: L{Assignment}
1161 @param min_overlap: require at least that number of overlapping residues
1162 (return None if not satisfied)
1163 @type min_overlap: int
1164
1165 @rtype: float
1166 """
1167
1168 common = self.overlap(other)
1169
1170 if len(common) >= min_overlap:
1171
1172 qstart, qend = min(common), max(common)
1173
1174 q = self.backbone_at(qstart, qend)
1175 s = other.backbone_at(qstart, qend)
1176
1177 if len(q) > 0 and len(s) > 0:
1178 return csb.bio.utils.rmsd(numpy.array(q), numpy.array(s))
1179
1180 return None
1181
1182 - def nrmsd_to(self, other, min_overlap=5):
1197
1198 - def mda_to(self, other, min_overlap=5):
1199
1200 common = self.overlap(other)
1201
1202 if len(common) >= min_overlap:
1203
1204 qstart, qend = min(common), max(common)
1205
1206 q = self.torsion_at(qstart, qend)
1207 s = other.torsion_at(qstart, qend)
1208
1209 if len(q) > 0 and len(s) > 0:
1210
1211 maxphi = max(numpy.abs(i.phi - j.phi) for i, j in zip(q, s)[1:])
1212 maxpsi = max(numpy.abs(i.psi - j.psi) for i, j in zip(q, s)[:-1])
1213
1214 return max(maxphi, maxpsi)
1215
1216 return None
1217
1218 - def to_rosetta(self, source, qstart=None, qend=None, weight=None):
1219 """
1220 @deprecated: this method will be deleted soon. Use
1221 L{csb.bio.fragments.rosetta.OutputBuilder} instead.
1222 """
1223 stream = csb.io.MemoryStream()
1224
1225 if weight is None:
1226 weight = self.probability
1227 if not qstart:
1228 qstart = self.qstart
1229 if not qend:
1230 qend = self.qend
1231
1232 source.compute_torsion()
1233 chain = self.chain_at(source, qstart, qend)
1234
1235 for i, r in enumerate(chain.residues):
1236
1237 acc = self.source_id[:4]
1238 ch = self.source_id[4].upper()
1239
1240 start = qstart - self.qstart + self.start + i
1241 aa = r.type
1242 ss = 'L'
1243 phi, psi, omega = 0, 0, 0
1244 if r.torsion.phi:
1245 phi = r.torsion.phi
1246 if r.torsion.psi:
1247 psi = r.torsion.psi
1248 if r.torsion.omega:
1249 omega = r.torsion.omega
1250
1251 stream.write(' {0:4} {1:1} {2:>5} {3!s:1} {4!s:1} {5:>8.3f} {6:>8.3f} {7:>8.3f} {8:>8.3f}\n'.format(acc, ch, start, aa, ss, phi, psi, omega, weight))
1252
1253 return stream.getvalue()
1254
1257
1260
1263
1265 """
1266 Provides clustering/filtering of the fragments, covering a common residue
1267 in the target. Clustering is done via iterative shrinking of the cluster.
1268 At each iteration, node rejection (deletion) is attempted for each node. The
1269 node rejection, causing the most significant drop in the average pairwise
1270 distance (RMSD) in the cluster, is retained. This procedure is repeated
1271 until: 1) the average pairwise RMSD drops below the C{threshold} (converged),
1272 2) the cluster gets exhausted or 3) node rejection no longer
1273 causes a drop in the average distance (not converging).
1274
1275 @param items: cluster members
1276 @type items: iterable of L{ClusterNode}s
1277 @param threshold: RMSD threshold; continue shrinking until the mean distance
1278 drops below this value (default=1.5)
1279 @type threshold: float
1280 @param connectedness: use only nodes which are connected to at least c% of all
1281 initial nodes (default=0.5, that means 50%)
1282 @type connectedness: float
1283 """
1284
1285 MIN_LENGTH = 6
1286
1287 - def __init__(self, items, threshold=1.5, connectedness=0.5):
1316
1317 @property
1319 return len(self._items)
1320
1321 @property
1323 return tuple(self._items)
1324
1325 @property
1327 return tuple(i.fragment for i in self._items)
1328
1329 @property
1331 return self._threshold
1332 @threshold.setter
1334 self._threshold = float(value)
1335
1336 @property
1338 return self._connectedness
1339
1341
1342 d = []
1343
1344 for i in self._matrix:
1345 if skip is i:
1346 continue
1347
1348 for j in self._matrix[i]:
1349 if skip is not j:
1350 d.append(self._matrix[i][j])
1351
1352 return d
1353
1355
1356 if j in self._matrix[i]:
1357 return self._matrix[i][j]
1358 else:
1359 return None
1360
1361 - def mean(self, skip=None):
1362 """
1363 @return: the current mean distance in the cluster
1364 @rtype: float
1365 """
1366
1367 d = self._distances(skip=skip)
1368
1369 if len(d) > 0:
1370 return numpy.mean(d)
1371 else:
1372 raise ClusterExhaustedError()
1373
1375 """
1376 @return: the current representative fragment
1377 @rtype: L{ClusterRep}
1378
1379 @note: the cluster rep is the node with the lowest average distance
1380 to all other nodes. If a fixed fragment exists, structurally similar
1381 to the rep, but longer, this fragment may be suggested as an alternative
1382 (see also L{ClusterRep}).
1383 """
1384
1385 alt = None
1386 cen = None
1387 avg = None
1388
1389 for i in self._matrix:
1390
1391 curravg = numpy.mean(list(self._matrix[i].values()))
1392
1393 if avg is None or curravg < avg:
1394 avg = curravg
1395 cen = i
1396 elif curravg == avg:
1397 if i.fragment.length > cen.fragment.length:
1398 cen = i
1399
1400 d = self._distances()
1401 mean = numpy.mean(d)
1402 cons = sum(1.0 for i in d if i <= self.threshold) / len(d)
1403
1404 for i in self._matrix:
1405 if i is not cen and i.fixed and i.fragment.length > cen.fragment.length:
1406 distance = self._distance(i, cen)
1407 if distance is not None and distance < 0.5 * self.threshold:
1408 if alt is None or alt.fragment.length < i.fragment.length:
1409 alt = i
1410
1411 return ClusterRep(cen, mean, cons, len(self._matrix[cen]), alternative=alt,
1412 rejections=(self._initcount - self.count))
1413
1415 """
1416 Remove C{item} from the cluster.
1417
1418 @type item: L{ClusterNode}
1419 @raise ClusterExhaustedError: if this is the last remaining item
1420 """
1421
1422 if self.count == 1:
1423 raise ClusterExhaustedError()
1424
1425 assert not item.fixed
1426
1427 for i in self._matrix:
1428 if item in self._matrix[i]:
1429 del self._matrix[i][item]
1430
1431 del self._matrix[item]
1432 self._items.remove(item)
1433
1435 """
1436 Shrink the cluster by a single node.
1437
1438 @return: True on successful shrink, False otherwise (e.g. if
1439 already converged)
1440 @rtype: bool
1441 @raise ClusterExhaustedError: if exhausted
1442 @raise ClusterDivergingError: if not converging
1443 """
1444
1445 mean = self.mean()
1446 if mean <= self.threshold or self.count == 1:
1447 return False
1448
1449 m = {}
1450
1451 for i in self._matrix:
1452 if not i.fixed:
1453 newmean = self.mean(skip=i)
1454 m[newmean] = i
1455
1456 if len(m) == 0:
1457 raise ClusterExhaustedError()
1458
1459 newmean = min(m)
1460
1461 if newmean > mean:
1462 raise ClusterDivergingError()
1463 elif newmean < mean:
1464 junk = m[newmean]
1465 self.reject(junk)
1466 return True
1467 else:
1468 return False
1469
1470 - def shrink(self, minitems=2):
1471 """
1472 Start automatic shrinking.
1473
1474 @param minitems: absolute minimum of the number of nodes in the cluster
1475 @type minitems: int
1476
1477 @return: cluster's representative: the node with the lowest average
1478 distance to all other nodes in the cluster
1479 @rtype: L{ClusterRep}
1480
1481 @raise ClusterExhaustedError: if C{self.count} < C{minitems} and
1482 still not converged
1483 """
1484
1485 if self.count > minitems:
1486
1487 while self.shrinkone():
1488 if self.count <= minitems:
1489 raise ClusterExhaustedError()
1490 else:
1491 raise ClusterExhaustedError()
1492
1493 return self.centroid()
1494
1496 """
1497 Cluster node.
1498
1499 @param fragment: fragment
1500 @type fragment: L{Assignment}
1501 @param distance: distance metric (a L{Metrics} member, default is RMSD)
1502 @type distance: str
1503 @param fixed: mark this node as fixed (cannot be rejected)
1504 @type fixed: bool
1505 """
1506
1507 FIXED = 0.7
1508
1509 @staticmethod
1521
1531
1533 """
1534 @return: the distance between self and another node
1535 @type other: L{ClusterNode}
1536 @rtype: float
1537 """
1538 return self._distance(other.fragment)
1539
1541 """
1542 Cluster's representative (centroid) node. This object carries the
1543 result of shrinking itself.
1544
1545 @param centroid: rep node
1546 @type centroid: L{ClusterNode}
1547 @param mean: current mean distance in the cluster
1548 @type mean: float
1549 @param consistency: percentage of pairwise distances below the RMSD C{threshold}
1550 @type consistency: float
1551 @param count: current number of nodes in the cluster
1552 @type count: int
1553 @param rejections: total number of rejections
1554 @type rejections: int
1555 @param alternative: suggested cluster rep alternative (e.g. structurally
1556 similar to the centroid, but longer)
1557 @type param:
1558 """
1559
1560 - def __init__(self, centroid, mean, consistency, count, rejections=0, alternative=None):
1573
1574 @property
1576 """
1577 Confidence of assignment: log10(count) * consistency
1578 """
1579 if self.count <= 0 or self.count is None or self.consistency is None:
1580 return 0
1581 else:
1582 return numpy.log10(self.count) * self.consistency
1583
1584 @property
1586 return self._centroid
1587
1588 @property
1590 return self._alternative
1591
1592 @property
1594 return self._alternative is not None
1595
1596 @property
1599
1600 @property
1602 return self._consistency
1603
1604 @property
1607
1608 @property
1610 return self._rejections
1611
1613 """
1614 If an alternative is available, swap the centroid and the alternative.
1615 """
1616
1617 if self._alternative is not None:
1618
1619 centroid = self._centroid
1620 self._centroid = self._alternative
1621 self._alternative = centroid
1622
1628
1630
1631 @staticmethod
1645
1646 - def __init__(self, center, qstart, qend):
1656
1657 @property
1660
1661 @property
1664
1665 @property
1668
1669 @property
1672
1673 @property
1676
1677 @property
1680
1681 - def chain(self, source):
1683
1686
1742
1745
1746 - def __init__(self, rank, confidence=None, count=None, confident=True, rep=None):
1753
1755 """
1756 Simplifies the construction of fragment libraries.
1757 """
1758
1762
1779
1781 """
1782 Build a fixed-length fragment library from a list of
1783 variable-length L{Assignment}s.
1784
1785 @param fragments: source fragments
1786 @type fragments: iterable of L{RosettaFragment}s
1787 @param window: fixed-length fragment size (for classic Rosetta: choose 9)
1788 @type window: int
1789
1790 @return: fixed-length fragment library
1791 @rtype: L{RosettaFragmentMap}
1792 """
1793
1794 frags = []
1795
1796 for f in fragments:
1797 for qs in range(f.qstart, f.qend - window + 1):
1798 frags.append(f.subregion(qs, qs + window - 1))
1799
1800 return self.rosetta.RosettaFragmentMap(frags)
1801
1802 - def make_combined(self, target, filling, threshold=0.5, callback=None):
1803 """
1804 Complement C{target}'s assignments with C{filling} (e.g. rosetta fragments).
1805 The regions to be complemented are determined by calculating the confidence
1806 at each residue (by filtering).
1807
1808
1809 @param target: target protein
1810 @type target: L{Target}
1811 @param filling: additional fragments to place in the low-conf regions
1812 @type filling: L{RosettaFragmentMap} or iterable of L{RosettaFragment}
1813 @param threshold: confidence threshold
1814 @type threshold: float
1815
1816 @return: complemented fragment library
1817 @rtype: L{RosettaFragmentMap}
1818 """
1819
1820 fragmap = self.make_fragset(target)
1821 covered = set()
1822
1823 for r in target.residues:
1824
1825 if r.assignments.length == 0:
1826 if callback:
1827 callback(ResidueEventInfo(r.native.rank, None, 0, False))
1828 continue
1829
1830 cluster = r.filter()
1831 if cluster is None:
1832 if callback:
1833 callback(ResidueEventInfo(r.native.rank, 0, 0, False))
1834 continue
1835
1836 if cluster.confidence >= threshold:
1837 covered.add(r.native.rank)
1838 elif callback:
1839 callback(ResidueEventInfo(r.native.rank, cluster.confidence, cluster.count, False))
1840
1841 for r in target.residues:
1842 if r.native.rank not in covered:
1843 fragmap.mark_unconfident(r.native.rank)
1844
1845 for frag in filling:
1846 fragmap.complement(frag)
1847
1848 return fragmap
1849
1886
1887 - def mix(self, *fragsets):
1907
1910
1912
1913 FACTORY = None
1914 DSN = None
1915
1916 - def __init__(self, factory=None, dsn=None):
1917
1918 self.factory = factory or self.__class__.FACTORY
1919 self.cs = dsn or self.__class__.DSN
1920 self.connection = None
1921 self.cursor = None
1922
1924
1925 self.connection = self.factory(self.cs)
1926 try:
1927 self.cursor = self.connection.cursor()
1928 except:
1929 self.connection.close()
1930 raise
1931 return self
1932
1934 try:
1935 if not self.cursor.closed:
1936 self.cursor.close()
1937 finally:
1938 if not self.connection.closed:
1939 self.connection.close()
1940
1961
1962 @staticmethod
1963 - def connection_string(database='FragmentBenchmarks', host='', username='', password=''):
1964
1965 fields = ['dbname={0}'.format(database)]
1966
1967 if host:
1968 fields.append('host={0}'.format(host))
1969 if username:
1970 fields.append('user={0}'.format(username))
1971 fields.append('password={0}'.format(password))
1972
1973 return ' '.join(fields)
1974
1981
1988
1995
2002
2003 - def scores(self, benchmark_id, type):
2009
2016
2025
2026 - def structure(self, accession, chain=None):
2027
2028 pdbfile = self._find(accession, self._pdb)
2029
2030 if not pdbfile and chain:
2031 pdbfile = self._find(accession + chain, self._pdb)
2032
2033 if not pdbfile:
2034 raise IOError('{0} not found here: {1}'.format(accession, self._pdb))
2035
2036 return self._parser(pdbfile).parse_structure()
2037
2038 - def prediction(self, target_id, type, ss=False):
2039
2040 info = self.target_details(target_id)
2041 if not info:
2042 raise ValueError('No such Target ID in the database: {0}'.format(target_id))
2043 row = info[0]
2044
2045 id = row["Accession"]
2046 length = float(row["Length"])
2047 overlap = float(row["MaxOverlap"]) / (length or 1.)
2048
2049 native = self.structure(id[:4], id[4]).chains[id[4]]
2050 segments = self.target_segments(target_id)
2051 target = self._factory.target(id, length, native.residues, overlap, segments)
2052
2053 source = None
2054
2055 for row in self.assignments(target_id, type):
2056
2057 src_accession = row['Source'][:4]
2058 src_chain = row['Source'][4]
2059
2060 if source is None or source.accession != src_accession:
2061 try:
2062 source = self.structure(src_accession, src_chain)
2063 except (IOError, ValueError) as ex:
2064 target.errors.append(ex)
2065 continue
2066
2067 if src_chain == '_':
2068 frag_chain = source.first_chain
2069 else:
2070 frag_chain = source.chains[src_chain]
2071 if not frag_chain.has_torsion:
2072 frag_chain.compute_torsion()
2073
2074 fragment = self._factory.assignment(
2075 source=frag_chain,
2076 start=row['SourceStart'],
2077 end=row['SourceEnd'],
2078 id=row['FragmentName'],
2079 qstart=row['Start'],
2080 qend=row['End'],
2081 probability=row['Probability'],
2082 score=row['Score'],
2083 neff=row['Neff'],
2084 rmsd=row['RMSD'],
2085 tm_score=row['TMScore'],
2086 segment=row['SegmentStart'],
2087 internal_id=row['InternalID'])
2088
2089 target.assign(fragment)
2090
2091 if ss:
2092 self._attach_sec_structure(target, target_id, type)
2093
2094 return target
2095
2111