libpappsomspp
Library for mass spectrometry
aamodification.cpp
Go to the documentation of this file.
1 /**
2  * \file pappsomspp/amino_acid/aamodification.h
3  * \date 7/3/2015
4  * \author Olivier Langella
5  * \brief amino acid modification model
6  */
7 
8 /*******************************************************************************
9  * Copyright (c) 2015 Olivier Langella <Olivier.Langella@moulon.inra.fr>.
10  *
11  * This file is part of the PAPPSOms++ library.
12  *
13  * PAPPSOms++ is free software: you can redistribute it and/or modify
14  * it under the terms of the GNU General Public License as published by
15  * the Free Software Foundation, either version 3 of the License, or
16  * (at your option) any later version.
17  *
18  * PAPPSOms++ is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21  * GNU General Public License for more details.
22  *
23  * You should have received a copy of the GNU General Public License
24  * along with PAPPSOms++. If not, see <http://www.gnu.org/licenses/>.
25  *
26  * Contributors:
27  * Olivier Langella <Olivier.Langella@moulon.inra.fr> - initial API and
28  *implementation
29  ******************************************************************************/
30 
31 #include <QRegExp>
32 #include <QDebug>
33 #include <cmath>
34 
35 #include "aamodification.h"
36 #include "aa.h"
37 #include "../pappsoexception.h"
38 #include "../mzrange.h"
39 #include "../peptide/peptide.h"
40 #include "../obo/filterobopsimodsink.h"
41 #include "../obo/filterobopsimodtermaccession.h"
42 #include "../exception/exceptionnotfound.h"
43 
44 /*
45 
46 inline void initMyResource() {
47  Q_INIT_RESOURCE(resources);
48 }
49 */
50 
51 namespace pappso
52 {
53 
55 
56 AaModification::AaModification(const QString &accession, pappso_double mass)
57  : m_accession(accession), m_mass(mass)
58 {
64 
65  m_mapIsotope = {{Isotope::C13, 0},
66  {Isotope::H2, 0},
67  {Isotope::N15, 0},
68  {Isotope::O17, 0},
69  {Isotope::O18, 0},
70  {Isotope::S33, 0},
71  {Isotope::S34, 0},
72  {Isotope::S36, 0}};
73 }
74 
75 
76 AaModification::AaModification(AaModification &&toCopy) // move constructor
77  : m_accession(toCopy.m_accession),
78  m_name(toCopy.m_name),
79  m_mass(toCopy.m_mass),
80  m_atomCount(std::move(toCopy.m_atomCount)),
81  m_mapIsotope(toCopy.m_mapIsotope)
82 {
83  m_origin = toCopy.m_origin;
84 }
85 
87 {
88 }
89 
90 const QString &
92 {
93 
94  qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__;
95  return m_accession;
96 }
97 const QString &
99 {
100  return m_name;
101 }
104  MapAccessionModifications ret;
105 
106  return ret;
107  }();
108 
111 {
112  AaModification *new_mod;
113  // qDebug() << " AaModification::createInstance begin";
114  new_mod = new AaModification(term.m_accession, term.m_diffMono);
115  // xref: DiffFormula: "C 0 H 0 N 0 O 1 S 0"
116  new_mod->setDiffFormula(term.m_diffFormula);
117  new_mod->setXrefOrigin(term.m_origin);
118  new_mod->m_name = term.m_name;
119  return new_mod;
120 }
121 
123 AaModification::createInstance(const QString &accession)
124 {
125  if(accession == "internal:Nter_hydrolytic_cleavage_H")
126  {
127  OboPsiModTerm term;
128  term.m_accession = accession;
129  term.m_diffFormula = "H 1";
130  term.m_diffMono = MPROTIUM;
131  term.m_name = "Nter hydrolytic cleavage H+";
132  return (AaModification::createInstance(term));
133  }
134  if(accession == "internal:Cter_hydrolytic_cleavage_HO")
135  {
136  OboPsiModTerm term;
137  term.m_accession = accession;
138  term.m_diffFormula = "H 1 O 1";
139  term.m_diffMono = MPROTIUM + MASSOXYGEN;
140  term.m_name = "Cter hydrolytic cleavage HO";
141  return (AaModification::createInstance(term));
142  }
143  if(accession.startsWith("MUTATION:"))
144  {
145  QRegExp regexp_mutation("^MUTATION:([A-Z])=>([A-Z])$");
146  if(regexp_mutation.exactMatch(accession))
147  {
148  qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__ << " "
149  << regexp_mutation.capturedTexts()[1].at(0) << " "
150  << regexp_mutation.capturedTexts()[2].at(0);
151 
152  Aa aa_from(
153  regexp_mutation.capturedTexts()[1].toStdString().c_str()[0]);
154  Aa aa_to(regexp_mutation.capturedTexts()[2].toStdString().c_str()[0]);
155  AaModificationP instance_mutation =
156  createInstanceMutation(aa_from, aa_to);
157  return instance_mutation;
158  // m_psiModLabel<<"|";
159  }
160  }
161  // initMyResource();
162  FilterOboPsiModSink term_list;
163  FilterOboPsiModTermAccession filterm_accession(term_list, accession);
164 
165  OboPsiMod psimod(filterm_accession);
166 
167  try
168  {
169  return (AaModification::createInstance(term_list.getOne()));
170  }
171  catch(ExceptionNotFound &e)
172  {
173  throw ExceptionNotFound(QObject::tr("modification not found : [%1]\n%2")
174  .arg(accession)
175  .arg(e.qwhat()));
176  }
177 }
178 
179 void
180 AaModification::setXrefOrigin(const QString &origin)
181 {
182  // xref: Origin: "N"
183  // xref: Origin: "X"
184  m_origin = origin;
185 }
186 void
187 AaModification::setDiffFormula(const QString &diff_formula)
188 {
189  QRegExp rx("(^|\\s)([C,H,O,N,H,S])\\s([-]{0,1}\\d+)");
190  int pos = 0;
191 
192  while((pos = rx.indexIn(diff_formula, pos)) != -1)
193  {
194  qDebug() << rx.cap(2) << " " << rx.cap(3);
195  if(rx.cap(2) == "C")
196  {
197  m_atomCount[AtomIsotopeSurvey::C] = rx.cap(3).toInt();
198  }
199  else if(rx.cap(2) == "H")
200  {
201  m_atomCount[AtomIsotopeSurvey::H] = rx.cap(3).toInt();
202  }
203  else if(rx.cap(2) == "N")
204  {
205  m_atomCount[AtomIsotopeSurvey::N] = rx.cap(3).toInt();
206  }
207  else if(rx.cap(2) == "O")
208  {
209  m_atomCount[AtomIsotopeSurvey::O] = rx.cap(3).toInt();
210  }
211  else if(rx.cap(2) == "S")
212  {
213  m_atomCount[AtomIsotopeSurvey::S] = rx.cap(3).toInt();
214  }
215  pos += rx.matchedLength();
216  }
217 
218  // look for isotopes :
219  rx.setPattern("\\(([-]{0,1}\\d+)\\)([C,H,O,N,H,S])\\s([-]{0,1}\\d+)");
220  pos = 0;
221 
222  while((pos = rx.indexIn(diff_formula, pos)) != -1)
223  {
224  qDebug() << rx.cap(1) << " " << rx.cap(2) << " " << rx.cap(3);
225  int number_of_isotopes = rx.cap(3).toInt();
226  if(rx.cap(2) == "C")
227  {
228  if(rx.cap(1) == "13")
229  {
230  m_mapIsotope.at(Isotope::C13) = number_of_isotopes;
231  }
232  m_atomCount[AtomIsotopeSurvey::C] += number_of_isotopes;
233  }
234  else if(rx.cap(2) == "H")
235  {
236  if(rx.cap(1) == "2")
237  {
238  m_mapIsotope.at(Isotope::H2) = number_of_isotopes;
239  }
240  m_atomCount[AtomIsotopeSurvey::H] += number_of_isotopes;
241  }
242  else if(rx.cap(2) == "N")
243  {
244  if(rx.cap(1) == "15")
245  {
246  m_mapIsotope.at(Isotope::N15) = number_of_isotopes;
247  }
248  m_atomCount[AtomIsotopeSurvey::N] += number_of_isotopes;
249  }
250  else if(rx.cap(2) == "O")
251  {
252  if(rx.cap(1) == "17")
253  {
254  m_mapIsotope.at(Isotope::O17) = number_of_isotopes;
255  }
256  else if(rx.cap(1) == "18")
257  {
258  m_mapIsotope.at(Isotope::O18) = number_of_isotopes;
259  }
260  m_atomCount[AtomIsotopeSurvey::O] += number_of_isotopes;
261  }
262  else if(rx.cap(2) == "S")
263  {
264  if(rx.cap(1) == "33")
265  {
266  m_mapIsotope.at(Isotope::S33) = number_of_isotopes;
267  }
268  else if(rx.cap(1) == "34")
269  {
270  m_mapIsotope.at(Isotope::S34) = number_of_isotopes;
271  }
272  else if(rx.cap(1) == "36")
273  {
274  m_mapIsotope.at(Isotope::S36) = number_of_isotopes;
275  }
276  m_atomCount[AtomIsotopeSurvey::S] += number_of_isotopes;
277  }
278  pos += rx.matchedLength();
279  }
280 
281 
283 }
284 
285 
286 void
288 {
289  pappso_double theoreticalm_mass = 0;
290  std::map<AtomIsotopeSurvey, int>::const_iterator it_atom =
292  if(it_atom != m_atomCount.end())
293  {
294  theoreticalm_mass += MASSCARBON * (it_atom->second);
295  }
296  it_atom = m_atomCount.find(AtomIsotopeSurvey::H);
297  if(it_atom != m_atomCount.end())
298  {
299  theoreticalm_mass += MPROTIUM * (it_atom->second);
300  }
301 
302  it_atom = m_atomCount.find(AtomIsotopeSurvey::O);
303  if(it_atom != m_atomCount.end())
304  {
305  theoreticalm_mass += MASSOXYGEN * (it_atom->second);
306  }
307 
308  it_atom = m_atomCount.find(AtomIsotopeSurvey::N);
309  if(it_atom != m_atomCount.end())
310  {
311  theoreticalm_mass += MASSNITROGEN * (it_atom->second);
312  }
313  it_atom = m_atomCount.find(AtomIsotopeSurvey::S);
314  if(it_atom != m_atomCount.end())
315  {
316  theoreticalm_mass += MASSSULFUR * (it_atom->second);
317  }
318 
319  qDebug() << theoreticalm_mass;
320 
321  theoreticalm_mass += DIFFC12C13 * m_mapIsotope.at(Isotope::C13);
322  theoreticalm_mass += DIFFH1H2 * m_mapIsotope.at(Isotope::H2);
323  theoreticalm_mass += DIFFN14N15 * m_mapIsotope.at(Isotope::N15);
324  theoreticalm_mass += DIFFO16O17 * m_mapIsotope.at(Isotope::O17);
325  theoreticalm_mass += DIFFO16O18 * m_mapIsotope.at(Isotope::O18);
326  theoreticalm_mass += DIFFS32S33 * m_mapIsotope.at(Isotope::S33);
327  theoreticalm_mass += DIFFS32S34 * m_mapIsotope.at(Isotope::S34);
328  theoreticalm_mass += DIFFS32S36 * m_mapIsotope.at(Isotope::S36);
329 
330 
331  pappso_double diff = std::fabs((pappso_double)m_mass - theoreticalm_mass);
332  if(diff < 0.001)
333  {
334  m_mass = theoreticalm_mass;
335  qDebug() << "AaModification::calculateMassFromChemicalComponents "
336  << diff;
337  }
338  else
339  {
340  qDebug()
341  << "ERROR in AaModification::calculateMassFromChemicalComponents theo="
342  << theoreticalm_mass << " m=" << m_mass << " diff=" << diff
343  << " accession=" << m_accession;
344  }
345 }
346 
349 {
350  QString accession = QString("%1").arg(modificationMass);
351  qDebug() << "AaModification::getInstanceCustomizedMod " << accession;
352  QMutexLocker locker(&m_mutex);
353  if(m_mapAccessionModifications.find(accession) ==
355  {
356  // not found
357  m_mapAccessionModifications.insert(std::pair<QString, AaModification *>(
358  accession, new AaModification(accession, modificationMass)));
359  }
360  else
361  {
362  // found
363  }
364  return m_mapAccessionModifications.at(accession);
365 }
366 
368 AaModification::getInstance(const QString &accession)
369 {
370  try
371  {
372  QMutexLocker locker(&m_mutex);
373  MapAccessionModifications::iterator it =
374  m_mapAccessionModifications.find(accession);
375  if(it == m_mapAccessionModifications.end())
376  {
377 
378  // not found
379  std::pair<MapAccessionModifications::iterator, bool> insert_res =
381  std::pair<QString, AaModificationP>(
382  accession, AaModification::createInstance(accession)));
383  it = insert_res.first;
384  }
385  else
386  {
387  // found
388  }
389  return it->second;
390  }
391  catch(ExceptionNotFound &e)
392  {
393  throw ExceptionNotFound(
394  QObject::tr("ERROR getting instance of : %1 NOT FOUND\n%2")
395  .arg(accession)
396  .arg(e.qwhat()));
397  }
398  catch(PappsoException &e)
399  {
400  throw PappsoException(QObject::tr("ERROR getting instance of %1\n%2")
401  .arg(accession)
402  .arg(e.qwhat()));
403  }
404  catch(std::exception &e)
405  {
406  throw PappsoException(QObject::tr("ERROR getting instance of %1\n%2")
407  .arg(accession)
408  .arg(e.what()));
409  }
410 }
411 
414 {
415 
416  QMutexLocker locker(&m_mutex);
417  MapAccessionModifications::iterator it =
419  if(it == m_mapAccessionModifications.end())
420  {
421  // not found
422  std::pair<MapAccessionModifications::iterator, bool> insert_res =
423  m_mapAccessionModifications.insert(std::pair<QString, AaModificationP>(
424  oboterm.m_accession, AaModification::createInstance(oboterm)));
425  it = insert_res.first;
426  }
427  else
428  {
429  // found
430  }
431  return it->second;
432 }
433 
434 
437  pappso_double mass,
438  const PeptideSp &peptide_sp,
439  unsigned int position)
440 {
442  if(MzRange(mass, precision).contains(getInstance("MOD:00719")->getMass()))
443  {
444  if(type == "M")
445  {
446  return getInstance("MOD:00719");
447  }
448  if(type == "K")
449  {
450  return getInstance("MOD:01047");
451  }
452  }
453  // accession== "MOD:00057"
454  if(MzRange(mass, precision).contains(getInstance("MOD:00408")->getMass()))
455  {
456  // id: MOD:00394
457  // name: acetylated residue
458  // potential N-terminus modifications
459  if(position == 0)
460  {
461  return getInstance("MOD:00408");
462  }
463  }
464  if(MzRange(mass, precision).contains(getInstance("MOD:01160")->getMass()))
465  {
466  //-17.02655
467  // loss of ammonia [MOD:01160] -17.026549
468  return getInstance("MOD:01160");
469  }
470 
471  if(MzRange(mass, precision).contains(getInstance("MOD:01060")->getMass()))
472  {
473  //// iodoacetamide [MOD:00397] 57.021464
474  if(type == "C")
475  {
476  return getInstance("MOD:01060");
477  }
478  else
479  {
480  return getInstance("MOD:00397");
481  }
482  }
483  if(MzRange(mass, precision).contains(getInstance("MOD:00704")->getMass()))
484  {
485  // loss of water
486  /*
487  if (position == 0) {
488  if (peptide_sp.get()->getSequence().startsWith("EG")) {
489  return getInstance("MOD:00365");
490  }
491  if (peptide_sp.get()->getSequence().startsWith("ES")) {
492  return getInstance("MOD:00953");
493  }
494  if (type == "E") {
495  return getInstance("MOD:00420");
496  }
497  }
498  */
499  // dehydrated residue [MOD:00704] -18.010565
500  return getInstance("MOD:00704");
501  }
502  if(MzRange(mass, precision).contains(getInstance("MOD:00696")->getMass()))
503  {
504  // phosphorylated residue [MOD:00696] 79.966330
505  return getInstance("MOD:00696");
506  }
507  bool isCter = false;
508  if(peptide_sp.get()->size() == (position + 1))
509  {
510  isCter = true;
511  }
512  if((position == 0) || isCter)
513  {
514  if(MzRange(mass, precision).contains(getInstance("MOD:00429")->getMass()))
515  {
516  // dimethyl
517  return getInstance("MOD:00429");
518  }
519  if(MzRange(mass, precision).contains(getInstance("MOD:00552")->getMass()))
520  {
521  // 4x(2)H labeled dimethyl residue
522  return getInstance("MOD:00552");
523  }
524  if(MzRange(mass, precision).contains(getInstance("MOD:00638")->getMass()))
525  {
526  // 2x(13)C,6x(2)H-dimethylated arginine
527  return getInstance("MOD:00638");
528  }
529  }
530  throw PappsoException(
531  QObject::tr("tandem modification not found : %1 %2 %3 %4")
532  .arg(type)
533  .arg(mass)
534  .arg(peptide_sp.get()->getSequence())
535  .arg(position));
536 }
537 
540 {
541  return m_mass;
542 }
543 
544 
545 int
547 {
548  // qDebug() << "AaModification::getNumberOfAtom(AtomIsotopeSurvey atom) NOT
549  // IMPLEMENTED";
550  return m_atomCount.at(atom);
551 }
552 
553 
554 int
556 {
557  try
558  {
559  return m_mapIsotope.at(isotope);
560  }
561  catch(std::exception &e)
562  {
563  throw PappsoException(
564  QObject::tr("ERROR in AaModification::getNumberOfIsotope %2")
565  .arg(e.what()));
566  }
567 }
568 
569 
570 bool
572 {
573  if(m_accession.startsWith("internal:"))
574  {
575  return true;
576  }
577  return false;
578 }
579 
581 AaModification::createInstanceMutation(const Aa &aa_from, const Aa &aa_to)
582 {
583  QString accession(
584  QString("MUTATION:%1=>%2").arg(aa_from.getLetter()).arg(aa_to.getLetter()));
585  double diffMono = aa_to.getMass() - aa_from.getMass();
586  // not found
587  AaModification *instance_mutation;
588  // qDebug() << " AaModification::createInstance begin";
589  instance_mutation = new AaModification(accession, diffMono);
590  // xref: DiffFormula: "C 0 H 0 N 0 O 1 S 0"
591 
592  for(std::int8_t atomInt = (std::int8_t)AtomIsotopeSurvey::C;
593  atomInt != (std::int8_t)AtomIsotopeSurvey::last;
594  atomInt++)
595  {
596  AtomIsotopeSurvey atom = static_cast<AtomIsotopeSurvey>(atomInt);
597  instance_mutation->m_atomCount[atom] =
598  aa_to.getNumberOfAtom(atom) - aa_from.getNumberOfAtom(atom);
599  }
600  instance_mutation->m_name = QString("mutation from %1 to %2")
601  .arg(aa_from.getLetter())
602  .arg(aa_to.getLetter());
603  return instance_mutation;
604 }
605 
606 
608 AaModification::getInstanceMutation(const QChar &mut_from, const QChar &mut_to)
609 {
610  QString accession(QString("MUTATION:%1=>%2").arg(mut_from).arg(mut_to));
611  try
612  {
613  QMutexLocker locker(&m_mutex);
614  MapAccessionModifications::iterator it =
615  m_mapAccessionModifications.find(accession);
616  if(it == m_mapAccessionModifications.end())
617  {
618  Aa aa_from(mut_from.toLatin1());
619  Aa aa_to(mut_to.toLatin1());
620  AaModificationP instance_mutation =
621  createInstanceMutation(aa_from, aa_to);
622 
623  std::pair<MapAccessionModifications::iterator, bool> insert_res =
625  std::pair<QString, AaModificationP>(accession,
626  instance_mutation));
627  it = insert_res.first;
628  }
629  else
630  {
631  // found
632  }
633  return it->second;
634  }
635  catch(ExceptionNotFound &e)
636  {
637  throw ExceptionNotFound(
638  QObject::tr("ERROR getting instance of : %1 NOT FOUND\n%2")
639  .arg(accession)
640  .arg(e.qwhat()));
641  }
642  catch(PappsoException &e)
643  {
644  throw PappsoException(QObject::tr("ERROR getting instance of %1\n%2")
645  .arg(accession)
646  .arg(e.qwhat()));
647  }
648  catch(std::exception &e)
649  {
650  throw PappsoException(QObject::tr("ERROR getting instance of %1\n%2")
651  .arg(accession)
652  .arg(e.what()));
653  }
654 } // namespace pappso
655 
656 } // namespace pappso
amino acid modification model
virtual const char & getLetter() const
Definition: aabase.cpp:432
const QString & getName() const
static AaModificationP getInstanceMutation(const QChar &mut_from, const QChar &mut_to)
get a fake modification coding a mutation from an amino acid to an other
static AaModificationP createInstance(const QString &saccession)
std::map< Isotope, int > m_mapIsotope
const QString & getAccession() const
static AaModificationP getInstanceXtandemMod(const QString &type, pappso_double mass, const PeptideSp &peptide_sp, unsigned int position)
AaModification(AaModification &&toCopy)
std::map< AtomIsotopeSurvey, int > m_atomCount
int getNumberOfAtom(AtomIsotopeSurvey atom) const override final
get the number of atom C, O, N, H in the molecule
pappso_double getMass() const
void setXrefOrigin(const QString &origin)
set list of amino acid on which this modification takes place
std::map< QString, AaModificationP > MapAccessionModifications
static AaModificationP getInstance(const QString &accession)
static AaModificationP getInstanceCustomizedMod(pappso_double modificationMass)
const QString m_accession
void setDiffFormula(const QString &diff_formula)
static AaModificationP createInstanceMutation(const Aa &aa_from, const Aa &aa_to)
void calculateMassFromChemicalComponents()
static MapAccessionModifications m_mapAccessionModifications
int getNumberOfIsotope(Isotope isotope) const override final
get the number of isotopes C13, H2, O17, O18, N15, S33, S34, S36 in the molecule
Definition: aa.h:45
int getNumberOfAtom(AtomIsotopeSurvey atom) const override final
get the number of atom C, O, N, H in the molecule
Definition: aa.cpp:166
pappso_double getMass() const override
Definition: aa.cpp:79
const OboPsiModTerm & getOne()
virtual const QString & qwhat() const
const char * what() const noexcept override
static PrecisionPtr getDaltonInstance(pappso_double value)
get a Dalton precision pointer
Definition: precision.cpp:129
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition: aa.cpp:39
const pappso_double DIFFS32S33(32.9714589101 - MASSSULFUR)
const pappso_double DIFFS32S34(33.9678670300 - MASSSULFUR)
const pappso_double DIFFO16O17(16.99913150 - MASSOXYGEN)
const pappso_double MASSCARBON(12)
const pappso_double MASSSULFUR(31.9720711741)
std::shared_ptr< const Peptide > PeptideSp
const pappso_double DIFFS32S36(35.9670812000 - MASSSULFUR)
const AaModification * AaModificationP
AtomIsotopeSurvey
Definition: types.h:76
double pappso_double
A type definition for doubles.
Definition: types.h:48
Isotope
Definition: types.h:91
const pappso_double MPROTIUM(1.007825032241)
const pappso_double MASSNITROGEN(14.0030740048)
const pappso_double MASSOXYGEN(15.99491461956)
const pappso_double DIFFO16O18(17.9991610 - MASSOXYGEN)
const pappso_double DIFFN14N15(15.0001088982 - MASSNITROGEN)
const pappso_double DIFFC12C13(1.0033548378)
const pappso_double DIFFH1H2(2.0141017778 - MPROTIUM)