libpappsomspp
Library for mass spectrometry
mzintegrationparams.cpp
Go to the documentation of this file.
1 /* BEGIN software license
2  *
3  * msXpertSuite - mass spectrometry software suite
4  * -----------------------------------------------
5  * Copyright(C) 2009,...,2018 Filippo Rusconi
6  *
7  * http://www.msxpertsuite.org
8  *
9  * This file is part of the msXpertSuite project.
10  *
11  * The msXpertSuite project is the successor of the massXpert project. This
12  * project now includes various independent modules:
13  *
14  * - massXpert, model polymer chemistries and simulate mass spectrometric data;
15  * - mineXpert, a powerful TIC chromatogram/mass spectrum viewer/miner;
16  *
17  * This program is free software: you can redistribute it and/or modify
18  * it under the terms of the GNU General Public License as published by
19  * the Free Software Foundation, either version 3 of the License, or
20  * (at your option) any later version.
21  *
22  * This program is distributed in the hope that it will be useful,
23  * but WITHOUT ANY WARRANTY; without even the implied warranty of
24  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25  * GNU General Public License for more details.
26  *
27  * You should have received a copy of the GNU General Public License
28  * along with this program. If not, see <http://www.gnu.org/licenses/>.
29  *
30  * END software license
31  */
32 
33 
34 /////////////////////// StdLib includes
35 #include <map>
36 #include <cmath>
37 
38 
39 /////////////////////// Qt includes
40 #include <QDebug>
41 #include <QString>
42 #include <QFile>
43 #include <QDateTime>
44 
45 
46 /////////////////////// pappsomspp includes
47 #include "../../utils.h"
48 #include "../../massspectrum/massspectrum.h"
49 
50 
51 /////////////////////// Local includes
52 #include "mzintegrationparams.h"
53 
54 
55 namespace pappso
56 {
57 
58 
59 //! Map relating the BinningType to a textual representation
60 std::map<BinningType, QString> binningTypeMap{
61  {BinningType::NONE, "NONE"},
62  {BinningType::DATA_BASED, "DATA_BASED"},
63  {BinningType::ARBITRARY, "ARBITRARY"}};
64 
65 
67 {
70 }
71 
72 
75  BinningType binningType,
76  int decimalPlaces,
77  pappso::PrecisionPtr precisionPtr,
78  bool applyMzShift,
79  pappso::pappso_double mzShift,
80  bool removeZeroValDataPoints)
81  : m_smallestMz(minMz),
82  m_greatestMz(maxMz),
83  m_binningType(binningType),
84  m_decimalPlaces(decimalPlaces),
85  mp_precision(precisionPtr),
86  m_applyMzShift(applyMzShift),
87  m_mzShift(mzShift),
88  m_removeZeroValDataPoints(removeZeroValDataPoints)
89 {
90  if(mp_precision == nullptr)
92 }
93 
94 
96  : m_smallestMz(other.m_smallestMz),
97  m_greatestMz(other.m_greatestMz),
98  m_binningType(other.m_binningType),
99  m_decimalPlaces(other.m_decimalPlaces),
100  mp_precision(other.mp_precision),
101  m_applyMzShift(other.m_applyMzShift),
102  m_mzShift(other.m_mzShift),
103  m_removeZeroValDataPoints(other.m_removeZeroValDataPoints),
104  m_applySavGolFilter(other.m_applySavGolFilter)
105 {
106  if(mp_precision == nullptr)
108 
110 }
111 
112 
114  const pappso::SavGolParams &savGolParams)
115 {
118  setSavGolParams(savGolParams);
119 }
120 
121 
123 {
124 }
125 
126 
129 {
130  if(this == &other)
131  return *this;
132 
133  m_smallestMz = other.m_smallestMz;
134  m_greatestMz = other.m_greatestMz;
136 
138 
139  mp_precision = other.mp_precision;
140  if(mp_precision == nullptr)
142 
144  m_mzShift = other.m_mzShift;
146 
148 
150 
151  return *this;
152 }
153 
154 
155 void
157 {
158  m_smallestMz = value;
159 }
160 
161 
162 void
164 {
165  m_smallestMz = m_smallestMz > value ? value : m_smallestMz;
166 }
167 
168 
171 {
172  return m_smallestMz;
173 }
174 
175 
176 void
178 {
179  m_greatestMz = value;
180 }
181 
182 
183 void
185 {
186  m_greatestMz = m_greatestMz < value ? value : m_greatestMz;
187 }
188 
189 
192 {
193  return m_greatestMz;
194 }
195 
196 void
198 {
199  m_binningType = binningType;
200 }
201 
204 {
205  return m_binningType;
206 }
207 
208 void
210 {
211  m_decimalPlaces = decimal_places;
212 }
213 
214 
215 int
217 {
218  return m_decimalPlaces;
219 }
220 
221 void
223 {
224  mp_precision = precisionPtr;
225 
226  if(mp_precision == nullptr)
228 }
229 
232 {
233  return mp_precision;
234 }
235 
236 
237 void
239 {
240  m_applyMzShift = applyMzShift;
241 }
242 
243 
244 bool
246 {
247  return m_applyMzShift;
248 }
249 
250 
251 void
253 {
254  m_removeZeroValDataPoints = removeOrNot;
255 }
256 
257 
258 bool
260 {
262 }
263 
264 
265 void
267 {
268  m_mzShift = value;
269 }
270 
271 
272 double
274 {
275  return m_mzShift;
276 }
277 
278 
279 void
281 {
282  m_applySavGolFilter = applySavGolFilter;
283 }
284 
285 
286 bool
288 {
289  return m_applySavGolFilter;
290 }
291 
292 
293 void
295  int nL, int nR, int m, int lD, bool convolveWithNr)
296 {
297  m_savGolParams.nL = nL;
298  m_savGolParams.nR = nR;
299  m_savGolParams.m = m;
300  m_savGolParams.lD = lD;
301  m_savGolParams.convolveWithNr = convolveWithNr;
302 }
303 
304 
305 void
307 {
308  m_savGolParams.nL = params.nL;
309  m_savGolParams.nR = params.nR;
310  m_savGolParams.m = params.m;
311  m_savGolParams.lD = params.lD;
313 }
314 
315 
318 {
319  return m_savGolParams;
320 }
321 
322 
323 //! Reset the instance to default values.
324 void
326 {
327  m_smallestMz = std::numeric_limits<double>::min();
328  m_greatestMz = std::numeric_limits<double>::min();
330 
331  // Special case for this member datum
333 
334  m_applyMzShift = false;
335  m_mzShift = 0;
337 
338  m_savGolParams.nL = 15;
339  m_savGolParams.nR = 15;
340  m_savGolParams.m = 4;
341  m_savGolParams.lD = 0;
343 
344  m_applySavGolFilter = false;
345 }
346 
347 
348 bool
350 {
351  int errors = 0;
352 
354  {
355  // qDebug() << "m_smallestMz:" << m_smallestMz;
356  // qDebug() << "smallest is max:" << (m_smallestMz ==
357  // std::numeric_limits<double>::max());
358 
359  errors += (m_smallestMz == std::numeric_limits<double>::max() ? 1 : 0);
360 
361  // qDebug() << "m_greatestMz:" << m_greatestMz;
362  // qDebug() << "greatest is min:" << (m_greatestMz ==
363  // std::numeric_limits<double>::min());
364  errors += (m_greatestMz == std::numeric_limits<double>::min() ? 1 : 0);
365 
366  // if(mp_precision != nullptr)
367  // qDebug() << mp_precision->toString();
368 
369  errors += (mp_precision == nullptr ? 1 : 0);
370  }
371 
372  if(errors)
373  {
374  qDebug()
375  << "The m/z integration parameters are not valid or do not apply...";
376  }
377 
378  return !errors;
379 }
380 
381 
382 bool
384 {
385  return (m_smallestMz != std::numeric_limits<double>::max()) &&
386  (m_greatestMz != std::numeric_limits<double>::min());
387 }
388 
389 
390 std::vector<double>
392 {
393 
394  // qDebug();
395 
396  std::vector<double> bins;
397 
399  {
400  // If no binning is to be performed, fine.
401  return bins;
402  }
404  {
405  // Use only data in the MzIntegrationParams member data.
406  return createArbitraryBins();
407  }
409  {
410  // qDebug();
411 
412  qFatal("Programming error.");
413  }
414 
415  return bins;
416 }
417 
418 
419 std::vector<double>
421 {
422 
423  // qDebug();
424 
425  std::vector<double> bins;
426 
428  {
429  // If no binning is to be performed, fine.
430  return bins;
431  }
433  {
434  // Use only data in the MzIntegrationParams member data.
435  return createArbitraryBins();
436  }
438  {
439  // qDebug();
440 
441  // Use the first spectrum to perform the data-based bins
442 
443  return createDataBasedBins(mass_spectrum_csp);
444  }
445 
446  return bins;
447 }
448 
449 
450 std::vector<double>
452 {
453 
454  // qDebug();
455 
456  // Now starts the tricky stuff. Depending on how the binning has been
457  // configured, we need to take diverse actions.
458 
459  // qDebug() << "Bin size:" << mp_precision->toString();
460 
463 
464  // qDebug() << QString::asprintf("min_mz: %.6f\n", min_mz)
465  //<< QString::asprintf("max_mz: %.6f\n", max_mz);
466 
467  pappso::pappso_double binSize = mp_precision->delta(min_mz);
468 
469  // qDebug() << QString::asprintf(
470  //"binSize is the precision delta for min_mz: %.6f\n", binSize);
471 
472  // Only compute the decimal places if they were not configured already.
473  if(m_decimalPlaces == -1)
474  {
475 
476  // We want as many decimal places as there are 0s between the integral
477  // part of the double and the first non-0 cipher. For example, if
478  // binSize is 0.004, zero decimals is 2 and m_decimalPlaces is set to 3,
479  // because we want decimals up to 4 included.
480 
482 
483  // qDebug() << "With the binSize m_decimalPlaces was computed to be:"
484  //<< m_decimalPlaces;
485  }
486 
487  // Now that we have defined the value of m_decimalPlaces, let's use that
488  // value.
489 
490  double first_mz = ceil((min_mz * std::pow(10, m_decimalPlaces)) - 0.49) /
491  pow(10, m_decimalPlaces);
492  double last_mz =
493  ceil((max_mz * pow(10, m_decimalPlaces)) - 0.49) / pow(10, m_decimalPlaces);
494 
495  // qDebug() << "After having accounted for the decimals, new min/max values:"
496  //<< QString::asprintf("Very first data point: %.6f\n", first_mz)
497  //<< QString::asprintf("Very last data point to reach: %.6f\n",
498  // last_mz);
499 
500  // Instanciate the vector of mz double_s that we'll feed with the bins.
501 
502  std::vector<pappso::pappso_double> bins;
503 
504  // Store that very first value for later use in the loop.
505  // The bins are notking more than:
506  //
507  // 1. The first mz (that is the smallest mz value found in all the spectra
508  // 2. A sequence of mz values corresponding to that first mz value
509  // incremented by the bin size.
510 
511  // Seed the root of the bin vector with the first mz value rounded above as
512  // requested.
513  pappso::pappso_double previous_mz_bin = first_mz;
514 
515  bins.push_back(previous_mz_bin);
516 
517  // Now continue adding mz values until we have reached the end of the
518  // spectrum, that is the max_mz value, as converted using the decimals to
519  // last_mz.
520 
521  // debugCount value used below for debugging purposes.
522  // int debugCount = 0;
523 
524  while(previous_mz_bin <= last_mz)
525  {
526 
527  // Calculate dynamically the precision delta according to the current mz
528  // value.
529 
530  double current_mz =
531  previous_mz_bin + mp_precision->delta(previous_mz_bin);
532 
533  // qDebug() << QString::asprintf(
534  //"previous_mzBin: %.6f and current_mz: %.6f\n",
535  // previous_mz_bin,
536  // current_mz);
537 
538  // Now apply on the obtained mz value the decimals that were either set
539  // or computed earlier.
540 
541  double current_rounded_mz =
542  ceil((current_mz * pow(10, m_decimalPlaces)) - 0.49) /
543  pow(10, m_decimalPlaces);
544 
545  // qDebug() << QString::asprintf(
546  //"current_mz: %.6f and current_rounded_mz: %.6f and previous_mzBin "
547  //": % .6f\n ",
548  // current_mz,
549  // current_rounded_mz,
550  // previous_mz_bin);
551 
552  // If rounding makes the new value identical to the previous one, then
553  // that means that we need to decrease roughness.
554 
555  if(current_rounded_mz == previous_mz_bin)
556  {
557  ++m_decimalPlaces;
558 
559  current_rounded_mz =
560  ceil((current_mz * pow(10, m_decimalPlaces)) - 0.49) /
561  pow(10, m_decimalPlaces);
562 
563  qDebug().noquote()
564  << "Had to increment decimal places by one while creating the bins "
565  "in BinningType::ARBITRARY mode..";
566  }
567 
568  bins.push_back(current_rounded_mz);
569 
570  // Use the local_mz value for the storage of the previous mz bin.
571  previous_mz_bin = current_rounded_mz;
572  }
573 
574 
575 #if 0
576 
577  QString fileName = "/tmp/massSpecArbitraryBins.txt-at-" +
578  QDateTime::currentDateTime().toString("yyyyMMdd-HH-mm-ss");
579 
580  qDebug() << "Writing the list of bins setup in the "
581  "mass spectrum in file "
582  << fileName;
583 
584  QFile file(fileName);
585  file.open(QIODevice::WriteOnly);
586 
587  QTextStream fileStream(&file);
588 
589  for(auto &&bin : bins)
590  fileStream << QString("%1\n").arg(bin, 0, 'f', 10);
591 
592  fileStream.flush();
593  file.close();
594 
595 #endif
596 
597  // qDebug() << "Prepared bins with " << bins.size() << "elements."
598  //<< "starting with mz" << bins.front() << "ending with mz"
599  //<< bins.back();
600 
601  return bins;
602 }
603 
604 
605 std::vector<double>
607  pappso::MassSpectrumCstSPtr mass_spectrum_csp)
608 {
609  // qDebug();
610 
611  // The bins in *this mass spectrum must be calculated starting from the
612  // data in the mass_spectrum_csp parameter.
613 
614  // Instanciate the vector of mz double_s that we'll feed with the bins.
615 
616  std::vector<pappso::pappso_double> bins;
617 
618  if(mass_spectrum_csp->size() < 2)
619  return bins;
620 
621  // Make sure the spectrum is sorted, as this functions takes for granted
622  // that the DataPoint instances are sorted in ascending x (== mz) value
623  // order.
624  pappso::MassSpectrum local_mass_spectrum = *mass_spectrum_csp;
625  local_mass_spectrum.sortMz();
626 
628 
629  // qDebug() << "The min_mz:" << min_mz;
630 
631  if(m_decimalPlaces != -1)
632  min_mz = ceil((min_mz * pow(10, m_decimalPlaces)) - 0.49) /
633  pow(10, m_decimalPlaces);
634 
635 
636  // Two values for the definition of a MassSpectrumBin.
637 
638  // The first value of the mz range that defines the bin. This value is part
639  // of the bin.
640  pappso::pappso_double start_mz_in = min_mz;
641 
642  // The second value of the mz range that defines the bin. This value is
643  // *not* part of the bin.
644  pappso::pappso_double end_mz_out;
645 
646  std::vector<pappso::DataPoint>::const_iterator it =
647  local_mass_spectrum.begin();
648 
649  pappso::pappso_double prev_mz = it->x;
650 
651  if(m_decimalPlaces != -1)
652  prev_mz = ceil((prev_mz * pow(10, m_decimalPlaces)) - 0.49) /
653  pow(10, m_decimalPlaces);
654 
655  ++it;
656 
657  while(it != local_mass_spectrum.end())
658  {
659  pappso::pappso_double next_mz = it->x;
660 
661  if(m_decimalPlaces != -1)
662  next_mz = ceil((next_mz * pow(10, m_decimalPlaces)) - 0.49) /
663  pow(10, m_decimalPlaces);
664 
665  pappso::pappso_double step = next_mz - prev_mz;
666  end_mz_out = start_mz_in + step;
667 
668  if(m_decimalPlaces != -1)
669  end_mz_out = ceil((end_mz_out * pow(10, m_decimalPlaces)) - 0.49) /
670  pow(10, m_decimalPlaces);
671 
672  // The data point that is crafted has a 0 y-value. The binning must
673  // indeed not create artificial intensity data.
674 
675  // qDebug() << "Pushing back bin:" << start_mz_in << end_mz_out;
676 
677  bins.push_back(start_mz_in);
678 
679  // Prepare next bin
680  start_mz_in = end_mz_out;
681 
682  // Update prev_mz to be the current one for next iteration.
683  prev_mz = next_mz;
684 
685  // Now got the next DataPoint instance.
686  ++it;
687  }
688 
689 #if 0
690 
691  QString fileName = "/tmp/massSpecDataBasedBins.txt";
692 
693  qDebug() << "Writing the list of bins setup in the "
694  "mass spectrum in file "
695  << fileName;
696 
697  QFile file(fileName);
698  file.open(QIODevice::WriteOnly);
699 
700  QTextStream fileStream(&file);
701 
702  for(auto &&bin : m_bins)
703  fileStream << QString("[%1-%2]\n")
704  .arg(bin.startMzIn, 0, 'f', 10)
705  .arg(bin.endMzOut, 0, 'f', 10);
706 
707  fileStream.flush();
708  file.close();
709 
710  qDebug() << "elements."
711  << "starting with mz" << m_bins.front().startMzIn << "ending with mz"
712  << m_bins.back().endMzOut;
713 
714 #endif
715 
716  return bins;
717 }
718 
719 
720 QString
721 MzIntegrationParams::toString(int offset, const QString &spacer) const
722 {
723  QString lead;
724 
725  for(int iter = 0; iter < offset; ++iter)
726  lead += spacer;
727 
728  QString text = lead;
729  text += "m/z integration parameters:\n";
730 
731  text += lead;
732  text += spacer;
733  if(m_smallestMz != std::numeric_limits<double>::max())
734  text.append(
735  QString::asprintf("Smallest (first) m/z: %.6f\n", m_smallestMz));
736 
737  text += lead;
738  text += spacer;
739  if(m_greatestMz != std::numeric_limits<double>::min())
740  text.append(QString::asprintf("Greatest (last) m/z: %.6f\n", m_greatestMz));
741 
742  text += lead;
743  text += spacer;
744  text.append(QString("Decimal places: %1\n").arg(m_decimalPlaces));
745 
746  std::map<BinningType, QString>::iterator it;
747  it = binningTypeMap.find(m_binningType);
748 
749  if(it == binningTypeMap.end())
750  qFatal("Programming error.");
751 
752  text += lead;
753  text += spacer;
754  text.append(QString("Binning type: %1\n").arg(it->second.toLatin1().data()));
755 
756  // Only provide the details relative to the ARBITRARY binning type.
757 
759  {
760  text += lead;
761  text += spacer;
762  text += spacer;
763  text.append(QString("Bin nominal size: %1\n")
764  .arg(mp_precision->getNominal(), 0, 'f', 6));
765 
766  text += lead;
767  text += spacer;
768  text += spacer;
769  text.append(QString("Bin size: %2\n")
770  .arg(mp_precision->toString().toLatin1().data()));
771  }
772 
773  // Now other data that are independent of the bin settings.
774 
775  text += lead;
776  text += spacer;
777  text +=
778  QString("Apply m/z shift: %1\n").arg(m_applyMzShift ? "true" : "false");
779 
780  if(m_applyMzShift)
781  {
782  text += lead;
783  text += spacer;
784  text += spacer;
785  text += QString("m/z shift: %1").arg(m_mzShift, 0, 'f', 6);
786  }
787 
788  text += lead;
789  text += spacer;
790  text += QString("Remove 0-val data points: %1\n")
791  .arg(m_removeZeroValDataPoints ? "true" : "false");
792 
793  // Finally the Savitzy-Golay parameters (if requested)
794 
796  {
797  text += lead;
798  text += spacer;
799  text.append("Savitzky-Golay parameters\n");
800  text += lead;
801  text += spacer;
802  text += spacer;
803  text +=
804  QString("nL = %1 ; nR = %2 ; m = %3 ; lD = %4 ; convolveWithNr = %5\n")
805  .arg(m_savGolParams.nL)
806  .arg(m_savGolParams.nR)
807  .arg(m_savGolParams.m)
808  .arg(m_savGolParams.lD)
809  .arg(m_savGolParams.convolveWithNr ? "true" : "false");
810  }
811 
812  return text;
813 }
814 
815 
816 } // namespace pappso
817 
Class to represent a mass spectrum.
Definition: massspectrum.h:71
void sortMz()
Sort the DataPoint instances of this spectrum.
The MzIntegrationParams class provides the parameters definining how m/z !
pappso::pappso_double getSmallestMz() const
pappso::pappso_double m_smallestMz
MzIntegrationParams & operator=(const MzIntegrationParams &other)
pappso::pappso_double getGreatestMz() const
pappso::pappso_double m_greatestMz
pappso::PrecisionPtr getPrecision() const
std::vector< double > createArbitraryBins()
void setPrecision(pappso::PrecisionPtr precisionPtr)
void updateSmallestMz(pappso::pappso_double value)
pappso::SavGolParams getSavGolParams() const
void setApplySavGolFilter(bool applySavGolFilter)
void updateGreatestMz(pappso::pappso_double value)
pappso::SavGolParams m_savGolParams
QString toString(int offset=0, const QString &spacer=QString()) const
pappso::PrecisionPtr mp_precision
void setSmallestMz(pappso::pappso_double value)
void setBinningType(BinningType binningType)
void reset()
Reset the instance to default values.
std::vector< double > createDataBasedBins(pappso::MassSpectrumCstSPtr massSpectrum)
void setApplyMzShift(bool applyMzShift)
void setDecimalPlaces(int decimal_places)
std::vector< pappso::pappso_double > createBins()
void setRemoveZeroValDataPoints(bool removeOrNot=true)
void setGreatestMz(pappso::pappso_double value)
void setSavGolParams(int nL=15, int nR=15, int m=4, int lD=0, bool convolveWithNr=false)
pappso::pappso_double m_mzShift
virtual QString toString() const =0
virtual pappso_double getNominal() const final
Definition: precision.cpp:64
virtual pappso_double delta(pappso_double value) const =0
static PrecisionPtr getPpmInstance(pappso_double value)
get a ppm precision pointer
Definition: precision.cpp:149
static PrecisionPtr getDaltonInstance(pappso_double value)
get a Dalton precision pointer
Definition: precision.cpp:129
static int zeroDecimalsInValue(pappso_double value)
0.11 would return 0 (no empty decimal) 2.001 would return 2 1000.0001254 would return 3
Definition: utils.cpp:81
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition: aa.cpp:39
BinningType
Type of binning when performing integrations to a mass spectrum.
@ DATA_BASED
binning based on mass spectral data
@ ARBITRARY
binning based on arbitrary bin size value
@ NONE
< no binning
double pappso_double
A type definition for doubles.
Definition: types.h:48
std::shared_ptr< const MassSpectrum > MassSpectrumCstSPtr
Definition: massspectrum.h:55
std::map< BinningType, QString > binningTypeMap
Map relating the BinningType to a textual representation.
Parameters for the Savitzky-Golay filter.
Definition: savgolfilter.h:49
void initialize(int nLParam, int nRParam, int mParam, int lDParam, bool convolveWithNrParam)
Definition: savgolfilter.h:85
int nR
number of data points on the right of the filtered point
Definition: savgolfilter.h:52
int nL
number of data points on the left of the filtered point
Definition: savgolfilter.h:50
bool convolveWithNr
set to false for best results
Definition: savgolfilter.h:60