libpappsomspp
Library for mass spectrometry
pwizmsrunreader.cpp
Go to the documentation of this file.
1 /**
2  * \file pappsomspp/msrun/private/pwizmsrunreader.cpp
3  * \date 29/05/2018
4  * \author Olivier Langella
5  * \brief MSrun file reader based on the proteowizard library
6  */
7 
8 /*******************************************************************************
9  * Copyright (c) 2018 Olivier Langella <Olivier.Langella@u-psud.fr>.
10  *
11  * This file is part of the PAPPSOms++ library.
12  *
13  * PAPPSOms++ is free software: you can redistribute it and/or modify
14  * it under the terms of the GNU General Public License as published by
15  * the Free Software Foundation, either version 3 of the License, or
16  * (at your option) any later version.
17  *
18  * PAPPSOms++ is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21  * GNU General Public License for more details.
22  *
23  * You should have received a copy of the GNU General Public License
24  * along with PAPPSOms++. If not, see <http://www.gnu.org/licenses/>.
25  *
26  * Contributors:
27  * Olivier Langella <Olivier.Langella@u-psud.fr> - initial API and
28  *implementation
29  ******************************************************************************/
30 
31 
32 #include <QDebug>
33 
34 #include "pwizmsrunreader.h"
35 
36 #include <pwiz/data/msdata/DefaultReaderList.hpp>
37 
38 
39 #include "../../utils.h"
40 #include "../../pappsoexception.h"
41 #include "../../exception/exceptionnotfound.h"
42 #include "../../exception/exceptionnotpossible.h"
43 
44 
45 // int pwizMsRunReaderMetaTypeId =
46 // qRegisterMetaType<pappso::PwizMsRunReader>("pappso::PwizMsRunReader");
47 
48 
49 namespace pappso
50 {
51 
52 
54  : MsRunReader(msrun_id_csp)
55 {
56  // The initialization needs to be done immediately so that we get the pwiz
57  // MsDataPtr corresponding to the right ms_run_id in the parameter. That
58  // pointer will be set to msp_msData.
59 
60  initialize();
61 }
62 
63 
64 void
66 {
67  std::string file_name_std =
69 
70  // Make a backup of the current locale
71  std::string env_backup = setlocale(LC_ALL, "");
72  // struct lconv *lc = localeconv();
73 
74  // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
75  //<< "env_backup=" << env_backup.c_str() << "lc->decimal_point"
76  //<< lc->decimal_point;
77 
78  // Now actually search the useful MSDataPtr to the member variable.
79 
80  pwiz::msdata::DefaultReaderList defaultReaderList;
81 
82  std::vector<pwiz::msdata::MSDataPtr> msDataPtrVector;
83 
84  try
85  {
86  defaultReaderList.read(file_name_std, msDataPtrVector);
87  }
88  catch(std::exception &error)
89  {
90  qDebug() << QString("Failed to read the data from file %1")
91  .arg(QString::fromStdString(file_name_std));
92  }
93 
94  // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
95  //<< "The number of runs is:" << msDataPtrVector.size()
96  //<< "The number of spectra in first run is:"
97  //<< msDataPtrVector.at(0)->run.spectrumListPtr->size();
98 
99  // Single-run file handling here.
100 
101  // Specific case of the MGF data format: we do not have a run id for that kind
102  // of data. In this case there must be a single run!
103 
104  if(mcsp_msRunId->getRunId().isEmpty())
105  {
106  if(msDataPtrVector.size() != 1)
107  throw(
108  ExceptionNotPossible("For the kind of file at hand there can only be "
109  "one run in the file."));
110 
111  // At this point we know the single msDataPtr is the one we are looking
112  // for.
113 
114  msp_msData = msDataPtrVector.front();
115  }
116 
117  else
118  {
119  // Multi-run file handling here.
120  for(auto &msDataPtr : msDataPtrVector)
121  {
122  if(msDataPtr->run.id == mcsp_msRunId->getRunId().toStdString())
123  {
124  msp_msData = msDataPtr;
125 
126  // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
127  //<< "Found the right MSDataPtr for run id.";
128 
129  break;
130  }
131  }
132  }
133 
134  if(msp_msData == nullptr)
135  {
136  throw(ExceptionNotPossible(
137  "Could not find a MSDataPtr matching the requested run id."));
138  }
139 
140 
141  // check if this MS run can be used with scan numbers
142  // MS:1000490 Agilent instrument model
143  pwiz::cv::CVID native_id_format =
144  pwiz::msdata::id::getDefaultNativeIDFormat(*msp_msData.get());
145 
146  // msp_msData.get()->getDefaultNativeIDFormat();
147 
148  if(native_id_format == pwiz::cv::CVID::MS_Thermo_nativeID_format)
149  {
150  m_hasScanNumbers = true;
151  }
152  else
153  {
154  m_hasScanNumbers = false;
155  }
156 
157  if(mcsp_msRunId.get()->getMzFormat() == MzFormat::mzXML)
158  {
159  m_hasScanNumbers = true;
160  }
161 }
162 
163 
165 {
166 }
167 
168 
169 pwiz::msdata::SpectrumPtr
170 PwizMsRunReader::getPwizSpectrumPtr(pwiz::msdata::SpectrumList *p_spectrum_list,
171  std::size_t spectrum_index,
172  bool want_binary_data) const
173 {
174  pwiz::msdata::SpectrumPtr native_pwiz_spectrum_sp;
175 
176  try
177  {
178  native_pwiz_spectrum_sp =
179  p_spectrum_list->spectrum(spectrum_index, want_binary_data);
180  }
181  catch(std::runtime_error &error)
182  {
183  qDebug() << "getPwizSpectrumPtr error " << error.what() << " "
184  << typeid(error).name();
185 
186  throw ExceptionNotFound(QObject::tr("Pwiz spectrum index %1 not found in "
187  "MS file std::runtime_error :\n%2")
188  .arg(spectrum_index)
189  .arg(error.what()));
190  }
191  catch(std::exception &error)
192  {
193  qDebug() << "getPwizSpectrumPtr error " << error.what()
194  << typeid(error).name();
195 
196  throw ExceptionNotFound(
197  QObject::tr("Pwiz spectrum index %1 not found in MS file :\n%2")
198  .arg(spectrum_index)
199  .arg(error.what()));
200  }
201 
202  if(native_pwiz_spectrum_sp.get() == nullptr)
203  {
204  throw ExceptionNotFound(
205  QObject::tr(
206  "Pwiz spectrum index %1 not found in MS file : null pointer")
207  .arg(spectrum_index));
208  }
209 
210  return native_pwiz_spectrum_sp;
211 }
212 
213 
214 bool
216  pwiz::msdata::Spectrum *spectrum_p,
217  QualifiedMassSpectrum &qualified_mass_spectrum) const
218 {
219 
220  // We now have to set the retention time at which this mass spectrum
221  // was acquired. This is the scan start time.
222 
223  if(!spectrum_p->scanList.scans[0].hasCVParam(
224  pwiz::msdata::MS_scan_start_time))
225  {
226  if(mcsp_msRunId.get()->getMzFormat() == MzFormat::MGF)
227  { // MGF could not have scan start time
228  qualified_mass_spectrum.setRtInSeconds(-1);
229  }
230  else
231  {
232  throw(ExceptionNotPossible(
233  "The spectrum has no scan start time value set."));
234  }
235  }
236  else
237  {
238  pwiz::data::CVParam retention_time_cv_param =
239  spectrum_p->scanList.scans[0].cvParam(pwiz::msdata::MS_scan_start_time);
240 
241  // Try to get the units of the retention time value.
242 
243  std::string unit_name = retention_time_cv_param.unitsName();
244  // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
245  //<< "Unit name for the retention time:"
246  //<< QString::fromStdString(unit_name);
247 
248  if(unit_name == "second")
249  {
250  qualified_mass_spectrum.setRtInSeconds(
251  retention_time_cv_param.valueAs<double>());
252  }
253  else if(unit_name == "minute")
254  {
255  qualified_mass_spectrum.setRtInSeconds(
256  retention_time_cv_param.valueAs<double>() * 60);
257  }
258  else
259  throw(
260  ExceptionNotPossible("Could not determine the unit for the "
261  "scan start time value."));
262  }
263 
264  // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
265  //<< "Retention time for spectrum is:"
266  //<< qualified_mass_spectrum.getRtInSeconds();
267 
268  // Old version not checking unit (by default unit is minutes for RT,
269  // not seconds)
270  //
271  // pappso_double retentionTime =
272  // QString(spectrum_p->scanList.scans[0]
273  //.cvParam(pwiz::msdata::MS_scan_start_time)
274  //.value.c_str())
275  //.toDouble();
276  // qualified_mass_spectrum.setRtInSeconds(retentionTime);
277 
278  return true;
279 }
280 
281 
282 bool
284  pwiz::msdata::Spectrum *spectrum_p,
285  QualifiedMassSpectrum &qualified_mass_spectrum) const
286 {
287  // Not all the acquisitions have ion mobility data. We need to test
288  // that:
289 
290  if(spectrum_p->scanList.scans[0].hasCVParam(
291  pwiz::msdata::MS_ion_mobility_drift_time))
292  {
293 
294  // qDebug() << "as strings:"
295  //<< QString::fromStdString(
296  // spectrum_p->scanList.scans[0]
297  //.cvParam(pwiz::msdata::MS_ion_mobility_drift_time)
298  //.valueAs<std::string>());
299 
300  pappso_double driftTime =
301  spectrum_p->scanList.scans[0]
302  .cvParam(pwiz::msdata::MS_ion_mobility_drift_time)
303  .valueAs<double>();
304 
305  // qDebug() << "driftTime:" << driftTime;
306 
307  // Old version requiring use of QString.
308  // pappso_double driftTime =
309  // QString(spectrum_p->scanList.scans[0]
310  //.cvParam(pwiz::msdata::MS_ion_mobility_drift_time)
311  //.value.c_str())
312  //.toDouble();
313 
314  // Now make positively sure that the obtained value is correct.
315  // Note that I suffered a lot with Waters Synapt data that
316  // contained apparently correct drift time XML element that in
317  // fact contained either NaN or inf. When such mass spectra were
318  // encountered, the mz,i data were bogus and crashed the data
319  // loading functions. We just want to skip this kind of bogus mass
320  // spectrum by letting the caller know that the drift time was
321  // bogus ("I" is Filippo Rusconi).
322 
323  if(std::isnan(driftTime) || std::isinf(driftTime))
324  {
325  // qDebug() << "detected as nan or inf.";
326 
327  return false;
328  }
329  else
330  {
331  // The mzML standard stipulates that drift times are in
332  // milliseconds.
333  qualified_mass_spectrum.setDtInMilliSeconds(driftTime);
334  }
335  }
336  // End of
337  // if(spectrum_p->scanList.scans[0].hasCVParam(
338  // pwiz::msdata::MS_ion_mobility_drift_time))
339  else
340  {
341  // Not a bogus mass spectrum but also not a drift spectrum, set -1
342  // as the drift time value.
343  qualified_mass_spectrum.setDtInMilliSeconds(-1);
344  }
345 
346  return true;
347 }
348 
349 
352  const MassSpectrumId &massSpectrumId,
353  pwiz::msdata::Spectrum *spectrum_p,
354  bool want_binary_data,
355  bool &ok) const
356 {
357  // qDebug();
358 
359  std::string env;
360  env = setlocale(LC_ALL, "");
361  setlocale(LC_ALL, "C");
362 
363  QualifiedMassSpectrum qualified_mass_spectrum(massSpectrumId);
364 
365  try
366  {
367 
368  // We want to store the ms level for this spectrum
369 
370  int msLevel =
371  (spectrum_p->cvParam(pwiz::msdata::MS_ms_level).valueAs<int>());
372 
373  qualified_mass_spectrum.setMsLevel(msLevel);
374 
375  // We want to know if this spectrum is a fragmentation spectrum obtained
376  // from a selected precursor ion.
377 
378  std::size_t precursor_list_size = spectrum_p->precursors.size();
379 
380  // qDebug() << "For spectrum at index:" <<
381  // massSpectrumId.getSpectrumIndex()
382  //<< "msLevel:" << msLevel
383  //<< "with number of precursors:" << precursor_list_size;
384 
385  if(precursor_list_size > 0)
386  {
387 
388  // Sanity check
389  if(msLevel < 2)
390  {
391  qDebug() << "Going to throw: msLevel cannot be less than two for "
392  "a spectrum that has items in its Precursor list.";
393 
394  throw(ExceptionNotPossible(
395  "msLevel cannot be less than two for "
396  "a spectrum that has items in its Precursor list."));
397  }
398 
399  // See what is the first precursor in the list.
400 
401  for(auto &precursor : spectrum_p->precursors)
402  {
403 
404  // Set this variable ready as we need that default value in
405  // certain circumstances.
406 
407  std::size_t precursor_spectrum_index =
408  std::numeric_limits<std::size_t>::max();
409 
410  // The spectrum ID of the precursor might be empty.
411 
412  if(precursor.spectrumID.empty())
413  {
414  // qDebug() << "The precursor's spectrum ID is empty.";
415 
416  if(mcsp_msRunId.get()->getMzFormat() == MzFormat::MGF)
417  {
418  // qDebug()
419  //<< "Format is MGF, precursor's spectrum ID can be
420  // empty.";
421  }
422  else
423  {
424  // When performing Lumos Fusion fragmentation experiments
425  // in Tune mode and with recording, the first spectrum of
426  // the list is a fragmentation spectrum (ms level 2) that
427  // has no identity for the precursor spectrum because
428  // there is no full scan accquisition.
429  }
430  }
431  // End of
432  // if(precursor.spectrumID.empty())
433  else
434  {
435  // We could get a native precursor spectrum id, so convert
436  // that native id to a spectrum index.
437 
438  qualified_mass_spectrum.setPrecursorNativeId(
439  QString::fromStdString(precursor.spectrumID));
440 
441  if(qualified_mass_spectrum.getPrecursorNativeId().isEmpty())
442  {
443  // qDebug() << "The native id of the precursor spectrum is
444  // empty.";
445  }
446 
447  // Get the spectrum index of the spectrum that contained the
448  // precursor ion.
449 
450  precursor_spectrum_index =
451  msp_msData->run.spectrumListPtr->find(precursor.spectrumID);
452 
453  // Note that the Mascot MGF format has a peculiar handling of
454  // the precursor ion stuff so we cannot throw.
455  if(precursor_spectrum_index ==
456  msp_msData->run.spectrumListPtr->size())
457  {
458  if(mcsp_msRunId.get()->getMzFormat() != MzFormat::MGF)
459  {
460  throw(ExceptionNotPossible(
461  "Failed to find the index of the "
462  "precursor ion's spectrum."));
463  }
464  }
465 
466  qualified_mass_spectrum.setPrecursorSpectrumIndex(
467  precursor_spectrum_index);
468 
469  // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ <<
470  // "()"
471  //<< "Set the precursor spectrum index to:"
472  //<< qualified_mass_spectrum.getPrecursorSpectrumIndex()
473  //<< "for qualified mass spectrum:"
474  //<< &qualified_mass_spectrum;
475  }
476 
477  if(!precursor.selectedIons.size())
478  {
479  qDebug()
480  << "Going to throw The spectrum has msLevel > 1 but the "
481  "precursor ions's selected ions list is empty..";
482 
483  throw(
484  ExceptionNotPossible("The spectrum has msLevel > 1 but the "
485  "precursor ions's selected ions "
486  "list is empty."));
487  }
488 
489  pwiz::msdata::SelectedIon &ion =
490  *(precursor.selectedIons.begin());
491 
492  // selected ion m/z
493 
494  pappso_double selected_ion_mz =
495  QString(
496  ion.cvParam(pwiz::cv::MS_selected_ion_m_z).value.c_str())
497  .toDouble();
498 
499  // selected ion peak intensity
500 
501  pappso_double selected_ion_peak_intensity =
502  QString(ion.cvParam(pwiz::cv::MS_peak_intensity).value.c_str())
503  .toDouble();
504 
505  // charge state
506 
507  unsigned int selected_ion_charge_state =
508  QString(ion.cvParam(pwiz::cv::MS_charge_state).value.c_str())
509  .toUInt();
510 
511  // At this point we can craft a new PrecursorIonData instance and
512  // push it back to the vector.
513 
514  PrecursorIonData precursor_ion_data(selected_ion_mz,
515  selected_ion_charge_state,
516  selected_ion_peak_intensity);
517 
518  qualified_mass_spectrum.appendPrecursorIonData(
519  precursor_ion_data);
520 
521  // General sum-up
522 
523  // qDebug()
524  //<< "Appended new PrecursorIonData:"
525  //<< "mz:"
526  //<< qualified_mass_spectrum.getPrecursorIonData().back().mz
527  //<< "charge:"
528  //<< qualified_mass_spectrum.getPrecursorIonData().back().charge
529  //<< "intensity:"
530  //<< qualified_mass_spectrum.getPrecursorIonData()
531  //.back()
532  //.intensity;
533  }
534  // End of
535  // for(auto &precursor : spectrum_p->precursors)
536  }
537  // End of
538  // if(precursor_list_size > 0)
539  else
540  {
541  // Sanity check
542 
543  // Unfortunately, logic here is defeated by some vendors that have
544  // files with MS2 spectra without <precursorList>. Thus we have
545  // spectrum_p->precursors.size() == 0 and msLevel > 1.
546 
547  // if(msLevel != 1)
548  //{
549  // throw(
550  // ExceptionNotPossible("msLevel cannot be different than 1 if "
551  //"there is not a single precursor ion."));
552  //}
553  }
554 
555  // Sanity check.
556 
557  if(precursor_list_size !=
558  qualified_mass_spectrum.getPrecursorIonData().size())
559  {
560  qDebug() << "Going to throw The number of precursors in the file is "
561  "different from the number of precursors in memory.";
562 
564  QObject::tr("The number of precursors in the file is different "
565  "from the number of precursors in memory."));
566  }
567 
568  // if(precursor_list_size == 1)
569  //{
570  // qDebug() << "Trying to get the mz value of the unique precursor ion:"
571  //<< qualified_mass_spectrum.getPrecursorMz();
572  //}
573 
574  processRetentionTime(spectrum_p, qualified_mass_spectrum);
575 
576  processDriftTime(spectrum_p, qualified_mass_spectrum);
577 
578  // for(pwiz::data::CVParam cv_param : ion.cvParams)
579  //{
580  // pwiz::msdata::CVID param_id = cv_param.cvid;
581  // qDebug() << param_id;
582  // qDebug() << cv_param.cvid.c_str();
583  // qDebug() << cv_param.name().c_str();
584  // qDebug() << cv_param.value.c_str();
585  //}
586 
587  if(want_binary_data)
588  {
589 
590  // Fill-in MZIntensityPair vector for convenient access to binary
591  // data
592 
593  std::vector<pwiz::msdata::MZIntensityPair> pairs;
594  spectrum_p->getMZIntensityPairs(pairs);
595 
596  MassSpectrum spectrum;
597  double tic = 0;
598  // std::size_t iterCount = 0;
599 
600  // Iterate through the m/z-intensity pairs
601  for(std::vector<pwiz::msdata::MZIntensityPair>::const_iterator
602  it = pairs.begin(),
603  end = pairs.end();
604  it != end;
605  ++it)
606  {
607  //++iterCount;
608 
609  // qDebug() << "it->mz " << it->mz << " it->intensity" <<
610  // it->intensity;
611  if(it->intensity)
612  {
613  spectrum.push_back(DataPoint(it->mz, it->intensity));
614  tic += it->intensity;
615  }
616  }
617 
618  if(mcsp_msRunId.get()->getMzFormat() == MzFormat::MGF)
619  {
620  // Sort peaks by mz
621  spectrum.sortMz();
622  }
623 
624  // lc = localeconv ();
625  // qDebug() << " env=" << localeconv () << " lc->decimal_point "
626  // << lc->decimal_point;
627  // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()
628  // "<< spectrum.size();
629  MassSpectrumSPtr spectrum_sp = spectrum.makeMassSpectrumSPtr();
630  qualified_mass_spectrum.setMassSpectrumSPtr(spectrum_sp);
631 
632  // double sumY =
633  // qualified_mass_spectrum.getMassSpectrumSPtr()->sumY(); qDebug()
634  // <<
635  // __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
636  //<< "iterCount:" << iterCount << "Spectrum size "
637  //<< spectrum.size() << "with tic:" << tic
638  //<< "and sumY:" << sumY;
639  }
640  else
641  qualified_mass_spectrum.setMassSpectrumSPtr(nullptr);
642  }
643  catch(PappsoException &errorp)
644  {
645  qDebug() << "Going to throw";
646 
648  QObject::tr("Error reading data using the proteowizard library: %1")
649  .arg(errorp.qwhat()));
650  }
651  catch(std::exception &error)
652  {
653  qDebug() << "Going to throw";
654 
656  QObject::tr("Error reading data using the proteowizard library: %1")
657  .arg(error.what()));
658  }
659 
660  // setlocale(LC_ALL, env.c_str());
661 
662  ok = true;
663 
664  // qDebug() << "QualifiedMassSpectrum: " <<
665  // qualified_mass_spectrum.toString();
666  return qualified_mass_spectrum;
667 }
668 
669 
672  bool want_binary_data,
673  bool &ok) const
674 {
675 
676  std::string env;
677  env = setlocale(LC_ALL, "");
678  // struct lconv *lc = localeconv();
679 
680  // qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__
681  //<< "env=" << env.c_str()
682  //<< "lc->decimal_point:" << lc->decimal_point;
683 
684  setlocale(LC_ALL, "C");
685 
686  MassSpectrumId massSpectrumId(mcsp_msRunId);
687 
688  if(msp_msData == nullptr)
689  {
690  setlocale(LC_ALL, env.c_str());
691  return (QualifiedMassSpectrum(massSpectrumId));
692  }
693 
694  // const bool want_binary_data = true;
695 
696  pwiz::msdata::SpectrumListPtr spectrum_list_p =
697  msp_msData->run.spectrumListPtr;
698 
699  if(spectrum_index == spectrum_list_p.get()->size())
700  {
701  setlocale(LC_ALL, env.c_str());
702  throw ExceptionNotFound(
703  QObject::tr("The spectrum index cannot be equal to the size of the "
704  "spectrum list."));
705  }
706 
707  // At this point we know the spectrum index might be sane, so store it in
708  // the mass spec id object.
709  massSpectrumId.setSpectrumIndex(spectrum_index);
710 
711  pwiz::msdata::SpectrumPtr native_pwiz_spectrum_sp =
712  getPwizSpectrumPtr(spectrum_list_p.get(), spectrum_index, want_binary_data);
713 
714  setlocale(LC_ALL, env.c_str());
715 
716  massSpectrumId.setNativeId(
717  QString::fromStdString(native_pwiz_spectrum_sp->id));
718 
720  massSpectrumId, native_pwiz_spectrum_sp.get(), want_binary_data, ok);
721 }
722 
723 
724 bool
725 PwizMsRunReader::accept(const QString &file_name) const
726 {
727  // We want to know if we can handle the file_name.
728  pwiz::msdata::ReaderList reader_list;
729 
730  std::string reader_type = reader_list.identify(file_name.toStdString());
731 
732  if(!reader_type.empty())
733  return true;
734 
735  return false;
736 }
737 
738 
740 PwizMsRunReader::massSpectrumSPtr(std::size_t spectrum_index)
741 {
742  // qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__;
743  return qualifiedMassSpectrum(spectrum_index, true).getMassSpectrumSPtr();
744 }
745 
747 PwizMsRunReader::massSpectrumCstSPtr(std::size_t spectrum_index)
748 {
749  // qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__;
750  return qualifiedMassSpectrum(spectrum_index, true).getMassSpectrumCstSPtr();
751 }
752 
754 PwizMsRunReader::qualifiedMassSpectrum(std::size_t spectrum_index,
755  bool want_binary_data) const
756 {
757 
758  QualifiedMassSpectrum spectrum;
759  bool ok = false;
760 
761  spectrum =
762  qualifiedMassSpectrumFromPwizMSData(spectrum_index, want_binary_data, ok);
763 
764  if(mcsp_msRunId->getMzFormat() == pappso::MzFormat::MGF)
765  {
766  if(spectrum.getRtInSeconds() == 0)
767  {
768  // spectrum = qualifiedMassSpectrumFromPwizMSData(scan_num - 1);
769  }
770  }
771 
772  // if(!ok)
773  // qDebug() << "Encountered a mass spectrum for which the status is bad.";
774 
775  return spectrum;
776 }
777 
778 
779 void
782 {
783 
784  // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()";
785 
786  // We want to iterate in the pwiz-spectrum-list and for each pwiz-spectrum
787  // create a pappso-spectrum (QualifiedMassSpectrum). Once the pappso mass
788  // spectrum has been fully qualified (that is, the member data have been
789  // set), it is transferred to the handler passed as parameter to this
790  // function for the consumer to do what it wants with it.
791 
792  // Does the handler consuming the mass spectra read from file want these
793  // mass spectra to hold the binary data arrays (mz/i vectors)?
794 
795  const bool want_binary_data = handler.needPeakList();
796 
797 
798  std::string env;
799  env = setlocale(LC_ALL, "");
800  setlocale(LC_ALL, "C");
801 
802 
803  // We access the pwiz-mass-spectra via the spectrumListPtr that sits in the
804  // run member of msp_msData.
805 
806  pwiz::msdata::SpectrumListPtr spectrum_list_p =
807  msp_msData->run.spectrumListPtr;
808 
809  // We'll need it to perform the looping in the spectrum list.
810  std::size_t spectrum_list_size = spectrum_list_p.get()->size();
811 
812  // qDebug() << "The spectrum list has size:" << spectrum_list_size;
813 
814  // Inform the handler of the spectrum list so that it can handle feedback to
815  // the user.
816  handler.spectrumListHasSize(spectrum_list_size);
817 
818  // Iterate in the full list of spectra.
819 
820  for(std::size_t iter = 0; iter < spectrum_list_size; iter++)
821  {
822 
823  // If the user of this reader instance wants to stop reading the
824  // spectra, then break this loop.
825  if(handler.shouldStop())
826  {
827  qDebug() << "The operation was cancelled. Breaking the loop.";
828  break;
829  }
830 
831  // Get the native pwiz-spectrum from the spectrum list.
832  // Note that this pointer is a shared pointer from pwiz.
833 
834  pwiz::msdata::SpectrumPtr native_pwiz_spectrum_sp =
835  getPwizSpectrumPtr(spectrum_list_p.get(), iter, want_binary_data);
836 
837  /*
838  * we want to load metadata of the spectrum even if it does not contain
839  peaks
840 
841  * if(!native_pwiz_spectrum_sp->hasBinaryData())
842  {
843  // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ <<
844  "
845  ()"
846  //<< "native pwiz spectrum is empty, continuing.";
847  continue;
848  }
849  */
850 
851  // Instantiate the mass spectrum id that will hold critical information
852  // like the the native id string and the spectrum index.
853 
854  MassSpectrumId massSpectrumId(mcsp_msRunId, iter /* spectrum index*/);
855 
856  // Get the spectrum native id as a QString to store it in the mass
857  // spectrum id class. This is will allow later to refer to the same
858  // spectrum starting back from the file.
859 
860  QString native_id = QString::fromStdString(native_pwiz_spectrum_sp->id);
861  massSpectrumId.setNativeId(native_id);
862 
863  // Finally, instantiate the qualified mass spectrum with its id. This
864  // function will continue performing pappso-spectrum detailed
865  // qualification.
866 
867  bool ok = false;
868 
869  QualifiedMassSpectrum qualified_mass_spectrum =
871  massSpectrumId, native_pwiz_spectrum_sp.get(), want_binary_data, ok);
872 
873  if(!ok)
874  {
875  // qDebug() << "Encountered a mass spectrum for which the returned "
876  //"status is bad.";
877  continue;
878  }
879 
880  // Before handing the mass spectrum out to the handler, see if the
881  // native mass spectrum was empty or not.
882 
883  // if(!native_pwiz_spectrum_sp->defaultArrayLength)
884  // qDebug() << "The mass spectrum has not defaultArrayLength";
885 
886  qualified_mass_spectrum.setEmptyMassSpectrum(
887  !native_pwiz_spectrum_sp->defaultArrayLength);
888 
889  // The handler will receive the index of the mass spectrum in the
890  // current run via the mass spectrum id member datum.
891  handler.setQualifiedMassSpectrum(qualified_mass_spectrum);
892  }
893 
894  setlocale(LC_ALL, env.c_str());
895  // End of
896  // for(std::size_t iter = 0; iter < spectrum_list_size; iter++)
897 
898  // Now let the loading handler know that the loading of the data has ended.
899  // The handler might need this "signal" to perform additional tasks or to
900  // cleanup cruft.
901 
902  // qDebug() << "Loading ended";
903  handler.loadingEnded();
904 }
905 
906 
907 std::size_t
909 {
910  return msp_msData->run.spectrumListPtr.get()->size();
911 }
912 
913 bool
915 {
916  return m_hasScanNumbers;
917 }
918 
919 bool
921 {
922  msp_msData = nullptr;
923  return true;
924 }
925 
926 bool
928 {
929  if(msp_msData == nullptr)
930  {
931  initialize();
932  }
933  return true;
934 }
935 
936 } // namespace pappso
pappso::PwizMsRunReader::initialize
virtual void initialize() override
Definition: pwizmsrunreader.cpp:65
pappso::MassSpectrum::makeMassSpectrumSPtr
MassSpectrumSPtr makeMassSpectrumSPtr() const
Definition: massspectrum.cpp:125
pappso::pappso_double
double pappso_double
A type definition for doubles.
Definition: types.h:48
pappso::MassSpectrumCstSPtr
std::shared_ptr< const MassSpectrum > MassSpectrumCstSPtr
Definition: massspectrum.h:55
pappso::QualifiedMassSpectrum::getMassSpectrumSPtr
MassSpectrumSPtr getMassSpectrumSPtr() const
Get the MassSpectrumSPtr.
Definition: qualifiedmassspectrum.cpp:133
pappso::MsRunReader
base class to read MSrun the only way to build a MsRunReader object is to use the MsRunReaderFactory
Definition: msrunreader.h:158
pappso::PwizMsRunReader::qualifiedMassSpectrumFromPwizMSData
QualifiedMassSpectrum qualifiedMassSpectrumFromPwizMSData(std::size_t spectrum_index, bool want_binary_data, bool &ok) const
Definition: pwizmsrunreader.cpp:671
pappso::SpectrumCollectionHandlerInterface::spectrumListHasSize
virtual void spectrumListHasSize(std::size_t size)
Definition: msrunreader.cpp:55
pappso::PwizMsRunReader::m_hasScanNumbers
bool m_hasScanNumbers
Definition: pwizmsrunreader.h:100
pappso::PwizMsRunReader::hasScanNumbers
virtual bool hasScanNumbers() const override
tells if spectra can be accessed using scan numbers by default, it returns false. Only overrided func...
Definition: pwizmsrunreader.cpp:914
pappso::PwizMsRunReader::massSpectrumSPtr
virtual MassSpectrumSPtr massSpectrumSPtr(std::size_t spectrum_index) override
get a MassSpectrumSPtr class given its spectrum index
Definition: pwizmsrunreader.cpp:740
pappso
tries to keep as much as possible monoisotopes, removing any possible C13 peaks
Definition: aa.cpp:39
pappso::MassSpectrum
Class to represent a mass spectrum.
Definition: massspectrum.h:71
pappso::SpectrumCollectionHandlerInterface::setQualifiedMassSpectrum
virtual void setQualifiedMassSpectrum(const QualifiedMassSpectrum &spectrum)=0
pappso::PwizMsRunReader::qualifiedMassSpectrumFromPwizSpectrumPtr
QualifiedMassSpectrum qualifiedMassSpectrumFromPwizSpectrumPtr(const MassSpectrumId &massSpectrumId, pwiz::msdata::Spectrum *spectrum_p, bool want_binary_data, bool &ok) const
Definition: pwizmsrunreader.cpp:351
pappso::MsRunIdCstSPtr
std::shared_ptr< const MsRunId > MsRunIdCstSPtr
Definition: msrunid.h:44
pappso::MassSpectrum::sortMz
void sortMz()
Sort the DataPoint instances of this spectrum.
Definition: massspectrum.cpp:201
pappso::DataPoint
Definition: datapoint.h:21
pappso::PwizMsRunReader::~PwizMsRunReader
virtual ~PwizMsRunReader()
Definition: pwizmsrunreader.cpp:164
pappso::PwizMsRunReader::acquireDevice
virtual bool acquireDevice() override
acquire data back end device
Definition: pwizmsrunreader.cpp:927
pappso::SpectrumCollectionHandlerInterface::shouldStop
virtual bool shouldStop()
Definition: msrunreader.cpp:46
pappso::ExceptionNotPossible
Definition: exceptionnotpossible.h:32
pappso::MsRunReader::mcsp_msRunId
MsRunIdCstSPtr mcsp_msRunId
Definition: msrunreader.h:220
pappso::QualifiedMassSpectrum::getPrecursorIonData
const std::vector< PrecursorIonData > & getPrecursorIonData() const
Definition: qualifiedmassspectrum.cpp:438
pappso::QualifiedMassSpectrum::setMassSpectrumSPtr
void setMassSpectrumSPtr(MassSpectrumSPtr massSpectrum)
Set the MassSpectrumSPtr.
Definition: qualifiedmassspectrum.cpp:125
pappso::SpectrumCollectionHandlerInterface::loadingEnded
virtual void loadingEnded()
Definition: msrunreader.cpp:51
pappso::QualifiedMassSpectrum::setMsLevel
void setMsLevel(uint ms_level)
Set the mass spectrum level.
Definition: qualifiedmassspectrum.cpp:179
pappso::QualifiedMassSpectrum::setRtInSeconds
void setRtInSeconds(pappso_double rt)
Set the retention time in seconds.
Definition: qualifiedmassspectrum.cpp:195
pappso::PwizMsRunReader::processRetentionTime
bool processRetentionTime(pwiz::msdata::Spectrum *spectrum_p, QualifiedMassSpectrum &qualified_mass_spectrum) const
Definition: pwizmsrunreader.cpp:215
pappso::PwizMsRunReader::processDriftTime
bool processDriftTime(pwiz::msdata::Spectrum *spectrum_p, QualifiedMassSpectrum &qualified_mass_spectrum) const
Definition: pwizmsrunreader.cpp:283
pappso::QualifiedMassSpectrum
Class representing a fully specified mass spectrum.
Definition: qualifiedmassspectrum.h:85
pappso::Utils::toUtf8StandardString
static std::string toUtf8StandardString(const QString &text)
Definition: utils.cpp:115
pappso::QualifiedMassSpectrum::setPrecursorNativeId
void setPrecursorNativeId(const QString &native_id)
Set the scan native id of the precursor ion.
Definition: qualifiedmassspectrum.cpp:417
pappso::SpectrumCollectionHandlerInterface::needPeakList
virtual bool needPeakList() const =0
tells if we need the peak list (if we want the binary data) for each spectrum
pappso::QualifiedMassSpectrum::appendPrecursorIonData
void appendPrecursorIonData(const PrecursorIonData &precursor_ion_data)
Definition: qualifiedmassspectrum.cpp:430
pappso::QualifiedMassSpectrum::getPrecursorNativeId
const QString & getPrecursorNativeId() const
Definition: qualifiedmassspectrum.cpp:423
pappso::PwizMsRunReader::massSpectrumCstSPtr
virtual MassSpectrumCstSPtr massSpectrumCstSPtr(std::size_t spectrum_index) override
Definition: pwizmsrunreader.cpp:747
pappso::ExceptionNotFound
Definition: exceptionnotfound.h:32
pappso::QualifiedMassSpectrum::getMassSpectrumCstSPtr
MassSpectrumCstSPtr getMassSpectrumCstSPtr() const
Get the MassSpectrumCstSPtr.
Definition: qualifiedmassspectrum.cpp:141
pappso::MassSpectrumId
Definition: massspectrumid.h:38
pappso::QualifiedMassSpectrum::setEmptyMassSpectrum
void setEmptyMassSpectrum(bool is_empty_mass_spectrum)
Definition: qualifiedmassspectrum.cpp:164
pappso::QualifiedMassSpectrum::setPrecursorSpectrumIndex
void setPrecursorSpectrumIndex(std::size_t precursor_scan_num)
Set the scan number of the precursor ion.
Definition: qualifiedmassspectrum.cpp:400
pappso::PwizMsRunReader::accept
virtual bool accept(const QString &file_name) const override
tells if the reader is able to handle this file must be implemented by private MS run reader,...
Definition: pwizmsrunreader.cpp:725
pappso::PappsoException::qwhat
virtual const QString & qwhat() const
Definition: pappsoexception.h:66
pappso::PwizMsRunReader::qualifiedMassSpectrum
virtual QualifiedMassSpectrum qualifiedMassSpectrum(std::size_t spectrum_index, bool want_binary_data=true) const override
get a QualifiedMassSpectrum class given its scan number
Definition: pwizmsrunreader.cpp:754
pappso::PwizMsRunReader::PwizMsRunReader
PwizMsRunReader(MsRunIdCstSPtr &msrun_id_csp)
Definition: pwizmsrunreader.cpp:53
pappso::PwizMsRunReader::releaseDevice
virtual bool releaseDevice() override
release data back end device. If the data back end is released, the developer has to use acquireDev...
Definition: pwizmsrunreader.cpp:920
pappso::MassSpectrumId::setSpectrumIndex
void setSpectrumIndex(std::size_t index)
Definition: massspectrumid.cpp:103
pappso::QualifiedMassSpectrum::setDtInMilliSeconds
void setDtInMilliSeconds(pappso_double rt)
Set the drift time in milliseconds.
Definition: qualifiedmassspectrum.cpp:219
pappso::MassSpectrumId::setNativeId
void setNativeId(const QString &native_id)
Definition: massspectrumid.cpp:89
pappso::PwizMsRunReader::msp_msData
pwiz::msdata::MSDataPtr msp_msData
Definition: pwizmsrunreader.h:74
pappso::PwizMsRunReader::getPwizSpectrumPtr
pwiz::msdata::SpectrumPtr getPwizSpectrumPtr(pwiz::msdata::SpectrumList *p_spectrum_list, std::size_t spectrum_index, bool want_binary_data) const
Definition: pwizmsrunreader.cpp:170
pwizmsrunreader.h
MSrun file reader based on the proteowizard library.
pappso::PwizMsRunReader::spectrumListSize
virtual std::size_t spectrumListSize() const override
get the total number of spectra contained in the MSrun data file
Definition: pwizmsrunreader.cpp:908
pappso::PrecursorIonData
Definition: qualifiedmassspectrum.h:62
pappso::SpectrumCollectionHandlerInterface
interface to collect spectrums from the MsRunReader class
Definition: msrunreader.h:59
pappso::MassSpectrumSPtr
std::shared_ptr< MassSpectrum > MassSpectrumSPtr
Definition: massspectrum.h:54
pappso::PappsoException
Definition: pappsoexception.h:42
pappso::QualifiedMassSpectrum::getRtInSeconds
pappso_double getRtInSeconds() const
Get the retention time in seconds.
Definition: qualifiedmassspectrum.cpp:203
pappso::PwizMsRunReader::readSpectrumCollection
virtual void readSpectrumCollection(SpectrumCollectionHandlerInterface &handler) override
function to visit an MsRunReader and get each Spectrum in a spectrum collection handler
Definition: pwizmsrunreader.cpp:780