libpappsomspp
Library for mass spectrometry
pwizmsrunreader.cpp
Go to the documentation of this file.
1 /**
2  * \file pappsomspp/msrun/private/pwizmsrunreader.cpp
3  * \date 29/05/2018
4  * \author Olivier Langella
5  * \brief MSrun file reader based on the ProteoWizard library
6  */
7 
8 /*******************************************************************************
9  * Copyright (c) 2018 Olivier Langella <Olivier.Langella@u-psud.fr>.
10  *
11  * This file is part of the PAPPSOms++ library.
12  *
13  * PAPPSOms++ is free software: you can redistribute it and/or modify
14  * it under the terms of the GNU General Public License as published by
15  * the Free Software Foundation, either version 3 of the License, or
16  * (at your option) any later version.
17  *
18  * PAPPSOms++ is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21  * GNU General Public License for more details.
22  *
23  * You should have received a copy of the GNU General Public License
24  * along with PAPPSOms++. If not, see <http://www.gnu.org/licenses/>.
25  *
26  * Contributors:
27  * Olivier Langella <Olivier.Langella@u-psud.fr> - initial API and
28  *implementation
29  ******************************************************************************/
30 
31 
32 #include <QDebug>
33 
34 #include "pwizmsrunreader.h"
35 
36 #include <pwiz/data/msdata/DefaultReaderList.hpp>
37 
38 
39 #include "../../utils.h"
40 #include "../../pappsoexception.h"
41 #include "../../exception/exceptionnotfound.h"
42 #include "../../exception/exceptionnotpossible.h"
43 
44 
45 // int pwizMsRunReaderMetaTypeId =
46 // qRegisterMetaType<pappso::PwizMsRunReader>("pappso::PwizMsRunReader");
47 
48 
49 namespace pappso
50 {
51 
52 
54  : MsRunReader(msrun_id_csp)
55 {
56  // The initialization needs to be done immediately so that we get the pwiz
57  // MsDataPtr corresponding to the right ms_run_id in the parameter. That
58  // pointer will be set to msp_msData.
59 
60  initialize();
61 }
62 
63 
64 void
66 {
67  std::string file_name_std =
69 
70  // Make a backup of the current locale
71  std::string env_backup = setlocale(LC_ALL, "");
72  // struct lconv *lc = localeconv();
73 
74  // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
75  //<< "env_backup=" << env_backup.c_str() << "lc->decimal_point"
76  //<< lc->decimal_point;
77 
78  // Now actually search for the useful MSDataPtr to assign to the member variable.
79 
80  pwiz::msdata::DefaultReaderList defaultReaderList;
81 
82  std::vector<pwiz::msdata::MSDataPtr> msDataPtrVector;
83 
84  try
85  {
86  defaultReaderList.read(file_name_std, msDataPtrVector);
87  }
88  catch(std::exception &error)
89  {
90  qDebug() << QString("Failed to read the data from file %1")
91  .arg(QString::fromStdString(file_name_std));
92  }
93 
94  // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
95  //<< "The number of runs is:" << msDataPtrVector.size()
96  //<< "The number of spectra in first run is:"
97  //<< msDataPtrVector.at(0)->run.spectrumListPtr->size();
98 
99  // Single-run file handling here.
100 
101  // Specific case of the MGF data format: we do not have a run id for that kind
102  // of data. In this case there must be a single run!
103 
104  if(mcsp_msRunId->getRunId().isEmpty())
105  {
106  if(msDataPtrVector.size() != 1)
107  throw(
108  ExceptionNotPossible("For the kind of file at hand there can only be "
109  "one run in the file."));
110 
111  // At this point we know the single msDataPtr is the one we are looking
112  // for.
113 
114  msp_msData = msDataPtrVector.front();
115  }
116 
117  else
118  {
119  // Multi-run file handling here.
120  for(auto &msDataPtr : msDataPtrVector)
121  {
122  if(msDataPtr->run.id == mcsp_msRunId->getRunId().toStdString())
123  {
124  msp_msData = msDataPtr;
125 
126  // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
127  //<< "Found the right MSDataPtr for run id.";
128 
129  break;
130  }
131  }
132  }
133 
134  if(msp_msData == nullptr)
135  {
136  throw(ExceptionNotPossible(
137  "Could not find a MSDataPtr matching the requested run id."));
138  }
139 
140 
141  // check if this MS run can be used with scan numbers
142  // MS:1000490 Agilent instrument model
143  pwiz::cv::CVID native_id_format =
144  pwiz::msdata::id::getDefaultNativeIDFormat(*msp_msData.get());
145 
146  // msp_msData.get()->getDefaultNativeIDFormat();
147 
148  if(native_id_format == pwiz::cv::CVID::MS_Thermo_nativeID_format)
149  {
150  m_hasScanNumbers = true;
151  }
152  else
153  {
154  m_hasScanNumbers = false;
155  }
156 
157  if(mcsp_msRunId.get()->getMzFormat() == MzFormat::mzXML)
158  {
159  m_hasScanNumbers = true;
160  }
161 }
162 
163 
165 {
166 }
167 
168 
// Fetch the pwiz spectrum located at spectrum_index in p_spectrum_list.
//
// p_spectrum_list  : pwiz spectrum list to read from; it is dereferenced
//                    without a null check, so the caller must pass a valid
//                    pointer.
// spectrum_index   : index of the requested spectrum in that list.
// want_binary_data : forwarded as-is to SpectrumList::spectrum().
//
// Returns the non-null pwiz::msdata::SpectrumPtr obtained from the list.
// Throws ExceptionNotFound when the pwiz call throws (std::runtime_error or
// any other std::exception) or when the pointer it returns is null.
169 pwiz::msdata::SpectrumPtr
170 PwizMsRunReader::getPwizSpectrumPtr(pwiz::msdata::SpectrumList *p_spectrum_list,
171  std::size_t spectrum_index,
172  bool want_binary_data) const
173 {
174  pwiz::msdata::SpectrumPtr native_pwiz_spectrum_sp;
175 
176  try
177  {
178  native_pwiz_spectrum_sp =
179  p_spectrum_list->spectrum(spectrum_index, want_binary_data);
180  }
 // std::runtime_error is caught apart from std::exception only so that the
 // translated message can name the exception type.
181  catch(std::runtime_error &error)
182  {
183  qDebug() << "getPwizSpectrumPtr error " << error.what() << " "
184  << typeid(error).name();
185 
186  throw ExceptionNotFound(QObject::tr("Pwiz spectrum index %1 not found in "
187  "MS file std::runtime_error :\n%2")
188  .arg(spectrum_index)
189  .arg(error.what()));
190  }
 // Any other standard exception is converted to ExceptionNotFound as well.
191  catch(std::exception &error)
192  {
193  qDebug() << "getPwizSpectrumPtr error " << error.what()
194  << typeid(error).name();
195 
196  throw ExceptionNotFound(
197  QObject::tr("Pwiz spectrum index %1 not found in MS file :\n%2")
198  .arg(spectrum_index)
199  .arg(error.what()));
200  }
201 
 // Guard against an empty pointer being returned without any exception.
202  if(native_pwiz_spectrum_sp.get() == nullptr)
203  {
204  throw ExceptionNotFound(
205  QObject::tr(
206  "Pwiz spectrum index %1 not found in MS file : null pointer")
207  .arg(spectrum_index));
208  }
209 
210  return native_pwiz_spectrum_sp;
211 }
212 
213 
216  const MassSpectrumId &massSpectrumId,
217  pwiz::msdata::Spectrum *spectrum_p,
218  bool want_binary_data,
219  bool &ok) const
220 {
221  // qDebug();
222 
223  std::string env;
224  env = setlocale(LC_ALL, "");
225  setlocale(LC_ALL, "C");
226 
227  QualifiedMassSpectrum qualified_mass_spectrum(massSpectrumId);
228 
229  try
230  {
231 
232  // We want to store the ms level for this spectrum
233 
234  int msLevel =
235  (spectrum_p->cvParam(pwiz::msdata::MS_ms_level).valueAs<int>());
236 
237  qualified_mass_spectrum.setMsLevel(msLevel);
238 
239  //qDebug() << "for spectrum at index:" << massSpectrumId.getSpectrumIndex()
240  //<< "msLevel:" << msLevel
241  //<< "with number of precursors:" << spectrum_p->precursors.size();
242 
243  // We want to know if this spectrum is a fragmentation spectrum obtained
244  // from a selected precursor ion.
245 
246  if(spectrum_p->precursors.size() > 0)
247  {
248  //qDebug() << "The spectrum has precursor(s).";
249 
250  // Sanity check
251  if(msLevel <= 1)
252  {
253  qDebug() << "Going to throw.";
254 
255  throw(ExceptionNotPossible(
256  "msLevel cannot be less than two for "
257  "a spectrum that has items in its Precursor list."));
258  }
259 
260  // See what is the first precursor in the list.
261 
262  // qDebug() << "Now tring to get the precursor.";
263 
264  pwiz::msdata::Precursor &precursor = spectrum_p->precursors.front();
265 
266  // Set this variable ready as we need that default value in certain
267  // circumstances.
268 
269  std::size_t precursor_spectrum_index =
270  std::numeric_limits<std::size_t>::max();
271 
272  // qDebug() << "First precursor:" << precursor.spectrumID.c_str();
273 
274  if(precursor.spectrumID.empty())
275  {
276  // qDebug() << "The precursor's spectrum ID is empty.";
277 
278  if(mcsp_msRunId.get()->getMzFormat() == MzFormat::MGF)
279  {
280  // qDebug()
281  //<< "Format is MGF, precursor's spectrum ID can be empty.";
282  }
283  else
284  {
285  // When performing Lumos Fusion fragmentation experiments in
286  // Tune mode and with recording, the first spectrum of the
287  // list is a fragmentation spectrum (ms level 2) that has no
288  // identity for the precursor spectrum because there is no
289  // full scan acquisition.
290 
291 #if 0
292  // Let's try to understand if we are in this configuration.
293 
294  if(!massSpectrumId.getSpectrumIndex())
295  {
296  qDebug() << "This MS2 sprectrum is at index 0."
297  "This might indicate a fragmentation "
298  "experiment performed in Tune mode.";
299 
300  // The precursor spectrum index is initialized to max().
301  // So, it is not going to be changed in the present
302  // situation.
303  }
304  else
305  {
306  qDebug() << "This MS2 spectrum has no precursor's "
307  "spectrum index.";
308  }
309  // else
310  //{
311  // throw(
312  // ExceptionNotPossible("Failed to get the id of the "
313  //"precursor ion's spectrum."));
314  //}
315 #endif
316  }
317  }
318  // End of
319  // if(precursor.spectrumID.empty())
320  else
321  {
322  // We could get a native precursor spectrum id, so convert that
323  // native id to a spectrum index.
324 
325  qualified_mass_spectrum.setPrecursorNativeId(
326  QString::fromStdString(precursor.spectrumID));
327 
328  if(qualified_mass_spectrum.getPrecursorNativeId().isEmpty())
329  {
330  // qDebug()
331  //<< "The native id of the precursor spectrum is empty !!!";
332  }
333 
334  // Get the spectrum index of the spectrum that contained the
335  // precursor ion.
336 
337  precursor_spectrum_index =
338  msp_msData->run.spectrumListPtr->find(precursor.spectrumID);
339 
340  // Note that the Mascot MGF format has a peculiar handling of the
341  // precursor ion stuff so we cannot throw.
342  if(precursor_spectrum_index ==
343  msp_msData->run.spectrumListPtr->size())
344  {
345  if(mcsp_msRunId.get()->getMzFormat() != MzFormat::MGF)
346  {
347  throw(
348  ExceptionNotPossible("Failed to find the index of the "
349  "precursor ion's spectrum."));
350  }
351  }
352 
353  qualified_mass_spectrum.setPrecursorSpectrumIndex(
354  precursor_spectrum_index);
355 
356  // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
357  //<< "Set the precursor spectrum index to:"
358  //<< qualified_mass_spectrum.getPrecursorSpectrumIndex()
359  //<< "for qualified mass spectrum:"
360  //<< &qualified_mass_spectrum;
361  }
362 
363  if(!precursor.selectedIons.size())
364  {
365  qDebug() << "Going to throw.";
366 
367  throw(
368  ExceptionNotPossible("The spectrum has msLevel > 1 but the "
369  "precursor ions's selected ions "
370  "list is empty."));
371  }
372 
373  pwiz::msdata::SelectedIon &ion = *(precursor.selectedIons.begin());
374 
375  // selected ion m/z
376 
377  pappso_double selected_ion_mz =
378  QString(ion.cvParam(pwiz::cv::MS_selected_ion_m_z).value.c_str())
379  .toDouble();
380 
381  qualified_mass_spectrum.setPrecursorMz(selected_ion_mz);
382 
383  // selected ion peak intensity
384 
385  pappso_double selected_ion_peak_intensity =
386  QString(ion.cvParam(pwiz::cv::MS_peak_intensity).value.c_str())
387  .toDouble();
388 
389  qualified_mass_spectrum.setPrecursorIntensity(
390  selected_ion_peak_intensity);
391 
392  // unsigned int test =
393  // QString(ion.cvParam(pwiz::cv::MS_1200_series_LC_MSD_SL).value.c_str()).toUInt();
394  // qDebug() << " test "<< test;
395 
396  // charge state
397 
398  unsigned int selected_ion_charge_state =
399  QString(ion.cvParam(pwiz::cv::MS_charge_state).value.c_str())
400  .toUInt();
401 
402  if(selected_ion_charge_state > 0)
403  {
404  qualified_mass_spectrum.setPrecursorCharge(
405  selected_ion_charge_state);
406  }
407 
408 
409  // General sum-up
410  // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
411  //<< "precursor_scan_number:"
412  //<< "selected_ion_m_z:" << selected_ion_mz
413  //<< "selected_ion_peak_intensity:"
414  //<< selected_ion_peak_intensity << "selected_ion_charge_state"
415  //<< selected_ion_charge_state;
416  }
417  // End of
418  // if(spectrum_p->precursors.size() > 0)
419  else
420  {
421  // Sanity check
422 
423  // Unfortunately, logic here is defeated by some vendors that have
424  // files with MS2 spectra without <precursorList>. Thus we have
425  // spectrum_p->precursors.size() == 0 and msLevel > 1.
426 #if 0
427  if(msLevel != 1)
428  {
429  throw(
430  ExceptionNotPossible("msLevel cannot be different than 1 if "
431  "there is not a single precursor ion."));
432  }
433 #endif
434  }
435 
436  // We now have to set the retention time at which this mass spectrum was
437  // acquired. This is the scan start time.
438 
439  if(!spectrum_p->scanList.scans[0].hasCVParam(
440  pwiz::msdata::MS_scan_start_time))
441  {
442  if(mcsp_msRunId.get()->getMzFormat() == MzFormat::MGF)
443  { // MGF could not have scan start time
444  qualified_mass_spectrum.setRtInSeconds(-1);
445  }
446  else
447  {
448  throw(ExceptionNotPossible(
449  "The spectrum has no scan start time value set."));
450  }
451  }
452  else
453  {
454  pwiz::data::CVParam retention_time_cv_param =
455  spectrum_p->scanList.scans[0].cvParam(
456  pwiz::msdata::MS_scan_start_time);
457 
458  // Try to get the units of the retention time value.
459 
460  std::string unit_name = retention_time_cv_param.unitsName();
461  // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
462  //<< "Unit name for the retention time:"
463  //<< QString::fromStdString(unit_name);
464 
465  if(unit_name == "second")
466  {
467  qualified_mass_spectrum.setRtInSeconds(
468  retention_time_cv_param.valueAs<double>());
469  }
470  else if(unit_name == "minute")
471  {
472  qualified_mass_spectrum.setRtInSeconds(
473  retention_time_cv_param.valueAs<double>() * 60);
474  }
475  else
476  throw(ExceptionNotPossible(
477  "Could not determine the unit for the scan start time value."));
478  }
479 
480  // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
481  //<< "Retention time for spectrum is:"
482  //<< qualified_mass_spectrum.getRtInSeconds();
483 
484  // Old version not checking unit (by default unit is minutes for RT,
485  // not seconds)
486  //
487  // pappso_double retentionTime =
488  // QString(spectrum_p->scanList.scans[0]
489  //.cvParam(pwiz::msdata::MS_scan_start_time)
490  //.value.c_str())
491  //.toDouble();
492  // qualified_mass_spectrum.setRtInSeconds(retentionTime);
493 
494  // Not all the acquisitions have ion mobility data. We need to test
495  // that:
496 
497  if(spectrum_p->scanList.scans[0].hasCVParam(
498  pwiz::msdata::MS_ion_mobility_drift_time))
499  {
500 
501  // qDebug() << "as strings:"
502  //<< QString::fromStdString(
503  // spectrum_p->scanList.scans[0]
504  //.cvParam(pwiz::msdata::MS_ion_mobility_drift_time)
505  //.valueAs<std::string>());
506 
507  pappso_double driftTime =
508  spectrum_p->scanList.scans[0]
509  .cvParam(pwiz::msdata::MS_ion_mobility_drift_time)
510  .valueAs<double>();
511 
512  // qDebug() << "driftTime:" << driftTime;
513 
514  // Old version requiring use of QString.
515  // pappso_double driftTime =
516  // QString(spectrum_p->scanList.scans[0]
517  //.cvParam(pwiz::msdata::MS_ion_mobility_drift_time)
518  //.value.c_str())
519  //.toDouble();
520 
521  // Now make positively sure that the obtained value is correct.
522  // Note that I suffered a lot with Waters Synapt data that
523  // contained apparently correct drift time XML element that in
524  // fact contained either NaN or inf. When such mass spectra were
525  // encountered, the mz,i data were bogus and crashed the data loading
526  // functions. We just want to skip this kind of bogus mass spectrum by
527  // letting the caller know that the drift time was bogus ("I" is
528  // Filippo Rusconi).
529 
530  if(std::isnan(driftTime) || std::isinf(driftTime))
531  {
532  // qDebug() << "detected as nan or inf.";
533 
534  ok = false;
535 
536  return qualified_mass_spectrum;
537  }
538  else
539  {
540  // The mzML standard stipulates that drift times are in
541  // milliseconds.
542  qualified_mass_spectrum.setDtInMilliSeconds(driftTime);
543  }
544  }
545  // End of
546  // if(spectrum_p->scanList.scans[0].hasCVParam(
547  // pwiz::msdata::MS_ion_mobility_drift_time))
548  else
549  {
550  // Not a bogus mass spectrum but also not a drift spectrum, set -1 as
551  // the drift time value.
552  qualified_mass_spectrum.setDtInMilliSeconds(-1);
553  }
554 
555  // for(pwiz::data::CVParam cv_param : ion.cvParams)
556  //{
557  // pwiz::msdata::CVID param_id = cv_param.cvid;
558  // qDebug() << param_id;
559  // qDebug() << cv_param.cvid.c_str();
560  // qDebug() << cv_param.name().c_str();
561  // qDebug() << cv_param.value.c_str();
562  //}
563 
564  if(want_binary_data)
565  {
566 
567  // Fill-in MZIntensityPair vector for convenient access to binary
568  // data
569 
570  std::vector<pwiz::msdata::MZIntensityPair> pairs;
571  spectrum_p->getMZIntensityPairs(pairs);
572 
573  MassSpectrum spectrum;
574  double tic = 0;
575  // std::size_t iterCount = 0;
576 
577  // Iterate through the m/z-intensity pairs
578  for(std::vector<pwiz::msdata::MZIntensityPair>::const_iterator
579  it = pairs.begin(),
580  end = pairs.end();
581  it != end;
582  ++it)
583  {
584  //++iterCount;
585 
586  // qDebug() << "it->mz " << it->mz << " it->intensity" <<
587  // it->intensity;
588  if(it->intensity)
589  {
590  spectrum.push_back(DataPoint(it->mz, it->intensity));
591  tic += it->intensity;
592  }
593  }
594 
595  if(mcsp_msRunId.get()->getMzFormat() == MzFormat::MGF)
596  {
597  // Sort peaks by mz
598  spectrum.sortMz();
599  }
600 
601  // lc = localeconv ();
602  // qDebug() << " env=" << localeconv () << " lc->decimal_point "
603  // << lc->decimal_point;
604  // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "() "<<
605  // spectrum.size();
606  MassSpectrumSPtr spectrum_sp = spectrum.makeMassSpectrumSPtr();
607  qualified_mass_spectrum.setMassSpectrumSPtr(spectrum_sp);
608 
609  // double sumY =
610  // qualified_mass_spectrum.getMassSpectrumSPtr()->sumY(); qDebug()
611  // <<
612  // __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
613  //<< "iterCount:" << iterCount << "Spectrum size "
614  //<< spectrum.size() << "with tic:" << tic
615  //<< "and sumY:" << sumY;
616  }
617  else
618  qualified_mass_spectrum.setMassSpectrumSPtr(nullptr);
619  }
620  catch(PappsoException &errorp)
621  {
622  qDebug() << "Going to throw";
623 
625  QObject::tr("Error reading data using the proteowizard library: %1")
626  .arg(errorp.qwhat()));
627  }
628  catch(std::exception &error)
629  {
630  qDebug() << "Going to throw";
631 
633  QObject::tr("Error reading data using the proteowizard library: %1")
634  .arg(error.what()));
635  }
636 
637  // setlocale(LC_ALL, env.c_str());
638 
639  ok = true;
640  return qualified_mass_spectrum;
641 }
642 
643 
646  bool want_binary_data,
647  bool &ok) const
648 {
649 
650  std::string env;
651  env = setlocale(LC_ALL, "");
652  // struct lconv *lc = localeconv();
653 
654  // qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__
655  //<< "env=" << env.c_str()
656  //<< "lc->decimal_point:" << lc->decimal_point;
657 
658  setlocale(LC_ALL, "C");
659 
660  MassSpectrumId massSpectrumId(mcsp_msRunId);
661 
662  if(msp_msData == nullptr)
663  {
664  setlocale(LC_ALL, env.c_str());
665  return (QualifiedMassSpectrum(massSpectrumId));
666  }
667 
668  // const bool want_binary_data = true;
669 
670  pwiz::msdata::SpectrumListPtr spectrum_list_p =
671  msp_msData->run.spectrumListPtr;
672 
673  if(spectrum_index == spectrum_list_p.get()->size())
674  {
675  setlocale(LC_ALL, env.c_str());
676  throw ExceptionNotFound(
677  QObject::tr("The spectrum index cannot be equal to the size of the "
678  "spectrum list."));
679  }
680 
681  // At this point we know the spectrum index might be sane, so store it in
682  // the mass spec id object.
683  massSpectrumId.setSpectrumIndex(spectrum_index);
684 
685  pwiz::msdata::SpectrumPtr native_pwiz_spectrum_sp =
686  getPwizSpectrumPtr(spectrum_list_p.get(), spectrum_index, want_binary_data);
687 
688  setlocale(LC_ALL, env.c_str());
689 
690  massSpectrumId.setNativeId(
691  QString::fromStdString(native_pwiz_spectrum_sp->id));
692 
694  massSpectrumId, native_pwiz_spectrum_sp.get(), want_binary_data, ok);
695 }
696 
697 
// Tell whether this reader can handle the file named file_name.
//
// The file name is converted to std::string and handed to
// pwiz::msdata::ReaderList::identify(); the file is accepted (true) when the
// reader type string returned by identify() is not empty, rejected (false)
// otherwise.
//
// NOTE(review): reader_list is a default-constructed ReaderList, whereas
// initialize() reads the data with a DefaultReaderList. Confirm that a
// default-constructed ReaderList holds the readers needed for identify() to
// recognise supported formats.
698 bool
699 PwizMsRunReader::accept(const QString &file_name) const
700 {
701  // We want to know if we can handle the file_name.
702  pwiz::msdata::ReaderList reader_list;
703 
704  std::string reader_type = reader_list.identify(file_name.toStdString());
705 
 // A non-empty reader type means identify() recognised the file.
706  if(!reader_type.empty())
707  return true;
708 
709  return false;
710 }
711 
712 
714 PwizMsRunReader::massSpectrumSPtr(std::size_t spectrum_index)
715 {
716  // qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__;
717  return qualifiedMassSpectrum(spectrum_index, true).getMassSpectrumSPtr();
718 }
719 
721 PwizMsRunReader::massSpectrumCstSPtr(std::size_t spectrum_index)
722 {
723  // qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__;
724  return qualifiedMassSpectrum(spectrum_index, true).getMassSpectrumCstSPtr();
725 }
726 
728 PwizMsRunReader::qualifiedMassSpectrum(std::size_t spectrum_index,
729  bool want_binary_data) const
730 {
731 
732  QualifiedMassSpectrum spectrum;
733  bool ok = false;
734 
735  spectrum =
736  qualifiedMassSpectrumFromPwizMSData(spectrum_index, want_binary_data, ok);
737 
738  if(mcsp_msRunId->getMzFormat() == pappso::MzFormat::MGF)
739  {
740  if(spectrum.getRtInSeconds() == 0)
741  {
742  // spectrum = qualifiedMassSpectrumFromPwizMSData(scan_num - 1);
743  }
744  }
745 
746  // if(!ok)
747  // qDebug() << "Encountered a mass spectrum for which the status is bad.";
748 
749  return spectrum;
750 }
751 
752 
753 void
756 {
757 
758  // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()";
759 
760  // We want to iterate in the pwiz-spectrum-list and for each pwiz-spectrum
761  // create a pappso-spectrum (QualifiedMassSpectrum). Once the pappso mass
762  // spectrum has been fully qualified (that is, the member data have been
763  // set), it is transferred to the handler passed as parameter to this
764  // function for the consumer to do what it wants with it.
765 
766  // Does the handler consuming the mass spectra read from file want these
767  // mass spectra to hold the binary data arrays (mz/i vectors)?
768 
769  const bool want_binary_data = handler.needPeakList();
770 
771 
772  std::string env;
773  env = setlocale(LC_ALL, "");
774  setlocale(LC_ALL, "C");
775 
776 
777  // We access the pwiz-mass-spectra via the spectrumListPtr that sits in the
778  // run member of msp_msData.
779 
780  pwiz::msdata::SpectrumListPtr spectrum_list_p =
781  msp_msData->run.spectrumListPtr;
782 
783  // We'll need it to perform the looping in the spectrum list.
784  std::size_t spectrum_list_size = spectrum_list_p.get()->size();
785 
786  // qDebug() << "The spectrum list has size:" << spectrum_list_size;
787 
788  // Inform the handler of the spectrum list so that it can handle feedback to
789  // the user.
790  handler.spectrumListHasSize(spectrum_list_size);
791 
792  // Iterate in the full list of spectra.
793 
794  for(std::size_t iter = 0; iter < spectrum_list_size; iter++)
795  {
796 
797  // If the user of this reader instance wants to stop reading the spectra,
798  // then break this loop.
799  if(handler.shouldStop())
800  {
801  qDebug() << "The operation was cancelled. Breaking the loop.";
802  break;
803  }
804 
805  // Get the native pwiz-spectrum from the spectrum list.
806  // Note that this pointer is a shared pointer from pwiz.
807 
808  pwiz::msdata::SpectrumPtr native_pwiz_spectrum_sp =
809  getPwizSpectrumPtr(spectrum_list_p.get(), iter, want_binary_data);
810 
811  /*
812  * we want to load metadata of the spectrum even if it does not contain
813  peaks
814 
815  * if(!native_pwiz_spectrum_sp->hasBinaryData())
816  {
817  // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "
818  ()"
819  //<< "native pwiz spectrum is empty, continuing.";
820  continue;
821  }
822  */
823 
824  // Instantiate the mass spectrum id that will hold critical information
825  // like the native id string and the spectrum index.
826 
827  MassSpectrumId massSpectrumId(mcsp_msRunId, iter /* spectrum index*/);
828 
829  // Get the spectrum native id as a QString to store it in the mass
830  // spectrum id class. This will later allow referring to the same
831  // spectrum starting back from the file.
832 
833  QString native_id = QString::fromStdString(native_pwiz_spectrum_sp->id);
834  massSpectrumId.setNativeId(native_id);
835 
836  // Finally, instantiate the qualified mass spectrum with its id. This
837  // function will continue performing pappso-spectrum detailed
838  // qualification.
839 
840  bool ok = false;
841 
842  QualifiedMassSpectrum qualified_mass_spectrum =
844  massSpectrumId, native_pwiz_spectrum_sp.get(), want_binary_data, ok);
845 
846  if(!ok)
847  {
848  // qDebug() << "Encountered a mass spectrum for which the returned "
849  //"status is bad.";
850  continue;
851  }
852 
853  // Before handing the mass spectrum out to the handler, see if the native
854  // mass spectrum was empty or not.
855 
856  // if(!native_pwiz_spectrum_sp->defaultArrayLength)
857  // qDebug() << "The mass spectrum has not defaultArrayLength";
858 
859  qualified_mass_spectrum.setEmptyMassSpectrum(
860  !native_pwiz_spectrum_sp->defaultArrayLength);
861 
862  // The handler will receive the index of the mass spectrum in the
863  // current run via the mass spectrum id member datum.
864  handler.setQualifiedMassSpectrum(qualified_mass_spectrum);
865  }
866 
867  setlocale(LC_ALL, env.c_str());
868  // End of
869  // for(std::size_t iter = 0; iter < spectrum_list_size; iter++)
870 
871  // Now let the loading handler know that the loading of the data has ended.
872  // The handler might need this "signal" to perform additional tasks or to
873  // cleanup cruft.
874 
875  // qDebug() << "Loading ended";
876  handler.loadingEnded();
877 }
878 
879 
880 std::size_t
882 {
883  return msp_msData->run.spectrumListPtr.get()->size();
884 }
885 
886 bool
888 {
889  return m_hasScanNumbers;
890 }
891 
892 
893 } // namespace pappso
pappso::PwizMsRunReader::initialize
virtual void initialize() override
Definition: pwizmsrunreader.cpp:65
pappso::MassSpectrum::makeMassSpectrumSPtr
MassSpectrumSPtr makeMassSpectrumSPtr() const
Definition: massspectrum.cpp:126
pappso::QualifiedMassSpectrum::setPrecursorIntensity
void setPrecursorIntensity(pappso_double intensity)
Set the intensity of the precursor ion.
Definition: qualifiedmassspectrum.cpp:293
pappso::pappso_double
double pappso_double
A type definition for doubles.
Definition: types.h:48
pappso::MassSpectrumCstSPtr
std::shared_ptr< const MassSpectrum > MassSpectrumCstSPtr
Definition: massspectrum.h:55
pappso::QualifiedMassSpectrum::getMassSpectrumSPtr
MassSpectrumSPtr getMassSpectrumSPtr() const
Get the MassSpectrumSPtr.
Definition: qualifiedmassspectrum.cpp:144
pappso::MsRunReader
base class to read an MS run; the only way to build a MsRunReader object is to use the MsRunReaderFactory
Definition: msrunreader.h:158
pappso::PwizMsRunReader::qualifiedMassSpectrumFromPwizMSData
QualifiedMassSpectrum qualifiedMassSpectrumFromPwizMSData(std::size_t spectrum_index, bool want_binary_data, bool &ok) const
Definition: pwizmsrunreader.cpp:645
pappso::SpectrumCollectionHandlerInterface::spectrumListHasSize
virtual void spectrumListHasSize(std::size_t size)
Definition: msrunreader.cpp:55
pappso::PwizMsRunReader::m_hasScanNumbers
bool m_hasScanNumbers
Definition: pwizmsrunreader.h:90
pappso::PwizMsRunReader::hasScanNumbers
virtual bool hasScanNumbers() const override
tells if spectra can be accessed using scan numbers; by default, it returns false. Only overridden func...
Definition: pwizmsrunreader.cpp:887
pappso::QualifiedMassSpectrum::setPrecursorMz
void setPrecursorMz(pappso_double precursor_mz)
Set the precursor m/z ratio.
Definition: qualifiedmassspectrum.cpp:190
pappso::PwizMsRunReader::massSpectrumSPtr
virtual MassSpectrumSPtr massSpectrumSPtr(std::size_t spectrum_index) override
get a MassSpectrumSPtr class given its spectrum index
Definition: pwizmsrunreader.cpp:714
pappso
tries to keep as much as possible monoisotopes, removing any possible C13 peaks
Definition: aa.cpp:39
pappso::MassSpectrum
Class to represent a mass spectrum.
Definition: massspectrum.h:71
pappso::SpectrumCollectionHandlerInterface::setQualifiedMassSpectrum
virtual void setQualifiedMassSpectrum(const QualifiedMassSpectrum &spectrum)=0
pappso::PwizMsRunReader::qualifiedMassSpectrumFromPwizSpectrumPtr
QualifiedMassSpectrum qualifiedMassSpectrumFromPwizSpectrumPtr(const MassSpectrumId &massSpectrumId, pwiz::msdata::Spectrum *spectrum_p, bool want_binary_data, bool &ok) const
Definition: pwizmsrunreader.cpp:215
pappso::MsRunIdCstSPtr
std::shared_ptr< const MsRunId > MsRunIdCstSPtr
Definition: msrunid.h:44
pappso::MassSpectrum::sortMz
void sortMz()
Sort the DataPoint instances of this spectrum.
Definition: massspectrum.cpp:202
pappso::DataPoint
Definition: datapoint.h:21
pappso::PwizMsRunReader::~PwizMsRunReader
virtual ~PwizMsRunReader()
Definition: pwizmsrunreader.cpp:164
pappso::SpectrumCollectionHandlerInterface::shouldStop
virtual bool shouldStop()
Definition: msrunreader.cpp:46
pappso::ExceptionNotPossible
Definition: exceptionnotpossible.h:32
pappso::MsRunReader::mcsp_msRunId
MsRunIdCstSPtr mcsp_msRunId
Definition: msrunreader.h:207
pappso::QualifiedMassSpectrum::setMassSpectrumSPtr
void setMassSpectrumSPtr(MassSpectrumSPtr massSpectrum)
Set the MassSpectrumSPtr.
Definition: qualifiedmassspectrum.cpp:136
pappso::SpectrumCollectionHandlerInterface::loadingEnded
virtual void loadingEnded()
Definition: msrunreader.cpp:51
pappso::QualifiedMassSpectrum::setMsLevel
void setMsLevel(uint ms_level)
Set the mass spectrum level.
Definition: qualifiedmassspectrum.cpp:206
pappso::QualifiedMassSpectrum::setRtInSeconds
void setRtInSeconds(pappso_double rt)
Set the retention time in seconds.
Definition: qualifiedmassspectrum.cpp:222
pappso::QualifiedMassSpectrum
Class representing a fully specified mass spectrum.
Definition: qualifiedmassspectrum.h:68
pappso::Utils::toUtf8StandardString
static std::string toUtf8StandardString(const QString &text)
Definition: utils.cpp:115
pappso::QualifiedMassSpectrum::setPrecursorNativeId
void setPrecursorNativeId(const QString &native_id)
Set the scan native id of the precursor ion.
Definition: qualifiedmassspectrum.cpp:280
pappso::SpectrumCollectionHandlerInterface::needPeakList
virtual bool needPeakList() const =0
tells if we need the peak list (if we want the binary data) for each spectrum
pappso::QualifiedMassSpectrum::getPrecursorNativeId
const QString & getPrecursorNativeId() const
Definition: qualifiedmassspectrum.cpp:286
pappso::MassSpectrumId::getSpectrumIndex
std::size_t getSpectrumIndex() const
Definition: massspectrumid.cpp:110
pappso::PwizMsRunReader::massSpectrumCstSPtr
virtual MassSpectrumCstSPtr massSpectrumCstSPtr(std::size_t spectrum_index) override
Definition: pwizmsrunreader.cpp:721
pappso::ExceptionNotFound
Definition: exceptionnotfound.h:32
pappso::QualifiedMassSpectrum::setPrecursorCharge
void setPrecursorCharge(uint precursor_charge)
Set the precursor charge.
Definition: qualifiedmassspectrum.cpp:174
pappso::QualifiedMassSpectrum::getMassSpectrumCstSPtr
MassSpectrumCstSPtr getMassSpectrumCstSPtr() const
Get the MassSpectrumCstSPtr.
Definition: qualifiedmassspectrum.cpp:152
pappso::MassSpectrumId
Definition: massspectrumid.h:38
pappso::QualifiedMassSpectrum::setEmptyMassSpectrum
void setEmptyMassSpectrum(bool is_empty_mass_spectrum)
Definition: qualifiedmassspectrum.cpp:159
pappso::QualifiedMassSpectrum::setPrecursorSpectrumIndex
void setPrecursorSpectrumIndex(std::size_t precursor_scan_num)
Set the scan number of the precursor ion.
Definition: qualifiedmassspectrum.cpp:265
pappso::PwizMsRunReader::accept
virtual bool accept(const QString &file_name) const override
tells if the reader is able to handle this file; must be implemented by private MS run reader,...
Definition: pwizmsrunreader.cpp:699
pappso::PappsoException::qwhat
virtual const QString & qwhat() const
Definition: pappsoexception.h:66
pappso::PwizMsRunReader::qualifiedMassSpectrum
virtual QualifiedMassSpectrum qualifiedMassSpectrum(std::size_t spectrum_index, bool want_binary_data=true) const override
get a QualifiedMassSpectrum class given its scan number
Definition: pwizmsrunreader.cpp:728
pappso::PwizMsRunReader::PwizMsRunReader
PwizMsRunReader(MsRunIdCstSPtr &msrun_id_csp)
Definition: pwizmsrunreader.cpp:53
pappso::MassSpectrumId::setSpectrumIndex
void setSpectrumIndex(std::size_t index)
Definition: massspectrumid.cpp:103
pappso::QualifiedMassSpectrum::setDtInMilliSeconds
void setDtInMilliSeconds(pappso_double rt)
Set the drift time in milliseconds.
Definition: qualifiedmassspectrum.cpp:246
pappso::MassSpectrumId::setNativeId
void setNativeId(const QString &native_id)
Definition: massspectrumid.cpp:89
pappso::PwizMsRunReader::msp_msData
pwiz::msdata::MSDataPtr msp_msData
Definition: pwizmsrunreader.h:70
pappso::PwizMsRunReader::getPwizSpectrumPtr
pwiz::msdata::SpectrumPtr getPwizSpectrumPtr(pwiz::msdata::SpectrumList *p_spectrum_list, std::size_t spectrum_index, bool want_binary_data) const
Definition: pwizmsrunreader.cpp:170
pwizmsrunreader.h
MSrun file reader based on the ProteoWizard library.
pappso::PwizMsRunReader::spectrumListSize
virtual std::size_t spectrumListSize() const override
get the total number of spectra contained in the MSrun data file
Definition: pwizmsrunreader.cpp:881
pappso::SpectrumCollectionHandlerInterface
interface to collect spectra from the MsRunReader class
Definition: msrunreader.h:59
pappso::MassSpectrumSPtr
std::shared_ptr< MassSpectrum > MassSpectrumSPtr
Definition: massspectrum.h:54
pappso::PappsoException
Definition: pappsoexception.h:42
pappso::QualifiedMassSpectrum::getRtInSeconds
pappso_double getRtInSeconds() const
Get the retention time in seconds.
Definition: qualifiedmassspectrum.cpp:230
pappso::PwizMsRunReader::readSpectrumCollection
virtual void readSpectrumCollection(SpectrumCollectionHandlerInterface &handler) override
function to visit an MsRunReader and get each Spectrum in a spectrum collection handler
Definition: pwizmsrunreader.cpp:754