libpappsomspp
Library for mass spectrometry
mzxmloutput.cpp
Go to the documentation of this file.
1 /**
2  * \file pappsomspp/msrun/output/mzxmloutput.cpp
3  * \date 23/11/2019
4  * \author Olivier Langella
5  * \brief write msrun peaks into mzxml output stream
6  */
7 
8 /*******************************************************************************
9  * Copyright (c) 2019 Olivier Langella <Olivier.Langella@u-psud.fr>.
10  *
11  * This file is part of the PAPPSOms++ library.
12  *
13  * PAPPSOms++ is free software: you can redistribute it and/or modify
14  * it under the terms of the GNU General Public License as published by
15  * the Free Software Foundation, either version 3 of the License, or
16  * (at your option) any later version.
17  *
18  * PAPPSOms++ is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21  * GNU General Public License for more details.
22  *
23  * You should have received a copy of the GNU General Public License
24  * along with PAPPSOms++. If not, see <http://www.gnu.org/licenses/>.
25  *
26  * Contributors:
27  * Olivier Langella <Olivier.Langella@u-psud.fr> - initial API and
28  *implementation
29  ******************************************************************************/
30 
31 #include "mzxmloutput.h"
32 #include <QDebug>
33 #include <QStringList>
34 #include <algorithm>
35 #include <cstdio>
36 #include "../../config.h"
37 
38 using namespace pappso;
39 
40 
/** \brief reverse the byte order of a value
 *
 * Works on a copy of the argument: the bytes of the object representation are
 * reversed in place and the copy is returned, so a second call restores the
 * original value.
 *
 * \param in value whose bytes must be reversed (taken by value)
 * \return copy of \p in with its bytes in reverse order
 */
template <class T>
T
change_endian(T in)
{
  // Access the object representation through char *, which is allowed to
  // alias any object type.
  char *const bytes = reinterpret_cast<char *>(&in);
  std::reverse(bytes, bytes + sizeof(T));
  return in;
}
50 
51 
53 {
54  mp_output = p_mzxml_output;
55 }
57 {
58 }
59 void
61  const QualifiedMassSpectrum &spectrum)
62 {
63  qDebug();
64  mp_output->writeQualifiedMassSpectrum(spectrum);
65  qDebug();
66 }
67 bool
69 {
70  return true;
71 }
72 
73 
74 MzxmlOutput::MzxmlOutput(QIODevice *p_output_device)
75 {
76 
77  mpa_outputStream = new QXmlStreamWriter(p_output_device);
78  mpa_outputStream->setAutoFormatting(true);
79 
80  mpa_outputStream->writeStartDocument("1.0");
81 }
82 
84 {
85  close();
86  delete mpa_outputStream;
87 }
88 
89 void
90 MzxmlOutput::setReadAhead(bool isReadAhead)
91 {
92  m_isReadAhead = isReadAhead;
93 }
94 void
96 {
97  qDebug();
98  writeHeader(p_msrunreader);
99 
100  Translater translater(this);
101 
102  translater.setReadAhead(m_isReadAhead);
103 
104  translater.setNeedMsLevelPeakList(1, !m_ms1IsMasked);
105  // translater.setNeedMsLevelPeakList(1, false);
106  // translater.setNeedMsLevelPeakList(2, false);
107  p_msrunreader->readSpectrumCollection(translater);
108  qDebug();
109 }
110 
111 void
113 {
114 
115  mpa_outputStream->writeNamespace("http://www.w3.org/2001/XMLSchema-instance",
116  "xsi");
117  // xmlns="http://sashimi.sourceforge.net/schema_revision/mzXML_2.0"
118  // xsi:schemaLocation="http://sashimi.sourceforge.net/schema_revision/mzXML_2.0
119  // http://sashimi.sourceforge.net/schema_revision/mzXML_2.0/mzXML_idx_2.0.xsd"
120  /*
121 114 writer.setPrefix("xsi", xmlnsxsi);
122 115 writer.setDefaultNamespace(namespaceURI);
123 mpa_outputStream->writeStartElement("mzXML");
124 117 writer.writeNamespace("xsi", xmlnsxsi);
125 118 writer.writeDefaultNamespace(namespaceURI);
126 119
127 120 writer.writeAttribute(xmlnsxsi, "schemaLocation",
128 xsischemaLocation); 121 */
129  mpa_outputStream->writeStartElement("mzXML");
130  mpa_outputStream->writeAttribute(
131  "xmlns", "http://sashimi.sourceforge.net/schema_revision/mzXML_3.2");
132  mpa_outputStream->writeAttribute(
133  "xsi:schemaLocation",
134  "http://sashimi.sourceforge.net/schema_revision/mzXML_3.2 "
135  "http://sashimi.sourceforge.net/schema_revision/mzXML_3.2/"
136  "mzXML_idx_3.2.xsd");
137 
138  mpa_outputStream->writeStartElement("msRun");
139  mpa_outputStream->writeAttribute(
140  "scanCount", QString("%1").arg(p_msrunreader->spectrumListSize()));
141  //<msRun scanCount="16576" startTime="PT0.292553S" endTime="PT3000.34S">
142  // writer.writeAttribute("scanCount",
143  // ms_run.getSpectrumCount(this.controller).toString());
144 
145  /*
146  * # < parentFile fileName = #
147  * "file://SEQUEST1/raw/vidal/20060411_VIDAL_JEAN_1_PEPCR1_42140.RAW" #
148  * fileType = "RAWData" fileSha1 = #
149  * "23c1620d4ad3f4f0103b0141b7caec1e8b7eebf5" / >
150  */
151  mpa_outputStream->writeStartElement("parentFile");
152  mpa_outputStream->writeAttribute("fileName",
153  p_msrunreader->getMsRunId()->getFileName());
154  mpa_outputStream->writeAttribute("fileType", "RAWData");
155  mpa_outputStream->writeEndElement();
156  /*
157 144
158 145 MsInstrumentList instrument_list =
159 ms_run.getMsInstruments(controller); 146 for (MsInstrument
160 instrument : instrument_list) { 147 this.write(instrument); 148 }
161 */
162 
163  mpa_outputStream->writeStartElement("msInstrument");
164  mpa_outputStream->writeAttribute("msInstrumentID", "1");
165  //<msManufacturer category="msManufacturer" value="Thermo Scientific"/>
166  mpa_outputStream->writeStartElement("msManufacturer");
167  mpa_outputStream->writeAttribute("category", "msManufacturer");
168  mpa_outputStream->writeAttribute("value", "unknown");
169  mpa_outputStream->writeEndElement();
170  //<msModel category="msModel" value="Q Exactive"/>
171  // <msIonisation category="msIonisation" value="nanoelectrospray"/>
172  // <msMassAnalyzer category="msMassAnalyzer" value="quadrupole"/>
173  // <msDetector category="msDetector" value="inductive detector"/>
174  // <software type="acquisition" name="Xcalibur"
175  // version="2.1-152001/2.1.0.1520"/>
176  mpa_outputStream->writeEndElement();
177  /*
178 149
179 150 // #< dataProcessing centroided ="1" >
180 151 // my $ref_data_processings =
181 $ms_run_description->dataProcessing(); 152 MsDataProcessingList
182 dataProcList = ms_run.getMsDataProcessings(controller); 153 for
183 (MsDataProcessing msDataProc : dataProcList) { 154 this.write(msDataProc); 155 }
184 */
185  mpa_outputStream->writeStartElement("dataProcessing");
186  //<dataProcessing centroided="1">
187  mpa_outputStream->writeAttribute("centroided", "1");
188  // <software type="conversion" name="ProteoWizard" version="3.0.3706"/>
189  mpa_outputStream->writeStartElement("software");
190  mpa_outputStream->writeAttribute("type", "conversion");
191  mpa_outputStream->writeAttribute("name", PAPPSOMSPP_NAME);
192  mpa_outputStream->writeAttribute("version", PAPPSOMSPP_VERSION);
193  mpa_outputStream->writeEndElement();
194  //<processingOperation name="Conversion to mzML"/>
195  mpa_outputStream->writeStartElement("processingOperation");
196  mpa_outputStream->writeAttribute("name", "Conversion to mzXML");
197  //<software type="processing" name="ProteoWizard" version="3.0.3706"/>
198  mpa_outputStream->writeStartElement("software");
199  mpa_outputStream->writeAttribute("type", "processing");
200  mpa_outputStream->writeAttribute("name", PAPPSOMSPP_NAME);
201  mpa_outputStream->writeAttribute("version", PAPPSOMSPP_VERSION);
202  mpa_outputStream->writeEndElement();
203  //<comment>Thermo/Xcalibur peak picking</comment>
204  mpa_outputStream->writeStartElement("comment");
205  mpa_outputStream->writeCharacters("pappso::MzxmlOutput");
206  mpa_outputStream->writeEndElement();
207  //</dataProcessing>
208  mpa_outputStream->writeEndElement();
209  mpa_outputStream->writeEndElement();
210  // Peaks
211 }
212 
213 
214 void
216 {
217  mpa_outputStream->writeEndDocument();
218 }
219 
220 
221 std::size_t
222 MzxmlOutput::getScanNumberFromNativeId(const QString &native_id) const
223 {
224  QStringList native_id_list = native_id.split("=");
225  if(native_id_list.size() < 2)
226  {
227  }
228  else
229  {
230  return native_id_list.back().toULong();
231  }
232  return std::numeric_limits<std::size_t>::max();
233 }
234 
235 std::size_t
237 {
238  std::size_t scan_number =
240  if(scan_number == std::numeric_limits<std::size_t>::max())
241  {
242  scan_number = spectrum.getMassSpectrumId().getSpectrumIndex() + 1;
243  }
244  return scan_number;
245 }
246 
247 std::size_t
249 {
250 
251  std::size_t scan_number =
253  if(scan_number == std::numeric_limits<std::size_t>::max())
254  {
255  scan_number = spectrum.getPrecursorSpectrumIndex() + 1;
256  }
257  return scan_number;
258 }
259 
260 void
262  const pappso::QualifiedMassSpectrum &spectrum)
263 {
264  qDebug();
265  mpa_outputStream->writeStartElement("scan");
266  /*
267  <scan num="1"
268  scanType="Full"
269  centroided="1"
270  msLevel="1"
271  peaksCount="1552"
272  polarity="+"
273  retentionTime="PT0.292553S"
274  lowMz="400.153411865234"
275  highMz="1013.123352050781"
276  basePeakMz="445.12003"
277  basePeakIntensity="2.0422125e06"
278  totIonCurrent="1.737798e07">*/
279  mpa_outputStream->writeAttribute("num",
280  QString("%1").arg(getScanNumber(spectrum)));
281  mpa_outputStream->writeAttribute("centroided", QString("1"));
282  mpa_outputStream->writeAttribute("msLevel",
283  QString("%1").arg(spectrum.getMsLevel()));
284  if(spectrum.getMassSpectrumCstSPtr().get() == nullptr)
285  {
286  mpa_outputStream->writeAttribute("peaksCount", "0");
287  }
288  else
289  {
290  mpa_outputStream->writeAttribute("peaksCount",
291  QString("%1").arg(spectrum.size()));
292 
293  if(spectrum.size() > 0)
294  {
295  mpa_outputStream->writeAttribute(
296  "lowMz",
297  QString::number(
298  spectrum.getMassSpectrumCstSPtr().get()->front().x, 'f', 12));
299 
300  mpa_outputStream->writeAttribute(
301  "highMz",
302  QString::number(
303  spectrum.getMassSpectrumCstSPtr().get()->back().x, 'f', 12));
304  // mpa_outputStream->writeAttribute("highMz",
305  // QString::number(spectrum.getMassSpectrumCstSPtr().get()->back().x,
306  // 'f', 10)); basePeakMz="245.1271988"
307  // basePeakIntensity="5810.7739"
308  // totIonCurrent="57803.815999999999">
309  }
310  }
311  mpa_outputStream->writeAttribute("polarity", "+");
312  mpa_outputStream->writeAttribute(
313  "retentionTime",
314  QString("PT%1S").arg(QString::number(spectrum.getRtInSeconds(), 'f', 2)));
315 
316  if(spectrum.getMsLevel() > 1)
317  {
318 
319  //<precursorMz precursorScanNum="16574"
320  // precursorIntensity="58403.04296875" precursorCharge="2"
321  ////activationMethod="HCD">994.690619901808</precursorMz>
322  mpa_outputStream->writeStartElement("precursorMz");
323  mpa_outputStream->writeAttribute(
324  "precursorScanNum",
325  QString("%1").arg(getPrecursorScanNumber(spectrum)));
326  mpa_outputStream->writeAttribute(
327  "precursorIntensity",
328  QString::number(spectrum.getPrecursorIntensity(), 'f', 4));
329  mpa_outputStream->writeAttribute(
330  "precursorCharge", QString("%1").arg(spectrum.getPrecursorCharge()));
331  mpa_outputStream->writeCharacters(
332  QString::number(spectrum.getPrecursorMz(), 'f', 12));
333  mpa_outputStream->writeEndElement();
334  }
335 
336  /*<peaks compressionType="none"
337  compressedLen="0"
338  precision="64"
339  byteOrder="network"
340  contentType="m/z-int"></peaks>*/
341 
342  mpa_outputStream->writeStartElement("peaks");
343  mpa_outputStream->writeAttribute("compressionType", "none");
344  mpa_outputStream->writeAttribute("compressedLen", "0");
345  mpa_outputStream->writeAttribute("precision", "64");
346  mpa_outputStream->writeAttribute("byteOrder", "network");
347  mpa_outputStream->writeAttribute("contentType", "m/z-int");
348 
349  if(spectrum.getMassSpectrumCstSPtr().get() != nullptr)
350  {
351  QByteArray byte_array;
352  if(QSysInfo::ByteOrder == QSysInfo::LittleEndian)
353  {
354  for(const DataPoint &peak :
355  *(spectrum.getMassSpectrumCstSPtr().get()))
356  {
357  double swap = change_endian(peak.x);
358  byte_array.append((char *)&swap, 8);
359  swap = change_endian(peak.y);
360  byte_array.append((char *)&swap, 8);
361  }
362  }
363  else
364  {
365  for(const DataPoint &peak :
366  *(spectrum.getMassSpectrumCstSPtr().get()))
367  {
368  byte_array.append((char *)&peak.x, 8);
369  byte_array.append((char *)&peak.y, 8);
370  }
371  }
372  mpa_outputStream->writeCharacters(byte_array.toBase64());
373  }
374  mpa_outputStream->writeEndElement();
375 
376  // scan
377  mpa_outputStream->writeEndElement();
378  qDebug();
379 }
380 
381 void
382 MzxmlOutput::maskMs1(bool mask_ms1)
383 {
384  m_ms1IsMasked = mask_ms1;
385 }
pappso::QualifiedMassSpectrum::getMsLevel
uint getMsLevel() const
Get the mass spectrum level.
Definition: qualifiedmassspectrum.cpp:214
pappso::MsRunReader
base class to read MSrun the only way to build a MsRunReader object is to use the MsRunReaderFactory
Definition: msrunreader.h:158
pappso::MzxmlOutput
Definition: mzxmloutput.h:43
pappso::MzxmlOutput::m_isReadAhead
bool m_isReadAhead
Definition: mzxmloutput.h:90
pappso::MzxmlOutput::getScanNumberFromNativeId
std::size_t getScanNumberFromNativeId(const QString &native_id) const
Definition: mzxmloutput.cpp:222
pappso::MzxmlOutput::getScanNumber
std::size_t getScanNumber(const QualifiedMassSpectrum &spectrum) const
Definition: mzxmloutput.cpp:236
pappso
tries to keep as much as possible monoisotopes, removing any possible C13 peaks
Definition: aa.cpp:39
change_endian
T change_endian(T in)
Definition: mzxmloutput.cpp:43
pappso::MassSpectrumId::getNativeId
const QString & getNativeId() const
Definition: massspectrumid.cpp:96
pappso::MzxmlOutput::writeHeader
void writeHeader(MsRunReader *p_msrunreader)
Definition: mzxmloutput.cpp:112
pappso::MzxmlOutput::Translater::mp_output
MzxmlOutput * mp_output
Definition: mzxmloutput.h:84
pappso::MzxmlOutput::Translater::setQualifiedMassSpectrum
virtual void setQualifiedMassSpectrum(const QualifiedMassSpectrum &spectrum) override
Definition: mzxmloutput.cpp:60
PAPPSOMSPP_NAME
#define PAPPSOMSPP_NAME
Definition: config.h:3
PAPPSOMSPP_VERSION
#define PAPPSOMSPP_VERSION
Definition: config.h:4
pappso::DataPoint
Definition: datapoint.h:21
pappso::MsRunReader::spectrumListSize
virtual std::size_t spectrumListSize() const =0
get the totat number of spectrum conained in the MSrun data file
pappso::MzxmlOutput::write
void write(MsRunReader *p_msrunreader)
Definition: mzxmloutput.cpp:95
mzxmloutput.h
write msrun peaks into mzxml output stream
pappso::QualifiedMassSpectrum::getPrecursorIntensity
pappso_double getPrecursorIntensity() const
Get the intensity of the precursor ion.
Definition: qualifiedmassspectrum.cpp:301
pappso::MsRunReader::getMsRunId
const MsRunIdCstSPtr & getMsRunId() const
Definition: msrunreader.cpp:232
pappso::MzxmlOutput::m_ms1IsMasked
bool m_ms1IsMasked
Definition: mzxmloutput.h:91
pappso::MzxmlOutput::getPrecursorScanNumber
std::size_t getPrecursorScanNumber(const QualifiedMassSpectrum &spectrum) const
Definition: mzxmloutput.cpp:248
pappso::MsRunReader::readSpectrumCollection
virtual void readSpectrumCollection(SpectrumCollectionHandlerInterface &handler)=0
function to visit an MsRunReader and get each Spectrum in a spectrum collection handler
pappso::QualifiedMassSpectrum::size
std::size_t size() const
Definition: qualifiedmassspectrum.cpp:308
pappso::QualifiedMassSpectrum
Class representing a fully specified mass spectrum.
Definition: qualifiedmassspectrum.h:68
pappso::MzxmlOutput::Translater::~Translater
virtual ~Translater()
Definition: mzxmloutput.cpp:56
pappso::SpectrumCollectionHandlerInterface::setNeedMsLevelPeakList
virtual void setNeedMsLevelPeakList(unsigned int ms_level, bool want_peak_list) final
tells if we need the peak list given
Definition: msrunreader.cpp:90
pappso::MzxmlOutput::~MzxmlOutput
~MzxmlOutput()
Definition: mzxmloutput.cpp:83
pappso::QualifiedMassSpectrum::getPrecursorCharge
uint getPrecursorCharge() const
Get the precursor charge.
Definition: qualifiedmassspectrum.cpp:182
pappso::QualifiedMassSpectrum::getPrecursorNativeId
const QString & getPrecursorNativeId() const
Definition: qualifiedmassspectrum.cpp:286
pappso::MassSpectrumId::getSpectrumIndex
std::size_t getSpectrumIndex() const
Definition: massspectrumid.cpp:110
pappso::SpectrumCollectionHandlerInterface::setReadAhead
virtual void setReadAhead(bool is_read_ahead) final
use threads to read a spectrum by batch of batch_size
Definition: msrunreader.cpp:60
pappso::MzxmlOutput::Translater::Translater
Translater(MzxmlOutput *p_mzxml_output)
Definition: mzxmloutput.cpp:52
pappso::QualifiedMassSpectrum::getMassSpectrumId
const MassSpectrumId & getMassSpectrumId() const
Get the MassSpectrumId.
Definition: qualifiedmassspectrum.cpp:128
pappso::QualifiedMassSpectrum::getMassSpectrumCstSPtr
MassSpectrumCstSPtr getMassSpectrumCstSPtr() const
Get the MassSpectrumCstSPtr.
Definition: qualifiedmassspectrum.cpp:152
pappso::MzxmlOutput::MzxmlOutput
MzxmlOutput(QIODevice *p_output_device)
Definition: mzxmloutput.cpp:74
pappso::MzxmlOutput::writeQualifiedMassSpectrum
void writeQualifiedMassSpectrum(const QualifiedMassSpectrum &spectrum)
Definition: mzxmloutput.cpp:261
pappso::MzxmlOutput::setReadAhead
void setReadAhead(bool read_ahead)
Definition: mzxmloutput.cpp:90
pappso::MzxmlOutput::maskMs1
void maskMs1(bool mask_ms1)
Definition: mzxmloutput.cpp:382
pappso::MzxmlOutput::close
void close()
Definition: mzxmloutput.cpp:215
pappso::MzxmlOutput::Translater::needPeakList
virtual bool needPeakList() const override
tells if we need the peak list (if we want the binary data) for each spectrum
Definition: mzxmloutput.cpp:68
pappso::QualifiedMassSpectrum::getPrecursorMz
pappso_double getPrecursorMz() const
Get the precursor m/z ratio.
Definition: qualifiedmassspectrum.cpp:198
pappso::QualifiedMassSpectrum::getPrecursorSpectrumIndex
std::size_t getPrecursorSpectrumIndex() const
Get the scan number of the precursor ion.
Definition: qualifiedmassspectrum.cpp:273
pappso::MzxmlOutput::mpa_outputStream
QXmlStreamWriter * mpa_outputStream
Definition: mzxmloutput.h:89
pappso::QualifiedMassSpectrum::getRtInSeconds
pappso_double getRtInSeconds() const
Get the retention time in seconds.
Definition: qualifiedmassspectrum.cpp:230
pappso::MzxmlOutput::Translater
Definition: mzxmloutput.h:75