libpappsomspp
Library for mass spectrometry
timsdata.h
Go to the documentation of this file.
1 /**
2  * \file pappsomspp/vendors/tims/timsdata.h
3  * \date 27/08/2019
4  * \author Olivier Langella
5  * \brief main Tims data handler
6  */
7 
8 /*******************************************************************************
9  * Copyright (c) 2019 Olivier Langella <Olivier.Langella@u-psud.fr>.
10  *
11  * This file is part of the PAPPSOms++ library.
12  *
13  * PAPPSOms++ is free software: you can redistribute it and/or modify
14  * it under the terms of the GNU General Public License as published by
15  * the Free Software Foundation, either version 3 of the License, or
16  * (at your option) any later version.
17  *
18  * PAPPSOms++ is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21  * GNU General Public License for more details.
22  *
23  * You should have received a copy of the GNU General Public License
24  * along with PAPPSOms++. If not, see <http://www.gnu.org/licenses/>.
25  *
26  ******************************************************************************/
27 
28 #pragma once
29 
30 #include <QDir>
31 #include <QSqlDatabase>
32 #include "timsbindec.h"
33 #include "timsframe.h"
34 #include "../../massspectrum/qualifiedmassspectrum.h"
35 #include "../../processing/filters/filterinterface.h"
36 #include <deque>
37 #include <QMutex>
38 #include <QSqlQuery>
40 
41 namespace pappso
42 {
43 
44 
45 /** @brief structure needed to extract XIC from Tims data
46  */
47 
49 {
51  : mzRange(pappso_double(1), PrecisionFactory::getPpmInstance(10.0)){};
52  /** @brief the corresponding precursor id
53  */
54  std::size_t precursorId;
55 
56  /** @brief mass range to extract
57  */
59  /** @brief mobility index begin
60  */
61  std::size_t scanNumBegin;
62 
63  /** @brief mobility index end
64  */
65  std::size_t scanNumEnd;
66 
67  /** @brief targeted retention time for this precursor
68  */
70 
71  /** @brief extracted xic
72  */
73  XicSPtr xicSptr = nullptr;
74 
75  /** @brief targeted charge for this precursor
76  */
77 
78  unsigned int charge;
79 };
80 
81 class TimsData;
82 
83 /** \brief shared pointer on a TimsData object
84  */
85 typedef std::shared_ptr<TimsData> TimsDataSp;
86 
87 /**
88  * @todo write docs
89  */
91 {
92  public:
93  /** @brief build using the tims data directory
94  */
95  TimsData(QDir timsDataDirectory);
96 
97  /**
98  * Copy constructor
99  *
100  * @param other TODO
101  */
102  TimsData(const TimsData &other);
103 
104  /**
105  * Destructor
106  */
107  ~TimsData();
108 
109 
110  /** @brief get a mass spectrum given its spectrum index
111  * @param raw_index a number beginning at 0, corresponding to a Tims Scan in
112  * the order they lie in the binary data file
113  */
115  getMassSpectrumCstSPtrByRawIndex(std::size_t raw_index);
116 
117  /** @brief get a mass spectrum given the tims frame database id and scan
118  * number within tims frame
119  */
120  pappso::MassSpectrumCstSPtr getMassSpectrumCstSPtr(std::size_t timsId,
121  std::size_t scanNum);
122 
123  /** @brief get the total number of scans
124  */
125  std::size_t getTotalNumberOfScans() const;
126 
127  /** @brief get the number of precursors analyzed by PASEF
128  */
129  std::size_t getTotalNumberOfPrecursors() const;
130 
131  /** @brief guess possible precursor ids given a charge, m/z, retention time
132  * and k0
133  * @return a list of possible precursor ids
134  */
135  std::vector<std::size_t> getPrecursorsFromMzRtCharge(int charge,
136  double mz_val,
137  double rt_sec,
138  double k0);
139 
140  unsigned int getMsLevelBySpectrumIndex(std::size_t spectrum_index);
141 
143  getQualifiedMassSpectrumByRawIndex(std::size_t spectrum_index,
144  bool want_binary_data);
145 
146  void
147  getQualifiedMs2MassSpectrumByPrecursorId(QualifiedMassSpectrum &mass_spectrum,
148  std::size_t ms2_index,
149  std::size_t precursor_index,
150  bool want_binary_data);
151 
152  QualifiedMassSpectrum getQualifiedMs1MassSpectrumByPrecursorId(
153  std::size_t ms2_index, std::size_t precursor_index, bool want_binary_data);
154 
155  /** @brief filter interface to apply just after raw MS2 spectrum extraction
156  * the filter can be a list of filters inside a FilterSuite object
157  */
158  void setMs2FilterCstSPtr(pappso::FilterInterfaceCstSPtr &filter);
159 
160  /** @brief filter interface to apply just after raw MS1 spectrum extraction
161  * the filter can be a list of filters inside a FilterSuite object
162  */
163  void setMs1FilterCstSPtr(pappso::FilterInterfaceCstSPtr &filter);
164 
165  /** @brief enable or disable simple centroid filter on raw tims data for MS2
166  */
167  void setMs2BuiltinCentroid(bool centroid);
168 
169 
170  /** @brief tells if simple centroid filter on raw tims data for MS2 is enabled
171  * or not
172  */
173  bool getMs2BuiltinCentroid() const;
174 
175 
176  std::vector<std::size_t> getTimsMS1FrameIdRange(double rt_begin,
177  double rt_end) const;
178 
179 
180  /** @brief get a Tims frame with its database ID
181  */
182  TimsFrameCstSPtr getTimsFrameCstSPtr(std::size_t timsId) const;
183 
185  getTimsXicStructureFromPrecursorId(std::size_t precursor_id,
186  PrecisionPtr precision_ptr) const;
187 
188 
189  /** @brief extract a list of XICs from Tims data
190  *
191  * @param tims_xic_structure_list the list of tims XIC structure to extract
192  * @param xicExtractMethod XIC extraction method (sum or max) to use
193  * @param rtRange retention time range in seconds to extract XIC from rtTarget
194  * - rtRange to rtTarget + rtRange
195  */
196  void extractXicListByTimsXicStructureList(
197  std::vector<TimsXicStructure> &tims_xic_structure_list,
198  XicExtractMethod xicExtractMethod,
199  double rtRange) const;
200 
201 
202  /** @brief get cumulated raw signal for a given precursor
203  * only to use to see the raw signal
204  *
205  * @param precursor_index precursor index to extract signal from
206  * @result a map of integers, x = times of flight, y = intensities
207  */
208  std::map<quint32, quint32>
209  getRawMs2ByPrecursorId(std::size_t precursor_index);
210 
211  private:
212  /** @brief extract a list of XICs from Tims data
213  *
214  * @param precursor_id_list the list of precursors to extract
215  * @param precision_ptr precision to compute the mz range to extract for each
216  * precursor mass
217  * @param xicExtractMethod XIC extraction method (sum or max) to use
218  * @param rtRange retention time range in seconds to extract XIC from rtTarget
219  * - rtRange to rtTarget + rtRange
220  * @result the corresponding XIC list as a specific structure
221  */
222  std::vector<TimsXicStructure> extractXicListByPrecursorIds(
223  const std::vector<std::size_t> &precursor_id_list,
224  PrecisionPtr precision_ptr,
225  XicExtractMethod xicExtractMethod,
226  double rtRange) const;
227 
228 
229  std::pair<std::size_t, std::size_t>
230  getScanCoordinateFromRawIndex(std::size_t spectrum_index) const;
231 
232  std::size_t getRawIndexFromCoordinate(std::size_t frame_id,
233  std::size_t scan_num) const;
234 
235  QSqlDatabase openDatabaseConnection() const;
236 
237 
238  /** @brief get a Tims frame base (no binary data file access) with its
239  * database ID
240  */
241  TimsFrameBaseCstSPtr getTimsFrameBaseCstSPtr(std::size_t timsId) const;
242 
243 
244  /** @brief get a Tims frame with its database ID
245  * but look in the cache first
246  */
247  TimsFrameCstSPtr getTimsFrameCstSPtrCached(std::size_t timsId);
248 
249  TimsFrameBaseCstSPtr getTimsFrameBaseCstSPtrCached(std::size_t timsId);
250 
251 
252  std::vector<std::size_t>
253  getMatchPrecursorIdByKo(std::vector<std::vector<double>> ids,
254  double ko_value);
255 
256  /** @todo documentation
257  */
258  std::vector<std::size_t>
259  getClosestPrecursorIdByMz(std::vector<std::vector<double>> ids,
260  double mz_value);
261 
262 
263  /** @brief private function to fill m_frameIdDescrList
264  */
265  void fillFrameIdDescrList();
266 
267 
268  private:
270  TimsBinDec *mpa_timsBinDec = nullptr;
271  // QSqlDatabase *mpa_qdb = nullptr;
272  std::size_t m_totalNumberOfScans;
274  std::size_t m_cacheSize = 60;
275  std::deque<TimsFrameCstSPtr> m_timsFrameCache;
276  std::deque<TimsFrameBaseCstSPtr> m_timsFrameBaseCache;
277 
278  pappso::FilterInterfaceCstSPtr mcsp_ms2Filter = nullptr;
279  pappso::FilterInterfaceCstSPtr mcsp_ms1Filter = nullptr;
280 
281  /** @brief enable builtin centroid on raw tims integers by default
282  */
283  bool m_builtinMs2Centroid = true;
284 
285 
286  std::map<int, QSqlRecord> m_mapMzCalibrationRecord;
287  std::map<int, QSqlRecord> m_mapTimsCalibrationRecord;
288  std::map<std::size_t, QSqlRecord> m_mapFramesRecord;
289 
291 
292 
294  {
295  std::size_t m_frameId; // frame id
296  std::size_t m_size; // frame size
297  std::size_t m_cumulSize; // cumulative size
298  };
299 
300  /** @brief store every frame id and corresponding sizes
301  */
302  std::vector<FrameIdDescr> m_frameIdDescrList;
303 
304  /** @brief index to quickly find a frameId in the description list with the
305  * raw index of spectrum modulo 1000
306  */
307  std::map<std::size_t, std::size_t> m_thousandIndexToFrameIdDescrListIndex;
308 
309  QMutex m_mutex;
310 };
311 } // namespace pappso
Class representing a fully specified mass spectrum.
std::vector< FrameIdDescr > m_frameIdDescrList
store every frame id and corresponding sizes
Definition: timsdata.h:302
std::map< std::size_t, QSqlRecord > m_mapFramesRecord
Definition: timsdata.h:288
std::size_t m_totalNumberOfScans
Definition: timsdata.h:272
std::deque< TimsFrameCstSPtr > m_timsFrameCache
Definition: timsdata.h:275
std::map< int, QSqlRecord > m_mapMzCalibrationRecord
Definition: timsdata.h:286
std::map< int, QSqlRecord > m_mapTimsCalibrationRecord
Definition: timsdata.h:287
QMutex m_mutex
Definition: timsdata.h:309
TimsData(const TimsData &other)
QDir m_timsDataDirectory
Definition: timsdata.h:269
MzCalibrationStore * mpa_mzCalibrationStore
Definition: timsdata.h:290
std::deque< TimsFrameBaseCstSPtr > m_timsFrameBaseCache
Definition: timsdata.h:276
std::map< std::size_t, std::size_t > m_thousandIndexToFrameIdDescrListIndex
index to find quickly a frameId in the description list with the raw index of spectrum modulo 1000
Definition: timsdata.h:307
std::size_t m_totalNumberOfPrecursors
Definition: timsdata.h:273
#define PMSPP_LIB_DECL
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition: aa.cpp:39
std::shared_ptr< const TimsFrameBase > TimsFrameBaseCstSPtr
Definition: timsframebase.h:41
double pappso_double
A type definition for doubles.
Definition: types.h:48
std::shared_ptr< TimsData > TimsDataSp
shared pointer on a TimsData object
Definition: timsdata.h:81
std::shared_ptr< Xic > XicSPtr
Definition: xic.h:39
std::shared_ptr< const MassSpectrum > MassSpectrumCstSPtr
Definition: massspectrum.h:55
std::shared_ptr< const FilterInterface > FilterInterfaceCstSPtr
XicExtractMethod
Definition: types.h:200
std::shared_ptr< const TimsFrame > TimsFrameCstSPtr
Definition: timsframe.h:42
structure needed to extract XIC from Tims data
Definition: timsdata.h:49
pappso::pappso_double rtTarget
targeted retention time for this precursor
Definition: timsdata.h:69
std::size_t scanNumEnd
mobility index end
Definition: timsdata.h:65
MzRange mzRange
mass range to extract
Definition: timsdata.h:58
unsigned int charge
targeted charge for this precursor
Definition: timsdata.h:78
XicSPtr xicSptr
extracted xic
Definition: timsdata.h:73
std::size_t scanNumBegin
mobility index begin
Definition: timsdata.h:61
std::size_t precursorId
the corresponding precursor id
Definition: timsdata.h:51
binary file handler of Bruker's TimsTof raw data
handle a single Bruker's TimsTof frame