ProteoWizard
Classes | Functions | Variables
Serializer_pepXML_Test.cpp File Reference
#include "Serializer_pepXML.hpp"
#include "Diff.hpp"
#include "References.hpp"
#include "examples.hpp"
#include "pwiz/utility/misc/unit.hpp"
#include "pwiz/utility/misc/Std.hpp"
#include "pwiz/utility/misc/Filesystem.hpp"
#include "pwiz/data/proteome/Digestion.hpp"
#include "TextWriter.hpp"
#include "boost/range/adaptor/transformed.hpp"
#include "boost/range/algorithm/max_element.hpp"
#include "boost/range/algorithm/min_element.hpp"
#include <cstring>

Go to the source code of this file.

Classes

struct  EnzymePtr_specificity
 
struct  EnzymePtr_missedCleavages
 

Functions

void stripUnmappedMetadata (IdentData &mzid)
 
void testTranslation (const string &str)
 
void testSerializeReally (IdentData &mzid, const Serializer_pepXML::Config &config)
 
void testSerialize ()
 
void testPepXMLSpecificity ()
 
void testStripChargeFromConventionalSpectrumId ()
 
int main (int argc, char **argv)
 

Variables

ostream * os_ = 0
 

Function Documentation

void stripUnmappedMetadata ( IdentData &  mzid)

Definition at line 57 of file Serializer_pepXML_Test.cpp.

References pwiz::identdata::IdentData::analysisCollection, pwiz::identdata::DataCollection::analysisData, pwiz::identdata::IdentData::analysisProtocolCollection, pwiz::identdata::IdentData::analysisSampleCollection, pwiz::identdata::IdentData::analysisSoftwareList, pwiz::identdata::IdentData::auditCollection, BFS_STRING, pwiz::identdata::IdentData::bibliographicReference, pwiz::data::ParamContainer::clear(), pwiz::identdata::SpectrumIdentificationProtocol::databaseFilters, pwiz::identdata::SpectrumIdentificationProtocol::databaseTranslation, pwiz::identdata::IdentData::dataCollection, pwiz::identdata::SequenceCollection::dbSequences, pwiz::identdata::PeptideEvidence::end, pwiz::identdata::Enzymes::enzymes, pwiz::identdata::SpectrumIdentificationProtocol::enzymes, pwiz::identdata::DataCollection::inputs, pwiz::identdata::SpectrumIdentificationProtocol::massTable, pwiz::identdata::peptide(), pwiz::identdata::SequenceCollection::peptides, pwiz::identdata::PeptideEvidence::post, pwiz::identdata::PeptideEvidence::pre, pwiz::identdata::AnalysisCollection::proteinDetection, pwiz::identdata::AnalysisData::proteinDetectionListPtr, pwiz::identdata::IdentData::provider, pwiz::identdata::AnalysisSampleCollection::samples, pwiz::identdata::IdentData::sequenceCollection, pwiz::identdata::Inputs::sourceFile, pwiz::identdata::AnalysisCollection::spectrumIdentification, pwiz::identdata::AnalysisData::spectrumIdentificationList, pwiz::identdata::AnalysisProtocolCollection::spectrumIdentificationProtocol, pwiz::identdata::PeptideEvidence::start, and pwiz::identdata::SpectrumIdentificationProtocol::threshold.

Referenced by testSerialize().

58 {
59  mzid.bibliographicReference.clear();
60  mzid.analysisSampleCollection.samples.clear();
61  mzid.auditCollection.clear();
62  mzid.provider = Provider();
63  mzid.dataCollection.inputs.sourceFile.clear();
64 
65  BOOST_FOREACH(AnalysisSoftwarePtr& as, mzid.analysisSoftwareList)
66  {
67  as->URI.clear();
68  as->customizations.clear();
69  as->contactRolePtr.reset();
70  }
71 
73 
74  // pepXML only provides a single min_number_termini and max_num_internal_cleavages for all enzymes
75  int minSpecificity = *boost::range::min_element(sip.enzymes.enzymes | boost::adaptors::transformed(EnzymePtr_specificity()));
76  int maxMissedCleavages = *boost::range::max_element(sip.enzymes.enzymes | boost::adaptors::transformed(EnzymePtr_missedCleavages()));
77  BOOST_FOREACH(const EnzymePtr& ez, sip.enzymes.enzymes)
78  {
79  ez->terminalSpecificity = (proteome::Digestion::Specificity) minSpecificity;
80  ez->missedCleavages = maxMissedCleavages;
81  }
82 
83  // pepXML doesn't map these elements
84  sip.massTable.clear();
85  sip.threshold.clear();
86  sip.databaseFilters.clear();
87  sip.databaseTranslation.reset();
88 
89  // pepXML doesn't map these attributes
90  mzid.analysisCollection.spectrumIdentification[0]->searchDatabase[0]->name.clear();
91  mzid.analysisCollection.spectrumIdentification[0]->searchDatabase[0]->version.clear();
92  mzid.analysisCollection.spectrumIdentification[0]->searchDatabase[0]->releaseDate.clear();
93  mzid.analysisCollection.spectrumIdentification[0]->searchDatabase[0]->databaseName.clear();
94 
95  // pepXML doesn't reliably store location or file format
96  string& location = mzid.analysisCollection.spectrumIdentification[0]->inputSpectra[0]->location;
97  location = BFS_STRING(bfs::path(location).replace_extension("").filename());
98  mzid.analysisCollection.spectrumIdentification[0]->inputSpectra[0]->fileFormat = CVParam();
99 
100  string& location2 = mzid.analysisCollection.spectrumIdentification[0]->searchDatabase[0]->location;
101  location2 = BFS_STRING(bfs::path(location2).replace_extension("").filename());
102 
103  // pepXML doesn't support protein sequences
104  BOOST_FOREACH(DBSequencePtr& dbSequence, mzid.sequenceCollection.dbSequences)
105  {
106  dbSequence->seq.clear();
107  dbSequence->length = 0;
108  dbSequence->id = "DBSeq_" + dbSequence->accession;
109  }
110 
111  // pepXML can only support one mass type (we pick the max mass in case one of them is 0)
112  BOOST_FOREACH(PeptidePtr& peptide, mzid.sequenceCollection.peptides)
113  BOOST_FOREACH(ModificationPtr& mod, peptide->modification)
114  mod->monoisotopicMassDelta = mod->avgMassDelta = max(mod->monoisotopicMassDelta, mod->avgMassDelta);
115 
116  // pepXML doesn't support fragment metadata
117  mzid.dataCollection.analysisData.spectrumIdentificationList[0]->fragmentationTable.clear();
118 
119  BOOST_FOREACH(SpectrumIdentificationResultPtr& sir, mzid.dataCollection.analysisData.spectrumIdentificationList[0]->spectrumIdentificationResult)
120  BOOST_FOREACH(SpectrumIdentificationItemPtr& sii, sir->spectrumIdentificationItem)
121  {
122  // pepXML doesn't support fragment metadata or mass tables
123  sii->fragmentation.clear();
124  sii->massTablePtr.reset();
125 
126  for (size_t i=0; i < sii->peptideEvidencePtr.size(); ++i)
127  {
128  PeptideEvidence& pe = *sii->peptideEvidencePtr[i];
129 
130  // pepXML does not store peptide start and end offsets
131  pe.start = pe.end = 0;
132 
133  // pepXML's alternative_proteins do not store prev/next AA or missed cleavages
134  if (i > 0)
135  pe.pre = pe.post = '?';
136  }
137  }
138 
139  // pepXML doesn't have protein assembly
142 }
void testTranslation ( const string &  str)

Definition at line 144 of file Serializer_pepXML_Test.cpp.

References unit_assert.

Referenced by testSerializeReally().

145 {
146  // test that search engine name is written using preferred name
147  unit_assert(bal::contains(str, "search_engine=\"Mascot\""));
148 
149  // test that score names are written using preferred name
150  unit_assert(bal::contains(str, "name=\"ionscore\""));
151  unit_assert(bal::contains(str, "name=\"homologyscore\""));
152  unit_assert(bal::contains(str, "name=\"identityscore\""));
153  unit_assert(bal::contains(str, "name=\"expect\""));
154  unit_assert(bal::contains(str, "name=\"an extra score\""));
155 
156  // test that nativeID is preserved
157  unit_assert(bal::contains(str, "spectrumNativeID=\"controllerType=0 controllerNumber=1 scan=420\""));
158 }
void testSerializeReally ( IdentData &  mzid,
const Serializer_pepXML::Config &  config 
)

Definition at line 160 of file Serializer_pepXML_Test.cpp.

References diff(), os_, pwiz::identdata::Serializer_pepXML::read(), pwiz::identdata::Serializer_pepXML::Config::readSpectrumQueries, pwiz::identdata::References::resolve(), testTranslation(), unit_assert, and pwiz::identdata::Serializer_pepXML::write().

Referenced by testSerialize().

161 {
162  if (os_) *os_ << "begin testSerialize" << endl;
163 
164  Serializer_pepXML serializer(config);
165  ostringstream oss;
166  serializer.write(oss, mzid, "tiny.pepXML");
167 
168  if (os_) *os_ << "oss:\n" << oss.str() << endl;
169  if (config.readSpectrumQueries)
170  testTranslation(oss.str());
171 
172  shared_ptr<istringstream> iss(new istringstream(oss.str()));
173  IdentData mzid2;
174  serializer.read(iss, mzid2);
175 
176  References::resolve(mzid2);
177 
178  Diff<IdentData, DiffConfig> diff(mzid, mzid2);
179  if (os_ && diff) *os_ << diff << endl;
180  unit_assert(!diff);
181 }
void testSerialize ( )

Definition at line 183 of file Serializer_pepXML_Test.cpp.

References pwiz::identdata::IdentData::analysisCollection, pwiz::identdata::DataCollection::analysisData, pwiz::identdata::IdentData::analysisProtocolCollection, pwiz::identdata::IdentData::dataCollection, pwiz::identdata::SequenceCollection::dbSequences, pwiz::identdata::examples::initializeBasicSpectrumIdentification(), MS_Asp_N, MS_Trypsin_P, NonSpecific, pwiz::identdata::SequenceCollection::peptideEvidence, pwiz::identdata::SequenceCollection::peptides, pwiz::identdata::AnalysisData::proteinDetectionListPtr, SemiSpecific, pwiz::identdata::IdentData::sequenceCollection, pwiz::identdata::AnalysisCollection::spectrumIdentification, pwiz::identdata::AnalysisData::spectrumIdentificationList, pwiz::identdata::AnalysisProtocolCollection::spectrumIdentificationProtocol, stripUnmappedMetadata(), and testSerializeReally().

184 {
185  IdentData mzid;
187  stripUnmappedMetadata(mzid);
189 
190 
191  // test non-specific enzyme
192  mzid.analysisProtocolCollection.spectrumIdentificationProtocol[0]->enzymes.enzymes.clear();
193  EnzymePtr noEnzyme(new Enzyme);
194  noEnzyme->id = "ENZ_1";
195  noEnzyme->cTermGain = "OH";
196  noEnzyme->nTermGain = "H";
197  noEnzyme->missedCleavages = 2;
198  noEnzyme->minDistance = 1;
199  noEnzyme->terminalSpecificity = proteome::Digestion::NonSpecific;
200  noEnzyme->siteRegexp = "(?<=[KR])";
201  noEnzyme->enzymeName.set(MS_Trypsin_P);
202  mzid.analysisProtocolCollection.spectrumIdentificationProtocol[0]->enzymes.enzymes.push_back(noEnzyme);
204 
205 
206  // test sense="N" enzymes
207  mzid.analysisProtocolCollection.spectrumIdentificationProtocol[0]->enzymes.enzymes.clear();
208  EnzymePtr aspN(new Enzyme);
209  aspN->id = "ENZ_1";
210  aspN->cTermGain = "OH";
211  aspN->nTermGain = "H";
212  aspN->missedCleavages = 2;
213  aspN->minDistance = 1;
214  aspN->terminalSpecificity = proteome::Digestion::FullySpecific;
215  aspN->siteRegexp = "(?=[BD])";
216  aspN->enzymeName.set(MS_Asp_N);
217  mzid.analysisProtocolCollection.spectrumIdentificationProtocol[0]->enzymes.enzymes.push_back(aspN);
219 
220  aspN->missedCleavages = 4;
221  aspN->minDistance = 2;
222  aspN->terminalSpecificity = proteome::Digestion::SemiSpecific;
223  aspN->siteRegexp = "(?=[BND])";
224  aspN->enzymeName.clear();
225  aspN->enzymeName.userParams.push_back(UserParam("custom"));
227 
228 
229  // test with readSpectrumQueries == false
230 
231  // clear the original SequenceCollection
232  mzid.sequenceCollection.dbSequences.clear();
233  mzid.sequenceCollection.peptides.clear();
234  mzid.sequenceCollection.peptideEvidence.clear();
235 
236  // clear the original analysis data
237  mzid.analysisCollection.spectrumIdentification[0]->inputSpectra[0]->spectrumIDFormat = CVParam();
238  mzid.analysisCollection.spectrumIdentification[0]->spectrumIdentificationListPtr.reset();
241 
243 }
void testPepXMLSpecificity ( )

Definition at line 245 of file Serializer_pepXML_Test.cpp.

References pwiz::data::ParamContainer::clear(), pwiz::identdata::cleavageAgent(), pwiz::identdata::PepXMLSpecificity::cut, pwiz::identdata::Enzyme::enzymeName, pwiz::proteome::Digestion::getCleavageAgentRegex(), pwiz::proteome::Digestion::getCleavageAgents(), MS_Asp_N, MS_Trypsin, MS_Trypsin_P, pwiz::identdata::Identifiable::name, pwiz::identdata::PepXMLSpecificity::no_cut, pwiz::identdata::pepXMLSpecificity(), pwiz::identdata::PepXMLSpecificity::sense, pwiz::data::ParamContainer::set(), pwiz::identdata::Enzyme::siteRegexp, unit_assert, unit_assert_operator_equal, and pwiz::data::ParamContainer::userParams.

Referenced by main().

246 {
247  PepXMLSpecificity result;
248  Enzyme ez;
249 
251  result = pepXMLSpecificity(ez);
252  unit_assert_operator_equal("C", result.sense);
253  unit_assert_operator_equal("KR", result.cut);
254  unit_assert_operator_equal("P", result.no_cut);
255 
256  ez.enzymeName.clear();
258  result = pepXMLSpecificity(ez);
259  unit_assert_operator_equal("C", result.sense);
260  unit_assert_operator_equal("KR", result.cut);
262 
263  ez.enzymeName.clear();
264  ez.enzymeName.userParams.push_back(UserParam("trypsin/p"));
265  result = pepXMLSpecificity(ez);
266  unit_assert_operator_equal("C", result.sense);
267  unit_assert_operator_equal("KR", result.cut);
269 
270  ez.enzymeName.clear();
271  ez.name = "trypsin/p";
272  result = pepXMLSpecificity(ez);
273  unit_assert_operator_equal("C", result.sense);
274  unit_assert_operator_equal("KR", result.cut);
276 
277  ez.name.clear();
278  ez.enzymeName.set(MS_Asp_N);
279  result = pepXMLSpecificity(ez);
280  unit_assert_operator_equal("N", result.sense);
281  unit_assert_operator_equal("BD", result.cut);
283 
284  ez.enzymeName.clear();
286  result = pepXMLSpecificity(ez);
287  unit_assert_operator_equal("C", result.sense);
288  unit_assert_operator_equal("KR", result.cut);
289  unit_assert_operator_equal("P", result.no_cut);
290 
292  result = pepXMLSpecificity(ez);
293  unit_assert_operator_equal("C", result.sense);
294  unit_assert_operator_equal("KR", result.cut);
296 
298  result = pepXMLSpecificity(ez);
299  unit_assert_operator_equal("N", result.sense);
300  unit_assert_operator_equal("BD", result.cut);
302 
303 
304  // REMEMBER: update the pepXMLSpecificity function when new CV enzymes are added
305  bool allCleavageAgentsHandled = true;
306  ez.siteRegexp.clear();
308  try
309  {
310  ez.enzymeName.clear();
311  ez.enzymeName.set(cleavageAgent);
312  result = pepXMLSpecificity(ez);
313  }
314  catch (exception& e)
315  {
316  cerr << e.what() << endl;
317  allCleavageAgentsHandled = false;
318  }
319  unit_assert(allCleavageAgentsHandled);
320 
321 
322  ez.siteRegexp = "(?<=[QWERTY])(?=[QWERTY])";
323  result = pepXMLSpecificity(ez);
324  unit_assert_operator_equal("C", result.sense);
325  unit_assert_operator_equal("QWERTY", result.cut);
326  unit_assert_operator_equal("ABCDFGHIJKLMNOPSUVZ", result.no_cut);
327 
328  ez.siteRegexp = "(?<![QWERTY])(?![QWERTY])";
329  result = pepXMLSpecificity(ez);
330  unit_assert_operator_equal("C", result.sense);
331  unit_assert_operator_equal("ABCDFGHIJKLMNOPSUVZ", result.cut);
332  unit_assert_operator_equal("QWERTY", result.no_cut);
333 
334  ez.siteRegexp = "(?<=[QWERTY])";
335  result = pepXMLSpecificity(ez);
336  unit_assert_operator_equal("C", result.sense);
337  unit_assert_operator_equal("QWERTY", result.cut);
339 
340  ez.siteRegexp = "(?=[QWERTY])";
341  result = pepXMLSpecificity(ez);
342  unit_assert_operator_equal("N", result.sense);
343  unit_assert_operator_equal("QWERTY", result.cut);
345 
346  ez.siteRegexp = "(?<![QWERTY])";
347  result = pepXMLSpecificity(ez);
348  unit_assert_operator_equal("C", result.sense);
349  unit_assert_operator_equal("ABCDFGHIJKLMNOPSUVZ", result.cut);
351 
352  ez.siteRegexp = "(?![QWERTY])";
353  result = pepXMLSpecificity(ez);
354  unit_assert_operator_equal("N", result.sense);
355  unit_assert_operator_equal("ABCDFGHIJKLMNOPSUVZ", result.cut);
357 }
void testStripChargeFromConventionalSpectrumId ( )

Definition at line 360 of file Serializer_pepXML_Test.cpp.

References pwiz::identdata::stripChargeFromConventionalSpectrumId(), and unit_assert_operator_equal.

Referenced by main().

361 {
362  unit_assert_operator_equal("basename.123.123", stripChargeFromConventionalSpectrumId("basename.123.123.2"));
363  unit_assert_operator_equal("basename.ext.123.123", stripChargeFromConventionalSpectrumId("basename.ext.123.123.12"));
364  unit_assert_operator_equal("basename.2.2", stripChargeFromConventionalSpectrumId("basename.2.2.2"));
365  unit_assert_operator_equal("basename.ext.3.3", stripChargeFromConventionalSpectrumId("basename.ext.3.3.3"));
366  unit_assert_operator_equal("basename.123.123", stripChargeFromConventionalSpectrumId("basename.123.123"));
367  unit_assert_operator_equal("basename.ext.123.123", stripChargeFromConventionalSpectrumId("basename.ext.123.123"));
368  unit_assert_operator_equal("locus:1.1.1.123", stripChargeFromConventionalSpectrumId("locus:1.1.1.123.2"));
369  unit_assert_operator_equal("basename.123", stripChargeFromConventionalSpectrumId("basename.123"));
371 }
int main ( int  argc,
char **  argv 
)

Definition at line 374 of file Serializer_pepXML_Test.cpp.

References os_, TEST_EPILOG, TEST_FAILED, TEST_PROLOG, testPepXMLSpecificity(), testSerialize(), and testStripChargeFromConventionalSpectrumId().

375 {
376  TEST_PROLOG(argc, argv)
377 
378  try
379  {
380  if (argc>1 && !strcmp(argv[1],"-v")) os_ = &cout;
383  testSerialize();
384  }
385  catch (exception& e)
386  {
387  TEST_FAILED(e.what())
388  }
389  catch (...)
390  {
391  TEST_FAILED("Caught unknown exception.")
392  }
393 
395 }

Variable Documentation

ostream* os_ = 0

Definition at line 43 of file Serializer_pepXML_Test.cpp.