BamTools  2.4.0
Sort.h
Go to the documentation of this file.
1 // ***************************************************************************
2 // Sort.h (c) 2009 Derek Barnett
3 // Marth Lab, Department of Biology, Boston College
4 // All rights reserved.
5 // ---------------------------------------------------------------------------
6 // Last modified: 4 April 2012 (DB)
7 // ---------------------------------------------------------------------------
8 // Provides sorting functionality.
9 // ***************************************************************************
10 
11 #ifndef ALGORITHMS_SORT_H
12 #define ALGORITHMS_SORT_H
13 
14 #include "api/api_global.h"
15 #include "api/BamAlignment.h"
16 #include "api/BamReader.h"
17 #include "api/BamMultiReader.h"
18 #include <cassert>
19 #include <algorithm>
20 #include <functional>
21 #include <string>
22 #include <vector>
23 
24 namespace BamTools {
25 namespace Algorithms {
26 
30 struct API_EXPORT Sort {
31 
// Explicit values for selecting the direction of a sort.
enum Order {
    AscendingOrder = 0,   // compared with std::less in sort_helper
    DescendingOrder       // compared with std::greater in sort_helper
};
36 
42  template<typename ElemType>
43  static inline bool sort_helper(const Sort::Order& order, const ElemType& lhs, const ElemType& rhs) {
44  switch ( order ) {
45  case ( Sort::AscendingOrder ) : { std::less<ElemType> comp; return comp(lhs, rhs); }
46  case ( Sort::DescendingOrder ) : { std::greater<ElemType> comp; return comp(lhs, rhs); }
47  default : BT_ASSERT_UNREACHABLE;
48  }
49  return false; // <-- unreachable
50  }
51 
53  typedef std::binary_function<BamAlignment, BamAlignment, bool> AlignmentSortBase;
54 
71  struct ByName : public AlignmentSortBase {
72 
73  // ctor
75  : m_order(order)
76  { }
77 
78  // comparison function
80  return sort_helper(m_order, lhs.Name, rhs.Name);
81  }
82 
83  // used by BamMultiReader internals
84  static inline bool UsesCharData(void) { return true; }
85 
86  // data members
87  private:
88  const Sort::Order m_order;
89  };
90 
107  struct ByPosition : public AlignmentSortBase {
108 
109  // ctor
111  : m_order(order)
112  { }
113 
114  // comparison function
116 
117  // force unmapped aligmnents to end
118  if ( lhs.RefID == -1 ) return false;
119  if ( rhs.RefID == -1 ) return true;
120 
121  // if on same reference, sort on position
122  if ( lhs.RefID == rhs.RefID )
123  return sort_helper(m_order, lhs.Position, rhs.Position);
124 
125  // otherwise sort on reference ID
126  return sort_helper(m_order, lhs.RefID, rhs.RefID);
127  }
128 
129  // used by BamMultiReader internals
130  static inline bool UsesCharData(void) { return false; }
131 
132  // data members
133  private:
134  const Sort::Order m_order;
135  };
136 
153  template<typename T>
154  struct ByTag : public AlignmentSortBase {
155 
156  // ctor
157  ByTag(const std::string& tag,
158  const Sort::Order& order = Sort::AscendingOrder)
159  : m_tag(tag)
160  , m_order(order)
161  { }
162 
163  // comparison function
165 
166  // force alignments without tag to end
167  T lhsTagValue;
168  T rhsTagValue;
169  if ( !lhs.GetTag(m_tag, lhsTagValue) ) return false;
170  if ( !rhs.GetTag(m_tag, rhsTagValue) ) return true;
171 
172  // otherwise compare on tag values
173  return sort_helper(m_order, lhsTagValue, rhsTagValue);
174  }
175 
176  // used by BamMultiReader internals
177  static inline bool UsesCharData(void) { return true; }
178 
179  // data members
180  private:
181  const std::string m_tag;
182  const Sort::Order m_order;
183  };
184 
196  struct Unsorted : public AlignmentSortBase {
197 
198  // comparison function
200  return false; // returning false tends to retain insertion order
201  }
202 
203  // used by BamMultiReader internals
204  static inline bool UsesCharData(void) { return false; }
205  };
206 
220  template<typename Compare>
221  static inline void SortAlignments(std::vector<BamAlignment>& data,
222  const Compare& comp = Compare())
223  {
224  std::sort(data.begin(), data.end(), comp);
225  }
226 
242  template<typename Compare>
243  static inline std::vector<BamAlignment> SortAlignments(const std::vector<BamAlignment>& input,
244  const Compare& comp = Compare())
245  {
246  std::vector<BamAlignment> output(input);
247  SortAlignments(output, comp);
248  return output;
249  }
250 
271  template<typename Compare>
272  static std::vector<BamAlignment> GetSortedRegion(BamReader& reader,
273  const BamRegion& region,
274  const Compare& comp = Compare())
275  {
276  // return empty container if unable to find region
277  if ( !reader.IsOpen() ) return std::vector<BamAlignment>();
278  if ( !reader.SetRegion(region) ) return std::vector<BamAlignment>();
279 
280  // iterate through region, grabbing alignments
281  BamAlignment al;
282  std::vector<BamAlignment> results;
283  while ( reader.GetNextAlignmentCore(al) )
284  results.push_back(al);
285 
286  // sort & return alignments
287  SortAlignments(results, comp);
288  return results;
289  }
290 
311  template<typename Compare>
312  static std::vector<BamAlignment> GetSortedRegion(BamMultiReader& reader,
313  const BamRegion& region,
314  const Compare& comp = Compare())
315  {
316  // return empty container if unable to find region
317  if ( !reader.HasOpenReaders() ) return std::vector<BamAlignment>();
318  if ( !reader.SetRegion(region) ) return std::vector<BamAlignment>();
319 
320  // iterate through region, grabbing alignments
321  BamAlignment al;
322  std::vector<BamAlignment> results;
323  while ( reader.GetNextAlignmentCore(al) )
324  results.push_back(al);
325 
326  // sort & return alignments
327  SortAlignments(results, comp);
328  return results;
329  }
330 };
331 
332 } // namespace Algorithms
333 } // namespace BamTools
334 
335 #endif // ALGORITHMS_SORT_H
static std::vector< BamAlignment > GetSortedRegion(BamMultiReader &reader, const BamRegion &region, const Compare &comp=Compare())
Definition: Sort.h:312
bool operator()(const BamTools::BamAlignment &lhs, const BamTools::BamAlignment &rhs)
Definition: Sort.h:79
The main BAM alignment data structure.
Definition: BamAlignment.h:32
Function object for comparing alignments by tag value.
Definition: Sort.h:154
ByTag(const std::string &tag, const Sort::Order &order=Sort::AscendingOrder)
Definition: Sort.h:157
static std::vector< BamAlignment > GetSortedRegion(BamReader &reader, const BamRegion &region, const Compare &comp=Compare())
Definition: Sort.h:272
Represents a sequential genomic region.
Definition: BamAux.h:88
bool IsOpen(void) const
Returns true if a BAM file is open for reading.
Definition: BamReader.cpp:221
Function object for comparing alignments by name.
Definition: Sort.h:71
bool operator()(const BamTools::BamAlignment &, const BamTools::BamAlignment &)
Definition: Sort.h:199
bool operator()(const BamTools::BamAlignment &lhs, const BamTools::BamAlignment &rhs)
Definition: Sort.h:164
bool SetRegion(const BamRegion &region)
Sets a target region of interest.
Definition: BamMultiReader.cpp:391
bool GetTag(const std::string &tag, T &destination) const
Definition: BamAlignment.h:416
Provides read access to BAM files.
Definition: BamReader.h:25
#define API_EXPORT
Definition: api_global.h:18
bool GetNextAlignmentCore(BamAlignment &alignment)
Retrieves next available alignment, without populating the alignment's string data fields...
Definition: BamReader.cpp:181
static void SortAlignments(std::vector< BamAlignment > &data, const Compare &comp=Compare())
Definition: Sort.h:221
Function object for comparing alignments by position.
Definition: Sort.h:107
bool HasOpenReaders(void) const
Returns true if there are any open BAM files.
Definition: BamMultiReader.cpp:231
int32_t Position
position (0-based) where alignment starts
Definition: BamAlignment.h:124
bool operator()(const BamTools::BamAlignment &lhs, const BamTools::BamAlignment &rhs)
Definition: Sort.h:115
std::string Name
read name
Definition: BamAlignment.h:117
Placeholder function object.
Definition: Sort.h:196
Provides classes & methods related to sorting BamAlignments.
Definition: Sort.h:30
static std::vector< BamAlignment > SortAlignments(const std::vector< BamAlignment > &input, const Compare &comp=Compare())
Definition: Sort.h:243
ByPosition(const Sort::Order &order=Sort::AscendingOrder)
Definition: Sort.h:110
bool SetRegion(const BamRegion &region)
Sets a target region of interest.
Definition: BamReader.cpp:353
bool GetNextAlignmentCore(BamAlignment &alignment)
Retrieves next available alignment.
Definition: BamMultiReader.cpp:188
Contains all BamTools classes & methods.
Definition: Sort.h:24
ByName(const Sort::Order &order=Sort::AscendingOrder)
Definition: Sort.h:74
int32_t RefID
ID number for reference sequence.
Definition: BamAlignment.h:123
Order
Provides explicit values for specifying desired sort ordering.
Definition: Sort.h:33
Convenience class for reading multiple BAM files.
Definition: BamMultiReader.h:26