mdds
segment_tree.hpp
1 /*************************************************************************
2  *
3  * Copyright (c) 2010-2015 Kohei Yoshida
4  *
5  * Permission is hereby granted, free of charge, to any person
6  * obtaining a copy of this software and associated documentation
7  * files (the "Software"), to deal in the Software without
8  * restriction, including without limitation the rights to use,
9  * copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following
12  * conditions:
13  *
14  * The above copyright notice and this permission notice shall be
15  * included in all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
19  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
21  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
22  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24  * OTHER DEALINGS IN THE SOFTWARE.
25  *
26  ************************************************************************/
27 
28 #ifndef INCLUDED_MDDS_SEGMENTTREE_HPP
29 #define INCLUDED_MDDS_SEGMENTTREE_HPP
30 
31 #include "mdds/node.hpp"
32 #include "mdds/global.hpp"
33 
34 #include <vector>
35 #include <list>
36 #include <iostream>
37 #include <map>
38 #include <unordered_map>
39 #include <memory>
40 
41 #ifdef MDDS_UNIT_TEST
42 #include <sstream>
43 #endif
44 
45 namespace mdds {
46 
47 template<typename _Key, typename _Value>
48 class rectangle_set;
49 
50 template<typename _Key, typename _Value>
52 {
53  friend class rectangle_set<_Key, _Value>;
54 public:
55  typedef _Key key_type;
56  typedef _Value value_type;
57  typedef size_t size_type;
58  typedef ::std::vector<value_type> search_result_type;
59 
60 #ifdef MDDS_UNIT_TEST
61  struct segment_data
62  {
63  key_type begin_key;
64  key_type end_key;
65  value_type pdata;
66 
67  segment_data(key_type _beg, key_type _end, value_type p) :
68  begin_key(_beg), end_key(_end), pdata(p) {}
69 
70  bool operator==(const segment_data& r) const
71  {
72  return begin_key == r.begin_key && end_key == r.end_key && pdata == r.pdata;
73  }
74 
75  bool operator!=(const segment_data& r) const
76  {
77  return !operator==(r);
78  }
79  };
80 
81  struct segment_map_printer : public ::std::unary_function< ::std::pair<value_type, ::std::pair<key_type, key_type> >, void>
82  {
83  void operator() (const ::std::pair<value_type, ::std::pair<key_type, key_type> >& r) const
84  {
85  using namespace std;
86  cout << r.second.first << "-" << r.second.second << ": " << r.first->name << endl;
87  }
88  };
89 #endif
90 
91 public:
92  typedef ::std::vector<value_type> data_chain_type;
93  typedef std::unordered_map<value_type, ::std::pair<key_type, key_type> > segment_map_type;
94  typedef ::std::map<value_type, ::std::pair<key_type, key_type> > sorted_segment_map_type;
95 
97  {
98  key_type low;
99  key_type high;
100  data_chain_type* data_chain;
101 
102  bool operator== (const nonleaf_value_type& r) const
103  {
104  return low == r.low && high == r.high && data_chain == r.data_chain;
105  }
106  };
107 
109  {
110  key_type key;
111  data_chain_type* data_chain;
112 
113  bool operator== (const leaf_value_type& r) const
114  {
115  return key == r.key && data_chain == r.data_chain;
116  }
117  };
118 
120  struct init_handler;
121  struct dispose_handler;
122 #ifdef MDDS_UNIT_TEST
123  struct to_string_handler;
124 #endif
125 
127  typedef typename node::node_ptr node_ptr;
128 
130 
132  {
133  void operator() (__st::nonleaf_node<segment_tree>& _self, const __st::node_base* left_node, const __st::node_base* right_node)
134  {
135  // Parent node should carry the range of all of its child nodes.
136  if (left_node)
137  {
138  _self.value_nonleaf.low = left_node->is_leaf ?
139  static_cast<const node*>(left_node)->value_leaf.key :
140  static_cast<const nonleaf_node*>(left_node)->value_nonleaf.low;
141  }
142  else
143  {
144  // Having a left node is prerequisite.
145  throw general_error("segment_tree::fill_nonleaf_value_handler: Having a left node is prerequisite.");
146  }
147 
148  if (right_node)
149  {
150  if (right_node->is_leaf)
151  {
152  // When the child nodes are leaf nodes, the upper bound
153  // must be the value of the node that comes after the
154  // right leaf node (if such node exists).
155 
156  const node* p = static_cast<const node*>(right_node);
157  if (p->next)
158  _self.value_nonleaf.high = p->next->value_leaf.key;
159  else
160  _self.value_nonleaf.high = p->value_leaf.key;
161  }
162  else
163  {
164  _self.value_nonleaf.high = static_cast<const nonleaf_node*>(right_node)->value_nonleaf.high;
165  }
166  }
167  else
168  {
169  _self.value_nonleaf.high = left_node->is_leaf ?
170  static_cast<const node*>(left_node)->value_leaf.key :
171  static_cast<const nonleaf_node*>(left_node)->value_nonleaf.high;
172  }
173  }
174  };
175 
176 #ifdef MDDS_UNIT_TEST
177  struct to_string_handler
178  {
179  std::string operator() (const node& _self) const
180  {
181  std::ostringstream os;
182  os << "[" << _self.value_leaf.key << "] ";
183  return os.str();
184  }
185 
186  std::string operator() (const __st::nonleaf_node<segment_tree>& _self) const
187  {
188  std::ostringstream os;
189  os << "[" << _self.value_nonleaf.low << "-" << _self.value_nonleaf.high << ")";
190  if (_self.value_nonleaf.data_chain)
191  {
192  os << " { ";
193  typename data_chain_type::const_iterator
194  itr,
195  itr_beg = _self.value_nonleaf.data_chain->begin(),
196  itr_end = _self.value_nonleaf.data_chain->end();
197  for (itr = itr_beg; itr != itr_end; ++itr)
198  {
199  if (itr != itr_beg)
200  os << ", ";
201  os << (*itr)->name;
202  }
203  os << " }";
204  }
205  os << " ";
206  return os.str();
207  }
208  };
209 #endif
210 
212  {
213  void operator() (node& _self)
214  {
215  _self.value_leaf.data_chain = nullptr;
216  }
217 
218  void operator() (__st::nonleaf_node<segment_tree>& _self)
219  {
220  _self.value_nonleaf.data_chain = nullptr;
221  }
222  };
223 
225  {
226  void operator() (node& _self)
227  {
228  delete _self.value_leaf.data_chain;
229  }
230 
231  void operator() (__st::nonleaf_node<segment_tree>& _self)
232  {
233  delete _self.value_nonleaf.data_chain;
234  }
235  };
236 
237 #ifdef MDDS_UNIT_TEST
238  struct node_printer : public ::std::unary_function<const __st::node_base*, void>
239  {
240  void operator() (const __st::node_base* p) const
241  {
242  if (p->is_leaf)
243  std::cout << static_cast<const node*>(p)->to_string() << " ";
244  else
245  std::cout << static_cast<const nonleaf_node*>(p)->to_string() << " ";
246  }
247  };
248 #endif
249 
250 private:
251 
256  class search_result_base
257  {
258  public:
259  typedef std::vector<data_chain_type*> res_chains_type;
260  typedef std::shared_ptr<res_chains_type> res_chains_ptr;
261  public:
262 
263  search_result_base() :
264  mp_res_chains(static_cast<res_chains_type*>(nullptr)) {}
265 
266  search_result_base(const search_result_base& r) :
267  mp_res_chains(r.mp_res_chains) {}
268 
269  size_t size() const
270  {
271  size_t combined = 0;
272  if (!mp_res_chains)
273  return combined;
274 
275  typename res_chains_type::const_iterator
276  itr = mp_res_chains->begin(), itr_end = mp_res_chains->end();
277  for (; itr != itr_end; ++itr)
278  combined += (*itr)->size();
279  return combined;
280  }
281 
282  void push_back_chain(data_chain_type* chain)
283  {
284  if (!chain || chain->empty())
285  return;
286 
287  if (!mp_res_chains)
288  mp_res_chains.reset(new res_chains_type);
289  mp_res_chains->push_back(chain);
290  }
291 
292  res_chains_ptr& get_res_chains() { return mp_res_chains; }
293 
294  private:
295  res_chains_ptr mp_res_chains;
296  };
297 
298  class iterator_base
299  {
300  protected:
301  typedef typename search_result_base::res_chains_type res_chains_type;
302  typedef typename search_result_base::res_chains_ptr res_chains_ptr;
303 
304  iterator_base(const res_chains_ptr& p) :
305  mp_res_chains(p), m_end_pos(true) {}
306 
307  public:
308  typedef ::std::bidirectional_iterator_tag iterator_category;
309  typedef typename data_chain_type::value_type value_type;
310  typedef typename data_chain_type::pointer pointer;
311  typedef typename data_chain_type::reference reference;
312  typedef typename data_chain_type::difference_type difference_type;
313 
314  iterator_base() :
315  mp_res_chains(static_cast<res_chains_type*>(nullptr)), m_end_pos(true) {}
316 
317  iterator_base(const iterator_base& r) :
318  mp_res_chains(r.mp_res_chains),
319  m_cur_chain(r.m_cur_chain),
320  m_cur_pos_in_chain(r.m_cur_pos_in_chain),
321  m_end_pos(r.m_end_pos) {}
322 
323  iterator_base& operator= (const iterator_base& r)
324  {
325  mp_res_chains = r.mp_res_chains;
326  m_cur_chain = r.m_cur_chain;
327  m_cur_pos_in_chain = r.m_cur_pos_in_chain;
328  m_end_pos = r.m_end_pos;
329  return *this;
330  }
331 
332  typename data_chain_type::value_type* operator++ ()
333  {
334  // We don't check for end position flag for performance reasons.
335  // The caller is responsible for making sure not to increment past
336  // end position.
337 
338  // When reaching the end position, the internal iterators still
339  // need to be pointing at the last item before the end position.
340  // This is why we need to make copies of the iterators, and copy
341  // them back once done.
342 
343  typename data_chain_type::iterator cur_pos_in_chain = m_cur_pos_in_chain;
344 
345  if (++cur_pos_in_chain == (*m_cur_chain)->end())
346  {
347  // End of current chain. Inspect the next chain if exists.
348  typename res_chains_type::iterator cur_chain = m_cur_chain;
349  ++cur_chain;
350  if (cur_chain == mp_res_chains->end())
351  {
352  m_end_pos = true;
353  return nullptr;
354  }
355  m_cur_chain = cur_chain;
356  m_cur_pos_in_chain = (*m_cur_chain)->begin();
357  }
358  else
359  ++m_cur_pos_in_chain;
360 
361  return operator->();
362  }
363 
364  typename data_chain_type::value_type* operator-- ()
365  {
366  if (!mp_res_chains)
367  return nullptr;
368 
369  if (m_end_pos)
370  {
371  m_end_pos = false;
372  return &(*m_cur_pos_in_chain);
373  }
374 
375  if (m_cur_pos_in_chain == (*m_cur_chain)->begin())
376  {
377  if (m_cur_chain == mp_res_chains->begin())
378  {
379  // Already at the first data chain. Don't move the iterator position.
380  return nullptr;
381  }
382  --m_cur_chain;
383  m_cur_pos_in_chain = (*m_cur_chain)->end();
384  }
385  --m_cur_pos_in_chain;
386  return operator->();
387  }
388 
389  bool operator== (const iterator_base& r) const
390  {
391  if (mp_res_chains.get())
392  {
393  // non-empty result set.
394  return mp_res_chains.get() == r.mp_res_chains.get() &&
395  m_cur_chain == r.m_cur_chain && m_cur_pos_in_chain == r.m_cur_pos_in_chain &&
396  m_end_pos == r.m_end_pos;
397  }
398 
399  // empty result set.
400  if (r.mp_res_chains.get())
401  return false;
402  return m_end_pos == r.m_end_pos;
403  }
404 
405  bool operator!= (const iterator_base& r) const { return !operator==(r); }
406 
407  typename data_chain_type::value_type& operator*()
408  {
409  return *m_cur_pos_in_chain;
410  }
411 
412  typename data_chain_type::value_type* operator->()
413  {
414  return &(*m_cur_pos_in_chain);
415  }
416 
417  protected:
418  void move_to_front()
419  {
420  if (!mp_res_chains)
421  {
422  // Empty data set.
423  m_end_pos = true;
424  return;
425  }
426 
427  // We assume that there is at least one chain list, and no
428  // empty chain list exists. So, skip the check.
429  m_cur_chain = mp_res_chains->begin();
430  m_cur_pos_in_chain = (*m_cur_chain)->begin();
431  m_end_pos = false;
432  }
433 
434  void move_to_end()
435  {
436  m_end_pos = true;
437  if (!mp_res_chains)
438  // Empty data set.
439  return;
440 
441  m_cur_chain = mp_res_chains->end();
442  --m_cur_chain;
443  m_cur_pos_in_chain = (*m_cur_chain)->end();
444  --m_cur_pos_in_chain;
445  }
446 
447  private:
448  res_chains_ptr mp_res_chains;
449  typename res_chains_type::iterator m_cur_chain;
450  typename data_chain_type::iterator m_cur_pos_in_chain;
451  bool m_end_pos:1;
452  };
453 
454 public:
455 
456  class search_result : public search_result_base
457  {
458  typedef typename search_result_base::res_chains_type res_chains_type;
459  typedef typename search_result_base::res_chains_ptr res_chains_ptr;
460  public:
461 
462  class iterator : public iterator_base
463  {
464  friend class segment_tree<_Key,_Value>::search_result;
465  private:
466  iterator(const res_chains_ptr& p) : iterator_base(p) {}
467  public:
468  iterator() : iterator_base() {}
469  };
470 
471  typename search_result::iterator begin()
472  {
473  typename search_result::iterator itr(search_result_base::get_res_chains());
474  itr.move_to_front();
475  return itr;
476  }
477 
478  typename search_result::iterator end()
479  {
480  typename search_result::iterator itr(search_result_base::get_res_chains());
481  itr.move_to_end();
482  return itr;
483  }
484  };
485 
486  class search_result_vector_inserter : public ::std::unary_function<data_chain_type*, void>
487  {
488  public:
489  search_result_vector_inserter(search_result_type& result) : m_result(result) {}
490  void operator() (data_chain_type* node_data)
491  {
492  if (!node_data)
493  return;
494 
495  typename data_chain_type::const_iterator itr = node_data->begin(), itr_end = node_data->end();
496  for (; itr != itr_end; ++itr)
497  m_result.push_back(*itr);
498  }
499  private:
500  search_result_type& m_result;
501  };
502 
503  class search_result_inserter : public ::std::unary_function<data_chain_type*, void>
504  {
505  public:
506  search_result_inserter(search_result_base& result) : m_result(result) {}
507  void operator() (data_chain_type* node_data)
508  {
509  if (!node_data)
510  return;
511 
512  m_result.push_back_chain(node_data);
513  }
514  private:
515  search_result_base& m_result;
516  };
517 
518  segment_tree();
519  segment_tree(const segment_tree& r);
520  ~segment_tree();
521 
526  bool operator==(const segment_tree& r) const;
527 
528  bool operator!=(const segment_tree& r) const { return !operator==(r); }
529 
536  bool is_tree_valid() const { return m_valid_tree; }
537 
541  void build_tree();
542 
552  bool insert(key_type begin_key, key_type end_key, value_type pdata);
553 
570  bool search(key_type point, search_result_type& result) const;
571 
581  search_result search(key_type point) const;
582 
590  void remove(value_type value);
591 
595  void clear();
596 
600  size_t size() const;
601 
605  bool empty() const;
606 
612  size_t leaf_size() const;
613 
614 #ifdef MDDS_UNIT_TEST
615  void dump_tree() const;
616  void dump_leaf_nodes() const;
617  void dump_segment_data() const;
618  bool verify_node_lists() const;
619 
620  struct leaf_node_check
621  {
622  key_type key;
623  data_chain_type data_chain;
624  };
625 
626  bool verify_leaf_nodes(const ::std::vector<leaf_node_check>& checks) const;
627 
634  bool verify_segment_data(const segment_map_type& checks) const;
635 #endif
636 
637 private:
641  void search(key_type point, search_result_base& result) const;
642 
643  typedef std::vector<__st::node_base*> node_list_type;
644  typedef std::map<value_type, std::unique_ptr<node_list_type>> data_node_map_type;
645 
646  static void create_leaf_node_instances(const ::std::vector<key_type>& keys, node_ptr& left, node_ptr& right);
647 
653  void descend_tree_and_mark(
654  __st::node_base* pnode, value_type pdata, key_type begin_key, key_type end_key, node_list_type* plist);
655 
656  void build_leaf_nodes();
657 
662  void remove_data_from_nodes(node_list_type* plist, const value_type pdata);
663  void remove_data_from_chain(data_chain_type& chain, const value_type pdata);
664 
665  void clear_all_nodes();
666 
667 #ifdef MDDS_UNIT_TEST
668  static bool has_data_pointer(const node_list_type& node_list, const value_type pdata);
669  static void print_leaf_value(const leaf_value_type& v);
670 #endif
671 
672 private:
673  std::vector<nonleaf_node> m_nonleaf_node_pool;
674 
675  segment_map_type m_segment_data;
676 
682  data_node_map_type m_tagged_node_map;
683 
684  nonleaf_node* m_root_node;
685  node_ptr m_left_leaf;
686  node_ptr m_right_leaf;
687  bool m_valid_tree:1;
688 };
689 
690 }
691 
692 #include "segment_tree_def.inl"
693 
694 #endif
Definition: segment_tree.hpp:108
bool is_leaf
parent nonleaf_node
Definition: node.hpp:46
Definition: segment_tree.hpp:51
Definition: segment_tree.hpp:462
size_t size() const
bool insert(key_type begin_key, key_type end_key, value_type pdata)
bool operator==(const segment_tree &r) const
size_t leaf_size() const
Definition: node.hpp:43
Definition: node.hpp:53
bool is_tree_valid() const
Definition: segment_tree.hpp:536
Definition: rectangle_set.hpp:37
Definition: global.hpp:58
Definition: segment_tree.hpp:211
Definition: segment_tree.hpp:456
Definition: segment_tree.hpp:224
Definition: default_deleter.hpp:33
bool empty() const
data_chain_type * data_chain
high range value (non-inclusive)
Definition: segment_tree.hpp:100
bool search(key_type point, search_result_type &result) const
Definition: segment_tree.hpp:503
Definition: node.hpp:143
Definition: segment_tree.hpp:96
key_type high
low range value (inclusive)
Definition: segment_tree.hpp:99
node_ptr next
previous sibling leaf node.
Definition: node.hpp:166
Definition: segment_tree.hpp:131