mlpack  1.0.12
dtree.hpp
Go to the documentation of this file.
1 
15 #ifndef __MLPACK_METHODS_DET_DTREE_HPP
16 #define __MLPACK_METHODS_DET_DTREE_HPP
17 
18 #include <mlpack/core.hpp>
19 
20 namespace mlpack {
21 namespace det {
22 
46 class DTree
47 {
48  public:
52  DTree();
53 
62  DTree(const arma::vec& maxVals,
63  const arma::vec& minVals,
64  const size_t totalPoints);
65 
74  DTree(arma::mat& data);
75 
88  DTree(const arma::vec& maxVals,
89  const arma::vec& minVals,
90  const size_t start,
91  const size_t end,
92  const double logNegError);
93 
105  DTree(const arma::vec& maxVals,
106  const arma::vec& minVals,
107  const size_t totalPoints,
108  const size_t start,
109  const size_t end);
110 
112  ~DTree();
113 
124  double Grow(arma::mat& data,
125  arma::Col<size_t>& oldFromNew,
126  const bool useVolReg = false,
127  const size_t maxLeafSize = 10,
128  const size_t minLeafSize = 5);
129 
138  double PruneAndUpdate(const double oldAlpha,
139  const size_t points,
140  const bool useVolReg = false);
141 
147  double ComputeValue(const arma::vec& query) const;
148 
156  void WriteTree(FILE *fp, const size_t level = 0) const;
157 
165  int TagTree(const int tag = 0);
166 
173  int FindBucket(const arma::vec& query) const;
174 
180  void ComputeVariableImportance(arma::vec& importances) const;
181 
188  double LogNegativeError(const size_t totalPoints) const;
189 
193  bool WithinRange(const arma::vec& query) const;
194 
195  private:
196  // The indices in the complete set of points
197  // (after all forms of swapping in the original data
198  // matrix to align all the points in a node
199  // consecutively in the matrix. The 'old_from_new' array
200  // maps the points back to their original indices.
201 
204  size_t start;
207  size_t end;
208 
210  arma::vec maxVals;
212  arma::vec minVals;
213 
215  size_t splitDim;
216 
218  double splitValue;
219 
221  double logNegError;
222 
225 
228 
230  bool root;
231 
233  double ratio;
234 
236  double logVolume;
237 
240 
242  double alphaUpper;
243 
248 
249  public:
251  size_t Start() const { return start; }
253  size_t End() const { return end; }
255  size_t SplitDim() const { return splitDim; }
257  double SplitValue() const { return splitValue; }
259  double LogNegError() const { return logNegError; }
263  size_t SubtreeLeaves() const { return subtreeLeaves; }
266  double Ratio() const { return ratio; }
268  double LogVolume() const { return logVolume; }
270  DTree* Left() const { return left; }
272  DTree* Right() const { return right; }
274  bool Root() const { return root; }
276  double AlphaUpper() const { return alphaUpper; }
277 
279  const arma::vec& MaxVals() const { return maxVals; }
281  arma::vec& MaxVals() { return maxVals; }
282 
284  const arma::vec& MinVals() const { return minVals; }
286  arma::vec& MinVals() { return minVals; }
287 
291  std::string ToString() const;
292 
293  private:
294 
295  // Utility methods.
296 
300  bool FindSplit(const arma::mat& data,
301  size_t& splitDim,
302  double& splitValue,
303  double& leftError,
304  double& rightError,
305  const size_t minLeafSize = 5) const;
306 
310  size_t SplitData(arma::mat& data,
311  const size_t splitDim,
312  const double splitValue,
313  arma::Col<size_t>& oldFromNew) const;
314 
315 };
316 
317 }; // namespace det
318 }; // namespace mlpack
319 
320 #endif // __MLPACK_METHODS_DET_DTREE_HPP
arma::vec & MaxVals()
Modify the maximum values.
Definition: dtree.hpp:281
double ComputeValue(const arma::vec &query) const
Compute the logarithm of the density estimate of a given query point.
double LogNegativeError(const size_t totalPoints) const
Compute the log-negative-error for this point, given the total number of points in the dataset...
bool FindSplit(const arma::mat &data, size_t &splitDim, double &splitValue, double &leftError, double &rightError, const size_t minLeafSize=5) const
Find the dimension to split on.
size_t SplitDim() const
Return the split dimension of this node.
Definition: dtree.hpp:255
int FindBucket(const arma::vec &query) const
Return the tag of the leaf containing the query.
double splitValue
The split value on the splitting dimension for this node.
Definition: dtree.hpp:218
std::string ToString() const
Returns a string representation of this object.
double logNegError
log-negative-L2-error of the node.
Definition: dtree.hpp:221
void WriteTree(FILE *fp, const size_t level=0) const
Print the tree in a depth-first manner (this function is called recursively).
size_t Start() const
Return the starting index of points contained in this node.
Definition: dtree.hpp:251
~DTree()
Clean up memory allocated by the tree.
DTree * left
The left child.
Definition: dtree.hpp:245
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: load.hpp:23
arma::vec maxVals
Upper half of bounding box for this node.
Definition: dtree.hpp:210
size_t SubtreeLeaves() const
Return the number of leaves which are descendants of this node.
Definition: dtree.hpp:263
void ComputeVariableImportance(arma::vec &importances) const
Compute the variable importance of each dimension in the learned tree.
size_t End() const
Return the first index of a point not contained in this node.
Definition: dtree.hpp:253
size_t subtreeLeaves
Number of leaves of the subtree.
Definition: dtree.hpp:227
DTree * Left() const
Return the left child.
Definition: dtree.hpp:270
double LogVolume() const
Return the inverse of the volume of this node.
Definition: dtree.hpp:268
size_t start
The index of the first point in the dataset contained in this node (and its children).
Definition: dtree.hpp:204
arma::vec & MinVals()
Modify the minimum values.
Definition: dtree.hpp:286
size_t splitDim
The splitting dimension for this node.
Definition: dtree.hpp:215
const arma::vec & MaxVals() const
Return the maximum values.
Definition: dtree.hpp:279
DTree * Right() const
Return the right child.
Definition: dtree.hpp:272
double AlphaUpper() const
Return the upper part of the alpha sum.
Definition: dtree.hpp:276
double PruneAndUpdate(const double oldAlpha, const size_t points, const bool useVolReg=false)
Perform alpha pruning on a tree.
bool root
If true, this node is the root of the tree.
Definition: dtree.hpp:230
double LogNegError() const
Return the log negative error of this node.
Definition: dtree.hpp:259
DTree * right
The right child.
Definition: dtree.hpp:247
A density estimation tree is similar to both a decision tree and a space partitioning tree (like a kd...
Definition: dtree.hpp:46
double alphaUpper
Upper part of alpha sum; used for pruning.
Definition: dtree.hpp:242
double ratio
Ratio of the number of points in the node to the total number of points.
Definition: dtree.hpp:233
int bucketTag
The tag for the leaf, used for hashing points.
Definition: dtree.hpp:239
double subtreeLeavesLogNegError
Sum of the error of the leaves of the subtree.
Definition: dtree.hpp:224
double logVolume
The logarithm of the volume of the node.
Definition: dtree.hpp:236
bool WithinRange(const arma::vec &query) const
Return whether a query point is within the range of this node.
size_t SplitData(arma::mat &data, const size_t splitDim, const double splitValue, arma::Col< size_t > &oldFromNew) const
Split the data, returning the number of points left of the split.
arma::vec minVals
Lower half of bounding box for this node.
Definition: dtree.hpp:212
double SplitValue() const
Return the split value of this node.
Definition: dtree.hpp:257
double Ratio() const
Return the ratio of points in this node to the points in the whole dataset.
Definition: dtree.hpp:266
DTree()
Create an empty density estimation tree.
double Grow(arma::mat &data, arma::Col< size_t > &oldFromNew, const bool useVolReg=false, const size_t maxLeafSize=10, const size_t minLeafSize=5)
Greedily expand the tree.
bool Root() const
Return whether or not this is the root of the tree.
Definition: dtree.hpp:274
int TagTree(const int tag=0)
Index the buckets for possible usage later; this results in every leaf in the tree having a specific ...
size_t end
The index of the last point in the dataset contained in this node (and its children).
Definition: dtree.hpp:207
const arma::vec & MinVals() const
Return the minimum values.
Definition: dtree.hpp:284
double SubtreeLeavesLogNegError() const
Return the log negative error of all descendants of this node.
Definition: dtree.hpp:261