mlpack  1.0.12
lsh_search.hpp
Go to the documentation of this file.
1 
30 #ifndef __MLPACK_METHODS_NEIGHBOR_SEARCH_LSH_SEARCH_HPP
31 #define __MLPACK_METHODS_NEIGHBOR_SEARCH_LSH_SEARCH_HPP
32 
33 #include <mlpack/core.hpp>
34 #include <vector>
35 #include <string>
36 
39 
40 namespace mlpack {
41 namespace neighbor {
42 
50 template<typename SortPolicy = NearestNeighborSort>
51 class LSHSearch
52 {
53  public:
75  LSHSearch(const arma::mat& referenceSet,
76  const arma::mat& querySet,
77  const size_t numProj,
78  const size_t numTables,
79  const double hashWidth = 0.0,
80  const size_t secondHashSize = 99901,
81  const size_t bucketSize = 500);
82 
103  LSHSearch(const arma::mat& referenceSet,
104  const size_t numProj,
105  const size_t numTables,
106  const double hashWidth = 0.0,
107  const size_t secondHashSize = 99901,
108  const size_t bucketSize = 500);
109 
128  void Search(const size_t k,
129  arma::Mat<size_t>& resultingNeighbors,
130  arma::mat& distances,
131  const size_t numTablesToSearch = 0);
132 
133  // Returns a string representation of this object.
134  std::string ToString() const;
135 
136  private:
150  void BuildHash();
151 
163  void ReturnIndicesFromTable(const size_t queryIndex,
164  arma::uvec& referenceIndices,
165  size_t numTablesToSearch);
166 
174  double BaseCase(const size_t queryIndex, const size_t referenceIndex);
175 
188  void InsertNeighbor(const size_t queryIndex, const size_t pos,
189  const size_t neighbor, const double distance);
190 
192  const arma::mat& referenceSet;
193 
195  const arma::mat& querySet;
196 
198  const size_t numProj;
199 
201  const size_t numTables;
202 
204  std::vector<arma::mat> projections; // should be [numProj x dims] x numTables
205 
207  arma::mat offsets; // should be numProj x numTables
208 
210  double hashWidth;
211 
213  const size_t secondHashSize;
214 
216  arma::vec secondHashWeights;
217 
219  const size_t bucketSize;
220 
223 
225  arma::Mat<size_t> secondHashTable;
226 
229  arma::Col<size_t> bucketContentSize;
230 
233  arma::Col<size_t> bucketRowInHashTable;
234 
236  arma::mat* distancePtr;
237 
239  arma::Mat<size_t>* neighborPtr;
240 }; // class LSHSearch
241 
242 }; // namespace neighbor
243 }; // namespace mlpack
244 
245 // Include implementation.
246 #include "lsh_search_impl.hpp"
247 
248 #endif
const arma::mat & referenceSet
Reference dataset.
Definition: lsh_search.hpp:192
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: load.hpp:23
const arma::mat & querySet
Query dataset (may not be given).
Definition: lsh_search.hpp:195
void BuildHash()
This function builds a hash table with two levels of hashing as presented in the paper.
const size_t secondHashSize
The big prime representing the size of the second hash.
Definition: lsh_search.hpp:213
metric::SquaredEuclideanDistance metric
Instantiation of the metric.
Definition: lsh_search.hpp:222
std::vector< arma::mat > projections
The std::vector containing the projection matrix of each table.
Definition: lsh_search.hpp:204
const size_t bucketSize
The bucket size of the second hash.
Definition: lsh_search.hpp:219
arma::Col< size_t > bucketContentSize
The number of elements present in each hash bucket; should be secondHashSize.
Definition: lsh_search.hpp:229
arma::Mat< size_t > * neighborPtr
The pointer to the nearest neighbor indices.
Definition: lsh_search.hpp:239
double BaseCase(const size_t queryIndex, const size_t referenceIndex)
This is a helper function that computes the distance of the query to the neighbor candidates and appr...
The LSHSearch class – This class builds a hash on the reference set and uses this hash to compute th...
Definition: lsh_search.hpp:51
const size_t numProj
The number of projections.
Definition: lsh_search.hpp:198
std::string ToString() const
arma::mat offsets
The list of the offset 'b' for each of the projection for each table.
Definition: lsh_search.hpp:207
The L_p metric for arbitrary integer p, with an option to take the root.
Definition: lmetric.hpp:65
LSHSearch(const arma::mat &referenceSet, const arma::mat &querySet, const size_t numProj, const size_t numTables, const double hashWidth=0.0, const size_t secondHashSize=99901, const size_t bucketSize=500)
This function initializes the LSH class.
void InsertNeighbor(const size_t queryIndex, const size_t pos, const size_t neighbor, const double distance)
This is a helper function that efficiently inserts better neighbor candidates into an existing set of...
const size_t numTables
The number of hash tables.
Definition: lsh_search.hpp:201
arma::Mat< size_t > secondHashTable
The final hash table; should be (< secondHashSize) x bucketSize.
Definition: lsh_search.hpp:225
void ReturnIndicesFromTable(const size_t queryIndex, arma::uvec &referenceIndices, size_t numTablesToSearch)
This function takes a query and hashes it into each of the hash tables to get keys for the query and ...
arma::vec secondHashWeights
The weights of the second hash.
Definition: lsh_search.hpp:216
void Search(const size_t k, arma::Mat< size_t > &resultingNeighbors, arma::mat &distances, const size_t numTablesToSearch=0)
Compute the nearest neighbors and store the output in the given matrices.
double hashWidth
The hash width.
Definition: lsh_search.hpp:210
arma::Col< size_t > bucketRowInHashTable
For a particular hash value, points to the row in secondHashTable corresponding to this value...
Definition: lsh_search.hpp:233
arma::mat * distancePtr
The pointer to the nearest neighbor distances.
Definition: lsh_search.hpp:236