root / rgbdslam / gicp / ann_1.1.1 / include / ANN / ANN.h @ 9240aaa3
History | View | Annotate | Download (33.9 KB)
1 | 9240aaa3 | Alex | //----------------------------------------------------------------------
|
---|---|---|---|
2 | // File: ANN.h
|
||
3 | // Programmer: Sunil Arya and David Mount
|
||
4 | // Last modified: 05/03/05 (Release 1.1)
|
||
5 | // Description: Basic include file for approximate nearest
|
||
6 | // neighbor searching.
|
||
7 | //----------------------------------------------------------------------
|
||
8 | // Copyright (c) 1997-2005 University of Maryland and Sunil Arya and
|
||
9 | // David Mount. All Rights Reserved.
|
||
10 | //
|
||
11 | // This software and related documentation is part of the Approximate
|
||
12 | // Nearest Neighbor Library (ANN). This software is provided under
|
||
13 | // the provisions of the Lesser GNU Public License (LGPL). See the
|
||
14 | // file ../ReadMe.txt for further information.
|
||
15 | //
|
||
16 | // The University of Maryland (U.M.) and the authors make no
|
||
17 | // representations about the suitability or fitness of this software for
|
||
18 | // any purpose. It is provided "as is" without express or implied
|
||
19 | // warranty.
|
||
20 | //----------------------------------------------------------------------
|
||
21 | // History:
|
||
22 | // Revision 0.1 03/04/98
|
||
23 | // Initial release
|
||
24 | // Revision 1.0 04/01/05
|
||
25 | // Added copyright and revision information
|
||
26 | // Added ANNcoordPrec for coordinate precision.
|
||
27 | // Added methods theDim, nPoints, maxPoints, thePoints to ANNpointSet.
|
||
28 | // Cleaned up C++ structure for modern compilers
|
||
29 | // Revision 1.1 05/03/05
|
||
30 | // Added fixed-radius k-NN searching
|
||
31 | //----------------------------------------------------------------------
|
||
32 | |||
33 | //----------------------------------------------------------------------
|
||
34 | // ANN - approximate nearest neighbor searching
|
||
35 | // ANN is a library for approximate nearest neighbor searching,
|
||
36 | // based on the use of standard and priority search in kd-trees
|
||
37 | // and balanced box-decomposition (bbd) trees. Here are some
|
||
38 | // references to the main algorithmic techniques used here:
|
||
39 | //
|
||
40 | // kd-trees:
|
||
41 | // Friedman, Bentley, and Finkel, ``An algorithm for finding
|
||
42 | // best matches in logarithmic expected time,'' ACM
|
||
43 | // Transactions on Mathematical Software, 3(3):209-226, 1977.
|
||
44 | //
|
||
45 | // Priority search in kd-trees:
|
||
46 | // Arya and Mount, ``Algorithms for fast vector quantization,''
|
||
47 | // Proc. of DCC '93: Data Compression Conference, eds. J. A.
|
||
48 | // Storer and M. Cohn, IEEE Press, 1993, 381-390.
|
||
49 | //
|
||
50 | // Approximate nearest neighbor search and bbd-trees:
|
||
51 | // Arya, Mount, Netanyahu, Silverman, and Wu, ``An optimal
|
||
52 | // algorithm for approximate nearest neighbor searching,''
|
||
53 | // 5th Ann. ACM-SIAM Symposium on Discrete Algorithms,
|
||
54 | // 1994, 573-582.
|
||
55 | //----------------------------------------------------------------------
|
||
56 | |||
57 | #ifndef ANN_H
|
||
58 | #define ANN_H
|
||
59 | |||
60 | #ifdef WIN32
|
||
61 | //----------------------------------------------------------------------
|
||
62 | // For Microsoft Visual C++, externally accessible symbols must be
|
||
63 | // explicitly indicated with DLL_API, which is somewhat like "extern."
|
||
64 | //
|
||
65 | // The following ifdef block is the standard way of creating macros
|
||
66 | // which make exporting from a DLL simpler. All files within this DLL
|
||
67 | // are compiled with the DLL_EXPORTS preprocessor symbol defined on the
|
||
68 | // command line. In contrast, projects that use (or import) the DLL
|
||
69 | // objects do not define the DLL_EXPORTS symbol. This way any other
|
||
70 | // project whose source files include this file see DLL_API functions as
|
||
71 | // being imported from a DLL, wheras this DLL sees symbols defined with
|
||
72 | // this macro as being exported.
|
||
73 | //----------------------------------------------------------------------
|
||
74 | #ifdef DLL_EXPORTS
|
||
75 | #define DLL_API __declspec(dllexport)
|
||
76 | #else
|
||
77 | #define DLL_API __declspec(dllimport)
|
||
78 | #endif
|
||
79 | //----------------------------------------------------------------------
|
||
80 | // DLL_API is ignored for all other systems
|
||
81 | //----------------------------------------------------------------------
|
||
82 | #else
|
||
83 | #define DLL_API
|
||
84 | #endif
|
||
85 | |||
86 | //----------------------------------------------------------------------
|
||
87 | // basic includes
|
||
88 | //----------------------------------------------------------------------
|
||
89 | |||
90 | #include <cstdlib> // exit(int) |
||
91 | #include <cmath> // math includes |
||
92 | #include <iostream> // I/O streams |
||
93 | |||
94 | //----------------------------------------------------------------------
|
||
95 | // Limits
|
||
96 | // There are a number of places where we use the maximum double value as
|
||
97 | // default initializers (and others may be used, depending on the
|
||
98 | // data/distance representation). These can usually be found in limits.h
|
||
99 | // (as LONG_MAX, INT_MAX) or in float.h (as DBL_MAX, FLT_MAX).
|
||
100 | //
|
||
101 | // Not all systems have these files. If you are using such a system,
|
||
102 | // you should set the preprocessor symbol ANN_NO_LIMITS_H when
|
||
103 | // compiling, and modify the statements below to generate the
|
||
104 | // appropriate value. For practical purposes, this does not need to be
|
||
105 | // the maximum double value. It is sufficient that it be at least as
|
||
106 | // large than the maximum squared distance between between any two
|
||
107 | // points.
|
||
108 | //----------------------------------------------------------------------
|
||
109 | #ifdef ANN_NO_LIMITS_H // limits.h unavailable |
||
110 | #include <cvalues> // replacement for limits.h |
||
111 | const double ANN_DBL_MAX = MAXDOUBLE; // insert maximum double |
||
112 | #else
|
||
113 | #include <climits> |
||
114 | #include <cfloat> |
||
115 | const double ANN_DBL_MAX = DBL_MAX; |
||
116 | #endif
|
||
117 | |||
118 | #define ANNversion "1.1.1" // ANN version and information |
||
119 | #define ANNversionCmt "" |
||
120 | #define ANNcopyright "David M. Mount and Sunil Arya" |
||
121 | #define ANNlatestRev "Aug 4, 2006" |
||
122 | |||
123 | //----------------------------------------------------------------------
|
||
124 | // ANNbool
|
||
125 | // This is a simple boolean type. Although ANSI C++ is supposed
|
||
126 | // to support the type bool, some compilers do not have it.
|
||
127 | //----------------------------------------------------------------------
|
||
128 | |||
129 | enum ANNbool {ANNfalse = 0, ANNtrue = 1}; // ANN boolean type (non ANSI C++) |
||
130 | |||
131 | //----------------------------------------------------------------------
|
||
132 | // ANNcoord, ANNdist
|
||
133 | // ANNcoord and ANNdist are the types used for representing
|
||
134 | // point coordinates and distances. They can be modified by the
|
||
135 | // user, with some care. It is assumed that they are both numeric
|
||
136 | // types, and that ANNdist is generally of an equal or higher type
|
||
137 | // from ANNcoord. A variable of type ANNdist should be large
|
||
138 | // enough to store the sum of squared components of a variable
|
||
139 | // of type ANNcoord for the number of dimensions needed in the
|
||
140 | // application. For example, the following combinations are
|
||
141 | // legal:
|
||
142 | //
|
||
143 | // ANNcoord ANNdist
|
||
144 | // --------- -------------------------------
|
||
145 | // short short, int, long, float, double
|
||
146 | // int int, long, float, double
|
||
147 | // long long, float, double
|
||
148 | // float float, double
|
||
149 | // double double
|
||
150 | //
|
||
151 | // It is the user's responsibility to make sure that overflow does
|
||
152 | // not occur in distance calculation.
|
||
153 | //----------------------------------------------------------------------
|
||
154 | |||
155 | typedef double ANNcoord; // coordinate data type |
||
156 | typedef double ANNdist; // distance data type |
||
157 | |||
158 | //----------------------------------------------------------------------
|
||
159 | // ANNidx
|
||
160 | // ANNidx is a point index. When the data structure is built, the
|
||
161 | // points are given as an array. Nearest neighbor results are
|
||
162 | // returned as an integer index into this array. To make it
|
||
163 | // clearer when this is happening, we define the integer type
|
||
164 | // ANNidx. Indexing starts from 0.
|
||
165 | //
|
||
166 | // For fixed-radius near neighbor searching, it is possible that
|
||
167 | // there are not k nearest neighbors within the search radius. To
|
||
168 | // indicate this, the algorithm returns ANN_NULL_IDX as its result.
|
||
169 | // It should be distinguishable from any valid array index.
|
||
170 | //----------------------------------------------------------------------
|
||
171 | |||
172 | typedef int ANNidx; // point index |
||
173 | const ANNidx ANN_NULL_IDX = -1; // a NULL point index |
||
174 | |||
175 | //----------------------------------------------------------------------
|
||
176 | // Infinite distance:
|
||
177 | // The code assumes that there is an "infinite distance" which it
|
||
178 | // uses to initialize distances before performing nearest neighbor
|
||
179 | // searches. It should be as larger or larger than any legitimate
|
||
180 | // nearest neighbor distance.
|
||
181 | //
|
||
182 | // On most systems, these should be found in the standard include
|
||
183 | // file <limits.h> or possibly <float.h>. If you do not have these
|
||
184 | // file, some suggested values are listed below, assuming 64-bit
|
||
185 | // long, 32-bit int and 16-bit short.
|
||
186 | //
|
||
187 | // ANNdist ANN_DIST_INF Values (see <limits.h> or <float.h>)
|
||
188 | // ------- ------------ ------------------------------------
|
||
189 | // double DBL_MAX 1.79769313486231570e+308
|
||
190 | // float FLT_MAX 3.40282346638528860e+38
|
||
191 | // long LONG_MAX 0x7fffffffffffffff
|
||
192 | // int INT_MAX 0x7fffffff
|
||
193 | // short SHRT_MAX 0x7fff
|
||
194 | //----------------------------------------------------------------------
|
||
195 | |||
196 | const ANNdist ANN_DIST_INF = ANN_DBL_MAX;
|
||
197 | |||
198 | //----------------------------------------------------------------------
|
||
199 | // Significant digits for tree dumps:
|
||
200 | // When floating point coordinates are used, the routine that dumps
|
||
201 | // a tree needs to know roughly how many significant digits there
|
||
202 | // are in a ANNcoord, so it can output points to full precision.
|
||
203 | // This is defined to be ANNcoordPrec. On most systems these
|
||
204 | // values can be found in the standard include files <limits.h> or
|
||
205 | // <float.h>. For integer types, the value is essentially ignored.
|
||
206 | //
|
||
207 | // ANNcoord ANNcoordPrec Values (see <limits.h> or <float.h>)
|
||
208 | // -------- ------------ ------------------------------------
|
||
209 | // double DBL_DIG 15
|
||
210 | // float FLT_DIG 6
|
||
211 | // long doesn't matter 19
|
||
212 | // int doesn't matter 10
|
||
213 | // short doesn't matter 5
|
||
214 | //----------------------------------------------------------------------
|
||
215 | |||
216 | #ifdef DBL_DIG // number of sig. bits in ANNcoord |
||
217 | const int ANNcoordPrec = DBL_DIG; |
||
218 | #else
|
||
219 | const int ANNcoordPrec = 15; // default precision |
||
220 | #endif
|
||
221 | |||
222 | //----------------------------------------------------------------------
|
||
223 | // Self match?
|
||
224 | // In some applications, the nearest neighbor of a point is not
|
||
225 | // allowed to be the point itself. This occurs, for example, when
|
||
226 | // computing all nearest neighbors in a set. By setting the
|
||
227 | // parameter ANN_ALLOW_SELF_MATCH to ANNfalse, the nearest neighbor
|
||
228 | // is the closest point whose distance from the query point is
|
||
229 | // strictly positive.
|
||
230 | //----------------------------------------------------------------------
|
||
231 | |||
232 | const ANNbool ANN_ALLOW_SELF_MATCH = ANNtrue;
|
||
233 | |||
234 | //----------------------------------------------------------------------
|
||
235 | // Norms and metrics:
|
||
236 | // ANN supports any Minkowski norm for defining distance. In
|
||
237 | // particular, for any p >= 1, the L_p Minkowski norm defines the
|
||
238 | // length of a d-vector (v0, v1, ..., v(d-1)) to be
|
||
239 | //
|
||
240 | // (|v0|^p + |v1|^p + ... + |v(d-1)|^p)^(1/p),
|
||
241 | //
|
||
242 | // (where ^ denotes exponentiation, and |.| denotes absolute
|
||
243 | // value). The distance between two points is defined to be the
|
||
244 | // norm of the vector joining them. Some common distance metrics
|
||
245 | // include
|
||
246 | //
|
||
247 | // Euclidean metric p = 2
|
||
248 | // Manhattan metric p = 1
|
||
249 | // Max metric p = infinity
|
||
250 | //
|
||
251 | // In the case of the max metric, the norm is computed by taking
|
||
252 | // the maxima of the absolute values of the components. ANN is
|
||
253 | // highly "coordinate-based" and does not support general distances
|
||
254 | // functions (e.g. those obeying just the triangle inequality). It
|
||
255 | // also does not support distance functions based on
|
||
256 | // inner-products.
|
||
257 | //
|
||
258 | // For the purpose of computing nearest neighbors, it is not
|
||
259 | // necessary to compute the final power (1/p). Thus the only
|
||
260 | // component that is used by the program is |v(i)|^p.
|
||
261 | //
|
||
262 | // ANN parameterizes the distance computation through the following
|
||
263 | // macros. (Macros are used rather than procedures for
|
||
264 | // efficiency.) Recall that the distance between two points is
|
||
265 | // given by the length of the vector joining them, and the length
|
||
266 | // or norm of a vector v is given by formula:
|
||
267 | //
|
||
268 | // |v| = ROOT(POW(v0) # POW(v1) # ... # POW(v(d-1)))
|
||
269 | //
|
||
270 | // where ROOT, POW are unary functions and # is an associative and
|
||
271 | // commutative binary operator mapping the following types:
|
||
272 | //
|
||
273 | // ** POW: ANNcoord --> ANNdist
|
||
274 | // ** #: ANNdist x ANNdist --> ANNdist
|
||
275 | // ** ROOT: ANNdist (>0) --> double
|
||
276 | //
|
||
277 | // For early termination in distance calculation (partial distance
|
||
278 | // calculation) we assume that POW and # together are monotonically
|
||
279 | // increasing on sequences of arguments, meaning that for all
|
||
280 | // v0..vk and y:
|
||
281 | //
|
||
282 | // POW(v0) #...# POW(vk) <= (POW(v0) #...# POW(vk)) # POW(y).
|
||
283 | //
|
||
284 | // Incremental Distance Calculation:
|
||
285 | // The program uses an optimized method of computing distances for
|
||
286 | // kd-trees and bd-trees, called incremental distance calculation.
|
||
287 | // It is used when distances are to be updated when only a single
|
||
288 | // coordinate of a point has been changed. In order to use this,
|
||
289 | // we assume that there is an incremental update function DIFF(x,y)
|
||
290 | // for #, such that if:
|
||
291 | //
|
||
292 | // s = x0 # ... # xi # ... # xk
|
||
293 | //
|
||
294 | // then if s' is equal to s but with xi replaced by y, that is,
|
||
295 | //
|
||
296 | // s' = x0 # ... # y # ... # xk
|
||
297 | //
|
||
298 | // then the length of s' can be computed by:
|
||
299 | //
|
||
300 | // |s'| = |s| # DIFF(xi,y).
|
||
301 | //
|
||
302 | // Thus, if # is + then DIFF(xi,y) is (yi-x). For the L_infinity
|
||
303 | // norm we make use of the fact that in the program this function
|
||
304 | // is only invoked when y > xi, and hence DIFF(xi,y)=y.
|
||
305 | //
|
||
306 | // Finally, for approximate nearest neighbor queries we assume
|
||
307 | // that POW and ROOT are related such that
|
||
308 | //
|
||
309 | // v*ROOT(x) = ROOT(POW(v)*x)
|
||
310 | //
|
||
311 | // Here are the values for the various Minkowski norms:
|
||
312 | //
|
||
313 | // L_p: p even: p odd:
|
||
314 | // ------------------------- ------------------------
|
||
315 | // POW(v) = v^p POW(v) = |v|^p
|
||
316 | // ROOT(x) = x^(1/p) ROOT(x) = x^(1/p)
|
||
317 | // # = + # = +
|
||
318 | // DIFF(x,y) = y - x DIFF(x,y) = y - x
|
||
319 | //
|
||
320 | // L_inf:
|
||
321 | // POW(v) = |v|
|
||
322 | // ROOT(x) = x
|
||
323 | // # = max
|
||
324 | // DIFF(x,y) = y
|
||
325 | //
|
||
326 | // By default the Euclidean norm is assumed. To change the norm,
|
||
327 | // uncomment the appropriate set of macros below.
|
||
328 | //----------------------------------------------------------------------
|
||
329 | |||
330 | //----------------------------------------------------------------------
|
||
331 | // Use the following for the Euclidean norm
|
||
332 | //----------------------------------------------------------------------
|
||
333 | #define ANN_POW(v) ((v)*(v))
|
||
334 | #define ANN_ROOT(x) sqrt(x)
|
||
335 | #define ANN_SUM(x,y) ((x) + (y))
|
||
336 | #define ANN_DIFF(x,y) ((y) - (x))
|
||
337 | |||
338 | //----------------------------------------------------------------------
|
||
339 | // Use the following for the L_1 (Manhattan) norm
|
||
340 | //----------------------------------------------------------------------
|
||
341 | // #define ANN_POW(v) fabs(v)
|
||
342 | // #define ANN_ROOT(x) (x)
|
||
343 | // #define ANN_SUM(x,y) ((x) + (y))
|
||
344 | // #define ANN_DIFF(x,y) ((y) - (x))
|
||
345 | |||
346 | //----------------------------------------------------------------------
|
||
347 | // Use the following for a general L_p norm
|
||
348 | //----------------------------------------------------------------------
|
||
349 | // #define ANN_POW(v) pow(fabs(v),p)
|
||
350 | // #define ANN_ROOT(x) pow(fabs(x),1/p)
|
||
351 | // #define ANN_SUM(x,y) ((x) + (y))
|
||
352 | // #define ANN_DIFF(x,y) ((y) - (x))
|
||
353 | |||
354 | //----------------------------------------------------------------------
|
||
355 | // Use the following for the L_infinity (Max) norm
|
||
356 | //----------------------------------------------------------------------
|
||
357 | // #define ANN_POW(v) fabs(v)
|
||
358 | // #define ANN_ROOT(x) (x)
|
||
359 | // #define ANN_SUM(x,y) ((x) > (y) ? (x) : (y))
|
||
360 | // #define ANN_DIFF(x,y) (y)
|
||
361 | |||
362 | //----------------------------------------------------------------------
|
||
363 | // Array types
|
||
364 | // The following array types are of basic interest. A point is
|
||
365 | // just a dimensionless array of coordinates, a point array is a
|
||
366 | // dimensionless array of points. A distance array is a
|
||
367 | // dimensionless array of distances and an index array is a
|
||
368 | // dimensionless array of point indices. The latter two are used
|
||
369 | // when returning the results of k-nearest neighbor queries.
|
||
370 | //----------------------------------------------------------------------
|
||
371 | |||
372 | typedef ANNcoord* ANNpoint; // a point |
||
373 | typedef ANNpoint* ANNpointArray; // an array of points |
||
374 | typedef ANNdist* ANNdistArray; // an array of distances |
||
375 | typedef ANNidx* ANNidxArray; // an array of point indices |
||
376 | |||
377 | //----------------------------------------------------------------------
|
||
378 | // Basic point and array utilities:
|
||
379 | // The following procedures are useful supplements to ANN's nearest
|
||
380 | // neighbor capabilities.
|
||
381 | //
|
||
382 | // annDist():
|
||
383 | // Computes the (squared) distance between a pair of points.
|
||
384 | // Note that this routine is not used internally by ANN for
|
||
385 | // computing distance calculations. For reasons of efficiency
|
||
386 | // this is done using incremental distance calculation. Thus,
|
||
387 | // this routine cannot be modified as a method of changing the
|
||
388 | // metric.
|
||
389 | //
|
||
390 | // Because points (somewhat like strings in C) are stored as
|
||
391 | // pointers. Consequently, creating and destroying copies of
|
||
392 | // points may require storage allocation. These procedures do
|
||
393 | // this.
|
||
394 | //
|
||
395 | // annAllocPt() and annDeallocPt():
|
||
396 | // Allocate a deallocate storage for a single point, and
|
||
397 | // return a pointer to it. The argument to AllocPt() is
|
||
398 | // used to initialize all components.
|
||
399 | //
|
||
400 | // annAllocPts() and annDeallocPts():
|
||
401 | // Allocate and deallocate an array of points as well a
|
||
402 | // place to store their coordinates, and initializes the
|
||
403 | // points to point to their respective coordinates. It
|
||
404 | // allocates point storage in a contiguous block large
|
||
405 | // enough to store all the points. It performs no
|
||
406 | // initialization.
|
||
407 | //
|
||
408 | // annCopyPt():
|
||
409 | // Creates a copy of a given point, allocating space for
|
||
410 | // the new point. It returns a pointer to the newly
|
||
411 | // allocated copy.
|
||
412 | //----------------------------------------------------------------------
|
||
413 | |||
414 | DLL_API ANNdist annDist( |
||
415 | int dim, // dimension of space |
||
416 | ANNpoint p, // points
|
||
417 | ANNpoint q); |
||
418 | |||
419 | DLL_API ANNpoint annAllocPt( |
||
420 | int dim, // dimension |
||
421 | ANNcoord c = 0); // coordinate value (all equal) |
||
422 | |||
423 | DLL_API ANNpointArray annAllocPts( |
||
424 | int n, // number of points |
||
425 | int dim); // dimension |
||
426 | |||
427 | DLL_API void annDeallocPt(
|
||
428 | ANNpoint &p); // deallocate 1 point
|
||
429 | |||
430 | DLL_API void annDeallocPts(
|
||
431 | ANNpointArray &pa); // point array
|
||
432 | |||
433 | DLL_API ANNpoint annCopyPt( |
||
434 | int dim, // dimension |
||
435 | ANNpoint source); // point to copy
|
||
436 | |||
437 | //----------------------------------------------------------------------
|
||
438 | //Overall structure: ANN supports a number of different data structures
|
||
439 | //for approximate and exact nearest neighbor searching. These are:
|
||
440 | //
|
||
441 | // ANNbruteForce A simple brute-force search structure.
|
||
442 | // ANNkd_tree A kd-tree tree search structure. ANNbd_tree
|
||
443 | // A bd-tree tree search structure (a kd-tree with shrink
|
||
444 | // capabilities).
|
||
445 | //
|
||
446 | // At a minimum, each of these data structures support k-nearest
|
||
447 | // neighbor queries. The nearest neighbor query, annkSearch,
|
||
448 | // returns an integer identifier and the distance to the nearest
|
||
449 | // neighbor(s) and annRangeSearch returns the nearest points that
|
||
450 | // lie within a given query ball.
|
||
451 | //
|
||
452 | // Each structure is built by invoking the appropriate constructor
|
||
453 | // and passing it (at a minimum) the array of points, the total
|
||
454 | // number of points and the dimension of the space. Each structure
|
||
455 | // is also assumed to support a destructor and member functions
|
||
456 | // that return basic information about the point set.
|
||
457 | //
|
||
458 | // Note that the array of points is not copied by the data
|
||
459 | // structure (for reasons of space efficiency), and it is assumed
|
||
460 | // to be constant throughout the lifetime of the search structure.
|
||
461 | //
|
||
462 | // The search algorithm, annkSearch, is given the query point (q),
|
||
463 | // and the desired number of nearest neighbors to report (k), and
|
||
464 | // the error bound (eps) (whose default value is 0, implying exact
|
||
465 | // nearest neighbors). It returns two arrays which are assumed to
|
||
466 | // contain at least k elements: one (nn_idx) contains the indices
|
||
467 | // (within the point array) of the nearest neighbors and the other
|
||
468 | // (dd) contains the squared distances to these nearest neighbors.
|
||
469 | //
|
||
470 | // The search algorithm, annkFRSearch, is a fixed-radius kNN
|
||
471 | // search. In addition to a query point, it is given a (squared)
|
||
472 | // radius bound. (This is done for consistency, because the search
|
||
473 | // returns distances as squared quantities.) It does two things.
|
||
474 | // First, it computes the k nearest neighbors within the radius
|
||
475 | // bound, and second, it returns the total number of points lying
|
||
476 | // within the radius bound. It is permitted to set k = 0, in which
|
||
477 | // case it effectively answers a range counting query. If the
|
||
478 | // error bound epsilon is positive, then the search is approximate
|
||
479 | // in the sense that it is free to ignore any point that lies
|
||
480 | // outside a ball of radius r/(1+epsilon), where r is the given
|
||
481 | // (unsquared) radius bound.
|
||
482 | //
|
||
483 | // The generic object from which all the search structures are
|
||
484 | // dervied is given below. It is a virtual object, and is useless
|
||
485 | // by itself.
|
||
486 | //----------------------------------------------------------------------
|
||
487 | |||
488 | class DLL_API ANNpointSet { |
||
489 | public:
|
||
490 | virtual ~ANNpointSet() {} // virtual distructor
|
||
491 | |||
492 | virtual void annkSearch( // approx k near neighbor search |
||
493 | ANNpoint q, // query point
|
||
494 | int k, // number of near neighbors to return |
||
495 | ANNidxArray nn_idx, // nearest neighbor array (modified)
|
||
496 | ANNdistArray dd, // dist to near neighbors (modified)
|
||
497 | double eps=0.0 // error bound |
||
498 | ) = 0; // pure virtual (defined elsewhere) |
||
499 | |||
500 | virtual int annkFRSearch( // approx fixed-radius kNN search |
||
501 | ANNpoint q, // query point
|
||
502 | ANNdist sqRad, // squared radius
|
||
503 | int k = 0, // number of near neighbors to return |
||
504 | ANNidxArray nn_idx = NULL, // nearest neighbor array (modified) |
||
505 | ANNdistArray dd = NULL, // dist to near neighbors (modified) |
||
506 | double eps=0.0 // error bound |
||
507 | ) = 0; // pure virtual (defined elsewhere) |
||
508 | |||
509 | virtual int theDim() = 0; // return dimension of space |
||
510 | virtual int nPoints() = 0; // return number of points |
||
511 | // return pointer to points
|
||
512 | virtual ANNpointArray thePoints() = 0;
|
||
513 | }; |
||
514 | |||
515 | //----------------------------------------------------------------------
|
||
516 | // Brute-force nearest neighbor search:
|
||
517 | // The brute-force search structure is very simple but inefficient.
|
||
518 | // It has been provided primarily for the sake of comparison with
|
||
519 | // and validation of the more complex search structures.
|
||
520 | //
|
||
521 | // Query processing is the same as described above, but the value
|
||
522 | // of epsilon is ignored, since all distance calculations are
|
||
523 | // performed exactly.
|
||
524 | //
|
||
525 | // WARNING: This data structure is very slow, and should not be
|
||
526 | // used unless the number of points is very small.
|
||
527 | //
|
||
528 | // Internal information:
|
||
529 | // ---------------------
|
||
530 | // This data structure bascially consists of the array of points
|
||
531 | // (each a pointer to an array of coordinates). The search is
|
||
532 | // performed by a simple linear scan of all the points.
|
||
533 | //----------------------------------------------------------------------
|
||
534 | |||
535 | class DLL_API ANNbruteForce: public ANNpointSet { |
||
536 | int dim; // dimension |
||
537 | int n_pts; // number of points |
||
538 | ANNpointArray pts; // point array
|
||
539 | public:
|
||
540 | ANNbruteForce( // constructor from point array
|
||
541 | ANNpointArray pa, // point array
|
||
542 | int n, // number of points |
||
543 | int dd); // dimension |
||
544 | |||
545 | ~ANNbruteForce(); // destructor
|
||
546 | |||
547 | void annkSearch( // approx k near neighbor search |
||
548 | ANNpoint q, // query point
|
||
549 | int k, // number of near neighbors to return |
||
550 | ANNidxArray nn_idx, // nearest neighbor array (modified)
|
||
551 | ANNdistArray dd, // dist to near neighbors (modified)
|
||
552 | double eps=0.0); // error bound |
||
553 | |||
554 | int annkFRSearch( // approx fixed-radius kNN search |
||
555 | ANNpoint q, // query point
|
||
556 | ANNdist sqRad, // squared radius
|
||
557 | int k = 0, // number of near neighbors to return |
||
558 | ANNidxArray nn_idx = NULL, // nearest neighbor array (modified) |
||
559 | ANNdistArray dd = NULL, // dist to near neighbors (modified) |
||
560 | double eps=0.0); // error bound |
||
561 | |||
562 | int theDim() // return dimension of space |
||
563 | { return dim; }
|
||
564 | |||
565 | int nPoints() // return number of points |
||
566 | { return n_pts; }
|
||
567 | |||
568 | ANNpointArray thePoints() // return pointer to points
|
||
569 | { return pts; }
|
||
570 | }; |
||
571 | |||
572 | //----------------------------------------------------------------------
|
||
573 | // kd- and bd-tree splitting and shrinking rules
|
||
574 | // kd-trees supports a collection of different splitting rules.
|
||
575 | // In addition to the standard kd-tree splitting rule proposed
|
||
576 | // by Friedman, Bentley, and Finkel, we have introduced a
|
||
577 | // number of other splitting rules, which seem to perform
|
||
578 | // as well or better (for the distributions we have tested).
|
||
579 | //
|
||
580 | // The splitting methods given below allow the user to tailor
|
||
581 | // the data structure to the particular data set. They are
|
||
582 | // are described in greater details in the kd_split.cc source
|
||
583 | // file. The method ANN_KD_SUGGEST is the method chosen (rather
|
||
584 | // subjectively) by the implementors as the one giving the
|
||
585 | // fastest performance, and is the default splitting method.
|
||
586 | //
|
||
587 | // As with splitting rules, there are a number of different
|
||
588 | // shrinking rules. The shrinking rule ANN_BD_NONE does no
|
||
589 | // shrinking (and hence produces a kd-tree tree). The rule
|
||
590 | // ANN_BD_SUGGEST uses the implementors favorite rule.
|
||
591 | //----------------------------------------------------------------------
|
||
592 | |||
593 | enum ANNsplitRule {
|
||
594 | ANN_KD_STD = 0, // the optimized kd-splitting rule |
||
595 | ANN_KD_MIDPT = 1, // midpoint split |
||
596 | ANN_KD_FAIR = 2, // fair split |
||
597 | ANN_KD_SL_MIDPT = 3, // sliding midpoint splitting method |
||
598 | ANN_KD_SL_FAIR = 4, // sliding fair split method |
||
599 | ANN_KD_SUGGEST = 5}; // the authors' suggestion for best |
||
600 | const int ANN_N_SPLIT_RULES = 6; // number of split rules |
||
601 | |||
602 | enum ANNshrinkRule {
|
||
603 | ANN_BD_NONE = 0, // no shrinking at all (just kd-tree) |
||
604 | ANN_BD_SIMPLE = 1, // simple splitting |
||
605 | ANN_BD_CENTROID = 2, // centroid splitting |
||
606 | ANN_BD_SUGGEST = 3}; // the authors' suggested choice |
||
607 | const int ANN_N_SHRINK_RULES = 4; // number of shrink rules |
||
608 | |||
609 | //----------------------------------------------------------------------
|
||
610 | // kd-tree:
|
||
611 | // The main search data structure supported by ANN is a kd-tree.
|
||
612 | // The main constructor is given a set of points and a choice of
|
||
613 | // splitting method to use in building the tree.
|
||
614 | //
|
||
615 | // Construction:
|
||
616 | // -------------
|
||
617 | // The constructor is given the point array, number of points,
|
||
618 | // dimension, bucket size (default = 1), and the splitting rule
|
||
619 | // (default = ANN_KD_SUGGEST). The point array is not copied, and
|
||
620 | // is assumed to be kept constant throughout the lifetime of the
|
||
621 | // search structure. There is also a "load" constructor that
|
||
622 | // builds a tree from a file description that was created by the
|
||
623 | // Dump operation.
|
||
624 | //
|
||
625 | // Search:
|
||
626 | // -------
|
||
627 | // There are two search methods:
|
||
628 | //
|
||
629 | // Standard search (annkSearch()):
|
||
630 | // Searches nodes in tree-traversal order, always visiting
|
||
631 | // the closer child first.
|
||
632 | // Priority search (annkPriSearch()):
|
||
633 | // Searches nodes in order of increasing distance of the
|
||
634 | // associated cell from the query point. For many
|
||
635 | // distributions the standard search seems to work just
|
||
636 | // fine, but priority search is safer for worst-case
|
||
637 | // performance.
|
||
638 | //
|
||
639 | // Printing:
|
||
640 | // ---------
|
||
641 | // There are two methods provided for printing the tree. Print()
|
||
642 | // is used to produce a "human-readable" display of the tree, with
|
||
643 | // indenation, which is handy for debugging. Dump() produces a
|
||
644 | // format that is suitable reading by another program. There is a
|
||
645 | // "load" constructor, which constructs a tree which is assumed to
|
||
646 | // have been saved by the Dump() procedure.
|
||
647 | //
|
||
648 | // Performance and Structure Statistics:
|
||
649 | // -------------------------------------
|
||
650 | // The procedure getStats() collects statistics information on the
|
||
651 | // tree (its size, height, etc.) See ANNperf.h for information on
|
||
652 | // the stats structure it returns.
|
||
653 | //
|
||
654 | // Internal information:
|
||
655 | // ---------------------
|
||
656 | // The data structure consists of three major chunks of storage.
|
||
657 | // The first (implicit) storage are the points themselves (pts),
|
||
658 | // which have been provided by the users as an argument to the
|
||
659 | // constructor, or are allocated dynamically if the tree is built
|
||
660 | // using the load constructor). These should not be changed during
|
||
661 | // the lifetime of the search structure. It is the user's
|
||
662 | // responsibility to delete these after the tree is destroyed.
|
||
663 | //
|
||
664 | // The second is the tree itself (which is dynamically allocated in
|
||
665 | // the constructor) and is given as a pointer to its root node
|
||
666 | // (root). These nodes are automatically deallocated when the tree
|
||
667 | // is deleted. See the file src/kd_tree.h for further information
|
||
668 | // on the structure of the tree nodes.
|
||
669 | //
|
||
670 | // Each leaf of the tree does not contain a pointer directly to a
|
||
671 | // point, but rather contains a pointer to a "bucket", which is an
|
||
672 | // array consisting of point indices. The third major chunk of
|
||
673 | // storage is an array (pidx), which is a large array in which all
|
||
674 | // these bucket subarrays reside. (The reason for storing them
|
||
675 | // separately is the buckets are typically small, but of varying
|
||
676 | // sizes. This was done to avoid fragmentation.) This array is
|
||
677 | // also deallocated when the tree is deleted.
|
||
678 | //
|
||
679 | // In addition to this, the tree consists of a number of other
|
||
680 | // pieces of information which are used in searching and for
|
||
681 | // subsequent tree operations. These consist of the following:
|
||
682 | //
|
||
683 | // dim Dimension of space
|
||
684 | // n_pts Number of points currently in the tree
|
||
685 | // n_max Maximum number of points that are allowed
|
||
686 | // in the tree
|
||
687 | // bkt_size Maximum bucket size (no. of points per leaf)
|
||
688 | // bnd_box_lo Bounding box low point
|
||
689 | // bnd_box_hi Bounding box high point
|
||
690 | // splitRule Splitting method used
|
||
691 | //
|
||
692 | //----------------------------------------------------------------------
|
||
693 | |||
694 | //----------------------------------------------------------------------
|
||
695 | // Some types and objects used by kd-tree functions
|
||
696 | // See src/kd_tree.h and src/kd_tree.cpp for definitions
|
||
697 | //----------------------------------------------------------------------
|
||
698 | class ANNkdStats; // stats on kd-tree
|
||
699 | class ANNkd_node; // generic node in a kd-tree
|
||
700 | typedef ANNkd_node* ANNkd_ptr; // pointer to a kd-tree node |
||
701 | |||
702 | class DLL_API ANNkd_tree: public ANNpointSet { |
||
703 | protected:
|
||
704 | int dim; // dimension of space |
||
705 | int n_pts; // number of points in tree |
||
706 | int bkt_size; // bucket size |
||
707 | ANNpointArray pts; // the points
|
||
708 | ANNidxArray pidx; // point indices (to pts array)
|
||
709 | ANNkd_ptr root; // root of kd-tree
|
||
710 | ANNpoint bnd_box_lo; // bounding box low point
|
||
711 | ANNpoint bnd_box_hi; // bounding box high point
|
||
712 | |||
713 | void SkeletonTree( // construct skeleton tree |
||
714 | int n, // number of points |
||
715 | int dd, // dimension |
||
716 | int bs, // bucket size |
||
717 | ANNpointArray pa = NULL, // point array (optional) |
||
718 | ANNidxArray pi = NULL); // point indices (optional) |
||
719 | |||
720 | public:
|
||
721 | ANNkd_tree( // build skeleton tree
|
||
722 | int n = 0, // number of points |
||
723 | int dd = 0, // dimension |
||
724 | int bs = 1); // bucket size |
||
725 | |||
726 | ANNkd_tree( // build from point array
|
||
727 | ANNpointArray pa, // point array
|
||
728 | int n, // number of points |
||
729 | int dd, // dimension |
||
730 | int bs = 1, // bucket size |
||
731 | ANNsplitRule split = ANN_KD_SUGGEST); // splitting method
|
||
732 | |||
733 | ANNkd_tree( // build from dump file
|
||
734 | std::istream& in); // input stream for dump file
|
||
735 | |||
736 | ~ANNkd_tree(); // tree destructor
|
||
737 | |||
738 | void annkSearch( // approx k near neighbor search |
||
739 | ANNpoint q, // query point
|
||
740 | int k, // number of near neighbors to return |
||
741 | ANNidxArray nn_idx, // nearest neighbor array (modified)
|
||
742 | ANNdistArray dd, // dist to near neighbors (modified)
|
||
743 | double eps=0.0); // error bound |
||
744 | |||
745 | void annkPriSearch( // priority k near neighbor search |
||
746 | ANNpoint q, // query point
|
||
747 | int k, // number of near neighbors to return |
||
748 | ANNidxArray nn_idx, // nearest neighbor array (modified)
|
||
749 | ANNdistArray dd, // dist to near neighbors (modified)
|
||
750 | double eps=0.0); // error bound |
||
751 | |||
752 | int annkFRSearch( // approx fixed-radius kNN search |
||
753 | ANNpoint q, // the query point
|
||
754 | ANNdist sqRad, // squared radius of query ball
|
||
755 | int k, // number of neighbors to return |
||
756 | ANNidxArray nn_idx = NULL, // nearest neighbor array (modified) |
||
757 | ANNdistArray dd = NULL, // dist to near neighbors (modified) |
||
758 | double eps=0.0); // error bound |
||
759 | |||
760 | int theDim() // return dimension of space |
||
761 | { return dim; }
|
||
762 | |||
763 | int nPoints() // return number of points |
||
764 | { return n_pts; }
|
||
765 | |||
766 | ANNpointArray thePoints() // return pointer to points
|
||
767 | { return pts; }
|
||
768 | |||
769 | virtual void Print( // print the tree (for debugging) |
||
770 | ANNbool with_pts, // print points as well?
|
||
771 | std::ostream& out); // output stream
|
||
772 | |||
773 | virtual void Dump( // dump entire tree |
||
774 | ANNbool with_pts, // print points as well?
|
||
775 | std::ostream& out); // output stream
|
||
776 | |||
777 | virtual void getStats( // compute tree statistics |
||
778 | ANNkdStats& st); // the statistics (modified)
|
||
779 | }; |
||
780 | |||
781 | //----------------------------------------------------------------------
|
||
782 | // Box decomposition tree (bd-tree)
|
||
783 | // The bd-tree is inherited from a kd-tree. The main difference
|
||
784 | // in the bd-tree and the kd-tree is a new type of internal node
|
||
785 | // called a shrinking node (in the kd-tree there is only one type
|
||
786 | // of internal node, a splitting node). The shrinking node
|
||
787 | // makes it possible to generate balanced trees in which the
|
||
788 | // cells have bounded aspect ratio, by allowing the decomposition
|
||
789 | // to zoom in on regions of dense point concentration. Although
|
||
790 | // this is a nice idea in theory, few point distributions are so
|
||
791 | // densely clustered that this is really needed.
|
||
792 | //----------------------------------------------------------------------
|
||
793 | |||
794 | class DLL_API ANNbd_tree: public ANNkd_tree { |
||
795 | public:
|
||
796 | ANNbd_tree( // build skeleton tree
|
||
797 | int n, // number of points |
||
798 | int dd, // dimension |
||
799 | int bs = 1) // bucket size |
||
800 | : ANNkd_tree(n, dd, bs) {} // build base kd-tree
|
||
801 | |||
802 | ANNbd_tree( // build from point array
|
||
803 | ANNpointArray pa, // point array
|
||
804 | int n, // number of points |
||
805 | int dd, // dimension |
||
806 | int bs = 1, // bucket size |
||
807 | ANNsplitRule split = ANN_KD_SUGGEST, // splitting rule
|
||
808 | ANNshrinkRule shrink = ANN_BD_SUGGEST); // shrinking rule
|
||
809 | |||
810 | ANNbd_tree( // build from dump file
|
||
811 | std::istream& in); // input stream for dump file
|
||
812 | }; |
||
813 | |||
814 | //----------------------------------------------------------------------
|
||
815 | // Other functions
|
||
816 | // annMaxPtsVisit Sets a limit on the maximum number of points
|
||
817 | // to visit in the search.
|
||
818 | // annClose Can be called when all use of ANN is finished.
|
||
819 | // It clears up a minor memory leak.
|
||
820 | //----------------------------------------------------------------------
|
||
821 | |||
822 | DLL_API void annMaxPtsVisit( // max. pts to visit in search |
||
823 | int maxPts); // the limit |
||
824 | |||
825 | DLL_API void annClose(); // called to end use of ANN |
||
826 | |||
827 | #endif |