root / rgbdslam / gicp / ann_1.1.1 / src / kd_split.cpp @ 9240aaa3
History | View | Annotate | Download (16.5 KB)
1 |
//----------------------------------------------------------------------
|
---|---|
2 |
// File: kd_split.cpp
|
3 |
// Programmer: Sunil Arya and David Mount
|
4 |
// Description: Methods for splitting kd-trees
|
5 |
// Last modified: 01/04/05 (Version 1.0)
|
6 |
//----------------------------------------------------------------------
|
7 |
// Copyright (c) 1997-2005 University of Maryland and Sunil Arya and
|
8 |
// David Mount. All Rights Reserved.
|
9 |
//
|
10 |
// This software and related documentation is part of the Approximate
|
11 |
// Nearest Neighbor Library (ANN). This software is provided under
|
12 |
// the provisions of the Lesser GNU Public License (LGPL). See the
|
13 |
// file ../ReadMe.txt for further information.
|
14 |
//
|
15 |
// The University of Maryland (U.M.) and the authors make no
|
16 |
// representations about the suitability or fitness of this software for
|
17 |
// any purpose. It is provided "as is" without express or implied
|
18 |
// warranty.
|
19 |
//----------------------------------------------------------------------
|
20 |
// History:
|
21 |
// Revision 0.1 03/04/98
|
22 |
// Initial release
|
23 |
// Revision 1.0 04/01/05
|
24 |
//----------------------------------------------------------------------
|
25 |
|
26 |
#include "kd_tree.h" // kd-tree definitions |
27 |
#include "kd_util.h" // kd-tree utilities |
28 |
#include "kd_split.h" // splitting functions |
29 |
|
30 |
//----------------------------------------------------------------------
|
31 |
// Constants
|
32 |
//----------------------------------------------------------------------
|
33 |
|
34 |
const double ERR = 0.001; // a small value |
35 |
const double FS_ASPECT_RATIO = 3.0; // maximum allowed aspect ratio |
36 |
// in fair split. Must be >= 2.
|
37 |
|
38 |
//----------------------------------------------------------------------
|
39 |
// kd_split - Bentley's standard splitting routine for kd-trees
|
40 |
// Find the dimension of the greatest spread, and split
|
41 |
// just before the median point along this dimension.
|
42 |
//----------------------------------------------------------------------
|
43 |
|
44 |
void kd_split(
|
45 |
ANNpointArray pa, // point array (permuted on return)
|
46 |
ANNidxArray pidx, // point indices
|
47 |
const ANNorthRect &bnds, // bounding rectangle for cell |
48 |
int n, // number of points |
49 |
int dim, // dimension of space |
50 |
int &cut_dim, // cutting dimension (returned) |
51 |
ANNcoord &cut_val, // cutting value (returned)
|
52 |
int &n_lo) // num of points on low side (returned) |
53 |
{ |
54 |
// find dimension of maximum spread
|
55 |
cut_dim = annMaxSpread(pa, pidx, n, dim); |
56 |
n_lo = n/2; // median rank |
57 |
// split about median
|
58 |
annMedianSplit(pa, pidx, n, cut_dim, cut_val, n_lo); |
59 |
} |
60 |
|
61 |
//----------------------------------------------------------------------
|
62 |
// midpt_split - midpoint splitting rule for box-decomposition trees
|
63 |
//
|
64 |
// This is the simplest splitting rule that guarantees boxes
|
65 |
// of bounded aspect ratio. It simply cuts the box with the
|
66 |
// longest side through its midpoint. If there are ties, it
|
67 |
// selects the dimension with the maximum point spread.
|
68 |
//
|
69 |
// WARNING: This routine (while simple) doesn't seem to work
|
70 |
// well in practice in high dimensions, because it tends to
|
71 |
// generate a large number of trivial and/or unbalanced splits.
|
72 |
// Either kd_split(), sl_midpt_split(), or fair_split() are
|
73 |
// recommended, instead.
|
74 |
//----------------------------------------------------------------------
|
75 |
|
76 |
void midpt_split(
|
77 |
ANNpointArray pa, // point array
|
78 |
ANNidxArray pidx, // point indices (permuted on return)
|
79 |
const ANNorthRect &bnds, // bounding rectangle for cell |
80 |
int n, // number of points |
81 |
int dim, // dimension of space |
82 |
int &cut_dim, // cutting dimension (returned) |
83 |
ANNcoord &cut_val, // cutting value (returned)
|
84 |
int &n_lo) // num of points on low side (returned) |
85 |
{ |
86 |
int d;
|
87 |
|
88 |
ANNcoord max_length = bnds.hi[0] - bnds.lo[0]; |
89 |
for (d = 1; d < dim; d++) { // find length of longest box side |
90 |
ANNcoord length = bnds.hi[d] - bnds.lo[d]; |
91 |
if (length > max_length) {
|
92 |
max_length = length; |
93 |
} |
94 |
} |
95 |
ANNcoord max_spread = -1; // find long side with most spread |
96 |
for (d = 0; d < dim; d++) { |
97 |
// is it among longest?
|
98 |
if (double(bnds.hi[d] - bnds.lo[d]) >= (1-ERR)*max_length) { |
99 |
// compute its spread
|
100 |
ANNcoord spr = annSpread(pa, pidx, n, d); |
101 |
if (spr > max_spread) { // is it max so far? |
102 |
max_spread = spr; |
103 |
cut_dim = d; |
104 |
} |
105 |
} |
106 |
} |
107 |
// split along cut_dim at midpoint
|
108 |
cut_val = (bnds.lo[cut_dim] + bnds.hi[cut_dim]) / 2;
|
109 |
// permute points accordingly
|
110 |
int br1, br2;
|
111 |
annPlaneSplit(pa, pidx, n, cut_dim, cut_val, br1, br2); |
112 |
//------------------------------------------------------------------
|
113 |
// On return: pa[0..br1-1] < cut_val
|
114 |
// pa[br1..br2-1] == cut_val
|
115 |
// pa[br2..n-1] > cut_val
|
116 |
//
|
117 |
// We can set n_lo to any value in the range [br1..br2].
|
118 |
// We choose split so that points are most evenly divided.
|
119 |
//------------------------------------------------------------------
|
120 |
if (br1 > n/2) n_lo = br1; |
121 |
else if (br2 < n/2) n_lo = br2; |
122 |
else n_lo = n/2; |
123 |
} |
124 |
|
125 |
//----------------------------------------------------------------------
|
126 |
// sl_midpt_split - sliding midpoint splitting rule
|
127 |
//
|
128 |
// This is a modification of midpt_split, which has the nonsensical
|
129 |
// name "sliding midpoint". The idea is that we try to use the
|
130 |
// midpoint rule, by bisecting the longest side. If there are
|
131 |
// ties, the dimension with the maximum spread is selected. If,
|
132 |
// however, the midpoint split produces a trivial split (no points
|
133 |
// on one side of the splitting plane) then we slide the splitting
|
134 |
// (maintaining its orientation) until it produces a nontrivial
|
135 |
// split. For example, if the splitting plane is along the x-axis,
|
136 |
// and all the data points have x-coordinate less than the x-bisector,
|
137 |
// then the split is taken along the maximum x-coordinate of the
|
138 |
// data points.
|
139 |
//
|
140 |
// Intuitively, this rule cannot generate trivial splits, and
|
141 |
// hence avoids midpt_split's tendency to produce trees with
|
142 |
// a very large number of nodes.
|
143 |
//
|
144 |
//----------------------------------------------------------------------
|
145 |
|
146 |
void sl_midpt_split(
|
147 |
ANNpointArray pa, // point array
|
148 |
ANNidxArray pidx, // point indices (permuted on return)
|
149 |
const ANNorthRect &bnds, // bounding rectangle for cell |
150 |
int n, // number of points |
151 |
int dim, // dimension of space |
152 |
int &cut_dim, // cutting dimension (returned) |
153 |
ANNcoord &cut_val, // cutting value (returned)
|
154 |
int &n_lo) // num of points on low side (returned) |
155 |
{ |
156 |
int d;
|
157 |
|
158 |
ANNcoord max_length = bnds.hi[0] - bnds.lo[0]; |
159 |
for (d = 1; d < dim; d++) { // find length of longest box side |
160 |
ANNcoord length = bnds.hi[d] - bnds.lo[d]; |
161 |
if (length > max_length) {
|
162 |
max_length = length; |
163 |
} |
164 |
} |
165 |
ANNcoord max_spread = -1; // find long side with most spread |
166 |
for (d = 0; d < dim; d++) { |
167 |
// is it among longest?
|
168 |
if ((bnds.hi[d] - bnds.lo[d]) >= (1-ERR)*max_length) { |
169 |
// compute its spread
|
170 |
ANNcoord spr = annSpread(pa, pidx, n, d); |
171 |
if (spr > max_spread) { // is it max so far? |
172 |
max_spread = spr; |
173 |
cut_dim = d; |
174 |
} |
175 |
} |
176 |
} |
177 |
// ideal split at midpoint
|
178 |
ANNcoord ideal_cut_val = (bnds.lo[cut_dim] + bnds.hi[cut_dim])/2;
|
179 |
|
180 |
ANNcoord min, max; |
181 |
annMinMax(pa, pidx, n, cut_dim, min, max); // find min/max coordinates
|
182 |
|
183 |
if (ideal_cut_val < min) // slide to min or max as needed |
184 |
cut_val = min; |
185 |
else if (ideal_cut_val > max) |
186 |
cut_val = max; |
187 |
else
|
188 |
cut_val = ideal_cut_val; |
189 |
|
190 |
// permute points accordingly
|
191 |
int br1, br2;
|
192 |
annPlaneSplit(pa, pidx, n, cut_dim, cut_val, br1, br2); |
193 |
//------------------------------------------------------------------
|
194 |
// On return: pa[0..br1-1] < cut_val
|
195 |
// pa[br1..br2-1] == cut_val
|
196 |
// pa[br2..n-1] > cut_val
|
197 |
//
|
198 |
// We can set n_lo to any value in the range [br1..br2] to satisfy
|
199 |
// the exit conditions of the procedure.
|
200 |
//
|
201 |
// if ideal_cut_val < min (implying br2 >= 1),
|
202 |
// then we select n_lo = 1 (so there is one point on left) and
|
203 |
// if ideal_cut_val > max (implying br1 <= n-1),
|
204 |
// then we select n_lo = n-1 (so there is one point on right).
|
205 |
// Otherwise, we select n_lo as close to n/2 as possible within
|
206 |
// [br1..br2].
|
207 |
//------------------------------------------------------------------
|
208 |
if (ideal_cut_val < min) n_lo = 1; |
209 |
else if (ideal_cut_val > max) n_lo = n-1; |
210 |
else if (br1 > n/2) n_lo = br1; |
211 |
else if (br2 < n/2) n_lo = br2; |
212 |
else n_lo = n/2; |
213 |
} |
214 |
|
215 |
//----------------------------------------------------------------------
|
216 |
// fair_split - fair-split splitting rule
|
217 |
//
|
218 |
// This is a compromise between the kd-tree splitting rule (which
|
219 |
// always splits data points at their median) and the midpoint
|
220 |
// splitting rule (which always splits a box through its center.
|
221 |
// The goal of this procedure is to achieve both nicely balanced
|
222 |
// splits, and boxes of bounded aspect ratio.
|
223 |
//
|
224 |
// A constant FS_ASPECT_RATIO is defined. Given a box, those sides
|
225 |
// which can be split so that the ratio of the longest to shortest
|
226 |
// side does not exceed ASPECT_RATIO are identified. Among these
|
227 |
// sides, we select the one in which the points have the largest
|
228 |
// spread. We then split the points in a manner which most evenly
|
229 |
// distributes the points on either side of the splitting plane,
|
230 |
// subject to maintaining the bound on the ratio of long to short
|
231 |
// sides. To determine that the aspect ratio will be preserved,
|
232 |
// we determine the longest side (other than this side), and
|
233 |
// determine how narrowly we can cut this side, without causing the
|
234 |
// aspect ratio bound to be exceeded (small_piece).
|
235 |
//
|
236 |
// This procedure is more robust than either kd_split or midpt_split,
|
237 |
// but is more complicated as well. When point distribution is
|
238 |
// extremely skewed, this degenerates to midpt_split (actually
|
239 |
// 1/3 point split), and when the points are most evenly distributed,
|
240 |
// this degenerates to kd-split.
|
241 |
//----------------------------------------------------------------------
|
242 |
|
243 |
void fair_split(
|
244 |
ANNpointArray pa, // point array
|
245 |
ANNidxArray pidx, // point indices (permuted on return)
|
246 |
const ANNorthRect &bnds, // bounding rectangle for cell |
247 |
int n, // number of points |
248 |
int dim, // dimension of space |
249 |
int &cut_dim, // cutting dimension (returned) |
250 |
ANNcoord &cut_val, // cutting value (returned)
|
251 |
int &n_lo) // num of points on low side (returned) |
252 |
{ |
253 |
int d;
|
254 |
ANNcoord max_length = bnds.hi[0] - bnds.lo[0]; |
255 |
cut_dim = 0;
|
256 |
for (d = 1; d < dim; d++) { // find length of longest box side |
257 |
ANNcoord length = bnds.hi[d] - bnds.lo[d]; |
258 |
if (length > max_length) {
|
259 |
max_length = length; |
260 |
cut_dim = d; |
261 |
} |
262 |
} |
263 |
|
264 |
ANNcoord max_spread = 0; // find legal cut with max spread |
265 |
cut_dim = 0;
|
266 |
for (d = 0; d < dim; d++) { |
267 |
ANNcoord length = bnds.hi[d] - bnds.lo[d]; |
268 |
// is this side midpoint splitable
|
269 |
// without violating aspect ratio?
|
270 |
if (((double) max_length)*2.0/((double) length) <= FS_ASPECT_RATIO) { |
271 |
// compute spread along this dim
|
272 |
ANNcoord spr = annSpread(pa, pidx, n, d); |
273 |
if (spr > max_spread) { // best spread so far |
274 |
max_spread = spr; |
275 |
cut_dim = d; // this is dimension to cut
|
276 |
} |
277 |
} |
278 |
} |
279 |
|
280 |
max_length = 0; // find longest side other than cut_dim |
281 |
for (d = 0; d < dim; d++) { |
282 |
ANNcoord length = bnds.hi[d] - bnds.lo[d]; |
283 |
if (d != cut_dim && length > max_length)
|
284 |
max_length = length; |
285 |
} |
286 |
// consider most extreme splits
|
287 |
ANNcoord small_piece = max_length / FS_ASPECT_RATIO; |
288 |
ANNcoord lo_cut = bnds.lo[cut_dim] + small_piece;// lowest legal cut
|
289 |
ANNcoord hi_cut = bnds.hi[cut_dim] - small_piece;// highest legal cut
|
290 |
|
291 |
int br1, br2;
|
292 |
// is median below lo_cut ?
|
293 |
if (annSplitBalance(pa, pidx, n, cut_dim, lo_cut) >= 0) { |
294 |
cut_val = lo_cut; // cut at lo_cut
|
295 |
annPlaneSplit(pa, pidx, n, cut_dim, cut_val, br1, br2); |
296 |
n_lo = br1; |
297 |
} |
298 |
// is median above hi_cut?
|
299 |
else if (annSplitBalance(pa, pidx, n, cut_dim, hi_cut) <= 0) { |
300 |
cut_val = hi_cut; // cut at hi_cut
|
301 |
annPlaneSplit(pa, pidx, n, cut_dim, cut_val, br1, br2); |
302 |
n_lo = br2; |
303 |
} |
304 |
else { // median cut preserves asp ratio |
305 |
n_lo = n/2; // split about median |
306 |
annMedianSplit(pa, pidx, n, cut_dim, cut_val, n_lo); |
307 |
} |
308 |
} |
309 |
|
310 |
//----------------------------------------------------------------------
|
311 |
// sl_fair_split - sliding fair split splitting rule
|
312 |
//
|
313 |
// Sliding fair split is a splitting rule that combines the
|
314 |
// strengths of both fair split with sliding midpoint split.
|
315 |
// Fair split tends to produce balanced splits when the points
|
316 |
// are roughly uniformly distributed, but it can produce many
|
317 |
// trivial splits when points are highly clustered. Sliding
|
318 |
// midpoint never produces trivial splits, and shrinks boxes
|
319 |
// nicely if points are highly clustered, but it may produce
|
320 |
// rather unbalanced splits when points are unclustered but not
|
321 |
// quite uniform.
|
322 |
//
|
323 |
// Sliding fair split is based on the theory that there are two
|
324 |
// types of splits that are "good": balanced splits that produce
|
325 |
// fat boxes, and unbalanced splits provided the cell with fewer
|
326 |
// points is fat.
|
327 |
//
|
328 |
// This splitting rule operates by first computing the longest
|
329 |
// side of the current bounding box. Then it asks which sides
|
330 |
// could be split (at the midpoint) and still satisfy the aspect
|
331 |
// ratio bound with respect to this side. Among these, it selects
|
332 |
// the side with the largest spread (as fair split would). It
|
333 |
// then considers the most extreme cuts that would be allowed by
|
334 |
// the aspect ratio bound. This is done by dividing the longest
|
335 |
// side of the box by the aspect ratio bound. If the median cut
|
336 |
// lies between these extreme cuts, then we use the median cut.
|
337 |
// If not, then consider the extreme cut that is closer to the
|
338 |
// median. If all the points lie to one side of this cut, then
|
339 |
// we slide the cut until it hits the first point. This may
|
340 |
// violate the aspect ratio bound, but will never generate empty
|
341 |
// cells. However the sibling of every such skinny cell is fat,
|
342 |
// and hence packing arguments still apply.
|
343 |
//
|
344 |
//----------------------------------------------------------------------
|
345 |
|
346 |
void sl_fair_split(
|
347 |
ANNpointArray pa, // point array
|
348 |
ANNidxArray pidx, // point indices (permuted on return)
|
349 |
const ANNorthRect &bnds, // bounding rectangle for cell |
350 |
int n, // number of points |
351 |
int dim, // dimension of space |
352 |
int &cut_dim, // cutting dimension (returned) |
353 |
ANNcoord &cut_val, // cutting value (returned)
|
354 |
int &n_lo) // num of points on low side (returned) |
355 |
{ |
356 |
int d;
|
357 |
ANNcoord min, max; // min/max coordinates
|
358 |
int br1, br2; // split break points |
359 |
|
360 |
ANNcoord max_length = bnds.hi[0] - bnds.lo[0]; |
361 |
cut_dim = 0;
|
362 |
for (d = 1; d < dim; d++) { // find length of longest box side |
363 |
ANNcoord length = bnds.hi[d] - bnds.lo[d]; |
364 |
if (length > max_length) {
|
365 |
max_length = length; |
366 |
cut_dim = d; |
367 |
} |
368 |
} |
369 |
|
370 |
ANNcoord max_spread = 0; // find legal cut with max spread |
371 |
cut_dim = 0;
|
372 |
for (d = 0; d < dim; d++) { |
373 |
ANNcoord length = bnds.hi[d] - bnds.lo[d]; |
374 |
// is this side midpoint splitable
|
375 |
// without violating aspect ratio?
|
376 |
if (((double) max_length)*2.0/((double) length) <= FS_ASPECT_RATIO) { |
377 |
// compute spread along this dim
|
378 |
ANNcoord spr = annSpread(pa, pidx, n, d); |
379 |
if (spr > max_spread) { // best spread so far |
380 |
max_spread = spr; |
381 |
cut_dim = d; // this is dimension to cut
|
382 |
} |
383 |
} |
384 |
} |
385 |
|
386 |
max_length = 0; // find longest side other than cut_dim |
387 |
for (d = 0; d < dim; d++) { |
388 |
ANNcoord length = bnds.hi[d] - bnds.lo[d]; |
389 |
if (d != cut_dim && length > max_length)
|
390 |
max_length = length; |
391 |
} |
392 |
// consider most extreme splits
|
393 |
ANNcoord small_piece = max_length / FS_ASPECT_RATIO; |
394 |
ANNcoord lo_cut = bnds.lo[cut_dim] + small_piece;// lowest legal cut
|
395 |
ANNcoord hi_cut = bnds.hi[cut_dim] - small_piece;// highest legal cut
|
396 |
// find min and max along cut_dim
|
397 |
annMinMax(pa, pidx, n, cut_dim, min, max); |
398 |
// is median below lo_cut?
|
399 |
if (annSplitBalance(pa, pidx, n, cut_dim, lo_cut) >= 0) { |
400 |
if (max > lo_cut) { // are any points above lo_cut? |
401 |
cut_val = lo_cut; // cut at lo_cut
|
402 |
annPlaneSplit(pa, pidx, n, cut_dim, cut_val, br1, br2); |
403 |
n_lo = br1; // balance if there are ties
|
404 |
} |
405 |
else { // all points below lo_cut |
406 |
cut_val = max; // cut at max value
|
407 |
annPlaneSplit(pa, pidx, n, cut_dim, cut_val, br1, br2); |
408 |
n_lo = n-1;
|
409 |
} |
410 |
} |
411 |
// is median above hi_cut?
|
412 |
else if (annSplitBalance(pa, pidx, n, cut_dim, hi_cut) <= 0) { |
413 |
if (min < hi_cut) { // are any points below hi_cut? |
414 |
cut_val = hi_cut; // cut at hi_cut
|
415 |
annPlaneSplit(pa, pidx, n, cut_dim, cut_val, br1, br2); |
416 |
n_lo = br2; // balance if there are ties
|
417 |
} |
418 |
else { // all points above hi_cut |
419 |
cut_val = min; // cut at min value
|
420 |
annPlaneSplit(pa, pidx, n, cut_dim, cut_val, br1, br2); |
421 |
n_lo = 1;
|
422 |
} |
423 |
} |
424 |
else { // median cut is good enough |
425 |
n_lo = n/2; // split about median |
426 |
annMedianSplit(pa, pidx, n, cut_dim, cut_val, n_lo); |
427 |
} |
428 |
} |