Statistics
| Branch: | Revision:

root / rgbdslam / external / siftgpu / src / SiftGPU / PyramidCU.cpp @ 9240aaa3

History | View | Annotate | Download (30.1 KB)

1
////////////////////////////////////////////////////////////////////////////
2
//        File:                PyramidCU.cpp
3
//        Author:                Changchang Wu
4
//        Description : implementation of the PyramidCU class.
5
//                                CUDA-based implementation of SiftPyramid
6
//
7
//        Copyright (c) 2007 University of North Carolina at Chapel Hill
8
//        All Rights Reserved
9
//
10
//        Permission to use, copy, modify and distribute this software and its
11
//        documentation for educational, research and non-profit purposes, without
12
//        fee, and without a written agreement is hereby granted, provided that the
13
//        above copyright notice and the following paragraph appear in all copies.
14
//        
15
//        The University of North Carolina at Chapel Hill make no representations
16
//        about the suitability of this software for any purpose. It is provided
17
//        'as is' without express or implied warranty. 
18
//
19
//        Please send BUG REPORTS to ccwu@cs.unc.edu
20
//
21
////////////////////////////////////////////////////////////////////////////
22

    
23
#if defined(CUDA_SIFTGPU_ENABLED)
24

    
25

    
26
#include "GL/glew.h"
27
#include <iostream>
28
#include <vector>
29
#include <algorithm>
30
#include <stdlib.h>
31
#include <string.h>
32
#include <math.h>
33
using namespace std;
34

    
35
#include "GlobalUtil.h"
36
#include "GLTexImage.h"
37
#include "CuTexImage.h" 
38
#include "SiftGPU.h"
39
#include "SiftPyramid.h"
40
#include "ProgramCU.h"
41
#include "PyramidCU.h"
42

    
43

    
44
//#include "imdebug/imdebuggl.h"
45
//#pragma comment (lib, "../lib/imdebug.lib")
46

    
47

    
48

    
49
#define USE_TIMING()                double t, t0, tt;
50
#define OCTAVE_START()                if(GlobalUtil::_timingO){        t = t0 = CLOCK();        cout<<"#"<<i+_down_sample_factor<<"\t";        }
51
#define LEVEL_FINISH()                if(GlobalUtil::_timingL){        ProgramCU::FinishCUDA();        tt = CLOCK();cout<<(tt-t)<<"\t";        t = CLOCK();}
52
#define OCTAVE_FINISH()                if(GlobalUtil::_timingO)cout<<"|\t"<<(CLOCK()-t0)<<endl;
53

    
54

    
55
PyramidCU::PyramidCU(SiftParam& sp) : SiftPyramid(sp)
56
{
57
        _allPyramid = NULL;
58
        _histoPyramidTex = NULL;
59
        _featureTex = NULL;
60
        _descriptorTex = NULL;
61
        _orientationTex = NULL;
62
        _bufferPBO = 0;
63
    _bufferTEX = NULL;
64
        _inputTex = new CuTexImage();
65

    
66
    /////////////////////////
67
    InitializeContext();
68
}
69

    
70
PyramidCU::~PyramidCU()
71
{
72
        DestroyPerLevelData();
73
        DestroySharedData();
74
        DestroyPyramidData();
75
        if(_inputTex) delete _inputTex;
76
    if(_bufferPBO) glDeleteBuffers(1, &_bufferPBO);
77
    if(_bufferTEX) delete _bufferTEX;
78
}
79

    
80
void PyramidCU::InitializeContext()
81
{
82
    GlobalUtil::InitGLParam(1);
83
    GlobalUtil::_GoodOpenGL = max(GlobalUtil::_GoodOpenGL, 1); 
84
}
85

    
86
void PyramidCU::InitPyramid(int w, int h, int ds)
87
{
88
        int wp, hp, toobig = 0;
89
        if(ds == 0)
90
        {
91
                //
92
                TruncateWidth(w);
93
                ////
94
                _down_sample_factor = 0;
95
                if(GlobalUtil::_octave_min_default>=0)
96
                {
97
                        wp = w >> _octave_min_default;
98
                        hp = h >> _octave_min_default;
99
                }else
100
                {
101
                        //can't upsample by more than 8
102
                        _octave_min_default = max(-3, _octave_min_default);
103
                        //
104
                        wp = w << (-_octave_min_default);
105
                        hp = h << (-_octave_min_default);
106
                }
107
                _octave_min = _octave_min_default;
108
        }else
109
        {
110
                //must use 0 as _octave_min; 
111
                _octave_min = 0;
112
                _down_sample_factor = ds;
113
                w >>= ds;
114
                h >>= ds;
115
                /////
116

    
117
                TruncateWidth(w);
118

    
119
                wp = w;
120
                hp = h; 
121

    
122
        }
123

    
124
        while(wp > GlobalUtil::_texMaxDim  || hp > GlobalUtil::_texMaxDim )
125
        {
126
                _octave_min ++;
127
                wp >>= 1;
128
                hp >>= 1;
129
                toobig = 1;
130
        }
131
        if(toobig && GlobalUtil::_verbose && _octave_min > 0)
132
        {
133
                std::cout<< "**************************************************************\n"
134
                                        "Image larger than allowed dimension, data will be downsampled!\n"
135
                                        "use -maxd to change the settings\n"
136
                                        "***************************************************************\n";
137
        }
138
        //ResizePyramid(wp, hp);
139
        if( wp == _pyramid_width && hp == _pyramid_height && _allocated )
140
        {
141
                FitPyramid(wp, hp);
142
        }else if(GlobalUtil::_ForceTightPyramid || _allocated ==0)
143
        {
144
                ResizePyramid(wp, hp);
145
        }
146
        else if( wp > _pyramid_width || hp > _pyramid_height )
147
        {
148
                ResizePyramid(max(wp, _pyramid_width), max(hp, _pyramid_height));
149
                if(wp < _pyramid_width || hp < _pyramid_height)  FitPyramid(wp, hp);
150
        }
151
        else
152
        {
153
                //try use the pyramid allocated for large image on small input images
154
                FitPyramid(wp, hp);
155
        }
156
}
157

    
158
void PyramidCU::ResizePyramid(int w, int h)
159
{
160
        //
161
        unsigned int totalkb = 0;
162
        int _octave_num_new, input_sz, i, j;
163
        //
164

    
165
        if(_pyramid_width == w && _pyramid_height == h && _allocated) return;
166

    
167
        if(w > GlobalUtil::_texMaxDim || h > GlobalUtil::_texMaxDim) return ;
168

    
169
        if(GlobalUtil::_verbose && GlobalUtil::_timingS) std::cout<<"[Allocate Pyramid]:\t" <<w<<"x"<<h<<endl;
170
        //first octave does not change
171
        _pyramid_octave_first = 0;
172

    
173
        
174
        //compute # of octaves
175

    
176
        input_sz = min(w,h) ;
177
        _pyramid_width =  w;
178
        _pyramid_height =  h;
179

    
180

    
181

    
182
        //reset to preset parameters
183

    
184
        _octave_num_new  = GlobalUtil::_octave_num_default;
185

    
186
        if(_octave_num_new < 1) 
187
        {
188
                _octave_num_new = (int) floor (log ( double(input_sz))/log(2.0)) -3 ;
189
                if(_octave_num_new<1 ) _octave_num_new = 1;
190
        }
191

    
192
        if(_pyramid_octave_num != _octave_num_new)
193
        {
194
                //destroy the original pyramid if the # of octave changes
195
                if(_octave_num >0) 
196
                {
197
                        DestroyPerLevelData();
198
                        DestroyPyramidData();
199
                }
200
                _pyramid_octave_num = _octave_num_new;
201
        }
202

    
203
        _octave_num = _pyramid_octave_num;
204

    
205
        int noct = _octave_num;
206
        int nlev = param._level_num;
207

    
208
        //        //initialize the pyramid
209
        if(_allPyramid==NULL)        _allPyramid = new CuTexImage[ noct* nlev * DATA_NUM];
210

    
211
        CuTexImage * gus =  GetBaseLevel(_octave_min, DATA_GAUSSIAN);
212
        CuTexImage * dog =  GetBaseLevel(_octave_min, DATA_DOG);
213
        CuTexImage * got =  GetBaseLevel(_octave_min, DATA_GRAD);
214
        CuTexImage * key =  GetBaseLevel(_octave_min, DATA_KEYPOINT);
215

    
216
        ////////////there could be "out of memory" happening during the allocation
217

    
218
        for(i = 0; i< noct; i++)
219
        {
220
                int wa = ((w + 3) / 4) * 4;
221

    
222
                totalkb += ((nlev *8 -19)* (wa * h) * 4 / 1024);
223
                for( j = 0; j< nlev; j++, gus++, dog++, got++, key++)
224
                {
225
                        gus->InitTexture(wa, h); //nlev
226
                        if(j==0)continue;
227
                        dog->InitTexture(wa, h);  //nlev -1
228
                        if(        j >= 1 && j < 1 + param._dog_level_num)
229
                        {
230
                                got->InitTexture(wa, h, 2); //2 * nlev - 6
231
                                got->InitTexture2D();
232
                        }
233
                        if(j > 1 && j < nlev -1)        key->InitTexture(wa, h, 4); // nlev -3 ; 4 * nlev - 12
234
                }
235
                w>>=1;
236
                h>>=1;
237
        }
238

    
239
        totalkb += ResizeFeatureStorage();
240

    
241
        if(ProgramCU::CheckErrorCUDA("ResizePyramid")) SetFailStatus(); 
242

    
243
        _allocated = 1;
244

    
245
        if(GlobalUtil::_verbose && GlobalUtil::_timingS) std::cout<<"[Allocate Pyramid]:\t" <<(totalkb/1024)<<"MB\n";
246

    
247
}
248

    
249
void PyramidCU::FitPyramid(int w, int h)
250
{
251
        _pyramid_octave_first = 0;
252
        //
253
        _octave_num  = GlobalUtil::_octave_num_default;
254

    
255
        int _octave_num_max = max(1, (int) floor (log ( double(min(w, h)))/log(2.0))  -3 );
256

    
257
        if(_octave_num < 1 || _octave_num > _octave_num_max) 
258
        {
259
                _octave_num = _octave_num_max;
260
        }
261

    
262

    
263
        int pw = _pyramid_width>>1, ph = _pyramid_height>>1;
264
        while(_pyramid_octave_first + _octave_num < _pyramid_octave_num &&  
265
                pw >= w && ph >= h)
266
        {
267
                _pyramid_octave_first++;
268
                pw >>= 1;
269
                ph >>= 1;
270
        }
271

    
272
        //////////////////
273
        int nlev = param._level_num;
274
        CuTexImage * gus =  GetBaseLevel(_octave_min, DATA_GAUSSIAN);
275
        CuTexImage * dog =  GetBaseLevel(_octave_min, DATA_DOG);
276
        CuTexImage * got =  GetBaseLevel(_octave_min, DATA_GRAD);
277
        CuTexImage * key =  GetBaseLevel(_octave_min, DATA_KEYPOINT);
278
        for(int i = 0; i< _octave_num; i++)
279
        {
280
                int wa = ((w + 3) / 4) * 4;
281

    
282
                for(int j = 0; j< nlev; j++, gus++, dog++, got++, key++)
283
                {
284
                        gus->InitTexture(wa, h); //nlev
285
                        if(j==0)continue;
286
                        dog->InitTexture(wa, h);  //nlev -1
287
                        if(        j >= 1 && j < 1 + param._dog_level_num)
288
                        {
289
                                got->InitTexture(wa, h, 2); //2 * nlev - 6
290
                                got->InitTexture2D();
291
                        }
292
                        if(j > 1 && j < nlev -1)        key->InitTexture(wa, h, 4); // nlev -3 ; 4 * nlev - 12
293
                }
294
                w>>=1;
295
                h>>=1;
296
        }
297
}
298

    
299
int PyramidCU::CheckCudaDevice(int device)
300
{
301
    return ProgramCU::CheckCudaDevice(device);
302
}
303

    
304
void PyramidCU::SetLevelFeatureNum(int idx, int fcount)
305
{
306
        _featureTex[idx].InitTexture(fcount, 1, 4);
307
        _levelFeatureNum[idx] = fcount;
308
}
309

    
310
int PyramidCU::ResizeFeatureStorage()
311
{
312
        int totalkb = 0;
313
        if(_levelFeatureNum==NULL)        _levelFeatureNum = new int[_octave_num * param._dog_level_num];
314
        std::fill(_levelFeatureNum, _levelFeatureNum+_octave_num * param._dog_level_num, 0); 
315

    
316
        int wmax = GetBaseLevel(_octave_min)->GetImgWidth();
317
        int hmax = GetBaseLevel(_octave_min)->GetImgHeight();
318
        int whmax = max(wmax, hmax);
319
        int w,  i;
320

    
321
        //
322
        int num = (int)ceil(log(double(whmax))/log(4.0));
323

    
324
        if( _hpLevelNum != num)
325
        {
326
                _hpLevelNum = num;
327
                if(_histoPyramidTex ) delete [] _histoPyramidTex;
328
                _histoPyramidTex = new CuTexImage[_hpLevelNum];
329
        }
330

    
331
        for(i = 0, w = 1; i < _hpLevelNum; i++)
332
        {
333
                _histoPyramidTex[i].InitTexture(w, whmax, 4);
334
                w<<=2;
335
        }
336

    
337
        // (4 ^ (_hpLevelNum) -1 / 3) pixels
338
        totalkb += (((1 << (2 * _hpLevelNum)) -1) / 3 * 16 / 1024);
339

    
340
        //initialize the feature texture
341
        int idx = 0, n = _octave_num * param._dog_level_num;
342
        if(_featureTex==NULL)        _featureTex = new CuTexImage[n];
343
        if(GlobalUtil::_MaxOrientation >1 && GlobalUtil::_OrientationPack2==0 && _orientationTex== NULL)
344
                _orientationTex = new CuTexImage[n];
345

    
346

    
347
        for(i = 0; i < _octave_num; i++)
348
        {
349
                CuTexImage * tex = GetBaseLevel(i+_octave_min);
350
                int fmax = int(tex->GetImgWidth() * tex->GetImgHeight()*GlobalUtil::_MaxFeaturePercent);
351
                //
352
                if(fmax > GlobalUtil::_MaxLevelFeatureNum) fmax = GlobalUtil::_MaxLevelFeatureNum;
353
                else if(fmax < 32) fmax = 32;        //give it at least a space of 32 feature
354

    
355
                for(int j = 0; j < param._dog_level_num; j++, idx++)
356
                {
357
                        _featureTex[idx].InitTexture(fmax, 1, 4);
358
                        totalkb += fmax * 16 /1024;
359
                        //
360
                        if(GlobalUtil::_MaxOrientation>1 && GlobalUtil::_OrientationPack2 == 0)
361
                        {
362
                                _orientationTex[idx].InitTexture(fmax, 1, 4);
363
                                totalkb += fmax * 16 /1024;
364
                        }
365
                }
366
        }
367

    
368

    
369
        //this just need be initialized once
370
        if(_descriptorTex==NULL)
371
        {
372
                //initialize feature texture pyramid
373
                int fmax = _featureTex->GetImgWidth();
374
                _descriptorTex = new CuTexImage;
375
                totalkb += ( fmax /2);
376
                _descriptorTex->InitTexture(fmax *128, 1, 1);
377
        }else
378
        {
379
                totalkb +=  _descriptorTex->GetDataSize()/1024;
380
        }
381
        return totalkb;
382
}
383

    
384
void PyramidCU::GetFeatureDescriptors() 
385
{
386
        //descriptors...
387
        float* pd =  &_descriptor_buffer[0];
388
        vector<float> descriptor_buffer2;
389

    
390
        //use another buffer if we need to re-order the descriptors
391
        if(_keypoint_index.size() > 0)
392
        {
393
                descriptor_buffer2.resize(_descriptor_buffer.size());
394
                pd = &descriptor_buffer2[0];
395
        }
396

    
397
        CuTexImage * got, * ftex= _featureTex;
398
        for(int i = 0, idx = 0; i < _octave_num; i++)
399
        {
400
                got = GetBaseLevel(i + _octave_min, DATA_GRAD) + 1;
401
                for(int j = 0; j < param._dog_level_num; j++, ftex++, idx++, got++)
402
                {
403
                        if(_levelFeatureNum[idx]==0) continue;
404
            ProgramCU::ComputeDescriptor(ftex, got, _descriptorTex, IsUsingRectDescription());//process
405
                        _descriptorTex->CopyToHost(pd); //readback descriptor
406
                        pd += 128*_levelFeatureNum[idx];
407
                }
408
        }
409

    
410
        if(GlobalUtil::_timingS) ProgramCU::FinishCUDA();
411

    
412
        if(_keypoint_index.size() > 0)
413
        {
414
            //put the descriptor back to the original order for keypoint list.
415
                for(int i = 0; i < _featureNum; ++i)
416
                {
417
                        int index = _keypoint_index[i];
418
                        memcpy(&_descriptor_buffer[index*128], &descriptor_buffer2[i*128], 128 * sizeof(float));
419
                }
420
        }
421

    
422
        if(ProgramCU::CheckErrorCUDA("PyramidCU::GetFeatureDescriptors")) SetFailStatus(); 
423
}
424

    
425
void PyramidCU::GenerateFeatureListTex() 
426
{
427

    
428
        vector<float> list;
429
        int idx = 0;
430
        const double twopi = 2.0*3.14159265358979323846;
431
        float sigma_half_step = powf(2.0f, 0.5f / param._dog_level_num);
432
        float octave_sigma = _octave_min>=0? float(1<<_octave_min): 1.0f/(1<<(-_octave_min));
433
        float offset = GlobalUtil::_LoweOrigin? 0 : 0.5f; 
434
        if(_down_sample_factor>0) octave_sigma *= float(1<<_down_sample_factor); 
435

    
436
        _keypoint_index.resize(0); // should already be 0
437
        for(int i = 0; i < _octave_num; i++, octave_sigma*= 2.0f)
438
        {
439
                for(int j = 0; j < param._dog_level_num; j++, idx++)
440
                {
441
                        list.resize(0);
442
                        float level_sigma = param.GetLevelSigma(j + param._level_min + 1) * octave_sigma;
443
                        float sigma_min = level_sigma / sigma_half_step;
444
                        float sigma_max = level_sigma * sigma_half_step;
445
                        int fcount = 0 ;
446
                        for(int k = 0; k < _featureNum; k++)
447
                        {
448
                                float * key = &_keypoint_buffer[k*4];
449
                float sigmak = key[2]; 
450
                //////////////////////////////////////
451
                if(IsUsingRectDescription()) sigmak = min(key[2], key[3]) / 12.0f; 
452

    
453
                                if(   (sigmak >= sigma_min && sigmak < sigma_max)
454
                                        ||(sigmak < sigma_min && i ==0 && j == 0)
455
                                        ||(sigmak > sigma_max && i == _octave_num -1 && j == param._dog_level_num - 1))
456
                                {
457
                                        //add this keypoint to the list
458
                                        list.push_back((key[0] - offset) / octave_sigma + 0.5f);
459
                                        list.push_back((key[1] - offset) / octave_sigma + 0.5f);
460
                    if(IsUsingRectDescription())
461
                    {
462
                        list.push_back(key[2] / octave_sigma);
463
                        list.push_back(key[3] / octave_sigma);
464
                    }else
465
                    {
466
                                            list.push_back(key[2] / octave_sigma);
467
                                            list.push_back((float)fmod(twopi-key[3], twopi));
468
                    }
469
                                        fcount ++;
470
                                        //save the index of keypoints
471
                                        _keypoint_index.push_back(k);
472
                                }
473

    
474
                        }
475

    
476
                        _levelFeatureNum[idx] = fcount;
477
                        if(fcount==0)continue;
478
                        CuTexImage * ftex = _featureTex+idx;
479

    
480
                        SetLevelFeatureNum(idx, fcount);
481
                        ftex->CopyFromHost(&list[0]);
482
                }
483
        }
484

    
485
        if(GlobalUtil::_verbose)
486
        {
487
                std::cout<<"#Features:\t"<<_featureNum<<"\n";
488
        }
489

    
490
}
491

    
492
void PyramidCU::ReshapeFeatureListCPU() 
493
{
494
        int i, szmax =0, sz;
495
        int n = param._dog_level_num*_octave_num;
496
        for( i = 0; i < n; i++) 
497
        {
498
                sz = _levelFeatureNum[i];
499
                if(sz > szmax ) szmax = sz;
500
        }
501
        float * buffer = new float[szmax*16];
502
        float * buffer1 = buffer;
503
        float * buffer2 = buffer + szmax*4;
504

    
505

    
506

    
507
        _featureNum = 0;
508

    
509
#ifdef NO_DUPLICATE_DOWNLOAD
510
        const double twopi = 2.0*3.14159265358979323846;
511
        _keypoint_buffer.resize(0);
512
        float os = _octave_min>=0? float(1<<_octave_min): 1.0f/(1<<(-_octave_min));
513
        if(_down_sample_factor>0) os *= float(1<<_down_sample_factor); 
514
        float offset = GlobalUtil::_LoweOrigin? 0 : 0.5f;
515
#endif
516

    
517

    
518
        for(i = 0; i < n; i++)
519
        {
520
                if(_levelFeatureNum[i]==0)continue;
521

    
522
                _featureTex[i].CopyToHost(buffer1);
523
                
524
                int fcount =0;
525
                float * src = buffer1;
526
                float * des = buffer2;
527
                const static double factor  = 2.0*3.14159265358979323846/65535.0;
528
                for(int j = 0; j < _levelFeatureNum[i]; j++, src+=4)
529
                {
530
                        unsigned short * orientations = (unsigned short*) (&src[3]);
531
                        if(orientations[0] != 65535)
532
                        {
533
                                des[0] = src[0];
534
                                des[1] = src[1];
535
                                des[2] = src[2];
536
                                des[3] = float( factor* orientations[0]);
537
                                fcount++;
538
                                des += 4;
539
                                if(orientations[1] != 65535 && orientations[1] != orientations[0])
540
                                {
541
                                        des[0] = src[0];
542
                                        des[1] = src[1];
543
                                        des[2] = src[2];
544
                                        des[3] = float(factor* orientations[1]);        
545
                                        fcount++;
546
                                        des += 4;
547
                                }
548
                        }
549
                }
550
                //texture size
551
                SetLevelFeatureNum(i, fcount);
552
                _featureTex[i].CopyFromHost(buffer2);
553
                
554
                if(fcount == 0) continue;
555

    
556
#ifdef NO_DUPLICATE_DOWNLOAD
557
                float oss = os * (1 << (i / param._dog_level_num));
558
                _keypoint_buffer.resize((_featureNum + fcount) * 4);
559
                float* ds = &_keypoint_buffer[_featureNum * 4];
560
                float* fs = buffer2;
561
                for(int k = 0;  k < fcount; k++, ds+=4, fs+=4)
562
                {
563
                        ds[0] = oss*(fs[0]-0.5f) + offset;        //x
564
                        ds[1] = oss*(fs[1]-0.5f) + offset;        //y
565
                        ds[2] = oss*fs[2];  //scale
566
                        ds[3] = (float)fmod(twopi-fs[3], twopi);        //orientation, mirrored
567
                }
568
#endif
569
                _featureNum += fcount;
570
        }
571
        delete[] buffer;
572
        if(GlobalUtil::_verbose)
573
        {
574
                std::cout<<"#Features MO:\t"<<_featureNum<<endl;
575
        }
576
}
577

    
578
void PyramidCU::GenerateFeatureDisplayVBO() 
579
{
580
        //it is weried that this part is very slow.
581
        //use a big VBO to save all the SIFT box vertices
582
        int nvbo = _octave_num * param._dog_level_num;
583
        if(_featureDisplayVBO==NULL)
584
        {
585
                //initialize the vbos
586
                _featureDisplayVBO = new GLuint[nvbo];
587
                _featurePointVBO = new GLuint[nvbo];
588
                glGenBuffers(nvbo, _featureDisplayVBO);        
589
                glGenBuffers(nvbo, _featurePointVBO);
590
        }
591
        for(int i = 0; i < nvbo; i++)
592
        {
593
                if(_levelFeatureNum[i]<=0)continue;
594
                CuTexImage * ftex  = _featureTex + i;
595
                CuTexImage texPBO1( _levelFeatureNum[i]* 10, 1, 4, _featureDisplayVBO[i]);
596
                CuTexImage texPBO2(_levelFeatureNum[i], 1, 4, _featurePointVBO[i]);
597
                ProgramCU::DisplayKeyBox(ftex, &texPBO1);
598
                ProgramCU::DisplayKeyPoint(ftex, &texPBO2);        
599
        }
600
}
601

    
602
void PyramidCU::DestroySharedData() 
603
{
604
        //histogram reduction
605
        if(_histoPyramidTex)
606
        {
607
                delete[]        _histoPyramidTex;
608
                _hpLevelNum = 0;
609
                _histoPyramidTex = NULL;
610
        }
611
        //descriptor storage shared by all levels
612
        if(_descriptorTex)
613
        {
614
                delete _descriptorTex;
615
                _descriptorTex = NULL;
616
        }
617
        //cpu reduction buffer.
618
        if(_histo_buffer)
619
        {
620
                delete[] _histo_buffer;
621
                _histo_buffer = 0;
622
        }
623
}
624

    
625
void PyramidCU::DestroyPerLevelData() 
626
{
627
        //integers vector to store the feature numbers.
628
        if(_levelFeatureNum)
629
        {
630
                delete [] _levelFeatureNum;
631
                _levelFeatureNum = NULL;
632
        }
633
        //texture used to store features
634
        if(        _featureTex)
635
        {
636
                delete [] _featureTex;
637
                _featureTex =        NULL;
638
        }
639
        //texture used for multi-orientation 
640
        if(_orientationTex)
641
        {
642
                delete [] _orientationTex;
643
                _orientationTex = NULL;
644
        }
645
        int no = _octave_num* param._dog_level_num;
646

    
647
        //two sets of vbos used to display the features
648
        if(_featureDisplayVBO)
649
        {
650
                glDeleteBuffers(no, _featureDisplayVBO);
651
                delete [] _featureDisplayVBO;
652
                _featureDisplayVBO = NULL;
653
        }
654
        if( _featurePointVBO)
655
        {
656
                glDeleteBuffers(no, _featurePointVBO);
657
                delete [] _featurePointVBO;
658
                _featurePointVBO = NULL;
659
        }
660
}
661

    
662
void PyramidCU::DestroyPyramidData()
663
{
664
        if(_allPyramid)
665
        {
666
                delete [] _allPyramid;
667
                _allPyramid = NULL;
668
        }
669
}
670

    
671
void PyramidCU::DownloadKeypoints() 
672
{
673
        const double twopi = 2.0*3.14159265358979323846;
674
        int idx = 0;
675
        float * buffer = &_keypoint_buffer[0];
676
        vector<float> keypoint_buffer2;
677
        //use a different keypoint buffer when processing with an exisint features list
678
        //without orientation information. 
679
        if(_keypoint_index.size() > 0)
680
        {
681
                keypoint_buffer2.resize(_keypoint_buffer.size());
682
                buffer = &keypoint_buffer2[0];
683
        }
684
        float * p = buffer, *ps;
685
        CuTexImage * ftex = _featureTex;
686
        /////////////////////
687
        float os = _octave_min>=0? float(1<<_octave_min): 1.0f/(1<<(-_octave_min));
688
        if(_down_sample_factor>0) os *= float(1<<_down_sample_factor); 
689
        float offset = GlobalUtil::_LoweOrigin? 0 : 0.5f;
690
        /////////////////////
691
        for(int i = 0; i < _octave_num; i++, os *= 2.0f)
692
        {
693
        
694
                for(int j = 0; j  < param._dog_level_num; j++, idx++, ftex++)
695
                {
696

    
697
                        if(_levelFeatureNum[idx]>0)
698
                        {        
699
                                ftex->CopyToHost(ps = p);
700
                                for(int k = 0;  k < _levelFeatureNum[idx]; k++, ps+=4)
701
                                {
702
                                        ps[0] = os*(ps[0]-0.5f) + offset;        //x
703
                                        ps[1] = os*(ps[1]-0.5f) + offset;        //y
704
                                        ps[2] = os*ps[2]; 
705
                                        ps[3] = (float)fmod(twopi-ps[3], twopi);        //orientation, mirrored
706
                                }
707
                                p+= 4* _levelFeatureNum[idx];
708
                        }
709
                }
710
        }
711

    
712
        //put the feature into their original order for existing keypoint 
713
        if(_keypoint_index.size() > 0)
714
        {
715
                for(int i = 0; i < _featureNum; ++i)
716
                {
717
                        int index = _keypoint_index[i];
718
                        memcpy(&_keypoint_buffer[index*4], &keypoint_buffer2[i*4], 4 * sizeof(float));
719
                }
720
        }
721
}
722

    
723
void PyramidCU::GenerateFeatureListCPU()
724
{
725
        //no cpu version provided
726
        GenerateFeatureList();
727
}
728

    
729
void PyramidCU::GenerateFeatureList(int i, int j, int reduction_count, vector<int>& hbuffer)
730
{
731
    int fcount = 0, idx = i * param._dog_level_num  + j;
732
        int hist_level_num = _hpLevelNum - _pyramid_octave_first /2; 
733
        int ii, k, len; 
734

    
735
        CuTexImage * htex, * ftex, * tex, *got;
736
        ftex = _featureTex + idx;
737
        htex = _histoPyramidTex + hist_level_num -1;
738
        tex = GetBaseLevel(_octave_min + i, DATA_KEYPOINT) + 2 + j;
739
        got = GetBaseLevel(_octave_min + i, DATA_GRAD) + 2 + j;
740

    
741
        ProgramCU::InitHistogram(tex, htex);
742

    
743
        for(k = 0; k < reduction_count - 1; k++, htex--)
744
        {
745
                ProgramCU::ReduceHistogram(htex, htex -1);        
746
        }
747
        
748
        //htex has the row reduction result
749
        len = htex->GetImgHeight() * 4;
750
        hbuffer.resize(len);
751
        ProgramCU::FinishCUDA();
752
        htex->CopyToHost(&hbuffer[0]);
753
        
754
    ////TO DO: track the error found here..
755
        for(ii = 0; ii < len; ++ii)     {if(!(hbuffer[ii]>= 0)) hbuffer[ii] = 0; }//?
756
        
757
    
758
    for(ii = 0; ii < len; ++ii)                fcount += hbuffer[ii];
759
        SetLevelFeatureNum(idx, fcount);
760
        
761
    //build the feature list
762
        if(fcount > 0)
763
        {
764
                _featureNum += fcount;
765
                _keypoint_buffer.resize(fcount * 4);
766
                //vector<int> ikbuf(fcount*4);
767
                int* ibuf = (int*) (&_keypoint_buffer[0]);
768

    
769
                for(ii = 0; ii < len; ++ii)
770
                {
771
                        int x = ii%4, y = ii / 4;
772
                        for(int jj = 0 ; jj < hbuffer[ii]; ++jj, ibuf+=4)
773
                        {
774
                                ibuf[0] = x; ibuf[1] = y; ibuf[2] = jj; ibuf[3] = 0;
775
                        }
776
                }
777
                _featureTex[idx].CopyFromHost(&_keypoint_buffer[0]);
778
        
779
                ////////////////////////////////////////////
780
                ProgramCU::GenerateList(_featureTex + idx, ++htex);
781
                for(k = 2; k < reduction_count; k++)
782
                {
783
                        ProgramCU::GenerateList(_featureTex + idx, ++htex);
784
                }
785
        }
786
}
787

    
788
void PyramidCU::GenerateFeatureList()
789
{
790
        double t1, t2; 
791
        int ocount = 0, reduction_count;
792
    int reverse = (GlobalUtil::_TruncateMethod == 1);
793

    
794
        vector<int> hbuffer;
795
        _featureNum = 0;
796

    
797
        //for(int i = 0, idx = 0; i < _octave_num; i++)
798
    FOR_EACH_OCTAVE(i, reverse)
799
        {
800
        CuTexImage* tex = GetBaseLevel(_octave_min + i, DATA_KEYPOINT) + 2;
801
                reduction_count = FitHistogramPyramid(tex);
802

    
803
                if(GlobalUtil::_timingO)
804
                {
805
                        t1 = CLOCK(); 
806
                        ocount = 0;
807
                        std::cout<<"#"<<i+_octave_min + _down_sample_factor<<":\t";
808
                }
809
                //for(int j = 0; j < param._dog_level_num; j++, idx++)
810
        FOR_EACH_LEVEL(j, reverse)
811
                {
812
            if(GlobalUtil::_TruncateMethod && GlobalUtil::_FeatureCountThreshold > 0 && _featureNum > GlobalUtil::_FeatureCountThreshold) continue;
813

    
814
                GenerateFeatureList(i, j, reduction_count, hbuffer);
815

    
816
                        /////////////////////////////
817
                        if(GlobalUtil::_timingO)
818
                        {
819
                int idx = i * param._dog_level_num + j;
820
                                ocount += _levelFeatureNum[idx];
821
                                std::cout<< _levelFeatureNum[idx] <<"\t";
822
                        }
823
                }
824
                if(GlobalUtil::_timingO)
825
                {        
826
                        t2 = CLOCK(); 
827
                        std::cout << "| \t" << int(ocount) << " :\t(" << (t2 - t1) << ")\n";
828
                }
829
        }
830
        /////
831
        CopyGradientTex();
832
        /////
833
        if(GlobalUtil::_timingS)ProgramCU::FinishCUDA();
834

    
835
        if(GlobalUtil::_verbose)
836
        {
837
                std::cout<<"#Features:\t"<<_featureNum<<"\n";
838
        }
839

    
840
        if(ProgramCU::CheckErrorCUDA("PyramidCU::GenerateFeatureList")) SetFailStatus();
841
}
842

    
843
GLTexImage* PyramidCU::GetLevelTexture(int octave, int level)
844
{
845
        return GetLevelTexture(octave, level, DATA_GAUSSIAN);
846
}
847

    
848
GLTexImage* PyramidCU::ConvertTexCU2GL(CuTexImage* tex, int dataName)
849
{
850
        
851
        GLenum format = GL_LUMINANCE;
852
        int convert_done = 1;
853
    if(_bufferPBO == 0) glGenBuffers(1, &_bufferPBO);
854
    if(_bufferTEX == NULL) _bufferTEX = new GLTexImage;
855
        switch(dataName)
856
        {
857
        case DATA_GAUSSIAN:
858
                {
859
                        convert_done = tex->CopyToPBO(_bufferPBO);
860
                        break;
861
                }
862
        case DATA_DOG:
863
                {
864
                        CuTexImage texPBO(tex->GetImgWidth(), tex->GetImgHeight(), 1, _bufferPBO);
865
                        if(texPBO._cuData == 0 || tex->_cuData == NULL) convert_done = 0;
866
                        else ProgramCU::DisplayConvertDOG(tex, &texPBO);
867
                        break;
868
                }
869
        case DATA_GRAD:
870
                {
871
                        CuTexImage texPBO(tex->GetImgWidth(), tex->GetImgHeight(), 1, _bufferPBO);
872
                        if(texPBO._cuData == 0 || tex->_cuData == NULL) convert_done = 0;
873
                        else ProgramCU::DisplayConvertGRD(tex, &texPBO);
874
                        break;
875
                }
876
        case DATA_KEYPOINT:
877
                {
878
                        CuTexImage * dog = tex - param._level_num * _pyramid_octave_num;
879
                        format = GL_RGBA;
880
                        CuTexImage texPBO(tex->GetImgWidth(), tex->GetImgHeight(), 4, _bufferPBO);
881
                        if(texPBO._cuData == 0 || tex->_cuData == NULL) convert_done = 0;
882
                        else ProgramCU::DisplayConvertKEY(tex, dog, &texPBO);
883
                        break;
884
                }
885
        default:
886
                        convert_done = 0;
887
                        break;
888
        }
889

    
890
        if(convert_done)
891
        {
892
                _bufferTEX->InitTexture(max(_bufferTEX->GetTexWidth(), tex->GetImgWidth()), max(_bufferTEX->GetTexHeight(), tex->GetImgHeight()));
893
                _bufferTEX->CopyFromPBO(_bufferPBO, tex->GetImgWidth(), tex->GetImgHeight(), format);
894
        }else
895
        {
896
                _bufferTEX->SetImageSize(0, 0);
897
        }
898

    
899
        return _bufferTEX;
900
}
901

    
902
GLTexImage* PyramidCU::GetLevelTexture(int octave, int level, int dataName) 
903
{
904
        CuTexImage* tex = GetBaseLevel(octave, dataName) + (level - param._level_min);
905
        //CuTexImage* gus = GetBaseLevel(octave, DATA_GAUSSIAN) + (level - param._level_min); 
906
        return ConvertTexCU2GL(tex, dataName);
907
}
908

    
909
void PyramidCU::ConvertInputToCU(GLTexInput* input)
910
{
911
        int ws = input->GetImgWidth(), hs = input->GetImgHeight();
912
        TruncateWidth(ws);
913
        //copy the input image to pixel buffer object
914
    if(input->_pixel_data)
915
    {
916
        _inputTex->InitTexture(ws, hs, 1);
917
        _inputTex->CopyFromHost(input->_pixel_data); 
918
    }else
919
    {
920
        if(_bufferPBO == 0) glGenBuffers(1, &_bufferPBO);
921
        if(input->_rgb_converted && input->CopyToPBO(_bufferPBO, ws, hs, GL_LUMINANCE))
922
        {
923
                    _inputTex->InitTexture(ws, hs, 1);
924
            _inputTex->CopyFromPBO(ws, hs, _bufferPBO); 
925
        }else if(input->CopyToPBO(_bufferPBO, ws, hs))
926
            {
927
                    CuTexImage texPBO(ws, hs, 4, _bufferPBO);
928
                    _inputTex->InitTexture(ws, hs, 1);
929
                    ProgramCU::ReduceToSingleChannel(_inputTex, &texPBO, !input->_rgb_converted);
930
            }else
931
            {
932
                    std::cerr<< "Unable To Convert Intput\n";
933
            }
934
    }
935
}
936

    
937
void PyramidCU::BuildPyramid(GLTexInput * input)
938
{
939

    
940
        USE_TIMING();
941

    
942
        int i, j;
943
        
944
        for ( i = _octave_min; i < _octave_min + _octave_num; i++)
945
        {
946

    
947
                float* filter_sigma = param._sigma;
948
                CuTexImage *tex = GetBaseLevel(i);
949
                CuTexImage *buf = GetBaseLevel(i, DATA_KEYPOINT) +2;
950
                j = param._level_min + 1;
951

    
952
                OCTAVE_START();
953

    
954
                if( i == _octave_min )
955
                {        
956
                        ConvertInputToCU(input);
957

    
958
                        if(i == 0)
959
                        {
960
                                ProgramCU::FilterImage(tex, _inputTex, buf, 
961
                    param.GetInitialSmoothSigma(_octave_min + _down_sample_factor));
962
                        }else
963
                        {
964
                                if(i < 0)        ProgramCU::SampleImageU(tex, _inputTex, -i);                        
965
                                else                ProgramCU::SampleImageD(tex, _inputTex, i);
966
                                ProgramCU::FilterImage(tex, tex, buf, 
967
                    param.GetInitialSmoothSigma(_octave_min + _down_sample_factor));
968
                        }
969
                }else
970
                {
971
                        ProgramCU::SampleImageD(tex, GetBaseLevel(i - 1) + param._level_ds - param._level_min); 
972
                        if(param._sigma_skip1 > 0)
973
                        {
974
                                ProgramCU::FilterImage(tex, tex, buf, param._sigma_skip1);
975
                        }
976
                }
977
                LEVEL_FINISH();
978
                for( ; j <=  param._level_max ; j++, tex++, filter_sigma++)
979
                {
980
                        // filtering
981
                        ProgramCU::FilterImage(tex + 1, tex, buf, *filter_sigma);
982
                        LEVEL_FINISH();
983
                }
984
                OCTAVE_FINISH();
985
        }
986
        if(GlobalUtil::_timingS) ProgramCU::FinishCUDA();
987

    
988
        if(ProgramCU::CheckErrorCUDA("PyramidCU::BuildPyramid")) SetFailStatus();
989
}
990

    
991
void PyramidCU::DetectKeypointsEX()
992
{
993

    
994

    
995
        int i, j;
996
        double t0, t, ts, t1, t2;
997

    
998
        if(GlobalUtil::_timingS && GlobalUtil::_verbose)ts = CLOCK();
999

    
1000
        for(i = _octave_min; i < _octave_min + _octave_num; i++)
1001
        {
1002
                CuTexImage * gus = GetBaseLevel(i) + 1;
1003
                CuTexImage * dog = GetBaseLevel(i, DATA_DOG) + 1;
1004
                CuTexImage * got = GetBaseLevel(i, DATA_GRAD) + 1;
1005
                //compute the gradient
1006
                for(j = param._level_min +1; j <=  param._level_max ; j++, gus++, dog++, got++)
1007
                {
1008
                        //input: gus and gus -1
1009
                        //output: gradient, dog, orientation
1010
                        ProgramCU::ComputeDOG(gus, dog, got);
1011
                }
1012
        }
1013
        if(GlobalUtil::_timingS && GlobalUtil::_verbose)
1014
        {
1015
                ProgramCU::FinishCUDA();
1016
                t1 = CLOCK();
1017
        }
1018

    
1019
        for ( i = _octave_min; i < _octave_min + _octave_num; i++)
1020
        {
1021
                if(GlobalUtil::_timingO)
1022
                {
1023
                        t0 = CLOCK();
1024
                        std::cout<<"#"<<(i + _down_sample_factor)<<"\t";
1025
                }
1026
                CuTexImage * dog = GetBaseLevel(i, DATA_DOG) + 2;
1027
                CuTexImage * key = GetBaseLevel(i, DATA_KEYPOINT) +2;
1028

    
1029

    
1030
                for( j = param._level_min +2; j <  param._level_max ; j++, dog++, key++)
1031
                {
1032
                        if(GlobalUtil::_timingL)t = CLOCK();
1033
                        //input, dog, dog + 1, dog -1
1034
                        //output, key
1035
                        ProgramCU::ComputeKEY(dog, key, param._dog_threshold, param._edge_threshold);
1036
                        if(GlobalUtil::_timingL)
1037
                        {
1038
                                std::cout<<(CLOCK()-t)<<"\t";
1039
                        }
1040
                }
1041
                if(GlobalUtil::_timingO)
1042
                {
1043
                        std::cout<<"|\t"<<(CLOCK()-t0)<<"\n";
1044
                }
1045
        }
1046

    
1047
        if(GlobalUtil::_timingS)
1048
        {
1049
                ProgramCU::FinishCUDA();
1050
                if(GlobalUtil::_verbose) 
1051
                {        
1052
                        t2 = CLOCK();
1053
                        std::cout        <<"<Gradient, DOG  >\t"<<(t1-ts)<<"\n"
1054
                                                <<"<Get Keypoints  >\t"<<(t2-t1)<<"\n";
1055
                }                                
1056
        }
1057
}
1058

    
1059
void PyramidCU::CopyGradientTex()
1060
{
1061
        double ts, t1;
1062

    
1063
        if(GlobalUtil::_timingS && GlobalUtil::_verbose)ts = CLOCK();
1064

    
1065
        for(int i = 0, idx = 0; i < _octave_num; i++)
1066
        {
1067
                CuTexImage * got = GetBaseLevel(i + _octave_min, DATA_GRAD) +  1;
1068
                //compute the gradient
1069
                for(int j = 0; j <  param._dog_level_num ; j++, got++, idx++)
1070
                {
1071
                        if(_levelFeatureNum[idx] > 0)        got->CopyToTexture2D();
1072
                }
1073
        }
1074
        if(GlobalUtil::_timingS)
1075
        {
1076
                ProgramCU::FinishCUDA();
1077
                if(GlobalUtil::_verbose)
1078
                {
1079
                        t1 = CLOCK();
1080
                        std::cout        <<"<Copy Grad/Orientation>\t"<<(t1-ts)<<"\n";
1081
                }
1082
        }
1083
}
1084

    
1085
void PyramidCU::ComputeGradient() 
1086
{
1087

    
1088
        int i, j;
1089
        double ts, t1;
1090

    
1091
        if(GlobalUtil::_timingS && GlobalUtil::_verbose)ts = CLOCK();
1092

    
1093
        for(i = _octave_min; i < _octave_min + _octave_num; i++)
1094
        {
1095
                CuTexImage * gus = GetBaseLevel(i) +  1;
1096
                CuTexImage * dog = GetBaseLevel(i, DATA_DOG) +  1;
1097
                CuTexImage * got = GetBaseLevel(i, DATA_GRAD) +  1;
1098

    
1099
                //compute the gradient
1100
                for(j = 0; j <  param._dog_level_num ; j++, gus++, dog++, got++)
1101
                {
1102
                        ProgramCU::ComputeDOG(gus, dog, got);
1103
                }
1104
        }
1105
        if(GlobalUtil::_timingS)
1106
        {
1107
                ProgramCU::FinishCUDA();
1108
                if(GlobalUtil::_verbose)
1109
                {
1110
                        t1 = CLOCK();
1111
                        std::cout        <<"<Gradient, DOG  >\t"<<(t1-ts)<<"\n";
1112
                }
1113
        }
1114
}
1115

    
1116
int PyramidCU::FitHistogramPyramid(CuTexImage* tex)
1117
{
1118
        CuTexImage *htex;
1119
        int hist_level_num = _hpLevelNum - _pyramid_octave_first / 2; 
1120
        htex = _histoPyramidTex + hist_level_num - 1;
1121
        int w = (tex->GetImgWidth() + 2) >> 2;
1122
        int h = tex->GetImgHeight();
1123
        int count = 0; 
1124
        for(int k = 0; k < hist_level_num; k++, htex--)
1125
        {
1126
                //htex->SetImageSize(w, h);        
1127
                htex->InitTexture(w, h, 4); 
1128
                ++count;
1129
                if(w == 1)
1130
            break;
1131
                w = (w + 3)>>2; 
1132
        }
1133
        return count;
1134
}
1135

    
1136
void PyramidCU::GetFeatureOrientations() 
1137
{
1138

    
1139
        CuTexImage * ftex = _featureTex;
1140
        int * count         = _levelFeatureNum;
1141
        float sigma, sigma_step = powf(2.0f, 1.0f/param._dog_level_num);
1142

    
1143
        for(int i = 0; i < _octave_num; i++)
1144
        {
1145
                CuTexImage* got = GetBaseLevel(i + _octave_min, DATA_GRAD) + 1;
1146
                CuTexImage* key = GetBaseLevel(i + _octave_min, DATA_KEYPOINT) + 2;
1147

    
1148
                for(int j = 0; j < param._dog_level_num; j++, ftex++, count++, got++, key++)
1149
                {
1150
                        if(*count<=0)continue;
1151

    
1152
                        //if(ftex->GetImgWidth() < *count) ftex->InitTexture(*count, 1, 4);
1153

    
1154
                        sigma = param.GetLevelSigma(j+param._level_min+1);
1155

    
1156
                        ProgramCU::ComputeOrientation(ftex, got, key, sigma, sigma_step, _existing_keypoints);                
1157
                }
1158
        }
1159

    
1160
        if(GlobalUtil::_timingS)ProgramCU::FinishCUDA();
1161
        if(ProgramCU::CheckErrorCUDA("PyramidCU::GetFeatureOrientations")) SetFailStatus();
1162

    
1163
}
1164

    
1165
void PyramidCU::GetSimplifiedOrientation() 
1166
{
1167
        //no simplified orientation
1168
        GetFeatureOrientations();
1169
}
1170

    
1171
CuTexImage* PyramidCU::GetBaseLevel(int octave, int dataName)
1172
{
1173
        if(octave <_octave_min || octave > _octave_min + _octave_num) return NULL;
1174
        int offset = (_pyramid_octave_first + octave - _octave_min) * param._level_num;
1175
        int num = param._level_num * _pyramid_octave_num;
1176
        if (dataName == DATA_ROT) dataName = DATA_GRAD;
1177
        return _allPyramid + num * dataName + offset;
1178
}
1179

    
1180
#endif
1181