Statistics
| Branch: | Revision:

root / rgbdslam / external / siftgpu / src / SiftGPU / PyramidGL.cpp @ 9240aaa3

History | View | Annotate | Download (68.3 KB)

1
////////////////////////////////////////////////////////////////////////////
2
//        File:                PyramidGL.cpp
3
//        Author:                Changchang Wu
4
//        Description : implementation of PyramidGL/PyramidNaive/PyramidPackdc .
5
//
6
//
7
//        Copyright (c) 2007 University of North Carolina at Chapel Hill
8
//        All Rights Reserved
9
//
10
//        Permission to use, copy, modify and distribute this software and its
11
//        documentation for educational, research and non-profit purposes, without
12
//        fee, and without a written agreement is hereby granted, provided that the
13
//        above copyright notice and the following paragraph appear in all copies.
14
//        
15
//        The University of North Carolina at Chapel Hill make no representations
16
//        about the suitability of this software for any purpose. It is provided
17
//        'as is' without express or implied warranty. 
18
//
19
//        Please send BUG REPORTS to ccwu@cs.unc.edu
20
//
21
////////////////////////////////////////////////////////////////////////////
22

    
23
#include "GL/glew.h"
24
#include <iostream>
25
#include <iomanip>
26
#include <vector>
27
#include <algorithm>
28
#include <fstream>
29
#include <math.h>
30
#include <string.h>
31
using namespace std;
32

    
33
#include "GlobalUtil.h"
34
#include "GLTexImage.h"
35
#include "SiftGPU.h"
36
#include "ShaderMan.h"
37
#include "SiftPyramid.h"
38
#include "ProgramGLSL.h"
39
#include "PyramidGL.h"
40
#include "FrameBufferObject.h"
41

    
42

    
43
#if defined(__SSE__) || _MSC_VER > 1200
44
#define USE_SSE_FOR_SIFTGPU
45
#include <xmmintrin.h>
46
#else
47
//#pragma message( "SSE optimization off!\n" )
48
#endif
49

    
50

    
51
#define USE_TIMING()                double t, t0, tt;
52
#define OCTAVE_START()                if(GlobalUtil::_timingO){        t = t0 = CLOCK();        cout<<"#"<<i+_down_sample_factor<<"\t";        }
53
#define LEVEL_FINISH()                if(GlobalUtil::_timingL){        glFinish();        tt = CLOCK();cout<<(tt-t)<<"\t";        t = CLOCK();}
54
#define OCTAVE_FINISH()                if(GlobalUtil::_timingO)cout<<"|\t"<<(CLOCK()-t0)<<endl;
55

    
56

    
57
//////////////////////////////////////////////////////////////////////
58
// Construction/Destruction
59
//////////////////////////////////////////////////////////////////////
60
PyramidNaive::PyramidNaive(SiftParam& sp): PyramidGL(sp)
61
{
62
        _texPyramid = NULL;
63
        _auxPyramid = NULL;
64
}
65

    
66
PyramidNaive::~PyramidNaive()
67
{
68
        DestroyPyramidData();
69
}
70

    
71
//align must be 2^i
72
void PyramidGL::        GetAlignedStorageSize(int num, int align,  int &fw, int &fh)
73
{
74
        if(num <=0)
75
        {
76
                fw = fh = 0;
77
        }else if(num < align*align)
78
        {
79
                fw = align;
80
                fh = (int)ceil(double(num) / fw);
81
        }else if(GlobalUtil::_NarrowFeatureTex)
82
        {
83
                double dn = double(num);
84
                int nb = (int) ceil(dn/GlobalUtil::_texMaxDim/align);        
85
                fw = align * nb;        
86
                fh = (int)ceil(dn /fw);/**/
87
        }else
88
        {
89
                double dn = double(num);
90
                int nb = (int) ceil(dn/GlobalUtil::_texMaxDim/align);
91
                fh = align * nb;
92
                if(nb <=1)
93
                {
94
                        fw = (int)ceil(dn / fh);
95
                        //align this dimension to blocksize
96
                        fw = ((int) ceil(double(fw) /align))*align;
97
                }else
98
                {
99
                        fw = GlobalUtil::_texMaxDim;
100
                }
101

    
102
        }
103

    
104

    
105
}
106

    
107
void PyramidGL::GetTextureStorageSize(int num, int &fw, int& fh)
108
{
109
        if(num <=0)
110
        {
111
                fw = fh = 0;
112
        }else if(num <= GlobalUtil::_FeatureTexBlock)
113
        {
114
                fw = num;
115
                fh = 1;
116
        }else if(GlobalUtil::_NarrowFeatureTex)
117
        {
118
                double dn = double(num);
119
                int nb = (int) ceil(dn/GlobalUtil::_texMaxDim/GlobalUtil::_FeatureTexBlock);        
120
                fw = GlobalUtil::_FeatureTexBlock * nb;        
121
                fh = (int)ceil(dn /fw);/**/
122
        }else
123
        {
124
                double dn = double(num);
125
                int nb = (int) ceil(dn/GlobalUtil::_texMaxDim/GlobalUtil::_FeatureTexBlock);
126
                fh = GlobalUtil::_FeatureTexBlock * nb;
127
                if(nb <=1)
128
                {
129
                        fw = (int)ceil(dn / fh);
130

    
131
                        //align this dimension to blocksize
132

    
133
                        //
134
                        if( fw < fh)
135
                        {
136
                                int temp = fh;
137
                                fh = fw;
138
                                fw = temp;
139
                        }
140
                }else
141
                {
142
                        fw = GlobalUtil::_texMaxDim;
143
                }
144
        }
145
}
146

    
147
void PyramidNaive::DestroyPyramidData()
148
{
149
        if(_texPyramid)
150
        {
151
                delete [] _texPyramid;
152
                _texPyramid = NULL;
153
        }
154
        if(_auxPyramid)
155
        {
156
                delete [] _auxPyramid;  
157
                _auxPyramid = NULL;
158
        }
159
}
160
PyramidGL::PyramidGL(SiftParam &sp):SiftPyramid(sp)
161
{
162
        _featureTex = NULL;
163
        _orientationTex = NULL;
164
        _descriptorTex = NULL;
165
        _histoPyramidTex = NULL;        
166
    //////////////////////////
167

    
168
    InitializeContext();
169
}
170

    
171
PyramidGL::~PyramidGL()
172
{
173
        DestroyPerLevelData();
174
        DestroySharedData();
175
        ShaderMan::DestroyShaders();
176
}
177

    
178
void PyramidGL::InitializeContext()
179
{
180
    GlobalUtil::InitGLParam(0);
181
    if(!GlobalUtil::_GoodOpenGL) return;
182

    
183
    //////////////////////////////////////////////
184
        ShaderMan::InitShaderMan(param);
185
}
186

    
187
void PyramidGL::DestroyPerLevelData()
188
{
189
        //integers vector to store the feature numbers.
190
        if(_levelFeatureNum)
191
        {
192
                delete [] _levelFeatureNum;
193
                _levelFeatureNum = NULL;
194
        }
195
        //texture used to store features
196
        if(        _featureTex)
197
        {
198
                delete [] _featureTex;
199
                _featureTex =        NULL;
200
        }
201
        //texture used for multi-orientation 
202
        if(_orientationTex)
203
        {
204
                delete [] _orientationTex;
205
                _orientationTex = NULL;
206
        }
207
        int no = _octave_num* param._dog_level_num;
208

    
209
        //two sets of vbos used to display the features
210
        if(_featureDisplayVBO)
211
        {
212
                glDeleteBuffers(no, _featureDisplayVBO);
213
                delete [] _featureDisplayVBO;
214
                _featureDisplayVBO = NULL;
215
        }
216
        if( _featurePointVBO)
217
        {
218
                glDeleteBuffers(no, _featurePointVBO);
219
                delete [] _featurePointVBO;
220
                _featurePointVBO = NULL;
221
        }
222

    
223
}
224

    
225
void PyramidGL::DestroySharedData()
226
{
227
        //histogram reduction
228
        if(_histoPyramidTex)
229
        {
230
                delete[]        _histoPyramidTex;
231
                _hpLevelNum = 0;
232
                _histoPyramidTex = NULL;
233
        }
234

    
235
        //descriptor storage shared by all levels
236
        if(_descriptorTex)
237
        {
238
                delete [] _descriptorTex;
239
                _descriptorTex = NULL;
240
        }
241
        //cpu reduction buffer.
242
        if(_histo_buffer)
243
        {
244
                delete[] _histo_buffer;
245
                _histo_buffer = 0;
246
        }
247
}
248

    
249
void PyramidNaive::FitHistogramPyramid()
250
{
251
        GLTexImage * tex, *htex;
252
        int hist_level_num = _hpLevelNum - _pyramid_octave_first; 
253

    
254
        tex = GetBaseLevel(_octave_min , DATA_KEYPOINT) + 2;
255
        htex = _histoPyramidTex + hist_level_num - 1;
256
        int w = tex->GetImgWidth() >> 1;
257
        int h = tex->GetImgHeight() >> 1;
258

    
259
        for(int k = 0; k <hist_level_num -1; k++, htex--)
260
        {
261
                if(htex->GetImgHeight()!= h || htex->GetImgWidth() != w)
262
                {        
263
                        htex->SetImageSize(w, h);
264
                        htex->ZeroHistoMargin();
265
                }
266

    
267
                w = (w + 1)>>1; h = (h + 1) >> 1;
268
        }
269
}
270

    
271
void PyramidNaive::FitPyramid(int w, int h)
272
{
273
        //(w, h) <= (_pyramid_width, _pyramid_height);
274

    
275
        _pyramid_octave_first = 0;
276
        //
277
        _octave_num  = GlobalUtil::_octave_num_default;
278

    
279
        int _octave_num_max = GetRequiredOctaveNum(min(w, h));
280

    
281
        if(_octave_num < 1 || _octave_num > _octave_num_max) 
282
        {
283
                _octave_num = _octave_num_max;
284
        }
285

    
286

    
287
        int pw = _pyramid_width>>1, ph = _pyramid_height>>1;
288
        while(_pyramid_octave_first + _octave_num < _pyramid_octave_num &&  
289
                pw >= w && ph >= h)
290
        {
291
                _pyramid_octave_first++;
292
                pw >>= 1;
293
                ph >>= 1;
294
        }
295

    
296
        for(int i = 0; i < _octave_num; i++)
297
        {
298
                GLTexImage * tex = GetBaseLevel(i + _octave_min);
299
                GLTexImage * aux = GetBaseLevel(i + _octave_min, DATA_KEYPOINT);
300
                for(int j = param._level_min; j <= param._level_max; j++, tex++, aux++)
301
                {
302
                        tex->SetImageSize(w, h);
303
                        aux->SetImageSize(w, h);
304
                }
305
                w>>=1;
306
                h>>=1;
307
        }
308
}
309
void PyramidNaive::InitPyramid(int w, int h, int ds)
310
{
311
        int wp, hp, toobig = 0;
312
        if(ds == 0)
313
        {
314
                _down_sample_factor = 0;
315
                if(GlobalUtil::_octave_min_default>=0)
316
                {
317
                        wp = w >> GlobalUtil::_octave_min_default;
318
                        hp = h >> GlobalUtil::_octave_min_default;
319
                }else 
320
                {
321
                        wp = w << (-GlobalUtil::_octave_min_default);
322
                        hp = h << (-GlobalUtil::_octave_min_default);
323
                }
324
                _octave_min = _octave_min_default;
325
        }else
326
        {
327
                //must use 0 as _octave_min; 
328
                _octave_min = 0;
329
                _down_sample_factor = ds;
330
                w >>= ds;
331
                h >>= ds;
332
                wp = w;
333
                hp = h; 
334

    
335
        }
336

    
337
        while(wp > GlobalUtil::_texMaxDim || hp > GlobalUtil::_texMaxDim)
338
        {
339
                _octave_min ++;
340
                wp >>= 1;
341
                hp >>= 1;
342
                toobig = 1;
343
        }
344

    
345
        if(toobig && GlobalUtil::_verbose && _octave_min > 0)
346
        {
347

    
348
                std::cout<< "**************************************************************\n"
349
                                        "Image larger than allowed dimension, data will be downsampled!\n"
350
                                        "use -maxd to change the settings\n"
351
                                        "***************************************************************\n";
352
        }
353

    
354
        if( wp == _pyramid_width && hp == _pyramid_height && _allocated )
355
        {
356
                FitPyramid(wp, hp);
357
        }else if(GlobalUtil::_ForceTightPyramid || _allocated ==0)
358
        {
359
                ResizePyramid(wp, hp);
360
        }
361
        else if( wp > _pyramid_width || hp > _pyramid_height )
362
        {
363
                ResizePyramid(max(wp, _pyramid_width), max(hp, _pyramid_height));
364
                if(wp < _pyramid_width || hp < _pyramid_height)  FitPyramid(wp, hp);
365
        }
366
        else
367
        {
368
                //try use the pyramid allocated for large image on small input images
369
                FitPyramid(wp, hp);
370
        }
371

    
372
        //select the initial smoothing filter according to the new _octave_min
373
        ShaderMan::SelectInitialSmoothingFilter(_octave_min + _down_sample_factor, param);
374
}
375

    
376
void PyramidNaive::ResizePyramid( int w,  int h)
377
{
378
        //
379
        unsigned int totalkb = 0;
380
        int _octave_num_new, input_sz;
381
        int i, j;
382
        GLTexImage * tex, *aux;
383
        //
384

    
385
        if(_pyramid_width == w && _pyramid_height == h && _allocated) return;
386

    
387
        if(w > GlobalUtil::_texMaxDim || h > GlobalUtil::_texMaxDim) return ;
388

    
389
        if(GlobalUtil::_verbose && GlobalUtil::_timingS) std::cout<<"[Allocate Pyramid]:\t" <<w<<"x"<<h<<endl;
390
        //first octave does not change
391
        _pyramid_octave_first = 0;
392

    
393
        
394
        //compute # of octaves
395

    
396
        input_sz = min(w,h) ;
397

    
398

    
399
        _pyramid_width =  w;
400
        _pyramid_height =  h;
401

    
402
        //reset to preset parameters
403
        _octave_num_new  = GlobalUtil::_octave_num_default;
404

    
405
        if(_octave_num_new < 1) _octave_num_new = GetRequiredOctaveNum(input_sz)  ;
406

    
407
        if(_pyramid_octave_num != _octave_num_new)
408
        {
409
                //destroy the original pyramid if the # of octave changes
410
                if(_octave_num >0)
411
                {
412
                        DestroyPerLevelData();
413
                        DestroyPyramidData();
414
                }
415
                _pyramid_octave_num = _octave_num_new;
416
        }
417

    
418
        _octave_num = _pyramid_octave_num;
419

    
420
        int noct = _octave_num;
421
        int nlev = param._level_num;
422

    
423
        //        //initialize the pyramid
424
        if(_texPyramid==NULL)        _texPyramid = new GLTexImage[ noct* nlev ];
425
        if(_auxPyramid==NULL)        _auxPyramid = new GLTexImage[ noct* nlev ];
426

    
427

    
428
        tex = GetBaseLevel(_octave_min, DATA_GAUSSIAN);
429
        aux = GetBaseLevel(_octave_min, DATA_KEYPOINT);
430
        for(i = 0; i< noct; i++)
431
        {
432
                totalkb += (nlev * w * h * 16 / 1024);
433
                for( j = 0; j< nlev; j++, tex++)
434
                {
435
                        tex->InitTexture(w, h);
436
                        //tex->AttachToFBO(0);
437
                }
438
                //several auxilary textures are not actually required
439
                totalkb += ((nlev - 3) * w * h * 16 /1024);
440
                for( j = 0; j< nlev ; j++, aux++)
441
                {
442
                        if(j < 2) continue;
443
                        if(j >= nlev - 1) continue;
444
                        aux->InitTexture(w, h, 0);
445
                        //aux->AttachToFBO(0);
446
                }
447

    
448
                w>>=1;
449
                h>>=1;
450
        }
451

    
452
        totalkb += ResizeFeatureStorage();
453

    
454

    
455
        //
456
        _allocated = 1;
457

    
458
        if(GlobalUtil::_verbose && GlobalUtil::_timingS) std::cout<<"[Allocate Pyramid]:\t" <<(totalkb/1024)<<"MB\n";
459

    
460
}
461

    
462

    
463
int PyramidGL::ResizeFeatureStorage()
464
{
465
        int totalkb = 0;
466
        if(_levelFeatureNum==NULL)        _levelFeatureNum = new int[_octave_num * param._dog_level_num];
467
        std::fill(_levelFeatureNum, _levelFeatureNum+_octave_num * param._dog_level_num, 0); 
468

    
469
        int wmax = GetBaseLevel(_octave_min)->GetDrawWidth();
470
        int hmax = GetBaseLevel(_octave_min)->GetDrawHeight();
471
        int w ,h, i;
472

    
473
        //use a fbo to initialize textures..
474
        FrameBufferObject fbo;
475
        
476
        //
477
        if(_histo_buffer == NULL) _histo_buffer = new float[1 << (2 + 2 * GlobalUtil::_ListGenSkipGPU)];
478
        //histogram for feature detection
479

    
480
        int num = (int)ceil(log(double(max(wmax, hmax)))/log(2.0));
481

    
482
        if( _hpLevelNum != num)
483
        {
484
                _hpLevelNum = num;
485
                if(GlobalUtil::_ListGenGPU)
486
                {
487
                        if(_histoPyramidTex ) delete [] _histoPyramidTex;
488
                        _histoPyramidTex = new GLTexImage[_hpLevelNum];
489
                        w = h = 1 ;
490
                        for(i = 0; i < _hpLevelNum; i++)
491
                        {
492
                                _histoPyramidTex[i].InitTexture(w, h, 0);
493
                                _histoPyramidTex[i].AttachToFBO(0);
494
                                w<<=1;
495
                                h<<=1;
496
                        }
497
                }
498
        }
499

    
500
        // (4 ^ (_hpLevelNum) -1 / 3) pixels
501
        if(GlobalUtil::_ListGenGPU) totalkb += (((1 << (2 * _hpLevelNum)) -1) / 3 * 16 / 1024);
502

    
503

    
504

    
505
        //initialize the feature texture
506

    
507
        int idx = 0, n = _octave_num * param._dog_level_num;
508
        if(_featureTex==NULL)        _featureTex = new GLTexImage[n];
509
        if(GlobalUtil::_MaxOrientation >1 && GlobalUtil::_OrientationPack2==0)
510
        {
511
                if(_orientationTex== NULL)                _orientationTex = new GLTexImage[n];
512
        }
513

    
514

    
515
        for(i = 0; i < _octave_num; i++)
516
        {
517
                GLTexImage * tex = GetBaseLevel(i+_octave_min);
518
                int fmax = int(tex->GetImgWidth()*tex->GetImgHeight()*GlobalUtil::_MaxFeaturePercent);
519
                int fw, fh;
520
                //
521
                if(fmax > GlobalUtil::_MaxLevelFeatureNum) fmax = GlobalUtil::_MaxLevelFeatureNum;
522
                else if(fmax < 32) fmax = 32;        //give it at least a space of 32 feature
523

    
524
                GetTextureStorageSize(fmax, fw, fh);
525
                
526
                for(int j = 0; j < param._dog_level_num; j++, idx++)
527
                {
528

    
529
                        _featureTex[idx].InitTexture(fw, fh, 0);
530
                        _featureTex[idx].AttachToFBO(0);
531
                        //
532
                        if(_orientationTex)
533
                        {
534
                                _orientationTex[idx].InitTexture(fw, fh, 0);
535
                                _orientationTex[idx].AttachToFBO(0);
536
                        }
537
                }
538
                totalkb += fw * fh * 16 * param._dog_level_num * (_orientationTex? 2 : 1) /1024;
539
        }
540

    
541

    
542
        //this just need be initialized once
543
        if(_descriptorTex==NULL)
544
        {
545
                //initialize feature texture pyramid
546
                wmax = _featureTex->GetImgWidth();
547
                hmax = _featureTex->GetImgHeight();
548

    
549
                int nf, ns;
550
                if(GlobalUtil::_DescriptorPPT)
551
                {
552
                        //32*4 = 128. 
553
                        nf = 32 / GlobalUtil::_DescriptorPPT;        // how many textures we need
554
                        ns = max(4, GlobalUtil::_DescriptorPPT);                    // how many point in one texture for one descriptor
555
                }else
556
                {
557
                        //at least one, resue for visualization and other work
558
                        nf = 1; ns = 4;
559
                }
560
                //
561
                _alignment = ns;
562
                //
563
                _descriptorTex = new GLTexImage[nf];
564

    
565
                int fw, fh;
566
                GetAlignedStorageSize(hmax*wmax* max(ns, 10), _alignment, fw, fh);
567

    
568
                if(fh < hmax ) fh = hmax;
569
                if(fw < wmax ) fw = wmax;
570

    
571
                totalkb += ( fw * fh * nf * 16 /1024);
572
                for(i =0; i < nf; i++)
573
                {
574
                        _descriptorTex[i].InitTexture(fw, fh);
575
                }
576
        }else
577
        {
578
                int nf = GlobalUtil::_DescriptorPPT? 32 / GlobalUtil::_DescriptorPPT: 1;
579
                totalkb += nf * _descriptorTex[0].GetTexWidth() * _descriptorTex[0].GetTexHeight() * 16 /1024;
580
        }
581
        return totalkb;
582
}
583

    
584

    
585
void PyramidNaive::BuildPyramid(GLTexInput *input)
586
{
587
        USE_TIMING();
588
        GLTexPacked * tex;
589
        FilterProgram** filter;
590
        FrameBufferObject fbo;
591

    
592
        glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
593
        input->FitTexViewPort();
594

    
595
        for (int i = _octave_min; i < _octave_min + _octave_num; i++)
596
        {
597

    
598
                tex = (GLTexPacked*)GetBaseLevel(i);
599
                filter = ShaderMan::s_bag->f_gaussian_step;
600

    
601
                OCTAVE_START();
602

    
603
                if( i == _octave_min )
604
                {
605
                        if(i < 0)        TextureUpSample(tex, input, 1<<(-i)        );                        
606
                        else        TextureDownSample(tex, input, 1<<i);
607
                ShaderMan::FilterInitialImage(tex, NULL);
608
                }else
609
                {
610
                        TextureDownSample(tex, GetLevelTexture(i-1, param._level_ds)); 
611
            ShaderMan::FilterSampledImage(tex, NULL); 
612
        }
613
                LEVEL_FINISH();
614

    
615
                for(int j = param._level_min + 1; j <=  param._level_max ; j++, tex++, filter++)
616
                {
617
                        // filtering
618
            ShaderMan::FilterImage(*filter, tex+1, tex, NULL);
619
                        LEVEL_FINISH();
620
                }
621
                OCTAVE_FINISH();
622

    
623
        }
624
        if(GlobalUtil::_timingS)        glFinish();
625
        UnloadProgram();
626
}
627

    
628

    
629

    
630

    
631

    
632

    
633
GLTexImage*  PyramidNaive::GetLevelTexture(int octave, int level, int dataName)
634
{
635
        if(octave <_octave_min || octave > _octave_min + _octave_num) return NULL;
636
        switch(dataName)
637
        {
638
                case DATA_GAUSSIAN:
639
                case DATA_DOG:
640
                case DATA_GRAD:
641
                case DATA_ROT:
642
                        return _texPyramid+ (_pyramid_octave_first + octave - _octave_min) * param._level_num + (level - param._level_min);
643
                case DATA_KEYPOINT:
644
                        return _auxPyramid + (_pyramid_octave_first + octave - _octave_min) * param._level_num + (level - param._level_min);
645
                default:
646
                        return NULL;
647
        }
648
}
649

    
650
GLTexImage*  PyramidNaive::GetLevelTexture(int octave, int level)
651
{
652
        return _texPyramid+ (_pyramid_octave_first + octave - _octave_min) * param._level_num 
653
                + (level - param._level_min);
654
}
655

    
656
//in the packed implementation
657
// DATA_GAUSSIAN, DATA_DOG, DATA_GAD will be stored in different textures.
658
GLTexImage*  PyramidNaive::GetBaseLevel(int octave, int dataName)
659
{
660
        if(octave <_octave_min || octave > _octave_min + _octave_num) return NULL;
661
        switch(dataName)
662
        {
663
                case DATA_GAUSSIAN:
664
                case DATA_DOG:
665
                case DATA_GRAD:
666
                case DATA_ROT:
667
                        return _texPyramid+ (_pyramid_octave_first + octave - _octave_min) * param._level_num;
668
                case DATA_KEYPOINT:
669
                        return _auxPyramid + (_pyramid_octave_first + octave - _octave_min) * param._level_num;
670
                default:
671
                        return NULL;
672
        }
673
}
674

    
675

    
676

    
677

    
678

    
679

    
680

    
681

    
682

    
683
void PyramidNaive::ComputeGradient()
684
{
685

    
686
        int i, j;
687
        double  ts, t1;
688
        GLTexImage * tex;
689
        FrameBufferObject fbo;
690

    
691

    
692
        if(GlobalUtil::_timingS && GlobalUtil::_verbose)ts = CLOCK();
693
        
694
        glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
695

    
696
        for ( i = _octave_min; i < _octave_min + _octave_num; i++)
697
        {
698
                for( j = param._level_min + 1 ; j < param._level_max ; j++)
699
                {
700
                        tex = GetLevelTexture(i, j);
701
                        tex->FitTexViewPort();
702
                        tex->AttachToFBO(0);
703
                        tex->BindTex();
704
                        ShaderMan::UseShaderGradientPass();
705
                        tex->DrawQuadMT4();
706
                }
707
        }                
708

    
709
        if(GlobalUtil::_timingS && GlobalUtil::_verbose)
710
        {
711
                glFinish();
712
                t1 = CLOCK();        
713
                std::cout<<"<Compute Gradient>\t"<<(t1-ts)<<"\n";
714
        }
715

    
716
        UnloadProgram();
717
        GLTexImage::UnbindMultiTex(3);
718
        fbo.UnattachTex(GL_COLOR_ATTACHMENT1_EXT);
719
}
720

    
721

    
722
//keypoint detection with subpixel localization
723
void PyramidNaive::DetectKeypointsEX()
724
{
725
        int i, j;
726
        double t0, t, ts, t1, t2;
727
        GLTexImage * tex, *aux;
728
        FrameBufferObject fbo;
729

    
730
        if(GlobalUtil::_timingS && GlobalUtil::_verbose)ts = CLOCK();
731
        
732
        glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
733
        //extra gradient data required for visualization
734
        int gradient_only_levels[2] = {param._level_min +1, param._level_max};
735
        int n_gradient_only_level = GlobalUtil::_UseSiftGPUEX ? 2 : 1;
736
        for ( i = _octave_min; i < _octave_min + _octave_num; i++)
737
        {
738
                for( j =0; j < n_gradient_only_level ; j++)
739
                {
740
                        tex = GetLevelTexture(i, gradient_only_levels[j]);
741
                        tex->FitTexViewPort();
742
                        tex->AttachToFBO(0);
743
                        tex->BindTex();
744
                        ShaderMan::UseShaderGradientPass();
745
                        tex->DrawQuadMT4();
746
                }
747
        }                
748

    
749
        if(GlobalUtil::_timingS && GlobalUtil::_verbose)
750
        {
751
                glFinish();
752
                t1 = CLOCK();
753
        }
754

    
755
        GLenum buffers[] = { GL_COLOR_ATTACHMENT0_EXT, GL_COLOR_ATTACHMENT1_EXT };
756
        glDrawBuffers(2, buffers);
757
        for ( i = _octave_min; i < _octave_min + _octave_num; i++)
758
        {
759
                if(GlobalUtil::_timingO)
760
                {
761
                        t0 = CLOCK();
762
                        std::cout<<"#"<<(i + _down_sample_factor)<<"\t";
763
                }
764
                tex = GetBaseLevel(i) + 2;
765
                aux = GetBaseLevel(i, DATA_KEYPOINT) +2;
766
                aux->FitTexViewPort();
767

    
768
                for( j = param._level_min + 2; j <  param._level_max ; j++, aux++, tex++)
769
                {
770
                        if(GlobalUtil::_timingL)t = CLOCK();                
771
                        tex->AttachToFBO(0);
772
                        aux->AttachToFBO(1);
773
                        glActiveTexture(GL_TEXTURE0);
774
                        tex->BindTex();
775
                        glActiveTexture(GL_TEXTURE1);
776
                        (tex+1)->BindTex();
777
                        glActiveTexture(GL_TEXTURE2);
778
                        (tex-1)->BindTex();
779
                        ShaderMan::UseShaderKeypoint((tex+1)->GetTexID(), (tex-1)->GetTexID());
780
                        aux->DrawQuadMT8();
781
        
782
                        if(GlobalUtil::_timingL)
783
                        {
784
                                glFinish();
785
                                std::cout<<(CLOCK()-t)<<"\t";
786
                        }
787
                        tex->DetachFBO(0);
788
                        aux->DetachFBO(1);
789
                }
790
                if(GlobalUtil::_timingO)
791
                {
792
                        std::cout<<"|\t"<<(CLOCK()-t0)<<"\n";
793
                }
794
        }
795

    
796
        if(GlobalUtil::_timingS)
797
        {
798
                glFinish();
799
                t2 = CLOCK();
800
                if(GlobalUtil::_verbose) 
801
                        std::cout        <<"<Get Keypoints ..  >\t"<<(t2-t1)<<"\n"
802
                                                <<"<Extra Gradient..  >\t"<<(t1-ts)<<"\n";
803
        }
804
        UnloadProgram();
805
        GLTexImage::UnbindMultiTex(3);
806
        fbo.UnattachTex(GL_COLOR_ATTACHMENT1_EXT);
807

    
808

    
809
}
810

    
811
void PyramidNaive::GenerateFeatureList(int i, int j)
812
{
813
        int hist_level_num = _hpLevelNum - _pyramid_octave_first; 
814
        int hist_skip_gpu = GlobalUtil::_ListGenSkipGPU; 
815
    int idx = i * param._dog_level_num + j;
816
    GLTexImage* htex, *ftex, *tex;
817
        tex = GetBaseLevel(_octave_min + i, DATA_KEYPOINT) + 2 + j;
818
    ftex = _featureTex + idx;
819
        htex = _histoPyramidTex + hist_level_num - 1 - i;
820

    
821
        ///
822
        glActiveTexture(GL_TEXTURE0);
823
        tex->BindTex();
824
        htex->AttachToFBO(0);
825
        int tight = ((htex->GetImgWidth() * 2 == tex->GetImgWidth() -1 || tex->GetTexWidth() == tex->GetImgWidth()) &&
826
                                 (htex->GetImgHeight() *2 == tex->GetImgHeight()-1 || tex->GetTexHeight() == tex->GetImgHeight()));
827
        ShaderMan::UseShaderGenListInit(tex->GetImgWidth(), tex->GetImgHeight(), tight);
828
        htex->FitTexViewPort();
829
        //this uses the fact that no feature is on the edge.
830
        htex->DrawQuadReduction();
831

    
832
        //reduction..
833
        htex--;
834

    
835
        //this part might have problems on several GPUS
836
        //because the output of one pass is the input of the next pass
837
        //need to call glFinish to make it right
838
        //but too much glFinish makes it slow
839
        for(int k = 0; k <hist_level_num - i - 1 - hist_skip_gpu; k++, htex--)
840
        {
841
                htex->AttachToFBO(0);
842
                htex->FitTexViewPort();
843
                (htex+1)->BindTex();
844
                ShaderMan::UseShaderGenListHisto();
845
                htex->DrawQuadReduction();                                        
846
        }
847

    
848
        //
849
        if(hist_skip_gpu == 0)
850
        {        
851
                //read back one pixel
852
                float fn[4], fcount;
853
                glReadPixels(0, 0, 1, 1, GL_RGBA , GL_FLOAT, fn);
854
                fcount = (fn[0] + fn[1] + fn[2] + fn[3]);
855
                if(fcount < 1) fcount = 0;
856

    
857

    
858
                _levelFeatureNum[ idx] = (int)(fcount);
859
                SetLevelFeatureNum(idx, (int)fcount);
860
                _featureNum += int(fcount);
861

    
862
                //
863
                if(fcount < 1.0) return;
864
        
865

    
866
                ///generate the feature texture
867

    
868
                htex=  _histoPyramidTex;
869

    
870
                htex->BindTex();
871

    
872
                //first pass
873
                ftex->AttachToFBO(0);
874
                if(GlobalUtil::_MaxOrientation>1)
875
                {
876
                        //this is very important...
877
                        ftex->FitRealTexViewPort();
878
                        glClear(GL_COLOR_BUFFER_BIT);
879
                        glFinish();
880
                }else
881
                {
882
                        ftex->FitTexViewPort();
883
            //glFinish();
884
                }
885

    
886

    
887
                ShaderMan::UseShaderGenListStart((float)ftex->GetImgWidth(), htex->GetTexID());
888

    
889
                ftex->DrawQuad();
890
                //make sure it finishes before the next step
891
                ftex->DetachFBO(0);
892

    
893
                //pass on each pyramid level
894
                htex++;
895
        }else
896
        {
897

    
898
                int tw = htex[1].GetDrawWidth(), th = htex[1].GetDrawHeight();
899
                int fc = 0;
900
                glReadPixels(0, 0, tw, th, GL_RGBA , GL_FLOAT, _histo_buffer);        
901
                _keypoint_buffer.resize(0);
902
                for(int y = 0, pos = 0; y < th; y++)
903
                {
904
                        for(int x= 0; x < tw; x++)
905
                        {
906
                                for(int c = 0; c < 4; c++, pos++)
907
                                {
908
                                        int ss =  (int) _histo_buffer[pos]; 
909
                                        if(ss == 0) continue;
910
                                        float ft[4] = {2 * x + (c%2? 1.5f:  0.5f), 2 * y + (c>=2? 1.5f: 0.5f), 0, 1 };
911
                                        for(int t = 0; t < ss; t++)
912
                                        {
913
                                                ft[2] = (float) t; 
914
                                                _keypoint_buffer.insert(_keypoint_buffer.end(), ft, ft+4);
915
                                        }
916
                                        fc += (int)ss; 
917
                                }
918
                        }
919
                }
920
                _levelFeatureNum[ idx] = fc;
921
                SetLevelFeatureNum(idx, fc);
922
                if(fc == 0)  return;
923
        _featureNum += fc;
924
                /////////////////////
925
                ftex->AttachToFBO(0);
926
                if(GlobalUtil::_MaxOrientation>1)
927
                {
928
                        ftex->FitRealTexViewPort();
929
                        glClear(GL_COLOR_BUFFER_BIT);
930
                }else
931
                {                                        
932
                        ftex->FitTexViewPort();
933
                }
934
                _keypoint_buffer.resize(ftex->GetDrawWidth() * ftex->GetDrawHeight()*4, 0);
935
                ///////////
936
                glActiveTexture(GL_TEXTURE0);
937
                ftex->BindTex();
938
                glTexSubImage2D(GlobalUtil::_texTarget, 0, 0, 0, ftex->GetDrawWidth(),
939
                        ftex->GetDrawHeight(), GL_RGBA, GL_FLOAT, &_keypoint_buffer[0]);
940
                htex += 2;
941
        }
942

    
943
        for(int lev = 1 + hist_skip_gpu; lev < hist_level_num  - i; lev++, htex++)
944
        {
945

    
946
                glActiveTexture(GL_TEXTURE0);
947
                ftex->BindTex();
948
                ftex->AttachToFBO(0);
949
                glActiveTexture(GL_TEXTURE1);
950
                htex->BindTex();
951
                ShaderMan::UseShaderGenListStep(ftex->GetTexID(), htex->GetTexID());
952
                ftex->DrawQuad();
953
                ftex->DetachFBO(0);        
954
        }
955
        GLTexImage::UnbindMultiTex(2);
956

    
957
}
958

    
959
//generate feature list on GPU
960
void PyramidNaive::GenerateFeatureList()
961
{
962
        //generate the histogram0pyramid
963
        FrameBufferObject fbo;
964
        glReadBuffer(GL_COLOR_ATTACHMENT0_EXT);
965
        glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
966
        double t1, t2; 
967
        int ocount, reverse = (GlobalUtil::_TruncateMethod == 1);
968
        _featureNum = 0;
969

    
970
        FitHistogramPyramid();
971

    
972
        //for(int i = 0, idx = 0; i < _octave_num; i++)
973
    FOR_EACH_OCTAVE(i, reverse)
974
        {
975
                //output
976
                if(GlobalUtil::_timingO)
977
                {
978
            t1= CLOCK();
979
                        ocount = 0;
980
                        std::cout<<"#"<<i+_octave_min + _down_sample_factor<<":\t";
981
                }
982
                //for(int j = 0; j < param._dog_level_num; j++, idx++)
983
        FOR_EACH_LEVEL(j, reverse)
984
        {
985
            if(GlobalUtil::_TruncateMethod && GlobalUtil::_FeatureCountThreshold > 0 && _featureNum > GlobalUtil::_FeatureCountThreshold) continue;
986

    
987
                        GenerateFeatureList(i, j); 
988
                        if(GlobalUtil::_timingO)        
989
            {
990
                int idx = i * param._dog_level_num + j;
991
                std::cout<< _levelFeatureNum[idx] <<"\t";
992
                ocount += _levelFeatureNum[idx];
993
            }
994
                }
995
                if(GlobalUtil::_timingO)
996
                {        
997
                        t2 = CLOCK(); 
998
                        std::cout << "| \t" << int(ocount) << " :\t(" << (t2 - t1) << ")\n";
999
                }
1000
        }
1001
        if(GlobalUtil::_timingS)glFinish();
1002
        if(GlobalUtil::_verbose)
1003
        {
1004
                std::cout<<"#Features:\t"<<_featureNum<<"\n";
1005
        }
1006
}
1007

    
1008

    
1009
void PyramidGL::GenerateFeatureDisplayVBO()
1010
{
1011
        //use a big VBO to save all the SIFT box vertices
1012
        int w, h, esize; GLint bsize;
1013
        int nvbo = _octave_num * param._dog_level_num;
1014
        //initialize the vbos
1015
        if(_featureDisplayVBO==NULL)
1016
        {
1017
                _featureDisplayVBO = new GLuint[nvbo];
1018
                glGenBuffers( nvbo, _featureDisplayVBO );
1019
    }
1020
    if(_featurePointVBO == NULL)
1021
    {
1022
                _featurePointVBO = new GLuint[nvbo];
1023
                glGenBuffers(nvbo, _featurePointVBO);
1024
        }
1025

    
1026
        FrameBufferObject fbo;
1027
        glReadBuffer(GL_COLOR_ATTACHMENT0_EXT);
1028
        glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
1029
        glActiveTexture(GL_TEXTURE0);
1030
        
1031
        //
1032
        GLTexImage & tempTex = *_descriptorTex;
1033
        //
1034
        for(int i = 0, idx = 0; i < _octave_num; i++)
1035
        {
1036
                for(int j = 0; j < param._dog_level_num; j ++, idx++)
1037
                {
1038
                        GLTexImage * ftex  = _featureTex + idx;
1039

    
1040
                        if(_levelFeatureNum[idx]<=0)continue;
1041
                        //box display vbo
1042
                        int count = _levelFeatureNum[idx]* 10;
1043
                        GetAlignedStorageSize(count, _alignment, w, h);
1044
                        w = (int)ceil(double(count)/ h);
1045

    
1046
                        //input
1047
                        fbo.BindFBO();
1048
                        ftex->BindTex();
1049

    
1050
                        //output
1051
                        tempTex.AttachToFBO(0);
1052
                        GlobalUtil::FitViewPort(w, h);
1053
                        //shader
1054
                        ShaderMan::UseShaderGenVBO(        (float)ftex->GetImgWidth(),  (float) w, 
1055
                                param.GetLevelSigma(j + param._level_min + 1));
1056
                        GLTexImage::DrawQuad(0,  (float)w, 0, (float)h);
1057
                
1058
                        //
1059
                        glBindBuffer(GL_PIXEL_PACK_BUFFER_ARB, _featureDisplayVBO[ idx]);
1060
                        glGetBufferParameteriv(GL_PIXEL_PACK_BUFFER_ARB, GL_BUFFER_SIZE, &bsize);
1061
                        esize = w*h * sizeof(float)*4;
1062
                        //increase size when necessary
1063
                        if(bsize < esize) glBufferData(GL_PIXEL_PACK_BUFFER_ARB, esize*3/2,        NULL, GL_STATIC_DRAW_ARB);
1064
                        
1065
                        //read back if we have enough buffer        
1066
                        glGetBufferParameteriv(GL_PIXEL_PACK_BUFFER_ARB, GL_BUFFER_SIZE, &bsize);
1067
                        if(bsize >= esize)        glReadPixels(0, 0, w, h, GL_RGBA, GL_FLOAT, 0);
1068
                        else glBufferData(GL_PIXEL_PACK_BUFFER_ARB, 0,        NULL, GL_STATIC_DRAW_ARB);
1069
                        glBindBuffer(GL_PIXEL_PACK_BUFFER_ARB, 0);
1070

    
1071

    
1072
                        //copy the texture into vbo
1073
                        fbo.BindFBO();
1074
                        tempTex.AttachToFBO(0);
1075

    
1076
                        ftex->BindTex();
1077
                        ftex->FitTexViewPort();
1078
                        ShaderMan::UseShaderCopyKeypoint();
1079
                        ftex->DrawQuad();
1080

    
1081
                        glBindBuffer(GL_PIXEL_PACK_BUFFER_ARB,  _featurePointVBO[ idx]);
1082
                        glGetBufferParameteriv(GL_PIXEL_PACK_BUFFER_ARB, GL_BUFFER_SIZE, &bsize);
1083
                        esize = ftex->GetImgHeight() * ftex->GetImgWidth()*sizeof(float) *4;
1084

    
1085
                        //increase size when necessary
1086
                        if(bsize < esize)        glBufferData(GL_PIXEL_PACK_BUFFER_ARB, esize*3/2 ,        NULL, GL_STATIC_DRAW_ARB);
1087

    
1088
                        //read back if we have enough buffer
1089
                        glGetBufferParameteriv(GL_PIXEL_PACK_BUFFER_ARB, GL_BUFFER_SIZE, &bsize);
1090
                        if(bsize >= esize) glReadPixels(0, 0, ftex->GetImgWidth(), ftex->GetImgHeight(), GL_RGBA, GL_FLOAT, 0);
1091
                        else  glBufferData(GL_PIXEL_PACK_BUFFER_ARB, 0,        NULL, GL_STATIC_DRAW_ARB);
1092

    
1093
                        glBindBuffer(GL_PIXEL_PACK_BUFFER_ARB, 0);
1094
                        
1095
                }
1096
        }
1097
        glReadBuffer(GL_NONE);
1098
        glFinish();
1099

    
1100
}
1101

    
1102

    
1103

    
1104

    
1105

    
1106
void PyramidNaive::GetFeatureOrientations()
1107
{
1108
        GLTexImage * gtex;
1109
        GLTexImage * stex = NULL;
1110
        GLTexImage * ftex = _featureTex;
1111
        GLTexImage * otex = _orientationTex;
1112
        int sid = 0; 
1113
        int * count         = _levelFeatureNum;
1114
        float sigma, sigma_step = powf(2.0f, 1.0f/param._dog_level_num);
1115
        FrameBufferObject fbo;
1116
        if(_orientationTex)
1117
        {
1118
                GLenum buffers[] = { GL_COLOR_ATTACHMENT0_EXT, GL_COLOR_ATTACHMENT1_EXT };
1119
                glDrawBuffers(2, buffers);
1120
        }else
1121
        {
1122
                glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
1123
        }
1124
        for(int i = 0; i < _octave_num; i++)
1125
        {
1126
                gtex = GetLevelTexture(i+_octave_min, param._level_min + 1);
1127
                if(GlobalUtil::_SubpixelLocalization || GlobalUtil::_KeepExtremumSign)
1128
                        stex = GetBaseLevel(i+_octave_min, DATA_KEYPOINT) + 2;
1129

    
1130
                for(int j = 0; j < param._dog_level_num; j++, ftex++, otex++, count++, gtex++, stex++)
1131
                {
1132
                        if(*count<=0)continue;
1133

    
1134
                        sigma = param.GetLevelSigma(j+param._level_min+1);
1135

    
1136
                        //
1137
                        ftex->FitTexViewPort();
1138

    
1139
                        glActiveTexture(GL_TEXTURE0);
1140
                        ftex->BindTex();
1141
                        glActiveTexture(GL_TEXTURE1);
1142
                        gtex->BindTex();
1143
                        //
1144
                        ftex->AttachToFBO(0);
1145
                        if(_orientationTex)                otex->AttachToFBO(1);
1146
                        if(!_existing_keypoints && (GlobalUtil::_SubpixelLocalization|| GlobalUtil::_KeepExtremumSign))
1147
                        {
1148
                                glActiveTexture(GL_TEXTURE2);
1149
                                stex->BindTex();
1150
                                sid = * stex;
1151
                        }
1152
                        ShaderMan::UseShaderOrientation(gtex->GetTexID(),
1153
                                gtex->GetImgWidth(), gtex->GetImgHeight(),
1154
                                sigma, sid, sigma_step, _existing_keypoints);
1155
                        ftex->DrawQuad();
1156
        //                glFinish();
1157
                        
1158
                }
1159
        }
1160

    
1161
        GLTexImage::UnbindMultiTex(3);
1162
        if(GlobalUtil::_timingS)glFinish();
1163

    
1164
        if(_orientationTex)        fbo.UnattachTex(GL_COLOR_ATTACHMENT1_EXT);
1165

    
1166
}
1167

    
1168

    
1169

    
1170
//to compare with GPU feature list generation
1171
void PyramidNaive::GenerateFeatureListCPU()
1172
{
1173

    
1174
        FrameBufferObject fbo;
1175
        _featureNum = 0;
1176
        GLTexImage * tex = GetBaseLevel(_octave_min);
1177
        float * mem = new float [tex->GetTexWidth()*tex->GetTexHeight()];
1178
        vector<float> list;
1179
        int idx = 0;
1180
        for(int i = 0; i < _octave_num; i++)
1181
        {
1182
                for(int j = 0; j < param._dog_level_num; j++, idx++)
1183
                {
1184
                        tex = GetBaseLevel(_octave_min + i, DATA_KEYPOINT) + j + 2;
1185
                        tex->BindTex();
1186
                        glGetTexImage(GlobalUtil::_texTarget, 0, GL_RED, GL_FLOAT, mem);
1187
                        //tex->AttachToFBO(0);
1188
                        //tex->FitTexViewPort();
1189
                        //glReadPixels(0, 0, tex->GetTexWidth(), tex->GetTexHeight(), GL_RED, GL_FLOAT, mem);
1190
                        //
1191
                        //make a list of 
1192
                        list.resize(0);
1193
                        float * p = mem;
1194
                        int fcount = 0 ;
1195
                        for(int k = 0; k < tex->GetTexHeight(); k++)
1196
                        {
1197
                                for( int m = 0; m < tex->GetTexWidth(); m ++, p++)
1198
                                {
1199
                                        if(*p==0)continue;
1200
                                        if(m ==0 || k ==0 || k >= tex->GetImgHeight() -1 || m >= tex->GetImgWidth() -1 ) continue;
1201
                                        list.push_back(m+0.5f);
1202
                                        list.push_back(k+0.5f);
1203
                                        list.push_back(0);
1204
                                        list.push_back(1);
1205
                                        fcount ++;
1206

    
1207

    
1208
                                }
1209
                        }
1210
                        if(fcount==0)continue;
1211

    
1212

    
1213
                        
1214
                        GLTexImage * ftex = _featureTex+idx;
1215
                        _levelFeatureNum[idx] = (fcount);
1216
                        SetLevelFeatureNum(idx, fcount);
1217

    
1218
                        _featureNum += (fcount);
1219

    
1220

    
1221
                        int fw = ftex->GetImgWidth();
1222
                        int fh = ftex->GetImgHeight();
1223

    
1224
                        list.resize(4*fh*fw);
1225

    
1226
                        ftex->BindTex();
1227
                        ftex->AttachToFBO(0);
1228
        //                glTexImage2D(GlobalUtil::_texTarget, 0, GlobalUtil::_iTexFormat, fw, fh, 0, GL_BGRA, GL_FLOAT, &list[0]);
1229
                        glTexSubImage2D(GlobalUtil::_texTarget, 0, 0, 0, fw, fh, GL_RGBA, GL_FLOAT, &list[0]);
1230
                        //
1231
                }
1232
        }
1233
        GLTexImage::UnbindTex();
1234
        delete mem;
1235
        if(GlobalUtil::_verbose)
1236
        {
1237
                std::cout<<"#Features:\t"<<_featureNum<<"\n";
1238
        }
1239
}
1240

    
1241
#define FEATURELIST_USE_PBO
1242

    
1243
void PyramidGL::ReshapeFeatureListCPU()
1244
{
1245
        //make a compact feature list, each with only one orientation
1246
        //download orientations and the featue list
1247
        //reshape it and upload it
1248

    
1249
        FrameBufferObject fbo;
1250
        int i, szmax =0, sz;
1251
        int n = param._dog_level_num*_octave_num;
1252
        for( i = 0; i < n; i++)
1253
        {
1254
                sz = _featureTex[i].GetImgWidth() * _featureTex[i].GetImgHeight();
1255
                if(sz > szmax ) szmax = sz;
1256
        }
1257
        float * buffer = new float[szmax*24];
1258
        float * buffer1 = buffer;
1259
        float * buffer2 = buffer + szmax*4;
1260
        float * buffer3 = buffer + szmax*8;
1261

    
1262
        glReadBuffer(GL_COLOR_ATTACHMENT0_EXT);
1263
        glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
1264

    
1265
#ifdef FEATURELIST_USE_PBO
1266
    GLuint ListUploadPBO;
1267
    glGenBuffers(1, &ListUploadPBO); 
1268
        glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB,  ListUploadPBO);
1269
        glBufferData(GL_PIXEL_UNPACK_BUFFER_ARB, szmax * 8 * sizeof(float),        NULL, GL_STREAM_DRAW);
1270
#endif
1271

    
1272
        _featureNum = 0;
1273

    
1274
#ifdef NO_DUPLICATE_DOWNLOAD
1275
        const double twopi = 2.0*3.14159265358979323846;
1276
        _keypoint_buffer.resize(0);
1277
        float os = _octave_min>=0? float(1<<_octave_min): 1.0f/(1<<(-_octave_min));
1278
        if(_down_sample_factor>0) os *= float(1<<_down_sample_factor); 
1279
        float offset = GlobalUtil::_LoweOrigin? 0 : 0.5f;
1280
#endif
1281

    
1282
        for(i = 0; i < n; i++)
1283
        {
1284
                if(_levelFeatureNum[i]==0)continue;
1285

    
1286
                _featureTex[i].AttachToFBO(0);
1287
                _featureTex[i].FitTexViewPort();
1288
                glReadPixels(0, 0, _featureTex[i].GetImgWidth(), _featureTex[i].GetImgHeight(),GL_RGBA, GL_FLOAT, buffer1);
1289
                
1290
                int fcount =0, ocount;
1291
                float * src = buffer1;
1292
                float * orientation  = buffer2;
1293
                float * des = buffer3;
1294
                if(GlobalUtil::_OrientationPack2 == 0)
1295
                {        
1296
                        //read back orientations from another texture
1297
                        _orientationTex[i].AttachToFBO(0);
1298
                        glReadPixels(0, 0, _orientationTex[i].GetImgWidth(), _orientationTex[i].GetImgHeight(),GL_RGBA, GL_FLOAT, buffer2);
1299
                        //make the feature list
1300
                        for(int j = 0; j < _levelFeatureNum[i]; j++, src+=4, orientation+=4)
1301
                        {
1302
                                if(_existing_keypoints) 
1303
                                {
1304
                                        des[0] = src[0];
1305
                                        des[1] = src[1];
1306
                                        des[2] = orientation[0];
1307
                                        des[3] = src[3];                        
1308
                                        fcount++;
1309
                                        des += 4;
1310
                                }else
1311
                                {
1312
                                        ocount = (int)src[2];
1313
                                        for(int k = 0 ; k < ocount; k++, des+=4)
1314
                                        {
1315
                                                des[0] = src[0];
1316
                                                des[1] = src[1];
1317
                                                des[2] = orientation[k];
1318
                                                des[3] = src[3];                        
1319
                                                fcount++;
1320
                                        }
1321
                                }
1322
                        }
1323
                }else
1324
                {
1325
                        _featureTex[i].DetachFBO(0);
1326
                        const static double factor  = 2.0*3.14159265358979323846/65535.0;
1327
                        for(int j = 0; j < _levelFeatureNum[i]; j++, src+=4)
1328
                        {
1329
                                unsigned short * orientations = (unsigned short*) (&src[2]);
1330
                                if(_existing_keypoints) 
1331
                                {
1332
                                        des[0] = src[0];
1333
                                        des[1] = src[1];
1334
                                        des[2] = float( factor* orientations[0]);
1335
                                        des[3] = src[3];                        
1336
                                        fcount++;
1337
                                        des += 4;
1338
                                }else
1339
                                {
1340
                                        if(orientations[0] != 65535)
1341
                                        {
1342
                                                des[0] = src[0];
1343
                                                des[1] = src[1];
1344
                                                des[2] = float( factor* orientations[0]);
1345
                                                des[3] = src[3];                        
1346
                                                fcount++;
1347
                                                des += 4;
1348

    
1349
                                                if(orientations[1] != 65535)
1350
                                                {
1351
                                                        des[0] = src[0];
1352
                                                        des[1] = src[1];
1353
                                                        des[2] = float(factor* orientations[1]);
1354
                                                        des[3] = src[3];                        
1355
                                                        fcount++;
1356
                                                        des += 4;
1357
                                                }
1358
                                        }
1359
                                }
1360
                        }
1361
                }
1362

    
1363
                //texture size --------------
1364
                SetLevelFeatureNum(i, fcount);
1365
                int nfw = _featureTex[i].GetImgWidth();
1366
                int nfh = _featureTex[i].GetImgHeight();
1367
                int sz = nfh * nfw;
1368
                if(sz > fcount) memset(des, 0, sizeof(float) * (sz - fcount) * 4);
1369

    
1370
#ifndef FEATURELIST_USE_PBO
1371
                _featureTex[i].BindTex();
1372
                glTexSubImage2D(GlobalUtil::_texTarget, 0, 0, 0, nfw, nfh, GL_RGBA, GL_FLOAT, buffer3);
1373
                _featureTex[i].UnbindTex();
1374
#else
1375
        float* mem = (float*) glMapBuffer(GL_PIXEL_UNPACK_BUFFER_ARB,  GL_WRITE_ONLY);
1376
        memcpy(mem, buffer3,  sz * 4 * sizeof(float) );
1377
        glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER_ARB);
1378
                _featureTex[i].BindTex();
1379
                glTexSubImage2D(GlobalUtil::_texTarget, 0, 0, 0, nfw, nfh, GL_RGBA, GL_FLOAT, 0);
1380
                _featureTex[i].UnbindTex();
1381
#endif
1382

    
1383
#ifdef NO_DUPLICATE_DOWNLOAD
1384
                if(fcount > 0)
1385
                {
1386
                        float oss = os * (1 << (i / param._dog_level_num));
1387
                        _keypoint_buffer.resize((_featureNum + fcount) * 4);
1388
                        float* ds = &_keypoint_buffer[_featureNum * 4];
1389
                        float* fs = buffer3;
1390
                        for(int k = 0;  k < fcount; k++, ds+=4, fs+=4)
1391
                        {
1392
                                ds[0] = oss*(fs[0]-0.5f) + offset;        //x
1393
                                ds[1] = oss*(fs[1]-0.5f) + offset;        //y
1394
                                ds[3] = (float)fmod(twopi-fs[2], twopi);        //orientation, mirrored
1395
                                ds[2] = oss*fs[3];  //scale
1396
                        }
1397
                }
1398
#endif
1399
                _levelFeatureNum[i] = fcount;
1400
                _featureNum += fcount;
1401
        }
1402

    
1403
        delete[] buffer;
1404
        if(GlobalUtil::_verbose)
1405
        {
1406
                std::cout<<"#Features MO:\t"<<_featureNum<<endl;
1407
        }
1408
    ///////////////////////////////////
1409
#ifdef FEATURELIST_USE_PBO
1410
        glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB,  0);
1411
    glDeleteBuffers(1, &ListUploadPBO);
1412
#endif
1413
}
1414

    
1415

    
1416

    
1417
inline void PyramidGL::SetLevelFeatureNum(int idx, int fcount)
1418
{
1419
        int fw, fh;
1420
        GLTexImage * ftex = _featureTex + idx;
1421
        //set feature texture size. normally fh will be one
1422
        GetTextureStorageSize(fcount, fw, fh);
1423
        if(fcount >  ftex->GetTexWidth()*ftex->GetTexHeight())
1424
        {
1425
        if(GlobalUtil::_timingS && GlobalUtil::_verbose)
1426
                        std::cout<<"Too many features, reallocate texture\n";
1427

    
1428
                ftex->InitTexture(fw, fh, 0);
1429
                if(_orientationTex)                        _orientationTex[idx].InitTexture(fw, fh, 0);
1430

    
1431
        }
1432
        if(GlobalUtil::_NarrowFeatureTex)
1433
                fh = fcount ==0? 0:(int)ceil(double(fcount)/fw);
1434
        else
1435
                fw = fcount ==0? 0:(int)ceil(double(fcount)/fh);
1436
        ftex->SetImageSize(fw, fh);
1437
        if(_orientationTex)                _orientationTex[idx].SetImageSize(fw, fh);
1438
}
1439

    
1440
void PyramidGL::CleanUpAfterSIFT()
1441
{
1442
        GLTexImage::UnbindMultiTex(3);
1443
        ShaderMan::UnloadProgram();
1444
        FrameBufferObject::DeleteGlobalFBO();
1445
        GlobalUtil::CleanupOpenGL();
1446
}
1447

    
1448
void PyramidNaive::GetSimplifiedOrientation()
1449
{
1450
        //
1451
        int idx = 0;
1452
//        int n = _octave_num  * param._dog_level_num;
1453
        float sigma, sigma_step = powf(2.0f, 1.0f/param._dog_level_num); 
1454
        GLTexImage * ftex = _featureTex;
1455

    
1456
        FrameBufferObject fbo;
1457
        glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
1458
        for(int i = 0; i < _octave_num; i++)
1459
        {
1460
                GLTexImage *gtex = GetLevelTexture(i+_octave_min, 2+param._level_min);
1461
                for(int j = 0; j < param._dog_level_num; j++, ftex++,  gtex++, idx ++)
1462
                {
1463
                        if(_levelFeatureNum[idx]<=0)continue;
1464
                        sigma = param.GetLevelSigma(j+param._level_min+1);
1465

    
1466
                        //
1467
                        ftex->AttachToFBO(0);
1468
                        ftex->FitTexViewPort();
1469

    
1470
                        glActiveTexture(GL_TEXTURE0);
1471
                        ftex->BindTex();
1472
                        glActiveTexture(GL_TEXTURE1);
1473
                        gtex->BindTex();
1474

    
1475
                        ShaderMan::UseShaderSimpleOrientation(gtex->GetTexID(), sigma, sigma_step);
1476
                        ftex->DrawQuad();
1477
                }
1478
        }
1479

    
1480
        GLTexImage::UnbindMultiTex(2);
1481

    
1482
}
1483

    
1484

    
1485
#ifdef USE_SSE_FOR_SIFTGPU
1486
        static inline float dotproduct_128d(float * p)
1487
        {
1488
                float z = 0.0f;
1489
                __m128 sse =_mm_load_ss(&z);
1490
                float* pf = (float*) (&sse);
1491
                for( int i = 0; i < 32; i++, p+=4)
1492
                {
1493
                        __m128 ps = _mm_loadu_ps(p);
1494
                        sse = _mm_add_ps(sse,  _mm_mul_ps(ps, ps));
1495
                }
1496
                return pf[0] + pf[1] + pf[2] + pf[3];
1497

    
1498
        }
1499
        static inline void multiply_and_truncate_128d(float* p, float m)
1500
        {
1501
                float z = 0.2f;
1502
                __m128 t = _mm_load_ps1(&z);
1503
                __m128 r = _mm_load_ps1(&m);
1504
                for(int i = 0; i < 32; i++, p+=4)
1505
                {
1506
                        __m128 ps = _mm_loadu_ps(p);
1507
                        _mm_storeu_ps(p, _mm_min_ps(_mm_mul_ps(ps, r), t));
1508
                }
1509
        }
1510
        static inline void multiply_128d(float* p, float m)
1511
        {
1512
                __m128 r = _mm_load_ps1(&m);
1513
                for(int i = 0; i < 32; i++, p+=4)
1514
                {
1515
                        __m128 ps = _mm_loadu_ps(p);
1516
                        _mm_storeu_ps(p, _mm_mul_ps(ps, r));
1517
                }
1518
        }
1519
#endif
1520

    
1521

    
1522
inline void PyramidGL::NormalizeDescriptor(int num, float*pd)
1523
{
1524

    
1525
#ifdef USE_SSE_FOR_SIFTGPU
1526
        for(int k = 0; k < num; k++, pd +=128)
1527
        {
1528
                float sq;
1529
                //normalize and truncate to .2
1530
                sq = dotproduct_128d(pd);                sq = 1.0f / sqrtf(sq);
1531
                multiply_and_truncate_128d(pd, sq);
1532

    
1533
                //renormalize
1534
                sq = dotproduct_128d(pd);                sq = 1.0f / sqrtf(sq);
1535
                multiply_128d(pd, sq);
1536
        }
1537
#else
1538
        //descriptor normalization runs on cpu for OpenGL implemenations
1539
        for(int k = 0; k < num; k++, pd +=128)
1540
        {
1541
                int v;
1542
                float* ppd, sq = 0;
1543
                //int v;
1544
                //normalize
1545
                ppd = pd;
1546
                for(v = 0 ; v < 128; v++, ppd++)        sq += (*ppd)*(*ppd);
1547
                sq = 1.0f / sqrtf(sq);
1548
                //truncate to .2
1549
                ppd = pd;
1550
                for(v = 0; v < 128; v ++, ppd++)        *ppd = min(*ppd*sq, 0.2f);
1551

    
1552
                //renormalize
1553
                ppd = pd; sq = 0;
1554
                for(v = 0; v < 128; v++, ppd++)        sq += (*ppd)*(*ppd);
1555
                sq = 1.0f / sqrtf(sq);
1556

    
1557
                ppd = pd;
1558
                for(v = 0; v < 128; v ++, ppd++)        *ppd = *ppd*sq;
1559
        }
1560

    
1561
#endif
1562
}
1563

    
1564
inline void PyramidGL::InterlaceDescriptorF2(int w, int h, float* buf, float* pd, int step)
1565
{
1566
        /*
1567
        if(GlobalUtil::_DescriptorPPR == 8)
1568
        {
1569
                const int dstep = w * 128;
1570
                float* pp1 = buf;
1571
                float* pp2 = buf + step;
1572

1573
                for(int u = 0; u < h ; u++, pd+=dstep)
1574
                {
1575
                        int v; 
1576
                        float* ppd = pd;
1577
                        for(v= 0; v < w; v++)
1578
                        {
1579
                                for(int t = 0; t < 8; t++)
1580
                                {
1581
                                        *ppd++ = *pp1++;*ppd++ = *pp1++;*ppd++ = *pp1++;*ppd++ = *pp1++;
1582
                                        *ppd++ = *pp2++;*ppd++ = *pp2++;*ppd++ = *pp2++;*ppd++ = *pp2++;
1583
                                }
1584
                                ppd += 64;
1585
                        }
1586
                        ppd = pd + 64;
1587
                        for(v= 0; v < w; v++)
1588
                        {
1589
                                for(int t = 0; t < 8; t++)
1590
                                {
1591
                                        *ppd++ = *pp1++;*ppd++ = *pp1++;*ppd++ = *pp1++;*ppd++ = *pp1++;
1592
                                        *ppd++ = *pp2++;*ppd++ = *pp2++;*ppd++ = *pp2++;*ppd++ = *pp2++;
1593
                                }
1594
                                ppd += 64;
1595
                        }
1596
                }
1597

1598
        }else */
1599
        if(GlobalUtil::_DescriptorPPR == 8)
1600
        {
1601
                //interlace
1602
                for(int k = 0; k < 2; k++)
1603
                {
1604
                        float* pp = buf + k * step;
1605
                        float* ppd = pd + k * 4;
1606
                        for(int u = 0; u < h ; u++)
1607
                        {
1608
                                int v; 
1609
                                for(v= 0; v < w; v++)
1610
                                {
1611
                                        for(int t = 0; t < 8; t++)
1612
                                        {
1613
                                                ppd[0] = pp[0];
1614
                                                ppd[1] = pp[1];
1615
                                                ppd[2] = pp[2];
1616
                                                ppd[3] = pp[3];
1617
                                                ppd += 8;
1618
                                                pp+= 4;
1619
                                        }
1620
                                        ppd += 64;
1621
                                }
1622
                                ppd += ( 64 - 128 * w );
1623
                                for(v= 0; v < w; v++)
1624
                                {
1625
                                        for(int t = 0; t < 8; t++)
1626
                                        {
1627
                                                ppd[0] = pp[0];
1628
                                                ppd[1] = pp[1];
1629
                                                ppd[2] = pp[2];
1630
                                                ppd[3] = pp[3];
1631

    
1632
                                                ppd += 8;
1633
                                                pp+= 4;
1634
                                        }
1635
                                        ppd += 64;
1636
                                }
1637
                                ppd -=64;
1638
                        }
1639
                }
1640
        }else if(GlobalUtil::_DescriptorPPR == 4)
1641
        {
1642

    
1643
        }
1644

    
1645

    
1646

    
1647
}
1648
void PyramidGL::GetFeatureDescriptors()
1649
{
1650
        //descriptors...
1651
        float sigma;
1652
        int idx, i, j, k, w, h;
1653
        int ndf = 32 / GlobalUtil::_DescriptorPPT; //number of textures
1654
        int block_width = GlobalUtil::_DescriptorPPR;
1655
        int block_height = GlobalUtil::_DescriptorPPT/GlobalUtil::_DescriptorPPR;
1656
        float* pd =  &_descriptor_buffer[0], * pbuf  = NULL;
1657
        vector<float>read_buffer, descriptor_buffer2;
1658

    
1659
        //use another buffer, if we need to re-order the descriptors
1660
        if(_keypoint_index.size() > 0)
1661
        {
1662
                descriptor_buffer2.resize(_descriptor_buffer.size());
1663
                pd = &descriptor_buffer2[0];
1664
        }
1665
        FrameBufferObject fbo;
1666

    
1667
        GLTexImage * gtex, *otex, * ftex;
1668
        GLenum buffers[8] = { 
1669
                GL_COLOR_ATTACHMENT0_EXT,                GL_COLOR_ATTACHMENT1_EXT ,
1670
                GL_COLOR_ATTACHMENT2_EXT,                GL_COLOR_ATTACHMENT3_EXT ,
1671
                GL_COLOR_ATTACHMENT4_EXT,                GL_COLOR_ATTACHMENT5_EXT ,
1672
                GL_COLOR_ATTACHMENT6_EXT,                GL_COLOR_ATTACHMENT7_EXT ,
1673
        };
1674

    
1675
        glDrawBuffers(ndf, buffers);
1676
        glReadBuffer(GL_COLOR_ATTACHMENT0_EXT);
1677

    
1678

    
1679
        for( i = 0, idx = 0, ftex = _featureTex; i < _octave_num; i++)
1680
        {
1681
                gtex = GetBaseLevel(i + _octave_min, DATA_GRAD) + 1;
1682
                otex = GetBaseLevel(i + _octave_min, DATA_ROT)  + 1;
1683
                for( j = 0; j < param._dog_level_num; j++, ftex++, idx++, gtex++, otex++)
1684
                {
1685
                        if(_levelFeatureNum[idx]==0)continue;
1686

    
1687
            sigma = IsUsingRectDescription()?  0 : param.GetLevelSigma(j+param._level_min+1);
1688
                        int count = _levelFeatureNum[idx] * block_width;
1689
                        GetAlignedStorageSize(count, block_width, w, h);
1690
                        h = ((int)ceil(double(count) / w)) * block_height;
1691

    
1692
                        //not enought space for holding the descriptor data
1693
                        if(w > _descriptorTex[0].GetTexWidth() || h > _descriptorTex[0].GetTexHeight())
1694
                        {
1695
                                for(k = 0; k < ndf; k++)_descriptorTex[k].InitTexture(w, h);
1696
                        }
1697
                        for(k = 0; k < ndf; k++)        _descriptorTex[k].AttachToFBO(k);
1698
                        GlobalUtil::FitViewPort(w, h);
1699
                        glActiveTexture(GL_TEXTURE0);
1700
                        ftex->BindTex();
1701
                        glActiveTexture(GL_TEXTURE1);
1702
                        gtex->BindTex();
1703
                        if(otex!=gtex)
1704
                        {
1705
                                glActiveTexture(GL_TEXTURE2);
1706
                                otex->BindTex();
1707
                        }
1708

    
1709
                        ShaderMan::UseShaderDescriptor(gtex->GetTexID(), otex->GetTexID(), 
1710
                                w, ftex->GetImgWidth(), gtex->GetImgWidth(), gtex->GetImgHeight(), sigma);
1711
                        GLTexImage::DrawQuad(0, (float)w, 0, (float)h);
1712

    
1713
                         //read back float format descriptors and do normalization on CPU
1714
                        int step = w*h*4;
1715
                        if((unsigned int)step*ndf > read_buffer.size())
1716
                        {
1717
                                read_buffer.resize(ndf*step);
1718
                        }
1719
                        pbuf = &read_buffer[0];
1720
                        
1721
                        //read back
1722
                        for(k = 0; k < ndf; k++, pbuf+=step)
1723
                        {
1724
                                glReadBuffer(GL_COLOR_ATTACHMENT0_EXT + k);
1725
                                glReadPixels(0, 0, w, h, GL_RGBA, GL_FLOAT, pbuf);
1726
                        }
1727
        
1728
                        //the following two steps run on cpu, so better cpu better speed
1729
                        //and release version can be a lot faster than debug version
1730
                        //interlace data on the two texture to get the descriptor
1731
                        InterlaceDescriptorF2(w / block_width, h / block_height, &read_buffer[0], pd, step);
1732
                        
1733
                        //need to do normalization
1734
                        //the new version uses SSE to speed up this part
1735
                        if(GlobalUtil::_NormalizedSIFT) NormalizeDescriptor(_levelFeatureNum[idx], pd);
1736

    
1737
                        pd += 128*_levelFeatureNum[idx];
1738
                        glReadBuffer(GL_NONE);
1739
                }
1740
        }
1741

    
1742

    
1743
        //finally, put the descriptor back to their original order for existing keypoint list.
1744
        if(_keypoint_index.size() > 0)
1745
        {
1746
                for(i = 0; i < _featureNum; ++i)
1747
                {
1748
                        int index = _keypoint_index[i];
1749
                        memcpy(&_descriptor_buffer[index*128], &descriptor_buffer2[i*128], 128 * sizeof(float));
1750
                }
1751
        }
1752

    
1753
        ////////////////////////
1754
        GLTexImage::UnbindMultiTex(3); 
1755
        glDrawBuffer(GL_NONE);
1756
        ShaderMan::UnloadProgram();
1757
        if(GlobalUtil::_timingS)glFinish();
1758
        for(i = 0; i < ndf; i++) fbo.UnattachTex(GL_COLOR_ATTACHMENT0_EXT +i);
1759

    
1760
}
1761

    
1762

    
1763
void PyramidGL::DownloadKeypoints()
1764
{
1765
        const double twopi = 2.0*3.14159265358979323846;
1766
        int idx = 0;
1767
        float * buffer = &_keypoint_buffer[0];
1768
        vector<float> keypoint_buffer2;
1769
        //use a different keypoint buffer when processing with an exisint features list
1770
        //without orientation information. 
1771
        if(_keypoint_index.size() > 0)
1772
        {
1773
                keypoint_buffer2.resize(_keypoint_buffer.size());
1774
                buffer = &keypoint_buffer2[0];
1775
        }
1776
        float * p = buffer, *ps, sigma;
1777
        GLTexImage * ftex = _featureTex;
1778
        FrameBufferObject fbo;
1779
        ftex->FitRealTexViewPort();
1780
        /////////////////////
1781
        float os = _octave_min>=0? float(1<<_octave_min): 1.0f/(1<<(-_octave_min));
1782
        if(_down_sample_factor>0) os *= float(1<<_down_sample_factor); 
1783
        float offset = GlobalUtil::_LoweOrigin? 0 : 0.5f;
1784
        /////////////////////
1785
        for(int i = 0; i < _octave_num; i++, os *= 2.0f)
1786
        {
1787
                
1788
                for(int j = 0; j  < param._dog_level_num; j++, idx++, ftex++)
1789
                {
1790

    
1791
                        if(_levelFeatureNum[idx]>0)
1792
                        {        
1793
                                ftex->AttachToFBO(0);
1794
                                glReadPixels(0, 0, ftex->GetImgWidth(), ftex->GetImgHeight(),GL_RGBA, GL_FLOAT, p);
1795
                                ps = p;
1796
                                for(int k = 0;  k < _levelFeatureNum[idx]; k++, ps+=4)
1797
                                {
1798
                                        ps[0] = os*(ps[0]-0.5f) + offset;        //x
1799
                                        ps[1] = os*(ps[1]-0.5f) + offset;        //y
1800
                                        sigma = os*ps[3]; 
1801
                                        ps[3] = (float)fmod(twopi-ps[2], twopi);        //orientation, mirrored
1802
                                        ps[2] = sigma;  //scale
1803
                                }
1804
                                p+= 4* _levelFeatureNum[idx];
1805
                        }
1806
                }
1807
        }
1808

    
1809
        //put the feature into their original order
1810

    
1811
        if(_keypoint_index.size() > 0)
1812
        {
1813
                for(int i = 0; i < _featureNum; ++i)
1814
                {
1815
                        int index = _keypoint_index[i];
1816
                        memcpy(&_keypoint_buffer[index*4], &keypoint_buffer2[i*4], 4 * sizeof(float));
1817
                }
1818
        }
1819
}
1820

    
1821

    
1822
void PyramidGL::GenerateFeatureListTex()
1823
{
1824
        //generate feature list texture from existing keypoints
1825
        //do feature sorting in the same time?
1826

    
1827
        FrameBufferObject fbo;
1828
        vector<float> list;
1829
        int idx = 0;
1830
        const double twopi = 2.0*3.14159265358979323846;
1831
        float sigma_half_step = powf(2.0f, 0.5f / param._dog_level_num);
1832
        float octave_sigma = _octave_min>=0? float(1<<_octave_min): 1.0f/(1<<(-_octave_min));
1833
        float offset = GlobalUtil::_LoweOrigin? 0 : 0.5f; 
1834
        if(_down_sample_factor>0) octave_sigma *= float(1<<_down_sample_factor); 
1835

    
1836
    
1837
    std::fill(_levelFeatureNum, _levelFeatureNum + _octave_num * param._dog_level_num, 0);
1838

    
1839
        _keypoint_index.resize(0); // should already be 0
1840
        for(int i = 0; i < _octave_num; i++, octave_sigma*= 2.0f)
1841
        {
1842
                for(int j = 0; j < param._dog_level_num; j++, idx++)
1843
                {
1844
                        list.resize(0);
1845
                        float level_sigma = param.GetLevelSigma(j + param._level_min + 1) * octave_sigma;
1846
                        float sigma_min = level_sigma / sigma_half_step;
1847
                        float sigma_max = level_sigma * sigma_half_step;
1848
                        int fcount = 0 ;
1849
                        for(int k = 0; k < _featureNum; k++)
1850
                        {
1851
                                float * key = &_keypoint_buffer[k*4];
1852
                float sigmak = key[2]; 
1853

    
1854
                //////////////////////////////////////
1855
                if(IsUsingRectDescription()) sigmak = min(key[2], key[3]) / 12.0f; 
1856

    
1857
                if( (sigmak >= sigma_min && sigmak < sigma_max)
1858
                                        ||(sigmak < sigma_min && i ==0 && j == 0)
1859
                                        ||(sigmak > sigma_max && j == param._dog_level_num - 1&& 
1860
                            (i == _octave_num -1  || GlobalUtil::_KeyPointListForceLevel0)))
1861
                                {
1862
                                        //add this keypoint to the list
1863
                                        list.push_back((key[0] - offset) / octave_sigma + 0.5f);
1864
                                        list.push_back((key[1] - offset) / octave_sigma + 0.5f);
1865
                    if(IsUsingRectDescription())
1866
                    {
1867
                        list.push_back(key[2] / octave_sigma);
1868
                        list.push_back(key[3] / octave_sigma);
1869
                    }else
1870
                    {
1871
                                            list.push_back((float)fmod(twopi-key[3], twopi));
1872
                                            list.push_back(key[2] / octave_sigma);
1873
                    }
1874
                                        fcount ++;
1875
                                        //save the index of keypoints
1876
                                        _keypoint_index.push_back(k);
1877
                                }
1878
                        }
1879

    
1880
                        _levelFeatureNum[idx] = fcount;
1881
                        if(fcount==0)continue;
1882
                        GLTexImage * ftex = _featureTex+idx;
1883

    
1884
                        SetLevelFeatureNum(idx, fcount);
1885

    
1886
                        int fw = ftex->GetImgWidth();
1887
                        int fh = ftex->GetImgHeight();
1888

    
1889
                        list.resize(4*fh*fw);
1890

    
1891
                        ftex->BindTex();
1892
                        ftex->AttachToFBO(0);
1893
                        glTexSubImage2D(GlobalUtil::_texTarget, 0, 0, 0, fw, fh, GL_RGBA, GL_FLOAT, &list[0]);
1894

    
1895
            if( fcount == _featureNum) _keypoint_index.resize(0);
1896
                }
1897
        if( GlobalUtil::_KeyPointListForceLevel0 ) break;
1898
        }
1899
        GLTexImage::UnbindTex();
1900
        if(GlobalUtil::_verbose)
1901
        {
1902
                std::cout<<"#Features:\t"<<_featureNum<<"\n";
1903
        }
1904
}
1905

    
1906

    
1907

    
1908
PyramidPacked::PyramidPacked(SiftParam& sp): PyramidGL(sp)
1909
{
1910
        _allPyramid = NULL;
1911
}
1912

    
1913
PyramidPacked::~PyramidPacked()
1914
{
1915
        DestroyPyramidData();
1916
}
1917

    
1918

    
1919
//build the gaussian pyrmaid
1920

    
1921
void PyramidPacked::BuildPyramid(GLTexInput * input)
1922
{
1923
        //
1924
        USE_TIMING();
1925
        GLTexImage * tex, *tmp;
1926
        FilterProgram ** filter;
1927
        FrameBufferObject fbo;
1928

    
1929
        glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
1930
        input->FitTexViewPort();
1931

    
1932
        for (int i = _octave_min; i < _octave_min + _octave_num; i++)
1933
        {
1934
                tex = GetBaseLevel(i);
1935
                tmp = GetBaseLevel(i, DATA_DOG) + 2; //use this as a temperory texture
1936

    
1937
                
1938
                filter = ShaderMan::s_bag->f_gaussian_step;
1939

    
1940
                OCTAVE_START();
1941

    
1942
                if( i == _octave_min )
1943
                {
1944
                        if(i < 0)        TextureUpSample(tex, input, 1<<(-i-1));                        
1945
                        else            TextureDownSample(tex, input, 1<<(i+1));
1946
            ShaderMan::FilterInitialImage(tex, tmp); 
1947
                }else
1948
                {
1949
                        TextureDownSample(tex, GetLevelTexture(i-1, param._level_ds)); 
1950
                        ShaderMan::FilterSampledImage(tex, tmp); 
1951
                }
1952
                LEVEL_FINISH();                
1953

    
1954
                for(int j = param._level_min + 1; j <=  param._level_max ; j++, tex++, filter++)
1955
                {
1956
                        // filtering
1957
            ShaderMan::FilterImage(*filter, tex+1, tex, tmp);
1958
                        LEVEL_FINISH();
1959
                }
1960

    
1961
                OCTAVE_FINISH();
1962

    
1963
        }
1964
        if(GlobalUtil::_timingS)        glFinish();
1965
        UnloadProgram();        
1966
}
1967

    
1968
void PyramidPacked::ComputeGradient()
1969
{
1970
        
1971
        //first pass, compute dog, gradient, orientation
1972
        GLenum buffers[4] = { 
1973
                GL_COLOR_ATTACHMENT0_EXT,                GL_COLOR_ATTACHMENT1_EXT ,
1974
                GL_COLOR_ATTACHMENT2_EXT,                GL_COLOR_ATTACHMENT3_EXT
1975
        };
1976

    
1977
        int i, j;
1978
        double ts, t1;
1979
        FrameBufferObject fbo;
1980

    
1981
        if(GlobalUtil::_timingS && GlobalUtil::_verbose)ts = CLOCK();
1982

    
1983
        for(i = _octave_min; i < _octave_min + _octave_num; i++)
1984
        {
1985
                GLTexImage * gus = GetBaseLevel(i) +  1;
1986
                GLTexImage * dog = GetBaseLevel(i, DATA_DOG) +  1;
1987
                GLTexImage * grd = GetBaseLevel(i, DATA_GRAD) +  1;
1988
                GLTexImage * rot = GetBaseLevel(i, DATA_ROT) +  1;
1989
                glDrawBuffers(3, buffers);
1990
                gus->FitTexViewPort();
1991
                //compute the gradient
1992
                for(j = 0; j <  param._dog_level_num ; j++, gus++, dog++, grd++, rot++)
1993
                {
1994
                        //gradient, dog, orientation
1995
                        glActiveTexture(GL_TEXTURE0);
1996
                        gus->BindTex();
1997
                        glActiveTexture(GL_TEXTURE1);
1998
                        (gus-1)->BindTex();
1999
                        //output
2000
                        dog->AttachToFBO(0);
2001
                        grd->AttachToFBO(1);
2002
                        rot->AttachToFBO(2);
2003
                        ShaderMan::UseShaderGradientPass((gus-1)->GetTexID());
2004
                        //compute
2005
                        dog->DrawQuadMT4();
2006
                }
2007
        }
2008
        if(GlobalUtil::_timingS)
2009
        {
2010
                glFinish();
2011
                if(GlobalUtil::_verbose)
2012
                {
2013
                        t1 = CLOCK();
2014
                        std::cout        <<"<Gradient, DOG  >\t"<<(t1-ts)<<"\n";
2015
                }
2016
        }
2017
        GLTexImage::DetachFBO(1);
2018
        GLTexImage::DetachFBO(2);
2019
        UnloadProgram();
2020
        GLTexImage::UnbindMultiTex(3);
2021
        fbo.UnattachTex(GL_COLOR_ATTACHMENT1_EXT);
2022
}
2023

    
2024
void PyramidPacked::DetectKeypointsEX()
2025
{
2026

    
2027
        //first pass, compute dog, gradient, orientation
2028
        GLenum buffers[4] = { 
2029
                GL_COLOR_ATTACHMENT0_EXT,                GL_COLOR_ATTACHMENT1_EXT ,
2030
                GL_COLOR_ATTACHMENT2_EXT,                GL_COLOR_ATTACHMENT3_EXT
2031
        };
2032

    
2033
        int i, j;
2034
        double t0, t, ts, t1, t2;
2035
        FrameBufferObject fbo;
2036

    
2037
        if(GlobalUtil::_timingS && GlobalUtil::_verbose)ts = CLOCK();
2038

    
2039
        for(i = _octave_min; i < _octave_min + _octave_num; i++)
2040
        {
2041
                GLTexImage * gus = GetBaseLevel(i) + 1;
2042
                GLTexImage * dog = GetBaseLevel(i, DATA_DOG) + 1;
2043
                GLTexImage * grd = GetBaseLevel(i, DATA_GRAD) + 1;
2044
                GLTexImage * rot = GetBaseLevel(i, DATA_ROT) + 1;
2045
                glDrawBuffers(3, buffers);
2046
                gus->FitTexViewPort();
2047
                //compute the gradient
2048
                for(j = param._level_min +1; j <=  param._level_max ; j++, gus++, dog++, grd++, rot++)
2049
                {
2050
                        //gradient, dog, orientation
2051
                        glActiveTexture(GL_TEXTURE0);
2052
                        gus->BindTex();
2053
                        glActiveTexture(GL_TEXTURE1);
2054
                        (gus-1)->BindTex();
2055
                        //output
2056
                        dog->AttachToFBO(0);
2057
                        grd->AttachToFBO(1);
2058
                        rot->AttachToFBO(2);
2059
                        ShaderMan::UseShaderGradientPass((gus-1)->GetTexID());
2060
                        //compute
2061
                        dog->DrawQuadMT4();
2062
                }
2063
        }
2064
        if(GlobalUtil::_timingS && GlobalUtil::_verbose)
2065
        {
2066
                glFinish();
2067
                t1 = CLOCK();
2068
        }
2069
        GLTexImage::DetachFBO(1);
2070
        GLTexImage::DetachFBO(2);
2071
        //glDrawBuffers(1, buffers);
2072
        glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
2073
        
2074

    
2075
        GlobalUtil::CheckErrorsGL();
2076

    
2077
        for ( i = _octave_min; i < _octave_min + _octave_num; i++)
2078
        {
2079
                if(GlobalUtil::_timingO)
2080
                {
2081
                        t0 = CLOCK();
2082
                        std::cout<<"#"<<(i + _down_sample_factor)<<"\t";
2083
                }
2084
                GLTexImage * dog = GetBaseLevel(i, DATA_DOG) + 2;
2085
                GLTexImage * key = GetBaseLevel(i, DATA_KEYPOINT) +2;
2086
                key->FitTexViewPort();
2087

    
2088
                for( j = param._level_min +2; j <  param._level_max ; j++, dog++, key++)
2089
                {
2090
                        if(GlobalUtil::_timingL)t = CLOCK();                
2091
                        key->AttachToFBO(0);
2092
                        glActiveTexture(GL_TEXTURE0);
2093
                        dog->BindTex();
2094
                        glActiveTexture(GL_TEXTURE1);
2095
                        (dog+1)->BindTex();
2096
                        glActiveTexture(GL_TEXTURE2);
2097
                        (dog-1)->BindTex();
2098
                        ShaderMan::UseShaderKeypoint((dog+1)->GetTexID(), (dog-1)->GetTexID());
2099
                        key->DrawQuadMT8();
2100
                        if(GlobalUtil::_timingL)
2101
                        {
2102
                                glFinish();
2103
                                std::cout<<(CLOCK()-t)<<"\t";
2104
                        }
2105
                }
2106
                if(GlobalUtil::_timingO)
2107
                {
2108
                        glFinish();
2109
                        std::cout<<"|\t"<<(CLOCK()-t0)<<"\n";
2110
                }
2111
        }
2112

    
2113
        if(GlobalUtil::_timingS)
2114
        {
2115
                glFinish();
2116
                if(GlobalUtil::_verbose) 
2117
                {        
2118
                        t2 = CLOCK();
2119
                        std::cout        <<"<Gradient, DOG  >\t"<<(t1-ts)<<"\n"
2120
                                                <<"<Get Keypoints  >\t"<<(t2-t1)<<"\n";
2121
                }
2122
                                                
2123
        }
2124
        UnloadProgram();
2125
        GLTexImage::UnbindMultiTex(3);
2126
        fbo.UnattachTex(GL_COLOR_ATTACHMENT1_EXT);
2127
}
2128

    
2129

    
2130
void PyramidPacked::GenerateFeatureList(int i, int j)
2131
{
2132
        float fcount = 0.0f; 
2133
        int hist_skip_gpu = GlobalUtil::_ListGenSkipGPU;
2134
    int idx = i * param._dog_level_num + j; 
2135
    int hist_level_num = _hpLevelNum - _pyramid_octave_first;
2136
        GLTexImage * htex, * ftex, * tex;
2137
        htex = _histoPyramidTex + hist_level_num - 1 - i;
2138
        ftex = _featureTex + idx;
2139
        tex = GetBaseLevel(_octave_min + i, DATA_KEYPOINT) + 2 + j;
2140

    
2141

    
2142
        //fill zero to an extra row/col if the height/width is odd
2143
        glActiveTexture(GL_TEXTURE0);
2144
        tex->BindTex();
2145
        htex->AttachToFBO(0);
2146
        int tight = (htex->GetImgWidth() * 4 == tex->GetImgWidth() -1 && htex->GetImgHeight() *4 == tex->GetImgHeight()-1 );
2147
        ShaderMan::UseShaderGenListInit(tex->GetImgWidth(), tex->GetImgHeight(), tight);
2148
        htex->FitTexViewPort();
2149
        //this uses the fact that no feature is on the edge.
2150
        htex->DrawQuadReduction();
2151
        //reduction..
2152
        htex--;
2153
        
2154
        //this part might have problems on several GPUS
2155
        //because the output of one pass is the input of the next pass
2156
        //may require glFinish to make it right, but too much glFinish makes it slow
2157
        for(int k = 0; k <hist_level_num - i-1 - hist_skip_gpu; k++, htex--)
2158
        {
2159
                htex->AttachToFBO(0);
2160
                htex->FitTexViewPort();
2161
                (htex+1)->BindTex();
2162
                ShaderMan::UseShaderGenListHisto();
2163
                htex->DrawQuadReduction();                
2164
        }
2165

    
2166
        if(hist_skip_gpu == 0)
2167
        {                
2168
                //read back one pixel
2169
                float fn[4];
2170
                glReadPixels(0, 0, 1, 1, GL_RGBA , GL_FLOAT, fn);
2171
                fcount = (fn[0] + fn[1] + fn[2] + fn[3]);
2172
                if(fcount < 1) fcount = 0;
2173

    
2174
                _levelFeatureNum[ idx] = (int)(fcount);
2175
                SetLevelFeatureNum(idx, (int)fcount);
2176

    
2177
                //save  number of features
2178
                _featureNum += int(fcount);
2179

    
2180
                //
2181
                if(fcount < 1.0)                                 return;;
2182

    
2183

    
2184
                ///generate the feature texture
2185
                htex=  _histoPyramidTex;
2186

    
2187
                htex->BindTex();
2188

    
2189
                //first pass
2190
                ftex->AttachToFBO(0);
2191
                if(GlobalUtil::_MaxOrientation>1)
2192
                {
2193
                        //this is very important...
2194
                        ftex->FitRealTexViewPort();
2195
                        glClear(GL_COLOR_BUFFER_BIT);
2196
                        glFinish();
2197
                }else
2198
                {        
2199
                        ftex->FitTexViewPort();
2200
                        //glFinish();
2201
                }
2202

    
2203

    
2204
                ShaderMan::UseShaderGenListStart((float)ftex->GetImgWidth(), htex->GetTexID());
2205

    
2206
                ftex->DrawQuad();
2207
                //make sure it finishes before the next step
2208
                ftex->DetachFBO(0);
2209
                //pass on each pyramid level
2210
                htex++;
2211
        }else
2212
        {
2213

    
2214
                int tw = htex[1].GetDrawWidth(), th = htex[1].GetDrawHeight();
2215
                int fc = 0;
2216
                glReadPixels(0, 0, tw, th, GL_RGBA , GL_FLOAT, _histo_buffer);        
2217
                _keypoint_buffer.resize(0);
2218
                for(int y = 0, pos = 0; y < th; y++)
2219
                {
2220
                        for(int x= 0; x < tw; x++)
2221
                        {
2222
                                for(int c = 0; c < 4; c++, pos++)
2223
                                {
2224
                                        int ss =  (int) _histo_buffer[pos]; 
2225
                                        if(ss == 0) continue;
2226
                                        float ft[4] = {2 * x + (c%2? 1.5f:  0.5f), 2 * y + (c>=2? 1.5f: 0.5f), 0, 1 };
2227
                                        for(int t = 0; t < ss; t++)
2228
                                        {
2229
                                                ft[2] = (float) t; 
2230
                                                _keypoint_buffer.insert(_keypoint_buffer.end(), ft, ft+4);
2231
                                        }
2232
                                        fc += (int)ss; 
2233
                                }
2234
                        }
2235
                }
2236
                _levelFeatureNum[ idx] = fc;
2237
                SetLevelFeatureNum(idx, fc);
2238
                if(fc == 0)  return; 
2239

    
2240
                fcount = (float) fc;         
2241
                _featureNum += fc;
2242
                /////////////////////
2243
                ftex->AttachToFBO(0);
2244
                if(GlobalUtil::_MaxOrientation>1)
2245
                {
2246
                        ftex->FitRealTexViewPort();
2247
                        glClear(GL_COLOR_BUFFER_BIT);
2248
                }else
2249
                {                                        
2250
                        ftex->FitTexViewPort();
2251
            glFlush();
2252
                }
2253
                _keypoint_buffer.resize(ftex->GetDrawWidth() * ftex->GetDrawHeight()*4, 0);
2254
                ///////////
2255
                glActiveTexture(GL_TEXTURE0);
2256
                ftex->BindTex();
2257
                glTexSubImage2D(GlobalUtil::_texTarget, 0, 0, 0, ftex->GetDrawWidth(),
2258
                        ftex->GetDrawHeight(), GL_RGBA, GL_FLOAT, &_keypoint_buffer[0]);
2259
                htex += 2; 
2260
        }
2261

    
2262
        for(int lev = 1 + hist_skip_gpu; lev < hist_level_num  - i; lev++, htex++)
2263
        {
2264
                glActiveTexture(GL_TEXTURE0);
2265
                ftex->BindTex();
2266
                ftex->AttachToFBO(0);
2267
                glActiveTexture(GL_TEXTURE1);
2268
                htex->BindTex();
2269
                ShaderMan::UseShaderGenListStep(ftex->GetTexID(), htex->GetTexID());
2270
                ftex->DrawQuad();
2271
                ftex->DetachFBO(0);        
2272
        }
2273

    
2274
        ftex->AttachToFBO(0);
2275
        glActiveTexture(GL_TEXTURE1);
2276
        tex->BindTex();
2277
        ShaderMan::UseShaderGenListEnd(tex->GetTexID());
2278
        ftex->DrawQuad();
2279
        GLTexImage::UnbindMultiTex(2);
2280

    
2281
}
2282

    
2283
void PyramidPacked::GenerateFeatureList()
2284
{
2285
        //generate the histogram pyramid
2286
        FrameBufferObject fbo;
2287
        glReadBuffer(GL_COLOR_ATTACHMENT0_EXT);
2288
        glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
2289
        double t1, t2; 
2290
        int ocount= 0, reverse = (GlobalUtil::_TruncateMethod == 1);
2291
        _featureNum = 0;
2292

    
2293
        FitHistogramPyramid();
2294
        //for(int i = 0, idx = 0; i < _octave_num; i++)
2295
    FOR_EACH_OCTAVE(i, reverse)
2296
        {
2297
                if(GlobalUtil::_timingO)
2298
                {
2299
            t1= CLOCK();
2300
                        ocount = 0;
2301
                        std::cout<<"#"<<i+_octave_min + _down_sample_factor<<":\t";
2302
                }
2303
                //for(int j = 0; j < param._dog_level_num; j++, idx++)
2304
        FOR_EACH_LEVEL(j, reverse)
2305
                {
2306
            if(GlobalUtil::_TruncateMethod && GlobalUtil::_FeatureCountThreshold > 0 && _featureNum > GlobalUtil::_FeatureCountThreshold) continue;
2307
            
2308
            GenerateFeatureList(i, j); 
2309

    
2310
                        if(GlobalUtil::_timingO)
2311
                        {
2312
                int idx = i * param._dog_level_num + j;
2313
                ocount += _levelFeatureNum[idx];
2314
                                std::cout<< _levelFeatureNum[idx] <<"\t";
2315
                        }
2316
                }
2317
                if(GlobalUtil::_timingO)
2318
                {        
2319
            t2 = CLOCK();
2320
                        std::cout << "| \t" << int(ocount) << " :\t(" << (t2 - t1) << ")\n";
2321
                }
2322
        }
2323
        if(GlobalUtil::_timingS)glFinish();
2324
        if(GlobalUtil::_verbose)
2325
        {
2326
                std::cout<<"#Features:\t"<<_featureNum<<"\n";
2327
        }
2328

    
2329
}
2330

    
2331
void PyramidPacked::GenerateFeatureListCPU()
2332
{
2333
        FrameBufferObject fbo;
2334
        _featureNum = 0;
2335
        GLTexImage * tex = GetBaseLevel(_octave_min);
2336
        float * mem = new float [tex->GetTexWidth()*tex->GetTexHeight()*4];
2337
        vector<float> list;
2338
        int idx = 0;
2339
        for(int i = 0; i < _octave_num; i++)
2340
        {
2341
                for(int j = 0; j < param._dog_level_num; j++, idx++)
2342
                {
2343
                        tex = GetBaseLevel(_octave_min + i, DATA_KEYPOINT) + j + 2;
2344
                        tex->BindTex();
2345
                        glGetTexImage(GlobalUtil::_texTarget, 0, GL_RGBA, GL_FLOAT, mem);
2346
                        //tex->AttachToFBO(0);
2347
                        //tex->FitTexViewPort();
2348
                        //glReadPixels(0, 0, tex->GetTexWidth(), tex->GetTexHeight(), GL_RED, GL_FLOAT, mem);
2349
                        //
2350
                        //make a list of 
2351
                        list.resize(0);
2352
                        float *pl = mem;
2353
                        int fcount = 0 ;
2354
                        for(int k = 0; k < tex->GetDrawHeight(); k++)
2355
                        {
2356
                                float * p = pl; 
2357
                                pl += tex->GetTexWidth() * 4;
2358
                                for( int m = 0; m < tex->GetDrawWidth(); m ++, p+=4)
2359
                                {
2360
                                //        if(m ==0 || k ==0 || k == tex->GetDrawHeight() -1 || m == tex->GetDrawWidth() -1) continue;
2361
                                //        if(*p == 0) continue;
2362
                                        int t = ((int) fabs(p[0])) - 1;
2363
                                        if(t < 0) continue;
2364
                                        int xx = m + m + ( (t %2)? 1 : 0);
2365
                                        int yy = k + k + ( (t <2)? 0 : 1);
2366
                                        if(xx ==0 || yy == 0) continue;
2367
                                        if(xx >= tex->GetImgWidth() - 1 || yy >= tex->GetImgHeight() - 1)continue;
2368
                                        list.push_back(xx + 0.5f + p[1]);
2369
                                        list.push_back(yy + 0.5f + p[2]);
2370
                                        list.push_back(GlobalUtil::_KeepExtremumSign && p[0] < 0 ? -1.0f : 1.0f);
2371
                                        list.push_back(p[3]);
2372
                                        fcount ++;
2373
                                }
2374
                        }
2375
                        if(fcount==0)continue;
2376

    
2377
                        if(GlobalUtil::_timingL) std::cout<<fcount<<".";
2378
                        
2379
                        GLTexImage * ftex = _featureTex+idx;
2380
                        _levelFeatureNum[idx] = (fcount);
2381
                        SetLevelFeatureNum(idx, fcount);
2382

    
2383
                        _featureNum += (fcount);
2384

    
2385

    
2386
                        int fw = ftex->GetImgWidth();
2387
                        int fh = ftex->GetImgHeight();
2388

    
2389
                        list.resize(4*fh*fw);
2390

    
2391
                        ftex->BindTex();
2392
                        ftex->AttachToFBO(0);
2393
        //                glTexImage2D(GlobalUtil::_texTarget, 0, GlobalUtil::_iTexFormat, fw, fh, 0, GL_BGRA, GL_FLOAT, &list[0]);
2394
                        glTexSubImage2D(GlobalUtil::_texTarget, 0, 0, 0, fw, fh, GL_RGBA, GL_FLOAT, &list[0]);
2395
                        //
2396
                }
2397
        }
2398
        GLTexImage::UnbindTex();
2399
        delete mem;
2400
        if(GlobalUtil::_verbose)
2401
        {
2402
                std::cout<<"#Features:\t"<<_featureNum<<"\n";
2403
        }
2404
}
2405

    
2406

    
2407

    
2408
void PyramidPacked::GetFeatureOrientations()
2409
{
2410
        GLTexImage * gtex, * otex;
2411
        GLTexImage * ftex = _featureTex;
2412
        GLTexImage * fotex = _orientationTex; 
2413
        int * count         = _levelFeatureNum;
2414
        float sigma, sigma_step = powf(2.0f, 1.0f/param._dog_level_num);
2415

    
2416

    
2417
        FrameBufferObject fbo;
2418
        if(_orientationTex)
2419
        {
2420
                GLenum buffers[] = { GL_COLOR_ATTACHMENT0_EXT, GL_COLOR_ATTACHMENT1_EXT };
2421
                glDrawBuffers(2, buffers);
2422
        }else
2423
        {
2424
                glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
2425
        }
2426

    
2427
        for(int i = 0; i < _octave_num; i++)
2428
        {
2429
                gtex = GetBaseLevel(i+_octave_min, DATA_GRAD) + 1;
2430
                otex = GetBaseLevel(i+_octave_min, DATA_ROT) + 1;
2431

    
2432
                for(int j = 0; j < param._dog_level_num; j++, ftex++, otex++, count++, gtex++, fotex++)
2433
                {
2434
                        if(*count<=0)continue;
2435

    
2436
                        sigma = param.GetLevelSigma(j+param._level_min+1);
2437

    
2438

    
2439
                        ftex->FitTexViewPort();
2440

    
2441
                        glActiveTexture(GL_TEXTURE0);
2442
                        ftex->BindTex();
2443
                        glActiveTexture(GL_TEXTURE1);
2444
                        gtex->BindTex();
2445
                        glActiveTexture(GL_TEXTURE2);
2446
                        otex->BindTex();
2447
                        //
2448
                        ftex->AttachToFBO(0);
2449
                        if(_orientationTex)                fotex->AttachToFBO(1);
2450

    
2451
                        ShaderMan::UseShaderOrientation(gtex->GetTexID(),
2452
                                gtex->GetImgWidth(), gtex->GetImgHeight(),
2453
                                sigma, otex->GetTexID(), sigma_step, _existing_keypoints);
2454
                        ftex->DrawQuad();
2455
                }
2456
        }
2457

    
2458
        GLTexImage::UnbindMultiTex(3);
2459
        if(GlobalUtil::_timingS)glFinish();
2460
        if(_orientationTex)        fbo.UnattachTex(GL_COLOR_ATTACHMENT1_EXT);
2461

    
2462
}
2463

    
2464

    
2465
void PyramidPacked::GetSimplifiedOrientation()
2466
{
2467
        //
2468
        int idx = 0;
2469
//        int n = _octave_num  * param._dog_level_num;
2470
        float sigma, sigma_step = powf(2.0f, 1.0f/param._dog_level_num); 
2471
        GLTexImage * ftex = _featureTex;
2472

    
2473
        FrameBufferObject fbo;
2474
        glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
2475
        for(int i = 0; i < _octave_num; i++)
2476
        {
2477
                GLTexImage *otex = GetBaseLevel(i + _octave_min, DATA_ROT) + 2;
2478
                for(int j = 0; j < param._dog_level_num; j++, ftex++,  otex++, idx ++)
2479
                {
2480
                        if(_levelFeatureNum[idx]<=0)continue;
2481
                        sigma = param.GetLevelSigma(j+param._level_min+1);
2482
                        //
2483
                        ftex->AttachToFBO(0);
2484
                        ftex->FitTexViewPort();
2485

    
2486
                        glActiveTexture(GL_TEXTURE0);
2487
                        ftex->BindTex();
2488
                        glActiveTexture(GL_TEXTURE1);
2489
                        otex->BindTex();
2490

    
2491
                        ShaderMan::UseShaderSimpleOrientation(otex->GetTexID(), sigma, sigma_step);
2492
                        ftex->DrawQuad();
2493
                }
2494
        }
2495
        GLTexImage::UnbindMultiTex(2);
2496
}
2497

    
2498
void PyramidPacked::InitPyramid(int w, int h, int ds)
2499
{
2500
        int wp, hp, toobig = 0;
2501
        if(ds == 0)
2502
        {
2503
                _down_sample_factor = 0;
2504
                if(GlobalUtil::_octave_min_default>=0)
2505
                {
2506
                        wp = w >> GlobalUtil::_octave_min_default;
2507
                        hp = h >> GlobalUtil::_octave_min_default;
2508
                }else 
2509
                {
2510
                        wp = w << (-GlobalUtil::_octave_min_default);
2511
                        hp = h << (-GlobalUtil::_octave_min_default);
2512
                }
2513
                _octave_min = _octave_min_default;
2514
        }else
2515
        {
2516
                //must use 0 as _octave_min; 
2517
                _octave_min = 0;
2518
                _down_sample_factor = ds;
2519
                w >>= ds;
2520
                h >>= ds;
2521
                wp = w;
2522
                hp = h; 
2523
        }
2524

    
2525
        while(wp > GlobalUtil::_texMaxDim  || hp > GlobalUtil::_texMaxDim )
2526
        {
2527
                _octave_min ++;
2528
                wp >>= 1;
2529
                hp >>= 1;
2530
                toobig = 1;
2531
        }
2532
        if(toobig && GlobalUtil::_verbose && _octave_min > 0)
2533
        {
2534

    
2535
                std::cout<< "**************************************************************\n"
2536
                                        "Image larger than allowed dimension, data will be downsampled!\n"
2537
                                        "use -maxd to change the settings\n"
2538
                                        "***************************************************************\n";
2539
        }
2540

    
2541
        if( wp == _pyramid_width && hp == _pyramid_height && _allocated )
2542
        {
2543
                FitPyramid(wp, hp);
2544
        }else if(GlobalUtil::_ForceTightPyramid || _allocated ==0)
2545
        {
2546
                ResizePyramid(wp, hp);
2547
        }
2548
        else if( wp > _pyramid_width || hp > _pyramid_height )
2549
        {
2550
                ResizePyramid(max(wp, _pyramid_width), max(hp, _pyramid_height));
2551
                if(wp < _pyramid_width || hp < _pyramid_height)  FitPyramid(wp, hp);
2552
        }
2553
        else
2554
        {
2555
                //try use the pyramid allocated for large image on small input images
2556
                FitPyramid(wp, hp);
2557
        }
2558

    
2559
        //select the initial smoothing filter according to the new _octave_min
2560
        ShaderMan::SelectInitialSmoothingFilter(_octave_min + _down_sample_factor, param);
2561
}
2562

    
2563

    
2564

    
2565
void PyramidPacked::FitPyramid(int w, int h)
2566
{
2567
        //(w, h) <= (_pyramid_width, _pyramid_height);
2568

    
2569
        _pyramid_octave_first = 0;
2570
        //
2571
        _octave_num  = GlobalUtil::_octave_num_default;
2572

    
2573
        int _octave_num_max = GetRequiredOctaveNum(min(w, h));
2574

    
2575
        if(_octave_num < 1 || _octave_num > _octave_num_max) 
2576
        {
2577
                _octave_num = _octave_num_max;
2578
        }
2579

    
2580

    
2581
        int pw = _pyramid_width>>1, ph = _pyramid_height>>1;
2582
        while(_pyramid_octave_first + _octave_num < _pyramid_octave_num &&  
2583
                pw >= w && ph >= h)
2584
        {
2585
                _pyramid_octave_first++;
2586
                pw >>= 1;
2587
                ph >>= 1;
2588
        }
2589

    
2590
        for(int i = 0; i < _octave_num; i++)
2591
        {
2592
                GLTexImage * tex = GetBaseLevel(i + _octave_min);
2593
                GLTexImage * dog = GetBaseLevel(i + _octave_min, DATA_DOG);
2594
                GLTexImage * grd = GetBaseLevel(i + _octave_min, DATA_GRAD);
2595
                GLTexImage * rot = GetBaseLevel(i + _octave_min, DATA_ROT);
2596
                GLTexImage * key = GetBaseLevel(i + _octave_min, DATA_KEYPOINT);
2597
                for(int j = param._level_min; j <= param._level_max; j++, tex++, dog++, grd++, rot++, key++)
2598
                {
2599
                        tex->SetImageSize(w, h);
2600
                        if(j == param._level_min) continue;
2601
                        dog->SetImageSize(w, h);
2602
                        grd->SetImageSize(w, h);
2603
                        rot->SetImageSize(w, h);
2604
                        if(j == param._level_min + 1 || j == param._level_max) continue;
2605
                        key->SetImageSize(w, h);
2606
                }
2607
                w>>=1;
2608
                h>>=1;
2609
        }
2610
}
2611

    
2612

    
2613
void PyramidPacked::ResizePyramid( int w,  int h)
2614
{
2615
        //
2616
        unsigned int totalkb = 0;
2617
        int _octave_num_new, input_sz, i, j;
2618
        //
2619

    
2620
        if(_pyramid_width == w && _pyramid_height == h && _allocated) return;
2621

    
2622
        if(w > GlobalUtil::_texMaxDim || h > GlobalUtil::_texMaxDim) return ;
2623

    
2624
        if(GlobalUtil::_verbose && GlobalUtil::_timingS) std::cout<<"[Allocate Pyramid]:\t" <<w<<"x"<<h<<endl;
2625
        //first octave does not change
2626
        _pyramid_octave_first = 0;
2627

    
2628
        
2629
        //compute # of octaves
2630

    
2631
        input_sz = min(w,h) ;
2632
        _pyramid_width =  w;
2633
        _pyramid_height =  h;
2634

    
2635
        //reset to preset parameters
2636
        _octave_num_new  = GlobalUtil::_octave_num_default;
2637

    
2638
        if(_octave_num_new < 1) _octave_num_new = GetRequiredOctaveNum(input_sz) ;
2639

    
2640

    
2641
        if(_pyramid_octave_num != _octave_num_new)
2642
        {
2643
                //destroy the original pyramid if the # of octave changes
2644
                if(_octave_num >0) 
2645
                {
2646
                        DestroyPerLevelData();
2647
                        DestroyPyramidData();
2648
                }
2649
                _pyramid_octave_num = _octave_num_new;
2650
        }
2651

    
2652
        _octave_num = _pyramid_octave_num;
2653

    
2654
        int noct = _octave_num;
2655
        int nlev = param._level_num;
2656

    
2657
        //        //initialize the pyramid
2658
        if(_allPyramid==NULL)        _allPyramid = new GLTexPacked[ noct* nlev * DATA_NUM];
2659

    
2660

    
2661
        GLTexPacked * gus = (GLTexPacked *) GetBaseLevel(_octave_min, DATA_GAUSSIAN);
2662
        GLTexPacked * dog = (GLTexPacked *) GetBaseLevel(_octave_min, DATA_DOG);
2663
        GLTexPacked * grd = (GLTexPacked *) GetBaseLevel(_octave_min, DATA_GRAD);
2664
        GLTexPacked * rot = (GLTexPacked *) GetBaseLevel(_octave_min, DATA_ROT);
2665
        GLTexPacked * key = (GLTexPacked *) GetBaseLevel(_octave_min, DATA_KEYPOINT);
2666

    
2667

    
2668
        ////////////there could be "out of memory" happening during the allocation
2669

    
2670
        for(i = 0; i< noct; i++)
2671
        {
2672
                for( j = 0; j< nlev; j++, gus++, dog++, grd++, rot++, key++)
2673
                {
2674
                        gus->InitTexture(w, h);
2675
                        if(j==0)continue;
2676
                        dog->InitTexture(w, h);
2677
                        grd->InitTexture(w, h, 0);
2678
                        rot->InitTexture(w, h);
2679
                        if(j<=1 || j >=nlev -1) continue;
2680
                        key->InitTexture(w, h, 0);
2681
                }
2682
                int tsz = (gus -1)->GetTexPixelCount() * 16;
2683
                totalkb += ((nlev *5 -6)* tsz / 1024);
2684
                //several auxilary textures are not actually required
2685
                w>>=1;
2686
                h>>=1;
2687
        }
2688

    
2689
        totalkb += ResizeFeatureStorage();
2690

    
2691
        _allocated = 1;
2692

    
2693
        if(GlobalUtil::_verbose && GlobalUtil::_timingS) std::cout<<"[Allocate Pyramid]:\t" <<(totalkb/1024)<<"MB\n";
2694

    
2695
}
2696

    
2697
void PyramidPacked::DestroyPyramidData()
2698
{
2699
        if(_allPyramid)
2700
        {
2701
                delete [] _allPyramid;
2702
                _allPyramid = NULL;
2703
        }
2704
}
2705

    
2706

    
2707
GLTexImage*  PyramidPacked::GetLevelTexture(int octave, int level, int dataName)
2708
{
2709
        return _allPyramid+ (_pyramid_octave_first + octave - _octave_min) * param._level_num 
2710
                + param._level_num * _pyramid_octave_num * dataName
2711
                + (level - param._level_min);
2712

    
2713
}
2714

    
2715
GLTexImage*  PyramidPacked::GetLevelTexture(int octave, int level)
2716
{
2717
        return _allPyramid+ (_pyramid_octave_first + octave - _octave_min) * param._level_num 
2718
                + (level - param._level_min);
2719
}
2720

    
2721
//in the packed implementation( still in progress)
2722
// DATA_GAUSSIAN, DATA_DOG, DATA_GAD will be stored in different textures.
2723

    
2724
GLTexImage*  PyramidPacked::GetBaseLevel(int octave, int dataName)
2725
{
2726
        if(octave <_octave_min || octave > _octave_min + _octave_num) return NULL;
2727
        int offset = (_pyramid_octave_first + octave - _octave_min) * param._level_num;
2728
        int num = param._level_num * _pyramid_octave_num;
2729
        return _allPyramid + num *dataName + offset;
2730
}
2731

    
2732

    
2733
void PyramidPacked::FitHistogramPyramid()
2734
{
2735
        GLTexImage * tex, *htex;
2736
        int hist_level_num = _hpLevelNum - _pyramid_octave_first; 
2737

    
2738
        tex = GetBaseLevel(_octave_min , DATA_KEYPOINT) + 2;
2739
        htex = _histoPyramidTex + hist_level_num - 1;
2740
        int w = (tex->GetImgWidth() + 2) >> 2;
2741
        int h = (tex->GetImgHeight() + 2)>> 2;
2742

    
2743

    
2744
        //4n+1 -> n; 4n+2,2, 3 -> n+1
2745
        for(int k = 0; k <hist_level_num -1; k++, htex--)
2746
        {
2747
                if(htex->GetImgHeight()!= h || htex->GetImgWidth() != w)
2748
                {        
2749
                        htex->SetImageSize(w, h);
2750
                        htex->ZeroHistoMargin();
2751
                }
2752

    
2753
                w = (w + 1)>>1; h = (h + 1) >> 1;
2754
        }
2755
}
2756