Project

General

Profile

Statistics
| Branch: | Revision:

root / rgbdslam / external / siftgpu / src / SiftGPU / ProgramGLSL.cpp @ 9240aaa3

History | View | Annotate | Download (81.7 KB)

1
////////////////////////////////////////////////////////////////////////////
2
//        File:                ProgramGLSL.cpp
3
//        Author:                Changchang Wu
4
//        Description : GLSL related classes
5
//                class ProgramGLSL                A simple wrapper of GLSL programs
6
//                class ShaderBagGLSL                GLSL shaders for SIFT
7
//                class FilterGLSL                GLSL gaussian filters for SIFT
8
//
9
//        Copyright (c) 2007 University of North Carolina at Chapel Hill
10
//        All Rights Reserved
11
//
12
//        Permission to use, copy, modify and distribute this software and its
13
//        documentation for educational, research and non-profit purposes, without
14
//        fee, and without a written agreement is hereby granted, provided that the
15
//        above copyright notice and the following paragraph appear in all copies.
16
//        
17
//        The University of North Carolina at Chapel Hill make no representations
18
//        about the suitability of this software for any purpose. It is provided
19
//        'as is' without express or implied warranty. 
20
//
21
//        Please send BUG REPORTS to ccwu@cs.unc.edu
22
//
23
////////////////////////////////////////////////////////////////////////////
24

    
25
  
26
#include "GL/glew.h"
27
#include <string.h>
28
#include <stdio.h>
29
#include <iomanip>
30
#include <iostream>
31
#include <strstream>
32
#include <vector>
33
#include <algorithm>
34
#include <math.h>
35
using namespace std;
36

    
37
#include "GlobalUtil.h"
38
#include "ProgramGLSL.h"
39
#include "GLTexImage.h"
40
#include "ShaderMan.h"
41
#include "SiftGPU.h"
42

    
43
ProgramGLSL::ShaderObject::ShaderObject(int shadertype, const char * source, int filesource)
44
{
45

    
46

    
47
        _type = shadertype; 
48
        _compiled = 0;
49

    
50

    
51
        _shaderID = glCreateShader(shadertype);
52
        if(_shaderID == 0) return;
53
        
54
        if(source)
55
        {
56

    
57
                GLint                                code_length;
58
                if(filesource ==0)
59
                {
60
                        const char* code  = source;
61
                        code_length = strlen(code);
62
                        glShaderSource(_shaderID, 1, (const char **) &code, &code_length);
63
                }else
64
                {
65
                        char * code;
66
                        if((code_length= ReadShaderFile(source, code)) ==0) return;
67
                        glShaderSource(_shaderID, 1, (const char **) &code, &code_length);
68
                        delete code;
69
                }
70

    
71
                glCompileShader(_shaderID);
72

    
73
                CheckCompileLog();
74

    
75
                if(!_compiled)                 std::cout << source;
76
        }
77

    
78

    
79

    
80

    
81
}
82

    
83
int ProgramGLSL::ShaderObject::ReadShaderFile(const char *sourcefile,  char*& code )
84
{
85
        code = NULL;
86
        FILE * file;
87
        int    len=0;
88

    
89
        if(sourcefile == NULL) return 0;
90

    
91
        file = fopen(sourcefile,"rt");
92
        if(file == NULL) return 0;
93

    
94
        
95
        fseek(file, 0, SEEK_END);
96
        len = ftell(file);
97
        rewind(file);
98
        if(len >1)
99
        {
100
                code = new  char[len+1];
101
                fread(code, sizeof( char), len, file);
102
                code[len] = 0;
103
        }else
104
        {
105
                len = 0;
106
        }
107

    
108
        fclose(file);
109

    
110
        return len;
111
        
112
}
113

    
114
void ProgramGLSL::ShaderObject::CheckCompileLog()
115
{
116

    
117
        GLint status;
118
        glGetShaderiv(_shaderID, GL_COMPILE_STATUS, &status);
119
        _compiled = (status ==GL_TRUE);
120

    
121
        if(_compiled == 0)        PrintCompileLog(std::cout);
122

    
123

    
124
}
125

    
126
ProgramGLSL::ShaderObject::~ShaderObject()
127
{
128
        if(_shaderID)        glDeleteShader(_shaderID);
129

    
130
}
131

    
132
int ProgramGLSL::ShaderObject::IsValidFragmentShader()
133
{
134
        return _type == GL_FRAGMENT_SHADER && _shaderID && _compiled;
135
}
136

    
137
int  ProgramGLSL::ShaderObject::IsValidVertexShader()
138
{
139
        return _type == GL_VERTEX_SHADER && _shaderID && _compiled;
140
}
141

    
142

    
143
void ProgramGLSL::ShaderObject::PrintCompileLog(ostream&os)
144
{
145
        GLint len = 0;        
146

    
147
        glGetShaderiv(_shaderID, GL_INFO_LOG_LENGTH , &len);
148
        if(len <=1) return;
149
        
150
        char * compileLog = new char[len+1];
151
        if(compileLog == NULL) return;
152

    
153
        glGetShaderInfoLog(_shaderID, len, &len, compileLog);
154
        
155

    
156
        os<<"Compile Log\n"<<compileLog<<"\n";
157

    
158
        delete compileLog;
159
}
160

    
161

    
162
ProgramGLSL::ProgramGLSL()
163
{
164
        _linked = 0;
165
        _programID = glCreateProgram();
166
}
167
ProgramGLSL::~ProgramGLSL()
168
{
169
        if(_programID)glDeleteProgram(_programID);
170
}
171
void ProgramGLSL::AttachShaderObject(ShaderObject &shader)
172
{
173
        if(_programID  && shader.IsValidShaderObject()) 
174
                glAttachShader(_programID, shader.GetShaderID());
175
}
176
void ProgramGLSL::DetachShaderObject(ShaderObject &shader)
177
{
178
        if(_programID  && shader.IsValidShaderObject()) 
179
                glDetachShader(_programID, shader.GetShaderID());
180
}
181
int ProgramGLSL::LinkProgram()
182
{
183
        _linked = 0;
184

    
185
        if(_programID==0) return 0;
186

    
187
        glLinkProgram(_programID);
188

    
189
        CheckLinkLog();
190

    
191
//        GlobalUtil::StartTimer("100 link test");
192
//        for(int i = 0; i<100; i++) glLinkProgram(_programID);
193
//        GlobalUtil::StopTimer();
194

    
195
        return _linked;
196
}
197

    
198
void ProgramGLSL::CheckLinkLog()
199
{
200
        GLint status;
201
        glGetProgramiv(_programID, GL_LINK_STATUS, &status);
202

    
203
        _linked = (status == GL_TRUE);
204

    
205
}
206

    
207

    
208
int ProgramGLSL::ValidateProgram()
209
{
210
        if(_programID && _linked)
211
        {
212
///                GLint status;
213
//                glValidateProgram(_programID);
214
//                glGetProgramiv(_programID, GL_VALIDATE_STATUS, &status);
215
//                return status == GL_TRUE;
216
                return 1;
217
        }
218
        else
219
                return 0;
220
}
221

    
222
void ProgramGLSL::PrintLinkLog(std::ostream &os)
223
{
224
        GLint len = 0;        
225

    
226
        glGetProgramiv(_programID, GL_INFO_LOG_LENGTH , &len);
227
        if(len <=1) return;
228
        
229
        char* linkLog = new char[len+1];
230
        if(linkLog == NULL) return;
231

    
232
        glGetProgramInfoLog(_programID, len, &len, linkLog);
233
        
234
        linkLog[len] = 0;
235

    
236
        if(strstr(linkLog, "failed"))
237
        {
238
                os<<linkLog + (linkLog[0] == ' '? 1:0)<<"\n";
239
                _linked = 0;
240
        }
241

    
242
        delete linkLog;
243
}
244

    
245
int ProgramGLSL::UseProgram()
246
{
247
        if(ValidateProgram())
248
        {
249
                glUseProgram(_programID);
250
                return true;
251
        }
252
        else
253
        {
254
                return false;
255
        }
256
}
257

    
258

    
259
ProgramGLSL::ProgramGLSL(const char *frag_source)
260
{
261
        _linked = 0;
262
        _programID = glCreateProgram();
263
        ShaderObject shader(GL_FRAGMENT_SHADER, frag_source);
264

    
265
        if(shader.IsValidFragmentShader())
266
        {
267
                AttachShaderObject(shader);
268
                LinkProgram();
269

    
270
                if(!_linked)
271
                {
272
                        //shader.PrintCompileLog(std::cout);
273
                        PrintLinkLog(std::cout);
274
                }
275
        }else
276
        {
277
                _linked = 0;
278
        }
279
        
280
}
281

    
282
/*
283
ProgramGLSL::ProgramGLSL(char*frag_source, char * vert_source)
284
{
285
        _used = 0;
286
        _linked = 0;
287
        _programID = glCreateProgram();
288
        ShaderObject shader(GL_FRAGMENT_SHADER, frag_source);
289
        ShaderObject vertex_shader(GL_VERTEX_SHADER, vert_source);
290
        AttachShaderObject(shader);
291
        AttachShaderObject(vertex_shader);
292
        LinkProgram();
293
        if(!_linked)
294
        {
295
                shader.PrintCompileLog(std::cout);
296
                vertex_shader.PrintCompileLog(std::cout);
297
                PrintLinkLog(std::cout);
298
                std::cout<<vert_source;
299
                std::cout<<frag_source;
300
        }
301

302
}
303
*/
304

    
305

    
306

    
307
void ProgramGLSL::ReLink()
308
{
309
        glLinkProgram(_programID);
310
}
311

    
312
int ProgramGLSL::IsNative()
313
{
314
        return _linked;
315
}
316

    
317
FilterGLSL::FilterGLSL(float sigma) 
318
{
319
        //pixel inside 3*sigma box
320
        int sz = int( ceil( GlobalUtil::_FilterWidthFactor * sigma -0.5) ) ;//
321
        int width = 2*sz + 1;
322

    
323
        //filter size truncation
324
        if(GlobalUtil::_MaxFilterWidth >0 && width > GlobalUtil::_MaxFilterWidth)
325
        {
326
                std::cout<<"Filter size truncated from "<<width<<" to "<<GlobalUtil::_MaxFilterWidth<<endl;
327
                sz = GlobalUtil::_MaxFilterWidth>>1;
328
                width = 2 * sz + 1;
329
        }
330

    
331
        int i;
332
        float * kernel = new float[width];
333
        float   rv = 1.0f/(sigma*sigma);
334
        float   v, ksum =0; 
335

    
336
        // pre-compute filter
337
        for( i = -sz ; i <= sz ; ++i) 
338
        {
339
                kernel[i+sz] =  v = exp(-0.5f * i * i *rv) ;
340
                ksum += v;
341
        }
342

    
343
        //normalize the kernel
344
        rv = 1.0f / ksum;
345
        for(i = 0; i< width ;i++) kernel[i]*=rv;
346
        //
347
        
348
    MakeFilterProgram(kernel, width);
349

    
350
        _size = sz;
351

    
352
        delete[] kernel; 
353
    if(GlobalUtil::_verbose && GlobalUtil::_timingL) std::cout<<"Filter: sigma = "<<sigma<<", size = "<<width<<"x"<<width<<endl;
354
}
355

    
356

    
357
void FilterGLSL::MakeFilterProgram(float kernel[], int width)
358
{
359
        if(GlobalUtil::_usePackedTex)
360
        {
361
                s_shader_h = CreateFilterHPK(kernel, width);
362
                s_shader_v = CreateFilterVPK(kernel, width);
363
        }else
364
        {
365
                s_shader_h = CreateFilterH(kernel, width);
366
                s_shader_v = CreateFilterV(kernel, width);
367
        }
368
}
369

    
370
ProgramGPU* FilterGLSL::CreateFilterH(float kernel[], int width)
371
{
372

    
373
        char buffer[10240];
374
        ostrstream out(buffer, 10240);
375
        out<<setprecision(8);
376

    
377
        out<<  "uniform sampler2DRect tex;";
378
        out<< "\nvoid main(void){ float intensity = 0.0 ;  vec2 pos;\n";
379

    
380
    int half_width = width / 2; 
381
        for(int i = 0; i< width; i++)
382
        {
383
                if(i == half_width)
384
                {
385

    
386
                        out<<"float or = texture2DRect(tex, gl_TexCoord[0].st).r;\n";
387
                        out<<"intensity+= or * "<<kernel[i]<<";\n";
388
                }else
389
                {
390
                        out<<"pos = gl_TexCoord[0].st + vec2(float("<< (i - half_width) <<") , 0);\n";
391
                        out<<"intensity+= "<<kernel[i]<<"*texture2DRect(tex, pos).r;\n";
392
                }
393
        }
394

    
395
        //copy original data to red channel
396
        out<<"gl_FragColor.r = or;\n"; 
397
        out<<"gl_FragColor.b  = intensity;}\n"<<'\0';
398

    
399
        return new ProgramGLSL( buffer);
400
}
401

    
402

    
403
ProgramGPU* FilterGLSL::CreateFilterV(float kernel[], int height)
404
{
405

    
406
        char buffer[10240];
407
        ostrstream out(buffer, 10240);
408
        out<<setprecision(8);
409

    
410
        out<<  "uniform sampler2DRect tex;";
411
        out<< "\nvoid main(void){ float intensity = 0.0;vec2 pos; \n";
412
    int half_height = height / 2; 
413
        for(int i = 0; i< height; i++)
414
        {
415

    
416
                if(i == half_height)
417
                {
418
                        out<<"vec2 orb = texture2DRect(tex, gl_TexCoord[0].st).rb;\n";
419
                        out<<"intensity+= orb.y * "<<kernel[i]<<";\n";
420

    
421
                }else
422
                {
423
                        out<<"pos = gl_TexCoord[0].st + vec2(0, float("<<(i - half_height) <<") );\n";
424
                        out<<"intensity+= texture2DRect(tex, pos).b * "<<kernel[i]<<";\n";
425
                }
426
                
427
        }
428

    
429
        out<<"gl_FragColor.b = orb.y;\n";
430
        out<<"gl_FragColor.g = intensity - orb.x;\n"; // difference of gaussian..
431
        out<<"gl_FragColor.r = intensity;}\n"<<'\0';
432
        
433
//        std::cout<<buffer<<endl;
434
        return new ProgramGLSL( buffer);
435
}
436

    
437

    
438

    
439
ProgramGPU* FilterGLSL::CreateFilterHPK(float kernel[], int width)
440
{
441
        //both h and v are packed...
442
        int i, j , xw, xwn;
443

    
444
        int halfwidth  = width >>1;
445
        float * pf = kernel + halfwidth;
446
        int nhpixel = (halfwidth+1)>>1;        //how many neighbour pixels need to be looked up
447
        int npixel  = (nhpixel<<1)+1;//
448
        char buffer[10240];
449
        float weight[3];
450
        ostrstream out(buffer, 10240);
451
        out<<setprecision(8);
452

    
453
        out<<  "uniform sampler2DRect tex;";
454
        out<< "\nvoid main(void){ vec4 result = vec4(0, 0, 0, 0);\n";
455
        ///use multi texture coordinate because nhpixels can be at most 3
456
        out<<"vec4 pc; vec2 coord; \n";
457
        for( i = 0 ; i < npixel ; i++)
458
        {
459
                out<<"coord = gl_TexCoord[0].xy + vec2(float("<<i-nhpixel<<"),0);\n";
460
                out<<"pc=texture2DRect(tex, coord);\n";
461
                if(GlobalUtil::_PreciseBorder)                out<<"if(coord.x < 0) pc = pc.rrbb;\n";
462
                //for each sub-pixel j  in center, the weight of sub-pixel k 
463
                xw = (i - nhpixel)*2;
464
                for( j = 0; j < 3; j++)
465
                {
466
                        xwn = xw  + j  -1;
467
                        weight[j] = xwn < -halfwidth || xwn > halfwidth? 0 : pf[xwn];
468
                }
469
                if(weight[1] == 0.0)
470
                {
471
                        out<<"result += vec4("<<weight[2]<<","<<weight[0]<<","<<weight[2]<<","<<weight[0]<<")*pc.grab;\n";
472
                }
473
                else
474
                {
475
                        out<<"result += vec4("<<weight[1]<<", "<<weight[0]<<", "<<weight[1]<<", "<<weight[0]<<")*pc.rrbb;\n";
476
                        out<<"result += vec4("<<weight[2]<<", "<<weight[1]<<", "<<weight[2]<<", "<<weight[1]<<")*pc.ggaa;\n";
477
                }        
478
        
479
        }
480
        out<<"gl_FragColor = result;}\n"<<'\0';
481
        
482
        return new ProgramGLSL( buffer);
483

    
484

    
485
}
486

    
487

    
488
ProgramGPU* FilterGLSL::CreateFilterVPK(float kernel[], int height)
489
{
490

    
491
        //both h and v are packed...
492
        int i, j, yw, ywn;
493

    
494
        int halfh  = height >>1;
495
        float * pf = kernel + halfh;
496
        int nhpixel = (halfh+1)>>1;        //how many neighbour pixels need to be looked up
497
        int npixel  = (nhpixel<<1)+1;//
498
        char buffer[10240];
499
        float weight[3];
500
        ostrstream out(buffer, 10240);
501
        out<<setprecision(8);
502

    
503
        out<<  "uniform sampler2DRect tex;";
504
        out<< "\nvoid main(void){ vec4 result = vec4(0, 0, 0, 0);\n";
505
        ///use multi texture coordinate because nhpixels can be at most 3
506
        out<<"vec4 pc; vec2 coord;\n";
507
        for( i = 0 ; i < npixel ; i++)
508
        {
509
                out<<"coord = gl_TexCoord[0].xy + vec2(0, float("<<i-nhpixel<<"));\n";
510
                out<<"pc=texture2DRect(tex, coord);\n";
511
                if(GlobalUtil::_PreciseBorder)        out<<"if(coord.y < 0) pc = pc.rgrg;\n";
512

    
513
                //for each sub-pixel j  in center, the weight of sub-pixel k 
514
                yw = (i - nhpixel)*2;
515
                for( j = 0; j < 3; j++)
516
                {
517
                        ywn = yw + j  -1;
518
                        weight[j] = ywn < -halfh || ywn > halfh? 0 : pf[ywn];
519
                }
520
                if(weight[1] == 0.0)
521
                {
522
                        out<<"result += vec4("<<weight[2]<<","<<weight[2]<<","<<weight[0]<<","<<weight[0]<<")*pc.barg;\n";
523
                }else
524
                {
525
                        out<<"result += vec4("<<weight[1]<<","<<weight[1]<<","<<weight[0]<<","<<weight[0]<<")*pc.rgrg;\n";
526
                        out<<"result += vec4("<<weight[2]<<","<<weight[2]<<","<<weight[1]<<","<<weight[1]<<")*pc.baba;\n";
527
                }
528
        }
529
        out<<"gl_FragColor = result;}\n"<<'\0';
530

    
531
        return new ProgramGLSL( buffer);
532
}
533

    
534

    
535

    
536
ShaderBag::ShaderBag()
537
{
538
        s_debug = 0;
539
        s_orientation = 0;
540
        s_display_gaussian = 0;
541
        s_display_dog = 0;
542
        s_display_grad = 0;
543
        s_display_keys = 0;
544
        s_sampling = 0;
545
        s_grad_pass = 0;
546
        s_dog_pass = 0;
547
        s_keypoint = 0;
548
        s_genlist_init_tight = 0;
549
        s_genlist_init_ex = 0;
550
        s_genlist_histo = 0;
551
        s_genlist_start = 0;
552
        s_genlist_step = 0;
553
        s_genlist_end = 0;
554
        s_vertex_list = 0;
555
        s_descriptor_fp = 0;
556
        s_margin_copy = 0;
557
    ////////////
558
    f_gaussian_skip0 = NULL;
559
    f_gaussian_skip1 = NULL;
560
    f_gaussian_step = NULL;
561
    _gaussian_step_num = 0; 
562

    
563
}
564

    
565
ShaderBag::~ShaderBag()
566
{
567
        if(s_debug)delete s_debug;
568
        if(s_orientation)delete s_orientation;
569
        if(s_display_gaussian)delete s_display_gaussian;
570
        if(s_display_dog)delete s_display_dog;
571
        if(s_display_grad)delete s_display_grad;
572
        if(s_display_keys)delete s_display_keys;
573
        if(s_sampling)delete s_sampling;
574
        if(s_grad_pass)delete s_grad_pass;
575
        if(s_dog_pass) delete s_dog_pass;
576
        if(s_keypoint)delete s_keypoint;
577
        if(s_genlist_init_tight)delete s_genlist_init_tight;
578
        if(s_genlist_init_ex)delete s_genlist_init_ex;
579
        if(s_genlist_histo)delete s_genlist_histo;
580
        if(s_genlist_start)delete s_genlist_start;
581
        if(s_genlist_step)delete s_genlist_step;
582
        if(s_genlist_end)delete s_genlist_end;
583
        if(s_vertex_list)delete s_vertex_list;
584
        if(s_descriptor_fp)delete s_descriptor_fp;
585
        if(s_margin_copy) delete s_margin_copy;
586

    
587
    //////////////////////////////////////////////
588
    if(f_gaussian_skip1) delete f_gaussian_skip1;
589

    
590
    for(unsigned int i = 0; i < f_gaussian_skip0_v.size(); i++)
591
    {
592
            if(f_gaussian_skip0_v[i]) delete f_gaussian_skip0_v[i];
593
    }
594
    if(f_gaussian_step && _gaussian_step_num > 0) 
595
    {
596
            for(int i = 0; i< _gaussian_step_num; i++)
597
            {
598
                    delete f_gaussian_step[i];
599
            }
600
            delete[] f_gaussian_step;
601
    }
602
}
603

    
604

    
605
void ShaderBag::SelectInitialSmoothingFilter(int octave_min, SiftParam&param)
606
{
607
    float sigma = param.GetInitialSmoothSigma(octave_min);
608
    if(sigma == 0) 
609
    {
610
       f_gaussian_skip0 = NULL; 
611
    }else
612
    {
613
            for(unsigned int i = 0; i < f_gaussian_skip0_v.size(); i++)
614
            {
615
                    if(f_gaussian_skip0_v[i]->_id == octave_min)
616
                    {
617
                            f_gaussian_skip0 = f_gaussian_skip0_v[i];
618
                            return ;
619
                    }
620
            }
621
            FilterGLSL * filter = new FilterGLSL(sigma); 
622
            filter->_id = octave_min;
623
            f_gaussian_skip0_v.push_back(filter);
624
            f_gaussian_skip0 = filter; 
625
    }
626
}
627

    
628
void ShaderBag::CreateGaussianFilters(SiftParam&param)
629
{
630
        if(param._sigma_skip0>0.0f) 
631
        {
632
        FilterGLSL * filter;
633
                f_gaussian_skip0 = filter = new FilterGLSL(param._sigma_skip0);
634
                filter->_id = GlobalUtil::_octave_min_default; 
635
                f_gaussian_skip0_v.push_back(filter);
636
        }
637
        if(param._sigma_skip1>0.0f) 
638
        {
639
                f_gaussian_skip1 = new FilterGLSL(param._sigma_skip1);
640
        }
641

    
642
        f_gaussian_step = new FilterProgram*[param._sigma_num];
643
        for(int i = 0; i< param._sigma_num; i++)
644
        {
645
                f_gaussian_step[i] =  new FilterGLSL(param._sigma[i]);
646
        }
647
    _gaussian_step_num = param._sigma_num;
648
}
649

    
650

    
651
void ShaderBag::LoadDynamicShaders(SiftParam& param)
652
{
653
    LoadKeypointShader(param._dog_threshold, param._edge_threshold);
654
    LoadGenListShader(param._dog_level_num, 0);
655
    CreateGaussianFilters(param);
656
}
657

    
658

    
659
void ShaderBagGLSL::LoadFixedShaders()
660
{
661

    
662

    
663
        s_gray = new ProgramGLSL( 
664
                "uniform sampler2DRect rgbTex; void main(void){\n"
665
                "float intensity = dot(vec3(0.299, 0.587, 0.114), texture2DRect(rgbTex,gl_TexCoord[0].st ).rgb);\n"
666
                "gl_FragColor = vec4(intensity, intensity, intensity, 1.0);}");
667

    
668

    
669
        s_debug = new ProgramGLSL( "void main(void){gl_FragColor.rg =  gl_TexCoord[0].st;}");
670

    
671

    
672
        s_sampling = new ProgramGLSL(
673
                "uniform sampler2DRect tex; void main(void){gl_FragColor.rg= texture2DRect(tex, gl_TexCoord[0].st).rg;}");
674

    
675
        //
676
        s_grad_pass = new ProgramGLSL(
677
        "uniform sampler2DRect tex; void main ()\n"
678
        "{\n"
679
        "        vec4 v1, v2, gg;\n"
680
        "        vec4 cc  = texture2DRect(tex, gl_TexCoord[0].xy);\n"
681
        "        gg.x = texture2DRect(tex, gl_TexCoord[1].xy).r;\n"
682
        "        gg.y = texture2DRect(tex, gl_TexCoord[2].xy).r;\n"
683
        "        gg.z = texture2DRect(tex, gl_TexCoord[3].xy).r;\n"
684
        "        gg.w = texture2DRect(tex, gl_TexCoord[4].xy).r;\n"
685
        "        vec2 dxdy = (gg.yw - gg.xz); \n"
686
        "        float grad = 0.5*length(dxdy);\n"
687
        "        float theta = grad==0? 0.0: atan(dxdy.y, dxdy.x);\n"
688
        "        gl_FragData[0] = vec4(cc.rg, grad, theta);\n"
689
        "}\n\0");
690

    
691
        ProgramGLSL * program;
692
        s_margin_copy = program = new ProgramGLSL(
693
        "uniform sampler2DRect tex; uniform vec2 truncate;\n"
694
        "void main(){ gl_FragColor = texture2DRect(tex, min(gl_TexCoord[0].xy, truncate)); }");
695

    
696
        _param_margin_copy_truncate = glGetUniformLocation(*program, "truncate");
697

    
698

    
699
        GlobalUtil::_OrientationPack2 = 0;
700
        LoadOrientationShader();
701

    
702
        if(s_orientation == NULL)
703
        {
704
                //Load a simplified version if the right version is not supported
705
                s_orientation = program =  new ProgramGLSL(
706
                "uniform sampler2DRect fTex; uniform sampler2DRect oTex;\n"
707
        "        uniform float size; void main(){\n"
708
        "        vec4 cc = texture2DRect(fTex, gl_TexCoord[0].st);\n"
709
        "        vec4 oo = texture2DRect(oTex, cc.rg);\n"
710
        "        gl_FragColor.rg = cc.rg;\n"
711
        "        gl_FragColor.b = oo.a;\n"
712
        "        gl_FragColor.a = size;}");  
713

    
714
                _param_orientation_gtex = glGetUniformLocation(*program, "oTex");
715
                _param_orientation_size = glGetUniformLocation(*program, "size");
716

    
717
                GlobalUtil::_MaxOrientation = 0;
718
                GlobalUtil::_FullSupported = 0;
719
                std::cerr<<"Orientation simplified on this hardware"<<endl;
720
        }
721

    
722

    
723
        if(GlobalUtil::_DescriptorPPT) LoadDescriptorShader();
724
        if(s_descriptor_fp == NULL) 
725
        {
726
                GlobalUtil::_DescriptorPPT = GlobalUtil::_FullSupported = 0; 
727
                std::cerr<<"Descriptor ignored on this hardware"<<endl;
728
        }
729

    
730
        s_zero_pass = new ProgramGLSL("void main(){gl_FragColor = vec4(0.0);}");
731
}
732

    
733

    
734
void ShaderBagGLSL::LoadDisplayShaders()
735
{
736
        s_copy_key = new ProgramGLSL(
737
                "uniform sampler2DRect tex; void main(){\n"
738
        "gl_FragColor.rg= texture2DRect(tex, gl_TexCoord[0].st).rg; gl_FragColor.ba = vec2(0.0,1.0);        }");
739
        
740

    
741
        ProgramGLSL * program;
742
        s_vertex_list = program = new ProgramGLSL(
743
        "uniform vec4 sizes; uniform sampler2DRect tex;\n"
744
        "void main(void){\n"
745
        "float fwidth = sizes.y; float twidth = sizes.z; float rwidth = sizes.w; \n"
746
        "float index = 0.1*(fwidth*floor(gl_TexCoord[0].y) + gl_TexCoord[0].x);\n"
747
        "float px = mod(index, twidth);\n"
748
        "vec2 tpos= floor(vec2(px, index*rwidth))+0.5;\n"
749
        "vec4 cc = texture2DRect(tex, tpos );\n"
750
        "float size = 3.0f * cc.a; //sizes.x;// \n"
751
        "gl_FragColor.zw = vec2(0.0, 1.0);\n"
752
        "if(cc.x<=0 || cc.y <=0) {gl_FragColor.xy = cc.xy; }\n"
753
        "else {float type = fract(px);\n"
754
        "vec2 dxy; \n"
755
        "dxy.x = type < 0.1 ? 0.0 : ((type <0.5 || type > 0.9)? size : -size);\n"
756
        "dxy.y = type < 0.2 ? 0.0 : ((type < 0.3 || type > 0.7 )? -size :size); \n"
757
        "float s = sin(cc.b); float c = cos(cc.b); \n"
758
        "gl_FragColor.x = cc.x + c*dxy.x-s*dxy.y;\n"
759
        "gl_FragColor.y = cc.y + c*dxy.y+s*dxy.x;}\n}\n");
760

    
761

    
762
        _param_genvbo_size = glGetUniformLocation(*program, "sizes");
763
        s_display_gaussian =  new ProgramGLSL(
764
        "uniform sampler2DRect tex; void main(void){float r = texture2DRect(tex, gl_TexCoord[0].st).r;\n"
765
        "gl_FragColor = vec4(r, r, r, 1);}" );
766

    
767
        s_display_dog =  new ProgramGLSL(
768
        "uniform sampler2DRect tex; void main(void){float g = 0.5+(20.0*texture2DRect(tex, gl_TexCoord[0].st).g);\n"
769
        "gl_FragColor = vec4(g, g, g, 0.0);}" );
770

    
771
        s_display_grad = new ProgramGLSL(
772
                "uniform sampler2DRect tex; void main(void){\n"
773
    "        vec4 cc = texture2DRect(tex, gl_TexCoord[0].st);gl_FragColor = vec4(5.0* cc.bbb, 1.0);}");
774

    
775
        s_display_keys= new ProgramGLSL(
776
                "uniform sampler2DRect tex; void main(void){\n"
777
        "        vec4 cc = texture2DRect(tex, gl_TexCoord[0].st);\n"
778
        "        if(cc.r ==0.0) discard; gl_FragColor =  (cc.r==1.0? vec4(1.0, 0.0, 0,1.0):vec4(0.0,1.0,0.0,1.0));}");
779
}
780

    
781
void ShaderBagGLSL::LoadKeypointShader(float threshold, float edge_threshold)
782
{
783

    
784
        char buffer[10240];
785
        float threshold0 = threshold* (GlobalUtil::_SubpixelLocalization?0.8f:1.0f);        
786
        float threshold1 = threshold;
787
        float threshold2 = (edge_threshold+1)*(edge_threshold+1)/edge_threshold;
788
        ostrstream out(buffer, 10240);
789
        streampos pos;
790

    
791
        //tex(X)(Y)
792
        //X: (CLR) (CENTER 0, LEFT -1, RIGHT +1)  
793
        //Y: (CDU) (CENTER 0, DOWN -1, UP    +1) 
794

    
795
        out <<        "#define THRESHOLD0 " << threshold0 << "\n"
796
                        "#define THRESHOLD1 " << threshold1 << "\n"
797
                        "#define THRESHOLD2 " << threshold2 << "\n";
798

    
799
        out<<
800
        "uniform sampler2DRect tex, texU, texD; void main ()\n"
801
        "{\n"
802
        "        vec4 v1, v2, gg, temp;\n"
803
        "        vec2 TexRU = vec2(gl_TexCoord[2].x, gl_TexCoord[4].y); \n"
804
        "        vec4 cc  = texture2DRect(tex, gl_TexCoord[0].xy);\n"
805
        "        temp =  texture2DRect(tex, gl_TexCoord[1].xy);\n"
806
        "        v1.x =  temp.g;                        gg.x = temp.r;\n"
807
        "        temp = texture2DRect(tex, gl_TexCoord[2].xy) ;\n"
808
        "        v1.y = temp.g;                        gg.y = temp.r;\n"
809
        "        temp = texture2DRect(tex, gl_TexCoord[3].xy) ;\n"
810
        "        v1.z = temp.g;                        gg.z = temp.r;\n"
811
        "        temp = texture2DRect(tex, gl_TexCoord[4].xy) ;\n"
812
        "        v1.w = temp.g;                        gg.w = temp.r;\n"
813
        "        v2.x = texture2DRect(tex, gl_TexCoord[5].xy).g;\n"
814
        "        v2.y = texture2DRect(tex, gl_TexCoord[6].xy).g;\n"
815
        "        v2.z = texture2DRect(tex, gl_TexCoord[7].xy).g;\n"
816
        "        v2.w = texture2DRect(tex, TexRU.xy).g;\n"
817
        "        vec2 dxdy = (gg.yw - gg.xz); \n"
818
        "        float grad = 0.5*length(dxdy);\n"
819
        "        float theta = grad==0? 0.0: atan(dxdy.y, dxdy.x);\n"
820
        "        gl_FragData[0] = vec4(cc.rg, grad, theta);\n"
821

    
822
        //test against 8 neighbours
823
        //use variable to identify type of extremum
824
        //1.0 for local maximum and 0.5 for minimum
825
        <<
826
        "        float dog = 0.0; \n"
827
        "        gl_FragData[1] = vec4(0, 0, 0, 0); \n"
828
        "        dog = cc.g > THRESHOLD0 && all(greaterThan(cc.gggg, max(v1, v2)))?1.0: 0.0;\n"
829
        "        dog = cc.g < -THRESHOLD0 && all(lessThan(cc.gggg, min(v1, v2)))?0.5: dog;\n"
830
        "        if(dog == 0.0) return;\n";
831

    
832
        pos = out.tellp();
833
        //do edge supression first.. 
834
        //vector v1 is < (-1, 0), (1, 0), (0,-1), (0, 1)>
835
        //vector v2 is < (-1,-1), (-1,1), (1,-1), (1, 1)>
836

    
837
        out<<
838
        "        float fxx, fyy, fxy; \n"
839
        "        vec4 D2 = v1.xyzw - cc.gggg;\n"
840
        "        vec2 D4 = v2.xw - v2.yz;\n"
841
        "        fxx = D2.x + D2.y;\n"
842
        "        fyy = D2.z + D2.w;\n"
843
        "        fxy = 0.25*(D4.x + D4.y);\n"
844
        "        float fxx_plus_fyy = fxx + fyy;\n"
845
        "        float score_up = fxx_plus_fyy*fxx_plus_fyy; \n"
846
        "        float score_down = (fxx*fyy - fxy*fxy);\n"
847
        "        if( score_down <= 0 || score_up > THRESHOLD2 * score_down)return;\n";
848

    
849
        //...
850
        out<<" \n"
851
        "        vec2 D5 = 0.5*(v1.yw-v1.xz); \n"
852
        "        float fx = D5.x, fy = D5.y ; \n"
853
        "        float fs, fss , fxs, fys ; \n"
854
        "        vec2 v3; vec4 v4, v5, v6;\n"
855
        //read 9 pixels of upper level
856
        <<
857
        "        v3.x = texture2DRect(texU, gl_TexCoord[0].xy).g;\n"
858
        "        v4.x = texture2DRect(texU, gl_TexCoord[1].xy).g;\n"
859
        "        v4.y = texture2DRect(texU, gl_TexCoord[2].xy).g;\n"
860
        "        v4.z = texture2DRect(texU, gl_TexCoord[3].xy).g;\n"
861
        "        v4.w = texture2DRect(texU, gl_TexCoord[4].xy).g;\n"
862
        "        v6.x = texture2DRect(texU, gl_TexCoord[5].xy).g;\n"
863
        "        v6.y = texture2DRect(texU, gl_TexCoord[6].xy).g;\n"
864
        "        v6.z = texture2DRect(texU, gl_TexCoord[7].xy).g;\n"
865
        "        v6.w = texture2DRect(texU, TexRU.xy).g;\n"
866
        //compare with 9 pixels of upper level
867
        //read and compare with 9 pixels of lower level
868
        //the maximum case
869
        <<
870
        "        if(dog == 1.0)\n"
871
        "        {\n"
872
        "                if(cc.g < v3.x || any(lessThan(cc.gggg, v4)) ||any(lessThan(cc.gggg, v6)))return; \n"
873
        "                v3.y = texture2DRect(texD, gl_TexCoord[0].xy).g;\n"
874
        "                v5.x = texture2DRect(texD, gl_TexCoord[1].xy).g;\n"
875
        "                v5.y = texture2DRect(texD, gl_TexCoord[2].xy).g;\n"
876
        "                v5.z = texture2DRect(texD, gl_TexCoord[3].xy).g;\n"
877
        "                v5.w = texture2DRect(texD, gl_TexCoord[4].xy).g;\n"
878
        "                v6.x = texture2DRect(texD, gl_TexCoord[5].xy).g;\n"
879
        "                v6.y = texture2DRect(texD, gl_TexCoord[6].xy).g;\n"
880
        "                v6.z = texture2DRect(texD, gl_TexCoord[7].xy).g;\n"
881
        "                v6.w = texture2DRect(texD, TexRU.xy).g;\n"
882
        "                if(cc.g < v3.y || any(lessThan(cc.gggg, v5)) ||any(lessThan(cc.gggg, v6)))return; \n"
883
        "        }\n"
884
        //the minimum case
885
        <<
886
        "        else{\n"
887
        "        if(cc.g > v3.x || any(greaterThan(cc.gggg, v4)) ||any(greaterThan(cc.gggg, v6)))return; \n"
888
        "                v3.y = texture2DRect(texD, gl_TexCoord[0].xy).g;\n"
889
        "                v5.x = texture2DRect(texD, gl_TexCoord[1].xy).g;\n"
890
        "                v5.y = texture2DRect(texD, gl_TexCoord[2].xy).g;\n"
891
        "                v5.z = texture2DRect(texD, gl_TexCoord[3].xy).g;\n"
892
        "                v5.w = texture2DRect(texD, gl_TexCoord[4].xy).g;\n"
893
        "                v6.x = texture2DRect(texD, gl_TexCoord[5].xy).g;\n"
894
        "                v6.y = texture2DRect(texD, gl_TexCoord[6].xy).g;\n"
895
        "                v6.z = texture2DRect(texD, gl_TexCoord[7].xy).g;\n"
896
        "                v6.w = texture2DRect(texD, TexRU.xy).g;\n"
897
        "                if(cc.g > v3.y || any(greaterThan(cc.gggg, v5)) ||any(greaterThan(cc.gggg, v6)))return; \n"
898
        "        }\n";
899

    
900
        if(GlobalUtil::_SubpixelLocalization)
901

    
902
        // sub-pixel localization FragData1 = vec4(dog, 0, 0, 0); return;
903
        out <<
904
        "        fs = 0.5*( v3.x - v3.y );  \n"
905
        "        fss = v3.x + v3.y - cc.g - cc.g;\n"
906
        "        fxs = 0.25 * ( v4.y + v5.x - v4.x - v5.y);\n"
907
        "        fys = 0.25 * ( v4.w + v5.z - v4.z - v5.w);\n"
908
        
909
        // 
910
        // let dog difference be quatratic function  of dx, dy, ds; 
911
        // df(dx, dy, ds) = fx * dx + fy*dy + fs * ds + 
912
        //                                  + 0.5 * ( fxx * dx * dx + fyy * dy * dy + fss * ds * ds)
913
        //                                  + (fxy * dx * dy + fxs * dx * ds + fys * dy * ds)
914
        // (fx, fy, fs, fxx, fyy, fss, fxy, fxs, fys are the derivatives)
915
        
916
        //the local extremum satisfies
917
        // df/dx = 0, df/dy = 0, df/dz = 0
918
        
919
        //that is 
920
        // |-fx|     | fxx fxy fxs |   |dx|
921
        // |-fy|  =  | fxy fyy fys | * |dy|
922
        // |-fs|     | fxs fys fss |   |ds|
923
        // need to solve dx, dy, ds
924

    
925
        // Use Gauss elimination to solve the linear system
926
    <<
927
        "        vec3 dxys = vec3(0.0);                \n"
928
        "        vec4 A0, A1, A2 ;                        \n"
929
        "        A0 = vec4(fxx, fxy, fxs, -fx);        \n"
930
        "        A1 = vec4(fxy, fyy, fys, -fy);        \n"
931
        "        A2 = vec4(fxs, fys, fss, -fs);        \n"
932
        "        vec3 x3 = abs(vec3(fxx, fxy, fxs));                \n"
933
        "        float maxa = max(max(x3.x, x3.y), x3.z);        \n"
934
        "        if(maxa >= 1e-10 ) {                                                \n"
935
        "                if(x3.y ==maxa )                                                        \n"
936
        "                {                                                                                        \n"
937
        "                        vec4 TEMP = A1; A1 = A0; A0 = TEMP;        \n"
938
        "                }else if( x3.z == maxa )                                        \n"
939
        "                {                                                                                        \n"
940
        "                        vec4 TEMP = A2; A2 = A0; A0 = TEMP;        \n"
941
        "                }                                                                                        \n"
942
        "                A0 /= A0.x;                                                                        \n"
943
        "                A1 -= A1.x * A0;                                                        \n"
944
        "                A2 -= A2.x * A0;                                                        \n"
945
        "                vec2 x2 = abs(vec2(A1.y, A2.y));                \n"
946
        "                if( x2.y > x2.x )                                                        \n"
947
        "                {                                                                                        \n"
948
        "                        vec3 TEMP = A2.yzw;                                        \n"
949
        "                        A2.yzw = A1.yzw;                                                \n"
950
        "                        A1.yzw = TEMP;                                                        \n"
951
        "                        x2.x = x2.y;                                                        \n"
952
        "                }                                                                                        \n"
953
        "                if(x2.x >= 1e-10) {                                                \n"
954
        "                        A1.yzw /= A1.y;                                                                \n"
955
        "                        A2.yzw -= A2.y * A1.yzw;                                        \n"
956
        "                        if(abs(A2.z) >= 1e-10) {                \n"
957
        // compute dx, dy, ds: 
958
        <<
959
        "                                \n"
960
        "                                dxys.z = A2.w /A2.z;                                    \n"
961
        "                                dxys.y = A1.w - dxys.z*A1.z;                            \n"
962
        "                                dxys.x = A0.w - dxys.z*A0.z - dxys.y*A0.y;        \n"
963

    
964
        //one more threshold which I forgot in versions prior to 286
965
        <<
966
        "                                bool dog_test = (abs(cc.g + 0.5*dot(vec3(fx, fy, fs), dxys ))<= THRESHOLD1) ;\n"
967
        "                                if(dog_test || any(greaterThan(abs(dxys), vec3(1.0)))) dog = 0;\n"
968
        "                        }\n"
969
        "                }\n"
970
        "        }\n"
971
    //keep the point when the offset is less than 1
972
        <<
973
        "        gl_FragData[1] = vec4( dog, dxys); \n";
974
        else
975

    
976
        out<<
977
        "        gl_FragData[1] =  vec4( dog, 0, 0, 0) ;        \n";
978

    
979
        out<<
980
        "}\n" <<'\0';
981

    
982

    
983

    
984
        ProgramGLSL * program = new ProgramGLSL(buffer); 
985
        if(program->IsNative())
986
        {
987
                s_keypoint = program ;
988
                //parameter
989
        }else
990
        {
991
                delete program;
992
                out.seekp(pos);
993
                out << 
994
        "        gl_FragData[1] =  vec4(dog, 0, 0, 0) ;        \n"
995
        "}\n" <<'\0';
996
                s_keypoint = program = new ProgramGLSL(buffer);
997
                GlobalUtil::_SubpixelLocalization = 0;
998
                std::cerr<<"Detection simplified on this hardware"<<endl;
999
        }
1000

    
1001
        _param_dog_texu = glGetUniformLocation(*program, "texU");
1002
        _param_dog_texd = glGetUniformLocation(*program, "texD");
1003
}
1004

    
1005

    
1006
void ShaderBagGLSL::SetDogTexParam(int texU, int texD)
1007
{
1008
        glUniform1i(_param_dog_texu, 1);
1009
        glUniform1i(_param_dog_texd, 2);
1010
}
1011

    
1012
void ShaderBagGLSL::SetGenListStepParam(int tex, int tex0)
1013
{
1014
        glUniform1i(_param_genlist_step_tex0, 1);        
1015
}
1016
void ShaderBagGLSL::SetGenVBOParam( float width, float fwidth,  float size)
1017
{
1018
        float sizes[4] = {size*3.0f, fwidth, width, 1.0f/width};
1019
        glUniform4fv(_param_genvbo_size, 1, sizes);
1020

    
1021
}
1022

    
1023

    
1024

    
1025
void ShaderBagGLSL::UnloadProgram()
1026
{
1027
        glUseProgram(0);
1028
} 
1029

    
1030

    
1031

    
1032
void ShaderBagGLSL::LoadGenListShader(int ndoglev, int nlev)
1033
{
1034
        ProgramGLSL * program;
1035

    
1036
        s_genlist_init_tight = new ProgramGLSL(
1037
        "uniform sampler2DRect tex; void main (void){\n"
1038
        "vec4 helper = vec4( texture2DRect(tex, gl_TexCoord[0].xy).r,  texture2DRect(tex, gl_TexCoord[1].xy).r,\n"
1039
        "texture2DRect(tex, gl_TexCoord[2].xy).r, texture2DRect(tex, gl_TexCoord[3].xy).r);\n"
1040
        "gl_FragColor = vec4(greaterThan(helper, vec4(0.0,0.0,0.0,0.0)));\n"
1041
        "}");
1042

    
1043
        
1044
        s_genlist_init_ex = program = new ProgramGLSL(
1045
        "uniform sampler2DRect tex;uniform vec2 bbox;\n"
1046
        "void main (void ){\n"
1047
        "vec4 helper = vec4( texture2DRect(tex, gl_TexCoord[0].xy).r,  texture2DRect(tex, gl_TexCoord[1].xy).r,\n"
1048
        "texture2DRect(tex, gl_TexCoord[2].xy).r, texture2DRect(tex, gl_TexCoord[3].xy).r);\n"
1049
        "bvec4 helper2 = bvec4( \n"
1050
        "all(lessThan(gl_TexCoord[0].xy , bbox)) && helper.x >0,\n"
1051
        "all(lessThan(gl_TexCoord[1].xy , bbox)) && helper.y >0,\n"
1052
        "all(lessThan(gl_TexCoord[2].xy , bbox)) && helper.z >0,\n"
1053
        "all(lessThan(gl_TexCoord[3].xy , bbox)) && helper.w >0);\n"
1054
        "gl_FragColor = vec4(helper2);\n"
1055
        "}");
1056
        _param_genlist_init_bbox = glGetUniformLocation( *program, "bbox");
1057

    
1058

    
1059
        //reduction ...
1060
        s_genlist_histo = new ProgramGLSL(
1061
        "uniform sampler2DRect tex; void main (void){\n"
1062
        "vec4 helper; vec4 helper2; \n"
1063
        "helper = texture2DRect(tex, gl_TexCoord[0].xy); helper2.xy = helper.xy + helper.zw; \n"
1064
        "helper = texture2DRect(tex, gl_TexCoord[1].xy); helper2.zw = helper.xy + helper.zw; \n"
1065
        "gl_FragColor.rg = helper2.xz + helper2.yw;\n"
1066
        "helper = texture2DRect(tex, gl_TexCoord[2].xy); helper2.xy = helper.xy + helper.zw; \n"
1067
        "helper = texture2DRect(tex, gl_TexCoord[3].xy); helper2.zw = helper.xy + helper.zw; \n"
1068
        "gl_FragColor.ba= helper2.xz+helper2.yw;\n"
1069
        "}");
1070

    
1071

    
1072
        //read of the first part, which generates tex coordinates 
1073
        s_genlist_start= program =  LoadGenListStepShader(1, 1);
1074
        _param_ftex_width= glGetUniformLocation(*program, "width");
1075
        _param_genlist_start_tex0 = glGetUniformLocation(*program, "tex0");
1076
        //stepping
1077
        s_genlist_step = program = LoadGenListStepShader(0, 1);
1078
        _param_genlist_step_tex= glGetUniformLocation(*program, "tex");
1079
        _param_genlist_step_tex0= glGetUniformLocation(*program, "tex0");
1080

    
1081
}
1082

    
1083
void ShaderBagGLSL::SetMarginCopyParam(int xmax, int ymax)
1084
{
1085
        float truncate[2] = {xmax - 0.5f , ymax - 0.5f};
1086
        glUniform2fv(_param_margin_copy_truncate, 1, truncate);
1087
}
1088

    
1089
void ShaderBagGLSL::SetGenListInitParam(int w, int h)
1090
{
1091
        float bbox[2] = {w - 1.0f, h - 1.0f};
1092
        glUniform2fv(_param_genlist_init_bbox, 1, bbox);
1093
}
1094
void ShaderBagGLSL::SetGenListStartParam(float width, int tex0)
1095
{
1096
        glUniform1f(_param_ftex_width, width);
1097
}
1098

    
1099

    
1100
ProgramGLSL* ShaderBagGLSL::LoadGenListStepShader(int start, int step)
1101
{
1102
        int i;
1103
        char buffer[10240];
1104
        // char chanels[5] = "rgba";
1105
        ostrstream out(buffer, 10240);
1106

    
1107
        for(i = 0; i < step; i++) out<<"uniform sampler2DRect tex"<<i<<";\n";
1108
        if(start)
1109
        {
1110
                out<<"uniform float width;\n";
1111
                out<<"void main(void){\n";
1112
                out<<"float  index = floor(gl_TexCoord[0].y) * width + floor(gl_TexCoord[0].x);\n";
1113
                out<<"vec2 pos = vec2(0.5, 0.5);\n";
1114
        }else
1115
        {
1116
                out<<"uniform sampler2DRect tex;\n";
1117
                out<<"void main(void){\n";
1118
                out<<"vec4 tc = texture2DRect( tex, gl_TexCoord[0].xy);\n";
1119
                out<<"vec2 pos = tc.rg; float index = tc.b;\n";
1120
        }
1121
        out<<"vec2 sum;         vec4 cc;\n";
1122

    
1123

    
1124
        if(step>0)
1125
        {
1126
                out<<"vec2 cpos = vec2(-0.5, 0.5);\t vec2 opos;\n";
1127
                for(i = 0; i < step; i++)
1128
                {
1129

    
1130
                        out<<"cc = texture2DRect(tex"<<i<<", pos);\n";
1131
                        out<<"sum.x = cc.r + cc.g; sum.y = sum.x + cc.b;  \n";
1132
                        out<<"if (index <cc.r){ opos = cpos.xx;}\n";
1133
                        out<<"else if(index < sum.x ) {opos = cpos.yx; index -= cc.r;}\n";
1134
                        out<<"else if(index < sum.y ) {opos = cpos.xy; index -= sum.x;}\n";
1135
                        out<<"else {opos = cpos.yy; index -= sum.y;}\n";
1136
                        out<<"pos = (pos + pos + opos);\n";
1137
                }
1138
        }
1139
        out<<"gl_FragColor = vec4(pos, index, 1.0);\n";
1140
        out<<"}\n"<<'\0';
1141
        return new ProgramGLSL(buffer);
1142
}
1143

    
1144

    
1145
void ShaderBagGLSL::LoadOrientationShader()
1146
{
1147
        char buffer[10240];
1148
        ostrstream out(buffer,10240);
1149

    
1150
        if(GlobalUtil::_IsNvidia)
1151
        {
1152
        out <<        "#pragma optionNV(ifcvt none)\n"
1153
                        "#pragma optionNV(unroll all)\n";
1154
        }
1155

    
1156
        out<<"\n"
1157
        "#define GAUSSIAN_WF "<<GlobalUtil::_OrientationGaussianFactor<<" \n"
1158
        "#define SAMPLE_WF ("<<GlobalUtil::_OrientationWindowFactor<< " )\n"
1159
        "#define ORIENTATION_THRESHOLD "<< GlobalUtil::_MulitiOrientationThreshold << "\n"
1160
        "uniform sampler2DRect tex;                                        \n"
1161
        "uniform sampler2DRect gradTex;                                \n"
1162
        "uniform vec4 size;                                                \n"
1163
        << (GlobalUtil::_SubpixelLocalization || GlobalUtil::_KeepExtremumSign? 
1164
        "        uniform sampler2DRect texS;        \n" : " ")
1165
        <<
1166
        "void main()                \n"
1167
        "{                                                                                                        \n"
1168
        "        vec4 bins[10];                                                                \n"
1169
        "        bins[0] = vec4(0.0);bins[1] = vec4(0.0);bins[2] = vec4(0.0);        \n"
1170
        "        bins[3] = vec4(0.0);bins[4] = vec4(0.0);bins[5] = vec4(0.0);        \n"
1171
        "        bins[6] = vec4(0.0);bins[7] = vec4(0.0);bins[8] = vec4(0.0);        \n"
1172
        "        vec4 loc = texture2DRect(tex, gl_TexCoord[0].xy);        \n"
1173
        "        vec2 pos = loc.xy;                \n"
1174
        "        bool orientation_mode = (size.z != 0);                        \n"
1175
        "        float sigma = orientation_mode? abs(size.z) : loc.w; \n";
1176
        if(GlobalUtil::_SubpixelLocalization || GlobalUtil::_KeepExtremumSign)
1177
        {
1178
                out<<
1179
        "        if(orientation_mode){\n"
1180
        "                vec4 offset = texture2DRect(texS, pos);\n"
1181
        "                pos.xy = pos.xy + offset.yz; \n"
1182
        "                sigma = sigma * pow(size.w, offset.w);\n"
1183
        "                #if "<< GlobalUtil::_KeepExtremumSign << "\n"
1184
        "                        if(offset.x < 0.6) sigma = -sigma; \n"
1185
        "                #endif\n"
1186
        "        }\n";
1187
        }
1188
        out<<
1189
        "        //bool fixed_orientation = (size.z < 0);                \n"
1190
        "        if(size.z < 0) {gl_FragData[0] = vec4(pos, 0, sigma); return;}"
1191
        "        float gsigma = sigma * GAUSSIAN_WF;                                \n"
1192
        "        vec2 win = abs(vec2(sigma)) * (SAMPLE_WF * GAUSSIAN_WF);        \n"
1193
        "        vec2 dim = size.xy;                                                        \n"
1194
        "        float dist_threshold = win.x*win.x+0.5;                        \n"
1195
        "        float factor = -0.5/(gsigma*gsigma);                        \n"
1196
        "        vec4 sz;        vec2 spos;                                                \n"
1197
        "        //if(any(pos.xy <= 1)) discard;                                        \n"
1198
        "        sz.xy = max( pos - win, vec2(1,1));                        \n"
1199
        "        sz.zw = min( pos + win, dim-2);                                \n"
1200
        "        sz = floor(sz)+0.5;";
1201
        //loop to get the histogram
1202

    
1203
        out<<"\n"
1204
        "        for(spos.y = sz.y; spos.y <= sz.w;        spos.y+=1.0)                                \n"
1205
        "        {                                                                                                                                \n"
1206
        "                for(spos.x = sz.x; spos.x <= sz.z;        spos.x+=1.0)                        \n"
1207
        "                {                                                                                                                        \n"
1208
        "                        vec2 offset = spos - pos;                                                                \n"
1209
        "                        float sq_dist = dot(offset,offset);                                                \n"
1210
        "                        if( sq_dist < dist_threshold){                                                        \n"
1211
        "                                vec4 cc = texture2DRect(gradTex, spos);                                \n"
1212
        "                                float grad = cc.b;        float theta = cc.a;                                \n"
1213
        "                                float idx = floor(degrees(theta)*0.1);                                \n"
1214
        "                                if(idx < 0 ) idx += 36;                                                                        \n"
1215
        "                                float weight = grad*exp(sq_dist * factor);                                \n"
1216
        "                                float vidx = fract(idx * 0.25) * 4.0;//mod(idx, 4.0) ;                                                        \n"
1217
        "                                vec4 inc = weight*vec4(equal(vec4(vidx), vec4(0.0,1.0,2.0,3.0)));";
1218

    
1219
        if(GlobalUtil::_UseDynamicIndexing && GlobalUtil::_IsNvidia)
1220
        {
1221
                //dynamic indexing may not be faster
1222
                out<<"\n"
1223
        "                                int iidx = int((idx*0.25));        \n"
1224
        "                                bins[iidx]+=inc;                                        \n"
1225
        "                        }                                                                                \n"
1226
        "                }                                                                                        \n"
1227
        "        }";
1228

    
1229
        }else
1230
        {
1231
                //nvfp40 still does not support dynamic array indexing
1232
                //unrolled binary search...
1233
                out<<"\n"
1234
        "                                if(idx < 16)                                                        \n"
1235
        "                                {                                                                                \n"
1236
        "                                        if(idx < 8)                                                        \n"
1237
        "                                        {                                                                        \n"
1238
        "                                                if(idx < 4)        {        bins[0]+=inc;}        \n"
1239
        "                                                else                {        bins[1]+=inc;}        \n"
1240
        "                                        }else                                                                \n"
1241
        "                                        {                                                                        \n"
1242
        "                                                if(idx < 12){        bins[2]+=inc;}        \n"
1243
        "                                                else                {        bins[3]+=inc;}        \n"
1244
        "                                        }                                                                        \n"
1245
        "                                }else if(idx < 32)                                                \n"
1246
        "                                {                                                                                \n"
1247
        "                                        if(idx < 24)                                                \n"
1248
        "                                        {                                                                        \n"
1249
        "                                                if(idx <20)        {        bins[4]+=inc;}        \n"
1250
        "                                                else                {        bins[5]+=inc;}        \n"
1251
        "                                        }else                                                                \n"
1252
        "                                        {                                                                        \n"
1253
        "                                                if(idx < 28){        bins[6]+=inc;}        \n"
1254
        "                                                else                {        bins[7]+=inc;}        \n"
1255
        "                                        }                                                                        \n"
1256
        "                                }else                                                 \n"
1257
        "                                {                                                                                \n"
1258
        "                                        bins[8]+=inc;                                                \n"
1259
        "                                }                                                                                \n"
1260
        "                        }                                                                                \n"
1261
        "                }                                                                                        \n"
1262
        "        }";
1263

    
1264
        }
1265

    
1266
        WriteOrientationCodeToStream(out);
1267

    
1268
        ProgramGLSL * program = new ProgramGLSL(buffer);
1269
        if(program->IsNative())
1270
        {
1271
                s_orientation = program ;
1272
                _param_orientation_gtex = glGetUniformLocation(*program, "gradTex");
1273
                _param_orientation_size = glGetUniformLocation(*program, "size");
1274
                _param_orientation_stex = glGetUniformLocation(*program, "texS");
1275
        }else
1276
        {
1277
                delete program;
1278
        }
1279
}
1280

    
1281

    
1282
void ShaderBagGLSL::WriteOrientationCodeToStream(std::ostream& out)
1283
{
1284
        //smooth histogram and find the largest
1285
/*
1286
        smoothing kernel:         (1 3 6 7 6 3 1 )/27
1287
        the same as 3 pass of (1 1 1)/3 averaging
1288
        maybe better to use 4 pass on the vectors...
1289
*/
1290

    
1291

    
1292
        //the inner loop on different array numbers is always unrolled in fp40
1293

    
1294
        //bug fixed here:)
1295
        out<<"\n"
1296
        "        //mat3 m1 = mat3(1, 0, 0, 3, 1, 0, 6, 3, 1)/27.0;  \n"
1297
        "        mat3 m1 = mat3(1, 3, 6, 0, 1, 3,0, 0, 1)/27.0;  \n"
1298
        "        mat4 m2 = mat4(7, 6, 3, 1, 6, 7, 6, 3, 3, 6, 7, 6, 1, 3, 6, 7)/27.0;\n"
1299
        "        #define FILTER_CODE(i) {                                                \\\n"
1300
        "                        vec4 newb        =        (bins[i]* m2);                        \\\n"
1301
        "                        newb.xyz        +=        ( prev.yzw * m1);                \\\n"
1302
        "                        prev = bins[i];                                                        \\\n"
1303
        "                        newb.wzy        +=        ( bins[i+1].zyx *m1);        \\\n"
1304
        "                        bins[i] = newb;}\n"
1305
        "        for (int j=0; j<2; j++)                                                                \n"
1306
        "        {                                                                                                \n"
1307
        "                vec4 prev  = bins[8];                                                \n"
1308
        "                bins[9]                 = bins[0];                                                \n";
1309

    
1310
        if(GlobalUtil::_IsNvidia)
1311
        {
1312
                out<<
1313
        "                for (int i=0; i<9; i++)                                                        \n"
1314
        "                {                                                                                                \n"
1315
        "                        FILTER_CODE(i);                                                                \n"
1316
        "                }                                                                                                \n"
1317
        "        }";
1318

    
1319
        }else
1320
        {
1321
                //manually unroll the loop for ATI.
1322
                out << 
1323
        "           FILTER_CODE(0);\n"
1324
        "           FILTER_CODE(1);\n"
1325
        "           FILTER_CODE(2);\n"
1326
        "           FILTER_CODE(3);\n"
1327
        "           FILTER_CODE(4);\n"
1328
        "           FILTER_CODE(5);\n"
1329
        "           FILTER_CODE(6);\n"
1330
        "           FILTER_CODE(7);\n"
1331
        "           FILTER_CODE(8);\n"
1332
        "        }\n";
1333
        }
1334
        //find the maximum voting
1335
        out<<"\n"
1336
        "        vec4 maxh; vec2 maxh2;         \n"
1337
        "        vec4 maxh4 = max(max(max(max(max(max(max(max(bins[0], bins[1]), bins[2]), \n"
1338
        "                        bins[3]), bins[4]), bins[5]), bins[6]), bins[7]), bins[8]);\n"
1339
        "        maxh2 = max(maxh4.xy, maxh4.zw); maxh = vec4(max(maxh2.x, maxh2.y));";
1340

    
1341
        char *testpeak_code;
1342
        char *savepeak_code;
1343

    
1344
        //save two/three/four orientations with the largest votings?
1345

    
1346
        if(GlobalUtil::_MaxOrientation>1)
1347
        {
1348
                out<<"\n"
1349
                "        vec4 Orientations = vec4(0, 0, 0, 0);                                \n"
1350
                "        vec4 weights = vec4(0,0,0,0);                ";        
1351
                
1352
                testpeak_code = "\\\n"
1353
                "        {test = greaterThan(bins[i], hh);";
1354

    
1355
                //save the orientations in weight-decreasing order
1356
                if(GlobalUtil::_MaxOrientation ==2)
1357
                {
1358
                savepeak_code = "\\\n"
1359
                "                        if(weight <=weights.g){}\\\n"
1360
                "                        else if(weight >weights.r)\\\n"
1361
                "                        {weights.rg = vec2(weight, weights.r); Orientations.rg = vec2(th, Orientations.r);}\\\n"
1362
                "                        else {weights.g = weight; Orientations.g = th;}";
1363
                }else if(GlobalUtil::_MaxOrientation ==3)
1364
                {
1365
                savepeak_code = "\\\n"
1366
                "                        if(weight <=weights.b){}\\\n"
1367
                "                        else if(weight >weights.r)\\\n"
1368
                "                        {weights.rgb = vec3(weight, weights.rg); Orientations.rgb = vec3(th, Orientations.rg);}\\\n"
1369
                "                        else if(weight >weights.g)\\\n"
1370
                "                        {weights.gb = vec2(weight, weights.g); Orientations.gb = vec2(th, Orientations.g);}\\\n"
1371
                "                        else {weights.b = weight; Orientations.b = th;}";
1372
                }else
1373
                {
1374
                savepeak_code = "\\\n"
1375
                "                        if(weight <=weights.a){}\\\n"
1376
                "                        else if(weight >weights.r)\\\n"
1377
                "                        {weights = vec4(weight, weights.rgb); Orientations = vec4(th, Orientations.rgb);}\\\n"
1378
                "                        else if(weight >weights.g)\\\n"
1379
                "                        {weights.gba = vec3(weight, weights.gb); Orientations.gba = vec3(th, Orientations.gb);}\\\n"
1380
                "                        else if(weight >weights.b)\\\n"
1381
                "                        {weights.ba = vec2(weight, weights.b); Orientations.ba = vec2(th, Orientations.b);}\\\n"
1382
                "                        else {weights.a = weight; Orientations.a = th;}";
1383
                }
1384

    
1385
        }else
1386
        {
1387
                out<<"\n"
1388
                "        float Orientation;                                ";
1389
                testpeak_code ="\\\n"
1390
                "        if(npeaks<=0.0){\\\n"
1391
                "        test = equal(bins[i], maxh)        ;";
1392
                savepeak_code="\\\n"
1393
                "                        npeaks++;        \\\n"
1394
                "                        Orientation = th;";
1395

    
1396
        }
1397
        //find the peaks
1398
        out <<"\n"
1399
        "        #define FINDPEAK(i, k)"        <<testpeak_code<<"\\\n"
1400
        "        if( any ( test) )                                                        \\\n"
1401
        "        {                                                                                        \\\n"
1402
        "                if(test.r && bins[i].x > prevb && bins[i].x > bins[i].y )        \\\n"
1403
        "                {                                                                                        \\\n"
1404
        "                    float        di = -0.5 * (bins[i].y-prevb) / (bins[i].y+prevb-bins[i].x - bins[i].x) ; \\\n"
1405
        "                    float        th = (k+di+0.5);        float weight = bins[i].x;"
1406
                                <<savepeak_code<<"\\\n"
1407
        "                }\\\n"
1408
        "                else if(test.g && all( greaterThan(bins[i].yy , bins[i].xz)) )        \\\n"
1409
        "                {                                                                                        \\\n"
1410
        "                    float        di = -0.5 * (bins[i].z-bins[i].x) / (bins[i].z+bins[i].x-bins[i].y- bins[i].y) ; \\\n"
1411
        "                    float        th = (k+di+1.5);        float weight = bins[i].y;                                "
1412
                                <<savepeak_code<<"        \\\n"
1413
        "                }\\\n"
1414
        "                if(test.b && all( greaterThan( bins[i].zz , bins[i].yw)) )        \\\n"
1415
        "                {                                                                                        \\\n"
1416
        "                    float        di = -0.5 * (bins[i].w-bins[i].y) / (bins[i].w+bins[i].y-bins[i].z- bins[i].z) ; \\\n"
1417
        "                    float        th = (k+di+2.5);        float weight = bins[i].z;                                "
1418
                                <<savepeak_code<<"        \\\n"
1419
        "                }\\\n"
1420
        "                else if(test.a && bins[i].w > bins[i].z && bins[i].w > bins[i+1].x )        \\\n"
1421
        "                {                                                                                        \\\n"
1422
        "                    float        di = -0.5 * (bins[i+1].x-bins[i].z) / (bins[i+1].x+bins[i].z-bins[i].w - bins[i].w) ; \\\n"
1423
        "                    float        th = (k+di+3.5);        float weight = bins[i].w;                                "
1424
                                <<savepeak_code<<"        \\\n"
1425
        "                }\\\n"
1426
        "        }}\\\n"
1427
        "        prevb = bins[i].w;";
1428
        //the following loop will be unrolled anyway in fp40,
1429
        //taking more than 1000 instrucsions..
1430
        //....
1431
        if(GlobalUtil::_IsNvidia)
1432
        {
1433
        out<<"\n"
1434
        "        vec4 hh = maxh * ORIENTATION_THRESHOLD;        bvec4 test;        \n"
1435
        "        bins[9] = bins[0];                                                                \n"
1436
        "        float npeaks = 0.0, k = 0;                                                \n"
1437
        "        float prevb        = bins[8].w;                                                \n"
1438
        "        for (int i = 0; i <9 ; i++)                                                \n"
1439
        "        {\n"
1440
        "                FINDPEAK(i, k);\n"
1441
        "                k = k + 4.0;        \n"
1442
        "        }";
1443
        }else
1444
        {
1445
                //loop unroll for ATI.
1446
        out <<"\n"
1447
        "        vec4 hh = maxh * ORIENTATION_THRESHOLD; bvec4 test;\n"
1448
        "        bins[9] = bins[0];                                                                \n"
1449
        "        float npeaks = 0.0;                                                                \n"
1450
        "        float prevb        = bins[8].w;                                                \n"
1451
        "        FINDPEAK(0, 0.0);\n"
1452
        "        FINDPEAK(1, 4.0);\n"
1453
        "        FINDPEAK(2, 8.0);\n"
1454
        "        FINDPEAK(3, 12.0);\n"
1455
        "        FINDPEAK(4, 16.0);\n"
1456
        "        FINDPEAK(5, 20.0);\n"
1457
        "        FINDPEAK(6, 24.0);\n"
1458
        "        FINDPEAK(7, 28.0);\n"
1459
        "        FINDPEAK(8, 32.0);\n";
1460
        }
1461
        //WRITE output
1462
        if(GlobalUtil::_MaxOrientation>1)
1463
        {
1464
        out<<"\n"
1465
        "        if(orientation_mode){\n"
1466
        "                npeaks = dot(vec4(1,1,"
1467
                        <<(GlobalUtil::_MaxOrientation>2 ? 1 : 0)<<","
1468
                        <<(GlobalUtil::_MaxOrientation >3? 1 : 0)<<"), vec4(greaterThan(weights, hh)));\n"
1469
        "                gl_FragData[1] = radians((Orientations )*10.0);\n"
1470
        "                gl_FragData[0] = vec4(pos, npeaks, sigma);\n"
1471
        "        }else{\n"
1472
        "                gl_FragData[0] = vec4(pos, radians((Orientations.x)*10.0), sigma);\n"
1473
        "        }\n";
1474
        }else
1475
        {
1476
        out<<"\n"
1477
        "         gl_FragData[0] = vec4(pos, radians((Orientation.x)*10.0), sigma);\n";
1478
        }
1479
        //end
1480
        out<<"\n"
1481
        "}\n"<<'\0';
1482

    
1483

    
1484
}
1485

    
1486
void ShaderBagGLSL::SetSimpleOrientationInput(int oTex, float sigma, float sigma_step)
1487
{
1488
        glUniform1i(_param_orientation_gtex, 1);
1489
        glUniform1f(_param_orientation_size, sigma);
1490
}
1491

    
1492

    
1493

    
1494

    
1495
void ShaderBagGLSL::SetFeatureOrientationParam(int gtex, int width, int height, float sigma, int stex, float step)
1496
{
1497
        ///
1498
        glUniform1i(_param_orientation_gtex, 1);        
1499

    
1500
        if((GlobalUtil::_SubpixelLocalization || GlobalUtil::_KeepExtremumSign)&& stex)
1501
        {
1502
                //specify texutre for subpixel subscale localization
1503
                glUniform1i(_param_orientation_stex, 2);
1504
        }
1505

    
1506
        float size[4];
1507
        size[0] = (float)width;
1508
        size[1] = (float)height;
1509
        size[2] = sigma;
1510
        size[3] = step;
1511
        glUniform4fv(_param_orientation_size, 1, size);
1512

    
1513
}
1514

    
1515

    
1516
void ShaderBagGLSL::LoadDescriptorShaderF2()
1517
{
1518
        //one shader outpout 128/8 = 16 , each fragout encodes 4
1519
        //const double twopi = 2.0*3.14159265358979323846;
1520
        //const double rpi  = 8.0/twopi;
1521
        char buffer[10240];
1522
        ostrstream out(buffer, 10240);
1523

    
1524
        out<<setprecision(8);
1525

    
1526
        out<<"\n"
1527
        "#define M_PI 3.14159265358979323846\n"
1528
        "#define TWO_PI (2.0*M_PI)\n"
1529
        "#define RPI 1.2732395447351626861510701069801\n"
1530
        "#define WF  size.z\n"
1531
        "uniform sampler2DRect tex;                                \n"
1532
        "uniform sampler2DRect gradTex;                        \n"
1533
        "uniform vec4 dsize;                                                \n"
1534
        "uniform vec3 size;                                                \n"
1535
        "void main()                \n"
1536
        "{\n"
1537
        "        vec2 dim        = size.xy;        //image size                        \n"
1538
        "        float index = dsize.x*floor(gl_TexCoord[0].y * 0.5) + gl_TexCoord[0].x;\n"
1539
        "        float idx = 8.0 * fract(index * 0.125) + 8.0 * floor(2.0 * fract(gl_TexCoord[0].y * 0.5));                \n"
1540
        "        index = floor(index*0.125) + 0.49;  \n"
1541
        "        vec2 coord = floor( vec2( mod(index, dsize.z), index*dsize.w)) + 0.5 ;\n"
1542
        "        vec2 pos = texture2DRect(tex, coord).xy;                \n"
1543
        "        if(any(lessThanEqual(pos.xy,  vec2(1.0))) || any(greaterThanEqual(pos.xy, dim-1.0)))// discard;        \n"
1544
        "        { gl_FragData[0] = gl_FragData[1] = vec4(0.0); return; }\n"
1545
        "        float  anglef = texture2DRect(tex, coord).z;\n"
1546
        "        if(anglef > M_PI) anglef -= TWO_PI;\n"
1547
        "        float sigma = texture2DRect(tex, coord).w; \n"
1548
        "        float spt  = abs(sigma * WF);        //default to be 3*sigma        \n";
1549

    
1550
        //rotation
1551
        out<<
1552
        "        vec4 cscs, rots;                                                                \n"
1553
        "        cscs.y = sin(anglef);        cscs.x = cos(anglef);        \n"
1554
        "        cscs.zw = - cscs.xy;                                                        \n"
1555
        "        rots = cscs /spt;                                                                \n"
1556
        "        cscs *= spt; \n";
1557

    
1558
        //here cscs is actually (cos, sin, -cos, -sin) * (factor: 3)*sigma
1559
        //and rots is  (cos, sin, -cos, -sin ) /(factor*sigma)
1560
        //devide the 4x4 sift grid into 16 1x1 block, and each corresponds to a shader thread
1561
        //To use linear interoplation, 1x1 is increased to 2x2, by adding 0.5 to each side
1562

    
1563
        out<<
1564
        "vec4 temp; vec2 pt, offsetpt;                                \n"
1565
        "        /*the fraction part of idx is .5*/                        \n"
1566
        "        offsetpt.x = 4.0* fract(idx*0.25) - 2.0;                                \n"
1567
        "        offsetpt.y = floor(idx*0.25) - 1.5;                        \n"
1568
        "        temp = cscs.xwyx*offsetpt.xyxy;                                \n"
1569
        "        pt = pos + temp.xz + temp.yw;                                \n";
1570
        
1571
        //get a horizontal bounding box of the rotated rectangle
1572
        out<<
1573
        "        vec2 bwin = abs(cscs.xy);                                        \n"
1574
        "        float bsz = bwin.x + bwin.y;                                        \n"
1575
        "        vec4 sz;                                        \n"
1576
        "        sz.xy = max(pt - vec2(bsz), vec2(1,1));\n"
1577
        "        sz.zw = min(pt + vec2(bsz), dim - 2);                \n"
1578
        "        sz = floor(sz)+0.5;"; //move sample point to pixel center
1579
        //get voting for two box
1580

    
1581
        out<<"\n"
1582
        "        vec4 DA, DB; vec2 spos;                        \n"
1583
        "        DA = DB  = vec4(0, 0, 0, 0);                \n"
1584
        "        for(spos.y = sz.y; spos.y <= sz.w;        spos.y+=1.0)                                \n"
1585
        "        {                                                                                                                                \n"
1586
        "                for(spos.x = sz.x; spos.x <= sz.z;        spos.x+=1.0)                        \n"
1587
        "                {                                                                                                                        \n"
1588
        "                        vec2 diff = spos - pt;                                                                \n"
1589
        "                        temp = rots.xywx * diff.xyxy;\n"
1590
        "                        vec2 nxy = (temp.xz + temp.yw); \n"
1591
        "                        vec2 nxyn = abs(nxy);                        \n"
1592
        "                        if(all( lessThan(nxyn, vec2(1.0)) ))\n"
1593
        "                        {\n"
1594
        "                                vec4 cc = texture2DRect(gradTex, spos);                                                \n"
1595
        "                                float mod = cc.b;        float angle = cc.a;                                        \n"
1596
        "                                float theta0 = RPI * (anglef - angle);                                \n"
1597
        "                                float theta = theta0 < 0.0? theta0 + 8.0 : theta0;;\n"
1598
        "                                diff = nxy + offsetpt.xy;                                                                \n"
1599
        "                                float ww = exp(-0.125*dot(diff, diff));\n"
1600
        "                                vec2 weights = 1 - nxyn;\n"
1601
        "                                float weight = weights.x * weights.y *mod*ww; \n"
1602
        "                                float theta1 = floor(theta); \n"
1603
        "                                float weight2 = (theta - theta1) * weight;\n"
1604
        "                                float weight1 = weight - weight2;\n"
1605
        "                                DA += vec4(equal(vec4(theta1),  vec4(0, 1, 2, 3)))*weight1;\n"
1606
        "                                DA += vec4(equal(vec4(theta1),  vec4(7, 0, 1, 2)))*weight2; \n"
1607
        "                                DB += vec4(equal(vec4(theta1),  vec4(4, 5, 6, 7)))*weight1;\n"
1608
        "                                DB += vec4(equal(vec4(theta1),  vec4(3, 4, 5, 6)))*weight2; \n"
1609
        "                        }\n"
1610
        "                }\n"
1611
        "        }\n";
1612

    
1613
        out<<
1614
        "         gl_FragData[0] = DA; gl_FragData[1] = DB;\n"
1615
        "}\n"<<'\0';
1616

    
1617
        ProgramGLSL * program =  new ProgramGLSL(buffer); 
1618

    
1619
        if(program->IsNative())
1620
        {
1621
                s_descriptor_fp = program ;
1622
                _param_descriptor_gtex = glGetUniformLocation(*program, "gradTex");
1623
                _param_descriptor_size = glGetUniformLocation(*program, "size");
1624
                _param_descriptor_dsize = glGetUniformLocation(*program, "dsize");
1625
        }else
1626
        {
1627
                delete program;
1628
        }
1629

    
1630

    
1631
}
1632

    
1633
void ShaderBagGLSL::LoadDescriptorShader()
1634
{
1635
        GlobalUtil::_DescriptorPPT = 16;
1636
        LoadDescriptorShaderF2();
1637
}
1638

    
1639

    
1640
void ShaderBagGLSL::SetFeatureDescirptorParam(int gtex, int otex, float dwidth, float fwidth,  float width, float height, float sigma)
1641
{
1642
        ///
1643
        glUniform1i(_param_descriptor_gtex, 1);        
1644

    
1645
        float dsize[4] ={dwidth, 1.0f/dwidth, fwidth, 1.0f/fwidth};
1646
        glUniform4fv(_param_descriptor_dsize, 1, dsize);
1647
        float size[3];
1648
        size[0] = width;
1649
        size[1] = height;
1650
        size[2] = GlobalUtil::_DescriptorWindowFactor;
1651
        glUniform3fv(_param_descriptor_size, 1, size);
1652

    
1653
}
1654

    
1655
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////
1656

    
1657
void ShaderBagPKSL::LoadFixedShaders()
1658
{
1659
        ProgramGLSL * program;
1660

    
1661

    
1662
        s_gray = new ProgramGLSL( 
1663
        "uniform sampler2DRect tex; void main(){\n"
1664
        "float intensity = dot(vec3(0.299, 0.587, 0.114), texture2DRect(tex,gl_TexCoord[0].xy ).rgb);\n"
1665
        "gl_FragColor= vec4(intensity, intensity, intensity, 1.0);}"        );
1666

    
1667

    
1668
        s_sampling = new ProgramGLSL(
1669
        "uniform sampler2DRect tex; void main(){\n"
1670
        "gl_FragColor= vec4(        texture2DRect(tex,gl_TexCoord[0].st ).r,texture2DRect(tex,gl_TexCoord[1].st ).r,\n"
1671
        "                                                texture2DRect(tex,gl_TexCoord[2].st ).r,texture2DRect(tex,gl_TexCoord[3].st ).r);}"        );
1672

    
1673

    
1674
        s_margin_copy = program = new ProgramGLSL(
1675
        "uniform sampler2DRect tex;  uniform vec4 truncate; void main(){\n"
1676
        "vec4 cc = texture2DRect(tex, min(gl_TexCoord[0].xy, truncate.xy)); \n"
1677
        "bvec2 ob = lessThan(gl_TexCoord[0].xy, truncate.xy);\n"
1678
        "if(ob.y) { gl_FragColor = (truncate.z ==0 ? cc.rrbb : cc.ggaa); } \n"
1679
        "else if(ob.x) {gl_FragColor = (truncate.w <1.5 ? cc.rgrg : cc.baba);} \n"
1680
        "else {        vec4 weights = vec4(vec4(0, 1, 2, 3) == truncate.w);\n"
1681
        "float v = dot(weights, cc); gl_FragColor = vec4(v);}}");
1682

    
1683
        _param_margin_copy_truncate = glGetUniformLocation(*program, "truncate");
1684

    
1685

    
1686

    
1687
        s_zero_pass = new ProgramGLSL("void main(){gl_FragColor = vec4(0.0);}");
1688

    
1689

    
1690

    
1691
        s_grad_pass = program = new ProgramGLSL(
1692
        "uniform sampler2DRect tex; uniform sampler2DRect texp; void main ()\n"
1693
        "{\n"
1694
        "        vec4 v1, v2, gg;\n"
1695
        "        vec4 cc = texture2DRect(tex, gl_TexCoord[0].xy);\n"
1696
        "        vec4 cp = texture2DRect(texp, gl_TexCoord[0].xy);\n"
1697
        "        gl_FragData[0] = cc - cp; \n"
1698
        "        vec4 cl = texture2DRect(tex, gl_TexCoord[1].xy); vec4 cr = texture2DRect(tex, gl_TexCoord[2].xy);\n"
1699
        "        vec4 cd = texture2DRect(tex, gl_TexCoord[3].xy); vec4 cu = texture2DRect(tex, gl_TexCoord[4].xy);\n"
1700
        "        vec4 dx = (vec4(cr.rb, cc.ga) - vec4(cc.rb, cl.ga)).zxwy;\n"
1701
        "        vec4 dy = (vec4(cu.rg, cc.ba) - vec4(cc.rg, cd.ba)).zwxy;\n"
1702
        "        vec4 grad = 0.5 * sqrt(dx*dx + dy * dy);\n"
1703
        "        gl_FragData[1] = grad;\n"
1704
        "        vec4 invalid = vec4(equal(grad, vec4(0.0)));        \n"
1705
        "        vec4 ov = atan(dy, dx + invalid);                \n"
1706
        "        gl_FragData[2] = ov; \n"
1707
        "}\n\0"); //when 
1708

    
1709
        _param_grad_pass_texp = glGetUniformLocation(*program, "texp");
1710

    
1711

    
1712
        GlobalUtil::_OrientationPack2 = 0;
1713
        LoadOrientationShader();
1714

    
1715
        if(s_orientation == NULL)
1716
        {
1717
                //Load a simplified version if the right version is not supported
1718
                s_orientation = program =  new ProgramGLSL(
1719
                "uniform sampler2DRect fTex; uniform sampler2DRect oTex; uniform vec2 size; void main(){\n"
1720
                "        vec4 cc = texture2DRect(fTex, gl_TexCoord[0].xy);\n"
1721
                "        vec2 co = cc.xy * 0.5; \n"
1722
                "        vec4 oo = texture2DRect(oTex, co);\n"
1723
                "        bvec2 bo = lessThan(fract(co), vec2(0.5)); \n"
1724
                "        float o = bo.y? (bo.x? oo.r : oo.g) : (bo.x? oo.b : oo.a); \n"
1725
                "        gl_FragColor = vec4(cc.rg, o, size.x * pow(size.y, cc.a));}");  
1726
                _param_orientation_gtex= glGetUniformLocation(*program, "oTex");
1727
                _param_orientation_size= glGetUniformLocation(*program, "size");
1728

    
1729
                GlobalUtil::_MaxOrientation = 0;
1730
                GlobalUtil::_FullSupported = 0;
1731
                std::cerr<<"Orientation simplified on this hardware"<<endl;
1732
        }
1733

    
1734
        if(GlobalUtil::_DescriptorPPT)
1735
        {
1736
                LoadDescriptorShader();
1737
                if(s_descriptor_fp == NULL) 
1738
                {
1739
                        GlobalUtil::_DescriptorPPT = GlobalUtil::_FullSupported = 0; 
1740
                        std::cerr<<"Descriptor ignored on this hardware"<<endl;
1741
                }
1742
        }
1743
}
1744

    
1745

    
1746
void ShaderBagPKSL::LoadDisplayShaders()
1747
{
1748
        ProgramGLSL * program;
1749

    
1750
        s_copy_key = new ProgramGLSL(
1751
        "uniform sampler2DRect tex;void main(){\n"
1752
        "gl_FragColor= vec4(texture2DRect(tex, gl_TexCoord[0].xy).rg, 0,1);}");
1753

    
1754
        //shader used to write a vertex buffer object
1755
        //which is used to draw the quads of each feature
1756
        s_vertex_list = program = new ProgramGLSL(
1757
        "uniform sampler2DRect tex; uniform vec4 sizes; void main(){\n"
1758
        "float fwidth = sizes.y; \n"
1759
        "float twidth = sizes.z; \n"
1760
        "float rwidth = sizes.w; \n"
1761
        "float index = 0.1*(fwidth*floor(gl_TexCoord[0].y) + gl_TexCoord[0].x);\n"
1762
        "float px = mod(index, twidth);\n"
1763
        "vec2 tpos= floor(vec2(px, index*rwidth))+0.5;\n"
1764
        "vec4 cc = texture2DRect(tex, tpos );\n"
1765
        "float size = 3.0f * cc.a; \n"
1766
        "gl_FragColor.zw = vec2(0.0, 1.0);\n"
1767
        "if(any(lessThan(cc.xy,vec2(0.0)))) {gl_FragColor.xy = cc.xy;}else \n"
1768
        "{\n"
1769
        "        float type = fract(px);\n"
1770
        "        vec2 dxy; float s, c;\n"
1771
        "        dxy.x = type < 0.1 ? 0.0 : ((type <0.5 || type > 0.9)? size : -size);\n"
1772
        "        dxy.y = type < 0.2 ? 0.0 : ((type < 0.3 || type > 0.7 )? -size :size); \n"
1773
        "        s = sin(cc.b); c = cos(cc.b); \n"
1774
        "        gl_FragColor.x = cc.x + c*dxy.x-s*dxy.y;\n"
1775
        "        gl_FragColor.y = cc.y + c*dxy.y+s*dxy.x;}\n"
1776
        "}\n\0");
1777
        /*gl_FragColor = vec4(tpos, 0.0, 1.0);}\n\0");*/
1778

    
1779
        _param_genvbo_size = glGetUniformLocation(*program, "sizes");
1780

    
1781
        s_display_gaussian = new ProgramGLSL(
1782
        "uniform sampler2DRect tex; void main(){\n"
1783
    "vec4 pc = texture2DRect(tex, gl_TexCoord[0].xy);        bvec2 ff = lessThan(fract(gl_TexCoord[0].xy), vec2(0.5));\n"
1784
    "float v = ff.y?(ff.x? pc.r : pc.g):(ff.x?pc.b:pc.a); gl_FragColor = vec4(vec3(v), 1.0);}");
1785

    
1786
        s_display_dog =  new ProgramGLSL(
1787
        "uniform sampler2DRect tex; void main(){\n"
1788
        "vec4 pc = texture2DRect(tex, gl_TexCoord[0].xy); bvec2 ff = lessThan(fract(gl_TexCoord[0].xy), vec2(0.5));\n"
1789
        "float v = ff.y ?(ff.x ? pc.r : pc.g):(ff.x ? pc.b : pc.a);float g = (0.5+20.0*v);\n"
1790
        "gl_FragColor = vec4(g, g, g, 1.0);}" );
1791

    
1792

    
1793
        s_display_grad = new ProgramGLSL(
1794
        "uniform sampler2DRect tex; void main(){\n"
1795
        "vec4 pc = texture2DRect(tex, gl_TexCoord[0].xy); bvec2 ff = lessThan(fract(gl_TexCoord[0].xy), vec2(0.5));\n"
1796
        "float v = ff.y ?(ff.x ? pc.r : pc.g):(ff.x ? pc.b : pc.a); gl_FragColor = vec4(5.0 *vec3(v), 1.0); }");
1797

    
1798
        s_display_keys= new ProgramGLSL(
1799
        "uniform sampler2DRect tex; void main(){\n"
1800
        "vec4 oc = texture2DRect(tex, gl_TexCoord[0].xy); \n"
1801
        "vec4 cc = vec4(equal(abs(oc.rrrr), vec4(1.0, 2.0, 3.0, 4.0))); \n"
1802
        "bvec2 ff = lessThan(fract(gl_TexCoord[0].xy) , vec2(0.5));\n"
1803
        "float v = ff.y ?(ff.x ? cc.r : cc.g):(ff.x ? cc.b : cc.a);\n"
1804
        "if(v == 0) discard;        \n"
1805
        "else if(oc.r > 0) gl_FragColor = vec4(1.0, 0, 0,1.0); \n"
1806
        "else gl_FragColor = vec4(0.0,1.0,0.0,1.0);        }" );
1807
}
1808

    
1809
void ShaderBagPKSL::LoadOrientationShader(void)
1810
{
1811
        char buffer[10240];
1812
        ostrstream out(buffer,10240);
1813
        if(GlobalUtil::_IsNvidia)
1814
        {
1815
                out <<        "#pragma optionNV(ifcvt none)\n"
1816
                                "#pragma optionNV(unroll all)\n";
1817
        }
1818
        out<<"\n"
1819
        "#define GAUSSIAN_WF "<<GlobalUtil::_OrientationGaussianFactor<<" \n"
1820
        "#define SAMPLE_WF ("<<GlobalUtil::_OrientationWindowFactor<< " )\n"
1821
        "#define ORIENTATION_THRESHOLD "<< GlobalUtil::_MulitiOrientationThreshold << "\n"
1822
        "uniform sampler2DRect tex;        uniform sampler2DRect gtex;\n"
1823
        "uniform sampler2DRect otex; uniform vec4 size;\n"
1824
        "void main()                \n"
1825
        "{                                                                                                        \n"
1826
        "        vec4 bins[10];                                                                \n"
1827
        "        bins[0] = vec4(0.0);bins[1] = vec4(0.0);bins[2] = vec4(0.0);        \n"
1828
        "        bins[3] = vec4(0.0);bins[4] = vec4(0.0);bins[5] = vec4(0.0);        \n"
1829
        "        bins[6] = vec4(0.0);bins[7] = vec4(0.0);bins[8] = vec4(0.0);        \n"
1830
        "        vec4 sift = texture2DRect(tex, gl_TexCoord[0].xy);        \n"
1831
        "        vec2 pos = sift.xy; \n"
1832
        "        bool orientation_mode = (size.z != 0);                \n"
1833
        "        float sigma = orientation_mode? (abs(size.z) * pow(size.w, sift.w) * sift.z) : (sift.w); \n"
1834
        "        //bool fixed_orientation = (size.z < 0);                \n"
1835
        "        if(size.z < 0) {gl_FragData[0] = vec4(pos, 0, sigma); return;}"
1836
        "        float gsigma = sigma * GAUSSIAN_WF;                                \n"
1837
        "        vec2 win = abs(vec2(sigma)) * (SAMPLE_WF * GAUSSIAN_WF);        \n"
1838
        "        vec2 dim = size.xy;                                                        \n"
1839
        "        vec4 dist_threshold = vec4(win.x*win.x+0.5);                        \n"
1840
        "        float factor = -0.5/(gsigma*gsigma);                        \n"
1841
        "        vec4 sz;        vec2 spos;                                                \n"
1842
        "        //if(any(pos.xy <= 1)) discard;                                        \n"
1843
        "        sz.xy = max( pos - win, vec2(2,2));                        \n"
1844
        "        sz.zw = min( pos + win, dim-3);                                \n"
1845
        "        sz = floor(sz*0.5) + 0.5; ";
1846
                //loop to get the histogram
1847

    
1848
        out<<"\n"
1849
        "        for(spos.y = sz.y; spos.y <= sz.w;        spos.y+=1.0)                                \n"
1850
        "        {                                                                                                                                \n"
1851
        "                for(spos.x = sz.x; spos.x <= sz.z;        spos.x+=1.0)                        \n"
1852
        "                {                                                                                                                        \n"
1853
        "                        vec2 offset = 2* spos - pos - 0.5;                                        \n"
1854
        "                        vec4 off = vec4(offset, offset + 1);                                \n"
1855
        "                        vec4 distsq = off.xzxz * off.xzxz + off.yyww * off.yyww;        \n"
1856
        "                        bvec4 inside = lessThan(distsq, dist_threshold);                        \n"
1857
        "                        if(any(inside))                                                                                \n"
1858
        "                        {                                                                                                                \n"
1859
        "                                vec4 gg = texture2DRect(gtex, spos);                                \n"
1860
        "                                vec4 oo = texture2DRect(otex, spos);                                \n"
1861
        "                                vec4 weight = gg * exp(distsq * factor);                        \n"
1862
        "                                vec4 idxv  = floor(degrees(oo)*0.1);                                 \n"
1863
        "                                idxv+= (vec4(lessThan(idxv, vec4(0.0)))*36.0);                         \n"
1864
        "                                vec4 vidx = fract(idxv * 0.25) * 4.0;//mod(idxv, 4.0);        \n";
1865
        //
1866
        if(GlobalUtil::_UseDynamicIndexing && GlobalUtil::_IsNvidia)
1867
        {
1868
                // it might be slow on some GPUs
1869
                out<<"\n"
1870
        "                                for(int i = 0 ; i < 4; i++)\n"
1871
        "                                {\n"
1872
        "                                        if(inside[i])\n"
1873
        "                                        {\n"
1874
        "                                                float idx = idxv[i];                                                                \n"
1875
        "                                                vec4 inc = weight[i] * vec4(equal(vec4(vidx[i]), vec4(0,1,2,3)));        \n"
1876
        "                                                int iidx = int(floor(idx*0.25));        \n"
1877
        "                                                bins[iidx]+=inc;                                        \n"
1878
        "                                        }                                                                                \n"
1879
        "                                }                                                                                        \n"
1880
        "                        }                                                                                                \n"
1881
        "                }                                                                                                        \n"
1882
        "        }";
1883

    
1884
        }else
1885
        {
1886
                //nvfp40 still does not support dynamic array indexing
1887
                //unrolled binary search
1888
                //it seems to be faster than the dyanmic indexing version on some GPUs
1889
                out<<"\n"
1890
        "                                for(int i = 0 ; i < 4; i++)\n"
1891
        "                                {\n"
1892
        "                                        if(inside[i])\n"
1893
        "                                        {\n"
1894
        "                                                float idx = idxv[i];                                                                                 \n"
1895
        "                                                vec4 inc = weight[i] * vec4(equal(vec4(vidx[i]), vec4(0,1,2,3)));        \n"
1896
        "                                                if(idx < 16)                                                        \n"
1897
        "                                                {                                                                                \n"
1898
        "                                                        if(idx < 8)                                                        \n"
1899
        "                                                        {                                                                        \n"
1900
        "                                                                if(idx < 4)        {        bins[0]+=inc;}        \n"
1901
        "                                                                else                {        bins[1]+=inc;}        \n"
1902
        "                                                        }else                                                                \n"
1903
        "                                                        {                                                                        \n"
1904
        "                                                                if(idx < 12){        bins[2]+=inc;}        \n"
1905
        "                                                                else                {        bins[3]+=inc;}        \n"
1906
        "                                                        }                                                                        \n"
1907
        "                                                }else if(idx < 32)                                                \n"
1908
        "                                                {                                                                                \n"
1909
        "                                                        if(idx < 24)                                                \n"
1910
        "                                                        {                                                                        \n"
1911
        "                                                                if(idx <20)        {        bins[4]+=inc;}        \n"
1912
        "                                                                else                {        bins[5]+=inc;}        \n"
1913
        "                                                        }else                                                                \n"
1914
        "                                                        {                                                                        \n"
1915
        "                                                                if(idx < 28){        bins[6]+=inc;}        \n"
1916
        "                                                                else                {        bins[7]+=inc;}        \n"
1917
        "                                                        }                                                                        \n"
1918
        "                                                }else                                                 \n"
1919
        "                                                {                                                                                \n"
1920
        "                                                        bins[8]+=inc;                                                \n"
1921
        "                                                }                                                                                \n"
1922
        "                                        }                                                                                        \n"
1923
        "                                }                                                                                                \n"
1924
        "                        }                                                                                \n"
1925
        "                }                                                                                        \n"
1926
        "        }";
1927

    
1928
        }
1929

    
1930
        //reuse the code from the unpacked version..
1931
        ShaderBagGLSL::WriteOrientationCodeToStream(out);
1932

    
1933

    
1934

    
1935
        ProgramGLSL * program = new ProgramGLSL(buffer);
1936
        if(program->IsNative())
1937
        {
1938
                s_orientation = program ;
1939
                _param_orientation_gtex = glGetUniformLocation(*program, "gtex");
1940
                _param_orientation_otex = glGetUniformLocation(*program, "otex");
1941
                _param_orientation_size = glGetUniformLocation(*program, "size");
1942
        }else
1943
        {
1944
                delete program;
1945
        }
1946
}
1947

    
1948
void ShaderBagPKSL::SetGenListStartParam(float width, int tex0)
1949
{
1950
        glUniform1f(_param_ftex_width, width);
1951
}
1952

    
1953
void ShaderBagPKSL::LoadGenListShader(int ndoglev,int nlev)
1954
{
1955
        ProgramGLSL * program;
1956

    
1957
        s_genlist_init_tight = new ProgramGLSL(
1958
        "uniform sampler2DRect tex; void main ()\n"
1959
        "{\n"
1960
        "        vec4 key = vec4(texture2DRect(tex, gl_TexCoord[0].xy).r, \n"
1961
        "                                        texture2DRect(tex, gl_TexCoord[1].xy).r, \n"
1962
        "                                        texture2DRect(tex, gl_TexCoord[2].xy).r, \n"
1963
        "                                        texture2DRect(tex, gl_TexCoord[3].xy).r); \n"
1964
        "                                        gl_FragColor = vec4(notEqual(key, vec4(0.0))); \n"
1965
        "}");
1966

    
1967
        s_genlist_init_ex = program = new ProgramGLSL(
1968
        "uniform sampler2DRect tex; uniform vec4 bbox; void main ()\n"
1969
        "{\n"
1970
        "        vec4 helper1 = vec4(equal(vec4(abs(texture2DRect(tex, gl_TexCoord[0].xy).r)), vec4(1.0, 2.0, 3.0, 4.0)));\n"
1971
        "        vec4 helper2 = vec4(equal(vec4(abs(texture2DRect(tex, gl_TexCoord[1].xy).r)), vec4(1.0, 2.0, 3.0, 4.0)));\n"
1972
        "        vec4 helper3 = vec4(equal(vec4(abs(texture2DRect(tex, gl_TexCoord[2].xy).r)), vec4(1.0, 2.0, 3.0, 4.0)));\n"
1973
        "        vec4 helper4 = vec4(equal(vec4(abs(texture2DRect(tex, gl_TexCoord[3].xy).r)), vec4(1.0, 2.0, 3.0, 4.0)));\n"
1974
        "        vec4 bx1 = vec4(lessThan(gl_TexCoord[0].xxyy, bbox)); \n"
1975
        "        vec4 bx4 = vec4(lessThan(gl_TexCoord[3].xxyy, bbox)); \n"
1976
        "        vec4 bx2 = vec4(bx4.xy, bx1.zw); \n"
1977
        "        vec4 bx3 = vec4(bx1.xy, bx4.zw);\n"
1978
        "        helper1 = min(min(bx1.xyxy, bx1.zzww), helper1);\n"
1979
        "        helper2 = min(min(bx2.xyxy, bx2.zzww), helper2);\n"
1980
        "        helper3 = min(min(bx3.xyxy, bx3.zzww), helper3);\n"
1981
        "        helper4 = min(min(bx4.xyxy, bx4.zzww), helper4);\n"
1982
        "        gl_FragColor.r = float(any(greaterThan(max(helper1.xy, helper1.zw), vec2(0.0))));        \n"
1983
        "        gl_FragColor.g = float(any(greaterThan(max(helper2.xy, helper2.zw), vec2(0.0))));        \n"
1984
        "        gl_FragColor.b = float(any(greaterThan(max(helper3.xy, helper3.zw), vec2(0.0))));        \n"
1985
        "        gl_FragColor.a = float(any(greaterThan(max(helper4.xy, helper4.zw), vec2(0.0))));        \n"
1986
        "}");
1987
        _param_genlist_init_bbox = glGetUniformLocation( *program, "bbox");
1988

    
1989
        s_genlist_end = program = new ProgramGLSL(
1990
                GlobalUtil::_KeepExtremumSign == 0 ? 
1991
        
1992
        "uniform sampler2DRect tex; uniform sampler2DRect ktex; void main()\n"
1993
        "{\n"
1994
        "        vec4 tc = texture2DRect( tex, gl_TexCoord[0].xy);\n"
1995
        "        vec2 pos = tc.rg; float index = tc.b;\n"
1996
        "        vec4 tk = texture2DRect( ktex, pos); \n"
1997
        "        vec4 keys = vec4(equal(abs(tk.rrrr), vec4(1.0, 2.0, 3.0, 4.0))); \n"
1998
        "        vec2 opos; \n"
1999
        "        opos.x = dot(keys, vec4(-0.5, 0.5, -0.5, 0.5));\n"
2000
        "        opos.y = dot(keys, vec4(-0.5, -0.5, 0.5, 0.5));\n"
2001
        "        gl_FragColor = vec4(opos + pos * 2.0 + tk.yz, 1.0, tk.w);\n"
2002
        "}" : 
2003
        
2004
        "uniform sampler2DRect tex; uniform sampler2DRect ktex; void main()\n"
2005
        "{\n"
2006
        "        vec4 tc = texture2DRect( tex, gl_TexCoord[0].xy);\n"
2007
        "        vec2 pos = tc.rg; float index = tc.b;\n"
2008
        "        vec4 tk = texture2DRect( ktex, pos); \n"
2009
        "        vec4 keys = vec4(equal(abs(tk.rrrr), vec4(1.0, 2.0, 3.0, 4.0))) \n"
2010
        "        vec2 opos; \n"
2011
        "        opos.x = dot(keys, vec4(-0.5, 0.5, -0.5, 0.5));\n"
2012
        "        opos.y = dot(keys, vec4(-0.5, -0.5, 0.5, 0.5));\n"
2013
        "        gl_FragColor = vec4(opos + pos * 2.0 + tk.yz, sign(tk.r), tk.w);\n"
2014
        "}"        
2015
        );
2016
        _param_genlist_end_ktex = glGetUniformLocation(*program, "ktex");
2017

    
2018
        //reduction ...
2019
        s_genlist_histo = new ProgramGLSL(
2020
        "uniform sampler2DRect tex; void main ()\n"
2021
        "{\n"
2022
        "        vec4 helper; vec4 helper2; \n"
2023
        "        helper = texture2DRect(tex, gl_TexCoord[0].xy); helper2.xy = helper.xy + helper.zw; \n"
2024
        "        helper = texture2DRect(tex, gl_TexCoord[1].xy); helper2.zw = helper.xy + helper.zw; \n"
2025
        "        gl_FragColor.rg = helper2.xz + helper2.yw;\n"
2026
        "        helper = texture2DRect(tex, gl_TexCoord[2].xy); helper2.xy = helper.xy + helper.zw; \n"
2027
        "        helper = texture2DRect(tex, gl_TexCoord[3].xy); helper2.zw = helper.xy + helper.zw; \n"
2028
        "        gl_FragColor.ba= helper2.xz+helper2.yw;\n"
2029
        "}");
2030

    
2031

    
2032
        //read of the first part, which generates tex coordinates 
2033

    
2034
        s_genlist_start= program =  ShaderBagGLSL::LoadGenListStepShader(1, 1);
2035
        _param_ftex_width= glGetUniformLocation(*program, "width");
2036
        _param_genlist_start_tex0 = glGetUniformLocation(*program, "tex0");
2037
        //stepping
2038
        s_genlist_step = program = ShaderBagGLSL::LoadGenListStepShader(0, 1);
2039
        _param_genlist_step_tex= glGetUniformLocation(*program, "tex");
2040
        _param_genlist_step_tex0= glGetUniformLocation(*program, "tex0");
2041

    
2042
}
2043
void ShaderBagPKSL::UnloadProgram(void)
2044
{
2045
        glUseProgram(0);
2046
}
2047
void ShaderBagPKSL::LoadKeypointShader(float dog_threshold, float edge_threshold)
2048
{
2049
        //
2050
        char buffer[10240];
2051
        float threshold0 = dog_threshold* (GlobalUtil::_SubpixelLocalization?0.8f:1.0f);
2052
        float threshold1 = dog_threshold;
2053
        float threshold2 = (edge_threshold+1)*(edge_threshold+1)/edge_threshold;
2054
        ostrstream out(buffer, 10240);
2055
        out<<setprecision(8);
2056

    
2057
        if(GlobalUtil::_IsNvidia)
2058
        {
2059
                out << "#pragma optionNV(ifcvt none)\n"
2060
                                "#pragma optionNV(unroll all)\n"
2061
                                "#define REPEAT4(FUNCTION)\\\n"
2062
                                "for(int i = 0; i < 4; ++i)\\\n"
2063
                                "{\\\n"
2064
                                "        FUNCTION(i);\\\n"
2065
                                "}\n";
2066
        }else
2067
        {
2068
                //loop unroll
2069
                out <<  "#define REPEAT4(FUNCTION)\\\n"
2070
                                "FUNCTION(0);\\\n"
2071
                                "FUNCTION(1);\\\n"
2072
                                "FUNCTION(2);\\\n"
2073
                                "FUNCTION(3);\n";
2074
        }
2075
        //tex(X)(Y)
2076
        //X: (CLR) (CENTER 0, LEFT -1, RIGHT +1)  
2077
        //Y: (CDU) (CENTER 0, DOWN -1, UP    +1) 
2078
        out <<        "#define THRESHOLD0 " << threshold0 << "\n"
2079
                        "#define THRESHOLD1 " << threshold1 << "\n"
2080
                        "#define THRESHOLD2 " << threshold2 << "\n";
2081

    
2082
        out<<
2083
        "uniform sampler2DRect tex; uniform sampler2DRect texU;\n"
2084
        "uniform sampler2DRect texD; void main ()\n"
2085
        "{\n"
2086
        "        vec2 TexRU = vec2(gl_TexCoord[2].x, gl_TexCoord[4].y); \n"
2087
        "        vec4 ccc = texture2DRect(tex, gl_TexCoord[0].xy);\n"
2088
        "        vec4 clc = texture2DRect(tex, gl_TexCoord[1].xy);\n"
2089
        "        vec4 crc = texture2DRect(tex, gl_TexCoord[2].xy);\n"
2090
        "        vec4 ccd = texture2DRect(tex, gl_TexCoord[3].xy);\n"
2091
        "        vec4 ccu = texture2DRect(tex, gl_TexCoord[4].xy);\n"
2092
        "        vec4 cld = texture2DRect(tex, gl_TexCoord[5].xy);\n"
2093
        "        vec4 clu = texture2DRect(tex, gl_TexCoord[6].xy);\n"
2094
        "        vec4 crd = texture2DRect(tex, gl_TexCoord[7].xy);\n"
2095
        "        vec4 cru = texture2DRect(tex, TexRU.xy);\n"
2096
        "        vec4  cc = ccc;\n"
2097
        "        vec4  v1[4], v2[4];\n"
2098
        "        v1[0] = vec4(clc.g, ccc.g, ccd.b, ccc.b);\n"
2099
        "        v1[1] = vec4(ccc.r, crc.r, ccd.a, ccc.a);\n"
2100
        "        v1[2] = vec4(clc.a, ccc.a, ccc.r, ccu.r);\n"
2101
        "        v1[3] = vec4(ccc.b, crc.b, ccc.g, ccu.g);\n"
2102
        "        v2[0] = vec4(cld.a, clc.a, ccd.a, ccc.a);\n"
2103
        "        v2[1] = vec4(ccd.b, ccc.b, crd.b, crc.b);\n"
2104
        "        v2[2] = vec4(clc.g, clu.g, ccc.g, ccu.g);\n"
2105
        "        v2[3] = vec4(ccc.r, ccu.r, crc.r, cru.r);\n"
2106

    
2107
        //test against 8 neighbours
2108
        //use variable to identify type of extremum
2109
        //1.0 for local maximum and -1.0 for minimum
2110
        <<
2111
        "        vec4 key = vec4(0.0); \n"
2112
        "        #define KEYTEST_STEP0(i) \\\n"
2113
        "        {\\\n"
2114
        "                bvec4 test1 = greaterThan(vec4(cc[i]), max(v1[i], v2[i])), test2 = lessThan(vec4(cc[i]), min(v1[i], v2[i]));\\\n"
2115
        "                key[i] = cc[i] > THRESHOLD0 && all(test1)?1.0: 0.0;\\\n"
2116
        "                key[i] = cc[i] < -THRESHOLD0 && all(test2)? -1.0: key[i];\\\n"
2117
        "        }\n"
2118
        "        REPEAT4(KEYTEST_STEP0);\n"
2119
        "        if(gl_TexCoord[0].x < 1.0) {key.rb = vec2(0.0);}\n"
2120
        "        if(gl_TexCoord[0].y < 1.0) {key.rg = vec2(0.0);}\n"
2121
        "        gl_FragColor = vec4(0.0);\n"
2122
        "        if(any(notEqual(key, vec4(0.0)))) {\n";
2123

    
2124
        //do edge supression first.. 
2125
        //vector v1 is < (-1, 0), (1, 0), (0,-1), (0, 1)>
2126
        //vector v2 is < (-1,-1), (-1,1), (1,-1), (1, 1)>
2127

    
2128
        out<<
2129
        "        float fxx[4], fyy[4], fxy[4], fx[4], fy[4];\n"
2130
        "        #define EDGE_SUPPRESION(i) \\\n"
2131
        "        if(key[i] != 0)\\\n"
2132
        "        {\\\n"
2133
        "                vec4 D2 = v1[i].xyzw - cc[i];\\\n"
2134
        "                vec2 D4 = v2[i].xw - v2[i].yz;\\\n"
2135
        "                vec2 D5 = 0.5*(v1[i].yw-v1[i].xz); \\\n"
2136
        "                fx[i] = D5.x;        fy[i] = D5.y ;\\\n"
2137
        "                fxx[i] = D2.x + D2.y;\\\n"
2138
        "                fyy[i] = D2.z + D2.w;\\\n"
2139
        "                fxy[i] = 0.25*(D4.x + D4.y);\\\n"
2140
        "                float fxx_plus_fyy = fxx[i] + fyy[i];\\\n"
2141
        "                float score_up = fxx_plus_fyy*fxx_plus_fyy; \\\n"
2142
        "                float score_down = (fxx[i]*fyy[i] - fxy[i]*fxy[i]);\\\n"
2143
        "                if( score_down <= 0 || score_up > THRESHOLD2 * score_down)key[i] = 0;\\\n"
2144
        "        }\n"
2145
        "        REPEAT4(EDGE_SUPPRESION);\n"
2146
        "        if(any(notEqual(key, vec4(0.0)))) {\n";
2147
        
2148
        ////////////////////////////////////////////////
2149
        //read 9 pixels of upper/lower level
2150
        out<<
2151
        "        vec4  v4[4], v5[4], v6[4];\n"
2152
        "        ccc = texture2DRect(texU, gl_TexCoord[0].xy);\n"
2153
        "        clc = texture2DRect(texU, gl_TexCoord[1].xy);\n"
2154
        "        crc = texture2DRect(texU, gl_TexCoord[2].xy);\n"
2155
        "        ccd = texture2DRect(texU, gl_TexCoord[3].xy);\n"
2156
        "        ccu = texture2DRect(texU, gl_TexCoord[4].xy);\n"
2157
        "        cld = texture2DRect(texU, gl_TexCoord[5].xy);\n"
2158
        "        clu = texture2DRect(texU, gl_TexCoord[6].xy);\n"
2159
        "        crd = texture2DRect(texU, gl_TexCoord[7].xy);\n"
2160
        "        cru = texture2DRect(texU, TexRU.xy);\n"
2161
        "        vec4 cu = ccc;\n"
2162
        "        v4[0] = vec4(clc.g, ccc.g, ccd.b, ccc.b);\n"
2163
        "        v4[1] = vec4(ccc.r, crc.r, ccd.a, ccc.a);\n"
2164
        "        v4[2] = vec4(clc.a, ccc.a, ccc.r, ccu.r);\n"
2165
        "        v4[3] = vec4(ccc.b, crc.b, ccc.g, ccu.g);\n"
2166
        "        v6[0] = vec4(cld.a, clc.a, ccd.a, ccc.a);\n"
2167
        "        v6[1] = vec4(ccd.b, ccc.b, crd.b, crc.b);\n"
2168
        "        v6[2] = vec4(clc.g, clu.g, ccc.g, ccu.g);\n"
2169
        "        v6[3] = vec4(ccc.r, ccu.r, crc.r, cru.r);\n"
2170
        <<
2171
        "        #define KEYTEST_STEP1(i)\\\n"
2172
        "        if(key[i] == 1.0)\\\n"
2173
        "        {\\\n"
2174
        "                bvec4 test = lessThan(vec4(cc[i]), max(v4[i], v6[i])); \\\n"
2175
        "                if(cc[i] < cu[i] || any(test))key[i] = 0.0; \\\n"
2176
        "        }else if(key[i] == -1.0)\\\n"
2177
        "        {\\\n"
2178
        "                bvec4 test = greaterThan(vec4(cc[i]), min(v4[i], v6[i])); \\\n"
2179
        "                if(cc[i] > cu[i] || any(test) )key[i] = 0.0; \\\n"
2180
        "        }\n"
2181
        "        REPEAT4(KEYTEST_STEP1);\n"
2182
        "        if(any(notEqual(key, vec4(0.0)))) { \n"
2183
        <<
2184
        "        ccc = texture2DRect(texD, gl_TexCoord[0].xy);\n"
2185
        "        clc = texture2DRect(texD, gl_TexCoord[1].xy);\n"
2186
        "        crc = texture2DRect(texD, gl_TexCoord[2].xy);\n"
2187
        "        ccd = texture2DRect(texD, gl_TexCoord[3].xy);\n"
2188
        "        ccu = texture2DRect(texD, gl_TexCoord[4].xy);\n"
2189
        "        cld = texture2DRect(texD, gl_TexCoord[5].xy);\n"
2190
        "        clu = texture2DRect(texD, gl_TexCoord[6].xy);\n"
2191
        "        crd = texture2DRect(texD, gl_TexCoord[7].xy);\n"
2192
        "        cru = texture2DRect(texD, TexRU.xy);\n"
2193
        "        vec4 cd = ccc;\n"
2194
        "        v5[0] = vec4(clc.g, ccc.g, ccd.b, ccc.b);\n"
2195
        "        v5[1] = vec4(ccc.r, crc.r, ccd.a, ccc.a);\n"
2196
        "        v5[2] = vec4(clc.a, ccc.a, ccc.r, ccu.r);\n"
2197
        "        v5[3] = vec4(ccc.b, crc.b, ccc.g, ccu.g);\n"
2198
        "        v6[0] = vec4(cld.a, clc.a, ccd.a, ccc.a);\n"
2199
        "        v6[1] = vec4(ccd.b, ccc.b, crd.b, crc.b);\n"
2200
        "        v6[2] = vec4(clc.g, clu.g, ccc.g, ccu.g);\n"
2201
        "        v6[3] = vec4(ccc.r, ccu.r, crc.r, cru.r);\n"
2202
        <<
2203
        "        #define KEYTEST_STEP2(i)\\\n"
2204
        "        if(key[i] == 1.0)\\\n"
2205
        "        {\\\n"
2206
        "                bvec4 test = lessThan(vec4(cc[i]), max(v5[i], v6[i]));\\\n"
2207
        "                if(cc[i] < cd[i] || any(test))key[i] = 0.0; \\\n"
2208
        "        }else if(key[i] == -1.0)\\\n"
2209
        "        {\\\n"
2210
        "                bvec4 test = greaterThan(vec4(cc[i]), min(v5[i], v6[i]));\\\n"
2211
        "                if(cc[i] > cd[i] || any(test))key[i] = 0.0; \\\n"
2212
        "        }\n"
2213
        "        REPEAT4(KEYTEST_STEP2);\n"
2214
        "        float keysum = dot(abs(key), vec4(1, 1, 1, 1)) ;\n"
2215
        "        //assume there is only one keypoint in the four. \n"
2216
        "        if(keysum==1.0) {\n";
2217

    
2218
        //////////////////////////////////////////////////////////////////////
2219
        if(GlobalUtil::_SubpixelLocalization)
2220

    
2221
        out <<
2222
        "        vec3 offset = vec3(0, 0, 0); \n"
2223
        "        #define TESTMOVE_KEYPOINT(idx) \\\n"
2224
        "        if(key[idx] != 0) \\\n"
2225
        "        {\\\n"
2226
        "                cu[0] = cu[idx];        cd[0] = cd[idx];        cc[0] = cc[idx];        \\\n"
2227
        "                v4[0] = v4[idx];        v5[0] = v5[idx];                                                \\\n"
2228
        "                fxy[0] = fxy[idx];        fxx[0] = fxx[idx];        fyy[0] = fyy[idx];        \\\n"
2229
        "                fx[0] = fx[idx];        fy[0] = fy[idx];                                                \\\n"
2230
        "        }\n"
2231
        "        TESTMOVE_KEYPOINT(1);\n"
2232
        "        TESTMOVE_KEYPOINT(2);\n"
2233
        "        TESTMOVE_KEYPOINT(3);\n"
2234
        <<
2235
                
2236
        "        float fs = 0.5*( cu[0] - cd[0] );                                \n"
2237
        "        float fss = cu[0] + cd[0] - cc[0] - cc[0];\n"
2238
        "        float fxs = 0.25 * (v4[0].y + v5[0].x - v4[0].x - v5[0].y);\n"
2239
        "        float fys = 0.25 * (v4[0].w + v5[0].z - v4[0].z - v5[0].w);\n"
2240
        "        vec4 A0, A1, A2 ;                        \n"
2241
        "        A0 = vec4(fxx[0], fxy[0], fxs, -fx[0]);        \n"
2242
        "        A1 = vec4(fxy[0], fyy[0], fys, -fy[0]);        \n"
2243
        "        A2 = vec4(fxs, fys, fss, -fs);        \n"
2244
        "        vec3 x3 = abs(vec3(fxx[0], fxy[0], fxs));                \n"
2245
        "        float maxa = max(max(x3.x, x3.y), x3.z);        \n"
2246
        "        if(maxa >= 1e-10 ) \n"
2247
        "        {                                                                                                \n"
2248
        "                if(x3.y ==maxa )                                                        \n"
2249
        "                {                                                                                        \n"
2250
        "                        vec4 TEMP = A1; A1 = A0; A0 = TEMP;        \n"
2251
        "                }else if( x3.z == maxa )                                        \n"
2252
        "                {                                                                                        \n"
2253
        "                        vec4 TEMP = A2; A2 = A0; A0 = TEMP;        \n"
2254
        "                }                                                                                        \n"
2255
        "                A0 /= A0.x;                                                                        \n"
2256
        "                A1 -= A1.x * A0;                                                        \n"
2257
        "                A2 -= A2.x * A0;                                                        \n"
2258
        "                vec2 x2 = abs(vec2(A1.y, A2.y));                \n"
2259
        "                if( x2.y > x2.x )                                                        \n"
2260
        "                {                                                                                        \n"
2261
        "                        vec3 TEMP = A2.yzw;                                        \n"
2262
        "                        A2.yzw = A1.yzw;                                                \n"
2263
        "                        A1.yzw = TEMP;                                                        \n"
2264
        "                        x2.x = x2.y;                                                        \n"
2265
        "                }                                                                                        \n"
2266
        "                if(x2.x >= 1e-10) {                                                                \n"
2267
        "                        A1.yzw /= A1.y;                                                                \n"
2268
        "                        A2.yzw -= A2.y * A1.yzw;                                        \n"
2269
        "                        if(abs(A2.z) >= 1e-10) {\n"
2270
        "                                offset.z = A2.w /A2.z;                                    \n"
2271
        "                                offset.y = A1.w - offset.z*A1.z;                            \n"
2272
        "                                offset.x = A0.w - offset.z*A0.z - offset.y*A0.y;        \n"
2273
        "                                bool test = (abs(cc[0] + 0.5*dot(vec3(fx[0], fy[0], fs), offset ))>THRESHOLD1) ;\n"
2274
        "                                if(!test || any( greaterThan(abs(offset), vec3(1.0)))) key = vec4(0.0);\n"
2275
        "                        }\n"
2276
        "                }\n"
2277
        "        }\n"
2278
        <<"\n"
2279
        "        float keyv = dot(key, vec4(1.0, 2.0, 3.0, 4.0));\n"
2280
        "        gl_FragColor = vec4(keyv,  offset);\n"
2281
        "        }}}}\n"
2282
        "}\n"        <<'\0';
2283

    
2284
        else out << "\n"
2285
        "        float keyv = dot(key, vec4(1.0, 2.0, 3.0, 4.0));\n"
2286
        "        gl_FragColor =  vec4(keyv, 0, 0, 0);\n"
2287
        "        }}}}\n"
2288
        "}\n"        <<'\0';
2289

    
2290
        ProgramGLSL * program = new ProgramGLSL(buffer); 
2291
        s_keypoint = program ;
2292

    
2293
        //parameter
2294
        _param_dog_texu = glGetUniformLocation(*program, "texU");
2295
        _param_dog_texd = glGetUniformLocation(*program, "texD");
2296
}
2297
void ShaderBagPKSL::SetDogTexParam(int texU, int texD)
2298
{
2299
        glUniform1i(_param_dog_texu, 1);
2300
        glUniform1i(_param_dog_texd, 2);
2301
}
2302
void ShaderBagPKSL::SetGenListStepParam(int tex, int tex0)
2303
{
2304
        glUniform1i(_param_genlist_step_tex0, 1);        
2305
}
2306

    
2307
void ShaderBagPKSL::SetGenVBOParam(float width, float fwidth,float size)
2308
{
2309
        float sizes[4] = {size*3.0f, fwidth, width, 1.0f/width};
2310
        glUniform4fv(_param_genvbo_size, 1, sizes);
2311
}
2312
void ShaderBagPKSL::SetGradPassParam(int texP)
2313
{
2314
        glUniform1i(_param_grad_pass_texp, 1);
2315
}
2316

    
2317
void ShaderBagPKSL::LoadDescriptorShader()
2318
{
2319
        GlobalUtil::_DescriptorPPT = 16;
2320
        LoadDescriptorShaderF2();
2321
    s_rect_description = LoadDescriptorProgramRECT();
2322
}
2323

    
2324
ProgramGLSL* ShaderBagPKSL::LoadDescriptorProgramRECT()
2325
{
2326
        //one shader outpout 128/8 = 16 , each fragout encodes 4
2327
        //const double twopi = 2.0*3.14159265358979323846;
2328
        //const double rpi  = 8.0/twopi;
2329
        char buffer[10240];
2330
        ostrstream out(buffer, 10240);
2331

    
2332
        out<<setprecision(8);
2333

    
2334
        out<<"\n"
2335
        "#define M_PI 3.14159265358979323846\n"
2336
        "#define TWO_PI (2.0*M_PI)\n"
2337
        "#define RPI 1.2732395447351626861510701069801\n"
2338
        "#define WF size.z\n"
2339
        "uniform sampler2DRect tex;                        \n"
2340
        "uniform sampler2DRect gtex;                        \n"
2341
        "uniform sampler2DRect otex;                        \n"
2342
        "uniform vec4                dsize;                                \n"
2343
        "uniform vec3                size;                                \n"
2344
        "void main()                        \n"
2345
        "{\n"
2346
        "        vec2 dim        = size.xy;        //image size                        \n"
2347
        "        float index = dsize.x*floor(gl_TexCoord[0].y * 0.5) + gl_TexCoord[0].x;\n"
2348
        "        float idx = 8.0* fract(index * 0.125) + 8.0 * floor(2.0* fract(gl_TexCoord[0].y * 0.5));                \n"
2349
        "        index = floor(index*0.125)+ 0.49;  \n"
2350
        "        vec2 coord = floor( vec2( mod(index, dsize.z), index*dsize.w)) + 0.5 ;\n"
2351
        "        vec2 pos = texture2DRect(tex, coord).xy;                \n"
2352
        "        vec2 wsz = texture2DRect(tex, coord).zw;\n"
2353
    "   float aspect_ratio = wsz.y / wsz.x;\n"
2354
    "   float aspect_sq = aspect_ratio * aspect_ratio; \n"
2355
        "        vec2 spt  = wsz * 0.25; vec2 ispt = 1.0 / spt; \n";
2356

    
2357
        //here cscs is actually (cos, sin, -cos, -sin) * (factor: 3)*sigma
2358
        //and rots is  (cos, sin, -cos, -sin ) /(factor*sigma)
2359
        //devide the 4x4 sift grid into 16 1x1 block, and each corresponds to a shader thread
2360
        //To use linear interoplation, 1x1 is increased to 2x2, by adding 0.5 to each side
2361
        out<<
2362
        "        vec4 temp; vec2 pt;                                \n"
2363
    "        pt.x = pos.x + fract(idx*0.25) * wsz.x;                                \n"
2364
        "        pt.y = pos.y + (floor(idx*0.25) + 0.5) * spt.y;                        \n";
2365
        
2366
        //get a horizontal bounding box of the rotated rectangle
2367
        out<<
2368
    "        vec4 sz;                                        \n"
2369
        "        sz.xy = max(pt - spt, vec2(2,2));\n"
2370
        "        sz.zw = min(pt + spt, dim - 3);                \n"
2371
        "        sz = floor(sz * 0.5)+0.5;"; //move sample point to pixel center
2372
        //get voting for two box
2373

    
2374
        out<<"\n"
2375
        "        vec4 DA, DB;   vec2 spos;                        \n"
2376
        "        DA = DB  = vec4(0, 0, 0, 0);                \n"
2377
        "        vec4 nox = vec4(0, 1.0, 0.0, 1.0);                                        \n"
2378
        "        vec4 noy = vec4(0, 0.0, 1.0, 1.0);                                        \n"
2379
        "        for(spos.y = sz.y; spos.y <= sz.w;        spos.y+=1.0)                                \n"
2380
        "        {                                                                                                                                \n"
2381
        "                for(spos.x = sz.x; spos.x <= sz.z;        spos.x+=1.0)                        \n"
2382
        "                {                                                                                                                        \n"
2383
        "                        vec2 tpt = spos * 2.0 - pt - 0.5;                                        \n"
2384
    "                        vec4 nx = (tpt.x + nox) * ispt.x;                                                                \n"
2385
    "                        vec4 ny = (tpt.y + noy) * ispt.y;                        \n"
2386
        "                        vec4 nxn = abs(nx), nyn = abs(ny);                                                \n"
2387
    "                        bvec4 inside = lessThan(max(nxn, nyn) , vec4(1.0));        \n"
2388
        "                        if(any(inside))\n"
2389
        "                        {\n"
2390
        "                                vec4 gg = texture2DRect(gtex, spos);\n"
2391
        "                                vec4 oo = texture2DRect(otex, spos);\n"
2392
    //"               vec4 cc = cos(oo), ss = sin(oo); \n"
2393
    //"               oo = atan(ss* aspect_ratio, cc); \n"
2394
    //"               gg = gg * sqrt(ss * ss * aspect_sq + cc * cc); \n "
2395
        "                                vec4 theta0 = (- oo)*RPI;\n"
2396
        "                                vec4 theta = 8.0 * fract(1.0 + 0.125 * theta0);                        \n"
2397
        "                                vec4 theta1 = floor(theta);                                                                \n"
2398
        "                                vec4 weight = (1 - nxn) * (1 - nyn) * gg; \n"
2399
        "                                vec4 weight2 = (theta - theta1) * weight;                                \n"
2400
        "                                vec4 weight1 = weight - weight2;                                                \n"
2401
        "                                for(int i = 0;i < 4; i++)\n"
2402
        "                                {\n"
2403
        "                                        if(inside[i])\n"
2404
        "                                        {\n"
2405
        "                                                DA += vec4(equal(vec4(theta1[i]), vec4(0, 1, 2, 3)))*weight1[i]; \n"
2406
        "                                                DA += vec4(equal(vec4(theta1[i]), vec4(7, 0, 1, 2)))*weight2[i]; \n"
2407
        "                                                DB += vec4(equal(vec4(theta1[i]), vec4(4, 5, 6, 7)))*weight1[i]; \n"
2408
        "                                                DB += vec4(equal(vec4(theta1[i]), vec4(3, 4, 5, 6)))*weight2[i]; \n"
2409
        "                                        }\n"
2410
        "                                }\n"
2411
        "                        }\n"
2412
        "                }\n"
2413
        "        }\n";
2414
        out<<
2415
        "         gl_FragData[0] = DA; gl_FragData[1] = DB;\n"
2416
        "}\n"<<'\0';
2417

    
2418
        ProgramGLSL * program =  new ProgramGLSL(buffer); 
2419
        if(program->IsNative()) 
2420
        {
2421
                return program;
2422
        }
2423
        else
2424
        {
2425
                delete program;
2426
                return NULL;
2427
        }
2428
}
2429

    
2430
ProgramGLSL* ShaderBagPKSL::LoadDescriptorProgramPKSL()
2431
{
2432
        //one shader outpout 128/8 = 16 , each fragout encodes 4
2433
        //const double twopi = 2.0*3.14159265358979323846;
2434
        //const double rpi  = 8.0/twopi;
2435
        char buffer[10240];
2436
        ostrstream out(buffer, 10240);
2437

    
2438
        out<<setprecision(8);
2439

    
2440
        out<<"\n"
2441
        "#define M_PI 3.14159265358979323846\n"
2442
        "#define TWO_PI (2.0*M_PI)\n"
2443
        "#define RPI 1.2732395447351626861510701069801\n"
2444
        "#define WF size.z\n"
2445
        "uniform sampler2DRect tex;                        \n"
2446
        "uniform sampler2DRect gtex;                        \n"
2447
        "uniform sampler2DRect otex;                        \n"
2448
        "uniform vec4                dsize;                                \n"
2449
        "uniform vec3                size;                                \n"
2450
        "void main()                        \n"
2451
        "{\n"
2452
        "        vec2 dim        = size.xy;        //image size                        \n"
2453
        "        float index = dsize.x*floor(gl_TexCoord[0].y * 0.5) + gl_TexCoord[0].x;\n"
2454
        "        float idx = 8.0* fract(index * 0.125) + 8.0 * floor(2.0* fract(gl_TexCoord[0].y * 0.5));                \n"
2455
        "        index = floor(index*0.125)+ 0.49;  \n"
2456
        "        vec2 coord = floor( vec2( mod(index, dsize.z), index*dsize.w)) + 0.5 ;\n"
2457
        "        vec2 pos = texture2DRect(tex, coord).xy;                \n"
2458
        "        if(any(lessThan(pos.xy, vec2(1.0))) || any(greaterThan(pos.xy, dim-1.0))) "
2459
        "        //discard;        \n" 
2460
        "        { gl_FragData[0] = gl_FragData[1] = vec4(0.0); return; }\n"
2461
        "        float anglef = texture2DRect(tex, coord).z;\n"
2462
        "        if(anglef > M_PI) anglef -= TWO_PI;\n"
2463
        "        float sigma = texture2DRect(tex, coord).w; \n"
2464
        "        float spt  = abs(sigma * WF);        //default to be 3*sigma        \n";
2465
        //rotation
2466
        out<<
2467
        "        vec4 cscs, rots;                                                \n"
2468
        "        cscs.x = cos(anglef); cscs.y = sin(anglef);        \n"
2469
        "        cscs.zw = - cscs.xy;                                                        \n"
2470
        "        rots = cscs /spt;                                                                \n"
2471
        "        cscs *= spt; \n";
2472

    
2473
        //here cscs is actually (cos, sin, -cos, -sin) * (factor: 3)*sigma
2474
        //and rots is  (cos, sin, -cos, -sin ) /(factor*sigma)
2475
        //devide the 4x4 sift grid into 16 1x1 block, and each corresponds to a shader thread
2476
        //To use linear interoplation, 1x1 is increased to 2x2, by adding 0.5 to each side
2477
        out<<
2478
        "        vec4 temp; vec2 pt, offsetpt;                                \n"
2479
        "        /*the fraction part of idx is .5*/                        \n"
2480
        "        offsetpt.x = 4.0* fract(idx*0.25) - 2.0;                                \n"
2481
        "        offsetpt.y = floor(idx*0.25) - 1.5;                        \n"
2482
        "        temp = cscs.xwyx*offsetpt.xyxy;                                \n"
2483
        "        pt = pos + temp.xz + temp.yw;                                \n";
2484
        
2485
        //get a horizontal bounding box of the rotated rectangle
2486
        out<<
2487
        "        vec2 bwin = abs(cscs.xy);                                        \n"
2488
        "        float bsz = bwin.x + bwin.y;                                        \n"
2489
        "        vec4 sz;                                        \n"
2490
        "        sz.xy = max(pt - vec2(bsz), vec2(2,2));\n"
2491
        "        sz.zw = min(pt + vec2(bsz), dim - 3);                \n"
2492
        "        sz = floor(sz * 0.5)+0.5;"; //move sample point to pixel center
2493
        //get voting for two box
2494

    
2495
        out<<"\n"
2496
        "        vec4 DA, DB;   vec2 spos;                        \n"
2497
        "        DA = DB  = vec4(0, 0, 0, 0);                \n"
2498
        "        vec4 nox = vec4(0, rots.xy, rots.x + rots.y);                                        \n"
2499
        "        vec4 noy = vec4(0, rots.wx, rots.w + rots.x);                                        \n"
2500
        "        for(spos.y = sz.y; spos.y <= sz.w;        spos.y+=1.0)                                \n"
2501
        "        {                                                                                                                                \n"
2502
        "                for(spos.x = sz.x; spos.x <= sz.z;        spos.x+=1.0)                        \n"
2503
        "                {                                                                                                                        \n"
2504
        "                        vec2 tpt = spos * 2.0 - pt - 0.5;                                        \n"
2505
        "                        vec4 temp = rots.xywx * tpt.xyxy;                                                \n"
2506
        "                        vec2 temp2 = temp.xz + temp.yw;                                                \n"
2507
        "                        vec4 nx = temp2.x + nox;                                                                \n"
2508
        "                        vec4 ny = temp2.y + noy;                        \n"
2509
        "                        vec4 nxn = abs(nx), nyn = abs(ny);                                                \n"
2510
        "                        bvec4 inside = lessThan(max(nxn, nyn) , vec4(1.0));        \n"
2511
        "                        if(any(inside))\n"
2512
        "                        {\n"
2513
        "                                vec4 gg = texture2DRect(gtex, spos);\n"
2514
        "                                vec4 oo = texture2DRect(otex, spos);\n"
2515
        "                                vec4 theta0 = (anglef - oo)*RPI;\n"
2516
        "                                vec4 theta = 8.0 * fract(1.0 + 0.125 * theta0);                        \n"
2517
        "                                vec4 theta1 = floor(theta);                                                                \n"
2518
        "                                vec4 diffx = nx + offsetpt.x, diffy = ny + offsetpt.y;        \n"
2519
        "                                vec4 ww = exp(-0.125 * (diffx * diffx + diffy * diffy ));        \n"
2520
        "                                vec4 weight = (1 - nxn) * (1 - nyn) * gg * ww; \n"
2521
        "                                vec4 weight2 = (theta - theta1) * weight;                                \n"
2522
        "                                vec4 weight1 = weight - weight2;                                                \n"
2523
        "                                for(int i = 0;i < 4; i++)\n"
2524
        "                                {\n"
2525
        "                                        if(inside[i])\n"
2526
        "                                        {\n"
2527
        "                                                DA += vec4(equal(vec4(theta1[i]), vec4(0, 1, 2, 3)))*weight1[i]; \n"
2528
        "                                                DA += vec4(equal(vec4(theta1[i]), vec4(7, 0, 1, 2)))*weight2[i]; \n"
2529
        "                                                DB += vec4(equal(vec4(theta1[i]), vec4(4, 5, 6, 7)))*weight1[i]; \n"
2530
        "                                                DB += vec4(equal(vec4(theta1[i]), vec4(3, 4, 5, 6)))*weight2[i]; \n"
2531
        "                                        }\n"
2532
        "                                }\n"
2533
        "                        }\n"
2534
        "                }\n"
2535
        "        }\n";
2536
        out<<
2537
        "         gl_FragData[0] = DA; gl_FragData[1] = DB;\n"
2538
        "}\n"<<'\0';
2539

    
2540
        ProgramGLSL * program =  new ProgramGLSL(buffer); 
2541
        if(program->IsNative()) 
2542
        {
2543
                return program;
2544
        }
2545
        else
2546
        {
2547
                delete program;
2548
                return NULL;
2549
        }
2550
}
2551

    
2552
void ShaderBagPKSL::LoadDescriptorShaderF2()
2553
{
2554

    
2555
        ProgramGLSL * program = LoadDescriptorProgramPKSL();
2556
        if( program )
2557
        {
2558
                s_descriptor_fp = program;
2559
                _param_descriptor_gtex = glGetUniformLocation(*program, "gtex");
2560
                _param_descriptor_otex = glGetUniformLocation(*program, "otex");
2561
                _param_descriptor_size = glGetUniformLocation(*program, "size");
2562
                _param_descriptor_dsize = glGetUniformLocation(*program, "dsize");
2563
        }
2564
}
2565

    
2566

    
2567

    
2568
void ShaderBagPKSL::SetSimpleOrientationInput(int oTex, float sigma, float sigma_step)
2569
{
2570
        glUniform1i(_param_orientation_gtex, 1);
2571
        glUniform2f(_param_orientation_size, sigma, sigma_step);
2572
}
2573

    
2574

    
2575
void ShaderBagPKSL::SetFeatureOrientationParam(int gtex, int width, int height, float sigma, int otex, float step)
2576
{
2577
        ///
2578
        glUniform1i(_param_orientation_gtex, 1);        
2579
        glUniform1i(_param_orientation_otex, 2);        
2580

    
2581
        float size[4];
2582
        size[0] = (float)width;
2583
        size[1] = (float)height;
2584
        size[2] = sigma;
2585
        size[3] = step;
2586
        glUniform4fv(_param_orientation_size, 1, size);
2587

    
2588
}
2589

    
2590
void ShaderBagPKSL::SetFeatureDescirptorParam(int gtex, int otex, float dwidth, float fwidth,  float width, float height, float sigma)
2591
{
2592
    if(sigma == 0 && s_rect_description)
2593
    {
2594
        //rectangle description mode
2595
        s_rect_description->UseProgram();       
2596
        GLint param_descriptor_gtex = glGetUniformLocation(*s_rect_description, "gtex");
2597
                GLint param_descriptor_otex = glGetUniformLocation(*s_rect_description, "otex");
2598
                GLint param_descriptor_size = glGetUniformLocation(*s_rect_description, "size");
2599
                GLint param_descriptor_dsize = glGetUniformLocation(*s_rect_description, "dsize");
2600
            ///
2601
            glUniform1i(param_descriptor_gtex, 1);        
2602
            glUniform1i(param_descriptor_otex, 2);        
2603

    
2604
            float dsize[4] ={dwidth, 1.0f/dwidth, fwidth, 1.0f/fwidth};
2605
            glUniform4fv(param_descriptor_dsize, 1, dsize);
2606
            float size[3];
2607
            size[0] = width;
2608
            size[1] = height;
2609
            size[2] = GlobalUtil::_DescriptorWindowFactor;
2610
            glUniform3fv(param_descriptor_size, 1, size);
2611
    }else
2612
    {
2613
            ///
2614
            glUniform1i(_param_descriptor_gtex, 1);        
2615
            glUniform1i(_param_descriptor_otex, 2);        
2616

    
2617

    
2618
            float dsize[4] ={dwidth, 1.0f/dwidth, fwidth, 1.0f/fwidth};
2619
            glUniform4fv(_param_descriptor_dsize, 1, dsize);
2620
            float size[3];
2621
            size[0] = width;
2622
            size[1] = height;
2623
            size[2] = GlobalUtil::_DescriptorWindowFactor;
2624
            glUniform3fv(_param_descriptor_size, 1, size);
2625
    }
2626

    
2627
}
2628

    
2629

    
2630
void ShaderBagPKSL::SetGenListEndParam(int ktex)
2631
{
2632
        glUniform1i(_param_genlist_end_ktex, 1);
2633
}
2634
void ShaderBagPKSL::SetGenListInitParam(int w, int h)
2635
{
2636
        float bbox[4] = {(w -1.0f) * 0.5f +0.25f, (w-1.0f) * 0.5f - 0.25f,  (h - 1.0f) * 0.5f + 0.25f, (h-1.0f) * 0.5f - 0.25f};
2637
        glUniform4fv(_param_genlist_init_bbox, 1, bbox);
2638
}
2639

    
2640
void ShaderBagPKSL::SetMarginCopyParam(int xmax, int ymax)
2641
{
2642
        float truncate[4];
2643
        truncate[0] = (xmax - 0.5f) * 0.5f; //((xmax + 1)  >> 1) - 0.5f;
2644
        truncate[1] = (ymax - 0.5f) * 0.5f; //((ymax + 1)  >> 1) - 0.5f;
2645
        truncate[2] = (xmax %2 == 1)? 0.0f: 1.0f;
2646
        truncate[3] = truncate[2] +  (((ymax % 2) == 1)? 0.0f : 2.0f);
2647
        glUniform4fv(_param_margin_copy_truncate, 1,  truncate);
2648
}