Project

General

Profile

Statistics
| Branch: | Revision:

root / rgbdslam / external / siftgpu / src / SiftGPU / ProgramCG.cpp @ 9240aaa3

History | View | Annotate | Download (92.7 KB)

1
//////////////////////////////////////////////////////////////////////////////
2
//        File:                ProgramCG.cpp
3
//        Author:                Changchang Wu
4
//        Description :        implementation of cg related class.
5
//                class ProgramCG                        A simple wrapper of Cg programs
6
//                class ShaderBagCG                cg shaders for SIFT
7
//                class FilterCGGL                cg gaussian filters for SIFT
8
//
9
//        Copyright (c) 2007 University of North Carolina at Chapel Hill
10
//        All Rights Reserved
11
//
12
//        Permission to use, copy, modify and distribute this software and its
13
//        documentation for educational, research and non-profit purposes, without
14
//        fee, and without a written agreement is hereby granted, provided that the
15
//        above copyright notice and the following paragraph appear in all copies.
16
//        
17
//        The University of North Carolina at Chapel Hill make no representations
18
//        about the suitability of this software for any purpose. It is provided
19
//        'as is' without express or implied warranty. 
20
//
21
//        Please send BUG REPORTS to ccwu@cs.unc.edu
22
//
23
////////////////////////////////////////////////////////////////////////////
24

    
25
#if defined(CG_SIFTGPU_ENABLED) 
26

    
27
#include "GL/glew.h"
28

    
29
#include <iostream>
30
#include <iomanip>
31
#include <vector>
32
#include <strstream>
33
#include <algorithm>
34
#include <stdlib.h>
35
#include <math.h>
36
#include <string.h>
37
using namespace std;
38

    
39
#include "GlobalUtil.h"
40
#include "ProgramCG.h"
41
#include "GLTexImage.h"
42
#include "ShaderMan.h"
43
#include "FrameBufferObject.h"
44

    
45

    
46

    
47
#if  defined(_WIN32) 
48
        #pragma comment (lib, "../../lib/cg.lib")
49
        #pragma comment (lib, "../../lib/cggl.lib")
50
#endif
51

    
52
CGcontext        ProgramCG::_Context        =0;
53
CGprofile        ProgramCG::_FProfile;
54

    
55
//////////////////////////////////////////////////////////////////////
56
// Construction/Destruction
57
//////////////////////////////////////////////////////////////////////
58

    
59
// Default constructor: builds an empty wrapper that owns no Cg program.
ProgramCG::ProgramCG()
{
        _programID = NULL;
        // No program has been compiled, so the object must not report itself
        // as valid; previously this member was left indeterminate here.
        _valid = 0;
}
63

    
64
// Destroys the wrapped Cg program, if one was ever created.
ProgramCG::~ProgramCG()
{
        if(!_programID) return;

        cgDestroyProgram(_programID);
}
68

    
69
// Compiles `code` as a Cg program and loads it through the Cg GL runtime.
//
//   code            - Cg source text of the program.
//   cg_compile_args - NULL-terminated list of compiler arguments. When NULL,
//                     a default list is picked from the GlobalUtil flags:
//                     fp40 options (or none) on NVIDIA, "-po ATI_draw_buffers"
//                     otherwise.
//   profile         - Cg profile to compile for; stored in _profile.
//
// _valid is set to 1 only when the program was created and, after loading,
// GL reports no program error position. On any failure diagnostics are
// written to std::cout / std::cerr and _valid stays 0.
ProgramCG::ProgramCG(const char *code, const char** cg_compile_args, CGprofile profile)
{
        _valid = 0;
        _profile = profile;

        // -1 is the "no error" position; start there so the value is defined
        // even if the query below does not write to it.
        GLint epos = -1;

        const char* ati_args[] = {"-po", "ATI_draw_buffers",0}; 
        const char* fp40_args[] = {"-ifcvt", "none","-unroll", "all", GlobalUtil::_UseFastMath? "-fastmath" : 0, 0};
        if(cg_compile_args == NULL) cg_compile_args = GlobalUtil::_IsNvidia? (GlobalUtil::_SupportFP40? fp40_args:NULL) : ati_args;

        _programID = ::cgCreateProgram(_Context, CG_SOURCE, code, profile, NULL, cg_compile_args);
        if(_programID)
        {
                cgGLLoadProgram(_programID );
                //_texParamID = cgGetNamedParameter(_programID, "tex");

                glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &epos);
                if(epos >=0)
                {
                        // Both calls may return NULL; streaming a NULL C string
                        // is undefined behavior, so check before printing.
                        const char* compiled = cgGetProgramString(_programID, CG_COMPILED_PROGRAM);
                        const GLubyte* gl_error = glGetString(GL_PROGRAM_ERROR_STRING_ARB);
                        if(compiled) std::cout<<compiled<<endl;
                        if(gl_error) std::cerr<<gl_error<<endl;
                }else
                {
                        _valid = 1;
                }
        }else
        {
                // Compilation failed: echo the offending source. There is no
                // program handle here, so the compiled program string cannot be
                // queried (the old code passed the NULL handle to Cg).
                if(code) std::cerr<<code<<endl;

                glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &epos);
                const GLubyte* gl_error = glGetString(GL_PROGRAM_ERROR_STRING_ARB);
                if(gl_error)
                {
                        // Same stream selection as before: stderr when GL
                        // reports an error position, stdout otherwise.
                        if(epos >=0)        std::cerr<<gl_error<<endl;
                        else                std::cout<<gl_error<<endl;
                }
        }
}
107

    
108
void ProgramCG::ErrorCallback()
109
{
110
        CGerror err = cgGetError();
111
        if(err)
112
        {
113
                std::cerr<< cgGetErrorString(err)<<endl;
114
        }
115
}
116

    
117

    
118
void ProgramCG::InitContext()
119
{
120
        if(_Context == 0)
121
        {
122
                _Context        = cgCreateContext();
123
 
124
                /////////////
125
                _FProfile = cgGLGetLatestProfile(CG_GL_FRAGMENT);
126
                cgGLSetOptimalOptions(_FProfile);
127

    
128
                if(GlobalUtil::_verbose) std::cout<<"Shader Profile: "<<cgGetProfileString(_FProfile)<<endl;
129

    
130
                cgSetErrorCallback(ErrorCallback);
131
        }
132
}
133

    
134
void ProgramCG::DestroyContext()
135
{
136
        cgDestroyContext(_Context);
137
}
138

    
139
// Constructing a shader bag makes sure the shared Cg context is initialized.
ShaderBagCG::ShaderBagCG()
{
        ProgramCG::InitContext();
}
143

    
144

    
145
int ProgramCG::UseProgram()
146
{
147
        if(_programID)
148
        {
149
                cgGLEnableProfile(_profile);
150
                cgGLBindProgram(_programID);
151

    
152
                return 1;
153
        }else
154
        {
155
                return 0;
156
        }
157
}
158

    
159
void ShaderBagCG::UnloadProgram()
160
{
161

    
162
        cgGLUnbindProgram(ProgramCG::_FProfile);
163
        cgGLDisableProfile(ProgramCG::_FProfile);
164
}
165

    
166

    
167
void ShaderBagCG::LoadFixedShaders()
168
{
169
//        s_debug = new ProgramCG( "void main(float4 TexCoord0:TEXCOORD0, out float4 FragColor:COLOR0,"
170
//                "uniform samplerRECT tex){ gl_FragColor.rg =  gl_TexCoord[0].st;}");
171

    
172
        s_gray = new ProgramCG( 
173
        "void main(float4 TexCoord0 : TEXCOORD0, out float4 FragColor : COLOR0, uniform samplerRECT tex){\n"
174
        "float intensity = dot(float3(0.299, 0.587, 0.114), texRECT(tex,TexCoord0.xy ).rgb);\n"
175
        "FragColor= float4(intensity, intensity, intensity, 1.0);}"        );
176

    
177

    
178
        s_sampling = new ProgramCG(
179
        "void main(float4 TexCoord0 : TEXCOORD0, out float4 FragColor : COLOR0, uniform samplerRECT tex){\n"
180
        "float4 cc = texRECT(tex, TexCoord0.xy);        FragColor = float4(cc.rg, 0.0, 0.0);        }"        );
181

    
182

    
183
        s_zero_pass = new ProgramCG("void main(out float4 FragColor : COLOR0){FragColor = 0;}");
184

    
185

    
186
        ProgramCG * program;
187
        s_margin_copy = program = new ProgramCG(
188
        "void main(float4 texCoord0: TEXCOORD0, out float4 FragColor: COLOR0, \n"
189
        "uniform samplerRECT tex, uniform float2 truncate){\n"
190
        "FragColor = texRECT(tex, min(texCoord0.xy, truncate)); }");
191

    
192
        _param_margin_copy_truncate = cgGetNamedParameter(*program, "truncate");
193

    
194

    
195
        s_grad_pass = new ProgramCG(
196
        "void main (\n"
197
        "float4 TexCC : TEXCOORD0, float4 TexLC : TEXCOORD1,\n"
198
        "float4 TexRC : TEXCOORD2, float4 TexCD : TEXCOORD3, float4 TexCU : TEXCOORD4,\n"
199
        "out float4 FragData0 : COLOR0, uniform samplerRECT tex)\n"
200
        "{\n"
201
        "        float4 v1, v2, gg;\n"
202
        "        float4 cc  = texRECT(tex, TexCC.xy);\n"
203
        "        gg.x = texRECT(tex, TexLC.xy).r;\n"
204
        "        gg.y = texRECT(tex, TexRC.xy).r;\n"
205
        "        gg.z = texRECT(tex, TexCD.xy).r;\n"
206
        "        gg.w = texRECT(tex, TexCU.xy).r;\n"
207
        "        float2 dxdy = (gg.yw - gg.xz); \n"
208
        "        float grad = 0.5*length(dxdy);\n"
209
        "        float theta = grad==0? 0: atan2(dxdy.y, dxdy.x);\n"
210
        "        FragData0 = float4(cc.rg, grad, theta);\n"
211
        "}\n\0");
212

    
213

    
214
        if(GlobalUtil::_SupportFP40)
215
        {
216
                //use the packing mode for cpu list reshape and two orientations
217
                if(GlobalUtil::_MaxOrientation != 2) GlobalUtil::_OrientationPack2 = 0;
218

    
219
                LoadOrientationShader();
220

    
221

    
222
                if(GlobalUtil::_DescriptorPPT)                LoadDescriptorShader();
223

    
224
        }else
225
        {
226
                s_orientation = program =  new ProgramCG(
227
                "void main(out float4 FragColor : COLOR0, \n"
228
        "        uniform samplerRECT fTex, uniform samplerRECT oTex, \n"
229
        "        uniform float size, \n"
230
        "        in float2 tpos : TEXCOORD0){\n"
231
        "        float4 cc = texRECT(fTex, tpos);\n"
232
        "        float4 oo = texRECT(oTex, cc.rg);\n"
233
        "        FragColor = float4(cc.rg, oo.a, size);}");  
234
                _param_orientation_gtex= cgGetNamedParameter(*program, "oTex");
235
                _param_orientation_size= cgGetNamedParameter(*program, "size");
236

    
237

    
238
                ///
239
                GlobalUtil::_FullSupported = 0;
240
                GlobalUtil::_MaxOrientation = 0;  //0 for simplified version
241
                GlobalUtil::_DescriptorPPT = 0;
242
                std::cerr<<"Orientation simplified on this hardware"<<endl;
243
                std::cerr<<"Descriptor ignored on this hardware"<<endl;
244
        }
245

    
246

    
247
}
248

    
249
void ShaderBagCG::LoadDisplayShaders()
250
{
251
        s_copy_key = new ProgramCG(
252
        "void main(float4 TexCoord0 : TEXCOORD0, out float4 FragColor : COLOR0, uniform samplerRECT tex){\n"
253
        "FragColor.rg= texRECT(tex, TexCoord0.xy).rg; FragColor.ba = float2(0,1);        }");
254

    
255
        //shader used to write a vertex buffer object
256
        //which is used to draw the quads of each feature
257
        ProgramCG * program;
258
        s_vertex_list = program = new ProgramCG(
259
        "void main(in float4 TexCoord0: TEXCOORD0,\n"
260
        "uniform float4 sizes, \n"
261
        "uniform samplerRECT tex, \n"
262
        "out float4 FragColor: COLOR0){\n"
263
        "float fwidth = sizes.y; \n"
264
        "float twidth = sizes.z; \n"
265
        "float rwidth = sizes.w; \n"
266
        "float index = 0.1*(fwidth*floor(TexCoord0.y) + TexCoord0.x);\n"
267
        "float px = fmod(index, twidth);\n"
268
        "float2 tpos= floor(float2(px, index*rwidth))+0.5;\n"
269
        "float4 cc = texRECT(tex, tpos );\n"
270
        "float size = cc.a * 3.0f;//sizes.x;// \n"
271
        "FragColor.zw = float2(0.0, 1.0);\n"
272
        "if(any(cc.xy <=0)) {FragColor.xy = cc.xy;}else \n"
273
        "{\n"
274
        "        float type = frac(px);\n"
275
        "        float2 dxy; float s, c;\n"
276
        "        dxy.x = type < 0.1 ? 0 : ((type <0.5 || type > 0.9)? size : -size);\n"
277
        "        dxy.y = type < 0.2 ? 0 : ((type < 0.3 || type > 0.7 )? -size :size); \n"
278
        "        sincos(cc.b, s, c);\n"
279
        "        FragColor.x = cc.x + c*dxy.x-s*dxy.y;\n"
280
        "        FragColor.y = cc.y + c*dxy.y+s*dxy.x;}\n"
281
        "}\n\0");
282
        /*FragColor = float4(tpos, 0.0, 1.0);}\n\0");*/
283

    
284
        _param_genvbo_size = cgGetNamedParameter(*program, "sizes");
285

    
286

    
287
        s_display_gaussian =  new ProgramCG(
288
        "void main(float4 TexCoord0 : TEXCOORD0, out float4 FragColor : COLOR0, uniform samplerRECT tex){\n"
289
        "float r = texRECT(tex, TexCoord0.xy).r;\n"
290
        "FragColor = float4(r, r, r, 1.0);}");
291

    
292

    
293
        s_display_dog =  new ProgramCG(
294
        "void main(float4 TexCoord0 : TEXCOORD0, out float4 FragColor : COLOR0, uniform samplerRECT tex){\n"
295
        "float g = (0.5+20.0*texRECT(tex, TexCoord0.xy).g);\n"
296
        "FragColor = float4(g, g, g, 1.0);}" );
297

    
298

    
299
        s_display_grad = new ProgramCG(
300
        "void main(float4 TexCoord0 : TEXCOORD0, out float4 FragColor : COLOR0, uniform samplerRECT tex){\n"
301
        "float4 cc = texRECT(tex, TexCoord0.xy); FragColor = float4(5.0 * cc.bbb, 1.0); }");
302

    
303

    
304
        s_display_keys= new ProgramCG(
305
        "void main(float4 TexCoord0 : TEXCOORD0, out float4 FragColor : COLOR0, uniform samplerRECT tex){\n"
306
        "float4 cc = texRECT(tex, TexCoord0.xy);\n"
307
        "if(cc.r ==1.0) FragColor = float4(1.0, 0, 0,1.0); \n"
308
        "else {if (cc.r ==0.5) FragColor = float4(0.0,1.0,0.0,1.0);        else discard;}}");        
309

    
310
}
311

    
312
void ShaderBagCG::SetMarginCopyParam(int xmax, int ymax)
313
{
314
        float truncate[2] = {xmax - 0.5f , ymax - 0.5f};
315
        cgGLSetParameter2fv(_param_margin_copy_truncate, truncate);
316
}
317

    
318

    
319
int ShaderBagCG::LoadKeypointShaderMR(float threshold, float edge_threshold)
320
{
321
        char buffer[10240];
322
        float threshold0 = threshold * 0.8f;
323
        float threshold1 = threshold;
324
        float threshold2 = (edge_threshold+1)*(edge_threshold+1)/edge_threshold;
325
        int   max_refine = max(2, GlobalUtil::_SubpixelLocalization);
326
        ostrstream out(buffer, 10240);
327

    
328
        out <<        "#define THRESHOLD0 " << threshold0 << "\n"
329
                        "#define THRESHOLD1 " << threshold1 << "\n"
330
                        "#define THRESHOLD2 " << threshold2 << "\n"
331
                        "#define MAX_REFINE " << max_refine << "\n";
332
        out<<
333
        "void main (\n"
334
        "float4 TexCC : TEXCOORD0, float4 TexLC : TEXCOORD1,\n"
335
        "float4 TexRC : TEXCOORD2, float4 TexCD : TEXCOORD3, \n"
336
        "float4 TexCU : TEXCOORD4, float4 TexLD : TEXCOORD5, \n"
337
        "float4 TexLU : TEXCOORD6, float4 TexRD : TEXCOORD7,\n"
338
        "out float4 FragData0 : COLOR0, out float4 FragData1 : COLOR1, \n"
339
        "uniform samplerRECT tex, uniform samplerRECT texU, uniform samplerRECT texD)\n"
340
        "{\n"
341
        "        float4 v1, v2, gg;\n"
342
        "        float2 TexRU = float2(TexRC.x, TexCU.y); \n"
343
        "        float4 cc  = texRECT(tex, TexCC.xy);\n"
344
        "        v1.x = texRECT(tex, TexLC.xy).g;\n"
345
        "        gg.x = texRECT(tex, TexLC.xy).r;\n"
346
        "        v1.y = texRECT(tex, TexRC.xy).g;\n"
347
        "        gg.y = texRECT(tex, TexRC.xy).r;\n"
348
        "        v1.z = texRECT(tex, TexCD.xy).g;\n"
349
        "        gg.z = texRECT(tex, TexCD.xy).r;\n"
350
        "        v1.w = texRECT(tex, TexCU.xy).g;\n"
351
        "        gg.w = texRECT(tex, TexCU.xy).r;\n"
352
        "        v2.x = texRECT(tex, TexLD.xy).g;\n"
353
        "        v2.y = texRECT(tex, TexLU.xy).g;\n"
354
        "        v2.z = texRECT(tex, TexRD.xy).g;\n"
355
        "        v2.w = texRECT(tex, TexRU.xy).g;\n"
356
        "        float2 dxdy = 0.5*(gg.yw - gg.xz); \n"
357
        "        float grad = length(dxdy);\n"
358
        "        float theta = grad==0? 0: atan2(dxdy.y, dxdy.x);\n"
359
        "        FragData0 = float4(cc.rg, grad, theta);\n"
360
        <<
361
        "        float dog = 0.0; \n"
362
        "        FragData1 = float4(0, 0, 0, 0); \n"
363
        "        float2 v3; float4 v4, v5, v6;\n"
364
        <<
365
        "        if( cc.g > THRESHOLD0 && all(cc.gggg > max(v1, v2)))\n"
366
        "        {\n"
367
        "                v3.x = texRECT(texU, TexCC.xy).g;\n"
368
        "                v4.x = texRECT(texU, TexLC.xy).g;\n"
369
        "                v4.y = texRECT(texU, TexRC.xy).g;\n"
370
        "                v4.z = texRECT(texU, TexCD.xy).g;\n"
371
        "                v4.w = texRECT(texU, TexCU.xy).g;\n"
372
        "                v6.x = texRECT(texU, TexLD.xy).g;\n"
373
        "                v6.y = texRECT(texU, TexLU.xy).g;\n"
374
        "                v6.z = texRECT(texU, TexRD.xy).g;\n"
375
        "                v6.w = texRECT(texU, TexRU.xy).g;\n"
376
        "                if(cc.g < v3.x || any(cc.gggg<v4.xyzw || cc.gggg<v6.xyzw))return; \n"
377
        "                v3.y = texRECT(texD, TexCC.xy).g;\n"
378
        "                v5.x = texRECT(texD, TexLC.xy).g;\n"
379
        "                v5.y = texRECT(texD, TexRC.xy).g;\n"
380
        "                v5.z = texRECT(texD, TexCD.xy).g;\n"
381
        "                v5.w = texRECT(texD, TexCU.xy).g;\n"
382
        "                v6.x = texRECT(texD, TexLD.xy).g;\n"
383
        "                v6.y = texRECT(texD, TexLU.xy).g;\n"
384
        "                v6.z = texRECT(texD, TexRD.xy).g;\n"
385
        "                v6.w = texRECT(texD, TexRU.xy).g;\n"
386
        "                if(cc.g < v3.y || any(cc.gggg<v5.xyzw || cc.gggg<v6.xyzw))return; \n"
387
        "                dog = 1.0; \n"
388
        "        }\n"
389
        //the minimum case
390
        <<
391
        "  else if(cc.g < -THRESHOLD0 && all(cc.gggg < min(v1, v2)))\n"
392
        "  {\n"
393
        "                v3.x = texRECT(texU, TexCC.xy).g;\n"
394
        "                v4.x = texRECT(texU, TexLC.xy).g;\n"
395
        "                v4.y = texRECT(texU, TexRC.xy).g;\n"
396
        "                v4.z = texRECT(texU, TexCD.xy).g;\n"
397
        "                v4.w = texRECT(texU, TexCU.xy).g;\n"
398
        "                v6.x = texRECT(texU, TexLD.xy).g;\n"
399
        "                v6.y = texRECT(texU, TexLU.xy).g;\n"
400
        "                v6.z = texRECT(texU, TexRD.xy).g;\n"
401
        "                v6.w = texRECT(texU, TexRU.xy).g;\n"
402
        "                if(cc.g > v3.x || any(cc.gggg>v4.xyzw || cc.gggg>v6.xyzw))return; \n"
403
        "                v3.y = texRECT(texD, TexCC.xy).g;\n"
404
        "                v5.x = texRECT(texD, TexLC.xy).g;\n"
405
        "                v5.y = texRECT(texD, TexRC.xy).g;\n"
406
        "                v5.z = texRECT(texD, TexCD.xy).g;\n"
407
        "                v5.w = texRECT(texD, TexCU.xy).g;\n"
408
        "                v6.x = texRECT(texD, TexLD.xy).g;\n"
409
        "                v6.y = texRECT(texD, TexLU.xy).g;\n"
410
        "                v6.z = texRECT(texD, TexRD.xy).g;\n"
411
        "                v6.w = texRECT(texD, TexRU.xy).g;\n"
412
        "                if(cc.g > v3.y || any(cc.gggg>v5.xyzw || cc.gggg>v6.xyzw))return; \n"
413
        "                dog = 0.5 ; \n"
414
        "        }\n"
415
        "        else\n"
416
        "                return;\n"
417
        <<
418
        "  int i = 0; \n"
419
        "  float2 offset = float2(0, 0);\n"
420
        "  float2 offsets = float2(0, 0);\n"
421
        "  float3 dxys;                bool key_moved;        \n"
422
        "  float fx, fy, fs; \n"
423
        "  float fxx, fyy, fxy; \n"
424
        "  float fxs, fys, fss; \n"
425
        "  do\n"
426
        "  {\n"
427
        "        dxys = float3(0, 0, 0);\n"
428
        "        offset = float2(0, 0);\n"
429
        "        float4 D2 = v1.xyzw - cc.gggg;\n"
430
        "        fxx = D2.x + D2.y;\n"
431
        "        fyy = D2.z + D2.w;\n"
432
        "        float2 D4 = v2.xw - v2.yz;\n"
433
        "        fxy = 0.25*(D4.x + D4.y);\n"
434
        "        float2 D5 = 0.5*(v1.yw-v1.xz); \n"
435
        "        fx = D5.x;\n"
436
        "        fy = D5.y ; \n"
437
        "        fs = 0.5*( v3.x - v3.y ); \n"
438
        "        fss = v3.x + v3.y - cc.g - cc.g;\n"
439
        "        fxs = 0.25 * ( v4.y + v5.x - v4.x - v5.y);\n"
440
        "        fys = 0.25 * ( v4.w + v5.z - v4.z - v5.w);\n"
441
        "        float4 A0, A1, A2 ;                        \n"
442
        "        A0 = float4(fxx, fxy, fxs, -fx);        \n"
443
        "        A1 = float4(fxy, fyy, fys, -fy);        \n"
444
        "        A2 = float4(fxs, fys, fss, -fs);        \n"
445
        "        float3 x3 = abs(float3(fxx, fxy, fxs));                \n"
446
        "        float maxa = max(max(x3.x, x3.y), x3.z);        \n"
447
        "        if(maxa > 1e-10 )                                                        \n"
448
        "        {\n"
449
        "                if(x3.y ==maxa )                                                        \n"
450
        "                {                                                                                        \n"
451
        "                        float4 TEMP = A1; A1 = A0; A0 = TEMP;        \n"
452
        "                }else if( x3.z == maxa )                                        \n"
453
        "                {                                                                                        \n"
454
        "                        float4 TEMP = A2; A2 = A0; A0 = TEMP;        \n"
455
        "                }                                                                                        \n"
456
        "                A0 /= A0.x;                                                                        \n"
457
        "                A1 -= A1.x * A0;                                                        \n"
458
        "                A2 -= A2.x * A0;                                                        \n"
459
        "                float2 x2 = abs(float2(A1.y, A2.y));                \n"
460
        "                if( x2.y > x2.x )                                                        \n"
461
        "                {                                                                                        \n"
462
        "                        float3 TEMP = A2.yzw;                                        \n"
463
        "                        A2.yzw = A1.yzw;                                                \n"
464
        "                        A1.yzw = TEMP;                                                        \n"
465
        "                        x2.x = x2.y;                                                        \n"
466
        "                }                                                                                        \n"
467
        "                if(x2.x > 1e-10)                                                        \n"
468
        "                {\n"
469
        "                        A1.yzw /= A1.y;                                                        \n"
470
        "                        A2.yzw -= A2.y * A1.yzw;                                \n"
471
        "                        if(abs(A2.z) > 1e-10)                                        \n"
472
        "                        {\n"
473
        // compute dx, dy, ds: 
474
        <<
475
        "                                dxys.z = A2.w /A2.z;                                                \n"
476
        "                                dxys.y = A1.w - dxys.z*A1.z;                            \n"
477
        "                                dxys.x = A0.w - dxys.z*A0.z - dxys.y*A0.y;        \n"
478
        "                        }\n"
479
        "                }\n"
480
        "        }\n"
481
        "        offset.x = dxys.x > 0.6 ? 1 : 0 + dxys.x < -0.6 ? -1 : 0;\n"
482
        "        offset.y = dxys.y > 0.6 ? 1 : 0 + dxys.y < - 0.6? -1 : 0;\n"
483
        "        i++; key_moved = i < MAX_REFINE && any(abs(offset)>0) ;        \n"
484
        "        if(key_moved)\n"
485
        "        {\n"
486
        "                offsets += offset; \n"
487
        "            cc  =  texRECT(tex, TexCC.xy  + offsets);\n"
488
        "                v1.x = texRECT(tex , TexLC.xy + offsets).g;\n"
489
        "                v1.y = texRECT(tex , TexRC.xy + offsets).g;\n"
490
        "                v1.z = texRECT(tex , TexCD.xy + offsets).g;\n"
491
        "                v1.w = texRECT(tex , TexCU.xy + offsets).g;\n"
492
        "                v2.x = texRECT(tex , TexLD.xy + offsets).g;\n"
493
        "                v2.y = texRECT(tex , TexLU.xy + offsets).g;\n"
494
        "                v2.z = texRECT(tex , TexRD.xy + offsets).g;\n"
495
        "                v2.w = texRECT(tex , TexRU.xy + offsets).g;\n"
496
        "                v3.x = texRECT(texU, TexCC.xy + offsets).g;\n"
497
        "                v4.x = texRECT(texU, TexLC.xy + offsets).g;\n"
498
        "                v4.y = texRECT(texU, TexRC.xy + offsets).g;\n"
499
        "                v4.z = texRECT(texU, TexCD.xy + offsets).g;\n"
500
        "                v4.w = texRECT(texU, TexCU.xy + offsets).g;\n"
501
        "                v3.y = texRECT(texD, TexCC.xy + offsets).g;\n"
502
        "                v5.x = texRECT(texD, TexLC.xy + offsets).g;\n"
503
        "                v5.y = texRECT(texD, TexRC.xy + offsets).g;\n"
504
        "                v5.z = texRECT(texD, TexCD.xy + offsets).g;\n"
505
        "                v5.w = texRECT(texD, TexCU.xy + offsets).g;\n"
506
        "        }\n"
507
        "  }while(key_moved);\n"
508
          <<
509
        "  bool test1 = (abs(cc.g + 0.5*dot(float3(fx, fy, fs), dxys ))> THRESHOLD1) ;\n"
510
        "  float test2_v1= fxx*fyy - fxy *fxy; \n"
511
        "  float test2_v2 = (fxx+fyy); \n"
512
        "  test2_v2 = test2_v2*test2_v2;\n"
513
        "  bool test2 = test2_v1>0 && test2_v2 < THRESHOLD2 * test2_v1; \n "
514
    //keep the point when the offset is less than 1
515
        <<
516
        "  FragData1 = test1 && test2 && all( abs(dxys) < 1)? float4( dog, dxys.xy+offsets, dxys.z) : float4(0, 0, 0, 0); \n"
517
        "}\n"        
518
        <<'\0';
519

    
520
        ProgramCG * program; 
521
        s_keypoint = program = new ProgramCG(buffer);
522
        //parameter
523
        _param_dog_texu = cgGetNamedParameter(*program, "texU");
524
        _param_dog_texd = cgGetNamedParameter(*program, "texD");
525

    
526
        return 1;
527

    
528
}
529

    
530
//keypoint detection shader
531
//1. compare with 26 neighbours
532
//2. sub-pixel sub-scale localization
533
//3. output: [dog, offset(x,y,s)]
534

    
535
void ShaderBagCG:: LoadKeypointShader(float threshold, float edge_threshold)
536
{
537
        char buffer[10240];
538
        float threshold0 = threshold* (GlobalUtil::_SubpixelLocalization?0.8f:1.0f);
539
        float threshold1 = threshold;
540
        float threshold2 = (edge_threshold+1)*(edge_threshold+1)/edge_threshold;
541
        ostrstream out(buffer, 10240);
542
        out<<setprecision(8);
543
        streampos pos;
544
        //tex(X)(Y)
545
        //X: (CLR) (CENTER 0, LEFT -1, RIGHT +1)  
546
        //Y: (CDU) (CENTER 0, DOWN -1, UP    +1) 
547

    
548
        out <<        "#define THRESHOLD0 " << threshold0 << "\n"
549
                        "#define THRESHOLD1 " << threshold1 << "\n"
550
                        "#define THRESHOLD2 " << threshold2 << "\n";
551
        out<<
552
        "void main (\n"
553
        "float4 TexCC : TEXCOORD0, float4 TexLC : TEXCOORD1,\n"
554
        "float4 TexRC : TEXCOORD2, float4 TexCD : TEXCOORD3, \n"
555
        "float4 TexCU : TEXCOORD4, float4 TexLD : TEXCOORD5, \n"
556
        "float4 TexLU : TEXCOORD6, float4 TexRD : TEXCOORD7,\n"
557
        "out float4 FragData0 : COLOR0, out float4 FragData1 : COLOR1, \n"
558
        "uniform samplerRECT tex, uniform samplerRECT texU, uniform samplerRECT texD)\n"
559
        "{\n"
560
        "        float4 v1, v2, gg;\n"
561
        "        float2 TexRU = float2(TexRC.x, TexCU.y); \n"
562
        "        float4 cc  = texRECT(tex, TexCC.xy);\n"
563
        "        v1.x = texRECT(tex, TexLC.xy).g;\n"
564
        "        gg.x = texRECT(tex, TexLC.xy).r;\n"
565
        "        v1.y = texRECT(tex, TexRC.xy).g;\n"
566
        "        gg.y = texRECT(tex, TexRC.xy).r;\n"
567
        "        v1.z = texRECT(tex, TexCD.xy).g;\n"
568
        "        gg.z = texRECT(tex, TexCD.xy).r;\n"
569
        "        v1.w = texRECT(tex, TexCU.xy).g;\n"
570
        "        gg.w = texRECT(tex, TexCU.xy).r;\n"
571
        "        v2.x = texRECT(tex, TexLD.xy).g;\n"
572
        "        v2.y = texRECT(tex, TexLU.xy).g;\n"
573
        "        v2.z = texRECT(tex, TexRD.xy).g;\n"
574
        "        v2.w = texRECT(tex, TexRU.xy).g;\n"
575
        "        float2 dxdy = (gg.yw - gg.xz); \n"
576
        "        float grad = 0.5*length(dxdy);\n"
577
        "        float theta = grad==0? 0: atan2(dxdy.y, dxdy.x);\n"
578
        "        FragData0 = float4(cc.rg, grad, theta);\n"
579

    
580
        //test against 8 neighbours
581
        //use variable to identify type of extremum
582
        //1.0 for local maximum and 0.5 for minimum
583
        <<
584
        "        float dog = 0.0; \n"
585
        "        FragData1 = float4(0, 0, 0, 0); \n"
586
        "        dog = cc.g > THRESHOLD0 && all(cc.gggg > max(v1, v2))?1.0: 0.0;\n"
587
        "        dog = cc.g < -THRESHOLD0 && all(cc.gggg < min(v1, v2))?0.5: dog;\n";
588

    
589
        pos = out.tellp();
590
        //do edge supression first.. 
591
        //vector v1 is < (-1, 0), (1, 0), (0,-1), (0, 1)>
592
        //vector v2 is < (-1,-1), (-1,1), (1,-1), (1, 1)>
593

    
594
        out<<
595
        "        if(dog == 0.0) return;\n"
596
        "        float fxx, fyy, fxy; \n"
597
        "        float4 D2 = v1.xyzw - cc.gggg;\n"
598
        "        float2 D4 = v2.xw - v2.yz;\n"
599
        "        fxx = D2.x + D2.y;\n"
600
        "        fyy = D2.z + D2.w;\n"
601
        "        fxy = 0.25*(D4.x + D4.y);\n"
602
        "        float fxx_plus_fyy = fxx + fyy;\n"
603
        "        float score_up = fxx_plus_fyy*fxx_plus_fyy; \n"
604
        "        float score_down = (fxx*fyy - fxy*fxy);\n"
605
        "        if( score_down <= 0 || score_up > THRESHOLD2 * score_down)return;\n"
606
        //...
607
        <<
608
        "        float2 D5 = 0.5*(v1.yw-v1.xz); \n"
609
        "        float fx = D5.x, fy = D5.y ; \n"
610
        "        float fs, fss , fxs, fys ; \n"
611
        "        float2 v3; float4 v4, v5, v6;\n"
612
        //read 9 pixels of upper level
613
        <<
614
        "        v3.x = texRECT(texU, TexCC.xy).g;\n"
615
        "        v4.x = texRECT(texU, TexLC.xy).g;\n"
616
        "        v4.y = texRECT(texU, TexRC.xy).g;\n"
617
        "        v4.z = texRECT(texU, TexCD.xy).g;\n"
618
        "        v4.w = texRECT(texU, TexCU.xy).g;\n"
619
        "        v6.x = texRECT(texU, TexLD.xy).g;\n"
620
        "        v6.y = texRECT(texU, TexLU.xy).g;\n"
621
        "        v6.z = texRECT(texU, TexRD.xy).g;\n"
622
        "        v6.w = texRECT(texU, TexRU.xy).g;\n"
623
        //compare with 9 pixels of upper level
624
        //read and compare with 9 pixels of lower level
625
        //the maximum case
626
        <<
627
        "        if(dog == 1.0)\n"
628
        "        {\n"
629
        "                bool4 test = cc.gggg < max(v4, v6); \n"
630
        "                if(cc.g < v3.x || any(test.xy||test.zw))return; \n"
631
        "                v3.y = texRECT(texD, TexCC.xy).g;\n"
632
        "                v5.x = texRECT(texD, TexLC.xy).g;\n"
633
        "                v5.y = texRECT(texD, TexRC.xy).g;\n"
634
        "                v5.z = texRECT(texD, TexCD.xy).g;\n"
635
        "                v5.w = texRECT(texD, TexCU.xy).g;\n"
636
        "                v6.x = texRECT(texD, TexLD.xy).g;\n"
637
        "                v6.y = texRECT(texD, TexLU.xy).g;\n"
638
        "                v6.z = texRECT(texD, TexRD.xy).g;\n"
639
        "                v6.w = texRECT(texD, TexRU.xy).g;\n"
640
        "                test = cc.gggg<max(v5, v6); \n"
641
        "                if(cc.g < v3.y || any(test.xy||test.zw))return; \n"
642
        "        }\n"
643
        //the minimum case
644
        <<
645
        "        else{\n"
646
        "                bool4 test = cc.gggg>min(v4, v6); \n"
647
        "                if(cc.g > v3.x || any(test.xy||test.zw))return; \n"
648
        "                v3.y = texRECT(texD, TexCC.xy).g;\n"
649
        "                v5.x = texRECT(texD, TexLC.xy).g;\n"
650
        "                v5.y = texRECT(texD, TexRC.xy).g;\n"
651
        "                v5.z = texRECT(texD, TexCD.xy).g;\n"
652
        "                v5.w = texRECT(texD, TexCU.xy).g;\n"
653
        "                v6.x = texRECT(texD, TexLD.xy).g;\n"
654
        "                v6.y = texRECT(texD, TexLU.xy).g;\n"
655
        "                v6.z = texRECT(texD, TexRD.xy).g;\n"
656
        "                v6.w = texRECT(texD, TexRU.xy).g;\n"
657
        "                test = cc.gggg>min(v5, v6); \n"
658
        "                if(cc.g > v3.y || any(test.xy||test.zw))return; \n"
659
        "        }\n";
660

    
661
        if(GlobalUtil::_SubpixelLocalization)
662

    
663
        // sub-pixel localization FragData1 = float4(dog, 0, 0, 0); return;
664
        out <<
665
        "        fs = 0.5*( v3.x - v3.y ); //bug fix 9/12/2007 \n"
666
        "        fss = v3.x + v3.y - cc.g - cc.g;\n"
667
        "        fxs = 0.25 * ( v4.y + v5.x - v4.x - v5.y);\n"
668
        "        fys = 0.25 * ( v4.w + v5.z - v4.z - v5.w);\n"
669
        
670
        ///////////////////////////////////////////////////////////////// 
671
        // let dog difference be quatratic function  of dx, dy, ds; 
672
        // df(dx, dy, ds) = fx * dx + fy*dy + fs * ds + 
673
        //                                  + 0.5 * ( fxx * dx * dx + fyy * dy * dy + fss * ds * ds)
674
        //                                  + (fxy * dx * dy + fxs * dx * ds + fys * dy * ds)
675
        // (fx, fy, fs, fxx, fyy, fss, fxy, fxs, fys are the derivatives)
676
        
677
        //the local extremum satisfies
678
        // df/dx = 0, df/dy = 0, df/dz = 0
679
        
680
        //that is 
681
        // |-fx|     | fxx fxy fxs |   |dx|
682
        // |-fy|  =  | fxy fyy fys | * |dy|
683
        // |-fs|     | fxs fys fss |   |ds|
684
        // need to solve dx, dy, ds
685

    
686
        // Use Gauss elimination to solve the linear system
687
    <<
688
        "        float3 dxys = float3(0.0);                        \n"
689
        "        float4 A0, A1, A2 ;                        \n"
690
        "        A0 = float4(fxx, fxy, fxs, -fx);        \n"
691
        "        A1 = float4(fxy, fyy, fys, -fy);        \n"
692
        "        A2 = float4(fxs, fys, fss, -fs);        \n"
693
        "        float3 x3 = abs(float3(fxx, fxy, fxs));                \n"
694
        "        float maxa = max(max(x3.x, x3.y), x3.z);        \n"
695
        "        if(maxa >= 1e-10 ) {                                                \n"
696
        "        if(x3.y ==maxa )                                                        \n"
697
        "        {                                                                                        \n"
698
        "                float4 TEMP = A1; A1 = A0; A0 = TEMP;        \n"
699
        "        }else if( x3.z == maxa )                                        \n"
700
        "        {                                                                                        \n"
701
        "                float4 TEMP = A2; A2 = A0; A0 = TEMP;        \n"
702
        "        }                                                                                        \n"
703
        "        A0 /= A0.x;                                                                        \n"
704
        "        A1 -= A1.x * A0;                                                        \n"
705
        "        A2 -= A2.x * A0;                                                        \n"
706
        "        float2 x2 = abs(float2(A1.y, A2.y));                \n"
707
        "        if( x2.y > x2.x )                                                        \n"
708
        "        {                                                                                        \n"
709
        "                float3 TEMP = A2.yzw;                                        \n"
710
        "                A2.yzw = A1.yzw;                                                \n"
711
        "                A1.yzw = TEMP;                                                        \n"
712
        "                x2.x = x2.y;                                                        \n"
713
        "        }                                                                                        \n"
714
        "        if(x2.x >= 1e-10) {                                                \n"
715
        "        A1.yzw /= A1.y;                                                                \n"
716
        "        A2.yzw -= A2.y * A1.yzw;                                        \n"
717
        "        if(abs(A2.z) >= 1e-10) {                                \n"
718
        // compute dx, dy, ds: 
719
        <<
720
        "        dxys.z = A2.w /A2.z;                                    \n"
721
        "        dxys.y = A1.w - dxys.z*A1.z;                            \n"
722
        "        dxys.x = A0.w - dxys.z*A0.z - dxys.y*A0.y;        \n"
723

    
724
        //one more threshold which I forgot in  versions prior to 286
725
        <<
726
        "        bool bugfix_test = (abs(cc.g + 0.5*dot(float3(fx, fy, fs), dxys )) < THRESHOLD1) ;\n"
727
        "        if(bugfix_test || any(abs(dxys) >= 1.0)) dog = 0; \n"
728
        "        }}}\n"
729
    //keep the point when the offset is less than 1
730
        <<
731
        "        FragData1 = float4( dog, dxys); \n"
732
        "}\n"        <<'\0';
733

    
734
        else                out<<
735
        "        FragData1 =  float4( dog, 0, 0, 0) ;        \n"
736
        "}\n"        <<'\0';
737

    
738
        ProgramCG * program; 
739
        s_keypoint = program = new ProgramCG(buffer);
740
        if(!program->IsValidProgram())
741
        {
742
                delete program;
743
                out.seekp(pos);
744
                out << 
745
        "        FragData1 =  float4( fabs(cc.g) > 2.0 * THRESHOLD0? dog : 0, 0, 0, 0) ;        \n"
746
        "}\n" <<'\0';
747
                s_keypoint = program = new ProgramCG(buffer);
748
                GlobalUtil::_SubpixelLocalization = 0;
749
                std::cerr<<"Detection simplified on this hardware"<<endl;
750
        }
751
        //parameter
752
        _param_dog_texu = cgGetNamedParameter(*program, "texU");
753
        _param_dog_texd = cgGetNamedParameter(*program, "texD");
754

    
755

    
756

    
757

    
758
}
759

    
760

    
761
void ShaderBagCG::SetDogTexParam(int texU, int texD)
762
{
763
        cgGLSetTextureParameter(_param_dog_texu, texU);
764
        cgGLEnableTextureParameter(_param_dog_texu);
765
        cgGLSetTextureParameter(_param_dog_texd, texD);
766
        cgGLEnableTextureParameter(_param_dog_texd);
767
}
768

    
769
void ShaderBagCG::SetGenListStepParam(int tex, int tex0)
770
{
771
        cgGLSetTextureParameter(_param_genlist_step_tex, tex);
772
        cgGLEnableTextureParameter(_param_genlist_step_tex);
773
        cgGLSetTextureParameter(_param_genlist_step_tex0, tex0);
774
        cgGLEnableTextureParameter(_param_genlist_step_tex0);
775
}
776

    
777
void ShaderBagCG::SetGenVBOParam(float width, float fwidth, float size)
778
{
779
        float sizes[4] = {size*3.0f, fwidth, width, 1.0f/width};
780
        cgGLSetParameter4fv(_param_genvbo_size, sizes);
781
}
782

    
783

    
784
// Generates the Cg fragment program for the horizontal pass of a 1D filter.
//   kernel[] : filter weight of each tap
//   offset[] : horizontal texel offset of each tap; a tap whose offset is 0
//              reuses the center sample instead of issuing another fetch
//   width    : number of taps
// The program reads the red channel of `tex`, copies the unfiltered center
// sample to FragColor.r and writes the filtered value to FragColor.b.
// With GlobalUtil::_BetaFilter set, taps are processed four at a time as
// float4 multiply-adds; otherwise one tap per statement.
// Returns a newly allocated ProgramCG built from the generated source.
ProgramGPU* FilterGLCG::CreateFilterH(float kernel[], float offset[], int width)
{
	char buffer[10240];
	ostrstream out(buffer, 10240);

	out<<setprecision(8);

	// Common entry point.
	out<< "void main(uniform samplerRECT tex,";
	out<<"\n\tin float4 TexCoord0: TEXCOORD0,";
	out<<"\n\tout float4 FragColor : COLOR0 )";

	if(GlobalUtil::_BetaFilter)
	{
		out<<"\n{\n\tfloat4 intensity4 = float4(0, 0, 0, 0), data;\n";
		out<<"float or = texRECT(tex, TexCoord0.xy).r, intensity;\n";

		for(int i = 0; i< width; i+=4)
		{
			// Four samples; slots past the end of the kernel are padded with 0.
			out <<"data = float4(";
			for(int j = i; j < i + 4; j++)
			{
				if(j != i) out <<", \n";
				if(j >= width)
				{
					out<<"0";
				}else if(offset[j]==0.0)
				{
					out<<"or";
				}else
				{
					out<<"texRECT(tex, TexCoord0.xy + float2(float("<<offset[j] <<") , 0)).r";
				}
			}
			out << ");\n";

			// The four matching weights.
			out << "intensity4 += data * float4(";
			for(int k = i; k < i + 4; k++)
			{
				if(k != i) out <<", ";
				if(k >= width)	out<<"0";
				else			out<<kernel[k];
			}
			out << ");\n";
		}
		out << "intensity4.xy += intensity4.zw;\n";
		out << "intensity = intensity4.x + intensity4.y;\n";
	}else
	{
		out<<"\n{\n\tfloat intensity = 0.0 ;  float2 pos;\n";
		// Declare the center sample exactly once, before the taps: the final
		// "FragColor.r = or;" needs it even when no tap has a zero offset, and
		// declaring it inside the loop would redeclare it for repeated zeros.
		out<<"float or = texRECT(tex, TexCoord0.xy).r;\n";

		for(int i = 0; i< width; i++)
		{
			if(offset[i]==0.0)
			{
				out<<"intensity+= or * "<<kernel[i]<<";\n";
			}else
			{
				out<<"pos = TexCoord0.xy + float2(float("<<offset[i] <<") , 0);\n";
				out<<"intensity+= "<<kernel[i]<<"*texRECT(tex, pos).r;\n";
			}
		}
	}

	//copy original data to red channel
	out<<"FragColor.r = or;\n";
	out<<"FragColor.b  = intensity;}\n"<<'\0';

	// If the generated text did not fit, the stream has failed and the
	// terminator above was never stored; terminate explicitly so the buffer
	// is never read past its end.
	if(!out) buffer[sizeof(buffer) - 1] = '\0';

	return new ProgramCG( buffer);
}
858

    
859

    
860
// Generates the Cg fragment program for the vertical pass of a 1D filter.
//   kernel[] : filter weight of each tap
//   offset[] : vertical texel offset of each tap; a tap whose offset is 0
//              reuses the center sample instead of issuing another fetch
//   height   : number of taps
// The program filters the blue channel of `tex` and writes
//   FragColor.r = filtered value,
//   FragColor.g = filtered value - center red (difference of gaussian),
//   FragColor.b = center blue.
// With GlobalUtil::_BetaFilter set, taps are processed four at a time as
// float4 multiply-adds; otherwise one tap per statement.
// Returns a newly allocated ProgramCG built from the generated source.
ProgramGPU* FilterGLCG::CreateFilterV(float kernel[], float offset[], int height)
{
	char buffer[10240];
	ostrstream out(buffer, 10240);
	out<<setprecision(8);

	// Common entry point.
	out<< "void main(uniform samplerRECT tex,";
	out<<"\n\tin float4 TexCoord0: TEXCOORD0,";
	out<<"\n\tout float4 FragColor : COLOR0 )";

	if(GlobalUtil::_BetaFilter)
	{
		out<<"\n{\n\tfloat4 intensity4 = float4(0, 0, 0, 0), data;\n";
		out<<"float2 orb = texRECT(tex, TexCoord0.xy).rb; float intensity;\n";

		for(int i = 0; i< height; i+=4)
		{
			// Four samples; slots past the end of the kernel are padded with 0.
			out <<"data = float4(";
			for(int j = i; j < i + 4; j++)
			{
				if(j != i) out <<", \n";
				if(j >= height)
				{
					out<<"0";
				}else if(offset[j]==0.0)
				{
					out<<"orb.y";
				}else
				{
					out<<"texRECT(tex, TexCoord0.xy + float2(0, float("<<offset[j] <<"))).b";
				}
			}
			out << ");\n";

			// The four matching weights.
			out << "intensity4 += data * float4(";
			for(int k = i; k < i + 4; k++)
			{
				if(k != i) out <<", ";
				if(k >= height)	out<<"0";
				else			out<<kernel[k];
			}
			out << ");\n";
		}
		out << "intensity4.xy += intensity4.zw;\n";
		out << "intensity = intensity4.x + intensity4.y;\n";
	}else
	{
		out<<"\n{\n\tfloat intensity = 0.0 ;  float2 pos;\n";
		// Declare the center sample exactly once, before the taps: the output
		// statements below need it even when no tap has a zero offset, and
		// declaring it inside the loop would redeclare it for repeated zeros.
		out<<"float2 orb = texRECT(tex, TexCoord0.xy).rb;\n";

		for(int i = 0; i< height; i++)
		{
			if(offset[i]==0.0)
			{
				out<<"intensity+= orb.y * "<<kernel[i]<<";\n";
			}else
			{
				out<<"pos = TexCoord0.xy + float2(0, float("<<offset[i] <<"));\n";
				out<<"intensity+= "<<kernel[i]<<"*texRECT(tex, pos).b;\n";
			}
		}
	}

	out<<"FragColor.b = orb.y;\n";
	out<<"FragColor.g = intensity - orb.x;\n"; // difference of gaussian..
	out<<"FragColor.r = intensity;}\n"<<'\0';

	// If the generated text did not fit, the stream has failed and the
	// terminator above was never stored; terminate explicitly so the buffer
	// is never read past its end.
	if(!out) buffer[sizeof(buffer) - 1] = '\0';

	return new ProgramCG( buffer);
}
931

    
932

    
933
// Generates the Cg fragment program for the horizontal filter pass on packed
// textures (both h and v are packed).
//   kernel[] : filter weights; kernel[width >> 1] is treated as the center
//   offset[] : not used by this function
//   width    : number of weights
// For every packed neighbour texel the three weights that apply to it are
// looked up (0 outside the kernel) and accumulated into `result` through
// swizzled multiplies. Returns a newly allocated ProgramCG.
ProgramGPU* FilterGLCG::CreateFilterHPK(float kernel[], float offset[], int width)
{
	const int halfwidth = width >>1;
	float * center = kernel + halfwidth;
	const int nhpixel = (halfwidth+1)>>1;	// neighbour pixels to look up on each side
	const int npixel  = (nhpixel<<1)+1;	// total pixels fetched

	char buffer[10240];
	ostrstream out(buffer, 10240);
	out<<setprecision(8);

	out<< "void main(uniform samplerRECT tex, float4 TexCoord0 : TEXCOORD0, out float4 FragColor : COLOR0 ){\n";
	out<< "float4 result = float4(0, 0, 0, 0); \nfloat4 pc; float2 coord; \n";

	for(int pixel = 0; pixel < npixel; pixel++)
	{
		const int shift = pixel - nhpixel;

		out<<"coord = TexCoord0.xy + float2(float("<<shift<<"),0);\n";
		out<<"pc=texRECT(tex, coord);\n";
		if(GlobalUtil::_PreciseBorder)
		{
			out<<"if(coord.x < 0) pc = pc.rrbb;\n";
		}

		// Weights of the sub-pixels at 2*shift - 1, 2*shift, 2*shift + 1;
		// positions outside [-halfwidth, halfwidth] contribute nothing.
		float weight[3];
		for(int sub = 0; sub < 3; sub++)
		{
			const int tap = shift*2 + sub - 1;
			if(tap < -halfwidth || tap > halfwidth)
			{
				weight[sub] = 0;
			}
			else
			{
				weight[sub] = center[tap];
			}
		}

		if(weight[1] != 0.0)
		{
			out<<"result += float4("<<weight[1]<<", "<<weight[0]<<", "<<weight[1]<<", "<<weight[0]<<")*pc.rrbb;\n";
			out<<"result += float4("<<weight[2]<<", "<<weight[1]<<", "<<weight[2]<<", "<<weight[1]<<")*pc.ggaa;\n";
		}
		else
		{
			out<<"result += float4("<<weight[2]<<","<<weight[0]<<","<<weight[2]<<","<<weight[0]<<")*pc.grab;\n";
		}
	}

	out<< "         FragColor = result; }\n";
	out<< '\0';
	return new ProgramCG( buffer);
}
981

    
982
// Generates the Cg fragment program for the vertical filter pass on packed
// textures (both h and v are packed).
//   kernel[] : filter weights; kernel[height >> 1] is treated as the center
//   offset[] : not used by this function
//   height   : number of weights
// For every packed neighbour texel the three weights that apply to it are
// looked up (0 outside the kernel) and accumulated into `result` through
// swizzled multiplies. Returns a newly allocated ProgramCG.
ProgramGPU* FilterGLCG::CreateFilterVPK(float kernel[], float offset[], int height)
{
	const int halfh = height >>1;
	float * center = kernel + halfh;
	const int nhpixel = (halfh+1)>>1;	// neighbour pixels to look up on each side
	const int npixel  = (nhpixel<<1)+1;	// total pixels fetched

	char buffer[10240];
	ostrstream out(buffer, 10240);
	out<<setprecision(8);

	out<< "void main(uniform samplerRECT tex, float4 TexCoord0 : TEXCOORD0, out float4 FragColor : COLOR0 ){\n";
	out<< "float4 result = float4(0, 0, 0, 0);\nfloat4 pc; float2 coord;\n";

	for(int pixel = 0; pixel < npixel; pixel++)
	{
		const int shift = pixel - nhpixel;

		out<<"coord = TexCoord0.xy + float2(0, float("<<shift<<"));\n";
		out<<"pc=texRECT(tex, coord);\n";
		if(GlobalUtil::_PreciseBorder)
		{
			out<<"if(coord.y < 0) pc = pc.rgrg;\n";
		}

		// Weights of the sub-pixels at 2*shift - 1, 2*shift, 2*shift + 1;
		// positions outside [-halfh, halfh] contribute nothing.
		float weight[3];
		for(int sub = 0; sub < 3; sub++)
		{
			const int tap = shift*2 + sub - 1;
			if(tap < -halfh || tap > halfh)
			{
				weight[sub] = 0;
			}
			else
			{
				weight[sub] = center[tap];
			}
		}

		if(weight[1] != 0.0)
		{
			out<<"result += float4("<<weight[1]<<","<<weight[1]<<","<<weight[0]<<","<<weight[0]<<")*pc.rgrg;\n";
			out<<"result += float4("<<weight[2]<<","<<weight[2]<<","<<weight[1]<<","<<weight[1]<<")*pc.baba;\n";
		}
		else
		{
			out<<"result += float4("<<weight[2]<<","<<weight[2]<<","<<weight[0]<<","<<weight[0]<<")*pc.barg;\n";
		}
	}

	out<< "         FragColor = result; }\n";
	out<< '\0';
	return new ProgramCG( buffer);
}
1028

    
1029

    
1030
void ShaderBagCG::LoadGenListShader(int ndoglev, int nlev)
1031
{
1032
        ProgramCG * program;
1033

    
1034
        s_genlist_init_tight = new ProgramCG(
1035
        "void main (\n"
1036
        "uniform samplerRECT tex, in float4 TexCoord0 : TEXCOORD0,\n"
1037
        "in float4 TexCoord1 : TEXCOORD1, in float4 TexCoord2 : TEXCOORD2, in float4 TexCoord3 : TEXCOORD3,\n"
1038
        "out float4 FragColor : COLOR0){\n"
1039
        "float4 helper = float4( texRECT(tex, TexCoord0.xy).r,  texRECT(tex, TexCoord1.xy).r,\n"
1040
        "texRECT(tex, TexCoord2.xy).r, texRECT(tex, TexCoord3.xy).r);\n"
1041
        "FragColor = float4(helper>0.0);\n"
1042
        "}");
1043

    
1044
        s_genlist_init_ex = program = new ProgramCG(
1045
        "void main (uniform float2 bbox, \n"
1046
        "uniform samplerRECT tex, \n"
1047
        "in float4 TexCoord0 : TEXCOORD0,\n"
1048
        "in float4 TexCoord1 : TEXCOORD1, \n"
1049
        "in float4 TexCoord2 : TEXCOORD2, \n"
1050
        "in float4 TexCoord3 : TEXCOORD3,\n"
1051
        "out float4 FragColor : COLOR0){\n"
1052
        "float4 helper = float4( \n"
1053
        "texRECT(tex, TexCoord0.xy).r, texRECT(tex, TexCoord1.xy).r,\n"
1054
        "texRECT(tex, TexCoord2.xy).r, texRECT(tex, TexCoord3.xy).r);\n"
1055
        "bool4 helper4 = bool4(TexCoord0.xy < bbox, TexCoord3.xy < bbox); \n"
1056
        "bool4 helper2 = helper4.xzxz && helper4.yyww; \n"
1057
        "FragColor = float4(helper2 && (helper>0.0 ));\n"
1058
        "}");
1059
        _param_genlist_init_bbox = cgGetNamedParameter( *program, "bbox");
1060

    
1061

    
1062
        //reduction ...
1063
        s_genlist_histo = new ProgramCG(
1064
        "void main (\n"
1065
        "uniform samplerRECT tex, in float2 TexCoord0 : TEXCOORD0,\n"
1066
        "in float2 TexCoord1 : TEXCOORD1, in float2 TexCoord2 : TEXCOORD2, in float2 TexCoord3 : TEXCOORD3,\n"
1067
        "out float4 FragColor : COLOR0){\n"
1068
        "float4 helper; float4 helper2; \n"
1069
        "helper = texRECT(tex, TexCoord0); helper2.xy = helper.xy + helper.zw; \n"
1070
        "helper = texRECT(tex, TexCoord1); helper2.zw = helper.xy + helper.zw; \n"
1071
        "FragColor.rg = helper2.xz + helper2.yw;\n"
1072
        "helper = texRECT(tex, TexCoord2); helper2.xy = helper.xy + helper.zw; \n"
1073
        "helper = texRECT(tex, TexCoord3); helper2.zw = helper.xy + helper.zw; \n"
1074
        "FragColor.ba= helper2.xz+helper2.yw;\n"
1075
        "}");
1076

    
1077

    
1078
        //read of the first part, which generates tex coordinates 
1079

    
1080
        s_genlist_start= program =  LoadGenListStepShader(1, 1);
1081
        _param_ftex_width= cgGetNamedParameter(*program, "width");
1082
        _param_genlist_start_tex0 = cgGetNamedParameter(*program, "tex0");
1083
        //stepping
1084
        s_genlist_step = program = LoadGenListStepShader(0, 1);
1085
        _param_genlist_step_tex= cgGetNamedParameter(*program, "tex");
1086
        _param_genlist_step_tex0= cgGetNamedParameter(*program, "tex0");
1087

    
1088

    
1089
}
1090

    
1091
// Generates one list-generation shader and returns it as a new ProgramCG.
//   start : non-zero -> the program derives `index` from the fragment
//                       position and a uniform `width` and starts at (0.5, 0.5);
//           zero     -> position and index are read from sampler `tex`.
//   step  : number of levels traversed; one sampler tex0 .. tex<step-1> is
//           declared per level.
// At each level the four channels of the texel at `pos` are compared against
// `index` to choose one of four children; `index` is reduced by the channels
// skipped and `pos` becomes 2 * pos + child offset.
// The program outputs float4(pos, index, 1).
ProgramCG* ShaderBagCG::LoadGenListStepShader(int start, int step)
{
	char buffer[10240];
	ostrstream out(buffer, 10240);

	out<<"void main(out float4 FragColor : COLOR0, \n";

	// One sampler per traversed level.
	for(int level = 0; level < step; level++)
	{
		out<<"uniform samplerRECT tex"<<level<<",\n";
	}

	if(!start)
	{
		out<<"uniform samplerRECT tex, in float2 tpos: TEXCOORD0 ){\n";
		out<<"float4 tc = texRECT( tex, tpos);\n";
		out<<"float2 pos = tc.rg; float index = tc.b;\n";
	}
	else
	{
		out<<"uniform float width, \nin float2 tpos : TEXCOORD0){\n";
		out<<"float  index = floor(tpos.y) * width + floor(tpos.x) + 0.0001;\n";
		out<<"float2 pos = float2(0.5, 0.5);\n";
	}
	out<<"float2 sum;         float4 cc;\n";

	if(step>0)
	{
		out<<"float2 cpos = float2(-0.5, 0.5);\t float2 opos;\n";
		for(int level = 0; level < step; level++)
		{
//#define SETP_CODE_2

#ifndef SETP_CODE_2
			// Two-stage selection: first the r+g pair versus the rest,
			// then the channel inside the chosen pair.
			out<<"cc = texRECT(tex"<<level<<", pos); sum.x = cc.r + cc.g;\n";
			out<<"if (index < sum.x){ if(index < cc.r) opos = cpos.xx; else {opos = cpos.yx; index -= cc.r;}}\n";
			out<<"else {index -= sum.x; if(index < cc.b) opos = cpos.xy; else{opos = cpos.yy; index -= cc.b;}}";
			out<<"pos = (pos + pos + opos);\n";
#else
			// Linear selection over the running sums of r, g, b.
			out<<"cc = texRECT(tex"<<level<<", pos);\n";
			out<<"if (index < cc.r) opos = cpos.xx;\n";
			out<<"else if (index < cc.r + cc.g){opos = cpos.yx; index -= cc.r;}\n";
			out<<"else if (index < cc.r + cc.g + cc.b){opos = cpos.xy; index -= (cc.r + cc.g);}\n";
			out<<"else {opos = cpos.yy; index -= (cc.r + cc.g + cc.b);}\n";
			out<<"pos = (pos + pos + opos);\n";
#endif
		}
	}

	out<<"FragColor = float4(pos, index, 1);\n";
	out<<"}\n";
	out<<'\0';
	return new ProgramCG(buffer);
}
1159

    
1160
void ShaderBagCG::SetGenListInitParam(int w, int h)
1161
{
1162
        float bbox[2] = {w -1.0f, h - 1.0f};
1163
        cgGLSetParameter2fv(_param_genlist_init_bbox, bbox);
1164
}
1165

    
1166
void ShaderBagCG::SetGenListStartParam(float width, int tex0)
1167
{
1168
        cgGLSetParameter1f(_param_ftex_width, width);
1169

    
1170
        if(_param_genlist_start_tex0)
1171
        {
1172
                cgGLSetTextureParameter(_param_genlist_start_tex0, tex0);
1173
                cgGLEnableTextureParameter(_param_genlist_start_tex0);
1174
        }
1175
}
1176

    
1177
void ShaderBagCG::LoadDescriptorShaderF2()
1178
{
1179
        //one shader outpout 128/8 = 16 , each fragout encodes 4
1180
        //const double twopi = 2.0*3.14159265358979323846;
1181
        //const double rpi  = 8.0/twopi;
1182
        char buffer[10240];
1183
        ostrstream out(buffer, 10240);
1184

    
1185
        out<<setprecision(8);
1186

    
1187
        out<<"\n"
1188
        "#define M_PI 3.14159265358979323846\n"
1189
        "#define TWO_PI (2.0*M_PI)\n"
1190
        "#define RPI 1.2732395447351626861510701069801\n"
1191
        "#define WF size.z\n"
1192
        "void main(uniform samplerRECT tex,                \n"
1193
        "uniform        samplerRECT gradTex,                        \n"
1194
        "uniform float4                dsize,                                \n"
1195
        "uniform float3                size,                                \n"
1196
        "in                float2        TexCoord0 : TEXCOORD0,        \n"
1197
        "out                float4  FragData0:COLOR0,                \n"
1198
        "out                float4        FragData1:COLOR1)                \n"
1199
        "{\n"
1200
        "        float2 dim        = size.xy;        //image size                        \n"
1201
        "        float index = dsize.x * floor(TexCoord0.y * 0.5) + TexCoord0.x;\n"
1202
        "        float idx = 8.0 * frac(index * 0.125) + 8.0 * floor(2.0 * frac(TexCoord0.y * 0.5));                \n"
1203
        "        index = floor(index*0.125) + 0.49;  \n"
1204
        "        float2 coord = floor( float2( fmod(index, dsize.z), index*dsize.w)) + 0.5 ;\n"
1205
        "        float2 pos = texRECT(tex, coord).xy;                \n"
1206
        "        if(any(pos.xy <= 1) || any(pos.xy >=dim-1)) "
1207
        "        //discard;        \n"
1208
        "        { FragData0 = FragData1 = float4(0.0); return; }\n"
1209
        "        float  anglef = texRECT(tex, coord).z;\n"
1210
        "        if(anglef > M_PI) anglef -= TWO_PI;\n"
1211
        "        float sigma = texRECT(tex, coord).w; \n"
1212
        "        float spt  = abs(sigma * WF);        //default to be 3*sigma        \n";
1213

    
1214
        //rotation
1215
        out<<
1216
        "        float4 cscs, rots;                                                                \n"
1217
        "        sincos(anglef, cscs.y, cscs.x);                                        \n"
1218
        "        cscs.zw = - cscs.xy;                                                        \n"
1219
        "        rots = cscs /spt;                                                                \n"
1220
        "        cscs *= spt; \n";
1221

    
1222
        //here cscs is actually (cos, sin, -cos, -sin) * (factor: 3)*sigma
1223
        //and rots is  (cos, sin, -cos, -sin ) /(factor*sigma)
1224
        //devide the 4x4 sift grid into 16 1x1 block, and each corresponds to a shader thread
1225
        //To use linear interoplation, 1x1 is increased to 2x2, by adding 0.5 to each side
1226
        out<<
1227
        "        float4 temp; float2 pt, offsetpt;                                \n"
1228
        "        /*the fraction part of idx is .5*/                        \n"
1229
        "        offsetpt.x = 4.0 * frac(idx*0.25) - 2.0;                                \n"
1230
        "        offsetpt.y = floor(idx*0.25) - 1.5;                        \n"
1231
        "        temp = cscs.xwyx*offsetpt.xyxy;                                \n"
1232
        "        pt = pos + temp.xz + temp.yw;                                \n";
1233
        
1234
        //get a horizontal bounding box of the rotated rectangle
1235
        out<<
1236
        "        float2 bwin = abs(cscs.xy);                                        \n"
1237
        "        float bsz = bwin.x + bwin.y;                                        \n"
1238
        "        float4 sz;        float2 spos;                                        \n"
1239
        "        sz.xy = max(pt - bsz, float2(1,1));\n"
1240
        "        sz.zw = min(pt + bsz, dim - 2);                \n"
1241
        "        sz = floor(sz)+0.5;"; //move sample point to pixel center
1242

    
1243
        //get voting for two box
1244
        out<<"\n"
1245
        "        float4 DA, DB;                        \n"
1246
        "        DA = DB  = float4(0, 0, 0, 0);                \n"
1247
        "        for(spos.y = sz.y; spos.y <= sz.w;        spos.y+=1.0)                                \n"
1248
        "        {                                                                                                                                \n"
1249
        "                for(spos.x = sz.x; spos.x <= sz.z;        spos.x+=1.0)                        \n"
1250
        "                {                                                                                                                        \n"
1251
        "                        float2 diff = spos - pt;                                                                \n"
1252
        "                        temp = rots.xywx * diff.xyxy;                                                        \n"
1253
        "                        float2 nxy = (temp.xz + temp.yw);                                                \n"
1254
        "                        float2 nxyn = abs(nxy);                                                                        \n"
1255
        "                        if(all(nxyn < float2(1.0)))\n"
1256
        "                        {\n"
1257
        "                                float4 cc = texRECT(gradTex, spos);                                                \n"
1258
        "                                float mod = cc.b;        float angle = cc.a;                                        \n"
1259
        "                                float theta0 = (anglef - angle)*RPI;                                \n"
1260
        "                                float theta = theta0 < 0? theta0 + 8.0 : theta0; // fmod(theta0 + 8.0, 8.0); \n"
1261
        "                                diff = nxy + offsetpt.xy;                                                                \n"
1262
        "                                float ww = exp(-0.125*dot(diff, diff));\n"
1263
        "                                float2 weights = 1 - nxyn;\n"
1264
        "                                float weight = weights.x * weights.y *mod*ww; \n"
1265
        "                                float theta1 = floor(theta); \n"
1266
        "                                float weight2 = (theta - theta1) * weight; \n"
1267
        "                                float weight1 = weight - weight2;\n"
1268
        "                                DA += float4(theta1 == float4(0, 1, 2, 3))*weight1; \n"
1269
        "                                DA += float4(theta1 == float4(7, 0, 1, 2))*weight2; \n"
1270
        "                                DB += float4(theta1 == float4(4, 5, 6, 7))*weight1;        \n"
1271
        "                                DB += float4(theta1 == float4(3, 4, 5, 6))*weight2; \n"
1272
        "                        }\n"
1273
        "                }\n"
1274
        "        }\n";
1275

    
1276
        out<<
1277
        "        FragData0 = DA; FragData1 = DB;\n"
1278
        "}\n"<<'\0';
1279

    
1280
        ProgramCG * program; 
1281
        s_descriptor_fp = program =  new ProgramCG(buffer);
1282
        _param_descriptor_gtex = cgGetNamedParameter(*program, "gradTex");
1283
        _param_descriptor_size = cgGetNamedParameter(*program, "size");
1284
        _param_descriptor_dsize = cgGetNamedParameter(*program, "dsize");
1285

    
1286

    
1287
}
1288

    
1289
//the shader that computes the descriptors
1290
void ShaderBagCG::LoadDescriptorShader()
1291
{
1292
        GlobalUtil::_DescriptorPPT = 16;
1293
        LoadDescriptorShaderF2();
1294
}
1295

    
1296
void ShaderBagCG::LoadOrientationShader()
1297
{
1298

    
1299
        char buffer[10240];
1300
        ostrstream out(buffer,10240);
1301

    
1302

    
1303
        out<<"\n"
1304
        "#define GAUSSIAN_WF "<<GlobalUtil::_OrientationGaussianFactor<<" \n"
1305
        "#define SAMPLE_WF ("<<GlobalUtil::_OrientationWindowFactor<< " )\n"
1306
        "#define ORIENTATION_THRESHOLD "<< GlobalUtil::_MulitiOrientationThreshold << "\n"
1307
        "void main(uniform samplerRECT tex,                        \n"
1308
        "uniform samplerRECT gradTex,                \n"
1309
        "                uniform float4 size,                                \n"
1310
        "                in float2 TexCoord0 : TEXCOORD0,        \n"
1311
        "                out float4 FeatureData : COLOR0        ";
1312

    
1313
        //multi orientation output
1314
        //use one additional texture to store up to four orientations
1315
        //when we use one 32bit float to store two orientations, no extra texture is required
1316

    
1317
        if(GlobalUtil::_MaxOrientation >1  && GlobalUtil::_OrientationPack2 == 0)
1318
                out<<", out float4 OrientationData : COLOR1";
1319

    
1320
        if(GlobalUtil::_SubpixelLocalization || GlobalUtil::_KeepExtremumSign)
1321
        {
1322
                //data for sub-pixel localization
1323
                out<<", uniform samplerRECT texS";
1324
        }
1325

    
1326
        //use 9 float4 to store histogram of 36 directions
1327
        out<<")                \n"
1328
        "{                                                                                                        \n"
1329
        "        float4 bins[10];                                                                \n"
1330
        "        for (int i=0; i<9; i++) bins[i] = float4(0,0,0,0);        \n"
1331
        "        const float4 loc = texRECT(tex, TexCoord0);                        \n"
1332
        "        const bool orientation_mode = (size.z != 0);                        \n"
1333
        "        float2 pos = loc.xy;                                                        \n"
1334
        "        float sigma = orientation_mode? abs(size.z) : loc.w; \n";
1335
        if(GlobalUtil::_SubpixelLocalization || GlobalUtil::_KeepExtremumSign)
1336
        {
1337
                out<<
1338
        "        if(orientation_mode) {\n"
1339
        "                float4 keyx = texRECT(texS, pos);\n"
1340
        "                sigma = sigma * pow(size.w, keyx.w); \n"
1341
        "                pos.xy = pos.xy + keyx.yz; \n"
1342
        "                #if " << GlobalUtil::_KeepExtremumSign << "\n"
1343
        "                        if(keyx.x<0.6) sigma = - sigma;\n"
1344
        "                #endif\n"
1345
        "        }\n";
1346
        }
1347

    
1348
        out<<
1349
        "        //bool fixed_orientation = (size.z < 0);                \n"
1350
        "        if(size.z < 0) {FeatureData = float4(pos, 0, sigma); return;}"
1351
        "        const float gsigma = sigma * GAUSSIAN_WF;                                \n"
1352
        "        const float2 win = abs(sigma.xx) * (SAMPLE_WF * GAUSSIAN_WF);        \n"
1353
        "        const float2 dim = size.xy;                                                        \n"
1354
        "        const float dist_threshold = win.x*win.x+0.5;                        \n"
1355
        "        const float factor = -0.5/(gsigma*gsigma);                        \n"
1356
        "        float4 sz;        float2 spos;                                                \n"
1357
        "        //if(any(pos.xy <= 1)) discard;                                        \n"
1358
        "        sz.xy = max( pos - win, float2(1,1));                        \n"
1359
        "        sz.zw = min( pos + win, dim-2);                                \n"
1360
        "        sz = floor(sz)+0.5;";
1361
        //loop to get the histogram
1362

    
1363
        out<<"\n"
1364
        "        for(spos.y = sz.y; spos.y <= sz.w;        spos.y+=1.0)                                \n"
1365
        "        {                                                                                                                                \n"
1366
        "                for(spos.x = sz.x; spos.x <= sz.z;        spos.x+=1.0)                        \n"
1367
        "                {                                                                                                                        \n"
1368
        "                        const float2 offset = spos - pos;                                                \n"
1369
        "                        const float sq_dist = dot(offset,offset);                                \n"
1370
        "                        if( sq_dist < dist_threshold){                                                        \n"
1371
        "                                const float4 cc = texRECT(gradTex, spos);                        \n"
1372
        "                                const float grad = cc.b;        float theta = cc.a;                \n"
1373
        "                                float idx = floor(degrees(theta)*0.1);                \n"
1374
        "                                const float weight = grad*exp(sq_dist * factor);                                \n"
1375
        "                                if(idx < 0 ) idx += 36;                                                                        \n"
1376
        "                                const float vidx = 4.0 * fract(idx * 0.25);//fmod(idx, 4);                                                                \n"
1377
        "                                const float4 inc = weight*float4(vidx == float4(0,1,2,3));        ";
1378

    
1379
        if(GlobalUtil::_UseDynamicIndexing && strcmp(cgGetProfileString(ProgramCG::_FProfile), "gp4fp")==0)
1380
//        if(ProgramCG::_FProfile == CG_PROFILE_GPU_FP) this enumerant is not defined in cg1.5
1381
        {
1382
                //gp_fp supports dynamic indexing
1383
                out<<"\n"
1384
        "                                int iidx = int(floor(idx*0.25));        \n"
1385
        "                                bins[iidx]+=inc;                                        \n"
1386
        "                        }                                                                                \n"
1387
        "                }                                                                                        \n"
1388
        "        }";
1389

    
1390
        }else
1391
        {
1392
                //nvfp40 still does not support dynamic array indexing
1393
                //unrolled binary search...
1394
                out<<"\n"
1395
        "                                if(idx < 16)                                                        \n"
1396
        "                                {                                                                                \n"
1397
        "                                        if(idx < 8)                                                        \n"
1398
        "                                        {                                                                        \n"
1399
        "                                                if(idx < 4)        {        bins[0]+=inc;}        \n"
1400
        "                                                else                {        bins[1]+=inc;}        \n"
1401
        "                                        }else                                                                \n"
1402
        "                                        {                                                                        \n"
1403
        "                                                if(idx < 12){        bins[2]+=inc;}        \n"
1404
        "                                                else                {        bins[3]+=inc;}        \n"
1405
        "                                        }                                                                        \n"
1406
        "                                }else if(idx < 32)                                                \n"
1407
        "                                {                                                                                \n"
1408
        "                                        if(idx < 24)                                                \n"
1409
        "                                        {                                                                        \n"
1410
        "                                                if(idx <20)        {        bins[4]+=inc;}        \n"
1411
        "                                                else                {        bins[5]+=inc;}        \n"
1412
        "                                        }else                                                                \n"
1413
        "                                        {                                                                        \n"
1414
        "                                                if(idx < 28){        bins[6]+=inc;}        \n"
1415
        "                                                else                {        bins[7]+=inc;}        \n"
1416
        "                                        }                                                                        \n"
1417
        "                                }else                                                 \n"
1418
        "                                {                                                                                \n"
1419
        "                                        bins[8]+=inc;                                                \n"
1420
        "                                }                                                                                \n"
1421
        "                        }                                                                                \n"
1422
        "                }                                                                                        \n"
1423
        "        }";
1424

    
1425
        }
1426

    
1427
        WriteOrientationCodeToStream(out);
1428

    
1429
        ProgramCG * program;
1430
        s_orientation = program = new ProgramCG(buffer);
1431
        _param_orientation_gtex = cgGetNamedParameter(*program, "gradTex");
1432
        _param_orientation_size = cgGetNamedParameter(*program, "size");
1433
        _param_orientation_stex = cgGetNamedParameter(*program, "texS");
1434
}
1435

    
1436
void ShaderBagCG::WriteOrientationCodeToStream(std::ostream& out)
1437
{
1438
        //smooth histogram and find the largest
1439
/*
1440
        smoothing kernel:         (1 3 6 7 6 3 1 )/27
1441
        the same as 3 pass of (1 1 1)/3 averaging
1442
        maybe better to use 4 pass on the vectors...
1443
*/
1444

    
1445

    
1446
        //the inner loop on different array numbers is always unrolled in fp40
1447

    
1448
        //bug fixed here:)
1449
        out<<"\n"
1450
        "        float3x3 mat1 = float3x3(1, 0, 0, 3, 1, 0, 6, 3, 1)/27.0;; //bug fix.. \n"
1451
        "        float4x4 mat2 = float4x4( 7, 6, 3, 1, 6, 7, 6, 3, 3, 6, 7, 6, 1, 3, 6, 7)/27.0;;\n"
1452
        "        for (int j=0; j<2; j++)                                                                \n"
1453
        "        {                                                                                                \n"
1454
        "                float4 prev  = bins[8];                                                \n"
1455
        "                bins[9]                 = bins[0];                                                \n"
1456
        "                for (int i=0; i<9; i++)                                                        \n"
1457
        "                {                                                                                                \n"
1458
        "                        float4 newb        =        mul ( bins[i], mat2);                \n"
1459
        "                        newb.xyz        +=        mul ( prev.yzw, mat1);                \n"
1460
        "                        prev = bins[i];                                                                \n"
1461
        "                        newb.wzy        +=        mul        ( bins[i+1].zyx, mat1);        \n"
1462
        "                        bins[i] = newb;                                                        \n"
1463
        "                }                                                                                                \n"
1464
        "        }";
1465

    
1466

    
1467
        //find the maximum voting
1468
        out<<"\n"
1469
        "        float4 maxh; float2 maxh2; float4 maxh4 = bins[0];                                \n"
1470
        "        for (int i=1; i<9; i++) maxh4 = max(maxh4, bins[i]);                                \n"
1471
        "        maxh2 = max(maxh4.xy, maxh4.zw); maxh = float4(max(maxh2.x, maxh2.y));";
1472

    
1473
        char *testpeak_code;
1474
        char *savepeak_code;
1475

    
1476

    
1477

    
1478
        //save two/three/four orientations with the largest votings?
1479

    
1480
        //
1481
        if(GlobalUtil::_MaxOrientation>1)
1482
        {
1483
                out<<"\n"
1484
        "        float4 Orientations = float4(0, 0, 0, 0);                                \n"
1485
        "        float4 weights = float4(0,0,0,0);                ";        
1486
                
1487
                testpeak_code = "\n"
1488
        "                {test = bins[i]>hh;";
1489

    
1490
                //save the orientations in weight-decreasing order
1491
                if(GlobalUtil::_MaxOrientation ==2)
1492
                {
1493
                savepeak_code = "\n"
1494
        "                if(weight <=weights.g){}\n"
1495
        "                else if(weight >weights.r)\n"
1496
        "                {weights.rg = float2(weight, weights.r); Orientations.rg = float2(th, Orientations.r);}\n"
1497
        "                else {weights.g = weight; Orientations.g = th;}";
1498

    
1499
                }else if(GlobalUtil::_MaxOrientation ==3)
1500
                {
1501
                savepeak_code = "\n"
1502
        "                if(weight <=weights.b){}\n"
1503
        "                else if(weight >weights.r)\n"
1504
        "                {weights.rgb = float3(weight, weights.rg); Orientations.rgb = float3(th, Orientations.rg);}\n"
1505
        "                else if(weight >weights.g)\n"
1506
        "                {weights.gb = float2(weight, weights.g); Orientations.gb = float2(th, Orientations.g);}\n"
1507
        "                else {weights.b = weight; Orientations.b = th;}";
1508
                }else
1509
                {
1510
                savepeak_code = "\n"
1511
        "                if(weight <=weights.a){}\n"
1512
        "                else if(weight >weights.r)\n"
1513
        "                {weights = float4(weight, weights.rgb); Orientations = float4(th, Orientations.rgb);}\n"
1514
        "                else if(weight >weights.g)\n"
1515
        "                {weights.gba = float3(weight, weights.gb); Orientations.gba = float3(th, Orientations.gb);}\n"
1516
        "                else if(weight >weights.b)\n"
1517
        "                {weights.ba = float2(weight, weights.b); Orientations.ba = float2(th, Orientations.b);}\n"
1518
        "                else {weights.a = weight; Orientations.a = th;}";
1519
                }
1520

    
1521
        }else
1522
        {
1523
                out<<"\n"
1524
        "        float Orientations = 0;                                ";
1525
                testpeak_code ="\n"
1526
        "                if(npeaks==0){                                                                \n"
1527
        "                test = (bins[i] >= maxh)        ;";
1528
                savepeak_code="\n"
1529
        "                                npeaks++;                                                                \n"
1530
        "                                Orientations = th.x;";
1531

    
1532
        }
1533

    
1534
        //find the peaks
1535
        //the following loop will be unrolled
1536

    
1537
        out<<"\n"
1538
        "        const float4 hh = maxh * ORIENTATION_THRESHOLD;        bool4 test;        \n"
1539
        "        bins[9] = bins[0];                                                                \n"
1540
        "        float npeaks = 0, k = 0;                        \n"
1541
        "        float prevb        = bins[8].w;                                                \n"
1542
        "        for (int i = 0; i <9 ; i++)                                                \n"
1543
        "        {"
1544
                <<testpeak_code<<"                                                                        \n"
1545
        "                if( any ( test.xy || test.zw) )                                                        \n"
1546
        "                {                                                                                        \n"
1547
        "                        if(test.r && bins[i].x > prevb && bins[i].x > bins[i].y )        \n"
1548
        "                        {                                                                                        \n"
1549
        "                            float        di = 0.5 * (bins[i].y-prevb) / (bins[i].x *2.0 -bins[i].y -prevb) ; \n"
1550
        "                                float        th = (k+di+0.5);        float weight = bins[i].x;"
1551
                                        <<savepeak_code<<"\n"
1552
        "                        }\n"
1553
        "                        else if(test.g && all( bins[i].yy > bins[i].xz) )        \n"
1554
        "                        {                                                                                        \n"
1555
        "                            float        di = 0.5 * (bins[i].z-bins[i].x) / (bins[i].y * 2.0 - bins[i].z - bins[i].x) ; \n"
1556
        "                                float        th = (k+di+1.5);        float weight = bins[i].y;                                "
1557
                                        <<savepeak_code<<"        \n"
1558
        "                        }"
1559
                <<"\n"
1560
        "                        if(test.b && all( bins[i].zz > bins[i].yw) )        \n"
1561
        "                        {                                                                                        \n"
1562
        "                            float        di = 0.5 * (bins[i].w-bins[i].y) / (bins[i].z * 2.0-bins[i].w-bins[i].y) ; \n"
1563
        "                                float        th = (k+di+2.5);        float weight = bins[i].z;                                "
1564
                                        <<savepeak_code<<"        \n"
1565
        "                        }\n"
1566
        "                        else if(test.a && bins[i].w > bins[i].z && bins[i].w > bins[i+1].x )        \n"
1567
        "                        {                                                                                        \n"
1568
        "                            float        di = 0.5 * (bins[i+1].x-bins[i].z) / (bins[i].w * 2.0- bins[i+1].x-bins[i].z) ; \n"
1569
        "                                float        th = (k+di+3.5);        float weight = bins[i].w;                                "
1570
                                        <<savepeak_code<<"        \n"
1571
        "                        }\n"
1572
        "                }}\n"
1573
        "                k = k + 4.0;                                                \n"
1574
        "                prevb = bins[i].w;\n"
1575
        "        }";
1576
        //WRITE output
1577
        if(GlobalUtil::_OrientationPack2)
1578
        {
1579
                //pack two orientations in one float
1580
        out<<"\n"
1581
        "         if(orientation_mode){\n"
1582
        "                Orientations.xy = frac(Orientations.xy / 36.0 + 1.0);\n"
1583
        "                if(weights.x <= 0) Orientations.x = 1.0;\n"
1584
        "                if(weights.y <= 0) Orientations.y = 1.0;\n"
1585
        "                float packed_orientation = pack_2ushort(Orientations.xy); \n"
1586
        "                FeatureData = float4(pos, packed_orientation, sigma);\n"
1587
        "        }else{\n"
1588
        "                FeatureData = float4(pos, radians((Orientations.x)*10.0), sigma);\n"
1589
        "        }\n";                
1590
        }else if(GlobalUtil::_MaxOrientation>1)
1591
        {
1592
        out<<"\n"
1593
        "         if(orientation_mode){\n"
1594
        "         npeaks = dot(float4(1,1,"
1595
                        <<(GlobalUtil::_MaxOrientation>2 ? 1 : 0)<<","
1596
                        <<(GlobalUtil::_MaxOrientation >3? 1 : 0)<<"), float4(weights>hh));\n"
1597
        "                OrientationData = radians((Orientations )*10.0);\n"
1598
        "                FeatureData = float4(pos, npeaks, sigma);\n"
1599
        "        }else{\n"
1600
        "                FeatureData = float4(pos, radians((Orientations.x)*10.0), sigma);\n"
1601
        "        }\n";
1602
        }else
1603
        {
1604
        out<<"\n"
1605
        "         FeatureData = float4(pos, radians((Orientations.x)*10.0), sigma);";
1606
        }
1607
        //end
1608
        out<<"\n"
1609
        "}\n"<<'\0';
1610

    
1611

    
1612
}
1613

    
1614
void ShaderBagCG::SetSimpleOrientationInput(int oTex, float sigma, float sigma_step)
1615
{
1616
        cgGLSetTextureParameter(_param_orientation_gtex, oTex);
1617
        cgGLEnableTextureParameter(_param_orientation_gtex);
1618
        cgGLSetParameter1f(_param_orientation_size, sigma);
1619
}
1620

    
1621
void ShaderBagCG::SetFeatureOrientationParam(int gtex, int width, int height, float sigma, int stex, float step)
1622
{
1623
        ///
1624
        cgGLSetTextureParameter(_param_orientation_gtex, gtex);        
1625
        cgGLEnableTextureParameter(_param_orientation_gtex);
1626

    
1627
        if((GlobalUtil::_SubpixelLocalization || GlobalUtil::_KeepExtremumSign)&& stex)
1628
        {
1629
                //specify texutre for subpixel subscale localization
1630
                cgGLSetTextureParameter(_param_orientation_stex, stex);
1631
                cgGLEnableTextureParameter(_param_orientation_stex);
1632
        }
1633

    
1634
        float size[4];
1635
        size[0] = (float)width;
1636
        size[1] = (float)height;
1637
        size[2] = sigma;
1638
        size[3] = step;
1639
        cgGLSetParameter4fv(_param_orientation_size, size);
1640

    
1641
}
1642

    
1643
void ShaderBagCG::SetFeatureDescirptorParam(int gtex, int otex, float dwidth, float fwidth,  float width, float height, float sigma)
1644
{
1645
        ///
1646
        cgGLSetTextureParameter(_param_descriptor_gtex, gtex);        
1647
        cgGLEnableTextureParameter(_param_descriptor_gtex);
1648

    
1649
        float dsize[4] ={dwidth, 1.0f/dwidth, fwidth, 1.0f/fwidth};
1650
        cgGLSetParameter4fv(_param_descriptor_dsize, dsize);
1651
        float size[3];
1652
        size[0] = width;
1653
        size[1] = height;
1654
        size[2] = GlobalUtil::_DescriptorWindowFactor;        
1655
        cgGLSetParameter3fv(_param_descriptor_size, size);
1656
}
1657

    
1658

    
1659
///////////////////////////////////////////////////////////////////////////////////
1660
/////////////////////////////////PACKED VERSION?///////////////////////////////////
1661

    
1662
// Constructor: the only work done here is the call to ProgramCG::InitContext().
ShaderBagPKCG::ShaderBagPKCG()
{
    ProgramCG::InitContext();
}
1666

    
1667
void ShaderBagPKCG::UnloadProgram()
1668
{
1669

    
1670
        cgGLUnbindProgram(ProgramCG::_FProfile);
1671
        cgGLDisableProfile(ProgramCG::_FProfile);
1672
}
1673

    
1674
void ShaderBagPKCG::LoadFixedShaders()
1675
{
1676
        ProgramCG * program;
1677

    
1678
        /*
1679
        char *rgb2gray_packing_code =
1680
                "void main(uniform samplerRECT rgbTex, in float4 TexCoord0 : TEXCOORD0, \n"
1681
        "                in float4 TexCoord1 : TEXCOORD1, in float4 TexCoord2 : TEXCOORD2, \n"
1682
        "                in float4 TexCoord3 : TEXCOORD3, out float4 FragData : COLOR0){\n"
1683
        "                const float3 weight = vec3(0.299, 0.587, 0.114);\n"
1684
        "                FragData.r = dot(weight, texRECT(rgbTex,TexCoord0.st ).rgb);\n"
1685
        "                FragData.g = dot(weight, texRECT(rgbTex,TexCoord1.st ).rgb);\n"
1686
        "                FragData.b = dot(weight, texRECT(rgbTex,TexCoord2.st ).rgb);\n"
1687
        "                FragData.a = dot(weight, texRECT(rgbTex,TexCoord3.st ).rgb);}";//
1688
        s_gray = new ProgramCG( rgb2gray_packing_code);
1689
        */
1690

    
1691
        s_gray = new ProgramCG( 
1692
        "void main(float4 TexCoord0 : TEXCOORD0, out float4 FragColor : COLOR0, uniform samplerRECT tex){\n"
1693
        "float intensity = dot(float3(0.299, 0.587, 0.114), texRECT(tex,TexCoord0.xy ).rgb);\n"
1694
        "FragColor= float4(intensity, intensity, intensity, 1.0);}"        );
1695

    
1696

    
1697
        s_sampling = new ProgramCG(
1698
        "void main(uniform samplerRECT tex, in float4 TexCoord0 : TEXCOORD0, \n"
1699
        "                in float4 TexCoord1 : TEXCOORD1, in float4 TexCoord2 : TEXCOORD2, \n"
1700
        "                in float4 TexCoord3 : TEXCOORD3, out float4 FragData : COLOR0 ){\n"
1701
        "                FragData= float4(        texRECT(tex,TexCoord0.st ).r,texRECT(tex,TexCoord1.st ).r,\n"
1702
        "                                                        texRECT(tex,TexCoord2.st ).r,texRECT(tex,TexCoord3.st ).r);}"        );
1703

    
1704

    
1705
        s_margin_copy = program = new ProgramCG(
1706
        "void main(in float4 texCoord0: TEXCOORD0, out float4 FragColor: COLOR0, \n"
1707
        "uniform samplerRECT tex, uniform float4 truncate){\n"
1708
        "float4 cc = texRECT(tex, min(texCoord0.xy, truncate.xy)); \n"
1709
        "bool2 ob = texCoord0.xy < truncate.xy;\n"
1710
        "if(ob.y) { FragColor = (truncate.z ==0 ? cc.rrbb : cc.ggaa); } \n"
1711
        "else if(ob.x) {FragColor = (truncate.w <1.5 ? cc.rgrg : cc.baba);} \n"
1712
        "else {        float4 weights = float4(float4(0, 1, 2, 3) == truncate.w);\n"
1713
        "float v = dot(weights, cc); FragColor = v.xxxx;}}");
1714

    
1715
        _param_margin_copy_truncate = cgGetNamedParameter(*program, "truncate");
1716

    
1717

    
1718
        s_zero_pass = new ProgramCG("void main(out float4 FragColor : COLOR0){FragColor = 0;}");
1719

    
1720
        s_grad_pass = program = new ProgramCG(
1721
        "void main (\n"
1722
        "float4 TexCC : TEXCOORD0, float4 TexLC : TEXCOORD1,\n"
1723
        "float4 TexRC : TEXCOORD2, float4 TexCD : TEXCOORD3, float4 TexCU : TEXCOORD4,\n"
1724
        "out float4 FragData0 : COLOR0, out float4 FragData1 : COLOR1, \n"
1725
        "out float4 FragData2 : COLOR2, uniform samplerRECT tex, uniform samplerRECT texp)\n"
1726
        "{\n"
1727
        "        float4 v1, v2, gg;\n"
1728
        "        float4 cc = texRECT(tex, TexCC.xy);\n"
1729
        "        float4 cp = texRECT(texp, TexCC.xy);\n"
1730
        "        FragData0 = cc - cp; \n"
1731
        "        float4 cl = texRECT(tex, TexLC.xy);        float4 cr = texRECT(tex, TexRC.xy);\n"
1732
        "        float4 cd = texRECT(tex, TexCD.xy); float4 cu = texRECT(tex, TexCU.xy);\n"
1733
        "        float4 dx = (float4(cr.rb, cc.ga) - float4(cc.rb, cl.ga)).zxwy;\n"
1734
        "        float4 dy = (float4(cu.rg, cc.ba) - float4(cc.rg, cd.ba)).zwxy;\n"
1735
        "        FragData1 = 0.5 * sqrt(dx*dx + dy * dy);\n"
1736
        "        FragData2 = FragData1 > 0?  atan2(dy, dx) : float4(0);\n"
1737
        "}\n\0");
1738

    
1739
        _param_grad_pass_texp = cgGetNamedParameter(*program, "texp");
1740

    
1741

    
1742
        s_dog_pass = program = new ProgramCG(
1743
        "void main (float4 TexCC : TEXCOORD0, out float4 FragData0 : COLOR0, \n"
1744
        "        uniform samplerRECT tex, uniform samplerRECT texp)\n"
1745
        "{\n"
1746
        "        float4 cc = texRECT(tex, TexCC.xy);\n"
1747
        "        float4 cp = texRECT(texp, TexCC.xy);\n"
1748
        "        FragData0 = cc - cp; \n"
1749
        "}\n\0");
1750

    
1751
        ////
1752
        if(GlobalUtil::_SupportFP40)
1753
        {
1754
                LoadOrientationShader();
1755
                if(GlobalUtil::_DescriptorPPT) LoadDescriptorShader();
1756
        }else
1757
        {
1758
                s_orientation = program =  new ProgramCG(
1759
                "void main(out float4 FragColor : COLOR0, \n"
1760
        "        uniform samplerRECT fTex, uniform samplerRECT oTex, \n"
1761
        "        uniform float2 size, \n"
1762
        "        in float2 tpos : TEXCOORD0){\n"
1763
        "        float4 cc = texRECT(fTex, tpos);\n"
1764
        "        float2 co = cc.xy * 0.5; \n"
1765
        "        float4 oo = texRECT(oTex, co);\n"
1766
        "        bool2 bo = frac(co) < 0.5; \n"
1767
        "        float o = bo.y? (bo.x? oo.r : oo.g) : (bo.x? oo.b : oo.a); \n"
1768
        "        FragColor = float4(cc.rg, o, size.x * pow(size.y, cc.a));}");  
1769
                _param_orientation_gtex= cgGetNamedParameter(*program, "oTex");
1770
                _param_orientation_size= cgGetNamedParameter(*program, "size");
1771

    
1772
                GlobalUtil::_FullSupported = 0;
1773
                GlobalUtil::_MaxOrientation = 0;
1774
                GlobalUtil::_DescriptorPPT = 0;
1775
                std::cerr<<"Orientation simplified on this hardware"<<endl;
1776
                std::cerr<<"Descriptor ignored on this hardware"<<endl;
1777
        }
1778
}
1779

    
1780
void ShaderBagPKCG::LoadDisplayShaders()
1781
{
1782
        ProgramCG * program;
1783

    
1784
        s_copy_key = new ProgramCG(
1785
        "void main(in float4 TexCoord0 : TEXCOORD0, out float4 FragColor : COLOR0, uniform samplerRECT tex){\n"
1786
        "FragColor.rg= texRECT(tex, TexCoord0.xy).rg; FragColor.ba = float2(0,1);        }");
1787

    
1788
        //shader used to write a vertex buffer object
1789
        //which is used to draw the quads of each feature
1790
        s_vertex_list = program = new ProgramCG(
1791
        "void main(in float4 TexCoord0: TEXCOORD0,\n"
1792
        "uniform float4 sizes, \n"
1793
        "uniform samplerRECT tex, \n"
1794
        "out float4 FragColor: COLOR0){\n"
1795
        "float fwidth = sizes.y; \n"
1796
        "float twidth = sizes.z; \n"
1797
        "float rwidth = sizes.w; \n"
1798
        "float index = 0.1*(fwidth*floor(TexCoord0.y) + TexCoord0.x);\n"
1799
        "float px = fmod(index, twidth);\n"
1800
        "float2 tpos= floor(float2(px, index*rwidth))+0.5;\n"
1801
        "float4 cc = texRECT(tex, tpos );\n"
1802
        "float size = 3.0f * cc.a;// sizes.x;// \n"
1803
        "FragColor.zw = float2(0.0, 1.0);\n"
1804
        "if(any(cc.xy <=0)) {FragColor.xy = cc.xy;}else \n"
1805
        "{\n"
1806
        "        float type = frac(px);\n"
1807
        "        float2 dxy; float s, c;\n"
1808
        "        dxy.x = type < 0.1 ? 0 : ((type <0.5 || type > 0.9)? size : -size);\n"
1809
        "        dxy.y = type < 0.2 ? 0 : ((type < 0.3 || type > 0.7 )? -size :size); \n"
1810
        "        sincos(cc.b, s, c);\n"
1811
        "        FragColor.x = cc.x + c*dxy.x-s*dxy.y;\n"
1812
        "        FragColor.y = cc.y + c*dxy.y+s*dxy.x;}\n"
1813
        "}\n\0");
1814
        /*FragColor = float4(tpos, 0.0, 1.0);}\n\0");*/
1815

    
1816
        _param_genvbo_size = cgGetNamedParameter(*program, "sizes");
1817

    
1818
        s_display_gaussian = new ProgramCG(
1819
                "void main(uniform samplerRECT tex, in float4 TexCoord0:TEXCOORD0, out float4 FragData: COLOR0 ){\n"
1820
                "float4 pc = texRECT(tex, TexCoord0.xy);         bool2 ff = (frac(TexCoord0.xy) < 0.5);\n"
1821
    "float v = ff.y?(ff.x? pc.r : pc.g):(ff.x?pc.b:pc.a); FragData = float4(v.xxx, 1.0);}");
1822

    
1823
        s_display_dog =  new ProgramCG(
1824
        "void main(in float4 TexCoord0 : TEXCOORD0, out float4 FragColor : COLOR0, uniform samplerRECT tex){\n"
1825
        "float4 pc = texRECT(tex, TexCoord0.xy); bool2 ff = (frac(TexCoord0.xy) < 0.5);\n"
1826
        "float v = ff.y ?(ff.x ? pc.r : pc.g):(ff.x ? pc.b : pc.a);float g = (0.5+20.0*v);\n"
1827
        "FragColor = float4(g, g, g, 1.0);}" );
1828

    
1829

    
1830
        s_display_grad = new ProgramCG(
1831
        "void main(in float4 TexCoord0 : TEXCOORD0, out float4 FragColor : COLOR0, uniform samplerRECT tex){\n"
1832
        "float4 pc = texRECT(tex, TexCoord0.xy); bool2 ff = (frac(TexCoord0.xy) < 0.5);\n"
1833
        "float v = ff.y ?(ff.x ? pc.r : pc.g):(ff.x ? pc.b : pc.a); FragColor = float4(5.0 *v.xxx, 1.0); }");
1834

    
1835
        s_display_keys= new ProgramCG(
1836
        "void main(in float4 TexCoord0 : TEXCOORD0, out float4 FragColor : COLOR0, uniform samplerRECT tex){\n"
1837
        "float4 oc = texRECT(tex, TexCoord0.xy); \n"
1838
        "float4 cc = float4(abs(oc.r) == float4(1.0, 2.0, 3.0, 4.0));\n"
1839
        "bool2 ff = (frac(TexCoord0.xy) < 0.5);\n"
1840
        "float v = ff.y ?(ff.x ? cc.r : cc.g):(ff.x ? cc.b : cc.a);\n"
1841
        "if(oc.r == 0) discard;\n"
1842
        "else if(oc.r > 0) FragColor = float4(1.0, 0, 0,1.0); \n"
1843
        "else FragColor = float4(0.0,1.0,0.0,1.0); }" );                
1844
}
1845

    
1846
void ShaderBagPKCG::LoadGenListShader(int ndoglev, int nlev)
1847
{
1848

    
1849
        //the V2 algorithms are only slightly faster, but way more complicated
1850
        //LoadGenListShaderV2(ndoglev, nlev); return; 
1851
        ProgramCG * program;
1852

    
1853
        s_genlist_init_tight = new ProgramCG(
1854
        "void main (uniform samplerRECT tex, in float4 TexCoord0 : TEXCOORD0,\n"
1855
        "in float4 TexCoord1 : TEXCOORD1, in float4 TexCoord2 : TEXCOORD2, \n"
1856
        "in float4 TexCoord3 : TEXCOORD3, out float4 FragColor : COLOR0)\n"
1857
        "{\n"
1858
        "        float4 data = float4(        texRECT(tex, TexCoord0.xy).r,\n"
1859
        "                                                        texRECT(tex, TexCoord1.xy).r,\n"
1860
        "                                                        texRECT(tex, TexCoord2.xy).r,\n"
1861
        "                                                        texRECT(tex, TexCoord3.xy).r);\n"
1862
        "        FragColor = float4(data != 0);\n"
1863
        "}");
1864

    
1865
        s_genlist_init_ex = program = new ProgramCG(
1866
        "void main (uniform float4 bbox, uniform samplerRECT tex, \n"
1867
        "in float4 TexCoord0 : TEXCOORD0, in float4 TexCoord1 : TEXCOORD1, \n"
1868
        "in float4 TexCoord2 : TEXCOORD2, in float4 TexCoord3 : TEXCOORD3,\n"
1869
        "out float4 FragColor : COLOR0)\n"
1870
        "{\n"
1871
        "        bool4 helper1 = abs(texRECT(tex, TexCoord0.xy).r)== float4(1.0, 2.0, 3.0, 4.0); \n"
1872
        "        bool4 helper2 = abs(texRECT(tex, TexCoord1.xy).r)== float4(1.0, 2.0, 3.0, 4.0);\n"
1873
        "        bool4 helper3 = abs(texRECT(tex, TexCoord2.xy).r)== float4(1.0, 2.0, 3.0, 4.0);\n"
1874
        "        bool4 helper4 = abs(texRECT(tex, TexCoord3.xy).r)== float4(1.0, 2.0, 3.0, 4.0);\n"
1875
        "        bool4 bx1 = TexCoord0.xxyy < bbox; \n"
1876
        "        bool4 bx4 = TexCoord3.xxyy < bbox; \n"
1877
        "        bool4 bx2 = bool4(bx4.xy, bx1.zw); \n"
1878
        "        bool4 bx3 = bool4(bx1.xy, bx4.zw);\n"
1879
        "        helper1 = (bx1.xyxy && bx1.zzww && helper1);\n"
1880
        "        helper2 = (bx2.xyxy && bx2.zzww && helper2);\n"
1881
        "        helper3 = (bx3.xyxy && bx3.zzww && helper3);\n"
1882
        "        helper4 = (bx4.xyxy && bx4.zzww && helper4);\n"
1883
        "        FragColor.r = any(helper1.xy || helper1.zw);        \n"
1884
        "        FragColor.g = any(helper2.xy || helper2.zw);        \n"
1885
        "        FragColor.b = any(helper3.xy || helper3.zw);        \n"
1886
        "        FragColor.a = any(helper4.xy || helper4.zw);        \n"
1887
        "}");
1888
        _param_genlist_init_bbox = cgGetNamedParameter( *program, "bbox");
1889

    
1890
        s_genlist_end = program = new ProgramCG(
1891
                GlobalUtil::_KeepExtremumSign == 0 ? 
1892
        
1893
        "void main(        uniform samplerRECT tex, uniform samplerRECT ktex,\n"
1894
        "                        in float4 tpos : TEXCOORD0,        out float4 FragColor : COLOR0)\n"
1895
        "{\n"
1896
        "        float4 tc = texRECT( tex, tpos.xy);\n"
1897
        "        float2 pos = tc.rg; float index = tc.b;\n"
1898
        "        float4 tk = texRECT( ktex, pos); \n"
1899
        "        float4 keys = float4(abs(tk.x) == float4(1.0, 2.0, 3.0, 4.0)); \n"
1900
        "        float2 opos; \n"
1901
        "        opos.x = dot(keys, float4(-0.5, 0.5, -0.5, 0.5));\n"
1902
        "        opos.y = dot(keys, float4(-0.5, -0.5, 0.5, 0.5));\n"
1903
        "        FragColor = float4(opos + pos + pos + tk.yz, 1.0, tk.w);\n"
1904
        "}" : 
1905
        
1906
        "void main(        uniform samplerRECT tex, uniform samplerRECT ktex,\n"
1907
        "                        in float4 tpos : TEXCOORD0,        out float4 FragColor : COLOR0)\n"
1908
        "{\n"
1909
        "        float4 tc = texRECT( tex, tpos.xy);\n"
1910
        "        float2 pos = tc.rg; float index = tc.b;\n"
1911
        "        float4 tk = texRECT( ktex, pos); \n"
1912
        "        float4 keys = float4(abs(tk.x) == float4(1.0, 2.0, 3.0, 4.0)); \n"
1913
        "        float2 opos; \n"
1914
        "        opos.x = dot(keys, float4(-0.5, 0.5, -0.5, 0.5));\n"
1915
        "        opos.y = dot(keys, float4(-0.5, -0.5, 0.5, 0.5));\n"
1916
        "        FragColor = float4(opos + pos + pos + tk.yz, sign(tk.x), tk.w);\n"
1917
        "}"        
1918
        );
1919
        _param_genlist_end_ktex = cgGetNamedParameter(*program, "ktex");
1920

    
1921
        //reduction ...
1922
        s_genlist_histo = new ProgramCG(
1923
        "void main (uniform samplerRECT tex, in float2 TexCoord0 : TEXCOORD0,\n"
1924
        "in float2 TexCoord1 : TEXCOORD1, in float2 TexCoord2 : TEXCOORD2, \n"
1925
        "in float2 TexCoord3 : TEXCOORD3, out float4 FragColor : COLOR0)\n"
1926
        "{\n"
1927
        "        float4 helper; float4 helper2; \n"
1928
        "        helper = texRECT(tex, TexCoord0); helper2.xy = helper.xy + helper.zw; \n"
1929
        "        helper = texRECT(tex, TexCoord1); helper2.zw = helper.xy + helper.zw; \n"
1930
        "        FragColor.rg = helper2.xz + helper2.yw;\n"
1931
        "        helper = texRECT(tex, TexCoord2); helper2.xy = helper.xy + helper.zw; \n"
1932
        "        helper = texRECT(tex, TexCoord3); helper2.zw = helper.xy + helper.zw; \n"
1933
        "        FragColor.ba= helper2.xz+helper2.yw;\n"
1934
        "}");
1935

    
1936

    
1937
        //read of the first part, which generates tex coordinates 
1938

    
1939
        s_genlist_start= program =  ShaderBagCG::LoadGenListStepShader(1, 1);
1940
        _param_ftex_width= cgGetNamedParameter(*program, "width");
1941
        _param_genlist_start_tex0 = cgGetNamedParameter(*program, "tex0");
1942
        //stepping
1943
        s_genlist_step = program = ShaderBagCG::LoadGenListStepShader(0, 1);
1944
        _param_genlist_step_tex= cgGetNamedParameter(*program, "tex");
1945
        _param_genlist_step_tex0= cgGetNamedParameter(*program, "tex0");
1946

    
1947

    
1948
}
1949

    
1950

    
1951

    
1952
void ShaderBagPKCG::LoadGenListShaderV2(int ndoglev, int nlev)
1953
{
1954
        ProgramCG * program;
1955

    
1956
        s_genlist_init_tight = new ProgramCG(
1957
        "void main (uniform samplerRECT tex, in float4 TexCoord0 : TEXCOORD0,\n"
1958
        "in float4 TexCoord1 : TEXCOORD1, in float4 TexCoord2 : TEXCOORD2, \n"
1959
        "in float4 TexCoord3 : TEXCOORD3, out float4 FragColor : COLOR0)\n"
1960
        "{\n"
1961
        "        float4 data1 = texRECT(tex, TexCoord0.xy);\n"
1962
        "        float4 data2 = texRECT(tex, TexCoord1.xy);\n"
1963
        "        float4 data3 = texRECT(tex, TexCoord2.xy);\n"
1964
        "        float4 data4 = texRECT(tex, TexCoord3.xy);\n"
1965
        "        bool4 helper1 = (abs(data1.r), float4(1.0, 2.0, 3.0, 4.0)); \n"
1966
        "        bool4 helper2 = (abs(data2.r), float4(1.0, 2.0, 3.0, 4.0));\n"
1967
        "        bool4 helper3 = (abs(data3.r), float4(1.0, 2.0, 3.0, 4.0));\n"
1968
        "        bool4 helper4 = (abs(data4.r), float4(1.0, 2.0, 3.0, 4.0));\n"
1969
        "        FragColor.r = any(helper1.xy || helper1.zw);        \n"
1970
        "        FragColor.g = any(helper2.xy || helper2.zw);        \n"
1971
        "        FragColor.b = any(helper3.xy || helper3.zw);        \n"
1972
        "        FragColor.a = any(helper4.xy || helper4.zw);        \n"
1973
        "        if(dot(FragColor, float4(1,1,1,1)) == 1) \n"
1974
        "        {\n"
1975
        "                //use a special method if there is only one in the 16, \n"
1976
        "                float4 data, helper; float2 pos, opos; \n"
1977
        "                if(FragColor.r){ \n"
1978
        "                        data = data1; helper = helper1; pos = TexCoord0.xy;\n"
1979
        "                }else if(FragColor.g){\n"
1980
        "                        data = data2; helper = helper2; pos = TexCoord1.xy;\n"
1981
        "                }else if(FragColor.b){\n"
1982
        "                        data = data3; helper = helper3; pos = TexCoord2.xy;\n"
1983
        "                }else{\n"
1984
        "                        data = data4; helper = helper4; pos = TexCoord3.xy;\n"
1985
        "                }\n"
1986
        "                opos.x = dot(helper, float4(-0.5, 0.5, -0.5, 0.5));\n"
1987
        "                opos.y = dot(helper, float4(-0.5, -0.5, 0.5, 0.5));\n"
1988
        "                FragColor = float4( pos + pos + opos + data.yz, -1, data.w); \n"
1989
        "        }\n"
1990
        "}");
1991

    
1992
        s_genlist_init_ex = program = new ProgramCG(
1993
        "void main (uniform float4 bbox, uniform samplerRECT tex, \n"
1994
        "in float4 TexCoord0 : TEXCOORD0, in float4 TexCoord1 : TEXCOORD1, \n"
1995
        "in float4 TexCoord2 : TEXCOORD2, in float4 TexCoord3 : TEXCOORD3,\n"
1996
        "out float4 FragColor : COLOR0)\n"
1997
        "{\n"
1998
        "        float4 data1 = texRECT(tex, TexCoord0.xy);\n"
1999
        "        float4 data2 = texRECT(tex, TexCoord1.xy);\n"
2000
        "        float4 data3 = texRECT(tex, TexCoord2.xy);\n"
2001
        "        float4 data4 = texRECT(tex, TexCoord3.xy);\n"
2002
        "        bool4 helper1 = (abs(data1.r), float4(1.0, 2.0, 3.0, 4.0)); \n"
2003
        "        bool4 helper2 = (abs(data2.r), float4(1.0, 2.0, 3.0, 4.0));\n"
2004
        "        bool4 helper3 = (abs(data3.r), float4(1.0, 2.0, 3.0, 4.0));\n"
2005
        "        bool4 helper4 = (abs(data4.r), float4(1.0, 2.0, 3.0, 4.0));\n"
2006
        "        bool4 bx1 = TexCoord0.xxyy < bbox; \n"
2007
        "        bool4 bx4 = TexCoord3.xxyy < bbox; \n"
2008
        "        bool4 bx2 = bool4(bx4.xy, bx1.zw); \n"
2009
        "        bool4 bx3 = bool4(bx1.xy, bx4.zw);\n"
2010
        "        helper1 = bx1.xyxy && bx1.zzww && helper1; \n"
2011
        "        helper2 = bx2.xyxy && bx2.zzww && helper2; \n"
2012
        "        helper3 = bx3.xyxy && bx3.zzww && helper3; \n"
2013
        "        helper4 = bx4.xyxy && bx4.zzww && helper4; \n"
2014
        "        FragColor.r = any(helper1.xy || helper1.zw);        \n"
2015
        "        FragColor.g = any(helper2.xy || helper2.zw);        \n"
2016
        "        FragColor.b = any(helper3.xy || helper3.zw);        \n"
2017
        "        FragColor.a = any(helper4.xy || helper4.zw);        \n"
2018
        "        if(dot(FragColor, float4(1,1,1,1)) == 1) \n"
2019
        "        {\n"
2020
        "                //use a special method if there is only one in the 16, \n"
2021
        "                float4 data, helper; bool4 bhelper; float2 pos, opos; \n"
2022
        "                if(FragColor.r){ \n"
2023
        "                        data = data1; bhelper = helper1; pos = TexCoord0.xy;\n"
2024
        "                }else if(FragColor.g){\n"
2025
        "                        data = data2; bhelper = helper2; pos = TexCoord1.xy;\n"
2026
        "                }else if(FragColor.b){\n"
2027
        "                        data = data3; bhelper = helper3; pos = TexCoord2.xy;\n"
2028
        "                }else{\n"
2029
        "                        data = data4; bhelper = helper4; pos = TexCoord3.xy;\n"
2030
        "                }\n"
2031
        "                helper = float4(bhelper); \n"
2032
        "                opos.x = dot(helper, float4(-0.5, 0.5, -0.5, 0.5));\n"
2033
        "                opos.y = dot(helper, float4(-0.5, -0.5, 0.5, 0.5));\n"
2034
        "                FragColor = float4(pos + pos + opos + data.yz, -1, data.w); \n"
2035
        "        }\n"
2036
        "}");
2037
        _param_genlist_init_bbox = cgGetNamedParameter( *program, "bbox");
2038

    
2039
        s_genlist_end = program = new ProgramCG(
2040
        
2041
        "void main(        uniform samplerRECT tex, uniform samplerRECT ktex,\n"
2042
        "                        in float4 tpos : TEXCOORD0,        out float4 FragColor : COLOR0)\n"
2043
        "{\n"
2044
        "        float4 tc = texRECT( tex, tpos.xy);\n"
2045
        "        float2 pos = tc.rg; float index = tc.b;\n"
2046
        "        if(index == -1)\n"
2047
        "        {\n"
2048
        "                FragColor = float4(tc.xy, 0, tc.w);\n"
2049
        "        }else\n"
2050
        "        {\n"
2051
        "                float4 tk = texRECT( ktex, pos); \n"
2052
        "                float4 keys = float4(abs(tk.r) == float4(1.0, 2.0, 3.0, 4.0)); \n"
2053
        "                float2 opos; \n"
2054
        "                opos.x = dot(keys, float4(-0.5, 0.5, -0.5, 0.5));\n"
2055
        "                opos.y = dot(keys, float4(-0.5, -0.5, 0.5, 0.5));\n"
2056
        "                FragColor = float4(opos + pos + pos + tk.yz, 0, tk.w);\n"
2057
        "        }\n"
2058
        "}");
2059
        _param_genlist_end_ktex = cgGetNamedParameter(*program, "ktex");
2060

    
2061
        //reduction ...
2062
        s_genlist_histo = new ProgramCG(
2063
        "void main (uniform samplerRECT tex, in float2 TexCoord0 : TEXCOORD0,\n"
2064
        "in float2 TexCoord1 : TEXCOORD1, in float2 TexCoord2 : TEXCOORD2, \n"
2065
        "in float2 TexCoord3 : TEXCOORD3, out float4 FragColor : COLOR0)\n"
2066
        "{\n"
2067
        "        float4 helper[4]; float4 helper2; \n"
2068
        "        helper[0] = texRECT(tex, TexCoord0); helper2.xy = helper[0].xy + helper[0].zw; \n"
2069
        "        helper[1] = texRECT(tex, TexCoord1); helper2.zw = helper[1].xy + helper[1].zw; \n"
2070
        "        FragColor.rg = helper2.xz + helper2.yw;\n"
2071
        "        helper[2] = texRECT(tex, TexCoord2); helper2.xy = helper[2].xy + helper[2].zw; \n"
2072
        "        helper[3] = texRECT(tex, TexCoord3); helper2.zw = helper[3].xy + helper[3].zw; \n"
2073
        "        FragColor.ba= helper2.xz+helper2.yw;\n"
2074
        "        bool4 keyt = float4(helper[0].z, helper[1].z, helper[2].z, helper[3].z) == -1.0; \n"
2075
        "        float keyc = dot(float4(keyt), float4(1,1,1,1)); \n"
2076
        "        if(keyc == 1.0 && dot(FragColor, float4(1,1,1,1)) == -1.0) \n"
2077
        "        {\n"
2078
        "                if(keyt.x) FragColor = helper[0];\n"
2079
        "                else if(keyt.y) FragColor = helper[1]; \n"
2080
        "                else if(keyt.z) FragColor = helper[2]; \n"
2081
        "                else FragColor = helper[3]; \n"
2082
        "        }else\n"
2083
        "        {\n"
2084
        "                FragColor = keyt? float4(1,1,1,1) : FragColor;\n"
2085
        "        }\n"
2086
        "}");
2087

    
2088
        //read of the first part, which generates tex coordinates 
2089

    
2090
        s_genlist_start= program =  ShaderBagCG::LoadGenListStepShaderV2(1, 1);
2091
        _param_ftex_width= cgGetNamedParameter(*program, "width");
2092
        _param_genlist_start_tex0 = cgGetNamedParameter(*program, "tex0");
2093
        //stepping
2094
        s_genlist_step = program = ShaderBagCG::LoadGenListStepShaderV2(0, 1);
2095
        _param_genlist_step_tex= cgGetNamedParameter(*program, "tex");
2096
        _param_genlist_step_tex0= cgGetNamedParameter(*program, "tex0");
2097

    
2098

    
2099
}
2100

    
2101

    
2102

    
2103
// Generates the Cg source of one list-generation step shader (V2 flavour) and
// compiles it into a new ProgramCG, which the caller receives.
//   start : non-zero -> the index is derived from the fragment position and a
//           "width" uniform; zero -> position and index are read from a "tex"
//           sampler, and texels whose index is -1 are passed through untouched
//   step  : number of pyramid levels descended; one "texN" sampler is declared
//           and one block of descent code is emitted per level
// The source is assembled in a fixed 10240-byte stack buffer.
ProgramCG* ShaderBagCG::LoadGenListStepShaderV2(int start, int step)
{
    char buffer[10240];
    std::ostrstream out(buffer, 10240);

    // ---- signature -------------------------------------------------------
    out << "void main(out float4 FragColor : COLOR0, \n";
    for(int level = 0; level < step; ++level)
    {
        out << "uniform samplerRECT tex" << level << ",\n";
    }

    // ---- prologue: where do (pos, index) come from? ------------------------
    if(start)
    {
        out << "uniform float width, \nin float2 tpos : TEXCOORD0){\n";
        out << "float  index = floor(tpos.y) * width + floor(tpos.x);\n";
        out << "float2 pos = float2(0.5, 0.5);\n";
    }
    else
    {
        out << "uniform samplerRECT tex, in float2 tpos: TEXCOORD0 ){\n";
        out << "float4 tc = texRECT( tex, tpos);\n";
        out << "float2 pos = tc.rg; float index = tc.b;\n";
        out << "if(index==-1) {FragColor = tc; return;}\n";
    }
    out << "float2 sum;         float4 cc;\n";

    // ---- one descent block per level ---------------------------------------
    if(step > 0)
    {
        out << "float2 cpos = float2(-0.5, 0.5);\t float2 opos;\n";
    }
    for(int level = 0; level < step; ++level)
    {
        out << "cc = texRECT(tex" << level << ", pos);\n";
        out << "if(cc.z == -1){FragColor = cc; return;}";
        out << "sum.x = cc.r + cc.g;if (index < sum.x){ if(index < cc.r) opos = cpos.xx; else {opos = cpos.yx; index -= cc.r;}}\n";
        out << "else {index -= sum.x; if(index < cc.b) opos = cpos.xy; else{opos = cpos.yy; index -= cc.b;}}";
        out << "pos = (pos + pos + opos);\n";
    }

    // ---- epilogue ------------------------------------------------------------
    out << "FragColor = float4(pos, index, 1);\n";
    out << "}\n" << '\0';   // explicit terminator: the stream does not add one

    return new ProgramCG(buffer);
}
2146

    
2147

    
2148
void ShaderBagPKCG:: LoadKeypointShader(float threshold, float edge_threshold)
2149
{
2150
        //
2151
        ProgramCG * program;
2152
        char buffer[10240];
2153
        float threshold0 = threshold* (GlobalUtil::_SubpixelLocalization?0.8f:1.0f);
2154
        float threshold1 = threshold;
2155
        float threshold2 = (edge_threshold+1)*(edge_threshold+1)/edge_threshold;
2156
        ostrstream out(buffer, 10240);
2157
        out<<setprecision(8);
2158
        //tex(X)(Y)
2159
        //X: (CLR) (CENTER 0, LEFT -1, RIGHT +1)  
2160
        //Y: (CDU) (CENTER 0, DOWN -1, UP    +1) 
2161
        out <<        "#define THRESHOLD0 " << threshold0 << "\n"
2162
                        "#define THRESHOLD1 " << threshold1 << "\n"
2163
                        "#define THRESHOLD2 " << threshold2 << "\n";
2164

    
2165
        out<<
2166
        "void main (\n"
2167
        "float4 TexCC : TEXCOORD0, float4 TexLC : TEXCOORD1,\n"
2168
        "float4 TexRC : TEXCOORD2, float4 TexCD : TEXCOORD3, \n"
2169
        "float4 TexCU : TEXCOORD4, float4 TexLD : TEXCOORD5, \n"
2170
        "float4 TexLU : TEXCOORD6, float4 TexRD : TEXCOORD7,\n"
2171
        "out float4 FragData0 : COLOR0, uniform samplerRECT tex, \n"
2172
        "uniform samplerRECT texU, uniform samplerRECT texD)\n"
2173
        "{\n"
2174
        "        float2 TexRU = float2(TexRC.x, TexCU.y); \n"
2175
        "        float4 ccc = texRECT(tex, TexCC.xy);\n"
2176
        "        float4 clc = texRECT(tex, TexLC.xy);\n"
2177
        "        float4 crc = texRECT(tex, TexRC.xy);\n"
2178
        "        float4 ccd = texRECT(tex, TexCD.xy);\n"
2179
        "        float4 ccu = texRECT(tex, TexCU.xy);\n"
2180
        "        float4 cld = texRECT(tex, TexLD.xy);\n"
2181
        "        float4 clu = texRECT(tex, TexLU.xy);\n"
2182
        "        float4 crd = texRECT(tex, TexRD.xy);\n"
2183
        "        float4 cru = texRECT(tex, TexRU.xy);\n"
2184
        "        float4  cc = ccc;\n"
2185
        "        float4  v1[4], v2[4];\n"
2186
        "        v1[0] = float4(clc.g, ccc.g, ccd.b, ccc.b);\n"
2187
        "        v1[1] = float4(ccc.r, crc.r, ccd.a, ccc.a);\n"
2188
        "        v1[2] = float4(clc.a, ccc.a, ccc.r, ccu.r);\n"
2189
        "        v1[3] = float4(ccc.b, crc.b, ccc.g, ccu.g);\n"
2190
        "        v2[0] = float4(cld.a, clc.a, ccd.a, ccc.a);\n"
2191
        "        v2[1] = float4(ccd.b, ccc.b, crd.b, crc.b);\n"
2192
        "        v2[2] = float4(clc.g, clu.g, ccc.g, ccu.g);\n"
2193
        "        v2[3] = float4(ccc.r, ccu.r, crc.r, cru.r);\n"
2194

    
2195
        //test against 8 neighbours
2196
        //use variable to identify type of extremum
2197
        //1.0 for local maximum and -1.0 for minimum
2198
        <<
2199
        "        float4 key ={0, 0, 0, 0}; \n"
2200
        "        for(int i = 0; i < 4; i++)\n"
2201
        "        {\n"
2202
        "                bool4 test1 = cc[i] > max(v1[i], v2[i]), test2 = cc[i] < min(v1[i], v2[i]);\n"
2203
        "                key[i] = cc[i] > THRESHOLD0 && all(test1.xy&&test1.zw)?1.0: 0.0;\n"
2204
        "                key[i] = cc[i] < -THRESHOLD0 && all(test2.xy&&test2.zw)? -1.0: key[i];\n"
2205
        "        }\n"
2206
        "        if(TexCC.x < 1.0) {key.rb = 0;}\n"
2207
        "        if(TexCC.y < 1.0) {key.rg = 0;}\n"
2208
        "        FragData0 = float4(0.0);\n"
2209
        "        if(all(key == 0.0)) return; \n";
2210

    
2211
        //do edge supression first.. 
2212
        //vector v1 is < (-1, 0), (1, 0), (0,-1), (0, 1)>
2213
        //vector v2 is < (-1,-1), (-1,1), (1,-1), (1, 1)>
2214

    
2215
        out<<
2216
        "        float fxx[4], fyy[4], fxy[4], fx[4], fy[4];\n"
2217
        "        for(int i = 0; i < 4; i++) \n"
2218
        "        {\n"
2219
        "                if(key[i] != 0)\n"
2220
        "                {\n"
2221
        "                        float4 D2 = v1[i].xyzw - cc[i];\n"
2222
        "                        float2 D4 = v2[i].xw - v2[i].yz;\n"
2223
        "                        float2 D5 = 0.5*(v1[i].yw-v1[i].xz); \n"
2224
        "                        fx[i] = D5.x;\n"
2225
        "                        fy[i] = D5.y ;\n"
2226
        "                        fxx[i] = D2.x + D2.y;\n"
2227
        "                        fyy[i] = D2.z + D2.w;\n"
2228
        "                        fxy[i] = 0.25*(D4.x + D4.y);\n"
2229
        "                        float fxx_plus_fyy = fxx[i] + fyy[i];\n"
2230
        "                        float score_up = fxx_plus_fyy*fxx_plus_fyy; \n"
2231
        "                        float score_down = (fxx[i]*fyy[i] - fxy[i]*fxy[i]);\n"
2232
        "                        if( score_down <= 0 || score_up > THRESHOLD2 * score_down)key[i] = 0;\n"
2233
        "                }\n"
2234
        "        }\n"
2235
        "        if(all(key == 0.0)) return; \n\n";
2236

    
2237
        ////////////////////////////////////////////////
2238
        //read 9 pixels of upper/lower level
2239
        out<<
2240
        "        float4  v4[4], v5[4], v6[4];\n"
2241
        "        ccc = texRECT(texU, TexCC.xy);\n"
2242
        "        clc = texRECT(texU, TexLC.xy);\n"
2243
        "        crc = texRECT(texU, TexRC.xy);\n"
2244
        "        ccd = texRECT(texU, TexCD.xy);\n"
2245
        "        ccu = texRECT(texU, TexCU.xy);\n"
2246
        "        cld = texRECT(texU, TexLD.xy);\n"
2247
        "        clu = texRECT(texU, TexLU.xy);\n"
2248
        "        crd = texRECT(texU, TexRD.xy);\n"
2249
        "        cru = texRECT(texU, TexRU.xy);\n"
2250
        "        float4  cu = ccc;\n"
2251
        "        v4[0] = float4(clc.g, ccc.g, ccd.b, ccc.b);\n"
2252
        "        v4[1] = float4(ccc.r, crc.r, ccd.a, ccc.a);\n"
2253
        "        v4[2] = float4(clc.a, ccc.a, ccc.r, ccu.r);\n"
2254
        "        v4[3] = float4(ccc.b, crc.b, ccc.g, ccu.g);\n"
2255
        "        v6[0] = float4(cld.a, clc.a, ccd.a, ccc.a);\n"
2256
        "        v6[1] = float4(ccd.b, ccc.b, crd.b, crc.b);\n"
2257
        "        v6[2] = float4(clc.g, clu.g, ccc.g, ccu.g);\n"
2258
        "        v6[3] = float4(ccc.r, ccu.r, crc.r, cru.r);\n"
2259
        <<
2260
        "        for(int i = 0; i < 4; i++)\n"
2261
        "        {\n"
2262
        "                if(key[i] == 1.0)\n"
2263
        "                {\n"
2264
        "                        bool4 test = cc[i]< max(v4[i], v6[i]); \n"
2265
        "                        if(cc[i] < cu[i] || any(test.xy||test.zw))key[i] = 0.0; \n"
2266
        "                }else if(key[i] == -1.0)\n"
2267
        "                {\n"
2268
        "                        bool4 test = cc[i]> min( v4[i], v6[i]); \n"
2269
        "                        if(cc[i] > cu[i] || any(test.xy||test.zw))key[i] = 0.0; \n"
2270
        "                }\n"
2271
        "        }\n"
2272
        "        if(all(key == 0.0)) return; \n"
2273
        <<
2274
        "        ccc = texRECT(texD, TexCC.xy);\n"
2275
        "        clc = texRECT(texD, TexLC.xy);\n"
2276
        "        crc = texRECT(texD, TexRC.xy);\n"
2277
        "        ccd = texRECT(texD, TexCD.xy);\n"
2278
        "        ccu = texRECT(texD, TexCU.xy);\n"
2279
        "        cld = texRECT(texD, TexLD.xy);\n"
2280
        "        clu = texRECT(texD, TexLU.xy);\n"
2281
        "        crd = texRECT(texD, TexRD.xy);\n"
2282
        "        cru = texRECT(texD, TexRU.xy);\n"
2283
        "        float4  cd = ccc;\n"
2284
        "        v5[0] = float4(clc.g, ccc.g, ccd.b, ccc.b);\n"
2285
        "        v5[1] = float4(ccc.r, crc.r, ccd.a, ccc.a);\n"
2286
        "        v5[2] = float4(clc.a, ccc.a, ccc.r, ccu.r);\n"
2287
        "        v5[3] = float4(ccc.b, crc.b, ccc.g, ccu.g);\n"
2288
        "        v6[0] = float4(cld.a, clc.a, ccd.a, ccc.a);\n"
2289
        "        v6[1] = float4(ccd.b, ccc.b, crd.b, crc.b);\n"
2290
        "        v6[2] = float4(clc.g, clu.g, ccc.g, ccu.g);\n"
2291
        "        v6[3] = float4(ccc.r, ccu.r, crc.r, cru.r);\n"
2292
        <<
2293
        "        for(int i = 0; i < 4; i++)\n"
2294
        "        {\n"
2295
        "                if(key[i] == 1.0)\n"
2296
        "                {\n"
2297
        "                        bool4 test = cc[i]< max(v5[i], v6[i]);\n"
2298
        "                        if(cc[i] < cd[i] || any(test.xy||test.zw))key[i] = 0.0; \n"
2299
        "                }else if(key[i] == -1.0)\n"
2300
        "                {\n"
2301
        "                        bool4 test = cc[i]>min(v5[i],v6[i]);\n"
2302
        "                        if(cc[i] > cd[i] || any(test.xy||test.zw))key[i] = 0.0; \n"
2303
        "                }\n"
2304
        "        }\n"
2305
        "        float keysum = dot(abs(key), float4(1, 1, 1, 1)) ;\n"
2306
        "        //assume there is only one keypoint in the four. \n"
2307
        "        if(keysum != 1.0) return;        \n";
2308

    
2309
        //////////////////////////////////////////////////////////////////////
2310
        if(GlobalUtil::_SubpixelLocalization)
2311

    
2312
        out <<
2313
        "        float3 offset = float3(0, 0, 0); \n"
2314
        "        /*The unrolled follwing loop is faster than a dynamic indexing version.*/\n"
2315
        "        for(int idx = 1; idx < 4; idx++)\n"
2316
        "        {\n"
2317
        "                if(key[idx] != 0) \n"
2318
        "                {\n"
2319
        "                        cu[0] = cu[idx];        cd[0] = cd[idx];        cc[0] = cc[idx];        \n"
2320
        "                        v4[0] = v4[idx];        v5[0] = v5[idx];                                                \n"
2321
        "                        fxy[0] = fxy[idx];        fxx[0] = fxx[idx];        fyy[0] = fyy[idx];        \n"
2322
        "                        fx[0] = fx[idx];        fy[0] = fy[idx];                                                \n"
2323
        "                }\n"
2324
        "        }\n"
2325
        <<
2326
                
2327
        "        float fs = 0.5*( cu[0] - cd[0] );                                \n"
2328
        "        float fss = cu[0] + cd[0] - cc[0] - cc[0];\n"
2329
        "        float fxs = 0.25 * (v4[0].y + v5[0].x - v4[0].x - v5[0].y);\n"
2330
        "        float fys = 0.25 * (v4[0].w + v5[0].z - v4[0].z - v5[0].w);\n"
2331
        "        float4 A0, A1, A2 ;                        \n"
2332
        "        A0 = float4(fxx[0], fxy[0], fxs, -fx[0]);        \n"
2333
        "        A1 = float4(fxy[0], fyy[0], fys, -fy[0]);        \n"
2334
        "        A2 = float4(fxs, fys, fss, -fs);        \n"
2335
        "        float3 x3 = abs(float3(fxx[0], fxy[0], fxs));                \n"
2336
        "        float maxa = max(max(x3.x, x3.y), x3.z);        \n"
2337
        "        if(maxa >= 1e-10 ) \n"
2338
        "        {                                                                                                \n"
2339
        "                if(x3.y ==maxa )                                                        \n"
2340
        "                {                                                                                        \n"
2341
        "                        float4 TEMP = A1; A1 = A0; A0 = TEMP;        \n"
2342
        "                }else if( x3.z == maxa )                                        \n"
2343
        "                {                                                                                        \n"
2344
        "                        float4 TEMP = A2; A2 = A0; A0 = TEMP;        \n"
2345
        "                }                                                                                        \n"
2346
        "                A0 /= A0.x;                                                                        \n"
2347
        "                A1 -= A1.x * A0;                                                        \n"
2348
        "                A2 -= A2.x * A0;                                                        \n"
2349
        "                float2 x2 = abs(float2(A1.y, A2.y));                \n"
2350
        "                if( x2.y > x2.x )                                                        \n"
2351
        "                {                                                                                        \n"
2352
        "                        float3 TEMP = A2.yzw;                                        \n"
2353
        "                        A2.yzw = A1.yzw;                                                \n"
2354
        "                        A1.yzw = TEMP;                                                        \n"
2355
        "                        x2.x = x2.y;                                                        \n"
2356
        "                }                                                                                        \n"
2357
        "                if(x2.x >= 1e-10) {                                                                \n"
2358
        "                        A1.yzw /= A1.y;                                                                \n"
2359
        "                        A2.yzw -= A2.y * A1.yzw;                                        \n"
2360
        "                        if(abs(A2.z) >= 1e-10) {\n"
2361
        "                                offset.z = A2.w /A2.z;                                    \n"
2362
        "                                offset.y = A1.w - offset.z*A1.z;                            \n"
2363
        "                                offset.x = A0.w - offset.z*A0.z - offset.y*A0.y;        \n"
2364
        "                                bool test = (abs(cc[0] + 0.5*dot(float3(fx[0], fy[0], fs), offset ))>THRESHOLD1) ;\n"
2365
        "                                if(!test || any( abs(offset) >= 1.0)) return;\n"
2366
        "                        }\n"
2367
        "                }\n"
2368
        "        }\n"
2369
        <<"\n"
2370
        "        float keyv = dot(key, float4(1.0, 2.0, 3.0, 4.0));\n"
2371
        "        FragData0 = float4(keyv,  offset);\n"
2372
        "}\n"        <<'\0';
2373

    
2374
        else out << "\n"
2375
        "        float keyv = dot(key, float4(1.0, 2.0, 3.0, 4.0));\n"
2376
        "        FragData0 =  float4(keyv, 0, 0, 0);\n"
2377
        "}\n"        <<'\0';
2378

    
2379
        s_keypoint = program = new ProgramCG(buffer);
2380
        //parameter
2381
        _param_dog_texu = cgGetNamedParameter(*program, "texU");
2382
        _param_dog_texd = cgGetNamedParameter(*program, "texD");
2383
}
2384

    
2385
void ShaderBagPKCG::LoadOrientationShader()
2386
{
2387
        char buffer[10240];
2388
        ostrstream out(buffer,10240);
2389

    
2390
        out<<"\n"
2391
        "#define GAUSSIAN_WF "<<GlobalUtil::_OrientationGaussianFactor<<" \n"
2392
        "#define SAMPLE_WF ("<<GlobalUtil::_OrientationWindowFactor<< " )\n"
2393
        "#define ORIENTATION_THRESHOLD "<< GlobalUtil::_MulitiOrientationThreshold << "\n"
2394
        "void main(uniform samplerRECT tex,        uniform samplerRECT gtex,                \n"
2395
        "                uniform samplerRECT otex,         uniform float4 size, in float2 TexCoord0 : TEXCOORD0,        \n"
2396
        "                out float4 FeatureData : COLOR0        ";
2397

    
2398
        //multi orientation output
2399
        //use one additional texture to store up to four orientations
2400
        //when we use one 32bit float to store two orientations, no extra texture is required
2401

    
2402
        if(GlobalUtil::_MaxOrientation >1  && GlobalUtil::_OrientationPack2 == 0)
2403
                out<<", out float4 OrientationData : COLOR1";
2404

    
2405

    
2406
        //use 9 float4 to store histogram of 36 directions
2407
        out<<")                \n"
2408
        "{                                                                                                        \n"
2409
        "        float4 bins[10];                                                                \n"
2410
        "        for (int i=0; i<9; i++) bins[i] = float4(0,0,0,0);        \n"
2411
        "        float4 sift = texRECT(tex, TexCoord0);                \n"
2412
        "        float2 pos = sift.xy; \n"
2413
        "        bool orientation_mode = (size.z != 0);                \n"
2414
        "        float sigma = orientation_mode? (abs(size.z) * pow(size.w, sift.w) * sift.z) : (sift.w); \n"
2415
        "        //bool fixed_orientation = (size.z < 0);                \n"
2416
        "        if(size.z < 0) {FeatureData = float4(pos, 0, sigma); return;}"
2417
        "        float gsigma = sigma * GAUSSIAN_WF;                                \n"
2418
        "        float2 win = abs(sigma.xx) * (SAMPLE_WF * GAUSSIAN_WF);        \n"
2419
        "        float2 dim = size.xy;                                                        \n"
2420
        "        float4 dist_threshold = float4(win.x*win.x+0.5);        \n"
2421
        "        float factor = -0.5/(gsigma*gsigma);                        \n"
2422
        "        float4 sz;        float2 spos;                                                \n"
2423
        "        //if(any(pos.xy <= 1)) discard;                                        \n"
2424
        "        sz.xy = max( pos - win, float2(2,2));                        \n"
2425
        "        sz.zw = min( pos + win, dim-3);                                \n"
2426
        "        sz = floor(sz*0.5) + 0.5; ";
2427
        //loop to get the histogram
2428

    
2429
        out<<"\n"
2430
        "        for(spos.y = sz.y; spos.y <= sz.w;        spos.y+=1.0)                                \n"
2431
        "        {                                                                                                                                \n"
2432
        "                for(spos.x = sz.x; spos.x <= sz.z;        spos.x+=1.0)                        \n"
2433
        "                {                                                                                                                        \n"
2434
        "                        float2 offset = 2* spos - pos - 0.5;                                        \n"
2435
        "                        float4 off = float4(offset, offset + 1);                                \n"
2436
        "                        float4 distsq = off.xzxz * off.xzxz + off.yyww * off.yyww;        \n"
2437
        "                        bool4 inside = distsq < dist_threshold;                                        \n"
2438
        "                        if(any(inside.xy||inside.zw))                                                        \n"
2439
        "                        {                                                                                                                \n"
2440
        "                                float4 gg = texRECT(gtex, spos);                                        \n"
2441
        "                                float4 oo = texRECT(otex, spos);                                        \n"
2442
        "                                float4 weight = gg * exp(distsq * factor);                        \n"
2443
        "                                float4 idxv  = floor(degrees(oo)*0.1);                                 \n"
2444
        "                                idxv = idxv<0? idxv + 36.0: idxv;                                        \n"
2445
        "                                float4 vidx = 4.0* fract(idxv * 0.25);//fmod(idxv, 4.0);\n";
2446

    
2447
        //
2448
        if(GlobalUtil::_UseDynamicIndexing && strcmp(cgGetProfileString(ProgramCG::_FProfile), "gp4fp")==0)
2449
        //if(ProgramCG::_FProfile == CG_PROFILE_GPU_FP) this enumerant is not defined in cg1.5
2450
        {
2451
                //gp4fp supports dynamic indexing, but it might be slow on some GPUs
2452
                out<<"\n"
2453
        "                                for(int i = 0 ; i < 4; i++)\n"
2454
        "                                {\n"
2455
        "                                        if(inside[i])\n"
2456
        "                                        {\n"
2457
        "                                                float idx = idxv[i];                                                                \n"
2458
        "                                                float4 inc = weight[i] * float4(vidx[i] == float4(0,1,2,3));        \n"
2459
        "                                                int iidx = int(floor(idx*0.25));        \n"
2460
        "                                                bins[iidx]+=inc;                                        \n"
2461
        "                                        }                                                                                \n"
2462
        "                                }                                                                                        \n"
2463
        "                        }                                                                                                \n"
2464
        "                }                                                                                                        \n"
2465
        "        }";
2466

    
2467
        }else
2468
        {
2469
                //nvfp40 still does not support dynamic array indexing
2470
                //unrolled binary search
2471
                //it seems to be faster than the dyanmic indexing version on some GPUs
2472
                out<<"\n"
2473
        "                                for(int i = 0 ; i < 4; i++)\n"
2474
        "                                {\n"
2475
        "                                        if(inside[i])\n"
2476
        "                                        {\n"
2477
        "                                                float idx = idxv[i];                                                                                        \n"
2478
        "                                                float4 inc = weight[i] * float4(vidx[i] == float4(0,1,2,3));        \n"
2479
        "                                                if(idx < 16)                                                        \n"
2480
        "                                                {                                                                                \n"
2481
        "                                                        if(idx < 8)                                                        \n"
2482
        "                                                        {                                                                        \n"
2483
        "                                                                if(idx < 4)        {        bins[0]+=inc;}        \n"
2484
        "                                                                else                {        bins[1]+=inc;}        \n"
2485
        "                                                        }else                                                                \n"
2486
        "                                                        {                                                                        \n"
2487
        "                                                                if(idx < 12){        bins[2]+=inc;}        \n"
2488
        "                                                                else                {        bins[3]+=inc;}        \n"
2489
        "                                                        }                                                                        \n"
2490
        "                                                }else if(idx < 32)                                                \n"
2491
        "                                                {                                                                                \n"
2492
        "                                                        if(idx < 24)                                                \n"
2493
        "                                                        {                                                                        \n"
2494
        "                                                                if(idx <20)        {        bins[4]+=inc;}        \n"
2495
        "                                                                else                {        bins[5]+=inc;}        \n"
2496
        "                                                        }else                                                                \n"
2497
        "                                                        {                                                                        \n"
2498
        "                                                                if(idx < 28){        bins[6]+=inc;}        \n"
2499
        "                                                                else                {        bins[7]+=inc;}        \n"
2500
        "                                                        }                                                                        \n"
2501
        "                                                }else                                                 \n"
2502
        "                                                {                                                                                \n"
2503
        "                                                        bins[8]+=inc;                                                \n"
2504
        "                                                }                                                                                \n"
2505
        "                                        }                                                                                        \n"
2506
        "                                }                                                                                                \n"
2507
        "                        }                                                                                \n"
2508
        "                }                                                                                        \n"
2509
        "        }";
2510

    
2511
        }
2512

    
2513
        //reuse the code from the unpacked version..
2514
        ShaderBagCG::WriteOrientationCodeToStream(out);
2515

    
2516

    
2517
        ProgramCG * program;
2518
        s_orientation = program = new ProgramCG(buffer);
2519
        _param_orientation_gtex = cgGetNamedParameter(*program, "gtex");
2520
        _param_orientation_otex = cgGetNamedParameter(*program, "otex");
2521
        _param_orientation_size = cgGetNamedParameter(*program, "size");
2522

    
2523

    
2524
}
2525

    
2526
void ShaderBagPKCG::LoadDescriptorShader()
2527
{
2528
        GlobalUtil::_DescriptorPPT = 16;
2529
        LoadDescriptorShaderF2();
2530

    
2531
}
2532

    
2533
void ShaderBagPKCG::LoadDescriptorShaderF2()
2534
{
2535
        //one shader outpout 128/8 = 16 , each fragout encodes 4
2536
        //const double twopi = 2.0*3.14159265358979323846;
2537
        //const double rpi  = 8.0/twopi;
2538
        char buffer[10240];
2539
        ostrstream out(buffer, 10240);
2540

    
2541
        out<<setprecision(8);
2542

    
2543
        out<<"\n"
2544
        "#define M_PI 3.14159265358979323846\n"
2545
        "#define TWO_PI (2.0*M_PI)\n"
2546
        "#define RPI 1.2732395447351626861510701069801\n"
2547
        "#define WF size.z\n"
2548
        "void main(uniform samplerRECT tex,                \n"
2549
        "uniform        samplerRECT gtex,                                \n"
2550
        "uniform samplerRECT otex,                                \n"
2551
        "uniform float4                dsize,                                \n"
2552
        "uniform float3                size,                                \n"
2553
        "in                float2        TexCoord0 : TEXCOORD0,        \n"
2554
        "out                float4  FragData0:COLOR0,                \n"
2555
        "out                float4        FragData1:COLOR1)                \n"
2556
        "{\n"
2557
        "        float2 dim        = size.xy;        //image size                        \n"
2558
        "        float index = dsize.x*floor(TexCoord0.y * 0.5) + TexCoord0.x;\n"
2559
        "        float idx = 8.0 * frac(index * 0.125) + 8.0 * floor(2.0 * frac(TexCoord0.y * 0.5));                \n"
2560
        "        index = floor(index*0.125)+ 0.49;  \n"
2561
        "        float2 coord = floor( float2( fmod(index, dsize.z), index*dsize.w)) + 0.5 ;\n"
2562
        "        float2 pos = texRECT(tex, coord).xy;                \n"
2563
        "        if(any(pos.xy <= 1) || any(pos.xy >=dim-1)) "
2564
        "        //discard;        \n"
2565
        "        { FragData0 = FragData1 = float4(0.0); return; }\n"
2566
        "        float anglef = texRECT(tex, coord).z;\n"
2567
        "        if(anglef > M_PI) anglef -= TWO_PI;\n"
2568
        "        float sigma = texRECT(tex, coord).w; \n"
2569
        "        float spt  = abs(sigma * WF);        //default to be 3*sigma        \n";
2570
        //rotation
2571
        out<<
2572
        "        float4 cscs, rots;                                                                \n"
2573
        "        sincos(anglef, cscs.y, cscs.x);                                        \n"
2574
        "        cscs.zw = - cscs.xy;                                                        \n"
2575
        "        rots = cscs /spt;                                                                \n"
2576
        "        cscs *= spt; \n";
2577

    
2578
        //here cscs is actually (cos, sin, -cos, -sin) * (factor: 3)*sigma
2579
        //and rots is  (cos, sin, -cos, -sin ) /(factor*sigma)
2580
        //devide the 4x4 sift grid into 16 1x1 block, and each corresponds to a shader thread
2581
        //To use linear interoplation, 1x1 is increased to 2x2, by adding 0.5 to each side
2582
        out<<
2583
        "        float4 temp; float2 pt, offsetpt;                                \n"
2584
        "        /*the fraction part of idx is .5*/                        \n"
2585
        "        offsetpt.x = 4.0 * fract(idx * 0.25) - 2.0;                                \n"
2586
        "        offsetpt.y = floor(idx*0.25) - 1.5;                        \n"
2587
        "        temp = cscs.xwyx*offsetpt.xyxy;                                \n"
2588
        "        pt = pos + temp.xz + temp.yw;                                \n";
2589
        
2590
        //get a horizontal bounding box of the rotated rectangle
2591
        out<<
2592
        "        float2 bwin = abs(cscs.xy);                                        \n"
2593
        "        float bsz = bwin.x + bwin.y;                                        \n"
2594
        "        float4 sz;        float2 spos;                                        \n"
2595
        "        sz.xy = max(pt - bsz, float2(2,2));\n"
2596
        "        sz.zw = min(pt + bsz, dim - 3);                \n"
2597
        "        sz = floor(sz * 0.5) + 0.5;"; //move sample point to pixel center
2598
        //get voting for two box
2599

    
2600
        out<<"\n"
2601
        "        float4 DA, DB;                                                \n"
2602
        "        DA = DB  = float4(0, 0, 0, 0);                \n"
2603
        "        float4 nox = float4(0, rots.xy, rots.x + rots.y);                                        \n"
2604
        "        float4 noy = float4(0, rots.wx, rots.w + rots.x);                                        \n"
2605
        "        for(spos.y = sz.y; spos.y <= sz.w;        spos.y+=1.0)                                \n"
2606
        "        {                                                                                                                                \n"
2607
        "                for(spos.x = sz.x; spos.x <= sz.z;        spos.x+=1.0)                        \n"
2608
        "                {                                                                                                                        \n"
2609
        "                        float2 tpt = spos * 2.0 - pt - 0.5;                                        \n"
2610
        "                        float4 temp = rots.xywx * tpt.xyxy;                                                \n"
2611
        "                        float2 temp2 = temp.xz + temp.yw;                                                \n"
2612
        "                        float4 nx = temp2.x + nox;                                                                \n"
2613
        "                        float4 ny = temp2.y + noy;                        \n"
2614
        "                        float4 nxn = abs(nx), nyn = abs(ny);                                                \n"
2615
        "                        bool4 inside = (max(nxn, nyn) < 1.0);        \n"
2616
        "                        if(any(inside.xy || inside.zw))\n"
2617
        "                        {\n"
2618
        "                                float4 gg = texRECT(gtex, spos);\n"
2619
        "                                float4 oo = texRECT(otex, spos);\n"
2620
        "                                float4 theta0 = (anglef - oo)*RPI;\n"
2621
        "                                float4 theta = theta0 < 0? theta0 + 8.0 : theta0;//8.0 * frac(1.0 + 0.125 * theta0);//                 \n"
2622
        "                                float4 theta1 = floor(theta); \n"
2623
        "                                float4 diffx = nx + offsetpt.x, diffy = ny + offsetpt.y;        \n"
2624
        "                                float4 ww = exp(-0.125 * (diffx * diffx + diffy * diffy ));        \n"
2625
        "                                float4 weight = (1 - nxn) * (1 - nyn) * gg * ww; \n"
2626
        "                                float4 weight2 = (theta - theta1) * weight;                                                        \n"
2627
        "                                float4 weight1 = weight - weight2;                                                \n"
2628
        "                                for(int i = 0;i < 4; i++)\n"
2629
        "                                {\n"
2630
        "                                        if(inside[i])\n"
2631
        "                                        {\n"
2632
        "                                                DA += float4(theta1[i] == float4(0, 1, 2, 3))*weight1[i];        \n"
2633
        "                                                DA += float4(theta1[i] == float4(7, 0, 1, 2))*weight2[i];        \n"
2634
        "                                                DB += float4(theta1[i] == float4(4, 5, 6, 7))*weight1[i];        \n"
2635
        "                                                DB += float4(theta1[i] == float4(3, 4, 5, 6))*weight2[i];        \n"
2636
        "                                        }\n"
2637
        "                                }\n"
2638
        "                        }\n"
2639
        "                }\n"
2640
        "        }\n";
2641
        out<<
2642
        "         FragData0 = DA; FragData1 = DB;\n"
2643
        "}\n"<<'\0';
2644
        ProgramCG * program; 
2645

    
2646
        s_descriptor_fp = program =  new ProgramCG(buffer);
2647
        _param_descriptor_gtex = cgGetNamedParameter(*program, "gtex");
2648
        _param_descriptor_otex = cgGetNamedParameter(*program, "otex");
2649
        _param_descriptor_size = cgGetNamedParameter(*program, "size");
2650
        _param_descriptor_dsize = cgGetNamedParameter(*program, "dsize");
2651

    
2652
}
2653

    
2654
void ShaderBagPKCG::SetMarginCopyParam(int xmax, int ymax)
2655
{
2656
        float truncate[4];
2657
        truncate[0] = (xmax - 0.5f) * 0.5f; //((xmax + 1)  >> 1) - 0.5f;
2658
        truncate[1] = (ymax - 0.5f) * 0.5f; //((ymax + 1)  >> 1) - 0.5f;
2659
        truncate[2] = (xmax %2 == 1)? 0.0f: 1.0f;
2660
        truncate[3] = truncate[2] +  (((ymax % 2) == 1)? 0.0f : 2.0f);
2661
        cgGLSetParameter4fv(_param_margin_copy_truncate, truncate);
2662
}
2663

    
2664
void ShaderBagPKCG::SetGradPassParam(int texP)
2665
{
2666
        cgGLSetTextureParameter(_param_grad_pass_texp, texP);
2667
        cgGLEnableTextureParameter(_param_grad_pass_texp);
2668
}
2669

    
2670
void ShaderBagPKCG::SetGenListEndParam(int ktex)
2671
{
2672
        cgGLSetTextureParameter(_param_genlist_end_ktex, ktex);
2673
        cgGLEnableTextureParameter(_param_genlist_end_ktex);
2674
}
2675

    
2676
void ShaderBagPKCG::SetDogTexParam(int texU, int texD)
2677
{
2678
        cgGLSetTextureParameter(_param_dog_texu, texU);
2679
        cgGLEnableTextureParameter(_param_dog_texu);
2680
        cgGLSetTextureParameter(_param_dog_texd, texD);
2681
        cgGLEnableTextureParameter(_param_dog_texd);
2682
}
2683

    
2684
void ShaderBagPKCG::SetGenListInitParam(int w, int h)
2685
{
2686
        float bbox[4] = {(w -1.0f) * 0.5f +0.25f, (w-1.0f) * 0.5f - 0.25f,  (h - 1.0f) * 0.5f + 0.25f, (h-1.0f) * 0.5f - 0.25f};
2687
        cgGLSetParameter4fv(_param_genlist_init_bbox, bbox);
2688
}
2689

    
2690

    
2691
void ShaderBagPKCG::SetGenListStartParam(float width, int tex0)
2692
{
2693
        cgGLSetParameter1f(_param_ftex_width, width);
2694

    
2695
        if(_param_genlist_start_tex0)
2696
        {
2697
                cgGLSetTextureParameter(_param_genlist_start_tex0, tex0);
2698
                cgGLEnableTextureParameter(_param_genlist_start_tex0);
2699
        }
2700
}
2701

    
2702

    
2703

    
2704
void ShaderBagPKCG::SetGenListStepParam(int tex, int tex0)
2705
{
2706
        cgGLSetTextureParameter(_param_genlist_step_tex, tex);
2707
        cgGLEnableTextureParameter(_param_genlist_step_tex);
2708
        cgGLSetTextureParameter(_param_genlist_step_tex0, tex0);
2709
        cgGLEnableTextureParameter(_param_genlist_step_tex0);
2710
}
2711

    
2712
void ShaderBagPKCG::SetGenVBOParam(float width, float fwidth, float size)
2713
{
2714
        float sizes[4] = {size*3.0f, fwidth, width, 1.0f/width};
2715
        cgGLSetParameter4fv(_param_genvbo_size, sizes);
2716
}
2717

    
2718
void ShaderBagPKCG::SetSimpleOrientationInput(int oTex, float sigma, float sigma_step)
2719
{
2720
        cgGLSetTextureParameter(_param_orientation_gtex, oTex);
2721
        cgGLEnableTextureParameter(_param_orientation_gtex);
2722
        cgGLSetParameter2f(_param_orientation_size, sigma, sigma_step);
2723
}
2724

    
2725

    
2726
void ShaderBagPKCG::SetFeatureOrientationParam(int gtex, int width, int height, float sigma, int otex, float step)
2727
{
2728
        ///
2729
        cgGLSetTextureParameter(_param_orientation_gtex, gtex);        
2730
        cgGLEnableTextureParameter(_param_orientation_gtex);
2731
        cgGLSetTextureParameter(_param_orientation_otex, otex);        
2732
        cgGLEnableTextureParameter(_param_orientation_otex);
2733

    
2734
        float size[4];
2735
        size[0] = (float)width;
2736
        size[1] = (float)height;
2737
        size[2] = sigma;
2738
        size[3] = step;
2739
        cgGLSetParameter4fv(_param_orientation_size, size);
2740

    
2741
}
2742

    
2743
void ShaderBagPKCG::SetFeatureDescirptorParam(int gtex, int otex, float dwidth, float fwidth,  float width, float height, float sigma)
2744
{
2745
        ///
2746

    
2747
        cgGLSetTextureParameter(_param_descriptor_gtex, gtex);        
2748
        cgGLEnableTextureParameter(_param_descriptor_gtex);
2749
        cgGLSetTextureParameter(_param_descriptor_otex, otex);        
2750
        cgGLEnableTextureParameter(_param_descriptor_otex);
2751

    
2752

    
2753
        float dsize[4] ={dwidth, 1.0f/dwidth, fwidth, 1.0f/fwidth};
2754
        cgGLSetParameter4fv(_param_descriptor_dsize, dsize);
2755
        float size[3];
2756
        size[0] = width;
2757
        size[1] = height;
2758
        size[2] = GlobalUtil::_DescriptorWindowFactor;
2759
        cgGLSetParameter3fv(_param_descriptor_size, size);
2760

    
2761

    
2762
}
2763

    
2764
#endif
2765