////////////////////////////////////////////////////////////////////////////
//  File:         ProgramCG.cpp
//  Author:       Changchang Wu
//  Description : implementation of cg related class.
//      class ProgramCG     A simple wrapper of Cg programs
//      class ShaderBagCG   cg shaders for SIFT
//      class FilterGLCG    cg gaussian filters for SIFT
//
//  Copyright (c) 2007 University of North Carolina at Chapel Hill
//  All Rights Reserved
//
//  Permission to use, copy, modify and distribute this software and its
//  documentation for educational, research and non-profit purposes, without
//  fee, and without a written agreement is hereby granted, provided that the
//  above copyright notice and the following paragraph appear in all copies.
//
//  The University of North Carolina at Chapel Hill make no representations
//  about the suitability of this software for any purpose. It is provided
//  'as is' without express or implied warranty.
//
//  Please send BUG REPORTS to ccwu@cs.unc.edu
//
////////////////////////////////////////////////////////////////////////////

24  
25 
#if defined(CG_SIFTGPU_ENABLED)

26  
27 
#include "GL/glew.h" 
28  
29 
#include <iostream> 
30 
#include <iomanip> 
31 
#include <vector> 
32 
#include <strstream> 
33 
#include <algorithm> 
34 
#include <stdlib.h> 
35 
#include <math.h> 
36 
#include <string.h> 
37 
using namespace std; 
38  
39 
#include "GlobalUtil.h" 
40 
#include "ProgramCG.h" 
41 
#include "GLTexImage.h" 
42 
#include "ShaderMan.h" 
43 
#include "FrameBufferObject.h" 
44  
45  
46  
47 
#if defined(_WIN32)

48 
#pragma comment (lib, "../../lib/cg.lib") 
49 
#pragma comment (lib, "../../lib/cggl.lib") 
50 
#endif

51  
52 
CGcontext ProgramCG::_Context =0;

53 
CGprofile ProgramCG::_FProfile; 
54  
55 
//////////////////////////////////////////////////////////////////////

56 
// Construction/Destruction

57 
//////////////////////////////////////////////////////////////////////

58  
59 
// Default constructor: wraps no Cg program.
ProgramCG::ProgramCG()
{
    _programID = NULL;
    // Nothing has been compiled, so the wrapper must not read as valid.
    // (The compiling constructor sets _valid explicitly; this one left it
    // uninitialized.)
    _valid = 0;
}
63  
64 
// Destroys the wrapped Cg program when one exists.
ProgramCG::~ProgramCG()
{
    if(!_programID) return;     // nothing was ever created
    cgDestroyProgram(_programID);
}
68  
69 
// Compiles `code` with Cg for `profile`, loads it through the Cg GL runtime
// and records whether that succeeded.
//
//  code             Cg source text of the program.
//  cg_compile_args  NULL-terminated list of compiler options. When NULL a
//                   default list is picked from the GlobalUtil hardware flags.
//  profile          Cg profile to compile for.
//
// _valid is 1 only when the program was created and the GL program error
// position reports no error; otherwise diagnostics are printed and _valid
// stays 0.
ProgramCG::ProgramCG(const char *code, const char** cg_compile_args, CGprofile profile)
{
    _valid = 0;
    _profile = profile;

    // Compiler options are passed to cgc verbatim, so each switch needs its
    // leading dash.
    const char* ati_args[]  = {"-po", "ATI_draw_buffers", 0};
    const char* fp40_args[] = {"-ifcvt", "none", "-unroll", "all",
                               GlobalUtil::_UseFastMath ? "-fastmath" : 0, 0};
    if(cg_compile_args == NULL)
    {
        if(!GlobalUtil::_IsNvidia)        cg_compile_args = ati_args;
        else if(GlobalUtil::_SupportFP40) cg_compile_args = fp40_args;
        // otherwise keep NULL: no extra options
    }

    _programID = ::cgCreateProgram(_Context, CG_SOURCE, code, profile, NULL, cg_compile_args);

    // -1 means "no error"; start there so a failed query cannot leave garbage.
    GLint epos = -1;

    if(_programID)
    {
        cgGLLoadProgram(_programID);
        //_texParamID = cgGetNamedParameter(_programID, "tex");

        glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &epos);
        if(epos >= 0)
        {
            // Either string may be NULL; streaming a NULL char pointer would
            // put the stream into a failed state.
            const char* compiled = cgGetProgramString(_programID, CG_COMPILED_PROGRAM);
            if(compiled) std::cout << compiled << std::endl;
            const GLubyte* message = glGetString(GL_PROGRAM_ERROR_STRING_ARB);
            if(message) std::cerr << message << std::endl;
        }
        else
        {
            _valid = 1;
        }
    }
    else
    {
        // Creation failed: there is no program handle, so do not ask Cg for
        // its compiled string. Show the source and whatever GL reports.
        if(code) std::cerr << code << std::endl;
        glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &epos);
        const GLubyte* message = glGetString(GL_PROGRAM_ERROR_STRING_ARB);
        if(message)
        {
            if(epos >= 0) std::cerr << message << std::endl;
            else          std::cout << message << std::endl;
        }
    }
}
107  
108 
void ProgramCG::ErrorCallback()

109 
{ 
110 
CGerror err = cgGetError(); 
111 
if(err)

112 
{ 
113 
std::cerr<< cgGetErrorString(err)<<endl; 
114 
} 
115 
} 
116  
117  
118 
void ProgramCG::InitContext()

119 
{ 
120 
if(_Context == 0) 
121 
{ 
122 
_Context = cgCreateContext(); 
123 

124 
/////////////

125 
_FProfile = cgGLGetLatestProfile(CG_GL_FRAGMENT); 
126 
cgGLSetOptimalOptions(_FProfile); 
127  
128 
if(GlobalUtil::_verbose) std::cout<<"Shader Profile: "<<cgGetProfileString(_FProfile)<<endl; 
129  
130 
cgSetErrorCallback(ErrorCallback); 
131 
} 
132 
} 
133  
134 
void ProgramCG::DestroyContext()

135 
{ 
136 
cgDestroyContext(_Context); 
137 
} 
138  
139 
// Constructing a shader bag makes sure the shared Cg context is available.
ShaderBagCG::ShaderBagCG()
{
    ProgramCG::InitContext();
}
143  
144  
145 
int ProgramCG::UseProgram()

146 
{ 
147 
if(_programID)

148 
{ 
149 
cgGLEnableProfile(_profile); 
150 
cgGLBindProgram(_programID); 
151  
152 
return 1; 
153 
}else

154 
{ 
155 
return 0; 
156 
} 
157 
} 
158  
159 
void ShaderBagCG::UnloadProgram()

160 
{ 
161  
162 
cgGLUnbindProgram(ProgramCG::_FProfile); 
163 
cgGLDisableProfile(ProgramCG::_FProfile); 
164 
} 
165  
166  
167 
void ShaderBagCG::LoadFixedShaders()

168 
{ 
169 
// s_debug = new ProgramCG( "void main(float4 TexCoord0:TEXCOORD0, out float4 FragColor:COLOR0,"

170 
// "uniform samplerRECT tex){ gl_FragColor.rg = gl_TexCoord[0].st;}");

171  
172 
s_gray = new ProgramCG(

173 
"void main(float4 TexCoord0 : TEXCOORD0, out float4 FragColor : COLOR0, uniform samplerRECT tex){\n"

174 
"float intensity = dot(float3(0.299, 0.587, 0.114), texRECT(tex,TexCoord0.xy ).rgb);\n"

175 
"FragColor= float4(intensity, intensity, intensity, 1.0);}" );

176  
177  
178 
s_sampling = new ProgramCG(

179 
"void main(float4 TexCoord0 : TEXCOORD0, out float4 FragColor : COLOR0, uniform samplerRECT tex){\n"

180 
"float4 cc = texRECT(tex, TexCoord0.xy); FragColor = float4(cc.rg, 0.0, 0.0); }" );

181  
182  
183 
s_zero_pass = new ProgramCG("void main(out float4 FragColor : COLOR0){FragColor = 0;}"); 
184  
185  
186 
ProgramCG * program; 
187 
s_margin_copy = program = new ProgramCG(

188 
"void main(float4 texCoord0: TEXCOORD0, out float4 FragColor: COLOR0, \n"

189 
"uniform samplerRECT tex, uniform float2 truncate){\n"

190 
"FragColor = texRECT(tex, min(texCoord0.xy, truncate)); }");

191  
192 
_param_margin_copy_truncate = cgGetNamedParameter(*program, "truncate");

193  
194  
195 
s_grad_pass = new ProgramCG(

196 
"void main (\n"

197 
"float4 TexCC : TEXCOORD0, float4 TexLC : TEXCOORD1,\n"

198 
"float4 TexRC : TEXCOORD2, float4 TexCD : TEXCOORD3, float4 TexCU : TEXCOORD4,\n"

199 
"out float4 FragData0 : COLOR0, uniform samplerRECT tex)\n"

200 
"{\n"

201 
" float4 v1, v2, gg;\n"

202 
" float4 cc = texRECT(tex, TexCC.xy);\n"

203 
" gg.x = texRECT(tex, TexLC.xy).r;\n"

204 
" gg.y = texRECT(tex, TexRC.xy).r;\n"

205 
" gg.z = texRECT(tex, TexCD.xy).r;\n"

206 
" gg.w = texRECT(tex, TexCU.xy).r;\n"

207 
" float2 dxdy = (gg.yw  gg.xz); \n"

208 
" float grad = 0.5*length(dxdy);\n"

209 
" float theta = grad==0? 0: atan2(dxdy.y, dxdy.x);\n"

210 
" FragData0 = float4(cc.rg, grad, theta);\n"

211 
"}\n\0");

212  
213  
214 
if(GlobalUtil::_SupportFP40)

215 
{ 
216 
//use the packing mode for cpu list reshape and two orientations

217 
if(GlobalUtil::_MaxOrientation != 2) GlobalUtil::_OrientationPack2 = 0; 
218  
219 
LoadOrientationShader(); 
220  
221  
222 
if(GlobalUtil::_DescriptorPPT) LoadDescriptorShader();

223  
224 
}else

225 
{ 
226 
s_orientation = program = new ProgramCG(

227 
"void main(out float4 FragColor : COLOR0, \n"

228 
" uniform samplerRECT fTex, uniform samplerRECT oTex, \n"

229 
" uniform float size, \n"

230 
" in float2 tpos : TEXCOORD0){\n"

231 
" float4 cc = texRECT(fTex, tpos);\n"

232 
" float4 oo = texRECT(oTex, cc.rg);\n"

233 
" FragColor = float4(cc.rg, oo.a, size);}");

234 
_param_orientation_gtex= cgGetNamedParameter(*program, "oTex");

235 
_param_orientation_size= cgGetNamedParameter(*program, "size");

236  
237  
238 
///

239 
GlobalUtil::_FullSupported = 0;

240 
GlobalUtil::_MaxOrientation = 0; //0 for simplified version 
241 
GlobalUtil::_DescriptorPPT = 0;

242 
std::cerr<<"Orientation simplified on this hardware"<<endl;

243 
std::cerr<<"Descriptor ignored on this hardware"<<endl;

244 
} 
245  
246  
247 
} 
248  
249 
void ShaderBagCG::LoadDisplayShaders()

250 
{ 
251 
s_copy_key = new ProgramCG(

252 
"void main(float4 TexCoord0 : TEXCOORD0, out float4 FragColor : COLOR0, uniform samplerRECT tex){\n"

253 
"FragColor.rg= texRECT(tex, TexCoord0.xy).rg; FragColor.ba = float2(0,1); }");

254  
255 
//shader used to write a vertex buffer object

256 
//which is used to draw the quads of each feature

257 
ProgramCG * program; 
258 
s_vertex_list = program = new ProgramCG(

259 
"void main(in float4 TexCoord0: TEXCOORD0,\n"

260 
"uniform float4 sizes, \n"

261 
"uniform samplerRECT tex, \n"

262 
"out float4 FragColor: COLOR0){\n"

263 
"float fwidth = sizes.y; \n"

264 
"float twidth = sizes.z; \n"

265 
"float rwidth = sizes.w; \n"

266 
"float index = 0.1*(fwidth*floor(TexCoord0.y) + TexCoord0.x);\n"

267 
"float px = fmod(index, twidth);\n"

268 
"float2 tpos= floor(float2(px, index*rwidth))+0.5;\n"

269 
"float4 cc = texRECT(tex, tpos );\n"

270 
"float size = cc.a * 3.0f;//sizes.x;// \n"

271 
"FragColor.zw = float2(0.0, 1.0);\n"

272 
"if(any(cc.xy <=0)) {FragColor.xy = cc.xy;}else \n"

273 
"{\n"

274 
" float type = frac(px);\n"

275 
" float2 dxy; float s, c;\n"

276 
" dxy.x = type < 0.1 ? 0 : ((type <0.5  type > 0.9)? size : size);\n"

277 
" dxy.y = type < 0.2 ? 0 : ((type < 0.3  type > 0.7 )? size :size); \n"

278 
" sincos(cc.b, s, c);\n"

279 
" FragColor.x = cc.x + c*dxy.xs*dxy.y;\n"

280 
" FragColor.y = cc.y + c*dxy.y+s*dxy.x;}\n"

281 
"}\n\0");

282 
/*FragColor = float4(tpos, 0.0, 1.0);}\n\0");*/

283  
284 
_param_genvbo_size = cgGetNamedParameter(*program, "sizes");

285  
286  
287 
s_display_gaussian = new ProgramCG(

288 
"void main(float4 TexCoord0 : TEXCOORD0, out float4 FragColor : COLOR0, uniform samplerRECT tex){\n"

289 
"float r = texRECT(tex, TexCoord0.xy).r;\n"

290 
"FragColor = float4(r, r, r, 1.0);}");

291  
292  
293 
s_display_dog = new ProgramCG(

294 
"void main(float4 TexCoord0 : TEXCOORD0, out float4 FragColor : COLOR0, uniform samplerRECT tex){\n"

295 
"float g = (0.5+20.0*texRECT(tex, TexCoord0.xy).g);\n"

296 
"FragColor = float4(g, g, g, 1.0);}" );

297  
298  
299 
s_display_grad = new ProgramCG(

300 
"void main(float4 TexCoord0 : TEXCOORD0, out float4 FragColor : COLOR0, uniform samplerRECT tex){\n"

301 
"float4 cc = texRECT(tex, TexCoord0.xy); FragColor = float4(5.0 * cc.bbb, 1.0); }");

302  
303  
304 
s_display_keys= new ProgramCG(

305 
"void main(float4 TexCoord0 : TEXCOORD0, out float4 FragColor : COLOR0, uniform samplerRECT tex){\n"

306 
"float4 cc = texRECT(tex, TexCoord0.xy);\n"

307 
"if(cc.r ==1.0) FragColor = float4(1.0, 0, 0,1.0); \n"

308 
"else {if (cc.r ==0.5) FragColor = float4(0.0,1.0,0.0,1.0); else discard;}}");

309  
310 
} 
311  
312 
void ShaderBagCG::SetMarginCopyParam(int xmax, int ymax) 
313 
{ 
314 
float truncate[2] = {xmax  0.5f , ymax  0.5f}; 
315 
cgGLSetParameter2fv(_param_margin_copy_truncate, truncate); 
316 
} 
317  
318  
319 
int ShaderBagCG::LoadKeypointShaderMR(float threshold, float edge_threshold) 
320 
{ 
321 
char buffer[10240]; 
322 
float threshold0 = threshold * 0.8f; 
323 
float threshold1 = threshold;

324 
float threshold2 = (edge_threshold+1)*(edge_threshold+1)/edge_threshold; 
325 
int max_refine = max(2, GlobalUtil::_SubpixelLocalization); 
326 
ostrstream out(buffer, 10240);

327  
328 
out << "#define THRESHOLD0 " << threshold0 << "\n" 
329 
"#define THRESHOLD1 " << threshold1 << "\n" 
330 
"#define THRESHOLD2 " << threshold2 << "\n" 
331 
"#define MAX_REFINE " << max_refine << "\n"; 
332 
out<< 
333 
"void main (\n"

334 
"float4 TexCC : TEXCOORD0, float4 TexLC : TEXCOORD1,\n"

335 
"float4 TexRC : TEXCOORD2, float4 TexCD : TEXCOORD3, \n"

336 
"float4 TexCU : TEXCOORD4, float4 TexLD : TEXCOORD5, \n"

337 
"float4 TexLU : TEXCOORD6, float4 TexRD : TEXCOORD7,\n"

338 
"out float4 FragData0 : COLOR0, out float4 FragData1 : COLOR1, \n"

339 
"uniform samplerRECT tex, uniform samplerRECT texU, uniform samplerRECT texD)\n"

340 
"{\n"

341 
" float4 v1, v2, gg;\n"

342 
" float2 TexRU = float2(TexRC.x, TexCU.y); \n"

343 
" float4 cc = texRECT(tex, TexCC.xy);\n"

344 
" v1.x = texRECT(tex, TexLC.xy).g;\n"

345 
" gg.x = texRECT(tex, TexLC.xy).r;\n"

346 
" v1.y = texRECT(tex, TexRC.xy).g;\n"

347 
" gg.y = texRECT(tex, TexRC.xy).r;\n"

348 
" v1.z = texRECT(tex, TexCD.xy).g;\n"

349 
" gg.z = texRECT(tex, TexCD.xy).r;\n"

350 
" v1.w = texRECT(tex, TexCU.xy).g;\n"

351 
" gg.w = texRECT(tex, TexCU.xy).r;\n"

352 
" v2.x = texRECT(tex, TexLD.xy).g;\n"

353 
" v2.y = texRECT(tex, TexLU.xy).g;\n"

354 
" v2.z = texRECT(tex, TexRD.xy).g;\n"

355 
" v2.w = texRECT(tex, TexRU.xy).g;\n"

356 
" float2 dxdy = 0.5*(gg.yw  gg.xz); \n"

357 
" float grad = length(dxdy);\n"

358 
" float theta = grad==0? 0: atan2(dxdy.y, dxdy.x);\n"

359 
" FragData0 = float4(cc.rg, grad, theta);\n"

360 
<< 
361 
" float dog = 0.0; \n"

362 
" FragData1 = float4(0, 0, 0, 0); \n"

363 
" float2 v3; float4 v4, v5, v6;\n"

364 
<< 
365 
" if( cc.g > THRESHOLD0 && all(cc.gggg > max(v1, v2)))\n"

366 
" {\n"

367 
" v3.x = texRECT(texU, TexCC.xy).g;\n"

368 
" v4.x = texRECT(texU, TexLC.xy).g;\n"

369 
" v4.y = texRECT(texU, TexRC.xy).g;\n"

370 
" v4.z = texRECT(texU, TexCD.xy).g;\n"

371 
" v4.w = texRECT(texU, TexCU.xy).g;\n"

372 
" v6.x = texRECT(texU, TexLD.xy).g;\n"

373 
" v6.y = texRECT(texU, TexLU.xy).g;\n"

374 
" v6.z = texRECT(texU, TexRD.xy).g;\n"

375 
" v6.w = texRECT(texU, TexRU.xy).g;\n"

376 
" if(cc.g < v3.x  any(cc.gggg<v4.xyzw  cc.gggg<v6.xyzw))return; \n"

377 
" v3.y = texRECT(texD, TexCC.xy).g;\n"

378 
" v5.x = texRECT(texD, TexLC.xy).g;\n"

379 
" v5.y = texRECT(texD, TexRC.xy).g;\n"

380 
" v5.z = texRECT(texD, TexCD.xy).g;\n"

381 
" v5.w = texRECT(texD, TexCU.xy).g;\n"

382 
" v6.x = texRECT(texD, TexLD.xy).g;\n"

383 
" v6.y = texRECT(texD, TexLU.xy).g;\n"

384 
" v6.z = texRECT(texD, TexRD.xy).g;\n"

385 
" v6.w = texRECT(texD, TexRU.xy).g;\n"

386 
" if(cc.g < v3.y  any(cc.gggg<v5.xyzw  cc.gggg<v6.xyzw))return; \n"

387 
" dog = 1.0; \n"

388 
" }\n"

389 
//the minimum case

390 
<< 
391 
" else if(cc.g < THRESHOLD0 && all(cc.gggg < min(v1, v2)))\n"

392 
" {\n"

393 
" v3.x = texRECT(texU, TexCC.xy).g;\n"

394 
" v4.x = texRECT(texU, TexLC.xy).g;\n"

395 
" v4.y = texRECT(texU, TexRC.xy).g;\n"

396 
" v4.z = texRECT(texU, TexCD.xy).g;\n"

397 
" v4.w = texRECT(texU, TexCU.xy).g;\n"

398 
" v6.x = texRECT(texU, TexLD.xy).g;\n"

399 
" v6.y = texRECT(texU, TexLU.xy).g;\n"

400 
" v6.z = texRECT(texU, TexRD.xy).g;\n"

401 
" v6.w = texRECT(texU, TexRU.xy).g;\n"

402 
" if(cc.g > v3.x  any(cc.gggg>v4.xyzw  cc.gggg>v6.xyzw))return; \n"

403 
" v3.y = texRECT(texD, TexCC.xy).g;\n"

404 
" v5.x = texRECT(texD, TexLC.xy).g;\n"

405 
" v5.y = texRECT(texD, TexRC.xy).g;\n"

406 
" v5.z = texRECT(texD, TexCD.xy).g;\n"

407 
" v5.w = texRECT(texD, TexCU.xy).g;\n"

408 
" v6.x = texRECT(texD, TexLD.xy).g;\n"

409 
" v6.y = texRECT(texD, TexLU.xy).g;\n"

410 
" v6.z = texRECT(texD, TexRD.xy).g;\n"

411 
" v6.w = texRECT(texD, TexRU.xy).g;\n"

412 
" if(cc.g > v3.y  any(cc.gggg>v5.xyzw  cc.gggg>v6.xyzw))return; \n"

413 
" dog = 0.5 ; \n"

414 
" }\n"

415 
" else\n"

416 
" return;\n"

417 
<< 
418 
" int i = 0; \n"

419 
" float2 offset = float2(0, 0);\n"

420 
" float2 offsets = float2(0, 0);\n"

421 
" float3 dxys; bool key_moved; \n"

422 
" float fx, fy, fs; \n"

423 
" float fxx, fyy, fxy; \n"

424 
" float fxs, fys, fss; \n"

425 
" do\n"

426 
" {\n"

427 
" dxys = float3(0, 0, 0);\n"

428 
" offset = float2(0, 0);\n"

429 
" float4 D2 = v1.xyzw  cc.gggg;\n"

430 
" fxx = D2.x + D2.y;\n"

431 
" fyy = D2.z + D2.w;\n"

432 
" float2 D4 = v2.xw  v2.yz;\n"

433 
" fxy = 0.25*(D4.x + D4.y);\n"

434 
" float2 D5 = 0.5*(v1.ywv1.xz); \n"

435 
" fx = D5.x;\n"

436 
" fy = D5.y ; \n"

437 
" fs = 0.5*( v3.x  v3.y ); \n"

438 
" fss = v3.x + v3.y  cc.g  cc.g;\n"

439 
" fxs = 0.25 * ( v4.y + v5.x  v4.x  v5.y);\n"

440 
" fys = 0.25 * ( v4.w + v5.z  v4.z  v5.w);\n"

441 
" float4 A0, A1, A2 ; \n"

442 
" A0 = float4(fxx, fxy, fxs, fx); \n"

443 
" A1 = float4(fxy, fyy, fys, fy); \n"

444 
" A2 = float4(fxs, fys, fss, fs); \n"

445 
" float3 x3 = abs(float3(fxx, fxy, fxs)); \n"

446 
" float maxa = max(max(x3.x, x3.y), x3.z); \n"

447 
" if(maxa > 1e10 ) \n"

448 
" {\n"

449 
" if(x3.y ==maxa ) \n"

450 
" { \n"

451 
" float4 TEMP = A1; A1 = A0; A0 = TEMP; \n"

452 
" }else if( x3.z == maxa ) \n"

453 
" { \n"

454 
" float4 TEMP = A2; A2 = A0; A0 = TEMP; \n"

455 
" } \n"

456 
" A0 /= A0.x; \n"

457 
" A1 = A1.x * A0; \n"

458 
" A2 = A2.x * A0; \n"

459 
" float2 x2 = abs(float2(A1.y, A2.y)); \n"

460 
" if( x2.y > x2.x ) \n"

461 
" { \n"

462 
" float3 TEMP = A2.yzw; \n"

463 
" A2.yzw = A1.yzw; \n"

464 
" A1.yzw = TEMP; \n"

465 
" x2.x = x2.y; \n"

466 
" } \n"

467 
" if(x2.x > 1e10) \n"

468 
" {\n"

469 
" A1.yzw /= A1.y; \n"

470 
" A2.yzw = A2.y * A1.yzw; \n"

471 
" if(abs(A2.z) > 1e10) \n"

472 
" {\n"

473 
// compute dx, dy, ds:

474 
<< 
475 
" dxys.z = A2.w /A2.z; \n"

476 
" dxys.y = A1.w  dxys.z*A1.z; \n"

477 
" dxys.x = A0.w  dxys.z*A0.z  dxys.y*A0.y; \n"

478 
" }\n"

479 
" }\n"

480 
" }\n"

481 
" offset.x = dxys.x > 0.6 ? 1 : 0 + dxys.x < 0.6 ? 1 : 0;\n"

482 
" offset.y = dxys.y > 0.6 ? 1 : 0 + dxys.y <  0.6? 1 : 0;\n"

483 
" i++; key_moved = i < MAX_REFINE && any(abs(offset)>0) ; \n"

484 
" if(key_moved)\n"

485 
" {\n"

486 
" offsets += offset; \n"

487 
" cc = texRECT(tex, TexCC.xy + offsets);\n"

488 
" v1.x = texRECT(tex , TexLC.xy + offsets).g;\n"

489 
" v1.y = texRECT(tex , TexRC.xy + offsets).g;\n"

490 
" v1.z = texRECT(tex , TexCD.xy + offsets).g;\n"

491 
" v1.w = texRECT(tex , TexCU.xy + offsets).g;\n"

492 
" v2.x = texRECT(tex , TexLD.xy + offsets).g;\n"

493 
" v2.y = texRECT(tex , TexLU.xy + offsets).g;\n"

494 
" v2.z = texRECT(tex , TexRD.xy + offsets).g;\n"

495 
" v2.w = texRECT(tex , TexRU.xy + offsets).g;\n"

496 
" v3.x = texRECT(texU, TexCC.xy + offsets).g;\n"

497 
" v4.x = texRECT(texU, TexLC.xy + offsets).g;\n"

498 
" v4.y = texRECT(texU, TexRC.xy + offsets).g;\n"

499 
" v4.z = texRECT(texU, TexCD.xy + offsets).g;\n"

500 
" v4.w = texRECT(texU, TexCU.xy + offsets).g;\n"

501 
" v3.y = texRECT(texD, TexCC.xy + offsets).g;\n"

502 
" v5.x = texRECT(texD, TexLC.xy + offsets).g;\n"

503 
" v5.y = texRECT(texD, TexRC.xy + offsets).g;\n"

504 
" v5.z = texRECT(texD, TexCD.xy + offsets).g;\n"

505 
" v5.w = texRECT(texD, TexCU.xy + offsets).g;\n"

506 
" }\n"

507 
" }while(key_moved);\n"

508 
<< 
509 
" bool test1 = (abs(cc.g + 0.5*dot(float3(fx, fy, fs), dxys ))> THRESHOLD1) ;\n"

510 
" float test2_v1= fxx*fyy  fxy *fxy; \n"

511 
" float test2_v2 = (fxx+fyy); \n"

512 
" test2_v2 = test2_v2*test2_v2;\n"

513 
" bool test2 = test2_v1>0 && test2_v2 < THRESHOLD2 * test2_v1; \n "

514 
//keep the point when the offset is less than 1

515 
<< 
516 
" FragData1 = test1 && test2 && all( abs(dxys) < 1)? float4( dog, dxys.xy+offsets, dxys.z) : float4(0, 0, 0, 0); \n"

517 
"}\n"

518 
<<'\0';

519  
520 
ProgramCG * program; 
521 
s_keypoint = program = new ProgramCG(buffer);

522 
//parameter

523 
_param_dog_texu = cgGetNamedParameter(*program, "texU");

524 
_param_dog_texd = cgGetNamedParameter(*program, "texD");

525  
526 
return 1; 
527  
528 
} 
529  
530 
//keypoint detection shader
//1. compare with 26 neighbours
//2. subpixel subscale localization
//3. output: [dog, offset(x,y,s)]

534  
535 
void ShaderBagCG:: LoadKeypointShader(float threshold, float edge_threshold) 
536 
{ 
537 
char buffer[10240]; 
538 
float threshold0 = threshold* (GlobalUtil::_SubpixelLocalization?0.8f:1.0f); 
539 
float threshold1 = threshold;

540 
float threshold2 = (edge_threshold+1)*(edge_threshold+1)/edge_threshold; 
541 
ostrstream out(buffer, 10240);

542 
out<<setprecision(8);

543 
streampos pos; 
544 
//tex(X)(Y)

545 
//X: (CLR) (CENTER 0, LEFT 1, RIGHT +1)

546 
//Y: (CDU) (CENTER 0, DOWN 1, UP +1)

547  
548 
out << "#define THRESHOLD0 " << threshold0 << "\n" 
549 
"#define THRESHOLD1 " << threshold1 << "\n" 
550 
"#define THRESHOLD2 " << threshold2 << "\n"; 
551 
out<< 
552 
"void main (\n"

553 
"float4 TexCC : TEXCOORD0, float4 TexLC : TEXCOORD1,\n"

554 
"float4 TexRC : TEXCOORD2, float4 TexCD : TEXCOORD3, \n"

555 
"float4 TexCU : TEXCOORD4, float4 TexLD : TEXCOORD5, \n"

556 
"float4 TexLU : TEXCOORD6, float4 TexRD : TEXCOORD7,\n"

557 
"out float4 FragData0 : COLOR0, out float4 FragData1 : COLOR1, \n"

558 
"uniform samplerRECT tex, uniform samplerRECT texU, uniform samplerRECT texD)\n"

559 
"{\n"

560 
" float4 v1, v2, gg;\n"

561 
" float2 TexRU = float2(TexRC.x, TexCU.y); \n"

562 
" float4 cc = texRECT(tex, TexCC.xy);\n"

563 
" v1.x = texRECT(tex, TexLC.xy).g;\n"

564 
" gg.x = texRECT(tex, TexLC.xy).r;\n"

565 
" v1.y = texRECT(tex, TexRC.xy).g;\n"

566 
" gg.y = texRECT(tex, TexRC.xy).r;\n"

567 
" v1.z = texRECT(tex, TexCD.xy).g;\n"

568 
" gg.z = texRECT(tex, TexCD.xy).r;\n"

569 
" v1.w = texRECT(tex, TexCU.xy).g;\n"

570 
" gg.w = texRECT(tex, TexCU.xy).r;\n"

571 
" v2.x = texRECT(tex, TexLD.xy).g;\n"

572 
" v2.y = texRECT(tex, TexLU.xy).g;\n"

573 
" v2.z = texRECT(tex, TexRD.xy).g;\n"

574 
" v2.w = texRECT(tex, TexRU.xy).g;\n"

575 
" float2 dxdy = (gg.yw  gg.xz); \n"

576 
" float grad = 0.5*length(dxdy);\n"

577 
" float theta = grad==0? 0: atan2(dxdy.y, dxdy.x);\n"

578 
" FragData0 = float4(cc.rg, grad, theta);\n"

579  
580 
//test against 8 neighbours

581 
//use variable to identify type of extremum

582 
//1.0 for local maximum and 0.5 for minimum

583 
<< 
584 
" float dog = 0.0; \n"

585 
" FragData1 = float4(0, 0, 0, 0); \n"

586 
" dog = cc.g > THRESHOLD0 && all(cc.gggg > max(v1, v2))?1.0: 0.0;\n"

587 
" dog = cc.g < THRESHOLD0 && all(cc.gggg < min(v1, v2))?0.5: dog;\n";

588  
589 
pos = out.tellp(); 
590 
//do edge supression first..

591 
//vector v1 is < (1, 0), (1, 0), (0,1), (0, 1)>

592 
//vector v2 is < (1,1), (1,1), (1,1), (1, 1)>

593  
594 
out<< 
595 
" if(dog == 0.0) return;\n"

596 
" float fxx, fyy, fxy; \n"

597 
" float4 D2 = v1.xyzw  cc.gggg;\n"

598 
" float2 D4 = v2.xw  v2.yz;\n"

599 
" fxx = D2.x + D2.y;\n"

600 
" fyy = D2.z + D2.w;\n"

601 
" fxy = 0.25*(D4.x + D4.y);\n"

602 
" float fxx_plus_fyy = fxx + fyy;\n"

603 
" float score_up = fxx_plus_fyy*fxx_plus_fyy; \n"

604 
" float score_down = (fxx*fyy  fxy*fxy);\n"

605 
" if( score_down <= 0  score_up > THRESHOLD2 * score_down)return;\n"

606 
//...

607 
<< 
608 
" float2 D5 = 0.5*(v1.ywv1.xz); \n"

609 
" float fx = D5.x, fy = D5.y ; \n"

610 
" float fs, fss , fxs, fys ; \n"

611 
" float2 v3; float4 v4, v5, v6;\n"

612 
//read 9 pixels of upper level

613 
<< 
614 
" v3.x = texRECT(texU, TexCC.xy).g;\n"

615 
" v4.x = texRECT(texU, TexLC.xy).g;\n"

616 
" v4.y = texRECT(texU, TexRC.xy).g;\n"

617 
" v4.z = texRECT(texU, TexCD.xy).g;\n"

618 
" v4.w = texRECT(texU, TexCU.xy).g;\n"

619 
" v6.x = texRECT(texU, TexLD.xy).g;\n"

620 
" v6.y = texRECT(texU, TexLU.xy).g;\n"

621 
" v6.z = texRECT(texU, TexRD.xy).g;\n"

622 
" v6.w = texRECT(texU, TexRU.xy).g;\n"

623 
//compare with 9 pixels of upper level

624 
//read and compare with 9 pixels of lower level

625 
//the maximum case

626 
<< 
627 
" if(dog == 1.0)\n"

628 
" {\n"

629 
" bool4 test = cc.gggg < max(v4, v6); \n"

630 
" if(cc.g < v3.x  any(test.xytest.zw))return; \n"

631 
" v3.y = texRECT(texD, TexCC.xy).g;\n"

632 
" v5.x = texRECT(texD, TexLC.xy).g;\n"

633 
" v5.y = texRECT(texD, TexRC.xy).g;\n"

634 
" v5.z = texRECT(texD, TexCD.xy).g;\n"

635 
" v5.w = texRECT(texD, TexCU.xy).g;\n"

636 
" v6.x = texRECT(texD, TexLD.xy).g;\n"

637 
" v6.y = texRECT(texD, TexLU.xy).g;\n"

638 
" v6.z = texRECT(texD, TexRD.xy).g;\n"

639 
" v6.w = texRECT(texD, TexRU.xy).g;\n"

640 
" test = cc.gggg<max(v5, v6); \n"

641 
" if(cc.g < v3.y  any(test.xytest.zw))return; \n"

642 
" }\n"

643 
//the minimum case

644 
<< 
645 
" else{\n"

646 
" bool4 test = cc.gggg>min(v4, v6); \n"

647 
" if(cc.g > v3.x  any(test.xytest.zw))return; \n"

648 
" v3.y = texRECT(texD, TexCC.xy).g;\n"

649 
" v5.x = texRECT(texD, TexLC.xy).g;\n"

650 
" v5.y = texRECT(texD, TexRC.xy).g;\n"

651 
" v5.z = texRECT(texD, TexCD.xy).g;\n"

652 
" v5.w = texRECT(texD, TexCU.xy).g;\n"

653 
" v6.x = texRECT(texD, TexLD.xy).g;\n"

654 
" v6.y = texRECT(texD, TexLU.xy).g;\n"

655 
" v6.z = texRECT(texD, TexRD.xy).g;\n"

656 
" v6.w = texRECT(texD, TexRU.xy).g;\n"

657 
" test = cc.gggg>min(v5, v6); \n"

658 
" if(cc.g > v3.y  any(test.xytest.zw))return; \n"

659 
" }\n";

660  
661 
if(GlobalUtil::_SubpixelLocalization)

662  
663 
// subpixel localization FragData1 = float4(dog, 0, 0, 0); return;

664 
out << 
665 
" fs = 0.5*( v3.x  v3.y ); //bug fix 9/12/2007 \n"

666 
" fss = v3.x + v3.y  cc.g  cc.g;\n"

667 
" fxs = 0.25 * ( v4.y + v5.x  v4.x  v5.y);\n"

668 
" fys = 0.25 * ( v4.w + v5.z  v4.z  v5.w);\n"

669 

670 
/////////////////////////////////////////////////////////////////

671 
// let dog difference be quatratic function of dx, dy, ds;

672 
// df(dx, dy, ds) = fx * dx + fy*dy + fs * ds +

673 
// + 0.5 * ( fxx * dx * dx + fyy * dy * dy + fss * ds * ds)

674 
// + (fxy * dx * dy + fxs * dx * ds + fys * dy * ds)

675 
// (fx, fy, fs, fxx, fyy, fss, fxy, fxs, fys are the derivatives)

676 

677 
//the local extremum satisfies

678 
// df/dx = 0, df/dy = 0, df/dz = 0

679 

680 
//that is

681 
// fx  fxx fxy fxs  dx

682 
// fy =  fxy fyy fys  * dy

683 
// fs  fxs fys fss  ds

684 
// need to solve dx, dy, ds

685  
686 
// Use Gauss elimination to solve the linear system

687 
<< 
688 
" float3 dxys = float3(0.0); \n"

689 
" float4 A0, A1, A2 ; \n"

690 
" A0 = float4(fxx, fxy, fxs, fx); \n"

691 
" A1 = float4(fxy, fyy, fys, fy); \n"

692 
" A2 = float4(fxs, fys, fss, fs); \n"

693 
" float3 x3 = abs(float3(fxx, fxy, fxs)); \n"

694 
" float maxa = max(max(x3.x, x3.y), x3.z); \n"

695 
" if(maxa >= 1e10 ) { \n"

696 
" if(x3.y ==maxa ) \n"

697 
" { \n"

698 
" float4 TEMP = A1; A1 = A0; A0 = TEMP; \n"

699 
" }else if( x3.z == maxa ) \n"

700 
" { \n"

701 
" float4 TEMP = A2; A2 = A0; A0 = TEMP; \n"

702 
" } \n"

703 
" A0 /= A0.x; \n"

704 
" A1 = A1.x * A0; \n"

705 
" A2 = A2.x * A0; \n"

706 
" float2 x2 = abs(float2(A1.y, A2.y)); \n"

707 
" if( x2.y > x2.x ) \n"

708 
" { \n"

709 
" float3 TEMP = A2.yzw; \n"

710 
" A2.yzw = A1.yzw; \n"

711 
" A1.yzw = TEMP; \n"

712 
" x2.x = x2.y; \n"

713 
" } \n"

714 
" if(x2.x >= 1e10) { \n"

715 
" A1.yzw /= A1.y; \n"

716 
" A2.yzw = A2.y * A1.yzw; \n"

717 
" if(abs(A2.z) >= 1e10) { \n"

718 
// compute dx, dy, ds:

719 
<< 
720 
" dxys.z = A2.w /A2.z; \n"

721 
" dxys.y = A1.w  dxys.z*A1.z; \n"

722 
" dxys.x = A0.w  dxys.z*A0.z  dxys.y*A0.y; \n"

723  
724 
//one more threshold which I forgot in versions prior to 286

725 
<< 
726 
" bool bugfix_test = (abs(cc.g + 0.5*dot(float3(fx, fy, fs), dxys )) < THRESHOLD1) ;\n"

727 
" if(bugfix_test  any(abs(dxys) >= 1.0)) dog = 0; \n"

728 
" }}}\n"

729 
//keep the point when the offset is less than 1

730 
<< 
731 
" FragData1 = float4( dog, dxys); \n"

732 
"}\n" <<'\0'; 
733  
734 
else out<<

735 
" FragData1 = float4( dog, 0, 0, 0) ; \n"

736 
"}\n" <<'\0'; 
737  
738 
ProgramCG * program; 
739 
s_keypoint = program = new ProgramCG(buffer);

740 
if(!program>IsValidProgram())

741 
{ 
742 
delete program;

743 
out.seekp(pos); 
744 
out << 
745 
" FragData1 = float4( fabs(cc.g) > 2.0 * THRESHOLD0? dog : 0, 0, 0, 0) ; \n"

746 
"}\n" <<'\0'; 
747 
s_keypoint = program = new ProgramCG(buffer);

748 
GlobalUtil::_SubpixelLocalization = 0;

749 
std::cerr<<"Detection simplified on this hardware"<<endl;

750 
} 
751 
//parameter

752 
_param_dog_texu = cgGetNamedParameter(*program, "texU");

753 
_param_dog_texd = cgGetNamedParameter(*program, "texD");

754  
755  
756  
757  
758 
} 
759  
760  
761 
void ShaderBagCG::SetDogTexParam(int texU, int texD) 
762 
{ 
763 
cgGLSetTextureParameter(_param_dog_texu, texU); 
764 
cgGLEnableTextureParameter(_param_dog_texu); 
765 
cgGLSetTextureParameter(_param_dog_texd, texD); 
766 
cgGLEnableTextureParameter(_param_dog_texd); 
767 
} 
768  
769 
void ShaderBagCG::SetGenListStepParam(int tex, int tex0) 
770 
{ 
771 
cgGLSetTextureParameter(_param_genlist_step_tex, tex); 
772 
cgGLEnableTextureParameter(_param_genlist_step_tex); 
773 
cgGLSetTextureParameter(_param_genlist_step_tex0, tex0); 
774 
cgGLEnableTextureParameter(_param_genlist_step_tex0); 
775 
} 
776  
777 
void ShaderBagCG::SetGenVBOParam(float width, float fwidth, float size) 
778 
{ 
779 
float sizes[4] = {size*3.0f, fwidth, width, 1.0f/width}; 
780 
cgGLSetParameter4fv(_param_genvbo_size, sizes); 
781 
} 
782  
783  
784 
// Generates the Cg source of a horizontal filter and compiles it.
//
//  kernel  filter weights, `width` entries.
//  offset  horizontal sample offset of each tap, `width` entries; a tap whose
//          offset is exactly 0 reuses the centre sample `or`.
//  width   number of taps.
//
// The shader writes the centre sample to FragColor.r and the filtered value
// to FragColor.b. With GlobalUtil::_BetaFilter the taps are accumulated four
// at a time as a float4. Returns the newly allocated program.
ProgramGPU* FilterGLCG::CreateFilterH(float kernel[], float offset[], int width)
{
    char buffer[10240];
    std::ostrstream out(buffer, 10240);
    out << std::setprecision(8);

    // Entry point signature, identical for both variants.
    out << "void main(uniform samplerRECT tex,"
           "\n\tin float4 TexCoord0: TEXCOORD0,"
           "\n\tout float4 FragColor : COLOR0 )";

    if(!GlobalUtil::_BetaFilter)
    {
        // Scalar variant: one statement pair per tap.
        out << "\n{\n\tfloat intensity = 0.0 ; float2 pos;\n";

        for(int tap = 0; tap < width; ++tap)
        {
            if(offset[tap] != 0.0)
            {
                out << "pos = TexCoord0.xy + float2(float(" << offset[tap] << ") , 0);\n";
                out << "intensity+= " << kernel[tap] << "*texRECT(tex, pos).r;\n";
            }
            else
            {
                out << "float or = texRECT(tex, TexCoord0.xy).r;\n";
                out << "intensity+= or * " << kernel[tap] << ";\n";
            }
        }
    }
    else
    {
        // Vector variant: groups of four taps, padded with zeros.
        out << "\n{\n\tfloat4 intensity4 = float4(0, 0, 0, 0), data;\n";
        out << "float or = texRECT(tex, TexCoord0.xy).r, intensity;\n";

        for(int base = 0; base < width; base += 4)
        {
            const int stop = base + 4;

            out << "data = float4(";
            for(int tap = base; tap < stop; ++tap)
            {
                if(tap != base) out << ", \n";

                if(tap >= width)            out << "0";
                else if(offset[tap] == 0.0) out << "or";
                else out << "texRECT(tex, TexCoord0.xy + float2(float(" << offset[tap] << ") , 0)).r";
            }
            out << ");\n";

            out << "intensity4 += data * float4(";
            for(int tap = base; tap < stop; ++tap)
            {
                if(tap != base) out << ", ";

                if(tap >= width) out << "0";
                else             out << kernel[tap];
            }
            out << ");\n";
        }
        out << "intensity4.xy += intensity4.zw;\n";
        out << "intensity = intensity4.x + intensity4.y;\n";
    }

    //copy original data to red channel
    out << "FragColor.r = or;\n";
    out << "FragColor.b = intensity;}\n" << '\0';

    return new ProgramCG(buffer);
}
858  
859  
860 
// Generates the Cg source of a vertical filter and compiles it.
//
//  kernel  filter weights, `height` entries.
//  offset  vertical sample offset of each tap, `height` entries; a tap whose
//          offset is exactly 0 reuses the centre sample `orb.y`.
//  height  number of taps.
//
// The shader filters the b channel of the input. It writes the filtered
// value to FragColor.r, the difference (filtered value - input r channel) to
// FragColor.g, and the unfiltered b channel to FragColor.b.
// Returns the newly allocated program.
ProgramGPU* FilterGLCG::CreateFilterV(float kernel[], float offset[], int height)
{
    char buffer[10240];
    std::ostrstream out(buffer, 10240);
    out << std::setprecision(8);

    if(GlobalUtil::_BetaFilter)
    {
        // Vector variant: groups of four taps, padded with zeros.
        out<< "void main(uniform samplerRECT tex,";
        out<<"\n\tin float4 TexCoord0: TEXCOORD0,";
        out<<"\n\tout float4 FragColor : COLOR0 )";
        out<<"\n{\n\tfloat4 intensity4 = float4(0, 0, 0, 0), data;\n";
        out<<"float2 orb = texRECT(tex, TexCoord0.xy).rb; float intensity;\n";

        for(int i = 0; i < height; i += 4)
        {
            out <<"data = float4(";
            for(int j = i; j < i + 4; j++)
            {
                if(j != i) out <<", \n";
                if(j >= height)
                {
                    out<<"0";
                }else if(offset[j]==0.0)
                {
                    out<<"orb.y";
                }else
                {
                    out<<"texRECT(tex, TexCoord0.xy + float2(0, float("<<offset[j] <<"))).b";
                }
            }
            out << ");\n";
            out << "intensity4 += data * float4(";
            for(int k = i; k < i + 4; k++)
            {
                if(k != i) out <<", ";
                if(k >= height) out<<"0";
                else out<<kernel[k];
            }
            out << ");\n";
        }
        out << "intensity4.xy += intensity4.zw;\n";
        out << "intensity = intensity4.x + intensity4.y;\n";
    }
    else
    {
        // Scalar variant: one statement pair per tap.
        out<< "void main(uniform samplerRECT tex,";
        out<<"\n\tin float4 TexCoord0: TEXCOORD0,";
        out<<"\n\tout float4 FragColor : COLOR0 )";
        out<<"\n{\n\tfloat intensity = 0.0 ; float2 pos;\n";

        for(int i = 0; i < height; i++)
        {
            if(offset[i]==0.0)
            {
                out<<"float2 orb = texRECT(tex, TexCoord0.xy).rb;\n";
                out<<"intensity+= orb.y * "<<kernel[i]<<";\n";
            }else
            {
                out<<"pos = TexCoord0.xy + float2(0, float("<<offset[i] <<"));\n";
                out<<"intensity+= "<<kernel[i]<<"*texRECT(tex, pos).b;\n";
            }
        }
    }
    out<<"FragColor.b = orb.y;\n";
    out<<"FragColor.g = intensity - orb.x;\n"; // difference of gaussian..
    out<<"FragColor.r = intensity;}\n"<<'\0';

    return new ProgramCG(buffer);
}
931  
932  
933 
// Generates the Cg source of a horizontal filter for packed textures and
// compiles it.
//
//  kernel  filter weights, `width` entries; the centre weight is at
//          kernel[width >> 1].
//  offset  not used by this variant.
//  width   number of taps.
//
// The swizzles below treat each fetched texel `pc` as holding two horizontal
// sub-pixels per row: (r, g) and (b, a). For every neighbouring texel the
// loop works out the weights of the three sub-pixel distances that can
// contribute (weight[0..2]) and emits the matching swizzled multiply-add.
// Returns the newly allocated program.
ProgramGPU* FilterGLCG::CreateFilterHPK(float kernel[], float offset[], int width)
{
    //both h and v are packed...
    int i, j, xw, xwn;
    int halfwidth = width >> 1;
    float * pf = kernel + halfwidth;        // pf[d] is the weight at signed distance d
    int nhpixel = (halfwidth + 1) >> 1;     //how many neighbour pixels need to be looked up
    int npixel = (nhpixel << 1) + 1;        // neighbours on both sides plus the centre
    char buffer[10240];
    float weight[3];
    std::ostrstream out(buffer, 10240);
    out << std::setprecision(8);

    out<< "void main(uniform samplerRECT tex, float4 TexCoord0 : TEXCOORD0, out float4 FragColor : COLOR0 ){\n";
    out<< "float4 result = float4(0, 0, 0, 0); \nfloat4 pc; float2 coord; \n";
    ///use multi texture coordinate because nhpixels can be at most 3
    for(i = 0; i < npixel; i++)
    {
        // Texel offset of this fetch relative to the centre: -nhpixel..+nhpixel.
        out<<"coord = TexCoord0.xy + float2(float("<<i-nhpixel<<"),0);\n";
        out<<"pc=texRECT(tex, coord);\n";
        if(GlobalUtil::_PreciseBorder) out<<"if(coord.x < 0) pc = pc.rrbb;\n";

        //for each subpixel j in center, the weight of subpixel k
        xw = (i - nhpixel)*2;
        for(j = 0; j < 3; j++)
        {
            xwn = xw + j - 1;
            // Distances outside the kernel support contribute nothing.
            weight[j] = (xwn < -halfwidth || xwn > halfwidth) ? 0 : pf[xwn];
        }

        if(weight[1] == 0.0)
        {
            out<<"result += float4("<<weight[2]<<","<<weight[0]<<","<<weight[2]<<","<<weight[0]<<")*pc.grab;\n";
        }
        else
        {
            out<<"result += float4("<<weight[1]<<", "<<weight[0]<<", "<<weight[1]<<", "<<weight[0]<<")*pc.rrbb;\n";
            out<<"result += float4("<<weight[2]<<", "<<weight[1]<<", "<<weight[2]<<", "<<weight[1]<<")*pc.ggaa;\n";
        }
    }
    out<<
    " FragColor = result; }\n"<<'\0';
    return new ProgramCG(buffer);
}
981  
982 
// Generates the Cg fragment program for the vertical pass of a filter on a
// packed texture and wraps it in a new ProgramCG.
//   kernel : filter taps; kernel[height >> 1] is used as the center tap
//   offset : not read by this packed variant
//   height : number of taps in kernel
// The shader source is written into a fixed 10240-byte stack buffer.
// Returns a heap-allocated ProgramCG (raw pointer, ownership passes to the caller).
// NOTE(review): the swizzles suggest that r/b and g/a are the vertically
// adjacent sub-pixels of one packed texel -- confirm against the packing code.
ProgramGPU* FilterGLCG::CreateFilterVPK(float kernel[], float offset[], int height)
{
	//both h and v are packed...
	const int halfh = height >> 1;
	const float* pf = kernel + halfh;           // pf[d] is the tap at signed distance d from the center
	const int nhpixel = (halfh + 1) >> 1;       // how many neighbour pixels need to be looked up on each side
	const int npixel = (nhpixel << 1) + 1;      // total number of texels fetched
	char buffer[10240];
	float weight[3];
	ostrstream out(buffer, 10240);
	out << setprecision(8);

	out << "void main(uniform samplerRECT tex, float4 TexCoord0 : TEXCOORD0, out float4 FragColor : COLOR0 ){\n";
	out << "float4 result = float4(0, 0, 0, 0);\nfloat4 pc; float2 coord;\n";

	for(int i = 0; i < npixel; i++)
	{
		// fetch the texel at vertical offset (i - nhpixel)
		out << "coord = TexCoord0.xy + float2(0, float(" << i - nhpixel << "));\n";
		out << "pc=texRECT(tex, coord);\n";
		if(GlobalUtil::_PreciseBorder) out << "if(coord.y < 0) pc = pc.rgrg;\n";

		// weight[0..2] are the taps at sub-pixel distances yw-1, yw, yw+1;
		// distances outside [-halfh, halfh] contribute nothing
		const int yw = (i - nhpixel) * 2;
		for(int j = 0; j < 3; j++)
		{
			const int ywn = yw + j - 1;
			weight[j] = (ywn < -halfh || ywn > halfh) ? 0 : pf[ywn];
		}

		if(weight[1] == 0.0)
		{
			out << "result += float4(" << weight[2] << "," << weight[2] << "," << weight[0] << "," << weight[0] << ")*pc.barg;\n";
		}
		else
		{
			out << "result += float4(" << weight[1] << "," << weight[1] << "," << weight[0] << "," << weight[0] << ")*pc.rgrg;\n";
			out << "result += float4(" << weight[2] << "," << weight[2] << "," << weight[1] << "," << weight[1] << ")*pc.baba;\n";
		}
	}
	// terminate the C string explicitly: ostrstream does not do it
	out << " FragColor = result; }\n" << '\0';
	return new ProgramCG( buffer);
}
1028  
1029  
1030 
void ShaderBagCG::LoadGenListShader(int ndoglev, int nlev) 
1031 
{ 
1032 
ProgramCG * program; 
1033  
1034 
s_genlist_init_tight = new ProgramCG(

1035 
"void main (\n"

1036 
"uniform samplerRECT tex, in float4 TexCoord0 : TEXCOORD0,\n"

1037 
"in float4 TexCoord1 : TEXCOORD1, in float4 TexCoord2 : TEXCOORD2, in float4 TexCoord3 : TEXCOORD3,\n"

1038 
"out float4 FragColor : COLOR0){\n"

1039 
"float4 helper = float4( texRECT(tex, TexCoord0.xy).r, texRECT(tex, TexCoord1.xy).r,\n"

1040 
"texRECT(tex, TexCoord2.xy).r, texRECT(tex, TexCoord3.xy).r);\n"

1041 
"FragColor = float4(helper>0.0);\n"

1042 
"}");

1043  
1044 
s_genlist_init_ex = program = new ProgramCG(

1045 
"void main (uniform float2 bbox, \n"

1046 
"uniform samplerRECT tex, \n"

1047 
"in float4 TexCoord0 : TEXCOORD0,\n"

1048 
"in float4 TexCoord1 : TEXCOORD1, \n"

1049 
"in float4 TexCoord2 : TEXCOORD2, \n"

1050 
"in float4 TexCoord3 : TEXCOORD3,\n"

1051 
"out float4 FragColor : COLOR0){\n"

1052 
"float4 helper = float4( \n"

1053 
"texRECT(tex, TexCoord0.xy).r, texRECT(tex, TexCoord1.xy).r,\n"

1054 
"texRECT(tex, TexCoord2.xy).r, texRECT(tex, TexCoord3.xy).r);\n"

1055 
"bool4 helper4 = bool4(TexCoord0.xy < bbox, TexCoord3.xy < bbox); \n"

1056 
"bool4 helper2 = helper4.xzxz && helper4.yyww; \n"

1057 
"FragColor = float4(helper2 && (helper>0.0 ));\n"

1058 
"}");

1059 
_param_genlist_init_bbox = cgGetNamedParameter( *program, "bbox");

1060  
1061  
1062 
//reduction ...

1063 
s_genlist_histo = new ProgramCG(

1064 
"void main (\n"

1065 
"uniform samplerRECT tex, in float2 TexCoord0 : TEXCOORD0,\n"

1066 
"in float2 TexCoord1 : TEXCOORD1, in float2 TexCoord2 : TEXCOORD2, in float2 TexCoord3 : TEXCOORD3,\n"

1067 
"out float4 FragColor : COLOR0){\n"

1068 
"float4 helper; float4 helper2; \n"

1069 
"helper = texRECT(tex, TexCoord0); helper2.xy = helper.xy + helper.zw; \n"

1070 
"helper = texRECT(tex, TexCoord1); helper2.zw = helper.xy + helper.zw; \n"

1071 
"FragColor.rg = helper2.xz + helper2.yw;\n"

1072 
"helper = texRECT(tex, TexCoord2); helper2.xy = helper.xy + helper.zw; \n"

1073 
"helper = texRECT(tex, TexCoord3); helper2.zw = helper.xy + helper.zw; \n"

1074 
"FragColor.ba= helper2.xz+helper2.yw;\n"

1075 
"}");

1076  
1077  
1078 
//read of the first part, which generates tex coordinates

1079  
1080 
s_genlist_start= program = LoadGenListStepShader(1, 1); 
1081 
_param_ftex_width= cgGetNamedParameter(*program, "width");

1082 
_param_genlist_start_tex0 = cgGetNamedParameter(*program, "tex0");

1083 
//stepping

1084 
s_genlist_step = program = LoadGenListStepShader(0, 1); 
1085 
_param_genlist_step_tex= cgGetNamedParameter(*program, "tex");

1086 
_param_genlist_step_tex0= cgGetNamedParameter(*program, "tex0");

1087  
1088  
1089 
} 
1090  
1091 
// Builds the Cg fragment program for one pass of the feature-list traversal.
//   start : non-zero -> the shader derives a linear index from the fragment
//           position and the uniform "width" and starts at position (0.5, 0.5);
//           zero     -> the shader reads position (rg) and index (b) from the
//           sampler "tex".
//   step  : number of levels descended in this pass; one sampler "tex<i>" is
//           declared per level.
// At each level the emitted code reads the four counts of the current node,
// picks the child that contains `index`, subtracts the counts of the children
// that were skipped, and refines the position with pos = 2*pos + child offset.
// The output is float4(pos, index, 1).
// Returns a heap-allocated ProgramCG (raw pointer, ownership passes to the caller).
ProgramCG* ShaderBagCG::LoadGenListStepShader(int start, int step)
{
	int i;
	char buffer[10240];
	ostrstream out(buffer, 10240);
	out<<"void main(out float4 FragColor : COLOR0, \n";

	for(i = 0; i < step; i++) out<<"uniform samplerRECT tex"<<i<<",\n";

	if(start)
	{
		out<<"uniform float width, \nin float2 tpos : TEXCOORD0){\n";
		out<<"float index = floor(tpos.y) * width + floor(tpos.x) + 0.0001;\n";
		out<<"float2 pos = float2(0.5, 0.5);\n";
	}else
	{
		out<<"uniform samplerRECT tex, in float2 tpos: TEXCOORD0 ){\n";
		out<<"float4 tc = texRECT( tex, tpos);\n";
		out<<"float2 pos = tc.rg; float index = tc.b;\n";
	}
	out<<"float2 sum; float4 cc;\n";

	if(step>0)
	{
		out<<"float2 cpos = float2(0.5, 0.5);\t float2 opos;\n";
		for(i = 0; i < step; i++)
		{
//#define SETP_CODE_2

#ifndef SETP_CODE_2
			// two-level comparison: first decide between {r,g} and {b,a}
			out<<"cc = texRECT(tex"<<i<<", pos); sum.x = cc.r + cc.g;\n";
			out<<"if (index < sum.x){ if(index < cc.r) opos = cpos.xx; else {opos = cpos.yx; index -= cc.r;}}\n";
			out<<"else {index -= sum.x; if(index < cc.b) opos = cpos.xy; else{opos = cpos.yy; index -= cc.b;}}";
			out<<"pos = (pos + pos + opos);\n";
#else
			// linear if/else chain over the running sums
			out<<"cc = texRECT(tex"<<i<<", pos);\n";
			out<<"if (index < cc.r) opos = cpos.xx;\n";
			out<<"else if (index < cc.r + cc.g){opos = cpos.yx; index -= cc.r;}\n";
			out<<"else if (index < cc.r + cc.g + cc.b){opos = cpos.xy; index -= (cc.r + cc.g);}\n";
			out<<"else {opos = cpos.yy; index -= (cc.r + cc.g + cc.b);}\n";
			out<<"pos = (pos + pos + opos);\n";
#endif
		}
	}
	out<<"FragColor = float4(pos, index, 1);\n";
	// terminate the C string explicitly: ostrstream does not do it
	out<<"}\n"<<'\0';
	return new ProgramCG(buffer);
}
1159  
1160 
void ShaderBagCG::SetGenListInitParam(int w, int h) 
1161 
{ 
1162 
float bbox[2] = {w 1.0f, h  1.0f}; 
1163 
cgGLSetParameter2fv(_param_genlist_init_bbox, bbox); 
1164 
} 
1165  
1166 
void ShaderBagCG::SetGenListStartParam(float width, int tex0) 
1167 
{ 
1168 
cgGLSetParameter1f(_param_ftex_width, width); 
1169  
1170 
if(_param_genlist_start_tex0)

1171 
{ 
1172 
cgGLSetTextureParameter(_param_genlist_start_tex0, tex0); 
1173 
cgGLEnableTextureParameter(_param_genlist_start_tex0); 
1174 
} 
1175 
} 
1176  
1177 
void ShaderBagCG::LoadDescriptorShaderF2()

1178 
{ 
1179 
//one shader outpout 128/8 = 16 , each fragout encodes 4

1180 
//const double twopi = 2.0*3.14159265358979323846;

1181 
//const double rpi = 8.0/twopi;

1182 
char buffer[10240]; 
1183 
ostrstream out(buffer, 10240);

1184  
1185 
out<<setprecision(8);

1186  
1187 
out<<"\n"

1188 
"#define M_PI 3.14159265358979323846\n"

1189 
"#define TWO_PI (2.0*M_PI)\n"

1190 
"#define RPI 1.2732395447351626861510701069801\n"

1191 
"#define WF size.z\n"

1192 
"void main(uniform samplerRECT tex, \n"

1193 
"uniform samplerRECT gradTex, \n"

1194 
"uniform float4 dsize, \n"

1195 
"uniform float3 size, \n"

1196 
"in float2 TexCoord0 : TEXCOORD0, \n"

1197 
"out float4 FragData0:COLOR0, \n"

1198 
"out float4 FragData1:COLOR1) \n"

1199 
"{\n"

1200 
" float2 dim = size.xy; //image size \n"

1201 
" float index = dsize.x * floor(TexCoord0.y * 0.5) + TexCoord0.x;\n"

1202 
" float idx = 8.0 * frac(index * 0.125) + 8.0 * floor(2.0 * frac(TexCoord0.y * 0.5)); \n"

1203 
" index = floor(index*0.125) + 0.49; \n"

1204 
" float2 coord = floor( float2( fmod(index, dsize.z), index*dsize.w)) + 0.5 ;\n"

1205 
" float2 pos = texRECT(tex, coord).xy; \n"

1206 
" if(any(pos.xy <= 1)  any(pos.xy >=dim1)) "

1207 
" //discard; \n"

1208 
" { FragData0 = FragData1 = float4(0.0); return; }\n"

1209 
" float anglef = texRECT(tex, coord).z;\n"

1210 
" if(anglef > M_PI) anglef = TWO_PI;\n"

1211 
" float sigma = texRECT(tex, coord).w; \n"

1212 
" float spt = abs(sigma * WF); //default to be 3*sigma \n";

1213  
1214 
//rotation

1215 
out<< 
1216 
" float4 cscs, rots; \n"

1217 
" sincos(anglef, cscs.y, cscs.x); \n"

1218 
" cscs.zw =  cscs.xy; \n"

1219 
" rots = cscs /spt; \n"

1220 
" cscs *= spt; \n";

1221  
1222 
//here cscs is actually (cos, sin, cos, sin) * (factor: 3)*sigma

1223 
//and rots is (cos, sin, cos, sin ) /(factor*sigma)

1224 
//devide the 4x4 sift grid into 16 1x1 block, and each corresponds to a shader thread

1225 
//To use linear interoplation, 1x1 is increased to 2x2, by adding 0.5 to each side

1226 
out<< 
1227 
" float4 temp; float2 pt, offsetpt; \n"

1228 
" /*the fraction part of idx is .5*/ \n"

1229 
" offsetpt.x = 4.0 * frac(idx*0.25)  2.0; \n"

1230 
" offsetpt.y = floor(idx*0.25)  1.5; \n"

1231 
" temp = cscs.xwyx*offsetpt.xyxy; \n"

1232 
" pt = pos + temp.xz + temp.yw; \n";

1233 

1234 
//get a horizontal bounding box of the rotated rectangle

1235 
out<< 
1236 
" float2 bwin = abs(cscs.xy); \n"

1237 
" float bsz = bwin.x + bwin.y; \n"

1238 
" float4 sz; float2 spos; \n"

1239 
" sz.xy = max(pt  bsz, float2(1,1));\n"

1240 
" sz.zw = min(pt + bsz, dim  2); \n"

1241 
" sz = floor(sz)+0.5;"; //move sample point to pixel center 
1242  
1243 
//get voting for two box

1244 
out<<"\n"

1245 
" float4 DA, DB; \n"

1246 
" DA = DB = float4(0, 0, 0, 0); \n"

1247 
" for(spos.y = sz.y; spos.y <= sz.w; spos.y+=1.0) \n"

1248 
" { \n"

1249 
" for(spos.x = sz.x; spos.x <= sz.z; spos.x+=1.0) \n"

1250 
" { \n"

1251 
" float2 diff = spos  pt; \n"

1252 
" temp = rots.xywx * diff.xyxy; \n"

1253 
" float2 nxy = (temp.xz + temp.yw); \n"

1254 
" float2 nxyn = abs(nxy); \n"

1255 
" if(all(nxyn < float2(1.0)))\n"

1256 
" {\n"

1257 
" float4 cc = texRECT(gradTex, spos); \n"

1258 
" float mod = cc.b; float angle = cc.a; \n"

1259 
" float theta0 = (anglef  angle)*RPI; \n"

1260 
" float theta = theta0 < 0? theta0 + 8.0 : theta0; // fmod(theta0 + 8.0, 8.0); \n"

1261 
" diff = nxy + offsetpt.xy; \n"

1262 
" float ww = exp(0.125*dot(diff, diff));\n"

1263 
" float2 weights = 1  nxyn;\n"

1264 
" float weight = weights.x * weights.y *mod*ww; \n"

1265 
" float theta1 = floor(theta); \n"

1266 
" float weight2 = (theta  theta1) * weight; \n"

1267 
" float weight1 = weight  weight2;\n"

1268 
" DA += float4(theta1 == float4(0, 1, 2, 3))*weight1; \n"

1269 
" DA += float4(theta1 == float4(7, 0, 1, 2))*weight2; \n"

1270 
" DB += float4(theta1 == float4(4, 5, 6, 7))*weight1; \n"

1271 
" DB += float4(theta1 == float4(3, 4, 5, 6))*weight2; \n"

1272 
" }\n"

1273 
" }\n"

1274 
" }\n";

1275  
1276 
out<< 
1277 
" FragData0 = DA; FragData1 = DB;\n"

1278 
"}\n"<<'\0'; 
1279  
1280 
ProgramCG * program; 
1281 
s_descriptor_fp = program = new ProgramCG(buffer);

1282 
_param_descriptor_gtex = cgGetNamedParameter(*program, "gradTex");

1283 
_param_descriptor_size = cgGetNamedParameter(*program, "size");

1284 
_param_descriptor_dsize = cgGetNamedParameter(*program, "dsize");

1285  
1286  
1287 
} 
1288  
1289 
//the shader that computes the descriptors

1290 
void ShaderBagCG::LoadDescriptorShader()

1291 
{ 
1292 
GlobalUtil::_DescriptorPPT = 16;

1293 
LoadDescriptorShaderF2(); 
1294 
} 
1295  
1296 
void ShaderBagCG::LoadOrientationShader()

1297 
{ 
1298  
1299 
char buffer[10240]; 
1300 
ostrstream out(buffer,10240);

1301  
1302  
1303 
out<<"\n"

1304 
"#define GAUSSIAN_WF "<<GlobalUtil::_OrientationGaussianFactor<<" \n" 
1305 
"#define SAMPLE_WF ("<<GlobalUtil::_OrientationWindowFactor<< " )\n" 
1306 
"#define ORIENTATION_THRESHOLD "<< GlobalUtil::_MulitiOrientationThreshold << "\n" 
1307 
"void main(uniform samplerRECT tex, \n"

1308 
"uniform samplerRECT gradTex, \n"

1309 
" uniform float4 size, \n"

1310 
" in float2 TexCoord0 : TEXCOORD0, \n"

1311 
" out float4 FeatureData : COLOR0 ";

1312  
1313 
//multi orientation output

1314 
//use one additional texture to store up to four orientations

1315 
//when we use one 32bit float to store two orientations, no extra texture is required

1316  
1317 
if(GlobalUtil::_MaxOrientation >1 && GlobalUtil::_OrientationPack2 == 0) 
1318 
out<<", out float4 OrientationData : COLOR1";

1319  
1320 
if(GlobalUtil::_SubpixelLocalization  GlobalUtil::_KeepExtremumSign)

1321 
{ 
1322 
//data for subpixel localization

1323 
out<<", uniform samplerRECT texS";

1324 
} 
1325  
1326 
//use 9 float4 to store histogram of 36 directions

1327 
out<<") \n"

1328 
"{ \n"

1329 
" float4 bins[10]; \n"

1330 
" for (int i=0; i<9; i++) bins[i] = float4(0,0,0,0); \n"

1331 
" const float4 loc = texRECT(tex, TexCoord0); \n"

1332 
" const bool orientation_mode = (size.z != 0); \n"

1333 
" float2 pos = loc.xy; \n"

1334 
" float sigma = orientation_mode? abs(size.z) : loc.w; \n";

1335 
if(GlobalUtil::_SubpixelLocalization  GlobalUtil::_KeepExtremumSign)

1336 
{ 
1337 
out<< 
1338 
" if(orientation_mode) {\n"

1339 
" float4 keyx = texRECT(texS, pos);\n"

1340 
" sigma = sigma * pow(size.w, keyx.w); \n"

1341 
" pos.xy = pos.xy + keyx.yz; \n"

1342 
" #if " << GlobalUtil::_KeepExtremumSign << "\n" 
1343 
" if(keyx.x<0.6) sigma =  sigma;\n"

1344 
" #endif\n"

1345 
" }\n";

1346 
} 
1347  
1348 
out<< 
1349 
" //bool fixed_orientation = (size.z < 0); \n"

1350 
" if(size.z < 0) {FeatureData = float4(pos, 0, sigma); return;}"

1351 
" const float gsigma = sigma * GAUSSIAN_WF; \n"

1352 
" const float2 win = abs(sigma.xx) * (SAMPLE_WF * GAUSSIAN_WF); \n"

1353 
" const float2 dim = size.xy; \n"

1354 
" const float dist_threshold = win.x*win.x+0.5; \n"

1355 
" const float factor = 0.5/(gsigma*gsigma); \n"

1356 
" float4 sz; float2 spos; \n"

1357 
" //if(any(pos.xy <= 1)) discard; \n"

1358 
" sz.xy = max( pos  win, float2(1,1)); \n"

1359 
" sz.zw = min( pos + win, dim2); \n"

1360 
" sz = floor(sz)+0.5;";

1361 
//loop to get the histogram

1362  
1363 
out<<"\n"

1364 
" for(spos.y = sz.y; spos.y <= sz.w; spos.y+=1.0) \n"

1365 
" { \n"

1366 
" for(spos.x = sz.x; spos.x <= sz.z; spos.x+=1.0) \n"

1367 
" { \n"

1368 
" const float2 offset = spos  pos; \n"

1369 
" const float sq_dist = dot(offset,offset); \n"

1370 
" if( sq_dist < dist_threshold){ \n"

1371 
" const float4 cc = texRECT(gradTex, spos); \n"

1372 
" const float grad = cc.b; float theta = cc.a; \n"

1373 
" float idx = floor(degrees(theta)*0.1); \n"

1374 
" const float weight = grad*exp(sq_dist * factor); \n"

1375 
" if(idx < 0 ) idx += 36; \n"

1376 
" const float vidx = 4.0 * fract(idx * 0.25);//fmod(idx, 4); \n"

1377 
" const float4 inc = weight*float4(vidx == float4(0,1,2,3)); ";

1378  
1379 
if(GlobalUtil::_UseDynamicIndexing && strcmp(cgGetProfileString(ProgramCG::_FProfile), "gp4fp")==0) 
1380 
// if(ProgramCG::_FProfile == CG_PROFILE_GPU_FP) this enumerant is not defined in cg1.5

1381 
{ 
1382 
//gp_fp supports dynamic indexing

1383 
out<<"\n"

1384 
" int iidx = int(floor(idx*0.25)); \n"

1385 
" bins[iidx]+=inc; \n"

1386 
" } \n"

1387 
" } \n"

1388 
" }";

1389  
1390 
}else

1391 
{ 
1392 
//nvfp40 still does not support dynamic array indexing

1393 
//unrolled binary search...

1394 
out<<"\n"

1395 
" if(idx < 16) \n"

1396 
" { \n"

1397 
" if(idx < 8) \n"

1398 
" { \n"

1399 
" if(idx < 4) { bins[0]+=inc;} \n"

1400 
" else { bins[1]+=inc;} \n"

1401 
" }else \n"

1402 
" { \n"

1403 
" if(idx < 12){ bins[2]+=inc;} \n"

1404 
" else { bins[3]+=inc;} \n"

1405 
" } \n"

1406 
" }else if(idx < 32) \n"

1407 
" { \n"

1408 
" if(idx < 24) \n"

1409 
" { \n"

1410 
" if(idx <20) { bins[4]+=inc;} \n"

1411 
" else { bins[5]+=inc;} \n"

1412 
" }else \n"

1413 
" { \n"

1414 
" if(idx < 28){ bins[6]+=inc;} \n"

1415 
" else { bins[7]+=inc;} \n"

1416 
" } \n"

1417 
" }else \n"

1418 
" { \n"

1419 
" bins[8]+=inc; \n"

1420 
" } \n"

1421 
" } \n"

1422 
" } \n"

1423 
" }";

1424  
1425 
} 
1426  
1427 
WriteOrientationCodeToStream(out); 
1428  
1429 
ProgramCG * program; 
1430 
s_orientation = program = new ProgramCG(buffer);

1431 
_param_orientation_gtex = cgGetNamedParameter(*program, "gradTex");

1432 
_param_orientation_size = cgGetNamedParameter(*program, "size");

1433 
_param_orientation_stex = cgGetNamedParameter(*program, "texS");

1434 
} 
1435  
1436 
void ShaderBagCG::WriteOrientationCodeToStream(std::ostream& out)

1437 
{ 
1438 
//smooth histogram and find the largest

1439 
/*

1440 
smoothing kernel: (1 3 6 7 6 3 1 )/27

1441 
the same as 3 pass of (1 1 1)/3 averaging

1442 
maybe better to use 4 pass on the vectors...

1443 
*/

1444  
1445  
1446 
//the inner loop on different array numbers is always unrolled in fp40

1447  
1448 
//bug fixed here:)

1449 
out<<"\n"

1450 
" float3x3 mat1 = float3x3(1, 0, 0, 3, 1, 0, 6, 3, 1)/27.0;; //bug fix.. \n"

1451 
" float4x4 mat2 = float4x4( 7, 6, 3, 1, 6, 7, 6, 3, 3, 6, 7, 6, 1, 3, 6, 7)/27.0;;\n"

1452 
" for (int j=0; j<2; j++) \n"

1453 
" { \n"

1454 
" float4 prev = bins[8]; \n"

1455 
" bins[9] = bins[0]; \n"

1456 
" for (int i=0; i<9; i++) \n"

1457 
" { \n"

1458 
" float4 newb = mul ( bins[i], mat2); \n"

1459 
" newb.xyz += mul ( prev.yzw, mat1); \n"

1460 
" prev = bins[i]; \n"

1461 
" newb.wzy += mul ( bins[i+1].zyx, mat1); \n"

1462 
" bins[i] = newb; \n"

1463 
" } \n"

1464 
" }";

1465  
1466  
1467 
//find the maximum voting

1468 
out<<"\n"

1469 
" float4 maxh; float2 maxh2; float4 maxh4 = bins[0]; \n"

1470 
" for (int i=1; i<9; i++) maxh4 = max(maxh4, bins[i]); \n"

1471 
" maxh2 = max(maxh4.xy, maxh4.zw); maxh = float4(max(maxh2.x, maxh2.y));";

1472  
1473 
char *testpeak_code;

1474 
char *savepeak_code;

1475  
1476  
1477  
1478 
//save two/three/four orientations with the largest votings?

1479  
1480 
//

1481 
if(GlobalUtil::_MaxOrientation>1) 
1482 
{ 
1483 
out<<"\n"

1484 
" float4 Orientations = float4(0, 0, 0, 0); \n"

1485 
" float4 weights = float4(0,0,0,0); ";

1486 

1487 
testpeak_code = "\n"

1488 
" {test = bins[i]>hh;";

1489  
1490 
//save the orientations in weightdecreasing order

1491 
if(GlobalUtil::_MaxOrientation ==2) 
1492 
{ 
1493 
savepeak_code = "\n"

1494 
" if(weight <=weights.g){}\n"

1495 
" else if(weight >weights.r)\n"

1496 
" {weights.rg = float2(weight, weights.r); Orientations.rg = float2(th, Orientations.r);}\n"

1497 
" else {weights.g = weight; Orientations.g = th;}";

1498  
1499 
}else if(GlobalUtil::_MaxOrientation ==3) 
1500 
{ 
1501 
savepeak_code = "\n"

1502 
" if(weight <=weights.b){}\n"

1503 
" else if(weight >weights.r)\n"

1504 
" {weights.rgb = float3(weight, weights.rg); Orientations.rgb = float3(th, Orientations.rg);}\n"

1505 
" else if(weight >weights.g)\n"

1506 
" {weights.gb = float2(weight, weights.g); Orientations.gb = float2(th, Orientations.g);}\n"

1507 
" else {weights.b = weight; Orientations.b = th;}";

1508 
}else

1509 
{ 
1510 
savepeak_code = "\n"

1511 
" if(weight <=weights.a){}\n"

1512 
" else if(weight >weights.r)\n"

1513 
" {weights = float4(weight, weights.rgb); Orientations = float4(th, Orientations.rgb);}\n"

1514 
" else if(weight >weights.g)\n"

1515 
" {weights.gba = float3(weight, weights.gb); Orientations.gba = float3(th, Orientations.gb);}\n"

1516 
" else if(weight >weights.b)\n"

1517 
" {weights.ba = float2(weight, weights.b); Orientations.ba = float2(th, Orientations.b);}\n"

1518 
" else {weights.a = weight; Orientations.a = th;}";

1519 
} 
1520  
1521 
}else

1522 
{ 
1523 
out<<"\n"

1524 
" float Orientations = 0; ";

1525 
testpeak_code ="\n"

1526 
" if(npeaks==0){ \n"

1527 
" test = (bins[i] >= maxh) ;";

1528 
savepeak_code="\n"

1529 
" npeaks++; \n"

1530 
" Orientations = th.x;";

1531  
1532 
} 
1533  
1534 
//find the peaks

1535 
//the following loop will be unrolled

1536  
1537 
out<<"\n"

1538 
" const float4 hh = maxh * ORIENTATION_THRESHOLD; bool4 test; \n"

1539 
" bins[9] = bins[0]; \n"

1540 
" float npeaks = 0, k = 0; \n"

1541 
" float prevb = bins[8].w; \n"

1542 
" for (int i = 0; i <9 ; i++) \n"

1543 
" {"

1544 
<<testpeak_code<<" \n"

1545 
" if( any ( test.xy  test.zw) ) \n"

1546 
" { \n"

1547 
" if(test.r && bins[i].x > prevb && bins[i].x > bins[i].y ) \n"

1548 
" { \n"

1549 
" float di = 0.5 * (bins[i].yprevb) / (bins[i].x *2.0 bins[i].y prevb) ; \n"

1550 
" float th = (k+di+0.5); float weight = bins[i].x;"

1551 
<<savepeak_code<<"\n"

1552 
" }\n"

1553 
" else if(test.g && all( bins[i].yy > bins[i].xz) ) \n"

1554 
" { \n"

1555 
" float di = 0.5 * (bins[i].zbins[i].x) / (bins[i].y * 2.0  bins[i].z  bins[i].x) ; \n"

1556 
" float th = (k+di+1.5); float weight = bins[i].y; "

1557 
<<savepeak_code<<" \n"

1558 
" }"

1559 
<<"\n"

1560 
" if(test.b && all( bins[i].zz > bins[i].yw) ) \n"

1561 
" { \n"

1562 
" float di = 0.5 * (bins[i].wbins[i].y) / (bins[i].z * 2.0bins[i].wbins[i].y) ; \n"

1563 
" float th = (k+di+2.5); float weight = bins[i].z; "

1564 
<<savepeak_code<<" \n"

1565 
" }\n"

1566 
" else if(test.a && bins[i].w > bins[i].z && bins[i].w > bins[i+1].x ) \n"

1567 
" { \n"

1568 
" float di = 0.5 * (bins[i+1].xbins[i].z) / (bins[i].w * 2.0 bins[i+1].xbins[i].z) ; \n"

1569 
" float th = (k+di+3.5); float weight = bins[i].w; "

1570 
<<savepeak_code<<" \n"

1571 
" }\n"

1572 
" }}\n"

1573 
" k = k + 4.0; \n"

1574 
" prevb = bins[i].w;\n"

1575 
" }";

1576 
//WRITE output

1577 
if(GlobalUtil::_OrientationPack2)

1578 
{ 
1579 
//pack two orientations in one float

1580 
out<<"\n"

1581 
" if(orientation_mode){\n"

1582 
" Orientations.xy = frac(Orientations.xy / 36.0 + 1.0);\n"

1583 
" if(weights.x <= 0) Orientations.x = 1.0;\n"

1584 
" if(weights.y <= 0) Orientations.y = 1.0;\n"

1585 
" float packed_orientation = pack_2ushort(Orientations.xy); \n"

1586 
" FeatureData = float4(pos, packed_orientation, sigma);\n"

1587 
" }else{\n"

1588 
" FeatureData = float4(pos, radians((Orientations.x)*10.0), sigma);\n"

1589 
" }\n";

1590 
}else if(GlobalUtil::_MaxOrientation>1) 
1591 
{ 
1592 
out<<"\n"

1593 
" if(orientation_mode){\n"

1594 
" npeaks = dot(float4(1,1,"

1595 
<<(GlobalUtil::_MaxOrientation>2 ? 1 : 0)<<"," 
1596 
<<(GlobalUtil::_MaxOrientation >3? 1 : 0)<<"), float4(weights>hh));\n" 
1597 
" OrientationData = radians((Orientations )*10.0);\n"

1598 
" FeatureData = float4(pos, npeaks, sigma);\n"

1599 
" }else{\n"

1600 
" FeatureData = float4(pos, radians((Orientations.x)*10.0), sigma);\n"

1601 
" }\n";

1602 
}else

1603 
{ 
1604 
out<<"\n"

1605 
" FeatureData = float4(pos, radians((Orientations.x)*10.0), sigma);";

1606 
} 
1607 
//end

1608 
out<<"\n"

1609 
"}\n"<<'\0'; 
1610  
1611  
1612 
} 
1613  
1614 
void ShaderBagCG::SetSimpleOrientationInput(int oTex, float sigma, float sigma_step) 
1615 
{ 
1616 
cgGLSetTextureParameter(_param_orientation_gtex, oTex); 
1617 
cgGLEnableTextureParameter(_param_orientation_gtex); 
1618 
cgGLSetParameter1f(_param_orientation_size, sigma); 
1619 
} 
1620  
1621 
void ShaderBagCG::SetFeatureOrientationParam(int gtex, int width, int height, float sigma, int stex, float step) 
1622 
{ 
1623 
///

1624 
cgGLSetTextureParameter(_param_orientation_gtex, gtex); 
1625 
cgGLEnableTextureParameter(_param_orientation_gtex); 
1626  
1627 
if((GlobalUtil::_SubpixelLocalization  GlobalUtil::_KeepExtremumSign)&& stex)

1628 
{ 
1629 
//specify texutre for subpixel subscale localization

1630 
cgGLSetTextureParameter(_param_orientation_stex, stex); 
1631 
cgGLEnableTextureParameter(_param_orientation_stex); 
1632 
} 
1633  
1634 
float size[4]; 
1635 
size[0] = (float)width; 
1636 
size[1] = (float)height; 
1637 
size[2] = sigma;

1638 
size[3] = step;

1639 
cgGLSetParameter4fv(_param_orientation_size, size); 
1640  
1641 
} 
1642  
1643 
void ShaderBagCG::SetFeatureDescirptorParam(int gtex, int otex, float dwidth, float fwidth, float width, float height, float sigma) 
1644 
{ 
1645 
///

1646 
cgGLSetTextureParameter(_param_descriptor_gtex, gtex); 
1647 
cgGLEnableTextureParameter(_param_descriptor_gtex); 
1648  
1649 
float dsize[4] ={dwidth, 1.0f/dwidth, fwidth, 1.0f/fwidth}; 
1650 
cgGLSetParameter4fv(_param_descriptor_dsize, dsize); 
1651 
float size[3]; 
1652 
size[0] = width;

1653 
size[1] = height;

1654 
size[2] = GlobalUtil::_DescriptorWindowFactor;

1655 
cgGLSetParameter3fv(_param_descriptor_size, size); 
1656 
} 
1657  
1658  
1659 
///////////////////////////////////////////////////////////////////////////////////

1660 
/////////////////////////////////PACKED VERSION?///////////////////////////////////

1661  
1662 
// Constructor: calls ProgramCG::InitContext() before any shader of this bag
// is created.
ShaderBagPKCG::ShaderBagPKCG()
{
	ProgramCG::InitContext();
}
1666  
1667 
void ShaderBagPKCG::UnloadProgram()

1668 
{ 
1669  
1670 
cgGLUnbindProgram(ProgramCG::_FProfile); 
1671 
cgGLDisableProfile(ProgramCG::_FProfile); 
1672 
} 
1673  
1674 
void ShaderBagPKCG::LoadFixedShaders()

1675 
{ 
1676 
ProgramCG * program; 
1677  
1678 
/*

1679 
char *rgb2gray_packing_code =

1680 
"void main(uniform samplerRECT rgbTex, in float4 TexCoord0 : TEXCOORD0, \n"

1681 
" in float4 TexCoord1 : TEXCOORD1, in float4 TexCoord2 : TEXCOORD2, \n"

1682 
" in float4 TexCoord3 : TEXCOORD3, out float4 FragData : COLOR0){\n"

1683 
" const float3 weight = vec3(0.299, 0.587, 0.114);\n"

1684 
" FragData.r = dot(weight, texRECT(rgbTex,TexCoord0.st ).rgb);\n"

1685 
" FragData.g = dot(weight, texRECT(rgbTex,TexCoord1.st ).rgb);\n"

1686 
" FragData.b = dot(weight, texRECT(rgbTex,TexCoord2.st ).rgb);\n"

1687 
" FragData.a = dot(weight, texRECT(rgbTex,TexCoord3.st ).rgb);}";//

1688 
s_gray = new ProgramCG( rgb2gray_packing_code);

1689 
*/

1690  
1691 
s_gray = new ProgramCG(

1692 
"void main(float4 TexCoord0 : TEXCOORD0, out float4 FragColor : COLOR0, uniform samplerRECT tex){\n"

1693 
"float intensity = dot(float3(0.299, 0.587, 0.114), texRECT(tex,TexCoord0.xy ).rgb);\n"

1694 
"FragColor= float4(intensity, intensity, intensity, 1.0);}" );

1695  
1696  
1697 
s_sampling = new ProgramCG(

1698 
"void main(uniform samplerRECT tex, in float4 TexCoord0 : TEXCOORD0, \n"

1699 
" in float4 TexCoord1 : TEXCOORD1, in float4 TexCoord2 : TEXCOORD2, \n"

1700 
" in float4 TexCoord3 : TEXCOORD3, out float4 FragData : COLOR0 ){\n"

1701 
" FragData= float4( texRECT(tex,TexCoord0.st ).r,texRECT(tex,TexCoord1.st ).r,\n"

1702 
" texRECT(tex,TexCoord2.st ).r,texRECT(tex,TexCoord3.st ).r);}" );

1703  
1704  
1705 
s_margin_copy = program = new ProgramCG(

1706 
"void main(in float4 texCoord0: TEXCOORD0, out float4 FragColor: COLOR0, \n"

1707 
"uniform samplerRECT tex, uniform float4 truncate){\n"

1708 
"float4 cc = texRECT(tex, min(texCoord0.xy, truncate.xy)); \n"

1709 
"bool2 ob = texCoord0.xy < truncate.xy;\n"

1710 
"if(ob.y) { FragColor = (truncate.z ==0 ? cc.rrbb : cc.ggaa); } \n"

1711 
"else if(ob.x) {FragColor = (truncate.w <1.5 ? cc.rgrg : cc.baba);} \n"

1712 
"else { float4 weights = float4(float4(0, 1, 2, 3) == truncate.w);\n"

1713 
"float v = dot(weights, cc); FragColor = v.xxxx;}}");

1714  
1715 
_param_margin_copy_truncate = cgGetNamedParameter(*program, "truncate");

1716  
1717  
1718 
s_zero_pass = new ProgramCG("void main(out float4 FragColor : COLOR0){FragColor = 0;}"); 
1719  
1720 
s_grad_pass = program = new ProgramCG(

1721 
"void main (\n"

1722 
"float4 TexCC : TEXCOORD0, float4 TexLC : TEXCOORD1,\n"

1723 
"float4 TexRC : TEXCOORD2, float4 TexCD : TEXCOORD3, float4 TexCU : TEXCOORD4,\n"

1724 
"out float4 FragData0 : COLOR0, out float4 FragData1 : COLOR1, \n"

1725 
"out float4 FragData2 : COLOR2, uniform samplerRECT tex, uniform samplerRECT texp)\n"

1726 
"{\n"

1727 
" float4 v1, v2, gg;\n"

1728 
" float4 cc = texRECT(tex, TexCC.xy);\n"

1729 
" float4 cp = texRECT(texp, TexCC.xy);\n"

1730 
" FragData0 = cc  cp; \n"

1731 
" float4 cl = texRECT(tex, TexLC.xy); float4 cr = texRECT(tex, TexRC.xy);\n"

1732 
" float4 cd = texRECT(tex, TexCD.xy); float4 cu = texRECT(tex, TexCU.xy);\n"

1733 
" float4 dx = (float4(cr.rb, cc.ga)  float4(cc.rb, cl.ga)).zxwy;\n"

1734 
" float4 dy = (float4(cu.rg, cc.ba)  float4(cc.rg, cd.ba)).zwxy;\n"

1735 
" FragData1 = 0.5 * sqrt(dx*dx + dy * dy);\n"

1736 
" FragData2 = FragData1 > 0? atan2(dy, dx) : float4(0);\n"

1737 
"}\n\0");

1738  
1739 
_param_grad_pass_texp = cgGetNamedParameter(*program, "texp");

1740  
1741  
1742 
s_dog_pass = program = new ProgramCG(

1743 
"void main (float4 TexCC : TEXCOORD0, out float4 FragData0 : COLOR0, \n"

1744 
" uniform samplerRECT tex, uniform samplerRECT texp)\n"

1745 
"{\n"

1746 
" float4 cc = texRECT(tex, TexCC.xy);\n"

1747 
" float4 cp = texRECT(texp, TexCC.xy);\n"

1748 
" FragData0 = cc  cp; \n"

1749 
"}\n\0");

1750  
1751 
////

1752 
if(GlobalUtil::_SupportFP40)

1753 
{ 
1754 
LoadOrientationShader(); 
1755 
if(GlobalUtil::_DescriptorPPT) LoadDescriptorShader();

1756 
}else

1757 
{ 
1758 
s_orientation = program = new ProgramCG(

1759 
"void main(out float4 FragColor : COLOR0, \n"

1760 
" uniform samplerRECT fTex, uniform samplerRECT oTex, \n"

1761 
" uniform float2 size, \n"

1762 
" in float2 tpos : TEXCOORD0){\n"

1763 
" float4 cc = texRECT(fTex, tpos);\n"

1764 
" float2 co = cc.xy * 0.5; \n"

1765 
" float4 oo = texRECT(oTex, co);\n"

1766 
" bool2 bo = frac(co) < 0.5; \n"

1767 
" float o = bo.y? (bo.x? oo.r : oo.g) : (bo.x? oo.b : oo.a); \n"

1768 
" FragColor = float4(cc.rg, o, size.x * pow(size.y, cc.a));}");

1769 
_param_orientation_gtex= cgGetNamedParameter(*program, "oTex");

1770 
_param_orientation_size= cgGetNamedParameter(*program, "size");

1771  
1772 
GlobalUtil::_FullSupported = 0;

1773 
GlobalUtil::_MaxOrientation = 0;

1774 
GlobalUtil::_DescriptorPPT = 0;

1775 
std::cerr<<"Orientation simplified on this hardware"<<endl;

1776 
std::cerr<<"Descriptor ignored on this hardware"<<endl;

1777 
} 
1778 
} 
1779  
1780 
void ShaderBagPKCG::LoadDisplayShaders()

1781 
{ 
1782 
ProgramCG * program; 
1783  
1784 
s_copy_key = new ProgramCG(

1785 
"void main(in float4 TexCoord0 : TEXCOORD0, out float4 FragColor : COLOR0, uniform samplerRECT tex){\n"

1786 
"FragColor.rg= texRECT(tex, TexCoord0.xy).rg; FragColor.ba = float2(0,1); }");

1787  
1788 
//shader used to write a vertex buffer object

1789 
//which is used to draw the quads of each feature

1790 
s_vertex_list = program = new ProgramCG(

1791 
"void main(in float4 TexCoord0: TEXCOORD0,\n"

1792 
"uniform float4 sizes, \n"

1793 
"uniform samplerRECT tex, \n"

1794 
"out float4 FragColor: COLOR0){\n"

1795 
"float fwidth = sizes.y; \n"

1796 
"float twidth = sizes.z; \n"

1797 
"float rwidth = sizes.w; \n"

1798 
"float index = 0.1*(fwidth*floor(TexCoord0.y) + TexCoord0.x);\n"

1799 
"float px = fmod(index, twidth);\n"

1800 
"float2 tpos= floor(float2(px, index*rwidth))+0.5;\n"

1801 
"float4 cc = texRECT(tex, tpos );\n"

1802 
"float size = 3.0f * cc.a;// sizes.x;// \n"

1803 
"FragColor.zw = float2(0.0, 1.0);\n"

1804 
"if(any(cc.xy <=0)) {FragColor.xy = cc.xy;}else \n"

1805 
"{\n"

1806 
" float type = frac(px);\n"

1807 
" float2 dxy; float s, c;\n"

1808 
" dxy.x = type < 0.1 ? 0 : ((type <0.5  type > 0.9)? size : size);\n"

1809 
" dxy.y = type < 0.2 ? 0 : ((type < 0.3  type > 0.7 )? size :size); \n"

1810 
" sincos(cc.b, s, c);\n"

1811 
" FragColor.x = cc.x + c*dxy.xs*dxy.y;\n"

1812 
" FragColor.y = cc.y + c*dxy.y+s*dxy.x;}\n"

1813 
"}\n\0");

1814 
/*FragColor = float4(tpos, 0.0, 1.0);}\n\0");*/

1815  
1816 
_param_genvbo_size = cgGetNamedParameter(*program, "sizes");

1817  
1818 
s_display_gaussian = new ProgramCG(

1819 
"void main(uniform samplerRECT tex, in float4 TexCoord0:TEXCOORD0, out float4 FragData: COLOR0 ){\n"

1820 
"float4 pc = texRECT(tex, TexCoord0.xy); bool2 ff = (frac(TexCoord0.xy) < 0.5);\n"

1821 
"float v = ff.y?(ff.x? pc.r : pc.g):(ff.x?pc.b:pc.a); FragData = float4(v.xxx, 1.0);}");

1822  
1823 
s_display_dog = new ProgramCG(

1824 
"void main(in float4 TexCoord0 : TEXCOORD0, out float4 FragColor : COLOR0, uniform samplerRECT tex){\n"

1825 
"float4 pc = texRECT(tex, TexCoord0.xy); bool2 ff = (frac(TexCoord0.xy) < 0.5);\n"

1826 
"float v = ff.y ?(ff.x ? pc.r : pc.g):(ff.x ? pc.b : pc.a);float g = (0.5+20.0*v);\n"

1827 
"FragColor = float4(g, g, g, 1.0);}" );

1828  
1829  
1830 
s_display_grad = new ProgramCG(

1831 
"void main(in float4 TexCoord0 : TEXCOORD0, out float4 FragColor : COLOR0, uniform samplerRECT tex){\n"

1832 
"float4 pc = texRECT(tex, TexCoord0.xy); bool2 ff = (frac(TexCoord0.xy) < 0.5);\n"

1833 
"float v = ff.y ?(ff.x ? pc.r : pc.g):(ff.x ? pc.b : pc.a); FragColor = float4(5.0 *v.xxx, 1.0); }");

1834  
1835 
s_display_keys= new ProgramCG(

1836 
"void main(in float4 TexCoord0 : TEXCOORD0, out float4 FragColor : COLOR0, uniform samplerRECT tex){\n"

1837 
"float4 oc = texRECT(tex, TexCoord0.xy); \n"

1838 
"float4 cc = float4(abs(oc.r) == float4(1.0, 2.0, 3.0, 4.0));\n"

1839 
"bool2 ff = (frac(TexCoord0.xy) < 0.5);\n"

1840 
"float v = ff.y ?(ff.x ? cc.r : cc.g):(ff.x ? cc.b : cc.a);\n"

1841 
"if(oc.r == 0) discard;\n"

1842 
"else if(oc.r > 0) FragColor = float4(1.0, 0, 0,1.0); \n"

1843 
"else FragColor = float4(0.0,1.0,0.0,1.0); }" );

1844 
} 
1845  
1846 
void ShaderBagPKCG::LoadGenListShader(int ndoglev, int nlev) 
1847 
{ 
1848  
1849 
//the V2 algorithms are only slightly faster, but way more complicated

1850 
//LoadGenListShaderV2(ndoglev, nlev); return;

1851 
ProgramCG * program; 
1852  
1853 
s_genlist_init_tight = new ProgramCG(

1854 
"void main (uniform samplerRECT tex, in float4 TexCoord0 : TEXCOORD0,\n"

1855 
"in float4 TexCoord1 : TEXCOORD1, in float4 TexCoord2 : TEXCOORD2, \n"

1856 
"in float4 TexCoord3 : TEXCOORD3, out float4 FragColor : COLOR0)\n"

1857 
"{\n"

1858 
" float4 data = float4( texRECT(tex, TexCoord0.xy).r,\n"

1859 
" texRECT(tex, TexCoord1.xy).r,\n"

1860 
" texRECT(tex, TexCoord2.xy).r,\n"

1861 
" texRECT(tex, TexCoord3.xy).r);\n"

1862 
" FragColor = float4(data != 0);\n"

1863 
"}");

1864  
1865 
s_genlist_init_ex = program = new ProgramCG(

1866 
"void main (uniform float4 bbox, uniform samplerRECT tex, \n"

1867 
"in float4 TexCoord0 : TEXCOORD0, in float4 TexCoord1 : TEXCOORD1, \n"

1868 
"in float4 TexCoord2 : TEXCOORD2, in float4 TexCoord3 : TEXCOORD3,\n"

1869 
"out float4 FragColor : COLOR0)\n"

1870 
"{\n"

1871 
" bool4 helper1 = abs(texRECT(tex, TexCoord0.xy).r)== float4(1.0, 2.0, 3.0, 4.0); \n"

1872 
" bool4 helper2 = abs(texRECT(tex, TexCoord1.xy).r)== float4(1.0, 2.0, 3.0, 4.0);\n"

1873 
" bool4 helper3 = abs(texRECT(tex, TexCoord2.xy).r)== float4(1.0, 2.0, 3.0, 4.0);\n"

1874 
" bool4 helper4 = abs(texRECT(tex, TexCoord3.xy).r)== float4(1.0, 2.0, 3.0, 4.0);\n"

1875 
" bool4 bx1 = TexCoord0.xxyy < bbox; \n"

1876 
" bool4 bx4 = TexCoord3.xxyy < bbox; \n"

1877 
" bool4 bx2 = bool4(bx4.xy, bx1.zw); \n"

1878 
" bool4 bx3 = bool4(bx1.xy, bx4.zw);\n"

1879 
" helper1 = (bx1.xyxy && bx1.zzww && helper1);\n"

1880 
" helper2 = (bx2.xyxy && bx2.zzww && helper2);\n"

1881 
" helper3 = (bx3.xyxy && bx3.zzww && helper3);\n"

1882 
" helper4 = (bx4.xyxy && bx4.zzww && helper4);\n"

1883 
" FragColor.r = any(helper1.xy  helper1.zw); \n"

1884 
" FragColor.g = any(helper2.xy  helper2.zw); \n"

1885 
" FragColor.b = any(helper3.xy  helper3.zw); \n"

1886 
" FragColor.a = any(helper4.xy  helper4.zw); \n"

1887 
"}");

1888 
_param_genlist_init_bbox = cgGetNamedParameter( *program, "bbox");

1889  
1890 
s_genlist_end = program = new ProgramCG(

1891 
GlobalUtil::_KeepExtremumSign == 0 ?

1892 

1893 
"void main( uniform samplerRECT tex, uniform samplerRECT ktex,\n"

1894 
" in float4 tpos : TEXCOORD0, out float4 FragColor : COLOR0)\n"

1895 
"{\n"

1896 
" float4 tc = texRECT( tex, tpos.xy);\n"

1897 
" float2 pos = tc.rg; float index = tc.b;\n"

1898 
" float4 tk = texRECT( ktex, pos); \n"

1899 
" float4 keys = float4(abs(tk.x) == float4(1.0, 2.0, 3.0, 4.0)); \n"

1900 
" float2 opos; \n"

1901 
" opos.x = dot(keys, float4(0.5, 0.5, 0.5, 0.5));\n"

1902 
" opos.y = dot(keys, float4(0.5, 0.5, 0.5, 0.5));\n"

1903 
" FragColor = float4(opos + pos + pos + tk.yz, 1.0, tk.w);\n"

1904 
"}" :

1905 

1906 
"void main( uniform samplerRECT tex, uniform samplerRECT ktex,\n"

1907 
" in float4 tpos : TEXCOORD0, out float4 FragColor : COLOR0)\n"

1908 
"{\n"

1909 
" float4 tc = texRECT( tex, tpos.xy);\n"

1910 
" float2 pos = tc.rg; float index = tc.b;\n"

1911 
" float4 tk = texRECT( ktex, pos); \n"

1912 
" float4 keys = float4(abs(tk.x) == float4(1.0, 2.0, 3.0, 4.0)); \n"

1913 
" float2 opos; \n"

1914 
" opos.x = dot(keys, float4(0.5, 0.5, 0.5, 0.5));\n"

1915 
" opos.y = dot(keys, float4(0.5, 0.5, 0.5, 0.5));\n"

1916 
" FragColor = float4(opos + pos + pos + tk.yz, sign(tk.x), tk.w);\n"

1917 
"}"

1918 
); 
1919 
_param_genlist_end_ktex = cgGetNamedParameter(*program, "ktex");

1920  
1921 
//reduction ...

1922 
s_genlist_histo = new ProgramCG(

1923 
"void main (uniform samplerRECT tex, in float2 TexCoord0 : TEXCOORD0,\n"

1924 
"in float2 TexCoord1 : TEXCOORD1, in float2 TexCoord2 : TEXCOORD2, \n"

1925 
"in float2 TexCoord3 : TEXCOORD3, out float4 FragColor : COLOR0)\n"

1926 
"{\n"

1927 
" float4 helper; float4 helper2; \n"

1928 
" helper = texRECT(tex, TexCoord0); helper2.xy = helper.xy + helper.zw; \n"

1929 
" helper = texRECT(tex, TexCoord1); helper2.zw = helper.xy + helper.zw; \n"

1930 
" FragColor.rg = helper2.xz + helper2.yw;\n"

1931 
" helper = texRECT(tex, TexCoord2); helper2.xy = helper.xy + helper.zw; \n"

1932 
" helper = texRECT(tex, TexCoord3); helper2.zw = helper.xy + helper.zw; \n"

1933 
" FragColor.ba= helper2.xz+helper2.yw;\n"

1934 
"}");

1935  
1936  
1937 
//read of the first part, which generates tex coordinates

1938  
1939 
s_genlist_start= program = ShaderBagCG::LoadGenListStepShader(1, 1); 
1940 
_param_ftex_width= cgGetNamedParameter(*program, "width");

1941 
_param_genlist_start_tex0 = cgGetNamedParameter(*program, "tex0");

1942 
//stepping

1943 
s_genlist_step = program = ShaderBagCG::LoadGenListStepShader(0, 1); 
1944 
_param_genlist_step_tex= cgGetNamedParameter(*program, "tex");

1945 
_param_genlist_step_tex0= cgGetNamedParameter(*program, "tex0");

1946  
1947  
1948 
} 
1949  
1950  
1951  
1952 
void ShaderBagPKCG::LoadGenListShaderV2(int ndoglev, int nlev) 
1953 
{ 
1954 
ProgramCG * program; 
1955  
1956 
s_genlist_init_tight = new ProgramCG(

1957 
"void main (uniform samplerRECT tex, in float4 TexCoord0 : TEXCOORD0,\n"

1958 
"in float4 TexCoord1 : TEXCOORD1, in float4 TexCoord2 : TEXCOORD2, \n"

1959 
"in float4 TexCoord3 : TEXCOORD3, out float4 FragColor : COLOR0)\n"

1960 
"{\n"

1961 
" float4 data1 = texRECT(tex, TexCoord0.xy);\n"

1962 
" float4 data2 = texRECT(tex, TexCoord1.xy);\n"

1963 
" float4 data3 = texRECT(tex, TexCoord2.xy);\n"

1964 
" float4 data4 = texRECT(tex, TexCoord3.xy);\n"

1965 
" bool4 helper1 = (abs(data1.r), float4(1.0, 2.0, 3.0, 4.0)); \n"

1966 
" bool4 helper2 = (abs(data2.r), float4(1.0, 2.0, 3.0, 4.0));\n"

1967 
" bool4 helper3 = (abs(data3.r), float4(1.0, 2.0, 3.0, 4.0));\n"

1968 
" bool4 helper4 = (abs(data4.r), float4(1.0, 2.0, 3.0, 4.0));\n"

1969 
" FragColor.r = any(helper1.xy  helper1.zw); \n"

1970 
" FragColor.g = any(helper2.xy  helper2.zw); \n"

1971 
" FragColor.b = any(helper3.xy  helper3.zw); \n"

1972 
" FragColor.a = any(helper4.xy  helper4.zw); \n"

1973 
" if(dot(FragColor, float4(1,1,1,1)) == 1) \n"

1974 
" {\n"

1975 
" //use a special method if there is only one in the 16, \n"

1976 
" float4 data, helper; float2 pos, opos; \n"

1977 
" if(FragColor.r){ \n"

1978 
" data = data1; helper = helper1; pos = TexCoord0.xy;\n"

1979 
" }else if(FragColor.g){\n"

1980 
" data = data2; helper = helper2; pos = TexCoord1.xy;\n"

1981 
" }else if(FragColor.b){\n"

1982 
" data = data3; helper = helper3; pos = TexCoord2.xy;\n"

1983 
" }else{\n"

1984 
" data = data4; helper = helper4; pos = TexCoord3.xy;\n"

1985 
" }\n"

1986 
" opos.x = dot(helper, float4(0.5, 0.5, 0.5, 0.5));\n"

1987 
" opos.y = dot(helper, float4(0.5, 0.5, 0.5, 0.5));\n"

1988 
" FragColor = float4( pos + pos + opos + data.yz, 1, data.w); \n"

1989 
" }\n"

1990 
"}");

1991  
1992 
s_genlist_init_ex = program = new ProgramCG(

1993 
"void main (uniform float4 bbox, uniform samplerRECT tex, \n"

1994 
"in float4 TexCoord0 : TEXCOORD0, in float4 TexCoord1 : TEXCOORD1, \n"

1995 
"in float4 TexCoord2 : TEXCOORD2, in float4 TexCoord3 : TEXCOORD3,\n"

1996 
"out float4 FragColor : COLOR0)\n"

1997 
"{\n"

1998 
" float4 data1 = texRECT(tex, TexCoord0.xy);\n"

1999 
" float4 data2 = texRECT(tex, TexCoord1.xy);\n"

2000 
" float4 data3 = texRECT(tex, TexCoord2.xy);\n"

2001 
" float4 data4 = texRECT(tex, TexCoord3.xy);\n"

2002 
" bool4 helper1 = (abs(data1.r), float4(1.0, 2.0, 3.0, 4.0)); \n"

2003 
" bool4 helper2 = (abs(data2.r), float4(1.0, 2.0, 3.0, 4.0));\n"

2004 
" bool4 helper3 = (abs(data3.r), float4(1.0, 2.0, 3.0, 4.0));\n"

2005 
" bool4 helper4 = (abs(data4.r), float4(1.0, 2.0, 3.0, 4.0));\n"

2006 
" bool4 bx1 = TexCoord0.xxyy < bbox; \n"

2007 
" bool4 bx4 = TexCoord3.xxyy < bbox; \n"

2008 
" bool4 bx2 = bool4(bx4.xy, bx1.zw); \n"

2009 
" bool4 bx3 = bool4(bx1.xy, bx4.zw);\n"

2010 
" helper1 = bx1.xyxy && bx1.zzww && helper1; \n"

2011 
" helper2 = bx2.xyxy && bx2.zzww && helper2; \n"

2012 
" helper3 = bx3.xyxy && bx3.zzww && helper3; \n"

2013 
" helper4 = bx4.xyxy && bx4.zzww && helper4; \n"

2014 
" FragColor.r = any(helper1.xy  helper1.zw); \n"

2015 
" FragColor.g = any(helper2.xy  helper2.zw); \n"

2016 
" FragColor.b = any(helper3.xy  helper3.zw); \n"

2017 
" FragColor.a = any(helper4.xy  helper4.zw); \n"

2018 
" if(dot(FragColor, float4(1,1,1,1)) == 1) \n"

2019 
" {\n"

2020 
" //use a special method if there is only one in the 16, \n"

2021 
" float4 data, helper; bool4 bhelper; float2 pos, opos; \n"

2022 
" if(FragColor.r){ \n"

2023 
" data = data1; bhelper = helper1; pos = TexCoord0.xy;\n"

2024 
" }else if(FragColor.g){\n"

2025 
" data = data2; bhelper = helper2; pos = TexCoord1.xy;\n"

2026 
" }else if(FragColor.b){\n"

2027 
" data = data3; bhelper = helper3; pos = TexCoord2.xy;\n"

2028 
" }else{\n"

2029 
" data = data4; bhelper = helper4; pos = TexCoord3.xy;\n"

2030 
" }\n"

2031 
" helper = float4(bhelper); \n"

2032 
" opos.x = dot(helper, float4(0.5, 0.5, 0.5, 0.5));\n"

2033 
" opos.y = dot(helper, float4(0.5, 0.5, 0.5, 0.5));\n"

2034 
" FragColor = float4(pos + pos + opos + data.yz, 1, data.w); \n"

2035 
" }\n"

2036 
"}");

2037 
_param_genlist_init_bbox = cgGetNamedParameter( *program, "bbox");

2038  
2039 
s_genlist_end = program = new ProgramCG(

2040 

2041 
"void main( uniform samplerRECT tex, uniform samplerRECT ktex,\n"

2042 
" in float4 tpos : TEXCOORD0, out float4 FragColor : COLOR0)\n"

2043 
"{\n"

2044 
" float4 tc = texRECT( tex, tpos.xy);\n"

2045 
" float2 pos = tc.rg; float index = tc.b;\n"

2046 
" if(index == 1)\n"

2047 
" {\n"

2048 
" FragColor = float4(tc.xy, 0, tc.w);\n"

2049 
" }else\n"

2050 
" {\n"

2051 
" float4 tk = texRECT( ktex, pos); \n"

2052 
" float4 keys = float4(abs(tk.r) == float4(1.0, 2.0, 3.0, 4.0)); \n"

2053 
" float2 opos; \n"

2054 
" opos.x = dot(keys, float4(0.5, 0.5, 0.5, 0.5));\n"

2055 
" opos.y = dot(keys, float4(0.5, 0.5, 0.5, 0.5));\n"

2056 
" FragColor = float4(opos + pos + pos + tk.yz, 0, tk.w);\n"

2057 
" }\n"

2058 
"}");

2059 
_param_genlist_end_ktex = cgGetNamedParameter(*program, "ktex");

2060  
2061 
//reduction ...

2062 
s_genlist_histo = new ProgramCG(

2063 
"void main (uniform samplerRECT tex, in float2 TexCoord0 : TEXCOORD0,\n"

2064 
"in float2 TexCoord1 : TEXCOORD1, in float2 TexCoord2 : TEXCOORD2, \n"

2065 
"in float2 TexCoord3 : TEXCOORD3, out float4 FragColor : COLOR0)\n"

2066 
"{\n"

2067 
" float4 helper[4]; float4 helper2; \n"

2068 
" helper[0] = texRECT(tex, TexCoord0); helper2.xy = helper[0].xy + helper[0].zw; \n"

2069 
" helper[1] = texRECT(tex, TexCoord1); helper2.zw = helper[1].xy + helper[1].zw; \n"

2070 
" FragColor.rg = helper2.xz + helper2.yw;\n"

2071 
" helper[2] = texRECT(tex, TexCoord2); helper2.xy = helper[2].xy + helper[2].zw; \n"

2072 
" helper[3] = texRECT(tex, TexCoord3); helper2.zw = helper[3].xy + helper[3].zw; \n"

2073 
" FragColor.ba= helper2.xz+helper2.yw;\n"

2074 
" bool4 keyt = float4(helper[0].z, helper[1].z, helper[2].z, helper[3].z) == 1.0; \n"

2075 
" float keyc = dot(float4(keyt), float4(1,1,1,1)); \n"

2076 
" if(keyc == 1.0 && dot(FragColor, float4(1,1,1,1)) == 1.0) \n"

2077 
" {\n"

2078 
" if(keyt.x) FragColor = helper[0];\n"

2079 
" else if(keyt.y) FragColor = helper[1]; \n"

2080 
" else if(keyt.z) FragColor = helper[2]; \n"

2081 
" else FragColor = helper[3]; \n"

2082 
" }else\n"

2083 
" {\n"

2084 
" FragColor = keyt? float4(1,1,1,1) : FragColor;\n"

2085 
" }\n"

2086 
"}");

2087  
2088 
//read of the first part, which generates tex coordinates

2089  
2090 
s_genlist_start= program = ShaderBagCG::LoadGenListStepShaderV2(1, 1); 
2091 
_param_ftex_width= cgGetNamedParameter(*program, "width");

2092 
_param_genlist_start_tex0 = cgGetNamedParameter(*program, "tex0");

2093 
//stepping

2094 
s_genlist_step = program = ShaderBagCG::LoadGenListStepShaderV2(0, 1); 
2095 
_param_genlist_step_tex= cgGetNamedParameter(*program, "tex");

2096 
_param_genlist_step_tex0= cgGetNamedParameter(*program, "tex0");

2097  
2098  
2099 
} 
2100  
2101  
2102  
2103 
// Generates the source of a V2 list-generation traversal program into a
// fixed 10240-byte buffer and returns a newly allocated ProgramCG built
// from it.
//
//   start  non-zero: the program derives the feature index from the fragment
//          position and the "width" uniform and starts at (0.5, 0.5);
//          zero: position and index are read from the "tex" sampler.
//   step   number of histogram levels handled per pass; one sampler uniform
//          tex0..tex{step-1} is declared and traversed per level.
//
// At each level the four counts in cc select a quadrant: opos takes one of
// the (+/-0.5, +/-0.5) combinations of cpos, and index is reduced by the
// counts of the quadrants that were skipped.
//
// NOTE(review): this listing had lost its '-' characters. cpos and the
// "index -= ..." updates are restored, since all four quadrant offsets were
// otherwise identical and index was overwritten rather than reduced. The
// pass-through comparisons (index==1, cc.z == 1) are left exactly as found;
// presumably they test a negative marker - confirm against a pristine copy.
ProgramCG* ShaderBagCG::LoadGenListStepShaderV2(int start, int step)
{
    int i;
    char buffer[10240];
    //char chanels[5] = "rgba";
    ostrstream out(buffer, 10240);
    out<<"void main(out float4 FragColor : COLOR0, \n";

    for(i = 0; i < step; i++) out<<"uniform samplerRECT tex"<<i<<",\n";

    if(start)
    {
        out<<"uniform float width, \nin float2 tpos : TEXCOORD0){\n";
        out<<"float index = floor(tpos.y) * width + floor(tpos.x);\n";
        out<<"float2 pos = float2(0.5, 0.5);\n";
    }else
    {
        out<<"uniform samplerRECT tex, in float2 tpos: TEXCOORD0 ){\n";
        out<<"float4 tc = texRECT( tex, tpos);\n";
        out<<"float2 pos = tc.rg; float index = tc.b;\n";
        out<<"if(index==1) {FragColor = tc; return;}\n";
    }
    out<<"float2 sum; float4 cc;\n";



    if(step>0)
    {
        out<<"float2 cpos = float2(-0.5, 0.5);\t float2 opos;\n";
        for(i = 0; i < step; i++)
        {

            out<<"cc = texRECT(tex"<<i<<", pos);\n";
            out<<"if(cc.z == 1){FragColor = cc; return;}";
            out<<"sum.x = cc.r + cc.g;if (index < sum.x){ if(index < cc.r) opos = cpos.xx; else {opos = cpos.yx; index -= cc.r;}}\n";
            out<<"else {index -= sum.x; if(index < cc.b) opos = cpos.xy; else{opos = cpos.yy; index -= cc.b;}}";
            out<<"pos = (pos + pos + opos);\n";
        }
    }
    out<<"FragColor = float4(pos, index, 1);\n";
    // The explicit '\0' terminates the C string that ProgramCG reads from buffer.
    out<<"}\n"<<'\0';
    return new ProgramCG(buffer);
}
2146  
2147  
2148 
void ShaderBagPKCG:: LoadKeypointShader(float threshold, float edge_threshold) 
2149 
{ 
2150 
//

2151 
ProgramCG * program; 
2152 
char buffer[10240]; 
2153 
float threshold0 = threshold* (GlobalUtil::_SubpixelLocalization?0.8f:1.0f); 
2154 
float threshold1 = threshold;

2155 
float threshold2 = (edge_threshold+1)*(edge_threshold+1)/edge_threshold; 
2156 
ostrstream out(buffer, 10240);

2157 
out<<setprecision(8);

2158 
//tex(X)(Y)

2159 
//X: (CLR) (CENTER 0, LEFT 1, RIGHT +1)

2160 
//Y: (CDU) (CENTER 0, DOWN 1, UP +1)

2161 
out << "#define THRESHOLD0 " << threshold0 << "\n" 
2162 
"#define THRESHOLD1 " << threshold1 << "\n" 
2163 
"#define THRESHOLD2 " << threshold2 << "\n"; 
2164  
2165 
out<< 
2166 
"void main (\n"

2167 
"float4 TexCC : TEXCOORD0, float4 TexLC : TEXCOORD1,\n"

2168 
"float4 TexRC : TEXCOORD2, float4 TexCD : TEXCOORD3, \n"

2169 
"float4 TexCU : TEXCOORD4, float4 TexLD : TEXCOORD5, \n"

2170 
"float4 TexLU : TEXCOORD6, float4 TexRD : TEXCOORD7,\n"

2171 
"out float4 FragData0 : COLOR0, uniform samplerRECT tex, \n"

2172 
"uniform samplerRECT texU, uniform samplerRECT texD)\n"

2173 
"{\n"

2174 
" float2 TexRU = float2(TexRC.x, TexCU.y); \n"

2175 
" float4 ccc = texRECT(tex, TexCC.xy);\n"

2176 
" float4 clc = texRECT(tex, TexLC.xy);\n"

2177 
" float4 crc = texRECT(tex, TexRC.xy);\n"

2178 
" float4 ccd = texRECT(tex, TexCD.xy);\n"

2179 
" float4 ccu = texRECT(tex, TexCU.xy);\n"

2180 
" float4 cld = texRECT(tex, TexLD.xy);\n"

2181 
" float4 clu = texRECT(tex, TexLU.xy);\n"

2182 
" float4 crd = texRECT(tex, TexRD.xy);\n"

2183 
" float4 cru = texRECT(tex, TexRU.xy);\n"

2184 
" float4 cc = ccc;\n"

2185 
" float4 v1[4], v2[4];\n"

2186 
" v1[0] = float4(clc.g, ccc.g, ccd.b, ccc.b);\n"

2187 
" v1[1] = float4(ccc.r, crc.r, ccd.a, ccc.a);\n"

2188 
" v1[2] = float4(clc.a, ccc.a, ccc.r, ccu.r);\n"

2189 
" v1[3] = float4(ccc.b, crc.b, ccc.g, ccu.g);\n"

2190 
" v2[0] = float4(cld.a, clc.a, ccd.a, ccc.a);\n"

2191 
" v2[1] = float4(ccd.b, ccc.b, crd.b, crc.b);\n"

2192 
" v2[2] = float4(clc.g, clu.g, ccc.g, ccu.g);\n"

2193 
" v2[3] = float4(ccc.r, ccu.r, crc.r, cru.r);\n"

2194  
2195 
//test against 8 neighbours

2196 
//use variable to identify type of extremum

2197 
//1.0 for local maximum and 1.0 for minimum

2198 
<< 
2199 
" float4 key ={0, 0, 0, 0}; \n"

2200 
" for(int i = 0; i < 4; i++)\n"

2201 
" {\n"

2202 
" bool4 test1 = cc[i] > max(v1[i], v2[i]), test2 = cc[i] < min(v1[i], v2[i]);\n"

2203 
" key[i] = cc[i] > THRESHOLD0 && all(test1.xy&&test1.zw)?1.0: 0.0;\n"

2204 
" key[i] = cc[i] < THRESHOLD0 && all(test2.xy&&test2.zw)? 1.0: key[i];\n"

2205 
" }\n"

2206 
" if(TexCC.x < 1.0) {key.rb = 0;}\n"

2207 
" if(TexCC.y < 1.0) {key.rg = 0;}\n"

2208 
" FragData0 = float4(0.0);\n"

2209 
" if(all(key == 0.0)) return; \n";

2210  
2211 
//do edge supression first..

2212 
//vector v1 is < (1, 0), (1, 0), (0,1), (0, 1)>

2213 
//vector v2 is < (1,1), (1,1), (1,1), (1, 1)>

2214  
2215 
out<< 
2216 
" float fxx[4], fyy[4], fxy[4], fx[4], fy[4];\n"

2217 
" for(int i = 0; i < 4; i++) \n"

2218 
" {\n"

2219 
" if(key[i] != 0)\n"

2220 
" {\n"

2221 
" float4 D2 = v1[i].xyzw  cc[i];\n"

2222 
" float2 D4 = v2[i].xw  v2[i].yz;\n"

2223 
" float2 D5 = 0.5*(v1[i].ywv1[i].xz); \n"

2224 
" fx[i] = D5.x;\n"

2225 
" fy[i] = D5.y ;\n"

2226 
" fxx[i] = D2.x + D2.y;\n"

2227 
" fyy[i] = D2.z + D2.w;\n"

2228 
" fxy[i] = 0.25*(D4.x + D4.y);\n"

2229 
" float fxx_plus_fyy = fxx[i] + fyy[i];\n"

2230 
" float score_up = fxx_plus_fyy*fxx_plus_fyy; \n"

2231 
" float score_down = (fxx[i]*fyy[i]  fxy[i]*fxy[i]);\n"

2232 
" if( score_down <= 0  score_up > THRESHOLD2 * score_down)key[i] = 0;\n"

2233 
" }\n"

2234 
" }\n"

2235 
" if(all(key == 0.0)) return; \n\n";

2236  
2237 
////////////////////////////////////////////////

2238 
//read 9 pixels of upper/lower level

2239 
out<< 
2240 
" float4 v4[4], v5[4], v6[4];\n"

2241 
" ccc = texRECT(texU, TexCC.xy);\n"

2242 
" clc = texRECT(texU, TexLC.xy);\n"

2243 
" crc = texRECT(texU, TexRC.xy);\n"

2244 
" ccd = texRECT(texU, TexCD.xy);\n"

2245 
" ccu = texRECT(texU, TexCU.xy);\n"

2246 
" cld = texRECT(texU, TexLD.xy);\n"

2247 
" clu = texRECT(texU, TexLU.xy);\n"

2248 
" crd = texRECT(texU, TexRD.xy);\n"

2249 
" cru = texRECT(texU, TexRU.xy);\n"

2250 
" float4 cu = ccc;\n"

2251 
" v4[0] = float4(clc.g, ccc.g, ccd.b, ccc.b);\n"

2252 
" v4[1] = float4(ccc.r, crc.r, ccd.a, ccc.a);\n"

2253 
" v4[2] = float4(clc.a, ccc.a, ccc.r, ccu.r);\n"

2254 
" v4[3] = float4(ccc.b, crc.b, ccc.g, ccu.g);\n"

2255 
" v6[0] = float4(cld.a, clc.a, ccd.a, ccc.a);\n"

2256 
" v6[1] = float4(ccd.b, ccc.b, crd.b, crc.b);\n"

2257 
" v6[2] = float4(clc.g, clu.g, ccc.g, ccu.g);\n"

2258 
" v6[3] = float4(ccc.r, ccu.r, crc.r, cru.r);\n"

2259 
<< 
2260 
" for(int i = 0; i < 4; i++)\n"

2261 
" {\n"

2262 
" if(key[i] == 1.0)\n"

2263 
" {\n"

2264 
" bool4 test = cc[i]< max(v4[i], v6[i]); \n"

2265 
" if(cc[i] < cu[i]  any(test.xytest.zw))key[i] = 0.0; \n"

2266 
" }else if(key[i] == 1.0)\n"

2267 
" {\n"

2268 
" bool4 test = cc[i]> min( v4[i], v6[i]); \n"

2269 
" if(cc[i] > cu[i]  any(test.xytest.zw))key[i] = 0.0; \n"

2270 
" }\n"

2271 
" }\n"

2272 
" if(all(key == 0.0)) return; \n"

2273 
<< 
2274 
" ccc = texRECT(texD, TexCC.xy);\n"

2275 
" clc = texRECT(texD, TexLC.xy);\n"

2276 
" crc = texRECT(texD, TexRC.xy);\n"

2277 
" ccd = texRECT(texD, TexCD.xy);\n"

2278 
" ccu = texRECT(texD, TexCU.xy);\n"

2279 
" cld = texRECT(texD, TexLD.xy);\n"

2280 
" clu = texRECT(texD, TexLU.xy);\n"

2281 
" crd = texRECT(texD, TexRD.xy);\n"

2282 
" cru = texRECT(texD, TexRU.xy);\n"

2283 
" float4 cd = ccc;\n"

2284 
" v5[0] = float4(clc.g, ccc.g, ccd.b, ccc.b);\n"

2285 
" v5[1] = float4(ccc.r, crc.r, ccd.a, ccc.a);\n"

2286 
" v5[2] = float4(clc.a, ccc.a, ccc.r, ccu.r);\n"

2287 
" v5[3] = float4(ccc.b, crc.b, ccc.g, ccu.g);\n"

2288 
" v6[0] = float4(cld.a, clc.a, ccd.a, ccc.a);\n"

2289 
" v6[1] = float4(ccd.b, ccc.b, crd.b, crc.b);\n"

2290 
" v6[2] = float4(clc.g, clu.g, ccc.g, ccu.g);\n"

2291 
" v6[3] = float4(ccc.r, ccu.r, crc.r, cru.r);\n"

2292 
<< 
2293 
" for(int i = 0; i < 4; i++)\n"

2294 
" {\n"

2295 
" if(key[i] == 1.0)\n"

2296 
" {\n"

2297 
" bool4 test = cc[i]< max(v5[i], v6[i]);\n"

2298 
" if(cc[i] < cd[i]  any(test.xytest.zw))key[i] = 0.0; \n"

2299 
" }else if(key[i] == 1.0)\n"

2300 
" {\n"

2301 
" bool4 test = cc[i]>min(v5[i],v6[i]);\n"

2302 
" if(cc[i] > cd[i]  any(test.xytest.zw))key[i] = 0.0; \n"

2303 
" }\n"

2304 
" }\n"

2305 
" float keysum = dot(abs(key), float4(1, 1, 1, 1)) ;\n"

2306 
" //assume there is only one keypoint in the four. \n"

2307 
" if(keysum != 1.0) return; \n";

2308  
2309 
//////////////////////////////////////////////////////////////////////

2310 
if(GlobalUtil::_SubpixelLocalization)

2311  
2312 
out << 
2313 
" float3 offset = float3(0, 0, 0); \n"

2314 
" /*The unrolled follwing loop is faster than a dynamic indexing version.*/\n"

2315 
" for(int idx = 1; idx < 4; idx++)\n"

2316 
" {\n"

2317 
" if(key[idx] != 0) \n"

2318 
" {\n"

2319 
" cu[0] = cu[idx]; cd[0] = cd[idx]; cc[0] = cc[idx]; \n"

2320 
" v4[0] = v4[idx]; v5[0] = v5[idx]; \n"

2321 
" fxy[0] = fxy[idx]; fxx[0] = fxx[idx]; fyy[0] = fyy[idx]; \n"

2322 
" fx[0] = fx[idx]; fy[0] = fy[idx]; \n"

2323 
" }\n"

2324 
" }\n"

2325 
<< 
2326 

2327 
" float fs = 0.5*( cu[0]  cd[0] ); \n"

2328 
" float fss = cu[0] + cd[0]  cc[0]  cc[0];\n"

2329 
" float fxs = 0.25 * (v4[0].y + v5[0].x  v4[0].x  v5[0].y);\n"

2330 
" float fys = 0.25 * (v4[0].w + v5[0].z  v4[0].z  v5[0].w);\n"

2331 
" float4 A0, A1, A2 ; \n"

2332 
" A0 = float4(fxx[0], fxy[0], fxs, fx[0]); \n"

2333 
" A1 = float4(fxy[0], fyy[0], fys, fy[0]); \n"

2334 
" A2 = float4(fxs, fys, fss, fs); \n"

2335 
" float3 x3 = abs(float3(fxx[0], fxy[0], fxs)); \n"

2336 
" float maxa = max(max(x3.x, x3.y), x3.z); \n"

2337 
" if(maxa >= 1e10 ) \n"

2338 
" { \n"

2339 
" if(x3.y ==maxa ) \n"

2340 
" { \n"

2341 
" float4 TEMP = A1; A1 = A0; A0 = TEMP; \n"

2342 
" }else if( x3.z == maxa ) \n"

2343 
" { \n"

2344 
" float4 TEMP = A2; A2 = A0; A0 = TEMP; \n"

2345 
" } \n"

2346 
" A0 /= A0.x; \n"

2347 
" A1 = A1.x * A0; \n"

2348 
" A2 = A2.x * A0; \n"

2349 
" float2 x2 = abs(float2(A1.y, A2.y)); \n"

2350 
" if( x2.y > x2.x ) \n"

2351 
" { \n"

2352 
" float3 TEMP = A2.yzw; \n"

2353 
" A2.yzw = A1.yzw; \n"

2354 
" A1.yzw = TEMP; \n"

2355 
" x2.x = x2.y; \n"

2356 
" } \n"

2357 
" if(x2.x >= 1e10) { \n"

2358 
" A1.yzw /= A1.y; \n"

2359 
" A2.yzw = A2.y * A1.yzw; \n"

2360 
" if(abs(A2.z) >= 1e10) {\n"

2361 
" offset.z = A2.w /A2.z; \n"

2362 
" offset.y = A1.w  offset.z*A1.z; \n"

2363 
" offset.x = A0.w  offset.z*A0.z  offset.y*A0.y; \n"

2364 
" bool test = (abs(cc[0] + 0.5*dot(float3(fx[0], fy[0], fs), offset ))>THRESHOLD1) ;\n"

2365 
" if(!test  any( abs(offset) >= 1.0)) return;\n"

2366 
" }\n"

2367 
" }\n"

2368 
" }\n"

2369 
<<"\n"

2370 
" float keyv = dot(key, float4(1.0, 2.0, 3.0, 4.0));\n"

2371 
" FragData0 = float4(keyv, offset);\n"

2372 
"}\n" <<'\0'; 
2373  
2374 
else out << "\n" 
2375 
" float keyv = dot(key, float4(1.0, 2.0, 3.0, 4.0));\n"

2376 
" FragData0 = float4(keyv, 0, 0, 0);\n"

2377 
"}\n" <<'\0'; 
2378  
2379 
s_keypoint = program = new ProgramCG(buffer);

2380 
//parameter

2381 
_param_dog_texu = cgGetNamedParameter(*program, "texU");

2382 
_param_dog_texd = cgGetNamedParameter(*program, "texD");

2383 
} 
2384  
2385 
void ShaderBagPKCG::LoadOrientationShader()

2386 
{ 
2387 
char buffer[10240]; 
2388 
ostrstream out(buffer,10240);

2389  
2390 
out<<"\n"

2391 
"#define GAUSSIAN_WF "<<GlobalUtil::_OrientationGaussianFactor<<" \n" 
2392 
"#define SAMPLE_WF ("<<GlobalUtil::_OrientationWindowFactor<< " )\n" 
2393 
"#define ORIENTATION_THRESHOLD "<< GlobalUtil::_MulitiOrientationThreshold << "\n" 
2394 
"void main(uniform samplerRECT tex, uniform samplerRECT gtex, \n"

2395 
" uniform samplerRECT otex, uniform float4 size, in float2 TexCoord0 : TEXCOORD0, \n"

2396 
" out float4 FeatureData : COLOR0 ";

2397  
2398 
//multi orientation output

2399 
//use one additional texture to store up to four orientations

2400 
//when we use one 32bit float to store two orientations, no extra texture is required

2401  
2402 
if(GlobalUtil::_MaxOrientation >1 && GlobalUtil::_OrientationPack2 == 0) 
2403 
out<<", out float4 OrientationData : COLOR1";

2404  
2405  
2406 
//use 9 float4 to store histogram of 36 directions

2407 
out<<") \n"

2408 
"{ \n"

2409 
" float4 bins[10]; \n"

2410 
" for (int i=0; i<9; i++) bins[i] = float4(0,0,0,0); \n"

2411 
" float4 sift = texRECT(tex, TexCoord0); \n"

2412 
" float2 pos = sift.xy; \n"

2413 
" bool orientation_mode = (size.z != 0); \n"

2414 
" float sigma = orientation_mode? (abs(size.z) * pow(size.w, sift.w) * sift.z) : (sift.w); \n"

2415 
" //bool fixed_orientation = (size.z < 0); \n"

2416 
" if(size.z < 0) {FeatureData = float4(pos, 0, sigma); return;}"

2417 
" float gsigma = sigma * GAUSSIAN_WF; \n"

2418 
" float2 win = abs(sigma.xx) * (SAMPLE_WF * GAUSSIAN_WF); \n"

2419 
" float2 dim = size.xy; \n"

2420 
" float4 dist_threshold = float4(win.x*win.x+0.5); \n"

2421 
" float factor = 0.5/(gsigma*gsigma); \n"

2422 
" float4 sz; float2 spos; \n"

2423 
" //if(any(pos.xy <= 1)) discard; \n"

2424 
" sz.xy = max( pos  win, float2(2,2)); \n"

2425 
" sz.zw = min( pos + win, dim3); \n"

2426 
" sz = floor(sz*0.5) + 0.5; ";

2427 
//loop to get the histogram

2428  
2429 
out<<"\n"

2430 
" for(spos.y = sz.y; spos.y <= sz.w; spos.y+=1.0) \n"

2431 
" { \n"

2432 
" for(spos.x = sz.x; spos.x <= sz.z; spos.x+=1.0) \n"

2433 
" { \n"

2434 
" float2 offset = 2* spos  pos  0.5; \n"

2435 
" float4 off = float4(offset, offset + 1); \n"

2436 
" float4 distsq = off.xzxz * off.xzxz + off.yyww * off.yyww; \n"

2437 
" bool4 inside = distsq < dist_threshold; \n"

2438 
" if(any(inside.xyinside.zw)) \n"

2439 
" { \n"

2440 
" float4 gg = texRECT(gtex, spos); \n"

2441 
" float4 oo = texRECT(otex, spos); \n"

2442 
" float4 weight = gg * exp(distsq * factor); \n"

2443 
" float4 idxv = floor(degrees(oo)*0.1); \n"

2444 
" idxv = idxv<0? idxv + 36.0: idxv; \n"

2445 
" float4 vidx = 4.0* fract(idxv * 0.25);//fmod(idxv, 4.0);\n";

2446  
2447 
//

2448 
if(GlobalUtil::_UseDynamicIndexing && strcmp(cgGetProfileString(ProgramCG::_FProfile), "gp4fp")==0) 
2449 
//if(ProgramCG::_FProfile == CG_PROFILE_GPU_FP) this enumerant is not defined in cg1.5

2450 
{ 
2451 
//gp4fp supports dynamic indexing, but it might be slow on some GPUs

2452 
out<<"\n"

2453 
" for(int i = 0 ; i < 4; i++)\n"

2454 
" {\n"

2455 
" if(inside[i])\n"

2456 
" {\n"

2457 
" float idx = idxv[i]; \n"

2458 
" float4 inc = weight[i] * float4(vidx[i] == float4(0,1,2,3)); \n"

2459 
" int iidx = int(floor(idx*0.25)); \n"

2460 
" bins[iidx]+=inc; \n"

2461 
" } \n"

2462 
" } \n"

2463 
" } \n"

2464 
" } \n"

2465 
" }";

2466  
2467 
}else

2468 
{ 
2469 
//nvfp40 still does not support dynamic array indexing

2470 
//unrolled binary search

2471 
//it seems to be faster than the dyanmic indexing version on some GPUs

2472 
out<<"\n"

2473 
" for(int i = 0 ; i < 4; i++)\n"

2474 
" {\n"

2475 
" if(inside[i])\n"

2476 
" {\n"

2477 
" float idx = idxv[i]; \n"

2478 
" float4 inc = weight[i] * float4(vidx[i] == float4(0,1,2,3)); \n"

2479 
" if(idx < 16) \n"

2480 
" { \n"

2481 
" if(idx < 8) \n"

2482 
" { \n"

2483 
" if(idx < 4) { bins[0]+=inc;} \n"

2484 
" else { bins[1]+=inc;} \n"

2485 
" }else \n"

2486 
" { \n"

2487 
" if(idx < 12){ bins[2]+=inc;} \n"

2488 
" else { bins[3]+=inc;} \n"

2489 
" } \n"

2490 
" }else if(idx < 32) \n"

2491 
" { \n"

2492 
" if(idx < 24) \n"

2493 
" { \n"

2494 
" if(idx <20) { bins[4]+=inc;} \n"

2495 
" else { bins[5]+=inc;} \n"

2496 
" }else \n"

2497 
" { \n"

2498 
" if(idx < 28){ bins[6]+=inc;} \n"

2499 
" else { bins[7]+=inc;} \n"

2500 
" } \n"

2501 
" }else \n"

2502 
" { \n"

2503 
" bins[8]+=inc; \n"

2504 
" } \n"

2505 
" } \n"

2506 
" } \n"

2507 
" } \n"

2508 
" } \n"

2509 
" }";

2510  
2511 
} 
2512  
2513 
//reuse the code from the unpacked version..

2514 
ShaderBagCG::WriteOrientationCodeToStream(out); 
2515  
2516  
2517 
ProgramCG * program; 
2518 
s_orientation = program = new ProgramCG(buffer);

2519 
_param_orientation_gtex = cgGetNamedParameter(*program, "gtex");

2520 
_param_orientation_otex = cgGetNamedParameter(*program, "otex");

2521 
_param_orientation_size = cgGetNamedParameter(*program, "size");

2522  
2523  
2524 
} 
2525  
2526 
void ShaderBagPKCG::LoadDescriptorShader()

2527 
{ 
2528 
GlobalUtil::_DescriptorPPT = 16;

2529 
LoadDescriptorShaderF2(); 
2530  
2531 
} 
2532  
2533 
void ShaderBagPKCG::LoadDescriptorShaderF2()

2534 
{ 
2535 
//one shader outpout 128/8 = 16 , each fragout encodes 4

2536 
//const double twopi = 2.0*3.14159265358979323846;

2537 
//const double rpi = 8.0/twopi;

2538 
char buffer[10240]; 
2539 
ostrstream out(buffer, 10240);

2540  
2541 
out<<setprecision(8);

2542  
2543 
out<<"\n"

2544 
"#define M_PI 3.14159265358979323846\n"

2545 
"#define TWO_PI (2.0*M_PI)\n"

2546 
"#define RPI 1.2732395447351626861510701069801\n"

2547 
"#define WF size.z\n"

2548 
"void main(uniform samplerRECT tex, \n"

2549 
"uniform samplerRECT gtex, \n"

2550 
"uniform samplerRECT otex, \n"

2551 
"uniform float4 dsize, \n"

2552 
"uniform float3 size, \n"

2553 
"in float2 TexCoord0 : TEXCOORD0, \n"

2554 
"out float4 FragData0:COLOR0, \n"

2555 
"out float4 FragData1:COLOR1) \n"

2556 
"{\n"

2557 
" float2 dim = size.xy; //image size \n"

2558 
" float index = dsize.x*floor(TexCoord0.y * 0.5) + TexCoord0.x;\n"

2559 
" float idx = 8.0 * frac(index * 0.125) + 8.0 * floor(2.0 * frac(TexCoord0.y * 0.5)); \n"

2560 
" index = floor(index*0.125)+ 0.49; \n"

2561 
" float2 coord = floor( float2( fmod(index, dsize.z), index*dsize.w)) + 0.5 ;\n"

2562 
" float2 pos = texRECT(tex, coord).xy; \n"

2563 
" if(any(pos.xy <= 1)  any(pos.xy >=dim1)) "

2564 
" //discard; \n"

2565 
" { FragData0 = FragData1 = float4(0.0); return; }\n"

2566 
" float anglef = texRECT(tex, coord).z;\n"

2567 
" if(anglef > M_PI) anglef = TWO_PI;\n"

2568 
" float sigma = texRECT(tex, coord).w; \n"

2569 
" float spt = abs(sigma * WF); //default to be 3*sigma \n";

2570 
//rotation

2571 
out<< 
2572 
" float4 cscs, rots; \n"

2573 
" sincos(anglef, cscs.y, cscs.x); \n"

2574 
" cscs.zw =  cscs.xy; \n"

2575 
" rots = cscs /spt; \n"

2576 
" cscs *= spt; \n";

2577  
2578 
//here cscs is actually (cos, sin, cos, sin) * (factor: 3)*sigma

2579 
//and rots is (cos, sin, cos, sin ) /(factor*sigma)

2580 
//devide the 4x4 sift grid into 16 1x1 block, and each corresponds to a shader thread

2581 
//To use linear interoplation, 1x1 is increased to 2x2, by adding 0.5 to each side

2582 
out<< 
2583 
" float4 temp; float2 pt, offsetpt; \n"

2584 
" /*the fraction part of idx is .5*/ \n"

2585 
" offsetpt.x = 4.0 * fract(idx * 0.25)  2.0; \n"

2586 
" offsetpt.y = floor(idx*0.25)  1.5; \n"

2587 
" temp = cscs.xwyx*offsetpt.xyxy; \n"

2588 
" pt = pos + temp.xz + temp.yw; \n";

2589 

2590 
//get a horizontal bounding box of the rotated rectangle

2591 
out<< 
2592 
" float2 bwin = abs(cscs.xy); \n"

2593 
" float bsz = bwin.x + bwin.y; \n"

2594 
" float4 sz; float2 spos; \n"

2595 
" sz.xy = max(pt  bsz, float2(2,2));\n"

2596 
" sz.zw = min(pt + bsz, dim  3); \n"

2597 
" sz = floor(sz * 0.5) + 0.5;"; //move sample point to pixel center 
2598 
//get voting for two box

2599  
2600 
out<<"\n"

2601 
" float4 DA, DB; \n"

2602 
" DA = DB = float4(0, 0, 0, 0); \n"

2603 
" float4 nox = float4(0, rots.xy, rots.x + rots.y); \n"

2604 
" float4 noy = float4(0, rots.wx, rots.w + rots.x); \n"

2605 
" for(spos.y = sz.y; spos.y <= sz.w; spos.y+=1.0) \n"

2606 
" { \n"

2607 
" for(spos.x = sz.x; spos.x <= sz.z; spos.x+=1.0) \n"

2608 
" { \n"

2609 
" float2 tpt = spos * 2.0  pt  0.5; \n"

2610 
" float4 temp = rots.xywx * tpt.xyxy; \n"

2611 
" float2 temp2 = temp.xz + temp.yw; \n"

2612 
" float4 nx = temp2.x + nox; \n"

2613 
" float4 ny = temp2.y + noy; \n"

2614 
" float4 nxn = abs(nx), nyn = abs(ny); \n"

2615 
" bool4 inside = (max(nxn, nyn) < 1.0); \n"

2616 
" if(any(inside.xy  inside.zw))\n"

2617 
" {\n"

2618 
" float4 gg = texRECT(gtex, spos);\n"

2619 
" float4 oo = texRECT(otex, spos);\n"

2620 
" float4 theta0 = (anglef  oo)*RPI;\n"

2621 
" float4 theta = theta0 < 0? theta0 + 8.0 : theta0;//8.0 * frac(1.0 + 0.125 * theta0);// \n"

2622 
" float4 theta1 = floor(theta); \n"

2623 
" float4 diffx = nx + offsetpt.x, diffy = ny + offsetpt.y; \n"

2624 
" float4 ww = exp(0.125 * (diffx * diffx + diffy * diffy )); \n"

2625 
" float4 weight = (1  nxn) * (1  nyn) * gg * ww; \n"

2626 
" float4 weight2 = (theta  theta1) * weight; \n"

2627 
" float4 weight1 = weight  weight2; \n"

2628 
" for(int i = 0;i < 4; i++)\n"

2629 
" {\n"

2630 
" if(inside[i])\n"

2631 
" {\n"

2632 
" DA += float4(theta1[i] == float4(0, 1, 2, 3))*weight1[i]; \n"

2633 
" DA += float4(theta1[i] == float4(7, 0, 1, 2))*weight2[i]; \n"

2634 
" DB += float4(theta1[i] == float4(4, 5, 6, 7))*weight1[i]; \n"

2635 
" DB += float4(theta1[i] == float4(3, 4, 5, 6))*weight2[i]; \n"

2636 
" }\n"

2637 
" }\n"

2638 
" }\n"

2639 
" }\n"

2640 
" }\n";

2641 
out<< 
2642 
" FragData0 = DA; FragData1 = DB;\n"

2643 
"}\n"<<'\0'; 
2644 
ProgramCG * program; 
2645  
2646 
s_descriptor_fp = program = new ProgramCG(buffer);

2647 
_param_descriptor_gtex = cgGetNamedParameter(*program, "gtex");

2648 
_param_descriptor_otex = cgGetNamedParameter(*program, "otex");

2649 
_param_descriptor_size = cgGetNamedParameter(*program, "size");

2650 
_param_descriptor_dsize = cgGetNamedParameter(*program, "dsize");

2651  
2652 
} 
2653  
2654 
void ShaderBagPKCG::SetMarginCopyParam(int xmax, int ymax) 
2655 
{ 
2656 
float truncate[4]; 
2657 
truncate[0] = (xmax  0.5f) * 0.5f; //((xmax + 1) >> 1)  0.5f; 
2658 
truncate[1] = (ymax  0.5f) * 0.5f; //((ymax + 1) >> 1)  0.5f; 
2659 
truncate[2] = (xmax %2 == 1)? 0.0f: 1.0f; 
2660 
truncate[3] = truncate[2] + (((ymax % 2) == 1)? 0.0f : 2.0f); 
2661 
cgGLSetParameter4fv(_param_margin_copy_truncate, truncate); 
2662 
} 
2663  
2664 
void ShaderBagPKCG::SetGradPassParam(int texP) 
2665 
{ 
2666 
cgGLSetTextureParameter(_param_grad_pass_texp, texP); 
2667 
cgGLEnableTextureParameter(_param_grad_pass_texp); 
2668 
} 
2669  
2670 
void ShaderBagPKCG::SetGenListEndParam(int ktex) 
2671 
{ 
2672 
cgGLSetTextureParameter(_param_genlist_end_ktex, ktex); 
2673 
cgGLEnableTextureParameter(_param_genlist_end_ktex); 
2674 
} 
2675  
2676 
void ShaderBagPKCG::SetDogTexParam(int texU, int texD) 
2677 
{ 
2678 
cgGLSetTextureParameter(_param_dog_texu, texU); 
2679 
cgGLEnableTextureParameter(_param_dog_texu); 
2680 
cgGLSetTextureParameter(_param_dog_texd, texD); 
2681 
cgGLEnableTextureParameter(_param_dog_texd); 
2682 
} 
2683  
2684 
void ShaderBagPKCG::SetGenListInitParam(int w, int h) 
2685 
{ 
2686 
float bbox[4] = {(w 1.0f) * 0.5f +0.25f, (w1.0f) * 0.5f  0.25f, (h  1.0f) * 0.5f + 0.25f, (h1.0f) * 0.5f  0.25f}; 
2687 
cgGLSetParameter4fv(_param_genlist_init_bbox, bbox); 
2688 
} 
2689  
2690  
2691 
void ShaderBagPKCG::SetGenListStartParam(float width, int tex0) 
2692 
{ 
2693 
cgGLSetParameter1f(_param_ftex_width, width); 
2694  
2695 
if(_param_genlist_start_tex0)

2696 
{ 
2697 
cgGLSetTextureParameter(_param_genlist_start_tex0, tex0); 
2698 
cgGLEnableTextureParameter(_param_genlist_start_tex0); 
2699 
} 
2700 
} 
2701  
2702  
2703  
2704 
void ShaderBagPKCG::SetGenListStepParam(int tex, int tex0) 
2705 
{ 
2706 
cgGLSetTextureParameter(_param_genlist_step_tex, tex); 
2707 
cgGLEnableTextureParameter(_param_genlist_step_tex); 
2708 
cgGLSetTextureParameter(_param_genlist_step_tex0, tex0); 
2709 
cgGLEnableTextureParameter(_param_genlist_step_tex0); 
2710 
} 
2711  
2712 
void ShaderBagPKCG::SetGenVBOParam(float width, float fwidth, float size) 
2713 
{ 
2714 
float sizes[4] = {size*3.0f, fwidth, width, 1.0f/width}; 
2715 
cgGLSetParameter4fv(_param_genvbo_size, sizes); 
2716 
} 
2717  
2718 
void ShaderBagPKCG::SetSimpleOrientationInput(int oTex, float sigma, float sigma_step) 
2719 
{ 
2720 
cgGLSetTextureParameter(_param_orientation_gtex, oTex); 
2721 
cgGLEnableTextureParameter(_param_orientation_gtex); 
2722 
cgGLSetParameter2f(_param_orientation_size, sigma, sigma_step); 
2723 
} 
2724  
2725  
2726 
void ShaderBagPKCG::SetFeatureOrientationParam(int gtex, int width, int height, float sigma, int otex, float step) 
2727 
{ 
2728 
///

2729 
cgGLSetTextureParameter(_param_orientation_gtex, gtex); 
2730 
cgGLEnableTextureParameter(_param_orientation_gtex); 
2731 
cgGLSetTextureParameter(_param_orientation_otex, otex); 
2732 
cgGLEnableTextureParameter(_param_orientation_otex); 
2733  
2734 
float size[4]; 
2735 
size[0] = (float)width; 
2736 
size[1] = (float)height; 
2737 
size[2] = sigma;

2738 
size[3] = step;

2739 
cgGLSetParameter4fv(_param_orientation_size, size); 
2740  
2741 
} 
2742  
2743 
void ShaderBagPKCG::SetFeatureDescirptorParam(int gtex, int otex, float dwidth, float fwidth, float width, float height, float sigma) 
2744 
{ 
2745 
///

2746  
2747 
cgGLSetTextureParameter(_param_descriptor_gtex, gtex); 
2748 
cgGLEnableTextureParameter(_param_descriptor_gtex); 
2749 
cgGLSetTextureParameter(_param_descriptor_otex, otex); 
2750 
cgGLEnableTextureParameter(_param_descriptor_otex); 
2751  
2752  
2753 
float dsize[4] ={dwidth, 1.0f/dwidth, fwidth, 1.0f/fwidth}; 
2754 
cgGLSetParameter4fv(_param_descriptor_dsize, dsize); 
2755 
float size[3]; 
2756 
size[0] = width;

2757 
size[1] = height;

2758 
size[2] = GlobalUtil::_DescriptorWindowFactor;

2759 
cgGLSetParameter3fv(_param_descriptor_size, size); 
2760  
2761  
2762 
} 
2763  
2764 
#endif

2765 