root / rgbdslam / external / siftgpu / src / SiftGPU / ProgramGLSL.cpp @ 9240aaa3
History  View  Annotate  Download (81.7 KB)
1 
////////////////////////////////////////////////////////////////////////////


2 
// File: ProgramGLSL.cpp

3 
// Author: Changchang Wu

4 
// Description : GLSL related classes

5 
// class ProgramGLSL A simple wrapper of GLSL programs

6 
// class ShaderBagGLSL GLSL shaders for SIFT

7 
// class FilterGLSL GLSL gaussian filters for SIFT

8 
//

9 
// Copyright (c) 2007 University of North Carolina at Chapel Hill

10 
// All Rights Reserved

11 
//

12 
// Permission to use, copy, modify and distribute this software and its

13 
// documentation for educational, research and nonprofit purposes, without

14 
// fee, and without a written agreement is hereby granted, provided that the

15 
// above copyright notice and the following paragraph appear in all copies.

16 
//

17 
// The University of North Carolina at Chapel Hill make no representations

18 
// about the suitability of this software for any purpose. It is provided

19 
// 'as is' without express or implied warranty.

20 
//

21 
// Please send BUG REPORTS to ccwu@cs.unc.edu

22 
//

23 
////////////////////////////////////////////////////////////////////////////

24  
25 

26 
#include "GL/glew.h" 
27 
#include <string.h> 
28 
#include <stdio.h> 
29 
#include <iomanip> 
30 
#include <iostream> 
31 
#include <strstream> 
32 
#include <vector> 
33 
#include <algorithm> 
34 
#include <math.h> 
35 
using namespace std; 
36  
37 
#include "GlobalUtil.h" 
38 
#include "ProgramGLSL.h" 
39 
#include "GLTexImage.h" 
40 
#include "ShaderMan.h" 
41 
#include "SiftGPU.h" 
42  
43 
ProgramGLSL::ShaderObject::ShaderObject(int shadertype, const char * source, int filesource) 
44 
{ 
45  
46  
47 
_type = shadertype; 
48 
_compiled = 0;

49  
50  
51 
_shaderID = glCreateShader(shadertype); 
52 
if(_shaderID == 0) return; 
53 

54 
if(source)

55 
{ 
56  
57 
GLint code_length; 
58 
if(filesource ==0) 
59 
{ 
60 
const char* code = source; 
61 
code_length = strlen(code); 
62 
glShaderSource(_shaderID, 1, (const char **) &code, &code_length); 
63 
}else

64 
{ 
65 
char * code;

66 
if((code_length= ReadShaderFile(source, code)) ==0) return; 
67 
glShaderSource(_shaderID, 1, (const char **) &code, &code_length); 
68 
delete code;

69 
} 
70  
71 
glCompileShader(_shaderID); 
72  
73 
CheckCompileLog(); 
74  
75 
if(!_compiled) std::cout << source;

76 
} 
77  
78  
79  
80  
81 
} 
82  
83 
int ProgramGLSL::ShaderObject::ReadShaderFile(const char *sourcefile, char*& code ) 
84 
{ 
85 
code = NULL;

86 
FILE * file; 
87 
int len=0; 
88  
89 
if(sourcefile == NULL) return 0; 
90  
91 
file = fopen(sourcefile,"rt");

92 
if(file == NULL) return 0; 
93  
94 

95 
fseek(file, 0, SEEK_END);

96 
len = ftell(file); 
97 
rewind(file); 
98 
if(len >1) 
99 
{ 
100 
code = new char[len+1]; 
101 
fread(code, sizeof( char), len, file); 
102 
code[len] = 0;

103 
}else

104 
{ 
105 
len = 0;

106 
} 
107  
108 
fclose(file); 
109  
110 
return len;

111 

112 
} 
113  
114 
void ProgramGLSL::ShaderObject::CheckCompileLog()

115 
{ 
116  
117 
GLint status; 
118 
glGetShaderiv(_shaderID, GL_COMPILE_STATUS, &status); 
119 
_compiled = (status ==GL_TRUE); 
120  
121 
if(_compiled == 0) PrintCompileLog(std::cout); 
122  
123  
124 
} 
125  
126 
ProgramGLSL::ShaderObject::~ShaderObject() 
127 
{ 
128 
if(_shaderID) glDeleteShader(_shaderID);

129  
130 
} 
131  
132 
int ProgramGLSL::ShaderObject::IsValidFragmentShader()

133 
{ 
134 
return _type == GL_FRAGMENT_SHADER && _shaderID && _compiled;

135 
} 
136  
137 
int ProgramGLSL::ShaderObject::IsValidVertexShader()

138 
{ 
139 
return _type == GL_VERTEX_SHADER && _shaderID && _compiled;

140 
} 
141  
142  
143 
void ProgramGLSL::ShaderObject::PrintCompileLog(ostream&os)

144 
{ 
145 
GLint len = 0;

146  
147 
glGetShaderiv(_shaderID, GL_INFO_LOG_LENGTH , &len); 
148 
if(len <=1) return; 
149 

150 
char * compileLog = new char[len+1]; 
151 
if(compileLog == NULL) return; 
152  
153 
glGetShaderInfoLog(_shaderID, len, &len, compileLog); 
154 

155  
156 
os<<"Compile Log\n"<<compileLog<<"\n"; 
157  
158 
delete compileLog;

159 
} 
160  
161  
162 
ProgramGLSL::ProgramGLSL() 
163 
{ 
164 
_linked = 0;

165 
_programID = glCreateProgram(); 
166 
} 
167 
ProgramGLSL::~ProgramGLSL() 
168 
{ 
169 
if(_programID)glDeleteProgram(_programID);

170 
} 
171 
void ProgramGLSL::AttachShaderObject(ShaderObject &shader)

172 
{ 
173 
if(_programID && shader.IsValidShaderObject())

174 
glAttachShader(_programID, shader.GetShaderID()); 
175 
} 
176 
void ProgramGLSL::DetachShaderObject(ShaderObject &shader)

177 
{ 
178 
if(_programID && shader.IsValidShaderObject())

179 
glDetachShader(_programID, shader.GetShaderID()); 
180 
} 
181 
int ProgramGLSL::LinkProgram()

182 
{ 
183 
_linked = 0;

184  
185 
if(_programID==0) return 0; 
186  
187 
glLinkProgram(_programID); 
188  
189 
CheckLinkLog(); 
190  
191 
// GlobalUtil::StartTimer("100 link test");

192 
// for(int i = 0; i<100; i++) glLinkProgram(_programID);

193 
// GlobalUtil::StopTimer();

194  
195 
return _linked;

196 
} 
197  
198 
void ProgramGLSL::CheckLinkLog()

199 
{ 
200 
GLint status; 
201 
glGetProgramiv(_programID, GL_LINK_STATUS, &status); 
202  
203 
_linked = (status == GL_TRUE); 
204  
205 
} 
206  
207  
208 
int ProgramGLSL::ValidateProgram()

209 
{ 
210 
if(_programID && _linked)

211 
{ 
212 
/// GLint status;

213 
// glValidateProgram(_programID);

214 
// glGetProgramiv(_programID, GL_VALIDATE_STATUS, &status);

215 
// return status == GL_TRUE;

216 
return 1; 
217 
} 
218 
else

219 
return 0; 
220 
} 
221  
222 
void ProgramGLSL::PrintLinkLog(std::ostream &os)

223 
{ 
224 
GLint len = 0;

225  
226 
glGetProgramiv(_programID, GL_INFO_LOG_LENGTH , &len); 
227 
if(len <=1) return; 
228 

229 
char* linkLog = new char[len+1]; 
230 
if(linkLog == NULL) return; 
231  
232 
glGetProgramInfoLog(_programID, len, &len, linkLog); 
233 

234 
linkLog[len] = 0;

235  
236 
if(strstr(linkLog, "failed")) 
237 
{ 
238 
os<<linkLog + (linkLog[0] == ' '? 1:0)<<"\n"; 
239 
_linked = 0;

240 
} 
241  
242 
delete linkLog;

243 
} 
244  
245 
int ProgramGLSL::UseProgram()

246 
{ 
247 
if(ValidateProgram())

248 
{ 
249 
glUseProgram(_programID); 
250 
return true; 
251 
} 
252 
else

253 
{ 
254 
return false; 
255 
} 
256 
} 
257  
258  
259 
ProgramGLSL::ProgramGLSL(const char *frag_source) 
260 
{ 
261 
_linked = 0;

262 
_programID = glCreateProgram(); 
263 
ShaderObject shader(GL_FRAGMENT_SHADER, frag_source); 
264  
265 
if(shader.IsValidFragmentShader())

266 
{ 
267 
AttachShaderObject(shader); 
268 
LinkProgram(); 
269  
270 
if(!_linked)

271 
{ 
272 
//shader.PrintCompileLog(std::cout);

273 
PrintLinkLog(std::cout); 
274 
} 
275 
}else

276 
{ 
277 
_linked = 0;

278 
} 
279 

280 
} 
281  
282 
/*

283 
ProgramGLSL::ProgramGLSL(char*frag_source, char * vert_source)

284 
{

285 
_used = 0;

286 
_linked = 0;

287 
_programID = glCreateProgram();

288 
ShaderObject shader(GL_FRAGMENT_SHADER, frag_source);

289 
ShaderObject vertex_shader(GL_VERTEX_SHADER, vert_source);

290 
AttachShaderObject(shader);

291 
AttachShaderObject(vertex_shader);

292 
LinkProgram();

293 
if(!_linked)

294 
{

295 
shader.PrintCompileLog(std::cout);

296 
vertex_shader.PrintCompileLog(std::cout);

297 
PrintLinkLog(std::cout);

298 
std::cout<<vert_source;

299 
std::cout<<frag_source;

300 
}

301 

302 
}

303 
*/

304  
305  
306  
307 
void ProgramGLSL::ReLink()

308 
{ 
309 
glLinkProgram(_programID); 
310 
} 
311  
312 
int ProgramGLSL::IsNative()

313 
{ 
314 
return _linked;

315 
} 
316  
317 
FilterGLSL::FilterGLSL(float sigma)

318 
{ 
319 
//pixel inside 3*sigma box

320 
int sz = int( ceil( GlobalUtil::_FilterWidthFactor * sigma 0.5) ) ;// 
321 
int width = 2*sz + 1; 
322  
323 
//filter size truncation

324 
if(GlobalUtil::_MaxFilterWidth >0 && width > GlobalUtil::_MaxFilterWidth) 
325 
{ 
326 
std::cout<<"Filter size truncated from "<<width<<" to "<<GlobalUtil::_MaxFilterWidth<<endl; 
327 
sz = GlobalUtil::_MaxFilterWidth>>1;

328 
width = 2 * sz + 1; 
329 
} 
330  
331 
int i;

332 
float * kernel = new float[width]; 
333 
float rv = 1.0f/(sigma*sigma); 
334 
float v, ksum =0; 
335  
336 
// precompute filter

337 
for( i = sz ; i <= sz ; ++i)

338 
{ 
339 
kernel[i+sz] = v = exp(0.5f * i * i *rv) ; 
340 
ksum += v; 
341 
} 
342  
343 
//normalize the kernel

344 
rv = 1.0f / ksum; 
345 
for(i = 0; i< width ;i++) kernel[i]*=rv; 
346 
//

347 

348 
MakeFilterProgram(kernel, width); 
349  
350 
_size = sz; 
351  
352 
delete[] kernel;

353 
if(GlobalUtil::_verbose && GlobalUtil::_timingL) std::cout<<"Filter: sigma = "<<sigma<<", size = "<<width<<"x"<<width<<endl; 
354 
} 
355  
356  
357 
void FilterGLSL::MakeFilterProgram(float kernel[], int width) 
358 
{ 
359 
if(GlobalUtil::_usePackedTex)

360 
{ 
361 
s_shader_h = CreateFilterHPK(kernel, width); 
362 
s_shader_v = CreateFilterVPK(kernel, width); 
363 
}else

364 
{ 
365 
s_shader_h = CreateFilterH(kernel, width); 
366 
s_shader_v = CreateFilterV(kernel, width); 
367 
} 
368 
} 
369  
370 
ProgramGPU* FilterGLSL::CreateFilterH(float kernel[], int width) 
371 
{ 
372  
373 
char buffer[10240]; 
374 
ostrstream out(buffer, 10240);

375 
out<<setprecision(8);

376  
377 
out<< "uniform sampler2DRect tex;";

378 
out<< "\nvoid main(void){ float intensity = 0.0 ; vec2 pos;\n";

379  
380 
int half_width = width / 2; 
381 
for(int i = 0; i< width; i++) 
382 
{ 
383 
if(i == half_width)

384 
{ 
385  
386 
out<<"float or = texture2DRect(tex, gl_TexCoord[0].st).r;\n";

387 
out<<"intensity+= or * "<<kernel[i]<<";\n"; 
388 
}else

389 
{ 
390 
out<<"pos = gl_TexCoord[0].st + vec2(float("<< (i  half_width) <<") , 0);\n"; 
391 
out<<"intensity+= "<<kernel[i]<<"*texture2DRect(tex, pos).r;\n"; 
392 
} 
393 
} 
394  
395 
//copy original data to red channel

396 
out<<"gl_FragColor.r = or;\n";

397 
out<<"gl_FragColor.b = intensity;}\n"<<'\0'; 
398  
399 
return new ProgramGLSL( buffer); 
400 
} 
401  
402  
403 
ProgramGPU* FilterGLSL::CreateFilterV(float kernel[], int height) 
404 
{ 
405  
406 
char buffer[10240]; 
407 
ostrstream out(buffer, 10240);

408 
out<<setprecision(8);

409  
410 
out<< "uniform sampler2DRect tex;";

411 
out<< "\nvoid main(void){ float intensity = 0.0;vec2 pos; \n";

412 
int half_height = height / 2; 
413 
for(int i = 0; i< height; i++) 
414 
{ 
415  
416 
if(i == half_height)

417 
{ 
418 
out<<"vec2 orb = texture2DRect(tex, gl_TexCoord[0].st).rb;\n";

419 
out<<"intensity+= orb.y * "<<kernel[i]<<";\n"; 
420  
421 
}else

422 
{ 
423 
out<<"pos = gl_TexCoord[0].st + vec2(0, float("<<(i  half_height) <<") );\n"; 
424 
out<<"intensity+= texture2DRect(tex, pos).b * "<<kernel[i]<<";\n"; 
425 
} 
426 

427 
} 
428  
429 
out<<"gl_FragColor.b = orb.y;\n";

430 
out<<"gl_FragColor.g = intensity  orb.x;\n"; // difference of gaussian.. 
431 
out<<"gl_FragColor.r = intensity;}\n"<<'\0'; 
432 

433 
// std::cout<<buffer<<endl;

434 
return new ProgramGLSL( buffer); 
435 
} 
436  
437  
438  
439 
ProgramGPU* FilterGLSL::CreateFilterHPK(float kernel[], int width) 
440 
{ 
441 
//both h and v are packed...

442 
int i, j , xw, xwn;

443  
444 
int halfwidth = width >>1; 
445 
float * pf = kernel + halfwidth;

446 
int nhpixel = (halfwidth+1)>>1; //how many neighbour pixels need to be looked up 
447 
int npixel = (nhpixel<<1)+1;// 
448 
char buffer[10240]; 
449 
float weight[3]; 
450 
ostrstream out(buffer, 10240);

451 
out<<setprecision(8);

452  
453 
out<< "uniform sampler2DRect tex;";

454 
out<< "\nvoid main(void){ vec4 result = vec4(0, 0, 0, 0);\n";

455 
///use multi texture coordinate because nhpixels can be at most 3

456 
out<<"vec4 pc; vec2 coord; \n";

457 
for( i = 0 ; i < npixel ; i++) 
458 
{ 
459 
out<<"coord = gl_TexCoord[0].xy + vec2(float("<<inhpixel<<"),0);\n"; 
460 
out<<"pc=texture2DRect(tex, coord);\n";

461 
if(GlobalUtil::_PreciseBorder) out<<"if(coord.x < 0) pc = pc.rrbb;\n"; 
462 
//for each subpixel j in center, the weight of subpixel k

463 
xw = (i  nhpixel)*2;

464 
for( j = 0; j < 3; j++) 
465 
{ 
466 
xwn = xw + j 1;

467 
weight[j] = xwn < halfwidth  xwn > halfwidth? 0 : pf[xwn];

468 
} 
469 
if(weight[1] == 0.0) 
470 
{ 
471 
out<<"result += vec4("<<weight[2]<<","<<weight[0]<<","<<weight[2]<<","<<weight[0]<<")*pc.grab;\n"; 
472 
} 
473 
else

474 
{ 
475 
out<<"result += vec4("<<weight[1]<<", "<<weight[0]<<", "<<weight[1]<<", "<<weight[0]<<")*pc.rrbb;\n"; 
476 
out<<"result += vec4("<<weight[2]<<", "<<weight[1]<<", "<<weight[2]<<", "<<weight[1]<<")*pc.ggaa;\n"; 
477 
} 
478 

479 
} 
480 
out<<"gl_FragColor = result;}\n"<<'\0'; 
481 

482 
return new ProgramGLSL( buffer); 
483  
484  
485 
} 
486  
487  
488 
ProgramGPU* FilterGLSL::CreateFilterVPK(float kernel[], int height) 
489 
{ 
490  
491 
//both h and v are packed...

492 
int i, j, yw, ywn;

493  
494 
int halfh = height >>1; 
495 
float * pf = kernel + halfh;

496 
int nhpixel = (halfh+1)>>1; //how many neighbour pixels need to be looked up 
497 
int npixel = (nhpixel<<1)+1;// 
498 
char buffer[10240]; 
499 
float weight[3]; 
500 
ostrstream out(buffer, 10240);

501 
out<<setprecision(8);

502  
503 
out<< "uniform sampler2DRect tex;";

504 
out<< "\nvoid main(void){ vec4 result = vec4(0, 0, 0, 0);\n";

505 
///use multi texture coordinate because nhpixels can be at most 3

506 
out<<"vec4 pc; vec2 coord;\n";

507 
for( i = 0 ; i < npixel ; i++) 
508 
{ 
509 
out<<"coord = gl_TexCoord[0].xy + vec2(0, float("<<inhpixel<<"));\n"; 
510 
out<<"pc=texture2DRect(tex, coord);\n";

511 
if(GlobalUtil::_PreciseBorder) out<<"if(coord.y < 0) pc = pc.rgrg;\n"; 
512  
513 
//for each subpixel j in center, the weight of subpixel k

514 
yw = (i  nhpixel)*2;

515 
for( j = 0; j < 3; j++) 
516 
{ 
517 
ywn = yw + j 1;

518 
weight[j] = ywn < halfh  ywn > halfh? 0 : pf[ywn];

519 
} 
520 
if(weight[1] == 0.0) 
521 
{ 
522 
out<<"result += vec4("<<weight[2]<<","<<weight[2]<<","<<weight[0]<<","<<weight[0]<<")*pc.barg;\n"; 
523 
}else

524 
{ 
525 
out<<"result += vec4("<<weight[1]<<","<<weight[1]<<","<<weight[0]<<","<<weight[0]<<")*pc.rgrg;\n"; 
526 
out<<"result += vec4("<<weight[2]<<","<<weight[2]<<","<<weight[1]<<","<<weight[1]<<")*pc.baba;\n"; 
527 
} 
528 
} 
529 
out<<"gl_FragColor = result;}\n"<<'\0'; 
530  
531 
return new ProgramGLSL( buffer); 
532 
} 
533  
534  
535  
536 
ShaderBag::ShaderBag() 
537 
{ 
538 
s_debug = 0;

539 
s_orientation = 0;

540 
s_display_gaussian = 0;

541 
s_display_dog = 0;

542 
s_display_grad = 0;

543 
s_display_keys = 0;

544 
s_sampling = 0;

545 
s_grad_pass = 0;

546 
s_dog_pass = 0;

547 
s_keypoint = 0;

548 
s_genlist_init_tight = 0;

549 
s_genlist_init_ex = 0;

550 
s_genlist_histo = 0;

551 
s_genlist_start = 0;

552 
s_genlist_step = 0;

553 
s_genlist_end = 0;

554 
s_vertex_list = 0;

555 
s_descriptor_fp = 0;

556 
s_margin_copy = 0;

557 
////////////

558 
f_gaussian_skip0 = NULL;

559 
f_gaussian_skip1 = NULL;

560 
f_gaussian_step = NULL;

561 
_gaussian_step_num = 0;

562  
563 
} 
564  
565 
ShaderBag::~ShaderBag() 
566 
{ 
567 
if(s_debug)delete s_debug; 
568 
if(s_orientation)delete s_orientation; 
569 
if(s_display_gaussian)delete s_display_gaussian; 
570 
if(s_display_dog)delete s_display_dog; 
571 
if(s_display_grad)delete s_display_grad; 
572 
if(s_display_keys)delete s_display_keys; 
573 
if(s_sampling)delete s_sampling; 
574 
if(s_grad_pass)delete s_grad_pass; 
575 
if(s_dog_pass) delete s_dog_pass; 
576 
if(s_keypoint)delete s_keypoint; 
577 
if(s_genlist_init_tight)delete s_genlist_init_tight; 
578 
if(s_genlist_init_ex)delete s_genlist_init_ex; 
579 
if(s_genlist_histo)delete s_genlist_histo; 
580 
if(s_genlist_start)delete s_genlist_start; 
581 
if(s_genlist_step)delete s_genlist_step; 
582 
if(s_genlist_end)delete s_genlist_end; 
583 
if(s_vertex_list)delete s_vertex_list; 
584 
if(s_descriptor_fp)delete s_descriptor_fp; 
585 
if(s_margin_copy) delete s_margin_copy; 
586  
587 
//////////////////////////////////////////////

588 
if(f_gaussian_skip1) delete f_gaussian_skip1; 
589  
590 
for(unsigned int i = 0; i < f_gaussian_skip0_v.size(); i++) 
591 
{ 
592 
if(f_gaussian_skip0_v[i]) delete f_gaussian_skip0_v[i]; 
593 
} 
594 
if(f_gaussian_step && _gaussian_step_num > 0) 
595 
{ 
596 
for(int i = 0; i< _gaussian_step_num; i++) 
597 
{ 
598 
delete f_gaussian_step[i];

599 
} 
600 
delete[] f_gaussian_step;

601 
} 
602 
} 
603  
604  
605 
void ShaderBag::SelectInitialSmoothingFilter(int octave_min, SiftParam¶m) 
606 
{ 
607 
float sigma = param.GetInitialSmoothSigma(octave_min);

608 
if(sigma == 0) 
609 
{ 
610 
f_gaussian_skip0 = NULL;

611 
}else

612 
{ 
613 
for(unsigned int i = 0; i < f_gaussian_skip0_v.size(); i++) 
614 
{ 
615 
if(f_gaussian_skip0_v[i]>_id == octave_min)

616 
{ 
617 
f_gaussian_skip0 = f_gaussian_skip0_v[i]; 
618 
return ;

619 
} 
620 
} 
621 
FilterGLSL * filter = new FilterGLSL(sigma);

622 
filter>_id = octave_min; 
623 
f_gaussian_skip0_v.push_back(filter); 
624 
f_gaussian_skip0 = filter; 
625 
} 
626 
} 
627  
628 
void ShaderBag::CreateGaussianFilters(SiftParam¶m)

629 
{ 
630 
if(param._sigma_skip0>0.0f) 
631 
{ 
632 
FilterGLSL * filter; 
633 
f_gaussian_skip0 = filter = new FilterGLSL(param._sigma_skip0);

634 
filter>_id = GlobalUtil::_octave_min_default; 
635 
f_gaussian_skip0_v.push_back(filter); 
636 
} 
637 
if(param._sigma_skip1>0.0f) 
638 
{ 
639 
f_gaussian_skip1 = new FilterGLSL(param._sigma_skip1);

640 
} 
641  
642 
f_gaussian_step = new FilterProgram*[param._sigma_num];

643 
for(int i = 0; i< param._sigma_num; i++) 
644 
{ 
645 
f_gaussian_step[i] = new FilterGLSL(param._sigma[i]);

646 
} 
647 
_gaussian_step_num = param._sigma_num; 
648 
} 
649  
650  
651 
void ShaderBag::LoadDynamicShaders(SiftParam& param)

652 
{ 
653 
LoadKeypointShader(param._dog_threshold, param._edge_threshold); 
654 
LoadGenListShader(param._dog_level_num, 0);

655 
CreateGaussianFilters(param); 
656 
} 
657  
658  
659 
void ShaderBagGLSL::LoadFixedShaders()

660 
{ 
661  
662  
663 
s_gray = new ProgramGLSL(

664 
"uniform sampler2DRect rgbTex; void main(void){\n"

665 
"float intensity = dot(vec3(0.299, 0.587, 0.114), texture2DRect(rgbTex,gl_TexCoord[0].st ).rgb);\n"

666 
"gl_FragColor = vec4(intensity, intensity, intensity, 1.0);}");

667  
668  
669 
s_debug = new ProgramGLSL( "void main(void){gl_FragColor.rg = gl_TexCoord[0].st;}"); 
670  
671  
672 
s_sampling = new ProgramGLSL(

673 
"uniform sampler2DRect tex; void main(void){gl_FragColor.rg= texture2DRect(tex, gl_TexCoord[0].st).rg;}");

674  
675 
//

676 
s_grad_pass = new ProgramGLSL(

677 
"uniform sampler2DRect tex; void main ()\n"

678 
"{\n"

679 
" vec4 v1, v2, gg;\n"

680 
" vec4 cc = texture2DRect(tex, gl_TexCoord[0].xy);\n"

681 
" gg.x = texture2DRect(tex, gl_TexCoord[1].xy).r;\n"

682 
" gg.y = texture2DRect(tex, gl_TexCoord[2].xy).r;\n"

683 
" gg.z = texture2DRect(tex, gl_TexCoord[3].xy).r;\n"

684 
" gg.w = texture2DRect(tex, gl_TexCoord[4].xy).r;\n"

685 
" vec2 dxdy = (gg.yw  gg.xz); \n"

686 
" float grad = 0.5*length(dxdy);\n"

687 
" float theta = grad==0? 0.0: atan(dxdy.y, dxdy.x);\n"

688 
" gl_FragData[0] = vec4(cc.rg, grad, theta);\n"

689 
"}\n\0");

690  
691 
ProgramGLSL * program; 
692 
s_margin_copy = program = new ProgramGLSL(

693 
"uniform sampler2DRect tex; uniform vec2 truncate;\n"

694 
"void main(){ gl_FragColor = texture2DRect(tex, min(gl_TexCoord[0].xy, truncate)); }");

695  
696 
_param_margin_copy_truncate = glGetUniformLocation(*program, "truncate");

697  
698  
699 
GlobalUtil::_OrientationPack2 = 0;

700 
LoadOrientationShader(); 
701  
702 
if(s_orientation == NULL) 
703 
{ 
704 
//Load a simplified version if the right version is not supported

705 
s_orientation = program = new ProgramGLSL(

706 
"uniform sampler2DRect fTex; uniform sampler2DRect oTex;\n"

707 
" uniform float size; void main(){\n"

708 
" vec4 cc = texture2DRect(fTex, gl_TexCoord[0].st);\n"

709 
" vec4 oo = texture2DRect(oTex, cc.rg);\n"

710 
" gl_FragColor.rg = cc.rg;\n"

711 
" gl_FragColor.b = oo.a;\n"

712 
" gl_FragColor.a = size;}");

713  
714 
_param_orientation_gtex = glGetUniformLocation(*program, "oTex");

715 
_param_orientation_size = glGetUniformLocation(*program, "size");

716  
717 
GlobalUtil::_MaxOrientation = 0;

718 
GlobalUtil::_FullSupported = 0;

719 
std::cerr<<"Orientation simplified on this hardware"<<endl;

720 
} 
721  
722  
723 
if(GlobalUtil::_DescriptorPPT) LoadDescriptorShader();

724 
if(s_descriptor_fp == NULL) 
725 
{ 
726 
GlobalUtil::_DescriptorPPT = GlobalUtil::_FullSupported = 0;

727 
std::cerr<<"Descriptor ignored on this hardware"<<endl;

728 
} 
729  
730 
s_zero_pass = new ProgramGLSL("void main(){gl_FragColor = vec4(0.0);}"); 
731 
} 
732  
733  
734 
void ShaderBagGLSL::LoadDisplayShaders()

735 
{ 
736 
s_copy_key = new ProgramGLSL(

737 
"uniform sampler2DRect tex; void main(){\n"

738 
"gl_FragColor.rg= texture2DRect(tex, gl_TexCoord[0].st).rg; gl_FragColor.ba = vec2(0.0,1.0); }");

739 

740  
741 
ProgramGLSL * program; 
742 
s_vertex_list = program = new ProgramGLSL(

743 
"uniform vec4 sizes; uniform sampler2DRect tex;\n"

744 
"void main(void){\n"

745 
"float fwidth = sizes.y; float twidth = sizes.z; float rwidth = sizes.w; \n"

746 
"float index = 0.1*(fwidth*floor(gl_TexCoord[0].y) + gl_TexCoord[0].x);\n"

747 
"float px = mod(index, twidth);\n"

748 
"vec2 tpos= floor(vec2(px, index*rwidth))+0.5;\n"

749 
"vec4 cc = texture2DRect(tex, tpos );\n"

750 
"float size = 3.0f * cc.a; //sizes.x;// \n"

751 
"gl_FragColor.zw = vec2(0.0, 1.0);\n"

752 
"if(cc.x<=0  cc.y <=0) {gl_FragColor.xy = cc.xy; }\n"

753 
"else {float type = fract(px);\n"

754 
"vec2 dxy; \n"

755 
"dxy.x = type < 0.1 ? 0.0 : ((type <0.5  type > 0.9)? size : size);\n"

756 
"dxy.y = type < 0.2 ? 0.0 : ((type < 0.3  type > 0.7 )? size :size); \n"

757 
"float s = sin(cc.b); float c = cos(cc.b); \n"

758 
"gl_FragColor.x = cc.x + c*dxy.xs*dxy.y;\n"

759 
"gl_FragColor.y = cc.y + c*dxy.y+s*dxy.x;}\n}\n");

760  
761  
762 
_param_genvbo_size = glGetUniformLocation(*program, "sizes");

763 
s_display_gaussian = new ProgramGLSL(

764 
"uniform sampler2DRect tex; void main(void){float r = texture2DRect(tex, gl_TexCoord[0].st).r;\n"

765 
"gl_FragColor = vec4(r, r, r, 1);}" );

766  
767 
s_display_dog = new ProgramGLSL(

768 
"uniform sampler2DRect tex; void main(void){float g = 0.5+(20.0*texture2DRect(tex, gl_TexCoord[0].st).g);\n"

769 
"gl_FragColor = vec4(g, g, g, 0.0);}" );

770  
771 
s_display_grad = new ProgramGLSL(

772 
"uniform sampler2DRect tex; void main(void){\n"

773 
" vec4 cc = texture2DRect(tex, gl_TexCoord[0].st);gl_FragColor = vec4(5.0* cc.bbb, 1.0);}");

774  
775 
s_display_keys= new ProgramGLSL(

776 
"uniform sampler2DRect tex; void main(void){\n"

777 
" vec4 cc = texture2DRect(tex, gl_TexCoord[0].st);\n"

778 
" if(cc.r ==0.0) discard; gl_FragColor = (cc.r==1.0? vec4(1.0, 0.0, 0,1.0):vec4(0.0,1.0,0.0,1.0));}");

779 
} 
780  
781 
void ShaderBagGLSL::LoadKeypointShader(float threshold, float edge_threshold) 
782 
{ 
783  
784 
char buffer[10240]; 
785 
float threshold0 = threshold* (GlobalUtil::_SubpixelLocalization?0.8f:1.0f); 
786 
float threshold1 = threshold;

787 
float threshold2 = (edge_threshold+1)*(edge_threshold+1)/edge_threshold; 
788 
ostrstream out(buffer, 10240);

789 
streampos pos; 
790  
791 
//tex(X)(Y)

792 
//X: (CLR) (CENTER 0, LEFT 1, RIGHT +1)

793 
//Y: (CDU) (CENTER 0, DOWN 1, UP +1)

794  
795 
out << "#define THRESHOLD0 " << threshold0 << "\n" 
796 
"#define THRESHOLD1 " << threshold1 << "\n" 
797 
"#define THRESHOLD2 " << threshold2 << "\n"; 
798  
799 
out<< 
800 
"uniform sampler2DRect tex, texU, texD; void main ()\n"

801 
"{\n"

802 
" vec4 v1, v2, gg, temp;\n"

803 
" vec2 TexRU = vec2(gl_TexCoord[2].x, gl_TexCoord[4].y); \n"

804 
" vec4 cc = texture2DRect(tex, gl_TexCoord[0].xy);\n"

805 
" temp = texture2DRect(tex, gl_TexCoord[1].xy);\n"

806 
" v1.x = temp.g; gg.x = temp.r;\n"

807 
" temp = texture2DRect(tex, gl_TexCoord[2].xy) ;\n"

808 
" v1.y = temp.g; gg.y = temp.r;\n"

809 
" temp = texture2DRect(tex, gl_TexCoord[3].xy) ;\n"

810 
" v1.z = temp.g; gg.z = temp.r;\n"

811 
" temp = texture2DRect(tex, gl_TexCoord[4].xy) ;\n"

812 
" v1.w = temp.g; gg.w = temp.r;\n"

813 
" v2.x = texture2DRect(tex, gl_TexCoord[5].xy).g;\n"

814 
" v2.y = texture2DRect(tex, gl_TexCoord[6].xy).g;\n"

815 
" v2.z = texture2DRect(tex, gl_TexCoord[7].xy).g;\n"

816 
" v2.w = texture2DRect(tex, TexRU.xy).g;\n"

817 
" vec2 dxdy = (gg.yw  gg.xz); \n"

818 
" float grad = 0.5*length(dxdy);\n"

819 
" float theta = grad==0? 0.0: atan(dxdy.y, dxdy.x);\n"

820 
" gl_FragData[0] = vec4(cc.rg, grad, theta);\n"

821  
822 
//test against 8 neighbours

823 
//use variable to identify type of extremum

824 
//1.0 for local maximum and 0.5 for minimum

825 
<< 
826 
" float dog = 0.0; \n"

827 
" gl_FragData[1] = vec4(0, 0, 0, 0); \n"

828 
" dog = cc.g > THRESHOLD0 && all(greaterThan(cc.gggg, max(v1, v2)))?1.0: 0.0;\n"

829 
" dog = cc.g < THRESHOLD0 && all(lessThan(cc.gggg, min(v1, v2)))?0.5: dog;\n"

830 
" if(dog == 0.0) return;\n";

831  
832 
pos = out.tellp(); 
833 
//do edge supression first..

834 
//vector v1 is < (1, 0), (1, 0), (0,1), (0, 1)>

835 
//vector v2 is < (1,1), (1,1), (1,1), (1, 1)>

836  
837 
out<< 
838 
" float fxx, fyy, fxy; \n"

839 
" vec4 D2 = v1.xyzw  cc.gggg;\n"

840 
" vec2 D4 = v2.xw  v2.yz;\n"

841 
" fxx = D2.x + D2.y;\n"

842 
" fyy = D2.z + D2.w;\n"

843 
" fxy = 0.25*(D4.x + D4.y);\n"

844 
" float fxx_plus_fyy = fxx + fyy;\n"

845 
" float score_up = fxx_plus_fyy*fxx_plus_fyy; \n"

846 
" float score_down = (fxx*fyy  fxy*fxy);\n"

847 
" if( score_down <= 0  score_up > THRESHOLD2 * score_down)return;\n";

848  
849 
//...

850 
out<<" \n"

851 
" vec2 D5 = 0.5*(v1.ywv1.xz); \n"

852 
" float fx = D5.x, fy = D5.y ; \n"

853 
" float fs, fss , fxs, fys ; \n"

854 
" vec2 v3; vec4 v4, v5, v6;\n"

855 
//read 9 pixels of upper level

856 
<< 
857 
" v3.x = texture2DRect(texU, gl_TexCoord[0].xy).g;\n"

858 
" v4.x = texture2DRect(texU, gl_TexCoord[1].xy).g;\n"

859 
" v4.y = texture2DRect(texU, gl_TexCoord[2].xy).g;\n"

860 
" v4.z = texture2DRect(texU, gl_TexCoord[3].xy).g;\n"

861 
" v4.w = texture2DRect(texU, gl_TexCoord[4].xy).g;\n"

862 
" v6.x = texture2DRect(texU, gl_TexCoord[5].xy).g;\n"

863 
" v6.y = texture2DRect(texU, gl_TexCoord[6].xy).g;\n"

864 
" v6.z = texture2DRect(texU, gl_TexCoord[7].xy).g;\n"

865 
" v6.w = texture2DRect(texU, TexRU.xy).g;\n"

866 
//compare with 9 pixels of upper level

867 
//read and compare with 9 pixels of lower level

868 
//the maximum case

869 
<< 
870 
" if(dog == 1.0)\n"

871 
" {\n"

872 
" if(cc.g < v3.x  any(lessThan(cc.gggg, v4)) any(lessThan(cc.gggg, v6)))return; \n"

873 
" v3.y = texture2DRect(texD, gl_TexCoord[0].xy).g;\n"

874 
" v5.x = texture2DRect(texD, gl_TexCoord[1].xy).g;\n"

875 
" v5.y = texture2DRect(texD, gl_TexCoord[2].xy).g;\n"

876 
" v5.z = texture2DRect(texD, gl_TexCoord[3].xy).g;\n"

877 
" v5.w = texture2DRect(texD, gl_TexCoord[4].xy).g;\n"

878 
" v6.x = texture2DRect(texD, gl_TexCoord[5].xy).g;\n"

879 
" v6.y = texture2DRect(texD, gl_TexCoord[6].xy).g;\n"

880 
" v6.z = texture2DRect(texD, gl_TexCoord[7].xy).g;\n"

881 
" v6.w = texture2DRect(texD, TexRU.xy).g;\n"

882 
" if(cc.g < v3.y  any(lessThan(cc.gggg, v5)) any(lessThan(cc.gggg, v6)))return; \n"

883 
" }\n"

884 
//the minimum case

885 
<< 
886 
" else{\n"

887 
" if(cc.g > v3.x  any(greaterThan(cc.gggg, v4)) any(greaterThan(cc.gggg, v6)))return; \n"

888 
" v3.y = texture2DRect(texD, gl_TexCoord[0].xy).g;\n"

889 
" v5.x = texture2DRect(texD, gl_TexCoord[1].xy).g;\n"

890 
" v5.y = texture2DRect(texD, gl_TexCoord[2].xy).g;\n"

891 
" v5.z = texture2DRect(texD, gl_TexCoord[3].xy).g;\n"

892 
" v5.w = texture2DRect(texD, gl_TexCoord[4].xy).g;\n"

893 
" v6.x = texture2DRect(texD, gl_TexCoord[5].xy).g;\n"

894 
" v6.y = texture2DRect(texD, gl_TexCoord[6].xy).g;\n"

895 
" v6.z = texture2DRect(texD, gl_TexCoord[7].xy).g;\n"

896 
" v6.w = texture2DRect(texD, TexRU.xy).g;\n"

897 
" if(cc.g > v3.y  any(greaterThan(cc.gggg, v5)) any(greaterThan(cc.gggg, v6)))return; \n"

898 
" }\n";

899  
900 
if(GlobalUtil::_SubpixelLocalization)

901  
902 
// subpixel localization FragData1 = vec4(dog, 0, 0, 0); return;

903 
out << 
904 
" fs = 0.5*( v3.x  v3.y ); \n"

905 
" fss = v3.x + v3.y  cc.g  cc.g;\n"

906 
" fxs = 0.25 * ( v4.y + v5.x  v4.x  v5.y);\n"

907 
" fys = 0.25 * ( v4.w + v5.z  v4.z  v5.w);\n"

908 

909 
//

910 
// let dog difference be quatratic function of dx, dy, ds;

911 
// df(dx, dy, ds) = fx * dx + fy*dy + fs * ds +

912 
// + 0.5 * ( fxx * dx * dx + fyy * dy * dy + fss * ds * ds)

913 
// + (fxy * dx * dy + fxs * dx * ds + fys * dy * ds)

914 
// (fx, fy, fs, fxx, fyy, fss, fxy, fxs, fys are the derivatives)

915 

916 
//the local extremum satisfies

917 
// df/dx = 0, df/dy = 0, df/dz = 0

918 

919 
//that is

920 
// fx  fxx fxy fxs  dx

921 
// fy =  fxy fyy fys  * dy

922 
// fs  fxs fys fss  ds

923 
// need to solve dx, dy, ds

924  
925 
// Use Gauss elimination to solve the linear system

926 
<< 
927 
" vec3 dxys = vec3(0.0); \n"

928 
" vec4 A0, A1, A2 ; \n"

929 
" A0 = vec4(fxx, fxy, fxs, fx); \n"

930 
" A1 = vec4(fxy, fyy, fys, fy); \n"

931 
" A2 = vec4(fxs, fys, fss, fs); \n"

932 
" vec3 x3 = abs(vec3(fxx, fxy, fxs)); \n"

933 
" float maxa = max(max(x3.x, x3.y), x3.z); \n"

934 
" if(maxa >= 1e10 ) { \n"

935 
" if(x3.y ==maxa ) \n"

936 
" { \n"

937 
" vec4 TEMP = A1; A1 = A0; A0 = TEMP; \n"

938 
" }else if( x3.z == maxa ) \n"

939 
" { \n"

940 
" vec4 TEMP = A2; A2 = A0; A0 = TEMP; \n"

941 
" } \n"

942 
" A0 /= A0.x; \n"

943 
" A1 = A1.x * A0; \n"

944 
" A2 = A2.x * A0; \n"

945 
" vec2 x2 = abs(vec2(A1.y, A2.y)); \n"

946 
" if( x2.y > x2.x ) \n"

947 
" { \n"

948 
" vec3 TEMP = A2.yzw; \n"

949 
" A2.yzw = A1.yzw; \n"

950 
" A1.yzw = TEMP; \n"

951 
" x2.x = x2.y; \n"

952 
" } \n"

953 
" if(x2.x >= 1e10) { \n"

954 
" A1.yzw /= A1.y; \n"

955 
" A2.yzw = A2.y * A1.yzw; \n"

956 
" if(abs(A2.z) >= 1e10) { \n"

957 
// compute dx, dy, ds:

958 
<< 
959 
" \n"

960 
" dxys.z = A2.w /A2.z; \n"

961 
" dxys.y = A1.w  dxys.z*A1.z; \n"

962 
" dxys.x = A0.w  dxys.z*A0.z  dxys.y*A0.y; \n"

963  
964 
//one more threshold which I forgot in versions prior to 286

965 
<< 
966 
" bool dog_test = (abs(cc.g + 0.5*dot(vec3(fx, fy, fs), dxys ))<= THRESHOLD1) ;\n"

967 
" if(dog_test  any(greaterThan(abs(dxys), vec3(1.0)))) dog = 0;\n"

968 
" }\n"

969 
" }\n"

970 
" }\n"

971 
//keep the point when the offset is less than 1

972 
<< 
973 
" gl_FragData[1] = vec4( dog, dxys); \n";

974 
else

975  
976 
out<< 
977 
" gl_FragData[1] = vec4( dog, 0, 0, 0) ; \n";

978  
979 
out<< 
980 
"}\n" <<'\0'; 
981  
982  
983  
984 
ProgramGLSL * program = new ProgramGLSL(buffer);

985 
if(program>IsNative())

986 
{ 
987 
s_keypoint = program ; 
988 
//parameter

989 
}else

990 
{ 
991 
delete program;

992 
out.seekp(pos); 
993 
out << 
994 
" gl_FragData[1] = vec4(dog, 0, 0, 0) ; \n"

995 
"}\n" <<'\0'; 
996 
s_keypoint = program = new ProgramGLSL(buffer);

997 
GlobalUtil::_SubpixelLocalization = 0;

998 
std::cerr<<"Detection simplified on this hardware"<<endl;

999 
} 
1000  
1001 
_param_dog_texu = glGetUniformLocation(*program, "texU");

1002 
_param_dog_texd = glGetUniformLocation(*program, "texD");

1003 
} 
1004  
1005  
1006 
void ShaderBagGLSL::SetDogTexParam(int texU, int texD) 
1007 
{ 
1008 
glUniform1i(_param_dog_texu, 1);

1009 
glUniform1i(_param_dog_texd, 2);

1010 
} 
1011  
1012 
void ShaderBagGLSL::SetGenListStepParam(int tex, int tex0) 
1013 
{ 
1014 
glUniform1i(_param_genlist_step_tex0, 1);

1015 
} 
1016 
void ShaderBagGLSL::SetGenVBOParam( float width, float fwidth, float size) 
1017 
{ 
1018 
float sizes[4] = {size*3.0f, fwidth, width, 1.0f/width}; 
1019 
glUniform4fv(_param_genvbo_size, 1, sizes);

1020  
1021 
} 
1022  
1023  
1024  
1025 
void ShaderBagGLSL::UnloadProgram()

1026 
{ 
1027 
glUseProgram(0);

1028 
} 
1029  
1030  
1031  
1032 
void ShaderBagGLSL::LoadGenListShader(int ndoglev, int nlev) 
1033 
{ 
1034 
ProgramGLSL * program; 
1035  
1036 
s_genlist_init_tight = new ProgramGLSL(

1037 
"uniform sampler2DRect tex; void main (void){\n"

1038 
"vec4 helper = vec4( texture2DRect(tex, gl_TexCoord[0].xy).r, texture2DRect(tex, gl_TexCoord[1].xy).r,\n"

1039 
"texture2DRect(tex, gl_TexCoord[2].xy).r, texture2DRect(tex, gl_TexCoord[3].xy).r);\n"

1040 
"gl_FragColor = vec4(greaterThan(helper, vec4(0.0,0.0,0.0,0.0)));\n"

1041 
"}");

1042  
1043 

1044 
s_genlist_init_ex = program = new ProgramGLSL(

1045 
"uniform sampler2DRect tex;uniform vec2 bbox;\n"

1046 
"void main (void ){\n"

1047 
"vec4 helper = vec4( texture2DRect(tex, gl_TexCoord[0].xy).r, texture2DRect(tex, gl_TexCoord[1].xy).r,\n"

1048 
"texture2DRect(tex, gl_TexCoord[2].xy).r, texture2DRect(tex, gl_TexCoord[3].xy).r);\n"

1049 
"bvec4 helper2 = bvec4( \n"

1050 
"all(lessThan(gl_TexCoord[0].xy , bbox)) && helper.x >0,\n"

1051 
"all(lessThan(gl_TexCoord[1].xy , bbox)) && helper.y >0,\n"

1052 
"all(lessThan(gl_TexCoord[2].xy , bbox)) && helper.z >0,\n"

1053 
"all(lessThan(gl_TexCoord[3].xy , bbox)) && helper.w >0);\n"

1054 
"gl_FragColor = vec4(helper2);\n"

1055 
"}");

1056 
_param_genlist_init_bbox = glGetUniformLocation( *program, "bbox");

1057  
1058  
1059 
//reduction ...

1060 
s_genlist_histo = new ProgramGLSL(

1061 
"uniform sampler2DRect tex; void main (void){\n"

1062 
"vec4 helper; vec4 helper2; \n"

1063 
"helper = texture2DRect(tex, gl_TexCoord[0].xy); helper2.xy = helper.xy + helper.zw; \n"

1064 
"helper = texture2DRect(tex, gl_TexCoord[1].xy); helper2.zw = helper.xy + helper.zw; \n"

1065 
"gl_FragColor.rg = helper2.xz + helper2.yw;\n"

1066 
"helper = texture2DRect(tex, gl_TexCoord[2].xy); helper2.xy = helper.xy + helper.zw; \n"

1067 
"helper = texture2DRect(tex, gl_TexCoord[3].xy); helper2.zw = helper.xy + helper.zw; \n"

1068 
"gl_FragColor.ba= helper2.xz+helper2.yw;\n"

1069 
"}");

1070  
1071  
1072 
//read of the first part, which generates tex coordinates

1073 
s_genlist_start= program = LoadGenListStepShader(1, 1); 
1074 
_param_ftex_width= glGetUniformLocation(*program, "width");

1075 
_param_genlist_start_tex0 = glGetUniformLocation(*program, "tex0");

1076 
//stepping

1077 
s_genlist_step = program = LoadGenListStepShader(0, 1); 
1078 
_param_genlist_step_tex= glGetUniformLocation(*program, "tex");

1079 
_param_genlist_step_tex0= glGetUniformLocation(*program, "tex0");

1080  
1081 
} 
1082  
1083 
void ShaderBagGLSL::SetMarginCopyParam(int xmax, int ymax) 
1084 
{ 
1085 
float truncate[2] = {xmax  0.5f , ymax  0.5f}; 
1086 
glUniform2fv(_param_margin_copy_truncate, 1, truncate);

1087 
} 
1088  
1089 
void ShaderBagGLSL::SetGenListInitParam(int w, int h) 
1090 
{ 
1091 
float bbox[2] = {w  1.0f, h  1.0f}; 
1092 
glUniform2fv(_param_genlist_init_bbox, 1, bbox);

1093 
} 
1094 
void ShaderBagGLSL::SetGenListStartParam(float width, int tex0) 
1095 
{ 
1096 
glUniform1f(_param_ftex_width, width); 
1097 
} 
1098  
1099  
1100 
ProgramGLSL* ShaderBagGLSL::LoadGenListStepShader(int start, int step) 
1101 
{ 
1102 
int i;

1103 
char buffer[10240]; 
1104 
// char chanels[5] = "rgba";

1105 
ostrstream out(buffer, 10240);

1106  
1107 
for(i = 0; i < step; i++) out<<"uniform sampler2DRect tex"<<i<<";\n"; 
1108 
if(start)

1109 
{ 
1110 
out<<"uniform float width;\n";

1111 
out<<"void main(void){\n";

1112 
out<<"float index = floor(gl_TexCoord[0].y) * width + floor(gl_TexCoord[0].x);\n";

1113 
out<<"vec2 pos = vec2(0.5, 0.5);\n";

1114 
}else

1115 
{ 
1116 
out<<"uniform sampler2DRect tex;\n";

1117 
out<<"void main(void){\n";

1118 
out<<"vec4 tc = texture2DRect( tex, gl_TexCoord[0].xy);\n";

1119 
out<<"vec2 pos = tc.rg; float index = tc.b;\n";

1120 
} 
1121 
out<<"vec2 sum; vec4 cc;\n";

1122  
1123  
1124 
if(step>0) 
1125 
{ 
1126 
out<<"vec2 cpos = vec2(0.5, 0.5);\t vec2 opos;\n";

1127 
for(i = 0; i < step; i++) 
1128 
{ 
1129  
1130 
out<<"cc = texture2DRect(tex"<<i<<", pos);\n"; 
1131 
out<<"sum.x = cc.r + cc.g; sum.y = sum.x + cc.b; \n";

1132 
out<<"if (index <cc.r){ opos = cpos.xx;}\n";

1133 
out<<"else if(index < sum.x ) {opos = cpos.yx; index = cc.r;}\n";

1134 
out<<"else if(index < sum.y ) {opos = cpos.xy; index = sum.x;}\n";

1135 
out<<"else {opos = cpos.yy; index = sum.y;}\n";

1136 
out<<"pos = (pos + pos + opos);\n";

1137 
} 
1138 
} 
1139 
out<<"gl_FragColor = vec4(pos, index, 1.0);\n";

1140 
out<<"}\n"<<'\0'; 
1141 
return new ProgramGLSL(buffer); 
1142 
} 
1143  
1144  
1145 
void ShaderBagGLSL::LoadOrientationShader()

1146 
{ 
1147 
char buffer[10240]; 
1148 
ostrstream out(buffer,10240);

1149  
1150 
if(GlobalUtil::_IsNvidia)

1151 
{ 
1152 
out << "#pragma optionNV(ifcvt none)\n"

1153 
"#pragma optionNV(unroll all)\n";

1154 
} 
1155  
1156 
out<<"\n"

1157 
"#define GAUSSIAN_WF "<<GlobalUtil::_OrientationGaussianFactor<<" \n" 
1158 
"#define SAMPLE_WF ("<<GlobalUtil::_OrientationWindowFactor<< " )\n" 
1159 
"#define ORIENTATION_THRESHOLD "<< GlobalUtil::_MulitiOrientationThreshold << "\n" 
1160 
"uniform sampler2DRect tex; \n"

1161 
"uniform sampler2DRect gradTex; \n"

1162 
"uniform vec4 size; \n"

1163 
<< (GlobalUtil::_SubpixelLocalization  GlobalUtil::_KeepExtremumSign? 
1164 
" uniform sampler2DRect texS; \n" : " ") 
1165 
<< 
1166 
"void main() \n"

1167 
"{ \n"

1168 
" vec4 bins[10]; \n"

1169 
" bins[0] = vec4(0.0);bins[1] = vec4(0.0);bins[2] = vec4(0.0); \n"

1170 
" bins[3] = vec4(0.0);bins[4] = vec4(0.0);bins[5] = vec4(0.0); \n"

1171 
" bins[6] = vec4(0.0);bins[7] = vec4(0.0);bins[8] = vec4(0.0); \n"

1172 
" vec4 loc = texture2DRect(tex, gl_TexCoord[0].xy); \n"

1173 
" vec2 pos = loc.xy; \n"

1174 
" bool orientation_mode = (size.z != 0); \n"

1175 
" float sigma = orientation_mode? abs(size.z) : loc.w; \n";

1176 
if(GlobalUtil::_SubpixelLocalization  GlobalUtil::_KeepExtremumSign)

1177 
{ 
1178 
out<< 
1179 
" if(orientation_mode){\n"

1180 
" vec4 offset = texture2DRect(texS, pos);\n"

1181 
" pos.xy = pos.xy + offset.yz; \n"

1182 
" sigma = sigma * pow(size.w, offset.w);\n"

1183 
" #if "<< GlobalUtil::_KeepExtremumSign << "\n" 
1184 
" if(offset.x < 0.6) sigma = sigma; \n"

1185 
" #endif\n"

1186 
" }\n";

1187 
} 
1188 
out<< 
1189 
" //bool fixed_orientation = (size.z < 0); \n"

1190 
" if(size.z < 0) {gl_FragData[0] = vec4(pos, 0, sigma); return;}"

1191 
" float gsigma = sigma * GAUSSIAN_WF; \n"

1192 
" vec2 win = abs(vec2(sigma)) * (SAMPLE_WF * GAUSSIAN_WF); \n"

1193 
" vec2 dim = size.xy; \n"

1194 
" float dist_threshold = win.x*win.x+0.5; \n"

1195 
" float factor = 0.5/(gsigma*gsigma); \n"

1196 
" vec4 sz; vec2 spos; \n"

1197 
" //if(any(pos.xy <= 1)) discard; \n"

1198 
" sz.xy = max( pos  win, vec2(1,1)); \n"

1199 
" sz.zw = min( pos + win, dim2); \n"

1200 
" sz = floor(sz)+0.5;";

1201 
//loop to get the histogram

1202  
1203 
out<<"\n"

1204 
" for(spos.y = sz.y; spos.y <= sz.w; spos.y+=1.0) \n"

1205 
" { \n"

1206 
" for(spos.x = sz.x; spos.x <= sz.z; spos.x+=1.0) \n"

1207 
" { \n"

1208 
" vec2 offset = spos  pos; \n"

1209 
" float sq_dist = dot(offset,offset); \n"

1210 
" if( sq_dist < dist_threshold){ \n"

1211 
" vec4 cc = texture2DRect(gradTex, spos); \n"

1212 
" float grad = cc.b; float theta = cc.a; \n"

1213 
" float idx = floor(degrees(theta)*0.1); \n"

1214 
" if(idx < 0 ) idx += 36; \n"

1215 
" float weight = grad*exp(sq_dist * factor); \n"

1216 
" float vidx = fract(idx * 0.25) * 4.0;//mod(idx, 4.0) ; \n"

1217 
" vec4 inc = weight*vec4(equal(vec4(vidx), vec4(0.0,1.0,2.0,3.0)));";

1218  
1219 
if(GlobalUtil::_UseDynamicIndexing && GlobalUtil::_IsNvidia)

1220 
{ 
1221 
//dynamic indexing may not be faster

1222 
out<<"\n"

1223 
" int iidx = int((idx*0.25)); \n"

1224 
" bins[iidx]+=inc; \n"

1225 
" } \n"

1226 
" } \n"

1227 
" }";

1228  
1229 
}else

1230 
{ 
1231 
//nvfp40 still does not support dynamic array indexing

1232 
//unrolled binary search...

1233 
out<<"\n"

1234 
" if(idx < 16) \n"

1235 
" { \n"

1236 
" if(idx < 8) \n"

1237 
" { \n"

1238 
" if(idx < 4) { bins[0]+=inc;} \n"

1239 
" else { bins[1]+=inc;} \n"

1240 
" }else \n"

1241 
" { \n"

1242 
" if(idx < 12){ bins[2]+=inc;} \n"

1243 
" else { bins[3]+=inc;} \n"

1244 
" } \n"

1245 
" }else if(idx < 32) \n"

1246 
" { \n"

1247 
" if(idx < 24) \n"

1248 
" { \n"

1249 
" if(idx <20) { bins[4]+=inc;} \n"

1250 
" else { bins[5]+=inc;} \n"

1251 
" }else \n"

1252 
" { \n"

1253 
" if(idx < 28){ bins[6]+=inc;} \n"

1254 
" else { bins[7]+=inc;} \n"

1255 
" } \n"

1256 
" }else \n"

1257 
" { \n"

1258 
" bins[8]+=inc; \n"

1259 
" } \n"

1260 
" } \n"

1261 
" } \n"

1262 
" }";

1263  
1264 
} 
1265  
1266 
WriteOrientationCodeToStream(out); 
1267  
1268 
ProgramGLSL * program = new ProgramGLSL(buffer);

1269 
if(program>IsNative())

1270 
{ 
1271 
s_orientation = program ; 
1272 
_param_orientation_gtex = glGetUniformLocation(*program, "gradTex");

1273 
_param_orientation_size = glGetUniformLocation(*program, "size");

1274 
_param_orientation_stex = glGetUniformLocation(*program, "texS");

1275 
}else

1276 
{ 
1277 
delete program;

1278 
} 
1279 
} 
1280  
1281  
1282 
void ShaderBagGLSL::WriteOrientationCodeToStream(std::ostream& out)

1283 
{ 
1284 
//smooth histogram and find the largest

1285 
/*

1286 
smoothing kernel: (1 3 6 7 6 3 1 )/27

1287 
the same as 3 pass of (1 1 1)/3 averaging

1288 
maybe better to use 4 pass on the vectors...

1289 
*/

1290  
1291  
1292 
//the inner loop on different array numbers is always unrolled in fp40

1293  
1294 
//bug fixed here:)

1295 
out<<"\n"

1296 
" //mat3 m1 = mat3(1, 0, 0, 3, 1, 0, 6, 3, 1)/27.0; \n"

1297 
" mat3 m1 = mat3(1, 3, 6, 0, 1, 3,0, 0, 1)/27.0; \n"

1298 
" mat4 m2 = mat4(7, 6, 3, 1, 6, 7, 6, 3, 3, 6, 7, 6, 1, 3, 6, 7)/27.0;\n"

1299 
" #define FILTER_CODE(i) { \\\n"

1300 
" vec4 newb = (bins[i]* m2); \\\n"

1301 
" newb.xyz += ( prev.yzw * m1); \\\n"

1302 
" prev = bins[i]; \\\n"

1303 
" newb.wzy += ( bins[i+1].zyx *m1); \\\n"

1304 
" bins[i] = newb;}\n"

1305 
" for (int j=0; j<2; j++) \n"

1306 
" { \n"

1307 
" vec4 prev = bins[8]; \n"

1308 
" bins[9] = bins[0]; \n";

1309  
1310 
if(GlobalUtil::_IsNvidia)

1311 
{ 
1312 
out<< 
1313 
" for (int i=0; i<9; i++) \n"

1314 
" { \n"

1315 
" FILTER_CODE(i); \n"

1316 
" } \n"

1317 
" }";

1318  
1319 
}else

1320 
{ 
1321 
//manually unroll the loop for ATI.

1322 
out << 
1323 
" FILTER_CODE(0);\n"

1324 
" FILTER_CODE(1);\n"

1325 
" FILTER_CODE(2);\n"

1326 
" FILTER_CODE(3);\n"

1327 
" FILTER_CODE(4);\n"

1328 
" FILTER_CODE(5);\n"

1329 
" FILTER_CODE(6);\n"

1330 
" FILTER_CODE(7);\n"

1331 
" FILTER_CODE(8);\n"

1332 
" }\n";

1333 
} 
1334 
//find the maximum voting

1335 
out<<"\n"

1336 
" vec4 maxh; vec2 maxh2; \n"

1337 
" vec4 maxh4 = max(max(max(max(max(max(max(max(bins[0], bins[1]), bins[2]), \n"

1338 
" bins[3]), bins[4]), bins[5]), bins[6]), bins[7]), bins[8]);\n"

1339 
" maxh2 = max(maxh4.xy, maxh4.zw); maxh = vec4(max(maxh2.x, maxh2.y));";

1340  
1341 
char *testpeak_code;

1342 
char *savepeak_code;

1343  
1344 
//save two/three/four orientations with the largest votings?

1345  
1346 
if(GlobalUtil::_MaxOrientation>1) 
1347 
{ 
1348 
out<<"\n"

1349 
" vec4 Orientations = vec4(0, 0, 0, 0); \n"

1350 
" vec4 weights = vec4(0,0,0,0); ";

1351 

1352 
testpeak_code = "\\\n"

1353 
" {test = greaterThan(bins[i], hh);";

1354  
1355 
//save the orientations in weightdecreasing order

1356 
if(GlobalUtil::_MaxOrientation ==2) 
1357 
{ 
1358 
savepeak_code = "\\\n"

1359 
" if(weight <=weights.g){}\\\n"

1360 
" else if(weight >weights.r)\\\n"

1361 
" {weights.rg = vec2(weight, weights.r); Orientations.rg = vec2(th, Orientations.r);}\\\n"

1362 
" else {weights.g = weight; Orientations.g = th;}";

1363 
}else if(GlobalUtil::_MaxOrientation ==3) 
1364 
{ 
1365 
savepeak_code = "\\\n"

1366 
" if(weight <=weights.b){}\\\n"

1367 
" else if(weight >weights.r)\\\n"

1368 
" {weights.rgb = vec3(weight, weights.rg); Orientations.rgb = vec3(th, Orientations.rg);}\\\n"

1369 
" else if(weight >weights.g)\\\n"

1370 
" {weights.gb = vec2(weight, weights.g); Orientations.gb = vec2(th, Orientations.g);}\\\n"

1371 
" else {weights.b = weight; Orientations.b = th;}";

1372 
}else

1373 
{ 
1374 
savepeak_code = "\\\n"

1375 
" if(weight <=weights.a){}\\\n"

1376 
" else if(weight >weights.r)\\\n"

1377 
" {weights = vec4(weight, weights.rgb); Orientations = vec4(th, Orientations.rgb);}\\\n"

1378 
" else if(weight >weights.g)\\\n"

1379 
" {weights.gba = vec3(weight, weights.gb); Orientations.gba = vec3(th, Orientations.gb);}\\\n"

1380 
" else if(weight >weights.b)\\\n"

1381 
" {weights.ba = vec2(weight, weights.b); Orientations.ba = vec2(th, Orientations.b);}\\\n"

1382 
" else {weights.a = weight; Orientations.a = th;}";

1383 
} 
1384  
1385 
}else

1386 
{ 
1387 
out<<"\n"

1388 
" float Orientation; ";

1389 
testpeak_code ="\\\n"

1390 
" if(npeaks<=0.0){\\\n"

1391 
" test = equal(bins[i], maxh) ;";

1392 
savepeak_code="\\\n"

1393 
" npeaks++; \\\n"

1394 
" Orientation = th;";

1395  
1396 
} 
1397 
//find the peaks

1398 
out <<"\n"

1399 
" #define FINDPEAK(i, k)" <<testpeak_code<<"\\\n" 
1400 
" if( any ( test) ) \\\n"

1401 
" { \\\n"

1402 
" if(test.r && bins[i].x > prevb && bins[i].x > bins[i].y ) \\\n"

1403 
" { \\\n"

1404 
" float di = 0.5 * (bins[i].yprevb) / (bins[i].y+prevbbins[i].x  bins[i].x) ; \\\n"

1405 
" float th = (k+di+0.5); float weight = bins[i].x;"

1406 
<<savepeak_code<<"\\\n"

1407 
" }\\\n"

1408 
" else if(test.g && all( greaterThan(bins[i].yy , bins[i].xz)) ) \\\n"

1409 
" { \\\n"

1410 
" float di = 0.5 * (bins[i].zbins[i].x) / (bins[i].z+bins[i].xbins[i].y bins[i].y) ; \\\n"

1411 
" float th = (k+di+1.5); float weight = bins[i].y; "

1412 
<<savepeak_code<<" \\\n"

1413 
" }\\\n"

1414 
" if(test.b && all( greaterThan( bins[i].zz , bins[i].yw)) ) \\\n"

1415 
" { \\\n"

1416 
" float di = 0.5 * (bins[i].wbins[i].y) / (bins[i].w+bins[i].ybins[i].z bins[i].z) ; \\\n"

1417 
" float th = (k+di+2.5); float weight = bins[i].z; "

1418 
<<savepeak_code<<" \\\n"

1419 
" }\\\n"

1420 
" else if(test.a && bins[i].w > bins[i].z && bins[i].w > bins[i+1].x ) \\\n"

1421 
" { \\\n"

1422 
" float di = 0.5 * (bins[i+1].xbins[i].z) / (bins[i+1].x+bins[i].zbins[i].w  bins[i].w) ; \\\n"

1423 
" float th = (k+di+3.5); float weight = bins[i].w; "

1424 
<<savepeak_code<<" \\\n"

1425 
" }\\\n"

1426 
" }}\\\n"

1427 
" prevb = bins[i].w;";

1428 
//the following loop will be unrolled anyway in fp40,

1429 
//taking more than 1000 instrucsions..

1430 
//....

1431 
if(GlobalUtil::_IsNvidia)

1432 
{ 
1433 
out<<"\n"

1434 
" vec4 hh = maxh * ORIENTATION_THRESHOLD; bvec4 test; \n"

1435 
" bins[9] = bins[0]; \n"

1436 
" float npeaks = 0.0, k = 0; \n"

1437 
" float prevb = bins[8].w; \n"

1438 
" for (int i = 0; i <9 ; i++) \n"

1439 
" {\n"

1440 
" FINDPEAK(i, k);\n"

1441 
" k = k + 4.0; \n"

1442 
" }";

1443 
}else

1444 
{ 
1445 
//loop unroll for ATI.

1446 
out <<"\n"

1447 
" vec4 hh = maxh * ORIENTATION_THRESHOLD; bvec4 test;\n"

1448 
" bins[9] = bins[0]; \n"

1449 
" float npeaks = 0.0; \n"

1450 
" float prevb = bins[8].w; \n"

1451 
" FINDPEAK(0, 0.0);\n"

1452 
" FINDPEAK(1, 4.0);\n"

1453 
" FINDPEAK(2, 8.0);\n"

1454 
" FINDPEAK(3, 12.0);\n"

1455 
" FINDPEAK(4, 16.0);\n"

1456 
" FINDPEAK(5, 20.0);\n"

1457 
" FINDPEAK(6, 24.0);\n"

1458 
" FINDPEAK(7, 28.0);\n"

1459 
" FINDPEAK(8, 32.0);\n";

1460 
} 
1461 
//WRITE output

1462 
if(GlobalUtil::_MaxOrientation>1) 
1463 
{ 
1464 
out<<"\n"

1465 
" if(orientation_mode){\n"

1466 
" npeaks = dot(vec4(1,1,"

1467 
<<(GlobalUtil::_MaxOrientation>2 ? 1 : 0)<<"," 
1468 
<<(GlobalUtil::_MaxOrientation >3? 1 : 0)<<"), vec4(greaterThan(weights, hh)));\n" 
1469 
" gl_FragData[1] = radians((Orientations )*10.0);\n"

1470 
" gl_FragData[0] = vec4(pos, npeaks, sigma);\n"

1471 
" }else{\n"

1472 
" gl_FragData[0] = vec4(pos, radians((Orientations.x)*10.0), sigma);\n"

1473 
" }\n";

1474 
}else

1475 
{ 
1476 
out<<"\n"

1477 
" gl_FragData[0] = vec4(pos, radians((Orientation.x)*10.0), sigma);\n";

1478 
} 
1479 
//end

1480 
out<<"\n"

1481 
"}\n"<<'\0'; 
1482  
1483  
1484 
} 
1485  
1486 
void ShaderBagGLSL::SetSimpleOrientationInput(int oTex, float sigma, float sigma_step) 
1487 
{ 
1488 
glUniform1i(_param_orientation_gtex, 1);

1489 
glUniform1f(_param_orientation_size, sigma); 
1490 
} 
1491  
1492  
1493  
1494  
1495 
void ShaderBagGLSL::SetFeatureOrientationParam(int gtex, int width, int height, float sigma, int stex, float step) 
1496 
{ 
1497 
///

1498 
glUniform1i(_param_orientation_gtex, 1);

1499  
1500 
if((GlobalUtil::_SubpixelLocalization  GlobalUtil::_KeepExtremumSign)&& stex)

1501 
{ 
1502 
//specify texutre for subpixel subscale localization

1503 
glUniform1i(_param_orientation_stex, 2);

1504 
} 
1505  
1506 
float size[4]; 
1507 
size[0] = (float)width; 
1508 
size[1] = (float)height; 
1509 
size[2] = sigma;

1510 
size[3] = step;

1511 
glUniform4fv(_param_orientation_size, 1, size);

1512  
1513 
} 
1514  
1515  
1516 
void ShaderBagGLSL::LoadDescriptorShaderF2()

1517 
{ 
1518 
//one shader outpout 128/8 = 16 , each fragout encodes 4

1519 
//const double twopi = 2.0*3.14159265358979323846;

1520 
//const double rpi = 8.0/twopi;

1521 
char buffer[10240]; 
1522 
ostrstream out(buffer, 10240);

1523  
1524 
out<<setprecision(8);

1525  
1526 
out<<"\n"

1527 
"#define M_PI 3.14159265358979323846\n"

1528 
"#define TWO_PI (2.0*M_PI)\n"

1529 
"#define RPI 1.2732395447351626861510701069801\n"

1530 
"#define WF size.z\n"

1531 
"uniform sampler2DRect tex; \n"

1532 
"uniform sampler2DRect gradTex; \n"

1533 
"uniform vec4 dsize; \n"

1534 
"uniform vec3 size; \n"

1535 
"void main() \n"

1536 
"{\n"

1537 
" vec2 dim = size.xy; //image size \n"

1538 
" float index = dsize.x*floor(gl_TexCoord[0].y * 0.5) + gl_TexCoord[0].x;\n"

1539 
" float idx = 8.0 * fract(index * 0.125) + 8.0 * floor(2.0 * fract(gl_TexCoord[0].y * 0.5)); \n"

1540 
" index = floor(index*0.125) + 0.49; \n"

1541 
" vec2 coord = floor( vec2( mod(index, dsize.z), index*dsize.w)) + 0.5 ;\n"

1542 
" vec2 pos = texture2DRect(tex, coord).xy; \n"

1543 
" if(any(lessThanEqual(pos.xy, vec2(1.0)))  any(greaterThanEqual(pos.xy, dim1.0)))// discard; \n"

1544 
" { gl_FragData[0] = gl_FragData[1] = vec4(0.0); return; }\n"

1545 
" float anglef = texture2DRect(tex, coord).z;\n"

1546 
" if(anglef > M_PI) anglef = TWO_PI;\n"

1547 
" float sigma = texture2DRect(tex, coord).w; \n"

1548 
" float spt = abs(sigma * WF); //default to be 3*sigma \n";

1549  
1550 
//rotation

1551 
out<< 
1552 
" vec4 cscs, rots; \n"

1553 
" cscs.y = sin(anglef); cscs.x = cos(anglef); \n"

1554 
" cscs.zw =  cscs.xy; \n"

1555 
" rots = cscs /spt; \n"

1556 
" cscs *= spt; \n";

1557  
1558 
//here cscs is actually (cos, sin, cos, sin) * (factor: 3)*sigma

1559 
//and rots is (cos, sin, cos, sin ) /(factor*sigma)

1560 
//devide the 4x4 sift grid into 16 1x1 block, and each corresponds to a shader thread

1561 
//To use linear interoplation, 1x1 is increased to 2x2, by adding 0.5 to each side

1562  
1563 
out<< 
1564 
"vec4 temp; vec2 pt, offsetpt; \n"

1565 
" /*the fraction part of idx is .5*/ \n"

1566 
" offsetpt.x = 4.0* fract(idx*0.25)  2.0; \n"

1567 
" offsetpt.y = floor(idx*0.25)  1.5; \n"

1568 
" temp = cscs.xwyx*offsetpt.xyxy; \n"

1569 
" pt = pos + temp.xz + temp.yw; \n";

1570 

1571 
//get a horizontal bounding box of the rotated rectangle

1572 
out<< 
1573 
" vec2 bwin = abs(cscs.xy); \n"

1574 
" float bsz = bwin.x + bwin.y; \n"

1575 
" vec4 sz; \n"

1576 
" sz.xy = max(pt  vec2(bsz), vec2(1,1));\n"

1577 
" sz.zw = min(pt + vec2(bsz), dim  2); \n"

1578 
" sz = floor(sz)+0.5;"; //move sample point to pixel center 
1579 
//get voting for two box

1580  
1581 
out<<"\n"

1582 
" vec4 DA, DB; vec2 spos; \n"

1583 
" DA = DB = vec4(0, 0, 0, 0); \n"

1584 
" for(spos.y = sz.y; spos.y <= sz.w; spos.y+=1.0) \n"

1585 
" { \n"

1586 
" for(spos.x = sz.x; spos.x <= sz.z; spos.x+=1.0) \n"

1587 
" { \n"

1588 
" vec2 diff = spos  pt; \n"

1589 
" temp = rots.xywx * diff.xyxy;\n"

1590 
" vec2 nxy = (temp.xz + temp.yw); \n"

1591 
" vec2 nxyn = abs(nxy); \n"

1592 
" if(all( lessThan(nxyn, vec2(1.0)) ))\n"

1593 
" {\n"

1594 
" vec4 cc = texture2DRect(gradTex, spos); \n"

1595 
" float mod = cc.b; float angle = cc.a; \n"

1596 
" float theta0 = RPI * (anglef  angle); \n"

1597 
" float theta = theta0 < 0.0? theta0 + 8.0 : theta0;;\n"

1598 
" diff = nxy + offsetpt.xy; \n"

1599 
" float ww = exp(0.125*dot(diff, diff));\n"

1600 
" vec2 weights = 1  nxyn;\n"

1601 
" float weight = weights.x * weights.y *mod*ww; \n"

1602 
" float theta1 = floor(theta); \n"

1603 
" float weight2 = (theta  theta1) * weight;\n"

1604 
" float weight1 = weight  weight2;\n"

1605 
" DA += vec4(equal(vec4(theta1), vec4(0, 1, 2, 3)))*weight1;\n"

1606 
" DA += vec4(equal(vec4(theta1), vec4(7, 0, 1, 2)))*weight2; \n"

1607 
" DB += vec4(equal(vec4(theta1), vec4(4, 5, 6, 7)))*weight1;\n"

1608 
" DB += vec4(equal(vec4(theta1), vec4(3, 4, 5, 6)))*weight2; \n"

1609 
" }\n"

1610 
" }\n"

1611 
" }\n";

1612  
1613 
out<< 
1614 
" gl_FragData[0] = DA; gl_FragData[1] = DB;\n"

1615 
"}\n"<<'\0'; 
1616  
1617 
ProgramGLSL * program = new ProgramGLSL(buffer);

1618  
1619 
if(program>IsNative())

1620 
{ 
1621 
s_descriptor_fp = program ; 
1622 
_param_descriptor_gtex = glGetUniformLocation(*program, "gradTex");

1623 
_param_descriptor_size = glGetUniformLocation(*program, "size");

1624 
_param_descriptor_dsize = glGetUniformLocation(*program, "dsize");

1625 
}else

1626 
{ 
1627 
delete program;

1628 
} 
1629  
1630  
1631 
} 
1632  
1633 
void ShaderBagGLSL::LoadDescriptorShader()

1634 
{ 
1635 
GlobalUtil::_DescriptorPPT = 16;

1636 
LoadDescriptorShaderF2(); 
1637 
} 
1638  
1639  
1640 
void ShaderBagGLSL::SetFeatureDescirptorParam(int gtex, int otex, float dwidth, float fwidth, float width, float height, float sigma) 
1641 
{ 
1642 
///

1643 
glUniform1i(_param_descriptor_gtex, 1);

1644  
1645 
float dsize[4] ={dwidth, 1.0f/dwidth, fwidth, 1.0f/fwidth}; 
1646 
glUniform4fv(_param_descriptor_dsize, 1, dsize);

1647 
float size[3]; 
1648 
size[0] = width;

1649 
size[1] = height;

1650 
size[2] = GlobalUtil::_DescriptorWindowFactor;

1651 
glUniform3fv(_param_descriptor_size, 1, size);

1652  
1653 
} 
1654  
1655 
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////

1656  
1657 
void ShaderBagPKSL::LoadFixedShaders()

1658 
{ 
1659 
ProgramGLSL * program; 
1660  
1661  
1662 
s_gray = new ProgramGLSL(

1663 
"uniform sampler2DRect tex; void main(){\n"

1664 
"float intensity = dot(vec3(0.299, 0.587, 0.114), texture2DRect(tex,gl_TexCoord[0].xy ).rgb);\n"

1665 
"gl_FragColor= vec4(intensity, intensity, intensity, 1.0);}" );

1666  
1667  
1668 
s_sampling = new ProgramGLSL(

1669 
"uniform sampler2DRect tex; void main(){\n"

1670 
"gl_FragColor= vec4( texture2DRect(tex,gl_TexCoord[0].st ).r,texture2DRect(tex,gl_TexCoord[1].st ).r,\n"

1671 
" texture2DRect(tex,gl_TexCoord[2].st ).r,texture2DRect(tex,gl_TexCoord[3].st ).r);}" );

1672  
1673  
1674 
s_margin_copy = program = new ProgramGLSL(

1675 
"uniform sampler2DRect tex; uniform vec4 truncate; void main(){\n"

1676 
"vec4 cc = texture2DRect(tex, min(gl_TexCoord[0].xy, truncate.xy)); \n"

1677 
"bvec2 ob = lessThan(gl_TexCoord[0].xy, truncate.xy);\n"

1678 
"if(ob.y) { gl_FragColor = (truncate.z ==0 ? cc.rrbb : cc.ggaa); } \n"

1679 
"else if(ob.x) {gl_FragColor = (truncate.w <1.5 ? cc.rgrg : cc.baba);} \n"

1680 
"else { vec4 weights = vec4(vec4(0, 1, 2, 3) == truncate.w);\n"

1681 
"float v = dot(weights, cc); gl_FragColor = vec4(v);}}");

1682  
1683 
_param_margin_copy_truncate = glGetUniformLocation(*program, "truncate");

1684  
1685  
1686  
1687 
s_zero_pass = new ProgramGLSL("void main(){gl_FragColor = vec4(0.0);}"); 
1688  
1689  
1690  
1691 
s_grad_pass = program = new ProgramGLSL(

1692 
"uniform sampler2DRect tex; uniform sampler2DRect texp; void main ()\n"

1693 
"{\n"

1694 
" vec4 v1, v2, gg;\n"

1695 
" vec4 cc = texture2DRect(tex, gl_TexCoord[0].xy);\n"

1696 
" vec4 cp = texture2DRect(texp, gl_TexCoord[0].xy);\n"

1697 
" gl_FragData[0] = cc  cp; \n"

1698 
" vec4 cl = texture2DRect(tex, gl_TexCoord[1].xy); vec4 cr = texture2DRect(tex, gl_TexCoord[2].xy);\n"

1699 
" vec4 cd = texture2DRect(tex, gl_TexCoord[3].xy); vec4 cu = texture2DRect(tex, gl_TexCoord[4].xy);\n"

1700 
" vec4 dx = (vec4(cr.rb, cc.ga)  vec4(cc.rb, cl.ga)).zxwy;\n"

1701 
" vec4 dy = (vec4(cu.rg, cc.ba)  vec4(cc.rg, cd.ba)).zwxy;\n"

1702 
" vec4 grad = 0.5 * sqrt(dx*dx + dy * dy);\n"

1703 
" gl_FragData[1] = grad;\n"

1704 
" vec4 invalid = vec4(equal(grad, vec4(0.0))); \n"

1705 
" vec4 ov = atan(dy, dx + invalid); \n"

1706 
" gl_FragData[2] = ov; \n"

1707 
"}\n\0"); //when 
1708  
1709 
_param_grad_pass_texp = glGetUniformLocation(*program, "texp");

1710  
1711  
1712 
GlobalUtil::_OrientationPack2 = 0;

1713 
LoadOrientationShader(); 
1714  
1715 
if(s_orientation == NULL) 
1716 
{ 
1717 
//Load a simplified version if the right version is not supported

1718 
s_orientation = program = new ProgramGLSL(

1719 
"uniform sampler2DRect fTex; uniform sampler2DRect oTex; uniform vec2 size; void main(){\n"

1720 
" vec4 cc = texture2DRect(fTex, gl_TexCoord[0].xy);\n"

1721 
" vec2 co = cc.xy * 0.5; \n"

1722 
" vec4 oo = texture2DRect(oTex, co);\n"

1723 
" bvec2 bo = lessThan(fract(co), vec2(0.5)); \n"

1724 
" float o = bo.y? (bo.x? oo.r : oo.g) : (bo.x? oo.b : oo.a); \n"

1725 
" gl_FragColor = vec4(cc.rg, o, size.x * pow(size.y, cc.a));}");

1726 
_param_orientation_gtex= glGetUniformLocation(*program, "oTex");

1727 
_param_orientation_size= glGetUniformLocation(*program, "size");

1728  
1729 
GlobalUtil::_MaxOrientation = 0;

1730 
GlobalUtil::_FullSupported = 0;

1731 
std::cerr<<"Orientation simplified on this hardware"<<endl;

1732 
} 
1733  
1734 
if(GlobalUtil::_DescriptorPPT)

1735 
{ 
1736 
LoadDescriptorShader(); 
1737 
if(s_descriptor_fp == NULL) 
1738 
{ 
1739 
GlobalUtil::_DescriptorPPT = GlobalUtil::_FullSupported = 0;

1740 
std::cerr<<"Descriptor ignored on this hardware"<<endl;

1741 
} 
1742 
} 
1743 
} 
1744  
1745  
1746 
void ShaderBagPKSL::LoadDisplayShaders()

1747 
{ 
1748 
ProgramGLSL * program; 
1749  
1750 
s_copy_key = new ProgramGLSL(

1751 
"uniform sampler2DRect tex;void main(){\n"

1752 
"gl_FragColor= vec4(texture2DRect(tex, gl_TexCoord[0].xy).rg, 0,1);}");

1753  
1754 
//shader used to write a vertex buffer object

1755 
//which is used to draw the quads of each feature

1756 
s_vertex_list = program = new ProgramGLSL(

1757 
"uniform sampler2DRect tex; uniform vec4 sizes; void main(){\n"

1758 
"float fwidth = sizes.y; \n"

1759 
"float twidth = sizes.z; \n"

1760 
"float rwidth = sizes.w; \n"

1761 
"float index = 0.1*(fwidth*floor(gl_TexCoord[0].y) + gl_TexCoord[0].x);\n"

1762 
"float px = mod(index, twidth);\n"

1763 
"vec2 tpos= floor(vec2(px, index*rwidth))+0.5;\n"

1764 
"vec4 cc = texture2DRect(tex, tpos );\n"

1765 
"float size = 3.0f * cc.a; \n"

1766 
"gl_FragColor.zw = vec2(0.0, 1.0);\n"

1767 
"if(any(lessThan(cc.xy,vec2(0.0)))) {gl_FragColor.xy = cc.xy;}else \n"

1768 
"{\n"

1769 
" float type = fract(px);\n"

1770 
" vec2 dxy; float s, c;\n"

1771 
" dxy.x = type < 0.1 ? 0.0 : ((type <0.5  type > 0.9)? size : size);\n"

1772 
" dxy.y = type < 0.2 ? 0.0 : ((type < 0.3  type > 0.7 )? size :size); \n"

1773 
" s = sin(cc.b); c = cos(cc.b); \n"

1774 
" gl_FragColor.x = cc.x + c*dxy.xs*dxy.y;\n"

1775 
" gl_FragColor.y = cc.y + c*dxy.y+s*dxy.x;}\n"

1776 
"}\n\0");

1777 
/*gl_FragColor = vec4(tpos, 0.0, 1.0);}\n\0");*/

1778  
1779 
_param_genvbo_size = glGetUniformLocation(*program, "sizes");

1780  
1781 
s_display_gaussian = new ProgramGLSL(

1782 
"uniform sampler2DRect tex; void main(){\n"

1783 
"vec4 pc = texture2DRect(tex, gl_TexCoord[0].xy); bvec2 ff = lessThan(fract(gl_TexCoord[0].xy), vec2(0.5));\n"

1784 
"float v = ff.y?(ff.x? pc.r : pc.g):(ff.x?pc.b:pc.a); gl_FragColor = vec4(vec3(v), 1.0);}");

1785  
1786 
s_display_dog = new ProgramGLSL(

1787 
"uniform sampler2DRect tex; void main(){\n"

1788 
"vec4 pc = texture2DRect(tex, gl_TexCoord[0].xy); bvec2 ff = lessThan(fract(gl_TexCoord[0].xy), vec2(0.5));\n"

1789 
"float v = ff.y ?(ff.x ? pc.r : pc.g):(ff.x ? pc.b : pc.a);float g = (0.5+20.0*v);\n"

1790 
"gl_FragColor = vec4(g, g, g, 1.0);}" );

1791  
1792  
1793 
s_display_grad = new ProgramGLSL(

1794 
"uniform sampler2DRect tex; void main(){\n"

1795 
"vec4 pc = texture2DRect(tex, gl_TexCoord[0].xy); bvec2 ff = lessThan(fract(gl_TexCoord[0].xy), vec2(0.5));\n"

1796 
"float v = ff.y ?(ff.x ? pc.r : pc.g):(ff.x ? pc.b : pc.a); gl_FragColor = vec4(5.0 *vec3(v), 1.0); }");

1797  
1798 
s_display_keys= new ProgramGLSL(

1799 
"uniform sampler2DRect tex; void main(){\n"

1800 
"vec4 oc = texture2DRect(tex, gl_TexCoord[0].xy); \n"

1801 
"vec4 cc = vec4(equal(abs(oc.rrrr), vec4(1.0, 2.0, 3.0, 4.0))); \n"

1802 
"bvec2 ff = lessThan(fract(gl_TexCoord[0].xy) , vec2(0.5));\n"

1803 
"float v = ff.y ?(ff.x ? cc.r : cc.g):(ff.x ? cc.b : cc.a);\n"

1804 
"if(v == 0) discard; \n"

1805 
"else if(oc.r > 0) gl_FragColor = vec4(1.0, 0, 0,1.0); \n"

1806 
"else gl_FragColor = vec4(0.0,1.0,0.0,1.0); }" );

1807 
} 
1808  
1809 
void ShaderBagPKSL::LoadOrientationShader(void) 
1810 
{ 
1811 
char buffer[10240]; 
1812 
ostrstream out(buffer,10240);

1813 
if(GlobalUtil::_IsNvidia)

1814 
{ 
1815 
out << "#pragma optionNV(ifcvt none)\n"

1816 
"#pragma optionNV(unroll all)\n";

1817 
} 
1818 
out<<"\n"

1819 
"#define GAUSSIAN_WF "<<GlobalUtil::_OrientationGaussianFactor<<" \n" 
1820 
"#define SAMPLE_WF ("<<GlobalUtil::_OrientationWindowFactor<< " )\n" 
1821 
"#define ORIENTATION_THRESHOLD "<< GlobalUtil::_MulitiOrientationThreshold << "\n" 
1822 
"uniform sampler2DRect tex; uniform sampler2DRect gtex;\n"

1823 
"uniform sampler2DRect otex; uniform vec4 size;\n"

1824 
"void main() \n"

1825 
"{ \n"

1826 
" vec4 bins[10]; \n"

1827 
" bins[0] = vec4(0.0);bins[1] = vec4(0.0);bins[2] = vec4(0.0); \n"

1828 
" bins[3] = vec4(0.0);bins[4] = vec4(0.0);bins[5] = vec4(0.0); \n"

1829 
" bins[6] = vec4(0.0);bins[7] = vec4(0.0);bins[8] = vec4(0.0); \n"

1830 
" vec4 sift = texture2DRect(tex, gl_TexCoord[0].xy); \n"

1831 
" vec2 pos = sift.xy; \n"

1832 
" bool orientation_mode = (size.z != 0); \n"

1833 
" float sigma = orientation_mode? (abs(size.z) * pow(size.w, sift.w) * sift.z) : (sift.w); \n"

1834 
" //bool fixed_orientation = (size.z < 0); \n"

1835 
" if(size.z < 0) {gl_FragData[0] = vec4(pos, 0, sigma); return;}"

1836 
" float gsigma = sigma * GAUSSIAN_WF; \n"

1837 
" vec2 win = abs(vec2(sigma)) * (SAMPLE_WF * GAUSSIAN_WF); \n"

1838 
" vec2 dim = size.xy; \n"

1839 
" vec4 dist_threshold = vec4(win.x*win.x+0.5); \n"

1840 
" float factor = 0.5/(gsigma*gsigma); \n"

1841 
" vec4 sz; vec2 spos; \n"

1842 
" //if(any(pos.xy <= 1)) discard; \n"

1843 
" sz.xy = max( pos  win, vec2(2,2)); \n"

1844 
" sz.zw = min( pos + win, dim3); \n"

1845 
" sz = floor(sz*0.5) + 0.5; ";

1846 
//loop to get the histogram

1847  
1848 
out<<"\n"

1849 
" for(spos.y = sz.y; spos.y <= sz.w; spos.y+=1.0) \n"

1850 
" { \n"

1851 
" for(spos.x = sz.x; spos.x <= sz.z; spos.x+=1.0) \n"

1852 
" { \n"

1853 
" vec2 offset = 2* spos  pos  0.5; \n"

1854 
" vec4 off = vec4(offset, offset + 1); \n"

1855 
" vec4 distsq = off.xzxz * off.xzxz + off.yyww * off.yyww; \n"

1856 
" bvec4 inside = lessThan(distsq, dist_threshold); \n"

1857 
" if(any(inside)) \n"

1858 
" { \n"

1859 
" vec4 gg = texture2DRect(gtex, spos); \n"

1860 
" vec4 oo = texture2DRect(otex, spos); \n"

1861 
" vec4 weight = gg * exp(distsq * factor); \n"

1862 
" vec4 idxv = floor(degrees(oo)*0.1); \n"

1863 
" idxv+= (vec4(lessThan(idxv, vec4(0.0)))*36.0); \n"

1864 
" vec4 vidx = fract(idxv * 0.25) * 4.0;//mod(idxv, 4.0); \n";

1865 
//

1866 
if(GlobalUtil::_UseDynamicIndexing && GlobalUtil::_IsNvidia)

1867 
{ 
1868 
// it might be slow on some GPUs

1869 
out<<"\n"

1870 
" for(int i = 0 ; i < 4; i++)\n"

1871 
" {\n"

1872 
" if(inside[i])\n"

1873 
" {\n"

1874 
" float idx = idxv[i]; \n"

1875 
" vec4 inc = weight[i] * vec4(equal(vec4(vidx[i]), vec4(0,1,2,3))); \n"

1876 
" int iidx = int(floor(idx*0.25)); \n"

1877 
" bins[iidx]+=inc; \n"

1878 
" } \n"

1879 
" } \n"

1880 
" } \n"

1881 
" } \n"

1882 
" }";

1883  
1884 
}else

1885 
{ 
1886 
//nvfp40 still does not support dynamic array indexing

1887 
//unrolled binary search

1888 
//it seems to be faster than the dyanmic indexing version on some GPUs

1889 
out<<"\n"

1890 
" for(int i = 0 ; i < 4; i++)\n"

1891 
" {\n"

1892 
" if(inside[i])\n"

1893 
" {\n"

1894 
" float idx = idxv[i]; \n"

1895 
" vec4 inc = weight[i] * vec4(equal(vec4(vidx[i]), vec4(0,1,2,3))); \n"

1896 
" if(idx < 16) \n"

1897 
" { \n"

1898 
" if(idx < 8) \n"

1899 
" { \n"

1900 
" if(idx < 4) { bins[0]+=inc;} \n"

1901 
" else { bins[1]+=inc;} \n"

1902 
" }else \n"

1903 
" { \n"

1904 
" if(idx < 12){ bins[2]+=inc;} \n"

1905 
" else { bins[3]+=inc;} \n"

1906 
" } \n"

1907 
" }else if(idx < 32) \n"

1908 
" { \n"

1909 
" if(idx < 24) \n"

1910 
" { \n"

1911 
" if(idx <20) { bins[4]+=inc;} \n"

1912 
" else { bins[5]+=inc;} \n"

1913 
" }else \n"

1914 
" { \n"

1915 
" if(idx < 28){ bins[6]+=inc;} \n"

1916 
" else { bins[7]+=inc;} \n"

1917 
" } \n"

1918 
" }else \n"

1919 
" { \n"

1920 
" bins[8]+=inc; \n"

1921 
" } \n"

1922 
" } \n"

1923 
" } \n"

1924 
" } \n"

1925 
" } \n"

1926 
" }";

1927  
1928 
} 
1929  
1930 
//reuse the code from the unpacked version..

1931 
ShaderBagGLSL::WriteOrientationCodeToStream(out); 
1932  
1933  
1934  
1935 
ProgramGLSL * program = new ProgramGLSL(buffer);

1936 
if(program>IsNative())

1937 
{ 
1938 
s_orientation = program ; 
1939 
_param_orientation_gtex = glGetUniformLocation(*program, "gtex");

1940 
_param_orientation_otex = glGetUniformLocation(*program, "otex");

1941 
_param_orientation_size = glGetUniformLocation(*program, "size");

1942 
}else

1943 
{ 
1944 
delete program;

1945 
} 
1946 
} 
1947  
1948 
void ShaderBagPKSL::SetGenListStartParam(float width, int tex0) 
1949 
{ 
1950 
glUniform1f(_param_ftex_width, width); 
1951 
} 
1952  
1953 
void ShaderBagPKSL::LoadGenListShader(int ndoglev,int nlev) 
1954 
{ 
1955 
ProgramGLSL * program; 
1956  
1957 
s_genlist_init_tight = new ProgramGLSL(

1958 
"uniform sampler2DRect tex; void main ()\n"

1959 
"{\n"

1960 
" vec4 key = vec4(texture2DRect(tex, gl_TexCoord[0].xy).r, \n"

1961 
" texture2DRect(tex, gl_TexCoord[1].xy).r, \n"

1962 
" texture2DRect(tex, gl_TexCoord[2].xy).r, \n"

1963 
" texture2DRect(tex, gl_TexCoord[3].xy).r); \n"

1964 
" gl_FragColor = vec4(notEqual(key, vec4(0.0))); \n"

1965 
"}");

1966  
1967 
s_genlist_init_ex = program = new ProgramGLSL(

1968 
"uniform sampler2DRect tex; uniform vec4 bbox; void main ()\n"

1969 
"{\n"

1970 
" vec4 helper1 = vec4(equal(vec4(abs(texture2DRect(tex, gl_TexCoord[0].xy).r)), vec4(1.0, 2.0, 3.0, 4.0)));\n"

1971 
" vec4 helper2 = vec4(equal(vec4(abs(texture2DRect(tex, gl_TexCoord[1].xy).r)), vec4(1.0, 2.0, 3.0, 4.0)));\n"

1972 
" vec4 helper3 = vec4(equal(vec4(abs(texture2DRect(tex, gl_TexCoord[2].xy).r)), vec4(1.0, 2.0, 3.0, 4.0)));\n"

1973 
" vec4 helper4 = vec4(equal(vec4(abs(texture2DRect(tex, gl_TexCoord[3].xy).r)), vec4(1.0, 2.0, 3.0, 4.0)));\n"

1974 
" vec4 bx1 = vec4(lessThan(gl_TexCoord[0].xxyy, bbox)); \n"

1975 
" vec4 bx4 = vec4(lessThan(gl_TexCoord[3].xxyy, bbox)); \n"

1976 
" vec4 bx2 = vec4(bx4.xy, bx1.zw); \n"

1977 
" vec4 bx3 = vec4(bx1.xy, bx4.zw);\n"

1978 
" helper1 = min(min(bx1.xyxy, bx1.zzww), helper1);\n"

1979 
" helper2 = min(min(bx2.xyxy, bx2.zzww), helper2);\n"

1980 
" helper3 = min(min(bx3.xyxy, bx3.zzww), helper3);\n"

1981 
" helper4 = min(min(bx4.xyxy, bx4.zzww), helper4);\n"

1982 
" gl_FragColor.r = float(any(greaterThan(max(helper1.xy, helper1.zw), vec2(0.0)))); \n"

1983 
" gl_FragColor.g = float(any(greaterThan(max(helper2.xy, helper2.zw), vec2(0.0)))); \n"

1984 
" gl_FragColor.b = float(any(greaterThan(max(helper3.xy, helper3.zw), vec2(0.0)))); \n"

1985 
" gl_FragColor.a = float(any(greaterThan(max(helper4.xy, helper4.zw), vec2(0.0)))); \n"

1986 
"}");

1987 
_param_genlist_init_bbox = glGetUniformLocation( *program, "bbox");

1988  
1989 
s_genlist_end = program = new ProgramGLSL(

1990 
GlobalUtil::_KeepExtremumSign == 0 ?

1991 

1992 
"uniform sampler2DRect tex; uniform sampler2DRect ktex; void main()\n"

1993 
"{\n"

1994 
" vec4 tc = texture2DRect( tex, gl_TexCoord[0].xy);\n"

1995 
" vec2 pos = tc.rg; float index = tc.b;\n"

1996 
" vec4 tk = texture2DRect( ktex, pos); \n"

1997 
" vec4 keys = vec4(equal(abs(tk.rrrr), vec4(1.0, 2.0, 3.0, 4.0))); \n"

1998 
" vec2 opos; \n"

1999 
" opos.x = dot(keys, vec4(0.5, 0.5, 0.5, 0.5));\n"

2000 
" opos.y = dot(keys, vec4(0.5, 0.5, 0.5, 0.5));\n"

2001 
" gl_FragColor = vec4(opos + pos * 2.0 + tk.yz, 1.0, tk.w);\n"

2002 
"}" :

2003 

2004 
"uniform sampler2DRect tex; uniform sampler2DRect ktex; void main()\n"

2005 
"{\n"

2006 
" vec4 tc = texture2DRect( tex, gl_TexCoord[0].xy);\n"

2007 
" vec2 pos = tc.rg; float index = tc.b;\n"

2008 
" vec4 tk = texture2DRect( ktex, pos); \n"

2009 
" vec4 keys = vec4(equal(abs(tk.rrrr), vec4(1.0, 2.0, 3.0, 4.0))) \n"

2010 
" vec2 opos; \n"

2011 
" opos.x = dot(keys, vec4(0.5, 0.5, 0.5, 0.5));\n"

2012 
" opos.y = dot(keys, vec4(0.5, 0.5, 0.5, 0.5));\n"

2013 
" gl_FragColor = vec4(opos + pos * 2.0 + tk.yz, sign(tk.r), tk.w);\n"

2014 
"}"

2015 
); 
2016 
_param_genlist_end_ktex = glGetUniformLocation(*program, "ktex");

2017  
2018 
//reduction ...

2019 
s_genlist_histo = new ProgramGLSL(

2020 
"uniform sampler2DRect tex; void main ()\n"

2021 
"{\n"

2022 
" vec4 helper; vec4 helper2; \n"

2023 
" helper = texture2DRect(tex, gl_TexCoord[0].xy); helper2.xy = helper.xy + helper.zw; \n"

2024 
" helper = texture2DRect(tex, gl_TexCoord[1].xy); helper2.zw = helper.xy + helper.zw; \n"

2025 
" gl_FragColor.rg = helper2.xz + helper2.yw;\n"

2026 
" helper = texture2DRect(tex, gl_TexCoord[2].xy); helper2.xy = helper.xy + helper.zw; \n"

2027 
" helper = texture2DRect(tex, gl_TexCoord[3].xy); helper2.zw = helper.xy + helper.zw; \n"

2028 
" gl_FragColor.ba= helper2.xz+helper2.yw;\n"

2029 
"}");

2030  
2031  
2032 
//read of the first part, which generates tex coordinates

2033  
2034 
s_genlist_start= program = ShaderBagGLSL::LoadGenListStepShader(1, 1); 
2035 
_param_ftex_width= glGetUniformLocation(*program, "width");

2036 
_param_genlist_start_tex0 = glGetUniformLocation(*program, "tex0");

2037 
//stepping

2038 
s_genlist_step = program = ShaderBagGLSL::LoadGenListStepShader(0, 1); 
2039 
_param_genlist_step_tex= glGetUniformLocation(*program, "tex");

2040 
_param_genlist_step_tex0= glGetUniformLocation(*program, "tex0");

2041  
2042 
} 
2043 
void ShaderBagPKSL::UnloadProgram(void) 
2044 
{ 
2045 
glUseProgram(0);

2046 
} 
2047 
void ShaderBagPKSL::LoadKeypointShader(float dog_threshold, float edge_threshold) 
2048 
{ 
2049 
//

2050 
char buffer[10240]; 
2051 
float threshold0 = dog_threshold* (GlobalUtil::_SubpixelLocalization?0.8f:1.0f); 
2052 
float threshold1 = dog_threshold;

2053 
float threshold2 = (edge_threshold+1)*(edge_threshold+1)/edge_threshold; 
2054 
ostrstream out(buffer, 10240);

2055 
out<<setprecision(8);

2056  
2057 
if(GlobalUtil::_IsNvidia)

2058 
{ 
2059 
out << "#pragma optionNV(ifcvt none)\n"

2060 
"#pragma optionNV(unroll all)\n"

2061 
"#define REPEAT4(FUNCTION)\\\n"

2062 
"for(int i = 0; i < 4; ++i)\\\n"

2063 
"{\\\n"

2064 
" FUNCTION(i);\\\n"

2065 
"}\n";

2066 
}else

2067 
{ 
2068 
//loop unroll

2069 
out << "#define REPEAT4(FUNCTION)\\\n"

2070 
"FUNCTION(0);\\\n"

2071 
"FUNCTION(1);\\\n"

2072 
"FUNCTION(2);\\\n"

2073 
"FUNCTION(3);\n";

2074 
} 
2075 
//tex(X)(Y)

2076 
//X: (CLR) (CENTER 0, LEFT 1, RIGHT +1)

2077 
//Y: (CDU) (CENTER 0, DOWN 1, UP +1)

2078 
out << "#define THRESHOLD0 " << threshold0 << "\n" 
2079 
"#define THRESHOLD1 " << threshold1 << "\n" 
2080 
"#define THRESHOLD2 " << threshold2 << "\n"; 
2081  
2082 
out<< 
2083 
"uniform sampler2DRect tex; uniform sampler2DRect texU;\n"

2084 
"uniform sampler2DRect texD; void main ()\n"

2085 
"{\n"

2086 
" vec2 TexRU = vec2(gl_TexCoord[2].x, gl_TexCoord[4].y); \n"

2087 
" vec4 ccc = texture2DRect(tex, gl_TexCoord[0].xy);\n"

2088 
" vec4 clc = texture2DRect(tex, gl_TexCoord[1].xy);\n"

2089 
" vec4 crc = texture2DRect(tex, gl_TexCoord[2].xy);\n"

2090 
" vec4 ccd = texture2DRect(tex, gl_TexCoord[3].xy);\n"

2091 
" vec4 ccu = texture2DRect(tex, gl_TexCoord[4].xy);\n"

2092 
" vec4 cld = texture2DRect(tex, gl_TexCoord[5].xy);\n"

2093 
" vec4 clu = texture2DRect(tex, gl_TexCoord[6].xy);\n"

2094 
" vec4 crd = texture2DRect(tex, gl_TexCoord[7].xy);\n"

2095 
" vec4 cru = texture2DRect(tex, TexRU.xy);\n"

2096 
" vec4 cc = ccc;\n"

2097 
" vec4 v1[4], v2[4];\n"

2098 
" v1[0] = vec4(clc.g, ccc.g, ccd.b, ccc.b);\n"

2099 
" v1[1] = vec4(ccc.r, crc.r, ccd.a, ccc.a);\n"

2100 
" v1[2] = vec4(clc.a, ccc.a, ccc.r, ccu.r);\n"

2101 
" v1[3] = vec4(ccc.b, crc.b, ccc.g, ccu.g);\n"

2102 
" v2[0] = vec4(cld.a, clc.a, ccd.a, ccc.a);\n"

2103 
" v2[1] = vec4(ccd.b, ccc.b, crd.b, crc.b);\n"

2104 
" v2[2] = vec4(clc.g, clu.g, ccc.g, ccu.g);\n"

2105 
" v2[3] = vec4(ccc.r, ccu.r, crc.r, cru.r);\n"

2106  
2107 
//test against 8 neighbours

2108 
//use variable to identify type of extremum

2109 
//1.0 for local maximum and 1.0 for minimum

2110 
<< 
2111 
" vec4 key = vec4(0.0); \n"

2112 
" #define KEYTEST_STEP0(i) \\\n"

2113 
" {\\\n"

2114 
" bvec4 test1 = greaterThan(vec4(cc[i]), max(v1[i], v2[i])), test2 = lessThan(vec4(cc[i]), min(v1[i], v2[i]));\\\n"

2115 
" key[i] = cc[i] > THRESHOLD0 && all(test1)?1.0: 0.0;\\\n"

2116 
" key[i] = cc[i] < THRESHOLD0 && all(test2)? 1.0: key[i];\\\n"

2117 
" }\n"

2118 
" REPEAT4(KEYTEST_STEP0);\n"

2119 
" if(gl_TexCoord[0].x < 1.0) {key.rb = vec2(0.0);}\n"

2120 
" if(gl_TexCoord[0].y < 1.0) {key.rg = vec2(0.0);}\n"

2121 
" gl_FragColor = vec4(0.0);\n"

2122 
" if(any(notEqual(key, vec4(0.0)))) {\n";

2123  
2124 
//do edge supression first..

2125 
//vector v1 is < (1, 0), (1, 0), (0,1), (0, 1)>

2126 
//vector v2 is < (1,1), (1,1), (1,1), (1, 1)>

2127  
2128 
out<< 
2129 
" float fxx[4], fyy[4], fxy[4], fx[4], fy[4];\n"

2130 
" #define EDGE_SUPPRESION(i) \\\n"

2131 
" if(key[i] != 0)\\\n"

2132 
" {\\\n"

2133 
" vec4 D2 = v1[i].xyzw  cc[i];\\\n"

2134 
" vec2 D4 = v2[i].xw  v2[i].yz;\\\n"

2135 
" vec2 D5 = 0.5*(v1[i].ywv1[i].xz); \\\n"

2136 
" fx[i] = D5.x; fy[i] = D5.y ;\\\n"

2137 
" fxx[i] = D2.x + D2.y;\\\n"

2138 
" fyy[i] = D2.z + D2.w;\\\n"

2139 
" fxy[i] = 0.25*(D4.x + D4.y);\\\n"

2140 
" float fxx_plus_fyy = fxx[i] + fyy[i];\\\n"

2141 
" float score_up = fxx_plus_fyy*fxx_plus_fyy; \\\n"

2142 
" float score_down = (fxx[i]*fyy[i]  fxy[i]*fxy[i]);\\\n"

2143 
" if( score_down <= 0  score_up > THRESHOLD2 * score_down)key[i] = 0;\\\n"

2144 
" }\n"

2145 
" REPEAT4(EDGE_SUPPRESION);\n"

2146 
" if(any(notEqual(key, vec4(0.0)))) {\n";

2147 

2148 
////////////////////////////////////////////////

2149 
//read 9 pixels of upper/lower level

2150 
out<< 
2151 
" vec4 v4[4], v5[4], v6[4];\n"

2152 
" ccc = texture2DRect(texU, gl_TexCoord[0].xy);\n"

2153 
" clc = texture2DRect(texU, gl_TexCoord[1].xy);\n"

2154 
" crc = texture2DRect(texU, gl_TexCoord[2].xy);\n"

2155 
" ccd = texture2DRect(texU, gl_TexCoord[3].xy);\n"

2156 
" ccu = texture2DRect(texU, gl_TexCoord[4].xy);\n"

2157 
" cld = texture2DRect(texU, gl_TexCoord[5].xy);\n"

2158 
" clu = texture2DRect(texU, gl_TexCoord[6].xy);\n"

2159 
" crd = texture2DRect(texU, gl_TexCoord[7].xy);\n"

2160 
" cru = texture2DRect(texU, TexRU.xy);\n"

2161 
" vec4 cu = ccc;\n"

2162 
" v4[0] = vec4(clc.g, ccc.g, ccd.b, ccc.b);\n"

2163 
" v4[1] = vec4(ccc.r, crc.r, ccd.a, ccc.a);\n"

2164 
" v4[2] = vec4(clc.a, ccc.a, ccc.r, ccu.r);\n"

2165 
" v4[3] = vec4(ccc.b, crc.b, ccc.g, ccu.g);\n"

2166 
" v6[0] = vec4(cld.a, clc.a, ccd.a, ccc.a);\n"

2167 
" v6[1] = vec4(ccd.b, ccc.b, crd.b, crc.b);\n"

2168 
" v6[2] = vec4(clc.g, clu.g, ccc.g, ccu.g);\n"

2169 
" v6[3] = vec4(ccc.r, ccu.r, crc.r, cru.r);\n"

2170 
<< 
2171 
" #define KEYTEST_STEP1(i)\\\n"

2172 
" if(key[i] == 1.0)\\\n"

2173 
" {\\\n"

2174 
" bvec4 test = lessThan(vec4(cc[i]), max(v4[i], v6[i])); \\\n"

2175 
" if(cc[i] < cu[i]  any(test))key[i] = 0.0; \\\n"

2176 
" }else if(key[i] == 1.0)\\\n"

2177 
" {\\\n"

2178 
" bvec4 test = greaterThan(vec4(cc[i]), min(v4[i], v6[i])); \\\n"

2179 
" if(cc[i] > cu[i]  any(test) )key[i] = 0.0; \\\n"

2180 
" }\n"

2181 
" REPEAT4(KEYTEST_STEP1);\n"

2182 
" if(any(notEqual(key, vec4(0.0)))) { \n"

2183 
<< 
2184 
" ccc = texture2DRect(texD, gl_TexCoord[0].xy);\n"

2185 
" clc = texture2DRect(texD, gl_TexCoord[1].xy);\n"

2186 
" crc = texture2DRect(texD, gl_TexCoord[2].xy);\n"

2187 
" ccd = texture2DRect(texD, gl_TexCoord[3].xy);\n"

2188 
" ccu = texture2DRect(texD, gl_TexCoord[4].xy);\n"

2189 
" cld = texture2DRect(texD, gl_TexCoord[5].xy);\n"

2190 
" clu = texture2DRect(texD, gl_TexCoord[6].xy);\n"

2191 
" crd = texture2DRect(texD, gl_TexCoord[7].xy);\n"

2192 
" cru = texture2DRect(texD, TexRU.xy);\n"

2193 
" vec4 cd = ccc;\n"

2194 
" v5[0] = vec4(clc.g, ccc.g, ccd.b, ccc.b);\n"

2195 
" v5[1] = vec4(ccc.r, crc.r, ccd.a, ccc.a);\n"

2196 
" v5[2] = vec4(clc.a, ccc.a, ccc.r, ccu.r);\n"

2197 
" v5[3] = vec4(ccc.b, crc.b, ccc.g, ccu.g);\n"

2198 
" v6[0] = vec4(cld.a, clc.a, ccd.a, ccc.a);\n"

2199 
" v6[1] = vec4(ccd.b, ccc.b, crd.b, crc.b);\n"

2200 
" v6[2] = vec4(clc.g, clu.g, ccc.g, ccu.g);\n"

2201 
" v6[3] = vec4(ccc.r, ccu.r, crc.r, cru.r);\n"

2202 
<< 
2203 
" #define KEYTEST_STEP2(i)\\\n"

2204 
" if(key[i] == 1.0)\\\n"

2205 
" {\\\n"

2206 
" bvec4 test = lessThan(vec4(cc[i]), max(v5[i], v6[i]));\\\n"

2207 
" if(cc[i] < cd[i]  any(test))key[i] = 0.0; \\\n"

2208 
" }else if(key[i] == 1.0)\\\n"

2209 
" {\\\n"

2210 
" bvec4 test = greaterThan(vec4(cc[i]), min(v5[i], v6[i]));\\\n"

2211 
" if(cc[i] > cd[i]  any(test))key[i] = 0.0; \\\n"

2212 
" }\n"

2213 
" REPEAT4(KEYTEST_STEP2);\n"

2214 
" float keysum = dot(abs(key), vec4(1, 1, 1, 1)) ;\n"

2215 
" //assume there is only one keypoint in the four. \n"

2216 
" if(keysum==1.0) {\n";

2217  
2218 
//////////////////////////////////////////////////////////////////////

2219 
if(GlobalUtil::_SubpixelLocalization)

2220  
2221 
out << 
2222 
" vec3 offset = vec3(0, 0, 0); \n"

2223 
" #define TESTMOVE_KEYPOINT(idx) \\\n"

2224 
" if(key[idx] != 0) \\\n"

2225 
" {\\\n"

2226 
" cu[0] = cu[idx]; cd[0] = cd[idx]; cc[0] = cc[idx]; \\\n"

2227 
" v4[0] = v4[idx]; v5[0] = v5[idx]; \\\n"

2228 
" fxy[0] = fxy[idx]; fxx[0] = fxx[idx]; fyy[0] = fyy[idx]; \\\n"

2229 
" fx[0] = fx[idx]; fy[0] = fy[idx]; \\\n"

2230 
" }\n"

2231 
" TESTMOVE_KEYPOINT(1);\n"

2232 
" TESTMOVE_KEYPOINT(2);\n"

2233 
" TESTMOVE_KEYPOINT(3);\n"

2234 
<< 
2235 

2236 
" float fs = 0.5*( cu[0]  cd[0] ); \n"

2237 
" float fss = cu[0] + cd[0]  cc[0]  cc[0];\n"

2238 
" float fxs = 0.25 * (v4[0].y + v5[0].x  v4[0].x  v5[0].y);\n"

2239 
" float fys = 0.25 * (v4[0].w + v5[0].z  v4[0].z  v5[0].w);\n"

2240 
" vec4 A0, A1, A2 ; \n"

2241 
" A0 = vec4(fxx[0], fxy[0], fxs, fx[0]); \n"

2242 
" A1 = vec4(fxy[0], fyy[0], fys, fy[0]); \n"

2243 
" A2 = vec4(fxs, fys, fss, fs); \n"

2244 
" vec3 x3 = abs(vec3(fxx[0], fxy[0], fxs)); \n"

2245 
" float maxa = max(max(x3.x, x3.y), x3.z); \n"

2246 
" if(maxa >= 1e10 ) \n"

2247 
" { \n"

2248 
" if(x3.y ==maxa ) \n"

2249 
" { \n"

2250 
" vec4 TEMP = A1; A1 = A0; A0 = TEMP; \n"

2251 
" }else if( x3.z == maxa ) \n"

2252 
" { \n"

2253 
" vec4 TEMP = A2; A2 = A0; A0 = TEMP; \n"

2254 
" } \n"

2255 
" A0 /= A0.x; \n"

2256 
" A1 = A1.x * A0; \n"

2257 
" A2 = A2.x * A0; \n"

2258 
" vec2 x2 = abs(vec2(A1.y, A2.y)); \n"

2259 
" if( x2.y > x2.x ) \n"

2260 
" { \n"

2261 
" vec3 TEMP = A2.yzw; \n"

2262 
" A2.yzw = A1.yzw; \n"

2263 
" A1.yzw = TEMP; \n"

2264 
" x2.x = x2.y; \n"

2265 
" } \n"

2266 
" if(x2.x >= 1e10) { \n"

2267 
" A1.yzw /= A1.y; \n"

2268 
" A2.yzw = A2.y * A1.yzw; \n"

2269 
" if(abs(A2.z) >= 1e10) {\n"

2270 
" offset.z = A2.w /A2.z; \n"

2271 
" offset.y = A1.w  offset.z*A1.z; \n"

2272 
" offset.x = A0.w  offset.z*A0.z  offset.y*A0.y; \n"

2273 
" bool test = (abs(cc[0] + 0.5*dot(vec3(fx[0], fy[0], fs), offset ))>THRESHOLD1) ;\n"

2274 
" if(!test  any( greaterThan(abs(offset), vec3(1.0)))) key = vec4(0.0);\n"

2275 
" }\n"

2276 
" }\n"

2277 
" }\n"

2278 
<<"\n"

2279 
" float keyv = dot(key, vec4(1.0, 2.0, 3.0, 4.0));\n"

2280 
" gl_FragColor = vec4(keyv, offset);\n"

2281 
" }}}}\n"

2282 
"}\n" <<'\0'; 
2283  
2284 
else out << "\n" 
2285 
" float keyv = dot(key, vec4(1.0, 2.0, 3.0, 4.0));\n"

2286 
" gl_FragColor = vec4(keyv, 0, 0, 0);\n"

2287 
" }}}}\n"

2288 
"}\n" <<'\0'; 
2289  
2290 
ProgramGLSL * program = new ProgramGLSL(buffer);

2291 
s_keypoint = program ; 
2292  
2293 
//parameter

2294 
_param_dog_texu = glGetUniformLocation(*program, "texU");

2295 
_param_dog_texd = glGetUniformLocation(*program, "texD");

2296 
} 
2297 
void ShaderBagPKSL::SetDogTexParam(int texU, int texD) 
2298 
{ 
2299 
glUniform1i(_param_dog_texu, 1);

2300 
glUniform1i(_param_dog_texd, 2);

2301 
} 
2302 
void ShaderBagPKSL::SetGenListStepParam(int tex, int tex0) 
2303 
{ 
2304 
glUniform1i(_param_genlist_step_tex0, 1);

2305 
} 
2306  
2307 
void ShaderBagPKSL::SetGenVBOParam(float width, float fwidth,float size) 
2308 
{ 
2309 
float sizes[4] = {size*3.0f, fwidth, width, 1.0f/width}; 
2310 
glUniform4fv(_param_genvbo_size, 1, sizes);

2311 
} 
2312 
void ShaderBagPKSL::SetGradPassParam(int texP) 
2313 
{ 
2314 
glUniform1i(_param_grad_pass_texp, 1);

2315 
} 
2316  
2317 
void ShaderBagPKSL::LoadDescriptorShader()

2318 
{ 
2319 
GlobalUtil::_DescriptorPPT = 16;

2320 
LoadDescriptorShaderF2(); 
2321 
s_rect_description = LoadDescriptorProgramRECT(); 
2322 
} 
2323  
2324 
ProgramGLSL* ShaderBagPKSL::LoadDescriptorProgramRECT() 
2325 
{ 
2326 
//one shader outpout 128/8 = 16 , each fragout encodes 4

2327 
//const double twopi = 2.0*3.14159265358979323846;

2328 
//const double rpi = 8.0/twopi;

2329 
char buffer[10240]; 
2330 
ostrstream out(buffer, 10240);

2331  
2332 
out<<setprecision(8);

2333  
2334 
out<<"\n"

2335 
"#define M_PI 3.14159265358979323846\n"

2336 
"#define TWO_PI (2.0*M_PI)\n"

2337 
"#define RPI 1.2732395447351626861510701069801\n"

2338 
"#define WF size.z\n"

2339 
"uniform sampler2DRect tex; \n"

2340 
"uniform sampler2DRect gtex; \n"

2341 
"uniform sampler2DRect otex; \n"

2342 
"uniform vec4 dsize; \n"

2343 
"uniform vec3 size; \n"

2344 
"void main() \n"

2345 
"{\n"

2346 
" vec2 dim = size.xy; //image size \n"

2347 
" float index = dsize.x*floor(gl_TexCoord[0].y * 0.5) + gl_TexCoord[0].x;\n"

2348 
" float idx = 8.0* fract(index * 0.125) + 8.0 * floor(2.0* fract(gl_TexCoord[0].y * 0.5)); \n"

2349 
" index = floor(index*0.125)+ 0.49; \n"

2350 
" vec2 coord = floor( vec2( mod(index, dsize.z), index*dsize.w)) + 0.5 ;\n"

2351 
" vec2 pos = texture2DRect(tex, coord).xy; \n"

2352 
" vec2 wsz = texture2DRect(tex, coord).zw;\n"

2353 
" float aspect_ratio = wsz.y / wsz.x;\n"

2354 
" float aspect_sq = aspect_ratio * aspect_ratio; \n"

2355 
" vec2 spt = wsz * 0.25; vec2 ispt = 1.0 / spt; \n";

2356  
2357 
//here cscs is actually (cos, sin, cos, sin) * (factor: 3)*sigma

2358 
//and rots is (cos, sin, cos, sin ) /(factor*sigma)

2359 
//devide the 4x4 sift grid into 16 1x1 block, and each corresponds to a shader thread

2360 
//To use linear interoplation, 1x1 is increased to 2x2, by adding 0.5 to each side

2361 
out<< 
2362 
" vec4 temp; vec2 pt; \n"

2363 
" pt.x = pos.x + fract(idx*0.25) * wsz.x; \n"

2364 
" pt.y = pos.y + (floor(idx*0.25) + 0.5) * spt.y; \n";

2365 

2366 
//get a horizontal bounding box of the rotated rectangle

2367 
out<< 
2368 
" vec4 sz; \n"

2369 
" sz.xy = max(pt  spt, vec2(2,2));\n"

2370 
" sz.zw = min(pt + spt, dim  3); \n"

2371 
" sz = floor(sz * 0.5)+0.5;"; //move sample point to pixel center 
2372 
//get voting for two box

2373  
2374 
out<<"\n"

2375 
" vec4 DA, DB; vec2 spos; \n"

2376 
" DA = DB = vec4(0, 0, 0, 0); \n"

2377 
" vec4 nox = vec4(0, 1.0, 0.0, 1.0); \n"

2378 
" vec4 noy = vec4(0, 0.0, 1.0, 1.0); \n"

2379 
" for(spos.y = sz.y; spos.y <= sz.w; spos.y+=1.0) \n"

2380 
" { \n"

2381 
" for(spos.x = sz.x; spos.x <= sz.z; spos.x+=1.0) \n"

2382 
" { \n"

2383 
" vec2 tpt = spos * 2.0  pt  0.5; \n"

2384 
" vec4 nx = (tpt.x + nox) * ispt.x; \n"

2385 
" vec4 ny = (tpt.y + noy) * ispt.y; \n"

2386 
" vec4 nxn = abs(nx), nyn = abs(ny); \n"

2387 
" bvec4 inside = lessThan(max(nxn, nyn) , vec4(1.0)); \n"

2388 
" if(any(inside))\n"

2389 
" {\n"

2390 
" vec4 gg = texture2DRect(gtex, spos);\n"

2391 
" vec4 oo = texture2DRect(otex, spos);\n"

2392 
//" vec4 cc = cos(oo), ss = sin(oo); \n"

2393 
//" oo = atan(ss* aspect_ratio, cc); \n"

2394 
//" gg = gg * sqrt(ss * ss * aspect_sq + cc * cc); \n "

2395 
" vec4 theta0 = ( oo)*RPI;\n"

2396 
" vec4 theta = 8.0 * fract(1.0 + 0.125 * theta0); \n"

2397 
" vec4 theta1 = floor(theta); \n"

2398 
" vec4 weight = (1  nxn) * (1  nyn) * gg; \n"

2399 
" vec4 weight2 = (theta  theta1) * weight; \n"

2400 
" vec4 weight1 = weight  weight2; \n"

2401 
" for(int i = 0;i < 4; i++)\n"

2402 
" {\n"

2403 
" if(inside[i])\n"

2404 
" {\n"

2405 
" DA += vec4(equal(vec4(theta1[i]), vec4(0, 1, 2, 3)))*weight1[i]; \n"

2406 
" DA += vec4(equal(vec4(theta1[i]), vec4(7, 0, 1, 2)))*weight2[i]; \n"

2407 
" DB += vec4(equal(vec4(theta1[i]), vec4(4, 5, 6, 7)))*weight1[i]; \n"

2408 
" DB += vec4(equal(vec4(theta1[i]), vec4(3, 4, 5, 6)))*weight2[i]; \n"

2409 
" }\n"

2410 
" }\n"

2411 
" }\n"

2412 
" }\n"

2413 
" }\n";

2414 
out<< 
2415 
" gl_FragData[0] = DA; gl_FragData[1] = DB;\n"

2416 
"}\n"<<'\0'; 
2417  
2418 
ProgramGLSL * program = new ProgramGLSL(buffer);

2419 
if(program>IsNative())

2420 
{ 
2421 
return program;

2422 
} 
2423 
else

2424 
{ 
2425 
delete program;

2426 
return NULL; 
2427 
} 
2428 
} 
2429  
2430 
ProgramGLSL* ShaderBagPKSL::LoadDescriptorProgramPKSL() 
2431 
{ 
2432 
//one shader outpout 128/8 = 16 , each fragout encodes 4

2433 
//const double twopi = 2.0*3.14159265358979323846;

2434 
//const double rpi = 8.0/twopi;

2435 
char buffer[10240]; 
2436 
ostrstream out(buffer, 10240);

2437  
2438 
out<<setprecision(8);

2439  
2440 
out<<"\n"

2441 
"#define M_PI 3.14159265358979323846\n"

2442 
"#define TWO_PI (2.0*M_PI)\n"

2443 
"#define RPI 1.2732395447351626861510701069801\n"

2444 
"#define WF size.z\n"

2445 
"uniform sampler2DRect tex; \n"

2446 
"uniform sampler2DRect gtex; \n"

2447 
"uniform sampler2DRect otex; \n"

2448 
"uniform vec4 dsize; \n"

2449 
"uniform vec3 size; \n"

2450 
"void main() \n"

2451 
"{\n"

2452 
" vec2 dim = size.xy; //image size \n"

2453 
" float index = dsize.x*floor(gl_TexCoord[0].y * 0.5) + gl_TexCoord[0].x;\n"

2454 
" float idx = 8.0* fract(index * 0.125) + 8.0 * floor(2.0* fract(gl_TexCoord[0].y * 0.5)); \n"

2455 
" index = floor(index*0.125)+ 0.49; \n"

2456 
" vec2 coord = floor( vec2( mod(index, dsize.z), index*dsize.w)) + 0.5 ;\n"

2457 
" vec2 pos = texture2DRect(tex, coord).xy; \n"

2458 
" if(any(lessThan(pos.xy, vec2(1.0)))  any(greaterThan(pos.xy, dim1.0))) "

2459 
" //discard; \n"

2460 
" { gl_FragData[0] = gl_FragData[1] = vec4(0.0); return; }\n"

2461 
" float anglef = texture2DRect(tex, coord).z;\n"

2462 
" if(anglef > M_PI) anglef = TWO_PI;\n"

2463 
" float sigma = texture2DRect(tex, coord).w; \n"

2464 
" float spt = abs(sigma * WF); //default to be 3*sigma \n";

2465 
//rotation

2466 
out<< 
2467 
" vec4 cscs, rots; \n"

2468 
" cscs.x = cos(anglef); cscs.y = sin(anglef); \n"

2469 
" cscs.zw =  cscs.xy; \n"

2470 
" rots = cscs /spt; \n"

2471 
" cscs *= spt; \n";

2472  
2473 
//here cscs is actually (cos, sin, cos, sin) * (factor: 3)*sigma

2474 
//and rots is (cos, sin, cos, sin ) /(factor*sigma)

2475 
//devide the 4x4 sift grid into 16 1x1 block, and each corresponds to a shader thread

2476 
//To use linear interoplation, 1x1 is increased to 2x2, by adding 0.5 to each side

2477 
out<< 
2478 
" vec4 temp; vec2 pt, offsetpt; \n"

2479 
" /*the fraction part of idx is .5*/ \n"

2480 
" offsetpt.x = 4.0* fract(idx*0.25)  2.0; \n"

2481 
" offsetpt.y = floor(idx*0.25)  1.5; \n"

2482 
" temp = cscs.xwyx*offsetpt.xyxy; \n"

2483 
" pt = pos + temp.xz + temp.yw; \n";

2484 

2485 
//get a horizontal bounding box of the rotated rectangle

2486 
out<< 
2487 
" vec2 bwin = abs(cscs.xy); \n"

2488 
" float bsz = bwin.x + bwin.y; \n"

2489 
" vec4 sz; \n"

2490 
" sz.xy = max(pt  vec2(bsz), vec2(2,2));\n"

2491 
" sz.zw = min(pt + vec2(bsz), dim  3); \n"

2492 
" sz = floor(sz * 0.5)+0.5;"; //move sample point to pixel center 
2493 
//get voting for two box

2494  
2495 
out<<"\n"

2496 
" vec4 DA, DB; vec2 spos; \n"

2497 
" DA = DB = vec4(0, 0, 0, 0); \n"

2498 
" vec4 nox = vec4(0, rots.xy, rots.x + rots.y); \n"

2499 
" vec4 noy = vec4(0, rots.wx, rots.w + rots.x); \n"

2500 
" for(spos.y = sz.y; spos.y <= sz.w; spos.y+=1.0) \n"

2501 
" { \n"

2502 
" for(spos.x = sz.x; spos.x <= sz.z; spos.x+=1.0) \n"

2503 
" { \n"

2504 
" vec2 tpt = spos * 2.0  pt  0.5; \n"

2505 
" vec4 temp = rots.xywx * tpt.xyxy; \n"

2506 
" vec2 temp2 = temp.xz + temp.yw; \n"

2507 
" vec4 nx = temp2.x + nox; \n"

2508 
" vec4 ny = temp2.y + noy; \n"

2509 
" vec4 nxn = abs(nx), nyn = abs(ny); \n"

2510 
" bvec4 inside = lessThan(max(nxn, nyn) , vec4(1.0)); \n"

2511 
" if(any(inside))\n"

2512 
" {\n"

2513 
" vec4 gg = texture2DRect(gtex, spos);\n"

2514 
" vec4 oo = texture2DRect(otex, spos);\n"

2515 
" vec4 theta0 = (anglef  oo)*RPI;\n"

2516 
" vec4 theta = 8.0 * fract(1.0 + 0.125 * theta0); \n"

2517 
" vec4 theta1 = floor(theta); \n"

2518 
" vec4 diffx = nx + offsetpt.x, diffy = ny + offsetpt.y; \n"

2519 
" vec4 ww = exp(0.125 * (diffx * diffx + diffy * diffy )); \n"

2520 
" vec4 weight = (1  nxn) * (1  nyn) * gg * ww; \n"

2521 
" vec4 weight2 = (theta  theta1) * weight; \n"

2522 
" vec4 weight1 = weight  weight2; \n"

2523 
" for(int i = 0;i < 4; i++)\n"

2524 
" {\n"

2525 
" if(inside[i])\n"

2526 
" {\n"

2527 
" DA += vec4(equal(vec4(theta1[i]), vec4(0, 1, 2, 3)))*weight1[i]; \n"

2528 
" DA += vec4(equal(vec4(theta1[i]), vec4(7, 0, 1, 2)))*weight2[i]; \n"

2529 
" DB += vec4(equal(vec4(theta1[i]), vec4(4, 5, 6, 7)))*weight1[i]; \n"

2530 
" DB += vec4(equal(vec4(theta1[i]), vec4(3, 4, 5, 6)))*weight2[i]; \n"

2531 
" }\n"

2532 
" }\n"

2533 
" }\n"

2534 
" }\n"

2535 
" }\n";

2536 
out<< 
2537 
" gl_FragData[0] = DA; gl_FragData[1] = DB;\n"

2538 
"}\n"<<'\0'; 
2539  
2540 
ProgramGLSL * program = new ProgramGLSL(buffer);

2541 
if(program>IsNative())

2542 
{ 
2543 
return program;

2544 
} 
2545 
else

2546 
{ 
2547 
delete program;

2548 
return NULL; 
2549 
} 
2550 
} 
2551  
2552 
void ShaderBagPKSL::LoadDescriptorShaderF2()

2553 
{ 
2554  
2555 
ProgramGLSL * program = LoadDescriptorProgramPKSL(); 
2556 
if( program )

2557 
{ 
2558 
s_descriptor_fp = program; 
2559 
_param_descriptor_gtex = glGetUniformLocation(*program, "gtex");

2560 
_param_descriptor_otex = glGetUniformLocation(*program, "otex");

2561 
_param_descriptor_size = glGetUniformLocation(*program, "size");

2562 
_param_descriptor_dsize = glGetUniformLocation(*program, "dsize");

2563 
} 
2564 
} 
2565  
2566  
2567  
2568 
void ShaderBagPKSL::SetSimpleOrientationInput(int oTex, float sigma, float sigma_step) 
2569 
{ 
2570 
glUniform1i(_param_orientation_gtex, 1);

2571 
glUniform2f(_param_orientation_size, sigma, sigma_step); 
2572 
} 
2573  
2574  
2575 
void ShaderBagPKSL::SetFeatureOrientationParam(int gtex, int width, int height, float sigma, int otex, float step) 
2576 
{ 
2577 
///

2578 
glUniform1i(_param_orientation_gtex, 1);

2579 
glUniform1i(_param_orientation_otex, 2);

2580  
2581 
float size[4]; 
2582 
size[0] = (float)width; 
2583 
size[1] = (float)height; 
2584 
size[2] = sigma;

2585 
size[3] = step;

2586 
glUniform4fv(_param_orientation_size, 1, size);

2587  
2588 
} 
2589  
2590 
void ShaderBagPKSL::SetFeatureDescirptorParam(int gtex, int otex, float dwidth, float fwidth, float width, float height, float sigma) 
2591 
{ 
2592 
if(sigma == 0 && s_rect_description) 
2593 
{ 
2594 
//rectangle description mode

2595 
s_rect_description>UseProgram(); 
2596 
GLint param_descriptor_gtex = glGetUniformLocation(*s_rect_description, "gtex");

2597 
GLint param_descriptor_otex = glGetUniformLocation(*s_rect_description, "otex");

2598 
GLint param_descriptor_size = glGetUniformLocation(*s_rect_description, "size");

2599 
GLint param_descriptor_dsize = glGetUniformLocation(*s_rect_description, "dsize");

2600 
///

2601 
glUniform1i(param_descriptor_gtex, 1);

2602 
glUniform1i(param_descriptor_otex, 2);

2603  
2604 
float dsize[4] ={dwidth, 1.0f/dwidth, fwidth, 1.0f/fwidth}; 
2605 
glUniform4fv(param_descriptor_dsize, 1, dsize);

2606 
float size[3]; 
2607 
size[0] = width;

2608 
size[1] = height;

2609 
size[2] = GlobalUtil::_DescriptorWindowFactor;

2610 
glUniform3fv(param_descriptor_size, 1, size);

2611 
}else

2612 
{ 
2613 
///

2614 
glUniform1i(_param_descriptor_gtex, 1);

2615 
glUniform1i(_param_descriptor_otex, 2);

2616  
2617  
2618 
float dsize[4] ={dwidth, 1.0f/dwidth, fwidth, 1.0f/fwidth}; 
2619 
glUniform4fv(_param_descriptor_dsize, 1, dsize);

2620 
float size[3]; 
2621 
size[0] = width;

2622 
size[1] = height;

2623 
size[2] = GlobalUtil::_DescriptorWindowFactor;

2624 
glUniform3fv(_param_descriptor_size, 1, size);

2625 
} 
2626  
2627 
} 
2628  
2629  
2630 
void ShaderBagPKSL::SetGenListEndParam(int ktex) 
2631 
{ 
2632 
glUniform1i(_param_genlist_end_ktex, 1);

2633 
} 
2634 
void ShaderBagPKSL::SetGenListInitParam(int w, int h) 
2635 
{ 
2636 
float bbox[4] = {(w 1.0f) * 0.5f +0.25f, (w1.0f) * 0.5f  0.25f, (h  1.0f) * 0.5f + 0.25f, (h1.0f) * 0.5f  0.25f}; 
2637 
glUniform4fv(_param_genlist_init_bbox, 1, bbox);

2638 
} 
2639  
2640 
void ShaderBagPKSL::SetMarginCopyParam(int xmax, int ymax) 
2641 
{ 
2642 
float truncate[4]; 
2643 
truncate[0] = (xmax  0.5f) * 0.5f; //((xmax + 1) >> 1)  0.5f; 
2644 
truncate[1] = (ymax  0.5f) * 0.5f; //((ymax + 1) >> 1)  0.5f; 
2645 
truncate[2] = (xmax %2 == 1)? 0.0f: 1.0f; 
2646 
truncate[3] = truncate[2] + (((ymax % 2) == 1)? 0.0f : 2.0f); 
2647 
glUniform4fv(_param_margin_copy_truncate, 1, truncate);

2648 
} 