root / rgbdslam / external / siftgpu / src / SiftGPU / ProgramGLSL.cpp @ 9240aaa3
History | View | Annotate | Download (81.7 KB)
1 | 9240aaa3 | Alex | ////////////////////////////////////////////////////////////////////////////
|
---|---|---|---|
2 | // File: ProgramGLSL.cpp
|
||
3 | // Author: Changchang Wu
|
||
4 | // Description : GLSL related classes
|
||
5 | // class ProgramGLSL A simple wrapper of GLSL programs
|
||
6 | // class ShaderBagGLSL GLSL shaders for SIFT
|
||
7 | // class FilterGLSL GLSL gaussian filters for SIFT
|
||
8 | //
|
||
9 | // Copyright (c) 2007 University of North Carolina at Chapel Hill
|
||
10 | // All Rights Reserved
|
||
11 | //
|
||
12 | // Permission to use, copy, modify and distribute this software and its
|
||
13 | // documentation for educational, research and non-profit purposes, without
|
||
14 | // fee, and without a written agreement is hereby granted, provided that the
|
||
15 | // above copyright notice and the following paragraph appear in all copies.
|
||
16 | //
|
||
17 | // The University of North Carolina at Chapel Hill make no representations
|
||
18 | // about the suitability of this software for any purpose. It is provided
|
||
19 | // 'as is' without express or implied warranty.
|
||
20 | //
|
||
21 | // Please send BUG REPORTS to ccwu@cs.unc.edu
|
||
22 | //
|
||
23 | ////////////////////////////////////////////////////////////////////////////
|
||
24 | |||
25 | |||
26 | #include "GL/glew.h" |
||
27 | #include <string.h> |
||
28 | #include <stdio.h> |
||
29 | #include <iomanip> |
||
30 | #include <iostream> |
||
31 | #include <strstream> |
||
32 | #include <vector> |
||
33 | #include <algorithm> |
||
34 | #include <math.h> |
||
35 | using namespace std; |
||
36 | |||
37 | #include "GlobalUtil.h" |
||
38 | #include "ProgramGLSL.h" |
||
39 | #include "GLTexImage.h" |
||
40 | #include "ShaderMan.h" |
||
41 | #include "SiftGPU.h" |
||
42 | |||
43 | ProgramGLSL::ShaderObject::ShaderObject(int shadertype, const char * source, int filesource) |
||
44 | { |
||
45 | |||
46 | |||
47 | _type = shadertype; |
||
48 | _compiled = 0;
|
||
49 | |||
50 | |||
51 | _shaderID = glCreateShader(shadertype); |
||
52 | if(_shaderID == 0) return; |
||
53 | |||
54 | if(source)
|
||
55 | { |
||
56 | |||
57 | GLint code_length; |
||
58 | if(filesource ==0) |
||
59 | { |
||
60 | const char* code = source; |
||
61 | code_length = strlen(code); |
||
62 | glShaderSource(_shaderID, 1, (const char **) &code, &code_length); |
||
63 | }else
|
||
64 | { |
||
65 | char * code;
|
||
66 | if((code_length= ReadShaderFile(source, code)) ==0) return; |
||
67 | glShaderSource(_shaderID, 1, (const char **) &code, &code_length); |
||
68 | delete code;
|
||
69 | } |
||
70 | |||
71 | glCompileShader(_shaderID); |
||
72 | |||
73 | CheckCompileLog(); |
||
74 | |||
75 | if(!_compiled) std::cout << source;
|
||
76 | } |
||
77 | |||
78 | |||
79 | |||
80 | |||
81 | } |
||
82 | |||
83 | int ProgramGLSL::ShaderObject::ReadShaderFile(const char *sourcefile, char*& code ) |
||
84 | { |
||
85 | code = NULL;
|
||
86 | FILE * file; |
||
87 | int len=0; |
||
88 | |||
89 | if(sourcefile == NULL) return 0; |
||
90 | |||
91 | file = fopen(sourcefile,"rt");
|
||
92 | if(file == NULL) return 0; |
||
93 | |||
94 | |||
95 | fseek(file, 0, SEEK_END);
|
||
96 | len = ftell(file); |
||
97 | rewind(file); |
||
98 | if(len >1) |
||
99 | { |
||
100 | code = new char[len+1]; |
||
101 | fread(code, sizeof( char), len, file); |
||
102 | code[len] = 0;
|
||
103 | }else
|
||
104 | { |
||
105 | len = 0;
|
||
106 | } |
||
107 | |||
108 | fclose(file); |
||
109 | |||
110 | return len;
|
||
111 | |||
112 | } |
||
113 | |||
114 | void ProgramGLSL::ShaderObject::CheckCompileLog()
|
||
115 | { |
||
116 | |||
117 | GLint status; |
||
118 | glGetShaderiv(_shaderID, GL_COMPILE_STATUS, &status); |
||
119 | _compiled = (status ==GL_TRUE); |
||
120 | |||
121 | if(_compiled == 0) PrintCompileLog(std::cout); |
||
122 | |||
123 | |||
124 | } |
||
125 | |||
126 | ProgramGLSL::ShaderObject::~ShaderObject() |
||
127 | { |
||
128 | if(_shaderID) glDeleteShader(_shaderID);
|
||
129 | |||
130 | } |
||
131 | |||
132 | int ProgramGLSL::ShaderObject::IsValidFragmentShader()
|
||
133 | { |
||
134 | return _type == GL_FRAGMENT_SHADER && _shaderID && _compiled;
|
||
135 | } |
||
136 | |||
137 | int ProgramGLSL::ShaderObject::IsValidVertexShader()
|
||
138 | { |
||
139 | return _type == GL_VERTEX_SHADER && _shaderID && _compiled;
|
||
140 | } |
||
141 | |||
142 | |||
143 | void ProgramGLSL::ShaderObject::PrintCompileLog(ostream&os)
|
||
144 | { |
||
145 | GLint len = 0;
|
||
146 | |||
147 | glGetShaderiv(_shaderID, GL_INFO_LOG_LENGTH , &len); |
||
148 | if(len <=1) return; |
||
149 | |||
150 | char * compileLog = new char[len+1]; |
||
151 | if(compileLog == NULL) return; |
||
152 | |||
153 | glGetShaderInfoLog(_shaderID, len, &len, compileLog); |
||
154 | |||
155 | |||
156 | os<<"Compile Log\n"<<compileLog<<"\n"; |
||
157 | |||
158 | delete compileLog;
|
||
159 | } |
||
160 | |||
161 | |||
162 | ProgramGLSL::ProgramGLSL() |
||
163 | { |
||
164 | _linked = 0;
|
||
165 | _programID = glCreateProgram(); |
||
166 | } |
||
167 | ProgramGLSL::~ProgramGLSL() |
||
168 | { |
||
169 | if(_programID)glDeleteProgram(_programID);
|
||
170 | } |
||
171 | void ProgramGLSL::AttachShaderObject(ShaderObject &shader)
|
||
172 | { |
||
173 | if(_programID && shader.IsValidShaderObject())
|
||
174 | glAttachShader(_programID, shader.GetShaderID()); |
||
175 | } |
||
176 | void ProgramGLSL::DetachShaderObject(ShaderObject &shader)
|
||
177 | { |
||
178 | if(_programID && shader.IsValidShaderObject())
|
||
179 | glDetachShader(_programID, shader.GetShaderID()); |
||
180 | } |
||
181 | int ProgramGLSL::LinkProgram()
|
||
182 | { |
||
183 | _linked = 0;
|
||
184 | |||
185 | if(_programID==0) return 0; |
||
186 | |||
187 | glLinkProgram(_programID); |
||
188 | |||
189 | CheckLinkLog(); |
||
190 | |||
191 | // GlobalUtil::StartTimer("100 link test");
|
||
192 | // for(int i = 0; i<100; i++) glLinkProgram(_programID);
|
||
193 | // GlobalUtil::StopTimer();
|
||
194 | |||
195 | return _linked;
|
||
196 | } |
||
197 | |||
198 | void ProgramGLSL::CheckLinkLog()
|
||
199 | { |
||
200 | GLint status; |
||
201 | glGetProgramiv(_programID, GL_LINK_STATUS, &status); |
||
202 | |||
203 | _linked = (status == GL_TRUE); |
||
204 | |||
205 | } |
||
206 | |||
207 | |||
208 | int ProgramGLSL::ValidateProgram()
|
||
209 | { |
||
210 | if(_programID && _linked)
|
||
211 | { |
||
212 | /// GLint status;
|
||
213 | // glValidateProgram(_programID);
|
||
214 | // glGetProgramiv(_programID, GL_VALIDATE_STATUS, &status);
|
||
215 | // return status == GL_TRUE;
|
||
216 | return 1; |
||
217 | } |
||
218 | else
|
||
219 | return 0; |
||
220 | } |
||
221 | |||
222 | void ProgramGLSL::PrintLinkLog(std::ostream &os)
|
||
223 | { |
||
224 | GLint len = 0;
|
||
225 | |||
226 | glGetProgramiv(_programID, GL_INFO_LOG_LENGTH , &len); |
||
227 | if(len <=1) return; |
||
228 | |||
229 | char* linkLog = new char[len+1]; |
||
230 | if(linkLog == NULL) return; |
||
231 | |||
232 | glGetProgramInfoLog(_programID, len, &len, linkLog); |
||
233 | |||
234 | linkLog[len] = 0;
|
||
235 | |||
236 | if(strstr(linkLog, "failed")) |
||
237 | { |
||
238 | os<<linkLog + (linkLog[0] == ' '? 1:0)<<"\n"; |
||
239 | _linked = 0;
|
||
240 | } |
||
241 | |||
242 | delete linkLog;
|
||
243 | } |
||
244 | |||
245 | int ProgramGLSL::UseProgram()
|
||
246 | { |
||
247 | if(ValidateProgram())
|
||
248 | { |
||
249 | glUseProgram(_programID); |
||
250 | return true; |
||
251 | } |
||
252 | else
|
||
253 | { |
||
254 | return false; |
||
255 | } |
||
256 | } |
||
257 | |||
258 | |||
259 | ProgramGLSL::ProgramGLSL(const char *frag_source) |
||
260 | { |
||
261 | _linked = 0;
|
||
262 | _programID = glCreateProgram(); |
||
263 | ShaderObject shader(GL_FRAGMENT_SHADER, frag_source); |
||
264 | |||
265 | if(shader.IsValidFragmentShader())
|
||
266 | { |
||
267 | AttachShaderObject(shader); |
||
268 | LinkProgram(); |
||
269 | |||
270 | if(!_linked)
|
||
271 | { |
||
272 | //shader.PrintCompileLog(std::cout);
|
||
273 | PrintLinkLog(std::cout); |
||
274 | } |
||
275 | }else
|
||
276 | { |
||
277 | _linked = 0;
|
||
278 | } |
||
279 | |||
280 | } |
||
281 | |||
282 | /*
|
||
283 | ProgramGLSL::ProgramGLSL(char*frag_source, char * vert_source)
|
||
284 | {
|
||
285 | _used = 0;
|
||
286 | _linked = 0;
|
||
287 | _programID = glCreateProgram();
|
||
288 | ShaderObject shader(GL_FRAGMENT_SHADER, frag_source);
|
||
289 | ShaderObject vertex_shader(GL_VERTEX_SHADER, vert_source);
|
||
290 | AttachShaderObject(shader);
|
||
291 | AttachShaderObject(vertex_shader);
|
||
292 | LinkProgram();
|
||
293 | if(!_linked)
|
||
294 | {
|
||
295 | shader.PrintCompileLog(std::cout);
|
||
296 | vertex_shader.PrintCompileLog(std::cout);
|
||
297 | PrintLinkLog(std::cout);
|
||
298 | std::cout<<vert_source;
|
||
299 | std::cout<<frag_source;
|
||
300 | }
|
||
301 | |||
302 | }
|
||
303 | */
|
||
304 | |||
305 | |||
306 | |||
307 | void ProgramGLSL::ReLink()
|
||
308 | { |
||
309 | glLinkProgram(_programID); |
||
310 | } |
||
311 | |||
312 | int ProgramGLSL::IsNative()
|
||
313 | { |
||
314 | return _linked;
|
||
315 | } |
||
316 | |||
317 | FilterGLSL::FilterGLSL(float sigma)
|
||
318 | { |
||
319 | //pixel inside 3*sigma box
|
||
320 | int sz = int( ceil( GlobalUtil::_FilterWidthFactor * sigma -0.5) ) ;// |
||
321 | int width = 2*sz + 1; |
||
322 | |||
323 | //filter size truncation
|
||
324 | if(GlobalUtil::_MaxFilterWidth >0 && width > GlobalUtil::_MaxFilterWidth) |
||
325 | { |
||
326 | std::cout<<"Filter size truncated from "<<width<<" to "<<GlobalUtil::_MaxFilterWidth<<endl; |
||
327 | sz = GlobalUtil::_MaxFilterWidth>>1;
|
||
328 | width = 2 * sz + 1; |
||
329 | } |
||
330 | |||
331 | int i;
|
||
332 | float * kernel = new float[width]; |
||
333 | float rv = 1.0f/(sigma*sigma); |
||
334 | float v, ksum =0; |
||
335 | |||
336 | // pre-compute filter
|
||
337 | for( i = -sz ; i <= sz ; ++i)
|
||
338 | { |
||
339 | kernel[i+sz] = v = exp(-0.5f * i * i *rv) ; |
||
340 | ksum += v; |
||
341 | } |
||
342 | |||
343 | //normalize the kernel
|
||
344 | rv = 1.0f / ksum; |
||
345 | for(i = 0; i< width ;i++) kernel[i]*=rv; |
||
346 | //
|
||
347 | |||
348 | MakeFilterProgram(kernel, width); |
||
349 | |||
350 | _size = sz; |
||
351 | |||
352 | delete[] kernel;
|
||
353 | if(GlobalUtil::_verbose && GlobalUtil::_timingL) std::cout<<"Filter: sigma = "<<sigma<<", size = "<<width<<"x"<<width<<endl; |
||
354 | } |
||
355 | |||
356 | |||
357 | void FilterGLSL::MakeFilterProgram(float kernel[], int width) |
||
358 | { |
||
359 | if(GlobalUtil::_usePackedTex)
|
||
360 | { |
||
361 | s_shader_h = CreateFilterHPK(kernel, width); |
||
362 | s_shader_v = CreateFilterVPK(kernel, width); |
||
363 | }else
|
||
364 | { |
||
365 | s_shader_h = CreateFilterH(kernel, width); |
||
366 | s_shader_v = CreateFilterV(kernel, width); |
||
367 | } |
||
368 | } |
||
369 | |||
370 | ProgramGPU* FilterGLSL::CreateFilterH(float kernel[], int width) |
||
371 | { |
||
372 | |||
373 | char buffer[10240]; |
||
374 | ostrstream out(buffer, 10240);
|
||
375 | out<<setprecision(8);
|
||
376 | |||
377 | out<< "uniform sampler2DRect tex;";
|
||
378 | out<< "\nvoid main(void){ float intensity = 0.0 ; vec2 pos;\n";
|
||
379 | |||
380 | int half_width = width / 2; |
||
381 | for(int i = 0; i< width; i++) |
||
382 | { |
||
383 | if(i == half_width)
|
||
384 | { |
||
385 | |||
386 | out<<"float or = texture2DRect(tex, gl_TexCoord[0].st).r;\n";
|
||
387 | out<<"intensity+= or * "<<kernel[i]<<";\n"; |
||
388 | }else
|
||
389 | { |
||
390 | out<<"pos = gl_TexCoord[0].st + vec2(float("<< (i - half_width) <<") , 0);\n"; |
||
391 | out<<"intensity+= "<<kernel[i]<<"*texture2DRect(tex, pos).r;\n"; |
||
392 | } |
||
393 | } |
||
394 | |||
395 | //copy original data to red channel
|
||
396 | out<<"gl_FragColor.r = or;\n";
|
||
397 | out<<"gl_FragColor.b = intensity;}\n"<<'\0'; |
||
398 | |||
399 | return new ProgramGLSL( buffer); |
||
400 | } |
||
401 | |||
402 | |||
403 | ProgramGPU* FilterGLSL::CreateFilterV(float kernel[], int height) |
||
404 | { |
||
405 | |||
406 | char buffer[10240]; |
||
407 | ostrstream out(buffer, 10240);
|
||
408 | out<<setprecision(8);
|
||
409 | |||
410 | out<< "uniform sampler2DRect tex;";
|
||
411 | out<< "\nvoid main(void){ float intensity = 0.0;vec2 pos; \n";
|
||
412 | int half_height = height / 2; |
||
413 | for(int i = 0; i< height; i++) |
||
414 | { |
||
415 | |||
416 | if(i == half_height)
|
||
417 | { |
||
418 | out<<"vec2 orb = texture2DRect(tex, gl_TexCoord[0].st).rb;\n";
|
||
419 | out<<"intensity+= orb.y * "<<kernel[i]<<";\n"; |
||
420 | |||
421 | }else
|
||
422 | { |
||
423 | out<<"pos = gl_TexCoord[0].st + vec2(0, float("<<(i - half_height) <<") );\n"; |
||
424 | out<<"intensity+= texture2DRect(tex, pos).b * "<<kernel[i]<<";\n"; |
||
425 | } |
||
426 | |||
427 | } |
||
428 | |||
429 | out<<"gl_FragColor.b = orb.y;\n";
|
||
430 | out<<"gl_FragColor.g = intensity - orb.x;\n"; // difference of gaussian.. |
||
431 | out<<"gl_FragColor.r = intensity;}\n"<<'\0'; |
||
432 | |||
433 | // std::cout<<buffer<<endl;
|
||
434 | return new ProgramGLSL( buffer); |
||
435 | } |
||
436 | |||
437 | |||
438 | |||
439 | ProgramGPU* FilterGLSL::CreateFilterHPK(float kernel[], int width) |
||
440 | { |
||
441 | //both h and v are packed...
|
||
442 | int i, j , xw, xwn;
|
||
443 | |||
444 | int halfwidth = width >>1; |
||
445 | float * pf = kernel + halfwidth;
|
||
446 | int nhpixel = (halfwidth+1)>>1; //how many neighbour pixels need to be looked up |
||
447 | int npixel = (nhpixel<<1)+1;// |
||
448 | char buffer[10240]; |
||
449 | float weight[3]; |
||
450 | ostrstream out(buffer, 10240);
|
||
451 | out<<setprecision(8);
|
||
452 | |||
453 | out<< "uniform sampler2DRect tex;";
|
||
454 | out<< "\nvoid main(void){ vec4 result = vec4(0, 0, 0, 0);\n";
|
||
455 | ///use multi texture coordinate because nhpixels can be at most 3
|
||
456 | out<<"vec4 pc; vec2 coord; \n";
|
||
457 | for( i = 0 ; i < npixel ; i++) |
||
458 | { |
||
459 | out<<"coord = gl_TexCoord[0].xy + vec2(float("<<i-nhpixel<<"),0);\n"; |
||
460 | out<<"pc=texture2DRect(tex, coord);\n";
|
||
461 | if(GlobalUtil::_PreciseBorder) out<<"if(coord.x < 0) pc = pc.rrbb;\n"; |
||
462 | //for each sub-pixel j in center, the weight of sub-pixel k
|
||
463 | xw = (i - nhpixel)*2;
|
||
464 | for( j = 0; j < 3; j++) |
||
465 | { |
||
466 | xwn = xw + j -1;
|
||
467 | weight[j] = xwn < -halfwidth || xwn > halfwidth? 0 : pf[xwn];
|
||
468 | } |
||
469 | if(weight[1] == 0.0) |
||
470 | { |
||
471 | out<<"result += vec4("<<weight[2]<<","<<weight[0]<<","<<weight[2]<<","<<weight[0]<<")*pc.grab;\n"; |
||
472 | } |
||
473 | else
|
||
474 | { |
||
475 | out<<"result += vec4("<<weight[1]<<", "<<weight[0]<<", "<<weight[1]<<", "<<weight[0]<<")*pc.rrbb;\n"; |
||
476 | out<<"result += vec4("<<weight[2]<<", "<<weight[1]<<", "<<weight[2]<<", "<<weight[1]<<")*pc.ggaa;\n"; |
||
477 | } |
||
478 | |||
479 | } |
||
480 | out<<"gl_FragColor = result;}\n"<<'\0'; |
||
481 | |||
482 | return new ProgramGLSL( buffer); |
||
483 | |||
484 | |||
485 | } |
||
486 | |||
487 | |||
488 | ProgramGPU* FilterGLSL::CreateFilterVPK(float kernel[], int height) |
||
489 | { |
||
490 | |||
491 | //both h and v are packed...
|
||
492 | int i, j, yw, ywn;
|
||
493 | |||
494 | int halfh = height >>1; |
||
495 | float * pf = kernel + halfh;
|
||
496 | int nhpixel = (halfh+1)>>1; //how many neighbour pixels need to be looked up |
||
497 | int npixel = (nhpixel<<1)+1;// |
||
498 | char buffer[10240]; |
||
499 | float weight[3]; |
||
500 | ostrstream out(buffer, 10240);
|
||
501 | out<<setprecision(8);
|
||
502 | |||
503 | out<< "uniform sampler2DRect tex;";
|
||
504 | out<< "\nvoid main(void){ vec4 result = vec4(0, 0, 0, 0);\n";
|
||
505 | ///use multi texture coordinate because nhpixels can be at most 3
|
||
506 | out<<"vec4 pc; vec2 coord;\n";
|
||
507 | for( i = 0 ; i < npixel ; i++) |
||
508 | { |
||
509 | out<<"coord = gl_TexCoord[0].xy + vec2(0, float("<<i-nhpixel<<"));\n"; |
||
510 | out<<"pc=texture2DRect(tex, coord);\n";
|
||
511 | if(GlobalUtil::_PreciseBorder) out<<"if(coord.y < 0) pc = pc.rgrg;\n"; |
||
512 | |||
513 | //for each sub-pixel j in center, the weight of sub-pixel k
|
||
514 | yw = (i - nhpixel)*2;
|
||
515 | for( j = 0; j < 3; j++) |
||
516 | { |
||
517 | ywn = yw + j -1;
|
||
518 | weight[j] = ywn < -halfh || ywn > halfh? 0 : pf[ywn];
|
||
519 | } |
||
520 | if(weight[1] == 0.0) |
||
521 | { |
||
522 | out<<"result += vec4("<<weight[2]<<","<<weight[2]<<","<<weight[0]<<","<<weight[0]<<")*pc.barg;\n"; |
||
523 | }else
|
||
524 | { |
||
525 | out<<"result += vec4("<<weight[1]<<","<<weight[1]<<","<<weight[0]<<","<<weight[0]<<")*pc.rgrg;\n"; |
||
526 | out<<"result += vec4("<<weight[2]<<","<<weight[2]<<","<<weight[1]<<","<<weight[1]<<")*pc.baba;\n"; |
||
527 | } |
||
528 | } |
||
529 | out<<"gl_FragColor = result;}\n"<<'\0'; |
||
530 | |||
531 | return new ProgramGLSL( buffer); |
||
532 | } |
||
533 | |||
534 | |||
535 | |||
536 | ShaderBag::ShaderBag() |
||
537 | { |
||
538 | s_debug = 0;
|
||
539 | s_orientation = 0;
|
||
540 | s_display_gaussian = 0;
|
||
541 | s_display_dog = 0;
|
||
542 | s_display_grad = 0;
|
||
543 | s_display_keys = 0;
|
||
544 | s_sampling = 0;
|
||
545 | s_grad_pass = 0;
|
||
546 | s_dog_pass = 0;
|
||
547 | s_keypoint = 0;
|
||
548 | s_genlist_init_tight = 0;
|
||
549 | s_genlist_init_ex = 0;
|
||
550 | s_genlist_histo = 0;
|
||
551 | s_genlist_start = 0;
|
||
552 | s_genlist_step = 0;
|
||
553 | s_genlist_end = 0;
|
||
554 | s_vertex_list = 0;
|
||
555 | s_descriptor_fp = 0;
|
||
556 | s_margin_copy = 0;
|
||
557 | ////////////
|
||
558 | f_gaussian_skip0 = NULL;
|
||
559 | f_gaussian_skip1 = NULL;
|
||
560 | f_gaussian_step = NULL;
|
||
561 | _gaussian_step_num = 0;
|
||
562 | |||
563 | } |
||
564 | |||
565 | ShaderBag::~ShaderBag() |
||
566 | { |
||
567 | if(s_debug)delete s_debug; |
||
568 | if(s_orientation)delete s_orientation; |
||
569 | if(s_display_gaussian)delete s_display_gaussian; |
||
570 | if(s_display_dog)delete s_display_dog; |
||
571 | if(s_display_grad)delete s_display_grad; |
||
572 | if(s_display_keys)delete s_display_keys; |
||
573 | if(s_sampling)delete s_sampling; |
||
574 | if(s_grad_pass)delete s_grad_pass; |
||
575 | if(s_dog_pass) delete s_dog_pass; |
||
576 | if(s_keypoint)delete s_keypoint; |
||
577 | if(s_genlist_init_tight)delete s_genlist_init_tight; |
||
578 | if(s_genlist_init_ex)delete s_genlist_init_ex; |
||
579 | if(s_genlist_histo)delete s_genlist_histo; |
||
580 | if(s_genlist_start)delete s_genlist_start; |
||
581 | if(s_genlist_step)delete s_genlist_step; |
||
582 | if(s_genlist_end)delete s_genlist_end; |
||
583 | if(s_vertex_list)delete s_vertex_list; |
||
584 | if(s_descriptor_fp)delete s_descriptor_fp; |
||
585 | if(s_margin_copy) delete s_margin_copy; |
||
586 | |||
587 | //////////////////////////////////////////////
|
||
588 | if(f_gaussian_skip1) delete f_gaussian_skip1; |
||
589 | |||
590 | for(unsigned int i = 0; i < f_gaussian_skip0_v.size(); i++) |
||
591 | { |
||
592 | if(f_gaussian_skip0_v[i]) delete f_gaussian_skip0_v[i]; |
||
593 | } |
||
594 | if(f_gaussian_step && _gaussian_step_num > 0) |
||
595 | { |
||
596 | for(int i = 0; i< _gaussian_step_num; i++) |
||
597 | { |
||
598 | delete f_gaussian_step[i];
|
||
599 | } |
||
600 | delete[] f_gaussian_step;
|
||
601 | } |
||
602 | } |
||
603 | |||
604 | |||
605 | void ShaderBag::SelectInitialSmoothingFilter(int octave_min, SiftParam¶m) |
||
606 | { |
||
607 | float sigma = param.GetInitialSmoothSigma(octave_min);
|
||
608 | if(sigma == 0) |
||
609 | { |
||
610 | f_gaussian_skip0 = NULL;
|
||
611 | }else
|
||
612 | { |
||
613 | for(unsigned int i = 0; i < f_gaussian_skip0_v.size(); i++) |
||
614 | { |
||
615 | if(f_gaussian_skip0_v[i]->_id == octave_min)
|
||
616 | { |
||
617 | f_gaussian_skip0 = f_gaussian_skip0_v[i]; |
||
618 | return ;
|
||
619 | } |
||
620 | } |
||
621 | FilterGLSL * filter = new FilterGLSL(sigma);
|
||
622 | filter->_id = octave_min; |
||
623 | f_gaussian_skip0_v.push_back(filter); |
||
624 | f_gaussian_skip0 = filter; |
||
625 | } |
||
626 | } |
||
627 | |||
628 | void ShaderBag::CreateGaussianFilters(SiftParam¶m)
|
||
629 | { |
||
630 | if(param._sigma_skip0>0.0f) |
||
631 | { |
||
632 | FilterGLSL * filter; |
||
633 | f_gaussian_skip0 = filter = new FilterGLSL(param._sigma_skip0);
|
||
634 | filter->_id = GlobalUtil::_octave_min_default; |
||
635 | f_gaussian_skip0_v.push_back(filter); |
||
636 | } |
||
637 | if(param._sigma_skip1>0.0f) |
||
638 | { |
||
639 | f_gaussian_skip1 = new FilterGLSL(param._sigma_skip1);
|
||
640 | } |
||
641 | |||
642 | f_gaussian_step = new FilterProgram*[param._sigma_num];
|
||
643 | for(int i = 0; i< param._sigma_num; i++) |
||
644 | { |
||
645 | f_gaussian_step[i] = new FilterGLSL(param._sigma[i]);
|
||
646 | } |
||
647 | _gaussian_step_num = param._sigma_num; |
||
648 | } |
||
649 | |||
650 | |||
651 | void ShaderBag::LoadDynamicShaders(SiftParam& param)
|
||
652 | { |
||
653 | LoadKeypointShader(param._dog_threshold, param._edge_threshold); |
||
654 | LoadGenListShader(param._dog_level_num, 0);
|
||
655 | CreateGaussianFilters(param); |
||
656 | } |
||
657 | |||
658 | |||
659 | void ShaderBagGLSL::LoadFixedShaders()
|
||
660 | { |
||
661 | |||
662 | |||
663 | s_gray = new ProgramGLSL(
|
||
664 | "uniform sampler2DRect rgbTex; void main(void){\n"
|
||
665 | "float intensity = dot(vec3(0.299, 0.587, 0.114), texture2DRect(rgbTex,gl_TexCoord[0].st ).rgb);\n"
|
||
666 | "gl_FragColor = vec4(intensity, intensity, intensity, 1.0);}");
|
||
667 | |||
668 | |||
669 | s_debug = new ProgramGLSL( "void main(void){gl_FragColor.rg = gl_TexCoord[0].st;}"); |
||
670 | |||
671 | |||
672 | s_sampling = new ProgramGLSL(
|
||
673 | "uniform sampler2DRect tex; void main(void){gl_FragColor.rg= texture2DRect(tex, gl_TexCoord[0].st).rg;}");
|
||
674 | |||
675 | //
|
||
676 | s_grad_pass = new ProgramGLSL(
|
||
677 | "uniform sampler2DRect tex; void main ()\n"
|
||
678 | "{\n"
|
||
679 | " vec4 v1, v2, gg;\n"
|
||
680 | " vec4 cc = texture2DRect(tex, gl_TexCoord[0].xy);\n"
|
||
681 | " gg.x = texture2DRect(tex, gl_TexCoord[1].xy).r;\n"
|
||
682 | " gg.y = texture2DRect(tex, gl_TexCoord[2].xy).r;\n"
|
||
683 | " gg.z = texture2DRect(tex, gl_TexCoord[3].xy).r;\n"
|
||
684 | " gg.w = texture2DRect(tex, gl_TexCoord[4].xy).r;\n"
|
||
685 | " vec2 dxdy = (gg.yw - gg.xz); \n"
|
||
686 | " float grad = 0.5*length(dxdy);\n"
|
||
687 | " float theta = grad==0? 0.0: atan(dxdy.y, dxdy.x);\n"
|
||
688 | " gl_FragData[0] = vec4(cc.rg, grad, theta);\n"
|
||
689 | "}\n\0");
|
||
690 | |||
691 | ProgramGLSL * program; |
||
692 | s_margin_copy = program = new ProgramGLSL(
|
||
693 | "uniform sampler2DRect tex; uniform vec2 truncate;\n"
|
||
694 | "void main(){ gl_FragColor = texture2DRect(tex, min(gl_TexCoord[0].xy, truncate)); }");
|
||
695 | |||
696 | _param_margin_copy_truncate = glGetUniformLocation(*program, "truncate");
|
||
697 | |||
698 | |||
699 | GlobalUtil::_OrientationPack2 = 0;
|
||
700 | LoadOrientationShader(); |
||
701 | |||
702 | if(s_orientation == NULL) |
||
703 | { |
||
704 | //Load a simplified version if the right version is not supported
|
||
705 | s_orientation = program = new ProgramGLSL(
|
||
706 | "uniform sampler2DRect fTex; uniform sampler2DRect oTex;\n"
|
||
707 | " uniform float size; void main(){\n"
|
||
708 | " vec4 cc = texture2DRect(fTex, gl_TexCoord[0].st);\n"
|
||
709 | " vec4 oo = texture2DRect(oTex, cc.rg);\n"
|
||
710 | " gl_FragColor.rg = cc.rg;\n"
|
||
711 | " gl_FragColor.b = oo.a;\n"
|
||
712 | " gl_FragColor.a = size;}");
|
||
713 | |||
714 | _param_orientation_gtex = glGetUniformLocation(*program, "oTex");
|
||
715 | _param_orientation_size = glGetUniformLocation(*program, "size");
|
||
716 | |||
717 | GlobalUtil::_MaxOrientation = 0;
|
||
718 | GlobalUtil::_FullSupported = 0;
|
||
719 | std::cerr<<"Orientation simplified on this hardware"<<endl;
|
||
720 | } |
||
721 | |||
722 | |||
723 | if(GlobalUtil::_DescriptorPPT) LoadDescriptorShader();
|
||
724 | if(s_descriptor_fp == NULL) |
||
725 | { |
||
726 | GlobalUtil::_DescriptorPPT = GlobalUtil::_FullSupported = 0;
|
||
727 | std::cerr<<"Descriptor ignored on this hardware"<<endl;
|
||
728 | } |
||
729 | |||
730 | s_zero_pass = new ProgramGLSL("void main(){gl_FragColor = vec4(0.0);}"); |
||
731 | } |
||
732 | |||
733 | |||
734 | void ShaderBagGLSL::LoadDisplayShaders()
|
||
735 | { |
||
736 | s_copy_key = new ProgramGLSL(
|
||
737 | "uniform sampler2DRect tex; void main(){\n"
|
||
738 | "gl_FragColor.rg= texture2DRect(tex, gl_TexCoord[0].st).rg; gl_FragColor.ba = vec2(0.0,1.0); }");
|
||
739 | |||
740 | |||
741 | ProgramGLSL * program; |
||
742 | s_vertex_list = program = new ProgramGLSL(
|
||
743 | "uniform vec4 sizes; uniform sampler2DRect tex;\n"
|
||
744 | "void main(void){\n"
|
||
745 | "float fwidth = sizes.y; float twidth = sizes.z; float rwidth = sizes.w; \n"
|
||
746 | "float index = 0.1*(fwidth*floor(gl_TexCoord[0].y) + gl_TexCoord[0].x);\n"
|
||
747 | "float px = mod(index, twidth);\n"
|
||
748 | "vec2 tpos= floor(vec2(px, index*rwidth))+0.5;\n"
|
||
749 | "vec4 cc = texture2DRect(tex, tpos );\n"
|
||
750 | "float size = 3.0f * cc.a; //sizes.x;// \n"
|
||
751 | "gl_FragColor.zw = vec2(0.0, 1.0);\n"
|
||
752 | "if(cc.x<=0 || cc.y <=0) {gl_FragColor.xy = cc.xy; }\n"
|
||
753 | "else {float type = fract(px);\n"
|
||
754 | "vec2 dxy; \n"
|
||
755 | "dxy.x = type < 0.1 ? 0.0 : ((type <0.5 || type > 0.9)? size : -size);\n"
|
||
756 | "dxy.y = type < 0.2 ? 0.0 : ((type < 0.3 || type > 0.7 )? -size :size); \n"
|
||
757 | "float s = sin(cc.b); float c = cos(cc.b); \n"
|
||
758 | "gl_FragColor.x = cc.x + c*dxy.x-s*dxy.y;\n"
|
||
759 | "gl_FragColor.y = cc.y + c*dxy.y+s*dxy.x;}\n}\n");
|
||
760 | |||
761 | |||
762 | _param_genvbo_size = glGetUniformLocation(*program, "sizes");
|
||
763 | s_display_gaussian = new ProgramGLSL(
|
||
764 | "uniform sampler2DRect tex; void main(void){float r = texture2DRect(tex, gl_TexCoord[0].st).r;\n"
|
||
765 | "gl_FragColor = vec4(r, r, r, 1);}" );
|
||
766 | |||
767 | s_display_dog = new ProgramGLSL(
|
||
768 | "uniform sampler2DRect tex; void main(void){float g = 0.5+(20.0*texture2DRect(tex, gl_TexCoord[0].st).g);\n"
|
||
769 | "gl_FragColor = vec4(g, g, g, 0.0);}" );
|
||
770 | |||
771 | s_display_grad = new ProgramGLSL(
|
||
772 | "uniform sampler2DRect tex; void main(void){\n"
|
||
773 | " vec4 cc = texture2DRect(tex, gl_TexCoord[0].st);gl_FragColor = vec4(5.0* cc.bbb, 1.0);}");
|
||
774 | |||
775 | s_display_keys= new ProgramGLSL(
|
||
776 | "uniform sampler2DRect tex; void main(void){\n"
|
||
777 | " vec4 cc = texture2DRect(tex, gl_TexCoord[0].st);\n"
|
||
778 | " if(cc.r ==0.0) discard; gl_FragColor = (cc.r==1.0? vec4(1.0, 0.0, 0,1.0):vec4(0.0,1.0,0.0,1.0));}");
|
||
779 | } |
||
780 | |||
781 | void ShaderBagGLSL::LoadKeypointShader(float threshold, float edge_threshold) |
||
782 | { |
||
783 | |||
784 | char buffer[10240]; |
||
785 | float threshold0 = threshold* (GlobalUtil::_SubpixelLocalization?0.8f:1.0f); |
||
786 | float threshold1 = threshold;
|
||
787 | float threshold2 = (edge_threshold+1)*(edge_threshold+1)/edge_threshold; |
||
788 | ostrstream out(buffer, 10240);
|
||
789 | streampos pos; |
||
790 | |||
791 | //tex(X)(Y)
|
||
792 | //X: (CLR) (CENTER 0, LEFT -1, RIGHT +1)
|
||
793 | //Y: (CDU) (CENTER 0, DOWN -1, UP +1)
|
||
794 | |||
795 | out << "#define THRESHOLD0 " << threshold0 << "\n" |
||
796 | "#define THRESHOLD1 " << threshold1 << "\n" |
||
797 | "#define THRESHOLD2 " << threshold2 << "\n"; |
||
798 | |||
799 | out<< |
||
800 | "uniform sampler2DRect tex, texU, texD; void main ()\n"
|
||
801 | "{\n"
|
||
802 | " vec4 v1, v2, gg, temp;\n"
|
||
803 | " vec2 TexRU = vec2(gl_TexCoord[2].x, gl_TexCoord[4].y); \n"
|
||
804 | " vec4 cc = texture2DRect(tex, gl_TexCoord[0].xy);\n"
|
||
805 | " temp = texture2DRect(tex, gl_TexCoord[1].xy);\n"
|
||
806 | " v1.x = temp.g; gg.x = temp.r;\n"
|
||
807 | " temp = texture2DRect(tex, gl_TexCoord[2].xy) ;\n"
|
||
808 | " v1.y = temp.g; gg.y = temp.r;\n"
|
||
809 | " temp = texture2DRect(tex, gl_TexCoord[3].xy) ;\n"
|
||
810 | " v1.z = temp.g; gg.z = temp.r;\n"
|
||
811 | " temp = texture2DRect(tex, gl_TexCoord[4].xy) ;\n"
|
||
812 | " v1.w = temp.g; gg.w = temp.r;\n"
|
||
813 | " v2.x = texture2DRect(tex, gl_TexCoord[5].xy).g;\n"
|
||
814 | " v2.y = texture2DRect(tex, gl_TexCoord[6].xy).g;\n"
|
||
815 | " v2.z = texture2DRect(tex, gl_TexCoord[7].xy).g;\n"
|
||
816 | " v2.w = texture2DRect(tex, TexRU.xy).g;\n"
|
||
817 | " vec2 dxdy = (gg.yw - gg.xz); \n"
|
||
818 | " float grad = 0.5*length(dxdy);\n"
|
||
819 | " float theta = grad==0? 0.0: atan(dxdy.y, dxdy.x);\n"
|
||
820 | " gl_FragData[0] = vec4(cc.rg, grad, theta);\n"
|
||
821 | |||
822 | //test against 8 neighbours
|
||
823 | //use variable to identify type of extremum
|
||
824 | //1.0 for local maximum and 0.5 for minimum
|
||
825 | << |
||
826 | " float dog = 0.0; \n"
|
||
827 | " gl_FragData[1] = vec4(0, 0, 0, 0); \n"
|
||
828 | " dog = cc.g > THRESHOLD0 && all(greaterThan(cc.gggg, max(v1, v2)))?1.0: 0.0;\n"
|
||
829 | " dog = cc.g < -THRESHOLD0 && all(lessThan(cc.gggg, min(v1, v2)))?0.5: dog;\n"
|
||
830 | " if(dog == 0.0) return;\n";
|
||
831 | |||
832 | pos = out.tellp(); |
||
833 | //do edge supression first..
|
||
834 | //vector v1 is < (-1, 0), (1, 0), (0,-1), (0, 1)>
|
||
835 | //vector v2 is < (-1,-1), (-1,1), (1,-1), (1, 1)>
|
||
836 | |||
837 | out<< |
||
838 | " float fxx, fyy, fxy; \n"
|
||
839 | " vec4 D2 = v1.xyzw - cc.gggg;\n"
|
||
840 | " vec2 D4 = v2.xw - v2.yz;\n"
|
||
841 | " fxx = D2.x + D2.y;\n"
|
||
842 | " fyy = D2.z + D2.w;\n"
|
||
843 | " fxy = 0.25*(D4.x + D4.y);\n"
|
||
844 | " float fxx_plus_fyy = fxx + fyy;\n"
|
||
845 | " float score_up = fxx_plus_fyy*fxx_plus_fyy; \n"
|
||
846 | " float score_down = (fxx*fyy - fxy*fxy);\n"
|
||
847 | " if( score_down <= 0 || score_up > THRESHOLD2 * score_down)return;\n";
|
||
848 | |||
849 | //...
|
||
850 | out<<" \n"
|
||
851 | " vec2 D5 = 0.5*(v1.yw-v1.xz); \n"
|
||
852 | " float fx = D5.x, fy = D5.y ; \n"
|
||
853 | " float fs, fss , fxs, fys ; \n"
|
||
854 | " vec2 v3; vec4 v4, v5, v6;\n"
|
||
855 | //read 9 pixels of upper level
|
||
856 | << |
||
857 | " v3.x = texture2DRect(texU, gl_TexCoord[0].xy).g;\n"
|
||
858 | " v4.x = texture2DRect(texU, gl_TexCoord[1].xy).g;\n"
|
||
859 | " v4.y = texture2DRect(texU, gl_TexCoord[2].xy).g;\n"
|
||
860 | " v4.z = texture2DRect(texU, gl_TexCoord[3].xy).g;\n"
|
||
861 | " v4.w = texture2DRect(texU, gl_TexCoord[4].xy).g;\n"
|
||
862 | " v6.x = texture2DRect(texU, gl_TexCoord[5].xy).g;\n"
|
||
863 | " v6.y = texture2DRect(texU, gl_TexCoord[6].xy).g;\n"
|
||
864 | " v6.z = texture2DRect(texU, gl_TexCoord[7].xy).g;\n"
|
||
865 | " v6.w = texture2DRect(texU, TexRU.xy).g;\n"
|
||
866 | //compare with 9 pixels of upper level
|
||
867 | //read and compare with 9 pixels of lower level
|
||
868 | //the maximum case
|
||
869 | << |
||
870 | " if(dog == 1.0)\n"
|
||
871 | " {\n"
|
||
872 | " if(cc.g < v3.x || any(lessThan(cc.gggg, v4)) ||any(lessThan(cc.gggg, v6)))return; \n"
|
||
873 | " v3.y = texture2DRect(texD, gl_TexCoord[0].xy).g;\n"
|
||
874 | " v5.x = texture2DRect(texD, gl_TexCoord[1].xy).g;\n"
|
||
875 | " v5.y = texture2DRect(texD, gl_TexCoord[2].xy).g;\n"
|
||
876 | " v5.z = texture2DRect(texD, gl_TexCoord[3].xy).g;\n"
|
||
877 | " v5.w = texture2DRect(texD, gl_TexCoord[4].xy).g;\n"
|
||
878 | " v6.x = texture2DRect(texD, gl_TexCoord[5].xy).g;\n"
|
||
879 | " v6.y = texture2DRect(texD, gl_TexCoord[6].xy).g;\n"
|
||
880 | " v6.z = texture2DRect(texD, gl_TexCoord[7].xy).g;\n"
|
||
881 | " v6.w = texture2DRect(texD, TexRU.xy).g;\n"
|
||
882 | " if(cc.g < v3.y || any(lessThan(cc.gggg, v5)) ||any(lessThan(cc.gggg, v6)))return; \n"
|
||
883 | " }\n"
|
||
884 | //the minimum case
|
||
885 | << |
||
886 | " else{\n"
|
||
887 | " if(cc.g > v3.x || any(greaterThan(cc.gggg, v4)) ||any(greaterThan(cc.gggg, v6)))return; \n"
|
||
888 | " v3.y = texture2DRect(texD, gl_TexCoord[0].xy).g;\n"
|
||
889 | " v5.x = texture2DRect(texD, gl_TexCoord[1].xy).g;\n"
|
||
890 | " v5.y = texture2DRect(texD, gl_TexCoord[2].xy).g;\n"
|
||
891 | " v5.z = texture2DRect(texD, gl_TexCoord[3].xy).g;\n"
|
||
892 | " v5.w = texture2DRect(texD, gl_TexCoord[4].xy).g;\n"
|
||
893 | " v6.x = texture2DRect(texD, gl_TexCoord[5].xy).g;\n"
|
||
894 | " v6.y = texture2DRect(texD, gl_TexCoord[6].xy).g;\n"
|
||
895 | " v6.z = texture2DRect(texD, gl_TexCoord[7].xy).g;\n"
|
||
896 | " v6.w = texture2DRect(texD, TexRU.xy).g;\n"
|
||
897 | " if(cc.g > v3.y || any(greaterThan(cc.gggg, v5)) ||any(greaterThan(cc.gggg, v6)))return; \n"
|
||
898 | " }\n";
|
||
899 | |||
900 | if(GlobalUtil::_SubpixelLocalization)
|
||
901 | |||
902 | // sub-pixel localization FragData1 = vec4(dog, 0, 0, 0); return;
|
||
903 | out << |
||
904 | " fs = 0.5*( v3.x - v3.y ); \n"
|
||
905 | " fss = v3.x + v3.y - cc.g - cc.g;\n"
|
||
906 | " fxs = 0.25 * ( v4.y + v5.x - v4.x - v5.y);\n"
|
||
907 | " fys = 0.25 * ( v4.w + v5.z - v4.z - v5.w);\n"
|
||
908 | |||
909 | //
|
||
910 | // let dog difference be quatratic function of dx, dy, ds;
|
||
911 | // df(dx, dy, ds) = fx * dx + fy*dy + fs * ds +
|
||
912 | // + 0.5 * ( fxx * dx * dx + fyy * dy * dy + fss * ds * ds)
|
||
913 | // + (fxy * dx * dy + fxs * dx * ds + fys * dy * ds)
|
||
914 | // (fx, fy, fs, fxx, fyy, fss, fxy, fxs, fys are the derivatives)
|
||
915 | |||
916 | //the local extremum satisfies
|
||
917 | // df/dx = 0, df/dy = 0, df/dz = 0
|
||
918 | |||
919 | //that is
|
||
920 | // |-fx| | fxx fxy fxs | |dx|
|
||
921 | // |-fy| = | fxy fyy fys | * |dy|
|
||
922 | // |-fs| | fxs fys fss | |ds|
|
||
923 | // need to solve dx, dy, ds
|
||
924 | |||
925 | // Use Gauss elimination to solve the linear system
|
||
926 | << |
||
927 | " vec3 dxys = vec3(0.0); \n"
|
||
928 | " vec4 A0, A1, A2 ; \n"
|
||
929 | " A0 = vec4(fxx, fxy, fxs, -fx); \n"
|
||
930 | " A1 = vec4(fxy, fyy, fys, -fy); \n"
|
||
931 | " A2 = vec4(fxs, fys, fss, -fs); \n"
|
||
932 | " vec3 x3 = abs(vec3(fxx, fxy, fxs)); \n"
|
||
933 | " float maxa = max(max(x3.x, x3.y), x3.z); \n"
|
||
934 | " if(maxa >= 1e-10 ) { \n"
|
||
935 | " if(x3.y ==maxa ) \n"
|
||
936 | " { \n"
|
||
937 | " vec4 TEMP = A1; A1 = A0; A0 = TEMP; \n"
|
||
938 | " }else if( x3.z == maxa ) \n"
|
||
939 | " { \n"
|
||
940 | " vec4 TEMP = A2; A2 = A0; A0 = TEMP; \n"
|
||
941 | " } \n"
|
||
942 | " A0 /= A0.x; \n"
|
||
943 | " A1 -= A1.x * A0; \n"
|
||
944 | " A2 -= A2.x * A0; \n"
|
||
945 | " vec2 x2 = abs(vec2(A1.y, A2.y)); \n"
|
||
946 | " if( x2.y > x2.x ) \n"
|
||
947 | " { \n"
|
||
948 | " vec3 TEMP = A2.yzw; \n"
|
||
949 | " A2.yzw = A1.yzw; \n"
|
||
950 | " A1.yzw = TEMP; \n"
|
||
951 | " x2.x = x2.y; \n"
|
||
952 | " } \n"
|
||
953 | " if(x2.x >= 1e-10) { \n"
|
||
954 | " A1.yzw /= A1.y; \n"
|
||
955 | " A2.yzw -= A2.y * A1.yzw; \n"
|
||
956 | " if(abs(A2.z) >= 1e-10) { \n"
|
||
957 | // compute dx, dy, ds:
|
||
958 | << |
||
959 | " \n"
|
||
960 | " dxys.z = A2.w /A2.z; \n"
|
||
961 | " dxys.y = A1.w - dxys.z*A1.z; \n"
|
||
962 | " dxys.x = A0.w - dxys.z*A0.z - dxys.y*A0.y; \n"
|
||
963 | |||
964 | //one more threshold which I forgot in versions prior to 286
|
||
965 | << |
||
966 | " bool dog_test = (abs(cc.g + 0.5*dot(vec3(fx, fy, fs), dxys ))<= THRESHOLD1) ;\n"
|
||
967 | " if(dog_test || any(greaterThan(abs(dxys), vec3(1.0)))) dog = 0;\n"
|
||
968 | " }\n"
|
||
969 | " }\n"
|
||
970 | " }\n"
|
||
971 | //keep the point when the offset is less than 1
|
||
972 | << |
||
973 | " gl_FragData[1] = vec4( dog, dxys); \n";
|
||
974 | else
|
||
975 | |||
976 | out<< |
||
977 | " gl_FragData[1] = vec4( dog, 0, 0, 0) ; \n";
|
||
978 | |||
979 | out<< |
||
980 | "}\n" <<'\0'; |
||
981 | |||
982 | |||
983 | |||
984 | ProgramGLSL * program = new ProgramGLSL(buffer);
|
||
985 | if(program->IsNative())
|
||
986 | { |
||
987 | s_keypoint = program ; |
||
988 | //parameter
|
||
989 | }else
|
||
990 | { |
||
991 | delete program;
|
||
992 | out.seekp(pos); |
||
993 | out << |
||
994 | " gl_FragData[1] = vec4(dog, 0, 0, 0) ; \n"
|
||
995 | "}\n" <<'\0'; |
||
996 | s_keypoint = program = new ProgramGLSL(buffer);
|
||
997 | GlobalUtil::_SubpixelLocalization = 0;
|
||
998 | std::cerr<<"Detection simplified on this hardware"<<endl;
|
||
999 | } |
||
1000 | |||
1001 | _param_dog_texu = glGetUniformLocation(*program, "texU");
|
||
1002 | _param_dog_texd = glGetUniformLocation(*program, "texD");
|
||
1003 | } |
||
1004 | |||
1005 | |||
1006 | void ShaderBagGLSL::SetDogTexParam(int texU, int texD) |
||
1007 | { |
||
1008 | glUniform1i(_param_dog_texu, 1);
|
||
1009 | glUniform1i(_param_dog_texd, 2);
|
||
1010 | } |
||
1011 | |||
1012 | void ShaderBagGLSL::SetGenListStepParam(int tex, int tex0) |
||
1013 | { |
||
1014 | glUniform1i(_param_genlist_step_tex0, 1);
|
||
1015 | } |
||
1016 | void ShaderBagGLSL::SetGenVBOParam( float width, float fwidth, float size) |
||
1017 | { |
||
1018 | float sizes[4] = {size*3.0f, fwidth, width, 1.0f/width}; |
||
1019 | glUniform4fv(_param_genvbo_size, 1, sizes);
|
||
1020 | |||
1021 | } |
||
1022 | |||
1023 | |||
1024 | |||
1025 | void ShaderBagGLSL::UnloadProgram()
|
||
1026 | { |
||
1027 | glUseProgram(0);
|
||
1028 | } |
||
1029 | |||
1030 | |||
1031 | |||
1032 | void ShaderBagGLSL::LoadGenListShader(int ndoglev, int nlev) |
||
1033 | { |
||
1034 | ProgramGLSL * program; |
||
1035 | |||
1036 | s_genlist_init_tight = new ProgramGLSL(
|
||
1037 | "uniform sampler2DRect tex; void main (void){\n"
|
||
1038 | "vec4 helper = vec4( texture2DRect(tex, gl_TexCoord[0].xy).r, texture2DRect(tex, gl_TexCoord[1].xy).r,\n"
|
||
1039 | "texture2DRect(tex, gl_TexCoord[2].xy).r, texture2DRect(tex, gl_TexCoord[3].xy).r);\n"
|
||
1040 | "gl_FragColor = vec4(greaterThan(helper, vec4(0.0,0.0,0.0,0.0)));\n"
|
||
1041 | "}");
|
||
1042 | |||
1043 | |||
1044 | s_genlist_init_ex = program = new ProgramGLSL(
|
||
1045 | "uniform sampler2DRect tex;uniform vec2 bbox;\n"
|
||
1046 | "void main (void ){\n"
|
||
1047 | "vec4 helper = vec4( texture2DRect(tex, gl_TexCoord[0].xy).r, texture2DRect(tex, gl_TexCoord[1].xy).r,\n"
|
||
1048 | "texture2DRect(tex, gl_TexCoord[2].xy).r, texture2DRect(tex, gl_TexCoord[3].xy).r);\n"
|
||
1049 | "bvec4 helper2 = bvec4( \n"
|
||
1050 | "all(lessThan(gl_TexCoord[0].xy , bbox)) && helper.x >0,\n"
|
||
1051 | "all(lessThan(gl_TexCoord[1].xy , bbox)) && helper.y >0,\n"
|
||
1052 | "all(lessThan(gl_TexCoord[2].xy , bbox)) && helper.z >0,\n"
|
||
1053 | "all(lessThan(gl_TexCoord[3].xy , bbox)) && helper.w >0);\n"
|
||
1054 | "gl_FragColor = vec4(helper2);\n"
|
||
1055 | "}");
|
||
1056 | _param_genlist_init_bbox = glGetUniformLocation( *program, "bbox");
|
||
1057 | |||
1058 | |||
1059 | //reduction ...
|
||
1060 | s_genlist_histo = new ProgramGLSL(
|
||
1061 | "uniform sampler2DRect tex; void main (void){\n"
|
||
1062 | "vec4 helper; vec4 helper2; \n"
|
||
1063 | "helper = texture2DRect(tex, gl_TexCoord[0].xy); helper2.xy = helper.xy + helper.zw; \n"
|
||
1064 | "helper = texture2DRect(tex, gl_TexCoord[1].xy); helper2.zw = helper.xy + helper.zw; \n"
|
||
1065 | "gl_FragColor.rg = helper2.xz + helper2.yw;\n"
|
||
1066 | "helper = texture2DRect(tex, gl_TexCoord[2].xy); helper2.xy = helper.xy + helper.zw; \n"
|
||
1067 | "helper = texture2DRect(tex, gl_TexCoord[3].xy); helper2.zw = helper.xy + helper.zw; \n"
|
||
1068 | "gl_FragColor.ba= helper2.xz+helper2.yw;\n"
|
||
1069 | "}");
|
||
1070 | |||
1071 | |||
1072 | //read of the first part, which generates tex coordinates
|
||
1073 | s_genlist_start= program = LoadGenListStepShader(1, 1); |
||
1074 | _param_ftex_width= glGetUniformLocation(*program, "width");
|
||
1075 | _param_genlist_start_tex0 = glGetUniformLocation(*program, "tex0");
|
||
1076 | //stepping
|
||
1077 | s_genlist_step = program = LoadGenListStepShader(0, 1); |
||
1078 | _param_genlist_step_tex= glGetUniformLocation(*program, "tex");
|
||
1079 | _param_genlist_step_tex0= glGetUniformLocation(*program, "tex0");
|
||
1080 | |||
1081 | } |
||
1082 | |||
1083 | void ShaderBagGLSL::SetMarginCopyParam(int xmax, int ymax) |
||
1084 | { |
||
1085 | float truncate[2] = {xmax - 0.5f , ymax - 0.5f}; |
||
1086 | glUniform2fv(_param_margin_copy_truncate, 1, truncate);
|
||
1087 | } |
||
1088 | |||
1089 | void ShaderBagGLSL::SetGenListInitParam(int w, int h) |
||
1090 | { |
||
1091 | float bbox[2] = {w - 1.0f, h - 1.0f}; |
||
1092 | glUniform2fv(_param_genlist_init_bbox, 1, bbox);
|
||
1093 | } |
||
1094 | void ShaderBagGLSL::SetGenListStartParam(float width, int tex0) |
||
1095 | { |
||
1096 | glUniform1f(_param_ftex_width, width); |
||
1097 | } |
||
1098 | |||
1099 | |||
1100 | ProgramGLSL* ShaderBagGLSL::LoadGenListStepShader(int start, int step) |
||
1101 | { |
||
1102 | int i;
|
||
1103 | char buffer[10240]; |
||
1104 | // char chanels[5] = "rgba";
|
||
1105 | ostrstream out(buffer, 10240);
|
||
1106 | |||
1107 | for(i = 0; i < step; i++) out<<"uniform sampler2DRect tex"<<i<<";\n"; |
||
1108 | if(start)
|
||
1109 | { |
||
1110 | out<<"uniform float width;\n";
|
||
1111 | out<<"void main(void){\n";
|
||
1112 | out<<"float index = floor(gl_TexCoord[0].y) * width + floor(gl_TexCoord[0].x);\n";
|
||
1113 | out<<"vec2 pos = vec2(0.5, 0.5);\n";
|
||
1114 | }else
|
||
1115 | { |
||
1116 | out<<"uniform sampler2DRect tex;\n";
|
||
1117 | out<<"void main(void){\n";
|
||
1118 | out<<"vec4 tc = texture2DRect( tex, gl_TexCoord[0].xy);\n";
|
||
1119 | out<<"vec2 pos = tc.rg; float index = tc.b;\n";
|
||
1120 | } |
||
1121 | out<<"vec2 sum; vec4 cc;\n";
|
||
1122 | |||
1123 | |||
1124 | if(step>0) |
||
1125 | { |
||
1126 | out<<"vec2 cpos = vec2(-0.5, 0.5);\t vec2 opos;\n";
|
||
1127 | for(i = 0; i < step; i++) |
||
1128 | { |
||
1129 | |||
1130 | out<<"cc = texture2DRect(tex"<<i<<", pos);\n"; |
||
1131 | out<<"sum.x = cc.r + cc.g; sum.y = sum.x + cc.b; \n";
|
||
1132 | out<<"if (index <cc.r){ opos = cpos.xx;}\n";
|
||
1133 | out<<"else if(index < sum.x ) {opos = cpos.yx; index -= cc.r;}\n";
|
||
1134 | out<<"else if(index < sum.y ) {opos = cpos.xy; index -= sum.x;}\n";
|
||
1135 | out<<"else {opos = cpos.yy; index -= sum.y;}\n";
|
||
1136 | out<<"pos = (pos + pos + opos);\n";
|
||
1137 | } |
||
1138 | } |
||
1139 | out<<"gl_FragColor = vec4(pos, index, 1.0);\n";
|
||
1140 | out<<"}\n"<<'\0'; |
||
1141 | return new ProgramGLSL(buffer); |
||
1142 | } |
||
1143 | |||
1144 | |||
1145 | void ShaderBagGLSL::LoadOrientationShader()
|
||
1146 | { |
||
1147 | char buffer[10240]; |
||
1148 | ostrstream out(buffer,10240);
|
||
1149 | |||
1150 | if(GlobalUtil::_IsNvidia)
|
||
1151 | { |
||
1152 | out << "#pragma optionNV(ifcvt none)\n"
|
||
1153 | "#pragma optionNV(unroll all)\n";
|
||
1154 | } |
||
1155 | |||
1156 | out<<"\n"
|
||
1157 | "#define GAUSSIAN_WF "<<GlobalUtil::_OrientationGaussianFactor<<" \n" |
||
1158 | "#define SAMPLE_WF ("<<GlobalUtil::_OrientationWindowFactor<< " )\n" |
||
1159 | "#define ORIENTATION_THRESHOLD "<< GlobalUtil::_MulitiOrientationThreshold << "\n" |
||
1160 | "uniform sampler2DRect tex; \n"
|
||
1161 | "uniform sampler2DRect gradTex; \n"
|
||
1162 | "uniform vec4 size; \n"
|
||
1163 | << (GlobalUtil::_SubpixelLocalization || GlobalUtil::_KeepExtremumSign? |
||
1164 | " uniform sampler2DRect texS; \n" : " ") |
||
1165 | << |
||
1166 | "void main() \n"
|
||
1167 | "{ \n"
|
||
1168 | " vec4 bins[10]; \n"
|
||
1169 | " bins[0] = vec4(0.0);bins[1] = vec4(0.0);bins[2] = vec4(0.0); \n"
|
||
1170 | " bins[3] = vec4(0.0);bins[4] = vec4(0.0);bins[5] = vec4(0.0); \n"
|
||
1171 | " bins[6] = vec4(0.0);bins[7] = vec4(0.0);bins[8] = vec4(0.0); \n"
|
||
1172 | " vec4 loc = texture2DRect(tex, gl_TexCoord[0].xy); \n"
|
||
1173 | " vec2 pos = loc.xy; \n"
|
||
1174 | " bool orientation_mode = (size.z != 0); \n"
|
||
1175 | " float sigma = orientation_mode? abs(size.z) : loc.w; \n";
|
||
1176 | if(GlobalUtil::_SubpixelLocalization || GlobalUtil::_KeepExtremumSign)
|
||
1177 | { |
||
1178 | out<< |
||
1179 | " if(orientation_mode){\n"
|
||
1180 | " vec4 offset = texture2DRect(texS, pos);\n"
|
||
1181 | " pos.xy = pos.xy + offset.yz; \n"
|
||
1182 | " sigma = sigma * pow(size.w, offset.w);\n"
|
||
1183 | " #if "<< GlobalUtil::_KeepExtremumSign << "\n" |
||
1184 | " if(offset.x < 0.6) sigma = -sigma; \n"
|
||
1185 | " #endif\n"
|
||
1186 | " }\n";
|
||
1187 | } |
||
1188 | out<< |
||
1189 | " //bool fixed_orientation = (size.z < 0); \n"
|
||
1190 | " if(size.z < 0) {gl_FragData[0] = vec4(pos, 0, sigma); return;}"
|
||
1191 | " float gsigma = sigma * GAUSSIAN_WF; \n"
|
||
1192 | " vec2 win = abs(vec2(sigma)) * (SAMPLE_WF * GAUSSIAN_WF); \n"
|
||
1193 | " vec2 dim = size.xy; \n"
|
||
1194 | " float dist_threshold = win.x*win.x+0.5; \n"
|
||
1195 | " float factor = -0.5/(gsigma*gsigma); \n"
|
||
1196 | " vec4 sz; vec2 spos; \n"
|
||
1197 | " //if(any(pos.xy <= 1)) discard; \n"
|
||
1198 | " sz.xy = max( pos - win, vec2(1,1)); \n"
|
||
1199 | " sz.zw = min( pos + win, dim-2); \n"
|
||
1200 | " sz = floor(sz)+0.5;";
|
||
1201 | //loop to get the histogram
|
||
1202 | |||
1203 | out<<"\n"
|
||
1204 | " for(spos.y = sz.y; spos.y <= sz.w; spos.y+=1.0) \n"
|
||
1205 | " { \n"
|
||
1206 | " for(spos.x = sz.x; spos.x <= sz.z; spos.x+=1.0) \n"
|
||
1207 | " { \n"
|
||
1208 | " vec2 offset = spos - pos; \n"
|
||
1209 | " float sq_dist = dot(offset,offset); \n"
|
||
1210 | " if( sq_dist < dist_threshold){ \n"
|
||
1211 | " vec4 cc = texture2DRect(gradTex, spos); \n"
|
||
1212 | " float grad = cc.b; float theta = cc.a; \n"
|
||
1213 | " float idx = floor(degrees(theta)*0.1); \n"
|
||
1214 | " if(idx < 0 ) idx += 36; \n"
|
||
1215 | " float weight = grad*exp(sq_dist * factor); \n"
|
||
1216 | " float vidx = fract(idx * 0.25) * 4.0;//mod(idx, 4.0) ; \n"
|
||
1217 | " vec4 inc = weight*vec4(equal(vec4(vidx), vec4(0.0,1.0,2.0,3.0)));";
|
||
1218 | |||
1219 | if(GlobalUtil::_UseDynamicIndexing && GlobalUtil::_IsNvidia)
|
||
1220 | { |
||
1221 | //dynamic indexing may not be faster
|
||
1222 | out<<"\n"
|
||
1223 | " int iidx = int((idx*0.25)); \n"
|
||
1224 | " bins[iidx]+=inc; \n"
|
||
1225 | " } \n"
|
||
1226 | " } \n"
|
||
1227 | " }";
|
||
1228 | |||
1229 | }else
|
||
1230 | { |
||
1231 | //nvfp40 still does not support dynamic array indexing
|
||
1232 | //unrolled binary search...
|
||
1233 | out<<"\n"
|
||
1234 | " if(idx < 16) \n"
|
||
1235 | " { \n"
|
||
1236 | " if(idx < 8) \n"
|
||
1237 | " { \n"
|
||
1238 | " if(idx < 4) { bins[0]+=inc;} \n"
|
||
1239 | " else { bins[1]+=inc;} \n"
|
||
1240 | " }else \n"
|
||
1241 | " { \n"
|
||
1242 | " if(idx < 12){ bins[2]+=inc;} \n"
|
||
1243 | " else { bins[3]+=inc;} \n"
|
||
1244 | " } \n"
|
||
1245 | " }else if(idx < 32) \n"
|
||
1246 | " { \n"
|
||
1247 | " if(idx < 24) \n"
|
||
1248 | " { \n"
|
||
1249 | " if(idx <20) { bins[4]+=inc;} \n"
|
||
1250 | " else { bins[5]+=inc;} \n"
|
||
1251 | " }else \n"
|
||
1252 | " { \n"
|
||
1253 | " if(idx < 28){ bins[6]+=inc;} \n"
|
||
1254 | " else { bins[7]+=inc;} \n"
|
||
1255 | " } \n"
|
||
1256 | " }else \n"
|
||
1257 | " { \n"
|
||
1258 | " bins[8]+=inc; \n"
|
||
1259 | " } \n"
|
||
1260 | " } \n"
|
||
1261 | " } \n"
|
||
1262 | " }";
|
||
1263 | |||
1264 | } |
||
1265 | |||
1266 | WriteOrientationCodeToStream(out); |
||
1267 | |||
1268 | ProgramGLSL * program = new ProgramGLSL(buffer);
|
||
1269 | if(program->IsNative())
|
||
1270 | { |
||
1271 | s_orientation = program ; |
||
1272 | _param_orientation_gtex = glGetUniformLocation(*program, "gradTex");
|
||
1273 | _param_orientation_size = glGetUniformLocation(*program, "size");
|
||
1274 | _param_orientation_stex = glGetUniformLocation(*program, "texS");
|
||
1275 | }else
|
||
1276 | { |
||
1277 | delete program;
|
||
1278 | } |
||
1279 | } |
||
1280 | |||
1281 | |||
1282 | void ShaderBagGLSL::WriteOrientationCodeToStream(std::ostream& out)
|
||
1283 | { |
||
1284 | //smooth histogram and find the largest
|
||
1285 | /*
|
||
1286 | smoothing kernel: (1 3 6 7 6 3 1 )/27
|
||
1287 | the same as 3 pass of (1 1 1)/3 averaging
|
||
1288 | maybe better to use 4 pass on the vectors...
|
||
1289 | */
|
||
1290 | |||
1291 | |||
1292 | //the inner loop on different array numbers is always unrolled in fp40
|
||
1293 | |||
1294 | //bug fixed here:)
|
||
1295 | out<<"\n"
|
||
1296 | " //mat3 m1 = mat3(1, 0, 0, 3, 1, 0, 6, 3, 1)/27.0; \n"
|
||
1297 | " mat3 m1 = mat3(1, 3, 6, 0, 1, 3,0, 0, 1)/27.0; \n"
|
||
1298 | " mat4 m2 = mat4(7, 6, 3, 1, 6, 7, 6, 3, 3, 6, 7, 6, 1, 3, 6, 7)/27.0;\n"
|
||
1299 | " #define FILTER_CODE(i) { \\\n"
|
||
1300 | " vec4 newb = (bins[i]* m2); \\\n"
|
||
1301 | " newb.xyz += ( prev.yzw * m1); \\\n"
|
||
1302 | " prev = bins[i]; \\\n"
|
||
1303 | " newb.wzy += ( bins[i+1].zyx *m1); \\\n"
|
||
1304 | " bins[i] = newb;}\n"
|
||
1305 | " for (int j=0; j<2; j++) \n"
|
||
1306 | " { \n"
|
||
1307 | " vec4 prev = bins[8]; \n"
|
||
1308 | " bins[9] = bins[0]; \n";
|
||
1309 | |||
1310 | if(GlobalUtil::_IsNvidia)
|
||
1311 | { |
||
1312 | out<< |
||
1313 | " for (int i=0; i<9; i++) \n"
|
||
1314 | " { \n"
|
||
1315 | " FILTER_CODE(i); \n"
|
||
1316 | " } \n"
|
||
1317 | " }";
|
||
1318 | |||
1319 | }else
|
||
1320 | { |
||
1321 | //manually unroll the loop for ATI.
|
||
1322 | out << |
||
1323 | " FILTER_CODE(0);\n"
|
||
1324 | " FILTER_CODE(1);\n"
|
||
1325 | " FILTER_CODE(2);\n"
|
||
1326 | " FILTER_CODE(3);\n"
|
||
1327 | " FILTER_CODE(4);\n"
|
||
1328 | " FILTER_CODE(5);\n"
|
||
1329 | " FILTER_CODE(6);\n"
|
||
1330 | " FILTER_CODE(7);\n"
|
||
1331 | " FILTER_CODE(8);\n"
|
||
1332 | " }\n";
|
||
1333 | } |
||
1334 | //find the maximum voting
|
||
1335 | out<<"\n"
|
||
1336 | " vec4 maxh; vec2 maxh2; \n"
|
||
1337 | " vec4 maxh4 = max(max(max(max(max(max(max(max(bins[0], bins[1]), bins[2]), \n"
|
||
1338 | " bins[3]), bins[4]), bins[5]), bins[6]), bins[7]), bins[8]);\n"
|
||
1339 | " maxh2 = max(maxh4.xy, maxh4.zw); maxh = vec4(max(maxh2.x, maxh2.y));";
|
||
1340 | |||
1341 | char *testpeak_code;
|
||
1342 | char *savepeak_code;
|
||
1343 | |||
1344 | //save two/three/four orientations with the largest votings?
|
||
1345 | |||
1346 | if(GlobalUtil::_MaxOrientation>1) |
||
1347 | { |
||
1348 | out<<"\n"
|
||
1349 | " vec4 Orientations = vec4(0, 0, 0, 0); \n"
|
||
1350 | " vec4 weights = vec4(0,0,0,0); ";
|
||
1351 | |||
1352 | testpeak_code = "\\\n"
|
||
1353 | " {test = greaterThan(bins[i], hh);";
|
||
1354 | |||
1355 | //save the orientations in weight-decreasing order
|
||
1356 | if(GlobalUtil::_MaxOrientation ==2) |
||
1357 | { |
||
1358 | savepeak_code = "\\\n"
|
||
1359 | " if(weight <=weights.g){}\\\n"
|
||
1360 | " else if(weight >weights.r)\\\n"
|
||
1361 | " {weights.rg = vec2(weight, weights.r); Orientations.rg = vec2(th, Orientations.r);}\\\n"
|
||
1362 | " else {weights.g = weight; Orientations.g = th;}";
|
||
1363 | }else if(GlobalUtil::_MaxOrientation ==3) |
||
1364 | { |
||
1365 | savepeak_code = "\\\n"
|
||
1366 | " if(weight <=weights.b){}\\\n"
|
||
1367 | " else if(weight >weights.r)\\\n"
|
||
1368 | " {weights.rgb = vec3(weight, weights.rg); Orientations.rgb = vec3(th, Orientations.rg);}\\\n"
|
||
1369 | " else if(weight >weights.g)\\\n"
|
||
1370 | " {weights.gb = vec2(weight, weights.g); Orientations.gb = vec2(th, Orientations.g);}\\\n"
|
||
1371 | " else {weights.b = weight; Orientations.b = th;}";
|
||
1372 | }else
|
||
1373 | { |
||
1374 | savepeak_code = "\\\n"
|
||
1375 | " if(weight <=weights.a){}\\\n"
|
||
1376 | " else if(weight >weights.r)\\\n"
|
||
1377 | " {weights = vec4(weight, weights.rgb); Orientations = vec4(th, Orientations.rgb);}\\\n"
|
||
1378 | " else if(weight >weights.g)\\\n"
|
||
1379 | " {weights.gba = vec3(weight, weights.gb); Orientations.gba = vec3(th, Orientations.gb);}\\\n"
|
||
1380 | " else if(weight >weights.b)\\\n"
|
||
1381 | " {weights.ba = vec2(weight, weights.b); Orientations.ba = vec2(th, Orientations.b);}\\\n"
|
||
1382 | " else {weights.a = weight; Orientations.a = th;}";
|
||
1383 | } |
||
1384 | |||
1385 | }else
|
||
1386 | { |
||
1387 | out<<"\n"
|
||
1388 | " float Orientation; ";
|
||
1389 | testpeak_code ="\\\n"
|
||
1390 | " if(npeaks<=0.0){\\\n"
|
||
1391 | " test = equal(bins[i], maxh) ;";
|
||
1392 | savepeak_code="\\\n"
|
||
1393 | " npeaks++; \\\n"
|
||
1394 | " Orientation = th;";
|
||
1395 | |||
1396 | } |
||
1397 | //find the peaks
|
||
1398 | out <<"\n"
|
||
1399 | " #define FINDPEAK(i, k)" <<testpeak_code<<"\\\n" |
||
1400 | " if( any ( test) ) \\\n"
|
||
1401 | " { \\\n"
|
||
1402 | " if(test.r && bins[i].x > prevb && bins[i].x > bins[i].y ) \\\n"
|
||
1403 | " { \\\n"
|
||
1404 | " float di = -0.5 * (bins[i].y-prevb) / (bins[i].y+prevb-bins[i].x - bins[i].x) ; \\\n"
|
||
1405 | " float th = (k+di+0.5); float weight = bins[i].x;"
|
||
1406 | <<savepeak_code<<"\\\n"
|
||
1407 | " }\\\n"
|
||
1408 | " else if(test.g && all( greaterThan(bins[i].yy , bins[i].xz)) ) \\\n"
|
||
1409 | " { \\\n"
|
||
1410 | " float di = -0.5 * (bins[i].z-bins[i].x) / (bins[i].z+bins[i].x-bins[i].y- bins[i].y) ; \\\n"
|
||
1411 | " float th = (k+di+1.5); float weight = bins[i].y; "
|
||
1412 | <<savepeak_code<<" \\\n"
|
||
1413 | " }\\\n"
|
||
1414 | " if(test.b && all( greaterThan( bins[i].zz , bins[i].yw)) ) \\\n"
|
||
1415 | " { \\\n"
|
||
1416 | " float di = -0.5 * (bins[i].w-bins[i].y) / (bins[i].w+bins[i].y-bins[i].z- bins[i].z) ; \\\n"
|
||
1417 | " float th = (k+di+2.5); float weight = bins[i].z; "
|
||
1418 | <<savepeak_code<<" \\\n"
|
||
1419 | " }\\\n"
|
||
1420 | " else if(test.a && bins[i].w > bins[i].z && bins[i].w > bins[i+1].x ) \\\n"
|
||
1421 | " { \\\n"
|
||
1422 | " float di = -0.5 * (bins[i+1].x-bins[i].z) / (bins[i+1].x+bins[i].z-bins[i].w - bins[i].w) ; \\\n"
|
||
1423 | " float th = (k+di+3.5); float weight = bins[i].w; "
|
||
1424 | <<savepeak_code<<" \\\n"
|
||
1425 | " }\\\n"
|
||
1426 | " }}\\\n"
|
||
1427 | " prevb = bins[i].w;";
|
||
1428 | //the following loop will be unrolled anyway in fp40,
|
||
1429 | //taking more than 1000 instrucsions..
|
||
1430 | //....
|
||
1431 | if(GlobalUtil::_IsNvidia)
|
||
1432 | { |
||
1433 | out<<"\n"
|
||
1434 | " vec4 hh = maxh * ORIENTATION_THRESHOLD; bvec4 test; \n"
|
||
1435 | " bins[9] = bins[0]; \n"
|
||
1436 | " float npeaks = 0.0, k = 0; \n"
|
||
1437 | " float prevb = bins[8].w; \n"
|
||
1438 | " for (int i = 0; i <9 ; i++) \n"
|
||
1439 | " {\n"
|
||
1440 | " FINDPEAK(i, k);\n"
|
||
1441 | " k = k + 4.0; \n"
|
||
1442 | " }";
|
||
1443 | }else
|
||
1444 | { |
||
1445 | //loop unroll for ATI.
|
||
1446 | out <<"\n"
|
||
1447 | " vec4 hh = maxh * ORIENTATION_THRESHOLD; bvec4 test;\n"
|
||
1448 | " bins[9] = bins[0]; \n"
|
||
1449 | " float npeaks = 0.0; \n"
|
||
1450 | " float prevb = bins[8].w; \n"
|
||
1451 | " FINDPEAK(0, 0.0);\n"
|
||
1452 | " FINDPEAK(1, 4.0);\n"
|
||
1453 | " FINDPEAK(2, 8.0);\n"
|
||
1454 | " FINDPEAK(3, 12.0);\n"
|
||
1455 | " FINDPEAK(4, 16.0);\n"
|
||
1456 | " FINDPEAK(5, 20.0);\n"
|
||
1457 | " FINDPEAK(6, 24.0);\n"
|
||
1458 | " FINDPEAK(7, 28.0);\n"
|
||
1459 | " FINDPEAK(8, 32.0);\n";
|
||
1460 | } |
||
1461 | //WRITE output
|
||
1462 | if(GlobalUtil::_MaxOrientation>1) |
||
1463 | { |
||
1464 | out<<"\n"
|
||
1465 | " if(orientation_mode){\n"
|
||
1466 | " npeaks = dot(vec4(1,1,"
|
||
1467 | <<(GlobalUtil::_MaxOrientation>2 ? 1 : 0)<<"," |
||
1468 | <<(GlobalUtil::_MaxOrientation >3? 1 : 0)<<"), vec4(greaterThan(weights, hh)));\n" |
||
1469 | " gl_FragData[1] = radians((Orientations )*10.0);\n"
|
||
1470 | " gl_FragData[0] = vec4(pos, npeaks, sigma);\n"
|
||
1471 | " }else{\n"
|
||
1472 | " gl_FragData[0] = vec4(pos, radians((Orientations.x)*10.0), sigma);\n"
|
||
1473 | " }\n";
|
||
1474 | }else
|
||
1475 | { |
||
1476 | out<<"\n"
|
||
1477 | " gl_FragData[0] = vec4(pos, radians((Orientation.x)*10.0), sigma);\n";
|
||
1478 | } |
||
1479 | //end
|
||
1480 | out<<"\n"
|
||
1481 | "}\n"<<'\0'; |
||
1482 | |||
1483 | |||
1484 | } |
||
1485 | |||
1486 | void ShaderBagGLSL::SetSimpleOrientationInput(int oTex, float sigma, float sigma_step) |
||
1487 | { |
||
1488 | glUniform1i(_param_orientation_gtex, 1);
|
||
1489 | glUniform1f(_param_orientation_size, sigma); |
||
1490 | } |
||
1491 | |||
1492 | |||
1493 | |||
1494 | |||
1495 | void ShaderBagGLSL::SetFeatureOrientationParam(int gtex, int width, int height, float sigma, int stex, float step) |
||
1496 | { |
||
1497 | ///
|
||
1498 | glUniform1i(_param_orientation_gtex, 1);
|
||
1499 | |||
1500 | if((GlobalUtil::_SubpixelLocalization || GlobalUtil::_KeepExtremumSign)&& stex)
|
||
1501 | { |
||
1502 | //specify texutre for subpixel subscale localization
|
||
1503 | glUniform1i(_param_orientation_stex, 2);
|
||
1504 | } |
||
1505 | |||
1506 | float size[4]; |
||
1507 | size[0] = (float)width; |
||
1508 | size[1] = (float)height; |
||
1509 | size[2] = sigma;
|
||
1510 | size[3] = step;
|
||
1511 | glUniform4fv(_param_orientation_size, 1, size);
|
||
1512 | |||
1513 | } |
||
1514 | |||
1515 | |||
1516 | void ShaderBagGLSL::LoadDescriptorShaderF2()
|
||
1517 | { |
||
1518 | //one shader outpout 128/8 = 16 , each fragout encodes 4
|
||
1519 | //const double twopi = 2.0*3.14159265358979323846;
|
||
1520 | //const double rpi = 8.0/twopi;
|
||
1521 | char buffer[10240]; |
||
1522 | ostrstream out(buffer, 10240);
|
||
1523 | |||
1524 | out<<setprecision(8);
|
||
1525 | |||
1526 | out<<"\n"
|
||
1527 | "#define M_PI 3.14159265358979323846\n"
|
||
1528 | "#define TWO_PI (2.0*M_PI)\n"
|
||
1529 | "#define RPI 1.2732395447351626861510701069801\n"
|
||
1530 | "#define WF size.z\n"
|
||
1531 | "uniform sampler2DRect tex; \n"
|
||
1532 | "uniform sampler2DRect gradTex; \n"
|
||
1533 | "uniform vec4 dsize; \n"
|
||
1534 | "uniform vec3 size; \n"
|
||
1535 | "void main() \n"
|
||
1536 | "{\n"
|
||
1537 | " vec2 dim = size.xy; //image size \n"
|
||
1538 | " float index = dsize.x*floor(gl_TexCoord[0].y * 0.5) + gl_TexCoord[0].x;\n"
|
||
1539 | " float idx = 8.0 * fract(index * 0.125) + 8.0 * floor(2.0 * fract(gl_TexCoord[0].y * 0.5)); \n"
|
||
1540 | " index = floor(index*0.125) + 0.49; \n"
|
||
1541 | " vec2 coord = floor( vec2( mod(index, dsize.z), index*dsize.w)) + 0.5 ;\n"
|
||
1542 | " vec2 pos = texture2DRect(tex, coord).xy; \n"
|
||
1543 | " if(any(lessThanEqual(pos.xy, vec2(1.0))) || any(greaterThanEqual(pos.xy, dim-1.0)))// discard; \n"
|
||
1544 | " { gl_FragData[0] = gl_FragData[1] = vec4(0.0); return; }\n"
|
||
1545 | " float anglef = texture2DRect(tex, coord).z;\n"
|
||
1546 | " if(anglef > M_PI) anglef -= TWO_PI;\n"
|
||
1547 | " float sigma = texture2DRect(tex, coord).w; \n"
|
||
1548 | " float spt = abs(sigma * WF); //default to be 3*sigma \n";
|
||
1549 | |||
1550 | //rotation
|
||
1551 | out<< |
||
1552 | " vec4 cscs, rots; \n"
|
||
1553 | " cscs.y = sin(anglef); cscs.x = cos(anglef); \n"
|
||
1554 | " cscs.zw = - cscs.xy; \n"
|
||
1555 | " rots = cscs /spt; \n"
|
||
1556 | " cscs *= spt; \n";
|
||
1557 | |||
1558 | //here cscs is actually (cos, sin, -cos, -sin) * (factor: 3)*sigma
|
||
1559 | //and rots is (cos, sin, -cos, -sin ) /(factor*sigma)
|
||
1560 | //devide the 4x4 sift grid into 16 1x1 block, and each corresponds to a shader thread
|
||
1561 | //To use linear interoplation, 1x1 is increased to 2x2, by adding 0.5 to each side
|
||
1562 | |||
1563 | out<< |
||
1564 | "vec4 temp; vec2 pt, offsetpt; \n"
|
||
1565 | " /*the fraction part of idx is .5*/ \n"
|
||
1566 | " offsetpt.x = 4.0* fract(idx*0.25) - 2.0; \n"
|
||
1567 | " offsetpt.y = floor(idx*0.25) - 1.5; \n"
|
||
1568 | " temp = cscs.xwyx*offsetpt.xyxy; \n"
|
||
1569 | " pt = pos + temp.xz + temp.yw; \n";
|
||
1570 | |||
1571 | //get a horizontal bounding box of the rotated rectangle
|
||
1572 | out<< |
||
1573 | " vec2 bwin = abs(cscs.xy); \n"
|
||
1574 | " float bsz = bwin.x + bwin.y; \n"
|
||
1575 | " vec4 sz; \n"
|
||
1576 | " sz.xy = max(pt - vec2(bsz), vec2(1,1));\n"
|
||
1577 | " sz.zw = min(pt + vec2(bsz), dim - 2); \n"
|
||
1578 | " sz = floor(sz)+0.5;"; //move sample point to pixel center |
||
1579 | //get voting for two box
|
||
1580 | |||
1581 | out<<"\n"
|
||
1582 | " vec4 DA, DB; vec2 spos; \n"
|
||
1583 | " DA = DB = vec4(0, 0, 0, 0); \n"
|
||
1584 | " for(spos.y = sz.y; spos.y <= sz.w; spos.y+=1.0) \n"
|
||
1585 | " { \n"
|
||
1586 | " for(spos.x = sz.x; spos.x <= sz.z; spos.x+=1.0) \n"
|
||
1587 | " { \n"
|
||
1588 | " vec2 diff = spos - pt; \n"
|
||
1589 | " temp = rots.xywx * diff.xyxy;\n"
|
||
1590 | " vec2 nxy = (temp.xz + temp.yw); \n"
|
||
1591 | " vec2 nxyn = abs(nxy); \n"
|
||
1592 | " if(all( lessThan(nxyn, vec2(1.0)) ))\n"
|
||
1593 | " {\n"
|
||
1594 | " vec4 cc = texture2DRect(gradTex, spos); \n"
|
||
1595 | " float mod = cc.b; float angle = cc.a; \n"
|
||
1596 | " float theta0 = RPI * (anglef - angle); \n"
|
||
1597 | " float theta = theta0 < 0.0? theta0 + 8.0 : theta0;;\n"
|
||
1598 | " diff = nxy + offsetpt.xy; \n"
|
||
1599 | " float ww = exp(-0.125*dot(diff, diff));\n"
|
||
1600 | " vec2 weights = 1 - nxyn;\n"
|
||
1601 | " float weight = weights.x * weights.y *mod*ww; \n"
|
||
1602 | " float theta1 = floor(theta); \n"
|
||
1603 | " float weight2 = (theta - theta1) * weight;\n"
|
||
1604 | " float weight1 = weight - weight2;\n"
|
||
1605 | " DA += vec4(equal(vec4(theta1), vec4(0, 1, 2, 3)))*weight1;\n"
|
||
1606 | " DA += vec4(equal(vec4(theta1), vec4(7, 0, 1, 2)))*weight2; \n"
|
||
1607 | " DB += vec4(equal(vec4(theta1), vec4(4, 5, 6, 7)))*weight1;\n"
|
||
1608 | " DB += vec4(equal(vec4(theta1), vec4(3, 4, 5, 6)))*weight2; \n"
|
||
1609 | " }\n"
|
||
1610 | " }\n"
|
||
1611 | " }\n";
|
||
1612 | |||
1613 | out<< |
||
1614 | " gl_FragData[0] = DA; gl_FragData[1] = DB;\n"
|
||
1615 | "}\n"<<'\0'; |
||
1616 | |||
1617 | ProgramGLSL * program = new ProgramGLSL(buffer);
|
||
1618 | |||
1619 | if(program->IsNative())
|
||
1620 | { |
||
1621 | s_descriptor_fp = program ; |
||
1622 | _param_descriptor_gtex = glGetUniformLocation(*program, "gradTex");
|
||
1623 | _param_descriptor_size = glGetUniformLocation(*program, "size");
|
||
1624 | _param_descriptor_dsize = glGetUniformLocation(*program, "dsize");
|
||
1625 | }else
|
||
1626 | { |
||
1627 | delete program;
|
||
1628 | } |
||
1629 | |||
1630 | |||
1631 | } |
||
1632 | |||
1633 | void ShaderBagGLSL::LoadDescriptorShader()
|
||
1634 | { |
||
1635 | GlobalUtil::_DescriptorPPT = 16;
|
||
1636 | LoadDescriptorShaderF2(); |
||
1637 | } |
||
1638 | |||
1639 | |||
1640 | void ShaderBagGLSL::SetFeatureDescirptorParam(int gtex, int otex, float dwidth, float fwidth, float width, float height, float sigma) |
||
1641 | { |
||
1642 | ///
|
||
1643 | glUniform1i(_param_descriptor_gtex, 1);
|
||
1644 | |||
1645 | float dsize[4] ={dwidth, 1.0f/dwidth, fwidth, 1.0f/fwidth}; |
||
1646 | glUniform4fv(_param_descriptor_dsize, 1, dsize);
|
||
1647 | float size[3]; |
||
1648 | size[0] = width;
|
||
1649 | size[1] = height;
|
||
1650 | size[2] = GlobalUtil::_DescriptorWindowFactor;
|
||
1651 | glUniform3fv(_param_descriptor_size, 1, size);
|
||
1652 | |||
1653 | } |
||
1654 | |||
1655 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||
1656 | |||
1657 | void ShaderBagPKSL::LoadFixedShaders()
|
||
1658 | { |
||
1659 | ProgramGLSL * program; |
||
1660 | |||
1661 | |||
1662 | s_gray = new ProgramGLSL(
|
||
1663 | "uniform sampler2DRect tex; void main(){\n"
|
||
1664 | "float intensity = dot(vec3(0.299, 0.587, 0.114), texture2DRect(tex,gl_TexCoord[0].xy ).rgb);\n"
|
||
1665 | "gl_FragColor= vec4(intensity, intensity, intensity, 1.0);}" );
|
||
1666 | |||
1667 | |||
1668 | s_sampling = new ProgramGLSL(
|
||
1669 | "uniform sampler2DRect tex; void main(){\n"
|
||
1670 | "gl_FragColor= vec4( texture2DRect(tex,gl_TexCoord[0].st ).r,texture2DRect(tex,gl_TexCoord[1].st ).r,\n"
|
||
1671 | " texture2DRect(tex,gl_TexCoord[2].st ).r,texture2DRect(tex,gl_TexCoord[3].st ).r);}" );
|
||
1672 | |||
1673 | |||
1674 | s_margin_copy = program = new ProgramGLSL(
|
||
1675 | "uniform sampler2DRect tex; uniform vec4 truncate; void main(){\n"
|
||
1676 | "vec4 cc = texture2DRect(tex, min(gl_TexCoord[0].xy, truncate.xy)); \n"
|
||
1677 | "bvec2 ob = lessThan(gl_TexCoord[0].xy, truncate.xy);\n"
|
||
1678 | "if(ob.y) { gl_FragColor = (truncate.z ==0 ? cc.rrbb : cc.ggaa); } \n"
|
||
1679 | "else if(ob.x) {gl_FragColor = (truncate.w <1.5 ? cc.rgrg : cc.baba);} \n"
|
||
1680 | "else { vec4 weights = vec4(vec4(0, 1, 2, 3) == truncate.w);\n"
|
||
1681 | "float v = dot(weights, cc); gl_FragColor = vec4(v);}}");
|
||
1682 | |||
1683 | _param_margin_copy_truncate = glGetUniformLocation(*program, "truncate");
|
||
1684 | |||
1685 | |||
1686 | |||
1687 | s_zero_pass = new ProgramGLSL("void main(){gl_FragColor = vec4(0.0);}"); |
||
1688 | |||
1689 | |||
1690 | |||
1691 | s_grad_pass = program = new ProgramGLSL(
|
||
1692 | "uniform sampler2DRect tex; uniform sampler2DRect texp; void main ()\n"
|
||
1693 | "{\n"
|
||
1694 | " vec4 v1, v2, gg;\n"
|
||
1695 | " vec4 cc = texture2DRect(tex, gl_TexCoord[0].xy);\n"
|
||
1696 | " vec4 cp = texture2DRect(texp, gl_TexCoord[0].xy);\n"
|
||
1697 | " gl_FragData[0] = cc - cp; \n"
|
||
1698 | " vec4 cl = texture2DRect(tex, gl_TexCoord[1].xy); vec4 cr = texture2DRect(tex, gl_TexCoord[2].xy);\n"
|
||
1699 | " vec4 cd = texture2DRect(tex, gl_TexCoord[3].xy); vec4 cu = texture2DRect(tex, gl_TexCoord[4].xy);\n"
|
||
1700 | " vec4 dx = (vec4(cr.rb, cc.ga) - vec4(cc.rb, cl.ga)).zxwy;\n"
|
||
1701 | " vec4 dy = (vec4(cu.rg, cc.ba) - vec4(cc.rg, cd.ba)).zwxy;\n"
|
||
1702 | " vec4 grad = 0.5 * sqrt(dx*dx + dy * dy);\n"
|
||
1703 | " gl_FragData[1] = grad;\n"
|
||
1704 | " vec4 invalid = vec4(equal(grad, vec4(0.0))); \n"
|
||
1705 | " vec4 ov = atan(dy, dx + invalid); \n"
|
||
1706 | " gl_FragData[2] = ov; \n"
|
||
1707 | "}\n\0"); //when |
||
1708 | |||
1709 | _param_grad_pass_texp = glGetUniformLocation(*program, "texp");
|
||
1710 | |||
1711 | |||
1712 | GlobalUtil::_OrientationPack2 = 0;
|
||
1713 | LoadOrientationShader(); |
||
1714 | |||
1715 | if(s_orientation == NULL) |
||
1716 | { |
||
1717 | //Load a simplified version if the right version is not supported
|
||
1718 | s_orientation = program = new ProgramGLSL(
|
||
1719 | "uniform sampler2DRect fTex; uniform sampler2DRect oTex; uniform vec2 size; void main(){\n"
|
||
1720 | " vec4 cc = texture2DRect(fTex, gl_TexCoord[0].xy);\n"
|
||
1721 | " vec2 co = cc.xy * 0.5; \n"
|
||
1722 | " vec4 oo = texture2DRect(oTex, co);\n"
|
||
1723 | " bvec2 bo = lessThan(fract(co), vec2(0.5)); \n"
|
||
1724 | " float o = bo.y? (bo.x? oo.r : oo.g) : (bo.x? oo.b : oo.a); \n"
|
||
1725 | " gl_FragColor = vec4(cc.rg, o, size.x * pow(size.y, cc.a));}");
|
||
1726 | _param_orientation_gtex= glGetUniformLocation(*program, "oTex");
|
||
1727 | _param_orientation_size= glGetUniformLocation(*program, "size");
|
||
1728 | |||
1729 | GlobalUtil::_MaxOrientation = 0;
|
||
1730 | GlobalUtil::_FullSupported = 0;
|
||
1731 | std::cerr<<"Orientation simplified on this hardware"<<endl;
|
||
1732 | } |
||
1733 | |||
1734 | if(GlobalUtil::_DescriptorPPT)
|
||
1735 | { |
||
1736 | LoadDescriptorShader(); |
||
1737 | if(s_descriptor_fp == NULL) |
||
1738 | { |
||
1739 | GlobalUtil::_DescriptorPPT = GlobalUtil::_FullSupported = 0;
|
||
1740 | std::cerr<<"Descriptor ignored on this hardware"<<endl;
|
||
1741 | } |
||
1742 | } |
||
1743 | } |
||
1744 | |||
1745 | |||
1746 | void ShaderBagPKSL::LoadDisplayShaders()
|
||
1747 | { |
||
1748 | ProgramGLSL * program; |
||
1749 | |||
1750 | s_copy_key = new ProgramGLSL(
|
||
1751 | "uniform sampler2DRect tex;void main(){\n"
|
||
1752 | "gl_FragColor= vec4(texture2DRect(tex, gl_TexCoord[0].xy).rg, 0,1);}");
|
||
1753 | |||
1754 | //shader used to write a vertex buffer object
|
||
1755 | //which is used to draw the quads of each feature
|
||
1756 | s_vertex_list = program = new ProgramGLSL(
|
||
1757 | "uniform sampler2DRect tex; uniform vec4 sizes; void main(){\n"
|
||
1758 | "float fwidth = sizes.y; \n"
|
||
1759 | "float twidth = sizes.z; \n"
|
||
1760 | "float rwidth = sizes.w; \n"
|
||
1761 | "float index = 0.1*(fwidth*floor(gl_TexCoord[0].y) + gl_TexCoord[0].x);\n"
|
||
1762 | "float px = mod(index, twidth);\n"
|
||
1763 | "vec2 tpos= floor(vec2(px, index*rwidth))+0.5;\n"
|
||
1764 | "vec4 cc = texture2DRect(tex, tpos );\n"
|
||
1765 | "float size = 3.0f * cc.a; \n"
|
||
1766 | "gl_FragColor.zw = vec2(0.0, 1.0);\n"
|
||
1767 | "if(any(lessThan(cc.xy,vec2(0.0)))) {gl_FragColor.xy = cc.xy;}else \n"
|
||
1768 | "{\n"
|
||
1769 | " float type = fract(px);\n"
|
||
1770 | " vec2 dxy; float s, c;\n"
|
||
1771 | " dxy.x = type < 0.1 ? 0.0 : ((type <0.5 || type > 0.9)? size : -size);\n"
|
||
1772 | " dxy.y = type < 0.2 ? 0.0 : ((type < 0.3 || type > 0.7 )? -size :size); \n"
|
||
1773 | " s = sin(cc.b); c = cos(cc.b); \n"
|
||
1774 | " gl_FragColor.x = cc.x + c*dxy.x-s*dxy.y;\n"
|
||
1775 | " gl_FragColor.y = cc.y + c*dxy.y+s*dxy.x;}\n"
|
||
1776 | "}\n\0");
|
||
1777 | /*gl_FragColor = vec4(tpos, 0.0, 1.0);}\n\0");*/
|
||
1778 | |||
1779 | _param_genvbo_size = glGetUniformLocation(*program, "sizes");
|
||
1780 | |||
1781 | s_display_gaussian = new ProgramGLSL(
|
||
1782 | "uniform sampler2DRect tex; void main(){\n"
|
||
1783 | "vec4 pc = texture2DRect(tex, gl_TexCoord[0].xy); bvec2 ff = lessThan(fract(gl_TexCoord[0].xy), vec2(0.5));\n"
|
||
1784 | "float v = ff.y?(ff.x? pc.r : pc.g):(ff.x?pc.b:pc.a); gl_FragColor = vec4(vec3(v), 1.0);}");
|
||
1785 | |||
1786 | s_display_dog = new ProgramGLSL(
|
||
1787 | "uniform sampler2DRect tex; void main(){\n"
|
||
1788 | "vec4 pc = texture2DRect(tex, gl_TexCoord[0].xy); bvec2 ff = lessThan(fract(gl_TexCoord[0].xy), vec2(0.5));\n"
|
||
1789 | "float v = ff.y ?(ff.x ? pc.r : pc.g):(ff.x ? pc.b : pc.a);float g = (0.5+20.0*v);\n"
|
||
1790 | "gl_FragColor = vec4(g, g, g, 1.0);}" );
|
||
1791 | |||
1792 | |||
1793 | s_display_grad = new ProgramGLSL(
|
||
1794 | "uniform sampler2DRect tex; void main(){\n"
|
||
1795 | "vec4 pc = texture2DRect(tex, gl_TexCoord[0].xy); bvec2 ff = lessThan(fract(gl_TexCoord[0].xy), vec2(0.5));\n"
|
||
1796 | "float v = ff.y ?(ff.x ? pc.r : pc.g):(ff.x ? pc.b : pc.a); gl_FragColor = vec4(5.0 *vec3(v), 1.0); }");
|
||
1797 | |||
1798 | s_display_keys= new ProgramGLSL(
|
||
1799 | "uniform sampler2DRect tex; void main(){\n"
|
||
1800 | "vec4 oc = texture2DRect(tex, gl_TexCoord[0].xy); \n"
|
||
1801 | "vec4 cc = vec4(equal(abs(oc.rrrr), vec4(1.0, 2.0, 3.0, 4.0))); \n"
|
||
1802 | "bvec2 ff = lessThan(fract(gl_TexCoord[0].xy) , vec2(0.5));\n"
|
||
1803 | "float v = ff.y ?(ff.x ? cc.r : cc.g):(ff.x ? cc.b : cc.a);\n"
|
||
1804 | "if(v == 0) discard; \n"
|
||
1805 | "else if(oc.r > 0) gl_FragColor = vec4(1.0, 0, 0,1.0); \n"
|
||
1806 | "else gl_FragColor = vec4(0.0,1.0,0.0,1.0); }" );
|
||
1807 | } |
||
1808 | |||
1809 | void ShaderBagPKSL::LoadOrientationShader(void) |
||
1810 | { |
||
1811 | char buffer[10240]; |
||
1812 | ostrstream out(buffer,10240);
|
||
1813 | if(GlobalUtil::_IsNvidia)
|
||
1814 | { |
||
1815 | out << "#pragma optionNV(ifcvt none)\n"
|
||
1816 | "#pragma optionNV(unroll all)\n";
|
||
1817 | } |
||
1818 | out<<"\n"
|
||
1819 | "#define GAUSSIAN_WF "<<GlobalUtil::_OrientationGaussianFactor<<" \n" |
||
1820 | "#define SAMPLE_WF ("<<GlobalUtil::_OrientationWindowFactor<< " )\n" |
||
1821 | "#define ORIENTATION_THRESHOLD "<< GlobalUtil::_MulitiOrientationThreshold << "\n" |
||
1822 | "uniform sampler2DRect tex; uniform sampler2DRect gtex;\n"
|
||
1823 | "uniform sampler2DRect otex; uniform vec4 size;\n"
|
||
1824 | "void main() \n"
|
||
1825 | "{ \n"
|
||
1826 | " vec4 bins[10]; \n"
|
||
1827 | " bins[0] = vec4(0.0);bins[1] = vec4(0.0);bins[2] = vec4(0.0); \n"
|
||
1828 | " bins[3] = vec4(0.0);bins[4] = vec4(0.0);bins[5] = vec4(0.0); \n"
|
||
1829 | " bins[6] = vec4(0.0);bins[7] = vec4(0.0);bins[8] = vec4(0.0); \n"
|
||
1830 | " vec4 sift = texture2DRect(tex, gl_TexCoord[0].xy); \n"
|
||
1831 | " vec2 pos = sift.xy; \n"
|
||
1832 | " bool orientation_mode = (size.z != 0); \n"
|
||
1833 | " float sigma = orientation_mode? (abs(size.z) * pow(size.w, sift.w) * sift.z) : (sift.w); \n"
|
||
1834 | " //bool fixed_orientation = (size.z < 0); \n"
|
||
1835 | " if(size.z < 0) {gl_FragData[0] = vec4(pos, 0, sigma); return;}"
|
||
1836 | " float gsigma = sigma * GAUSSIAN_WF; \n"
|
||
1837 | " vec2 win = abs(vec2(sigma)) * (SAMPLE_WF * GAUSSIAN_WF); \n"
|
||
1838 | " vec2 dim = size.xy; \n"
|
||
1839 | " vec4 dist_threshold = vec4(win.x*win.x+0.5); \n"
|
||
1840 | " float factor = -0.5/(gsigma*gsigma); \n"
|
||
1841 | " vec4 sz; vec2 spos; \n"
|
||
1842 | " //if(any(pos.xy <= 1)) discard; \n"
|
||
1843 | " sz.xy = max( pos - win, vec2(2,2)); \n"
|
||
1844 | " sz.zw = min( pos + win, dim-3); \n"
|
||
1845 | " sz = floor(sz*0.5) + 0.5; ";
|
||
1846 | //loop to get the histogram
|
||
1847 | |||
1848 | out<<"\n"
|
||
1849 | " for(spos.y = sz.y; spos.y <= sz.w; spos.y+=1.0) \n"
|
||
1850 | " { \n"
|
||
1851 | " for(spos.x = sz.x; spos.x <= sz.z; spos.x+=1.0) \n"
|
||
1852 | " { \n"
|
||
1853 | " vec2 offset = 2* spos - pos - 0.5; \n"
|
||
1854 | " vec4 off = vec4(offset, offset + 1); \n"
|
||
1855 | " vec4 distsq = off.xzxz * off.xzxz + off.yyww * off.yyww; \n"
|
||
1856 | " bvec4 inside = lessThan(distsq, dist_threshold); \n"
|
||
1857 | " if(any(inside)) \n"
|
||
1858 | " { \n"
|
||
1859 | " vec4 gg = texture2DRect(gtex, spos); \n"
|
||
1860 | " vec4 oo = texture2DRect(otex, spos); \n"
|
||
1861 | " vec4 weight = gg * exp(distsq * factor); \n"
|
||
1862 | " vec4 idxv = floor(degrees(oo)*0.1); \n"
|
||
1863 | " idxv+= (vec4(lessThan(idxv, vec4(0.0)))*36.0); \n"
|
||
1864 | " vec4 vidx = fract(idxv * 0.25) * 4.0;//mod(idxv, 4.0); \n";
|
||
1865 | //
|
||
1866 | if(GlobalUtil::_UseDynamicIndexing && GlobalUtil::_IsNvidia)
|
||
1867 | { |
||
1868 | // it might be slow on some GPUs
|
||
1869 | out<<"\n"
|
||
1870 | " for(int i = 0 ; i < 4; i++)\n"
|
||
1871 | " {\n"
|
||
1872 | " if(inside[i])\n"
|
||
1873 | " {\n"
|
||
1874 | " float idx = idxv[i]; \n"
|
||
1875 | " vec4 inc = weight[i] * vec4(equal(vec4(vidx[i]), vec4(0,1,2,3))); \n"
|
||
1876 | " int iidx = int(floor(idx*0.25)); \n"
|
||
1877 | " bins[iidx]+=inc; \n"
|
||
1878 | " } \n"
|
||
1879 | " } \n"
|
||
1880 | " } \n"
|
||
1881 | " } \n"
|
||
1882 | " }";
|
||
1883 | |||
1884 | }else
|
||
1885 | { |
||
1886 | //nvfp40 still does not support dynamic array indexing
|
||
1887 | //unrolled binary search
|
||
1888 | //it seems to be faster than the dyanmic indexing version on some GPUs
|
||
1889 | out<<"\n"
|
||
1890 | " for(int i = 0 ; i < 4; i++)\n"
|
||
1891 | " {\n"
|
||
1892 | " if(inside[i])\n"
|
||
1893 | " {\n"
|
||
1894 | " float idx = idxv[i]; \n"
|
||
1895 | " vec4 inc = weight[i] * vec4(equal(vec4(vidx[i]), vec4(0,1,2,3))); \n"
|
||
1896 | " if(idx < 16) \n"
|
||
1897 | " { \n"
|
||
1898 | " if(idx < 8) \n"
|
||
1899 | " { \n"
|
||
1900 | " if(idx < 4) { bins[0]+=inc;} \n"
|
||
1901 | " else { bins[1]+=inc;} \n"
|
||
1902 | " }else \n"
|
||
1903 | " { \n"
|
||
1904 | " if(idx < 12){ bins[2]+=inc;} \n"
|
||
1905 | " else { bins[3]+=inc;} \n"
|
||
1906 | " } \n"
|
||
1907 | " }else if(idx < 32) \n"
|
||
1908 | " { \n"
|
||
1909 | " if(idx < 24) \n"
|
||
1910 | " { \n"
|
||
1911 | " if(idx <20) { bins[4]+=inc;} \n"
|
||
1912 | " else { bins[5]+=inc;} \n"
|
||
1913 | " }else \n"
|
||
1914 | " { \n"
|
||
1915 | " if(idx < 28){ bins[6]+=inc;} \n"
|
||
1916 | " else { bins[7]+=inc;} \n"
|
||
1917 | " } \n"
|
||
1918 | " }else \n"
|
||
1919 | " { \n"
|
||
1920 | " bins[8]+=inc; \n"
|
||
1921 | " } \n"
|
||
1922 | " } \n"
|
||
1923 | " } \n"
|
||
1924 | " } \n"
|
||
1925 | " } \n"
|
||
1926 | " }";
|
||
1927 | |||
1928 | } |
||
1929 | |||
1930 | //reuse the code from the unpacked version..
|
||
1931 | ShaderBagGLSL::WriteOrientationCodeToStream(out); |
||
1932 | |||
1933 | |||
1934 | |||
1935 | ProgramGLSL * program = new ProgramGLSL(buffer);
|
||
1936 | if(program->IsNative())
|
||
1937 | { |
||
1938 | s_orientation = program ; |
||
1939 | _param_orientation_gtex = glGetUniformLocation(*program, "gtex");
|
||
1940 | _param_orientation_otex = glGetUniformLocation(*program, "otex");
|
||
1941 | _param_orientation_size = glGetUniformLocation(*program, "size");
|
||
1942 | }else
|
||
1943 | { |
||
1944 | delete program;
|
||
1945 | } |
||
1946 | } |
||
1947 | |||
1948 | void ShaderBagPKSL::SetGenListStartParam(float width, int tex0) |
||
1949 | { |
||
1950 | glUniform1f(_param_ftex_width, width); |
||
1951 | } |
||
1952 | |||
1953 | void ShaderBagPKSL::LoadGenListShader(int ndoglev,int nlev) |
||
1954 | { |
||
1955 | ProgramGLSL * program; |
||
1956 | |||
1957 | s_genlist_init_tight = new ProgramGLSL(
|
||
1958 | "uniform sampler2DRect tex; void main ()\n"
|
||
1959 | "{\n"
|
||
1960 | " vec4 key = vec4(texture2DRect(tex, gl_TexCoord[0].xy).r, \n"
|
||
1961 | " texture2DRect(tex, gl_TexCoord[1].xy).r, \n"
|
||
1962 | " texture2DRect(tex, gl_TexCoord[2].xy).r, \n"
|
||
1963 | " texture2DRect(tex, gl_TexCoord[3].xy).r); \n"
|
||
1964 | " gl_FragColor = vec4(notEqual(key, vec4(0.0))); \n"
|
||
1965 | "}");
|
||
1966 | |||
1967 | s_genlist_init_ex = program = new ProgramGLSL(
|
||
1968 | "uniform sampler2DRect tex; uniform vec4 bbox; void main ()\n"
|
||
1969 | "{\n"
|
||
1970 | " vec4 helper1 = vec4(equal(vec4(abs(texture2DRect(tex, gl_TexCoord[0].xy).r)), vec4(1.0, 2.0, 3.0, 4.0)));\n"
|
||
1971 | " vec4 helper2 = vec4(equal(vec4(abs(texture2DRect(tex, gl_TexCoord[1].xy).r)), vec4(1.0, 2.0, 3.0, 4.0)));\n"
|
||
1972 | " vec4 helper3 = vec4(equal(vec4(abs(texture2DRect(tex, gl_TexCoord[2].xy).r)), vec4(1.0, 2.0, 3.0, 4.0)));\n"
|
||
1973 | " vec4 helper4 = vec4(equal(vec4(abs(texture2DRect(tex, gl_TexCoord[3].xy).r)), vec4(1.0, 2.0, 3.0, 4.0)));\n"
|
||
1974 | " vec4 bx1 = vec4(lessThan(gl_TexCoord[0].xxyy, bbox)); \n"
|
||
1975 | " vec4 bx4 = vec4(lessThan(gl_TexCoord[3].xxyy, bbox)); \n"
|
||
1976 | " vec4 bx2 = vec4(bx4.xy, bx1.zw); \n"
|
||
1977 | " vec4 bx3 = vec4(bx1.xy, bx4.zw);\n"
|
||
1978 | " helper1 = min(min(bx1.xyxy, bx1.zzww), helper1);\n"
|
||
1979 | " helper2 = min(min(bx2.xyxy, bx2.zzww), helper2);\n"
|
||
1980 | " helper3 = min(min(bx3.xyxy, bx3.zzww), helper3);\n"
|
||
1981 | " helper4 = min(min(bx4.xyxy, bx4.zzww), helper4);\n"
|
||
1982 | " gl_FragColor.r = float(any(greaterThan(max(helper1.xy, helper1.zw), vec2(0.0)))); \n"
|
||
1983 | " gl_FragColor.g = float(any(greaterThan(max(helper2.xy, helper2.zw), vec2(0.0)))); \n"
|
||
1984 | " gl_FragColor.b = float(any(greaterThan(max(helper3.xy, helper3.zw), vec2(0.0)))); \n"
|
||
1985 | " gl_FragColor.a = float(any(greaterThan(max(helper4.xy, helper4.zw), vec2(0.0)))); \n"
|
||
1986 | "}");
|
||
1987 | _param_genlist_init_bbox = glGetUniformLocation( *program, "bbox");
|
||
1988 | |||
1989 | s_genlist_end = program = new ProgramGLSL(
|
||
1990 | GlobalUtil::_KeepExtremumSign == 0 ?
|
||
1991 | |||
1992 | "uniform sampler2DRect tex; uniform sampler2DRect ktex; void main()\n"
|
||
1993 | "{\n"
|
||
1994 | " vec4 tc = texture2DRect( tex, gl_TexCoord[0].xy);\n"
|
||
1995 | " vec2 pos = tc.rg; float index = tc.b;\n"
|
||
1996 | " vec4 tk = texture2DRect( ktex, pos); \n"
|
||
1997 | " vec4 keys = vec4(equal(abs(tk.rrrr), vec4(1.0, 2.0, 3.0, 4.0))); \n"
|
||
1998 | " vec2 opos; \n"
|
||
1999 | " opos.x = dot(keys, vec4(-0.5, 0.5, -0.5, 0.5));\n"
|
||
2000 | " opos.y = dot(keys, vec4(-0.5, -0.5, 0.5, 0.5));\n"
|
||
2001 | " gl_FragColor = vec4(opos + pos * 2.0 + tk.yz, 1.0, tk.w);\n"
|
||
2002 | "}" :
|
||
2003 | |||
2004 | "uniform sampler2DRect tex; uniform sampler2DRect ktex; void main()\n"
|
||
2005 | "{\n"
|
||
2006 | " vec4 tc = texture2DRect( tex, gl_TexCoord[0].xy);\n"
|
||
2007 | " vec2 pos = tc.rg; float index = tc.b;\n"
|
||
2008 | " vec4 tk = texture2DRect( ktex, pos); \n"
|
||
2009 | " vec4 keys = vec4(equal(abs(tk.rrrr), vec4(1.0, 2.0, 3.0, 4.0))) \n"
|
||
2010 | " vec2 opos; \n"
|
||
2011 | " opos.x = dot(keys, vec4(-0.5, 0.5, -0.5, 0.5));\n"
|
||
2012 | " opos.y = dot(keys, vec4(-0.5, -0.5, 0.5, 0.5));\n"
|
||
2013 | " gl_FragColor = vec4(opos + pos * 2.0 + tk.yz, sign(tk.r), tk.w);\n"
|
||
2014 | "}"
|
||
2015 | ); |
||
2016 | _param_genlist_end_ktex = glGetUniformLocation(*program, "ktex");
|
||
2017 | |||
2018 | //reduction ...
|
||
2019 | s_genlist_histo = new ProgramGLSL(
|
||
2020 | "uniform sampler2DRect tex; void main ()\n"
|
||
2021 | "{\n"
|
||
2022 | " vec4 helper; vec4 helper2; \n"
|
||
2023 | " helper = texture2DRect(tex, gl_TexCoord[0].xy); helper2.xy = helper.xy + helper.zw; \n"
|
||
2024 | " helper = texture2DRect(tex, gl_TexCoord[1].xy); helper2.zw = helper.xy + helper.zw; \n"
|
||
2025 | " gl_FragColor.rg = helper2.xz + helper2.yw;\n"
|
||
2026 | " helper = texture2DRect(tex, gl_TexCoord[2].xy); helper2.xy = helper.xy + helper.zw; \n"
|
||
2027 | " helper = texture2DRect(tex, gl_TexCoord[3].xy); helper2.zw = helper.xy + helper.zw; \n"
|
||
2028 | " gl_FragColor.ba= helper2.xz+helper2.yw;\n"
|
||
2029 | "}");
|
||
2030 | |||
2031 | |||
2032 | //read of the first part, which generates tex coordinates
|
||
2033 | |||
2034 | s_genlist_start= program = ShaderBagGLSL::LoadGenListStepShader(1, 1); |
||
2035 | _param_ftex_width= glGetUniformLocation(*program, "width");
|
||
2036 | _param_genlist_start_tex0 = glGetUniformLocation(*program, "tex0");
|
||
2037 | //stepping
|
||
2038 | s_genlist_step = program = ShaderBagGLSL::LoadGenListStepShader(0, 1); |
||
2039 | _param_genlist_step_tex= glGetUniformLocation(*program, "tex");
|
||
2040 | _param_genlist_step_tex0= glGetUniformLocation(*program, "tex0");
|
||
2041 | |||
2042 | } |
||
2043 | void ShaderBagPKSL::UnloadProgram(void) |
||
2044 | { |
||
2045 | glUseProgram(0);
|
||
2046 | } |
||
2047 | void ShaderBagPKSL::LoadKeypointShader(float dog_threshold, float edge_threshold) |
||
2048 | { |
||
2049 | //
|
||
2050 | char buffer[10240]; |
||
2051 | float threshold0 = dog_threshold* (GlobalUtil::_SubpixelLocalization?0.8f:1.0f); |
||
2052 | float threshold1 = dog_threshold;
|
||
2053 | float threshold2 = (edge_threshold+1)*(edge_threshold+1)/edge_threshold; |
||
2054 | ostrstream out(buffer, 10240);
|
||
2055 | out<<setprecision(8);
|
||
2056 | |||
2057 | if(GlobalUtil::_IsNvidia)
|
||
2058 | { |
||
2059 | out << "#pragma optionNV(ifcvt none)\n"
|
||
2060 | "#pragma optionNV(unroll all)\n"
|
||
2061 | "#define REPEAT4(FUNCTION)\\\n"
|
||
2062 | "for(int i = 0; i < 4; ++i)\\\n"
|
||
2063 | "{\\\n"
|
||
2064 | " FUNCTION(i);\\\n"
|
||
2065 | "}\n";
|
||
2066 | }else
|
||
2067 | { |
||
2068 | //loop unroll
|
||
2069 | out << "#define REPEAT4(FUNCTION)\\\n"
|
||
2070 | "FUNCTION(0);\\\n"
|
||
2071 | "FUNCTION(1);\\\n"
|
||
2072 | "FUNCTION(2);\\\n"
|
||
2073 | "FUNCTION(3);\n";
|
||
2074 | } |
||
2075 | //tex(X)(Y)
|
||
2076 | //X: (CLR) (CENTER 0, LEFT -1, RIGHT +1)
|
||
2077 | //Y: (CDU) (CENTER 0, DOWN -1, UP +1)
|
||
2078 | out << "#define THRESHOLD0 " << threshold0 << "\n" |
||
2079 | "#define THRESHOLD1 " << threshold1 << "\n" |
||
2080 | "#define THRESHOLD2 " << threshold2 << "\n"; |
||
2081 | |||
2082 | out<< |
||
2083 | "uniform sampler2DRect tex; uniform sampler2DRect texU;\n"
|
||
2084 | "uniform sampler2DRect texD; void main ()\n"
|
||
2085 | "{\n"
|
||
2086 | " vec2 TexRU = vec2(gl_TexCoord[2].x, gl_TexCoord[4].y); \n"
|
||
2087 | " vec4 ccc = texture2DRect(tex, gl_TexCoord[0].xy);\n"
|
||
2088 | " vec4 clc = texture2DRect(tex, gl_TexCoord[1].xy);\n"
|
||
2089 | " vec4 crc = texture2DRect(tex, gl_TexCoord[2].xy);\n"
|
||
2090 | " vec4 ccd = texture2DRect(tex, gl_TexCoord[3].xy);\n"
|
||
2091 | " vec4 ccu = texture2DRect(tex, gl_TexCoord[4].xy);\n"
|
||
2092 | " vec4 cld = texture2DRect(tex, gl_TexCoord[5].xy);\n"
|
||
2093 | " vec4 clu = texture2DRect(tex, gl_TexCoord[6].xy);\n"
|
||
2094 | " vec4 crd = texture2DRect(tex, gl_TexCoord[7].xy);\n"
|
||
2095 | " vec4 cru = texture2DRect(tex, TexRU.xy);\n"
|
||
2096 | " vec4 cc = ccc;\n"
|
||
2097 | " vec4 v1[4], v2[4];\n"
|
||
2098 | " v1[0] = vec4(clc.g, ccc.g, ccd.b, ccc.b);\n"
|
||
2099 | " v1[1] = vec4(ccc.r, crc.r, ccd.a, ccc.a);\n"
|
||
2100 | " v1[2] = vec4(clc.a, ccc.a, ccc.r, ccu.r);\n"
|
||
2101 | " v1[3] = vec4(ccc.b, crc.b, ccc.g, ccu.g);\n"
|
||
2102 | " v2[0] = vec4(cld.a, clc.a, ccd.a, ccc.a);\n"
|
||
2103 | " v2[1] = vec4(ccd.b, ccc.b, crd.b, crc.b);\n"
|
||
2104 | " v2[2] = vec4(clc.g, clu.g, ccc.g, ccu.g);\n"
|
||
2105 | " v2[3] = vec4(ccc.r, ccu.r, crc.r, cru.r);\n"
|
||
2106 | |||
2107 | //test against 8 neighbours
|
||
2108 | //use variable to identify type of extremum
|
||
2109 | //1.0 for local maximum and -1.0 for minimum
|
||
2110 | << |
||
2111 | " vec4 key = vec4(0.0); \n"
|
||
2112 | " #define KEYTEST_STEP0(i) \\\n"
|
||
2113 | " {\\\n"
|
||
2114 | " bvec4 test1 = greaterThan(vec4(cc[i]), max(v1[i], v2[i])), test2 = lessThan(vec4(cc[i]), min(v1[i], v2[i]));\\\n"
|
||
2115 | " key[i] = cc[i] > THRESHOLD0 && all(test1)?1.0: 0.0;\\\n"
|
||
2116 | " key[i] = cc[i] < -THRESHOLD0 && all(test2)? -1.0: key[i];\\\n"
|
||
2117 | " }\n"
|
||
2118 | " REPEAT4(KEYTEST_STEP0);\n"
|
||
2119 | " if(gl_TexCoord[0].x < 1.0) {key.rb = vec2(0.0);}\n"
|
||
2120 | " if(gl_TexCoord[0].y < 1.0) {key.rg = vec2(0.0);}\n"
|
||
2121 | " gl_FragColor = vec4(0.0);\n"
|
||
2122 | " if(any(notEqual(key, vec4(0.0)))) {\n";
|
||
2123 | |||
2124 | //do edge supression first..
|
||
2125 | //vector v1 is < (-1, 0), (1, 0), (0,-1), (0, 1)>
|
||
2126 | //vector v2 is < (-1,-1), (-1,1), (1,-1), (1, 1)>
|
||
2127 | |||
2128 | out<< |
||
2129 | " float fxx[4], fyy[4], fxy[4], fx[4], fy[4];\n"
|
||
2130 | " #define EDGE_SUPPRESION(i) \\\n"
|
||
2131 | " if(key[i] != 0)\\\n"
|
||
2132 | " {\\\n"
|
||
2133 | " vec4 D2 = v1[i].xyzw - cc[i];\\\n"
|
||
2134 | " vec2 D4 = v2[i].xw - v2[i].yz;\\\n"
|
||
2135 | " vec2 D5 = 0.5*(v1[i].yw-v1[i].xz); \\\n"
|
||
2136 | " fx[i] = D5.x; fy[i] = D5.y ;\\\n"
|
||
2137 | " fxx[i] = D2.x + D2.y;\\\n"
|
||
2138 | " fyy[i] = D2.z + D2.w;\\\n"
|
||
2139 | " fxy[i] = 0.25*(D4.x + D4.y);\\\n"
|
||
2140 | " float fxx_plus_fyy = fxx[i] + fyy[i];\\\n"
|
||
2141 | " float score_up = fxx_plus_fyy*fxx_plus_fyy; \\\n"
|
||
2142 | " float score_down = (fxx[i]*fyy[i] - fxy[i]*fxy[i]);\\\n"
|
||
2143 | " if( score_down <= 0 || score_up > THRESHOLD2 * score_down)key[i] = 0;\\\n"
|
||
2144 | " }\n"
|
||
2145 | " REPEAT4(EDGE_SUPPRESION);\n"
|
||
2146 | " if(any(notEqual(key, vec4(0.0)))) {\n";
|
||
2147 | |||
2148 | ////////////////////////////////////////////////
|
||
2149 | //read 9 pixels of upper/lower level
|
||
2150 | out<< |
||
2151 | " vec4 v4[4], v5[4], v6[4];\n"
|
||
2152 | " ccc = texture2DRect(texU, gl_TexCoord[0].xy);\n"
|
||
2153 | " clc = texture2DRect(texU, gl_TexCoord[1].xy);\n"
|
||
2154 | " crc = texture2DRect(texU, gl_TexCoord[2].xy);\n"
|
||
2155 | " ccd = texture2DRect(texU, gl_TexCoord[3].xy);\n"
|
||
2156 | " ccu = texture2DRect(texU, gl_TexCoord[4].xy);\n"
|
||
2157 | " cld = texture2DRect(texU, gl_TexCoord[5].xy);\n"
|
||
2158 | " clu = texture2DRect(texU, gl_TexCoord[6].xy);\n"
|
||
2159 | " crd = texture2DRect(texU, gl_TexCoord[7].xy);\n"
|
||
2160 | " cru = texture2DRect(texU, TexRU.xy);\n"
|
||
2161 | " vec4 cu = ccc;\n"
|
||
2162 | " v4[0] = vec4(clc.g, ccc.g, ccd.b, ccc.b);\n"
|
||
2163 | " v4[1] = vec4(ccc.r, crc.r, ccd.a, ccc.a);\n"
|
||
2164 | " v4[2] = vec4(clc.a, ccc.a, ccc.r, ccu.r);\n"
|
||
2165 | " v4[3] = vec4(ccc.b, crc.b, ccc.g, ccu.g);\n"
|
||
2166 | " v6[0] = vec4(cld.a, clc.a, ccd.a, ccc.a);\n"
|
||
2167 | " v6[1] = vec4(ccd.b, ccc.b, crd.b, crc.b);\n"
|
||
2168 | " v6[2] = vec4(clc.g, clu.g, ccc.g, ccu.g);\n"
|
||
2169 | " v6[3] = vec4(ccc.r, ccu.r, crc.r, cru.r);\n"
|
||
2170 | << |
||
2171 | " #define KEYTEST_STEP1(i)\\\n"
|
||
2172 | " if(key[i] == 1.0)\\\n"
|
||
2173 | " {\\\n"
|
||
2174 | " bvec4 test = lessThan(vec4(cc[i]), max(v4[i], v6[i])); \\\n"
|
||
2175 | " if(cc[i] < cu[i] || any(test))key[i] = 0.0; \\\n"
|
||
2176 | " }else if(key[i] == -1.0)\\\n"
|
||
2177 | " {\\\n"
|
||
2178 | " bvec4 test = greaterThan(vec4(cc[i]), min(v4[i], v6[i])); \\\n"
|
||
2179 | " if(cc[i] > cu[i] || any(test) )key[i] = 0.0; \\\n"
|
||
2180 | " }\n"
|
||
2181 | " REPEAT4(KEYTEST_STEP1);\n"
|
||
2182 | " if(any(notEqual(key, vec4(0.0)))) { \n"
|
||
2183 | << |
||
2184 | " ccc = texture2DRect(texD, gl_TexCoord[0].xy);\n"
|
||
2185 | " clc = texture2DRect(texD, gl_TexCoord[1].xy);\n"
|
||
2186 | " crc = texture2DRect(texD, gl_TexCoord[2].xy);\n"
|
||
2187 | " ccd = texture2DRect(texD, gl_TexCoord[3].xy);\n"
|
||
2188 | " ccu = texture2DRect(texD, gl_TexCoord[4].xy);\n"
|
||
2189 | " cld = texture2DRect(texD, gl_TexCoord[5].xy);\n"
|
||
2190 | " clu = texture2DRect(texD, gl_TexCoord[6].xy);\n"
|
||
2191 | " crd = texture2DRect(texD, gl_TexCoord[7].xy);\n"
|
||
2192 | " cru = texture2DRect(texD, TexRU.xy);\n"
|
||
2193 | " vec4 cd = ccc;\n"
|
||
2194 | " v5[0] = vec4(clc.g, ccc.g, ccd.b, ccc.b);\n"
|
||
2195 | " v5[1] = vec4(ccc.r, crc.r, ccd.a, ccc.a);\n"
|
||
2196 | " v5[2] = vec4(clc.a, ccc.a, ccc.r, ccu.r);\n"
|
||
2197 | " v5[3] = vec4(ccc.b, crc.b, ccc.g, ccu.g);\n"
|
||
2198 | " v6[0] = vec4(cld.a, clc.a, ccd.a, ccc.a);\n"
|
||
2199 | " v6[1] = vec4(ccd.b, ccc.b, crd.b, crc.b);\n"
|
||
2200 | " v6[2] = vec4(clc.g, clu.g, ccc.g, ccu.g);\n"
|
||
2201 | " v6[3] = vec4(ccc.r, ccu.r, crc.r, cru.r);\n"
|
||
2202 | << |
||
2203 | " #define KEYTEST_STEP2(i)\\\n"
|
||
2204 | " if(key[i] == 1.0)\\\n"
|
||
2205 | " {\\\n"
|
||
2206 | " bvec4 test = lessThan(vec4(cc[i]), max(v5[i], v6[i]));\\\n"
|
||
2207 | " if(cc[i] < cd[i] || any(test))key[i] = 0.0; \\\n"
|
||
2208 | " }else if(key[i] == -1.0)\\\n"
|
||
2209 | " {\\\n"
|
||
2210 | " bvec4 test = greaterThan(vec4(cc[i]), min(v5[i], v6[i]));\\\n"
|
||
2211 | " if(cc[i] > cd[i] || any(test))key[i] = 0.0; \\\n"
|
||
2212 | " }\n"
|
||
2213 | " REPEAT4(KEYTEST_STEP2);\n"
|
||
2214 | " float keysum = dot(abs(key), vec4(1, 1, 1, 1)) ;\n"
|
||
2215 | " //assume there is only one keypoint in the four. \n"
|
||
2216 | " if(keysum==1.0) {\n";
|
||
2217 | |||
2218 | //////////////////////////////////////////////////////////////////////
|
||
2219 | if(GlobalUtil::_SubpixelLocalization)
|
||
2220 | |||
2221 | out << |
||
2222 | " vec3 offset = vec3(0, 0, 0); \n"
|
||
2223 | " #define TESTMOVE_KEYPOINT(idx) \\\n"
|
||
2224 | " if(key[idx] != 0) \\\n"
|
||
2225 | " {\\\n"
|
||
2226 | " cu[0] = cu[idx]; cd[0] = cd[idx]; cc[0] = cc[idx]; \\\n"
|
||
2227 | " v4[0] = v4[idx]; v5[0] = v5[idx]; \\\n"
|
||
2228 | " fxy[0] = fxy[idx]; fxx[0] = fxx[idx]; fyy[0] = fyy[idx]; \\\n"
|
||
2229 | " fx[0] = fx[idx]; fy[0] = fy[idx]; \\\n"
|
||
2230 | " }\n"
|
||
2231 | " TESTMOVE_KEYPOINT(1);\n"
|
||
2232 | " TESTMOVE_KEYPOINT(2);\n"
|
||
2233 | " TESTMOVE_KEYPOINT(3);\n"
|
||
2234 | << |
||
2235 | |||
2236 | " float fs = 0.5*( cu[0] - cd[0] ); \n"
|
||
2237 | " float fss = cu[0] + cd[0] - cc[0] - cc[0];\n"
|
||
2238 | " float fxs = 0.25 * (v4[0].y + v5[0].x - v4[0].x - v5[0].y);\n"
|
||
2239 | " float fys = 0.25 * (v4[0].w + v5[0].z - v4[0].z - v5[0].w);\n"
|
||
2240 | " vec4 A0, A1, A2 ; \n"
|
||
2241 | " A0 = vec4(fxx[0], fxy[0], fxs, -fx[0]); \n"
|
||
2242 | " A1 = vec4(fxy[0], fyy[0], fys, -fy[0]); \n"
|
||
2243 | " A2 = vec4(fxs, fys, fss, -fs); \n"
|
||
2244 | " vec3 x3 = abs(vec3(fxx[0], fxy[0], fxs)); \n"
|
||
2245 | " float maxa = max(max(x3.x, x3.y), x3.z); \n"
|
||
2246 | " if(maxa >= 1e-10 ) \n"
|
||
2247 | " { \n"
|
||
2248 | " if(x3.y ==maxa ) \n"
|
||
2249 | " { \n"
|
||
2250 | " vec4 TEMP = A1; A1 = A0; A0 = TEMP; \n"
|
||
2251 | " }else if( x3.z == maxa ) \n"
|
||
2252 | " { \n"
|
||
2253 | " vec4 TEMP = A2; A2 = A0; A0 = TEMP; \n"
|
||
2254 | " } \n"
|
||
2255 | " A0 /= A0.x; \n"
|
||
2256 | " A1 -= A1.x * A0; \n"
|
||
2257 | " A2 -= A2.x * A0; \n"
|
||
2258 | " vec2 x2 = abs(vec2(A1.y, A2.y)); \n"
|
||
2259 | " if( x2.y > x2.x ) \n"
|
||
2260 | " { \n"
|
||
2261 | " vec3 TEMP = A2.yzw; \n"
|
||
2262 | " A2.yzw = A1.yzw; \n"
|
||
2263 | " A1.yzw = TEMP; \n"
|
||
2264 | " x2.x = x2.y; \n"
|
||
2265 | " } \n"
|
||
2266 | " if(x2.x >= 1e-10) { \n"
|
||
2267 | " A1.yzw /= A1.y; \n"
|
||
2268 | " A2.yzw -= A2.y * A1.yzw; \n"
|
||
2269 | " if(abs(A2.z) >= 1e-10) {\n"
|
||
2270 | " offset.z = A2.w /A2.z; \n"
|
||
2271 | " offset.y = A1.w - offset.z*A1.z; \n"
|
||
2272 | " offset.x = A0.w - offset.z*A0.z - offset.y*A0.y; \n"
|
||
2273 | " bool test = (abs(cc[0] + 0.5*dot(vec3(fx[0], fy[0], fs), offset ))>THRESHOLD1) ;\n"
|
||
2274 | " if(!test || any( greaterThan(abs(offset), vec3(1.0)))) key = vec4(0.0);\n"
|
||
2275 | " }\n"
|
||
2276 | " }\n"
|
||
2277 | " }\n"
|
||
2278 | <<"\n"
|
||
2279 | " float keyv = dot(key, vec4(1.0, 2.0, 3.0, 4.0));\n"
|
||
2280 | " gl_FragColor = vec4(keyv, offset);\n"
|
||
2281 | " }}}}\n"
|
||
2282 | "}\n" <<'\0'; |
||
2283 | |||
2284 | else out << "\n" |
||
2285 | " float keyv = dot(key, vec4(1.0, 2.0, 3.0, 4.0));\n"
|
||
2286 | " gl_FragColor = vec4(keyv, 0, 0, 0);\n"
|
||
2287 | " }}}}\n"
|
||
2288 | "}\n" <<'\0'; |
||
2289 | |||
2290 | ProgramGLSL * program = new ProgramGLSL(buffer);
|
||
2291 | s_keypoint = program ; |
||
2292 | |||
2293 | //parameter
|
||
2294 | _param_dog_texu = glGetUniformLocation(*program, "texU");
|
||
2295 | _param_dog_texd = glGetUniformLocation(*program, "texD");
|
||
2296 | } |
||
2297 | void ShaderBagPKSL::SetDogTexParam(int texU, int texD) |
||
2298 | { |
||
2299 | glUniform1i(_param_dog_texu, 1);
|
||
2300 | glUniform1i(_param_dog_texd, 2);
|
||
2301 | } |
||
2302 | void ShaderBagPKSL::SetGenListStepParam(int tex, int tex0) |
||
2303 | { |
||
2304 | glUniform1i(_param_genlist_step_tex0, 1);
|
||
2305 | } |
||
2306 | |||
2307 | void ShaderBagPKSL::SetGenVBOParam(float width, float fwidth,float size) |
||
2308 | { |
||
2309 | float sizes[4] = {size*3.0f, fwidth, width, 1.0f/width}; |
||
2310 | glUniform4fv(_param_genvbo_size, 1, sizes);
|
||
2311 | } |
||
2312 | void ShaderBagPKSL::SetGradPassParam(int texP) |
||
2313 | { |
||
2314 | glUniform1i(_param_grad_pass_texp, 1);
|
||
2315 | } |
||
2316 | |||
2317 | void ShaderBagPKSL::LoadDescriptorShader()
|
||
2318 | { |
||
2319 | GlobalUtil::_DescriptorPPT = 16;
|
||
2320 | LoadDescriptorShaderF2(); |
||
2321 | s_rect_description = LoadDescriptorProgramRECT(); |
||
2322 | } |
||
2323 | |||
2324 | ProgramGLSL* ShaderBagPKSL::LoadDescriptorProgramRECT() |
||
2325 | { |
||
2326 | //one shader outpout 128/8 = 16 , each fragout encodes 4
|
||
2327 | //const double twopi = 2.0*3.14159265358979323846;
|
||
2328 | //const double rpi = 8.0/twopi;
|
||
2329 | char buffer[10240]; |
||
2330 | ostrstream out(buffer, 10240);
|
||
2331 | |||
2332 | out<<setprecision(8);
|
||
2333 | |||
2334 | out<<"\n"
|
||
2335 | "#define M_PI 3.14159265358979323846\n"
|
||
2336 | "#define TWO_PI (2.0*M_PI)\n"
|
||
2337 | "#define RPI 1.2732395447351626861510701069801\n"
|
||
2338 | "#define WF size.z\n"
|
||
2339 | "uniform sampler2DRect tex; \n"
|
||
2340 | "uniform sampler2DRect gtex; \n"
|
||
2341 | "uniform sampler2DRect otex; \n"
|
||
2342 | "uniform vec4 dsize; \n"
|
||
2343 | "uniform vec3 size; \n"
|
||
2344 | "void main() \n"
|
||
2345 | "{\n"
|
||
2346 | " vec2 dim = size.xy; //image size \n"
|
||
2347 | " float index = dsize.x*floor(gl_TexCoord[0].y * 0.5) + gl_TexCoord[0].x;\n"
|
||
2348 | " float idx = 8.0* fract(index * 0.125) + 8.0 * floor(2.0* fract(gl_TexCoord[0].y * 0.5)); \n"
|
||
2349 | " index = floor(index*0.125)+ 0.49; \n"
|
||
2350 | " vec2 coord = floor( vec2( mod(index, dsize.z), index*dsize.w)) + 0.5 ;\n"
|
||
2351 | " vec2 pos = texture2DRect(tex, coord).xy; \n"
|
||
2352 | " vec2 wsz = texture2DRect(tex, coord).zw;\n"
|
||
2353 | " float aspect_ratio = wsz.y / wsz.x;\n"
|
||
2354 | " float aspect_sq = aspect_ratio * aspect_ratio; \n"
|
||
2355 | " vec2 spt = wsz * 0.25; vec2 ispt = 1.0 / spt; \n";
|
||
2356 | |||
2357 | //here cscs is actually (cos, sin, -cos, -sin) * (factor: 3)*sigma
|
||
2358 | //and rots is (cos, sin, -cos, -sin ) /(factor*sigma)
|
||
2359 | //devide the 4x4 sift grid into 16 1x1 block, and each corresponds to a shader thread
|
||
2360 | //To use linear interoplation, 1x1 is increased to 2x2, by adding 0.5 to each side
|
||
2361 | out<< |
||
2362 | " vec4 temp; vec2 pt; \n"
|
||
2363 | " pt.x = pos.x + fract(idx*0.25) * wsz.x; \n"
|
||
2364 | " pt.y = pos.y + (floor(idx*0.25) + 0.5) * spt.y; \n";
|
||
2365 | |||
2366 | //get a horizontal bounding box of the rotated rectangle
|
||
2367 | out<< |
||
2368 | " vec4 sz; \n"
|
||
2369 | " sz.xy = max(pt - spt, vec2(2,2));\n"
|
||
2370 | " sz.zw = min(pt + spt, dim - 3); \n"
|
||
2371 | " sz = floor(sz * 0.5)+0.5;"; //move sample point to pixel center |
||
2372 | //get voting for two box
|
||
2373 | |||
2374 | out<<"\n"
|
||
2375 | " vec4 DA, DB; vec2 spos; \n"
|
||
2376 | " DA = DB = vec4(0, 0, 0, 0); \n"
|
||
2377 | " vec4 nox = vec4(0, 1.0, 0.0, 1.0); \n"
|
||
2378 | " vec4 noy = vec4(0, 0.0, 1.0, 1.0); \n"
|
||
2379 | " for(spos.y = sz.y; spos.y <= sz.w; spos.y+=1.0) \n"
|
||
2380 | " { \n"
|
||
2381 | " for(spos.x = sz.x; spos.x <= sz.z; spos.x+=1.0) \n"
|
||
2382 | " { \n"
|
||
2383 | " vec2 tpt = spos * 2.0 - pt - 0.5; \n"
|
||
2384 | " vec4 nx = (tpt.x + nox) * ispt.x; \n"
|
||
2385 | " vec4 ny = (tpt.y + noy) * ispt.y; \n"
|
||
2386 | " vec4 nxn = abs(nx), nyn = abs(ny); \n"
|
||
2387 | " bvec4 inside = lessThan(max(nxn, nyn) , vec4(1.0)); \n"
|
||
2388 | " if(any(inside))\n"
|
||
2389 | " {\n"
|
||
2390 | " vec4 gg = texture2DRect(gtex, spos);\n"
|
||
2391 | " vec4 oo = texture2DRect(otex, spos);\n"
|
||
2392 | //" vec4 cc = cos(oo), ss = sin(oo); \n"
|
||
2393 | //" oo = atan(ss* aspect_ratio, cc); \n"
|
||
2394 | //" gg = gg * sqrt(ss * ss * aspect_sq + cc * cc); \n "
|
||
2395 | " vec4 theta0 = (- oo)*RPI;\n"
|
||
2396 | " vec4 theta = 8.0 * fract(1.0 + 0.125 * theta0); \n"
|
||
2397 | " vec4 theta1 = floor(theta); \n"
|
||
2398 | " vec4 weight = (1 - nxn) * (1 - nyn) * gg; \n"
|
||
2399 | " vec4 weight2 = (theta - theta1) * weight; \n"
|
||
2400 | " vec4 weight1 = weight - weight2; \n"
|
||
2401 | " for(int i = 0;i < 4; i++)\n"
|
||
2402 | " {\n"
|
||
2403 | " if(inside[i])\n"
|
||
2404 | " {\n"
|
||
2405 | " DA += vec4(equal(vec4(theta1[i]), vec4(0, 1, 2, 3)))*weight1[i]; \n"
|
||
2406 | " DA += vec4(equal(vec4(theta1[i]), vec4(7, 0, 1, 2)))*weight2[i]; \n"
|
||
2407 | " DB += vec4(equal(vec4(theta1[i]), vec4(4, 5, 6, 7)))*weight1[i]; \n"
|
||
2408 | " DB += vec4(equal(vec4(theta1[i]), vec4(3, 4, 5, 6)))*weight2[i]; \n"
|
||
2409 | " }\n"
|
||
2410 | " }\n"
|
||
2411 | " }\n"
|
||
2412 | " }\n"
|
||
2413 | " }\n";
|
||
2414 | out<< |
||
2415 | " gl_FragData[0] = DA; gl_FragData[1] = DB;\n"
|
||
2416 | "}\n"<<'\0'; |
||
2417 | |||
2418 | ProgramGLSL * program = new ProgramGLSL(buffer);
|
||
2419 | if(program->IsNative())
|
||
2420 | { |
||
2421 | return program;
|
||
2422 | } |
||
2423 | else
|
||
2424 | { |
||
2425 | delete program;
|
||
2426 | return NULL; |
||
2427 | } |
||
2428 | } |
||
2429 | |||
2430 | ProgramGLSL* ShaderBagPKSL::LoadDescriptorProgramPKSL() |
||
2431 | { |
||
2432 | //one shader outpout 128/8 = 16 , each fragout encodes 4
|
||
2433 | //const double twopi = 2.0*3.14159265358979323846;
|
||
2434 | //const double rpi = 8.0/twopi;
|
||
2435 | char buffer[10240]; |
||
2436 | ostrstream out(buffer, 10240);
|
||
2437 | |||
2438 | out<<setprecision(8);
|
||
2439 | |||
2440 | out<<"\n"
|
||
2441 | "#define M_PI 3.14159265358979323846\n"
|
||
2442 | "#define TWO_PI (2.0*M_PI)\n"
|
||
2443 | "#define RPI 1.2732395447351626861510701069801\n"
|
||
2444 | "#define WF size.z\n"
|
||
2445 | "uniform sampler2DRect tex; \n"
|
||
2446 | "uniform sampler2DRect gtex; \n"
|
||
2447 | "uniform sampler2DRect otex; \n"
|
||
2448 | "uniform vec4 dsize; \n"
|
||
2449 | "uniform vec3 size; \n"
|
||
2450 | "void main() \n"
|
||
2451 | "{\n"
|
||
2452 | " vec2 dim = size.xy; //image size \n"
|
||
2453 | " float index = dsize.x*floor(gl_TexCoord[0].y * 0.5) + gl_TexCoord[0].x;\n"
|
||
2454 | " float idx = 8.0* fract(index * 0.125) + 8.0 * floor(2.0* fract(gl_TexCoord[0].y * 0.5)); \n"
|
||
2455 | " index = floor(index*0.125)+ 0.49; \n"
|
||
2456 | " vec2 coord = floor( vec2( mod(index, dsize.z), index*dsize.w)) + 0.5 ;\n"
|
||
2457 | " vec2 pos = texture2DRect(tex, coord).xy; \n"
|
||
2458 | " if(any(lessThan(pos.xy, vec2(1.0))) || any(greaterThan(pos.xy, dim-1.0))) "
|
||
2459 | " //discard; \n"
|
||
2460 | " { gl_FragData[0] = gl_FragData[1] = vec4(0.0); return; }\n"
|
||
2461 | " float anglef = texture2DRect(tex, coord).z;\n"
|
||
2462 | " if(anglef > M_PI) anglef -= TWO_PI;\n"
|
||
2463 | " float sigma = texture2DRect(tex, coord).w; \n"
|
||
2464 | " float spt = abs(sigma * WF); //default to be 3*sigma \n";
|
||
2465 | //rotation
|
||
2466 | out<< |
||
2467 | " vec4 cscs, rots; \n"
|
||
2468 | " cscs.x = cos(anglef); cscs.y = sin(anglef); \n"
|
||
2469 | " cscs.zw = - cscs.xy; \n"
|
||
2470 | " rots = cscs /spt; \n"
|
||
2471 | " cscs *= spt; \n";
|
||
2472 | |||
2473 | //here cscs is actually (cos, sin, -cos, -sin) * (factor: 3)*sigma
|
||
2474 | //and rots is (cos, sin, -cos, -sin ) /(factor*sigma)
|
||
2475 | //devide the 4x4 sift grid into 16 1x1 block, and each corresponds to a shader thread
|
||
2476 | //To use linear interoplation, 1x1 is increased to 2x2, by adding 0.5 to each side
|
||
2477 | out<< |
||
2478 | " vec4 temp; vec2 pt, offsetpt; \n"
|
||
2479 | " /*the fraction part of idx is .5*/ \n"
|
||
2480 | " offsetpt.x = 4.0* fract(idx*0.25) - 2.0; \n"
|
||
2481 | " offsetpt.y = floor(idx*0.25) - 1.5; \n"
|
||
2482 | " temp = cscs.xwyx*offsetpt.xyxy; \n"
|
||
2483 | " pt = pos + temp.xz + temp.yw; \n";
|
||
2484 | |||
2485 | //get a horizontal bounding box of the rotated rectangle
|
||
2486 | out<< |
||
2487 | " vec2 bwin = abs(cscs.xy); \n"
|
||
2488 | " float bsz = bwin.x + bwin.y; \n"
|
||
2489 | " vec4 sz; \n"
|
||
2490 | " sz.xy = max(pt - vec2(bsz), vec2(2,2));\n"
|
||
2491 | " sz.zw = min(pt + vec2(bsz), dim - 3); \n"
|
||
2492 | " sz = floor(sz * 0.5)+0.5;"; //move sample point to pixel center |
||
2493 | //get voting for two box
|
||
2494 | |||
2495 | out<<"\n"
|
||
2496 | " vec4 DA, DB; vec2 spos; \n"
|
||
2497 | " DA = DB = vec4(0, 0, 0, 0); \n"
|
||
2498 | " vec4 nox = vec4(0, rots.xy, rots.x + rots.y); \n"
|
||
2499 | " vec4 noy = vec4(0, rots.wx, rots.w + rots.x); \n"
|
||
2500 | " for(spos.y = sz.y; spos.y <= sz.w; spos.y+=1.0) \n"
|
||
2501 | " { \n"
|
||
2502 | " for(spos.x = sz.x; spos.x <= sz.z; spos.x+=1.0) \n"
|
||
2503 | " { \n"
|
||
2504 | " vec2 tpt = spos * 2.0 - pt - 0.5; \n"
|
||
2505 | " vec4 temp = rots.xywx * tpt.xyxy; \n"
|
||
2506 | " vec2 temp2 = temp.xz + temp.yw; \n"
|
||
2507 | " vec4 nx = temp2.x + nox; \n"
|
||
2508 | " vec4 ny = temp2.y + noy; \n"
|
||
2509 | " vec4 nxn = abs(nx), nyn = abs(ny); \n"
|
||
2510 | " bvec4 inside = lessThan(max(nxn, nyn) , vec4(1.0)); \n"
|
||
2511 | " if(any(inside))\n"
|
||
2512 | " {\n"
|
||
2513 | " vec4 gg = texture2DRect(gtex, spos);\n"
|
||
2514 | " vec4 oo = texture2DRect(otex, spos);\n"
|
||
2515 | " vec4 theta0 = (anglef - oo)*RPI;\n"
|
||
2516 | " vec4 theta = 8.0 * fract(1.0 + 0.125 * theta0); \n"
|
||
2517 | " vec4 theta1 = floor(theta); \n"
|
||
2518 | " vec4 diffx = nx + offsetpt.x, diffy = ny + offsetpt.y; \n"
|
||
2519 | " vec4 ww = exp(-0.125 * (diffx * diffx + diffy * diffy )); \n"
|
||
2520 | " vec4 weight = (1 - nxn) * (1 - nyn) * gg * ww; \n"
|
||
2521 | " vec4 weight2 = (theta - theta1) * weight; \n"
|
||
2522 | " vec4 weight1 = weight - weight2; \n"
|
||
2523 | " for(int i = 0;i < 4; i++)\n"
|
||
2524 | " {\n"
|
||
2525 | " if(inside[i])\n"
|
||
2526 | " {\n"
|
||
2527 | " DA += vec4(equal(vec4(theta1[i]), vec4(0, 1, 2, 3)))*weight1[i]; \n"
|
||
2528 | " DA += vec4(equal(vec4(theta1[i]), vec4(7, 0, 1, 2)))*weight2[i]; \n"
|
||
2529 | " DB += vec4(equal(vec4(theta1[i]), vec4(4, 5, 6, 7)))*weight1[i]; \n"
|
||
2530 | " DB += vec4(equal(vec4(theta1[i]), vec4(3, 4, 5, 6)))*weight2[i]; \n"
|
||
2531 | " }\n"
|
||
2532 | " }\n"
|
||
2533 | " }\n"
|
||
2534 | " }\n"
|
||
2535 | " }\n";
|
||
2536 | out<< |
||
2537 | " gl_FragData[0] = DA; gl_FragData[1] = DB;\n"
|
||
2538 | "}\n"<<'\0'; |
||
2539 | |||
2540 | ProgramGLSL * program = new ProgramGLSL(buffer);
|
||
2541 | if(program->IsNative())
|
||
2542 | { |
||
2543 | return program;
|
||
2544 | } |
||
2545 | else
|
||
2546 | { |
||
2547 | delete program;
|
||
2548 | return NULL; |
||
2549 | } |
||
2550 | } |
||
2551 | |||
2552 | void ShaderBagPKSL::LoadDescriptorShaderF2()
|
||
2553 | { |
||
2554 | |||
2555 | ProgramGLSL * program = LoadDescriptorProgramPKSL(); |
||
2556 | if( program )
|
||
2557 | { |
||
2558 | s_descriptor_fp = program; |
||
2559 | _param_descriptor_gtex = glGetUniformLocation(*program, "gtex");
|
||
2560 | _param_descriptor_otex = glGetUniformLocation(*program, "otex");
|
||
2561 | _param_descriptor_size = glGetUniformLocation(*program, "size");
|
||
2562 | _param_descriptor_dsize = glGetUniformLocation(*program, "dsize");
|
||
2563 | } |
||
2564 | } |
||
2565 | |||
2566 | |||
2567 | |||
2568 | void ShaderBagPKSL::SetSimpleOrientationInput(int oTex, float sigma, float sigma_step) |
||
2569 | { |
||
2570 | glUniform1i(_param_orientation_gtex, 1);
|
||
2571 | glUniform2f(_param_orientation_size, sigma, sigma_step); |
||
2572 | } |
||
2573 | |||
2574 | |||
2575 | void ShaderBagPKSL::SetFeatureOrientationParam(int gtex, int width, int height, float sigma, int otex, float step) |
||
2576 | { |
||
2577 | ///
|
||
2578 | glUniform1i(_param_orientation_gtex, 1);
|
||
2579 | glUniform1i(_param_orientation_otex, 2);
|
||
2580 | |||
2581 | float size[4]; |
||
2582 | size[0] = (float)width; |
||
2583 | size[1] = (float)height; |
||
2584 | size[2] = sigma;
|
||
2585 | size[3] = step;
|
||
2586 | glUniform4fv(_param_orientation_size, 1, size);
|
||
2587 | |||
2588 | } |
||
2589 | |||
2590 | void ShaderBagPKSL::SetFeatureDescirptorParam(int gtex, int otex, float dwidth, float fwidth, float width, float height, float sigma) |
||
2591 | { |
||
2592 | if(sigma == 0 && s_rect_description) |
||
2593 | { |
||
2594 | //rectangle description mode
|
||
2595 | s_rect_description->UseProgram(); |
||
2596 | GLint param_descriptor_gtex = glGetUniformLocation(*s_rect_description, "gtex");
|
||
2597 | GLint param_descriptor_otex = glGetUniformLocation(*s_rect_description, "otex");
|
||
2598 | GLint param_descriptor_size = glGetUniformLocation(*s_rect_description, "size");
|
||
2599 | GLint param_descriptor_dsize = glGetUniformLocation(*s_rect_description, "dsize");
|
||
2600 | ///
|
||
2601 | glUniform1i(param_descriptor_gtex, 1);
|
||
2602 | glUniform1i(param_descriptor_otex, 2);
|
||
2603 | |||
2604 | float dsize[4] ={dwidth, 1.0f/dwidth, fwidth, 1.0f/fwidth}; |
||
2605 | glUniform4fv(param_descriptor_dsize, 1, dsize);
|
||
2606 | float size[3]; |
||
2607 | size[0] = width;
|
||
2608 | size[1] = height;
|
||
2609 | size[2] = GlobalUtil::_DescriptorWindowFactor;
|
||
2610 | glUniform3fv(param_descriptor_size, 1, size);
|
||
2611 | }else
|
||
2612 | { |
||
2613 | ///
|
||
2614 | glUniform1i(_param_descriptor_gtex, 1);
|
||
2615 | glUniform1i(_param_descriptor_otex, 2);
|
||
2616 | |||
2617 | |||
2618 | float dsize[4] ={dwidth, 1.0f/dwidth, fwidth, 1.0f/fwidth}; |
||
2619 | glUniform4fv(_param_descriptor_dsize, 1, dsize);
|
||
2620 | float size[3]; |
||
2621 | size[0] = width;
|
||
2622 | size[1] = height;
|
||
2623 | size[2] = GlobalUtil::_DescriptorWindowFactor;
|
||
2624 | glUniform3fv(_param_descriptor_size, 1, size);
|
||
2625 | } |
||
2626 | |||
2627 | } |
||
2628 | |||
2629 | |||
2630 | void ShaderBagPKSL::SetGenListEndParam(int ktex) |
||
2631 | { |
||
2632 | glUniform1i(_param_genlist_end_ktex, 1);
|
||
2633 | } |
||
2634 | void ShaderBagPKSL::SetGenListInitParam(int w, int h) |
||
2635 | { |
||
2636 | float bbox[4] = {(w -1.0f) * 0.5f +0.25f, (w-1.0f) * 0.5f - 0.25f, (h - 1.0f) * 0.5f + 0.25f, (h-1.0f) * 0.5f - 0.25f}; |
||
2637 | glUniform4fv(_param_genlist_init_bbox, 1, bbox);
|
||
2638 | } |
||
2639 | |||
2640 | void ShaderBagPKSL::SetMarginCopyParam(int xmax, int ymax) |
||
2641 | { |
||
2642 | float truncate[4]; |
||
2643 | truncate[0] = (xmax - 0.5f) * 0.5f; //((xmax + 1) >> 1) - 0.5f; |
||
2644 | truncate[1] = (ymax - 0.5f) * 0.5f; //((ymax + 1) >> 1) - 0.5f; |
||
2645 | truncate[2] = (xmax %2 == 1)? 0.0f: 1.0f; |
||
2646 | truncate[3] = truncate[2] + (((ymax % 2) == 1)? 0.0f : 2.0f); |
||
2647 | glUniform4fv(_param_margin_copy_truncate, 1, truncate);
|
||
2648 | } |