root / rgbdslam / external / siftgpu / src / SiftGPU / CuTexImage.cpp @ 9240aaa3
History | View | Annotate | Download (6.32 KB)
1 | 9240aaa3 | Alex | ////////////////////////////////////////////////////////////////////////////
|
---|---|---|---|
2 | // File: CuTexImage.cpp
|
||
3 | // Author: Changchang Wu
|
||
4 | // Description : implementation of the CuTexImage class.
|
||
5 | //
|
||
6 | // Copyright (c) 2007 University of North Carolina at Chapel Hill
|
||
7 | // All Rights Reserved
|
||
8 | //
|
||
9 | // Permission to use, copy, modify and distribute this software and its
|
||
10 | // documentation for educational, research and non-profit purposes, without
|
||
11 | // fee, and without a written agreement is hereby granted, provided that the
|
||
12 | // above copyright notice and the following paragraph appear in all copies.
|
||
13 | //
|
||
14 | // The University of North Carolina at Chapel Hill make no representations
|
||
15 | // about the suitability of this software for any purpose. It is provided
|
||
16 | // 'as is' without express or implied warranty.
|
||
17 | //
|
||
18 | // Please send BUG REPORTS to ccwu@cs.unc.edu
|
||
19 | //
|
||
20 | ////////////////////////////////////////////////////////////////////////////
|
||
21 | |||
22 | #if defined(CUDA_SIFTGPU_ENABLED)
|
||
23 | |||
24 | #include "GL/glew.h" |
||
25 | #include <iostream> |
||
26 | #include <vector> |
||
27 | #include <algorithm> |
||
28 | #include <stdlib.h> |
||
29 | #include <math.h> |
||
30 | using namespace std; |
||
31 | |||
32 | #include <cuda.h> |
||
33 | #include <cuda_runtime_api.h> |
||
34 | #include <cuda_gl_interop.h> |
||
35 | |||
36 | #include "GlobalUtil.h" |
||
37 | #include "GLTexImage.h" |
||
38 | #include "CuTexImage.h" |
||
39 | #include "ProgramCU.h" |
||
40 | |||
41 | #if CUDA_VERSION <= 2010 && defined(SIFTGPU_ENABLE_LINEAR_TEX2D) |
||
42 | #error "Require CUDA 2.2 or higher" |
||
43 | #endif
|
||
44 | |||
45 | |||
46 | CuTexImage::CuTexImage() |
||
47 | { |
||
48 | _cuData = NULL;
|
||
49 | _cuData2D = NULL;
|
||
50 | _fromPBO = 0;
|
||
51 | _numChannel = _numBytes = 0;
|
||
52 | _imgWidth = _imgHeight = _texWidth = _texHeight = 0;
|
||
53 | } |
||
54 | |||
55 | CuTexImage::CuTexImage(int width, int height, int nchannel, GLuint pbo) |
||
56 | { |
||
57 | _cuData = NULL;
|
||
58 | |||
59 | //check size of pbo
|
||
60 | GLint bsize, esize = width * height * nchannel * sizeof(float); |
||
61 | glBindBuffer(GL_PIXEL_PACK_BUFFER_ARB, pbo); |
||
62 | glGetBufferParameteriv(GL_PIXEL_PACK_BUFFER_ARB, GL_BUFFER_SIZE, &bsize); |
||
63 | if(bsize < esize)
|
||
64 | { |
||
65 | glBufferData(GL_PIXEL_PACK_BUFFER_ARB, esize, NULL, GL_STATIC_DRAW_ARB);
|
||
66 | glGetBufferParameteriv(GL_PIXEL_PACK_BUFFER_ARB, GL_BUFFER_SIZE, &bsize); |
||
67 | } |
||
68 | glBindBuffer(GL_PIXEL_PACK_BUFFER_ARB, 0);
|
||
69 | if(bsize >=esize)
|
||
70 | { |
||
71 | |||
72 | cudaGLRegisterBufferObject(pbo); |
||
73 | cudaGLMapBufferObject(&_cuData, pbo); |
||
74 | ProgramCU::CheckErrorCUDA("cudaGLMapBufferObject");
|
||
75 | _fromPBO = pbo; |
||
76 | }else
|
||
77 | { |
||
78 | _cuData = NULL;
|
||
79 | _fromPBO = 0;
|
||
80 | } |
||
81 | if(_cuData)
|
||
82 | { |
||
83 | _numBytes = bsize; |
||
84 | _imgWidth = width; |
||
85 | _imgHeight = height; |
||
86 | _numChannel = nchannel; |
||
87 | }else
|
||
88 | { |
||
89 | _numBytes = 0;
|
||
90 | _imgWidth = 0;
|
||
91 | _imgHeight = 0;
|
||
92 | _numChannel = 0;
|
||
93 | } |
||
94 | |||
95 | _texWidth = _texHeight =0;
|
||
96 | |||
97 | _cuData2D = NULL;
|
||
98 | } |
||
99 | |||
100 | CuTexImage::~CuTexImage() |
||
101 | { |
||
102 | |||
103 | |||
104 | if(_fromPBO)
|
||
105 | { |
||
106 | cudaGLUnmapBufferObject(_fromPBO); |
||
107 | cudaGLUnregisterBufferObject(_fromPBO); |
||
108 | }else if(_cuData) |
||
109 | { |
||
110 | cudaFree(_cuData); |
||
111 | } |
||
112 | if(_cuData2D) cudaFreeArray(_cuData2D);
|
||
113 | } |
||
114 | |||
115 | void CuTexImage::SetImageSize(int width, int height) |
||
116 | { |
||
117 | _imgWidth = width; |
||
118 | _imgHeight = height; |
||
119 | } |
||
120 | |||
121 | void CuTexImage::InitTexture(int width, int height, int nchannel) |
||
122 | { |
||
123 | int size;
|
||
124 | _imgWidth = width; |
||
125 | _imgHeight = height; |
||
126 | _numChannel = min(max(nchannel, 1), 4); |
||
127 | |||
128 | size = width * height * _numChannel * sizeof(float); |
||
129 | |||
130 | if(size <= _numBytes) return; |
||
131 | |||
132 | if(_cuData) cudaFree(_cuData);
|
||
133 | |||
134 | //allocate the array data
|
||
135 | cudaMalloc(&_cuData, _numBytes = size); |
||
136 | |||
137 | #ifdef _DEBUG
|
||
138 | ProgramCU::CheckErrorCUDA("CuTexImage::InitTexture");
|
||
139 | #endif
|
||
140 | } |
||
141 | |||
142 | void CuTexImage::CopyFromHost(const void * buf) |
||
143 | { |
||
144 | if(_cuData == NULL) return; |
||
145 | cudaMemcpy( _cuData, buf, _imgWidth * _imgHeight * _numChannel * sizeof(float), cudaMemcpyHostToDevice); |
||
146 | } |
||
147 | |||
148 | void CuTexImage::CopyToHost(void * buf) |
||
149 | { |
||
150 | if(_cuData == NULL) return; |
||
151 | cudaMemcpy(buf, _cuData, _imgWidth * _imgHeight * _numChannel * sizeof(float), cudaMemcpyDeviceToHost); |
||
152 | } |
||
153 | |||
154 | void CuTexImage::CopyToHost(void * buf, int stream) |
||
155 | { |
||
156 | if(_cuData == NULL) return; |
||
157 | cudaMemcpyAsync(buf, _cuData, _imgWidth * _imgHeight * _numChannel * sizeof(float), cudaMemcpyDeviceToHost, (cudaStream_t)stream); |
||
158 | } |
||
159 | |||
160 | void CuTexImage::InitTexture2D()
|
||
161 | { |
||
162 | #if !defined(SIFTGPU_ENABLE_LINEAR_TEX2D)
|
||
163 | if(_cuData2D && (_texWidth < _imgWidth || _texHeight < _imgHeight))
|
||
164 | { |
||
165 | cudaFreeArray(_cuData2D); |
||
166 | _cuData2D = NULL;
|
||
167 | } |
||
168 | |||
169 | if(_cuData2D == NULL) |
||
170 | { |
||
171 | _texWidth = max(_texWidth, _imgWidth); |
||
172 | _texHeight = max(_texHeight, _imgHeight); |
||
173 | cudaChannelFormatDesc desc; |
||
174 | desc.f = cudaChannelFormatKindFloat; |
||
175 | desc.x = sizeof(float) * 8; |
||
176 | desc.y = _numChannel >=2 ? sizeof(float) * 8 : 0; |
||
177 | desc.z = _numChannel >=3 ? sizeof(float) * 8 : 0; |
||
178 | desc.w = _numChannel >=4 ? sizeof(float) * 8 : 0; |
||
179 | cudaMallocArray(&_cuData2D, &desc, _texWidth, _texHeight); |
||
180 | ProgramCU::CheckErrorCUDA("cudaMallocArray");
|
||
181 | } |
||
182 | #endif
|
||
183 | } |
||
184 | |||
185 | void CuTexImage::CopyToTexture2D()
|
||
186 | { |
||
187 | #if !defined(SIFTGPU_ENABLE_LINEAR_TEX2D)
|
||
188 | InitTexture2D(); |
||
189 | |||
190 | if(_cuData2D)
|
||
191 | { |
||
192 | cudaMemcpy2DToArray(_cuData2D, 0, 0, _cuData, _imgWidth* _numChannel* sizeof(float) , |
||
193 | _imgWidth * _numChannel*sizeof(float), _imgHeight, cudaMemcpyDeviceToDevice); |
||
194 | ProgramCU::CheckErrorCUDA("cudaMemcpy2DToArray");
|
||
195 | } |
||
196 | #endif
|
||
197 | |||
198 | } |
||
199 | |||
200 | int CuTexImage::DebugCopyToTexture2D()
|
||
201 | { |
||
202 | |||
203 | /* CuTexImage tex;
|
||
204 | float data1[2][3] = {{1, 2, 5}, {3, 4, 5}}, data2[2][5];
|
||
205 | tex.InitTexture(3, 2, 1);
|
||
206 | cudaMemcpy(tex._cuData, data1[0], 6 * sizeof(float), cudaMemcpyHostToDevice);
|
||
207 | cudaMemcpy(data1, tex._cuData, 4 * sizeof(float) , cudaMemcpyDeviceToHost);
|
||
208 | tex._texWidth =5; tex._texHeight = 2;
|
||
209 | tex.CopyToTexture2D();
|
||
210 | cudaMemcpyFromArray(data2[0], tex._cuData2D, 0, 0, 10 * sizeof(float), cudaMemcpyDeviceToHost);*/
|
||
211 | |||
212 | return 1; |
||
213 | } |
||
214 | |||
215 | |||
216 | |||
217 | void CuTexImage::CopyFromPBO(int width, int height, GLuint pbo) |
||
218 | { |
||
219 | void* pbuf =NULL; |
||
220 | GLint esize = width * height * sizeof(float); |
||
221 | cudaGLRegisterBufferObject(pbo); |
||
222 | cudaGLMapBufferObject(&pbuf, pbo); |
||
223 | |||
224 | cudaMemcpy(_cuData, pbuf, esize, cudaMemcpyDeviceToDevice); |
||
225 | |||
226 | cudaGLUnmapBufferObject(pbo); |
||
227 | cudaGLUnregisterBufferObject(pbo); |
||
228 | } |
||
229 | |||
230 | int CuTexImage::CopyToPBO(GLuint pbo)
|
||
231 | { |
||
232 | void* pbuf =NULL; |
||
233 | GLint bsize, esize = _imgWidth * _imgHeight * sizeof(float) * _numChannel; |
||
234 | glBindBuffer(GL_PIXEL_PACK_BUFFER_ARB, pbo); |
||
235 | glGetBufferParameteriv(GL_PIXEL_PACK_BUFFER_ARB, GL_BUFFER_SIZE, &bsize); |
||
236 | if(bsize < esize)
|
||
237 | { |
||
238 | glBufferData(GL_PIXEL_PACK_BUFFER_ARB, esize*3/2, NULL, GL_STATIC_DRAW_ARB); |
||
239 | glGetBufferParameteriv(GL_PIXEL_PACK_BUFFER_ARB, GL_BUFFER_SIZE, &bsize); |
||
240 | } |
||
241 | glBindBuffer(GL_PIXEL_PACK_BUFFER_ARB, 0);
|
||
242 | |||
243 | if(bsize >= esize)
|
||
244 | { |
||
245 | cudaGLRegisterBufferObject(pbo); |
||
246 | cudaGLMapBufferObject(&pbuf, pbo); |
||
247 | cudaMemcpy(pbuf, _cuData, esize, cudaMemcpyDeviceToDevice); |
||
248 | cudaGLUnmapBufferObject(pbo); |
||
249 | cudaGLUnregisterBufferObject(pbo); |
||
250 | return 1; |
||
251 | }else
|
||
252 | { |
||
253 | return 0; |
||
254 | } |
||
255 | } |
||
256 | |||
257 | #endif
|