Showing
11 changed files
with
1261 additions
and
0 deletions
Makefile
0 → 100644
README.md
0 → 100644
1 | +# OpenCL Howto | |
2 | + | |
3 | +Code snippets taken from | |
4 | +[OpenCLHowto](https://wiki.tiker.net/OpenCLHowTo) | |
5 | + | |
6 | +## Description | |
7 | + | |
8 | +This is just some more playing around with OpenCL in an attempt to learn a bit | |
9 | +more about it. | |
10 | + | |
11 | +## Requirements | |
12 | + | |
13 | +Some OpenCL-capable hardware and the corresponding OpenCL library exposing the | |
14 | +OpenCL API. I tested this on an Intel GPU (Intel Corporation Haswell-ULT | |
15 | +Integrated Graphics Controller (rev 09)) with the | |
16 | +[beignet](https://www.freedesktop.org/wiki/Software/Beignet/) | |
17 | +open source library. | |
18 | + | |
19 | +## License | |
20 | + | |
21 | +MIT License | |
22 | + | |
23 | +> Permission is hereby granted, free of charge, to any person obtaining a copy | |
24 | +> of this software and associated documentation files (the "Software"), to | |
25 | +> deal in the Software without restriction, including without limitation the | |
26 | +> rights to use, copy, modify, merge, publish, distribute, sublicense, and/or | |
27 | +> sell copies of the Software, and to permit persons to whom the Software is | |
28 | +> furnished to do so, subject to the following conditions: | |
29 | +> | |
30 | +> The above copyright notice and this permission notice shall be included in | |
31 | +> all copies or substantial portions of the Software. | |
32 | +> | |
33 | +> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
34 | +> IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
35 | +> FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
36 | +> AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
37 | +> LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
38 | +> FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
39 | +> IN THE SOFTWARE. | ... | ... |
cl-demo.c
0 → 100644
1 | +#include "timing.h" | |
2 | +#include "cl-helper.h" | |
3 | + | |
4 | + | |
5 | + | |
6 | + | |
7 | +int main(int argc, char **argv) | |
8 | +{ | |
9 | + if (argc != 3) | |
10 | + { | |
11 | + fprintf(stderr, "need two arguments!\n"); | |
12 | + abort(); | |
13 | + } | |
14 | + | |
15 | + const cl_long n = atol(argv[1]); | |
16 | + const int ntrips = atoi(argv[2]); | |
17 | + | |
18 | + cl_context ctx; | |
19 | + cl_command_queue queue; | |
20 | + create_context_on(CHOOSE_INTERACTIVELY, CHOOSE_INTERACTIVELY, 0, &ctx, &queue, 0); | |
21 | + | |
22 | + print_device_info_from_queue(queue); | |
23 | + | |
24 | + // -------------------------------------------------------------------------- | |
25 | + // load kernels | |
26 | + // -------------------------------------------------------------------------- | |
27 | + char *knl_text = read_file("vec-add-soln.cl"); | |
28 | + cl_kernel knl = kernel_from_string(ctx, knl_text, "sum", NULL); | |
29 | + free(knl_text); | |
30 | + | |
31 | + // -------------------------------------------------------------------------- | |
32 | + // allocate and initialize CPU memory | |
33 | + // -------------------------------------------------------------------------- | |
34 | + float *a = (float *) malloc(sizeof(float) * n); | |
35 | + if (!a) { perror("alloc x"); abort(); } | |
36 | + float *b = (float *) malloc(sizeof(float) * n); | |
37 | + if (!b) { perror("alloc y"); abort(); } | |
38 | + float *c = (float *) malloc(sizeof(float) * n); | |
39 | + if (!c) { perror("alloc z"); abort(); } | |
40 | + | |
41 | + for (size_t i = 0; i < n; ++i) | |
42 | + { | |
43 | + a[i] = i; | |
44 | + b[i] = 2*i; | |
45 | + } | |
46 | + | |
47 | + // -------------------------------------------------------------------------- | |
48 | + // allocate device memory | |
49 | + // -------------------------------------------------------------------------- | |
50 | + cl_int status; | |
51 | + cl_mem buf_a = clCreateBuffer(ctx, CL_MEM_READ_WRITE, | |
52 | + sizeof(float) * n, 0, &status); | |
53 | + CHECK_CL_ERROR(status, "clCreateBuffer"); | |
54 | + | |
55 | + cl_mem buf_b = clCreateBuffer(ctx, CL_MEM_READ_WRITE, | |
56 | + sizeof(float) * n, 0, &status); | |
57 | + CHECK_CL_ERROR(status, "clCreateBuffer"); | |
58 | + | |
59 | + cl_mem buf_c = clCreateBuffer(ctx, CL_MEM_READ_WRITE, | |
60 | + sizeof(float) * n, 0, &status); | |
61 | + CHECK_CL_ERROR(status, "clCreateBuffer"); | |
62 | + | |
63 | + // -------------------------------------------------------------------------- | |
64 | + // transfer to device | |
65 | + // -------------------------------------------------------------------------- | |
66 | + CALL_CL_GUARDED(clEnqueueWriteBuffer, ( | |
67 | + queue, buf_a, /*blocking*/ CL_TRUE, /*offset*/ 0, | |
68 | + n * sizeof(float), a, | |
69 | + 0, NULL, NULL)); | |
70 | + | |
71 | + CALL_CL_GUARDED(clEnqueueWriteBuffer, ( | |
72 | + queue, buf_b, /*blocking*/ CL_TRUE, /*offset*/ 0, | |
73 | + n * sizeof(float), b, | |
74 | + 0, NULL, NULL)); | |
75 | + | |
76 | + // -------------------------------------------------------------------------- | |
77 | + // run code on device | |
78 | + // -------------------------------------------------------------------------- | |
79 | + | |
80 | + CALL_CL_GUARDED(clFinish, (queue)); | |
81 | + | |
82 | + timestamp_type time1, time2; | |
83 | + get_timestamp(&time1); | |
84 | + | |
85 | + for (int trip = 0; trip < ntrips; ++trip) | |
86 | + { | |
87 | + SET_4_KERNEL_ARGS(knl, buf_a, buf_b, buf_c, n); | |
88 | + size_t ldim[] = { 32 }; | |
89 | + size_t gdim[] = { ((n + ldim[0] - 1)/ldim[0])*ldim[0] }; | |
90 | + CALL_CL_GUARDED(clEnqueueNDRangeKernel, | |
91 | + (queue, knl, | |
92 | + /*dimensions*/ 1, NULL, gdim, ldim, | |
93 | + 0, NULL, NULL)); | |
94 | + } | |
95 | + | |
96 | + CALL_CL_GUARDED(clFinish, (queue)); | |
97 | + | |
98 | + get_timestamp(&time2); | |
99 | + double elapsed = timestamp_diff_in_seconds(time1,time2)/ntrips; | |
100 | + printf("%f s\n", elapsed); | |
101 | + printf("%f GB/s\n", | |
102 | + 3*n*sizeof(float)/1e9/elapsed); | |
103 | + | |
104 | + // -------------------------------------------------------------------------- | |
105 | + // transfer back & check | |
106 | + // -------------------------------------------------------------------------- | |
107 | + CALL_CL_GUARDED(clEnqueueReadBuffer, ( | |
108 | + queue, buf_c, /*blocking*/ CL_TRUE, /*offset*/ 0, | |
109 | + n * sizeof(float), c, | |
110 | + 0, NULL, NULL)); | |
111 | + | |
112 | + for (size_t i = 0; i < n; ++i) | |
113 | + if (c[i] != 3*i) | |
114 | + { | |
115 | + printf("BAD %ld %f %f!\n", i, c[i], c[i] - 3*i); | |
116 | + abort(); | |
117 | + } | |
118 | + puts("GOOD"); | |
119 | + | |
120 | + // -------------------------------------------------------------------------- | |
121 | + // clean up | |
122 | + // -------------------------------------------------------------------------- | |
123 | + CALL_CL_GUARDED(clReleaseMemObject, (buf_a)); | |
124 | + CALL_CL_GUARDED(clReleaseMemObject, (buf_b)); | |
125 | + CALL_CL_GUARDED(clReleaseMemObject, (buf_c)); | |
126 | + CALL_CL_GUARDED(clReleaseKernel, (knl)); | |
127 | + CALL_CL_GUARDED(clReleaseCommandQueue, (queue)); | |
128 | + CALL_CL_GUARDED(clReleaseContext, (ctx)); | |
129 | + | |
130 | + return 0; | |
131 | +} | ... | ... |
cl-helper.c
0 → 100644
1 | +/* | |
2 | + * Copyright (c) 2010 Andreas Kloeckner | |
3 | + * | |
4 | + * Permission is hereby granted, free of charge, to any person obtaining a copy | |
5 | + * of this software and associated documentation files (the "Software"), to deal | |
6 | + * in the Software without restriction, including without limitation the rights | |
7 | + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
8 | + * copies of the Software, and to permit persons to whom the Software is | |
9 | + * furnished to do so, subject to the following conditions: | |
10 | + * | |
11 | + * The above copyright notice and this permission notice shall be included in | |
12 | + * all copies or substantial portions of the Software. | |
13 | + * | |
14 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
15 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
16 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
17 | + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
18 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
19 | + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |
20 | + * THE SOFTWARE. | |
21 | + */ | |
22 | + | |
23 | + | |
24 | + | |
25 | + | |
26 | +#include "cl-helper.h" | |
27 | +#include <string.h> | |
28 | +#include <stdbool.h> | |
29 | + | |
30 | + | |
31 | + | |
32 | + | |
33 | +#define MAX_NAME_LEN 1000 | |
34 | + | |
35 | + | |
36 | + | |
37 | + | |
38 | +const char *cl_error_to_str(cl_int e) | |
39 | +{ | |
40 | + switch (e) | |
41 | + { | |
42 | + case CL_SUCCESS: return "success"; | |
43 | + case CL_DEVICE_NOT_FOUND: return "device not found"; | |
44 | + case CL_DEVICE_NOT_AVAILABLE: return "device not available"; | |
45 | +#if !(defined(CL_PLATFORM_NVIDIA) && CL_PLATFORM_NVIDIA == 0x3001) | |
46 | + case CL_COMPILER_NOT_AVAILABLE: return "device compiler not available"; | |
47 | +#endif | |
48 | + case CL_MEM_OBJECT_ALLOCATION_FAILURE: return "mem object allocation failure"; | |
49 | + case CL_OUT_OF_RESOURCES: return "out of resources"; | |
50 | + case CL_OUT_OF_HOST_MEMORY: return "out of host memory"; | |
51 | + case CL_PROFILING_INFO_NOT_AVAILABLE: return "profiling info not available"; | |
52 | + case CL_MEM_COPY_OVERLAP: return "mem copy overlap"; | |
53 | + case CL_IMAGE_FORMAT_MISMATCH: return "image format mismatch"; | |
54 | + case CL_IMAGE_FORMAT_NOT_SUPPORTED: return "image format not supported"; | |
55 | + case CL_BUILD_PROGRAM_FAILURE: return "build program failure"; | |
56 | + case CL_MAP_FAILURE: return "map failure"; | |
57 | + | |
58 | + case CL_INVALID_VALUE: return "invalid value"; | |
59 | + case CL_INVALID_DEVICE_TYPE: return "invalid device type"; | |
60 | + case CL_INVALID_PLATFORM: return "invalid platform"; | |
61 | + case CL_INVALID_DEVICE: return "invalid device"; | |
62 | + case CL_INVALID_CONTEXT: return "invalid context"; | |
63 | + case CL_INVALID_QUEUE_PROPERTIES: return "invalid queue properties"; | |
64 | + case CL_INVALID_COMMAND_QUEUE: return "invalid command queue"; | |
65 | + case CL_INVALID_HOST_PTR: return "invalid host ptr"; | |
66 | + case CL_INVALID_MEM_OBJECT: return "invalid mem object"; | |
67 | + case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: return "invalid image format descriptor"; | |
68 | + case CL_INVALID_IMAGE_SIZE: return "invalid image size"; | |
69 | + case CL_INVALID_SAMPLER: return "invalid sampler"; | |
70 | + case CL_INVALID_BINARY: return "invalid binary"; | |
71 | + case CL_INVALID_BUILD_OPTIONS: return "invalid build options"; | |
72 | + case CL_INVALID_PROGRAM: return "invalid program"; | |
73 | + case CL_INVALID_PROGRAM_EXECUTABLE: return "invalid program executable"; | |
74 | + case CL_INVALID_KERNEL_NAME: return "invalid kernel name"; | |
75 | + case CL_INVALID_KERNEL_DEFINITION: return "invalid kernel definition"; | |
76 | + case CL_INVALID_KERNEL: return "invalid kernel"; | |
77 | + case CL_INVALID_ARG_INDEX: return "invalid arg index"; | |
78 | + case CL_INVALID_ARG_VALUE: return "invalid arg value"; | |
79 | + case CL_INVALID_ARG_SIZE: return "invalid arg size"; | |
80 | + case CL_INVALID_KERNEL_ARGS: return "invalid kernel args"; | |
81 | + case CL_INVALID_WORK_DIMENSION: return "invalid work dimension"; | |
82 | + case CL_INVALID_WORK_GROUP_SIZE: return "invalid work group size"; | |
83 | + case CL_INVALID_WORK_ITEM_SIZE: return "invalid work item size"; | |
84 | + case CL_INVALID_GLOBAL_OFFSET: return "invalid global offset"; | |
85 | + case CL_INVALID_EVENT_WAIT_LIST: return "invalid event wait list"; | |
86 | + case CL_INVALID_EVENT: return "invalid event"; | |
87 | + case CL_INVALID_OPERATION: return "invalid operation"; | |
88 | + case CL_INVALID_GL_OBJECT: return "invalid gl object"; | |
89 | + case CL_INVALID_BUFFER_SIZE: return "invalid buffer size"; | |
90 | + case CL_INVALID_MIP_LEVEL: return "invalid mip level"; | |
91 | + | |
92 | +#if defined(cl_khr_gl_sharing) && (cl_khr_gl_sharing >= 1) | |
93 | + case CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR: return "invalid gl sharegroup reference number"; | |
94 | +#endif | |
95 | + | |
96 | +#ifdef CL_VERSION_1_1 | |
97 | + case CL_MISALIGNED_SUB_BUFFER_OFFSET: return "misaligned sub-buffer offset"; | |
98 | + case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST: return "exec status error for events in wait list"; | |
99 | + case CL_INVALID_GLOBAL_WORK_SIZE: return "invalid global work size"; | |
100 | +#endif | |
101 | + | |
102 | + default: return "invalid/unknown error code"; | |
103 | + } | |
104 | +} | |
105 | + | |
106 | + | |
107 | + | |
108 | + | |
109 | +void print_platforms_devices() | |
110 | +{ | |
111 | + // get number of platforms | |
112 | + cl_uint plat_count; | |
113 | + CALL_CL_GUARDED(clGetPlatformIDs, (0, NULL, &plat_count)); | |
114 | + | |
115 | + // allocate memory, get list of platforms | |
116 | + cl_platform_id *platforms = | |
117 | + (cl_platform_id *) malloc(plat_count*sizeof(cl_platform_id)); | |
118 | + CHECK_SYS_ERROR(!platforms, "allocating platform array"); | |
119 | + | |
120 | + CALL_CL_GUARDED(clGetPlatformIDs, (plat_count, platforms, NULL)); | |
121 | + | |
122 | + // iterate over platforms | |
123 | + for (cl_uint i = 0; i < plat_count; ++i) | |
124 | + { | |
125 | + // get platform vendor name | |
126 | + char buf[MAX_NAME_LEN]; | |
127 | + CALL_CL_GUARDED(clGetPlatformInfo, (platforms[i], CL_PLATFORM_VENDOR, | |
128 | + sizeof(buf), buf, NULL)); | |
129 | + printf("platform %d: vendor '%s'\n", i, buf); | |
130 | + | |
131 | + // get number of devices in platform | |
132 | + cl_uint dev_count; | |
133 | + CALL_CL_GUARDED(clGetDeviceIDs, (platforms[i], CL_DEVICE_TYPE_ALL, | |
134 | + 0, NULL, &dev_count)); | |
135 | + | |
136 | + cl_device_id *devices = | |
137 | + (cl_device_id *) malloc(dev_count*sizeof(cl_device_id)); | |
138 | + CHECK_SYS_ERROR(!devices, "allocating device array"); | |
139 | + | |
140 | + // get list of devices in platform | |
141 | + CALL_CL_GUARDED(clGetDeviceIDs, (platforms[i], CL_DEVICE_TYPE_ALL, | |
142 | + dev_count, devices, NULL)); | |
143 | + | |
144 | + // iterate over devices | |
145 | + for (cl_uint j = 0; j < dev_count; ++j) | |
146 | + { | |
147 | + char buf[MAX_NAME_LEN]; | |
148 | + CALL_CL_GUARDED(clGetDeviceInfo, (devices[j], CL_DEVICE_NAME, | |
149 | + sizeof(buf), buf, NULL)); | |
150 | + printf(" device %d: '%s'\n", j, buf); | |
151 | + } | |
152 | + | |
153 | + free(devices); | |
154 | + } | |
155 | + | |
156 | + free(platforms); | |
157 | +} | |
158 | + | |
159 | + | |
160 | + | |
161 | + | |
/*
 * Read one line of arbitrary length from stdin.
 *
 * Reading stops after the first '\n' (which is kept in the result) or at
 * end of input.  The result is always NUL-terminated; if stdin is already at
 * end of input the result is an empty string.
 *
 * Returns a heap-allocated buffer that the caller must free(), or NULL if
 * memory could not be allocated.
 */
char *read_a_line(void)
{
  enum { INITIAL_CAPACITY = 128 };

  size_t capacity = INITIAL_CAPACITY;
  size_t length = 0;
  char *buffer = (char *) malloc(capacity);

  if (buffer == NULL)
    return NULL;

  for (;;)
  {
    int c = fgetc(stdin);
    if (c == EOF)
      break;

    // Always keep one byte spare for the terminator.  Tracking an index and
    // the true capacity avoids both over-counting the free space after a
    // grow and doing arithmetic on the pre-realloc pointer.
    if (length + 1 >= capacity)
    {
      if (capacity > (size_t) -1 / 2)
      {
        free(buffer);
        return NULL;
      }

      size_t grown_capacity = capacity * 2;
      char *grown = (char *) realloc(buffer, grown_capacity);
      if (grown == NULL)
      {
        free(buffer);
        return NULL;
      }

      buffer = grown;
      capacity = grown_capacity;
    }

    buffer[length++] = (char) c;
    if (c == '\n')
      break;
  }

  buffer[length] = '\0';
  return buffer;
}
200 | + | |
201 | + | |
202 | + | |
203 | + | |
204 | +const char *CHOOSE_INTERACTIVELY = "INTERACTIVE"; | |
205 | + | |
206 | + | |
207 | +#define MIN(a,b) (((a)<(b))?(a):(b)) | |
208 | +#define MAX(a,b) (((a)>(b))?(a):(b)) | |
209 | + | |
210 | +void create_context_on(const char *plat_name, const char*dev_name, cl_uint idx, | |
211 | + cl_context *ctx, cl_command_queue *queue, int enable_profiling) | |
212 | +{ | |
213 | + char dev_sel_buf[MAX_NAME_LEN]; | |
214 | + char platform_sel_buf[MAX_NAME_LEN]; | |
215 | + | |
216 | + // get number of platforms | |
217 | + cl_uint plat_count; | |
218 | + CALL_CL_GUARDED(clGetPlatformIDs, (0, NULL, &plat_count)); | |
219 | + | |
220 | + // allocate memory, get list of platform handles | |
221 | + cl_platform_id *platforms = | |
222 | + (cl_platform_id *) malloc(plat_count*sizeof(cl_platform_id)); | |
223 | + CHECK_SYS_ERROR(!platforms, "allocating platform array"); | |
224 | + CALL_CL_GUARDED(clGetPlatformIDs, (plat_count, platforms, NULL)); | |
225 | + | |
226 | + // print menu, if requested | |
227 | +#ifndef CL_HELPER_FORCE_INTERACTIVE | |
228 | + if (plat_name == CHOOSE_INTERACTIVELY) // yes, we want exactly that pointer | |
229 | +#endif | |
230 | + { | |
231 | + puts("Choose platform:"); | |
232 | + for (cl_uint i = 0; i < plat_count; ++i) | |
233 | + { | |
234 | + char buf[MAX_NAME_LEN]; | |
235 | + CALL_CL_GUARDED(clGetPlatformInfo, (platforms[i], CL_PLATFORM_VENDOR, | |
236 | + sizeof(buf), buf, NULL)); | |
237 | + printf("[%d] %s\n", i, buf); | |
238 | + } | |
239 | + | |
240 | + printf("Enter choice: "); | |
241 | + fflush(stdout); | |
242 | + | |
243 | + char *sel = read_a_line(); | |
244 | + if (!sel) | |
245 | + { | |
246 | + fprintf(stderr, "error reading line from stdin"); | |
247 | + abort(); | |
248 | + } | |
249 | + | |
250 | + int sel_int = MIN(MAX(0, atoi(sel)), (int) plat_count-1); | |
251 | + free(sel); | |
252 | + | |
253 | + CALL_CL_GUARDED(clGetPlatformInfo, (platforms[sel_int], CL_PLATFORM_VENDOR, | |
254 | + sizeof(platform_sel_buf), platform_sel_buf, NULL)); | |
255 | + plat_name = platform_sel_buf; | |
256 | + } | |
257 | + | |
258 | + // iterate over platforms | |
259 | + for (cl_uint i = 0; i < plat_count; ++i) | |
260 | + { | |
261 | + // get platform name | |
262 | + char buf[MAX_NAME_LEN]; | |
263 | + CALL_CL_GUARDED(clGetPlatformInfo, (platforms[i], CL_PLATFORM_VENDOR, | |
264 | + sizeof(buf), buf, NULL)); | |
265 | + | |
266 | + // does it match? | |
267 | + if (!plat_name || strstr(buf, plat_name)) | |
268 | + { | |
269 | + // get number of devices in platform | |
270 | + cl_uint dev_count; | |
271 | + CALL_CL_GUARDED(clGetDeviceIDs, (platforms[i], CL_DEVICE_TYPE_ALL, | |
272 | + 0, NULL, &dev_count)); | |
273 | + | |
274 | + // allocate memory, get list of device handles in platform | |
275 | + cl_device_id *devices = | |
276 | + (cl_device_id *) malloc(dev_count*sizeof(cl_device_id)); | |
277 | + CHECK_SYS_ERROR(!devices, "allocating device array"); | |
278 | + | |
279 | + CALL_CL_GUARDED(clGetDeviceIDs, (platforms[i], CL_DEVICE_TYPE_ALL, | |
280 | + dev_count, devices, NULL)); | |
281 | + | |
282 | + // {{{ print device menu, if requested | |
283 | +#ifndef CL_HELPER_FORCE_INTERACTIVE | |
284 | + if (dev_name == CHOOSE_INTERACTIVELY) // yes, we want exactly that pointer | |
285 | +#endif | |
286 | + { | |
287 | + puts("Choose device:"); | |
288 | + for (cl_uint j = 0; j < dev_count; ++j) | |
289 | + { | |
290 | + char buf[MAX_NAME_LEN]; | |
291 | + CALL_CL_GUARDED(clGetDeviceInfo, (devices[j], CL_DEVICE_NAME, | |
292 | + sizeof(buf), buf, NULL)); | |
293 | + printf("[%d] %s\n", j, buf); | |
294 | + } | |
295 | + | |
296 | + printf("Enter choice: "); | |
297 | + fflush(stdout); | |
298 | + | |
299 | + char *sel = read_a_line(); | |
300 | + if (!sel) | |
301 | + { | |
302 | + fprintf(stderr, "error reading line from stdin"); | |
303 | + abort(); | |
304 | + } | |
305 | + | |
306 | + int int_sel = MIN(MAX(0, atoi(sel)), (int) dev_count-1); | |
307 | + free(sel); | |
308 | + | |
309 | + CALL_CL_GUARDED(clGetDeviceInfo, (devices[int_sel], CL_DEVICE_NAME, | |
310 | + sizeof(dev_sel_buf), dev_sel_buf, NULL)); | |
311 | + dev_name = dev_sel_buf; | |
312 | + } | |
313 | + | |
314 | + // }}} | |
315 | + | |
316 | + // iterate over devices | |
317 | + for (cl_uint j = 0; j < dev_count; ++j) | |
318 | + { | |
319 | + // get device name | |
320 | + char buf[MAX_NAME_LEN]; | |
321 | + CALL_CL_GUARDED(clGetDeviceInfo, (devices[j], CL_DEVICE_NAME, | |
322 | + sizeof(buf), buf, NULL)); | |
323 | + | |
324 | + // does it match? | |
325 | + if (!dev_name || strstr(buf, dev_name)) | |
326 | + { | |
327 | + if (idx == 0) | |
328 | + { | |
329 | + cl_platform_id plat = platforms[i]; | |
330 | + cl_device_id dev = devices[j]; | |
331 | + | |
332 | + free(devices); | |
333 | + free(platforms); | |
334 | + | |
335 | + // create a context | |
336 | + cl_context_properties cps[3] = { | |
337 | + CL_CONTEXT_PLATFORM, (cl_context_properties) plat, 0 }; | |
338 | + | |
339 | + cl_int status; | |
340 | + *ctx = clCreateContext( | |
341 | + cps, 1, &dev, NULL, NULL, &status); | |
342 | + CHECK_CL_ERROR(status, "clCreateContext"); | |
343 | + | |
344 | + // create a command queue | |
345 | + cl_command_queue_properties qprops = 0; | |
346 | + if (enable_profiling) | |
347 | + qprops |= CL_QUEUE_PROFILING_ENABLE; | |
348 | + | |
349 | + if (queue) | |
350 | + { | |
351 | + *queue = clCreateCommandQueue(*ctx, dev, qprops, &status); | |
352 | + CHECK_CL_ERROR(status, "clCreateCommandQueue"); | |
353 | + } | |
354 | + | |
355 | + return; | |
356 | + } | |
357 | + else | |
358 | + --idx; | |
359 | + } | |
360 | + } | |
361 | + | |
362 | + free(devices); | |
363 | + } | |
364 | + } | |
365 | + | |
366 | + free(platforms); | |
367 | + | |
368 | + fputs("create_context_on: specified device not found.\n", stderr); | |
369 | + abort(); | |
370 | +} | |
371 | + | |
372 | + | |
373 | + | |
374 | + | |
/*
 * Read an entire file into a freshly allocated, NUL-terminated buffer.
 *
 * filename: path of the file to read.
 *
 * Returns the file contents followed by a terminating '\0'; the caller owns
 * the buffer and must free() it.  Every failure (open, seek, tell, allocate,
 * read, close) is reported through CHECK_SYS_ERROR.
 */
char *read_file(const char *filename)
{
  // Binary mode: in text mode some platforms translate line endings, so the
  // byte count from ftell() would not match what fread() delivers.
  FILE *f = fopen(filename, "rb");
  CHECK_SYS_ERROR(!f, "read_file: opening file");

  // figure out file size
  CHECK_SYS_ERROR(fseek(f, 0, SEEK_END) < 0, "read_file: seeking to end");

  // ftell() signals failure with -1L; check before converting to size_t,
  // where it would turn into an enormous allocation request
  long end_pos = ftell(f);
  CHECK_SYS_ERROR(end_pos < 0, "read_file: determining file size");
  size_t size = (size_t) end_pos;

  CHECK_SYS_ERROR(fseek(f, 0, SEEK_SET) != 0,
      "read_file: seeking to start");

  // allocate memory, slurp in entire file
  char *result = (char *) malloc(size+1);
  CHECK_SYS_ERROR(!result, "read_file: allocating file contents");
  CHECK_SYS_ERROR(fread(result, 1, size, f) < size,
      "read_file: reading file contents");

  // close, return
  CHECK_SYS_ERROR(fclose(f), "read_file: closing file");
  result[size] = '\0';

  return result;
}
399 | + | |
400 | + | |
401 | + | |
402 | + | |
403 | +static int printed_compiler_output_message = 0; | |
404 | + | |
405 | +cl_kernel kernel_from_string(cl_context ctx, | |
406 | + char const *knl, char const *knl_name, char const *options) | |
407 | +{ | |
408 | + // create an OpenCL program (may have multiple kernels) | |
409 | + size_t sizes[] = { strlen(knl) }; | |
410 | + | |
411 | + if (options && strlen(options) == 0) | |
412 | + { | |
413 | + // reportedly, some implementations dislike empty strings. | |
414 | + options = NULL; | |
415 | + } | |
416 | + | |
417 | + cl_int status; | |
418 | + cl_program program = clCreateProgramWithSource(ctx, 1, &knl, sizes, &status); | |
419 | + CHECK_CL_ERROR(status, "clCreateProgramWithSource"); | |
420 | + | |
421 | + // build it | |
422 | + status = clBuildProgram(program, 0, NULL, options, NULL, NULL); | |
423 | + | |
424 | + { | |
425 | + // get build log and print it | |
426 | + | |
427 | + cl_device_id dev; | |
428 | + CALL_CL_GUARDED(clGetProgramInfo, (program, CL_PROGRAM_DEVICES, | |
429 | + sizeof(dev), &dev, NULL)); | |
430 | + | |
431 | + size_t log_size; | |
432 | + CALL_CL_GUARDED(clGetProgramBuildInfo, (program, dev, CL_PROGRAM_BUILD_LOG, | |
433 | + 0, NULL, &log_size)); | |
434 | + | |
435 | + bool do_print = status != CL_SUCCESS; | |
436 | + if (!do_print && log_size) | |
437 | + { | |
438 | + if (getenv("CL_HELPER_PRINT_COMPILER_OUTPUT")) | |
439 | + do_print = true; | |
440 | + else | |
441 | + { | |
442 | + if (!printed_compiler_output_message && !getenv("CL_HELPER_NO_COMPILER_OUTPUT_NAG")) | |
443 | + { | |
444 | + fprintf(stderr, "*** Kernel compilation resulted in non-empty log message.\n" | |
445 | + "*** Set environment variable CL_HELPER_PRINT_COMPILER_OUTPUT=1 to see more.\n" | |
446 | + "*** NOTE: this may include compiler warnings and other important messages\n" | |
447 | + "*** about your code.\n" | |
448 | + "*** Set CL_HELPER_NO_COMPILER_OUTPUT_NAG=1 to disable this message.\n"); | |
449 | + printed_compiler_output_message = true; | |
450 | + } | |
451 | + } | |
452 | + } | |
453 | + | |
454 | + if (do_print) | |
455 | + { | |
456 | + char *log = (char *) malloc(log_size); | |
457 | + CHECK_SYS_ERROR(!log, "kernel_from_string: allocate log"); | |
458 | + | |
459 | + char devname[MAX_NAME_LEN]; | |
460 | + CALL_CL_GUARDED(clGetDeviceInfo, (dev, CL_DEVICE_NAME, | |
461 | + sizeof(devname), devname, NULL)); | |
462 | + | |
463 | + CALL_CL_GUARDED(clGetProgramBuildInfo, (program, dev, CL_PROGRAM_BUILD_LOG, | |
464 | + log_size, log, NULL)); | |
465 | + fprintf(stderr, "*** build of '%s' on '%s' said:\n%s\n*** (end of message)\n", | |
466 | + knl_name, devname, log); | |
467 | + } | |
468 | + } | |
469 | + | |
470 | + CHECK_CL_ERROR(status, "clBuildProgram"); | |
471 | + | |
472 | + // fish the kernel out of the program | |
473 | + cl_kernel kernel = clCreateKernel(program, knl_name, &status); | |
474 | + CHECK_CL_ERROR(status, "clCreateKernel"); | |
475 | + | |
476 | + CALL_CL_GUARDED(clReleaseProgram, (program)); | |
477 | + | |
478 | + return kernel; | |
479 | +} | |
480 | + | |
481 | + | |
482 | + | |
483 | + | |
484 | +void print_device_info(cl_device_id device) | |
485 | +{ | |
486 | + // adapted from http://graphics.stanford.edu/~yoel/notes/clInfo.c | |
487 | + | |
488 | +#define LONG_PROPS \ | |
489 | + defn(VENDOR_ID), \ | |
490 | + defn(MAX_COMPUTE_UNITS), \ | |
491 | + defn(MAX_WORK_ITEM_DIMENSIONS), \ | |
492 | + defn(MAX_WORK_GROUP_SIZE), \ | |
493 | + defn(PREFERRED_VECTOR_WIDTH_CHAR), \ | |
494 | + defn(PREFERRED_VECTOR_WIDTH_SHORT), \ | |
495 | + defn(PREFERRED_VECTOR_WIDTH_INT), \ | |
496 | + defn(PREFERRED_VECTOR_WIDTH_LONG), \ | |
497 | + defn(PREFERRED_VECTOR_WIDTH_FLOAT), \ | |
498 | + defn(PREFERRED_VECTOR_WIDTH_DOUBLE), \ | |
499 | + defn(MAX_CLOCK_FREQUENCY), \ | |
500 | + defn(ADDRESS_BITS), \ | |
501 | + defn(MAX_MEM_ALLOC_SIZE), \ | |
502 | + defn(IMAGE_SUPPORT), \ | |
503 | + defn(MAX_READ_IMAGE_ARGS), \ | |
504 | + defn(MAX_WRITE_IMAGE_ARGS), \ | |
505 | + defn(IMAGE2D_MAX_WIDTH), \ | |
506 | + defn(IMAGE2D_MAX_HEIGHT), \ | |
507 | + defn(IMAGE3D_MAX_WIDTH), \ | |
508 | + defn(IMAGE3D_MAX_HEIGHT), \ | |
509 | + defn(IMAGE3D_MAX_DEPTH), \ | |
510 | + defn(MAX_SAMPLERS), \ | |
511 | + defn(MAX_PARAMETER_SIZE), \ | |
512 | + defn(MEM_BASE_ADDR_ALIGN), \ | |
513 | + defn(MIN_DATA_TYPE_ALIGN_SIZE), \ | |
514 | + defn(GLOBAL_MEM_CACHELINE_SIZE), \ | |
515 | + defn(GLOBAL_MEM_CACHE_SIZE), \ | |
516 | + defn(GLOBAL_MEM_SIZE), \ | |
517 | + defn(MAX_CONSTANT_BUFFER_SIZE), \ | |
518 | + defn(MAX_CONSTANT_ARGS), \ | |
519 | + defn(LOCAL_MEM_SIZE), \ | |
520 | + defn(ERROR_CORRECTION_SUPPORT), \ | |
521 | + defn(PROFILING_TIMER_RESOLUTION), \ | |
522 | + defn(ENDIAN_LITTLE), \ | |
523 | + defn(AVAILABLE), \ | |
524 | + defn(COMPILER_AVAILABLE), | |
525 | + | |
526 | +#define STR_PROPS \ | |
527 | + defn(NAME), \ | |
528 | + defn(VENDOR), \ | |
529 | + defn(PROFILE), \ | |
530 | + defn(VERSION), \ | |
531 | + defn(EXTENSIONS), | |
532 | + | |
533 | +#define HEX_PROPS \ | |
534 | + defn(SINGLE_FP_CONFIG), \ | |
535 | + defn(QUEUE_PROPERTIES), | |
536 | + | |
537 | + | |
538 | + printf("---------------------------------------------------------------------\n"); | |
539 | + | |
540 | + | |
541 | + static struct { cl_device_info param; const char *name; } longProps[] = { | |
542 | +#define defn(X) { CL_DEVICE_##X, #X } | |
543 | + LONG_PROPS | |
544 | +#undef defn | |
545 | + { 0, NULL }, | |
546 | + }; | |
547 | + static struct { cl_device_info param; const char *name; } hexProps[] = { | |
548 | +#define defn(X) { CL_DEVICE_##X, #X } | |
549 | + HEX_PROPS | |
550 | +#undef defn | |
551 | + { 0, NULL }, | |
552 | + }; | |
553 | + static struct { cl_device_info param; const char *name; } strProps[] = { | |
554 | +#define defn(X) { CL_DEVICE_##X, #X } | |
555 | + STR_PROPS | |
556 | +#undef defn | |
557 | + { CL_DRIVER_VERSION, "DRIVER_VERSION" }, | |
558 | + { 0, NULL }, | |
559 | + }; | |
560 | + cl_int status; | |
561 | + size_t size; | |
562 | + char buf[65536]; | |
563 | + long long val; /* Avoids unpleasant surprises for some params */ | |
564 | + int ii; | |
565 | + | |
566 | + for (ii = 0; strProps[ii].name != NULL; ii++) | |
567 | + { | |
568 | + status = clGetDeviceInfo(device, strProps[ii].param, sizeof buf, buf, &size); | |
569 | + if (status != CL_SUCCESS) | |
570 | + { | |
571 | + printf("Unable to get %s: %s!\n", | |
572 | + strProps[ii].name, cl_error_to_str(status)); | |
573 | + continue; | |
574 | + } | |
575 | + if (size > sizeof buf) | |
576 | + { | |
577 | + printf("Large %s (%zd bytes)! Truncating to %ld!\n", | |
578 | + strProps[ii].name, size, sizeof buf); | |
579 | + } | |
580 | + printf("%s: %s\n", | |
581 | + strProps[ii].name, buf); | |
582 | + } | |
583 | + printf("\n"); | |
584 | + | |
585 | + status = clGetDeviceInfo(device, CL_DEVICE_TYPE, sizeof val, &val, NULL); | |
586 | + if (status == CL_SUCCESS) | |
587 | + { | |
588 | + printf("Type: "); | |
589 | + if (val & CL_DEVICE_TYPE_DEFAULT) | |
590 | + { | |
591 | + val &= ~CL_DEVICE_TYPE_DEFAULT; | |
592 | + printf("Default "); | |
593 | + } | |
594 | + if (val & CL_DEVICE_TYPE_CPU) | |
595 | + { | |
596 | + val &= ~CL_DEVICE_TYPE_CPU; | |
597 | + printf("CPU "); | |
598 | + } | |
599 | + if (val & CL_DEVICE_TYPE_GPU) | |
600 | + { | |
601 | + val &= ~CL_DEVICE_TYPE_GPU; | |
602 | + printf("GPU "); | |
603 | + } | |
604 | + if (val & CL_DEVICE_TYPE_ACCELERATOR) | |
605 | + { | |
606 | + val &= ~CL_DEVICE_TYPE_ACCELERATOR; | |
607 | + printf("Accelerator "); | |
608 | + } | |
609 | + if (val != 0) { | |
610 | + printf("Unknown (0x%llx) ", val); | |
611 | + } | |
612 | + printf("\n"); | |
613 | + } | |
614 | + else | |
615 | + { | |
616 | + printf("Unable to get TYPE: %s!\n", | |
617 | + cl_error_to_str(status)); | |
618 | + } | |
619 | + | |
620 | + status = clGetDeviceInfo(device, CL_DEVICE_EXECUTION_CAPABILITIES, | |
621 | + sizeof val, &val, NULL); | |
622 | + if (status == CL_SUCCESS) | |
623 | + { | |
624 | + printf("EXECUTION_CAPABILITIES: "); | |
625 | + if (val & CL_EXEC_KERNEL) | |
626 | + { | |
627 | + val &= ~CL_EXEC_KERNEL; | |
628 | + printf("Kernel "); | |
629 | + } | |
630 | + if (val & CL_EXEC_NATIVE_KERNEL) | |
631 | + { | |
632 | + val &= ~CL_EXEC_NATIVE_KERNEL; | |
633 | + printf("Native "); | |
634 | + } | |
635 | + if (val) | |
636 | + printf("Unknown (0x%llx) ", val); | |
637 | + | |
638 | + printf("\n"); | |
639 | + } | |
640 | + else | |
641 | + { | |
642 | + printf("Unable to get EXECUTION_CAPABILITIES: %s!\n", | |
643 | + cl_error_to_str(status)); | |
644 | + } | |
645 | + | |
646 | + status = clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, | |
647 | + sizeof val, &val, NULL); | |
648 | + if (status == CL_SUCCESS) | |
649 | + { | |
650 | + static const char *cacheTypes[] = { "None", "Read-Only", "Read-Write" }; | |
651 | + static int numTypes = sizeof cacheTypes / sizeof cacheTypes[0]; | |
652 | + | |
653 | + printf("GLOBAL_MEM_CACHE_TYPE: %s (%lld)\n", | |
654 | + val < numTypes ? cacheTypes[val] : "???", val); | |
655 | + } | |
656 | + else | |
657 | + { | |
658 | + printf("Unable to get GLOBAL_MEM_CACHE_TYPE: %s!\n", | |
659 | + cl_error_to_str(status)); | |
660 | + } | |
661 | + | |
662 | + status = clGetDeviceInfo(device, | |
663 | + CL_DEVICE_LOCAL_MEM_TYPE, sizeof val, &val, NULL); | |
664 | + | |
665 | + if (status == CL_SUCCESS) | |
666 | + { | |
667 | + static const char *lmemTypes[] = { "???", "Local", "Global" }; | |
668 | + static int numTypes = sizeof lmemTypes / sizeof lmemTypes[0]; | |
669 | + | |
670 | + printf("CL_DEVICE_LOCAL_MEM_TYPE: %s (%lld)\n", | |
671 | + val < numTypes ? lmemTypes[val] : "???", val); | |
672 | + } | |
673 | + else | |
674 | + { | |
675 | + printf("Unable to get CL_DEVICE_LOCAL_MEM_TYPE: %s!\n", | |
676 | + cl_error_to_str(status)); | |
677 | + } | |
678 | + | |
679 | + for (ii = 0; hexProps[ii].name != NULL; ii++) | |
680 | + { | |
681 | + status = clGetDeviceInfo(device, hexProps[ii].param, sizeof val, &val, &size); | |
682 | + if (status != CL_SUCCESS) | |
683 | + { | |
684 | + printf("Unable to get %s: %s!\n", | |
685 | + hexProps[ii].name, cl_error_to_str(status)); | |
686 | + continue; | |
687 | + } | |
688 | + if (size > sizeof val) | |
689 | + { | |
690 | + printf("Large %s (%zd bytes)! Truncating to %ld!\n", | |
691 | + hexProps[ii].name, size, sizeof val); | |
692 | + } | |
693 | + printf("%s: 0x%llx\n", hexProps[ii].name, val); | |
694 | + } | |
695 | + printf("\n"); | |
696 | + | |
697 | + for (ii = 0; longProps[ii].name != NULL; ii++) | |
698 | + { | |
699 | + status = clGetDeviceInfo(device, longProps[ii].param, sizeof val, &val, &size); | |
700 | + if (status != CL_SUCCESS) | |
701 | + { | |
702 | + printf("Unable to get %s: %s!\n", | |
703 | + longProps[ii].name, cl_error_to_str(status)); | |
704 | + continue; | |
705 | + } | |
706 | + if (size > sizeof val) | |
707 | + { | |
708 | + printf("Large %s (%zd bytes)! Truncating to %ld!\n", | |
709 | + longProps[ii].name, size, sizeof val); | |
710 | + } | |
711 | + printf("%s: %lld\n", longProps[ii].name, val); | |
712 | + } | |
713 | + | |
714 | + { | |
715 | + size_t size; | |
716 | + CALL_CL_GUARDED(clGetDeviceInfo, | |
717 | + (device, CL_DEVICE_MAX_WORK_ITEM_SIZES, 0, 0, &size)); | |
718 | + | |
719 | + size_t res_vec[size/sizeof(size_t)]; // C99 VLA yay! | |
720 | + | |
721 | + CALL_CL_GUARDED(clGetDeviceInfo, | |
722 | + (device, CL_DEVICE_MAX_WORK_ITEM_SIZES, size, res_vec, &size)); | |
723 | + | |
724 | + printf("MAX_WORK_GROUP_SIZES: "); // a tiny lie | |
725 | + for (size_t i = 0; i < size/sizeof(size_t); ++i) | |
726 | + printf("%zd ", res_vec[i]); | |
727 | + printf("\n"); | |
728 | + } | |
729 | + printf("---------------------------------------------------------------------\n"); | |
730 | +} | |
731 | + | |
732 | + | |
733 | + | |
734 | +void print_device_info_from_queue(cl_command_queue queue) | |
735 | +{ | |
736 | + cl_device_id dev; | |
737 | + CALL_CL_GUARDED(clGetCommandQueueInfo, | |
738 | + (queue, CL_QUEUE_DEVICE, sizeof dev, &dev, NULL)); | |
739 | + | |
740 | + print_device_info(dev); | |
741 | +} | ... | ... |
cl-helper.h
0 → 100644
1 | +/* | |
2 | + * Copyright (c) 2010, 2012 Andreas Kloeckner | |
3 | + * | |
4 | + * Permission is hereby granted, free of charge, to any person obtaining a copy | |
5 | + * of this software and associated documentation files (the "Software"), to deal | |
6 | + * in the Software without restriction, including without limitation the rights | |
7 | + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
8 | + * copies of the Software, and to permit persons to whom the Software is | |
9 | + * furnished to do so, subject to the following conditions: | |
10 | + * | |
11 | + * The above copyright notice and this permission notice shall be included in | |
12 | + * all copies or substantial portions of the Software. | |
13 | + * | |
14 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
15 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
16 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
17 | + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
18 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
19 | + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |
20 | + * THE SOFTWARE. | |
21 | + */ | |
22 | + | |
23 | + | |
24 | + | |
25 | + | |
26 | +#ifndef NYUHPC_CL_HELPER | |
27 | +#define NYUHPC_CL_HELPER | |
28 | + | |
29 | +#include <stdarg.h> | |
30 | +#include <stdio.h> | |
31 | +#include <stdlib.h> | |
32 | + | |
33 | +#ifdef __APPLE__ | |
34 | +#include <OpenCL/opencl.h> | |
35 | +#else | |
36 | +#include <CL/cl.h> | |
37 | +#endif | |
38 | + | |
/* An error check macro for OpenCL.
 *
 * Usage:
 * CHECK_CL_ERROR(status_code_from_a_cl_operation, "function_name");
 *
 * If the status is not CL_SUCCESS, a message naming WHAT, the source file,
 * the line and the decoded error is written to stderr and the program
 * aborts.
 *
 * STATUS_CODE is evaluated exactly once, so passing a call expression does
 * not re-run the call on failure. The expansion is a single statement
 * (do/while(0)), so the macro can be the unbraced body of an if/else; it
 * must be followed by a semicolon.
 */

#define CHECK_CL_ERROR(STATUS_CODE, WHAT) \
  do \
  { \
    const cl_int check_cl_error_status_ = (STATUS_CODE); \
    if (check_cl_error_status_ != CL_SUCCESS) \
    { \
      fprintf(stderr, \
          "*** '%s' in '%s' on line %d failed with error '%s'.\n", \
          WHAT, __FILE__, __LINE__, \
          cl_error_to_str(check_cl_error_status_)); \
      abort(); \
    } \
  } while (0)
56 | + | |
/* A more automated error check macro for OpenCL, for use with clXxxx
 * functions that return status codes. (Not all of them do, notably
 * clCreateXxx do not.)
 *
 * Usage:
 * CALL_CL_GUARDED(clFunction, (arg1, arg2));
 *
 * Note the slightly strange comma between the function name and the
 * argument list.
 *
 * The call's status is passed to CHECK_CL_ERROR together with the
 * stringified function name. The expansion is a single do/while(0)
 * statement, so it behaves correctly as the unbraced body of an if/else;
 * it must be followed by a semicolon.
 */

#define CALL_CL_GUARDED(NAME, ARGLIST) \
  do \
  { \
    cl_int status_code; \
    status_code = NAME ARGLIST; \
    CHECK_CL_ERROR(status_code, #NAME); \
  } while (0)
74 | + | |
/* An error check macro for Unix system functions. If "COND" is true, then the
 * last system error ("errno") is printed along with MSG, which is supposed to
 * be a string describing what you were doing, and the program aborts.
 *
 * Example:
 * CHECK_SYS_ERROR(dave != 0, "opening hatch");
 *
 * The expansion is a single do/while(0) statement, so it is safe as the
 * unbraced body of an if/else; it must be followed by a semicolon.
 */
#define CHECK_SYS_ERROR(COND, MSG) \
  do \
  { \
    if (COND) \
    { \
      perror(MSG); \
      abort(); \
    } \
  } while (0)
88 | + | |
89 | +/* Return a string describing the OpenCL error code 'e'. | |
90 | + */ | |
91 | +const char *cl_error_to_str(cl_int e); | |
92 | + | |
/* Print a list of available OpenCL platforms and devices
 * to standard output.
 *
 * Declared with (void) so that this is a real prototype: an empty
 * parameter list in C leaves the arguments unchecked.
 */
void print_platforms_devices(void);
97 | + | |
98 | +/* Create an OpenCL context and a matching command queue on a platform from a | |
99 | + * vendor whose name contains 'plat_name' on a device whose name contains | |
100 | + * 'dev_name'. Both 'plat_name' and 'dev_name' may be NULL, indicating no | |
101 | + * preference in the matter. | |
102 | + * | |
103 | + * If multiple devices match both 'plat_name' and 'dev_name', then 'idx' | |
104 | + * prescribes the number of the device that should be chosen. | |
105 | + * | |
106 | + * You may also use the special value CHOOSE_INTERACTIVELY to offer the user | |
107 | + * a choice. You should use this value for code you turn in. | |
108 | + * | |
109 | + * This function always succeeds. (If an error occurs, the program | |
110 | + * is aborted.) | |
111 | + * | |
112 | + * You can force interactive querying by defining the | |
113 | + * CL_HELPER_FORCE_INTERACTIVE macro when compiling cl-helper.c. | |
114 | + * You may do so by passing the -DCL_HELPER_FORCE_INTERACTIVE | |
115 | + * compiler option. | |
116 | + */ | |
117 | +extern const char *CHOOSE_INTERACTIVELY; | |
118 | +void create_context_on(const char *plat_name, const char*dev_name, cl_uint | |
119 | + idx, cl_context *ctx, cl_command_queue *queue, int enable_profiling); | |
120 | + | |
121 | +/* Read contents of file 'filename'. | |
122 | + * Return as a new string. You must free the string when you're done with it. | |
123 | + * | |
124 | + * This function always succeeds. (If an error occurs, the program | |
125 | + * is aborted.) | |
126 | + */ | |
127 | +char *read_file(const char *filename); | |
128 | + | |
129 | +/* Create a new OpenCL kernel from the code in the string 'knl'. | |
130 | + * 'knl_name' is the name of the kernel function, and 'options', | |
131 | + * if not NULL, is a string containing compiler flags. | |
132 | + * | |
133 | + * You must release the resulting kernel when you're done | |
134 | + * with it. | |
135 | + * | |
136 | + * This function always succeeds. (If an error occurs, the program | |
137 | + * is aborted.) | |
138 | + */ | |
139 | +cl_kernel kernel_from_string(cl_context ctx, | |
140 | + char const *knl, char const *knl_name, char const *options); | |
141 | + | |
142 | +/* Print information about a device, found from either the | |
143 | + * queue or the device_id. | |
144 | + */ | |
145 | +void print_device_info(cl_device_id device); | |
146 | +void print_device_info_from_queue(cl_command_queue queue); | |
147 | + | |
148 | +#define SET_1_KERNEL_ARG(knl, arg0) \ | |
149 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 0, sizeof(arg0), &arg0)); | |
150 | + | |
151 | +#define SET_2_KERNEL_ARGS(knl, arg0, arg1) \ | |
152 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 0, sizeof(arg0), &arg0)); \ | |
153 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 1, sizeof(arg1), &arg1)); | |
154 | + | |
155 | +#define SET_3_KERNEL_ARGS(knl, arg0, arg1, arg2) \ | |
156 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 0, sizeof(arg0), &arg0)); \ | |
157 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 1, sizeof(arg1), &arg1)); \ | |
158 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 2, sizeof(arg2), &arg2)); | |
159 | + | |
160 | +#define SET_4_KERNEL_ARGS(knl, arg0, arg1, arg2, arg3) \ | |
161 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 0, sizeof(arg0), &arg0)); \ | |
162 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 1, sizeof(arg1), &arg1)); \ | |
163 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 2, sizeof(arg2), &arg2)); \ | |
164 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 3, sizeof(arg3), &arg3)); | |
165 | + | |
166 | +#define SET_5_KERNEL_ARGS(knl, arg0, arg1, arg2, arg3, arg4) \ | |
167 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 0, sizeof(arg0), &arg0)); \ | |
168 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 1, sizeof(arg1), &arg1)); \ | |
169 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 2, sizeof(arg2), &arg2)); \ | |
170 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 3, sizeof(arg3), &arg3)); \ | |
171 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 4, sizeof(arg4), &arg4)); | |
172 | + | |
173 | +#define SET_6_KERNEL_ARGS(knl, arg0, arg1, arg2, arg3, arg4, arg5) \ | |
174 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 0, sizeof(arg0), &arg0)); \ | |
175 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 1, sizeof(arg1), &arg1)); \ | |
176 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 2, sizeof(arg2), &arg2)); \ | |
177 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 3, sizeof(arg3), &arg3)); \ | |
178 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 4, sizeof(arg4), &arg4)); \ | |
179 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 5, sizeof(arg5), &arg5)); | |
180 | + | |
181 | +#define SET_7_KERNEL_ARGS(knl, arg0, arg1, arg2, arg3, arg4, arg5, arg6) \ | |
182 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 0, sizeof(arg0), &arg0)); \ | |
183 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 1, sizeof(arg1), &arg1)); \ | |
184 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 2, sizeof(arg2), &arg2)); \ | |
185 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 3, sizeof(arg3), &arg3)); \ | |
186 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 4, sizeof(arg4), &arg4)); \ | |
187 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 5, sizeof(arg5), &arg5)); \ | |
188 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 6, sizeof(arg6), &arg6)); | |
189 | + | |
190 | +#define SET_8_KERNEL_ARGS(knl, arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7) \ | |
191 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 0, sizeof(arg0), &arg0)); \ | |
192 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 1, sizeof(arg1), &arg1)); \ | |
193 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 2, sizeof(arg2), &arg2)); \ | |
194 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 3, sizeof(arg3), &arg3)); \ | |
195 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 4, sizeof(arg4), &arg4)); \ | |
196 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 5, sizeof(arg5), &arg5)); \ | |
197 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 6, sizeof(arg6), &arg6)); \ | |
198 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 7, sizeof(arg7), &arg7)); | |
199 | + | |
200 | +#define SET_9_KERNEL_ARGS(knl, arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8) \ | |
201 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 0, sizeof(arg0), &arg0)); \ | |
202 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 1, sizeof(arg1), &arg1)); \ | |
203 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 2, sizeof(arg2), &arg2)); \ | |
204 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 3, sizeof(arg3), &arg3)); \ | |
205 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 4, sizeof(arg4), &arg4)); \ | |
206 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 5, sizeof(arg5), &arg5)); \ | |
207 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 6, sizeof(arg6), &arg6)); \ | |
208 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 7, sizeof(arg7), &arg7)); \ | |
209 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 8, sizeof(arg8), &arg8)); | |
210 | + | |
211 | +#define SET_10_KERNEL_ARGS(knl, arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9) \ | |
212 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 0, sizeof(arg0), &arg0)); \ | |
213 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 1, sizeof(arg1), &arg1)); \ | |
214 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 2, sizeof(arg2), &arg2)); \ | |
215 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 3, sizeof(arg3), &arg3)); \ | |
216 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 4, sizeof(arg4), &arg4)); \ | |
217 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 5, sizeof(arg5), &arg5)); \ | |
218 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 6, sizeof(arg6), &arg6)); \ | |
219 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 7, sizeof(arg7), &arg7)); \ | |
220 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 8, sizeof(arg8), &arg8)); \ | |
221 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 9, sizeof(arg9), &arg9)); | |
222 | + | |
223 | +#define SET_11_KERNEL_ARGS(knl, arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10) \ | |
224 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 0, sizeof(arg0), &arg0)); \ | |
225 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 1, sizeof(arg1), &arg1)); \ | |
226 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 2, sizeof(arg2), &arg2)); \ | |
227 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 3, sizeof(arg3), &arg3)); \ | |
228 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 4, sizeof(arg4), &arg4)); \ | |
229 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 5, sizeof(arg5), &arg5)); \ | |
230 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 6, sizeof(arg6), &arg6)); \ | |
231 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 7, sizeof(arg7), &arg7)); \ | |
232 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 8, sizeof(arg8), &arg8)); \ | |
233 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 9, sizeof(arg9), &arg9)); \ | |
234 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 10, sizeof(arg10), &arg10)); | |
235 | + | |
236 | +#define SET_12_KERNEL_ARGS(knl, arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10, arg11) \ | |
237 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 0, sizeof(arg0), &arg0)); \ | |
238 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 1, sizeof(arg1), &arg1)); \ | |
239 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 2, sizeof(arg2), &arg2)); \ | |
240 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 3, sizeof(arg3), &arg3)); \ | |
241 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 4, sizeof(arg4), &arg4)); \ | |
242 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 5, sizeof(arg5), &arg5)); \ | |
243 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 6, sizeof(arg6), &arg6)); \ | |
244 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 7, sizeof(arg7), &arg7)); \ | |
245 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 8, sizeof(arg8), &arg8)); \ | |
246 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 9, sizeof(arg9), &arg9)); \ | |
247 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 10, sizeof(arg10), &arg10)); \ | |
248 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 11, sizeof(arg11), &arg11)); | |
249 | + | |
250 | +#endif | ... | ... |
print-devices.c
0 → 100644
set-governor
0 → 100755
show-clock-freq
0 → 100755
timing.h
0 → 100644
1 | +#ifdef __APPLE__ | |
2 | + | |
3 | +#include <sys/time.h> | |
4 | + | |
5 | +typedef struct timeval timestamp_type; | |
6 | + | |
7 | +static void get_timestamp(timestamp_type *t) | |
8 | +{ | |
9 | + gettimeofday(t, NULL); | |
10 | +} | |
11 | + | |
12 | +static double timestamp_diff_in_seconds(timestamp_type start, | |
13 | +timestamp_type end) | |
14 | +{ | |
15 | + /* Perform the carry for the later subtraction by updating start. */ | |
16 | + if (end.tv_usec < start.tv_usec) { | |
17 | + int nsec = (start.tv_usec - end.tv_usec) / 1000000 + 1; | |
18 | + start.tv_usec -= 1000000 * nsec; | |
19 | + start.tv_sec += nsec; | |
20 | + } | |
21 | + if (end.tv_usec - start.tv_usec > 1000000) { | |
22 | + int nsec = (end.tv_usec - start.tv_usec) / 1000000; | |
23 | + start.tv_usec += 1000000 * nsec; | |
24 | + start.tv_sec -= nsec; | |
25 | + } | |
26 | + | |
27 | + return end.tv_sec - start.tv_sec + (end.tv_usec - start.tv_usec)*1e-6; | |
28 | +} | |
29 | + | |
30 | +#else | |
31 | + | |
32 | +#include <time.h> | |
33 | + | |
34 | +typedef struct timespec timestamp_type; | |
35 | + | |
36 | +static void get_timestamp(timestamp_type *t) | |
37 | +{ | |
38 | + clock_gettime(CLOCK_REALTIME, t); | |
39 | +} | |
40 | + | |
41 | +static double timestamp_diff_in_seconds(timestamp_type start, timestamp_type end) | |
42 | +{ | |
43 | + struct timespec temp; | |
44 | + if ((end.tv_nsec-start.tv_nsec)<0) { | |
45 | + temp.tv_sec = end.tv_sec-start.tv_sec-1; | |
46 | + temp.tv_nsec = 1000000000+end.tv_nsec-start.tv_nsec; | |
47 | + } else { | |
48 | + temp.tv_sec = end.tv_sec-start.tv_sec; | |
49 | + temp.tv_nsec = end.tv_nsec-start.tv_nsec; | |
50 | + } | |
51 | + return temp.tv_sec + 1e-9*temp.tv_nsec; | |
52 | +} | |
53 | + | |
54 | +#endif | ... | ... |
Please
register
or
login
to post a comment