Showing
11 changed files
with
1261 additions
and
0 deletions
Makefile
0 → 100644
README.md
0 → 100644
1 | +# OpenCL Howto | ||
2 | + | ||
3 | +Code snippets taken from | ||
4 | +[OpenCLHowto](https://wiki.tiker.net/OpenCLHowTo) | ||
5 | + | ||
6 | +## Description | ||
7 | + | ||
8 | +This is just some more playing around with OpenCL in an attempt to learn a bit | ||
9 | +about it. | ||
10 | + | ||
11 | +## Requirements | ||
12 | + | ||
13 | +Some OpenCL-capable hardware and the corresponding OpenCL library exposing the | ||
14 | +OpenCL API. I tested this on an Intel GPU (Intel Corporation Haswell-ULT | ||
15 | +Integrated Graphics Controller (rev 09)) with the | ||
16 | +[beignet](https://www.freedesktop.org/wiki/Software/Beignet/) | ||
17 | +open source library. | ||
18 | + | ||
19 | +## License | ||
20 | + | ||
21 | +MIT License | ||
22 | + | ||
23 | +> Permission is hereby granted, free of charge, to any person obtaining a copy | ||
24 | +> of this software and associated documentation files (the "Software"), to | ||
25 | +> deal in the Software without restriction, including without limitation the | ||
26 | +> rights to use, copy, modify, merge, publish, distribute, sublicense, and/or | ||
27 | +> sell copies of the Software, and to permit persons to whom the Software is | ||
28 | +> furnished to do so, subject to the following conditions: | ||
29 | +> | ||
30 | +> The above copyright notice and this permission notice shall be included in | ||
31 | +> all copies or substantial portions of the Software. | ||
32 | +> | ||
33 | +> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
34 | +> IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
35 | +> FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
36 | +> AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
37 | +> LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
38 | +> FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | ||
39 | +> IN THE SOFTWARE. |
cl-demo.c
0 → 100644
1 | +#include "timing.h" | ||
2 | +#include "cl-helper.h" | ||
3 | + | ||
4 | + | ||
5 | + | ||
6 | + | ||
7 | +int main(int argc, char **argv) | ||
8 | +{ | ||
9 | + if (argc != 3) | ||
10 | + { | ||
11 | + fprintf(stderr, "need two arguments!\n"); | ||
12 | + abort(); | ||
13 | + } | ||
14 | + | ||
15 | + const cl_long n = atol(argv[1]); | ||
16 | + const int ntrips = atoi(argv[2]); | ||
17 | + | ||
18 | + cl_context ctx; | ||
19 | + cl_command_queue queue; | ||
20 | + create_context_on(CHOOSE_INTERACTIVELY, CHOOSE_INTERACTIVELY, 0, &ctx, &queue, 0); | ||
21 | + | ||
22 | + print_device_info_from_queue(queue); | ||
23 | + | ||
24 | + // -------------------------------------------------------------------------- | ||
25 | + // load kernels | ||
26 | + // -------------------------------------------------------------------------- | ||
27 | + char *knl_text = read_file("vec-add-soln.cl"); | ||
28 | + cl_kernel knl = kernel_from_string(ctx, knl_text, "sum", NULL); | ||
29 | + free(knl_text); | ||
30 | + | ||
31 | + // -------------------------------------------------------------------------- | ||
32 | + // allocate and initialize CPU memory | ||
33 | + // -------------------------------------------------------------------------- | ||
34 | + float *a = (float *) malloc(sizeof(float) * n); | ||
35 | + if (!a) { perror("alloc x"); abort(); } | ||
36 | + float *b = (float *) malloc(sizeof(float) * n); | ||
37 | + if (!b) { perror("alloc y"); abort(); } | ||
38 | + float *c = (float *) malloc(sizeof(float) * n); | ||
39 | + if (!c) { perror("alloc z"); abort(); } | ||
40 | + | ||
41 | + for (size_t i = 0; i < n; ++i) | ||
42 | + { | ||
43 | + a[i] = i; | ||
44 | + b[i] = 2*i; | ||
45 | + } | ||
46 | + | ||
47 | + // -------------------------------------------------------------------------- | ||
48 | + // allocate device memory | ||
49 | + // -------------------------------------------------------------------------- | ||
50 | + cl_int status; | ||
51 | + cl_mem buf_a = clCreateBuffer(ctx, CL_MEM_READ_WRITE, | ||
52 | + sizeof(float) * n, 0, &status); | ||
53 | + CHECK_CL_ERROR(status, "clCreateBuffer"); | ||
54 | + | ||
55 | + cl_mem buf_b = clCreateBuffer(ctx, CL_MEM_READ_WRITE, | ||
56 | + sizeof(float) * n, 0, &status); | ||
57 | + CHECK_CL_ERROR(status, "clCreateBuffer"); | ||
58 | + | ||
59 | + cl_mem buf_c = clCreateBuffer(ctx, CL_MEM_READ_WRITE, | ||
60 | + sizeof(float) * n, 0, &status); | ||
61 | + CHECK_CL_ERROR(status, "clCreateBuffer"); | ||
62 | + | ||
63 | + // -------------------------------------------------------------------------- | ||
64 | + // transfer to device | ||
65 | + // -------------------------------------------------------------------------- | ||
66 | + CALL_CL_GUARDED(clEnqueueWriteBuffer, ( | ||
67 | + queue, buf_a, /*blocking*/ CL_TRUE, /*offset*/ 0, | ||
68 | + n * sizeof(float), a, | ||
69 | + 0, NULL, NULL)); | ||
70 | + | ||
71 | + CALL_CL_GUARDED(clEnqueueWriteBuffer, ( | ||
72 | + queue, buf_b, /*blocking*/ CL_TRUE, /*offset*/ 0, | ||
73 | + n * sizeof(float), b, | ||
74 | + 0, NULL, NULL)); | ||
75 | + | ||
76 | + // -------------------------------------------------------------------------- | ||
77 | + // run code on device | ||
78 | + // -------------------------------------------------------------------------- | ||
79 | + | ||
80 | + CALL_CL_GUARDED(clFinish, (queue)); | ||
81 | + | ||
82 | + timestamp_type time1, time2; | ||
83 | + get_timestamp(&time1); | ||
84 | + | ||
85 | + for (int trip = 0; trip < ntrips; ++trip) | ||
86 | + { | ||
87 | + SET_4_KERNEL_ARGS(knl, buf_a, buf_b, buf_c, n); | ||
88 | + size_t ldim[] = { 32 }; | ||
89 | + size_t gdim[] = { ((n + ldim[0] - 1)/ldim[0])*ldim[0] }; | ||
90 | + CALL_CL_GUARDED(clEnqueueNDRangeKernel, | ||
91 | + (queue, knl, | ||
92 | + /*dimensions*/ 1, NULL, gdim, ldim, | ||
93 | + 0, NULL, NULL)); | ||
94 | + } | ||
95 | + | ||
96 | + CALL_CL_GUARDED(clFinish, (queue)); | ||
97 | + | ||
98 | + get_timestamp(&time2); | ||
99 | + double elapsed = timestamp_diff_in_seconds(time1,time2)/ntrips; | ||
100 | + printf("%f s\n", elapsed); | ||
101 | + printf("%f GB/s\n", | ||
102 | + 3*n*sizeof(float)/1e9/elapsed); | ||
103 | + | ||
104 | + // -------------------------------------------------------------------------- | ||
105 | + // transfer back & check | ||
106 | + // -------------------------------------------------------------------------- | ||
107 | + CALL_CL_GUARDED(clEnqueueReadBuffer, ( | ||
108 | + queue, buf_c, /*blocking*/ CL_TRUE, /*offset*/ 0, | ||
109 | + n * sizeof(float), c, | ||
110 | + 0, NULL, NULL)); | ||
111 | + | ||
112 | + for (size_t i = 0; i < n; ++i) | ||
113 | + if (c[i] != 3*i) | ||
114 | + { | ||
115 | + printf("BAD %ld %f %f!\n", i, c[i], c[i] - 3*i); | ||
116 | + abort(); | ||
117 | + } | ||
118 | + puts("GOOD"); | ||
119 | + | ||
120 | + // -------------------------------------------------------------------------- | ||
121 | + // clean up | ||
122 | + // -------------------------------------------------------------------------- | ||
123 | + CALL_CL_GUARDED(clReleaseMemObject, (buf_a)); | ||
124 | + CALL_CL_GUARDED(clReleaseMemObject, (buf_b)); | ||
125 | + CALL_CL_GUARDED(clReleaseMemObject, (buf_c)); | ||
126 | + CALL_CL_GUARDED(clReleaseKernel, (knl)); | ||
127 | + CALL_CL_GUARDED(clReleaseCommandQueue, (queue)); | ||
128 | + CALL_CL_GUARDED(clReleaseContext, (ctx)); | ||
129 | + | ||
130 | + return 0; | ||
131 | +} |
cl-helper.c
0 → 100644
1 | +/* | ||
2 | + * Copyright (c) 2010 Andreas Kloeckner | ||
3 | + * | ||
4 | + * Permission is hereby granted, free of charge, to any person obtaining a copy | ||
5 | + * of this software and associated documentation files (the "Software"), to deal | ||
6 | + * in the Software without restriction, including without limitation the rights | ||
7 | + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
8 | + * copies of the Software, and to permit persons to whom the Software is | ||
9 | + * furnished to do so, subject to the following conditions: | ||
10 | + * | ||
11 | + * The above copyright notice and this permission notice shall be included in | ||
12 | + * all copies or substantial portions of the Software. | ||
13 | + * | ||
14 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
15 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
16 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
17 | + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
18 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
19 | + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | ||
20 | + * THE SOFTWARE. | ||
21 | + */ | ||
22 | + | ||
23 | + | ||
24 | + | ||
25 | + | ||
26 | +#include "cl-helper.h" | ||
27 | +#include <string.h> | ||
28 | +#include <stdbool.h> | ||
29 | + | ||
30 | + | ||
31 | + | ||
32 | + | ||
33 | +#define MAX_NAME_LEN 1000 | ||
34 | + | ||
35 | + | ||
36 | + | ||
37 | + | ||
38 | +const char *cl_error_to_str(cl_int e) | ||
39 | +{ | ||
40 | + switch (e) | ||
41 | + { | ||
42 | + case CL_SUCCESS: return "success"; | ||
43 | + case CL_DEVICE_NOT_FOUND: return "device not found"; | ||
44 | + case CL_DEVICE_NOT_AVAILABLE: return "device not available"; | ||
45 | +#if !(defined(CL_PLATFORM_NVIDIA) && CL_PLATFORM_NVIDIA == 0x3001) | ||
46 | + case CL_COMPILER_NOT_AVAILABLE: return "device compiler not available"; | ||
47 | +#endif | ||
48 | + case CL_MEM_OBJECT_ALLOCATION_FAILURE: return "mem object allocation failure"; | ||
49 | + case CL_OUT_OF_RESOURCES: return "out of resources"; | ||
50 | + case CL_OUT_OF_HOST_MEMORY: return "out of host memory"; | ||
51 | + case CL_PROFILING_INFO_NOT_AVAILABLE: return "profiling info not available"; | ||
52 | + case CL_MEM_COPY_OVERLAP: return "mem copy overlap"; | ||
53 | + case CL_IMAGE_FORMAT_MISMATCH: return "image format mismatch"; | ||
54 | + case CL_IMAGE_FORMAT_NOT_SUPPORTED: return "image format not supported"; | ||
55 | + case CL_BUILD_PROGRAM_FAILURE: return "build program failure"; | ||
56 | + case CL_MAP_FAILURE: return "map failure"; | ||
57 | + | ||
58 | + case CL_INVALID_VALUE: return "invalid value"; | ||
59 | + case CL_INVALID_DEVICE_TYPE: return "invalid device type"; | ||
60 | + case CL_INVALID_PLATFORM: return "invalid platform"; | ||
61 | + case CL_INVALID_DEVICE: return "invalid device"; | ||
62 | + case CL_INVALID_CONTEXT: return "invalid context"; | ||
63 | + case CL_INVALID_QUEUE_PROPERTIES: return "invalid queue properties"; | ||
64 | + case CL_INVALID_COMMAND_QUEUE: return "invalid command queue"; | ||
65 | + case CL_INVALID_HOST_PTR: return "invalid host ptr"; | ||
66 | + case CL_INVALID_MEM_OBJECT: return "invalid mem object"; | ||
67 | + case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: return "invalid image format descriptor"; | ||
68 | + case CL_INVALID_IMAGE_SIZE: return "invalid image size"; | ||
69 | + case CL_INVALID_SAMPLER: return "invalid sampler"; | ||
70 | + case CL_INVALID_BINARY: return "invalid binary"; | ||
71 | + case CL_INVALID_BUILD_OPTIONS: return "invalid build options"; | ||
72 | + case CL_INVALID_PROGRAM: return "invalid program"; | ||
73 | + case CL_INVALID_PROGRAM_EXECUTABLE: return "invalid program executable"; | ||
74 | + case CL_INVALID_KERNEL_NAME: return "invalid kernel name"; | ||
75 | + case CL_INVALID_KERNEL_DEFINITION: return "invalid kernel definition"; | ||
76 | + case CL_INVALID_KERNEL: return "invalid kernel"; | ||
77 | + case CL_INVALID_ARG_INDEX: return "invalid arg index"; | ||
78 | + case CL_INVALID_ARG_VALUE: return "invalid arg value"; | ||
79 | + case CL_INVALID_ARG_SIZE: return "invalid arg size"; | ||
80 | + case CL_INVALID_KERNEL_ARGS: return "invalid kernel args"; | ||
81 | + case CL_INVALID_WORK_DIMENSION: return "invalid work dimension"; | ||
82 | + case CL_INVALID_WORK_GROUP_SIZE: return "invalid work group size"; | ||
83 | + case CL_INVALID_WORK_ITEM_SIZE: return "invalid work item size"; | ||
84 | + case CL_INVALID_GLOBAL_OFFSET: return "invalid global offset"; | ||
85 | + case CL_INVALID_EVENT_WAIT_LIST: return "invalid event wait list"; | ||
86 | + case CL_INVALID_EVENT: return "invalid event"; | ||
87 | + case CL_INVALID_OPERATION: return "invalid operation"; | ||
88 | + case CL_INVALID_GL_OBJECT: return "invalid gl object"; | ||
89 | + case CL_INVALID_BUFFER_SIZE: return "invalid buffer size"; | ||
90 | + case CL_INVALID_MIP_LEVEL: return "invalid mip level"; | ||
91 | + | ||
92 | +#if defined(cl_khr_gl_sharing) && (cl_khr_gl_sharing >= 1) | ||
93 | + case CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR: return "invalid gl sharegroup reference number"; | ||
94 | +#endif | ||
95 | + | ||
96 | +#ifdef CL_VERSION_1_1 | ||
97 | + case CL_MISALIGNED_SUB_BUFFER_OFFSET: return "misaligned sub-buffer offset"; | ||
98 | + case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST: return "exec status error for events in wait list"; | ||
99 | + case CL_INVALID_GLOBAL_WORK_SIZE: return "invalid global work size"; | ||
100 | +#endif | ||
101 | + | ||
102 | + default: return "invalid/unknown error code"; | ||
103 | + } | ||
104 | +} | ||
105 | + | ||
106 | + | ||
107 | + | ||
108 | + | ||
109 | +void print_platforms_devices() | ||
110 | +{ | ||
111 | + // get number of platforms | ||
112 | + cl_uint plat_count; | ||
113 | + CALL_CL_GUARDED(clGetPlatformIDs, (0, NULL, &plat_count)); | ||
114 | + | ||
115 | + // allocate memory, get list of platforms | ||
116 | + cl_platform_id *platforms = | ||
117 | + (cl_platform_id *) malloc(plat_count*sizeof(cl_platform_id)); | ||
118 | + CHECK_SYS_ERROR(!platforms, "allocating platform array"); | ||
119 | + | ||
120 | + CALL_CL_GUARDED(clGetPlatformIDs, (plat_count, platforms, NULL)); | ||
121 | + | ||
122 | + // iterate over platforms | ||
123 | + for (cl_uint i = 0; i < plat_count; ++i) | ||
124 | + { | ||
125 | + // get platform vendor name | ||
126 | + char buf[MAX_NAME_LEN]; | ||
127 | + CALL_CL_GUARDED(clGetPlatformInfo, (platforms[i], CL_PLATFORM_VENDOR, | ||
128 | + sizeof(buf), buf, NULL)); | ||
129 | + printf("platform %d: vendor '%s'\n", i, buf); | ||
130 | + | ||
131 | + // get number of devices in platform | ||
132 | + cl_uint dev_count; | ||
133 | + CALL_CL_GUARDED(clGetDeviceIDs, (platforms[i], CL_DEVICE_TYPE_ALL, | ||
134 | + 0, NULL, &dev_count)); | ||
135 | + | ||
136 | + cl_device_id *devices = | ||
137 | + (cl_device_id *) malloc(dev_count*sizeof(cl_device_id)); | ||
138 | + CHECK_SYS_ERROR(!devices, "allocating device array"); | ||
139 | + | ||
140 | + // get list of devices in platform | ||
141 | + CALL_CL_GUARDED(clGetDeviceIDs, (platforms[i], CL_DEVICE_TYPE_ALL, | ||
142 | + dev_count, devices, NULL)); | ||
143 | + | ||
144 | + // iterate over devices | ||
145 | + for (cl_uint j = 0; j < dev_count; ++j) | ||
146 | + { | ||
147 | + char buf[MAX_NAME_LEN]; | ||
148 | + CALL_CL_GUARDED(clGetDeviceInfo, (devices[j], CL_DEVICE_NAME, | ||
149 | + sizeof(buf), buf, NULL)); | ||
150 | + printf(" device %d: '%s'\n", j, buf); | ||
151 | + } | ||
152 | + | ||
153 | + free(devices); | ||
154 | + } | ||
155 | + | ||
156 | + free(platforms); | ||
157 | +} | ||
158 | + | ||
159 | + | ||
160 | + | ||
161 | + | ||
/*
 * Read one line from stdin into a freshly allocated, NUL-terminated buffer.
 *
 * Reading stops after the first '\n' (which is kept in the result) or at
 * end of input. If end of input is hit before any character was read, an
 * empty string is returned.
 *
 * Returns the buffer, which the caller must free(), or NULL if memory could
 * not be allocated.
 */
char *read_a_line(void)
{
  // starting size only; the buffer doubles whenever it fills up
  enum { INITIAL_CAPACITY = 1000 };

  size_t capacity = INITIAL_CAPACITY;
  size_t length = 0;
  char *buffer = (char *) malloc(capacity);

  if (buffer == NULL)
    return NULL;

  for (;;)
  {
    int c = fgetc(stdin);
    if (c == EOF)
      break;

    // Grow before storing so that one byte always stays free for the
    // terminating NUL. Tracking used length against capacity explicitly
    // keeps every write inside the allocation, however long the line is.
    if (length + 1 >= capacity)
    {
      size_t new_capacity = capacity * 2;
      char *grown = (char *) realloc(buffer, new_capacity);

      if (grown == NULL)
      {
        free(buffer);
        return NULL;
      }
      buffer = grown;
      capacity = new_capacity;
    }

    buffer[length++] = (char) c;
    if (c == '\n')
      break;
  }

  buffer[length] = '\0';
  return buffer;
}
200 | + | ||
201 | + | ||
202 | + | ||
203 | + | ||
204 | +const char *CHOOSE_INTERACTIVELY = "INTERACTIVE"; | ||
205 | + | ||
206 | + | ||
207 | +#define MIN(a,b) (((a)<(b))?(a):(b)) | ||
208 | +#define MAX(a,b) (((a)>(b))?(a):(b)) | ||
209 | + | ||
210 | +void create_context_on(const char *plat_name, const char*dev_name, cl_uint idx, | ||
211 | + cl_context *ctx, cl_command_queue *queue, int enable_profiling) | ||
212 | +{ | ||
213 | + char dev_sel_buf[MAX_NAME_LEN]; | ||
214 | + char platform_sel_buf[MAX_NAME_LEN]; | ||
215 | + | ||
216 | + // get number of platforms | ||
217 | + cl_uint plat_count; | ||
218 | + CALL_CL_GUARDED(clGetPlatformIDs, (0, NULL, &plat_count)); | ||
219 | + | ||
220 | + // allocate memory, get list of platform handles | ||
221 | + cl_platform_id *platforms = | ||
222 | + (cl_platform_id *) malloc(plat_count*sizeof(cl_platform_id)); | ||
223 | + CHECK_SYS_ERROR(!platforms, "allocating platform array"); | ||
224 | + CALL_CL_GUARDED(clGetPlatformIDs, (plat_count, platforms, NULL)); | ||
225 | + | ||
226 | + // print menu, if requested | ||
227 | +#ifndef CL_HELPER_FORCE_INTERACTIVE | ||
228 | + if (plat_name == CHOOSE_INTERACTIVELY) // yes, we want exactly that pointer | ||
229 | +#endif | ||
230 | + { | ||
231 | + puts("Choose platform:"); | ||
232 | + for (cl_uint i = 0; i < plat_count; ++i) | ||
233 | + { | ||
234 | + char buf[MAX_NAME_LEN]; | ||
235 | + CALL_CL_GUARDED(clGetPlatformInfo, (platforms[i], CL_PLATFORM_VENDOR, | ||
236 | + sizeof(buf), buf, NULL)); | ||
237 | + printf("[%d] %s\n", i, buf); | ||
238 | + } | ||
239 | + | ||
240 | + printf("Enter choice: "); | ||
241 | + fflush(stdout); | ||
242 | + | ||
243 | + char *sel = read_a_line(); | ||
244 | + if (!sel) | ||
245 | + { | ||
246 | + fprintf(stderr, "error reading line from stdin"); | ||
247 | + abort(); | ||
248 | + } | ||
249 | + | ||
250 | + int sel_int = MIN(MAX(0, atoi(sel)), (int) plat_count-1); | ||
251 | + free(sel); | ||
252 | + | ||
253 | + CALL_CL_GUARDED(clGetPlatformInfo, (platforms[sel_int], CL_PLATFORM_VENDOR, | ||
254 | + sizeof(platform_sel_buf), platform_sel_buf, NULL)); | ||
255 | + plat_name = platform_sel_buf; | ||
256 | + } | ||
257 | + | ||
258 | + // iterate over platforms | ||
259 | + for (cl_uint i = 0; i < plat_count; ++i) | ||
260 | + { | ||
261 | + // get platform name | ||
262 | + char buf[MAX_NAME_LEN]; | ||
263 | + CALL_CL_GUARDED(clGetPlatformInfo, (platforms[i], CL_PLATFORM_VENDOR, | ||
264 | + sizeof(buf), buf, NULL)); | ||
265 | + | ||
266 | + // does it match? | ||
267 | + if (!plat_name || strstr(buf, plat_name)) | ||
268 | + { | ||
269 | + // get number of devices in platform | ||
270 | + cl_uint dev_count; | ||
271 | + CALL_CL_GUARDED(clGetDeviceIDs, (platforms[i], CL_DEVICE_TYPE_ALL, | ||
272 | + 0, NULL, &dev_count)); | ||
273 | + | ||
274 | + // allocate memory, get list of device handles in platform | ||
275 | + cl_device_id *devices = | ||
276 | + (cl_device_id *) malloc(dev_count*sizeof(cl_device_id)); | ||
277 | + CHECK_SYS_ERROR(!devices, "allocating device array"); | ||
278 | + | ||
279 | + CALL_CL_GUARDED(clGetDeviceIDs, (platforms[i], CL_DEVICE_TYPE_ALL, | ||
280 | + dev_count, devices, NULL)); | ||
281 | + | ||
282 | + // {{{ print device menu, if requested | ||
283 | +#ifndef CL_HELPER_FORCE_INTERACTIVE | ||
284 | + if (dev_name == CHOOSE_INTERACTIVELY) // yes, we want exactly that pointer | ||
285 | +#endif | ||
286 | + { | ||
287 | + puts("Choose device:"); | ||
288 | + for (cl_uint j = 0; j < dev_count; ++j) | ||
289 | + { | ||
290 | + char buf[MAX_NAME_LEN]; | ||
291 | + CALL_CL_GUARDED(clGetDeviceInfo, (devices[j], CL_DEVICE_NAME, | ||
292 | + sizeof(buf), buf, NULL)); | ||
293 | + printf("[%d] %s\n", j, buf); | ||
294 | + } | ||
295 | + | ||
296 | + printf("Enter choice: "); | ||
297 | + fflush(stdout); | ||
298 | + | ||
299 | + char *sel = read_a_line(); | ||
300 | + if (!sel) | ||
301 | + { | ||
302 | + fprintf(stderr, "error reading line from stdin"); | ||
303 | + abort(); | ||
304 | + } | ||
305 | + | ||
306 | + int int_sel = MIN(MAX(0, atoi(sel)), (int) dev_count-1); | ||
307 | + free(sel); | ||
308 | + | ||
309 | + CALL_CL_GUARDED(clGetDeviceInfo, (devices[int_sel], CL_DEVICE_NAME, | ||
310 | + sizeof(dev_sel_buf), dev_sel_buf, NULL)); | ||
311 | + dev_name = dev_sel_buf; | ||
312 | + } | ||
313 | + | ||
314 | + // }}} | ||
315 | + | ||
316 | + // iterate over devices | ||
317 | + for (cl_uint j = 0; j < dev_count; ++j) | ||
318 | + { | ||
319 | + // get device name | ||
320 | + char buf[MAX_NAME_LEN]; | ||
321 | + CALL_CL_GUARDED(clGetDeviceInfo, (devices[j], CL_DEVICE_NAME, | ||
322 | + sizeof(buf), buf, NULL)); | ||
323 | + | ||
324 | + // does it match? | ||
325 | + if (!dev_name || strstr(buf, dev_name)) | ||
326 | + { | ||
327 | + if (idx == 0) | ||
328 | + { | ||
329 | + cl_platform_id plat = platforms[i]; | ||
330 | + cl_device_id dev = devices[j]; | ||
331 | + | ||
332 | + free(devices); | ||
333 | + free(platforms); | ||
334 | + | ||
335 | + // create a context | ||
336 | + cl_context_properties cps[3] = { | ||
337 | + CL_CONTEXT_PLATFORM, (cl_context_properties) plat, 0 }; | ||
338 | + | ||
339 | + cl_int status; | ||
340 | + *ctx = clCreateContext( | ||
341 | + cps, 1, &dev, NULL, NULL, &status); | ||
342 | + CHECK_CL_ERROR(status, "clCreateContext"); | ||
343 | + | ||
344 | + // create a command queue | ||
345 | + cl_command_queue_properties qprops = 0; | ||
346 | + if (enable_profiling) | ||
347 | + qprops |= CL_QUEUE_PROFILING_ENABLE; | ||
348 | + | ||
349 | + if (queue) | ||
350 | + { | ||
351 | + *queue = clCreateCommandQueue(*ctx, dev, qprops, &status); | ||
352 | + CHECK_CL_ERROR(status, "clCreateCommandQueue"); | ||
353 | + } | ||
354 | + | ||
355 | + return; | ||
356 | + } | ||
357 | + else | ||
358 | + --idx; | ||
359 | + } | ||
360 | + } | ||
361 | + | ||
362 | + free(devices); | ||
363 | + } | ||
364 | + } | ||
365 | + | ||
366 | + free(platforms); | ||
367 | + | ||
368 | + fputs("create_context_on: specified device not found.\n", stderr); | ||
369 | + abort(); | ||
370 | +} | ||
371 | + | ||
372 | + | ||
373 | + | ||
374 | + | ||
/*
 * Read an entire file into a freshly allocated, NUL-terminated buffer.
 *
 * filename: path of the file to read.
 *
 * Returns the buffer; the caller must free() it. Every failure (open, seek,
 * size query, allocation, short read, close) is reported through
 * CHECK_SYS_ERROR.
 */
char *read_file(const char *filename)
{
  // Binary mode: in text mode the offset reported by ftell need not equal
  // the number of bytes fread delivers (e.g. where line endings are
  // translated), which would make the short-read check below fail.
  FILE *f = fopen(filename, "rb");
  CHECK_SYS_ERROR(!f, "read_file: opening file");

  // figure out file size
  CHECK_SYS_ERROR(fseek(f, 0, SEEK_END) < 0, "read_file: seeking to end");
  long end_pos = ftell(f);
  // ftell signals failure with -1, which must not be turned into a size
  CHECK_SYS_ERROR(end_pos < 0, "read_file: determining file size");
  size_t size = (size_t) end_pos;

  CHECK_SYS_ERROR(fseek(f, 0, SEEK_SET) != 0,
      "read_file: seeking to start");

  // allocate memory (plus one byte for the terminator), slurp in entire file
  char *result = (char *) malloc(size+1);
  CHECK_SYS_ERROR(!result, "read_file: allocating file contents");
  CHECK_SYS_ERROR(fread(result, 1, size, f) < size,
      "read_file: reading file contents");

  // close, return
  CHECK_SYS_ERROR(fclose(f), "read_file: closing file");
  result[size] = '\0';

  return result;
}
399 | + | ||
400 | + | ||
401 | + | ||
402 | + | ||
403 | +static int printed_compiler_output_message = 0; | ||
404 | + | ||
405 | +cl_kernel kernel_from_string(cl_context ctx, | ||
406 | + char const *knl, char const *knl_name, char const *options) | ||
407 | +{ | ||
408 | + // create an OpenCL program (may have multiple kernels) | ||
409 | + size_t sizes[] = { strlen(knl) }; | ||
410 | + | ||
411 | + if (options && strlen(options) == 0) | ||
412 | + { | ||
413 | + // reportedly, some implementations dislike empty strings. | ||
414 | + options = NULL; | ||
415 | + } | ||
416 | + | ||
417 | + cl_int status; | ||
418 | + cl_program program = clCreateProgramWithSource(ctx, 1, &knl, sizes, &status); | ||
419 | + CHECK_CL_ERROR(status, "clCreateProgramWithSource"); | ||
420 | + | ||
421 | + // build it | ||
422 | + status = clBuildProgram(program, 0, NULL, options, NULL, NULL); | ||
423 | + | ||
424 | + { | ||
425 | + // get build log and print it | ||
426 | + | ||
427 | + cl_device_id dev; | ||
428 | + CALL_CL_GUARDED(clGetProgramInfo, (program, CL_PROGRAM_DEVICES, | ||
429 | + sizeof(dev), &dev, NULL)); | ||
430 | + | ||
431 | + size_t log_size; | ||
432 | + CALL_CL_GUARDED(clGetProgramBuildInfo, (program, dev, CL_PROGRAM_BUILD_LOG, | ||
433 | + 0, NULL, &log_size)); | ||
434 | + | ||
435 | + bool do_print = status != CL_SUCCESS; | ||
436 | + if (!do_print && log_size) | ||
437 | + { | ||
438 | + if (getenv("CL_HELPER_PRINT_COMPILER_OUTPUT")) | ||
439 | + do_print = true; | ||
440 | + else | ||
441 | + { | ||
442 | + if (!printed_compiler_output_message && !getenv("CL_HELPER_NO_COMPILER_OUTPUT_NAG")) | ||
443 | + { | ||
444 | + fprintf(stderr, "*** Kernel compilation resulted in non-empty log message.\n" | ||
445 | + "*** Set environment variable CL_HELPER_PRINT_COMPILER_OUTPUT=1 to see more.\n" | ||
446 | + "*** NOTE: this may include compiler warnings and other important messages\n" | ||
447 | + "*** about your code.\n" | ||
448 | + "*** Set CL_HELPER_NO_COMPILER_OUTPUT_NAG=1 to disable this message.\n"); | ||
449 | + printed_compiler_output_message = true; | ||
450 | + } | ||
451 | + } | ||
452 | + } | ||
453 | + | ||
454 | + if (do_print) | ||
455 | + { | ||
456 | + char *log = (char *) malloc(log_size); | ||
457 | + CHECK_SYS_ERROR(!log, "kernel_from_string: allocate log"); | ||
458 | + | ||
459 | + char devname[MAX_NAME_LEN]; | ||
460 | + CALL_CL_GUARDED(clGetDeviceInfo, (dev, CL_DEVICE_NAME, | ||
461 | + sizeof(devname), devname, NULL)); | ||
462 | + | ||
463 | + CALL_CL_GUARDED(clGetProgramBuildInfo, (program, dev, CL_PROGRAM_BUILD_LOG, | ||
464 | + log_size, log, NULL)); | ||
465 | + fprintf(stderr, "*** build of '%s' on '%s' said:\n%s\n*** (end of message)\n", | ||
466 | + knl_name, devname, log); | ||
467 | + } | ||
468 | + } | ||
469 | + | ||
470 | + CHECK_CL_ERROR(status, "clBuildProgram"); | ||
471 | + | ||
472 | + // fish the kernel out of the program | ||
473 | + cl_kernel kernel = clCreateKernel(program, knl_name, &status); | ||
474 | + CHECK_CL_ERROR(status, "clCreateKernel"); | ||
475 | + | ||
476 | + CALL_CL_GUARDED(clReleaseProgram, (program)); | ||
477 | + | ||
478 | + return kernel; | ||
479 | +} | ||
480 | + | ||
481 | + | ||
482 | + | ||
483 | + | ||
484 | +void print_device_info(cl_device_id device) | ||
485 | +{ | ||
486 | + // adapted from http://graphics.stanford.edu/~yoel/notes/clInfo.c | ||
487 | + | ||
488 | +#define LONG_PROPS \ | ||
489 | + defn(VENDOR_ID), \ | ||
490 | + defn(MAX_COMPUTE_UNITS), \ | ||
491 | + defn(MAX_WORK_ITEM_DIMENSIONS), \ | ||
492 | + defn(MAX_WORK_GROUP_SIZE), \ | ||
493 | + defn(PREFERRED_VECTOR_WIDTH_CHAR), \ | ||
494 | + defn(PREFERRED_VECTOR_WIDTH_SHORT), \ | ||
495 | + defn(PREFERRED_VECTOR_WIDTH_INT), \ | ||
496 | + defn(PREFERRED_VECTOR_WIDTH_LONG), \ | ||
497 | + defn(PREFERRED_VECTOR_WIDTH_FLOAT), \ | ||
498 | + defn(PREFERRED_VECTOR_WIDTH_DOUBLE), \ | ||
499 | + defn(MAX_CLOCK_FREQUENCY), \ | ||
500 | + defn(ADDRESS_BITS), \ | ||
501 | + defn(MAX_MEM_ALLOC_SIZE), \ | ||
502 | + defn(IMAGE_SUPPORT), \ | ||
503 | + defn(MAX_READ_IMAGE_ARGS), \ | ||
504 | + defn(MAX_WRITE_IMAGE_ARGS), \ | ||
505 | + defn(IMAGE2D_MAX_WIDTH), \ | ||
506 | + defn(IMAGE2D_MAX_HEIGHT), \ | ||
507 | + defn(IMAGE3D_MAX_WIDTH), \ | ||
508 | + defn(IMAGE3D_MAX_HEIGHT), \ | ||
509 | + defn(IMAGE3D_MAX_DEPTH), \ | ||
510 | + defn(MAX_SAMPLERS), \ | ||
511 | + defn(MAX_PARAMETER_SIZE), \ | ||
512 | + defn(MEM_BASE_ADDR_ALIGN), \ | ||
513 | + defn(MIN_DATA_TYPE_ALIGN_SIZE), \ | ||
514 | + defn(GLOBAL_MEM_CACHELINE_SIZE), \ | ||
515 | + defn(GLOBAL_MEM_CACHE_SIZE), \ | ||
516 | + defn(GLOBAL_MEM_SIZE), \ | ||
517 | + defn(MAX_CONSTANT_BUFFER_SIZE), \ | ||
518 | + defn(MAX_CONSTANT_ARGS), \ | ||
519 | + defn(LOCAL_MEM_SIZE), \ | ||
520 | + defn(ERROR_CORRECTION_SUPPORT), \ | ||
521 | + defn(PROFILING_TIMER_RESOLUTION), \ | ||
522 | + defn(ENDIAN_LITTLE), \ | ||
523 | + defn(AVAILABLE), \ | ||
524 | + defn(COMPILER_AVAILABLE), | ||
525 | + | ||
526 | +#define STR_PROPS \ | ||
527 | + defn(NAME), \ | ||
528 | + defn(VENDOR), \ | ||
529 | + defn(PROFILE), \ | ||
530 | + defn(VERSION), \ | ||
531 | + defn(EXTENSIONS), | ||
532 | + | ||
533 | +#define HEX_PROPS \ | ||
534 | + defn(SINGLE_FP_CONFIG), \ | ||
535 | + defn(QUEUE_PROPERTIES), | ||
536 | + | ||
537 | + | ||
538 | + printf("---------------------------------------------------------------------\n"); | ||
539 | + | ||
540 | + | ||
541 | + static struct { cl_device_info param; const char *name; } longProps[] = { | ||
542 | +#define defn(X) { CL_DEVICE_##X, #X } | ||
543 | + LONG_PROPS | ||
544 | +#undef defn | ||
545 | + { 0, NULL }, | ||
546 | + }; | ||
547 | + static struct { cl_device_info param; const char *name; } hexProps[] = { | ||
548 | +#define defn(X) { CL_DEVICE_##X, #X } | ||
549 | + HEX_PROPS | ||
550 | +#undef defn | ||
551 | + { 0, NULL }, | ||
552 | + }; | ||
553 | + static struct { cl_device_info param; const char *name; } strProps[] = { | ||
554 | +#define defn(X) { CL_DEVICE_##X, #X } | ||
555 | + STR_PROPS | ||
556 | +#undef defn | ||
557 | + { CL_DRIVER_VERSION, "DRIVER_VERSION" }, | ||
558 | + { 0, NULL }, | ||
559 | + }; | ||
560 | + cl_int status; | ||
561 | + size_t size; | ||
562 | + char buf[65536]; | ||
563 | + long long val; /* Avoids unpleasant surprises for some params */ | ||
564 | + int ii; | ||
565 | + | ||
566 | + for (ii = 0; strProps[ii].name != NULL; ii++) | ||
567 | + { | ||
568 | + status = clGetDeviceInfo(device, strProps[ii].param, sizeof buf, buf, &size); | ||
569 | + if (status != CL_SUCCESS) | ||
570 | + { | ||
571 | + printf("Unable to get %s: %s!\n", | ||
572 | + strProps[ii].name, cl_error_to_str(status)); | ||
573 | + continue; | ||
574 | + } | ||
575 | + if (size > sizeof buf) | ||
576 | + { | ||
577 | + printf("Large %s (%zd bytes)! Truncating to %ld!\n", | ||
578 | + strProps[ii].name, size, sizeof buf); | ||
579 | + } | ||
580 | + printf("%s: %s\n", | ||
581 | + strProps[ii].name, buf); | ||
582 | + } | ||
583 | + printf("\n"); | ||
584 | + | ||
585 | + status = clGetDeviceInfo(device, CL_DEVICE_TYPE, sizeof val, &val, NULL); | ||
586 | + if (status == CL_SUCCESS) | ||
587 | + { | ||
588 | + printf("Type: "); | ||
589 | + if (val & CL_DEVICE_TYPE_DEFAULT) | ||
590 | + { | ||
591 | + val &= ~CL_DEVICE_TYPE_DEFAULT; | ||
592 | + printf("Default "); | ||
593 | + } | ||
594 | + if (val & CL_DEVICE_TYPE_CPU) | ||
595 | + { | ||
596 | + val &= ~CL_DEVICE_TYPE_CPU; | ||
597 | + printf("CPU "); | ||
598 | + } | ||
599 | + if (val & CL_DEVICE_TYPE_GPU) | ||
600 | + { | ||
601 | + val &= ~CL_DEVICE_TYPE_GPU; | ||
602 | + printf("GPU "); | ||
603 | + } | ||
604 | + if (val & CL_DEVICE_TYPE_ACCELERATOR) | ||
605 | + { | ||
606 | + val &= ~CL_DEVICE_TYPE_ACCELERATOR; | ||
607 | + printf("Accelerator "); | ||
608 | + } | ||
609 | + if (val != 0) { | ||
610 | + printf("Unknown (0x%llx) ", val); | ||
611 | + } | ||
612 | + printf("\n"); | ||
613 | + } | ||
614 | + else | ||
615 | + { | ||
616 | + printf("Unable to get TYPE: %s!\n", | ||
617 | + cl_error_to_str(status)); | ||
618 | + } | ||
619 | + | ||
620 | + status = clGetDeviceInfo(device, CL_DEVICE_EXECUTION_CAPABILITIES, | ||
621 | + sizeof val, &val, NULL); | ||
622 | + if (status == CL_SUCCESS) | ||
623 | + { | ||
624 | + printf("EXECUTION_CAPABILITIES: "); | ||
625 | + if (val & CL_EXEC_KERNEL) | ||
626 | + { | ||
627 | + val &= ~CL_EXEC_KERNEL; | ||
628 | + printf("Kernel "); | ||
629 | + } | ||
630 | + if (val & CL_EXEC_NATIVE_KERNEL) | ||
631 | + { | ||
632 | + val &= ~CL_EXEC_NATIVE_KERNEL; | ||
633 | + printf("Native "); | ||
634 | + } | ||
635 | + if (val) | ||
636 | + printf("Unknown (0x%llx) ", val); | ||
637 | + | ||
638 | + printf("\n"); | ||
639 | + } | ||
640 | + else | ||
641 | + { | ||
642 | + printf("Unable to get EXECUTION_CAPABILITIES: %s!\n", | ||
643 | + cl_error_to_str(status)); | ||
644 | + } | ||
645 | + | ||
646 | + status = clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, | ||
647 | + sizeof val, &val, NULL); | ||
648 | + if (status == CL_SUCCESS) | ||
649 | + { | ||
650 | + static const char *cacheTypes[] = { "None", "Read-Only", "Read-Write" }; | ||
651 | + static int numTypes = sizeof cacheTypes / sizeof cacheTypes[0]; | ||
652 | + | ||
653 | + printf("GLOBAL_MEM_CACHE_TYPE: %s (%lld)\n", | ||
654 | + val < numTypes ? cacheTypes[val] : "???", val); | ||
655 | + } | ||
656 | + else | ||
657 | + { | ||
658 | + printf("Unable to get GLOBAL_MEM_CACHE_TYPE: %s!\n", | ||
659 | + cl_error_to_str(status)); | ||
660 | + } | ||
661 | + | ||
662 | + status = clGetDeviceInfo(device, | ||
663 | + CL_DEVICE_LOCAL_MEM_TYPE, sizeof val, &val, NULL); | ||
664 | + | ||
665 | + if (status == CL_SUCCESS) | ||
666 | + { | ||
667 | + static const char *lmemTypes[] = { "???", "Local", "Global" }; | ||
668 | + static int numTypes = sizeof lmemTypes / sizeof lmemTypes[0]; | ||
669 | + | ||
670 | + printf("CL_DEVICE_LOCAL_MEM_TYPE: %s (%lld)\n", | ||
671 | + val < numTypes ? lmemTypes[val] : "???", val); | ||
672 | + } | ||
673 | + else | ||
674 | + { | ||
675 | + printf("Unable to get CL_DEVICE_LOCAL_MEM_TYPE: %s!\n", | ||
676 | + cl_error_to_str(status)); | ||
677 | + } | ||
678 | + | ||
679 | + for (ii = 0; hexProps[ii].name != NULL; ii++) | ||
680 | + { | ||
681 | + status = clGetDeviceInfo(device, hexProps[ii].param, sizeof val, &val, &size); | ||
682 | + if (status != CL_SUCCESS) | ||
683 | + { | ||
684 | + printf("Unable to get %s: %s!\n", | ||
685 | + hexProps[ii].name, cl_error_to_str(status)); | ||
686 | + continue; | ||
687 | + } | ||
688 | + if (size > sizeof val) | ||
689 | + { | ||
690 | + printf("Large %s (%zd bytes)! Truncating to %ld!\n", | ||
691 | + hexProps[ii].name, size, sizeof val); | ||
692 | + } | ||
693 | + printf("%s: 0x%llx\n", hexProps[ii].name, val); | ||
694 | + } | ||
695 | + printf("\n"); | ||
696 | + | ||
697 | + for (ii = 0; longProps[ii].name != NULL; ii++) | ||
698 | + { | ||
699 | + status = clGetDeviceInfo(device, longProps[ii].param, sizeof val, &val, &size); | ||
700 | + if (status != CL_SUCCESS) | ||
701 | + { | ||
702 | + printf("Unable to get %s: %s!\n", | ||
703 | + longProps[ii].name, cl_error_to_str(status)); | ||
704 | + continue; | ||
705 | + } | ||
706 | + if (size > sizeof val) | ||
707 | + { | ||
708 | + printf("Large %s (%zd bytes)! Truncating to %ld!\n", | ||
709 | + longProps[ii].name, size, sizeof val); | ||
710 | + } | ||
711 | + printf("%s: %lld\n", longProps[ii].name, val); | ||
712 | + } | ||
713 | + | ||
714 | + { | ||
715 | + size_t size; | ||
716 | + CALL_CL_GUARDED(clGetDeviceInfo, | ||
717 | + (device, CL_DEVICE_MAX_WORK_ITEM_SIZES, 0, 0, &size)); | ||
718 | + | ||
719 | + size_t res_vec[size/sizeof(size_t)]; // C99 VLA yay! | ||
720 | + | ||
721 | + CALL_CL_GUARDED(clGetDeviceInfo, | ||
722 | + (device, CL_DEVICE_MAX_WORK_ITEM_SIZES, size, res_vec, &size)); | ||
723 | + | ||
724 | + printf("MAX_WORK_GROUP_SIZES: "); // a tiny lie | ||
725 | + for (size_t i = 0; i < size/sizeof(size_t); ++i) | ||
726 | + printf("%zd ", res_vec[i]); | ||
727 | + printf("\n"); | ||
728 | + } | ||
729 | + printf("---------------------------------------------------------------------\n"); | ||
730 | +} | ||
731 | + | ||
732 | + | ||
733 | + | ||
734 | +void print_device_info_from_queue(cl_command_queue queue) | ||
735 | +{ | ||
736 | + cl_device_id dev; | ||
737 | + CALL_CL_GUARDED(clGetCommandQueueInfo, | ||
738 | + (queue, CL_QUEUE_DEVICE, sizeof dev, &dev, NULL)); | ||
739 | + | ||
740 | + print_device_info(dev); | ||
741 | +} |
cl-helper.h
0 → 100644
1 | +/* | ||
2 | + * Copyright (c) 2010, 2012 Andreas Kloeckner | ||
3 | + * | ||
4 | + * Permission is hereby granted, free of charge, to any person obtaining a copy | ||
5 | + * of this software and associated documentation files (the "Software"), to deal | ||
6 | + * in the Software without restriction, including without limitation the rights | ||
7 | + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
8 | + * copies of the Software, and to permit persons to whom the Software is | ||
9 | + * furnished to do so, subject to the following conditions: | ||
10 | + * | ||
11 | + * The above copyright notice and this permission notice shall be included in | ||
12 | + * all copies or substantial portions of the Software. | ||
13 | + * | ||
14 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
15 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
16 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
17 | + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
18 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
19 | + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | ||
20 | + * THE SOFTWARE. | ||
21 | + */ | ||
22 | + | ||
23 | + | ||
24 | + | ||
25 | + | ||
26 | +#ifndef NYUHPC_CL_HELPER | ||
27 | +#define NYUHPC_CL_HELPER | ||
28 | + | ||
29 | +#include <stdarg.h> | ||
30 | +#include <stdio.h> | ||
31 | +#include <stdlib.h> | ||
32 | + | ||
33 | +#ifdef __APPLE__ | ||
34 | +#include <OpenCL/opencl.h> | ||
35 | +#else | ||
36 | +#include <CL/cl.h> | ||
37 | +#endif | ||
38 | + | ||
/* An error check macro for OpenCL.
 *
 * Usage:
 *   CHECK_CL_ERROR(status_code_from_a_cl_operation, "function_name");
 *
 * If the status code is not CL_SUCCESS, a message naming WHAT, the source
 * file, the line number and the decoded error string is written to stderr
 * and the program is aborted.
 *
 * STATUS_CODE is evaluated exactly once, so it is safe to pass an
 * expression with side effects. The body is wrapped in do { } while (0) so
 * that the macro acts as one statement (for instance as the unbraced branch
 * of an if/else); the invocation must be terminated with a semicolon.
 */

#define CHECK_CL_ERROR(STATUS_CODE, WHAT) \
  do \
  { \
    cl_int check_cl_error_status_ = (STATUS_CODE); \
    if (check_cl_error_status_ != CL_SUCCESS) \
    { \
      fprintf(stderr, \
          "*** '%s' in '%s' on line %d failed with error '%s'.\n", \
          (WHAT), __FILE__, __LINE__, \
          cl_error_to_str(check_cl_error_status_)); \
      abort(); \
    } \
  } while (0)
56 | + | ||
/* A more automated error check macro for OpenCL, for use with clXxxx
 * functions that return status codes. (Not all of them do, notably
 * clCreateXxx do not.)
 *
 * Usage:
 *   CALL_CL_GUARDED(clFunction, (arg1, arg2));
 *
 * Note the slightly strange comma between the function name and the
 * argument list.
 *
 * The function is called once with the given argument list and its return
 * value is handed to CHECK_CL_ERROR together with the function's name.
 *
 * The expansion is a single do { } while (0) statement, so it can be used
 * as the unbraced branch of an if/else; terminate the invocation with a
 * semicolon. The temporary has a macro-specific name so that an argument
 * list mentioning a caller variable called 'status_code' is not shadowed.
 */

#define CALL_CL_GUARDED(NAME, ARGLIST) \
  do \
  { \
    cl_int call_cl_guarded_status_ = NAME ARGLIST; \
    CHECK_CL_ERROR(call_cl_guarded_status_, #NAME); \
  } while (0)
74 | + | ||
/* An error check macro for Unix system functions. If "COND" is true, then the
 * last system error ("errno") is printed along with MSG (via perror), which
 * is supposed to be a string describing what you were doing, and the program
 * is aborted.
 *
 * Example:
 *   CHECK_SYS_ERROR(dave != 0, "opening hatch");
 *
 * The expansion is a single do { } while (0) statement, so it is safe as the
 * unbraced branch of an if/else; terminate the invocation with a semicolon.
 */
#define CHECK_SYS_ERROR(COND, MSG) \
  do \
  { \
    if (COND) \
    { \
      perror(MSG); \
      abort(); \
    } \
  } while (0)
88 | + | ||
89 | +/* Return a string describing the OpenCL error code 'e'. | ||
90 | + */ | ||
91 | +const char *cl_error_to_str(cl_int e); | ||
92 | + | ||
/* Print a list of available OpenCL platforms and devices
 * to standard output.
 *
 * Takes no arguments; the explicit (void) parameter list makes this a real
 * prototype, so the compiler rejects calls that pass arguments.
 */
void print_platforms_devices(void);
97 | + | ||
98 | +/* Create an OpenCL context and a matching command queue on a platform from a | ||
99 | + * vendor whose name contains 'plat_name' on a device whose name contains | ||
100 | + * 'dev_name'. Both 'plat_name' and 'dev_name' may be NULL, indicating no | ||
101 | + * preference in the matter. | ||
102 | + * | ||
103 | + * If multiple devices match both 'plat_name' and 'dev_name', then 'idx' | ||
104 | + * prescribes the number of the device that should be chosen. | ||
105 | + * | ||
106 | + * You may also use the special value CHOOSE_INTERACTIVELY to offer the user | ||
107 | + * a choice. You should use this value for code you turn in. | ||
108 | + * | ||
109 | + * This function always succeeds. (If an error occurs, the program | ||
110 | + * is aborted.) | ||
111 | + * | ||
112 | + * You can force interactive querying by defining the | ||
113 | + * CL_HELPER_FORCE_INTERACTIVE macro when compiling cl-helper.c. | ||
114 | + * You may do so by passing the -DCL_HELPER_FORCE_INTERACTIVE | ||
115 | + * compiler option. | ||
116 | + */ | ||
117 | +extern const char *CHOOSE_INTERACTIVELY; | ||
118 | +void create_context_on(const char *plat_name, const char*dev_name, cl_uint | ||
119 | + idx, cl_context *ctx, cl_command_queue *queue, int enable_profiling); | ||
120 | + | ||
121 | +/* Read contents of file 'filename'. | ||
122 | + * Return as a new string. You must free the string when you're done with it. | ||
123 | + * | ||
124 | + * This function always succeeds. (If an error occurs, the program | ||
125 | + * is aborted.) | ||
126 | + */ | ||
127 | +char *read_file(const char *filename); | ||
128 | + | ||
129 | +/* Create a new OpenCL kernel from the code in the string 'knl'. | ||
130 | + * 'knl_name' is the name of the kernel function, and 'options', | ||
131 | + * if not NULL, is a string containing compiler flags. | ||
132 | + * | ||
133 | + * You must release the resulting kernel when you're done | ||
134 | + * with it. | ||
135 | + * | ||
136 | + * This function always succeeds. (If an error occurs, the program | ||
137 | + * is aborted.) | ||
138 | + */ | ||
139 | +cl_kernel kernel_from_string(cl_context ctx, | ||
140 | + char const *knl, char const *knl_name, char const *options); | ||
141 | + | ||
142 | +/* Print information about a device, found from either the | ||
143 | + * queue or the device_id. | ||
144 | + */ | ||
145 | +void print_device_info(cl_device_id device); | ||
146 | +void print_device_info_from_queue(cl_command_queue queue); | ||
147 | + | ||
/* Bind 'arg0' to argument index 0 of kernel 'knl'.
 *
 * The argument is passed to clSetKernelArg by address together with its
 * sizeof, so it must be an lvalue. The call goes through CALL_CL_GUARDED.
 *
 * The expansion is a single do { } while (0) statement without a trailing
 * semicolon, so it works as the unbraced branch of an if/else; end the
 * invocation with a semicolon.
 */
#define SET_1_KERNEL_ARG(knl, arg0) \
  do \
  { \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 0, sizeof(arg0), &(arg0))); \
  } while (0)
150 | + | ||
/* Bind 'arg0' and 'arg1' to argument indices 0 and 1 of kernel 'knl'.
 *
 * Every argument is passed to clSetKernelArg by address together with its
 * sizeof, so each one must be an lvalue. Each call goes through
 * CALL_CL_GUARDED.
 *
 * The expansion is a single do { } while (0) statement, so all calls stay
 * together under an unbraced if/else; end the invocation with a semicolon.
 */
#define SET_2_KERNEL_ARGS(knl, arg0, arg1) \
  do \
  { \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 0, sizeof(arg0), &(arg0))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 1, sizeof(arg1), &(arg1))); \
  } while (0)
154 | + | ||
/* Bind 'arg0'..'arg2' to argument indices 0..2 of kernel 'knl'.
 *
 * Every argument is passed to clSetKernelArg by address together with its
 * sizeof, so each one must be an lvalue. Each call goes through
 * CALL_CL_GUARDED.
 *
 * The expansion is a single do { } while (0) statement, so all calls stay
 * together under an unbraced if/else; end the invocation with a semicolon.
 */
#define SET_3_KERNEL_ARGS(knl, arg0, arg1, arg2) \
  do \
  { \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 0, sizeof(arg0), &(arg0))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 1, sizeof(arg1), &(arg1))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 2, sizeof(arg2), &(arg2))); \
  } while (0)
159 | + | ||
/* Bind 'arg0'..'arg3' to argument indices 0..3 of kernel 'knl'.
 *
 * Every argument is passed to clSetKernelArg by address together with its
 * sizeof, so each one must be an lvalue. Each call goes through
 * CALL_CL_GUARDED.
 *
 * The expansion is a single do { } while (0) statement, so all calls stay
 * together under an unbraced if/else; end the invocation with a semicolon.
 */
#define SET_4_KERNEL_ARGS(knl, arg0, arg1, arg2, arg3) \
  do \
  { \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 0, sizeof(arg0), &(arg0))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 1, sizeof(arg1), &(arg1))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 2, sizeof(arg2), &(arg2))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 3, sizeof(arg3), &(arg3))); \
  } while (0)
165 | + | ||
/* Bind 'arg0'..'arg4' to argument indices 0..4 of kernel 'knl'.
 *
 * Every argument is passed to clSetKernelArg by address together with its
 * sizeof, so each one must be an lvalue. Each call goes through
 * CALL_CL_GUARDED.
 *
 * The expansion is a single do { } while (0) statement, so all calls stay
 * together under an unbraced if/else; end the invocation with a semicolon.
 */
#define SET_5_KERNEL_ARGS(knl, arg0, arg1, arg2, arg3, arg4) \
  do \
  { \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 0, sizeof(arg0), &(arg0))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 1, sizeof(arg1), &(arg1))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 2, sizeof(arg2), &(arg2))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 3, sizeof(arg3), &(arg3))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 4, sizeof(arg4), &(arg4))); \
  } while (0)
172 | + | ||
/* Bind 'arg0'..'arg5' to argument indices 0..5 of kernel 'knl'.
 *
 * Every argument is passed to clSetKernelArg by address together with its
 * sizeof, so each one must be an lvalue. Each call goes through
 * CALL_CL_GUARDED.
 *
 * The expansion is a single do { } while (0) statement, so all calls stay
 * together under an unbraced if/else; end the invocation with a semicolon.
 */
#define SET_6_KERNEL_ARGS(knl, arg0, arg1, arg2, arg3, arg4, arg5) \
  do \
  { \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 0, sizeof(arg0), &(arg0))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 1, sizeof(arg1), &(arg1))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 2, sizeof(arg2), &(arg2))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 3, sizeof(arg3), &(arg3))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 4, sizeof(arg4), &(arg4))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 5, sizeof(arg5), &(arg5))); \
  } while (0)
180 | + | ||
/* Bind 'arg0'..'arg6' to argument indices 0..6 of kernel 'knl'.
 *
 * Every argument is passed to clSetKernelArg by address together with its
 * sizeof, so each one must be an lvalue. Each call goes through
 * CALL_CL_GUARDED.
 *
 * The expansion is a single do { } while (0) statement, so all calls stay
 * together under an unbraced if/else; end the invocation with a semicolon.
 */
#define SET_7_KERNEL_ARGS(knl, arg0, arg1, arg2, arg3, arg4, arg5, arg6) \
  do \
  { \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 0, sizeof(arg0), &(arg0))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 1, sizeof(arg1), &(arg1))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 2, sizeof(arg2), &(arg2))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 3, sizeof(arg3), &(arg3))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 4, sizeof(arg4), &(arg4))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 5, sizeof(arg5), &(arg5))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 6, sizeof(arg6), &(arg6))); \
  } while (0)
189 | + | ||
/* Bind 'arg0'..'arg7' to argument indices 0..7 of kernel 'knl'.
 *
 * Every argument is passed to clSetKernelArg by address together with its
 * sizeof, so each one must be an lvalue. Each call goes through
 * CALL_CL_GUARDED.
 *
 * The expansion is a single do { } while (0) statement, so all calls stay
 * together under an unbraced if/else; end the invocation with a semicolon.
 */
#define SET_8_KERNEL_ARGS(knl, arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7) \
  do \
  { \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 0, sizeof(arg0), &(arg0))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 1, sizeof(arg1), &(arg1))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 2, sizeof(arg2), &(arg2))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 3, sizeof(arg3), &(arg3))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 4, sizeof(arg4), &(arg4))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 5, sizeof(arg5), &(arg5))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 6, sizeof(arg6), &(arg6))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 7, sizeof(arg7), &(arg7))); \
  } while (0)
199 | + | ||
/* Bind 'arg0'..'arg8' to argument indices 0..8 of kernel 'knl'.
 *
 * Every argument is passed to clSetKernelArg by address together with its
 * sizeof, so each one must be an lvalue. Each call goes through
 * CALL_CL_GUARDED.
 *
 * The expansion is a single do { } while (0) statement, so all calls stay
 * together under an unbraced if/else; end the invocation with a semicolon.
 */
#define SET_9_KERNEL_ARGS(knl, arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8) \
  do \
  { \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 0, sizeof(arg0), &(arg0))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 1, sizeof(arg1), &(arg1))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 2, sizeof(arg2), &(arg2))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 3, sizeof(arg3), &(arg3))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 4, sizeof(arg4), &(arg4))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 5, sizeof(arg5), &(arg5))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 6, sizeof(arg6), &(arg6))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 7, sizeof(arg7), &(arg7))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 8, sizeof(arg8), &(arg8))); \
  } while (0)
210 | + | ||
/* Bind 'arg0'..'arg9' to argument indices 0..9 of kernel 'knl'.
 *
 * Every argument is passed to clSetKernelArg by address together with its
 * sizeof, so each one must be an lvalue. Each call goes through
 * CALL_CL_GUARDED.
 *
 * The expansion is a single do { } while (0) statement, so all calls stay
 * together under an unbraced if/else; end the invocation with a semicolon.
 */
#define SET_10_KERNEL_ARGS(knl, arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9) \
  do \
  { \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 0, sizeof(arg0), &(arg0))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 1, sizeof(arg1), &(arg1))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 2, sizeof(arg2), &(arg2))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 3, sizeof(arg3), &(arg3))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 4, sizeof(arg4), &(arg4))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 5, sizeof(arg5), &(arg5))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 6, sizeof(arg6), &(arg6))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 7, sizeof(arg7), &(arg7))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 8, sizeof(arg8), &(arg8))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 9, sizeof(arg9), &(arg9))); \
  } while (0)
222 | + | ||
/* Bind 'arg0'..'arg10' to argument indices 0..10 of kernel 'knl'.
 *
 * Every argument is passed to clSetKernelArg by address together with its
 * sizeof, so each one must be an lvalue. Each call goes through
 * CALL_CL_GUARDED.
 *
 * The expansion is a single do { } while (0) statement, so all calls stay
 * together under an unbraced if/else; end the invocation with a semicolon.
 */
#define SET_11_KERNEL_ARGS(knl, arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10) \
  do \
  { \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 0, sizeof(arg0), &(arg0))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 1, sizeof(arg1), &(arg1))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 2, sizeof(arg2), &(arg2))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 3, sizeof(arg3), &(arg3))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 4, sizeof(arg4), &(arg4))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 5, sizeof(arg5), &(arg5))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 6, sizeof(arg6), &(arg6))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 7, sizeof(arg7), &(arg7))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 8, sizeof(arg8), &(arg8))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 9, sizeof(arg9), &(arg9))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 10, sizeof(arg10), &(arg10))); \
  } while (0)
235 | + | ||
/* Bind 'arg0'..'arg11' to argument indices 0..11 of kernel 'knl'.
 *
 * Every argument is passed to clSetKernelArg by address together with its
 * sizeof, so each one must be an lvalue. Each call goes through
 * CALL_CL_GUARDED.
 *
 * The expansion is a single do { } while (0) statement, so all calls stay
 * together under an unbraced if/else; end the invocation with a semicolon.
 */
#define SET_12_KERNEL_ARGS(knl, arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10, arg11) \
  do \
  { \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 0, sizeof(arg0), &(arg0))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 1, sizeof(arg1), &(arg1))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 2, sizeof(arg2), &(arg2))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 3, sizeof(arg3), &(arg3))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 4, sizeof(arg4), &(arg4))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 5, sizeof(arg5), &(arg5))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 6, sizeof(arg6), &(arg6))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 7, sizeof(arg7), &(arg7))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 8, sizeof(arg8), &(arg8))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 9, sizeof(arg9), &(arg9))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 10, sizeof(arg10), &(arg10))); \
    CALL_CL_GUARDED(clSetKernelArg, ((knl), 11, sizeof(arg11), &(arg11))); \
  } while (0)
249 | + | ||
250 | +#endif |
print-devices.c
0 → 100644
set-governor
0 → 100755
show-clock-freq
0 → 100755
timing.h
0 → 100644
1 | +#ifdef __APPLE__ | ||
2 | + | ||
3 | +#include <sys/time.h> | ||
4 | + | ||
5 | +typedef struct timeval timestamp_type; | ||
6 | + | ||
7 | +static void get_timestamp(timestamp_type *t) | ||
8 | +{ | ||
9 | + gettimeofday(t, NULL); | ||
10 | +} | ||
11 | + | ||
12 | +static double timestamp_diff_in_seconds(timestamp_type start, | ||
13 | +timestamp_type end) | ||
14 | +{ | ||
15 | + /* Perform the carry for the later subtraction by updating start. */ | ||
16 | + if (end.tv_usec < start.tv_usec) { | ||
17 | + int nsec = (start.tv_usec - end.tv_usec) / 1000000 + 1; | ||
18 | + start.tv_usec -= 1000000 * nsec; | ||
19 | + start.tv_sec += nsec; | ||
20 | + } | ||
21 | + if (end.tv_usec - start.tv_usec > 1000000) { | ||
22 | + int nsec = (end.tv_usec - start.tv_usec) / 1000000; | ||
23 | + start.tv_usec += 1000000 * nsec; | ||
24 | + start.tv_sec -= nsec; | ||
25 | + } | ||
26 | + | ||
27 | + return end.tv_sec - start.tv_sec + (end.tv_usec - start.tv_usec)*1e-6; | ||
28 | +} | ||
29 | + | ||
30 | +#else | ||
31 | + | ||
32 | +#include <time.h> | ||
33 | + | ||
34 | +typedef struct timespec timestamp_type; | ||
35 | + | ||
36 | +static void get_timestamp(timestamp_type *t) | ||
37 | +{ | ||
38 | + clock_gettime(CLOCK_REALTIME, t); | ||
39 | +} | ||
40 | + | ||
41 | +static double timestamp_diff_in_seconds(timestamp_type start, timestamp_type end) | ||
42 | +{ | ||
43 | + struct timespec temp; | ||
44 | + if ((end.tv_nsec-start.tv_nsec)<0) { | ||
45 | + temp.tv_sec = end.tv_sec-start.tv_sec-1; | ||
46 | + temp.tv_nsec = 1000000000+end.tv_nsec-start.tv_nsec; | ||
47 | + } else { | ||
48 | + temp.tv_sec = end.tv_sec-start.tv_sec; | ||
49 | + temp.tv_nsec = end.tv_nsec-start.tv_nsec; | ||
50 | + } | ||
51 | + return temp.tv_sec + 1e-9*temp.tv_nsec; | ||
52 | +} | ||
53 | + | ||
54 | +#endif |
Please
register
or
login
to post a comment