Showing
11 changed files
with
1261 additions
and
0 deletions
Makefile
0 → 100644
README.md
0 → 100644
| 1 | +# OpenCL Howto | |
| 2 | + | |
| 3 | +Code snippets taken from | |
| 4 | +[OpenCLHowto](https://wiki.tiker.net/OpenCLHowTo) | |
| 5 | + | |
| 6 | +## Description | |
| 7 | + | |
| 8 | +This is just some more playing around with OpenCL in an attempt to learn a | |
| 9 | +bit about it. | |
| 10 | + | |
| 11 | +## Requirements | |
| 12 | + | |
| 13 | +Some OpenCL-capable hardware and the corresponding OpenCL library exposing the | |
| 14 | +OpenCL API. I tested this on an Intel GPU (Intel Corporation Haswell-ULT | |
| 15 | +Integrated Graphics Controller (rev 09)) with the | |
| 16 | +[beignet](https://www.freedesktop.org/wiki/Software/Beignet/) | |
| 17 | +open source library. | |
| 18 | + | |
| 19 | +## License | |
| 20 | + | |
| 21 | +MIT License | |
| 22 | + | |
| 23 | +> Permission is hereby granted, free of charge, to any person obtaining a copy | |
| 24 | +> of this software and associated documentation files (the "Software"), to | |
| 25 | +> deal in the Software without restriction, including without limitation the | |
| 26 | +> rights to use, copy, modify, merge, publish, distribute, sublicense, and/or | |
| 27 | +> sell copies of the Software, and to permit persons to whom the Software is | |
| 28 | +> furnished to do so, subject to the following conditions: | |
| 29 | +> | |
| 30 | +> The above copyright notice and this permission notice shall be included in | |
| 31 | +> all copies or substantial portions of the Software. | |
| 32 | +> | |
| 33 | +> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
| 34 | +> IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
| 35 | +> FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
| 36 | +> AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
| 37 | +> LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
| 38 | +> FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
| 39 | +> IN THE SOFTWARE. | ... | ... |
cl-demo.c
0 → 100644
| 1 | +#include "timing.h" | |
| 2 | +#include "cl-helper.h" | |
| 3 | + | |
| 4 | + | |
| 5 | + | |
| 6 | + | |
| 7 | +int main(int argc, char **argv) | |
| 8 | +{ | |
| 9 | + if (argc != 3) | |
| 10 | + { | |
| 11 | + fprintf(stderr, "need two arguments!\n"); | |
| 12 | + abort(); | |
| 13 | + } | |
| 14 | + | |
| 15 | + const cl_long n = atol(argv[1]); | |
| 16 | + const int ntrips = atoi(argv[2]); | |
| 17 | + | |
| 18 | + cl_context ctx; | |
| 19 | + cl_command_queue queue; | |
| 20 | + create_context_on(CHOOSE_INTERACTIVELY, CHOOSE_INTERACTIVELY, 0, &ctx, &queue, 0); | |
| 21 | + | |
| 22 | + print_device_info_from_queue(queue); | |
| 23 | + | |
| 24 | + // -------------------------------------------------------------------------- | |
| 25 | + // load kernels | |
| 26 | + // -------------------------------------------------------------------------- | |
| 27 | + char *knl_text = read_file("vec-add-soln.cl"); | |
| 28 | + cl_kernel knl = kernel_from_string(ctx, knl_text, "sum", NULL); | |
| 29 | + free(knl_text); | |
| 30 | + | |
| 31 | + // -------------------------------------------------------------------------- | |
| 32 | + // allocate and initialize CPU memory | |
| 33 | + // -------------------------------------------------------------------------- | |
| 34 | + float *a = (float *) malloc(sizeof(float) * n); | |
| 35 | + if (!a) { perror("alloc x"); abort(); } | |
| 36 | + float *b = (float *) malloc(sizeof(float) * n); | |
| 37 | + if (!b) { perror("alloc y"); abort(); } | |
| 38 | + float *c = (float *) malloc(sizeof(float) * n); | |
| 39 | + if (!c) { perror("alloc z"); abort(); } | |
| 40 | + | |
| 41 | + for (size_t i = 0; i < n; ++i) | |
| 42 | + { | |
| 43 | + a[i] = i; | |
| 44 | + b[i] = 2*i; | |
| 45 | + } | |
| 46 | + | |
| 47 | + // -------------------------------------------------------------------------- | |
| 48 | + // allocate device memory | |
| 49 | + // -------------------------------------------------------------------------- | |
| 50 | + cl_int status; | |
| 51 | + cl_mem buf_a = clCreateBuffer(ctx, CL_MEM_READ_WRITE, | |
| 52 | + sizeof(float) * n, 0, &status); | |
| 53 | + CHECK_CL_ERROR(status, "clCreateBuffer"); | |
| 54 | + | |
| 55 | + cl_mem buf_b = clCreateBuffer(ctx, CL_MEM_READ_WRITE, | |
| 56 | + sizeof(float) * n, 0, &status); | |
| 57 | + CHECK_CL_ERROR(status, "clCreateBuffer"); | |
| 58 | + | |
| 59 | + cl_mem buf_c = clCreateBuffer(ctx, CL_MEM_READ_WRITE, | |
| 60 | + sizeof(float) * n, 0, &status); | |
| 61 | + CHECK_CL_ERROR(status, "clCreateBuffer"); | |
| 62 | + | |
| 63 | + // -------------------------------------------------------------------------- | |
| 64 | + // transfer to device | |
| 65 | + // -------------------------------------------------------------------------- | |
| 66 | + CALL_CL_GUARDED(clEnqueueWriteBuffer, ( | |
| 67 | + queue, buf_a, /*blocking*/ CL_TRUE, /*offset*/ 0, | |
| 68 | + n * sizeof(float), a, | |
| 69 | + 0, NULL, NULL)); | |
| 70 | + | |
| 71 | + CALL_CL_GUARDED(clEnqueueWriteBuffer, ( | |
| 72 | + queue, buf_b, /*blocking*/ CL_TRUE, /*offset*/ 0, | |
| 73 | + n * sizeof(float), b, | |
| 74 | + 0, NULL, NULL)); | |
| 75 | + | |
| 76 | + // -------------------------------------------------------------------------- | |
| 77 | + // run code on device | |
| 78 | + // -------------------------------------------------------------------------- | |
| 79 | + | |
| 80 | + CALL_CL_GUARDED(clFinish, (queue)); | |
| 81 | + | |
| 82 | + timestamp_type time1, time2; | |
| 83 | + get_timestamp(&time1); | |
| 84 | + | |
| 85 | + for (int trip = 0; trip < ntrips; ++trip) | |
| 86 | + { | |
| 87 | + SET_4_KERNEL_ARGS(knl, buf_a, buf_b, buf_c, n); | |
| 88 | + size_t ldim[] = { 32 }; | |
| 89 | + size_t gdim[] = { ((n + ldim[0] - 1)/ldim[0])*ldim[0] }; | |
| 90 | + CALL_CL_GUARDED(clEnqueueNDRangeKernel, | |
| 91 | + (queue, knl, | |
| 92 | + /*dimensions*/ 1, NULL, gdim, ldim, | |
| 93 | + 0, NULL, NULL)); | |
| 94 | + } | |
| 95 | + | |
| 96 | + CALL_CL_GUARDED(clFinish, (queue)); | |
| 97 | + | |
| 98 | + get_timestamp(&time2); | |
| 99 | + double elapsed = timestamp_diff_in_seconds(time1,time2)/ntrips; | |
| 100 | + printf("%f s\n", elapsed); | |
| 101 | + printf("%f GB/s\n", | |
| 102 | + 3*n*sizeof(float)/1e9/elapsed); | |
| 103 | + | |
| 104 | + // -------------------------------------------------------------------------- | |
| 105 | + // transfer back & check | |
| 106 | + // -------------------------------------------------------------------------- | |
| 107 | + CALL_CL_GUARDED(clEnqueueReadBuffer, ( | |
| 108 | + queue, buf_c, /*blocking*/ CL_TRUE, /*offset*/ 0, | |
| 109 | + n * sizeof(float), c, | |
| 110 | + 0, NULL, NULL)); | |
| 111 | + | |
| 112 | + for (size_t i = 0; i < n; ++i) | |
| 113 | + if (c[i] != 3*i) | |
| 114 | + { | |
| 115 | + printf("BAD %ld %f %f!\n", i, c[i], c[i] - 3*i); | |
| 116 | + abort(); | |
| 117 | + } | |
| 118 | + puts("GOOD"); | |
| 119 | + | |
| 120 | + // -------------------------------------------------------------------------- | |
| 121 | + // clean up | |
| 122 | + // -------------------------------------------------------------------------- | |
| 123 | + CALL_CL_GUARDED(clReleaseMemObject, (buf_a)); | |
| 124 | + CALL_CL_GUARDED(clReleaseMemObject, (buf_b)); | |
| 125 | + CALL_CL_GUARDED(clReleaseMemObject, (buf_c)); | |
| 126 | + CALL_CL_GUARDED(clReleaseKernel, (knl)); | |
| 127 | + CALL_CL_GUARDED(clReleaseCommandQueue, (queue)); | |
| 128 | + CALL_CL_GUARDED(clReleaseContext, (ctx)); | |
| 129 | + | |
| 130 | + return 0; | |
| 131 | +} | ... | ... |
cl-helper.c
0 → 100644
| 1 | +/* | |
| 2 | + * Copyright (c) 2010 Andreas Kloeckner | |
| 3 | + * | |
| 4 | + * Permission is hereby granted, free of charge, to any person obtaining a copy | |
| 5 | + * of this software and associated documentation files (the "Software"), to deal | |
| 6 | + * in the Software without restriction, including without limitation the rights | |
| 7 | + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
| 8 | + * copies of the Software, and to permit persons to whom the Software is | |
| 9 | + * furnished to do so, subject to the following conditions: | |
| 10 | + * | |
| 11 | + * The above copyright notice and this permission notice shall be included in | |
| 12 | + * all copies or substantial portions of the Software. | |
| 13 | + * | |
| 14 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
| 15 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
| 16 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
| 17 | + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
| 18 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
| 19 | + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |
| 20 | + * THE SOFTWARE. | |
| 21 | + */ | |
| 22 | + | |
| 23 | + | |
| 24 | + | |
| 25 | + | |
| 26 | +#include "cl-helper.h" | |
| 27 | +#include <string.h> | |
| 28 | +#include <stdbool.h> | |
| 29 | + | |
| 30 | + | |
| 31 | + | |
| 32 | + | |
| 33 | +#define MAX_NAME_LEN 1000 | |
| 34 | + | |
| 35 | + | |
| 36 | + | |
| 37 | + | |
| 38 | +const char *cl_error_to_str(cl_int e) | |
| 39 | +{ | |
| 40 | + switch (e) | |
| 41 | + { | |
| 42 | + case CL_SUCCESS: return "success"; | |
| 43 | + case CL_DEVICE_NOT_FOUND: return "device not found"; | |
| 44 | + case CL_DEVICE_NOT_AVAILABLE: return "device not available"; | |
| 45 | +#if !(defined(CL_PLATFORM_NVIDIA) && CL_PLATFORM_NVIDIA == 0x3001) | |
| 46 | + case CL_COMPILER_NOT_AVAILABLE: return "device compiler not available"; | |
| 47 | +#endif | |
| 48 | + case CL_MEM_OBJECT_ALLOCATION_FAILURE: return "mem object allocation failure"; | |
| 49 | + case CL_OUT_OF_RESOURCES: return "out of resources"; | |
| 50 | + case CL_OUT_OF_HOST_MEMORY: return "out of host memory"; | |
| 51 | + case CL_PROFILING_INFO_NOT_AVAILABLE: return "profiling info not available"; | |
| 52 | + case CL_MEM_COPY_OVERLAP: return "mem copy overlap"; | |
| 53 | + case CL_IMAGE_FORMAT_MISMATCH: return "image format mismatch"; | |
| 54 | + case CL_IMAGE_FORMAT_NOT_SUPPORTED: return "image format not supported"; | |
| 55 | + case CL_BUILD_PROGRAM_FAILURE: return "build program failure"; | |
| 56 | + case CL_MAP_FAILURE: return "map failure"; | |
| 57 | + | |
| 58 | + case CL_INVALID_VALUE: return "invalid value"; | |
| 59 | + case CL_INVALID_DEVICE_TYPE: return "invalid device type"; | |
| 60 | + case CL_INVALID_PLATFORM: return "invalid platform"; | |
| 61 | + case CL_INVALID_DEVICE: return "invalid device"; | |
| 62 | + case CL_INVALID_CONTEXT: return "invalid context"; | |
| 63 | + case CL_INVALID_QUEUE_PROPERTIES: return "invalid queue properties"; | |
| 64 | + case CL_INVALID_COMMAND_QUEUE: return "invalid command queue"; | |
| 65 | + case CL_INVALID_HOST_PTR: return "invalid host ptr"; | |
| 66 | + case CL_INVALID_MEM_OBJECT: return "invalid mem object"; | |
| 67 | + case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: return "invalid image format descriptor"; | |
| 68 | + case CL_INVALID_IMAGE_SIZE: return "invalid image size"; | |
| 69 | + case CL_INVALID_SAMPLER: return "invalid sampler"; | |
| 70 | + case CL_INVALID_BINARY: return "invalid binary"; | |
| 71 | + case CL_INVALID_BUILD_OPTIONS: return "invalid build options"; | |
| 72 | + case CL_INVALID_PROGRAM: return "invalid program"; | |
| 73 | + case CL_INVALID_PROGRAM_EXECUTABLE: return "invalid program executable"; | |
| 74 | + case CL_INVALID_KERNEL_NAME: return "invalid kernel name"; | |
| 75 | + case CL_INVALID_KERNEL_DEFINITION: return "invalid kernel definition"; | |
| 76 | + case CL_INVALID_KERNEL: return "invalid kernel"; | |
| 77 | + case CL_INVALID_ARG_INDEX: return "invalid arg index"; | |
| 78 | + case CL_INVALID_ARG_VALUE: return "invalid arg value"; | |
| 79 | + case CL_INVALID_ARG_SIZE: return "invalid arg size"; | |
| 80 | + case CL_INVALID_KERNEL_ARGS: return "invalid kernel args"; | |
| 81 | + case CL_INVALID_WORK_DIMENSION: return "invalid work dimension"; | |
| 82 | + case CL_INVALID_WORK_GROUP_SIZE: return "invalid work group size"; | |
| 83 | + case CL_INVALID_WORK_ITEM_SIZE: return "invalid work item size"; | |
| 84 | + case CL_INVALID_GLOBAL_OFFSET: return "invalid global offset"; | |
| 85 | + case CL_INVALID_EVENT_WAIT_LIST: return "invalid event wait list"; | |
| 86 | + case CL_INVALID_EVENT: return "invalid event"; | |
| 87 | + case CL_INVALID_OPERATION: return "invalid operation"; | |
| 88 | + case CL_INVALID_GL_OBJECT: return "invalid gl object"; | |
| 89 | + case CL_INVALID_BUFFER_SIZE: return "invalid buffer size"; | |
| 90 | + case CL_INVALID_MIP_LEVEL: return "invalid mip level"; | |
| 91 | + | |
| 92 | +#if defined(cl_khr_gl_sharing) && (cl_khr_gl_sharing >= 1) | |
| 93 | + case CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR: return "invalid gl sharegroup reference number"; | |
| 94 | +#endif | |
| 95 | + | |
| 96 | +#ifdef CL_VERSION_1_1 | |
| 97 | + case CL_MISALIGNED_SUB_BUFFER_OFFSET: return "misaligned sub-buffer offset"; | |
| 98 | + case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST: return "exec status error for events in wait list"; | |
| 99 | + case CL_INVALID_GLOBAL_WORK_SIZE: return "invalid global work size"; | |
| 100 | +#endif | |
| 101 | + | |
| 102 | + default: return "invalid/unknown error code"; | |
| 103 | + } | |
| 104 | +} | |
| 105 | + | |
| 106 | + | |
| 107 | + | |
| 108 | + | |
| 109 | +void print_platforms_devices() | |
| 110 | +{ | |
| 111 | + // get number of platforms | |
| 112 | + cl_uint plat_count; | |
| 113 | + CALL_CL_GUARDED(clGetPlatformIDs, (0, NULL, &plat_count)); | |
| 114 | + | |
| 115 | + // allocate memory, get list of platforms | |
| 116 | + cl_platform_id *platforms = | |
| 117 | + (cl_platform_id *) malloc(plat_count*sizeof(cl_platform_id)); | |
| 118 | + CHECK_SYS_ERROR(!platforms, "allocating platform array"); | |
| 119 | + | |
| 120 | + CALL_CL_GUARDED(clGetPlatformIDs, (plat_count, platforms, NULL)); | |
| 121 | + | |
| 122 | + // iterate over platforms | |
| 123 | + for (cl_uint i = 0; i < plat_count; ++i) | |
| 124 | + { | |
| 125 | + // get platform vendor name | |
| 126 | + char buf[MAX_NAME_LEN]; | |
| 127 | + CALL_CL_GUARDED(clGetPlatformInfo, (platforms[i], CL_PLATFORM_VENDOR, | |
| 128 | + sizeof(buf), buf, NULL)); | |
| 129 | + printf("platform %d: vendor '%s'\n", i, buf); | |
| 130 | + | |
| 131 | + // get number of devices in platform | |
| 132 | + cl_uint dev_count; | |
| 133 | + CALL_CL_GUARDED(clGetDeviceIDs, (platforms[i], CL_DEVICE_TYPE_ALL, | |
| 134 | + 0, NULL, &dev_count)); | |
| 135 | + | |
| 136 | + cl_device_id *devices = | |
| 137 | + (cl_device_id *) malloc(dev_count*sizeof(cl_device_id)); | |
| 138 | + CHECK_SYS_ERROR(!devices, "allocating device array"); | |
| 139 | + | |
| 140 | + // get list of devices in platform | |
| 141 | + CALL_CL_GUARDED(clGetDeviceIDs, (platforms[i], CL_DEVICE_TYPE_ALL, | |
| 142 | + dev_count, devices, NULL)); | |
| 143 | + | |
| 144 | + // iterate over devices | |
| 145 | + for (cl_uint j = 0; j < dev_count; ++j) | |
| 146 | + { | |
| 147 | + char buf[MAX_NAME_LEN]; | |
| 148 | + CALL_CL_GUARDED(clGetDeviceInfo, (devices[j], CL_DEVICE_NAME, | |
| 149 | + sizeof(buf), buf, NULL)); | |
| 150 | + printf(" device %d: '%s'\n", j, buf); | |
| 151 | + } | |
| 152 | + | |
| 153 | + free(devices); | |
| 154 | + } | |
| 155 | + | |
| 156 | + free(platforms); | |
| 157 | +} | |
| 158 | + | |
| 159 | + | |
| 160 | + | |
| 161 | + | |
/* Read one line of arbitrary length from stdin.
 * Adapted from http://stackoverflow.com/a/314422/1148634
 *
 * Reads characters up to and including the next '\n', or up to end of
 * input if no newline is seen. The newline, when present, is kept.
 *
 * Returns a malloc'ed, NUL-terminated string that the caller must free().
 * At end of input with nothing read, the result is an empty string.
 * Returns NULL only if memory allocation fails.
 */
char *read_a_line(void)
{
  // starting size of the line buffer; it doubles whenever it fills up
  enum { INITIAL_CAPACITY = 1000 };

  size_t capacity = INITIAL_CAPACITY;
  size_t used = 0;
  char *buf = (char *) malloc(capacity);

  if (buf == NULL)
    return NULL;

  for (;;)
  {
    int c = fgetc(stdin);
    if (c == EOF)
      break;

    // Grow before storing, always keeping one byte in reserve for the
    // terminating NUL. Tracking an index (rather than a countdown and a
    // moving pointer) keeps the bookkeeping valid across realloc.
    if (used + 1 >= capacity)
    {
      if (capacity > (size_t) -1 / 2)
      {
        free(buf);
        return NULL;
      }

      size_t new_capacity = capacity * 2;
      char *grown = (char *) realloc(buf, new_capacity);
      if (grown == NULL)
      {
        free(buf);
        return NULL;
      }
      buf = grown;
      capacity = new_capacity;
    }

    buf[used++] = (char) c;
    if (c == '\n')
      break;
  }

  buf[used] = '\0';
  return buf;
}
| 200 | + | |
| 201 | + | |
| 202 | + | |
| 203 | + | |
| 204 | +const char *CHOOSE_INTERACTIVELY = "INTERACTIVE"; | |
| 205 | + | |
| 206 | + | |
| 207 | +#define MIN(a,b) (((a)<(b))?(a):(b)) | |
| 208 | +#define MAX(a,b) (((a)>(b))?(a):(b)) | |
| 209 | + | |
| 210 | +void create_context_on(const char *plat_name, const char*dev_name, cl_uint idx, | |
| 211 | + cl_context *ctx, cl_command_queue *queue, int enable_profiling) | |
| 212 | +{ | |
| 213 | + char dev_sel_buf[MAX_NAME_LEN]; | |
| 214 | + char platform_sel_buf[MAX_NAME_LEN]; | |
| 215 | + | |
| 216 | + // get number of platforms | |
| 217 | + cl_uint plat_count; | |
| 218 | + CALL_CL_GUARDED(clGetPlatformIDs, (0, NULL, &plat_count)); | |
| 219 | + | |
| 220 | + // allocate memory, get list of platform handles | |
| 221 | + cl_platform_id *platforms = | |
| 222 | + (cl_platform_id *) malloc(plat_count*sizeof(cl_platform_id)); | |
| 223 | + CHECK_SYS_ERROR(!platforms, "allocating platform array"); | |
| 224 | + CALL_CL_GUARDED(clGetPlatformIDs, (plat_count, platforms, NULL)); | |
| 225 | + | |
| 226 | + // print menu, if requested | |
| 227 | +#ifndef CL_HELPER_FORCE_INTERACTIVE | |
| 228 | + if (plat_name == CHOOSE_INTERACTIVELY) // yes, we want exactly that pointer | |
| 229 | +#endif | |
| 230 | + { | |
| 231 | + puts("Choose platform:"); | |
| 232 | + for (cl_uint i = 0; i < plat_count; ++i) | |
| 233 | + { | |
| 234 | + char buf[MAX_NAME_LEN]; | |
| 235 | + CALL_CL_GUARDED(clGetPlatformInfo, (platforms[i], CL_PLATFORM_VENDOR, | |
| 236 | + sizeof(buf), buf, NULL)); | |
| 237 | + printf("[%d] %s\n", i, buf); | |
| 238 | + } | |
| 239 | + | |
| 240 | + printf("Enter choice: "); | |
| 241 | + fflush(stdout); | |
| 242 | + | |
| 243 | + char *sel = read_a_line(); | |
| 244 | + if (!sel) | |
| 245 | + { | |
| 246 | + fprintf(stderr, "error reading line from stdin"); | |
| 247 | + abort(); | |
| 248 | + } | |
| 249 | + | |
| 250 | + int sel_int = MIN(MAX(0, atoi(sel)), (int) plat_count-1); | |
| 251 | + free(sel); | |
| 252 | + | |
| 253 | + CALL_CL_GUARDED(clGetPlatformInfo, (platforms[sel_int], CL_PLATFORM_VENDOR, | |
| 254 | + sizeof(platform_sel_buf), platform_sel_buf, NULL)); | |
| 255 | + plat_name = platform_sel_buf; | |
| 256 | + } | |
| 257 | + | |
| 258 | + // iterate over platforms | |
| 259 | + for (cl_uint i = 0; i < plat_count; ++i) | |
| 260 | + { | |
| 261 | + // get platform name | |
| 262 | + char buf[MAX_NAME_LEN]; | |
| 263 | + CALL_CL_GUARDED(clGetPlatformInfo, (platforms[i], CL_PLATFORM_VENDOR, | |
| 264 | + sizeof(buf), buf, NULL)); | |
| 265 | + | |
| 266 | + // does it match? | |
| 267 | + if (!plat_name || strstr(buf, plat_name)) | |
| 268 | + { | |
| 269 | + // get number of devices in platform | |
| 270 | + cl_uint dev_count; | |
| 271 | + CALL_CL_GUARDED(clGetDeviceIDs, (platforms[i], CL_DEVICE_TYPE_ALL, | |
| 272 | + 0, NULL, &dev_count)); | |
| 273 | + | |
| 274 | + // allocate memory, get list of device handles in platform | |
| 275 | + cl_device_id *devices = | |
| 276 | + (cl_device_id *) malloc(dev_count*sizeof(cl_device_id)); | |
| 277 | + CHECK_SYS_ERROR(!devices, "allocating device array"); | |
| 278 | + | |
| 279 | + CALL_CL_GUARDED(clGetDeviceIDs, (platforms[i], CL_DEVICE_TYPE_ALL, | |
| 280 | + dev_count, devices, NULL)); | |
| 281 | + | |
| 282 | + // {{{ print device menu, if requested | |
| 283 | +#ifndef CL_HELPER_FORCE_INTERACTIVE | |
| 284 | + if (dev_name == CHOOSE_INTERACTIVELY) // yes, we want exactly that pointer | |
| 285 | +#endif | |
| 286 | + { | |
| 287 | + puts("Choose device:"); | |
| 288 | + for (cl_uint j = 0; j < dev_count; ++j) | |
| 289 | + { | |
| 290 | + char buf[MAX_NAME_LEN]; | |
| 291 | + CALL_CL_GUARDED(clGetDeviceInfo, (devices[j], CL_DEVICE_NAME, | |
| 292 | + sizeof(buf), buf, NULL)); | |
| 293 | + printf("[%d] %s\n", j, buf); | |
| 294 | + } | |
| 295 | + | |
| 296 | + printf("Enter choice: "); | |
| 297 | + fflush(stdout); | |
| 298 | + | |
| 299 | + char *sel = read_a_line(); | |
| 300 | + if (!sel) | |
| 301 | + { | |
| 302 | + fprintf(stderr, "error reading line from stdin"); | |
| 303 | + abort(); | |
| 304 | + } | |
| 305 | + | |
| 306 | + int int_sel = MIN(MAX(0, atoi(sel)), (int) dev_count-1); | |
| 307 | + free(sel); | |
| 308 | + | |
| 309 | + CALL_CL_GUARDED(clGetDeviceInfo, (devices[int_sel], CL_DEVICE_NAME, | |
| 310 | + sizeof(dev_sel_buf), dev_sel_buf, NULL)); | |
| 311 | + dev_name = dev_sel_buf; | |
| 312 | + } | |
| 313 | + | |
| 314 | + // }}} | |
| 315 | + | |
| 316 | + // iterate over devices | |
| 317 | + for (cl_uint j = 0; j < dev_count; ++j) | |
| 318 | + { | |
| 319 | + // get device name | |
| 320 | + char buf[MAX_NAME_LEN]; | |
| 321 | + CALL_CL_GUARDED(clGetDeviceInfo, (devices[j], CL_DEVICE_NAME, | |
| 322 | + sizeof(buf), buf, NULL)); | |
| 323 | + | |
| 324 | + // does it match? | |
| 325 | + if (!dev_name || strstr(buf, dev_name)) | |
| 326 | + { | |
| 327 | + if (idx == 0) | |
| 328 | + { | |
| 329 | + cl_platform_id plat = platforms[i]; | |
| 330 | + cl_device_id dev = devices[j]; | |
| 331 | + | |
| 332 | + free(devices); | |
| 333 | + free(platforms); | |
| 334 | + | |
| 335 | + // create a context | |
| 336 | + cl_context_properties cps[3] = { | |
| 337 | + CL_CONTEXT_PLATFORM, (cl_context_properties) plat, 0 }; | |
| 338 | + | |
| 339 | + cl_int status; | |
| 340 | + *ctx = clCreateContext( | |
| 341 | + cps, 1, &dev, NULL, NULL, &status); | |
| 342 | + CHECK_CL_ERROR(status, "clCreateContext"); | |
| 343 | + | |
| 344 | + // create a command queue | |
| 345 | + cl_command_queue_properties qprops = 0; | |
| 346 | + if (enable_profiling) | |
| 347 | + qprops |= CL_QUEUE_PROFILING_ENABLE; | |
| 348 | + | |
| 349 | + if (queue) | |
| 350 | + { | |
| 351 | + *queue = clCreateCommandQueue(*ctx, dev, qprops, &status); | |
| 352 | + CHECK_CL_ERROR(status, "clCreateCommandQueue"); | |
| 353 | + } | |
| 354 | + | |
| 355 | + return; | |
| 356 | + } | |
| 357 | + else | |
| 358 | + --idx; | |
| 359 | + } | |
| 360 | + } | |
| 361 | + | |
| 362 | + free(devices); | |
| 363 | + } | |
| 364 | + } | |
| 365 | + | |
| 366 | + free(platforms); | |
| 367 | + | |
| 368 | + fputs("create_context_on: specified device not found.\n", stderr); | |
| 369 | + abort(); | |
| 370 | +} | |
| 371 | + | |
| 372 | + | |
| 373 | + | |
| 374 | + | |
/*
 * Read an entire file into memory.
 *
 * filename: path of the file to read.
 *
 * Returns a malloc'ed buffer holding the file contents followed by a
 * terminating NUL; the caller must free() it.
 *
 * Every failure is routed through CHECK_SYS_ERROR, which is defined outside
 * this file (NOTE(review): presumably reports the error and aborts -- confirm
 * in cl-helper.h).
 */
char *read_file(const char *filename)
{
  // Binary mode: the size obtained from ftell() is a byte count, and in text
  // mode fread() may deliver fewer bytes on platforms that translate line
  // endings, which would be misreported as a read failure.
  FILE *f = fopen(filename, "rb");
  CHECK_SYS_ERROR(!f, "read_file: opening file");

  // figure out file size
  CHECK_SYS_ERROR(fseek(f, 0, SEEK_END) < 0, "read_file: seeking to end");
  long end_pos = ftell(f);
  // ftell() signals failure with -1, which must not be turned into a size
  CHECK_SYS_ERROR(end_pos < 0, "read_file: determining file size");
  size_t size = (size_t) end_pos;

  CHECK_SYS_ERROR(fseek(f, 0, SEEK_SET) != 0,
      "read_file: seeking to start");

  // allocate memory, slurp in entire file
  char *result = (char *) malloc(size+1);
  CHECK_SYS_ERROR(!result, "read_file: allocating file contents");
  CHECK_SYS_ERROR(fread(result, 1, size, f) < size,
      "read_file: reading file contents");

  // close, return
  CHECK_SYS_ERROR(fclose(f), "read_file: closing file");
  result[size] = '\0';

  return result;
}
| 399 | + | |
| 400 | + | |
| 401 | + | |
| 402 | + | |
| 403 | +static int printed_compiler_output_message = 0; | |
| 404 | + | |
| 405 | +cl_kernel kernel_from_string(cl_context ctx, | |
| 406 | + char const *knl, char const *knl_name, char const *options) | |
| 407 | +{ | |
| 408 | + // create an OpenCL program (may have multiple kernels) | |
| 409 | + size_t sizes[] = { strlen(knl) }; | |
| 410 | + | |
| 411 | + if (options && strlen(options) == 0) | |
| 412 | + { | |
| 413 | + // reportedly, some implementations dislike empty strings. | |
| 414 | + options = NULL; | |
| 415 | + } | |
| 416 | + | |
| 417 | + cl_int status; | |
| 418 | + cl_program program = clCreateProgramWithSource(ctx, 1, &knl, sizes, &status); | |
| 419 | + CHECK_CL_ERROR(status, "clCreateProgramWithSource"); | |
| 420 | + | |
| 421 | + // build it | |
| 422 | + status = clBuildProgram(program, 0, NULL, options, NULL, NULL); | |
| 423 | + | |
| 424 | + { | |
| 425 | + // get build log and print it | |
| 426 | + | |
| 427 | + cl_device_id dev; | |
| 428 | + CALL_CL_GUARDED(clGetProgramInfo, (program, CL_PROGRAM_DEVICES, | |
| 429 | + sizeof(dev), &dev, NULL)); | |
| 430 | + | |
| 431 | + size_t log_size; | |
| 432 | + CALL_CL_GUARDED(clGetProgramBuildInfo, (program, dev, CL_PROGRAM_BUILD_LOG, | |
| 433 | + 0, NULL, &log_size)); | |
| 434 | + | |
| 435 | + bool do_print = status != CL_SUCCESS; | |
| 436 | + if (!do_print && log_size) | |
| 437 | + { | |
| 438 | + if (getenv("CL_HELPER_PRINT_COMPILER_OUTPUT")) | |
| 439 | + do_print = true; | |
| 440 | + else | |
| 441 | + { | |
| 442 | + if (!printed_compiler_output_message && !getenv("CL_HELPER_NO_COMPILER_OUTPUT_NAG")) | |
| 443 | + { | |
| 444 | + fprintf(stderr, "*** Kernel compilation resulted in non-empty log message.\n" | |
| 445 | + "*** Set environment variable CL_HELPER_PRINT_COMPILER_OUTPUT=1 to see more.\n" | |
| 446 | + "*** NOTE: this may include compiler warnings and other important messages\n" | |
| 447 | + "*** about your code.\n" | |
| 448 | + "*** Set CL_HELPER_NO_COMPILER_OUTPUT_NAG=1 to disable this message.\n"); | |
| 449 | + printed_compiler_output_message = true; | |
| 450 | + } | |
| 451 | + } | |
| 452 | + } | |
| 453 | + | |
| 454 | + if (do_print) | |
| 455 | + { | |
| 456 | + char *log = (char *) malloc(log_size); | |
| 457 | + CHECK_SYS_ERROR(!log, "kernel_from_string: allocate log"); | |
| 458 | + | |
| 459 | + char devname[MAX_NAME_LEN]; | |
| 460 | + CALL_CL_GUARDED(clGetDeviceInfo, (dev, CL_DEVICE_NAME, | |
| 461 | + sizeof(devname), devname, NULL)); | |
| 462 | + | |
| 463 | + CALL_CL_GUARDED(clGetProgramBuildInfo, (program, dev, CL_PROGRAM_BUILD_LOG, | |
| 464 | + log_size, log, NULL)); | |
| 465 | + fprintf(stderr, "*** build of '%s' on '%s' said:\n%s\n*** (end of message)\n", | |
| 466 | + knl_name, devname, log); | |
| 467 | + } | |
| 468 | + } | |
| 469 | + | |
| 470 | + CHECK_CL_ERROR(status, "clBuildProgram"); | |
| 471 | + | |
| 472 | + // fish the kernel out of the program | |
| 473 | + cl_kernel kernel = clCreateKernel(program, knl_name, &status); | |
| 474 | + CHECK_CL_ERROR(status, "clCreateKernel"); | |
| 475 | + | |
| 476 | + CALL_CL_GUARDED(clReleaseProgram, (program)); | |
| 477 | + | |
| 478 | + return kernel; | |
| 479 | +} | |
| 480 | + | |
| 481 | + | |
| 482 | + | |
| 483 | + | |
| 484 | +void print_device_info(cl_device_id device) | |
| 485 | +{ | |
| 486 | + // adapted from http://graphics.stanford.edu/~yoel/notes/clInfo.c | |
| 487 | + | |
| 488 | +#define LONG_PROPS \ | |
| 489 | + defn(VENDOR_ID), \ | |
| 490 | + defn(MAX_COMPUTE_UNITS), \ | |
| 491 | + defn(MAX_WORK_ITEM_DIMENSIONS), \ | |
| 492 | + defn(MAX_WORK_GROUP_SIZE), \ | |
| 493 | + defn(PREFERRED_VECTOR_WIDTH_CHAR), \ | |
| 494 | + defn(PREFERRED_VECTOR_WIDTH_SHORT), \ | |
| 495 | + defn(PREFERRED_VECTOR_WIDTH_INT), \ | |
| 496 | + defn(PREFERRED_VECTOR_WIDTH_LONG), \ | |
| 497 | + defn(PREFERRED_VECTOR_WIDTH_FLOAT), \ | |
| 498 | + defn(PREFERRED_VECTOR_WIDTH_DOUBLE), \ | |
| 499 | + defn(MAX_CLOCK_FREQUENCY), \ | |
| 500 | + defn(ADDRESS_BITS), \ | |
| 501 | + defn(MAX_MEM_ALLOC_SIZE), \ | |
| 502 | + defn(IMAGE_SUPPORT), \ | |
| 503 | + defn(MAX_READ_IMAGE_ARGS), \ | |
| 504 | + defn(MAX_WRITE_IMAGE_ARGS), \ | |
| 505 | + defn(IMAGE2D_MAX_WIDTH), \ | |
| 506 | + defn(IMAGE2D_MAX_HEIGHT), \ | |
| 507 | + defn(IMAGE3D_MAX_WIDTH), \ | |
| 508 | + defn(IMAGE3D_MAX_HEIGHT), \ | |
| 509 | + defn(IMAGE3D_MAX_DEPTH), \ | |
| 510 | + defn(MAX_SAMPLERS), \ | |
| 511 | + defn(MAX_PARAMETER_SIZE), \ | |
| 512 | + defn(MEM_BASE_ADDR_ALIGN), \ | |
| 513 | + defn(MIN_DATA_TYPE_ALIGN_SIZE), \ | |
| 514 | + defn(GLOBAL_MEM_CACHELINE_SIZE), \ | |
| 515 | + defn(GLOBAL_MEM_CACHE_SIZE), \ | |
| 516 | + defn(GLOBAL_MEM_SIZE), \ | |
| 517 | + defn(MAX_CONSTANT_BUFFER_SIZE), \ | |
| 518 | + defn(MAX_CONSTANT_ARGS), \ | |
| 519 | + defn(LOCAL_MEM_SIZE), \ | |
| 520 | + defn(ERROR_CORRECTION_SUPPORT), \ | |
| 521 | + defn(PROFILING_TIMER_RESOLUTION), \ | |
| 522 | + defn(ENDIAN_LITTLE), \ | |
| 523 | + defn(AVAILABLE), \ | |
| 524 | + defn(COMPILER_AVAILABLE), | |
| 525 | + | |
| 526 | +#define STR_PROPS \ | |
| 527 | + defn(NAME), \ | |
| 528 | + defn(VENDOR), \ | |
| 529 | + defn(PROFILE), \ | |
| 530 | + defn(VERSION), \ | |
| 531 | + defn(EXTENSIONS), | |
| 532 | + | |
| 533 | +#define HEX_PROPS \ | |
| 534 | + defn(SINGLE_FP_CONFIG), \ | |
| 535 | + defn(QUEUE_PROPERTIES), | |
| 536 | + | |
| 537 | + | |
| 538 | + printf("---------------------------------------------------------------------\n"); | |
| 539 | + | |
| 540 | + | |
| 541 | + static struct { cl_device_info param; const char *name; } longProps[] = { | |
| 542 | +#define defn(X) { CL_DEVICE_##X, #X } | |
| 543 | + LONG_PROPS | |
| 544 | +#undef defn | |
| 545 | + { 0, NULL }, | |
| 546 | + }; | |
| 547 | + static struct { cl_device_info param; const char *name; } hexProps[] = { | |
| 548 | +#define defn(X) { CL_DEVICE_##X, #X } | |
| 549 | + HEX_PROPS | |
| 550 | +#undef defn | |
| 551 | + { 0, NULL }, | |
| 552 | + }; | |
| 553 | + static struct { cl_device_info param; const char *name; } strProps[] = { | |
| 554 | +#define defn(X) { CL_DEVICE_##X, #X } | |
| 555 | + STR_PROPS | |
| 556 | +#undef defn | |
| 557 | + { CL_DRIVER_VERSION, "DRIVER_VERSION" }, | |
| 558 | + { 0, NULL }, | |
| 559 | + }; | |
| 560 | + cl_int status; | |
| 561 | + size_t size; | |
| 562 | + char buf[65536]; | |
| 563 | + long long val; /* Avoids unpleasant surprises for some params */ | |
| 564 | + int ii; | |
| 565 | + | |
| 566 | + for (ii = 0; strProps[ii].name != NULL; ii++) | |
| 567 | + { | |
| 568 | + status = clGetDeviceInfo(device, strProps[ii].param, sizeof buf, buf, &size); | |
| 569 | + if (status != CL_SUCCESS) | |
| 570 | + { | |
| 571 | + printf("Unable to get %s: %s!\n", | |
| 572 | + strProps[ii].name, cl_error_to_str(status)); | |
| 573 | + continue; | |
| 574 | + } | |
| 575 | + if (size > sizeof buf) | |
| 576 | + { | |
| 577 | + printf("Large %s (%zd bytes)! Truncating to %ld!\n", | |
| 578 | + strProps[ii].name, size, sizeof buf); | |
| 579 | + } | |
| 580 | + printf("%s: %s\n", | |
| 581 | + strProps[ii].name, buf); | |
| 582 | + } | |
| 583 | + printf("\n"); | |
| 584 | + | |
| 585 | + status = clGetDeviceInfo(device, CL_DEVICE_TYPE, sizeof val, &val, NULL); | |
| 586 | + if (status == CL_SUCCESS) | |
| 587 | + { | |
| 588 | + printf("Type: "); | |
| 589 | + if (val & CL_DEVICE_TYPE_DEFAULT) | |
| 590 | + { | |
| 591 | + val &= ~CL_DEVICE_TYPE_DEFAULT; | |
| 592 | + printf("Default "); | |
| 593 | + } | |
| 594 | + if (val & CL_DEVICE_TYPE_CPU) | |
| 595 | + { | |
| 596 | + val &= ~CL_DEVICE_TYPE_CPU; | |
| 597 | + printf("CPU "); | |
| 598 | + } | |
| 599 | + if (val & CL_DEVICE_TYPE_GPU) | |
| 600 | + { | |
| 601 | + val &= ~CL_DEVICE_TYPE_GPU; | |
| 602 | + printf("GPU "); | |
| 603 | + } | |
| 604 | + if (val & CL_DEVICE_TYPE_ACCELERATOR) | |
| 605 | + { | |
| 606 | + val &= ~CL_DEVICE_TYPE_ACCELERATOR; | |
| 607 | + printf("Accelerator "); | |
| 608 | + } | |
| 609 | + if (val != 0) { | |
| 610 | + printf("Unknown (0x%llx) ", val); | |
| 611 | + } | |
| 612 | + printf("\n"); | |
| 613 | + } | |
| 614 | + else | |
| 615 | + { | |
| 616 | + printf("Unable to get TYPE: %s!\n", | |
| 617 | + cl_error_to_str(status)); | |
| 618 | + } | |
| 619 | + | |
| 620 | + status = clGetDeviceInfo(device, CL_DEVICE_EXECUTION_CAPABILITIES, | |
| 621 | + sizeof val, &val, NULL); | |
| 622 | + if (status == CL_SUCCESS) | |
| 623 | + { | |
| 624 | + printf("EXECUTION_CAPABILITIES: "); | |
| 625 | + if (val & CL_EXEC_KERNEL) | |
| 626 | + { | |
| 627 | + val &= ~CL_EXEC_KERNEL; | |
| 628 | + printf("Kernel "); | |
| 629 | + } | |
| 630 | + if (val & CL_EXEC_NATIVE_KERNEL) | |
| 631 | + { | |
| 632 | + val &= ~CL_EXEC_NATIVE_KERNEL; | |
| 633 | + printf("Native "); | |
| 634 | + } | |
| 635 | + if (val) | |
| 636 | + printf("Unknown (0x%llx) ", val); | |
| 637 | + | |
| 638 | + printf("\n"); | |
| 639 | + } | |
| 640 | + else | |
| 641 | + { | |
| 642 | + printf("Unable to get EXECUTION_CAPABILITIES: %s!\n", | |
| 643 | + cl_error_to_str(status)); | |
| 644 | + } | |
| 645 | + | |
| 646 | + status = clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, | |
| 647 | + sizeof val, &val, NULL); | |
| 648 | + if (status == CL_SUCCESS) | |
| 649 | + { | |
| 650 | + static const char *cacheTypes[] = { "None", "Read-Only", "Read-Write" }; | |
| 651 | + static int numTypes = sizeof cacheTypes / sizeof cacheTypes[0]; | |
| 652 | + | |
| 653 | + printf("GLOBAL_MEM_CACHE_TYPE: %s (%lld)\n", | |
| 654 | + val < numTypes ? cacheTypes[val] : "???", val); | |
| 655 | + } | |
| 656 | + else | |
| 657 | + { | |
| 658 | + printf("Unable to get GLOBAL_MEM_CACHE_TYPE: %s!\n", | |
| 659 | + cl_error_to_str(status)); | |
| 660 | + } | |
| 661 | + | |
| 662 | + status = clGetDeviceInfo(device, | |
| 663 | + CL_DEVICE_LOCAL_MEM_TYPE, sizeof val, &val, NULL); | |
| 664 | + | |
| 665 | + if (status == CL_SUCCESS) | |
| 666 | + { | |
| 667 | + static const char *lmemTypes[] = { "???", "Local", "Global" }; | |
| 668 | + static int numTypes = sizeof lmemTypes / sizeof lmemTypes[0]; | |
| 669 | + | |
| 670 | + printf("CL_DEVICE_LOCAL_MEM_TYPE: %s (%lld)\n", | |
| 671 | + val < numTypes ? lmemTypes[val] : "???", val); | |
| 672 | + } | |
| 673 | + else | |
| 674 | + { | |
| 675 | + printf("Unable to get CL_DEVICE_LOCAL_MEM_TYPE: %s!\n", | |
| 676 | + cl_error_to_str(status)); | |
| 677 | + } | |
| 678 | + | |
| 679 | + for (ii = 0; hexProps[ii].name != NULL; ii++) | |
| 680 | + { | |
| 681 | + status = clGetDeviceInfo(device, hexProps[ii].param, sizeof val, &val, &size); | |
| 682 | + if (status != CL_SUCCESS) | |
| 683 | + { | |
| 684 | + printf("Unable to get %s: %s!\n", | |
| 685 | + hexProps[ii].name, cl_error_to_str(status)); | |
| 686 | + continue; | |
| 687 | + } | |
| 688 | + if (size > sizeof val) | |
| 689 | + { | |
| 690 | + printf("Large %s (%zd bytes)! Truncating to %ld!\n", | |
| 691 | + hexProps[ii].name, size, sizeof val); | |
| 692 | + } | |
| 693 | + printf("%s: 0x%llx\n", hexProps[ii].name, val); | |
| 694 | + } | |
| 695 | + printf("\n"); | |
| 696 | + | |
| 697 | + for (ii = 0; longProps[ii].name != NULL; ii++) | |
| 698 | + { | |
| 699 | + status = clGetDeviceInfo(device, longProps[ii].param, sizeof val, &val, &size); | |
| 700 | + if (status != CL_SUCCESS) | |
| 701 | + { | |
| 702 | + printf("Unable to get %s: %s!\n", | |
| 703 | + longProps[ii].name, cl_error_to_str(status)); | |
| 704 | + continue; | |
| 705 | + } | |
| 706 | + if (size > sizeof val) | |
| 707 | + { | |
| 708 | + printf("Large %s (%zd bytes)! Truncating to %ld!\n", | |
| 709 | + longProps[ii].name, size, sizeof val); | |
| 710 | + } | |
| 711 | + printf("%s: %lld\n", longProps[ii].name, val); | |
| 712 | + } | |
| 713 | + | |
| 714 | + { | |
| 715 | + size_t size; | |
| 716 | + CALL_CL_GUARDED(clGetDeviceInfo, | |
| 717 | + (device, CL_DEVICE_MAX_WORK_ITEM_SIZES, 0, 0, &size)); | |
| 718 | + | |
| 719 | + size_t res_vec[size/sizeof(size_t)]; // C99 VLA yay! | |
| 720 | + | |
| 721 | + CALL_CL_GUARDED(clGetDeviceInfo, | |
| 722 | + (device, CL_DEVICE_MAX_WORK_ITEM_SIZES, size, res_vec, &size)); | |
| 723 | + | |
| 724 | + printf("MAX_WORK_GROUP_SIZES: "); // a tiny lie | |
| 725 | + for (size_t i = 0; i < size/sizeof(size_t); ++i) | |
| 726 | + printf("%zd ", res_vec[i]); | |
| 727 | + printf("\n"); | |
| 728 | + } | |
| 729 | + printf("---------------------------------------------------------------------\n"); | |
| 730 | +} | |
| 731 | + | |
| 732 | + | |
| 733 | + | |
| 734 | +void print_device_info_from_queue(cl_command_queue queue) | |
| 735 | +{ | |
| 736 | + cl_device_id dev; | |
| 737 | + CALL_CL_GUARDED(clGetCommandQueueInfo, | |
| 738 | + (queue, CL_QUEUE_DEVICE, sizeof dev, &dev, NULL)); | |
| 739 | + | |
| 740 | + print_device_info(dev); | |
| 741 | +} | ... | ... |
cl-helper.h
0 → 100644
| 1 | +/* | |
| 2 | + * Copyright (c) 2010, 2012 Andreas Kloeckner | |
| 3 | + * | |
| 4 | + * Permission is hereby granted, free of charge, to any person obtaining a copy | |
| 5 | + * of this software and associated documentation files (the "Software"), to deal | |
| 6 | + * in the Software without restriction, including without limitation the rights | |
| 7 | + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
| 8 | + * copies of the Software, and to permit persons to whom the Software is | |
| 9 | + * furnished to do so, subject to the following conditions: | |
| 10 | + * | |
| 11 | + * The above copyright notice and this permission notice shall be included in | |
| 12 | + * all copies or substantial portions of the Software. | |
| 13 | + * | |
| 14 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
| 15 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
| 16 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
| 17 | + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
| 18 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
| 19 | + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |
| 20 | + * THE SOFTWARE. | |
| 21 | + */ | |
| 22 | + | |
| 23 | + | |
| 24 | + | |
| 25 | + | |
| 26 | +#ifndef NYUHPC_CL_HELPER | |
| 27 | +#define NYUHPC_CL_HELPER | |
| 28 | + | |
| 29 | +#include <stdarg.h> | |
| 30 | +#include <stdio.h> | |
| 31 | +#include <stdlib.h> | |
| 32 | + | |
| 33 | +#ifdef __APPLE__ | |
| 34 | +#include <OpenCL/opencl.h> | |
| 35 | +#else | |
| 36 | +#include <CL/cl.h> | |
| 37 | +#endif | |
| 38 | + | |
/* An error check macro for OpenCL.
 *
 * Usage:
 * CHECK_CL_ERROR(status_code_from_a_cl_operation, "function_name");
 *
 * STATUS_CODE is evaluated exactly once. If it differs from CL_SUCCESS, a
 * message naming WHAT, the current source file and line, and the text
 * returned by cl_error_to_str() is written to stderr and the program is
 * aborted.
 *
 * The expansion is wrapped in do { ... } while (0) so that it behaves as a
 * single statement (for example as the unbraced body of an if/else). The
 * invocation must therefore be terminated with a semicolon.
 */

#define CHECK_CL_ERROR(STATUS_CODE, WHAT) \
  do \
  { \
    const cl_int check_cl_error_status_ = (STATUS_CODE); \
    if (check_cl_error_status_ != CL_SUCCESS) \
    { \
      fprintf(stderr, \
          "*** '%s' in '%s' on line %d failed with error '%s'.\n", \
          (WHAT), __FILE__, __LINE__, \
          cl_error_to_str(check_cl_error_status_)); \
      abort(); \
    } \
  } while (0)
| 56 | + | |
/* A more automated error check macro for OpenCL, for use with clXxxx
 * functions that return status codes. (Not all of them do, notably
 * clCreateXxx do not.)
 *
 * Usage:
 * CALL_CL_GUARDED(clFunction, (arg1, arg2));
 *
 * Note the slightly strange comma between the function name and the
 * argument list.
 *
 * The call's return value is stored in a macro-private variable and handed
 * to CHECK_CL_ERROR together with the stringified function name. The
 * variable name is deliberately unusual so that an ARGLIST mentioning a
 * caller variable called 'status_code' is not captured by the macro.
 *
 * The expansion is wrapped in do { ... } while (0) so that it behaves as a
 * single statement; terminate the invocation with a semicolon.
 */

#define CALL_CL_GUARDED(NAME, ARGLIST) \
  do \
  { \
    cl_int call_cl_guarded_status_; \
    call_cl_guarded_status_ = NAME ARGLIST; \
    CHECK_CL_ERROR(call_cl_guarded_status_, #NAME); \
  } while (0)
| 74 | + | |
/* An error check macro for Unix system functions. If "COND" is true, then the
 * last system error ("errno") is printed along with MSG (via perror), which
 * is supposed to be a string describing what you were doing, and the program
 * is aborted. COND is evaluated exactly once.
 *
 * Example:
 * CHECK_SYS_ERROR(dave != 0, "opening hatch");
 *
 * The expansion is wrapped in do { ... } while (0) so that it behaves as a
 * single statement; terminate the invocation with a semicolon.
 */
#define CHECK_SYS_ERROR(COND, MSG) \
  do \
  { \
    if (COND) \
    { \
      perror(MSG); \
      abort(); \
    } \
  } while (0)
| 88 | + | |
| 89 | +/* Return a string describing the OpenCL error code 'e'. | |
| 90 | + */ | |
| 91 | +const char *cl_error_to_str(cl_int e); | |
| 92 | + | |
/* Print a list of available OpenCL platforms and devices
 * to standard output.
 *
 * Takes no arguments. The parameter list is spelled (void) so that this is
 * a real prototype and the compiler rejects calls that pass arguments.
 */
void print_platforms_devices(void);
| 97 | + | |
| 98 | +/* Create an OpenCL context and a matching command queue on a platform from a | |
| 99 | + * vendor whose name contains 'plat_name' on a device whose name contains | |
| 100 | + * 'dev_name'. Both 'plat_name' and 'dev_name' may be NULL, indicating no | |
| 101 | + * preference in the matter. | |
| 102 | + * | |
| 103 | + * If multiple devices match both 'plat_name' and 'dev_name', then 'idx' | |
| 104 | + * prescribes the number of the device that should be chosen. | |
| 105 | + * | |
| 106 | + * You may also use the special value CHOOSE_INTERACTIVELY to offer the user | |
| 107 | + * a choice. You should use this value for code you turn in. | |
| 108 | + * | |
| 109 | + * This function always succeeds. (If an error occurs, the program | |
| 110 | + * is aborted.) | |
| 111 | + * | |
| 112 | + * You can force interactive querying by defining the | |
| 113 | + * CL_HELPER_FORCE_INTERACTIVE macro when compiling cl-helper.c. | |
| 114 | + * You may do so by passing the -DCL_HELPER_FORCE_INTERACTIVE | |
| 115 | + * compiler option. | |
| 116 | + */ | |
| 117 | +extern const char *CHOOSE_INTERACTIVELY; | |
| 118 | +void create_context_on(const char *plat_name, const char*dev_name, cl_uint | |
| 119 | + idx, cl_context *ctx, cl_command_queue *queue, int enable_profiling); | |
| 120 | + | |
| 121 | +/* Read contents of file 'filename'. | |
| 122 | + * Return as a new string. You must free the string when you're done with it. | |
| 123 | + * | |
| 124 | + * This function always succeeds. (If an error occurs, the program | |
| 125 | + * is aborted.) | |
| 126 | + */ | |
| 127 | +char *read_file(const char *filename); | |
| 128 | + | |
| 129 | +/* Create a new OpenCL kernel from the code in the string 'knl'. | |
| 130 | + * 'knl_name' is the name of the kernel function, and 'options', | |
| 131 | + * if not NULL, is a string containing compiler flags. | |
| 132 | + * | |
| 133 | + * You must release the resulting kernel when you're done | |
| 134 | + * with it. | |
| 135 | + * | |
| 136 | + * This function always succeeds. (If an error occurs, the program | |
| 137 | + * is aborted.) | |
| 138 | + */ | |
| 139 | +cl_kernel kernel_from_string(cl_context ctx, | |
| 140 | + char const *knl, char const *knl_name, char const *options); | |
| 141 | + | |
| 142 | +/* Print information about a device, found from either the | |
| 143 | + * queue or the device_id. | |
| 144 | + */ | |
| 145 | +void print_device_info(cl_device_id device); | |
| 146 | +void print_device_info_from_queue(cl_command_queue queue); | |
| 147 | + | |
| 148 | +#define SET_1_KERNEL_ARG(knl, arg0) \ | |
| 149 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 0, sizeof(arg0), &arg0)); | |
| 150 | + | |
| 151 | +#define SET_2_KERNEL_ARGS(knl, arg0, arg1) \ | |
| 152 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 0, sizeof(arg0), &arg0)); \ | |
| 153 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 1, sizeof(arg1), &arg1)); | |
| 154 | + | |
| 155 | +#define SET_3_KERNEL_ARGS(knl, arg0, arg1, arg2) \ | |
| 156 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 0, sizeof(arg0), &arg0)); \ | |
| 157 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 1, sizeof(arg1), &arg1)); \ | |
| 158 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 2, sizeof(arg2), &arg2)); | |
| 159 | + | |
| 160 | +#define SET_4_KERNEL_ARGS(knl, arg0, arg1, arg2, arg3) \ | |
| 161 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 0, sizeof(arg0), &arg0)); \ | |
| 162 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 1, sizeof(arg1), &arg1)); \ | |
| 163 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 2, sizeof(arg2), &arg2)); \ | |
| 164 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 3, sizeof(arg3), &arg3)); | |
| 165 | + | |
| 166 | +#define SET_5_KERNEL_ARGS(knl, arg0, arg1, arg2, arg3, arg4) \ | |
| 167 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 0, sizeof(arg0), &arg0)); \ | |
| 168 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 1, sizeof(arg1), &arg1)); \ | |
| 169 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 2, sizeof(arg2), &arg2)); \ | |
| 170 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 3, sizeof(arg3), &arg3)); \ | |
| 171 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 4, sizeof(arg4), &arg4)); | |
| 172 | + | |
| 173 | +#define SET_6_KERNEL_ARGS(knl, arg0, arg1, arg2, arg3, arg4, arg5) \ | |
| 174 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 0, sizeof(arg0), &arg0)); \ | |
| 175 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 1, sizeof(arg1), &arg1)); \ | |
| 176 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 2, sizeof(arg2), &arg2)); \ | |
| 177 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 3, sizeof(arg3), &arg3)); \ | |
| 178 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 4, sizeof(arg4), &arg4)); \ | |
| 179 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 5, sizeof(arg5), &arg5)); | |
| 180 | + | |
| 181 | +#define SET_7_KERNEL_ARGS(knl, arg0, arg1, arg2, arg3, arg4, arg5, arg6) \ | |
| 182 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 0, sizeof(arg0), &arg0)); \ | |
| 183 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 1, sizeof(arg1), &arg1)); \ | |
| 184 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 2, sizeof(arg2), &arg2)); \ | |
| 185 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 3, sizeof(arg3), &arg3)); \ | |
| 186 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 4, sizeof(arg4), &arg4)); \ | |
| 187 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 5, sizeof(arg5), &arg5)); \ | |
| 188 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 6, sizeof(arg6), &arg6)); | |
| 189 | + | |
| 190 | +#define SET_8_KERNEL_ARGS(knl, arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7) \ | |
| 191 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 0, sizeof(arg0), &arg0)); \ | |
| 192 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 1, sizeof(arg1), &arg1)); \ | |
| 193 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 2, sizeof(arg2), &arg2)); \ | |
| 194 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 3, sizeof(arg3), &arg3)); \ | |
| 195 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 4, sizeof(arg4), &arg4)); \ | |
| 196 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 5, sizeof(arg5), &arg5)); \ | |
| 197 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 6, sizeof(arg6), &arg6)); \ | |
| 198 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 7, sizeof(arg7), &arg7)); | |
| 199 | + | |
| 200 | +#define SET_9_KERNEL_ARGS(knl, arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8) \ | |
| 201 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 0, sizeof(arg0), &arg0)); \ | |
| 202 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 1, sizeof(arg1), &arg1)); \ | |
| 203 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 2, sizeof(arg2), &arg2)); \ | |
| 204 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 3, sizeof(arg3), &arg3)); \ | |
| 205 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 4, sizeof(arg4), &arg4)); \ | |
| 206 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 5, sizeof(arg5), &arg5)); \ | |
| 207 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 6, sizeof(arg6), &arg6)); \ | |
| 208 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 7, sizeof(arg7), &arg7)); \ | |
| 209 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 8, sizeof(arg8), &arg8)); | |
| 210 | + | |
| 211 | +#define SET_10_KERNEL_ARGS(knl, arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9) \ | |
| 212 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 0, sizeof(arg0), &arg0)); \ | |
| 213 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 1, sizeof(arg1), &arg1)); \ | |
| 214 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 2, sizeof(arg2), &arg2)); \ | |
| 215 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 3, sizeof(arg3), &arg3)); \ | |
| 216 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 4, sizeof(arg4), &arg4)); \ | |
| 217 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 5, sizeof(arg5), &arg5)); \ | |
| 218 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 6, sizeof(arg6), &arg6)); \ | |
| 219 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 7, sizeof(arg7), &arg7)); \ | |
| 220 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 8, sizeof(arg8), &arg8)); \ | |
| 221 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 9, sizeof(arg9), &arg9)); | |
| 222 | + | |
| 223 | +#define SET_11_KERNEL_ARGS(knl, arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10) \ | |
| 224 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 0, sizeof(arg0), &arg0)); \ | |
| 225 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 1, sizeof(arg1), &arg1)); \ | |
| 226 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 2, sizeof(arg2), &arg2)); \ | |
| 227 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 3, sizeof(arg3), &arg3)); \ | |
| 228 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 4, sizeof(arg4), &arg4)); \ | |
| 229 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 5, sizeof(arg5), &arg5)); \ | |
| 230 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 6, sizeof(arg6), &arg6)); \ | |
| 231 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 7, sizeof(arg7), &arg7)); \ | |
| 232 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 8, sizeof(arg8), &arg8)); \ | |
| 233 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 9, sizeof(arg9), &arg9)); \ | |
| 234 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 10, sizeof(arg10), &arg10)); | |
| 235 | + | |
| 236 | +#define SET_12_KERNEL_ARGS(knl, arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10, arg11) \ | |
| 237 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 0, sizeof(arg0), &arg0)); \ | |
| 238 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 1, sizeof(arg1), &arg1)); \ | |
| 239 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 2, sizeof(arg2), &arg2)); \ | |
| 240 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 3, sizeof(arg3), &arg3)); \ | |
| 241 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 4, sizeof(arg4), &arg4)); \ | |
| 242 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 5, sizeof(arg5), &arg5)); \ | |
| 243 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 6, sizeof(arg6), &arg6)); \ | |
| 244 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 7, sizeof(arg7), &arg7)); \ | |
| 245 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 8, sizeof(arg8), &arg8)); \ | |
| 246 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 9, sizeof(arg9), &arg9)); \ | |
| 247 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 10, sizeof(arg10), &arg10)); \ | |
| 248 | + CALL_CL_GUARDED(clSetKernelArg, (knl, 11, sizeof(arg11), &arg11)); | |
| 249 | + | |
| 250 | +#endif | ... | ... |
print-devices.c
0 → 100644
set-governor
0 → 100755
show-clock-freq
0 → 100755
timing.h
0 → 100644
| 1 | +#ifdef __APPLE__ | |
| 2 | + | |
| 3 | +#include <sys/time.h> | |
| 4 | + | |
| 5 | +typedef struct timeval timestamp_type; | |
| 6 | + | |
| 7 | +static void get_timestamp(timestamp_type *t) | |
| 8 | +{ | |
| 9 | + gettimeofday(t, NULL); | |
| 10 | +} | |
| 11 | + | |
| 12 | +static double timestamp_diff_in_seconds(timestamp_type start, | |
| 13 | +timestamp_type end) | |
| 14 | +{ | |
| 15 | + /* Perform the carry for the later subtraction by updating start. */ | |
| 16 | + if (end.tv_usec < start.tv_usec) { | |
| 17 | + int nsec = (start.tv_usec - end.tv_usec) / 1000000 + 1; | |
| 18 | + start.tv_usec -= 1000000 * nsec; | |
| 19 | + start.tv_sec += nsec; | |
| 20 | + } | |
| 21 | + if (end.tv_usec - start.tv_usec > 1000000) { | |
| 22 | + int nsec = (end.tv_usec - start.tv_usec) / 1000000; | |
| 23 | + start.tv_usec += 1000000 * nsec; | |
| 24 | + start.tv_sec -= nsec; | |
| 25 | + } | |
| 26 | + | |
| 27 | + return end.tv_sec - start.tv_sec + (end.tv_usec - start.tv_usec)*1e-6; | |
| 28 | +} | |
| 29 | + | |
| 30 | +#else | |
| 31 | + | |
| 32 | +#include <time.h> | |
| 33 | + | |
| 34 | +typedef struct timespec timestamp_type; | |
| 35 | + | |
| 36 | +static void get_timestamp(timestamp_type *t) | |
| 37 | +{ | |
| 38 | + clock_gettime(CLOCK_REALTIME, t); | |
| 39 | +} | |
| 40 | + | |
| 41 | +static double timestamp_diff_in_seconds(timestamp_type start, timestamp_type end) | |
| 42 | +{ | |
| 43 | + struct timespec temp; | |
| 44 | + if ((end.tv_nsec-start.tv_nsec)<0) { | |
| 45 | + temp.tv_sec = end.tv_sec-start.tv_sec-1; | |
| 46 | + temp.tv_nsec = 1000000000+end.tv_nsec-start.tv_nsec; | |
| 47 | + } else { | |
| 48 | + temp.tv_sec = end.tv_sec-start.tv_sec; | |
| 49 | + temp.tv_nsec = end.tv_nsec-start.tv_nsec; | |
| 50 | + } | |
| 51 | + return temp.tv_sec + 1e-9*temp.tv_nsec; | |
| 52 | +} | |
| 53 | + | |
| 54 | +#endif | ... | ... |
Please
register
or
login
to post a comment