Commit c3bddbc632d53fb3488b6b4a6694f5402f7caf77

Authored by Georg Hopp
0 parents

initial commit

  1 +.*.sw[op]
  2 +*~
  3 +a.out
  4 +print-devices
  5 +cl-demo
  6 +*.o
... ...
# Builds the OpenCL demo programs.
# Compiler, flags and libraries are variables so they can be overridden on the
# command line, e.g. `make CC=clang`.
CC = gcc
CFLAGS = -std=gnu99
LDLIBS = -lrt -lOpenCL

EXECUTABLES = cl-demo print-devices

# `all` and `clean` name actions, not files; declaring them phony keeps a stray
# file called "all" or "clean" from silently disabling the target.
.PHONY: all clean

all: $(EXECUTABLES)

print-devices: print-devices.c cl-helper.c
	$(CC) $(CFLAGS) -o $@ $^ $(LDLIBS)

cl-demo: cl-demo.c cl-helper.c
	$(CC) $(CFLAGS) -o $@ $^ $(LDLIBS)

clean:
	@rm -f $(EXECUTABLES) *.o
... ...
  1 +# OpenCL Howto
  2 +
  3 +Code snippets taken from
  4 +[OpenCLHowto](https://wiki.tiker.net/OpenCLHowTo)
  5 +
  6 +## Description
  7 +
  8 +This is just some more playing around with OpenCL in an attempt to learn a bit
  9 +about it.
  10 +
  11 +## Requirements
  12 +
  13 +Some OpenCL-capable hardware and the corresponding OpenCL library exposing the
  14 +OpenCL API. I tested this on an Intel GPU (Intel Corporation Haswell-ULT
  15 +Integrated Graphics Controller (rev 09)) with the
  16 +[beignet](https://www.freedesktop.org/wiki/Software/Beignet/)
  17 +open source library.
  18 +
  19 +## License
  20 +
  21 +MIT License
  22 +
  23 +> Permission is hereby granted, free of charge, to any person obtaining a copy
  24 +> of this software and associated documentation files (the "Software"), to
  25 +> deal in the Software without restriction, including without limitation the
  26 +> rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  27 +> sell copies of the Software, and to permit persons to whom the Software is
  28 +> furnished to do so, subject to the following conditions:
  29 +>
  30 +> The above copyright notice and this permission notice shall be included in
  31 +> all copies or substantial portions of the Software.
  32 +>
  33 +> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  34 +> IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  35 +> FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  36 +> AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  37 +> LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  38 +> FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  39 +> IN THE SOFTWARE.
... ...
  1 +#include "timing.h"
  2 +#include "cl-helper.h"
  3 +
  4 +
  5 +
  6 +
  7 +int main(int argc, char **argv)
  8 +{
  9 + if (argc != 3)
  10 + {
  11 + fprintf(stderr, "need two arguments!\n");
  12 + abort();
  13 + }
  14 +
  15 + const cl_long n = atol(argv[1]);
  16 + const int ntrips = atoi(argv[2]);
  17 +
  18 + cl_context ctx;
  19 + cl_command_queue queue;
  20 + create_context_on(CHOOSE_INTERACTIVELY, CHOOSE_INTERACTIVELY, 0, &ctx, &queue, 0);
  21 +
  22 + print_device_info_from_queue(queue);
  23 +
  24 + // --------------------------------------------------------------------------
  25 + // load kernels
  26 + // --------------------------------------------------------------------------
  27 + char *knl_text = read_file("vec-add-soln.cl");
  28 + cl_kernel knl = kernel_from_string(ctx, knl_text, "sum", NULL);
  29 + free(knl_text);
  30 +
  31 + // --------------------------------------------------------------------------
  32 + // allocate and initialize CPU memory
  33 + // --------------------------------------------------------------------------
  34 + float *a = (float *) malloc(sizeof(float) * n);
  35 + if (!a) { perror("alloc x"); abort(); }
  36 + float *b = (float *) malloc(sizeof(float) * n);
  37 + if (!b) { perror("alloc y"); abort(); }
  38 + float *c = (float *) malloc(sizeof(float) * n);
  39 + if (!c) { perror("alloc z"); abort(); }
  40 +
  41 + for (size_t i = 0; i < n; ++i)
  42 + {
  43 + a[i] = i;
  44 + b[i] = 2*i;
  45 + }
  46 +
  47 + // --------------------------------------------------------------------------
  48 + // allocate device memory
  49 + // --------------------------------------------------------------------------
  50 + cl_int status;
  51 + cl_mem buf_a = clCreateBuffer(ctx, CL_MEM_READ_WRITE,
  52 + sizeof(float) * n, 0, &status);
  53 + CHECK_CL_ERROR(status, "clCreateBuffer");
  54 +
  55 + cl_mem buf_b = clCreateBuffer(ctx, CL_MEM_READ_WRITE,
  56 + sizeof(float) * n, 0, &status);
  57 + CHECK_CL_ERROR(status, "clCreateBuffer");
  58 +
  59 + cl_mem buf_c = clCreateBuffer(ctx, CL_MEM_READ_WRITE,
  60 + sizeof(float) * n, 0, &status);
  61 + CHECK_CL_ERROR(status, "clCreateBuffer");
  62 +
  63 + // --------------------------------------------------------------------------
  64 + // transfer to device
  65 + // --------------------------------------------------------------------------
  66 + CALL_CL_GUARDED(clEnqueueWriteBuffer, (
  67 + queue, buf_a, /*blocking*/ CL_TRUE, /*offset*/ 0,
  68 + n * sizeof(float), a,
  69 + 0, NULL, NULL));
  70 +
  71 + CALL_CL_GUARDED(clEnqueueWriteBuffer, (
  72 + queue, buf_b, /*blocking*/ CL_TRUE, /*offset*/ 0,
  73 + n * sizeof(float), b,
  74 + 0, NULL, NULL));
  75 +
  76 + // --------------------------------------------------------------------------
  77 + // run code on device
  78 + // --------------------------------------------------------------------------
  79 +
  80 + CALL_CL_GUARDED(clFinish, (queue));
  81 +
  82 + timestamp_type time1, time2;
  83 + get_timestamp(&time1);
  84 +
  85 + for (int trip = 0; trip < ntrips; ++trip)
  86 + {
  87 + SET_4_KERNEL_ARGS(knl, buf_a, buf_b, buf_c, n);
  88 + size_t ldim[] = { 32 };
  89 + size_t gdim[] = { ((n + ldim[0] - 1)/ldim[0])*ldim[0] };
  90 + CALL_CL_GUARDED(clEnqueueNDRangeKernel,
  91 + (queue, knl,
  92 + /*dimensions*/ 1, NULL, gdim, ldim,
  93 + 0, NULL, NULL));
  94 + }
  95 +
  96 + CALL_CL_GUARDED(clFinish, (queue));
  97 +
  98 + get_timestamp(&time2);
  99 + double elapsed = timestamp_diff_in_seconds(time1,time2)/ntrips;
  100 + printf("%f s\n", elapsed);
  101 + printf("%f GB/s\n",
  102 + 3*n*sizeof(float)/1e9/elapsed);
  103 +
  104 + // --------------------------------------------------------------------------
  105 + // transfer back & check
  106 + // --------------------------------------------------------------------------
  107 + CALL_CL_GUARDED(clEnqueueReadBuffer, (
  108 + queue, buf_c, /*blocking*/ CL_TRUE, /*offset*/ 0,
  109 + n * sizeof(float), c,
  110 + 0, NULL, NULL));
  111 +
  112 + for (size_t i = 0; i < n; ++i)
  113 + if (c[i] != 3*i)
  114 + {
  115 + printf("BAD %ld %f %f!\n", i, c[i], c[i] - 3*i);
  116 + abort();
  117 + }
  118 + puts("GOOD");
  119 +
  120 + // --------------------------------------------------------------------------
  121 + // clean up
  122 + // --------------------------------------------------------------------------
  123 + CALL_CL_GUARDED(clReleaseMemObject, (buf_a));
  124 + CALL_CL_GUARDED(clReleaseMemObject, (buf_b));
  125 + CALL_CL_GUARDED(clReleaseMemObject, (buf_c));
  126 + CALL_CL_GUARDED(clReleaseKernel, (knl));
  127 + CALL_CL_GUARDED(clReleaseCommandQueue, (queue));
  128 + CALL_CL_GUARDED(clReleaseContext, (ctx));
  129 +
  130 + return 0;
  131 +}
... ...
  1 +/*
  2 + * Copyright (c) 2010 Andreas Kloeckner
  3 + *
  4 + * Permission is hereby granted, free of charge, to any person obtaining a copy
  5 + * of this software and associated documentation files (the "Software"), to deal
  6 + * in the Software without restriction, including without limitation the rights
  7 + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  8 + * copies of the Software, and to permit persons to whom the Software is
  9 + * furnished to do so, subject to the following conditions:
  10 + *
  11 + * The above copyright notice and this permission notice shall be included in
  12 + * all copies or substantial portions of the Software.
  13 + *
  14 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  17 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  19 + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  20 + * THE SOFTWARE.
  21 + */
  22 +
  23 +
  24 +
  25 +
  26 +#include "cl-helper.h"
  27 +#include <string.h>
  28 +#include <stdbool.h>
  29 +
  30 +
  31 +
  32 +
  33 +#define MAX_NAME_LEN 1000
  34 +
  35 +
  36 +
  37 +
  38 +const char *cl_error_to_str(cl_int e)
  39 +{
  40 + switch (e)
  41 + {
  42 + case CL_SUCCESS: return "success";
  43 + case CL_DEVICE_NOT_FOUND: return "device not found";
  44 + case CL_DEVICE_NOT_AVAILABLE: return "device not available";
  45 +#if !(defined(CL_PLATFORM_NVIDIA) && CL_PLATFORM_NVIDIA == 0x3001)
  46 + case CL_COMPILER_NOT_AVAILABLE: return "device compiler not available";
  47 +#endif
  48 + case CL_MEM_OBJECT_ALLOCATION_FAILURE: return "mem object allocation failure";
  49 + case CL_OUT_OF_RESOURCES: return "out of resources";
  50 + case CL_OUT_OF_HOST_MEMORY: return "out of host memory";
  51 + case CL_PROFILING_INFO_NOT_AVAILABLE: return "profiling info not available";
  52 + case CL_MEM_COPY_OVERLAP: return "mem copy overlap";
  53 + case CL_IMAGE_FORMAT_MISMATCH: return "image format mismatch";
  54 + case CL_IMAGE_FORMAT_NOT_SUPPORTED: return "image format not supported";
  55 + case CL_BUILD_PROGRAM_FAILURE: return "build program failure";
  56 + case CL_MAP_FAILURE: return "map failure";
  57 +
  58 + case CL_INVALID_VALUE: return "invalid value";
  59 + case CL_INVALID_DEVICE_TYPE: return "invalid device type";
  60 + case CL_INVALID_PLATFORM: return "invalid platform";
  61 + case CL_INVALID_DEVICE: return "invalid device";
  62 + case CL_INVALID_CONTEXT: return "invalid context";
  63 + case CL_INVALID_QUEUE_PROPERTIES: return "invalid queue properties";
  64 + case CL_INVALID_COMMAND_QUEUE: return "invalid command queue";
  65 + case CL_INVALID_HOST_PTR: return "invalid host ptr";
  66 + case CL_INVALID_MEM_OBJECT: return "invalid mem object";
  67 + case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: return "invalid image format descriptor";
  68 + case CL_INVALID_IMAGE_SIZE: return "invalid image size";
  69 + case CL_INVALID_SAMPLER: return "invalid sampler";
  70 + case CL_INVALID_BINARY: return "invalid binary";
  71 + case CL_INVALID_BUILD_OPTIONS: return "invalid build options";
  72 + case CL_INVALID_PROGRAM: return "invalid program";
  73 + case CL_INVALID_PROGRAM_EXECUTABLE: return "invalid program executable";
  74 + case CL_INVALID_KERNEL_NAME: return "invalid kernel name";
  75 + case CL_INVALID_KERNEL_DEFINITION: return "invalid kernel definition";
  76 + case CL_INVALID_KERNEL: return "invalid kernel";
  77 + case CL_INVALID_ARG_INDEX: return "invalid arg index";
  78 + case CL_INVALID_ARG_VALUE: return "invalid arg value";
  79 + case CL_INVALID_ARG_SIZE: return "invalid arg size";
  80 + case CL_INVALID_KERNEL_ARGS: return "invalid kernel args";
  81 + case CL_INVALID_WORK_DIMENSION: return "invalid work dimension";
  82 + case CL_INVALID_WORK_GROUP_SIZE: return "invalid work group size";
  83 + case CL_INVALID_WORK_ITEM_SIZE: return "invalid work item size";
  84 + case CL_INVALID_GLOBAL_OFFSET: return "invalid global offset";
  85 + case CL_INVALID_EVENT_WAIT_LIST: return "invalid event wait list";
  86 + case CL_INVALID_EVENT: return "invalid event";
  87 + case CL_INVALID_OPERATION: return "invalid operation";
  88 + case CL_INVALID_GL_OBJECT: return "invalid gl object";
  89 + case CL_INVALID_BUFFER_SIZE: return "invalid buffer size";
  90 + case CL_INVALID_MIP_LEVEL: return "invalid mip level";
  91 +
  92 +#if defined(cl_khr_gl_sharing) && (cl_khr_gl_sharing >= 1)
  93 + case CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR: return "invalid gl sharegroup reference number";
  94 +#endif
  95 +
  96 +#ifdef CL_VERSION_1_1
  97 + case CL_MISALIGNED_SUB_BUFFER_OFFSET: return "misaligned sub-buffer offset";
  98 + case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST: return "exec status error for events in wait list";
  99 + case CL_INVALID_GLOBAL_WORK_SIZE: return "invalid global work size";
  100 +#endif
  101 +
  102 + default: return "invalid/unknown error code";
  103 + }
  104 +}
  105 +
  106 +
  107 +
  108 +
  109 +void print_platforms_devices()
  110 +{
  111 + // get number of platforms
  112 + cl_uint plat_count;
  113 + CALL_CL_GUARDED(clGetPlatformIDs, (0, NULL, &plat_count));
  114 +
  115 + // allocate memory, get list of platforms
  116 + cl_platform_id *platforms =
  117 + (cl_platform_id *) malloc(plat_count*sizeof(cl_platform_id));
  118 + CHECK_SYS_ERROR(!platforms, "allocating platform array");
  119 +
  120 + CALL_CL_GUARDED(clGetPlatformIDs, (plat_count, platforms, NULL));
  121 +
  122 + // iterate over platforms
  123 + for (cl_uint i = 0; i < plat_count; ++i)
  124 + {
  125 + // get platform vendor name
  126 + char buf[MAX_NAME_LEN];
  127 + CALL_CL_GUARDED(clGetPlatformInfo, (platforms[i], CL_PLATFORM_VENDOR,
  128 + sizeof(buf), buf, NULL));
  129 + printf("platform %d: vendor '%s'\n", i, buf);
  130 +
  131 + // get number of devices in platform
  132 + cl_uint dev_count;
  133 + CALL_CL_GUARDED(clGetDeviceIDs, (platforms[i], CL_DEVICE_TYPE_ALL,
  134 + 0, NULL, &dev_count));
  135 +
  136 + cl_device_id *devices =
  137 + (cl_device_id *) malloc(dev_count*sizeof(cl_device_id));
  138 + CHECK_SYS_ERROR(!devices, "allocating device array");
  139 +
  140 + // get list of devices in platform
  141 + CALL_CL_GUARDED(clGetDeviceIDs, (platforms[i], CL_DEVICE_TYPE_ALL,
  142 + dev_count, devices, NULL));
  143 +
  144 + // iterate over devices
  145 + for (cl_uint j = 0; j < dev_count; ++j)
  146 + {
  147 + char buf[MAX_NAME_LEN];
  148 + CALL_CL_GUARDED(clGetDeviceInfo, (devices[j], CL_DEVICE_NAME,
  149 + sizeof(buf), buf, NULL));
  150 + printf(" device %d: '%s'\n", j, buf);
  151 + }
  152 +
  153 + free(devices);
  154 + }
  155 +
  156 + free(platforms);
  157 +}
  158 +
  159 +
  160 +
  161 +
/* Reads one line from stdin into a freshly allocated, NUL-terminated buffer.
 *
 * Reading stops after the first '\n' (which is kept in the result) or at
 * end of input. If end of input is hit before any character was read, the
 * result is an empty string.
 *
 * Returns NULL if memory could not be allocated; otherwise the caller owns
 * the returned buffer and must free() it.
 *
 * Based on http://stackoverflow.com/a/314422/1148634
 */
char *read_a_line(void)
{
  enum { INITIAL_CAPACITY = 1000 };

  size_t capacity = INITIAL_CAPACITY;
  size_t length = 0;                      // characters stored so far
  char *buffer = (char *) malloc(capacity);

  if (buffer == NULL)
    return NULL;

  for (;;)
  {
    int c = fgetc(stdin);
    if (c == EOF)
      break;

    // Always keep room for this character plus the terminating NUL.
    // Tracking the used length (rather than a countdown that is reset to
    // the full new capacity after growing) is what keeps writes inside
    // the allocation for arbitrarily long lines.
    if (length + 1 >= capacity)
    {
      size_t new_capacity = capacity * 2;
      char *grown = (char *) realloc(buffer, new_capacity);

      if (grown == NULL)
      {
        free(buffer);
        return NULL;
      }
      buffer = grown;
      capacity = new_capacity;
    }

    buffer[length++] = (char) c;
    if (c == '\n')
      break;
  }

  buffer[length] = '\0';
  return buffer;
}
  200 +
  201 +
  202 +
  203 +
  204 +const char *CHOOSE_INTERACTIVELY = "INTERACTIVE";
  205 +
  206 +
  207 +#define MIN(a,b) (((a)<(b))?(a):(b))
  208 +#define MAX(a,b) (((a)>(b))?(a):(b))
  209 +
  210 +void create_context_on(const char *plat_name, const char*dev_name, cl_uint idx,
  211 + cl_context *ctx, cl_command_queue *queue, int enable_profiling)
  212 +{
  213 + char dev_sel_buf[MAX_NAME_LEN];
  214 + char platform_sel_buf[MAX_NAME_LEN];
  215 +
  216 + // get number of platforms
  217 + cl_uint plat_count;
  218 + CALL_CL_GUARDED(clGetPlatformIDs, (0, NULL, &plat_count));
  219 +
  220 + // allocate memory, get list of platform handles
  221 + cl_platform_id *platforms =
  222 + (cl_platform_id *) malloc(plat_count*sizeof(cl_platform_id));
  223 + CHECK_SYS_ERROR(!platforms, "allocating platform array");
  224 + CALL_CL_GUARDED(clGetPlatformIDs, (plat_count, platforms, NULL));
  225 +
  226 + // print menu, if requested
  227 +#ifndef CL_HELPER_FORCE_INTERACTIVE
  228 + if (plat_name == CHOOSE_INTERACTIVELY) // yes, we want exactly that pointer
  229 +#endif
  230 + {
  231 + puts("Choose platform:");
  232 + for (cl_uint i = 0; i < plat_count; ++i)
  233 + {
  234 + char buf[MAX_NAME_LEN];
  235 + CALL_CL_GUARDED(clGetPlatformInfo, (platforms[i], CL_PLATFORM_VENDOR,
  236 + sizeof(buf), buf, NULL));
  237 + printf("[%d] %s\n", i, buf);
  238 + }
  239 +
  240 + printf("Enter choice: ");
  241 + fflush(stdout);
  242 +
  243 + char *sel = read_a_line();
  244 + if (!sel)
  245 + {
  246 + fprintf(stderr, "error reading line from stdin");
  247 + abort();
  248 + }
  249 +
  250 + int sel_int = MIN(MAX(0, atoi(sel)), (int) plat_count-1);
  251 + free(sel);
  252 +
  253 + CALL_CL_GUARDED(clGetPlatformInfo, (platforms[sel_int], CL_PLATFORM_VENDOR,
  254 + sizeof(platform_sel_buf), platform_sel_buf, NULL));
  255 + plat_name = platform_sel_buf;
  256 + }
  257 +
  258 + // iterate over platforms
  259 + for (cl_uint i = 0; i < plat_count; ++i)
  260 + {
  261 + // get platform name
  262 + char buf[MAX_NAME_LEN];
  263 + CALL_CL_GUARDED(clGetPlatformInfo, (platforms[i], CL_PLATFORM_VENDOR,
  264 + sizeof(buf), buf, NULL));
  265 +
  266 + // does it match?
  267 + if (!plat_name || strstr(buf, plat_name))
  268 + {
  269 + // get number of devices in platform
  270 + cl_uint dev_count;
  271 + CALL_CL_GUARDED(clGetDeviceIDs, (platforms[i], CL_DEVICE_TYPE_ALL,
  272 + 0, NULL, &dev_count));
  273 +
  274 + // allocate memory, get list of device handles in platform
  275 + cl_device_id *devices =
  276 + (cl_device_id *) malloc(dev_count*sizeof(cl_device_id));
  277 + CHECK_SYS_ERROR(!devices, "allocating device array");
  278 +
  279 + CALL_CL_GUARDED(clGetDeviceIDs, (platforms[i], CL_DEVICE_TYPE_ALL,
  280 + dev_count, devices, NULL));
  281 +
  282 + // {{{ print device menu, if requested
  283 +#ifndef CL_HELPER_FORCE_INTERACTIVE
  284 + if (dev_name == CHOOSE_INTERACTIVELY) // yes, we want exactly that pointer
  285 +#endif
  286 + {
  287 + puts("Choose device:");
  288 + for (cl_uint j = 0; j < dev_count; ++j)
  289 + {
  290 + char buf[MAX_NAME_LEN];
  291 + CALL_CL_GUARDED(clGetDeviceInfo, (devices[j], CL_DEVICE_NAME,
  292 + sizeof(buf), buf, NULL));
  293 + printf("[%d] %s\n", j, buf);
  294 + }
  295 +
  296 + printf("Enter choice: ");
  297 + fflush(stdout);
  298 +
  299 + char *sel = read_a_line();
  300 + if (!sel)
  301 + {
  302 + fprintf(stderr, "error reading line from stdin");
  303 + abort();
  304 + }
  305 +
  306 + int int_sel = MIN(MAX(0, atoi(sel)), (int) dev_count-1);
  307 + free(sel);
  308 +
  309 + CALL_CL_GUARDED(clGetDeviceInfo, (devices[int_sel], CL_DEVICE_NAME,
  310 + sizeof(dev_sel_buf), dev_sel_buf, NULL));
  311 + dev_name = dev_sel_buf;
  312 + }
  313 +
  314 + // }}}
  315 +
  316 + // iterate over devices
  317 + for (cl_uint j = 0; j < dev_count; ++j)
  318 + {
  319 + // get device name
  320 + char buf[MAX_NAME_LEN];
  321 + CALL_CL_GUARDED(clGetDeviceInfo, (devices[j], CL_DEVICE_NAME,
  322 + sizeof(buf), buf, NULL));
  323 +
  324 + // does it match?
  325 + if (!dev_name || strstr(buf, dev_name))
  326 + {
  327 + if (idx == 0)
  328 + {
  329 + cl_platform_id plat = platforms[i];
  330 + cl_device_id dev = devices[j];
  331 +
  332 + free(devices);
  333 + free(platforms);
  334 +
  335 + // create a context
  336 + cl_context_properties cps[3] = {
  337 + CL_CONTEXT_PLATFORM, (cl_context_properties) plat, 0 };
  338 +
  339 + cl_int status;
  340 + *ctx = clCreateContext(
  341 + cps, 1, &dev, NULL, NULL, &status);
  342 + CHECK_CL_ERROR(status, "clCreateContext");
  343 +
  344 + // create a command queue
  345 + cl_command_queue_properties qprops = 0;
  346 + if (enable_profiling)
  347 + qprops |= CL_QUEUE_PROFILING_ENABLE;
  348 +
  349 + if (queue)
  350 + {
  351 + *queue = clCreateCommandQueue(*ctx, dev, qprops, &status);
  352 + CHECK_CL_ERROR(status, "clCreateCommandQueue");
  353 + }
  354 +
  355 + return;
  356 + }
  357 + else
  358 + --idx;
  359 + }
  360 + }
  361 +
  362 + free(devices);
  363 + }
  364 + }
  365 +
  366 + free(platforms);
  367 +
  368 + fputs("create_context_on: specified device not found.\n", stderr);
  369 + abort();
  370 +}
  371 +
  372 +
  373 +
  374 +
/* Reads the entire file `filename` into a freshly allocated, NUL-terminated
 * buffer and returns it. The caller owns the buffer and must free() it.
 * Every failure (open, seek, size query, allocation, read, close) is
 * reported through CHECK_SYS_ERROR.
 */
char *read_file(const char *filename)
{
  // Binary mode: in text mode some platforms translate line endings, so
  // fread could deliver fewer bytes than ftell reported and the size check
  // below would fail on a perfectly readable file.
  FILE *f = fopen(filename, "rb");
  CHECK_SYS_ERROR(!f, "read_file: opening file");

  // figure out file size
  CHECK_SYS_ERROR(fseek(f, 0, SEEK_END) < 0, "read_file: seeking to end");

  // ftell signals failure with -1; check before converting to size_t, where
  // it would otherwise turn into an enormous allocation request
  long end_pos = ftell(f);
  CHECK_SYS_ERROR(end_pos < 0, "read_file: determining file size");
  size_t size = (size_t) end_pos;

  CHECK_SYS_ERROR(fseek(f, 0, SEEK_SET) != 0,
      "read_file: seeking to start");

  // allocate memory (one extra byte for the terminator), slurp in entire file
  char *result = (char *) malloc(size+1);
  CHECK_SYS_ERROR(!result, "read_file: allocating file contents");
  CHECK_SYS_ERROR(fread(result, 1, size, f) < size,
      "read_file: reading file contents");

  // close, return
  CHECK_SYS_ERROR(fclose(f), "read_file: closing file");
  result[size] = '\0';

  return result;
}
  399 +
  400 +
  401 +
  402 +
  403 +static int printed_compiler_output_message = 0;
  404 +
  405 +cl_kernel kernel_from_string(cl_context ctx,
  406 + char const *knl, char const *knl_name, char const *options)
  407 +{
  408 + // create an OpenCL program (may have multiple kernels)
  409 + size_t sizes[] = { strlen(knl) };
  410 +
  411 + if (options && strlen(options) == 0)
  412 + {
  413 + // reportedly, some implementations dislike empty strings.
  414 + options = NULL;
  415 + }
  416 +
  417 + cl_int status;
  418 + cl_program program = clCreateProgramWithSource(ctx, 1, &knl, sizes, &status);
  419 + CHECK_CL_ERROR(status, "clCreateProgramWithSource");
  420 +
  421 + // build it
  422 + status = clBuildProgram(program, 0, NULL, options, NULL, NULL);
  423 +
  424 + {
  425 + // get build log and print it
  426 +
  427 + cl_device_id dev;
  428 + CALL_CL_GUARDED(clGetProgramInfo, (program, CL_PROGRAM_DEVICES,
  429 + sizeof(dev), &dev, NULL));
  430 +
  431 + size_t log_size;
  432 + CALL_CL_GUARDED(clGetProgramBuildInfo, (program, dev, CL_PROGRAM_BUILD_LOG,
  433 + 0, NULL, &log_size));
  434 +
  435 + bool do_print = status != CL_SUCCESS;
  436 + if (!do_print && log_size)
  437 + {
  438 + if (getenv("CL_HELPER_PRINT_COMPILER_OUTPUT"))
  439 + do_print = true;
  440 + else
  441 + {
  442 + if (!printed_compiler_output_message && !getenv("CL_HELPER_NO_COMPILER_OUTPUT_NAG"))
  443 + {
  444 + fprintf(stderr, "*** Kernel compilation resulted in non-empty log message.\n"
  445 + "*** Set environment variable CL_HELPER_PRINT_COMPILER_OUTPUT=1 to see more.\n"
  446 + "*** NOTE: this may include compiler warnings and other important messages\n"
  447 + "*** about your code.\n"
  448 + "*** Set CL_HELPER_NO_COMPILER_OUTPUT_NAG=1 to disable this message.\n");
  449 + printed_compiler_output_message = true;
  450 + }
  451 + }
  452 + }
  453 +
  454 + if (do_print)
  455 + {
  456 + char *log = (char *) malloc(log_size);
  457 + CHECK_SYS_ERROR(!log, "kernel_from_string: allocate log");
  458 +
  459 + char devname[MAX_NAME_LEN];
  460 + CALL_CL_GUARDED(clGetDeviceInfo, (dev, CL_DEVICE_NAME,
  461 + sizeof(devname), devname, NULL));
  462 +
  463 + CALL_CL_GUARDED(clGetProgramBuildInfo, (program, dev, CL_PROGRAM_BUILD_LOG,
  464 + log_size, log, NULL));
  465 + fprintf(stderr, "*** build of '%s' on '%s' said:\n%s\n*** (end of message)\n",
  466 + knl_name, devname, log);
  467 + }
  468 + }
  469 +
  470 + CHECK_CL_ERROR(status, "clBuildProgram");
  471 +
  472 + // fish the kernel out of the program
  473 + cl_kernel kernel = clCreateKernel(program, knl_name, &status);
  474 + CHECK_CL_ERROR(status, "clCreateKernel");
  475 +
  476 + CALL_CL_GUARDED(clReleaseProgram, (program));
  477 +
  478 + return kernel;
  479 +}
  480 +
  481 +
  482 +
  483 +
  484 +void print_device_info(cl_device_id device)
  485 +{
  486 + // adapted from http://graphics.stanford.edu/~yoel/notes/clInfo.c
  487 +
  488 +#define LONG_PROPS \
  489 + defn(VENDOR_ID), \
  490 + defn(MAX_COMPUTE_UNITS), \
  491 + defn(MAX_WORK_ITEM_DIMENSIONS), \
  492 + defn(MAX_WORK_GROUP_SIZE), \
  493 + defn(PREFERRED_VECTOR_WIDTH_CHAR), \
  494 + defn(PREFERRED_VECTOR_WIDTH_SHORT), \
  495 + defn(PREFERRED_VECTOR_WIDTH_INT), \
  496 + defn(PREFERRED_VECTOR_WIDTH_LONG), \
  497 + defn(PREFERRED_VECTOR_WIDTH_FLOAT), \
  498 + defn(PREFERRED_VECTOR_WIDTH_DOUBLE), \
  499 + defn(MAX_CLOCK_FREQUENCY), \
  500 + defn(ADDRESS_BITS), \
  501 + defn(MAX_MEM_ALLOC_SIZE), \
  502 + defn(IMAGE_SUPPORT), \
  503 + defn(MAX_READ_IMAGE_ARGS), \
  504 + defn(MAX_WRITE_IMAGE_ARGS), \
  505 + defn(IMAGE2D_MAX_WIDTH), \
  506 + defn(IMAGE2D_MAX_HEIGHT), \
  507 + defn(IMAGE3D_MAX_WIDTH), \
  508 + defn(IMAGE3D_MAX_HEIGHT), \
  509 + defn(IMAGE3D_MAX_DEPTH), \
  510 + defn(MAX_SAMPLERS), \
  511 + defn(MAX_PARAMETER_SIZE), \
  512 + defn(MEM_BASE_ADDR_ALIGN), \
  513 + defn(MIN_DATA_TYPE_ALIGN_SIZE), \
  514 + defn(GLOBAL_MEM_CACHELINE_SIZE), \
  515 + defn(GLOBAL_MEM_CACHE_SIZE), \
  516 + defn(GLOBAL_MEM_SIZE), \
  517 + defn(MAX_CONSTANT_BUFFER_SIZE), \
  518 + defn(MAX_CONSTANT_ARGS), \
  519 + defn(LOCAL_MEM_SIZE), \
  520 + defn(ERROR_CORRECTION_SUPPORT), \
  521 + defn(PROFILING_TIMER_RESOLUTION), \
  522 + defn(ENDIAN_LITTLE), \
  523 + defn(AVAILABLE), \
  524 + defn(COMPILER_AVAILABLE),
  525 +
  526 +#define STR_PROPS \
  527 + defn(NAME), \
  528 + defn(VENDOR), \
  529 + defn(PROFILE), \
  530 + defn(VERSION), \
  531 + defn(EXTENSIONS),
  532 +
  533 +#define HEX_PROPS \
  534 + defn(SINGLE_FP_CONFIG), \
  535 + defn(QUEUE_PROPERTIES),
  536 +
  537 +
  538 + printf("---------------------------------------------------------------------\n");
  539 +
  540 +
  541 + static struct { cl_device_info param; const char *name; } longProps[] = {
  542 +#define defn(X) { CL_DEVICE_##X, #X }
  543 + LONG_PROPS
  544 +#undef defn
  545 + { 0, NULL },
  546 + };
  547 + static struct { cl_device_info param; const char *name; } hexProps[] = {
  548 +#define defn(X) { CL_DEVICE_##X, #X }
  549 + HEX_PROPS
  550 +#undef defn
  551 + { 0, NULL },
  552 + };
  553 + static struct { cl_device_info param; const char *name; } strProps[] = {
  554 +#define defn(X) { CL_DEVICE_##X, #X }
  555 + STR_PROPS
  556 +#undef defn
  557 + { CL_DRIVER_VERSION, "DRIVER_VERSION" },
  558 + { 0, NULL },
  559 + };
  560 + cl_int status;
  561 + size_t size;
  562 + char buf[65536];
  563 + long long val; /* Avoids unpleasant surprises for some params */
  564 + int ii;
  565 +
  566 + for (ii = 0; strProps[ii].name != NULL; ii++)
  567 + {
  568 + status = clGetDeviceInfo(device, strProps[ii].param, sizeof buf, buf, &size);
  569 + if (status != CL_SUCCESS)
  570 + {
  571 + printf("Unable to get %s: %s!\n",
  572 + strProps[ii].name, cl_error_to_str(status));
  573 + continue;
  574 + }
  575 + if (size > sizeof buf)
  576 + {
  577 + printf("Large %s (%zd bytes)! Truncating to %ld!\n",
  578 + strProps[ii].name, size, sizeof buf);
  579 + }
  580 + printf("%s: %s\n",
  581 + strProps[ii].name, buf);
  582 + }
  583 + printf("\n");
  584 +
  585 + status = clGetDeviceInfo(device, CL_DEVICE_TYPE, sizeof val, &val, NULL);
  586 + if (status == CL_SUCCESS)
  587 + {
  588 + printf("Type: ");
  589 + if (val & CL_DEVICE_TYPE_DEFAULT)
  590 + {
  591 + val &= ~CL_DEVICE_TYPE_DEFAULT;
  592 + printf("Default ");
  593 + }
  594 + if (val & CL_DEVICE_TYPE_CPU)
  595 + {
  596 + val &= ~CL_DEVICE_TYPE_CPU;
  597 + printf("CPU ");
  598 + }
  599 + if (val & CL_DEVICE_TYPE_GPU)
  600 + {
  601 + val &= ~CL_DEVICE_TYPE_GPU;
  602 + printf("GPU ");
  603 + }
  604 + if (val & CL_DEVICE_TYPE_ACCELERATOR)
  605 + {
  606 + val &= ~CL_DEVICE_TYPE_ACCELERATOR;
  607 + printf("Accelerator ");
  608 + }
  609 + if (val != 0) {
  610 + printf("Unknown (0x%llx) ", val);
  611 + }
  612 + printf("\n");
  613 + }
  614 + else
  615 + {
  616 + printf("Unable to get TYPE: %s!\n",
  617 + cl_error_to_str(status));
  618 + }
  619 +
  620 + status = clGetDeviceInfo(device, CL_DEVICE_EXECUTION_CAPABILITIES,
  621 + sizeof val, &val, NULL);
  622 + if (status == CL_SUCCESS)
  623 + {
  624 + printf("EXECUTION_CAPABILITIES: ");
  625 + if (val & CL_EXEC_KERNEL)
  626 + {
  627 + val &= ~CL_EXEC_KERNEL;
  628 + printf("Kernel ");
  629 + }
  630 + if (val & CL_EXEC_NATIVE_KERNEL)
  631 + {
  632 + val &= ~CL_EXEC_NATIVE_KERNEL;
  633 + printf("Native ");
  634 + }
  635 + if (val)
  636 + printf("Unknown (0x%llx) ", val);
  637 +
  638 + printf("\n");
  639 + }
  640 + else
  641 + {
  642 + printf("Unable to get EXECUTION_CAPABILITIES: %s!\n",
  643 + cl_error_to_str(status));
  644 + }
  645 +
  646 + status = clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE,
  647 + sizeof val, &val, NULL);
  648 + if (status == CL_SUCCESS)
  649 + {
  650 + static const char *cacheTypes[] = { "None", "Read-Only", "Read-Write" };
  651 + static int numTypes = sizeof cacheTypes / sizeof cacheTypes[0];
  652 +
  653 + printf("GLOBAL_MEM_CACHE_TYPE: %s (%lld)\n",
  654 + val < numTypes ? cacheTypes[val] : "???", val);
  655 + }
  656 + else
  657 + {
  658 + printf("Unable to get GLOBAL_MEM_CACHE_TYPE: %s!\n",
  659 + cl_error_to_str(status));
  660 + }
  661 +
  662 + status = clGetDeviceInfo(device,
  663 + CL_DEVICE_LOCAL_MEM_TYPE, sizeof val, &val, NULL);
  664 +
  665 + if (status == CL_SUCCESS)
  666 + {
  667 + static const char *lmemTypes[] = { "???", "Local", "Global" };
  668 + static int numTypes = sizeof lmemTypes / sizeof lmemTypes[0];
  669 +
  670 + printf("CL_DEVICE_LOCAL_MEM_TYPE: %s (%lld)\n",
  671 + val < numTypes ? lmemTypes[val] : "???", val);
  672 + }
  673 + else
  674 + {
  675 + printf("Unable to get CL_DEVICE_LOCAL_MEM_TYPE: %s!\n",
  676 + cl_error_to_str(status));
  677 + }
  678 +
  679 + for (ii = 0; hexProps[ii].name != NULL; ii++)
  680 + {
  681 + status = clGetDeviceInfo(device, hexProps[ii].param, sizeof val, &val, &size);
  682 + if (status != CL_SUCCESS)
  683 + {
  684 + printf("Unable to get %s: %s!\n",
  685 + hexProps[ii].name, cl_error_to_str(status));
  686 + continue;
  687 + }
  688 + if (size > sizeof val)
  689 + {
  690 + printf("Large %s (%zd bytes)! Truncating to %ld!\n",
  691 + hexProps[ii].name, size, sizeof val);
  692 + }
  693 + printf("%s: 0x%llx\n", hexProps[ii].name, val);
  694 + }
  695 + printf("\n");
  696 +
  697 + for (ii = 0; longProps[ii].name != NULL; ii++)
  698 + {
  699 + status = clGetDeviceInfo(device, longProps[ii].param, sizeof val, &val, &size);
  700 + if (status != CL_SUCCESS)
  701 + {
  702 + printf("Unable to get %s: %s!\n",
  703 + longProps[ii].name, cl_error_to_str(status));
  704 + continue;
  705 + }
  706 + if (size > sizeof val)
  707 + {
  708 + printf("Large %s (%zd bytes)! Truncating to %ld!\n",
  709 + longProps[ii].name, size, sizeof val);
  710 + }
  711 + printf("%s: %lld\n", longProps[ii].name, val);
  712 + }
  713 +
  714 + {
  715 + size_t size;
  716 + CALL_CL_GUARDED(clGetDeviceInfo,
  717 + (device, CL_DEVICE_MAX_WORK_ITEM_SIZES, 0, 0, &size));
  718 +
  719 + size_t res_vec[size/sizeof(size_t)]; // C99 VLA yay!
  720 +
  721 + CALL_CL_GUARDED(clGetDeviceInfo,
  722 + (device, CL_DEVICE_MAX_WORK_ITEM_SIZES, size, res_vec, &size));
  723 +
  724 + printf("MAX_WORK_GROUP_SIZES: "); // a tiny lie
  725 + for (size_t i = 0; i < size/sizeof(size_t); ++i)
  726 + printf("%zd ", res_vec[i]);
  727 + printf("\n");
  728 + }
  729 + printf("---------------------------------------------------------------------\n");
  730 +}
  731 +
  732 +
  733 +
  734 +void print_device_info_from_queue(cl_command_queue queue)
  735 +{
  736 + cl_device_id dev;
  737 + CALL_CL_GUARDED(clGetCommandQueueInfo,
  738 + (queue, CL_QUEUE_DEVICE, sizeof dev, &dev, NULL));
  739 +
  740 + print_device_info(dev);
  741 +}
... ...
  1 +/*
  2 + * Copyright (c) 2010, 2012 Andreas Kloeckner
  3 + *
  4 + * Permission is hereby granted, free of charge, to any person obtaining a copy
  5 + * of this software and associated documentation files (the "Software"), to deal
  6 + * in the Software without restriction, including without limitation the rights
  7 + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  8 + * copies of the Software, and to permit persons to whom the Software is
  9 + * furnished to do so, subject to the following conditions:
  10 + *
  11 + * The above copyright notice and this permission notice shall be included in
  12 + * all copies or substantial portions of the Software.
  13 + *
  14 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  17 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  19 + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  20 + * THE SOFTWARE.
  21 + */
  22 +
  23 +
  24 +
  25 +
  26 +#ifndef NYUHPC_CL_HELPER
  27 +#define NYUHPC_CL_HELPER
  28 +
  29 +#include <stdarg.h>
  30 +#include <stdio.h>
  31 +#include <stdlib.h>
  32 +
  33 +#ifdef __APPLE__
  34 +#include <OpenCL/opencl.h>
  35 +#else
  36 +#include <CL/cl.h>
  37 +#endif
  38 +
  39 +/* An error check macro for OpenCL.
  40 + *
  41 + * Usage:
  42 + * CHECK_CL_ERROR(status_code_from_a_cl_operation, "function_name")
  43 + *
  44 + * It will abort with a message if an error occurred.
  45 + */
  46 +
  47 +#define CHECK_CL_ERROR(STATUS_CODE, WHAT) \
  48 + if ((STATUS_CODE) != CL_SUCCESS) \
  49 + { \
  50 + fprintf(stderr, \
  51 + "*** '%s' in '%s' on line %d failed with error '%s'.\n", \
  52 + WHAT, __FILE__, __LINE__, \
  53 + cl_error_to_str(STATUS_CODE)); \
  54 + abort(); \
  55 + }
  56 +
  57 +/* A more automated error check macro for OpenCL, for use with clXxxx
  58 + * functions that return status codes. (Not all of them do, notably
  59 + * clCreateXxx do not.)
  60 + *
  61 + * Usage:
  62 + * CALL_CL_GUARDED(clFunction, (arg1, arg2));
  63 + *
  64 + * Note the slightly strange comma between the function name and the
  65 + * argument list.
  66 + */
  67 +
  68 +#define CALL_CL_GUARDED(NAME, ARGLIST) \
  69 + { \
  70 + cl_int status_code; \
  71 + status_code = NAME ARGLIST; \
  72 + CHECK_CL_ERROR(status_code, #NAME); \
  73 + }
  74 +
  75 +/* An error check macro for Unix system functions. If "COND" is true, then the
  76 + * last system error ("errno") is printed along with MSG, which is supposed to
  77 + * be a string describing what you were doing.
  78 + *
  79 + * Example:
  80 + * CHECK_SYS_ERROR(dave != 0, "opening hatch");
  81 + */
  82 +#define CHECK_SYS_ERROR(COND, MSG) \
  83 + if (COND) \
  84 + { \
  85 + perror(MSG); \
  86 + abort(); \
  87 + }
  88 +
  89 +/* Return a string describing the OpenCL error code 'e'.
  90 + */
  91 +const char *cl_error_to_str(cl_int e);
  92 +
  93 +/* Print a list of available OpenCL platforms and devices
  94 + * to standard output.
  95 + */
  96 +void print_platforms_devices();
  97 +
  98 +/* Create an OpenCL context and a matching command queue on a platform from a
  99 + * vendor whose name contains 'plat_name' on a device whose name contains
  100 + * 'dev_name'. Both 'plat_name' and 'dev_name' may be NULL, indicating no
  101 + * preference in the matter.
  102 + *
  103 + * If multiple devices match both 'plat_name' and 'dev_name', then 'idx'
  104 + * prescribes the number of the device that should be chosen.
  105 + *
  106 + * You may also use the special value CHOOSE_INTERACTIVELY to offer the user
  107 + * a choice. You should use this value for code you turn in.
  108 + *
  109 + * This function always succeeds. (If an error occurs, the program
  110 + * is aborted.)
  111 + *
  112 + * You can force interactive querying by defining the
  113 + * CL_HELPER_FORCE_INTERACTIVE macro when compiling cl-helper.c.
  114 + * You may do so by passing the -DCL_HELPER_FORCE_INTERACTIVE
  115 + * compiler option.
  116 + */
  117 +extern const char *CHOOSE_INTERACTIVELY;
  118 +void create_context_on(const char *plat_name, const char*dev_name, cl_uint
  119 + idx, cl_context *ctx, cl_command_queue *queue, int enable_profiling);
  120 +
  121 +/* Read contents of file 'filename'.
  122 + * Return as a new string. You must free the string when you're done with it.
  123 + *
  124 + * This function always succeeds. (If an error occurs, the program
  125 + * is aborted.)
  126 + */
  127 +char *read_file(const char *filename);
  128 +
  129 +/* Create a new OpenCL kernel from the code in the string 'knl'.
  130 + * 'knl_name' is the name of the kernel function, and 'options',
  131 + * if not NULL, is a string containing compiler flags.
  132 + *
  133 + * You must release the resulting kernel when you're done
  134 + * with it.
  135 + *
  136 + * This function always succeeds. (If an error occurs, the program
  137 + * is aborted.)
  138 + */
  139 +cl_kernel kernel_from_string(cl_context ctx,
  140 + char const *knl, char const *knl_name, char const *options);
  141 +
  142 +/* Print information about a device, found from either the
  143 + * queue or the device_id.
  144 + */
  145 +void print_device_info(cl_device_id device);
  146 +void print_device_info_from_queue(cl_command_queue queue);
  147 +
  148 +#define SET_1_KERNEL_ARG(knl, arg0) \
  149 + CALL_CL_GUARDED(clSetKernelArg, (knl, 0, sizeof(arg0), &arg0));
  150 +
  151 +#define SET_2_KERNEL_ARGS(knl, arg0, arg1) \
  152 + CALL_CL_GUARDED(clSetKernelArg, (knl, 0, sizeof(arg0), &arg0)); \
  153 + CALL_CL_GUARDED(clSetKernelArg, (knl, 1, sizeof(arg1), &arg1));
  154 +
  155 +#define SET_3_KERNEL_ARGS(knl, arg0, arg1, arg2) \
  156 + CALL_CL_GUARDED(clSetKernelArg, (knl, 0, sizeof(arg0), &arg0)); \
  157 + CALL_CL_GUARDED(clSetKernelArg, (knl, 1, sizeof(arg1), &arg1)); \
  158 + CALL_CL_GUARDED(clSetKernelArg, (knl, 2, sizeof(arg2), &arg2));
  159 +
  160 +#define SET_4_KERNEL_ARGS(knl, arg0, arg1, arg2, arg3) \
  161 + CALL_CL_GUARDED(clSetKernelArg, (knl, 0, sizeof(arg0), &arg0)); \
  162 + CALL_CL_GUARDED(clSetKernelArg, (knl, 1, sizeof(arg1), &arg1)); \
  163 + CALL_CL_GUARDED(clSetKernelArg, (knl, 2, sizeof(arg2), &arg2)); \
  164 + CALL_CL_GUARDED(clSetKernelArg, (knl, 3, sizeof(arg3), &arg3));
  165 +
  166 +#define SET_5_KERNEL_ARGS(knl, arg0, arg1, arg2, arg3, arg4) \
  167 + CALL_CL_GUARDED(clSetKernelArg, (knl, 0, sizeof(arg0), &arg0)); \
  168 + CALL_CL_GUARDED(clSetKernelArg, (knl, 1, sizeof(arg1), &arg1)); \
  169 + CALL_CL_GUARDED(clSetKernelArg, (knl, 2, sizeof(arg2), &arg2)); \
  170 + CALL_CL_GUARDED(clSetKernelArg, (knl, 3, sizeof(arg3), &arg3)); \
  171 + CALL_CL_GUARDED(clSetKernelArg, (knl, 4, sizeof(arg4), &arg4));
  172 +
  173 +#define SET_6_KERNEL_ARGS(knl, arg0, arg1, arg2, arg3, arg4, arg5) \
  174 + CALL_CL_GUARDED(clSetKernelArg, (knl, 0, sizeof(arg0), &arg0)); \
  175 + CALL_CL_GUARDED(clSetKernelArg, (knl, 1, sizeof(arg1), &arg1)); \
  176 + CALL_CL_GUARDED(clSetKernelArg, (knl, 2, sizeof(arg2), &arg2)); \
  177 + CALL_CL_GUARDED(clSetKernelArg, (knl, 3, sizeof(arg3), &arg3)); \
  178 + CALL_CL_GUARDED(clSetKernelArg, (knl, 4, sizeof(arg4), &arg4)); \
  179 + CALL_CL_GUARDED(clSetKernelArg, (knl, 5, sizeof(arg5), &arg5));
  180 +
  181 +#define SET_7_KERNEL_ARGS(knl, arg0, arg1, arg2, arg3, arg4, arg5, arg6) \
  182 + CALL_CL_GUARDED(clSetKernelArg, (knl, 0, sizeof(arg0), &arg0)); \
  183 + CALL_CL_GUARDED(clSetKernelArg, (knl, 1, sizeof(arg1), &arg1)); \
  184 + CALL_CL_GUARDED(clSetKernelArg, (knl, 2, sizeof(arg2), &arg2)); \
  185 + CALL_CL_GUARDED(clSetKernelArg, (knl, 3, sizeof(arg3), &arg3)); \
  186 + CALL_CL_GUARDED(clSetKernelArg, (knl, 4, sizeof(arg4), &arg4)); \
  187 + CALL_CL_GUARDED(clSetKernelArg, (knl, 5, sizeof(arg5), &arg5)); \
  188 + CALL_CL_GUARDED(clSetKernelArg, (knl, 6, sizeof(arg6), &arg6));
  189 +
  190 +#define SET_8_KERNEL_ARGS(knl, arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7) \
  191 + CALL_CL_GUARDED(clSetKernelArg, (knl, 0, sizeof(arg0), &arg0)); \
  192 + CALL_CL_GUARDED(clSetKernelArg, (knl, 1, sizeof(arg1), &arg1)); \
  193 + CALL_CL_GUARDED(clSetKernelArg, (knl, 2, sizeof(arg2), &arg2)); \
  194 + CALL_CL_GUARDED(clSetKernelArg, (knl, 3, sizeof(arg3), &arg3)); \
  195 + CALL_CL_GUARDED(clSetKernelArg, (knl, 4, sizeof(arg4), &arg4)); \
  196 + CALL_CL_GUARDED(clSetKernelArg, (knl, 5, sizeof(arg5), &arg5)); \
  197 + CALL_CL_GUARDED(clSetKernelArg, (knl, 6, sizeof(arg6), &arg6)); \
  198 + CALL_CL_GUARDED(clSetKernelArg, (knl, 7, sizeof(arg7), &arg7));
  199 +
  200 +#define SET_9_KERNEL_ARGS(knl, arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8) \
  201 + CALL_CL_GUARDED(clSetKernelArg, (knl, 0, sizeof(arg0), &arg0)); \
  202 + CALL_CL_GUARDED(clSetKernelArg, (knl, 1, sizeof(arg1), &arg1)); \
  203 + CALL_CL_GUARDED(clSetKernelArg, (knl, 2, sizeof(arg2), &arg2)); \
  204 + CALL_CL_GUARDED(clSetKernelArg, (knl, 3, sizeof(arg3), &arg3)); \
  205 + CALL_CL_GUARDED(clSetKernelArg, (knl, 4, sizeof(arg4), &arg4)); \
  206 + CALL_CL_GUARDED(clSetKernelArg, (knl, 5, sizeof(arg5), &arg5)); \
  207 + CALL_CL_GUARDED(clSetKernelArg, (knl, 6, sizeof(arg6), &arg6)); \
  208 + CALL_CL_GUARDED(clSetKernelArg, (knl, 7, sizeof(arg7), &arg7)); \
  209 + CALL_CL_GUARDED(clSetKernelArg, (knl, 8, sizeof(arg8), &arg8));
  210 +
  211 +#define SET_10_KERNEL_ARGS(knl, arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9) \
  212 + CALL_CL_GUARDED(clSetKernelArg, (knl, 0, sizeof(arg0), &arg0)); \
  213 + CALL_CL_GUARDED(clSetKernelArg, (knl, 1, sizeof(arg1), &arg1)); \
  214 + CALL_CL_GUARDED(clSetKernelArg, (knl, 2, sizeof(arg2), &arg2)); \
  215 + CALL_CL_GUARDED(clSetKernelArg, (knl, 3, sizeof(arg3), &arg3)); \
  216 + CALL_CL_GUARDED(clSetKernelArg, (knl, 4, sizeof(arg4), &arg4)); \
  217 + CALL_CL_GUARDED(clSetKernelArg, (knl, 5, sizeof(arg5), &arg5)); \
  218 + CALL_CL_GUARDED(clSetKernelArg, (knl, 6, sizeof(arg6), &arg6)); \
  219 + CALL_CL_GUARDED(clSetKernelArg, (knl, 7, sizeof(arg7), &arg7)); \
  220 + CALL_CL_GUARDED(clSetKernelArg, (knl, 8, sizeof(arg8), &arg8)); \
  221 + CALL_CL_GUARDED(clSetKernelArg, (knl, 9, sizeof(arg9), &arg9));
  222 +
  223 +#define SET_11_KERNEL_ARGS(knl, arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10) \
  224 + CALL_CL_GUARDED(clSetKernelArg, (knl, 0, sizeof(arg0), &arg0)); \
  225 + CALL_CL_GUARDED(clSetKernelArg, (knl, 1, sizeof(arg1), &arg1)); \
  226 + CALL_CL_GUARDED(clSetKernelArg, (knl, 2, sizeof(arg2), &arg2)); \
  227 + CALL_CL_GUARDED(clSetKernelArg, (knl, 3, sizeof(arg3), &arg3)); \
  228 + CALL_CL_GUARDED(clSetKernelArg, (knl, 4, sizeof(arg4), &arg4)); \
  229 + CALL_CL_GUARDED(clSetKernelArg, (knl, 5, sizeof(arg5), &arg5)); \
  230 + CALL_CL_GUARDED(clSetKernelArg, (knl, 6, sizeof(arg6), &arg6)); \
  231 + CALL_CL_GUARDED(clSetKernelArg, (knl, 7, sizeof(arg7), &arg7)); \
  232 + CALL_CL_GUARDED(clSetKernelArg, (knl, 8, sizeof(arg8), &arg8)); \
  233 + CALL_CL_GUARDED(clSetKernelArg, (knl, 9, sizeof(arg9), &arg9)); \
  234 + CALL_CL_GUARDED(clSetKernelArg, (knl, 10, sizeof(arg10), &arg10));
  235 +
  236 +#define SET_12_KERNEL_ARGS(knl, arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10, arg11) \
  237 + CALL_CL_GUARDED(clSetKernelArg, (knl, 0, sizeof(arg0), &arg0)); \
  238 + CALL_CL_GUARDED(clSetKernelArg, (knl, 1, sizeof(arg1), &arg1)); \
  239 + CALL_CL_GUARDED(clSetKernelArg, (knl, 2, sizeof(arg2), &arg2)); \
  240 + CALL_CL_GUARDED(clSetKernelArg, (knl, 3, sizeof(arg3), &arg3)); \
  241 + CALL_CL_GUARDED(clSetKernelArg, (knl, 4, sizeof(arg4), &arg4)); \
  242 + CALL_CL_GUARDED(clSetKernelArg, (knl, 5, sizeof(arg5), &arg5)); \
  243 + CALL_CL_GUARDED(clSetKernelArg, (knl, 6, sizeof(arg6), &arg6)); \
  244 + CALL_CL_GUARDED(clSetKernelArg, (knl, 7, sizeof(arg7), &arg7)); \
  245 + CALL_CL_GUARDED(clSetKernelArg, (knl, 8, sizeof(arg8), &arg8)); \
  246 + CALL_CL_GUARDED(clSetKernelArg, (knl, 9, sizeof(arg9), &arg9)); \
  247 + CALL_CL_GUARDED(clSetKernelArg, (knl, 10, sizeof(arg10), &arg10)); \
  248 + CALL_CL_GUARDED(clSetKernelArg, (knl, 11, sizeof(arg11), &arg11));
  249 +
  250 +#endif
... ...
  1 +#include "cl-helper.h"
  2 +
  3 +int main(int argc, char **argv)
  4 +{
  5 + print_platforms_devices();
  6 + return 0;
  7 +}
... ...
  1 +#! /bin/bash
  2 +
  3 +NCPUS=$(grep processor /proc/cpuinfo | wc -l)
  4 +for i in $(seq 0 $((NCPUS-1)) ); do
  5 + echo $i
  6 + cpufreq-set -g $1 -c $i
  7 +done
... ...
  1 +#! /bin/sh
  2 +cat /sys/devices/system/cpu/cpu*/cpufreq/scaling_cur_freq
... ...
  1 +#ifdef __APPLE__
  2 +
  3 +#include <sys/time.h>
  4 +
  5 +typedef struct timeval timestamp_type;
  6 +
  7 +static void get_timestamp(timestamp_type *t)
  8 +{
  9 + gettimeofday(t, NULL);
  10 +}
  11 +
  12 +static double timestamp_diff_in_seconds(timestamp_type start,
  13 +timestamp_type end)
  14 +{
  15 + /* Perform the carry for the later subtraction by updating start. */
  16 + if (end.tv_usec < start.tv_usec) {
  17 + int nsec = (start.tv_usec - end.tv_usec) / 1000000 + 1;
  18 + start.tv_usec -= 1000000 * nsec;
  19 + start.tv_sec += nsec;
  20 + }
  21 + if (end.tv_usec - start.tv_usec > 1000000) {
  22 + int nsec = (end.tv_usec - start.tv_usec) / 1000000;
  23 + start.tv_usec += 1000000 * nsec;
  24 + start.tv_sec -= nsec;
  25 + }
  26 +
  27 + return end.tv_sec - start.tv_sec + (end.tv_usec - start.tv_usec)*1e-6;
  28 +}
  29 +
  30 +#else
  31 +
  32 +#include <time.h>
  33 +
  34 +typedef struct timespec timestamp_type;
  35 +
  36 +static void get_timestamp(timestamp_type *t)
  37 +{
  38 + clock_gettime(CLOCK_REALTIME, t);
  39 +}
  40 +
  41 +static double timestamp_diff_in_seconds(timestamp_type start, timestamp_type end)
  42 +{
  43 + struct timespec temp;
  44 + if ((end.tv_nsec-start.tv_nsec)<0) {
  45 + temp.tv_sec = end.tv_sec-start.tv_sec-1;
  46 + temp.tv_nsec = 1000000000+end.tv_nsec-start.tv_nsec;
  47 + } else {
  48 + temp.tv_sec = end.tv_sec-start.tv_sec;
  49 + temp.tv_nsec = end.tv_nsec-start.tv_nsec;
  50 + }
  51 + return temp.tv_sec + 1e-9*temp.tv_nsec;
  52 +}
  53 +
  54 +#endif
... ...
  1 +#pragma OPENCL EXTENSION cl_khr_fp64: enable
  2 +
  3 +__kernel void sum(
  4 + __global const float *a,
  5 + __global const float *b,
  6 + __global float *c,
  7 + long n)
  8 +{
  9 + int gid = get_global_id(0);
  10 + if (gid < n)
  11 + c[gid] = a[gid] + b[gid];
  12 +}
... ...
Please register or login to post a comment