Commit 62adb687b110a726b1fa7f81ee7944b101a1a809

Authored by Georg Hopp
1 parent e8963201

Latest changes and polishments...

1   -part*
  1 +part1
  2 +part2
  3 +part3
  4 +part4
  5 +part4.clbin
  6 +part5
  7 +part6
  8 +part8
  9 +createclbin
  10 +part6bin
... ...
1   -CFLAGS += -O2 -march=native -std=c99 -I/usr/local/include
2   -LIBS += -lcl -L/usr/local/lib64/beignet
  1 +CFLAGS += -O0 -Werror -ggdb -std=c99
  2 +LIBS += -lOpenCL
3 3 CC = cc
4 4
5   -BINARIES = part1 part2 part3 part4 part5 part6 part8
  5 +BINARIES = part1 part2 part3 part4 part5 part6 part8 createclbin part6bin
6 6
7 7 all: $(BINARIES)
8 8
9 9 %: %.c
10 10 $(CC) $(CFLAGS) $(LIBS) -o $@ $<
11   - strip $@
  11 +
  12 +part4.clbin: createclbin
  13 + createclbin
12 14
13 15 .PHONY: clean
14 16
15 17 clean:
16   - rm $(BINARIES)
  18 + @rm -f $(BINARIES) part4.clbin
... ...
1   -OpenCL tutorial notes
2   -=====================
  1 +# OpenCL cookbook code
3 2
4   -URL: http://dhruba.name/2012/08/21/opencl-cookbook-series-reference/
  3 +These are some code examples from the
  4 +[OpenCL cookbook tutorial series](http://dhruba.name/2012/08/21/opencl-cookbook-series-reference/).
  5 +
  6 +## Description
  7 +
  8 +Basically I just took the examples from that tutorial and put them into a
  9 +repo.
  10 +
  11 +## Requirements
  12 +
  13 +Some OpenCL-capable hardware and the corresponding OpenCL library exposing the
  14 +OpenCL API. I tested this on an Intel GPU (Intel Corporation Haswell-ULT
  15 +Integrated Graphics Controller (rev 09)) with the
  16 +[beignet](https://www.freedesktop.org/wiki/Software/Beignet/)
  17 +open source library.
  18 +
  19 +## License
  20 +
  21 +As far as I can tell, the code is free of any license. Its purpose is just to
  22 +demonstrate OpenCL.
  23 +
  24 +## Author
  25 +
  26 +Dhruba Bandopadhyay
... ...
  1 +#include <stdio.h>
  2 +#include <stdlib.h>
  3 +#include <assert.h>
  4 +
  5 +#ifdef __APPLE__
  6 +#include <OpenCL/opencl.h>
  7 +#else
  8 +#include <CL/cl.h>
  9 +#endif
  10 +
  11 +#define KERNEL "part4.cl"
  12 +
  13 +size_t
  14 +clPutProgramBinaryToFile(
  15 + const char * const filename,
  16 + const cl_program * const program)
  17 +{
  18 + cl_int cl_status;
  19 +
  20 + cl_uint num_devices;
  21 + cl_status = clGetProgramInfo(
  22 + *program,
  23 + CL_PROGRAM_NUM_DEVICES,
  24 + sizeof(cl_uint),
  25 + &num_devices,
  26 + NULL);
  27 +
  28 + if (cl_status != CL_SUCCESS) {
  29 + return 0;
  30 + }
  31 +
  32 + cl_device_id devices[num_devices];
  33 + cl_status =
  34 + clGetProgramInfo(
  35 + *program,
  36 + CL_PROGRAM_DEVICES,
  37 + sizeof(cl_device_id) * num_devices,
  38 + devices,
  39 + NULL);
  40 +
  41 + if (cl_status != CL_SUCCESS) {
  42 + return 0;
  43 + }
  44 +
  45 + size_t binary_size[num_devices];
  46 + cl_status =
  47 + clGetProgramInfo(
  48 + *program,
  49 + CL_PROGRAM_BINARY_SIZES,
  50 + sizeof(size_t) * num_devices,
  51 + binary_size,
  52 + NULL);
  53 +
  54 + if (cl_status != CL_SUCCESS) {
  55 + return 0;
  56 + }
  57 +
  58 + unsigned char * binaries[num_devices];
  59 + for (cl_uint i = 0; i < num_devices; i++) {
  60 + binaries[i] = (unsigned char *) malloc(binary_size[i]);
  61 + }
  62 + cl_status =
  63 + clGetProgramInfo(
  64 + *program,
  65 + CL_PROGRAM_BINARIES,
  66 + sizeof(unsigned char *) * num_devices,
  67 + binaries,
  68 + NULL);
  69 +
  70 + if (cl_status != CL_SUCCESS) {
  71 + for (cl_uint i = 0; i < num_devices; i++) {
  72 + free(binaries[i]);
  73 + }
  74 + return 0;
  75 + }
  76 +
  77 + FILE * handle = fopen(filename, "wb");
  78 + size_t size = fwrite(binaries[0], sizeof(unsigned char), binary_size[0], handle);
  79 +
  80 + for (cl_uint i = 0; i < num_devices; i++) {
  81 + free(binaries[i]);
  82 + }
  83 + fclose(handle);
  84 +
  85 + return size;
  86 +}
  87 +
  88 +size_t
  89 +clGetProgramFromSourceFile(
  90 + const char * const filename,
  91 + const cl_context * const context,
  92 + cl_program * const program)
  93 +{
  94 + /*
  95 + * Get a build OpenCL program from source
  96 + */
  97 + FILE * handle;
  98 + char * buffer;
  99 + size_t size;
  100 +
  101 + cl_int cl_status;
  102 + cl_uint num_devices;
  103 +
  104 + // get size of kernel source
  105 + handle = fopen(filename, "r");
  106 + fseek(handle, 0, SEEK_END);
  107 + size = ftell(handle);
  108 + rewind(handle);
  109 +
  110 + // read kernel source into buffer
  111 + buffer = (char*) malloc(size + 1);
  112 + buffer[size] = '\0';
  113 +
  114 + if (size != fread(buffer, sizeof(char), size, handle))
  115 + {
  116 + fclose(handle);
  117 + free(buffer);
  118 + return 0;
  119 + }
  120 +
  121 + fclose(handle);
  122 +
  123 + // create and build program
  124 + *program = clCreateProgramWithSource(
  125 + *context, 1, (const char**) &buffer, &size, &cl_status);
  126 +
  127 + free(buffer);
  128 +
  129 + if (cl_status != CL_SUCCESS) {
  130 + return 0;
  131 + }
  132 +
  133 + cl_status = clGetContextInfo(
  134 + *context,
  135 + CL_CONTEXT_NUM_DEVICES,
  136 + sizeof(cl_uint),
  137 + &num_devices,
  138 + NULL);
  139 +
  140 + if (cl_status != CL_SUCCESS) {
  141 + clReleaseProgram(*program);
  142 + return 0;
  143 + }
  144 +
  145 + cl_device_id devices[num_devices];
  146 +
  147 + cl_status = clGetContextInfo(
  148 + *context,
  149 + CL_CONTEXT_DEVICES,
  150 + sizeof(cl_device_id) * num_devices,
  151 + devices,
  152 + NULL);
  153 +
  154 + cl_status = clBuildProgram(
  155 + *program, 1, devices, "-Werror -cl-std=CL1.1", NULL, NULL);
  156 +
  157 + if (cl_status != CL_SUCCESS) {
  158 + clReleaseProgram(*program);
  159 + return 0;
  160 + }
  161 +
  162 + return size;
  163 +}
  164 +
  165 +int
  166 +clInit(cl_context * const context)
  167 +{
  168 + /*
  169 + * TODO add failure handling
  170 + */
  171 + cl_platform_id platform;
  172 + cl_uint num_devices;
  173 +
  174 + // get first available sdk and gpu and create context
  175 + clGetPlatformIDs(1, &platform, NULL);
  176 + clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 10, NULL, &num_devices);
  177 + printf("%u devices during init.\n", num_devices);
  178 + cl_device_id devices[num_devices];
  179 + clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, num_devices, devices, NULL);
  180 + *context = clCreateContext(NULL, num_devices, devices, NULL, NULL, NULL);
  181 +
  182 + return 0;
  183 +}
  184 +
  185 +int main()
  186 +{
  187 + cl_int cl_status;
  188 + cl_context context;
  189 + cl_program program;
  190 +
  191 + size_t sourceSize;
  192 + size_t count;
  193 +
  194 + clInit(&context);
  195 + sourceSize =
  196 + clGetProgramFromSourceFile(KERNEL, &context, &program);
  197 +
  198 + assert(sourceSize != 0);
  199 +
  200 + count = clPutProgramBinaryToFile(KERNEL "bin", &program);
  201 +
  202 + assert(count != 0);
  203 +
  204 + clReleaseProgram(program);
  205 + clReleaseContext(context);
  206 +
  207 + return 0;
  208 +}
  209 +
  210 +// vim: set ft=c ts=4 sw=4:
... ...
  1 +#include <stdio.h>
  2 +#include <stdlib.h>
  3 +#include <assert.h>
  4 +#ifdef __APPLE__
  5 +#include <OpenCL/opencl.h>
  6 +#else
  7 +#include <CL/cl.h>
  8 +#endif
  9 +
  10 +#define KERNEL "part4.clbin"
  11 +
  12 +int main() {
  13 +
  14 + cl_platform_id platform; cl_device_id device; cl_context context;
  15 + cl_program program; cl_kernel kernel; cl_command_queue queue;
  16 + cl_mem kernelBuffer;
  17 +
  18 + FILE* programHandle; char *programBuffer; char *programLog;
  19 + size_t programSize; char hostBuffer[32];
  20 +
  21 + // get first available sdk and gpu and create context
  22 + clGetPlatformIDs(1, &platform, NULL);
  23 + clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
  24 + context = clCreateContext(NULL, 1, &device, NULL, NULL, NULL);
  25 +
  26 + // get size of kernel source
  27 + programHandle = fopen(KERNEL, "rb");
  28 + fseek(programHandle, 0, SEEK_END);
  29 + programSize = ftell(programHandle);
  30 + rewind(programHandle);
  31 +
  32 + // read kernel source into buffer
  33 + programBuffer = (char*) malloc(programSize + 1);
  34 + programBuffer[programSize] = '\0';
  35 + assert (programSize == fread(programBuffer, sizeof(char), programSize, programHandle));
  36 +
  37 + fclose(programHandle);
  38 +
  39 + // create and build program
  40 + program = clCreateProgramWithBinary(context, 1, &device,
  41 + (const size_t*)&programSize, (const unsigned char **) &programBuffer, NULL, NULL);
  42 + free(programBuffer);
  43 +
  44 + // create kernel and command queue
  45 + kernel = clCreateKernel(program, "hello", NULL);
  46 + queue = clCreateCommandQueue(context, device, 0, NULL);
  47 +
  48 + // create kernel argument buffer and set it into kernel
  49 + kernelBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
  50 + 32 * sizeof(char), NULL, NULL);
  51 + clSetKernelArg(kernel, 0, sizeof(cl_mem), &kernelBuffer);
  52 +
  53 + // execute kernel, read back the output and print to screen
  54 + clEnqueueTask(queue, kernel, 0, NULL, NULL);
  55 + clEnqueueReadBuffer(queue, kernelBuffer, CL_TRUE, 0,
  56 + 32 * sizeof(char), hostBuffer, 0, NULL, NULL);
  57 + puts(hostBuffer);
  58 +
  59 + clFlush(queue);
  60 + clFinish(queue);
  61 + clReleaseKernel(kernel);
  62 + clReleaseProgram(program);
  63 + clReleaseMemObject(kernelBuffer);
  64 + clReleaseCommandQueue(queue);
  65 + clReleaseContext(context);
  66 +
  67 + return 0;
  68 +
  69 +}
  70 +
  71 +// vim: set ft=c ts=4 sw=4:
... ...
Please register or login to post a comment