Showing
5 changed files
with
323 additions
and
9 deletions
| 1 | -CFLAGS += -O2 -march=native -std=c99 -I/usr/local/include | ||
| 2 | -LIBS += -lcl -L/usr/local/lib64/beignet | 1 | +CFLAGS += -O0 -Werror -ggdb -std=c99 |
| 2 | +LIBS += -lOpenCL | ||
| 3 | CC = cc | 3 | CC = cc |
| 4 | 4 | ||
| 5 | -BINARIES = part1 part2 part3 part4 part5 part6 part8 | 5 | +BINARIES = part1 part2 part3 part4 part5 part6 part8 createclbin part6bin |
| 6 | 6 | ||
| 7 | all: $(BINARIES) | 7 | all: $(BINARIES) |
| 8 | 8 | ||
| 9 | %: %.c | 9 | %: %.c |
| 10 | $(CC) $(CFLAGS) $(LIBS) -o $@ $< | 10 | $(CC) $(CFLAGS) $(LIBS) -o $@ $< |
| 11 | - strip $@ | 11 | + |
# Build the pre-compiled kernel binary. The generator reads part4.cl from the
# current directory, so it is listed as a prerequisite. The generator must be
# invoked with an explicit ./ because the recipe shell does not search the
# current directory for commands.
part4.clbin: createclbin part4.cl
	./createclbin
| 12 | 14 | ||
| 13 | .PHONY: clean | 15 | .PHONY: clean |
| 14 | 16 | ||
| 15 | clean: | 17 | clean: |
| 16 | - rm $(BINARIES) | 18 | + @rm -f $(BINARIES) part4.clbin |
| 1 | -OpenCL tutorial notes | ||
| 2 | -===================== | 1 | +# OpenCL cookbook code |
| 3 | 2 | ||
| 4 | -URL: http://dhruba.name/2012/08/21/opencl-cookbook-series-reference/ | 3 | +These are some code examples from the |
| 4 | +[OpenCL cookbook tutorial series](http://dhruba.name/2012/08/21/opencl-cookbook-series-reference/). | ||
| 5 | + | ||
| 6 | +## Description | ||
| 7 | + | ||
| 8 | +Basically I just took the examples from that tutorial and put them into a | ||
| 9 | +repo. | ||
| 10 | + | ||
| 11 | +## Requirements | ||
| 12 | + | ||
| 13 | +Some OpenCL-capable hardware and the corresponding OpenCL library exposing the | ||
| 14 | +OpenCL API. I tested this on an Intel GPU (Intel Corporation Haswell-ULT | ||
| 15 | +Integrated Graphics Controller (rev 09)) with the | ||
| 16 | +[beignet](https://www.freedesktop.org/wiki/Software/Beignet/) | ||
| 17 | +open source library. | ||
| 18 | + | ||
| 19 | +## License | ||
| 20 | + | ||
| 21 | +As far as I can tell, the code is free of any license. Its purpose is just to | ||
| 22 | +demonstrate OpenCL. | ||
| 23 | + | ||
| 24 | +## Author | ||
| 25 | + | ||
| 26 | +Dhruba Bandopadhyay |
createclbin.c
0 → 100644
| 1 | +#include <stdio.h> | ||
| 2 | +#include <stdlib.h> | ||
| 3 | +#include <assert.h> | ||
| 4 | + | ||
| 5 | +#ifdef __APPLE__ | ||
| 6 | +#include <OpenCL/opencl.h> | ||
| 7 | +#else | ||
| 8 | +#include <CL/cl.h> | ||
| 9 | +#endif | ||
| 10 | + | ||
| 11 | +#define KERNEL "part4.cl" | ||
| 12 | + | ||
| 13 | +size_t | ||
| 14 | +clPutProgramBinaryToFile( | ||
| 15 | + const char * const filename, | ||
| 16 | + const cl_program * const program) | ||
| 17 | +{ | ||
| 18 | + cl_int cl_status; | ||
| 19 | + | ||
| 20 | + cl_uint num_devices; | ||
| 21 | + cl_status = clGetProgramInfo( | ||
| 22 | + *program, | ||
| 23 | + CL_PROGRAM_NUM_DEVICES, | ||
| 24 | + sizeof(cl_uint), | ||
| 25 | + &num_devices, | ||
| 26 | + NULL); | ||
| 27 | + | ||
| 28 | + if (cl_status != CL_SUCCESS) { | ||
| 29 | + return 0; | ||
| 30 | + } | ||
| 31 | + | ||
| 32 | + cl_device_id devices[num_devices]; | ||
| 33 | + cl_status = | ||
| 34 | + clGetProgramInfo( | ||
| 35 | + *program, | ||
| 36 | + CL_PROGRAM_DEVICES, | ||
| 37 | + sizeof(cl_device_id) * num_devices, | ||
| 38 | + devices, | ||
| 39 | + NULL); | ||
| 40 | + | ||
| 41 | + if (cl_status != CL_SUCCESS) { | ||
| 42 | + return 0; | ||
| 43 | + } | ||
| 44 | + | ||
| 45 | + size_t binary_size[num_devices]; | ||
| 46 | + cl_status = | ||
| 47 | + clGetProgramInfo( | ||
| 48 | + *program, | ||
| 49 | + CL_PROGRAM_BINARY_SIZES, | ||
| 50 | + sizeof(size_t) * num_devices, | ||
| 51 | + binary_size, | ||
| 52 | + NULL); | ||
| 53 | + | ||
| 54 | + if (cl_status != CL_SUCCESS) { | ||
| 55 | + return 0; | ||
| 56 | + } | ||
| 57 | + | ||
| 58 | + unsigned char * binaries[num_devices]; | ||
| 59 | + for (cl_uint i = 0; i < num_devices; i++) { | ||
| 60 | + binaries[i] = (unsigned char *) malloc(binary_size[i]); | ||
| 61 | + } | ||
| 62 | + cl_status = | ||
| 63 | + clGetProgramInfo( | ||
| 64 | + *program, | ||
| 65 | + CL_PROGRAM_BINARIES, | ||
| 66 | + sizeof(unsigned char *) * num_devices, | ||
| 67 | + binaries, | ||
| 68 | + NULL); | ||
| 69 | + | ||
| 70 | + if (cl_status != CL_SUCCESS) { | ||
| 71 | + for (cl_uint i = 0; i < num_devices; i++) { | ||
| 72 | + free(binaries[i]); | ||
| 73 | + } | ||
| 74 | + return 0; | ||
| 75 | + } | ||
| 76 | + | ||
| 77 | + FILE * handle = fopen(filename, "wb"); | ||
| 78 | + size_t size = fwrite(binaries[0], sizeof(unsigned char), binary_size[0], handle); | ||
| 79 | + | ||
| 80 | + for (cl_uint i = 0; i < num_devices; i++) { | ||
| 81 | + free(binaries[i]); | ||
| 82 | + } | ||
| 83 | + fclose(handle); | ||
| 84 | + | ||
| 85 | + return size; | ||
| 86 | +} | ||
| 87 | + | ||
| 88 | +size_t | ||
| 89 | +clGetProgramFromSourceFile( | ||
| 90 | + const char * const filename, | ||
| 91 | + const cl_context * const context, | ||
| 92 | + cl_program * const program) | ||
| 93 | +{ | ||
| 94 | + /* | ||
| 95 | + * Get a build OpenCL program from source | ||
| 96 | + */ | ||
| 97 | + FILE * handle; | ||
| 98 | + char * buffer; | ||
| 99 | + size_t size; | ||
| 100 | + | ||
| 101 | + cl_int cl_status; | ||
| 102 | + cl_uint num_devices; | ||
| 103 | + | ||
| 104 | + // get size of kernel source | ||
| 105 | + handle = fopen(filename, "r"); | ||
| 106 | + fseek(handle, 0, SEEK_END); | ||
| 107 | + size = ftell(handle); | ||
| 108 | + rewind(handle); | ||
| 109 | + | ||
| 110 | + // read kernel source into buffer | ||
| 111 | + buffer = (char*) malloc(size + 1); | ||
| 112 | + buffer[size] = '\0'; | ||
| 113 | + | ||
| 114 | + if (size != fread(buffer, sizeof(char), size, handle)) | ||
| 115 | + { | ||
| 116 | + fclose(handle); | ||
| 117 | + free(buffer); | ||
| 118 | + return 0; | ||
| 119 | + } | ||
| 120 | + | ||
| 121 | + fclose(handle); | ||
| 122 | + | ||
| 123 | + // create and build program | ||
| 124 | + *program = clCreateProgramWithSource( | ||
| 125 | + *context, 1, (const char**) &buffer, &size, &cl_status); | ||
| 126 | + | ||
| 127 | + free(buffer); | ||
| 128 | + | ||
| 129 | + if (cl_status != CL_SUCCESS) { | ||
| 130 | + return 0; | ||
| 131 | + } | ||
| 132 | + | ||
| 133 | + cl_status = clGetContextInfo( | ||
| 134 | + *context, | ||
| 135 | + CL_CONTEXT_NUM_DEVICES, | ||
| 136 | + sizeof(cl_uint), | ||
| 137 | + &num_devices, | ||
| 138 | + NULL); | ||
| 139 | + | ||
| 140 | + if (cl_status != CL_SUCCESS) { | ||
| 141 | + clReleaseProgram(*program); | ||
| 142 | + return 0; | ||
| 143 | + } | ||
| 144 | + | ||
| 145 | + cl_device_id devices[num_devices]; | ||
| 146 | + | ||
| 147 | + cl_status = clGetContextInfo( | ||
| 148 | + *context, | ||
| 149 | + CL_CONTEXT_DEVICES, | ||
| 150 | + sizeof(cl_device_id) * num_devices, | ||
| 151 | + devices, | ||
| 152 | + NULL); | ||
| 153 | + | ||
| 154 | + cl_status = clBuildProgram( | ||
| 155 | + *program, 1, devices, "-Werror -cl-std=CL1.1", NULL, NULL); | ||
| 156 | + | ||
| 157 | + if (cl_status != CL_SUCCESS) { | ||
| 158 | + clReleaseProgram(*program); | ||
| 159 | + return 0; | ||
| 160 | + } | ||
| 161 | + | ||
| 162 | + return size; | ||
| 163 | +} | ||
| 164 | + | ||
| 165 | +int | ||
| 166 | +clInit(cl_context * const context) | ||
| 167 | +{ | ||
| 168 | + /* | ||
| 169 | + * TODO add failure handling | ||
| 170 | + */ | ||
| 171 | + cl_platform_id platform; | ||
| 172 | + cl_uint num_devices; | ||
| 173 | + | ||
| 174 | + // get first available sdk and gpu and create context | ||
| 175 | + clGetPlatformIDs(1, &platform, NULL); | ||
| 176 | + clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 10, NULL, &num_devices); | ||
| 177 | + printf("%u devices during init.\n", num_devices); | ||
| 178 | + cl_device_id devices[num_devices]; | ||
| 179 | + clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, num_devices, devices, NULL); | ||
| 180 | + *context = clCreateContext(NULL, num_devices, devices, NULL, NULL, NULL); | ||
| 181 | + | ||
| 182 | + return 0; | ||
| 183 | +} | ||
| 184 | + | ||
| 185 | +int main() | ||
| 186 | +{ | ||
| 187 | + cl_int cl_status; | ||
| 188 | + cl_context context; | ||
| 189 | + cl_program program; | ||
| 190 | + | ||
| 191 | + size_t sourceSize; | ||
| 192 | + size_t count; | ||
| 193 | + | ||
| 194 | + clInit(&context); | ||
| 195 | + sourceSize = | ||
| 196 | + clGetProgramFromSourceFile(KERNEL, &context, &program); | ||
| 197 | + | ||
| 198 | + assert(sourceSize != 0); | ||
| 199 | + | ||
| 200 | + count = clPutProgramBinaryToFile(KERNEL "bin", &program); | ||
| 201 | + | ||
| 202 | + assert(count != 0); | ||
| 203 | + | ||
| 204 | + clReleaseProgram(program); | ||
| 205 | + clReleaseContext(context); | ||
| 206 | + | ||
| 207 | + return 0; | ||
| 208 | +} | ||
| 209 | + | ||
| 210 | +// vim: set ft=c ts=4 sw=4: |
part6bin.c
0 → 100644
| 1 | +#include <stdio.h> | ||
| 2 | +#include <stdlib.h> | ||
| 3 | +#include <assert.h> | ||
| 4 | +#ifdef __APPLE__ | ||
| 5 | +#include <OpenCL/opencl.h> | ||
| 6 | +#else | ||
| 7 | +#include <CL/cl.h> | ||
| 8 | +#endif | ||
| 9 | + | ||
| 10 | +#define KERNEL "part4.clbin" | ||
| 11 | + | ||
| 12 | +int main() { | ||
| 13 | + | ||
| 14 | + cl_platform_id platform; cl_device_id device; cl_context context; | ||
| 15 | + cl_program program; cl_kernel kernel; cl_command_queue queue; | ||
| 16 | + cl_mem kernelBuffer; | ||
| 17 | + | ||
| 18 | + FILE* programHandle; char *programBuffer; char *programLog; | ||
| 19 | + size_t programSize; char hostBuffer[32]; | ||
| 20 | + | ||
| 21 | + // get first available sdk and gpu and create context | ||
| 22 | + clGetPlatformIDs(1, &platform, NULL); | ||
| 23 | + clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL); | ||
| 24 | + context = clCreateContext(NULL, 1, &device, NULL, NULL, NULL); | ||
| 25 | + | ||
| 26 | + // get size of kernel source | ||
| 27 | + programHandle = fopen(KERNEL, "rb"); | ||
| 28 | + fseek(programHandle, 0, SEEK_END); | ||
| 29 | + programSize = ftell(programHandle); | ||
| 30 | + rewind(programHandle); | ||
| 31 | + | ||
| 32 | + // read kernel source into buffer | ||
| 33 | + programBuffer = (char*) malloc(programSize + 1); | ||
| 34 | + programBuffer[programSize] = '\0'; | ||
| 35 | + assert (programSize == fread(programBuffer, sizeof(char), programSize, programHandle)); | ||
| 36 | + | ||
| 37 | + fclose(programHandle); | ||
| 38 | + | ||
| 39 | + // create and build program | ||
| 40 | + program = clCreateProgramWithBinary(context, 1, &device, | ||
| 41 | + (const size_t*)&programSize, (const unsigned char **) &programBuffer, NULL, NULL); | ||
| 42 | + free(programBuffer); | ||
| 43 | + | ||
| 44 | + // create kernel and command queue | ||
| 45 | + kernel = clCreateKernel(program, "hello", NULL); | ||
| 46 | + queue = clCreateCommandQueue(context, device, 0, NULL); | ||
| 47 | + | ||
| 48 | + // create kernel argument buffer and set it into kernel | ||
| 49 | + kernelBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, | ||
| 50 | + 32 * sizeof(char), NULL, NULL); | ||
| 51 | + clSetKernelArg(kernel, 0, sizeof(cl_mem), &kernelBuffer); | ||
| 52 | + | ||
| 53 | + // execute kernel, read back the output and print to screen | ||
| 54 | + clEnqueueTask(queue, kernel, 0, NULL, NULL); | ||
| 55 | + clEnqueueReadBuffer(queue, kernelBuffer, CL_TRUE, 0, | ||
| 56 | + 32 * sizeof(char), hostBuffer, 0, NULL, NULL); | ||
| 57 | + puts(hostBuffer); | ||
| 58 | + | ||
| 59 | + clFlush(queue); | ||
| 60 | + clFinish(queue); | ||
| 61 | + clReleaseKernel(kernel); | ||
| 62 | + clReleaseProgram(program); | ||
| 63 | + clReleaseMemObject(kernelBuffer); | ||
| 64 | + clReleaseCommandQueue(queue); | ||
| 65 | + clReleaseContext(context); | ||
| 66 | + | ||
| 67 | + return 0; | ||
| 68 | + | ||
| 69 | +} | ||
| 70 | + | ||
| 71 | +// vim: set ft=c ts=4 sw=4: |
Please
register
or
login
to post a comment