Showing
5 changed files
with
323 additions
and
9 deletions
| 1 | -CFLAGS += -O2 -march=native -std=c99 -I/usr/local/include | |
| 2 | -LIBS += -lcl -L/usr/local/lib64/beignet | |
| 1 | +CFLAGS += -O0 -Werror -ggdb -std=c99 | |
| 2 | +LIBS += -lOpenCL | |
| 3 | 3 | CC = cc |
| 4 | 4 | |
| 5 | -BINARIES = part1 part2 part3 part4 part5 part6 part8 | |
| 5 | +BINARIES = part1 part2 part3 part4 part5 part6 part8 createclbin part6bin | |
| 6 | 6 | |
| 7 | 7 | all: $(BINARIES) |
| 8 | 8 | |
| 9 | 9 | %: %.c |
| 10 | 10 | $(CC) $(CFLAGS) $(LIBS) -o $@ $< |
| 11 | - strip $@ | |
| 11 | + | |
# part4.clbin is generated by running the createclbin tool, which reads
# part4.cl and writes the compiled device binary next to it. The tool is
# invoked through ./ so the rule does not rely on "." being in PATH, and
# part4.cl is a prerequisite so the binary is rebuilt when the kernel changes.
part4.clbin: createclbin part4.cl
	./createclbin
| 12 | 14 | |
| 13 | 15 | .PHONY: clean |
| 14 | 16 | |
| 15 | 17 | clean: |
| 16 | - rm $(BINARIES) | |
| 18 | + @rm -f $(BINARIES) part4.clbin | ... | ... |
| 1 | -OpenCL tutorial notes | |
| 2 | -===================== | |
| 1 | +# OpenCL cookbook code | |
| 3 | 2 | |
| 4 | -URL: http://dhruba.name/2012/08/21/opencl-cookbook-series-reference/ | |
| 3 | +These are some code examples from the | |
| 4 | +[OpenCL cookbook tutorial series](http://dhruba.name/2012/08/21/opencl-cookbook-series-reference/). | |
| 5 | + | |
| 6 | +## Description | |
| 7 | + | |
| 8 | +Basically I just took the examples from that tutorial and put them into a | |
| 9 | +repo. | |
| 10 | + | |
| 11 | +## Requirements | |
| 12 | + | |
| 13 | +Some OpenCL-capable hardware and the corresponding OpenCL library exposing the | |
| 14 | +OpenCL API. I tested this on an Intel GPU (Intel Corporation Haswell-ULT | |
| 15 | +Integrated Graphics Controller (rev 09)) with the | |
| 16 | +[beignet](https://www.freedesktop.org/wiki/Software/Beignet/) | |
| 17 | +open source library. | |
| 18 | + | |
| 19 | +## License | |
| 20 | + | |
| 21 | +As far as I can tell the code is free of any license. Its purpose is just to | |
| 22 | +demonstrate OpenCL. | |
| 23 | + | |
| 24 | +## Author | |
| 25 | + | |
| 26 | +Dhruba Bandopadhyay | ... | ... |
createclbin.c
0 → 100644
| 1 | +#include <stdio.h> | |
| 2 | +#include <stdlib.h> | |
| 3 | +#include <assert.h> | |
| 4 | + | |
| 5 | +#ifdef __APPLE__ | |
| 6 | +#include <OpenCL/opencl.h> | |
| 7 | +#else | |
| 8 | +#include <CL/cl.h> | |
| 9 | +#endif | |
| 10 | + | |
| 11 | +#define KERNEL "part4.cl" | |
| 12 | + | |
| 13 | +size_t | |
| 14 | +clPutProgramBinaryToFile( | |
| 15 | + const char * const filename, | |
| 16 | + const cl_program * const program) | |
| 17 | +{ | |
| 18 | + cl_int cl_status; | |
| 19 | + | |
| 20 | + cl_uint num_devices; | |
| 21 | + cl_status = clGetProgramInfo( | |
| 22 | + *program, | |
| 23 | + CL_PROGRAM_NUM_DEVICES, | |
| 24 | + sizeof(cl_uint), | |
| 25 | + &num_devices, | |
| 26 | + NULL); | |
| 27 | + | |
| 28 | + if (cl_status != CL_SUCCESS) { | |
| 29 | + return 0; | |
| 30 | + } | |
| 31 | + | |
| 32 | + cl_device_id devices[num_devices]; | |
| 33 | + cl_status = | |
| 34 | + clGetProgramInfo( | |
| 35 | + *program, | |
| 36 | + CL_PROGRAM_DEVICES, | |
| 37 | + sizeof(cl_device_id) * num_devices, | |
| 38 | + devices, | |
| 39 | + NULL); | |
| 40 | + | |
| 41 | + if (cl_status != CL_SUCCESS) { | |
| 42 | + return 0; | |
| 43 | + } | |
| 44 | + | |
| 45 | + size_t binary_size[num_devices]; | |
| 46 | + cl_status = | |
| 47 | + clGetProgramInfo( | |
| 48 | + *program, | |
| 49 | + CL_PROGRAM_BINARY_SIZES, | |
| 50 | + sizeof(size_t) * num_devices, | |
| 51 | + binary_size, | |
| 52 | + NULL); | |
| 53 | + | |
| 54 | + if (cl_status != CL_SUCCESS) { | |
| 55 | + return 0; | |
| 56 | + } | |
| 57 | + | |
| 58 | + unsigned char * binaries[num_devices]; | |
| 59 | + for (cl_uint i = 0; i < num_devices; i++) { | |
| 60 | + binaries[i] = (unsigned char *) malloc(binary_size[i]); | |
| 61 | + } | |
| 62 | + cl_status = | |
| 63 | + clGetProgramInfo( | |
| 64 | + *program, | |
| 65 | + CL_PROGRAM_BINARIES, | |
| 66 | + sizeof(unsigned char *) * num_devices, | |
| 67 | + binaries, | |
| 68 | + NULL); | |
| 69 | + | |
| 70 | + if (cl_status != CL_SUCCESS) { | |
| 71 | + for (cl_uint i = 0; i < num_devices; i++) { | |
| 72 | + free(binaries[i]); | |
| 73 | + } | |
| 74 | + return 0; | |
| 75 | + } | |
| 76 | + | |
| 77 | + FILE * handle = fopen(filename, "wb"); | |
| 78 | + size_t size = fwrite(binaries[0], sizeof(unsigned char), binary_size[0], handle); | |
| 79 | + | |
| 80 | + for (cl_uint i = 0; i < num_devices; i++) { | |
| 81 | + free(binaries[i]); | |
| 82 | + } | |
| 83 | + fclose(handle); | |
| 84 | + | |
| 85 | + return size; | |
| 86 | +} | |
| 87 | + | |
| 88 | +size_t | |
| 89 | +clGetProgramFromSourceFile( | |
| 90 | + const char * const filename, | |
| 91 | + const cl_context * const context, | |
| 92 | + cl_program * const program) | |
| 93 | +{ | |
| 94 | + /* | |
| 95 | + * Get a build OpenCL program from source | |
| 96 | + */ | |
| 97 | + FILE * handle; | |
| 98 | + char * buffer; | |
| 99 | + size_t size; | |
| 100 | + | |
| 101 | + cl_int cl_status; | |
| 102 | + cl_uint num_devices; | |
| 103 | + | |
| 104 | + // get size of kernel source | |
| 105 | + handle = fopen(filename, "r"); | |
| 106 | + fseek(handle, 0, SEEK_END); | |
| 107 | + size = ftell(handle); | |
| 108 | + rewind(handle); | |
| 109 | + | |
| 110 | + // read kernel source into buffer | |
| 111 | + buffer = (char*) malloc(size + 1); | |
| 112 | + buffer[size] = '\0'; | |
| 113 | + | |
| 114 | + if (size != fread(buffer, sizeof(char), size, handle)) | |
| 115 | + { | |
| 116 | + fclose(handle); | |
| 117 | + free(buffer); | |
| 118 | + return 0; | |
| 119 | + } | |
| 120 | + | |
| 121 | + fclose(handle); | |
| 122 | + | |
| 123 | + // create and build program | |
| 124 | + *program = clCreateProgramWithSource( | |
| 125 | + *context, 1, (const char**) &buffer, &size, &cl_status); | |
| 126 | + | |
| 127 | + free(buffer); | |
| 128 | + | |
| 129 | + if (cl_status != CL_SUCCESS) { | |
| 130 | + return 0; | |
| 131 | + } | |
| 132 | + | |
| 133 | + cl_status = clGetContextInfo( | |
| 134 | + *context, | |
| 135 | + CL_CONTEXT_NUM_DEVICES, | |
| 136 | + sizeof(cl_uint), | |
| 137 | + &num_devices, | |
| 138 | + NULL); | |
| 139 | + | |
| 140 | + if (cl_status != CL_SUCCESS) { | |
| 141 | + clReleaseProgram(*program); | |
| 142 | + return 0; | |
| 143 | + } | |
| 144 | + | |
| 145 | + cl_device_id devices[num_devices]; | |
| 146 | + | |
| 147 | + cl_status = clGetContextInfo( | |
| 148 | + *context, | |
| 149 | + CL_CONTEXT_DEVICES, | |
| 150 | + sizeof(cl_device_id) * num_devices, | |
| 151 | + devices, | |
| 152 | + NULL); | |
| 153 | + | |
| 154 | + cl_status = clBuildProgram( | |
| 155 | + *program, 1, devices, "-Werror -cl-std=CL1.1", NULL, NULL); | |
| 156 | + | |
| 157 | + if (cl_status != CL_SUCCESS) { | |
| 158 | + clReleaseProgram(*program); | |
| 159 | + return 0; | |
| 160 | + } | |
| 161 | + | |
| 162 | + return size; | |
| 163 | +} | |
| 164 | + | |
| 165 | +int | |
| 166 | +clInit(cl_context * const context) | |
| 167 | +{ | |
| 168 | + /* | |
| 169 | + * TODO add failure handling | |
| 170 | + */ | |
| 171 | + cl_platform_id platform; | |
| 172 | + cl_uint num_devices; | |
| 173 | + | |
| 174 | + // get first available sdk and gpu and create context | |
| 175 | + clGetPlatformIDs(1, &platform, NULL); | |
| 176 | + clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 10, NULL, &num_devices); | |
| 177 | + printf("%u devices during init.\n", num_devices); | |
| 178 | + cl_device_id devices[num_devices]; | |
| 179 | + clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, num_devices, devices, NULL); | |
| 180 | + *context = clCreateContext(NULL, num_devices, devices, NULL, NULL, NULL); | |
| 181 | + | |
| 182 | + return 0; | |
| 183 | +} | |
| 184 | + | |
| 185 | +int main() | |
| 186 | +{ | |
| 187 | + cl_int cl_status; | |
| 188 | + cl_context context; | |
| 189 | + cl_program program; | |
| 190 | + | |
| 191 | + size_t sourceSize; | |
| 192 | + size_t count; | |
| 193 | + | |
| 194 | + clInit(&context); | |
| 195 | + sourceSize = | |
| 196 | + clGetProgramFromSourceFile(KERNEL, &context, &program); | |
| 197 | + | |
| 198 | + assert(sourceSize != 0); | |
| 199 | + | |
| 200 | + count = clPutProgramBinaryToFile(KERNEL "bin", &program); | |
| 201 | + | |
| 202 | + assert(count != 0); | |
| 203 | + | |
| 204 | + clReleaseProgram(program); | |
| 205 | + clReleaseContext(context); | |
| 206 | + | |
| 207 | + return 0; | |
| 208 | +} | |
| 209 | + | |
| 210 | +// vim: set ft=c ts=4 sw=4: | ... | ... |
part6bin.c
0 → 100644
| 1 | +#include <stdio.h> | |
| 2 | +#include <stdlib.h> | |
| 3 | +#include <assert.h> | |
| 4 | +#ifdef __APPLE__ | |
| 5 | +#include <OpenCL/opencl.h> | |
| 6 | +#else | |
| 7 | +#include <CL/cl.h> | |
| 8 | +#endif | |
| 9 | + | |
| 10 | +#define KERNEL "part4.clbin" | |
| 11 | + | |
| 12 | +int main() { | |
| 13 | + | |
| 14 | + cl_platform_id platform; cl_device_id device; cl_context context; | |
| 15 | + cl_program program; cl_kernel kernel; cl_command_queue queue; | |
| 16 | + cl_mem kernelBuffer; | |
| 17 | + | |
| 18 | + FILE* programHandle; char *programBuffer; char *programLog; | |
| 19 | + size_t programSize; char hostBuffer[32]; | |
| 20 | + | |
| 21 | + // get first available sdk and gpu and create context | |
| 22 | + clGetPlatformIDs(1, &platform, NULL); | |
| 23 | + clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL); | |
| 24 | + context = clCreateContext(NULL, 1, &device, NULL, NULL, NULL); | |
| 25 | + | |
| 26 | + // get size of kernel source | |
| 27 | + programHandle = fopen(KERNEL, "rb"); | |
| 28 | + fseek(programHandle, 0, SEEK_END); | |
| 29 | + programSize = ftell(programHandle); | |
| 30 | + rewind(programHandle); | |
| 31 | + | |
| 32 | + // read kernel source into buffer | |
| 33 | + programBuffer = (char*) malloc(programSize + 1); | |
| 34 | + programBuffer[programSize] = '\0'; | |
| 35 | + assert (programSize == fread(programBuffer, sizeof(char), programSize, programHandle)); | |
| 36 | + | |
| 37 | + fclose(programHandle); | |
| 38 | + | |
| 39 | + // create and build program | |
| 40 | + program = clCreateProgramWithBinary(context, 1, &device, | |
| 41 | + (const size_t*)&programSize, (const unsigned char **) &programBuffer, NULL, NULL); | |
| 42 | + free(programBuffer); | |
| 43 | + | |
| 44 | + // create kernel and command queue | |
| 45 | + kernel = clCreateKernel(program, "hello", NULL); | |
| 46 | + queue = clCreateCommandQueue(context, device, 0, NULL); | |
| 47 | + | |
| 48 | + // create kernel argument buffer and set it into kernel | |
| 49 | + kernelBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, | |
| 50 | + 32 * sizeof(char), NULL, NULL); | |
| 51 | + clSetKernelArg(kernel, 0, sizeof(cl_mem), &kernelBuffer); | |
| 52 | + | |
| 53 | + // execute kernel, read back the output and print to screen | |
| 54 | + clEnqueueTask(queue, kernel, 0, NULL, NULL); | |
| 55 | + clEnqueueReadBuffer(queue, kernelBuffer, CL_TRUE, 0, | |
| 56 | + 32 * sizeof(char), hostBuffer, 0, NULL, NULL); | |
| 57 | + puts(hostBuffer); | |
| 58 | + | |
| 59 | + clFlush(queue); | |
| 60 | + clFinish(queue); | |
| 61 | + clReleaseKernel(kernel); | |
| 62 | + clReleaseProgram(program); | |
| 63 | + clReleaseMemObject(kernelBuffer); | |
| 64 | + clReleaseCommandQueue(queue); | |
| 65 | + clReleaseContext(context); | |
| 66 | + | |
| 67 | + return 0; | |
| 68 | + | |
| 69 | +} | |
| 70 | + | |
| 71 | +// vim: set ft=c ts=4 sw=4: | ... | ... |
Please
register
or
login
to post a comment