Showing
5 changed files
with
323 additions
and
9 deletions
1 | -CFLAGS += -O2 -march=native -std=c99 -I/usr/local/include | ||
2 | -LIBS += -lcl -L/usr/local/lib64/beignet | 1 | +CFLAGS += -O0 -Werror -ggdb -std=c99 |
2 | +LIBS += -lOpenCL | ||
3 | CC = cc | 3 | CC = cc |
4 | 4 | ||
5 | -BINARIES = part1 part2 part3 part4 part5 part6 part8 | 5 | +BINARIES = part1 part2 part3 part4 part5 part6 part8 createclbin part6bin |
6 | 6 | ||
7 | all: $(BINARIES) | 7 | all: $(BINARIES) |
8 | 8 | ||
9 | %: %.c | 9 | %: %.c |
10 | $(CC) $(CFLAGS) $(LIBS) -o $@ $< | 10 | $(CC) $(CFLAGS) $(LIBS) -o $@ $< |
11 | - strip $@ | 11 | + |
# Build the precompiled kernel binary. The generator reads part4.cl from the
# current directory, so the binary must be rebuilt whenever the source changes.
# The explicit ./ is required because the current directory is normally not
# on PATH.
part4.clbin: createclbin part4.cl
	./createclbin
12 | 14 | ||
13 | .PHONY: clean | 15 | .PHONY: clean |
14 | 16 | ||
15 | clean: | 17 | clean: |
16 | - rm $(BINARIES) | 18 | + @rm -f $(BINARIES) part4.clbin |
1 | -OpenCL tutorial notes | ||
2 | -===================== | 1 | +# OpenCL cookbook code |
3 | 2 | ||
4 | -URL: http://dhruba.name/2012/08/21/opencl-cookbook-series-reference/ | 3 | +These are some code examples from the |
4 | +[OpenCL cookbook tutorial series](http://dhruba.name/2012/08/21/opencl-cookbook-series-reference/). | ||
5 | + | ||
6 | +## Description | ||
7 | + | ||
8 | +Basically I just took the examples from that tutorial and put them into a | ||
9 | +repo. | ||
10 | + | ||
11 | +## Requirements | ||
12 | + | ||
13 | +Some OpenCL-capable hardware and the corresponding OpenCL library exposing the | ||
14 | +OpenCL API. I tested this on an Intel GPU (Intel Corporation Haswell-ULT | ||
15 | +Integrated Graphics Controller (rev 09)) with the | ||
16 | +[beignet](https://www.freedesktop.org/wiki/Software/Beignet/) | ||
17 | +open source library. | ||
18 | + | ||
19 | +## License | ||
20 | + | ||
21 | +As far as I can tell, the code is free of any license. Its purpose is just to | ||
22 | +demonstrate OpenCL. | ||
23 | + | ||
24 | +## Author | ||
25 | + | ||
26 | +Dhruba Bandopadhyay |
createclbin.c
0 → 100644
1 | +#include <stdio.h> | ||
2 | +#include <stdlib.h> | ||
3 | +#include <assert.h> | ||
4 | + | ||
5 | +#ifdef __APPLE__ | ||
6 | +#include <OpenCL/opencl.h> | ||
7 | +#else | ||
8 | +#include <CL/cl.h> | ||
9 | +#endif | ||
10 | + | ||
11 | +#define KERNEL "part4.cl" | ||
12 | + | ||
13 | +size_t | ||
14 | +clPutProgramBinaryToFile( | ||
15 | + const char * const filename, | ||
16 | + const cl_program * const program) | ||
17 | +{ | ||
18 | + cl_int cl_status; | ||
19 | + | ||
20 | + cl_uint num_devices; | ||
21 | + cl_status = clGetProgramInfo( | ||
22 | + *program, | ||
23 | + CL_PROGRAM_NUM_DEVICES, | ||
24 | + sizeof(cl_uint), | ||
25 | + &num_devices, | ||
26 | + NULL); | ||
27 | + | ||
28 | + if (cl_status != CL_SUCCESS) { | ||
29 | + return 0; | ||
30 | + } | ||
31 | + | ||
32 | + cl_device_id devices[num_devices]; | ||
33 | + cl_status = | ||
34 | + clGetProgramInfo( | ||
35 | + *program, | ||
36 | + CL_PROGRAM_DEVICES, | ||
37 | + sizeof(cl_device_id) * num_devices, | ||
38 | + devices, | ||
39 | + NULL); | ||
40 | + | ||
41 | + if (cl_status != CL_SUCCESS) { | ||
42 | + return 0; | ||
43 | + } | ||
44 | + | ||
45 | + size_t binary_size[num_devices]; | ||
46 | + cl_status = | ||
47 | + clGetProgramInfo( | ||
48 | + *program, | ||
49 | + CL_PROGRAM_BINARY_SIZES, | ||
50 | + sizeof(size_t) * num_devices, | ||
51 | + binary_size, | ||
52 | + NULL); | ||
53 | + | ||
54 | + if (cl_status != CL_SUCCESS) { | ||
55 | + return 0; | ||
56 | + } | ||
57 | + | ||
58 | + unsigned char * binaries[num_devices]; | ||
59 | + for (cl_uint i = 0; i < num_devices; i++) { | ||
60 | + binaries[i] = (unsigned char *) malloc(binary_size[i]); | ||
61 | + } | ||
62 | + cl_status = | ||
63 | + clGetProgramInfo( | ||
64 | + *program, | ||
65 | + CL_PROGRAM_BINARIES, | ||
66 | + sizeof(unsigned char *) * num_devices, | ||
67 | + binaries, | ||
68 | + NULL); | ||
69 | + | ||
70 | + if (cl_status != CL_SUCCESS) { | ||
71 | + for (cl_uint i = 0; i < num_devices; i++) { | ||
72 | + free(binaries[i]); | ||
73 | + } | ||
74 | + return 0; | ||
75 | + } | ||
76 | + | ||
77 | + FILE * handle = fopen(filename, "wb"); | ||
78 | + size_t size = fwrite(binaries[0], sizeof(unsigned char), binary_size[0], handle); | ||
79 | + | ||
80 | + for (cl_uint i = 0; i < num_devices; i++) { | ||
81 | + free(binaries[i]); | ||
82 | + } | ||
83 | + fclose(handle); | ||
84 | + | ||
85 | + return size; | ||
86 | +} | ||
87 | + | ||
88 | +size_t | ||
89 | +clGetProgramFromSourceFile( | ||
90 | + const char * const filename, | ||
91 | + const cl_context * const context, | ||
92 | + cl_program * const program) | ||
93 | +{ | ||
94 | + /* | ||
95 | + * Get a build OpenCL program from source | ||
96 | + */ | ||
97 | + FILE * handle; | ||
98 | + char * buffer; | ||
99 | + size_t size; | ||
100 | + | ||
101 | + cl_int cl_status; | ||
102 | + cl_uint num_devices; | ||
103 | + | ||
104 | + // get size of kernel source | ||
105 | + handle = fopen(filename, "r"); | ||
106 | + fseek(handle, 0, SEEK_END); | ||
107 | + size = ftell(handle); | ||
108 | + rewind(handle); | ||
109 | + | ||
110 | + // read kernel source into buffer | ||
111 | + buffer = (char*) malloc(size + 1); | ||
112 | + buffer[size] = '\0'; | ||
113 | + | ||
114 | + if (size != fread(buffer, sizeof(char), size, handle)) | ||
115 | + { | ||
116 | + fclose(handle); | ||
117 | + free(buffer); | ||
118 | + return 0; | ||
119 | + } | ||
120 | + | ||
121 | + fclose(handle); | ||
122 | + | ||
123 | + // create and build program | ||
124 | + *program = clCreateProgramWithSource( | ||
125 | + *context, 1, (const char**) &buffer, &size, &cl_status); | ||
126 | + | ||
127 | + free(buffer); | ||
128 | + | ||
129 | + if (cl_status != CL_SUCCESS) { | ||
130 | + return 0; | ||
131 | + } | ||
132 | + | ||
133 | + cl_status = clGetContextInfo( | ||
134 | + *context, | ||
135 | + CL_CONTEXT_NUM_DEVICES, | ||
136 | + sizeof(cl_uint), | ||
137 | + &num_devices, | ||
138 | + NULL); | ||
139 | + | ||
140 | + if (cl_status != CL_SUCCESS) { | ||
141 | + clReleaseProgram(*program); | ||
142 | + return 0; | ||
143 | + } | ||
144 | + | ||
145 | + cl_device_id devices[num_devices]; | ||
146 | + | ||
147 | + cl_status = clGetContextInfo( | ||
148 | + *context, | ||
149 | + CL_CONTEXT_DEVICES, | ||
150 | + sizeof(cl_device_id) * num_devices, | ||
151 | + devices, | ||
152 | + NULL); | ||
153 | + | ||
154 | + cl_status = clBuildProgram( | ||
155 | + *program, 1, devices, "-Werror -cl-std=CL1.1", NULL, NULL); | ||
156 | + | ||
157 | + if (cl_status != CL_SUCCESS) { | ||
158 | + clReleaseProgram(*program); | ||
159 | + return 0; | ||
160 | + } | ||
161 | + | ||
162 | + return size; | ||
163 | +} | ||
164 | + | ||
165 | +int | ||
166 | +clInit(cl_context * const context) | ||
167 | +{ | ||
168 | + /* | ||
169 | + * TODO add failure handling | ||
170 | + */ | ||
171 | + cl_platform_id platform; | ||
172 | + cl_uint num_devices; | ||
173 | + | ||
174 | + // get first available sdk and gpu and create context | ||
175 | + clGetPlatformIDs(1, &platform, NULL); | ||
176 | + clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 10, NULL, &num_devices); | ||
177 | + printf("%u devices during init.\n", num_devices); | ||
178 | + cl_device_id devices[num_devices]; | ||
179 | + clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, num_devices, devices, NULL); | ||
180 | + *context = clCreateContext(NULL, num_devices, devices, NULL, NULL, NULL); | ||
181 | + | ||
182 | + return 0; | ||
183 | +} | ||
184 | + | ||
185 | +int main() | ||
186 | +{ | ||
187 | + cl_int cl_status; | ||
188 | + cl_context context; | ||
189 | + cl_program program; | ||
190 | + | ||
191 | + size_t sourceSize; | ||
192 | + size_t count; | ||
193 | + | ||
194 | + clInit(&context); | ||
195 | + sourceSize = | ||
196 | + clGetProgramFromSourceFile(KERNEL, &context, &program); | ||
197 | + | ||
198 | + assert(sourceSize != 0); | ||
199 | + | ||
200 | + count = clPutProgramBinaryToFile(KERNEL "bin", &program); | ||
201 | + | ||
202 | + assert(count != 0); | ||
203 | + | ||
204 | + clReleaseProgram(program); | ||
205 | + clReleaseContext(context); | ||
206 | + | ||
207 | + return 0; | ||
208 | +} | ||
209 | + | ||
210 | +// vim: set ft=c ts=4 sw=4: |
part6bin.c
0 → 100644
1 | +#include <stdio.h> | ||
2 | +#include <stdlib.h> | ||
3 | +#include <assert.h> | ||
4 | +#ifdef __APPLE__ | ||
5 | +#include <OpenCL/opencl.h> | ||
6 | +#else | ||
7 | +#include <CL/cl.h> | ||
8 | +#endif | ||
9 | + | ||
10 | +#define KERNEL "part4.clbin" | ||
11 | + | ||
12 | +int main() { | ||
13 | + | ||
14 | + cl_platform_id platform; cl_device_id device; cl_context context; | ||
15 | + cl_program program; cl_kernel kernel; cl_command_queue queue; | ||
16 | + cl_mem kernelBuffer; | ||
17 | + | ||
18 | + FILE* programHandle; char *programBuffer; char *programLog; | ||
19 | + size_t programSize; char hostBuffer[32]; | ||
20 | + | ||
21 | + // get first available sdk and gpu and create context | ||
22 | + clGetPlatformIDs(1, &platform, NULL); | ||
23 | + clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL); | ||
24 | + context = clCreateContext(NULL, 1, &device, NULL, NULL, NULL); | ||
25 | + | ||
26 | + // get size of kernel source | ||
27 | + programHandle = fopen(KERNEL, "rb"); | ||
28 | + fseek(programHandle, 0, SEEK_END); | ||
29 | + programSize = ftell(programHandle); | ||
30 | + rewind(programHandle); | ||
31 | + | ||
32 | + // read kernel source into buffer | ||
33 | + programBuffer = (char*) malloc(programSize + 1); | ||
34 | + programBuffer[programSize] = '\0'; | ||
35 | + assert (programSize == fread(programBuffer, sizeof(char), programSize, programHandle)); | ||
36 | + | ||
37 | + fclose(programHandle); | ||
38 | + | ||
39 | + // create and build program | ||
40 | + program = clCreateProgramWithBinary(context, 1, &device, | ||
41 | + (const size_t*)&programSize, (const unsigned char **) &programBuffer, NULL, NULL); | ||
42 | + free(programBuffer); | ||
43 | + | ||
44 | + // create kernel and command queue | ||
45 | + kernel = clCreateKernel(program, "hello", NULL); | ||
46 | + queue = clCreateCommandQueue(context, device, 0, NULL); | ||
47 | + | ||
48 | + // create kernel argument buffer and set it into kernel | ||
49 | + kernelBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, | ||
50 | + 32 * sizeof(char), NULL, NULL); | ||
51 | + clSetKernelArg(kernel, 0, sizeof(cl_mem), &kernelBuffer); | ||
52 | + | ||
53 | + // execute kernel, read back the output and print to screen | ||
54 | + clEnqueueTask(queue, kernel, 0, NULL, NULL); | ||
55 | + clEnqueueReadBuffer(queue, kernelBuffer, CL_TRUE, 0, | ||
56 | + 32 * sizeof(char), hostBuffer, 0, NULL, NULL); | ||
57 | + puts(hostBuffer); | ||
58 | + | ||
59 | + clFlush(queue); | ||
60 | + clFinish(queue); | ||
61 | + clReleaseKernel(kernel); | ||
62 | + clReleaseProgram(program); | ||
63 | + clReleaseMemObject(kernelBuffer); | ||
64 | + clReleaseCommandQueue(queue); | ||
65 | + clReleaseContext(context); | ||
66 | + | ||
67 | + return 0; | ||
68 | + | ||
69 | +} | ||
70 | + | ||
71 | +// vim: set ft=c ts=4 sw=4: |
Please
register
or
login
to post a comment