FreeRDP
prim_YUV_opencl.c
1 
21 #include <freerdp/config.h>
22 
23 #include <freerdp/types.h>
24 #include <freerdp/primitives.h>
25 #include "prim_internal.h"
26 
27 #if defined(WITH_OPENCL)
28 #ifdef __APPLE__
29 #include "OpenCL/opencl.h"
30 #else
31 #include <CL/cl.h>
32 #endif
33 #include "primitives-opencl-program.h"
34 
35 #include <freerdp/log.h>
36 #define TAG FREERDP_TAG("primitives")
37 
38 typedef struct
39 {
40  BOOL support;
41  cl_platform_id platformId;
42  cl_device_id deviceId;
43  cl_context context;
44  cl_command_queue commandQueue;
45  cl_program program;
46 } primitives_opencl_context;
47 
48 typedef struct
49 {
50  primitives_opencl_context* cl;
51  cl_kernel kernel;
52  cl_mem srcObjs[3];
53  cl_mem dstObj;
54  prim_size_t roi;
55  size_t dstStep;
56 } primitives_cl_kernel;
57 
58 static primitives_opencl_context* primitives_get_opencl_context(void);
59 
60 static void cl_kernel_free(primitives_cl_kernel* kernel)
61 {
62  if (!kernel)
63  return;
64 
65  if (kernel->dstObj)
66  clReleaseMemObject(kernel->dstObj);
67 
68  for (size_t i = 0; i < ARRAYSIZE(kernel->srcObjs); i++)
69  {
70  cl_mem obj = kernel->srcObjs[i];
71  kernel->srcObjs[i] = NULL;
72  if (obj)
73  clReleaseMemObject(obj);
74  }
75 
76  if (kernel->kernel)
77  clReleaseKernel(kernel->kernel);
78 
79  free(kernel);
80 }
81 
82 static primitives_cl_kernel* cl_kernel_new(const char* kernelName, const prim_size_t* roi)
83 {
84  WINPR_ASSERT(kernelName);
85  WINPR_ASSERT(roi);
86 
87  primitives_cl_kernel* kernel = calloc(1, sizeof(primitives_cl_kernel));
88  if (!kernel)
89  goto fail;
90 
91  kernel->roi = *roi;
92  kernel->cl = primitives_get_opencl_context();
93  if (!kernel->cl)
94  goto fail;
95 
96  cl_int ret = CL_INVALID_VALUE;
97  kernel->kernel = clCreateKernel(kernel->cl->program, kernelName, &ret);
98  if (ret != CL_SUCCESS)
99  {
100  WLog_ERR(TAG, "openCL: unable to create kernel %s", kernelName);
101  goto fail;
102  }
103 
104  return kernel;
105 fail:
106  cl_kernel_free(kernel);
107  return NULL;
108 }
109 
110 static BOOL cl_kernel_set_sources(primitives_cl_kernel* ctx, const BYTE* WINPR_RESTRICT pSrc[3],
111  const UINT32 srcStep[3])
112 {
113  const char* sourceNames[] = { "Y", "U", "V" };
114 
115  WINPR_ASSERT(ctx);
116  WINPR_ASSERT(pSrc);
117  WINPR_ASSERT(srcStep);
118 
119  for (cl_uint i = 0; i < ARRAYSIZE(ctx->srcObjs); i++)
120  {
121  cl_int ret = CL_INVALID_VALUE;
122  const BYTE* csrc = pSrc[i];
123  void* WINPR_RESTRICT src = WINPR_CAST_CONST_PTR_AWAY(csrc, void* WINPR_RESTRICT);
124  ctx->srcObjs[i] = clCreateBuffer(ctx->cl->context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
125  1ull * srcStep[i] * ctx->roi.height, src, &ret);
126  if (ret != CL_SUCCESS)
127  {
128  WLog_ERR(TAG, "unable to create %sobj", sourceNames[i]);
129  return FALSE;
130  }
131 
132  ret = clSetKernelArg(ctx->kernel, i * 2, sizeof(cl_mem), &ctx->srcObjs[i]);
133  if (ret != CL_SUCCESS)
134  {
135  WLog_ERR(TAG, "unable to set arg for %sobj", sourceNames[i]);
136  return FALSE;
137  }
138 
139  ret = clSetKernelArg(ctx->kernel, i * 2 + 1, sizeof(cl_uint), &srcStep[i]);
140  if (ret != CL_SUCCESS)
141  {
142  WLog_ERR(TAG, "unable to set arg stride for %sobj", sourceNames[i]);
143  return FALSE;
144  }
145  }
146 
147  return TRUE;
148 }
149 
150 static BOOL cl_kernel_set_destination(primitives_cl_kernel* ctx, UINT32 dstStep)
151 {
152 
153  WINPR_ASSERT(ctx);
154 
155  ctx->dstStep = dstStep;
156  cl_int ret = CL_INVALID_VALUE;
157  ctx->dstObj = clCreateBuffer(ctx->cl->context, CL_MEM_WRITE_ONLY,
158  1ull * dstStep * ctx->roi.height, NULL, &ret);
159  if (ret != CL_SUCCESS)
160  {
161  WLog_ERR(TAG, "unable to create dest obj");
162  return FALSE;
163  }
164 
165  ret = clSetKernelArg(ctx->kernel, 6, sizeof(cl_mem), &ctx->dstObj);
166  if (ret != CL_SUCCESS)
167  {
168  WLog_ERR(TAG, "unable to set arg destObj");
169  return FALSE;
170  }
171 
172  ret = clSetKernelArg(ctx->kernel, 7, sizeof(cl_uint), &dstStep);
173  if (ret != CL_SUCCESS)
174  {
175  WLog_ERR(TAG, "unable to set arg dstStep");
176  return FALSE;
177  }
178 
179  return TRUE;
180 }
181 
182 static BOOL cl_kernel_process(primitives_cl_kernel* ctx, BYTE* pDst)
183 {
184  WINPR_ASSERT(ctx);
185  WINPR_ASSERT(pDst);
186 
187  size_t indexes[2] = { 0 };
188  indexes[0] = ctx->roi.width;
189  indexes[1] = ctx->roi.height;
190 
191  cl_int ret = clEnqueueNDRangeKernel(ctx->cl->commandQueue, ctx->kernel, 2, NULL, indexes, NULL,
192  0, NULL, NULL);
193  if (ret != CL_SUCCESS)
194  {
195  WLog_ERR(TAG, "unable to enqueue call kernel");
196  return FALSE;
197  }
198 
199  /* Transfer result to host */
200  ret = clEnqueueReadBuffer(ctx->cl->commandQueue, ctx->dstObj, CL_TRUE, 0,
201  ctx->roi.height * ctx->dstStep, pDst, 0, NULL, NULL);
202  if (ret != CL_SUCCESS)
203  {
204  WLog_ERR(TAG, "unable to read back buffer");
205  return FALSE;
206  }
207 
208  return TRUE;
209 }
210 
211 static pstatus_t opencl_YUVToRGB(const char* kernelName, const BYTE* WINPR_RESTRICT pSrc[3],
212  const UINT32 srcStep[3], BYTE* WINPR_RESTRICT pDst, UINT32 dstStep,
213  const prim_size_t* WINPR_RESTRICT roi)
214 {
215  pstatus_t res = -1;
216 
217  primitives_cl_kernel* ctx = cl_kernel_new(kernelName, roi);
218  if (!ctx)
219  goto fail;
220 
221  if (!cl_kernel_set_sources(ctx, pSrc, srcStep))
222  goto fail;
223 
224  if (!cl_kernel_set_destination(ctx, dstStep))
225  goto fail;
226 
227  if (!cl_kernel_process(ctx, pDst))
228  goto fail;
229 
230  res = PRIMITIVES_SUCCESS;
231 
232 fail:
233  cl_kernel_free(ctx);
234  return res;
235 }
236 
237 static primitives_opencl_context openclContext = { 0 };
238 
239 static primitives_opencl_context* primitives_get_opencl_context(void)
240 {
241  return &openclContext;
242 }
243 
244 static void cl_context_free(primitives_opencl_context* ctx)
245 {
246  if (!ctx)
247  return;
248  clReleaseProgram(ctx->program);
249  clReleaseCommandQueue(ctx->commandQueue);
250  clReleaseContext(ctx->context);
251  clReleaseDevice(ctx->deviceId);
252  ctx->support = FALSE;
253 }
254 
255 static pstatus_t primitives_uninit_opencl(void)
256 {
257  if (!openclContext.support)
258  return PRIMITIVES_SUCCESS;
259 
260  cl_context_free(&openclContext);
261  return PRIMITIVES_SUCCESS;
262 }
263 
264 static BOOL primitives_init_opencl_context(primitives_opencl_context* WINPR_RESTRICT prims)
265 {
266  cl_uint ndevices = 0;
267  cl_uint nplatforms = 0;
268  cl_kernel kernel = NULL;
269 
270  BOOL gotGPU = FALSE;
271  size_t programLen = 0;
272 
273  cl_int ret = clGetPlatformIDs(0, NULL, &nplatforms);
274  if (ret != CL_SUCCESS || nplatforms < 1)
275  return FALSE;
276 
277  cl_platform_id* platform_ids = (cl_platform_id*)calloc(nplatforms, sizeof(cl_platform_id));
278  if (!platform_ids)
279  return FALSE;
280 
281  ret = clGetPlatformIDs(nplatforms, platform_ids, &nplatforms);
282  if (ret != CL_SUCCESS)
283  {
284  free((void*)platform_ids);
285  return FALSE;
286  }
287 
288  for (cl_uint i = 0; (i < nplatforms) && !gotGPU; i++)
289  {
290  cl_device_id device_id = NULL;
291  cl_context context = NULL;
292  char platformName[1000] = { 0 };
293  char deviceName[1000] = { 0 };
294 
295  ret = clGetPlatformInfo(platform_ids[i], CL_PLATFORM_NAME, sizeof(platformName),
296  platformName, NULL);
297  if (ret != CL_SUCCESS)
298  continue;
299 
300  ret = clGetDeviceIDs(platform_ids[i], CL_DEVICE_TYPE_GPU, 1, &device_id, &ndevices);
301  if (ret != CL_SUCCESS)
302  continue;
303 
304  ret = clGetDeviceInfo(device_id, CL_DEVICE_NAME, sizeof(deviceName), deviceName, NULL);
305  if (ret != CL_SUCCESS)
306  {
307  WLog_ERR(TAG, "openCL: unable get device name for platform %s", platformName);
308  clReleaseDevice(device_id);
309  continue;
310  }
311 
312  context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret);
313  if (ret != CL_SUCCESS)
314  {
315  WLog_ERR(TAG, "openCL: unable to create context for platform %s, device %s",
316  platformName, deviceName);
317  clReleaseDevice(device_id);
318  continue;
319  }
320 
321 #if defined(CL_VERSION_2_0)
322  prims->commandQueue = clCreateCommandQueueWithProperties(context, device_id, NULL, &ret);
323 #else
324  prims->commandQueue = clCreateCommandQueue(context, device_id, 0, &ret);
325 #endif
326  if (ret != CL_SUCCESS)
327  {
328  WLog_ERR(TAG, "openCL: unable to create command queue");
329  clReleaseContext(context);
330  clReleaseDevice(device_id);
331  continue;
332  }
333 
334  WLog_INFO(TAG, "openCL: using platform=%s device=%s", platformName, deviceName);
335 
336  prims->platformId = platform_ids[i];
337  prims->deviceId = device_id;
338  prims->context = context;
339  gotGPU = TRUE;
340  }
341 
342  free((void*)platform_ids);
343 
344  if (!gotGPU)
345  {
346  WLog_ERR(TAG, "openCL: no GPU found");
347  return FALSE;
348  }
349 
350  programLen = strnlen(openclProgram, sizeof(openclProgram));
351  const char* ptr = openclProgram;
352  prims->program = clCreateProgramWithSource(prims->context, 1, &ptr, &programLen, &ret);
353  if (ret != CL_SUCCESS)
354  {
355  WLog_ERR(TAG, "openCL: unable to create program");
356  goto fail;
357  }
358 
359  ret = clBuildProgram(prims->program, 1, &prims->deviceId, NULL, NULL, NULL);
360  if (ret != CL_SUCCESS)
361  {
362  size_t length = 0;
363  char buffer[2048];
364  ret = clGetProgramBuildInfo(prims->program, prims->deviceId, CL_PROGRAM_BUILD_LOG,
365  sizeof(buffer), buffer, &length);
366  if (ret != CL_SUCCESS)
367  {
368  WLog_ERR(TAG,
369  "openCL: building program failed but unable to retrieve buildLog, error=%d",
370  ret);
371  }
372  else
373  {
374  WLog_ERR(TAG, "openCL: unable to build program, errorLog=%s", buffer);
375  }
376  goto fail;
377  }
378 
379  kernel = clCreateKernel(prims->program, "yuv420_to_bgra_1b", &ret);
380  if (ret != CL_SUCCESS)
381  {
382  WLog_ERR(TAG, "openCL: unable to create yuv420_to_bgra_1b kernel");
383  goto fail;
384  }
385  clReleaseKernel(kernel);
386 
387  prims->support = TRUE;
388  return TRUE;
389 
390 fail:
391  cl_context_free(prims);
392  return FALSE;
393 }
394 
395 static pstatus_t opencl_YUV420ToRGB_8u_P3AC4R(const BYTE* WINPR_RESTRICT pSrc[3],
396  const UINT32 srcStep[3], BYTE* WINPR_RESTRICT pDst,
397  UINT32 dstStep, UINT32 DstFormat,
398  const prim_size_t* WINPR_RESTRICT roi)
399 {
400  const char* kernel_name = NULL;
401 
402  switch (DstFormat)
403  {
404  case PIXEL_FORMAT_ABGR32:
405  kernel_name = "yuv420_to_abgr_1b";
406  break;
407  case PIXEL_FORMAT_XBGR32:
408  kernel_name = "yuv420_to_xbgr_1b";
409  break;
410  case PIXEL_FORMAT_RGBX32:
411  kernel_name = "yuv420_to_rgba_1b";
412  break;
413  case PIXEL_FORMAT_RGBA32:
414  kernel_name = "yuv420_to_rgbx_1b";
415  break;
416  case PIXEL_FORMAT_BGRA32:
417  kernel_name = "yuv420_to_bgra_1b";
418  break;
419  case PIXEL_FORMAT_BGRX32:
420  kernel_name = "yuv420_to_bgrx_1b";
421  break;
422  case PIXEL_FORMAT_XRGB32:
423  kernel_name = "yuv420_to_xrgb_1b";
424  break;
425  case PIXEL_FORMAT_ARGB32:
426  kernel_name = "yuv420_to_argb_1b";
427  break;
428  default:
429  {
430  primitives_t* p = primitives_get_by_type(PRIMITIVES_ONLY_CPU);
431  if (!p)
432  return -1;
433  return p->YUV420ToRGB_8u_P3AC4R(pSrc, srcStep, pDst, dstStep, DstFormat, roi);
434  }
435  }
436 
437  return opencl_YUVToRGB(kernel_name, pSrc, srcStep, pDst, dstStep, roi);
438 }
439 
440 static pstatus_t opencl_YUV444ToRGB_8u_P3AC4R(const BYTE* WINPR_RESTRICT pSrc[3],
441  const UINT32 srcStep[3], BYTE* WINPR_RESTRICT pDst,
442  UINT32 dstStep, UINT32 DstFormat,
443  const prim_size_t* WINPR_RESTRICT roi)
444 {
445  const char* kernel_name = NULL;
446 
447  switch (DstFormat)
448  {
449  case PIXEL_FORMAT_ABGR32:
450  kernel_name = "yuv444_to_abgr_1b";
451  break;
452  case PIXEL_FORMAT_XBGR32:
453  kernel_name = "yuv444_to_xbgr_1b";
454  break;
455  case PIXEL_FORMAT_RGBX32:
456  kernel_name = "yuv444_to_rgba_1b";
457  break;
458  case PIXEL_FORMAT_RGBA32:
459  kernel_name = "yuv444_to_rgbx_1b";
460  break;
461  case PIXEL_FORMAT_BGRA32:
462  kernel_name = "yuv444_to_bgra_1b";
463  break;
464  case PIXEL_FORMAT_BGRX32:
465  kernel_name = "yuv444_to_bgrx_1b";
466  break;
467  case PIXEL_FORMAT_XRGB32:
468  kernel_name = "yuv444_to_xrgb_1b";
469  break;
470  case PIXEL_FORMAT_ARGB32:
471  kernel_name = "yuv444_to_argb_1b";
472  break;
473  default:
474  {
475  primitives_t* p = primitives_get_by_type(PRIMITIVES_ONLY_CPU);
476  if (!p)
477  return -1;
478  return p->YUV444ToRGB_8u_P3AC4R(pSrc, srcStep, pDst, dstStep, DstFormat, roi);
479  }
480  }
481 
482  return opencl_YUVToRGB(kernel_name, pSrc, srcStep, pDst, dstStep, roi);
483 }
484 
485 BOOL primitives_init_opencl(primitives_t* prims)
486 {
487  primitives_t* p = primitives_get_by_type(PRIMITIVES_ONLY_CPU);
488  if (!prims || !p)
489  return FALSE;
490  *prims = *p;
491 
492  if (!primitives_init_opencl_context(&openclContext))
493  return FALSE;
494 
495  prims->YUV420ToRGB_8u_P3AC4R = opencl_YUV420ToRGB_8u_P3AC4R;
496  prims->YUV444ToRGB_8u_P3AC4R = opencl_YUV444ToRGB_8u_P3AC4R;
497  prims->flags |= PRIM_FLAGS_HAVE_EXTGPU;
498  prims->uninit = primitives_uninit_opencl;
499  return TRUE;
500 }
501 #endif