21 #include <freerdp/config.h>
23 #include <freerdp/types.h>
24 #include <freerdp/primitives.h>
25 #include "prim_internal.h"
27 #if defined(WITH_OPENCL)
29 #include "OpenCL/opencl.h"
33 #include "primitives-opencl-program.h"
35 #include <freerdp/log.h>
36 #define TAG FREERDP_TAG("primitives")
41 cl_platform_id platformId;
42 cl_device_id deviceId;
44 cl_command_queue commandQueue;
46 } primitives_opencl_context;
50 primitives_opencl_context* cl;
56 } primitives_cl_kernel;
58 static primitives_opencl_context* primitives_get_opencl_context(
void);
60 static void cl_kernel_free(primitives_cl_kernel* kernel)
66 clReleaseMemObject(kernel->dstObj);
68 for (
size_t i = 0; i < ARRAYSIZE(kernel->srcObjs); i++)
70 cl_mem obj = kernel->srcObjs[i];
71 kernel->srcObjs[i] = NULL;
73 clReleaseMemObject(obj);
77 clReleaseKernel(kernel->kernel);
82 static primitives_cl_kernel* cl_kernel_new(
const char* kernelName,
const prim_size_t* roi)
84 WINPR_ASSERT(kernelName);
87 primitives_cl_kernel* kernel = calloc(1,
sizeof(primitives_cl_kernel));
92 kernel->cl = primitives_get_opencl_context();
96 cl_int ret = CL_INVALID_VALUE;
97 kernel->kernel = clCreateKernel(kernel->cl->program, kernelName, &ret);
98 if (ret != CL_SUCCESS)
100 WLog_ERR(TAG,
"openCL: unable to create kernel %s", kernelName);
106 cl_kernel_free(kernel);
110 static BOOL cl_kernel_set_sources(primitives_cl_kernel* ctx,
const BYTE* WINPR_RESTRICT pSrc[3],
111 const UINT32 srcStep[3])
113 const char* sourceNames[] = {
"Y",
"U",
"V" };
117 WINPR_ASSERT(srcStep);
119 for (cl_uint i = 0; i < ARRAYSIZE(ctx->srcObjs); i++)
121 cl_int ret = CL_INVALID_VALUE;
122 const BYTE* csrc = pSrc[i];
123 void* WINPR_RESTRICT src = WINPR_CAST_CONST_PTR_AWAY(csrc,
void* WINPR_RESTRICT);
124 ctx->srcObjs[i] = clCreateBuffer(ctx->cl->context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
125 1ull * srcStep[i] * ctx->roi.height, src, &ret);
126 if (ret != CL_SUCCESS)
128 WLog_ERR(TAG,
"unable to create %sobj", sourceNames[i]);
132 ret = clSetKernelArg(ctx->kernel, i * 2,
sizeof(cl_mem), &ctx->srcObjs[i]);
133 if (ret != CL_SUCCESS)
135 WLog_ERR(TAG,
"unable to set arg for %sobj", sourceNames[i]);
139 ret = clSetKernelArg(ctx->kernel, i * 2 + 1,
sizeof(cl_uint), &srcStep[i]);
140 if (ret != CL_SUCCESS)
142 WLog_ERR(TAG,
"unable to set arg stride for %sobj", sourceNames[i]);
150 static BOOL cl_kernel_set_destination(primitives_cl_kernel* ctx, UINT32 dstStep)
155 ctx->dstStep = dstStep;
156 cl_int ret = CL_INVALID_VALUE;
157 ctx->dstObj = clCreateBuffer(ctx->cl->context, CL_MEM_WRITE_ONLY,
158 1ull * dstStep * ctx->roi.height, NULL, &ret);
159 if (ret != CL_SUCCESS)
161 WLog_ERR(TAG,
"unable to create dest obj");
165 ret = clSetKernelArg(ctx->kernel, 6,
sizeof(cl_mem), &ctx->dstObj);
166 if (ret != CL_SUCCESS)
168 WLog_ERR(TAG,
"unable to set arg destObj");
172 ret = clSetKernelArg(ctx->kernel, 7,
sizeof(cl_uint), &dstStep);
173 if (ret != CL_SUCCESS)
175 WLog_ERR(TAG,
"unable to set arg dstStep");
182 static BOOL cl_kernel_process(primitives_cl_kernel* ctx, BYTE* pDst)
187 size_t indexes[2] = { 0 };
188 indexes[0] = ctx->roi.width;
189 indexes[1] = ctx->roi.height;
191 cl_int ret = clEnqueueNDRangeKernel(ctx->cl->commandQueue, ctx->kernel, 2, NULL, indexes, NULL,
193 if (ret != CL_SUCCESS)
195 WLog_ERR(TAG,
"unable to enqueue call kernel");
200 ret = clEnqueueReadBuffer(ctx->cl->commandQueue, ctx->dstObj, CL_TRUE, 0,
201 ctx->roi.height * ctx->dstStep, pDst, 0, NULL, NULL);
202 if (ret != CL_SUCCESS)
204 WLog_ERR(TAG,
"unable to read back buffer");
211 static pstatus_t opencl_YUVToRGB(
const char* kernelName,
const BYTE* WINPR_RESTRICT pSrc[3],
212 const UINT32 srcStep[3], BYTE* WINPR_RESTRICT pDst, UINT32 dstStep,
217 primitives_cl_kernel* ctx = cl_kernel_new(kernelName, roi);
221 if (!cl_kernel_set_sources(ctx, pSrc, srcStep))
224 if (!cl_kernel_set_destination(ctx, dstStep))
227 if (!cl_kernel_process(ctx, pDst))
230 res = PRIMITIVES_SUCCESS;
237 static primitives_opencl_context openclContext = { 0 };
239 static primitives_opencl_context* primitives_get_opencl_context(
void)
241 return &openclContext;
244 static void cl_context_free(primitives_opencl_context* ctx)
248 clReleaseProgram(ctx->program);
249 clReleaseCommandQueue(ctx->commandQueue);
250 clReleaseContext(ctx->context);
251 clReleaseDevice(ctx->deviceId);
252 ctx->support = FALSE;
255 static pstatus_t primitives_uninit_opencl(
void)
257 if (!openclContext.support)
258 return PRIMITIVES_SUCCESS;
260 cl_context_free(&openclContext);
261 return PRIMITIVES_SUCCESS;
264 static BOOL primitives_init_opencl_context(primitives_opencl_context* WINPR_RESTRICT prims)
266 cl_uint ndevices = 0;
267 cl_uint nplatforms = 0;
268 cl_kernel kernel = NULL;
271 size_t programLen = 0;
273 cl_int ret = clGetPlatformIDs(0, NULL, &nplatforms);
274 if (ret != CL_SUCCESS || nplatforms < 1)
277 cl_platform_id* platform_ids = (cl_platform_id*)calloc(nplatforms,
sizeof(cl_platform_id));
281 ret = clGetPlatformIDs(nplatforms, platform_ids, &nplatforms);
282 if (ret != CL_SUCCESS)
284 free((
void*)platform_ids);
288 for (cl_uint i = 0; (i < nplatforms) && !gotGPU; i++)
290 cl_device_id device_id = NULL;
291 cl_context context = NULL;
292 char platformName[1000] = { 0 };
293 char deviceName[1000] = { 0 };
295 ret = clGetPlatformInfo(platform_ids[i], CL_PLATFORM_NAME,
sizeof(platformName),
297 if (ret != CL_SUCCESS)
300 ret = clGetDeviceIDs(platform_ids[i], CL_DEVICE_TYPE_GPU, 1, &device_id, &ndevices);
301 if (ret != CL_SUCCESS)
304 ret = clGetDeviceInfo(device_id, CL_DEVICE_NAME,
sizeof(deviceName), deviceName, NULL);
305 if (ret != CL_SUCCESS)
307 WLog_ERR(TAG,
"openCL: unable get device name for platform %s", platformName);
308 clReleaseDevice(device_id);
312 context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret);
313 if (ret != CL_SUCCESS)
315 WLog_ERR(TAG,
"openCL: unable to create context for platform %s, device %s",
316 platformName, deviceName);
317 clReleaseDevice(device_id);
321 #if defined(CL_VERSION_2_0)
322 prims->commandQueue = clCreateCommandQueueWithProperties(context, device_id, NULL, &ret);
324 prims->commandQueue = clCreateCommandQueue(context, device_id, 0, &ret);
326 if (ret != CL_SUCCESS)
328 WLog_ERR(TAG,
"openCL: unable to create command queue");
329 clReleaseContext(context);
330 clReleaseDevice(device_id);
334 WLog_INFO(TAG,
"openCL: using platform=%s device=%s", platformName, deviceName);
336 prims->platformId = platform_ids[i];
337 prims->deviceId = device_id;
338 prims->context = context;
342 free((
void*)platform_ids);
346 WLog_ERR(TAG,
"openCL: no GPU found");
350 programLen = strnlen(openclProgram,
sizeof(openclProgram));
351 const char* ptr = openclProgram;
352 prims->program = clCreateProgramWithSource(prims->context, 1, &ptr, &programLen, &ret);
353 if (ret != CL_SUCCESS)
355 WLog_ERR(TAG,
"openCL: unable to create program");
359 ret = clBuildProgram(prims->program, 1, &prims->deviceId, NULL, NULL, NULL);
360 if (ret != CL_SUCCESS)
364 ret = clGetProgramBuildInfo(prims->program, prims->deviceId, CL_PROGRAM_BUILD_LOG,
365 sizeof(buffer), buffer, &length);
366 if (ret != CL_SUCCESS)
369 "openCL: building program failed but unable to retrieve buildLog, error=%d",
374 WLog_ERR(TAG,
"openCL: unable to build program, errorLog=%s", buffer);
379 kernel = clCreateKernel(prims->program,
"yuv420_to_bgra_1b", &ret);
380 if (ret != CL_SUCCESS)
382 WLog_ERR(TAG,
"openCL: unable to create yuv420_to_bgra_1b kernel");
385 clReleaseKernel(kernel);
387 prims->support = TRUE;
391 cl_context_free(prims);
395 static pstatus_t opencl_YUV420ToRGB_8u_P3AC4R(
const BYTE* WINPR_RESTRICT pSrc[3],
396 const UINT32 srcStep[3], BYTE* WINPR_RESTRICT pDst,
397 UINT32 dstStep, UINT32 DstFormat,
400 const char* kernel_name = NULL;
404 case PIXEL_FORMAT_ABGR32:
405 kernel_name =
"yuv420_to_abgr_1b";
407 case PIXEL_FORMAT_XBGR32:
408 kernel_name =
"yuv420_to_xbgr_1b";
410 case PIXEL_FORMAT_RGBX32:
411 kernel_name =
"yuv420_to_rgba_1b";
413 case PIXEL_FORMAT_RGBA32:
414 kernel_name =
"yuv420_to_rgbx_1b";
416 case PIXEL_FORMAT_BGRA32:
417 kernel_name =
"yuv420_to_bgra_1b";
419 case PIXEL_FORMAT_BGRX32:
420 kernel_name =
"yuv420_to_bgrx_1b";
422 case PIXEL_FORMAT_XRGB32:
423 kernel_name =
"yuv420_to_xrgb_1b";
425 case PIXEL_FORMAT_ARGB32:
426 kernel_name =
"yuv420_to_argb_1b";
430 primitives_t* p = primitives_get_by_type(PRIMITIVES_ONLY_CPU);
433 return p->YUV420ToRGB_8u_P3AC4R(pSrc, srcStep, pDst, dstStep, DstFormat, roi);
437 return opencl_YUVToRGB(kernel_name, pSrc, srcStep, pDst, dstStep, roi);
440 static pstatus_t opencl_YUV444ToRGB_8u_P3AC4R(
const BYTE* WINPR_RESTRICT pSrc[3],
441 const UINT32 srcStep[3], BYTE* WINPR_RESTRICT pDst,
442 UINT32 dstStep, UINT32 DstFormat,
445 const char* kernel_name = NULL;
449 case PIXEL_FORMAT_ABGR32:
450 kernel_name =
"yuv444_to_abgr_1b";
452 case PIXEL_FORMAT_XBGR32:
453 kernel_name =
"yuv444_to_xbgr_1b";
455 case PIXEL_FORMAT_RGBX32:
456 kernel_name =
"yuv444_to_rgba_1b";
458 case PIXEL_FORMAT_RGBA32:
459 kernel_name =
"yuv444_to_rgbx_1b";
461 case PIXEL_FORMAT_BGRA32:
462 kernel_name =
"yuv444_to_bgra_1b";
464 case PIXEL_FORMAT_BGRX32:
465 kernel_name =
"yuv444_to_bgrx_1b";
467 case PIXEL_FORMAT_XRGB32:
468 kernel_name =
"yuv444_to_xrgb_1b";
470 case PIXEL_FORMAT_ARGB32:
471 kernel_name =
"yuv444_to_argb_1b";
475 primitives_t* p = primitives_get_by_type(PRIMITIVES_ONLY_CPU);
478 return p->YUV444ToRGB_8u_P3AC4R(pSrc, srcStep, pDst, dstStep, DstFormat, roi);
482 return opencl_YUVToRGB(kernel_name, pSrc, srcStep, pDst, dstStep, roi);
487 primitives_t* p = primitives_get_by_type(PRIMITIVES_ONLY_CPU);
492 if (!primitives_init_opencl_context(&openclContext))
495 prims->YUV420ToRGB_8u_P3AC4R = opencl_YUV420ToRGB_8u_P3AC4R;
496 prims->YUV444ToRGB_8u_P3AC4R = opencl_YUV444ToRGB_8u_P3AC4R;
497 prims->flags |= PRIM_FLAGS_HAVE_EXTGPU;
498 prims->uninit = primitives_uninit_opencl;