FreeRDP
yuv.c
1 #include <winpr/sysinfo.h>
2 #include <winpr/assert.h>
3 #include <winpr/cast.h>
4 #include <winpr/pool.h>
5 
6 #include <freerdp/settings.h>
7 #include <freerdp/codec/region.h>
8 #include <freerdp/primitives.h>
9 #include <freerdp/log.h>
10 #include <freerdp/codec/yuv.h>
11 
12 #define TAG FREERDP_TAG("codec")
13 
14 #define TILE_SIZE 64
15 
16 typedef struct
17 {
18  YUV_CONTEXT* context;
19  const BYTE* pYUVData[3];
20  UINT32 iStride[3];
21  DWORD DstFormat;
22  BYTE* dest;
23  UINT32 nDstStep;
24  RECTANGLE_16 rect;
25 } YUV_PROCESS_WORK_PARAM;
26 
27 typedef struct
28 {
29  YUV_CONTEXT* context;
30  const BYTE* pYUVData[3];
31  UINT32 iStride[3];
32  BYTE* pYUVDstData[3];
33  UINT32 iDstStride[3];
34  RECTANGLE_16 rect;
35  BYTE type;
36 } YUV_COMBINE_WORK_PARAM;
37 
38 typedef struct
39 {
40  YUV_CONTEXT* context;
41  const BYTE* pSrcData;
42 
43  DWORD SrcFormat;
44  UINT32 nSrcStep;
45  RECTANGLE_16 rect;
46  BYTE version;
47 
48  BYTE* pYUVLumaData[3];
49  BYTE* pYUVChromaData[3];
50  UINT32 iStride[3];
51 } YUV_ENCODE_WORK_PARAM;
52 
53 struct S_YUV_CONTEXT
54 {
55  UINT32 width, height;
56  BOOL useThreads;
57  BOOL encoder;
58  UINT32 nthreads;
59  UINT32 heightStep;
60 
61  PTP_POOL threadPool;
62  TP_CALLBACK_ENVIRON ThreadPoolEnv;
63 
64  UINT32 work_object_count;
65  PTP_WORK* work_objects;
66  YUV_ENCODE_WORK_PARAM* work_enc_params;
67  YUV_PROCESS_WORK_PARAM* work_dec_params;
68  YUV_COMBINE_WORK_PARAM* work_combined_params;
69 };
70 
71 static INLINE BOOL avc420_yuv_to_rgb(const BYTE* WINPR_RESTRICT pYUVData[3],
72  const UINT32 iStride[3],
73  const RECTANGLE_16* WINPR_RESTRICT rect, UINT32 nDstStep,
74  BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat)
75 {
76  primitives_t* prims = primitives_get();
77  prim_size_t roi;
78  const BYTE* pYUVPoint[3];
79 
80  WINPR_ASSERT(pYUVData);
81  WINPR_ASSERT(iStride);
82  WINPR_ASSERT(rect);
83  WINPR_ASSERT(pDstData);
84 
85  const INT32 width = rect->right - rect->left;
86  const INT32 height = rect->bottom - rect->top;
87  BYTE* pDstPoint = pDstData + 1ULL * rect->top * nDstStep +
88  1ULL * rect->left * FreeRDPGetBytesPerPixel(DstFormat);
89 
90  pYUVPoint[0] = pYUVData[0] + 1ULL * rect->top * iStride[0] + rect->left;
91  pYUVPoint[1] = pYUVData[1] + 1ULL * rect->top / 2 * iStride[1] + rect->left / 2;
92  pYUVPoint[2] = pYUVData[2] + 1ULL * rect->top / 2 * iStride[2] + rect->left / 2;
93 
94  roi.width = WINPR_ASSERTING_INT_CAST(uint32_t, width);
95  roi.height = WINPR_ASSERTING_INT_CAST(uint32_t, height);
96 
97  if (prims->YUV420ToRGB_8u_P3AC4R(pYUVPoint, iStride, pDstPoint, nDstStep, DstFormat, &roi) !=
98  PRIMITIVES_SUCCESS)
99  return FALSE;
100 
101  return TRUE;
102 }
103 
104 static INLINE BOOL avc444_yuv_to_rgb(const BYTE* WINPR_RESTRICT pYUVData[3],
105  const UINT32 iStride[3],
106  const RECTANGLE_16* WINPR_RESTRICT rect, UINT32 nDstStep,
107  BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat)
108 {
109  primitives_t* prims = primitives_get();
110  prim_size_t roi;
111  const BYTE* pYUVPoint[3];
112 
113  WINPR_ASSERT(pYUVData);
114  WINPR_ASSERT(iStride);
115  WINPR_ASSERT(rect);
116  WINPR_ASSERT(pDstData);
117 
118  const INT32 width = rect->right - rect->left;
119  const INT32 height = rect->bottom - rect->top;
120  BYTE* pDstPoint = pDstData + 1ULL * rect->top * nDstStep +
121  1ULL * rect->left * FreeRDPGetBytesPerPixel(DstFormat);
122 
123  pYUVPoint[0] = pYUVData[0] + 1ULL * rect->top * iStride[0] + rect->left;
124  pYUVPoint[1] = pYUVData[1] + 1ULL * rect->top * iStride[1] + rect->left;
125  pYUVPoint[2] = pYUVData[2] + 1ULL * rect->top * iStride[2] + rect->left;
126 
127  roi.width = WINPR_ASSERTING_INT_CAST(uint32_t, width);
128  roi.height = WINPR_ASSERTING_INT_CAST(uint32_t, height);
129 
130  if (prims->YUV444ToRGB_8u_P3AC4R(pYUVPoint, iStride, pDstPoint, nDstStep, DstFormat, &roi) !=
131  PRIMITIVES_SUCCESS)
132  return FALSE;
133 
134  return TRUE;
135 }
136 
137 static void CALLBACK yuv420_process_work_callback(PTP_CALLBACK_INSTANCE instance, void* context,
138  PTP_WORK work)
139 {
140  YUV_PROCESS_WORK_PARAM* param = (YUV_PROCESS_WORK_PARAM*)context;
141  WINPR_UNUSED(instance);
142  WINPR_UNUSED(work);
143  WINPR_ASSERT(param);
144 
145  if (!avc420_yuv_to_rgb(param->pYUVData, param->iStride, &param->rect, param->nDstStep,
146  param->dest, param->DstFormat))
147  WLog_WARN(TAG, "avc420_yuv_to_rgb failed");
148 }
149 
150 static void CALLBACK yuv444_process_work_callback(PTP_CALLBACK_INSTANCE instance, void* context,
151  PTP_WORK work)
152 {
153  YUV_PROCESS_WORK_PARAM* param = (YUV_PROCESS_WORK_PARAM*)context;
154  WINPR_UNUSED(instance);
155  WINPR_UNUSED(work);
156  WINPR_ASSERT(param);
157 
158  if (!avc444_yuv_to_rgb(param->pYUVData, param->iStride, &param->rect, param->nDstStep,
159  param->dest, param->DstFormat))
160  WLog_WARN(TAG, "avc444_yuv_to_rgb failed");
161 }
162 
163 BOOL yuv_context_reset(YUV_CONTEXT* WINPR_RESTRICT context, UINT32 width, UINT32 height)
164 {
165  BOOL rc = FALSE;
166  WINPR_ASSERT(context);
167 
168  context->width = width;
169  context->height = height;
170  context->heightStep = (height / context->nthreads);
171 
172  if (context->useThreads)
173  {
174  const UINT32 pw = (width + TILE_SIZE - width % TILE_SIZE) / TILE_SIZE;
175  const UINT32 ph = (height + TILE_SIZE - height % TILE_SIZE) / TILE_SIZE;
176 
177  /* We´ve calculated the amount of workers for 64x64 tiles, but the decoder
178  * might get 16x16 tiles mixed in. */
179  const UINT32 count = pw * ph * 16;
180 
181  context->work_object_count = 0;
182  if (context->encoder)
183  {
184  void* tmp = winpr_aligned_recalloc(context->work_enc_params, count,
185  sizeof(YUV_ENCODE_WORK_PARAM), 32);
186  if (!tmp)
187  goto fail;
188  memset(tmp, 0, count * sizeof(YUV_ENCODE_WORK_PARAM));
189 
190  context->work_enc_params = tmp;
191  }
192  else
193  {
194  void* tmp = winpr_aligned_recalloc(context->work_dec_params, count,
195  sizeof(YUV_PROCESS_WORK_PARAM), 32);
196  if (!tmp)
197  goto fail;
198  memset(tmp, 0, count * sizeof(YUV_PROCESS_WORK_PARAM));
199 
200  context->work_dec_params = tmp;
201 
202  void* ctmp = winpr_aligned_recalloc(context->work_combined_params, count,
203  sizeof(YUV_COMBINE_WORK_PARAM), 32);
204  if (!ctmp)
205  goto fail;
206  memset(ctmp, 0, count * sizeof(YUV_COMBINE_WORK_PARAM));
207 
208  context->work_combined_params = ctmp;
209  }
210 
211  void* wtmp =
212  winpr_aligned_recalloc((void*)context->work_objects, count, sizeof(PTP_WORK), 32);
213  if (!wtmp)
214  goto fail;
215  memset(wtmp, 0, count * sizeof(PTP_WORK));
216 
217  context->work_objects = (PTP_WORK*)wtmp;
218  context->work_object_count = count;
219  }
220  rc = TRUE;
221 fail:
222  return rc;
223 }
224 
225 YUV_CONTEXT* yuv_context_new(BOOL encoder, UINT32 ThreadingFlags)
226 {
227  SYSTEM_INFO sysInfos;
228  YUV_CONTEXT* ret = winpr_aligned_calloc(1, sizeof(*ret), 32);
229  if (!ret)
230  return NULL;
231 
233  primitives_get();
234 
235  ret->encoder = encoder;
236  ret->nthreads = 1;
237  if (!(ThreadingFlags & THREADING_FLAGS_DISABLE_THREADS))
238  {
239  GetNativeSystemInfo(&sysInfos);
240  ret->useThreads = (sysInfos.dwNumberOfProcessors > 1);
241  if (ret->useThreads)
242  {
243  ret->nthreads = sysInfos.dwNumberOfProcessors;
244  ret->threadPool = CreateThreadpool(NULL);
245  if (!ret->threadPool)
246  {
247  goto error_threadpool;
248  }
249 
250  InitializeThreadpoolEnvironment(&ret->ThreadPoolEnv);
251  SetThreadpoolCallbackPool(&ret->ThreadPoolEnv, ret->threadPool);
252  }
253  }
254 
255  return ret;
256 
257 error_threadpool:
258  WINPR_PRAGMA_DIAG_PUSH
259  WINPR_PRAGMA_DIAG_IGNORED_MISMATCHED_DEALLOC
260  yuv_context_free(ret);
261  WINPR_PRAGMA_DIAG_POP
262  return NULL;
263 }
264 
265 void yuv_context_free(YUV_CONTEXT* context)
266 {
267  if (!context)
268  return;
269  if (context->useThreads)
270  {
271  if (context->threadPool)
272  CloseThreadpool(context->threadPool);
273  DestroyThreadpoolEnvironment(&context->ThreadPoolEnv);
274  winpr_aligned_free((void*)context->work_objects);
275  winpr_aligned_free(context->work_combined_params);
276  winpr_aligned_free(context->work_enc_params);
277  winpr_aligned_free(context->work_dec_params);
278  }
279  winpr_aligned_free(context);
280 }
281 
282 static INLINE YUV_PROCESS_WORK_PARAM pool_decode_param(const RECTANGLE_16* WINPR_RESTRICT rect,
283  YUV_CONTEXT* WINPR_RESTRICT context,
284  const BYTE* WINPR_RESTRICT pYUVData[3],
285  const UINT32 iStride[3], UINT32 DstFormat,
286  BYTE* WINPR_RESTRICT dest, UINT32 nDstStep)
287 {
288  YUV_PROCESS_WORK_PARAM current = { 0 };
289 
290  WINPR_ASSERT(rect);
291  WINPR_ASSERT(context);
292  WINPR_ASSERT(pYUVData);
293  WINPR_ASSERT(iStride);
294  WINPR_ASSERT(dest);
295 
296  current.context = context;
297  current.DstFormat = DstFormat;
298  current.pYUVData[0] = pYUVData[0];
299  current.pYUVData[1] = pYUVData[1];
300  current.pYUVData[2] = pYUVData[2];
301  current.iStride[0] = iStride[0];
302  current.iStride[1] = iStride[1];
303  current.iStride[2] = iStride[2];
304  current.nDstStep = nDstStep;
305  current.dest = dest;
306  current.rect = *rect;
307  return current;
308 }
309 
310 static BOOL submit_object(PTP_WORK* WINPR_RESTRICT work_object, PTP_WORK_CALLBACK cb,
311  const void* WINPR_RESTRICT param, YUV_CONTEXT* WINPR_RESTRICT context)
312 {
313  union
314  {
315  const void* cpv;
316  void* pv;
317  } cnv;
318 
319  cnv.cpv = param;
320 
321  if (!work_object)
322  return FALSE;
323 
324  *work_object = NULL;
325 
326  if (!param || !context)
327  return FALSE;
328 
329  *work_object = CreateThreadpoolWork(cb, cnv.pv, &context->ThreadPoolEnv);
330  if (!*work_object)
331  return FALSE;
332 
333  SubmitThreadpoolWork(*work_object);
334  return TRUE;
335 }
336 
337 static void free_objects(PTP_WORK* work_objects, UINT32 waitCount)
338 {
339  WINPR_ASSERT(work_objects || (waitCount == 0));
340 
341  for (UINT32 i = 0; i < waitCount; i++)
342  {
343  PTP_WORK cur = work_objects[i];
344  work_objects[i] = NULL;
345 
346  if (!cur)
347  continue;
348 
349  WaitForThreadpoolWorkCallbacks(cur, FALSE);
350  CloseThreadpoolWork(cur);
351  }
352 }
353 
354 static BOOL intersects(UINT32 pos, const RECTANGLE_16* WINPR_RESTRICT regionRects,
355  UINT32 numRegionRects)
356 {
357  WINPR_ASSERT(regionRects || (numRegionRects == 0));
358 
359  for (UINT32 x = pos + 1; x < numRegionRects; x++)
360  {
361  const RECTANGLE_16* what = &regionRects[pos];
362  const RECTANGLE_16* rect = &regionRects[x];
363 
364  if (rectangles_intersects(what, rect))
365  {
366  WLog_WARN(TAG, "YUV decoder: intersecting rectangles, aborting");
367  return TRUE;
368  }
369  }
370 
371  return FALSE;
372 }
373 
374 static RECTANGLE_16 clamp(YUV_CONTEXT* WINPR_RESTRICT context,
375  const RECTANGLE_16* WINPR_RESTRICT rect, UINT32 srcHeight)
376 {
377  WINPR_ASSERT(context);
378  WINPR_ASSERT(rect);
379 
380  RECTANGLE_16 c = *rect;
381  const UINT32 height = MIN(context->height, srcHeight);
382  if (c.top > height)
383  c.top = WINPR_ASSERTING_INT_CAST(UINT16, height);
384  if (c.bottom > height)
385  c.bottom = WINPR_ASSERTING_INT_CAST(UINT16, height);
386  return c;
387 }
388 
389 static BOOL pool_decode(YUV_CONTEXT* WINPR_RESTRICT context, PTP_WORK_CALLBACK cb,
390  const BYTE* WINPR_RESTRICT pYUVData[3], const UINT32 iStride[3],
391  UINT32 yuvHeight, UINT32 DstFormat, BYTE* WINPR_RESTRICT dest,
392  UINT32 nDstStep, const RECTANGLE_16* WINPR_RESTRICT regionRects,
393  UINT32 numRegionRects)
394 {
395  BOOL rc = FALSE;
396  UINT32 waitCount = 0;
397  primitives_t* prims = primitives_get();
398 
399  WINPR_ASSERT(context);
400  WINPR_ASSERT(cb);
401  WINPR_ASSERT(pYUVData);
402  WINPR_ASSERT(iStride);
403  WINPR_ASSERT(dest);
404  WINPR_ASSERT(regionRects || (numRegionRects == 0));
405 
406  if (context->encoder)
407  {
408  WLog_ERR(TAG, "YUV context set up for encoding, can not decode with it, aborting");
409  return FALSE;
410  }
411 
412  if (!context->useThreads || (primitives_flags(prims) & PRIM_FLAGS_HAVE_EXTGPU))
413  {
414  for (UINT32 y = 0; y < numRegionRects; y++)
415  {
416  const RECTANGLE_16 rect = clamp(context, &regionRects[y], yuvHeight);
417  YUV_PROCESS_WORK_PARAM current =
418  pool_decode_param(&rect, context, pYUVData, iStride, DstFormat, dest, nDstStep);
419  cb(NULL, &current, NULL);
420  }
421  return TRUE;
422  }
423 
424  /* case where we use threads */
425  for (UINT32 x = 0; x < numRegionRects; x++)
426  {
427  RECTANGLE_16 r = clamp(context, &regionRects[x], yuvHeight);
428 
429  if (intersects(x, regionRects, numRegionRects))
430  continue;
431 
432  while (r.left < r.right)
433  {
434  RECTANGLE_16 y = r;
435  y.right = MIN(r.right, r.left + TILE_SIZE);
436 
437  while (y.top < y.bottom)
438  {
439  RECTANGLE_16 z = y;
440 
441  if (context->work_object_count <= waitCount)
442  {
443  WLog_ERR(TAG,
444  "YUV decoder: invalid number of tiles, only support less than %" PRIu32
445  ", got %" PRIu32,
446  context->work_object_count, waitCount);
447  goto fail;
448  }
449 
450  YUV_PROCESS_WORK_PARAM* cur = &context->work_dec_params[waitCount];
451  z.bottom = MIN(z.bottom, z.top + TILE_SIZE);
452  if (rectangle_is_empty(&z))
453  continue;
454  *cur = pool_decode_param(&z, context, pYUVData, iStride, DstFormat, dest, nDstStep);
455  if (!submit_object(&context->work_objects[waitCount], cb, cur, context))
456  goto fail;
457  waitCount++;
458  y.top += TILE_SIZE;
459  }
460 
461  r.left += TILE_SIZE;
462  }
463  }
464  rc = TRUE;
465 fail:
466  free_objects(context->work_objects, context->work_object_count);
467  return rc;
468 }
469 
470 static INLINE BOOL check_rect(const YUV_CONTEXT* WINPR_RESTRICT yuv,
471  const RECTANGLE_16* WINPR_RESTRICT rect, UINT32 nDstWidth,
472  UINT32 nDstHeight)
473 {
474  WINPR_ASSERT(yuv);
475  WINPR_ASSERT(rect);
476 
477  /* Check, if the output rectangle is valid in decoded h264 frame. */
478  if ((rect->right > yuv->width) || (rect->left > yuv->width))
479  return FALSE;
480 
481  if ((rect->top > yuv->height) || (rect->bottom > yuv->height))
482  return FALSE;
483 
484  /* Check, if the output rectangle is valid in destination buffer. */
485  if ((rect->right > nDstWidth) || (rect->left > nDstWidth))
486  return FALSE;
487 
488  if ((rect->bottom > nDstHeight) || (rect->top > nDstHeight))
489  return FALSE;
490 
491  return TRUE;
492 }
493 
494 static void CALLBACK yuv444_combine_work_callback(PTP_CALLBACK_INSTANCE instance, void* context,
495  PTP_WORK work)
496 {
497  YUV_COMBINE_WORK_PARAM* param = (YUV_COMBINE_WORK_PARAM*)context;
498  primitives_t* prims = primitives_get();
499 
500  WINPR_ASSERT(param);
501  YUV_CONTEXT* yuv = param->context;
502  WINPR_ASSERT(yuv);
503 
504  const RECTANGLE_16* rect = &param->rect;
505  WINPR_ASSERT(rect);
506 
507  const UINT32 alignedWidth = yuv->width + ((yuv->width % 16 != 0) ? 16 - yuv->width % 16 : 0);
508  const UINT32 alignedHeight =
509  yuv->height + ((yuv->height % 16 != 0) ? 16 - yuv->height % 16 : 0);
510 
511  WINPR_UNUSED(instance);
512  WINPR_UNUSED(work);
513 
514  if (!check_rect(param->context, rect, yuv->width, yuv->height))
515  return;
516 
517  if (prims->YUV420CombineToYUV444(param->type, param->pYUVData, param->iStride, alignedWidth,
518  alignedHeight, param->pYUVDstData, param->iDstStride,
519  rect) != PRIMITIVES_SUCCESS)
520  WLog_WARN(TAG, "YUV420CombineToYUV444 failed");
521 }
522 
523 static INLINE YUV_COMBINE_WORK_PARAM
524 pool_decode_rect_param(const RECTANGLE_16* WINPR_RESTRICT rect, YUV_CONTEXT* WINPR_RESTRICT context,
525  BYTE type, const BYTE* WINPR_RESTRICT pYUVData[3], const UINT32 iStride[3],
526  BYTE* WINPR_RESTRICT pYUVDstData[3], const UINT32 iDstStride[3])
527 {
528  YUV_COMBINE_WORK_PARAM current = { 0 };
529 
530  WINPR_ASSERT(rect);
531  WINPR_ASSERT(context);
532  WINPR_ASSERT(pYUVData);
533  WINPR_ASSERT(iStride);
534  WINPR_ASSERT(pYUVDstData);
535  WINPR_ASSERT(iDstStride);
536 
537  current.context = context;
538  current.pYUVData[0] = pYUVData[0];
539  current.pYUVData[1] = pYUVData[1];
540  current.pYUVData[2] = pYUVData[2];
541  current.pYUVDstData[0] = pYUVDstData[0];
542  current.pYUVDstData[1] = pYUVDstData[1];
543  current.pYUVDstData[2] = pYUVDstData[2];
544  current.iStride[0] = iStride[0];
545  current.iStride[1] = iStride[1];
546  current.iStride[2] = iStride[2];
547  current.iDstStride[0] = iDstStride[0];
548  current.iDstStride[1] = iDstStride[1];
549  current.iDstStride[2] = iDstStride[2];
550  current.type = type;
551  current.rect = *rect;
552  return current;
553 }
554 
555 static BOOL pool_decode_rect(YUV_CONTEXT* WINPR_RESTRICT context, BYTE type,
556  const BYTE* WINPR_RESTRICT pYUVData[3], const UINT32 iStride[3],
557  BYTE* WINPR_RESTRICT pYUVDstData[3], const UINT32 iDstStride[3],
558  const RECTANGLE_16* WINPR_RESTRICT regionRects, UINT32 numRegionRects)
559 {
560  BOOL rc = FALSE;
561  UINT32 waitCount = 0;
562  PTP_WORK_CALLBACK cb = yuv444_combine_work_callback;
563  primitives_t* prims = primitives_get();
564 
565  WINPR_ASSERT(context);
566  WINPR_ASSERT(pYUVData);
567  WINPR_ASSERT(iStride);
568  WINPR_ASSERT(pYUVDstData);
569  WINPR_ASSERT(iDstStride);
570  WINPR_ASSERT(regionRects || (numRegionRects == 0));
571 
572  if (!context->useThreads || (primitives_flags(prims) & PRIM_FLAGS_HAVE_EXTGPU))
573  {
574  for (UINT32 y = 0; y < numRegionRects; y++)
575  {
576  YUV_COMBINE_WORK_PARAM current = pool_decode_rect_param(
577  &regionRects[y], context, type, pYUVData, iStride, pYUVDstData, iDstStride);
578  cb(NULL, &current, NULL);
579  }
580  return TRUE;
581  }
582 
583  /* case where we use threads */
584  for (waitCount = 0; waitCount < numRegionRects; waitCount++)
585  {
586  YUV_COMBINE_WORK_PARAM* current = NULL;
587 
588  if (context->work_object_count <= waitCount)
589  {
590  WLog_ERR(TAG,
591  "YUV rect decoder: invalid number of tiles, only support less than %" PRIu32
592  ", got %" PRIu32,
593  context->work_object_count, waitCount);
594  goto fail;
595  }
596  current = &context->work_combined_params[waitCount];
597  *current = pool_decode_rect_param(&regionRects[waitCount], context, type, pYUVData, iStride,
598  pYUVDstData, iDstStride);
599 
600  if (!submit_object(&context->work_objects[waitCount], cb, current, context))
601  goto fail;
602  }
603 
604  rc = TRUE;
605 fail:
606  free_objects(context->work_objects, context->work_object_count);
607  return rc;
608 }
609 
610 BOOL yuv444_context_decode(YUV_CONTEXT* WINPR_RESTRICT context, BYTE type,
611  const BYTE* WINPR_RESTRICT pYUVData[3], const UINT32 iStride[3],
612  UINT32 srcYuvHeight, BYTE* WINPR_RESTRICT pYUVDstData[3],
613  const UINT32 iDstStride[3], DWORD DstFormat, BYTE* WINPR_RESTRICT dest,
614  UINT32 nDstStep, const RECTANGLE_16* WINPR_RESTRICT regionRects,
615  UINT32 numRegionRects)
616 {
617  const BYTE* pYUVCDstData[3];
618 
619  WINPR_ASSERT(context);
620  WINPR_ASSERT(pYUVData);
621  WINPR_ASSERT(iStride);
622  WINPR_ASSERT(pYUVDstData);
623  WINPR_ASSERT(iDstStride);
624  WINPR_ASSERT(dest);
625  WINPR_ASSERT(regionRects || (numRegionRects == 0));
626 
627  if (context->encoder)
628  {
629  WLog_ERR(TAG, "YUV context set up for encoding, can not decode with it, aborting");
630  return FALSE;
631  }
632  if (!pool_decode_rect(context, type, pYUVData, iStride, pYUVDstData, iDstStride, regionRects,
633  numRegionRects))
634  return FALSE;
635 
636  pYUVCDstData[0] = pYUVDstData[0];
637  pYUVCDstData[1] = pYUVDstData[1];
638  pYUVCDstData[2] = pYUVDstData[2];
639  return pool_decode(context, yuv444_process_work_callback, pYUVCDstData, iDstStride,
640  srcYuvHeight, DstFormat, dest, nDstStep, regionRects, numRegionRects);
641 }
642 
643 BOOL yuv420_context_decode(YUV_CONTEXT* WINPR_RESTRICT context,
644  const BYTE* WINPR_RESTRICT pYUVData[3], const UINT32 iStride[3],
645  UINT32 yuvHeight, DWORD DstFormat, BYTE* WINPR_RESTRICT dest,
646  UINT32 nDstStep, const RECTANGLE_16* WINPR_RESTRICT regionRects,
647  UINT32 numRegionRects)
648 {
649  return pool_decode(context, yuv420_process_work_callback, pYUVData, iStride, yuvHeight,
650  DstFormat, dest, nDstStep, regionRects, numRegionRects);
651 }
652 
653 static void CALLBACK yuv420_encode_work_callback(PTP_CALLBACK_INSTANCE instance, void* context,
654  PTP_WORK work)
655 {
656  prim_size_t roi;
657  YUV_ENCODE_WORK_PARAM* param = (YUV_ENCODE_WORK_PARAM*)context;
658  primitives_t* prims = primitives_get();
659  BYTE* pYUVData[3];
660  const BYTE* src = NULL;
661 
662  WINPR_UNUSED(instance);
663  WINPR_UNUSED(work);
664  WINPR_ASSERT(param);
665 
666  roi.width = param->rect.right - param->rect.left;
667  roi.height = param->rect.bottom - param->rect.top;
668  src = param->pSrcData + 1ULL * param->nSrcStep * param->rect.top +
669  1ULL * param->rect.left * FreeRDPGetBytesPerPixel(param->SrcFormat);
670  pYUVData[0] =
671  param->pYUVLumaData[0] + 1ULL * param->rect.top * param->iStride[0] + param->rect.left;
672  pYUVData[1] = param->pYUVLumaData[1] + 1ULL * param->rect.top / 2 * param->iStride[1] +
673  param->rect.left / 2;
674  pYUVData[2] = param->pYUVLumaData[2] + 1ULL * param->rect.top / 2 * param->iStride[2] +
675  param->rect.left / 2;
676 
677  if (prims->RGBToYUV420_8u_P3AC4R(src, param->SrcFormat, param->nSrcStep, pYUVData,
678  param->iStride, &roi) != PRIMITIVES_SUCCESS)
679  {
680  WLog_ERR(TAG, "error when decoding lines");
681  }
682 }
683 
684 static void CALLBACK yuv444v1_encode_work_callback(PTP_CALLBACK_INSTANCE instance, void* context,
685  PTP_WORK work)
686 {
687  prim_size_t roi;
688  YUV_ENCODE_WORK_PARAM* param = (YUV_ENCODE_WORK_PARAM*)context;
689  primitives_t* prims = primitives_get();
690  BYTE* pYUVLumaData[3];
691  BYTE* pYUVChromaData[3];
692  const BYTE* src = NULL;
693 
694  WINPR_UNUSED(instance);
695  WINPR_UNUSED(work);
696  WINPR_ASSERT(param);
697 
698  roi.width = param->rect.right - param->rect.left;
699  roi.height = param->rect.bottom - param->rect.top;
700  src = param->pSrcData + 1ULL * param->nSrcStep * param->rect.top +
701  1ULL * param->rect.left * FreeRDPGetBytesPerPixel(param->SrcFormat);
702  pYUVLumaData[0] =
703  param->pYUVLumaData[0] + 1ULL * param->rect.top * param->iStride[0] + param->rect.left;
704  pYUVLumaData[1] = param->pYUVLumaData[1] + 1ULL * param->rect.top / 2 * param->iStride[1] +
705  param->rect.left / 2;
706  pYUVLumaData[2] = param->pYUVLumaData[2] + 1ULL * param->rect.top / 2 * param->iStride[2] +
707  param->rect.left / 2;
708  pYUVChromaData[0] =
709  param->pYUVChromaData[0] + 1ULL * param->rect.top * param->iStride[0] + param->rect.left;
710  pYUVChromaData[1] = param->pYUVChromaData[1] + 1ULL * param->rect.top / 2 * param->iStride[1] +
711  param->rect.left / 2;
712  pYUVChromaData[2] = param->pYUVChromaData[2] + 1ULL * param->rect.top / 2 * param->iStride[2] +
713  param->rect.left / 2;
714  if (prims->RGBToAVC444YUV(src, param->SrcFormat, param->nSrcStep, pYUVLumaData, param->iStride,
715  pYUVChromaData, param->iStride, &roi) != PRIMITIVES_SUCCESS)
716  {
717  WLog_ERR(TAG, "error when decoding lines");
718  }
719 }
720 
721 static void CALLBACK yuv444v2_encode_work_callback(PTP_CALLBACK_INSTANCE instance, void* context,
722  PTP_WORK work)
723 {
724  prim_size_t roi;
725  YUV_ENCODE_WORK_PARAM* param = (YUV_ENCODE_WORK_PARAM*)context;
726  primitives_t* prims = primitives_get();
727  BYTE* pYUVLumaData[3];
728  BYTE* pYUVChromaData[3];
729  const BYTE* src = NULL;
730 
731  WINPR_UNUSED(instance);
732  WINPR_UNUSED(work);
733  WINPR_ASSERT(param);
734 
735  roi.width = param->rect.right - param->rect.left;
736  roi.height = param->rect.bottom - param->rect.top;
737  src = param->pSrcData + 1ULL * param->nSrcStep * param->rect.top +
738  1ULL * param->rect.left * FreeRDPGetBytesPerPixel(param->SrcFormat);
739  pYUVLumaData[0] =
740  param->pYUVLumaData[0] + 1ULL * param->rect.top * param->iStride[0] + param->rect.left;
741  pYUVLumaData[1] = param->pYUVLumaData[1] + 1ULL * param->rect.top / 2 * param->iStride[1] +
742  param->rect.left / 2;
743  pYUVLumaData[2] = param->pYUVLumaData[2] + 1ULL * param->rect.top / 2 * param->iStride[2] +
744  param->rect.left / 2;
745  pYUVChromaData[0] =
746  param->pYUVChromaData[0] + 1ULL * param->rect.top * param->iStride[0] + param->rect.left;
747  pYUVChromaData[1] = param->pYUVChromaData[1] + 1ULL * param->rect.top / 2 * param->iStride[1] +
748  param->rect.left / 2;
749  pYUVChromaData[2] = param->pYUVChromaData[2] + 1ULL * param->rect.top / 2 * param->iStride[2] +
750  param->rect.left / 2;
751  if (prims->RGBToAVC444YUVv2(src, param->SrcFormat, param->nSrcStep, pYUVLumaData,
752  param->iStride, pYUVChromaData, param->iStride,
753  &roi) != PRIMITIVES_SUCCESS)
754  {
755  WLog_ERR(TAG, "error when decoding lines");
756  }
757 }
758 
759 static INLINE YUV_ENCODE_WORK_PARAM pool_encode_fill(
760  const RECTANGLE_16* WINPR_RESTRICT rect, YUV_CONTEXT* WINPR_RESTRICT context,
761  const BYTE* WINPR_RESTRICT pSrcData, UINT32 nSrcStep, UINT32 SrcFormat, const UINT32 iStride[],
762  BYTE* WINPR_RESTRICT pYUVLumaData[], BYTE* WINPR_RESTRICT pYUVChromaData[])
763 {
764  YUV_ENCODE_WORK_PARAM current = { 0 };
765 
766  WINPR_ASSERT(rect);
767  WINPR_ASSERT(context);
768  WINPR_ASSERT(pSrcData);
769  WINPR_ASSERT(iStride);
770  WINPR_ASSERT(pYUVLumaData);
771 
772  current.context = context;
773  current.pSrcData = pSrcData;
774  current.SrcFormat = SrcFormat;
775  current.nSrcStep = nSrcStep;
776  current.pYUVLumaData[0] = pYUVLumaData[0];
777  current.pYUVLumaData[1] = pYUVLumaData[1];
778  current.pYUVLumaData[2] = pYUVLumaData[2];
779  if (pYUVChromaData)
780  {
781  current.pYUVChromaData[0] = pYUVChromaData[0];
782  current.pYUVChromaData[1] = pYUVChromaData[1];
783  current.pYUVChromaData[2] = pYUVChromaData[2];
784  }
785  current.iStride[0] = iStride[0];
786  current.iStride[1] = iStride[1];
787  current.iStride[2] = iStride[2];
788 
789  current.rect = *rect;
790 
791  return current;
792 }
793 
794 static BOOL pool_encode(YUV_CONTEXT* WINPR_RESTRICT context, PTP_WORK_CALLBACK cb,
795  const BYTE* WINPR_RESTRICT pSrcData, UINT32 nSrcStep, UINT32 SrcFormat,
796  const UINT32 iStride[], BYTE* WINPR_RESTRICT pYUVLumaData[],
797  BYTE* WINPR_RESTRICT pYUVChromaData[],
798  const RECTANGLE_16* WINPR_RESTRICT regionRects, UINT32 numRegionRects)
799 {
800  BOOL rc = FALSE;
801  primitives_t* prims = primitives_get();
802  UINT32 waitCount = 0;
803 
804  WINPR_ASSERT(context);
805  WINPR_ASSERT(cb);
806  WINPR_ASSERT(pSrcData);
807  WINPR_ASSERT(iStride);
808  WINPR_ASSERT(regionRects || (numRegionRects == 0));
809 
810  if (!context->encoder)
811  {
812 
813  WLog_ERR(TAG, "YUV context set up for decoding, can not encode with it, aborting");
814  return FALSE;
815  }
816 
817  if (!context->useThreads || (primitives_flags(prims) & PRIM_FLAGS_HAVE_EXTGPU))
818  {
819  for (UINT32 x = 0; x < numRegionRects; x++)
820  {
821  YUV_ENCODE_WORK_PARAM current =
822  pool_encode_fill(&regionRects[x], context, pSrcData, nSrcStep, SrcFormat, iStride,
823  pYUVLumaData, pYUVChromaData);
824  cb(NULL, &current, NULL);
825  }
826  return TRUE;
827  }
828 
829  /* case where we use threads */
830  for (UINT32 x = 0; x < numRegionRects; x++)
831  {
832  const RECTANGLE_16* rect = &regionRects[x];
833  const UINT32 height = rect->bottom - rect->top;
834  const UINT32 steps = (height + context->heightStep / 2) / context->heightStep;
835 
836  waitCount += steps;
837  }
838 
839  for (UINT32 x = 0; x < numRegionRects; x++)
840  {
841  const RECTANGLE_16* rect = &regionRects[x];
842  const UINT32 height = rect->bottom - rect->top;
843  const UINT32 steps = (height + context->heightStep / 2) / context->heightStep;
844 
845  for (UINT32 y = 0; y < steps; y++)
846  {
847  RECTANGLE_16 r = *rect;
848  YUV_ENCODE_WORK_PARAM* current = NULL;
849 
850  if (context->work_object_count <= waitCount)
851  {
852  WLog_ERR(TAG,
853  "YUV encoder: invalid number of tiles, only support less than %" PRIu32
854  ", got %" PRIu32,
855  context->work_object_count, waitCount);
856  goto fail;
857  }
858 
859  current = &context->work_enc_params[waitCount];
860  r.top += y * context->heightStep;
861  *current = pool_encode_fill(&r, context, pSrcData, nSrcStep, SrcFormat, iStride,
862  pYUVLumaData, pYUVChromaData);
863  if (!submit_object(&context->work_objects[waitCount], cb, current, context))
864  goto fail;
865  waitCount++;
866  }
867  }
868 
869  rc = TRUE;
870 fail:
871  free_objects(context->work_objects, context->work_object_count);
872  return rc;
873 }
874 
875 BOOL yuv420_context_encode(YUV_CONTEXT* WINPR_RESTRICT context, const BYTE* WINPR_RESTRICT pSrcData,
876  UINT32 nSrcStep, UINT32 SrcFormat, const UINT32 iStride[3],
877  BYTE* WINPR_RESTRICT pYUVData[3],
878  const RECTANGLE_16* WINPR_RESTRICT regionRects, UINT32 numRegionRects)
879 {
880  if (!context || !pSrcData || !iStride || !pYUVData || !regionRects)
881  return FALSE;
882 
883  return pool_encode(context, yuv420_encode_work_callback, pSrcData, nSrcStep, SrcFormat, iStride,
884  pYUVData, NULL, regionRects, numRegionRects);
885 }
886 
887 BOOL yuv444_context_encode(YUV_CONTEXT* WINPR_RESTRICT context, BYTE version,
888  const BYTE* WINPR_RESTRICT pSrcData, UINT32 nSrcStep, UINT32 SrcFormat,
889  const UINT32 iStride[3], BYTE* WINPR_RESTRICT pYUVLumaData[3],
890  BYTE* WINPR_RESTRICT pYUVChromaData[3],
891  const RECTANGLE_16* WINPR_RESTRICT regionRects, UINT32 numRegionRects)
892 {
893  PTP_WORK_CALLBACK cb = NULL;
894  switch (version)
895  {
896  case 1:
897  cb = yuv444v1_encode_work_callback;
898  break;
899  case 2:
900  cb = yuv444v2_encode_work_callback;
901  break;
902  default:
903  return FALSE;
904  }
905 
906  return pool_encode(context, cb, pSrcData, nSrcStep, SrcFormat, iStride, pYUVLumaData,
907  pYUVChromaData, regionRects, numRegionRects);
908 }