FreeRDP
yuv.c
1 #include <winpr/sysinfo.h>
2 #include <winpr/assert.h>
3 #include <winpr/pool.h>
4 
5 #include <freerdp/settings.h>
6 #include <freerdp/codec/region.h>
7 #include <freerdp/primitives.h>
8 #include <freerdp/log.h>
9 #include <freerdp/codec/yuv.h>
10 
11 #define TAG FREERDP_TAG("codec")
12 
13 #define TILE_SIZE 64
14 
15 typedef struct
16 {
17  YUV_CONTEXT* context;
18  const BYTE* pYUVData[3];
19  UINT32 iStride[3];
20  DWORD DstFormat;
21  BYTE* dest;
22  UINT32 nDstStep;
23  RECTANGLE_16 rect;
24 } YUV_PROCESS_WORK_PARAM;
25 
26 typedef struct
27 {
28  YUV_CONTEXT* context;
29  const BYTE* pYUVData[3];
30  UINT32 iStride[3];
31  BYTE* pYUVDstData[3];
32  UINT32 iDstStride[3];
33  RECTANGLE_16 rect;
34  BYTE type;
35 } YUV_COMBINE_WORK_PARAM;
36 
37 typedef struct
38 {
39  YUV_CONTEXT* context;
40  const BYTE* pSrcData;
41 
42  DWORD SrcFormat;
43  UINT32 nSrcStep;
44  RECTANGLE_16 rect;
45  BYTE version;
46 
47  BYTE* pYUVLumaData[3];
48  BYTE* pYUVChromaData[3];
49  UINT32 iStride[3];
50 } YUV_ENCODE_WORK_PARAM;
51 
52 struct S_YUV_CONTEXT
53 {
54  UINT32 width, height;
55  BOOL useThreads;
56  BOOL encoder;
57  UINT32 nthreads;
58  UINT32 heightStep;
59 
60  PTP_POOL threadPool;
61  TP_CALLBACK_ENVIRON ThreadPoolEnv;
62 
63  UINT32 work_object_count;
64  PTP_WORK* work_objects;
65  YUV_ENCODE_WORK_PARAM* work_enc_params;
66  YUV_PROCESS_WORK_PARAM* work_dec_params;
67  YUV_COMBINE_WORK_PARAM* work_combined_params;
68 };
69 
70 static INLINE BOOL avc420_yuv_to_rgb(const BYTE* WINPR_RESTRICT pYUVData[3],
71  const UINT32 iStride[3],
72  const RECTANGLE_16* WINPR_RESTRICT rect, UINT32 nDstStep,
73  BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat)
74 {
75  primitives_t* prims = primitives_get();
76  prim_size_t roi;
77  const BYTE* pYUVPoint[3];
78 
79  WINPR_ASSERT(pYUVData);
80  WINPR_ASSERT(iStride);
81  WINPR_ASSERT(rect);
82  WINPR_ASSERT(pDstData);
83 
84  const INT32 width = rect->right - rect->left;
85  const INT32 height = rect->bottom - rect->top;
86  BYTE* pDstPoint = pDstData + 1ULL * rect->top * nDstStep +
87  1ULL * rect->left * FreeRDPGetBytesPerPixel(DstFormat);
88 
89  pYUVPoint[0] = pYUVData[0] + 1ULL * rect->top * iStride[0] + rect->left;
90  pYUVPoint[1] = pYUVData[1] + 1ULL * rect->top / 2 * iStride[1] + rect->left / 2;
91  pYUVPoint[2] = pYUVData[2] + 1ULL * rect->top / 2 * iStride[2] + rect->left / 2;
92 
93  roi.width = width;
94  roi.height = height;
95 
96  if (prims->YUV420ToRGB_8u_P3AC4R(pYUVPoint, iStride, pDstPoint, nDstStep, DstFormat, &roi) !=
97  PRIMITIVES_SUCCESS)
98  return FALSE;
99 
100  return TRUE;
101 }
102 
103 static INLINE BOOL avc444_yuv_to_rgb(const BYTE* WINPR_RESTRICT pYUVData[3],
104  const UINT32 iStride[3],
105  const RECTANGLE_16* WINPR_RESTRICT rect, UINT32 nDstStep,
106  BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat)
107 {
108  primitives_t* prims = primitives_get();
109  prim_size_t roi;
110  const BYTE* pYUVPoint[3];
111 
112  WINPR_ASSERT(pYUVData);
113  WINPR_ASSERT(iStride);
114  WINPR_ASSERT(rect);
115  WINPR_ASSERT(pDstData);
116 
117  const INT32 width = rect->right - rect->left;
118  const INT32 height = rect->bottom - rect->top;
119  BYTE* pDstPoint = pDstData + 1ULL * rect->top * nDstStep +
120  1ULL * rect->left * FreeRDPGetBytesPerPixel(DstFormat);
121 
122  pYUVPoint[0] = pYUVData[0] + 1ULL * rect->top * iStride[0] + rect->left;
123  pYUVPoint[1] = pYUVData[1] + 1ULL * rect->top * iStride[1] + rect->left;
124  pYUVPoint[2] = pYUVData[2] + 1ULL * rect->top * iStride[2] + rect->left;
125 
126  roi.width = width;
127  roi.height = height;
128 
129  if (prims->YUV444ToRGB_8u_P3AC4R(pYUVPoint, iStride, pDstPoint, nDstStep, DstFormat, &roi) !=
130  PRIMITIVES_SUCCESS)
131  return FALSE;
132 
133  return TRUE;
134 }
135 
136 static void CALLBACK yuv420_process_work_callback(PTP_CALLBACK_INSTANCE instance, void* context,
137  PTP_WORK work)
138 {
139  YUV_PROCESS_WORK_PARAM* param = (YUV_PROCESS_WORK_PARAM*)context;
140  WINPR_UNUSED(instance);
141  WINPR_UNUSED(work);
142  WINPR_ASSERT(param);
143 
144  if (!avc420_yuv_to_rgb(param->pYUVData, param->iStride, &param->rect, param->nDstStep,
145  param->dest, param->DstFormat))
146  WLog_WARN(TAG, "avc420_yuv_to_rgb failed");
147 }
148 
149 static void CALLBACK yuv444_process_work_callback(PTP_CALLBACK_INSTANCE instance, void* context,
150  PTP_WORK work)
151 {
152  YUV_PROCESS_WORK_PARAM* param = (YUV_PROCESS_WORK_PARAM*)context;
153  WINPR_UNUSED(instance);
154  WINPR_UNUSED(work);
155  WINPR_ASSERT(param);
156 
157  if (!avc444_yuv_to_rgb(param->pYUVData, param->iStride, &param->rect, param->nDstStep,
158  param->dest, param->DstFormat))
159  WLog_WARN(TAG, "avc444_yuv_to_rgb failed");
160 }
161 
162 BOOL yuv_context_reset(YUV_CONTEXT* WINPR_RESTRICT context, UINT32 width, UINT32 height)
163 {
164  BOOL rc = FALSE;
165  WINPR_ASSERT(context);
166 
167  context->width = width;
168  context->height = height;
169  context->heightStep = (height / context->nthreads);
170 
171  if (context->useThreads)
172  {
173  const UINT32 pw = (width + TILE_SIZE - width % TILE_SIZE) / TILE_SIZE;
174  const UINT32 ph = (height + TILE_SIZE - height % TILE_SIZE) / TILE_SIZE;
175 
176  /* We´ve calculated the amount of workers for 64x64 tiles, but the decoder
177  * might get 16x16 tiles mixed in. */
178  const UINT32 count = pw * ph * 16;
179 
180  context->work_object_count = 0;
181  if (context->encoder)
182  {
183  void* tmp = winpr_aligned_recalloc(context->work_enc_params, count,
184  sizeof(YUV_ENCODE_WORK_PARAM), 32);
185  if (!tmp)
186  goto fail;
187  memset(tmp, 0, count * sizeof(YUV_ENCODE_WORK_PARAM));
188 
189  context->work_enc_params = tmp;
190  }
191  else
192  {
193  void* tmp = winpr_aligned_recalloc(context->work_dec_params, count,
194  sizeof(YUV_PROCESS_WORK_PARAM), 32);
195  if (!tmp)
196  goto fail;
197  memset(tmp, 0, count * sizeof(YUV_PROCESS_WORK_PARAM));
198 
199  context->work_dec_params = tmp;
200 
201  void* ctmp = winpr_aligned_recalloc(context->work_combined_params, count,
202  sizeof(YUV_COMBINE_WORK_PARAM), 32);
203  if (!ctmp)
204  goto fail;
205  memset(ctmp, 0, count * sizeof(YUV_COMBINE_WORK_PARAM));
206 
207  context->work_combined_params = ctmp;
208  }
209 
210  void* wtmp = winpr_aligned_recalloc(context->work_objects, count, sizeof(PTP_WORK), 32);
211  if (!wtmp)
212  goto fail;
213  memset(wtmp, 0, count * sizeof(PTP_WORK));
214 
215  context->work_objects = wtmp;
216  context->work_object_count = count;
217  }
218  rc = TRUE;
219 fail:
220  return rc;
221 }
222 
223 YUV_CONTEXT* yuv_context_new(BOOL encoder, UINT32 ThreadingFlags)
224 {
225  SYSTEM_INFO sysInfos;
226  YUV_CONTEXT* ret = winpr_aligned_calloc(1, sizeof(*ret), 32);
227  if (!ret)
228  return NULL;
229 
231  primitives_get();
232 
233  ret->encoder = encoder;
234  ret->nthreads = 1;
235  if (!(ThreadingFlags & THREADING_FLAGS_DISABLE_THREADS))
236  {
237  GetNativeSystemInfo(&sysInfos);
238  ret->useThreads = (sysInfos.dwNumberOfProcessors > 1);
239  if (ret->useThreads)
240  {
241  ret->nthreads = sysInfos.dwNumberOfProcessors;
242  ret->threadPool = CreateThreadpool(NULL);
243  if (!ret->threadPool)
244  {
245  goto error_threadpool;
246  }
247 
248  InitializeThreadpoolEnvironment(&ret->ThreadPoolEnv);
249  SetThreadpoolCallbackPool(&ret->ThreadPoolEnv, ret->threadPool);
250  }
251  }
252 
253  return ret;
254 
255 error_threadpool:
256  WINPR_PRAGMA_DIAG_PUSH
257  WINPR_PRAGMA_DIAG_IGNORED_MISMATCHED_DEALLOC
258  yuv_context_free(ret);
259  WINPR_PRAGMA_DIAG_POP
260  return NULL;
261 }
262 
263 void yuv_context_free(YUV_CONTEXT* context)
264 {
265  if (!context)
266  return;
267  if (context->useThreads)
268  {
269  if (context->threadPool)
270  CloseThreadpool(context->threadPool);
271  DestroyThreadpoolEnvironment(&context->ThreadPoolEnv);
272  winpr_aligned_free(context->work_objects);
273  winpr_aligned_free(context->work_combined_params);
274  winpr_aligned_free(context->work_enc_params);
275  winpr_aligned_free(context->work_dec_params);
276  }
277  winpr_aligned_free(context);
278 }
279 
280 static INLINE YUV_PROCESS_WORK_PARAM pool_decode_param(const RECTANGLE_16* WINPR_RESTRICT rect,
281  YUV_CONTEXT* WINPR_RESTRICT context,
282  const BYTE* WINPR_RESTRICT pYUVData[3],
283  const UINT32 iStride[3], UINT32 DstFormat,
284  BYTE* WINPR_RESTRICT dest, UINT32 nDstStep)
285 {
286  YUV_PROCESS_WORK_PARAM current = { 0 };
287 
288  WINPR_ASSERT(rect);
289  WINPR_ASSERT(context);
290  WINPR_ASSERT(pYUVData);
291  WINPR_ASSERT(iStride);
292  WINPR_ASSERT(dest);
293 
294  current.context = context;
295  current.DstFormat = DstFormat;
296  current.pYUVData[0] = pYUVData[0];
297  current.pYUVData[1] = pYUVData[1];
298  current.pYUVData[2] = pYUVData[2];
299  current.iStride[0] = iStride[0];
300  current.iStride[1] = iStride[1];
301  current.iStride[2] = iStride[2];
302  current.nDstStep = nDstStep;
303  current.dest = dest;
304  current.rect = *rect;
305  return current;
306 }
307 
308 static BOOL submit_object(PTP_WORK* WINPR_RESTRICT work_object, PTP_WORK_CALLBACK cb,
309  const void* WINPR_RESTRICT param, YUV_CONTEXT* WINPR_RESTRICT context)
310 {
311  union
312  {
313  const void* cpv;
314  void* pv;
315  } cnv;
316 
317  cnv.cpv = param;
318 
319  if (!work_object)
320  return FALSE;
321 
322  *work_object = NULL;
323 
324  if (!param || !context)
325  return FALSE;
326 
327  *work_object = CreateThreadpoolWork(cb, cnv.pv, &context->ThreadPoolEnv);
328  if (!*work_object)
329  return FALSE;
330 
331  SubmitThreadpoolWork(*work_object);
332  return TRUE;
333 }
334 
335 static void free_objects(PTP_WORK* work_objects, UINT32 waitCount)
336 {
337  WINPR_ASSERT(work_objects || (waitCount == 0));
338 
339  for (UINT32 i = 0; i < waitCount; i++)
340  {
341  PTP_WORK cur = work_objects[i];
342  work_objects[i] = NULL;
343 
344  if (!cur)
345  continue;
346 
347  WaitForThreadpoolWorkCallbacks(cur, FALSE);
348  CloseThreadpoolWork(cur);
349  }
350 }
351 
352 static BOOL intersects(UINT32 pos, const RECTANGLE_16* WINPR_RESTRICT regionRects,
353  UINT32 numRegionRects)
354 {
355  WINPR_ASSERT(regionRects || (numRegionRects == 0));
356 
357  for (UINT32 x = pos + 1; x < numRegionRects; x++)
358  {
359  const RECTANGLE_16* what = &regionRects[pos];
360  const RECTANGLE_16* rect = &regionRects[x];
361 
362  if (rectangles_intersects(what, rect))
363  {
364  WLog_WARN(TAG, "YUV decoder: intersecting rectangles, aborting");
365  return TRUE;
366  }
367  }
368 
369  return FALSE;
370 }
371 
372 static RECTANGLE_16 clamp(YUV_CONTEXT* WINPR_RESTRICT context,
373  const RECTANGLE_16* WINPR_RESTRICT rect, UINT32 srcHeight)
374 {
375  WINPR_ASSERT(context);
376  WINPR_ASSERT(rect);
377 
378  RECTANGLE_16 c = *rect;
379  const UINT32 height = MIN(context->height, srcHeight);
380  if (c.top > height)
381  c.top = height;
382  if (c.bottom > height)
383  c.bottom = height;
384  return c;
385 }
386 
387 static BOOL pool_decode(YUV_CONTEXT* WINPR_RESTRICT context, PTP_WORK_CALLBACK cb,
388  const BYTE* WINPR_RESTRICT pYUVData[3], const UINT32 iStride[3],
389  UINT32 yuvHeight, UINT32 DstFormat, BYTE* WINPR_RESTRICT dest,
390  UINT32 nDstStep, const RECTANGLE_16* WINPR_RESTRICT regionRects,
391  UINT32 numRegionRects)
392 {
393  BOOL rc = FALSE;
394  UINT32 waitCount = 0;
395  primitives_t* prims = primitives_get();
396 
397  WINPR_ASSERT(context);
398  WINPR_ASSERT(cb);
399  WINPR_ASSERT(pYUVData);
400  WINPR_ASSERT(iStride);
401  WINPR_ASSERT(dest);
402  WINPR_ASSERT(regionRects || (numRegionRects == 0));
403 
404  if (context->encoder)
405  {
406  WLog_ERR(TAG, "YUV context set up for encoding, can not decode with it, aborting");
407  return FALSE;
408  }
409 
410  if (!context->useThreads || (primitives_flags(prims) & PRIM_FLAGS_HAVE_EXTGPU))
411  {
412  for (UINT32 y = 0; y < numRegionRects; y++)
413  {
414  const RECTANGLE_16 rect = clamp(context, &regionRects[y], yuvHeight);
415  YUV_PROCESS_WORK_PARAM current =
416  pool_decode_param(&rect, context, pYUVData, iStride, DstFormat, dest, nDstStep);
417  cb(NULL, &current, NULL);
418  }
419  return TRUE;
420  }
421 
422  /* case where we use threads */
423  for (UINT32 x = 0; x < numRegionRects; x++)
424  {
425  RECTANGLE_16 r = clamp(context, &regionRects[x], yuvHeight);
426 
427  if (intersects(x, regionRects, numRegionRects))
428  continue;
429 
430  while (r.left < r.right)
431  {
432  RECTANGLE_16 y = r;
433  y.right = MIN(r.right, r.left + TILE_SIZE);
434 
435  while (y.top < y.bottom)
436  {
437  RECTANGLE_16 z = y;
438 
439  if (context->work_object_count <= waitCount)
440  {
441  WLog_ERR(TAG,
442  "YUV decoder: invalid number of tiles, only support less than %" PRIu32
443  ", got %" PRIu32,
444  context->work_object_count, waitCount);
445  goto fail;
446  }
447 
448  YUV_PROCESS_WORK_PARAM* cur = &context->work_dec_params[waitCount];
449  z.bottom = MIN(z.bottom, z.top + TILE_SIZE);
450  if (rectangle_is_empty(&z))
451  continue;
452  *cur = pool_decode_param(&z, context, pYUVData, iStride, DstFormat, dest, nDstStep);
453  if (!submit_object(&context->work_objects[waitCount], cb, cur, context))
454  goto fail;
455  waitCount++;
456  y.top += TILE_SIZE;
457  }
458 
459  r.left += TILE_SIZE;
460  }
461  }
462  rc = TRUE;
463 fail:
464  free_objects(context->work_objects, context->work_object_count);
465  return rc;
466 }
467 
468 static INLINE BOOL check_rect(const YUV_CONTEXT* WINPR_RESTRICT yuv,
469  const RECTANGLE_16* WINPR_RESTRICT rect, UINT32 nDstWidth,
470  UINT32 nDstHeight)
471 {
472  WINPR_ASSERT(yuv);
473  WINPR_ASSERT(rect);
474 
475  /* Check, if the output rectangle is valid in decoded h264 frame. */
476  if ((rect->right > yuv->width) || (rect->left > yuv->width))
477  return FALSE;
478 
479  if ((rect->top > yuv->height) || (rect->bottom > yuv->height))
480  return FALSE;
481 
482  /* Check, if the output rectangle is valid in destination buffer. */
483  if ((rect->right > nDstWidth) || (rect->left > nDstWidth))
484  return FALSE;
485 
486  if ((rect->bottom > nDstHeight) || (rect->top > nDstHeight))
487  return FALSE;
488 
489  return TRUE;
490 }
491 
492 static void CALLBACK yuv444_combine_work_callback(PTP_CALLBACK_INSTANCE instance, void* context,
493  PTP_WORK work)
494 {
495  YUV_COMBINE_WORK_PARAM* param = (YUV_COMBINE_WORK_PARAM*)context;
496  primitives_t* prims = primitives_get();
497 
498  WINPR_ASSERT(param);
499  YUV_CONTEXT* yuv = param->context;
500  WINPR_ASSERT(yuv);
501 
502  const RECTANGLE_16* rect = &param->rect;
503  WINPR_ASSERT(rect);
504 
505  const UINT32 alignedWidth = yuv->width + ((yuv->width % 16 != 0) ? 16 - yuv->width % 16 : 0);
506  const UINT32 alignedHeight =
507  yuv->height + ((yuv->height % 16 != 0) ? 16 - yuv->height % 16 : 0);
508 
509  WINPR_UNUSED(instance);
510  WINPR_UNUSED(work);
511 
512  if (!check_rect(param->context, rect, yuv->width, yuv->height))
513  return;
514 
515  if (prims->YUV420CombineToYUV444(param->type, param->pYUVData, param->iStride, alignedWidth,
516  alignedHeight, param->pYUVDstData, param->iDstStride,
517  rect) != PRIMITIVES_SUCCESS)
518  WLog_WARN(TAG, "YUV420CombineToYUV444 failed");
519 }
520 
521 static INLINE YUV_COMBINE_WORK_PARAM
522 pool_decode_rect_param(const RECTANGLE_16* WINPR_RESTRICT rect, YUV_CONTEXT* WINPR_RESTRICT context,
523  BYTE type, const BYTE* WINPR_RESTRICT pYUVData[3], const UINT32 iStride[3],
524  BYTE* WINPR_RESTRICT pYUVDstData[3], const UINT32 iDstStride[3])
525 {
526  YUV_COMBINE_WORK_PARAM current = { 0 };
527 
528  WINPR_ASSERT(rect);
529  WINPR_ASSERT(context);
530  WINPR_ASSERT(pYUVData);
531  WINPR_ASSERT(iStride);
532  WINPR_ASSERT(pYUVDstData);
533  WINPR_ASSERT(iDstStride);
534 
535  current.context = context;
536  current.pYUVData[0] = pYUVData[0];
537  current.pYUVData[1] = pYUVData[1];
538  current.pYUVData[2] = pYUVData[2];
539  current.pYUVDstData[0] = pYUVDstData[0];
540  current.pYUVDstData[1] = pYUVDstData[1];
541  current.pYUVDstData[2] = pYUVDstData[2];
542  current.iStride[0] = iStride[0];
543  current.iStride[1] = iStride[1];
544  current.iStride[2] = iStride[2];
545  current.iDstStride[0] = iDstStride[0];
546  current.iDstStride[1] = iDstStride[1];
547  current.iDstStride[2] = iDstStride[2];
548  current.type = type;
549  current.rect = *rect;
550  return current;
551 }
552 
553 static BOOL pool_decode_rect(YUV_CONTEXT* WINPR_RESTRICT context, BYTE type,
554  const BYTE* WINPR_RESTRICT pYUVData[3], const UINT32 iStride[3],
555  BYTE* WINPR_RESTRICT pYUVDstData[3], const UINT32 iDstStride[3],
556  const RECTANGLE_16* WINPR_RESTRICT regionRects, UINT32 numRegionRects)
557 {
558  BOOL rc = FALSE;
559  UINT32 waitCount = 0;
560  PTP_WORK_CALLBACK cb = yuv444_combine_work_callback;
561  primitives_t* prims = primitives_get();
562 
563  WINPR_ASSERT(context);
564  WINPR_ASSERT(pYUVData);
565  WINPR_ASSERT(iStride);
566  WINPR_ASSERT(pYUVDstData);
567  WINPR_ASSERT(iDstStride);
568  WINPR_ASSERT(regionRects || (numRegionRects == 0));
569 
570  if (!context->useThreads || (primitives_flags(prims) & PRIM_FLAGS_HAVE_EXTGPU))
571  {
572  for (UINT32 y = 0; y < numRegionRects; y++)
573  {
574  YUV_COMBINE_WORK_PARAM current = pool_decode_rect_param(
575  &regionRects[y], context, type, pYUVData, iStride, pYUVDstData, iDstStride);
576  cb(NULL, &current, NULL);
577  }
578  return TRUE;
579  }
580 
581  /* case where we use threads */
582  for (waitCount = 0; waitCount < numRegionRects; waitCount++)
583  {
584  YUV_COMBINE_WORK_PARAM* current = NULL;
585 
586  if (context->work_object_count <= waitCount)
587  {
588  WLog_ERR(TAG,
589  "YUV rect decoder: invalid number of tiles, only support less than %" PRIu32
590  ", got %" PRIu32,
591  context->work_object_count, waitCount);
592  goto fail;
593  }
594  current = &context->work_combined_params[waitCount];
595  *current = pool_decode_rect_param(&regionRects[waitCount], context, type, pYUVData, iStride,
596  pYUVDstData, iDstStride);
597 
598  if (!submit_object(&context->work_objects[waitCount], cb, current, context))
599  goto fail;
600  }
601 
602  rc = TRUE;
603 fail:
604  free_objects(context->work_objects, context->work_object_count);
605  return rc;
606 }
607 
608 BOOL yuv444_context_decode(YUV_CONTEXT* WINPR_RESTRICT context, BYTE type,
609  const BYTE* WINPR_RESTRICT pYUVData[3], const UINT32 iStride[3],
610  UINT32 srcYuvHeight, BYTE* WINPR_RESTRICT pYUVDstData[3],
611  const UINT32 iDstStride[3], DWORD DstFormat, BYTE* WINPR_RESTRICT dest,
612  UINT32 nDstStep, const RECTANGLE_16* WINPR_RESTRICT regionRects,
613  UINT32 numRegionRects)
614 {
615  const BYTE* pYUVCDstData[3];
616 
617  WINPR_ASSERT(context);
618  WINPR_ASSERT(pYUVData);
619  WINPR_ASSERT(iStride);
620  WINPR_ASSERT(pYUVDstData);
621  WINPR_ASSERT(iDstStride);
622  WINPR_ASSERT(dest);
623  WINPR_ASSERT(regionRects || (numRegionRects == 0));
624 
625  if (context->encoder)
626  {
627  WLog_ERR(TAG, "YUV context set up for encoding, can not decode with it, aborting");
628  return FALSE;
629  }
630  if (!pool_decode_rect(context, type, pYUVData, iStride, pYUVDstData, iDstStride, regionRects,
631  numRegionRects))
632  return FALSE;
633 
634  pYUVCDstData[0] = pYUVDstData[0];
635  pYUVCDstData[1] = pYUVDstData[1];
636  pYUVCDstData[2] = pYUVDstData[2];
637  return pool_decode(context, yuv444_process_work_callback, pYUVCDstData, iDstStride,
638  srcYuvHeight, DstFormat, dest, nDstStep, regionRects, numRegionRects);
639 }
640 
641 BOOL yuv420_context_decode(YUV_CONTEXT* WINPR_RESTRICT context,
642  const BYTE* WINPR_RESTRICT pYUVData[3], const UINT32 iStride[3],
643  UINT32 yuvHeight, DWORD DstFormat, BYTE* WINPR_RESTRICT dest,
644  UINT32 nDstStep, const RECTANGLE_16* WINPR_RESTRICT regionRects,
645  UINT32 numRegionRects)
646 {
647  return pool_decode(context, yuv420_process_work_callback, pYUVData, iStride, yuvHeight,
648  DstFormat, dest, nDstStep, regionRects, numRegionRects);
649 }
650 
651 static void CALLBACK yuv420_encode_work_callback(PTP_CALLBACK_INSTANCE instance, void* context,
652  PTP_WORK work)
653 {
654  prim_size_t roi;
655  YUV_ENCODE_WORK_PARAM* param = (YUV_ENCODE_WORK_PARAM*)context;
656  primitives_t* prims = primitives_get();
657  BYTE* pYUVData[3];
658  const BYTE* src = NULL;
659 
660  WINPR_UNUSED(instance);
661  WINPR_UNUSED(work);
662  WINPR_ASSERT(param);
663 
664  roi.width = param->rect.right - param->rect.left;
665  roi.height = param->rect.bottom - param->rect.top;
666  src = param->pSrcData + 1ULL * param->nSrcStep * param->rect.top +
667  1ULL * param->rect.left * FreeRDPGetBytesPerPixel(param->SrcFormat);
668  pYUVData[0] =
669  param->pYUVLumaData[0] + 1ULL * param->rect.top * param->iStride[0] + param->rect.left;
670  pYUVData[1] = param->pYUVLumaData[1] + 1ULL * param->rect.top / 2 * param->iStride[1] +
671  param->rect.left / 2;
672  pYUVData[2] = param->pYUVLumaData[2] + 1ULL * param->rect.top / 2 * param->iStride[2] +
673  param->rect.left / 2;
674 
675  if (prims->RGBToYUV420_8u_P3AC4R(src, param->SrcFormat, param->nSrcStep, pYUVData,
676  param->iStride, &roi) != PRIMITIVES_SUCCESS)
677  {
678  WLog_ERR(TAG, "error when decoding lines");
679  }
680 }
681 
682 static void CALLBACK yuv444v1_encode_work_callback(PTP_CALLBACK_INSTANCE instance, void* context,
683  PTP_WORK work)
684 {
685  prim_size_t roi;
686  YUV_ENCODE_WORK_PARAM* param = (YUV_ENCODE_WORK_PARAM*)context;
687  primitives_t* prims = primitives_get();
688  BYTE* pYUVLumaData[3];
689  BYTE* pYUVChromaData[3];
690  const BYTE* src = NULL;
691 
692  WINPR_UNUSED(instance);
693  WINPR_UNUSED(work);
694  WINPR_ASSERT(param);
695 
696  roi.width = param->rect.right - param->rect.left;
697  roi.height = param->rect.bottom - param->rect.top;
698  src = param->pSrcData + 1ULL * param->nSrcStep * param->rect.top +
699  1ULL * param->rect.left * FreeRDPGetBytesPerPixel(param->SrcFormat);
700  pYUVLumaData[0] =
701  param->pYUVLumaData[0] + 1ULL * param->rect.top * param->iStride[0] + param->rect.left;
702  pYUVLumaData[1] = param->pYUVLumaData[1] + 1ULL * param->rect.top / 2 * param->iStride[1] +
703  param->rect.left / 2;
704  pYUVLumaData[2] = param->pYUVLumaData[2] + 1ULL * param->rect.top / 2 * param->iStride[2] +
705  param->rect.left / 2;
706  pYUVChromaData[0] =
707  param->pYUVChromaData[0] + 1ULL * param->rect.top * param->iStride[0] + param->rect.left;
708  pYUVChromaData[1] = param->pYUVChromaData[1] + 1ULL * param->rect.top / 2 * param->iStride[1] +
709  param->rect.left / 2;
710  pYUVChromaData[2] = param->pYUVChromaData[2] + 1ULL * param->rect.top / 2 * param->iStride[2] +
711  param->rect.left / 2;
712  if (prims->RGBToAVC444YUV(src, param->SrcFormat, param->nSrcStep, pYUVLumaData, param->iStride,
713  pYUVChromaData, param->iStride, &roi) != PRIMITIVES_SUCCESS)
714  {
715  WLog_ERR(TAG, "error when decoding lines");
716  }
717 }
718 
719 static void CALLBACK yuv444v2_encode_work_callback(PTP_CALLBACK_INSTANCE instance, void* context,
720  PTP_WORK work)
721 {
722  prim_size_t roi;
723  YUV_ENCODE_WORK_PARAM* param = (YUV_ENCODE_WORK_PARAM*)context;
724  primitives_t* prims = primitives_get();
725  BYTE* pYUVLumaData[3];
726  BYTE* pYUVChromaData[3];
727  const BYTE* src = NULL;
728 
729  WINPR_UNUSED(instance);
730  WINPR_UNUSED(work);
731  WINPR_ASSERT(param);
732 
733  roi.width = param->rect.right - param->rect.left;
734  roi.height = param->rect.bottom - param->rect.top;
735  src = param->pSrcData + 1ULL * param->nSrcStep * param->rect.top +
736  1ULL * param->rect.left * FreeRDPGetBytesPerPixel(param->SrcFormat);
737  pYUVLumaData[0] =
738  param->pYUVLumaData[0] + 1ULL * param->rect.top * param->iStride[0] + param->rect.left;
739  pYUVLumaData[1] = param->pYUVLumaData[1] + 1ULL * param->rect.top / 2 * param->iStride[1] +
740  param->rect.left / 2;
741  pYUVLumaData[2] = param->pYUVLumaData[2] + 1ULL * param->rect.top / 2 * param->iStride[2] +
742  param->rect.left / 2;
743  pYUVChromaData[0] =
744  param->pYUVChromaData[0] + 1ULL * param->rect.top * param->iStride[0] + param->rect.left;
745  pYUVChromaData[1] = param->pYUVChromaData[1] + 1ULL * param->rect.top / 2 * param->iStride[1] +
746  param->rect.left / 2;
747  pYUVChromaData[2] = param->pYUVChromaData[2] + 1ULL * param->rect.top / 2 * param->iStride[2] +
748  param->rect.left / 2;
749  if (prims->RGBToAVC444YUVv2(src, param->SrcFormat, param->nSrcStep, pYUVLumaData,
750  param->iStride, pYUVChromaData, param->iStride,
751  &roi) != PRIMITIVES_SUCCESS)
752  {
753  WLog_ERR(TAG, "error when decoding lines");
754  }
755 }
756 
757 static INLINE YUV_ENCODE_WORK_PARAM pool_encode_fill(
758  const RECTANGLE_16* WINPR_RESTRICT rect, YUV_CONTEXT* WINPR_RESTRICT context,
759  const BYTE* WINPR_RESTRICT pSrcData, UINT32 nSrcStep, UINT32 SrcFormat, const UINT32 iStride[],
760  BYTE* WINPR_RESTRICT pYUVLumaData[], BYTE* WINPR_RESTRICT pYUVChromaData[])
761 {
762  YUV_ENCODE_WORK_PARAM current = { 0 };
763 
764  WINPR_ASSERT(rect);
765  WINPR_ASSERT(context);
766  WINPR_ASSERT(pSrcData);
767  WINPR_ASSERT(iStride);
768  WINPR_ASSERT(pYUVLumaData);
769 
770  current.context = context;
771  current.pSrcData = pSrcData;
772  current.SrcFormat = SrcFormat;
773  current.nSrcStep = nSrcStep;
774  current.pYUVLumaData[0] = pYUVLumaData[0];
775  current.pYUVLumaData[1] = pYUVLumaData[1];
776  current.pYUVLumaData[2] = pYUVLumaData[2];
777  if (pYUVChromaData)
778  {
779  current.pYUVChromaData[0] = pYUVChromaData[0];
780  current.pYUVChromaData[1] = pYUVChromaData[1];
781  current.pYUVChromaData[2] = pYUVChromaData[2];
782  }
783  current.iStride[0] = iStride[0];
784  current.iStride[1] = iStride[1];
785  current.iStride[2] = iStride[2];
786 
787  current.rect = *rect;
788 
789  return current;
790 }
791 
792 static BOOL pool_encode(YUV_CONTEXT* WINPR_RESTRICT context, PTP_WORK_CALLBACK cb,
793  const BYTE* WINPR_RESTRICT pSrcData, UINT32 nSrcStep, UINT32 SrcFormat,
794  const UINT32 iStride[], BYTE* WINPR_RESTRICT pYUVLumaData[],
795  BYTE* WINPR_RESTRICT pYUVChromaData[],
796  const RECTANGLE_16* WINPR_RESTRICT regionRects, UINT32 numRegionRects)
797 {
798  BOOL rc = FALSE;
799  primitives_t* prims = primitives_get();
800  UINT32 waitCount = 0;
801 
802  WINPR_ASSERT(context);
803  WINPR_ASSERT(cb);
804  WINPR_ASSERT(pSrcData);
805  WINPR_ASSERT(iStride);
806  WINPR_ASSERT(regionRects || (numRegionRects == 0));
807 
808  if (!context->encoder)
809  {
810 
811  WLog_ERR(TAG, "YUV context set up for decoding, can not encode with it, aborting");
812  return FALSE;
813  }
814 
815  if (!context->useThreads || (primitives_flags(prims) & PRIM_FLAGS_HAVE_EXTGPU))
816  {
817  for (UINT32 x = 0; x < numRegionRects; x++)
818  {
819  YUV_ENCODE_WORK_PARAM current =
820  pool_encode_fill(&regionRects[x], context, pSrcData, nSrcStep, SrcFormat, iStride,
821  pYUVLumaData, pYUVChromaData);
822  cb(NULL, &current, NULL);
823  }
824  return TRUE;
825  }
826 
827  /* case where we use threads */
828  for (UINT32 x = 0; x < numRegionRects; x++)
829  {
830  const RECTANGLE_16* rect = &regionRects[x];
831  const UINT32 height = rect->bottom - rect->top;
832  const UINT32 steps = (height + context->heightStep / 2) / context->heightStep;
833 
834  waitCount += steps;
835  }
836 
837  for (UINT32 x = 0; x < numRegionRects; x++)
838  {
839  const RECTANGLE_16* rect = &regionRects[x];
840  const UINT32 height = rect->bottom - rect->top;
841  const UINT32 steps = (height + context->heightStep / 2) / context->heightStep;
842 
843  for (UINT32 y = 0; y < steps; y++)
844  {
845  RECTANGLE_16 r = *rect;
846  YUV_ENCODE_WORK_PARAM* current = NULL;
847 
848  if (context->work_object_count <= waitCount)
849  {
850  WLog_ERR(TAG,
851  "YUV encoder: invalid number of tiles, only support less than %" PRIu32
852  ", got %" PRIu32,
853  context->work_object_count, waitCount);
854  goto fail;
855  }
856 
857  current = &context->work_enc_params[waitCount];
858  r.top += y * context->heightStep;
859  *current = pool_encode_fill(&r, context, pSrcData, nSrcStep, SrcFormat, iStride,
860  pYUVLumaData, pYUVChromaData);
861  if (!submit_object(&context->work_objects[waitCount], cb, current, context))
862  goto fail;
863  waitCount++;
864  }
865  }
866 
867  rc = TRUE;
868 fail:
869  free_objects(context->work_objects, context->work_object_count);
870  return rc;
871 }
872 
873 BOOL yuv420_context_encode(YUV_CONTEXT* WINPR_RESTRICT context, const BYTE* WINPR_RESTRICT pSrcData,
874  UINT32 nSrcStep, UINT32 SrcFormat, const UINT32 iStride[3],
875  BYTE* WINPR_RESTRICT pYUVData[3],
876  const RECTANGLE_16* WINPR_RESTRICT regionRects, UINT32 numRegionRects)
877 {
878  if (!context || !pSrcData || !iStride || !pYUVData || !regionRects)
879  return FALSE;
880 
881  return pool_encode(context, yuv420_encode_work_callback, pSrcData, nSrcStep, SrcFormat, iStride,
882  pYUVData, NULL, regionRects, numRegionRects);
883 }
884 
885 BOOL yuv444_context_encode(YUV_CONTEXT* WINPR_RESTRICT context, BYTE version,
886  const BYTE* WINPR_RESTRICT pSrcData, UINT32 nSrcStep, UINT32 SrcFormat,
887  const UINT32 iStride[3], BYTE* WINPR_RESTRICT pYUVLumaData[3],
888  BYTE* WINPR_RESTRICT pYUVChromaData[3],
889  const RECTANGLE_16* WINPR_RESTRICT regionRects, UINT32 numRegionRects)
890 {
891  PTP_WORK_CALLBACK cb = NULL;
892  switch (version)
893  {
894  case 1:
895  cb = yuv444v1_encode_work_callback;
896  break;
897  case 2:
898  cb = yuv444v2_encode_work_callback;
899  break;
900  default:
901  return FALSE;
902  }
903 
904  return pool_encode(context, cb, pSrcData, nSrcStep, SrcFormat, iStride, pYUVLumaData,
905  pYUVChromaData, regionRects, numRegionRects);
906 }