FreeRDP
prim_colors.c
1 /* FreeRDP: A Remote Desktop Protocol Client
2  * Color conversion operations.
3  * vi:ts=4 sw=4:
4  *
5  * Copyright 2011 Stephen Erisman
6  * Copyright 2011 Norbert Federa <norbert.federa@thincast.com>
7  * Copyright 2011 Martin Fleisz <martin.fleisz@thincast.com>
8  * (c) Copyright 2012 Hewlett-Packard Development Company, L.P.
9  *
10  * Licensed under the Apache License, Version 2.0 (the "License"); you may
11  * not use this file except in compliance with the License. You may obtain
12  * a copy of the License at http://www.apache.org/licenses/LICENSE-2.0.
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
16  * or implied. See the License for the specific language governing
17  * permissions and limitations under the License.
18  */
19 
20 #include <freerdp/config.h>
21 
22 #include <freerdp/types.h>
23 #include <freerdp/primitives.h>
24 #include <freerdp/codec/color.h>
25 
26 #include "prim_internal.h"
27 #include "prim_colors.h"
28 
#ifndef MINMAX
/* Clamp _v_ into [_l_, _h_]: the lower bound is tested first, then the upper
 * bound. This is a plain macro, so arguments may be evaluated more than once;
 * do not pass expressions with side effects.
 */
#define MINMAX(_v_, _l_, _h_) \
    (((_v_) < (_l_)) ? (_l_) : (((_v_) > (_h_)) ? (_h_) : (_v_)))
#endif /* !MINMAX */
32 /* ------------------------------------------------------------------------- */
33 static pstatus_t general_yCbCrToRGB_16s8u_P3AC4R_BGRX(const INT16* WINPR_RESTRICT pSrc[3],
34  UINT32 srcStep, BYTE* WINPR_RESTRICT pDst,
35  UINT32 dstStep, UINT32 DstFormat,
36  const prim_size_t* WINPR_RESTRICT roi)
37 {
38  BYTE* pRGB = pDst;
39  const INT16* pY = pSrc[0];
40  const INT16* pCb = pSrc[1];
41  const INT16* pCr = pSrc[2];
42  const size_t srcPad = (srcStep - (roi->width * 2)) / 2;
43  const size_t dstPad = (dstStep - (roi->width * 4));
44  const DWORD formatSize = FreeRDPGetBytesPerPixel(DstFormat);
45 
46  for (UINT32 y = 0; y < roi->height; y++)
47  {
48  for (UINT32 x = 0; x < roi->width; x++)
49  {
50  INT16 R = 0;
51  INT16 G = 0;
52  INT16 B = 0;
53  const INT32 divisor = 16;
54  const INT32 Y = (INT32)((UINT32)((*pY++) + 4096) << divisor);
55  const INT32 Cb = (*pCb++);
56  const INT32 Cr = (*pCr++);
57  const INT64 CrR = Cr * (INT64)(1.402525f * (1 << divisor)) * 1LL;
58  const INT64 CrG = Cr * (INT64)(0.714401f * (1 << divisor)) * 1LL;
59  const INT64 CbG = Cb * (INT64)(0.343730f * (1 << divisor)) * 1LL;
60  const INT64 CbB = Cb * (INT64)(1.769905f * (1 << divisor)) * 1LL;
61  R = ((INT16)((CrR + Y) >> divisor) >> 5);
62  G = ((INT16)((Y - CbG - CrG) >> divisor) >> 5);
63  B = ((INT16)((CbB + Y) >> divisor) >> 5);
64  pRGB = writePixelBGRX(pRGB, formatSize, DstFormat, CLIP(R), CLIP(G), CLIP(B), 0);
65  }
66 
67  pY += srcPad;
68  pCb += srcPad;
69  pCr += srcPad;
70  pRGB += dstPad;
71  }
72 
73  return PRIMITIVES_SUCCESS;
74 }
75 
76 static pstatus_t general_yCbCrToRGB_16s8u_P3AC4R_general(const INT16* WINPR_RESTRICT pSrc[3],
77  UINT32 srcStep, BYTE* WINPR_RESTRICT pDst,
78  UINT32 dstStep, UINT32 DstFormat,
79  const prim_size_t* WINPR_RESTRICT roi)
80 {
81  BYTE* pRGB = pDst;
82  const INT16* pY = pSrc[0];
83  const INT16* pCb = pSrc[1];
84  const INT16* pCr = pSrc[2];
85  const size_t srcPad = (srcStep - (roi->width * 2)) / 2;
86  const size_t dstPad = (dstStep - (roi->width * 4));
87  const fkt_writePixel writePixel = getPixelWriteFunction(DstFormat, FALSE);
88  const DWORD formatSize = FreeRDPGetBytesPerPixel(DstFormat);
89 
90  for (UINT32 y = 0; y < roi->height; y++)
91  {
92  for (UINT32 x = 0; x < roi->width; x++)
93  {
94  const INT32 divisor = 16;
95  const INT32 Y = (INT32)((UINT32)((*pY++) + 4096) << divisor);
96  const INT32 Cb = (*pCb++);
97  const INT32 Cr = (*pCr++);
98  const INT64 CrR = Cr * (INT64)(1.402525f * (1 << divisor)) * 1LL;
99  const INT64 CrG = Cr * (INT64)(0.714401f * (1 << divisor)) * 1LL;
100  const INT64 CbG = Cb * (INT64)(0.343730f * (1 << divisor)) * 1LL;
101  const INT64 CbB = Cb * (INT64)(1.769905f * (1 << divisor)) * 1LL;
102  const INT64 R = (CrR + Y) >> (divisor + 5);
103  const INT64 G = (Y - CbG - CrG) >> (divisor + 5);
104  const INT64 B = (CbB + Y) >> (divisor + 5);
105  pRGB = writePixel(pRGB, formatSize, DstFormat, CLIP(R), CLIP(G), CLIP(B), 0);
106  }
107 
108  pY += srcPad;
109  pCb += srcPad;
110  pCr += srcPad;
111  pRGB += dstPad;
112  }
113 
114  return PRIMITIVES_SUCCESS;
115 }
116 
117 static pstatus_t general_yCbCrToRGB_16s8u_P3AC4R(const INT16* WINPR_RESTRICT pSrc[3],
118  UINT32 srcStep, BYTE* WINPR_RESTRICT pDst,
119  UINT32 dstStep, UINT32 DstFormat,
120  const prim_size_t* WINPR_RESTRICT roi)
121 {
122  switch (DstFormat)
123  {
124  case PIXEL_FORMAT_BGRA32:
125  case PIXEL_FORMAT_BGRX32:
126  return general_yCbCrToRGB_16s8u_P3AC4R_BGRX(pSrc, srcStep, pDst, dstStep, DstFormat,
127  roi);
128 
129  default:
130  return general_yCbCrToRGB_16s8u_P3AC4R_general(pSrc, srcStep, pDst, dstStep, DstFormat,
131  roi);
132  }
133 }
134 
135 /* ------------------------------------------------------------------------- */
136 
137 static pstatus_t
138 general_yCbCrToRGB_16s16s_P3P3(const INT16* WINPR_RESTRICT pSrc[3], INT32 srcStep,
139  INT16* WINPR_RESTRICT pDst[3], INT32 dstStep,
140  const prim_size_t* WINPR_RESTRICT roi) /* region of interest */
141 {
153  const INT16* yptr = pSrc[0];
154  const INT16* cbptr = pSrc[1];
155  const INT16* crptr = pSrc[2];
156  INT16* rptr = pDst[0];
157  INT16* gptr = pDst[1];
158  INT16* bptr = pDst[2];
159  UINT32 srcbump = (srcStep - (roi->width * sizeof(UINT16))) / sizeof(UINT16);
160  UINT32 dstbump = (dstStep - (roi->width * sizeof(UINT16))) / sizeof(UINT16);
161 
162  for (UINT32 y = 0; y < roi->height; y++)
163  {
164  for (UINT32 x = 0; x < roi->width; ++x)
165  {
166  /* INT32 is used intentionally because we calculate
167  * with shifted factors!
168  */
169  INT32 cy = (INT32)(*yptr++);
170  INT32 cb = (INT32)(*cbptr++);
171  INT32 cr = (INT32)(*crptr++);
172  INT64 r = 0;
173  INT64 g = 0;
174  INT64 b = 0;
175  /*
176  * This is the slow floating point version kept here for reference.
177  * y = y + 4096; // 128<<5=4096 so that we can scale the sum by>>5
178  * r = y + cr*1.403f;
179  * g = y - cb*0.344f - cr*0.714f;
180  * b = y + cb*1.770f;
181  * y_r_buf[i] = CLIP(r>>5);
182  * cb_g_buf[i] = CLIP(g>>5);
183  * cr_b_buf[i] = CLIP(b>>5);
184  */
185  /*
186  * We scale the factors by << 16 into 32-bit integers in order to
187  * avoid slower floating point multiplications. Since the final
188  * result needs to be scaled by >> 5 we will extract only the
189  * upper 11 bits (>> 21) from the final sum.
190  * Hence we also have to scale the other terms of the sum by << 16.
191  * R: 1.403 << 16 = 91947
192  * G: 0.344 << 16 = 22544, 0.714 << 16 = 46792
193  * B: 1.770 << 16 = 115998
194  */
195  cy = (INT32)((UINT32)(cy + 4096) << 16);
196  r = cy + cr * 91947LL;
197  g = cy - cb * 22544LL - cr * 46792LL;
198  b = cy + cb * 115998LL;
199  *rptr++ = CLIP(r >> 21);
200  *gptr++ = CLIP(g >> 21);
201  *bptr++ = CLIP(b >> 21);
202  }
203 
204  yptr += srcbump;
205  cbptr += srcbump;
206  crptr += srcbump;
207  rptr += dstbump;
208  gptr += dstbump;
209  bptr += dstbump;
210  }
211 
212  return PRIMITIVES_SUCCESS;
213 }
214 
215 /* ------------------------------------------------------------------------- */
216 static pstatus_t
217 general_RGBToYCbCr_16s16s_P3P3(const INT16* WINPR_RESTRICT pSrc[3], INT32 srcStep,
218  INT16* WINPR_RESTRICT pDst[3], INT32 dstStep,
219  const prim_size_t* WINPR_RESTRICT roi) /* region of interest */
220 {
221  /* The encoded YCbCr coefficients are represented as 11.5 fixed-point
222  * numbers:
223  *
224  * 1 sign bit + 10 integer bits + 5 fractional bits
225  *
226  * However only 7 integer bits will be actually used since the value
227  * range is [-128.0, 127.0]. In other words, the encoded coefficients
228  * is scaled by << 5 when interpreted as INT16.
229  * It will be scaled down to original during the quantization phase.
230  */
231  const INT16* rptr = pSrc[0];
232  const INT16* gptr = pSrc[1];
233  const INT16* bptr = pSrc[2];
234  INT16* yptr = pDst[0];
235  INT16* cbptr = pDst[1];
236  INT16* crptr = pDst[2];
237  UINT32 srcbump = (srcStep - (roi->width * sizeof(UINT16))) / sizeof(UINT16);
238  UINT32 dstbump = (dstStep - (roi->width * sizeof(UINT16))) / sizeof(UINT16);
239 
240  for (UINT32 y = 0; y < roi->height; y++)
241  {
242  for (UINT32 x = 0; x < roi->width; ++x)
243  {
244  /* INT32 is used intentionally because we calculate with
245  * shifted factors!
246  */
247  INT32 r = (INT32)(*rptr++);
248  INT32 g = (INT32)(*gptr++);
249  INT32 b = (INT32)(*bptr++);
250  /* We scale the factors by << 15 into 32-bit integers in order
251  * to avoid slower floating point multiplications. Since the
252  * terms need to be scaled by << 5 we simply scale the final
253  * sum by >> 10
254  *
255  * Y: 0.299000 << 15 = 9798, 0.587000 << 15 = 19235,
256  * 0.114000 << 15 = 3735
257  * Cb: 0.168935 << 15 = 5535, 0.331665 << 15 = 10868,
258  * 0.500590 << 15 = 16403
259  * Cr: 0.499813 << 15 = 16377, 0.418531 << 15 = 13714,
260  * 0.081282 << 15 = 2663
261  */
262  INT32 cy = (r * 9798 + g * 19235 + b * 3735) >> 10;
263  INT32 cb = (r * -5535 + g * -10868 + b * 16403) >> 10;
264  INT32 cr = (r * 16377 + g * -13714 + b * -2663) >> 10;
265  *yptr++ = (INT16)MINMAX(cy - 4096, -4096, 4095);
266  *cbptr++ = (INT16)MINMAX(cb, -4096, 4095);
267  *crptr++ = (INT16)MINMAX(cr, -4096, 4095);
268  }
269 
270  yptr += srcbump;
271  cbptr += srcbump;
272  crptr += srcbump;
273  rptr += dstbump;
274  gptr += dstbump;
275  bptr += dstbump;
276  }
277 
278  return PRIMITIVES_SUCCESS;
279 }
280 
281 static INLINE void writeScanlineGeneric(BYTE* dst, DWORD formatSize, UINT32 DstFormat,
282  const INT16* r, const INT16* g, const INT16* b, DWORD width)
283 {
284  fkt_writePixel writePixel = getPixelWriteFunction(DstFormat, FALSE);
285 
286  for (UINT32 x = 0; x < width; x++)
287  dst = writePixel(dst, formatSize, DstFormat, *r++, *g++, *b++, 0);
288 }
289 
290 static INLINE void writeScanlineRGB(BYTE* dst, DWORD formatSize, UINT32 DstFormat, const INT16* r,
291  const INT16* g, const INT16* b, DWORD width)
292 {
293  WINPR_UNUSED(formatSize);
294  WINPR_UNUSED(DstFormat);
295 
296  for (UINT32 x = 0; x < width; x++)
297  {
298  const BYTE R = CLIP(*r++);
299  const BYTE G = CLIP(*g++);
300  const BYTE B = CLIP(*b++);
301  *dst++ = R;
302  *dst++ = G;
303  *dst++ = B;
304  }
305 }
306 
307 static INLINE void writeScanlineBGR(BYTE* dst, DWORD formatSize, UINT32 DstFormat, const INT16* r,
308  const INT16* g, const INT16* b, DWORD width)
309 {
310  WINPR_UNUSED(formatSize);
311  WINPR_UNUSED(DstFormat);
312 
313  for (UINT32 x = 0; x < width; x++)
314  {
315  const BYTE R = CLIP(*r++);
316  const BYTE G = CLIP(*g++);
317  const BYTE B = CLIP(*b++);
318  *dst++ = B;
319  *dst++ = G;
320  *dst++ = R;
321  }
322 }
323 
324 static INLINE void writeScanlineBGRX(BYTE* dst, DWORD formatSize, UINT32 DstFormat, const INT16* r,
325  const INT16* g, const INT16* b, DWORD width)
326 {
327  WINPR_UNUSED(formatSize);
328  WINPR_UNUSED(DstFormat);
329 
330  for (UINT32 x = 0; x < width; x++)
331  {
332  const BYTE R = CLIP(*r++);
333  const BYTE G = CLIP(*g++);
334  const BYTE B = CLIP(*b++);
335  *dst++ = B;
336  *dst++ = G;
337  *dst++ = R;
338  *dst++ = 0xFF;
339  }
340 }
341 
342 static INLINE void writeScanlineRGBX(BYTE* dst, DWORD formatSize, UINT32 DstFormat, const INT16* r,
343  const INT16* g, const INT16* b, DWORD width)
344 {
345  WINPR_UNUSED(formatSize);
346  WINPR_UNUSED(DstFormat);
347 
348  for (UINT32 x = 0; x < width; x++)
349  {
350  const BYTE R = CLIP(*r++);
351  const BYTE G = CLIP(*g++);
352  const BYTE B = CLIP(*b++);
353  *dst++ = R;
354  *dst++ = G;
355  *dst++ = B;
356  *dst++ = 0xFF;
357  }
358 }
359 
360 static INLINE void writeScanlineXBGR(BYTE* dst, DWORD formatSize, UINT32 DstFormat, const INT16* r,
361  const INT16* g, const INT16* b, DWORD width)
362 {
363  WINPR_UNUSED(formatSize);
364  WINPR_UNUSED(DstFormat);
365 
366  for (UINT32 x = 0; x < width; x++)
367  {
368  const BYTE R = CLIP(*r++);
369  const BYTE G = CLIP(*g++);
370  const BYTE B = CLIP(*b++);
371  *dst++ = 0xFF;
372  *dst++ = B;
373  *dst++ = G;
374  *dst++ = R;
375  }
376 }
377 
378 static INLINE void writeScanlineXRGB(BYTE* dst, DWORD formatSize, UINT32 DstFormat, const INT16* r,
379  const INT16* g, const INT16* b, DWORD width)
380 {
381  WINPR_UNUSED(formatSize);
382  WINPR_UNUSED(DstFormat);
383 
384  for (UINT32 x = 0; x < width; x++)
385  {
386  const BYTE R = CLIP(*r++);
387  const BYTE G = CLIP(*g++);
388  const BYTE B = CLIP(*b++);
389  *dst++ = 0xFF;
390  *dst++ = R;
391  *dst++ = G;
392  *dst++ = B;
393  }
394 }
395 
396 typedef void (*fkt_writeScanline)(BYTE*, DWORD, UINT32, const INT16*, const INT16*, const INT16*,
397  DWORD);
398 
399 static INLINE fkt_writeScanline getScanlineWriteFunction(DWORD format)
400 {
401  switch (format)
402  {
403  case PIXEL_FORMAT_ARGB32:
404  case PIXEL_FORMAT_XRGB32:
405  return writeScanlineXRGB;
406 
407  case PIXEL_FORMAT_ABGR32:
408  case PIXEL_FORMAT_XBGR32:
409  return writeScanlineXBGR;
410 
411  case PIXEL_FORMAT_RGBA32:
412  case PIXEL_FORMAT_RGBX32:
413  return writeScanlineRGBX;
414 
415  case PIXEL_FORMAT_BGRA32:
416  case PIXEL_FORMAT_BGRX32:
417  return writeScanlineBGRX;
418 
419  case PIXEL_FORMAT_BGR24:
420  return writeScanlineBGR;
421 
422  case PIXEL_FORMAT_RGB24:
423  return writeScanlineRGB;
424 
425  default:
426  return writeScanlineGeneric;
427  }
428 }
429 
430 /* ------------------------------------------------------------------------- */
431 static pstatus_t general_RGBToRGB_16s8u_P3AC4R_general(
432  const INT16* WINPR_RESTRICT pSrc[3], /* 16-bit R,G, and B arrays */
433  UINT32 srcStep, /* bytes between rows in source data */
434  BYTE* WINPR_RESTRICT pDst, /* 32-bit interleaved ARGB (ABGR?) data */
435  UINT32 dstStep, /* bytes between rows in dest data */
436  UINT32 DstFormat, const prim_size_t* WINPR_RESTRICT roi) /* region of interest */
437 {
438  const INT16* r = pSrc[0];
439  const INT16* g = pSrc[1];
440  const INT16* b = pSrc[2];
441  const DWORD srcAdd = srcStep / sizeof(INT16);
442  fkt_writeScanline writeScanline = getScanlineWriteFunction(DstFormat);
443  const DWORD formatSize = FreeRDPGetBytesPerPixel(DstFormat);
444 
445  for (UINT32 y = 0; y < roi->height; ++y)
446  {
447  (*writeScanline)(pDst, formatSize, DstFormat, r, g, b, roi->width);
448  pDst += dstStep;
449  r += srcAdd;
450  g += srcAdd;
451  b += srcAdd;
452  }
453 
454  return PRIMITIVES_SUCCESS;
455 }
456 
457 static pstatus_t general_RGBToRGB_16s8u_P3AC4R_BGRX(
458  const INT16* WINPR_RESTRICT pSrc[3], /* 16-bit R,G, and B arrays */
459  UINT32 srcStep, /* bytes between rows in source data */
460  BYTE* WINPR_RESTRICT pDst, /* 32-bit interleaved ARGB (ABGR?) data */
461  UINT32 dstStep, /* bytes between rows in dest data */
462  UINT32 DstFormat, const prim_size_t* WINPR_RESTRICT roi) /* region of interest */
463 {
464  const INT16* r = pSrc[0];
465  const INT16* g = pSrc[1];
466  const INT16* b = pSrc[2];
467  const DWORD srcAdd = srcStep / sizeof(INT16);
468  const DWORD formatSize = FreeRDPGetBytesPerPixel(DstFormat);
469 
470  for (UINT32 y = 0; y < roi->height; ++y)
471  {
472  writeScanlineBGRX(pDst, formatSize, DstFormat, r, g, b, roi->width);
473  pDst += dstStep;
474  r += srcAdd;
475  g += srcAdd;
476  b += srcAdd;
477  }
478 
479  return PRIMITIVES_SUCCESS;
480 }
481 
482 static pstatus_t
483 general_RGBToRGB_16s8u_P3AC4R(const INT16* WINPR_RESTRICT pSrc[3], /* 16-bit R,G, and B arrays */
484  UINT32 srcStep, /* bytes between rows in source data */
485  BYTE* WINPR_RESTRICT pDst, /* 32-bit interleaved ARGB (ABGR?) data */
486  UINT32 dstStep, /* bytes between rows in dest data */
487  UINT32 DstFormat,
488  const prim_size_t* WINPR_RESTRICT roi) /* region of interest */
489 {
490  switch (DstFormat)
491  {
492  case PIXEL_FORMAT_BGRA32:
493  case PIXEL_FORMAT_BGRX32:
494  return general_RGBToRGB_16s8u_P3AC4R_BGRX(pSrc, srcStep, pDst, dstStep, DstFormat, roi);
495 
496  default:
497  return general_RGBToRGB_16s8u_P3AC4R_general(pSrc, srcStep, pDst, dstStep, DstFormat,
498  roi);
499  }
500 }
501 /* ------------------------------------------------------------------------- */
502 void primitives_init_colors(primitives_t* WINPR_RESTRICT prims)
503 {
504  prims->yCbCrToRGB_16s8u_P3AC4R = general_yCbCrToRGB_16s8u_P3AC4R;
505  prims->yCbCrToRGB_16s16s_P3P3 = general_yCbCrToRGB_16s16s_P3P3;
506  prims->RGBToYCbCr_16s16s_P3P3 = general_RGBToYCbCr_16s16s_P3P3;
507  prims->RGBToRGB_16s8u_P3AC4R = general_RGBToRGB_16s8u_P3AC4R;
508 }
509 
510 /* ------------------------------------------------------------------------- */
511 void primitives_init_colors_opt(primitives_t* WINPR_RESTRICT prims)
512 {
513  primitives_init_colors_sse2(prims);
514  primitives_init_colors_neon(prims);
515 }