20 #include <freerdp/config.h>
22 #include <freerdp/types.h>
23 #include <freerdp/primitives.h>
24 #include <winpr/sysinfo.h>
26 #include "prim_internal.h"
27 #include "prim_templates.h"
28 #include "prim_YCoCg.h"
30 #if defined(NEON_INTRINSICS_ENABLED)
/**
 * NEON kernel converting 4-byte YCoCg source pixels to 8-bit RGB channels.
 *
 * Each source pixel is read as four interleaved bytes in the order
 * Cg, Co, Y, alpha. Rows are processed eight pixels at a time with NEON;
 * the remaining (width % 8) pixels of each row go through a scalar loop
 * that applies the same arithmetic.
 *
 * @param pSrc      first source byte
 * @param srcStep   source row stride in bytes
 * @param pDst      destination buffer
 * @param DstFormat destination pixel format, used to obtain bytes per pixel
 * @param dstStep   destination row stride in bytes
 * @param width     pixels per row
 * @param height    number of rows
 * @param shift     chroma shift; the code shifts Cg/Co left by (shift - 1)
 * @param bPos      index (0..3) of the output byte lane receiving blue
 * @param gPos      index (0..3) of the output byte lane receiving green
 * @param rPos      index (0..3) of the output byte lane receiving red
 * @param aPos      index (0..3) of the output byte lane receiving alpha
 * @param alpha     presumably selects source alpha vs. opaque 0xFF - TODO confirm
 *
 * @return PRIMITIVES_SUCCESS
 */
35 static pstatus_t neon_YCoCgToRGB_8u_X(
const BYTE* WINPR_RESTRICT pSrc, INT32 srcStep,
36 BYTE* WINPR_RESTRICT pDst, UINT32 DstFormat, INT32 dstStep,
37 UINT32 width, UINT32 height, UINT8 shift, BYTE bPos,
38 BYTE gPos, BYTE rPos, BYTE aPos, BOOL alpha)
41 const BYTE* sptr = pSrc;
42 const DWORD formatSize = FreeRDPGetBytesPerPixel(DstFormat);
/* Shift count for the chroma bytes is one less than `shift`; presumably this
 * folds the halving of the chroma terms into the shift - TODO confirm.
 * NOTE(review): shift == 0 yields cll == -1, which is a negative shift count
 * in the scalar loop below - verify callers never pass 0. */
43 const int8_t cll = shift - 1;
/* Bytes of each row beyond the `width` pixels (source pixels are 4 bytes). */
44 const UINT32 srcPad = srcStep - (width * 4);
45 const UINT32 dstPad = dstStep - (width * formatSize);
/* Pixels per row left over after the 8-wide NEON loop. */
46 const UINT32 pad = width % 8;
/* Constant alpha lane (fully opaque) and the per-lane shift count vector. */
47 const uint8x8_t aVal = vdup_n_u8(0xFF);
48 const int8x8_t cllv = vdup_n_s8(cll);
50 for (UINT32 y = 0; y < height; y++)
/* Vector part: 8 pixels (32 source bytes) per iteration. */
52 for (UINT32 x = 0; x < width - pad; x += 8)
/* De-interleaving load: val[0]=Cg, val[1]=Co, val[2]=Y, val[3]=alpha. */
55 const uint8x8x4_t raw = vld4_u8(sptr);
/* Shift the chroma while still unsigned, then reinterpret as signed. */
56 const int8x8_t CgRaw = vreinterpret_s8_u8(vshl_u8(raw.val[0], cllv));
57 const int8x8_t CoRaw = vreinterpret_s8_u8(vshl_u8(raw.val[1], cllv));
/* Widen to 16 bit: chroma is sign-extended, luma is zero-extended. */
58 const int16x8_t Cg = vmovl_s8(CgRaw);
59 const int16x8_t Co = vmovl_s8(CoRaw);
60 const int16x8_t Y = vreinterpretq_s16_u16(vmovl_u8(raw.val[2]));
/* T = Y - Cg; R = T + Co; G = Y + Cg; B = T - Co */
61 const int16x8_t T = vsubq_s16(Y, Cg);
62 const int16x8_t R = vaddq_s16(T, Co);
63 const int16x8_t G = vaddq_s16(Y, Cg);
64 const int16x8_t B = vsubq_s16(T, Co);
/* Saturating narrow to unsigned 8 bit, placed in the caller-selected lanes. */
66 bgrx.val[bPos] = vqmovun_s16(B);
67 bgrx.val[gPos] = vqmovun_s16(G);
68 bgrx.val[rPos] = vqmovun_s16(R);
/* Alpha lane: either the source alpha (raw.val[3]) or the opaque constant
 * (aVal); presumably chosen by `alpha` - TODO confirm. */
71 bgrx.val[aPos] = raw.val[3];
73 bgrx.val[aPos] = aVal;
/* Scalar tail: same arithmetic for the remaining `pad` pixels, one at a time. */
80 for (UINT32 x = 0; x < pad; x++)
/* As in the vector path, the shift is applied before the signed conversion. */
83 const INT16 Cg = (INT16)((INT8)((*sptr++) << cll));
84 const INT16 Co = (INT16)((INT8)((*sptr++) << cll));
85 const INT16 Y = (INT16)(*sptr++);
86 const INT16 T = Y - Cg;
87 const INT16 R = T + Co;
88 const INT16 G = Y + Cg;
89 const INT16 B = T - Co;
109 return PRIMITIVES_SUCCESS;
/**
 * Entry point for the NEON YCoCg to RGB conversion.
 *
 * For each PIXEL_FORMAT_* case label below (presumably matched against
 * DstFormat - TODO confirm) this forwards to neon_YCoCgToRGB_8u_X with the
 * channel lane indices for that layout. The four trailing numeric arguments
 * are, in order, bPos, gPos, rPos, aPos. The "A" and "X" variant of each
 * layout pass identical positions and both forward `withAlpha` unchanged.
 *
 * The final return delegates to generic->YCoCgToRGB_8u_AC4R with the same
 * arguments; presumably this handles formats not listed - TODO confirm.
 *
 * @return the status returned by the selected implementation
 */
112 static pstatus_t neon_YCoCgToRGB_8u_AC4R(
const BYTE* WINPR_RESTRICT pSrc, INT32 srcStep,
113 BYTE* WINPR_RESTRICT pDst, UINT32 DstFormat, INT32 dstStep,
114 UINT32 width, UINT32 height, UINT8 shift, BOOL withAlpha)
/* bPos=2, gPos=1, rPos=0, aPos=3 */
118 case PIXEL_FORMAT_BGRA32:
119 return neon_YCoCgToRGB_8u_X(pSrc, srcStep, pDst, DstFormat, dstStep, width, height,
120 shift, 2, 1, 0, 3, withAlpha);
122 case PIXEL_FORMAT_BGRX32:
123 return neon_YCoCgToRGB_8u_X(pSrc, srcStep, pDst, DstFormat, dstStep, width, height,
124 shift, 2, 1, 0, 3, withAlpha);
/* bPos=0, gPos=1, rPos=2, aPos=3 */
126 case PIXEL_FORMAT_RGBA32:
127 return neon_YCoCgToRGB_8u_X(pSrc, srcStep, pDst, DstFormat, dstStep, width, height,
128 shift, 0, 1, 2, 3, withAlpha);
130 case PIXEL_FORMAT_RGBX32:
131 return neon_YCoCgToRGB_8u_X(pSrc, srcStep, pDst, DstFormat, dstStep, width, height,
132 shift, 0, 1, 2, 3, withAlpha);
/* bPos=1, gPos=2, rPos=3, aPos=0 */
134 case PIXEL_FORMAT_ARGB32:
135 return neon_YCoCgToRGB_8u_X(pSrc, srcStep, pDst, DstFormat, dstStep, width, height,
136 shift, 1, 2, 3, 0, withAlpha);
138 case PIXEL_FORMAT_XRGB32:
139 return neon_YCoCgToRGB_8u_X(pSrc, srcStep, pDst, DstFormat, dstStep, width, height,
140 shift, 1, 2, 3, 0, withAlpha);
/* bPos=3, gPos=2, rPos=1, aPos=0 */
142 case PIXEL_FORMAT_ABGR32:
143 return neon_YCoCgToRGB_8u_X(pSrc, srcStep, pDst, DstFormat, dstStep, width, height,
144 shift, 3, 2, 1, 0, withAlpha);
146 case PIXEL_FORMAT_XBGR32:
147 return neon_YCoCgToRGB_8u_X(pSrc, srcStep, pDst, DstFormat, dstStep, width, height,
148 shift, 3, 2, 1, 0, withAlpha);
/* Hand off to the generic (non-NEON) implementation. */
151 return generic->YCoCgToRGB_8u_AC4R(pSrc, srcStep, pDst, DstFormat, dstStep, width,
152 height, shift, withAlpha);
/**
 * Installs the NEON YCoCg primitive into a primitives table.
 *
 * When NEON_INTRINSICS_ENABLED is defined, this stores the generic primitives
 * in `generic`, runs primitives_init_YCoCg on `prims`, and - if the processor
 * reports NEON support - replaces prims->YCoCgToRGB_8u_AC4R with the NEON
 * version. The final log message presumably belongs to the branch taken when
 * NEON intrinsics are not compiled in - TODO confirm.
 *
 * @param prims primitives table to initialise
 */
158 void primitives_init_YCoCg_neon(
primitives_t* WINPR_RESTRICT prims)
160 #if defined(NEON_INTRINSICS_ENABLED)
/* Keep the generic table for the fallback call in neon_YCoCgToRGB_8u_AC4R. */
161 generic = primitives_get_generic();
162 primitives_init_YCoCg(prims);
/* Runtime check: only override the entry when the CPU reports NEON. */
164 if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE))
166 WLog_VRB(PRIM_TAG,
"NEON optimizations");
167 prims->YCoCgToRGB_8u_AC4R = neon_YCoCgToRGB_8u_AC4R;
170 WLog_VRB(PRIM_TAG,
"undefined WITH_SIMD or neon intrinsics not available");