FreeRDP
prim_internal.h
1 /* prim_internal.h
2  * vi:ts=4 sw=4
3  *
4  * (c) Copyright 2012 Hewlett-Packard Development Company, L.P.
5  * Licensed under the Apache License, Version 2.0 (the "License"); you may
6  * not use this file except in compliance with the License. You may obtain
7  * a copy of the License at http://www.apache.org/licenses/LICENSE-2.0.
8  * Unless required by applicable law or agreed to in writing, software
9  * distributed under the License is distributed on an "AS IS" BASIS,
10  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
11  * or implied. See the License for the specific language governing
12  * permissions and limitations under the License. Algorithms used by
13  * this code may be covered by patents by HP, Microsoft, or other parties.
14  *
15  */
16 
17 #ifndef FREERDP_LIB_PRIM_INTERNAL_H
18 #define FREERDP_LIB_PRIM_INTERNAL_H
19 
20 #include <winpr/platform.h>
21 #include <freerdp/config.h>
22 
23 #include <freerdp/primitives.h>
24 #include <freerdp/api.h>
25 
26 #include <freerdp/log.h>
27 
28 #include "../core/simd.h"
29 
/* Tag for the "primitives" component, built with FREERDP_TAG
 * (presumably consumed by the logging macros from <freerdp/log.h> - confirm at call sites). */
#define PRIM_TAG FREERDP_TAG("primitives")

/* PRIM_ALIGN_128: compiler-specific attribute requesting 16-byte (128-bit) alignment.
 * NOTE(review): the macro stays undefined when neither __GNUC__ nor _WIN32 is defined. */
#ifdef __GNUC__
#define PRIM_ALIGN_128 __attribute__((aligned(16)))
#else
#ifdef _WIN32
#define PRIM_ALIGN_128 __declspec(align(16))
#endif
#endif

/* Defined when any accelerated backend (SSE/AVX, NEON or OpenCL) is enabled. */
#if defined(SSE_AVX_INTRINSICS_ENABLED) || defined(NEON_INTRINSICS_ENABLED) || defined(WITH_OPENCL)
#define HAVE_OPTIMIZED_PRIMITIVES 1
#endif

/* Defined only for the CPU intrinsic backends (SSE/AVX or NEON); OpenCL alone does not set it. */
#if defined(SSE_AVX_INTRINSICS_ENABLED) || defined(NEON_INTRINSICS_ENABLED)
#define HAVE_CPU_OPTIMIZED_PRIMITIVES 1
#endif
47 
48 #if defined(SSE_AVX_INTRINSICS_ENABLED)
49 #include <emmintrin.h>
/**
 * Unsigned convenience wrapper around _mm_set_epi32.
 *
 * Each argument is cast to int32_t and forwarded in the same order, so
 * val1 lands in the highest 32-bit lane and val4 in the lowest.
 *
 * @return the packed 128-bit vector
 */
static inline __m128i mm_set_epu32(uint32_t val1, uint32_t val2, uint32_t val3, uint32_t val4)
{
	const int32_t lane3 = (int32_t)val1;
	const int32_t lane2 = (int32_t)val2;
	const int32_t lane1 = (int32_t)val3;
	const int32_t lane0 = (int32_t)val4;

	return _mm_set_epi32(lane3, lane2, lane1, lane0);
}
54 
/**
 * Unsigned convenience wrapper around _mm_set1_epi32: the value is cast to
 * int32_t and broadcast to all four 32-bit lanes.
 *
 * @return the packed 128-bit vector
 */
static inline __m128i mm_set1_epu32(uint32_t val)
{
	const int32_t lane = (int32_t)val;

	return _mm_set1_epi32(lane);
}
59 
/**
 * Unsigned convenience wrapper around _mm_set1_epi8: the value is cast to
 * int8_t and broadcast to all sixteen byte lanes.
 *
 * @return the packed 128-bit vector
 */
static inline __m128i mm_set1_epu8(uint8_t val)
{
	const int8_t lane = (int8_t)val;

	return _mm_set1_epi8(lane);
}
64 
/* Load 128 bits from _ptr_: when any of the low four address bits is set the
 * pointer is not 16-byte aligned and _mm_lddqu_si128 is used, otherwise
 * _mm_load_si128. The argument is expanded more than once, so it must not
 * have side effects. */
#define LOAD_SI128(_ptr_)                                    \
	((((ULONG_PTR)(_ptr_)) & 0x0f) != 0                      \
	     ? _mm_lddqu_si128((const __m128i*)(_ptr_))          \
	     : _mm_load_si128((const __m128i*)(_ptr_)))
69 #endif
70 
71 static INLINE BYTE* writePixelBGRA(BYTE* dst, DWORD formatSize, UINT32 format, BYTE R, BYTE G,
72  BYTE B, BYTE A)
73 {
74  WINPR_UNUSED(formatSize);
75  WINPR_UNUSED(format);
76 
77  *dst++ = B;
78  *dst++ = G;
79  *dst++ = R;
80  *dst++ = A;
81  return dst;
82 }
83 
84 static INLINE BYTE* writePixelBGRX(BYTE* dst, DWORD formatSize, UINT32 format, BYTE R, BYTE G,
85  BYTE B, BYTE A)
86 {
87  WINPR_UNUSED(formatSize);
88  WINPR_UNUSED(format);
89  WINPR_UNUSED(A);
90 
91  *dst++ = B;
92  *dst++ = G;
93  *dst++ = R;
94  dst++; /* Do not touch alpha */
95 
96  return dst;
97 }
98 
99 static INLINE BYTE* writePixelRGBA(BYTE* dst, DWORD formatSize, UINT32 format, BYTE R, BYTE G,
100  BYTE B, BYTE A)
101 {
102  WINPR_UNUSED(formatSize);
103  WINPR_UNUSED(format);
104 
105  *dst++ = R;
106  *dst++ = G;
107  *dst++ = B;
108  *dst++ = A;
109  return dst;
110 }
111 
112 static INLINE BYTE* writePixelRGBX(BYTE* dst, DWORD formatSize, UINT32 format, BYTE R, BYTE G,
113  BYTE B, BYTE A)
114 {
115  WINPR_UNUSED(formatSize);
116  WINPR_UNUSED(format);
117  WINPR_UNUSED(A);
118 
119  *dst++ = R;
120  *dst++ = G;
121  *dst++ = B;
122  dst++; /* Do not touch alpha */
123 
124  return dst;
125 }
126 
127 static INLINE BYTE* writePixelABGR(BYTE* dst, DWORD formatSize, UINT32 format, BYTE R, BYTE G,
128  BYTE B, BYTE A)
129 {
130  WINPR_UNUSED(formatSize);
131  WINPR_UNUSED(format);
132 
133  *dst++ = A;
134  *dst++ = B;
135  *dst++ = G;
136  *dst++ = R;
137  return dst;
138 }
139 
140 static INLINE BYTE* writePixelXBGR(BYTE* dst, DWORD formatSize, UINT32 format, BYTE R, BYTE G,
141  BYTE B, BYTE A)
142 {
143  WINPR_UNUSED(formatSize);
144  WINPR_UNUSED(format);
145  WINPR_UNUSED(A);
146 
147  dst++; /* Do not touch alpha */
148  *dst++ = B;
149  *dst++ = G;
150  *dst++ = R;
151  return dst;
152 }
153 
154 static INLINE BYTE* writePixelARGB(BYTE* dst, DWORD formatSize, UINT32 format, BYTE R, BYTE G,
155  BYTE B, BYTE A)
156 {
157  WINPR_UNUSED(formatSize);
158  WINPR_UNUSED(format);
159 
160  *dst++ = A;
161  *dst++ = R;
162  *dst++ = G;
163  *dst++ = B;
164  return dst;
165 }
166 
167 static INLINE BYTE* writePixelXRGB(BYTE* dst, DWORD formatSize, UINT32 format, BYTE R, BYTE G,
168  BYTE B, BYTE A)
169 {
170  WINPR_UNUSED(formatSize);
171  WINPR_UNUSED(format);
172  WINPR_UNUSED(A);
173 
174  dst++; /* Do not touch alpha */
175  *dst++ = R;
176  *dst++ = G;
177  *dst++ = B;
178  return dst;
179 }
180 
181 static INLINE BYTE* writePixelGenericAlpha(BYTE* dst, DWORD formatSize, UINT32 format, BYTE R,
182  BYTE G, BYTE B, BYTE A)
183 {
184  UINT32 color = FreeRDPGetColor(format, R, G, B, A);
185  FreeRDPWriteColor(dst, format, color);
186  return dst + formatSize;
187 }
188 
189 static INLINE BYTE* writePixelGeneric(BYTE* dst, DWORD formatSize, UINT32 format, BYTE R, BYTE G,
190  BYTE B, BYTE A)
191 {
192  UINT32 color = FreeRDPGetColor(format, R, G, B, A);
193  FreeRDPWriteColorIgnoreAlpha(dst, format, color);
194  return dst + formatSize;
195 }
196 
197 typedef BYTE* (*fkt_writePixel)(BYTE*, DWORD, UINT32, BYTE, BYTE, BYTE, BYTE);
198 
199 static INLINE fkt_writePixel getPixelWriteFunction(DWORD format, BOOL useAlpha)
200 {
201  switch (format)
202  {
203  case PIXEL_FORMAT_ARGB32:
204  case PIXEL_FORMAT_XRGB32:
205  return useAlpha ? writePixelARGB : writePixelXRGB;
206 
207  case PIXEL_FORMAT_ABGR32:
208  case PIXEL_FORMAT_XBGR32:
209  return useAlpha ? writePixelABGR : writePixelXBGR;
210 
211  case PIXEL_FORMAT_RGBA32:
212  case PIXEL_FORMAT_RGBX32:
213  return useAlpha ? writePixelRGBA : writePixelRGBX;
214 
215  case PIXEL_FORMAT_BGRA32:
216  case PIXEL_FORMAT_BGRX32:
217  return useAlpha ? writePixelBGRA : writePixelBGRX;
218 
219  default:
220  return useAlpha ? writePixelGenericAlpha : writePixelGeneric;
221  }
222 }
223 
224 static INLINE BYTE CLIP(INT64 X)
225 {
226  if (X > 255L)
227  return 255L;
228 
229  if (X < 0L)
230  return 0L;
231 
232  return (BYTE)X;
233 }
234 
235 static INLINE BYTE CONDITIONAL_CLIP(INT32 in, BYTE original)
236 {
237  BYTE out = CLIP(in);
238  BYTE diff = 0;
239  if (out > original)
240  diff = out - original;
241  else
242  diff = original - out;
243  if (diff < 30)
244  return original;
245  return out;
246 }
247 
253 static INLINE INT32 C(INT32 Y)
254 {
255  return (Y)-0L;
256 }
257 
258 static INLINE INT32 D(INT32 U)
259 {
260  return (U)-128L;
261 }
262 
263 static INLINE INT32 E(INT32 V)
264 {
265  return (V)-128L;
266 }
267 
268 static INLINE BYTE YUV2R(INT32 Y, INT32 U, INT32 V)
269 {
270  const INT32 r = (256L * C(Y) + 0L * D(U) + 403L * E(V));
271  const INT32 r8 = r >> 8L;
272  return CLIP(r8);
273 }
274 
275 static INLINE BYTE YUV2G(INT32 Y, INT32 U, INT32 V)
276 {
277  const INT32 g = (256L * C(Y) - 48L * D(U) - 120L * E(V));
278  const INT32 g8 = g >> 8L;
279  return CLIP(g8);
280 }
281 
282 static INLINE BYTE YUV2B(INT32 Y, INT32 U, INT32 V)
283 {
284  const INT32 b = (256L * C(Y) + 475L * D(U) + 0L * E(V));
285  const INT32 b8 = b >> 8L;
286  return CLIP(b8);
287 }
288 
/* Function prototypes for the per-category init routines.
 * Each one receives the primitives_t table to work on; the implementations
 * live outside this header (presumably they fill in the function pointers of
 * their category - confirm in the corresponding source files). */
FREERDP_LOCAL void primitives_init_copy(primitives_t* WINPR_RESTRICT prims);
FREERDP_LOCAL void primitives_init_set(primitives_t* WINPR_RESTRICT prims);
FREERDP_LOCAL void primitives_init_add(primitives_t* WINPR_RESTRICT prims);
FREERDP_LOCAL void primitives_init_andor(primitives_t* WINPR_RESTRICT prims);
FREERDP_LOCAL void primitives_init_shift(primitives_t* WINPR_RESTRICT prims);
FREERDP_LOCAL void primitives_init_sign(primitives_t* WINPR_RESTRICT prims);
FREERDP_LOCAL void primitives_init_alphaComp(primitives_t* WINPR_RESTRICT prims);
FREERDP_LOCAL void primitives_init_colors(primitives_t* WINPR_RESTRICT prims);
FREERDP_LOCAL void primitives_init_YCoCg(primitives_t* WINPR_RESTRICT prims);
FREERDP_LOCAL void primitives_init_YUV(primitives_t* WINPR_RESTRICT prims);

/* "_opt" counterparts of the routines above, one per category
 * (presumably the optimized/SIMD variants - confirm in the implementations). */
FREERDP_LOCAL void primitives_init_copy_opt(primitives_t* WINPR_RESTRICT prims);
FREERDP_LOCAL void primitives_init_set_opt(primitives_t* WINPR_RESTRICT prims);
FREERDP_LOCAL void primitives_init_add_opt(primitives_t* WINPR_RESTRICT prims);
FREERDP_LOCAL void primitives_init_andor_opt(primitives_t* WINPR_RESTRICT prims);
FREERDP_LOCAL void primitives_init_shift_opt(primitives_t* WINPR_RESTRICT prims);
FREERDP_LOCAL void primitives_init_sign_opt(primitives_t* WINPR_RESTRICT prims);
FREERDP_LOCAL void primitives_init_alphaComp_opt(primitives_t* WINPR_RESTRICT prims);
FREERDP_LOCAL void primitives_init_colors_opt(primitives_t* WINPR_RESTRICT prims);
FREERDP_LOCAL void primitives_init_YCoCg_opt(primitives_t* WINPR_RESTRICT prims);
FREERDP_LOCAL void primitives_init_YUV_opt(primitives_t* WINPR_RESTRICT prims);

/* OpenCL init is only declared when built WITH_OPENCL; unlike the routines
 * above it reports a BOOL result. */
#if defined(WITH_OPENCL)
FREERDP_LOCAL BOOL primitives_init_opencl(primitives_t* WINPR_RESTRICT prims);
#endif

/* Returns a primitives_t pointer for the requested type value
 * (valid type values are not visible in this header). */
FREERDP_LOCAL primitives_t* primitives_get_by_type(DWORD type);
317 
318 #endif /* FREERDP_LIB_PRIM_INTERNAL_H */