FreeRDP
prim_copy.c
1 /* FreeRDP: A Remote Desktop Protocol Client
2  * Copy operations.
3  * vi:ts=4 sw=4:
4  *
5  * (c) Copyright 2012 Hewlett-Packard Development Company, L.P.
6  * Licensed under the Apache License, Version 2.0 (the "License"); you may
7  * not use this file except in compliance with the License. You may obtain
8  * a copy of the License at http://www.apache.org/licenses/LICENSE-2.0.
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
12  * or implied. See the License for the specific language governing
13  * permissions and limitations under the License.
14  */
15 
16 #include <freerdp/config.h>
17 
18 #include <string.h>
19 #include <freerdp/types.h>
20 #include <freerdp/primitives.h>
21 #include <freerdp/log.h>
22 
23 #include "prim_internal.h"
24 #include "prim_copy.h"
25 #include "../codec/color.h"
26 
27 #include <freerdp/codec/color.h>
28 
/* Primitives table through which general_copy_8u_AC4r calls copy() when its
 * source and destination regions overlap. It is initialised to NULL and no
 * assignment to it appears anywhere in this file. */
29 static primitives_t* generic = NULL;
30 
31 /* ------------------------------------------------------------------------- */
32 /*static inline BOOL memory_regions_overlap_1d(*/
33 static BOOL memory_regions_overlap_1d(const BYTE* p1, const BYTE* p2, size_t bytes)
34 {
35  const ULONG_PTR p1m = (const ULONG_PTR)p1;
36  const ULONG_PTR p2m = (const ULONG_PTR)p2;
37 
38  if (p1m <= p2m)
39  {
40  if (p1m + bytes > p2m)
41  return TRUE;
42  }
43  else
44  {
45  if (p2m + bytes > p1m)
46  return TRUE;
47  }
48 
49  /* else */
50  return FALSE;
51 }
52 
53 /* ------------------------------------------------------------------------- */
54 /*static inline BOOL memory_regions_overlap_2d( */
55 static BOOL memory_regions_overlap_2d(const BYTE* p1, int p1Step, int p1Size, const BYTE* p2,
56  int p2Step, int p2Size, int width, int height)
57 {
58  ULONG_PTR p1m = (ULONG_PTR)p1;
59  ULONG_PTR p2m = (ULONG_PTR)p2;
60 
61  if (p1m <= p2m)
62  {
63  ULONG_PTR p1mEnd = p1m +
64  1ull * (WINPR_ASSERTING_INT_CAST(uint32_t, height - 1)) *
65  WINPR_ASSERTING_INT_CAST(uint32_t, p1Step) +
66  1ull * WINPR_ASSERTING_INT_CAST(uint32_t, width* p1Size);
67 
68  if (p1mEnd > p2m)
69  return TRUE;
70  }
71  else
72  {
73  ULONG_PTR p2mEnd = p2m +
74  1ull * (WINPR_ASSERTING_INT_CAST(uintptr_t, height - 1)) *
75  WINPR_ASSERTING_INT_CAST(uintptr_t, p2Step) +
76  1ull * WINPR_ASSERTING_INT_CAST(uintptr_t, width* p2Size);
77 
78  if (p2mEnd > p1m)
79  return TRUE;
80  }
81 
82  /* else */
83  return FALSE;
84 }
85 
86 /* ------------------------------------------------------------------------- */
87 static pstatus_t general_copy_8u(const BYTE* pSrc, BYTE* pDst, INT32 len)
88 {
89  if (memory_regions_overlap_1d(pSrc, pDst, (size_t)len))
90  {
91  memmove((void*)pDst, (const void*)pSrc, (size_t)len);
92  }
93  else
94  {
95  memcpy((void*)pDst, (const void*)pSrc, (size_t)len);
96  }
97 
98  return PRIMITIVES_SUCCESS;
99 }
100 
101 /* ------------------------------------------------------------------------- */
102 /* Copy a block of pixels from one buffer to another.
103  * The addresses are assumed to have been already offset to the upper-left
104  * corners of the source and destination region of interest.
105  */
106 static pstatus_t general_copy_8u_AC4r(const BYTE* pSrc, INT32 srcStep, BYTE* pDst, INT32 dstStep,
107  INT32 width, INT32 height)
108 {
109  const BYTE* src = pSrc;
110  BYTE* dst = pDst;
111  const size_t rowbytes = WINPR_ASSERTING_INT_CAST(size_t, width) * sizeof(UINT32);
112 
113  if ((width == 0) || (height == 0))
114  return PRIMITIVES_SUCCESS;
115 
116  if (memory_regions_overlap_2d(pSrc, srcStep, sizeof(UINT32), pDst, dstStep, sizeof(UINT32),
117  width, height))
118  {
119  do
120  {
121  generic->copy(src, dst, WINPR_ASSERTING_INT_CAST(int32_t, rowbytes));
122  src += srcStep;
123  dst += dstStep;
124  } while (--height);
125  }
126  else
127  {
128  /* TODO: do it in one operation when the rowdata is adjacent. */
129  do
130  {
131  /* If we find a replacement for memcpy that is consistently
132  * faster, this could be replaced with that.
133  */
134  memcpy(dst, src, rowbytes);
135  src += srcStep;
136  dst += dstStep;
137  } while (--height);
138  }
139 
140  return PRIMITIVES_SUCCESS;
141 }
142 
143 static INLINE pstatus_t generic_image_copy_bgr24_bgrx32(BYTE* WINPR_RESTRICT pDstData,
144  UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst,
145  UINT32 nWidth, UINT32 nHeight,
146  const BYTE* WINPR_RESTRICT pSrcData,
147  UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc,
148  SSIZE_T srcVMultiplier, SSIZE_T srcVOffset,
149  SSIZE_T dstVMultiplier, SSIZE_T dstVOffset)
150 {
151 
152  const SSIZE_T srcByte = 3;
153  const SSIZE_T dstByte = 4;
154 
155  const UINT32 width = nWidth - nWidth % 8;
156 
157  for (SSIZE_T y = 0; y < nHeight; y++)
158  {
159  const BYTE* WINPR_RESTRICT srcLine =
160  &pSrcData[srcVMultiplier * (y + nYSrc) * nSrcStep + srcVOffset];
161  BYTE* WINPR_RESTRICT dstLine =
162  &pDstData[dstVMultiplier * (y + nYDst) * nDstStep + dstVOffset];
163 
164  SSIZE_T x = 0;
165  WINPR_PRAGMA_UNROLL_LOOP
166  for (; x < width; x++)
167  {
168  dstLine[(x + nXDst) * dstByte + 0] = srcLine[(x + nXSrc) * srcByte + 0];
169  dstLine[(x + nXDst) * dstByte + 1] = srcLine[(x + nXSrc) * srcByte + 1];
170  dstLine[(x + nXDst) * dstByte + 2] = srcLine[(x + nXSrc) * srcByte + 2];
171  }
172 
173  for (; x < nWidth; x++)
174  {
175  dstLine[(x + nXDst) * dstByte + 0] = srcLine[(x + nXSrc) * srcByte + 0];
176  dstLine[(x + nXDst) * dstByte + 1] = srcLine[(x + nXSrc) * srcByte + 1];
177  dstLine[(x + nXDst) * dstByte + 2] = srcLine[(x + nXSrc) * srcByte + 2];
178  }
179  }
180 
181  return PRIMITIVES_SUCCESS;
182 }
183 
184 static INLINE pstatus_t generic_image_copy_bgrx32_bgrx32(
185  BYTE* WINPR_RESTRICT pDstData, UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst, UINT32 nWidth,
186  UINT32 nHeight, const BYTE* WINPR_RESTRICT pSrcData, UINT32 nSrcStep, UINT32 nXSrc,
187  UINT32 nYSrc, SSIZE_T srcVMultiplier, SSIZE_T srcVOffset, SSIZE_T dstVMultiplier,
188  SSIZE_T dstVOffset)
189 {
190 
191  const SSIZE_T srcByte = 4;
192  const SSIZE_T dstByte = 4;
193 
194  const UINT32 width = nWidth - nWidth % 8;
195 
196  for (SSIZE_T y = 0; y < nHeight; y++)
197  {
198  const BYTE* WINPR_RESTRICT srcLine =
199  &pSrcData[srcVMultiplier * (y + nYSrc) * nSrcStep + srcVOffset];
200  BYTE* WINPR_RESTRICT dstLine =
201  &pDstData[dstVMultiplier * (y + nYDst) * nDstStep + dstVOffset];
202 
203  SSIZE_T x = 0;
204  WINPR_PRAGMA_UNROLL_LOOP
205  for (; x < width; x++)
206  {
207  dstLine[(x + nXDst) * dstByte + 0] = srcLine[(x + nXSrc) * srcByte + 0];
208  dstLine[(x + nXDst) * dstByte + 1] = srcLine[(x + nXSrc) * srcByte + 1];
209  dstLine[(x + nXDst) * dstByte + 2] = srcLine[(x + nXSrc) * srcByte + 2];
210  }
211  for (; x < nWidth; x++)
212  {
213  dstLine[(x + nXDst) * dstByte + 0] = srcLine[(x + nXSrc) * srcByte + 0];
214  dstLine[(x + nXDst) * dstByte + 1] = srcLine[(x + nXSrc) * srcByte + 1];
215  dstLine[(x + nXDst) * dstByte + 2] = srcLine[(x + nXSrc) * srcByte + 2];
216  }
217  }
218 
219  return PRIMITIVES_SUCCESS;
220 }
221 
222 pstatus_t generic_image_copy_no_overlap_convert(
223  BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat, UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst,
224  UINT32 nWidth, UINT32 nHeight, const BYTE* WINPR_RESTRICT pSrcData, DWORD SrcFormat,
225  UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc, const gdiPalette* WINPR_RESTRICT palette,
226  SSIZE_T srcVMultiplier, SSIZE_T srcVOffset, SSIZE_T dstVMultiplier, SSIZE_T dstVOffset)
227 {
228  const SSIZE_T srcByte = FreeRDPGetBytesPerPixel(SrcFormat);
229  const SSIZE_T dstByte = FreeRDPGetBytesPerPixel(DstFormat);
230 
231  const UINT32 width = nWidth - nWidth % 8;
232  for (SSIZE_T y = 0; y < nHeight; y++)
233  {
234  const BYTE* WINPR_RESTRICT srcLine =
235  &pSrcData[srcVMultiplier * (y + nYSrc) * nSrcStep + srcVOffset];
236  BYTE* WINPR_RESTRICT dstLine =
237  &pDstData[dstVMultiplier * (y + nYDst) * nDstStep + dstVOffset];
238 
239  SSIZE_T x = 0;
240  WINPR_PRAGMA_UNROLL_LOOP
241  for (; x < width; x++)
242  {
243  const UINT32 color = FreeRDPReadColor_int(&srcLine[(x + nXSrc) * srcByte], SrcFormat);
244  const UINT32 dstColor = FreeRDPConvertColor(color, SrcFormat, DstFormat, palette);
245  FreeRDPWriteColor_int(&dstLine[(x + nXDst) * dstByte], DstFormat, dstColor);
246  }
247  for (; x < nWidth; x++)
248  {
249  const UINT32 color = FreeRDPReadColor_int(&srcLine[(x + nXSrc) * srcByte], SrcFormat);
250  const UINT32 dstColor = FreeRDPConvertColor(color, SrcFormat, DstFormat, palette);
251  FreeRDPWriteColor_int(&dstLine[(x + nXDst) * dstByte], DstFormat, dstColor);
252  }
253  }
254  return PRIMITIVES_SUCCESS;
255 }
256 
257 pstatus_t generic_image_copy_no_overlap_memcpy(
258  BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat, UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst,
259  UINT32 nWidth, UINT32 nHeight, const BYTE* WINPR_RESTRICT pSrcData, DWORD SrcFormat,
260  UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc, const gdiPalette* WINPR_RESTRICT palette,
261  SSIZE_T srcVMultiplier, SSIZE_T srcVOffset, SSIZE_T dstVMultiplier, SSIZE_T dstVOffset,
262  UINT32 flags)
263 {
264  const SSIZE_T dstByte = FreeRDPGetBytesPerPixel(DstFormat);
265  const SSIZE_T srcByte = FreeRDPGetBytesPerPixel(SrcFormat);
266  const SSIZE_T copyDstWidth = nWidth * dstByte;
267  const SSIZE_T xSrcOffset = nXSrc * srcByte;
268  const SSIZE_T xDstOffset = nXDst * dstByte;
269 
270  for (SSIZE_T y = 0; y < nHeight; y++)
271  {
272  const BYTE* WINPR_RESTRICT srcLine =
273  &pSrcData[srcVMultiplier * (y + nYSrc) * nSrcStep + srcVOffset];
274  BYTE* WINPR_RESTRICT dstLine =
275  &pDstData[dstVMultiplier * (y + nYDst) * nDstStep + dstVOffset];
276  memcpy(&dstLine[xDstOffset], &srcLine[xSrcOffset],
277  WINPR_ASSERTING_INT_CAST(size_t, copyDstWidth));
278  }
279 
280  return PRIMITIVES_SUCCESS;
281 }
282 
283 static INLINE pstatus_t generic_image_copy_no_overlap_dst_alpha(
284  BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat, UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst,
285  UINT32 nWidth, UINT32 nHeight, const BYTE* WINPR_RESTRICT pSrcData, DWORD SrcFormat,
286  UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc, const gdiPalette* WINPR_RESTRICT palette,
287  SSIZE_T srcVMultiplier, SSIZE_T srcVOffset, SSIZE_T dstVMultiplier, SSIZE_T dstVOffset)
288 {
289  WINPR_ASSERT(pDstData);
290  WINPR_ASSERT(pSrcData);
291 
292  switch (SrcFormat)
293  {
294  case PIXEL_FORMAT_BGR24:
295  switch (DstFormat)
296  {
297  case PIXEL_FORMAT_BGRX32:
298  case PIXEL_FORMAT_BGRA32:
299  return generic_image_copy_bgr24_bgrx32(
300  pDstData, nDstStep, nXDst, nYDst, nWidth, nHeight, pSrcData, nSrcStep,
301  nXSrc, nYSrc, srcVMultiplier, srcVOffset, dstVMultiplier, dstVOffset);
302  default:
303  break;
304  }
305  break;
306  case PIXEL_FORMAT_BGRX32:
307  case PIXEL_FORMAT_BGRA32:
308  switch (DstFormat)
309  {
310  case PIXEL_FORMAT_BGRX32:
311  case PIXEL_FORMAT_BGRA32:
312  return generic_image_copy_bgrx32_bgrx32(
313  pDstData, nDstStep, nXDst, nYDst, nWidth, nHeight, pSrcData, nSrcStep,
314  nXSrc, nYSrc, srcVMultiplier, srcVOffset, dstVMultiplier, dstVOffset);
315  default:
316  break;
317  }
318  break;
319  case PIXEL_FORMAT_RGBX32:
320  case PIXEL_FORMAT_RGBA32:
321  switch (DstFormat)
322  {
323  case PIXEL_FORMAT_RGBX32:
324  case PIXEL_FORMAT_RGBA32:
325  return generic_image_copy_bgrx32_bgrx32(
326  pDstData, nDstStep, nXDst, nYDst, nWidth, nHeight, pSrcData, nSrcStep,
327  nXSrc, nYSrc, srcVMultiplier, srcVOffset, dstVMultiplier, dstVOffset);
328  case PIXEL_FORMAT_RGB24:
329  return generic_image_copy_bgr24_bgrx32(
330  pDstData, nDstStep, nXDst, nYDst, nWidth, nHeight, pSrcData, nSrcStep,
331  nXSrc, nYSrc, srcVMultiplier, srcVOffset, dstVMultiplier, dstVOffset);
332  default:
333  break;
334  }
335  break;
336  default:
337  break;
338  }
339 
340  return generic_image_copy_no_overlap_convert(
341  pDstData, DstFormat, nDstStep, nXDst, nYDst, nWidth, nHeight, pSrcData, SrcFormat, nSrcStep,
342  nXSrc, nYSrc, palette, srcVMultiplier, srcVOffset, dstVMultiplier, dstVOffset);
343 }
344 
345 static INLINE pstatus_t generic_image_copy_no_overlap_no_alpha(
346  BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat, UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst,
347  UINT32 nWidth, UINT32 nHeight, const BYTE* WINPR_RESTRICT pSrcData, DWORD SrcFormat,
348  UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc, const gdiPalette* WINPR_RESTRICT palette,
349  SSIZE_T srcVMultiplier, SSIZE_T srcVOffset, SSIZE_T dstVMultiplier, SSIZE_T dstVOffset,
350  UINT32 flags)
351 {
352  if (FreeRDPAreColorFormatsEqualNoAlpha(SrcFormat, DstFormat))
353  return generic_image_copy_no_overlap_memcpy(pDstData, DstFormat, nDstStep, nXDst, nYDst,
354  nWidth, nHeight, pSrcData, SrcFormat, nSrcStep,
355  nXSrc, nYSrc, palette, srcVMultiplier,
356  srcVOffset, dstVMultiplier, dstVOffset, flags);
357  else
358  return generic_image_copy_no_overlap_convert(pDstData, DstFormat, nDstStep, nXDst, nYDst,
359  nWidth, nHeight, pSrcData, SrcFormat, nSrcStep,
360  nXSrc, nYSrc, palette, srcVMultiplier,
361  srcVOffset, dstVMultiplier, dstVOffset);
362 }
363 
364 static pstatus_t generic_image_copy_no_overlap(BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat,
365  UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst,
366  UINT32 nWidth, UINT32 nHeight,
367  const BYTE* WINPR_RESTRICT pSrcData, DWORD SrcFormat,
368  UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc,
369  const gdiPalette* WINPR_RESTRICT palette,
370  UINT32 flags)
371 {
372  const BOOL vSrcVFlip = (flags & FREERDP_FLIP_VERTICAL) ? TRUE : FALSE;
373  SSIZE_T srcVOffset = 0;
374  SSIZE_T srcVMultiplier = 1;
375  SSIZE_T dstVOffset = 0;
376  SSIZE_T dstVMultiplier = 1;
377 
378  if ((nWidth == 0) || (nHeight == 0))
379  return PRIMITIVES_SUCCESS;
380 
381  if ((nHeight > INT32_MAX) || (nWidth > INT32_MAX))
382  return -1;
383 
384  if (!pDstData || !pSrcData)
385  return -1;
386 
387  if (nDstStep == 0)
388  nDstStep = nWidth * FreeRDPGetBytesPerPixel(DstFormat);
389 
390  if (nSrcStep == 0)
391  nSrcStep = nWidth * FreeRDPGetBytesPerPixel(SrcFormat);
392 
393  if (vSrcVFlip)
394  {
395  srcVOffset = (nHeight - 1ll) * nSrcStep;
396  srcVMultiplier = -1;
397  }
398 
399  if (((flags & FREERDP_KEEP_DST_ALPHA) != 0) && FreeRDPColorHasAlpha(DstFormat))
400  return generic_image_copy_no_overlap_dst_alpha(
401  pDstData, DstFormat, nDstStep, nXDst, nYDst, nWidth, nHeight, pSrcData, SrcFormat,
402  nSrcStep, nXSrc, nYSrc, palette, srcVMultiplier, srcVOffset, dstVMultiplier,
403  dstVOffset);
404  else
405  return generic_image_copy_no_overlap_no_alpha(
406  pDstData, DstFormat, nDstStep, nXDst, nYDst, nWidth, nHeight, pSrcData, SrcFormat,
407  nSrcStep, nXSrc, nYSrc, palette, srcVMultiplier, srcVOffset, dstVMultiplier, dstVOffset,
408  flags);
409 
410  return PRIMITIVES_SUCCESS;
411 }
412 
413 /* ------------------------------------------------------------------------- */
414 void primitives_init_copy(primitives_t* WINPR_RESTRICT prims)
415 {
416  /* Start with the default. */
417  prims->copy_8u = general_copy_8u;
418  prims->copy_8u_AC4r = general_copy_8u_AC4r;
419  prims->copy = WINPR_FUNC_PTR_CAST(prims->copy_8u, __copy_t);
420  prims->copy_no_overlap = generic_image_copy_no_overlap;
421 }
422 
423 void primitives_init_copy_opt(primitives_t* prims)
424 {
425  primitives_init_copy_sse41(prims);
426 #if defined(WITH_AVX2)
427  primitives_init_copy_avx2(prims);
428 #endif
429 }