FreeRDP
prim_copy.c
1 /* FreeRDP: A Remote Desktop Protocol Client
2  * Copy operations.
3  * vi:ts=4 sw=4:
4  *
5  * (c) Copyright 2012 Hewlett-Packard Development Company, L.P.
6  * Licensed under the Apache License, Version 2.0 (the "License"); you may
7  * not use this file except in compliance with the License. You may obtain
8  * a copy of the License at http://www.apache.org/licenses/LICENSE-2.0.
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
12  * or implied. See the License for the specific language governing
13  * permissions and limitations under the License.
14  */
15 
16 #include <freerdp/config.h>
17 
18 #include <string.h>
19 #include <freerdp/types.h>
20 #include <freerdp/primitives.h>
21 #include <freerdp/log.h>
22 
23 #include "prim_internal.h"
24 #include "prim_copy.h"
25 #include "../codec/color.h"
26 
27 #include <freerdp/codec/color.h>
28 
/* File-local pointer to a primitives table; general_copy_8u_AC4r consults its
 * `copy` entry when source and destination regions overlap.
 * NOTE(review): no assignment to this pointer appears in this file - confirm
 * where it is meant to be initialized. */
29 static primitives_t* generic = NULL;
30 
31 /* ------------------------------------------------------------------------- */
32 /*static inline BOOL memory_regions_overlap_1d(*/
33 static BOOL memory_regions_overlap_1d(const BYTE* p1, const BYTE* p2, size_t bytes)
34 {
35  const ULONG_PTR p1m = (const ULONG_PTR)p1;
36  const ULONG_PTR p2m = (const ULONG_PTR)p2;
37 
38  if (p1m <= p2m)
39  {
40  if (p1m + bytes > p2m)
41  return TRUE;
42  }
43  else
44  {
45  if (p2m + bytes > p1m)
46  return TRUE;
47  }
48 
49  /* else */
50  return FALSE;
51 }
52 
53 /* ------------------------------------------------------------------------- */
54 /*static inline BOOL memory_regions_overlap_2d( */
55 static BOOL memory_regions_overlap_2d(const BYTE* p1, int p1Step, int p1Size, const BYTE* p2,
56  int p2Step, int p2Size, int width, int height)
57 {
58  ULONG_PTR p1m = (ULONG_PTR)p1;
59  ULONG_PTR p2m = (ULONG_PTR)p2;
60 
61  if (p1m <= p2m)
62  {
63  ULONG_PTR p1mEnd = p1m + 1ull * (height - 1) * p1Step + 1ull * width * p1Size;
64 
65  if (p1mEnd > p2m)
66  return TRUE;
67  }
68  else
69  {
70  ULONG_PTR p2mEnd = p2m + 1ull * (height - 1) * p2Step + 1ull * width * p2Size;
71 
72  if (p2mEnd > p1m)
73  return TRUE;
74  }
75 
76  /* else */
77  return FALSE;
78 }
79 
80 /* ------------------------------------------------------------------------- */
81 static pstatus_t general_copy_8u(const BYTE* pSrc, BYTE* pDst, INT32 len)
82 {
83  if (memory_regions_overlap_1d(pSrc, pDst, (size_t)len))
84  {
85  memmove((void*)pDst, (const void*)pSrc, (size_t)len);
86  }
87  else
88  {
89  memcpy((void*)pDst, (const void*)pSrc, (size_t)len);
90  }
91 
92  return PRIMITIVES_SUCCESS;
93 }
94 
95 /* ------------------------------------------------------------------------- */
96 /* Copy a block of pixels from one buffer to another.
97  * The addresses are assumed to have been already offset to the upper-left
98  * corners of the source and destination region of interest.
99  */
100 static pstatus_t general_copy_8u_AC4r(const BYTE* pSrc, INT32 srcStep, BYTE* pDst, INT32 dstStep,
101  INT32 width, INT32 height)
102 {
103  const BYTE* src = pSrc;
104  BYTE* dst = pDst;
105  int rowbytes = width * sizeof(UINT32);
106 
107  if ((width == 0) || (height == 0))
108  return PRIMITIVES_SUCCESS;
109 
110  if (memory_regions_overlap_2d(pSrc, srcStep, sizeof(UINT32), pDst, dstStep, sizeof(UINT32),
111  width, height))
112  {
113  do
114  {
115  generic->copy(src, dst, rowbytes);
116  src += srcStep;
117  dst += dstStep;
118  } while (--height);
119  }
120  else
121  {
122  /* TODO: do it in one operation when the rowdata is adjacent. */
123  do
124  {
125  /* If we find a replacement for memcpy that is consistently
126  * faster, this could be replaced with that.
127  */
128  memcpy(dst, src, rowbytes);
129  src += srcStep;
130  dst += dstStep;
131  } while (--height);
132  }
133 
134  return PRIMITIVES_SUCCESS;
135 }
136 
137 static INLINE pstatus_t generic_image_copy_bgr24_bgrx32(BYTE* WINPR_RESTRICT pDstData,
138  UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst,
139  UINT32 nWidth, UINT32 nHeight,
140  const BYTE* WINPR_RESTRICT pSrcData,
141  UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc,
142  SSIZE_T srcVMultiplier, SSIZE_T srcVOffset,
143  SSIZE_T dstVMultiplier, SSIZE_T dstVOffset)
144 {
145 
146  const SSIZE_T srcByte = 3;
147  const SSIZE_T dstByte = 4;
148 
149  const UINT32 width = nWidth - nWidth % 8;
150 
151  for (SSIZE_T y = 0; y < nHeight; y++)
152  {
153  const BYTE* WINPR_RESTRICT srcLine =
154  &pSrcData[srcVMultiplier * (y + nYSrc) * nSrcStep + srcVOffset];
155  BYTE* WINPR_RESTRICT dstLine =
156  &pDstData[dstVMultiplier * (y + nYDst) * nDstStep + dstVOffset];
157 
158  SSIZE_T x = 0;
159  WINPR_PRAGMA_UNROLL_LOOP
160  for (; x < width; x++)
161  {
162  dstLine[(x + nXDst) * dstByte + 0] = srcLine[(x + nXSrc) * srcByte + 0];
163  dstLine[(x + nXDst) * dstByte + 1] = srcLine[(x + nXSrc) * srcByte + 1];
164  dstLine[(x + nXDst) * dstByte + 2] = srcLine[(x + nXSrc) * srcByte + 2];
165  }
166 
167  for (; x < nWidth; x++)
168  {
169  dstLine[(x + nXDst) * dstByte + 0] = srcLine[(x + nXSrc) * srcByte + 0];
170  dstLine[(x + nXDst) * dstByte + 1] = srcLine[(x + nXSrc) * srcByte + 1];
171  dstLine[(x + nXDst) * dstByte + 2] = srcLine[(x + nXSrc) * srcByte + 2];
172  }
173  }
174 
175  return PRIMITIVES_SUCCESS;
176 }
177 
178 static INLINE pstatus_t generic_image_copy_bgrx32_bgrx32(
179  BYTE* WINPR_RESTRICT pDstData, UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst, UINT32 nWidth,
180  UINT32 nHeight, const BYTE* WINPR_RESTRICT pSrcData, UINT32 nSrcStep, UINT32 nXSrc,
181  UINT32 nYSrc, SSIZE_T srcVMultiplier, SSIZE_T srcVOffset, SSIZE_T dstVMultiplier,
182  SSIZE_T dstVOffset)
183 {
184 
185  const SSIZE_T srcByte = 4;
186  const SSIZE_T dstByte = 4;
187 
188  const UINT32 width = nWidth - nWidth % 8;
189 
190  for (SSIZE_T y = 0; y < nHeight; y++)
191  {
192  const BYTE* WINPR_RESTRICT srcLine =
193  &pSrcData[srcVMultiplier * (y + nYSrc) * nSrcStep + srcVOffset];
194  BYTE* WINPR_RESTRICT dstLine =
195  &pDstData[dstVMultiplier * (y + nYDst) * nDstStep + dstVOffset];
196 
197  SSIZE_T x = 0;
198  WINPR_PRAGMA_UNROLL_LOOP
199  for (; x < width; x++)
200  {
201  dstLine[(x + nXDst) * dstByte + 0] = srcLine[(x + nXSrc) * srcByte + 0];
202  dstLine[(x + nXDst) * dstByte + 1] = srcLine[(x + nXSrc) * srcByte + 1];
203  dstLine[(x + nXDst) * dstByte + 2] = srcLine[(x + nXSrc) * srcByte + 2];
204  }
205  for (; x < nWidth; x++)
206  {
207  dstLine[(x + nXDst) * dstByte + 0] = srcLine[(x + nXSrc) * srcByte + 0];
208  dstLine[(x + nXDst) * dstByte + 1] = srcLine[(x + nXSrc) * srcByte + 1];
209  dstLine[(x + nXDst) * dstByte + 2] = srcLine[(x + nXSrc) * srcByte + 2];
210  }
211  }
212 
213  return PRIMITIVES_SUCCESS;
214 }
215 
216 pstatus_t generic_image_copy_no_overlap_convert(
217  BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat, UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst,
218  UINT32 nWidth, UINT32 nHeight, const BYTE* WINPR_RESTRICT pSrcData, DWORD SrcFormat,
219  UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc, const gdiPalette* WINPR_RESTRICT palette,
220  SSIZE_T srcVMultiplier, SSIZE_T srcVOffset, SSIZE_T dstVMultiplier, SSIZE_T dstVOffset)
221 {
222  const SSIZE_T srcByte = FreeRDPGetBytesPerPixel(SrcFormat);
223  const SSIZE_T dstByte = FreeRDPGetBytesPerPixel(DstFormat);
224 
225  const UINT32 width = nWidth - nWidth % 8;
226  for (SSIZE_T y = 0; y < nHeight; y++)
227  {
228  const BYTE* WINPR_RESTRICT srcLine =
229  &pSrcData[srcVMultiplier * (y + nYSrc) * nSrcStep + srcVOffset];
230  BYTE* WINPR_RESTRICT dstLine =
231  &pDstData[dstVMultiplier * (y + nYDst) * nDstStep + dstVOffset];
232 
233  SSIZE_T x = 0;
234  WINPR_PRAGMA_UNROLL_LOOP
235  for (; x < width; x++)
236  {
237  const UINT32 color = FreeRDPReadColor_int(&srcLine[(x + nXSrc) * srcByte], SrcFormat);
238  const UINT32 dstColor = FreeRDPConvertColor(color, SrcFormat, DstFormat, palette);
239  FreeRDPWriteColor_int(&dstLine[(x + nXDst) * dstByte], DstFormat, dstColor);
240  }
241  for (; x < nWidth; x++)
242  {
243  const UINT32 color = FreeRDPReadColor_int(&srcLine[(x + nXSrc) * srcByte], SrcFormat);
244  const UINT32 dstColor = FreeRDPConvertColor(color, SrcFormat, DstFormat, palette);
245  FreeRDPWriteColor_int(&dstLine[(x + nXDst) * dstByte], DstFormat, dstColor);
246  }
247  }
248  return PRIMITIVES_SUCCESS;
249 }
250 
251 pstatus_t generic_image_copy_no_overlap_memcpy(
252  BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat, UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst,
253  UINT32 nWidth, UINT32 nHeight, const BYTE* WINPR_RESTRICT pSrcData, DWORD SrcFormat,
254  UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc, const gdiPalette* WINPR_RESTRICT palette,
255  SSIZE_T srcVMultiplier, SSIZE_T srcVOffset, SSIZE_T dstVMultiplier, SSIZE_T dstVOffset,
256  UINT32 flags)
257 {
258  const SSIZE_T dstByte = FreeRDPGetBytesPerPixel(DstFormat);
259  const SSIZE_T srcByte = FreeRDPGetBytesPerPixel(SrcFormat);
260  const SSIZE_T copyDstWidth = nWidth * dstByte;
261  const SSIZE_T xSrcOffset = nXSrc * srcByte;
262  const SSIZE_T xDstOffset = nXDst * dstByte;
263 
264  for (SSIZE_T y = 0; y < nHeight; y++)
265  {
266  const BYTE* WINPR_RESTRICT srcLine =
267  &pSrcData[srcVMultiplier * (y + nYSrc) * nSrcStep + srcVOffset];
268  BYTE* WINPR_RESTRICT dstLine =
269  &pDstData[dstVMultiplier * (y + nYDst) * nDstStep + dstVOffset];
270  memcpy(&dstLine[xDstOffset], &srcLine[xSrcOffset], copyDstWidth);
271  }
272 
273  return PRIMITIVES_SUCCESS;
274 }
275 
276 static INLINE pstatus_t generic_image_copy_no_overlap_dst_alpha(
277  BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat, UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst,
278  UINT32 nWidth, UINT32 nHeight, const BYTE* WINPR_RESTRICT pSrcData, DWORD SrcFormat,
279  UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc, const gdiPalette* WINPR_RESTRICT palette,
280  SSIZE_T srcVMultiplier, SSIZE_T srcVOffset, SSIZE_T dstVMultiplier, SSIZE_T dstVOffset)
281 {
282  WINPR_ASSERT(pDstData);
283  WINPR_ASSERT(pSrcData);
284 
285  switch (SrcFormat)
286  {
287  case PIXEL_FORMAT_BGR24:
288  switch (DstFormat)
289  {
290  case PIXEL_FORMAT_BGRX32:
291  case PIXEL_FORMAT_BGRA32:
292  return generic_image_copy_bgr24_bgrx32(
293  pDstData, nDstStep, nXDst, nYDst, nWidth, nHeight, pSrcData, nSrcStep,
294  nXSrc, nYSrc, srcVMultiplier, srcVOffset, dstVMultiplier, dstVOffset);
295  default:
296  break;
297  }
298  break;
299  case PIXEL_FORMAT_BGRX32:
300  case PIXEL_FORMAT_BGRA32:
301  switch (DstFormat)
302  {
303  case PIXEL_FORMAT_BGRX32:
304  case PIXEL_FORMAT_BGRA32:
305  return generic_image_copy_bgrx32_bgrx32(
306  pDstData, nDstStep, nXDst, nYDst, nWidth, nHeight, pSrcData, nSrcStep,
307  nXSrc, nYSrc, srcVMultiplier, srcVOffset, dstVMultiplier, dstVOffset);
308  default:
309  break;
310  }
311  break;
312  case PIXEL_FORMAT_RGBX32:
313  case PIXEL_FORMAT_RGBA32:
314  switch (DstFormat)
315  {
316  case PIXEL_FORMAT_RGBX32:
317  case PIXEL_FORMAT_RGBA32:
318  return generic_image_copy_bgrx32_bgrx32(
319  pDstData, nDstStep, nXDst, nYDst, nWidth, nHeight, pSrcData, nSrcStep,
320  nXSrc, nYSrc, srcVMultiplier, srcVOffset, dstVMultiplier, dstVOffset);
321  case PIXEL_FORMAT_RGB24:
322  return generic_image_copy_bgr24_bgrx32(
323  pDstData, nDstStep, nXDst, nYDst, nWidth, nHeight, pSrcData, nSrcStep,
324  nXSrc, nYSrc, srcVMultiplier, srcVOffset, dstVMultiplier, dstVOffset);
325  default:
326  break;
327  }
328  break;
329  default:
330  break;
331  }
332 
333  return generic_image_copy_no_overlap_convert(
334  pDstData, DstFormat, nDstStep, nXDst, nYDst, nWidth, nHeight, pSrcData, SrcFormat, nSrcStep,
335  nXSrc, nYSrc, palette, srcVMultiplier, srcVOffset, dstVMultiplier, dstVOffset);
336 }
337 
338 static INLINE pstatus_t generic_image_copy_no_overlap_no_alpha(
339  BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat, UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst,
340  UINT32 nWidth, UINT32 nHeight, const BYTE* WINPR_RESTRICT pSrcData, DWORD SrcFormat,
341  UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc, const gdiPalette* WINPR_RESTRICT palette,
342  SSIZE_T srcVMultiplier, SSIZE_T srcVOffset, SSIZE_T dstVMultiplier, SSIZE_T dstVOffset,
343  UINT32 flags)
344 {
345  if (FreeRDPAreColorFormatsEqualNoAlpha(SrcFormat, DstFormat))
346  return generic_image_copy_no_overlap_memcpy(pDstData, DstFormat, nDstStep, nXDst, nYDst,
347  nWidth, nHeight, pSrcData, SrcFormat, nSrcStep,
348  nXSrc, nYSrc, palette, srcVMultiplier,
349  srcVOffset, dstVMultiplier, dstVOffset, flags);
350  else
351  return generic_image_copy_no_overlap_convert(pDstData, DstFormat, nDstStep, nXDst, nYDst,
352  nWidth, nHeight, pSrcData, SrcFormat, nSrcStep,
353  nXSrc, nYSrc, palette, srcVMultiplier,
354  srcVOffset, dstVMultiplier, dstVOffset);
355 }
356 
357 static pstatus_t generic_image_copy_no_overlap(BYTE* WINPR_RESTRICT pDstData, DWORD DstFormat,
358  UINT32 nDstStep, UINT32 nXDst, UINT32 nYDst,
359  UINT32 nWidth, UINT32 nHeight,
360  const BYTE* WINPR_RESTRICT pSrcData, DWORD SrcFormat,
361  UINT32 nSrcStep, UINT32 nXSrc, UINT32 nYSrc,
362  const gdiPalette* WINPR_RESTRICT palette,
363  UINT32 flags)
364 {
365  const BOOL vSrcVFlip = (flags & FREERDP_FLIP_VERTICAL) ? TRUE : FALSE;
366  SSIZE_T srcVOffset = 0;
367  SSIZE_T srcVMultiplier = 1;
368  SSIZE_T dstVOffset = 0;
369  SSIZE_T dstVMultiplier = 1;
370 
371  if ((nWidth == 0) || (nHeight == 0))
372  return PRIMITIVES_SUCCESS;
373 
374  if ((nHeight > INT32_MAX) || (nWidth > INT32_MAX))
375  return -1;
376 
377  if (!pDstData || !pSrcData)
378  return -1;
379 
380  if (nDstStep == 0)
381  nDstStep = nWidth * FreeRDPGetBytesPerPixel(DstFormat);
382 
383  if (nSrcStep == 0)
384  nSrcStep = nWidth * FreeRDPGetBytesPerPixel(SrcFormat);
385 
386  if (vSrcVFlip)
387  {
388  srcVOffset = (nHeight - 1ll) * nSrcStep;
389  srcVMultiplier = -1;
390  }
391 
392  if (((flags & FREERDP_KEEP_DST_ALPHA) != 0) && FreeRDPColorHasAlpha(DstFormat))
393  return generic_image_copy_no_overlap_dst_alpha(
394  pDstData, DstFormat, nDstStep, nXDst, nYDst, nWidth, nHeight, pSrcData, SrcFormat,
395  nSrcStep, nXSrc, nYSrc, palette, srcVMultiplier, srcVOffset, dstVMultiplier,
396  dstVOffset);
397  else
398  return generic_image_copy_no_overlap_no_alpha(
399  pDstData, DstFormat, nDstStep, nXDst, nYDst, nWidth, nHeight, pSrcData, SrcFormat,
400  nSrcStep, nXSrc, nYSrc, palette, srcVMultiplier, srcVOffset, dstVMultiplier, dstVOffset,
401  flags);
402 
403  return PRIMITIVES_SUCCESS;
404 }
405 
406 /* ------------------------------------------------------------------------- */
407 void primitives_init_copy(primitives_t* WINPR_RESTRICT prims)
408 {
409  /* Start with the default. */
410  prims->copy_8u = general_copy_8u;
411  prims->copy_8u_AC4r = general_copy_8u_AC4r;
412  prims->copy = WINPR_FUNC_PTR_CAST(prims->copy_8u, __copy_t);
413  prims->copy_no_overlap = generic_image_copy_no_overlap;
414 }
415 
416 void primitives_init_copy_opt(primitives_t* prims)
417 {
418  primitives_init_copy_sse41(prims);
419 #if defined(WITH_AVX2)
420  primitives_init_copy_avx2(prims);
421 #endif
422 }