FreeRDP
primitives.c
1 /* primitives.c
2  * This code queries processor features and calls the init/deinit routines.
3  * vi:ts=4 sw=4
4  *
5  * Copyright 2011 Martin Fleisz <martin.fleisz@thincast.com>
6  * (c) Copyright 2012 Hewlett-Packard Development Company, L.P.
7  * Copyright 2019 David Fort <contact@hardening-consulting.com>
8  *
9  * Licensed under the Apache License, Version 2.0 (the "License"); you may
10  * not use this file except in compliance with the License. You may obtain
11  * a copy of the License at http://www.apache.org/licenses/LICENSE-2.0.
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
15  * or implied. See the License for the specific language governing
16  * permissions and limitations under the License.
17  */
18 
19 #include <freerdp/config.h>
20 
21 #include <string.h>
22 #include <stdlib.h>
23 
24 #include <winpr/synch.h>
25 #include <winpr/sysinfo.h>
26 #include <winpr/crypto.h>
27 #include <freerdp/primitives.h>
28 
29 #include "prim_internal.h"
30 
31 #include <freerdp/log.h>
32 #define TAG FREERDP_TAG("primitives")
33 
34 /* hints to know which kind of primitives to use */
35 static primitive_hints primitivesHints = PRIMITIVES_AUTODETECT;
36 static BOOL primitives_init_optimized(primitives_t* prims);
37 
38 void primitives_set_hints(primitive_hints hints)
39 {
40  primitivesHints = hints;
41 }
42 
43 primitive_hints primitives_get_hints(void)
44 {
45  return primitivesHints;
46 }
47 
48 /* Singleton pointer used throughout the program when requested. */
49 static primitives_t pPrimitivesGeneric = { 0 };
50 static INIT_ONCE generic_primitives_InitOnce = INIT_ONCE_STATIC_INIT;
51 
52 #if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES)
53 static primitives_t pPrimitivesCpu = { 0 };
54 static INIT_ONCE cpu_primitives_InitOnce = INIT_ONCE_STATIC_INIT;
55 
56 #endif
57 #if defined(WITH_OPENCL)
58 static primitives_t pPrimitivesGpu = { 0 };
59 static INIT_ONCE gpu_primitives_InitOnce = INIT_ONCE_STATIC_INIT;
60 
61 #endif
62 
63 static INIT_ONCE auto_primitives_InitOnce = INIT_ONCE_STATIC_INIT;
64 
65 static primitives_t pPrimitives = { 0 };
66 
67 /* ------------------------------------------------------------------------- */
68 static BOOL primitives_init_generic(primitives_t* prims)
69 {
70  primitives_init_add(prims);
71  primitives_init_andor(prims);
72  primitives_init_alphaComp(prims);
73  primitives_init_copy(prims);
74  primitives_init_set(prims);
75  primitives_init_shift(prims);
76  primitives_init_sign(prims);
77  primitives_init_colors(prims);
78  primitives_init_YCoCg(prims);
79  primitives_init_YUV(prims);
80  prims->uninit = NULL;
81  return TRUE;
82 }
83 
84 static BOOL CALLBACK primitives_init_generic_cb(PINIT_ONCE once, PVOID param, PVOID* context)
85 {
86  WINPR_UNUSED(once);
87  WINPR_UNUSED(param);
88  WINPR_UNUSED(context);
89  return primitives_init_generic(&pPrimitivesGeneric);
90 }
91 
92 static BOOL primitives_init_optimized(primitives_t* prims)
93 {
94  primitives_init_generic(prims);
95 
96 #if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES)
97  primitives_init_add_opt(prims);
98  primitives_init_andor_opt(prims);
99  primitives_init_alphaComp_opt(prims);
100  primitives_init_copy_opt(prims);
101  primitives_init_set_opt(prims);
102  primitives_init_shift_opt(prims);
103  primitives_init_sign_opt(prims);
104  primitives_init_colors_opt(prims);
105  primitives_init_YCoCg_opt(prims);
106  primitives_init_YUV_opt(prims);
107  prims->flags |= PRIM_FLAGS_HAVE_EXTCPU;
108 #endif
109  return TRUE;
110 }
111 
112 typedef struct
113 {
114  BYTE* channels[3];
115  UINT32 steps[3];
116  prim_size_t roi;
117  BYTE* outputBuffer;
118  UINT32 outputStride;
119  UINT32 testedFormat;
120 } primitives_YUV_benchmark;
121 
122 static void primitives_YUV_benchmark_free(primitives_YUV_benchmark* bench)
123 {
124  if (!bench)
125  return;
126 
127  free(bench->outputBuffer);
128 
129  for (int i = 0; i < 3; i++)
130  free(bench->channels[i]);
131  memset(bench, 0, sizeof(primitives_YUV_benchmark));
132 }
133 
134 static primitives_YUV_benchmark* primitives_YUV_benchmark_init(primitives_YUV_benchmark* ret)
135 {
136  prim_size_t* roi = NULL;
137  if (!ret)
138  return NULL;
139 
140  memset(ret, 0, sizeof(primitives_YUV_benchmark));
141  roi = &ret->roi;
142  roi->width = 1024;
143  roi->height = 768;
144  ret->outputStride = roi->width * 4;
145  ret->testedFormat = PIXEL_FORMAT_BGRA32;
146 
147  ret->outputBuffer = calloc(ret->outputStride, roi->height);
148  if (!ret->outputBuffer)
149  goto fail;
150 
151  for (int i = 0; i < 3; i++)
152  {
153  BYTE* buf = ret->channels[i] = calloc(roi->width, roi->height);
154  if (!buf)
155  goto fail;
156 
157  winpr_RAND(buf, 1ull * roi->width * roi->height);
158  ret->steps[i] = roi->width;
159  }
160 
161  return ret;
162 
163 fail:
164  primitives_YUV_benchmark_free(ret);
165  return ret;
166 }
167 
168 static BOOL primitives_YUV_benchmark_run(primitives_YUV_benchmark* bench, primitives_t* prims,
169  UINT64 runTime, UINT32* computations)
170 {
171  ULONGLONG dueDate = 0;
172  const BYTE* channels[3] = { 0 };
173  pstatus_t status = 0;
174 
175  *computations = 0;
176 
177  for (size_t i = 0; i < 3; i++)
178  channels[i] = bench->channels[i];
179 
180  /* do a first dry run to initialize cache and such */
181  status = prims->YUV420ToRGB_8u_P3AC4R(channels, bench->steps, bench->outputBuffer,
182  bench->outputStride, bench->testedFormat, &bench->roi);
183  if (status != PRIMITIVES_SUCCESS)
184  return FALSE;
185 
186  /* let's run the benchmark */
187  dueDate = GetTickCount64() + runTime;
188  while (GetTickCount64() < dueDate)
189  {
190  pstatus_t cstatus =
191  prims->YUV420ToRGB_8u_P3AC4R(channels, bench->steps, bench->outputBuffer,
192  bench->outputStride, bench->testedFormat, &bench->roi);
193  if (cstatus != PRIMITIVES_SUCCESS)
194  return FALSE;
195  *computations = *computations + 1;
196  }
197  return TRUE;
198 }
199 
200 static BOOL primitives_autodetect_best(primitives_t* prims)
201 {
202  BOOL ret = FALSE;
203  struct prim_benchmark
204  {
205  const char* name;
206  primitives_t* prims;
207  UINT32 flags;
208  UINT32 count;
209  };
210 
211  struct prim_benchmark testcases[] =
212  {
213  { "generic", NULL, PRIMITIVES_PURE_SOFT, 0 },
214 #if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES)
215  { "optimized", NULL, PRIMITIVES_ONLY_CPU, 0 },
216 #endif
217 #if defined(WITH_OPENCL)
218  { "opencl", NULL, PRIMITIVES_ONLY_GPU, 0 },
219 #endif
220  };
221  const struct prim_benchmark* best = NULL;
222 
223 #if !defined(HAVE_CPU_OPTIMIZED_PRIMITIVES) && !defined(WITH_OPENCL)
224  {
225  struct prim_benchmark* cur = &testcases[0];
226  cur->prims = primitives_get_by_type(cur->flags);
227  if (!cur->prims)
228  {
229  WLog_WARN(TAG, "Failed to initialize %s primitives", cur->name);
230  return FALSE;
231  }
232  WLog_DBG(TAG, "primitives benchmark: only one backend, skipping...");
233  best = cur;
234  }
235 #else
236  {
237  UINT64 benchDuration = 150; /* 150 ms */
238  primitives_YUV_benchmark bench = { 0 };
239  primitives_YUV_benchmark* yuvBench = primitives_YUV_benchmark_init(&bench);
240  if (!yuvBench)
241  return FALSE;
242 
243  WLog_DBG(TAG, "primitives benchmark result:");
244  for (size_t x = 0; x < ARRAYSIZE(testcases); x++)
245  {
246  struct prim_benchmark* cur = &testcases[x];
247  cur->prims = primitives_get_by_type(cur->flags);
248  if (!cur->prims)
249  {
250  WLog_WARN(TAG, "Failed to initialize %s primitives", cur->name);
251  continue;
252  }
253  if (!primitives_YUV_benchmark_run(yuvBench, cur->prims, benchDuration, &cur->count))
254  {
255  WLog_WARN(TAG, "error running %s YUV bench", cur->name);
256  continue;
257  }
258 
259  WLog_DBG(TAG, " * %s= %" PRIu32, cur->name, cur->count);
260  if (!best || (best->count < cur->count))
261  best = cur;
262  }
263  primitives_YUV_benchmark_free(yuvBench);
264  }
265 #endif
266 
267  if (!best)
268  {
269  WLog_ERR(TAG, "No primitives to test, aborting.");
270  goto out;
271  }
272  /* finally compute the results */
273  *prims = *best->prims;
274 
275  WLog_DBG(TAG, "primitives autodetect, using %s", best->name);
276  ret = TRUE;
277 out:
278  if (!ret)
279  *prims = pPrimitivesGeneric;
280 
281  return ret;
282 }
283 
284 #if defined(WITH_OPENCL)
285 static BOOL CALLBACK primitives_init_gpu_cb(PINIT_ONCE once, PVOID param, PVOID* context)
286 {
287  WINPR_UNUSED(once);
288  WINPR_UNUSED(param);
289  WINPR_UNUSED(context);
290 
291  if (!primitives_init_opencl(&pPrimitivesGpu))
292  return FALSE;
293 
294  return TRUE;
295 }
296 #endif
297 
298 #if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES)
299 static BOOL CALLBACK primitives_init_cpu_cb(PINIT_ONCE once, PVOID param, PVOID* context)
300 {
301  WINPR_UNUSED(once);
302  WINPR_UNUSED(param);
303  WINPR_UNUSED(context);
304 
305  if (!primitives_init_optimized(&pPrimitivesCpu))
306  return FALSE;
307 
308  return TRUE;
309 }
310 #endif
311 
312 static BOOL CALLBACK primitives_auto_init_cb(PINIT_ONCE once, PVOID param, PVOID* context)
313 {
314  WINPR_UNUSED(once);
315  WINPR_UNUSED(param);
316  WINPR_UNUSED(context);
317 
318  return primitives_init(&pPrimitives, primitivesHints);
319 }
320 
321 BOOL primitives_init(primitives_t* p, primitive_hints hints)
322 {
323  switch (hints)
324  {
325  case PRIMITIVES_AUTODETECT:
326  return primitives_autodetect_best(p);
327  case PRIMITIVES_PURE_SOFT:
328  *p = pPrimitivesGeneric;
329  return TRUE;
330  case PRIMITIVES_ONLY_CPU:
331 #if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES)
332  *p = pPrimitivesCpu;
333  return TRUE;
334 #endif
335  case PRIMITIVES_ONLY_GPU:
336 #if defined(WITH_OPENCL)
337  *p = pPrimitivesGpu;
338  return TRUE;
339 #endif
340  default:
341  WLog_ERR(TAG, "unknown hint %d", hints);
342  return FALSE;
343  }
344 }
345 
346 void primitives_uninit(void)
347 {
348 #if defined(WITH_OPENCL)
349  if (pPrimitivesGpu.uninit)
350  pPrimitivesGpu.uninit();
351 #endif
352 #if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES)
353  if (pPrimitivesCpu.uninit)
354  pPrimitivesCpu.uninit();
355 #endif
356  if (pPrimitivesGeneric.uninit)
357  pPrimitivesGeneric.uninit();
358 }
359 
360 /* ------------------------------------------------------------------------- */
361 static void setup(void)
362 {
363  InitOnceExecuteOnce(&generic_primitives_InitOnce, primitives_init_generic_cb, NULL, NULL);
364 #if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES)
365  InitOnceExecuteOnce(&cpu_primitives_InitOnce, primitives_init_cpu_cb, NULL, NULL);
366 #endif
367 #if defined(WITH_OPENCL)
368  InitOnceExecuteOnce(&gpu_primitives_InitOnce, primitives_init_gpu_cb, NULL, NULL);
369 #endif
370  InitOnceExecuteOnce(&auto_primitives_InitOnce, primitives_auto_init_cb, NULL, NULL);
371 }
372 
373 primitives_t* primitives_get(void)
374 {
375  setup();
376  return &pPrimitives;
377 }
378 
379 primitives_t* primitives_get_generic(void)
380 {
381  InitOnceExecuteOnce(&generic_primitives_InitOnce, primitives_init_generic_cb, NULL, NULL);
382  return &pPrimitivesGeneric;
383 }
384 
385 primitives_t* primitives_get_by_type(DWORD type)
386 {
387  InitOnceExecuteOnce(&generic_primitives_InitOnce, primitives_init_generic_cb, NULL, NULL);
388 
389  switch (type)
390  {
391  case PRIMITIVES_ONLY_GPU:
392 #if defined(WITH_OPENCL)
393  if (!InitOnceExecuteOnce(&gpu_primitives_InitOnce, primitives_init_gpu_cb, NULL, NULL))
394  return NULL;
395  return &pPrimitivesGpu;
396 #endif
397  case PRIMITIVES_ONLY_CPU:
398 #if defined(HAVE_CPU_OPTIMIZED_PRIMITIVES)
399  if (!InitOnceExecuteOnce(&cpu_primitives_InitOnce, primitives_init_cpu_cb, NULL, NULL))
400  return NULL;
401  return &pPrimitivesCpu;
402 #endif
403  case PRIMITIVES_PURE_SOFT:
404  default:
405  return &pPrimitivesGeneric;
406  }
407 }
408 
409 DWORD primitives_flags(primitives_t* p)
410 {
411  return p->flags;
412 }