16 #include <freerdp/config.h>
18 #include <freerdp/types.h>
19 #include <freerdp/primitives.h>
20 #include <winpr/sysinfo.h>
22 #include "prim_sign.h"
24 #include "prim_internal.h"
26 #if defined(SSE_AVX_INTRINSICS_ENABLED)
27 #include <emmintrin.h>
28 #include <tmmintrin.h>
/*
 * SSSE3 implementation of the sign_16s primitive.
 *
 * For every signed 16-bit element read through pSrc, writes -1, 0 or +1
 * through pDst depending on whether the element is negative, zero or
 * positive. Delegates to the generic implementation in the early-out cases
 * below and otherwise returns PRIMITIVES_SUCCESS.
 */
33 static pstatus_t ssse3_sign_16s(
const INT16* WINPR_RESTRICT pSrc, INT16* WINPR_RESTRICT pDst,
36 const INT16* sptr = pSrc;
/* NOTE(review): the condition guarding this fallback is not visible here;
 * presumably a minimum-length check - confirm. */
42 return generic->sign_16s(pSrc, pDst, len);
/* An odd destination address can never become 16-byte aligned when stepping
 * in 2-byte elements, so hand the whole request to the generic code. */
46 if ((ULONG_PTR)pDst & 0x01)
48 return generic->sign_16s(pSrc, pDst, len);
/* Scalar head: process single elements until dptr is 16-byte aligned, which
 * the _mm_store_si128 calls further down require. */
52 while ((ULONG_PTR)dptr & 0x0f)
/* Scalar sign: -1 for negative, 1 for positive, 0 for zero. */
55 *dptr++ = WINPR_ASSERTING_INT_CAST(int16_t, (src < 0) ? (-1) : ((src > 0) ? 1 : 0));
/* NOTE(review): presumably reached when the input is exhausted while still
 * aligning - the guarding condition is not visible here; confirm. */
58 return PRIMITIVES_SUCCESS;
/* The destination is aligned at this point; the source may not be. A
 * misaligned source takes the path built on unaligned loads
 * (_mm_lddqu_si128), the other path uses aligned loads (_mm_load_si128).
 * The two paths are otherwise identical. */
65 if ((ULONG_PTR)sptr & 0x0f)
/* Four vectors with every 16-bit lane set to 1. */
78 xmm0 = _mm_set1_epi16(0x0001U);
79 xmm1 = _mm_set1_epi16(0x0001U);
80 xmm2 = _mm_set1_epi16(0x0001U);
81 xmm3 = _mm_set1_epi16(0x0001U);
/* Four unaligned 128-bit loads (8 INT16 each).
 * NOTE(review): the sptr/dptr advances between the loads and stores are not
 * visible here - verify each load/store is followed by a step of 8 elements. */
82 xmm4 = _mm_lddqu_si128((
const __m128i*)sptr);
84 xmm5 = _mm_lddqu_si128((
const __m128i*)sptr);
86 xmm6 = _mm_lddqu_si128((
const __m128i*)sptr);
88 xmm7 = _mm_lddqu_si128((
const __m128i*)sptr);
/* _mm_sign_epi16(a, b) negates a lane of a where b is negative, zeroes it
 * where b is zero and keeps it where b is positive; with a == 1 the result
 * is exactly sign(b). */
90 xmm0 = _mm_sign_epi16(xmm0, xmm4);
91 xmm1 = _mm_sign_epi16(xmm1, xmm5);
92 xmm2 = _mm_sign_epi16(xmm2, xmm6);
93 xmm3 = _mm_sign_epi16(xmm3, xmm7);
/* Aligned stores; valid because dptr was aligned by the scalar head loop. */
94 _mm_store_si128((__m128i*)dptr, xmm0);
96 _mm_store_si128((__m128i*)dptr, xmm1);
98 _mm_store_si128((__m128i*)dptr, xmm2);
100 _mm_store_si128((__m128i*)dptr, xmm3);
/* Aligned-source variant of the same four-vector step. */
117 xmm0 = _mm_set1_epi16(0x0001U);
118 xmm1 = _mm_set1_epi16(0x0001U);
119 xmm2 = _mm_set1_epi16(0x0001U);
120 xmm3 = _mm_set1_epi16(0x0001U);
/* Aligned 128-bit loads: sptr was found to be 16-byte aligned above. */
121 xmm4 = _mm_load_si128((
const __m128i*)sptr);
123 xmm5 = _mm_load_si128((
const __m128i*)sptr);
125 xmm6 = _mm_load_si128((
const __m128i*)sptr);
127 xmm7 = _mm_load_si128((
const __m128i*)sptr);
129 xmm0 = _mm_sign_epi16(xmm0, xmm4);
130 xmm1 = _mm_sign_epi16(xmm1, xmm5);
131 xmm2 = _mm_sign_epi16(xmm2, xmm6);
132 xmm3 = _mm_sign_epi16(xmm3, xmm7);
133 _mm_store_si128((__m128i*)dptr, xmm0);
135 _mm_store_si128((__m128i*)dptr, xmm1);
137 _mm_store_si128((__m128i*)dptr, xmm2);
139 _mm_store_si128((__m128i*)dptr, xmm3);
/* Single-vector step for what the four-vector loops left over.
 * NOTE(review): LOAD_SI128 is defined outside this view (presumably in
 * prim_internal.h) - confirm it tolerates an unaligned sptr. */
150 __m128i xmm0 = _mm_set1_epi16(0x0001U);
151 __m128i xmm1 = LOAD_SI128(sptr);
153 xmm0 = _mm_sign_epi16(xmm0, xmm1);
154 _mm_store_si128((__m128i*)dptr, xmm0);
/* Scalar tail: same per-element sign computation as in the head loop. */
162 *dptr++ = WINPR_ASSERTING_INT_CAST(int16_t, (src < 0) ? -1 : ((src > 0) ? 1 : 0));
165 return PRIMITIVES_SUCCESS;
/*
 * Installs the SSSE3 sign_16s implementation into the given primitives table.
 *
 * When SSE_AVX_INTRINSICS_ENABLED is defined, the generic primitives are
 * fetched, prims is initialised through primitives_init_sign(), and
 * prims->sign_16s is then replaced with ssse3_sign_16s only if the CPU
 * reports both SSSE3 and SSE3 support.
 */
171 void primitives_init_sign_ssse3(
primitives_t* WINPR_RESTRICT prims)
173 #if defined(SSE_AVX_INTRINSICS_ENABLED)
/* Cache the generic table; ssse3_sign_16s delegates to generic->sign_16s. */
174 generic = primitives_get_generic();
175 primitives_init_sign(prims);
/* Both runtime feature checks must pass before the override is installed. */
179 if (IsProcessorFeaturePresentEx(PF_EX_SSSE3) &&
180 IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
182 WLog_VRB(PRIM_TAG,
"SSE3/SSSE3 optimizations");
183 prims->sign_16s = ssse3_sign_16s;
/* NOTE(review): presumably the branch compiled when the intrinsics are not
 * enabled - the enclosing preprocessor directives are not visible here. */
187 WLog_VRB(PRIM_TAG,
"undefined WITH_SIMD or SSSE3/SSE3 intrinsics not available");