FreeRDP
unicode.c
1 
22 #include <winpr/config.h>
23 #include <winpr/assert.h>
24 
25 #include <errno.h>
26 #include <wctype.h>
27 
28 #include <winpr/crt.h>
29 #include <winpr/error.h>
30 #include <winpr/print.h>
31 
#ifndef MIN
/* Smaller of two values.
 * The comparison and the whole expansion are parenthesized so the macro can be
 * embedded in a larger expression (e.g. `MIN(a, b) + 1`) without the
 * surrounding operators binding into the conditional.
 * Each argument may be evaluated twice: do not pass expressions with side
 * effects. */
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
#endif
35 
36 #ifndef _WIN32
37 
38 #include "unicode.h"
39 
40 #include "../log.h"
41 #define TAG WINPR_TAG("unicode")
42 
156 #if !defined(WITH_WINPR_DEPRECATED)
157 static
158 #endif
159  int
160  MultiByteToWideChar(UINT CodePage, DWORD dwFlags, LPCSTR lpMultiByteStr, int cbMultiByte,
161  LPWSTR lpWideCharStr, int cchWideChar)
162 {
163  return int_MultiByteToWideChar(CodePage, dwFlags, lpMultiByteStr, cbMultiByte, lpWideCharStr,
164  cchWideChar);
165 }
166 
206 #if !defined(WITH_WINPR_DEPRECATED)
207 static
208 #endif
209  int
210  WideCharToMultiByte(UINT CodePage, DWORD dwFlags, LPCWSTR lpWideCharStr, int cchWideChar,
211  LPSTR lpMultiByteStr, int cbMultiByte, LPCSTR lpDefaultChar,
212  LPBOOL lpUsedDefaultChar)
213 {
214  return int_WideCharToMultiByte(CodePage, dwFlags, lpWideCharStr, cchWideChar, lpMultiByteStr,
215  cbMultiByte, lpDefaultChar, lpUsedDefaultChar);
216 }
217 
218 #endif
219 
#if defined(WITH_WINPR_DEPRECATED)
/**
 * Convert a multi-byte string to a wide-character string, optionally
 * allocating the destination (only built with WITH_WINPR_DEPRECATED).
 *
 * @param CodePage       forwarded to MultiByteToWideChar
 * @param dwFlags        forwarded to MultiByteToWideChar
 * @param lpMultiByteStr input string, must not be NULL
 * @param cbMultiByte    input length in bytes; -1 means "measure the string
 *                       and include its terminator"
 * @param lpWideCharStr  in/out destination pointer, must not be NULL. A buffer
 *                       is allocated with calloc and stored here when
 *                       cchWideChar is 0 or when *lpWideCharStr is NULL.
 * @param cchWideChar    destination capacity in WCHAR elements; 0 means "query
 *                       the required size first"
 *
 * @return the value returned by MultiByteToWideChar. Returns 0 on invalid
 *         arguments, on allocation failure, when the (queried or given) size
 *         is below 1, or, if this function allocated the buffer, when the
 *         converted length differs from cchWideChar (the buffer is then freed
 *         and *lpWideCharStr reset to NULL).
 */
int ConvertToUnicode(UINT CodePage, DWORD dwFlags, LPCSTR lpMultiByteStr, int cbMultiByte,
                     LPWSTR* lpWideCharStr, int cchWideChar)
{
	int status = 0;
	BOOL allocate = FALSE;

	if (!lpMultiByteStr)
		return 0;

	if (!lpWideCharStr)
		return 0;

	if (cbMultiByte == -1)
	{
		/* Bounded scan so the length (plus terminator) always fits an int. */
		size_t len = strnlen(lpMultiByteStr, INT_MAX);
		if (len >= INT_MAX)
			return 0;
		cbMultiByte = (int)(len + 1);
	}

	if (cchWideChar == 0)
	{
		/* Size query: no destination buffer is passed. */
		cchWideChar = MultiByteToWideChar(CodePage, dwFlags, lpMultiByteStr, cbMultiByte, NULL, 0);
		allocate = TRUE;
	}
	else if (!(*lpWideCharStr))
		allocate = TRUE;

	if (cchWideChar < 1)
		return 0;

	if (allocate)
	{
		/* Compute the element count in size_t: `cchWideChar + 1` in int would
		 * overflow (undefined behaviour) for cchWideChar == INT_MAX. */
		*lpWideCharStr = (LPWSTR)calloc((size_t)cchWideChar + 1, sizeof(WCHAR));

		if (!(*lpWideCharStr))
			return 0;
	}

	status = MultiByteToWideChar(CodePage, dwFlags, lpMultiByteStr, cbMultiByte, *lpWideCharStr,
	                             cchWideChar);

	if (status != cchWideChar)
	{
		/* Only release memory this function allocated itself. */
		if (allocate)
		{
			free(*lpWideCharStr);
			*lpWideCharStr = NULL;
			status = 0;
		}
	}

	return status;
}
#endif
291 
#if defined(WITH_WINPR_DEPRECATED)
/**
 * Convert a wide-character string to a multi-byte string, optionally
 * allocating the destination (only built with WITH_WINPR_DEPRECATED).
 *
 * @param CodePage          forwarded to WideCharToMultiByte
 * @param dwFlags           forwarded to WideCharToMultiByte
 * @param lpWideCharStr     input string, must not be NULL
 * @param cchWideChar       input length in WCHAR elements; -1 means "measure
 *                          the string and include its terminator"
 * @param lpMultiByteStr    in/out destination pointer, must not be NULL. A
 *                          buffer is allocated with calloc and stored here
 *                          when cbMultiByte is 0 or when *lpMultiByteStr is
 *                          NULL.
 * @param cbMultiByte       destination capacity in bytes; 0 means "query the
 *                          required size first"
 * @param lpDefaultChar     forwarded to the converting WideCharToMultiByte call
 * @param lpUsedDefaultChar forwarded to the converting WideCharToMultiByte call
 *
 * @return the value returned by WideCharToMultiByte. Returns 0 on invalid
 *         arguments, when the input length does not fit an int, on allocation
 *         failure, when the (queried or given) size is below 1, or, if this
 *         function allocated the buffer, when the converted length differs
 *         from cbMultiByte (the buffer is then freed and *lpMultiByteStr reset
 *         to NULL).
 */
int ConvertFromUnicode(UINT CodePage, DWORD dwFlags, LPCWSTR lpWideCharStr, int cchWideChar,
                       LPSTR* lpMultiByteStr, int cbMultiByte, LPCSTR lpDefaultChar,
                       LPBOOL lpUsedDefaultChar)
{
	int status = 0;
	BOOL allocate = FALSE;

	if (!lpWideCharStr)
		return 0;

	if (!lpMultiByteStr)
		return 0;

	if (cchWideChar == -1)
	{
		/* Reject lengths that cannot be represented as int instead of silently
		 * truncating them (same guard as ConvertToUnicode). */
		const size_t len = _wcslen(lpWideCharStr);
		if (len >= INT_MAX)
			return 0;
		cchWideChar = (int)(len + 1);
	}

	if (cbMultiByte == 0)
	{
		/* Size query: no destination buffer is passed. */
		cbMultiByte =
		    WideCharToMultiByte(CodePage, dwFlags, lpWideCharStr, cchWideChar, NULL, 0, NULL, NULL);
		allocate = TRUE;
	}
	else if (!(*lpMultiByteStr))
		allocate = TRUE;

	if (cbMultiByte < 1)
		return 0;

	if (allocate)
	{
		/* Compute the byte count in size_t: `cbMultiByte + 1` in int would
		 * overflow (undefined behaviour) for cbMultiByte == INT_MAX. */
		*lpMultiByteStr = (LPSTR)calloc(1, (size_t)cbMultiByte + 1);

		if (!(*lpMultiByteStr))
			return 0;
	}

	status = WideCharToMultiByte(CodePage, dwFlags, lpWideCharStr, cchWideChar, *lpMultiByteStr,
	                             cbMultiByte, lpDefaultChar, lpUsedDefaultChar);

	/* cbMultiByte is >= 1 here, so any failure (status <= 0) is also a
	 * mismatch; a self-allocated buffer is released in either case. */
	if (allocate && (status != cbMultiByte))
	{
		free(*lpMultiByteStr);
		*lpMultiByteStr = NULL;
		status = 0;
	}

	return status;
}
#endif
361 
366 const WCHAR* ByteSwapUnicode(WCHAR* wstr, size_t length)
367 {
368  WINPR_ASSERT(wstr || (length == 0));
369 
370  for (size_t x = 0; x < length; x++)
371  wstr[x] = _byteswap_ushort(wstr[x]);
372  return wstr;
373 }
374 
375 SSIZE_T ConvertWCharToUtf8(const WCHAR* wstr, char* str, size_t len)
376 {
377  if (!wstr)
378  {
379  if (str && len)
380  str[0] = 0;
381  return 0;
382  }
383 
384  const size_t wlen = _wcslen(wstr);
385  return ConvertWCharNToUtf8(wstr, wlen + 1, str, len);
386 }
387 
388 SSIZE_T ConvertWCharNToUtf8(const WCHAR* wstr, size_t wlen, char* str, size_t len)
389 {
390  BOOL isNullTerminated = FALSE;
391  if (wlen == 0)
392  return 0;
393 
394  WINPR_ASSERT(wstr);
395  size_t iwlen = _wcsnlen(wstr, wlen);
396 
397  if ((len > INT32_MAX) || (wlen > INT32_MAX))
398  {
399  SetLastError(ERROR_INVALID_PARAMETER);
400  return -1;
401  }
402 
403  if (iwlen < wlen)
404  {
405  isNullTerminated = TRUE;
406  iwlen++;
407  }
408  const int rc = WideCharToMultiByte(CP_UTF8, 0, wstr, (int)iwlen, str, (int)len, NULL, NULL);
409  if ((rc <= 0) || ((len > 0) && ((size_t)rc > len)))
410  return -1;
411  else if (!isNullTerminated)
412  {
413  if (str && ((size_t)rc < len))
414  str[rc] = '\0';
415  return rc;
416  }
417  else if ((size_t)rc == len)
418  {
419  if (str && (str[rc - 1] != '\0'))
420  return rc;
421  }
422  return rc - 1;
423 }
424 
425 SSIZE_T ConvertMszWCharNToUtf8(const WCHAR* wstr, size_t wlen, char* str, size_t len)
426 {
427  if (wlen == 0)
428  return 0;
429 
430  WINPR_ASSERT(wstr);
431 
432  if ((len > INT32_MAX) || (wlen > INT32_MAX))
433  {
434  SetLastError(ERROR_INVALID_PARAMETER);
435  return -1;
436  }
437 
438  const int iwlen = (int)len;
439  const int rc = WideCharToMultiByte(CP_UTF8, 0, wstr, (int)wlen, str, iwlen, NULL, NULL);
440  if ((rc <= 0) || ((len > 0) && (rc > iwlen)))
441  return -1;
442 
443  return rc;
444 }
445 
446 SSIZE_T ConvertUtf8ToWChar(const char* str, WCHAR* wstr, size_t wlen)
447 {
448  if (!str)
449  {
450  if (wstr && wlen)
451  wstr[0] = 0;
452  return 0;
453  }
454 
455  const size_t len = strlen(str);
456  return ConvertUtf8NToWChar(str, len + 1, wstr, wlen);
457 }
458 
459 SSIZE_T ConvertUtf8NToWChar(const char* str, size_t len, WCHAR* wstr, size_t wlen)
460 {
461  size_t ilen = strnlen(str, len);
462  BOOL isNullTerminated = FALSE;
463  if (len == 0)
464  return 0;
465 
466  WINPR_ASSERT(str);
467 
468  if ((len > INT32_MAX) || (wlen > INT32_MAX))
469  {
470  SetLastError(ERROR_INVALID_PARAMETER);
471  return -1;
472  }
473  if (ilen < len)
474  {
475  isNullTerminated = TRUE;
476  ilen++;
477  }
478 
479  const int iwlen = (int)wlen;
480  const int rc = MultiByteToWideChar(CP_UTF8, 0, str, (int)ilen, wstr, iwlen);
481  if ((rc <= 0) || ((wlen > 0) && (rc > iwlen)))
482  return -1;
483  if (!isNullTerminated)
484  {
485  if (wstr && (rc < iwlen))
486  wstr[rc] = '\0';
487  return rc;
488  }
489  else if (rc == iwlen)
490  {
491  if (wstr && (wstr[rc - 1] != '\0'))
492  return rc;
493  }
494  return rc - 1;
495 }
496 
497 SSIZE_T ConvertMszUtf8NToWChar(const char* str, size_t len, WCHAR* wstr, size_t wlen)
498 {
499  if (len == 0)
500  return 0;
501 
502  WINPR_ASSERT(str);
503 
504  if ((len > INT32_MAX) || (wlen > INT32_MAX))
505  {
506  SetLastError(ERROR_INVALID_PARAMETER);
507  return -1;
508  }
509 
510  const int iwlen = (int)wlen;
511  const int rc = MultiByteToWideChar(CP_UTF8, 0, str, (int)len, wstr, iwlen);
512  if ((rc <= 0) || ((wlen > 0) && (rc > iwlen)))
513  return -1;
514 
515  return rc;
516 }
517 
518 char* ConvertWCharToUtf8Alloc(const WCHAR* wstr, size_t* pUtfCharLength)
519 {
520  char* tmp = NULL;
521  const SSIZE_T rc = ConvertWCharToUtf8(wstr, NULL, 0);
522  if (pUtfCharLength)
523  *pUtfCharLength = 0;
524  if (rc < 0)
525  return NULL;
526  tmp = calloc((size_t)rc + 1ull, sizeof(char));
527  if (!tmp)
528  return NULL;
529  const SSIZE_T rc2 = ConvertWCharToUtf8(wstr, tmp, (size_t)rc + 1ull);
530  if (rc2 < 0)
531  {
532  free(tmp);
533  return NULL;
534  }
535  WINPR_ASSERT(rc == rc2);
536  if (pUtfCharLength)
537  *pUtfCharLength = (size_t)rc2;
538  return tmp;
539 }
540 
541 char* ConvertWCharNToUtf8Alloc(const WCHAR* wstr, size_t wlen, size_t* pUtfCharLength)
542 {
543  char* tmp = NULL;
544  const SSIZE_T rc = ConvertWCharNToUtf8(wstr, wlen, NULL, 0);
545 
546  if (pUtfCharLength)
547  *pUtfCharLength = 0;
548  if (rc < 0)
549  return NULL;
550  tmp = calloc((size_t)rc + 1ull, sizeof(char));
551  if (!tmp)
552  return NULL;
553  const SSIZE_T rc2 = ConvertWCharNToUtf8(wstr, wlen, tmp, (size_t)rc + 1ull);
554  if (rc2 < 0)
555  {
556  free(tmp);
557  return NULL;
558  }
559  WINPR_ASSERT(rc == rc2);
560  if (pUtfCharLength)
561  *pUtfCharLength = (size_t)rc2;
562  return tmp;
563 }
564 
565 char* ConvertMszWCharNToUtf8Alloc(const WCHAR* wstr, size_t wlen, size_t* pUtfCharLength)
566 {
567  char* tmp = NULL;
568  const SSIZE_T rc = ConvertMszWCharNToUtf8(wstr, wlen, NULL, 0);
569 
570  if (pUtfCharLength)
571  *pUtfCharLength = 0;
572  if (rc < 0)
573  return NULL;
574  tmp = calloc((size_t)rc + 1ull, sizeof(char));
575  if (!tmp)
576  return NULL;
577  const SSIZE_T rc2 = ConvertMszWCharNToUtf8(wstr, wlen, tmp, (size_t)rc + 1ull);
578  if (rc2 < 0)
579  {
580  free(tmp);
581  return NULL;
582  }
583  WINPR_ASSERT(rc == rc2);
584  if (pUtfCharLength)
585  *pUtfCharLength = (size_t)rc2;
586  return tmp;
587 }
588 
589 WCHAR* ConvertUtf8ToWCharAlloc(const char* str, size_t* pSize)
590 {
591  WCHAR* tmp = NULL;
592  const SSIZE_T rc = ConvertUtf8ToWChar(str, NULL, 0);
593  if (pSize)
594  *pSize = 0;
595  if (rc < 0)
596  return NULL;
597  tmp = calloc((size_t)rc + 1ull, sizeof(WCHAR));
598  if (!tmp)
599  return NULL;
600  const SSIZE_T rc2 = ConvertUtf8ToWChar(str, tmp, (size_t)rc + 1ull);
601  if (rc2 < 0)
602  {
603  free(tmp);
604  return NULL;
605  }
606  WINPR_ASSERT(rc == rc2);
607  if (pSize)
608  *pSize = (size_t)rc2;
609  return tmp;
610 }
611 
612 WCHAR* ConvertUtf8NToWCharAlloc(const char* str, size_t len, size_t* pSize)
613 {
614  WCHAR* tmp = NULL;
615  const SSIZE_T rc = ConvertUtf8NToWChar(str, len, NULL, 0);
616  if (pSize)
617  *pSize = 0;
618  if (rc < 0)
619  return NULL;
620  tmp = calloc((size_t)rc + 1ull, sizeof(WCHAR));
621  if (!tmp)
622  return NULL;
623  const SSIZE_T rc2 = ConvertUtf8NToWChar(str, len, tmp, (size_t)rc + 1ull);
624  if (rc2 < 0)
625  {
626  free(tmp);
627  return NULL;
628  }
629  WINPR_ASSERT(rc == rc2);
630  if (pSize)
631  *pSize = (size_t)rc2;
632  return tmp;
633 }
634 
635 WCHAR* ConvertMszUtf8NToWCharAlloc(const char* str, size_t len, size_t* pSize)
636 {
637  WCHAR* tmp = NULL;
638  const SSIZE_T rc = ConvertMszUtf8NToWChar(str, len, NULL, 0);
639  if (pSize)
640  *pSize = 0;
641  if (rc < 0)
642  return NULL;
643  tmp = calloc((size_t)rc + 1ull, sizeof(WCHAR));
644  if (!tmp)
645  return NULL;
646  const SSIZE_T rc2 = ConvertMszUtf8NToWChar(str, len, tmp, (size_t)rc + 1ull);
647  if (rc2 < 0)
648  {
649  free(tmp);
650  return NULL;
651  }
652  WINPR_ASSERT(rc == rc2);
653  if (pSize)
654  *pSize = (size_t)rc2;
655  return tmp;
656 }