Helpers.cpp 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134
  1. //-------------------------------------------------------------------------------------------------------
  2. // Copyright (C) Microsoft. All rights reserved.
  3. // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
  4. //-------------------------------------------------------------------------------------------------------
  5. #include "stdafx.h"
  6. #include "Codex/Utf8Codex.h"
  7. HRESULT Helpers::LoadScriptFromFile(LPCWSTR filename, LPCWSTR& contents, bool* isUtf8Out, LPCWSTR* contentsRawOut, UINT* lengthBytesOut, bool printFileOpenError)
  8. {
  9. HRESULT hr = S_OK;
  10. LPCWSTR contentsRaw = nullptr;
  11. UINT lengthBytes = 0;
  12. bool isUtf8 = false;
  13. contents = nullptr;
  14. FILE * file;
  15. //
  16. // Open the file as a binary file to prevent CRT from handling encoding, line-break conversions,
  17. // etc.
  18. //
  19. if (_wfopen_s(&file, filename, _u("rb")) != 0)
  20. {
  21. if (printFileOpenError)
  22. {
  23. DWORD lastError = GetLastError();
  24. char16 wszBuff[512];
  25. fwprintf(stderr, _u("Error in opening file '%s' "), filename);
  26. wszBuff[0] = 0;
  27. if (FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM,
  28. nullptr,
  29. lastError,
  30. 0,
  31. wszBuff,
  32. _countof(wszBuff),
  33. nullptr))
  34. {
  35. fwprintf(stderr, _u(": %s"), wszBuff);
  36. }
  37. fwprintf(stderr, _u("\n"));
  38. IfFailGo(E_FAIL);
  39. }
  40. else
  41. {
  42. return E_FAIL;
  43. }
  44. }
  45. //
  46. // Determine the file length, in bytes.
  47. //
  48. fseek(file, 0, SEEK_END);
  49. lengthBytes = ftell(file);
  50. fseek(file, 0, SEEK_SET);
  51. contentsRaw = (LPCWSTR)HeapAlloc(GetProcessHeap(), 0, lengthBytes + sizeof(WCHAR));
  52. if (nullptr == contentsRaw)
  53. {
  54. fwprintf(stderr, _u("out of memory"));
  55. IfFailGo(E_OUTOFMEMORY);
  56. }
  57. //
  58. // Read the entire content as a binary block.
  59. //
  60. fread((void*)contentsRaw, sizeof(char), lengthBytes, file);
  61. fclose(file);
  62. *(WCHAR*)((byte*)contentsRaw + lengthBytes) = _u('\0'); // Null terminate it. Could be LPCWSTR.
  63. //
  64. // Read encoding, handling any conversion to Unicode.
  65. //
  66. // Warning: The UNICODE buffer for parsing is supposed to be provided by the host.
  67. // This is not a complete read of the encoding. Some encodings like UTF7, UTF1, EBCDIC, SCSU, BOCU could be
  68. // wrongly classified as ANSI
  69. //
  70. byte * pRawBytes = (byte*)contentsRaw;
  71. if ((0xEF == *pRawBytes && 0xBB == *(pRawBytes + 1) && 0xBF == *(pRawBytes + 2)))
  72. {
  73. isUtf8 = true;
  74. }
  75. else if (0xFFFE == *contentsRaw || 0x0000 == *contentsRaw && 0xFEFF == *(contentsRaw + 1))
  76. {
  77. // unicode unsupported
  78. fwprintf(stderr, _u("unsupported file encoding"));
  79. IfFailGo(E_UNEXPECTED);
  80. }
  81. else if (0xFEFF == *contentsRaw)
  82. {
  83. // unicode LE
  84. contents = contentsRaw;
  85. }
  86. else
  87. {
  88. // Assume UTF8
  89. isUtf8 = true;
  90. }
  91. if (isUtf8)
  92. {
  93. utf8::DecodeOptions decodeOptions = utf8::doAllowInvalidWCHARs;
  94. UINT cUtf16Chars = utf8::ByteIndexIntoCharacterIndex(pRawBytes, lengthBytes, decodeOptions);
  95. contents = (LPCWSTR)HeapAlloc(GetProcessHeap(), 0, (cUtf16Chars + 1) * sizeof(WCHAR));
  96. if (nullptr == contents)
  97. {
  98. fwprintf(stderr, _u("out of memory"));
  99. IfFailGo(E_OUTOFMEMORY);
  100. }
  101. utf8::DecodeIntoAndNullTerminate((char16*) contents, pRawBytes, cUtf16Chars, decodeOptions);
  102. }
  103. Error:
  104. if (SUCCEEDED(hr) && isUtf8Out)
  105. {
  106. Assert(contentsRawOut);
  107. Assert(lengthBytesOut);
  108. *isUtf8Out = isUtf8;
  109. *contentsRawOut = contentsRaw;
  110. *lengthBytesOut = lengthBytes;
  111. }
  112. else if (contentsRaw && (contentsRaw != contents)) // Otherwise contentsRaw is lost. Free it if it is different to contents.
  113. {
  114. HeapFree(GetProcessHeap(), 0, (void*)contentsRaw);
  115. }
  116. if (contents && FAILED(hr))
  117. {
  118. HeapFree(GetProcessHeap(), 0, (void*)contents);
  119. contents = nullptr;
  120. }
  121. return hr;
  122. }