FileLoadHelpers.cpp 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146
  1. //-------------------------------------------------------------------------------------------------------
  2. // Copyright (C) Microsoft. All rights reserved.
  3. // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
  4. //-------------------------------------------------------------------------------------------------------
  5. #include "stdafx.h"
  6. #include "Codex/Utf8Codex.h"
  7. HRESULT FileLoadHelpers::LoadScriptFromFile(LPCSTR filename, LPCWSTR& contents, bool* isUtf8Out, LPCWSTR* contentsRawOut, UINT* lengthBytesOut, bool printFileOpenError)
  8. {
  9. HRESULT hr = S_OK;
  10. LPCWSTR contentsRaw = nullptr;
  11. LPCUTF8 pRawBytes = nullptr;
  12. UINT lengthBytes = 0;
  13. bool isUtf8 = false;
  14. contents = nullptr;
  15. FILE * file;
  16. //
  17. // Open the file as a binary file to prevent CRT from handling encoding, line-break conversions,
  18. // etc.
  19. //
  20. if (fopen_s(&file, filename, "rb") != 0)
  21. {
  22. if (printFileOpenError)
  23. {
  24. #ifdef _WIN32
  25. DWORD lastError = GetLastError();
  26. char16 wszBuff[512];
  27. fprintf(stderr, "Error in opening file '%s' ", filename);
  28. wszBuff[0] = 0;
  29. if (FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM,
  30. nullptr,
  31. lastError,
  32. 0,
  33. wszBuff,
  34. _countof(wszBuff),
  35. nullptr))
  36. {
  37. fwprintf(stderr, _u(": %s"), wszBuff);
  38. }
  39. fwprintf(stderr, _u("\n"));
  40. #elif defined(_POSIX_VERSION)
  41. fprintf(stderr, "Error in opening file: ");
  42. perror(filename);
  43. #endif
  44. IfFailGo(E_FAIL);
  45. }
  46. else
  47. {
  48. return E_FAIL;
  49. }
  50. }
  51. //
  52. // Determine the file length, in bytes.
  53. //
  54. fseek(file, 0, SEEK_END);
  55. lengthBytes = ftell(file);
  56. fseek(file, 0, SEEK_SET);
  57. contentsRaw = (LPCWSTR)HeapAlloc(GetProcessHeap(), 0, lengthBytes + sizeof(WCHAR));
  58. if (nullptr == contentsRaw)
  59. {
  60. fwprintf(stderr, _u("out of memory"));
  61. IfFailGo(E_OUTOFMEMORY);
  62. }
  63. //
  64. // Read the entire content as a binary block.
  65. //
  66. size_t readBytes = fread((void*)contentsRaw, sizeof(char), lengthBytes, file);
  67. if (readBytes < lengthBytes)
  68. {
  69. fwprintf(stderr, _u("readBytes should be equal to lengthBytes"));
  70. IfFailGo(E_FAIL);
  71. }
  72. fclose(file);
  73. *(WCHAR*)((byte*)contentsRaw + lengthBytes) = _u('\0'); // Null terminate it. Could be LPCWSTR.
  74. //
  75. // Read encoding, handling any conversion to Unicode.
  76. //
  77. // Warning: The UNICODE buffer for parsing is supposed to be provided by the host.
  78. // This is not a complete read of the encoding. Some encodings like UTF7, UTF1, EBCDIC, SCSU, BOCU could be
  79. // wrongly classified as ANSI
  80. //
  81. pRawBytes = (byte*)contentsRaw;
  82. if ((0xEF == *pRawBytes && 0xBB == *(pRawBytes + 1) && 0xBF == *(pRawBytes + 2)))
  83. {
  84. isUtf8 = true;
  85. }
  86. else if (0xFFFE == *contentsRaw || (0x0000 == *contentsRaw && 0xFEFF == *(contentsRaw + 1)))
  87. {
  88. // unicode unsupported
  89. fwprintf(stderr, _u("unsupported file encoding"));
  90. IfFailGo(E_UNEXPECTED);
  91. }
  92. else if (0xFEFF == *contentsRaw)
  93. {
  94. // unicode LE
  95. contents = contentsRaw;
  96. }
  97. else
  98. {
  99. // Assume UTF8
  100. isUtf8 = true;
  101. }
  102. if (isUtf8)
  103. {
  104. utf8::DecodeOptions decodeOptions = utf8::doAllowInvalidWCHARs;
  105. UINT cUtf16Chars = utf8::ByteIndexIntoCharacterIndex(pRawBytes, lengthBytes, decodeOptions);
  106. contents = (LPCWSTR)HeapAlloc(GetProcessHeap(), 0, (cUtf16Chars + 1) * sizeof(WCHAR));
  107. if (nullptr == contents)
  108. {
  109. fwprintf(stderr, _u("out of memory"));
  110. IfFailGo(E_OUTOFMEMORY);
  111. }
  112. utf8::DecodeUnitsIntoAndNullTerminate((char16*)contents, pRawBytes, pRawBytes + lengthBytes, decodeOptions);
  113. }
  114. Error:
  115. if (SUCCEEDED(hr) && isUtf8Out)
  116. {
  117. Assert(contentsRawOut);
  118. Assert(lengthBytesOut);
  119. *isUtf8Out = isUtf8;
  120. *contentsRawOut = contentsRaw;
  121. *lengthBytesOut = lengthBytes;
  122. }
  123. else if (contentsRaw && (contentsRaw != contents)) // Otherwise contentsRaw is lost. Free it if it is different to contents.
  124. {
  125. HeapFree(GetProcessHeap(), 0, (void*)contentsRaw);
  126. }
  127. if (contents && FAILED(hr))
  128. {
  129. HeapFree(GetProcessHeap(), 0, (void*)contents);
  130. contents = nullptr;
  131. }
  132. return hr;
  133. }