FileLoadHelpers.cpp 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148
  1. //-------------------------------------------------------------------------------------------------------
  2. // Copyright (C) Microsoft. All rights reserved.
  3. // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
  4. //-------------------------------------------------------------------------------------------------------
  5. #include "stdafx.h"
  6. #include "Codex/Utf8Codex.h"
  7. HRESULT FileLoadHelpers::LoadScriptFromFile(LPCSTR filename, LPCWSTR& contents, bool* isUtf8Out, LPCWSTR* contentsRawOut, UINT* lengthBytesOut, bool printFileOpenError)
  8. {
  9. HRESULT hr = S_OK;
  10. LPCWSTR contentsRaw = nullptr;
  11. LPCUTF8 pRawBytes = nullptr;
  12. UINT lengthBytes = 0;
  13. bool isUtf8 = false;
  14. contents = nullptr;
  15. FILE * file;
  16. //
  17. // Open the file as a binary file to prevent CRT from handling encoding, line-break conversions,
  18. // etc.
  19. //
  20. if (fopen_s(&file, filename, "rb") != 0)
  21. {
  22. if (printFileOpenError)
  23. {
  24. #ifdef _WIN32
  25. DWORD lastError = GetLastError();
  26. char16 wszBuff[512];
  27. fprintf(stderr, "Error in opening file '%s' ", filename);
  28. wszBuff[0] = 0;
  29. if (FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
  30. nullptr,
  31. lastError,
  32. 0,
  33. wszBuff,
  34. _countof(wszBuff),
  35. nullptr))
  36. {
  37. fwprintf(stderr, _u(": %s"), wszBuff);
  38. }
  39. fwprintf(stderr, _u("\n"));
  40. #elif defined(_POSIX_VERSION)
  41. fprintf(stderr, "Error in opening file: ");
  42. perror(filename);
  43. #endif
  44. IfFailGo(E_FAIL);
  45. }
  46. else
  47. {
  48. return E_FAIL;
  49. }
  50. }
  51. //
  52. // Determine the file length, in bytes.
  53. //
  54. fseek(file, 0, SEEK_END);
  55. lengthBytes = ftell(file);
  56. fseek(file, 0, SEEK_SET);
  57. contentsRaw = (LPCWSTR)HeapAlloc(GetProcessHeap(), 0, lengthBytes + sizeof(WCHAR));
  58. if (nullptr == contentsRaw)
  59. {
  60. fwprintf(stderr, _u("out of memory"));
  61. IfFailGo(E_OUTOFMEMORY);
  62. }
  63. //
  64. // Read the entire content as a binary block.
  65. //
  66. {
  67. size_t readBytes = fread((void*)contentsRaw, sizeof(char), lengthBytes, file);
  68. if (readBytes < lengthBytes)
  69. {
  70. fwprintf(stderr, _u("readBytes should be equal to lengthBytes"));
  71. IfFailGo(E_FAIL);
  72. }
  73. }
  74. fclose(file);
  75. *(WCHAR*)((byte*)contentsRaw + lengthBytes) = _u('\0'); // Null terminate it. Could be LPCWSTR.
  76. //
  77. // Read encoding, handling any conversion to Unicode.
  78. //
  79. // Warning: The UNICODE buffer for parsing is supposed to be provided by the host.
  80. // This is not a complete read of the encoding. Some encodings like UTF7, UTF1, EBCDIC, SCSU, BOCU could be
  81. // wrongly classified as ANSI
  82. //
  83. pRawBytes = (byte*)contentsRaw;
  84. if ((0xEF == *pRawBytes && 0xBB == *(pRawBytes + 1) && 0xBF == *(pRawBytes + 2)))
  85. {
  86. isUtf8 = true;
  87. }
  88. else if (0xFFFE == *contentsRaw || (0x0000 == *contentsRaw && 0xFEFF == *(contentsRaw + 1)))
  89. {
  90. // unicode unsupported
  91. fwprintf(stderr, _u("unsupported file encoding"));
  92. IfFailGo(E_UNEXPECTED);
  93. }
  94. else if (0xFEFF == *contentsRaw)
  95. {
  96. // unicode LE
  97. contents = contentsRaw;
  98. }
  99. else
  100. {
  101. // Assume UTF8
  102. isUtf8 = true;
  103. }
  104. if (isUtf8)
  105. {
  106. utf8::DecodeOptions decodeOptions = utf8::doAllowInvalidWCHARs;
  107. UINT cUtf16Chars = utf8::ByteIndexIntoCharacterIndex(pRawBytes, lengthBytes, decodeOptions);
  108. contents = (LPCWSTR)HeapAlloc(GetProcessHeap(), 0, (cUtf16Chars + (size_t)1) * sizeof(WCHAR));
  109. if (nullptr == contents)
  110. {
  111. fwprintf(stderr, _u("out of memory"));
  112. IfFailGo(E_OUTOFMEMORY);
  113. }
  114. utf8::DecodeUnitsIntoAndNullTerminate((char16*)contents, pRawBytes, pRawBytes + lengthBytes, decodeOptions);
  115. }
  116. Error:
  117. if (SUCCEEDED(hr) && isUtf8Out)
  118. {
  119. Assert(contentsRawOut);
  120. Assert(lengthBytesOut);
  121. *isUtf8Out = isUtf8;
  122. *contentsRawOut = contentsRaw;
  123. *lengthBytesOut = lengthBytes;
  124. }
  125. else if (contentsRaw && (contentsRaw != contents)) // Otherwise contentsRaw is lost. Free it if it is different to contents.
  126. {
  127. HeapFree(GetProcessHeap(), 0, (void*)contentsRaw);
  128. }
  129. if (contents && FAILED(hr))
  130. {
  131. HeapFree(GetProcessHeap(), 0, (void*)contents);
  132. contents = nullptr;
  133. }
  134. return hr;
  135. }