Browse Source

Default to an Encoding without BOM when BOM marker was not found (#42)

* Default to an Encoding without BOM when BOM marker was not found

* Use explicit types instead of var

* Update Project/Src/Util/FileReader.cs

Co-authored-by: Igor Velikorossov <RussKie@users.noreply.github.com>

---------

Co-authored-by: Igor Velikorossov <RussKie@users.noreply.github.com>
pull/44/head
Donatas Mačiūnas 10 months ago
committed by GitHub
parent
commit
4ea04d2436
No known key found for this signature in database GPG Key ID: B5690EEEBB952194
  1. 24
      Project/Src/Util/FileReader.cs

24
Project/Src/Util/FileReader.cs

@@ -16,16 +16,18 @@ namespace ICSharpCode.TextEditor.Util
/// </summary>
public static class FileReader
{
+public static readonly UTF8Encoding Utf8WithoutBom = new(encoderShouldEmitUTF8Identifier: false);
public static bool IsUnicode(Encoding encoding)
{
-var codepage = encoding.CodePage;
+int codepage = encoding.CodePage;
// return true if codepage is any UTF codepage
return codepage == 65001 || codepage == 65000 || codepage == 1200 || codepage == 1201;
}
public static string ReadFileContent(Stream fs, ref Encoding encoding)
{
-using (var reader = OpenStream(fs, encoding))
+using (StreamReader reader = OpenStream(fs, encoding))
{
reader.Peek();
encoding = reader.CurrentEncoding;
@@ -35,7 +37,7 @@ namespace ICSharpCode.TextEditor.Util
public static string ReadFileContent(string fileName, Encoding encoding)
{
-using (var fs = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
+using (FileStream fs = new(fileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
{
return ReadFileContent(fs, ref encoding);
}
@@ -50,8 +52,8 @@ namespace ICSharpCode.TextEditor.Util
{
// the autodetection of StreamReader is not capable of detecting the difference
// between ISO-8859-1 and UTF-8 without BOM.
-var firstByte = fs.ReadByte();
-var secondByte = fs.ReadByte();
+int firstByte = fs.ReadByte();
+int secondByte = fs.ReadByte();
switch ((firstByte << 8) | secondByte)
{
case 0x0000: // either UTF-32 Big Endian or a binary file; use StreamReader
@@ -74,14 +76,14 @@ namespace ICSharpCode.TextEditor.Util
private static StreamReader AutoDetect(Stream fs, byte firstByte, byte secondByte, Encoding defaultEncoding)
{
-var max = (int)Math.Min(fs.Length, val2: 500000); // look at max. 500 KB
+int max = (int)Math.Min(fs.Length, val2: 500000); // look at max. 500 KB
const int ASCII = 0;
const int Error = 1;
const int UTF8 = 2;
const int UTF8Sequence = 3;
-var state = ASCII;
-var sequenceLength = 0;
-for (var i = 0; i < max; i++)
+int state = ASCII;
+int sequenceLength = 0;
+for (int i = 0; i < max; i++)
{
byte b;
if (i == 0)
@@ -159,8 +161,8 @@ namespace ICSharpCode.TextEditor.Util
defaultEncoding = Encoding.Default; // use system encoding instead
return new StreamReader(fs, defaultEncoding);
default:
-return new StreamReader(fs);
+return new StreamReader(fs, Utf8WithoutBom);
}
}
}
}
}
Loading…
Cancel
Save