/// <summary>
/// Click handler: loads the file whose path is in <c>textBox2</c> into <c>textBox1</c>
/// and writes details of the detected encoding to the console.
/// </summary>
/// <param name="sender">The control that raised the event (not used).</param>
/// <param name="e">Event data (not used).</param>
private void button1_Click(object sender, EventArgs e)
{
    string fileName = textBox2.Text;

    // Detect the encoding first and decode with it. Decoding unconditionally as UTF-8
    // garbles ANSI (e.g. GB2312) and BOM-less UTF-16 content.
    Encoding encoding = GetTextFileEncodingType(fileName);
    textBox1.Text = System.IO.File.ReadAllText(fileName, encoding);

    Console.WriteLine(encoding.GetType());
    Console.WriteLine($"代码页:【{encoding.CodePage}】,描述【{encoding.EncodingName}】");
    Console.WriteLine($"WindowsCodePage:【{encoding.WindowsCodePage}】,WebName:【{encoding.WebName}】,HeaderName:【{encoding.HeaderName}】,BodyName:【{encoding.BodyName}】");

    // No Console.ReadLine() here: a blocking read inside an event handler would freeze
    // the UI thread whenever a console is attached.
}
}
/// <summary>
/// Detects the character encoding of a text file.
/// </summary>
/// <remarks>
/// Byte-order marks are checked first (UTF-8, UTF-32, UTF-16). When no BOM is present,
/// content accepted by <see cref="IsUTF8Bytes"/> is reported as UTF-8; anything else
/// falls back to <see cref="Encoding.Default"/>.
/// </remarks>
/// <param name="fileName">Path of the file to inspect.</param>
/// <returns>The detected encoding.</returns>
static Encoding GetTextFileEncodingType(string fileName)
{
    // ReadAllBytes opens, reads and disposes the file handle itself, so nothing leaks
    // if reading fails part-way through. I/O errors propagate to the caller.
    byte[] buffer = System.IO.File.ReadAllBytes(fileName);

    // UTF-8 BOM: EF BB BF
    if (buffer.Length >= 3 && buffer[0] == 0xEF && buffer[1] == 0xBB && buffer[2] == 0xBF)
    {
        return Encoding.UTF8;
    }

    // UTF-32 LE BOM: FF FE 00 00. Must be tested before UTF-16 LE, whose BOM is a prefix of it.
    if (buffer.Length >= 4 && buffer[0] == 0xFF && buffer[1] == 0xFE && buffer[2] == 0x00 && buffer[3] == 0x00)
    {
        return Encoding.UTF32;
    }

    // UTF-32 BE BOM: 00 00 FE FF
    if (buffer.Length >= 4 && buffer[0] == 0x00 && buffer[1] == 0x00 && buffer[2] == 0xFE && buffer[3] == 0xFF)
    {
        return new UTF32Encoding(true, true);
    }

    // UTF-16 BOMs are exactly two bytes. The third byte is already text and must not be
    // part of the test (the previous checks only matched files starting with specific characters).
    if (buffer.Length >= 2 && buffer[0] == 0xFE && buffer[1] == 0xFF)
    {
        return Encoding.BigEndianUnicode;
    }

    if (buffer.Length >= 2 && buffer[0] == 0xFF && buffer[1] == 0xFE)
    {
        return Encoding.Unicode;
    }

    // No BOM: fall back to a structural UTF-8 check, then to the default encoding.
    return IsUTF8Bytes(buffer) ? Encoding.UTF8 : Encoding.Default;
}
/// <summary>
/// Determines whether a byte sequence is structurally valid UTF-8. Intended for content
/// without a BOM (byte order mark, the optional marker at the start of a text file that
/// identifies which Unicode encoding form it uses).
/// </summary>
/// <remarks>
/// Lead bytes are restricted to the ranges RFC 3629 allows (C2-DF, E0-EF, F0-F4) and each
/// must be followed by the right number of 10xxxxxx continuation bytes. Second-byte range
/// restrictions (some overlong forms, surrogates) are not checked. Pure ASCII and empty
/// input are accepted.
/// </remarks>
/// <param name="data">Bytes to examine.</param>
/// <returns><c>true</c> if every sequence is well-formed and complete; otherwise <c>false</c>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="data"/> is <c>null</c>.</exception>
private static bool IsUTF8Bytes(byte[] data)
{
    if (data == null)
    {
        throw new ArgumentNullException(nameof(data));
    }

    // Number of continuation bytes still expected for the character being read.
    int pendingContinuations = 0;

    foreach (byte current in data)
    {
        if (pendingContinuations > 0)
        {
            // Inside a multi-byte character: the byte must look like 10xxxxxx.
            if ((current & 0xC0) != 0x80)
            {
                return false;
            }

            pendingContinuations--;
        }
        else if (current >= 0x80)
        {
            if (current >= 0xC2 && current <= 0xDF)
            {
                pendingContinuations = 1;
            }
            else if (current >= 0xE0 && current <= 0xEF)
            {
                pendingContinuations = 2;
            }
            else if (current >= 0xF0 && current <= 0xF4)
            {
                pendingContinuations = 3;
            }
            else
            {
                // Stray continuation byte, overlong lead (C0/C1), or a lead byte
                // beyond the Unicode range (F5 and above, including 5/6-byte forms).
                return false;
            }
        }
    }

    // Input that ends in the middle of a character is not valid UTF-8; report that
    // as a negative result rather than throwing.
    return pendingContinuations == 0;
}
''' <summary>
''' Click handler: loads the file whose path is in TextBox2 into TextBox1 and writes
''' details of the detected encoding to the console.
''' </summary>
''' <param name="sender">The control that raised the event (not used).</param>
''' <param name="e">Event data (not used).</param>
Private Sub Button1_Click(sender As Object, e As EventArgs) Handles Button1.Click
    Dim fileName As String = TextBox2.Text

    ' Detect the encoding first and decode with it. Decoding unconditionally as UTF-8
    ' garbles ANSI (e.g. GB2312) and BOM-less UTF-16 content.
    ' The local is not called "encoding": VB is case-insensitive, so that name would
    ' shadow the Encoding type.
    Dim detectedEncoding As Encoding = GetTextFileEncodingType(fileName)
    TextBox1.Text = System.IO.File.ReadAllText(fileName, detectedEncoding)

    Console.WriteLine(detectedEncoding.GetType())
    Console.WriteLine($"代码页:【{detectedEncoding.CodePage}】,描述【{detectedEncoding.EncodingName}】")
    Console.WriteLine($"WindowsCodePage:【{detectedEncoding.WindowsCodePage}】,WebName:【{detectedEncoding.WebName}】,HeaderName:【{detectedEncoding.HeaderName}】,BodyName:【{detectedEncoding.BodyName}】")

    ' No Console.ReadLine() here: a blocking read inside an event handler would freeze
    ' the UI thread whenever a console is attached.
End Sub
''' <summary>
''' Detects the character encoding of a text file.
''' </summary>
''' <remarks>
''' Byte-order marks are checked first (UTF-8, UTF-32, UTF-16). When no BOM is present,
''' content accepted by IsUTF8Bytes is reported as UTF-8; anything else falls back to
''' Encoding.Default.
''' </remarks>
''' <param name="fileName">Path of the file to inspect.</param>
''' <returns>The detected encoding.</returns>
Private Function GetTextFileEncodingType(fileName As String) As Encoding
    ' ReadAllBytes opens, reads and disposes the file handle itself, so nothing leaks
    ' if reading fails part-way through. I/O errors propagate to the caller.
    Dim buffer As Byte() = System.IO.File.ReadAllBytes(fileName)

    ' UTF-8 BOM: EF BB BF
    If buffer.Length >= 3 AndAlso buffer(0) = &HEF AndAlso buffer(1) = &HBB AndAlso buffer(2) = &HBF Then
        Return Encoding.UTF8
    End If

    ' UTF-32 LE BOM: FF FE 00 00. Must be tested before UTF-16 LE, whose BOM is a prefix of it.
    If buffer.Length >= 4 AndAlso buffer(0) = &HFF AndAlso buffer(1) = &HFE AndAlso buffer(2) = 0 AndAlso buffer(3) = 0 Then
        Return Encoding.UTF32
    End If

    ' UTF-32 BE BOM: 00 00 FE FF
    If buffer.Length >= 4 AndAlso buffer(0) = 0 AndAlso buffer(1) = 0 AndAlso buffer(2) = &HFE AndAlso buffer(3) = &HFF Then
        Return New UTF32Encoding(True, True)
    End If

    ' UTF-16 BOMs are exactly two bytes. The third byte is already text and must not be
    ' part of the test (the previous checks only matched files starting with specific characters).
    If buffer.Length >= 2 AndAlso buffer(0) = &HFE AndAlso buffer(1) = &HFF Then
        Return Encoding.BigEndianUnicode
    End If

    If buffer.Length >= 2 AndAlso buffer(0) = &HFF AndAlso buffer(1) = &HFE Then
        Return Encoding.Unicode
    End If

    ' No BOM: fall back to a structural UTF-8 check, then to the default encoding.
    If IsUTF8Bytes(buffer) Then
        Return Encoding.UTF8
    End If

    Return Encoding.Default
End Function
''' <summary>
''' Determines whether a byte sequence is structurally valid UTF-8. Intended for content
''' without a BOM (byte order mark).
''' </summary>
''' <remarks>
''' Lead bytes are restricted to the ranges RFC 3629 allows (C2-DF, E0-EF, F0-F4) and each
''' must be followed by the right number of 10xxxxxx continuation bytes. Second-byte range
''' restrictions (some overlong forms, surrogates) are not checked. Pure ASCII and empty
''' input are accepted.
''' </remarks>
''' <param name="data">Bytes to examine.</param>
''' <returns>True if every sequence is well-formed and complete; otherwise False.</returns>
''' <exception cref="ArgumentNullException">data is Nothing.</exception>
Private Function IsUTF8Bytes(data As Byte()) As Boolean
    If data Is Nothing Then
        Throw New ArgumentNullException(NameOf(data))
    End If

    ' Number of continuation bytes still expected for the character being read.
    ' The sequence length is derived from lead-byte ranges; VB has no "<<=" expression,
    ' so the shift-and-test loop of the C# original cannot be written here.
    Dim pendingContinuations As Integer = 0

    For Each current As Byte In data
        If pendingContinuations > 0 Then
            ' Inside a multi-byte character: the byte must look like 10xxxxxx.
            If (current And &HC0) <> &H80 Then
                Return False
            End If
            pendingContinuations -= 1
        ElseIf current >= &H80 Then
            If current >= &HC2 AndAlso current <= &HDF Then
                pendingContinuations = 1
            ElseIf current >= &HE0 AndAlso current <= &HEF Then
                pendingContinuations = 2
            ElseIf current >= &HF0 AndAlso current <= &HF4 Then
                pendingContinuations = 3
            Else
                ' Stray continuation byte, overlong lead (C0/C1), or a lead byte
                ' beyond the Unicode range (F5 and above, including 5/6-byte forms).
                Return False
            End If
        End If
    Next

    ' Input that ends in the middle of a character is not valid UTF-8; report that
    ' as a negative result rather than throwing.
    Return pendingContinuations = 0
End Function
方案来自 梦想橡皮擦 狂飙组基于 GPT 编写的 “程秘”
''' <summary>
''' Click handler: loads the file whose path is in textBox2 into textBox1 and writes
''' details of the detected encoding to the console.
''' </summary>
''' <param name="sender">The control that raised the event (not used).</param>
''' <param name="e">Event data (not used).</param>
Private Sub button1_Click(ByVal sender As Object, ByVal e As EventArgs)
    Dim fileName As String = textBox2.Text

    ' Detect the encoding first and decode with it. Decoding unconditionally as UTF-8
    ' garbles ANSI (e.g. GB2312) and BOM-less UTF-16 content.
    ' The local is not called "encoding": VB is case-insensitive, so that name would
    ' shadow the Encoding type.
    Dim detectedEncoding As Encoding = GetTextFileEncodingType(fileName)
    textBox1.Text = System.IO.File.ReadAllText(fileName, detectedEncoding)

    Console.WriteLine(detectedEncoding.GetType())
    Console.WriteLine($"代码页:【{detectedEncoding.CodePage}】,描述【{detectedEncoding.EncodingName}】")
    Console.WriteLine($"WindowsCodePage:【{detectedEncoding.WindowsCodePage}】,WebName:【{detectedEncoding.WebName}】,HeaderName:【{detectedEncoding.HeaderName}】,BodyName:【{detectedEncoding.BodyName}】")

    ' No Console.ReadLine() here: a blocking read inside an event handler would freeze
    ' the UI thread whenever a console is attached.
End Sub
''' <summary>
''' Detects the character encoding of a text file.
''' </summary>
''' <remarks>
''' Byte-order marks are checked first (UTF-8, UTF-32, UTF-16). When no BOM is present,
''' content accepted by IsUTF8Bytes is reported as UTF-8; anything else falls back to
''' Encoding.Default.
''' </remarks>
''' <param name="fileName">Path of the file to inspect.</param>
''' <returns>The detected encoding.</returns>
Private Shared Function GetTextFileEncodingType(ByVal fileName As String) As Encoding
    ' ReadAllBytes opens, reads and disposes the file handle itself, so nothing leaks
    ' if reading fails part-way through. I/O errors propagate to the caller.
    Dim buffer As Byte() = System.IO.File.ReadAllBytes(fileName)

    ' UTF-8 BOM: EF BB BF
    If buffer.Length >= 3 AndAlso buffer(0) = &HEF AndAlso buffer(1) = &HBB AndAlso buffer(2) = &HBF Then
        Return Encoding.UTF8
    End If

    ' UTF-32 LE BOM: FF FE 00 00. Must be tested before UTF-16 LE, whose BOM is a prefix of it.
    If buffer.Length >= 4 AndAlso buffer(0) = &HFF AndAlso buffer(1) = &HFE AndAlso buffer(2) = 0 AndAlso buffer(3) = 0 Then
        Return Encoding.UTF32
    End If

    ' UTF-32 BE BOM: 00 00 FE FF
    If buffer.Length >= 4 AndAlso buffer(0) = 0 AndAlso buffer(1) = 0 AndAlso buffer(2) = &HFE AndAlso buffer(3) = &HFF Then
        Return New UTF32Encoding(True, True)
    End If

    ' UTF-16 BOMs are exactly two bytes. The third byte is already text and must not be
    ' part of the test (the previous checks only matched files starting with specific characters).
    If buffer.Length >= 2 AndAlso buffer(0) = &HFE AndAlso buffer(1) = &HFF Then
        Return Encoding.BigEndianUnicode
    End If

    If buffer.Length >= 2 AndAlso buffer(0) = &HFF AndAlso buffer(1) = &HFE Then
        Return Encoding.Unicode
    End If

    ' No BOM: fall back to a structural UTF-8 check, then to the default encoding.
    If IsUTF8Bytes(buffer) Then
        Return Encoding.UTF8
    End If

    Return Encoding.Default
End Function
''' <summary>
''' Determines whether a byte sequence is structurally valid UTF-8. Intended for content
''' without a BOM (byte order mark).
''' </summary>
''' <remarks>
''' Lead bytes are restricted to the ranges RFC 3629 allows (C2-DF, E0-EF, F0-F4) and each
''' must be followed by the right number of 10xxxxxx continuation bytes. Second-byte range
''' restrictions (some overlong forms, surrogates) are not checked. Pure ASCII and empty
''' input are accepted.
''' </remarks>
''' <param name="data">Bytes to examine.</param>
''' <returns>True if every sequence is well-formed and complete; otherwise False.</returns>
''' <exception cref="ArgumentNullException">data is Nothing.</exception>
Private Shared Function IsUTF8Bytes(ByVal data As Byte()) As Boolean
    If data Is Nothing Then
        Throw New ArgumentNullException(NameOf(data))
    End If

    ' Number of continuation bytes still expected for the character being read.
    ' The sequence length is derived from lead-byte ranges; VB has no "<<=" expression,
    ' so the shift-and-test loop of the C# original cannot be written here.
    Dim pendingContinuations As Integer = 0

    For Each current As Byte In data
        If pendingContinuations > 0 Then
            ' Inside a multi-byte character: the byte must look like 10xxxxxx.
            If (current And &HC0) <> &H80 Then
                Return False
            End If
            pendingContinuations -= 1
        ElseIf current >= &H80 Then
            If current >= &HC2 AndAlso current <= &HDF Then
                pendingContinuations = 1
            ElseIf current >= &HE0 AndAlso current <= &HEF Then
                pendingContinuations = 2
            ElseIf current >= &HF0 AndAlso current <= &HF4 Then
                pendingContinuations = 3
            Else
                ' Stray continuation byte, overlong lead (C0/C1), or a lead byte
                ' beyond the Unicode range (F5 and above, including 5/6-byte forms).
                Return False
            End If
        End If
    Next

    ' Input that ends in the middle of a character is not valid UTF-8; report that
    ' as a negative result rather than throwing.
    Return pendingContinuations = 0
End Function
不知道你这个问题是否已经解决，如果还没有解决的话：