如何获取编码信息?

面试的时候,面试官问到这样一个问题:“在项目中,出现乱码的情况,你怎么获得该信息的原编码?”。
(当时就哑了,不知道怎么回答,望大牛们教教)

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;

/**
 * Guesses the character encoding of a file.
 *
 * <p>Detection is done in two steps:
 * <ol>
 *   <li>A byte-order mark at the start of the file selects {@code UTF-16LE},
 *       {@code UTF-16BE} or {@code UTF-8}.</li>
 *   <li>Without a BOM, the content is checked for strictly well-formed UTF-8;
 *       if at least one multi-byte sequence is found and no malformed byte is
 *       seen, {@code UTF-8} is reported.</li>
 * </ol>
 * In every other case (empty file, pure ASCII, malformed UTF-8, unreadable
 * file) the legacy default {@code GBK} is returned. The result is a heuristic,
 * not a guarantee.
 */
public class FileEncodeReferee
{
    /** Charset name reported when nothing more specific can be determined. */
    private static final String DEFAULT_CHARSET = "GBK";

    /** Number of leading bytes inspected for a byte-order mark. */
    private static final int BOM_PROBE_LENGTH = 3;

    /** Upper bound on the bytes examined when no BOM is present, so huge files stay cheap. */
    private static final int MAX_SCAN_BYTES = 1 << 20;

    private final File file;

    /**
     * Creates a detector for the given file.
     *
     * @param file the file to inspect; if {@code null}, {@link #getCharset()}
     *             returns the default charset
     */
    public FileEncodeReferee(File file)
    {
        this.file = file;
    }

    /**
     * Creates a detector for the file at the given path.
     *
     * @param path path of the file to inspect
     */
    public FileEncodeReferee(String path)
    {
        file = new File(path);
    }

    /**
     * Detects the charset of the file.
     *
     * <p>This method is best-effort: I/O problems are reported on
     * {@code System.err} and the default charset is returned instead of
     * propagating the failure.
     *
     * @return {@code "UTF-16LE"}, {@code "UTF-16BE"}, {@code "UTF-8"} or the
     *         default {@code "GBK"}
     */
    public String getCharset()
    {
        if (file == null)
        {
            return DEFAULT_CHARSET;
        }

        BufferedInputStream bis = null;
        try
        {
            bis = new BufferedInputStream(new FileInputStream(file));
            return detect(bis);
        }
        catch (IOException e)
        {
            report("cannot read", e);
        }
        catch (SecurityException e)
        {
            report("access denied to", e);
        }
        finally
        {
            if (bis != null)
            {
                try
                {
                    bis.close();
                }
                catch (IOException e)
                {
                    report("cannot close", e);
                }
            }
        }
        return DEFAULT_CHARSET;
    }

    /** Runs BOM detection first and falls back to UTF-8 content validation. */
    private static String detect(BufferedInputStream in) throws IOException
    {
        // Remember the start so the content scan can re-read the probed bytes.
        in.mark(BOM_PROBE_LENGTH);

        byte[] head = new byte[BOM_PROBE_LENGTH];
        int filled = 0;
        while (filled < head.length)
        {
            int got = in.read(head, filled, head.length - filled);
            if (got == -1)
            {
                break;
            }
            filled += got;
        }
        if (filled == 0)
        {
            return DEFAULT_CHARSET;
        }

        if (filled >= 2 && head[0] == (byte) 0xFF && head[1] == (byte) 0xFE)
        {
            return "UTF-16LE";
        }
        if (filled >= 2 && head[0] == (byte) 0xFE && head[1] == (byte) 0xFF)
        {
            return "UTF-16BE";
        }
        if (filled == 3
                && head[0] == (byte) 0xEF
                && head[1] == (byte) 0xBB
                && head[2] == (byte) 0xBF)
        {
            return "UTF-8";
        }

        in.reset();
        return looksLikeUtf8(in) ? "UTF-8" : DEFAULT_CHARSET;
    }

    /**
     * Returns true when the stream holds at least one multi-byte UTF-8 sequence
     * and every examined byte is part of well-formed UTF-8.
     *
     * <p>Validation is strict (overlong forms, surrogates and values above
     * U+10FFFF are rejected) so that typical GBK byte pairs are not mistaken
     * for UTF-8. Pure ASCII yields false because it is equally valid in GBK.
     */
    private static boolean looksLikeUtf8(BufferedInputStream in) throws IOException
    {
        boolean sawMultiByte = false;
        int scanned = 0;
        int lead;
        // The limit is only checked between sequences, so a sequence is never cut in half.
        while (scanned < MAX_SCAN_BYTES && (lead = in.read()) != -1)
        {
            scanned++;
            if (lead < 0x80)
            {
                continue;
            }

            int trailing;
            // Allowed range of the first continuation byte; narrower for some leads.
            int low = 0x80;
            int high = 0xBF;
            if (lead >= 0xC2 && lead <= 0xDF)
            {
                trailing = 1;
            }
            else if (lead >= 0xE0 && lead <= 0xEF)
            {
                trailing = 2;
                if (lead == 0xE0)
                {
                    low = 0xA0; // excludes overlong three-byte forms
                }
                else if (lead == 0xED)
                {
                    high = 0x9F; // excludes UTF-16 surrogates
                }
            }
            else if (lead >= 0xF0 && lead <= 0xF4)
            {
                trailing = 3;
                if (lead == 0xF0)
                {
                    low = 0x90; // excludes overlong four-byte forms
                }
                else if (lead == 0xF4)
                {
                    high = 0x8F; // excludes code points above U+10FFFF
                }
            }
            else
            {
                // Stray continuation byte, overlong lead (C0/C1) or invalid lead (F5..FF).
                return false;
            }

            for (int i = 0; i < trailing; i++)
            {
                int next = in.read();
                // End of stream (-1) falls below 'low', so truncated sequences are rejected too.
                if (next < low || next > high)
                {
                    return false;
                }
                low = 0x80;
                high = 0xBF;
                scanned++;
            }
            sawMultiByte = true;
        }
        return sawMultiByte;
    }

    /** Writes a one-line diagnostic to standard error; detection itself never throws. */
    private void report(String action, Exception cause)
    {
        System.err.println("FileEncodeReferee: " + action + " " + file + ": " + cause);
    }

    /**
     * Prints the detected charset of the file named by the first argument, or
     * of {@code E://Huha.csv} when no argument is given.
     *
     * @param args optional single element: the path of the file to inspect
     */
    public static void main(String[] args)
    {
        String path = (args != null && args.length > 0) ? args[0] : "E://Huha.csv";
        FileEncodeReferee fer = new FileEncodeReferee(path);
        System.out.println(fer.getCharset());
    }

}

你问问是乱码情况时的编码 还是出现乱码前一版本的编码呗.

真不会的时候可以直接反过去请教下呗..
获取文件编码貌似比较困难, 可以用Uedit32打开文件,看匹配的编码是哪一个,
再者就得使用第三方提供的jar 读取文件分析了.