我想把一个包含汉字Unicode编码的文件uni.txt用Java读取出来,并正确显示中文,请问该如何做呢。我使用FileInputStream和DataInputStream都没有读取成功。请大家指点一下。
uni.txt包含这样的内容:
table.rows.length; //\u8868\u683c\u603b\u884c\u6570
tableModel_arr[tib].pg.perPageCount; //\u6bcf\u9875\u8bb0\u5f55\u6570
tableModel_arr[tib].pg.page;//\u5f53\u524d\u7b2c\u51e0\u9875
。。。。。。
我单独用System.out.println("table.rows.length; //\u8868\u683c\u603b\u884c\u6570")能打印汉字;但一旦用输入流就不行了。
[b]问题补充:[/b]
感谢大家,面对大家的热情,如果我不说两句的话,我心里会憋得难受的。
以前我在其它网站提问题,不是很久没有回复,就是答非所问;在JavaEye我也是第一次提问,本是抱着试试的心理,没想到各位真是够哥们,有建议,也有代码,我都不晓得如何感激了。
祝各位前途似锦,祝JavaEye越办越火!
代码大概如下:
[code="java"]
import java.io.*;
public class ReadTxtFile {
public static void main(String[] s) throws IOException {
new ReadTxtFile().readTxtFile("C://uni.txt");
}
private void readTxtFile(String fileName) throws IOException {
File file = new File(fileName);
FileInputStream fin = new FileInputStream(file);
InputStreamReader read = new InputStreamReader(fin, "utf-8");
BufferedReader reader = new BufferedReader(read);
String content = reader.readLine();
while (content != null) {
char[] c = content.toCharArray();
char[] out = new char[c.length];
System.out.println(loadConvert(c, 0, c.length, out));
content = reader.readLine();
}
read.close();
reader.close();
fin.close();
}
private String loadConvert(char[] in, int off, int len, char[] convtBuf) {
if (convtBuf.length < len) {
int newLen = len * 2;
if (newLen < 0) {
newLen = Integer.MAX_VALUE;
}
convtBuf = new char[newLen];
}
char aChar;
char[] out = convtBuf;
int outLen = 0;
int end = off + len;
while (off < end) {
aChar = in[off++];
if (aChar == '\\') {
aChar = in[off++];
if (aChar == 'u') {
int value = 0;
for (int i = 0; i < 4; i++) {
aChar = in[off++];
switch (aChar) {
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
value = (value << 4) + aChar - '0';
break;
case 'a':
case 'b':
case 'c':
case 'd':
case 'e':
case 'f':
value = (value << 4) + 10 + aChar - 'a';
break;
case 'A':
case 'B':
case 'C':
case 'D':
case 'E':
case 'F':
value = (value << 4) + 10 + aChar - 'A';
break;
default:
throw new IllegalArgumentException(
"Malformed \\uxxxx encoding.");
}
}
out[outLen++] = (char) value;
} else {
if (aChar == 't')
aChar = '\t';
else if (aChar == 'r')
aChar = '\r';
else if (aChar == 'n')
aChar = '\n';
else if (aChar == 'f')
aChar = '\f';
out[outLen++] = aChar;
}
} else {
out[outLen++] = (char) aChar;
}
}
return new String(out, 0, outLen);
}
}
[/code]
参考java.util.Properties
[code="java"]
/**
 * Loads key/value pairs from a byte stream decoded with the given charset.
 *
 * Each logical line returned by LineReader is split into a key and a value:
 * the key ends at the first unescaped '=', ':', space, tab or form feed, and
 * the value starts after the separator and any whitespace around it. Both
 * parts go through loadConvert before being stored with put.
 *
 * @param inStream byte stream to read; it is closed before this method returns
 * @param encoding charset name used to decode the stream
 * @throws IOException if reading from the stream fails
 */
public synchronized void load(InputStream inStream,String encoding) throws IOException {
    char[] convtBuf = new char[1024];
    LineReader lr = new LineReader(inStream,encoding);
    try {
        int limit;
        int keyLen;
        int valueStart;
        char c;
        boolean hasSep;
        boolean precedingBackslash;
        while ((limit = lr.readLine()) >= 0) {
            c = 0;
            keyLen = 0;
            valueStart = limit;
            hasSep = false;
            precedingBackslash = false;
            // Scan for the end of the key.
            while (keyLen < limit) {
                c = lr.lineBuf[keyLen];
                // need check if escaped.
                if ((c == '=' || c == ':') && !precedingBackslash) {
                    valueStart = keyLen + 1;
                    hasSep = true;
                    break;
                } else if ((c == ' ' || c == '\t' || c == '\f')
                        && !precedingBackslash) {
                    valueStart = keyLen + 1;
                    break;
                }
                if (c == '\\') {
                    precedingBackslash = !precedingBackslash;
                } else {
                    precedingBackslash = false;
                }
                keyLen++;
            }
            // Skip whitespace before the value, plus at most one '=' or ':'
            // when the key was terminated by whitespace.
            while (valueStart < limit) {
                c = lr.lineBuf[valueStart];
                if (c != ' ' && c != '\t' && c != '\f') {
                    if (!hasSep && (c == '=' || c == ':')) {
                        hasSep = true;
                    } else {
                        break;
                    }
                }
                valueStart++;
            }
            String key = loadConvert(lr.lineBuf, 0, keyLen, convtBuf);
            String value = loadConvert(lr.lineBuf, valueStart, limit
                    - valueStart, convtBuf);
            put(key, value);
        }
    } finally {
        // Close on every exit path so a malformed line or read error does not
        // leak the stream. The reader can be null if LineReader could not
        // create it; fall back to closing the raw stream in that case.
        if (lr.reader != null) {
            lr.reader.close();
        } else {
            inStream.close();
        }
    }
}
/*
* read in a "logical line" from input stream, skip all comment and blank
* lines and filter out those leading whitespace characters (\u0020, \u0009
* and \u000c) from the beginning of a "natural line". Method returns the
* char length of the "logical line" and stores the line in "lineBuf".
*/
class LineReader {
public LineReader(InputStream inStream,String encoding) {
try {
this.reader = new BufferedReader(new InputStreamReader(inStream,encoding));
} catch (UnsupportedEncodingException e) {
// Logger.getLogger(LineReader.class).error(e);
}
}
char[] inBuf = new char[8192];
char[] lineBuf = new char[1024];
int inLimit = 0;
int inOff = 0;
// InputStream inStream;
BufferedReader reader ;
int readLine() throws IOException {
int len = 0;
char c = 0;
boolean skipWhiteSpace = true;
boolean isCommentLine = false;
boolean isNewLine = true;
boolean appendedLineBegin = false;
boolean precedingBackslash = false;
boolean skipLF = false;
while (true) {
if (inOff >= inLimit) {
inLimit = reader.read(inBuf);
inOff = 0;
if (inLimit <= 0) {
if (len == 0 || isCommentLine) {
return -1;
}
return len;
}
}
// The line below is equivalent to calling a
// ISO8859-1 decoder.
// c = (char) (0xff & inBuf[inOff++]);
c = inBuf[inOff++];
if (skipLF) {
skipLF = false;
if (c == '\n') {
continue;
}
}
if (skipWhiteSpace) {
if (c == ' ' || c == '\t' || c == '\f') {
continue;
}
if (!appendedLineBegin && (c == '\r' || c == '\n')) {
continue;
}
skipWhiteSpace = false;
appendedLineBegin = false;
}
if (isNewLine) {
isNewLine = false;
if (c == '#' || c == '!') {
isCommentLine = true;
continue;
}
}
if (c != '\n' && c != '\r') {
lineBuf[len++] = c;
if (len == lineBuf.length) {
int newLength = lineBuf.length * 2;
if (newLength < 0) {
newLength = Integer.MAX_VALUE;
}
char[] buf = new char[newLength];
System.arraycopy(lineBuf, 0, buf, 0, lineBuf.length);
lineBuf = buf;
}
// flip the preceding backslash flag
if (c == '\\') {
precedingBackslash = !precedingBackslash;
} else {
precedingBackslash = false;
}
} else {
// reached EOL
if (isCommentLine || len == 0) {
isCommentLine = false;
isNewLine = true;
skipWhiteSpace = true;
len = 0;
continue;
}
if (inOff >= inLimit) {
inLimit = reader.read(inBuf);
inOff = 0;
if (inLimit <= 0) {
return len;
}
}
if (precedingBackslash) {
len -= 1;
// skip the leading whitespace characters in following
// line
skipWhiteSpace = true;
appendedLineBegin = true;
precedingBackslash = false;
if (c == '\r') {
skipLF = true;
}
} else {
return len;
}
}
}
}
//=================end of readLine()===========
}
/**
 * Converts escape sequences in {@code in[off .. off+len)} to the characters
 * they denote: a backslash followed by 'u' and four hex digits becomes that
 * UTF-16 code unit, backslash-t/r/n/f become tab, carriage return, line feed
 * and form feed, and any other escaped character stands for itself.
 *
 * @param in       characters to convert
 * @param off      index of the first character to convert
 * @param len      number of characters to convert
 * @param convtBuf scratch buffer for the result; replaced by a larger one when
 *                 it is shorter than {@code len}
 * @return the converted text
 * @throws IllegalArgumentException if a unicode escape is truncated or
 *                                  contains a non-hex character
 */
private String loadConvert(char[] in, int off, int len, char[] convtBuf) {
    if (convtBuf.length < len) {
        int newLen = len * 2;
        if (newLen < 0) {
            newLen = Integer.MAX_VALUE;
        }
        convtBuf = new char[newLen];
    }
    char aChar;
    char[] out = convtBuf;
    int outLen = 0;
    int end = off + len;
    while (off < end) {
        aChar = in[off++];
        if (aChar != '\\') {
            out[outLen++] = aChar;
            continue;
        }
        if (off >= end) {
            // A backslash that ends the range has nothing to escape: keep it
            // literally instead of reading past the requested range.
            out[outLen++] = aChar;
            break;
        }
        aChar = in[off++];
        if (aChar == 'u') {
            // The four hex digits must all lie inside the requested range.
            if (end - off < 4) {
                throw new IllegalArgumentException(
                        "Malformed \\uxxxx encoding.");
            }
            int value = 0;
            for (int i = 0; i < 4; i++) {
                aChar = in[off++];
                if (aChar >= '0' && aChar <= '9') {
                    value = (value << 4) + aChar - '0';
                } else if (aChar >= 'a' && aChar <= 'f') {
                    value = (value << 4) + 10 + aChar - 'a';
                } else if (aChar >= 'A' && aChar <= 'F') {
                    value = (value << 4) + 10 + aChar - 'A';
                } else {
                    throw new IllegalArgumentException(
                            "Malformed \\uxxxx encoding.");
                }
            }
            out[outLen++] = (char) value;
        } else {
            if (aChar == 't') {
                aChar = '\t';
            } else if (aChar == 'r') {
                aChar = '\r';
            } else if (aChar == 'n') {
                aChar = '\n';
            } else if (aChar == 'f') {
                aChar = '\f';
            }
            out[outLen++] = aChar;
        }
    }
    return new String(out, 0, outLen);
}[/code]
loadConvert作用是转换编码
去看 [b][color=red]Properties.loadConvert (char[] in, int off, int len, char[] convtBuf)[/color][/b]方法. 实验后,可用.
[code="java"]String s = "\\u4F60"; // escaped backslash: loadConvert, not the compiler, must decode the escape
char[] c = s.toCharArray();
char[] out = new char[c.length];
System.out.println(new Tests().loadConvert(c, 0, c.length, out)); // 你[/code]
new Tests().loadConvert就是直接拷贝的Properties.loadConvert方法。
楼上的手快...