package com.durian.common.tool.chinese;
import com.durian.common.tool.chinese.util.ChineseUtil;
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
public class Test {
public static void main(String[] args) throws IOException {
String str="中文简體";
File outfile = new File("D:/personal/Java/zhuanhuan.txt");
PrintWriter outer = new PrintWriter(outfile,"UTF-8");
String str2=ChineseUtil.convert(str, ChineseUtil.TRADITIONAL);//转繁体
System.out.printf("转繁体:%s",str2);
outer.printf("转繁体:");
outer.printf("%s",str2);
String str3=ChineseUtil.convert(str2, ChineseUtil.SIMPLIFIED);//转简体
System.out.println(str3);
outer.printf("转简体:%s",str3);
}
}
Util代码如下,输出方面不用在意,主要是Debug的时候发现str2和str3里简繁体都没有转换
package com.durian.common.tool.chinese.util;
import java.io.*;
import java.util.*;
/**
* 中文简繁体互转
*
*/
public class ChineseUtil {
private Properties charMap = new Properties();
private Set conflictingSets = new HashSet<>();
public static final int TRADITIONAL = 0;
public static final int SIMPLIFIED = 1;
private static final int NUM_OF_CONVERTERS = 2;
private static final ChineseUtil[] converters = new ChineseUtil[NUM_OF_CONVERTERS];
private static final String[] propertyFiles = new String[2];
static {
propertyFiles[TRADITIONAL] = "zh2Hant.properties";// 简转繁字典
propertyFiles[SIMPLIFIED] = "zh2Hans.properties";// 繁转简字典
}
/**
* @param converterType 0 for traditional and 1 for simplified
* @return
*/
public static ChineseUtil getInstance(int converterType) {
if (converterType >= 0 && converterType < NUM_OF_CONVERTERS) {
if (converters[converterType] == null) {
synchronized (ChineseUtil.class) {
if (converters[converterType] == null) {
converters[converterType] = new ChineseUtil(propertyFiles[converterType]);
}
}
}
return converters[converterType];
} else {
return null;
}
}
/**
* 简体繁体互转
*
* @param text 待转换的文本内容
* @param converterType 0转成繁体 1 转成简体
* @return
*/
public static String convert(String text, int converterType) {
ChineseUtil instance = getInstance(converterType);
return instance.convert(text);
}
// 获取字典库
private ChineseUtil(String propertyFile) {
InputStream is = null;
is = getClass().getResourceAsStream(propertyFile);
if (is != null) {
BufferedReader reader = null;
try {
reader = new BufferedReader(new InputStreamReader(is));
charMap.load(reader);
} catch (FileNotFoundException e) {
} catch (IOException e) {
e.printStackTrace();
} finally {
try {
if (reader != null)
reader.close();
if (is != null)
is.close();
} catch (IOException e) {
}
}
}
initializeHelper();
}
private void initializeHelper() {
Map stringPossibilities = new HashMap<>();
Iterator iter = charMap.keySet().iterator();
while (iter.hasNext()) {
String key = (String) iter.next();
if (key.length() >= 1) {
for (int i = 0; i < (key.length()); i++) {
String keySubstring = key.substring(0, i + 1);
if (stringPossibilities.containsKey(keySubstring)) {
Integer integer = (Integer) (stringPossibilities.get(keySubstring));
stringPossibilities.put(keySubstring, (integer.intValue() + 1));// 多意字字池
} else {
stringPossibilities.put(keySubstring, (1));
}
}
}
}
iter = stringPossibilities.keySet().iterator();
while (iter.hasNext()) {
String key = (String) iter.next();
if (((Integer) (stringPossibilities.get(key))).intValue() > 1) {
conflictingSets.add(key);
}
}
}
/**
* 内容转换
*
* @param in
* @return
*/
public String convert(String in) {
StringBuilder outString = new StringBuilder();// 转换后的内容
StringBuilder stackString = new StringBuilder();// 压栈池(临时)
for (int i = 0; i < in.length(); i++) {
char c = in.charAt(i);
String key = "" + c;
stackString.append(key);
if (conflictingSets.contains(stackString.toString())) {// 多意字先跳过,直接先入栈
} else if (charMap.containsKey(stackString.toString())) { // 栈内容如果直接属于字典库,那么直接进行转换,并且清理栈内的东西
outString.append(charMap.get(stackString.toString()));
stackString.setLength(0);
} else { // 当且不是多意字,在字典里也找不到,则需要把栈内除最后一个字去掉后,再次分词/字查找转换
CharSequence sequence = stackString.subSequence(0, stackString.length() - 1);
stackString.delete(0, stackString.length() - 1);
flushStack(outString, new StringBuilder(sequence));
}
}
flushStack(outString, stackString);
return outString.toString();
}
private void flushStack(StringBuilder outString, StringBuilder stackString) {
while (stackString.length() > 0) {
if (charMap.containsKey(stackString.toString())) {
outString.append(charMap.get(stackString.toString()));
stackString.setLength(0);
} else {
outString.append("" + stackString.charAt(0));
stackString.delete(0, 1);
}
}
}
}
该回答引用chatgpt:
您可以使用第三方库 opencc4j 来实现Java简繁体转换。下面是一个简单的示例代码:
import com.github.houbb.opencc4j.util.ZhConverterUtil;
public class Main {
public static void main(String[] args) {
String simplified = "你好,世界!";
String traditional = ZhConverterUtil.convertToTraditional(simplified);
System.out.println(traditional);
}
}
如果您想要将繁体字转换为简体字,只需要调用 ZhConverterUtil.convertToSimple 方法即可。
String traditional = "你好,世界!";
String simplified = ZhConverterUtil.convertToSimple(traditional);
System.out.println(simplified); // 输出:你好,世界!
请注意,在使用此库之前,您需要将其添加到项目依赖中。您可以在 Maven 中添加以下依赖:
<dependency>
<groupId>com.github.houbb</groupId>
<artifactId>opencc4j-core</artifactId>
<version>1.1.1</version>
</dependency>