请用 Java 写一个 String 处理方法,将输入字符串中的中文与英文、数字之间使用空格隔开,并针对特殊符号(括号和逗号)
package com.fufeiwen.test;
import java.util.regex.Pattern;
/**
 * Inserts a single space wherever the character category changes inside a string.
 *
 * <p>Categories are: Chinese (U+4E00..U+9FA5), ASCII digits, ASCII letters, and
 * "other" (everything else, including punctuation and whitespace).
 */
public class Test {
    /** Category code for anything that is not Chinese, a digit, or an ASCII letter. */
    private static final int TYPE_OTHER = 0;
    /** Category code for characters in the range U+4E00..U+9FA5. */
    private static final int TYPE_CHINESE = 1;
    /** Category code for the ASCII digits 0-9. */
    private static final int TYPE_NUMBER = 2;
    /** Category code for the ASCII letters A-Z and a-z. */
    private static final int TYPE_LETTER = 3;

    /** Small demo: prints the spaced-out form of a mixed Chinese/English sentence. */
    public static void main(String[] args) {
        System.out.println(getStr("我是中国ren,nishi谁?"));
    }

    /**
     * Returns {@code str} with one space inserted between every pair of adjacent
     * characters whose categories differ.
     *
     * @param str the text to process; may be {@code null} or empty
     * @return the spaced text; {@code null} or empty input is returned unchanged
     */
    public static String getStr(String str) {
        if (str == null || str.isEmpty()) {
            return str;
        }
        // StringBuilder: the buffer never leaves this method, so no synchronisation is needed.
        StringBuilder sb = new StringBuilder(str.length() + 16);
        int preType = typeOf(str.charAt(0));
        sb.append(str.charAt(0));
        for (int i = 1; i < str.length(); i++) {
            char ch = str.charAt(i);
            int curType = typeOf(ch);
            if (curType != preType) {
                sb.append(' ');
            }
            sb.append(ch);
            preType = curType;
        }
        return sb.toString();
    }

    /**
     * Classifies a whole string.
     *
     * @param str the string to classify
     * @return 1 if every character is Chinese, 2 if every character is a digit,
     *         3 if every character is an ASCII letter, otherwise 0; {@code null}
     *         and the empty string are classified as 0
     */
    public static int getType(String str) {
        // An empty string carries no category; it must not be reported as Chinese.
        if (str == null || str.isEmpty()) {
            return TYPE_OTHER;
        }
        int type = typeOf(str.charAt(0));
        for (int i = 1; i < str.length(); i++) {
            if (typeOf(str.charAt(i)) != type) {
                return TYPE_OTHER;
            }
        }
        return type;
    }

    /** Classifies a single character by plain range checks (no regex compilation per call). */
    private static int typeOf(char c) {
        if (c >= '\u4e00' && c <= '\u9fa5') {
            return TYPE_CHINESE;
        }
        if (c >= '0' && c <= '9') {
            return TYPE_NUMBER;
        }
        if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) {
            return TYPE_LETTER;
        }
        return TYPE_OTHER;
    }
}
以上只是大体思路,没有考虑性能和细节。
package com.test.spring.test;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Separates runs of letters, digits, Chinese characters and special symbols
 * in a string with single spaces.
 */
public class Test {
    /**
     * Character class listing the symbols treated as "special". Compiled once
     * instead of on every call. The square brackets are escaped so the literal
     * is a valid Java string and a valid regex character class.
     */
    private static final Pattern SPECIAL_CHARACTERS = Pattern.compile(
            "[`~!@#$%^&()+=|{}':;',\\[\\].<>/?~!@#¥%……&*()——+|{}【】‘;:”“’。,、?]");

    /** The single category assigned to each character when looking for boundaries. */
    private enum CharKind { LETTER, NUMBER, SPECIAL, CHINESE, OTHER }

    /** Small demo: prints the spaced-out form of a mixed sample string. */
    public static void main(String[] arg) {
        String str = "张三1234589*李四ABCabc王五2";
        System.out.println(setStr(str));
    }

    /**
     * Inserts one space between adjacent characters that belong to different
     * categories (letter, number, Chinese, special symbol). Characters that fall
     * into none of these categories never cause a space to be inserted.
     *
     * @param str the text to process; may be {@code null} or empty
     * @return the spaced text; {@code null} or empty input is returned unchanged
     */
    public static String setStr(String str) {
        if (str == null || str.isEmpty()) {
            return str;
        }
        StringBuilder value = new StringBuilder(str.length() + 16);
        CharKind previous = kindOf(str.charAt(0));
        value.append(str.charAt(0));
        for (int i = 1; i < str.length(); i++) {
            char ch = str.charAt(i);
            CharKind current = kindOf(ch);
            // Only a change between two recognised categories is a boundary.
            if (current != CharKind.OTHER && previous != CharKind.OTHER && current != previous) {
                value.append(' ');
            }
            value.append(ch);
            previous = current;
        }
        return value.toString();
    }

    /**
     * Assigns exactly one category to a character. The special-symbol check runs
     * before the Chinese check because some symbols satisfy both predicates; giving
     * them one category keeps two identical adjacent symbols from being split.
     */
    private static CharKind kindOf(char ch) {
        if (isLetter(ch)) {
            return CharKind.LETTER;
        }
        if (isNumber(ch)) {
            return CharKind.NUMBER;
        }
        if (isSpecialCharacter(String.valueOf(ch))) {
            return CharKind.SPECIAL;
        }
        if (isChinese(ch)) {
            return CharKind.CHINESE;
        }
        return CharKind.OTHER;
    }

    /**
     * Tells whether a character is an ASCII letter.
     *
     * @param c the character to test
     * @return {@code true} for a-z and A-Z
     */
    public static boolean isLetter(char c) {
        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
    }

    /**
     * Tells whether a character is an ASCII digit.
     *
     * @param c the character to test
     * @return {@code true} for 0-9
     */
    public static boolean isNumber(char c) {
        return c >= '0' && c <= '9';
    }

    /**
     * Tells whether a character belongs to one of the Unicode blocks used here for
     * Chinese ideographs and Chinese/full-width punctuation.
     *
     * @param c the character to test
     * @return {@code true} if the character's Unicode block is one of the listed blocks
     */
    private static boolean isChinese(char c) {
        Character.UnicodeBlock ub = Character.UnicodeBlock.of(c);
        return ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
                || ub == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
                || ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
                || ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
                || ub == Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
                || ub == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS
                || ub == Character.UnicodeBlock.GENERAL_PUNCTUATION;
    }

    /**
     * Tells whether a string contains at least one special symbol.
     *
     * @param c the string to inspect
     * @return {@code true} if any character of {@code c} is in the special-symbol set
     */
    public static boolean isSpecialCharacter(String c) {
        return SPECIAL_CHARACTERS.matcher(c).find();
    }
}
大体实现应该是这样的
因为我没看懂“针对特殊符号(括号和逗号)”具体要做什么,所以我只针对另外三种情况(中文、数字、字母)写了代码:
/**
 * Puts a single space between adjacent runs of Chinese characters, digits and
 * ASCII letters. Other characters (punctuation, whitespace) are left untouched.
 *
 * @param str the text to process
 * @return the text with a space inserted at every Chinese/digit/letter boundary
 */
public static String getStr(String str) {
    // One capturing group per character class: Chinese, digits, letters.
    final String[] groups = {
        "([\u4e00-\u9fa5]+)",
        "([0-9]+)",
        "([A-Za-z]+)"
    };
    String result = str;
    for (int left = 0; left < groups.length; left++) {
        for (int right = 0; right < groups.length; right++) {
            if (left == right) {
                continue; // a run next to a run of its own class is not a boundary
            }
            // Pair "run of one class immediately followed by run of another".
            result = result.replaceAll(groups[left] + groups[right], "$1 $2");
        }
    }
    return result;
}