现在能做到的是把每个字的hash值求出并存放在string[]中,接下来就不太会了,求大神指导
//words 二维数组,每行 [0] 为属性值,[1] 为该属性的权重
//hashbits 生成的 SimHash 位数(例如 64)
//return SimHash 二进制串(由 '0' 和 '1' 组成)
getSimHash(String[][] words,int hashbits)
//计算汉明距离
//str1 getSimHash 生成的二进制串
//str2 另一个 getSimHash 生成的二进制串
//return 整型距离,越小越相似;两个串长度不同时返回 -1
getDistance(str1,str2)
下面是代码
package com.yeahmobi.ymconv.util;
/**
 * SimHash fingerprinting over weighted string features, plus a Hamming-distance comparison
 * of the resulting fingerprints.
 *
 * <p>A fingerprint is a string of {@code '0'} and {@code '1'} characters. Two fingerprints of
 * the same length can be compared with {@link #getDistance(String, String)}.
 */
public class MySimHash {

    /**
     * Builds the SimHash fingerprint of a set of weighted features.
     *
     * <p>Each feature {@code words[i][0]} is hashed with {@code MurmurHash.hash64}. For every
     * output position the feature's weight is added when the corresponding hash bit is 1 and
     * subtracted when it is 0. An output position becomes {@code '1'} only when its total is
     * strictly positive.
     *
     * <p>Output position {@code j} reads hash bit {@code hashbits - 1 - j}, i.e. the fingerprint
     * is the low {@code hashbits} bits of the hash written most-significant bit first. Positions
     * that lie above bit 63 are treated as 0 bits.
     *
     * @param words features; {@code words[i][0]} is the feature text and {@code words[i][1]} is
     *     its weight as a decimal integer. A weight that parses to zero or less counts as 1.
     * @param hashbits number of bits in the fingerprint
     * @return the fingerprint, exactly {@code hashbits} characters long
     * @throws IllegalArgumentException if {@code hashbits} is negative
     * @throws NumberFormatException if a weight is not a parsable integer
     */
    public static String getSimHash(String[][] words, int hashbits) {
        if (hashbits < 0) {
            throw new IllegalArgumentException("hashbits must not be negative: " + hashbits);
        }

        long[] votes = new long[hashbits];
        for (String[] word : words) {
            long featureHash = MurmurHash.hash64(word[0]);
            // Parse once per feature; the weight does not change from bit to bit.
            int parsedWeight = Integer.parseInt(word[1]);
            int weight = parsedWeight <= 0 ? 1 : parsedWeight;

            for (int j = 0; j < hashbits; j++) {
                // Read the bit directly instead of going through a binary string: the string
                // form can be longer than hashbits, which used to overrun the vote array.
                int bitIndex = hashbits - 1 - j;
                boolean bitSet = bitIndex < Long.SIZE && ((featureHash >>> bitIndex) & 1L) == 1L;
                votes[j] += bitSet ? weight : -weight;
            }
        }

        StringBuilder fingerprint = new StringBuilder(hashbits);
        for (long vote : votes) {
            fingerprint.append(vote > 0 ? '1' : '0');
        }
        return fingerprint.toString();
    }

    /**
     * Left-pads a string with {@code '0'} characters up to a minimum length.
     *
     * <p>The content of {@code str} is never altered or truncated. When {@code str} is already
     * at least {@code hashbits} characters long (including when {@code hashbits} is zero or
     * negative) it is returned unchanged.
     *
     * @param str the text to pad; {@code null} is treated as the text {@code "null"}
     * @param hashbits the minimum length of the result
     * @return {@code str} prefixed with as many zeros as needed to reach {@code hashbits}
     */
    public static String getZero(String str, int hashbits) {
        // String.format rendered null as "null"; keep that so existing callers see no change.
        String text = String.valueOf(str);
        if (hashbits <= text.length()) {
            return text;
        }

        StringBuilder padded = new StringBuilder(hashbits);
        for (int i = text.length(); i < hashbits; i++) {
            padded.append('0');
        }
        return padded.append(text).toString();
    }

    /**
     * Counts the positions at which two fingerprints differ (their Hamming distance).
     *
     * @param str1 first fingerprint
     * @param str2 second fingerprint
     * @return the number of differing positions, so a smaller value means more similar inputs;
     *     {@code -1} when the two strings have different lengths
     */
    public static int getDistance(String str1, String str2) {
        if (str1.length() != str2.length()) {
            return -1;
        }

        int distance = 0;
        for (int i = 0; i < str1.length(); i++) {
            if (str1.charAt(i) != str2.charAt(i)) {
                distance++;
            }
        }
        return distance;
    }

    /**
     * Entry point. Currently does nothing; the sample comparison below is kept for reference
     * and can be uncommented to print distances between a few fingerprints.
     */
    public static void main(String[] args) {
        // String s1 = MySimHash.getSimHash(new String[][] { { "187.237.239.16", "3" }, { "mx", "3" }, { "775", "3" }, { "60541", "3" }, { "2342256", "3" }, { "alcatel", "3" }, { "onetouch5020", "3" }, { "android", "3" }, { "4.1.1", "3" }, { "hh", "3" } }, 64);
        // String s2 = MySimHash.getSimHash(new String[][] { { "177.224.174.214", "1" }, { "mx", "1" }, { "775", "1" }, { "6177", "1" }, { "2478822", "1" }, { "generic", "1" }, { "storm", "1" }, { "android", "1" }, { "4.2.2", "1" } }, 64);
        // String s3 = MySimHash.getSimHash(new String[][] { { "5.246.82.36", "1" }, { "sdf", "1" }, { "663", "1" }, { "333", "1" }, { "55", "0" }, { "sd", "1" }, { "er", "1" }, { "34", "1" }, { "sdfasdf", "1" }, { "hh", "1" } }, 64);
        // String s4 = MySimHash.getSimHash(new String[][] { { "189.132.168.157", "1" }, { "mx", "1" }, { "390", "1" }, { "3203", "1" }, { "2342277", "1" }, { "samsung", "1" }, { "gt-i8190l", "1" }, { "android", "1" }, { "4.1.2", "1" } }, 64);
        // String s5 = MySimHash.getSimHash(new String[][] { { "187.237.239.16", "1" }, { "mx", "1" }, { "775", "3" }, { "60541", "1" }, { "2342256", "1" }, { "alcatel", "1" }, { "onetouch5020", "1" }, { "android", "1" }, { "4.1.1", "1" }, { "hh", "1" } }, 64);
        // String s6 = MySimHash.getSimHash(new String[][] { { "187.237.239.25", "3" }, { "mx", "3" }, { "775", "3" }, { "60541", "3" }, { "2342256", "3" }, { "alcatel", "3" }, { "onetouch5020", "3" }, { "android", "3" }, { "4.1.1", "3" }, { "hh", "3" } }, 64);
        // String s7 = MySimHash.getSimHash(new String[][] { { "187.237.239.16", "1" }, { "mx", "3" }, { "775", "3" }, { "60541", "3" }, { "2342256", "3" }, { "alcatel", "3" }, { "onetouch5020", "3" }, { "android", "3" }, { "4.1.1", "3" }, { "hh", "3" } }, 64);
        // System.out.println("----------");
        // System.out.println(MySimHash.getDistance(s1, s2));
        // System.out.println(MySimHash.getDistance(s1, s3));
        // System.out.println(MySimHash.getDistance(s1, s4));
        // System.out.println(MySimHash.getDistance(s1, s5));
        // System.out.println(MySimHash.getDistance(s1, s6));
        // System.out.println(MySimHash.getDistance(s1, s7));
        //
        // System.out.println(s1);
        // System.out.println(s2);
        // System.out.println(s3);
        // System.out.println(s4);
        // System.out.println(s5);
        // System.out.println(s6);
    }
}