语法错误。Java 应该这样写：
/**
 * Small Spark driver that merges two text files and prints every distinct line once.
 *
 * <p>Deduplication is done by turning each line into a {@code (line, "")} pair and
 * collapsing pairs that share a key with {@code reduceByKey}. Spark's built-in
 * {@code JavaRDD.distinct()} is documented to give the same set of lines; the explicit
 * form is kept here because it shows the individual steps.
 */
public class Deduplicate {

    /**
     * Runs the deduplication job against {@code data/data1.txt} and {@code data/data2.txt}
     * and writes each distinct line to standard output.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        // Step 1: build the SparkConf (application name and master URL).
        SparkConf conf = new SparkConf()
                .setAppName("Deduplicated Application")
                .setMaster("local[2]");

        // Step 2: create the Java flavour of the SparkContext.
        JavaSparkContext sc = new JavaSparkContext(conf);
        try {
            // Step 3: read both input files, one RDD element per line.
            JavaRDD<String> dataRDD1 = sc.textFile("data/data1.txt");
            JavaRDD<String> dataRDD2 = sc.textFile("data/data2.txt");

            // Step 4: merge the two data sets (union keeps duplicates).
            JavaRDD<String> dataRDD = dataRDD1.union(dataRDD2);

            // Step 5: build <line, ""> pairs; the line is the key, the value is a placeholder.
            JavaPairRDD<String, String> pairs = dataRDD.mapToPair(
                    new PairFunction<String, String, String>() {
                        @Override
                        public Tuple2<String, String> call(String s) throws Exception {
                            return new Tuple2<String, String>(s, "");
                        }
                    }
            );

            // Step 6: deduplicate by merging entries that share a key.
            // Every value is the empty string, so which one is kept does not matter.
            JavaPairRDD<String, String> result = pairs.reduceByKey(
                    new Function2<String, String, String>() {
                        @Override
                        public String call(String s, String s2) throws Exception {
                            return s2;
                        }
                    }
            );

            // Step 7: bring the result to the driver and print the keys.
            // Iterating the typed list avoids the Object[] round trip and its unchecked cast.
            for (Tuple2<String, String> entry : result.collect()) {
                System.out.println(entry._1());
            }
        } finally {
            // Always release the context, even when a step above fails.
            sc.stop();
        }
    }
}