The same code throws an error when run from Scala, yet the equivalent Java version runs fine.
Here is the error output:
helloOffSLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/C:/Users/zsts/.m2/repository/org/apache/logging/log4j/log4j-slf4j-impl/2.5/log4j-slf4j-impl-2.5.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/C:/Users/zsts/.m2/repository/org/slf4j/slf4j-log4j12/1.7.10/slf4j-log4j12-1.7.10.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
SLF4J: Actual binding is of type [org.apache.logging.slf4j.Log4jLoggerFactory]
ERROR StatusLogger No log4j2 configuration file found. Using default configuration: logging only errors to the console.
19:25:11.875 [main] ERROR org.apache.hadoop.util.Shell - Failed to locate the winutils binary in the hadoop binary path
java.io.IOException: Could not locate executable null\bin\winutils.exe in the Hadoop binaries.
at org.apache.hadoop.util.Shell.getQualifiedBinPath(Shell.java:355) ~[hadoop-common-2.6.4.jar:?]
at org.apache.hadoop.util.Shell.getWinUtilsPath(Shell.java:370) [hadoop-common-2.6.4.jar:?]
at org.apache.hadoop.util.Shell.<clinit>(Shell.java:363) [hadoop-common-2.6.4.jar:?]
at org.apache.hadoop.util.StringUtils.<clinit>(StringUtils.java:79) [hadoop-common-2.6.4.jar:?]
at org.apache.hadoop.security.Groups.parseStaticMapping(Groups.java:116) [hadoop-common-2.6.4.jar:?]
at org.apache.hadoop.security.Groups.<init>(Groups.java:93) [hadoop-common-2.6.4.jar:?]
at org.apache.hadoop.security.Groups.<init>(Groups.java:73) [hadoop-common-2.6.4.jar:?]
at org.apache.hadoop.security.Groups.getUserToGroupsMappingService(Groups.java:293) [hadoop-common-2.6.4.jar:?]
at org.apache.hadoop.security.UserGroupInformation.initialize(UserGroupInformation.java:283) [hadoop-common-2.6.4.jar:?]
at org.apache.hadoop.security.UserGroupInformation.ensureInitialized(UserGroupInformation.java:260) [hadoop-common-2.6.4.jar:?]
at org.apache.hadoop.security.UserGroupInformation.loginUserFromSubject(UserGroupInformation.java:789) [hadoop-common-2.6.4.jar:?]
at org.apache.hadoop.security.UserGroupInformation.getLoginUser(UserGroupInformation.java:774) [hadoop-common-2.6.4.jar:?]
at org.apache.hadoop.security.UserGroupInformation.getCurrentUser(UserGroupInformation.java:647) [hadoop-common-2.6.4.jar:?]
at org.apache.spark.util.Utils$$anonfun$getCurrentUserName$1.apply(Utils.scala:2198) [spark-core_2.10-1.6.2.jar:1.6.2]
at org.apache.spark.util.Utils$$anonfun$getCurrentUserName$1.apply(Utils.scala:2198) [spark-core_2.10-1.6.2.jar:1.6.2]
at scala.Option.getOrElse(Option.scala:120) [?:?]
at org.apache.spark.util.Utils$.getCurrentUserName(Utils.scala:2198) [spark-core_2.10-1.6.2.jar:1.6.2]
at org.apache.spark.SparkContext.<init>(SparkContext.scala:322) [spark-core_2.10-1.6.2.jar:1.6.2]
at tarot.test$.main(test.scala:19) [bin/:?]
at tarot.test.main(test.scala) [bin/:?]
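The winutils part above looks like a separate, Windows-only issue: Shell is looking for bin\winutils.exe under hadoop.home.dir / HADOOP_HOME and neither is set (hence the "null\bin\winutils.exe"). The workaround I keep seeing suggested is to set the property before creating the SparkContext; a rough sketch, where C:\hadoop is just a placeholder for wherever winutils.exe is actually installed:

// Windows-only workaround sketch for the winutils error above (not verified here).
// "C:\\hadoop" is a placeholder; that folder must contain bin\winutils.exe.
System.setProperty("hadoop.home.dir", "C:\\hadoop")   // before new SparkContext(conf)

The real failure comes after that: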
Exception in thread "main" org.apache.hadoop.mapred.InvalidInputException: Input path does not exist: hdfs://tarot1:9000/sparkTest/hello
at org.apache.hadoop.mapred.FileInputFormat.singleThreadedListStatus(FileInputFormat.java:285)
at org.apache.hadoop.mapred.FileInputFormat.listStatus(FileInputFormat.java:228)
at org.apache.hadoop.mapred.FileInputFormat.getSplits(FileInputFormat.java:313)
at org.apache.spark.rdd.HadoopRDD.getPartitions(HadoopRDD.scala:199)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:239)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:237)
at scala.Option.getOrElse(Option.scala:120)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:237)
at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:239)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:237)
at scala.Option.getOrElse(Option.scala:120)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:237)
at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:239)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:237)
at scala.Option.getOrElse(Option.scala:120)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:237)
at org.apache.spark.rdd.RDD$$anonfun$take$1.apply(RDD.scala:1307)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:111)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:316)
at org.apache.spark.rdd.RDD.take(RDD.scala:1302)
at tarot.test$.main(test.scala:26)
at tarot.test.main(test.scala)
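So the actual exception is the InvalidInputException on hdfs://tarot1:9000/sparkTest/hello, even though the Java program reads the very same path. To rule out a typo or a wrong namenode, a plain HDFS check like the sketch below (hadoop-common only, nothing Spark-specific) should print true if the file really exists:

import java.net.URI
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

object CheckInputPath {
  def main(args: Array[String]): Unit = {
    // Talk to the same namenode the Spark job uses and probe the input path directly.
    val fs = FileSystem.get(new URI("hdfs://tarot1:9000"), new Configuration())
    println(fs.exists(new Path("/sparkTest/hello")))
    fs.close()
  }
}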
Scala code:
package tarot

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.api.java.JavaSparkContext

object test {

  def main(hello: Array[String]) {
    print("helloOff")
    // Connect to the standalone cluster and read the test file from HDFS.
    val conf = new SparkConf()
    conf.setMaster("spark://tarot1:7077")
      .setAppName("hello_off")
      .set("spark.executor.memory", "4g")
      .set("spark.executor.cores", "4")
      .set("spark.cores.max", "4")
      .set("spark.sql.crossJoin.enabled", "true")
    val sc = new SparkContext(conf)
    sc.setLogLevel("ERROR")
    val file = sc.textFile("hdfs://tarot1:9000/sparkTest/hello")
    val filterRDD = file.filter { (ss: String) => ss.contains("hello") }
    val f = filterRDD.cache()
    // println(f)
    // filterRDD.count()
    for (x <- f.take(100)) {
      println(x)
    }
  }

  def helloingTest(jsc: SparkContext) {
    val sc = jsc
    val file = sc.textFile("hdfs://tarot1:9000/sparkTest/hello")
    val filterRDD = file.filter((ss: String) => ss.contains("hello"))
    val f = filterRDD.cache()
    println(f)
    val i = filterRDD.count()
    println(i)
  }

  // val seehello =

  def helloingTest(jsc: JavaSparkContext) {
    // Use the underlying SparkContext so the Scala lambda can be passed to filter directly.
    val sc = jsc.sc
    val file = sc.textFile("hdfs://tarot1:9000/sparkTest/hello")
    val filterRDD = file.filter((ss: String) => ss.contains("hello"))
    val f = filterRDD.cache()
    println(f)
    val i = filterRDD.count()
    println(i)
  }
}
Java code:
package com.tarot.sparkToHdfsTest;

import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.rdd.RDD;
//import scalaModule.hello;

public class App {

    public static void main(String[] args) {
        SparkConf sparkConf = new SparkConf();
        sparkConf.setMaster("spark://tarot1:7077")
                 .setAppName("hello off")
                 .set("spark.executor.memory", "4g")
                 .set("spark.executor.cores", "4")
                 .set("spark.cores.max", "4")
                 .set("spark.sql.crossJoin.enabled", "true");
        JavaSparkContext jsc = new JavaSparkContext(sparkConf);
        jsc.setLogLevel("ERROR");
        text(jsc);
        // test.helloingTest(jsc);
    }

    /**
     * test
     * @param jsc
     */
    private static void text(JavaSparkContext jsc) {
        // jsc.textFile("hdfs://tarot1:9000/sparkTest/hello");
        JavaRDD<String> jr = jsc.textFile("hdfs://tarot1:9000/sparkTest/hello", 1);
        jr.cache();
        // test t = new test();
        // filter() returns a new RDD; keep the result instead of discarding it.
        JavaRDD<String> filtered = jr.filter(f);
        for (String string : filtered.take(100)) {
            System.out.println(string);
        }
        System.out.println("hello off");
    }

    public static Function<String, Boolean> f = new Function<String, Boolean>() {
        public Boolean call(String s) {
            return s.contains("hello");
        }
    };
}
I've been stuck on this for a long time. The answers I find online either tell me to run the job from the shell or to upload a jar, but the Java version doesn't need any of that, and both of them run on the JVM anyway.
Can any expert help me out?
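If "upload the jar" means telling SparkConf which application jar to ship to the standalone cluster, I assume it would look roughly like this (a sketch; the jar path is a placeholder for whatever my build actually produces):

val conf = new SparkConf()
  .setMaster("spark://tarot1:7077")
  .setAppName("hello_off")
  // Ship the application jar so the executors can load the job's classes.
  // "target/spark-hello-test.jar" is a placeholder for my real build output.
  .setJars(Seq("target/spark-hello-test.jar"))
// or, after the context exists: sc.addJar("target/spark-hello-test.jar")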
https://blog.csdn.net/xiao_jun_0820/article/details/45038205