Error output
INFO yarn.Client: Uploading resource file:/tmp/spark-a3e4e9b6-c942-47d0-8718-e7abc682bec4/__spark_libs__2720370067367095364.zip -> hdfs://centos001:9000/user/root/.sparkStaging/application_1617199029481_0008/__spark_libs__2720370067367095364.zip
21/03/31 23:58:20 INFO yarn.Client: Deleted staging directory hdfs://centos001:9000/user/root/.sparkStaging/application_1617199029481_0008
Exception in thread "main" java.lang.IllegalArgumentException: Can not create a Path from an empty string
at org.apache.hadoop.fs.Path.checkPathArg(Path.java:126)
at org.apache.hadoop.fs.Path.<init>(Path.java:134)
at org.apache.hadoop.fs.Path.<init>(Path.java:93)
at org.apache.spark.deploy.yarn.Client.copyFileToRemote(Client.scala:383)
at org.apache.spark.deploy.yarn.Client.distribute$1(Client.scala:477)
at org.apache.spark.deploy.yarn.Client.$anonfun$prepareLocalResources$19(Client.scala:576)
at scala.Option.foreach(Option.scala:407)
at org.apache.spark.deploy.yarn.Client.prepareLocalResources(Client.scala:575)
at org.apache.spark.deploy.yarn.Client.createContainerLaunchContext(Client.scala:865)
at org.apache.spark.deploy.yarn.Client.submitApplication(Client.scala:179)
at org.apache.spark.deploy.yarn.Client.run(Client.scala:1135)
at org.apache.spark.deploy.yarn.YarnClusterApplication.start(Client.scala:1527)
at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:845)
at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:161)
at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:184)
at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:86)
at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:920)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:929)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
IDEA code
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object wordcount {
  def main(args: Array[String]): Unit = {
    // 1. Build the Spark context
    val conf: SparkConf = new SparkConf().setMaster("local").setAppName("wordcount")
    conf.set("spark.testing.memory", "2147480000")
    val sc: SparkContext = new SparkContext(conf)

    // 2. Business logic
    // (1) Read the input file: line
    val line: RDD[String] = sc.textFile("data/2.txt")
    // (2) Split the lines into words: word
    val wordW: RDD[String] = line.flatMap(_.split(" "))
    val wordD: RDD[String] = line.flatMap(_.split(","))
    val words: RDD[String] = line.flatMap(_.split("\\."))
    // (3) Pair each word with a count: (word, 1)
    val wordKV: RDD[(String, Int)] = words.map((_, 1))
    // (4) Reduce by key: (word, n)
    val res: RDD[(String, Int)] = wordKV.reduceByKey(_ + _).sortBy(_._2, false)
    val rs: Array[(String, Int)] = res.collect()
    for (item <- rs) println(item)
    rs.foreach(println)

    // 3. Shut down
    sc.stop()
  }
}
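For comparison only, here is a minimal sketch (not the original code) of the same word count that takes the input path from the first program argument and does not call setMaster, leaving the master to whatever spark-submit passes in. The object name wordcount2 and the hdfs:// example path in the comment are assumptions made purely for illustration.

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object wordcount2 {
  def main(args: Array[String]): Unit = {
    // Input path comes from the first program argument,
    // e.g. hdfs://centos001:9000/user/root/data/2.txt (hypothetical location)
    val input = if (args.nonEmpty) args(0) else "data/2.txt"

    // No setMaster here: the --master given to spark-submit (yarn, local, ...) decides
    val conf = new SparkConf().setAppName("wordcount")
    val sc = new SparkContext(conf)

    val counts: RDD[(String, Int)] = sc.textFile(input)
      .flatMap(_.split("\\s+"))          // split on any whitespace
      .map((_, 1))                       // (word, 1)
      .reduceByKey(_ + _)                // (word, n)
      .sortBy(_._2, ascending = false)   // most frequent first

    counts.collect().foreach(println)
    sc.stop()
  }
}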
Linux command
bin/spark-submit --class wordcount --master yarn --deploy-mode cluster ./ 3-18wordcount1.jar 10
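For reference, a typical YARN cluster-mode submission passes the application jar as a single path token right before the program arguments. Whether the space between ./ and 3-18wordcount1.jar in the command above is a copy-paste artifact or was actually typed is only an assumption here; this sketch just shows the usual shape of the command:

bin/spark-submit \
  --class wordcount \
  --master yarn \
  --deploy-mode cluster \
  ./3-18wordcount1.jar \
  10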
Could this be a compatibility problem between Spark and the JDK?
Changing .setMaster("local") to .setMaster("yarn") does not help either. The file 2.txt just contains a few lines of "hello world".
Hadoop and ZooKeeper are definitely working correctly.