Spark on Linux: error when running a Scala jar built in IDEA

Error output

INFO yarn.Client: Uploading resource file:/tmp/spark-a3e4e9b6-c942-47d0-8718-e7abc682bec4/__spark_libs__2720370067367095364.zip -> hdfs://centos001:9000/user/root/.sparkStaging/application_1617199029481_0008/__spark_libs__2720370067367095364.zip
21/03/31 23:58:20 INFO yarn.Client: Deleted staging directory hdfs://centos001:9000/user/root/.sparkStaging/application_1617199029481_0008
Exception in thread "main" java.lang.IllegalArgumentException: Can not create a Path from an empty string
    at org.apache.hadoop.fs.Path.checkPathArg(Path.java:126)
    at org.apache.hadoop.fs.Path.<init>(Path.java:134)
    at org.apache.hadoop.fs.Path.<init>(Path.java:93)
    at org.apache.spark.deploy.yarn.Client.copyFileToRemote(Client.scala:383)
    at org.apache.spark.deploy.yarn.Client.distribute$1(Client.scala:477)
    at org.apache.spark.deploy.yarn.Client.$anonfun$prepareLocalResources$19(Client.scala:576)
    at scala.Option.foreach(Option.scala:407)
    at org.apache.spark.deploy.yarn.Client.prepareLocalResources(Client.scala:575)
    at org.apache.spark.deploy.yarn.Client.createContainerLaunchContext(Client.scala:865)
    at org.apache.spark.deploy.yarn.Client.submitApplication(Client.scala:179)
    at org.apache.spark.deploy.yarn.Client.run(Client.scala:1135)
    at org.apache.spark.deploy.yarn.YarnClusterApplication.start(Client.scala:1527)
    at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:845)
    at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:161)
    at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:184)
    at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:86)
    at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:920)
    at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:929)
    at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)

IDEA code

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object wordcount {
  def main(args: Array[String]): Unit = {
    // 1. Create the Spark context
    // (note: a master hard-coded here overrides the --master flag passed to spark-submit)
    val conf: SparkConf = new SparkConf().setMaster("local").setAppName("wordcount")
    conf.set("spark.testing.memory", "2147480000")
    val sc: SparkContext = new SparkContext(conf)

    // 2. Business logic
    // (1) Read the input file 2.txt : line (a relative local path, which a driver running on YARN cannot see)
    val line: RDD[String] = sc.textFile("data/2.txt")

    // (2) Split each line into words (on spaces, commas, and periods)
    val words: RDD[String] = line.flatMap(_.split("[ ,.]+"))

    // (3) Map each word to (word, 1)
    val wordKV: RDD[(String, Int)] = words.map((_, 1))

    // (4) Reduce by key and sort by count in descending order : (word, n)
    val res: RDD[(String, Int)] = wordKV.reduceByKey(_ + _).sortBy(_._2, false)

    val rs: Array[(String, Int)] = res.collect()

    rs.foreach(println)

    // 3. Stop the SparkContext
    sc.stop()

  }
}
Linux command

bin/spark-submit --class wordcount --master yarn --deploy-mode cluster ./ 3-18wordcount1.jar 10
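
One thing worth checking in the command above: there is a space between ./ and the jar name, so spark-submit will treat ./ as the application jar and 3-18wordcount1.jar as a program argument, which could explain why the YARN client fails with an empty path while staging resources. If that space is not just a copy/paste artifact, the submit command would normally be written as (jar name taken from the command above):

bin/spark-submit --class wordcount --master yarn --deploy-mode cluster ./3-18wordcount1.jar 10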
hdfs (screenshot)
Could it be a compatibility problem between Spark and the JDK?

.setMaster("local") 改.setMaster("yarn")也不行 2.txt文件内容就是几个hello wokd,

Hadoop and ZooKeeper are definitely working fine.
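
For reference, here is a minimal sketch of the same word count written for cluster mode, assuming the input has first been uploaded to HDFS. The hdfs://centos001:9000/user/root/2.txt path below is an assumption based on the namenode shown in the log; replace it with wherever 2.txt actually lives. The master is not hard-coded, since a master set in code takes precedence over the --master flag:

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object wordcount {
  def main(args: Array[String]): Unit = {
    // Master is not set here; it comes from spark-submit's --master flag
    val conf: SparkConf = new SparkConf().setAppName("wordcount")
    val sc: SparkContext = new SparkContext(conf)

    // Read the input from HDFS so the driver running on a YARN node can see it
    // (this path is an assumption; point it at the real location of 2.txt)
    val line: RDD[String] = sc.textFile("hdfs://centos001:9000/user/root/2.txt")

    // Split into words, count, and sort by count in descending order
    val res: RDD[(String, Int)] = line
      .flatMap(_.split(" "))
      .map((_, 1))
      .reduceByKey(_ + _)
      .sortBy(_._2, ascending = false)

    res.collect().foreach(println)

    sc.stop()
  }
}

Note that in --deploy-mode cluster the println output goes to the driver's YARN container log rather than the submitting terminal; it can be retrieved with yarn logs -applicationId <appId>.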