Maven 依赖如下:
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>8.0.19</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.12</artifactId>
<version>3.2.1</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-hive_2.12</artifactId>
<version>3.2.1</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-jdbc</artifactId>
<version>2.3.9</version>
</dependency>
测试代码如下:
package com.geek.test.spark;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.jdbc.JdbcDialect;
import org.apache.spark.sql.jdbc.JdbcDialects;

import java.util.Properties;
public class SparkTest {
public static void main(String[] args) {
try(SparkSession session = SparkSession.builder()
.appName("Awesome")
.master("local") // 使用本地spark
.getOrCreate()) {
Properties properties = new Properties();
properties.setProperty("user", "root");
properties.setProperty("password", "root");
session.read()
.option("driver", "com.mysql.cj.jdbc.Driver")
.jdbc("jdbc:mysql://localhost:3306/test", "sys_user", properties) // 加载mysql表数据
.createOrReplaceTempView("users");// 临时“表”仅在当前的session域中有效
session.read()
.option("header", true) // 将第一行当成表头
.option("inferSchema", true) //推断数据类型
.csv("C:\\Users\\Administrator\\Desktop\\test.csv") // 加载csv数据
.createOrReplaceTempView("userRoles");
session.sql("select * from users u join userRoles ur on u.id=ur.user_id").show(); // ①不同数据源连接查询
session.read()
.option("driver", "org.apache.hive.jdbc.HiveDriver")
.jdbc("jdbc:hive2://192.168.10.216:10000/kd", "test", properties)
.createOrReplaceTempView("cp");
session.sql("select * from cp").show(); // ②
}
}
}
①的运行结果没问题,②的运行结果如下: