Spark SQL With Join

    科技2025-05-23  39

     

    本文主要讲解 Spark 中 DataFrame 和 Spark SQL 的综合使用,以 join 操作为例。示例代码分别使用 Java 和 Scala 语言编写。

     

    Java 版本

    package com.dt.sparkql.java; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.Function; import org.apache.spark.api.java.function.PairFunction; import org.apache.spark.sql.DataFrame; import org.apache.spark.sql.Row; import org.apache.spark.sql.RowFactory; import org.apache.spark.sql.SQLContext; import org.apache.spark.sql.types.DataTypes; import org.apache.spark.sql.types.StructField; import org.apache.spark.sql.types.StructType; import scala.Tuple2; import java.util.ArrayList; import java.util.List; public class SparkSqlWithJoin { public static void main(String[] args) {   /** * 创建spark配置对象,设置spark程序的运行时配置信息,例如通过setMaster设置集群
    Processed: 0.009, SQL: 8