<!-- Spark core engine. Kept as originally declared. -->
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-core_2.11</artifactId>
    <version>2.4.7</version>
</dependency>
<!--
  Spark SQL module. The Java code in this file imports classes from the
  org.apache.spark.sql package (SparkSession, Dataset, Row, functions); those
  classes ship in the spark-sql artifact, not in spark-core, so this dependency
  is required for the code to compile. The Scala suffix (_2.11) and version
  must match the spark-core entry above.
-->
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-sql_2.11</artifactId>
    <version>2.4.7</version>
</dependency>
import org.apache.spark.sql.SparkSession;
SparkSession spark = SparkSession
.builder()
.appName("BigDataAnalysis")
.master("local[*]")
.getOrCreate();
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
// Location of the input CSV.
// NOTE(review): in an hdfs:// URI the segment right after "//" is the authority
// (NameNode host), so "path" here is presumably a placeholder — confirm before running.
String inputPath = "hdfs://path/to/data.csv";

// Load the CSV into an untyped Dataset. The "header" option is set to "true";
// no explicit schema and no "inferSchema" option are supplied here.
Dataset<Row> data = spark.read()
        .format("csv")
        .option("header", "true")
        .load(inputPath);
import org.apache.spark.sql.functions;
// Step 1: keep only the rows whose "age" column is greater than 18.
Dataset<Row> adults = data.filter(functions.col("age").gt(18));

// Step 2: one output row per distinct "gender", carrying the mean of "salary"
// under the column name "average_salary".
Dataset<Row> transformedData = adults
        .groupBy("gender")
        .agg(functions.avg("salary").alias("average_salary"));
transformedData.write()
.format("csv")
.option("header", "true")
.save("hdfs://path/to/output");