```bash
pip install pyspark
./sbin/start-master.sh
./sbin/start-worker.sh <master-url>
```

```python
from pyspark.sql import SparkSession
```

```python
spark = SparkSession.builder.appName("SparkTutorial").getOrCreate()
```

```python
data = spark.read.csv("data.csv", header=True, inferSchema=True)
```

```python
data.show(n)
data.select("column1", "column2")
data.filter(data["column1"] > 100)
data.groupBy("column1").agg({"column2": "mean"})
data.orderBy("column1")
data.write.csv("output.csv", header=True)
```

```bash
spark-submit --master <master-url> <python-file>
```


上一篇:
下一篇:
切换中文