# Train a Spark ML linear regression on data.csv and print RMSE and R2
# computed on a held-out split.
import pyspark
from pyspark.ml.evaluation import RegressionEvaluator
from pyspark.ml.feature import VectorAssembler
from pyspark.ml.regression import LinearRegression

spark = pyspark.sql.SparkSession.builder.getOrCreate()

# The first row of the CSV is used as the header; column types are inferred.
df = spark.read.csv('data.csv', header=True, inferSchema=True)

# Pack the two input columns into the single vector column that the
# regressor reads via featuresCol.
assembler = VectorAssembler(
    inputCols=['feature1', 'feature2'],
    outputCol='features',
)
df = assembler.transform(df)

# 70/30 split. No seed is passed, so the partition is not fixed across runs.
train_df, test_df = df.randomSplit([0.7, 0.3])

lr = LinearRegression(featuresCol='features', labelCol='label')
model = lr.fit(train_df)

# transform() on the test split yields the 'prediction' column that the
# evaluator is configured to read.
predictions = model.transform(test_df)

# One evaluator is reused for both metrics; the metric is selected per call
# through the param-map override.
evaluator = RegressionEvaluator(labelCol='label', predictionCol='prediction')
rmse = evaluator.evaluate(predictions, {evaluator.metricName: 'rmse'})
r2 = evaluator.evaluate(predictions, {evaluator.metricName: 'r2'})

print("RMSE:", rmse)
print("R2 Score:", r2)


上一篇:
下一篇:
切换中文