# Distributed linear regression with PySpark ML: create a session, fit a
# model, evaluate it, tune hyper-parameters with cross-validation, then
# persist and reload the fitted model.
#
# NOTE(review): `trainData` and `testData` are not defined in this snippet;
# they must already exist as Spark DataFrames. Presumably they carry the
# estimator's default "features" / "label" columns -- confirm against the
# code that builds them.
from pyspark.ml.evaluation import RegressionEvaluator
from pyspark.ml.regression import LinearRegression, LinearRegressionModel
from pyspark.ml.tuning import CrossValidator, ParamGridBuilder
from pyspark.sql import SparkSession

# Single location shared by the save and load steps so they cannot drift.
MODEL_PATH = "path/to/model"

# --- 1. Session -----------------------------------------------------------
spark = (
    SparkSession.builder
    .appName("DistributedML")
    .getOrCreate()
)

# --- 2. Train -------------------------------------------------------------
lr = LinearRegression()
lrModel = lr.fit(trainData)

# --- 3. Evaluate ----------------------------------------------------------
# Predictions have to exist before they can be scored; the original snippet
# evaluated `predictions` without ever producing it.
predictions = lrModel.transform(testData)

# The metric is spelled out so the `rmse` variable name stays truthful even
# if the evaluator is reconfigured elsewhere.
evaluator = RegressionEvaluator(metricName="rmse")
rmse = evaluator.evaluate(predictions)

# --- 4. Hyper-parameter tuning ---------------------------------------------
# 2 regParam values x 3 elasticNetParam values = 6 candidate settings.
paramGrid = (
    ParamGridBuilder()
    .addGrid(lr.regParam, [0.1, 0.01])
    .addGrid(lr.elasticNetParam, [0.0, 0.5, 1.0])
    .build()
)

crossval = CrossValidator(
    estimator=lr,
    estimatorParamMaps=paramGrid,
    evaluator=evaluator,
)
cvModel = crossval.fit(trainData)

# --- 5. Persist -------------------------------------------------------------
# NOTE(review): save() is expected to fail if MODEL_PATH already exists;
# switch to lrModel.write().overwrite().save(MODEL_PATH) if re-runs should
# replace the previous model -- confirm the desired behaviour.
lrModel.save(MODEL_PATH)

# --- 6. Reload and predict ---------------------------------------------------
lrModel = LinearRegressionModel.load(MODEL_PATH)
predictions = lrModel.transform(testData)


上一篇:
下一篇:
切换中文