pyspark报错,


from pyspark.ml.classification import MultilayerPerceptronClassifier
from pyspark.ml.feature import StandardScaler
from pyspark.ml.feature import VectorAssembler
from pyspark.sql import SQLContext
from pyspark.sql import SparkSession
import pandas as pd
VectorAssembler_BP =VectorAssembler(inputCols=["ZL","ZR","ZF","ZM","ZC"],outputCol="features")
new_df_BP=VectorAssembler_BP.transform(spark_df)
#获取聚类标签
prediction = model.transform(new_df_BP).select('prediction').collect()
labels_BP= [p.prediction for p in prediction ]
panda_df = pd.DataFrame(labels_BP, columns=['labels'])
# print(labels_BP)
#标签转化为Dataframe
sc=SparkContext.getOrCreate(spark)
sqlContext=SQLContext(sc)
labels_BP=sqlContext.createDataFrame(panda_df)
# print(labels_BP)
# 随机划分训练集和验证集,按照8:2划分
# X_train, X_test, Y_train, Y_test = train_test_split(new_df_BP, labels, test_size=0.2, random_state=1)
X_train_data,X_test_data=spark_df.randomSplit([0.8, 0.2], seed = 1)
Y_train_data,Y_test_data=labels_BP.randomSplit([0.8, 0.2], seed = 1)
sc_BP = StandardScaler()
sc_BP.fit(spark_df)
standard_BP_train = sc_BP.transform(X_train)
standard_BP_test = sc_BP.transform(X_test)
# mlp = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(10))
mlp_pyspark = MultilayerPerceptronClassifier(solver='lbfgs',tol=1e-05,blockSize= 10)
mlp_pyspark.fit(standard_BP_train, Y_train_data)
print("pyspark的神经网络的预测正确率为:", mlp_pyspark.score(standard_test, Y_test))

报错:

img


错误信息:

Py4JJavaError: An error occurred while calling o3362.fit.
: java.util.NoSuchElementException: Failed to find a default value for inputCol
    at org.apache.spark.ml.param.Params.$anonfun$getOrDefault$2(params.scala:756)
    at scala.Option.getOrElse(Option.scala:189)
    at org.apache.spark.ml.param.Params.getOrDefault(params.scala:756)
    at org.apache.spark.ml.param.Params.getOrDefault$(params.scala:753)
    at org.apache.spark.ml.PipelineStage.getOrDefault(Pipeline.scala:41)
    at org.apache.spark.ml.param.Params.$(params.scala:762)
    at org.apache.spark.ml.param.Params.$$(params.scala:762)
    at org.apache.spark.ml.PipelineStage.$(Pipeline.scala:41)
    at org.apache.spark.ml.feature.StandardScalerParams.validateAndTransformSchema(StandardScaler.scala:64)
    at org.apache.spark.ml.feature.StandardScalerParams.validateAndTransformSchema$(StandardScaler.scala:63)
    at org.apache.spark.ml.feature.StandardScaler.validateAndTransformSchema(StandardScaler.scala:84)
    at org.apache.spark.ml.feature.StandardScaler.transformSchema(StandardScaler.scala:121)
    at org.apache.spark.ml.PipelineStage.transformSchema(Pipeline.scala:71)
    at org.apache.spark.ml.feature.StandardScaler.fit(StandardScaler.scala:109)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
    at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
    at py4j.Gateway.invoke(Gateway.java:282)
    at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
    at py4j.commands.CallCommand.execute(CallCommand.java:79)
    at py4j.GatewayConnection.run(GatewayConnection.java:238)
    at java.lang.Thread.run(Thread.java:748)

pyspark_df数据样例:

img