Torna alla versione inglese della documentazione
Esempi
Esempi (SPSS Modeler)
Ultimo aggiornamento: 07 ott 2024
Questa sezione fornisce esempi di script Python for Spark.
Esempio di script di base per l'elaborazione dei dati
import spss.pyspark.runtime
from pyspark.sql.types import *
# Obtain the Analytic Server context that connects this script to the stream.
cxt = spss.pyspark.runtime.getContext()

if cxt.isComputeDataModelOnly():
    # Only the output data model is requested: the output schema is
    # declared to be the same as the input schema, and no data is touched.
    _schema = cxt.getSparkInputSchema()
    cxt.setSparkOutputSchema(_schema)
else:
    # The input schema is fetched here but not used further in this example.
    _structType = cxt.getSparkInputSchema()
    df = cxt.getSparkInputData()
    # DataFrame.sample(withReplacement, fraction, seed): keep roughly 1% of
    # the rows, without replacement, using a fixed seed of 1.
    _newDF = df.sample(False, 0.01, 1)
    cxt.setSparkOutputData(_newDF)
Esempio di script di creazione del modello che utilizza l'algoritmo LinearRegressionWithSGD
from pyspark.context import SparkContext
from pyspark.sql.context import SQLContext
from pyspark.sql import Row
# Parenthesised so the name list can span several lines as one statement.
from pyspark.mllib.regression import (
    LabeledPoint,
    LinearRegressionModel,
    LinearRegressionWithSGD,
)
from pyspark.mllib.linalg import DenseVector
import numpy
import json
import spss.pyspark.runtime
from spss.pyspark.exceptions import ASContextException
# Analytic Server context, the Spark context it wraps, and the input data.
ascontext = spss.pyspark.runtime.getContext()
sc = ascontext.getSparkContext()
df = ascontext.getSparkInputData()

# field settings and algorithm parameters
# The three values below are PLACEHOLDERS: replace target_field, the
# predictor field names, and the number of iterations with your actual values!
target = "target_field"
predictors = ["predictor_field_1", "predictor_field_2"]
num_iterations = 100

# Name used for the prediction when the model is printed.
prediction_field = "$LR-" + target
# save linear regression model to a filesystem path
def save(model, sc, path):
    """Write the model's intercept and weights as JSON text under *path*.

    Args:
        model: fitted model exposing ``intercept`` and ``weights``
            (``weights`` must provide ``tolist()``).
        sc: Spark context used to build the single-element RDD.
        path: destination passed to ``saveAsTextFile``.
    """
    payload = json.dumps(
        {"intercept": model.intercept, "weights": model.weights.tolist()}
    )
    # A one-element RDD is used so the JSON document is written through
    # Spark's own text-file writer.
    data = sc.parallelize([payload])
    data.saveAsTextFile(path)
# print model details to stdout
def dump(model, predictors):
    """Print the fitted model as a linear equation, one term per line.

    The first line shows the module-level ``prediction_field`` and the
    intercept; each following line shows one predictor name multiplied by
    its weight.

    Args:
        model: fitted model exposing ``intercept`` and ``weights``.
        predictors: predictor field names, in the same order as the weights.
    """
    print(prediction_field + " = " + str(model.intercept))
    weights = model.weights.tolist()
    for i, predictor in enumerate(predictors):
        print("\t+ " + predictor + "*" + str(weights[i]))
# check that required fields exist in the input data
# df.dtypes yields (field name, type) pairs; only the names are needed here.
input_field_names = [name for name, _ in df.dtypes]

if target not in input_field_names:
    raise ASContextException("target field "+target+" not found")

for predictor in predictors:
    if predictor not in input_field_names:
        raise ASContextException("predictor field "+predictor+" not found")
# define map function to convert from dataframe Row objects to mllib LabeledPoint
def row2LabeledPoint(target, predictors, row):
    """Convert one DataFrame row into an mllib ``LabeledPoint``.

    Args:
        target: name of the field holding the label.
        predictors: names of the fields used as features, in feature order.
        row: row object whose fields are readable as attributes.

    Returns:
        A ``LabeledPoint`` whose label is the target value and whose features
        are a ``DenseVector`` of the predictor values, all cast to ``float``.
    """
    pvals = [float(getattr(row, predictor)) for predictor in predictors]
    tval = getattr(row, target)
    return LabeledPoint(float(tval), DenseVector(pvals))
# Map every row of the input DataFrame to a LabeledPoint, giving the RDD
# that the mllib trainer consumes.
def _as_labeled_point(row):
    return row2LabeledPoint(target, predictors, row)


training_points = df.rdd.map(_as_labeled_point)

# Fit the linear regression, including an intercept term.
model = LinearRegressionWithSGD.train(
    training_points, num_iterations, intercept=True
)

# Print a readable description of the fitted model to stdout.
dump(model, predictors)

# Persist the model in a temporary folder, then register that folder as the
# "model" content of the output.
modelpath = ascontext.createTemporaryFolder()
save(model, sc, modelpath)
ascontext.setModelContentFromPath("model", modelpath)