0 / 0
資料の英語版に戻る
最終更新: 2024年10月07日
例 (SPSS Modeler)

このセクションでは、Python for Spark のスクリプトの例を紹介します。

データを処理するための基本的なスクリプトの例

import spss.pyspark.runtime
from pyspark.sql.types import *

# Fetch the runtime context that exposes the node's input and output.
cxt = spss.pyspark.runtime.getContext()

if not cxt.isComputeDataModelOnly():
    # Data pass: read the input data and emit a sample of it.
    input_schema = cxt.getSparkInputSchema()  # fetched as in the original; not used below
    input_df = cxt.getSparkInputData()
    # Positional arguments are passed straight to sample(); per the PySpark
    # DataFrame.sample signature they are (withReplacement, fraction, seed).
    sampled_df = input_df.sample(False, 0.01, 1)
    cxt.setSparkOutputData(sampled_df)
else:
    # Schema-only pass: the output schema is the input schema, unchanged.
    cxt.setSparkOutputSchema(cxt.getSparkInputSchema())

LinearRegressionWithSGD アルゴリズムを使用したモデル構築スクリプトの例

from pyspark.context import SparkContext
from pyspark.sql.context import SQLContext
from pyspark.sql import Row
# The imported names are parenthesised so the statement can span several
# lines; a bare line break after "import" is a syntax error.
from pyspark.mllib.regression import (
    LabeledPoint,
    LinearRegressionWithSGD,
    LinearRegressionModel,
)
from pyspark.mllib.linalg import DenseVector
import numpy
import json

import spss.pyspark.runtime
from spss.pyspark.exceptions import ASContextException

# Obtain the runtime context, then take the Spark context and the input
# data from it; the rest of the script works with these three objects.
ascontext = spss.pyspark.runtime.getContext()
sc = ascontext.getSparkContext()
df = ascontext.getSparkInputData()

# field settings and algorithm parameters
# replace target_field, predictor_fields, and num iterations with your actual values!
#
# The values below are placeholders written as valid Python so that the
# script parses before it has been customised. They are NOT meaningful
# defaults: set target to the name of the field to predict, predictors to
# the list of input field names, and num_iterations to the number of
# training iterations you want.

target = "target_field"
predictors = ["predictor_field_1", "predictor_field_2"]
num_iterations = 100
# Name under which the prediction is reported: "$LR-" followed by the target.
prediction_field = "$LR-" + target

# save linear regression model to a filesystem path

def save(model, sc, path):
    """Write the model's intercept and weights to ``path`` as JSON text.

    The model is serialised to a single JSON string of the form
    ``{"intercept": ..., "weights": [...]}``, wrapped in a one-element
    collection via ``sc.parallelize`` and written with ``saveAsTextFile``.

    Args:
        model: object exposing ``intercept`` and ``weights`` (the latter
            must provide ``tolist()``).
        sc: Spark context used to build the one-element collection.
        path: destination handed to ``saveAsTextFile``.
    """
    # Keep the assignment and its right-hand side in one statement; a line
    # break directly after "data =" is a syntax error.
    payload = json.dumps(
        {"intercept": model.intercept, "weights": model.weights.tolist()}
    )
    data = sc.parallelize([payload])
    data.saveAsTextFile(path)

# print model details to stdout

def dump(model,predictors):
    """Print the model as an equation: intercept first, then one term per predictor.

    The first line is ``<prediction_field> = <intercept>``; each following
    line is ``<TAB>+ <predictor>*<weight>``, pairing predictors with the
    model weights by position. ``prediction_field`` is read from module scope.
    """
    header = prediction_field + " = " + str(model.intercept)
    print(header)
    coefficients = model.weights.tolist()
    for position, name in enumerate(predictors):
        print("\t+ " + name + "*" + str(coefficients[position]))

# check that required fields exist in the input data

# df.dtypes is iterated as pairs whose first element is the field name.
input_field_names = [ty[0] for ty in df.dtypes]

# Fail early, naming the missing field, before any training work starts.
if target not in input_field_names:
    raise ASContextException("target field "+target+" not found")

# The predictor loop is its own statement; it must not share a line with
# the raise above.
for predictor in predictors:
    if predictor not in input_field_names:
        raise ASContextException("predictor field "+predictor+" not found")

# define map function to convert from dataframe Row objects to mllib LabeledPoint 

def row2LabeledPoint(target,predictors,row):
    """Build a LabeledPoint from one row.

    The features are the row's ``predictors`` attributes, in the order
    given, and the label is the row's ``target`` attribute; every value is
    converted with ``float``.
    """
    features = [float(getattr(row, name)) for name in predictors]
    label = getattr(row, target)
    return LabeledPoint(float(label), DenseVector(features))

# convert dataframe to an RDD containing LabeledPoint

training_points = df.rdd.map(
    lambda row: row2LabeledPoint(target, predictors, row)
)

# Train the regression on the labeled points, with an intercept term.
# NOTE(review): LinearRegressionWithSGD is, as far as I know, deprecated in
# Spark 2.x and absent from Spark 3.x - confirm against the Spark version
# this script runs on.
model = LinearRegressionWithSGD.train(
    training_points,
    num_iterations,
    intercept=True,
)

# Print a readable description of the fitted model to stdout.
dump(model, predictors)

# Write the model into a temporary folder, then register that folder as the
# "model" entry of the output model content.
modelpath = ascontext.createTemporaryFolder()
save(model, sc, modelpath)
ascontext.setModelContentFromPath("model", modelpath)
生成 AI の検索と回答
これらの回答は、製品資料の内容に基づいて、watsonx.ai の大規模言語モデルによって生成されます。 詳細