About cookies on this site Our websites require some cookies to function properly (required). In addition, other cookies may be used with your consent to analyze site usage, improve the user experience and for advertising. For more information, please review your options. By visiting our website, you agree to our processing of information as described in IBM’s privacy statement. To provide smooth navigation, your cookie preferences will be shared across the IBM web domains listed here.
Last updated: Feb 11, 2025
With the Extension Import node, you can run R or Python for Spark scripts to
import data.
Python for Spark example
# Create an Extension Import node and configure it to run a
# Python for Spark script.
import modeler.api

stream = modeler.script.stream()
node = stream.create("extension_importer", "extension_importer")
node.setPropertyValue("syntax_type", "Python")

# The text below is handed to the node verbatim and executed as Python, so its
# indentation is significant. print() is written in the parenthesised form so
# the script parses under both Python 2 and Python 3.
python_script = """
import spss.pyspark
from pyspark.sql.types import *

cxt = spss.pyspark.runtime.getContext()

# Output schema: one StructField per column of the sample data below.
_schema = StructType([
    StructField('id', LongType(), nullable=False),
    StructField('age', LongType(), nullable=True),
    StructField('Sex', StringType(), nullable=True),
    StructField('BP', StringType(), nullable=True),
    StructField('Cholesterol', StringType(), nullable=True),
    StructField('K', DoubleType(), nullable=True),
    StructField('Na', DoubleType(), nullable=True),
    StructField('Drug', StringType(), nullable=True)])

if cxt.isComputeDataModelOnly():
    # Only the data model was requested: publish the schema, produce no rows.
    cxt.setSparkOutputSchema(_schema)
else:
    df = cxt.getSparkInputData()
    if df is None:
        # getSparkInputData() returned nothing: build the sample rows here.
        drugList = [
            (1, 23, 'F', 'HIGH', 'HIGH', 0.792535, 0.031258, 'drugY'),
            (2, 47, 'M', 'LOW', 'HIGH', 0.739309, 0.056468, 'drugC'),
            (3, 47, 'M', 'LOW', 'HIGH', 0.697269, 0.068944, 'drugC'),
            (4, 28, 'F', 'NORMAL', 'HIGH', 0.563682, 0.072289, 'drugX'),
            (5, 61, 'F', 'LOW', 'HIGH', 0.559294, 0.030998, 'drugY'),
            (6, 22, 'F', 'NORMAL', 'HIGH', 0.676901, 0.078647, 'drugX'),
            (7, 49, 'F', 'NORMAL', 'HIGH', 0.789637, 0.048518, 'drugY'),
            (8, 41, 'M', 'LOW', 'HIGH', 0.766635, 0.069461, 'drugC'),
            (9, 60, 'M', 'NORMAL', 'HIGH', 0.777205, 0.05123, 'drugY'),
            (10, 43, 'M', 'LOW', 'NORMAL', 0.526102, 0.027164, 'drugY')]
        sqlcxt = cxt.getSparkSQLContext()
        rdd = cxt.getSparkContext().parallelize(drugList)
        print('pyspark read data count = ' + str(rdd.count()))
        df = sqlcxt.createDataFrame(rdd, _schema)
    cxt.setSparkOutputData(df)
"""

node.setPropertyValue("python_syntax", python_script)
R example
# Switch the Extension Import node to R and give it the script to run.
node.setPropertyValue("syntax_type", "R")

# The text below is handed to the node verbatim and executed as R.
# Note: '\\n' is doubled so that the R source receives the escape '\n'.
R_script = """# 'JSON Import' Node v1.0 for IBM SPSS Modeler
# 'RJSONIO' package created by Duncan Temple Lang - http://cran.r-project.org/web/packages/RJSONIO
# 'plyr' package created by Hadley Wickham http://cran.r-project.org/web/packages/plyr
# Node developer: Danil Savine - IBM Extreme Blue 2014
# Description: This node allows you to import into SPSS a table data from a JSON.

# Install function for packages
# Attaches package `x` (given as a bare name), installing it from CRAN first
# if it is not available.
packages <- function(x) {
  x <- as.character(match.call()[[2]])
  if (!require(x, character.only = TRUE)) {
    install.packages(pkgs = x, repos = "https://cran.r-project.org")
    # library() stops with an error if the package still cannot be loaded,
    # instead of silently returning FALSE like require()
    library(x, character.only = TRUE)
  }
}

# packages
packages(RJSONIO)
packages(plyr)

### This function is used to generate automatically the dataModel
# Returns a data.frame with one column per field of `data` and the rows
# fieldName, fieldLabel, fieldStorage, fieldMeasure, fieldFormat, fieldRole.
getMetaData <- function(data) {
  if (nrow(data) <= 0) {
    print("Warning : modelerData has no line, all fieldStorage fields set to strings")
    getStorage <- function(x) {
      "string"
    }
  } else {
    getStorage <- function(x) {
      # typeof() reports a factor as "integer", so handle factors first
      if (is.factor(x)) {
        return("string")
      }
      switch(typeof(unlist(x)),
        integer = "integer",
        double = "real",
        character = "string",
        "string"
      )
    }
  }
  col <- vector("list", ncol(data))
  # seq_len() yields no iterations for a zero-column data frame
  for (i in seq_len(ncol(data))) {
    col[[i]] <- c(
      fieldName = names(data)[i],
      fieldLabel = "",
      # data[[i]] extracts the column vector itself so is.factor() can match
      fieldStorage = getStorage(data[[i]]),
      fieldMeasure = "",
      fieldFormat = "",
      fieldRole = ""
    )
  }
  mdm <- do.call(cbind, col)
  mdm <- data.frame(mdm)
  return(mdm)
}

# From JSON to a list
txt <- readLines('C:/test.json')
formatedtxt <- paste(txt, collapse = '')
json.list <- fromJSON(formatedtxt)

# Apply path to json.list
if (strsplit(x = 'true', split = '\\n', fixed = TRUE)[[1]][1]) {
  path.list <- unlist(strsplit(x = 'id_array', split = ','))
  i <- 1
  while (i < length(path.list) + 1) {
    if (is.null(getElement(json.list, path.list[i]))) {
      # Key not found at this level: descend into the first element and retry
      json.list <- json.list[[1]]
    } else {
      json.list <- getElement(json.list, path.list[i])
      i <- i + 1
    }
  }
}

# From list to dataframe via unlisted json
filled <- data.frame()
for (i in seq_along(json.list)) {
  unlisted.json <- unlist(json.list[[i]])
  to.fill <- data.frame(
    t(as.data.frame(unlisted.json, row.names = names(unlisted.json))),
    stringsAsFactors = FALSE
  )
  # rbind.fill pads columns missing from either side with NA
  filled <- rbind.fill(filled, to.fill)
}

# Export to SPSS Modeler Data
modelerData <- filled
print(modelerData)
modelerDataModel <- getMetaData(modelerData)
print(modelerDataModel)
"""

node.setPropertyValue("r_syntax", R_script)
| properties | Data type | Property description |
|---|---|---|
| syntax_type | R Python | Specify which script runs – R or Python (R is the default). |
| r_syntax | string | The R scripting syntax to run. |
| python_syntax | string | The Python scripting syntax to run. |
Was the topic helpful?
0/1000