German credit risk prediction with Scikit-learn for model monitoring¶

This notebook should be run in a Watson Studio project, using the Default Python 3.11 runtime environment. It requires service credentials for the following Cloud service:

  • Watson Machine Learning

The notebook will train, store, and deploy a German Credit Risk model.

Learning goals¶

In this notebook, you will learn how to:

  • Explore data
  • Prepare data for training and evaluation
  • Create a scikit-learn pipeline
  • Train and evaluate a model
  • Store a model in the Watson Machine Learning (WML) repository
  • Deploy and score the model

Contents¶

  • Setup
  • Explore Data
  • Create a model
  • Publish the model
  • Deploy and score
  • Clean up
  • Summary

1. Set up the environment¶

Before you use the sample code in this notebook, you must perform the following setup tasks:

  • Create a Watson Machine Learning (WML) Service instance (a free plan is offered and information about how to create the instance can be found here).

Install and import the ibm-watsonx-ai package and its dependencies¶

Note: ibm-watsonx-ai documentation can be found here.

In [ ]:
!pip install -U ibm-watsonx-ai | tail -n 1
!pip install "scikit-learn==1.3.2" | tail -n 1

Connection to WML¶

Authenticate to the Watson Machine Learning service on IBM Cloud. You need to provide your platform api_key and the instance location.

You can use the IBM Cloud CLI to retrieve both the platform API key and the instance location.

An API key can be generated as follows:

ibmcloud login
ibmcloud iam api-key-create API_KEY_NAME

From the output, copy the value of api_key.

The location of your WML instance can be retrieved as follows:

ibmcloud login --apikey API_KEY -a https://cloud.ibm.com
ibmcloud resource service-instance WML_INSTANCE_NAME

From the output, copy the value of location.

Tip: Your Cloud API key can be generated by going to the Users section of the Cloud console. From that page, click your name, scroll down to the API Keys section, and click Create an IBM Cloud API key. Give your key a name, click Create, then copy the created key and paste it below. You can also get a service-specific URL by going to the Endpoint URLs section of the Watson Machine Learning docs. You can check your instance location in your Watson Machine Learning (WML) Service instance details.

You can also get a service-specific API key by going to the Service IDs section of the Cloud console. From that page, click Create, then copy the created key and paste it below.

Action: Enter your api_key and location in the following cell.

In [ ]:
api_key = 'PASTE YOUR PLATFORM API KEY HERE'
location = 'PASTE YOUR INSTANCE LOCATION HERE'
In [2]:
from ibm_watsonx_ai import Credentials

credentials = Credentials(
    api_key=api_key,
    url='https://' + location + '.ml.cloud.ibm.com'
)
In [3]:
from ibm_watsonx_ai import APIClient

client = APIClient(credentials)

Working with spaces¶

First of all, you need to create a space for your work. If you do not have a space already created, you can use the Deployment Spaces Dashboard to create one.

  • Click New Deployment Space
  • Create an empty space
  • Select Cloud Object Storage
  • Select a Watson Machine Learning instance and press Create
  • Copy the space_id and paste it below

Tip: You can also use the SDK to prepare the space for your work; a minimal sketch follows. More information can be found here.
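
The sketch below assumes client.spaces.store with a NAME metadata field, as in current ibm-watsonx-ai releases; the space name and description are hypothetical placeholders, and depending on your account you may also need to supply storage and compute details in the metadata:

In [ ]:
# Hedged sketch: create a deployment space programmatically.
# The NAME and DESCRIPTION values below are illustrative placeholders.
space_metadata = {
    client.spaces.ConfigurationMetaNames.NAME: "german_credit_risk_space",
    client.spaces.ConfigurationMetaNames.DESCRIPTION: "Space for the German credit risk demo"
}
space_details = client.spaces.store(meta_props=space_metadata)
new_space_id = client.spaces.get_id(space_details)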

Action: Assign the space ID below.

In [4]:
space_id = 'PASTE YOUR SPACE ID HERE'

You can use the list method to print all existing spaces.

In [ ]:
client.spaces.list(limit=10)

To be able to interact with all resources available in Watson Machine Learning, you need to set the space you will be using as the default space.

In [5]:
client.set.default_space(space_id)
Out[5]:
'SUCCESS'

Connection to COS¶

In the next cell, we read the Cloud Object Storage (COS) credentials from the space.

In [10]:
cos_credentials = client.spaces.get_details(space_id=space_id)['entity']['storage']['properties']
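
You can peek at which storage properties the space exposes; key names can vary by plan, so treat this as a quick sanity check:

In [ ]:
# Show the available storage property keys (e.g. bucket_name, endpoint_url).
print(sorted(cos_credentials.keys()))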

Run the notebook¶

At this point, the notebook is ready to run. You can either run the cells one at a time, or click the Kernel option above and select Restart and Run All to run all the cells.

In this section you will learn how to train a scikit-learn model and then deploy it as a web service using the Watson Machine Learning service.

Load the training data from GitHub¶

In [ ]:
!rm -f german_credit_data_biased_training.csv
!wget https://raw.githubusercontent.com/pmservice/ai-openscale-tutorials/master/assets/historical_data/german_credit_risk/wml/german_credit_data_biased_training.csv
In [12]:
import numpy as np
import pandas as pd 

training_data_file_name = "german_credit_data_biased_training.csv"
data_df = pd.read_csv(training_data_file_name)

Explore data ¶

In [13]:
data_df.head()
Out[13]:
CheckingStatus LoanDuration CreditHistory LoanPurpose LoanAmount ExistingSavings EmploymentDuration InstallmentPercent Sex OthersOnLoan ... OwnsProperty Age InstallmentPlans Housing ExistingCreditsCount Job Dependents Telephone ForeignWorker Risk
0 0_to_200 31 credits_paid_to_date other 1889 100_to_500 less_1 3 female none ... savings_insurance 32 none own 1 skilled 1 none yes No Risk
1 less_0 18 credits_paid_to_date car_new 462 less_100 1_to_4 2 female none ... savings_insurance 37 stores own 2 skilled 1 none yes No Risk
2 less_0 15 prior_payments_delayed furniture 250 less_100 1_to_4 2 male none ... real_estate 28 none own 2 skilled 1 yes no No Risk
3 0_to_200 28 credits_paid_to_date retraining 3693 less_100 greater_7 3 male none ... savings_insurance 32 none own 1 skilled 1 none yes No Risk
4 no_checking 28 prior_payments_delayed education 6235 500_to_1000 greater_7 3 male none ... unknown 57 none own 2 skilled 1 none yes Risk

5 rows × 21 columns

In [14]:
print('Columns: ', list(data_df.columns))
print('Number of columns: ', len(data_df.columns))
Columns:  ['CheckingStatus', 'LoanDuration', 'CreditHistory', 'LoanPurpose', 'LoanAmount', 'ExistingSavings', 'EmploymentDuration', 'InstallmentPercent', 'Sex', 'OthersOnLoan', 'CurrentResidenceDuration', 'OwnsProperty', 'Age', 'InstallmentPlans', 'Housing', 'ExistingCreditsCount', 'Job', 'Dependents', 'Telephone', 'ForeignWorker', 'Risk']
Number of columns:  21

As you can see, the data contains twenty-one fields. The Risk field is the target you will predict.
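
Before moving on, it can help to see which columns pandas read as strings, since those are exactly the columns the pipeline below will one-hot encode:

In [ ]:
# Inspect column dtypes: object-typed columns are the categorical features.
data_df.dtypes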

In [15]:
print('Number of records: ', data_df.Risk.count())
Number of records:  5000
In [16]:
target_count = data_df.groupby('Risk')['Risk'].count()
target_count
Out[16]:
Risk
No Risk    3330
Risk       1670
Name: Risk, dtype: int64

Visualize data¶

In [18]:
target_count.plot.pie(figsize=(8, 8));
(Output: pie chart of the Risk class distribution.)

Save training data to Cloud Object Storage¶

In [22]:
import ibm_boto3
from ibm_botocore.client import Config

cos_client = ibm_boto3.resource("s3",
    ibm_api_key_id=cos_credentials['credentials']['editor']['api_key'],
    ibm_service_instance_id=cos_credentials['resource_crn'],
    ibm_auth_endpoint='https://iam.cloud.ibm.com/identity/token',
    config=Config(signature_version="oauth"),
    endpoint_url=cos_credentials['endpoint_url']
)
In [23]:
with open(training_data_file_name, "rb") as file_data:
    cos_client.Object(cos_credentials['bucket_name'], training_data_file_name).upload_fileobj(
        Fileobj=file_data
    )
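
To confirm the upload, you can list the bucket's contents with the same ibm_boto3 resource; a quick check, assuming the bucket is small enough to list in full:

In [ ]:
# List the objects in the space's bucket to confirm the training file landed.
bucket = cos_client.Bucket(cos_credentials['bucket_name'])
for obj in bucket.objects.all():
    print(obj.key)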

Create a model ¶

In this section you will learn how to:

  • Prepare data for training a model
  • Create a machine learning pipeline
  • Train a model
In [32]:
MODEL_NAME = "Scikit German Risk Model WML V4"

DEPLOYMENT_NAME = "Scikit German Risk Deployment WML V4"

Start by importing the required libraries¶

In [24]:
from sklearn.linear_model import SGDClassifier
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split

Splitting the data into train and test sets¶

In [25]:
train_data, test_data = train_test_split(data_df, test_size=0.2)
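
The split above differs on every run; if you want it reproducible and class-balanced, an optional variant (the random_state value is arbitrary):

In [ ]:
# Optional variant: fix the seed and stratify on the target so both splits
# keep the roughly 2:1 'No Risk'/'Risk' ratio observed earlier.
train_data, test_data = train_test_split(
    data_df, test_size=0.2, random_state=42, stratify=data_df.Risk
)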

Preparing the pipeline¶

In [26]:
# Convenience index slices: the feature columns (all but the last column, 'Risk'),
# all records, and the first record.
features_idx = np.s_[0:-1]
all_records_idx = np.s_[:]
first_record_idx = np.s_[0]

In this step you identify the string-valued (categorical) feature columns and one-hot encode them with a ColumnTransformer, then chain the transformer with a classifier into a single pipeline.

In [27]:
# Flag which feature columns hold strings: these are the categorical fields.
string_fields = [type(fld) is str for fld in train_data.iloc[first_record_idx, features_idx]]
# One-hot encode the categorical columns (columns not listed are dropped by default).
ct = ColumnTransformer([("ohe", OneHotEncoder(), list(np.array(train_data.columns)[features_idx][string_fields]))])
# Linear classifier trained with SGD; 'log_loss' is the scikit-learn 1.3 name
# for the logistic loss (the old 'log' alias was removed in 1.3).
clf_linear = SGDClassifier(loss='log_loss', penalty='l2', max_iter=1000, tol=1e-5)

pipeline_linear = Pipeline([('ct', ct), ('clf_linear', clf_linear)])

Train a model¶

In [28]:
risk_model = pipeline_linear.fit(train_data.drop('Risk', axis=1), train_data.Risk)

Evaluate the model¶

In [29]:
from sklearn.metrics import roc_auc_score

# Predict on the held-out set and map the string labels to 0/1 integers.
predictions = risk_model.predict(test_data.drop('Risk', axis=1))
indexed_preds = [0 if prediction == 'No Risk' else 1 for prediction in predictions]

# Encode the ground-truth labels the same way for roc_auc_score.
real_observations = test_data.Risk.map({'No Risk': 0, 'Risk': 1}).values

auc = roc_auc_score(real_observations, indexed_preds)
print(auc)
0.7140884968445209
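
AUC gives a single-number summary; for per-class precision and recall you can optionally add scikit-learn's classification_report, which works on the string labels directly:

In [ ]:
# Optional: per-class precision, recall, and F1 on the held-out set.
from sklearn.metrics import classification_report

print(classification_report(test_data.Risk, predictions))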

Publish the model ¶

In this section, the notebook uses the Watson Machine Learning client to save the model (including the pipeline) to the WML repository in your deployment space.

In [33]:
software_spec_id = client.software_specifications.get_id_by_name("runtime-24.1-py3.11")
print("Software Specification ID: {}".format(software_spec_id))
model_props = {
    client.repository.ModelMetaNames.NAME: "{}".format(MODEL_NAME),
    client.repository.ModelMetaNames.TYPE: 'scikit-learn_1.3',
    client.repository.ModelMetaNames.SOFTWARE_SPEC_ID: software_spec_id
}
Software Specification ID: 336b29df-e0e1-5e7d-b6a5-f6ab722625b2
In [35]:
print("Storing model ...")

published_model_details = client.repository.store_model(
    model=risk_model,
    meta_props=model_props,
    training_data=data_df.drop(["Risk"], axis=1),
    training_target=data_df.Risk
)
model_id = client.repository.get_model_id(published_model_details)
print("Done")
print("Model ID: {}".format(model_id))
Storing model ...
Done
Model ID: 727c0550-b0aa-47c7-8ebc-dec52f2ff578

Deploy and score ¶

The next section of the notebook deploys the model as a RESTful web service in Watson Machine Learning. The deployed model will have a scoring URL you can use to send data to the model for predictions.

In [36]:
print("Deploying model...")
metadata = {
    client.deployments.ConfigurationMetaNames.NAME: DEPLOYMENT_NAME,
    client.deployments.ConfigurationMetaNames.ONLINE: {}
}
deployment = client.deployments.create(model_id, meta_props=metadata)
deployment_id = client.deployments.get_id(deployment)

print("Model id: {}".format(model_id))
print("Deployment id: {}".format(deployment_id))
Deploying model...


#######################################################################################

Synchronous deployment creation for uid: '727c0550-b0aa-47c7-8ebc-dec52f2ff578' started

#######################################################################################


initializing
Note: online_url and serving_urls are deprecated and will be removed in a future release. Use inference instead.

ready


------------------------------------------------------------------------------------------------
Successfully finished deployment creation, deployment_uid='e27bd69c-47e3-4375-8a67-a0d0322f6053'
------------------------------------------------------------------------------------------------


Model id: 727c0550-b0aa-47c7-8ebc-dec52f2ff578
Deployment id: e27bd69c-47e3-4375-8a67-a0d0322f6053
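
If you need the raw scoring endpoint, for example to call the deployment over plain REST, a sketch using the client's get_scoring_href helper (the exact URL shape depends on your region and deployment):

In [ ]:
# Retrieve the deployment details and extract its scoring endpoint.
deployment_details = client.deployments.get_details(deployment_id)
scoring_url = client.deployments.get_scoring_href(deployment_details)
print(scoring_url)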

Score the model¶

In [37]:
fields = ["CheckingStatus", "LoanDuration", "CreditHistory", "LoanPurpose", "LoanAmount", "ExistingSavings",
                  "EmploymentDuration", "InstallmentPercent", "Sex", "OthersOnLoan", "CurrentResidenceDuration",
                  "OwnsProperty", "Age", "InstallmentPlans", "Housing", "ExistingCreditsCount", "Job", "Dependents",
                  "Telephone", "ForeignWorker"]
values = [
            ["no_checking", 13, "credits_paid_to_date", "car_new", 1343, "100_to_500", "1_to_4", 2, "female", "none", 3,
             "savings_insurance", 46, "none", "own", 2, "skilled", 1, "none", "yes"],
            ["no_checking", 24, "prior_payments_delayed", "furniture", 4567, "500_to_1000", "1_to_4", 4, "male", "none",
             4, "savings_insurance", 36, "none", "free", 2, "management_self-employed", 1, "none", "yes"],
        ]

scoring_payload = {"input_data": [{"fields": fields, "values": values}]}
In [38]:
predictions = client.deployments.score(deployment_id, scoring_payload)
predictions
Out[38]:
{'predictions': [{'fields': ['prediction', 'probability'],
   'values': [['No Risk', [0.569000245132717, 0.43099975486728304]],
    ['No Risk', [0.7041741561003128, 0.2958258438996873]]]}]}
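
The same payload can also be sent over plain REST with an IAM bearer token. A hedged sketch using the scoring_url retrieved above; the IAM token exchange is the standard IBM Cloud flow, while the version query parameter value is an assumption you should check against the current WML API docs:

In [ ]:
import requests

# Exchange the platform API key for an IAM bearer token (standard IBM Cloud flow).
token_response = requests.post(
    "https://iam.cloud.ibm.com/identity/token",
    data={"apikey": api_key, "grant_type": "urn:ibm:params:oauth:grant-type:apikey"}
)
iam_token = token_response.json()["access_token"]

# POST the scoring payload; the WML public API expects a version date parameter
# (the date used here is an assumption; check the current docs).
response = requests.post(
    scoring_url,
    json=scoring_payload,
    headers={"Authorization": "Bearer " + iam_token},
    params={"version": "2021-06-01"}
)
print(response.json())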

Clean up¶

If you want to clean up all created assets:

  • experiments
  • trainings
  • pipelines
  • model definitions
  • models
  • functions
  • deployments

please follow this sample notebook.

Summary and next steps¶

You successfully completed this notebook!

You have finished the hands-on lab for IBM Watson Machine Learning. You created, published, and deployed a scikit-learn German credit risk model.

Check out our Online Documentation for more samples, tutorials, documentation, how-tos, and blog posts.

You can now run the model monitoring notebook. You will need to pass the deployed model ID to that notebook.

Authors¶

Lukasz Cmielowski, PhD, is an Automation Architect and Data Scientist at IBM with a track record of developing enterprise-level applications that substantially increase clients' ability to turn data into actionable knowledge.

Szymon Kucharczyk, Software Engineer at IBM Watson Machine Learning.

Mateusz Szewczyk, Software Engineer at Watson Machine Learning.

Copyright © 2020-2024 IBM. This notebook and its source code are released under the terms of the MIT License.