# API key placeholder: replace with a real key before running the notebook.
api_key = 'PASTE YOUR API KEY HERE'
# Region slug; it is interpolated into the service endpoint host below.
location = 'us-south'

# Credentials dictionary passed to the API client, the AutoAI experiment and
# the deployment helpers further down.
wml_credentials = {
    "apikey": api_key,
    "url": f"https://{location}.ml.cloud.ibm.com",
}


# Install/upgrade the packages this notebook uses. Each command is piped
# through `tail -n 1`, so only the last line of pip's output is displayed.
# NOTE(review): the pandas plotting backend is later set to "plotly", which is
# not installed here — presumably the runtime provides it; confirm.
!pip install -U ibm-watson-machine-learning | tail -n 1
!pip install -U autoai-libs | tail -n 1
# scikit-learn is pinned to an exact version; the other packages are not.
!pip install -U scikit-learn==1.1.1 | tail -n 1
!pip install wget | tail -n 1
!pip install matplotlib | tail -n 1


from ibm_watson_machine_learning import APIClient

# Build the API client from the credentials dictionary defined above.
client = APIClient(wml_credentials)


# ID of the space to work in. It is left empty here and must be filled in
# (e.g. from the listing below) before the following calls can succeed.
space_id = ''


# List available spaces, capped at 10 entries.
client.spaces.list(limit=10)


# Make `space_id` the client's default space for subsequent calls.
client.set.default_space(space_id)


# Fetch the space details and keep the 'properties' of its 'storage' entity;
# the bucket name is read from this mapping below.
cos_credentials = client.spaces.get_details(space_id=space_id)['entity']['storage']['properties']


# Name of the training CSV; used as the local file name, the object path in
# the bucket and the remote name when uploading.
filename = 'credit_risk_training_light.csv'
# Data source name; later used to look up the connection's datasource type.
datasource_name = 'bluemixcloudobjectstorage'
# Bucket attached to the space's storage.
bucketname = cos_credentials['bucket_name']


import os
import wget

# Public location of the sample credit-risk training data.
url = 'https://raw.githubusercontent.com/IBM/watson-machine-learning-samples/master/cloud/data/credit_risk/credit_risk_training_light.csv'

# Download only when no local copy exists. Passing `out=filename` ties the
# downloaded file's name to the one used by the existence check (and by the
# later upload), instead of relying on the URL's basename happening to match.
if not os.path.isfile(filename):
    wget.download(url, out=filename)


# Placeholders describing the external database; replace before running.
datasource_type_name = 'PUT YOUR DATASOURCE TYPE NAME HERE'
db_name = 'PUT YOUR DATABASE NAME HERE'
schema_name = 'PUT YOUR SCHEMA NAME HERE'

# Example values:
#   datasource_type_name = 'db2'
#   db_name = 'BLUDB'
#   schema_name = 'ZL'


# Name of the table holding the credit-risk data.
table_name = 'CREDIT_RISK'


# Connection properties for the database. Every value except the database
# name is a masked placeholder ("***") that must be replaced with real data.
db_credentials = {
    "database": db_name,
    "password": "***",
    "port": "***",
    "host": "***",
    "ssl": "***",
    "username": "***",
}


# Metadata for the connection asset: display name, datasource type,
# description and the connection properties.
# NOTE(review): `datasource_type_name` is defined earlier but never used; the
# name and type here come from `datasource_name` ('bluemixcloudobjectstorage')
# while PROPERTIES carries database-style credentials — confirm this pairing
# is intended.
conn_meta_props= {
    client.connections.ConfigurationMetaNames.NAME: f"Connection to Database - {datasource_name}:{db_name} ",
    # Resolve the datasource type identifier from its name.
    client.connections.ConfigurationMetaNames.DATASOURCE_TYPE: client.connections.get_datasource_type_uid_by_name(datasource_name),
    client.connections.ConfigurationMetaNames.DESCRIPTION: "Connection to external Database",
    client.connections.ConfigurationMetaNames.PROPERTIES: db_credentials
}

# Create the connection from the metadata above and keep the returned details.
conn_details = client.connections.create(meta_props=conn_meta_props)


# Extract the connection's ID from the creation details; it is used to build
# the training data connection.
connection_id = client.connections.get_uid(conn_details)


from ibm_watson_machine_learning.helpers import DataConnection, S3Location


# Data connection that points at the training file: the connection asset
# created above plus the file's bucket and path.
credit_risk_conn = DataConnection(
    connection_asset_id=connection_id,
    location=S3Location(bucket=bucketname, path=filename),
)
credit_risk_conn.set_client(client)

# Training input is passed to the optimizer as a list of data connections.
training_data_reference = [credit_risk_conn]


# Upload the local CSV under the same remote name, then read it back.
credit_risk_conn.write(data=filename, remote_name=filename)
credit_risk_conn.read()


from ibm_watson_machine_learning.experiment import AutoAI

# AutoAI experiment bound to the credentials and the selected space.
experiment = AutoAI(wml_credentials, space_id=space_id)

# Optimizer configuration: a binary prediction task on the 'Risk' column,
# scored with the ROC AUC metric.
pipeline_optimizer = experiment.optimizer(
    name='Credit Risk Prediction - AutoAI',
    prediction_type=AutoAI.PredictionType.BINARY,
    prediction_column='Risk',
    scoring=AutoAI.Metrics.ROC_AUC_SCORE,
)


# Show the optimizer's configuration parameters.
pipeline_optimizer.get_params()


# Run the experiment on the training data reference. The returned details are
# kept because the run ID is read from them later.
# background_mode=False presumably makes the call wait for completion — confirm.
run_details = pipeline_optimizer.fit(
            training_data_reference=training_data_reference,
            background_mode=False)


# Query the status of the run.
pipeline_optimizer.get_run_status()


# Summary of the run; its `holdout_roc_auc` column is plotted below.
summary = pipeline_optimizer.summary()
summary


import pandas as pd

# Route pandas' `.plot()` calls through the plotly backend.
pd.set_option("plotting.backend", "plotly")

# Plot the holdout ROC AUC values from the run summary.
summary['holdout_roc_auc'].plot()


# Fetch a pipeline from the optimizer without naming one explicitly.
# presumably this returns the best-ranked pipeline — confirm against the API docs.
best_pipeline = pipeline_optimizer.get_pipeline()


# Confusion matrix entry of the pipeline details.
pipeline_optimizer.get_pipeline_details()['confusion_matrix']


# Feature importance entry of the pipeline details.
pipeline_optimizer.get_pipeline_details()['features_importance']


# Convert the pipeline model to a Python script.
from ibm_watson_machine_learning.helpers import pipeline_to_script
pipeline_to_script(best_pipeline)


# Render a visualization of the pipeline.
best_pipeline.visualize()


# Display the pipeline's source code in scikit-learn form.
best_pipeline.pretty_print(ipython_display=True, astype='sklearn')


# Read the training data back through the optimizer's first data connection.
train_df = pipeline_optimizer.get_data_connections()[0].read()

# Separate the label column 'Risk' from the features; both are taken as
# plain value arrays.
train_y = train_df['Risk'].values
train_X = train_df.drop(columns=['Risk']).values


# Predict on the training features locally and peek at the first five results.
predicted_y = best_pipeline.predict(train_X)
predicted_y[:5]


# List experiment runs filtered by this experiment's name.
experiment.runs(filter='Credit Risk Prediction - AutoAI').list()


# ID of the run started above, taken from the details returned by `fit`.
run_id = run_details['metadata']['id']


# Configuration parameters the run was executed with.
experiment.runs.get_params(run_id=run_id)


# Re-create an optimizer object for the given run ID.
historical_opt = experiment.runs.get_optimizer(run_id)


# Note: this rebinds `run_details`; `run_id` was already extracted above.
run_details = historical_opt.get_run_details()


# Summary of the pipelines for the historical optimizer.
historical_opt.summary()


# Fetch one specific pipeline by name.
hist_pipeline = historical_opt.get_pipeline(pipeline_name='Pipeline_3')


# Predict locally with the historical pipeline; show the first five results.
predicted_y = hist_pipeline.predict(train_X)
predicted_y[:5]


# Candidate final estimators, imported under short aliases.
from sklearn.linear_model import LogisticRegression as LR
from sklearn.tree import DecisionTreeClassifier as Tree
from sklearn.neighbors import KNeighborsClassifier as KNN
from lale.lib.lale import Hyperopt
from lale import wrap_imported_operators

# Must run after the imports above: it wraps the operators imported so far.
# presumably this is what allows `>>` and `|` to be used on LR/Tree/KNN — confirm.
wrap_imported_operators()


# Drop the last step of the historical pipeline and freeze what remains, so
# the remaining steps serve as a fixed prefix.
prefix = hist_pipeline.remove_last().freeze_trainable()
prefix.visualize()


# New pipeline: the frozen prefix followed by one of LR, Tree or KNN.
# presumably `>>` chains steps and `|` denotes a choice left to the optimizer — confirm.
new_pipeline = prefix >> (LR | Tree | KNN)
new_pipeline.visualize()


from sklearn.model_selection import train_test_split

# Hold out 15% of the data for testing; the seed is fixed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(train_X, train_y, test_size=0.15, random_state=33)


# Search over the new pipeline with 3-fold cross-validation, at most 20
# evaluations, scored by ROC AUC.
hyperopt = Hyperopt(estimator=new_pipeline, cv=3, max_evals=20, scoring='roc_auc')
hyperopt_pipelines = hyperopt.fit(X_train, y_train)


# Pipeline model obtained from the trained Hyperopt object.
pipeline_model = hyperopt_pipelines.get_pipeline()


from sklearn.metrics import roc_auc_score

# Evaluate the refined pipeline on the held-out split.
predicted_y = pipeline_model.predict(X_test)
# roc_auc_score expects (y_true, y_score): ground truth first, predictions
# second. Both sides are turned into booleans marking the 'Risk' class; since
# `predict` yields class labels, this scores hard predictions.
score = roc_auc_score(y_test == 'Risk', predicted_y == 'Risk')
print(f'roc_auc_score {score:.1%}')


# Render a visualization of the selected pipeline.
pipeline_model.visualize()


# Name of the pipeline from the experiment run to deploy as a web service.
pipeline_name = "Pipeline_1"


from ibm_watson_machine_learning.deployment import WebService

# Online deployment helper bound to the credentials and the source space.
service = WebService(wml_credentials, source_space_id=space_id)

# Create the deployment from the named pipeline of the given experiment run.
service.create(
    experiment_run_id=run_id,
    model=pipeline_name, 
    deployment_name="Credit Risk Deployment AutoAI")


print(service)


# Show the deployment's parameters.
service.get_params()


# Score the first 10 training rows, with the 'Risk' label column removed.
predictions = service.score(payload=train_df.drop(['Risk'], axis=1).iloc[:10])
predictions


# Inline batch payload: the first 5 training rows without the 'Risk' label.
batch_payload_df = train_df.drop(['Risk'], axis=1)[:5]
batch_payload_df


from ibm_watson_machine_learning.deployment import Batch

# Batch deployment helper bound to the credentials and the source space.
service_batch = Batch(wml_credentials,source_space_id=space_id)
# Note: the batch deployment uses "Pipeline_2", not `pipeline_name` from the
# online deployment.
service_batch.create(
            experiment_run_id=run_id,
            model="Pipeline_2",
            deployment_name="Credit Risk Batch Deployment AutoAI")


# Run a batch scoring job on the inline payload.
# background_mode=False presumably makes the call wait for the job — confirm.
scoring_params = service_batch.run_job(
            payload=batch_payload_df,
            background_mode=False)


# Read the predictions from the job's scoring details; `.get` yields None if
# the 'predictions' key is absent.
scoring_params['entity']['scoring'].get('predictions')

Use AutoAI and Lale to predict credit risk with `ibm-watson-machine-learning`¶

Learning goals¶

Contents¶

1. Set up the environment¶

Connection to WML¶

Install and import the `ibm-watson-machine-learning` and dependencies¶

Working with spaces¶

Connections to COS¶

2. Optimizer definition¶

Training data connection¶

Create connection¶

Optimizer configuration¶

3. Experiment run¶

4. Pipelines comparison and testing¶

Get selected pipeline model¶

Convert the pipeline model to a Python script and download it¶

Visualize pipeline¶

Pipeline source code¶

Reading training data from COS¶

Test pipeline model locally¶

5. Historical runs¶

Get executed optimizer's configuration parameters¶

Get historical optimizer instance and training details¶

List trained pipelines for selected optimizer¶

Get selected pipeline and test locally¶

6. Pipeline refinement with Lale and testing¶

Pipeline decomposition and new definition¶

New optimizer `Hyperopt` configuration and training¶

Pipeline model tests and visualization¶

7. Deploy and Score¶

Online deployment creation¶

Scoring of webservice¶

Deleting deployment¶

Batch deployment creation¶

Score batch deployment with inline payload as pandas DataFrame.¶

8. Clean up¶

9. Summary and next steps¶

Authors¶

Use AutoAI and Lale to predict credit risk with ibm-watson-machine-learning¶

Learning goals¶

Contents¶

1. Set up the environment¶

Connection to WML¶

Install and import the ibm-watson-machine-learning and dependencies¶

Working with spaces¶

Connections to COS¶

2. Optimizer definition¶

Training data connection¶

Create connection¶

Optimizer configuration¶

3. Experiment run¶

4. Pipelines comparison and testing¶

Get selected pipeline model¶

Convert the pipeline model to a Python script and download it¶

Visualize pipeline¶

Pipeline source code¶

Reading training data from COS¶

Test pipeline model locally¶

5. Historical runs¶

Get executed optimizer's configuration parameters¶

Get historical optimizer instance and training details¶

List trained pipelines for selected optimizer¶

Get selected pipeline and test locally¶

6. Pipeline refinement with Lale and testing¶

Pipeline decomposition and new definition¶

New optimizer Hyperopt configuration and training¶

Pipeline model tests and visualization¶

7. Deploy and Score¶

Online deployment creation¶

Scoring of webservice¶

Deleting deployment¶

Batch deployment creation¶

Score batch deployment with inline payload as pandas DataFrame.¶

8. Clean up¶

9. Summary and next steps¶

Authors¶

Use AutoAI and Lale to predict credit risk with `ibm-watson-machine-learning`¶

Install and import the `ibm-watson-machine-learning` and dependencies¶

New optimizer `Hyperopt` configuration and training¶