# ---------------------------------------------------------------------------
# Notebook configuration. Replace every "XXX" placeholder before running.
# ---------------------------------------------------------------------------

# Value sent as the "version" query parameter of the WML REST calls.
API_VERSION = "2021-10-01"

# Regional WML host. Other documented choices:
#   "eu-de.ml.cloud.ibm.com", "eu-gb.ml.cloud.ibm.com", "jp-tok.ml.cloud.ibm.com"
WML_SERVICES_HOST = "us-south.ml.cloud.ibm.com"

# Base URL of the WML service and the IAM endpoint that issues access tokens.
WML_SERVICES_URL = "https://%s" % WML_SERVICES_HOST
IAM_TOKEN_URL = "https://iam.cloud.ibm.com/oidc/token"

# IBM Cloud API key that is exchanged for an IAM access token.
IAM_APIKEY = "XXX"

# Get this from Manage > IAM > Users, and check the URL.
# Your user ID should be in the format IBMid-<xxx>.
CLOUD_USERID = "IBMid-XXX"

# Get this by going into your WS project and checking the URL.
PROJECT_ID = "XXX"


# Tags attached to the assets created by this notebook; the clean-up steps
# filter on them so that only these assets are deleted.
MODEL_TAG = "wmlfltf2samplemodel"
RTS_TAG = "wmlfltf2samplerts"
TRAINING_TAG = "wmlfltf2sampletraining"


import urllib3
import requests
import json
from string import Template

# Silence urllib3 warnings process-wide (for example the InsecureRequestWarning
# that is emitted whenever a request is sent with certificate verification
# turned off).
# NOTE(review): this hides a security-relevant warning - confirm it is still wanted.
urllib3.disable_warnings()


# Exchange the IBM Cloud API key for an IAM access token.
#
# The form fields are handed to requests as a dict so that it URL-encodes
# them; concatenating the key into the body by hand would corrupt the request
# if the key ever contained a reserved character such as "&", "+" or "=".
payload = {
    "grant_type": "urn:ibm:params:oauth:grant-type:apikey",
    "apikey": IAM_APIKEY,
}
token_resp = requests.post(IAM_TOKEN_URL,
                           headers={"Content-Type": "application/x-www-form-urlencoded"},
                           data=payload,
                           verify=True)

print(token_resp)

# Stop with the HTTP error (e.g. an invalid API key) instead of a KeyError on
# "access_token" further down.
token_resp.raise_for_status()

# Value for the "Authorization" header of the WML requests.
token = "Bearer " + token_resp.json()["access_token"]

# Only a short prefix is shown: the full bearer token is a credential and
# would otherwise be stored in the notebook output.
print("WS token: %s... (truncated)" % token[:15])


# Create the (still untrained) model asset in the project.
#
# The request body is built as a dict and serialised with json.dumps so that
# the tag and project id are escaped correctly, instead of being pasted into
# a JSON text template.
base_model_save_payload = json.dumps({
    "name": "Untrained MNIST Model",
    "tags": [MODEL_TAG],
    "type": "tensorflow_2.9",
    "software_spec": {
        "name": "runtime-22.2-py3.10",
    },
    "custom": {
        "untrained": True,
    },
    "project_id": PROJECT_ID,
})

# verify=True: the bearer token is sent with this request, so the server
# certificate must be validated (same setting as the IAM token request).
base_model_save_resp = requests.post(WML_SERVICES_URL + "/ml/v4/models",
                                     headers={"Content-Type": "application/json",
                                              "Authorization": token},
                                     params={"version": API_VERSION},
                                     data=base_model_save_payload,
                                     verify=True)

print(base_model_save_resp)
status_json = base_model_save_resp.json()
print("Create model asset response : "+ json.dumps(status_json, indent=4))

# The error body has been printed above; now fail with the HTTP error rather
# than with a KeyError on "metadata".
base_model_save_resp.raise_for_status()

# Id of the new model asset; used for the content upload and the training job.
base_model_id = status_json["metadata"]["id"]
print("Model asset id: %s" % base_model_id)


# Download the sample untrained model archive and attach it as the content of
# the model asset created earlier.
base_model_content_resp = requests.get("https://github.com/IBMDataScience/sample-notebooks/raw/master/Files/tf_mnist_model.zip",
                                       headers={"Content-Type": "application/octet-stream"})

# Abort if the download failed; otherwise the body of an error page would be
# uploaded as if it were the model archive.
base_model_content_resp.raise_for_status()

base_model_content_uri = "/ml/v4/models/"+ base_model_id + "/content"
print("Host URL = " + WML_SERVICES_URL + base_model_content_uri)

# verify=True: the bearer token is sent with this request, so the server
# certificate must be validated (same setting as the IAM token request).
base_model_put_resp = requests.put(WML_SERVICES_URL + base_model_content_uri,
                                   headers={"Content-Type": "application/json",
                                            "Authorization": token},
                                   params={"version": API_VERSION,
                                           "project_id": PROJECT_ID,
                                           "content_format": "native"},
                                   data=base_model_content_resp.content,
                                   verify=True)

print(base_model_put_resp)
status_json = base_model_put_resp.json()
print("Upload model response : "+ json.dumps(status_json, indent=4))


# Create the Remote Training System (RTS) asset that describes the remote
# party. CLOUD_USERID is listed both as an allowed identity and as the remote
# admin.
#
# The request body is built as a dict and serialised with json.dumps so that
# the substituted values are escaped correctly, instead of being pasted into
# a JSON text template.
wml_remote_training_system_asset_one_def = json.dumps({
    "name": "Remote Party 1",
    "project_id": PROJECT_ID,
    "description": "Sample Remote Training System",
    "tags": [RTS_TAG],
    "organization": {
        "name": "IBM",
        "region": "US",
    },
    "allowed_identities": [
        {
            "id": CLOUD_USERID,
            "type": "user",
        },
    ],
    "remote_admin": {
        "id": CLOUD_USERID,
        "type": "user",
    },
})


# verify=True: the bearer token is sent with this request, so the server
# certificate must be validated (same setting as the IAM token request).
wml_remote_training_system_one_resp = requests.post(WML_SERVICES_URL + "/ml/v4/remote_training_systems",
                                                    headers={"Content-Type": "application/json",
                                                             "Authorization": token},
                                                    params={"version": API_VERSION,
                                                            "project_id": PROJECT_ID},
                                                    data=wml_remote_training_system_asset_one_def,
                                                    verify=True)

print(wml_remote_training_system_one_resp)
status_json = wml_remote_training_system_one_resp.json()
print("Create remote training system response : "+ json.dumps(status_json, indent=4))

# The error body has been printed above; now fail with the HTTP error rather
# than with a KeyError on "metadata".
wml_remote_training_system_one_resp.raise_for_status()

# Id of the new RTS asset; referenced by the training job definition.
wml_remote_training_system_one_asset_uid = status_json["metadata"]["id"]
print("Remote Training System id: %s" % wml_remote_training_system_one_asset_uid)


# Create the Federated Learning training job (the aggregator). It references
# the untrained model asset and the remote training system created earlier.
#
# The request body is built as a dict and serialised with json.dumps so that
# the substituted ids are escaped correctly, instead of being pasted into a
# JSON text template.
training_payload = json.dumps({
    "name": "FL Aggregator",
    "tags": [TRAINING_TAG],
    "federated_learning": {
        "model": {
            "spec": {
                "id": base_model_id,
            },
            "type": "tensorflow",
        },
        "fusion_type": "iter_avg",
        "rounds": 5,
        "remote_training": {
            "quorum": 1.0,
            "remote_training_systems": [
                {"id": wml_remote_training_system_one_asset_uid, "required": True},
            ],
        },
        "software_spec": {
            "name": "runtime-22.2-py3.10",
        },
        "hardware_spec": {
            "name": "XS",
        },
    },
    "training_data_references": [],
    "results_reference": {
        "type": "container",
        "name": "outputData",
        "connection": {},
        "location": {
            "path": ".",
        },
    },
    "project_id": PROJECT_ID,
})

# verify=True: the bearer token is sent with this request, so the server
# certificate must be validated (same setting as the IAM token request).
create_training_resp = requests.post(WML_SERVICES_URL + "/ml/v4/trainings", params={"version": API_VERSION},
                                     headers={"Content-Type": "application/json",
                                              "Authorization": token},
                                     data=training_payload,
                                     verify=True)

print(create_training_resp)
status_json = create_training_resp.json()
print("Create training response : "+ json.dumps(status_json, indent=4))

# The error body has been printed above; now fail with the HTTP error rather
# than with a KeyError on "metadata".
create_training_resp.raise_for_status()

# Id of the training job; used to query its status and to locate its output.
training_id = status_json["metadata"]["id"]
print("Training id: %s" % training_id)


# Fetch and print the current state of the training job created above.
# verify=True: the bearer token is sent with this request, so the server
# certificate must be validated (same setting as the IAM token request).
get_training_resp = requests.get(WML_SERVICES_URL + "/ml/v4/trainings/" + training_id,
                                 headers={"Content-Type": "application/json",
                                          "Authorization": token},
                                 params={"version": API_VERSION,
                                         "project_id": PROJECT_ID},
                                 verify=True)

print(get_training_resp)
status_json = get_training_resp.json()
print("Get training response : "+ json.dumps(status_json, indent=4))


# Print the values as `NAME = 'value'` lines so they can be copied into the
# party notebook.
for label, value in (
    ("WML_SERVICES_HOST", WML_SERVICES_HOST),
    ("PROJECT_ID", PROJECT_ID),
    ("IAM_APIKEY", IAM_APIKEY),
    ("RTS_ID", wml_remote_training_system_one_asset_uid),
    ("TRAINING_ID", training_id),
):
    print("%s = '%s'" % (label, value))


# Cloud Object Storage (COS) settings. Replace every "XXX" placeholder.

# Bucket used by the project, e.g. myproject-donotdelete-pr-tdnvueqivxep8v
BUCKET = "XXX"

# Current list of endpoints is available at
# https://control.cloud-object-storage.cloud.ibm.com/v2/endpoints
COS_ENDPOINT = "https://s3.us.cloud-object-storage.appdomain.cloud"

# e.g. "W00YixxxxxxxxxxMB-odB-2ySfTrFBIQQWanc--P3byk"
COS_APIKEY = "XXX"

# e.g. "crn:v1:bluemix:public:cloud-object-storage:global:a/3bf0d9003xxxxxxxxxx1c3e97696b71c:d6f04d83-6c4f-4a62-a165-696756d63903::"
COS_RESOURCE_INSTANCE_ID = "XXX"


# IPython/Jupyter shell escape (not plain Python syntax): installs the
# ibm-cos-sdk package, which provides the ibm_boto3 / ibm_botocore modules
# imported below. Only works when run inside a notebook or IPython session.
!pip install ibm-cos-sdk


import ibm_boto3
from ibm_botocore.client import Config, ClientError

# Open a Cloud Object Storage resource handle using the COS API key and
# service instance configured above, with the "oauth" signature version.
cos = ibm_boto3.resource(
    "s3",
    endpoint_url=COS_ENDPOINT,
    config=Config(signature_version="oauth"),
    ibm_service_instance_id=COS_RESOURCE_INSTANCE_ID,
    ibm_api_key_id=COS_APIKEY,
)

# Object key of the request.json stored under the training job's id in the bucket.
ITEM_NAME = "%s/assets/%s/resources/wml_model/request.json" % (training_id, training_id)

# Download the object and parse its body as JSON.
file = cos.Object(BUCKET, ITEM_NAME).get()
request_body = file["Body"].read()
req = json.loads(request_body)


# Save the trained model to the project, re-using the request document read
# from COS with only its display name changed.
req["name"] = "Trained MNIST Model"

model_save_payload = json.dumps(req)
print ("Model save payload: %s" % model_save_payload)


# verify=True: the bearer token is sent with this request, so the server
# certificate must be validated (same setting as the IAM token request).
model_save_resp = requests.post(WML_SERVICES_URL + "/ml/v4/models",
                                params={"version": API_VERSION,
                                        "project_id": PROJECT_ID,
                                        "content_format": "native"},
                                headers={"Content-Type": "application/json",
                                         "Authorization": token},
                                data=model_save_payload,
                                verify=True)

print(model_save_resp)
status_json = model_save_resp.json()
print("Save model response : "+ json.dumps(status_json, indent=4))

# The error body has been printed above; now fail with the HTTP error rather
# than with a KeyError on "metadata".
model_save_resp.raise_for_status()

model_id = status_json["metadata"]["id"]
print("Saved model id: %s" % model_id)


# List every training job in the project and print the raw response.
# verify=True: the bearer token is sent with this request, so the server
# certificate must be validated (same setting as the IAM token request).
get_training_resp = requests.get(WML_SERVICES_URL + "/ml/v4/trainings",
                                 headers={"Content-Type": "application/json",
                                          "Authorization": token},
                                 params={"version": API_VERSION,
                                         "project_id": PROJECT_ID},
                                 verify=True)

print(get_training_resp)
status_json = get_training_resp.json()
print("Get training response : "+ json.dumps(status_json, indent=4))


# Delete the training jobs created by this notebook: list the project's
# trainings filtered by TRAINING_TAG, then delete each one by id.
# verify=True on every call: the bearer token is sent with these requests, so
# the server certificate must be validated (same setting as the IAM token
# request).
get_training_resp = requests.get(WML_SERVICES_URL + "/ml/v4/trainings",
                                 headers={"Content-Type": "application/json",
                                          "Authorization": token},
                                 params={"version": API_VERSION,
                                         "project_id": PROJECT_ID,
                                         "tag.value": TRAINING_TAG},
                                 verify=True)

# Fail with the HTTP error rather than with a KeyError on "resources".
get_training_resp.raise_for_status()

training_list_json = get_training_resp.json()
training_resources = training_list_json["resources"]

# NOTE: the loop rebinds the module-level `training_id`.
for training in training_resources:
    training_id = training["metadata"]["id"]
    print("Deleting Training ID: " + training_id)
    # "true" is passed as a string: requests would serialise the Python
    # boolean True as "True" in the query string.
    delete_training_resp = requests.delete(WML_SERVICES_URL + "/ml/v4/trainings/" + training_id,
                                           headers={"Content-Type": "application/json",
                                                    "Authorization": token},
                                           params={"version": API_VERSION,
                                                   "project_id": PROJECT_ID,
                                                   "hard_delete": "true"},
                                           verify=True)
    # Best effort: the outcome of each delete is only printed.
    print(delete_training_resp)


# List every remote training system in the project and print the raw response.
# verify=True: the bearer token is sent with this request, so the server
# certificate must be validated (same setting as the IAM token request).
get_rts_resp = requests.get(WML_SERVICES_URL + "/ml/v4/remote_training_systems",
                            headers={"Content-Type": "application/json",
                                     "Authorization": token},
                            params={"version": API_VERSION,
                                    "project_id": PROJECT_ID},
                            verify=True)

print(get_rts_resp)
rts_list_json = get_rts_resp.json()
print("Remote Training Systems in Project : "+ json.dumps(rts_list_json, indent=4))


# Delete the remote training systems created by this notebook: list the
# project's RTS assets filtered by RTS_TAG, then delete each one by id.
# verify=True on every call: the bearer token is sent with these requests, so
# the server certificate must be validated (same setting as the IAM token
# request).
get_rts_resp = requests.get(WML_SERVICES_URL + "/ml/v4/remote_training_systems",
                            headers={"Content-Type": "application/json",
                                     "Authorization": token},
                            params={"version": API_VERSION,
                                    "project_id": PROJECT_ID,
                                    "tag.value": RTS_TAG},
                            verify=True)

# Fail with the HTTP error rather than with a KeyError on "resources".
get_rts_resp.raise_for_status()

rts_list_json = get_rts_resp.json()
rts_resources = rts_list_json["resources"]

for rts in rts_resources:
    rts_id = rts["metadata"]["id"]
    print("Deleting RTS ID: " + rts_id)
    delete_rts_resp = requests.delete(WML_SERVICES_URL + "/ml/v4/remote_training_systems/" + rts_id,
                                      headers={"Content-Type": "application/json",
                                               "Authorization": token},
                                      params={"version": API_VERSION,
                                              "project_id": PROJECT_ID},
                                      verify=True)
    # Best effort: the outcome of each delete is only printed.
    print(delete_rts_resp)


# List every model in the project and print the raw response.
# verify=True: the bearer token is sent with this request, so the server
# certificate must be validated (same setting as the IAM token request).
get_model_resp = requests.get(WML_SERVICES_URL + "/ml/v4/models",
                              headers={"Content-Type": "application/json",
                                       "Authorization": token},
                              params={"version": API_VERSION,
                                      "project_id": PROJECT_ID},
                              verify=True)

print(get_model_resp)
model_list_json = get_model_resp.json()
print("Models in Project : "+ json.dumps(model_list_json, indent=4))


# Delete the models created by this notebook: list the project's models
# filtered by MODEL_TAG, then delete each one by id.
# verify=True on every call: the bearer token is sent with these requests, so
# the server certificate must be validated (same setting as the IAM token
# request).
get_model_resp = requests.get(WML_SERVICES_URL + "/ml/v4/models",
                              headers={"Content-Type": "application/json",
                                       "Authorization": token},
                              params={"version": API_VERSION,
                                      "project_id": PROJECT_ID,
                                      "tag.value": MODEL_TAG},
                              verify=True)

# Fail with the HTTP error rather than with a KeyError on "resources".
get_model_resp.raise_for_status()

model_list_json = get_model_resp.json()
model_resources = model_list_json["resources"]

# NOTE: the loop rebinds the module-level `model_id`.
for model in model_resources:
    model_id = model["metadata"]["id"]
    print("Deleting Model ID: " + model_id)
    delete_model_resp = requests.delete(WML_SERVICES_URL + "/ml/v4/models/" + model_id,
                                        headers={"Content-Type": "application/json",
                                                 "Authorization": token},
                                        params={"version": API_VERSION,
                                                "project_id": PROJECT_ID},
                                        verify=True)
    # Best effort: the outcome of each delete is only printed.
    print(delete_model_resp)

Part 1 - WML Federated Learning with MNIST for Admin

Learning Goals

Table of Contents

1. Prerequisites

1.1 Define variables

1.2 Define tags

1.3 Import libraries

2. Obtain Cloud authentication token

3. Load the model

3.1 Create Untrained Model Asset

3.2 Upload Initial Model

4. Create Remote Training System Asset

5. Create FL Training Job

5.1 Get Training Job Status

6. Get Variables And Paste Into Party Notebook

7. Save Trained Model To Project

7.1 Connection to COS

7.2 Install pre-req

7.3 Save model to project

8. Clean Up Project

8.1 List all training jobs in project

8.2 Delete all training jobs in this project created by this notebook

8.3 List all remote training systems in project

8.4 Delete all remote training systems in this project created by this notebook

8.5 List all models in project

8.6 Delete all untrained models in this project created by this notebook