!pip install datasets | tail -n 1
!pip install requests | tail -n 1
!pip install wget | tail -n 1
!pip install ibm-cloud-sdk-core | tail -n 1
!pip install rouge | tail -n 1


import os, getpass, wget
import requests
from datasets import load_dataset
from ibm_cloud_sdk_core import IAMTokenManager
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator, BearerTokenAuthenticator
from pandas import value_counts, read_csv, DataFrame
from rouge import Rouge
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split


# Base URL of the WML service. Surrounding whitespace and any trailing "/" are
# removed because request paths such as "/ml/v1-beta/..." are appended to this
# value verbatim; a pasted trailing slash would otherwise produce "//ml/...".
endpoint_url = input("Please enter your WML endpoint url (hit enter): ").strip().rstrip("/")

Please enter your WML endpoint url (hit enter): ········


class Prompt:
    """Thin client for the WML text-generation REST endpoint.

    Stores the bearer token and project id that each generation request
    carries. The service base URL is taken from the module-level
    ``endpoint_url`` at call time.
    """

    def __init__(self, access_token, project_id):
        """Remember the credentials used by :meth:`generate`.

        Args:
            access_token: Token sent in the ``Authorization: Bearer`` header.
            project_id: Project identifier included in the request payload.
        """
        self.access_token = access_token
        self.project_id = project_id

    def generate(self, input, model_id, parameters, timeout=120):
        """Send one prompt to the generation endpoint and return its result.

        Args:
            input: Prompt text. The name shadows the builtin but is kept
                because callers may pass it by keyword.
            model_id: Identifier of the model to run.
            parameters: Generation parameters, forwarded unchanged.
            timeout: Seconds ``requests`` waits for the server before raising
                a timeout error. Without it a stalled connection would block
                forever.

        Returns:
            On HTTP 200, the first element of the ``"results"`` list in the
            JSON response. On any other status, the raw response body as a
            string so the caller can inspect the error.
        """
        wml_url = f"{endpoint_url}/ml/v1-beta/generation/text?version=2023-05-28"
        headers = {
            "Authorization": "Bearer " + self.access_token,
            "Content-Type": "application/json",
            "Accept": "application/json",
        }
        payload = {
            "model_id": model_id,
            "input": input,
            "parameters": parameters,
            "project_id": self.project_id,
        }
        response = requests.post(wml_url, json=payload, headers=headers, timeout=timeout)
        if response.status_code == 200:
            return response.json()["results"][0]
        # Non-200: hand back the body text instead of raising, as before.
        return response.text


# Exchange the API key for an access token via the IAM token endpoint.
# The key is read with getpass (no echo) and handed directly to
# IAMTokenManager, so it is never bound to a notebook variable.
access_token = IAMTokenManager(
    apikey = getpass.getpass("Please enter your WML api key (hit enter): "),
    url = "https://iam.cloud.ibm.com/identity/token"
).get_token()

Please enter your WML api key (hit enter): ········


# Prefer the PROJECT_ID environment variable and fall back to an interactive
# prompt. An unset *or empty* variable triggers the prompt: an empty value
# would otherwise be sent as a blank project_id in every request.
project_id = os.environ.get("PROJECT_ID") or input("Please enter your project_id (hit enter): ")

Please enter your project_id (hit enter): ········


# Local cache path and remote location of the dataset CSV.
filename = 'Data_Cyber.csv'
url = 'https://raw.githubusercontent.com/IBM/watson-machine-learning-samples/master/cloud/data/spec5g/spec5g.csv'

# Download only when no local copy exists, so re-running the cell is cheap.
if not os.path.isfile(filename):
    wget.download(url, out=filename)

  0% [                                                        ]      0 / 641145
  1% [                                                        ]   8192 / 641145
  2% [.                                                       ]  16384 / 641145
  3% [..                                                      ]  24576 / 641145
  5% [..                                                      ]  32768 / 641145
  6% [...                                                     ]  40960 / 641145
  7% [....                                                    ]  49152 / 641145
  8% [.....                                                   ]  57344 / 641145
 10% [.....                                                   ]  65536 / 641145
 11% [......                                                  ]  73728 / 641145
 12% [.......                                                 ]  81920 / 641145
 14% [.......                                                 ]  90112 / 641145
 15% [........                                                ]  98304 / 641145
 16% [.........                                               ] 106496 / 641145
 17% [..........                                              ] 114688 / 641145
 19% [..........                                              ] 122880 / 641145
 20% [...........                                             ] 131072 / 641145
 21% [............                                            ] 139264 / 641145
 22% [............                                            ] 147456 / 641145
 24% [.............                                           ] 155648 / 641145
 25% [..............                                          ] 163840 / 641145
 26% [...............                                         ] 172032 / 641145
 28% [...............                                         ] 180224 / 641145
 29% [................                                        ] 188416 / 641145
 30% [.................                                       ] 196608 / 641145
 31% [.................                                       ] 204800 / 641145
 33% [..................                                      ] 212992 / 641145
 34% [...................                                     ] 221184 / 641145
 35% [....................                                    ] 229376 / 641145
 37% [....................                                    ] 237568 / 641145
 38% [.....................                                   ] 245760 / 641145
 39% [......................                                  ] 253952 / 641145
 40% [......................                                  ] 262144 / 641145
 42% [.......................                                 ] 270336 / 641145
 43% [........................                                ] 278528 / 641145
 44% [.........................                               ] 286720 / 641145
 45% [.........................                               ] 294912 / 641145
 47% [..........................                              ] 303104 / 641145
 48% [...........................                             ] 311296 / 641145
 49% [...........................                             ] 319488 / 641145
 51% [............................                            ] 327680 / 641145
 52% [.............................                           ] 335872 / 641145
 53% [..............................                          ] 344064 / 641145
 54% [..............................                          ] 352256 / 641145
 56% [...............................                         ] 360448 / 641145
 57% [................................                        ] 368640 / 641145
 58% [................................                        ] 376832 / 641145
 60% [.................................                       ] 385024 / 641145
 61% [..................................                      ] 393216 / 641145
 62% [...................................                     ] 401408 / 641145
 63% [...................................                     ] 409600 / 641145
 65% [....................................                    ] 417792 / 641145
 66% [.....................................                   ] 425984 / 641145
 67% [.....................................                   ] 434176 / 641145
 68% [......................................                  ] 442368 / 641145
 70% [.......................................                 ] 450560 / 641145
 71% [........................................                ] 458752 / 641145
 72% [........................................                ] 466944 / 641145
 74% [.........................................               ] 475136 / 641145
 75% [..........................................              ] 483328 / 641145
 76% [..........................................              ] 491520 / 641145
 77% [...........................................             ] 499712 / 641145
 79% [............................................            ] 507904 / 641145
 80% [.............................................           ] 516096 / 641145
 81% [.............................................           ] 524288 / 641145
 83% [..............................................          ] 532480 / 641145
 84% [...............................................         ] 540672 / 641145
 85% [...............................................         ] 548864 / 641145
 86% [................................................        ] 557056 / 641145
 88% [.................................................       ] 565248 / 641145
 89% [..................................................      ] 573440 / 641145
 90% [..................................................      ] 581632 / 641145
 91% [...................................................     ] 589824 / 641145
 93% [....................................................    ] 598016 / 641145
 94% [....................................................    ] 606208 / 641145
 95% [.....................................................   ] 614400 / 641145
 97% [......................................................  ] 622592 / 641145
 98% [....................................................... ] 630784 / 641145
 99% [....................................................... ] 638976 / 641145
100% [........................................................] 641145 / 641145


# Load the downloaded CSV; its first column is used as the row index.
data = read_csv("Data_Cyber.csv", index_col=0)
# Preview the first rows.
data.head()


# Summary statistics of the whitespace-delimited word count per paragraph.
data["Paragraph"].map(lambda paragraph: len(paragraph.split())).describe()

count    713.000000
mean     101.632539
std       34.300754
min       35.000000
25%       78.000000
50%       98.000000
75%      121.000000
max      266.000000
Name: Paragraph, dtype: float64


# Summary statistics of the whitespace-delimited word count per simplification.
data["Simplification"].map(lambda summary: len(summary.split())).describe()

count    713.000000
mean      43.927069
std       24.889311
min        8.000000
25%       28.000000
50%       38.000000
75%       53.000000
max      249.000000
Name: Simplification, dtype: float64


# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# identical across runs. Paragraphs are the inputs, simplifications the targets.
data_train, data_test, y_train, y_test = train_test_split(
    data['Paragraph'],
    data['Simplification'],
    test_size=0.3,
    random_state=33,
)
# Promote the input Series to single-column DataFrames.
data_train = data_train.to_frame()
data_test = data_test.to_frame()


# Query the service for the foundation models it exposes.
models_response = requests.get(
    endpoint_url + '/ml/v1-beta/foundation_model_specs?version=2022-08-01&limit=50',
    headers={
        'Authorization': f'Bearer {access_token}',
        'Content-Type': 'application/json',
        'Accept': 'application/json',
    },
    # Bound the wait so a stalled connection cannot hang the notebook.
    timeout=60,
)
# Surface HTTP errors (bad token, wrong URL) directly instead of letting them
# show up later as a KeyError on 'resources'.
models_response.raise_for_status()
models_json = models_response.json()
models_ids = [m['model_id'] for m in models_json['resources']]
print(models_ids)

['bigscience/mt0-xxl', 'eleutherai/gpt-neox-20b', 'google/flan-t5-xxl', 'google/flan-ul2', 'ibm/mpt-7b-instruct2']


# Model used for every generation request below.
model_id = "google/flan-ul2"


# Instruction text placed at the start of each prompt. The string begins with
# a newline (the literal opens and immediately breaks the line).
# NOTE(review): the wording refers to "Original text", while the few-shot
# examples label paragraphs as "sentence:" — confirm the mismatch is intended.
instruction =  """
Extract the key outline of the "Original text" similar to the Simplification according to the examples."""


# One {"input": paragraph} record per held-out paragraph.
zero_shot_inputs = [{"input": text} for text in data_test['Paragraph']]

# Preview at most the first two records. Slicing (rather than indexing 0 and 1)
# keeps this cell from raising IndexError when fewer than two records exist.
for example_no, sample in enumerate(zero_shot_inputs[:2], start=1):
    print(f"The sentence example {example_no} is:\n {sample['input']}\n")

The sentence example 1 is:
 UE A can then prompt the user to initiate a voice call to UE B 6a(Successful case). The RAB Assignment Request message is sent from MSC B to the RNC B, requesting the establishment of a RAB for a Video Call.
 The radio bearer is established between the RNC B and UE B.
 RNC B responds to MSC B with a RAB Assignment Response message.
 Following the allocation of the radio resources, UE B sends an Alerting message to 6b (Failure case). The video call fails because of lack of radio resources on the B side.
 

The sentence example 2 is:
 As a network option, the operator may refuse to provide the requested information. When gsmSCF processing is complete the call control is returned to the GMSC server .
 The GMSC server interrogates the HLR in order to determine his current location.
 The HLR shall create an HLR interrogation record. The GMSC server routes the call to the VPLMN in which subscriber "B" is currently located.
 The GMSC server shall create an outgoing gateway record for accounting purposes.
 The GMSC server shall also create a roaming record.


# Training paragraphs with their reference simplifications attached as a new
# column (aligned on the index); data_train itself is left unmodified.
data_train_and_labels = data_train.assign(Simplification=y_train)


# Draw two training rows to serve as in-context examples. The seed matches the
# one used for the train/test split so the prompt is the same on every run.
train_samples = data_train_and_labels.sample(2, random_state=33)

# Not used in this cell (note the singular name); retained so any later
# reference to it still resolves.
few_shot_example = []

# Render each sampled row as a "sentence / Simplification" pair.
examples = [
    f"\tsentence:\t{paragraph}\n\tSimplification: {simplification}\n"
    for paragraph, simplification in zip(
        train_samples['Paragraph'], train_samples['Simplification']
    )
]
# Single-element list holding all examples concatenated into one string.
few_shot_examples = [''.join(examples)]


# One {"input": paragraph} record per held-out paragraph.
few_shot_inputs_ = [{"input": text} for text in data_test['Paragraph'].values]

# Preview at most the first two records. Slicing (rather than indexing 0 and 1)
# keeps this cell from raising IndexError when fewer than two records exist.
for example_no, sample in enumerate(few_shot_inputs_[:2], start=1):
    print(f"The sentence example {example_no} is:\n {sample['input']}\n")

The sentence example 1 is:
 UE A can then prompt the user to initiate a voice call to UE B 6a(Successful case). The RAB Assignment Request message is sent from MSC B to the RNC B, requesting the establishment of a RAB for a Video Call.
 The radio bearer is established between the RNC B and UE B.
 RNC B responds to MSC B with a RAB Assignment Response message.
 Following the allocation of the radio resources, UE B sends an Alerting message to 6b (Failure case). The video call fails because of lack of radio resources on the B side.
 

The sentence example 2 is:
 As a network option, the operator may refuse to provide the requested information. When gsmSCF processing is complete the call control is returned to the GMSC server .
 The GMSC server interrogates the HLR in order to determine his current location.
 The HLR shall create an HLR interrogation record. The GMSC server routes the call to the VPLMN in which subscriber "B" is currently located.
 The GMSC server shall create an outgoing gateway record for accounting purposes.
 The GMSC server shall also create a roaming record.


# Generation settings sent as the "parameters" field of every request.
parameters = {
    "decoding_method": "greedy",
    "random_seed": 33,
    "repetition_penalty": 1,
    # Lower and upper bounds on the number of generated tokens.
    "min_new_tokens": 50,
    "max_new_tokens": 300,
}


# Client bound to the token and project collected above.
prompt = Prompt(access_token, project_id)


# Generate for the first two test paragraphs only. Each prompt is the
# instruction, immediately followed by the few-shot examples, then a single
# space and the paragraph to process.
results = [
    prompt.generate(
        f"{instruction}{few_shot_examples[0]} {inp['input']}",
        model_id,
        parameters,
    )
    for inp in few_shot_inputs_[:2]
]


# Display the raw responses.
results

[{'generated_text': 'Simplification: UE A can then prompt the user to initiate a voice call to UE B 6a(Successful case). The RAB Assignment Request message is sent from MSC B to the RNC B, requesting the establishment of a RAB for a Video Call. The radio bearer is established between the RNC B and UE B. RNC B responds to MSC B with a RAB Assignment Response message. Following the allocation of the radio resources, UE B sends an Alerting message to',
  'generated_token_count': 118,
  'input_token_count': 556,
  'stop_reason': 'EOS_TOKEN'},
 {'generated_text': 'Simplification: The GMSC server interrogates the HLR in order to determine his current location. The HLR shall create an HLR interrogation record. The GMSC server routes the call to the VPLMN in which subscriber "B" is currently located. The GMSC server shall create an outgoing gateway record for accounting purposes. The GMSC server shall also create a roaming record.',
  'generated_token_count': 91,
  'input_token_count': 544,
  'stop_reason': 'EOS_TOKEN'}]

	Paragraph	Simplification
0	In 5G NR, for the procedures such as handover ...	In 5G NR, signal strength or signal quality ma...
1	5G NR has introduced cell signal measurement b...	5G NR measures cell signal with SS/PBCH Block ...
2	New Radio (NR) is the wireless standard and fo...	New Radio (NR) is the wireless standard and fo...
3	Current regulations in the U.S. allow a device...	Current literature and presentations at variou...
4	In general, it is very critical for a UE to c...	It is important for UE to consider the certain...

Use watsonx, and Google `flan-ul2` to summarize Cybersecurity documents¶

Disclaimers¶

Notebook content¶

Learning goal¶

Use case & dataset¶

Contents¶

Set up the environment¶

Install and import the `datasets` and dependencies¶

Inferencing class¶

watsonx API connection¶

Defining the project id¶

Data loading¶

Check the sample text and summary length.¶

Foundation Models on watsonx¶

List available models¶

Generate document summary¶

Defining the model parameters¶

Generate the cybersecurity: SPEC5G Cellular Network Protocol summary using `ul2` model.¶

Score the model¶

Cosine Similarity¶

Rouge Metric¶

Summary and next steps¶

Use watsonx, and Google flan-ul2 to summarize Cybersecurity documents¶

Disclaimers¶

Notebook content¶

Learning goal¶

Use case & dataset¶

Contents¶

Set up the environment¶

Install and import the datasets and dependencies¶

Inferencing class¶

watsonx API connection¶

Defining the project id¶

Data loading¶

Check the sample text and summary length.¶

Foundation Models on watsonx¶

List available models¶

Generate document summary¶

Defining the model parameters¶

Generate the cybersecurity: SPEC5G Cellular Network Protocol summary using ul2 model.¶

Score the model¶

Cosine Similarity¶

Rouge Metric¶

Summary and next steps¶

Use watsonx, and Google `flan-ul2` to summarize Cybersecurity documents¶

Install and import the `datasets` and dependencies¶

Generate the cybersecurity: SPEC5G Cellular Network Protocol summary using `ul2` model.¶