This notebook is designed for a technical audience that has familiarity with large language models (LLMs). Familiarity with LangChain and the RAG pattern is preferred, but not required.
Retrieval-augmented generation, or RAG, is an architectural pattern for improving the responses generated by large language models (LLMs). It does this by augmenting queries to the LLM with additional context.
A basic LLM generation takes the query as is, or with some prompt engineering (e.g. prompt template, meta-prompting, chain-of-thought prompting, etc).
With RAG, the query is augmented with relevant knowledge from a knowledge base of some sort. This additional relevant knowledge (context) helps the LLM generate a response that is grounded in the supplied material rather than relying solely on what it learned during training.
Furthermore, the additional relevant knowledge provides some explainability for the LLM output: the retrieved text is a cross-referenceable set of information you can examine to see what the LLM is basing its answer on.
For the interested reader, here is the original RAG paper: Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks
A common implementation of the knowledge base is a vector store (or vector database). A vector store can efficiently index and search across vectors. With this implementation, a corpus of knowledge (as text) is stored as vectors known as embeddings. Embeddings are not just arbitrary numerical representations of text; instead, the representation captures the semantic meaning of what is being embedded. In other words, pieces of text that are semantically similar to each other will also be mathematically close to each other in vector space. An embedding model converts a chunk of text into its embedding representation.
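To make this more concrete, the short sketch below (optional, and not part of this notebook's RAG pipeline) embeds a few illustrative sentences with the sentence-transformers package installed in the Setup and Configuration section and compares them with cosine similarity; the two semantically related sentences score noticeably closer than the unrelated one. The sentences and the relative scores are purely illustrative.
from sentence_transformers import SentenceTransformer, util

# Illustrative only: embed a few example sentences and compare their similarity
sim_model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")
sentences = [
    "The president nominated a new Supreme Court justice.",  # illustrative example sentences
    "A new justice was appointed to the highest court.",
    "The weather was sunny and warm today.",
]
embeddings = sim_model.encode(sentences)  # one embedding vector per sentence

# Semantically similar sentences are closer in vector space (higher cosine similarity)
print(util.cos_sim(embeddings[0], embeddings[1]))  # related pair: relatively high
print(util.cos_sim(embeddings[0], embeddings[2]))  # unrelated pair: relatively low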
The following figure is a conceptual illustration of the RAG pattern implemented with a vector store. The encode and decode blocks represent the embedding model and the embedding-to-source-text lookup, respectively.
Here's an outline of this notebook.
Setup and Configuration section. We ensure all the required packages are installed and the configuration information (e.g. credentials) is provided.
Define Query section. We establish the query to be used. This is established up front because we will use the same query both for a basic completion with the LLM and for the RAG pattern.
Initialize Language Model section. We select and configure the large language model (LLM).
Perform Basic Completion section. We perform a basic completion with our query and LLM.
Get Data for Documents section. We get and preprocess (e.g. split) the data we want to use in our knowledge base.
Initialize Embedding Model section. We select and configure the embedding model we would like to use to encode our data for our knowledge base.
Initialize Vector Store section. We initialize our vector store with our data and embedding model.
Perform Similarity Search section. We use our initialized vector store and perform a similarity search with our query.
Perform RAG Generation section. We perform a completion with a RAG pipeline. In this version, we are explicitly passing the relevant docs (from our similarity search).
Perform RAG Generation with Q&A Chain Section. We perform a completion with a RAG pipeline. In this version, there is no explicit passing of relevant docs.
# Ignore warnings
import warnings
warnings.filterwarnings("ignore")
!pip install langchain -q
!pip install ibm-watson-machine-learning -q
!pip install wget -q
!pip install sentence-transformers -q
langchain: Orchestration framework
ibm-watson-machine-learning: For IBM LLMs
wget: To download knowledge base data
sentence-transformers: For embedding model

!pip install singlestoredb -q
!pip install sqlalchemy-singlestoredb -q
import os
import getpass
try:
wxa_url = os.environ["WXA_URL"]
except KeyError:
wxa_url = getpass.getpass("Please enter your watsonx.ai URL domain (hit enter): ")
try:
wxa_api_key = os.environ["WXA_API_KEY"]
except KeyError:
wxa_api_key = getpass.getpass("Please enter your watsonx.ai API key (hit enter): ")
try:
wxa_project_id = os.environ["WXA_PROJECT_ID"]
except KeyError:
wxa_project_id = getpass.getpass("Please enter your watsonx.ai Project ID (hit enter): ")
If you do not have a SingleStoreDB instance, you can start today with a free trial. The cells below collect the connection details (username, password, host, port, database, and table name) used to build the connection string:
try:
connection_user = os.environ["SINGLESTORE_USER"]
except KeyError:
connection_user = getpass.getpass("Please enter your SingleStore username (hit enter): ")
try:
connection_password = os.environ["SINGLESTORE_PASS"]
except KeyError:
connection_password = getpass.getpass("Please enter your SingleStore password (hit enter): ")
try:
connection_port = os.environ["SINGLESTORE_PORT"]
except KeyError:
connection_port = input("Please enter your SingleStore port (hit enter): ")
try:
connection_host = os.environ["SINGLESTORE_HOST"]
except KeyError:
connection_host = input("Please enter your SingleStore host (hit enter): ")
try:
database_name = os.environ["SINGLESTORE_DATABASE"]
except KeyError:
database_name = input("Please enter your SingleStore database name (hit enter): ")
try:
table_name = os.environ["SINGLESTORE_TABLE"]
except KeyError:
table_name = input("Please enter your SingleStore table name (hit enter): ")
query = "What did the president say about Ketanji Brown Jackson?"
For our language model, we will use Granite, an IBM-developed LLM.
from ibm_watson_machine_learning.foundation_models.utils.enums import ModelTypes
from ibm_watson_machine_learning.foundation_models import Model
from ibm_watson_machine_learning.metanames import GenTextParamsMetaNames as GenParams
from ibm_watson_machine_learning.foundation_models.utils.enums import DecodingMethods
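# Generation parameters: greedy decoding, between 1 and 100 new tokens per response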
parameters = {
GenParams.DECODING_METHOD: DecodingMethods.GREEDY,
GenParams.MIN_NEW_TOKENS: 1,
GenParams.MAX_NEW_TOKENS: 100
}
model = Model(
model_id=ModelTypes.GRANITE_13B_CHAT,
params=parameters,
credentials={
"url": wxa_url,
"apikey": wxa_api_key
},
project_id=wxa_project_id
)
from ibm_watson_machine_learning.foundation_models.extensions.langchain import WatsonxLLM
granite_llm_ibm = WatsonxLLM(model=model)
response = granite_llm_ibm(query)
print("Query: " + query)
print("Response: " + response)
Query: What did the president say about Ketanji Brown Jackson?
Response: The president said that Ketanji Brown Jackson is an “incredible judge” and that he is “proud” to have nominated her to the Supreme Court.<|endoftext|>
import wget
filename = './state_of_the_union.txt'
url = 'https://raw.github.com/IBM/watson-machine-learning-samples/master/cloud/data/foundation_models/state_of_the_union.txt'
if not os.path.isfile(filename):
wget.download(url, out=filename)
from langchain.document_loaders import TextLoader
loader = TextLoader(filename)
documents = loader.load()
from langchain.text_splitter import CharacterTextSplitter
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_documents(documents)
print("We split our document into " + str(len(texts)) + " chunks.")
We split our document into 42 chunks.
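If you want to see what a chunk looks like (optional), you can inspect the first Document produced by the splitter; each chunk keeps the source file in its metadata.
# Optional: inspect the first chunk produced by the splitter
print(texts[0].metadata)            # e.g. {'source': './state_of_the_union.txt'}
print(texts[0].page_content[:200])  # first 200 characters of the chunk text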
We will be using the default LangChain Hugging Face embedding model, which at the time of this writing is sentence-transformers/all-mpnet-base-v2.
from langchain.embeddings import HuggingFaceEmbeddings
embedding_model = HuggingFaceEmbeddings()
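As an optional sanity check, you can embed the query itself; embed_query returns a plain Python list of floats, which for all-mpnet-base-v2 is 768-dimensional.
# Optional: embed the query and inspect the resulting vector
query_vector = embedding_model.embed_query(query)
print("Embedding dimensions: " + str(len(query_vector)))  # 768 for all-mpnet-base-v2
print(query_vector[:5])  # first few components of the embedding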
from sqlalchemy import create_engine, text
connection_url = f"singlestoredb://{connection_user}:{connection_password}@{connection_host}:{connection_port}"
engine = create_engine(connection_url)
with engine.connect() as conn:
result = conn.execute(text("CREATE DATABASE IF NOT EXISTS " + database_name))
# Verify that the database was created
print("Available databases:")
with engine.connect() as conn:
result = conn.execute(text("SHOW DATABASES"))
for row in result:
print(row)
Available databases:
('cluster',)
('information_schema',)
('memsql',)
('movie_recommender',)
('movie_recommender2',)
('resume_evaluator',)
('tpch_optimized',)
('watsonx_ibm',)
with engine.connect() as conn:
result = conn.execute(text("DROP TABLE IF EXISTS " + database_name + "." + table_name))
# Connection string to use Langchain with SingleStoreDB
os.environ["SINGLESTOREDB_URL"] = f"{connection_user}:{connection_password}@{connection_host}:{connection_port}/{database_name}"
from langchain.vectorstores import SingleStoreDB
vectorstore = SingleStoreDB.from_documents(
texts,
embedding_model,
table_name = table_name
)
with engine.connect() as conn:
result = conn.execute(text("DESCRIBE " + database_name + "." + table_name))
print(database_name + "." + table_name + " table schema:")
for row in result:
print(row)
result = conn.execute(text("SELECT COUNT(vector) FROM " + database_name + "." + table_name))
print("\nNumber of rows in " + database_name + "." + table_name + ": " + str(result.first()[0]))
watsonx_ibm.docs_embeddings table schema:
('content', 'text', 'YES', '', None, '')
('vector', 'blob', 'YES', '', None, '')
('metadata', 'JSON', 'YES', '', None, '')

Number of rows in watsonx_ibm.docs_embeddings: 42
We find the texts most similar (i.e. most relevant) to our query. You can modify the number of results returned with the k parameter in the similarity_search method below.
texts_sim = vectorstore.similarity_search(query, k=5)
print("Number of relevant texts: " + str(len(texts_sim)))
Number of relevant texts: 5
print("First 100 characters of relevant texts.")
for i in range(len(texts_sim)):
print("Text " + str(i+1) + ": " + str(texts_sim[i].page_content[0:100]))
First 100 characters of relevant texts.
Text 1: Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Ac
Text 2: A former top litigator in private practice. A former federal public defender. And from a family of p
Text 3: As Frances Haugen, who is here with us tonight, has shown, we must hold social media platforms accou
Text 4: And I’m taking robust action to make sure the pain of our sanctions is targeted at Russia’s economy
Text 5: But cancer from prolonged exposure to burn pits ravaged Heath’s lungs and body. Danielle says Heat
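If you also want to see how close each retrieved chunk is to the query, the SingleStoreDB integration provides a similarity_search_with_score method that returns (document, score) pairs; a minimal sketch:
# Optional: retrieve the relevant chunks together with their similarity scores
docs_and_scores = vectorstore.similarity_search_with_score(query, k=5)
for doc, score in docs_and_scores:
    print(str(round(score, 4)) + " : " + doc.page_content[:80])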
RAG generation using our model, explicitly passing the relevant knowledge (documents) from our similarity search.
from langchain.chains.question_answering import load_qa_chain
chain = load_qa_chain(granite_llm_ibm, chain_type="stuff")
response = chain.run(input_documents=texts_sim, question=query)
print("Query: " + query)
print("Response:" + response)
Query: What did the president say about Ketanji Brown Jackson?
Response: The president said that Ketanji Brown Jackson is a consensus builder who will continue Justice Breyer's legacy of excellence.<|endoftext|>
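For intuition, the stuff chain essentially concatenates the retrieved chunks into a single prompt for the LLM. Below is a rough, hand-rolled equivalent; the exact prompt wording used by LangChain's built-in chain differs, so treat this as a sketch rather than the chain's actual implementation.
# Rough equivalent of the "stuff" chain: concatenate the retrieved chunks into one prompt
context = "\n\n".join(doc.page_content for doc in texts_sim)
prompt = (
    "Use the following pieces of context to answer the question at the end.\n\n"
    + context
    + "\n\nQuestion: " + query + "\nAnswer:"
)
print(granite_llm_ibm(prompt))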
RAG generation using a chain of our model and vector store. The chain handles getting the relevant knowledge (texts) under the hood.
from langchain.chains import RetrievalQA
qa = RetrievalQA.from_chain_type(llm=granite_llm_ibm, chain_type="stuff", retriever=vectorstore.as_retriever())
response = qa.run(query)
print("Query: " + query)
print("Response:" + response)
Query: What did the president say about Ketanji Brown Jackson?
Response: The president said that Ketanji Brown Jackson is a consensus builder who will continue Justice Breyer's legacy of excellence.<|endoftext|>
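To cross-reference which chunks the chain actually retrieved (the explainability benefit noted in the introduction), RetrievalQA can also return its source documents alongside the answer. A minimal sketch:
# Optional: return the retrieved chunks alongside the generated answer
qa_with_sources = RetrievalQA.from_chain_type(
    llm=granite_llm_ibm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever(),
    return_source_documents=True,
)
result = qa_with_sources({"query": query})
print("Response: " + result["result"])
for doc in result["source_documents"]:
    print("Source chunk: " + doc.page_content[:80])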
Copyright © 2023 IBM. This notebook and its source code are released under the terms of the MIT License.