import os
import json
import pandas as pd
# we want to show large text snippets to be able to explore the relevant text
pd.options.display.max_colwidth = 400


import watson_nlp


gpu_available = False
try:
    hw_spec = os.environ['RUNTIME_HARDWARE_SPEC']
    if 'num_gpu' in json.loads(hw_spec):
        gpu_available = True
except:
    pass


url = "https://www.consumerfinance.gov/data-research/consumer-complaints/search/api/v1/?date_received_max=2021-03-30&date_received_min=2021-02-28&field=all&format=csv&has_narrative=true&no_aggs=true&size=18102"


df_all = pd.read_csv(url)
text_col = 'Consumer complaint narrative'

# In this example, we take only the first 1000 complaints in the dataset for further analysis. 
# Set df to df_all to run on the complete dataset.
df_small = df_all.head(1000)
df = df_small
df.head(3)


df['Product'].value_counts().sort_values().plot(kind='barh')

<Axes: >


import os
RBR_dicts_folder = "RBR_dicts"
os.makedirs(RBR_dicts_folder, exist_ok=True)


fraud_file = "fraud_mappings.csv"
with open(os.path.join(RBR_dicts_folder, fraud_file), 'w') as dict:
    dict.write("\"label\", \"entry\"\n")
    dict.write("\"FRAUD\", \"fraud\"\n")
    dict.write("\"FRAUD\", \"takeover\"\n")
    dict.write("\"FRAUD\", \"skimming\"\n")
    dict.write("\"FRAUD\", \"phishing\"\n")
    dict.write("\"THEFT\", \"theft\"\n")
    dict.write("\"THEFT\", \"thief\"\n")
    dict.write("\"THEFT\", \"thieve\"\n")
    dict.write("\"THEFT\", \"steal\"\n")


from watson_nlp.toolkit.rule_utils import DictionaryConfig

dictionary = DictionaryConfig.load( 
    {
        'name': 'fraud_mappings',
        'source': fraud_file,
        'dict_type': 'table',
        'case': 'insensitive',
        'lemma': True,
        'mappings': {
            'columns': ['label', 'entry'],
            'entry': 'entry'
        }
    }
)


trained_dict_RBR = watson_nlp.resources.feature_extractor.RBR.train(RBR_dicts_folder, language='en', dictionaries=[dictionary,])


def extract_dictionary(complaint_text):
    RBR_result = trained_dict_RBR.executor.get_raw_response(complaint_text, language='en')
    # aggregate all matches into one array with the match label and the text evidence
    matches = []
    for view in RBR_result['annotations'].keys():
        for dict_match in RBR_result['annotations'][view]:
            matches.append(tuple((dict_match['label'], dict_match['match']['text'])))
    return matches


# run dictionary extraction and create a dataframe holding the results
dict_matches = df[text_col].apply(lambda text: extract_dictionary(text))
dict_matches_df = pd.DataFrame(dict_matches)
dict_matches_df.rename(inplace=True, columns={text_col:'Fraud Indicators'})
# combine with the complaint dataframe
text_dict_matches_df = df[["Product", text_col]].merge(dict_matches_df, how='left', left_index=True, right_index=True)
text_dict_matches_df.head(10)


# create a column indicating if the complaint contains at least one *FRAUD* or *THEFT* match
text_dict_matches_df['THEFT'] = text_dict_matches_df['Fraud Indicators'].apply(lambda matches: 'THEFT' in set([match[0] for match in matches]))
text_dict_matches_df['FRAUD'] = text_dict_matches_df['Fraud Indicators'].apply(lambda matches: 'FRAUD' in set([match[0] for match in matches]))
text_dict_matches_df['BOTH'] = text_dict_matches_df['Fraud Indicators'].apply(lambda matches: 'FRAUD' in set([match[0] for match in matches]) and 'THEFT' in set([match[0] for match in matches]))

# get the relative frequency by product group
perc_theft = text_dict_matches_df.groupby('Product')['THEFT'].value_counts(normalize=True)
perc_fraud = text_dict_matches_df.groupby('Product')['FRAUD'].value_counts(normalize=True)
perc_both = text_dict_matches_df.groupby('Product')['BOTH'].value_counts(normalize=True)
# combine the values into one dataframe
res = pd.concat([perc_theft, perc_fraud, perc_both], axis=1).reset_index().rename(columns={"level_1": "is_contained"})
# only use the counts for rows indicating a match
res = res[res['is_contained']].drop('is_contained', axis=1).set_index('Product')
res.plot.barh().legend(loc='center left',bbox_to_anchor=(1.0, 0.5))

<matplotlib.legend.Legend at 0x7f01fd91ecb0>


if gpu_available:
    entity_transformer_model = watson_nlp.load('entity-mentions_transformer-workflow_multilingual_slate.153m.distilled')
else:
    entity_transformer_model = watson_nlp.load('entity-mentions_transformer-workflow_multilingual_slate.153m.distilled-cpu')


def extract_entities(complaint_text):
    entity_mentions = entity_transformer_model.run(complaint_text, 'en')
    # get text and type of entities
    entities = entity_mentions.get_mention_pairs()
    return entities


from tqdm.notebook import tqdm
tqdm.pandas(colour='green')


# run entity extraction and create a dataframe holding the results
entities = df[text_col].progress_apply(lambda text: extract_entities(text))
entities_df = pd.DataFrame(entities)
entities_df.rename(inplace=True, columns={text_col:'Entities'})
# combine with our complaint dataframe
text_entities_df = df[["Product", text_col]].merge(entities_df, how='left', left_index=True, right_index=True)
text_entities_df.head()

  0%|          | 0/1000 [00:00<?, ?it/s]


# helper method to extract the mention type from the (text, type) tuple
def get_entity_type(ent):
    if not pd.isna(ent):
        return ent[1]
    return 'None'

# create separate rows for each entity mention
exp_entities = text_entities_df.explode('Entities')
# extract the entity type of each mention
exp_entities['Entity Type'] = exp_entities['Entities'].apply(lambda ent: get_entity_type(ent))
# count the number of occurrences for each entity type
exp_entities['Entity Type'].value_counts().head(20).sort_values().plot(kind='barh')

<Axes: >


unstacked = exp_entities.groupby('Product')['Entity Type'].value_counts(normalize=True).unstack()
unstacked.plot.barh(stacked=True).legend(loc='center left',bbox_to_anchor=(1.0, 0.5))

<matplotlib.legend.Legend at 0x7f01f450ac80>


exp_entities.query('`Entity Type` == "JobTitle" and Product == "Vehicle loan or lease"').head()

	Date received	Product	Sub-product	Issue	Sub-issue	Consumer complaint narrative	Company public response	Company	State	ZIP code	Tags	Consumer consent provided?	Submitted via	Date sent to company	Company response to consumer	Timely response?	Consumer disputed?	Complaint ID
0	03/19/21	Debt collection	Other debt	Attempts to collect debt not owed	Debt is not yours	ALDOUS is claiming that I owe them {$910.00} but I didn't recognize this debt. Please Remove this inaccurate collection from my report.	Company believes it acted appropriately as authorized by contract or law	Aldous & Associates, PLLC	TX	77044	None	Consent provided	Web	03/19/21	Closed with explanation	Yes	NaN	4230577
1	03/26/21	Credit reporting, credit repair services, or other personal consumer reports	Credit reporting	Improper use of your report	Credit inquiries on your report that you don't recognize	There were over 20 inquires place on my credit report from XX/XX/2021 up until the current date. I have called Transunion about this issue asking to have them removed as I only authorized 1 credit check from XXXX XXXX as that's who financed my car loan. They refused to remove these inquires as they state I have to contact these agencies myself. I contacted them and they all stated they can not...	Company has responded to the consumer and the CFPB and chooses not to provide a public response	TRANSUNION INTERMEDIATE HOLDINGS, INC.	WI	53209	None	Consent provided	Web	03/26/21	Closed with explanation	Yes	NaN	4246597
2	03/17/21	Credit reporting, credit repair services, or other personal consumer reports	Credit reporting	Incorrect information on your report	Old information reappears or never goes away	I have tried to fix the names and addresses on my credit. Transunion and XXXX and XXXX won't delete these wrong names and addeeas off my credit report. \n\nCan someone please help me remove these wrong addresses and alias names that are not me from my report	Company has responded to the consumer and the CFPB and chooses not to provide a public response	TRANSUNION INTERMEDIATE HOLDINGS, INC.	TX	XXXXX	None	Consent provided	Web	03/17/21	Closed with explanation	Yes	NaN	4222711

	Product	Consumer complaint narrative	Fraud Indicators
0	Debt collection	ALDOUS is claiming that I owe them {$910.00} but I didn't recognize this debt. Please Remove this inaccurate collection from my report.	[]
1	Credit reporting, credit repair services, or other personal consumer reports	There were over 20 inquires place on my credit report from XX/XX/2021 up until the current date. I have called Transunion about this issue asking to have them removed as I only authorized 1 credit check from XXXX XXXX as that's who financed my car loan. They refused to remove these inquires as they state I have to contact these agencies myself. I contacted them and they all stated they can not...	[(FRAUD, fraud)]
2	Credit reporting, credit repair services, or other personal consumer reports	I have tried to fix the names and addresses on my credit. Transunion and XXXX and XXXX won't delete these wrong names and addeeas off my credit report. \n\nCan someone please help me remove these wrong addresses and alias names that are not me from my report	[]
3	Credit reporting, credit repair services, or other personal consumer reports	This is a medical collection per collection company, When i reached out to them i asked them to validate and verify that this was my account, give me signatures with me agreeing to pay them, information stating i owed them, procedures done, Hippa release form from orginal creditor for me allowing them to have my information, They will not supply me with any information and the credit bureaus k...	[]
4	Debt collection	To whom it may concern, Account # XXXX No. XXXX XXXX XXXX I have been getting harass and threatened to be sued by Blitt and Gaines P.C Attorney at Law about a debt that I have asked to be validated multiple times which they have come back with bank statement in which under the fair debt collection practices is not enough. \n\nIn addition I also checked my credit reports and this account number...	[]
5	Mortgage	I WAS NEVER LATE ON THIS ACCOUNT. I WAS IN A PANDEMIC RELIEF PLAN BUT THIS COMPANY MARKED ME 30 DAYS LATE FOR XX/XX/2021. I HAVE ATTACHED A SCREENSHOT OF THE PANDEMIC RELEIF PLAN DIRECTLY FROM THE COMPANY 'S WEBSITE.	[]
6	Debt collection	XXXX XXXX XXXX XXXX XXXX XXXX XXXX, MI XXXX Social Security # XXXX DOB : XX/XX/XXXX XXXX XXXX XXXX, XXXX XXXX XXXX XXXX, XXXX, Texas XXXX XXXX XXXX XXXX XXXX, XXXX XXXX. XXXX XXXXXXXX, XXXX, GA XXXX XXXX XXXX XXXX, XXXX XXXX. XXXX XXXXXXXX, XXXX, PA XXXX DISCLOSURE : THIS IS NOT AN IDENTITY THEFT DISPUTE, PLEASE REFRAIN FROM TAKING ANY POSITION OF IDENTITY THEFT EITHER WITH ANY CREDIT REPORT...	[(THEFT, THEFT), (THEFT, THEFT), (THEFT, theft), (FRAUD, fraud), (THEFT, THEFT), (THEFT, THEFT)]
7	Credit reporting, credit repair services, or other personal consumer reports	Ive sent Equifax countless letters on XXXX, XX/XX/2020. regarding this account that is on my consumer credit report which is a deformation of my character! This account goes against FCRA section 623. All negative information on your credit report must have information so you can dispute it but it dont so this is a violate also How can you send a debt validation if you cant find who report it?	[]
8	Checking or savings account	I bank with regions and I was looking to rent a unit and set up a payment for XXXX dollars to secure the lease but after I made the payment and it was put on hold I found the woman did not have the keys I was being scammed probably so I immediately called regions to cancel the on hold payment which had not been taken from my account yet. The first woman I spoke to said she would have to verify...	[]
9	Credit reporting, credit repair services, or other personal consumer reports	I have filed a dispute in regards to the incorrect items on my credit report. It has been well over 30 days and I haven't received any investigation results.	[]

	Product	Consumer complaint narrative	Entities
0	Debt collection	ALDOUS is claiming that I owe them {$910.00} but I didn't recognize this debt. Please Remove this inaccurate collection from my report.	[(ALDOUS, Organization), ($910.00, Money)]
1	Credit reporting, credit repair services, or other personal consumer reports	There were over 20 inquires place on my credit report from XX/XX/2021 up until the current date. I have called Transunion about this issue asking to have them removed as I only authorized 1 credit check from XXXX XXXX as that's who financed my car loan. They refused to remove these inquires as they state I have to contact these agencies myself. I contacted them and they all stated they can not...	[(20, Number), (XX/XX/2021, Date), (Transunion, Organization), (1, Number), (XXXX, Organization), (XXXX, Person), (Transunion, Organization)]
2	Credit reporting, credit repair services, or other personal consumer reports	I have tried to fix the names and addresses on my credit. Transunion and XXXX and XXXX won't delete these wrong names and addeeas off my credit report. \n\nCan someone please help me remove these wrong addresses and alias names that are not me from my report	[(Transunion, Organization), (XXXX, Organization), (XXXX, Organization)]
3	Credit reporting, credit repair services, or other personal consumer reports	This is a medical collection per collection company, When i reached out to them i asked them to validate and verify that this was my account, give me signatures with me agreeing to pay them, information stating i owed them, procedures done, Hippa release form from orginal creditor for me allowing them to have my information, They will not supply me with any information and the credit bureaus k...	[(Hippa, Person), (creditor, JobTitle), (two, Number)]
4	Debt collection	To whom it may concern, Account # XXXX No. XXXX XXXX XXXX I have been getting harass and threatened to be sued by Blitt and Gaines P.C Attorney at Law about a debt that I have asked to be validated multiple times which they have come back with bank statement in which under the fair debt collection practices is not enough. \n\nIn addition I also checked my credit reports and this account number...	[(Blitt, Person), (Gaines, Organization), (P.C Attorney, JobTitle)]

	Product	Consumer complaint narrative	Entities	Entity Type
46	Vehicle loan or lease	I purchased a car was targetted to pay a insanely high rate after trading in my vehicle to purchase a new XXXX XXXX. The engine went bad and other things occurred during the time I had it which coated thousands. I've said XXXX on a XXXX car. they targetted me on a bad loan, called me none stop, lied and charged me more. This has destroyed my credit I can't find a good lawyer. I have zero paper...	(lawyer, JobTitle)	JobTitle
344	Vehicle loan or lease	I was looking through my report and noticed a vehicle that I returned to the dealership where it's reporting completely incorrect. I had the opportunity to talk to several lawmakers and friends, and learned some basic laws in regards to voluntary or repossession of a vehicle. Under the laws of MASS and UCC 9.506 as well as State RISA and MVISA statutes, a deficiency can not be claimed unless a...	(lawmakers, JobTitle)	JobTitle
377	Vehicle loan or lease	On XXXX XXXX I signed a " promissory note '' to get a " loan '' from GM Financial. However was not disclosed full details of the consumer credit transaction and how the transaction would be fulfilled. In addition there was no consideration given for the promissory note that was recorded on the companys books. The vehicle was also repossessed which is illegal as the the property was not the com...	(Legal, JobTitle)	JobTitle
463	Vehicle loan or lease	( I apologize for the format of this statement. The dates/times will be paraphrased. The paraphrased remarks will then be elaborated on in a paragraph below. \n\nXX/XX/XXXX - received letters from Kia financial regarding outstanding balance that needed to be paid before repossession. Also received a separate repossession letter. Contacted by Kia - set up a payment arrangement to pay and contin...	(rep, JobTitle)	JobTitle
463	Vehicle loan or lease	( I apologize for the format of this statement. The dates/times will be paraphrased. The paraphrased remarks will then be elaborated on in a paragraph below. \n\nXX/XX/XXXX - received letters from Kia financial regarding outstanding balance that needed to be paid before repossession. Also received a separate repossession letter. Contacted by Kia - set up a payment arrangement to pay and contin...	(sheriff, JobTitle)	JobTitle

Entity extraction on Financial Complaints with Watson NLP¶

What you'll learn in this notebook¶

Table of Contents¶

Before you start¶

Load the complaints¶

Custom term extraction using dictionaries¶

Entity extraction¶

Summary¶

Authors¶