import pandas as pd

import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap

%matplotlib inline


known_behaviors = pd.read_csv("https://raw.githubusercontent.com/vberaudi/utwt/master/known_behaviors2.csv")
known_behaviors.head()


a = known_behaviors[known_behaviors.Mortgage == 1]
b = known_behaviors[known_behaviors.Pension == 1]
c = known_behaviors[known_behaviors.Savings == 1]
print("Number of clients: %d" %len(known_behaviors))
print("Number of clients predicted to buy mortgage accounts: %d" %len(a))
print("Number of clients predicted to buy pension accounts: %d" %len(b))
print("Number of clients predicted to buy savings accounts: %d" %len(c))

Number of clients: 11023
Number of clients predicted to buy mortgage accounts: 1664
Number of clients predicted to buy pension accounts: 2456
Number of clients predicted to buy savings accounts: 4222


known_behaviors["nb_products"] = known_behaviors.Mortgage + known_behaviors.Pension + known_behaviors.Savings


abc = known_behaviors[known_behaviors.nb_products > 1]
print("You have %d clients who bought several products" %len(abc))
abc = known_behaviors[known_behaviors.nb_products == 3]
print("You have %d clients who bought all the products" %len(abc))

You have 1650 clients who bought several products
You have 123 clients who bought all the products


products = ["Savings", "Mortgage", "Pension"]


def plot_cloud_points(df):
    figure = plt.figure(figsize=(20, 5))
    my_cm  = ListedColormap(['#bb0000', '#00FF00'])
    axes = {p : ('age', 'income') if p != "Mortgage"else ('members_in_household', 'loan_accounts') for p in products}
    for product in products:
        ax = plt.subplot(1, len(products), products.index(product)+1)
        ax.set_title(product)
        axe = axes[product]
        plt.xlabel(axe[0])
        plt.ylabel(axe[1])
        ax.scatter(df[axe[0]], df[axe[1]], c=df[product], cmap=my_cm, alpha=0.5)


plot_cloud_points(known_behaviors)


known_behaviors.columns

Index(['customer_id', 'age', 'age_youngest_child', 'debt_equity', 'gender',
       'bad_payment', 'gold_card', 'pension_plan',
       'household_debt_to_equity_ratio', 'income', 'members_in_household',
       'months_current_account', 'months_customer', 'call_center_contacts',
       'loan_accounts', 'number_products', 'number_transactions',
       'non_worker_percentage', 'white_collar_percentage', 'rfm_score',
       'Mortgage', 'Pension', 'Savings', 'nb_products'],
      dtype='object')


cols = ['age', 'income', 'members_in_household', 'loan_accounts']


X = known_behaviors[cols]
ys = [known_behaviors[p] for p in products]


X.head()


from sklearn import svm
from sklearn import ensemble


classifiers = []
for i,p in enumerate(products):
    clf = ensemble.GradientBoostingClassifier()
    clf.fit(X, ys[i])
    classifiers.append(clf)


unknown_behaviors = pd.read_csv("https://raw.githubusercontent.com/vberaudi/utwt/master/unknown_behaviors.csv")


for c in unknown_behaviors.columns:
    assert c in known_behaviors.columns


to_predict = unknown_behaviors[cols]


print("Number of new customers: %d" %len(unknown_behaviors))

Number of new customers: 2756


import warnings
warnings.filterwarnings('ignore')


predicted = [classifiers[i].predict(to_predict) for i in range(len(products))]
for i,p in enumerate(products):
    to_predict[p] = predicted[i]
to_predict["id"] = unknown_behaviors["customer_id"]


offers = to_predict
offers.head()


plot_cloud_points(offers)


a = offers[offers.Mortgage == 1]
b = offers[offers.Pension == 1]
c = offers[offers.Savings == 1]
print("Number of new customers: %d" %len(offers))
print("Number of customers predicted to buy mortgages: %d" %len(a))
print("Number of customers predicted to buy pensions: %d" %len(b))
print("Number of customers predicted to buy savings: %d" %len(c))

Number of new customers: 2756
Number of customers predicted to buy mortgages: 380
Number of customers predicted to buy pensions: 142
Number of customers predicted to buy savings: 713


to_predict["nb_products"] = to_predict.Mortgage + to_predict.Pension + to_predict.Savings

abc = to_predict[to_predict.nb_products > 1]
print("It's predicted that %d clients would buy more than one product" %len(abc))
abc = to_predict[to_predict.nb_products == 3]
print("It's predicted that %d clients would buy all three products" %len(abc))

It's predicted that 112 clients would buy more than one product
It's predicted that 0 clients would buy all three products


offers.reset_index(inplace=True)


# How much revenue is earned when selling each product
productValue = [200, 300, 400]
value_per_product = {products[i] : productValue[i] for i in range(len(products))}

# Total available budget
availableBudget = 25000

# For each channel, cost of making a marketing action and success factor
channels =  pd.DataFrame(data=[("gift", 20.0, 0.20), 
                               ("newsletter", 15.0, 0.05), 
                               ("seminar", 23.0, 0.30)], columns=["name", "cost", "factor"])

offersR = range(0, len(offers))
productsR = range(0, len(products))
channelsR = range(0, len(channels))


gsol = pd.DataFrame()
gsol['id'] = offers['id']

budget = 0
revenue = 0

for product in products:
    gsol[product] = 0
   
noffers = len(offers)

# ensure the 10% per channel by choosing the most promising per channel
for c in channelsR: #, channel in channels.iterrows():
    i = 0;
    while (i< ( noffers // 10 ) ):
        # find a possible offer in this channel for a customer not yet done
        added = False
        for o  in offersR:
            already = False
            for product in products:   
                if gsol.at[o, product] == 1:
                    already = True
                    break
            if already:
                continue
            possible = False
            possibleProduct = None
            for product in products:
                if offers.at[o, product] == 1:
                    possible = True
                    possibleProduct = product
                    break
            if not possible:
                continue
            #print "Assigning customer ", offers.at[o, "id"], " with product ", product, " and channel ", channel['name']
            gsol.at[o, possibleProduct]=1
            i = i+1
            added = True
            budget = budget + channels.at[c, "cost"]
            revenue = revenue + channels.at[c, "factor"]*value_per_product[product]            
            break
        if not added:
            print("NOT FEASIBLE")
            break


# add more to complete budget       
while (True):
    added = False
    for c, channel in channels.iterrows():
        if (budget + channel.cost > availableBudget):
            continue
        # find a possible offer in this channel for a customer not yet done
        for o  in offersR:
            already = False
            for product in products:   
                if gsol.at[o, product] == 1:
                    already = True
                    break
            if already:
                continue
            possible = False
            possibleProduct = None
            for product in products:
                if offers.at[o, product] == 1:
                    possible = True
                    possibleProduct = product
                    break
            if not possible:
                continue
            #print "Assigning customer ", offers.get_value(index=o, col="id"), " with product ", product, " and channel ", channel['name']
            gsol.at[o, possibleProduct]=1
            i = i+1
            added = True
            budget = budget + channel.cost
            revenue = revenue + channel.factor*value_per_product[product]            
            break
    if not added:
        print("FINISH BUDGET")
        break
    
print(gsol.head())

FINISH BUDGET
      id  Savings  Mortgage  Pension
0  44256        0         0        0
1  46883        0         0        0
2  32387        0         0        0
3  25504        0         0        0
4  35979        0         1        0


a = gsol[gsol.Mortgage == 1]
b = gsol[gsol.Pension == 1]
c = gsol[gsol.Savings == 1]

abc = gsol[(gsol.Mortgage == 1) | (gsol.Pension == 1) | (gsol.Savings == 1)]

print("Number of clients: %d" %len(abc))
print("Numbers of Mortgage offers: %d" %len(a))
print("Numbers of Pension offers: %d" %len(b))
print("Numbers of Savings offers: %d" %len(c))
print("Total Budget Spent: %d" %budget)
print("Total revenue: %d" %revenue)

Number of clients: 1123
Numbers of Mortgage offers: 299
Numbers of Pension offers: 111
Numbers of Savings offers: 713
Total Budget Spent: 21712
Total revenue: 50800


import sys
import docplex.mp


from docplex.mp.model import Model

mdl = Model(name="marketing_campaign", checker='on')


channelVars = mdl.binary_var_cube(offersR, productsR, channelsR)


# At most 1 product is offered to each customer
mdl.add_constraints( mdl.sum(channelVars[o,p,c] for p in productsR for c in channelsR) <=1
                   for o in offersR)

# Do not exceed the budget
mdl.add_constraint( mdl.sum(channelVars[o,p,c]*channels.at[c, "cost"] 
                                           for o in offersR 
                                           for p in productsR 
                                           for c in channelsR)  <= availableBudget, "budget")  

# At least 10% offers per channel
for c in channelsR:
    mdl.add_constraint(mdl.sum(channelVars[o,p,c] for p in productsR for o in offersR) >= len(offers) // 10)

mdl.print_information()

Model: marketing_campaign
 - number of variables: 24804
   - binary=24804, integer=0, continuous=0
 - number of constraints: 2760
   - linear=2760
 - parameters: defaults
 - objective: none
 - problem type is: MILP


obj = 0

for c in channelsR:
    for p in productsR:
        product=products[p]
        coef = channels.at[c, "factor"] * value_per_product[product]
        obj += mdl.sum(channelVars[o,p,c] * coef* offers.at[o, product] for o in offersR)

mdl.maximize(obj)


mdl.parameters.timelimit = 30


s = mdl.solve()
assert s, "No Solution !!!"


print(mdl.get_solve_status())
print(mdl.get_solve_details())

JobSolveStatus.OPTIMAL_SOLUTION
status  = integer optimal solution
time    = 0.245473 s.
problem = MILP
gap     = 0%


totaloffers = mdl.sum(channelVars[o,p,c] 
                      for o in offersR
                      for p in productsR 
                      for c in channelsR)
mdl.add_kpi(totaloffers, "nb_offers")

budgetSpent = mdl.sum(channelVars[o,p,c]*channels.at[c, "cost"] 
                                           for o in offersR 
                                           for p in productsR 
                                           for c in channelsR)
mdl.add_kpi(budgetSpent, "budgetSpent")

for c in channelsR:
    channel = channels.at[c, "name"]
    kpi = mdl.sum(channelVars[o,p,c] for p in productsR for o in offersR)
    mdl.add_kpi(kpi, channel)

for p in productsR:
    product = products[p]
    kpi = mdl.sum(channelVars[o,p,c] for c in channelsR for o in offersR)
    mdl.add_kpi(kpi, product)


mdl.report()

* model marketing_campaign solved with objective = 72620.000
*  KPI: nb_offers   = 1218.000
*  KPI: budgetSpent = 24989.000
*  KPI: gift        = 275.000
*  KPI: newsletter  = 275.000
*  KPI: seminar     = 668.000
*  KPI: Savings     = 690.000
*  KPI: Mortgage    = 381.000
*  KPI: Pension     = 147.000


#get the hand on the budget constraint
ct = mdl.get_constraint_by_name("budget")


res = []
for i in range(20):
    ct.rhs = availableBudget+1000*i
    s = mdl.solve()
    assert s, "No Solution !!!"
    res.append((availableBudget+1000*i, mdl.objective_value, mdl.kpi_value_by_name("nb_offers"), mdl.kpi_value_by_name("budgetSpent")))


mdl.report()

* model marketing_campaign solved with objective = 87120.000
*  KPI: nb_offers   = 1677.000
*  KPI: budgetSpent = 35546.000
*  KPI: gift        = 275.000
*  KPI: newsletter  = 275.000
*  KPI: seminar     = 1127.000
*  KPI: Savings     = 1108.000
*  KPI: Mortgage    = 416.000
*  KPI: Pension     = 153.000


pd.DataFrame(res, columns=["budget", "revenue", "nb_offers", "budgetSpent"])


ct.rhs = 0


s = mdl.solve()


if not s:
    #rename the constraint with a "low" prefix to automatically put a low priority on it.
    ct.name = "low_budget"
    #setting all bool vars to 0 is an easy relaxation, so let's refuse it and force to offer something to 1/3 of the clients
    mdl.add_constraint(totaloffers >= len(offers)//20, ctname="high")
    
    # solve has failed, trying relaxation, based on constraint names
    # constraints are prioritized according to their names
    # if a name contains "low", it has priority LOW
    # if a ct name contains "medium" it has priority MEDIUM
    # same for HIGH
    # if a constraint has no name or does not match any, it is not relaxable.
    from docplex.mp.relaxer import Relaxer
    relaxer = Relaxer(prioritizer='match', verbose=True)

    relaxed_sol = relaxer.relax(mdl)
    relaxed_ok = relaxed_sol is not None
    assert relaxed_ok, "relaxation failed"
    relaxer.print_information()

Warning: 2759 constraint(s) will not be relaxed (e.g.: x1 + x2 + x3 + x4 + x5 + x6 + x7 + x8 + x9 <= 1)
* number of relaxations: 1
 - relaxed: low_budget, with relaxation: 15950.0000002
* total absolute relaxation: 15950.0000002


mdl.report()
print(mdl.get_solve_status())
print(mdl.get_solve_details())

* model marketing_campaign solved with objective = 47580.000
*  KPI: nb_offers   = 825.000
*  KPI: budgetSpent = 15950.000
*  KPI: gift        = 275.000
*  KPI: newsletter  = 275.000
*  KPI: seminar     = 275.000
*  KPI: Savings     = 309.000
*  KPI: Mortgage    = 374.000
*  KPI: Pension     = 142.000
JobSolveStatus.INFEASIBLE_SOLUTION
status  = optimal relaxed sum of infeasibilities
time    = 0.375536 s.
problem = MILP
gap     = 0%

	budget	revenue	nb_offers	budgetSpent
0	25000	72620.0	1218.0	24989.0
1	26000	74800.0	1262.0	25998.0
2	27000	76970.0	1305.0	26990.0
3	28000	79150.0	1349.0	27999.0
4	29000	81320.0	1392.0	28991.0
5	30000	82360.0	1436.0	30000.0
6	31000	83240.0	1479.0	30992.0
7	32000	84100.0	1522.0	31981.0
8	33000	84980.0	1566.0	32993.0
9	34000	85840.0	1609.0	33982.0
10	35000	86720.0	1653.0	34994.0
11	36000	87120.0	1677.0	35546.0
12	37000	87120.0	1677.0	35546.0
13	38000	87120.0	1677.0	35546.0
14	39000	87120.0	1677.0	35546.0
15	40000	87120.0	1677.0	35546.0
16	41000	87120.0	1677.0	35546.0
17	42000	87120.0	1677.0	35546.0
18	43000	87120.0	1677.0	35546.0
19	44000	87120.0	1677.0	35546.0

Scenario	Budget	Revenue	Number of clients	Mortgage offers	Pension offers	Savings offers
Standard	25000	72600	1218	381	117	691
Minimum	16000	47500	825	374	142	309
Maximum	35500	87000	1680	406	155	1119

Promoting financial products to bank customers¶

Predictive and prescriptive workflow¶

How Decision Optimization can help¶

Understand the historical data¶

Load the historical customer data with their purchases (Mortgage, Savings, and Pension).¶

Check the 2016 customers¶

Provide some visual analysis of the historical data¶

Understanding the 2016 customers¶

Predict the 2017 customer behavior¶

Create and train a simple machine-learning algorithm to predict what the new clients will buy.¶

New customer data and predictions¶

Load new customer data¶

Predict behaviors of the new customers¶

Package new data with predictions for optimization¶

Perform some visual analysis of the predicted data¶

Remarks on the prediction¶

Get business decisions for the 2017 data¶

Assign campaigns to customers¶

Some input data for optimization¶

Using a greedy algorithm¶

Using IBM Decision Optimization CPLEX Modeling for Python¶

Set up the prescriptive model¶

Define the decision variables¶

Set up the constraints¶

Express the objective¶

Solve with the Decision Optimization solve service¶

Analyze the solution¶

What if our budget is increased?¶

Dealing with infeasibility¶

Conclusion¶

Algorithm comparison¶

Marketing campaign analysis¶

Summary¶

References¶

Authors¶

	customer_id	age	age_youngest_child	debt_equity	gender	household_debt_to_equity_ratio	income	...	call_center_contacts	loan_accounts	number_products	number_transactions	non_worker_percentage	white_collar_percentage	rfm_score	Pension
0	15	45	12	45	0	65	13453	...	0	4	2	1	14	19	7.602	0
1	16	43	12	43	0	65	13453	...	0	0	3	2	14	19	10.143	0
2	30	23	0	23	0	65	13453	...	0	1	0	0	14	19	0.000	0
3	42	35	8	35	1	65	13453	...	0	1	0	0	14	19	0.000	1
4	52	43	12	43	1	47	14124	...	3	1	0	0	16	35	0.000	1

	age	income	members_in_household	loan_accounts	Mortgage	id
0	38	47958.0	4	1	0	44256
1	30	48606.0	2	4	0	46883
2	41	42152.0	4	0	0	32387
3	42	39788.0	3	3	0	25504
4	42	44365.0	6	2	1	35979