In this project, I performed sales conversion optimization by analyzing Facebook ad campaigns and predicting conversions using a Random Forest Regressor.

Aim: To optimize Sales conversion and predict future sales.

Approach: Exploratory Data Analysis using matplotlib and seaborn and model training using

Random Forest Regressor.

Importing Libraries

import numpy as np
import pandas as pd
import os

# Read the campaign CSV into a DataFrame.
csv_path = "File/path.csv"
df = pd.read_csv(csv_path)

# Preview the first rows.
df.head()

# Column dtypes and non-null counts (used to check for null values).
df.info()

# Doing Exploratory Data Analysis

df.shape

(1143, 11)


df.describe()



import matplotlib.pyplot as plt
import seaborn as sns

Correlation Matrix :

import matplotlib.pyplot as plt
import seaborn as sns

# Pairwise correlations between the numeric campaign metrics, drawn as an
# annotated heatmap (two decimals per cell).
metric_columns = [
    "Impressions",
    "Clicks",
    "Spent",
    "Total_Conversion",
    "Approved_Conversion",
]
metric_correlations = df[metric_columns].corr()
g = sns.heatmap(metric_correlations, annot=True, fmt=".2f", cmap="coolwarm")


# Here above it's clear, "Impressions" and "Total_Conversion" are more correlated with 
#"Approved_Conversion" than "Clicks" and "Spent".

df["abc_campaign_id"].unique()
array([ 916,  936, 1178], dtype=int64)

# Here we see that there are 3 different ad campaigns for the abc company.
# We'll replace their ids with the names campaign_a, campaign_b and campaign_c
# for better visualisation, because plotting the raw integer values causes problems.
df.head()
# Map the numeric campaign ids to readable labels.
# The result is assigned back to the column instead of calling
# replace(..., inplace=True) on df["abc_campaign_id"]: that form is a chained
# in-place operation, which is deprecated in pandas 2.x and does not modify
# df when Copy-on-Write is enabled.
campaign_labels = {916: "campaign_a", 936: "campaign_b", 1178: "campaign_c"}
df["abc_campaign_id"] = df["abc_campaign_id"].replace(campaign_labels)
df.head()
# Bar chart of the number of rows per campaign id.
sns.countplot(data=df, x="abc_campaign_id")

# Render the figure.
plt.show()

#This shows campaign_c has most number of ads.
#Approved_Conversion
# Creating our bar plot

# Total approved conversions per campaign.
# Passing the raw per-ad columns to plt.bar draws one bar per row at the same
# x position, so the bars overlap and only the largest single value per
# campaign is visible. Aggregating first makes each bar the campaign total.
approved_per_campaign = df.groupby("abc_campaign_id")["Approved_Conversion"].sum()
# Cast the ids to str so the x axis is categorical whether the column holds
# the integer ids or the campaign_a/b/c labels.
plt.bar(approved_per_campaign.index.astype(str), approved_per_campaign.values)
plt.ylabel("Approved_Conversion")
plt.title("company vs Approved_Conversion")
plt.show()


#It's clear from both the above graphs that campaign_c has the best Approved_Conversion count,
#i.e. most people bought products in campaign_c.
#Checking the distribution with age.

# Bar chart of the number of rows in each age group.
sns.countplot(data=df, x="age")
# Render the figure.
plt.show()


import seaborn as sns
sns.set(style="whitegrid")
# Approved conversions per campaign, split by age group.
# The plot is built from df directly; the original loaded seaborn's unrelated
# "tips" example dataset (a network download) and passed it as data= even
# though every plotted vector came from df.
sns.barplot(x="abc_campaign_id", y="Approved_Conversion", hue="age", data=df)




#It's interesting to note that in campaign_c and campaign_b, the age group of 30-34 shows more interest, 
#whereas in campaign_a the age group of 40-44 shows more interest.

# Bar chart of the number of rows per gender.

sns.countplot(data=df, x="gender")
# Render the figure.
plt.show()

import seaborn as sns
sns.set(style="whitegrid")
# Approved conversions per campaign, split by gender.
# The plot is built from df directly; the original loaded seaborn's unrelated
# "tips" example dataset (a network download) and passed it as data= even
# though every plotted vector came from df.
sns.barplot(x="abc_campaign_id", y="Approved_Conversion", hue="gender", data=df)

#Here we see that both genders show similar interest in all three campaigns.

# Bar chart of the number of rows per interest code, on a wide (15x6) figure
# so the many categories stay readable.
fig_dims = (15, 6)
fig, ax = plt.subplots(figsize=fig_dims)
sns.countplot(data=df, x="interest", ax=ax)
# Render the figure.
plt.show()


# Scatter of approved conversions against the interest code.
interest_values = df["interest"]
approved_values = df["Approved_Conversion"]
plt.scatter(interest_values, approved_values)
plt.xlabel("interest")
plt.ylabel("Approved_Conversion")
plt.title("interest vs. Approved_Conversion")
plt.show()


#It's interesting to note that, although the count of interest after 100 is less,there is a rise of users after 100 who actually bought the product. 
#Rest of the distribution is according to what was expected.

# interest vs. Approved_Conversion, faceted first by gender and then by age.
# g ends up bound to the age grid, the last one drawn.
for facet_column in ("gender", "age"):
    g = sns.FacetGrid(df, col=facet_column)
    g.map(plt.scatter, "interest", "Approved_Conversion", alpha=.4)
    g.add_legend()



# Distribution of the amount spent per ad (25 bins).
spent_values = df["Spent"]
plt.hist(spent_values, bins=25)
plt.ylabel("Frequency")
plt.xlabel("Spent")
plt.show()


# Approved conversions against the amount spent.
plt.scatter(spent_values, df["Approved_Conversion"])
plt.xlabel("Spent")
plt.ylabel("Approved_Conversion")
plt.title("Spent vs. Approved_Conversion")
plt.show()

#We can see, as the amount of money spent increases, no of product bought increases.

# Spent vs. Approved_Conversion, faceted first by gender and then by age.
# g ends up bound to the age grid, the last one drawn.
for facet_column in ("gender", "age"):
    g = sns.FacetGrid(df, col=facet_column)
    g.map(plt.scatter, "Spent", "Approved_Conversion", alpha=.4)
    g.add_legend()



# Distribution of impressions per ad (25 bins).
impression_values = df["Impressions"]
plt.hist(impression_values, bins=25)
plt.ylabel("Frequency")
plt.xlabel("Impressions")
plt.show()


# Approved conversions against impressions.
plt.scatter(impression_values, df["Approved_Conversion"])
plt.xlabel("Impressions")
plt.ylabel("Approved_Conversion")
plt.title("Impressions vs. Approved_Conversion")
plt.show()


#There is a sudden rise in Approved conversions after a certain point in Impressions.
#People who actually bought the product

#After Clicking the ad ?


# Clicks vs. Approved_Conversion, one panel per gender.
# map() and add_legend() both return the grid, so the calls can be chained.
g = (
    sns.FacetGrid(df, col="gender")
    .map(plt.scatter, "Clicks", "Approved_Conversion", alpha=.4)
    .add_legend()
)



#People in the 30-34 age group have a greater tendency to buy the product after clicking the ad.
#After enquiring the product?
#Let's see people who actually went from enquiring to buying the product.

Impressions
# Distribution of impressions per ad (25 bins).
impression_values = df["Impressions"]
plt.hist(impression_values, bins=25)
plt.ylabel("Frequency")
plt.xlabel("Impressions")
plt.show()
# Approved conversions against impressions.
plt.scatter(impression_values, df["Approved_Conversion"])
plt.xlabel("Impressions")
plt.ylabel("Approved_Conversion")
plt.title("Impressions vs. Approved_Conversion")
plt.show()

# Here we can see that there is a sudden rise in approved conversions (the people who
# actually bought the product) after a certain point in Impressions.

# Now we analyse: who bought after clicking the ad?
# Clicks vs. Approved_Conversion, one panel per gender.
# map() and add_legend() both return the grid, so the calls can be chained.
g = (
    sns.FacetGrid(df, col="gender")
    .map(plt.scatter, "Clicks", "Approved_Conversion", alpha=.4)
    .add_legend()
)

# Here it seems that men tend to click more than women, but
# women buy more products than men after clicking the ad.
# Clicks vs. Approved_Conversion, one panel per age group.
# map() and add_legend() both return the grid, so the calls can be chained.
g = (
    sns.FacetGrid(df, col="age")
    .map(plt.scatter, "Clicks", "Approved_Conversion", alpha=.4)
    .add_legend()
)

#People in the 30-34 age group have a greater tendency to buy the product after clicking the ad.
#After enquiring the product?
#Let's see people who actually went from enquiring to buying the product.
# Total_Conversion vs. Approved_Conversion, one panel per gender.
# map() and add_legend() both return the grid, so the calls can be chained.
g = (
    sns.FacetGrid(df, col="gender")
    .map(plt.scatter, "Total_Conversion", "Approved_Conversion", alpha=.4)
    .add_legend()
)
# Here it seems that men tend to click more than women, but women
# buy more products than men after clicking the ad.
# Total_Conversion vs. Approved_Conversion, one panel per age group.
# map() and add_legend() both return the grid, so the calls can be chained.
g = (
    sns.FacetGrid(df, col="age")
    .map(plt.scatter, "Total_Conversion", "Approved_Conversion", alpha=.5)
    .add_legend()
)

#It seems people in the 30-34 age group are more likely to buy
#the product after enquiring about it.
#Zooming into campaign_c(campaign with most approved_conversion)

# Build a small frame holding only the campaign_c rows, with the campaign
# column renamed to campaign_name and a fresh 0..n-1 index.
# A boolean mask does the filtering in one vectorised step, replacing the
# row-by-row Python loop that appended to three parallel lists.
is_campaign_c = df["abc_campaign_id"] == "campaign_c"
campaign_c = (
    df.loc[is_campaign_c, ["abc_campaign_id", "fb_campaign_id", "Approved_Conversion"]]
    .rename(columns={"abc_campaign_id": "campaign_name"})
    .reset_index(drop=True)
)
campaign_c.head()

#Distribution of fb_campaign_id with Approved_Conversion for campaign_c

# Wide (20x5) scatter of approved conversions per fb_campaign_id within campaign_c.
plt.figure(figsize=(20, 5))
fb_ids = campaign_c["fb_campaign_id"]
approved_values = campaign_c["Approved_Conversion"]
plt.scatter(fb_ids, approved_values)
plt.xlabel("fb_campaign_id")
plt.ylabel("Approved_Conversion")
plt.title("fb_campaign_id vs. Approved_Conversion for campaign_c")
plt.show()



#We can see fb_campaign_ids around 145000 have more Approved_Conversion than around 180000 for campaign_c

OBSERVATIONS
#"Impressions" and "Total_Conversion" are more correlated with "Approved_Conversion" than "Clicks" and "Spent".

#Campaign_C:
#campaign_c has the most ads.
#campaign_c has the best Approved_Conversion count, i.e. most people bought products in campaign_c.

#age_group:
#In campaign_c and campaign_b, the age group of 30-34 shows more interest, whereas in campaign_a the age group of 40-44 shows more interest.

#gender:
#Both genders show similar interest in all three campaigns.

#interest:
#Although the count of interest after 100 is less, there is a rise of users after 100 who actually bought the product.
#The rest of the distribution is as expected.

#money spent:
#As the amount of money spent increases, the number of products bought increases.
#There is a sudden rise in Approved_Conversion after a certain point in Impressions.

#Product bought after clicking the ad:
#It seems men tend to click more than women, but women buy more products than men after clicking the ad.
#People in the 30-34 age group have a greater tendency to buy the product after clicking the ad.

#Product bought after enquiring the ad:
#People in the 30-34 age group are more likely to buy the product after enquiring about it.


#Modelling

#Replacing the abc_campaign_id labels with the actual numeric ids again for modelling

# Map the readable campaign labels back to their numeric ids so the column
# can be used as a model feature.
# The result is assigned back to the column instead of calling
# replace(..., inplace=True) on df["abc_campaign_id"]: that form is a chained
# in-place operation, which is deprecated in pandas 2.x and does not modify
# df when Copy-on-Write is enabled.
campaign_ids = {"campaign_a": 916, "campaign_b": 936, "campaign_c": 1178}
df["abc_campaign_id"] = df["abc_campaign_id"].replace(campaign_ids)

#Encoding the Labels 'gender' and 'age' for better modelling


#encoding gender

from sklearn.preprocessing import LabelEncoder

# One encoder per column, so each keeps its own classes_ mapping and can be
# used later to decode values. Refitting a single encoder on "age" would
# discard the mapping learned for "gender".
gender_encoder = LabelEncoder()
df["gender"] = gender_encoder.fit_transform(df["gender"])
print(df["gender"])

#encoding age

age_encoder = LabelEncoder()
df["age"] = age_encoder.fit_transform(df["age"])
print(df["age"])

# Keep the original name available; as before, it ends up referring to the
# encoder fitted on "age".
encoder = age_encoder

df.head()


#Removing "Approved_Conversion" and "Total_Conversion" from dataset

# Features: every column except the two conversion columns.
x = np.array(df.drop(labels=["Approved_Conversion", "Total_Conversion"], axis=1))
# Target: Total_Conversion, kept one-dimensional. Reshaping it to a column
# vector makes RandomForestRegressor.fit emit a DataConversionWarning,
# because a single-output target is expected with shape (n_samples,).
y = np.array(df["Total_Conversion"])
y

#Splitting Data into testset and trainset

from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

from sklearn.preprocessing import StandardScaler  #Feature Scaling
# Fit the scaler on the training rows only and reuse those statistics for the
# test rows. Fitting on the full data set before splitting would leak
# test-set means and variances into training.
sc_x = StandardScaler()
x_train = sc_x.fit_transform(x_train)
x_test = sc_x.transform(x_test)

#Random Forest Regressor to predict Total_Conversion

from sklearn.ensemble import RandomForestRegressor

# Forest of 10 trees with a fixed random_state; fit() returns the estimator
# itself, so construction and training are chained.
forest_params = {"n_estimators": 10, "random_state": 0}
rfr = RandomForestRegressor(**forest_params).fit(x_train, y_train)

# Predict Total_Conversion for the test set and round each prediction to a
# whole number with np.round.
y_pred = np.round(rfr.predict(x_test))

y_pred



from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

# Error metrics of the rounded predictions on the held-out test set.
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
# Store R^2 under its own name. Rebinding r2_score would shadow the imported
# function (so it could not be called again), and the value is read below as
# r2_square.
r2_square = r2_score(y_test, y_pred)

mae

0.9912663755458515
The mean absolute error achieved is 0.99.
r2_square
0.7530816415210646

We obtained an R-squared value of 0.753, which means the model explains about 75.3% of the variance in Total_Conversion on the test set.
Please, upvote my work if it could help. Thank you!

Search This Blog

ANALYSTDHIREN

Facebook Ad-Campaigns Analysis /Sales-Prediction - Project B - python

Importing Libraries

Comments

Post a Comment

Popular posts from this blog

Docker

Data Analytics for Healthcare Industry

🔸Netflix ML Architecture🔸