# Customer purchase analysis script.
#
# Loads 'customer_purchase.csv', fills missing numeric values with column
# means, plots Age against Purchase, min-max scales Age and Income, and fits
# two linear regressions predicting Purchase: one from Age alone and one from
# Age and Income together. Metrics for both models are printed to stdout.

# Import necessary libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split


def _min_max_scale(column):
    """Return *column* rescaled linearly so its min maps to 0 and its max to 1.

    A constant column has zero range; it is mapped to all zeros rather than
    the NaN values a 0/0 division would produce, so the result can still be
    fed to a model.
    """
    low = column.min()
    span = column.max() - low
    if span == 0:
        return pd.Series(0.0, index=column.index)
    return (column - low) / span


# Load the dataset
data = pd.read_csv('customer_purchase.csv')

# Handle missing values using mean imputation.
# numeric_only=True keeps mean() from raising on text columns (pandas >= 2.0);
# fillna() with a Series only touches the columns named in that Series, so
# non-numeric columns are left as they are.
data.fillna(data.mean(numeric_only=True), inplace=True)

# Visualize the relationship between Age and Purchase
plt.scatter(data['Age'], data['Purchase'])
plt.title('Age vs Purchase')
plt.xlabel('Age')
plt.ylabel('Purchase')
plt.show()

# Normalize the Age and Income columns using Min-Max scaling.
# The min and max are taken over the full dataset, before the split below.
data['Age_scaled'] = _min_max_scale(data['Age'])
data['Income_scaled'] = _min_max_scale(data['Income'])

# --- Simple linear regression: Purchase ~ Age_scaled ---

# Prepare data
X = data[['Age_scaled']]
y = data['Purchase']

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Create and train the model
model_simple = LinearRegression()
model_simple.fit(X_train, y_train)

# Predict on the test set
y_pred_simple = model_simple.predict(X_test)

# Assessing Accuracy
print("Simple Linear Regression:")
print("Coefficients:", model_simple.coef_)
print("R-squared:", r2_score(y_test, y_pred_simple))
print("MSE:", mean_squared_error(y_test, y_pred_simple))

# --- Multiple linear regression: Purchase ~ Age_scaled + Income_scaled ---

# Prepare data
X = data[['Age_scaled', 'Income_scaled']]
y = data['Purchase']

# Split data into training and test sets (same seed as above, so the same
# rows land in the test set for both models)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Create and train the model
model_multiple = LinearRegression()
model_multiple.fit(X_train, y_train)

# Predict on the test set
y_pred_multiple = model_multiple.predict(X_test)

# Assessing Accuracy
print("\nMultiple Linear Regression:")
print("Intercept:", model_multiple.intercept_)
print("Coefficients:", model_multiple.coef_)
print("R-squared:", r2_score(y_test, y_pred_multiple))
print("MSE:", mean_squared_error(y_test, y_pred_multiple))

# X, y, X_train, X_test, y_train and y_test stay bound to the two-feature
# split above for any code that follows; re-running the identical split with
# the same seed would only reproduce the same values.