add code blocks for practical 1; linear regression.
This commit is contained in:
+186
@@ -0,0 +1,186 @@
|
|||||||
|
# Practical-1 (Linear Regression using Deep Neural Network)
|
||||||
|
|
||||||
|
Problem Statement: Linear regression using a deep neural network: implement the Boston housing price prediction problem as a linear regression task using a deep neural network. Use the Boston house price prediction dataset.
|
||||||
|
|
||||||
|
> [!NOTE]
|
||||||
|
> The dataset is available in the [Datasets](../Datasets/boston.csv) directory.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Prerequisites
|
||||||
|
|
||||||
|
1. Install packages using `pip`: `pip install tensorflow keras pandas numpy scikit-learn matplotlib seaborn` (`tensorflow` requires Python 3.9 - 3.12)
|
||||||
|
2. Copy the `boston.csv` dataset into the same directory as the Jupyter notebook.
|
||||||
|
|
||||||
|
## Steps
|
||||||
|
|
||||||
|
1. Import Libraries
|
||||||
|
2. Load Dataset
|
||||||
|
3. Exploratory Data Analysis (EDA)
|
||||||
|
4. Check for Missing Values
|
||||||
|
5. Correlation Heatmap
|
||||||
|
6. Separate Features and Target
|
||||||
|
7. Split into Training and Testing Sets
|
||||||
|
8. Feature Scaling (Standardization)
|
||||||
|
9. Build the Neural Network Model
|
||||||
|
10. Compile the Model
|
||||||
|
11. Train the Model
|
||||||
|
12. Evaluate the Model on Test Data
|
||||||
|
13. Make Predictions
|
||||||
|
14. Plot Training vs Validation Loss
|
||||||
|
15. Plot Predicted vs Actual Prices
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Code
|
||||||
|
|
||||||
|
### 1. Import Libraries:
|
||||||
|
|
||||||
|
```python3
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import seaborn as sns
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
from sklearn.preprocessing import StandardScaler
|
||||||
|
from keras import Input
|
||||||
|
from keras.models import Sequential
|
||||||
|
from keras.layers import Dense
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. Load Dataset:
|
||||||
|
|
||||||
|
```python3
|
||||||
|
data = pd.read_csv('boston.csv')
|
||||||
|
print(data.head())
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3. Exploratory Data Analysis (EDA):
|
||||||
|
|
||||||
|
```python3
|
||||||
|
print("Shape:", data.shape) # number of rows and columns
|
||||||
|
print("\nData Types:\n", data.dtypes)
|
||||||
|
print("\nStatistical Summary:\n", data.describe()) # min, max, mean, std, etc.
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4. Check for Missing Values:
|
||||||
|
|
||||||
|
```python3
|
||||||
|
print("Missing values per column:\n", data.isnull().sum())
|
||||||
|
|
||||||
|
# Drop rows with missing values (if any)
|
||||||
|
data = data.dropna()
|
||||||
|
print("\nShape after dropping nulls:", data.shape)
|
||||||
|
```
|
||||||
|
|
||||||
|
### 5. Correlation Heatmap:
|
||||||
|
|
||||||
|
```python3
|
||||||
|
plt.figure(figsize=(12, 8))
|
||||||
|
sns.heatmap(data.corr(), annot=True, fmt=".2f", cmap="coolwarm") # show correlation between all feature pairs
|
||||||
|
plt.title("Feature Correlation Heatmap")
|
||||||
|
plt.tight_layout()
|
||||||
|
plt.show()
|
||||||
|
```
|
||||||
|
|
||||||
|
### 6. Separate Features and Target:
|
||||||
|
|
||||||
|
```python3
|
||||||
|
X = data.drop('MEDV', axis=1) # all columns except house price
|
||||||
|
y = data['MEDV'] # target: median house price
|
||||||
|
```
|
||||||
|
|
||||||
|
### 7. Split into Training and Testing Sets:
|
||||||
|
|
||||||
|
```python3
|
||||||
|
# 80% train, 20% test; random_state=42 ensures reproducible split
|
||||||
|
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
|
||||||
|
```
|
||||||
|
|
||||||
|
### 8. Feature Scaling (Standardization):
|
||||||
|
|
||||||
|
```python3
|
||||||
|
scaler = StandardScaler()
|
||||||
|
|
||||||
|
X_train = scaler.fit_transform(X_train) # learn mean/std from train, then scale
|
||||||
|
X_test = scaler.transform(X_test) # apply same mean/std to test (no leakage)
|
||||||
|
```
|
||||||
|
|
||||||
|
### 9. Build the Neural Network Model:
|
||||||
|
|
||||||
|
```python3
|
||||||
|
model = Sequential()
|
||||||
|
model.add(Input(shape=(X_train.shape[1],))) # input shape = number of features
|
||||||
|
model.add(Dense(64, activation='relu')) # hidden layer 1: 64 neurons
|
||||||
|
model.add(Dense(32, activation='relu')) # hidden layer 2: 32 neurons
|
||||||
|
model.add(Dense(1, activation='linear')) # output layer: single value (house price)
|
||||||
|
|
||||||
|
model.summary()
|
||||||
|
```
|
||||||
|
|
||||||
|
### 10. Compile the Model:
|
||||||
|
|
||||||
|
```python3
|
||||||
|
# adam: adaptive optimizer; mse: standard regression loss; mae: human-readable error metric
|
||||||
|
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])
|
||||||
|
```
|
||||||
|
|
||||||
|
### 11. Train the Model:
|
||||||
|
|
||||||
|
```python3
|
||||||
|
# validation_split=0.2 reserves 20% of training data to monitor val loss each epoch
|
||||||
|
history = model.fit(X_train, y_train, epochs=100, batch_size=32, validation_split=0.2)
|
||||||
|
```
|
||||||
|
|
||||||
|
### 12. Evaluate the Model on Test Data:
|
||||||
|
|
||||||
|
```python3
|
||||||
|
loss, mae = model.evaluate(X_test, y_test)
|
||||||
|
print(f"Test Loss (MSE): {loss:.4f}")
|
||||||
|
print(f"Test Mean Absolute Error: {mae:.4f}")
|
||||||
|
```
|
||||||
|
|
||||||
|
### 13. Make Predictions:
|
||||||
|
|
||||||
|
```python3
|
||||||
|
predictions = model.predict(X_test)
|
||||||
|
print("First 5 Predicted Prices:", predictions[:5].flatten())
|
||||||
|
print("First 5 Actual Prices: ", y_test.values[:5])
|
||||||
|
```
|
||||||
|
|
||||||
|
### 14. Plot Training vs Validation Loss:
|
||||||
|
|
||||||
|
```python3
|
||||||
|
plt.plot(history.history['loss'], label='Training Loss')
|
||||||
|
plt.plot(history.history['val_loss'], label='Validation Loss')
|
||||||
|
plt.title('Model Loss Over Epochs')
|
||||||
|
plt.ylabel('Loss (MSE)')
|
||||||
|
plt.xlabel('Epoch')
|
||||||
|
plt.legend()
|
||||||
|
plt.grid(True)
|
||||||
|
plt.show()
|
||||||
|
```
|
||||||
|
|
||||||
|
### 15. Plot Predicted vs Actual Prices:
|
||||||
|
|
||||||
|
```python3
|
||||||
|
plt.figure(figsize=(8, 6))
|
||||||
|
plt.scatter(y_test, predictions, alpha=0.7) # each point = one test sample
|
||||||
|
plt.plot([y_test.min(), y_test.max()],
|
||||||
|
[y_test.min(), y_test.max()], 'r--', label='Ideal Fit') # diagonal = perfect prediction
|
||||||
|
plt.xlabel('Actual Price')
|
||||||
|
plt.ylabel('Predicted Price')
|
||||||
|
plt.title('Actual vs Predicted House Prices')
|
||||||
|
plt.legend()
|
||||||
|
plt.grid(True)
|
||||||
|
plt.show()
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Miscellaneous
|
||||||
|
|
||||||
|
- [Dataset source](https://www.kaggle.com/datasets/fedesoriano/the-boston-houseprice-data)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
Reference in New Issue
Block a user