From fd71b0ff24e583afaa088cfff59f5875b41437a0 Mon Sep 17 00:00:00 2001
From: Kshitij <notkshitij@git.kska.io>
Date: Sun, 3 May 2026 23:14:46 +0530
Subject: [PATCH] add code blocks for practical 4; rnn @ google stock price.

---
 Codes/Code-4.md | 276 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 276 insertions(+)
 create mode 100644 Codes/Code-4.md

diff --git a/Codes/Code-4.md b/Codes/Code-4.md
new file mode 100644
index 0000000..c9feb6e
--- /dev/null
+++ b/Codes/Code-4.md
@@ -0,0 +1,276 @@
+# Practical-4 (Recurrent Neural Network - Google Stock Price Dataset)
+
+Problem Statement: Recurrent neural network (RNN): Use the Google stock prices dataset and design a time series analysis and prediction system using RNN.
+
+> [!NOTE]
+> Dataset available in [Datasets](../Datasets/GOOG.csv) directory.
+> In the code, the dataset is downloaded directly from Yahoo Finance (via the `yfinance` package) in the 2nd step (Load Dataset).
+
+---
+
+## Prerequisites
+
+1. Install packages using `pip`: `pip install tensorflow keras numpy pandas matplotlib scikit-learn yfinance` (`tensorflow` requires Python 3.9 - 3.12)
+
+## Steps
+
+1. Import Libraries
+2. Load Dataset
+3. Exploratory Data Analysis (EDA)
+4. Visualize Closing Price Over Time
+5. Preprocess Data - Normalize Closing Price
+6. Create Sequences for RNN Input
+7. Build the RNN Model
+8. Train the Model
+9. Plot Training vs Validation Loss
+10. Make Predictions and Inverse Scale
+11. Evaluate the Model
+12. Plot Actual vs Predicted Stock Price
+13. Forecast Next 30 Days
+
+---
+
+## Code
+
+### 1. Import Libraries:
+
+```python3
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+import yfinance as yf
+from sklearn.preprocessing import MinMaxScaler
+from sklearn.metrics import mean_squared_error, mean_absolute_error
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.layers import Input, Dense, SimpleRNN, Dropout
+from tensorflow.keras.callbacks import EarlyStopping
+```
+
+### 2. Load Dataset:
+
+```python3
+# Download GOOGL price history from Yahoo Finance (via yfinance) for the given date range
+ticker = "GOOGL"
+df = yf.download(ticker, start="2018-01-01", end="2024-01-01")
+
+# --- Offline alternative (comment out the yf.download above and use this instead if using local dataset) ---
+# df = pd.read_csv('GOOGL.csv', index_col='Date', parse_dates=True)
+# df = df.sort_index()  # ensure chronological order
+
+# yfinance returns MultiIndex columns — keep only level 0 so later steps can index the frame as df['Close']
+df.columns = df.columns.get_level_values(0)
+
+print(f"Dataset Shape: {df.shape}")
+print(f"Date Range: {df.index.min().date()} to {df.index.max().date()}")
+print(df.head())
+```
+
+### 3. Exploratory Data Analysis (EDA):
+
+```python3
+print("=== Dataset Info ===")
+df.info()  # info() writes its report itself and returns None; wrapping it in print() appended a stray "None"
+print("\n=== Statistical Summary ===")
+print(df.describe())
+print("\n=== Missing Values ===")
+print(df.isnull().sum())
+```
+
+### 4. Visualize Closing Price Over Time:
+
+```python3
+plt.figure(figsize=(16, 6))
+plt.plot(df.index, df['Close'], color='steelblue', linewidth=1.5, label='Close Price')
+plt.title('Google (GOOGL) Stock Closing Price (2018–2024)')
+plt.xlabel('Date')
+plt.ylabel('Price (USD)')
+plt.legend()
+plt.grid(alpha=0.3)
+plt.tight_layout()
+plt.show()
+```
+
+### 5. Preprocess Data - Normalize Closing Price:
+
+```python3
+data = df[['Close']].values   # use only Close price for prediction
+
+scaler = MinMaxScaler(feature_range=(0, 1))
+data_scaled = scaler.fit_transform(data)  # scale values to [0, 1]
+
+print(f"Original data range: [{data.min():.2f}, {data.max():.2f}]")
+print(f"Scaled data range:   [{data_scaled.min():.4f}, {data_scaled.max():.4f}]")
+print(f"Total data points:   {len(data_scaled)}")
+```
+
+### 6. Create Sequences for RNN Input:
+
+```python3
+def create_sequences(data, time_steps=60):
+    X, y = [], []
+    for i in range(time_steps, len(data)):
+        X.append(data[i - time_steps:i, 0])  # window of past `time_steps` days
+        y.append(data[i, 0])                  # next day's price
+    return np.array(X), np.array(y)
+
+TIME_STEPS = 60  # window length: use the previous 60 rows to predict the next one
+
+# 80/20 train-test split (manual, to preserve time order)
+train_size = int(len(data_scaled) * 0.80)
+train_data = data_scaled[:train_size]
+test_data  = data_scaled[train_size - TIME_STEPS:]  # prepend the last TIME_STEPS training rows so the first test target is row train_size
+
+X_train, y_train = create_sequences(train_data, TIME_STEPS)
+X_test,  y_test  = create_sequences(test_data,  TIME_STEPS)
+
+# Reshape to [samples, time_steps, features] — required format for RNN layers (features = 1: the Close price)
+X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
+X_test  = X_test.reshape((X_test.shape[0],   X_test.shape[1],  1))
+
+print(f"Training samples: {X_train.shape}")
+print(f"Testing samples:  {X_test.shape}")
+```
+
+### 7. Build the RNN Model:
+
+```python3
+model = Sequential()
+
+model.add(Input(shape=(TIME_STEPS, 1)))                               # input: sequence of 60 days
+model.add(SimpleRNN(units=64, return_sequences=True))                 # first RNN layer, passes output to next
+model.add(Dropout(0.2))                                               # drop 20% neurons to reduce overfitting
+model.add(SimpleRNN(units=64, return_sequences=False))                # second RNN layer, outputs single vector
+model.add(Dropout(0.2))
+model.add(Dense(units=32, activation='relu'))                         # fully connected layer
+model.add(Dense(units=1))                                             # output: single predicted price
+
+model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])
+model.summary()
+```
+
+### 8. Train the Model:
+
+```python3
+# EarlyStopping halts training once val_loss fails to improve for 10 consecutive epochs, then restores the best weights
+early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
+
+history = model.fit(
+    X_train, y_train,
+    epochs=60,              # upper bound; early stopping may end training sooner
+    batch_size=32,
+    validation_split=0.1,   # use 10% of training data for validation
+    callbacks=[early_stop],
+    verbose=1
+)
+print(f"\nTraining stopped at epoch: {len(history.history['loss'])}")  # one 'loss' entry is recorded per epoch run
+```
+
+### 9. Plot Training vs Validation Loss:
+
+```python3
+plt.plot(history.history['loss'], label='Train Loss', color='royalblue')
+plt.plot(history.history['val_loss'], label='Val Loss', color='tomato')
+plt.title('Model Training Loss Over Epochs')
+plt.xlabel('Epoch')
+plt.ylabel('MSE Loss')
+plt.legend()
+plt.grid(alpha=0.3)
+plt.tight_layout()
+plt.show()
+```
+
+### 10. Make Predictions and Inverse Scale:
+
+```python3
+y_pred_scaled = model.predict(X_test)  # predictions are still in the scaler's normalized units
+
+# Convert scaled predictions back to original USD price range
+y_pred   = scaler.inverse_transform(y_pred_scaled)
+y_actual = scaler.inverse_transform(y_test.reshape(-1, 1))  # y_test is 1-D; reshape to one column before inverse scaling
+
+print(f"Sample predictions (first 5): {y_pred[:5].flatten().round(2)}")
+print(f"Actual values      (first 5): {y_actual[:5].flatten().round(2)}")
+```
+
+### 11. Evaluate the Model:
+
+```python3
+mse  = mean_squared_error(y_actual, y_pred)   # computed on the inverse-scaled prices, not the normalized values
+rmse = np.sqrt(mse)                           # square root of MSE: same units as the prices
+mae  = mean_absolute_error(y_actual, y_pred)  # average absolute error, same units as the prices
+mape = np.mean(np.abs((y_actual - y_pred) / y_actual)) * 100  # mean absolute percentage error
+
+print("=" * 40)
+print("     MODEL EVALUATION METRICS")
+print("=" * 40)
+print(f"  MSE  : {mse:.4f}")
+print(f"  RMSE : {rmse:.4f}")
+print(f"  MAE  : {mae:.4f}")
+print(f"  MAPE : {mape:.2f}%")
+print("=" * 40)
+```
+
+### 12. Plot Actual vs Predicted Stock Price:
+
+```python3
+test_dates = df.index[train_size:]   # align dates with test predictions
+
+plt.figure(figsize=(16, 6))
+plt.plot(test_dates, y_actual, label='Actual Price',    color='steelblue', linewidth=1.5)
+plt.plot(test_dates, y_pred,   label='Predicted Price', color='tomato',    linewidth=1.5, linestyle='--')
+plt.title('Google Stock Price: Actual vs Predicted (RNN)')
+plt.xlabel('Date')
+plt.ylabel('Price (USD)')
+plt.legend()
+plt.grid(alpha=0.3)
+plt.tight_layout()
+plt.show()
+```
+
+### 13. Forecast Next 30 Days:
+
+```python3
+n_future = 30  # number of future days to predict
+
+# Seed the forecast with the last TIME_STEPS days of known (already scaled) data, shaped [1, TIME_STEPS, 1]
+future_input       = data_scaled[-TIME_STEPS:].reshape(1, TIME_STEPS, 1)
+future_predictions = []
+
+for _ in range(n_future):  # recursive forecast: each prediction is fed back as input for the next step
+    pred = model.predict(future_input, verbose=0)
+    future_predictions.append(pred[0, 0])  # keep the scalar prediction (still in scaled units)
+    # Slide the window: drop oldest day, append new prediction
+    future_input = np.append(future_input[:, 1:, :], pred.reshape(1, 1, 1), axis=1)
+
+# Inverse scale forecasted prices back to USD
+future_prices = scaler.inverse_transform(np.array(future_predictions).reshape(-1, 1))
+
+# Generate business day dates starting from the day after last known date (NOTE(review): confirm freq='B' handling of market holidays is acceptable)
+last_date    = df.index[-1]
+future_dates = pd.date_range(start=last_date + pd.Timedelta(days=1), periods=n_future, freq='B')
+
+plt.figure(figsize=(16, 6))
+plt.plot(df.index[-120:], scaler.inverse_transform(data_scaled[-120:]),  # last 120 known rows for context
+         label='Historical', color='steelblue', linewidth=1.5)
+plt.plot(future_dates, future_prices,
+         label='30-Day Forecast', color='orange', linewidth=1.5)
+plt.axvline(x=last_date, color='gray', linestyle='--', label='Forecast Start')  # marks the last known date
+plt.title('Google Stock — 30-Day Future Price Forecast (RNN)')
+plt.xlabel('Date')
+plt.ylabel('Price (USD)')
+plt.legend()
+plt.grid(alpha=0.3)
+plt.tight_layout()
+plt.show()
+
+print(f"\nForecasted price range: {future_prices.min():.2f} USD - {future_prices.max():.2f} USD")
+```
+
+---
+
+## Miscellaneous
+
+- [Dataset source](https://www.kaggle.com/datasets/henryshan/google-stock-price)
+
+---
+