From fd71b0ff24e583afaa088cfff59f5875b41437a0 Mon Sep 17 00:00:00 2001 From: Kshitij Date: Sun, 3 May 2026 23:14:46 +0530 Subject: [PATCH] add code blocks for practical 4; rnn @ google stock price. --- Codes/Code-4.md | 276 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 276 insertions(+) create mode 100644 Codes/Code-4.md diff --git a/Codes/Code-4.md b/Codes/Code-4.md new file mode 100644 index 0000000..c9feb6e --- /dev/null +++ b/Codes/Code-4.md @@ -0,0 +1,276 @@ +# Practical-4 (Recurrent Neural Network - Google Stock Price Dataset) + +Problem Statement: Recurrent neural network (RNN): Use the Google stock prices dataset and design a time series analysis and prediction system using RNN. + +> [!NOTE] +> The dataset is available in the [Datasets](../Datasets/GOOG.csv) directory. +> In the code, the dataset is downloaded directly from Yahoo Finance (via `yfinance`) in the 2nd step (Load Dataset). + +--- + +## Prerequisites + +1. Install packages using `pip`: `pip install tensorflow keras numpy pandas matplotlib scikit-learn yfinance` (`tensorflow` requires Python 3.9 - 3.12) + +## Steps + +1. Import Libraries +2. Load Dataset +3. Exploratory Data Analysis (EDA) +4. Visualize Closing Price Over Time +5. Preprocess Data - Normalize Closing Price +6. Create Sequences for RNN Input +7. Build the RNN Model +8. Train the Model +9. Plot Training vs Validation Loss +10. Make Predictions and Inverse Scale +11. Evaluate the Model +12. Plot Actual vs Predicted Stock Price +13. Forecast Next 30 Days + +--- + +## Code + +### 1. Import Libraries: + +```python3 +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt +import yfinance as yf +from sklearn.preprocessing import MinMaxScaler +from sklearn.metrics import mean_squared_error, mean_absolute_error +from tensorflow.keras.models import Sequential +from tensorflow.keras.layers import Input, Dense, SimpleRNN, Dropout +from tensorflow.keras.callbacks import EarlyStopping +``` + +### 2. 
Load Dataset: + +```python3 +# Downloads GOOGL stock data from Yahoo Finance for the given date range +ticker = "GOOGL" +df = yf.download(ticker, start="2018-01-01", end="2024-01-01") + +# --- Offline alternative (comment out the yf.download above and use this instead if using local dataset) --- +# df = pd.read_csv('GOOGL.csv', index_col='Date', parse_dates=True) +# df = df.sort_index() # ensure chronological order + +# yfinance returns MultiIndex columns — flatten to single level +df.columns = df.columns.get_level_values(0) + +print(f"Dataset Shape: {df.shape}") +print(f"Date Range: {df.index.min().date()} to {df.index.max().date()}") +print(df.head()) +``` + +### 3. Exploratory Data Analysis (EDA): + +```python3 +print("=== Dataset Info ===") +df.info() # info() prints its report itself and returns None; print(df.info()) would append a stray "None" line +print("\n=== Statistical Summary ===") +print(df.describe()) +print("\n=== Missing Values ===") +print(df.isnull().sum()) +``` + +### 4. Visualize Closing Price Over Time: + +```python3 +plt.figure(figsize=(16, 6)) +plt.plot(df.index, df['Close'], color='steelblue', linewidth=1.5, label='Close Price') +plt.title('Google (GOOGL) Stock Closing Price (2018–2024)') +plt.xlabel('Date') +plt.ylabel('Price (USD)') +plt.legend() +plt.grid(alpha=0.3) +plt.tight_layout() +plt.show() +``` + +### 5. Preprocess Data - Normalize Closing Price: + +```python3 +data = df[['Close']].values # use only Close price for prediction; 2-D array with a single column + +scaler = MinMaxScaler(feature_range=(0, 1)) +data_scaled = scaler.fit(data[:int(len(data) * 0.80)]).transform(data) # fit min/max on the first 80% only (the training rows of step 6) so test prices do not leak into the scaling; later values may fall outside [0, 1] + +print(f"Original data range: [{data.min():.2f}, {data.max():.2f}]") +print(f"Scaled data range: [{data_scaled.min():.4f}, {data_scaled.max():.4f}]") +print(f"Total data points: {len(data_scaled)}") +``` + +### 6. 
Create Sequences for RNN Input: + +```python3 +def create_sequences(data, time_steps=60): # pair every run of `time_steps` consecutive column-0 values with the value that follows it + X, y = [], [] + for i in range(time_steps, len(data)): + X.append(data[i - time_steps:i, 0]) # window of past `time_steps` days (rows i - time_steps .. i - 1) + y.append(data[i, 0]) # target: the value right after the window (row i) + return np.array(X), np.array(y) # one sample per target, i.e. len(data) - time_steps samples (none if data is that short) + +TIME_STEPS = 60 # use past 60 days to predict the next day + +# 80/20 train-test split (manual, to preserve time order) +train_size = int(len(data_scaled) * 0.80) +train_data = data_scaled[:train_size] +test_data = data_scaled[train_size - TIME_STEPS:] # start TIME_STEPS rows early so the first test window ends just before row train_size + +X_train, y_train = create_sequences(train_data, TIME_STEPS) +X_test, y_test = create_sequences(test_data, TIME_STEPS) + +# Reshape to [samples, time_steps, features] — required format for RNN layers +X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1)) +X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1)) + +print(f"Training samples: {X_train.shape}") +print(f"Testing samples: {X_test.shape}") +``` + +### 7. Build the RNN Model: + +```python3 +model = Sequential() + +model.add(Input(shape=(TIME_STEPS, 1))) # input: TIME_STEPS days, one feature (scaled Close) per day +model.add(SimpleRNN(units=64, return_sequences=True)) # first RNN layer; return_sequences=True passes the full sequence to the next RNN layer +model.add(Dropout(0.2)) # drop 20% neurons to reduce overfitting +model.add(SimpleRNN(units=64, return_sequences=False)) # second RNN layer, outputs single vector +model.add(Dropout(0.2)) # same 20% dropout after the second RNN layer +model.add(Dense(units=32, activation='relu')) # fully connected layer +model.add(Dense(units=1)) # output: single predicted price (in scaled units) + +model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae']) # MSE is the training loss; MAE is only reported alongside it +model.summary() +``` + +### 8. 
Train the Model: + +```python3 +# EarlyStopping stops training if val_loss doesn't improve for 10 consecutive epochs; restore_best_weights=True then restores the weights from the best epoch +early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True) + +history = model.fit( + X_train, y_train, + epochs=60, # upper limit; early_stop may end training sooner + batch_size=32, + validation_split=0.1, # use 10% of training data for validation (Keras takes it from the end of the arrays, before shuffling) + callbacks=[early_stop], + verbose=1 +) +print(f"\nTraining stopped at epoch: {len(history.history['loss'])}") # epochs actually run = length of the recorded loss history +``` + +### 9. Plot Training vs Validation Loss: + +```python3 +plt.plot(history.history['loss'], label='Train Loss', color='royalblue') +plt.plot(history.history['val_loss'], label='Val Loss', color='tomato') +plt.title('Model Training Loss Over Epochs') +plt.xlabel('Epoch') +plt.ylabel('MSE Loss') +plt.legend() +plt.grid(alpha=0.3) +plt.tight_layout() +plt.show() +``` + +### 10. Make Predictions and Inverse Scale: + +```python3 +y_pred_scaled = model.predict(X_test) # one value per test window, still in scaled units + +# Convert scaled predictions back to original USD price range +y_pred = scaler.inverse_transform(y_pred_scaled) +y_actual = scaler.inverse_transform(y_test.reshape(-1, 1)) # reshape(-1, 1): make y_test a single column, like the data the scaler was fitted on + +print(f"Sample predictions (first 5): {y_pred[:5].flatten().round(2)}") +print(f"Actual values (first 5): {y_actual[:5].flatten().round(2)}") +``` + +### 11. Evaluate the Model: + +```python3 +mse = mean_squared_error(y_actual, y_pred) +rmse = np.sqrt(mse) # square root of MSE, so it is back in price units +mae = mean_absolute_error(y_actual, y_pred) +mape = np.mean(np.abs((y_actual - y_pred) / y_actual)) * 100 # mean absolute percentage error (divides by y_actual, so it assumes no zero prices) + +print("=" * 40) +print(" MODEL EVALUATION METRICS") +print("=" * 40) +print(f" MSE : {mse:.4f}") +print(f" RMSE : {rmse:.4f}") +print(f" MAE : {mae:.4f}") +print(f" MAPE : {mape:.2f}%") +print("=" * 40) +``` + +### 12. 
Plot Actual vs Predicted Stock Price: + +```python3 +test_dates = df.index[train_size:] # test targets are rows train_size..end of df, so these dates line up one-to-one with y_actual / y_pred + +plt.figure(figsize=(16, 6)) +plt.plot(test_dates, y_actual, label='Actual Price', color='steelblue', linewidth=1.5) +plt.plot(test_dates, y_pred, label='Predicted Price', color='tomato', linewidth=1.5, linestyle='--') +plt.title('Google Stock Price: Actual vs Predicted (RNN)') +plt.xlabel('Date') +plt.ylabel('Price (USD)') +plt.legend() +plt.grid(alpha=0.3) +plt.tight_layout() +plt.show() +``` + +### 13. Forecast Next 30 Days: + +```python3 +n_future = 30 # number of future (business) days to predict + +# Seed the forecast with the last TIME_STEPS days of known data, shaped as one sample: (1, TIME_STEPS, 1) +future_input = data_scaled[-TIME_STEPS:].reshape(1, TIME_STEPS, 1) +future_predictions = [] + +for _ in range(n_future): + pred = model.predict(future_input, verbose=0) # next value, in scaled units + future_predictions.append(pred[0, 0]) + # Slide the window: drop oldest day, append new prediction (so later steps are predicted from earlier predictions) + future_input = np.append(future_input[:, 1:, :], pred.reshape(1, 1, 1), axis=1) + +# Inverse scale forecasted prices back to USD +future_prices = scaler.inverse_transform(np.array(future_predictions).reshape(-1, 1)) + +# Generate business day dates starting from the day after last known date (freq='B' skips weekends only, not market holidays) +last_date = df.index[-1] +future_dates = pd.date_range(start=last_date + pd.Timedelta(days=1), periods=n_future, freq='B') + +plt.figure(figsize=(16, 6)) +plt.plot(df.index[-120:], scaler.inverse_transform(data_scaled[-120:]), + label='Historical', color='steelblue', linewidth=1.5) +plt.plot(future_dates, future_prices, + label='30-Day Forecast', color='orange', linewidth=1.5) +plt.axvline(x=last_date, color='gray', linestyle='--', label='Forecast Start') +plt.title('Google Stock — 30-Day Future Price Forecast (RNN)') +plt.xlabel('Date') +plt.ylabel('Price (USD)') +plt.legend() +plt.grid(alpha=0.3) +plt.tight_layout() +plt.show() + +print(f"\nForecasted price range: {future_prices.min():.2f} USD - {future_prices.max():.2f} USD") +``` 
+ +--- + +## Miscellaneous + +- [Dataset source](https://www.kaggle.com/datasets/henryshan/google-stock-price) + +--- +