From 0c674d02f98afc1d50919c68b8589703c8ec825e Mon Sep 17 00:00:00 2001
From: Kshitij
Date: Sun, 3 May 2026 23:13:48 +0530
Subject: [PATCH] add code blocks for practical 3a; cnn plant diseases.

---
 Codes/Code-3a.md | 240 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 240 insertions(+)
 create mode 100644 Codes/Code-3a.md

diff --git a/Codes/Code-3a.md b/Codes/Code-3a.md
new file mode 100644
index 0000000..060cbd1
--- /dev/null
+++ b/Codes/Code-3a.md
@@ -0,0 +1,240 @@
+# Practical-3a (Convolutional Neural Network - Plant Diseases)
+
+Problem Statement: Convolutional Neural Network (CNN): Use any dataset of plant disease and design a plant disease detection system using CNN.
+
+> [!NOTE]
+> Download the dataset directly from [source](https://www.kaggle.com/datasets/vipoooool/new-plant-diseases-dataset/data).
+> Haven't added it to the `/Datasets` directory due to its large size.
+> To be honest, the dataset doesn't really matter in this case; you just need to ensure the dataset directory contains `train` and `valid` sub-directories.
+> Refer to the above dataset to understand the required directory structure.
+
+---
+
+## Prerequisites
+
+1. Install packages using `pip`: `pip install tensorflow keras numpy opencv-python matplotlib seaborn scikit-learn` (`tensorflow` requires Python 3.9 - 3.12)
+2. Download and unzip the dataset in the same directory as the Jupyter notebook.
+3. Ensure your unzipped dataset has the required directory structure:
+
+```shell
+New Plant Diseases Dataset(Augmented)/
+├── train
+│   ├── Apple___Apple_scab
+│   ├── Apple___Black_rot
+│   ├── Apple___Cedar_apple_rust
+├── valid
+│   ├── Apple___Apple_scab
+│   ├── Apple___Black_rot
+│   ├── Apple___Cedar_apple_rust
+```
+
+## Steps
+
+1. Import Libraries
+2. Load Dataset
+3. Exploratory Data Analysis (EDA)
+4. Split into Training and Testing Sets
+5. Build the CNN Model
+6. Compile the Model
+7. Train the Model
+8. Evaluate the Model on Test Data
+9. Plot Training vs Validation Accuracy
+10. 
Plot Training vs Validation Loss
+11. Confusion Matrix and Classification Report
+
+---
+
+## Code
+
+### 1. Import Libraries:
+
+```python3
+import os
+import numpy as np
+import cv2
+import matplotlib.pyplot as plt
+import seaborn as sns
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import confusion_matrix, classification_report
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout
+from tensorflow.keras.utils import to_categorical
+```
+
+### 2. Load Dataset:
+
+```python3
+data = []
+labels = []
+
+# Path to dataset folder containing one subfolder per disease class
+path = './New Plant Diseases Dataset(Augmented)/train/'
+categories = sorted(c for c in os.listdir(path) if os.path.isdir(os.path.join(path, c)))  # class folders only (stray files are ignored), sorted for consistent label ordering
+
+# Map each category name to a numeric index
+label_dict = {category: idx for idx, category in enumerate(categories)}
+print("Classes found:", len(categories))
+
+max_per_class = 200  # cap images per class to avoid RAM overflow on large datasets
+
+for category in categories:
+    folder = os.path.join(path, category)
+    count = 0
+    for img_name in sorted(os.listdir(folder)):  # sorted so the same capped subset is picked on every run
+        if count >= max_per_class:
+            break
+        img_path = os.path.join(folder, img_name)
+        img_array = cv2.imread(img_path)
+        if img_array is not None:  # skip unreadable files
+            img_array = cv2.resize(img_array, (64, 64))  # resize to fixed 64x64 pixels
+            data.append(img_array)
+            labels.append(label_dict[category])
+            count += 1
+
+data = np.array(data, dtype=np.float32) / 255.0  # normalize [0,255] -> [0,1]; float32 uses half the memory of the float64 that uint8 / 255.0 would produce
+labels = np.array(labels)
+print("Dataset shape:", data.shape)
+print("Labels shape:", labels.shape)
+```
+
+### 3. 
Exploratory Data Analysis (EDA):
+
+```python3
+print("Total images:", len(data))
+print("Image shape:", data[0].shape)
+print("Number of classes:", len(categories))
+
+# Class distribution bar chart
+class_counts = {cat: int((labels == idx).sum()) for cat, idx in label_dict.items()}
+plt.figure(figsize=(14, 5))
+plt.bar(class_counts.keys(), class_counts.values())
+plt.xticks(rotation=90)
+plt.title("Number of Images per Disease Class")
+plt.xlabel("Class")
+plt.ylabel("Count")
+plt.tight_layout()
+plt.show()
+
+# Sample images from first 5 classes (pixels are scaled back to 0-255 and converted BGR -> RGB for display)
+plt.figure(figsize=(15, 3))
+for i, category in enumerate(categories[:5]):
+    idx = np.where(labels == label_dict[category])[0][0]  # index of first image in class
+    plt.subplot(1, 5, i + 1)
+    plt.imshow(cv2.cvtColor((data[idx] * 255 + 0.5).astype(np.uint8), cv2.COLOR_BGR2RGB))  # +0.5 rounds to nearest, so float error cannot truncate a channel value by one
+    plt.title(category[:15], fontsize=8)
+    plt.axis('off')
+plt.suptitle("Sample Images per Class")
+plt.show()
+```
+
+### 4. Split into Training and Testing Sets:
+
+```python3
+X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, random_state=42, stratify=labels)  # stratify keeps every class's share equal in both splits (needs >= 2 images per class)
+
+num_classes = len(categories)
+# One-hot encode labels: e.g. class 2 of 5 → [0, 0, 1, 0, 0]
+y_train = to_categorical(y_train, num_classes)
+y_test = to_categorical(y_test, num_classes)
+print("Train samples:", X_train.shape[0])
+print("Test samples: ", X_test.shape[0])
+```
+
+### 5. 
Build the CNN Model:
+
+```python3
+# Small CNN: two conv/pool stages followed by a dense classifier head
+model = Sequential([
+    Input(shape=(64, 64, 3)),  # 64x64 image with 3 colour channels
+    # Stage 1: 32 filters for basic features, then 2x2 pooling with stride 2
+    Conv2D(filters=32, kernel_size=(3, 3), activation='relu'),
+    MaxPooling2D(pool_size=2, strides=2),
+    # Stage 2: 64 filters for more complex features, then the same pooling
+    Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
+    MaxPooling2D(pool_size=2, strides=2),
+    # Classifier head
+    Flatten(),  # 2D feature maps -> 1D vector
+    Dense(128, activation='relu'),  # fully connected layer
+    Dropout(0.5),  # randomly drops 50% of units during training to reduce overfitting
+    Dense(num_classes, activation='softmax'),  # one probability per class
+])
+
+model.summary()
+```
+
+### 6. Compile the Model:
+
+```python3
+# Labels are one-hot encoded, so use categorical cross-entropy; Adam optimizer; track accuracy
+model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
+```
+
+### 7. Train the Model:
+
+```python3
+history = model.fit(x=X_train, y=y_train, batch_size=32, epochs=10, validation_split=0.2)
+```
+
+### 8. Evaluate the Model on Test Data:
+
+```python3
+loss, accuracy = model.evaluate(x=X_test, y=y_test)
+print(f"Test Loss: {loss:.4f}")
+print(f"Test Accuracy: {accuracy*100:.2f}%")
+```
+
+### 9. Plot Training vs Validation Accuracy:
+
+```python3
+for series, legend_name in [('accuracy', 'Training Accuracy'), ('val_accuracy', 'Validation Accuracy')]:
+    plt.plot(history.history[series], label=legend_name)
+plt.title('CNN Model Accuracy Over Epochs')
+plt.xlabel('Epoch')
+plt.ylabel('Accuracy')
+plt.legend()
+plt.grid(True)
+plt.show()
+```
+
+### 10. Plot Training vs Validation Loss:
+
+```python3
+for series, legend_name in [('loss', 'Training Loss'), ('val_loss', 'Validation Loss')]:
+    plt.plot(history.history[series], label=legend_name)
+plt.title('CNN Model Loss Over Epochs')
+plt.xlabel('Epoch')
+plt.ylabel('Loss')
+plt.legend()
+plt.grid(True)
+plt.show()
+```
+
+### 11. 
Confusion Matrix and Classification Report:
+
+```python3
+y_pred = np.argmax(model.predict(X_test), axis=1)  # predicted class index
+y_true = np.argmax(y_test, axis=1)  # actual class index (from one-hot)
+class_ids = list(range(len(categories)))  # every class index, so a class absent from the test set still gets its row/column
+cm = confusion_matrix(y_true, y_pred, labels=class_ids)  # explicit labels keep the matrix aligned with `categories`
+plt.figure(figsize=(14, 12))
+sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
+            xticklabels=categories, yticklabels=categories)
+plt.title('Confusion Matrix')
+plt.ylabel('Actual')
+plt.xlabel('Predicted')
+plt.xticks(rotation=90)
+plt.tight_layout()
+plt.show()
+
+print("\nClassification Report:\n")
+print(classification_report(y_true, y_pred, labels=class_ids, target_names=categories))  # labels must match target_names one-to-one
+```
+
+---
+
+## Miscellaneous
+
+- [Dataset source](https://www.kaggle.com/datasets/vipoooool/new-plant-diseases-dataset)
+
+---
+