from PIL import Image
import torch
import numpy as np
from transformers import CLIPModel, CLIPTokenizer

# Zero-shot classification of a single plant image with a fine-tuned CLIP
# checkpoint: the image is compared against a handful of text prompts and the
# prompt with the highest similarity is reported as the predicted class.

# Load the model
model_name = "TonyStarkD99/CLIP-Crop_Disease-Large"
model = CLIPModel.from_pretrained(model_name)

# Load your image
image_path = "/home/overnion/Status200/tomato.png"  # Replace with your image path
# Open inside a context manager so the file handle is released promptly.
# convert() reads the pixel data and returns a new in-memory image, so the
# result stays usable after the file is closed.
with Image.open(image_path) as source_image:
    image = source_image.convert("RGB")  # Ensure the image is in RGB format

# Define the class labels (text prompts)
class_labels = [
    "healthy plant",
    "diseased plant",
    "wilted plant",
    "pest-infested plant",
]

# Resize to the resolution the vision encoder was configured for instead of
# hard-coding 224, so checkpoints with a different input size still work.
image_size = model.config.vision_config.image_size
image = image.resize((image_size, image_size))

# Convert the image to a float tensor of shape (1, C, H, W) scaled to [0, 1]
image_tensor = torch.tensor(np.array(image)).permute(2, 0, 1).unsqueeze(0)
image_tensor = image_tensor.float() / 255.0

# Apply the per-channel mean/std normalization used by CLIP's standard image
# preprocessing. Scaling to [0, 1] alone leaves the pixel statistics outside
# the range the vision encoder expects and skews the similarity scores.
# NOTE(review): these are the default CLIP image-processor statistics; confirm
# the fine-tuned checkpoint was not trained with custom values.
clip_mean = torch.tensor([0.48145466, 0.4578275, 0.40821073]).view(1, 3, 1, 1)
clip_std = torch.tensor([0.26862954, 0.26130258, 0.27577711]).view(1, 3, 1, 1)
image_tensor = (image_tensor - clip_mean) / clip_std

# Load the tokenizer
# NOTE(review): this tokenizer comes from a different checkpoint than the
# model; it is presumably compatible because CLIP checkpoints share a
# vocabulary -- switch to `model_name` if that repository ships tokenizer files.
tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-base-patch16")  # Use a compatible tokenizer

# Tokenize the text prompts (padded to a common length)
text_inputs = tokenizer(class_labels, padding=True, return_tensors="pt")

# Make predictions
with torch.no_grad():
    outputs = model(
        pixel_values=image_tensor,
        input_ids=text_inputs["input_ids"],
        # Prompts were padded to equal length, so pass the mask that marks
        # which positions are real tokens.
        attention_mask=text_inputs["attention_mask"],
    )

logits_per_image = outputs.logits_per_image  # Image-to-text similarity scores, one row per image
probs = logits_per_image.softmax(dim=1)  # Convert to probabilities over the class labels

# Get the predicted class (there is exactly one image, hence row 0)
predicted_class_idx = probs[0].argmax().item()
predicted_class = class_labels[predicted_class_idx]

# Print the predicted class and probabilities
print("Predicted class:", predicted_class)
print("Probabilities:", probs.detach().numpy())