# InformationRetrieval/Codes/Code-1.3.py

# Import libraries
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

# Fetch the NLTK data this script depends on:
#   - 'punkt' / 'punkt_tab': tokenizer data used by word_tokenize. NLTK 3.9+
#     loads 'punkt_tab', while earlier releases load 'punkt', so both are
#     requested to keep the script working across versions.
#   - 'stopwords': the word lists read by stopwords.words('english').
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')

def remove_stop_words(text):
    """Return ``text`` with English stop words filtered out.

    The input is split into tokens with ``word_tokenize``. A token is dropped
    when its lowercased form appears in NLTK's English stop-word list; the
    surviving tokens keep their original casing and order.

    The result is the remaining tokens joined by single spaces, so the
    whitespace of the output may differ from that of the input.
    """
    tokens = word_tokenize(text)

    # A set gives constant-time membership checks while filtering.
    english_stops = set(stopwords.words('english'))

    kept = []
    for token in tokens:
        # Compare case-insensitively, but keep the token as written.
        if token.lower() in english_stops:
            continue
        kept.append(token)

    return ' '.join(kept)

# Demo: show a sample sentence before and after stop-word removal.
if __name__ == "__main__":
    sample = "This is an example of a text document that needs stop word removal."
    # Run the filter before printing anything, so a failure produces no
    # partial output.
    cleaned = remove_stop_words(sample)
    print(
        "Original Text:",
        sample,
        "\nPreprocessed Text:",
        cleaned,
        sep="\n",
    )