# Example script: remove English stop words from a text using NLTK.

# Import libraries
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
# Ensure the necessary NLTK resources are downloaded.
# word_tokenize needs the Punkt tokenizer data: recent NLTK releases load it
# from 'punkt_tab', older releases from 'punkt', so both are requested.
nltk.download('punkt')
nltk.download('punkt_tab')
# Stop-word lists read by stopwords.words(...).
nltk.download('stopwords')
def remove_stop_words(text: str) -> str:
    """Return *text* with English stop words removed.

    The text is split into tokens with NLTK's ``word_tokenize``; every token
    whose lowercased form is in NLTK's English stop-word list is dropped, and
    the remaining tokens are joined with single spaces. Because the result is
    rebuilt from tokens, the original spacing is not preserved.

    Requires the NLTK tokenizer and stop-word resources to be installed.

    Args:
        text: The text to filter.

    Returns:
        The surviving tokens, in their original order and casing, separated
        by single spaces.
    """
    # Tokenizing the text into words
    words = word_tokenize(text)
    # Defining the English stop words; a set keeps each membership test cheap
    stop_words = set(stopwords.words('english'))
    # Removing stop words from the text. The comparison uses the lowercased
    # token, while the token itself is kept unchanged in the output.
    filtered_words = [word for word in words if word.lower() not in stop_words]
    return ' '.join(filtered_words)
# Example usage: runs only when the file is executed as a script, not when it
# is imported.
if __name__ == "__main__":
    input_text = "This is an example of a text document that needs stop word removal."
    preprocessed_text = remove_stop_words(input_text)
    # Print the text before and after filtering so the effect is visible.
    print("Original Text:")
    print(input_text)
    print("\nPreprocessed Text:")
    print(preprocessed_text)