Added codes 1.1, 1.2, 1.3 and 1.5
This commit is contained in:
@@ -0,0 +1,30 @@
|
||||
# Import libraries
|
||||
import nltk
|
||||
from nltk.corpus import stopwords
|
||||
from nltk.tokenize import word_tokenize
|
||||
|
||||
# Ensure the NLTK resources used below are available locally.
# Newer NLTK releases (3.9+) make word_tokenize load the Punkt models from the
# 'punkt_tab' package rather than the older pickled 'punkt' package, so both
# are fetched to keep tokenization working across NLTK versions.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')
|
||||
|
||||
def remove_stop_words(text):
    """Return *text* with English stop words removed.

    The text is tokenized with ``word_tokenize``; each token whose lowercase
    form is in NLTK's English stop-word list is discarded, and the surviving
    tokens (in their original casing and order) are joined with single spaces.
    """
    tokens = word_tokenize(text)

    # A set gives constant-time membership checks for every token.
    english_stops = set(stopwords.words('english'))

    kept = []
    for token in tokens:
        # The comparison is case-insensitive; the token itself is kept as-is.
        if token.lower() in english_stops:
            continue
        kept.append(token)

    return ' '.join(kept)
|
||||
|
||||
# Example usage
|
||||
if __name__ == "__main__":
    # Demo: run the stop-word filter on a sample sentence and show both forms.
    input_text = "This is an example of a text document that needs stop word removal."
    preprocessed_text = remove_stop_words(input_text)

    # Each section is a heading line followed by the corresponding text.
    sections = (
        ("Original Text:", input_text),
        ("\nPreprocessed Text:", preprocessed_text),
    )
    for heading, body in sections:
        print(heading)
        print(body)
|
||||
|
||||
Reference in New Issue
Block a user