Added codes 1.1, 1.2, 1.3 and 1.5

2025-10-12 22:51:57 +05:30
parent 0ac0a2859b
commit 4c84c01a65
4 changed files with 188 additions and 0 deletions
@@ -0,0 +1,30 @@
+# Import libraries
+import nltk
+from nltk.corpus import stopwords
+from nltk.tokenize import word_tokenize
+
+# Ensure the NLTK resources used below are available locally.
+# word_tokenize loads its sentence model from 'punkt' on older NLTK
+# releases and from 'punkt_tab' on NLTK 3.9 and later, so fetch both;
+# a resource that is already present is not downloaded again.
+nltk.download('punkt')
+nltk.download('punkt_tab')
+nltk.download('stopwords')
+
+def remove_stop_words(text, language='english'):
+    """Return *text* with the stop words of *language* removed.
+
+    The text is split into tokens with NLTK's ``word_tokenize``; every
+    token whose lowercase form is in NLTK's stop word list for
+    *language* is dropped, and the remaining tokens are joined with
+    single spaces.
+
+    Args:
+        text: The string to process.
+        language: Language name as NLTK spells it (for example
+            ``'english'`` or ``'german'``). It is passed to both the
+            tokenizer and the stop word corpus, so it must be available
+            in both resources. Defaults to ``'english'``.
+
+    Returns:
+        A string containing the surviving tokens separated by single
+        spaces.
+    """
+    # Tokenizing the text into words
+    words = word_tokenize(text, language=language)
+
+    # Build a set so each membership test in the filter is a hash lookup
+    stop_words = set(stopwords.words(language))
+
+    # Compare on the lowercase form so capitalised stop words
+    # (e.g. at the start of a sentence) are removed as well
+    filtered_words = [word for word in words if word.lower() not in stop_words]
+
+    return ' '.join(filtered_words)
+
+# Demo: run the stop word filter on a sample sentence and show the
+# text before and after.
+if __name__ == "__main__":
+    sample = "This is an example of a text document that needs stop word removal."
+    cleaned = remove_stop_words(sample)
+    # One print per item keeps the output identical to printing each
+    # heading and text separately.
+    for line in ("Original Text:", sample, "\nPreprocessed Text:", cleaned):
+        print(line)
+