From 6e04e517a7d3f3bed80e2f1d571fe9a0a7311a2c Mon Sep 17 00:00:00 2001
From: THIYAGARAJAN <thiyagarajanravi22@gmail.com>
Date: Tue, 22 Oct 2019 18:27:27 +0530
Subject: [PATCH] Fixed - to display the output

It can extract the words and count their occurrences.
---
 text_preprocessing.py | 39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)
 create mode 100644 text_preprocessing.py

diff --git a/text_preprocessing.py b/text_preprocessing.py
new file mode 100644
index 0000000..0b117a7
--- /dev/null
+++ b/text_preprocessing.py
@@ -0,0 +1,39 @@
+import clean_text
+
+# import all our functions
+from clean_text import *
+
+#!pylint cleantext
+
+import pandas as pd
+from sklearn.feature_extraction.text import CountVectorizer
+
+# Sentences the vectorizer learns its vocabulary from.
+training = [" I am master of all", "I am a absolute learner"]
+
+# Sentence whose words are counted against the learned vocabulary.
+generalization = ["I am absolute learner learner"]
+
+# Word counter: process.master_clean_text is passed as the preprocessor and
+# scikit-learn's built-in English stop-word list is requested.
+vectorization = CountVectorizer(
+    preprocessor=process.master_clean_text,
+    stop_words="english",
+)
+vectorization.fit(training)
+
+# vocabulary_ maps term -> column index; flip it to column index -> term.
+build_vocab = dict(
+    zip(vectorization.vocabulary_.values(), vectorization.vocabulary_.keys())
+)
+
+# Terms in column order, so the labels line up with the count matrix.
+vocab = [build_vocab[column] for column in sorted(build_vocab)]
+
+# Single row of counts for the sentence above, one column per term.
+extracted = pd.DataFrame(
+    vectorization.transform(generalization).toarray(),
+    index=["generalization"],
+    columns=vocab,
+)
+print(extracted)