# Bag-of-words demo: fit a CountVectorizer on a tiny training corpus using a
# custom text-cleaning preprocessor, then show the token counts of an unseen
# sentence as a one-row DataFrame.

import shutil
from shutil import copyfile

# Copy the helper module from the read-only input directory into the working
# directory. This has to happen BEFORE the `cleantext` import below, which is
# why the imports in this script are not all grouped at the top.
copyfile(src="../input/cleantext/cleantext.py", dst="../working/cleantext.py")

# import all our functions
# NOTE(review): `process`, used further down, is expected to be one of the
# names this star import brings in -- confirm against cleantext.py.
from cleantext import *  # noqa: E402,F401,F403

#!pylint cleantext

import pandas as pd  # noqa: E402
from sklearn.feature_extraction.text import CountVectorizer  # noqa: E402

# Corpus the vocabulary is learned from.
training = [
    " I am master of all",
    "I am a absolute learner",
]

# Unseen text that is counted against the learned vocabulary.
generalization = [
    "I am absolute learner learner",
]

# `preprocessor` replaces CountVectorizer's built-in preprocessing step
# (lower-casing / accent stripping); tokenization and English stop-word
# removal still run afterwards.
vectorization = CountVectorizer(
    stop_words="english",
    preprocessor=process.master_clean_text,
)

vectorization.fit(training)

# `vocabulary_` maps term -> column index; invert it to column index -> term.
build_vocab = {
    column: term
    for term, column in vectorization.vocabulary_.items()
}

# Terms listed in column order, so they line up with the count matrix columns.
vocab = [build_vocab[column] for column in sorted(build_vocab)]

# Left as a bare expression so that a notebook renders it as the cell output.
pd.DataFrame(
    data=vectorization.transform(generalization).toarray(),
    index=["generalization"],
    columns=vocab,
)