import re as clear

# One HTML/XML-style tag: "<", the shortest run of characters, then ">".
# "." does not match newlines, so a tag split across lines is left alone.
_TAG_PATTERN = clear.compile(r"<.*?>")

# Backslashes and quote characters are deleted outright (not replaced by a
# space) so that contractions collapse into one token: "you'r" -> "your".
_QUOTE_PATTERN = clear.compile(r"[\\'\"]")

# Every character listed here is replaced by a single space.
_UNWANTED_CHARS = '!"\'#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n'

# str.maketrans() builds a one-to-one character -> replacement table that
# str.translate() applies in a single pass; built once at import time.
_PUNCTUATION_TO_SPACE = str.maketrans({ch: " " for ch in _UNWANTED_CHARS})


class process():
    """Namespace for text pre-processing helpers."""

    @staticmethod
    def master_clean_text(text: str) -> str:
        """Return *text* with tags and punctuation removed, lowercased.

        Steps, in order:

        1. Remove anything that looks like an HTML tag (``<...>``).
        2. Delete backslashes, apostrophes and double quotes.
        3. Strip leading/trailing whitespace and convert to lowercase.
        4. Replace each remaining punctuation character, tab and newline
           with a single space.

        Because step 4 runs after the strip, the result may still contain
        leading, trailing or repeated spaces; call ``.split()`` on it to
        obtain clean tokens.

        Declared as a static method so it can be called both on the class
        and on an instance.

        Args:
            text: The raw string to clean.

        Returns:
            The cleaned, lowercased string.

        Example:
            >>> process.master_clean_text("<b>Hello</b>, World!")
            'hello  world '
            >>> process.master_clean_text(" WEll it's swap CASE !!").split()
            ['well', 'its', 'swap', 'case']
        """
        # Tags must go first: their attribute values may contain quotes.
        text = _TAG_PATTERN.sub("", text)
        text = _QUOTE_PATTERN.sub("", text)
        # Lowercase to reduce vocabulary complexity.
        text = text.strip().lower()
        return text.translate(_PUNCTUATION_TO_SPACE)