Repository where I mostly put random python scripts.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

678 lines
104 KiB

  1. {
  2. "cells": [
  3. {
  4. "cell_type": "code",
  5. "execution_count": 5,
  6. "metadata": {},
  7. "outputs": [],
  8. "source": [
  9. "import gensim\n",
  10. "model = gensim.models.KeyedVectors.load_word2vec_format('./GoogleNews-vectors-negative300.bin', binary=True) "
  11. ]
  12. },
  13. {
  14. "cell_type": "code",
  15. "execution_count": 3,
  16. "metadata": {},
  17. "outputs": [
  18. {
  19. "data": {
  20. "text/plain": [
  21. "[('hi', 0.654898464679718),\n",
  22. " ('goodbye', 0.639905571937561),\n",
  23. " ('howdy', 0.6310957074165344),\n",
  24. " ('goodnight', 0.5920578241348267),\n",
  25. " ('greeting', 0.5855878591537476),\n",
  26. " ('Hello', 0.5842196941375732),\n",
  27. " (\"g'day\", 0.5754077434539795),\n",
  28. " ('See_ya', 0.5688871145248413),\n",
  29. " ('ya_doin', 0.5643119812011719),\n",
  30. " ('greet', 0.5636603832244873)]"
  31. ]
  32. },
  33. "execution_count": 3,
  34. "metadata": {},
  35. "output_type": "execute_result"
  36. }
  37. ],
  38. "source": [
  39. "model.most_similar(\"hello\")"
  40. ]
  41. },
  42. {
  43. "cell_type": "code",
  44. "execution_count": 4,
  45. "metadata": {},
  46. "outputs": [
  47. {
  48. "data": {
  49. "text/plain": [
  50. "[('coders', 0.6104331612586975),\n",
  51. " ('coder', 0.6063331365585327),\n",
  52. " ('Coding', 0.5804804563522339),\n",
  53. " ('formatting', 0.5671651363372803),\n",
  54. " ('soluble_receptors', 0.5576372146606445),\n",
  55. " ('ICD9', 0.5571348667144775),\n",
  56. " ('refactoring', 0.5495434999465942),\n",
  57. " ('database_schemas', 0.5372464656829834),\n",
  58. " ('recode', 0.534299373626709),\n",
  59. " ('XHTML_CSS', 0.5328801870346069)]"
  60. ]
  61. },
  62. "execution_count": 4,
  63. "metadata": {},
  64. "output_type": "execute_result"
  65. }
  66. ],
  67. "source": [
  68. "model.most_similar(\"coding\")"
  69. ]
  70. },
  71. {
  72. "cell_type": "code",
  73. "execution_count": 5,
  74. "metadata": {},
  75. "outputs": [
  76. {
  77. "data": {
  78. "text/plain": [
  79. "[('cats', 0.8099379539489746),\n",
  80. " ('dog', 0.7609456777572632),\n",
  81. " ('kitten', 0.7464985251426697),\n",
  82. " ('feline', 0.7326233983039856),\n",
  83. " ('beagle', 0.7150583267211914),\n",
  84. " ('puppy', 0.7075453996658325),\n",
  85. " ('pup', 0.6934291124343872),\n",
  86. " ('pet', 0.6891531348228455),\n",
  87. " ('felines', 0.6755931377410889),\n",
  88. " ('chihuahua', 0.6709762215614319)]"
  89. ]
  90. },
  91. "execution_count": 5,
  92. "metadata": {},
  93. "output_type": "execute_result"
  94. }
  95. ],
  96. "source": [
  97. "model.most_similar(\"cat\")"
  98. ]
  99. },
  100. {
  101. "cell_type": "code",
  102. "execution_count": 6,
  103. "metadata": {},
  104. "outputs": [
  105. {
  106. "name": "stdout",
  107. "output_type": "stream",
  108. "text": [
  109. "hi globe \n"
  110. ]
  111. }
  112. ],
  113. "source": [
  114. "def transformSentence(sentence):\n",
  115. " outputSentence = \"\"\n",
  116. " \n",
  117. " for word in sentence.split(\" \"):\n",
  118. " try:\n",
  119. " outputSentence += model.most_similar(word)[0][0] + \" \"\n",
  120. " except Exception:\n",
  121. " outputSentence += word + \" \"\n",
  122. " return outputSentence\n",
  123. "\n",
  124. "print(transformSentence(\"hello world\"))"
  125. ]
  126. },
  127. {
  128. "cell_type": "code",
  129. "execution_count": 7,
  130. "metadata": {},
  131. "outputs": [
  132. {
  133. "name": "stdout",
  134. "output_type": "stream",
  135. "text": [
  136. "looks Mom No hand \n"
  137. ]
  138. }
  139. ],
  140. "source": [
  141. "print(transformSentence(\"look mom no hands\"))"
  142. ]
  143. },
  144. {
  145. "cell_type": "code",
  146. "execution_count": 8,
  147. "metadata": {},
  148. "outputs": [
  149. {
  150. "name": "stdout",
  151. "output_type": "stream",
  152. "text": [
  153. "This gen_eral concept of Clustering was to groups Data wtih similiar trait \n"
  154. ]
  155. }
  156. ],
  157. "source": [
  158. "print(transformSentence(\"The general idea of clustering is to group data with similar traits\"))"
  159. ]
  160. },
  161. {
  162. "cell_type": "code",
  163. "execution_count": 12,
  164. "metadata": {},
  165. "outputs": [
  166. {
  167. "name": "stdout",
  168. "output_type": "stream",
  169. "text": [
  170. "This manager concept of clusters was to groups datasets wtih similiar traits. \n"
  171. ]
  172. }
  173. ],
  174. "source": [
  175. "def removeFromString(string, chars):\n",
  176. " for c in chars:\n",
  177. " string = string.replace(c, \"\")\n",
  178. " return string\n",
  179. "\n",
  180. "\n",
  181. "def transformSentenceWithHeuristic(sentence):\n",
  182. " outputSentence = \"\"\n",
  183. " \n",
  184. " for word in sentence.split(\" \"):\n",
  185. " try:\n",
  186. " changed = False\n",
  187. " for w, _ in model.most_similar(word):\n",
  188. " clean = removeFromString(w, [' ', '_']).lower()\n",
  189. " if clean not in word.lower() and \"_\" not in w:\n",
  190. " outputSentence += w + \" \"\n",
  191. " changed = True\n",
  192. " break\n",
  193. " outputSentence = outputSentence if changed else outputSentence + word + \" \"\n",
  194. " except Exception:\n",
  195. " outputSentence += word + \" \"\n",
  196. " return outputSentence\n",
  197. "print(transformSentenceWithHeuristic(\"The general idea of clustering is to group data with similar traits.\"))"
  198. ]
  199. },
  200. {
  201. "cell_type": "code",
  202. "execution_count": 10,
  203. "metadata": {},
  204. "outputs": [
  205. {
  206. "name": "stdout",
  207. "output_type": "stream",
  208. "text": [
  209. "Relax up and grabbing a drinks but that was day it I talking abut this hallucinogenic trips it was this 1981 film Fever Treatment. \n"
  210. ]
  211. }
  212. ],
  213. "source": [
  214. "print(transformSentenceWithHeuristic(\"Sit down and grab a drink because it is time that we talk about the LSD trip that is the 1981 movie Shock Treatment.\"))"
  215. ]
  216. },
  217. {
  218. "cell_type": "code",
  219. "execution_count": 15,
  220. "metadata": {},
  221. "outputs": [
  222. {
  223. "name": "stdout",
  224. "output_type": "stream",
  225. "text": [
  226. "(300,)\n",
  227. "[ 0.0123291 0.20410156 -0.28515625 0.21679688 0.11816406 0.08300781\n",
  228. " 0.04980469 -0.00952148 0.22070312 -0.12597656 0.08056641 -0.5859375\n",
  229. " -0.00445557 -0.296875 -0.01312256 -0.08349609 0.05053711 0.15136719\n",
  230. " -0.44921875 -0.0135498 0.21484375 -0.14746094 0.22460938 -0.125\n",
  231. " -0.09716797 0.24902344 -0.2890625 0.36523438 0.41210938 -0.0859375\n",
  232. " -0.07861328 -0.19726562 -0.09082031 -0.14160156 -0.10253906 0.13085938\n",
  233. " -0.00346375 0.07226562 0.04418945 0.34570312 0.07470703 -0.11230469\n",
  234. " 0.06738281 0.11230469 0.01977539 -0.12353516 0.20996094 -0.07226562\n",
  235. " -0.02783203 0.05541992 -0.33398438 0.08544922 0.34375 0.13964844\n",
  236. " 0.04931641 -0.13476562 0.16308594 -0.37304688 0.39648438 0.10693359\n",
  237. " 0.22167969 0.21289062 -0.08984375 0.20703125 0.08935547 -0.08251953\n",
  238. " 0.05957031 0.10205078 -0.19238281 -0.09082031 0.4921875 0.03955078\n",
  239. " -0.07080078 -0.0019989 -0.23046875 0.25585938 0.08984375 -0.10644531\n",
  240. " 0.00105286 -0.05883789 0.05102539 -0.0291748 0.19335938 -0.14160156\n",
  241. " -0.33398438 0.08154297 -0.27539062 0.10058594 -0.10449219 -0.12353516\n",
  242. " -0.140625 0.03491211 -0.11767578 -0.1796875 -0.21484375 -0.23828125\n",
  243. " 0.08447266 -0.07519531 -0.25976562 -0.21289062 -0.22363281 -0.09716797\n",
  244. " 0.11572266 0.15429688 0.07373047 -0.27539062 0.14257812 -0.0201416\n",
  245. " 0.10009766 -0.19042969 -0.09375 0.14160156 0.17089844 0.3125\n",
  246. " -0.16699219 -0.08691406 -0.05004883 -0.24902344 -0.20800781 -0.09423828\n",
  247. " -0.12255859 -0.09472656 -0.390625 -0.06640625 -0.31640625 0.10986328\n",
  248. " -0.00156403 0.04345703 0.15625 -0.18945312 -0.03491211 0.03393555\n",
  249. " -0.14453125 0.01611328 -0.14160156 -0.02392578 0.01501465 0.07568359\n",
  250. " 0.10742188 0.12695312 0.10693359 -0.01184082 -0.24023438 0.0291748\n",
  251. " 0.16210938 0.19921875 -0.28125 0.16699219 -0.11621094 -0.25585938\n",
  252. " 0.38671875 -0.06640625 -0.4609375 -0.06176758 -0.14453125 -0.11621094\n",
  253. " 0.05688477 0.03588867 -0.10693359 0.18847656 -0.16699219 -0.01794434\n",
  254. " 0.10986328 -0.12353516 -0.16308594 -0.14453125 0.12890625 0.11523438\n",
  255. " 0.13671875 0.05688477 -0.08105469 -0.06152344 -0.06689453 0.27929688\n",
  256. " -0.19628906 0.07226562 0.12304688 -0.20996094 -0.22070312 0.21386719\n",
  257. " -0.1484375 -0.05932617 0.05224609 0.06445312 -0.02636719 0.13183594\n",
  258. " 0.19433594 0.27148438 0.18652344 0.140625 0.06542969 -0.14453125\n",
  259. " 0.05029297 0.08837891 0.12255859 0.26757812 0.0534668 -0.32226562\n",
  260. " -0.20703125 0.18164062 0.04418945 -0.22167969 -0.13769531 -0.04174805\n",
  261. " -0.00286865 0.04077148 0.07275391 -0.08300781 0.08398438 -0.3359375\n",
  262. " -0.40039062 0.01757812 -0.18652344 -0.0480957 -0.19140625 0.10107422\n",
  263. " 0.09277344 -0.30664062 -0.19921875 -0.0168457 0.12207031 0.14648438\n",
  264. " -0.12890625 -0.23535156 -0.05371094 -0.06640625 0.06884766 -0.03637695\n",
  265. " 0.2109375 -0.06005859 0.19335938 0.05151367 -0.05322266 0.02893066\n",
  266. " -0.27539062 0.08447266 0.328125 0.01818848 0.01495361 0.04711914\n",
  267. " 0.37695312 -0.21875 -0.03393555 0.01116943 0.36914062 0.02160645\n",
  268. " 0.03466797 0.07275391 0.16015625 -0.16503906 -0.296875 0.15039062\n",
  269. " -0.29101562 0.13964844 0.00448608 0.171875 -0.21972656 0.09326172\n",
  270. " -0.19042969 0.01599121 -0.09228516 0.15722656 -0.14160156 -0.0534668\n",
  271. " 0.03613281 0.23632812 -0.15136719 -0.00689697 -0.27148438 -0.07128906\n",
  272. " -0.16503906 0.18457031 -0.08398438 0.18554688 0.11669922 0.02758789\n",
  273. " -0.04760742 0.17871094 0.06542969 -0.03540039 0.22949219 0.02697754\n",
  274. " -0.09765625 0.26953125 0.08349609 -0.13085938 -0.10107422 -0.00738525\n",
  275. " 0.07128906 0.14941406 -0.20605469 0.18066406 -0.15820312 0.05932617\n",
  276. " 0.28710938 -0.04663086 0.15136719 0.4921875 -0.27539062 0.05615234]\n"
  277. ]
  278. }
  279. ],
  280. "source": [
  281. "print(model[\"cat\"].shape)\n",
  282. "print(model[\"cat\"])"
  283. ]
  284. },
  285. {
  286. "cell_type": "code",
  287. "execution_count": 17,
  288. "metadata": {},
  289. "outputs": [
  290. {
  291. "name": "stdout",
  292. "output_type": "stream",
  293. "text": [
  294. "[[1. 0.76094574 0.17324439]\n",
  295. " [0.76094574 0.99999994 0.12194333]\n",
  296. " [0.17324439 0.12194333 1. ]]\n"
  297. ]
  298. }
  299. ],
  300. "source": [
  301. "import numpy as np\n",
  302. "\n",
  303. "def createCorrelationMatrix(words):\n",
  304. " l = len(words)\n",
  305. " matrix = np.empty((l, l), float)\n",
  306. " \n",
  307. " for r in range(0, l):\n",
  308. " for c in range(0, l):\n",
  309. " matrix[r][c] = model.similarity(words[r], words[c])\n",
  310. " return matrix\n",
  311. "\n",
  312. "testMatrix = [\"cat\", \"dog\", \"computer\"]\n",
  313. "print(createCorrelationMatrix(testMatrix))"
  314. ]
  315. },
  316. {
  317. "cell_type": "code",
  318. "execution_count": 21,
  319. "metadata": {},
  320. "outputs": [
  321. {
  322. "data": {
  323. "image/png": "iVBORw0KGgoAAAANSUhEUgAAAQ8AAAD8CAYAAABpXiE9AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8li6FKAAANfklEQVR4nO3dXYxd1XnG8f9TMBc1rgg1BGPMRySriFYKcUcOlKpy1RCBheRcoMpcBIQqjUBQJVK4sIJEriq1vYhUGoRrKSggRdALErBapymJokAuoBjXBoxLcCgSI1sxAWpwoaFO3l7MJh0NZzzjdfacc2z+P+no7L3XOnu9rLGf2Wd/4FQVknSyfmvcBUg6NRkekpoYHpKaGB6SmhgekpoYHpKanDnMh5OcC/wjcCnwGvDnVfX2gH6vAe8CvwKOV9XUMONKGr9hjzy2AT+sqvXAD7v1hfxpVV1pcEinh2HDYwvwYLf8IPCFIfcn6RSRYe4wTfJfVXXOnPW3q+oTA/r9J/A2UMA/VNWOE+xzGpgGWLly5R9efvnlzfWd9n793LgrmHgv/fu4K5hsHwDHq9Ly2UXDI8kPgAsGNN0NPLjE8Liwqg4lOR94AvjLqnpyseKmpqZq9+7di3X7+DrW9DP/WLly1bgrmGw/Bd5rDI9FT5hW1ecWakvy8yRrqupwkjXAkQX2cah7P5Lku8BGYNHwkDS5hj3nsRO4pVu+BXh8fockK5Os+nAZ+Dzw4pDjShqzYcPjr4Frk7wCXNutk+TCJLu6Pp8EfpJkH/BvwD9X1b8MOa6kMRvqPo+qehP4swHbDwGbu+VXgU8PM46kyeMdppKaGB6SmhgekpoYHpKaGB6SmhgekpoYHpKaGB6SmhgekpoYHpKaGB6SmhgekpoYHpKaGB6SmhgekpoYHpKaGB6SmhgekpoYHpKaGB6SmhgekpoYHpKaGB6SmhgekpoYHpKaGB6Smhgekpr0Eh5JrkvycpKDSbYNaE+Se7v255Ns6GNcSeMzdHgkOQO4D7geuAK4KckV87pdD6zvXtPA/cOOK2m8+jjy2AgcrKpXq+oD4BFgy7w+W4CHatbTwDlJ1vQwtqQx6SM81gKvz1mf6badbB9Jp5A+wiMDtlVDn9mOyXSS3Ul2v/HGG0MXJ2l59BEeM8C6OesXAYca+gBQVTuqaqqqps4777weypO0HPoIj2eB9UkuS3IWsBXYOa/PTuDm7qrLVcDRqjrcw9iSxuTMYXdQVceT3Al8HzgDeKCq9ie5rWvfDuwCNgMHgfeAW4cdV9J4DR0eAFW1i9mAmLtt+5zlAu7oYyxJk8E7TCU1MTwkNTE8JDUxPCQ1MTwkNTE8JDUxPCQ1MTwkNTE8JDUxPCQ1MTwkNTE8JDUxPCQ1MTwkNTE8JDUxPCQ1MTwkNTE8JDUxPCQ1MTwkNTE8JDUxPCQ1MTwkNTE8JDUxPCQ1MTwkNTE8JDUxPCQ16SU8klyX5OUkB5NsG9C+KcnRJHu71z19jCtpfM4cdgdJzgDuA64FZoBnk+ysqpfmdX2qqm4YdjxJk6GPI4+NwMGqerWqPgAeAbb0sF9JE2zoIw9gLfD6nPUZ4LMD+l2dZB9wCLirqvYP2lmSaWAa4OJ1wLH0UOJp6uwadwUT7xX883Mivxzis30ceQz66cz/U70HuKSqPg38PfDYQjurqh1VNVVVU+et7qE6Scuij/CYAdbNWb+I2aOL36iqd6rqWLe8C1iRxGiQTmF9hMezwPoklyU5C9gK7JzbIckFSdItb+zGfbOHsSWNydDnPKrqeJI7ge8DZwAPVNX+JLd17duBG4HbkxwH3ge2VpVf2KVTWCb57/DUhtTuJ8ddxQTzhOmiVsYTpifyP8CvqpomyTtMJTUxPCQ1MTwkNTE8JDUxPCQ1MTwkNTE8JDUxPCQ1MTwkNTE8JDUxPCQ1MTwkNTE8JDUxPCQ1MTwkNTE8JDU
xPCQ1MTwkNTE8JDUxPCQ1MTwkNTE8JDUxPCQ1MTwkNTE8JDUxPCQ1MTwkNeklPJI8kORIkhcXaE+Se5McTPJ8kg19jCtpfPo68vgWcN0J2q8H1nevaeD+nsaVNCa9hEdVPQm8dYIuW4CHatbTwDlJ1vQxtqTxGNU5j7XA63PWZ7ptH5FkOsnuJLvf+MVIapPUYFThkQHbalDHqtpRVVNVNXXe6mWuSlKzUYXHDLBuzvpFwKERjS1pGYwqPHYCN3dXXa4CjlbV4RGNLWkZnNnHTpI8DGwCVieZAb4GrACoqu3ALmAzcBB4D7i1j3EljU8v4VFVNy3SXsAdfYwlaTJ4h6mkJoaHpCaGh6QmhoekJoaHpCaGh6QmhoekJoaHpCaGh6QmhoekJoaHpCaGh6QmhoekJoaHpCaGh6QmhoekJoaHpCaGh6QmhoekJoaHpCaGh6QmhoekJoaHpCaGh6QmhoekJoaHpCaGh6QmvYRHkgeSHEny4gLtm5IcTbK3e93Tx7iSxqeXf+ga+BbwDeChE/R5qqpu6Gk8SWPWy5FHVT0JvNXHviSdGvo68liKq5PsAw4Bd1XV/kGdkkwD0wArgCtXja7AU80rZNwlTLz/rhp3CRNtamqq+bOjCo89wCVVdSzJZuAxYP2gjlW1A9gB8NuJP3lpQo3kaktVvVNVx7rlXcCKJKtHMbak5TGS8EhyQZJ0yxu7cd8cxdiSlkcvX1uSPAxsAlYnmQG+xuwpC6pqO3AjcHuS48D7wNYqv4xKp7JewqOqblqk/RvMXsqVdJrwDlNJTQwPSU0MD0lNDA9JTQwPSU0MD0lNDA9JTQwPSU0MD0lNDA9JTQwPSU0MD0lNDA9JTQwPSU0MD0lNDA9JTQwPSU0MD0lNDA9JTQwPSU0MD0lNDA9JTQwPSU0MD0lNDA9JTQwPSU0MD0lNhg6PJOuS/CjJgST7k3xpQJ8kuTfJwSTPJ9kw7LiSxquPf+j6OPCVqtqTZBXwXJInquqlOX2uB9Z3r88C93fvkk5RQx95VNXhqtrTLb8LHADWzuu2BXioZj0NnJNkzbBjSxqfXs95JLkU+AzwzLymtcDrc9Zn+GjASDqF9PG1BYAkZwOPAl+uqnfmNw/4SC2wn2lgGmBFX8VJ6l0vRx5JVjAbHN+uqu8M6DIDrJuzfhFwaNC+qmpHVU1V1VRvySapd31cbQnwTeBAVX19gW47gZu7qy5XAUer6vCwY0sanz5+uV8DfBF4IcnebttXgYsBqmo7sAvYDBwE3gNu7WFcSWM0dHhU1U8YfE5jbp8C7hh2LEmTwztMJTUxPCQ1MTwkNTE8JDUxPCQ1MTwkNTE8JDUxPCQ1MTwkNTE8JDUxPCQ1MTwkNTE8JDUxPCQ1MTwkNTE8JDUxPCQ1MTwkNTE8JDUxPCQ1MTwkNTE8JDUxPCQ1MTwkNTE8JDUxPCQ1MTwkNTE8JDUZOjySrEvyoyQHkuxP8qUBfTYlOZpkb/e6Z9hxJY3XmT3s4zjwlarak2QV8FySJ6rqpXn9nqqqG3oYT9IEGPrIo6oOV9Webvld4ACwdtj9SppsfRx5/EaSS4HPAM8MaL46yT7gEHBXVe1fYB/TwHS3+st98GKfNQ5pNfCLcRcxh/UsIsmk1TRp9fxe6wdTVb1UkORs4MfAX1XVd+a1/Q7w66o6lmQz8HdVtX4J+9xdVVO9FNgD6zmxSasHJq+m06meXq62JFkBPAp8e35wAFTVO1V1rFveBazofiNIOkX1cbUlwDeBA1X19QX6XND1I8nGbtw3hx1b0vj0cc7jGuCLwAtJ9nbbvgpcDFBV24EbgduTHAfeB7bW0r4v7eihvj5Zz4lNWj0weTWdNvX0ds5D0seLd5hKamJ4SGoyMeGR5NwkTyR5pXv/xAL9XkvyQneb++5lqOO6JC8nOZhk24D2JLm3a38+yYa+a2ioaWS3/yd5IMmRJAPvvxnT/CxW00gfj1jiIxsjm6dle4SkqibiBfwtsK1b3gb
8zQL9XgNWL1MNZwA/Az4FnAXsA66Y12cz8D0gwFXAM8s8L0upaRPwTyP6Of0JsAF4cYH2kc7PEmsa2fx0460BNnTLq4CfjvP
  324. "text/plain": [
  325. "<Figure size 432x288 with 1 Axes>"
  326. ]
  327. },
  328. "metadata": {
  329. "needs_background": "light"
  330. },
  331. "output_type": "display_data"
  332. }
  333. ],
  334. "source": [
  335. "def displayMap(a):\n",
  336. " plt.imshow(a, cmap='hot', interpolation='nearest')\n",
  337. " plt.show()\n",
  338. "\n",
  339. "displayMap(createCorrelationMatrix(testMatrix))"
  340. ]
  341. },
  342. {
  343. "cell_type": "code",
  344. "execution_count": null,
  345. "metadata": {},
  346. "outputs": [],
  347. "source": []
  348. },
  349. {
  350. "cell_type": "code",
  351. "execution_count": null,
  352. "metadata": {},
  353. "outputs": [],
  354. "source": []
  355. },
  356. {
  357. "cell_type": "code",
  358. "execution_count": 48,
  359. "metadata": {},
  360. "outputs": [
  361. {
  362. "name": "stdout",
  363. "output_type": "stream",
  364. "text": [
  365. "AxesImage(90,90;446.4x543.6)\n",
  366. "AxesImage(90,90;446.4x543.6)\n"
  367. ]
  368. },
  369. {
  370. "data": {
  371. "image/png": "iVBORw0KGgoAAAANSUhEUgAAAq8AAALICAYAAABRkBl/AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8li6FKAAAgAElEQVR4nOzde5xdZXXw8d+aTK7kBoTcI4SLQCI0QAIqtYIUjIiAooKgVkQjvkK1Wi+tb7Xeba0WENuglqK2r6BWBPxAUCgIKnKHmIRQIkFzIcQUIXeSzKz3j30STpKZIWeckz078/v6OR/O3s+zT9aZz0lcs856nh2ZiSRJklQFLWUHIEmSJO0qk1dJkiRVhsmrJEmSKsPkVZIkSZVh8ipJkqTKMHmVJElSZZi8SpIkqUsRcWVErIyIeZ2MR0RcFhGLImJuRBzdrFhMXiVJkvRCrgJmdjH+GuCQ2mMW8K/NCsTkVZIkSV3KzDuAp7uYcgbw7Sz8ChgZEeOaEUtrM15UkiRJXYtRQ5NNbWWHUVizcT6wse7M1zPz6w28wgRgSd3x0tq5J3sguu2YvEqSJJVhUxu8dHLZURR++sjGzJz+R7xCdHAu/4jX65RtA5IkSfpjLQUm1R1PBJY34w8yeZUkSdIf63rg7bVdB14KPJuZPd4yALYNSJIklSPo+Mv2XigivgucAIyKiKXAJ4H+AJk5G7gROBVYBKwHzm9WLCavkiRJ6lJmvuUFxhN43+6IxeRVkiSpLFGR0msvYs+rJEmSKsPkVZIkSZVh24AkSVJZ7BpomJVXSZIkVYbJqyRJkirDtgFJkqSyuNtAw6y8SpIkqTKsvEqSJJXFwmvDrLxKkiSpMkxeJUmSVBm2DUiSJJUhgBb7Bhpl5VWSJEmVYfIqSZKkyrBtQJIkqSx2DTTMyqskSZIqw+RVkiRJlWHbgCRJUinC28N2g5VXSZIkVYaVV0mSpLJYeG2YlVdJkiRVhsmrJEmSKsO2AUmSpDIEtg10g5VXSZIkVYbJqyRJkirDtgFJkqSyuM9rw6y8SpIkqTKsvEqSJJXFwmvDrLxKkiSpMkxeJUmSVBm2DUiSJJXFBVsNs/IqSZKkyjB5lSRJUmXYNiBJklQGbw/bLVZeJUmSVBkmr5IkSaoM2wYkSZLK4m4DDbPyKkmSpMqw8ipJklQWy4gN80cmSZKkyjB5lSRJUmXYNiBJklQW12s1zMqrJEmSKsPkVZIkSZVh24AkSVIZAvd57QYrr5IkSaoMK6+SJHUiIiIzs+w4tAez8NowK6+SJHUiMzMi/P9KqRex8ipJUp2ImAq8Gtgf+GJmPllySJLq+NukJEk1EfHnwBeAvYE/ADdExJk7zPGLXvWQKBZs9YZHhVh5lSQJiIghwKuAH2bmVbVzVwDHR8SpwMDMvNYeWKlcVl4lSSoMAP4ceLDu3IuAjwInAH8ZEd+LiGFb+2Dth5V2P//SSZJUaAWWAr+rO/dvwJWZ+ZHMPBF4Dhieme0RMSgz28EkVn+E6CWPCvEvmyRJQGauAu4GfhUR74mINwDPZea/RsSA2rQXAxMj4lXAv0XEibVr28uJWup7TF4lSarJzH8AXgvsR1GBvbt2flNEvBUYkpl3A2cCk4A3R8T8iJiy9TWswkrN5YItSZLYtotAZOYi4LMRMQw4NCJ+ANwPvAl4Z23x1mDgU5l5a0R8hWJR11HAfZn5aFnvQRVUsZX+vYG/HUqSRHFDglova9SO12TmScCtwBrgzcCjwNnAbcBdtUsPq40NAq6NiLPg+QqslVipZ1l5lSSpztatsCKiX2a2Zea/bh2LiE8B64G7M3N9RJwEjANOysynI+J4YHJt+ihgZd2iLm81q+1VcLFUb+Bvg5IkdSAz2zo4/Q3g34HHI6I/8FfAt2qJ6zSKmxv8sjb37oj4ZkQcWXs9E1epB1h5lSRpF9Qqp0spttOi1h5wUGaeVpt
yNrAgM38ZERcAo4GHgMsi4tcUiW5mZptVWKn7TF4lSdoFOyabmXl1RNwHEBGnU+w+8NXa8FeAN2bmTRFxO/DxzNxSq9a2mbhqGxdsNcy2AUl7pIjoV3YM2nNtXdQF/Kb2332ARzLz7oj4R2BxLXEdAGwA9ouIwcCnIuKf6/aNdUGX1CD/wkjao9QqW9S+mt0rIiaUHZP2PFsrp3X/vSozPxcR44GLgDl1098FLKTYjeA44DpgC0BEHFTb4WBwROzvL13SC7NtQN0SES3eUUa9Sa0SdhTw1tr+nH8FvBI4IyK+lpkPlxqg9lh1+8O2A/8P+B7wXERcDfweOAg4B/hb4B6KvWDbI+LPgO9HxCTg1cAhwPUU23Gpr7BroGEmr2pIRAzJzPUmruqFTgbeC9xHsaDmuxTVrseAxwEiojUzt5QWofZIteprRsTBwNTMPAEgIj5O8fn7LMW2WUcBH6RoI4Aimf0CRa/sD4C7MvNLuzd6qXpsG9AuiYjWiDgXmB0Rd0XEm3cY93dHlSYi9gJOBK7LzM9l5leA4cAQ4M6t82oLZvysqilqd+Y6oO74c5n5vcx8iqK+tgp4rNbS8jZgv8y8BJgA/BDYGBGX1b45AIp/W/3M7uFaesmjQqy8ale9AzgJ+E+Kr8H+LiLGAv+RmU9nZtpKoBINAf6U4itbIuIYivvSL6NoH9hr656cmbmgtCi1x8vMddDhDQmeAfYHvhER8yi+JXhPROwDvAG4MzO/WuuZ3at2C9p5mTl/N78FqderWK6tMtSqAFOAqzPzx5l5N/B6io24/29EXBkRI01cVaK9gE3A1sT0zcB8in03l9X24byRYqX3dlUsF8ioGTrYVut/M/NPKW5wcBzFbgQ/Bc4EBgO31KaeAPwdMIYi0b28/jNqFVYyedWuGQycSrHQYKs24LeZ+UHgYeDGiBgDEBGTI+KEiBi0+0NVH/Vb4OfALyLi+8CxwFiKRTQfrM2ZQ7HCe2hE9IuI0fD8XZTcrkjNtDUBzczbgfNqD4BjKBZwPVKruv4txbdcazPz5bU54yOitXb91lvXmsTuCYJin9fe8KgQ/7HWrngOeACYVnfuCuDSiLiJYnHMrcCgiJgCfBo4miLBlZouC58E3gT8lOJbgQHANXXTxlNUs9ZRVL+uiIiHI2JG7TX85kBNU/dLUr/MfA5YWUto+wHvriWurwVuoygWnBERd1Bss9UCTI+Ij0bEUbXX8yYH6rPsedULysxnI+JK4LMRcRrwv8BEikThBODjwGbgX4BXAKcDL6ZIHJaVEbP6lrqtipYAX4+IkcDBwBdqldgDgA8Ds4F3145fCfw58PaIeCfwIWCDSYGaaWsSW/fL0oUR8QpgJMW3WK/MzMcpktfTKP4tXQucBrwNOCYitgDv3tpfK+0OETETuJTiF65vZuYXdxjfG7iSYmu4jcA7M3NeM2IxedUuycxbgFsi4mSKKsChmbk2Ih4G5gJLKFbS7g98hqLqdWZEXJmZGzp7Xakn1G1VFLUq7DPAfRExB/gPis/odylaBx4EZtYWGT4AfAJ4JjPXlxW/+qZaFbYtM++sHR8ATIyIzwJfzcwf186fRlEw+OvM/H5EnGbiugepwDf2tW8JvkaxJeFS4N6IuH6HBbB/CzyUma+PiMNq809qRjwmr2pIZv40IgYCb6tVtPam+C3swxRfeY0D/jMz79r6D3OJ4aqPqe8HrCWxX46If60NbYiI1wNzM/OxWo/rSGAEMKv+utLegPqUHf99zMwnIuJMir1fb6ptT7gOmAH8JjO/X5v3490erPq6Y4FFtW8FqN2A4wyeXyQLxcLuLwBk5sKIOCAixtS2iutR9ryqYZn5XGa+FbgYuB/4EbCGomXgYYrK1k7/MEu7S10S2692U42t1f/7gQm1OxsdD3wFuK22WMbEVaWqfV6fycz3Amdk5kKK1paDKG5i4O4YaqZREXFf3WNW3dgEim9Yt1paO1fvYYpt34iIYym+iZ3YjEC
tvKrbMnMF8FGAiBhAcWvDOzNzY6mBSTX1v0BFxEuApyj217yQ4huDiRR9hFLpajcvaCme5tZE4UGgLTMf2TqntADVHL1npf+
  372. "text/plain": [
  373. "<Figure size 720x720 with 2 Axes>"
  374. ]
  375. },
  376. "metadata": {
  377. "needs_background": "light"
  378. },
  379. "output_type": "display_data"
  380. },
  381. {
  382. "data": {
  383. "text/plain": [
  384. "<Figure size 432x288 with 0 Axes>"
  385. ]
  386. },
  387. "metadata": {},
  388. "output_type": "display_data"
  389. }
  390. ],
  391. "source": [
  392. "from matplotlib import pyplot as plt\n",
  393. "import matplotlib.image as mpimg\n",
  394. "import matplotlib.ticker\n",
  395. "\n",
  396. "def displayMap(a):\n",
  397. " plt.imshow(a, cmap='hot', interpolation='nearest')\n",
  398. " plt.show()\n",
  399. " \n",
  400. " \n",
  401. " \n",
  402. "def heatmap(data, row_labels, col_labels, ax=None):\n",
  403. " \"\"\"\n",
  404. " Create a heatmap from a numpy array and two lists of labels.\n",
  405. "\n",
  406. " Parameters\n",
  407. " ----------\n",
  408. " data\n",
  409. " A 2D numpy array of shape (N, M).\n",
  410. " row_labels\n",
  411. " A list or array of length N with the labels for the rows.\n",
  412. " col_labels\n",
  413. " A list or array of length M with the labels for the columns.\n",
  414. "\n",
  415. " Notes\n",
  416. " -----\n",
  417. " The colormap (\"YlGn\") and the colorbar label (\"Correlation\") are fixed in\n",
  418. " the body; there are no `cbar_kw`, `cbarlabel` or `**kwargs` parameters,\n",
  419. " and the local `cbar_kw` dict is never used.\n",
  420. " \"\"\"\n",
  421. " cbar_kw={}\n",
  422. " ax = ax if ax is not None else plt.gca()\n",
  423. "\n",
  424. " im = ax.imshow(data, cmap=\"YlGn\")\n",
  425. "\n",
  426. " # Create colorbar\n",
  427. " cbar = ax.figure.colorbar(im, ax=ax, label=\"Correlation\")\n",
  428. " cbar.ax.set_ylabel(\"Correlation\", rotation=-90, va=\"bottom\")\n",
  429. "\n",
  430. " # We want to show all ticks...\n",
  431. " ax.set_xticks(np.arange(data.shape[1]))\n",
  432. " ax.set_yticks(np.arange(data.shape[0]))\n",
  433. " # ... and label them with the respective list entries.\n",
  434. " ax.set_xticklabels(col_labels)\n",
  435. " ax.set_yticklabels(row_labels)\n",
  436. "\n",
  437. " # Let the horizontal axes labeling appear on top.\n",
  438. " ax.tick_params(top=True, bottom=False,\n",
  439. " labeltop=True, labelbottom=False)\n",
  440. "\n",
  441. " # Rotate the tick labels and set their alignment.\n",
  442. " plt.setp(ax.get_xticklabels(), rotation=-30, ha=\"right\",\n",
  443. " rotation_mode=\"anchor\")\n",
  444. "\n",
  445. " # Turn spines off and create white grid.\n",
  446. " for edge, spine in ax.spines.items():\n",
  447. " spine.set_visible(False)\n",
  448. "\n",
  449. " ax.set_xticks(np.arange(data.shape[1]+1)-.5, minor=True)\n",
  450. " ax.set_yticks(np.arange(data.shape[0]+1)-.5, minor=True)\n",
  451. " ax.grid(which=\"minor\", color=\"w\", linestyle='-', linewidth=3)\n",
  452. " ax.tick_params(which=\"minor\", bottom=False, left=False)\n",
  453. "\n",
  454. " print(im)\n",
  455. " \n",
  456. " return im, cbar\n",
  457. "\n",
  458. "\n",
  459. "def annotate_heatmap(im, data=None,\n",
  460. " threshold=None, **textkw):\n",
  461. " \"\"\"\n",
  462. " A function to annotate a heatmap.\n",
  463. "\n",
  464. " Parameters\n",
  465. " ----------\n",
  466. " im\n",
  467. " The AxesImage to be labeled.\n",
  468. " data\n",
  469. " Data used to annotate. If None, the image's data is used. Optional.\n",
  470. " valfmt\n",
  471. " Not a parameter: the format is fixed to \"{x:.2f}\" in the body, so a\n",
  472. " `valfmt` keyword from a caller is absorbed by `**textkw` and does not\n",
  473. " change the annotation text.\n",
  474. " textcolors\n",
  475. " Not a parameter: fixed to [\"black\", \"white\"] in the body; the first is\n",
  476. " selected for values not above the threshold, the second for those above.\n",
  477. " threshold\n",
  478. " Value in data units according to which the colors from textcolors are\n",
  479. " applied. If None (the default) uses the middle of the colormap as\n",
  480. " separation. Optional.\n",
  481. " **textkw\n",
  482. " Extra keyword arguments. They are merged into the `kw` dict of text\n",
  483. " properties, which the body builds but does not pass to `text`.\n",
  484. " \"\"\"\n",
  485. " valfmt=\"{x:.2f}\"\n",
  486. " textcolors=[\"black\", \"white\"]\n",
  487. " if not isinstance(data, (list, np.ndarray)):\n",
  488. " data = im.get_array()\n",
  489. "\n",
  490. " # Normalize the threshold to the image's color range.\n",
  491. " if threshold is not None:\n",
  492. " threshold = im.norm(threshold)\n",
  493. " else:\n",
  494. " threshold = im.norm(data.max())/2.\n",
  495. "\n",
  496. " # Set default alignment to center, but allow it to be\n",
  497. " # overwritten by textkw.\n",
  498. " kw = dict(horizontalalignment=\"center\",\n",
  499. " verticalalignment=\"center\")\n",
  500. " kw.update(textkw)\n",
  501. "\n",
  502. " # Get the formatter in case a string is supplied\n",
  503. " if isinstance(valfmt, str):\n",
  504. " valfmt = matplotlib.ticker.StrMethodFormatter(valfmt)\n",
  505. "\n",
  506. " # Loop over the data and create a `Text` for each \"pixel\".\n",
  507. " # Change the text's color depending on the data.\n",
  508. " texts = []\n",
  509. " for i in range(data.shape[0]):\n",
  510. " for j in range(data.shape[1]):\n",
  511. " kw.update(color=textcolors[int(im.norm(data[i, j]) > threshold)])\n",
  512. " text = im.axes.text(j, i, valfmt(data[i, j], None))\n",
  513. " texts.append(text)\n",
  514. "\n",
  515. " return texts\n",
  516. "\n",
  517. "def plotWordCorrelations(words):\n",
  518. " fig, ax = plt.subplots(figsize=(10,10))\n",
  519. " \n",
  520. " matrix = createCorrelationMatrix(words)\n",
  521. "\n",
  522. " im, cbar = heatmap(matrix, words, words, ax=ax)\n",
  523. " \n",
  524. " print(im)\n",
  525. " texts = annotate_heatmap(im, valfmt=\"{x:.1f} t\")\n",
  526. "\n",
  527. " fig.tight_layout()\n",
  528. " fig.savefig(str(len(words)) + '.png')\n",
  529. " plt.show()\n",
  530. " \n",
  531. " \n",
  532. "plotWordCorrelations([\"cat\", \"dog\", \"computer\"])"
  533. ]
  534. },
  535. {
  536. "cell_type": "code",
  537. "execution_count": 49,
  538. "metadata": {},
  539. "outputs": [
  540. {
  541. "name": "stdout",
  542. "output_type": "stream",
  543. "text": [
  544. "AxesImage(90,90;446.4x543.6)\n",
  545. "AxesImage(90,90;446.4x543.6)\n"
  546. ]
  547. },
  548. {
  549. "data": {
  550. "image/png": "iVBORw0KGgoAAAANSUhEUgAAArAAAALICAYAAACHNcMaAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8li6FKAAAgAElEQVR4nOzdd3gVdfbH8fdJo4UOgRQURKTYQEBUsKCrstjQn72soiu6a29rXRf7KoqVVbHLumtBFywIWFAEQVEEEQREQCABAek17fz+mJuQQMAYSeZO+LyeJ4/3zsxNzh2HuWfOnO/3mrsjIiIiIhIVCWEHICIiIiLyWyiBFREREZFIUQIrIiIiIpGiBFZEREREIkUJrIiIiIhEihJYEREREYkUJbAiIiIiskNm9ryZLTWz77az3szsMTObY2bfmtkBlRmPElgRERER+TUvAr12sP6PQJvYTz/gycoMRgmsiIiIiOyQu48FVuxgk5OAlz0wEWhgZumVFU9SZf1iEREREdk+a5Lq5BaEHUZg7abpwKYSSwa7++Df8BsygYUlni+KLVu8E6LbhhJYERERkTDkFsBBrcKOIvDB95vcvcvv+A1WxjL/Hb9vh9RCICIiIiK/1yKgRYnnWUBOZf0xJbAiIiIi8nu9DfwpNhvBQcBqd6+U9gFQC4GIiIhIOIyyb7zHITP7L3AE0MTMFgH/AJIB3P0pYATQG5gDbAD6VmY8SmBFREREZIfc/axfWe/AZVUUjhJYERERkdBYREqwcUY9sCIiIiISKUpgRURERCRS1EIgIiIiEhZ1EFSIKrAiIiIiEilKYEVEREQkUtRCICIiIhIWzUJQIarAioiIiEikqAIrIiIiEhYVYCtEFVgRERERiRQlsCIiIiISKWohEBEREQmDAQnqIagIVWBFREREJFKUwIqIiIhIpKiFQERERCQs6iCoEFVgRURERCRSlMCKiIiISKSohUBEREQkFKavkq0gVWBFREREJFJUgRUREREJiwqwFaIKrIiIiIhEihJYEREREYkUtRCIiIiIhMFQC0EFqQIrIiIiIpGiBFZEREREIkUtBCIiIiJh0TywFaIKrIiIiIhEiiqwIiIiImFRAbZCVIEVERERkUhRAisiIiIikaIWAhEREZGwaBBXhagCKyIiIiKRogRWRERERCJFLQQiIiIiYdBXyVaYKrAiIiIiEilKYEVEREQkUtRCICIiIhIWzUJQIarAioiIiEikqAIrIiIiEhaVEitEu01EREREIkUJrIiIiIhEiloIRERERMKiMVwVogqsiIiIiESKElgRERERiRS1EIiIiIiEwdA8sBWkCqyIiIiIRIoqsCIiIhFnZonuXhB2HFIBKsBWiCqwIiIiEWVmtQGKklczqxVuRCJVQwmsiIhIxJhZkpmdDTxlZhPM7IzYqkvN7A4zqx9mfCKVTS0EIiIi0XMBcBTwCvALcK2ZdQaaAO+7++oQY5NyMw3iqiBVYEVERCLEzOoCHYBX3f1dd5/g7qcRfKZvAloVtRaIVFdKYEVERKKlFtAb+LJogZn1APYCUoBUYKaZHVDyRWamz3ypNtRCICIiEi2bgclAR2BxbNmZwEfAm+6+yMzqxNZPNrP67r7a3QvNLMXdc8MJW8qkDoIK0dWYiIhIhMT6W58H7jCzB83sOoKC1Jfuvii22bnAT7HHd5vZ3WbWsih5NbNBZlbfTA2YEk1KYEVERCLG3T909wOBsUAbYCrwPYCZDQSmuvtHZnYSwYAvAz4ysy6xXzE0VpX1qo9e5PdTC4GISCUxswR3Lww7Dqm+3P1tMxsJpLn7KjNrAVwIdIwN9joZuNfd7zOzvwOJZjYLOLjk79GxGiIVwStEFVgRkUoS6znUeVYqlbvnlmgduAwY5u7zgTOAOsBDse0KgWeA6e6+wsx2M7OuRet0rEqUqAIrIrITmdnewLHA7sA/3X1xbLnpdu3
Oparhttz9JgAz2x04D3i8RN9rB4KK7G5mdjxwDHCImS0ELnf37Nh2PYGx+mraKmBoEFcF6WqriukKV6JEx+tvY2Z/AO4DGgIrgGFmdipAUfJqZonhRVg9mNnBUFxRlDK4+0/A/e4+tMTiJ4FHY4PA2gDN3b0L8BVwmJntaWb3A6e4e4H+/Us8UwW2iphZsrvnFd2m0YlX4pmZNXL3FTpeyy82cfyRwFvu/mJs2X+B9mZ2M/CTu/9HVa2KM7MawF+A48ysHvCIu/83ts5gy4WCgLuPKHoc+9rZA9z98Niid4EDzOxgd78ndmF1LXAdcEvs9YWx1+ocIHFHV1eVzAIHAA+Y2WAza6YTQeUzs5ZhxxBFZpZoZn2A18zsOTPLKPkhFnJ48S4F+APwDRS3DMwGRgP/A041s6FmVq9oX2oKo98shWB6qHsJBir1MLMjzGw3j9Fxul0zgN5mVtvMjnb3H4C3gHvMLD12YXUg8B7Q1cyuM7MU2NIfq+O1kpjFx0/E6B965TsauBVYSnACecbMepXcQLcUd65Y/9YYM2sQdiwRVAO4CHgcmAY8a2b7QOlqTHjhxbUkYBGwEIJKoJldDNwDnAhcA/wIJJeYUF5tBeVkZknuvpbgYqCnu08HrgYKgIfM7MXYhP0qEJTB3ae4+2cEvdl3mtmjQC6wFkg2s+7Ake5+UuxraV8AHjSzO82sgbsXqrot8UQfRJXIzFIJbim+4+73ufsjBL1x55rZAWbWFCDWaxS9y584FLvFeCEwKDalTKL2bfnE9pMDeUDr2PHaD2htZi+Y2TmgvsPtcfflwBfARDPra2YXEowIHwFkAe8DXYDmZrY/cF+sZxa1FexY7Ng8MFbVvgfoZGanAMlATizh+hZ4MtbKIdvh7t8D3YHVwCUELS8LCPpj7y6xaQ/gUoIhRiPNrG/J36MLWQmbDsDKVQc4iKBBHjPrDMwFvgNuAl4ws1Fm1kFXtjvNecRGgJvZSe5eUKLKpUR2x1q6+0bgz8DBZtYjNjXPRwS3bM8ws0fNrBaAmXUxs1tjc00K4O73E3xHfWZs0ejYhPNXEvQcfh+rHP6BYAT4BWb2TdGgJNmuBKAn8KWZ/RF4G+gDjCeovo4kuHuwyt03xG6Tdy16sZKt0mLV1NuB09z9JTPrCJi7PwzFxZcrgBvd/e/AAOCs2L/5Y4t+R1jxVzsWJz8Ro3/UlasOwWExM/b8dIJvSjkLyHb34wmqM3ds/ULdUvztzKwhcCPwFPAs0N/MzipaH7ulq/1aBjNLJkimniWown5N0C/3NMFXVv4f8CBQI5bkApwGNAIKzSzNzHbpQaGxfvcEd5/j7ncDHwInxnqJXwUOBZ4ws25AK4Lpjc4FngDOi9092OZ3VuV7iFexC9F7gNuBvwOpwAHAN+7eh2Bu0/OANbGXXEmwr/9iZu2UbJXN3TfHHuYAC8zs1djguPOAhKKEFjgMqA+0BAaa2UMlf48uECQMOugqSSxRygHeBMaZ2RCgK5BBcKV7TWzTkUB+0cAOM2sMuqVYQdcBM9z9dnefSnDLtpaZHW5mt5tZQ+3Xsrl7HkGVdRXBbfCaBINlOhGM+m4GvE7QXoCZ9QYaAMOADcBAoEWVBx5HYmOICouSTndf4O7tgH8TnGs/AeYAJxD0yr4Ve+kBQCN332xmdc3skBK3wU80s5OVyAbc/X13P4Qg6X+N4GILoB4wAXjfzHYDegETY+s+MbMTAMyslpkdVHSelYC7L3X34wj6XusCfwPuB7BgvtjawM2xKbn+BNQ0s2QzqxHr5S7qj1eBoCIS4uQnYnbpikkl+wNwOUFS9QlB5eBdYA+CE2+RDKA5QSP98wQJV2vgVncfXZUBR5kFk8dfDxwfe94EWA+kEVxE7E9wIXGnu7+23V+0i4pVDjcD18cGd2wkSAp+dvdfzOxlgqrXZ7FK60kErTAAY4BUd58XRuzxpuTArFjlcIyZfQEUEtz2zgRedPflZtYWOISgPaMdQZU
7GciIVbnaEQy0GU1wPAvBBZeZDQQeMbO3CfbTf939UzN7nOCc+6i7r4xdDOwWe+mNBBdeTwG/hBB6XHP3UbEktL+7j47tuxO
  551. "text/plain": [
  552. "<Figure size 720x720 with 2 Axes>"
  553. ]
  554. },
  555. "metadata": {
  556. "needs_background": "light"
  557. },
  558. "output_type": "display_data"
  559. },
  560. {
  561. "data": {
  562. "text/plain": [
  563. "<Figure size 432x288 with 0 Axes>"
  564. ]
  565. },
  566. "metadata": {},
  567. "output_type": "display_data"
  568. }
  569. ],
  570. "source": [
  571. "plotWordCorrelations([\"good\", \"bad\", \"salty\", \"candy\", \"santa\", \"christmas\"])"
  572. ]
  573. },
  574. {
  575. "cell_type": "code",
  576. "execution_count": 11,
  577. "metadata": {},
  578. "outputs": [
  579. {
  580. "name": "stderr",
  581. "output_type": "stream",
  582. "text": [
  583. "/opt/conda/lib/python3.7/site-packages/ipykernel_launcher.py:11: DeprecationWarning: Call to deprecated `wv` (Attribute will be removed in 4.0.0, use self instead).\n",
  584. " # This is added back by InteractiveShellApp.init_path()\n",
  585. "/opt/conda/lib/python3.7/site-packages/ipykernel_launcher.py:12: DeprecationWarning: Call to deprecated `wv` (Attribute will be removed in 4.0.0, use self instead).\n",
  586. " if sys.path[0] == '':\n"
  587. ]
  588. },
  589. {
  590. "ename": "MemoryError",
  591. "evalue": "Unable to allocate array with shape (3000000, 300) and data type float64",
  592. "output_type": "error",
  593. "traceback": [
  594. "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
  595. "\u001b[0;31mMemoryError\u001b[0m Traceback (most recent call last)",
  596. "\u001b[0;32m<ipython-input-11-7b046372cab7>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 27\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 28\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 29\u001b[0;31m \u001b[0mx_vals\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_vals\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlabels\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mreduce_dimensions\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
  597. "\u001b[0;32m<ipython-input-11-7b046372cab7>\u001b[0m in \u001b[0;36mreduce_dimensions\u001b[0;34m(model)\u001b[0m\n\u001b[1;32m 20\u001b[0m \u001b[0mvectors\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0masarray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvectors\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 21\u001b[0m \u001b[0mtsne\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mTSNE\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn_components\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mnum_dimensions\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrandom_state\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 22\u001b[0;31m \u001b[0mvectors\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtsne\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit_transform\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvectors\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 23\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 24\u001b[0m \u001b[0mx_vals\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mv\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mvectors\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
  598. "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/sklearn/manifold/_t_sne.py\u001b[0m in \u001b[0;36mfit_transform\u001b[0;34m(self, X, y)\u001b[0m\n\u001b[1;32m 884\u001b[0m \u001b[0mEmbedding\u001b[0m \u001b[0mof\u001b[0m \u001b[0mthe\u001b[0m \u001b[0mtraining\u001b[0m \u001b[0mdata\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mlow\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0mdimensional\u001b[0m \u001b[0mspace\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 885\u001b[0m \"\"\"\n\u001b[0;32m--> 886\u001b[0;31m \u001b[0membedding\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_fit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 887\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0membedding_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0membedding\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 888\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0membedding_\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
  599. "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/sklearn/manifold/_t_sne.py\u001b[0m in \u001b[0;36m_fit\u001b[0;34m(self, X, skip_num_points)\u001b[0m\n\u001b[1;32m 751\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 752\u001b[0m \u001b[0mt0\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 753\u001b[0;31m \u001b[0mdistances_nn\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mknn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mkneighbors_graph\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmode\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'distance'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 754\u001b[0m \u001b[0mduration\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mt0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 755\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mverbose\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
  600. "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/sklearn/neighbors/_base.py\u001b[0m in \u001b[0;36mkneighbors_graph\u001b[0;34m(self, X, n_neighbors, mode)\u001b[0m\n\u001b[1;32m 761\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mmode\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'distance'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 762\u001b[0m A_data, A_ind = self.kneighbors(\n\u001b[0;32m--> 763\u001b[0;31m X, n_neighbors, return_distance=True)\n\u001b[0m\u001b[1;32m 764\u001b[0m \u001b[0mA_data\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mravel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mA_data\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 765\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
  601. "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/sklearn/neighbors/_base.py\u001b[0m in \u001b[0;36mkneighbors\u001b[0;34m(self, X, n_neighbors, return_distance)\u001b[0m\n\u001b[1;32m 661\u001b[0m delayed_query(\n\u001b[1;32m 662\u001b[0m self._tree, X[s], n_neighbors, return_distance)\n\u001b[0;32m--> 663\u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0ms\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mgen_even_slices\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn_jobs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 664\u001b[0m )\n\u001b[1;32m 665\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
  602. "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/joblib/parallel.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, iterable)\u001b[0m\n\u001b[1;32m 1002\u001b[0m \u001b[0;31m# remaining jobs.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1003\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_iterating\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1004\u001b[0;31m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdispatch_one_batch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0miterator\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1005\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_iterating\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_original_iterator\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1006\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
  603. "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/joblib/parallel.py\u001b[0m in \u001b[0;36mdispatch_one_batch\u001b[0;34m(self, iterator)\u001b[0m\n\u001b[1;32m 833\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 834\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 835\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_dispatch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtasks\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 836\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 837\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
  604. "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/joblib/parallel.py\u001b[0m in \u001b[0;36m_dispatch\u001b[0;34m(self, batch)\u001b[0m\n\u001b[1;32m 752\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_lock\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 753\u001b[0m \u001b[0mjob_idx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_jobs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 754\u001b[0;31m \u001b[0mjob\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_backend\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_async\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbatch\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcallback\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcb\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 755\u001b[0m \u001b[0;31m# A job can complete so quickly than its callback is\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 756\u001b[0m \u001b[0;31m# called before we get here, causing self._jobs to\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
  605. "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/joblib/_parallel_backends.py\u001b[0m in \u001b[0;36mapply_async\u001b[0;34m(self, func, callback)\u001b[0m\n\u001b[1;32m 207\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mapply_async\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcallback\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 208\u001b[0m \u001b[0;34m\"\"\"Schedule a func to be run\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 209\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mImmediateResult\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 210\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mcallback\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 211\u001b[0m \u001b[0mcallback\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
  606. "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/joblib/_parallel_backends.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, batch)\u001b[0m\n\u001b[1;32m 588\u001b[0m \u001b[0;31m# Don't delay the application, to avoid keeping the input\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 589\u001b[0m \u001b[0;31m# arguments in memory\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 590\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mresults\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mbatch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 591\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 592\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
  607. "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/joblib/parallel.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 254\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mparallel_backend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_backend\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn_jobs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_n_jobs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 255\u001b[0m return [func(*args, **kwargs)\n\u001b[0;32m--> 256\u001b[0;31m for func, args, kwargs in self.items]\n\u001b[0m\u001b[1;32m 257\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 258\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__len__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
  608. "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/joblib/parallel.py\u001b[0m in \u001b[0;36m<listcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 254\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mparallel_backend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_backend\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn_jobs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_n_jobs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 255\u001b[0m return [func(*args, **kwargs)\n\u001b[0;32m--> 256\u001b[0;31m for func, args, kwargs in self.items]\n\u001b[0m\u001b[1;32m 257\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 258\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__len__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
  609. "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/sklearn/neighbors/_base.py\u001b[0m in \u001b[0;36m_tree_query_parallel_helper\u001b[0;34m(tree, *args, **kwargs)\u001b[0m\n\u001b[1;32m 488\u001b[0m \u001b[0munder\u001b[0m \u001b[0mPyPy\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 489\u001b[0m \"\"\"\n\u001b[0;32m--> 490\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mtree\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mquery\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 491\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 492\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
  610. "\u001b[0;32msklearn/neighbors/_binary_tree.pxi\u001b[0m in \u001b[0;36msklearn.neighbors._kd_tree.BinaryTree.query\u001b[0;34m()\u001b[0m\n",
  611. "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36mcheck_array\u001b[0;34m(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)\u001b[0m\n\u001b[1;32m 529\u001b[0m \u001b[0marray\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0marray\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcasting\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"unsafe\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 530\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 531\u001b[0;31m \u001b[0marray\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0masarray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0morder\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0morder\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 532\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mComplexWarning\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 533\u001b[0m raise ValueError(\"Complex data not supported\\n\"\n",
  612. "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/numpy/core/_asarray.py\u001b[0m in \u001b[0;36masarray\u001b[0;34m(a, dtype, order)\u001b[0m\n\u001b[1;32m 83\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 84\u001b[0m \"\"\"\n\u001b[0;32m---> 85\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0morder\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0morder\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 86\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 87\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
  613. "\u001b[0;31mMemoryError\u001b[0m: Unable to allocate array with shape (3000000, 300) and data type float64"
  614. ]
  615. }
  616. ],
  617. "source": [
  618. "from sklearn.decomposition import IncrementalPCA # initial reduction\n",
  619. "from sklearn.manifold import TSNE # final reduction\n",
  620. "import numpy as np # array handling\n",
  621. "\n",
  622. "\n",
  623. "def reduce_dimensions(model):\n",
  624. " num_dimensions = 2 # final num dimensions (2D, 3D, etc)\n",
  625. "\n",
  626. " vectors = [] # positions in vector space\n",
  627. " labels = [] # keep track of words to label our data again later\n",
  628. " for word in model.wv.vocab:\n",
  629. " vectors.append(model.wv[word])\n",
  630. " labels.append(word)\n",
  631. "\n",
  632. " # convert both lists into numpy vectors for reduction\n",
  633. " vectors = np.asarray(vectors)\n",
  634. " labels = np.asarray(labels)\n",
  635. "\n",
  636. " # reduce using t-SNE\n",
  637. " vectors = np.asarray(vectors)\n",
  638. " tsne = TSNE(n_components=num_dimensions, random_state=0)\n",
  639. " vectors = tsne.fit_transform(vectors)\n",
  640. "\n",
  641. " x_vals = [v[0] for v in vectors]\n",
  642. " y_vals = [v[1] for v in vectors]\n",
  643. " return x_vals, y_vals, labels\n",
  644. "\n",
  645. "\n",
  646. "x_vals, y_vals, labels = reduce_dimensions(model)"
  647. ]
  648. },
  649. {
  650. "cell_type": "code",
  651. "execution_count": null,
  652. "metadata": {},
  653. "outputs": [],
  654. "source": []
  655. }
  656. ],
  657. "metadata": {
  658. "kernelspec": {
  659. "display_name": "Python 3",
  660. "language": "python",
  661. "name": "python3"
  662. },
  663. "language_info": {
  664. "codemirror_mode": {
  665. "name": "ipython",
  666. "version": 3
  667. },
  668. "file_extension": ".py",
  669. "mimetype": "text/x-python",
  670. "name": "python",
  671. "nbconvert_exporter": "python",
  672. "pygments_lexer": "ipython3",
  673. "version": "3.7.6"
  674. }
  675. },
  676. "nbformat": 4,
  677. "nbformat_minor": 4
  678. }