|
|
- {
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {},
- "outputs": [],
- "source": [
- "import gensim\n",
- "# Load the pretrained word2vec vectors from the binary GoogleNews file in the working directory.\n",
- "model = gensim.models.KeyedVectors.load_word2vec_format('./GoogleNews-vectors-negative300.bin', binary=True) "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "[('hi', 0.654898464679718),\n",
- " ('goodbye', 0.639905571937561),\n",
- " ('howdy', 0.6310957074165344),\n",
- " ('goodnight', 0.5920578241348267),\n",
- " ('greeting', 0.5855878591537476),\n",
- " ('Hello', 0.5842196941375732),\n",
- " (\"g'day\", 0.5754077434539795),\n",
- " ('See_ya', 0.5688871145248413),\n",
- " ('ya_doin', 0.5643119812011719),\n",
- " ('greet', 0.5636603832244873)]"
- ]
- },
- "execution_count": 3,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Nearest neighbours of \"hello\" in the embedding space, returned as (word, similarity) pairs.\n",
- "model.most_similar(\"hello\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "[('coders', 0.6104331612586975),\n",
- " ('coder', 0.6063331365585327),\n",
- " ('Coding', 0.5804804563522339),\n",
- " ('formatting', 0.5671651363372803),\n",
- " ('soluble_receptors', 0.5576372146606445),\n",
- " ('ICD9', 0.5571348667144775),\n",
- " ('refactoring', 0.5495434999465942),\n",
- " ('database_schemas', 0.5372464656829834),\n",
- " ('recode', 0.534299373626709),\n",
- " ('XHTML_CSS', 0.5328801870346069)]"
- ]
- },
- "execution_count": 4,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Same query for \"coding\".\n",
- "model.most_similar(\"coding\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "[('cats', 0.8099379539489746),\n",
- " ('dog', 0.7609456777572632),\n",
- " ('kitten', 0.7464985251426697),\n",
- " ('feline', 0.7326233983039856),\n",
- " ('beagle', 0.7150583267211914),\n",
- " ('puppy', 0.7075453996658325),\n",
- " ('pup', 0.6934291124343872),\n",
- " ('pet', 0.6891531348228455),\n",
- " ('felines', 0.6755931377410889),\n",
- " ('chihuahua', 0.6709762215614319)]"
- ]
- },
- "execution_count": 5,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Same query for \"cat\".\n",
- "model.most_similar(\"cat\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "hi globe \n"
- ]
- }
- ],
- "source": [
- "def transformSentence(sentence):\n",
- "    \"\"\"Replace every word of `sentence` with its nearest word2vec neighbour.\n",
- "\n",
- "    The sentence is split on single spaces. Each token is swapped for the\n",
- "    top-ranked entry of `model.most_similar`; tokens the model does not know\n",
- "    (which raise `KeyError`) are copied through unchanged.\n",
- "\n",
- "    Returns the rebuilt sentence with one space after every token, so the\n",
- "    result always ends with a trailing space.\n",
- "    \"\"\"\n",
- "    replaced = []\n",
- "    for word in sentence.split(\" \"):\n",
- "        try:\n",
- "            # most_similar yields (word, score) pairs; take the word of the first pair.\n",
- "            replaced.append(model.most_similar(word)[0][0])\n",
- "        except KeyError:\n",
- "            # Out-of-vocabulary token: keep it as-is. Any other error now surfaces\n",
- "            # instead of being silently swallowed.\n",
- "            replaced.append(word)\n",
- "    return \"\".join(token + \" \" for token in replaced)\n",
- "\n",
- "print(transformSentence(\"hello world\"))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "looks Mom No hand \n"
- ]
- }
- ],
- "source": [
- "# Try the naive nearest-neighbour substitution on a short sentence.\n",
- "print(transformSentence(\"look mom no hands\"))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "This gen_eral concept of Clustering was to groups Data wtih similiar trait \n"
- ]
- }
- ],
- "source": [
- "# Try the naive nearest-neighbour substitution on a longer sentence.\n",
- "print(transformSentence(\"The general idea of clustering is to group data with similar traits\"))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 12,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "This manager concept of clusters was to groups datasets wtih similiar traits. \n"
- ]
- }
- ],
- "source": [
- "def removeFromString(string, chars):\n",
- "    \"\"\"Return `string` with every occurrence of each entry in `chars` removed.\"\"\"\n",
- "    stripped = string\n",
- "    for unwanted in chars:\n",
- "        stripped = stripped.replace(unwanted, \"\")\n",
- "    return stripped\n",
- "\n",
- "\n",
- "def transformSentenceWithHeuristic(sentence):\n",
- "    \"\"\"Replace each word of `sentence` with a similar word, skipping trivial variants.\n",
- "\n",
- "    For every space-separated token the candidates from `model.most_similar`\n",
- "    are scanned in order and the first acceptable one is used. A candidate is\n",
- "    rejected when it contains an underscore, or when its lower-cased form\n",
- "    (spaces and underscores removed) is a substring of the lower-cased\n",
- "    original token. If no candidate is acceptable, or the token is unknown to\n",
- "    the model (`KeyError`), the original token is kept.\n",
- "\n",
- "    Returns the rebuilt sentence with one space after every token, so the\n",
- "    result always ends with a trailing space.\n",
- "    \"\"\"\n",
- "    replaced = []\n",
- "    for word in sentence.split(\" \"):\n",
- "        try:\n",
- "            candidates = model.most_similar(word)\n",
- "        except KeyError:\n",
- "            # Out-of-vocabulary token: keep it unchanged. Other errors now propagate.\n",
- "            replaced.append(word)\n",
- "            continue\n",
- "\n",
- "        lowered = word.lower()\n",
- "        for candidate, _ in candidates:\n",
- "            normalized = removeFromString(candidate, [' ', '_']).lower()\n",
- "            if normalized not in lowered and \"_\" not in candidate:\n",
- "                replaced.append(candidate)\n",
- "                break\n",
- "        else:\n",
- "            # Every candidate was rejected by the heuristic.\n",
- "            replaced.append(word)\n",
- "    return \"\".join(token + \" \" for token in replaced)\n",
- "\n",
- "print(transformSentenceWithHeuristic(\"The general idea of clustering is to group data with similar traits.\"))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 10,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Relax up and grabbing a drinks but that was day it I talking abut this hallucinogenic trips it was this 1981 film Fever Treatment. \n"
- ]
- }
- ],
- "source": [
- "# Try the heuristic substitution on a longer sentence.\n",
- "print(transformSentenceWithHeuristic(\"Sit down and grab a drink because it is time that we talk about the LSD trip that is the 1981 movie Shock Treatment.\"))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 15,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "(300,)\n",
- "[ 0.0123291 0.20410156 -0.28515625 0.21679688 0.11816406 0.08300781\n",
- " 0.04980469 -0.00952148 0.22070312 -0.12597656 0.08056641 -0.5859375\n",
- " -0.00445557 -0.296875 -0.01312256 -0.08349609 0.05053711 0.15136719\n",
- " -0.44921875 -0.0135498 0.21484375 -0.14746094 0.22460938 -0.125\n",
- " -0.09716797 0.24902344 -0.2890625 0.36523438 0.41210938 -0.0859375\n",
- " -0.07861328 -0.19726562 -0.09082031 -0.14160156 -0.10253906 0.13085938\n",
- " -0.00346375 0.07226562 0.04418945 0.34570312 0.07470703 -0.11230469\n",
- " 0.06738281 0.11230469 0.01977539 -0.12353516 0.20996094 -0.07226562\n",
- " -0.02783203 0.05541992 -0.33398438 0.08544922 0.34375 0.13964844\n",
- " 0.04931641 -0.13476562 0.16308594 -0.37304688 0.39648438 0.10693359\n",
- " 0.22167969 0.21289062 -0.08984375 0.20703125 0.08935547 -0.08251953\n",
- " 0.05957031 0.10205078 -0.19238281 -0.09082031 0.4921875 0.03955078\n",
- " -0.07080078 -0.0019989 -0.23046875 0.25585938 0.08984375 -0.10644531\n",
- " 0.00105286 -0.05883789 0.05102539 -0.0291748 0.19335938 -0.14160156\n",
- " -0.33398438 0.08154297 -0.27539062 0.10058594 -0.10449219 -0.12353516\n",
- " -0.140625 0.03491211 -0.11767578 -0.1796875 -0.21484375 -0.23828125\n",
- " 0.08447266 -0.07519531 -0.25976562 -0.21289062 -0.22363281 -0.09716797\n",
- " 0.11572266 0.15429688 0.07373047 -0.27539062 0.14257812 -0.0201416\n",
- " 0.10009766 -0.19042969 -0.09375 0.14160156 0.17089844 0.3125\n",
- " -0.16699219 -0.08691406 -0.05004883 -0.24902344 -0.20800781 -0.09423828\n",
- " -0.12255859 -0.09472656 -0.390625 -0.06640625 -0.31640625 0.10986328\n",
- " -0.00156403 0.04345703 0.15625 -0.18945312 -0.03491211 0.03393555\n",
- " -0.14453125 0.01611328 -0.14160156 -0.02392578 0.01501465 0.07568359\n",
- " 0.10742188 0.12695312 0.10693359 -0.01184082 -0.24023438 0.0291748\n",
- " 0.16210938 0.19921875 -0.28125 0.16699219 -0.11621094 -0.25585938\n",
- " 0.38671875 -0.06640625 -0.4609375 -0.06176758 -0.14453125 -0.11621094\n",
- " 0.05688477 0.03588867 -0.10693359 0.18847656 -0.16699219 -0.01794434\n",
- " 0.10986328 -0.12353516 -0.16308594 -0.14453125 0.12890625 0.11523438\n",
- " 0.13671875 0.05688477 -0.08105469 -0.06152344 -0.06689453 0.27929688\n",
- " -0.19628906 0.07226562 0.12304688 -0.20996094 -0.22070312 0.21386719\n",
- " -0.1484375 -0.05932617 0.05224609 0.06445312 -0.02636719 0.13183594\n",
- " 0.19433594 0.27148438 0.18652344 0.140625 0.06542969 -0.14453125\n",
- " 0.05029297 0.08837891 0.12255859 0.26757812 0.0534668 -0.32226562\n",
- " -0.20703125 0.18164062 0.04418945 -0.22167969 -0.13769531 -0.04174805\n",
- " -0.00286865 0.04077148 0.07275391 -0.08300781 0.08398438 -0.3359375\n",
- " -0.40039062 0.01757812 -0.18652344 -0.0480957 -0.19140625 0.10107422\n",
- " 0.09277344 -0.30664062 -0.19921875 -0.0168457 0.12207031 0.14648438\n",
- " -0.12890625 -0.23535156 -0.05371094 -0.06640625 0.06884766 -0.03637695\n",
- " 0.2109375 -0.06005859 0.19335938 0.05151367 -0.05322266 0.02893066\n",
- " -0.27539062 0.08447266 0.328125 0.01818848 0.01495361 0.04711914\n",
- " 0.37695312 -0.21875 -0.03393555 0.01116943 0.36914062 0.02160645\n",
- " 0.03466797 0.07275391 0.16015625 -0.16503906 -0.296875 0.15039062\n",
- " -0.29101562 0.13964844 0.00448608 0.171875 -0.21972656 0.09326172\n",
- " -0.19042969 0.01599121 -0.09228516 0.15722656 -0.14160156 -0.0534668\n",
- " 0.03613281 0.23632812 -0.15136719 -0.00689697 -0.27148438 -0.07128906\n",
- " -0.16503906 0.18457031 -0.08398438 0.18554688 0.11669922 0.02758789\n",
- " -0.04760742 0.17871094 0.06542969 -0.03540039 0.22949219 0.02697754\n",
- " -0.09765625 0.26953125 0.08349609 -0.13085938 -0.10107422 -0.00738525\n",
- " 0.07128906 0.14941406 -0.20605469 0.18066406 -0.15820312 0.05932617\n",
- " 0.28710938 -0.04663086 0.15136719 0.4921875 -0.27539062 0.05615234]\n"
- ]
- }
- ],
- "source": [
- "# Look at the raw embedding behind a single word: its shape, then its values.\n",
- "print(model[\"cat\"].shape)\n",
- "print(model[\"cat\"])"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 17,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "[[1. 0.76094574 0.17324439]\n",
- " [0.76094574 0.99999994 0.12194333]\n",
- " [0.17324439 0.12194333 1. ]]\n"
- ]
- }
- ],
- "source": [
- "import numpy as np\n",
- "\n",
- "def createCorrelationMatrix(words):\n",
- "    \"\"\"Build the pairwise similarity matrix for `words`.\n",
- "\n",
- "    Entry [r, c] holds `model.similarity(words[r], words[c])`, so the result is\n",
- "    a square float array with one row and one column per word.\n",
- "    \"\"\"\n",
- "    size = len(words)\n",
- "    # The `np.float` alias was removed in NumPy 1.24; the builtin `float`\n",
- "    # selects the same float64 dtype.\n",
- "    matrix = np.empty((size, size), dtype=float)\n",
- "\n",
- "    for r in range(size):\n",
- "        for c in range(size):\n",
- "            matrix[r, c] = model.similarity(words[r], words[c])\n",
- "    return matrix\n",
- "\n",
- "# Despite its name, testMatrix is the list of words; a later cell reuses it.\n",
- "testMatrix = [\"cat\", \"dog\", \"computer\"]\n",
- "print(createCorrelationMatrix(testMatrix))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 21,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAAQ8AAAD8CAYAAABpXiE9AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8li6FKAAANfklEQVR4nO3dXYxd1XnG8f9TMBc1rgg1BGPMRySriFYKcUcOlKpy1RCBheRcoMpcBIQqjUBQJVK4sIJEriq1vYhUGoRrKSggRdALErBapymJokAuoBjXBoxLcCgSI1sxAWpwoaFO3l7MJh0NZzzjdfacc2z+P+no7L3XOnu9rLGf2Wd/4FQVknSyfmvcBUg6NRkekpoYHpKaGB6SmhgekpoYHpKanDnMh5OcC/wjcCnwGvDnVfX2gH6vAe8CvwKOV9XUMONKGr9hjzy2AT+sqvXAD7v1hfxpVV1pcEinh2HDYwvwYLf8IPCFIfcn6RSRYe4wTfJfVXXOnPW3q+oTA/r9J/A2UMA/VNWOE+xzGpgGWLly5R9efvnlzfWd9n793LgrmHgv/fu4K5hsHwDHq9Ly2UXDI8kPgAsGNN0NPLjE8Liwqg4lOR94AvjLqnpyseKmpqZq9+7di3X7+DrW9DP/WLly1bgrmGw/Bd5rDI9FT5hW1ecWakvy8yRrqupwkjXAkQX2cah7P5Lku8BGYNHwkDS5hj3nsRO4pVu+BXh8fockK5Os+nAZ+Dzw4pDjShqzYcPjr4Frk7wCXNutk+TCJLu6Pp8EfpJkH/BvwD9X1b8MOa6kMRvqPo+qehP4swHbDwGbu+VXgU8PM46kyeMdppKaGB6SmhgekpoYHpKaGB6SmhgekpoYHpKaGB6SmhgekpoYHpKaGB6SmhgekpoYHpKaGB6SmhgekpoYHpKaGB6SmhgekpoYHpKaGB6SmhgekpoYHpKaGB6SmhgekpoYHpKaGB6Smhgekpr0Eh5JrkvycpKDSbYNaE+Se7v255Ns6GNcSeMzdHgkOQO4D7geuAK4KckV87pdD6zvXtPA/cOOK2m8+jjy2AgcrKpXq+oD4BFgy7w+W4CHatbTwDlJ1vQwtqQx6SM81gKvz1mf6badbB9Jp5A+wiMDtlVDn9mOyXSS3Ul2v/HGG0MXJ2l59BEeM8C6OesXAYca+gBQVTuqaqqqps4777weypO0HPoIj2eB9UkuS3IWsBXYOa/PTuDm7qrLVcDRqjrcw9iSxuTMYXdQVceT3Al8HzgDeKCq9ie5rWvfDuwCNgMHgfeAW4cdV9J4DR0eAFW1i9mAmLtt+5zlAu7oYyxJk8E7TCU1MTwkNTE8JDUxPCQ1MTwkNTE8JDUxPCQ1MTwkNTE8JDUxPCQ1MTwkNTE8JDUxPCQ1MTwkNTE8JDUxPCQ1MTwkNTE8JDUxPCQ1MTwkNTE8JDUxPCQ1MTwkNTE8JDUxPCQ1MTwkNTE8JDUxPCQ16SU8klyX5OUkB5NsG9C+KcnRJHu71z19jCtpfM4cdgdJzgDuA64FZoBnk+ysqpfmdX2qqm4YdjxJk6GPI4+NwMGqerWqPgAeAbb0sF9JE2zoIw9gLfD6nPUZ4LMD+l2dZB9wCLirqvYP2lmSaWAa4OJ1wLH0UOJp6uwadwUT7xX883Mivxzis30ceQz66cz/U70HuKSqPg38PfDYQjurqh1VNVVVU+et7qE6Scuij/CYAdbNWb+I2aOL36iqd6rqWLe8C1iRxGiQTmF9hMezwPoklyU5C9gK7JzbIckFSdItb+zGfbOHsSWNydDnPKrqeJI7ge8DZwAPVNX+JLd17duBG4HbkxwH3ge2VpVf2KVTWCb57/DUhtTuJ8ddxQTzhOmiVsYTpifyP8CvqpomyTtMJTUxPCQ1MTwkNTE8JDUxPCQ1MTwkNTE8JDUxPCQ1MTwkNTE8JDUxPCQ1MTwkNTE8JDUxPCQ1MTwkNTE8JDUxPCQ1
MTwkNTE8JDUxPCQ1MTwkNTE8JDUxPCQ1MTwkNTE8JDUxPCQ1MTwkNeklPJI8kORIkhcXaE+Se5McTPJ8kg19jCtpfPo68vgWcN0J2q8H1nevaeD+nsaVNCa9hEdVPQm8dYIuW4CHatbTwDlJ1vQxtqTxGNU5j7XA63PWZ7ptH5FkOsnuJLvf+MVIapPUYFThkQHbalDHqtpRVVNVNXXe6mWuSlKzUYXHDLBuzvpFwKERjS1pGYwqPHYCN3dXXa4CjlbV4RGNLWkZnNnHTpI8DGwCVieZAb4GrACoqu3ALmAzcBB4D7i1j3EljU8v4VFVNy3SXsAdfYwlaTJ4h6mkJoaHpCaGh6QmhoekJoaHpCaGh6QmhoekJoaHpCaGh6QmhoekJoaHpCaGh6QmhoekJoaHpCaGh6QmhoekJoaHpCaGh6QmhoekJoaHpCaGh6QmhoekJoaHpCaGh6QmhoekJoaHpCaGh6QmvYRHkgeSHEny4gLtm5IcTbK3e93Tx7iSxqeXf+ga+BbwDeChE/R5qqpu6Gk8SWPWy5FHVT0JvNXHviSdGvo68liKq5PsAw4Bd1XV/kGdkkwD0wArgCtXja7AU80rZNwlTLz/rhp3CRNtamqq+bOjCo89wCVVdSzJZuAxYP2gjlW1A9gB8NuJP3lpQo3kaktVvVNVx7rlXcCKJKtHMbak5TGS8EhyQZJ0yxu7cd8cxdiSlkcvX1uSPAxsAlYnmQG+xuwpC6pqO3AjcHuS48D7wNYqv4xKp7JewqOqblqk/RvMXsqVdJrwDlNJTQwPSU0MD0lNDA9JTQwPSU0MD0lNDA9JTQwPSU0MD0lNDA9JTQwPSU0MD0lNDA9JTQwPSU0MD0lNDA9JTQwPSU0MD0lNDA9JTQwPSU0MD0lNDA9JTQwPSU0MD0lNDA9JTQwPSU0MD0lNhg6PJOuS/CjJgST7k3xpQJ8kuTfJwSTPJ9kw7LiSxquPf+j6OPCVqtqTZBXwXJInquqlOX2uB9Z3r88C93fvkk5RQx95VNXhqtrTLb8LHADWzuu2BXioZj0NnJNkzbBjSxqfXs95JLkU+AzwzLymtcDrc9Zn+GjASDqF9PG1BYAkZwOPAl+uqnfmNw/4SC2wn2lgGmBFX8VJ6l0vRx5JVjAbHN+uqu8M6DIDrJuzfhFwaNC+qmpHVU1V1VRvySapd31cbQnwTeBAVX19gW47gZu7qy5XAUer6vCwY0sanz5+uV8DfBF4IcnebttXgYsBqmo7sAvYDBwE3gNu7WFcSWM0dHhU1U8YfE5jbp8C7hh2LEmTwztMJTUxPCQ1MTwkNTE8JDUxPCQ1MTwkNTE8JDUxPCQ1MTwkNTE8JDUxPCQ1MTwkNTE8JDUxPCQ1MTwkNTE8JDUxPCQ1MTwkNTE8JDUxPCQ1MTwkNTE8JDUxPCQ1MTwkNTE8JDUxPCQ1MTwkNTE8JDUZOjySrEvyoyQHkuxP8qUBfTYlOZpkb/e6Z9hxJY3XmT3s4zjwlarak2QV8FySJ6rqpXn9nqqqG3oYT9IEGPrIo6oOV9Webvld4ACwdtj9SppsfRx5/EaSS4HPAM8MaL46yT7gEHBXVe1fYB/TwHS3+st98GKfNQ5pNfCLcRcxh/UsIsmk1TRp9fxe6wdTVb1UkORs4MfAX1XVd+a1/Q7w66o6lmQz8HdVtX4J+9xdVVO9FNgD6zmxSasHJq+m06meXq62JFkBPAp8e35wAFTVO1V1rFveBazofiNIOkX1cbUlwDeBA1X19QX6XND1I8nGbtw3hx1b0vj0cc7jGuCLwAtJ9nbbvgpcDFBV24EbgduTHAfeB7bW0r4v7eihvj5Zz4lNWj0weTWdNvX0ds5D0seLd5hKamJ4SGoyMeGR5NwkTyR5pXv/xAL9XkvyQneb++5lqOO6JC8nOZhk24D2JLm3a38+yYa+a2ioaWS3/yd5IMmRJAPvvxnT/CxW00gfj1jiIxsjm6dle4SkqibiBfwtsK1b3gb8zQL9
XgNWL1MNZwA/Az4FnAXsA66Y12cz8D0gwFXAM8s8L0upaRPwTyP6Of0JsAF4cYH2kc7PEmsa2fx0460BNnTLq4CfjvP
- "text/plain": [
- "<Figure size 432x288 with 1 Axes>"
- ]
- },
- "metadata": {
- "needs_background": "light"
- },
- "output_type": "display_data"
- }
- ],
- "source": [
- "# Imported here so this cell runs on a fresh kernel; pyplot was otherwise only\n",
- "# imported in a later cell.\n",
- "from matplotlib import pyplot as plt\n",
- "\n",
- "\n",
- "def displayMap(a):\n",
- "    \"\"\"Show the 2D array `a` as an image using the 'hot' colour map.\"\"\"\n",
- "    plt.imshow(a, cmap='hot', interpolation='nearest')\n",
- "    plt.show()\n",
- "\n",
- "displayMap(createCorrelationMatrix(testMatrix))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- },
- {
- "cell_type": "code",
- "execution_count": 48,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "AxesImage(90,90;446.4x543.6)\n",
- "AxesImage(90,90;446.4x543.6)\n"
- ]
- },
- {
- "data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAAq8AAALICAYAAABRkBl/AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8li6FKAAAgAElEQVR4nOzde5xdZXXw8d+aTK7kBoTcI4SLQCI0QAIqtYIUjIiAooKgVkQjvkK1Wi+tb7Xeba0WENuglqK2r6BWBPxAUCgIKnKHmIRQIkFzIcQUIXeSzKz3j30STpKZIWeckz078/v6OR/O3s+zT9aZz0lcs856nh2ZiSRJklQFLWUHIEmSJO0qk1dJkiRVhsmrJEmSKsPkVZIkSZVh8ipJkqTKMHmVJElSZZi8SpIkqUsRcWVErIyIeZ2MR0RcFhGLImJuRBzdrFhMXiVJkvRCrgJmdjH+GuCQ2mMW8K/NCsTkVZIkSV3KzDuAp7uYcgbw7Sz8ChgZEeOaEUtrM15UkiRJXYtRQ5NNbWWHUVizcT6wse7M1zPz6w28wgRgSd3x0tq5J3sguu2YvEqSJJVhUxu8dHLZURR++sjGzJz+R7xCdHAu/4jX65RtA5IkSfpjLQUm1R1PBJY34w8yeZUkSdIf63rg7bVdB14KPJuZPd4yALYNSJIklSPo+Mv2XigivgucAIyKiKXAJ4H+AJk5G7gROBVYBKwHzm9WLCavkiRJ6lJmvuUFxhN43+6IxeRVkiSpLFGR0msvYs+rJEmSKsPkVZIkSZVh24AkSVJZ7BpomJVXSZIkVYbJqyRJkirDtgFJkqSyuNtAw6y8SpIkqTKsvEqSJJXFwmvDrLxKkiSpMkxeJUmSVBm2DUiSJJUhgBb7Bhpl5VWSJEmVYfIqSZKkyrBtQJIkqSx2DTTMyqskSZIqw+RVkiRJlWHbgCRJUinC28N2g5VXSZIkVYaVV0mSpLJYeG2YlVdJkiRVhsmrJEmSKsO2AUmSpDIEtg10g5VXSZIkVYbJqyRJkirDtgFJkqSyuM9rw6y8SpIkqTKsvEqSJJXFwmvDrLxKkiSpMkxeJUmSVBm2DUiSJJXFBVsNs/IqSZKkyjB5lSRJUmXYNiBJklQGbw/bLVZeJUmSVBkmr5IkSaoM2wYkSZLK4m4DDbPyKkmSpMqw8ipJklQWy4gN80cmSZKkyjB5lSRJUmXYNiBJklQW12s1zMqrJEmSKsPkVZIkSZVh24AkSVIZAvd57QYrr5IkSaoMK6+SJHUiIiIzs+w4tAez8NowK6+SJHUiMzMi/P9KqRex8ipJUp2ImAq8Gtgf+GJmPllySJLq+NukJEk1EfHnwBeAvYE/ADdExJk7zPGLXvWQKBZs9YZHhVh5lSQJiIghwKuAH2bmVbVzVwDHR8SpwMDMvNYeWKlcVl4lSSoMAP4ceLDu3IuAjwInAH8ZEd+LiGFb+2Dth5V2P//SSZJUaAWWAr+rO/dvwJWZ+ZHMPBF4Dhieme0RMSgz28EkVn+E6CWPCvEvmyRJQGauAu4GfhUR74mINwDPZea/RsSA2rQXAxMj4lXAv0XEibVr28uJWup7TF4lSarJzH8AXgvsR1GBvbt2flNEvBUYkpl3A2cCk4A3R8T8iJiy9TWswkrN5YItSZLYtotAZOYi4LMRMQw4NCJ+ANwPvAl4Z23x1mDgU5l5a0R8hWJR11HAfZn5aFnvQRVUsZX+vYG/HUqSRHFDglova9SO12TmScCtwBrgzcCjwNnAbcBdtUsPq40NAq6NiLPg+QqslVipZ1l5lSSpztatsCKiX2a2Zea/bh2LiE8B64G7M3N9RJwEjANOysynI+J4YHJt+ihgZd2iLm81q+1VcLFUb+Bvg5IkdSAz2zo4/Q3g34HHI6I/8FfAt2qJ6zSKmxv8sjb37oj4ZkQcWXs9E1epB1h5lSRpF9Qqp0spttOi1h5wUGaeVptyNrAg
M38ZERcAo4GHgMsi4tcUiW5mZptVWKn7TF4lSdoFOyabmXl1RNwHEBGnU+w+8NXa8FeAN2bmTRFxO/DxzNxSq9a2mbhqGxdsNcy2AUl7pIjoV3YM2nNtXdQF/Kb2332ARzLz7oj4R2BxLXEdAGwA9ouIwcCnIuKf6/aNdUGX1CD/wkjao9QqW9S+mt0rIiaUHZP2PFsrp3X/vSozPxcR44GLgDl1098FLKTYjeA44DpgC0BEHFTb4WBwROzvL13SC7NtQN0SES3eUUa9Sa0SdhTw1tr+nH8FvBI4IyK+lpkPlxqg9lh1+8O2A/8P+B7wXERcDfweOAg4B/hb4B6KvWDbI+LPgO9HxCTg1cAhwPUU23Gpr7BroGEmr2pIRAzJzPUmruqFTgbeC9xHsaDmuxTVrseAxwEiojUzt5QWofZIteprRsTBwNTMPAEgIj5O8fn7LMW2WUcBH6RoI4Aimf0CRa/sD4C7MvNLuzd6qXpsG9AuiYjWiDgXmB0Rd0XEm3cY93dHlSYi9gJOBK7LzM9l5leA4cAQ4M6t82oLZvysqilqd+Y6oO74c5n5vcx8iqK+tgp4rNbS8jZgv8y8BJgA/BDYGBGX1b45AIp/W/3M7uFaesmjQqy8ale9AzgJ+E+Kr8H+LiLGAv+RmU9nZtpKoBINAf6U4itbIuIYivvSL6NoH9hr656cmbmgtCi1x8vMddDhDQmeAfYHvhER8yi+JXhPROwDvAG4MzO/WuuZ3at2C9p5mTl/N78FqderWK6tMtSqAFOAqzPzx5l5N/B6io24/29EXBkRI01cVaK9gE3A1sT0zcB8in03l9X24byRYqX3dlUsF8ioGTrYVut/M/NPKW5wcBzFbgQ/Bc4EBgO31KaeAPwdMIYi0b28/jNqFVYyedWuGQycSrHQYKs24LeZ+UHgYeDGiBgDEBGTI+KEiBi0+0NVH/Vb4OfALyLi+8CxwFiKRTQfrM2ZQ7HCe2hE9IuI0fD8XZTcrkjNtDUBzczbgfNqD4BjKBZwPVKruv4txbdcazPz5bU54yOitXb91lvXmsTuCYJin9fe8KgQ/7HWrngOeACYVnfuCuDSiLiJYnHMrcCgiJgCfBo4miLBlZouC58E3gT8lOJbgQHANXXTxlNUs9ZRVL+uiIiHI2JG7TX85kBNU/dLUr/MfA5YWUto+wHvriWurwVuoygWnBERd1Bss9UCTI+Ij0bEUbXX8yYH6rPsedULysxnI+JK4LMRcRrwv8BEikThBODjwGbgX4BXAKcDL6ZIHJaVEbP6lrqtipYAX4+IkcDBwBdqldgDgA8Ds4F3145fCfw58PaIeCfwIWCDSYGaaWsSW/fL0oUR8QpgJMW3WK/MzMcpktfTKP4tXQucBrwNOCYitgDv3tpfK+0OETETuJTiF65vZuYXdxjfG7iSYmu4jcA7M3NeM2IxedUuycxbgFsi4mSKKsChmbk2Ih4G5gJLKFbS7g98hqLqdWZEXJmZGzp7Xakn1G1VFLUq7DPAfRExB/gPis/odylaBx4EZtYWGT4AfAJ4JjPXlxW/+qZaFbYtM++sHR8ATIyIzwJfzcwf186fRlEw+OvM/H5EnGbiugepwDf2tW8JvkaxJeFS4N6IuH6HBbB/CzyUma+PiMNq809qRjwmr2pIZv40IgYCb6tVtPam+C3swxRfeY0D/jMz79r6D3OJ4aqPqe8HrCWxX46If60NbYiI1wNzM/OxWo/rSGAEMKv+utLegPqUHf99zMwnIuJMir1fb6ptT7gOmAH8JjO/X5v3490erPq6Y4FFtW8FqN2A4wyeXyQLxcLuLwBk5sKIOCAixtS2iutR9ryqYZn5XGa+FbgYuB/4EbCGomXgYYrK1k7/MEu7S10S2692U42t1f/7gQm1OxsdD3wFuK22WMbEVaWqfV6fycz3Amdk5kKK1paDKG5i4O4YaqZREXFf3WNW3dgEim9Yt1paO1fvYYpt34iIYym+iZ3YjECtvKrb
MnMF8FGAiBhAcWvDOzNzY6mBSTX1v0BFxEuApyj217yQ4huDiRR9hFLpajcvaCme5tZE4UGgLTMf2TqntADVHL1npf+
- "text/plain": [
- "<Figure size 720x720 with 2 Axes>"
- ]
- },
- "metadata": {
- "needs_background": "light"
- },
- "output_type": "display_data"
- },
- {
- "data": {
- "text/plain": [
- "<Figure size 432x288 with 0 Axes>"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
- "source": [
- "from matplotlib import pyplot as plt\n",
- "import matplotlib.image as mpimg\n",
- "\n",
- "\n",
- "def displayMap(a):\n",
- "    \"\"\"Render the 2D array `a` as a 'hot' colour-mapped image.\n",
- "\n",
- "    NOTE(review): this repeats the displayMap defined in an earlier cell and\n",
- "    shadows it.\n",
- "    \"\"\"\n",
- "    plt.imshow(a, interpolation='nearest', cmap='hot')\n",
- "    plt.show()\n",
- " \n",
- " \n",
- " \n",
- "def heatmap(data, row_labels, col_labels, ax=None):\n",
- "    \"\"\"\n",
- "    Draw `data` as a labelled heat map with a colour bar.\n",
- "\n",
- "    Parameters\n",
- "    ----------\n",
- "    data\n",
- "        A 2D numpy array of shape (N, M).\n",
- "    row_labels\n",
- "        A list or array of length N with the labels for the rows.\n",
- "    col_labels\n",
- "        A list or array of length M with the labels for the columns.\n",
- "    ax\n",
- "        The axes to draw into. If None, the current pyplot axes are used.\n",
- "\n",
- "    Returns\n",
- "    -------\n",
- "    (im, cbar)\n",
- "        The image returned by `ax.imshow` and the colour bar attached to it.\n",
- "    \"\"\"\n",
- "    # Honour the caller's axes; only fall back to pyplot's current axes.\n",
- "    if ax is None:\n",
- "        ax = plt.gca()\n",
- "\n",
- "    im = ax.imshow(data, cmap=\"YlGn\")\n",
- "\n",
- "    # Create colorbar\n",
- "    cbar = ax.figure.colorbar(im, ax=ax, label=\"Correlation\")\n",
- "    cbar.ax.set_ylabel(\"Correlation\", rotation=-90, va=\"bottom\")\n",
- "\n",
- "    # We want to show all ticks...\n",
- "    ax.set_xticks(np.arange(data.shape[1]))\n",
- "    ax.set_yticks(np.arange(data.shape[0]))\n",
- "    # ... and label them with the respective list entries.\n",
- "    ax.set_xticklabels(col_labels)\n",
- "    ax.set_yticklabels(row_labels)\n",
- "\n",
- "    # Let the horizontal axes labeling appear on top.\n",
- "    ax.tick_params(top=True, bottom=False,\n",
- "                   labeltop=True, labelbottom=False)\n",
- "\n",
- "    # Rotate the tick labels and set their alignment.\n",
- "    plt.setp(ax.get_xticklabels(), rotation=-30, ha=\"right\",\n",
- "             rotation_mode=\"anchor\")\n",
- "\n",
- "    # Turn spines off and create white grid.\n",
- "    for spine in ax.spines.values():\n",
- "        spine.set_visible(False)\n",
- "\n",
- "    # Minor ticks sit on the cell borders, so the minor grid separates the cells.\n",
- "    ax.set_xticks(np.arange(data.shape[1]+1)-.5, minor=True)\n",
- "    ax.set_yticks(np.arange(data.shape[0]+1)-.5, minor=True)\n",
- "    ax.grid(which=\"minor\", color=\"w\", linestyle='-', linewidth=3)\n",
- "    ax.tick_params(which=\"minor\", bottom=False, left=False)\n",
- "\n",
- "    return im, cbar\n",
- "\n",
- "\n",
- "def annotate_heatmap(im, data=None, threshold=None,\n",
- "                     valfmt=\"{x:.2f}\", textcolors=(\"black\", \"white\"),\n",
- "                     **textkw):\n",
- "    \"\"\"\n",
- "    A function to annotate a heatmap.\n",
- "\n",
- "    Parameters\n",
- "    ----------\n",
- "    im\n",
- "        The AxesImage to be labeled.\n",
- "    data\n",
- "        Data used to annotate. If None, the image's data is used. Optional.\n",
- "    threshold\n",
- "        Value in data units according to which the colors from textcolors are\n",
- "        applied. If None (the default), half of the normalized data maximum\n",
- "        is used as separation. Optional.\n",
- "    valfmt\n",
- "        The format of the annotations inside the heatmap. This should either\n",
- "        use the string format method, e.g. \"$ {x:.2f}\", or be a\n",
- "        `matplotlib.ticker.Formatter`. Optional.\n",
- "    textcolors\n",
- "        A pair of color specifications. The first is used for values below\n",
- "        the threshold, the second for those above. Optional.\n",
- "    **textkw\n",
- "        All other arguments are forwarded to each call to `text` used to create\n",
- "        the text labels.\n",
- "\n",
- "    Returns\n",
- "    -------\n",
- "    list\n",
- "        The text objects created, one per cell, in row-major order.\n",
- "    \"\"\"\n",
- "    # Local import: the notebook only binds `plt` and `mpimg`, so the name\n",
- "    # `matplotlib` would otherwise be undefined on a fresh kernel.\n",
- "    import matplotlib.ticker\n",
- "\n",
- "    if not isinstance(data, (list, np.ndarray)):\n",
- "        data = im.get_array()\n",
- "\n",
- "    # Normalize the threshold to the images color range.\n",
- "    if threshold is not None:\n",
- "        threshold = im.norm(threshold)\n",
- "    else:\n",
- "        threshold = im.norm(data.max())/2.\n",
- "\n",
- "    # Set default alignment to center, but allow it to be\n",
- "    # overwritten by textkw.\n",
- "    kw = dict(horizontalalignment=\"center\",\n",
- "              verticalalignment=\"center\")\n",
- "    kw.update(textkw)\n",
- "\n",
- "    # Get the formatter in case a string is supplied\n",
- "    if isinstance(valfmt, str):\n",
- "        valfmt = matplotlib.ticker.StrMethodFormatter(valfmt)\n",
- "\n",
- "    # Loop over the data and create a `Text` for each \"pixel\".\n",
- "    # Change the text's color depending on the data.\n",
- "    texts = []\n",
- "    for i in range(data.shape[0]):\n",
- "        for j in range(data.shape[1]):\n",
- "            kw.update(color=textcolors[int(im.norm(data[i, j]) > threshold)])\n",
- "            # Pass kw through so the alignment and colour actually reach the label.\n",
- "            text = im.axes.text(j, i, valfmt(data[i, j], None), **kw)\n",
- "            texts.append(text)\n",
- "\n",
- "    return texts\n",
- "\n",
- "def plotWordCorrelations(words):\n",
- "    \"\"\"Plot an annotated similarity heat map for `words` and save it to disk.\n",
- "\n",
- "    The figure is written to '<number of words>.png' in the working directory\n",
- "    and then displayed.\n",
- "    \"\"\"\n",
- "    fig, ax = plt.subplots(figsize=(10,10))\n",
- "\n",
- "    matrix = createCorrelationMatrix(words)\n",
- "\n",
- "    im, cbar = heatmap(matrix, words, words, ax=ax)\n",
- "\n",
- "    # Cell labels use annotate_heatmap's built-in two-decimal format.\n",
- "    annotate_heatmap(im)\n",
- "\n",
- "    fig.tight_layout()\n",
- "    # Save before showing: once plt.show() has run, pyplot's savefig writes a\n",
- "    # new, empty figure instead of this one.\n",
- "    fig.savefig(str(len(words)) + '.png')\n",
- "    plt.show()\n",
- "\n",
- "\n",
- "plotWordCorrelations([\"cat\", \"dog\", \"computer\"])"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 49,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "AxesImage(90,90;446.4x543.6)\n",
- "AxesImage(90,90;446.4x543.6)\n"
- ]
- },
- {
- "data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAArAAAALICAYAAACHNcMaAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8li6FKAAAgAElEQVR4nOzdd3gVdfbH8fdJo4UOgRQURKTYQEBUsKCrstjQn72soiu6a29rXRf7KoqVVbHLumtBFywIWFAEQVEEEQREQCABAek17fz+mJuQQMAYSeZO+LyeJ4/3zsxNzh2HuWfOnO/3mrsjIiIiIhIVCWEHICIiIiLyWyiBFREREZFIUQIrIiIiIpGiBFZEREREIkUJrIiIiIhEihJYEREREYkUJbAiIiIiskNm9ryZLTWz77az3szsMTObY2bfmtkBlRmPElgRERER+TUvAr12sP6PQJvYTz/gycoMRgmsiIiIiOyQu48FVuxgk5OAlz0wEWhgZumVFU9SZf1iEREREdk+a5Lq5BaEHUZg7abpwKYSSwa7++Df8BsygYUlni+KLVu8E6LbhhJYERERkTDkFsBBrcKOIvDB95vcvcvv+A1WxjL/Hb9vh9RCICIiIiK/1yKgRYnnWUBOZf0xJbAiIiIi8nu9DfwpNhvBQcBqd6+U9gFQC4GIiIhIOIyyb7zHITP7L3AE0MTMFgH/AJIB3P0pYATQG5gDbAD6VmY8SmBFREREZIfc/axfWe/AZVUUjhJYERERkdBYREqwcUY9sCIiIiISKUpgRURERCRS1EIgIiIiEhZ1EFSIKrAiIiIiEilKYEVEREQkUtRCICIiIhIWzUJQIarAioiIiEikqAIrIiIiEhYVYCtEFVgRERERiRQlsCIiIiISKWohEBEREQmDAQnqIagIVWBFREREJFKUwIqIiIhIpKiFQERERCQs6iCoEFVgRURERCRSlMCKiIiISKSohUBEREQkFKavkq0gVWBFREREJFJUgRUREREJiwqwFaIKrIiIiIhEihJYEREREYkUtRCIiIiIhMFQC0EFqQIrIiIiIpGiBFZEREREIkUtBCIiIiJh0TywFaIKrIiIiIhEiiqwIiIiImFRAbZCVIEVERERkUhRAisiIiIikaIWAhEREZGwaBBXhagCKyIiIiKRogRWRERERCJFLQQiIiIiYdBXyVaYKrAiIiIiEilKYEVEREQkUtRCICIiIhIWzUJQIarAioiIiEikqAIrIiIiEhaVEitEu01EREREIkUJrIiIiIhEiloIRERERMKiMVwVogqsiIiIiESKElgRERERiRS1EIiIiIiEwdA8sBWkCqyIiIiIRIoqsCIiIhFnZonuXhB2HFIBKsBWiCqwIiIiEWVmtQGKklczqxVuRCJVQwmsiIhIxJhZkpmdDTxlZhPM7IzYqkvN7A4zqx9mfCKVTS0EIiIi0XMBcBTwCvALcK2ZdQaaAO+7++oQY5NyMw3iqiBVYEVERCLEzOoCHYBX3f1dd5/g7qcRfKZvAloVtRaIVFdKYEVERKKlFtAb+LJogZn1APYCUoBUYKaZHVDyRWamz3ypNtRCICIiEi2bgclAR2BxbNmZwEfAm+6+yMzqxNZPNrP67r7a3QvNLMXdc8MJW8qkDoIK0dWYiIhIhMT6W58H7jCzB83sOoKC1Jfuvii22bnAT7HHd5vZ3WbWsih5NbNBZlbfTA2YEk1KYEVERCLG3T909wOBsUAbYCrwPYCZDQSmuvtHZnYSwYAvAz4ysy6xXzE0VpX1qo9e5PdTC4GISCUxswR3Lww7Dqm+3P1tMxsJpLn7KjNrAVwIdIwN9joZuNfd7zOzvwOJZjYLOLjk79GxGiIVwStEFVgRkUoS6znUeVYqlbvnlmgduAwY5u7zgTOAOsBDse0KgWeA6e6+wsx2M7OuRet0rEqUqAIrIrITmdnewLHA7sA/3X1xbLnpdu3Oparh
ttz9JgAz2x04D3i8RN9rB4KK7G5mdjxwDHCImS0ELnf37Nh2PYGx+mraKmBoEFcF6WqriukKV6JEx+tvY2Z/AO4DGgIrgGFmdipAUfJqZonhRVg9mNnBUFxRlDK4+0/A/e4+tMTiJ4FHY4PA2gDN3b0L8BVwmJntaWb3A6e4e4H+/Us8UwW2iphZsrvnFd2m0YlX4pmZNXL3FTpeyy82cfyRwFvu/mJs2X+B9mZ2M/CTu/9HVa2KM7MawF+A48ysHvCIu/83ts5gy4WCgLuPKHoc+9rZA9z98Niid4EDzOxgd78ndmF1LXAdcEvs9YWx1+ocIHFHV1eVzAIHAA+Y2WAza6YTQeUzs5ZhxxBFZpZoZn2A18zsOTPLKPkhFnJ48S4F+APwDRS3DMwGRgP/A041s6FmVq9oX2oKo98shWB6qHsJBir1MLMjzGw3j9Fxul0zgN5mVtvMjnb3H4C3gHvMLD12YXUg8B7Q1cyuM7MU2NIfq+O1kpjFx0/E6B965TsauBVYSnACecbMepXcQLcUd65Y/9YYM2sQdiwRVAO4CHgcmAY8a2b7QOlqTHjhxbUkYBGwEIJKoJldDNwDnAhcA/wIJJeYUF5tBeVkZknuvpbgYqCnu08HrgYKgIfM7MXYhP0qEJTB3ae4+2cEvdl3mtmjQC6wFkg2s+7Ake5+UuxraV8AHjSzO82sgbsXqrot8UQfRJXIzFIJbim+4+73ufsjBL1x55rZAWbWFCDWaxS9y584FLvFeCEwKDalTKL2bfnE9pMDeUDr2PHaD2htZi+Y2TmgvsPtcfflwBfARDPra2YXEowIHwFkAe8DXYDmZrY/cF+sZxa1FexY7Ng8MFbVvgfoZGanAMlATizh+hZ4MtbKIdvh7t8D3YHVwCUELS8LCPpj7y6xaQ/gUoIhRiPNrG/J36MLWQmbDsDKVQc4iKBBHjPrDMwFvgNuAl4ws1Fm1kFXtjvNecRGgJvZSe5eUKLKpUR2x1q6+0bgz8DBZtYjNjXPRwS3bM8ws0fNrBaAmXUxs1tjc00K4O73E3xHfWZs0ejYhPNXEvQcfh+rHP6BYAT4BWb2TdGgJNmuBKAn8KWZ/RF4G+gDjCeovo4kuHuwyt03xG6Tdy16sZKt0mLV1NuB09z9JTPrCJi7PwzFxZcrgBvd/e/AAOCs2L/5Y4t+R1jxVzsWJz8Ro3/UlasOwWExM/b8dIJvSjkLyHb34wmqM3ds/ULdUvztzKwhcCPwFPAs0N/MzipaH7ulq/1aBjNLJkimniWown5N0C/3NMFXVv4f8CBQI5bkApwGNAIKzSzNzHbpQaGxfvcEd5/j7ncDHwInxnqJXwUOBZ4ws25AK4Lpjc4FngDOi9092OZ3VuV7iFexC9F7gNuBvwOpwAHAN+7eh2Bu0/OANbGXXEmwr/9iZu2UbJXN3TfHHuYAC8zs1djguPOAhKKEFjgMqA+0BAaa2UMlf48uECQMOugqSSxRygHeBMaZ2RCgK5BBcKV7TWzTkUB+0cAOM2sMuqVYQdcBM9z9dnefSnDLtpaZHW5mt5tZQ+3Xsrl7HkGVdRXBbfCaBINlOhGM+m4GvE7QXoCZ9QYaAMOADcBAoEWVBx5HYmOICouSTndf4O7tgH8TnGs/AeYAJxD0yr4Ve+kBQCN332xmdc3skBK3wU80s5OVyAbc/X13P4Qg6X+N4GILoB4wAXjfzHYDegETY+s+MbMTAMyslpkdVHSelYC7L3X34wj6XusCfwPuB7BgvtjawM2xKbn+BNQ0s2QzqxHr5S7qj1eBoCIS4uQnYnbpikkl+wNwOUFS9QlB5eBdYA+CE2+RDKA5QSP98wQJV2vgVncfXZUBR5kFk8dfDxwfe94EWA+kEVxE7E9wIXGnu7+23V+0i4pVDjcD18cGd2wkSAp+dvdfzOxlgqrXZ7FK60kErTAAY4BUd58XRuzxpuTArFjlcIyZfQEUEtz2zgRedPflZtYWOISgPaMdQZU7GciI
VbnaEQy0GU1wPAvBBZeZDQQeMbO3CfbTf939UzN7nOCc+6i7r4xdDOwWe+mNBBdeTwG/hBB6XHP3UbEktL+7j47tuxO
- "text/plain": [
- "<Figure size 720x720 with 2 Axes>"
- ]
- },
- "metadata": {
- "needs_background": "light"
- },
- "output_type": "display_data"
- },
- {
- "data": {
- "text/plain": [
- "<Figure size 432x288 with 0 Axes>"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
- "source": [
- "# Same plot for a larger, more varied set of words.\n",
- "plotWordCorrelations([\"good\", \"bad\", \"salty\", \"candy\", \"santa\", \"christmas\"])"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 11,
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/opt/conda/lib/python3.7/site-packages/ipykernel_launcher.py:11: DeprecationWarning: Call to deprecated `wv` (Attribute will be removed in 4.0.0, use self instead).\n",
- " # This is added back by InteractiveShellApp.init_path()\n",
- "/opt/conda/lib/python3.7/site-packages/ipykernel_launcher.py:12: DeprecationWarning: Call to deprecated `wv` (Attribute will be removed in 4.0.0, use self instead).\n",
- " if sys.path[0] == '':\n"
- ]
- },
- {
- "ename": "MemoryError",
- "evalue": "Unable to allocate array with shape (3000000, 300) and data type float64",
- "output_type": "error",
- "traceback": [
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[0;31mMemoryError\u001b[0m Traceback (most recent call last)",
- "\u001b[0;32m<ipython-input-11-7b046372cab7>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 27\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 28\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 29\u001b[0;31m \u001b[0mx_vals\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_vals\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlabels\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mreduce_dimensions\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
- "\u001b[0;32m<ipython-input-11-7b046372cab7>\u001b[0m in \u001b[0;36mreduce_dimensions\u001b[0;34m(model)\u001b[0m\n\u001b[1;32m 20\u001b[0m \u001b[0mvectors\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0masarray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvectors\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 21\u001b[0m \u001b[0mtsne\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mTSNE\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn_components\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mnum_dimensions\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrandom_state\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 22\u001b[0;31m \u001b[0mvectors\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtsne\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit_transform\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvectors\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 23\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 24\u001b[0m \u001b[0mx_vals\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mv\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mvectors\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/sklearn/manifold/_t_sne.py\u001b[0m in \u001b[0;36mfit_transform\u001b[0;34m(self, X, y)\u001b[0m\n\u001b[1;32m 884\u001b[0m \u001b[0mEmbedding\u001b[0m \u001b[0mof\u001b[0m \u001b[0mthe\u001b[0m \u001b[0mtraining\u001b[0m \u001b[0mdata\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mlow\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0mdimensional\u001b[0m \u001b[0mspace\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 885\u001b[0m \"\"\"\n\u001b[0;32m--> 886\u001b[0;31m \u001b[0membedding\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_fit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 887\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0membedding_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0membedding\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 888\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0membedding_\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/sklearn/manifold/_t_sne.py\u001b[0m in \u001b[0;36m_fit\u001b[0;34m(self, X, skip_num_points)\u001b[0m\n\u001b[1;32m 751\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 752\u001b[0m \u001b[0mt0\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 753\u001b[0;31m \u001b[0mdistances_nn\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mknn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mkneighbors_graph\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmode\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'distance'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 754\u001b[0m \u001b[0mduration\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mt0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 755\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mverbose\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/sklearn/neighbors/_base.py\u001b[0m in \u001b[0;36mkneighbors_graph\u001b[0;34m(self, X, n_neighbors, mode)\u001b[0m\n\u001b[1;32m 761\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mmode\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'distance'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 762\u001b[0m A_data, A_ind = self.kneighbors(\n\u001b[0;32m--> 763\u001b[0;31m X, n_neighbors, return_distance=True)\n\u001b[0m\u001b[1;32m 764\u001b[0m \u001b[0mA_data\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mravel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mA_data\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 765\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/sklearn/neighbors/_base.py\u001b[0m in \u001b[0;36mkneighbors\u001b[0;34m(self, X, n_neighbors, return_distance)\u001b[0m\n\u001b[1;32m 661\u001b[0m delayed_query(\n\u001b[1;32m 662\u001b[0m self._tree, X[s], n_neighbors, return_distance)\n\u001b[0;32m--> 663\u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0ms\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mgen_even_slices\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn_jobs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 664\u001b[0m )\n\u001b[1;32m 665\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/joblib/parallel.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, iterable)\u001b[0m\n\u001b[1;32m 1002\u001b[0m \u001b[0;31m# remaining jobs.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1003\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_iterating\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1004\u001b[0;31m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdispatch_one_batch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0miterator\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1005\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_iterating\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_original_iterator\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1006\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/joblib/parallel.py\u001b[0m in \u001b[0;36mdispatch_one_batch\u001b[0;34m(self, iterator)\u001b[0m\n\u001b[1;32m 833\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 834\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 835\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_dispatch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtasks\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 836\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 837\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/joblib/parallel.py\u001b[0m in \u001b[0;36m_dispatch\u001b[0;34m(self, batch)\u001b[0m\n\u001b[1;32m 752\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_lock\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 753\u001b[0m \u001b[0mjob_idx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_jobs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 754\u001b[0;31m \u001b[0mjob\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_backend\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_async\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbatch\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcallback\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcb\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 755\u001b[0m \u001b[0;31m# A job can complete so quickly than its callback is\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 756\u001b[0m \u001b[0;31m# called before we get here, causing self._jobs to\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/joblib/_parallel_backends.py\u001b[0m in \u001b[0;36mapply_async\u001b[0;34m(self, func, callback)\u001b[0m\n\u001b[1;32m 207\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mapply_async\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcallback\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 208\u001b[0m \u001b[0;34m\"\"\"Schedule a func to be run\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 209\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mImmediateResult\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 210\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mcallback\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 211\u001b[0m \u001b[0mcallback\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/joblib/_parallel_backends.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, batch)\u001b[0m\n\u001b[1;32m 588\u001b[0m \u001b[0;31m# Don't delay the application, to avoid keeping the input\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 589\u001b[0m \u001b[0;31m# arguments in memory\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 590\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mresults\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mbatch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 591\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 592\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/joblib/parallel.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 254\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mparallel_backend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_backend\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn_jobs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_n_jobs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 255\u001b[0m return [func(*args, **kwargs)\n\u001b[0;32m--> 256\u001b[0;31m for func, args, kwargs in self.items]\n\u001b[0m\u001b[1;32m 257\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 258\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__len__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/joblib/parallel.py\u001b[0m in \u001b[0;36m<listcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 254\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mparallel_backend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_backend\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn_jobs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_n_jobs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 255\u001b[0m return [func(*args, **kwargs)\n\u001b[0;32m--> 256\u001b[0;31m for func, args, kwargs in self.items]\n\u001b[0m\u001b[1;32m 257\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 258\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__len__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/sklearn/neighbors/_base.py\u001b[0m in \u001b[0;36m_tree_query_parallel_helper\u001b[0;34m(tree, *args, **kwargs)\u001b[0m\n\u001b[1;32m 488\u001b[0m \u001b[0munder\u001b[0m \u001b[0mPyPy\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 489\u001b[0m \"\"\"\n\u001b[0;32m--> 490\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mtree\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mquery\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 491\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 492\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32msklearn/neighbors/_binary_tree.pxi\u001b[0m in \u001b[0;36msklearn.neighbors._kd_tree.BinaryTree.query\u001b[0;34m()\u001b[0m\n",
- "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36mcheck_array\u001b[0;34m(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)\u001b[0m\n\u001b[1;32m 529\u001b[0m \u001b[0marray\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0marray\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcasting\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"unsafe\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 530\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 531\u001b[0;31m \u001b[0marray\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0masarray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0morder\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0morder\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 532\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mComplexWarning\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 533\u001b[0m raise ValueError(\"Complex data not supported\\n\"\n",
- "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/numpy/core/_asarray.py\u001b[0m in \u001b[0;36masarray\u001b[0;34m(a, dtype, order)\u001b[0m\n\u001b[1;32m 83\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 84\u001b[0m \"\"\"\n\u001b[0;32m---> 85\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0morder\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0morder\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 86\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 87\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;31mMemoryError\u001b[0m: Unable to allocate array with shape (3000000, 300) and data type float64"
- ]
- }
- ],
- "source": [
- "from sklearn.decomposition import IncrementalPCA  # initial reduction\n",
- "from sklearn.manifold import TSNE  # final reduction\n",
- "import numpy as np  # array handling\n",
- "\n",
- "\n",
- "def reduce_dimensions(model, max_words=10000):\n",
- "    \"\"\"Project word vectors down to 2-D with t-SNE so they can be plotted.\n",
- "\n",
- "    Embedding every vector of a large pretrained model is not feasible: with\n",
- "    the full vocabulary, t-SNE failed with a MemoryError while allocating a\n",
- "    (3000000, 300) float64 array. Only the first ``max_words`` vocabulary\n",
- "    entries are therefore embedded.\n",
- "\n",
- "    Parameters\n",
- "    ----------\n",
- "    model : a gensim model exposing its vectors on ``.wv``, or a bare\n",
- "        KeyedVectors object.\n",
- "    max_words : int or None, default 10000\n",
- "        Upper bound on the number of words to embed, taken in the model's\n",
- "        stored vocabulary order (presumably most frequent first for the\n",
- "        GoogleNews vectors -- verify for other models). ``None`` embeds the\n",
- "        whole vocabulary.\n",
- "\n",
- "    Returns\n",
- "    -------\n",
- "    x_vals, y_vals : list\n",
- "        First and second t-SNE coordinate of each embedded word.\n",
- "    labels : numpy.ndarray\n",
- "        The embedded words, aligned with ``x_vals`` / ``y_vals``.\n",
- "\n",
- "    Raises\n",
- "    ------\n",
- "    ValueError\n",
- "        If ``max_words`` is given but is smaller than 1.\n",
- "    \"\"\"\n",
- "    if max_words is not None and max_words < 1:\n",
- "        raise ValueError('max_words must be a positive integer or None')\n",
- "\n",
- "    num_dimensions = 2  # final num dimensions (2D, 3D, etc)\n",
- "\n",
- "    # Accept either a full model (vectors live on ``.wv``) or bare KeyedVectors.\n",
- "    keyed_vectors = getattr(model, 'wv', model)\n",
- "\n",
- "    # gensim >= 4 exposes the ordered vocabulary as ``index_to_key``;\n",
- "    # gensim 3.x calls the same list ``index2word``.\n",
- "    words = getattr(keyed_vectors, 'index_to_key', None)\n",
- "    if words is None:\n",
- "        words = keyed_vectors.index2word\n",
- "    words = list(words[:max_words])  # a slice bound of None keeps everything\n",
- "\n",
- "    # positions in vector space, plus the words to label the points later\n",
- "    vectors = np.asarray([keyed_vectors[word] for word in words])\n",
- "    labels = np.asarray(words)\n",
- "\n",
- "    # reduce using t-SNE (fixed seed so the layout is reproducible)\n",
- "    tsne = TSNE(n_components=num_dimensions, random_state=0)\n",
- "    embedding = tsne.fit_transform(vectors)\n",
- "\n",
- "    x_vals = list(embedding[:, 0])\n",
- "    y_vals = list(embedding[:, 1])\n",
- "    return x_vals, y_vals, labels\n",
- "\n",
- "\n",
- "x_vals, y_vals, labels = reduce_dimensions(model)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.7.6"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 4
- }
|