{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!pip install -r node2vec/requirements.txt" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!pip install --upgrade gensim" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 60, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Walk iteration:\n", "1 / 40\n", "2 / 40\n", "3 / 40\n", "4 / 40\n", "5 / 40\n", "6 / 40\n", "7 / 40\n", "8 / 40\n", "9 / 40\n", "10 / 40\n", "11 / 40\n", "12 / 40\n", "13 / 40\n", "14 / 40\n", "15 / 40\n", "16 / 40\n", "17 / 40\n", "18 / 40\n", "19 / 40\n", "20 / 40\n", "21 / 40\n", "22 / 40\n", "23 / 40\n", "24 / 40\n", "25 / 40\n", "26 / 40\n", "27 / 40\n", "28 / 40\n", "29 / 40\n", "30 / 40\n", "31 / 40\n", "32 / 40\n", "33 / 40\n", "34 / 40\n", "35 / 40\n", "36 / 40\n", "37 / 40\n", "38 / 40\n", "39 / 40\n", "40 / 40\n" ] } ], "source": [ "!python node2vec/src/main.py --input jrtechs.edgelist --output output/jrtechs2.emd --num-walks=40 --dimensions=50" ] }, { "cell_type": "code", "execution_count": 72, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "39\n" ] } ], "source": [ "labels=[]\n", "vectors=[]\n", "\n", "with open(\"output/jrtechs2.emd\") as fp:\n", " for line in fp:\n", " l_list = list(map(float, line.split()))\n", " vectors.append(l_list[1::])\n", " labels.append(line.split()[0])\n", " \n", "print(len(labels))" ] }, { "cell_type": "code", "execution_count": 73, "metadata": {}, "outputs": [], "source": [ "name_map = {}\n", "with open(\"friendsMap.map\") as fp:\n", " for line in fp:\n", " name_map[line.split()[0]] = line.split()[1]" ] }, { "cell_type": "code", "execution_count": 63, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'76561198188400721': 'jrtechs',\n", " '76561198049526995': 'Noosh',\n", " 
'76561198067517157': 'nweis76',\n", " '76561198131175960': 'Delta_∆',\n", " '76561198084464357': 'Ceta',\n", " '76561198085584420': 'PureMaths',\n", " '76561198068647768': 'Void',\n", " '76561198094968588': 'Dumcumpster',\n", " '76561198107069713': 'yankeeman1081',\n", " '76561198086854442': 'jspike397',\n", " '76561198273751201': 'Kyon',\n", " '76561198083124245': 'drichardson005',\n", " '76561198035606013': 'BK🐻',\n", " '76561198255835078': 'Zelazny',\n", " '76561198170096391': 'Greata',\n", " '76561198306786411': 'fazeboojujee',\n", " '76561198062501319': 'Cantankerous',\n", " '76561198036270560': 'Toaster',\n", " '76561198306443796': 'numstudequals1',\n", " '76561198133044936': 'Panda',\n", " '76561198095814450': 'Tbonedog',\n", " '76561198069739846': 'Alminikar',\n", " '76561198233398192': 'Saxophones',\n", " '76561198162654610': 'Raydan',\n", " '76561198047367972': 'Oberyn',\n", " '76561198057450983': 'TraceTheKitteh',\n", " '76561198176504246': 'shistthis',\n", " '76561198111538799': 'sergiozygmunt',\n", " '76561198853827591': 'theramendragon',\n", " '76561198055948417': 'Xaldin31',\n", " '76561198137304077': 'lilsar419',\n", " '76561198406334664': 'wywyit',\n", " '76561198098761042': 'wee',\n", " '76561198192510666': 'Rebel',\n", " '76561198066260593': 'NUT',\n", " '76561198099625445': 'DAW',\n", " '76561198017734545': 'Which',\n", " '76561198036176189': 'Ben',\n", " '76561198069739485': 'Rkames517',\n", " '76561198062300654': 'Rawls',\n", " '76561198068098265': 'Brendy',\n", " '76561198065642391': 'Therefore',\n", " '76561198121369685': 'DataFrogman'}" ] }, "execution_count": 63, "metadata": {}, "output_type": "execute_result" } ], "source": [ "name_map" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 75, "metadata": {}, "outputs": [], "source": [ "from sklearn.decomposition import IncrementalPCA # inital reduction\n", "from sklearn.manifold import TSNE # final 
import numpy as np


def reduce_dimensions(labels, vectors, num_dimensions=2):
    """Project embedding vectors into a low-dimensional space with t-SNE.

    Args:
        labels: Sequence of node ids, one per vector.
        vectors: Sequence of equal-length numeric vectors.
        num_dimensions: Number of t-SNE components to compute. Only the
            first two components are returned, so values below 2 fail with
            an IndexError and components beyond the second are discarded.

    Returns:
        ``(x_vals, y_vals, labels)``: two lists holding the first and second
        t-SNE coordinate of every point, and ``labels`` converted to a
        NumPy array.
    """
    # Convert both inputs to numpy arrays for the reduction.
    vectors = np.asarray(vectors)
    labels = np.asarray(labels)

    # t-SNE needs perplexity < number of samples. Keep scikit-learn's
    # default of 30 whenever there are enough points, shrink it otherwise.
    perplexity = min(30.0, float(max(len(vectors) - 1, 1)))

    # Fixed random_state keeps the layout the same from run to run.
    tsne = TSNE(
        n_components=num_dimensions,
        perplexity=perplexity,
        random_state=0,
    )
    reduced = tsne.fit_transform(vectors)

    x_vals = list(reduced[:, 0])
    y_vals = list(reduced[:, 1])
    return x_vals, y_vals, labels


x_vals, y_vals, labels = reduce_dimensions(labels, vectors)

# %% Plot the 2-D embedding and label a random subset of the points.
import matplotlib.pyplot as plt
import random


def plot_with_matplotlib(x_vals, y_vals, labels, num_to_label, names=None, seed=None):
    """Scatter-plot the embedding, annotate some points, and save 'ex.png'.

    Args:
        x_vals: X coordinate of every point.
        y_vals: Y coordinate of every point.
        labels: Node id of every point, index-aligned with the coordinates.
        num_to_label: How many randomly chosen points to annotate. Values
            larger than ``len(labels)`` annotate every point; values below
            zero annotate none.
        names: Optional ``{node id: display name}`` mapping. Defaults to the
            notebook-level ``name_map``. Ids missing from the mapping are
            annotated with the raw id instead of raising ``KeyError``.
        seed: Optional seed for the choice of annotated points. ``None``
            draws from the global ``random`` state.

    Returns:
        None. The figure is written to ``ex.png`` in the working directory.
    """
    if names is None:
        names = name_map
    rng = random if seed is None else random.Random(seed)

    fig, ax = plt.subplots(figsize=(5, 5))
    ax.scatter(x_vals, y_vals)
    ax.set_title("Embedding Space")

    # random.sample raises if asked for more items than exist, so clamp.
    how_many = max(0, min(num_to_label, len(labels)))
    for i in rng.sample(range(len(labels)), how_many):
        node_id = labels[i]
        ax.annotate(names.get(node_id, str(node_id)), (x_vals[i], y_vals[i]))

    fig.savefig('ex.png')


# Seeded so that re-running the notebook annotates the same points.
plot_with_matplotlib(x_vals, y_vals, labels, 12, seed=0)