|
|
- {
- "cells": [
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Use the %pip magic (not !pip) so packages install into the running kernel's environment.\n",
- "%pip install -r node2vec/requirements.txt"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Use the %pip magic (not !pip) so the upgrade targets the running kernel's environment.\n",
- "%pip install --upgrade gensim"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- },
- {
- "cell_type": "code",
- "execution_count": 60,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Walk iteration:\n",
- "1 / 40\n",
- "2 / 40\n",
- "3 / 40\n",
- "4 / 40\n",
- "5 / 40\n",
- "6 / 40\n",
- "7 / 40\n",
- "8 / 40\n",
- "9 / 40\n",
- "10 / 40\n",
- "11 / 40\n",
- "12 / 40\n",
- "13 / 40\n",
- "14 / 40\n",
- "15 / 40\n",
- "16 / 40\n",
- "17 / 40\n",
- "18 / 40\n",
- "19 / 40\n",
- "20 / 40\n",
- "21 / 40\n",
- "22 / 40\n",
- "23 / 40\n",
- "24 / 40\n",
- "25 / 40\n",
- "26 / 40\n",
- "27 / 40\n",
- "28 / 40\n",
- "29 / 40\n",
- "30 / 40\n",
- "31 / 40\n",
- "32 / 40\n",
- "33 / 40\n",
- "34 / 40\n",
- "35 / 40\n",
- "36 / 40\n",
- "37 / 40\n",
- "38 / 40\n",
- "39 / 40\n",
- "40 / 40\n"
- ]
- }
- ],
- "source": [
- "!python node2vec/src/main.py --input jrtechs.edgelist --output output/jrtechs2.emd --num-walks=40 --dimensions=50"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 72,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "39\n"
- ]
- }
- ],
- "source": [
- "labels = []\n",
- "vectors = []\n",
- "\n",
- "# Parse the embedding file: every data line is `<node id> <v1> <v2> ...`.\n",
- "with open(\"output/jrtechs2.emd\") as fp:\n",
- "    for line_number, line in enumerate(fp):\n",
- "        fields = line.split()\n",
- "        if not fields:\n",
- "            continue  # skip blank lines instead of failing on fields[0]\n",
- "        # word2vec text files may start with a `<count> <dimensions>` header row;\n",
- "        # it is not an embedding, so skip it when present.\n",
- "        if line_number == 0 and len(fields) == 2 and all(f.isdigit() for f in fields):\n",
- "            continue\n",
- "        node_id, *components = fields\n",
- "        labels.append(node_id)  # keep the id as text; it is used as a name_map key later\n",
- "        vectors.append([float(c) for c in components])\n",
- "\n",
- "print(len(labels))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 73,
- "metadata": {},
- "outputs": [],
- "source": [
- "name_map = {}\n",
- "# Names in this map can contain non-ASCII characters, so decode explicitly instead of\n",
- "# relying on the platform default. NOTE(review): assumes the file is UTF-8 - confirm.\n",
- "with open(\"friendsMap.map\", encoding=\"utf-8\") as fp:\n",
- "    for line in fp:\n",
- "        # Split once: the first token is the id, the rest of the line is the name\n",
- "        # (which may itself contain spaces).\n",
- "        # NOTE(review): assumes no further columns follow the name - confirm.\n",
- "        fields = line.split(maxsplit=1)\n",
- "        if len(fields) < 2:\n",
- "            continue  # blank line, or an id with no name\n",
- "        node_id, display_name = fields\n",
- "        name_map[node_id] = display_name.strip()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 63,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "{'76561198188400721': 'jrtechs',\n",
- " '76561198049526995': 'Noosh',\n",
- " '76561198067517157': 'nweis76',\n",
- " '76561198131175960': 'Delta_∆',\n",
- " '76561198084464357': 'Ceta',\n",
- " '76561198085584420': 'PureMaths',\n",
- " '76561198068647768': 'Void',\n",
- " '76561198094968588': 'Dumcumpster',\n",
- " '76561198107069713': 'yankeeman1081',\n",
- " '76561198086854442': 'jspike397',\n",
- " '76561198273751201': 'Kyon',\n",
- " '76561198083124245': 'drichardson005',\n",
- " '76561198035606013': 'BK🐻',\n",
- " '76561198255835078': 'Zelazny',\n",
- " '76561198170096391': 'Greata',\n",
- " '76561198306786411': 'fazeboojujee',\n",
- " '76561198062501319': 'Cantankerous',\n",
- " '76561198036270560': 'Toaster',\n",
- " '76561198306443796': 'numstudequals1',\n",
- " '76561198133044936': 'Panda',\n",
- " '76561198095814450': 'Tbonedog',\n",
- " '76561198069739846': 'Alminikar',\n",
- " '76561198233398192': 'Saxophones',\n",
- " '76561198162654610': 'Raydan',\n",
- " '76561198047367972': 'Oberyn',\n",
- " '76561198057450983': 'TraceTheKitteh',\n",
- " '76561198176504246': 'shistthis',\n",
- " '76561198111538799': 'sergiozygmunt',\n",
- " '76561198853827591': 'theramendragon',\n",
- " '76561198055948417': 'Xaldin31',\n",
- " '76561198137304077': 'lilsar419',\n",
- " '76561198406334664': 'wywyit',\n",
- " '76561198098761042': 'wee',\n",
- " '76561198192510666': 'Rebel',\n",
- " '76561198066260593': 'NUT',\n",
- " '76561198099625445': 'DAW',\n",
- " '76561198017734545': 'Which',\n",
- " '76561198036176189': 'Ben',\n",
- " '76561198069739485': 'Rkames517',\n",
- " '76561198062300654': 'Rawls',\n",
- " '76561198068098265': 'Brendy',\n",
- " '76561198065642391': 'Therefore',\n",
- " '76561198121369685': 'DataFrogman'}"
- ]
- },
- "execution_count": 63,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "name_map"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- },
- {
- "cell_type": "code",
- "execution_count": 75,
- "metadata": {},
- "outputs": [],
- "source": [
- "from sklearn.decomposition import IncrementalPCA # inital reduction\n",
- "from sklearn.manifold import TSNE # final reduction\n",
- "import numpy as np \n",
- "\n",
- "def reduce_dimensions(labels, vectors, num_dimensions=2):\n",
- "    \"\"\"Project embedding vectors into a low-dimensional space with t-SNE.\n",
- "\n",
- "    Args:\n",
- "        labels: sequence of labels, one per vector.\n",
- "        vectors: sequence of equal-length numeric vectors.\n",
- "        num_dimensions: number of t-SNE components to compute.\n",
- "\n",
- "    Returns:\n",
- "        (x_vals, y_vals, labels): the first and second t-SNE components as lists,\n",
- "        and the labels converted to a NumPy array. Components beyond the second\n",
- "        are computed but not returned.\n",
- "    \"\"\"\n",
- "    # convert both lists into numpy arrays for reduction\n",
- "    vectors = np.asarray(vectors)\n",
- "    labels = np.asarray(labels)\n",
- "\n",
- "    # Recent scikit-learn releases reject perplexity >= n_samples, so cap the\n",
- "    # default of 30 for small inputs; larger inputs keep the default.\n",
- "    n_samples = vectors.shape[0]\n",
- "    perplexity = float(min(30, max(n_samples - 1, 1)))\n",
- "\n",
- "    # reduce using t-SNE; random_state is fixed so repeated runs give the same layout\n",
- "    tsne = TSNE(n_components=num_dimensions, perplexity=perplexity, random_state=0)\n",
- "    embedded = tsne.fit_transform(vectors)\n",
- "\n",
- "    x_vals = list(embedded[:, 0])\n",
- "    y_vals = list(embedded[:, 1])\n",
- "    return x_vals, y_vals, labels\n",
- "\n",
- "# `labels` is rebound to the NumPy array returned by reduce_dimensions.\n",
- "x_vals, y_vals, labels = reduce_dimensions(labels, vectors)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 78,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAAY4AAAE/CAYAAABRkiSsAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nOzdd3iUVfbA8e9JAQJBQldCSVR6yhBCAEOVpitKWZCSXUEEhNWfa0NAXUVEjWLBwoqggrgIKtIUEKRolCIkJGBoBiSoEektEErC/f0xk3ESQhmSycwk5/M88zhz33bfYXdObjuvGGNQSimlrpaPuyuglFLKu2jgUEop5RQNHEoppZyigUMppZRTNHAopZRyigYOpZRSTtHAoYqMiMwQkQlFdK7BIvLDZbZ/KyJDbe/jRGR5UVxXKXVlGjhKORFJF5EsEcl0eL3j7no5wxgzyxjT1RXnFpE2IrJWRI6LyBERWSMiLVxxLaW8hZ+7K6A8wp3GmBXuroSnEZHrgK+AkcBnQBmgLXDWnfVSyt20xaEuydZdtEZE3hCRYyLyi4jcYiv/TUQOiMigfIdVE5FvROSkiHwnIvUcztfItu2IiOwUkbsdtlUVkUUickJENgA35atLFxHZYfvL/x1A8tXzB4fPRkRGiEiard6TRURs23xF5DUROSQie0TkQdv+Bf0R1QDAGDPbGJNjjMkyxiw3xmzJ9/28Y6vXDhHp5FCPe0Vku+27+EVE7s93Tz1EJMV2z7tF5DZbeSUR+UBE9olIhohMEBHfq/13U8rVNHCoK2kJbAGqAp8Ac4AWwM3AP4B3RCTQYf844HmgGpACzAIQkQrAN7Zz1AD6A/8VkSa24yYDZ4AbgCG2F7ZjqwHzgKdt590NxF6h3t1t9YwA7ga62cqHAbcDFiAK6HmZc/wM5IjIRyJyu4hULmCflrb6VAOeBeaJSBXbtgO2elwH3Au8ISJRtnuKAWYCo4AgoB2QbjtuBpCN9TtuBnQFhl7hfpUqPsYYfZXiF9Yfq0zgmMNrmG3bYCDNYd9wwAA1HcoOAxbb+xnAHIdtgUAOUAfoB3yf79rvYf2x9QXOA40ctr0I/GB7fw+w3mGbAL8DQx3q+YPDdgO0cfj8GTDG9n4VcL/Dts62/f0u8f00tt3X71h/zBfl3r/tun8A4rD/BuCflzjXAuDfDvf+RgH71MTaFRbgUDYAWO3u/63oS1+5Lx3jUAA9zaXHOPY7vM8CMMbkL3NscfyW+8YYkykiR4BaQD2gpYgcc9jXD/gYqG57/5vDtr0O72vlO68REcd9C/Knw/vTDnXMc6587y9ijNmONUAgIo2A/wGTsP6YA2QYYxwzhe61XQMRuR1rYGyAtXVfHvjJtl8dYEkBl6wH+AP7bL1r2I690v0qVWw0cKiiVif3ja0LqwrWv8p/A74zxnTJf4Ct/z7bduwOW3Fdh1325TuvOH520j6gdkH1vRJjzA4RmQE4jlUEi4g4BI+6wCIRKQt8gbW1tNAYc15EFvDX2Mxv5BvHcSg/C1QzxmRfbd2UKk46xqGK2t9sU1jLYB3rWG+M+Q3r7KQGIvJPEfG3vVqISGNjTA7WMYxxIlLeNu7hOOi+GGgqIr1tg9gPAddfY/0+A/4tIsEiEgSMvtSOtsH8x0Sktu1zHawtjfUOu9UAHrLdT1+sXVtLsM7AKgscBLJtrQ/HKcMfAPeKSCcR8bHVp5ExZh+wHHhNRK6zbbtJRNpf4/0qVeQ0cCiALyXvOo75hTjXJ1i7Z44AzbEOoGOMOYn1h7M/1hbIn8DLWH9cAR7E2p30J9Yxhem5JzTGHAL6AvFYx1TqA2uusX7TsP4wbwGSsf7IZ2Mdi8nvJNbB7x9F5BTWgJEKPOawz4+2+hwCXgD6GGMO2+73IayB6igwEOv4SO49bcA2YA4cB77D2k0F1lZKGWCb7di5WCcNKOURJG/3rFKli60lMMUY
U++KO1987GCsA/RtirxiSnkwbXGoUkVEAkTkbyLiJyLBWFtHhWlhKVXqaOBQpY0Az2HtAkoGtgPPuLVGSnkZ7apSSinlFG1xKKWUcooGDqWUUk7xmgWA1apVMyEhIe6uhlKqhElKSjpkjKnu7np4E68JHCEhISQmJrq7GkqpEkZE9l55L+VIu6qUUko5RQOHUkopp2jgUEop5RQNHOqqzJ8/H4vFkufl4+PD0qVLL3lMYGDgJbcppbyX1ywAjI6ONjo47jmmTp3KrFmzWL16NT4+Bf/9ERgYSGZmZjHXTCnniEiSMSba3fXwJtriUE77+eefGT9+PB9//DE+Pj5MnDiRFi1aEBERwbPPPnvR/pmZmXTq1ImoqCjCw8NZuHAhAFOmTLG3XkJDQ+nYsSNgDThPPfUUkZGRtGrViv3793Py5ElCQ0M5f/48ACdOnMjzWSlVfDRwKKecP3+egQMH8tprr1G3bl2WL19OWloaGzZsICUlhaSkJBISEvIcU65cOebPn8+mTZtYvXo1jz32GMYYRowYQUpKChs3bqR27do8+uijAJw6dYpWrVqxefNm2rVrx7Rp06hYsSIdOnRg8eLFAMyZM4fevXvj7+9f7N+BUqWd16zjUMVvQXIGE5ft5I9jWdQKCmBUt4as//RtmjZtSr9+/QBYvnw5y5cvp1mzZoC1dZGWlka7du3s5zHG8OSTT5KQkICPjw8ZGRns37+f66+3Povp3//+N7feeit33nknAGXKlKF79+4ANG/enG+++QaAoUOH8sorr9CzZ0+mT5/OtGnTiu27UEr9RQOHKtCC5AzGzvuJrPPW5xtlHMvioddnkf3dp+zcusW+nzGGsWPHcv/991/qVMyaNYuDBw+SlJSEv78/ISEhnDlzBoAZM2awd+9e3nnnHfv+/v7+5D5v29fXl+xs6xNUY2NjSU9P59tvvyUnJ4ewsLAiv2+l1JVpV5Uq0MRlO+1BAyDnTCZ/fPk61e54lIoVK9rLu3XrxocffmgfBM/IyODAgQN5znX8+HFq1KiBv78/q1evZu9e60LdpKQkXn31Vf73v/9dcoA9v3vuuYeBAwdy7733FvYWlVLXSAOHKtAfx7LyfM5MXsKF08fYMe+NPFNyjx49ysCBA2ndujXh4eH06dOHkydP5jk2Li6OxMREwsPDmTlzJo0aNQLgnXfe4ciRI3Ts2BGLxcLQoUOvWK+4uDiOHj3KgAEDiu5mlVJO0em4qkCx8avIyBc8AIKDAlgz5lY31Mhq7ty5LFy4kI8//thtdVAli07HdZ6OcagCjerWMM8YB0CAvy+jujV0W53+7//+j6VLl7JkyRK31UEppYFDXULPZsEAF82qyi13h7fffttt11ZK/UUDh7qkns2C3RoolFKeSQfHPcxvv/1GaGgoR44cAeDo0aOEhoaSnp5+yWMulRNq8ODBzJ07F7Cugdi2bdtlrz1lyhTCw8OxWCy0adPGvv/hw4fp2LEjgYGBPPjgg9dwV0qpkqRIAoeIfCgiB0Qk1aFsnIhkiEiK7fU3h21jRWSXiOwUkW5FUYeSok6dOowcOZIxY8YAMGbMGIYPH05hn374/vvv06RJk8vuM3DgQH766SdSUlJ44okn7Cu5y5Urx/PPP8+rr75aqDoopUqGompxzABuK6D8DWOMxfZaAiAiTYD+QFPbMf8VEd8iqkeJ8Mgjj7B+/XomTZrEDz/8wOOPP37JfE+OjDE8+OCDNGzYkM6dO+dZT9GhQwf7ExQLygUFcN1119n3P3XqlH0RXoUKFWjTpg3lypVz5W0rpbxEkQQOY0wCcOQqd+8BzDHGnDXG7AF2ATFFUY+Swt/fn4kTJ/LII48wadIk/P39L5nvydH8+fPZuXMn27ZtY+bMmaxdu7bA8xeUCyrX5MmTuemmm3jiiSd46623XHqfSinv5OoxjgdFZIutK6uyrSwY+M1hn99tZcrB0qVLueGGG0hNtfb+5eZ7ioiIoHPnzvZ8T44S
EhIYMGAAvr6+1KpVi1tvLXi9Rf5cUI7jJw888AC7d+/m5ZdfZsKECa65OaWUV3PlrKp3gecBY/vva8AQZ04gIsOB4QB
- "text/plain": [
- "<Figure size 360x360 with 1 Axes>"
- ]
- },
- "metadata": {
- "needs_background": "light"
- },
- "output_type": "display_data"
- }
- ],
- "source": [
- "import matplotlib.pyplot as plt\n",
- "import random\n",
- "\n",
- "def plot_with_matplotlib(x_vals, y_vals, labels, num_to_label,\n",
- "                         names=None, seed=None, output_path='ex.png'):\n",
- "    \"\"\"Scatter-plot the embedding and annotate a random subset of the points.\n",
- "\n",
- "    Args:\n",
- "        x_vals, y_vals: point coordinates, in the same order as labels.\n",
- "        labels: node ids, one per point.\n",
- "        num_to_label: number of randomly chosen points to annotate; capped at\n",
- "            len(labels).\n",
- "        names: optional mapping from node id to annotation text. Defaults to the\n",
- "            notebook-level name_map. Ids missing from the mapping are annotated\n",
- "            with the id itself.\n",
- "        seed: optional seed for choosing the annotated points. None uses the\n",
- "            module-level random state, as before.\n",
- "        output_path: file the figure is saved to.\n",
- "    \"\"\"\n",
- "    if names is None:\n",
- "        names = name_map\n",
- "\n",
- "    plt.figure(figsize=(5, 5))\n",
- "    plt.scatter(x_vals, y_vals)\n",
- "    plt.title(\"Embedding Space\")\n",
- "\n",
- "    indices = list(range(len(labels)))\n",
- "    # random.sample raises ValueError when asked for more items than exist.\n",
- "    num_to_label = min(num_to_label, len(indices))\n",
- "    rng = random if seed is None else random.Random(seed)\n",
- "    selected_indices = rng.sample(indices, num_to_label)\n",
- "    for i in selected_indices:\n",
- "        label = str(labels[i])\n",
- "        plt.annotate(names.get(label, label), (x_vals[i], y_vals[i]))\n",
- "    plt.savefig(output_path)\n",
- " \n",
- "plot_with_matplotlib(x_vals, y_vals, labels, 12)"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "ml",
- "language": "python",
- "name": "ml"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.8.3"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 4
- }
|