Repository where I mostly put random python scripts.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

278 lines
30 KiB

  1. {
  2. "cells": [
  3. {
  4. "cell_type": "code",
  5. "execution_count": null,
  6. "metadata": {},
  7. "outputs": [],
  8. "source": [
  9. "%pip install -r node2vec/requirements.txt"
  10. ]
  11. },
  12. {
  13. "cell_type": "code",
  14. "execution_count": null,
  15. "metadata": {},
  16. "outputs": [],
  17. "source": [
  18. "%pip install --upgrade gensim"
  19. ]
  20. },
  21. {
  22. "cell_type": "code",
  23. "execution_count": null,
  24. "metadata": {},
  25. "outputs": [],
  26. "source": []
  27. },
  28. {
  29. "cell_type": "code",
  30. "execution_count": 60,
  31. "metadata": {},
  32. "outputs": [
  33. {
  34. "name": "stdout",
  35. "output_type": "stream",
  36. "text": [
  37. "Walk iteration:\n",
  38. "1 / 40\n",
  39. "2 / 40\n",
  40. "3 / 40\n",
  41. "4 / 40\n",
  42. "5 / 40\n",
  43. "6 / 40\n",
  44. "7 / 40\n",
  45. "8 / 40\n",
  46. "9 / 40\n",
  47. "10 / 40\n",
  48. "11 / 40\n",
  49. "12 / 40\n",
  50. "13 / 40\n",
  51. "14 / 40\n",
  52. "15 / 40\n",
  53. "16 / 40\n",
  54. "17 / 40\n",
  55. "18 / 40\n",
  56. "19 / 40\n",
  57. "20 / 40\n",
  58. "21 / 40\n",
  59. "22 / 40\n",
  60. "23 / 40\n",
  61. "24 / 40\n",
  62. "25 / 40\n",
  63. "26 / 40\n",
  64. "27 / 40\n",
  65. "28 / 40\n",
  66. "29 / 40\n",
  67. "30 / 40\n",
  68. "31 / 40\n",
  69. "32 / 40\n",
  70. "33 / 40\n",
  71. "34 / 40\n",
  72. "35 / 40\n",
  73. "36 / 40\n",
  74. "37 / 40\n",
  75. "38 / 40\n",
  76. "39 / 40\n",
  77. "40 / 40\n"
  78. ]
  79. }
  80. ],
  81. "source": [
  82. "!python node2vec/src/main.py --input jrtechs.edgelist --output output/jrtechs2.emd --num-walks=40 --dimensions=50"
  83. ]
  84. },
  85. {
  86. "cell_type": "code",
  87. "execution_count": 72,
  88. "metadata": {},
  89. "outputs": [
  90. {
  91. "name": "stdout",
  92. "output_type": "stream",
  93. "text": [
  94. "39\n"
  95. ]
  96. }
  97. ],
  98. "source": [
  99. "# Parse the node2vec output: each line is \"<node id> <v1> <v2> ...\".\n",
  100. "labels, vectors = [], []\n",
  101. "with open(\"output/jrtechs2.emd\") as fp:\n",
  102. "    for line in fp:\n",
  103. "        fields = line.split()\n",
  104. "        if not fields:  # tolerate blank lines instead of raising IndexError\n",
  105. "            continue\n",
  106. "        labels.append(fields[0])  # keep the node id as text; only the coordinates are numeric\n",
  107. "        vectors.append([float(value) for value in fields[1:]])\n",
  108. "print(len(labels))"
  109. ]
  110. },
  111. {
  112. "cell_type": "code",
  113. "execution_count": 73,
  114. "metadata": {},
  115. "outputs": [],
  116. "source": [
  117. "name_map = {}  # first whitespace-separated token of each line -> second token\n",
  118. "with open(\"friendsMap.map\") as fp:\n",
  119. "    for fields in map(str.split, fp):\n",
  120. "        name_map[fields[0]] = fields[1]"
  121. ]
  122. },
  123. {
  124. "cell_type": "code",
  125. "execution_count": 63,
  126. "metadata": {},
  127. "outputs": [
  128. {
  129. "data": {
  130. "text/plain": [
  131. "{'76561198188400721': 'jrtechs',\n",
  132. " '76561198049526995': 'Noosh',\n",
  133. " '76561198067517157': 'nweis76',\n",
  134. " '76561198131175960': 'Delta_∆',\n",
  135. " '76561198084464357': 'Ceta',\n",
  136. " '76561198085584420': 'PureMaths',\n",
  137. " '76561198068647768': 'Void',\n",
  138. " '76561198094968588': 'Dumcumpster',\n",
  139. " '76561198107069713': 'yankeeman1081',\n",
  140. " '76561198086854442': 'jspike397',\n",
  141. " '76561198273751201': 'Kyon',\n",
  142. " '76561198083124245': 'drichardson005',\n",
  143. " '76561198035606013': 'BK🐻',\n",
  144. " '76561198255835078': 'Zelazny',\n",
  145. " '76561198170096391': 'Greata',\n",
  146. " '76561198306786411': 'fazeboojujee',\n",
  147. " '76561198062501319': 'Cantankerous',\n",
  148. " '76561198036270560': 'Toaster',\n",
  149. " '76561198306443796': 'numstudequals1',\n",
  150. " '76561198133044936': 'Panda',\n",
  151. " '76561198095814450': 'Tbonedog',\n",
  152. " '76561198069739846': 'Alminikar',\n",
  153. " '76561198233398192': 'Saxophones',\n",
  154. " '76561198162654610': 'Raydan',\n",
  155. " '76561198047367972': 'Oberyn',\n",
  156. " '76561198057450983': 'TraceTheKitteh',\n",
  157. " '76561198176504246': 'shistthis',\n",
  158. " '76561198111538799': 'sergiozygmunt',\n",
  159. " '76561198853827591': 'theramendragon',\n",
  160. " '76561198055948417': 'Xaldin31',\n",
  161. " '76561198137304077': 'lilsar419',\n",
  162. " '76561198406334664': 'wywyit',\n",
  163. " '76561198098761042': 'wee',\n",
  164. " '76561198192510666': 'Rebel',\n",
  165. " '76561198066260593': 'NUT',\n",
  166. " '76561198099625445': 'DAW',\n",
  167. " '76561198017734545': 'Which',\n",
  168. " '76561198036176189': 'Ben',\n",
  169. " '76561198069739485': 'Rkames517',\n",
  170. " '76561198062300654': 'Rawls',\n",
  171. " '76561198068098265': 'Brendy',\n",
  172. " '76561198065642391': 'Therefore',\n",
  173. " '76561198121369685': 'DataFrogman'}"
  174. ]
  175. },
  176. "execution_count": 63,
  177. "metadata": {},
  178. "output_type": "execute_result"
  179. }
  180. ],
  181. "source": [
  182. "name_map"
  183. ]
  184. },
  185. {
  186. "cell_type": "code",
  187. "execution_count": null,
  188. "metadata": {},
  189. "outputs": [],
  190. "source": []
  191. },
  192. {
  193. "cell_type": "code",
  194. "execution_count": 75,
  195. "metadata": {},
  196. "outputs": [],
  197. "source": [
  198. "from sklearn.decomposition import IncrementalPCA # initial reduction (currently unused)\n",
  199. "from sklearn.manifold import TSNE # final reduction\n",
  200. "import numpy as np \n",
  201. "\n",
  202. "def reduce_dimensions(labels, vectors, num_dimensions=2):\n",
  203. "    \"\"\"Project the embedding vectors with t-SNE and return (x_vals, y_vals, labels).\n",
  204. "\n",
  205. "    Only the first two t-SNE components are returned, so num_dimensions must be >= 2.\n",
  206. "    random_state=0 fixes the t-SNE seed so repeated runs use the same random state.\n",
  207. "    \"\"\"\n",
  208. "    # convert both lists into numpy arrays for reduction\n",
  209. "    vectors = np.asarray(vectors)\n",
  210. "    labels = np.asarray(labels)\n",
  211. "    tsne = TSNE(n_components=num_dimensions, random_state=0)\n",
  212. "    reduced = tsne.fit_transform(vectors)\n",
  213. "    x_vals = [point[0] for point in reduced]\n",
  214. "    y_vals = [point[1] for point in reduced]\n",
  215. "    return x_vals, y_vals, labels\n",
  216. "\n",
  217. "# NOTE: this rebinds labels to the numpy array returned by reduce_dimensions\n",
  218. "x_vals, y_vals, labels = reduce_dimensions(labels, vectors)"
  219. ]
  220. },
  221. {
  222. "cell_type": "code",
  223. "execution_count": 78,
  224. "metadata": {},
  225. "outputs": [
  226. {
  227. "data": {
  228. "image/png": "iVBORw0KGgoAAAANSUhEUgAAAY4AAAE/CAYAAABRkiSsAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nOzdd3iUVfbA8e9JAQJBQldCSVR6yhBCAEOVpitKWZCSXUEEhNWfa0NAXUVEjWLBwoqggrgIKtIUEKRolCIkJGBoBiSoEektEErC/f0xk3ESQhmSycwk5/M88zhz33bfYXdObjuvGGNQSimlrpaPuyuglFLKu2jgUEop5RQNHEoppZyigUMppZRTNHAopZRyigYOpZRSTtHAoYqMiMwQkQlFdK7BIvLDZbZ/KyJDbe/jRGR5UVxXKXVlGjhKORFJF5EsEcl0eL3j7no5wxgzyxjT1RXnFpE2IrJWRI6LyBERWSMiLVxxLaW8hZ+7K6A8wp3GmBXuroSnEZHrgK+AkcBnQBmgLXDWnfVSyt20xaEuydZdtEZE3hCRYyLyi4jcYiv/TUQOiMigfIdVE5FvROSkiHwnIvUcztfItu2IiOwUkbsdtlUVkUUickJENgA35atLFxHZYfvL/x1A8tXzB4fPRkRGiEiard6TRURs23xF5DUROSQie0TkQdv+Bf0R1QDAGDPbGJNjjMkyxiw3xmzJ9/28Y6vXDhHp5FCPe0Vku+27+EVE7s93Tz1EJMV2z7tF5DZbeSUR+UBE9olIhohMEBHfq/13U8rVNHCoK2kJbAGqAp8Ac4AWwM3AP4B3RCTQYf844HmgGpACzAIQkQrAN7Zz1AD6A/8VkSa24yYDZ4AbgCG2F7ZjqwHzgKdt590NxF6h3t1t9YwA7ga62cqHAbcDFiAK6HmZc/wM5IjIRyJyu4hULmCflrb6VAOeBeaJSBXbtgO2elwH3Au8ISJRtnuKAWYCo4AgoB2QbjtuBpCN9TtuBnQFhl7hfpUqPsYYfZXiF9Yfq0zgmMNrmG3bYCDNYd9wwAA1HcoOAxbb+xnAHIdtgUAOUAfoB3yf79rvYf2x9QXOA40ctr0I/GB7fw+w3mGbAL8DQx3q+YPDdgO0cfj8GTDG9n4VcL/Dts62/f0u8f00tt3X71h/zBfl3r/tun8A4rD/BuCflzjXAuDfDvf+RgH71MTaFRbgUDYAWO3u/63oS1+5Lx3jUAA9zaXHOPY7vM8CMMbkL3NscfyW+8YYkykiR4BaQD2gpYgcc9jXD/gYqG57/5vDtr0O72vlO68REcd9C/Knw/vTDnXMc6587y9ijNmONUAgIo2A/wGTsP6YA2QYYxwzhe61XQMRuR1rYGyAtXVfHvjJtl8dYEkBl6wH+AP7bL1r2I690v0qVWw0cKiiVif3ja0LqwrWv8p/A74zxnTJf4Ct/z7bduwOW3Fdh1325TuvOH520j6gdkH1vRJjzA4RmQE4jlUEi4g4BI+6wCIRKQt8gbW1tNAYc15EFvDX2Mxv5BvHcSg/C1QzxmRfbd2UKk46xqGK2t9sU1jLYB3rWG+M+Q3r7KQGIvJPEfG3vVqISGNjTA7WMYxxIlLeNu7hOOi+GGgqIr1tg9gPAddfY/0+A/4tIsEiEgSMvtSOtsH8x0Sktu1zHawtjfUOu9UAHrLdT1+sXVtLsM7AKgscBLJtrQ/HKcMfAPeKSCcR8bHVp5ExZh+wHHhNRK6zbbtJRNpf4/0qVeQ0cCiALyXvOo75hTjXJ1i7Z44AzbEOoGOMOYn1h7M/1hbIn8DLWH9cAR7E2p30J9Yxhem5JzTGHAL6AvFYx1TqA2uusX7TsP4wbwGSsf7IZ2Mdi8nvJNbB7x9F5BTWgJEKPOawz4+2+hwCXgD6GGMO2+73IayB6igwEOv4SO49bcA2YA4cB77D2k0F1lZKGWCb7di5WCcNKOURJG/3rFKli60
lMMUYU++KO1987GCsA/RtirxiSnkwbXGoUkVEAkTkbyLiJyLBWFtHhWlhKVXqaOBQpY0Az2HtAkoGtgPPuLVGSnkZ7apSSinlFG1xKKWUcooGDqWUUk7xmgWA1apVMyEhIe6uhlKqhElKSjpkjKnu7np4E68JHCEhISQmJrq7GkqpEkZE9l55L+VIu6qUUko5RQOHUkopp2jgUEop5RQNHOqqzJ8/H4vFkufl4+PD0qVLL3lMYGDgJbcppbyX1ywAjI6ONjo47jmmTp3KrFmzWL16NT4+Bf/9ERgYSGZmZjHXTCnniEiSMSba3fXwJtriUE77+eefGT9+PB9//DE+Pj5MnDiRFi1aEBERwbPPPnvR/pmZmXTq1ImoqCjCw8NZuHAhAFOmTLG3XkJDQ+nYsSNgDThPPfUUkZGRtGrViv3793Py5ElCQ0M5f/48ACdOnMjzWSlVfDRwKKecP3+egQMH8tprr1G3bl2WL19OWloaGzZsICUlhaSkJBISEvIcU65cOebPn8+mTZtYvXo1jz32GMYYRowYQUpKChs3bqR27do8+uijAJw6dYpWrVqxefNm2rVrx7Rp06hYsSIdOnRg8eLFAMyZM4fevXvj7+9f7N+BUqWd16zjUMVvQXIGE5ft5I9jWdQKCmBUt4as//RtmjZtSr9+/QBYvnw5y5cvp1mzZoC1dZGWlka7du3s5zHG8OSTT5KQkICPjw8ZGRns37+f66+3Povp3//+N7feeit33nknAGXKlKF79+4ANG/enG+++QaAoUOH8sorr9CzZ0+mT5/OtGnTiu27UEr9RQOHKtCC5AzGzvuJrPPW5xtlHMvioddnkf3dp+zcusW+nzGGsWPHcv/991/qVMyaNYuDBw+SlJSEv78/ISEhnDlzBoAZM2awd+9e3nnnHfv+/v7+5D5v29fXl+xs6xNUY2NjSU9P59tvvyUnJ4ewsLAiv2+l1JVpV5Uq0MRlO+1BAyDnTCZ/fPk61e54lIoVK9rLu3XrxocffmgfBM/IyODAgQN5znX8+HFq1KiBv78/q1evZu9e60LdpKQkXn31Vf73v/9dcoA9v3vuuYeBAwdy7733FvYWlVLXSAOHKtAfx7LyfM5MXsKF08fYMe+NPFNyjx49ysCBA2ndujXh4eH06dOHkydP5jk2Li6OxMREwsPDmTlzJo0aNQLgnXfe4ciRI3Ts2BGLxcLQoUOvWK+4uDiOHj3KgAEDiu5mlVJO0em4qkCx8avIyBc8AIKDAlgz5lY31Mhq7ty5LFy4kI8//thtdVAli07HdZ6OcagCjerWMM8YB0CAvy+jujV0W53+7//+j6VLl7JkyRK31UEppYFDXULPZsEAF82qyi13h7fffttt11ZK/UUDh7qkns2C3RoolFKeSQfHPcxvv/1GaGgoR44cAeDo0aOEhoaSnp5+yWMulRNq8ODBzJ07F7Cugdi2bdtlrz1lyhTCw8OxWCy0adPGvv/hw4fp2LEjgYGBPPjgg9dwV0qpkqRIAoeIfCgiB0Qk1aFsnIhkiEiK7fU3h21jRWSXiOwUkW5FUYeSok6dOowcOZIxY8YAMGbMGIYPH05hn374/vvv06RJk8vuM3DgQH766SdSUlJ44okn7Cu5y5Urx/PPP8+rr75aqDoopUqGompxzABuK6D8DWOMxfZaAiAiTYD+QFPbMf8VEd8iqkeJ8Mgjj7B+/XomTZrEDz/8wOOPP37JfE+OjDE8+OCDNGzYkM6dO+dZT9GhQwf7ExQLygUFcN1119n3P3XqlH0RXoUKFWjTpg3lypVz5W0rpbxEkQQOY0wCcOQqd+8BzDHGnDXG7AF2ATFFUY+Swt/fn4kTJ/LII48wadIk/P39L5nvydH8+fPZuXMn27ZtY+bMmaxdu7bA8xeUCyrX5MmTuemmm3jiiSd46623XHqfSinv5OoxjgdFZIutK6uyrSwY+M1hn99tZcrB0qVLueGGG0hNtfb+5eZ7ioiIoHPnzvZ
8T44SEhIYMGAAvr6+1KpVi1tvLXi9Rf5cUI7jJw888AC7d+/m5ZdfZsKECa65OaWUV3PlrKp3gecBY/vva8AQZ04gIsOB4QB
  229. "text/plain": [
  230. "<Figure size 360x360 with 1 Axes>"
  231. ]
  232. },
  233. "metadata": {
  234. "needs_background": "light"
  235. },
  236. "output_type": "display_data"
  237. }
  238. ],
  239. "source": [
  240. "import matplotlib.pyplot as plt\n",
  241. "import random\n",
  242. "\n",
  243. "def plot_with_matplotlib(x_vals, y_vals, labels, num_to_label):\n",
  244. "    \"\"\"Scatter the points, annotate a random subset via name_map, and save to ex.png.\"\"\"\n",
  245. "    plt.figure(figsize=(5, 5))\n",
  246. "    plt.scatter(x_vals, y_vals)\n",
  247. "    plt.title(\"Embedding Space\")\n",
  248. "    # clamp the sample size: random.sample raises ValueError when asked for more items than exist\n",
  249. "    for i in random.sample(range(len(labels)), min(num_to_label, len(labels))):\n",
  250. "        plt.annotate(name_map[labels[i]], (x_vals[i], y_vals[i]))  # name_map: notebook-level dict\n",
  251. "    plt.savefig('ex.png')\n",
  252. "\n",
  253. "plot_with_matplotlib(x_vals, y_vals, labels, 12)"
  254. ]
  255. }
  256. ],
  257. "metadata": {
  258. "kernelspec": {
  259. "display_name": "ml",
  260. "language": "python",
  261. "name": "ml"
  262. },
  263. "language_info": {
  264. "codemirror_mode": {
  265. "name": "ipython",
  266. "version": 3
  267. },
  268. "file_extension": ".py",
  269. "mimetype": "text/x-python",
  270. "name": "python",
  271. "nbconvert_exporter": "python",
  272. "pygments_lexer": "ipython3",
  273. "version": "3.8.3"
  274. }
  275. },
  276. "nbformat": 4,
  277. "nbformat_minor": 4
  278. }