|
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Install the packages listed in node2vec/requirements.txt into the kernel's own environment.\n",
|
|
"%pip install -r node2vec/requirements.txt"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Upgrade gensim in the kernel's own environment (%pip targets the running kernel).\n",
|
|
"%pip install --upgrade gensim"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 60,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Walk iteration:\n",
|
|
"1 / 40\n",
|
|
"2 / 40\n",
|
|
"3 / 40\n",
|
|
"4 / 40\n",
|
|
"5 / 40\n",
|
|
"6 / 40\n",
|
|
"7 / 40\n",
|
|
"8 / 40\n",
|
|
"9 / 40\n",
|
|
"10 / 40\n",
|
|
"11 / 40\n",
|
|
"12 / 40\n",
|
|
"13 / 40\n",
|
|
"14 / 40\n",
|
|
"15 / 40\n",
|
|
"16 / 40\n",
|
|
"17 / 40\n",
|
|
"18 / 40\n",
|
|
"19 / 40\n",
|
|
"20 / 40\n",
|
|
"21 / 40\n",
|
|
"22 / 40\n",
|
|
"23 / 40\n",
|
|
"24 / 40\n",
|
|
"25 / 40\n",
|
|
"26 / 40\n",
|
|
"27 / 40\n",
|
|
"28 / 40\n",
|
|
"29 / 40\n",
|
|
"30 / 40\n",
|
|
"31 / 40\n",
|
|
"32 / 40\n",
|
|
"33 / 40\n",
|
|
"34 / 40\n",
|
|
"35 / 40\n",
|
|
"36 / 40\n",
|
|
"37 / 40\n",
|
|
"38 / 40\n",
|
|
"39 / 40\n",
|
|
"40 / 40\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Run node2vec's main.py on jrtechs.edgelist and write the result to output/jrtechs2.emd,\n",
|
|
"# using --num-walks=40 and --dimensions=50.\n",
|
|
"# NOTE(review): `!python` may resolve to a different interpreter than this kernel's - confirm.\n",
|
|
"!python node2vec/src/main.py --input jrtechs.edgelist --output output/jrtechs2.emd --num-walks=40 --dimensions=50"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 72,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"39\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"labels = []\n",
|
|
"vectors = []\n",
|
|
"\n",
|
|
"# Each embedding row is read as: <node id> <v1> <v2> ... <vN>\n",
|
|
"with open(\"output/jrtechs2.emd\") as fp:\n",
|
|
"    for line_number, line in enumerate(fp):\n",
|
|
"        fields = line.split()\n",
|
|
"        if not fields:\n",
|
|
"            # Blank line (e.g. a trailing newline at the end of the file): nothing to parse.\n",
|
|
"            continue\n",
|
|
"        if line_number == 0 and len(fields) == 2:\n",
|
|
"            # Looks like a word2vec-style \"<count> <dimensions>\" header rather than an\n",
|
|
"            # embedding row; keeping it would add a bogus label and a ragged vector.\n",
|
|
"            continue\n",
|
|
"        # Keep the id as text; only the remaining fields are vector components.\n",
|
|
"        labels.append(fields[0])\n",
|
|
"        vectors.append([float(value) for value in fields[1:]])\n",
|
|
"\n",
|
|
"print(len(labels))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 73,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Maps the first field of each line in friendsMap.map (an id) to the name that follows it.\n",
|
|
"name_map = {}\n",
|
|
"# Names can contain non-ASCII characters, so do not depend on the platform's default encoding.\n",
|
|
"# NOTE(review): assumes friendsMap.map is UTF-8 encoded - confirm.\n",
|
|
"with open(\"friendsMap.map\", encoding=\"utf-8\") as fp:\n",
|
|
"    for line in fp:\n",
|
|
"        # Split only once so that a name containing spaces is kept whole\n",
|
|
"        # instead of being cut down to its first word.\n",
|
|
"        fields = line.split(maxsplit=1)\n",
|
|
"        if len(fields) < 2:\n",
|
|
"            # Blank line, or an id with no name: there is nothing to map.\n",
|
|
"            continue\n",
|
|
"        friend_id, friend_name = fields\n",
|
|
"        name_map[friend_id] = friend_name.strip()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 63,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"{'76561198188400721': 'jrtechs',\n",
|
|
" '76561198049526995': 'Noosh',\n",
|
|
" '76561198067517157': 'nweis76',\n",
|
|
" '76561198131175960': 'Delta_∆',\n",
|
|
" '76561198084464357': 'Ceta',\n",
|
|
" '76561198085584420': 'PureMaths',\n",
|
|
" '76561198068647768': 'Void',\n",
|
|
" '76561198094968588': 'Dumcumpster',\n",
|
|
" '76561198107069713': 'yankeeman1081',\n",
|
|
" '76561198086854442': 'jspike397',\n",
|
|
" '76561198273751201': 'Kyon',\n",
|
|
" '76561198083124245': 'drichardson005',\n",
|
|
" '76561198035606013': 'BK🐻',\n",
|
|
" '76561198255835078': 'Zelazny',\n",
|
|
" '76561198170096391': 'Greata',\n",
|
|
" '76561198306786411': 'fazeboojujee',\n",
|
|
" '76561198062501319': 'Cantankerous',\n",
|
|
" '76561198036270560': 'Toaster',\n",
|
|
" '76561198306443796': 'numstudequals1',\n",
|
|
" '76561198133044936': 'Panda',\n",
|
|
" '76561198095814450': 'Tbonedog',\n",
|
|
" '76561198069739846': 'Alminikar',\n",
|
|
" '76561198233398192': 'Saxophones',\n",
|
|
" '76561198162654610': 'Raydan',\n",
|
|
" '76561198047367972': 'Oberyn',\n",
|
|
" '76561198057450983': 'TraceTheKitteh',\n",
|
|
" '76561198176504246': 'shistthis',\n",
|
|
" '76561198111538799': 'sergiozygmunt',\n",
|
|
" '76561198853827591': 'theramendragon',\n",
|
|
" '76561198055948417': 'Xaldin31',\n",
|
|
" '76561198137304077': 'lilsar419',\n",
|
|
" '76561198406334664': 'wywyit',\n",
|
|
" '76561198098761042': 'wee',\n",
|
|
" '76561198192510666': 'Rebel',\n",
|
|
" '76561198066260593': 'NUT',\n",
|
|
" '76561198099625445': 'DAW',\n",
|
|
" '76561198017734545': 'Which',\n",
|
|
" '76561198036176189': 'Ben',\n",
|
|
" '76561198069739485': 'Rkames517',\n",
|
|
" '76561198062300654': 'Rawls',\n",
|
|
" '76561198068098265': 'Brendy',\n",
|
|
" '76561198065642391': 'Therefore',\n",
|
|
" '76561198121369685': 'DataFrogman'}"
|
|
]
|
|
},
|
|
"execution_count": 63,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"# Display the id -> name mapping for inspection.\n",
|
|
"name_map"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 75,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from sklearn.decomposition import IncrementalPCA # inital reduction\n",
|
|
"from sklearn.manifold import TSNE # final reduction\n",
|
|
"import numpy as np \n",
|
|
"\n",
|
|
"def reduce_dimensions(labels, vectors, num_dimensions=2):\n",
|
|
"    \"\"\"Project embedding vectors into a low-dimensional space with t-SNE.\n",
|
|
"\n",
|
|
"    Args:\n",
|
|
"        labels: one identifier per row of vectors.\n",
|
|
"        vectors: equal-length numeric embedding vectors, one per label.\n",
|
|
"        num_dimensions: number of t-SNE components to compute. Must be at\n",
|
|
"            least 2, because the first two components are returned.\n",
|
|
"\n",
|
|
"    Returns:\n",
|
|
"        (x_vals, y_vals, labels): lists holding the first and second t-SNE\n",
|
|
"        component of every row, and the labels as a NumPy array.\n",
|
|
"    \"\"\"\n",
|
|
"    vectors = np.asarray(vectors)\n",
|
|
"    labels = np.asarray(labels)\n",
|
|
"\n",
|
|
"    # scikit-learn documents that perplexity must be smaller than the number\n",
|
|
"    # of samples, so cap it for small inputs. Inputs with more than 30 rows\n",
|
|
"    # still use 30, the library default that was in effect before.\n",
|
|
"    perplexity = min(30.0, max(len(vectors) - 1, 1))\n",
|
|
"    tsne = TSNE(n_components=num_dimensions, perplexity=perplexity, random_state=0)\n",
|
|
"    embedded = tsne.fit_transform(vectors)\n",
|
|
"\n",
|
|
"    x_vals = list(embedded[:, 0])\n",
|
|
"    y_vals = list(embedded[:, 1])\n",
|
|
"    return x_vals, y_vals, labels\n",
|
|
"\n",
|
|
"# Reduce the loaded embeddings to 2-D coordinates for plotting.\n",
|
|
"x_vals, y_vals, labels = reduce_dimensions(labels, vectors)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 78,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"image/png": "\n",
|
|
"text/plain": [
|
|
"<Figure size 360x360 with 1 Axes>"
|
|
]
|
|
},
|
|
"metadata": {
|
|
"needs_background": "light"
|
|
},
|
|
"output_type": "display_data"
|
|
}
|
|
],
|
|
"source": [
|
|
"import matplotlib.pyplot as plt\n",
|
|
"import random\n",
|
|
"\n",
|
|
"def plot_with_matplotlib(x_vals, y_vals, labels, num_to_label):\n",
|
|
"    \"\"\"Scatter-plot the points, annotate a random subset, and save the figure to ex.png.\n",
|
|
"\n",
|
|
"    Args:\n",
|
|
"        x_vals: x coordinate of every point.\n",
|
|
"        y_vals: y coordinate of every point.\n",
|
|
"        labels: identifier of every point; annotation text is looked up in\n",
|
|
"            the global name_map.\n",
|
|
"        num_to_label: how many randomly chosen points to annotate; capped at\n",
|
|
"            the number of points.\n",
|
|
"    \"\"\"\n",
|
|
"    plt.figure(figsize=(5, 5))\n",
|
|
"    plt.scatter(x_vals, y_vals)\n",
|
|
"    plt.title(\"Embedding Space\")\n",
|
|
"\n",
|
|
"    # random.sample raises ValueError when asked for more items than exist,\n",
|
|
"    # so never request more annotations than there are points.\n",
|
|
"    num_to_label = min(num_to_label, len(labels))\n",
|
|
"    for i in random.sample(range(len(labels)), num_to_label):\n",
|
|
"        # Fall back to the raw identifier when it has no entry in name_map.\n",
|
|
"        text = name_map.get(labels[i], str(labels[i]))\n",
|
|
"        plt.annotate(text, (x_vals[i], y_vals[i]))\n",
|
|
"    plt.savefig('ex.png')\n",
|
|
" \n",
|
|
"# Plot the reduced embedding and annotate 12 randomly chosen points.\n",
|
|
"plot_with_matplotlib(x_vals, y_vals, labels, 12)"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "ml",
|
|
"language": "python",
|
|
"name": "ml"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.8.3"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 4
|
|
}
|