Repository where I mostly put random Python scripts.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 

278 lines
30 KiB

{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Install node2vec's requirements with the pip that belongs to the `python` on PATH,\n",
"# i.e. the interpreter that `!python node2vec/src/main.py` runs further down.\n",
"# A bare `pip` can resolve to a different installation than `python`.\n",
"!python -m pip install -r node2vec/requirements.txt"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Upgrade gensim through `python -m pip` so the package lands in the interpreter\n",
"# that `python` on PATH resolves to (a bare `pip` may point elsewhere).\n",
"# NOTE(review): the upgrade is unpinned; consider pinning a version that is known\n",
"# to work with node2vec/src/main.py.\n",
"!python -m pip install --upgrade gensim"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Walk iteration:\n",
"1 / 40\n",
"2 / 40\n",
"3 / 40\n",
"4 / 40\n",
"5 / 40\n",
"6 / 40\n",
"7 / 40\n",
"8 / 40\n",
"9 / 40\n",
"10 / 40\n",
"11 / 40\n",
"12 / 40\n",
"13 / 40\n",
"14 / 40\n",
"15 / 40\n",
"16 / 40\n",
"17 / 40\n",
"18 / 40\n",
"19 / 40\n",
"20 / 40\n",
"21 / 40\n",
"22 / 40\n",
"23 / 40\n",
"24 / 40\n",
"25 / 40\n",
"26 / 40\n",
"27 / 40\n",
"28 / 40\n",
"29 / 40\n",
"30 / 40\n",
"31 / 40\n",
"32 / 40\n",
"33 / 40\n",
"34 / 40\n",
"35 / 40\n",
"36 / 40\n",
"37 / 40\n",
"38 / 40\n",
"39 / 40\n",
"40 / 40\n"
]
}
],
"source": [
"# Run node2vec's main.py on jrtechs.edgelist and write the result to output/jrtechs2.emd,\n",
"# passing --num-walks=40 and --dimensions=50.\n",
"!python node2vec/src/main.py --input jrtechs.edgelist --output output/jrtechs2.emd --num-walks=40 --dimensions=50"
]
},
{
"cell_type": "code",
"execution_count": 72,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"39\n"
]
}
],
"source": [
"def load_embeddings(path):\n",
"    \"\"\"Read a whitespace-separated embedding file into (labels, vectors).\n",
"\n",
"    Every data line is `<label> <value> <value> ...`; the label is kept as a\n",
"    string and only the values are parsed as floats. Blank lines are ignored,\n",
"    and a first line made of exactly two integers is treated as a\n",
"    word2vec-style `<count> <dimensions>` header and skipped.\n",
"\n",
"    Returns:\n",
"        (labels, vectors): a list of label strings and a parallel list of\n",
"        float lists.\n",
"    \"\"\"\n",
"    labels, vectors = [], []\n",
"    with open(path) as fp:\n",
"        for line_number, line in enumerate(fp):\n",
"            tokens = line.split()  # split once and reuse for label and values\n",
"            if not tokens:\n",
"                continue\n",
"            is_header = (\n",
"                line_number == 0\n",
"                and len(tokens) == 2\n",
"                and all(token.isdigit() for token in tokens)\n",
"            )\n",
"            if is_header:\n",
"                continue\n",
"            labels.append(tokens[0])\n",
"            vectors.append([float(token) for token in tokens[1:]])\n",
"    return labels, vectors\n",
"\n",
"\n",
"labels, vectors = load_embeddings(\"output/jrtechs2.emd\")\n",
"print(len(labels))"
]
},
{
"cell_type": "code",
"execution_count": 73,
"metadata": {},
"outputs": [],
"source": [
"# Build a lookup from the first whitespace-separated field of each line in\n",
"# friendsMap.map to the second field.\n",
"# NOTE(review): only the second field is kept, so a name containing spaces\n",
"# would be cut at the first space -- confirm against the file format.\n",
"name_map = {}\n",
"# The names can contain non-ASCII characters, so decode explicitly instead of\n",
"# relying on the platform default. NOTE(review): assumes the file is UTF-8.\n",
"with open(\"friendsMap.map\", encoding=\"utf-8\") as fp:\n",
"    for line_number, line in enumerate(fp, start=1):\n",
"        fields = line.split()  # split once and reuse\n",
"        if not fields:\n",
"            continue  # ignore blank lines\n",
"        if len(fields) < 2:\n",
"            raise ValueError(\n",
"                f\"friendsMap.map line {line_number}: expected '<id> <name>', got {line!r}\"\n",
"            )\n",
"        name_map[fields[0]] = fields[1]"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'76561198188400721': 'jrtechs',\n",
" '76561198049526995': 'Noosh',\n",
" '76561198067517157': 'nweis76',\n",
" '76561198131175960': 'Delta_∆',\n",
" '76561198084464357': 'Ceta',\n",
" '76561198085584420': 'PureMaths',\n",
" '76561198068647768': 'Void',\n",
" '76561198094968588': 'Dumcumpster',\n",
" '76561198107069713': 'yankeeman1081',\n",
" '76561198086854442': 'jspike397',\n",
" '76561198273751201': 'Kyon',\n",
" '76561198083124245': 'drichardson005',\n",
" '76561198035606013': 'BK🐻',\n",
" '76561198255835078': 'Zelazny',\n",
" '76561198170096391': 'Greata',\n",
" '76561198306786411': 'fazeboojujee',\n",
" '76561198062501319': 'Cantankerous',\n",
" '76561198036270560': 'Toaster',\n",
" '76561198306443796': 'numstudequals1',\n",
" '76561198133044936': 'Panda',\n",
" '76561198095814450': 'Tbonedog',\n",
" '76561198069739846': 'Alminikar',\n",
" '76561198233398192': 'Saxophones',\n",
" '76561198162654610': 'Raydan',\n",
" '76561198047367972': 'Oberyn',\n",
" '76561198057450983': 'TraceTheKitteh',\n",
" '76561198176504246': 'shistthis',\n",
" '76561198111538799': 'sergiozygmunt',\n",
" '76561198853827591': 'theramendragon',\n",
" '76561198055948417': 'Xaldin31',\n",
" '76561198137304077': 'lilsar419',\n",
" '76561198406334664': 'wywyit',\n",
" '76561198098761042': 'wee',\n",
" '76561198192510666': 'Rebel',\n",
" '76561198066260593': 'NUT',\n",
" '76561198099625445': 'DAW',\n",
" '76561198017734545': 'Which',\n",
" '76561198036176189': 'Ben',\n",
" '76561198069739485': 'Rkames517',\n",
" '76561198062300654': 'Rawls',\n",
" '76561198068098265': 'Brendy',\n",
" '76561198065642391': 'Therefore',\n",
" '76561198121369685': 'DataFrogman'}"
]
},
"execution_count": 63,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Display the mapping built from friendsMap.map.\n",
"# NOTE(review): the stored output lists account IDs next to display names;\n",
"# consider clearing it before sharing the notebook.\n",
"name_map"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 75,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.decomposition import IncrementalPCA  # initial reduction (not used below)\n",
"from sklearn.manifold import TSNE  # final reduction\n",
"import numpy as np\n",
"\n",
"\n",
"def reduce_dimensions(labels, vectors, num_dimensions=2):\n",
"    \"\"\"Project the vectors with t-SNE and return plottable coordinates.\n",
"\n",
"    Args:\n",
"        labels: sequence of labels, one per vector.\n",
"        vectors: sequence of equal-length numeric vectors.\n",
"        num_dimensions: number of t-SNE components to compute. Must be at\n",
"            least 2 because the first two components are returned.\n",
"\n",
"    Returns:\n",
"        (x_vals, y_vals, labels): lists holding the first and second t-SNE\n",
"        component of every vector, and the labels as a numpy array.\n",
"\n",
"    Raises:\n",
"        ValueError: if num_dimensions < 2, or if labels and vectors differ\n",
"            in length.\n",
"    \"\"\"\n",
"    # fail before the t-SNE fit rather than with an IndexError after it\n",
"    if num_dimensions < 2:\n",
"        raise ValueError(\"num_dimensions must be at least 2\")\n",
"\n",
"    # convert both lists into numpy arrays for reduction\n",
"    vectors = np.asarray(vectors)\n",
"    labels = np.asarray(labels)\n",
"    if len(labels) != len(vectors):\n",
"        raise ValueError(\n",
"            f\"got {len(labels)} labels for {len(vectors)} vectors\"\n",
"        )\n",
"\n",
"    # reduce using t-SNE with a fixed random_state\n",
"    tsne = TSNE(n_components=num_dimensions, random_state=0)\n",
"    embedded = tsne.fit_transform(vectors)\n",
"\n",
"    x_vals = list(embedded[:, 0])\n",
"    y_vals = list(embedded[:, 1])\n",
"    return x_vals, y_vals, labels\n",
"\n",
"\n",
"x_vals, y_vals, labels = reduce_dimensions(labels, vectors)"
]
},
{
"cell_type": "code",
"execution_count": 78,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 360x360 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"import matplotlib.pyplot as plt\n",
"import random\n",
"\n",
"\n",
"def plot_with_matplotlib(x_vals, y_vals, labels, num_to_label):\n",
"    \"\"\"Scatter-plot the points, annotate a random subset, and save the figure to ex.png.\n",
"\n",
"    Args:\n",
"        x_vals: x coordinate of every point.\n",
"        y_vals: y coordinate of every point.\n",
"        labels: label of every point; looked up in the module-level `name_map`\n",
"            to get the annotation text.\n",
"        num_to_label: how many randomly chosen points to annotate. Values\n",
"            larger than the number of points annotate every point.\n",
"    \"\"\"\n",
"    plt.figure(figsize=(5, 5))\n",
"    plt.scatter(x_vals, y_vals)\n",
"    plt.title(\"Embedding Space\")\n",
"\n",
"    # random.sample raises ValueError when asked for more items than exist,\n",
"    # so clamp the request to the number of available points\n",
"    sample_size = max(0, min(num_to_label, len(labels)))\n",
"    for i in random.sample(range(len(labels)), sample_size):\n",
"        label = labels[i]\n",
"        # fall back to the raw label when it has no entry in name_map\n",
"        plt.annotate(name_map.get(label, str(label)), (x_vals[i], y_vals[i]))\n",
"    plt.savefig('ex.png')\n",
"\n",
"\n",
"plot_with_matplotlib(x_vals, y_vals, labels, 12)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "ml",
"language": "python",
"name": "ml"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}