Browse Source

Implemented a more efficient way to crawl the steam network.

pull/8/head
jrtechs 6 years ago
parent
commit
e5e30bf9c8
5 changed files with 164 additions and 26 deletions
  1. +8
    -3
      src/main/java/net/jrtechs/www/SteamAPI/APIConnection.java
  2. +1
    -0
      src/main/java/net/jrtechs/www/webCrawler/APIThrottler.java
  3. +30
    -0
      src/main/java/net/jrtechs/www/webCrawler/FileIO.java
  4. +44
    -0
      src/main/java/net/jrtechs/www/webCrawler/FileReader.java
  5. +81
    -23
      src/main/java/net/jrtechs/www/webCrawler/SteamWebCrawler.java

+ 8
- 3
src/main/java/net/jrtechs/www/SteamAPI/APIConnection.java View File

@ -69,7 +69,8 @@ public class APIConnection
} }
catch (Exception ex) catch (Exception ex)
{ {
ex.printStackTrace();
System.out.println("Friends not public :(");
//ex.printStackTrace();
} }
return friendsId; return friendsId;
@ -108,8 +109,12 @@ public class APIConnection
for(int i = 0; i < names.length(); i++) for(int i = 0; i < names.length(); i++)
{ {
JSONObject player = names.getJSONObject(i); JSONObject player = names.getJSONObject(i);
map.put(player.getString("steamid"),
player.getString("personaname"));
if(player.has("steamid") && player.has("personaname"))
{
map.put(player.getString("steamid"),
player.getString("personaname"));
}
} }
} }
return map; return map;

+ 1
- 0
src/main/java/net/jrtechs/www/webCrawler/APIThrottler.java View File

@ -55,6 +55,7 @@ public class APIThrottler
*/ */
public void wait(int numofQueries) public void wait(int numofQueries)
{ {
System.out.println("Hold the door:" + numofQueries);
int totalWaitTime = numofQueries * waitTimePerQuerie; int totalWaitTime = numofQueries * waitTimePerQuerie;
while(!queryAvailable(totalWaitTime)) while(!queryAvailable(totalWaitTime))

+ 30
- 0
src/main/java/net/jrtechs/www/webCrawler/FileIO.java View File

@ -1,10 +1,12 @@
package net.jrtechs.www.webCrawler; package net.jrtechs.www.webCrawler;
import net.jrtechs.www.server.Player; import net.jrtechs.www.server.Player;
import org.json.JSONArray;
import org.json.JSONObject; import org.json.JSONObject;
import java.io.File; import java.io.File;
import java.text.SimpleDateFormat; import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date; import java.util.Date;
import java.util.List; import java.util.List;
@ -30,6 +32,12 @@ public class FileIO
} }
/**
 * Builds the location of the JSON file that stores a player.
 *
 * @param id steam id of the player
 * @return the base file path followed by the id and a ".json" suffix
 */
private String getURL(String id)
{
    StringBuilder location = new StringBuilder();
    location.append(baseFilaPath);
    location.append(id);
    location.append(".json");
    return location.toString();
}
/** /**
* Determines if we already have the player * Determines if we already have the player
* on disk. * on disk.
@ -60,6 +68,28 @@ public class FileIO
} }
/**
 * Reads the friends list of a player whose JSON file is on disk.
 *
 * @param id steam id of the player whose file is read
 * @return the strings stored under the "friends" key; an empty list
 *         when the file has no readable contents or has no "friends" key
 */
public List<String> readFriends(String id)
{
    String fileContents = FileReader.readFile(this.getURL(id));

    // FileReader.readFile returns "" when the file cannot be read.
    // Blank text is not a JSON object, so parsing it would throw and
    // stop the crawl; treat it as a player with no known friends.
    if(fileContents.trim().isEmpty())
    {
        return new ArrayList<>();
    }

    JSONObject player = new JSONObject(fileContents);
    if(!player.has("friends"))
    {
        return new ArrayList<>();
    }

    JSONArray jsonArray = player.getJSONArray("friends");
    List<String> list = new ArrayList<>(jsonArray.length());
    for(int i = 0; i < jsonArray.length(); i++)
    {
        list.add(jsonArray.getString(i));
    }
    return list;
}
/** /**
* Writes the player to the file. * Writes the player to the file.
* *

+ 44
- 0
src/main/java/net/jrtechs/www/webCrawler/FileReader.java View File

@ -0,0 +1,44 @@
package net.jrtechs.www.webCrawler;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
 * Simple utility class for reading a file in as a
 * single {@link String}.
 *
 * @author Jeffery Russell 11-19-18
 */
public class FileReader
{
    /**
     * Reads a file and returns its contents as one string.
     * The lines of the file are joined back to back; line
     * terminators are not kept.
     *
     * @param filePath path of the file to read
     * @return contents of the file with line terminators removed;
     *         if an {@link IOException} occurs an error is printed and
     *         whatever was read up to that point is returned (the empty
     *         string when the file could not be opened)
     */
    public static String readFile(String filePath)
    {
        // StringBuilder avoids re-copying the whole result for every line.
        StringBuilder result = new StringBuilder();

        // try-with-resources closes the reader even when readLine() fails.
        try (BufferedReader br = new BufferedReader(
                new InputStreamReader(new FileInputStream(filePath))))
        {
            String line;
            while ((line = br.readLine()) != null)
            {
                result.append(line);
            }
        }
        catch (IOException e)
        {
            System.out.println("ERROR: unable to read file " + filePath);
        }
        return result.toString();
    }
}

+ 81
- 23
src/main/java/net/jrtechs/www/webCrawler/SteamWebCrawler.java View File

@ -3,17 +3,14 @@ package net.jrtechs.www.webCrawler;
import net.jrtechs.www.SteamAPI.APIConnection; import net.jrtechs.www.SteamAPI.APIConnection;
import net.jrtechs.www.server.Player; import net.jrtechs.www.server.Player;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.Queue;
import java.util.*;
/** /**
* Main class for digging up the entire * Main class for digging up the entire
* steam network. * steam network.
* *
* @author Jeffery Russell
* @author Jeffery Russell 11-18-18
*/ */
public class SteamWebCrawler public class SteamWebCrawler
{ {
@ -28,7 +25,17 @@ public class SteamWebCrawler
private FileIO fileIO; private FileIO fileIO;
/** Queue used for a BFS search */ /** Queue used for a BFS search */
private Queue<Player> downlaodQueue;
private LinkedList<Player> downlaodQueue;
/** Players which have been detected by
* our search and are currently in a queue
* or have already been processed */
private HashSet<String> visited;
/** List of players which we have encountered
* in the steam network but whose names we
* do not know yet */
private LinkedList<String> namelessQueue;
/** /**
@ -43,41 +50,92 @@ public class SteamWebCrawler
this.fileIO = new FileIO("/media/jeff/A4BA9239BA920846/steamData/"); this.fileIO = new FileIO("/media/jeff/A4BA9239BA920846/steamData/");
this.downlaodQueue = new LinkedList<>(); this.downlaodQueue = new LinkedList<>();
visited = new HashSet<>();
namelessQueue = new LinkedList<>();
} }
/** /**
* Runs a BFS search of the steam network
* If the download queue is empty, this will
* look up the names of the first 100 players in the
* nameless queue and add them to download queue.
*/ */
private void runCrawler()
private void shiftNamelessToDownload()
{ {
while(!downlaodQueue.isEmpty())
if(this.downlaodQueue.isEmpty() && !this.namelessQueue.isEmpty())
{ {
Player current = downlaodQueue.remove();
List<String> currentFriends = connection.getFriends(current.getId());
List<String> winners = new ArrayList<>();
for(int i = 0; i < (100 < namelessQueue.size()? 100: namelessQueue.size()); i++)
{
winners.add(this.namelessQueue.remove());
}
List<Player> namedPlayers = connection.getFullPlayers(winners);
this.throttler.wait(1);
downlaodQueue.addAll(namedPlayers);
}
}
List<String> neededFriends = new ArrayList<>();
currentFriends.forEach(s ->
/**
* Does one of the following three actions for each
* of the steam members in the list:
* 1: Ignore- already has been queued by program
* 2: Add to nameless queue -- doesn't have name yet
* 3: Add to download queue -- already on HDD but needed for
* the search algo to work.
*
* @param ids list of steam ids
*/
private void queueUpPlayers(List<String> ids)
{
for(String s: ids)
{
if(!visited.contains(s))
{ {
if(!fileIO.playerExists(s))
neededFriends.add(s);
});
if(fileIO.playerExists(s))
{
downlaodQueue.add(new Player("dummy", s));
}
else
{
namelessQueue.add(s);
}
visited.add(s);
}
}
System.out.println("Download Queue: " + downlaodQueue.size());
System.out.println("Nameless Queue: " + namelessQueue.size());
}
connection.getFullPlayers(neededFriends).forEach(f->
{
downlaodQueue.add(f);
});
int queriesRan = neededFriends.size()/100 + 2;
this.throttler.wait(queriesRan);
/**
* Runs a BFS search of the steam network
*/
private void runCrawler()
{
while(!downlaodQueue.isEmpty())
{
Player current = downlaodQueue.remove();
List<String> currentFriends;
if(!fileIO.playerExists(current.getId())) if(!fileIO.playerExists(current.getId()))
{ {
this.throttler.wait(1);
currentFriends = connection.getFriends(current.getId());
fileIO.writeToFile(current, currentFriends); fileIO.writeToFile(current, currentFriends);
} }
else
{
currentFriends = fileIO.readFriends(current.getId());
}
queueUpPlayers(currentFriends);
shiftNamelessToDownload();
} }
} }

Loading…
Cancel
Save