Browse Source

Updated the web crawler to work properly and traverse the network using BFS.

pull/8/head
jrtechs 6 years ago
parent
commit
980e5d5378
4 changed files with 182 additions and 17 deletions
  1. +24
    -1
      src/main/java/net/jrtechs/www/SteamAPI/APIConnection.java
  2. +55
    -2
      src/main/java/net/jrtechs/www/webCrawler/APIThrottler.java
  3. +30
    -10
      src/main/java/net/jrtechs/www/webCrawler/FileIO.java
  4. +73
    -4
      src/main/java/net/jrtechs/www/webCrawler/SteamWebCrawler.java

+ 24
- 1
src/main/java/net/jrtechs/www/SteamAPI/APIConnection.java View File

@ -1,5 +1,6 @@
package net.jrtechs.www.SteamAPI; package net.jrtechs.www.SteamAPI;
import net.jrtechs.www.server.Player;
import net.jrtechs.www.utils.ConfigLoader; import net.jrtechs.www.utils.ConfigLoader;
import net.jrtechs.www.utils.WebScraper; import net.jrtechs.www.utils.WebScraper;
@ -107,7 +108,6 @@ public class APIConnection
for(int i = 0; i < names.length(); i++) for(int i = 0; i < names.length(); i++)
{ {
JSONObject player = names.getJSONObject(i); JSONObject player = names.getJSONObject(i);
System.out.println(player);
map.put(player.getString("steamid"), map.put(player.getString("steamid"),
player.getString("personaname")); player.getString("personaname"));
} }
@ -116,6 +116,28 @@ public class APIConnection
} }
/**
 * Wrapper for getNames which returns a list of players instead
 * of a map from id's to names.
 *
 * @param ids steam ids to look up
 * @return one Player per entry of the map returned by getNames,
 *         in that map's iteration order
 */
public List<Player> getFullPlayers(List<String> ids)
{
    Map<String, String> map = this.getNames(ids);
    List<Player> players = new ArrayList<>(map.size());
    // entrySet hands back id and name together, so no second
    // lookup per key is needed
    for(Map.Entry<String, String> entry: map.entrySet())
    {
        players.add(new Player(entry.getValue(), entry.getKey()));
    }
    return players;
}
/** /**
* Returns the name of the player with a specific steam id * Returns the name of the player with a specific steam id
* *
@ -143,6 +165,7 @@ public class APIConnection
return null; return null;
} }
public static void main(String[] args) public static void main(String[] args)
{ {
APIConnection con = new APIConnection(); APIConnection con = new APIConnection();

+ 55
- 2
src/main/java/net/jrtechs/www/webCrawler/APIThrottler.java View File

@ -3,21 +3,74 @@ package net.jrtechs.www.webCrawler;
import java.util.Calendar; import java.util.Calendar;
/** /**
* Class which is used to throttle your
* program to not query an API too fast.
*
* @author Jeffery Russell * @author Jeffery Russell
*/ */
public class APIThrottler public class APIThrottler
{ {
/** The total amount of queries ran */
public int totalqueries; public int totalqueries;
/** Time in MS that the last query ran */
private long lastQuery; private long lastQuery;
/** The number of MS that we have to wait
* between each query to not get our account
* banned from steam*/
private int waitTimePerQuerie;
boolean queryAvailable()
/**
* initializes start parameters
*/
public APIThrottler()
{ {
return true;
lastQuery = getCurrentTimeInMS();
waitTimePerQuerie = 864;
}
/**
 * Reports whether more than the given number of milliseconds
 * has elapsed since the last recorded query.
 *
 * @param waitTime minimum gap in MS required after the last query
 * @return true once the current time is strictly later than
 *         lastQuery + waitTime
 */
private boolean queryAvailable(int waitTime)
{
    final long earliestAllowed = lastQuery + waitTime;
    return getCurrentTimeInMS() > earliestAllowed;
}
/**
* Pauses untill the wait time out has been met
* @param numofQueries
*/
public void wait(int numofQueries)
{
int totalWaitTime = numofQueries * waitTimePerQuerie;
while(!queryAvailable(totalWaitTime))
{
}
lastQuery = getCurrentTimeInMS();
totalqueries++;
if(totalqueries % 1000 == 0)
System.out.println("Queries ran: " + totalqueries);
} }
/**
* Fetches the current time in milliseconds
**/
public long getCurrentTimeInMS() public long getCurrentTimeInMS()
{ {
Calendar calendar = Calendar.getInstance(); Calendar calendar = Calendar.getInstance();

+ 30
- 10
src/main/java/net/jrtechs/www/webCrawler/FileIO.java View File

@ -1,7 +1,12 @@
package net.jrtechs.www.webCrawler; package net.jrtechs.www.webCrawler;
import com.google.gson.Gson;
import net.jrtechs.www.server.Player; import net.jrtechs.www.server.Player;
import org.json.JSONObject;
import java.io.File;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.List;
/** /**
@ -15,10 +20,6 @@ public class FileIO
/** Base directory to store all the data */ /** Base directory to store all the data */
private String baseFilaPath; private String baseFilaPath;
/** Object used to convert objects to json strings */
private final Gson gson;
/** /**
* Initializes the base directory * Initializes the base directory
* @param basePath * @param basePath
@ -26,7 +27,6 @@ public class FileIO
public FileIO(String basePath) public FileIO(String basePath)
{ {
this.baseFilaPath = basePath; this.baseFilaPath = basePath;
this.gson = new Gson();
} }
@ -39,7 +39,24 @@ public class FileIO
*/ */
public boolean playerExists(String id) public boolean playerExists(String id)
{ {
return false;
String fileName = baseFilaPath + id + ".json";
return new File(fileName).isFile();
}
/**
* Returns the date in a form which is easy to read and write
* to from a file.
*
* @return
*/
private String getDate()
{
String pattern = "yyyy-MM-dd";
SimpleDateFormat simpleDateFormat = new SimpleDateFormat(pattern);
return simpleDateFormat.format(new Date());
} }
@ -48,12 +65,15 @@ public class FileIO
* *
* @param player * @param player
*/ */
public void writeToFile(Player player)
public void writeToFile(Player player, List<String> friendIDS)
{ {
String data = gson.toJson(player);
JSONObject object = new JSONObject();
object.put("name", player.getName());
object.put("date", getDate());
object.put("friends", friendIDS);
String fileName = baseFilaPath + player.getId() + ".json"; String fileName = baseFilaPath + player.getId() + ".json";
SteamdFileWriter.writeToFile(data, fileName);
SteamdFileWriter.writeToFile(object.toString(4), fileName);
} }
} }

+ 73
- 4
src/main/java/net/jrtechs/www/webCrawler/SteamWebCrawler.java View File

@ -1,8 +1,12 @@
package net.jrtechs.www.webCrawler; package net.jrtechs.www.webCrawler;
import net.jrtechs.www.SteamAPI.APIConnection; import net.jrtechs.www.SteamAPI.APIConnection;
import net.jrtechs.www.server.Player;
import java.io.File;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Queue;
import java.util.Set;
/** /**
@ -13,22 +17,87 @@ import java.io.File;
*/ */
public class SteamWebCrawler public class SteamWebCrawler
{ {
/** Object used to limit the speed at which I access the steam
* network */
private APIThrottler throttler; private APIThrottler throttler;
/** Connection to the steam network */
private APIConnection connection; private APIConnection connection;
/** Saves players to the disk */
private FileIO fileIO; private FileIO fileIO;
public void runSteamCrawler(String baseID)
/** Queue used for a BFS search */
private Queue<Player> downlaodQueue;
/**
 * Builds the throttler, the steam connection, the disk writer
 * and the empty queue that the crawler works from.
 */
public SteamWebCrawler()
{
    // Order kept as-is: the throttler records its start time on creation
    this.throttler = new APIThrottler();
    this.connection = new APIConnection();
    this.fileIO = new FileIO("/media/jeff/A4BA9239BA920846/steamData/");
    this.downlaodQueue = new LinkedList<>();
}
/**
* Runs a BFS search of the steam network
*/
private void runCrawler()
{ {
while(!downlaodQueue.isEmpty())
{
Player current = downlaodQueue.remove();
List<String> currentFriends = connection.getFriends(current.getId());
List<String> neededFriends = new ArrayList<>();
currentFriends.forEach(s ->
{
if(!fileIO.playerExists(s))
neededFriends.add(s);
});
connection.getFullPlayers(neededFriends).forEach(f->
{
downlaodQueue.add(f);
});
int queriesRan = neededFriends.size()/100 + 2;
this.throttler.wait(queriesRan);
if(!fileIO.playerExists(current.getId()))
{
fileIO.writeToFile(current, currentFriends);
}
}
}
/**
* pop first fiend on the queue
* and release the beast
*
* @param baseID
*/
public void runSteamCrawlerBase(String baseID)
{
downlaodQueue.add(new Player("jrtechs", baseID));
runCrawler();
} }
public static void main(String args[]) public static void main(String args[])
{ {
new SteamWebCrawler().runSteamCrawler("76561198188400721");
new SteamWebCrawler().runSteamCrawlerBase("76561198188400721");
} }
} }

Loading…
Cancel
Save