package tools;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

/**
 * Runs one web search per integer in a range and records the approximate
 * result count reported for each search.
 *
 * <p>For every integer {@code n} in {@code [from, to]} (visited in random
 * order) the query {@code queryString + "+" + n} is sent to either Yahoo or
 * Google, the response is scanned for the tokens {@code "of about <count>"},
 * and {@code n <TAB> count} is written both to standard output and to a file
 * named {@code "query" + System.currentTimeMillis()} in the working directory.
 * All of the work happens in the constructor.
 */
public class GoogleQuery {

    /** Result file; opened and closed by the constructor. */
    private BufferedWriter out;

    private final static String googleBaseQueryUrl = "http://www.google.com/search?q=";

    private final static String yahooBaseQueryUrl = "http://search.yahoo.com/search?p=";

    /**
     * Runs the query {@code "tcp+port"} for the values 1024 through 65536 against Yahoo.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        try {
            new GoogleQuery("tcp+port", 1024, 65536, true);
        } catch (IOException e) {
            e.printStackTrace();
        } catch (InterruptedException e) {
            // Restore the interrupt status so code further up the stack can still observe it.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        }
    }

    /**
     * Executes the whole query run (blocking, including the pauses between requests).
     *
     * <p>If a response contains no parsable {@code "of about <count>"} sequence, a
     * diagnostic is printed and the JVM is terminated with exit status 1.
     *
     * @param queryString query text appended verbatim to the search URL; it is not
     *                    URL-encoded here, so the caller must pass it in encoded form
     * @param from        first value to append to the query (inclusive)
     * @param to          last value to append to the query (inclusive)
     * @param useYahoo    {@code true} to query Yahoo, {@code false} to query Google
     * @throws IllegalArgumentException if {@code to < from}
     * @throws IOException              if the result file cannot be written or a request fails
     * @throws InterruptedException     if the thread is interrupted while pausing between requests
     */
    public GoogleQuery(String queryString, int from, int to, boolean useYahoo)
            throws IOException, InterruptedException {
        if (to < from) {
            throw new IllegalArgumentException(
                    "empty range: from=" + from + ", to=" + to);
        }

        // A long counter keeps the loop finite even when 'to' is Integer.MAX_VALUE.
        List<Integer> shuffled = new ArrayList<Integer>();
        for (long value = from; value <= to; value++) {
            shuffled.add(Integer.valueOf((int) value));
        }
        Collections.shuffle(shuffled);

        out = new BufferedWriter(new FileWriter(new File("query" + System.currentTimeMillis())));
        try {
            for (Integer val : shuffled) {
                String baseUrl = useYahoo ? yahooBaseQueryUrl : googleBaseQueryUrl;
                String fullQueryString = baseUrl + queryString + "+" + val;

                HttpURLConnection connection =
                        (HttpURLConnection) new URL(fullQueryString).openConnection();
                connection.setRequestProperty("User-Agent",
                        "Mozilla/5.0 (Macintosh; U; Intel Mac OS X; en-US; rv:1.8.1.6) Gecko/20070725 Firefox/2.0.0.6");

                Integer pageCount = null;
                String lastLine = null;
                try {
                    BufferedReader reader = new BufferedReader(
                            new InputStreamReader(connection.getInputStream()));
                    try {
                        String line;
                        // Stop reading as soon as a count has been found.
                        while (pageCount == null && (line = reader.readLine()) != null) {
                            lastLine = line;
                            pageCount = parsePageCount(line, useYahoo);
                        }
                    } finally {
                        reader.close();
                    }
                } finally {
                    connection.disconnect();
                }

                if (pageCount == null) {
                    // Show the last line that was actually received (null if the body was empty).
                    System.out.println("did not find anything, stange? got: " + lastLine);
                    // System.exit does not run finally blocks, so release the file explicitly.
                    out.close();
                    System.exit(1);
                }

                System.out.println(val + "\t" + pageCount);
                out.write(val + "\t" + pageCount + "\n");
                // Flush after every record so results survive an abnormal termination.
                out.flush();

                // Sleep 30 s + random * 60 s, i.e. on average one query per minute.
                long sleepTime = Math.round(30 * 1000 + Math.random() * 60 * 1000);
                System.out.println("sleeping: " + (sleepTime / 1000));
                Thread.sleep(sleepTime);
            }
        } finally {
            out.close();
        }
    }

    /**
     * Scans one response line for the space-separated tokens {@code "of"},
     * {@code "about"} followed by a result count.
     *
     * @param line     a single line of the response
     * @param useYahoo {@code true} if the count token is taken as-is; {@code false}
     *                 if its first three and last four characters are dropped first
     * @return the parsed count, or {@code null} if the line holds no parsable count
     */
    private static Integer parsePageCount(String line, boolean useYahoo) {
        String[] tokens = line.split(" ");
        // i + 2 must be a valid index, so a match in the last three tokens is included.
        for (int i = 0; i + 2 < tokens.length; i++) {
            if (!tokens[i].equals("of") || !tokens[i + 1].equals("about")) {
                continue;
            }
            String countToken = tokens[i + 2];
            if (!useYahoo) {
                // Presumably strips a surrounding "<b>" ... "</b>" pair - confirm against a live page.
                if (countToken.length() < 7) {
                    continue;
                }
                countToken = countToken.substring(3, countToken.length() - 4);
            }
            try {
                return Integer.valueOf(Integer.parseInt(countToken.replaceAll(",", "")));
            } catch (NumberFormatException ignored) {
                // "of about" was followed by something that is not a count; keep scanning.
            }
        }
        return null;
    }
}