// RemoteCrawl_p.java // -------------------- // (C) 2007 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany // first published 20.04.2007 on http://yacy.net // // This is a part of YaCy, a peer-to-peer based web search engine // // $LastChangedDate: 2010-09-02 21:24:22 +0200 (Do, 02 Sep 2010) $ // $LastChangedRevision: 7092 $ // $LastChangedBy: orbiter $ // // LICENSE // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA import java.util.Iterator; import java.util.regex.Matcher; import java.util.regex.Pattern; import net.yacy.cora.protocol.RequestHeader; import net.yacy.data.WorkTables; import net.yacy.peers.PeerActions; import net.yacy.peers.Seed; import net.yacy.peers.operation.yacyVersion; import net.yacy.search.Switchboard; import net.yacy.search.SwitchboardConstants; import net.yacy.server.serverObjects; import net.yacy.server.serverSwitch; public class RemoteCrawl_p { public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header, final serverObjects post, final serverSwitch env) { final Switchboard sb = (Switchboard) env; final serverObjects prop = new serverObjects(); if (post != null) { // store this call as api call sb.tables.recordAPICall(post, "RemoteCrawl_p.html", WorkTables.TABLE_API_TYPE_CONFIGURATION, "remote crawler configuration"); if (post.containsKey("crawlResponse")) { boolean crawlResponse = post.get("crawlResponse", "off").equals("on"); // read remote crawl request settings sb.initRemoteCrawler(crawlResponse); } if (post.containsKey("acceptCrawlLimit")) { // read remote crawl request settings int newppm = 1; try { newppm = Math.max(1, post.getInt("acceptCrawlLimit", 1)); } catch (final NumberFormatException e) {} sb.setRemotecrawlPPM(newppm); } } // set seed information directly sb.peers.mySeed().setFlagAcceptRemoteCrawl(sb.getConfigBool(SwitchboardConstants.CRAWLJOB_REMOTE, false)); // write remote crawl request settings prop.put("disabled", !sb.peers.mySeed().isActive() && !sb.peers.mySeed().getFlagAcceptRemoteCrawl() ? 1 : 0); prop.put("crawlResponse", sb.peers.mySeed().getFlagAcceptRemoteCrawl() ? 1 : 0); long RTCbusySleep = Math.max(1, env.getConfigLong(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL_BUSYSLEEP, 100)); final int RTCppm = (int) (60000L / RTCbusySleep); prop.put("acceptCrawlLimit", RTCppm); // ------------------------------------------------------------------------------------- // write network list final String STR_TABLE_LIST = "list_"; int conCount = 0; boolean dark = true; Seed seed; Iterator<Seed> e = null; e = sb.peers.seedsSortedConnected(false, Seed.RCOUNT); //e = sb.peers.seedsSortedConnected(false, yacySeed.LCOUNT); Pattern peerSearchPattern = null; while (e.hasNext() && conCount < 300) { seed = e.next(); assert seed != null; if (seed != null) { final long lastseen = Math.abs((System.currentTimeMillis() - seed.getLastSeenUTC()) / 1000 / 60); if (lastseen > 720) continue; long rcount = seed.getLong(Seed.RCOUNT, 0); if (rcount == 0) continue; if ((post != null && post.containsKey("search")) && peerSearchPattern != null /*(wrongregex == null)*/) { boolean abort = true; Matcher m = peerSearchPattern.matcher (seed.getName()); if (m.find ()) { abort = false; } m = peerSearchPattern.matcher (seed.hash); if (m.find ()) { abort = false; } if (abort) continue; } prop.put(STR_TABLE_LIST + conCount + "_dark", ((dark) ? 1 : 0) ); dark=!dark; String shortname = seed.get(Seed.NAME, "deadlink"); if (shortname.length() > 20) shortname = shortname.substring(0, 20) + "..."; final String peeradr = seed.getPublicAddress(seed.getIPs().iterator().next()); prop.putHTML(STR_TABLE_LIST + conCount + "_shortname", shortname); prop.putHTML(STR_TABLE_LIST + conCount + "_peeraddress", peeradr); prop.put(STR_TABLE_LIST + conCount + "_age", seed.getAge()); String[] yv = yacyVersion.combined2prettyVersion(seed.get(Seed.VERSION, "0.1"), shortname); prop.putHTML(STR_TABLE_LIST + conCount + "_version", yv[0] + "/" + yv[1]); prop.putNum(STR_TABLE_LIST + conCount + "_lastSeen", /*seed.getLastSeenString() + " " +*/ lastseen); prop.put(STR_TABLE_LIST + conCount + "_utc", seed.get(Seed.UTC, "-")); prop.putHTML(STR_TABLE_LIST + conCount + "_uptime", PeerActions.formatInterval(60000 * seed.getLong(Seed.UPTIME, 0L))); prop.putNum(STR_TABLE_LIST + conCount + "_LCount", seed.getLinkCount()); prop.putNum(STR_TABLE_LIST + conCount + "_ICount", seed.getWordCount()); prop.putNum(STR_TABLE_LIST + conCount + "_RCount", rcount); prop.putNum(STR_TABLE_LIST + conCount + "_ppm", seed.getPPM()); prop.putNum(STR_TABLE_LIST + conCount + "_qph", Math.round(6000d * seed.getQPM()) / 100d); conCount++; } // seed != null } // while prop.putNum("list", conCount); return prop; } }