// DescriptorExample.java example

package tor.examples;

import tor.Consensus;
import tor.OnionRouter;

import tor.TorCrypto;
import tor.util.URLUtil;

import java.io.IOException;
import java.util.*;

import org.apache.commons.lang.StringUtils;
import org.apache.commons.codec.binary.Hex;
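// org.apache.commons.codec.binary.StringUtils has the same simple name as the
// commons-lang StringUtils imported above, and we only use it once,
// so we refer to it by its fully-qualified name instead of importing it.
//import org.apache.commons.codec.binary.StringUtils;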

/**
 * Example that tests directory cache responses to multiple descriptor requests.
 *
 * <p>Selects up to {@link #ROUTER_COUNT} routers carrying {@link #FLAGS} from the consensus,
 * asks a directory server for all of their descriptors in a single request, and reports any
 * difference between the fingerprints that were requested and those found in the reply.
 *
 * <p>Created by twilsonb on 3/08/2014.
 */
public class DescriptorExample {

    /*
        Test handling of:
            - uncompressed and compressed descriptors,
            - single and multiple descriptors per request

        Tor Directory Specification
        https://gitweb.torproject.org/torspec.git/blob/HEAD:/dir-spec.txt

        Clients MUST handle compressed concatenated information in two forms:
          - A concatenated list of zlib-compressed objects.
          - A zlib-compressed concatenated list of objects.
        Directory servers MAY generate either format: the former requires less
        CPU, but the latter requires less bandwidth.
     */

    // Generally, I'd use "Running,Valid,Stable", because
    // we want routers that have a good chance of being in the consensus and caches,
    // to reduce spurious errors due to genuinely missing descriptors.
    // "Running" is essentially a no-op, because non-running routers are not in the consensus.
    public static String FLAGS = "Running,Valid,Stable";

    // How many fingerprints do we want to collect?
    // Appendix B, https://gitweb.torproject.org/torspec.git/blob/HEAD:/dir-spec.txt
    //  "Due to squid proxy url limitations at most 96 fingerprints can be retrieved in a single request."

    /*
        The directory specification recommends the following client behaviour:
        No more than 128 descriptors are requested from a single mirror. ...
        After receiving any response client MUST discard any descriptors that it
        did not request.
    */

    // In reality, most caches will handle up to 500,
    // start silently dropping routers at 1000+,
    // (it's unclear in the spec if this is correct or not)
    // and at 1300, tor will immediately terminate the connection
    //   with "request too large" in its logs.

    // Use 10000 or INT_MAX to get all routers with the specified flags
    public static int ROUTER_COUNT = 10;

    // Where we get the router strings we send to the server
    public static Boolean useConsensusRouters = true;
    public static Boolean useRandomHexRouters = false;
    // This option generally makes a mess of everything when turned on,
    // including the Java URL classes and the terminal output
    public static Boolean useRandomByteRouters = false;

    public static int FINGERPRINT_BYTE_LENGTH = 20;
    public static int FINGERPRINT_CHAR_LENGTH = FINGERPRINT_BYTE_LENGTH*2;


    /*
         The Tor directory spec allows fingerprints to be shortened,
         but only when requesting the consensus using *authority* fingerprints:

         http://<hostname>/tor/status-vote/current/consensus/<F1>+<F2>+<F3>.z

        Fingerprints can be shortened to a length of any multiple of
        two, using only the leftmost part of the encoded fingerprint.
        Tor uses 3 bytes (6 hex characters) of the fingerprint.
    */

    // The number of characters to use from each fingerprint in requests.
    // The tor directory spec says this should be a multiple of 2.
    // We don't enforce this.
    // public static int TRUNCATE_FINGERPRINT_CHAR_LENGTH = 6;

    // It's much better to test against a local instance - it's faster, and reduces the load on the Tor network.
    // And you get much better debugging info from both sides.
    // And there's much less chance you'll be seen to be launching an (unsolicited) attack.
    // However, this also disables retries.
    public static String DIRECTORY_SERVER_ADDRESS = "127.0.0.1";
    public static String DIRECTORY_SERVER_PORT = "8880";

    // gho's router - request permission before using
    //public static String DIRECTORY_SERVER_ADDRESS = "37.187.247.150";
    //public static String DIRECTORY_SERVER_PORT = "9030";

    // Fallback to a random directory server if either of these are null
    // TODO: implement default port 9030?
    //public static String DIRECTORY_SERVER_ADDRESS = null;
    //public static String DIRECTORY_SERVER_PORT = null;

    // Column-aligned prefixes shared by every diagnostic line about a missing or extra fingerprint
    private static final String MISSING_LABEL = "Missing fingerprint:   ";
    private static final String EXTRA_LABEL = "Extra fingerprint:     ";

    /**
     * Selects routers, requests their descriptors in one request, and prints a comparison of
     * what was requested with what was received.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // Let's only retry twice - we really don't want to get picked up as a DoS attack
        // TODO: we could do this much better with a setter method - on the class or on the object?
        Consensus.MAX_TRIES = 3;

        Consensus con = Consensus.getConsensus();
        TreeMap<String, OnionRouter> orMap = selectRouters(con);

        // Requesting every descriptor in its own request is terribly slow for more than
        // around 10 routers. No wonder multiple fingerprints are permitted in a single request!
        System.out.println("Retrieve authority descriptors in one request");
        System.out.println("=============================================");
        System.out.println("Retrieving multiple authority descriptors with optimistic compression...");

        // Only the real (consensus) fingerprints end up in this set; see buildFingerprintURLFragment
        TreeSet<String> requestFingerprintList = new TreeSet<>();
        String fingerprintURLFragment = buildFingerprintURLFragment(orMap.keySet(), requestFingerprintList);

        if (fingerprintURLFragment == null) {
            // Nothing was selected, or every fingerprint source is switched off.
            // Don't hand a null request to the directory code or the URL encoder.
            System.err.println("No fingerprints to request: no routers were selected, or all of "
                    + "useConsensusRouters, useRandomHexRouters and useRandomByteRouters are disabled.");
            return;
        }

        String descriptorReply = fetchDescriptors(con, fingerprintURLFragment, orMap.size());

        // Print what we wanted, what we got, and a comparison
        if (descriptorReply != null) {
            reportReply(con, descriptorReply, fingerprintURLFragment, orMap.size(), requestFingerprintList);
        }
    }

    /**
     * Builds a map of ROUTER_COUNT random routers with FLAGS, keyed by identity hash.
     * If there are no more than ROUTER_COUNT routers with FLAGS, uses them all.
     *
     * @param con the consensus to pick routers from
     * @return the selected routers, without duplicates
     */
    private static TreeMap<String, OnionRouter> selectRouters(Consensus con) {
        TreeMap<String, OnionRouter> allWithFlags = con.getORsWithFlag(FLAGS);

        if (allWithFlags.size() <= ROUTER_COUNT) {
            return allWithFlags;
        }

        TreeMap<String, OnionRouter> selected = new TreeMap<>();
        while (selected.size() < ROUTER_COUNT) {
            OnionRouter router = con.getRandomORWithFlag(FLAGS);

            // Check we're not using the same router multiple times.
            // Servers eliminate duplicates, and this messes with our counts.
            if (!selected.containsKey(router.identityhash)) {
                selected.put(router.identityhash, router);
            }
        }
        return selected;
    }

    /**
     * Concatenates the request fingerprints together, using "+" as a separator.
     *
     * @param identityFingerprints the identity hashes of the selected routers
     * @param realFingerprints     receives every request fingerprint that is a genuine identity
     *                             hash (as opposed to random garbage substituted for one)
     * @return the URL fragment, or {@code null} if no fingerprint was produced at all
     */
    private static String buildFingerprintURLFragment(Collection<String> identityFingerprints,
                                                      Collection<String> realFingerprints) {
        StringBuilder fragment = new StringBuilder();
        // Tracked separately from the builder's length, so an empty fingerprint still counts as an entry
        boolean anyAppended = false;

        for (String identityFingerprint : identityFingerprints) {
            String requestFingerprint = chooseRequestFingerprint(identityFingerprint);
            if (requestFingerprint == null) {
                continue;
            }

            // only record real router fingerprints, not garbage
            if (requestFingerprint.equals(identityFingerprint)) {
                realFingerprints.add(requestFingerprint);
            }

            if (anyAppended) {
                fragment.append('+');
            }
            fragment.append(requestFingerprint);
            anyAppended = true;
        }

        return anyAppended ? fragment.toString() : null;
    }

    /**
     * Decides what to send to the server in place of one identity fingerprint, according to
     * useConsensusRouters, useRandomHexRouters and useRandomByteRouters.
     *
     * @param identityFingerprint the genuine identity hash of a selected router
     * @return the string to request, or {@code null} if all three sources are disabled
     */
    private static String chooseRequestFingerprint(String identityFingerprint) {
        // Truncation (TRUNCATE_FINGERPRINT_CHAR_LENGTH) only applies when requesting consensuses
        // via an authority fingerprint, so descriptor requests always use the full fingerprint.
        String requestFingerprint = null;

        if (useConsensusRouters) {
            requestFingerprint = identityFingerprint;
        }

        // Flip a coin to decide whether to replace the fingerprint with a random hex string
        if (useRandomHexRouters && (TorCrypto.rnd.nextBoolean() || requestFingerprint == null)) {
            byte[] randomHexBytes = new byte[FINGERPRINT_BYTE_LENGTH];
            TorCrypto.rnd.nextBytes(randomHexBytes);
            // converts to lowercase by default
            requestFingerprint = Hex.encodeHexString(randomHexBytes);

            // Flip a coin to decide whether to uppercase it
            if (TorCrypto.rnd.nextBoolean()) {
                requestFingerprint = requestFingerprint.toUpperCase(Locale.ROOT);
            }
        }

        // Flip a coin to decide whether to replace the fingerprint with random binary bytes
        if (useRandomByteRouters && (TorCrypto.rnd.nextBoolean() || requestFingerprint == null)) {
            // Though the name is a little counter-intuitive, in this particular instance
            // we want one byte for every character in the fingerprint
            byte[] randomBinaryBytes = new byte[FINGERPRINT_CHAR_LENGTH];
            TorCrypto.rnd.nextBytes(randomBinaryBytes);

            // Because we need to convert bytes to a Charset, then to a URL,
            // our ability to manipulate what tor sees is limited by the Java APIs.
            // They do an awful lot of sanity checking.
            // This code is as close as we can get to sending random binary data to tor,
            // without writing a basic HTTP client ourselves.
            // And it is hard to replicate requests using binary
            // by copying and pasting from various terminals or logs.
            requestFingerprint = org.apache.commons.codec.binary.StringUtils.newStringIso8859_1(randomBinaryBytes);
        }

        return requestFingerprint;
    }

    /**
     * Tries to retrieve all the requested descriptors in a single request.
     *
     * Either:
     *  - the InflaterInputStream handles a "concatenated list of zlib-compressed objects" transparently, or
     *  - few servers send a "concatenated list of zlib-compressed objects"
     *    (instead choosing a "zlib-compressed concatenated list of objects")
     *
     * @param con                    the consensus used to issue the request
     * @param fingerprintURLFragment the "+"-separated fingerprints to request
     * @param fingerprintCount       the number of fingerprints in the request, for error reporting
     * @return the reply, or {@code null} if the request failed with an IOException
     */
    private static String fetchDescriptors(Consensus con, String fingerprintURLFragment, int fingerprintCount) {
        String descriptorReply = null;
        try {
            if (DIRECTORY_SERVER_ADDRESS != null && DIRECTORY_SERVER_PORT != null) {
                // Connect to a specified directory (cache)
                // Note: this disables automatic retries
                descriptorReply = con.getRouterDescriptor(
                        fingerprintURLFragment, DIRECTORY_SERVER_ADDRESS, DIRECTORY_SERVER_PORT);
            } else {
                // Connect to a random directory (cache)
                // Note: using multiple, random caches may make errors harder to reproduce
                descriptorReply = con.getRouterDescriptor(fingerprintURLFragment);
            }

            System.out.println("Requested descriptors: " + URLUtil.URLEncode(fingerprintURLFragment));

        } catch (IOException e) {
            System.err.println("IO Error attempting to retrieve descriptor list for "
                    + fingerprintCount + " fingerprints: "
                    + URLUtil.URLEncode(fingerprintURLFragment)
                    + "\n Error: " + e.toString());
        }
        return descriptorReply;
    }

    /**
     * Prints a summary of the reply and, if the received fingerprints differ from the
     * requested ones, a detailed comparison on standard error.
     *
     * @param con                    the consensus, used to look up details of mismatched routers
     * @param descriptorReply        the text returned by the directory server
     * @param fingerprintURLFragment the "+"-separated fingerprints that were requested
     * @param fingerprintCount       the number of fingerprints in the request
     * @param requestFingerprintList the genuine fingerprints in the request, i.e. those we
     *                               expect a descriptor for
     */
    private static void reportReply(Consensus con, String descriptorReply, String fingerprintURLFragment,
                                    int fingerprintCount, TreeSet<String> requestFingerprintList) {
        String[] descriptorLines = descriptorReply.split("\n");
        TreeSet<String> replyFingerprints = new TreeSet<>();
        int descriptorCount = collectReplyFingerprints(descriptorLines, replyFingerprints);

        // Do our descriptors appear valid, and did we get the right number of them?
        // Only genuine fingerprints can yield a descriptor, so random garbage that was
        // substituted into the request must not count towards the expected total.
        int expectedDescriptorCount = requestFingerprintList.size();
        if (descriptorReply.startsWith("router ") && descriptorCount == expectedDescriptorCount) {
            System.out.println("Downloaded " + descriptorCount
                    + " descriptors for fingerprints: " + URLUtil.URLEncode(fingerprintURLFragment));
        } else {
            // The full reply is not printed here: it is often so long that it is
            // larger than the IntelliJ console buffer.
            System.err.println();
            System.err.println("Downloaded " + descriptorCount
                    + " descriptors for " + fingerprintCount
                    + " fingerprints: " + URLUtil.URLEncode(fingerprintURLFragment));
        }

        if (requestFingerprintList.equals(replyFingerprints)) {
            return;
        }

        System.err.println();

        // Print what we wanted and what we got
        for (String requestFingerprint : requestFingerprintList) {
            System.err.println("Requested fingerprint: " + requestFingerprint + ".");
        }

        System.err.println();

        for (String replyFingerprint : replyFingerprints) {
            System.err.println("Received fingerprint:  " + replyFingerprint + ".");
        }

        System.err.println();

        // Now list missing and extra fingerprints
        TreeSet<String> missingInReply = new TreeSet<>(requestFingerprintList);
        missingInReply.removeAll(replyFingerprints);

        for (String missingFingerprint : missingInReply) {
            System.err.println(MISSING_LABEL
                    + missingFingerprint + ". Requested, but not in reply.");
        }

        TreeSet<String> extraInReply = new TreeSet<>(replyFingerprints);
        extraInReply.removeAll(requestFingerprintList);

        for (String extraFingerprint : extraInReply) {
            System.err.println(EXTRA_LABEL
                    + extraFingerprint + ". Not requested, but provided in reply.");
        }

        // Try and find missing and extra fingerprint fragments in the descriptors
        // This is error-prone, as fingerprints are space-separated in descriptors
        for (String line : descriptorLines) {
            // Lowercase each line once, rather than once per fingerprint
            String lowerCaseLine = line.toLowerCase(Locale.ROOT);
            printFragmentMatches(MISSING_LABEL, missingInReply, line, lowerCaseLine);
            printFragmentMatches(EXTRA_LABEL, extraInReply, line, lowerCaseLine);
        }

        // Pull up OnionRouter details for missing and extra routers
        printKnownRouters(con, MISSING_LABEL, missingInReply);
        printKnownRouters(con, EXTRA_LABEL, extraInReply);
    }

    /**
     * Scans the reply for fingerprint lines and counts the lines that start a descriptor.
     * Duplicate fingerprints are reported on standard error.
     *
     * @param descriptorLines   the reply, split into lines
     * @param replyFingerprints receives each fingerprint found, lowercased and without whitespace
     * @return the number of lines starting with "router "
     */
    private static int collectReplyFingerprints(String[] descriptorLines, Collection<String> replyFingerprints) {
        int descriptorCount = 0;

        for (String line : descriptorLines) {
            String fp = null;

            // The descriptor fingerprints can have an "opt " in front of them
            if (line.startsWith("fingerprint ")) {
                fp = line.substring("fingerprint ".length());
            } else if (line.startsWith("opt fingerprint ")) {
                fp = line.substring("opt fingerprint ".length());
            } else if (line.startsWith("router ")) {
                descriptorCount++;
            }

            if (fp == null) {
                continue;
            }

            // Match the consensus fingerprints, which are lowercase and contain no whitespace
            fp = StringUtils.deleteWhitespace(fp).toLowerCase(Locale.ROOT);

            // add() returns false when the fingerprint was already present
            if (!replyFingerprints.add(fp)) {
                System.err.println("Duplicate fingerprint: " + fp + " in reply.");
            }
        }

        return descriptorCount;
    }

    /**
     * Reports every fingerprint whose first four characters occur anywhere in a reply line.
     *
     * @param label         the prefix for each reported line
     * @param fingerprints  the (already lowercase) fingerprints to look for
     * @param line          the reply line, as received
     * @param lowerCaseLine the same line, lowercased
     */
    private static void printFragmentMatches(String label, Collection<String> fingerprints,
                                             String line, String lowerCaseLine) {
        for (String fingerprint : fingerprints) {
            String fragment = StringUtils.left(fingerprint, 4);

            if (lowerCaseLine.contains(fragment)) {
                System.err.println(label
                        + fingerprint + " fragment " + fragment
                        + " found in descriptor line: " + line);
            }
        }
    }

    /**
     * Prints the consensus entry of every listed fingerprint that the consensus knows about.
     *
     * @param con          the consensus whose router table is consulted
     * @param label        the prefix for each reported line
     * @param fingerprints the fingerprints to look up
     */
    private static void printKnownRouters(Consensus con, String label, Collection<String> fingerprints) {
        for (String fingerprint : fingerprints) {
            OnionRouter router = con.routers.get(fingerprint);

            if (router != null) {
                System.err.println(label
                        + fingerprint + " found router: " + router.toString());
            }
        }
    }

}