package tor.examples;
import tor.Consensus;
import tor.OnionRouter;
import tor.TorCrypto;
import tor.util.URLUtil;
import java.io.IOException;
import java.util.*;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.codec.binary.Hex;
// We only use this once, so we use the fully-qualified name
//import org.apache.commons.codec.binary.StringUtils;
/**
* Created by twilsonb on 3/08/2014.
* Test directory cache responses to multiple descriptor requests
*/
public class DescriptorExample {
    /*
    Test handling of:
    - uncompressed and compressed descriptors,
    - single and multiple descriptors per request

    Tor Directory Specification
    https://gitweb.torproject.org/torspec.git/blob/HEAD:/dir-spec.txt
    Clients MUST handle compressed concatenated information in two forms:
    - A concatenated list of zlib-compressed objects.
    - A zlib-compressed concatenated list of objects.
    Directory servers MAY generate either format: the former requires less
    CPU, but the latter requires less bandwidth.
    */

    // Comma-separated router flags used to select routers from the consensus.
    // Generally, I'd use "Running,Valid,Stable", because we want routers that have
    // a good chance of being in the consensus and caches, to reduce spurious errors
    // due to genuinely missing descriptors.
    // "Running" is essentially a no-op, because non-running routers are not in the consensus.
    public static String FLAGS = "Running,Valid,Stable";

    // How many fingerprints do we want to collect?
    // Appendix B, https://gitweb.torproject.org/torspec.git/blob/HEAD:/dir-spec.txt
    // "Due to squid proxy url limitations at most 96 fingerprints can be retrieved in a single request."
    /*
    The directory specification recommends the following client behaviour:
    No more than 128 descriptors are requested from a single mirror. ...
    After receiving any response client MUST discard any descriptors that it
    did not request.
    */
    // In reality, most caches will handle up to 500,
    // start silently dropping routers at 1000+
    // (it's unclear in the spec if this is correct or not),
    // and at 1300, tor will immediately terminate the connection
    // with "request too large" in its logs.
    // Use 10000 or Integer.MAX_VALUE to get all routers with the specified flags.
    public static int ROUTER_COUNT = 10;

    // Where we get the router strings we send to the server
    public static Boolean useConsensusRouters = true;
    public static Boolean useRandomHexRouters = false;
    // This option generally makes a mess of everything when turned on,
    // including the Java URL classes and the terminal output
    public static Boolean useRandomByteRouters = false;

    public static int FINGERPRINT_BYTE_LENGTH = 20;
    public static int FINGERPRINT_CHAR_LENGTH = FINGERPRINT_BYTE_LENGTH*2;

    /*
    The Tor directory spec allows fingerprints to be shortened,
    but only when requesting the consensus using *authority* fingerprints:
    http://<hostname>/tor/status-vote/current/consensus/<F1>+<F2>+<F3>.z
    fingerprints can be shortened to a length of any multiple of
    two, using only the leftmost part of the encoded fingerprint.
    Tor uses 3 bytes (6 hex characters) of the fingerprint.
    */
    // The number of characters to use from each fingerprint in requests.
    // The tor directory spec says this should be a multiple of 2.
    // We don't enforce this.
    // public static int TRUNCATE_FINGERPRINT_CHAR_LENGTH = 6;

    // It's much better to test against a local instance - it's faster, and reduces the load on the Tor network.
    // You also get much better debugging info from both sides,
    // and there's much less chance you'll be seen to be launching an (unsolicited) attack.
    // However, this also disables retries.
    public static String DIRECTORY_SERVER_ADDRESS = "127.0.0.1";
    public static String DIRECTORY_SERVER_PORT = "8880";
    // gho's router - request permission before using
    //public static String DIRECTORY_SERVER_ADDRESS = "37.187.247.150";
    //public static String DIRECTORY_SERVER_PORT = "9030";
    // Fall back to a random directory server if either of these is null
    // TODO: implement default port 9030?
    //public static String DIRECTORY_SERVER_ADDRESS = null;
    //public static String DIRECTORY_SERVER_PORT = null;

    /**
     * Requests the descriptors of several routers in a single directory request,
     * then compares the fingerprints found in the reply with those requested.
     * Progress is written to standard output and problems to standard error.
     *
     * @param args ignored; configuration is taken from the public static fields of this class
     */
    public static void main(String[] args) {
        // Let's only retry twice - we really don't want to get picked up as a DoS attack
        // TODO: we could do this much better with a setter method - on the class or on the object?
        Consensus.MAX_TRIES = 3;
        Consensus con = Consensus.getConsensus();

        TreeMap<String, OnionRouter> orMap = selectRouters(con);

        // Requesting one descriptor per connection is terribly slow beyond about 10 routers,
        // so everything is fetched in a single, multi-fingerprint request.
        System.out.println("Retrieve authority descriptors in one request");
        System.out.println("=============================================");
        System.out.println("Retrieving multiple authority descriptors with optimistic compression...");

        // Concatenate the request fingerprints together, using "+" as a separator
        TreeSet<String> requestFingerprintList = new TreeSet<>();
        StringBuilder fragmentBuilder = new StringBuilder();
        int fragmentCount = 0;
        for (String identityFingerprint : orMap.keySet()) {
            String requestFingerprint = chooseRequestFingerprint(identityFingerprint);
            if (requestFingerprint == null) {
                continue;
            }
            // only record real router fingerprints, not garbage
            if (requestFingerprint.equals(identityFingerprint)) {
                requestFingerprintList.add(requestFingerprint);
            }
            if (fragmentCount > 0) {
                fragmentBuilder.append('+');
            }
            fragmentBuilder.append(requestFingerprint);
            fragmentCount++;
        }

        // Without this guard a null fragment would be sent to the directory server
        // and to the URL encoder below.
        if (fragmentCount == 0) {
            System.err.println("No fingerprints to request: no routers were selected with flags \""
                    + FLAGS + "\", or no fingerprint source is enabled.");
            return;
        }
        String fingerprintURLFragment = fragmentBuilder.toString();

        // Now try to retrieve them in a single request
        // Either:
        // - the InflaterInputStream handles a "concatenated list of zlib-compressed objects" transparently, or
        // - few servers send a "concatenated list of zlib-compressed objects"
        //   (instead choosing a "zlib-compressed concatenated list of objects")
        String descriptorReply = null;
        try {
            if (DIRECTORY_SERVER_ADDRESS != null && DIRECTORY_SERVER_PORT != null) {
                // Connect to a specified directory (cache)
                // Note: this disables automatic retries
                descriptorReply = con.getRouterDescriptor(fingerprintURLFragment,
                        DIRECTORY_SERVER_ADDRESS, DIRECTORY_SERVER_PORT);
            } else {
                // Connect to a random directory (cache)
                // Note: using multiple, random caches may make errors harder to reproduce
                descriptorReply = con.getRouterDescriptor(fingerprintURLFragment);
            }
            System.out.println("Requested descriptors: " + URLUtil.URLEncode(fingerprintURLFragment));
        } catch (IOException e) {
            System.err.println("IO Error attempting to retrieve descriptor list for "
                    + String.valueOf(orMap.size()) + " fingerprints: "
                    + URLUtil.URLEncode(fingerprintURLFragment)
                    + "\n Error: " + e.toString());
        }
        if (descriptorReply == null) {
            return;
        }

        // Print what we wanted, what we got, and a comparison
        String[] descriptorLines = descriptorReply.split("\n");
        int descriptorCount = countDescriptors(descriptorLines);
        TreeSet<String> replyFingerprints = collectReplyFingerprints(descriptorLines);

        // Do our descriptors appear valid, and did we get the right number of them?
        if (descriptorReply.startsWith("router ") && descriptorCount == orMap.size()) {
            System.out.println("Downloaded " + String.valueOf(descriptorCount)
                    + " descriptors for fingerprints: " + URLUtil.URLEncode(fingerprintURLFragment));
        } else {
            System.err.println();
            System.err.println("Downloaded " + String.valueOf(descriptorCount)
                    + " descriptors for " + String.valueOf(orMap.size())
                    + " fingerprints: " + URLUtil.URLEncode(fingerprintURLFragment));
            // The raw reply is deliberately not printed:
            // it is often larger than the IntelliJ console buffer.
        }

        if (!requestFingerprintList.equals(replyFingerprints)) {
            reportFingerprintMismatch(con, requestFingerprintList, replyFingerprints, descriptorLines);
        }
    }

    /**
     * Picks the routers whose descriptors will be requested.
     * If the consensus has no more than ROUTER_COUNT routers with FLAGS, all of them are used;
     * otherwise random routers with FLAGS are drawn until ROUTER_COUNT distinct ones are collected.
     *
     * @param con the consensus to select routers from
     * @return the selected routers, keyed by identity hash
     */
    private static TreeMap<String, OnionRouter> selectRouters(Consensus con) {
        TreeMap<String, OnionRouter> allWithFlags = con.getORsWithFlag(FLAGS);
        if (allWithFlags.size() <= ROUTER_COUNT) {
            return allWithFlags;
        }
        TreeMap<String, OnionRouter> chosen = new TreeMap<>();
        while (chosen.size() < ROUTER_COUNT) {
            OnionRouter router = con.getRandomORWithFlag(FLAGS);
            // Check we're not using the same router multiple times:
            // servers eliminate duplicates, and this messes with our counts
            if (!chosen.containsKey(router.identityhash)) {
                chosen.put(router.identityhash, router);
            }
        }
        return chosen;
    }

    /**
     * Decides which string is sent to the server in place of one router's fingerprint,
     * according to the useConsensusRouters, useRandomHexRouters and useRandomByteRouters switches.
     *
     * @param identityFingerprint the router's real fingerprint, as keyed in the router map
     * @return the real fingerprint, a random replacement, or null when no source is enabled
     */
    private static String chooseRequestFingerprint(String identityFingerprint) {
        // Only for requesting consensuses via a (potentially truncated) authority fingerprint
        //String requestFingerprint = StringUtils.left(identityFingerprint, TRUNCATE_FINGERPRINT_CHAR_LENGTH);
        String requestFingerprint = null;
        if (useConsensusRouters) {
            requestFingerprint = identityFingerprint;
        }

        // Flip a coin to decide whether to replace the fingerprint with a random hex string
        if (useRandomHexRouters && (TorCrypto.rnd.nextBoolean() || requestFingerprint == null)) {
            byte[] randomHexBytes = new byte[FINGERPRINT_BYTE_LENGTH];
            TorCrypto.rnd.nextBytes(randomHexBytes);
            // converts to lowercase by default
            requestFingerprint = Hex.encodeHexString(randomHexBytes);
            // Flip a coin to decide whether to uppercase it
            if (TorCrypto.rnd.nextBoolean()) {
                requestFingerprint = requestFingerprint.toUpperCase(Locale.ROOT);
            }
        }

        // Flip a coin to decide whether to replace the fingerprint with random binary bytes
        if (useRandomByteRouters && (TorCrypto.rnd.nextBoolean() || requestFingerprint == null)) {
            // Though the name is a little counter-intuitive, in this particular instance
            // we want one byte for every character in the fingerprint
            byte[] randomBinaryBytes = new byte[FINGERPRINT_CHAR_LENGTH];
            TorCrypto.rnd.nextBytes(randomBinaryBytes);
            // Because we need to convert bytes to a Charset, then to a URL,
            // our ability to manipulate what tor sees is limited by the Java APIs.
            // They do an awful lot of sanity checking.
            // This code is as close as we can get to sending random binary data to tor,
            // without writing a basic HTTP client ourselves.
            // And it is hard to replicate requests using binary
            // by copying and pasting from various terminals or logs
            requestFingerprint =
                    org.apache.commons.codec.binary.StringUtils.newStringIso8859_1(randomBinaryBytes);
        }
        return requestFingerprint;
    }

    /**
     * Counts the lines of a reply that begin with "router ", i.e. one per descriptor.
     *
     * @param descriptorLines the reply, split into lines
     * @return the number of lines starting with "router "
     */
    private static int countDescriptors(String[] descriptorLines) {
        int descriptorCount = 0;
        for (String line : descriptorLines) {
            if (line.startsWith("router ")) {
                descriptorCount++;
            }
        }
        return descriptorCount;
    }

    /**
     * Extracts the fingerprints from the "fingerprint" lines of a reply,
     * lowercased and with whitespace removed so that they can be compared with the requested ones.
     * A fingerprint that appears more than once is reported on standard error.
     *
     * @param descriptorLines the reply, split into lines
     * @return the distinct fingerprints found in the reply
     */
    private static TreeSet<String> collectReplyFingerprints(String[] descriptorLines) {
        TreeSet<String> replyFingerprints = new TreeSet<>();
        for (String line : descriptorLines) {
            String fp = null;
            // The descriptor fingerprints can have an "opt " in front of them
            if (line.startsWith("fingerprint ")) {
                fp = line.substring(("fingerprint ").length());
            } else if (line.startsWith("opt fingerprint ")) {
                fp = line.substring(("opt fingerprint ").length());
            }
            if (fp == null) {
                continue;
            }
            // Match the consensus fingerprints, which are lowercase and contain no whitespace
            fp = StringUtils.deleteWhitespace(fp).toLowerCase(Locale.ROOT);
            // add() returns false when the fingerprint was already present
            if (!replyFingerprints.add(fp)) {
                System.err.println("Duplicate fingerprint: " + fp + " in reply.");
            }
        }
        return replyFingerprints;
    }

    /**
     * Prints, on standard error, the requested and received fingerprints, which of them are
     * missing from or extra in the reply, and any hints that help to locate the missing and extra ones.
     *
     * @param con the consensus used to look up router details
     * @param requestFingerprintList the real fingerprints that were requested
     * @param replyFingerprints the fingerprints found in the reply
     * @param descriptorLines the reply, split into lines
     */
    private static void reportFingerprintMismatch(Consensus con,
                                                  TreeSet<String> requestFingerprintList,
                                                  TreeSet<String> replyFingerprints,
                                                  String[] descriptorLines) {
        System.err.println();
        // Print what we wanted and what we got
        for (String requestFingerprint : requestFingerprintList) {
            System.err.println("Requested fingerprint: " + requestFingerprint + ".");
        }
        System.err.println();
        for (String replyFingerprint : replyFingerprints) {
            System.err.println("Received fingerprint: " + replyFingerprint + ".");
        }
        System.err.println();

        // Now list missing and extra fingerprints
        TreeSet<String> missingInReply = new TreeSet<>(requestFingerprintList);
        missingInReply.removeAll(replyFingerprints);
        for (String missingFingerprint : missingInReply) {
            System.err.println("Missing fingerprint: "
                    + missingFingerprint + ". Requested, but not in reply.");
        }
        TreeSet<String> extraInReply = new TreeSet<>(replyFingerprints);
        extraInReply.removeAll(requestFingerprintList);
        for (String extraFingerprint : extraInReply) {
            System.err.println("Extra fingerprint: "
                    + extraFingerprint + ". Not requested, but provided in reply.");
        }

        // Try and find missing and extra fingerprint fragments in the descriptors
        // This is error-prone, as fingerprints are space-separated in descriptors
        for (String line : descriptorLines) {
            // Lowercase once per line rather than once per fingerprint
            String lowerCaseLine = line.toLowerCase(Locale.ROOT);
            printFragmentMatches("Missing", missingInReply, line, lowerCaseLine);
            printFragmentMatches("Extra", extraInReply, line, lowerCaseLine);
        }

        // Pull up OnionRouter details for missing and extra routers
        printRouterDetails(con, "Missing", missingInReply);
        printRouterDetails(con, "Extra", extraInReply);
    }

    /**
     * Reports every fingerprint whose first four characters occur in the given descriptor line.
     *
     * @param label "Missing" or "Extra", used as the prefix of the message
     * @param fingerprints the lowercase fingerprints to look for
     * @param line the descriptor line, as received
     * @param lowerCaseLine the same line, lowercased
     */
    private static void printFragmentMatches(String label, TreeSet<String> fingerprints,
                                             String line, String lowerCaseLine) {
        for (String fingerprint : fingerprints) {
            // fingerprints are already lowercase
            String fragment = StringUtils.left(fingerprint, 4);
            if (lowerCaseLine.contains(fragment)) {
                System.err.println(label + " fingerprint: "
                        + fingerprint + " fragment " + fragment
                        + " found in descriptor line: " + line);
            }
        }
    }

    /**
     * Prints the consensus entry, when there is one, of each of the given fingerprints.
     *
     * @param con the consensus whose router table is searched
     * @param label "Missing" or "Extra", used as the prefix of the message
     * @param fingerprints the fingerprints to look up
     */
    private static void printRouterDetails(Consensus con, String label, TreeSet<String> fingerprints) {
        for (String fingerprint : fingerprints) {
            OnionRouter router = con.routers.get(fingerprint);
            if (router != null) {
                System.err.println(label + " fingerprint: "
                        + fingerprint + " found router: " + router.toString());
            }
        }
    }
}