/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package gobblin.tunnel;
import java.io.Closeable;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.net.SocketAddress;
import java.nio.channels.SelectionKey;
import java.nio.channels.Selector;
import java.nio.channels.ServerSocketChannel;
import java.util.Set;
import java.util.concurrent.Callable;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import gobblin.util.ExecutorsUtils;
/**
* This class implements a tunnel through a proxy to a resource on the internet. Frequently data stores to be accessed by
* Gobblin reside outside data centers. In these cases, outbound access from a data center typically needs to go through
* a gateway proxy for security purposes. In some cases this is an HTTP proxy. However, some protocols like JDBC don't
* support the concept of "proxies", let alone HTTP proxies, and hence a solution is needed to enable this.
*
* This class provides a method of tunneling arbitrary protocols like JDBC connections over an HTTP proxy. Note that
* while it is currently only implemented for JDBC (see {@link gobblin.source.extractor.extract.jdbc.JdbcExtractor} and
* {@code JdbcProvider}), it can be extended to work with any other
* TCP-based protocol.
*
* The way the Tunnel works is as follows:
* 1. When a Gobblin data source or Extractor or related class (such as JdbcProvider) is invoked to fetch data from an
* externally hosted resource, it should check if the WorkUnit has a proxy host and port defined.
* 2. If a proxy is defined, it should extract the remote host and port from the target URL hosting the resource (e.g.
* the JdbcProvider gets this from the connectionUrl.)
* 3. The extractor then creates a Tunnel instance configured with the remote host and port and the proxy host and port.
* 4. The Tunnel starts a thread that listens on an arbitrary port on localhost.
* 5. The extractor then points the target URL to the localhost and port the Tunnel is listening on. (E.g. in the case
* of JDBC, the JdbcProvider changes the connectionUrl to replace the remote host and port with the localhost and
* port before passing it on to the driver.)
* 6. Hence when the extractor client (e.g. JDBC driver) creates a connection, it connects to the Tunnel socket instead
* of the actual target host.
* 7. The Tunnel then connects to the remote host through the proxy via a HTTP CONNECT request.
* 8. If established successfully, the Tunnel then simply relays bytes back and forth between the Gobblin extractor and
* the target host via the intermediate proxy.
* 9. When the Gobblin extractor (e.g. JDBC data source) is closed down, the Tunnel must be shut down as well.
*
* The Tunnel can accept as many connections as the JdbcExtractor opens. It uses NIO to minimize resource usage.
*
* @author navteniev@linkedin.com
* @author kkandekar@linkedin.com
*/
public class Tunnel {
  /** Value returned by {@link #getPort()} when the tunnel is not bound to a local port. */
  public static final int NON_EXISTENT_PORT = -1;

  private static final Logger LOG = LoggerFactory.getLogger(Tunnel.class);

  /** Non-blocking listening channel bound to an automatically assigned local address. */
  private ServerSocketChannel server;

  /** Daemon thread running the {@link Dispatcher} select loop. */
  private Thread thread;

  /** Remote and proxy endpoint settings handed to the accept handler. */
  private final Config config;

  private Tunnel(String remoteHost, int remotePort, String proxyHost, int proxyPort) {
    this.config = new Config(remoteHost, remotePort, proxyHost, proxyPort);
  }

  /**
   * Binds the listening channel, opens the selector and starts the dispatcher thread.
   *
   * <p>If any step fails, everything opened so far is closed again so that no channel or selector
   * is leaked.
   *
   * @return this tunnel, ready to accept connections
   * @throws IOException if the channel or the selector cannot be opened or configured
   */
  private Tunnel open() throws IOException {
    Selector selector = null;
    boolean opened = false;
    try {
      // Assign before binding so the channel can still be closed if bind() fails.
      this.server = ServerSocketChannel.open();
      this.server.bind(null);
      this.server.configureBlocking(false);
      selector = Selector.open();
      startTunnelThread(selector);
      opened = true;
      return this;
    } catch (IOException ioe) {
      LOG.error("Failed to open the tunnel", ioe);
      throw ioe;
    } finally {
      if (!opened) {
        closeQuietly(selector);
        closeQuietly(this.server);
      }
    }
  }

  /**
   * Returns the local port the tunnel is listening on.
   *
   * @return the bound port, or {@link #NON_EXISTENT_PORT} if the server channel is missing, closed,
   *         or not bound to an {@link InetSocketAddress}
   * @throws IOException if the local address cannot be read from the channel
   */
  public int getPort() throws IOException {
    SocketAddress localAddress = null;
    try {
      if (this.server != null && this.server.isOpen()) {
        localAddress = this.server.getLocalAddress();
      }
      if (localAddress instanceof InetSocketAddress) {
        return ((InetSocketAddress) localAddress).getPort();
      }
    } catch (IOException e) {
      LOG.error("Failed to get tunnel port", e);
      throw e;
    }
    return NON_EXISTENT_PORT;
  }

  /**
   * Starts the daemon thread that runs the select loop on the given selector.
   *
   * @param selector the selector the dispatcher will wait on
   */
  private void startTunnelThread(Selector selector) {
    this.thread = new Thread(new Dispatcher(selector), "Tunnel Listener");
    this.thread.setUncaughtExceptionHandler(new Thread.UncaughtExceptionHandler() {
      @Override
      public void uncaughtException(Thread t, Throwable e) {
        LOG.error("Uncaught exception in thread " + t.getName(), e);
      }
    });
    // so we don't prevent the JVM from shutting down, just in case
    this.thread.setDaemon(true);
    this.thread.start();
  }

  /**
   * @return {@code true} if the dispatcher thread has been started and has not yet terminated
   */
  public boolean isTunnelThreadAlive() {
    return (this.thread != null && this.thread.isAlive());
  }

  /**
   * Closes a resource, logging instead of propagating any {@link IOException}.
   *
   * @param resource the resource to close; {@code null} is ignored
   */
  private static void closeQuietly(Closeable resource) {
    if (resource == null) {
      return;
    }
    try {
      resource.close();
    } catch (IOException ioe) {
      LOG.warn("Exception while releasing tunnel resource " + resource, ioe);
    }
  }

  /**
   * Select loop of the tunnel: waits for ready keys and invokes the {@link Callable} attached to
   * each of them. When the loop ends, it closes every channel still registered with the selector
   * and then the selector itself.
   */
  private class Dispatcher implements Runnable {
    private final Selector selector;

    public Dispatcher(Selector selector) {
      this.selector = selector;
    }

    @Override
    public void run() {
      try {
        Tunnel.this.server.register(this.selector, SelectionKey.OP_ACCEPT,
            ExecutorsUtils.loggingDecorator(new AcceptHandler(Tunnel.this.server, this.selector, Tunnel.this.config)));

        // Thread.interrupted() also clears the flag; close() interrupts this thread to stop the loop.
        while (!Thread.interrupted()) {
          this.selector.select();
          Set<SelectionKey> selectionKeys = this.selector.selectedKeys();
          for (SelectionKey selectionKey : selectionKeys) {
            dispatch(selectionKey);
          }
          selectionKeys.clear();
        }
      } catch (IOException ioe) {
        LOG.error("Unhandled IOException. Tunnel will close", ioe);
      } finally {
        // Runs on every exit path, including runtime exceptions, so nothing is left open.
        LOG.info("Closing tunnel");
        releaseResources();
      }
    }

    /**
     * Invokes the handler attached to a ready key. Any failure, including an unexpected
     * attachment type, is logged so that one bad key cannot stop the dispatcher.
     */
    private void dispatch(SelectionKey selectionKey) {
      try {
        Callable<?> attachment = (Callable<?>) selectionKey.attachment();
        attachment.call();
      } catch (Exception e) {
        LOG.error("exception handling event on {}", selectionKey.channel(), e);
      }
    }

    /** Closes all channels registered with the selector, then the selector itself. */
    private void releaseResources() {
      // Snapshot the key set: closing a channel cancels its key while we are still iterating.
      SelectionKey[] registered = this.selector.keys().toArray(new SelectionKey[0]);
      for (SelectionKey key : registered) {
        closeQuietly(key.channel());
      }
      closeQuietly(this.selector);
    }
  }

  /**
   * Shuts the tunnel down: closes the listening channel, interrupts the dispatcher thread and
   * waits for it to finish.
   *
   * @throws RuntimeException wrapping the {@link InterruptedException} if the calling thread is
   *         interrupted while waiting; the caller's interrupt status is restored first
   */
  public void close() {
    try {
      this.server.close();
      LOG.info("Closed tunnel.");
    } catch (IOException ioe) {
      LOG.warn("Exception during shutdown of tunnel", ioe);
    } finally {
      try {
        this.thread.interrupt();
        this.thread.join();
      } catch (InterruptedException e) {
        // Restore the flag so callers further up the stack can still observe the interruption.
        Thread.currentThread().interrupt();
        throw new RuntimeException(e);
      }
    }
  }

  /**
   * Creates a tunnel for the given endpoints and starts listening on a local port.
   *
   * @param remoteHost host of the target resource
   * @param remotePort port of the target resource
   * @param proxyHost host of the proxy to go through
   * @param proxyPort port of the proxy to go through
   * @return an open tunnel; use {@link #getPort()} to find the local port it listens on
   * @throws IOException if the tunnel cannot be opened
   */
  public static Tunnel build(String remoteHost, int remotePort, String proxyHost, int proxyPort) throws IOException {
    return new Tunnel(remoteHost, remotePort, proxyHost, proxyPort).open();
  }
}