// // ProxyHandler // Copyright 2004 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany // Copyright 2011 by Florian Richter // First released 2011 at http://yacy.net // // $LastChangedDate$ // $LastChangedRevision$ // $LastChangedBy$ // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this program in the file lgpl21.txt // If not, see <http://www.gnu.org/licenses/>. // package net.yacy.http; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.OutputStream; import java.net.SocketException; import java.util.Date; import java.util.Enumeration; import javax.servlet.ServletException; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import net.yacy.cora.date.GenericFormatter; import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.protocol.ClientIdentification; import net.yacy.cora.protocol.Domains; import net.yacy.cora.protocol.HeaderFramework; import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.protocol.ResponseHeader; import net.yacy.cora.protocol.http.HTTPClient; import net.yacy.crawler.data.Cache; import net.yacy.crawler.retrieval.Response; import net.yacy.document.TextParser; import net.yacy.server.http.HTTPDProxyHandler; import net.yacy.server.http.MultiOutputStream; import org.apache.http.Header; import org.apache.http.HttpResponse; import org.eclipse.jetty.server.Handler; import org.eclipse.jetty.server.Request; /** * jetty http handler * proxies request, caches responses and adds urls to crawler */ public class ProxyHandler extends AbstractRemoteHandler implements Handler { protected int timeout = 10000; @Override protected void doStart() throws Exception { super.doStart(); timeout = sb.getConfigInt("proxy.clientTimeout", 10000); } private void convertHeaderToJetty(HttpResponse in, HttpServletResponse out) { for(Header h: in.getAllHeaders()) { out.addHeader(h.getName(), h.getValue()); } } private void cleanResponseHeader(HttpResponse headers) { headers.removeHeaders(HeaderFramework.CONTENT_ENCODING); headers.removeHeaders(HeaderFramework.CONTENT_LENGTH); } private void deleteFromCache(final byte[] hash) { // long size = -1; ResponseHeader rh = Cache.getResponseHeader(hash); if (rh != null) { // delete the cache // if ((size = rh.getContentLength()) == 0) { // byte[] b = Cache.getContent(hash); // if (b != null) size = b.length; // } try { Cache.delete(hash); } catch (final IOException e) { // log refresh miss HTTPDProxyHandler.proxyLog.fine(e.getMessage()); } } } private void storeToCache(final Response yacyResponse, final byte[] cacheArray) { final Thread t = new Thread() { @Override public void run() { if (yacyResponse == null) return; this.setName("ProxyHandler.storeToCache(" + yacyResponse.url().toNormalform(true) + ")"); // the cache does either not exist or is (supposed to be) stale deleteFromCache(yacyResponse.url().hash()); if (cacheArray == null || cacheArray.length <= 0) return; yacyResponse.setContent(cacheArray); try { Cache.store(yacyResponse.url(), yacyResponse.getResponseHeader(), cacheArray); sb.toIndexer(yacyResponse); } catch (IOException e) { //log.logWarning("cannot write " + response.url() + " to Cache (1): " + e.getMessage(), e); } } }; t.setPriority(Thread.MIN_PRIORITY); t.start(); } @Override public void handleRemote(String target, Request baseRequest, HttpServletRequest request, HttpServletResponse response) throws IOException, ServletException { sb.proxyLastAccess = System.currentTimeMillis(); RequestHeader proxyHeaders = ProxyHandler.convertHeaderFromJetty(request); setProxyHeaderForClient(request, proxyHeaders); final HTTPClient client = new HTTPClient(ClientIdentification.yacyProxyAgent); client.setTimout(timeout); client.setHeader(proxyHeaders.entrySet()); client.setRedirecting(false); // send request try { String queryString = request.getQueryString() != null ? "?" + request.getQueryString() : ""; DigestURL digestURI = new DigestURL(request.getScheme(), request.getServerName(), request.getServerPort(), request.getRequestURI() + queryString); if (request.getMethod().equals(HeaderFramework.METHOD_GET)) { client.GET(digestURI, false); } else if (request.getMethod().equals(HeaderFramework.METHOD_POST)) { client.POST(digestURI, request.getInputStream(), request.getContentLength(), false); } else if (request.getMethod().equals(HeaderFramework.METHOD_HEAD)) { client.HEADResponse(digestURI, false); } else { throw new ServletException("Unsupported Request Method"); } HttpResponse clientresponse = client.getHttpResponse(); int statusCode = clientresponse.getStatusLine().getStatusCode(); final ResponseHeader responseHeaderLegacy = new ResponseHeader(statusCode, clientresponse.getAllHeaders()); if (responseHeaderLegacy.isEmpty()) { throw new SocketException(clientresponse.getStatusLine().toString()); } cleanResponseHeader(clientresponse); // reserver cache entry final net.yacy.crawler.retrieval.Request yacyRequest = new net.yacy.crawler.retrieval.Request( null, digestURI, null, //requestHeader.referer() == null ? null : new DigestURI(requestHeader.referer()).hash(), "", responseHeaderLegacy.lastModified(), sb.crawler.defaultProxyProfile.handle(), 0, sb.crawler.defaultProxyProfile.timezoneOffset()); //sizeBeforeDelete < 0 ? 0 : sizeBeforeDelete); final Response yacyResponse = new Response( yacyRequest, null, responseHeaderLegacy, sb.crawler.defaultProxyProfile, false, null ); final String storeError = yacyResponse.shallStoreCacheForProxy(); final boolean storeHTCache = yacyResponse.profile().storeHTCache(); final String supportError = TextParser.supports(yacyResponse.url(), yacyResponse.getMimeType()); if ( /* * Now we store the response into the htcache directory if * a) the response is cacheable AND */ (storeError == null) && /* * b) the user has configured to use the htcache OR * c) the content should be indexed */ ((storeHTCache) || (supportError != null)) ) { // we don't write actually into a file, only to RAM, and schedule writing the file. int l = responseHeaderLegacy.size(); final ByteArrayOutputStream byteStream = new ByteArrayOutputStream((l < 32) ? 32 : l); final OutputStream toClientAndMemory = new MultiOutputStream(new OutputStream[] {response.getOutputStream(), byteStream}); convertHeaderToJetty(clientresponse, response); response.setStatus(statusCode); client.writeTo(toClientAndMemory); // cached bytes storeToCache(yacyResponse, byteStream.toByteArray()); } else { // no caching /*if (log.isFine()) log.logFine(reqID +" "+ url.toString() + " not cached." + " StoreError=" + ((storeError==null)?"None":storeError) + " StoreHTCache=" + storeHTCache + " SupportError=" + supportError);*/ convertHeaderToJetty(clientresponse, response); response.setStatus(statusCode); if (statusCode == HttpServletResponse.SC_OK) { // continue to serve header to client e.g. HttpStatus = 302 (while skiping content) client.writeTo(response.getOutputStream()); // may throw exception on httpStatus=302 while gzip encoded inputstream } } } catch (final SocketException se) { throw new ServletException("Socket Exception: " + se.getMessage()); } finally { client.finish(); } // we handled this request, break out of handler chain logProxyAccess(request); baseRequest.setHandled(true); } /** * Convert ServletRequest header to modifiable YaCy RequestHeader * * @param request ServletRequest * @return RequestHeader created from ServletRequest */ public static RequestHeader convertHeaderFromJetty(HttpServletRequest request) { RequestHeader result = new RequestHeader(); Enumeration<String> headerNames = request.getHeaderNames(); while (headerNames.hasMoreElements()) { String headerName = headerNames.nextElement(); Enumeration<String> headers = request.getHeaders(headerName); while (headers.hasMoreElements()) { String header = headers.nextElement(); result.add(headerName, header); } } return result; } /** * adds specific header elements for the connection of the internal * httpclient to the remote server according to local config * * @param header header for http client (already preset with headers from * original ServletRequest) * @param origServletRequest original request/header */ private void setProxyHeaderForClient(final HttpServletRequest origServletRequest, final HeaderFramework header) { header.remove(RequestHeader.KEEP_ALIVE); header.remove(HeaderFramework.CONTENT_LENGTH); // setting the X-Forwarded-For header if (sb.getConfigBool("proxy.sendXForwardedForHeader", true)) { String ip = origServletRequest.getRemoteAddr(); if (!Domains.isThisHostIP(ip)) { // if originator is local host no user ip to forward (= request from localhost) header.put(HeaderFramework.X_FORWARDED_FOR, origServletRequest.getRemoteAddr()); } } String httpVersion = origServletRequest.getProtocol(); HTTPDProxyHandler.modifyProxyHeaders(header, httpVersion); } public final static synchronized void logProxyAccess(HttpServletRequest request) { final StringBuilder logMessage = new StringBuilder(80); // Timestamp logMessage.append(GenericFormatter.SHORT_SECOND_FORMATTER.format(new Date())); logMessage.append(' '); // Remote Host final String clientIP = request.getRemoteAddr(); logMessage.append(clientIP); logMessage.append(' '); // Method final String requestMethod = request.getMethod(); logMessage.append(requestMethod); logMessage.append(' '); // URL logMessage.append(request.getRequestURL()); final String requestArgs = request.getQueryString(); if (requestArgs != null) { logMessage.append("?").append(requestArgs); } HTTPDProxyHandler.proxyLog.fine(logMessage.toString()); } }