/**
* HTTPClient
* Copyright 2010 by Sebastian Gaebel
* First released 01.07.2010 at http://yacy.net
*
* $LastChangedDate$
* $LastChangedRevision$
* $LastChangedBy$
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.cora.protocol.http;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.security.KeyManagementException;
import java.security.NoSuchAlgorithmException;
import java.security.cert.CertificateException;
import java.security.cert.X509Certificate;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.FutureTask;
import java.util.concurrent.TimeUnit;
import javax.net.ssl.SSLContext;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;
import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.HttpEntityEnclosingRequest;
import org.apache.http.HttpHeaders;
import org.apache.http.HttpHost;
import org.apache.http.HttpResponse;
import org.apache.http.HttpStatus;
import org.apache.http.auth.AuthSchemeProvider;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.CredentialsProvider;
import org.apache.http.client.config.AuthSchemes;
import org.apache.http.client.config.CookieSpecs;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpHead;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.client.protocol.HttpClientContext;
import org.apache.http.config.Lookup;
import org.apache.http.config.Registry;
import org.apache.http.config.RegistryBuilder;
import org.apache.http.config.SocketConfig;
import org.apache.http.conn.ConnectionKeepAliveStrategy;
import org.apache.http.conn.DnsResolver;
import org.apache.http.conn.HttpClientConnectionManager;
import org.apache.http.conn.routing.HttpRoute;
import org.apache.http.conn.socket.ConnectionSocketFactory;
import org.apache.http.conn.socket.PlainConnectionSocketFactory;
import org.apache.http.conn.ssl.NoopHostnameVerifier;
import org.apache.http.conn.ssl.SSLConnectionSocketFactory;
import org.apache.http.entity.mime.MultipartEntityBuilder;
import org.apache.http.entity.mime.content.ContentBody;
import org.apache.http.impl.auth.BasicSchemeFactory;
import org.apache.http.impl.client.BasicCredentialsProvider;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.DefaultConnectionKeepAliveStrategy;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import org.apache.http.protocol.HTTP;
import org.apache.http.protocol.HttpContext;
import org.apache.http.util.ByteArrayBuffer;
import org.apache.http.util.EntityUtils;
import net.yacy.cora.document.encoding.UTF8;
import net.yacy.cora.document.id.MultiProtocolURL;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.protocol.ConnectionInfo;
import net.yacy.cora.protocol.Domains;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.http.auth.YaCyDigestSchemeFactory;
import net.yacy.cora.util.CommonPattern;
import net.yacy.cora.util.Memory;
import net.yacy.kelondro.util.NamePrefixThreadFactory;
/**
* HttpClient implementation which uses <a href="http://hc.apache.org/">HttpComponents Client</a>.
*
* @author sixcooler
*
*/
public class HTTPClient {
/** Default for the connect, connection-request and socket timeouts of a request; also used as the pool's validate-after-inactivity period. */
private final static int default_timeout = 6000;
/** Total connection limit of the pool and per-route limit for localhost. */
private final static int maxcon = 200;
/*
 * Monitor thread that is created and started while the pooling connection manager is initialized.
 * NOTE: the static fields below are order dependent. Static initializers run in textual order:
 * initClientBuilder() assigns connectionMonitor (so this "= null" must run first) and reads
 * dfltReqConf (so that one must be initialized before clientBuilder).
 */
private static IdleConnectionMonitorThread connectionMonitor = null;
/** Request defaults; every instance starts from a copy of it. */
private final static RequestConfig dfltReqConf = initRequestConfig();
/** Builder shared by ALL instances; a client is built from it for each executed request. */
private final static HttpClientBuilder clientBuilder = initClientBuilder();
/** Per-instance request configuration (timeouts, redirect handling). */
private final RequestConfig.Builder reqConfBuilder;
/** Headers applied to each request, or null when none were set. */
private Set<Entry<String, String>> headers = null;
/** Response of the most recently executed request, or null when nothing was executed yet. */
private CloseableHttpResponse httpResponse = null;
/** Request of a streaming GET/POST that is still in progress; non-null marks this client as "in use". */
private HttpUriRequest currentRequest = null;
/** Length in bytes of the last posted request body, kept for statistics. */
private long upbytes = 0L;
/** Host name used as target host and Host header, or null to leave the request as it is. */
private String host = null;
/** Maximum time in milliseconds to wait for the response of a concurrently executed request. */
private final long timeout;
/** Thread pool that runs the requests executed with concurrent == true. */
private static ExecutorService executor = Executors
.newCachedThreadPool(new NamePrefixThreadFactory(HTTPClient.class.getSimpleName() + ".execute"));
/**
 * Creates a client which identifies itself as the given agent and uses the agent's own timeout.
 * Note that the user agent is written to the client builder shared by all instances.
 *
 * @param agent provides the user agent string and the timeout in milliseconds
 */
public HTTPClient(final ClientIdentification.Agent agent) {
    super();
    clientBuilder.setUserAgent(agent.userAgent);
    this.reqConfBuilder = RequestConfig.copy(dfltReqConf);
    this.timeout = agent.clientTimeout;
    // apply the same value to the socket, connect and connection-request timeouts
    setTimout(agent.clientTimeout);
}
/**
 * Creates a client which identifies itself as the given agent but uses an explicit timeout.
 * Note that the user agent is written to the client builder shared by all instances.
 *
 * @param agent provides the user agent string
 * @param timeout timeout in milliseconds, used instead of the agent's own timeout
 */
public HTTPClient(final ClientIdentification.Agent agent, final int timeout) {
    super();
    clientBuilder.setUserAgent(agent.userAgent);
    this.reqConfBuilder = RequestConfig.copy(dfltReqConf);
    this.timeout = timeout;
    // apply the same value to the socket, connect and connection-request timeouts
    setTimout(timeout);
}
/**
 * Sets the user agent on the client builder shared by all instances.
 *
 * @param defaultAgent the user agent string to send
 */
public static void setDefaultUserAgent(final String defaultAgent) {
    HTTPClient.clientBuilder.setUserAgent(defaultAgent);
}
/**
 * Builds the request configuration every instance starts from.
 *
 * @return the default request configuration
 */
private static RequestConfig initRequestConfig() {
    return RequestConfig.custom()
            // IMPORTANT - if not set to 'false' then servers do not process the request until a time-out of 2 seconds
            .setExpectContinueEnabled(false)
            // timeout in milliseconds for obtaining a connection from the connection manager
            .setConnectionRequestTimeout(default_timeout)
            // timeout in milliseconds until a connection is established
            .setConnectTimeout(default_timeout)
            // SO_TIMEOUT: maximum period of inactivity between two consecutive data packets in milliseconds
            .setSocketTimeout(default_timeout)
            // ignore cookies, because they may cause segfaults in the default cookie store and are not needed
            .setCookieSpec(CookieSpecs.IGNORE_COOKIES)
            .setRedirectsEnabled(true)
            .setRelativeRedirectsAllowed(true)
            .build();
}
/**
 * Configures the client builder shared by all instances: connection pool, default request
 * configuration, user agent, cookie handling, keep-alive strategy, gzip interceptors and proxy settings.
 *
 * @return the configured builder
 */
private static HttpClientBuilder initClientBuilder() {
    // note: automatic retries are intentionally left enabled (disableAutomaticRetries() is not called)
    return HttpClientBuilder.create()
            .setConnectionManager(initPoolingConnectionManager())
            .setDefaultRequestConfig(dfltReqConf)
            // UserAgent
            .setUserAgent(ClientIdentification.yacyInternetCrawlerAgent.userAgent)
            // disable the cookie store, because it may cause segfaults and is not needed
            .setDefaultCookieStore(null)
            .disableCookieManagement()
            // custom keep alive strategy
            .setKeepAliveStrategy(customKeepAliveStrategy())
            // ask for gzip
            .addInterceptorLast(new GzipRequestInterceptor())
            // uncompress gzip
            .addInterceptorLast(new GzipResponseInterceptor())
            // Proxy
            .setRoutePlanner(ProxySettings.RoutePlanner)
            .setDefaultCredentialsProvider(ProxySettings.CredsProvider);
}
/**
 * Creates the pooling connection manager used by the shared client builder and, when none is
 * running yet, starts the idle connection monitor for it.
 *
 * @return the configured connection manager
 */
private static PoolingHttpClientConnectionManager initPoolingConnectionManager() {
    final Registry<ConnectionSocketFactory> socketFactories = RegistryBuilder.<ConnectionSocketFactory>create()
            .register("http", PlainConnectionSocketFactory.getSocketFactory())
            .register("https", getSSLSocketFactory())
            .build();

    // resolve host names with the application's own resolver
    final DnsResolver resolver = new DnsResolver() {
        @Override
        public InetAddress[] resolve(final String host0) throws UnknownHostException {
            final InetAddress address = Domains.dnsResolve(host0);
            if (address == null) {
                throw new UnknownHostException(host0);
            }
            return new InetAddress[] { address };
        }
    };
    final PoolingHttpClientConnectionManager manager = new PoolingHttpClientConnectionManager(socketFactories, resolver);

    // how many connections do we need? - library default: 20
    manager.setMaxTotal(maxcon);
    // for statistics the same value should also be set here
    ConnectionInfo.setMaxcount(maxcon);
    // connections per host (library default: 2)
    manager.setDefaultMaxPerRoute((int) (2 * Memory.cores()));
    // increase max connections for localhost
    manager.setMaxPerRoute(new HttpRoute(new HttpHost(Domains.LOCALHOST)), maxcon);
    // period of inactivity after which a pooled connection is re-validated before reuse
    manager.setValidateAfterInactivity(default_timeout);

    manager.setDefaultSocketConfig(SocketConfig.custom()
            // defines whether the socket can be bound even though a previous connection is still in a timeout state
            .setSoReuseAddress(true)
            // SO_TIMEOUT: maximum period of inactivity between two consecutive data packets in milliseconds
            .setSoTimeout(3000)
            // conserve bandwidth by minimizing the number of segments that are sent
            .setTcpNoDelay(false)
            .build());

    if (connectionMonitor == null) {
        connectionMonitor = new IdleConnectionMonitorThread(manager);
        connectionMonitor.start();
    }
    return manager;
}
/**
 * Shuts down the idle connection monitor and waits until it has terminated.
 * This method should be called just before shutdown of the application.
 *
 * @throws InterruptedException when the calling thread is interrupted while waiting
 */
public static void closeConnectionManager() throws InterruptedException {
    if (connectionMonitor == null) {
        // the monitor was never started, nothing to stop
        return;
    }
    connectionMonitor.shutdown();
    connectionMonitor.join();
}
/**
 * Defines the headers which are added to every request sent by this client.
 * The given set is kept by reference, it is not copied.
 *
 * @param entrys header name/value pairs, or null to send no additional headers
 */
public void setHeader(final Set<Entry<String, String>> entrys) {
    this.headers = entrys;
}
/**
 * Sets the socket, connect and connection-request timeouts of this client's requests.
 * The wait limit for concurrently executed requests is fixed at construction and is not
 * changed by this method.
 *
 * @param timeout in milliseconds
 */
public void setTimout(final int timeout) {
    this.reqConfBuilder
            .setSocketTimeout(timeout)
            .setConnectTimeout(timeout)
            .setConnectionRequestTimeout(timeout);
}
/**
 * Sets the user agent to be used for requests.
 * The value is written to the client builder shared by all instances, so it is not
 * limited to this client.
 *
 * @param agent provides the user agent string
 */
public void setUserAgent(final ClientIdentification.Agent agent) {
    HTTPClient.clientBuilder.setUserAgent(agent.userAgent);
}
/**
 * Sets the host name which is used as target host and Host header of the following requests.
 *
 * @param host the host name, or null to leave requests unchanged
 */
public void setHost(final String host) {
    this.host = host;
}
/**
 * Defines whether the following requests follow redirects (absolute and relative ones).
 *
 * @param redirecting true to follow redirects
 */
public void setRedirecting(final boolean redirecting) {
    this.reqConfBuilder
            .setRedirectsEnabled(redirecting)
            .setRelativeRedirectsAllowed(redirecting);
}
/**
 * GETs a page from the server, reading at most Integer.MAX_VALUE bytes.
 *
 * @param uri the url to get
 * @param username user name for HTTP authentication : only sent when requesting localhost
 * @param pass password for HTTP authentication : only sent when requesting localhost
 * @param concurrent whether the request should be executed by a thread of the executor pool.
 *        Ignored when requesting localhost with a non-null password
 * @return content bytes, may be null
 * @throws IOException when the url is invalid or the request fails
 */
public byte[] GETbytes(final String uri, final String username, final String pass, final boolean concurrent) throws IOException {
    final int noLimit = Integer.MAX_VALUE;
    return GETbytes(uri, username, pass, noLimit, concurrent);
}
/**
 * GETs a page from the server, reading at most Integer.MAX_VALUE bytes.
 *
 * @param url the url to get
 * @param username user name for HTTP authentication : only sent when requesting localhost
 * @param pass password for HTTP authentication : only sent when requesting localhost
 * @param concurrent whether the request should be executed by a thread of the executor pool.
 *        Ignored when requesting localhost with a non-null password
 * @return content bytes, may be null
 * @throws IOException when the request fails
 */
public byte[] GETbytes(final MultiProtocolURL url, final String username, final String pass, final boolean concurrent) throws IOException {
    final int noLimit = Integer.MAX_VALUE;
    return GETbytes(url, username, pass, noLimit, concurrent);
}
/**
 * GETs a page from the server.
 *
 * @param uri the url to get
 * @param username user name for HTTP authentication : only sent when requesting localhost
 * @param pass password for HTTP authentication : only sent when requesting localhost
 * @param maxBytes maximum response bytes to read, a negative value means no limit
 * @param concurrent whether the request should be executed by a thread of the executor pool.
 *        Ignored when requesting localhost with a non-null password
 * @return content bytes, may be null
 * @throws IOException when the url is invalid or the request fails
 */
public byte[] GETbytes(final String uri, final String username, final String pass, final int maxBytes, final boolean concurrent) throws IOException {
    final MultiProtocolURL parsed = new MultiProtocolURL(uri);
    return GETbytes(parsed, username, pass, maxBytes, concurrent);
}
/**
 * GETs a page from the server.
 * <p>
 * Requests to localhost with a non-null password are sent with a dedicated client which
 * supports HTTP Basic and Digest authentication; all other requests use the shared client setup.
 * </p>
 *
 * @param url the url to get
 * @param username user name for HTTP authentication : only sent when requesting localhost
 * @param pass password for HTTP authentication : only sent when requesting localhost
 * @param maxBytes maximum response bytes to read, a negative value means no limit
 * @param concurrent whether the request should be executed by a thread of the executor pool.
 *        Ignored when requesting localhost with a non-null password
 * @return content bytes, or null when there is no response body, the status is not 200 or the
 *         announced content length is not below maxBytes
 * @throws IOException when the url can not be used for a request or the request fails
 */
public byte[] GETbytes(final MultiProtocolURL url, final String username, final String pass, final int maxBytes, final boolean concurrent) throws IOException {
    final boolean localhost = Domains.isLocalhost(url.getHost());
    final String urix = url.toNormalform(true);
    final HttpGet httpGet;
    try {
        httpGet = new HttpGet(urix);
    } catch (final IllegalArgumentException e) {
        // can be caused at java.net.URI.create(); keep the original exception as cause
        throw new IOException(e.getMessage(), e);
    }
    if (!localhost) setHost(url.getHost()); // overwrite resolved IP, needed for shared web hosting DO NOT REMOVE, see http://en.wikipedia.org/wiki/Shared_web_hosting_service
    if (!localhost || pass == null) {
        return getContentBytes(httpGet, maxBytes, concurrent);
    }

    /* authenticated request to localhost */
    final CredentialsProvider credsProvider = new BasicCredentialsProvider();
    credsProvider.setCredentials(
            new AuthScope("localhost", url.getPort()),
            new UsernamePasswordCredentials(username, pass));
    /* Use the custom YaCyDigestScheme for HTTP Digest Authentication */
    final Lookup<AuthSchemeProvider> authSchemeRegistry = RegistryBuilder.<AuthSchemeProvider>create()
            .register(AuthSchemes.BASIC, new BasicSchemeFactory())
            .register(AuthSchemes.DIGEST, new YaCyDigestSchemeFactory())
            .build();
    final CloseableHttpClient httpclient = HttpClients.custom().setDefaultCredentialsProvider(credsProvider)
            .setDefaultAuthSchemeRegistry(authSchemeRegistry).build();
    byte[] content = null;
    try {
        this.httpResponse = httpclient.execute(httpGet);
        try {
            final HttpEntity httpEntity = this.httpResponse.getEntity();
            if (httpEntity != null) {
                if (getStatusCode() == 200 && (maxBytes < 0 || httpEntity.getContentLength() < maxBytes)) {
                    content = getByteArray(httpEntity, maxBytes);
                }
                // Ensures that the entity content is fully consumed and the content stream, if exists, is closed.
                EntityUtils.consume(httpEntity);
            }
        } finally {
            this.httpResponse.close();
        }
    } finally {
        httpclient.close();
    }
    return content;
}
/**
 * GETs a page from the server and keeps the response open for streaming out.
 * Please take care to call finish()!
 *
 * @param uri the url to get
 * @param concurrent whether the request should be executed by a thread of the executor pool
 * @throws IOException when the url is invalid, the client is in use or the request fails
 */
public void GET(final String uri, final boolean concurrent) throws IOException {
    final MultiProtocolURL parsed = new MultiProtocolURL(uri);
    GET(parsed, concurrent);
}
/**
 * GETs a page from the server and keeps the response open for streaming out.
 * Please take care to call finish()!
 *
 * @param url the url to get
 * @param concurrent whether the request should be executed by a thread of the executor pool
 * @throws IOException when a previous streaming request was not finished yet, the url can not
 *         be used for a request or the request fails
 */
public void GET(final MultiProtocolURL url, final boolean concurrent) throws IOException {
    if (this.currentRequest != null) throw new IOException("Client is in use!");
    final String urix = url.toNormalform(true);
    final HttpGet httpGet;
    try {
        httpGet = new HttpGet(urix);
    } catch (final IllegalArgumentException e) {
        // can be caused at java.net.URI.create(); keep the original exception as cause
        throw new IOException(e.getMessage(), e);
    }
    setHost(url.getHost()); // overwrite resolved IP, needed for shared web hosting DO NOT REMOVE, see http://en.wikipedia.org/wiki/Shared_web_hosting_service
    // marks this client as in use until finish() is called
    this.currentRequest = httpGet;
    execute(httpGet, concurrent);
}
/**
 * Sends a HEAD request and returns its response.
 *
 * @param uri the url to get the response from
 * @param concurrent whether the request should be executed by a thread of the executor pool
 * @return the HttpResponse
 * @throws IOException when the url is invalid or the request fails
 */
public HttpResponse HEADResponse(final String uri, final boolean concurrent) throws IOException {
    final MultiProtocolURL parsed = new MultiProtocolURL(uri);
    return HEADResponse(parsed, concurrent);
}
/**
 * Sends a HEAD request and returns its response. The request is finished before this
 * method returns, so there is no need to call finish() afterwards.
 *
 * @param url the url to get the response from
 * @param concurrent whether the request should be executed by a thread of the executor pool
 * @return the HttpResponse
 * @throws IOException when the url can not be used for a request or the request fails
 */
public HttpResponse HEADResponse(final MultiProtocolURL url, final boolean concurrent) throws IOException {
    final HttpHead httpHead;
    try {
        httpHead = new HttpHead(url.toNormalform(true));
    } catch (final IllegalArgumentException e) {
        // can be caused at java.net.URI.create(); report it like GET does, keeping the cause
        throw new IOException(e.getMessage(), e);
    }
    setHost(url.getHost()); // overwrite resolved IP, needed for shared web hosting DO NOT REMOVE, see http://en.wikipedia.org/wiki/Shared_web_hosting_service
    execute(httpHead, concurrent);
    try {
        finish();
    } finally {
        // finish() only removes the statistics entry of a streaming request (currentRequest),
        // so the entry of this request has to be removed here, even when finish() fails
        ConnectionInfo.removeConnection(httpHead.hashCode());
    }
    return this.httpResponse;
}
/**
* This method POSTs a page from the server.
* to be used for streaming out
* Please take care to call finish()!
*
* @param uri the url to post
* @param instream the input to post
* @param length the contentlength
* @throws IOException
*/
/*
public void POST(final String uri, final InputStream instream, final long length, final boolean concurrent) throws IOException {
POST(new MultiProtocolURL(uri), instream, length, concurrent);
}
*/
/**
 * POSTs the given stream to the server and keeps the response open for streaming out.
 * Please take care to call finish()!
 *
 * @param url the url to post
 * @param instream the input to post
 * @param length the content length
 * @param concurrent whether the request should be executed by a thread of the executor pool
 * @throws IOException when a previous streaming request was not finished yet or the request fails
 */
public void POST(final MultiProtocolURL url, final InputStream instream, final long length, final boolean concurrent) throws IOException {
    if (this.currentRequest != null) {
        throw new IOException("Client is in use!");
    }
    final HttpPost request = new HttpPost(url.toNormalform(true));

    // overwrite resolved IP, needed for shared web hosting DO NOT REMOVE, see http://en.wikipedia.org/wiki/Shared_web_hosting_service
    final String urlHost = url.getHost();
    setHost(urlHost == null ? Domains.LOCALHOST : urlHost);

    final NonClosingInputStreamEntity body = new NonClosingInputStreamEntity(instream, length);
    // statistics
    this.upbytes = length;
    request.setEntity(body);

    // marks this client as in use until finish() is called
    this.currentRequest = request;
    execute(request, concurrent);
}
/**
* send data to the server named by uri
*
* @param uri the url to post
* @param parts to post
* @return content bytes
* @throws IOException
*/
/*
public byte[] POSTbytes(final String uri, final Map<String, ContentBody> parts, final boolean usegzip, final boolean concurrent) throws IOException {
final MultiProtocolURL url = new MultiProtocolURL(uri);
return POSTbytes(url, url.getHost(), parts, usegzip, concurrent);
}
*/
/**
 * Sends data using the HTTP POST method, without HTTP authentication.
 *
 * @param url address of the server
 * @param vhost name of the server at address which should respond. When null, localhost is assumed.
 * @param post data to send (name-value-pairs)
 * @param usegzip if the body should be gzipped
 * @param concurrent whether the request should be executed by a thread of the executor pool
 * @return response body, may be null
 * @throws IOException when the request fails
 */
public byte[] POSTbytes(final MultiProtocolURL url, final String vhost, final Map<String, ContentBody> post, final boolean usegzip, final boolean concurrent) throws IOException {
    final String noUserName = null;
    final String noPassword = null;
    return POSTbytes(url, vhost, post, noUserName, noPassword, usegzip, concurrent);
}
/**
 * Sends data as a multipart body using the HTTP POST method.
 * <p>
 * Requests to localhost with a non-null password are sent with a dedicated client which
 * supports HTTP Basic and Digest authentication; all other requests use the shared client setup.
 * </p>
 *
 * @param url address to request on the server
 * @param vhost name of the server at address which should respond. When null, localhost is assumed.
 * @param post data to send (name-value-pairs)
 * @param userName user name for HTTP authentication : only sent when requesting localhost
 * @param password encoded password for HTTP authentication : only sent when requesting localhost
 * @param usegzip if the body should be gzipped
 * @param concurrent whether the request should be executed by a thread of the executor pool.
 *        Ignored when requesting localhost with a non-null password
 * @return response body, may be null
 * @throws IOException when an error occurred
 */
public byte[] POSTbytes(final MultiProtocolURL url, final String vhost, final Map<String, ContentBody> post,
        final String userName, final String password, final boolean usegzip, final boolean concurrent) throws IOException {
    final HttpPost request = new HttpPost(url.toNormalform(true));
    final boolean isLocal = Domains.isLocalhost(url.getHost());
    if (!isLocal) {
        // overwrite resolved IP, needed for shared web hosting DO NOT REMOVE, see http://en.wikipedia.org/wiki/Shared_web_hosting_service
        setHost(url.getHost());
    }
    if (vhost == null) {
        setHost(Domains.LOCALHOST);
    }

    final MultipartEntityBuilder multipart = MultipartEntityBuilder.create();
    for (final Entry<String, ContentBody> field : post.entrySet()) {
        multipart.addPart(field.getKey(), field.getValue());
    }
    final HttpEntity body = multipart.build();
    // statistics
    this.upbytes = body.getContentLength();
    if (usegzip) {
        request.setEntity(new GzipCompressingEntity(body));
    } else {
        request.setEntity(body);
    }

    final boolean authenticate = isLocal && password != null;
    if (!authenticate) {
        return getContentBytes(request, Integer.MAX_VALUE, concurrent);
    }

    /* authenticated request to localhost */
    final CredentialsProvider credentials = new BasicCredentialsProvider();
    credentials.setCredentials(
            new AuthScope("localhost", url.getPort()),
            new UsernamePasswordCredentials(userName, password));
    /* Use the custom YaCyDigestScheme for HTTP Digest Authentication */
    final Lookup<AuthSchemeProvider> authSchemes = RegistryBuilder.<AuthSchemeProvider>create()
            .register(AuthSchemes.BASIC, new BasicSchemeFactory())
            .register(AuthSchemes.DIGEST, new YaCyDigestSchemeFactory())
            .build();
    final CloseableHttpClient authClient = HttpClients.custom()
            .setDefaultCredentialsProvider(credentials)
            .setDefaultAuthSchemeRegistry(authSchemes)
            .build();
    byte[] responseBody = null;
    try {
        this.httpResponse = authClient.execute(request);
        try {
            final HttpEntity responseEntity = this.httpResponse.getEntity();
            if (responseEntity != null) {
                if (getStatusCode() == HttpStatus.SC_OK) {
                    responseBody = getByteArray(responseEntity, Integer.MAX_VALUE);
                }
                // Ensures that the entity content is fully consumed and the content stream, if exists, is closed.
                EntityUtils.consume(responseEntity);
            }
        } finally {
            this.httpResponse.close();
        }
    } finally {
        authClient.close();
    }
    return responseBody;
}
/**
* send stream-data to the server named by uri
*
* @param uri the url to post
* @param instream the stream to send
* @param length the length of the stream
* @return content bytes
* @throws IOException
*/
/*
public byte[] POSTbytes(final String uri, final InputStream instream, final long length, final boolean concurrent) throws IOException {
final MultiProtocolURL url = new MultiProtocolURL(uri);
final HttpPost httpPost = new HttpPost(url.toNormalform(true));
String host = url.getHost();
if (host == null) host = Domains.LOCALHOST;
setHost(host); // overwrite resolved IP, needed for shared web hosting DO NOT REMOVE, see http://en.wikipedia.org/wiki/Shared_web_hosting_service
final InputStreamEntity inputStreamEntity = new InputStreamEntity(instream, length);
// statistics
this.upbytes = length;
httpPost.setEntity(inputStreamEntity);
return getContentBytes(httpPost, Integer.MAX_VALUE, concurrent);
}
*/
/**
 * Gives access to the response of the most recently executed request.
 *
 * @return HttpResponse from the last call, or null when no request was executed yet
 */
public HttpResponse getHttpResponse() {
    final HttpResponse lastResponse = this.httpResponse;
    return lastResponse;
}
/**
 * Reads the status code of the most recently executed request.
 * Must only be called after a request was executed: there is no check for a missing response.
 *
 * @return status code from http request
 */
public int getStatusCode() {
    final CloseableHttpResponse lastResponse = this.httpResponse;
    return lastResponse.getStatusLine().getStatusCode();
}
/**
 * Get the MIME type from the Content-Type header of the response, without any parameters
 * (such as the charset).
 *
 * @return mime type (trimmed and lower cased) or null when not specified
 */
public String getMimeType() {
    if (this.httpResponse == null) {
        return null;
    }
    final Header contentType = this.httpResponse.getFirstHeader(HttpHeaders.CONTENT_TYPE);
    if (contentType == null) {
        return null;
    }
    String mimeType = contentType.getValue();
    if (mimeType == null) {
        return null;
    }
    // cut off parameters first, so that white space in front of the ';' gets trimmed as well
    final int pos = mimeType.indexOf(';');
    if (pos >= 0) {
        mimeType = mimeType.substring(0, pos);
    }
    // Locale.ROOT : lower casing must not depend on the default locale (e.g. Turkish dotless i)
    return mimeType.trim().toLowerCase(java.util.Locale.ROOT);
}
/**
 * Get the character encoding from the charset parameter of the Content-Type response header.
 * The parameter name is matched case-insensitively and surrounding quotes are removed from
 * the value. When the parameter occurs more than once, the last non-empty value wins.
 *
 * @return the characters set name or null when not specified
 */
public String getCharacterEncoding() {
    if (this.httpResponse == null) {
        return null;
    }
    final Header contentTypeHeader = this.httpResponse.getFirstHeader(HttpHeaders.CONTENT_TYPE);
    if (contentTypeHeader == null) {
        return null;
    }
    final String contentType = contentTypeHeader.getValue();
    if (contentType == null) {
        return null;
    }
    final String[] parts = CommonPattern.SEMICOLON.split(contentType);
    if (parts == null) {
        return null;
    }
    final String prefix = "charset=";
    String charsetName = null;
    /* parts[0] is the media type itself, parameters start at index 1 */
    for (int i = 1; i < parts.length; i++) {
        final String param = parts[i].trim();
        /* parameter names are case-insensitive : accept "Charset=", "CHARSET=", ... as well */
        if (!param.regionMatches(true, 0, prefix, 0, prefix.length())) {
            continue;
        }
        String charset = param.substring(prefix.length()).trim();
        if (charset.length() > 0 && (charset.charAt(0) == '\"' || charset.charAt(0) == '\'')) {
            charset = charset.substring(1);
        }
        if (charset.endsWith("\"") || charset.endsWith("'")) {
            charset = charset.substring(0, charset.length() - 1);
        }
        charset = charset.trim();
        /* an empty value specifies nothing : do not report it as a charset name */
        if (charset.length() > 0) {
            charsetName = charset;
        }
    }
    return charsetName;
}
/**
 * Gives direct access to the content stream of a streaming request.
 * Since this way is uncontrolled by the client think of using 'writeTo' instead!
 * Please take care to call finish()!
 *
 * @return the content as InputStream, or null when there is no streaming request in
 *         progress or the response has no body
 * @throws IOException when the stream can not be obtained; the request is aborted then
 */
public InputStream getContentstream() throws IOException {
    if (this.httpResponse == null || this.currentRequest == null) {
        return null;
    }
    final HttpEntity entity = this.httpResponse.getEntity();
    if (entity == null) {
        return null;
    }
    try {
        return entity.getContent();
    } catch (final IOException e) {
        // give up this request: drop its statistics entry, abort it and release the response
        ConnectionInfo.removeConnection(this.currentRequest.hashCode());
        this.currentRequest.abort();
        this.currentRequest = null;
        this.httpResponse.close();
        throw e;
    }
}
/**
 * Streams the content of a streaming request to the given output stream.
 * Nothing is written when there is no streaming request in progress or the response has no body.
 * Please take care to call finish()!
 *
 * @param outputStream the stream to write the content to; it is flushed but not closed
 * @throws IOException when reading or writing fails; the request is aborted then
 */
public void writeTo(final OutputStream outputStream) throws IOException {
    if (this.httpResponse == null || this.currentRequest == null) {
        return;
    }
    final HttpEntity entity = this.httpResponse.getEntity();
    if (entity == null) {
        return;
    }
    try {
        entity.writeTo(outputStream);
        outputStream.flush();
        // Ensures that the entity content is fully consumed and the content stream, if exists, is closed.
        EntityUtils.consume(entity);
        // the request is done: drop its statistics entry and mark the client as free
        ConnectionInfo.removeConnection(this.currentRequest.hashCode());
        this.currentRequest = null;
    } catch (final IOException e) {
        // give up this request: drop its statistics entry, abort it and release the response
        ConnectionInfo.removeConnection(this.currentRequest.hashCode());
        this.currentRequest.abort();
        this.currentRequest = null;
        this.httpResponse.close();
        throw e;
    }
}
/**
 * Ensures a correct finish of client connections.
 * This method should be used after every use of GET or POST and writeTo or getContentstream!
 * <p>
 * A remaining streaming response body is read with a limited number of read attempts; when
 * it is still not exhausted then, the request in progress is aborted instead of downloading
 * the rest. Finally the client is marked as free again.
 * </p>
 *
 * @throws IOException when closing the response fails
 */
public void finish() throws IOException {
    try {
        final HttpEntity entity = (this.httpResponse == null) ? null : this.httpResponse.getEntity();
        if (entity == null || !entity.isStreaming()) {
            // nothing left to consume; the outer finally still releases the current request
            return;
        }
        /*
         * Not using EntityUtils.consumeQuietly(entity) because it takes too long on large
         * resources when this is called before the full stream was processed : for
         * example on caller exception handling.
         */
        InputStream remaining = null;
        try {
            remaining = entity.getContent();
            if (remaining != null) {
                final byte[] scratch = new byte[2048];
                int lastRead = remaining.read(scratch);
                for (int extraReads = 0; lastRead >= 0 && extraReads < 10; extraReads++) {
                    lastRead = remaining.read(scratch);
                }
                final boolean moreContentPending = lastRead >= 0;
                if (moreContentPending && this.currentRequest != null) {
                    this.currentRequest.abort();
                }
            }
        } catch (final IOException ignored) {
            /* Silently ignore here IOException (for example caused by stream already closed) as in EntityUtils.consumeQuietly() */
        } finally {
            if (remaining != null) {
                try {
                    remaining.close();
                } catch (final IOException ignored) {
                    /* nothing more can be done with this stream */
                }
            }
            this.httpResponse.close();
        }
    } finally {
        if (this.currentRequest != null) {
            ConnectionInfo.removeConnection(this.currentRequest.hashCode());
            this.currentRequest = null;
        }
    }
}
/**
 * Executes the request and loads the complete response body.
 * The response is closed and the statistics entry of the request is removed in any case.
 *
 * @param httpUriRequest the request to execute
 * @param maxBytes maximum response bytes to read, a negative value means no limit
 * @param concurrent whether the request should be executed by a thread of the executor pool
 * @return content bytes, or null when there is no response body, the status is not 200 or the
 *         announced content length is not below maxBytes
 * @throws IOException when the request fails; the request is aborted then
 */
private byte[] getContentBytes(final HttpUriRequest httpUriRequest, final int maxBytes, final boolean concurrent) throws IOException {
    try {
        execute(httpUriRequest, concurrent);
        if (this.httpResponse == null) {
            return null;
        }
        // get the response body
        final HttpEntity entity = this.httpResponse.getEntity();
        if (entity == null) {
            return null;
        }
        byte[] body = null;
        if (getStatusCode() == 200 && (maxBytes < 0 || entity.getContentLength() < maxBytes)) {
            body = getByteArray(entity, maxBytes);
        }
        // Ensures that the entity content is fully consumed and the content stream, if exists, is closed.
        EntityUtils.consume(entity);
        return body;
    } catch (final IOException e) {
        httpUriRequest.abort();
        throw e;
    } finally {
        if (this.httpResponse != null) {
            this.httpResponse.close();
        }
        ConnectionInfo.removeConnection(httpUriRequest.hashCode());
    }
}
/**
 * Executes the request with this client's request configuration, headers and target host
 * and stores the response in the httpResponse field. While the request is running the
 * current thread is renamed to show the requested URI.
 *
 * @param httpUriRequest the request to execute
 * @param concurrent when true the request is executed by a thread of the executor pool and
 *        this method waits for the response no longer than the timeout of this client;
 *        when false the request is executed by the calling thread
 * @throws IOException when the request fails or no response arrived in time; the request is
 *         aborted and its statistics entry is removed then. The original failure is
 *         available as cause.
 */
private void execute(final HttpUriRequest httpUriRequest, final boolean concurrent) throws IOException {
    final HttpClientContext context = HttpClientContext.create();
    context.setRequestConfig(reqConfBuilder.build());
    if (this.host != null)
        context.setTargetHost(new HttpHost(this.host));
    setHeaders(httpUriRequest);
    // statistics
    storeConnectionInfo(httpUriRequest);
    // execute the method; some asserts confirm that the request can be sent with Content-Length and is therefore not terminated by EOF
    if (httpUriRequest instanceof HttpEntityEnclosingRequest) {
        final HttpEntityEnclosingRequest hrequest = (HttpEntityEnclosingRequest) httpUriRequest;
        final HttpEntity entity = hrequest.getEntity();
        assert entity != null;
        //assert !entity.isChunked();
        //assert entity.getContentLength() >= 0;
        assert !hrequest.expectContinue();
    }
    final String initialThreadName = Thread.currentThread().getName();
    Thread.currentThread().setName("HTTPClient-" + httpUriRequest.getURI());
    final long time = System.currentTimeMillis();
    try {
        if (concurrent) {
            final FutureTask<CloseableHttpResponse> t = new FutureTask<CloseableHttpResponse>(new Callable<CloseableHttpResponse>() {
                @Override
                public CloseableHttpResponse call() throws ClientProtocolException, IOException {
                    final CloseableHttpClient client = clientBuilder.build();
                    return client.execute(httpUriRequest, context);
                }
            });
            executor.execute(t);
            /*
             * Collect the result in a local variable : the httpResponse field may still hold the
             * response of a previous request, which must not be mistaken for the result of this one.
             */
            CloseableHttpResponse response = null;
            try {
                response = t.get(this.timeout, TimeUnit.MILLISECONDS);
            } catch (final ExecutionException e) {
                /* the request itself failed : handled by the outer catch */
                throw e.getCause();
            } catch (final InterruptedException e) {
                /* keep the interruption visible for the caller; reported as missing response below */
                Thread.currentThread().interrupt();
            } catch (final Throwable e) {
                /* time-out or cancellation : reported as missing response below */
            }
            try {t.cancel(true);} catch (final Throwable e) {/* best effort only */}
            if (response == null) throw new IOException("timout to client after " + this.timeout + "ms" + " for url " + httpUriRequest.getURI().toString());
            this.httpResponse = response;
        } else {
            final CloseableHttpClient client = clientBuilder.build();
            this.httpResponse = client.execute(httpUriRequest, context);
        }
        this.httpResponse.setHeader(HeaderFramework.RESPONSE_TIME_MILLIS, Long.toString(System.currentTimeMillis() - time));
    } catch (final Throwable e) {
        ConnectionInfo.removeConnection(httpUriRequest.hashCode());
        httpUriRequest.abort();
        if (this.httpResponse != null) this.httpResponse.close();
        // same message as before, but the original failure is kept as cause
        throw new IOException("Client can't execute: "
                + (e.getCause() == null ? e.getMessage() : e.getCause().getMessage())
                + " duration=" + Long.toString(System.currentTimeMillis() - time) + " for url " + httpUriRequest.getURI().toString(), e);
    } finally {
        /* Restore the thread initial name */
        Thread.currentThread().setName(initialThreadName);
    }
}
/**
 * Return entity content loaded as a byte array. The content stream is always closed.
 *
 * @param entity HTTP entity
 * @param maxBytes maximum bytes to read. A negative value means no maximum limit other than
 *        Integer.MAX_VALUE, which is the upper bound of a byte array size.
 * @return content bytes or null when entity content is null.
 * @throws IOException when a read error occurred, the content is longer than maxBytes or
 *         there is not enough memory to hold the content
 */
public static byte[] getByteArray(final HttpEntity entity, int maxBytes) throws IOException {
    final InputStream instream = entity.getContent();
    if (instream == null) {
        return null;
    }
    try {
        final long contentLength = entity.getContentLength();
        final int limit = (maxBytes < 0) ? Integer.MAX_VALUE : maxBytes;
        /*
         * Content length may already be known now : check it before
         * downloading
         */
        if (contentLength > limit) {
            throw new IOException("Content to download exceed maximum value of " + limit + " bytes");
        }
        /* the cast is safe : contentLength is not above limit here */
        int initialSize = Math.min(limit, (int) contentLength);
        /* content length may be negative because unknown for now */
        if (initialSize < 0) {
            initialSize = 4096;
        }
        final ByteArrayBuffer buffer = new ByteArrayBuffer(initialSize);
        final byte[] tmp = new byte[4096];
        int l;
        /* Sum is a long to enable check against Integer.MAX_VALUE */
        long sum = 0;
        while ((l = instream.read(tmp)) != -1) {
            sum += l;
            /*
             * Check total length while downloading as content length might
             * not be known at beginning
             */
            if (sum > limit) {
                throw new IOException("Download exceeded maximum value of " + limit + " bytes");
            }
            buffer.append(tmp, 0, l);
        }
        return buffer.toByteArray();
    } catch (final OutOfMemoryError e) {
        /* report as a failed download, keeping the error as cause */
        throw new IOException(e.toString(), e);
    } finally {
        instream.close();
    }
}
/**
 * Apply the request headers configured on this client to the given request.
 * The custom headers (if any) are set first, then the target host header
 * when a host is configured, and finally the connection directive header.
 *
 * @param httpUriRequest the request to complete with headers
 */
private void setHeaders(final HttpUriRequest httpUriRequest) {
    final boolean hasCustomHeaders = this.headers != null;
    if (hasCustomHeaders) {
        for (final Entry<String, String> header : this.headers) {
            final String name = header.getKey();
            final String value = header.getValue();
            httpUriRequest.setHeader(name, value);
        }
    }

    if (this.host != null) {
        httpUriRequest.setHeader(HTTP.TARGET_HOST, this.host);
    }

    // don't keep alive, prevent CLOSE_WAIT state
    httpUriRequest.setHeader(HTTP.CONN_DIRECTIVE, "close");
}
/**
 * Register a new ConnectionInfo entry describing the given request :
 * scheme, target ("host" or "host:port"), command ("METHOD path"),
 * the request hash code as identifier, the current time and the
 * uploaded bytes count of this client.
 *
 * @param httpUriRequest the request about to be executed
 */
private void storeConnectionInfo(final HttpUriRequest httpUriRequest) {
    final int port = httpUriRequest.getURI().getPort();
    final String hostName = httpUriRequest.getURI().getHost();
    //assert hostName != null : "uri = " + httpUriRequest.getURI().toString();

    final String protocol = httpUriRequest.getURI().getScheme();

    /* The port is only appended when the URI explicitly defines one */
    final String target;
    if (port == -1) {
        target = hostName;
    } else {
        target = hostName + ":" + port;
    }

    final String command = httpUriRequest.getMethod() + " " + httpUriRequest.getURI().getPath();

    final ConnectionInfo info = new ConnectionInfo(
            protocol,
            target,
            command,
            httpUriRequest.hashCode(),
            System.currentTimeMillis(),
            this.upbytes);
    ConnectionInfo.addConnection(info);
}
/**
 * Build the SSL connection socket factory, based on a "TLS" SSL context
 * initialized with a single permissive trust manager.
 * <p>
 * NOTE(security): the trust manager below accepts any client or server
 * certificate chain without validation, and the factory is created with a
 * NoopHostnameVerifier (presumably no host name check at all - confirm against
 * the HttpClient documentation). This is kept as is, but it means that https
 * connections made with this factory are not authenticated.
 *
 * @return a new SSL connection socket factory
 * @throws IllegalStateException when the SSL context can not be created or initialized
 */
private static SSLConnectionSocketFactory getSSLSocketFactory() {
    final TrustManager trustManager = new X509TrustManager() {
        @Override
        public void checkClientTrusted(final X509Certificate[] chain, final String authType)
                throws CertificateException {
            // intentionally empty : any client certificate chain is accepted
        }

        @Override
        public void checkServerTrusted(final X509Certificate[] chain, final String authType)
                throws CertificateException {
            // intentionally empty : any server certificate chain is accepted
        }

        @Override
        public X509Certificate[] getAcceptedIssuers() {
            // the X509TrustManager contract requires a non-null (possibly empty) array
            return new X509Certificate[0];
        }
    };

    final SSLContext sslContext;
    try {
        sslContext = SSLContext.getInstance("TLS");
        sslContext.init(null, new TrustManager[] { trustManager }, null);
    } catch (final NoSuchAlgorithmException e) {
        // should not happen : fail here with the cause rather than handing a null context over
        throw new IllegalStateException("TLS SSL context is not available", e);
    } catch (final KeyManagementException e) {
        // should not happen : fail here with the cause rather than handing an uninitialized context over
        throw new IllegalStateException("SSL context can not be initialized", e);
    }

    return new SSLConnectionSocketFactory(sslContext, new NoopHostnameVerifier());
}
/**
 * If the Keep-Alive header is not present in the response,
 * HttpClient assumes the connection can be kept alive indefinitely.
 * Here we limit this to 5 seconds if unset and to a max of 25 seconds.
 * A duration suggested by the server is honored as long as it does not
 * exceed that maximum.
 *
 * @return the keep-alive strategy to apply to connections
 */
private static ConnectionKeepAliveStrategy customKeepAliveStrategy() {
    return new DefaultConnectionKeepAliveStrategy() {
        @Override
        public long getKeepAliveDuration(HttpResponse response, HttpContext context) {
            final long keepAlive = super.getKeepAliveDuration(response, context);
            if (keepAlive <= 0) {
                /* No duration suggested by the server : do not keep alive indefinitely */
                return 5000;
            }
            /*
             * Never raise the duration suggested by the server (the connection
             * would be reused after the server dropped it), only cap it
             */
            return Math.min(keepAlive, 25000);
        }
    };
}
/**
 * Testing : fetch each given URL with a GET request and print its content,
 * then print the headers, protocol version and status line of the last
 * response received, and finally close the connection manager.
 *
 * @param args urls to test. "http://" is prepended to any argument which
 *            does not already start with "http://" or "https://".
 */
public static void main(final String[] args) {
    String url = null;
    // prepare Parts
//    final Map<String,ContentBody> newparts = new LinkedHashMap<String,ContentBody>();
//    try {
//        newparts.put("foo", new StringBody("FooBar"));
//        newparts.put("bar", new StringBody("BarFoo"));
//    } catch (final UnsupportedEncodingException e) {
//        System.out.println(e.getStackTrace());
//    }
    final HTTPClient client = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent);
    client.setRedirecting(false);
    // Get some
    for (final String arg : args) {
        url = arg;
        /* Case insensitive and locale independent scheme check, accepting https as well */
        final boolean hasScheme = url.regionMatches(true, 0, "http://", 0, "http://".length())
                || url.regionMatches(true, 0, "https://", 0, "https://".length());
        if (!hasScheme) {
            url = "http://" + url;
        }
        try {
            System.out.println(UTF8.String(client.GETbytes(url, null, null, true)));
        } catch (final IOException e) {
            e.printStackTrace();
        }
    }
    // Head some
//    try {
//        client.HEADResponse(url);
//    } catch (final IOException e) {
//        e.printStackTrace();
//    }
    /* No response is available when no url was given or when no request got an answer */
    if (client.getHttpResponse() != null) {
        for (final Header header : client.getHttpResponse().getAllHeaders()) {
            System.out.println("Header " + header.getName() + " : " + header.getValue());
//            for (HeaderElement element: header.getElements())
//                System.out.println("Element " + element.getName() + " : " + element.getValue());
        }
//        System.out.println(client.getHttpResponse().getLocale());
        System.out.println(client.getHttpResponse().getProtocolVersion());
        System.out.println(client.getHttpResponse().getStatusLine());
    }
    // Post some
//    try {
//        System.out.println(UTF8.String(client.POSTbytes(url, newparts)));
//    } catch (final IOException e1) {
//        e1.printStackTrace();
//    }
    // Close out connection manager
    try {
        HTTPClient.closeConnectionManager();
    } catch (final InterruptedException e) {
        e.printStackTrace();
        /* Restore the interrupted status for any code running after this method */
        Thread.currentThread().interrupt();
    }
}
/**
 * Thread evicting expired and idle connections from a connection manager.
 * About every 5 seconds, it closes the expired connections and the
 * connections idle for more than 30 seconds. Once {@link #shutdown()} has
 * been called, the loop ends and the connection manager itself is shut down.
 * <p>
 * NOTE: when the thread is interrupted instead, it terminates without
 * shutting down the connection manager.
 */
public static class IdleConnectionMonitorThread extends Thread {

    /** The connection manager to maintain */
    private final HttpClientConnectionManager connMgr;

    /** Set to true once termination has been requested */
    private volatile boolean shutdown;

    /**
     * @param connMgr the connection manager to maintain
     */
    public IdleConnectionMonitorThread(HttpClientConnectionManager connMgr) {
        super();
        this.setName("HTTPClient.IdleConnectionMonitorThread");
        this.connMgr = connMgr;
    }

    @Override
    public void run() {
        try {
            while (!shutdown) {
                synchronized (this) {
                    /*
                     * Check again under the monitor : a shutdown() call
                     * occurring right after the loop test would otherwise
                     * be noticed only once the full wait time elapsed
                     */
                    if (!shutdown) {
                        wait(5000);
                    }
                }
                /*
                 * Maintenance is done without holding the monitor, so that
                 * shutdown() callers are never blocked by it
                 */
                // Close expired connections
                connMgr.closeExpiredConnections();
                // Optionally, close connections
                // that have been idle longer than 30 sec
                connMgr.closeIdleConnections(30, TimeUnit.SECONDS);
            }
            connMgr.shutdown();
        } catch (final InterruptedException ex) {
            // terminate
        }
    }

    /**
     * Request the termination of this thread and wake it up when it is waiting.
     */
    public void shutdown() {
        shutdown = true;
        synchronized (this) {
            notifyAll();
        }
    }
}
}