package esmska.transfer;
import esmska.data.Tuple;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.net.URLEncoder;
import java.util.HashSet;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.httpclient.Cookie;
import org.apache.commons.httpclient.Header;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.URI;
import org.apache.commons.httpclient.URIException;
import org.apache.commons.httpclient.cookie.CookiePolicy;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.methods.PostMethod;
import org.apache.commons.httpclient.methods.StringRequestEntity;
import org.apache.commons.httpclient.params.HttpClientParams;
import org.apache.commons.httpclient.params.HttpMethodParams;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.ObjectUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.Validate;
/** Class for connecting to HTTP resources and sending GET and POST requests.
* For each SMS there should be a separate instance.
* @author ripper
*/
public class GatewayConnector {
/** Logger shared by all instances of this class. */
private static final Logger logger = Logger.getLogger(GatewayConnector.class.getName());
/** Value sent as the User-Agent header; it is registered on the client in the constructor. */
private static final String USER_AGENT = "Mozilla/5.0 (X11; U; Linux x86_64; cs-CZ; rv:1.9.1.9) " +
"Gecko/20100402 Ubuntu/9.10 (karmic) Firefox/3.5.9";
/** Matches an HTML meta "refresh" tag (case-insensitive). Group 1 captures the
 * text following "url=". The capture is greedy: it extends up to the last quote
 * character found before the closing '&gt;' of the tag. */
private static final Pattern metaRedirPattern = Pattern.compile(
"<meta\\s+http-equiv=[^>]*refresh[^>]*url=([^>]*)(\"|')[^>]*>",
Pattern.CASE_INSENSITIVE);
/** Matches an HTML meta "content-type" tag (case-insensitive). Group 1 captures
 * the text following "charset="; the capture is greedy in the same way as in
 * {@link #metaRedirPattern}, so it may contain more than the bare charset name. */
private static final Pattern metaCharsetPattern = Pattern.compile(
"<meta\\s+http-equiv=[^>]*content-type[^>]*charset=([^>]*)(\"|')[^>]*>",
Pattern.CASE_INSENSITIVE);
/** Matches an XML declaration with an encoding attribute (case-insensitive).
 * Group 2 captures the text between the opening quote after "encoding=" and the
 * last quote character before the closing "?&gt;" (greedy as well). */
private static final Pattern xmlCharsetPattern = Pattern.compile(
"<\\?xml[^>]*encoding=(\"|')([^>]*)(\"|')[^>]*\\?>",
Pattern.CASE_INSENSITIVE);
/** HTTP client used for all requests of this connector; configured in the constructor. */
private final HttpClient client = new HttpClient();
/** POST data in the form [key1,value1,key2,value2,...]; stored by setConnection(). */
private String[] postData;
/** True if the next connect() should issue a POST, false for GET; stored by setConnection(). */
private boolean doPost;
/** Last textual response, or null if there is none (see setTextContent/setBinaryContent). */
private String textContent;
/** Last binary response, or null if there is none (see setTextContent/setBinaryContent). */
private byte[] binaryContent;
/** Value for the Referer request header; null (the initial value) means no header is sent. */
private String referer;
/** Target URL including the query string; assembled by setConnection(). */
private String fullURL;
// remembers the last computed redirect URL and how many times in a row it was
// computed again; used by computeRedirect() to detect redirection loops
private Tuple<String, Integer> lastRedirect = new Tuple<String, Integer>(null,0);
/** Creates a connector and configures its HTTP client: cookie handling,
 * user-agent, redirect policy and default charsets.
 */
public GatewayConnector() {
    HttpClientParams params = client.getParams();

    // cookies: behave like a browser and send them in a single header
    params.setCookiePolicy(CookiePolicy.BROWSER_COMPATIBILITY);
    params.setParameter(HttpMethodParams.SINGLE_COOKIE_HEADER, true);

    // pretend to be a regular browser so that servers do not reject us
    params.setParameter(HttpMethodParams.USER_AGENT, USER_AGENT);

    // redirects: circular ones are allowed because some sites rely on them
    // (together with cookies); relative ones are accepted too
    params.setParameter(HttpClientParams.ALLOW_CIRCULAR_REDIRECTS, true);
    params.setParameter(HttpClientParams.REJECT_RELATIVE_REDIRECT, false);
    params.setParameter(HttpClientParams.MAX_REDIRECTS, 50);

    // UTF-8 is the default charset for content, header elements and URIs
    params.setParameter(HttpClientParams.HTTP_CONTENT_CHARSET, "UTF-8");
    params.setParameter(HttpClientParams.HTTP_ELEMENT_CHARSET, "UTF-8");
    params.setParameter(HttpClientParams.HTTP_URI_CHARSET, "UTF-8");
}
// <editor-fold defaultstate="collapsed" desc="Get Methods">
/** Tells whether a textual response is currently stored.
 * @return true if text content is available; false if the stored response is
 * binary or there is no response at all
 */
public boolean isTextContent() {
    return this.textContent != null;
}
/** Returns the stored textual response.
 * @return text of the last response; null if no text response is stored
 */
public String getTextContent() {
    return this.textContent;
}
/** Returns the stored binary response. The internal array is returned
 * directly, without copying.
 * @return bytes of the last response; null if no binary response is stored
 */
public byte[] getBinaryContent() {
    return this.binaryContent;
}
// </editor-fold>
// <editor-fold defaultstate="collapsed" desc="Set Methods">
/** Sets the value of the Referer header used for subsequent requests.
 * No referer is set initially.
 * @param referer referer to send; null means that no Referer header is sent
 */
public void setReferer(final String referer) {
    this.referer = referer;
}
/** Sets preferred language to retrieve web content. The language is sent in
 * the Accept-Language header, which is registered among the default headers
 * of the host configuration. Calling this method again replaces the
 * previously configured language instead of adding a second header.
 * @param languageCode two-letter language code as defined in ISO 639-1
 */
public void setLanguage(String languageCode) {
    @SuppressWarnings("unchecked")
    HashSet<Header> existing = (HashSet<Header>) client.getHostConfiguration().
            getParams().getParameter("http.default-headers");

    //build a fresh set: the stored one is not modified in place, and an
    //Accept-Language header from an earlier call is dropped so that only
    //one such header is ever sent
    HashSet<Header> headerSet = new HashSet<Header>();
    if (existing != null) {
        for (Header header : existing) {
            if (!"Accept-Language".equalsIgnoreCase(header.getName())) {
                headerSet.add(header);
            }
        }
    }
    headerSet.add(new Header("Accept-Language", languageCode));

    client.getHostConfiguration().getParams().setParameter("http.default-headers", headerSet);
    logger.log(Level.FINER, "Preferred language set: {0}", languageCode);
}
/** Stores a binary response and discards any stored text response, so that
 * only one of the two is ever set.
 * @param binaryContent bytes of the response
 */
private void setBinaryContent(final byte[] binaryContent) {
    this.textContent = null;
    this.binaryContent = binaryContent;
}
/** Stores a text response and discards any stored binary response, so that
 * only one of the two is ever set.
 * @param textContent text of the response
 */
private void setTextContent(final String textContent) {
    this.binaryContent = null;
    this.textContent = textContent;
}
// </editor-fold>
/** Removes cookies from the cookie cache. Every cookie matching all of the
 * given criteria is removed. A null criterion matches any value, so passing
 * null for all three parameters removes every cookie.
 *
 * @param name name of the cookie; may be null
 * @param domain domain of the cookie; may be null
 * @param path path of the cookie; may be null
 */
public void forgetCookie(String name, String domain, String path) {
    //take a snapshot, empty the cache and put back only the cookies to keep
    Cookie[] snapshot = client.getState().getCookies();
    client.getState().clearCookies();
    for (Cookie candidate : snapshot) {
        boolean forget = (name == null || name.equals(candidate.getName()))
                && (domain == null || domain.equals(candidate.getDomain()))
                && (path == null || path.equals(candidate.getPath()));
        if (forget) {
            continue;
        }
        client.getState().addCookie(candidate);
    }
}
/** Prepare connector for a new connection. The connector state is changed
 * only after the resulting URL has been parsed successfully.
 * @param url URL where to connect. If you specify <tt>params</tt>, this must not
 * contain '?'.
 * @param params Additional parameters to the URL (aka query string).
 * The array is in the form [key1,value1,key2,value2,...]. Use null or
 * empty array for no parameters.
 * @param doPost true if this should be POST request; false if this should
 * be GET request
 * @param postData Data to be sent in the POST request. The array is in the
 * form [key1,value1,key2,value2,...]. Use null or empty array for no data.
 * @throws IllegalArgumentException When <tt>url</tt> is null.
 * @throws IOException When the <tt>url</tt> and <tt>params</tt> together does not
 * create a correct URL.
 */
public void setConnection(String url, String[] params, boolean doPost, String[] postData)
        throws IOException {
    if (url == null) {
        throw new IllegalArgumentException("url");
    }

    //create final url; convertParamsToString() returns null for null params,
    //which means "no query string" just like an empty result
    String target = url;
    String query = convertParamsToString(params);
    if (query != null && query.length() > 0) {
        target += "?" + query;
    }
    //throws MalformedURLException (an IOException) for an invalid URL
    URL address = new URL(target);

    this.doPost = doPost;
    this.postData = postData;
    this.fullURL = target;

    //set host - useful for redirects
    client.getHostConfiguration().setHost(address.getHost(), address.getPort(),
            address.getProtocol());

    //set proxy according to the protocol
    ProxyManager.ProxyType proxyType;
    if ("http".equals(address.getProtocol())) {
        proxyType = ProxyManager.ProxyType.HTTP;
    } else if ("https".equals(address.getProtocol())) {
        proxyType = ProxyManager.ProxyType.HTTPS;
    } else {
        proxyType = ProxyManager.ProxyType.SOCKS;
    }
    client.getHostConfiguration().setProxyHost(ProxyManager.getProxyHost(proxyType));
}
/** Performs the request configured by setConnection() - POST or GET. Any
 * response stored from a previous request is discarded first, so the
 * connector can be used repeatedly.
 * @return result of the underlying GET or POST request: true if it
 * succeeded; false otherwise
 * @throws IOException when there is a problem with connection
 */
public boolean connect() throws IOException {
    //forget the previous response
    binaryContent = null;
    textContent = null;

    return doPost ? doPost(fullURL, postData) : doGet(fullURL);
}
/** Find charset definition inside html/xml file. The HTML meta content-type
 * tag is checked first, the XML declaration second. Only a charset name that
 * this JVM supports is returned, so the result is safe to use for decoding.
 * @param content content retrieved from web page
 * @return charset name or null if none (or no usable one) found
 */
private String findContentCharset(byte[] content) {
    Validate.notNull(content);
    try {
        //the declarations we look for are plain ASCII, so decoding the
        //content as UTF-8 is good enough for the search
        String text = new String(content, "UTF-8");
        Matcher matcher = metaCharsetPattern.matcher(text);
        if (matcher.find()) {
            String charset = cleanCharsetName(matcher.group(1));
            if (charset != null) {
                return charset;
            }
        }
        matcher = xmlCharsetPattern.matcher(text);
        if (matcher.find()) {
            String charset = cleanCharsetName(matcher.group(2));
            if (charset != null) {
                return charset;
            }
        }
    } catch (Exception ex) {
        logger.log(Level.FINER, "Could not find encoding of reponse: ", ex);
    }
    return null;
}
/** Extract a usable charset name from text captured by the charset patterns.
 * The patterns capture greedily, therefore the text may contain surrounding
 * quotes or trailing attributes (e.g. <tt>UTF-8" standalone="yes</tt>).
 * @param raw captured text; may be null
 * @return supported charset name; null if there is none
 */
private static String cleanCharsetName(String raw) {
    if (raw == null) {
        return null;
    }
    String name = raw.trim();
    //drop leading quotes
    int start = 0;
    while (start < name.length()
            && (name.charAt(start) == '"' || name.charAt(start) == '\'')) {
        start++;
    }
    //the name ends at the first quote, semicolon or whitespace
    int end = start;
    while (end < name.length()) {
        char c = name.charAt(end);
        if (c == '"' || c == '\'' || c == ';' || Character.isWhitespace(c)) {
            break;
        }
        end++;
    }
    name = name.substring(start, end);
    if (name.length() == 0) {
        return null;
    }
    try {
        return java.nio.charset.Charset.isSupported(name) ? name : null;
    } catch (IllegalArgumentException ex) {
        //syntactically illegal charset name
        return null;
    }
}
/** Perform GET request. The response is stored as text content (when the
 * Content-Type starts with "text") or binary content. A meta refresh redirect
 * found in a text response is followed by another GET request. The connection
 * is always released, also on failure.
 * @param url URL where to connect
 * @return true if connection succeeded; false otherwise
 * @throws java.io.IOException When there is some problem with connection
 */
private boolean doGet(String url) throws IOException {
    logger.log(Level.FINE, "Getting url: {0}", url);
    GetMethod method = new GetMethod(url);
    String metaRedirect = null;
    try {
        //set referer
        if (referer != null) {
            method.setRequestHeader("Referer", referer);
        }
        int statusCode = client.executeMethod(method);
        //only HTTP 200 OK status code is correct
        if (statusCode != HttpStatus.SC_OK) {
            logger.log(Level.WARNING, "Problem connecting to \"{0}\". Response: {1}",
                    new Object[]{url, method.getStatusLine()});
            return false;
        }
        //decide whether text or binary response
        Header contentType = method.getResponseHeader("Content-Type");
        boolean text = (contentType != null && contentType.getValue().startsWith("text"));
        //read the response
        byte[] response = new byte[0];
        InputStream responseStream = method.getResponseBodyAsStream();
        if (responseStream != null) {
            try {
                response = IOUtils.toByteArray(responseStream);
            } finally {
                responseStream.close();
            }
        }
        //save response
        if (text) { //text content
            //charset declared inside the document wins over the HTTP header
            String charset = findContentCharset(response);
            setTextContent(new String(response,
                    StringUtils.defaultIfEmpty(charset, method.getResponseCharSet())));
            logger.log(Level.FINEST,"Retrieved text web content: {0}\n" +
                    "#### WEB CONTENT START ####\n{1}\n#### WEB CONTENT END ####",
                    new Object[]{contentType, getTextContent()});
        } else { //binary content
            setBinaryContent(response);
            logger.log(Level.FINEST, "Retrieved binary web content: {0}", contentType);
        }
        //if text response, check for meta redirects
        if (text) {
            String redirect = checkMetaRedirect(textContent);
            if (redirect != null) {
                logger.log(Level.FINE, "Following web redirect to: {0}", redirect);
                metaRedirect = computeRedirect(redirect, method.getURI());
            }
        }
    } finally {
        //don't forget to release connection - on every path, and before
        //a redirect is followed
        method.releaseConnection();
    }
    if (metaRedirect != null) {
        //redirect to new url
        return doGet(metaRedirect);
    }
    return true;
}
/** Perform POST request. The response is stored as text content (when the
 * Content-Type starts with "text") or binary content. HTTP redirects (3xx)
 * and meta refresh redirects are followed by a GET request. The connection is
 * always released, also on failure.
 * @param url URL where to connect
 * @param postData data which to send. In the form [key1, value1, key2, value2, ...].
 * Null means no data.
 * @return true if connection succeeded; false otherwise
 * @throws java.io.IOException When there is some problem with connection
 */
private boolean doPost(String url, String[] postData) throws IOException {
    logger.log(Level.FINE, "Posting data to url: {0}", url);
    PostMethod method = new PostMethod(url);
    String nextURL = null;
    try {
        //set referer
        if (referer != null) {
            method.setRequestHeader("Referer", referer);
        }
        //set post data; null data is sent as an empty body
        //(the request entity does not accept null content)
        String body = convertParamsToString(postData);
        method.setRequestEntity(new StringRequestEntity(
                body == null ? "" : body,
                "application/x-www-form-urlencoded",
                "UTF-8"));
        int statuscode = client.executeMethod(method);
        //check for error (4xx or 5xx) HTTP status codes
        if (statuscode >= 400) {
            logger.log(Level.WARNING, "Problem connecting to \"{0}\". Response: {1}",
                    new Object[]{url, method.getStatusLine()});
            return false;
        }
        //decide whether text or binary response
        Header contentType = method.getResponseHeader("Content-Type");
        boolean text = (contentType != null && contentType.getValue().startsWith("text"));
        //read the response
        byte[] response = new byte[0];
        InputStream responseStream = method.getResponseBodyAsStream();
        if (responseStream != null) {
            try {
                response = IOUtils.toByteArray(responseStream);
            } finally {
                responseStream.close();
            }
        }
        //save response
        if (text) { //text content
            //charset declared inside the document wins over the HTTP header
            String charset = findContentCharset(response);
            setTextContent(new String(response,
                    StringUtils.defaultIfEmpty(charset, method.getResponseCharSet())));
            logger.log(Level.FINEST,"Retrieved text web content: {0}\n" +
                    "#### WEB CONTENT START ####\n{1}\n#### WEB CONTENT END ####",
                    new Object[]{contentType, getTextContent()});
        } else { //binary content
            setBinaryContent(response);
            logger.log(Level.FINEST, "Retrieved binary web content: {0}", contentType);
        }
        if (statuscode >= 300 && statuscode < 400) {
            //HTTP redirection
            Header header = method.getResponseHeader("Location");
            if (header == null) {
                throw new IOException("Invalid HTTP redirect, no Location header");
            }
            String newURL = header.getValue();
            if (StringUtils.isEmpty(newURL)) {
                throw new IOException("Invalid HTTP redirect, Location header is empty");
            }
            if (!isAbsoluteURL(newURL) && !newURL.startsWith("/")) {
                newURL = computeRedirect(newURL, method.getURI());
            }
            //sometimes websites send us binary mess instead of properly urlencoded address
            //see http://code.google.com/p/esmska/issues/detail?id=436
            //we need to convert it in that case
            if (!StringUtils.isAsciiPrintable(newURL)) {
                logger.log(Level.FINE, "Received invalidly encoded redirect URL: {0}", newURL);
                String[] parts = StringUtils.split(newURL, "?", 2);
                //only the query string is re-encoded; an URL without
                //a query string is kept as it is
                if (parts.length == 2) {
                    newURL = parts[0] + "?" + URLEncoder.encode(parts[1], "UTF-8");
                    logger.log(Level.FINE, "URL converted to: {0}", newURL);
                }
            }
            //some characters are not allowed in URI spec, but websites still use them
            //and browsers tolerate them, e.g. `|`
            //see https://code.google.com/p/esmska/issues/detail?id=469
            //we need to convert those
            if (newURL.contains("|")) {
                logger.log(Level.FINE, "URL contains invalid character `|`: {0}", newURL);
                newURL = newURL.replace("|", "%7C");
                logger.log(Level.FINE, "URL converted to: {0}", newURL);
            }
            logger.log(Level.FINE, "Following http redirect to: {0}", newURL);
            nextURL = newURL;
        } else if (text) {
            //no HTTP redirect; if text response, check for meta redirects
            String redirect = checkMetaRedirect(textContent);
            if (redirect != null) {
                logger.log(Level.FINE, "Following web redirect to: {0}", redirect);
                nextURL = computeRedirect(redirect, method.getURI());
            }
        }
    } finally {
        //don't forget to release connection - on every path, and before
        //a redirect is followed
        method.releaseConnection();
    }
    if (nextURL != null) {
        //redirect to new url
        return doGet(nextURL);
    }
    return true;
}
/** Convert url parameters to string. Values are URL-encoded in UTF-8, keys
 * are used as they are. Pairs with a null or empty key are skipped, a null
 * value is treated as an empty string and a trailing key without a value is
 * ignored.
 * @param params input array in form [key1,value1,key2,value2,...]
 * @return string key1=value1&amp;key2=value2&amp;... in the x-www-form-urlencoded format;
 * or null when <tt>params</tt> are null
 * @throws UnsupportedEncodingException when UTF-8 encoding is not available
 */
private static String convertParamsToString(String[] params) throws UnsupportedEncodingException {
    if (params == null) {
        return null;
    }
    StringBuilder query = new StringBuilder();
    //walk over the values (odd indexes); the key is the preceding item
    for (int i = 1; i < params.length; i += 2) {
        String key = params[i - 1];
        String value = params[i];
        //skip empty keys
        if (key == null || key.length() == 0) {
            continue;
        }
        if (query.length() > 0) {
            query.append('&');
        }
        query.append(key).append('=');
        query.append(URLEncoder.encode(value == null ? "" : value, "UTF-8"));
    }
    return query.toString();
}
/** Looks for a meta redirect in a HTML page, i.e. for a
 * {@code <meta http-equiv="refresh" ...>} tag.
 *
 * @param page A HTML page as string.
 * @return the redirect target captured from the first such tag; null if the
 * page contains none.
 */
private static String checkMetaRedirect(String page) {
    Matcher refresh = metaRedirPattern.matcher(page);
    return refresh.find() ? refresh.group(1) : null;
}
/** Combine an old URL and a redirect to a new URL. The query string and the
 * fragment of the current URI never take part in the result. The computed
 * URL is checked for valid syntax and re-escaped if needed. The same URL
 * computed more than five times in a row after its first occurrence is
 * reported as a redirection loop.
 *
 * @param redirectURL new URL. May be absolute or relative. If it starts with
 * slash (/) it is applied to domain root.
 * @param currentURI current URI. Absolute or relative.
 * @return new URL to get
 * @throws IllegalArgumentException When any of the arguments is null
 * @throws java.io.IOException Problem when computing redirect, or a
 * redirection loop was detected
 */
private String computeRedirect(String redirectURL, URI currentURI) throws IOException {
    if (redirectURL == null) {
        throw new IllegalArgumentException("redirectURL");
    }
    if (currentURI == null) {
        throw new IllegalArgumentException("currentURI");
    }
    if (redirectURL.startsWith("./") || redirectURL.startsWith("../")) {
        try {
            redirectURL = convertRelativeRedirectToAbsolute(currentURI.toString(), redirectURL);
        } catch (IOException ex) {
            throw new IOException("Invalid HTTP redirect, Location header must " +
                    "be an absolute path and is: '" + redirectURL + "'", ex);
        }
    }
    if (redirectURL.startsWith("/")) {
        //relative redirect with slash
        String base = currentURI.getEscapedURI();
        if (isAbsoluteURL(base)) {
            //current uri is absolute, strip it to domain and append redirect
            base = stripQueryAndFragment(base);
            int slash = base.indexOf('/', base.indexOf("://") + 3);
            if (slash > 0) {
                base = base.substring(0, slash);
            }
            redirectURL = base + redirectURL;
        }
        //else: current uri is not absolute, nothing to do with it,
        //keep redirect url intact
    } else if (!isAbsoluteURL(redirectURL)) {
        //relative redirect without slash; a slash inside the query string
        //or fragment must not be mistaken for a path separator
        String base = stripQueryAndFragment(currentURI.getEscapedURI());
        int slash = base.lastIndexOf('/');
        if (isAbsoluteURL(base)) {
            //current uri is absolute, strip it to last slash
            //(but preserve domain) and append redirect
            int pathStart = base.indexOf("://") + 3;
            if (slash >= pathStart) {
                base = base.substring(0, slash);
            }
            redirectURL = base + "/" + redirectURL;
        } else {
            //current uri is not absolute, strip it to last slash
            //and append redirect
            base = (slash > 0) ? base.substring(0, slash + 1) : "";
            redirectURL = base + redirectURL;
        }
    }
    //else: absolute redirect, keep redirect url intact

    //because some websites sends incorrectly escaped (or more specifically
    //unescaped) URL characters, see http://code.google.com/p/esmska/issues/detail?id=269,
    //let's try to build an URI from it, and if it fails, let's try to fix it
    //and encode characters properly
    try {
        //constructed only to validate the syntax
        new URI(redirectURL, true);
    } catch (URIException ex) {
        //the URL is bad, we have to fix it
        logger.log(Level.FINER, "The computed redirect URL has invalid syntax ({0}): {1}",
                new Object[]{ex.getMessage(), redirectURL});
        try {
            redirectURL = new URI(redirectURL, false).getEscapedURI();
        } catch (Exception e) {
            throw new IOException("The computed redirect URL has invalid " +
                    "syntax and it can't be even fixed (" +
                    e.getMessage() + "): " + redirectURL, e);
        }
    }
    //count how many times in a row the same redirect was computed
    if (ObjectUtils.equals(lastRedirect.get1(), redirectURL)) {
        lastRedirect.set2(lastRedirect.get2() + 1);
    } else {
        lastRedirect.set1(redirectURL);
        lastRedirect.set2(0);
    }
    //check for redirection loops
    if (lastRedirect.get2() > 5) {
        throw new IOException("HTTP meta redirection endless loop detected");
    }
    logger.log(Level.FINE, "Computed new redirect full URL is: {0}", redirectURL);
    return redirectURL;
}
/** Cut the query string ('?' and everything after it) and the fragment
 * ('#' and everything after it) off an URI string.
 * @param uri URI string; not null
 * @return the part of the URI before the first '?' or '#'
 */
private static String stripQueryAndFragment(String uri) {
    int end = uri.length();
    int query = uri.indexOf('?');
    if (query >= 0) {
        end = query;
    }
    int fragment = uri.indexOf('#');
    if (fragment >= 0 && fragment < end) {
        end = fragment;
    }
    return uri.substring(0, end);
}
/** Applies a relative redirect to the URL it was received from. The query
 * string and the last path segment of the original URL are dropped, then
 * every leading './' of the redirect is skipped and every leading '../'
 * removes one more trailing segment.
 * @param oldUrl full original URL
 * @param redirect relative redirect starting with './' or '../'
 * @return the URL the redirect points to
 * @throws IOException when redirect can't be applied to original URL
 */
private String convertRelativeRedirectToAbsolute(String oldUrl, String redirect)
        throws IOException {
    try {
        //split the URL just behind the "//"
        int schemeEnd = oldUrl.indexOf("//") + 2;
        String scheme = oldUrl.substring(0, schemeEnd);
        String base = oldUrl.substring(schemeEnd);

        //drop the query string
        int queryStart = base.indexOf('?');
        if (queryStart >= 0) {
            base = base.substring(0, queryStart);
        }
        //drop the last path segment
        int lastSlash = base.lastIndexOf('/');
        if (lastSlash >= 0) {
            base = base.substring(0, lastSlash);
        }

        //consume the leading './' and '../' parts of the redirect
        String remainder = redirect;
        for (;;) {
            if (remainder.startsWith("./")) {
                remainder = remainder.substring(2);
            } else if (remainder.startsWith("../")) {
                remainder = remainder.substring(3);
                //fails when there is no segment left to go up from
                base = base.substring(0, base.lastIndexOf('/'));
            } else {
                break;
            }
        }
        return scheme + base + "/" + remainder;
    } catch (Exception ex) {
        throw new IOException("The redirect '" + redirect + "' is not valid " +
                "redirect to URL '" + oldUrl + "'", ex);
    }
}
/** Tells whether an URL is absolute, i.e. whether it starts with
 * <tt>http://</tt> or <tt>https://</tt> (case-sensitive).
 * @param url URL to check; not null
 * @return true if the URL starts with one of the two prefixes
 */
private static boolean isAbsoluteURL(String url) {
    if (url.startsWith("http://")) {
        return true;
    }
    return url.startsWith("https://");
}
}