/*
GNU General Public License
CacheWolf is a software for PocketPC, Win and Linux that
enables paperless caching.
It supports the sites geocaching.com and opencaching.de
Copyright (C) 2006 CacheWolf development team
See http://www.cachewolf.de/ for more information.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
package CacheWolf.utils;
import com.jcraft.jzlib.GZIPInputStream;
import ewe.data.Property;
import ewe.data.PropertyList;
import ewe.io.AsciiCodec;
import ewe.io.ByteArrayInputStream;
import ewe.io.File;
import ewe.io.FileOutputStream;
import ewe.io.IOException;
import ewe.io.JavaUtf8Codec;
import ewe.io.TextReader;
import ewe.sys.Time;
import ewe.util.ByteArray;
import ewe.util.CharArray;
import ewe.util.mString;
/**
 * Static helper for fetching documents over HTTP.
 * <p>
 * The class keeps a single current {@link HttpConnection}, a list of permanent request headers, a list of
 * one-shot request headers, optional POST data and a simple cookie store. The one-shot settings
 * (request headers, POST data, forced redirect, redirection limit) are reset after every call of
 * {@link #fetchByteArray(String)}, regardless of whether the call succeeded.
 * <p>
 * All state lives in static fields and no synchronization is visible in this file, so the class is
 * presumably meant to be used from one thread at a time.
 */
public class UrlFetcher {
    /** Connection used by the most recent fetch; {@code null} before the first fetch. */
    static HttpConnection conn;
    /** Redirection limit for the next fetch; reset to 5 after every fetch. */
    static int maxRedirections = 5;
    /** One-shot request headers for the next fetch; cleared after every fetch. */
    static PropertyList requestorProperties = null;
    /** Request headers sent with every fetch; created lazily with defaults. */
    static PropertyList permanentRequestorProperties = null;
    /** Cookie store. Entries created by {@link #rememberCookies()} are keyed "name;host" and hold the complete Set-Cookie string. */
    static PropertyList cookies = null;
    /** POST body for the next fetch; cleared after every fetch. */
    static String postData = null;
    /** If true, a non-redirect answer causes the original URL to be requested once more; cleared after use. */
    static boolean forceRedirect = false;
    /** Accumulated duration of successful fetches, in seconds. */
    public static long usedTime = 0;
    /** Start time of the current/most recent fetch. */
    static Time webZeitStart;

    /**
     * @return the document properties (response headers) of the current connection, or null if there is no connection yet
     */
    public static PropertyList getDocumentProperties() {
        if (conn == null) {
            return null;
        }
        return conn.documentProperties;
    }

    /**
     * Sets the redirection limit for the next fetch only.
     *
     * @param value
     *            maximum number of redirections
     */
    public static void setMaxRedirections(int value) {
        maxRedirections = value;
    }

    /**
     * Enables/disables the "forced redirect" hack for the next fetch only.
     *
     * @param value
     *            true to request the original URL again if the first answer is not a redirection
     */
    public static void setForceRedirect(boolean value) {
        forceRedirect = value;
    }

    /**
     * Replaces the one-shot request headers for the next fetch.
     *
     * @param value
     *            the headers (may be null)
     */
    public static void setRequestorProperties(PropertyList value) {
        requestorProperties = value;
    }

    /**
     * Sets a single one-shot request header for the next fetch.
     *
     * @param name
     *            header name
     * @param value
     *            header value
     */
    public static void setRequestorProperty(String name, String value) {
        if (requestorProperties == null) {
            requestorProperties = new PropertyList();
        }
        requestorProperties.set(name, value);
    }

    /** Creates the permanent header list with its default User-Agent and Connection entries. */
    private static void initPermanentRequestorProperty() {
        permanentRequestorProperties = new PropertyList();
        permanentRequestorProperties.set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36");
        permanentRequestorProperties.set("Connection", "keep-alive");
    }

    /**
     * Sets or removes a request header that is sent with every fetch.
     *
     * @param name
     *            header name
     * @param value
     *            header value, or null to remove the header
     */
    public static void setPermanentRequestorProperty(String name, String value) {
        if (permanentRequestorProperties == null) {
            initPermanentRequestorProperty();
        }
        if (value != null) {
            permanentRequestorProperties.set(name, value);
            return;
        }
        final int index = permanentRequestorProperties.find(name);
        if (index >= 0) {
            permanentRequestorProperties.del(index);
        }
    }

    /** Makes sure the cookie store exists (it is created on first use). */
    private static void ensureCookies() {
        if (cookies == null) {
            cookies = new PropertyList();
        }
    }

    /** Removes all stored cookies. */
    public static void clearCookies() {
        if (cookies == null) {
            cookies = new PropertyList();
        } else {
            cookies.clear();
        }
    }

    /**
     * Stores a cookie. Nothing is stored if name or value is null.
     *
     * @param name
     *            key of the cookie; cookies are only sent if the key has the form "name;host"
     * @param value
     *            the cookie string
     */
    public static void setCookie(String name, String value) {
        ensureCookies();
        if (name != null && value != null) {
            cookies.set(name, value);
        }
    }

    /**
     * Removes a cookie from the store, if present.
     *
     * @param name
     *            key of the cookie
     */
    public static void delCookie(String name) {
        ensureCookies();
        if (name == null) {
            return;
        }
        final int index = cookies.find(name);
        if (index >= 0) {
            cookies.del(index);
        }
    }

    /**
     * @param name
     *            key of the cookie
     * @return the stored string for that key, or null if there is none (also if no cookie was ever stored)
     */
    public static String getCookie(String name) {
        // The store is created lazily, so it may not exist yet.
        if (cookies == null || name == null) {
            return null;
        }
        final Property p = cookies.get(name);
        if (p == null) {
            return null;
        }
        return (String) p.value;
    }

    /**
     * Extracts the value from a complete Set-Cookie string of the form "name=value; attribute; ...".
     * <p>
     * NOTE(review): a string without any ';' or whose value itself contains '=' yields null here, so such
     * cookies are never sent - confirm whether that restriction is intended.
     *
     * @param SetValue
     *            the complete Set-Cookie string
     * @return the value of the first name=value pair, or null if the string does not have the expected form
     */
    public static String getCookieValue(String SetValue) {
        if (SetValue == null) {
            return null;
        }
        final String[] theCookie = mString.split(SetValue, ';');
        if (theCookie.length > 1) {
            final String[] rp = mString.split(theCookie[0], '=');
            if (rp.length == 2) {
                return rp[1];
            }
        }
        return null;
    }

    /**
     * Adds a "Cookie" request header to the current connection, built from all stored cookies whose key
     * ("name;host") names the host of the current connection. Entries with a malformed key or an
     * unparsable value are skipped.
     */
    private static void addCookies2RequestorProperties() {
        ensureCookies();
        final String host = conn.getHost();
        final StringBuffer value = new StringBuffer();
        for (int i = 0; i < cookies.size(); i++) {
            final Property cookie = (Property) cookies.get(i);
            final String[] cd = mString.split(cookie.name, ';');
            if (cd.length < 2) {
                // stored via setCookie() without a ";host" suffix: cannot be matched to a host
                continue;
            }
            // is the cookie for this host?
            if (!cd[1].equalsIgnoreCase(host)) {
                continue;
            }
            final String cookieValue = getCookieValue((String) cookie.value);
            if (cookieValue == null) {
                // do not send the literal text "null" as cookie value
                continue;
            }
            value.append(cd[0]).append("=").append(cookieValue).append("; ");
        }
        if (value.length() > 0) {
            conn.setRequestorProperty("Cookie", value.toString());
        }
    }

    /** Copies the permanent request headers (creating the defaults if necessary) to the current connection. */
    private static void addPermanent2RequestorProperties() {
        if (permanentRequestorProperties == null) {
            initPermanentRequestorProperty();
        }
        conn.setRequestorProperty(permanentRequestorProperties);
    }

    /** Copies the one-shot request headers, if any, to the current connection. */
    private static void add2RequestorProperties() {
        if (requestorProperties != null) {
            conn.setRequestorProperty(requestorProperties);
        }
    }

    /**
     * Sets the POST body for the next fetch only. It is sent with content type
     * application/x-www-form-urlencoded.
     *
     * @param value
     *            the POST data, or null for no POST data
     */
    public static void setpostData(String value) {
        postData = value;
    }

    /**
     * Fetches a document as text, asking the server for gzip encoding.
     *
     * @param address
     *            the URL
     * @return the document text
     * @throws IOException
     *             see {@link #fetch(String, boolean)}
     */
    public static String fetch(String address) throws IOException {
        return fetch(address, true);
    }

    /**
     * Fetches a document and decodes it as UTF-8 text. If the answer carries "Content-Encoding: gzip",
     * it is unzipped; in that case every line is trimmed and terminated with "\n".
     *
     * @param address
     *            the URL
     * @param useGZip
     *            true to send "Accept-Encoding: gzip" with the request
     * @return the document text
     * @throws IOException
     *             if the request fails or no data was received
     */
    public static String fetch(String address, boolean useGZip) throws IOException {
        if (useGZip) {
            setRequestorProperty("Accept-Encoding", "gzip");
        }
        final ByteArray daten = fetchByteArray(address);
        if (conn == null || daten == null) {
            // fetchByteArray returns null if the connection was not open after the request
            throw new IOException("got no data from web");
        }

        boolean gzip = false;
        if (conn.documentProperties != null) {
            final Property p = conn.documentProperties.get("Content-Encoding");
            if (p != null && p.value != null) {
                gzip = p.value.toString().equalsIgnoreCase("gzip");
            }
        }
        if (!gzip) {
            return new BetterUTF8Codec().decodeUTF8(daten.data, 0, daten.length).toString();
        }

        // toBytes() is used instead of daten.data so that only the received bytes
        // (daten.length of them) are handed to the unzipper.
        final TextReader br = new TextReader(new GZIPInputStream(new ByteArrayInputStream(daten.toBytes())));
        br.codec = new BetterUTF8Codec();
        final StringBuffer sb = new StringBuffer();
        try {
            String line;
            while ((line = br.readLine()) != null) {
                sb.append(line.trim()).append("\n");
            }
        } catch (Exception ignored) {
            // best effort: whatever could be read before the error is returned
        } finally {
            try {
                br.close();
            } catch (Exception ignored) {
                // nothing sensible can be done if closing fails
            }
        }
        return sb.toString();
    }

    /**
     * Fetches a document and writes its bytes to a file.
     *
     * @param address
     *            the URL
     * @param target
     *            name of the file to write
     * @throws IOException
     *             if the request fails, no data was received or the file cannot be written
     */
    public static void fetchDataFile(String address, String target) throws IOException {
        final ByteArray daten = fetchByteArray(address);
        if (daten == null) {
            throw new IOException("got no data from web");
        }
        FileOutputStream outp = null;
        try {
            outp = new FileOutputStream(new File(target));
            outp.write(daten.toBytes());
        } finally {
            if (outp != null) {
                outp.close();
            }
        }
    }

    /** Resets all settings that are valid for a single fetch only. */
    private static void resetRequestState() {
        maxRedirections = 5;
        requestorProperties = null;
        postData = null;
        forceRedirect = false;
    }

    /**
     * Fetches a document as raw bytes, following HTTP redirections (response codes 300-399) up to
     * the current redirection limit. Cookies received with a redirection answer are remembered.
     * <p>
     * The one-shot settings (request headers, POST data, forced redirect, redirection limit) are reset
     * when this method returns or throws.
     *
     * @param url
     *            the URL
     * @return the received data, or null if the connection is not open after the request
     * @throws IOException
     *             if the connection fails, the response code is above 399, a redirection has no
     *             location, or there are too many redirections
     */
    public static ByteArray fetchByteArray(String url) throws IOException {
        webZeitStart = new Time();
        try {
            conn = new HttpConnection(url);
            conn.documentIsEncoded = isUrlEncoded(url);
            addPermanent2RequestorProperties();
            addCookies2RequestorProperties();
            add2RequestorProperties();
            if (postData != null) {
                conn.setPostData(postData);
                conn.setRequestorProperty("Content-Type", "application/x-www-form-urlencoded");
            }

            String urltmp = url;
            int redirectionCounter = 0;
            do {
                redirectionCounter++;
                conn.connect();
                final int code = conn.responseCode;
                if (code > 399) {
                    // abort with error
                    throw new IOException("URL: " + urltmp + "\nhttp response code: " + code);
                } else if (code >= 300) {
                    // redirection
                    urltmp = conn.documentProperties.getString("location", null);
                    if (urltmp == null) {
                        throw new IOException("URL: " + url + "\nhttp response code: " + code + " without location");
                    }
                    rememberCookies();
                    // NOTE(review): the cookies are selected for the host of the connection that answered,
                    // not for the redirection target - confirm this is what the target servers need.
                    addCookies2RequestorProperties();
                    conn.disconnect();
                    conn = conn.getRedirectedConnection(urltmp);
                    if (redirectionCounter > maxRedirections) {
                        throw new IOException("too many http redirections while trying to fetch: " + url + " only " + maxRedirections + " are allowed");
                    }
                } else if (forceRedirect) {
                    // hack for expedia, doing the original url again. (forceRedirect == true)
                    // expedia always must redirect >=1 time, but sometimes that is missed
                    // see also: http://www.geoclub.de/viewtopic.php?p=305071#305071
                    urltmp = url;
                    redirectionCounter = redirectionCounter - 1;
                    forceRedirect = false;
                } else {
                    // now can get data
                    urltmp = null;
                }
            } while (urltmp != null);

            ByteArray daten = null;
            if (conn.isOpen()) {
                daten = conn.readData();
                conn.disconnect();
            }
            usedTime = usedTime + ((new Time()).getTime() - webZeitStart.getTime()) / 1000; // sec
            return daten;
        } finally {
            // Also on failure: stale POST data or headers must not leak into the next request.
            resetRequestState();
        }
    }

    /**
     * Stores all cookies of the current answer ("Set-Cookie" document properties) in the cookie store.
     * The key is "name;host" and the stored value is the complete Set-Cookie string. Only strings of the
     * form "name=value; ..." with exactly one '=' in the first part are stored. Does nothing if there is
     * no connection or no document properties.
     */
    public static void rememberCookies() {
        final PropertyList pl = getDocumentProperties();
        if (pl == null) {
            return;
        }
        // collect Set-Cookie
        for (int j = 0; j < pl.size(); j++) {
            final Property p = (Property) pl.get(j);
            if (!p.name.equalsIgnoreCase("Set-Cookie")) {
                continue;
            }
            final String completeCookieString = (String) p.value;
            final String[] theCookie = mString.split(completeCookieString, ';');
            if (theCookie.length > 1) {
                final String[] rp = mString.split(theCookie[0], '=');
                if (rp.length == 2) {
                    // store everything (because of the expiry date)
                    setCookie(rp[0] + ";" + conn.getHost(), completeCookieString);
                }
            }
        }
    }

    /**
     * @param url
     *            the URL to check
     * @return true, if the string seems to be already URL encoded (that is, it contains only url-allowed chars), false otherwise
     */
    private static boolean isUrlEncoded(String url) {
        final String allowed = "-_.~!*'();:@&=+$,/?%#[]";
        final char[] src = ewe.sys.Vm.getStringChars(url);
        for (int i = 0; i < src.length; i++) {
            final char c = src[i];
            final boolean alphanumeric = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9');
            if (!alphanumeric && allowed.indexOf(c) < 0) {
                return false;
            }
        }
        return true;
    }

    /**
     * This method encodes an URL containing special characters using the UTF-8 codec in %nn%nn notation<br>
     * Note that the encoding for URLs is not generally defined. Usually cp1252 or UTF-8 is used. It depends on what the server expects, what encoding you must use.
     *
     * @param cc
     *            the text to encode
     * @return the text, converted to UTF-8 bytes and then encoded with {@link #encodeURL(String, boolean)} (space becomes '+')
     * @throws IOException
     *             if one of the codecs fails
     */
    public final static String toUtf8Url(String cc) throws IOException {
        final JavaUtf8Codec coder = new JavaUtf8Codec();
        final ByteArray utf8 = new ByteArray();
        coder.encodeText(cc.toCharArray(), 0, cc.length(), true, utf8);
        // turn the UTF-8 bytes back into a string (one char per byte) so that encodeURL can handle them
        final AsciiCodec asciicod = new AsciiCodec();
        final CharArray utf8bytes = new CharArray();
        asciicod.decodeText(utf8.data, 0, utf8.length, true, utf8bytes);
        return encodeURL(utf8bytes.toString(), true);
    }

    /** Hex digits used for the %## notation. */
    final static String hex = ewe.util.TextEncoder.hex;

    /**
     * Encode the URL using %## notation. Note: this fixes a bug in ewe.net.URL.encodeURL(): that routine assumes all chars to be < 127. This method is mainly copied from there
     * It also encodes the /. This is necessary for the __VIEWSTATEs of GC
     * <p>
     * Only the low 8 bits of a character are written in %## notation, so characters above 255 should be
     * converted to bytes first (see {@link #toUtf8Url(String)}).
     *
     * @param url
     *            The unencoded URL.
     * @param spaceToPlus
     *            true if you wish a space to be encoded as a '+', false to encode it as %20
     * @return The encoded URL.
     */
    public static String encodeURL(String url, boolean spaceToPlus) {
        final char[] what = ewe.sys.Vm.getStringChars(url);
        final int max = what.length;
        char[] dest = new char[max + max / 2];
        // int, not char: a char index would wrap around at 65536 and corrupt long results
        int d = 0;
        for (int i = 0; i < max; i++) {
            if (d >= dest.length - 2) {
                // make room for at least one more %## triple
                final char[] n = new char[dest.length + dest.length / 2 + 3];
                ewe.sys.Vm.copyArray(dest, 0, n, 0, d);
                dest = n;
            }
            char c = what[i];
            if (spaceToPlus && c == ' ') {
                c = '+';
            } else if (c <= ' ' || c >= 127 || c == '+' || c == '&' || c == '%' || c == '=' || c == '|' || c == '{' || c == '}' || c == '$' || c == '/' || c == ',') {
                dest[d++] = '%';
                dest[d++] = hex.charAt((c >> 4) & 0xf);
                dest[d++] = hex.charAt(c & 0xf);
                continue;
            }
            dest[d++] = c;
        }
        return new String(dest, 0, d);
    }
}