package com.zillabyte.motherbrain.utils;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;
import javax.annotation.Nullable;
import org.apache.commons.io.IOUtils;
public class UrlHelper {
// http://www.regexplanet.com/advanced/java/index.html
// (http(s)?://)?(\w+@)?[\w\.\-]*?(([\w\-]+\.[\w\-]+\.\w{2})|([\w\-]+\.\w{2,4}))($|/|\s)(.*)
public static String v = "(http(s)?://)?(\\w+@)?[\\w\\.\\-]*?(([\\w\\-]+\\.[\\w\\-]+\\.\\w{2})|([\\w\\-]+\\.\\w{2,4}))($|/|\\s)(.*)";
// http://www.regexplanet.com/advanced/java/index.html
// (http(s)?://)?((\w+@)?[\w\.\-]*?(([\w\-]+\.[\w\-]+\.\w{2})|([\w\-]+\.\w{2,4})))($|/|\s)(.*)
static String fullString = "(http(s)?://)?((\\w+@)?[\\w\\.\\-]*?(([\\w\\-]+\\.[\\w\\-]+\\.\\w{2})|([\\w\\-]+\\.\\w{2,4})))($|/|\\s)(.*)";
final static Pattern pattern;
final public static Pattern fullPattern;
static {
final Pattern _pattern = Pattern.compile(v);
assert(_pattern != null);
pattern = _pattern;
final Pattern _fullPattern = Pattern.compile(fullString);
assert(_fullPattern != null);
fullPattern = _fullPattern;
}
public static @Nullable String getHost(String url) {
Matcher m = pattern.matcher(url.trim());
if (m.matches()) {
return m.group(4);
}
return null;
}
public static @Nullable String getHostFull(String url) {
Matcher m = fullPattern.matcher(url.trim());
if (m.matches()) {
return m.group(3).replace("www.", "");
}
return null;
}
public static String fetchWebBody(String rawUrl) throws IOException {
// Logger.debug("Fetching: " + rawUrl);
URL url = new URL(rawUrl);
URLConnection con = url.openConnection();
con.setConnectTimeout(10 * 1000);
con.setReadTimeout(15 * 1000);
try{
if (con.getContentType() == null) {
throw new IOException("can't connect: " + rawUrl);
}
if (con.getContentType().contains("text/html")) {
// success
} else if (con.getContentType().contains("text/plain")) {
// success
} else if (con.getContentType().contains("application/json")) {
// success
} else {
throw new IOException("not text/html " + con.getContentType() + " : " + rawUrl);
}
}catch(IllegalArgumentException e){
throw new IOException("bad url: " + rawUrl);
}
final InputStream in = con.getInputStream();
try {
String encoding = con.getContentEncoding();
encoding = encoding == null ? "UTF-8" : encoding;
String body = IOUtils.toString(in, encoding);
assert (body != null);
return body;
} finally {
in.close();
}
}
}