// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
package org.chromium.chrome.browser.util;
import android.text.TextUtils;
import org.chromium.base.CollectionUtil;
import org.chromium.base.Log;
import org.chromium.base.VisibleForTesting;
import java.io.UnsupportedEncodingException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URLDecoder;
import java.util.HashSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* Utilities for working with URIs (and URLs). These methods may be used in security-sensitive
* contexts (after all, origins are the security boundary on the web), and so the correctness bar
* must be high.
*/
public class UrlUtilities {
    private static final String TAG = "UrlUtilities";

    /**
     * URI schemes that are internal to Chrome.
     */
    private static final HashSet<String> INTERNAL_SCHEMES = CollectionUtil.newHashSet(
            "chrome", "chrome-native", "about");

    // Patterns used in validateIntentUrl.
    private static final Pattern DNS_HOSTNAME_PATTERN =
            Pattern.compile("^[\\w\\.-]*$");
    private static final Pattern JAVA_PACKAGE_NAME_PATTERN =
            Pattern.compile("^[\\w\\.-]*$");
    private static final Pattern ANDROID_COMPONENT_NAME_PATTERN =
            Pattern.compile("^[\\w\\./-]*$");
    private static final Pattern URL_SCHEME_PATTERN =
            Pattern.compile("^[a-zA-Z]+$");

    // Host-less intent URL prefix that java.net.URI refuses to parse, and the parseable
    // prefix (with a placeholder host) that validateIntentUrl substitutes for it.
    private static final String HOSTLESS_INTENT_PREFIX = "intent:#Intent;";
    private static final String PLACEHOLDER_HOST_INTENT_PREFIX = "intent://foo/#Intent;";

    /**
     * @param uri A URI.
     *
     * @return True if the URI's scheme is one that ContentView can handle.
     */
    public static boolean isAcceptedScheme(String uri) {
        return nativeIsAcceptedScheme(uri);
    }

    /**
     * @param uri A URI.
     *
     * @return True if the URI is valid for Intent fallback navigation.
     */
    public static boolean isValidForIntentFallbackNavigation(String uri) {
        return nativeIsValidForIntentFallbackNavigation(uri);
    }

    /**
     * @param uri A URI.
     *
     * @return True if the URI's scheme is one that Chrome can download.
     */
    public static boolean isDownloadableScheme(String uri) {
        return nativeIsDownloadable(uri);
    }

    /**
     * @param uri A URI. Must not be null.
     *
     * @return Whether the URI's scheme is for an internal chrome page. The scheme is looked up
     *         in {@link #INTERNAL_SCHEMES} exactly as {@link URI#getScheme()} returns it.
     */
    public static boolean isInternalScheme(URI uri) {
        return INTERNAL_SCHEMES.contains(uri.getScheme());
    }

    /**
     * Determines whether or not the given URLs belong to the same broad domain or host.
     * "Broad domain" is defined as the TLD + 1 or the host.
     *
     * For example, the TLD + 1 for http://news.google.com would be "google.com" and would be shared
     * with other Google properties like http://finance.google.com.
     *
     * If {@code includePrivateRegistries} is marked as true, then private domain registries (like
     * appspot.com) are considered "effective TLDs" -- all subdomains of appspot.com would be
     * considered distinct (effective TLD = ".appspot.com" + 1).
     * This means that http://chromiumreview.appspot.com and http://example.appspot.com would not
     * belong to the same host.
     * If {@code includePrivateRegistries} is false, all subdomains of appspot.com
     * would be considered to be the same domain (TLD = ".com" + 1).
     *
     * @param primaryUrl First URL
     * @param secondaryUrl Second URL
     * @param includePrivateRegistries Whether or not to consider private registries.
     * @return True iff the two URIs belong to the same domain or host.
     */
    public static boolean sameDomainOrHost(String primaryUrl, String secondaryUrl,
            boolean includePrivateRegistries) {
        return nativeSameDomainOrHost(primaryUrl, secondaryUrl, includePrivateRegistries);
    }

    /**
     * Determines whether or not the given URLs have the same host.
     * Unlike the above sameDomainOrHost(...) method, this does a simpler host matching, so
     * http://news.google.com and http://finance.google.com do not have the same host.
     *
     * @param primaryUrl First URL
     * @param secondaryUrl Second URL
     * @return True iff the two URLs have the same host.
     */
    public static boolean sameHost(String primaryUrl, String secondaryUrl) {
        return nativeSameHost(primaryUrl, secondaryUrl);
    }

    /**
     * This function works by calling net::registry_controlled_domains::GetDomainAndRegistry
     *
     * @param uri A URI
     * @param includePrivateRegistries Whether or not to consider private registries.
     *
     * @return The registered, organization-identifying host and all its registry information, but
     * no subdomains, from the given URI. Returns an empty string if the URI is invalid, has no host
     * (e.g. a file: URI), has multiple trailing dots, is an IP address, has only one subcomponent
     * (i.e. no dots other than leading/trailing ones), or is itself a recognized registry
     * identifier. If {@code uri} is null or empty it is returned unchanged (so a null input
     * yields null) without calling into native code.
     */
    public static String getDomainAndRegistry(String uri, boolean includePrivateRegistries) {
        if (TextUtils.isEmpty(uri)) return uri;
        return nativeGetDomainAndRegistry(uri, includePrivateRegistries);
    }

    /** @return whether two URLs match, ignoring the #fragment. */
    @VisibleForTesting
    public static boolean urlsMatchIgnoringFragments(String url, String url2) {
        // Identical strings (including two nulls) trivially match; skip the native call.
        if (TextUtils.equals(url, url2)) return true;
        return nativeUrlsMatchIgnoringFragments(url, url2);
    }

    /** @return whether the #fragment differs in two URLs. */
    @VisibleForTesting
    public static boolean urlsFragmentsDiffer(String url, String url2) {
        // Identical strings cannot differ in their fragment; skip the native call.
        if (TextUtils.equals(url, url2)) return false;
        return nativeUrlsFragmentsDiffer(url, url2);
    }

    /**
     * Checks whether {@code url} is a well-formed Android intent:// URL, as specified at
     * https://developer.chrome.com/multidevice/android/intents#syntax.
     *
     * The URL must have the scheme "intent", a host matching {@link #DNS_HOSTNAME_PATTERN}, an
     * empty or "/" path, and a fragment of the form {@code Intent;key=value;...;end} containing
     * at least one key=value pair. The keys package, action, category, component and scheme may
     * each appear at most once and their values must match the corresponding pattern; any other
     * key is treated as an opaque Intent extra.
     *
     * @param url An Android intent:// URL to validate. May be null.
     *
     * @return True if the URL passed every check, false otherwise. Failures are reported by
     *         returning false (with the reason logged at debug level), not by throwing
     *         URISyntaxException.
     */
    @VisibleForTesting
    public static boolean validateIntentUrl(String url) {
        if (url == null) {
            Log.d(TAG, "url was null");
            return false;
        }

        URI parsed;
        try {
            parsed = new URI(url);
        } catch (URISyntaxException e) {
            // It may be that we received a URI of the form "intent:#Intent...",
            // which e.g. Google Authenticator produces. Work around that
            // specific case by giving the URL a placeholder host. Only the leading
            // prefix is rewritten so that the rest of the URL is validated verbatim.
            // The rewritten URL no longer starts with the host-less prefix, so this
            // recursion happens at most once.
            if (url.startsWith(HOSTLESS_INTENT_PREFIX)) {
                return validateIntentUrl(PLACEHOLDER_HOST_INTENT_PREFIX
                        + url.substring(HOSTLESS_INTENT_PREFIX.length()));
            }
            Log.d(TAG, "Could not parse url '%s': %s", url, e.toString());
            return false;
        }

        if (!"intent".equals(parsed.getScheme())) {
            Log.d(TAG, "scheme was not 'intent'");
            return false;
        }

        String hostname = parsed.getHost();
        if (hostname == null) {
            Log.d(TAG, "hostname was null for '%s'", url);
            return false;
        }
        Matcher hostnameMatcher = DNS_HOSTNAME_PATTERN.matcher(hostname);
        if (!hostnameMatcher.matches()) {
            Log.d(TAG, "hostname did not match DNS_HOSTNAME_PATTERN");
            return false;
        }

        String path = parsed.getPath();
        if (path == null || (!path.isEmpty() && !path.equals("/"))) {
            Log.d(TAG, "path was null or not \"/\"");
            return false;
        }

        // We need to get the raw, unparsed, un-URL-decoded fragment.
        // parsed.getFragment() returns a URL-decoded fragment, which can
        // interfere with lexing and parsing Intent extras correctly. Therefore,
        // we handle the fragment "manually", but first assert that it
        // URL-decodes correctly.
        int fragmentStart = url.indexOf('#');
        if (fragmentStart == -1 || fragmentStart == url.length() - 1) {
            Log.d(TAG, "Could not find '#'");
            return false;
        }
        String fragment = url.substring(fragmentStart + 1);

        String decodedFragment = parsed.getFragment();
        if (decodedFragment == null) {
            Log.d(TAG, "Could not get fragment from parsed URL");
            return false;
        }
        try {
            // NOTE(review): URLDecoder performs form decoding, so a literal '+' in the raw
            // fragment becomes a space here while URI.getFragment() leaves it as '+'. Such
            // fragments are therefore rejected; confirm that is intended before relaxing it.
            if (!URLDecoder.decode(fragment, "UTF-8").equals(decodedFragment)) {
                Log.d(TAG, "Parsed fragment does not equal lexed fragment");
                return false;
            }
        } catch (UnsupportedEncodingException e) {
            Log.d(TAG, e.toString());
            return false;
        } catch (IllegalArgumentException e) {
            // Defensive: URI parsing should already have rejected malformed escapes, but a
            // validator must report bad input by returning false rather than by crashing.
            Log.d(TAG, e.toString());
            return false;
        }

        // Now lex and parse the correctly-encoded fragment.
        String[] parts = fragment.split(";");
        if (parts.length < 3
                || !parts[0].equals("Intent")
                || !parts[parts.length - 1].equals("end")) {
            Log.d(TAG, "Invalid fragment (not enough parts, lacking Intent, or lacking end)");
            return false;
        }

        boolean seenPackage = false;
        boolean seenAction = false;
        boolean seenCategory = false;
        boolean seenComponent = false;
        boolean seenScheme = false;

        // Skip the leading "Intent" and trailing "end" markers.
        for (int i = 1; i < parts.length - 1; ++i) {
            // This is OK *only* because no valid package, action, category,
            // component, or scheme contains (unencoded) "=".
            String[] pair = parts[i].split("=");
            if (pair.length != 2) {
                Log.d(TAG, "Invalid key=value pair '%s'", parts[i]);
                return false;
            }
            String key = pair[0];
            String value = pair[1];

            if (key.equals("package")) {
                if (seenPackage || !JAVA_PACKAGE_NAME_PATTERN.matcher(value).matches()) {
                    Log.d(TAG, "Invalid package '%s'", value);
                    return false;
                }
                seenPackage = true;
            } else if (key.equals("action")) {
                // Actions and categories are checked against the package-name pattern.
                if (seenAction || !JAVA_PACKAGE_NAME_PATTERN.matcher(value).matches()) {
                    Log.d(TAG, "Invalid action '%s'", value);
                    return false;
                }
                seenAction = true;
            } else if (key.equals("category")) {
                if (seenCategory || !JAVA_PACKAGE_NAME_PATTERN.matcher(value).matches()) {
                    Log.d(TAG, "Invalid category '%s'", value);
                    return false;
                }
                seenCategory = true;
            } else if (key.equals("component")) {
                if (seenComponent || !ANDROID_COMPONENT_NAME_PATTERN.matcher(value).matches()) {
                    Log.d(TAG, "Invalid component '%s'", value);
                    return false;
                }
                seenComponent = true;
            } else if (key.equals("scheme")) {
                if (seenScheme || !URL_SCHEME_PATTERN.matcher(value).matches()) {
                    Log.d(TAG, "Invalid scheme '%s'", value);
                    return false;
                }
                seenScheme = true;
            }
            // Otherwise assume we are seeing an Intent Extra. Up above, we ensured
            // that the #Intent... fragment was correctly URL-encoded;
            // beyond that, there is no further validation we can do. Extras
            // are blobs to us.
        }

        return true;
    }

    // JNI entry points backing the public wrappers above.
    private static native boolean nativeIsDownloadable(String url);
    private static native boolean nativeIsValidForIntentFallbackNavigation(String url);
    private static native boolean nativeIsAcceptedScheme(String url);
    private static native boolean nativeSameDomainOrHost(String primaryUrl, String secondaryUrl,
            boolean includePrivateRegistries);
    private static native boolean nativeSameHost(String primaryUrl, String secondaryUrl);
    private static native String nativeGetDomainAndRegistry(String url,
            boolean includePrivateRegistries);
    public static native boolean nativeIsGoogleSearchUrl(String url);
    public static native boolean nativeIsGoogleHomePageUrl(String url);
    private static native boolean nativeUrlsMatchIgnoringFragments(String url, String url2);
    private static native boolean nativeUrlsFragmentsDiffer(String url, String url2);
}