/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package gobblin.util;
import java.io.IOException;
import java.io.InputStream;
import java.net.InetAddress;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.UnknownHostException;
import java.util.Properties;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.base.Preconditions;
import com.google.common.io.Closer;
/**
* Allows conversion of URLs identifying a Hadoop cluster (e.g. resource manager url or
* a job tracker URL) to a human-readable name.
*
* <p>The class will automatically load a resource named {@link #URL_TO_NAME_MAP_RESOURCE_NAME} to
* get a default mapping. It expects this resource to be in the Java Properties file format. The
* name of the property is the cluster URL and the value is the human-readable name.
*
* <p><b>IMPORTANT:</b> Don't forget to escape colons ":" in the file as those may be interpreted
* as name/value separators.
*/
public class ClustersNames {
  /** Name of the classpath resource (Java properties format) that holds the default URL-to-name mapping. */
  public static final String URL_TO_NAME_MAP_RESOURCE_NAME = "GobblinClustersNames.properties";

  private static final Logger LOG = LoggerFactory.getLogger(ClustersNames.class);

  /** Default Hadoop configuration consulted by {@link #getClusterName()}. */
  private static final Configuration HADOOP_CONFIGURATION = new Configuration();

  /** Lazily created shared instance; only accessed while holding the {@code ClustersNames.class} monitor. */
  private static ClustersNames THE_INSTANCE;

  /** Maps a cluster URL (property name) to its human-readable name (property value). */
  private final Properties urlToNameMap = new Properties();

  /**
   * Creates an instance and tries to seed the mapping from {@link #URL_TO_NAME_MAP_RESOURCE_NAME}.
   *
   * <p>The resource is first looked up relative to the runtime class and, if not found there, through the
   * system class loader. Failure to find, read or close the resource is logged and otherwise ignored, in
   * which case the instance starts with whatever was loaded so far (possibly an empty mapping).
   */
  protected ClustersNames() {
    try (Closer closer = Closer.create()) {
      InputStream propsInput = closer.register(getClass().getResourceAsStream(URL_TO_NAME_MAP_RESOURCE_NAME));
      if (null == propsInput) {
        // Not found next to the class: fall back to the system class path.
        propsInput = closer.register(ClassLoader.getSystemResourceAsStream(URL_TO_NAME_MAP_RESOURCE_NAME));
      }
      if (null != propsInput) {
        try {
          this.urlToNameMap.load(propsInput);
          LOG.info("Loaded cluster names map:{}", this.urlToNameMap);
        } catch (IOException e) {
          LOG.warn("Unable to load cluster names map: " + e, e);
        }
      } else {
        LOG.info("no default cluster mapping found");
      }
    } catch (IOException e) {
      LOG.warn("unable to close resource input stream for " + URL_TO_NAME_MAP_RESOURCE_NAME + ":" + e, e);
    }
  }

  /**
   * Translates a cluster URL into a human-readable name.
   *
   * <p>An explicit mapping for the exact string wins. Without one, the URL is normalized: a hierarchical
   * URI with a host (e.g. {@code http://host:8032/path}) is reduced to its host; any other syntactically
   * valid URI has every {@code '/'} and {@code ':'} turned into a separator, surrounding separators removed
   * and the remaining ones written as {@code '_'} (so a bare {@code host:8032} becomes {@code host_8032});
   * a string that is not a valid URI is returned unchanged.
   *
   * @param clusterUrl the cluster URL or identifier; may be {@code null}
   * @return the mapped or normalized name, or {@code null} if {@code clusterUrl} is {@code null}
   */
  public String getClusterName(String clusterUrl) {
    if (null == clusterUrl) {
      return null;
    }
    String res = this.urlToNameMap.getProperty(clusterUrl);
    return null != res ? res : normalizeClusterUrl(clusterUrl);
  }

  /**
   * Registers (or replaces) the human-readable name for a cluster URL.
   *
   * @param clusterUrl the cluster URL, used verbatim as the lookup key
   * @param clusterName the human-readable name
   * @throws NullPointerException if either argument is {@code null}
   */
  public void addClusterMapping(String clusterUrl, String clusterName) {
    Preconditions.checkNotNull(clusterUrl, "cluster URL expected");
    Preconditions.checkNotNull(clusterName, "cluster name expected");
    this.urlToNameMap.put(clusterUrl, clusterName);
  }

  /**
   * Registers (or replaces) the human-readable name for a cluster URL.
   *
   * @param clusterUrl the cluster URL; its {@link URL#toString()} form is used as the lookup key
   * @param clusterName the human-readable name
   * @throws NullPointerException if either argument is {@code null}
   */
  public void addClusterMapping(URL clusterUrl, String clusterName) {
    Preconditions.checkNotNull(clusterUrl, "cluster URL expected");
    Preconditions.checkNotNull(clusterName, "cluster name expected");
    this.urlToNameMap.put(clusterUrl.toString(), clusterName);
  }

  /**
   * Derives a name from a cluster identifier that has no explicit mapping.
   *
   * <p>Only hierarchical URIs with a host component lose their port (together with scheme and path).
   * Note that {@link URI} parses a bare {@code host:port} as an opaque URI whose scheme is the host, so
   * such identifiers keep the port and come out as {@code host_port}.
   *
   * @param clusterIdentifier the identifier to normalize; must not be {@code null}
   * @return the normalized name, or the identifier unchanged if it is not a valid URI
   */
  private static String normalizeClusterUrl(String clusterIdentifier) {
    try {
      URI uri = new URI(clusterIdentifier.trim());
      if (!uri.isOpaque() && null != uri.getHost()) {
        clusterIdentifier = uri.getHost();
      } else {
        // Opaque URIs and URIs without a host: flatten '/' and ':' into '_' while dropping
        // leading/trailing separators. Plain character replacement avoids recompiling a regex per call.
        clusterIdentifier = uri.toString().replace('/', ' ').replace(':', ' ').trim().replace(' ', '_');
      }
    } catch (URISyntaxException e) {
      // Not a URI: leave the identifier as is.
    }
    return clusterIdentifier;
  }

  /**
   * Returns the name of the cluster on which the application is running, using a default Hadoop
   * {@link Configuration} created when this class is loaded.
   *
   * @return the cluster name as computed by {@link #getClusterName(Configuration)}
   * @see #getClusterName(Configuration)
   */
  public String getClusterName() {
    return getClusterName(HADOOP_CONFIGURATION);
  }

  /**
   * Returns the name of the cluster on which the application is running, based on the given Hadoop
   * configuration.
   *
   * <p>
   * <b>MapReduce mode</b> The value of "yarn.resourcemanager.address" is translated with
   * {@link #getClusterName(String)}: an explicit mapping is used if present, otherwise the value is
   * normalized (a bare {@code host:port} value becomes {@code host_port}).
   * </p>
   *
   * <p>
   * <b>Standalone mode (outside of hadoop)</b> If the property is missing or blank, or the translated name
   * starts with "localhost" or "0.0.0.0", the host name of {@link InetAddress#getLocalHost()} is used
   * instead. If the local host cannot be resolved, the translated name is returned as is, which may be
   * {@code null}.
   * </p>
   *
   * <p>
   * Use {@link #getClusterName(String)} if you already have the cluster URL
   * </p>
   *
   * @see #getClusterName()
   * @param conf Hadoop configuration to read the resource manager address from; must not be {@code null}
   * @return the cluster name, or {@code null} if none could be determined
   */
  public String getClusterName(Configuration conf) {
    // ResourceManager address in Hadoop2
    String clusterIdentifier = getClusterName(conf.get("yarn.resourcemanager.address"));

    // Fall back to the local host name when running outside of Hadoop (no or blank address) or when the
    // address only designates the local machine (localhost / 0.0.0.0).
    if (clusterIdentifier == null || clusterIdentifier.trim().isEmpty()
        || StringUtils.startsWithIgnoreCase(clusterIdentifier, "localhost")
        || StringUtils.startsWithIgnoreCase(clusterIdentifier, "0.0.0.0")) {
      try {
        clusterIdentifier = InetAddress.getLocalHost().getHostName();
      } catch (UnknownHostException e) {
        // Best effort: keep whatever identifier we have, but leave a trace of why the fallback failed.
        LOG.warn("Unable to resolve the local host name; cluster name left as " + clusterIdentifier, e);
      }
    }
    return clusterIdentifier;
  }

  /**
   * Returns the shared instance, creating it on first use.
   *
   * @return the singleton {@link ClustersNames}
   */
  public static ClustersNames getInstance() {
    synchronized (ClustersNames.class) {
      if (null == THE_INSTANCE) {
        THE_INSTANCE = new ClustersNames();
      }
      return THE_INSTANCE;
    }
  }
}