/*
* RapidMiner
*
* Copyright (C) 2001-2014 by RapidMiner and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapidminer.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.ports.metadata;
import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import com.rapidminer.RapidMiner;
import com.rapidminer.example.AttributeRole;
import com.rapidminer.example.Attributes;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.tools.Ontology;
import com.rapidminer.tools.ParameterService;
import com.rapidminer.tools.Tools;
/**
 * This class stores detailed meta data information about ExampleSets: the
 * (possibly only partially known) number of examples, the meta data of the
 * individual attributes keyed by attribute name in insertion order, and a
 * {@link SetRelation} describing how the stored attributes relate to the
 * attributes that are actually present.
 *
 * @author Simon Fischer, Sebastian Land
 */
public class ExampleSetMetaData extends MetaData {

    private static final long serialVersionUID = 1L;

    /** Fallback used by {@link #getMaximumNumberOfAttributes()} if the preference is unset or unparsable. */
    private static final int DEFAULT_MAX_META_DATA_ATTRIBUTES = 250;

    /** Relation between the stored attribute meta data and the real attribute set. */
    private SetRelation attributesRelation = SetRelation.EQUAL;

    private MDInteger numberOfExamples = new MDInteger();

    /** Attribute meta data by attribute name; a linked map keeps the insertion order. */
    private Map<String, AttributeMetaData> attributeMetaData = new LinkedHashMap<String, AttributeMetaData>();

    private boolean nominalDataWasShrinked = false;

    /** Creates empty meta data for the class {@link ExampleSet}. */
    public ExampleSetMetaData() {
        super(ExampleSet.class);
    }

    /** Creates empty meta data for {@link ExampleSet} and passes the given map on to the super class. */
    public ExampleSetMetaData(Map<String, Object> keyValueMap) {
        super(ExampleSet.class, keyValueMap);
    }

    /** Creates empty meta data for {@link ExampleSet} and passes the given key/value pair on to the super class. */
    public ExampleSetMetaData(String key, Object value) {
        super(ExampleSet.class, key, value);
    }

    /** Creates meta data for {@link ExampleSet} containing the given attributes. */
    public ExampleSetMetaData(List<AttributeMetaData> attributeMetaData) {
        super(ExampleSet.class);
        addAllAttributes(attributeMetaData);
    }

    /** Creates empty meta data for the given example set class. */
    public ExampleSetMetaData(Class<? extends ExampleSet> clazz) {
        super(clazz);
    }

    /** Creates meta data for the given example set class containing the given attributes. */
    public ExampleSetMetaData(Class<? extends ExampleSet> clazz, List<AttributeMetaData> attributeMetaData) {
        super(clazz);
        addAllAttributes(attributeMetaData);
    }

    /**
     * This constructor will generate a complete meta data description of the
     * given example set. Please pay attention to the fact that it might be
     * very big since the meta data will contain each nominal value stored in the
     * data. With large, id-like data this will become very big.
     */
    public ExampleSetMetaData(ExampleSet exampleSet) {
        this(exampleSet, false);
    }

    /**
     * Generates the meta data of the given example set.
     *
     * @param exampleSet the example set to describe
     * @param shortened if <code>true</code>, at most {@link #getMaximumNumberOfAttributes()}
     *        attributes are described and the attribute statistics are not recalculated;
     *        if <code>false</code>, the statistics of all attributes are recalculated first
     *        and every attribute is described
     */
    public ExampleSetMetaData(ExampleSet exampleSet, boolean shortened) {
        super(ExampleSet.class);
        int remaining = Integer.MAX_VALUE;
        if (shortened) {
            remaining = getMaximumNumberOfAttributes();
        } else {
            exampleSet.recalculateAllAttributeStatistics();
        }
        Iterator<AttributeRole> roles = exampleSet.getAttributes().allAttributeRoles();
        while (roles.hasNext()) {
            AttributeRole role = roles.next();
            addAttribute(new AttributeMetaData(role, exampleSet, shortened));
            remaining--;
            if (remaining == 0) {
                break;
            }
        }
        numberOfExamples = new MDInteger(exampleSet.size());
    }

    /** Returns the meta data of the attribute with the given name or <code>null</code> if there is none. */
    public AttributeMetaData getAttributeByName(String name) {
        return attributeMetaData.get(name);
    }

    /** Returns the first attribute whose role equals the given role or <code>null</code> if there is none. */
    public AttributeMetaData getAttributeByRole(String role) {
        for (AttributeMetaData amd : attributeMetaData.values()) {
            String currentRole = amd.getRole();
            if (currentRole != null && currentRole.equals(role)) {
                return amd;
            }
        }
        return null;
    }

    /** Adds each of the given attributes via {@link #addAttribute(AttributeMetaData)}. */
    public void addAllAttributes(Collection<AttributeMetaData> attributes) {
        for (AttributeMetaData amd : attributes) {
            addAttribute(amd);
        }
    }

    /**
     * Returns the values view of the internal attribute map. The collection is not
     * copied, so removing elements from it removes them from this meta data
     * ({@link #clearRegular()} and {@link #clear()} rely on this).
     */
    public Collection<AttributeMetaData> getAllAttributes() {
        return attributeMetaData.values();
    }

    /** Removes the attribute stored under the name of the given attribute. */
    public void removeAttribute(AttributeMetaData attribute) {
        attributeMetaData.remove(attribute.getName());
    }

    /**
     * Registers this meta data as owner of the given attribute and stores the
     * attribute returned by the registration under its name. An attribute already
     * stored under the same name is replaced.
     */
    public void addAttribute(AttributeMetaData attribute) {
        if (attributeMetaData == null) {
            attributeMetaData = new LinkedHashMap<String, AttributeMetaData>();
        }
        // registerOwner returns the instance that has to be stored, so keep its result
        AttributeMetaData owned = attribute.registerOwner(this);
        attributeMetaData.put(owned.getName(), owned);
    }

    /**
     * Returns an HTML description consisting of the description of the super class,
     * the number of examples, the number of attributes and a table with one row per
     * attribute.
     */
    @Override
    public String getDescription() {
        StringBuilder buf = describeSummary();
        if (attributeMetaData != null) {
            buf.append("<table><thead><tr><th>Role</th><th>Name</th><th>Type</th><th>Range</th><th>Missings</th><th>Comment</th></tr></thead><tbody>");
            for (AttributeMetaData amd : attributeMetaData.values()) {
                buf.append(amd.getDescriptionAsTableRow());
            }
            buf.append("</tbody></table>");
        }
        return buf.toString();
    }

    /**
     * Builds the part shared by {@link #getDescription()} and {@link #getShortDescription()}:
     * the super class description, the number of examples and the attribute count.
     */
    private StringBuilder describeSummary() {
        StringBuilder buf = new StringBuilder(super.getDescription());
        buf.append("<br/>Number of examples ");
        buf.append(numberOfExamples.toString());
        if (attributeMetaData != null) {
            buf.append("<br/>");
            switch (attributesRelation) {
                case SUBSET:
                    buf.append("At most ");
                    break;
                case SUPERSET:
                    buf.append("At least ");
                    break;
                default:
                    // no qualifier for the remaining relations
                    break;
            }
            int count = attributeMetaData.size();
            buf.append(count);
            buf.append(" attribute").append(count != 1 ? "s" : "").append(": ");
        }
        return buf;
    }

    /** Replaces all stored attributes by the given ones. */
    public void setAttributes(List<AttributeMetaData> attributes) {
        attributeMetaData.clear();
        addAllAttributes(attributes);
    }

    /**
     * Returns a copy of this meta data. The number of examples and every attribute
     * meta data object are copied as well; the attribute clones are added to the
     * copy via {@link #addAttribute(AttributeMetaData)}.
     */
    @Override
    public ExampleSetMetaData clone() {
        ExampleSetMetaData copy = (ExampleSetMetaData) super.clone();
        copy.attributesRelation = this.attributesRelation;
        copy.numberOfExamples = this.numberOfExamples.copy();
        if (this.attributeMetaData != null) {
            // fresh map, so that the copy never shares entries with this instance
            copy.attributeMetaData = new LinkedHashMap<String, AttributeMetaData>();
            for (AttributeMetaData attribute : this.attributeMetaData.values()) {
                copy.addAttribute(attribute.clone());
            }
        }
        copy.nominalDataWasShrinked = this.nominalDataWasShrinked;
        return copy;
    }

    /**
     * Checks whether an attribute without a role exists whose value type is a
     * sub type of the given type.
     *
     * @param type the value type to look for
     * @param includeSpecials if <code>false</code>, attributes reported as special are skipped
     * @return YES if such an attribute is stored and the relation is EQUAL or SUPERSET,
     *         UNKNOWN if it is stored under another relation or if none is stored and
     *         the relation is SUPERSET or UNKNOWN, NO otherwise
     */
    public MetaDataInfo containsAttributesWithValueType(int type, boolean includeSpecials) {
        if (attributeMetaData == null) {
            return MetaDataInfo.UNKNOWN;
        }
        for (AttributeMetaData amd : attributeMetaData.values()) {
            if (amd.isSpecial() && !includeSpecials) {
                continue;
            }
            // NOTE(review): the role check below also rejects every attribute that has a role,
            // which presumably makes includeSpecials ineffective - confirm whether this is intended.
            if (amd.getRole() == null && Ontology.ATTRIBUTE_VALUE_TYPE.isA(amd.getValueType(), type)) {
                boolean certain = attributesRelation == SetRelation.EQUAL || attributesRelation == SetRelation.SUPERSET;
                return certain ? MetaDataInfo.YES : MetaDataInfo.UNKNOWN;
            }
        }
        boolean mayHaveMore = attributesRelation == SetRelation.SUPERSET || attributesRelation == SetRelation.UNKNOWN;
        return mayHaveMore ? MetaDataInfo.UNKNOWN : MetaDataInfo.NO;
    }

    /** Returns the first attribute with the given role or <code>null</code> if there is none. */
    public AttributeMetaData getSpecial(String role) {
        if (attributeMetaData != null) {
            for (AttributeMetaData amd : attributeMetaData.values()) {
                if (role.equals(amd.getRole())) {
                    return amd;
                }
            }
        }
        return null;
    }

    /** Returns the attribute with the role {@link Attributes#LABEL_NAME} or <code>null</code>. */
    public AttributeMetaData getLabelMetaData() {
        return getSpecial(Attributes.LABEL_NAME);
    }

    /**
     * This returns if an attribute with the given role exists in the
     * example set. If the role is confidence, then it checks not whether exactly the same role occurs,
     * but if any role starts with the confidence stem.
     */
    public MetaDataInfo hasSpecial(String role) {
        if (attributeMetaData == null) {
            return MetaDataInfo.UNKNOWN;
        }
        // TODO: This is too slow
        boolean matchByPrefix = role.equals(Attributes.CONFIDENCE_NAME);
        for (AttributeMetaData amd : attributeMetaData.values()) {
            String currentRole = amd.getRole();
            boolean matches;
            if (matchByPrefix) {
                matches = currentRole != null && currentRole.startsWith(role);
            } else {
                matches = role.equals(currentRole);
            }
            if (matches) {
                return MetaDataInfo.YES;
            }
        }
        // NOTE(review): for a role that was not found, containsSpecialAttribute() answers UNKNOWN
        // for SUPERSET and NO for SUBSET, i.e. the opposite of the mapping below - confirm which
        // of the two is intended before relying on either.
        switch (attributesRelation) {
            case SUBSET:
                return MetaDataInfo.UNKNOWN;
            case SUPERSET:
            case EQUAL:
                return MetaDataInfo.NO;
            default:
                return MetaDataInfo.UNKNOWN;
        }
    }

    /**
     * Joins the attributes of both example sets. Attributes of <code>es2</code> with the
     * role {@link Attributes#ID_NAME} are not taken over.
     *
     * @param es2 the meta data whose attributes are added to a clone of this meta data
     * @param prefixForDuplicates If this is non-null, attributes with duplicate names will be
     *        renamed by appending this string to their name. Otherwise, only one will be kept.
     * @return a new meta data object; this object and <code>es2</code> are not modified
     */
    public ExampleSetMetaData joinAttributes(ExampleSetMetaData es2, String prefixForDuplicates) {
        ExampleSetMetaData result = this.clone();
        if (this.attributeMetaData == null || es2.attributeMetaData == null) {
            return result;
        }
        // joining
        for (AttributeMetaData a : es2.attributeMetaData.values()) {
            AttributeMetaData clone = a.clone();
            if (a.getRole() == null || !a.getRole().equals(Attributes.ID_NAME)) {
                switch (result.containsAttributeName(a.getName())) {
                    case YES:
                        if (prefixForDuplicates != null) {
                            clone.setName(a.getName() + prefixForDuplicates);
                            result.attributeMetaData.put(clone.getName(), clone);
                        }
                        break;
                    case NO:
                        result.attributeMetaData.put(clone.getName(), clone);
                        break;
                    case UNKNOWN:
                        result.attributeMetaData.put(clone.getName(), clone);
                        // at least one with this name will be there, but the duplicate may be as well
                        // NOTE(review): the relation set here is overwritten unconditionally below
                        result.attributesAreSubset();
                        break;
                }
            }
        }
        // check how sure we can be to have the correct attribute meta data
        if (this.attributesRelation == SetRelation.EQUAL && es2.attributesRelation == SetRelation.EQUAL) {
            result.attributesRelation = SetRelation.EQUAL;
        } else if (es2.attributesRelation == SetRelation.SUPERSET || attributesRelation == SetRelation.SUPERSET) {
            result.attributesRelation = SetRelation.SUPERSET;
        } else {
            result.attributesRelation = SetRelation.UNKNOWN;
        }
        return result;
    }

    /**
     * Returns whether an attribute with the given name exists, taking the
     * attribute set relation into account.
     */
    public MetaDataInfo containsAttributeName(String name) {
        if (attributeMetaData == null) {
            return MetaDataInfo.UNKNOWN;
        }
        return toMetaDataInfo(attributeMetaData.containsKey(name));
    }

    /**
     * Returns whether an attribute with exactly the given role exists, taking the
     * attribute set relation into account.
     */
    public MetaDataInfo containsSpecialAttribute(String role) {
        if (attributeMetaData == null) {
            return MetaDataInfo.UNKNOWN;
        }
        boolean contains = false;
        for (AttributeMetaData amd : getAllAttributes()) {
            String itsRole = amd.getRole();
            if (itsRole != null && itsRole.equals(role)) {
                contains = true;
            }
        }
        return toMetaDataInfo(contains);
    }

    /**
     * Translates the result of a lookup in the stored attributes into a
     * {@link MetaDataInfo} according to the current attribute set relation.
     */
    private MetaDataInfo toMetaDataInfo(boolean contains) {
        switch (attributesRelation) {
            case EQUAL:
                return contains ? MetaDataInfo.YES : MetaDataInfo.NO;
            case SUPERSET:
                return contains ? MetaDataInfo.YES : MetaDataInfo.UNKNOWN;
            case SUBSET:
                return contains ? MetaDataInfo.UNKNOWN : MetaDataInfo.NO;
            case UNKNOWN:
            default: // cannot happen
                return MetaDataInfo.UNKNOWN;
        }
    }

    /** Changes the knowledge about the attributes in this set.
     *  Example: If we had full knowledge ({@link SetRelation#EQUAL})
     *  and <code>relation</code> is {@link SetRelation#SUBSET}, our knowledge
     *  changes to {@link SetRelation#SUBSET}. If the current knowledge
     *  is {@link SetRelation#SUBSET} and <code>relation</code> is
     *  {@link SetRelation#SUPERSET}, our knowledge changes to
     *  {@link SetRelation#UNKNOWN}.
     */
    public void mergeSetRelation(SetRelation relation) {
        this.attributesRelation = this.attributesRelation.merge(relation);
    }

    /** Returns the current relation between the stored and the real attributes. */
    public SetRelation getAttributeSetRelation() {
        return attributesRelation;
    }

    /** Declares that the stored attributes are exactly the attributes of the example set. */
    public void attributesAreKnown() {
        attributesRelation = SetRelation.EQUAL;
    }

    /** Declares that the attributes in this example set are a
     *  superset of {@link #attributeMetaData}. */
    public void attributesAreSuperset() {
        mergeSetRelation(SetRelation.SUPERSET);
    }

    /** Declares that the attributes in this example set are only a
     *  subset of {@link #attributeMetaData}. */
    public void attributesAreSubset() {
        mergeSetRelation(SetRelation.SUBSET);
    }

    /**
     * Convenience method for setting the number of examples if the number is known exactly.
     */
    public void setNumberOfExamples(int num) {
        numberOfExamples = new MDInteger(num);
    }

    /**
     * Method for setting the number of examples. The given object is stored as is, not copied.
     */
    public void setNumberOfExamples(MDInteger num) {
        numberOfExamples = num;
    }

    /** Marks the number of examples as unknown. */
    public void numberOfExamplesIsUnkown() {
        numberOfExamples.setUnkown();
    }

    /** Returns the stored number of examples (the internal object, not a copy). */
    public MDInteger getNumberOfExamples() {
        return numberOfExamples;
    }

    /** Returns a plain text listing with one header line and one line per attribute. */
    @Override
    public String toString() {
        StringBuilder buffer = new StringBuilder();
        buffer.append("ExampleSetMetaData: #examples: ").append(numberOfExamples);
        buffer.append("; #attributes: ").append(getAllAttributes().size());
        buffer.append(Tools.getLineSeparator());
        for (AttributeMetaData amd : getAllAttributes()) {
            buffer.append(amd.toString()).append(Tools.getLineSeparator());
        }
        return buffer.toString();
    }

    /**
     * Returns the meta data of the transposed example set. The result always contains
     * a nominal id attribute. If the number of examples is known, one attribute
     * <code>att_1</code> ... <code>att_n</code> is created per example, otherwise the
     * result is declared to be a superset. The number of examples of the result is
     * derived from the number of stored attributes and the attribute set relation.
     */
    public MetaData transpose() {
        ExampleSetMetaData transposedMD = new ExampleSetMetaData();
        transposedMD.addAttribute(new AttributeMetaData(Attributes.ID_NAME, Ontology.NOMINAL, Attributes.ID_NAME));
        if (this.numberOfExamples.isKnown()) {
            int num = this.numberOfExamples.getValue();
            int type;
            switch (this.containsAttributesWithValueType(Ontology.NOMINAL, true)) {
                case YES:
                    type = Ontology.NOMINAL;
                    break;
                case NO:
                    type = Ontology.REAL;
                    break;
                case UNKNOWN:
                default:
                    type = Ontology.ATTRIBUTE_VALUE;
                    break;
            }
            for (int i = 0; i < num; i++) {
                transposedMD.addAttribute(new AttributeMetaData("att_" + (i + 1), type));
            }
        } else {
            transposedMD.attributesAreSuperset();
        }
        // every stored attribute becomes one example; adjust for incomplete knowledge below
        transposedMD.numberOfExamples = new MDInteger(this.attributeMetaData.size());
        switch (this.attributesRelation) {
            case EQUAL:
                // do nothing
                break;
            case SUBSET:
                transposedMD.numberOfExamples.reduceByUnknownAmount();
                break;
            case SUPERSET:
                transposedMD.numberOfExamples.increaseByUnknownAmount();
                break;
            case UNKNOWN:
                transposedMD.numberOfExamples = new MDInteger();
                break;
            default:
                break;
        }
        return transposedMD;
    }

    /**
     * This method removes all regular attributes from this exampleSet meta data
     */
    public void clearRegular() {
        Iterator<AttributeMetaData> iterator = getAllAttributes().iterator();
        while (iterator.hasNext()) {
            AttributeMetaData amd = iterator.next();
            if (!amd.isSpecial()) {
                iterator.remove();
            }
        }
    }

    /**
     * This method removes every attribute
     */
    public void clear() {
        getAllAttributes().clear();
    }

    /** Returns the number of stored attributes that are not special. */
    public int getNumberOfRegularAttributes() {
        int regular = 0;
        for (AttributeMetaData amd : getAllAttributes()) {
            if (!amd.isSpecial()) {
                regular++;
            }
        }
        return regular;
    }

    /**
     * Checks if the attribute sets are equal, i.e. whether both contain attributes
     * with the same names, roles and value types.
     *
     * @return YES or NO if the attribute sets of both meta data objects are completely
     *         known ({@link SetRelation#EQUAL}), UNKNOWN otherwise
     */
    public MetaDataInfo equalHeader(ExampleSetMetaData other) {
        if (other == this) {
            return MetaDataInfo.YES;
        }
        boolean bothKnown = other.getAttributeSetRelation() == SetRelation.EQUAL
                && getAttributeSetRelation() == SetRelation.EQUAL;
        if (!bothKnown) {
            return MetaDataInfo.UNKNOWN;
        }
        if (other.getAllAttributes().size() != getAllAttributes().size()) {
            return MetaDataInfo.NO;
        }
        for (AttributeMetaData amd : getAllAttributes()) {
            AttributeMetaData otherAMD = other.getAttributeByName(amd.getName());
            if (otherAMD == null) {
                return MetaDataInfo.NO;
            }
            // null-safe and symmetric: a regular attribute never equals a special one
            String role = amd.getRole();
            String otherRole = otherAMD.getRole();
            boolean sameRole = (role == null) ? (otherRole == null) : role.equals(otherRole);
            if (!sameRole) {
                return MetaDataInfo.NO;
            }
            if (otherAMD.getValueType() != amd.getValueType()) {
                return MetaDataInfo.NO;
            }
        }
        return MetaDataInfo.YES;
    }

    /** Returns the names of all attributes whose value type is a sub type of the given type. */
    public Collection<String> getAttributeNamesByType(int mustBeOfType) {
        Collection<String> names = new LinkedList<String>();
        for (AttributeMetaData attribute : getAllAttributes()) {
            if (Ontology.ATTRIBUTE_VALUE_TYPE.isA(attribute.getValueType(), mustBeOfType)) {
                names.add(attribute.getName());
            }
        }
        return names;
    }

    /**
     * Returns an HTML description like {@link #getDescription()} but without the
     * attribute table. A note is appended if nominal values have been discarded.
     */
    public String getShortDescription() {
        StringBuilder buf = describeSummary();
        if (nominalDataWasShrinked) {
            buf.append("<br/><small><strong>Note:</strong> Some of the nominal values in this set were discarded due to performance reasons. You can change this behaviour in the preferences (<code>"+RapidMiner.PROPERTY_RAPIDMINER_GENERAL_MAX_NOMINAL_VALUES +"</code>).</small>");
        }
        return buf.toString();
    }

    /**
     * This method must be called by attributes in order to inform the example set that they have been renamed.
     * Before calling this method, the amd already must have its new name.
     */
    /*pp*/ void attributeRenamed(AttributeMetaData amd, String oldName) {
        attributeMetaData.remove(oldName);
        attributeMetaData.put(amd.getName(), amd);
    }

    /** Removes every stored attribute. */
    public void removeAllAttributes() {
        attributeMetaData.clear();
    }

    /**
     * Sets whether nominal values have been discarded while creating this meta data.
     *
     * @param b the new state of the flag
     */
    public void setNominalDataWasShrinked(boolean b) {
        nominalDataWasShrinked = b;
    }

    /** Returns the maximum number of attributes to be used for shortened meta data generation as specified by
     *  {@link RapidMiner#PROPERTY_RAPIDMINER_GENERAL_MAX_META_DATA_ATTRIBUTES}. A value of 0 means
     *  "no limit" and is returned as {@link Integer#MAX_VALUE}. If the parameter is not set or is
     *  not a valid integer, 250 is returned. */
    public static int getMaximumNumberOfAttributes() {
        String maxSizeString = ParameterService.getParameterValue(RapidMiner.PROPERTY_RAPIDMINER_GENERAL_MAX_META_DATA_ATTRIBUTES);
        if (maxSizeString == null) {
            return DEFAULT_MAX_META_DATA_ATTRIBUTES;
        }
        int maxSize;
        try {
            maxSize = Integer.parseInt(maxSizeString.trim());
        } catch (NumberFormatException e) {
            // a malformed preference must not break meta data generation
            return DEFAULT_MAX_META_DATA_ATTRIBUTES;
        }
        return maxSize == 0 ? Integer.MAX_VALUE : maxSize;
    }
}