package util.gdl.transforms;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.Set;
import util.gdl.grammar.Gdl;
import util.gdl.grammar.GdlConstant;
import util.gdl.grammar.GdlDistinct;
import util.gdl.grammar.GdlLiteral;
import util.gdl.grammar.GdlNot;
import util.gdl.grammar.GdlPool;
import util.gdl.grammar.GdlRule;
import util.gdl.grammar.GdlSentence;
import util.gdl.grammar.GdlTerm;
import util.gdl.grammar.GdlVariable;
import util.gdl.model.SentenceModel;
public class SimpleCondensationIsolator {
/*
 * The purpose of SimpleCondensationIsolator is to rewrite rules to make
 * them easier for propnets to handle. Other forward-chaining
 * approaches may also find this useful.
 *
 * "Condensation" refers to rules that contain variables not in the
 * head. For example, we might have a rule
 *   (<= (threatened ?x2 ?y2)
 *       (threatens ?x1 ?y1 ?x2 ?y2))
 * which "condenses" a potentially large number of pairs of ?x1 and ?y1
 * into a single statement that may be more useful elsewhere on the
 * board.
 *
 * - Alex Landau
 */
/**
 * Rewrites a game description so that condensations are factored out of
 * its rules into separate condenser rules.
 *
 * The description is first passed through {@code DeORer.run}. Statements
 * that are not {@link GdlRule}s are copied to the output unchanged and come
 * before all rules in the returned list.
 *
 * @param description the statements of the game description
 * @param experimental if true, {@code Condensation.getCondensation2} is
 *        tried first for each rule and {@code Condensation.getCondensation}
 *        is used only when that finds nothing; if false, only
 *        {@code getCondensation} is used
 * @return a new list holding the non-rule statements followed by the
 *         (possibly rewritten) rules and any condenser rules generated
 */
public static List<Gdl> run(List<Gdl> description, boolean experimental) {
    //Condensations are much easier to find once ORs have been expanded
    description = DeORer.run(description);

    //Rules get rewritten below; everything else passes straight through
    List<Gdl> result = new ArrayList<Gdl>();
    List<GdlRule> rules = new ArrayList<GdlRule>();
    for (Gdl statement : description) {
        if (statement instanceof GdlRule) {
            rules.add((GdlRule) statement);
        } else {
            result.add(statement);
        }
    }

    Set<String> sentenceNames = new SentenceModel(description).getSentenceNames();

    //The rule list can grow while we walk it, so re-read its size each time
    int index = 0;
    while (index < rules.size()) {
        GdlRule current = rules.get(index);

        Condensation found = null;
        if (experimental) {
            found = Condensation.getCondensation2(current, rules, sentenceNames);
        }
        if (found == null) {
            found = Condensation.getCondensation(current, rules, sentenceNames);
        }

        if (found == null) {
            //Nothing left to factor out of this rule; move on
            index++;
            continue;
        }

        //Swap in the rewritten rule and stay on this index so that it
        //gets examined again for further condensations
        rules.set(index, found.getModifiedRule());
        if (found.hasNewRule()) {
            rules.add(found.getNewRule());
        }
    }

    result.addAll(rules);
    return result;
}
private static class Condensation {
//The original rule with the condensed literals replaced by a single
//reference to the condenser sentence
private final GdlRule modifiedRule;
//The generated condenser rule. The plan was to look for an existing rule
//that already matches it (in which case this would stay null), but that
//lookup is still a TODO, so the constructor currently always sets this.
private final GdlRule newRule;

/**
 * Splits {@code rule} around the given condensation variables.
 *
 * Every body literal that mentions at least one of {@code condensationVars}
 * is moved into a new condenser rule; the remaining literals, plus a
 * reference to the condenser rule's head, form the modified rule.
 *
 * The condenser head is named {@code <head name>_tmp<i>} for the smallest
 * {@code i >= 0} not already in {@code sentenceNames}. Its arguments are
 * the non-condensation variables of the moved literals, listed in order of
 * first appearance so that the generated rule does not depend on hash
 * iteration order.
 *
 * @param condensationVars the variables to factor out of the rule
 * @param rule the rule being rewritten
 * @param rules currently unused; kept for the planned search for an
 *        existing equivalent rule
 * @param sentenceNames sentence names already in use; the name chosen for
 *        the condenser sentence is added to this set
 */
public Condensation(Set<GdlVariable> condensationVars, GdlRule rule,
        List<GdlRule> rules, Set<String> sentenceNames) {
    //Partition the body: literals touching a condensation variable go to
    //the condenser rule, the rest stay in the modified rule
    List<GdlLiteral> modifiedBody = new ArrayList<GdlLiteral>();
    List<GdlLiteral> condenserBody = new ArrayList<GdlLiteral>();
    for (GdlLiteral literal : rule.getBody()) {
        if (Collections.disjoint(SentenceModel.getVariables(literal), condensationVars)) {
            modifiedBody.add(literal);
        } else {
            condenserBody.add(literal);
        }
    }

    //The condenser head exposes every variable of the moved literals
    //except the ones being condensed away. Seeding the "seen" set with the
    //condensation variables excludes them; the list keeps first-appearance
    //order, which makes the head's argument order reproducible.
    List<GdlTerm> orderedVars = new ArrayList<GdlTerm>();
    Set<GdlVariable> seenVars = new HashSet<GdlVariable>(condensationVars);
    for (GdlLiteral literal : condenserBody) {
        for (GdlVariable var : SentenceModel.getVariables(literal)) {
            if (seenVars.add(var)) {
                orderedVars.add(var);
            }
        }
    }

    //TODO: Do comparisons against existing rules and reuse a match
    //If nothing found, name the rule: take the first unused "_tmp" suffix
    //and reserve it
    String namePrefix = rule.getHead().getName().getValue() + "_tmp";
    GdlConstant condenserName = null;
    for (int i = 0; condenserName == null; i++) {
        String candidateName = namePrefix + i;
        if (sentenceNames.add(candidateName)) {
            condenserName = GdlPool.getConstant(candidateName);
        }
    }

    //Make the rule head; with no variables left it is a plain proposition
    GdlSentence condenserHead;
    if (orderedVars.isEmpty()) {
        condenserHead = GdlPool.getProposition(condenserName);
    } else {
        condenserHead = GdlPool.getRelation(condenserName, orderedVars);
    }

    //Make the condenser rule
    newRule = GdlPool.getRule(condenserHead, condenserBody);

    //Add the condenser relation to the modified rule
    modifiedBody.add(condenserHead);
    modifiedRule = GdlPool.getRule(rule.getHead(), modifiedBody);
}
/**
 * Returns the condenser rule produced by this condensation.
 *
 * @return the new rule, or null if none was generated
 */
public GdlRule getNewRule() {
    return this.newRule;
}
/**
 * Reports whether this condensation carries a newly generated rule.
 *
 * @return true if {@code newRule} is non-null
 */
public boolean hasNewRule() {
    return this.newRule != null;
}
/**
 * Returns the rewritten form of the rule this condensation was built from.
 *
 * @return the modified rule
 */
public GdlRule getModifiedRule() {
    return this.modifiedRule;
}
/**
 * Looks for a condensation built from variables that are used by only one
 * body literal and do not appear in the head.
 *
 * A {@code distinct} literal that contains exactly one variable is ignored
 * when counting uses. The literal to condense is the first body literal,
 * in body order, that contains a single-use variable; all single-use
 * variables of that literal are condensed together. Scanning the body in
 * order (rather than picking an arbitrary element of a hash set) keeps the
 * result reproducible.
 *
 * @param rule the rule to examine
 * @param rules passed through to the {@code Condensation} constructor
 * @param sentenceNames sentence names already in use; passed through to the
 *        {@code Condensation} constructor, which adds the new name to it
 * @return a condensation for the rule, or null if the body has fewer than
 *         two sentence/not literals or no single-use variable exists
 */
public static Condensation getCondensation(GdlRule rule, List<GdlRule> rules, Set<String> sentenceNames) {
    //First: check that the body has at least two sentence/not literals
    int relevantCount = 0;
    for (GdlLiteral literal : rule.getBody()) {
        if (literal instanceof GdlSentence || literal instanceof GdlNot) {
            relevantCount++;
        }
    }
    if (relevantCount < 2) {
        return null;
    }

    //We're now looking for variables that appear in only one literal.
    //Head variables count as multi-use from the start.
    //If there's a distinct clause with only one variable in it, that
    //clause is ignored for these purposes.
    Set<GdlVariable> singleUseVars = new HashSet<GdlVariable>();
    Set<GdlVariable> multiUseVars = new HashSet<GdlVariable>();
    multiUseVars.addAll(SentenceModel.getVariables(rule.getHead()));

    for (GdlLiteral literal : rule.getBody()) {
        //De-duplicate so a variable repeated within one literal counts once
        Set<GdlVariable> usedVars = new HashSet<GdlVariable>(SentenceModel.getVariables(literal));
        //See chinesecheckers4.kif for example of this being helpful
        if (literal instanceof GdlDistinct && usedVars.size() == 1) {
            continue;
        }
        for (GdlVariable var : usedVars) {
            if (multiUseVars.contains(var)) {
                continue;
            }
            if (singleUseVars.remove(var)) {
                //Second sighting: promote to multi-use
                multiUseVars.add(var);
            } else {
                singleUseVars.add(var);
            }
        }
    }

    //Do we have any single use vars?
    if (singleUseVars.isEmpty()) {
        return null;
    }

    //Take the first body literal holding a single-use variable and
    //condense every single-use variable found in it
    for (GdlLiteral literal : rule.getBody()) {
        List<GdlVariable> varsInLiteral = SentenceModel.getVariables(literal);
        if (!Collections.disjoint(singleUseVars, varsInLiteral)) {
            singleUseVars.retainAll(varsInLiteral);
            return new Condensation(singleUseVars, rule, rules, sentenceNames);
        }
    }

    //Not expected: every single-use variable was collected from a body literal
    return null;
}
/**
 * Experimental search for a condensation based on connected components of
 * the rule's non-head variables.
 *
 * Two non-head variables are linked when they occur together in a body
 * literal. The first connected component accepted by
 * {@code isUsefulCondensation} is turned into a {@code Condensation}.
 *
 * @param rule the rule to examine
 * @param rules passed through to the {@code Condensation} constructor
 * @param sentenceNames sentence names already in use; passed through to the
 *        {@code Condensation} constructor
 * @return a condensation for the first useful component, or null if no
 *         component is useful
 */
public static Condensation getCondensation2(GdlRule rule, List<GdlRule> rules, Set<String> sentenceNames) {
    /* Design notes.
     *
     * We now take the following approach to finding a useful condensation:
     * We start with a single candidate variable not found in the head.
     * We add all the literals with that variable.
     * Whenever we add a literal, we add all the non-head vars in that literal;
     * whenever we add a variable, we add all the literals containing it.
     * We stop when there's nothing left to add.
     * If we end up with the entire set of literals, there's no condensation;
     * otherwise, we generate a condensation of those vars and literals.
     * Am I missing any cases?
     *   Yes, plenty
     * What if we just gathered all the literals associated with the
     *   first variable, then used all the variables that only appear
     *   in those
     * That would have problems with examples like
     *   (<= (r1 x)
     *       (r2 x a b)
     *       (r3 x a c)
     *       (not (r4 x b c)))
     * In this case, we need both r2 and r3 if we're to factor out r4
     * We could, however, factor out r2 and r3 to get:
     *   (<= (r5 x b c)
     *       (r2 x a b)
     *       (r3 x a c))
     *   (<= (r1 x)
     *       (r5 x b c)
     *       (not (r4 x b c)))
     * How could we find this case?
     * Only by explicitly focusing on factoring out a
     * It's worth factoring out because it doesn't appear across all literals
     * It's feasible to factor out because it doesn't include a not
     *   or distinct with other variables in it
     *
     * But sometimes we want to look at pairs of variables, don't we?
     *
     *   (<= (r1 x)
     *       (r2 x a)
     *       (r3 x a b)
     *       (r4 x b)
     *       (r5 x c d))
     * We might like to factor this into:
     *   (<= (r6 x)
     *       (r2 x a)
     *       (r3 x a b)
     *       (r4 x b))
     *   (<= (r7 x)
     *       (r5 x c d))
     *   (<= (r1 x)
     *       (r6 x)
     *       (r7 x))
     * On the other hand, this might be better:
     *   (<= (r8 x b)
     *       (r2 x a)
     *       (r3 x a b))
     *   (<= (r6 x)
     *       (r8 x b)
     *       (r4 x b))
     * Which is better? Let's assume here that r8 wouldn't be referenced
     * by other relations for other reasons.
     * Assume a and b each have domain size n.
     * Then in the first case, for each (r6 const), we get:
     *   - n^2 AND gates for the n^2 possible combinations
     *   - 3n^2 inputs to the AND gates
     *   - 1 OR gate leading to (r6 const)
     *   - n^2 outputs from the AND gates to the OR gate
     *   Total: 4n^2 links
     * In the second case, again for a given constant:
     *   - For each value of b, leading to r8:
     *     - n AND gates, each with 2 inputs
     *     - Outputs to an OR gate for each one
     *     Total: 3n
     *   - Total for r8: 3n^2 links
     *   - For r6:
     *     - n AND gates, two inputs each
     *     - one OR gate
     *     - 3n links
     *   - So a total of 3n^2 + 3n; if 3 < n, this is smaller
     * So the more "thorough" method actually appears to be better
     * for large numbers of constants (the more typical case).
     *
     * This means we really do want to factor out just one variable at a time.
     * It just has to be useful and feasible.
     * It is useful if there is some literal in the rule body that it
     *   doesn't appear in.
     * It is feasible if none of the distinct or not literals that
     *   contain the variable contain other variables not fully contained
     *   in the set to be factored out.
     * But the set to be factored out is somewhat flexible...
     * Let's look at another example
     *   (<= (r1 x)
     *       (r2 x a)
     *       (r3 x b)
     *       (not (r4 x a b))
     *       (r5 x c)
     *       (r6 x d)
     *       (not (r7 x c d)))
     * We could even entwine the two halves with something like (r5 x a c),
     * and it would still be worth factoring.
     * So we do want to expand to other variables in the case of not/distinct,
     * just not in the case of positive literals.
     * And then, of course, if it becomes useless, we ___.
     * Maybe we can start with the nots/distincts?
     * Though just one positive literal/one distinct doesn't seem worth factoring out.
     *
     * Okay, look at it this way:
     * We have a graph with a node for each variable not in the head
     * Each "not" or "distinct" creates links between all variables in
     *   that literal
     * This generates connected components that are minimal condensations
     *   as long as they are useful, which requires having at least one
     *   non-"distinct" literal including at least one of the variables
     */

    //Work out which of the rule's variables are absent from the head
    List<GdlVariable> headVars = SentenceModel.getVariables(rule.getHead());
    List<GdlVariable> allVars = SentenceModel.getVariables(rule);
    List<GdlVariable> nonHeadVars = new ArrayList<GdlVariable>(allVars);
    nonHeadVars.removeAll(headVars);

    //One graph node per non-head variable, initially without neighbours
    Map<GdlVariable, Set<GdlVariable>> varGraph = new HashMap<GdlVariable, Set<GdlVariable>>();
    for (GdlVariable node : nonHeadVars) {
        varGraph.put(node, new HashSet<GdlVariable>());
    }

    //Link the non-head variables that share a body literal.
    //NOTE(review): links are added for every body literal here, while the
    //design notes above describe links only for not/distinct literals;
    //confirm which is intended.
    for (GdlLiteral literal : rule.getBody()) {
        List<GdlVariable> literalVars = SentenceModel.getVariables(literal);
        literalVars.removeAll(headVars);
        for (GdlVariable node : literalVars) {
            varGraph.get(node).addAll(literalVars);
        }
    }

    //Variables may be connected only indirectly, so collect whole
    //components and return a condensation for the first useful one
    for (Set<GdlVariable> component : getConnectedComponents(varGraph)) {
        if (!isUsefulCondensation(component, rule)) {
            continue;
        }
        return new Condensation(component, rule, rules, sentenceNames);
    }
    return null;
}
/**
 * Decides whether factoring out the given variables would leave something
 * substantial behind in the rule.
 *
 * @param condensationVars the candidate variables to factor out
 * @param rule the rule they come from
 * @return true if some sentence or not literal in the body (distinct
 *         literals do not count) contains none of the candidate variables
 */
private static boolean isUsefulCondensation(
        Set<GdlVariable> condensationVars, GdlRule rule) {
    for (GdlLiteral literal : rule.getBody()) {
        boolean substantial = literal instanceof GdlSentence || literal instanceof GdlNot;
        if (!substantial) {
            continue;
        }
        //A substantial literal untouched by the candidates makes the split worthwhile
        if (Collections.disjoint(condensationVars, SentenceModel.getVariables(literal))) {
            return true;
        }
    }
    return false;
}
/**
 * Splits a variable graph into its connected components using a
 * breadth-first traversal from each not-yet-visited key.
 *
 * @param graph maps each variable to the set of variables it is linked to;
 *        every linked variable is looked up in this map during traversal
 * @return one set per connected component, in the order their first
 *         member is met while iterating {@code graph.keySet()}
 */
private static List<Set<GdlVariable>> getConnectedComponents(
        Map<GdlVariable, Set<GdlVariable>> graph) {
    List<Set<GdlVariable>> components = new ArrayList<Set<GdlVariable>>();
    Set<GdlVariable> visited = new HashSet<GdlVariable>();

    for (GdlVariable start : graph.keySet()) {
        //Already swallowed by an earlier component
        if (visited.contains(start)) {
            continue;
        }

        Set<GdlVariable> component = new HashSet<GdlVariable>();
        Queue<GdlVariable> frontier = new LinkedList<GdlVariable>();
        frontier.add(start);

        while (!frontier.isEmpty()) {
            GdlVariable current = frontier.remove();
            //A variable can be queued more than once before it is processed
            if (!visited.add(current)) {
                continue;
            }
            component.add(current);

            //Queue the neighbours that have not been handled yet
            for (GdlVariable neighbour : graph.get(current)) {
                if (!visited.contains(neighbour)) {
                    frontier.add(neighbour);
                }
            }
        }

        //Never empty: it contains at least the start variable
        components.add(component);
    }
    return components;
}
}
}