/*
* This file is part of the Jikes RVM project (http://jikesrvm.org).
*
* This file is licensed to You under the Common Public License (CPL);
* You may not use this file except in compliance with the License. You
* may obtain a copy of the License at
*
* http://www.opensource.org/licenses/cpl1.0.php
*
* See the COPYRIGHT.txt file distributed with this work for information
* regarding copyright ownership.
*/
import java.io.*;
import java.util.*;
import java.lang.reflect.*;
/**
* <P> Generates the assembler that is used by the optimizing compiler,
* using a combination of the tables describing the low-level
* instruction formats and operators used by the opt compiler, and the
* interface of the low-level assembler that understands how to
* generate IA32 opcodes given specific operands. Essentially, the
* opt assembler becomes a rather large piece of impedance-matching
* code that decodes the OPT_Instructions and OPT_Operators understood
* by the opt compiler to determine what is the appropriate IA32
* machine code to emit. </P>
*
* <P> In order for this to work, both the optimizing compiler tables and
* the VM_Assembler must use stylized formats. On the optimizing
* compiler side, the major stylization is that the low-level
* operators that represent assembly code must correspond directly to
* the official IA32 assembler mnemonics; i.e. since there is an ADD
* assembler mnemonic in the Intel assembly specification, there must
* be a corresponding IA32_ADD operator in the opt compiler tables.
* The stylization of the VM_Assembler side is more thoroughgoing, and
* the reader is referred to the VM_Assembler header comments for a
* definition. </P>
*
* <P> Given these stylizations, GenerateAssembler reads the set of
* assembler mnemonics supported by the VM_Assembler using reflection
* to examine its stylized method signatures. GenerateAssembler also
* reads the set of IA32 operators that the opt compiler defines,
* using the helper classes OPT_InstructionFormatTables and
* OPT_OperatorFormatTables. It then, for each operator, generates a
* handler method to call the appropriate VM_Assembler emit method
* given an OPT_Instruction. The VM_Assembler will have a family of
* emit methods named for each opcode, each such emit method takes a
* specific set of operand addressing modes and sizes. The handler
* methods that the GenerateAssembler emits examine the operands to an
* OPT_Instruction, and determine which VM_Assembler method to call
* for the operand addressing modes and sizes that it finds.
* GenerateAssembler also generates a top-level dispatch method that
* examines the operator and calls the appropriate handler. </P>
*
* <P> GenerateAssembler generates the opt assembler as part of the
* normal build process; this poses a slight problem in that it needs
* to examine the VM_Assembler via reflection to generate the
* OPT_Assembler, but that is not possible until the VM sources
* (including, of course, the OPT_Assembler) have been compiled. The
* current hack to get around this is to compile the VM_Assembler in
* advance, and read the resulting class file. This utilizes some
* supporting files to make the VM_Assembler compile in isolation.
* This is the purpose of the .fake files in the optimizing compiler's
* assembler directory. </P>
*
* <P>Since this is a freestanding program, use the regular Java exit
* code conventions.</P>
*
* @see OPT_InstructionFormatTables
* @see OPT_OperatorFormatTables
* @see org.jikesrvm.compilers.opt.OPT_AssemblerBase
* @see org.jikesrvm.compilers.opt.ir.OPT_Instruction
* @see org.jikesrvm.compilers.opt.OPT_Assembler
* @see VM_Assembler
*/
public class GenerateAssembler {
/** Global flag controlling printing of debugging information to System.err. */
static final boolean DEBUG = false;
/** Writer for the assembler source file being generated; emit writes every string to it. */
static FileWriter out;
/**
 * Append one string to the assembler source file being generated.
 * An I/O failure is fatal for the generator, so it is rethrown
 * as an unchecked Error that carries the original exception.
 *
 * @param s the text to append to the output file
 */
private static void emit(String s) {
  try {
    out.write(s);
  } catch (IOException ioFailure) {
    throw new Error(ioFailure);
  }
}
/**
 * Write indentation to the assembler source file, one emitted
 * unit per nesting level. This only serves to make the generated
 * source easier to read.
 *
 * @param level the number of indentation units to write
 */
private static void emitTab(int level) {
  int remaining = level;
  while (remaining-- > 0) {
    emit(" ");
  }
}
/**
 * Global reference to the OPT_InstructionFormatTables class, which
 * describes each optimizing compiler instruction format that is
 * visible to the assembler (i.e. the MIR_* instruction formats).
 *
 * @see OPT_InstructionFormatTables
 */
private static Class<?> formats;
/**
 * Load the instruction format table by name; if the class cannot
 * be found the generator cannot proceed, so fail with an Error.
 */
static {
  final String tableClassName = "OPT_InstructionFormatTables";
  Class<?> loaded;
  try {
    loaded = Class.forName(tableClassName);
  } catch (ClassNotFoundException missing) {
    throw new Error(missing);
  }
  formats = loaded;
}
/**
* Global reference to the opcode argument table for the current
* opcode being processed. This table is null unless some of the
* operands in the OPT_Instruction are to be ignored when generating
* code for the opcode. Ignoring arguments is an ad hoc special
* case that is controlled by the global opcodeArgTables.
*/
static int[] currentOpcodeArgTable;
/**
* Global reference to the table of symbolic names of the arguments
* to the current MIR_ instruction format. This information is read
* reflectively from the OPT_InstructionFormatTables class (the
* static field named after the format plus "ParameterNames").
*/
static String[] currentOpcodeSymbolicNames;
/**
* The current IA32 opcode being processed. This is the name of the
* IA32 instruction. Typically, it is the name of the opt compiler
* IA32_* opcode as well, but there are exceptions in that multiple
* IA32_* opcodes can map to the same IA32 instruction.
*/
static String currentOpcode;
/**
* The instruction format for the IA32_* opt compiler opcode(s)
* being processed, as reported by OPT_OperatorFormatTables.getFormat.
*/
static String currentFormat;
/**
 * Global table mapping opt compiler IA32_* opcodes to arrays listing the
 * set of OPT_Instruction operands that are to be used as arguments to the
 * IA32 architecture instruction. This is used when an instruction has extra
 * operands that are not used in assembly. The array is indexed by the
 * desired argument for the instruction/VM_Assembler method; the value in
 * the array states which operand of the instruction supplies that
 * argument. For example, an array of "new int[]{2}" means the
 * emitted instruction has 1 argument and it is read from operand 2 of the
 * OPT_Instruction.
 */
static final Hashtable<String,int[]> opcodeArgTables;
/**
 * Initialize the opcodeArgTables table. Each opcode is registered
 * exactly once; a second put for the same key would only replace
 * the first mapping.
 */
static {
  opcodeArgTables = new Hashtable<String,int[]>();
  opcodeArgTables.put("CALL", new int[]{2});
  opcodeArgTables.put("INT", new int[]{1});
  opcodeArgTables.put("CDQ", new int[]{0});
  opcodeArgTables.put("DIV", new int[]{1,2});
  opcodeArgTables.put("IDIV", new int[]{1,2});
  opcodeArgTables.put("MUL", new int[]{1,2});
  opcodeArgTables.put("IMUL1", new int[]{1,2});
  opcodeArgTables.put("SET", new int[]{1,0});
  opcodeArgTables.put("CMPXCHG", new int[]{1,2});
  opcodeArgTables.put("CMPXCHG8B", new int[]{2});
  opcodeArgTables.put("FCMOV", new int[]{2,0,1});
  opcodeArgTables.put("CMOV", new int[]{2,0,1});
}
/**
 * Make the given opcode the current one. This updates four global
 * fields, in this order: currentOpcode, currentOpcodeArgTable,
 * currentFormat and currentOpcodeSymbolicNames. The symbolic names
 * are read reflectively from the static field of the format tables
 * class whose name is the format followed by "ParameterNames".
 *
 * @param opcode The IA32 architecture opcode to make the current opcode
 */
static void setCurrentOpcode(String opcode) {
  try {
    currentOpcode = opcode;
    // null when the opcode has no special operand mapping
    currentOpcodeArgTable = opcodeArgTables.get(opcode);
    currentFormat = OPT_OperatorFormatTables.getFormat(opcode);
    String namesFieldName = currentFormat + "ParameterNames";
    Field namesField = formats.getDeclaredField(namesFieldName);
    // static field, hence the null receiver
    currentOpcodeSymbolicNames = (String[]) namesField.get(null);
  } catch (Throwable failure) {
    throw new Error("Cannot handle VM_Assembler opcode " + opcode, failure);
  }
}
/**
* Constant representing immediate arguments to VM_Assembler calls
*/
static final int Immediate = 0;
/**
* Constant representing register arguments to VM_Assembler calls.
* This covers the cases when a register is encoded into the mod/rm
* byte; the VM_Assembler handles the details of generating either
* the reg bits of the mod/rm byte or encoding a register as mod 11.
*/
static final int Register = 1;
/**
* Constant representing condition arguments to VM_Assembler calls.
* Such operands are not arguments to the ultimate IA32 machine
* code instruction, but they are used to calculate the opcode that
* is generated.
*/
static final int Condition = 2;
/**
* Constant representing arguments to VM_Assembler calls that use the
* scaled-index-base (SIB) addressing mode in the special way that uses
* neither a base nor an index to generate an absolute address
*/
static final int Absolute = 3;
/**
* Constant representing IA32 memory operands that use register-
* displacement addressing mode (usually mod bits 01 and 10) arguments
* to VM_Assembler calls. The VM_Assembler takes care of choosing the
* right mode for the size of the displacement, so this one mode
* covers two of the four addressing modes the IA32 has. The
* VM_Assembler also handles the special cases in which this mode
* requires weird SIB bytes.
*/
static final int RegisterDisplacement = 4;
/**
* Constant representing arguments to VM_Assembler calls that use the
* scaled-index-base (SIB) addressing mode in the special way that does
* not use a base register. The OPT_Assembler simply assumes it has
* an [index << scale + disp] addressing mode, and the VM_Assembler takes
* care of generating the special mod/rm that causes the base register
* to be ignored.
*/
static final int RegisterOffset = 5;
/**
* Constant representing scaled-index-base (SIB) mode arguments to
* VM_Assembler calls.
*/
static final int RegisterIndexed = 6;
/**
* Constant representing register-indirect arguments to VM_Assembler
* calls. This mode handles what is (usually) mod 00 in the mod/rm
* byte.
*/
static final int RegisterIndirect = 7;
/**
* Constant representing labels used as branch targets. While code
* is being generated, the machine code offset for a forward branch
* cannot, in general, be computed as the target code has not been
* generated yet. The OPT_Assembler uses synthetic code offsets,
* based upon the order of OPT_Instructions in the code being
* compiled, to communicate forward branch targets to the
* VM_Assembler. These synthetic offsets are passed to the
* VM_Assembler where it expects Label arguments.
*/
static final int Label = 8;
/**
* Constant representing arguments to VM_Assembler calls in which
* it may be either a backward branch target (resolved to an
* immediate being the exact branch displacement) or a forward
* branch (which will be a synthetic Label).
*/
static final int LabelOrImmediate = 9;
/**
* How many different sizes of instruction operand there are, not
* counting the standard double word. The size encodings occupy the
* last SIZES entries of the encoding table, which is how getEncoding
* and getSize tell operand types and sizes apart.
*/
static final int SIZES = 3;
/**
* Constant representing instructions that operate upon bytes
*/
static final int Byte = 10;
/**
* Constant representing instructions that operate upon words (16 bits)
*/
static final int Word = 11;
/**
* Constant representing instructions that operate upon quad words (64 bits)
*/
static final int Quad = 12;
/**
* This array denotes all possible encodings in a VM_Assembler emitter
* function. It includes all possible operand types and all possible
* instruction sizes. For each of the constants corresponding to a
* possible operand type or instruction size, the corresponding entry
* in this table holds the string that the VM_Assembler uses to denote
* that operand type or instruction size. The array index of every
* entry is therefore the value of the matching constant.
*
* This table is used when parsing a VM_Assembler emitter name to create
* a descriptor that denotes the operand size and types of the given
* emitter in terms of the constants.
*
* This table is also used when generating the OPT_Assembler emitter
* functions to allow the generator to pick which queries to use to
* dispatch an OPT_Instruction to the appropriate VM_Assembler emitter.
*/
static final String[] encoding =
{"Imm", // encoding[Immediate]
"Reg", // encoding[Register]
"Cond", // encoding[Condition]
"Abs", // encoding[Absolute]
"RegDisp", // encoding[RegisterDisplacement]
"RegOff", // encoding[RegisterOffset]
"RegIdx", // encoding[RegisterIndexed]
"RegInd", // encoding[RegisterIndirect]
"Label", // encoding[Label]
"ImmOrLabel", // encoding[LabelOrImmediate]
"Byte", // encoding[Byte]
"Word", // encoding[Word]
"Quad"}; // encoding[Quad]
/**
 * Translate a VM_Assembler operand-type string into the matching
 * GenerateAssembler constant. Only the operand-type portion of the
 * encoding table is searched; strings that denote operand sizes
 * are deliberately not recognized here.
 *
 * @param str A valid VM_Assembler encoding of operand type
 * @return The constant (index into encoding) for str, or -1 if none
 */
private static int getEncoding(String str) {
  // the trailing SIZES entries of the table are size encodings
  final int operandTypeCount = encoding.length - SIZES;
  int index = 0;
  while (index < operandTypeCount) {
    if (encoding[index].equals(str)) {
      return index;
    }
    index++;
  }
  return -1;
}
/**
 * Translate a VM_Assembler operand-size string into the matching
 * GenerateAssembler constant. Only the size portion of the encoding
 * table (its last SIZES entries) is searched; strings that denote
 * operand types are deliberately not recognized here.
 *
 * @param str A valid VM_Assembler encoding of operand size
 * @return The constant (index into encoding) for str, or -1 if none
 */
private static int getSize(String str) {
  final int firstSizeIndex = encoding.length - SIZES;
  int index = firstSizeIndex;
  while (index < encoding.length) {
    if (encoding[index].equals(str)) {
      return index;
    }
    index++;
  }
  return -1;
}
/**
 * For a given operand number, return a string which is a valid Java
 * expression for reading that operand out of the current instruction.
 * The accessor name comes from the currentOpcodeSymbolicNames table
 * (e.g. getValue if the symbolic name is Value). When the current
 * opcode has an entry in currentOpcodeArgTable, that table is used
 * first to map the requested argument to the OPT_Instruction operand
 * that actually holds it.
 *
 * @param op The operand number sought.
 * @return A Java expression for accessing the requested operand.
 * @throws Error if op (or the operand it maps to) is out of range for
 *         the current opcode; the triggering exception is attached
 *         as the cause.
 */
private static String getOperand(int op) {
  try {
    // without an argument table, argument i is simply operand i
    int operandIndex = (currentOpcodeArgTable == null) ? op : currentOpcodeArgTable[op];
    return currentFormat + ".get" + currentOpcodeSymbolicNames[operandIndex] + "(inst)";
  } catch (ArrayIndexOutOfBoundsException e) {
    StringBuilder error = new StringBuilder();
    error.append(currentOpcode).append(": cannot access operand ").append(op).append(":");
    for (String symbolicName : currentOpcodeSymbolicNames) {
      error.append(symbolicName);
    }
    // keep the original exception so the failing index is not lost
    throw new Error(error.toString(), e);
  }
}
/**
 * Emit a boolean test that checks whether an operand of the current
 * OPT_Instruction matches a given encoding. For operand-type
 * encodings the emitted predicate is applied to the operand itself;
 * for size encodings it is applied to the whole instruction
 * ("inst") and argNumber is not used. These tests form the
 * conditions of the if statements in the generated dispatch code.
 *
 * @param argNumber The argument to examine
 * @param argEncoding The encoding for which to check
 */
private static void emitTest(int argNumber, int argEncoding) {
  final String predicate = "is" + encoding[argEncoding];
  final String subject;
  if (argEncoding >= encoding.length - SIZES) {
    subject = "inst";
  } else {
    subject = getOperand(argNumber);
  }
  emit(predicate + "(" + subject + ")");
}
/**
 * Emit an assertion that a given operand matches a given encoding.
 * Since the IA32 architecture is not exactly orthogonal, there are
 * cases when the opt assembler can determine the VM_Assembler emitter
 * to call without looking at all (or, in some cases, any) of the
 * arguments of the OPT_Instruction. An example is the ENTER
 * instruction, which only takes one immediate parameter, so that
 * emitter could be called without checking the operand. For such
 * unchecked operands this method emits a guarded test, active only
 * under VM.VerifyAssertions, that fails if the operand does not
 * match the required encoding.
 *
 * @param argNumber The argument to examine
 * @param argEncoding The encoding for which to check
 * @param level current level for generating pretty, tabified output
 */
private static void emitVerify(int argNumber, int argEncoding, int level) {
  final String guardOpen = "if (VM.VerifyAssertions && !";
  final String guardClose = ") VM._assert(false, inst.toString());\n";
  emitTab(level);
  emit(guardOpen);
  emitTest(argNumber, argEncoding);
  emit(guardClose);
}
/**
 * Emit the argument expressions that a VM_Assembler emit method needs
 * for one operand. Different operand encodings require different
 * pieces of the operand: for instance, a register-displacement
 * operand is passed as a base register plus a displacement. This
 * method emits the comma-separated accessor calls appropriate to the
 * encoding, each applied to the expression that reads the operand
 * from the OPT_Instruction.
 *
 * @param argNumber The argument being generated.
 * @param argEncoding The encoding to use.
 */
private static void emitArgs(int argNumber, int argEncoding) {
  final String operand = getOperand(argNumber);
  final String base = "getBase(" + operand + ")";
  final String index = "getIndex(" + operand + ")";
  final String scale = "getScale(" + operand + ")";
  final String disp = "getDisp(" + operand + ")";
  final String arguments;
  if (argEncoding == RegisterIndirect) {
    arguments = base;
  } else if (argEncoding == RegisterIndexed) {
    arguments = base + ", " + index + ", " + scale + ", " + disp;
  } else if (argEncoding == RegisterOffset) {
    arguments = index + ", " + scale + ", " + disp;
  } else if (argEncoding == Absolute) {
    arguments = disp;
  } else if (argEncoding == RegisterDisplacement) {
    arguments = base + ", " + disp;
  } else if (argEncoding == LabelOrImmediate) {
    arguments = "getImm(" + operand + "), getLabel(" + operand + ")";
  } else {
    // remaining encodings take a single accessor named after the encoding
    arguments = "get" + encoding[argEncoding] + "(" + operand + ")";
  }
  emit(arguments);
}
/**
 * Signals that GenerateAssembler found an emit* method in the
 * VM_Assembler whose name it cannot parse. To generate the
 * OPT_Assembler for a given IA32 OPT_Operator, GenerateAssembler
 * looks at all of the emit* methods for the corresponding IA32 opcode
 * and parses each name to determine the kinds and size of operands it
 * expects; this requires the emit* methods to have stylized names
 * (see the header comment of VM_Assembler for details). A name that
 * does not follow the stylized format causes this exception to be
 * thrown, which aborts generation.
 */
static class BadEmitMethod extends RuntimeException {
  static final long serialVersionUID = 0; // Keep Eclipse quiet

  /**
   * Create an exception reporting that the given portion of an
   * emit method name could not be understood.
   *
   * @param methodName The method name causing trouble
   * @param code The portion of methodName that does not parse
   */
  BadEmitMethod(String methodName, String code) {
    super(String.format("cannot interpret method %s(%s)", methodName, code));
  }
}
/**
 * An EmitterDescriptor represents a single emit method from the
 * VM_Assembler: it records the types of the operands the method
 * expects, how many there are, and the size of the data it operates
 * on. All of this is derived from the stylized name the method is
 * required to have, and is stored in terms of the GenerateAssembler
 * constants for operand type and size.
 * <P>
 * The class encapsulates the logic for parsing stylized emit* method
 * names. If a name cannot be parsed, a
 * {@link GenerateAssembler.BadEmitMethod} runtime exception is thrown
 * and assembler generation is aborted.
 * <P>
 * <HR>
 * <EM>See the descriptions of the GenerateAssembler constants:</EM>
 * <DL>
 * <DT> <EM>Operand types</EM>
 * <DD>
 *  <UL>
 *   <LI> {@link #Immediate}
 *   <LI> {@link #Label}
 *   <LI> {@link #LabelOrImmediate}
 *   <LI> {@link #Absolute}
 *   <LI> {@link #Register}
 *   <LI> {@link #RegisterIndirect}
 *   <LI> {@link #RegisterOffset}
 *   <LI> {@link #RegisterIndexed}
 *  </UL>
 * <DT> <EM>Data size</EM>
 * <DD>
 *  <UL>
 *   <LI> {@link #Byte}
 *   <LI> {@link #Word}
 *   <LI> {@link #Quad}
 *  </UL>
 * </DL>
 */
static class EmitterDescriptor {
  /** Size constant of the emit method, or 0 when its name has no size part. */
  private int size;
  /** Number of operand-type entries actually stored in args. */
  private int count;
  /** Operand-type constants in order; only the first count slots are meaningful. */
  private final int[] args;

  /**
   * Create an EmitterDescriptor by parsing the given emit* method
   * name. The name is split at underscores; the first piece (the
   * emitXXX part) is skipped, and every further piece must be either
   * an operand-type encoding or a size encoding.
   *
   * @param methodName the stylized name of a VM_Assembler emit method
   */
  EmitterDescriptor(String methodName) {
    StringTokenizer pieces = new StringTokenizer(methodName, "_");
    pieces.nextElement(); // skip the leading emitXXX piece
    // upper bound: a size piece, if present, does not occupy a slot
    args = new int[pieces.countTokens()];
    size = 0;
    count = 0;
    for (int position = 0; position < args.length; position++) {
      String piece = pieces.nextToken();
      int operandCode = getEncoding(piece);
      int sizeCode = GenerateAssembler.getSize(piece);
      if (DEBUG) {
        System.err.println(methodName + "[" + position + "] is " + operandCode + "," + sizeCode + " for " + piece);
      }
      if (operandCode != -1) {
        args[count++] = operandCode;
        continue;
      }
      if (sizeCode == -1) {
        throw new BadEmitMethod(methodName, piece);
      }
      size = sizeCode;
    }
  }

  /**
   * Check whether the emit* method represented by this descriptor
   * matches a given encoding. If enc is an operand-type encoding,
   * this checks whether the argument'th operand is of that type. If
   * enc is a size encoding, the argument parameter is ignored and
   * this checks whether the method operates upon data of that size.
   *
   * @param argument The operand number examined
   * @param enc The operand type or size queried, encoded as one of
   *            the constants used throughout GenerateAssembler.
   * @return True if this method matches enc as described above, and
   *         false otherwise.
   */
  boolean argMatchesEncoding(int argument, int enc) {
    boolean sizeQuery = enc >= encoding.length - SIZES;
    if (sizeQuery) {
      return size == enc;
    }
    return argument < count && args[argument] == enc;
  }

  /**
   * Access the array that stores the encodings of the arguments
   * to the emit method represented by this EmitterDescriptor.
   * The array itself is returned, not a copy.
   *
   * @return the array of argument encodings
   */
  int[] getArgs() {
    return args;
  }

  /**
   * Access the data size operated upon by the emit method
   * represented by this EmitterDescriptor.
   *
   * @return data size for this descriptor
   */
  int getSize() {
    return size;
  }

  /**
   * Access the number of operands of the emit method represented
   * by this EmitterDescriptor.
   *
   * @return number of operands for this descriptor
   */
  int getCount() {
    return count;
  }

  /**
   * Describe this descriptor as "ed:" followed by the encoding name
   * of each operand and, when a size is set, the size name in
   * parentheses.
   */
  @Override
  public String toString() {
    StringBuilder text = new StringBuilder("ed:");
    for (int position = 0; position < count; position++) {
      text.append(" ").append(encoding[args[position]]);
    }
    if (size != 0) {
      text.append(" (").append(encoding[size]).append(")");
    }
    return text.toString();
  }
}
/**
 * An EmitterSet represents a set of emit methods from the
 * VM_Assembler for the same IA32 assembler opcode. These sets are
 * used when generating the do&lt;opcode&gt; method for a given IA32
 * opcode: first an EmitterSet of all the VM_Assembler emit methods
 * for that opcode is built, and then the do method is generated
 * recursively by emitting operand type and size tests that partition
 * the set of emitters into two smaller sets. This continues until
 * the set is a singleton.
 */
static class EmitterSet {
  /**
   * The VM_Assembler emit methods that this set represents,
   * one EmitterDescriptor per method.
   */
  private final Set<EmitterDescriptor> emitters = new HashSet<EmitterDescriptor>();

  /**
   * Print this EmitterSet readably.
   * @return a string describing this EmitterSet
   */
  @Override
  public String toString() {
    StringBuilder text = new StringBuilder("Emitter Set of:\n");
    for (EmitterDescriptor descriptor : emitters) {
      text.append(descriptor.toString()).append("\n");
    }
    text.append("-------------\n");
    return text.toString();
  }

  /**
   * Test whether this EmitterSet has exactly one element.
   * @return true if this EmitterSet has exactly one element.
   */
  boolean isSingleton() {
    return emitters.size() == 1;
  }

  /**
   * Insert an EmitterDescriptor into this set.
   * @param ed the EmitterDescriptor to insert
   */
  void add(EmitterDescriptor ed) {
    emitters.add(ed);
  }

  /**
   * Count how many of the emit methods in this set match a given
   * operand type or size encoding at a given operand position.
   * Used (via getEncodingSplit) to judge how evenly a candidate
   * test would split this set.
   *
   * @see #getEncodingSplit
   *
   * @param n the operand being examined
   * @param code the operand type or size code being considered
   * @return the number of emit methods matching the given code
   */
  private int countEncoding(int n, int code) {
    int matches = 0;
    for (EmitterDescriptor descriptor : emitters) {
      if (descriptor.argMatchesEncoding(n, code)) {
        matches++;
      }
    }
    return matches;
  }

  /**
   * Return the absolute difference between the number of emit
   * methods in this set that match a given operand type or size for
   * a given operand and the number that do not. Zero means a
   * perfectly even split; a value equal to the set size means the
   * test does not split the set at all.
   *
   * @param n the operand being examined
   * @param code the operand type or size code being considered
   * @return the difference between matching and non-matching
   *         emit methods in this set
   */
  private int getEncodingSplit(int n, int code) {
    int matching = countEncoding(n, code);
    int nonMatching = emitters.size() - matching;
    return Math.abs(nonMatching - matching);
  }

  /**
   * Carries the two results of searching for the best split of a
   * set: the chosen operand number and the chosen operand type or
   * size test.
   *
   * @see #makeSplit
   * @see #split
   */
  static class SplitRecord {
    /**
     * The operand number to be split.
     */
    int argument;
    /**
     * The operand type or size test on which to split.
     */
    int test;

    /**
     * Make a split record to communicate the results of
     * searching for the best operand to split.
     *
     * @param argument The operand number to be split.
     * @param test The operand type or size test on which to split.
     */
    SplitRecord(int argument, int test) {
      this.argument = argument;
      this.test = test;
    }
  }

  /**
   * Partition this EmitterSet into two subsets according to a
   * SplitRecord.
   *
   * @param split the split record dictating how to split
   * @return a two-element array: index 0 holds the emitters that
   *         match the test, index 1 those that do not
   */
  private EmitterSet[] makeSplit(SplitRecord split) {
    final int operand = split.argument;
    final int code = split.test;
    final EmitterSet matching = new EmitterSet();
    final EmitterSet rest = new EmitterSet();
    for (EmitterDescriptor descriptor : emitters) {
      EmitterSet target = descriptor.argMatchesEncoding(operand, code) ? matching : rest;
      target.add(descriptor);
    }
    return new EmitterSet[]{matching, rest};
  }

  /**
   * Find the operand number and the operand type or size test that
   * partition this EmitterSet most evenly. Operand positions 0 to 3
   * are tried against every entry of the encoding table. A perfectly
   * even split is returned as soon as it is found; otherwise the
   * first candidate with the smallest imbalance below the initial
   * bound of 1000 wins. If no candidate is below that bound, the
   * record holds -1 for both fields.
   *
   * @return a SplitRecord representing the most-even split
   */
  SplitRecord split() {
    int bestArg = -1;
    int bestTest = -1;
    int bestDiff = 1000;
    for (int arg = 0; arg < 4; arg++) {
      for (int test = 0; test < encoding.length; test++) {
        int diff = getEncodingSplit(arg, test);
        if (diff == 0) {
          return new SplitRecord(arg, test);
        }
        if (diff < bestDiff) {
          bestArg = arg;
          bestTest = test;
          bestDiff = diff;
        }
      }
    }
    return new SplitRecord(bestArg, bestTest);
  }

  /**
   * Emit the Java code to call a particular emit method for a
   * particular opcode. The encoded emit method name is synthesized
   * from the opcode, the operand encodings and the size (when it is
   * non-zero), and emitArgs supplies the arguments for each operand,
   * one per line.
   *
   * @see #emitArgs
   *
   * @param opcode the IA32 opcode of the emit method
   * @param args the encoding of each operand to the emit method
   * @param count the number of operands
   * @param level the level of tabbing for pretty output
   * @param size the size constant of the emit method, or 0 for none
   */
  private void emitEmitCall(String opcode, int[] args, int count, int level, int size) {
    emitTab(level);
    emit("emit" + opcode);
    for (int operand = 0; operand < count; operand++) {
      emit("_" + encoding[args[operand]]);
    }
    if (size != 0) {
      emit("_" + encoding[size]);
    }
    if (count == 0) {
      emit("();\n");
      return;
    }
    emit("(");
    final int last = count - 1;
    for (int operand = 0; operand < count; operand++) {
      emit("\n");
      emitTab(level + 1);
      emitArgs(operand, args[operand]);
      emit(operand == last ? ");\n" : ",");
    }
  }

  /**
   * Write the Java code required for error checking and calling
   * the emit method represented by a singleton EmitterSet. A
   * singleton set is typically the result of a series of splits of
   * bigger sets, where the splits correspond to emitted queries of
   * operand types and sizes (see emitSet). Operands or sizes for
   * which no query was emitted are instead covered by emitted
   * assertions; anything already tested by a split needs no
   * further checking.
   *
   * @see #emitSet
   *
   * @param opcode the IA32 opcode to generate
   * @param testsPerformed the set of queries already performed
   *        by splitting, indexed by operand number and encoding
   * @param level level of indentation for pretty printing
   */
  private void emitSingleton(String opcode, boolean[][] testsPerformed, int level) {
    final EmitterDescriptor only = emitters.iterator().next();
    final int[] args = only.getArgs();
    final int count = only.getCount();
    final int size = only.getSize();

    // assert the type of every operand that no split has tested
    for (int operand = 0; operand < count; operand++) {
      boolean alreadyTested = testsPerformed[operand][args[operand]];
      if (!alreadyTested) {
        emitVerify(operand, args[operand], level);
      }
    }

    if (size != 0) {
      // a size test recorded under any operand counts as performed
      boolean sizeTested = false;
      for (int operand = 0; operand < count; operand++) {
        sizeTested |= testsPerformed[operand][size];
      }
      if (!sizeTested) {
        emitVerify(0, size, level);
      }
      if (size == Byte) {
        // byte-sized register operands get an extra assertion that the
        // register value is below 4, except for MOVZX/MOVSX opcodes
        // (presumably the IA32 byte-register restriction -- confirm)
        for (int operand = 0; operand < count; operand++) {
          if (args[operand] != Register) {
            continue;
          }
          if (currentOpcode.contains("MOVZX") || currentOpcode.contains("MOVSX")) {
            continue;
          }
          emitTab(level);
          emit("if (VM.VerifyAssertions && !(");
          emitArgs(operand, Register);
          emit(" < 4)) VM._assert(false, inst.toString());\n");
        }
      }
    }
    emitEmitCall(opcode, args, count, level, size);
  }

  /**
   * Emit Java code for deciding which emit method in this set
   * applies to an OPT_Instruction, and then calling the appropriate
   * method. The set is partitioned recursively into two smaller
   * pieces until a piece has only one element. For each partition
   * an if/else on the chosen operand type or size query is emitted,
   * and the two halves are generated inside its branches. An empty
   * set emits nothing.
   *
   * The testsPerformed table is not needed for the splitting itself
   * beyond detecting a repeated split; it is passed on to
   * emitSingleton so that it can emit assertions for untested
   * operands. The entry set for a split is cleared again once both
   * branches have been generated.
   *
   * @see #split
   * @see #emitSingleton
   *
   * @param opcode the IA32 opcode being generated
   * @param testsPerformed the set of tests already performed
   * @param level the indentation level for pretty printing
   */
  private void emitSet(String opcode, boolean[][] testsPerformed, int level) {
    if (emitters.isEmpty()) {
      return;
    }
    if (isSingleton()) {
      emitSingleton(opcode, testsPerformed, level);
      return;
    }

    final SplitRecord rec = split();
    if (DEBUG) {
      for (int depth = 0; depth < level; depth++) {
        System.err.print(" ");
      }
      System.err.println("split of " + opcode + "[" + rec.argument + "] for " + encoding[rec.test]);
    }
    // choosing the same test twice means the set cannot be split further
    if (testsPerformed[rec.argument][rec.test]) {
      throw new Error("repeated split of " + opcode +
                      "[" + rec.argument + "] for " +
                      encoding[rec.test] + "\n" + this);
    }
    testsPerformed[rec.argument][rec.test] = true;
    final EmitterSet[] halves = makeSplit(rec);

    emitTab(level);
    emit("if (");
    emitTest(rec.argument, rec.test);
    emit(") {\n");
    halves[0].emitSet(opcode, testsPerformed, level + 1);
    emit("\n");
    emitTab(level);
    emit("} else {\n");
    halves[1].emitSet(opcode, testsPerformed, level + 1);
    emitTab(level);
    emit("}\n");

    testsPerformed[rec.argument][rec.test] = false;
  }
}
/**
 * The Class object of the VM_Assembler.  It is looked up by name in
 * main and is the subject of the reflective inquiries about emit
 * methods.
 *
 * @see #main
 */
static Class<?> lowLevelAsm;
/**
 * Collects the emit methods of the VM_Assembler that belong to a
 * given IA32 opcode.  A method belongs to the opcode when its name
 * is exactly "emit" followed by the opcode, or starts with "emit",
 * the opcode and an underscore.
 *
 * @param emitters the set of all emit methods
 * @param opcode the opcode being examined
 * @return a new EmitterSet with one EmitterDescriptor per matching
 * method name
 */
private static EmitterSet buildSetForOpcode(Method[] emitters, String opcode) {
  final String exactName = "emit" + opcode;
  final String operandPrefix = exactName + "_";
  EmitterSet matches = new EmitterSet();
  for (Method candidate : emitters) {
    String name = candidate.getName();
    if (name.startsWith(operandPrefix) || name.equals(exactName)) {
      matches.add(new EmitterDescriptor(name));
    }
  }
  return matches;
}
/**
 * The set of IA32 opcodes to ignore.  Some opcodes are not used by
 * the opt compiler (NOP is a good example) but may be present in
 * the VM_Assembler if other compilers use them.  We keep an
 * explicit list of such opcodes to ignore; it is filled in by the
 * static initializer and consulted by getOpcodes.
 */
private static Set<String> excludedOpcodes;
/**
* Initialize the set of opcodes to ignore
*
* @see #excludedOpcodes
*/
static {
excludedOpcodes = new HashSet<String>();
excludedOpcodes.add("FSAVE");
excludedOpcodes.add("FNSTSW");
excludedOpcodes.add("FUCOMPP");
excludedOpcodes.add("SAHF");
excludedOpcodes.add("NOP");
excludedOpcodes.add("ENTER");
excludedOpcodes.add("JMP");
excludedOpcodes.add("JCC");
}
/**
 * Compute the set of all IA32 opcodes that have emit methods in
 * the VM_Assembler.  This method uses the stylized form of all
 * emit method names in the VM_Assembler to extract the opcode of
 * each one:
 * <ul>
 * <li> for a name of the form emitXXX_..., the opcode is the text
 *      between "emit" and the first underscore;
 * <li> for a name of the form emitXXX without an underscore, the
 *      text after "emit" counts as an opcode only when it is
 *      entirely upper case.
 * </ul>
 * Names that do not start with "emit", names that yield an empty
 * opcode, and opcodes listed in excludedOpcodes are skipped.
 *
 * @param emitters the set of all emit methods in the VM_Assembler
 * @return the set of all distinct opcodes handled by the
 * VM_Assembler, as a set of Strings
 */
private static Set<String> getOpcodes(Method[] emitters) {
  Set<String> opcodes = new HashSet<String>();
  for (Method emitter : emitters) {
    String name = emitter.getName();
    if (DEBUG) System.out.println(name);
    if (!name.startsWith("emit")) {
      continue;
    }

    String opcode;
    int underscore = name.indexOf('_');
    if (underscore != -1) {
      opcode = name.substring(4, underscore);
    } else {
      opcode = name.substring(4);
      // Make sure it is an opcode.  A fixed locale keeps the check
      // independent of the locale of the machine running the generator.
      if (!opcode.equals(opcode.toUpperCase(Locale.ENGLISH))) {
        continue;
      }
    }

    // A method called just "emit" or "emit_..." names no opcode; accepting
    // it would make the generator produce a method called "do".
    if (opcode.length() == 0) {
      continue;
    }

    if (!excludedOpcodes.contains(opcode)) {
      opcodes.add(opcode);
    }
  }
  return opcodes;
}
/**
 * Returns all IA32_ opt compiler operators that do not correspond
 * to real IA32 opcodes handled by the assembler.  These are all
 * supposed to have been removed by the time the assembler is
 * called, so the assembler actually seeing such an opcode is an
 * internal compiler error.  The result is used while generating
 * the error checking code.
 *
 * @param emittedOpcodes the set of IA32 opcodes the assembler
 * understands.
 * @return the operators reported by OPT_OperatorFormatTables that
 * are not contained in emittedOpcodes.
 */
private static Set<String> getErrorOpcodes(Set<String> emittedOpcodes) {
  Set<String> unhandled = new HashSet<String>();
  for (Iterator<String> operators = OPT_OperatorFormatTables.getOpcodes(); operators.hasNext(); ) {
    String operator = operators.next();
    if (emittedOpcodes.contains(operator)) {
      continue;
    }
    unhandled.add(operator);
  }
  return unhandled;
}
/**
 * Given an IA32 opcode, return the set of opt compiler IA32_
 * operators that translate to it.  There is, by and large, a
 * one-to-one mapping in which each IA32_ opt operator represents
 * one IA32 opcode, so this method might seem useless.  However,
 * there are some special cases, notably for operand size: an opt
 * operator of the form ADD__B would mean use the ADD IA32 opcode
 * with a byte operand size.  An operator therefore matches when it
 * equals the opcode or starts with the opcode followed by "__".
 *
 * @param lowLevelOpcode the IA32 opcode to look up
 * @return the matching operators reported by
 * OPT_OperatorFormatTables
 */
private static Set<String> getMatchingOperators(String lowLevelOpcode) {
  final String sizedPrefix = lowLevelOpcode + "__";
  Set<String> matches = new HashSet<String>();
  for (Iterator<String> operators = OPT_OperatorFormatTables.getOpcodes(); operators.hasNext(); ) {
    String operator = operators.next();
    boolean sameName = operator.equals(lowLevelOpcode);
    if (sameName || operator.startsWith(sizedPrefix)) {
      matches.add(operator);
    }
  }
  return matches;
}
/**
 * Generate an assembler for the opt compiler.  The generated source
 * is written to OPT_Assembler.java in the directory named by the
 * system property generateToDir.
 *
 * The VM_Assembler class is resolved before the output file is
 * opened, so that a failed lookup does not leave an empty output
 * file behind, and the output is closed even when generation fails.
 *
 * @param args ignored
 * @throws Error if generateToDir is not set, the VM_Assembler class
 * cannot be found, or the output file cannot be opened or closed
 */
public static void main(String[] args) {
  try {
    lowLevelAsm = Class.forName("org.jikesrvm.compilers.common.assembler.ia32.VM_Assembler");
  } catch (ClassNotFoundException e) {
    throw new Error(e);
  }

  // Without this check a missing property would silently become the
  // path "null/OPT_Assembler.java".
  String targetDir = System.getProperty("generateToDir");
  if (targetDir == null) {
    throw new Error("system property generateToDir must name the output directory");
  }
  try {
    out = new FileWriter(targetDir + "/OPT_Assembler.java");
  } catch (IOException e) {
    throw new Error(e);
  }

  boolean completed = false;
  try {
    generateClassPrologue();
    Method[] emitters = lowLevelAsm.getDeclaredMethods();
    Set<String> opcodes = getOpcodes(emitters);
    generateOpcodeHandlers(emitters, opcodes);
    generateDispatcher(opcodes);
    emit("\n}\n");
    completed = true;
  } finally {
    try {
      out.close();
    } catch (IOException e) {
      // Report a close failure only when generation itself succeeded;
      // otherwise the original failure is the one that must propagate.
      if (completed) {
        throw new Error(e);
      }
    }
  }
}

/** Emits the given text preceded by the indentation for the given level. */
private static void emitIndented(int level, String text) {
  emitTab(level);
  emit(text);
}

/** Emits the package line, imports, class comment, class header and constructor. */
private static void generateClassPrologue() {
  emit("package org.jikesrvm.compilers.opt.ia32;\n\n");
  emit("import org.jikesrvm.*;\n\n");
  emit("import org.jikesrvm.compilers.opt.*;\n\n");
  emit("import org.jikesrvm.compilers.opt.ir.*;\n\n");
  emit("\n\n");
  emit("/**\n");
  emit(" * This class is the automatically-generated assembler for\n");
  emit(" * the optimizing compiler. It consists of methods that\n");
  emit(" * understand the possible operand combinations of each\n");
  emit(" * instruction type, and how to translate those operands to\n");
  emit(" * calls to the VM_Assember low-level emit method\n");
  emit(" *\n");
  emit(" * It is generated by GenerateAssembler.java\n");
  emit(" *\n");
  emit(" */\n");
  emit("public abstract class OPT_Assembler extends OPT_AssemblerBase {\n\n");
  emitIndented(1, "/**\n");
  emitIndented(1, " * @see VM_Assembler\n");
  emitIndented(1, " */\n");
  emitIndented(1, "public OPT_Assembler(int bcSize, boolean print, OPT_IR ir) {\n");
  emitIndented(2, "super(bcSize, print, ir);\n");
  emitIndented(1, "}");
  emit("\n\n");
}

/** Emits one private do&lt;OPCODE&gt; method per opcode, built from that opcode's emit methods. */
private static void generateOpcodeHandlers(Method[] emitters, Set<String> opcodes) {
  for (String opcode : opcodes) {
    setCurrentOpcode(opcode);
    emitIndented(1, "/**\n");
    emitIndented(1, " * Emit the given instruction, assuming that\n");
    emitIndented(1, " * it is a " + currentFormat + " instruction\n");
    emitIndented(1, " * and has a " + currentOpcode + " operator\n");
    emitIndented(1, " *\n");
    emitIndented(1, " * @param inst the instruction to assemble\n");
    emitIndented(1, " */\n");
    emitIndented(1, "private void do" + opcode + "(OPT_Instruction inst) {\n");
    EmitterSet emitterSet = buildSetForOpcode(emitters, opcode);
    // Fresh table per opcode: no operand query has been performed yet.
    boolean[][] testsPerformed = new boolean[4][encoding.length];
    emitterSet.emitSet(opcode, testsPerformed, 2);
    emitIndented(1, "}\n\n");
  }
}

/** Emits the instructionCount field and the doInst method with its opcode switch. */
private static void generateDispatcher(Set<String> opcodes) {
  emitIndented(1, "/**\n");
  emitIndented(1, " * The number of instructions emitted so far\n");
  emitIndented(1, " */\n");
  emitIndented(1, "private int instructionCount = 0;\n\n");
  emitIndented(1, "/**\n");
  emitIndented(1, " * Assemble the given instruction\n");
  emitIndented(1, " *\n");
  emitIndented(1, " * @param inst the instruction to assemble\n");
  emitIndented(1, " */\n");
  emitIndented(1, "public void doInst(OPT_Instruction inst) {\n");
  emitIndented(2, "resolveForwardReferences(++instructionCount);\n");
  emitIndented(2, "switch (inst.getOpcode()) {\n");

  // Operators that get a case label; everything else becomes an error case.
  Set<String> emittedOpcodes = new HashSet<String>();
  for (String opcode : opcodes) {
    for (String operator : getMatchingOperators(opcode)) {
      emittedOpcodes.add(operator);
      emitIndented(3, "case IA32_" + operator + "_opcode:\n");
    }
    emitIndented(4, "do" + opcode + "(inst);\n");
    emitIndented(4, "break;\n");
  }

  // Special case because doJCC is handwritten to add
  // logic for short-forward branches
  emittedOpcodes.add("JCC");
  emitIndented(3, "case IA32_JCC_opcode:\n");
  emitIndented(4, "doJCC(inst);\n");
  emitIndented(4, "break;\n");

  // Special case because doJMP is handwritten to add
  // logic for short-forward branches
  emittedOpcodes.add("JMP");
  emitIndented(3, "case IA32_JMP_opcode:\n");
  emitIndented(4, "doJMP(inst);\n");
  emitIndented(4, "break;\n");

  // Kludge for IA32_LOCK which needs to call emitLockNextInstruction
  emittedOpcodes.add("LOCK");
  emitIndented(3, "case IA32_LOCK_opcode:\n");
  emitIndented(4, "emitLockNextInstruction();\n");
  emitIndented(4, "break;\n");

  // Kludge for PATCH_POINT
  emitIndented(3, "case IG_PATCH_POINT_opcode:\n");
  emitIndented(4, "emitPatchPoint();\n");
  emitIndented(4, "break;\n");

  // All remaining operators share one case group that throws.
  Set<String> errorOpcodes = getErrorOpcodes(emittedOpcodes);
  if (!errorOpcodes.isEmpty()) {
    for (String operator : errorOpcodes) {
      emitIndented(3, "case IA32_" + operator + "_opcode:\n");
    }
    emitIndented(4, "throw new OPT_OptimizingCompilerException(inst + \" has unimplemented IA32 opcode (check excludedOpcodes)\");\n");
  }

  emitIndented(2, "}\n");
  emitIndented(2, "inst.setmcOffset( mi );\n");
  emitIndented(1, "}\n\n");
}
}