GenerateAssembler.java example

Explorer
JikesRVM-master
/*
 *  This file is part of the Jikes RVM project (http://jikesrvm.org).
 *
 *  This file is licensed to You under the Eclipse Public License (EPL);
 *  You may not use this file except in compliance with the License. You
 *  may obtain a copy of the License at
 *
 *      http://www.opensource.org/licenses/eclipse-1.0.php
 *
 *  See the COPYRIGHT.txt file distributed with this work for information
 *  regarding copyright ownership.
 */
import java.io.*;
import java.util.*;
import java.lang.reflect.*;

/**
 * <P>
 * Generates the assembler that is used by the optimizing compiler, using a
 * combination of the tables describing the low-level instruction formats and
 * operators used by the opt compiler, and the interface of the low-level
 * assembler that understands how to generate IA32 opcodes given specific
 * operands. Essentially, the opt assembler becomes a rather large piece of
 * impedance-matching code that decodes the Instructions and Operators
 * understood by the opt compiler to determine what is the appropriate IA32
 * machine code to emit.
 * </P>
 *
 * <P>
 * In order for this to work, both the optimizing compiler tables and the
 * Assembler must use stylized formats. On the optimizing compiler side, the
 * major stylization is that the low-level operators that represent assembly
 * code must correspond directly to the official IA32 assembler mnemonics; i.e.
 * since there is an ADD assembler mnemonic in the Intel assembly
 * specification, there must be a corresponding IA32_ADD operator in the opt
 * compiler tables. The stylization of the Assembler side is more
 * thoroughgoing, and the reader is referred to the Assembler header comments
 * for a definition.
 * </P>
 *
 * <P>
 * Given these stylizations, GenerateAssembler reads the set of assembler
 * mnemonics supported by the Assembler using reflection to examine its
 * stylized method signatures. GenerateAssembler also reads the set of IA32
 * operators that the opt compiler defines, using the helper classes
 * InstructionFormatTables and OperatorFormatTables. It then, for each
 * operator, generates a handler method to call the appropriate Assembler
 * emit method given an Instruction. The Assembler will have a family of
 * emit methods named for each opcode, each such emit method takes a specific
 * set of operand addressing modes and sizes. The handler methods that the
 * GenerateAssembler emits examine the operands to an Instruction, and
 * determine which Assembler method to call for the operand addressing modes
 * and sizes that it finds. GenerateAssembler also generates a top-level
 * dispatch method that examines the operator and calls the appropriate handler.
 * </P>
 *
 * <P>
 * GenerateAssembler generates the opt assembler as part of the normal build
 * process; this poses a slight problem in that it needs to examine the
 * Assembler via reflection to generate the Assembler, but that is not
 * possible until the VM sources (including, of course, the Assembler) have
 * been compiled. The current hack to get around this is to compile the
 * Assembler in advance, and read the resulting class file. This utilizes
 * some supporting files to make the Assembler compile in isolation. This is
 * the purpose of the .fake files in the optimizing compiler's assembler
 * directory.
 * </P>
 *
 * <P>
 * Since this is a freestanding program, use the regular Java exit code
 * conventions.
 * </P>
 *
 * @see InstructionFormatTables
 * @see OperatorFormatTables
 * @see org.jikesrvm.compilers.opt.mir2mc.AssemblerBase
 * @see org.jikesrvm.compilers.opt.ir.Instruction
 * @see org.jikesrvm.compilers.opt.Assembler
 * @see Assembler
 */
public class GenerateAssembler {

  /**
   * Global flag controlling printing of debugging information. When set,
   * the generator prints diagnostic traces to {@code System.err}.
   */
  static final boolean DEBUG = false;

  /**
   * Global reference to the writer for the assembler source file being
   * generated; {@code emit} sends every piece of generated text through it.
   */
  static FileWriter out;

  /**
   * Append a piece of text to the assembler source file being generated.
   * An I/O failure is not recoverable for the generator, so it is rethrown
   * as an {@link Error} that wraps the original exception.
   *
   * @param s the text to be written
   */
  private static void emit(String s) {
    try {
      out.write(s);
    } catch (IOException ioe) {
      throw new Error(ioe);
    }
  }

  /**
   * Write indentation to the assembler source file. This makes the generated
   * source more readable: two spaces are emitted per level.
   *
   * @param level the number of indentation levels to generate; values of
   *          zero or less emit nothing
   */
  private static void emitTab(int level) {
    for (int remaining = level; remaining > 0; remaining--) {
      emit("  ");
    }
  }

  /**
   * Global reference to the InstructionFormatTables class that contains
   * descriptions of each optimizing compiler instruction format that is
   * visible to the assembler (i.e. the MIR_* instruction formats). It is
   * queried reflectively for the {@code <format>ParameterNames} fields.
   *
   * @see InstructionFormatTables
   */
  private static Class<InstructionFormatTables> formats = InstructionFormatTables.class;

  /**
   * Global reference to the opcode argument table for the current opcode being
   * processed. This table is null unless some of the operands in the
   * Instruction are to be ignored when generating code for the opcode.
   * Ignoring arguments is an ad-hoc special case that is controlled by the
   * global opcodeArgTables.
   */
  static int[] currentOpcodeArgTable;

  /**
   * Global reference to the table of symbolic names of the arguments to the
   * current MIR_ instruction format. This information is read from the
   * InstructionFormatTables.
   */
  static String[] currentOpcodeSymbolicNames;

  /**
   * The current IA32 opcode being processed. This is the name of the IA32
   * instruction. Typically, it is the name of the opt compiler IA32_* opcode as
   * well, but there are exceptions in that multiple IA32_* opcodes can map to
   * the same IA32 instruction.
   */
  static String currentOpcode;

  /**
   * The instruction format for the IA32_* opt compiler opcode(s) being
   * processed.
   */
  static String currentFormat;

  /**
   * Global table mapping opt compiler IA32_* opcodes to arrays listing the set
   * of Instruction operands that are to be used as arguments to the IA32
   * architecture instruction. This is used when an instruction has extra
   * operands that are not used in assembly. The array is indexed by the desired
   * argument for the instruction/Assembler method; the value in the array
   * states which operand of the Instruction contains the operand for the
   * instruction. For example, an array of {@code new int[] { 2 }} means the
   * instruction has 1 operand and it is read from operand 2 of the
   * Instruction.
   */
  static final Hashtable<String, int[]> opcodeArgTables;

  /**
   * Initialize the opcodeArgTables table. Every opcode is registered exactly
   * once; a later {@code put} for the same key would silently replace the
   * earlier mapping.
   */
  static {
    opcodeArgTables = new Hashtable<String, int[]>();
    opcodeArgTables.put("CALL", new int[] { 2 });
    opcodeArgTables.put("INT", new int[] { 1 });
    opcodeArgTables.put("CDQ", new int[] { 0 });
    opcodeArgTables.put("CDO", new int[] { 0 });
    opcodeArgTables.put("CDQE", new int[] { 0 });
    opcodeArgTables.put("DIV", new int[] { 1, 2 });
    opcodeArgTables.put("IDIV", new int[] { 1, 2 });
    opcodeArgTables.put("MUL", new int[] { 1, 2 });
    opcodeArgTables.put("IMUL1", new int[] { 1, 2 });
    opcodeArgTables.put("SET", new int[] { 1, 0 });
    opcodeArgTables.put("CMPXCHG", new int[] { 1, 2 });
    opcodeArgTables.put("CMPXCHG8B", new int[] { 2 });
    opcodeArgTables.put("FCMOV", new int[] { 2, 0, 1 });
    opcodeArgTables.put("CMOV", new int[] { 2, 0, 1 });
  }

  /**
   * Make the given opcode the current one. Four global fields are updated,
   * in this order: currentOpcode, currentOpcodeArgTable, currentFormat and
   * currentOpcodeSymbolicNames. The symbolic names are read reflectively from
   * the static field of InstructionFormatTables named after the format
   * followed by {@code ParameterNames}.
   *
   * @param opcode The IA32 architecture opcode to make the current opcode
   * @throws Error if any step fails; the original failure is kept as the
   *           cause. Fields assigned before the failure keep their new values.
   */
  static void setCurrentOpcode(String opcode) {
    try {
      currentOpcode = opcode;
      currentOpcodeArgTable = opcodeArgTables.get(opcode);
      currentFormat = OperatorFormatTables.getFormat(opcode);
      final String namesFieldName = currentFormat + "ParameterNames";
      final Field namesField = formats.getDeclaredField(namesFieldName);
      // The field is static, so no receiver object is needed.
      final Object names = namesField.get(null);
      currentOpcodeSymbolicNames = (String[]) names;
    } catch (Throwable failure) {
      throw new Error("Cannot handle Assembler opcode " + opcode, failure);
    }
  }

  /**
   * The kinds of operand (and data size) markers that can appear in the
   * stylized names of Assembler emit methods. Each constant records the name
   * used in the Assembler method name, the name used for the corresponding
   * is/get helpers, how many Java parameters the operand consumes, and, for
   * size markers only, the number of bytes it denotes.
   */
  enum ArgumentType {
    /**
     * Constant representing immediate arguments to Assembler calls
     */
    Immediate("Imm"),

    /**
     * Constant representing register arguments to Assembler calls. This
     * covers the cases when a general purpose register is encoded into the
     * mod/rm byte; the Assembler handles the details of generating either the
     * reg bits of the mod/rm byte or encoding a register as mod 11.
     */
    GPRegister("Reg", "GPR_Reg"),

    /** Register argument whose opt-side name is {@code FPR_Reg}. */
    FPRegister("Reg", "FPR_Reg"),

    /** Register argument whose opt-side name is {@code MM_Reg}. */
    MMRegister("Reg", "MM_Reg"),

    /** Register argument whose opt-side name is {@code XMM_Reg}. */
    XMMRegister("Reg", "XMM_Reg"),

    /**
     * Constant representing condition arguments to Assembler calls. Such
     * operands are not arguments to the ultimate IA32 machine code instruction,
     * but they are used to calculate the opcode that is generated.
     */
    Condition("Cond"),

    /**
     * Constant representing arguments to Assembler calls that use the
     * scaled-index-base (SIB) addressing mode in the special way that uses
     * neither a base nor an index to generate an absolute address
     */
    Absolute("Abs"),

    /**
     * Constant representing IA32 memory operands that use register-
     * displacement addressing mode (usually mod bits 01 and 10) arguments to
     * Assembler calls. The Assembler takes care of choosing the right
     * mode for the size of the displacement, so this one mode covers two of the
     * four addressing modes the IA32 has. The Assembler also handles the
     * special cases in which this mode requires weird SIB bytes.
     */
    RegisterDisplacement(2, "RegDisp"),

    /**
     * Constant representing arguments to Assembler calls that use the
     * scaled-index-base (SIB) addressing mode in the special way that does not
     * use a base register. The Assembler simply assumes it has an
     * {@code [index << scale + disp]} addressing mode, and the Assembler takes
     * care of generating the special mod/rm that causes the base register to
     * be ignored.
     */
    RegisterOffset(3, "RegOff"),

    /**
     * Constant representing scaled-index-base (SIB) mode arguments to
     * Assembler calls.
     */
    RegisterIndexed(4, "RegIdx"),

    /**
     * Constant representing register-indirect arguments to Assembler calls.
     * This mode handles what is (usually) mod 00 in the mod/rm byte.
     */
    RegisterIndirect("RegInd"),

    /**
     * Constant representing labels used as branch targets. While code is being
     * generated, the machine code offset for a forward branch cannot, in
     * general, be computed as the target code has not been generated yet. The
     * Assembler uses synthetic code offsets, based upon the order of
     * Instructions in the code being compiled, to communicate forward
     * branch targets to the Assembler. These synthetic offsets are passed to
     * the Assembler where it expects Label arguments.
     */
    Label("Label"),

    /**
     * Constant representing arguments to Assembler calls in which it may be
     * either a backward branch target (resolved to an immediate being the exact
     * branch displacement) or a forward branch (which will be a synthetic
     * Label).
     */
    LabelOrImmediate("ImmOrLabel"),

    /**
     * Constant representing instructions that operate upon bytes
     */
    Byte("Byte", 1),

    /**
     * Constant representing instructions that operate upon words (16 bits)
     */
    Word("Word", 2),

    /**
     * Constant representing instructions that operate upon quad words (64 bits)
     */
    Quad("Quad", 8);

    /** Value of {@link #size} for constants that are not size markers. */
    private static final int NO_SIZE = -1;

    private final int size;
    private final int parameters;
    private final String assemblerName;
    private final String optName;

    /**
     * Regular constructor - not a size argument, 1 parameter passed to the
     * assembler, opt and assembler names match
     *
     * @param name string used as part of assembler emit method name or for is
     *            and get methods in AssemblerBase
     */
    ArgumentType(String name) {
      this(name, name, NO_SIZE, 1);
    }

    /**
     * As with regular constructor except assembler and opt names don't match to
     * support typing of assembler registers
     *
     * @param asmName string used as part of assembler emit method name
     * @param optName string used for is and get methods in AssemblerBase
     */
    ArgumentType(String asmName, String optName) {
      this(asmName, optName, NO_SIZE, 1);
    }

    /**
     * As with regular constructor except more parameters are consumed by this
     * argument
     *
     * @param parameters number of parameters consumed by this kind of argument
     * @param name string used as part of assembler emit method name or for is
     *            and get methods in AssemblerBase
     */
    ArgumentType(int parameters, String name) {
      this(name, name, NO_SIZE, parameters);
    }

    /**
     * Size argument constructor - size as given, 0 parameters passed to the
     * assembler, opt and assembler names match
     *
     * @param name string used as part of assembler emit method name or for is
     *            and get methods in AssemblerBase
     * @param size number of bytes encoded by this size
     */
    ArgumentType(String name, int size) {
      this(name, name, size, 0);
    }

    /**
     * Canonical constructor that every other constructor delegates to.
     *
     * @param assemblerName string used as part of assembler emit method name
     * @param optName string used for is and get methods in AssemblerBase
     * @param size number of bytes encoded, or -1 when this is not a size
     * @param parameters number of parameters consumed by this kind of argument
     */
    private ArgumentType(String assemblerName, String optName, int size,
        int parameters) {
      this.assemblerName = assemblerName;
      this.optName = optName;
      this.size = size;
      this.parameters = parameters;
    }

    /**
     * @return does this argument type encode a size information for the
     *         instruction
     */
    boolean isSize() {
      return size != NO_SIZE;
    }

    /**
     * @return size encoded by argument or -1
     */
    int getSize() {
      return size;
    }

    /** @return number of parameters used for this argument type */
    int getParameters() {
      return parameters;
    }

    /** @return the name used for accessing the opt operand for this type */
    String getOptName() {
      return optName;
    }

    /** @return the name used in the assembler emit method name for this type */
    String getAssemblerName() {
      return assemblerName;
    }
  }

  /**
   * For a given string representing a valid operand encoding for the
   * Assembler, return the corresponding ArgumentType constant. The lookup
   * compares the string with the opt name of every ArgumentType constant, so
   * the size markers are matched as well.
   * <P>
   * The encoding "Reg" is ambiguous between register classes. It is
   * disambiguated with the simple name of the Java parameter type (the text
   * after the last '$' of the class name) followed by "_Reg".
   *
   * @param str A valid Assembler encoding of operand type
   * @param type The Java type of the matching emit method parameter; only
   *          consulted when str is "Reg", and may otherwise be null
   * @return The ArgumentType constant corresponding to str
   * @throws Error if str is "Reg" and type is null, or if no constant matches
   */
  private static ArgumentType getEncoding(String str, Class<?> type) {
    String key = str;
    if (str.equals("Reg")) {
      if (type == null) {
        throw new Error("Unable to encode Reg with no type information");
      }
      final String className = type.getName();
      final int nestedSeparator = className.lastIndexOf('$');
      key = className.substring(nestedSeparator + 1) + "_Reg";
    }
    final ArgumentType[] candidates = ArgumentType.values();
    for (int i = 0; i < candidates.length; i++) {
      if (key.equals(candidates[i].getOptName())) {
        return candidates[i];
      }
    }
    throw new Error("Unable to encode the argument " + key + " of type " + type +
        " as a valid argument type");
  }

  /**
   * For a given operand number, return a string which is a valid Java
   * expression for reading that operand out of the current instruction. This
   * function uses the currentOpcodeSymbolicNames table to determine the
   * appropriate accessor (e.g. getValue if the current name is Value), and it
   * uses the currentOpcodeArgTable (in cases where it has an entry for the kind
   * of instruction being processed) to determine which operand in
   * Instruction corresponds to the operand sought.
   *
   * @param op The operand number sought.
   * @return A Java expression for accessing the requested operand.
   * @throws Error if op (or the operand it is remapped to) is outside the
   *           available tables; the message lists the known operand names
   */
  private static String getOperand(int op) {
    try {
      // Remap through the per-opcode table when the opcode has one.
      final int slot = (currentOpcodeArgTable == null) ? op : currentOpcodeArgTable[op];
      return currentFormat + ".get" + currentOpcodeSymbolicNames[slot] + "(inst)";
    } catch (ArrayIndexOutOfBoundsException e) {
      final StringBuilder error = new StringBuilder();
      error.append(currentOpcode).append(": cannot access operand ").append(op).append(":");
      // Separate the names so the diagnostic stays readable.
      for (String name : currentOpcodeSymbolicNames) {
        error.append(' ').append(name);
      }
      throw new Error(error.toString(), e);
    }
  }

  /**
   * Given an operand number and an encoding, generate a test to determine
   * whether the given operand matches the encoding. The emitted text is a
   * call of the form {@code is<OptName>(...)}. For size encodings the
   * argument is the whole instruction ({@code inst}) and argNumber is not
   * used; for operand type encodings it is the accessor expression for the
   * operand. This is used to generate the if statements of the dispatch
   * functions for each opt compiler opcode.
   *
   * @param argNumber The argument to examine
   * @param argEncoding The encoding for which to check
   */
  private static void emitTest(int argNumber, ArgumentType argEncoding) {
    // The operand accessor is only built when it is actually needed.
    final String subject = argEncoding.isSize() ? "inst" : getOperand(argNumber);
    final String predicate = "is" + argEncoding.getOptName();
    emit(predicate + "(" + subject + ")");
  }

  /**
   * Generate code to verify that a given operand matches a given encoding.
   * Since the IA32 architecture is not exactly orthogonal, there are cases
   * when the opt assembler can determine the Assembler emitter to call without
   * looking at all (or, in some cases, any) of the arguments of the
   * Instruction. An example is the ENTER instruction that only takes one
   * immediate parameter, so the opt assembler could simply call that Assembler
   * emitter without checking that the argument is really an immediate. In such
   * cases, the opt assembler generates guarded tests that verify that the
   * Instruction operand actually matches the required encoding. This function
   * emits such a test, as a single indented line, to the assembler being
   * generated.
   *
   * @param argNumber The argument to examine
   * @param argEncoding The encoding for which to check
   * @param level current level for generating pretty, tabified output
   */
  private static void emitVerify(int argNumber, ArgumentType argEncoding,
      int level) {
    final String guardOpening = "if (VM.VerifyAssertions && !";
    final String guardClosing =
        ") throw new OptimizingCompilerException(\"Unexpected operand \" + inst.toString());\n";

    emitTab(level);
    emit(guardOpening);
    emitTest(argNumber, argEncoding);
    emit(guardClosing);
  }

  /**
   * Generate code to fetch all the arguments needed for a given operand number
   * and encoding. The different argument encodings of the Assembler need
   * different arguments to be passed to the emitter function. For instance, a
   * register-displacement mode operand needs to be given a base register and a
   * displacement. This function generates the appropriate arguments given the
   * operand number and encoding; that is, it generates reads of the
   * appropriate Instruction argument and fetches of the appropriate pieces of
   * information from the operand. Encodings without a special case produce a
   * single {@code get<OptName>(operand)} call.
   *
   * @param argNumber The argument being generated.
   * @param argEncoding The encoding to use.
   */
  private static void emitArgs(int argNumber, ArgumentType argEncoding) {
    final String operand = getOperand(argNumber);
    final String expression;
    switch (argEncoding) {
    case LabelOrImmediate:
      expression = "getImm(" + operand + "), getLabel(" + operand + ")";
      break;
    case RegisterDisplacement:
      expression = "getBase(" + operand + "), getDisp(" + operand + ")";
      break;
    case Absolute:
      expression = "getDisp(" + operand + ").toWord().toAddress()";
      break;
    case RegisterOffset:
      expression = "getIndex(" + operand + "), getScale(" + operand + "), getDisp(" + operand + ")";
      break;
    case RegisterIndexed:
      expression = "getBase(" + operand + "), getIndex(" + operand + "), getScale(" + operand
          + "), getDisp(" + operand + ")";
      break;
    case RegisterIndirect:
      expression = "getBase(" + operand + ")";
      break;
    default:
      expression = "get" + argEncoding.getOptName() + "(" + operand + ")";
      break;
    }
    emit(expression);
  }

  /**
   * An EmitterDescriptor represents a single emit method from the Assembler:
   * it explicitly represents the types of operands the method expects, their
   * number, and the size of the data it uses. When GenerateAssembler encounters
   * an emit* method from the Assembler, it creates an EmitterDescriptor for
   * it. Based upon the stylized form the method name is required to have, the
   * EmitterDescriptor represents information about its arguments. This
   * information is stored in terms of the {@link ArgumentType} constants that
   * represent operand type and size.
   * <P>
   * The EmitterDescriptor class encapsulates the logic for parsing the stylized
   * emit* method names that the Assembler has, and turning them into the
   * explicit representation that GenerateAssembler uses. Each name component
   * is translated by getEncoding, which throws an Error for a component it
   * cannot translate.
   * <P>
   * NOTE(review): earlier documentation said a BadEmitMethod runtime exception
   * is thrown on a parse failure; that is not what the constructor below does
   * - confirm against the rest of the file.
   * <P>
   * <HR>
   * <EM>See the descriptions of the ArgumentType constants:</EM>
   * <DL>
   * <DT> <EM>Operand types</EM> <DD>
   * <UL>
   * <LI> {@link ArgumentType#Immediate}
   * <LI> {@link ArgumentType#Label}
   * <LI> {@link ArgumentType#LabelOrImmediate}
   * <LI> {@link ArgumentType#Absolute}
   * <LI> {@link ArgumentType#GPRegister}
   * <LI> {@link ArgumentType#RegisterIndirect}
   * <LI> {@link ArgumentType#RegisterOffset}
   * <LI> {@link ArgumentType#RegisterIndexed}
   * </UL>
   * <DT> <EM>Data size</EM> <DD>
   * <UL>
   * <LI> {@link ArgumentType#Byte}
   * <LI> {@link ArgumentType#Word}
   * <LI> {@link ArgumentType#Quad}
   * </UL>
   * </DL>
   */
  static class EmitterDescriptor {
    /** Data size marker found in the method name, or null if there is none. */
    private final ArgumentType size;

    /** Number of leading entries of {@link #args} that are operand types. */
    private final int count;

    /**
     * Operand type encodings in the first {@link #count} slots. The array has
     * one slot per name component, so slots from {@code count} onwards are
     * not operand entries.
     */
    private final ArgumentType[] args;

    /**
     * Create an EmitterDescriptor for the given methodName. This constructor
     * creates a descriptor that represents explicitly the types and size of the
     * operands of the given emit* method. The name is split at underscores; the
     * first component (emitXXX) is skipped and each remaining component is
     * translated with getEncoding.
     *
     * @param methodName the stylized name of the Assembler emit method
     * @param argTypes the Java parameter types of that method, used to
     *          disambiguate register encodings
     */
    EmitterDescriptor(String methodName, Class<?>[] argTypes) {
      final StringTokenizer tokens = new StringTokenizer(methodName, "_");
      tokens.nextElement(); // first element is emitXXX;
      args = new ArgumentType[tokens.countTokens()];

      ArgumentType dataSize = null;
      int operands = 0;
      int nextParameter = 0;
      int position = 0;
      while (tokens.hasMoreTokens()) {
        final String token = tokens.nextToken();
        // Once the Java parameters are used up there is no type to consult.
        final Class<?> parameterType =
            (nextParameter < argTypes.length) ? argTypes[nextParameter] : null;
        final ArgumentType code = getEncoding(token, parameterType);
        nextParameter += code.getParameters();
        if (DEBUG) {
          System.err.println(methodName + "[" + position + "] is " + code + " for " + token);
        }

        // A size marker is stored but not counted, so the next operand type
        // (if any) reuses its slot.
        args[operands] = code;
        if (code.isSize()) {
          dataSize = code;
        } else {
          operands++;
        }
        position++;
      }
      this.size = dataSize;
      this.count = operands;
    }

    /**
     * This method checks whether the emit* method represented by this
     * EmitterDescriptor expects the argument type represented by enc as its
     * argument'th operand. If enc is an operand type encoding, this method
     * checks whether the given argument is of the appropriate type. If enc is
     * an operand size encoding, the argument parameter is ignored, and this
     * method checks whether the emit* method represented operates upon data of
     * the desired size.
     *
     * @param argument The operand number examined
     * @param enc The argument type queried, as one of the {@link ArgumentType}
     *            constants used throughout GenerateAssembler.
     *
     * @return True if this method expects an argument type encoded by enc as
     *         its argument'th operand, and false otherwise.
     */
    boolean argMatchesEncoding(int argument, ArgumentType enc) {
      if (enc.isSize()) {
        return size == enc;
      }
      return argument < count && args[argument] == enc;
    }

    /**
     * Access the array that stores the encodings of the arguments to the emit
     * method represented by this EmitterDescriptor. The internal array itself
     * is returned, not a copy.
     *
     * @return the array of argument encodings
     */
    ArgumentType[] getArgs() {
      return args;
    }

    /**
     * Access the data size operated upon by emit method represented by this
     * EmitterDescriptor.
     *
     * @return data size for this descriptor, or null if the method name
     *         carries no size marker
     */
    ArgumentType getSize() {
      return size;
    }

    /**
     * Access the number of operands operated upon by emit method represented by
     * this EmitterDescriptor.
     *
     * @return number of operands for this descriptor
     */
    int getCount() {
      return count;
    }

    /**
     * Describe this descriptor as "ed:" followed by its operand types and,
     * when present, the data size in parentheses.
     *
     * @return a readable description of this descriptor
     */
    @Override
    public String toString() {
      final StringBuilder text = new StringBuilder("ed:");
      for (int i = 0; i < count; i++) {
        text.append(" ").append(args[i]);
      }
      if (size != null) {
        text.append(" (").append(size).append(")");
      }
      return text.toString();
    }
  }

  /**
   * An EmitterSet represents a set of emit methods from the Assembler for
   * the same IA32 assembler opcode. These sets are used when generating the
   * do&lt;opcode&gt; method for a given IA32 opcode: first an EmitterSet of all
   * the Assembler emit methods for that opcode is built, and then the do
   * method is recursively generated by emitting operand type and size tests
   * that partition the set of emitters into two smaller sets. This continues
   * until the set is a singleton.
   */
  static class EmitterSet {

    /**
     * The Assembler emit methods that this set represents. This is a set of
     * EmitterDescriptor objects, held in a HashSet.
     */
    private final Set<EmitterDescriptor> emitters = new HashSet<EmitterDescriptor>();

    /**
     * Print this EmitterSet readably: a heading line, one line per emit
     * method descriptor, and a closing rule.
     *
     * @return a string describing this EmitterSet
     */
    public String toString() {
      final StringBuilder text = new StringBuilder("Emitter Set of:\n");
      for (EmitterDescriptor ed : emitters) {
        text.append(ed.toString()).append("\n");
      }
      return text.append("-------------\n").toString();
    }

    /**
     * Test whether this EmitterSet has exactly one element.
     *
     * @return true if this EmitterSet has exactly one element.
     */
    boolean isSingleton() {
      return (emitters.size() == 1);
    }

    /**
     * Insert an EmitterDescriptor into this set.
     *
     * @param ed the EmitterDescriptor to insert
     */
    void add(EmitterDescriptor ed) {
      emitters.add(ed);
    }

    /**
     * Count how many of the emit methods represented by this set match a given
     * operand type or size encoding. This method is used (via getEncodingSplit)
     * while recursively partitioning a given EmitterSet to determine how evenly
     * (or even whether) a given operand type or size splits this set.
     *
     * @see #getEncodingSplit
     *
     * @param n the operand being examined
     * @param code the operand type or size code being considered
     * @return the number of emit methods of which the specified operand type
     *         matches the specified one.
     */
    private int countEncoding(int n, ArgumentType code) {
      int matches = 0;
      for (EmitterDescriptor ed : emitters) {
        if (ed.argMatchesEncoding(n, code)) {
          matches++;
        }
      }
      return matches;
    }

    /**
     * Return the absolute difference between the number of emit methods in
     * this set that match a given operand type or size for a given operand,
     * and the number of those that do not. A result of zero means the test
     * divides the set exactly in half. This method is used while recursively
     * partitioning a given EmitterSet to determine how evenly (or even
     * whether) a given operand type or size splits this set.
     *
     * @param n the operand being examined
     * @param code the operand type or size code being considered
     * @return the difference between matching and non-matching emit methods
     *         in this set.
     */
    private int getEncodingSplit(int n, ArgumentType code) {
      final int matching = countEncoding(n, code);
      final int nonMatching = emitters.size() - matching;
      return Math.abs(nonMatching - matching);
    }

    /**
     * This class is used just to communicate the two results of searching for
     * the best split for a given set: the chosen operand type or size, and the
     * chosen operand number. This class is basically to avoid writing the slew
     * of required type casts that a generic pair would need given Java's
     * primitive type system.
     *
     * @see EmitterSet#makeSplit
     * @see EmitterSet#split
     */
    static class SplitRecord {
      /**
       * The operand number to be split.
       */
      final int argument;

      /**
       * The operand type or size test on which to split.
       */
      final ArgumentType test;

      /**
       * Make a split record to communicate the results of searching for the
       * best operand to split.
       *
       * @param argument The operand number to be split.
       * @param test The operand type or size test on which to split.
       */
      SplitRecord(int argument, ArgumentType test) {
        this.argument = argument;
        this.test = test;
      }
    }

    /**
     * This method uses a SplitRecord as the criterion to partition this
     * EmitterSet into two subsets.
     *
     * @param split the split record dictating how to split
     * @return a two-element array: index 0 holds the emit methods that match
     *         the split's test, index 1 holds those that do not
     */
    private EmitterSet[] makeSplit(SplitRecord split) {
      final int operand = split.argument;
      final ArgumentType criterion = split.test;
      final EmitterSet matching = new EmitterSet();
      final EmitterSet rest = new EmitterSet();
      for (EmitterDescriptor ed : emitters) {
        final EmitterSet target = ed.argMatchesEncoding(operand, criterion) ? matching : rest;
        target.add(ed);
      }
      return new EmitterSet[] { matching, rest };
    }

    /**
     * Find the best operand type or size and operand number to partition this
     * EmitterSet. This method tries every ArgumentType constant against
     * operand positions 0 to 3 and keeps the combination whose matching and
     * non-matching groups differ least in size. A perfectly even split is
     * returned as soon as it is found. Among equally good candidates the
     * first one encountered wins.
     *
     * @return a SplitRecord representing the most-even split. If no candidate
     *         has an imbalance below 1000, the record holds operand -1 and a
     *         null test.
     */
    SplitRecord split() {
      int bestArg = -1;
      ArgumentType bestTest = null;
      int bestImbalance = 1000;
      final ArgumentType[] tests = ArgumentType.values();
      for (int arg = 0; arg < 4; arg++) {
        for (int t = 0; t < tests.length; t++) {
          final ArgumentType candidate = tests[t];
          final int imbalance = getEncodingSplit(arg, candidate);
          if (imbalance == 0) {
            // Cannot do better than an exact halving.
            return new SplitRecord(arg, candidate);
          }
          if (imbalance < bestImbalance) {
            bestArg = arg;
            bestTest = candidate;
            bestImbalance = imbalance;
          }
        }
      }

      return new SplitRecord(bestArg, bestTest);
    }

    /**
     * Emit the Java code to call a particular emit method for a particular
     * opcode. This method takes representations of the opcode and operands of a
     * given emit method, and generates the appropriate Java source code to call
     * it. It synthesizes the encoded emit method name, and uses emitArgs to
     * pass all the required arguments.
     *
     * @see #emitArgs
     *
     * @param opcode the IA32 opcode of the emit method
     * @param args the encoding of each operand to the emit method
     * @param count the number of operands
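     * @param level the level of tabbing for pretty output
     * @param size the operand size whose assembler name is appended to the
     *        emit method name, or null if the method name has no size suffix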
     */
    private void emitEmitCall(String opcode, ArgumentType[] args, int count,
        int level, ArgumentType size) {
      if (DEBUG) {
        System.err.print("Emitting call for " + opcode + " with args: ");
        for (int k = 0; k < args.length; k++) {
          System.err.print(args[k] + " ");
        }
        System.err.println(" count=" + count + " level=" + level + " size=" + size);
      }

      // Method name: "emit" + opcode, then one "_<name>" suffix for each of
      // the first `count` operands, then an optional "_<size>" suffix.
      emitTab(level);
      emit("emit" + opcode);
      for (int operand = 0; operand < count; operand++) {
        emit("_" + args[operand].getAssemblerName());
      }
      if (size != null) {
        emit("_" + size.getAssemblerName());
      }

      // No operands: close the call with an empty argument list.
      if (count == 0) {
        emit("();\n");
        return;
      }

      // One argument per line, indented one level deeper than the call.
      emit("(");
      for (int operand = 0; operand < count; operand++) {
        emit("\n");
        emitTab(level + 1);
        emitArgs(operand, args[operand]);
        final boolean isLast = operand == count - 1;
        emit(isLast ? ");\n" : ",");
      }
    }

    /**
     * Write the Java code required for error checking and calling the emit
     * method represented by a singleton EmitterSet. A singleton EmitterSet will
     * typically be the result of a series of splits of bigger sets, where the
     * splits represent emitted queries of operand types and sizes. (See
     * emitSet) However, there may be cases when some operand has only one
     * possible option, so the splitting will not have generated any tests for
     * it. In this case, we will emit assertions that guarantee the operand is
     * of the expected type. Note that the answers to queries already
     * performed by splitting are known to be fine, so no additional error
     * checking is needed for cases they cover.
     *
     * @see #emitSet
     *
     * @param opcode the IA32 opcode to generate
     * @param testsPerformed the set of queries already performed by splitting.
     * @param level level of indentation for pretty printing
     */
    private void emitSingleton(String opcode, boolean[][] testsPerformed,
        int level) {
      // This method takes the first (expected to be the only) descriptor.
      EmitterDescriptor ed = (EmitterDescriptor) emitters.iterator().next();
      ArgumentType[] args = ed.getArgs();
      int count = ed.getCount();

      // Operand types that no earlier split has queried get an explicit check.
      for (int n = 0; n < count; n++) {
        boolean alreadyTested = testsPerformed[n][args[n].ordinal()];
        if (!alreadyTested) {
          emitVerify(n, args[n], level);
        }
      }

      ArgumentType size = ed.getSize();
      if (size != null) {
        // The size counts as checked if any operand position was tested for it.
        boolean sizeTested = false;
        for (int n = 0; n < count && !sizeTested; n++) {
          sizeTested = testsPerformed[n][size.ordinal()];
        }
        if (!sizeTested) {
          emitVerify(0, size, level);
        }

        if (size == ArgumentType.Byte) {
          // Byte-sized forms: assert that each general purpose register
          // operand is valid as an 8-bit register, except when the current
          // opcode contains MOVZX or MOVSX.
          for (int n = 0; n < count; n++) {
            if (args[n] != ArgumentType.GPRegister) {
              continue;
            }
            boolean extendingMove = currentOpcode.indexOf("MOVZX") != -1 ||
                currentOpcode.indexOf("MOVSX") != -1;
            if (extendingMove) {
              continue;
            }
            emitTab(level);
            emit("if (VM.VerifyAssertions) opt_assert(");
            emitArgs(n, ArgumentType.GPRegister);
            emit(".isValidAs8bitRegister());\n");
          }
        }
      }

      emitEmitCall(opcode, args, count, level, ed.getSize());
    }

    /**
     * Emit Java code for deciding which emit method in the given set applies to
     * an Instruction, and then calling the appropriate method. The method
     * essentially works by recursively partitioning the given set into two
     * smaller pieces until it finds a set with only one element. On each
     * partition, this method generates code for the appropriate operand type or
     * size query, and then calls itself recursively on the two sets resulting
     * from the partition.
     *
     * This method uses split to determine what test to apply, and emitSingleton
     * when it encounters a singleton set.
     *
     * Note that the testsPerformed parameter is not needed to do the recursive
     * splitting; this is passed to emitSingleton to help it generate
     * appropriate error checking for operands.
     *
     * @see #split
     * @see #emitSingleton
     *
     * @param opcode the IA32 opcode being generated
     * @param testsPerformed the set of tests already performed
     * @param level the indentation level for pretty printing
     */
    private void emitSet(String opcode, boolean[][] testsPerformed, int level) {
      if (emitters.isEmpty()) {
        // nothing to generate for an empty set
        return;
      }
      if (isSingleton()) {
        emitSingleton(opcode, testsPerformed, level);
        return;
      }

      SplitRecord rec = split();
      final int operand = rec.argument;
      final ArgumentType query = rec.test;

      if (DEBUG) {
        StringBuilder line = new StringBuilder();
        for (int depth = 0; depth < level; depth++) {
          line.append("  ");
        }
        line.append("split of " + opcode + "[" + operand +
               "] for " + query);
        System.err.println(line);
      }

      // Choosing a test that is already active on this path is reported as
      // an error rather than emitted a second time.
      final int queryIndex = query.ordinal();
      if (testsPerformed[operand][queryIndex]) {
        throw new Error("repeated split of " + opcode + "[" + operand +
               "] for " + query + "\n" + this);
      }

      // Mark the test as performed while both halves are generated, so that
      // emitSingleton can skip checks this test already covers.
      testsPerformed[operand][queryIndex] = true;
      EmitterSet[] halves = makeSplit(rec);
      EmitterSet matching = halves[0];
      EmitterSet rest = halves[1];

      emitTab(level);
      emit("if (");
      emitTest(operand, query);
      emit(") {\n");
      matching.emitSet(opcode, testsPerformed, level + 1);
      emit("\n");
      emitTab(level);
      emit("} else {\n");
      rest.emitSet(opcode, testsPerformed, level + 1);
      emitTab(level);
      emit("}\n");

      // Undo the mark so the caller's view of the table is unchanged.
      testsPerformed[operand][queryIndex] = false;
    }
  }

  /**
   * The Class object of the low-level IA32 Assembler. This is used for
   * reflective inquiries about emit methods: main reads the methods declared
   * by this class and derives the set of supported opcodes from their names.
   *
   * @see #main
   */
  static final Class<org.jikesrvm.compilers.common.assembler.ia32.Assembler> lowLevelAsm = org.jikesrvm.compilers.common.assembler.ia32.Assembler.class;

  /**
   * Computes the set of emit methods in the Assembler for a given IA32
   * opcode.
   *
   * @param emitters the set of all emit methods
   * @param opcode the opcode being examined
   */
  private static EmitterSet buildSetForOpcode(Method[] emitters, String opcode) {
    // A method belongs to the opcode when it is named exactly "emit<opcode>"
    // or when that name is followed by '_'.
    final String exactName = "emit" + opcode;
    final String operandPrefix = exactName + "_";

    EmitterSet result = new EmitterSet();
    for (Method candidate : emitters) {
      String methodName = candidate.getName();
      boolean belongs = methodName.startsWith(operandPrefix) ||
          methodName.equals(exactName);
      if (belongs) {
        result.add(new EmitterDescriptor(methodName, candidate.getParameterTypes()));
      }
    }

    return result;
  }

  /**
   * The set of IA32 opcodes to ignore. Some opcodes are not used by the opt
   * compiler (NOP is a good example) but may be present in the Assembler if
   * other compilers use them. We keep an explicit list of such opcodes to
   * ignore. The set is filled in by the static initializer that follows and
   * is consulted by getOpcodes when collecting opcode names.
   */
  private static Set<String> excludedOpcodes;

  /**
   * Initialize the set of opcodes to ignore
   *
   * @see #excludedOpcodes
   */
  static {
    excludedOpcodes = new HashSet<String>();
    // All excluded opcode names in one place; see the field's documentation
    // for why opcodes are excluded.
    Collections.addAll(excludedOpcodes,
        "FSAVE",
        "FNSTSW",
        "FUCOMPP",
        "SAHF",
        "NOP",
        "ENTER",
        "JMP",
        "JCC",
        "EMMS");
  }

  /**
   * Compute the set of all IA32 opcodes that have emit methods in the
   * Assembler. This method uses the stylized form of all emit method names
   * in the Assembler to extract the opcode of each one. It returns a set of
   * all such distinct names, as a set of Strings.
   *
   * @param emitters the set of all emit methods in the Assembler
   * @return the set of all opcodes handled by the Assembler
   */
  private static Set<String> getOpcodes(Method[] emitters) {
    Set<String> opcodes = new HashSet<String>();
    for (Method emitter : emitters) {
      String name = emitter.getName();
      if (DEBUG) {
        System.err.println(name);
      }
      if (!name.startsWith("emit")) {
        continue;
      }

      // The opcode is whatever follows the 4-character "emit" prefix, up to
      // the first '_' if there is one.
      String opcode;
      int underscore = name.indexOf('_');
      if (underscore != -1) {
        opcode = name.substring(4, underscore);
      } else {
        opcode = name.substring(4);
        // Without an operand suffix, only an all-uppercase remainder is
        // treated as an opcode. Locale.ROOT keeps this check independent of
        // the locale of the machine running the generator.
        if (!opcode.equals(opcode.toUpperCase(Locale.ROOT))) {
          continue;
        }
      }

      // A method named exactly "emit" (or "emit_...") has no opcode. Accepting
      // the empty string would make main generate a method named "do", which
      // is a Java keyword.
      if (opcode.length() == 0) {
        continue;
      }

      if (!excludedOpcodes.contains(opcode)) {
        opcodes.add(opcode);
      }
    }

    return opcodes;
  }

  /**
   * Returns the set of all IA32_ opt compiler operators that do not correspond
   * to real IA32 opcodes handled by the assembler. These are all supposed to
   * have been removed by the time the assembler is called, so the assembler
   * actually seeing such an opcode is an internal compiler error. This set is
   * used when generating error checking code.
   *
   * @param emittedOpcodes the set of IA32 opcodes the assembler understands.
   * @return the set of IA32 opt operators that the assembler does not
   *         understand.
   */
  private static Set<String> getErrorOpcodes(Set<String> emittedOpcodes) {
    // Start from the opcode set supplied by OperatorFormatTables (a copy,
    // judging by the accessor's name) and drop every opcode that is handled.
    Set<String> unhandled = OperatorFormatTables.getCopyOfOpcodeSet();
    for (String handled : emittedOpcodes) {
      unhandled.remove(handled);
    }
    return unhandled;
  }

  /**
   * Given an IA32 opcode, return the set of opt compiler IA32_ operators that
   * translate to it. There is, by and large, a one-to-one mapping in that each
   * IA32_ opt operator represents an IA32 opcode, so this method might seem
   * useless. However, there are some special cases, notably for operand size.
   * In this case, an opt operator of the form ADD__B would mean use the ADD
   * IA32 opcode with a byte operand size.
   */
  private static Set<String> getMatchingOperators(String lowLevelOpcode) {
    // Operators carrying a suffix are written "<opcode>__<suffix>".
    final String suffixedPrefix = lowLevelOpcode + "__";

    Set<String> result = new HashSet<String>();
    for (String operator : OperatorFormatTables.getCopyOfOpcodeSet()) {
      boolean exactMatch = operator.equals(lowLevelOpcode);
      if (exactMatch || operator.startsWith(suffixedPrefix)) {
        result.add(operator);
      }
    }

    return result;
  }

  /**
   * Generate an assembler for the opt compiler
   */
  public static void main(String[] args) {
    // Fail fast when the output directory is not configured. Without this
    // check the concatenation below would silently produce the relative
    // path "null/AssemblerOpt.java".
    String outputDir = System.getProperty("generateToDir");
    if (outputDir == null) {
      throw new Error("system property \"generateToDir\" is not set; " +
          "it must name the directory AssemblerOpt.java is generated into");
    }

    try {
      out = new FileWriter(outputDir + "/AssemblerOpt.java");
    } catch (IOException e) {
      throw new Error(e);
    }

    // Set once all text has been emitted, so the finally block can tell a
    // failure to close from a failure during generation.
    boolean generated = false;
    try {
      // ---- file header: package, imports and class comment ----
      emit("package org.jikesrvm.compilers.opt.mir2mc.ia32;\n\n");
      emit("import static org.jikesrvm.compilers.opt.ir.Operators.*;\n\n");
      emit("import org.jikesrvm.*;\n\n");
      emit("import org.jikesrvm.compilers.opt.*;\n\n");
      emit("import org.jikesrvm.compilers.opt.ir.*;\n\n");
      emit("import org.jikesrvm.compilers.opt.ir.ia32.*;\n\n");
      emit("import static org.jikesrvm.compilers.opt.ir.ia32.ArchOperators.*;\n\n");
      emit("import static org.jikesrvm.compilers.opt.OptimizingCompilerException.opt_assert;\n\n");
      emit("\n\n");

      emit("/**\n");
      emit(" *  This class is the automatically-generated assembler for\n");
      emit(" * the optimizing compiler.  It consists of methods that\n");
      emit(" * understand the possible operand combinations of each\n");
      emit(" * instruction type, and how to translate those operands to\n");
      emit(" * calls to the Assember low-level emit method\n");
      emit(" *\n");
      emit(" * It is generated by GenerateAssembler.java\n");
      emit(" *\n");
      emit(" */\n");
      emit("public class AssemblerOpt extends AssemblerBase {\n\n");

      // ---- constructor ----
      emitTab(1);
      emit("/**\n");
      emitTab(1);
      emit(" * @param bcSize initial machine code buffer size.\n");
      emitTab(1);
      emit(" * @param print whether to dump generated machine code.\n");
      emitTab(1);
      emit(" * @param ir the IR object for the opt compilation.\n");
      emitTab(1);
      emit(" * @see org.jikesrvm.compilers.common.assembler.ia32.Assembler\n");
      emitTab(1);
      emit(" */\n");
      emitTab(1);
      emit("public AssemblerOpt(int bcSize, boolean print, IR ir) {\n");
      emitTab(2);
      emit("super(bcSize, print, ir);\n");
      emitTab(1);
      emit("}");
      emit("\n\n");

      // ---- one do<OPCODE> method per opcode found in the Assembler ----
      Method[] emitters = lowLevelAsm.getDeclaredMethods();
      Set<String> opcodes = getOpcodes(emitters);

      for (String opcode : opcodes) {
        // Must precede the emits below, which read currentFormat and
        // currentOpcode.
        setCurrentOpcode(opcode);
        emitTab(1);
        emit("/**\n");
        emitTab(1);
        emit(" *  Emit the given instruction, assuming that\n");
        emitTab(1);
        emit(" * it is a " + currentFormat + " instruction\n");
        emitTab(1);
        emit(" * and has a " + currentOpcode + " operator\n");
        emitTab(1);
        emit(" *\n");
        emitTab(1);
        emit(" * @param inst the instruction to assemble\n");
        emitTab(1);
        emit(" */\n");
        emitTab(1);
        emit("private void do" + opcode + "(Instruction inst) {\n");
        EmitterSet emitter = buildSetForOpcode(emitters, opcode);
        // Fresh "tests performed" table: 4 operand slots by every ArgumentType.
        boolean[][] tp = new boolean[4][ArgumentType.values().length];
        emitter.emitSet(opcode, tp, 2);
        emitTab(1);
        emit("}\n\n");
      }

      // ---- instruction counter and the doInst dispatch method ----
      emitTab(1);
      emit("/**\n");
      emitTab(1);
      emit(" *  The number of instructions emitted so far\n");
      emitTab(1);
      emit(" */\n");
      emitTab(1);
      emit("private int instructionCount = 0;\n\n");

      emitTab(1);
      emit("@Override\n");
      emitTab(1);
      emit("public void doInst(Instruction inst) {\n");
      emitTab(2);
      emit("instructionCount++;\n");
      emitTab(2);
      emit("resolveForwardReferences(instructionCount);\n");
      emitTab(2);
      emit("switch (inst.getOpcode()) {\n");

      // Operators that receive a case label; everything left over is
      // reported as an error opcode further down.
      Set<String> emittedOpcodes = new HashSet<String>();

      for (String opcode : opcodes) {
        for (String operator : getMatchingOperators(opcode)) {
          emitTab(3);
          emittedOpcodes.add(operator);
          emit("case IA32_" + operator + "_opcode:\n");
        }
        emitTab(4);
        emit("do" + opcode + "(inst);\n");
        emitTab(4);
        emit("break;\n");
      }

      // Special case because doJCC is handwritten to add
      // logic for short-forward branches
      emittedOpcodes.add("JCC");
      emitTab(3);
      emit("case IA32_JCC_opcode:\n");
      emitTab(4);
      emit("doJCC(inst);\n");
      emitTab(4);
      emit("break;\n");

      // Special case because doJMP is handwritten to add
      // logic for short-forward branches
      emittedOpcodes.add("JMP");
      emitTab(3);
      emit("case IA32_JMP_opcode:\n");
      emitTab(4);
      emit("doJMP(inst);\n");
      emitTab(4);
      emit("break;\n");

      // Kludge for IA32_LOCK which needs to call emitLockNextInstruction
      emittedOpcodes.add("LOCK");
      emitTab(3);
      emit("case IA32_LOCK_opcode:\n");
      emitTab(4);
      emit("emitLockNextInstruction();\n");
      emitTab(4);
      emit("break;\n");

      // Kludge for PATCH_POINT
      emitTab(3);
      emit("case IG_PATCH_POINT_opcode:\n");
      emitTab(4);
      emit("emitPatchPoint();\n");
      emitTab(4);
      emit("break;\n");

      // Kludge for IMMQ_MOV
      emitTab(3);
      emit("case IMMQ_MOV_opcode:\n");
      emitTab(4);
      emit("doIMMQ_MOV(inst);\n");
      emitTab(4);
      emit("break;\n");

      // Kludge for LOWTABLESWITCH
      emitTab(3);
      emit("case MIR_LOWTABLESWITCH_opcode:\n");
      emitTab(4);
      emit("doLOWTABLESWITCH(inst);\n");
      emitTab(4);
      emit("// kludge table switches that are unusually long instructions\n");
      emitTab(4);
      emit("instructionCount += MIR_LowTableSwitch.getNumberOfTargets(inst);\n");
      emitTab(4);
      emit("break;\n");

      // Kludge for LABEL (only needed for prettier output)
      // A line is printed before LABEL (instead of after BBEND) because
      // the later case interferes with placements of NOPs and thus doesn't
      // lead to a good visual distinction of blocks.
      emitTab(3);
      emit("// Make machine code dumps more readable by visually\n");
      emitTab(3);
      emit("// distinguishing basic blocks\n");
      emitTab(3);
      emit("case LABEL_opcode:\n");
      emitTab(4);
      emit("comment(\"\");\n");
      emitTab(4);
      emit("comment(Label.getBlock(inst).toString());\n");
      emitTab(4);
      emit("break;\n");

      emitTab(3);
      emit("// Opcodes that are not allowed to be present. Those are expected\n");
      emitTab(3);
      emit("// to be handled earlier.\n");
      Set<String> errorOpcodes = getErrorOpcodes(emittedOpcodes);
      // The throw is only emitted together with at least one case label;
      // on its own it would directly follow the preceding "break;".
      if (!errorOpcodes.isEmpty()) {
        for (String errorOpcode : errorOpcodes) {
          emitTab(3);
          emit("case IA32_" + errorOpcode + "_opcode:\n");
        }
        emitTab(4);
        emit("throw new OptimizingCompilerException(inst + \" has unimplemented IA32 opcode (check excludedOpcodes)\");\n");
      }
      emitTab(2);
      emit("}\n");
      emitTab(2);
      emit("mcOffsets.setMachineCodeOffset(inst, mi);\n");
      emitTab(1);
      emit("}\n\n");

      emit("\n}\n");

      generated = true;
    } finally {
      // Always release the file, including when generation fails part-way.
      try {
        out.close();
      } catch (IOException e) {
        if (generated) {
          throw new Error(e);
        }
        // Generation already failed; let that original failure propagate
        // instead of replacing it with the close failure.
      }
    }
  }
}