[kaffe] CVS kaffe (inaba): ARM FP reg numbering fix for jit.

Sat Jul 21 10:40:37 PDT 2007

PatchSet 7501 
Date: 2007/07/21 17:39:26
Author: inaba
Branch: HEAD
Tag: (none) 
Log:
ARM FP reg numbering fix for jit.

Members: 
	ChangeLog:1.5000->1.5001 
	config/arm/jit-arm.def:INITIAL->1.9 

Index: kaffe/ChangeLog
diff -u kaffe/ChangeLog:1.5000 kaffe/ChangeLog:1.5001

--- kaffe/ChangeLog:1.5000	Wed Jul 11 15:22:32 2007
+++ kaffe/ChangeLog	Sat Jul 21 17:39:26 2007
@@ -1,3 +1,7 @@
+2007-07-22  Kiyo Inaba <inaba at src.ricoh.co.jp>
+
+	* config/arm/jit-arm.def: FP reg numbering fix for jit.
+
 2007-07-11  Ito Kazumitsu  <kaz at maczuka.gcd.org>
 
 	* FAQ/FAQ.classlibrary-compile: Updated the ecj status.
===================================================================
Checking out kaffe/config/arm/jit-arm.def
RCS:  /home/cvs/kaffe/kaffe/config/arm/jit-arm.def,v
VERS: 1.9
***************
--- /dev/null	Sun Aug  4 19:57:58 2002
+++ kaffe/config/arm/jit-arm.def	Sat Jul 21 17:40:36 2007
@@ -0,0 +1,1759 @@
+/* 
+ * jit-arm.def
+ * ARM instruction definition.
+ *
+ * Copyright (c) 1996, 1997, 1999
+ *	Transvirtual Technologies, Inc.  All rights reserved.
+ *
+ * See the file "license.terms" for information on usage and redistribution 
+ * of this file. 
+ */
+
+#ifdef KAFFE_VMDEBUG
+int jit_debug;
+#define debug(x)        (jit_debug ? dprintf("%x:\t", CODEPC), dprintf x : 0)
+#else
+#define	debug(x)
+#endif
+
+#include "icode.h"
+
+/*
+ * The information in this file was extracted from the ARM
+ * Architecture Reference, Document ARM DDI 0100A.
+ *
+ * The page and section numbers correspond to this document.
+ */
+#define	R0	0		/* arg0 + int return */
+#define	R1	1		/* arg1 */
+#define	R2	2		/* arg2 */
+#define	R3	3		/* arg3 */
+#define	R4	4
+#define	R5	5
+#define	R6	6
+#define	R7	7
+#define	R8	8
+#define	R9	9
+#define	R10	10
+#define	R11	11
+#define	R12	12
+#define	R13	13
+#define	R14	14
+#define	R15	15
+
+#define	F0	0		/* float return */
+#define	F1	1
+#define	F2	2
+#define	F3	3
+#define	F4	4
+#define	F5	5
+#define	F6	6
+#define	F7	7
+
+#define	Reg_F0	(R15 + 1 + F0)
+#define	Reg_F1	(R15 + 1 + F1)
+#define	Reg_F2	(R15 + 1 + F2)
+#define	Reg_F3	(R15 + 1 + F3)
+#define	Reg_F4	(R15 + 1 + F4)
+#define	Reg_F5	(R15 + 1 + F5)
+#define	Reg_F6	(R15 + 1 + F6)
+#define	Reg_F7	(R15 + 1 + F7)
+
+/*
+ * The bits for each register - used for pushing and popping.
+ */
+#define	BR0	(1<<R0)
+#define	BR1	(1<<R1)
+#define	BR2	(1<<R2)
+#define	BR3	(1<<R3)
+#define	BR4	(1<<R4)
+#define	BR5	(1<<R5)
+#define	BR6	(1<<R6)
+#define	BR7	(1<<R7)
+#define	BR8	(1<<R8)
+#define	BR9	(1<<R9)
+#define	BR10	(1<<R10)
+#define	BR11	(1<<R11)
+#define	BR12	(1<<R12)
+#define	BR13	(1<<R13)
+#define	BR14	(1<<R14)
+#define	BR15	(1<<R15)
+
+/*
+ * Some register aliases.
+ */
+#define	FP	R11
+#define	IP	R12
+#define	SP	R13
+#define	LR	R14
+#define	PC	R15
+
+#define	BFP	BR11
+#define	BIP	BR12
+#define	BSP	BR13
+#define	BLR	BR14
+#define	BPC	BR15
+
+/*
+ * Define condition codes, see 3.2 pg 3-3
+ */
+#define	CC_EQ		( 0 << 28)	/* Equal */
+#define	CC_NE		( 1 << 28)	/* Not Equal */
+#define	CC_CS		( 2 << 28)	/* Carry Set/Unsigned Higher or Same */
+#define	CC_CC		( 3 << 28)	/* Carry Clear/Unsigned Lower */
+#define	CC_MI		( 4 << 28)	/* Minus/Negative */
+#define	CC_PL		( 5 << 28)	/* Plus/Positive or Zero */
+#define	CC_VS		( 6 << 28)	/* Overflow */
+#define	CC_VC		( 7 << 28)	/* No Overflow */
+#define	CC_HI		( 8 << 28)	/* Unsigned Higher */
+#define	CC_LS		( 9 << 28)	/* Unsigned Lower or Same */
+#define	CC_GE		(10 << 28)	/* Signed Greater Than or Equal */
+#define	CC_LT		(11 << 28)	/* Signed Less Than */
+#define	CC_GT		(12 << 28)	/* Signed Greater Than */
+#define	CC_LE		(13 << 28)	/* Signed Less Than or Equal */
+#define	CC_AL		(14 << 28)	/* Always (unconditional) */
+#define	CC_NV		(15 << 28)	/* Never */
+
+/*
+ * Selection of operand modes, see 3.6.2 pg 3-12
+ */
+#define ALU_OP_REG   (0 << 25)		/* operand is in register */
+#define ALU_OP_IMMED (1 << 25)		/* operand is 8bit+shifted immediate */
+
+/*
+ * Define ALU operations.
+ */
+#define ALU_SETCC	( 1 << 20)	/* Set condition codes for this op */
+
+#define	ALU_AND		( 0 << 21)	/* And */
+#define	ALU_EOR		( 1 << 21)	/* Exclusive OR */
+#define	ALU_SUB		( 2 << 21)	/* Subtract */
+#define	ALU_RSB		( 3 << 21)	/* Reverse subtract */
+#define	ALU_ADD		( 4 << 21)	/* Add */
+#define	ALU_ADC		( 5 << 21)	/* Add carry */
+#define	ALU_SBC		( 6 << 21)	/* Subtract carry */
+#define	ALU_RSC		( 7 << 21)	/* Reverse subtract with carry */
+#define	ALU_TST		( 8 << 21)	/* Test (uses AND) */
+#define	ALU_TEQ		( 9 << 21)	/* Test Equivalence (uses EOR) */
+#define	ALU_CMP		(10 << 21)	/* Compare */
+#define	ALU_CMPN	(11 << 21)	/* Compare negated */
+#define	ALU_ORR		(12 << 21)	/* Logical inclusive OR */
+#define	ALU_MOV		(13 << 21)	/* Move */
+#define	ALU_BIC		(14 << 21)	/* Bit Clear */
+#define	ALU_MVN		(15 << 21)	/* Move Not */
+
+/*
+ * Define Shift operations.
+ * The "C" form use an immediate constant as opposed to a register.
+ * See 3.6.2 pg 3-12 and up.
+ */
+#define	SFT_LSLC	( 0 <<  8)
+#define	SFT_LSL		( 1 <<  8)
+#define	SFT_LSRC	( 2 <<  8)
+#define	SFT_LSR		( 3 <<  8)
+#define	SFT_ASRC	( 4 <<  8)
+#define	SFT_ASR		( 5 <<  8)
+#define	SFT_RORC	( 6 <<  8)
+#define	SFT_ROR		( 7 <<  8)
+
+/*
+ * Memory operations.
+ */
+
+/* 
+ * A. Load and Store Word or Unsigned Byte Addressing Modes 3.11 pg 3-55 
+ * B. Load and Store Halfword or Load Signed Byte Addr. Modes 3.13 pg 3-76
+ */
+#define	MEM_LOAD	0x00100000	/* load or store */
+#define	MEM_STORE	0x00000000
+
+/* NB: A only */
+#define MEM_UBYTE       0x00400000	/* unsigned byte or word */
+#define	MEM_WORD	0x00000000
+
+#define	MEM_ADDOFF	0x00800000	/* add or subtract offset */
+#define	MEM_SUBOFF	0x00000000
+
+#define	MEM_PREIDX	0x01000000	/* pre-indexing or post-indexing */
+#define	MEM_POSTIDX	0x00000000
+
+#define	MEM_WRTBCK	0x00200000	/* update register with addr or not */
+
+/* NB: B. only */
+#define	MEM_SGN		0x00000040	/* signed or unsigned halfword access */
+#define	MEM_USGN	0x00000000
+
+/* NB: B. only */
+#define	MEM_HALF	0x00000020	/* halfword or byte */
+#define	MEM_BYTE	0x00000000
+
+/*
+ * Multiple load/store operations. See 3.15 pg 3-88
+ */
+#define	MULTI_STORE	0x00000000
+#define	MULTI_LOAD	0x00100000
+
+#define	MULTI_DA	0x00000000	/* Decrement After */
+#define	MULTI_DB	0x01000000	/* Decrement Before */
+#define	MULTI_IA	0x00800000	/* Increment After */
+#define	MULTI_IB	0x01800000	/* Increment Before */
+
+/*
+ * Multiply operations.
+ * Not implemented in Architecture Version 1
+ */
+#define	MUL_MUL		0x00000000	/* Multiply */
+#define	MUL_MULLADD	0x00200000	/* Multiply Accumulate */
+
+/*
+ * Floating point operations.
+ */
+
+/*
+ * Dyadic operations
+ */
+
+#define	FLT_ADF		0x00000000
+#define	FLT_MUF		0x00100000
+#define	FLT_SUF		0x00200000
+#define	FLT_RSF		0x00300000
+#define	FLT_DVF		0x00400000
+#define	FLT_RDF		0x00500000
+#define	FLT_POW		0x00600000
+#define	FLT_RPW		0x00700000
+#define	FLT_RMF		0x00800000 /* remainder */
+#define	FLT_FML		0x00900000 /* fast multiply */
+#define	FLT_FDV		0x00A00000 /* fast divide */
+#define	FLT_FRD		0x00B00000 /* reverse fast divide */
+#define	FLT_POL		0x00C00000 /* polar arc (arctan2) */
+
+#define	FLT_MVF		0x00008000 /* Fd := Fm */
+#define	FLT_MNF		0x00108000 /* Fd := -Fm */
+#define	FLT_RND		0x00308000 /* Fd := integer value of Fm 
+				      (NOTE: float-to-float operation!) */
+
+/*
+ * Floating point coprocessor register transfer
+ */
+#define FLT_FLT  0x00000110  /* Fn := Rd */
+#define FLT_FIX  0x00100110  /* Rd := Fm */
+
+#define FLT_CMP  0x0090F110  /* Compare Fn, Fm */
+#define FLT_CMPE 0x00D0F110  /* Compare Fn, Fm */
+
+/*
+ * Floating point precision (arith).
+ */
+#define	FLT_PREC_SINGLE	0x00000000
+#define	FLT_PREC_DOUBLE	0x00000080
+#define	FLT_PREC_EXTEND	0x00080000
+
+/*
+ * Floating point precision (load/store).
+ * Notice that the 0x100 which forces this to be a floating point
+ * Ld/St is not part of these values; op_fmem_rrc supplies it.
+ */
+#define	FLT_MPREC_SINGLE 0x00000000
+#define	FLT_MPREC_DOUBLE 0x00008000
+#define	FLT_MPREC_EXTEND 0x00400000
+
+/*
+ * For ld/st of single values
+ */
+#define FLT_MEMOP_SINGLE   0x00000100
+
+/*
+ * for ld/st of multiple values
+ */
+#define FLT_MEMOP_MULTIPLE 0x00000200
+
+/*
+ * Floating point rounding modes
+ */
+#define	FLT_RND_NEAREST	0x00000000
+#define	FLT_RND_PLUSINF	0x00000020
+#define	FLT_RND_MININF	0x00000040
+#define	FLT_RND_ZERO	0x00000060
+
+/*
+ * Define general instruction forms.
+ *
+ * We use the following conventions:
+ * NB: Rd is called DST
+ *     Rn is called SRC1
+ *     Rm is called SRC2
+ *
+ * A constant is encoded as an 8-bit immediate IMM shifted left by
+ * a shift IMMSFT.
+ */
+/* register := register x register */
+#define	op_rrr(CC,ALU,SFT,SFTVAL,DST,SRC1,SRC2) \
+  LOUT(0x00000000|(CC)|(ALU)|(SFT)|((SFTVAL)<<7)|((DST)<<12)|((SRC1)<<16)|(SRC2))
+
+/* register := register x constant */
+#define	op_rrc(CC,ALU,DST,SRC1,IMM,IMMSFT) \
+  LOUT(0x02000000|(CC)|(ALU)|((DST)<<12)|((SRC1)<<16)|((IMMSFT)<<8)|((IMM)&0xFF))
+
+/* register := register x register */
+#define	op_muladd_rrr(CC,MULOP,DST,SRC1,SRC2) \
+  LOUT(0x00000090|(CC)|(MULOP)|((DST)<<16)|(SRC1)|((SRC2)<<8))
+
+/* Not used or debugged: see 3.8.5
+#define	op_muladd_rrrr(CC,MULOP,DST,SRC1,SRC2,SRC3) \
+  LOUT(0x00200090|(CC)|(MULOP)|((DST)<<16)|(SRC1)|((SRC2)<<8)|((SRC3)<<12))
+*/
+
+#define	op_branch(CC,DST) \
+  LOUT(0x0A000000|(CC)|(((DST)>>2)&0x00FFFFFF))
+
+#define	op_branch_linked(CC,DST) \
+  LOUT(0x0B000000|(CC)|(((DST)>>2)&0x00FFFFFF))
+
+/* Load/Store Word or Unsigned Byte, see 3.11
+ * ldr REG, [IDX, +/- IDX2] or
+ * str REG, [IDX, +/- IDX2]
+ */
+#define	op_mem_rrr(CC,MEM,SFT,SFTVAL,REG,IDX,IDX2) \
+  LOUT(0x04000000|(CC)|(MEM)|(SFT)|((SFTVAL)<<7)|((IDX)<<16)|((REG)<<12)|(IDX2))
+
+/* Load/Store Word or Unsigned Byte, see 3.11
+ * ldr REG, [IDX +/- OFF]
+ * str REG, [IDX +/- OFF]
+ */
+#define	op_mem_rrc(CC,MEM,REG,IDX,OFF) \
+  LOUT(0x04000000|(CC)|(MEM)|((REG)<<12)|((IDX)<<16)|((OFF)&0xFFF))
+
+/* Load and Store Halfword or Load Signed Byte with an immediate 8-bit offset 
+ * ldr{h|sh|sb} REG, [IDX +/- OFF]
+ * str{h|sh|sb} REG, [IDX +/- OFF]
+ * 
+ * See 3.13 pg 3-76
+ * NB: you cannot use this instruction to store a byte.
+ */
+#define	op_mem_hb_rrc(CC,MEM,SGN,SIZE,REG,IDX,OFF) \
+  LOUT(0x00400090|(CC)|(MEM)|(SGN)|(SIZE)|((IDX)<<16)|((REG)<<12)|((OFF)&0xF)|(((OFF)&0xF0)<<4))
+
+#define	op_f_rrr(CC,FALU,PREC,RND,DST,SRC1,SRC2) \
+  LOUT(0x0E000100|(CC)|(FALU)|(PREC)|(RND)|(((DST)&0x07)<<12)|(((SRC1)&0x07)<<16)|((SRC2)&0x07))
+#define	op_f_rrc(CC,FALU,PREC,RND,DST,SRC1,SRC2) \
+  LOUT(0x0E000100|(CC)|(FALU)|(PREC)|(RND)|(((DST)&0x07)<<12)|(((SRC1)&0x07)<<16)|(SRC2))
+
+
+/*
+ * The FLT instruction has a non-obvious encoding in that the 
+ * "destination" register is really the source integer register
+ */
+#define op_cvt_i2f(F,R) op_f_rrr(CC_AL, FLT_FLT, FLT_PREC_SINGLE, FLT_RND_ZERO, R, F, 0)
+#define op_cvt_i2d(F,R) op_f_rrr(CC_AL, FLT_FLT, FLT_PREC_DOUBLE, FLT_RND_ZERO, R, F, 0)
+
+#define op_cvt_f2i(R,F) op_f_rrr(CC_AL, FLT_FIX, FLT_PREC_SINGLE, FLT_RND_ZERO, R, 0, F)
+#define op_cvt_d2i(R,F) op_f_rrr(CC_AL, FLT_FIX, FLT_PREC_DOUBLE, FLT_RND_ZERO, R, 0, F)
+
+
+/*
+ * Floating point memory operation
+ */
+#define	op_fmem_rrc(CC,MEM,MPREC,REG,IDX,OFF) \
+  LOUT(0x0C000100|(CC)|(MEM)|(MPREC)|(((REG)&0x07)<<12)|((IDX)<<16)|((OFF)&0xFF))
+
+#define	op_push_m(T,BITS) \
+  LOUT(0x08000000|CC_AL|MULTI_STORE|MULTI_DB|((T)<<16)|(BITS))
+
+/*
+ * Redefine push to use auto index & writeback (like C calling convention)
+ */
+#define	op_push_prologue(T,BITS) \
+  LOUT(0x08000000|CC_AL|MULTI_STORE|MULTI_DB|MEM_WRTBCK|((T)<<16)|(BITS))
+
+/*
+ * Note that this generates the same opcode as used in the normal C
+ * linkage convention. The T register should always be FP for
+ * this particular usage.
+ */
+#define	op_pop_epilogue(T,BITS) \
+  LOUT(0x08000000|CC_AL|MULTI_LOAD|MULTI_DB|((T)<<16)|(BITS))
+
+/*
+ * A pop that increments the base register T (MULTI_IA with writeback).
+ * This would normally match an op_push_prologue (which uses MULTI_DB
+ * with writeback).  Note that it's different from the normal epilogue pop.
+ */
+#define	op_pop_m(T,BITS) \
+  LOUT(0x08000000|CC_AL| MULTI_LOAD | MULTI_IA | MEM_WRTBCK |((T)<<16)|(BITS))
+
+/*
+ * Define the instruction macros for later use.
+ *
+ * NB: We must use ALU_SETCC to set the condition code as the result of
+ * this operation.  We obviously need this for the CMP operation (in fact, 
+ * Dirk says the processor will SIGILL us otherwise)
+ *
+ * We must also set it for op_add and op_sub because we need the carry
+ * to construct a 64-bit add and sub via add/adc.
+ */
+#define	op_mov(T,F)		op_rrr(CC_AL,ALU_MOV,SFT_LSLC,0,T,0,F)
+
+/* NB: this will rotate right V by S bits before using it!  */
+#define	op_mov_c(T,V,S)		op_rrc(CC_AL,ALU_MOV,T,0,V,((S)>>1))
+
+#define	op_add(T,F1,F2)		op_rrr(CC_AL,ALU_ADD | ALU_SETCC,SFT_LSLC,0,T,F1,F2)
+/* T := F1 + (I8 & 0xff), setting the condition codes.  I8 is parenthesized
+ * so that an expression argument is masked as a whole. */
+#define	op_add_const(T,F1,I8)	op_rrr(CC_AL,ALU_ADD | ALU_SETCC | ALU_OP_IMMED, SFT_LSLC, 0, T, F1, (I8) & 0xff)
+
+#define	op_adc(T,F1,F2)		op_rrr(CC_AL,ALU_ADC,SFT_LSLC,0,T,F1,F2)
+
+#define	op_sub(T,F1,F2)		op_rrr(CC_AL,ALU_SUB | ALU_SETCC,SFT_LSLC,0,T,F1,F2)
+/* T := F1 - (I8 & 0xff), setting the condition codes.  I8 is parenthesized
+ * so that an expression argument is masked as a whole. */
+#define	op_sub_const(T,F1,I8)	op_rrr(CC_AL,ALU_SUB | ALU_SETCC | ALU_OP_IMMED, SFT_LSLC, 0, T, F1, (I8) & 0xff)
+
+#define	op_sbc(T,F1,F2)		op_rrr(CC_AL,ALU_SBC,SFT_LSLC,0,T,F1,F2)
+
+#define	op_add_c(T,F,V,S)	op_rrc(CC_AL,ALU_ADD,T,F,V,((S)>>1))
+#define	op_sub_c(T,F,V,S)	op_rrc(CC_AL,ALU_SUB,T,F,V,((S)>>1))
+
+#define	op_and(T,F1,F2)		op_rrr(CC_AL,ALU_AND,SFT_LSLC,0,T,F1,F2)
+/* T := F1 & (I8 & 0xff).  I8 is parenthesized so that an expression
+ * argument is masked as a whole. */
+#define	op_and_const(T,F1,I8)	op_rrr(CC_AL,ALU_AND | ALU_OP_IMMED, SFT_LSLC, 0, T, F1, (I8) & 0xff)
+
+#define	op_or(T,F1,F2)		op_rrr(CC_AL,ALU_ORR,SFT_LSLC,0,T,F1,F2)
+/* T := F1 | (I8 & 0xff).  Uses ALU_ORR, the inclusive-OR opcode defined in
+ * the ALU operation table (there is no ALU_OR). */
+#define	op_or_const(T,F1,I8)	op_rrr(CC_AL,ALU_ORR | ALU_OP_IMMED, SFT_LSLC, 0, T, F1, (I8) & 0xff)
+
+#define	op_eor(T,F1,F2)		op_rrr(CC_AL,ALU_EOR,SFT_LSLC,0,T,F1,F2)
+/* T := F1 ^ (I8 & 0xff).  I8 is parenthesized so that an expression
+ * argument is masked as a whole. */
+#define	op_eor_const(T,F1,I8)	op_rrr(CC_AL,ALU_EOR | ALU_OP_IMMED, SFT_LSLC, 0, T, F1, (I8) & 0xff)
+
+#if 0
+/* defined differently below --- why? */
+#define	op_ashr(T,F1,F2)	op_rrr(CC_AL,ALU_MOV,SFT_ASR,(F2)<<1,T,0,F1)
+#define	op_lshr(T,F1,F2)	op_rrr(CC_AL,ALU_MOV,SFT_LSR,(F2)<<1,T,0,F1)
+#define	op_lshl(T,F1,F2)	op_rrr(CC_AL,ALU_MOV,SFT_LSL,(F2)<<1,T,0,F1)
+#endif
+
+/*
+ *
+ * These use the register-specified shift forms.
+ *
+ * F2 = register containing number of locations to shift
+ * F1 = source register
+ * T  = destination register
+ */
+/* Register-specified shifts: T := F1 shifted by the count held in F2.
+ * 0x10/0x30/0x50 select LSL/LSR/ASR by register.  F1 is parenthesized so an
+ * expression argument cannot rebind against the surrounding ORs. */
+#define	op_lshl(T,F1,F2)	op_rrr(CC_AL,ALU_MOV,0,0,T,0,((F2) << 8) | 0x10 | (F1))
+#define	op_lshr(T,F1,F2)	op_rrr(CC_AL,ALU_MOV,0,0,T,0,((F2) << 8) | 0x30 | (F1))
+#define	op_ashr(T,F1,F2)	op_rrr(CC_AL,ALU_MOV,0,0,T,0,((F2) << 8) | 0x50 | (F1))
+
+#define	op_mul(T,F1,F2)		op_muladd_rrr(CC_AL,MUL_MUL,T,F1,F2)
+#define	op_not(T,F)		op_rrr(CC_AL,ALU_MVN,SFT_LSLC,0,T,0,F)
+#define	op_cmp(F1,F2)		op_rrr(CC_AL,ALU_CMP | ALU_SETCC,SFT_LSLC,0,0,F1,F2)
+/* Compare F1 with (I8 & 0xff), setting the condition codes.  I8 is
+ * parenthesized so that an expression argument is masked as a whole. */
+#define	op_cmp_const(F1,I8)	op_rrr(CC_AL,ALU_CMP | ALU_SETCC | ALU_OP_IMMED,SFT_LSLC,0,0,F1,(I8) & 0xff)
+
+#define	op_fmov(T,F)		op_f_rrr(CC_AL,FLT_MVF,FLT_PREC_SINGLE,FLT_RND_ZERO,T,0,F)
+#define	op_fmov_const(T,F)	op_f_rrc(CC_AL,FLT_MVF,FLT_PREC_SINGLE,FLT_RND_ZERO,T,0,F)
+#define	op_fmovl(T,F)		op_f_rrr(CC_AL,FLT_MVF,FLT_PREC_DOUBLE,FLT_RND_ZERO,T,0,F)
+#define	op_fmovl_const(T,F)	op_f_rrc(CC_AL,FLT_MVF,FLT_PREC_DOUBLE,FLT_RND_ZERO,T,0,F)
+
+#define	op_fadd(T,F1,F2)	op_f_rrr(CC_AL,FLT_ADF,FLT_PREC_SINGLE,FLT_RND_ZERO,T,F1,F2)
+#define	op_fsub(T,F1,F2)	op_f_rrr(CC_AL,FLT_SUF,FLT_PREC_SINGLE,FLT_RND_ZERO,T,F1,F2)
+#define	op_fmul(T,F1,F2)	op_f_rrr(CC_AL,FLT_MUF,FLT_PREC_SINGLE,FLT_RND_ZERO,T,F1,F2)
+#define	op_fdiv(T,F1,F2)	op_f_rrr(CC_AL,FLT_DVF,FLT_PREC_SINGLE,FLT_RND_ZERO,T,F1,F2)
+
+#define	op_faddl(T,F1,F2)	op_f_rrr(CC_AL,FLT_ADF,FLT_PREC_DOUBLE,FLT_RND_ZERO,T,F1,F2)
+#define	op_fsubl(T,F1,F2)	op_f_rrr(CC_AL,FLT_SUF,FLT_PREC_DOUBLE,FLT_RND_ZERO,T,F1,F2)
+#define	op_fmull(T,F1,F2)	op_f_rrr(CC_AL,FLT_MUF,FLT_PREC_DOUBLE,FLT_RND_ZERO,T,F1,F2)
+#define	op_fdivl(T,F1,F2)	op_f_rrr(CC_AL,FLT_DVF,FLT_PREC_DOUBLE,FLT_RND_ZERO,T,F1,F2)
+
+/*
+ * fixz == float->int or double -> int
+ *
+ * NOTE(review): FLT_RND_FTOI is not among the rounding modes defined in
+ * this file, and this expands with FLT_ADF rather than FLT_FIX.  The
+ * op_cvt_f2i / op_cvt_d2i macros look like the usable conversions --
+ * confirm before using op_fixz.
+ */
+#define op_fixz(R,F) op_f_rrr(CC_AL, FLT_ADF, FLT_PREC_SINGLE, FLT_RND_FTOI, R, F, 0)
+
+/*
+ * Floating compare - we only use CMPE. Precision doesn't matter
+ */
+#define op_fcmp(Fn,Fm) op_f_rrr(CC_AL, FLT_CMPE, FLT_PREC_SINGLE, FLT_RND_NEAREST, 0xf, Fn, Fm)
+
+
+/*
+ * Load word R from [A + O].
+ *
+ * We only encode positive offset constants.
+ * If the offset is negative, we negate it and subtract it.
+ *
+ * This expands to a bare if/else statement that evaluates O more than
+ * once: pass an expression without side effects, and brace the macro
+ * when it is used as the body of another if.
+ */
+#define	op_load_offset(R,A,O) \
+    if ((O) < 0) \
+	op_mem_rrc(CC_AL,MEM_LOAD|MEM_PREIDX|MEM_SUBOFF,R,A,-(O)); \
+    else \
+	op_mem_rrc(CC_AL,MEM_LOAD|MEM_PREIDX|MEM_ADDOFF,R,A,O)
+
+/*
+ * Load unsigned byte R from [A + O].
+ * Do not use op_mem_hb_rrc here: that form only covers halfword and
+ * signed byte transfers.  Unsigned bytes use the word/unsigned byte
+ * form with MEM_UBYTE, as op_store_b_offset does.
+ */
+#define	op_load_ub_offset(R,A,O)  \
+    if ((O) < 0) \
+	op_mem_rrc(CC_AL,MEM_LOAD|MEM_PREIDX|MEM_SUBOFF|MEM_UBYTE,R,A,-(O)); \
+    else \
+	op_mem_rrc(CC_AL,MEM_LOAD|MEM_PREIDX|MEM_ADDOFF|MEM_UBYTE,R,A,O)
+
+#define	op_load_uh_offset(R,A,O)  \
+    if ((O) < 0) \
+	op_mem_hb_rrc(CC_AL,MEM_LOAD|MEM_PREIDX|MEM_SUBOFF,MEM_USGN,MEM_HALF,R,A,-(O)); \
+    else \
+	op_mem_hb_rrc(CC_AL,MEM_LOAD|MEM_PREIDX|MEM_ADDOFF,MEM_USGN,MEM_HALF,R,A,O)
+
+#define	op_load_sb_offset(R,A,O)  \
+    if ((O) < 0) \
+	op_mem_hb_rrc(CC_AL,MEM_LOAD|MEM_PREIDX|MEM_SUBOFF,MEM_SGN,MEM_BYTE,R,A,-(O)); \
+    else \
+	op_mem_hb_rrc(CC_AL,MEM_LOAD|MEM_PREIDX|MEM_ADDOFF,MEM_SGN,MEM_BYTE,R,A,O)
+
+#define	op_load_sh_offset(R,A,O)  \
+    if ((O) < 0) \
+	op_mem_hb_rrc(CC_AL,MEM_LOAD|MEM_PREIDX|MEM_SUBOFF,MEM_SGN,MEM_HALF,R,A,-(O)); \
+    else \
+	op_mem_hb_rrc(CC_AL,MEM_LOAD|MEM_PREIDX|MEM_ADDOFF,MEM_SGN,MEM_HALF,R,A,O)
+
+/***************************************************************************
+ *
+ * NOTE: Floating loads and stores treat the offset as a *word* index,
+ * not a BYTE index as in the regular ld/st. We don't compensate
+ * for this here, we do it in the individual memory operations.
+ * For this reason, we should have the rangecheck actually be
+ * larger than an I8 rangecheck, but I haven't done that yet.
+ *
+ ***************************************************************************/
+
+
+#define	op_fload_offset(R,A,O)  \
+    if ((O) < 0) \
+	op_fmem_rrc(CC_AL,MEM_LOAD|MEM_PREIDX|MEM_SUBOFF,FLT_MPREC_SINGLE,R,A,-(O)); \
+    else \
+	op_fmem_rrc(CC_AL,MEM_LOAD|MEM_PREIDX|MEM_ADDOFF,FLT_MPREC_SINGLE,R,A,O)
+
+#define	op_floadl_offset(R,A,O)	\
+    if ((O) < 0) \
+	op_fmem_rrc(CC_AL,MEM_LOAD|MEM_PREIDX|MEM_SUBOFF,FLT_MPREC_DOUBLE,R,A,-(O)); \
+    else \
+	op_fmem_rrc(CC_AL,MEM_LOAD|MEM_PREIDX|MEM_ADDOFF,FLT_MPREC_DOUBLE,R,A,O)
+
+#define	op_store_offset(R,A,O)	\
+    if ((O) < 0) \
+	op_mem_rrc(CC_AL,MEM_STORE|MEM_PREIDX|MEM_SUBOFF,R,A,-(O)); \
+    else \
+	op_mem_rrc(CC_AL,MEM_STORE|MEM_PREIDX|MEM_ADDOFF,R,A,O)
+
+/* Do not use op_mem_hb_rrc here because that does not store bytes */
+#define	op_store_b_offset(R,A,O) \
+    if ((O) < 0) \
+	op_mem_rrc(CC_AL,MEM_STORE|MEM_PREIDX|MEM_SUBOFF|MEM_UBYTE,R,A,-(O)); \
+    else \
+	op_mem_rrc(CC_AL,MEM_STORE|MEM_PREIDX|MEM_ADDOFF|MEM_UBYTE,R,A,O)
+
+#define	op_store_h_offset(R,A,O) \
+    if ((O) < 0) \
+	op_mem_hb_rrc(CC_AL,MEM_STORE|MEM_PREIDX|MEM_SUBOFF,MEM_USGN,MEM_HALF,R,A,-(O)); \
+    else \
+	op_mem_hb_rrc(CC_AL,MEM_STORE|MEM_PREIDX|MEM_ADDOFF,MEM_USGN,MEM_HALF,R,A,O)
+
+#define	op_fstore_offset(R,A,O)	\
+    if ((O) < 0) \
+	op_fmem_rrc(CC_AL,MEM_STORE|MEM_PREIDX|MEM_SUBOFF,FLT_MPREC_SINGLE,R,A,-(O)); \
+    else \
+	op_fmem_rrc(CC_AL,MEM_STORE|MEM_PREIDX|MEM_ADDOFF,FLT_MPREC_SINGLE,R,A,O)
+
+#define	op_fstorel_offset(R,A,O) \
+    if ((O) < 0) \
+	op_fmem_rrc(CC_AL,MEM_STORE|MEM_PREIDX|MEM_SUBOFF,FLT_MPREC_DOUBLE,R,A,-(O)); \
+    else \
+	op_fmem_rrc(CC_AL,MEM_STORE|MEM_PREIDX|MEM_ADDOFF,FLT_MPREC_DOUBLE,R,A,O)
+
+#define	op_push(T,R)		op_mem_rrc(CC_AL,MEM_STORE|MEM_PREIDX|MEM_WRTBCK,R,T,4)
+
+/*
+ * Remember to right shift the byte offset by 2 since the instruction
+ * takes a WORD, not a BYTE offset.
+ */
+/* Store single-precision R one word below T, pre-indexed with writeback.
+ * No trailing ';' here, matching op_fpushl: the caller supplies it. */
+#define	op_fpush(T,R)		op_fmem_rrc(CC_AL,MEM_STORE|MEM_PREIDX|MEM_WRTBCK,FLT_MPREC_SINGLE,R,T,(4 >> 2))
+#define	op_fpushl(T,R)		op_fmem_rrc(CC_AL,MEM_STORE|MEM_PREIDX|MEM_WRTBCK,FLT_MPREC_DOUBLE,R,T,(8 >> 2))
+
+
+/* Define convenience macros for load/store instructions with offset zero */ 
+#define	op_load(R,A)		op_load_offset(R,A,0)
+#define	op_load_ub(R,A)		op_load_ub_offset(R,A,0)
+#define	op_load_sb(R,A)		op_load_sb_offset(R,A,0)
+#define	op_load_uh(R,A)		op_load_uh_offset(R,A,0)
+#define	op_load_sh(R,A)		op_load_sh_offset(R,A,0)
+#define	op_fload(R,A)		op_fload_offset(R,A,0)
+#define	op_floadl(R,A)		op_floadl_offset(R,A,0)
+
+#define	op_store(R,A)		op_store_offset(R,A,0)
+#define	op_store_b(R,A)		op_store_b_offset(R,A,0)
+#define	op_store_h(R,A)		op_store_h_offset(R,A,0)
+#define	op_fstore(R,A)		op_fstore_offset(R,A,0)
+#define	op_fstorel(R,A)		op_fstorel_offset(R,A,0)
+
+#define	xop_mov(T,F)		if ((T) != (F)) op_mov(T,F)
+#define	xop_fmov(T,F)		if ((T) != (F)) op_fmov(T,F)
+#define	xop_fmovl(T,F)		if ((T) != (F)) op_fmovl(T,F)
+
+/* --------------------------------------------------------------------- */
+
+define_insn(unimplemented, unimplemented)
+{
+	abort();
+}
+
+define_insn(nop, nop)
+{
+	op_mov(R0,R0);
+}
+
+/* --------------------------------------------------------------------- */
+
+define_insn(prologue, prologue_xxx)
+{
+	int i;
+	int limit;
+	label* l = const_label(1);
+
+	l->type = Lframe|Labsolute|Lgeneral;
+
+	op_mov(IP, SP);
+	/*
+	 * Every stack segment saves all callee saves registers
+ 	 * NB: this takes 4 * REGISTERS_SAVED = 40 bytes below the fp. 
+	 */
+
+	op_push_prologue(SP,BPC|BFP|BIP|BLR|BR4|BR5|BR6|BR7|BR8|BR9|BR10);
+	debug(("maxStack = %d, maxLocal = %d, maxTemp = %d\n", maxStack, maxLocal, maxTemp));
+
+	/*
+	 * This effectively hides/removes the just-pushed
+	 * PC from the stack pop
+	 * and when the stack pop occurs, the FP over-write
+	 * reclaims the storage.
+	 */
+	op_sub_c(FP, IP, 4, 0); 
+
+	/* FOO:  Should save floating point state here !!! F4-F7 */
+
+	l->at = (uintp)CODEPC;
+
+	/* Framesize is filled in by Lframe */
+	op_sub_const(SP,SP,0); 
+
+	limit = 4;
+	if (maxArgs < limit) {
+		limit = maxArgs;
+	}
+
+	/* Force first 4 arguments into corresponding registers */
+	for (i = 0; i < limit; i++) {
+		forceRegister(&localinfo[i], R0+i, Rint /* ? */);
+	}
+}
+
+define_insn(exception_prologue, eprologue_xxx)
+{
+	label* l = const_label(1);
+
+	l->type = Lframe|Labsolute|Lgeneral;
+
+	/* Initially, the FP is set --- see CALL_KAFFE_EXCEPTION
+	 * We must restore the SP and we must do so in a way that never 
+	 * frees anything on the stack.  We use IP as a temp register.
+	 */
+	op_mov(IP, FP);
+	op_sub_c(IP, IP, (REGISTERS_SAVED * SLOTSIZE), 0);
+	l->at = (uintp)CODEPC;
+	op_sub_const(SP,IP,0); /* Framesize is filled in by Lframe */
+}
+
+/*
+ * Method epilogue: records the epilogue address and emits one
+ * load-multiple relative to FP that restores the saved registers and
+ * loads PC, which returns to the caller.
+ */
+define_insn(epilogue, epilogue_xxx)
+{
+	KaffeJIT_setEpilogueLabel ((uintp)CODEPC);
+
+	/* Should restore floating point state here !!! F4-F7 */
+
+	debug(("maxStack = %d, maxLocal = %d, maxTemp = %d\n", maxStack, maxLocal, maxTemp));
+	/*
+	 * Pushed as
+	 * FP	-> FP
+	 * IP 	-> SP (actually, SP -> SP)
+	 * LR   -> PC
+	 * Rest (R4-R10) are the callee-saves pushed by the prologue
+	 */
+	op_pop_epilogue(FP,BFP|BSP|BPC|BR4|BR5|BR6|BR7|BR8|BR9|BR10);
+}
+
+/* --------------------------------------------------------------------- */
+
+define_insn(spill_int, spill_Rxx)
+{
+	int r = sreg_int(0);
+	int o = const_int(1);
+
+	assert(__I12const_rangecheck(o));
+	op_store_offset(r, FP, o);
+
+	debug(("spill_int %d, [FP, %d]\n", r, o));
+}
+
+define_insn(spill_float, fspill_Rxx)
+{
+	int r = sreg_float(0);
+	int o = const_int(1);
+
+	assert(__I10const_rangecheck(o));
+	assert( (o & 0x3) == 0);
+	op_fstore_offset(r, FP, o >> 2);
+
+	debug(("spill_float %d, [FP, %d]\n", r, o));
+}
+
+define_insn(spill_double, fspilll_Rxx)
+{
+	int r = sreg_double(0);
+	int o = const_int(1);
+
+	assert(__I10const_rangecheck(o));
+	assert( (o & 0x3) == 0);
+	op_fstorel_offset(r, FP, o >> 2);
+
+	debug(("spill_double %d, [FP, %d]\n", r, o));
+}
+
+define_insn(reload_int, reload_Rxx)
+{
+	int r = lreg_int(0);
+	int o = const_int(1);
+
+	assert(__I12const_rangecheck(o));
+	op_load_offset(r, FP, o);
+
+	debug(("reload_int %d, [FP, %d]\n", r, o));
+}
+
+define_insn(reload_float, freload_Rxx)
+{
+	int r = lreg_float(0);
+	int o = const_int(1);
+
+	assert(__I10const_rangecheck(o));
+	assert( (o & 0x3) == 0);
+	op_fload_offset(r, FP, o >> 2);
+
+	debug(("reload_float %d, [FP, %d]\n", r, o));
+}
+
+define_insn(reload_double, freloadl_Rxx)
+{
+	int r = lreg_double(0);
+	int o = const_int(1);
+
+	assert(__I10const_rangecheck(o));
+	assert( (o & 0x3) == 0);
+	op_floadl_offset(r, FP, o >> 2);
+
+	debug(("reload_double %d, [FP, %d]\n", r, o));
+}
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * Load the integer constant const_int(2) into register wreg_int(0).
+ *
+ * First tries a single MOV of an 8-bit immediate rotated into place.
+ * If that fails, emits a MOV of the low byte followed by three ADDs,
+ * one for each of the remaining bytes.
+ *
+ * NOTE(review): sval is a signed int, so `sval >> 2' on a negative value
+ * is implementation-defined.  With an arithmetic shift the high bits
+ * stay set and negative constants always take the four-instruction
+ * path -- confirm whether an unsigned shift was intended.
+ */
+define_insn(move_int_const, move_RxC)
+{
+	int val = const_int(2);
+	int w = wreg_int(0);
+	int shift = 0;
+	int sval = val;
+
+	while (shift < 32) {
+		/* Does sval fit in 8 bits after shifted by `shift' bits?
+		 * NB: op_mov_c requires that the 8-bit immediate 
+		 * be ROTATED to the right, hence the (32 - shift) % 32.
+		 */
+		if ((sval & 0xFFFFFF00) == 0) {
+			op_mov_c(w, sval, (32 - shift) % 32);
+
+			debug(("move_int_const %d, %d (0x%x) via shift (%d, %d)\n", w, val, val, sval, shift));
+
+			return;
+		}
+		else if ((sval & 0x000000FF) != 0) {
+			/* Can't handle this kind of number encoding */
+			break;
+		}
+		sval = sval >> 2;
+		shift += 2;
+	}
+
+	/* Fail to encode in a short fashion - fall back on a long one */
+	op_mov_c(w, val & 0x000000FF, 0);
+	op_add_c(w, w, (val >> 8) & 0x000000FF, 24);
+	op_add_c(w, w, (val >> 16) & 0x000000FF, 16);
+	op_add_c(w, w, (val >> 24) & 0x000000FF, 8);
+
+	debug(("move_int_const %d, %d (0x%x)\n", w, val, val));
+}
+
+/*
+ * Load the address of label const_label(2) into register wreg_int(0).
+ * The label is marked Llong8x8x8x8|Labsolute and records the current
+ * code address; the instructions are emitted with zero immediates
+ * (presumably patched later through the label -- confirm in the
+ * label code).
+ */
+define_insn(move_label_const, move_RxL)
+{
+	label* l = const_label(2);
+	int w = wreg_int(0);
+
+	l->type |= Llong8x8x8x8|Labsolute;
+	l->at = CODEPC;
+
+	/* Assemble constants with a move and three shifted adds */
+	op_mov_c(w, 0, 0);
+	op_add_c(w, w, 0, 24);
+	op_add_c(w, w, 0, 16);
+	op_add_c(w, w, 0, 8);
+
+	debug(("move_label_const %d, ?\n", w));
+}
+
+define_insn(move_int, move_RxR)
+{
+	int r = rreg_int(2);
+	int w = wreg_int(0);
+
+	op_mov(w, r);
+
+	debug(("move_int %d, %d\n", w, r));
+}
+
+define_insn(move_float, fmove_RxR)
+{
+	int r = rreg_float(2);
+	int w = wreg_float(0);
+
+	op_fmov(w, r);
+
+	debug(("move_float %d, %d\n", w, r));
+}
+
+/*
+ * Load the float constant const_float(2) into register wreg_float(0).
+ *
+ * Only the eight values 0, 1, 2, 3, 4, 5, 0.5 and 10 are accepted; each
+ * maps to an index 0-7 with the 0x8 bit set (presumably the
+ * constant-operand flag of the instruction -- TODO confirm).  Any other
+ * value prints a message and aborts.
+ *
+ * NOTE(review): -0.0 == 0.0 is true in C, so a negative zero would be
+ * emitted as the +0.0 constant -- confirm that callers never pass it.
+ */
+define_insn(move_float_const, fmove_RxC)
+{
+	float o = const_float(2);
+	int w = wreg_float(0);
+
+	int r = 0;
+
+	if ( o == 0.0 ) {
+	    r = 0x8 | 0;
+	} else if (o == 1.0) {
+	    r = 0x8 | 1;
+	} else if (o == 2.0) {
+	    r = 0x8 | 2;
+	} else if (o == 3.0) {
+	    r = 0x8 | 3;
+	} else if (o == 4.0) {
+	    r = 0x8 | 4;
+	} else if (o == 5.0) {
+	    r = 0x8 | 5;
+	} else if (o == 0.5) {
+	    r = 0x8 | 6;
+	} else if (o == 10.0) {
+	    r = 0x8 | 7;
+	} else {
+	    fprintf(stderr,"[%s:%d] Can't represent the floating value %f as a manifest constant\n",
+		    __FILE__, __LINE__, o);
+	    abort();
+	}
+
+	op_fmov_const(w, r);
+
+	debug(("move_float_const %d, %f(0x%x)\n", w, o,r ));
+}
+
+define_insn(move_double, fmovel_RxR)
+{
+	int r = rreg_double(2);
+	int w = wreg_double(0);
+
+	op_fmovl(w, r);
+
+	debug(("move_double %d, %d\n", w, r));
+}
+
+/*
+ * Load the double constant const_double(2) into register wreg_double(0).
+ *
+ * Only the eight values 0, 1, 2, 3, 4, 5, 0.5 and 10 are accepted; each
+ * maps to an index 0-7 with the 0x8 bit set (presumably the
+ * constant-operand flag of the instruction -- TODO confirm).  Any other
+ * value prints a message and aborts.
+ *
+ * NOTE(review): -0.0 == 0.0 is true in C, so a negative zero would be
+ * emitted as the +0.0 constant -- confirm that callers never pass it.
+ */
+define_insn(move_double_const, fmovel_RxC)
+{
+	double o = const_double(2);
+	int w = wreg_double(0);
+
+	int r = 0;
+
+	if ( o == 0.0 ) {
+	    r = 0x8 | 0;
+	} else if (o == 1.0) {
+	    r = 0x8 | 1;
+	} else if (o == 2.0) {
+	    r = 0x8 | 2;
+	} else if (o == 3.0) {
+	    r = 0x8 | 3;
+	} else if (o == 4.0) {
+	    r = 0x8 | 4;
+	} else if (o == 5.0) {
+	    r = 0x8 | 5;
+	} else if (o == 0.5) {
+	    r = 0x8 | 6;
+	} else if (o == 10.0) {
+	    r = 0x8 | 7;
+	} else {
+	    fprintf(stderr,"[%s:%d] Can't represent the floating value %f as a manifest constant\n",
+		    __FILE__, __LINE__, o);
+	    abort();
+	}
+
+	op_fmovl_const(w, r);
+
+	debug(("move_double_const %d, %f(0x%x)\n", w, o,r ));
+}
+
+
+/* --------------------------------------------------------------------- */
+
+define_insn(add_int, add_RRR)
+{
+	int r2 = rreg_int(2);
+	int r1 = rreg_int(1);
+	int w = wreg_int(0);
+
+	op_add(w, r1, r2);
+
+	debug(("add_int %d, %d, %d\n", w, r1, r2));
+}
+
+define_insn(add_int_const, add_RRC)
+{
+	int o = const_int(2);
+	int r1 = rreg_int(1);
+	int w = wreg_int(0);
+
+	if ( o < 0 ) {
+		op_sub_const(w, r1, -o);
+		debug(("sub_int_const %d, %d, %d\n", w, r1, -o));
+	} else {
+		op_add_const(w, r1, o);
+		debug(("add_int_const %d, %d, %d\n", w, r1, o));
+	}
+}
+
+define_insn(adc_int, adc_RRR)
+{
+	int r2 = rreg_int(2);
+	int r1 = rreg_int(1);
+	int w = wreg_int(0);
+
+	op_adc(w, r1, r2);
+
+	debug(("adc_int %d, %d, %d\n", w, r1, r2));
+}
+
+define_insn(add_float, fadd_RRR)
+{
+	int r2 = rreg_float(2);
+	int r1 = rreg_float(1);
+	int w = wreg_float(0);
+
+	op_fadd(w, r1, r2);
+
+	debug(("add_float %d, %d, %d\n", w, r1, r2));
+}
+
+define_insn(add_double, faddl_RRR)
+{
+	int r2 = rreg_double(2);
+	int r1 = rreg_double(1);
+	int w = wreg_double(0);
+
+	op_faddl(w, r1, r2);
+
+	debug(("add_double %d, %d, %d\n", w, r1, r2));
+}
+
+define_insn(sub_int, sub_RRR)
+{
+	int r2 = rreg_int(2);
+	int r1 = rreg_int(1);
+	int w = wreg_int(0);

*** Patch too long, truncated ***