author    Timos Ampelikiotis <t.ampelikiotis@virtualopensystems.com>  2023-10-10 11:40:56 +0000
committer Timos Ampelikiotis <t.ampelikiotis@virtualopensystems.com>  2023-10-10 11:40:56 +0000
commit    e02cda008591317b1625707ff8e115a4841aa889 (patch)
tree      aee302e3cf8b59ec2d32ec481be3d1afddfc8968 /tcg
parent    cc668e6b7e0ffd8c9d130513d12053cf5eda1d3b (diff)
Introduce Virtio-loopback epsilon release:
The epsilon release introduces a new compatibility layer which makes the virtio-loopback design work with QEMU and the rust-vmm vhost-user backend without requiring any changes.

Signed-off-by: Timos Ampelikiotis <t.ampelikiotis@virtualopensystems.com>
Change-Id: I52e57563e08a7d0bdc002f8e928ee61ba0c53dd9
Diffstat (limited to 'tcg')
-rw-r--r--  tcg/README | 784
-rw-r--r--  tcg/aarch64/tcg-target-con-set.h | 36
-rw-r--r--  tcg/aarch64/tcg-target-con-str.h | 24
-rw-r--r--  tcg/aarch64/tcg-target.c.inc | 3028
-rw-r--r--  tcg/aarch64/tcg-target.h | 159
-rw-r--r--  tcg/aarch64/tcg-target.opc.h | 15
-rw-r--r--  tcg/arm/tcg-target-con-set.h | 45
-rw-r--r--  tcg/arm/tcg-target-con-str.h | 25
-rw-r--r--  tcg/arm/tcg-target.c.inc | 3191
-rw-r--r--  tcg/arm/tcg-target.h | 161
-rw-r--r--  tcg/arm/tcg-target.opc.h | 16
-rw-r--r--  tcg/i386/tcg-target-con-set.h | 55
-rw-r--r--  tcg/i386/tcg-target-con-str.h | 33
-rw-r--r--  tcg/i386/tcg-target.c.inc | 3901
-rw-r--r--  tcg/i386/tcg-target.h | 240
-rw-r--r--  tcg/i386/tcg-target.opc.h | 35
-rw-r--r--  tcg/meson.build | 20
-rw-r--r--  tcg/mips/tcg-target-con-set.h | 36
-rw-r--r--  tcg/mips/tcg-target-con-str.h | 24
-rw-r--r--  tcg/mips/tcg-target.c.inc | 2577
-rw-r--r--  tcg/mips/tcg-target.h | 214
-rw-r--r--  tcg/optimize.c | 2213
-rw-r--r--  tcg/ppc/tcg-target-con-set.h | 42
-rw-r--r--  tcg/ppc/tcg-target-con-str.h | 30
-rw-r--r--  tcg/ppc/tcg-target.c.inc | 3907
-rw-r--r--  tcg/ppc/tcg-target.h | 190
-rw-r--r--  tcg/ppc/tcg-target.opc.h | 32
-rw-r--r--  tcg/region.c | 891
-rw-r--r--  tcg/riscv/tcg-target-con-set.h | 30
-rw-r--r--  tcg/riscv/tcg-target-con-str.h | 21
-rw-r--r--  tcg/riscv/tcg-target.c.inc | 1804
-rw-r--r--  tcg/riscv/tcg-target.h | 175
-rw-r--r--  tcg/s390x/tcg-target-con-set.h | 36
-rw-r--r--  tcg/s390x/tcg-target-con-str.h | 29
-rw-r--r--  tcg/s390x/tcg-target.c.inc | 3411
-rw-r--r--  tcg/s390x/tcg-target.h | 186
-rw-r--r--  tcg/s390x/tcg-target.opc.h | 15
-rw-r--r--  tcg/sparc/tcg-target-con-set.h | 32
-rw-r--r--  tcg/sparc/tcg-target-con-str.h | 23
-rw-r--r--  tcg/sparc/tcg-target.c.inc | 1825
-rw-r--r--  tcg/sparc/tcg-target.h | 172
-rw-r--r--  tcg/tcg-common.c | 34
-rw-r--r--  tcg/tcg-internal.h | 62
-rw-r--r--  tcg/tcg-ldst.c.inc | 78
-rw-r--r--  tcg/tcg-op-gvec.c | 3699
-rw-r--r--  tcg/tcg-op-vec.c | 816
-rw-r--r--  tcg/tcg-op.c | 3371
-rw-r--r--  tcg/tcg-pool.c.inc | 162
-rw-r--r--  tcg/tcg.c | 4738
-rw-r--r--  tcg/tci.c | 1401
-rw-r--r--  tcg/tci/README | 120
-rw-r--r--  tcg/tci/tcg-target-con-set.h | 21
-rw-r--r--  tcg/tci/tcg-target-con-str.h | 11
-rw-r--r--  tcg/tci/tcg-target.c.inc | 862
-rw-r--r--  tcg/tci/tcg-target.h | 180
55 files changed, 45238 insertions, 0 deletions
diff --git a/tcg/README b/tcg/README
new file mode 100644
index 000000000..bc15cc3b3
--- /dev/null
+++ b/tcg/README
@@ -0,0 +1,784 @@
+Tiny Code Generator - Fabrice Bellard.
+
+1) Introduction
+
+TCG (Tiny Code Generator) began as a generic backend for a C
+compiler. It was simplified to be used in QEMU. It also has its roots
+in the QOP code generator written by Paul Brook.
+
+2) Definitions
+
+TCG receives RISC-like "TCG ops" and performs some optimizations on them,
+including liveness analysis and trivial constant expression
+evaluation. TCG ops are then implemented in the host CPU back end,
+also known as the TCG "target".
+
+The TCG "target" is the architecture for which we generate the
+code. It is of course not the same as the "target" of QEMU, which is
+the emulated architecture. As TCG started as a generic C backend used
+for cross compiling, it is assumed that the TCG target is different
+from the host, although in practice this is never the case for QEMU.
+
+In this document, we use "guest" to specify what architecture we are
+emulating; "target" always means the TCG target, the machine on which
+we are running QEMU.
+
+A TCG "function" corresponds to a QEMU Translated Block (TB).
+
+A TCG "temporary" is a variable that is live only within a basic
+block. Temporaries are allocated explicitly in each function.
+
+A TCG "local temporary" is a variable that is live only within a
+function. Local temporaries are allocated explicitly in each function.
+
+A TCG "global" is a variable which is live in all the functions
+(equivalent of a C global variable). They are defined before the
+functions are defined. A TCG global can be a memory location (e.g. a QEMU
+CPU register), a fixed host register (e.g. the QEMU CPU state pointer)
+or a memory location which is stored in a register outside QEMU TBs
+(not implemented yet).
+
+A TCG "basic block" corresponds to a list of instructions terminated
+by a branch instruction.
+
+An operation with "undefined behavior" may result in a crash.
+
+An operation with "unspecified behavior" shall not crash. However,
+the result may be one of several possibilities so may be considered
+an "undefined result".
+
+3) Intermediate representation
+
+3.1) Introduction
+
+TCG instructions operate on variables which are temporaries, local
+temporaries or globals. TCG instructions and variables are strongly
+typed. Two types are supported: 32 bit integers and 64 bit
+integers. Pointers are defined as an alias to 32 bit or 64 bit
+integers depending on the TCG target word size.
+
+Each instruction has a fixed number of output variable operands, input
+variable operands and constant operands.
+
+The notable exception is the call instruction which has a variable
+number of outputs and inputs.
+
+In the textual form, output operands usually come first, followed by
+input operands, followed by constant operands. The output type is
+included in the instruction name. Constants are prefixed with a '$'.
+
+add_i32 t0, t1, t2 (t0 <- t1 + t2)
+
+3.2) Assumptions
+
+* Basic blocks
+
+- Basic blocks end after branches (e.g. brcond_i32 instruction),
+ goto_tb and exit_tb instructions.
+- Basic blocks start after the end of a previous basic block, or at a
+ set_label instruction.
+
+After the end of a basic block, the content of temporaries is
+destroyed, but local temporaries and globals are preserved.
+
+* Floating point types are not supported yet
+
+* Pointers: depending on the TCG target, pointer size is 32 bit or 64
+ bit. The type TCG_TYPE_PTR is an alias to TCG_TYPE_I32 or
+ TCG_TYPE_I64.
+
+* Helpers:
+
+Using the tcg_gen_helper_x_y it is possible to call any function
+taking i32, i64 or pointer types. By default, before calling a helper,
+all globals are stored at their canonical location and it is assumed
+that the function can modify them. By default, the helper is allowed to
+modify the CPU state or raise an exception.
+
+This can be overridden using the following function modifiers:
+- TCG_CALL_NO_READ_GLOBALS means that the helper does not read globals,
+ either directly or via an exception. They will not be saved to their
+ canonical locations before calling the helper.
+- TCG_CALL_NO_WRITE_GLOBALS means that the helper does not modify any globals.
+ They will only be saved to their canonical location before calling helpers,
+ but they won't be reloaded afterwards.
+- TCG_CALL_NO_SIDE_EFFECTS means that the call to the function is removed if
+ the return value is not used.
+
+Note that TCG_CALL_NO_READ_GLOBALS implies TCG_CALL_NO_WRITE_GLOBALS.
+
+On some TCG targets (e.g. x86), several calling conventions are
+supported.
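+
+As an illustration (not part of the original text), a guest front end
+might declare a pure helper carrying these flags roughly as follows.
+The helper name "my_crc32" and its body are hypothetical; the
+DEF_HELPER_FLAGS_* and HELPER() macros are assumed to be the usual QEMU
+helper machinery, and TCG_CALL_NO_RWG_SE is the usual shorthand for
+TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_SIDE_EFFECTS:
+
+  /* some-target/helper.h (hypothetical entry) */
+  DEF_HELPER_FLAGS_2(my_crc32, TCG_CALL_NO_RWG_SE, i32, i32, i32)
+
+  /* some-target/op_helper.c (hypothetical body) */
+  uint32_t HELPER(my_crc32)(uint32_t val, uint32_t poly)
+  {
+      /* pure computation: no CPU state access, no exceptions */
+      for (int i = 0; i < 32; i++) {
+          uint32_t mask = (val & 1) ? poly : 0;
+          val = (val >> 1) ^ mask;
+      }
+      return val;
+  }
+
+With these flags the globals need not be spilled around the call, and
+the call itself may be removed if its result is unused.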
+
+* Branches:
+
+Use the instruction 'br' to jump to a label.
+
+3.3) Code Optimizations
+
+When generating instructions, you can count on at least the following
+optimizations:
+
+- Single instructions are simplified, e.g.
+
+ and_i32 t0, t0, $0xffffffff
+
+ is suppressed.
+
+- A liveness analysis is done at the basic block level. The
+ information is used to suppress moves from a dead variable to
+ another one. It is also used to remove instructions which compute
+  dead results. The latter is especially useful for condition code
+ optimization in QEMU.
+
+ In the following example:
+
+ add_i32 t0, t1, t2
+ add_i32 t0, t0, $1
+ mov_i32 t0, $1
+
+ only the last instruction is kept.
+
+3.4) Instruction Reference
+
+********* Function call
+
+* call <ret> <params> ptr
+
+call function 'ptr' (pointer type)
+
+<ret> optional 32 bit or 64 bit return value
+<params> optional 32 bit or 64 bit parameters
+
+********* Jumps/Labels
+
+* set_label $label
+
+Define label 'label' at the current program point.
+
+* br $label
+
+Jump to label.
+
+* brcond_i32/i64 t0, t1, cond, label
+
+Conditional jump if t0 cond t1 is true. cond can be:
+ TCG_COND_EQ
+ TCG_COND_NE
+ TCG_COND_LT /* signed */
+ TCG_COND_GE /* signed */
+ TCG_COND_LE /* signed */
+ TCG_COND_GT /* signed */
+ TCG_COND_LTU /* unsigned */
+ TCG_COND_GEU /* unsigned */
+ TCG_COND_LEU /* unsigned */
+ TCG_COND_GTU /* unsigned */
+
+********* Arithmetic
+
+* add_i32/i64 t0, t1, t2
+
+t0=t1+t2
+
+* sub_i32/i64 t0, t1, t2
+
+t0=t1-t2
+
+* neg_i32/i64 t0, t1
+
+t0=-t1 (two's complement)
+
+* mul_i32/i64 t0, t1, t2
+
+t0=t1*t2
+
+* div_i32/i64 t0, t1, t2
+
+t0=t1/t2 (signed). Undefined behavior if division by zero or overflow.
+
+* divu_i32/i64 t0, t1, t2
+
+t0=t1/t2 (unsigned). Undefined behavior if division by zero.
+
+* rem_i32/i64 t0, t1, t2
+
+t0=t1%t2 (signed). Undefined behavior if division by zero or overflow.
+
+* remu_i32/i64 t0, t1, t2
+
+t0=t1%t2 (unsigned). Undefined behavior if division by zero.
+
+********* Logical
+
+* and_i32/i64 t0, t1, t2
+
+t0=t1&t2
+
+* or_i32/i64 t0, t1, t2
+
+t0=t1|t2
+
+* xor_i32/i64 t0, t1, t2
+
+t0=t1^t2
+
+* not_i32/i64 t0, t1
+
+t0=~t1
+
+* andc_i32/i64 t0, t1, t2
+
+t0=t1&~t2
+
+* eqv_i32/i64 t0, t1, t2
+
+t0=~(t1^t2), or equivalently, t0=t1^~t2
+
+* nand_i32/i64 t0, t1, t2
+
+t0=~(t1&t2)
+
+* nor_i32/i64 t0, t1, t2
+
+t0=~(t1|t2)
+
+* orc_i32/i64 t0, t1, t2
+
+t0=t1|~t2
+
+* clz_i32/i64 t0, t1, t2
+
+t0 = t1 ? clz(t1) : t2
+
+* ctz_i32/i64 t0, t1, t2
+
+t0 = t1 ? ctz(t1) : t2
+
+* ctpop_i32/i64 t0, t1
+
+t0 = number of bits set in t1
+With "ctpop" short for "count population", matching
+the function name used in include/qemu/host-utils.h.
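+
+As a C-like sketch of the clz/ctz/ctpop semantics above (illustrative
+only, using <stdint.h> types and GCC/Clang builtins):
+
+  uint32_t ref_clz_i32(uint32_t t1, uint32_t t2)
+  {
+      /* t2 supplies the result for a zero input, for which
+         __builtin_clz() would be undefined */
+      return t1 ? __builtin_clz(t1) : t2;
+  }
+
+  uint32_t ref_ctz_i32(uint32_t t1, uint32_t t2)
+  {
+      return t1 ? __builtin_ctz(t1) : t2;
+  }
+
+  uint32_t ref_ctpop_i32(uint32_t t1)
+  {
+      return __builtin_popcount(t1);
+  }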
+
+********* Shifts/Rotates
+
+* shl_i32/i64 t0, t1, t2
+
+t0=t1 << t2. Unspecified behavior if t2 < 0 or t2 >= 32 (resp 64)
+
+* shr_i32/i64 t0, t1, t2
+
+t0=t1 >> t2 (unsigned). Unspecified behavior if t2 < 0 or t2 >= 32 (resp 64)
+
+* sar_i32/i64 t0, t1, t2
+
+t0=t1 >> t2 (signed). Unspecified behavior if t2 < 0 or t2 >= 32 (resp 64)
+
+* rotl_i32/i64 t0, t1, t2
+
+Rotation of t2 bits to the left.
+Unspecified behavior if t2 < 0 or t2 >= 32 (resp 64)
+
+* rotr_i32/i64 t0, t1, t2
+
+Rotation of t2 bits to the right.
+Unspecified behavior if t2 < 0 or t2 >= 32 (resp 64)
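+
+For reference, a C sketch of the 32-bit rotate semantics (illustrative
+only; the "t2 & 31" masking is one valid choice given that out-of-range
+counts are unspecified):
+
+  uint32_t ref_rotl_i32(uint32_t t1, uint32_t t2)
+  {
+      unsigned c = t2 & 31;
+      return c ? (t1 << c) | (t1 >> (32 - c)) : t1;
+  }
+
+  uint32_t ref_rotr_i32(uint32_t t1, uint32_t t2)
+  {
+      unsigned c = t2 & 31;
+      return c ? (t1 >> c) | (t1 << (32 - c)) : t1;
+  }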
+
+********* Misc
+
+* mov_i32/i64 t0, t1
+
+t0 = t1
+
+Move t1 to t0 (both operands must have the same type).
+
+* ext8s_i32/i64 t0, t1
+ext8u_i32/i64 t0, t1
+ext16s_i32/i64 t0, t1
+ext16u_i32/i64 t0, t1
+ext32s_i64 t0, t1
+ext32u_i64 t0, t1
+
+8, 16 or 32 bit sign/zero extension (both operands must have the same type)
+
+* bswap16_i32/i64 t0, t1, flags
+
+16 bit byte swap on the low bits of a 32/64 bit input.
+If flags & TCG_BSWAP_IZ, then t1 is known to be zero-extended from bit 15.
+If flags & TCG_BSWAP_OZ, then t0 will be zero-extended from bit 15.
+If flags & TCG_BSWAP_OS, then t0 will be sign-extended from bit 15.
+If neither TCG_BSWAP_OZ nor TCG_BSWAP_OS are set, then the bits of
+t0 above bit 15 may contain any value.
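+
+A C sketch of bswap16_i32 with the output flags (illustrative only; the
+flag values below are given just to make the sketch self-contained):
+
+  enum { TCG_BSWAP_IZ = 1, TCG_BSWAP_OZ = 2, TCG_BSWAP_OS = 4 };
+
+  uint32_t ref_bswap16_i32(uint32_t t1, int flags)
+  {
+      uint32_t swapped = ((t1 & 0xff) << 8) | ((t1 >> 8) & 0xff);
+      if (flags & TCG_BSWAP_OZ) {
+          return swapped & 0xffff;            /* zero-extend from bit 15 */
+      } else if (flags & TCG_BSWAP_OS) {
+          return (uint32_t)(int16_t)swapped;  /* sign-extend from bit 15 */
+      }
+      return swapped;  /* bits above 15 are unspecified */
+  }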
+
+* bswap32_i64 t0, t1, flags
+
+32 bit byte swap on a 64-bit value. The flags are the same as for bswap16,
+except they apply from bit 31 instead of bit 15.
+
+* bswap32_i32 t0, t1, flags
+* bswap64_i64 t0, t1, flags
+
+32/64 bit byte swap. The flags are ignored, but still present
+for consistency with the other bswap opcodes.
+
+* discard_i32/i64 t0
+
+Indicate that the value of t0 won't be used later. It is useful to
+force dead code elimination.
+
+* deposit_i32/i64 dest, t1, t2, pos, len
+
+Deposit T2 as a bitfield into T1, placing the result in DEST.
+The bitfield is described by POS/LEN, which are immediate values:
+
+ LEN - the length of the bitfield
+ POS - the position of the first bit, counting from the LSB
+
+For example, "deposit_i32 dest, t1, t2, 8, 4" indicates a 4-bit field
+at bit 8. This operation would be equivalent to
+
+ dest = (t1 & ~0x0f00) | ((t2 << 8) & 0x0f00)
+
+* extract_i32/i64 dest, t1, pos, len
+* sextract_i32/i64 dest, t1, pos, len
+
+Extract a bitfield from T1, placing the result in DEST.
+The bitfield is described by POS/LEN, which are immediate values,
+as above for deposit. For extract_*, the result will be extended
+to the left with zeros; for sextract_*, the result will be extended
+to the left with copies of the bitfield sign bit at pos + len - 1.
+
+For example, "sextract_i32 dest, t1, 8, 4" indicates a 4-bit field
+at bit 8. This operation would be equivalent to
+
+ dest = (t1 << 20) >> 28
+
+(using an arithmetic right shift).
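+
+A generic C sketch of the deposit/extract/sextract semantics above, for
+the 32-bit case (illustrative only; POS/LEN are assumed to describe a
+valid field):
+
+  uint32_t ref_deposit_i32(uint32_t t1, uint32_t t2,
+                           unsigned pos, unsigned len)
+  {
+      uint32_t mask = (len < 32 ? (1u << len) - 1 : ~0u) << pos;
+      return (t1 & ~mask) | ((t2 << pos) & mask);
+  }
+
+  uint32_t ref_extract_i32(uint32_t t1, unsigned pos, unsigned len)
+  {
+      return (t1 >> pos) & (len < 32 ? (1u << len) - 1 : ~0u);
+  }
+
+  int32_t ref_sextract_i32(uint32_t t1, unsigned pos, unsigned len)
+  {
+      /* shift the field to the top, then arithmetic-shift back down */
+      return (int32_t)(t1 << (32 - len - pos)) >> (32 - len);
+  }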
+
+* extract2_i32/i64 dest, t1, t2, pos
+
+For N = {32,64}, extract an N-bit quantity from the concatenation
+of t2:t1, beginning at pos. The tcg_gen_extract2_{i32,i64} expander
+accepts 0 <= pos <= N as inputs. The backend code generator will
+not see either 0 or N as inputs for these opcodes.
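+
+A C sketch of the 32-bit extract2 semantics (illustrative only; as noted
+above, the backend only ever sees 0 < pos < 32, the expanders handling
+the boundary cases themselves):
+
+  uint32_t ref_extract2_i32(uint32_t t1, uint32_t t2, unsigned pos)
+  {
+      /* low 32 bits of the 64-bit value t2:t1 shifted right by pos */
+      return (t1 >> pos) | (t2 << (32 - pos));
+  }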
+
+* extrl_i64_i32 t0, t1
+
+For 64-bit hosts only, extract the low 32-bits of input T1 and place it
+into 32-bit output T0. Depending on the host, this may be a simple move,
+or may require additional canonicalization.
+
+* extrh_i64_i32 t0, t1
+
+For 64-bit hosts only, extract the high 32-bits of input T1 and place it
+into 32-bit output T0. Depending on the host, this may be a simple shift,
+or may require additional canonicalization.
+
+********* Conditional moves
+
+* setcond_i32/i64 dest, t1, t2, cond
+
+dest = (t1 cond t2)
+
+Set DEST to 1 if (T1 cond T2) is true, otherwise set to 0.
+
+* movcond_i32/i64 dest, c1, c2, v1, v2, cond
+
+dest = (c1 cond c2 ? v1 : v2)
+
+Set DEST to V1 if (C1 cond C2) is true, otherwise set to V2.
+
+********* Type conversions
+
+* ext_i32_i64 t0, t1
+Convert t1 (32 bit) to t0 (64 bit) with sign extension
+
+* extu_i32_i64 t0, t1
+Convert t1 (32 bit) to t0 (64 bit) with zero extension
+
+* trunc_i64_i32 t0, t1
+Truncate t1 (64 bit) to t0 (32 bit)
+
+* concat_i32_i64 t0, t1, t2
+Construct t0 (64-bit) taking the low half from t1 (32 bit) and the high half
+from t2 (32 bit).
+
+* concat32_i64 t0, t1, t2
+Construct t0 (64-bit) taking the low half from t1 (64 bit) and the high half
+from t2 (64 bit).
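+
+A C sketch of the extension/truncation/concatenation semantics above
+(illustrative only; concat32_i64 is analogous to concat_i32_i64 but
+takes the halves from 64-bit inputs):
+
+  uint64_t ref_ext_i32_i64(uint32_t t1)   { return (int64_t)(int32_t)t1; }
+  uint64_t ref_extu_i32_i64(uint32_t t1)  { return t1; }
+  uint32_t ref_trunc_i64_i32(uint64_t t1) { return (uint32_t)t1; }
+
+  uint64_t ref_concat_i32_i64(uint32_t t1, uint32_t t2)
+  {
+      return (uint64_t)t1 | ((uint64_t)t2 << 32);  /* low = t1, high = t2 */
+  }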
+
+********* Load/Store
+
+* ld_i32/i64 t0, t1, offset
+ld8s_i32/i64 t0, t1, offset
+ld8u_i32/i64 t0, t1, offset
+ld16s_i32/i64 t0, t1, offset
+ld16u_i32/i64 t0, t1, offset
+ld32s_i64 t0, t1, offset
+ld32u_i64 t0, t1, offset
+
+t0 = read(t1 + offset)
+Load 8, 16, 32 or 64 bits with or without sign extension from host memory.
+offset must be a constant.
+
+* st_i32/i64 t0, t1, offset
+st8_i32/i64 t0, t1, offset
+st16_i32/i64 t0, t1, offset
+st32_i64 t0, t1, offset
+
+write(t0, t1 + offset)
+Write 8, 16, 32 or 64 bits to host memory.
+
+All these opcodes assume that the addressed host memory doesn't correspond
+to a global. Otherwise the behaviour is unpredictable.
+
+********* Multiword arithmetic support
+
+* add2_i32/i64 t0_low, t0_high, t1_low, t1_high, t2_low, t2_high
+* sub2_i32/i64 t0_low, t0_high, t1_low, t1_high, t2_low, t2_high
+
+Similar to add/sub, except that the double-word inputs T1 and T2 are
+formed from two single-word arguments, and the double-word output T0
+is returned in two single-word outputs.
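+
+A C sketch of the 32-bit add2 semantics (illustrative only; sub2 is
+analogous, with a borrow instead of a carry):
+
+  void ref_add2_i32(uint32_t *t0_low, uint32_t *t0_high,
+                    uint32_t t1_low, uint32_t t1_high,
+                    uint32_t t2_low, uint32_t t2_high)
+  {
+      uint32_t low = t1_low + t2_low;
+      uint32_t carry = low < t1_low;   /* unsigned overflow of the low half */
+      *t0_low = low;
+      *t0_high = t1_high + t2_high + carry;
+  }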
+
+* mulu2_i32/i64 t0_low, t0_high, t1, t2
+
+Similar to mul, except two unsigned inputs T1 and T2 yielding the full
+double-word product T0. The latter is returned in two single-word outputs.
+
+* muls2_i32/i64 t0_low, t0_high, t1, t2
+
+Similar to mulu2, except the two inputs T1 and T2 are signed.
+
+* mulsh_i32/i64 t0, t1, t2
+* muluh_i32/i64 t0, t1, t2
+
+Provide the high part of a signed or unsigned multiply, respectively.
+If mulu2/muls2 are not provided by the backend, the tcg-op generator
+can obtain the same results by emitting a pair of opcodes,
+mul + muluh/mulsh.
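+
+A C sketch of the 32-bit high-part multiply semantics, and of the
+mul + muluh pairing mentioned above (illustrative only):
+
+  uint32_t ref_muluh_i32(uint32_t t1, uint32_t t2)
+  {
+      return (uint32_t)(((uint64_t)t1 * t2) >> 32);
+  }
+
+  uint32_t ref_mulsh_i32(uint32_t t1, uint32_t t2)
+  {
+      return (uint32_t)(((int64_t)(int32_t)t1 * (int32_t)t2) >> 32);
+  }
+
+  /* mulu2 t0_low, t0_high, t1, t2 is then equivalent to:
+     t0_low = t1 * t2;  t0_high = ref_muluh_i32(t1, t2);  */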
+
+********* Memory Barrier support
+
+* mb <$arg>
+
+Generate a target memory barrier instruction to ensure the memory ordering
+enforced by a corresponding guest memory barrier instruction. The ordering
+enforced by the backend may be stricter than the ordering required by the
+guest; it cannot be weaker. This opcode takes a constant argument which is
+required to generate the appropriate barrier instruction. The backend should
+take care to emit the target barrier instruction only when necessary, i.e.
+for SMP guests and when MTTCG is enabled.
+
+The guest translators should generate this opcode for all guest instructions
+which have ordering side effects.
+
+Please see docs/devel/atomics.rst for more information on memory barriers.
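+
+For example, a guest front end translating a full fence might emit the
+following (a sketch only; tcg_gen_mb() and the TCG_MO_*/TCG_BAR_*
+constants are assumed to be the usual ones from tcg/tcg.h):
+
+  /* sequentially consistent barrier ordering all loads and stores */
+  tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);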
+
+********* 64-bit guest on 32-bit host support
+
+The following opcodes are internal to TCG. Thus they are to be implemented by
+32-bit host code generators, but are not to be emitted by guest translators.
+They are emitted as needed by inline functions within "tcg-op.h".
+
+* brcond2_i32 t0_low, t0_high, t1_low, t1_high, cond, label
+
+Similar to brcond, except that the 64-bit values T0 and T1
+are formed from two 32-bit arguments.
+
+* setcond2_i32 dest, t1_low, t1_high, t2_low, t2_high, cond
+
+Similar to setcond, except that the 64-bit values T1 and T2 are
+formed from two 32-bit arguments. The result is a 32-bit value.
+
+********* QEMU specific operations
+
+* exit_tb t0
+
+Exit the current TB and return the value t0 (word type).
+
+* goto_tb index
+
+Exit the current TB and jump to the TB index 'index' (constant) if the
+current TB was linked to this TB. Otherwise execute the next
+instructions. Only indices 0 and 1 are valid and tcg_gen_goto_tb may be issued
+at most once with each slot index per TB.
+
+* lookup_and_goto_ptr tb_addr
+
+Look up a TB address ('tb_addr') and jump to it if valid. If not valid,
+jump to the TCG epilogue to go back to the exec loop.
+
+This operation is optional. If the TCG backend does not implement the
+goto_ptr opcode, emitting this op is equivalent to emitting exit_tb(0).
+
+* qemu_ld_i32/i64 t0, t1, flags, memidx
+* qemu_st_i32/i64 t0, t1, flags, memidx
+* qemu_st8_i32 t0, t1, flags, memidx
+
+Load data at the guest address t1 into t0, or store data in t0 at guest
+address t1. The _i32/_i64 size applies to the size of the input/output
+register t0 only. The address t1 is always sized according to the guest,
+and the width of the memory operation is controlled by flags.
+
+Both t0 and t1 may be split into little-endian ordered pairs of registers
+if dealing with 64-bit quantities on a 32-bit host.
+
+The memidx selects the qemu tlb index to use (e.g. user or kernel access).
+The flags are the MemOp bits, selecting the sign, width, and endianness
+of the memory access.
+
+For a 32-bit host, qemu_ld/st_i64 is guaranteed to only be used with a
+64-bit memory access specified in flags.
+
+For i386, qemu_st8_i32 is exactly like qemu_st_i32, except the size of
+the memory operation is known to be 8-bit. This allows the backend to
+provide a different set of register constraints.
+
+********* Host vector operations
+
+All of the vector ops have two parameters, TCGOP_VECL & TCGOP_VECE.
+The former specifies the length of the vector in log2 64-bit units; the
+latter specifies the length of the element (if applicable) in log2 8-bit units.
+E.g. VECL=1 -> 64 << 1 -> v128, and VECE=2 -> 1 << 2 -> i32.
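+
+Worked example of that encoding (illustrative only):
+
+  unsigned vecl = 1, vece = 2;
+  unsigned vec_bits  = 64 << vecl;             /* 128-bit vector (v128)  */
+  unsigned elem_bits = 8 << vece;              /* 32-bit elements (i32)  */
+  unsigned n_elems   = vec_bits / elem_bits;   /* 4 elements per vector  */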
+
+* mov_vec v0, v1
+* ld_vec v0, t1
+* st_vec v0, t1
+
+ Move, load and store.
+
+* dup_vec v0, r1
+
+ Duplicate the low N bits of R1 into VECL/VECE copies across V0.
+
+* dupi_vec v0, c
+
+ Similarly, for a constant.
+ Smaller values will be replicated to host register size by the expanders.
+
+* dup2_vec v0, r1, r2
+
+ Duplicate r2:r1 into VECL/64 copies across V0. This opcode is
+ only present for 32-bit hosts.
+
+* add_vec v0, v1, v2
+
+ v0 = v1 + v2, in elements across the vector.
+
+* sub_vec v0, v1, v2
+
+ Similarly, v0 = v1 - v2.
+
+* mul_vec v0, v1, v2
+
+ Similarly, v0 = v1 * v2.
+
+* neg_vec v0, v1
+
+ Similarly, v0 = -v1.
+
+* abs_vec v0, v1
+
+ Similarly, v0 = v1 < 0 ? -v1 : v1, in elements across the vector.
+
+* smin_vec:
+* umin_vec:
+
+ Similarly, v0 = MIN(v1, v2), for signed and unsigned element types.
+
+* smax_vec:
+* umax_vec:
+
+ Similarly, v0 = MAX(v1, v2), for signed and unsigned element types.
+
+* ssadd_vec:
+* sssub_vec:
+* usadd_vec:
+* ussub_vec:
+
+ Signed and unsigned saturating addition and subtraction. If the true
+ result is not representable within the element type, the element is
+ set to the minimum or maximum value for the type.
+
+* and_vec v0, v1, v2
+* or_vec v0, v1, v2
+* xor_vec v0, v1, v2
+* andc_vec v0, v1, v2
+* orc_vec v0, v1, v2
+* not_vec v0, v1
+
+ Similarly, logical operations with and without complement.
+ Note that VECE is unused.
+
+* shli_vec v0, v1, i2
+* shls_vec v0, v1, s2
+
+ Shift all elements from v1 by a scalar i2/s2. I.e.
+
+ for (i = 0; i < VECL/VECE; ++i) {
+ v0[i] = v1[i] << s2;
+ }
+
+* shri_vec v0, v1, i2
+* sari_vec v0, v1, i2
+* rotli_vec v0, v1, i2
+* shrs_vec v0, v1, s2
+* sars_vec v0, v1, s2
+
+ Similarly for logical and arithmetic right shift, and left rotate.
+
+* shlv_vec v0, v1, v2
+
+ Shift elements from v1 by elements from v2. I.e.
+
+ for (i = 0; i < VECL/VECE; ++i) {
+ v0[i] = v1[i] << v2[i];
+ }
+
+* shrv_vec v0, v1, v2
+* sarv_vec v0, v1, v2
+* rotlv_vec v0, v1, v2
+* rotrv_vec v0, v1, v2
+
+ Similarly for logical and arithmetic right shift, and rotates.
+
+* cmp_vec v0, v1, v2, cond
+
+ Compare vectors by element, storing -1 for true and 0 for false.
+
+* bitsel_vec v0, v1, v2, v3
+
+ Bitwise select, v0 = (v2 & v1) | (v3 & ~v1), across the entire vector.
+
+* cmpsel_vec v0, c1, c2, v3, v4, cond
+
+ Select elements based on comparison results:
+ for (i = 0; i < n; ++i) {
+    v0[i] = (c1[i] cond c2[i]) ? v3[i] : v4[i];
+ }
+
+*********
+
+Note 1: Some shortcuts are defined when the last operand is known to be
+a constant (e.g. addi for add, movi for mov).
+
+Note 2: When using TCG, the opcodes must never be generated directly
+as some of them may not be available as "real" opcodes. Always use the
+function tcg_gen_xxx(args).
+
+4) Backend
+
+tcg-target.h contains the target specific definitions. tcg-target.c.inc
+contains the target specific code; it is #included by tcg/tcg.c, rather
+than being a standalone C file.
+
+4.1) Assumptions
+
+The target word size (TCG_TARGET_REG_BITS) is expected to be 32 bit or
+64 bit. It is expected that the pointer has the same size as the word.
+
+On a 32 bit target, all 64 bit operations are converted to 32 bits. A
+few specific operations must be implemented to allow it (see add2_i32,
+sub2_i32, brcond2_i32).
+
+On a 64 bit target, the values are transferred between 32 and 64-bit
+registers using the following ops:
+- trunc_shr_i64_i32
+- ext_i32_i64
+- extu_i32_i64
+
+They ensure that the values are correctly truncated or extended when
+moved from a 32-bit to a 64-bit register or vice-versa. Note that the
+trunc_shr_i64_i32 is an optional op. It is not necessary to implement
+it if all the following conditions are met:
+- 64-bit registers can hold 32-bit values
+- 32-bit values in a 64-bit register do not need to stay zero or
+ sign extended
+- all 32-bit TCG ops ignore the high part of 64-bit registers
+
+Floating point operations are not supported in this version. A
+previous incarnation of the code generator had full support of them,
+but it is better to concentrate on integer operations first.
+
+4.2) Constraints
+
+GCC like constraints are used to define the constraints of every
+instruction. Memory constraints are not supported in this
+version. Aliases are specified in the input operands as for GCC.
+
+The same register may be used for both an input and an output, even when
+they are not explicitly aliased. If an op expands to multiple target
+instructions then care must be taken to avoid clobbering input values.
+GCC style "early clobber" outputs are supported, with '&'.
+
+A target can define specific register or constant constraints. If an
+operation uses a constant input constraint which does not allow all
+constants, it must also accept registers in order to have a fallback.
+The constraint 'i' is defined generically to accept any constant.
+The constraint 'r' is not defined generically, but is consistently
+used by each backend to indicate all registers.
+
+The movi_i32 and movi_i64 operations must accept any constants.
+
+The mov_i32 and mov_i64 operations must accept any registers of the
+same type.
+
+The ld/st/sti instructions must accept signed 32 bit constant offsets.
+This can be implemented by reserving a specific register in which to
+compute the address if the offset is too big.
+
+The ld/st instructions must accept any destination (ld) or source (st)
+register.
+
+The sti instruction may fail if it cannot store the given constant.
+
+4.3) Function call assumptions
+
+- The only supported types for parameters and return value are: 32 and
+ 64 bit integers and pointer.
+- The stack grows downwards.
+- The first N parameters are passed in registers.
+- The next parameters are passed on the stack by storing them as words.
+- Some registers are clobbered during the call.
+- The function can return 0 or 1 value in registers. On a 32 bit
+ target, functions must be able to return 2 values in registers for
+ 64 bit return type.
+
+5) Recommended coding rules for best performance
+
+- Use globals to represent the parts of the QEMU CPU state which are
+ often modified, e.g. the integer registers and the condition
+ codes. TCG will be able to use host registers to store them.
+
+- Avoid globals stored in fixed registers. They must be used only to
+ store the pointer to the CPU state and possibly to store a pointer
+ to a register window.
+
+- Use temporaries. Use local temporaries only when really needed,
+ e.g. when you need to use a value after a jump. Local temporaries
+ introduce a performance hit in the current TCG implementation: their
+ content is saved to memory at end of each basic block.
+
+- Free temporaries and local temporaries when they are no longer used
+  (tcg_temp_free). Since tcg_const_x() also creates a temporary, you
+  should free it after it is used. Freeing temporaries does not yield
+  better generated code, but it reduces the memory usage of TCG and
+  improves the speed of translation; see the sketch below.
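+
+  A minimal sketch of the pattern (illustrative only; cpu_reg_a and
+  cpu_reg_b stand for hypothetical TCGv_i32 globals, and the
+  tcg_temp_new/tcg_temp_free/tcg_const helpers are the usual tcg-op ones):
+
+    TCGv_i32 tmp = tcg_temp_new_i32();
+    TCGv_i32 one = tcg_const_i32(1);
+
+    tcg_gen_add_i32(tmp, cpu_reg_a, one);   /* tmp = a + 1 */
+    tcg_gen_mov_i32(cpu_reg_b, tmp);        /* b = tmp */
+
+    tcg_temp_free_i32(one);                 /* tcg_const_* makes a temporary too */
+    tcg_temp_free_i32(tmp);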
+
+- Don't hesitate to use helpers for complicated or seldom used guest
+ instructions. There is little performance advantage in using TCG to
+ implement guest instructions taking more than about twenty TCG
+ instructions. Note that this rule of thumb is more applicable to
+ helpers doing complex logic or arithmetic, where the C compiler has
+ scope to do a good job of optimisation; it is less relevant where
+ the instruction is mostly doing loads and stores, and in those cases
+ inline TCG may still be faster for longer sequences.
+
+- The hard limit on the number of TCG instructions you can generate
+ per guest instruction is set by MAX_OP_PER_INSTR in exec-all.h --
+ you cannot exceed this without risking a buffer overrun.
+
+- Use the 'discard' instruction if you know that TCG won't be able to
+ prove that a given global is "dead" at a given program point. The
+ x86 guest uses it to improve the condition codes optimisation.
diff --git a/tcg/aarch64/tcg-target-con-set.h b/tcg/aarch64/tcg-target-con-set.h
new file mode 100644
index 000000000..d6c686687
--- /dev/null
+++ b/tcg/aarch64/tcg-target-con-set.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Define AArch64 target-specific constraint sets.
+ * Copyright (c) 2021 Linaro
+ */
+
+/*
+ * C_On_Im(...) defines a constraint set with <n> outputs and <m> inputs.
+ * Each operand should be a sequence of constraint letters as defined by
+ * tcg-target-con-str.h; the constraint combination is inclusive or.
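+ *
+ * For example (an illustrative reading, not part of the original header),
+ * C_O1_I2(r, r, rA) names an op with one output in a general register and
+ * two inputs, the second of which may be either a general register or an
+ * 'A' (add-immediate) constant as defined in tcg-target-con-str.h.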
+ */
+C_O0_I1(r)
+C_O0_I2(lZ, l)
+C_O0_I2(r, rA)
+C_O0_I2(rZ, r)
+C_O0_I2(w, r)
+C_O1_I1(r, l)
+C_O1_I1(r, r)
+C_O1_I1(w, r)
+C_O1_I1(w, w)
+C_O1_I1(w, wr)
+C_O1_I2(r, 0, rZ)
+C_O1_I2(r, r, r)
+C_O1_I2(r, r, rA)
+C_O1_I2(r, r, rAL)
+C_O1_I2(r, r, ri)
+C_O1_I2(r, r, rL)
+C_O1_I2(r, rZ, rZ)
+C_O1_I2(w, 0, w)
+C_O1_I2(w, w, w)
+C_O1_I2(w, w, wN)
+C_O1_I2(w, w, wO)
+C_O1_I2(w, w, wZ)
+C_O1_I3(w, w, w, w)
+C_O1_I4(r, r, rA, rZ, rZ)
+C_O2_I4(r, r, rZ, rZ, rA, rMZ)
diff --git a/tcg/aarch64/tcg-target-con-str.h b/tcg/aarch64/tcg-target-con-str.h
new file mode 100644
index 000000000..00adb6459
--- /dev/null
+++ b/tcg/aarch64/tcg-target-con-str.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Define AArch64 target-specific operand constraints.
+ * Copyright (c) 2021 Linaro
+ */
+
+/*
+ * Define constraint letters for register sets:
+ * REGS(letter, register_mask)
+ */
+REGS('r', ALL_GENERAL_REGS)
+REGS('l', ALL_QLDST_REGS)
+REGS('w', ALL_VECTOR_REGS)
+
+/*
+ * Define constraint letters for constants:
+ * CONST(letter, TCG_CT_CONST_* bit set)
+ */
+CONST('A', TCG_CT_CONST_AIMM)
+CONST('L', TCG_CT_CONST_LIMM)
+CONST('M', TCG_CT_CONST_MONE)
+CONST('O', TCG_CT_CONST_ORRI)
+CONST('N', TCG_CT_CONST_ANDI)
+CONST('Z', TCG_CT_CONST_ZERO)
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
new file mode 100644
index 000000000..5edca8d44
--- /dev/null
+++ b/tcg/aarch64/tcg-target.c.inc
@@ -0,0 +1,3028 @@
+/*
+ * Initial TCG Implementation for aarch64
+ *
+ * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
+ * Written by Claudio Fontana
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.
+ *
+ * See the COPYING file in the top-level directory for details.
+ */
+
+#include "../tcg-pool.c.inc"
+#include "qemu/bitops.h"
+
+/* We're going to re-use TCGType in setting of the SF bit, which controls
+ the size of the operation performed. If we know the values match, it
+ makes things much cleaner. */
+QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
+
+#ifdef CONFIG_DEBUG_TCG
+static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
+ "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
+ "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
+ "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
+ "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",
+
+ "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
+ "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
+ "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
+ "v24", "v25", "v26", "v27", "v28", "fp", "v30", "v31",
+};
+#endif /* CONFIG_DEBUG_TCG */
+
+static const int tcg_target_reg_alloc_order[] = {
+ TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
+ TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
+ TCG_REG_X28, /* we will reserve this for guest_base if configured */
+
+ TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
+ TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
+ TCG_REG_X16, TCG_REG_X17,
+
+ TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
+ TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
+
+ /* X18 reserved by system */
+ /* X19 reserved for AREG0 */
+ /* X29 reserved as fp */
+ /* X30 reserved as temporary */
+
+ TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
+ TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
+ /* V8 - V15 are call-saved, and skipped. */
+ TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
+ TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
+ TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
+ TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
+};
+
+static const int tcg_target_call_iarg_regs[8] = {
+ TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
+ TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
+};
+static const int tcg_target_call_oarg_regs[1] = {
+ TCG_REG_X0
+};
+
+#define TCG_REG_TMP TCG_REG_X30
+#define TCG_VEC_TMP TCG_REG_V31
+
+#ifndef CONFIG_SOFTMMU
+/* Note that XZR cannot be encoded in the address base register slot,
+   as that actually encodes SP. So if we need to zero-extend the guest
+ address, via the address index register slot, we need to load even
+ a zero guest base into a register. */
+#define USE_GUEST_BASE (guest_base != 0 || TARGET_LONG_BITS == 32)
+#define TCG_REG_GUEST_BASE TCG_REG_X28
+#endif
+
+static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
+{
+ const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
+ ptrdiff_t offset = target - src_rx;
+
+ if (offset == sextract64(offset, 0, 26)) {
+ /* read instruction, mask away previous PC_REL26 parameter contents,
+ set the proper offset, then write back the instruction. */
+ *src_rw = deposit32(*src_rw, 0, 26, offset);
+ return true;
+ }
+ return false;
+}
+
+static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
+{
+ const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
+ ptrdiff_t offset = target - src_rx;
+
+ if (offset == sextract64(offset, 0, 19)) {
+ *src_rw = deposit32(*src_rw, 5, 19, offset);
+ return true;
+ }
+ return false;
+}
+
+static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
+ intptr_t value, intptr_t addend)
+{
+ tcg_debug_assert(addend == 0);
+ switch (type) {
+ case R_AARCH64_JUMP26:
+ case R_AARCH64_CALL26:
+ return reloc_pc26(code_ptr, (const tcg_insn_unit *)value);
+ case R_AARCH64_CONDBR19:
+ return reloc_pc19(code_ptr, (const tcg_insn_unit *)value);
+ default:
+ g_assert_not_reached();
+ }
+}
+
+#define TCG_CT_CONST_AIMM 0x100
+#define TCG_CT_CONST_LIMM 0x200
+#define TCG_CT_CONST_ZERO 0x400
+#define TCG_CT_CONST_MONE 0x800
+#define TCG_CT_CONST_ORRI 0x1000
+#define TCG_CT_CONST_ANDI 0x2000
+
+#define ALL_GENERAL_REGS 0xffffffffu
+#define ALL_VECTOR_REGS 0xffffffff00000000ull
+
+#ifdef CONFIG_SOFTMMU
+#define ALL_QLDST_REGS \
+ (ALL_GENERAL_REGS & ~((1 << TCG_REG_X0) | (1 << TCG_REG_X1) | \
+ (1 << TCG_REG_X2) | (1 << TCG_REG_X3)))
+#else
+#define ALL_QLDST_REGS ALL_GENERAL_REGS
+#endif
+
+/* Match a constant valid for addition (12-bit, optionally shifted). */
+static inline bool is_aimm(uint64_t val)
+{
+ return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
+}
+
+/* Match a constant valid for logical operations. */
+static inline bool is_limm(uint64_t val)
+{
+ /* Taking a simplified view of the logical immediates for now, ignoring
+ the replication that can happen across the field. Match bit patterns
+ of the forms
+ 0....01....1
+ 0..01..10..0
+ and their inverses. */
+
+ /* Make things easier below, by testing the form with msb clear. */
+ if ((int64_t)val < 0) {
+ val = ~val;
+ }
+ if (val == 0) {
+ return false;
+ }
+ val += val & -val;
+ return (val & (val - 1)) == 0;
+}
+
+/* Return true if v16 is a valid 16-bit shifted immediate. */
+static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
+{
+ if (v16 == (v16 & 0xff)) {
+ *cmode = 0x8;
+ *imm8 = v16 & 0xff;
+ return true;
+ } else if (v16 == (v16 & 0xff00)) {
+ *cmode = 0xa;
+ *imm8 = v16 >> 8;
+ return true;
+ }
+ return false;
+}
+
+/* Return true if v32 is a valid 32-bit shifted immediate. */
+static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
+{
+ if (v32 == (v32 & 0xff)) {
+ *cmode = 0x0;
+ *imm8 = v32 & 0xff;
+ return true;
+ } else if (v32 == (v32 & 0xff00)) {
+ *cmode = 0x2;
+ *imm8 = (v32 >> 8) & 0xff;
+ return true;
+ } else if (v32 == (v32 & 0xff0000)) {
+ *cmode = 0x4;
+ *imm8 = (v32 >> 16) & 0xff;
+ return true;
+ } else if (v32 == (v32 & 0xff000000)) {
+ *cmode = 0x6;
+ *imm8 = v32 >> 24;
+ return true;
+ }
+ return false;
+}
+
+/* Return true if v32 is a valid 32-bit shifting ones immediate. */
+static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
+{
+ if ((v32 & 0xffff00ff) == 0xff) {
+ *cmode = 0xc;
+ *imm8 = (v32 >> 8) & 0xff;
+ return true;
+ } else if ((v32 & 0xff00ffff) == 0xffff) {
+ *cmode = 0xd;
+ *imm8 = (v32 >> 16) & 0xff;
+ return true;
+ }
+ return false;
+}
+
+/* Return true if v32 is a valid float32 immediate. */
+static bool is_fimm32(uint32_t v32, int *cmode, int *imm8)
+{
+ if (extract32(v32, 0, 19) == 0
+ && (extract32(v32, 25, 6) == 0x20
+ || extract32(v32, 25, 6) == 0x1f)) {
+ *cmode = 0xf;
+ *imm8 = (extract32(v32, 31, 1) << 7)
+ | (extract32(v32, 25, 1) << 6)
+ | extract32(v32, 19, 6);
+ return true;
+ }
+ return false;
+}
+
+/* Return true if v64 is a valid float64 immediate. */
+static bool is_fimm64(uint64_t v64, int *cmode, int *imm8)
+{
+ if (extract64(v64, 0, 48) == 0
+ && (extract64(v64, 54, 9) == 0x100
+ || extract64(v64, 54, 9) == 0x0ff)) {
+ *cmode = 0xf;
+ *imm8 = (extract64(v64, 63, 1) << 7)
+ | (extract64(v64, 54, 1) << 6)
+ | extract64(v64, 48, 6);
+ return true;
+ }
+ return false;
+}
+
+/*
+ * Return non-zero if v32 can be formed by MOVI+ORR.
+ * Place the parameters for MOVI in (cmode, imm8).
+ * Return the cmode for ORR; the imm8 can be had via extraction from v32.
+ */
+static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
+{
+ int i;
+
+ for (i = 6; i > 0; i -= 2) {
+ /* Mask out one byte we can add with ORR. */
+ uint32_t tmp = v32 & ~(0xffu << (i * 4));
+ if (is_shimm32(tmp, cmode, imm8) ||
+ is_soimm32(tmp, cmode, imm8)) {
+ break;
+ }
+ }
+ return i;
+}
+
+/* Return true if V is a valid 16-bit or 32-bit shifted immediate. */
+static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
+{
+ if (v32 == deposit32(v32, 16, 16, v32)) {
+ return is_shimm16(v32, cmode, imm8);
+ } else {
+ return is_shimm32(v32, cmode, imm8);
+ }
+}
+
+static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
+{
+ if (ct & TCG_CT_CONST) {
+ return 1;
+ }
+ if (type == TCG_TYPE_I32) {
+ val = (int32_t)val;
+ }
+ if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
+ return 1;
+ }
+ if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
+ return 1;
+ }
+ if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
+ return 1;
+ }
+ if ((ct & TCG_CT_CONST_MONE) && val == -1) {
+ return 1;
+ }
+
+ switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
+ case 0:
+ break;
+ case TCG_CT_CONST_ANDI:
+ val = ~val;
+ /* fallthru */
+ case TCG_CT_CONST_ORRI:
+ if (val == deposit64(val, 32, 32, val)) {
+ int cmode, imm8;
+ return is_shimm1632(val, &cmode, &imm8);
+ }
+ break;
+ default:
+ /* Both bits should not be set for the same insn. */
+ g_assert_not_reached();
+ }
+
+ return 0;
+}
+
+enum aarch64_cond_code {
+ COND_EQ = 0x0,
+ COND_NE = 0x1,
+ COND_CS = 0x2, /* Unsigned greater or equal */
+ COND_HS = COND_CS, /* ALIAS greater or equal */
+ COND_CC = 0x3, /* Unsigned less than */
+ COND_LO = COND_CC, /* ALIAS Lower */
+ COND_MI = 0x4, /* Negative */
+ COND_PL = 0x5, /* Zero or greater */
+ COND_VS = 0x6, /* Overflow */
+ COND_VC = 0x7, /* No overflow */
+ COND_HI = 0x8, /* Unsigned greater than */
+ COND_LS = 0x9, /* Unsigned less or equal */
+ COND_GE = 0xa,
+ COND_LT = 0xb,
+ COND_GT = 0xc,
+ COND_LE = 0xd,
+ COND_AL = 0xe,
+ COND_NV = 0xf, /* behaves like COND_AL here */
+};
+
+static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
+ [TCG_COND_EQ] = COND_EQ,
+ [TCG_COND_NE] = COND_NE,
+ [TCG_COND_LT] = COND_LT,
+ [TCG_COND_GE] = COND_GE,
+ [TCG_COND_LE] = COND_LE,
+ [TCG_COND_GT] = COND_GT,
+ /* unsigned */
+ [TCG_COND_LTU] = COND_LO,
+ [TCG_COND_GTU] = COND_HI,
+ [TCG_COND_GEU] = COND_HS,
+ [TCG_COND_LEU] = COND_LS,
+};
+
+typedef enum {
+ LDST_ST = 0, /* store */
+ LDST_LD = 1, /* load */
+ LDST_LD_S_X = 2, /* load and sign-extend into Xt */
+ LDST_LD_S_W = 3, /* load and sign-extend into Wt */
+} AArch64LdstType;
+
+/* We encode the format of the insn into the beginning of the name, so that
+ we can have the preprocessor help "typecheck" the insn vs the output
+ function. Arm didn't provide us with nice names for the formats, so we
+ use the section number of the architecture reference manual in which the
+ instruction group is described. */
+typedef enum {
+ /* Compare and branch (immediate). */
+ I3201_CBZ = 0x34000000,
+ I3201_CBNZ = 0x35000000,
+
+ /* Conditional branch (immediate). */
+ I3202_B_C = 0x54000000,
+
+ /* Unconditional branch (immediate). */
+ I3206_B = 0x14000000,
+ I3206_BL = 0x94000000,
+
+ /* Unconditional branch (register). */
+ I3207_BR = 0xd61f0000,
+ I3207_BLR = 0xd63f0000,
+ I3207_RET = 0xd65f0000,
+
+ /* AdvSIMD load/store single structure. */
+ I3303_LD1R = 0x0d40c000,
+
+ /* Load literal for loading the address at pc-relative offset */
+ I3305_LDR = 0x58000000,
+ I3305_LDR_v64 = 0x5c000000,
+ I3305_LDR_v128 = 0x9c000000,
+
+ /* Load/store register. Described here as 3.3.12, but the helper
+ that emits them can transform to 3.3.10 or 3.3.13. */
+ I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
+ I3312_STRH = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
+ I3312_STRW = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
+ I3312_STRX = 0x38000000 | LDST_ST << 22 | MO_64 << 30,
+
+ I3312_LDRB = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
+ I3312_LDRH = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
+ I3312_LDRW = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
+ I3312_LDRX = 0x38000000 | LDST_LD << 22 | MO_64 << 30,
+
+ I3312_LDRSBW = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
+ I3312_LDRSHW = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,
+
+ I3312_LDRSBX = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
+ I3312_LDRSHX = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
+ I3312_LDRSWX = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,
+
+ I3312_LDRVS = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
+ I3312_STRVS = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,
+
+ I3312_LDRVD = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
+ I3312_STRVD = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,
+
+ I3312_LDRVQ = 0x3c000000 | 3 << 22 | 0 << 30,
+ I3312_STRVQ = 0x3c000000 | 2 << 22 | 0 << 30,
+
+ I3312_TO_I3310 = 0x00200800,
+ I3312_TO_I3313 = 0x01000000,
+
+ /* Load/store register pair instructions. */
+ I3314_LDP = 0x28400000,
+ I3314_STP = 0x28000000,
+
+ /* Add/subtract immediate instructions. */
+ I3401_ADDI = 0x11000000,
+ I3401_ADDSI = 0x31000000,
+ I3401_SUBI = 0x51000000,
+ I3401_SUBSI = 0x71000000,
+
+ /* Bitfield instructions. */
+ I3402_BFM = 0x33000000,
+ I3402_SBFM = 0x13000000,
+ I3402_UBFM = 0x53000000,
+
+ /* Extract instruction. */
+ I3403_EXTR = 0x13800000,
+
+ /* Logical immediate instructions. */
+ I3404_ANDI = 0x12000000,
+ I3404_ORRI = 0x32000000,
+ I3404_EORI = 0x52000000,
+
+ /* Move wide immediate instructions. */
+ I3405_MOVN = 0x12800000,
+ I3405_MOVZ = 0x52800000,
+ I3405_MOVK = 0x72800000,
+
+ /* PC relative addressing instructions. */
+ I3406_ADR = 0x10000000,
+ I3406_ADRP = 0x90000000,
+
+ /* Add/subtract shifted register instructions (without a shift). */
+ I3502_ADD = 0x0b000000,
+ I3502_ADDS = 0x2b000000,
+ I3502_SUB = 0x4b000000,
+ I3502_SUBS = 0x6b000000,
+
+ /* Add/subtract shifted register instructions (with a shift). */
+ I3502S_ADD_LSL = I3502_ADD,
+
+ /* Add/subtract with carry instructions. */
+ I3503_ADC = 0x1a000000,
+ I3503_SBC = 0x5a000000,
+
+ /* Conditional select instructions. */
+ I3506_CSEL = 0x1a800000,
+ I3506_CSINC = 0x1a800400,
+ I3506_CSINV = 0x5a800000,
+ I3506_CSNEG = 0x5a800400,
+
+ /* Data-processing (1 source) instructions. */
+ I3507_CLZ = 0x5ac01000,
+ I3507_RBIT = 0x5ac00000,
+ I3507_REV = 0x5ac00000, /* + size << 10 */
+
+ /* Data-processing (2 source) instructions. */
+ I3508_LSLV = 0x1ac02000,
+ I3508_LSRV = 0x1ac02400,
+ I3508_ASRV = 0x1ac02800,
+ I3508_RORV = 0x1ac02c00,
+ I3508_SMULH = 0x9b407c00,
+ I3508_UMULH = 0x9bc07c00,
+ I3508_UDIV = 0x1ac00800,
+ I3508_SDIV = 0x1ac00c00,
+
+ /* Data-processing (3 source) instructions. */
+ I3509_MADD = 0x1b000000,
+ I3509_MSUB = 0x1b008000,
+
+ /* Logical shifted register instructions (without a shift). */
+ I3510_AND = 0x0a000000,
+ I3510_BIC = 0x0a200000,
+ I3510_ORR = 0x2a000000,
+ I3510_ORN = 0x2a200000,
+ I3510_EOR = 0x4a000000,
+ I3510_EON = 0x4a200000,
+ I3510_ANDS = 0x6a000000,
+
+ /* Logical shifted register instructions (with a shift). */
+ I3502S_AND_LSR = I3510_AND | (1 << 22),
+
+ /* AdvSIMD copy */
+ I3605_DUP = 0x0e000400,
+ I3605_INS = 0x4e001c00,
+ I3605_UMOV = 0x0e003c00,
+
+ /* AdvSIMD modified immediate */
+ I3606_MOVI = 0x0f000400,
+ I3606_MVNI = 0x2f000400,
+ I3606_BIC = 0x2f001400,
+ I3606_ORR = 0x0f001400,
+
+ /* AdvSIMD scalar shift by immediate */
+ I3609_SSHR = 0x5f000400,
+ I3609_SSRA = 0x5f001400,
+ I3609_SHL = 0x5f005400,
+ I3609_USHR = 0x7f000400,
+ I3609_USRA = 0x7f001400,
+ I3609_SLI = 0x7f005400,
+
+ /* AdvSIMD scalar three same */
+ I3611_SQADD = 0x5e200c00,
+ I3611_SQSUB = 0x5e202c00,
+ I3611_CMGT = 0x5e203400,
+ I3611_CMGE = 0x5e203c00,
+ I3611_SSHL = 0x5e204400,
+ I3611_ADD = 0x5e208400,
+ I3611_CMTST = 0x5e208c00,
+ I3611_UQADD = 0x7e200c00,
+ I3611_UQSUB = 0x7e202c00,
+ I3611_CMHI = 0x7e203400,
+ I3611_CMHS = 0x7e203c00,
+ I3611_USHL = 0x7e204400,
+ I3611_SUB = 0x7e208400,
+ I3611_CMEQ = 0x7e208c00,
+
+ /* AdvSIMD scalar two-reg misc */
+ I3612_CMGT0 = 0x5e208800,
+ I3612_CMEQ0 = 0x5e209800,
+ I3612_CMLT0 = 0x5e20a800,
+ I3612_ABS = 0x5e20b800,
+ I3612_CMGE0 = 0x7e208800,
+ I3612_CMLE0 = 0x7e209800,
+ I3612_NEG = 0x7e20b800,
+
+ /* AdvSIMD shift by immediate */
+ I3614_SSHR = 0x0f000400,
+ I3614_SSRA = 0x0f001400,
+ I3614_SHL = 0x0f005400,
+ I3614_SLI = 0x2f005400,
+ I3614_USHR = 0x2f000400,
+ I3614_USRA = 0x2f001400,
+
+ /* AdvSIMD three same. */
+ I3616_ADD = 0x0e208400,
+ I3616_AND = 0x0e201c00,
+ I3616_BIC = 0x0e601c00,
+ I3616_BIF = 0x2ee01c00,
+ I3616_BIT = 0x2ea01c00,
+ I3616_BSL = 0x2e601c00,
+ I3616_EOR = 0x2e201c00,
+ I3616_MUL = 0x0e209c00,
+ I3616_ORR = 0x0ea01c00,
+ I3616_ORN = 0x0ee01c00,
+ I3616_SUB = 0x2e208400,
+ I3616_CMGT = 0x0e203400,
+ I3616_CMGE = 0x0e203c00,
+ I3616_CMTST = 0x0e208c00,
+ I3616_CMHI = 0x2e203400,
+ I3616_CMHS = 0x2e203c00,
+ I3616_CMEQ = 0x2e208c00,
+ I3616_SMAX = 0x0e206400,
+ I3616_SMIN = 0x0e206c00,
+ I3616_SSHL = 0x0e204400,
+ I3616_SQADD = 0x0e200c00,
+ I3616_SQSUB = 0x0e202c00,
+ I3616_UMAX = 0x2e206400,
+ I3616_UMIN = 0x2e206c00,
+ I3616_UQADD = 0x2e200c00,
+ I3616_UQSUB = 0x2e202c00,
+ I3616_USHL = 0x2e204400,
+
+ /* AdvSIMD two-reg misc. */
+ I3617_CMGT0 = 0x0e208800,
+ I3617_CMEQ0 = 0x0e209800,
+ I3617_CMLT0 = 0x0e20a800,
+ I3617_CMGE0 = 0x2e208800,
+ I3617_CMLE0 = 0x2e209800,
+ I3617_NOT = 0x2e205800,
+ I3617_ABS = 0x0e20b800,
+ I3617_NEG = 0x2e20b800,
+
+ /* System instructions. */
+ NOP = 0xd503201f,
+ DMB_ISH = 0xd50338bf,
+ DMB_LD = 0x00000100,
+ DMB_ST = 0x00000200,
+} AArch64Insn;
+
+static inline uint32_t tcg_in32(TCGContext *s)
+{
+ uint32_t v = *(uint32_t *)s->code_ptr;
+ return v;
+}
+
+/* Emit an opcode with "type-checking" of the format. */
+#define tcg_out_insn(S, FMT, OP, ...) \
+ glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
+
+static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
+ TCGReg rt, TCGReg rn, unsigned size)
+{
+ tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));
+}
+
+static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
+ int imm19, TCGReg rt)
+{
+ tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
+}
+
+static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
+ TCGReg rt, int imm19)
+{
+ tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
+}
+
+static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
+ TCGCond c, int imm19)
+{
+ tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
+}
+
+static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
+{
+ tcg_out32(s, insn | (imm26 & 0x03ffffff));
+}
+
+static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
+{
+ tcg_out32(s, insn | rn << 5);
+}
+
+static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
+ TCGReg r1, TCGReg r2, TCGReg rn,
+ tcg_target_long ofs, bool pre, bool w)
+{
+ insn |= 1u << 31; /* ext */
+ insn |= pre << 24;
+ insn |= w << 23;
+
+ tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
+ insn |= (ofs & (0x7f << 3)) << (15 - 3);
+
+ tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
+}
+
+static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
+ TCGReg rd, TCGReg rn, uint64_t aimm)
+{
+ if (aimm > 0xfff) {
+ tcg_debug_assert((aimm & 0xfff) == 0);
+ aimm >>= 12;
+ tcg_debug_assert(aimm <= 0xfff);
+ aimm |= 1 << 12; /* apply LSL 12 */
+ }
+ tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
+}
+
+/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
+ (Logical immediate). Both insn groups have N, IMMR and IMMS fields
+ that feed the DecodeBitMasks pseudo function. */
+static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
+ TCGReg rd, TCGReg rn, int n, int immr, int imms)
+{
+ tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
+ | rn << 5 | rd);
+}
+
+#define tcg_out_insn_3404 tcg_out_insn_3402
+
+static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
+ TCGReg rd, TCGReg rn, TCGReg rm, int imms)
+{
+ tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
+ | rn << 5 | rd);
+}
+
+/* This function is used for the Move (wide immediate) instruction group.
+ Note that SHIFT is a full shift count, not the 2 bit HW field. */
+static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
+ TCGReg rd, uint16_t half, unsigned shift)
+{
+ tcg_debug_assert((shift & ~0x30) == 0);
+ tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
+}
+
+static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
+ TCGReg rd, int64_t disp)
+{
+ tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
+}
+
+/* This function is for 3.5.2 (Add/subtract shifted register), for
+ the rare occasion when we actually want to supply a shift amount. */
+static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
+ TCGType ext, TCGReg rd, TCGReg rn,
+ TCGReg rm, int imm6)
+{
+ tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
+}
+
+/* This function is for 3.5.2 (Add/subtract shifted register),
+   and 3.5.10 (Logical shifted register), for the vast majority of cases
+ when we don't want to apply a shift. Thus it can also be used for
+ 3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source). */
+static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
+ TCGReg rd, TCGReg rn, TCGReg rm)
+{
+ tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
+}
+
+#define tcg_out_insn_3503 tcg_out_insn_3502
+#define tcg_out_insn_3508 tcg_out_insn_3502
+#define tcg_out_insn_3510 tcg_out_insn_3502
+
+static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
+ TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
+{
+ tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
+ | tcg_cond_to_aarch64[c] << 12);
+}
+
+static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
+ TCGReg rd, TCGReg rn)
+{
+ tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
+}
+
+static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
+ TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
+{
+ tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
+}
+
+static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
+ TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
+{
+ /* Note that bit 11 set means general register input. Therefore
+ we can handle both register sets with one function. */
+ tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
+ | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
+}
+
+static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
+ TCGReg rd, bool op, int cmode, uint8_t imm8)
+{
+ tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
+ | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
+}
+
+static void tcg_out_insn_3609(TCGContext *s, AArch64Insn insn,
+ TCGReg rd, TCGReg rn, unsigned immhb)
+{
+ tcg_out32(s, insn | immhb << 16 | (rn & 0x1f) << 5 | (rd & 0x1f));
+}
+
+static void tcg_out_insn_3611(TCGContext *s, AArch64Insn insn,
+ unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
+{
+ tcg_out32(s, insn | (size << 22) | (rm & 0x1f) << 16
+ | (rn & 0x1f) << 5 | (rd & 0x1f));
+}
+
+static void tcg_out_insn_3612(TCGContext *s, AArch64Insn insn,
+ unsigned size, TCGReg rd, TCGReg rn)
+{
+ tcg_out32(s, insn | (size << 22) | (rn & 0x1f) << 5 | (rd & 0x1f));
+}
+
+static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
+ TCGReg rd, TCGReg rn, unsigned immhb)
+{
+ tcg_out32(s, insn | q << 30 | immhb << 16
+ | (rn & 0x1f) << 5 | (rd & 0x1f));
+}
+
+static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
+ unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
+{
+ tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
+ | (rn & 0x1f) << 5 | (rd & 0x1f));
+}
+
+static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
+ unsigned size, TCGReg rd, TCGReg rn)
+{
+ tcg_out32(s, insn | q << 30 | (size << 22)
+ | (rn & 0x1f) << 5 | (rd & 0x1f));
+}
+
+static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
+ TCGReg rd, TCGReg base, TCGType ext,
+ TCGReg regoff)
+{
+ /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
+ tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
+ 0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
+}
+
+static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
+ TCGReg rd, TCGReg rn, intptr_t offset)
+{
+ tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
+}
+
+static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
+ TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
+{
+ /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
+ tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
+ | rn << 5 | (rd & 0x1f));
+}
+
+/* Register to register move using ORR (shifted register with no shift). */
+static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
+{
+ tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
+}
+
+/* Register to register move using ADDI (move to/from SP). */
+static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
+{
+ tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
+}
+
+/* This function is used for the Logical (immediate) instruction group.
+ The value of LIMM must satisfy IS_LIMM. See the comment above about
+ only supporting simplified logical immediates. */
+static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
+ TCGReg rd, TCGReg rn, uint64_t limm)
+{
+ unsigned h, l, r, c;
+
+ tcg_debug_assert(is_limm(limm));
+
+ h = clz64(limm);
+ l = ctz64(limm);
+ if (l == 0) {
+ r = 0; /* form 0....01....1 */
+ c = ctz64(~limm) - 1;
+ if (h == 0) {
+ r = clz64(~limm); /* form 1..10..01..1 */
+ c += r;
+ }
+ } else {
+ r = 64 - l; /* form 1....10....0 or 0..01..10..0 */
+ c = r - h - 1;
+ }
+ if (ext == TCG_TYPE_I32) {
+ r &= 31;
+ c &= 31;
+ }
+
+ tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
+}
+
+static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
+ TCGReg rd, int64_t v64)
+{
+ bool q = type == TCG_TYPE_V128;
+ int cmode, imm8, i;
+
+ /* Test all bytes equal first. */
+ if (vece == MO_8) {
+ imm8 = (uint8_t)v64;
+ tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
+ return;
+ }
+
+ /*
+ * Test all bytes 0x00 or 0xff second. This can match cases that
+ * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
+ */
+ for (i = imm8 = 0; i < 8; i++) {
+ uint8_t byte = v64 >> (i * 8);
+ if (byte == 0xff) {
+ imm8 |= 1 << i;
+ } else if (byte != 0) {
+ goto fail_bytes;
+ }
+ }
+ tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8);
+ return;
+ fail_bytes:
+
+ /*
+ * Tests for various replications. For each element width, if we
+ * cannot find an expansion there's no point checking a larger
+ * width because we already know by replication it cannot match.
+ */
+ if (vece == MO_16) {
+ uint16_t v16 = v64;
+
+ if (is_shimm16(v16, &cmode, &imm8)) {
+ tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
+ return;
+ }
+ if (is_shimm16(~v16, &cmode, &imm8)) {
+ tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
+ return;
+ }
+
+ /*
+ * Otherwise, all remaining constants can be loaded in two insns:
+ * rd = v16 & 0xff, rd |= v16 & 0xff00.
+ */
+ tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
+ tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
+ return;
+ } else if (vece == MO_32) {
+ uint32_t v32 = v64;
+ uint32_t n32 = ~v32;
+
+ if (is_shimm32(v32, &cmode, &imm8) ||
+ is_soimm32(v32, &cmode, &imm8) ||
+ is_fimm32(v32, &cmode, &imm8)) {
+ tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
+ return;
+ }
+ if (is_shimm32(n32, &cmode, &imm8) ||
+ is_soimm32(n32, &cmode, &imm8)) {
+ tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
+ return;
+ }
+
+ /*
+ * Restrict the set of constants to those we can load with
+ * two instructions. Others we load from the pool.
+ */
+ i = is_shimm32_pair(v32, &cmode, &imm8);
+ if (i) {
+ tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
+ tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8));
+ return;
+ }
+ i = is_shimm32_pair(n32, &cmode, &imm8);
+ if (i) {
+ tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
+ tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8));
+ return;
+ }
+ } else if (is_fimm64(v64, &cmode, &imm8)) {
+ tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8);
+ return;
+ }
+
+ /*
+ * As a last resort, load from the constant pool. Sadly there
+ * is no LD1R (literal), so store the full 16-byte vector.
+ */
+ if (type == TCG_TYPE_V128) {
+ new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
+ tcg_out_insn(s, 3305, LDR_v128, 0, rd);
+ } else {
+ new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
+ tcg_out_insn(s, 3305, LDR_v64, 0, rd);
+ }
+}
+
+static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
+ TCGReg rd, TCGReg rs)
+{
+ int is_q = type - TCG_TYPE_V64;
+ tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0);
+ return true;
+}
+
+static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
+ TCGReg r, TCGReg base, intptr_t offset)
+{
+ TCGReg temp = TCG_REG_TMP;
+
+ if (offset < -0xffffff || offset > 0xffffff) {
+ tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
+ tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
+ base = temp;
+ } else {
+ AArch64Insn add_insn = I3401_ADDI;
+
+ if (offset < 0) {
+ add_insn = I3401_SUBI;
+ offset = -offset;
+ }
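+        /* e.g. offset 0x12345 splits into ADDI #0x12000 then ADDI #0x345. */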
+ if (offset & 0xfff000) {
+ tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
+ base = temp;
+ }
+ if (offset & 0xfff) {
+ tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
+ base = temp;
+ }
+ }
+ tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
+ return true;
+}
+
+static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
+ tcg_target_long value)
+{
+ tcg_target_long svalue = value;
+ tcg_target_long ivalue = ~value;
+ tcg_target_long t0, t1, t2;
+ int s0, s1;
+ AArch64Insn opc;
+
+ switch (type) {
+ case TCG_TYPE_I32:
+ case TCG_TYPE_I64:
+ tcg_debug_assert(rd < 32);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ /* For 32-bit values, discard potential garbage in value. For 64-bit
+ values within [2**31, 2**32-1], we can create smaller sequences by
+ interpreting this as a negative 32-bit number, while ensuring that
+ the high 32 bits are cleared by setting SF=0. */
+ if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
+ svalue = (int32_t)value;
+ value = (uint32_t)value;
+ ivalue = (uint32_t)ivalue;
+ type = TCG_TYPE_I32;
+ }
+
+ /* Speed things up by handling the common case of small positive
+ and negative values specially. */
+ if ((value & ~0xffffull) == 0) {
+ tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
+ return;
+ } else if ((ivalue & ~0xffffull) == 0) {
+ tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
+ return;
+ }
+
+ /* Check for bitfield immediates. For the benefit of 32-bit quantities,
+ use the sign-extended value. That lets us match rotated values such
+ as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
+ if (is_limm(svalue)) {
+ tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
+ return;
+ }
+
+ /* Look for host pointer values within 4G of the PC. This happens
+ often when loading pointers to QEMU's own data structures. */
+ if (type == TCG_TYPE_I64) {
+ intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr);
+ tcg_target_long disp = value - src_rx;
+ if (disp == sextract64(disp, 0, 21)) {
+ tcg_out_insn(s, 3406, ADR, rd, disp);
+ return;
+ }
+ disp = (value >> 12) - (src_rx >> 12);
+ if (disp == sextract64(disp, 0, 21)) {
+ tcg_out_insn(s, 3406, ADRP, rd, disp);
+ if (value & 0xfff) {
+ tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
+ }
+ return;
+ }
+ }
+
+ /* Would it take fewer insns to begin with MOVN? */
+ if (ctpop64(value) >= 32) {
+ t0 = ivalue;
+ opc = I3405_MOVN;
+ } else {
+ t0 = value;
+ opc = I3405_MOVZ;
+ }
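+    /*
+     * For example, 0x0000123400005678 becomes MOVZ rd, #0x5678 plus
+     * MOVK rd, #0x1234, LSL #32, while 0xffffffffffff1234 inverts to
+     * 0xedcb and needs only a single MOVN.
+     */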
+ s0 = ctz64(t0) & (63 & -16);
+ t1 = t0 & ~(0xffffUL << s0);
+ s1 = ctz64(t1) & (63 & -16);
+ t2 = t1 & ~(0xffffUL << s1);
+ if (t2 == 0) {
+ tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
+ if (t1 != 0) {
+ tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
+ }
+ return;
+ }
+
+ /* For more than 2 insns, dump it into the constant pool. */
+ new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
+ tcg_out_insn(s, 3305, LDR, 0, rd);
+}
+
+/* Define something more legible for general use. */
+#define tcg_out_ldst_r tcg_out_insn_3310
+
+static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
+ TCGReg rn, intptr_t offset, int lgsize)
+{
+ /* If the offset is naturally aligned and in range, then we can
+ use the scaled uimm12 encoding */
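+    /* e.g. for lgsize == 3 this covers aligned offsets up to 0x7ff8. */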
+ if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
+ uintptr_t scaled_uimm = offset >> lgsize;
+ if (scaled_uimm <= 0xfff) {
+ tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
+ return;
+ }
+ }
+
+ /* Small signed offsets can use the unscaled encoding. */
+ if (offset >= -256 && offset < 256) {
+ tcg_out_insn_3312(s, insn, rd, rn, offset);
+ return;
+ }
+
+ /* Worst-case scenario, move offset to temp register, use reg offset. */
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
+ tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
+}
+
+static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
+{
+ if (ret == arg) {
+ return true;
+ }
+ switch (type) {
+ case TCG_TYPE_I32:
+ case TCG_TYPE_I64:
+ if (ret < 32 && arg < 32) {
+ tcg_out_movr(s, type, ret, arg);
+ break;
+ } else if (ret < 32) {
+ tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
+ break;
+ } else if (arg < 32) {
+ tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
+ break;
+ }
+ /* FALLTHRU */
+
+ case TCG_TYPE_V64:
+ tcg_debug_assert(ret >= 32 && arg >= 32);
+ tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
+ break;
+ case TCG_TYPE_V128:
+ tcg_debug_assert(ret >= 32 && arg >= 32);
+ tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
+ break;
+
+ default:
+ g_assert_not_reached();
+ }
+ return true;
+}
+
+static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
+ TCGReg base, intptr_t ofs)
+{
+ AArch64Insn insn;
+ int lgsz;
+
+ switch (type) {
+ case TCG_TYPE_I32:
+ insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
+ lgsz = 2;
+ break;
+ case TCG_TYPE_I64:
+ insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
+ lgsz = 3;
+ break;
+ case TCG_TYPE_V64:
+ insn = I3312_LDRVD;
+ lgsz = 3;
+ break;
+ case TCG_TYPE_V128:
+ insn = I3312_LDRVQ;
+ lgsz = 4;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
+}
+
+static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
+ TCGReg base, intptr_t ofs)
+{
+ AArch64Insn insn;
+ int lgsz;
+
+ switch (type) {
+ case TCG_TYPE_I32:
+ insn = (src < 32 ? I3312_STRW : I3312_STRVS);
+ lgsz = 2;
+ break;
+ case TCG_TYPE_I64:
+ insn = (src < 32 ? I3312_STRX : I3312_STRVD);
+ lgsz = 3;
+ break;
+ case TCG_TYPE_V64:
+ insn = I3312_STRVD;
+ lgsz = 3;
+ break;
+ case TCG_TYPE_V128:
+ insn = I3312_STRVQ;
+ lgsz = 4;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ tcg_out_ldst(s, insn, src, base, ofs, lgsz);
+}
+
+static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
+ TCGReg base, intptr_t ofs)
+{
+ if (type <= TCG_TYPE_I64 && val == 0) {
+ tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
+ return true;
+ }
+ return false;
+}
+
+static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
+ TCGReg rn, unsigned int a, unsigned int b)
+{
+ tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
+}
+
+static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
+ TCGReg rn, unsigned int a, unsigned int b)
+{
+ tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
+}
+
+static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
+ TCGReg rn, unsigned int a, unsigned int b)
+{
+ tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
+}
+
+static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
+ TCGReg rn, TCGReg rm, unsigned int a)
+{
+ tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
+}
+
+static inline void tcg_out_shl(TCGContext *s, TCGType ext,
+ TCGReg rd, TCGReg rn, unsigned int m)
+{
+ int bits = ext ? 64 : 32;
+ int max = bits - 1;
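+    /* e.g. a 64-bit shift left by 8 becomes UBFM rd, rn, #56, #55 (LSL). */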
+ tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max));
+}
+
+static inline void tcg_out_shr(TCGContext *s, TCGType ext,
+ TCGReg rd, TCGReg rn, unsigned int m)
+{
+ int max = ext ? 63 : 31;
+ tcg_out_ubfm(s, ext, rd, rn, m & max, max);
+}
+
+static inline void tcg_out_sar(TCGContext *s, TCGType ext,
+ TCGReg rd, TCGReg rn, unsigned int m)
+{
+ int max = ext ? 63 : 31;
+ tcg_out_sbfm(s, ext, rd, rn, m & max, max);
+}
+
+static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
+ TCGReg rd, TCGReg rn, unsigned int m)
+{
+ int max = ext ? 63 : 31;
+ tcg_out_extr(s, ext, rd, rn, rn, m & max);
+}
+
+static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
+ TCGReg rd, TCGReg rn, unsigned int m)
+{
+ int max = ext ? 63 : 31;
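+    /* A rotate left by m is a rotate right by (bits - m). */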
+ tcg_out_extr(s, ext, rd, rn, rn, -m & max);
+}
+
+static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
+ TCGReg rn, unsigned lsb, unsigned width)
+{
+ unsigned size = ext ? 64 : 32;
+ unsigned a = (size - lsb) & (size - 1);
+ unsigned b = width - 1;
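+    /* e.g. lsb == 16, width == 8 in 32 bits gives immr 16, imms 7 (BFI). */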
+ tcg_out_bfm(s, ext, rd, rn, a, b);
+}
+
+static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
+ tcg_target_long b, bool const_b)
+{
+ if (const_b) {
+ /* Using CMP or CMN aliases. */
+ if (b >= 0) {
+ tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
+ } else {
+ tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
+ }
+ } else {
+ /* Using CMP alias SUBS wzr, Wn, Wm */
+ tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
+ }
+}
+
+static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
+{
+ ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
+ tcg_debug_assert(offset == sextract64(offset, 0, 26));
+ tcg_out_insn(s, 3206, B, offset);
+}
+
+static void tcg_out_goto_long(TCGContext *s, const tcg_insn_unit *target)
+{
+ ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
+ if (offset == sextract64(offset, 0, 26)) {
+ tcg_out_insn(s, 3206, B, offset);
+ } else {
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
+ tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
+ }
+}
+
+static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
+{
+ tcg_out_insn(s, 3207, BLR, reg);
+}
+
+static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target)
+{
+ ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
+ if (offset == sextract64(offset, 0, 26)) {
+ tcg_out_insn(s, 3206, BL, offset);
+ } else {
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
+ tcg_out_callr(s, TCG_REG_TMP);
+ }
+}
+
+void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx,
+ uintptr_t jmp_rw, uintptr_t addr)
+{
+ tcg_insn_unit i1, i2;
+ TCGType rt = TCG_TYPE_I64;
+ TCGReg rd = TCG_REG_TMP;
+ uint64_t pair;
+
+ ptrdiff_t offset = addr - jmp_rx;
+
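+    /* A direct B reaches +/-128MB; beyond that, patch in ADRP+ADD. */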
+ if (offset == sextract64(offset, 0, 26)) {
+ i1 = I3206_B | ((offset >> 2) & 0x3ffffff);
+ i2 = NOP;
+ } else {
+ offset = (addr >> 12) - (jmp_rx >> 12);
+
+ /* patch ADRP */
+ i1 = I3406_ADRP | (offset & 3) << 29 | (offset & 0x1ffffc) << (5 - 2) | rd;
+ /* patch ADDI */
+ i2 = I3401_ADDI | rt << 31 | (addr & 0xfff) << 10 | rd << 5 | rd;
+ }
+ pair = (uint64_t)i2 << 32 | i1;
+ qatomic_set((uint64_t *)jmp_rw, pair);
+ flush_idcache_range(jmp_rx, jmp_rw, 8);
+}
+
+static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
+{
+ if (!l->has_value) {
+ tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
+ tcg_out_insn(s, 3206, B, 0);
+ } else {
+ tcg_out_goto(s, l->u.value_ptr);
+ }
+}
+
+static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
+ TCGArg b, bool b_const, TCGLabel *l)
+{
+ intptr_t offset;
+ bool need_cmp;
+
+ if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
+ need_cmp = false;
+ } else {
+ need_cmp = true;
+ tcg_out_cmp(s, ext, a, b, b_const);
+ }
+
+ if (!l->has_value) {
+ tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
+ offset = tcg_in32(s) >> 5;
+ } else {
+ offset = tcg_pcrel_diff(s, l->u.value_ptr) >> 2;
+ tcg_debug_assert(offset == sextract64(offset, 0, 19));
+ }
+
+ if (need_cmp) {
+ tcg_out_insn(s, 3202, B_C, c, offset);
+ } else if (c == TCG_COND_EQ) {
+ tcg_out_insn(s, 3201, CBZ, ext, a, offset);
+ } else {
+ tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
+ }
+}
+
+static inline void tcg_out_rev(TCGContext *s, int ext, MemOp s_bits,
+ TCGReg rd, TCGReg rn)
+{
+ /* REV, REV16, REV32 */
+ tcg_out_insn_3507(s, I3507_REV | (s_bits << 10), ext, rd, rn);
+}
+
+static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits,
+ TCGReg rd, TCGReg rn)
+{
+ /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
+ int bits = (8 << s_bits) - 1;
+ tcg_out_sbfm(s, ext, rd, rn, 0, bits);
+}
+
+static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits,
+ TCGReg rd, TCGReg rn)
+{
+ /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
+ int bits = (8 << s_bits) - 1;
+ tcg_out_ubfm(s, 0, rd, rn, 0, bits);
+}
+
+static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
+ TCGReg rn, int64_t aimm)
+{
+ if (aimm >= 0) {
+ tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
+ } else {
+ tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
+ }
+}
+
+static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
+ TCGReg rh, TCGReg al, TCGReg ah,
+ tcg_target_long bl, tcg_target_long bh,
+ bool const_bl, bool const_bh, bool sub)
+{
+ TCGReg orig_rl = rl;
+ AArch64Insn insn;
+
+ if (rl == ah || (!const_bh && rl == bh)) {
+ rl = TCG_REG_TMP;
+ }
+
+ if (const_bl) {
+ if (bl < 0) {
+ bl = -bl;
+ insn = sub ? I3401_ADDSI : I3401_SUBSI;
+ } else {
+ insn = sub ? I3401_SUBSI : I3401_ADDSI;
+ }
+
+ if (unlikely(al == TCG_REG_XZR)) {
+ /* ??? We want to allow al to be zero for the benefit of
+ negation via subtraction. However, that leaves open the
+ possibility of adding 0+const in the low part, and the
+ immediate add instructions encode XSP not XZR. Don't try
+ anything more elaborate here than loading another zero. */
+ al = TCG_REG_TMP;
+ tcg_out_movi(s, ext, al, 0);
+ }
+ tcg_out_insn_3401(s, insn, ext, rl, al, bl);
+ } else {
+ tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
+ }
+
+ insn = I3503_ADC;
+ if (const_bh) {
+ /* Note that the only two constants we support are 0 and -1, and
+ that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa. */
+ if ((bh != 0) ^ sub) {
+ insn = I3503_SBC;
+ }
+ bh = TCG_REG_XZR;
+ } else if (sub) {
+ insn = I3503_SBC;
+ }
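+    /*
+     * e.g. a constant high part of -1 on an add becomes SBC rh, ah, xzr,
+     * since ah + ~0 + carry == ah - 1 + carry.
+     */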
+ tcg_out_insn_3503(s, insn, ext, rh, ah, bh);
+
+ tcg_out_mov(s, ext, orig_rl, rl);
+}
+
+static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
+{
+ static const uint32_t sync[] = {
+ [0 ... TCG_MO_ALL] = DMB_ISH | DMB_LD | DMB_ST,
+ [TCG_MO_ST_ST] = DMB_ISH | DMB_ST,
+ [TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
+ [TCG_MO_LD_ST] = DMB_ISH | DMB_LD,
+ [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
+ };
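+    /* Orderings that only wait on prior loads use DMB ISHLD, store-store
+       uses DMB ISHST, and everything else a full DMB ISH. */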
+ tcg_out32(s, sync[a0 & TCG_MO_ALL]);
+}
+
+static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
+ TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
+{
+ TCGReg a1 = a0;
+ if (is_ctz) {
+ a1 = TCG_REG_TMP;
+ tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
+ }
+ if (const_b && b == (ext ? 64 : 32)) {
+ tcg_out_insn(s, 3507, CLZ, ext, d, a1);
+ } else {
+ AArch64Insn sel = I3506_CSEL;
+
+ tcg_out_cmp(s, ext, a0, 0, 1);
+ tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);
+
+ if (const_b) {
+ if (b == -1) {
+ b = TCG_REG_XZR;
+ sel = I3506_CSINV;
+ } else if (b == 0) {
+ b = TCG_REG_XZR;
+ } else {
+ tcg_out_movi(s, ext, d, b);
+ b = d;
+ }
+ }
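+        /* e.g. b == -1 selects CSINV, so a zero input yields ~xzr == -1. */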
+ tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
+ }
+}
+
+#ifdef CONFIG_SOFTMMU
+#include "../tcg-ldst.c.inc"
+
+/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
+ * MemOpIdx oi, uintptr_t ra)
+ */
+static void * const qemu_ld_helpers[MO_SIZE + 1] = {
+ [MO_8] = helper_ret_ldub_mmu,
+#ifdef HOST_WORDS_BIGENDIAN
+ [MO_16] = helper_be_lduw_mmu,
+ [MO_32] = helper_be_ldul_mmu,
+ [MO_64] = helper_be_ldq_mmu,
+#else
+ [MO_16] = helper_le_lduw_mmu,
+ [MO_32] = helper_le_ldul_mmu,
+ [MO_64] = helper_le_ldq_mmu,
+#endif
+};
+
+/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
+ * uintxx_t val, MemOpIdx oi,
+ * uintptr_t ra)
+ */
+static void * const qemu_st_helpers[MO_SIZE + 1] = {
+ [MO_8] = helper_ret_stb_mmu,
+#ifdef HOST_WORDS_BIGENDIAN
+ [MO_16] = helper_be_stw_mmu,
+ [MO_32] = helper_be_stl_mmu,
+ [MO_64] = helper_be_stq_mmu,
+#else
+ [MO_16] = helper_le_stw_mmu,
+ [MO_32] = helper_le_stl_mmu,
+ [MO_64] = helper_le_stq_mmu,
+#endif
+};
+
+static inline void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target)
+{
+ ptrdiff_t offset = tcg_pcrel_diff(s, target);
+ tcg_debug_assert(offset == sextract64(offset, 0, 21));
+ tcg_out_insn(s, 3406, ADR, rd, offset);
+}
+
+static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
+{
+ MemOpIdx oi = lb->oi;
+ MemOp opc = get_memop(oi);
+ MemOp size = opc & MO_SIZE;
+
+ if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
+ return false;
+ }
+
+ tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
+ tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
+ tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
+ tcg_out_adr(s, TCG_REG_X3, lb->raddr);
+ tcg_out_call(s, qemu_ld_helpers[opc & MO_SIZE]);
+ if (opc & MO_SIGN) {
+ tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0);
+ } else {
+ tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0);
+ }
+
+ tcg_out_goto(s, lb->raddr);
+ return true;
+}
+
+static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
+{
+ MemOpIdx oi = lb->oi;
+ MemOp opc = get_memop(oi);
+ MemOp size = opc & MO_SIZE;
+
+ if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
+ return false;
+ }
+
+ tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
+ tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
+ tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
+ tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
+ tcg_out_adr(s, TCG_REG_X4, lb->raddr);
+ tcg_out_call(s, qemu_st_helpers[opc & MO_SIZE]);
+ tcg_out_goto(s, lb->raddr);
+ return true;
+}
+
+static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,
+ TCGType ext, TCGReg data_reg, TCGReg addr_reg,
+ tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
+{
+ TCGLabelQemuLdst *label = new_ldst_label(s);
+
+ label->is_ld = is_ld;
+ label->oi = oi;
+ label->type = ext;
+ label->datalo_reg = data_reg;
+ label->addrlo_reg = addr_reg;
+ label->raddr = tcg_splitwx_to_rx(raddr);
+ label->label_ptr[0] = label_ptr;
+}
+
+/* We expect to use a 7-bit scaled negative offset from ENV. */
+QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
+QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512);
+
+/* These offsets are built into the LDP below. */
+QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
+QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
+
+/* Load and compare a TLB entry, emitting the conditional jump to the
+ slow path for the failure case, which will be patched later when finalizing
+ the slow path. Generated code returns the host addend in X1,
+   clobbers X0, X2, X3, TMP. */
+static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc,
+ tcg_insn_unit **label_ptr, int mem_index,
+ bool is_read)
+{
+ unsigned a_bits = get_alignment_bits(opc);
+ unsigned s_bits = opc & MO_SIZE;
+ unsigned a_mask = (1u << a_bits) - 1;
+ unsigned s_mask = (1u << s_bits) - 1;
+ TCGReg x3;
+ TCGType mask_type;
+ uint64_t compare_mask;
+
+ mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32
+ ? TCG_TYPE_I64 : TCG_TYPE_I32);
+
+ /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}. */
+ tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0,
+ TLB_MASK_TABLE_OFS(mem_index), 1, 0);
+
+ /* Extract the TLB index from the address into X0. */
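+    /* The mask is pre-scaled by sizeof(CPUTLBEntry), so this is already
+       a byte offset into the table. */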
+ tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
+ TCG_REG_X0, TCG_REG_X0, addr_reg,
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
+
+ /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1. */
+ tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);
+
+ /* Load the tlb comparator into X0, and the fast path addend into X1. */
+ tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1, is_read
+ ? offsetof(CPUTLBEntry, addr_read)
+ : offsetof(CPUTLBEntry, addr_write));
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
+ offsetof(CPUTLBEntry, addend));
+
+ /* For aligned accesses, we check the first byte and include the alignment
+ bits within the address. For unaligned access, we check that we don't
+ cross pages using the address of the last byte of the access. */
+ if (a_bits >= s_bits) {
+ x3 = addr_reg;
+ } else {
+ tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
+ TCG_REG_X3, addr_reg, s_mask - a_mask);
+ x3 = TCG_REG_X3;
+ }
+ compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
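+    /* e.g. a 64-bit guest with 4KiB pages and a 4-byte aligned access
+       gives compare_mask == 0xfffffffffffff003. */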
+
+ /* Store the page mask part of the address into X3. */
+ tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
+ TCG_REG_X3, x3, compare_mask);
+
+ /* Perform the address comparison. */
+ tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0);
+
+ /* If not equal, we jump to the slow path. */
+ *label_ptr = s->code_ptr;
+ tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
+}
+
+#endif /* CONFIG_SOFTMMU */
+
+static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
+ TCGReg data_r, TCGReg addr_r,
+ TCGType otype, TCGReg off_r)
+{
+ /* Byte swapping is left to middle-end expansion. */
+ tcg_debug_assert((memop & MO_BSWAP) == 0);
+
+ switch (memop & MO_SSIZE) {
+ case MO_UB:
+ tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r);
+ break;
+ case MO_SB:
+ tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
+ data_r, addr_r, otype, off_r);
+ break;
+ case MO_UW:
+ tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
+ break;
+ case MO_SW:
+ tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
+ data_r, addr_r, otype, off_r);
+ break;
+ case MO_UL:
+ tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
+ break;
+ case MO_SL:
+ tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
+ break;
+ case MO_Q:
+ tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r);
+ break;
+ default:
+ tcg_abort();
+ }
+}
+
+static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
+ TCGReg data_r, TCGReg addr_r,
+ TCGType otype, TCGReg off_r)
+{
+ /* Byte swapping is left to middle-end expansion. */
+ tcg_debug_assert((memop & MO_BSWAP) == 0);
+
+ switch (memop & MO_SIZE) {
+ case MO_8:
+ tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r);
+ break;
+ case MO_16:
+ tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r);
+ break;
+ case MO_32:
+ tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r);
+ break;
+ case MO_64:
+ tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r);
+ break;
+ default:
+ tcg_abort();
+ }
+}
+
+static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
+ MemOpIdx oi, TCGType ext)
+{
+ MemOp memop = get_memop(oi);
+ const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
+#ifdef CONFIG_SOFTMMU
+ unsigned mem_index = get_mmuidx(oi);
+ tcg_insn_unit *label_ptr;
+
+ tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1);
+ tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
+ TCG_REG_X1, otype, addr_reg);
+ add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
+ s->code_ptr, label_ptr);
+#else /* !CONFIG_SOFTMMU */
+ if (USE_GUEST_BASE) {
+ tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
+ TCG_REG_GUEST_BASE, otype, addr_reg);
+ } else {
+ tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
+ addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
+ }
+#endif /* CONFIG_SOFTMMU */
+}
+
+static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
+ MemOpIdx oi)
+{
+ MemOp memop = get_memop(oi);
+ const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
+#ifdef CONFIG_SOFTMMU
+ unsigned mem_index = get_mmuidx(oi);
+ tcg_insn_unit *label_ptr;
+
+ tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0);
+ tcg_out_qemu_st_direct(s, memop, data_reg,
+ TCG_REG_X1, otype, addr_reg);
+ add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE)== MO_64,
+ data_reg, addr_reg, s->code_ptr, label_ptr);
+#else /* !CONFIG_SOFTMMU */
+ if (USE_GUEST_BASE) {
+ tcg_out_qemu_st_direct(s, memop, data_reg,
+ TCG_REG_GUEST_BASE, otype, addr_reg);
+ } else {
+ tcg_out_qemu_st_direct(s, memop, data_reg,
+ addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
+ }
+#endif /* CONFIG_SOFTMMU */
+}
+
+static const tcg_insn_unit *tb_ret_addr;
+
+static void tcg_out_op(TCGContext *s, TCGOpcode opc,
+ const TCGArg args[TCG_MAX_OP_ARGS],
+ const int const_args[TCG_MAX_OP_ARGS])
+{
+ /* 99% of the time, we can signal the use of extension registers
+ by looking to see if the opcode handles 64-bit data. */
+ TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
+
+ /* Hoist the loads of the most common arguments. */
+ TCGArg a0 = args[0];
+ TCGArg a1 = args[1];
+ TCGArg a2 = args[2];
+ int c2 = const_args[2];
+
+ /* Some operands are defined with "rZ" constraint, a register or
+ the zero register. These need not actually test args[I] == 0. */
+#define REG0(I) (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
+
+ switch (opc) {
+ case INDEX_op_exit_tb:
+ /* Reuse the zeroing that exists for goto_ptr. */
+ if (a0 == 0) {
+ tcg_out_goto_long(s, tcg_code_gen_epilogue);
+ } else {
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
+ tcg_out_goto_long(s, tb_ret_addr);
+ }
+ break;
+
+ case INDEX_op_goto_tb:
+ if (s->tb_jmp_insn_offset != NULL) {
+ /* TCG_TARGET_HAS_direct_jump */
+ /* Ensure that ADRP+ADD are 8-byte aligned so that an atomic
+ write can be used to patch the target address. */
+ if ((uintptr_t)s->code_ptr & 7) {
+ tcg_out32(s, NOP);
+ }
+ s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
+ /* actual branch destination will be patched by
+ tb_target_set_jmp_target later. */
+ tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0);
+ tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0);
+ } else {
+ /* !TCG_TARGET_HAS_direct_jump */
+ tcg_debug_assert(s->tb_jmp_target_addr != NULL);
+ intptr_t offset = tcg_pcrel_diff(s, (s->tb_jmp_target_addr + a0)) >> 2;
+ tcg_out_insn(s, 3305, LDR, offset, TCG_REG_TMP);
+ }
+ tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
+ set_jmp_reset_offset(s, a0);
+ break;
+
+ case INDEX_op_goto_ptr:
+ tcg_out_insn(s, 3207, BR, a0);
+ break;
+
+ case INDEX_op_br:
+ tcg_out_goto_label(s, arg_label(a0));
+ break;
+
+ case INDEX_op_ld8u_i32:
+ case INDEX_op_ld8u_i64:
+ tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
+ break;
+ case INDEX_op_ld8s_i32:
+ tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
+ break;
+ case INDEX_op_ld8s_i64:
+ tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
+ break;
+ case INDEX_op_ld16u_i32:
+ case INDEX_op_ld16u_i64:
+ tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
+ break;
+ case INDEX_op_ld16s_i32:
+ tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
+ break;
+ case INDEX_op_ld16s_i64:
+ tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
+ break;
+ case INDEX_op_ld_i32:
+ case INDEX_op_ld32u_i64:
+ tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
+ break;
+ case INDEX_op_ld32s_i64:
+ tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
+ break;
+ case INDEX_op_ld_i64:
+ tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
+ break;
+
+ case INDEX_op_st8_i32:
+ case INDEX_op_st8_i64:
+ tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
+ break;
+ case INDEX_op_st16_i32:
+ case INDEX_op_st16_i64:
+ tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
+ break;
+ case INDEX_op_st_i32:
+ case INDEX_op_st32_i64:
+ tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
+ break;
+ case INDEX_op_st_i64:
+ tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
+ break;
+
+ case INDEX_op_add_i32:
+ a2 = (int32_t)a2;
+ /* FALLTHRU */
+ case INDEX_op_add_i64:
+ if (c2) {
+ tcg_out_addsubi(s, ext, a0, a1, a2);
+ } else {
+ tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
+ }
+ break;
+
+ case INDEX_op_sub_i32:
+ a2 = (int32_t)a2;
+ /* FALLTHRU */
+ case INDEX_op_sub_i64:
+ if (c2) {
+ tcg_out_addsubi(s, ext, a0, a1, -a2);
+ } else {
+ tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
+ }
+ break;
+
+ case INDEX_op_neg_i64:
+ case INDEX_op_neg_i32:
+ tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
+ break;
+
+ case INDEX_op_and_i32:
+ a2 = (int32_t)a2;
+ /* FALLTHRU */
+ case INDEX_op_and_i64:
+ if (c2) {
+ tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
+ } else {
+ tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
+ }
+ break;
+
+ case INDEX_op_andc_i32:
+ a2 = (int32_t)a2;
+ /* FALLTHRU */
+ case INDEX_op_andc_i64:
+ if (c2) {
+ tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
+ } else {
+ tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
+ }
+ break;
+
+ case INDEX_op_or_i32:
+ a2 = (int32_t)a2;
+ /* FALLTHRU */
+ case INDEX_op_or_i64:
+ if (c2) {
+ tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
+ } else {
+ tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
+ }
+ break;
+
+ case INDEX_op_orc_i32:
+ a2 = (int32_t)a2;
+ /* FALLTHRU */
+ case INDEX_op_orc_i64:
+ if (c2) {
+ tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
+ } else {
+ tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
+ }
+ break;
+
+ case INDEX_op_xor_i32:
+ a2 = (int32_t)a2;
+ /* FALLTHRU */
+ case INDEX_op_xor_i64:
+ if (c2) {
+ tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
+ } else {
+ tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
+ }
+ break;
+
+ case INDEX_op_eqv_i32:
+ a2 = (int32_t)a2;
+ /* FALLTHRU */
+ case INDEX_op_eqv_i64:
+ if (c2) {
+ tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
+ } else {
+ tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
+ }
+ break;
+
+ case INDEX_op_not_i64:
+ case INDEX_op_not_i32:
+ tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
+ break;
+
+ case INDEX_op_mul_i64:
+ case INDEX_op_mul_i32:
+ tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
+ break;
+
+ case INDEX_op_div_i64:
+ case INDEX_op_div_i32:
+ tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
+ break;
+ case INDEX_op_divu_i64:
+ case INDEX_op_divu_i32:
+ tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
+ break;
+
+ case INDEX_op_rem_i64:
+ case INDEX_op_rem_i32:
+ tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
+ tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
+ break;
+ case INDEX_op_remu_i64:
+ case INDEX_op_remu_i32:
+ tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
+ tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
+ break;
+
+ case INDEX_op_shl_i64:
+ case INDEX_op_shl_i32:
+ if (c2) {
+ tcg_out_shl(s, ext, a0, a1, a2);
+ } else {
+ tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
+ }
+ break;
+
+ case INDEX_op_shr_i64:
+ case INDEX_op_shr_i32:
+ if (c2) {
+ tcg_out_shr(s, ext, a0, a1, a2);
+ } else {
+ tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
+ }
+ break;
+
+ case INDEX_op_sar_i64:
+ case INDEX_op_sar_i32:
+ if (c2) {
+ tcg_out_sar(s, ext, a0, a1, a2);
+ } else {
+ tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
+ }
+ break;
+
+ case INDEX_op_rotr_i64:
+ case INDEX_op_rotr_i32:
+ if (c2) {
+ tcg_out_rotr(s, ext, a0, a1, a2);
+ } else {
+ tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
+ }
+ break;
+
+ case INDEX_op_rotl_i64:
+ case INDEX_op_rotl_i32:
+ if (c2) {
+ tcg_out_rotl(s, ext, a0, a1, a2);
+ } else {
+ tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
+ tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
+ }
+ break;
+
+ case INDEX_op_clz_i64:
+ case INDEX_op_clz_i32:
+ tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
+ break;
+ case INDEX_op_ctz_i64:
+ case INDEX_op_ctz_i32:
+ tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
+ break;
+
+ case INDEX_op_brcond_i32:
+ a1 = (int32_t)a1;
+ /* FALLTHRU */
+ case INDEX_op_brcond_i64:
+ tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
+ break;
+
+ case INDEX_op_setcond_i32:
+ a2 = (int32_t)a2;
+ /* FALLTHRU */
+ case INDEX_op_setcond_i64:
+ tcg_out_cmp(s, ext, a1, a2, c2);
+ /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). */
+ tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
+ TCG_REG_XZR, tcg_invert_cond(args[3]));
+ break;
+
+ case INDEX_op_movcond_i32:
+ a2 = (int32_t)a2;
+ /* FALLTHRU */
+ case INDEX_op_movcond_i64:
+ tcg_out_cmp(s, ext, a1, a2, c2);
+ tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
+ break;
+
+ case INDEX_op_qemu_ld_i32:
+ case INDEX_op_qemu_ld_i64:
+ tcg_out_qemu_ld(s, a0, a1, a2, ext);
+ break;
+ case INDEX_op_qemu_st_i32:
+ case INDEX_op_qemu_st_i64:
+ tcg_out_qemu_st(s, REG0(0), a1, a2);
+ break;
+
+ case INDEX_op_bswap64_i64:
+ tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1);
+ break;
+ case INDEX_op_bswap32_i64:
+ tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
+ if (a2 & TCG_BSWAP_OS) {
+ tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a0);
+ }
+ break;
+ case INDEX_op_bswap32_i32:
+ tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
+ break;
+ case INDEX_op_bswap16_i64:
+ case INDEX_op_bswap16_i32:
+ tcg_out_rev(s, TCG_TYPE_I32, MO_16, a0, a1);
+ if (a2 & TCG_BSWAP_OS) {
+ /* Output must be sign-extended. */
+ tcg_out_sxt(s, ext, MO_16, a0, a0);
+ } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
+ /* Output must be zero-extended, but input isn't. */
+ tcg_out_uxt(s, MO_16, a0, a0);
+ }
+ break;
+
+ case INDEX_op_ext8s_i64:
+ case INDEX_op_ext8s_i32:
+ tcg_out_sxt(s, ext, MO_8, a0, a1);
+ break;
+ case INDEX_op_ext16s_i64:
+ case INDEX_op_ext16s_i32:
+ tcg_out_sxt(s, ext, MO_16, a0, a1);
+ break;
+ case INDEX_op_ext_i32_i64:
+ case INDEX_op_ext32s_i64:
+ tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
+ break;
+ case INDEX_op_ext8u_i64:
+ case INDEX_op_ext8u_i32:
+ tcg_out_uxt(s, MO_8, a0, a1);
+ break;
+ case INDEX_op_ext16u_i64:
+ case INDEX_op_ext16u_i32:
+ tcg_out_uxt(s, MO_16, a0, a1);
+ break;
+ case INDEX_op_extu_i32_i64:
+ case INDEX_op_ext32u_i64:
+ tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
+ break;
+
+ case INDEX_op_deposit_i64:
+ case INDEX_op_deposit_i32:
+ tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
+ break;
+
+ case INDEX_op_extract_i64:
+ case INDEX_op_extract_i32:
+ tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
+ break;
+
+ case INDEX_op_sextract_i64:
+ case INDEX_op_sextract_i32:
+ tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
+ break;
+
+ case INDEX_op_extract2_i64:
+ case INDEX_op_extract2_i32:
+ tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]);
+ break;
+
+ case INDEX_op_add2_i32:
+ tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
+ (int32_t)args[4], args[5], const_args[4],
+ const_args[5], false);
+ break;
+ case INDEX_op_add2_i64:
+ tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
+ args[5], const_args[4], const_args[5], false);
+ break;
+ case INDEX_op_sub2_i32:
+ tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
+ (int32_t)args[4], args[5], const_args[4],
+ const_args[5], true);
+ break;
+ case INDEX_op_sub2_i64:
+ tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
+ args[5], const_args[4], const_args[5], true);
+ break;
+
+ case INDEX_op_muluh_i64:
+ tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
+ break;
+ case INDEX_op_mulsh_i64:
+ tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
+ break;
+
+ case INDEX_op_mb:
+ tcg_out_mb(s, a0);
+ break;
+
+ case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
+ case INDEX_op_mov_i64:
+ case INDEX_op_call: /* Always emitted via tcg_out_call. */
+ default:
+ g_assert_not_reached();
+ }
+
+#undef REG0
+}
+
+static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
+ unsigned vecl, unsigned vece,
+ const TCGArg args[TCG_MAX_OP_ARGS],
+ const int const_args[TCG_MAX_OP_ARGS])
+{
+ static const AArch64Insn cmp_vec_insn[16] = {
+ [TCG_COND_EQ] = I3616_CMEQ,
+ [TCG_COND_GT] = I3616_CMGT,
+ [TCG_COND_GE] = I3616_CMGE,
+ [TCG_COND_GTU] = I3616_CMHI,
+ [TCG_COND_GEU] = I3616_CMHS,
+ };
+ static const AArch64Insn cmp_scalar_insn[16] = {
+ [TCG_COND_EQ] = I3611_CMEQ,
+ [TCG_COND_GT] = I3611_CMGT,
+ [TCG_COND_GE] = I3611_CMGE,
+ [TCG_COND_GTU] = I3611_CMHI,
+ [TCG_COND_GEU] = I3611_CMHS,
+ };
+ static const AArch64Insn cmp0_vec_insn[16] = {
+ [TCG_COND_EQ] = I3617_CMEQ0,
+ [TCG_COND_GT] = I3617_CMGT0,
+ [TCG_COND_GE] = I3617_CMGE0,
+ [TCG_COND_LT] = I3617_CMLT0,
+ [TCG_COND_LE] = I3617_CMLE0,
+ };
+ static const AArch64Insn cmp0_scalar_insn[16] = {
+ [TCG_COND_EQ] = I3612_CMEQ0,
+ [TCG_COND_GT] = I3612_CMGT0,
+ [TCG_COND_GE] = I3612_CMGE0,
+ [TCG_COND_LT] = I3612_CMLT0,
+ [TCG_COND_LE] = I3612_CMLE0,
+ };
+
+ TCGType type = vecl + TCG_TYPE_V64;
+ unsigned is_q = vecl;
+ bool is_scalar = !is_q && vece == MO_64;
+ TCGArg a0, a1, a2, a3;
+ int cmode, imm8;
+
+ a0 = args[0];
+ a1 = args[1];
+ a2 = args[2];
+
+ switch (opc) {
+ case INDEX_op_ld_vec:
+ tcg_out_ld(s, type, a0, a1, a2);
+ break;
+ case INDEX_op_st_vec:
+ tcg_out_st(s, type, a0, a1, a2);
+ break;
+ case INDEX_op_dupm_vec:
+ tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
+ break;
+ case INDEX_op_add_vec:
+ if (is_scalar) {
+ tcg_out_insn(s, 3611, ADD, vece, a0, a1, a2);
+ } else {
+ tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
+ }
+ break;
+ case INDEX_op_sub_vec:
+ if (is_scalar) {
+ tcg_out_insn(s, 3611, SUB, vece, a0, a1, a2);
+ } else {
+ tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
+ }
+ break;
+ case INDEX_op_mul_vec:
+ tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
+ break;
+ case INDEX_op_neg_vec:
+ if (is_scalar) {
+ tcg_out_insn(s, 3612, NEG, vece, a0, a1);
+ } else {
+ tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
+ }
+ break;
+ case INDEX_op_abs_vec:
+ if (is_scalar) {
+ tcg_out_insn(s, 3612, ABS, vece, a0, a1);
+ } else {
+ tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
+ }
+ break;
+ case INDEX_op_and_vec:
+ if (const_args[2]) {
+ is_shimm1632(~a2, &cmode, &imm8);
+ if (a0 == a1) {
+ tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
+ return;
+ }
+ tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
+ a2 = a0;
+ }
+ tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
+ break;
+ case INDEX_op_or_vec:
+ if (const_args[2]) {
+ is_shimm1632(a2, &cmode, &imm8);
+ if (a0 == a1) {
+ tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
+ return;
+ }
+ tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
+ a2 = a0;
+ }
+ tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
+ break;
+ case INDEX_op_andc_vec:
+ if (const_args[2]) {
+ is_shimm1632(a2, &cmode, &imm8);
+ if (a0 == a1) {
+ tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
+ return;
+ }
+ tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
+ a2 = a0;
+ }
+ tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
+ break;
+ case INDEX_op_orc_vec:
+ if (const_args[2]) {
+ is_shimm1632(~a2, &cmode, &imm8);
+ if (a0 == a1) {
+ tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
+ return;
+ }
+ tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
+ a2 = a0;
+ }
+ tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
+ break;
+ case INDEX_op_xor_vec:
+ tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
+ break;
+ case INDEX_op_ssadd_vec:
+ if (is_scalar) {
+ tcg_out_insn(s, 3611, SQADD, vece, a0, a1, a2);
+ } else {
+ tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
+ }
+ break;
+ case INDEX_op_sssub_vec:
+ if (is_scalar) {
+ tcg_out_insn(s, 3611, SQSUB, vece, a0, a1, a2);
+ } else {
+ tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
+ }
+ break;
+ case INDEX_op_usadd_vec:
+ if (is_scalar) {
+ tcg_out_insn(s, 3611, UQADD, vece, a0, a1, a2);
+ } else {
+ tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
+ }
+ break;
+ case INDEX_op_ussub_vec:
+ if (is_scalar) {
+ tcg_out_insn(s, 3611, UQSUB, vece, a0, a1, a2);
+ } else {
+ tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
+ }
+ break;
+ case INDEX_op_smax_vec:
+ tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
+ break;
+ case INDEX_op_smin_vec:
+ tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2);
+ break;
+ case INDEX_op_umax_vec:
+ tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2);
+ break;
+ case INDEX_op_umin_vec:
+ tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2);
+ break;
+ case INDEX_op_not_vec:
+ tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
+ break;
+ case INDEX_op_shli_vec:
+ if (is_scalar) {
+ tcg_out_insn(s, 3609, SHL, a0, a1, a2 + (8 << vece));
+ } else {
+ tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
+ }
+ break;
+ case INDEX_op_shri_vec:
+ if (is_scalar) {
+ tcg_out_insn(s, 3609, USHR, a0, a1, (16 << vece) - a2);
+ } else {
+ tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
+ }
+ break;
+ case INDEX_op_sari_vec:
+ if (is_scalar) {
+ tcg_out_insn(s, 3609, SSHR, a0, a1, (16 << vece) - a2);
+ } else {
+ tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
+ }
+ break;
+ case INDEX_op_aa64_sli_vec:
+ if (is_scalar) {
+ tcg_out_insn(s, 3609, SLI, a0, a2, args[3] + (8 << vece));
+ } else {
+ tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece));
+ }
+ break;
+ case INDEX_op_shlv_vec:
+ if (is_scalar) {
+ tcg_out_insn(s, 3611, USHL, vece, a0, a1, a2);
+ } else {
+ tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
+ }
+ break;
+ case INDEX_op_aa64_sshl_vec:
+ if (is_scalar) {
+ tcg_out_insn(s, 3611, SSHL, vece, a0, a1, a2);
+ } else {
+ tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
+ }
+ break;
+ case INDEX_op_cmp_vec:
+ {
+ TCGCond cond = args[3];
+ AArch64Insn insn;
+
+ if (cond == TCG_COND_NE) {
+ if (const_args[2]) {
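+                    /* CMTST v, v, v is all-ones wherever a lane is nonzero. */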
+ if (is_scalar) {
+ tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a1);
+ } else {
+ tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
+ }
+ } else {
+ if (is_scalar) {
+ tcg_out_insn(s, 3611, CMEQ, vece, a0, a1, a2);
+ } else {
+ tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
+ }
+ tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
+ }
+ } else {
+ if (const_args[2]) {
+ if (is_scalar) {
+ insn = cmp0_scalar_insn[cond];
+ if (insn) {
+ tcg_out_insn_3612(s, insn, vece, a0, a1);
+ break;
+ }
+ } else {
+ insn = cmp0_vec_insn[cond];
+ if (insn) {
+ tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
+ break;
+ }
+ }
+ tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP, 0);
+ a2 = TCG_VEC_TMP;
+ }
+ if (is_scalar) {
+ insn = cmp_scalar_insn[cond];
+ if (insn == 0) {
+ TCGArg t;
+ t = a1, a1 = a2, a2 = t;
+ cond = tcg_swap_cond(cond);
+ insn = cmp_scalar_insn[cond];
+ tcg_debug_assert(insn != 0);
+ }
+ tcg_out_insn_3611(s, insn, vece, a0, a1, a2);
+ } else {
+ insn = cmp_vec_insn[cond];
+ if (insn == 0) {
+ TCGArg t;
+ t = a1, a1 = a2, a2 = t;
+ cond = tcg_swap_cond(cond);
+ insn = cmp_vec_insn[cond];
+ tcg_debug_assert(insn != 0);
+ }
+ tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
+ }
+ }
+ }
+ break;
+
+ case INDEX_op_bitsel_vec:
+ a3 = args[3];
+ if (a0 == a3) {
+ tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1);
+ } else if (a0 == a2) {
+ tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1);
+ } else {
+ if (a0 != a1) {
+ tcg_out_mov(s, type, a0, a1);
+ }
+ tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3);
+ }
+ break;
+
+ case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */
+ case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */
+ default:
+ g_assert_not_reached();
+ }
+}
+
+int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
+{
+ switch (opc) {
+ case INDEX_op_add_vec:
+ case INDEX_op_sub_vec:
+ case INDEX_op_and_vec:
+ case INDEX_op_or_vec:
+ case INDEX_op_xor_vec:
+ case INDEX_op_andc_vec:
+ case INDEX_op_orc_vec:
+ case INDEX_op_neg_vec:
+ case INDEX_op_abs_vec:
+ case INDEX_op_not_vec:
+ case INDEX_op_cmp_vec:
+ case INDEX_op_shli_vec:
+ case INDEX_op_shri_vec:
+ case INDEX_op_sari_vec:
+ case INDEX_op_ssadd_vec:
+ case INDEX_op_sssub_vec:
+ case INDEX_op_usadd_vec:
+ case INDEX_op_ussub_vec:
+ case INDEX_op_shlv_vec:
+ case INDEX_op_bitsel_vec:
+ return 1;
+ case INDEX_op_rotli_vec:
+ case INDEX_op_shrv_vec:
+ case INDEX_op_sarv_vec:
+ case INDEX_op_rotlv_vec:
+ case INDEX_op_rotrv_vec:
+ return -1;
+ case INDEX_op_mul_vec:
+ case INDEX_op_smax_vec:
+ case INDEX_op_smin_vec:
+ case INDEX_op_umax_vec:
+ case INDEX_op_umin_vec:
+ return vece < MO_64;
+
+ default:
+ return 0;
+ }
+}
+
+void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
+ TCGArg a0, ...)
+{
+ va_list va;
+ TCGv_vec v0, v1, v2, t1, t2, c1;
+ TCGArg a2;
+
+ va_start(va, a0);
+ v0 = temp_tcgv_vec(arg_temp(a0));
+ v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
+ a2 = va_arg(va, TCGArg);
+ va_end(va);
+
+ switch (opc) {
+ case INDEX_op_rotli_vec:
+ t1 = tcg_temp_new_vec(type);
+ tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1));
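+        /* SLI then deposits v1 << a2 on top of the shifted copy. */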
+ vec_gen_4(INDEX_op_aa64_sli_vec, type, vece,
+ tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2);
+ tcg_temp_free_vec(t1);
+ break;
+
+ case INDEX_op_shrv_vec:
+ case INDEX_op_sarv_vec:
+ /* Right shifts are negative left shifts for AArch64. */
+ v2 = temp_tcgv_vec(arg_temp(a2));
+ t1 = tcg_temp_new_vec(type);
+ tcg_gen_neg_vec(vece, t1, v2);
+ opc = (opc == INDEX_op_shrv_vec
+ ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec);
+ vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
+ tcgv_vec_arg(v1), tcgv_vec_arg(t1));
+ tcg_temp_free_vec(t1);
+ break;
+
+ case INDEX_op_rotlv_vec:
+ v2 = temp_tcgv_vec(arg_temp(a2));
+ t1 = tcg_temp_new_vec(type);
+ c1 = tcg_constant_vec(type, vece, 8 << vece);
+ tcg_gen_sub_vec(vece, t1, v2, c1);
+ /* Right shifts are negative left shifts for AArch64. */
+ vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
+ tcgv_vec_arg(v1), tcgv_vec_arg(t1));
+ vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0),
+ tcgv_vec_arg(v1), tcgv_vec_arg(v2));
+ tcg_gen_or_vec(vece, v0, v0, t1);
+ tcg_temp_free_vec(t1);
+ break;
+
+ case INDEX_op_rotrv_vec:
+ v2 = temp_tcgv_vec(arg_temp(a2));
+ t1 = tcg_temp_new_vec(type);
+ t2 = tcg_temp_new_vec(type);
+ c1 = tcg_constant_vec(type, vece, 8 << vece);
+ tcg_gen_neg_vec(vece, t1, v2);
+ tcg_gen_sub_vec(vece, t2, c1, v2);
+ /* Right shifts are negative left shifts for AArch64. */
+ vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
+ tcgv_vec_arg(v1), tcgv_vec_arg(t1));
+ vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2),
+ tcgv_vec_arg(v1), tcgv_vec_arg(t2));
+ tcg_gen_or_vec(vece, v0, t1, t2);
+ tcg_temp_free_vec(t1);
+ tcg_temp_free_vec(t2);
+ break;
+
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
+{
+ switch (op) {
+ case INDEX_op_goto_ptr:
+ return C_O0_I1(r);
+
+ case INDEX_op_ld8u_i32:
+ case INDEX_op_ld8s_i32:
+ case INDEX_op_ld16u_i32:
+ case INDEX_op_ld16s_i32:
+ case INDEX_op_ld_i32:
+ case INDEX_op_ld8u_i64:
+ case INDEX_op_ld8s_i64:
+ case INDEX_op_ld16u_i64:
+ case INDEX_op_ld16s_i64:
+ case INDEX_op_ld32u_i64:
+ case INDEX_op_ld32s_i64:
+ case INDEX_op_ld_i64:
+ case INDEX_op_neg_i32:
+ case INDEX_op_neg_i64:
+ case INDEX_op_not_i32:
+ case INDEX_op_not_i64:
+ case INDEX_op_bswap16_i32:
+ case INDEX_op_bswap32_i32:
+ case INDEX_op_bswap16_i64:
+ case INDEX_op_bswap32_i64:
+ case INDEX_op_bswap64_i64:
+ case INDEX_op_ext8s_i32:
+ case INDEX_op_ext16s_i32:
+ case INDEX_op_ext8u_i32:
+ case INDEX_op_ext16u_i32:
+ case INDEX_op_ext8s_i64:
+ case INDEX_op_ext16s_i64:
+ case INDEX_op_ext32s_i64:
+ case INDEX_op_ext8u_i64:
+ case INDEX_op_ext16u_i64:
+ case INDEX_op_ext32u_i64:
+ case INDEX_op_ext_i32_i64:
+ case INDEX_op_extu_i32_i64:
+ case INDEX_op_extract_i32:
+ case INDEX_op_extract_i64:
+ case INDEX_op_sextract_i32:
+ case INDEX_op_sextract_i64:
+ return C_O1_I1(r, r);
+
+ case INDEX_op_st8_i32:
+ case INDEX_op_st16_i32:
+ case INDEX_op_st_i32:
+ case INDEX_op_st8_i64:
+ case INDEX_op_st16_i64:
+ case INDEX_op_st32_i64:
+ case INDEX_op_st_i64:
+ return C_O0_I2(rZ, r);
+
+ case INDEX_op_add_i32:
+ case INDEX_op_add_i64:
+ case INDEX_op_sub_i32:
+ case INDEX_op_sub_i64:
+ case INDEX_op_setcond_i32:
+ case INDEX_op_setcond_i64:
+ return C_O1_I2(r, r, rA);
+
+ case INDEX_op_mul_i32:
+ case INDEX_op_mul_i64:
+ case INDEX_op_div_i32:
+ case INDEX_op_div_i64:
+ case INDEX_op_divu_i32:
+ case INDEX_op_divu_i64:
+ case INDEX_op_rem_i32:
+ case INDEX_op_rem_i64:
+ case INDEX_op_remu_i32:
+ case INDEX_op_remu_i64:
+ case INDEX_op_muluh_i64:
+ case INDEX_op_mulsh_i64:
+ return C_O1_I2(r, r, r);
+
+ case INDEX_op_and_i32:
+ case INDEX_op_and_i64:
+ case INDEX_op_or_i32:
+ case INDEX_op_or_i64:
+ case INDEX_op_xor_i32:
+ case INDEX_op_xor_i64:
+ case INDEX_op_andc_i32:
+ case INDEX_op_andc_i64:
+ case INDEX_op_orc_i32:
+ case INDEX_op_orc_i64:
+ case INDEX_op_eqv_i32:
+ case INDEX_op_eqv_i64:
+ return C_O1_I2(r, r, rL);
+
+ case INDEX_op_shl_i32:
+ case INDEX_op_shr_i32:
+ case INDEX_op_sar_i32:
+ case INDEX_op_rotl_i32:
+ case INDEX_op_rotr_i32:
+ case INDEX_op_shl_i64:
+ case INDEX_op_shr_i64:
+ case INDEX_op_sar_i64:
+ case INDEX_op_rotl_i64:
+ case INDEX_op_rotr_i64:
+ return C_O1_I2(r, r, ri);
+
+ case INDEX_op_clz_i32:
+ case INDEX_op_ctz_i32:
+ case INDEX_op_clz_i64:
+ case INDEX_op_ctz_i64:
+ return C_O1_I2(r, r, rAL);
+
+ case INDEX_op_brcond_i32:
+ case INDEX_op_brcond_i64:
+ return C_O0_I2(r, rA);
+
+ case INDEX_op_movcond_i32:
+ case INDEX_op_movcond_i64:
+ return C_O1_I4(r, r, rA, rZ, rZ);
+
+ case INDEX_op_qemu_ld_i32:
+ case INDEX_op_qemu_ld_i64:
+ return C_O1_I1(r, l);
+ case INDEX_op_qemu_st_i32:
+ case INDEX_op_qemu_st_i64:
+ return C_O0_I2(lZ, l);
+
+ case INDEX_op_deposit_i32:
+ case INDEX_op_deposit_i64:
+ return C_O1_I2(r, 0, rZ);
+
+ case INDEX_op_extract2_i32:
+ case INDEX_op_extract2_i64:
+ return C_O1_I2(r, rZ, rZ);
+
+ case INDEX_op_add2_i32:
+ case INDEX_op_add2_i64:
+ case INDEX_op_sub2_i32:
+ case INDEX_op_sub2_i64:
+ return C_O2_I4(r, r, rZ, rZ, rA, rMZ);
+
+ case INDEX_op_add_vec:
+ case INDEX_op_sub_vec:
+ case INDEX_op_mul_vec:
+ case INDEX_op_xor_vec:
+ case INDEX_op_ssadd_vec:
+ case INDEX_op_sssub_vec:
+ case INDEX_op_usadd_vec:
+ case INDEX_op_ussub_vec:
+ case INDEX_op_smax_vec:
+ case INDEX_op_smin_vec:
+ case INDEX_op_umax_vec:
+ case INDEX_op_umin_vec:
+ case INDEX_op_shlv_vec:
+ case INDEX_op_shrv_vec:
+ case INDEX_op_sarv_vec:
+ case INDEX_op_aa64_sshl_vec:
+ return C_O1_I2(w, w, w);
+ case INDEX_op_not_vec:
+ case INDEX_op_neg_vec:
+ case INDEX_op_abs_vec:
+ case INDEX_op_shli_vec:
+ case INDEX_op_shri_vec:
+ case INDEX_op_sari_vec:
+ return C_O1_I1(w, w);
+ case INDEX_op_ld_vec:
+ case INDEX_op_dupm_vec:
+ return C_O1_I1(w, r);
+ case INDEX_op_st_vec:
+ return C_O0_I2(w, r);
+ case INDEX_op_dup_vec:
+ return C_O1_I1(w, wr);
+ case INDEX_op_or_vec:
+ case INDEX_op_andc_vec:
+ return C_O1_I2(w, w, wO);
+ case INDEX_op_and_vec:
+ case INDEX_op_orc_vec:
+ return C_O1_I2(w, w, wN);
+ case INDEX_op_cmp_vec:
+ return C_O1_I2(w, w, wZ);
+ case INDEX_op_bitsel_vec:
+ return C_O1_I3(w, w, w, w);
+ case INDEX_op_aa64_sli_vec:
+ return C_O1_I2(w, 0, w);
+
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static void tcg_target_init(TCGContext *s)
+{
+ tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
+ tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
+ tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
+ tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
+
+ tcg_target_call_clobber_regs = -1ull;
+ tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
+ tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
+ tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
+ tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
+ tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
+ tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
+ tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
+ tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
+ tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
+ tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
+ tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
+ tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
+ tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
+ tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
+ tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
+ tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
+ tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
+ tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
+ tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
+
+ s->reserved_regs = 0;
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
+ tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
+}
+
+/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */
+#define PUSH_SIZE ((30 - 19 + 1) * 8)
+
+#define FRAME_SIZE \
+ ((PUSH_SIZE \
+ + TCG_STATIC_CALL_ARGS_SIZE \
+ + CPU_TEMP_BUF_NLONGS * sizeof(long) \
+ + TCG_TARGET_STACK_ALIGN - 1) \
+ & ~(TCG_TARGET_STACK_ALIGN - 1))
+
+/* We're expecting a 2-byte uleb128 encoded value. */
+QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
+
+/* We're expecting to use a single ADDI insn. */
+QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
+
+static void tcg_target_qemu_prologue(TCGContext *s)
+{
+ TCGReg r;
+
+ /* Push (FP, LR) and allocate space for all saved registers. */
+ tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
+ TCG_REG_SP, -PUSH_SIZE, 1, 1);
+
+ /* Set up frame pointer for canonical unwinding. */
+ tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
+
+ /* Store callee-preserved regs x19..x28. */
+ for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
+ int ofs = (r - TCG_REG_X19 + 2) * 8;
+ tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
+ }
+
+ /* Make stack space for TCG locals. */
+ tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
+ FRAME_SIZE - PUSH_SIZE);
+
+ /* Inform TCG about how to find TCG locals with register, offset, size. */
+ tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
+ CPU_TEMP_BUF_NLONGS * sizeof(long));
+
+#if !defined(CONFIG_SOFTMMU)
+ if (USE_GUEST_BASE) {
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
+ }
+#endif
+
+ tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
+ tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
+
+ /*
+ * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
+ * and fall through to the rest of the epilogue.
+ */
+ tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
+ tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
+
+ /* TB epilogue */
+ tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
+
+ /* Remove TCG locals stack space. */
+ tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
+ FRAME_SIZE - PUSH_SIZE);
+
+ /* Restore registers x19..x28. */
+ for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
+ int ofs = (r - TCG_REG_X19 + 2) * 8;
+ tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
+ }
+
+ /* Pop (FP, LR), restore SP to previous frame. */
+ tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
+ TCG_REG_SP, PUSH_SIZE, 0, 1);
+ tcg_out_insn(s, 3207, RET, TCG_REG_LR);
+}
+
+static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
+{
+ int i;
+ for (i = 0; i < count; ++i) {
+ p[i] = NOP;
+ }
+}
+
+typedef struct {
+ DebugFrameHeader h;
+ uint8_t fde_def_cfa[4];
+ uint8_t fde_reg_ofs[24];
+} DebugFrame;
+
+#define ELF_HOST_MACHINE EM_AARCH64
+
+static const DebugFrame debug_frame = {
+ .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
+ .h.cie.id = -1,
+ .h.cie.version = 1,
+ .h.cie.code_align = 1,
+ .h.cie.data_align = 0x78, /* sleb128 -8 */
+ .h.cie.return_column = TCG_REG_LR,
+
+ /* Total FDE size does not include the "len" member. */
+ .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
+
+ .fde_def_cfa = {
+ 12, TCG_REG_SP, /* DW_CFA_def_cfa sp, ... */
+ (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
+ (FRAME_SIZE >> 7)
+ },
+ .fde_reg_ofs = {
+ 0x80 + 28, 1, /* DW_CFA_offset, x28, -8 */
+ 0x80 + 27, 2, /* DW_CFA_offset, x27, -16 */
+ 0x80 + 26, 3, /* DW_CFA_offset, x26, -24 */
+ 0x80 + 25, 4, /* DW_CFA_offset, x25, -32 */
+ 0x80 + 24, 5, /* DW_CFA_offset, x24, -40 */
+ 0x80 + 23, 6, /* DW_CFA_offset, x23, -48 */
+ 0x80 + 22, 7, /* DW_CFA_offset, x22, -56 */
+ 0x80 + 21, 8, /* DW_CFA_offset, x21, -64 */
+ 0x80 + 20, 9, /* DW_CFA_offset, x20, -72 */
+        0x80 + 19, 10, /* DW_CFA_offset, x19, -80 */
+ 0x80 + 30, 11, /* DW_CFA_offset, lr, -88 */
+ 0x80 + 29, 12, /* DW_CFA_offset, fp, -96 */
+ }
+};
+
+void tcg_register_jit(const void *buf, size_t buf_size)
+{
+ tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
+}
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
new file mode 100644
index 000000000..7a93ac802
--- /dev/null
+++ b/tcg/aarch64/tcg-target.h
@@ -0,0 +1,159 @@
+/*
+ * Initial TCG Implementation for aarch64
+ *
+ * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
+ * Written by Claudio Fontana
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.
+ *
+ * See the COPYING file in the top-level directory for details.
+ */
+
+#ifndef AARCH64_TCG_TARGET_H
+#define AARCH64_TCG_TARGET_H
+
+#define TCG_TARGET_INSN_UNIT_SIZE 4
+#define TCG_TARGET_TLB_DISPLACEMENT_BITS 24
+#define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB)
+#undef TCG_TARGET_STACK_GROWSUP
+
+typedef enum {
+ TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
+ TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
+ TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
+ TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
+ TCG_REG_X16, TCG_REG_X17, TCG_REG_X18, TCG_REG_X19,
+ TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
+ TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
+ TCG_REG_X28, TCG_REG_X29, TCG_REG_X30,
+
+ /* X31 is either the stack pointer or zero, depending on context. */
+ TCG_REG_SP = 31,
+ TCG_REG_XZR = 31,
+
+ TCG_REG_V0 = 32, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
+ TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
+ TCG_REG_V8, TCG_REG_V9, TCG_REG_V10, TCG_REG_V11,
+ TCG_REG_V12, TCG_REG_V13, TCG_REG_V14, TCG_REG_V15,
+ TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
+ TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
+ TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
+ TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
+
+ /* Aliases. */
+ TCG_REG_FP = TCG_REG_X29,
+ TCG_REG_LR = TCG_REG_X30,
+ TCG_AREG0 = TCG_REG_X19,
+} TCGReg;
+
+#define TCG_TARGET_NB_REGS 64
+
+/* used for function call generation */
+#define TCG_REG_CALL_STACK TCG_REG_SP
+#define TCG_TARGET_STACK_ALIGN 16
+#define TCG_TARGET_CALL_ALIGN_ARGS 1
+#define TCG_TARGET_CALL_STACK_OFFSET 0
+
+/* optional instructions */
+#define TCG_TARGET_HAS_div_i32 1
+#define TCG_TARGET_HAS_rem_i32 1
+#define TCG_TARGET_HAS_ext8s_i32 1
+#define TCG_TARGET_HAS_ext16s_i32 1
+#define TCG_TARGET_HAS_ext8u_i32 1
+#define TCG_TARGET_HAS_ext16u_i32 1
+#define TCG_TARGET_HAS_bswap16_i32 1
+#define TCG_TARGET_HAS_bswap32_i32 1
+#define TCG_TARGET_HAS_not_i32 1
+#define TCG_TARGET_HAS_neg_i32 1
+#define TCG_TARGET_HAS_rot_i32 1
+#define TCG_TARGET_HAS_andc_i32 1
+#define TCG_TARGET_HAS_orc_i32 1
+#define TCG_TARGET_HAS_eqv_i32 1
+#define TCG_TARGET_HAS_nand_i32 0
+#define TCG_TARGET_HAS_nor_i32 0
+#define TCG_TARGET_HAS_clz_i32 1
+#define TCG_TARGET_HAS_ctz_i32 1
+#define TCG_TARGET_HAS_ctpop_i32 0
+#define TCG_TARGET_HAS_deposit_i32 1
+#define TCG_TARGET_HAS_extract_i32 1
+#define TCG_TARGET_HAS_sextract_i32 1
+#define TCG_TARGET_HAS_extract2_i32 1
+#define TCG_TARGET_HAS_movcond_i32 1
+#define TCG_TARGET_HAS_add2_i32 1
+#define TCG_TARGET_HAS_sub2_i32 1
+#define TCG_TARGET_HAS_mulu2_i32 0
+#define TCG_TARGET_HAS_muls2_i32 0
+#define TCG_TARGET_HAS_muluh_i32 0
+#define TCG_TARGET_HAS_mulsh_i32 0
+#define TCG_TARGET_HAS_extrl_i64_i32 0
+#define TCG_TARGET_HAS_extrh_i64_i32 0
+#define TCG_TARGET_HAS_qemu_st8_i32 0
+
+#define TCG_TARGET_HAS_div_i64 1
+#define TCG_TARGET_HAS_rem_i64 1
+#define TCG_TARGET_HAS_ext8s_i64 1
+#define TCG_TARGET_HAS_ext16s_i64 1
+#define TCG_TARGET_HAS_ext32s_i64 1
+#define TCG_TARGET_HAS_ext8u_i64 1
+#define TCG_TARGET_HAS_ext16u_i64 1
+#define TCG_TARGET_HAS_ext32u_i64 1
+#define TCG_TARGET_HAS_bswap16_i64 1
+#define TCG_TARGET_HAS_bswap32_i64 1
+#define TCG_TARGET_HAS_bswap64_i64 1
+#define TCG_TARGET_HAS_not_i64 1
+#define TCG_TARGET_HAS_neg_i64 1
+#define TCG_TARGET_HAS_rot_i64 1
+#define TCG_TARGET_HAS_andc_i64 1
+#define TCG_TARGET_HAS_orc_i64 1
+#define TCG_TARGET_HAS_eqv_i64 1
+#define TCG_TARGET_HAS_nand_i64 0
+#define TCG_TARGET_HAS_nor_i64 0
+#define TCG_TARGET_HAS_clz_i64 1
+#define TCG_TARGET_HAS_ctz_i64 1
+#define TCG_TARGET_HAS_ctpop_i64 0
+#define TCG_TARGET_HAS_deposit_i64 1
+#define TCG_TARGET_HAS_extract_i64 1
+#define TCG_TARGET_HAS_sextract_i64 1
+#define TCG_TARGET_HAS_extract2_i64 1
+#define TCG_TARGET_HAS_movcond_i64 1
+#define TCG_TARGET_HAS_add2_i64 1
+#define TCG_TARGET_HAS_sub2_i64 1
+#define TCG_TARGET_HAS_mulu2_i64 0
+#define TCG_TARGET_HAS_muls2_i64 0
+#define TCG_TARGET_HAS_muluh_i64 1
+#define TCG_TARGET_HAS_mulsh_i64 1
+#define TCG_TARGET_HAS_direct_jump 1
+
+#define TCG_TARGET_HAS_v64 1
+#define TCG_TARGET_HAS_v128 1
+#define TCG_TARGET_HAS_v256 0
+
+#define TCG_TARGET_HAS_andc_vec 1
+#define TCG_TARGET_HAS_orc_vec 1
+#define TCG_TARGET_HAS_not_vec 1
+#define TCG_TARGET_HAS_neg_vec 1
+#define TCG_TARGET_HAS_abs_vec 1
+#define TCG_TARGET_HAS_roti_vec 0
+#define TCG_TARGET_HAS_rots_vec 0
+#define TCG_TARGET_HAS_rotv_vec 0
+#define TCG_TARGET_HAS_shi_vec 1
+#define TCG_TARGET_HAS_shs_vec 0
+#define TCG_TARGET_HAS_shv_vec 1
+#define TCG_TARGET_HAS_mul_vec 1
+#define TCG_TARGET_HAS_sat_vec 1
+#define TCG_TARGET_HAS_minmax_vec 1
+#define TCG_TARGET_HAS_bitsel_vec 1
+#define TCG_TARGET_HAS_cmpsel_vec 0
+
+#define TCG_TARGET_DEFAULT_MO (0)
+#define TCG_TARGET_HAS_MEMORY_BSWAP 0
+
+void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+
+#ifdef CONFIG_SOFTMMU
+#define TCG_TARGET_NEED_LDST_LABELS
+#endif
+#define TCG_TARGET_NEED_POOL_LABELS
+
+#endif /* AARCH64_TCG_TARGET_H */
diff --git a/tcg/aarch64/tcg-target.opc.h b/tcg/aarch64/tcg-target.opc.h
new file mode 100644
index 000000000..bce30accd
--- /dev/null
+++ b/tcg/aarch64/tcg-target.opc.h
@@ -0,0 +1,15 @@
+/*
+ * Copyright (c) 2019 Linaro
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.
+ *
+ * See the COPYING file in the top-level directory for details.
+ *
+ * Target-specific opcodes for host vector expansion. These will be
+ * emitted by tcg_expand_vec_op. For those familiar with GCC internals,
+ * consider these to be UNSPEC with names.
+ */
+
+DEF(aa64_sshl_vec, 1, 2, 0, IMPLVEC)
+DEF(aa64_sli_vec, 1, 2, 1, IMPLVEC)
diff --git a/tcg/arm/tcg-target-con-set.h b/tcg/arm/tcg-target-con-set.h
new file mode 100644
index 000000000..3685e1786
--- /dev/null
+++ b/tcg/arm/tcg-target-con-set.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define Arm target-specific constraint sets.
+ * Copyright (c) 2021 Linaro
+ */
+
+/*
+ * C_On_Im(...) defines a constraint set with <n> outputs and <m> inputs.
+ * Each operand should be a sequence of constraint letters as defined by
+ * tcg-target-con-str.h; the constraint combination is inclusive or.
+ */
+C_O0_I1(r)
+C_O0_I2(r, r)
+C_O0_I2(r, rIN)
+C_O0_I2(s, s)
+C_O0_I2(w, r)
+C_O0_I3(s, s, s)
+C_O0_I4(r, r, rI, rI)
+C_O0_I4(s, s, s, s)
+C_O1_I1(r, l)
+C_O1_I1(r, r)
+C_O1_I1(w, r)
+C_O1_I1(w, w)
+C_O1_I1(w, wr)
+C_O1_I2(r, 0, rZ)
+C_O1_I2(r, l, l)
+C_O1_I2(r, r, r)
+C_O1_I2(r, r, rI)
+C_O1_I2(r, r, rIK)
+C_O1_I2(r, r, rIN)
+C_O1_I2(r, r, ri)
+C_O1_I2(r, rZ, rZ)
+C_O1_I2(w, 0, w)
+C_O1_I2(w, w, w)
+C_O1_I2(w, w, wO)
+C_O1_I2(w, w, wV)
+C_O1_I2(w, w, wZ)
+C_O1_I3(w, w, w, w)
+C_O1_I4(r, r, r, rI, rI)
+C_O1_I4(r, r, rIN, rIK, 0)
+C_O2_I1(r, r, l)
+C_O2_I2(r, r, l, l)
+C_O2_I2(r, r, r, r)
+C_O2_I4(r, r, r, r, rIN, rIK)
+C_O2_I4(r, r, rI, rI, rIN, rIK)
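+
+/*
+ * Reading sketch (informative only): C_O1_I2(r, r, rIN) above describes an
+ * op with one output in any general register and two inputs, the first in a
+ * general register and the second either a general register or a constant
+ * accepted by 'I' (ARM rotated immediate) or 'N' (negatable immediate), as
+ * those letters are defined in tcg-target-con-str.h.
+ */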
diff --git a/tcg/arm/tcg-target-con-str.h b/tcg/arm/tcg-target-con-str.h
new file mode 100644
index 000000000..8f501149e
--- /dev/null
+++ b/tcg/arm/tcg-target-con-str.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define Arm target-specific operand constraints.
+ * Copyright (c) 2021 Linaro
+ */
+
+/*
+ * Define constraint letters for register sets:
+ * REGS(letter, register_mask)
+ */
+REGS('r', ALL_GENERAL_REGS)
+REGS('l', ALL_QLOAD_REGS)
+REGS('s', ALL_QSTORE_REGS)
+REGS('w', ALL_VECTOR_REGS)
+
+/*
+ * Define constraint letters for constants:
+ * CONST(letter, TCG_CT_CONST_* bit set)
+ */
+CONST('I', TCG_CT_CONST_ARM)
+CONST('K', TCG_CT_CONST_INV)
+CONST('N', TCG_CT_CONST_NEG)
+CONST('O', TCG_CT_CONST_ORRI)
+CONST('V', TCG_CT_CONST_ANDI)
+CONST('Z', TCG_CT_CONST_ZERO)
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
new file mode 100644
index 000000000..9d322cdba
--- /dev/null
+++ b/tcg/arm/tcg-target.c.inc
@@ -0,0 +1,3191 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2008 Andrzej Zaborowski
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "elf.h"
+#include "../tcg-pool.c.inc"
+
+int arm_arch = __ARM_ARCH;
+
+#ifndef use_idiv_instructions
+bool use_idiv_instructions;
+#endif
+#ifndef use_neon_instructions
+bool use_neon_instructions;
+#endif
+
+/* ??? Ought to think about changing CONFIG_SOFTMMU to always defined. */
+#ifdef CONFIG_SOFTMMU
+# define USING_SOFTMMU 1
+#else
+# define USING_SOFTMMU 0
+#endif
+
+#ifdef CONFIG_DEBUG_TCG
+static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
+ "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
+ "%r8", "%r9", "%r10", "%r11", "%r12", "%sp", "%r14", "%pc",
+ "%q0", "%q1", "%q2", "%q3", "%q4", "%q5", "%q6", "%q7",
+ "%q8", "%q9", "%q10", "%q11", "%q12", "%q13", "%q14", "%q15",
+};
+#endif
+
+static const int tcg_target_reg_alloc_order[] = {
+ TCG_REG_R4,
+ TCG_REG_R5,
+ TCG_REG_R6,
+ TCG_REG_R7,
+ TCG_REG_R8,
+ TCG_REG_R9,
+ TCG_REG_R10,
+ TCG_REG_R11,
+ TCG_REG_R13,
+ TCG_REG_R0,
+ TCG_REG_R1,
+ TCG_REG_R2,
+ TCG_REG_R3,
+ TCG_REG_R12,
+ TCG_REG_R14,
+
+ TCG_REG_Q0,
+ TCG_REG_Q1,
+ TCG_REG_Q2,
+ TCG_REG_Q3,
+ /* Q4 - Q7 are call-saved, and skipped. */
+ TCG_REG_Q8,
+ TCG_REG_Q9,
+ TCG_REG_Q10,
+ TCG_REG_Q11,
+ TCG_REG_Q12,
+ TCG_REG_Q13,
+ TCG_REG_Q14,
+ TCG_REG_Q15,
+};
+
+static const int tcg_target_call_iarg_regs[4] = {
+ TCG_REG_R0, TCG_REG_R1, TCG_REG_R2, TCG_REG_R3
+};
+static const int tcg_target_call_oarg_regs[2] = {
+ TCG_REG_R0, TCG_REG_R1
+};
+
+#define TCG_REG_TMP TCG_REG_R12
+#define TCG_VEC_TMP TCG_REG_Q15
+
+typedef enum {
+ COND_EQ = 0x0,
+ COND_NE = 0x1,
+ COND_CS = 0x2, /* Unsigned greater or equal */
+ COND_CC = 0x3, /* Unsigned less than */
+ COND_MI = 0x4, /* Negative */
+ COND_PL = 0x5, /* Zero or greater */
+ COND_VS = 0x6, /* Overflow */
+ COND_VC = 0x7, /* No overflow */
+ COND_HI = 0x8, /* Unsigned greater than */
+ COND_LS = 0x9, /* Unsigned less or equal */
+ COND_GE = 0xa,
+ COND_LT = 0xb,
+ COND_GT = 0xc,
+ COND_LE = 0xd,
+ COND_AL = 0xe,
+} ARMCond;
+
+#define TO_CPSR (1 << 20)
+
+#define SHIFT_IMM_LSL(im) (((im) << 7) | 0x00)
+#define SHIFT_IMM_LSR(im) (((im) << 7) | 0x20)
+#define SHIFT_IMM_ASR(im) (((im) << 7) | 0x40)
+#define SHIFT_IMM_ROR(im) (((im) << 7) | 0x60)
+#define SHIFT_REG_LSL(rs) (((rs) << 8) | 0x10)
+#define SHIFT_REG_LSR(rs) (((rs) << 8) | 0x30)
+#define SHIFT_REG_ASR(rs) (((rs) << 8) | 0x50)
+#define SHIFT_REG_ROR(rs) (((rs) << 8) | 0x70)
+
+typedef enum {
+ ARITH_AND = 0x0 << 21,
+ ARITH_EOR = 0x1 << 21,
+ ARITH_SUB = 0x2 << 21,
+ ARITH_RSB = 0x3 << 21,
+ ARITH_ADD = 0x4 << 21,
+ ARITH_ADC = 0x5 << 21,
+ ARITH_SBC = 0x6 << 21,
+ ARITH_RSC = 0x7 << 21,
+ ARITH_TST = 0x8 << 21 | TO_CPSR,
+ ARITH_CMP = 0xa << 21 | TO_CPSR,
+ ARITH_CMN = 0xb << 21 | TO_CPSR,
+ ARITH_ORR = 0xc << 21,
+ ARITH_MOV = 0xd << 21,
+ ARITH_BIC = 0xe << 21,
+ ARITH_MVN = 0xf << 21,
+
+ INSN_CLZ = 0x016f0f10,
+ INSN_RBIT = 0x06ff0f30,
+
+ INSN_LDMIA = 0x08b00000,
+ INSN_STMDB = 0x09200000,
+
+ INSN_LDR_IMM = 0x04100000,
+ INSN_LDR_REG = 0x06100000,
+ INSN_STR_IMM = 0x04000000,
+ INSN_STR_REG = 0x06000000,
+
+ INSN_LDRH_IMM = 0x005000b0,
+ INSN_LDRH_REG = 0x001000b0,
+ INSN_LDRSH_IMM = 0x005000f0,
+ INSN_LDRSH_REG = 0x001000f0,
+ INSN_STRH_IMM = 0x004000b0,
+ INSN_STRH_REG = 0x000000b0,
+
+ INSN_LDRB_IMM = 0x04500000,
+ INSN_LDRB_REG = 0x06500000,
+ INSN_LDRSB_IMM = 0x005000d0,
+ INSN_LDRSB_REG = 0x001000d0,
+ INSN_STRB_IMM = 0x04400000,
+ INSN_STRB_REG = 0x06400000,
+
+ INSN_LDRD_IMM = 0x004000d0,
+ INSN_LDRD_REG = 0x000000d0,
+ INSN_STRD_IMM = 0x004000f0,
+ INSN_STRD_REG = 0x000000f0,
+
+ INSN_DMB_ISH = 0xf57ff05b,
+ INSN_DMB_MCR = 0xee070fba,
+
+ /* Architected nop introduced in v6k. */
+ /* ??? This is an MSR (imm) 0,0,0 insn. Anyone know if this
+ also Just So Happened to do nothing on pre-v6k so that we
+ don't need to conditionalize it? */
+ INSN_NOP_v6k = 0xe320f000,
+ /* Otherwise the assembler uses mov r0,r0 */
+ INSN_NOP_v4 = (COND_AL << 28) | ARITH_MOV,
+
+ INSN_VADD = 0xf2000800,
+ INSN_VAND = 0xf2000110,
+ INSN_VBIC = 0xf2100110,
+ INSN_VEOR = 0xf3000110,
+ INSN_VORN = 0xf2300110,
+ INSN_VORR = 0xf2200110,
+ INSN_VSUB = 0xf3000800,
+ INSN_VMUL = 0xf2000910,
+ INSN_VQADD = 0xf2000010,
+ INSN_VQADD_U = 0xf3000010,
+ INSN_VQSUB = 0xf2000210,
+ INSN_VQSUB_U = 0xf3000210,
+ INSN_VMAX = 0xf2000600,
+ INSN_VMAX_U = 0xf3000600,
+ INSN_VMIN = 0xf2000610,
+ INSN_VMIN_U = 0xf3000610,
+
+ INSN_VABS = 0xf3b10300,
+ INSN_VMVN = 0xf3b00580,
+ INSN_VNEG = 0xf3b10380,
+
+ INSN_VCEQ0 = 0xf3b10100,
+ INSN_VCGT0 = 0xf3b10000,
+ INSN_VCGE0 = 0xf3b10080,
+ INSN_VCLE0 = 0xf3b10180,
+ INSN_VCLT0 = 0xf3b10200,
+
+ INSN_VCEQ = 0xf3000810,
+ INSN_VCGE = 0xf2000310,
+ INSN_VCGT = 0xf2000300,
+ INSN_VCGE_U = 0xf3000310,
+ INSN_VCGT_U = 0xf3000300,
+
+ INSN_VSHLI = 0xf2800510, /* VSHL (immediate) */
+ INSN_VSARI = 0xf2800010, /* VSHR.S */
+ INSN_VSHRI = 0xf3800010, /* VSHR.U */
+ INSN_VSLI = 0xf3800510,
+ INSN_VSHL_S = 0xf2000400, /* VSHL.S (register) */
+ INSN_VSHL_U = 0xf3000400, /* VSHL.U (register) */
+
+ INSN_VBSL = 0xf3100110,
+ INSN_VBIT = 0xf3200110,
+ INSN_VBIF = 0xf3300110,
+
+ INSN_VTST = 0xf2000810,
+
+ INSN_VDUP_G = 0xee800b10, /* VDUP (ARM core register) */
+ INSN_VDUP_S = 0xf3b00c00, /* VDUP (scalar) */
+ INSN_VLDR_D = 0xed100b00, /* VLDR.64 */
+ INSN_VLD1 = 0xf4200000, /* VLD1 (multiple single elements) */
+ INSN_VLD1R = 0xf4a00c00, /* VLD1 (single element to all lanes) */
+ INSN_VST1 = 0xf4000000, /* VST1 (multiple single elements) */
+ INSN_VMOVI = 0xf2800010, /* VMOV (immediate) */
+} ARMInsn;
+
+#define INSN_NOP (use_armv7_instructions ? INSN_NOP_v6k : INSN_NOP_v4)
+
+static const uint8_t tcg_cond_to_arm_cond[] = {
+ [TCG_COND_EQ] = COND_EQ,
+ [TCG_COND_NE] = COND_NE,
+ [TCG_COND_LT] = COND_LT,
+ [TCG_COND_GE] = COND_GE,
+ [TCG_COND_LE] = COND_LE,
+ [TCG_COND_GT] = COND_GT,
+ /* unsigned */
+ [TCG_COND_LTU] = COND_CC,
+ [TCG_COND_GEU] = COND_CS,
+ [TCG_COND_LEU] = COND_LS,
+ [TCG_COND_GTU] = COND_HI,
+};
+
+static int encode_imm(uint32_t imm);
+
+/* TCG private relocation type: add with pc+imm8 */
+#define R_ARM_PC8 11
+
+/* TCG private relocation type: vldr with imm8 << 2 */
+#define R_ARM_PC11 12
+
+static bool reloc_pc24(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
+{
+ const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
+ ptrdiff_t offset = (tcg_ptr_byte_diff(target, src_rx) - 8) >> 2;
+
+ if (offset == sextract32(offset, 0, 24)) {
+ *src_rw = deposit32(*src_rw, 0, 24, offset);
+ return true;
+ }
+ return false;
+}
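+
+/*
+ * Worked example (addresses chosen for illustration): with the branch insn
+ * at rx address 0x1000 and the target at 0x2000, the stored field is
+ * (0x2000 - 0x1000 - 8) >> 2 = 0x3fe, and the CPU later recovers
+ * target = insn_addr + 8 + (field << 2).  The -8 reflects the ARM
+ * convention that a read of PC yields the current insn address plus 8.
+ */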
+
+static bool reloc_pc13(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
+{
+ const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
+ ptrdiff_t offset = tcg_ptr_byte_diff(target, src_rx) - 8;
+
+ if (offset >= -0xfff && offset <= 0xfff) {
+ tcg_insn_unit insn = *src_rw;
+ bool u = (offset >= 0);
+ if (!u) {
+ offset = -offset;
+ }
+ insn = deposit32(insn, 23, 1, u);
+ insn = deposit32(insn, 0, 12, offset);
+ *src_rw = insn;
+ return true;
+ }
+ return false;
+}
+
+static bool reloc_pc11(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
+{
+ const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
+ ptrdiff_t offset = (tcg_ptr_byte_diff(target, src_rx) - 8) / 4;
+
+ if (offset >= -0xff && offset <= 0xff) {
+ tcg_insn_unit insn = *src_rw;
+ bool u = (offset >= 0);
+ if (!u) {
+ offset = -offset;
+ }
+ insn = deposit32(insn, 23, 1, u);
+ insn = deposit32(insn, 0, 8, offset);
+ *src_rw = insn;
+ return true;
+ }
+ return false;
+}
+
+static bool reloc_pc8(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
+{
+ const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
+ ptrdiff_t offset = tcg_ptr_byte_diff(target, src_rx) - 8;
+ int imm12 = encode_imm(offset);
+
+ if (imm12 >= 0) {
+ *src_rw = deposit32(*src_rw, 0, 12, imm12);
+ return true;
+ }
+ return false;
+}
+
+static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
+ intptr_t value, intptr_t addend)
+{
+ tcg_debug_assert(addend == 0);
+ switch (type) {
+ case R_ARM_PC24:
+ return reloc_pc24(code_ptr, (const tcg_insn_unit *)value);
+ case R_ARM_PC13:
+ return reloc_pc13(code_ptr, (const tcg_insn_unit *)value);
+ case R_ARM_PC11:
+ return reloc_pc11(code_ptr, (const tcg_insn_unit *)value);
+ case R_ARM_PC8:
+ return reloc_pc8(code_ptr, (const tcg_insn_unit *)value);
+ default:
+ g_assert_not_reached();
+ }
+}
+
+#define TCG_CT_CONST_ARM 0x100
+#define TCG_CT_CONST_INV 0x200
+#define TCG_CT_CONST_NEG 0x400
+#define TCG_CT_CONST_ZERO 0x800
+#define TCG_CT_CONST_ORRI 0x1000
+#define TCG_CT_CONST_ANDI 0x2000
+
+#define ALL_GENERAL_REGS 0xffffu
+#define ALL_VECTOR_REGS 0xffff0000u
+
+/*
+ * r0-r2 will be overwritten when reading the tlb entry (softmmu only)
+ * and r0-r1 when doing the byte swapping, so don't use these.
+ * r3 is removed for softmmu to avoid clashes with helper arguments.
+ */
+#ifdef CONFIG_SOFTMMU
+#define ALL_QLOAD_REGS \
+ (ALL_GENERAL_REGS & ~((1 << TCG_REG_R0) | (1 << TCG_REG_R1) | \
+ (1 << TCG_REG_R2) | (1 << TCG_REG_R3) | \
+ (1 << TCG_REG_R14)))
+#define ALL_QSTORE_REGS \
+ (ALL_GENERAL_REGS & ~((1 << TCG_REG_R0) | (1 << TCG_REG_R1) | \
+ (1 << TCG_REG_R2) | (1 << TCG_REG_R14) | \
+ ((TARGET_LONG_BITS == 64) << TCG_REG_R3)))
+#else
+#define ALL_QLOAD_REGS ALL_GENERAL_REGS
+#define ALL_QSTORE_REGS \
+ (ALL_GENERAL_REGS & ~((1 << TCG_REG_R0) | (1 << TCG_REG_R1)))
+#endif
+
+/*
+ * ARM immediates for ALU instructions are made of an unsigned 8-bit
+ * right-rotated by an even amount between 0 and 30.
+ *
+ * Return < 0 if @imm cannot be encoded, else the entire imm12 field.
+ */
+static int encode_imm(uint32_t imm)
+{
+ uint32_t rot, imm8;
+
+ /* Simple case, no rotation required. */
+ if ((imm & ~0xff) == 0) {
+ return imm;
+ }
+
+ /* Next, try a simple even shift. */
+ rot = ctz32(imm) & ~1;
+ imm8 = imm >> rot;
+ rot = 32 - rot;
+ if ((imm8 & ~0xff) == 0) {
+ goto found;
+ }
+
+ /*
+ * Finally, try harder with rotations.
+ * The ctz test above will have taken care of rotates >= 8.
+ */
+ for (rot = 2; rot < 8; rot += 2) {
+ imm8 = rol32(imm, rot);
+ if ((imm8 & ~0xff) == 0) {
+ goto found;
+ }
+ }
+ /* Fail: imm cannot be encoded. */
+ return -1;
+
+ found:
+ /* Note that rot is even, and we discard bit 0 by shifting by 7. */
+ return rot << 7 | imm8;
+}
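+
+/*
+ * A few hand-checked examples of the encoding above (illustrative only):
+ *   encode_imm(0x000000ff) == 0x0ff   -- fits directly, no rotation
+ *   encode_imm(0x0000ff00) == 0xcff   -- 0xff rotated right by 24 (rot field 0xc)
+ *   encode_imm(0x00000101) == -1      -- the set bits cannot fit in one rotated byte
+ */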
+
+static int encode_imm_nofail(uint32_t imm)
+{
+ int ret = encode_imm(imm);
+ tcg_debug_assert(ret >= 0);
+ return ret;
+}
+
+static bool check_fit_imm(uint32_t imm)
+{
+ return encode_imm(imm) >= 0;
+}
+
+/* Return true if v16 is a valid 16-bit shifted immediate. */
+static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
+{
+ if (v16 == (v16 & 0xff)) {
+ *cmode = 0x8;
+ *imm8 = v16 & 0xff;
+ return true;
+ } else if (v16 == (v16 & 0xff00)) {
+ *cmode = 0xa;
+ *imm8 = v16 >> 8;
+ return true;
+ }
+ return false;
+}
+
+/* Return true if v32 is a valid 32-bit shifted immediate. */
+static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
+{
+ if (v32 == (v32 & 0xff)) {
+ *cmode = 0x0;
+ *imm8 = v32 & 0xff;
+ return true;
+ } else if (v32 == (v32 & 0xff00)) {
+ *cmode = 0x2;
+ *imm8 = (v32 >> 8) & 0xff;
+ return true;
+ } else if (v32 == (v32 & 0xff0000)) {
+ *cmode = 0x4;
+ *imm8 = (v32 >> 16) & 0xff;
+ return true;
+ } else if (v32 == (v32 & 0xff000000)) {
+ *cmode = 0x6;
+ *imm8 = v32 >> 24;
+ return true;
+ }
+ return false;
+}
+
+/* Return true if v32 is a valid 32-bit shifting ones immediate. */
+static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
+{
+ if ((v32 & 0xffff00ff) == 0xff) {
+ *cmode = 0xc;
+ *imm8 = (v32 >> 8) & 0xff;
+ return true;
+ } else if ((v32 & 0xff00ffff) == 0xffff) {
+ *cmode = 0xd;
+ *imm8 = (v32 >> 16) & 0xff;
+ return true;
+ }
+ return false;
+}
+
+/*
+ * Return non-zero if v32 can be formed by MOVI+ORR.
+ * Place the parameters for MOVI in (cmode, imm8).
+ * Return the cmode for ORR; the imm8 can be had via extraction from v32.
+ */
+static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
+{
+ int i;
+
+ for (i = 6; i > 0; i -= 2) {
+ /* Mask out one byte we can add with ORR. */
+ uint32_t tmp = v32 & ~(0xffu << (i * 4));
+ if (is_shimm32(tmp, cmode, imm8) ||
+ is_soimm32(tmp, cmode, imm8)) {
+ break;
+ }
+ }
+ return i;
+}
+
+/* Return true if V is a valid 16-bit or 32-bit shifted immediate. */
+static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
+{
+ if (v32 == deposit32(v32, 16, 16, v32)) {
+ return is_shimm16(v32, cmode, imm8);
+ } else {
+ return is_shimm32(v32, cmode, imm8);
+ }
+}
+
+/* Test if a constant matches the constraint.
+ * TODO: define constraints for:
+ *
+ * ldr/str offset: between -0xfff and 0xfff
+ * ldrh/strh offset: between -0xff and 0xff
+ * mov operand2: values represented with x << (2 * y), x < 0x100
+ * add, sub, eor...: ditto
+ */
+static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
+{
+ if (ct & TCG_CT_CONST) {
+ return 1;
+ } else if ((ct & TCG_CT_CONST_ARM) && check_fit_imm(val)) {
+ return 1;
+ } else if ((ct & TCG_CT_CONST_INV) && check_fit_imm(~val)) {
+ return 1;
+ } else if ((ct & TCG_CT_CONST_NEG) && check_fit_imm(-val)) {
+ return 1;
+ } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
+ return 1;
+ }
+
+ switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
+ case 0:
+ break;
+ case TCG_CT_CONST_ANDI:
+ val = ~val;
+ /* fallthru */
+ case TCG_CT_CONST_ORRI:
+ if (val == deposit64(val, 32, 32, val)) {
+ int cmode, imm8;
+ return is_shimm1632(val, &cmode, &imm8);
+ }
+ break;
+ default:
+ /* Both bits should not be set for the same insn. */
+ g_assert_not_reached();
+ }
+
+ return 0;
+}
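+
+/*
+ * Illustrative matches (hand-checked): 0x0000ff00 satisfies 'I' directly,
+ * 0xffff00ff satisfies 'K' because ~0xffff00ff == 0x0000ff00 is encodable,
+ * and 0xffffff00 satisfies 'N' because -0xffffff00 == 0x00000100 is.
+ */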
+
+static void tcg_out_b_imm(TCGContext *s, ARMCond cond, int32_t offset)
+{
+ tcg_out32(s, (cond << 28) | 0x0a000000 |
+ (((offset - 8) >> 2) & 0x00ffffff));
+}
+
+static void tcg_out_bl_imm(TCGContext *s, ARMCond cond, int32_t offset)
+{
+ tcg_out32(s, (cond << 28) | 0x0b000000 |
+ (((offset - 8) >> 2) & 0x00ffffff));
+}
+
+static void tcg_out_blx_reg(TCGContext *s, ARMCond cond, TCGReg rn)
+{
+ tcg_out32(s, (cond << 28) | 0x012fff30 | rn);
+}
+
+static void tcg_out_blx_imm(TCGContext *s, int32_t offset)
+{
+ tcg_out32(s, 0xfa000000 | ((offset & 2) << 23) |
+ (((offset - 8) >> 2) & 0x00ffffff));
+}
+
+static void tcg_out_dat_reg(TCGContext *s, ARMCond cond, ARMInsn opc,
+ TCGReg rd, TCGReg rn, TCGReg rm, int shift)
+{
+ tcg_out32(s, (cond << 28) | (0 << 25) | opc |
+ (rn << 16) | (rd << 12) | shift | rm);
+}
+
+static void tcg_out_mov_reg(TCGContext *s, ARMCond cond, TCGReg rd, TCGReg rm)
+{
+ /* Simple reg-reg move, optimising out the 'do nothing' case */
+ if (rd != rm) {
+ tcg_out_dat_reg(s, cond, ARITH_MOV, rd, 0, rm, SHIFT_IMM_LSL(0));
+ }
+}
+
+static void tcg_out_bx_reg(TCGContext *s, ARMCond cond, TCGReg rn)
+{
+ tcg_out32(s, (cond << 28) | 0x012fff10 | rn);
+}
+
+static void tcg_out_b_reg(TCGContext *s, ARMCond cond, TCGReg rn)
+{
+ /*
+ * Unless the C portion of QEMU is compiled as thumb, we don't need
+ * true BX semantics; merely a branch to an address held in a register.
+ */
+ if (use_armv5t_instructions) {
+ tcg_out_bx_reg(s, cond, rn);
+ } else {
+ tcg_out_mov_reg(s, cond, TCG_REG_PC, rn);
+ }
+}
+
+static void tcg_out_dat_imm(TCGContext *s, ARMCond cond, ARMInsn opc,
+ TCGReg rd, TCGReg rn, int im)
+{
+ tcg_out32(s, (cond << 28) | (1 << 25) | opc |
+ (rn << 16) | (rd << 12) | im);
+}
+
+static void tcg_out_ldstm(TCGContext *s, ARMCond cond, ARMInsn opc,
+ TCGReg rn, uint16_t mask)
+{
+ tcg_out32(s, (cond << 28) | opc | (rn << 16) | mask);
+}
+
+/* Note that this routine is used for both LDR and LDRH formats, so we do
+ not wish to include an immediate shift at this point. */
+static void tcg_out_memop_r(TCGContext *s, ARMCond cond, ARMInsn opc, TCGReg rt,
+ TCGReg rn, TCGReg rm, bool u, bool p, bool w)
+{
+ tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24)
+ | (w << 21) | (rn << 16) | (rt << 12) | rm);
+}
+
+static void tcg_out_memop_8(TCGContext *s, ARMCond cond, ARMInsn opc, TCGReg rt,
+ TCGReg rn, int imm8, bool p, bool w)
+{
+ bool u = 1;
+ if (imm8 < 0) {
+ imm8 = -imm8;
+ u = 0;
+ }
+ tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24) | (w << 21) |
+ (rn << 16) | (rt << 12) | ((imm8 & 0xf0) << 4) | (imm8 & 0xf));
+}
+
+static void tcg_out_memop_12(TCGContext *s, ARMCond cond, ARMInsn opc,
+ TCGReg rt, TCGReg rn, int imm12, bool p, bool w)
+{
+ bool u = 1;
+ if (imm12 < 0) {
+ imm12 = -imm12;
+ u = 0;
+ }
+ tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24) | (w << 21) |
+ (rn << 16) | (rt << 12) | imm12);
+}
+
+static void tcg_out_ld32_12(TCGContext *s, ARMCond cond, TCGReg rt,
+ TCGReg rn, int imm12)
+{
+ tcg_out_memop_12(s, cond, INSN_LDR_IMM, rt, rn, imm12, 1, 0);
+}
+
+static void tcg_out_st32_12(TCGContext *s, ARMCond cond, TCGReg rt,
+ TCGReg rn, int imm12)
+{
+ tcg_out_memop_12(s, cond, INSN_STR_IMM, rt, rn, imm12, 1, 0);
+}
+
+static void tcg_out_ld32_r(TCGContext *s, ARMCond cond, TCGReg rt,
+ TCGReg rn, TCGReg rm)
+{
+ tcg_out_memop_r(s, cond, INSN_LDR_REG, rt, rn, rm, 1, 1, 0);
+}
+
+static void tcg_out_st32_r(TCGContext *s, ARMCond cond, TCGReg rt,
+ TCGReg rn, TCGReg rm)
+{
+ tcg_out_memop_r(s, cond, INSN_STR_REG, rt, rn, rm, 1, 1, 0);
+}
+
+static void tcg_out_ldrd_8(TCGContext *s, ARMCond cond, TCGReg rt,
+ TCGReg rn, int imm8)
+{
+ tcg_out_memop_8(s, cond, INSN_LDRD_IMM, rt, rn, imm8, 1, 0);
+}
+
+static void tcg_out_ldrd_r(TCGContext *s, ARMCond cond, TCGReg rt,
+ TCGReg rn, TCGReg rm)
+{
+ tcg_out_memop_r(s, cond, INSN_LDRD_REG, rt, rn, rm, 1, 1, 0);
+}
+
+static void __attribute__((unused))
+tcg_out_ldrd_rwb(TCGContext *s, ARMCond cond, TCGReg rt, TCGReg rn, TCGReg rm)
+{
+ tcg_out_memop_r(s, cond, INSN_LDRD_REG, rt, rn, rm, 1, 1, 1);
+}
+
+static void tcg_out_strd_8(TCGContext *s, ARMCond cond, TCGReg rt,
+ TCGReg rn, int imm8)
+{
+ tcg_out_memop_8(s, cond, INSN_STRD_IMM, rt, rn, imm8, 1, 0);
+}
+
+static void tcg_out_strd_r(TCGContext *s, ARMCond cond, TCGReg rt,
+ TCGReg rn, TCGReg rm)
+{
+ tcg_out_memop_r(s, cond, INSN_STRD_REG, rt, rn, rm, 1, 1, 0);
+}
+
+/* Register pre-increment with base writeback. */
+static void tcg_out_ld32_rwb(TCGContext *s, ARMCond cond, TCGReg rt,
+ TCGReg rn, TCGReg rm)
+{
+ tcg_out_memop_r(s, cond, INSN_LDR_REG, rt, rn, rm, 1, 1, 1);
+}
+
+static void tcg_out_st32_rwb(TCGContext *s, ARMCond cond, TCGReg rt,
+ TCGReg rn, TCGReg rm)
+{
+ tcg_out_memop_r(s, cond, INSN_STR_REG, rt, rn, rm, 1, 1, 1);
+}
+
+static void tcg_out_ld16u_8(TCGContext *s, ARMCond cond, TCGReg rt,
+ TCGReg rn, int imm8)
+{
+ tcg_out_memop_8(s, cond, INSN_LDRH_IMM, rt, rn, imm8, 1, 0);
+}
+
+static void tcg_out_st16_8(TCGContext *s, ARMCond cond, TCGReg rt,
+ TCGReg rn, int imm8)
+{
+ tcg_out_memop_8(s, cond, INSN_STRH_IMM, rt, rn, imm8, 1, 0);
+}
+
+static void tcg_out_ld16u_r(TCGContext *s, ARMCond cond, TCGReg rt,
+ TCGReg rn, TCGReg rm)
+{
+ tcg_out_memop_r(s, cond, INSN_LDRH_REG, rt, rn, rm, 1, 1, 0);
+}
+
+static void tcg_out_st16_r(TCGContext *s, ARMCond cond, TCGReg rt,
+ TCGReg rn, TCGReg rm)
+{
+ tcg_out_memop_r(s, cond, INSN_STRH_REG, rt, rn, rm, 1, 1, 0);
+}
+
+static void tcg_out_ld16s_8(TCGContext *s, ARMCond cond, TCGReg rt,
+ TCGReg rn, int imm8)
+{
+ tcg_out_memop_8(s, cond, INSN_LDRSH_IMM, rt, rn, imm8, 1, 0);
+}
+
+static void tcg_out_ld16s_r(TCGContext *s, ARMCond cond, TCGReg rt,
+ TCGReg rn, TCGReg rm)
+{
+ tcg_out_memop_r(s, cond, INSN_LDRSH_REG, rt, rn, rm, 1, 1, 0);
+}
+
+static void tcg_out_ld8_12(TCGContext *s, ARMCond cond, TCGReg rt,
+ TCGReg rn, int imm12)
+{
+ tcg_out_memop_12(s, cond, INSN_LDRB_IMM, rt, rn, imm12, 1, 0);
+}
+
+static void tcg_out_st8_12(TCGContext *s, ARMCond cond, TCGReg rt,
+ TCGReg rn, int imm12)
+{
+ tcg_out_memop_12(s, cond, INSN_STRB_IMM, rt, rn, imm12, 1, 0);
+}
+
+static void tcg_out_ld8_r(TCGContext *s, ARMCond cond, TCGReg rt,
+ TCGReg rn, TCGReg rm)
+{
+ tcg_out_memop_r(s, cond, INSN_LDRB_REG, rt, rn, rm, 1, 1, 0);
+}
+
+static void tcg_out_st8_r(TCGContext *s, ARMCond cond, TCGReg rt,
+ TCGReg rn, TCGReg rm)
+{
+ tcg_out_memop_r(s, cond, INSN_STRB_REG, rt, rn, rm, 1, 1, 0);
+}
+
+static void tcg_out_ld8s_8(TCGContext *s, ARMCond cond, TCGReg rt,
+ TCGReg rn, int imm8)
+{
+ tcg_out_memop_8(s, cond, INSN_LDRSB_IMM, rt, rn, imm8, 1, 0);
+}
+
+static void tcg_out_ld8s_r(TCGContext *s, ARMCond cond, TCGReg rt,
+ TCGReg rn, TCGReg rm)
+{
+ tcg_out_memop_r(s, cond, INSN_LDRSB_REG, rt, rn, rm, 1, 1, 0);
+}
+
+static void tcg_out_movi_pool(TCGContext *s, ARMCond cond,
+ TCGReg rd, uint32_t arg)
+{
+ new_pool_label(s, arg, R_ARM_PC13, s->code_ptr, 0);
+ tcg_out_ld32_12(s, cond, rd, TCG_REG_PC, 0);
+}
+
+static void tcg_out_movi32(TCGContext *s, ARMCond cond,
+ TCGReg rd, uint32_t arg)
+{
+ int imm12, diff, opc, sh1, sh2;
+ uint32_t tt0, tt1, tt2;
+
+ /* Check a single MOV/MVN before anything else. */
+ imm12 = encode_imm(arg);
+ if (imm12 >= 0) {
+ tcg_out_dat_imm(s, cond, ARITH_MOV, rd, 0, imm12);
+ return;
+ }
+ imm12 = encode_imm(~arg);
+ if (imm12 >= 0) {
+ tcg_out_dat_imm(s, cond, ARITH_MVN, rd, 0, imm12);
+ return;
+ }
+
+ /* Check for a pc-relative address. This will usually be the TB,
+ or within the TB, which is immediately before the code block. */
+ diff = tcg_pcrel_diff(s, (void *)arg) - 8;
+ if (diff >= 0) {
+ imm12 = encode_imm(diff);
+ if (imm12 >= 0) {
+ tcg_out_dat_imm(s, cond, ARITH_ADD, rd, TCG_REG_PC, imm12);
+ return;
+ }
+ } else {
+ imm12 = encode_imm(-diff);
+ if (imm12 >= 0) {
+ tcg_out_dat_imm(s, cond, ARITH_SUB, rd, TCG_REG_PC, imm12);
+ return;
+ }
+ }
+
+ /* Use movw + movt. */
+ if (use_armv7_instructions) {
+ /* movw */
+ tcg_out32(s, (cond << 28) | 0x03000000 | (rd << 12)
+ | ((arg << 4) & 0x000f0000) | (arg & 0xfff));
+ if (arg & 0xffff0000) {
+ /* movt */
+ tcg_out32(s, (cond << 28) | 0x03400000 | (rd << 12)
+ | ((arg >> 12) & 0x000f0000) | ((arg >> 16) & 0xfff));
+ }
+ return;
+ }
+
+ /* Look for sequences of two insns. If we have lots of 1's, we can
+ shorten the sequence by beginning with mvn and then clearing
+ higher bits with eor. */
+ tt0 = arg;
+ opc = ARITH_MOV;
+ if (ctpop32(arg) > 16) {
+ tt0 = ~arg;
+ opc = ARITH_MVN;
+ }
+ sh1 = ctz32(tt0) & ~1;
+ tt1 = tt0 & ~(0xff << sh1);
+ sh2 = ctz32(tt1) & ~1;
+ tt2 = tt1 & ~(0xff << sh2);
+ if (tt2 == 0) {
+ int rot;
+
+ rot = ((32 - sh1) << 7) & 0xf00;
+ tcg_out_dat_imm(s, cond, opc, rd, 0, ((tt0 >> sh1) & 0xff) | rot);
+ rot = ((32 - sh2) << 7) & 0xf00;
+ tcg_out_dat_imm(s, cond, ARITH_EOR, rd, rd,
+ ((tt0 >> sh2) & 0xff) | rot);
+ return;
+ }
+
+ /* Otherwise, drop it into the constant pool. */
+ tcg_out_movi_pool(s, cond, rd, arg);
+}
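+
+/*
+ * Worked example for the two-insn path (pre-v7 host, illustrative only):
+ * arg = 0x00ff00ff has no single MOV/MVN encoding but splits cleanly into
+ *   mov  rd, #0x000000ff
+ *   eor  rd, rd, #0x00ff0000
+ * whereas something like 0x12345678 does not split into two such insns
+ * and is dropped into the constant pool instead.
+ */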
+
+/*
+ * Emit either the reg,imm or reg,reg form of a data-processing insn.
+ * rhs must satisfy the "rI" constraint.
+ */
+static void tcg_out_dat_rI(TCGContext *s, ARMCond cond, ARMInsn opc,
+ TCGReg dst, TCGReg lhs, TCGArg rhs, int rhs_is_const)
+{
+ if (rhs_is_const) {
+ tcg_out_dat_imm(s, cond, opc, dst, lhs, encode_imm_nofail(rhs));
+ } else {
+ tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
+ }
+}
+
+/*
+ * Emit either the reg,imm or reg,reg form of a data-processing insn.
+ * rhs must satisfy the "rIK" constraint.
+ */
+static void tcg_out_dat_rIK(TCGContext *s, ARMCond cond, ARMInsn opc,
+ ARMInsn opinv, TCGReg dst, TCGReg lhs, TCGArg rhs,
+ bool rhs_is_const)
+{
+ if (rhs_is_const) {
+ int imm12 = encode_imm(rhs);
+ if (imm12 < 0) {
+ imm12 = encode_imm_nofail(~rhs);
+ opc = opinv;
+ }
+ tcg_out_dat_imm(s, cond, opc, dst, lhs, imm12);
+ } else {
+ tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
+ }
+}
+
+static void tcg_out_dat_rIN(TCGContext *s, ARMCond cond, ARMInsn opc,
+ ARMInsn opneg, TCGReg dst, TCGReg lhs, TCGArg rhs,
+ bool rhs_is_const)
+{
+ /* Emit either the reg,imm or reg,reg form of a data-processing insn.
+ * rhs must satisfy the "rIN" constraint.
+ */
+ if (rhs_is_const) {
+ int imm12 = encode_imm(rhs);
+ if (imm12 < 0) {
+ imm12 = encode_imm_nofail(-rhs);
+ opc = opneg;
+ }
+ tcg_out_dat_imm(s, cond, opc, dst, lhs, imm12);
+ } else {
+ tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
+ }
+}
+
+static void tcg_out_mul32(TCGContext *s, ARMCond cond, TCGReg rd,
+ TCGReg rn, TCGReg rm)
+{
+ /* if ArchVersion() < 6 && d == n then UNPREDICTABLE; */
+ if (!use_armv6_instructions && rd == rn) {
+ if (rd == rm) {
+ /* rd == rn == rm; copy an input to tmp first. */
+ tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn);
+ rm = rn = TCG_REG_TMP;
+ } else {
+ rn = rm;
+ rm = rd;
+ }
+ }
+ /* mul */
+ tcg_out32(s, (cond << 28) | 0x90 | (rd << 16) | (rm << 8) | rn);
+}
+
+static void tcg_out_umull32(TCGContext *s, ARMCond cond, TCGReg rd0,
+ TCGReg rd1, TCGReg rn, TCGReg rm)
+{
+ /* if ArchVersion() < 6 && (dHi == n || dLo == n) then UNPREDICTABLE; */
+ if (!use_armv6_instructions && (rd0 == rn || rd1 == rn)) {
+ if (rd0 == rm || rd1 == rm) {
+ tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn);
+ rn = TCG_REG_TMP;
+ } else {
+ TCGReg t = rn;
+ rn = rm;
+ rm = t;
+ }
+ }
+ /* umull */
+ tcg_out32(s, (cond << 28) | 0x00800090 |
+ (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn);
+}
+
+static void tcg_out_smull32(TCGContext *s, ARMCond cond, TCGReg rd0,
+ TCGReg rd1, TCGReg rn, TCGReg rm)
+{
+ /* if ArchVersion() < 6 && (dHi == n || dLo == n) then UNPREDICTABLE; */
+ if (!use_armv6_instructions && (rd0 == rn || rd1 == rn)) {
+ if (rd0 == rm || rd1 == rm) {
+ tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn);
+ rn = TCG_REG_TMP;
+ } else {
+ TCGReg t = rn;
+ rn = rm;
+ rm = t;
+ }
+ }
+ /* smull */
+ tcg_out32(s, (cond << 28) | 0x00c00090 |
+ (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn);
+}
+
+static void tcg_out_sdiv(TCGContext *s, ARMCond cond,
+ TCGReg rd, TCGReg rn, TCGReg rm)
+{
+ tcg_out32(s, 0x0710f010 | (cond << 28) | (rd << 16) | rn | (rm << 8));
+}
+
+static void tcg_out_udiv(TCGContext *s, ARMCond cond,
+ TCGReg rd, TCGReg rn, TCGReg rm)
+{
+ tcg_out32(s, 0x0730f010 | (cond << 28) | (rd << 16) | rn | (rm << 8));
+}
+
+static void tcg_out_ext8s(TCGContext *s, ARMCond cond, TCGReg rd, TCGReg rn)
+{
+ if (use_armv6_instructions) {
+ /* sxtb */
+ tcg_out32(s, 0x06af0070 | (cond << 28) | (rd << 12) | rn);
+ } else {
+ tcg_out_dat_reg(s, cond, ARITH_MOV,
+ rd, 0, rn, SHIFT_IMM_LSL(24));
+ tcg_out_dat_reg(s, cond, ARITH_MOV,
+ rd, 0, rd, SHIFT_IMM_ASR(24));
+ }
+}
+
+static void __attribute__((unused))
+tcg_out_ext8u(TCGContext *s, ARMCond cond, TCGReg rd, TCGReg rn)
+{
+ tcg_out_dat_imm(s, cond, ARITH_AND, rd, rn, 0xff);
+}
+
+static void tcg_out_ext16s(TCGContext *s, ARMCond cond, TCGReg rd, TCGReg rn)
+{
+ if (use_armv6_instructions) {
+ /* sxth */
+ tcg_out32(s, 0x06bf0070 | (cond << 28) | (rd << 12) | rn);
+ } else {
+ tcg_out_dat_reg(s, cond, ARITH_MOV,
+ rd, 0, rn, SHIFT_IMM_LSL(16));
+ tcg_out_dat_reg(s, cond, ARITH_MOV,
+ rd, 0, rd, SHIFT_IMM_ASR(16));
+ }
+}
+
+static void tcg_out_ext16u(TCGContext *s, ARMCond cond, TCGReg rd, TCGReg rn)
+{
+ if (use_armv6_instructions) {
+ /* uxth */
+ tcg_out32(s, 0x06ff0070 | (cond << 28) | (rd << 12) | rn);
+ } else {
+ tcg_out_dat_reg(s, cond, ARITH_MOV,
+ rd, 0, rn, SHIFT_IMM_LSL(16));
+ tcg_out_dat_reg(s, cond, ARITH_MOV,
+ rd, 0, rd, SHIFT_IMM_LSR(16));
+ }
+}
+
+static void tcg_out_bswap16(TCGContext *s, ARMCond cond,
+ TCGReg rd, TCGReg rn, int flags)
+{
+ if (use_armv6_instructions) {
+ if (flags & TCG_BSWAP_OS) {
+ /* revsh */
+ tcg_out32(s, 0x06ff0fb0 | (cond << 28) | (rd << 12) | rn);
+ return;
+ }
+
+ /* rev16 */
+ tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn);
+ if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
+ /* uxth */
+ tcg_out32(s, 0x06ff0070 | (cond << 28) | (rd << 12) | rd);
+ }
+ return;
+ }
+
+ if (flags == 0) {
+ /*
+ * For stores, no input or output extension:
+ * rn = xxAB
+ * lsr tmp, rn, #8 tmp = 0xxA
+ * and tmp, tmp, #0xff tmp = 000A
+ * orr rd, tmp, rn, lsl #8 rd = xABA
+ */
+ tcg_out_dat_reg(s, cond, ARITH_MOV,
+ TCG_REG_TMP, 0, rn, SHIFT_IMM_LSR(8));
+ tcg_out_dat_imm(s, cond, ARITH_AND, TCG_REG_TMP, TCG_REG_TMP, 0xff);
+ tcg_out_dat_reg(s, cond, ARITH_ORR,
+ rd, TCG_REG_TMP, rn, SHIFT_IMM_LSL(8));
+ return;
+ }
+
+ /*
+ * Byte swap, leaving the result at the top of the register.
+ * We will then shift down, zero or sign-extending.
+ */
+ if (flags & TCG_BSWAP_IZ) {
+ /*
+ * rn = 00AB
+ * ror tmp, rn, #8 tmp = B00A
+ * orr tmp, tmp, tmp, lsl #16 tmp = BA00
+ */
+ tcg_out_dat_reg(s, cond, ARITH_MOV,
+ TCG_REG_TMP, 0, rn, SHIFT_IMM_ROR(8));
+ tcg_out_dat_reg(s, cond, ARITH_ORR,
+ TCG_REG_TMP, TCG_REG_TMP, TCG_REG_TMP,
+ SHIFT_IMM_LSL(16));
+ } else {
+ /*
+ * rn = xxAB
+ * and tmp, rn, #0xff00 tmp = 00A0
+ * lsl tmp, tmp, #8 tmp = 0A00
+ * orr tmp, tmp, rn, lsl #24 tmp = BA00
+ */
+ tcg_out_dat_rI(s, cond, ARITH_AND, TCG_REG_TMP, rn, 0xff00, 1);
+ tcg_out_dat_reg(s, cond, ARITH_MOV,
+ TCG_REG_TMP, 0, TCG_REG_TMP, SHIFT_IMM_LSL(8));
+ tcg_out_dat_reg(s, cond, ARITH_ORR,
+ TCG_REG_TMP, TCG_REG_TMP, rn, SHIFT_IMM_LSL(24));
+ }
+ tcg_out_dat_reg(s, cond, ARITH_MOV, rd, 0, TCG_REG_TMP,
+ (flags & TCG_BSWAP_OS
+ ? SHIFT_IMM_ASR(8) : SHIFT_IMM_LSR(8)));
+}
+
+static void tcg_out_bswap32(TCGContext *s, ARMCond cond, TCGReg rd, TCGReg rn)
+{
+ if (use_armv6_instructions) {
+ /* rev */
+ tcg_out32(s, 0x06bf0f30 | (cond << 28) | (rd << 12) | rn);
+ } else {
+ tcg_out_dat_reg(s, cond, ARITH_EOR,
+ TCG_REG_TMP, rn, rn, SHIFT_IMM_ROR(16));
+ tcg_out_dat_imm(s, cond, ARITH_BIC,
+ TCG_REG_TMP, TCG_REG_TMP, 0xff | 0x800);
+ tcg_out_dat_reg(s, cond, ARITH_MOV,
+ rd, 0, rn, SHIFT_IMM_ROR(8));
+ tcg_out_dat_reg(s, cond, ARITH_EOR,
+ rd, rd, TCG_REG_TMP, SHIFT_IMM_LSR(8));
+ }
+}
+
+static void tcg_out_deposit(TCGContext *s, ARMCond cond, TCGReg rd,
+ TCGArg a1, int ofs, int len, bool const_a1)
+{
+ if (const_a1) {
+ /* bfi becomes bfc with rn == 15. */
+ a1 = 15;
+ }
+ /* bfi/bfc */
+ tcg_out32(s, 0x07c00010 | (cond << 28) | (rd << 12) | a1
+ | (ofs << 7) | ((ofs + len - 1) << 16));
+}
+
+static void tcg_out_extract(TCGContext *s, ARMCond cond, TCGReg rd,
+ TCGReg rn, int ofs, int len)
+{
+ /* ubfx */
+ tcg_out32(s, 0x07e00050 | (cond << 28) | (rd << 12) | rn
+ | (ofs << 7) | ((len - 1) << 16));
+}
+
+static void tcg_out_sextract(TCGContext *s, ARMCond cond, TCGReg rd,
+ TCGReg rn, int ofs, int len)
+{
+ /* sbfx */
+ tcg_out32(s, 0x07a00050 | (cond << 28) | (rd << 12) | rn
+ | (ofs << 7) | ((len - 1) << 16));
+}
+
+static void tcg_out_ld32u(TCGContext *s, ARMCond cond,
+ TCGReg rd, TCGReg rn, int32_t offset)
+{
+ if (offset > 0xfff || offset < -0xfff) {
+ tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
+ tcg_out_ld32_r(s, cond, rd, rn, TCG_REG_TMP);
+    } else {
+        tcg_out_ld32_12(s, cond, rd, rn, offset);
+    }
+}
+
+static void tcg_out_st32(TCGContext *s, ARMCond cond,
+ TCGReg rd, TCGReg rn, int32_t offset)
+{
+ if (offset > 0xfff || offset < -0xfff) {
+ tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
+ tcg_out_st32_r(s, cond, rd, rn, TCG_REG_TMP);
+    } else {
+        tcg_out_st32_12(s, cond, rd, rn, offset);
+    }
+}
+
+static void tcg_out_ld16u(TCGContext *s, ARMCond cond,
+ TCGReg rd, TCGReg rn, int32_t offset)
+{
+ if (offset > 0xff || offset < -0xff) {
+ tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
+ tcg_out_ld16u_r(s, cond, rd, rn, TCG_REG_TMP);
+    } else {
+        tcg_out_ld16u_8(s, cond, rd, rn, offset);
+    }
+}
+
+static void tcg_out_ld16s(TCGContext *s, ARMCond cond,
+ TCGReg rd, TCGReg rn, int32_t offset)
+{
+ if (offset > 0xff || offset < -0xff) {
+ tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
+ tcg_out_ld16s_r(s, cond, rd, rn, TCG_REG_TMP);
+    } else {
+        tcg_out_ld16s_8(s, cond, rd, rn, offset);
+    }
+}
+
+static void tcg_out_st16(TCGContext *s, ARMCond cond,
+ TCGReg rd, TCGReg rn, int32_t offset)
+{
+ if (offset > 0xff || offset < -0xff) {
+ tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
+ tcg_out_st16_r(s, cond, rd, rn, TCG_REG_TMP);
+    } else {
+        tcg_out_st16_8(s, cond, rd, rn, offset);
+    }
+}
+
+static void tcg_out_ld8u(TCGContext *s, ARMCond cond,
+ TCGReg rd, TCGReg rn, int32_t offset)
+{
+ if (offset > 0xfff || offset < -0xfff) {
+ tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
+ tcg_out_ld8_r(s, cond, rd, rn, TCG_REG_TMP);
+    } else {
+        tcg_out_ld8_12(s, cond, rd, rn, offset);
+    }
+}
+
+static void tcg_out_ld8s(TCGContext *s, ARMCond cond,
+ TCGReg rd, TCGReg rn, int32_t offset)
+{
+ if (offset > 0xff || offset < -0xff) {
+ tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
+ tcg_out_ld8s_r(s, cond, rd, rn, TCG_REG_TMP);
+    } else {
+        tcg_out_ld8s_8(s, cond, rd, rn, offset);
+    }
+}
+
+static void tcg_out_st8(TCGContext *s, ARMCond cond,
+ TCGReg rd, TCGReg rn, int32_t offset)
+{
+ if (offset > 0xfff || offset < -0xfff) {
+ tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
+ tcg_out_st8_r(s, cond, rd, rn, TCG_REG_TMP);
+    } else {
+        tcg_out_st8_12(s, cond, rd, rn, offset);
+    }
+}
+
+/*
+ * The _goto case is normally between TBs within the same code buffer, and
+ * with the code buffer limited to 16MB we wouldn't need the long case.
+ * But we also use it for the tail-call to the qemu_ld/st helpers, which does.
+ */
+static void tcg_out_goto(TCGContext *s, ARMCond cond, const tcg_insn_unit *addr)
+{
+ intptr_t addri = (intptr_t)addr;
+ ptrdiff_t disp = tcg_pcrel_diff(s, addr);
+ bool arm_mode = !(addri & 1);
+
+ if (arm_mode && disp - 8 < 0x01fffffd && disp - 8 > -0x01fffffd) {
+ tcg_out_b_imm(s, cond, disp);
+ return;
+ }
+
+ /* LDR is interworking from v5t. */
+ if (arm_mode || use_armv5t_instructions) {
+ tcg_out_movi_pool(s, cond, TCG_REG_PC, addri);
+ return;
+ }
+
+ /* else v4t */
+ tcg_out_movi32(s, COND_AL, TCG_REG_TMP, addri);
+ tcg_out_bx_reg(s, COND_AL, TCG_REG_TMP);
+}
+
+/*
+ * The call case is mostly used for helpers - so it's not unreasonable
+ * for them to be beyond branch range.
+ */
+static void tcg_out_call(TCGContext *s, const tcg_insn_unit *addr)
+{
+ intptr_t addri = (intptr_t)addr;
+ ptrdiff_t disp = tcg_pcrel_diff(s, addr);
+ bool arm_mode = !(addri & 1);
+
+ if (disp - 8 < 0x02000000 && disp - 8 >= -0x02000000) {
+ if (arm_mode) {
+ tcg_out_bl_imm(s, COND_AL, disp);
+ return;
+ }
+ if (use_armv5t_instructions) {
+ tcg_out_blx_imm(s, disp);
+ return;
+ }
+ }
+
+ if (use_armv5t_instructions) {
+ tcg_out_movi32(s, COND_AL, TCG_REG_TMP, addri);
+ tcg_out_blx_reg(s, COND_AL, TCG_REG_TMP);
+ } else if (arm_mode) {
+ /* ??? Know that movi_pool emits exactly 1 insn. */
+ tcg_out_mov_reg(s, COND_AL, TCG_REG_R14, TCG_REG_PC);
+ tcg_out_movi_pool(s, COND_AL, TCG_REG_PC, addri);
+ } else {
+ tcg_out_movi32(s, COND_AL, TCG_REG_TMP, addri);
+ tcg_out_mov_reg(s, COND_AL, TCG_REG_R14, TCG_REG_PC);
+ tcg_out_bx_reg(s, COND_AL, TCG_REG_TMP);
+ }
+}
+
+static void tcg_out_goto_label(TCGContext *s, ARMCond cond, TCGLabel *l)
+{
+ if (l->has_value) {
+ tcg_out_goto(s, cond, l->u.value_ptr);
+ } else {
+ tcg_out_reloc(s, s->code_ptr, R_ARM_PC24, l, 0);
+ tcg_out_b_imm(s, cond, 0);
+ }
+}
+
+static void tcg_out_mb(TCGContext *s, TCGArg a0)
+{
+ if (use_armv7_instructions) {
+ tcg_out32(s, INSN_DMB_ISH);
+ } else if (use_armv6_instructions) {
+ tcg_out32(s, INSN_DMB_MCR);
+ }
+}
+
+static TCGCond tcg_out_cmp2(TCGContext *s, const TCGArg *args,
+ const int *const_args)
+{
+ TCGReg al = args[0];
+ TCGReg ah = args[1];
+ TCGArg bl = args[2];
+ TCGArg bh = args[3];
+ TCGCond cond = args[4];
+ int const_bl = const_args[2];
+ int const_bh = const_args[3];
+
+ switch (cond) {
+ case TCG_COND_EQ:
+ case TCG_COND_NE:
+ case TCG_COND_LTU:
+ case TCG_COND_LEU:
+ case TCG_COND_GTU:
+ case TCG_COND_GEU:
+        /* We perform a conditional comparison.  If the high half is
+ equal, then overwrite the flags with the comparison of the
+ low half. The resulting flags cover the whole. */
+ tcg_out_dat_rI(s, COND_AL, ARITH_CMP, 0, ah, bh, const_bh);
+ tcg_out_dat_rI(s, COND_EQ, ARITH_CMP, 0, al, bl, const_bl);
+ return cond;
+
+ case TCG_COND_LT:
+ case TCG_COND_GE:
+ /* We perform a double-word subtraction and examine the result.
+ We do not actually need the result of the subtract, so the
+ low part "subtract" is a compare. For the high half we have
+ no choice but to compute into a temporary. */
+ tcg_out_dat_rI(s, COND_AL, ARITH_CMP, 0, al, bl, const_bl);
+ tcg_out_dat_rI(s, COND_AL, ARITH_SBC | TO_CPSR,
+ TCG_REG_TMP, ah, bh, const_bh);
+ return cond;
+
+ case TCG_COND_LE:
+ case TCG_COND_GT:
+ /* Similar, but with swapped arguments, via reversed subtract. */
+ tcg_out_dat_rI(s, COND_AL, ARITH_RSB | TO_CPSR,
+ TCG_REG_TMP, al, bl, const_bl);
+ tcg_out_dat_rI(s, COND_AL, ARITH_RSC | TO_CPSR,
+ TCG_REG_TMP, ah, bh, const_bh);
+ return tcg_swap_cond(cond);
+
+ default:
+ g_assert_not_reached();
+ }
+}
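+
+/*
+ * For example, a 64-bit unsigned less-than (TCG_COND_LTU) comes out as
+ *   cmp   ah, bh
+ *   cmpeq al, bl
+ * and the caller branches on the returned condition (LTU -> CC), which is
+ * correct whether or not the high halves were equal.
+ */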
+
+/*
+ * Note that TCGReg references Q-registers.
+ * D-regno = 2 * Q-regno, so shift left by 1 while inserting.
+ */
+static uint32_t encode_vd(TCGReg rd)
+{
+ tcg_debug_assert(rd >= TCG_REG_Q0);
+ return (extract32(rd, 3, 1) << 22) | (extract32(rd, 0, 3) << 13);
+}
+
+static uint32_t encode_vn(TCGReg rn)
+{
+ tcg_debug_assert(rn >= TCG_REG_Q0);
+ return (extract32(rn, 3, 1) << 7) | (extract32(rn, 0, 3) << 17);
+}
+
+static uint32_t encode_vm(TCGReg rm)
+{
+ tcg_debug_assert(rm >= TCG_REG_Q0);
+ return (extract32(rm, 3, 1) << 5) | (extract32(rm, 0, 3) << 1);
+}
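+
+/*
+ * Example (hand-checked): TCG_REG_Q3 has low bits 0b011, so encode_vd()
+ * places 0b011 at bit 13 and bit 3 of the regno (here 0) at bit 22, which
+ * the insn reads as D-register 6 -- i.e. Q3 viewed as the D6:D7 pair.
+ */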
+
+static void tcg_out_vreg2(TCGContext *s, ARMInsn insn, int q, int vece,
+ TCGReg d, TCGReg m)
+{
+ tcg_out32(s, insn | (vece << 18) | (q << 6) |
+ encode_vd(d) | encode_vm(m));
+}
+
+static void tcg_out_vreg3(TCGContext *s, ARMInsn insn, int q, int vece,
+ TCGReg d, TCGReg n, TCGReg m)
+{
+ tcg_out32(s, insn | (vece << 20) | (q << 6) |
+ encode_vd(d) | encode_vn(n) | encode_vm(m));
+}
+
+static void tcg_out_vmovi(TCGContext *s, TCGReg rd,
+ int q, int op, int cmode, uint8_t imm8)
+{
+ tcg_out32(s, INSN_VMOVI | encode_vd(rd) | (q << 6) | (op << 5)
+ | (cmode << 8) | extract32(imm8, 0, 4)
+ | (extract32(imm8, 4, 3) << 16)
+ | (extract32(imm8, 7, 1) << 24));
+}
+
+static void tcg_out_vshifti(TCGContext *s, ARMInsn insn, int q,
+ TCGReg rd, TCGReg rm, int l_imm6)
+{
+ tcg_out32(s, insn | (q << 6) | encode_vd(rd) | encode_vm(rm) |
+ (extract32(l_imm6, 6, 1) << 7) |
+ (extract32(l_imm6, 0, 6) << 16));
+}
+
+static void tcg_out_vldst(TCGContext *s, ARMInsn insn,
+ TCGReg rd, TCGReg rn, int offset)
+{
+ if (offset != 0) {
+ if (check_fit_imm(offset) || check_fit_imm(-offset)) {
+ tcg_out_dat_rIN(s, COND_AL, ARITH_ADD, ARITH_SUB,
+ TCG_REG_TMP, rn, offset, true);
+ } else {
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, offset);
+ tcg_out_dat_reg(s, COND_AL, ARITH_ADD,
+ TCG_REG_TMP, TCG_REG_TMP, rn, 0);
+ }
+ rn = TCG_REG_TMP;
+ }
+ tcg_out32(s, insn | (rn << 16) | encode_vd(rd) | 0xf);
+}
+
+#ifdef CONFIG_SOFTMMU
+#include "../tcg-ldst.c.inc"
+
+/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
+ * int mmu_idx, uintptr_t ra)
+ */
+static void * const qemu_ld_helpers[MO_SSIZE + 1] = {
+ [MO_UB] = helper_ret_ldub_mmu,
+ [MO_SB] = helper_ret_ldsb_mmu,
+#ifdef HOST_WORDS_BIGENDIAN
+ [MO_UW] = helper_be_lduw_mmu,
+ [MO_UL] = helper_be_ldul_mmu,
+ [MO_Q] = helper_be_ldq_mmu,
+ [MO_SW] = helper_be_ldsw_mmu,
+ [MO_SL] = helper_be_ldul_mmu,
+#else
+ [MO_UW] = helper_le_lduw_mmu,
+ [MO_UL] = helper_le_ldul_mmu,
+ [MO_Q] = helper_le_ldq_mmu,
+ [MO_SW] = helper_le_ldsw_mmu,
+ [MO_SL] = helper_le_ldul_mmu,
+#endif
+};
+
+/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
+ * uintxx_t val, int mmu_idx, uintptr_t ra)
+ */
+static void * const qemu_st_helpers[MO_SIZE + 1] = {
+ [MO_8] = helper_ret_stb_mmu,
+#ifdef HOST_WORDS_BIGENDIAN
+ [MO_16] = helper_be_stw_mmu,
+ [MO_32] = helper_be_stl_mmu,
+ [MO_64] = helper_be_stq_mmu,
+#else
+ [MO_16] = helper_le_stw_mmu,
+ [MO_32] = helper_le_stl_mmu,
+ [MO_64] = helper_le_stq_mmu,
+#endif
+};
+
+/* Helper routines for marshalling helper function arguments into
+ * the correct registers and stack.
+ * argreg is where we want to put this argument, arg is the argument itself.
+ * Return value is the updated argreg ready for the next call.
+ * Note that argregs 0..3 are real registers; 4 and up go on the stack.
+ *
+ * We provide routines for arguments which are: immediate, 32 bit
+ * value in register, 16 and 8 bit values in register (which must be zero
+ * extended before use) and 64 bit value in a lo:hi register pair.
+ */
+#define DEFINE_TCG_OUT_ARG(NAME, ARGTYPE, MOV_ARG, EXT_ARG) \
+static TCGReg NAME(TCGContext *s, TCGReg argreg, ARGTYPE arg) \
+{ \
+ if (argreg < 4) { \
+ MOV_ARG(s, COND_AL, argreg, arg); \
+ } else { \
+ int ofs = (argreg - 4) * 4; \
+ EXT_ARG; \
+ tcg_debug_assert(ofs + 4 <= TCG_STATIC_CALL_ARGS_SIZE); \
+ tcg_out_st32_12(s, COND_AL, arg, TCG_REG_CALL_STACK, ofs); \
+ } \
+ return argreg + 1; \
+}
+
+DEFINE_TCG_OUT_ARG(tcg_out_arg_imm32, uint32_t, tcg_out_movi32,
+ (tcg_out_movi32(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
+DEFINE_TCG_OUT_ARG(tcg_out_arg_reg8, TCGReg, tcg_out_ext8u,
+ (tcg_out_ext8u(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
+DEFINE_TCG_OUT_ARG(tcg_out_arg_reg16, TCGReg, tcg_out_ext16u,
+ (tcg_out_ext16u(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
+DEFINE_TCG_OUT_ARG(tcg_out_arg_reg32, TCGReg, tcg_out_mov_reg, )
+
+static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg,
+ TCGReg arglo, TCGReg arghi)
+{
+ /* 64 bit arguments must go in even/odd register pairs
+ * and in 8-aligned stack slots.
+ */
+ if (argreg & 1) {
+ argreg++;
+ }
+ if (use_armv6_instructions && argreg >= 4
+ && (arglo & 1) == 0 && arghi == arglo + 1) {
+ tcg_out_strd_8(s, COND_AL, arglo,
+ TCG_REG_CALL_STACK, (argreg - 4) * 4);
+ return argreg + 2;
+ } else {
+ argreg = tcg_out_arg_reg32(s, argreg, arglo);
+ argreg = tcg_out_arg_reg32(s, argreg, arghi);
+ return argreg;
+ }
+}
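+
+/*
+ * Usage sketch: if argreg is 1 on entry it is first bumped to 2, so the
+ * 64-bit value lands in the R2:R3 pair; once argreg reaches 4 (and the
+ * arglo:arghi pair is even/odd on v6+), a single strd spills both words
+ * into an 8-aligned stack slot instead.
+ */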
+
+#define TLB_SHIFT (CPU_TLB_ENTRY_BITS + CPU_TLB_BITS)
+
+/* We expect to use a 9-bit sign-magnitude negative offset from ENV.  */
+QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
+QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -256);
+
+/* These offsets are built into the LDRD below. */
+QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
+QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 4);
+
+/* Load and compare a TLB entry, leaving the flags set. Returns the register
+ containing the addend of the tlb entry. Clobbers R0, R1, R2, TMP. */
+
+static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
+ MemOp opc, int mem_index, bool is_load)
+{
+ int cmp_off = (is_load ? offsetof(CPUTLBEntry, addr_read)
+ : offsetof(CPUTLBEntry, addr_write));
+ int fast_off = TLB_MASK_TABLE_OFS(mem_index);
+ int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
+ int table_off = fast_off + offsetof(CPUTLBDescFast, table);
+ unsigned s_bits = opc & MO_SIZE;
+ unsigned a_bits = get_alignment_bits(opc);
+
+ /*
+     * We don't support inline unaligned accesses, but we can easily
+ * support overalignment checks.
+ */
+ if (a_bits < s_bits) {
+ a_bits = s_bits;
+ }
+
+ /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {r0,r1}. */
+ if (use_armv6_instructions) {
+ tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_AREG0, fast_off);
+ } else {
+ tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R0, TCG_AREG0, mask_off);
+ tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R1, TCG_AREG0, table_off);
+ }
+
+ /* Extract the tlb index from the address into R0. */
+ tcg_out_dat_reg(s, COND_AL, ARITH_AND, TCG_REG_R0, TCG_REG_R0, addrlo,
+ SHIFT_IMM_LSR(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS));
+
+ /*
+ * Add the tlb_table pointer, creating the CPUTLBEntry address in R1.
+ * Load the tlb comparator into R2/R3 and the fast path addend into R1.
+ */
+ if (cmp_off == 0) {
+ if (use_armv6_instructions && TARGET_LONG_BITS == 64) {
+ tcg_out_ldrd_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0);
+ } else {
+ tcg_out_ld32_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0);
+ }
+ } else {
+ tcg_out_dat_reg(s, COND_AL, ARITH_ADD,
+ TCG_REG_R1, TCG_REG_R1, TCG_REG_R0, 0);
+ if (use_armv6_instructions && TARGET_LONG_BITS == 64) {
+ tcg_out_ldrd_8(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
+ } else {
+ tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
+ }
+ }
+ if (!use_armv6_instructions && TARGET_LONG_BITS == 64) {
+ tcg_out_ld32_12(s, COND_AL, TCG_REG_R3, TCG_REG_R1, cmp_off + 4);
+ }
+
+ /* Load the tlb addend. */
+ tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R1,
+ offsetof(CPUTLBEntry, addend));
+
+ /*
+ * Check alignment, check comparators.
+ * Do this in no more than 3 insns. Use MOVW for v7, if possible,
+ * to reduce the number of sequential conditional instructions.
+ * Almost all guests have at least 4k pages, which means that we need
+     * to clear at least 9 bits even for an 8-byte memory access, which means it
+ * isn't worth checking for an immediate operand for BIC.
+ */
+ if (use_armv7_instructions && TARGET_PAGE_BITS <= 16) {
+ tcg_target_ulong mask = ~(TARGET_PAGE_MASK | ((1 << a_bits) - 1));
+
+ tcg_out_movi32(s, COND_AL, TCG_REG_TMP, mask);
+ tcg_out_dat_reg(s, COND_AL, ARITH_BIC, TCG_REG_TMP,
+ addrlo, TCG_REG_TMP, 0);
+ tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R2, TCG_REG_TMP, 0);
+ } else {
+ if (a_bits) {
+ tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo,
+ (1 << a_bits) - 1);
+ }
+ tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, 0, addrlo,
+ SHIFT_IMM_LSR(TARGET_PAGE_BITS));
+ tcg_out_dat_reg(s, (a_bits ? COND_EQ : COND_AL), ARITH_CMP,
+ 0, TCG_REG_R2, TCG_REG_TMP,
+ SHIFT_IMM_LSL(TARGET_PAGE_BITS));
+ }
+
+ if (TARGET_LONG_BITS == 64) {
+ tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, TCG_REG_R3, addrhi, 0);
+ }
+
+ return TCG_REG_R1;
+}
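+
+/*
+ * Pseudo-C summary of the lookup above (not emitted code):
+ *   entry = table + ((addr >> (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS)) & mask)
+ * with mask/table taken from env_tlb(env)->f[mmu_idx].  The comparator and
+ * addend are then loaded from *entry, the page and alignment bits of addr
+ * are compared against the comparator, and R1 is returned holding the
+ * addend with the flags left set for the slow-path branch.
+ */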
+
+/* Record the context of a call to the out of line helper code for the slow
+ path for a load or store, so that we can later generate the correct
+ helper code. */
+static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,
+ TCGReg datalo, TCGReg datahi, TCGReg addrlo,
+ TCGReg addrhi, tcg_insn_unit *raddr,
+ tcg_insn_unit *label_ptr)
+{
+ TCGLabelQemuLdst *label = new_ldst_label(s);
+
+ label->is_ld = is_ld;
+ label->oi = oi;
+ label->datalo_reg = datalo;
+ label->datahi_reg = datahi;
+ label->addrlo_reg = addrlo;
+ label->addrhi_reg = addrhi;
+ label->raddr = tcg_splitwx_to_rx(raddr);
+ label->label_ptr[0] = label_ptr;
+}
+
+static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
+{
+ TCGReg argreg, datalo, datahi;
+ MemOpIdx oi = lb->oi;
+ MemOp opc = get_memop(oi);
+ void *func;
+
+ if (!reloc_pc24(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
+ return false;
+ }
+
+ argreg = tcg_out_arg_reg32(s, TCG_REG_R0, TCG_AREG0);
+ if (TARGET_LONG_BITS == 64) {
+ argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg);
+ } else {
+ argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg);
+ }
+ argreg = tcg_out_arg_imm32(s, argreg, oi);
+ argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);
+
+ /* For armv6 we can use the canonical unsigned helpers and minimize
+ icache usage. For pre-armv6, use the signed helpers since we do
+ not have a single insn sign-extend. */
+ if (use_armv6_instructions) {
+ func = qemu_ld_helpers[opc & MO_SIZE];
+ } else {
+ func = qemu_ld_helpers[opc & MO_SSIZE];
+ if (opc & MO_SIGN) {
+ opc = MO_UL;
+ }
+ }
+ tcg_out_call(s, func);
+
+ datalo = lb->datalo_reg;
+ datahi = lb->datahi_reg;
+ switch (opc & MO_SSIZE) {
+ case MO_SB:
+ tcg_out_ext8s(s, COND_AL, datalo, TCG_REG_R0);
+ break;
+ case MO_SW:
+ tcg_out_ext16s(s, COND_AL, datalo, TCG_REG_R0);
+ break;
+ default:
+ tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
+ break;
+ case MO_Q:
+ if (datalo != TCG_REG_R1) {
+ tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
+ tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
+ } else if (datahi != TCG_REG_R0) {
+ tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
+ tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
+ } else {
+ tcg_out_mov_reg(s, COND_AL, TCG_REG_TMP, TCG_REG_R0);
+ tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
+ tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_TMP);
+ }
+ break;
+ }
+
+ tcg_out_goto(s, COND_AL, lb->raddr);
+ return true;
+}
+
+static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
+{
+ TCGReg argreg, datalo, datahi;
+ MemOpIdx oi = lb->oi;
+ MemOp opc = get_memop(oi);
+
+ if (!reloc_pc24(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
+ return false;
+ }
+
+ argreg = TCG_REG_R0;
+ argreg = tcg_out_arg_reg32(s, argreg, TCG_AREG0);
+ if (TARGET_LONG_BITS == 64) {
+ argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg);
+ } else {
+ argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg);
+ }
+
+ datalo = lb->datalo_reg;
+ datahi = lb->datahi_reg;
+ switch (opc & MO_SIZE) {
+ case MO_8:
+ argreg = tcg_out_arg_reg8(s, argreg, datalo);
+ break;
+ case MO_16:
+ argreg = tcg_out_arg_reg16(s, argreg, datalo);
+ break;
+ case MO_32:
+ default:
+ argreg = tcg_out_arg_reg32(s, argreg, datalo);
+ break;
+ case MO_64:
+ argreg = tcg_out_arg_reg64(s, argreg, datalo, datahi);
+ break;
+ }
+
+ argreg = tcg_out_arg_imm32(s, argreg, oi);
+ argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);
+
+ /* Tail-call to the helper, which will return to the fast path. */
+ tcg_out_goto(s, COND_AL, qemu_st_helpers[opc & MO_SIZE]);
+ return true;
+}
+#endif /* SOFTMMU */
+
+static void tcg_out_qemu_ld_index(TCGContext *s, MemOp opc,
+ TCGReg datalo, TCGReg datahi,
+ TCGReg addrlo, TCGReg addend)
+{
+ /* Byte swapping is left to middle-end expansion. */
+ tcg_debug_assert((opc & MO_BSWAP) == 0);
+
+ switch (opc & MO_SSIZE) {
+ case MO_UB:
+ tcg_out_ld8_r(s, COND_AL, datalo, addrlo, addend);
+ break;
+ case MO_SB:
+ tcg_out_ld8s_r(s, COND_AL, datalo, addrlo, addend);
+ break;
+ case MO_UW:
+ tcg_out_ld16u_r(s, COND_AL, datalo, addrlo, addend);
+ break;
+ case MO_SW:
+ tcg_out_ld16s_r(s, COND_AL, datalo, addrlo, addend);
+ break;
+ case MO_UL:
+ tcg_out_ld32_r(s, COND_AL, datalo, addrlo, addend);
+ break;
+ case MO_Q:
+ /* Avoid ldrd for user-only emulation, to handle unaligned. */
+ if (USING_SOFTMMU && use_armv6_instructions
+ && (datalo & 1) == 0 && datahi == datalo + 1) {
+ tcg_out_ldrd_r(s, COND_AL, datalo, addrlo, addend);
+ } else if (datalo != addend) {
+ tcg_out_ld32_rwb(s, COND_AL, datalo, addend, addrlo);
+ tcg_out_ld32_12(s, COND_AL, datahi, addend, 4);
+ } else {
+ tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_TMP,
+ addend, addrlo, SHIFT_IMM_LSL(0));
+ tcg_out_ld32_12(s, COND_AL, datalo, TCG_REG_TMP, 0);
+ tcg_out_ld32_12(s, COND_AL, datahi, TCG_REG_TMP, 4);
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
+}
+
+#ifndef CONFIG_SOFTMMU
+static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg datalo,
+ TCGReg datahi, TCGReg addrlo)
+{
+ /* Byte swapping is left to middle-end expansion. */
+ tcg_debug_assert((opc & MO_BSWAP) == 0);
+
+ switch (opc & MO_SSIZE) {
+ case MO_UB:
+ tcg_out_ld8_12(s, COND_AL, datalo, addrlo, 0);
+ break;
+ case MO_SB:
+ tcg_out_ld8s_8(s, COND_AL, datalo, addrlo, 0);
+ break;
+ case MO_UW:
+ tcg_out_ld16u_8(s, COND_AL, datalo, addrlo, 0);
+ break;
+ case MO_SW:
+ tcg_out_ld16s_8(s, COND_AL, datalo, addrlo, 0);
+ break;
+ case MO_UL:
+ tcg_out_ld32_12(s, COND_AL, datalo, addrlo, 0);
+ break;
+ case MO_Q:
+ /* Avoid ldrd for user-only emulation, to handle unaligned. */
+ if (USING_SOFTMMU && use_armv6_instructions
+ && (datalo & 1) == 0 && datahi == datalo + 1) {
+ tcg_out_ldrd_8(s, COND_AL, datalo, addrlo, 0);
+ } else if (datalo == addrlo) {
+ tcg_out_ld32_12(s, COND_AL, datahi, addrlo, 4);
+ tcg_out_ld32_12(s, COND_AL, datalo, addrlo, 0);
+ } else {
+ tcg_out_ld32_12(s, COND_AL, datalo, addrlo, 0);
+ tcg_out_ld32_12(s, COND_AL, datahi, addrlo, 4);
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
+}
+#endif
+
+static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
+{
+ TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused));
+ MemOpIdx oi;
+ MemOp opc;
+#ifdef CONFIG_SOFTMMU
+ int mem_index;
+ TCGReg addend;
+ tcg_insn_unit *label_ptr;
+#endif
+
+ datalo = *args++;
+ datahi = (is64 ? *args++ : 0);
+ addrlo = *args++;
+ addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0);
+ oi = *args++;
+ opc = get_memop(oi);
+
+#ifdef CONFIG_SOFTMMU
+ mem_index = get_mmuidx(oi);
+ addend = tcg_out_tlb_read(s, addrlo, addrhi, opc, mem_index, 1);
+
+    /* This is a conditional BL only to load a pointer within this opcode
+       into LR for the slow path. We will not be using the value for a
+       tail call. */
+ label_ptr = s->code_ptr;
+ tcg_out_bl_imm(s, COND_NE, 0);
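+    /* The zero displacement above is a placeholder; reloc_pc24() patches
+       it to point at the slow path when that code is emitted. */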
+
+ tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, addend);
+
+ add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
+ s->code_ptr, label_ptr);
+#else /* !CONFIG_SOFTMMU */
+ if (guest_base) {
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, guest_base);
+ tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, TCG_REG_TMP);
+ } else {
+ tcg_out_qemu_ld_direct(s, opc, datalo, datahi, addrlo);
+ }
+#endif
+}
+
+static void tcg_out_qemu_st_index(TCGContext *s, ARMCond cond, MemOp opc,
+ TCGReg datalo, TCGReg datahi,
+ TCGReg addrlo, TCGReg addend)
+{
+ /* Byte swapping is left to middle-end expansion. */
+ tcg_debug_assert((opc & MO_BSWAP) == 0);
+
+ switch (opc & MO_SIZE) {
+ case MO_8:
+ tcg_out_st8_r(s, cond, datalo, addrlo, addend);
+ break;
+ case MO_16:
+ tcg_out_st16_r(s, cond, datalo, addrlo, addend);
+ break;
+ case MO_32:
+ tcg_out_st32_r(s, cond, datalo, addrlo, addend);
+ break;
+ case MO_64:
+ /* Avoid strd for user-only emulation, to handle unaligned. */
+ if (USING_SOFTMMU && use_armv6_instructions
+ && (datalo & 1) == 0 && datahi == datalo + 1) {
+ tcg_out_strd_r(s, cond, datalo, addrlo, addend);
+ } else {
+ tcg_out_st32_rwb(s, cond, datalo, addend, addrlo);
+ tcg_out_st32_12(s, cond, datahi, addend, 4);
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
+}
+
+#ifndef CONFIG_SOFTMMU
+static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg datalo,
+ TCGReg datahi, TCGReg addrlo)
+{
+ /* Byte swapping is left to middle-end expansion. */
+ tcg_debug_assert((opc & MO_BSWAP) == 0);
+
+ switch (opc & MO_SIZE) {
+ case MO_8:
+ tcg_out_st8_12(s, COND_AL, datalo, addrlo, 0);
+ break;
+ case MO_16:
+ tcg_out_st16_8(s, COND_AL, datalo, addrlo, 0);
+ break;
+ case MO_32:
+ tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0);
+ break;
+ case MO_64:
+ /* Avoid strd for user-only emulation, to handle unaligned. */
+ if (USING_SOFTMMU && use_armv6_instructions
+ && (datalo & 1) == 0 && datahi == datalo + 1) {
+ tcg_out_strd_8(s, COND_AL, datalo, addrlo, 0);
+ } else {
+ tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0);
+ tcg_out_st32_12(s, COND_AL, datahi, addrlo, 4);
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
+}
+#endif
+
+static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
+{
+ TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused));
+ MemOpIdx oi;
+ MemOp opc;
+#ifdef CONFIG_SOFTMMU
+ int mem_index;
+ TCGReg addend;
+ tcg_insn_unit *label_ptr;
+#endif
+
+ datalo = *args++;
+ datahi = (is64 ? *args++ : 0);
+ addrlo = *args++;
+ addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0);
+ oi = *args++;
+ opc = get_memop(oi);
+
+#ifdef CONFIG_SOFTMMU
+ mem_index = get_mmuidx(oi);
+ addend = tcg_out_tlb_read(s, addrlo, addrhi, opc, mem_index, 0);
+
+ tcg_out_qemu_st_index(s, COND_EQ, opc, datalo, datahi, addrlo, addend);
+
+ /* The conditional call must come last, as we're going to return here. */
+ label_ptr = s->code_ptr;
+ tcg_out_bl_imm(s, COND_NE, 0);
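+    /* The slow path tail-calls the helper, which returns via LR; the BL
+       above sets LR to the next instruction, landing us back here in the
+       fast path. */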
+
+ add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi,
+ s->code_ptr, label_ptr);
+#else /* !CONFIG_SOFTMMU */
+ if (guest_base) {
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, guest_base);
+ tcg_out_qemu_st_index(s, COND_AL, opc, datalo,
+ datahi, addrlo, TCG_REG_TMP);
+ } else {
+ tcg_out_qemu_st_direct(s, opc, datalo, datahi, addrlo);
+ }
+#endif
+}
+
+static void tcg_out_epilogue(TCGContext *s);
+
+static void tcg_out_op(TCGContext *s, TCGOpcode opc,
+ const TCGArg args[TCG_MAX_OP_ARGS],
+ const int const_args[TCG_MAX_OP_ARGS])
+{
+ TCGArg a0, a1, a2, a3, a4, a5;
+ int c;
+
+ switch (opc) {
+ case INDEX_op_exit_tb:
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, args[0]);
+ tcg_out_epilogue(s);
+ break;
+ case INDEX_op_goto_tb:
+ {
+ /* Indirect jump method */
+ intptr_t ptr, dif, dil;
+ TCGReg base = TCG_REG_PC;
+
+ tcg_debug_assert(s->tb_jmp_insn_offset == 0);
+ ptr = (intptr_t)tcg_splitwx_to_rx(s->tb_jmp_target_addr + args[0]);
+ dif = tcg_pcrel_diff(s, (void *)ptr) - 8;
+ dil = sextract32(dif, 0, 12);
+ if (dif != dil) {
+ /* The TB is close, but outside the 12 bits addressable by
+ the load. We can extend this to 20 bits with a sub of a
+ shifted immediate from pc. In the vastly unlikely event
+ the code requires more than 1MB, we'll use 2 insns and
+ be no worse off. */
+ base = TCG_REG_R0;
+ tcg_out_movi32(s, COND_AL, base, ptr - dil);
+ }
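+            /* Either way, base + dil == ptr; the load below fetches the
+               stored TB address straight into pc. E.g. for dif = 0x12345,
+               dil = 0x345 and r0 is set to ptr - 0x345. */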
+ tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, base, dil);
+ set_jmp_reset_offset(s, args[0]);
+ }
+ break;
+ case INDEX_op_goto_ptr:
+ tcg_out_b_reg(s, COND_AL, args[0]);
+ break;
+ case INDEX_op_br:
+ tcg_out_goto_label(s, COND_AL, arg_label(args[0]));
+ break;
+
+ case INDEX_op_ld8u_i32:
+ tcg_out_ld8u(s, COND_AL, args[0], args[1], args[2]);
+ break;
+ case INDEX_op_ld8s_i32:
+ tcg_out_ld8s(s, COND_AL, args[0], args[1], args[2]);
+ break;
+ case INDEX_op_ld16u_i32:
+ tcg_out_ld16u(s, COND_AL, args[0], args[1], args[2]);
+ break;
+ case INDEX_op_ld16s_i32:
+ tcg_out_ld16s(s, COND_AL, args[0], args[1], args[2]);
+ break;
+ case INDEX_op_ld_i32:
+ tcg_out_ld32u(s, COND_AL, args[0], args[1], args[2]);
+ break;
+ case INDEX_op_st8_i32:
+ tcg_out_st8(s, COND_AL, args[0], args[1], args[2]);
+ break;
+ case INDEX_op_st16_i32:
+ tcg_out_st16(s, COND_AL, args[0], args[1], args[2]);
+ break;
+ case INDEX_op_st_i32:
+ tcg_out_st32(s, COND_AL, args[0], args[1], args[2]);
+ break;
+
+ case INDEX_op_movcond_i32:
+ /* Constraints mean that v2 is always in the same register as dest,
+ * so we only need to do "if condition passed, move v1 to dest".
+ */
+ tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
+ args[1], args[2], const_args[2]);
+ tcg_out_dat_rIK(s, tcg_cond_to_arm_cond[args[5]], ARITH_MOV,
+ ARITH_MVN, args[0], 0, args[3], const_args[3]);
+ break;
+ case INDEX_op_add_i32:
+ tcg_out_dat_rIN(s, COND_AL, ARITH_ADD, ARITH_SUB,
+ args[0], args[1], args[2], const_args[2]);
+ break;
+ case INDEX_op_sub_i32:
+ if (const_args[1]) {
+ if (const_args[2]) {
+ tcg_out_movi32(s, COND_AL, args[0], args[1] - args[2]);
+ } else {
+ tcg_out_dat_rI(s, COND_AL, ARITH_RSB,
+ args[0], args[2], args[1], 1);
+ }
+ } else {
+ tcg_out_dat_rIN(s, COND_AL, ARITH_SUB, ARITH_ADD,
+ args[0], args[1], args[2], const_args[2]);
+ }
+ break;
+ case INDEX_op_and_i32:
+ tcg_out_dat_rIK(s, COND_AL, ARITH_AND, ARITH_BIC,
+ args[0], args[1], args[2], const_args[2]);
+ break;
+ case INDEX_op_andc_i32:
+ tcg_out_dat_rIK(s, COND_AL, ARITH_BIC, ARITH_AND,
+ args[0], args[1], args[2], const_args[2]);
+ break;
+ case INDEX_op_or_i32:
+ c = ARITH_ORR;
+ goto gen_arith;
+ case INDEX_op_xor_i32:
+ c = ARITH_EOR;
+ /* Fall through. */
+ gen_arith:
+ tcg_out_dat_rI(s, COND_AL, c, args[0], args[1], args[2], const_args[2]);
+ break;
+ case INDEX_op_add2_i32:
+ a0 = args[0], a1 = args[1], a2 = args[2];
+ a3 = args[3], a4 = args[4], a5 = args[5];
+ if (a0 == a3 || (a0 == a5 && !const_args[5])) {
+ a0 = TCG_REG_TMP;
+ }
+ tcg_out_dat_rIN(s, COND_AL, ARITH_ADD | TO_CPSR, ARITH_SUB | TO_CPSR,
+ a0, a2, a4, const_args[4]);
+ tcg_out_dat_rIK(s, COND_AL, ARITH_ADC, ARITH_SBC,
+ a1, a3, a5, const_args[5]);
+ tcg_out_mov_reg(s, COND_AL, args[0], a0);
+ break;
+ case INDEX_op_sub2_i32:
+ a0 = args[0], a1 = args[1], a2 = args[2];
+ a3 = args[3], a4 = args[4], a5 = args[5];
+ if ((a0 == a3 && !const_args[3]) || (a0 == a5 && !const_args[5])) {
+ a0 = TCG_REG_TMP;
+ }
+ if (const_args[2]) {
+ if (const_args[4]) {
+ tcg_out_movi32(s, COND_AL, a0, a4);
+ a4 = a0;
+ }
+ tcg_out_dat_rI(s, COND_AL, ARITH_RSB | TO_CPSR, a0, a4, a2, 1);
+ } else {
+ tcg_out_dat_rIN(s, COND_AL, ARITH_SUB | TO_CPSR,
+ ARITH_ADD | TO_CPSR, a0, a2, a4, const_args[4]);
+ }
+ if (const_args[3]) {
+ if (const_args[5]) {
+ tcg_out_movi32(s, COND_AL, a1, a5);
+ a5 = a1;
+ }
+ tcg_out_dat_rI(s, COND_AL, ARITH_RSC, a1, a5, a3, 1);
+ } else {
+ tcg_out_dat_rIK(s, COND_AL, ARITH_SBC, ARITH_ADC,
+ a1, a3, a5, const_args[5]);
+ }
+ tcg_out_mov_reg(s, COND_AL, args[0], a0);
+ break;
+ case INDEX_op_neg_i32:
+ tcg_out_dat_imm(s, COND_AL, ARITH_RSB, args[0], args[1], 0);
+ break;
+ case INDEX_op_not_i32:
+ tcg_out_dat_reg(s, COND_AL,
+ ARITH_MVN, args[0], 0, args[1], SHIFT_IMM_LSL(0));
+ break;
+ case INDEX_op_mul_i32:
+ tcg_out_mul32(s, COND_AL, args[0], args[1], args[2]);
+ break;
+ case INDEX_op_mulu2_i32:
+ tcg_out_umull32(s, COND_AL, args[0], args[1], args[2], args[3]);
+ break;
+ case INDEX_op_muls2_i32:
+ tcg_out_smull32(s, COND_AL, args[0], args[1], args[2], args[3]);
+ break;
+ /* XXX: Perhaps args[2] & 0x1f is wrong */
+ case INDEX_op_shl_i32:
+ c = const_args[2] ?
+ SHIFT_IMM_LSL(args[2] & 0x1f) : SHIFT_REG_LSL(args[2]);
+ goto gen_shift32;
+ case INDEX_op_shr_i32:
+ c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_LSR(args[2] & 0x1f) :
+ SHIFT_IMM_LSL(0) : SHIFT_REG_LSR(args[2]);
+ goto gen_shift32;
+ case INDEX_op_sar_i32:
+ c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_ASR(args[2] & 0x1f) :
+ SHIFT_IMM_LSL(0) : SHIFT_REG_ASR(args[2]);
+ goto gen_shift32;
+ case INDEX_op_rotr_i32:
+ c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_ROR(args[2] & 0x1f) :
+ SHIFT_IMM_LSL(0) : SHIFT_REG_ROR(args[2]);
+ /* Fall through. */
+ gen_shift32:
+ tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1], c);
+ break;
+
+ case INDEX_op_rotl_i32:
+ if (const_args[2]) {
+ tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1],
+ ((0x20 - args[2]) & 0x1f) ?
+ SHIFT_IMM_ROR((0x20 - args[2]) & 0x1f) :
+ SHIFT_IMM_LSL(0));
+ } else {
+ tcg_out_dat_imm(s, COND_AL, ARITH_RSB, TCG_REG_TMP, args[2], 0x20);
+ tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1],
+ SHIFT_REG_ROR(TCG_REG_TMP));
+ }
+ break;
+
+ case INDEX_op_ctz_i32:
+ tcg_out_dat_reg(s, COND_AL, INSN_RBIT, TCG_REG_TMP, 0, args[1], 0);
+ a1 = TCG_REG_TMP;
+ goto do_clz;
+
+ case INDEX_op_clz_i32:
+ a1 = args[1];
+ do_clz:
+ a0 = args[0];
+ a2 = args[2];
+ c = const_args[2];
+ if (c && a2 == 32) {
+ tcg_out_dat_reg(s, COND_AL, INSN_CLZ, a0, 0, a1, 0);
+ break;
+ }
+ tcg_out_dat_imm(s, COND_AL, ARITH_CMP, 0, a1, 0);
+ tcg_out_dat_reg(s, COND_NE, INSN_CLZ, a0, 0, a1, 0);
+ if (c || a0 != a2) {
+ tcg_out_dat_rIK(s, COND_EQ, ARITH_MOV, ARITH_MVN, a0, 0, a2, c);
+ }
+ break;
+
+ case INDEX_op_brcond_i32:
+ tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
+ args[0], args[1], const_args[1]);
+ tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[2]],
+ arg_label(args[3]));
+ break;
+ case INDEX_op_setcond_i32:
+ tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
+ args[1], args[2], const_args[2]);
+ tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[3]],
+ ARITH_MOV, args[0], 0, 1);
+ tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[3])],
+ ARITH_MOV, args[0], 0, 0);
+ break;
+
+ case INDEX_op_brcond2_i32:
+ c = tcg_out_cmp2(s, args, const_args);
+ tcg_out_goto_label(s, tcg_cond_to_arm_cond[c], arg_label(args[5]));
+ break;
+ case INDEX_op_setcond2_i32:
+ c = tcg_out_cmp2(s, args + 1, const_args + 1);
+ tcg_out_dat_imm(s, tcg_cond_to_arm_cond[c], ARITH_MOV, args[0], 0, 1);
+ tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(c)],
+ ARITH_MOV, args[0], 0, 0);
+ break;
+
+ case INDEX_op_qemu_ld_i32:
+ tcg_out_qemu_ld(s, args, 0);
+ break;
+ case INDEX_op_qemu_ld_i64:
+ tcg_out_qemu_ld(s, args, 1);
+ break;
+ case INDEX_op_qemu_st_i32:
+ tcg_out_qemu_st(s, args, 0);
+ break;
+ case INDEX_op_qemu_st_i64:
+ tcg_out_qemu_st(s, args, 1);
+ break;
+
+ case INDEX_op_bswap16_i32:
+ tcg_out_bswap16(s, COND_AL, args[0], args[1], args[2]);
+ break;
+ case INDEX_op_bswap32_i32:
+ tcg_out_bswap32(s, COND_AL, args[0], args[1]);
+ break;
+
+ case INDEX_op_ext8s_i32:
+ tcg_out_ext8s(s, COND_AL, args[0], args[1]);
+ break;
+ case INDEX_op_ext16s_i32:
+ tcg_out_ext16s(s, COND_AL, args[0], args[1]);
+ break;
+ case INDEX_op_ext16u_i32:
+ tcg_out_ext16u(s, COND_AL, args[0], args[1]);
+ break;
+
+ case INDEX_op_deposit_i32:
+ tcg_out_deposit(s, COND_AL, args[0], args[2],
+ args[3], args[4], const_args[2]);
+ break;
+ case INDEX_op_extract_i32:
+ tcg_out_extract(s, COND_AL, args[0], args[1], args[2], args[3]);
+ break;
+ case INDEX_op_sextract_i32:
+ tcg_out_sextract(s, COND_AL, args[0], args[1], args[2], args[3]);
+ break;
+ case INDEX_op_extract2_i32:
+        /* ??? These optimizations vs zero should be generic. */
+ /* ??? But we can't substitute 2 for 1 in the opcode stream yet. */
+ if (const_args[1]) {
+ if (const_args[2]) {
+ tcg_out_movi(s, TCG_TYPE_REG, args[0], 0);
+ } else {
+ tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0,
+ args[2], SHIFT_IMM_LSL(32 - args[3]));
+ }
+ } else if (const_args[2]) {
+ tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0,
+ args[1], SHIFT_IMM_LSR(args[3]));
+ } else {
+ /* We can do extract2 in 2 insns, vs the 3 required otherwise. */
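+            /* extract2 yields 32 bits starting at bit args[3] of the
+               concatenation args[2]:args[1], i.e.
+               (args[2] << (32 - args[3])) | (args[1] >> args[3]). */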
+ tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, 0,
+ args[2], SHIFT_IMM_LSL(32 - args[3]));
+ tcg_out_dat_reg(s, COND_AL, ARITH_ORR, args[0], TCG_REG_TMP,
+ args[1], SHIFT_IMM_LSR(args[3]));
+ }
+ break;
+
+ case INDEX_op_div_i32:
+ tcg_out_sdiv(s, COND_AL, args[0], args[1], args[2]);
+ break;
+ case INDEX_op_divu_i32:
+ tcg_out_udiv(s, COND_AL, args[0], args[1], args[2]);
+ break;
+
+ case INDEX_op_mb:
+ tcg_out_mb(s, args[0]);
+ break;
+
+ case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
+ case INDEX_op_call: /* Always emitted via tcg_out_call. */
+ default:
+ tcg_abort();
+ }
+}
+
+static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
+{
+ switch (op) {
+ case INDEX_op_goto_ptr:
+ return C_O0_I1(r);
+
+ case INDEX_op_ld8u_i32:
+ case INDEX_op_ld8s_i32:
+ case INDEX_op_ld16u_i32:
+ case INDEX_op_ld16s_i32:
+ case INDEX_op_ld_i32:
+ case INDEX_op_neg_i32:
+ case INDEX_op_not_i32:
+ case INDEX_op_bswap16_i32:
+ case INDEX_op_bswap32_i32:
+ case INDEX_op_ext8s_i32:
+ case INDEX_op_ext16s_i32:
+ case INDEX_op_ext16u_i32:
+ case INDEX_op_extract_i32:
+ case INDEX_op_sextract_i32:
+ return C_O1_I1(r, r);
+
+ case INDEX_op_st8_i32:
+ case INDEX_op_st16_i32:
+ case INDEX_op_st_i32:
+ return C_O0_I2(r, r);
+
+ case INDEX_op_add_i32:
+ case INDEX_op_sub_i32:
+ case INDEX_op_setcond_i32:
+ return C_O1_I2(r, r, rIN);
+
+ case INDEX_op_and_i32:
+ case INDEX_op_andc_i32:
+ case INDEX_op_clz_i32:
+ case INDEX_op_ctz_i32:
+ return C_O1_I2(r, r, rIK);
+
+ case INDEX_op_mul_i32:
+ case INDEX_op_div_i32:
+ case INDEX_op_divu_i32:
+ return C_O1_I2(r, r, r);
+
+ case INDEX_op_mulu2_i32:
+ case INDEX_op_muls2_i32:
+ return C_O2_I2(r, r, r, r);
+
+ case INDEX_op_or_i32:
+ case INDEX_op_xor_i32:
+ return C_O1_I2(r, r, rI);
+
+ case INDEX_op_shl_i32:
+ case INDEX_op_shr_i32:
+ case INDEX_op_sar_i32:
+ case INDEX_op_rotl_i32:
+ case INDEX_op_rotr_i32:
+ return C_O1_I2(r, r, ri);
+
+ case INDEX_op_brcond_i32:
+ return C_O0_I2(r, rIN);
+ case INDEX_op_deposit_i32:
+ return C_O1_I2(r, 0, rZ);
+ case INDEX_op_extract2_i32:
+ return C_O1_I2(r, rZ, rZ);
+ case INDEX_op_movcond_i32:
+ return C_O1_I4(r, r, rIN, rIK, 0);
+ case INDEX_op_add2_i32:
+ return C_O2_I4(r, r, r, r, rIN, rIK);
+ case INDEX_op_sub2_i32:
+ return C_O2_I4(r, r, rI, rI, rIN, rIK);
+ case INDEX_op_brcond2_i32:
+ return C_O0_I4(r, r, rI, rI);
+ case INDEX_op_setcond2_i32:
+ return C_O1_I4(r, r, r, rI, rI);
+
+ case INDEX_op_qemu_ld_i32:
+ return TARGET_LONG_BITS == 32 ? C_O1_I1(r, l) : C_O1_I2(r, l, l);
+ case INDEX_op_qemu_ld_i64:
+ return TARGET_LONG_BITS == 32 ? C_O2_I1(r, r, l) : C_O2_I2(r, r, l, l);
+ case INDEX_op_qemu_st_i32:
+ return TARGET_LONG_BITS == 32 ? C_O0_I2(s, s) : C_O0_I3(s, s, s);
+ case INDEX_op_qemu_st_i64:
+ return TARGET_LONG_BITS == 32 ? C_O0_I3(s, s, s) : C_O0_I4(s, s, s, s);
+
+ case INDEX_op_st_vec:
+ return C_O0_I2(w, r);
+ case INDEX_op_ld_vec:
+ case INDEX_op_dupm_vec:
+ return C_O1_I1(w, r);
+ case INDEX_op_dup_vec:
+ return C_O1_I1(w, wr);
+ case INDEX_op_abs_vec:
+ case INDEX_op_neg_vec:
+ case INDEX_op_not_vec:
+ case INDEX_op_shli_vec:
+ case INDEX_op_shri_vec:
+ case INDEX_op_sari_vec:
+ return C_O1_I1(w, w);
+ case INDEX_op_dup2_vec:
+ case INDEX_op_add_vec:
+ case INDEX_op_mul_vec:
+ case INDEX_op_smax_vec:
+ case INDEX_op_smin_vec:
+ case INDEX_op_ssadd_vec:
+ case INDEX_op_sssub_vec:
+ case INDEX_op_sub_vec:
+ case INDEX_op_umax_vec:
+ case INDEX_op_umin_vec:
+ case INDEX_op_usadd_vec:
+ case INDEX_op_ussub_vec:
+ case INDEX_op_xor_vec:
+ case INDEX_op_arm_sshl_vec:
+ case INDEX_op_arm_ushl_vec:
+ return C_O1_I2(w, w, w);
+ case INDEX_op_arm_sli_vec:
+ return C_O1_I2(w, 0, w);
+ case INDEX_op_or_vec:
+ case INDEX_op_andc_vec:
+ return C_O1_I2(w, w, wO);
+ case INDEX_op_and_vec:
+ case INDEX_op_orc_vec:
+ return C_O1_I2(w, w, wV);
+ case INDEX_op_cmp_vec:
+ return C_O1_I2(w, w, wZ);
+ case INDEX_op_bitsel_vec:
+ return C_O1_I3(w, w, w, w);
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static void tcg_target_init(TCGContext *s)
+{
+ /*
+ * Only probe for the platform and capabilities if we haven't already
+ * determined maximum values at compile time.
+ */
+#if !defined(use_idiv_instructions) || !defined(use_neon_instructions)
+ {
+ unsigned long hwcap = qemu_getauxval(AT_HWCAP);
+#ifndef use_idiv_instructions
+ use_idiv_instructions = (hwcap & HWCAP_ARM_IDIVA) != 0;
+#endif
+#ifndef use_neon_instructions
+ use_neon_instructions = (hwcap & HWCAP_ARM_NEON) != 0;
+#endif
+ }
+#endif
+
+ if (__ARM_ARCH < 7) {
+ const char *pl = (const char *)qemu_getauxval(AT_PLATFORM);
+ if (pl != NULL && pl[0] == 'v' && pl[1] >= '4' && pl[1] <= '9') {
+ arm_arch = pl[1] - '0';
+ }
+ }
+
+ tcg_target_available_regs[TCG_TYPE_I32] = ALL_GENERAL_REGS;
+
+ tcg_target_call_clobber_regs = 0;
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R1);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R2);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R12);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R14);
+
+ if (use_neon_instructions) {
+ tcg_target_available_regs[TCG_TYPE_V64] = ALL_VECTOR_REGS;
+ tcg_target_available_regs[TCG_TYPE_V128] = ALL_VECTOR_REGS;
+
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q0);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q1);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q2);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q3);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q8);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q9);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q10);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q11);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q12);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q13);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q14);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q15);
+ }
+
+ s->reserved_regs = 0;
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_PC);
+ tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
+}
+
+static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
+ TCGReg arg1, intptr_t arg2)
+{
+ switch (type) {
+ case TCG_TYPE_I32:
+ tcg_out_ld32u(s, COND_AL, arg, arg1, arg2);
+ return;
+ case TCG_TYPE_V64:
+ /* regs 1; size 8; align 8 */
+ tcg_out_vldst(s, INSN_VLD1 | 0x7d0, arg, arg1, arg2);
+ return;
+ case TCG_TYPE_V128:
+ /*
+ * We have only 8-byte alignment for the stack per the ABI.
+ * Rather than dynamically re-align the stack, it's easier
+ * to simply not request alignment beyond that. So:
+ * regs 2; size 8; align 8
+ */
+ tcg_out_vldst(s, INSN_VLD1 | 0xad0, arg, arg1, arg2);
+ return;
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
+ TCGReg arg1, intptr_t arg2)
+{
+ switch (type) {
+ case TCG_TYPE_I32:
+ tcg_out_st32(s, COND_AL, arg, arg1, arg2);
+ return;
+ case TCG_TYPE_V64:
+ /* regs 1; size 8; align 8 */
+ tcg_out_vldst(s, INSN_VST1 | 0x7d0, arg, arg1, arg2);
+ return;
+ case TCG_TYPE_V128:
+ /* See tcg_out_ld re alignment: regs 2; size 8; align 8 */
+ tcg_out_vldst(s, INSN_VST1 | 0xad0, arg, arg1, arg2);
+ return;
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
+ TCGReg base, intptr_t ofs)
+{
+ return false;
+}
+
+static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
+{
+ if (ret == arg) {
+ return true;
+ }
+ switch (type) {
+ case TCG_TYPE_I32:
+ if (ret < TCG_REG_Q0 && arg < TCG_REG_Q0) {
+ tcg_out_mov_reg(s, COND_AL, ret, arg);
+ return true;
+ }
+ return false;
+
+ case TCG_TYPE_V64:
+ case TCG_TYPE_V128:
+ /* "VMOV D,N" is an alias for "VORR D,N,N". */
+ tcg_out_vreg3(s, INSN_VORR, type - TCG_TYPE_V64, 0, ret, arg, arg);
+ return true;
+
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static void tcg_out_movi(TCGContext *s, TCGType type,
+ TCGReg ret, tcg_target_long arg)
+{
+ tcg_debug_assert(type == TCG_TYPE_I32);
+ tcg_debug_assert(ret < TCG_REG_Q0);
+ tcg_out_movi32(s, COND_AL, ret, arg);
+}
+
+/* Type is always V128, with I64 elements. */
+static void tcg_out_dup2_vec(TCGContext *s, TCGReg rd, TCGReg rl, TCGReg rh)
+{
+ /* Move high element into place first. */
+ /* VMOV Dd+1, Ds */
+ tcg_out_vreg3(s, INSN_VORR | (1 << 12), 0, 0, rd, rh, rh);
+ /* Move low element into place; tcg_out_mov will check for nop. */
+ tcg_out_mov(s, TCG_TYPE_V64, rd, rl);
+}
+
+static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
+ TCGReg rd, TCGReg rs)
+{
+ int q = type - TCG_TYPE_V64;
+
+ if (vece == MO_64) {
+ if (type == TCG_TYPE_V128) {
+ tcg_out_dup2_vec(s, rd, rs, rs);
+ } else {
+ tcg_out_mov(s, TCG_TYPE_V64, rd, rs);
+ }
+ } else if (rs < TCG_REG_Q0) {
+ int b = (vece == MO_8);
+ int e = (vece == MO_16);
+ tcg_out32(s, INSN_VDUP_G | (b << 22) | (q << 21) | (e << 5) |
+ encode_vn(rd) | (rs << 12));
+ } else {
+ int imm4 = 1 << vece;
+ tcg_out32(s, INSN_VDUP_S | (imm4 << 16) | (q << 6) |
+ encode_vd(rd) | encode_vm(rs));
+ }
+ return true;
+}
+
+static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
+ TCGReg rd, TCGReg base, intptr_t offset)
+{
+ if (vece == MO_64) {
+ tcg_out_ld(s, TCG_TYPE_V64, rd, base, offset);
+ if (type == TCG_TYPE_V128) {
+ tcg_out_dup2_vec(s, rd, rd, rd);
+ }
+ } else {
+ int q = type - TCG_TYPE_V64;
+ tcg_out_vldst(s, INSN_VLD1R | (vece << 6) | (q << 5),
+ rd, base, offset);
+ }
+ return true;
+}
+
+static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
+ TCGReg rd, int64_t v64)
+{
+ int q = type - TCG_TYPE_V64;
+ int cmode, imm8, i;
+
+ /* Test all bytes equal first. */
+ if (vece == MO_8) {
+ tcg_out_vmovi(s, rd, q, 0, 0xe, v64);
+ return;
+ }
+
+ /*
+ * Test all bytes 0x00 or 0xff second. This can match cases that
+ * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
+ */
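+    /* E.g. v64 = 0x00ff00ff00ff00ff gives imm8 = 0x55; the op=1, cmode=0xe
+       encoding used below expands each imm8 bit back to a 0x00/0xff byte. */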
+ for (i = imm8 = 0; i < 8; i++) {
+ uint8_t byte = v64 >> (i * 8);
+ if (byte == 0xff) {
+ imm8 |= 1 << i;
+ } else if (byte != 0) {
+ goto fail_bytes;
+ }
+ }
+ tcg_out_vmovi(s, rd, q, 1, 0xe, imm8);
+ return;
+ fail_bytes:
+
+ /*
+ * Tests for various replications. For each element width, if we
+ * cannot find an expansion there's no point checking a larger
+ * width because we already know by replication it cannot match.
+ */
+ if (vece == MO_16) {
+ uint16_t v16 = v64;
+
+ if (is_shimm16(v16, &cmode, &imm8)) {
+ tcg_out_vmovi(s, rd, q, 0, cmode, imm8);
+ return;
+ }
+ if (is_shimm16(~v16, &cmode, &imm8)) {
+ tcg_out_vmovi(s, rd, q, 1, cmode, imm8);
+ return;
+ }
+
+ /*
+ * Otherwise, all remaining constants can be loaded in two insns:
+ * rd = v16 & 0xff, rd |= v16 & 0xff00.
+ */
+ tcg_out_vmovi(s, rd, q, 0, 0x8, v16 & 0xff);
+ tcg_out_vmovi(s, rd, q, 0, 0xb, v16 >> 8); /* VORRI */
+ return;
+ }
+
+ if (vece == MO_32) {
+ uint32_t v32 = v64;
+
+ if (is_shimm32(v32, &cmode, &imm8) ||
+ is_soimm32(v32, &cmode, &imm8)) {
+ tcg_out_vmovi(s, rd, q, 0, cmode, imm8);
+ return;
+ }
+ if (is_shimm32(~v32, &cmode, &imm8) ||
+ is_soimm32(~v32, &cmode, &imm8)) {
+ tcg_out_vmovi(s, rd, q, 1, cmode, imm8);
+ return;
+ }
+
+ /*
+ * Restrict the set of constants to those we can load with
+ * two instructions. Others we load from the pool.
+ */
+ i = is_shimm32_pair(v32, &cmode, &imm8);
+ if (i) {
+ tcg_out_vmovi(s, rd, q, 0, cmode, imm8);
+ tcg_out_vmovi(s, rd, q, 0, i | 1, extract32(v32, i * 4, 8));
+ return;
+ }
+ i = is_shimm32_pair(~v32, &cmode, &imm8);
+ if (i) {
+ tcg_out_vmovi(s, rd, q, 1, cmode, imm8);
+ tcg_out_vmovi(s, rd, q, 1, i | 1, extract32(~v32, i * 4, 8));
+ return;
+ }
+ }
+
+ /*
+ * As a last resort, load from the constant pool.
+ */
+ if (!q || vece == MO_64) {
+ new_pool_l2(s, R_ARM_PC11, s->code_ptr, 0, v64, v64 >> 32);
+ /* VLDR Dd, [pc + offset] */
+ tcg_out32(s, INSN_VLDR_D | encode_vd(rd) | (0xf << 16));
+ if (q) {
+ tcg_out_dup2_vec(s, rd, rd, rd);
+ }
+ } else {
+ new_pool_label(s, (uint32_t)v64, R_ARM_PC8, s->code_ptr, 0);
+ /* add tmp, pc, offset */
+ tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_TMP, TCG_REG_PC, 0);
+ tcg_out_dupm_vec(s, type, MO_32, rd, TCG_REG_TMP, 0);
+ }
+}
+
+static const ARMInsn vec_cmp_insn[16] = {
+ [TCG_COND_EQ] = INSN_VCEQ,
+ [TCG_COND_GT] = INSN_VCGT,
+ [TCG_COND_GE] = INSN_VCGE,
+ [TCG_COND_GTU] = INSN_VCGT_U,
+ [TCG_COND_GEU] = INSN_VCGE_U,
+};
+
+static const ARMInsn vec_cmp0_insn[16] = {
+ [TCG_COND_EQ] = INSN_VCEQ0,
+ [TCG_COND_GT] = INSN_VCGT0,
+ [TCG_COND_GE] = INSN_VCGE0,
+ [TCG_COND_LT] = INSN_VCLT0,
+ [TCG_COND_LE] = INSN_VCLE0,
+};
+
+static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
+ unsigned vecl, unsigned vece,
+ const TCGArg args[TCG_MAX_OP_ARGS],
+ const int const_args[TCG_MAX_OP_ARGS])
+{
+ TCGType type = vecl + TCG_TYPE_V64;
+ unsigned q = vecl;
+ TCGArg a0, a1, a2, a3;
+ int cmode, imm8;
+
+ a0 = args[0];
+ a1 = args[1];
+ a2 = args[2];
+
+ switch (opc) {
+ case INDEX_op_ld_vec:
+ tcg_out_ld(s, type, a0, a1, a2);
+ return;
+ case INDEX_op_st_vec:
+ tcg_out_st(s, type, a0, a1, a2);
+ return;
+ case INDEX_op_dupm_vec:
+ tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
+ return;
+ case INDEX_op_dup2_vec:
+ tcg_out_dup2_vec(s, a0, a1, a2);
+ return;
+ case INDEX_op_abs_vec:
+ tcg_out_vreg2(s, INSN_VABS, q, vece, a0, a1);
+ return;
+ case INDEX_op_neg_vec:
+ tcg_out_vreg2(s, INSN_VNEG, q, vece, a0, a1);
+ return;
+ case INDEX_op_not_vec:
+ tcg_out_vreg2(s, INSN_VMVN, q, 0, a0, a1);
+ return;
+ case INDEX_op_add_vec:
+ tcg_out_vreg3(s, INSN_VADD, q, vece, a0, a1, a2);
+ return;
+ case INDEX_op_mul_vec:
+ tcg_out_vreg3(s, INSN_VMUL, q, vece, a0, a1, a2);
+ return;
+ case INDEX_op_smax_vec:
+ tcg_out_vreg3(s, INSN_VMAX, q, vece, a0, a1, a2);
+ return;
+ case INDEX_op_smin_vec:
+ tcg_out_vreg3(s, INSN_VMIN, q, vece, a0, a1, a2);
+ return;
+ case INDEX_op_sub_vec:
+ tcg_out_vreg3(s, INSN_VSUB, q, vece, a0, a1, a2);
+ return;
+ case INDEX_op_ssadd_vec:
+ tcg_out_vreg3(s, INSN_VQADD, q, vece, a0, a1, a2);
+ return;
+ case INDEX_op_sssub_vec:
+ tcg_out_vreg3(s, INSN_VQSUB, q, vece, a0, a1, a2);
+ return;
+ case INDEX_op_umax_vec:
+ tcg_out_vreg3(s, INSN_VMAX_U, q, vece, a0, a1, a2);
+ return;
+ case INDEX_op_umin_vec:
+ tcg_out_vreg3(s, INSN_VMIN_U, q, vece, a0, a1, a2);
+ return;
+ case INDEX_op_usadd_vec:
+ tcg_out_vreg3(s, INSN_VQADD_U, q, vece, a0, a1, a2);
+ return;
+ case INDEX_op_ussub_vec:
+ tcg_out_vreg3(s, INSN_VQSUB_U, q, vece, a0, a1, a2);
+ return;
+ case INDEX_op_xor_vec:
+ tcg_out_vreg3(s, INSN_VEOR, q, 0, a0, a1, a2);
+ return;
+ case INDEX_op_arm_sshl_vec:
+ /*
+ * Note that Vm is the data and Vn is the shift count,
+ * therefore the arguments appear reversed.
+ */
+ tcg_out_vreg3(s, INSN_VSHL_S, q, vece, a0, a2, a1);
+ return;
+ case INDEX_op_arm_ushl_vec:
+ /* See above. */
+ tcg_out_vreg3(s, INSN_VSHL_U, q, vece, a0, a2, a1);
+ return;
+ case INDEX_op_shli_vec:
+ tcg_out_vshifti(s, INSN_VSHLI, q, a0, a1, a2 + (8 << vece));
+ return;
+ case INDEX_op_shri_vec:
+ tcg_out_vshifti(s, INSN_VSHRI, q, a0, a1, (16 << vece) - a2);
+ return;
+ case INDEX_op_sari_vec:
+ tcg_out_vshifti(s, INSN_VSARI, q, a0, a1, (16 << vece) - a2);
+ return;
+ case INDEX_op_arm_sli_vec:
+ tcg_out_vshifti(s, INSN_VSLI, q, a0, a2, args[3] + (8 << vece));
+ return;
+
+ case INDEX_op_andc_vec:
+ if (!const_args[2]) {
+ tcg_out_vreg3(s, INSN_VBIC, q, 0, a0, a1, a2);
+ return;
+ }
+ a2 = ~a2;
+ /* fall through */
+ case INDEX_op_and_vec:
+ if (const_args[2]) {
+ is_shimm1632(~a2, &cmode, &imm8);
+ if (a0 == a1) {
+ tcg_out_vmovi(s, a0, q, 1, cmode | 1, imm8); /* VBICI */
+ return;
+ }
+ tcg_out_vmovi(s, a0, q, 1, cmode, imm8); /* VMVNI */
+ a2 = a0;
+ }
+ tcg_out_vreg3(s, INSN_VAND, q, 0, a0, a1, a2);
+ return;
+
+ case INDEX_op_orc_vec:
+ if (!const_args[2]) {
+ tcg_out_vreg3(s, INSN_VORN, q, 0, a0, a1, a2);
+ return;
+ }
+ a2 = ~a2;
+ /* fall through */
+ case INDEX_op_or_vec:
+ if (const_args[2]) {
+ is_shimm1632(a2, &cmode, &imm8);
+ if (a0 == a1) {
+ tcg_out_vmovi(s, a0, q, 0, cmode | 1, imm8); /* VORRI */
+ return;
+ }
+ tcg_out_vmovi(s, a0, q, 0, cmode, imm8); /* VMOVI */
+ a2 = a0;
+ }
+ tcg_out_vreg3(s, INSN_VORR, q, 0, a0, a1, a2);
+ return;
+
+ case INDEX_op_cmp_vec:
+ {
+ TCGCond cond = args[3];
+
+ if (cond == TCG_COND_NE) {
+ if (const_args[2]) {
+ tcg_out_vreg3(s, INSN_VTST, q, vece, a0, a1, a1);
+ } else {
+ tcg_out_vreg3(s, INSN_VCEQ, q, vece, a0, a1, a2);
+ tcg_out_vreg2(s, INSN_VMVN, q, 0, a0, a0);
+ }
+ } else {
+ ARMInsn insn;
+
+ if (const_args[2]) {
+ insn = vec_cmp0_insn[cond];
+ if (insn) {
+ tcg_out_vreg2(s, insn, q, vece, a0, a1);
+ return;
+ }
+ tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP, 0);
+ a2 = TCG_VEC_TMP;
+ }
+ insn = vec_cmp_insn[cond];
+ if (insn == 0) {
+ TCGArg t;
+ t = a1, a1 = a2, a2 = t;
+ cond = tcg_swap_cond(cond);
+ insn = vec_cmp_insn[cond];
+ tcg_debug_assert(insn != 0);
+ }
+ tcg_out_vreg3(s, insn, q, vece, a0, a1, a2);
+ }
+ }
+ return;
+
+ case INDEX_op_bitsel_vec:
+ a3 = args[3];
+ if (a0 == a3) {
+ tcg_out_vreg3(s, INSN_VBIT, q, 0, a0, a2, a1);
+ } else if (a0 == a2) {
+ tcg_out_vreg3(s, INSN_VBIF, q, 0, a0, a3, a1);
+ } else {
+ tcg_out_mov(s, type, a0, a1);
+ tcg_out_vreg3(s, INSN_VBSL, q, 0, a0, a2, a3);
+ }
+ return;
+
+ case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */
+ case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */
+ default:
+ g_assert_not_reached();
+ }
+}
+
+int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
+{
+ switch (opc) {
+ case INDEX_op_add_vec:
+ case INDEX_op_sub_vec:
+ case INDEX_op_and_vec:
+ case INDEX_op_andc_vec:
+ case INDEX_op_or_vec:
+ case INDEX_op_orc_vec:
+ case INDEX_op_xor_vec:
+ case INDEX_op_not_vec:
+ case INDEX_op_shli_vec:
+ case INDEX_op_shri_vec:
+ case INDEX_op_sari_vec:
+ case INDEX_op_ssadd_vec:
+ case INDEX_op_sssub_vec:
+ case INDEX_op_usadd_vec:
+ case INDEX_op_ussub_vec:
+ case INDEX_op_bitsel_vec:
+ return 1;
+ case INDEX_op_abs_vec:
+ case INDEX_op_cmp_vec:
+ case INDEX_op_mul_vec:
+ case INDEX_op_neg_vec:
+ case INDEX_op_smax_vec:
+ case INDEX_op_smin_vec:
+ case INDEX_op_umax_vec:
+ case INDEX_op_umin_vec:
+ return vece < MO_64;
+ case INDEX_op_shlv_vec:
+ case INDEX_op_shrv_vec:
+ case INDEX_op_sarv_vec:
+ case INDEX_op_rotli_vec:
+ case INDEX_op_rotlv_vec:
+ case INDEX_op_rotrv_vec:
+ return -1;
+ default:
+ return 0;
+ }
+}
+
+void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
+ TCGArg a0, ...)
+{
+ va_list va;
+ TCGv_vec v0, v1, v2, t1, t2, c1;
+ TCGArg a2;
+
+ va_start(va, a0);
+ v0 = temp_tcgv_vec(arg_temp(a0));
+ v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
+ a2 = va_arg(va, TCGArg);
+ va_end(va);
+
+ switch (opc) {
+ case INDEX_op_shlv_vec:
+ /*
+ * Merely propagate shlv_vec to arm_ushl_vec.
+ * In this way we don't set TCG_TARGET_HAS_shv_vec
+ * because everything is done via expansion.
+ */
+ v2 = temp_tcgv_vec(arg_temp(a2));
+ vec_gen_3(INDEX_op_arm_ushl_vec, type, vece, tcgv_vec_arg(v0),
+ tcgv_vec_arg(v1), tcgv_vec_arg(v2));
+ break;
+
+ case INDEX_op_shrv_vec:
+ case INDEX_op_sarv_vec:
+ /* Right shifts are negative left shifts for NEON. */
+ v2 = temp_tcgv_vec(arg_temp(a2));
+ t1 = tcg_temp_new_vec(type);
+ tcg_gen_neg_vec(vece, t1, v2);
+ if (opc == INDEX_op_shrv_vec) {
+ opc = INDEX_op_arm_ushl_vec;
+ } else {
+ opc = INDEX_op_arm_sshl_vec;
+ }
+ vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
+ tcgv_vec_arg(v1), tcgv_vec_arg(t1));
+ tcg_temp_free_vec(t1);
+ break;
+
+ case INDEX_op_rotli_vec:
+ t1 = tcg_temp_new_vec(type);
+ tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1));
+ vec_gen_4(INDEX_op_arm_sli_vec, type, vece,
+ tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2);
+ tcg_temp_free_vec(t1);
+ break;
+
+ case INDEX_op_rotlv_vec:
+ v2 = temp_tcgv_vec(arg_temp(a2));
+ t1 = tcg_temp_new_vec(type);
+ c1 = tcg_constant_vec(type, vece, 8 << vece);
+ tcg_gen_sub_vec(vece, t1, v2, c1);
+ /* Right shifts are negative left shifts for NEON. */
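+        /* t1 holds shift - width, a negative count, so the first VSHL is a
+           right shift by (width - shift); OR-ing it with the left shift by
+           the original count produces the rotate. */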
+ vec_gen_3(INDEX_op_arm_ushl_vec, type, vece, tcgv_vec_arg(t1),
+ tcgv_vec_arg(v1), tcgv_vec_arg(t1));
+ vec_gen_3(INDEX_op_arm_ushl_vec, type, vece, tcgv_vec_arg(v0),
+ tcgv_vec_arg(v1), tcgv_vec_arg(v2));
+ tcg_gen_or_vec(vece, v0, v0, t1);
+ tcg_temp_free_vec(t1);
+ break;
+
+ case INDEX_op_rotrv_vec:
+ v2 = temp_tcgv_vec(arg_temp(a2));
+ t1 = tcg_temp_new_vec(type);
+ t2 = tcg_temp_new_vec(type);
+ c1 = tcg_constant_vec(type, vece, 8 << vece);
+ tcg_gen_neg_vec(vece, t1, v2);
+ tcg_gen_sub_vec(vece, t2, c1, v2);
+ /* Right shifts are negative left shifts for NEON. */
+ vec_gen_3(INDEX_op_arm_ushl_vec, type, vece, tcgv_vec_arg(t1),
+ tcgv_vec_arg(v1), tcgv_vec_arg(t1));
+ vec_gen_3(INDEX_op_arm_ushl_vec, type, vece, tcgv_vec_arg(t2),
+ tcgv_vec_arg(v1), tcgv_vec_arg(t2));
+ tcg_gen_or_vec(vece, v0, t1, t2);
+ tcg_temp_free_vec(t1);
+ tcg_temp_free_vec(t2);
+ break;
+
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
+{
+ int i;
+ for (i = 0; i < count; ++i) {
+ p[i] = INSN_NOP;
+ }
+}
+
+/* Compute frame size via macros, to share between tcg_target_qemu_prologue
+ and tcg_register_jit. */
+
+#define PUSH_SIZE ((11 - 4 + 1 + 1) * sizeof(tcg_target_long))
+
+#define FRAME_SIZE \
+ ((PUSH_SIZE \
+ + TCG_STATIC_CALL_ARGS_SIZE \
+ + CPU_TEMP_BUF_NLONGS * sizeof(long) \
+ + TCG_TARGET_STACK_ALIGN - 1) \
+ & -TCG_TARGET_STACK_ALIGN)
+
+#define STACK_ADDEND (FRAME_SIZE - PUSH_SIZE)
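+
+/* With 4-byte host registers this gives PUSH_SIZE = 9 * 4 = 36 bytes for
+   r4-r11 plus lr, and FRAME_SIZE is rounded up to TCG_TARGET_STACK_ALIGN. */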
+
+static void tcg_target_qemu_prologue(TCGContext *s)
+{
+ /* Calling convention requires us to save r4-r11 and lr. */
+ /* stmdb sp!, { r4 - r11, lr } */
+ tcg_out_ldstm(s, COND_AL, INSN_STMDB, TCG_REG_CALL_STACK,
+ (1 << TCG_REG_R4) | (1 << TCG_REG_R5) | (1 << TCG_REG_R6) |
+ (1 << TCG_REG_R7) | (1 << TCG_REG_R8) | (1 << TCG_REG_R9) |
+ (1 << TCG_REG_R10) | (1 << TCG_REG_R11) | (1 << TCG_REG_R14));
+
+ /* Reserve callee argument and tcg temp space. */
+ tcg_out_dat_rI(s, COND_AL, ARITH_SUB, TCG_REG_CALL_STACK,
+ TCG_REG_CALL_STACK, STACK_ADDEND, 1);
+ tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
+ CPU_TEMP_BUF_NLONGS * sizeof(long));
+
+ tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
+
+ tcg_out_b_reg(s, COND_AL, tcg_target_call_iarg_regs[1]);
+
+ /*
+ * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
+ * and fall through to the rest of the epilogue.
+ */
+ tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, 0);
+ tcg_out_epilogue(s);
+}
+
+static void tcg_out_epilogue(TCGContext *s)
+{
+ /* Release local stack frame. */
+ tcg_out_dat_rI(s, COND_AL, ARITH_ADD, TCG_REG_CALL_STACK,
+ TCG_REG_CALL_STACK, STACK_ADDEND, 1);
+
+ /* ldmia sp!, { r4 - r11, pc } */
+ tcg_out_ldstm(s, COND_AL, INSN_LDMIA, TCG_REG_CALL_STACK,
+ (1 << TCG_REG_R4) | (1 << TCG_REG_R5) | (1 << TCG_REG_R6) |
+ (1 << TCG_REG_R7) | (1 << TCG_REG_R8) | (1 << TCG_REG_R9) |
+ (1 << TCG_REG_R10) | (1 << TCG_REG_R11) | (1 << TCG_REG_PC));
+}
+
+typedef struct {
+ DebugFrameHeader h;
+ uint8_t fde_def_cfa[4];
+ uint8_t fde_reg_ofs[18];
+} DebugFrame;
+
+#define ELF_HOST_MACHINE EM_ARM
+
+/* We're expecting a 2-byte uleb128 encoded value. */
+QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
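+/* Each uleb128 byte carries 7 bits of payload, so two bytes cover values
+   below 1 << 14, which the build-time assertion above guarantees. */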
+
+static const DebugFrame debug_frame = {
+ .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
+ .h.cie.id = -1,
+ .h.cie.version = 1,
+ .h.cie.code_align = 1,
+ .h.cie.data_align = 0x7c, /* sleb128 -4 */
+ .h.cie.return_column = 14,
+
+ /* Total FDE size does not include the "len" member. */
+ .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
+
+ .fde_def_cfa = {
+ 12, 13, /* DW_CFA_def_cfa sp, ... */
+ (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
+ (FRAME_SIZE >> 7)
+ },
+ .fde_reg_ofs = {
+ /* The following must match the stmdb in the prologue. */
+ 0x8e, 1, /* DW_CFA_offset, lr, -4 */
+ 0x8b, 2, /* DW_CFA_offset, r11, -8 */
+ 0x8a, 3, /* DW_CFA_offset, r10, -12 */
+ 0x89, 4, /* DW_CFA_offset, r9, -16 */
+ 0x88, 5, /* DW_CFA_offset, r8, -20 */
+ 0x87, 6, /* DW_CFA_offset, r7, -24 */
+ 0x86, 7, /* DW_CFA_offset, r6, -28 */
+ 0x85, 8, /* DW_CFA_offset, r5, -32 */
+ 0x84, 9, /* DW_CFA_offset, r4, -36 */
+ }
+};
+
+void tcg_register_jit(const void *buf, size_t buf_size)
+{
+ tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
+}
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
new file mode 100644
index 000000000..f41b80955
--- /dev/null
+++ b/tcg/arm/tcg-target.h
@@ -0,0 +1,161 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2008 Fabrice Bellard
+ * Copyright (c) 2008 Andrzej Zaborowski
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef ARM_TCG_TARGET_H
+#define ARM_TCG_TARGET_H
+
+extern int arm_arch;
+
+#define use_armv5t_instructions (__ARM_ARCH >= 5 || arm_arch >= 5)
+#define use_armv6_instructions (__ARM_ARCH >= 6 || arm_arch >= 6)
+#define use_armv7_instructions (__ARM_ARCH >= 7 || arm_arch >= 7)
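+
+/* These fold to compile-time constants when the compiler already targets
+   the given architecture level; otherwise they test arm_arch, which
+   tcg_target_init() may refine from AT_PLATFORM at runtime. */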
+
+#undef TCG_TARGET_STACK_GROWSUP
+#define TCG_TARGET_INSN_UNIT_SIZE 4
+#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16
+#define MAX_CODE_GEN_BUFFER_SIZE UINT32_MAX
+
+typedef enum {
+ TCG_REG_R0 = 0,
+ TCG_REG_R1,
+ TCG_REG_R2,
+ TCG_REG_R3,
+ TCG_REG_R4,
+ TCG_REG_R5,
+ TCG_REG_R6,
+ TCG_REG_R7,
+ TCG_REG_R8,
+ TCG_REG_R9,
+ TCG_REG_R10,
+ TCG_REG_R11,
+ TCG_REG_R12,
+ TCG_REG_R13,
+ TCG_REG_R14,
+ TCG_REG_PC,
+
+ TCG_REG_Q0,
+ TCG_REG_Q1,
+ TCG_REG_Q2,
+ TCG_REG_Q3,
+ TCG_REG_Q4,
+ TCG_REG_Q5,
+ TCG_REG_Q6,
+ TCG_REG_Q7,
+ TCG_REG_Q8,
+ TCG_REG_Q9,
+ TCG_REG_Q10,
+ TCG_REG_Q11,
+ TCG_REG_Q12,
+ TCG_REG_Q13,
+ TCG_REG_Q14,
+ TCG_REG_Q15,
+
+ TCG_AREG0 = TCG_REG_R6,
+ TCG_REG_CALL_STACK = TCG_REG_R13,
+} TCGReg;
+
+#define TCG_TARGET_NB_REGS 32
+
+#ifdef __ARM_ARCH_EXT_IDIV__
+#define use_idiv_instructions 1
+#else
+extern bool use_idiv_instructions;
+#endif
+#ifdef __ARM_NEON__
+#define use_neon_instructions 1
+#else
+extern bool use_neon_instructions;
+#endif
+
+/* used for function call generation */
+#define TCG_TARGET_STACK_ALIGN 8
+#define TCG_TARGET_CALL_ALIGN_ARGS 1
+#define TCG_TARGET_CALL_STACK_OFFSET 0
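+/* TCG_TARGET_CALL_ALIGN_ARGS makes 64-bit helper arguments start in an
+   even register pair / 8-byte aligned stack slot, as the AAPCS requires. */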
+
+/* optional instructions */
+#define TCG_TARGET_HAS_ext8s_i32 1
+#define TCG_TARGET_HAS_ext16s_i32 1
+#define TCG_TARGET_HAS_ext8u_i32 0 /* and r0, r1, #0xff */
+#define TCG_TARGET_HAS_ext16u_i32 1
+#define TCG_TARGET_HAS_bswap16_i32 1
+#define TCG_TARGET_HAS_bswap32_i32 1
+#define TCG_TARGET_HAS_not_i32 1
+#define TCG_TARGET_HAS_neg_i32 1
+#define TCG_TARGET_HAS_rot_i32 1
+#define TCG_TARGET_HAS_andc_i32 1
+#define TCG_TARGET_HAS_orc_i32 0
+#define TCG_TARGET_HAS_eqv_i32 0
+#define TCG_TARGET_HAS_nand_i32 0
+#define TCG_TARGET_HAS_nor_i32 0
+#define TCG_TARGET_HAS_clz_i32 use_armv5t_instructions
+#define TCG_TARGET_HAS_ctz_i32 use_armv7_instructions
+#define TCG_TARGET_HAS_ctpop_i32 0
+#define TCG_TARGET_HAS_deposit_i32 use_armv7_instructions
+#define TCG_TARGET_HAS_extract_i32 use_armv7_instructions
+#define TCG_TARGET_HAS_sextract_i32 use_armv7_instructions
+#define TCG_TARGET_HAS_extract2_i32 1
+#define TCG_TARGET_HAS_movcond_i32 1
+#define TCG_TARGET_HAS_mulu2_i32 1
+#define TCG_TARGET_HAS_muls2_i32 1
+#define TCG_TARGET_HAS_muluh_i32 0
+#define TCG_TARGET_HAS_mulsh_i32 0
+#define TCG_TARGET_HAS_div_i32 use_idiv_instructions
+#define TCG_TARGET_HAS_rem_i32 0
+#define TCG_TARGET_HAS_direct_jump 0
+#define TCG_TARGET_HAS_qemu_st8_i32 0
+
+#define TCG_TARGET_HAS_v64 use_neon_instructions
+#define TCG_TARGET_HAS_v128 use_neon_instructions
+#define TCG_TARGET_HAS_v256 0
+
+#define TCG_TARGET_HAS_andc_vec 1
+#define TCG_TARGET_HAS_orc_vec 1
+#define TCG_TARGET_HAS_not_vec 1
+#define TCG_TARGET_HAS_neg_vec 1
+#define TCG_TARGET_HAS_abs_vec 1
+#define TCG_TARGET_HAS_roti_vec 0
+#define TCG_TARGET_HAS_rots_vec 0
+#define TCG_TARGET_HAS_rotv_vec 0
+#define TCG_TARGET_HAS_shi_vec 1
+#define TCG_TARGET_HAS_shs_vec 0
+#define TCG_TARGET_HAS_shv_vec 0
+#define TCG_TARGET_HAS_mul_vec 1
+#define TCG_TARGET_HAS_sat_vec 1
+#define TCG_TARGET_HAS_minmax_vec 1
+#define TCG_TARGET_HAS_bitsel_vec 1
+#define TCG_TARGET_HAS_cmpsel_vec 0
+
+#define TCG_TARGET_DEFAULT_MO (0)
+#define TCG_TARGET_HAS_MEMORY_BSWAP 0
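+/* With TCG_TARGET_HAS_MEMORY_BSWAP disabled, the middle-end expands any
+   byte swapping around qemu_ld/st, which is why the backend asserts
+   (opc & MO_BSWAP) == 0. */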
+
+/* not defined -- call should be eliminated at compile time */
+void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+
+#ifdef CONFIG_SOFTMMU
+#define TCG_TARGET_NEED_LDST_LABELS
+#endif
+#define TCG_TARGET_NEED_POOL_LABELS
+
+#endif
diff --git a/tcg/arm/tcg-target.opc.h b/tcg/arm/tcg-target.opc.h
new file mode 100644
index 000000000..d38af9a80
--- /dev/null
+++ b/tcg/arm/tcg-target.opc.h
@@ -0,0 +1,16 @@
+/*
+ * Copyright (c) 2019 Linaro
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.
+ *
+ * See the COPYING file in the top-level directory for details.
+ *
+ * Target-specific opcodes for host vector expansion. These will be
+ * emitted by tcg_expand_vec_op. For those familiar with GCC internals,
+ * consider these to be UNSPEC with names.
+ */
+
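+/* DEF(name, out-args, in-args, const-args, flags); e.g. arm_sli_vec takes
+   two vector inputs plus one constant shift count and produces one vector
+   output. */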
+DEF(arm_sli_vec, 1, 2, 1, IMPLVEC)
+DEF(arm_sshl_vec, 1, 2, 0, IMPLVEC)
+DEF(arm_ushl_vec, 1, 2, 0, IMPLVEC)
diff --git a/tcg/i386/tcg-target-con-set.h b/tcg/i386/tcg-target-con-set.h
new file mode 100644
index 000000000..78774d100
--- /dev/null
+++ b/tcg/i386/tcg-target-con-set.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define i386 target-specific constraint sets.
+ * Copyright (c) 2021 Linaro
+ */
+
+/*
+ * C_On_Im(...) defines a constraint set with <n> outputs and <m> inputs.
+ * Each operand should be a sequence of constraint letters as defined by
+ * tcg-target-con-str.h; the constraint combination is inclusive or.
+ *
+ * C_N1_Im(...) defines a constraint set with 1 output and <m> inputs,
+ * except that the output must use a new register.
+ */
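+/* For example, C_O1_I2(r, r, re) below describes one output in any general
+   register and two inputs, the second of which may also be a sign-extended
+   32-bit immediate ('e'). */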
+C_O0_I1(r)
+C_O0_I2(L, L)
+C_O0_I2(qi, r)
+C_O0_I2(re, r)
+C_O0_I2(ri, r)
+C_O0_I2(r, re)
+C_O0_I2(s, L)
+C_O0_I2(x, r)
+C_O0_I3(L, L, L)
+C_O0_I3(s, L, L)
+C_O0_I4(L, L, L, L)
+C_O0_I4(r, r, ri, ri)
+C_O1_I1(r, 0)
+C_O1_I1(r, L)
+C_O1_I1(r, q)
+C_O1_I1(r, r)
+C_O1_I1(x, r)
+C_O1_I1(x, x)
+C_O1_I2(Q, 0, Q)
+C_O1_I2(q, r, re)
+C_O1_I2(r, 0, ci)
+C_O1_I2(r, 0, r)
+C_O1_I2(r, 0, re)
+C_O1_I2(r, 0, reZ)
+C_O1_I2(r, 0, ri)
+C_O1_I2(r, 0, rI)
+C_O1_I2(r, L, L)
+C_O1_I2(r, r, re)
+C_O1_I2(r, r, ri)
+C_O1_I2(r, r, rI)
+C_O1_I2(x, x, x)
+C_N1_I2(r, r, r)
+C_N1_I2(r, r, rW)
+C_O1_I3(x, x, x, x)
+C_O1_I4(r, r, re, r, 0)
+C_O1_I4(r, r, r, ri, ri)
+C_O2_I1(r, r, L)
+C_O2_I2(a, d, a, r)
+C_O2_I2(r, r, L, L)
+C_O2_I3(a, d, 0, 1, r)
+C_O2_I4(r, r, 0, 1, re, re)
diff --git a/tcg/i386/tcg-target-con-str.h b/tcg/i386/tcg-target-con-str.h
new file mode 100644
index 000000000..24e6bcb80
--- /dev/null
+++ b/tcg/i386/tcg-target-con-str.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define i386 target-specific operand constraints.
+ * Copyright (c) 2021 Linaro
+ *
+ */
+
+/*
+ * Define constraint letters for register sets:
+ * REGS(letter, register_mask)
+ */
+REGS('a', 1u << TCG_REG_EAX)
+REGS('b', 1u << TCG_REG_EBX)
+REGS('c', 1u << TCG_REG_ECX)
+REGS('d', 1u << TCG_REG_EDX)
+REGS('S', 1u << TCG_REG_ESI)
+REGS('D', 1u << TCG_REG_EDI)
+
+REGS('r', ALL_GENERAL_REGS)
+REGS('x', ALL_VECTOR_REGS)
+REGS('q', ALL_BYTEL_REGS) /* regs that can be used as a byte operand */
+REGS('Q', ALL_BYTEH_REGS) /* regs with a second byte (e.g. %ah) */
+REGS('L', ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS) /* qemu_ld/st */
+REGS('s', ALL_BYTEL_REGS & ~SOFTMMU_RESERVE_REGS) /* qemu_st8_i32 data */
+
+/*
+ * Define constraint letters for constants:
+ * CONST(letter, TCG_CT_CONST_* bit set)
+ */
+CONST('e', TCG_CT_CONST_S32)
+CONST('I', TCG_CT_CONST_I32)
+CONST('W', TCG_CT_CONST_WSZ)
+CONST('Z', TCG_CT_CONST_U32)
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
new file mode 100644
index 000000000..84b109bb8
--- /dev/null
+++ b/tcg/i386/tcg-target.c.inc
@@ -0,0 +1,3901 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "../tcg-pool.c.inc"
+
+#ifdef CONFIG_DEBUG_TCG
+static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
+#if TCG_TARGET_REG_BITS == 64
+ "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
+#else
+ "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
+#endif
+ "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
+ "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7",
+#if TCG_TARGET_REG_BITS == 64
+ "%xmm8", "%xmm9", "%xmm10", "%xmm11",
+ "%xmm12", "%xmm13", "%xmm14", "%xmm15",
+#endif
+};
+#endif
+
+static const int tcg_target_reg_alloc_order[] = {
+#if TCG_TARGET_REG_BITS == 64
+ TCG_REG_RBP,
+ TCG_REG_RBX,
+ TCG_REG_R12,
+ TCG_REG_R13,
+ TCG_REG_R14,
+ TCG_REG_R15,
+ TCG_REG_R10,
+ TCG_REG_R11,
+ TCG_REG_R9,
+ TCG_REG_R8,
+ TCG_REG_RCX,
+ TCG_REG_RDX,
+ TCG_REG_RSI,
+ TCG_REG_RDI,
+ TCG_REG_RAX,
+#else
+ TCG_REG_EBX,
+ TCG_REG_ESI,
+ TCG_REG_EDI,
+ TCG_REG_EBP,
+ TCG_REG_ECX,
+ TCG_REG_EDX,
+ TCG_REG_EAX,
+#endif
+ TCG_REG_XMM0,
+ TCG_REG_XMM1,
+ TCG_REG_XMM2,
+ TCG_REG_XMM3,
+ TCG_REG_XMM4,
+ TCG_REG_XMM5,
+#ifndef _WIN64
+    /* The Win64 ABI has xmm6-xmm15 as callee-saves, and since we do not
+       save any of them, only allow xmm0-xmm5 to be allocated. */
+ TCG_REG_XMM6,
+ TCG_REG_XMM7,
+#if TCG_TARGET_REG_BITS == 64
+ TCG_REG_XMM8,
+ TCG_REG_XMM9,
+ TCG_REG_XMM10,
+ TCG_REG_XMM11,
+ TCG_REG_XMM12,
+ TCG_REG_XMM13,
+ TCG_REG_XMM14,
+ TCG_REG_XMM15,
+#endif
+#endif
+};
+
+static const int tcg_target_call_iarg_regs[] = {
+#if TCG_TARGET_REG_BITS == 64
+#if defined(_WIN64)
+ TCG_REG_RCX,
+ TCG_REG_RDX,
+#else
+ TCG_REG_RDI,
+ TCG_REG_RSI,
+ TCG_REG_RDX,
+ TCG_REG_RCX,
+#endif
+ TCG_REG_R8,
+ TCG_REG_R9,
+#else
+    /* 32-bit mode uses a stack-based calling convention (the GCC default). */
+#endif
+};
+
+static const int tcg_target_call_oarg_regs[] = {
+ TCG_REG_EAX,
+#if TCG_TARGET_REG_BITS == 32
+ TCG_REG_EDX
+#endif
+};
+
+/* Constants we accept. */
+#define TCG_CT_CONST_S32 0x100
+#define TCG_CT_CONST_U32 0x200
+#define TCG_CT_CONST_I32 0x400
+#define TCG_CT_CONST_WSZ 0x800
+
+/* Registers used with the L constraint, which are the first argument
+   registers on x86_64, and two arbitrary call-clobbered registers on
+   i386. */
+#if TCG_TARGET_REG_BITS == 64
+# define TCG_REG_L0 tcg_target_call_iarg_regs[0]
+# define TCG_REG_L1 tcg_target_call_iarg_regs[1]
+#else
+# define TCG_REG_L0 TCG_REG_EAX
+# define TCG_REG_L1 TCG_REG_EDX
+#endif
+
+#define ALL_BYTEH_REGS 0x0000000fu
+#if TCG_TARGET_REG_BITS == 64
+# define ALL_GENERAL_REGS 0x0000ffffu
+# define ALL_VECTOR_REGS 0xffff0000u
+# define ALL_BYTEL_REGS ALL_GENERAL_REGS
+#else
+# define ALL_GENERAL_REGS 0x000000ffu
+# define ALL_VECTOR_REGS 0x00ff0000u
+# define ALL_BYTEL_REGS ALL_BYTEH_REGS
+#endif
+#ifdef CONFIG_SOFTMMU
+# define SOFTMMU_RESERVE_REGS ((1 << TCG_REG_L0) | (1 << TCG_REG_L1))
+#else
+# define SOFTMMU_RESERVE_REGS 0
+#endif
+
+/* The host compiler should supply <cpuid.h> to enable runtime feature
+   detection, as we're not going to go so far as our own inline assembly.
+   If not available, default values will be assumed. */
+#if defined(CONFIG_CPUID_H)
+#include "qemu/cpuid.h"
+#endif
+
+/* For 64-bit, we always know that CMOV is available. */
+#if TCG_TARGET_REG_BITS == 64
+# define have_cmov 1
+#elif defined(CONFIG_CPUID_H)
+static bool have_cmov;
+#else
+# define have_cmov 0
+#endif
+
+/* We need these symbols in tcg-target.h, and we can't properly conditionalize
+   them there. Therefore we always define the variables. */
+bool have_bmi1;
+bool have_popcnt;
+bool have_avx1;
+bool have_avx2;
+bool have_movbe;
+
+#ifdef CONFIG_CPUID_H
+static bool have_bmi2;
+static bool have_lzcnt;
+#else
+# define have_bmi2 0
+# define have_lzcnt 0
+#endif
+
+static const tcg_insn_unit *tb_ret_addr;
+
+static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
+ intptr_t value, intptr_t addend)
+{
+ value += addend;
+ switch(type) {
+ case R_386_PC32:
+ value -= (uintptr_t)tcg_splitwx_to_rx(code_ptr);
+ if (value != (int32_t)value) {
+ return false;
+ }
+ /* FALLTHRU */
+ case R_386_32:
+ tcg_patch32(code_ptr, value);
+ break;
+ case R_386_PC8:
+ value -= (uintptr_t)tcg_splitwx_to_rx(code_ptr);
+ if (value != (int8_t)value) {
+ return false;
+ }
+ tcg_patch8(code_ptr, value);
+ break;
+ default:
+ tcg_abort();
+ }
+ return true;
+}
+
+/* test if a constant matches the constraint */
+static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
+{
+ if (ct & TCG_CT_CONST) {
+ return 1;
+ }
+ if (type == TCG_TYPE_I32) {
+ if (ct & (TCG_CT_CONST_S32 | TCG_CT_CONST_U32 | TCG_CT_CONST_I32)) {
+ return 1;
+ }
+ } else {
+ if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
+ return 1;
+ }
+ if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
+ return 1;
+ }
+ if ((ct & TCG_CT_CONST_I32) && ~val == (int32_t)~val) {
+ return 1;
+ }
+ }
+ if ((ct & TCG_CT_CONST_WSZ) && val == (type == TCG_TYPE_I32 ? 32 : 64)) {
+ return 1;
+ }
+ return 0;
+}
+
+# define LOWREGMASK(x) ((x) & 7)
+
+#define P_EXT 0x100 /* 0x0f opcode prefix */
+#define P_EXT38 0x200 /* 0x0f 0x38 opcode prefix */
+#define P_DATA16 0x400 /* 0x66 opcode prefix */
+#define P_VEXW 0x1000 /* Set VEX.W = 1 */
+#if TCG_TARGET_REG_BITS == 64
+# define P_REXW P_VEXW /* Set REX.W = 1; match VEXW */
+# define P_REXB_R 0x2000 /* REG field as byte register */
+# define P_REXB_RM 0x4000 /* R/M field as byte register */
+# define P_GS 0x8000 /* gs segment override */
+#else
+# define P_REXW 0
+# define P_REXB_R 0
+# define P_REXB_RM 0
+# define P_GS 0
+#endif
+#define P_EXT3A 0x10000 /* 0x0f 0x3a opcode prefix */
+#define P_SIMDF3 0x20000 /* 0xf3 opcode prefix */
+#define P_SIMDF2 0x40000 /* 0xf2 opcode prefix */
+#define P_VEXL 0x80000 /* Set VEX.L = 1 */
+
+#define OPC_ARITH_EvIz (0x81)
+#define OPC_ARITH_EvIb (0x83)
+#define OPC_ARITH_GvEv (0x03) /* ... plus (ARITH_FOO << 3) */
+#define OPC_ANDN (0xf2 | P_EXT38)
+#define OPC_ADD_GvEv (OPC_ARITH_GvEv | (ARITH_ADD << 3))
+#define OPC_AND_GvEv (OPC_ARITH_GvEv | (ARITH_AND << 3))
+#define OPC_BLENDPS (0x0c | P_EXT3A | P_DATA16)
+#define OPC_BSF (0xbc | P_EXT)
+#define OPC_BSR (0xbd | P_EXT)
+#define OPC_BSWAP (0xc8 | P_EXT)
+#define OPC_CALL_Jz (0xe8)
+#define OPC_CMOVCC (0x40 | P_EXT) /* ... plus condition code */
+#define OPC_CMP_GvEv (OPC_ARITH_GvEv | (ARITH_CMP << 3))
+#define OPC_DEC_r32 (0x48)
+#define OPC_IMUL_GvEv (0xaf | P_EXT)
+#define OPC_IMUL_GvEvIb (0x6b)
+#define OPC_IMUL_GvEvIz (0x69)
+#define OPC_INC_r32 (0x40)
+#define OPC_JCC_long (0x80 | P_EXT) /* ... plus condition code */
+#define OPC_JCC_short (0x70) /* ... plus condition code */
+#define OPC_JMP_long (0xe9)
+#define OPC_JMP_short (0xeb)
+#define OPC_LEA (0x8d)
+#define OPC_LZCNT (0xbd | P_EXT | P_SIMDF3)
+#define OPC_MOVB_EvGv (0x88) /* stores, more or less */
+#define OPC_MOVL_EvGv (0x89) /* stores, more or less */
+#define OPC_MOVL_GvEv (0x8b) /* loads, more or less */
+#define OPC_MOVB_EvIz (0xc6)
+#define OPC_MOVL_EvIz (0xc7)
+#define OPC_MOVL_Iv (0xb8)
+#define OPC_MOVBE_GyMy (0xf0 | P_EXT38)
+#define OPC_MOVBE_MyGy (0xf1 | P_EXT38)
+#define OPC_MOVD_VyEy (0x6e | P_EXT | P_DATA16)
+#define OPC_MOVD_EyVy (0x7e | P_EXT | P_DATA16)
+#define OPC_MOVDDUP (0x12 | P_EXT | P_SIMDF2)
+#define OPC_MOVDQA_VxWx (0x6f | P_EXT | P_DATA16)
+#define OPC_MOVDQA_WxVx (0x7f | P_EXT | P_DATA16)
+#define OPC_MOVDQU_VxWx (0x6f | P_EXT | P_SIMDF3)
+#define OPC_MOVDQU_WxVx (0x7f | P_EXT | P_SIMDF3)
+#define OPC_MOVQ_VqWq (0x7e | P_EXT | P_SIMDF3)
+#define OPC_MOVQ_WqVq (0xd6 | P_EXT | P_DATA16)
+#define OPC_MOVSBL (0xbe | P_EXT)
+#define OPC_MOVSWL (0xbf | P_EXT)
+#define OPC_MOVSLQ (0x63 | P_REXW)
+#define OPC_MOVZBL (0xb6 | P_EXT)
+#define OPC_MOVZWL (0xb7 | P_EXT)
+#define OPC_PABSB (0x1c | P_EXT38 | P_DATA16)
+#define OPC_PABSW (0x1d | P_EXT38 | P_DATA16)
+#define OPC_PABSD (0x1e | P_EXT38 | P_DATA16)
+#define OPC_PACKSSDW (0x6b | P_EXT | P_DATA16)
+#define OPC_PACKSSWB (0x63 | P_EXT | P_DATA16)
+#define OPC_PACKUSDW (0x2b | P_EXT38 | P_DATA16)
+#define OPC_PACKUSWB (0x67 | P_EXT | P_DATA16)
+#define OPC_PADDB (0xfc | P_EXT | P_DATA16)
+#define OPC_PADDW (0xfd | P_EXT | P_DATA16)
+#define OPC_PADDD (0xfe | P_EXT | P_DATA16)
+#define OPC_PADDQ (0xd4 | P_EXT | P_DATA16)
+#define OPC_PADDSB (0xec | P_EXT | P_DATA16)
+#define OPC_PADDSW (0xed | P_EXT | P_DATA16)
+#define OPC_PADDUB (0xdc | P_EXT | P_DATA16)
+#define OPC_PADDUW (0xdd | P_EXT | P_DATA16)
+#define OPC_PAND (0xdb | P_EXT | P_DATA16)
+#define OPC_PANDN (0xdf | P_EXT | P_DATA16)
+#define OPC_PBLENDW (0x0e | P_EXT3A | P_DATA16)
+#define OPC_PCMPEQB (0x74 | P_EXT | P_DATA16)
+#define OPC_PCMPEQW (0x75 | P_EXT | P_DATA16)
+#define OPC_PCMPEQD (0x76 | P_EXT | P_DATA16)
+#define OPC_PCMPEQQ (0x29 | P_EXT38 | P_DATA16)
+#define OPC_PCMPGTB (0x64 | P_EXT | P_DATA16)
+#define OPC_PCMPGTW (0x65 | P_EXT | P_DATA16)
+#define OPC_PCMPGTD (0x66 | P_EXT | P_DATA16)
+#define OPC_PCMPGTQ (0x37 | P_EXT38 | P_DATA16)
+#define OPC_PMAXSB (0x3c | P_EXT38 | P_DATA16)
+#define OPC_PMAXSW (0xee | P_EXT | P_DATA16)
+#define OPC_PMAXSD (0x3d | P_EXT38 | P_DATA16)
+#define OPC_PMAXUB (0xde | P_EXT | P_DATA16)
+#define OPC_PMAXUW (0x3e | P_EXT38 | P_DATA16)
+#define OPC_PMAXUD (0x3f | P_EXT38 | P_DATA16)
+#define OPC_PMINSB (0x38 | P_EXT38 | P_DATA16)
+#define OPC_PMINSW (0xea | P_EXT | P_DATA16)
+#define OPC_PMINSD (0x39 | P_EXT38 | P_DATA16)
+#define OPC_PMINUB (0xda | P_EXT | P_DATA16)
+#define OPC_PMINUW (0x3a | P_EXT38 | P_DATA16)
+#define OPC_PMINUD (0x3b | P_EXT38 | P_DATA16)
+#define OPC_PMOVSXBW (0x20 | P_EXT38 | P_DATA16)
+#define OPC_PMOVSXWD (0x23 | P_EXT38 | P_DATA16)
+#define OPC_PMOVSXDQ (0x25 | P_EXT38 | P_DATA16)
+#define OPC_PMOVZXBW (0x30 | P_EXT38 | P_DATA16)
+#define OPC_PMOVZXWD (0x33 | P_EXT38 | P_DATA16)
+#define OPC_PMOVZXDQ (0x35 | P_EXT38 | P_DATA16)
+#define OPC_PMULLW (0xd5 | P_EXT | P_DATA16)
+#define OPC_PMULLD (0x40 | P_EXT38 | P_DATA16)
+#define OPC_POR (0xeb | P_EXT | P_DATA16)
+#define OPC_PSHUFB (0x00 | P_EXT38 | P_DATA16)
+#define OPC_PSHUFD (0x70 | P_EXT | P_DATA16)
+#define OPC_PSHUFLW (0x70 | P_EXT | P_SIMDF2)
+#define OPC_PSHUFHW (0x70 | P_EXT | P_SIMDF3)
+#define OPC_PSHIFTW_Ib (0x71 | P_EXT | P_DATA16) /* /2 /6 /4 */
+#define OPC_PSHIFTD_Ib (0x72 | P_EXT | P_DATA16) /* /2 /6 /4 */
+#define OPC_PSHIFTQ_Ib (0x73 | P_EXT | P_DATA16) /* /2 /6 /4 */
+#define OPC_PSLLW (0xf1 | P_EXT | P_DATA16)
+#define OPC_PSLLD (0xf2 | P_EXT | P_DATA16)
+#define OPC_PSLLQ (0xf3 | P_EXT | P_DATA16)
+#define OPC_PSRAW (0xe1 | P_EXT | P_DATA16)
+#define OPC_PSRAD (0xe2 | P_EXT | P_DATA16)
+#define OPC_PSRLW (0xd1 | P_EXT | P_DATA16)
+#define OPC_PSRLD (0xd2 | P_EXT | P_DATA16)
+#define OPC_PSRLQ (0xd3 | P_EXT | P_DATA16)
+#define OPC_PSUBB (0xf8 | P_EXT | P_DATA16)
+#define OPC_PSUBW (0xf9 | P_EXT | P_DATA16)
+#define OPC_PSUBD (0xfa | P_EXT | P_DATA16)
+#define OPC_PSUBQ (0xfb | P_EXT | P_DATA16)
+#define OPC_PSUBSB (0xe8 | P_EXT | P_DATA16)
+#define OPC_PSUBSW (0xe9 | P_EXT | P_DATA16)
+#define OPC_PSUBUB (0xd8 | P_EXT | P_DATA16)
+#define OPC_PSUBUW (0xd9 | P_EXT | P_DATA16)
+#define OPC_PUNPCKLBW (0x60 | P_EXT | P_DATA16)
+#define OPC_PUNPCKLWD (0x61 | P_EXT | P_DATA16)
+#define OPC_PUNPCKLDQ (0x62 | P_EXT | P_DATA16)
+#define OPC_PUNPCKLQDQ (0x6c | P_EXT | P_DATA16)
+#define OPC_PUNPCKHBW (0x68 | P_EXT | P_DATA16)
+#define OPC_PUNPCKHWD (0x69 | P_EXT | P_DATA16)
+#define OPC_PUNPCKHDQ (0x6a | P_EXT | P_DATA16)
+#define OPC_PUNPCKHQDQ (0x6d | P_EXT | P_DATA16)
+#define OPC_PXOR (0xef | P_EXT | P_DATA16)
+#define OPC_POP_r32 (0x58)
+#define OPC_POPCNT (0xb8 | P_EXT | P_SIMDF3)
+#define OPC_PUSH_r32 (0x50)
+#define OPC_PUSH_Iv (0x68)
+#define OPC_PUSH_Ib (0x6a)
+#define OPC_RET (0xc3)
+#define OPC_SETCC (0x90 | P_EXT | P_REXB_RM) /* ... plus cc */
+#define OPC_SHIFT_1 (0xd1)
+#define OPC_SHIFT_Ib (0xc1)
+#define OPC_SHIFT_cl (0xd3)
+#define OPC_SARX (0xf7 | P_EXT38 | P_SIMDF3)
+#define OPC_SHUFPS (0xc6 | P_EXT)
+#define OPC_SHLX (0xf7 | P_EXT38 | P_DATA16)
+#define OPC_SHRX (0xf7 | P_EXT38 | P_SIMDF2)
+#define OPC_SHRD_Ib (0xac | P_EXT)
+#define OPC_TESTL (0x85)
+#define OPC_TZCNT (0xbc | P_EXT | P_SIMDF3)
+#define OPC_UD2 (0x0b | P_EXT)
+#define OPC_VPBLENDD (0x02 | P_EXT3A | P_DATA16)
+#define OPC_VPBLENDVB (0x4c | P_EXT3A | P_DATA16)
+#define OPC_VPINSRB (0x20 | P_EXT3A | P_DATA16)
+#define OPC_VPINSRW (0xc4 | P_EXT | P_DATA16)
+#define OPC_VBROADCASTSS (0x18 | P_EXT38 | P_DATA16)
+#define OPC_VBROADCASTSD (0x19 | P_EXT38 | P_DATA16)
+#define OPC_VPBROADCASTB (0x78 | P_EXT38 | P_DATA16)
+#define OPC_VPBROADCASTW (0x79 | P_EXT38 | P_DATA16)
+#define OPC_VPBROADCASTD (0x58 | P_EXT38 | P_DATA16)
+#define OPC_VPBROADCASTQ (0x59 | P_EXT38 | P_DATA16)
+#define OPC_VPERMQ (0x00 | P_EXT3A | P_DATA16 | P_VEXW)
+#define OPC_VPERM2I128 (0x46 | P_EXT3A | P_DATA16 | P_VEXL)
+#define OPC_VPSLLVD (0x47 | P_EXT38 | P_DATA16)
+#define OPC_VPSLLVQ (0x47 | P_EXT38 | P_DATA16 | P_VEXW)
+#define OPC_VPSRAVD (0x46 | P_EXT38 | P_DATA16)
+#define OPC_VPSRLVD (0x45 | P_EXT38 | P_DATA16)
+#define OPC_VPSRLVQ (0x45 | P_EXT38 | P_DATA16 | P_VEXW)
+#define OPC_VZEROUPPER (0x77 | P_EXT)
+#define OPC_XCHG_ax_r32 (0x90)
+
+#define OPC_GRP3_Ev (0xf7)
+#define OPC_GRP5 (0xff)
+#define OPC_GRP14 (0x73 | P_EXT | P_DATA16)
+
+/* Group 1 opcode extensions for 0x80-0x83.
+ These are also used as modifiers for OPC_ARITH. */
+#define ARITH_ADD 0
+#define ARITH_OR 1
+#define ARITH_ADC 2
+#define ARITH_SBB 3
+#define ARITH_AND 4
+#define ARITH_SUB 5
+#define ARITH_XOR 6
+#define ARITH_CMP 7
+
+/* Group 2 opcode extensions for 0xc0, 0xc1, 0xd0-0xd3. */
+#define SHIFT_ROL 0
+#define SHIFT_ROR 1
+#define SHIFT_SHL 4
+#define SHIFT_SHR 5
+#define SHIFT_SAR 7
+
+/* Group 3 opcode extensions for 0xf6, 0xf7. To be used with OPC_GRP3. */
+#define EXT3_NOT 2
+#define EXT3_NEG 3
+#define EXT3_MUL 4
+#define EXT3_IMUL 5
+#define EXT3_DIV 6
+#define EXT3_IDIV 7
+
+/* Group 5 opcode extensions for 0xff. To be used with OPC_GRP5. */
+#define EXT5_INC_Ev 0
+#define EXT5_DEC_Ev 1
+#define EXT5_CALLN_Ev 2
+#define EXT5_JMPN_Ev 4
+
+/* Condition codes to be added to OPC_JCC_{long,short}. */
+#define JCC_JMP (-1)
+#define JCC_JO 0x0
+#define JCC_JNO 0x1
+#define JCC_JB 0x2
+#define JCC_JAE 0x3
+#define JCC_JE 0x4
+#define JCC_JNE 0x5
+#define JCC_JBE 0x6
+#define JCC_JA 0x7
+#define JCC_JS 0x8
+#define JCC_JNS 0x9
+#define JCC_JP 0xa
+#define JCC_JNP 0xb
+#define JCC_JL 0xc
+#define JCC_JGE 0xd
+#define JCC_JLE 0xe
+#define JCC_JG 0xf
+
+static const uint8_t tcg_cond_to_jcc[] = {
+ [TCG_COND_EQ] = JCC_JE,
+ [TCG_COND_NE] = JCC_JNE,
+ [TCG_COND_LT] = JCC_JL,
+ [TCG_COND_GE] = JCC_JGE,
+ [TCG_COND_LE] = JCC_JLE,
+ [TCG_COND_GT] = JCC_JG,
+ [TCG_COND_LTU] = JCC_JB,
+ [TCG_COND_GEU] = JCC_JAE,
+ [TCG_COND_LEU] = JCC_JBE,
+ [TCG_COND_GTU] = JCC_JA,
+};
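+
+/*
+ * For illustration: a brcond_i32 with TCG_COND_LT first emits a CMP and then
+ * "jl rel32", i.e. bytes 0f 8c followed by a 32-bit displacement, because
+ * OPC_JCC_long (0x80 | P_EXT) plus JCC_JL (0xc) selects opcode 0f 8c.
+ */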
+
+#if TCG_TARGET_REG_BITS == 64
+static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
+{
+ int rex;
+
+ if (opc & P_GS) {
+ tcg_out8(s, 0x65);
+ }
+ if (opc & P_DATA16) {
+ /* We should never be asking for both 16 and 64-bit operation. */
+ tcg_debug_assert((opc & P_REXW) == 0);
+ tcg_out8(s, 0x66);
+ }
+ if (opc & P_SIMDF3) {
+ tcg_out8(s, 0xf3);
+ } else if (opc & P_SIMDF2) {
+ tcg_out8(s, 0xf2);
+ }
+
+ rex = 0;
+ rex |= (opc & P_REXW) ? 0x8 : 0x0; /* REX.W */
+ rex |= (r & 8) >> 1; /* REX.R */
+ rex |= (x & 8) >> 2; /* REX.X */
+ rex |= (rm & 8) >> 3; /* REX.B */
+
+ /* P_REXB_{R,RM} indicates that the given register is the low byte.
+ For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do,
+ as otherwise the encoding indicates %[abcd]h. Note that the values
+ that are ORed in merely indicate that the REX byte must be present;
+ those bits get discarded in output. */
+ rex |= opc & (r >= 4 ? P_REXB_R : 0);
+ rex |= opc & (rm >= 4 ? P_REXB_RM : 0);
+
+ if (rex) {
+ tcg_out8(s, (uint8_t)(rex | 0x40));
+ }
+
+ if (opc & (P_EXT | P_EXT38 | P_EXT3A)) {
+ tcg_out8(s, 0x0f);
+ if (opc & P_EXT38) {
+ tcg_out8(s, 0x38);
+ } else if (opc & P_EXT3A) {
+ tcg_out8(s, 0x3a);
+ }
+ }
+
+ tcg_out8(s, opc);
+}
+#else
+static void tcg_out_opc(TCGContext *s, int opc)
+{
+ if (opc & P_DATA16) {
+ tcg_out8(s, 0x66);
+ }
+ if (opc & P_SIMDF3) {
+ tcg_out8(s, 0xf3);
+ } else if (opc & P_SIMDF2) {
+ tcg_out8(s, 0xf2);
+ }
+ if (opc & (P_EXT | P_EXT38 | P_EXT3A)) {
+ tcg_out8(s, 0x0f);
+ if (opc & P_EXT38) {
+ tcg_out8(s, 0x38);
+ } else if (opc & P_EXT3A) {
+ tcg_out8(s, 0x3a);
+ }
+ }
+ tcg_out8(s, opc);
+}
+/* Discard the register arguments to tcg_out_opc early, so as not to penalize
+   the 32-bit compilation paths.  This method works with all versions of gcc,
+   whereas relying on optimization to eliminate the arguments may not. */
+#define tcg_out_opc(s, opc, r, rm, x) (tcg_out_opc)(s, opc)
+#endif
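+
+/*
+ * For illustration: tcg_out_opc(s, OPC_MOVZBL + P_REXB_RM, 0, TCG_REG_ESI, 0)
+ * on x86-64 emits 40 0f b6 -- the bare REX prefix (0x40) is what makes the
+ * following ModRM r/m field select %sil rather than %dh.
+ */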
+
+static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm)
+{
+ tcg_out_opc(s, opc, r, rm, 0);
+ tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
+}
+
+static void tcg_out_vex_opc(TCGContext *s, int opc, int r, int v,
+ int rm, int index)
+{
+ int tmp;
+
+ /* Use the two byte form if possible, which cannot encode
+ VEX.W, VEX.B, VEX.X, or an m-mmmm field other than P_EXT. */
+ if ((opc & (P_EXT | P_EXT38 | P_EXT3A | P_VEXW)) == P_EXT
+ && ((rm | index) & 8) == 0) {
+ /* Two byte VEX prefix. */
+ tcg_out8(s, 0xc5);
+
+ tmp = (r & 8 ? 0 : 0x80); /* VEX.R */
+ } else {
+ /* Three byte VEX prefix. */
+ tcg_out8(s, 0xc4);
+
+ /* VEX.m-mmmm */
+ if (opc & P_EXT3A) {
+ tmp = 3;
+ } else if (opc & P_EXT38) {
+ tmp = 2;
+ } else if (opc & P_EXT) {
+ tmp = 1;
+ } else {
+ g_assert_not_reached();
+ }
+ tmp |= (r & 8 ? 0 : 0x80); /* VEX.R */
+ tmp |= (index & 8 ? 0 : 0x40); /* VEX.X */
+ tmp |= (rm & 8 ? 0 : 0x20); /* VEX.B */
+ tcg_out8(s, tmp);
+
+ tmp = (opc & P_VEXW ? 0x80 : 0); /* VEX.W */
+ }
+
+ tmp |= (opc & P_VEXL ? 0x04 : 0); /* VEX.L */
+ /* VEX.pp */
+ if (opc & P_DATA16) {
+ tmp |= 1; /* 0x66 */
+ } else if (opc & P_SIMDF3) {
+ tmp |= 2; /* 0xf3 */
+ } else if (opc & P_SIMDF2) {
+ tmp |= 3; /* 0xf2 */
+ }
+ tmp |= (~v & 15) << 3; /* VEX.vvvv */
+ tcg_out8(s, tmp);
+ tcg_out8(s, opc);
+}
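+
+/*
+ * For illustration: tcg_out_vex_opc(s, OPC_ANDN, 0, 3, 1, 0) -- ANDN with
+ * %eax in the reg field, %ebx in VEX.vvvv and %ecx as r/m -- needs the
+ * three-byte form because of P_EXT38, and emits c4 e2 60 f2; the caller
+ * then appends the ModRM byte, here c1.
+ */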
+
+static void tcg_out_vex_modrm(TCGContext *s, int opc, int r, int v, int rm)
+{
+ tcg_out_vex_opc(s, opc, r, v, rm, 0);
+ tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
+}
+
+/* Output an opcode with a full "rm + (index<<shift) + offset" address mode.
+   We handle either RM or INDEX missing with a negative value. In 64-bit
+ mode for absolute addresses, ~RM is the size of the immediate operand
+ that will follow the instruction. */
+
+static void tcg_out_sib_offset(TCGContext *s, int r, int rm, int index,
+ int shift, intptr_t offset)
+{
+ int mod, len;
+
+ if (index < 0 && rm < 0) {
+ if (TCG_TARGET_REG_BITS == 64) {
+ /* Try for a rip-relative addressing mode. This has replaced
+ the 32-bit-mode absolute addressing encoding. */
+ intptr_t pc = (intptr_t)s->code_ptr + 5 + ~rm;
+ intptr_t disp = offset - pc;
+ if (disp == (int32_t)disp) {
+ tcg_out8(s, (LOWREGMASK(r) << 3) | 5);
+ tcg_out32(s, disp);
+ return;
+ }
+
+ /* Try for an absolute address encoding. This requires the
+ use of the MODRM+SIB encoding and is therefore larger than
+ rip-relative addressing. */
+ if (offset == (int32_t)offset) {
+ tcg_out8(s, (LOWREGMASK(r) << 3) | 4);
+ tcg_out8(s, (4 << 3) | 5);
+ tcg_out32(s, offset);
+ return;
+ }
+
+ /* ??? The memory isn't directly addressable. */
+ g_assert_not_reached();
+ } else {
+ /* Absolute address. */
+ tcg_out8(s, (r << 3) | 5);
+ tcg_out32(s, offset);
+ return;
+ }
+ }
+
+ /* Find the length of the immediate addend. Note that the encoding
+ that would be used for (%ebp) indicates absolute addressing. */
+ if (rm < 0) {
+ mod = 0, len = 4, rm = 5;
+ } else if (offset == 0 && LOWREGMASK(rm) != TCG_REG_EBP) {
+ mod = 0, len = 0;
+ } else if (offset == (int8_t)offset) {
+ mod = 0x40, len = 1;
+ } else {
+ mod = 0x80, len = 4;
+ }
+
+ /* Use a single byte MODRM format if possible. Note that the encoding
+ that would be used for %esp is the escape to the two byte form. */
+ if (index < 0 && LOWREGMASK(rm) != TCG_REG_ESP) {
+ /* Single byte MODRM format. */
+ tcg_out8(s, mod | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
+ } else {
+ /* Two byte MODRM+SIB format. */
+
+ /* Note that the encoding that would place %esp into the index
+ field indicates no index register. In 64-bit mode, the REX.X
+ bit counts, so %r12 can be used as the index. */
+ if (index < 0) {
+ index = 4;
+ } else {
+ tcg_debug_assert(index != TCG_REG_ESP);
+ }
+
+ tcg_out8(s, mod | (LOWREGMASK(r) << 3) | 4);
+ tcg_out8(s, (shift << 6) | (LOWREGMASK(index) << 3) | LOWREGMASK(rm));
+ }
+
+ if (len == 1) {
+ tcg_out8(s, offset);
+ } else if (len == 4) {
+ tcg_out32(s, offset);
+ }
+}
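+
+/*
+ * For illustration: a load of 8(%rsp) must use the two-byte ModRM+SIB form,
+ * since rm == %esp is the SIB escape: "movl 8(%rsp), %eax" comes out as
+ * 8b 44 24 08 (mod=01, index=none, base=%rsp, disp8=8).
+ */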
+
+static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm,
+ int index, int shift, intptr_t offset)
+{
+ tcg_out_opc(s, opc, r, rm < 0 ? 0 : rm, index < 0 ? 0 : index);
+ tcg_out_sib_offset(s, r, rm, index, shift, offset);
+}
+
+static void tcg_out_vex_modrm_sib_offset(TCGContext *s, int opc, int r, int v,
+ int rm, int index, int shift,
+ intptr_t offset)
+{
+ tcg_out_vex_opc(s, opc, r, v, rm < 0 ? 0 : rm, index < 0 ? 0 : index);
+ tcg_out_sib_offset(s, r, rm, index, shift, offset);
+}
+
+/* A simplification of the above with no index or shift. */
+static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r,
+ int rm, intptr_t offset)
+{
+ tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset);
+}
+
+static inline void tcg_out_vex_modrm_offset(TCGContext *s, int opc, int r,
+ int v, int rm, intptr_t offset)
+{
+ tcg_out_vex_modrm_sib_offset(s, opc, r, v, rm, -1, 0, offset);
+}
+
+/* Output an opcode with an expected reference to the constant pool. */
+static inline void tcg_out_modrm_pool(TCGContext *s, int opc, int r)
+{
+ tcg_out_opc(s, opc, r, 0, 0);
+ /* Absolute for 32-bit, pc-relative for 64-bit. */
+ tcg_out8(s, LOWREGMASK(r) << 3 | 5);
+ tcg_out32(s, 0);
+}
+
+/* Output an opcode with an expected reference to the constant pool. */
+static inline void tcg_out_vex_modrm_pool(TCGContext *s, int opc, int r)
+{
+ tcg_out_vex_opc(s, opc, r, 0, 0, 0);
+ /* Absolute for 32-bit, pc-relative for 64-bit. */
+ tcg_out8(s, LOWREGMASK(r) << 3 | 5);
+ tcg_out32(s, 0);
+}
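+
+/*
+ * The zero displacement emitted by the two pool helpers above is a
+ * placeholder: callers register it with new_pool_label() or new_pool_l2()
+ * (R_386_32 or R_386_PC32), and patch_reloc() later rewrites it to point
+ * at the constant pool entry.
+ */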
+
+/* Generate dest op= src. Uses the same ARITH_* codes as tgen_arithi. */
+static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src)
+{
+ /* Propagate an opcode prefix, such as P_REXW. */
+ int ext = subop & ~0x7;
+ subop &= 0x7;
+
+ tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src);
+}
+
+static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
+{
+ int rexw = 0;
+
+ if (arg == ret) {
+ return true;
+ }
+ switch (type) {
+ case TCG_TYPE_I64:
+ rexw = P_REXW;
+ /* fallthru */
+ case TCG_TYPE_I32:
+ if (ret < 16) {
+ if (arg < 16) {
+ tcg_out_modrm(s, OPC_MOVL_GvEv + rexw, ret, arg);
+ } else {
+ tcg_out_vex_modrm(s, OPC_MOVD_EyVy + rexw, arg, 0, ret);
+ }
+ } else {
+ if (arg < 16) {
+ tcg_out_vex_modrm(s, OPC_MOVD_VyEy + rexw, ret, 0, arg);
+ } else {
+ tcg_out_vex_modrm(s, OPC_MOVQ_VqWq, ret, 0, arg);
+ }
+ }
+ break;
+
+ case TCG_TYPE_V64:
+ tcg_debug_assert(ret >= 16 && arg >= 16);
+ tcg_out_vex_modrm(s, OPC_MOVQ_VqWq, ret, 0, arg);
+ break;
+ case TCG_TYPE_V128:
+ tcg_debug_assert(ret >= 16 && arg >= 16);
+ tcg_out_vex_modrm(s, OPC_MOVDQA_VxWx, ret, 0, arg);
+ break;
+ case TCG_TYPE_V256:
+ tcg_debug_assert(ret >= 16 && arg >= 16);
+ tcg_out_vex_modrm(s, OPC_MOVDQA_VxWx | P_VEXL, ret, 0, arg);
+ break;
+
+ default:
+ g_assert_not_reached();
+ }
+ return true;
+}
+
+static const int avx2_dup_insn[4] = {
+ OPC_VPBROADCASTB, OPC_VPBROADCASTW,
+ OPC_VPBROADCASTD, OPC_VPBROADCASTQ,
+};
+
+static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
+ TCGReg r, TCGReg a)
+{
+ if (have_avx2) {
+ int vex_l = (type == TCG_TYPE_V256 ? P_VEXL : 0);
+ tcg_out_vex_modrm(s, avx2_dup_insn[vece] + vex_l, r, 0, a);
+ } else {
+ switch (vece) {
+ case MO_8:
+ /* ??? With zero in a register, use PSHUFB. */
+ tcg_out_vex_modrm(s, OPC_PUNPCKLBW, r, a, a);
+ a = r;
+ /* FALLTHRU */
+ case MO_16:
+ tcg_out_vex_modrm(s, OPC_PUNPCKLWD, r, a, a);
+ a = r;
+ /* FALLTHRU */
+ case MO_32:
+ tcg_out_vex_modrm(s, OPC_PSHUFD, r, 0, a);
+ /* imm8 operand: all output lanes selected from input lane 0. */
+ tcg_out8(s, 0);
+ break;
+ case MO_64:
+ tcg_out_vex_modrm(s, OPC_PUNPCKLQDQ, r, a, a);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ }
+ return true;
+}
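+
+/*
+ * A walk-through of the pre-AVX2 MO_8 path above: punpcklbw replicates the
+ * low byte into a 16-bit lane, punpcklwd widens that to a 32-bit lane, and
+ * pshufd with imm8 0 then broadcasts lane 0 across the 128-bit vector.
+ */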
+
+static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
+ TCGReg r, TCGReg base, intptr_t offset)
+{
+ if (have_avx2) {
+ int vex_l = (type == TCG_TYPE_V256 ? P_VEXL : 0);
+ tcg_out_vex_modrm_offset(s, avx2_dup_insn[vece] + vex_l,
+ r, 0, base, offset);
+ } else {
+ switch (vece) {
+ case MO_64:
+ tcg_out_vex_modrm_offset(s, OPC_MOVDDUP, r, 0, base, offset);
+ break;
+ case MO_32:
+ tcg_out_vex_modrm_offset(s, OPC_VBROADCASTSS, r, 0, base, offset);
+ break;
+ case MO_16:
+ tcg_out_vex_modrm_offset(s, OPC_VPINSRW, r, r, base, offset);
+ tcg_out8(s, 0); /* imm8 */
+ tcg_out_dup_vec(s, type, vece, r, r);
+ break;
+ case MO_8:
+ tcg_out_vex_modrm_offset(s, OPC_VPINSRB, r, r, base, offset);
+ tcg_out8(s, 0); /* imm8 */
+ tcg_out_dup_vec(s, type, vece, r, r);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ }
+ return true;
+}
+
+static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
+ TCGReg ret, int64_t arg)
+{
+ int vex_l = (type == TCG_TYPE_V256 ? P_VEXL : 0);
+
+ if (arg == 0) {
+ tcg_out_vex_modrm(s, OPC_PXOR, ret, ret, ret);
+ return;
+ }
+ if (arg == -1) {
+ tcg_out_vex_modrm(s, OPC_PCMPEQB + vex_l, ret, ret, ret);
+ return;
+ }
+
+ if (TCG_TARGET_REG_BITS == 32 && vece < MO_64) {
+ if (have_avx2) {
+ tcg_out_vex_modrm_pool(s, OPC_VPBROADCASTD + vex_l, ret);
+ } else {
+ tcg_out_vex_modrm_pool(s, OPC_VBROADCASTSS, ret);
+ }
+ new_pool_label(s, arg, R_386_32, s->code_ptr - 4, 0);
+ } else {
+ if (type == TCG_TYPE_V64) {
+ tcg_out_vex_modrm_pool(s, OPC_MOVQ_VqWq, ret);
+ } else if (have_avx2) {
+ tcg_out_vex_modrm_pool(s, OPC_VPBROADCASTQ + vex_l, ret);
+ } else {
+ tcg_out_vex_modrm_pool(s, OPC_MOVDDUP, ret);
+ }
+ if (TCG_TARGET_REG_BITS == 64) {
+ new_pool_label(s, arg, R_386_PC32, s->code_ptr - 4, -4);
+ } else {
+ new_pool_l2(s, R_386_32, s->code_ptr - 4, 0, arg, arg >> 32);
+ }
+ }
+}
+
+static void tcg_out_movi_vec(TCGContext *s, TCGType type,
+ TCGReg ret, tcg_target_long arg)
+{
+ if (arg == 0) {
+ tcg_out_vex_modrm(s, OPC_PXOR, ret, ret, ret);
+ return;
+ }
+ if (arg == -1) {
+ tcg_out_vex_modrm(s, OPC_PCMPEQB, ret, ret, ret);
+ return;
+ }
+
+ int rexw = (type == TCG_TYPE_I32 ? 0 : P_REXW);
+ tcg_out_vex_modrm_pool(s, OPC_MOVD_VyEy + rexw, ret);
+ if (TCG_TARGET_REG_BITS == 64) {
+ new_pool_label(s, arg, R_386_PC32, s->code_ptr - 4, -4);
+ } else {
+ new_pool_label(s, arg, R_386_32, s->code_ptr - 4, 0);
+ }
+}
+
+static void tcg_out_movi_int(TCGContext *s, TCGType type,
+ TCGReg ret, tcg_target_long arg)
+{
+ tcg_target_long diff;
+
+ if (arg == 0) {
+ tgen_arithr(s, ARITH_XOR, ret, ret);
+ return;
+ }
+ if (arg == (uint32_t)arg || type == TCG_TYPE_I32) {
+ tcg_out_opc(s, OPC_MOVL_Iv + LOWREGMASK(ret), 0, ret, 0);
+ tcg_out32(s, arg);
+ return;
+ }
+ if (arg == (int32_t)arg) {
+ tcg_out_modrm(s, OPC_MOVL_EvIz + P_REXW, 0, ret);
+ tcg_out32(s, arg);
+ return;
+ }
+
+ /* Try a 7 byte pc-relative lea before the 10 byte movq. */
+ diff = tcg_pcrel_diff(s, (const void *)arg) - 7;
+ if (diff == (int32_t)diff) {
+ tcg_out_opc(s, OPC_LEA | P_REXW, ret, 0, 0);
+ tcg_out8(s, (LOWREGMASK(ret) << 3) | 5);
+ tcg_out32(s, diff);
+ return;
+ }
+
+ tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0);
+ tcg_out64(s, arg);
+}
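+
+/*
+ * For illustration: a value that fits in 32 bits unsigned takes the short
+ * "b8+r imm32" form, one that only fits sign-extended takes "REX.W c7 /0
+ * imm32", and anything else falls back to the pc-relative LEA or the
+ * 10-byte movq handled at the end of the function.
+ */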
+
+static void tcg_out_movi(TCGContext *s, TCGType type,
+ TCGReg ret, tcg_target_long arg)
+{
+ switch (type) {
+ case TCG_TYPE_I32:
+#if TCG_TARGET_REG_BITS == 64
+ case TCG_TYPE_I64:
+#endif
+ if (ret < 16) {
+ tcg_out_movi_int(s, type, ret, arg);
+ } else {
+ tcg_out_movi_vec(s, type, ret, arg);
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
+{
+ if (val == (int8_t)val) {
+ tcg_out_opc(s, OPC_PUSH_Ib, 0, 0, 0);
+ tcg_out8(s, val);
+ } else if (val == (int32_t)val) {
+ tcg_out_opc(s, OPC_PUSH_Iv, 0, 0, 0);
+ tcg_out32(s, val);
+ } else {
+ tcg_abort();
+ }
+}
+
+static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
+{
+    /* Given the strength of x86 memory ordering, we only need to care about
+       store-load ordering.  Experimentally, "lock orl $0,0(%esp)" is
+       faster than "mfence", so don't bother with the SSE insn. */
+ if (a0 & TCG_MO_ST_LD) {
+ tcg_out8(s, 0xf0);
+ tcg_out_modrm_offset(s, OPC_ARITH_EvIb, ARITH_OR, TCG_REG_ESP, 0);
+ tcg_out8(s, 0);
+ }
+}
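+
+/*
+ * For reference, the barrier above comes out as f0 83 0c 24 00, i.e.
+ * "lock orl $0, (%esp)" -- a locked read-modify-write of the stack slot,
+ * which acts as a full barrier without touching any register.
+ */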
+
+static inline void tcg_out_push(TCGContext *s, int reg)
+{
+ tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0);
+}
+
+static inline void tcg_out_pop(TCGContext *s, int reg)
+{
+ tcg_out_opc(s, OPC_POP_r32 + LOWREGMASK(reg), 0, reg, 0);
+}
+
+static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
+ TCGReg arg1, intptr_t arg2)
+{
+ switch (type) {
+ case TCG_TYPE_I32:
+ if (ret < 16) {
+ tcg_out_modrm_offset(s, OPC_MOVL_GvEv, ret, arg1, arg2);
+ } else {
+ tcg_out_vex_modrm_offset(s, OPC_MOVD_VyEy, ret, 0, arg1, arg2);
+ }
+ break;
+ case TCG_TYPE_I64:
+ if (ret < 16) {
+ tcg_out_modrm_offset(s, OPC_MOVL_GvEv | P_REXW, ret, arg1, arg2);
+ break;
+ }
+ /* FALLTHRU */
+ case TCG_TYPE_V64:
+ /* There is no instruction that can validate 8-byte alignment. */
+ tcg_debug_assert(ret >= 16);
+ tcg_out_vex_modrm_offset(s, OPC_MOVQ_VqWq, ret, 0, arg1, arg2);
+ break;
+ case TCG_TYPE_V128:
+ /*
+     * The gvec infrastructure asserts that v128 vector loads
+ * and stores use a 16-byte aligned offset. Validate that the
+ * final pointer is aligned by using an insn that will SIGSEGV.
+ */
+ tcg_debug_assert(ret >= 16);
+ tcg_out_vex_modrm_offset(s, OPC_MOVDQA_VxWx, ret, 0, arg1, arg2);
+ break;
+ case TCG_TYPE_V256:
+ /*
+ * The gvec infrastructure only requires 16-byte alignment,
+ * so here we must use an unaligned load.
+ */
+ tcg_debug_assert(ret >= 16);
+ tcg_out_vex_modrm_offset(s, OPC_MOVDQU_VxWx | P_VEXL,
+ ret, 0, arg1, arg2);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
+ TCGReg arg1, intptr_t arg2)
+{
+ switch (type) {
+ case TCG_TYPE_I32:
+ if (arg < 16) {
+ tcg_out_modrm_offset(s, OPC_MOVL_EvGv, arg, arg1, arg2);
+ } else {
+ tcg_out_vex_modrm_offset(s, OPC_MOVD_EyVy, arg, 0, arg1, arg2);
+ }
+ break;
+ case TCG_TYPE_I64:
+ if (arg < 16) {
+ tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_REXW, arg, arg1, arg2);
+ break;
+ }
+ /* FALLTHRU */
+ case TCG_TYPE_V64:
+ /* There is no instruction that can validate 8-byte alignment. */
+ tcg_debug_assert(arg >= 16);
+ tcg_out_vex_modrm_offset(s, OPC_MOVQ_WqVq, arg, 0, arg1, arg2);
+ break;
+ case TCG_TYPE_V128:
+ /*
+     * The gvec infrastructure asserts that v128 vector loads
+ * and stores use a 16-byte aligned offset. Validate that the
+ * final pointer is aligned by using an insn that will SIGSEGV.
+ */
+ tcg_debug_assert(arg >= 16);
+ tcg_out_vex_modrm_offset(s, OPC_MOVDQA_WxVx, arg, 0, arg1, arg2);
+ break;
+ case TCG_TYPE_V256:
+ /*
+ * The gvec infrastructure only requires 16-byte alignment,
+ * so here we must use an unaligned store.
+ */
+ tcg_debug_assert(arg >= 16);
+ tcg_out_vex_modrm_offset(s, OPC_MOVDQU_WxVx | P_VEXL,
+ arg, 0, arg1, arg2);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
+ TCGReg base, intptr_t ofs)
+{
+ int rexw = 0;
+ if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I64) {
+ if (val != (int32_t)val) {
+ return false;
+ }
+ rexw = P_REXW;
+ } else if (type != TCG_TYPE_I32) {
+ return false;
+ }
+ tcg_out_modrm_offset(s, OPC_MOVL_EvIz | rexw, 0, base, ofs);
+ tcg_out32(s, val);
+ return true;
+}
+
+static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count)
+{
+ /* Propagate an opcode prefix, such as P_DATA16. */
+ int ext = subopc & ~0x7;
+ subopc &= 0x7;
+
+ if (count == 1) {
+ tcg_out_modrm(s, OPC_SHIFT_1 + ext, subopc, reg);
+ } else {
+ tcg_out_modrm(s, OPC_SHIFT_Ib + ext, subopc, reg);
+ tcg_out8(s, count);
+ }
+}
+
+static inline void tcg_out_bswap32(TCGContext *s, int reg)
+{
+ tcg_out_opc(s, OPC_BSWAP + LOWREGMASK(reg), 0, reg, 0);
+}
+
+static inline void tcg_out_rolw_8(TCGContext *s, int reg)
+{
+ tcg_out_shifti(s, SHIFT_ROL + P_DATA16, reg, 8);
+}
+
+static inline void tcg_out_ext8u(TCGContext *s, int dest, int src)
+{
+ /* movzbl */
+ tcg_debug_assert(src < 4 || TCG_TARGET_REG_BITS == 64);
+ tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src);
+}
+
+static void tcg_out_ext8s(TCGContext *s, int dest, int src, int rexw)
+{
+ /* movsbl */
+ tcg_debug_assert(src < 4 || TCG_TARGET_REG_BITS == 64);
+ tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src);
+}
+
+static inline void tcg_out_ext16u(TCGContext *s, int dest, int src)
+{
+ /* movzwl */
+ tcg_out_modrm(s, OPC_MOVZWL, dest, src);
+}
+
+static inline void tcg_out_ext16s(TCGContext *s, int dest, int src, int rexw)
+{
+ /* movsw[lq] */
+ tcg_out_modrm(s, OPC_MOVSWL + rexw, dest, src);
+}
+
+static inline void tcg_out_ext32u(TCGContext *s, int dest, int src)
+{
+ /* 32-bit mov zero extends. */
+ tcg_out_modrm(s, OPC_MOVL_GvEv, dest, src);
+}
+
+static inline void tcg_out_ext32s(TCGContext *s, int dest, int src)
+{
+ tcg_out_modrm(s, OPC_MOVSLQ, dest, src);
+}
+
+static inline void tcg_out_bswap64(TCGContext *s, int reg)
+{
+ tcg_out_opc(s, OPC_BSWAP + P_REXW + LOWREGMASK(reg), 0, reg, 0);
+}
+
+static void tgen_arithi(TCGContext *s, int c, int r0,
+ tcg_target_long val, int cf)
+{
+ int rexw = 0;
+
+ if (TCG_TARGET_REG_BITS == 64) {
+ rexw = c & -8;
+ c &= 7;
+ }
+
+    /* ??? While INC is 2 bytes shorter than ADDL $1, INC and DEC also
+       induce partial-flags-update stalls on Pentium 4 and are not recommended
+ by current Intel optimization manuals. */
+ if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) {
+ int is_inc = (c == ARITH_ADD) ^ (val < 0);
+ if (TCG_TARGET_REG_BITS == 64) {
+ /* The single-byte increment encodings are re-tasked as the
+ REX prefixes. Use the MODRM encoding. */
+ tcg_out_modrm(s, OPC_GRP5 + rexw,
+ (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0);
+ } else {
+ tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0);
+ }
+ return;
+ }
+
+ if (c == ARITH_AND) {
+ if (TCG_TARGET_REG_BITS == 64) {
+ if (val == 0xffffffffu) {
+ tcg_out_ext32u(s, r0, r0);
+ return;
+ }
+ if (val == (uint32_t)val) {
+ /* AND with no high bits set can use a 32-bit operation. */
+ rexw = 0;
+ }
+ }
+ if (val == 0xffu && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) {
+ tcg_out_ext8u(s, r0, r0);
+ return;
+ }
+ if (val == 0xffffu) {
+ tcg_out_ext16u(s, r0, r0);
+ return;
+ }
+ }
+
+ if (val == (int8_t)val) {
+ tcg_out_modrm(s, OPC_ARITH_EvIb + rexw, c, r0);
+ tcg_out8(s, val);
+ return;
+ }
+ if (rexw == 0 || val == (int32_t)val) {
+ tcg_out_modrm(s, OPC_ARITH_EvIz + rexw, c, r0);
+ tcg_out32(s, val);
+ return;
+ }
+
+ tcg_abort();
+}
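+
+/*
+ * For illustration: tgen_arithi(s, ARITH_ADD + P_REXW, TCG_REG_RAX, 8, 0)
+ * emits 48 83 c0 08 ("addq $8, %rax") via the imm8 form, while an AND with
+ * 0xff on a byte-addressable register is turned into movzbl instead.
+ */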
+
+static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
+{
+ if (val != 0) {
+ tgen_arithi(s, ARITH_ADD + P_REXW, reg, val, 0);
+ }
+}
+
+/* Use SMALL != 0 to force a short forward branch. */
+static void tcg_out_jxx(TCGContext *s, int opc, TCGLabel *l, int small)
+{
+ int32_t val, val1;
+
+ if (l->has_value) {
+ val = tcg_pcrel_diff(s, l->u.value_ptr);
+ val1 = val - 2;
+ if ((int8_t)val1 == val1) {
+ if (opc == -1) {
+ tcg_out8(s, OPC_JMP_short);
+ } else {
+ tcg_out8(s, OPC_JCC_short + opc);
+ }
+ tcg_out8(s, val1);
+ } else {
+ if (small) {
+ tcg_abort();
+ }
+ if (opc == -1) {
+ tcg_out8(s, OPC_JMP_long);
+ tcg_out32(s, val - 5);
+ } else {
+ tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
+ tcg_out32(s, val - 6);
+ }
+ }
+ } else if (small) {
+ if (opc == -1) {
+ tcg_out8(s, OPC_JMP_short);
+ } else {
+ tcg_out8(s, OPC_JCC_short + opc);
+ }
+ tcg_out_reloc(s, s->code_ptr, R_386_PC8, l, -1);
+ s->code_ptr += 1;
+ } else {
+ if (opc == -1) {
+ tcg_out8(s, OPC_JMP_long);
+ } else {
+ tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
+ }
+ tcg_out_reloc(s, s->code_ptr, R_386_PC32, l, -4);
+ s->code_ptr += 4;
+ }
+}
+
+static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2,
+ int const_arg2, int rexw)
+{
+ if (const_arg2) {
+ if (arg2 == 0) {
+ /* test r, r */
+ tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1);
+ } else {
+ tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0);
+ }
+ } else {
+ tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2);
+ }
+}
+
+static void tcg_out_brcond32(TCGContext *s, TCGCond cond,
+ TCGArg arg1, TCGArg arg2, int const_arg2,
+ TCGLabel *label, int small)
+{
+ tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
+ tcg_out_jxx(s, tcg_cond_to_jcc[cond], label, small);
+}
+
+#if TCG_TARGET_REG_BITS == 64
+static void tcg_out_brcond64(TCGContext *s, TCGCond cond,
+ TCGArg arg1, TCGArg arg2, int const_arg2,
+ TCGLabel *label, int small)
+{
+ tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
+ tcg_out_jxx(s, tcg_cond_to_jcc[cond], label, small);
+}
+#else
+/* XXX: we implement it at the target level to avoid having to
+   handle temporaries that live across basic blocks. */
+static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
+ const int *const_args, int small)
+{
+ TCGLabel *label_next = gen_new_label();
+ TCGLabel *label_this = arg_label(args[5]);
+
+ switch(args[4]) {
+ case TCG_COND_EQ:
+ tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
+ label_next, 1);
+ tcg_out_brcond32(s, TCG_COND_EQ, args[1], args[3], const_args[3],
+ label_this, small);
+ break;
+ case TCG_COND_NE:
+ tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
+ label_this, small);
+ tcg_out_brcond32(s, TCG_COND_NE, args[1], args[3], const_args[3],
+ label_this, small);
+ break;
+ case TCG_COND_LT:
+ tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
+ label_this, small);
+ tcg_out_jxx(s, JCC_JNE, label_next, 1);
+ tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
+ label_this, small);
+ break;
+ case TCG_COND_LE:
+ tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
+ label_this, small);
+ tcg_out_jxx(s, JCC_JNE, label_next, 1);
+ tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
+ label_this, small);
+ break;
+ case TCG_COND_GT:
+ tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
+ label_this, small);
+ tcg_out_jxx(s, JCC_JNE, label_next, 1);
+ tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
+ label_this, small);
+ break;
+ case TCG_COND_GE:
+ tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
+ label_this, small);
+ tcg_out_jxx(s, JCC_JNE, label_next, 1);
+ tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
+ label_this, small);
+ break;
+ case TCG_COND_LTU:
+ tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
+ label_this, small);
+ tcg_out_jxx(s, JCC_JNE, label_next, 1);
+ tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
+ label_this, small);
+ break;
+ case TCG_COND_LEU:
+ tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
+ label_this, small);
+ tcg_out_jxx(s, JCC_JNE, label_next, 1);
+ tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
+ label_this, small);
+ break;
+ case TCG_COND_GTU:
+ tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
+ label_this, small);
+ tcg_out_jxx(s, JCC_JNE, label_next, 1);
+ tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
+ label_this, small);
+ break;
+ case TCG_COND_GEU:
+ tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
+ label_this, small);
+ tcg_out_jxx(s, JCC_JNE, label_next, 1);
+ tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
+ label_this, small);
+ break;
+ default:
+ tcg_abort();
+ }
+ tcg_out_label(s, label_next);
+}
+#endif
+
+static void tcg_out_setcond32(TCGContext *s, TCGCond cond, TCGArg dest,
+ TCGArg arg1, TCGArg arg2, int const_arg2)
+{
+ tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
+ tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
+ tcg_out_ext8u(s, dest, dest);
+}
+
+#if TCG_TARGET_REG_BITS == 64
+static void tcg_out_setcond64(TCGContext *s, TCGCond cond, TCGArg dest,
+ TCGArg arg1, TCGArg arg2, int const_arg2)
+{
+ tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
+ tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
+ tcg_out_ext8u(s, dest, dest);
+}
+#else
+static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
+ const int *const_args)
+{
+ TCGArg new_args[6];
+ TCGLabel *label_true, *label_over;
+
+ memcpy(new_args, args+1, 5*sizeof(TCGArg));
+
+ if (args[0] == args[1] || args[0] == args[2]
+ || (!const_args[3] && args[0] == args[3])
+ || (!const_args[4] && args[0] == args[4])) {
+ /* When the destination overlaps with one of the argument
+ registers, don't do anything tricky. */
+ label_true = gen_new_label();
+ label_over = gen_new_label();
+
+ new_args[5] = label_arg(label_true);
+ tcg_out_brcond2(s, new_args, const_args+1, 1);
+
+ tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
+ tcg_out_jxx(s, JCC_JMP, label_over, 1);
+ tcg_out_label(s, label_true);
+
+ tcg_out_movi(s, TCG_TYPE_I32, args[0], 1);
+ tcg_out_label(s, label_over);
+ } else {
+ /* When the destination does not overlap one of the arguments,
+ clear the destination first, jump if cond false, and emit an
+ increment in the true case. This results in smaller code. */
+
+ tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
+
+ label_over = gen_new_label();
+ new_args[4] = tcg_invert_cond(new_args[4]);
+ new_args[5] = label_arg(label_over);
+ tcg_out_brcond2(s, new_args, const_args+1, 1);
+
+ tgen_arithi(s, ARITH_ADD, args[0], 1, 0);
+ tcg_out_label(s, label_over);
+ }
+}
+#endif
+
+static void tcg_out_cmov(TCGContext *s, TCGCond cond, int rexw,
+ TCGReg dest, TCGReg v1)
+{
+ if (have_cmov) {
+ tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond] | rexw, dest, v1);
+ } else {
+ TCGLabel *over = gen_new_label();
+ tcg_out_jxx(s, tcg_cond_to_jcc[tcg_invert_cond(cond)], over, 1);
+ tcg_out_mov(s, TCG_TYPE_I32, dest, v1);
+ tcg_out_label(s, over);
+ }
+}
+
+static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGReg dest,
+ TCGReg c1, TCGArg c2, int const_c2,
+ TCGReg v1)
+{
+ tcg_out_cmp(s, c1, c2, const_c2, 0);
+ tcg_out_cmov(s, cond, 0, dest, v1);
+}
+
+#if TCG_TARGET_REG_BITS == 64
+static void tcg_out_movcond64(TCGContext *s, TCGCond cond, TCGReg dest,
+ TCGReg c1, TCGArg c2, int const_c2,
+ TCGReg v1)
+{
+ tcg_out_cmp(s, c1, c2, const_c2, P_REXW);
+ tcg_out_cmov(s, cond, P_REXW, dest, v1);
+}
+#endif
+
+static void tcg_out_ctz(TCGContext *s, int rexw, TCGReg dest, TCGReg arg1,
+ TCGArg arg2, bool const_a2)
+{
+ if (have_bmi1) {
+ tcg_out_modrm(s, OPC_TZCNT + rexw, dest, arg1);
+ if (const_a2) {
+ tcg_debug_assert(arg2 == (rexw ? 64 : 32));
+ } else {
+ tcg_debug_assert(dest != arg2);
+ tcg_out_cmov(s, TCG_COND_LTU, rexw, dest, arg2);
+ }
+ } else {
+ tcg_debug_assert(dest != arg2);
+ tcg_out_modrm(s, OPC_BSF + rexw, dest, arg1);
+ tcg_out_cmov(s, TCG_COND_EQ, rexw, dest, arg2);
+ }
+}
+
+static void tcg_out_clz(TCGContext *s, int rexw, TCGReg dest, TCGReg arg1,
+ TCGArg arg2, bool const_a2)
+{
+ if (have_lzcnt) {
+ tcg_out_modrm(s, OPC_LZCNT + rexw, dest, arg1);
+ if (const_a2) {
+ tcg_debug_assert(arg2 == (rexw ? 64 : 32));
+ } else {
+ tcg_debug_assert(dest != arg2);
+ tcg_out_cmov(s, TCG_COND_LTU, rexw, dest, arg2);
+ }
+ } else {
+ tcg_debug_assert(!const_a2);
+ tcg_debug_assert(dest != arg1);
+ tcg_debug_assert(dest != arg2);
+
+ /* Recall that the output of BSR is the index not the count. */
+ tcg_out_modrm(s, OPC_BSR + rexw, dest, arg1);
+ tgen_arithi(s, ARITH_XOR + rexw, dest, rexw ? 63 : 31, 0);
+
+ /* Since we have destroyed the flags from BSR, we have to re-test. */
+ tcg_out_cmp(s, arg1, 0, 1, rexw);
+ tcg_out_cmov(s, TCG_COND_EQ, rexw, dest, arg2);
+ }
+}
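+
+/*
+ * Worked example for the BSR fallback above: for arg1 = 0x00008000, BSR
+ * yields bit index 15 and the XOR with 31 turns that into 16, which is
+ * indeed clz32(0x00008000); the final CMOV only covers the arg1 == 0 case.
+ */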
+
+static void tcg_out_branch(TCGContext *s, int call, const tcg_insn_unit *dest)
+{
+ intptr_t disp = tcg_pcrel_diff(s, dest) - 5;
+
+ if (disp == (int32_t)disp) {
+ tcg_out_opc(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0);
+ tcg_out32(s, disp);
+ } else {
+ /* rip-relative addressing into the constant pool.
+           This is 6 + 8 = 14 bytes, as compared to using an
+           immediate load of 10 + 6 = 16 bytes, plus we may
+ be able to re-use the pool constant for more calls. */
+ tcg_out_opc(s, OPC_GRP5, 0, 0, 0);
+ tcg_out8(s, (call ? EXT5_CALLN_Ev : EXT5_JMPN_Ev) << 3 | 5);
+ new_pool_label(s, (uintptr_t)dest, R_386_PC32, s->code_ptr, -4);
+ tcg_out32(s, 0);
+ }
+}
+
+static inline void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest)
+{
+ tcg_out_branch(s, 1, dest);
+}
+
+static void tcg_out_jmp(TCGContext *s, const tcg_insn_unit *dest)
+{
+ tcg_out_branch(s, 0, dest);
+}
+
+static void tcg_out_nopn(TCGContext *s, int n)
+{
+ int i;
+ /* Emit 1 or 2 operand size prefixes for the standard one byte nop,
+ * "xchg %eax,%eax", forming "xchg %ax,%ax". All cores accept the
+ * duplicate prefix, and all of the interesting recent cores can
+ * decode and discard the duplicates in a single cycle.
+ */
+ tcg_debug_assert(n >= 1);
+ for (i = 1; i < n; ++i) {
+ tcg_out8(s, 0x66);
+ }
+ tcg_out8(s, 0x90);
+}
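+
+/*
+ * For illustration: tcg_out_nopn(s, 3) emits 66 66 90, which disassembles
+ * as a single prefixed "xchg %ax,%ax" and is used by goto_tb so that the
+ * 4-byte jump displacement ends up 4-byte aligned for atomic patching.
+ */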
+
+#if defined(CONFIG_SOFTMMU)
+#include "../tcg-ldst.c.inc"
+
+/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
+ *                                     MemOpIdx oi, uintptr_t ra)
+ */
+static void * const qemu_ld_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
+ [MO_UB] = helper_ret_ldub_mmu,
+ [MO_LEUW] = helper_le_lduw_mmu,
+ [MO_LEUL] = helper_le_ldul_mmu,
+ [MO_LEQ] = helper_le_ldq_mmu,
+ [MO_BEUW] = helper_be_lduw_mmu,
+ [MO_BEUL] = helper_be_ldul_mmu,
+ [MO_BEQ] = helper_be_ldq_mmu,
+};
+
+/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
+ *                                     uintxx_t val, MemOpIdx oi, uintptr_t ra)
+ */
+static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
+ [MO_UB] = helper_ret_stb_mmu,
+ [MO_LEUW] = helper_le_stw_mmu,
+ [MO_LEUL] = helper_le_stl_mmu,
+ [MO_LEQ] = helper_le_stq_mmu,
+ [MO_BEUW] = helper_be_stw_mmu,
+ [MO_BEUL] = helper_be_stl_mmu,
+ [MO_BEQ] = helper_be_stq_mmu,
+};
+
+/* Perform the TLB load and compare.
+
+ Inputs:
+ ADDRLO and ADDRHI contain the low and high part of the address.
+
+ MEM_INDEX and S_BITS are the memory context and log2 size of the load.
+
+ WHICH is the offset into the CPUTLBEntry structure of the slot to read.
+ This should be offsetof addr_read or addr_write.
+
+ Outputs:
+ LABEL_PTRS is filled with 1 (32-bit addresses) or 2 (64-bit addresses)
+ positions of the displacements of forward jumps to the TLB miss case.
+
+ Second argument register is loaded with the low part of the address.
+ In the TLB hit case, it has been adjusted as indicated by the TLB
+ and so is a host address. In the TLB miss case, it continues to
+ hold a guest address.
+
+ First argument register is clobbered. */
+
+static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
+ int mem_index, MemOp opc,
+ tcg_insn_unit **label_ptr, int which)
+{
+ const TCGReg r0 = TCG_REG_L0;
+ const TCGReg r1 = TCG_REG_L1;
+ TCGType ttype = TCG_TYPE_I32;
+ TCGType tlbtype = TCG_TYPE_I32;
+ int trexw = 0, hrexw = 0, tlbrexw = 0;
+ unsigned a_bits = get_alignment_bits(opc);
+ unsigned s_bits = opc & MO_SIZE;
+ unsigned a_mask = (1 << a_bits) - 1;
+ unsigned s_mask = (1 << s_bits) - 1;
+ target_ulong tlb_mask;
+
+ if (TCG_TARGET_REG_BITS == 64) {
+ if (TARGET_LONG_BITS == 64) {
+ ttype = TCG_TYPE_I64;
+ trexw = P_REXW;
+ }
+ if (TCG_TYPE_PTR == TCG_TYPE_I64) {
+ hrexw = P_REXW;
+ if (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32) {
+ tlbtype = TCG_TYPE_I64;
+ tlbrexw = P_REXW;
+ }
+ }
+ }
+
+ tcg_out_mov(s, tlbtype, r0, addrlo);
+ tcg_out_shifti(s, SHIFT_SHR + tlbrexw, r0,
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
+
+ tcg_out_modrm_offset(s, OPC_AND_GvEv + trexw, r0, TCG_AREG0,
+ TLB_MASK_TABLE_OFS(mem_index) +
+ offsetof(CPUTLBDescFast, mask));
+
+ tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, r0, TCG_AREG0,
+ TLB_MASK_TABLE_OFS(mem_index) +
+ offsetof(CPUTLBDescFast, table));
+
+ /* If the required alignment is at least as large as the access, simply
+ copy the address and mask. For lesser alignments, check that we don't
+ cross pages for the complete access. */
+ if (a_bits >= s_bits) {
+ tcg_out_mov(s, ttype, r1, addrlo);
+ } else {
+ tcg_out_modrm_offset(s, OPC_LEA + trexw, r1, addrlo, s_mask - a_mask);
+ }
+ tlb_mask = (target_ulong)TARGET_PAGE_MASK | a_mask;
+ tgen_arithi(s, ARITH_AND + trexw, r1, tlb_mask, 0);
+
+ /* cmp 0(r0), r1 */
+ tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw, r1, r0, which);
+
+ /* Prepare for both the fast path add of the tlb addend, and the slow
+ path function argument setup. */
+ tcg_out_mov(s, ttype, r1, addrlo);
+
+ /* jne slow_path */
+ tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
+ label_ptr[0] = s->code_ptr;
+ s->code_ptr += 4;
+
+ if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
+ /* cmp 4(r0), addrhi */
+ tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, r0, which + 4);
+
+ /* jne slow_path */
+ tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
+ label_ptr[1] = s->code_ptr;
+ s->code_ptr += 4;
+ }
+
+ /* TLB Hit. */
+
+ /* add addend(r0), r1 */
+ tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, r1, r0,
+ offsetof(CPUTLBEntry, addend));
+}
+
+/*
+ * Record the context of a call to the out-of-line helper code for the slow
+ * path of a load or store, so that we can later generate the correct helper
+ * code.
+ */
+static void add_qemu_ldst_label(TCGContext *s, bool is_ld, bool is_64,
+ MemOpIdx oi,
+ TCGReg datalo, TCGReg datahi,
+ TCGReg addrlo, TCGReg addrhi,
+ tcg_insn_unit *raddr,
+ tcg_insn_unit **label_ptr)
+{
+ TCGLabelQemuLdst *label = new_ldst_label(s);
+
+ label->is_ld = is_ld;
+ label->oi = oi;
+ label->type = is_64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
+ label->datalo_reg = datalo;
+ label->datahi_reg = datahi;
+ label->addrlo_reg = addrlo;
+ label->addrhi_reg = addrhi;
+ label->raddr = tcg_splitwx_to_rx(raddr);
+ label->label_ptr[0] = label_ptr[0];
+ if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
+ label->label_ptr[1] = label_ptr[1];
+ }
+}
+
+/*
+ * Generate code for the slow path for a load at the end of block
+ */
+static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
+{
+ MemOpIdx oi = l->oi;
+ MemOp opc = get_memop(oi);
+ TCGReg data_reg;
+ tcg_insn_unit **label_ptr = &l->label_ptr[0];
+ int rexw = (l->type == TCG_TYPE_I64 ? P_REXW : 0);
+
+ /* resolve label address */
+ tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4);
+ if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
+ tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
+ }
+
+ if (TCG_TARGET_REG_BITS == 32) {
+ int ofs = 0;
+
+ tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
+ ofs += 4;
+
+ tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
+ ofs += 4;
+
+ if (TARGET_LONG_BITS == 64) {
+ tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
+ ofs += 4;
+ }
+
+ tcg_out_sti(s, TCG_TYPE_I32, oi, TCG_REG_ESP, ofs);
+ ofs += 4;
+
+ tcg_out_sti(s, TCG_TYPE_PTR, (uintptr_t)l->raddr, TCG_REG_ESP, ofs);
+ } else {
+ tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
+ /* The second argument is already loaded with addrlo. */
+ tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], oi);
+ tcg_out_movi(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[3],
+ (uintptr_t)l->raddr);
+ }
+
+ tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
+
+ data_reg = l->datalo_reg;
+ switch (opc & MO_SSIZE) {
+ case MO_SB:
+ tcg_out_ext8s(s, data_reg, TCG_REG_EAX, rexw);
+ break;
+ case MO_SW:
+ tcg_out_ext16s(s, data_reg, TCG_REG_EAX, rexw);
+ break;
+#if TCG_TARGET_REG_BITS == 64
+ case MO_SL:
+ tcg_out_ext32s(s, data_reg, TCG_REG_EAX);
+ break;
+#endif
+ case MO_UB:
+ case MO_UW:
+ /* Note that the helpers have zero-extended to tcg_target_long. */
+ case MO_UL:
+ tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
+ break;
+ case MO_Q:
+ if (TCG_TARGET_REG_BITS == 64) {
+ tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX);
+ } else if (data_reg == TCG_REG_EDX) {
+ /* xchg %edx, %eax */
+ tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
+ tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EAX);
+ } else {
+ tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
+ tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EDX);
+ }
+ break;
+ default:
+ tcg_abort();
+ }
+
+    /* Jump to the code corresponding to the next IR of qemu_ld.  */
+ tcg_out_jmp(s, l->raddr);
+ return true;
+}
+
+/*
+ * Generate code for the slow path for a store at the end of block
+ */
+static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
+{
+ MemOpIdx oi = l->oi;
+ MemOp opc = get_memop(oi);
+ MemOp s_bits = opc & MO_SIZE;
+ tcg_insn_unit **label_ptr = &l->label_ptr[0];
+ TCGReg retaddr;
+
+ /* resolve label address */
+ tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4);
+ if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
+ tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
+ }
+
+ if (TCG_TARGET_REG_BITS == 32) {
+ int ofs = 0;
+
+ tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
+ ofs += 4;
+
+ tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
+ ofs += 4;
+
+ if (TARGET_LONG_BITS == 64) {
+ tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
+ ofs += 4;
+ }
+
+ tcg_out_st(s, TCG_TYPE_I32, l->datalo_reg, TCG_REG_ESP, ofs);
+ ofs += 4;
+
+ if (s_bits == MO_64) {
+ tcg_out_st(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_ESP, ofs);
+ ofs += 4;
+ }
+
+ tcg_out_sti(s, TCG_TYPE_I32, oi, TCG_REG_ESP, ofs);
+ ofs += 4;
+
+ retaddr = TCG_REG_EAX;
+ tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
+ tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP, ofs);
+ } else {
+ tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
+ /* The second argument is already loaded with addrlo. */
+ tcg_out_mov(s, (s_bits == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
+ tcg_target_call_iarg_regs[2], l->datalo_reg);
+ tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3], oi);
+
+ if (ARRAY_SIZE(tcg_target_call_iarg_regs) > 4) {
+ retaddr = tcg_target_call_iarg_regs[4];
+ tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
+ } else {
+ retaddr = TCG_REG_RAX;
+ tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
+ tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP,
+ TCG_TARGET_CALL_STACK_OFFSET);
+ }
+ }
+
+ /* "Tail call" to the helper, with the return address back inline. */
+ tcg_out_push(s, retaddr);
+ tcg_out_jmp(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
+ return true;
+}
+#elif TCG_TARGET_REG_BITS == 32
+# define x86_guest_base_seg 0
+# define x86_guest_base_index -1
+# define x86_guest_base_offset guest_base
+#else
+static int x86_guest_base_seg;
+static int x86_guest_base_index = -1;
+static int32_t x86_guest_base_offset;
+# if defined(__x86_64__) && defined(__linux__)
+# include <asm/prctl.h>
+# include <sys/prctl.h>
+int arch_prctl(int code, unsigned long addr);
+static inline int setup_guest_base_seg(void)
+{
+ if (arch_prctl(ARCH_SET_GS, guest_base) == 0) {
+ return P_GS;
+ }
+ return 0;
+}
+# elif defined (__FreeBSD__) || defined (__FreeBSD_kernel__)
+# include <machine/sysarch.h>
+static inline int setup_guest_base_seg(void)
+{
+ if (sysarch(AMD64_SET_GSBASE, &guest_base) == 0) {
+ return P_GS;
+ }
+ return 0;
+}
+# else
+static inline int setup_guest_base_seg(void)
+{
+ return 0;
+}
+# endif
+#endif /* SOFTMMU */
+
+static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
+ TCGReg base, int index, intptr_t ofs,
+ int seg, bool is64, MemOp memop)
+{
+ bool use_movbe = false;
+ int rexw = is64 * P_REXW;
+ int movop = OPC_MOVL_GvEv;
+
+ /* Do big-endian loads with movbe. */
+ if (memop & MO_BSWAP) {
+ tcg_debug_assert(have_movbe);
+ use_movbe = true;
+ movop = OPC_MOVBE_GyMy;
+ }
+
+ switch (memop & MO_SSIZE) {
+ case MO_UB:
+ tcg_out_modrm_sib_offset(s, OPC_MOVZBL + seg, datalo,
+ base, index, 0, ofs);
+ break;
+ case MO_SB:
+ tcg_out_modrm_sib_offset(s, OPC_MOVSBL + rexw + seg, datalo,
+ base, index, 0, ofs);
+ break;
+ case MO_UW:
+ if (use_movbe) {
+            /* There is no extending movbe; only the low 16 bits are modified. */
+ if (datalo != base && datalo != index) {
+ /* XOR breaks dependency chains. */
+ tgen_arithr(s, ARITH_XOR, datalo, datalo);
+ tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + seg,
+ datalo, base, index, 0, ofs);
+ } else {
+ tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + seg,
+ datalo, base, index, 0, ofs);
+ tcg_out_ext16u(s, datalo, datalo);
+ }
+ } else {
+ tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo,
+ base, index, 0, ofs);
+ }
+ break;
+ case MO_SW:
+ if (use_movbe) {
+ tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + seg,
+ datalo, base, index, 0, ofs);
+ tcg_out_ext16s(s, datalo, datalo, rexw);
+ } else {
+ tcg_out_modrm_sib_offset(s, OPC_MOVSWL + rexw + seg,
+ datalo, base, index, 0, ofs);
+ }
+ break;
+ case MO_UL:
+ tcg_out_modrm_sib_offset(s, movop + seg, datalo, base, index, 0, ofs);
+ break;
+#if TCG_TARGET_REG_BITS == 64
+ case MO_SL:
+ if (use_movbe) {
+ tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + seg, datalo,
+ base, index, 0, ofs);
+ tcg_out_ext32s(s, datalo, datalo);
+ } else {
+ tcg_out_modrm_sib_offset(s, OPC_MOVSLQ + seg, datalo,
+ base, index, 0, ofs);
+ }
+ break;
+#endif
+ case MO_Q:
+ if (TCG_TARGET_REG_BITS == 64) {
+ tcg_out_modrm_sib_offset(s, movop + P_REXW + seg, datalo,
+ base, index, 0, ofs);
+ } else {
+ if (use_movbe) {
+ TCGReg t = datalo;
+ datalo = datahi;
+ datahi = t;
+ }
+ if (base != datalo) {
+ tcg_out_modrm_sib_offset(s, movop + seg, datalo,
+ base, index, 0, ofs);
+ tcg_out_modrm_sib_offset(s, movop + seg, datahi,
+ base, index, 0, ofs + 4);
+ } else {
+ tcg_out_modrm_sib_offset(s, movop + seg, datahi,
+ base, index, 0, ofs + 4);
+ tcg_out_modrm_sib_offset(s, movop + seg, datalo,
+ base, index, 0, ofs);
+ }
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
+}
+
+/* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and
+   EAX.  It will be useful once fixed-register globals are less
+ common. */
+static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
+{
+ TCGReg datalo, datahi, addrlo;
+ TCGReg addrhi __attribute__((unused));
+ MemOpIdx oi;
+ MemOp opc;
+#if defined(CONFIG_SOFTMMU)
+ int mem_index;
+ tcg_insn_unit *label_ptr[2];
+#endif
+
+ datalo = *args++;
+ datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
+ addrlo = *args++;
+ addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
+ oi = *args++;
+ opc = get_memop(oi);
+
+#if defined(CONFIG_SOFTMMU)
+ mem_index = get_mmuidx(oi);
+
+ tcg_out_tlb_load(s, addrlo, addrhi, mem_index, opc,
+ label_ptr, offsetof(CPUTLBEntry, addr_read));
+
+ /* TLB Hit. */
+ tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, -1, 0, 0, is64, opc);
+
+ /* Record the current context of a load into ldst label */
+ add_qemu_ldst_label(s, true, is64, oi, datalo, datahi, addrlo, addrhi,
+ s->code_ptr, label_ptr);
+#else
+ tcg_out_qemu_ld_direct(s, datalo, datahi, addrlo, x86_guest_base_index,
+ x86_guest_base_offset, x86_guest_base_seg,
+ is64, opc);
+#endif
+}
+
+static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
+ TCGReg base, int index, intptr_t ofs,
+ int seg, MemOp memop)
+{
+ bool use_movbe = false;
+ int movop = OPC_MOVL_EvGv;
+
+ /*
+ * Do big-endian stores with movbe or softmmu.
+ * User-only without movbe will have its swapping done generically.
+ */
+ if (memop & MO_BSWAP) {
+ tcg_debug_assert(have_movbe);
+ use_movbe = true;
+ movop = OPC_MOVBE_MyGy;
+ }
+
+ switch (memop & MO_SIZE) {
+ case MO_8:
+ /* This is handled with constraints on INDEX_op_qemu_st8_i32. */
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || datalo < 4);
+ tcg_out_modrm_sib_offset(s, OPC_MOVB_EvGv + P_REXB_R + seg,
+ datalo, base, index, 0, ofs);
+ break;
+ case MO_16:
+ tcg_out_modrm_sib_offset(s, movop + P_DATA16 + seg, datalo,
+ base, index, 0, ofs);
+ break;
+ case MO_32:
+ tcg_out_modrm_sib_offset(s, movop + seg, datalo, base, index, 0, ofs);
+ break;
+ case MO_64:
+ if (TCG_TARGET_REG_BITS == 64) {
+ tcg_out_modrm_sib_offset(s, movop + P_REXW + seg, datalo,
+ base, index, 0, ofs);
+ } else {
+ if (use_movbe) {
+ TCGReg t = datalo;
+ datalo = datahi;
+ datahi = t;
+ }
+ tcg_out_modrm_sib_offset(s, movop + seg, datalo,
+ base, index, 0, ofs);
+ tcg_out_modrm_sib_offset(s, movop + seg, datahi,
+ base, index, 0, ofs + 4);
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
+{
+ TCGReg datalo, datahi, addrlo;
+ TCGReg addrhi __attribute__((unused));
+ MemOpIdx oi;
+ MemOp opc;
+#if defined(CONFIG_SOFTMMU)
+ int mem_index;
+ tcg_insn_unit *label_ptr[2];
+#endif
+
+ datalo = *args++;
+ datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
+ addrlo = *args++;
+ addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
+ oi = *args++;
+ opc = get_memop(oi);
+
+#if defined(CONFIG_SOFTMMU)
+ mem_index = get_mmuidx(oi);
+
+ tcg_out_tlb_load(s, addrlo, addrhi, mem_index, opc,
+ label_ptr, offsetof(CPUTLBEntry, addr_write));
+
+ /* TLB Hit. */
+ tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, -1, 0, 0, opc);
+
+ /* Record the current context of a store into ldst label */
+ add_qemu_ldst_label(s, false, is64, oi, datalo, datahi, addrlo, addrhi,
+ s->code_ptr, label_ptr);
+#else
+ tcg_out_qemu_st_direct(s, datalo, datahi, addrlo, x86_guest_base_index,
+ x86_guest_base_offset, x86_guest_base_seg, opc);
+#endif
+}
+
+static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
+ const TCGArg args[TCG_MAX_OP_ARGS],
+ const int const_args[TCG_MAX_OP_ARGS])
+{
+ TCGArg a0, a1, a2;
+ int c, const_a2, vexop, rexw = 0;
+
+#if TCG_TARGET_REG_BITS == 64
+# define OP_32_64(x) \
+ case glue(glue(INDEX_op_, x), _i64): \
+ rexw = P_REXW; /* FALLTHRU */ \
+ case glue(glue(INDEX_op_, x), _i32)
+#else
+# define OP_32_64(x) \
+ case glue(glue(INDEX_op_, x), _i32)
+#endif
+
+ /* Hoist the loads of the most common arguments. */
+ a0 = args[0];
+ a1 = args[1];
+ a2 = args[2];
+ const_a2 = const_args[2];
+
+ switch (opc) {
+ case INDEX_op_exit_tb:
+ /* Reuse the zeroing that exists for goto_ptr. */
+ if (a0 == 0) {
+ tcg_out_jmp(s, tcg_code_gen_epilogue);
+ } else {
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, a0);
+ tcg_out_jmp(s, tb_ret_addr);
+ }
+ break;
+ case INDEX_op_goto_tb:
+ if (s->tb_jmp_insn_offset) {
+ /* direct jump method */
+ int gap;
+ /* jump displacement must be aligned for atomic patching;
+ * see if we need to add extra nops before jump
+ */
+ gap = QEMU_ALIGN_PTR_UP(s->code_ptr + 1, 4) - s->code_ptr;
+ if (gap != 1) {
+ tcg_out_nopn(s, gap - 1);
+ }
+ tcg_out8(s, OPC_JMP_long); /* jmp im */
+ s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
+ tcg_out32(s, 0);
+ } else {
+ /* indirect jump method */
+ tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1,
+ (intptr_t)(s->tb_jmp_target_addr + a0));
+ }
+ set_jmp_reset_offset(s, a0);
+ break;
+ case INDEX_op_goto_ptr:
+ /* jmp to the given host address (could be epilogue) */
+ tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, a0);
+ break;
+ case INDEX_op_br:
+ tcg_out_jxx(s, JCC_JMP, arg_label(a0), 0);
+ break;
+ OP_32_64(ld8u):
+ /* Note that we can ignore REXW for the zero-extend to 64-bit. */
+ tcg_out_modrm_offset(s, OPC_MOVZBL, a0, a1, a2);
+ break;
+ OP_32_64(ld8s):
+ tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, a0, a1, a2);
+ break;
+ OP_32_64(ld16u):
+ /* Note that we can ignore REXW for the zero-extend to 64-bit. */
+ tcg_out_modrm_offset(s, OPC_MOVZWL, a0, a1, a2);
+ break;
+ OP_32_64(ld16s):
+ tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, a0, a1, a2);
+ break;
+#if TCG_TARGET_REG_BITS == 64
+ case INDEX_op_ld32u_i64:
+#endif
+ case INDEX_op_ld_i32:
+ tcg_out_ld(s, TCG_TYPE_I32, a0, a1, a2);
+ break;
+
+ OP_32_64(st8):
+ if (const_args[0]) {
+ tcg_out_modrm_offset(s, OPC_MOVB_EvIz, 0, a1, a2);
+ tcg_out8(s, a0);
+ } else {
+ tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R, a0, a1, a2);
+ }
+ break;
+ OP_32_64(st16):
+ if (const_args[0]) {
+ tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_DATA16, 0, a1, a2);
+ tcg_out16(s, a0);
+ } else {
+ tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16, a0, a1, a2);
+ }
+ break;
+#if TCG_TARGET_REG_BITS == 64
+ case INDEX_op_st32_i64:
+#endif
+ case INDEX_op_st_i32:
+ if (const_args[0]) {
+ tcg_out_modrm_offset(s, OPC_MOVL_EvIz, 0, a1, a2);
+ tcg_out32(s, a0);
+ } else {
+ tcg_out_st(s, TCG_TYPE_I32, a0, a1, a2);
+ }
+ break;
+
+ OP_32_64(add):
+ /* For 3-operand addition, use LEA. */
+ if (a0 != a1) {
+ TCGArg c3 = 0;
+ if (const_a2) {
+ c3 = a2, a2 = -1;
+ } else if (a0 == a2) {
+ /* Watch out for dest = src + dest, since we've removed
+ the matching constraint on the add. */
+ tgen_arithr(s, ARITH_ADD + rexw, a0, a1);
+ break;
+ }
+
+ tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, c3);
+ break;
+ }
+ c = ARITH_ADD;
+ goto gen_arith;
+ OP_32_64(sub):
+ c = ARITH_SUB;
+ goto gen_arith;
+ OP_32_64(and):
+ c = ARITH_AND;
+ goto gen_arith;
+ OP_32_64(or):
+ c = ARITH_OR;
+ goto gen_arith;
+ OP_32_64(xor):
+ c = ARITH_XOR;
+ goto gen_arith;
+ gen_arith:
+ if (const_a2) {
+ tgen_arithi(s, c + rexw, a0, a2, 0);
+ } else {
+ tgen_arithr(s, c + rexw, a0, a2);
+ }
+ break;
+
+ OP_32_64(andc):
+ if (const_a2) {
+ tcg_out_mov(s, rexw ? TCG_TYPE_I64 : TCG_TYPE_I32, a0, a1);
+ tgen_arithi(s, ARITH_AND + rexw, a0, ~a2, 0);
+ } else {
+ tcg_out_vex_modrm(s, OPC_ANDN + rexw, a0, a2, a1);
+ }
+ break;
+
+ OP_32_64(mul):
+ if (const_a2) {
+ int32_t val;
+ val = a2;
+ if (val == (int8_t)val) {
+ tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, a0, a0);
+ tcg_out8(s, val);
+ } else {
+ tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, a0, a0);
+ tcg_out32(s, val);
+ }
+ } else {
+ tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, a0, a2);
+ }
+ break;
+
+ OP_32_64(div2):
+ tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, args[4]);
+ break;
+ OP_32_64(divu2):
+ tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, args[4]);
+ break;
+
+ OP_32_64(shl):
+ /* For small constant 3-operand shift, use LEA. */
+ if (const_a2 && a0 != a1 && (a2 - 1) < 3) {
+ if (a2 - 1 == 0) {
+ /* shl $1,a1,a0 -> lea (a1,a1),a0 */
+ tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a1, 0, 0);
+ } else {
+ /* shl $n,a1,a0 -> lea 0(,a1,n),a0 */
+ tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, -1, a1, a2, 0);
+ }
+ break;
+ }
+ c = SHIFT_SHL;
+ vexop = OPC_SHLX;
+ goto gen_shift_maybe_vex;
+ OP_32_64(shr):
+ c = SHIFT_SHR;
+ vexop = OPC_SHRX;
+ goto gen_shift_maybe_vex;
+ OP_32_64(sar):
+ c = SHIFT_SAR;
+ vexop = OPC_SARX;
+ goto gen_shift_maybe_vex;
+ OP_32_64(rotl):
+ c = SHIFT_ROL;
+ goto gen_shift;
+ OP_32_64(rotr):
+ c = SHIFT_ROR;
+ goto gen_shift;
+ gen_shift_maybe_vex:
+ if (have_bmi2) {
+ if (!const_a2) {
+ tcg_out_vex_modrm(s, vexop + rexw, a0, a2, a1);
+ break;
+ }
+ tcg_out_mov(s, rexw ? TCG_TYPE_I64 : TCG_TYPE_I32, a0, a1);
+ }
+ /* FALLTHRU */
+ gen_shift:
+ if (const_a2) {
+ tcg_out_shifti(s, c + rexw, a0, a2);
+ } else {
+ tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, a0);
+ }
+ break;
+
+ OP_32_64(ctz):
+ tcg_out_ctz(s, rexw, args[0], args[1], args[2], const_args[2]);
+ break;
+ OP_32_64(clz):
+ tcg_out_clz(s, rexw, args[0], args[1], args[2], const_args[2]);
+ break;
+ OP_32_64(ctpop):
+ tcg_out_modrm(s, OPC_POPCNT + rexw, a0, a1);
+ break;
+
+ case INDEX_op_brcond_i32:
+ tcg_out_brcond32(s, a2, a0, a1, const_args[1], arg_label(args[3]), 0);
+ break;
+ case INDEX_op_setcond_i32:
+ tcg_out_setcond32(s, args[3], a0, a1, a2, const_a2);
+ break;
+ case INDEX_op_movcond_i32:
+ tcg_out_movcond32(s, args[5], a0, a1, a2, const_a2, args[3]);
+ break;
+
+ OP_32_64(bswap16):
+ if (a2 & TCG_BSWAP_OS) {
+ /* Output must be sign-extended. */
+ if (rexw) {
+ tcg_out_bswap64(s, a0);
+ tcg_out_shifti(s, SHIFT_SAR + rexw, a0, 48);
+ } else {
+ tcg_out_bswap32(s, a0);
+ tcg_out_shifti(s, SHIFT_SAR, a0, 16);
+ }
+ } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
+ /* Output must be zero-extended, but input isn't. */
+ tcg_out_bswap32(s, a0);
+ tcg_out_shifti(s, SHIFT_SHR, a0, 16);
+ } else {
+ tcg_out_rolw_8(s, a0);
+ }
+ break;
+ OP_32_64(bswap32):
+ tcg_out_bswap32(s, a0);
+ if (rexw && (a2 & TCG_BSWAP_OS)) {
+ tcg_out_ext32s(s, a0, a0);
+ }
+ break;
+
+ OP_32_64(neg):
+ tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, a0);
+ break;
+ OP_32_64(not):
+ tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, a0);
+ break;
+
+ OP_32_64(ext8s):
+ tcg_out_ext8s(s, a0, a1, rexw);
+ break;
+ OP_32_64(ext16s):
+ tcg_out_ext16s(s, a0, a1, rexw);
+ break;
+ OP_32_64(ext8u):
+ tcg_out_ext8u(s, a0, a1);
+ break;
+ OP_32_64(ext16u):
+ tcg_out_ext16u(s, a0, a1);
+ break;
+
+ case INDEX_op_qemu_ld_i32:
+ tcg_out_qemu_ld(s, args, 0);
+ break;
+ case INDEX_op_qemu_ld_i64:
+ tcg_out_qemu_ld(s, args, 1);
+ break;
+ case INDEX_op_qemu_st_i32:
+ case INDEX_op_qemu_st8_i32:
+ tcg_out_qemu_st(s, args, 0);
+ break;
+ case INDEX_op_qemu_st_i64:
+ tcg_out_qemu_st(s, args, 1);
+ break;
+
+ OP_32_64(mulu2):
+ tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_MUL, args[3]);
+ break;
+ OP_32_64(muls2):
+ tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IMUL, args[3]);
+ break;
+ OP_32_64(add2):
+ if (const_args[4]) {
+ tgen_arithi(s, ARITH_ADD + rexw, a0, args[4], 1);
+ } else {
+ tgen_arithr(s, ARITH_ADD + rexw, a0, args[4]);
+ }
+ if (const_args[5]) {
+ tgen_arithi(s, ARITH_ADC + rexw, a1, args[5], 1);
+ } else {
+ tgen_arithr(s, ARITH_ADC + rexw, a1, args[5]);
+ }
+ break;
+ OP_32_64(sub2):
+ if (const_args[4]) {
+ tgen_arithi(s, ARITH_SUB + rexw, a0, args[4], 1);
+ } else {
+ tgen_arithr(s, ARITH_SUB + rexw, a0, args[4]);
+ }
+ if (const_args[5]) {
+ tgen_arithi(s, ARITH_SBB + rexw, a1, args[5], 1);
+ } else {
+ tgen_arithr(s, ARITH_SBB + rexw, a1, args[5]);
+ }
+ break;
+
+#if TCG_TARGET_REG_BITS == 32
+ case INDEX_op_brcond2_i32:
+ tcg_out_brcond2(s, args, const_args, 0);
+ break;
+ case INDEX_op_setcond2_i32:
+ tcg_out_setcond2(s, args, const_args);
+ break;
+#else /* TCG_TARGET_REG_BITS == 64 */
+ case INDEX_op_ld32s_i64:
+ tcg_out_modrm_offset(s, OPC_MOVSLQ, a0, a1, a2);
+ break;
+ case INDEX_op_ld_i64:
+ tcg_out_ld(s, TCG_TYPE_I64, a0, a1, a2);
+ break;
+ case INDEX_op_st_i64:
+ if (const_args[0]) {
+ tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_REXW, 0, a1, a2);
+ tcg_out32(s, a0);
+ } else {
+ tcg_out_st(s, TCG_TYPE_I64, a0, a1, a2);
+ }
+ break;
+
+ case INDEX_op_brcond_i64:
+ tcg_out_brcond64(s, a2, a0, a1, const_args[1], arg_label(args[3]), 0);
+ break;
+ case INDEX_op_setcond_i64:
+ tcg_out_setcond64(s, args[3], a0, a1, a2, const_a2);
+ break;
+ case INDEX_op_movcond_i64:
+ tcg_out_movcond64(s, args[5], a0, a1, a2, const_a2, args[3]);
+ break;
+
+ case INDEX_op_bswap64_i64:
+ tcg_out_bswap64(s, a0);
+ break;
+ case INDEX_op_extu_i32_i64:
+ case INDEX_op_ext32u_i64:
+ case INDEX_op_extrl_i64_i32:
+ tcg_out_ext32u(s, a0, a1);
+ break;
+ case INDEX_op_ext_i32_i64:
+ case INDEX_op_ext32s_i64:
+ tcg_out_ext32s(s, a0, a1);
+ break;
+ case INDEX_op_extrh_i64_i32:
+ tcg_out_shifti(s, SHIFT_SHR + P_REXW, a0, 32);
+ break;
+#endif
+
+ OP_32_64(deposit):
+ if (args[3] == 0 && args[4] == 8) {
+ /* load bits 0..7 */
+ tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM, a2, a0);
+ } else if (args[3] == 8 && args[4] == 8) {
+ /* load bits 8..15 */
+ tcg_out_modrm(s, OPC_MOVB_EvGv, a2, a0 + 4);
+ } else if (args[3] == 0 && args[4] == 16) {
+ /* load bits 0..15 */
+ tcg_out_modrm(s, OPC_MOVL_EvGv | P_DATA16, a2, a0);
+ } else {
+ tcg_abort();
+ }
+ break;
+
+ case INDEX_op_extract_i64:
+ if (a2 + args[3] == 32) {
+ /* This is a 32-bit zero-extending right shift. */
+ tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
+ tcg_out_shifti(s, SHIFT_SHR, a0, a2);
+ break;
+ }
+ /* FALLTHRU */
+ case INDEX_op_extract_i32:
+        /* On the off-chance that we can use the high-byte registers, do so.
+           Otherwise we emit the same ext16 + shift pattern that we
+           would have gotten from the normal tcg-op.c expansion.  */
+ tcg_debug_assert(a2 == 8 && args[3] == 8);
+ if (a1 < 4 && a0 < 8) {
+ tcg_out_modrm(s, OPC_MOVZBL, a0, a1 + 4);
+ } else {
+ tcg_out_ext16u(s, a0, a1);
+ tcg_out_shifti(s, SHIFT_SHR, a0, 8);
+ }
+ break;
+
+ case INDEX_op_sextract_i32:
+ /* We don't implement sextract_i64, as we cannot sign-extend to
+ 64-bits without using the REX prefix that explicitly excludes
+ access to the high-byte registers. */
+ tcg_debug_assert(a2 == 8 && args[3] == 8);
+ if (a1 < 4 && a0 < 8) {
+ tcg_out_modrm(s, OPC_MOVSBL, a0, a1 + 4);
+ } else {
+ tcg_out_ext16s(s, a0, a1, 0);
+ tcg_out_shifti(s, SHIFT_SAR, a0, 8);
+ }
+ break;
+
+ OP_32_64(extract2):
+ /* Note that SHRD outputs to the r/m operand. */
+ tcg_out_modrm(s, OPC_SHRD_Ib + rexw, a2, a0);
+ tcg_out8(s, args[3]);
+ break;
+
+ case INDEX_op_mb:
+ tcg_out_mb(s, a0);
+ break;
+ case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
+ case INDEX_op_mov_i64:
+ case INDEX_op_call: /* Always emitted via tcg_out_call. */
+ default:
+ tcg_abort();
+ }
+
+#undef OP_32_64
+}
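The add and small-constant-shift cases above lean on LEA to get a three-operand form without clobbering the sources. A minimal sketch of the scalar identities involved (LEA computes base + index*scale + disp), covering the constant add and shifts by 1, 2 and 3:

    #include <assert.h>
    #include <stdint.h>

    /* Scalar form of the address computation LEA performs. */
    static uint64_t lea(uint64_t base, uint64_t index, int scale, int64_t disp)
    {
        return base + index * (uint64_t)scale + (uint64_t)disp;
    }

    int main(void)
    {
        uint64_t a1 = 0x1234, a2 = 0x99;

        assert(lea(a1, a2, 1, 0) == a1 + a2);   /* add a0,a1,a2 -> lea (a1,a2),a0 */
        assert(lea(a1, 0, 1, 77) == a1 + 77);   /* add a0,a1,$77 -> lea 77(a1),a0 */
        assert(lea(a1, a1, 1, 0) == a1 << 1);   /* shl $1 -> lea (a1,a1),a0       */
        assert(lea(0, a1, 4, 0) == a1 << 2);    /* shl $2 -> lea 0(,a1,4),a0      */
        assert(lea(0, a1, 8, 0) == a1 << 3);    /* shl $3 -> lea 0(,a1,8),a0      */
        return 0;
    }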
+
+static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
+ unsigned vecl, unsigned vece,
+ const TCGArg args[TCG_MAX_OP_ARGS],
+ const int const_args[TCG_MAX_OP_ARGS])
+{
+ static int const add_insn[4] = {
+ OPC_PADDB, OPC_PADDW, OPC_PADDD, OPC_PADDQ
+ };
+ static int const ssadd_insn[4] = {
+ OPC_PADDSB, OPC_PADDSW, OPC_UD2, OPC_UD2
+ };
+ static int const usadd_insn[4] = {
+ OPC_PADDUB, OPC_PADDUW, OPC_UD2, OPC_UD2
+ };
+ static int const sub_insn[4] = {
+ OPC_PSUBB, OPC_PSUBW, OPC_PSUBD, OPC_PSUBQ
+ };
+ static int const sssub_insn[4] = {
+ OPC_PSUBSB, OPC_PSUBSW, OPC_UD2, OPC_UD2
+ };
+ static int const ussub_insn[4] = {
+ OPC_PSUBUB, OPC_PSUBUW, OPC_UD2, OPC_UD2
+ };
+ static int const mul_insn[4] = {
+ OPC_UD2, OPC_PMULLW, OPC_PMULLD, OPC_UD2
+ };
+ static int const shift_imm_insn[4] = {
+ OPC_UD2, OPC_PSHIFTW_Ib, OPC_PSHIFTD_Ib, OPC_PSHIFTQ_Ib
+ };
+ static int const cmpeq_insn[4] = {
+ OPC_PCMPEQB, OPC_PCMPEQW, OPC_PCMPEQD, OPC_PCMPEQQ
+ };
+ static int const cmpgt_insn[4] = {
+ OPC_PCMPGTB, OPC_PCMPGTW, OPC_PCMPGTD, OPC_PCMPGTQ
+ };
+ static int const punpckl_insn[4] = {
+ OPC_PUNPCKLBW, OPC_PUNPCKLWD, OPC_PUNPCKLDQ, OPC_PUNPCKLQDQ
+ };
+ static int const punpckh_insn[4] = {
+ OPC_PUNPCKHBW, OPC_PUNPCKHWD, OPC_PUNPCKHDQ, OPC_PUNPCKHQDQ
+ };
+ static int const packss_insn[4] = {
+ OPC_PACKSSWB, OPC_PACKSSDW, OPC_UD2, OPC_UD2
+ };
+ static int const packus_insn[4] = {
+ OPC_PACKUSWB, OPC_PACKUSDW, OPC_UD2, OPC_UD2
+ };
+ static int const smin_insn[4] = {
+ OPC_PMINSB, OPC_PMINSW, OPC_PMINSD, OPC_UD2
+ };
+ static int const smax_insn[4] = {
+ OPC_PMAXSB, OPC_PMAXSW, OPC_PMAXSD, OPC_UD2
+ };
+ static int const umin_insn[4] = {
+ OPC_PMINUB, OPC_PMINUW, OPC_PMINUD, OPC_UD2
+ };
+ static int const umax_insn[4] = {
+ OPC_PMAXUB, OPC_PMAXUW, OPC_PMAXUD, OPC_UD2
+ };
+ static int const shlv_insn[4] = {
+ /* TODO: AVX512 adds support for MO_16. */
+ OPC_UD2, OPC_UD2, OPC_VPSLLVD, OPC_VPSLLVQ
+ };
+ static int const shrv_insn[4] = {
+ /* TODO: AVX512 adds support for MO_16. */
+ OPC_UD2, OPC_UD2, OPC_VPSRLVD, OPC_VPSRLVQ
+ };
+ static int const sarv_insn[4] = {
+ /* TODO: AVX512 adds support for MO_16, MO_64. */
+ OPC_UD2, OPC_UD2, OPC_VPSRAVD, OPC_UD2
+ };
+ static int const shls_insn[4] = {
+ OPC_UD2, OPC_PSLLW, OPC_PSLLD, OPC_PSLLQ
+ };
+ static int const shrs_insn[4] = {
+ OPC_UD2, OPC_PSRLW, OPC_PSRLD, OPC_PSRLQ
+ };
+ static int const sars_insn[4] = {
+ OPC_UD2, OPC_PSRAW, OPC_PSRAD, OPC_UD2
+ };
+ static int const abs_insn[4] = {
+ /* TODO: AVX512 adds support for MO_64. */
+ OPC_PABSB, OPC_PABSW, OPC_PABSD, OPC_UD2
+ };
+
+ TCGType type = vecl + TCG_TYPE_V64;
+ int insn, sub;
+ TCGArg a0, a1, a2;
+
+ a0 = args[0];
+ a1 = args[1];
+ a2 = args[2];
+
+ switch (opc) {
+ case INDEX_op_add_vec:
+ insn = add_insn[vece];
+ goto gen_simd;
+ case INDEX_op_ssadd_vec:
+ insn = ssadd_insn[vece];
+ goto gen_simd;
+ case INDEX_op_usadd_vec:
+ insn = usadd_insn[vece];
+ goto gen_simd;
+ case INDEX_op_sub_vec:
+ insn = sub_insn[vece];
+ goto gen_simd;
+ case INDEX_op_sssub_vec:
+ insn = sssub_insn[vece];
+ goto gen_simd;
+ case INDEX_op_ussub_vec:
+ insn = ussub_insn[vece];
+ goto gen_simd;
+ case INDEX_op_mul_vec:
+ insn = mul_insn[vece];
+ goto gen_simd;
+ case INDEX_op_and_vec:
+ insn = OPC_PAND;
+ goto gen_simd;
+ case INDEX_op_or_vec:
+ insn = OPC_POR;
+ goto gen_simd;
+ case INDEX_op_xor_vec:
+ insn = OPC_PXOR;
+ goto gen_simd;
+ case INDEX_op_smin_vec:
+ insn = smin_insn[vece];
+ goto gen_simd;
+ case INDEX_op_umin_vec:
+ insn = umin_insn[vece];
+ goto gen_simd;
+ case INDEX_op_smax_vec:
+ insn = smax_insn[vece];
+ goto gen_simd;
+ case INDEX_op_umax_vec:
+ insn = umax_insn[vece];
+ goto gen_simd;
+ case INDEX_op_shlv_vec:
+ insn = shlv_insn[vece];
+ goto gen_simd;
+ case INDEX_op_shrv_vec:
+ insn = shrv_insn[vece];
+ goto gen_simd;
+ case INDEX_op_sarv_vec:
+ insn = sarv_insn[vece];
+ goto gen_simd;
+ case INDEX_op_shls_vec:
+ insn = shls_insn[vece];
+ goto gen_simd;
+ case INDEX_op_shrs_vec:
+ insn = shrs_insn[vece];
+ goto gen_simd;
+ case INDEX_op_sars_vec:
+ insn = sars_insn[vece];
+ goto gen_simd;
+ case INDEX_op_x86_punpckl_vec:
+ insn = punpckl_insn[vece];
+ goto gen_simd;
+ case INDEX_op_x86_punpckh_vec:
+ insn = punpckh_insn[vece];
+ goto gen_simd;
+ case INDEX_op_x86_packss_vec:
+ insn = packss_insn[vece];
+ goto gen_simd;
+ case INDEX_op_x86_packus_vec:
+ insn = packus_insn[vece];
+ goto gen_simd;
+#if TCG_TARGET_REG_BITS == 32
+ case INDEX_op_dup2_vec:
+ /* First merge the two 32-bit inputs to a single 64-bit element. */
+ tcg_out_vex_modrm(s, OPC_PUNPCKLDQ, a0, a1, a2);
+ /* Then replicate the 64-bit elements across the rest of the vector. */
+ if (type != TCG_TYPE_V64) {
+ tcg_out_dup_vec(s, type, MO_64, a0, a0);
+ }
+ break;
+#endif
+ case INDEX_op_abs_vec:
+ insn = abs_insn[vece];
+ a2 = a1;
+ a1 = 0;
+ goto gen_simd;
+ gen_simd:
+ tcg_debug_assert(insn != OPC_UD2);
+ if (type == TCG_TYPE_V256) {
+ insn |= P_VEXL;
+ }
+ tcg_out_vex_modrm(s, insn, a0, a1, a2);
+ break;
+
+ case INDEX_op_cmp_vec:
+ sub = args[3];
+ if (sub == TCG_COND_EQ) {
+ insn = cmpeq_insn[vece];
+ } else if (sub == TCG_COND_GT) {
+ insn = cmpgt_insn[vece];
+ } else {
+ g_assert_not_reached();
+ }
+ goto gen_simd;
+
+ case INDEX_op_andc_vec:
+ insn = OPC_PANDN;
+ if (type == TCG_TYPE_V256) {
+ insn |= P_VEXL;
+ }
+ tcg_out_vex_modrm(s, insn, a0, a2, a1);
+ break;
+
+ case INDEX_op_shli_vec:
+ sub = 6;
+ goto gen_shift;
+ case INDEX_op_shri_vec:
+ sub = 2;
+ goto gen_shift;
+ case INDEX_op_sari_vec:
+ tcg_debug_assert(vece != MO_64);
+ sub = 4;
+ gen_shift:
+ tcg_debug_assert(vece != MO_8);
+ insn = shift_imm_insn[vece];
+ if (type == TCG_TYPE_V256) {
+ insn |= P_VEXL;
+ }
+ tcg_out_vex_modrm(s, insn, sub, a0, a1);
+ tcg_out8(s, a2);
+ break;
+
+ case INDEX_op_ld_vec:
+ tcg_out_ld(s, type, a0, a1, a2);
+ break;
+ case INDEX_op_st_vec:
+ tcg_out_st(s, type, a0, a1, a2);
+ break;
+ case INDEX_op_dupm_vec:
+ tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
+ break;
+
+ case INDEX_op_x86_shufps_vec:
+ insn = OPC_SHUFPS;
+ sub = args[3];
+ goto gen_simd_imm8;
+ case INDEX_op_x86_blend_vec:
+ if (vece == MO_16) {
+ insn = OPC_PBLENDW;
+ } else if (vece == MO_32) {
+ insn = (have_avx2 ? OPC_VPBLENDD : OPC_BLENDPS);
+ } else {
+ g_assert_not_reached();
+ }
+ sub = args[3];
+ goto gen_simd_imm8;
+ case INDEX_op_x86_vperm2i128_vec:
+ insn = OPC_VPERM2I128;
+ sub = args[3];
+ goto gen_simd_imm8;
+ gen_simd_imm8:
+ if (type == TCG_TYPE_V256) {
+ insn |= P_VEXL;
+ }
+ tcg_out_vex_modrm(s, insn, a0, a1, a2);
+ tcg_out8(s, sub);
+ break;
+
+ case INDEX_op_x86_vpblendvb_vec:
+ insn = OPC_VPBLENDVB;
+ if (type == TCG_TYPE_V256) {
+ insn |= P_VEXL;
+ }
+ tcg_out_vex_modrm(s, insn, a0, a1, a2);
+ tcg_out8(s, args[3] << 4);
+ break;
+
+ case INDEX_op_x86_psrldq_vec:
+ tcg_out_vex_modrm(s, OPC_GRP14, 3, a0, a1);
+ tcg_out8(s, a2);
+ break;
+
+ case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */
+ case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
+{
+ switch (op) {
+ case INDEX_op_goto_ptr:
+ return C_O0_I1(r);
+
+ case INDEX_op_ld8u_i32:
+ case INDEX_op_ld8u_i64:
+ case INDEX_op_ld8s_i32:
+ case INDEX_op_ld8s_i64:
+ case INDEX_op_ld16u_i32:
+ case INDEX_op_ld16u_i64:
+ case INDEX_op_ld16s_i32:
+ case INDEX_op_ld16s_i64:
+ case INDEX_op_ld_i32:
+ case INDEX_op_ld32u_i64:
+ case INDEX_op_ld32s_i64:
+ case INDEX_op_ld_i64:
+ return C_O1_I1(r, r);
+
+ case INDEX_op_st8_i32:
+ case INDEX_op_st8_i64:
+ return C_O0_I2(qi, r);
+
+ case INDEX_op_st16_i32:
+ case INDEX_op_st16_i64:
+ case INDEX_op_st_i32:
+ case INDEX_op_st32_i64:
+ return C_O0_I2(ri, r);
+
+ case INDEX_op_st_i64:
+ return C_O0_I2(re, r);
+
+ case INDEX_op_add_i32:
+ case INDEX_op_add_i64:
+ return C_O1_I2(r, r, re);
+
+ case INDEX_op_sub_i32:
+ case INDEX_op_sub_i64:
+ case INDEX_op_mul_i32:
+ case INDEX_op_mul_i64:
+ case INDEX_op_or_i32:
+ case INDEX_op_or_i64:
+ case INDEX_op_xor_i32:
+ case INDEX_op_xor_i64:
+ return C_O1_I2(r, 0, re);
+
+ case INDEX_op_and_i32:
+ case INDEX_op_and_i64:
+ return C_O1_I2(r, 0, reZ);
+
+ case INDEX_op_andc_i32:
+ case INDEX_op_andc_i64:
+ return C_O1_I2(r, r, rI);
+
+ case INDEX_op_shl_i32:
+ case INDEX_op_shl_i64:
+ case INDEX_op_shr_i32:
+ case INDEX_op_shr_i64:
+ case INDEX_op_sar_i32:
+ case INDEX_op_sar_i64:
+ return have_bmi2 ? C_O1_I2(r, r, ri) : C_O1_I2(r, 0, ci);
+
+ case INDEX_op_rotl_i32:
+ case INDEX_op_rotl_i64:
+ case INDEX_op_rotr_i32:
+ case INDEX_op_rotr_i64:
+ return C_O1_I2(r, 0, ci);
+
+ case INDEX_op_brcond_i32:
+ case INDEX_op_brcond_i64:
+ return C_O0_I2(r, re);
+
+ case INDEX_op_bswap16_i32:
+ case INDEX_op_bswap16_i64:
+ case INDEX_op_bswap32_i32:
+ case INDEX_op_bswap32_i64:
+ case INDEX_op_bswap64_i64:
+ case INDEX_op_neg_i32:
+ case INDEX_op_neg_i64:
+ case INDEX_op_not_i32:
+ case INDEX_op_not_i64:
+ case INDEX_op_extrh_i64_i32:
+ return C_O1_I1(r, 0);
+
+ case INDEX_op_ext8s_i32:
+ case INDEX_op_ext8s_i64:
+ case INDEX_op_ext8u_i32:
+ case INDEX_op_ext8u_i64:
+ return C_O1_I1(r, q);
+
+ case INDEX_op_ext16s_i32:
+ case INDEX_op_ext16s_i64:
+ case INDEX_op_ext16u_i32:
+ case INDEX_op_ext16u_i64:
+ case INDEX_op_ext32s_i64:
+ case INDEX_op_ext32u_i64:
+ case INDEX_op_ext_i32_i64:
+ case INDEX_op_extu_i32_i64:
+ case INDEX_op_extrl_i64_i32:
+ case INDEX_op_extract_i32:
+ case INDEX_op_extract_i64:
+ case INDEX_op_sextract_i32:
+ case INDEX_op_ctpop_i32:
+ case INDEX_op_ctpop_i64:
+ return C_O1_I1(r, r);
+
+ case INDEX_op_extract2_i32:
+ case INDEX_op_extract2_i64:
+ return C_O1_I2(r, 0, r);
+
+ case INDEX_op_deposit_i32:
+ case INDEX_op_deposit_i64:
+ return C_O1_I2(Q, 0, Q);
+
+ case INDEX_op_setcond_i32:
+ case INDEX_op_setcond_i64:
+ return C_O1_I2(q, r, re);
+
+ case INDEX_op_movcond_i32:
+ case INDEX_op_movcond_i64:
+ return C_O1_I4(r, r, re, r, 0);
+
+ case INDEX_op_div2_i32:
+ case INDEX_op_div2_i64:
+ case INDEX_op_divu2_i32:
+ case INDEX_op_divu2_i64:
+ return C_O2_I3(a, d, 0, 1, r);
+
+ case INDEX_op_mulu2_i32:
+ case INDEX_op_mulu2_i64:
+ case INDEX_op_muls2_i32:
+ case INDEX_op_muls2_i64:
+ return C_O2_I2(a, d, a, r);
+
+ case INDEX_op_add2_i32:
+ case INDEX_op_add2_i64:
+ case INDEX_op_sub2_i32:
+ case INDEX_op_sub2_i64:
+ return C_O2_I4(r, r, 0, 1, re, re);
+
+ case INDEX_op_ctz_i32:
+ case INDEX_op_ctz_i64:
+ return have_bmi1 ? C_N1_I2(r, r, rW) : C_N1_I2(r, r, r);
+
+ case INDEX_op_clz_i32:
+ case INDEX_op_clz_i64:
+ return have_lzcnt ? C_N1_I2(r, r, rW) : C_N1_I2(r, r, r);
+
+ case INDEX_op_qemu_ld_i32:
+ return (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
+ ? C_O1_I1(r, L) : C_O1_I2(r, L, L));
+
+ case INDEX_op_qemu_st_i32:
+ return (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
+ ? C_O0_I2(L, L) : C_O0_I3(L, L, L));
+ case INDEX_op_qemu_st8_i32:
+ return (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
+ ? C_O0_I2(s, L) : C_O0_I3(s, L, L));
+
+ case INDEX_op_qemu_ld_i64:
+ return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L)
+ : TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? C_O2_I1(r, r, L)
+ : C_O2_I2(r, r, L, L));
+
+ case INDEX_op_qemu_st_i64:
+ return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(L, L)
+ : TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? C_O0_I3(L, L, L)
+ : C_O0_I4(L, L, L, L));
+
+ case INDEX_op_brcond2_i32:
+ return C_O0_I4(r, r, ri, ri);
+
+ case INDEX_op_setcond2_i32:
+ return C_O1_I4(r, r, r, ri, ri);
+
+ case INDEX_op_ld_vec:
+ case INDEX_op_dupm_vec:
+ return C_O1_I1(x, r);
+
+ case INDEX_op_st_vec:
+ return C_O0_I2(x, r);
+
+ case INDEX_op_add_vec:
+ case INDEX_op_sub_vec:
+ case INDEX_op_mul_vec:
+ case INDEX_op_and_vec:
+ case INDEX_op_or_vec:
+ case INDEX_op_xor_vec:
+ case INDEX_op_andc_vec:
+ case INDEX_op_ssadd_vec:
+ case INDEX_op_usadd_vec:
+ case INDEX_op_sssub_vec:
+ case INDEX_op_ussub_vec:
+ case INDEX_op_smin_vec:
+ case INDEX_op_umin_vec:
+ case INDEX_op_smax_vec:
+ case INDEX_op_umax_vec:
+ case INDEX_op_shlv_vec:
+ case INDEX_op_shrv_vec:
+ case INDEX_op_sarv_vec:
+ case INDEX_op_shls_vec:
+ case INDEX_op_shrs_vec:
+ case INDEX_op_sars_vec:
+ case INDEX_op_rotls_vec:
+ case INDEX_op_cmp_vec:
+ case INDEX_op_x86_shufps_vec:
+ case INDEX_op_x86_blend_vec:
+ case INDEX_op_x86_packss_vec:
+ case INDEX_op_x86_packus_vec:
+ case INDEX_op_x86_vperm2i128_vec:
+ case INDEX_op_x86_punpckl_vec:
+ case INDEX_op_x86_punpckh_vec:
+#if TCG_TARGET_REG_BITS == 32
+ case INDEX_op_dup2_vec:
+#endif
+ return C_O1_I2(x, x, x);
+
+ case INDEX_op_abs_vec:
+ case INDEX_op_dup_vec:
+ case INDEX_op_shli_vec:
+ case INDEX_op_shri_vec:
+ case INDEX_op_sari_vec:
+ case INDEX_op_x86_psrldq_vec:
+ return C_O1_I1(x, x);
+
+ case INDEX_op_x86_vpblendvb_vec:
+ return C_O1_I3(x, x, x, x);
+
+ default:
+ g_assert_not_reached();
+ }
+}
+
+int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
+{
+ switch (opc) {
+ case INDEX_op_add_vec:
+ case INDEX_op_sub_vec:
+ case INDEX_op_and_vec:
+ case INDEX_op_or_vec:
+ case INDEX_op_xor_vec:
+ case INDEX_op_andc_vec:
+ return 1;
+ case INDEX_op_rotli_vec:
+ case INDEX_op_cmp_vec:
+ case INDEX_op_cmpsel_vec:
+ return -1;
+
+ case INDEX_op_shli_vec:
+ case INDEX_op_shri_vec:
+ /* We must expand the operation for MO_8. */
+ return vece == MO_8 ? -1 : 1;
+
+ case INDEX_op_sari_vec:
+ /* We must expand the operation for MO_8. */
+ if (vece == MO_8) {
+ return -1;
+ }
+ /* We can emulate this for MO_64, but it does not pay off
+ unless we're producing at least 4 values. */
+ if (vece == MO_64) {
+ return type >= TCG_TYPE_V256 ? -1 : 0;
+ }
+ return 1;
+
+ case INDEX_op_shls_vec:
+ case INDEX_op_shrs_vec:
+ return vece >= MO_16;
+ case INDEX_op_sars_vec:
+ return vece >= MO_16 && vece <= MO_32;
+ case INDEX_op_rotls_vec:
+ return vece >= MO_16 ? -1 : 0;
+
+ case INDEX_op_shlv_vec:
+ case INDEX_op_shrv_vec:
+ return have_avx2 && vece >= MO_32;
+ case INDEX_op_sarv_vec:
+ return have_avx2 && vece == MO_32;
+ case INDEX_op_rotlv_vec:
+ case INDEX_op_rotrv_vec:
+ return have_avx2 && vece >= MO_32 ? -1 : 0;
+
+ case INDEX_op_mul_vec:
+ if (vece == MO_8) {
+ /* We can expand the operation for MO_8. */
+ return -1;
+ }
+ if (vece == MO_64) {
+ return 0;
+ }
+ return 1;
+
+ case INDEX_op_ssadd_vec:
+ case INDEX_op_usadd_vec:
+ case INDEX_op_sssub_vec:
+ case INDEX_op_ussub_vec:
+ return vece <= MO_16;
+ case INDEX_op_smin_vec:
+ case INDEX_op_smax_vec:
+ case INDEX_op_umin_vec:
+ case INDEX_op_umax_vec:
+ case INDEX_op_abs_vec:
+ return vece <= MO_32;
+
+ default:
+ return 0;
+ }
+}
+
+static void expand_vec_shi(TCGType type, unsigned vece, TCGOpcode opc,
+ TCGv_vec v0, TCGv_vec v1, TCGArg imm)
+{
+ TCGv_vec t1, t2;
+
+ tcg_debug_assert(vece == MO_8);
+
+ t1 = tcg_temp_new_vec(type);
+ t2 = tcg_temp_new_vec(type);
+
+ /*
+ * Unpack to W, shift, and repack. Tricky bits:
+ * (1) Use punpck*bw x,x to produce DDCCBBAA,
+ * i.e. duplicate in other half of the 16-bit lane.
+ * (2) For right-shift, add 8 so that the high half of the lane
+ * becomes zero. For left-shift, and left-rotate, we must
+ * shift up and down again.
+ * (3) Step 2 leaves high half zero such that PACKUSWB
+ * (pack with unsigned saturation) does not modify
+ * the quantity.
+ */
+ vec_gen_3(INDEX_op_x86_punpckl_vec, type, MO_8,
+ tcgv_vec_arg(t1), tcgv_vec_arg(v1), tcgv_vec_arg(v1));
+ vec_gen_3(INDEX_op_x86_punpckh_vec, type, MO_8,
+ tcgv_vec_arg(t2), tcgv_vec_arg(v1), tcgv_vec_arg(v1));
+
+ if (opc != INDEX_op_rotli_vec) {
+ imm += 8;
+ }
+ if (opc == INDEX_op_shri_vec) {
+ tcg_gen_shri_vec(MO_16, t1, t1, imm);
+ tcg_gen_shri_vec(MO_16, t2, t2, imm);
+ } else {
+ tcg_gen_shli_vec(MO_16, t1, t1, imm);
+ tcg_gen_shli_vec(MO_16, t2, t2, imm);
+ tcg_gen_shri_vec(MO_16, t1, t1, 8);
+ tcg_gen_shri_vec(MO_16, t2, t2, 8);
+ }
+
+ vec_gen_3(INDEX_op_x86_packus_vec, type, MO_8,
+ tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(t2));
+ tcg_temp_free_vec(t1);
+ tcg_temp_free_vec(t2);
+}
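A scalar, one-byte-per-lane model of the unpack/shift/repack trick above: SSE/AVX have no 8-bit shift, so each byte is duplicated into a 16-bit word, shifted at 16-bit width with the +8 adjustment, and the low byte is recovered by the final pack. This is only a check of the arithmetic, not of the generated code.

    #include <assert.h>
    #include <stdint.h>

    static uint8_t byte_shr_via_words(uint8_t b, unsigned imm)
    {
        uint16_t w = (uint16_t)((b << 8) | b);   /* punpck*bw b,b        */
        w = (uint16_t)(w >> (imm + 8));          /* 16-bit shift, imm+8  */
        return (uint8_t)w;                       /* high half is zero,
                                                    so packuswb keeps it */
    }

    static uint8_t byte_shl_via_words(uint8_t b, unsigned imm)
    {
        uint16_t w = (uint16_t)((b << 8) | b);
        w = (uint16_t)(w << (imm + 8));          /* shift up ...         */
        w = (uint16_t)(w >> 8);                  /* ... and back down    */
        return (uint8_t)w;
    }

    int main(void)
    {
        for (unsigned b = 0; b < 256; b++) {
            for (unsigned i = 0; i < 8; i++) {
                assert(byte_shr_via_words((uint8_t)b, i) == (uint8_t)(b >> i));
                assert(byte_shl_via_words((uint8_t)b, i) == (uint8_t)(b << i));
            }
        }
        return 0;
    }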
+
+static void expand_vec_sari(TCGType type, unsigned vece,
+ TCGv_vec v0, TCGv_vec v1, TCGArg imm)
+{
+ TCGv_vec t1, t2;
+
+ switch (vece) {
+ case MO_8:
+ /* Unpack to W, shift, and repack, as in expand_vec_shi. */
+ t1 = tcg_temp_new_vec(type);
+ t2 = tcg_temp_new_vec(type);
+ vec_gen_3(INDEX_op_x86_punpckl_vec, type, MO_8,
+ tcgv_vec_arg(t1), tcgv_vec_arg(v1), tcgv_vec_arg(v1));
+ vec_gen_3(INDEX_op_x86_punpckh_vec, type, MO_8,
+ tcgv_vec_arg(t2), tcgv_vec_arg(v1), tcgv_vec_arg(v1));
+ tcg_gen_sari_vec(MO_16, t1, t1, imm + 8);
+ tcg_gen_sari_vec(MO_16, t2, t2, imm + 8);
+ vec_gen_3(INDEX_op_x86_packss_vec, type, MO_8,
+ tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(t2));
+ tcg_temp_free_vec(t1);
+ tcg_temp_free_vec(t2);
+ break;
+
+ case MO_64:
+ if (imm <= 32) {
+ /*
+ * We can emulate a small sign extend by performing an arithmetic
+ * 32-bit shift and overwriting the high half of a 64-bit logical
+ * shift. Note that the ISA says shift of 32 is valid, but TCG
+ * does not, so we have to bound the smaller shift -- we get the
+ * same result in the high half either way.
+ */
+ t1 = tcg_temp_new_vec(type);
+ tcg_gen_sari_vec(MO_32, t1, v1, MIN(imm, 31));
+ tcg_gen_shri_vec(MO_64, v0, v1, imm);
+ vec_gen_4(INDEX_op_x86_blend_vec, type, MO_32,
+ tcgv_vec_arg(v0), tcgv_vec_arg(v0),
+ tcgv_vec_arg(t1), 0xaa);
+ tcg_temp_free_vec(t1);
+ } else {
+ /* Otherwise we will need to use a compare vs 0 to produce
+ * the sign-extend, shift and merge.
+ */
+ t1 = tcg_const_zeros_vec(type);
+ tcg_gen_cmp_vec(TCG_COND_GT, MO_64, t1, t1, v1);
+ tcg_gen_shri_vec(MO_64, v0, v1, imm);
+ tcg_gen_shli_vec(MO_64, t1, t1, 64 - imm);
+ tcg_gen_or_vec(MO_64, v0, v0, t1);
+ tcg_temp_free_vec(t1);
+ }
+ break;
+
+ default:
+ g_assert_not_reached();
+ }
+}
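A scalar check of the MO_64 path above for imm <= 32: an arithmetic 32-bit shift of the high half, blended over a 64-bit logical shift, matches a true 64-bit arithmetic shift. The model below reimplements the 32-bit arithmetic shift portably so that the check does not depend on compiler behaviour for negative shifts.

    #include <assert.h>
    #include <stdint.h>

    static uint32_t sar32(uint32_t v, unsigned n)   /* models sari at MO_32, 1 <= n <= 31 */
    {
        return (v >> n) | ((v >> 31) ? ~0u << (32 - n) : 0);
    }

    static uint64_t sar64_emulated(uint64_t x, unsigned imm)   /* 1 <= imm <= 32 */
    {
        uint64_t lo = x >> imm;                                /* shri at MO_64 */
        uint32_t hi = sar32((uint32_t)(x >> 32), imm < 31 ? imm : 31);
        /* blend 0xaa at MO_32: low half from the shri, high half from the sari */
        return (lo & 0xffffffffu) | ((uint64_t)hi << 32);
    }

    int main(void)
    {
        uint64_t tests[] = { 0xfedcba9876543210ull, 0x0123456789abcdefull,
                             0x8000000000000000ull };
        for (unsigned t = 0; t < 3; t++) {
            uint64_t x = tests[t];
            for (unsigned imm = 1; imm <= 32; imm++) {
                uint64_t expect = (x >> imm)
                                | ((x >> 63) ? ~0ull << (64 - imm) : 0);
                assert(sar64_emulated(x, imm) == expect);
            }
        }
        return 0;
    }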
+
+static void expand_vec_rotli(TCGType type, unsigned vece,
+ TCGv_vec v0, TCGv_vec v1, TCGArg imm)
+{
+ TCGv_vec t;
+
+ if (vece == MO_8) {
+ expand_vec_shi(type, vece, INDEX_op_rotli_vec, v0, v1, imm);
+ return;
+ }
+
+ t = tcg_temp_new_vec(type);
+ tcg_gen_shli_vec(vece, t, v1, imm);
+ tcg_gen_shri_vec(vece, v0, v1, (8 << vece) - imm);
+ tcg_gen_or_vec(vece, v0, v0, t);
+ tcg_temp_free_vec(t);
+}
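The rotate-by-immediate expansion above, like the scalar- and variable-count variants that follow, is the usual two-shift-plus-OR identity. A one-lane scalar form for a 32-bit lane:

    #include <assert.h>
    #include <stdint.h>

    /* rotl(x, n) = (x << n) | (x >> (width - n)), here for width 32, 0 < n < 32. */
    static uint32_t rotl32(uint32_t x, unsigned n)
    {
        return (x << n) | (x >> (32 - n));
    }

    int main(void)
    {
        assert(rotl32(0x80000001u, 1) == 0x00000003u);
        assert(rotl32(0x12345678u, 8) == 0x34567812u);
        return 0;
    }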
+
+static void expand_vec_rotls(TCGType type, unsigned vece,
+ TCGv_vec v0, TCGv_vec v1, TCGv_i32 lsh)
+{
+ TCGv_i32 rsh;
+ TCGv_vec t;
+
+ tcg_debug_assert(vece != MO_8);
+
+ t = tcg_temp_new_vec(type);
+ rsh = tcg_temp_new_i32();
+
+ tcg_gen_neg_i32(rsh, lsh);
+ tcg_gen_andi_i32(rsh, rsh, (8 << vece) - 1);
+ tcg_gen_shls_vec(vece, t, v1, lsh);
+ tcg_gen_shrs_vec(vece, v0, v1, rsh);
+ tcg_gen_or_vec(vece, v0, v0, t);
+ tcg_temp_free_vec(t);
+ tcg_temp_free_i32(rsh);
+}
+
+static void expand_vec_rotv(TCGType type, unsigned vece, TCGv_vec v0,
+ TCGv_vec v1, TCGv_vec sh, bool right)
+{
+ TCGv_vec t = tcg_temp_new_vec(type);
+
+ tcg_gen_dupi_vec(vece, t, 8 << vece);
+ tcg_gen_sub_vec(vece, t, t, sh);
+ if (right) {
+ tcg_gen_shlv_vec(vece, t, v1, t);
+ tcg_gen_shrv_vec(vece, v0, v1, sh);
+ } else {
+ tcg_gen_shrv_vec(vece, t, v1, t);
+ tcg_gen_shlv_vec(vece, v0, v1, sh);
+ }
+ tcg_gen_or_vec(vece, v0, v0, t);
+ tcg_temp_free_vec(t);
+}
+
+static void expand_vec_mul(TCGType type, unsigned vece,
+ TCGv_vec v0, TCGv_vec v1, TCGv_vec v2)
+{
+ TCGv_vec t1, t2, t3, t4, zero;
+
+ tcg_debug_assert(vece == MO_8);
+
+ /*
+ * Unpack v1 bytes to words, 0 | x.
+ * Unpack v2 bytes to words, y | 0.
+ * This leaves the 8-bit result, x * y, with 8 bits of right padding.
+     * Shift logical right by 8 bits to clear the high 8 bits before
+ * using an unsigned saturated pack.
+ *
+ * The difference between the V64, V128 and V256 cases is merely how
+ * we distribute the expansion between temporaries.
+ */
+ switch (type) {
+ case TCG_TYPE_V64:
+ t1 = tcg_temp_new_vec(TCG_TYPE_V128);
+ t2 = tcg_temp_new_vec(TCG_TYPE_V128);
+ zero = tcg_constant_vec(TCG_TYPE_V128, MO_8, 0);
+ vec_gen_3(INDEX_op_x86_punpckl_vec, TCG_TYPE_V128, MO_8,
+ tcgv_vec_arg(t1), tcgv_vec_arg(v1), tcgv_vec_arg(zero));
+ vec_gen_3(INDEX_op_x86_punpckl_vec, TCG_TYPE_V128, MO_8,
+ tcgv_vec_arg(t2), tcgv_vec_arg(zero), tcgv_vec_arg(v2));
+ tcg_gen_mul_vec(MO_16, t1, t1, t2);
+ tcg_gen_shri_vec(MO_16, t1, t1, 8);
+ vec_gen_3(INDEX_op_x86_packus_vec, TCG_TYPE_V128, MO_8,
+ tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(t1));
+ tcg_temp_free_vec(t1);
+ tcg_temp_free_vec(t2);
+ break;
+
+ case TCG_TYPE_V128:
+ case TCG_TYPE_V256:
+ t1 = tcg_temp_new_vec(type);
+ t2 = tcg_temp_new_vec(type);
+ t3 = tcg_temp_new_vec(type);
+ t4 = tcg_temp_new_vec(type);
+ zero = tcg_constant_vec(TCG_TYPE_V128, MO_8, 0);
+ vec_gen_3(INDEX_op_x86_punpckl_vec, type, MO_8,
+ tcgv_vec_arg(t1), tcgv_vec_arg(v1), tcgv_vec_arg(zero));
+ vec_gen_3(INDEX_op_x86_punpckl_vec, type, MO_8,
+ tcgv_vec_arg(t2), tcgv_vec_arg(zero), tcgv_vec_arg(v2));
+ vec_gen_3(INDEX_op_x86_punpckh_vec, type, MO_8,
+ tcgv_vec_arg(t3), tcgv_vec_arg(v1), tcgv_vec_arg(zero));
+ vec_gen_3(INDEX_op_x86_punpckh_vec, type, MO_8,
+ tcgv_vec_arg(t4), tcgv_vec_arg(zero), tcgv_vec_arg(v2));
+ tcg_gen_mul_vec(MO_16, t1, t1, t2);
+ tcg_gen_mul_vec(MO_16, t3, t3, t4);
+ tcg_gen_shri_vec(MO_16, t1, t1, 8);
+ tcg_gen_shri_vec(MO_16, t3, t3, 8);
+ vec_gen_3(INDEX_op_x86_packus_vec, type, MO_8,
+ tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(t3));
+ tcg_temp_free_vec(t1);
+ tcg_temp_free_vec(t2);
+ tcg_temp_free_vec(t3);
+ tcg_temp_free_vec(t4);
+ break;
+
+ default:
+ g_assert_not_reached();
+ }
+}
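A scalar model of the byte-multiply expansion above, one lane at a time: with x widened as 0|x and y as y|0, the low 16 bits of the product carry (x*y) mod 256 in their high byte, which the shift and pack then recover.

    #include <assert.h>
    #include <stdint.h>

    static uint8_t mul8_via_words(uint8_t x, uint8_t y)
    {
        uint16_t a = x;                       /* punpck*bw v1, zero:  0 | x */
        uint16_t b = (uint16_t)(y << 8);      /* punpck*bw zero, v2:  y | 0 */
        uint16_t p = (uint16_t)(a * b);       /* pmullw keeps the low 16 bits */
        return (uint8_t)(p >> 8);             /* psrlw 8, then packuswb */
    }

    int main(void)
    {
        for (unsigned x = 0; x < 256; x++) {
            for (unsigned y = 0; y < 256; y++) {
                assert(mul8_via_words((uint8_t)x, (uint8_t)y) == (uint8_t)(x * y));
            }
        }
        return 0;
    }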
+
+static bool expand_vec_cmp_noinv(TCGType type, unsigned vece, TCGv_vec v0,
+ TCGv_vec v1, TCGv_vec v2, TCGCond cond)
+{
+ enum {
+ NEED_INV = 1,
+ NEED_SWAP = 2,
+ NEED_BIAS = 4,
+ NEED_UMIN = 8,
+ NEED_UMAX = 16,
+ };
+ TCGv_vec t1, t2, t3;
+ uint8_t fixup;
+
+ switch (cond) {
+ case TCG_COND_EQ:
+ case TCG_COND_GT:
+ fixup = 0;
+ break;
+ case TCG_COND_NE:
+ case TCG_COND_LE:
+ fixup = NEED_INV;
+ break;
+ case TCG_COND_LT:
+ fixup = NEED_SWAP;
+ break;
+ case TCG_COND_GE:
+ fixup = NEED_SWAP | NEED_INV;
+ break;
+ case TCG_COND_LEU:
+ if (vece <= MO_32) {
+ fixup = NEED_UMIN;
+ } else {
+ fixup = NEED_BIAS | NEED_INV;
+ }
+ break;
+ case TCG_COND_GTU:
+ if (vece <= MO_32) {
+ fixup = NEED_UMIN | NEED_INV;
+ } else {
+ fixup = NEED_BIAS;
+ }
+ break;
+ case TCG_COND_GEU:
+ if (vece <= MO_32) {
+ fixup = NEED_UMAX;
+ } else {
+ fixup = NEED_BIAS | NEED_SWAP | NEED_INV;
+ }
+ break;
+ case TCG_COND_LTU:
+ if (vece <= MO_32) {
+ fixup = NEED_UMAX | NEED_INV;
+ } else {
+ fixup = NEED_BIAS | NEED_SWAP;
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ if (fixup & NEED_INV) {
+ cond = tcg_invert_cond(cond);
+ }
+ if (fixup & NEED_SWAP) {
+ t1 = v1, v1 = v2, v2 = t1;
+ cond = tcg_swap_cond(cond);
+ }
+
+ t1 = t2 = NULL;
+ if (fixup & (NEED_UMIN | NEED_UMAX)) {
+ t1 = tcg_temp_new_vec(type);
+ if (fixup & NEED_UMIN) {
+ tcg_gen_umin_vec(vece, t1, v1, v2);
+ } else {
+ tcg_gen_umax_vec(vece, t1, v1, v2);
+ }
+ v2 = t1;
+ cond = TCG_COND_EQ;
+ } else if (fixup & NEED_BIAS) {
+ t1 = tcg_temp_new_vec(type);
+ t2 = tcg_temp_new_vec(type);
+ t3 = tcg_constant_vec(type, vece, 1ull << ((8 << vece) - 1));
+ tcg_gen_sub_vec(vece, t1, v1, t3);
+ tcg_gen_sub_vec(vece, t2, v2, t3);
+ v1 = t1;
+ v2 = t2;
+ cond = tcg_signed_cond(cond);
+ }
+
+ tcg_debug_assert(cond == TCG_COND_EQ || cond == TCG_COND_GT);
+ /* Expand directly; do not recurse. */
+ vec_gen_4(INDEX_op_cmp_vec, type, vece,
+ tcgv_vec_arg(v0), tcgv_vec_arg(v1), tcgv_vec_arg(v2), cond);
+
+ if (t1) {
+ tcg_temp_free_vec(t1);
+ if (t2) {
+ tcg_temp_free_vec(t2);
+ }
+ }
+ return fixup & NEED_INV;
+}
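Scalar forms of the two less obvious fixups above, which synthesize unsigned comparisons from the only native vector predicates (EQ and signed GT). The 64-bit case relies on the usual two's-complement reinterpretation when the biased values are compared as signed, as on common hosts.

    #include <assert.h>
    #include <stdint.h>

    /* NEED_UMIN: x <=u y  <=>  umin(x, y) == x  (used for vece <= MO_32). */
    static int leu_via_umin(uint32_t x, uint32_t y)
    {
        uint32_t m = x < y ? x : y;      /* pminud */
        return m == x;                   /* pcmpeqd */
    }

    /* NEED_BIAS: subtracting (equivalently XOR-ing) the sign bit from both
       operands turns an unsigned compare into a signed one (used for MO_64,
       where no unsigned min/max is available here). */
    static int ltu_via_bias(uint64_t x, uint64_t y)
    {
        const uint64_t bias = 1ull << 63;
        return (int64_t)(x ^ bias) < (int64_t)(y ^ bias);   /* psubq + pcmpgtq */
    }

    int main(void)
    {
        assert(leu_via_umin(1, 0xffffffffu) && !leu_via_umin(0xffffffffu, 1));
        assert(ltu_via_bias(1, 0xffffffffffffffffull));
        assert(!ltu_via_bias(0xffffffffffffffffull, 1));
        return 0;
    }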
+
+static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
+ TCGv_vec v1, TCGv_vec v2, TCGCond cond)
+{
+ if (expand_vec_cmp_noinv(type, vece, v0, v1, v2, cond)) {
+ tcg_gen_not_vec(vece, v0, v0);
+ }
+}
+
+static void expand_vec_cmpsel(TCGType type, unsigned vece, TCGv_vec v0,
+ TCGv_vec c1, TCGv_vec c2,
+ TCGv_vec v3, TCGv_vec v4, TCGCond cond)
+{
+ TCGv_vec t = tcg_temp_new_vec(type);
+
+ if (expand_vec_cmp_noinv(type, vece, t, c1, c2, cond)) {
+ /* Invert the sense of the compare by swapping arguments. */
+ TCGv_vec x;
+ x = v3, v3 = v4, v4 = x;
+ }
+ vec_gen_4(INDEX_op_x86_vpblendvb_vec, type, vece,
+ tcgv_vec_arg(v0), tcgv_vec_arg(v4),
+ tcgv_vec_arg(v3), tcgv_vec_arg(t));
+ tcg_temp_free_vec(t);
+}
+
+void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
+ TCGArg a0, ...)
+{
+ va_list va;
+ TCGArg a2;
+ TCGv_vec v0, v1, v2, v3, v4;
+
+ va_start(va, a0);
+ v0 = temp_tcgv_vec(arg_temp(a0));
+ v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
+ a2 = va_arg(va, TCGArg);
+
+ switch (opc) {
+ case INDEX_op_shli_vec:
+ case INDEX_op_shri_vec:
+ expand_vec_shi(type, vece, opc, v0, v1, a2);
+ break;
+
+ case INDEX_op_sari_vec:
+ expand_vec_sari(type, vece, v0, v1, a2);
+ break;
+
+ case INDEX_op_rotli_vec:
+ expand_vec_rotli(type, vece, v0, v1, a2);
+ break;
+
+ case INDEX_op_rotls_vec:
+ expand_vec_rotls(type, vece, v0, v1, temp_tcgv_i32(arg_temp(a2)));
+ break;
+
+ case INDEX_op_rotlv_vec:
+ v2 = temp_tcgv_vec(arg_temp(a2));
+ expand_vec_rotv(type, vece, v0, v1, v2, false);
+ break;
+ case INDEX_op_rotrv_vec:
+ v2 = temp_tcgv_vec(arg_temp(a2));
+ expand_vec_rotv(type, vece, v0, v1, v2, true);
+ break;
+
+ case INDEX_op_mul_vec:
+ v2 = temp_tcgv_vec(arg_temp(a2));
+ expand_vec_mul(type, vece, v0, v1, v2);
+ break;
+
+ case INDEX_op_cmp_vec:
+ v2 = temp_tcgv_vec(arg_temp(a2));
+ expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
+ break;
+
+ case INDEX_op_cmpsel_vec:
+ v2 = temp_tcgv_vec(arg_temp(a2));
+ v3 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
+ v4 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
+ expand_vec_cmpsel(type, vece, v0, v1, v2, v3, v4, va_arg(va, TCGArg));
+ break;
+
+ default:
+ break;
+ }
+
+ va_end(va);
+}
+
+static const int tcg_target_callee_save_regs[] = {
+#if TCG_TARGET_REG_BITS == 64
+ TCG_REG_RBP,
+ TCG_REG_RBX,
+#if defined(_WIN64)
+ TCG_REG_RDI,
+ TCG_REG_RSI,
+#endif
+ TCG_REG_R12,
+ TCG_REG_R13,
+ TCG_REG_R14, /* Currently used for the global env. */
+ TCG_REG_R15,
+#else
+ TCG_REG_EBP, /* Currently used for the global env. */
+ TCG_REG_EBX,
+ TCG_REG_ESI,
+ TCG_REG_EDI,
+#endif
+};
+
+/* Compute frame size via macros, to share between tcg_target_qemu_prologue
+ and tcg_register_jit. */
+
+#define PUSH_SIZE \
+ ((1 + ARRAY_SIZE(tcg_target_callee_save_regs)) \
+ * (TCG_TARGET_REG_BITS / 8))
+
+#define FRAME_SIZE \
+ ((PUSH_SIZE \
+ + TCG_STATIC_CALL_ARGS_SIZE \
+ + CPU_TEMP_BUF_NLONGS * sizeof(long) \
+ + TCG_TARGET_STACK_ALIGN - 1) \
+ & ~(TCG_TARGET_STACK_ALIGN - 1))
+
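A worked instance of the two macros above for a 64-bit non-Windows host. The EX_* names are placeholders of mine, and the values taken for TCG_STATIC_CALL_ARGS_SIZE and CPU_TEMP_BUF_NLONGS (both 128) are assumptions about the rest of the tree rather than something this file defines.

    #include <assert.h>
    #include <stdio.h>

    #define EX_REG_BITS              64
    #define EX_NUM_CALLEE_SAVED      6         /* rbp, rbx, r12..r15 */
    #define EX_STATIC_CALL_ARGS_SIZE 128       /* assumed */
    #define EX_TEMP_BUF_BYTES        (128 * 8) /* assumed CPU_TEMP_BUF_NLONGS * sizeof(long) */
    #define EX_STACK_ALIGN           16

    #define EX_PUSH_SIZE  ((1 + EX_NUM_CALLEE_SAVED) * (EX_REG_BITS / 8))
    #define EX_FRAME_SIZE ((EX_PUSH_SIZE + EX_STATIC_CALL_ARGS_SIZE \
                            + EX_TEMP_BUF_BYTES + EX_STACK_ALIGN - 1) \
                           & ~(EX_STACK_ALIGN - 1))

    int main(void)
    {
        /* 7 pushes (return address plus 6 saved registers) = 56 bytes, then
           56 + 128 + 1024 = 1208, rounded up to 1216 for 16-byte alignment. */
        assert(EX_PUSH_SIZE == 56);
        assert(EX_FRAME_SIZE == 1216);
        printf("push=%d frame=%d\n", EX_PUSH_SIZE, EX_FRAME_SIZE);
        return 0;
    }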
+/* Generate global QEMU prologue and epilogue code */
+static void tcg_target_qemu_prologue(TCGContext *s)
+{
+ int i, stack_addend;
+
+ /* TB prologue */
+
+ /* Reserve some stack space, also for TCG temps. */
+ stack_addend = FRAME_SIZE - PUSH_SIZE;
+ tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
+ CPU_TEMP_BUF_NLONGS * sizeof(long));
+
+ /* Save all callee saved registers. */
+ for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
+ tcg_out_push(s, tcg_target_callee_save_regs[i]);
+ }
+
+#if TCG_TARGET_REG_BITS == 32
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP,
+ (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4);
+ tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
+ /* jmp *tb. */
+ tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_ESP,
+ (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4
+ + stack_addend);
+#else
+# if !defined(CONFIG_SOFTMMU) && TCG_TARGET_REG_BITS == 64
+ if (guest_base) {
+ int seg = setup_guest_base_seg();
+ if (seg != 0) {
+ x86_guest_base_seg = seg;
+ } else if (guest_base == (int32_t)guest_base) {
+ x86_guest_base_offset = guest_base;
+ } else {
+ /* Choose R12 because, as a base, it requires a SIB byte. */
+ x86_guest_base_index = TCG_REG_R12;
+ tcg_out_movi(s, TCG_TYPE_PTR, x86_guest_base_index, guest_base);
+ tcg_regset_set_reg(s->reserved_regs, x86_guest_base_index);
+ }
+ }
+# endif
+ tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
+ tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
+ /* jmp *tb. */
+ tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]);
+#endif
+
+ /*
+ * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
+ * and fall through to the rest of the epilogue.
+ */
+ tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
+ tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_EAX, 0);
+
+ /* TB epilogue */
+ tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
+
+ tcg_out_addi(s, TCG_REG_CALL_STACK, stack_addend);
+
+ if (have_avx2) {
+ tcg_out_vex_opc(s, OPC_VZEROUPPER, 0, 0, 0, 0);
+ }
+ for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
+ tcg_out_pop(s, tcg_target_callee_save_regs[i]);
+ }
+ tcg_out_opc(s, OPC_RET, 0, 0, 0);
+}
+
+static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
+{
+ memset(p, 0x90, count);
+}
+
+static void tcg_target_init(TCGContext *s)
+{
+#ifdef CONFIG_CPUID_H
+ unsigned a, b, c, d, b7 = 0;
+ int max = __get_cpuid_max(0, 0);
+
+ if (max >= 7) {
+ /* BMI1 is available on AMD Piledriver and Intel Haswell CPUs. */
+ __cpuid_count(7, 0, a, b7, c, d);
+ have_bmi1 = (b7 & bit_BMI) != 0;
+ have_bmi2 = (b7 & bit_BMI2) != 0;
+ }
+
+ if (max >= 1) {
+ __cpuid(1, a, b, c, d);
+#ifndef have_cmov
+        /* For 32-bit, it is almost certain that we're running on hardware
+           that supports cmov, but we still need to check.  In case cmov is
+           not available, we'll use a small forward branch.  */
+ have_cmov = (d & bit_CMOV) != 0;
+#endif
+
+        /* MOVBE is not available on all CPUs (it first appeared on Intel
+           Atom and Haswell), so we need to probe for it.  */
+ have_movbe = (c & bit_MOVBE) != 0;
+ have_popcnt = (c & bit_POPCNT) != 0;
+
+        /* There are a number of things we must check before we can be
+           sure of not hitting an invalid-opcode exception.  */
+ if (c & bit_OSXSAVE) {
+ unsigned xcrl, xcrh;
+ /* The xgetbv instruction is not available to older versions of
+ * the assembler, so we encode the instruction manually.
+ */
+ asm(".byte 0x0f, 0x01, 0xd0" : "=a" (xcrl), "=d" (xcrh) : "c" (0));
+ if ((xcrl & 6) == 6) {
+ have_avx1 = (c & bit_AVX) != 0;
+ have_avx2 = (b7 & bit_AVX2) != 0;
+ }
+ }
+ }
+
+ max = __get_cpuid_max(0x8000000, 0);
+ if (max >= 1) {
+ __cpuid(0x80000001, a, b, c, d);
+ /* LZCNT was introduced with AMD Barcelona and Intel Haswell CPUs. */
+ have_lzcnt = (c & bit_LZCNT) != 0;
+ }
+#endif /* CONFIG_CPUID_H */
+
+ tcg_target_available_regs[TCG_TYPE_I32] = ALL_GENERAL_REGS;
+ if (TCG_TARGET_REG_BITS == 64) {
+ tcg_target_available_regs[TCG_TYPE_I64] = ALL_GENERAL_REGS;
+ }
+ if (have_avx1) {
+ tcg_target_available_regs[TCG_TYPE_V64] = ALL_VECTOR_REGS;
+ tcg_target_available_regs[TCG_TYPE_V128] = ALL_VECTOR_REGS;
+ }
+ if (have_avx2) {
+ tcg_target_available_regs[TCG_TYPE_V256] = ALL_VECTOR_REGS;
+ }
+
+ tcg_target_call_clobber_regs = ALL_VECTOR_REGS;
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EAX);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EDX);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_ECX);
+ if (TCG_TARGET_REG_BITS == 64) {
+#if !defined(_WIN64)
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RDI);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RSI);
+#endif
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
+ }
+
+ s->reserved_regs = 0;
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
+}
+
+typedef struct {
+ DebugFrameHeader h;
+ uint8_t fde_def_cfa[4];
+ uint8_t fde_reg_ofs[14];
+} DebugFrame;
+
+/* We're expecting a 2 byte uleb128 encoded value. */
+QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
+
+#if !defined(__ELF__)
+ /* Host machine without ELF. */
+#elif TCG_TARGET_REG_BITS == 64
+#define ELF_HOST_MACHINE EM_X86_64
+static const DebugFrame debug_frame = {
+ .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
+ .h.cie.id = -1,
+ .h.cie.version = 1,
+ .h.cie.code_align = 1,
+ .h.cie.data_align = 0x78, /* sleb128 -8 */
+ .h.cie.return_column = 16,
+
+ /* Total FDE size does not include the "len" member. */
+ .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
+
+ .fde_def_cfa = {
+ 12, 7, /* DW_CFA_def_cfa %rsp, ... */
+ (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
+ (FRAME_SIZE >> 7)
+ },
+ .fde_reg_ofs = {
+ 0x90, 1, /* DW_CFA_offset, %rip, -8 */
+ /* The following ordering must match tcg_target_callee_save_regs. */
+ 0x86, 2, /* DW_CFA_offset, %rbp, -16 */
+ 0x83, 3, /* DW_CFA_offset, %rbx, -24 */
+ 0x8c, 4, /* DW_CFA_offset, %r12, -32 */
+ 0x8d, 5, /* DW_CFA_offset, %r13, -40 */
+ 0x8e, 6, /* DW_CFA_offset, %r14, -48 */
+ 0x8f, 7, /* DW_CFA_offset, %r15, -56 */
+ }
+};
+#else
+#define ELF_HOST_MACHINE EM_386
+static const DebugFrame debug_frame = {
+ .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
+ .h.cie.id = -1,
+ .h.cie.version = 1,
+ .h.cie.code_align = 1,
+ .h.cie.data_align = 0x7c, /* sleb128 -4 */
+ .h.cie.return_column = 8,
+
+ /* Total FDE size does not include the "len" member. */
+ .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
+
+ .fde_def_cfa = {
+ 12, 4, /* DW_CFA_def_cfa %esp, ... */
+ (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
+ (FRAME_SIZE >> 7)
+ },
+ .fde_reg_ofs = {
+ 0x88, 1, /* DW_CFA_offset, %eip, -4 */
+ /* The following ordering must match tcg_target_callee_save_regs. */
+ 0x85, 2, /* DW_CFA_offset, %ebp, -8 */
+ 0x83, 3, /* DW_CFA_offset, %ebx, -12 */
+ 0x86, 4, /* DW_CFA_offset, %esi, -16 */
+ 0x87, 5, /* DW_CFA_offset, %edi, -20 */
+ }
+};
+#endif
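The hard-coded CFI bytes above rely on LEB128 encoding: data_align 0x78 and 0x7c are single-byte sleb128 encodings of -8 and -4, and fde_def_cfa emits FRAME_SIZE as a two-byte uleb128, which is why the build asserts FRAME_SIZE < (1 << 14). A small encoder checking those values; it assumes the host compiler shifts negative values arithmetically, as common compilers do. The 1216 below is just an example frame size.

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>

    static size_t encode_sleb128(int64_t v, uint8_t *out)
    {
        size_t n = 0;
        int more = 1;
        while (more) {
            uint8_t byte = v & 0x7f;
            v >>= 7;   /* arithmetic shift assumed for negative v */
            if ((v == 0 && !(byte & 0x40)) || (v == -1 && (byte & 0x40))) {
                more = 0;
            } else {
                byte |= 0x80;
            }
            out[n++] = byte;
        }
        return n;
    }

    int main(void)
    {
        uint8_t buf[8];
        assert(encode_sleb128(-8, buf) == 1 && buf[0] == 0x78);
        assert(encode_sleb128(-4, buf) == 1 && buf[0] == 0x7c);
        /* Two-byte uleb128 of a frame size below 1 << 14, e.g. 1216: */
        assert(((1216 & 0x7f) | 0x80) == 0xc0 && (1216 >> 7) == 0x09);
        return 0;
    }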
+
+#if defined(ELF_HOST_MACHINE)
+void tcg_register_jit(const void *buf, size_t buf_size)
+{
+ tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
+}
+#endif
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
new file mode 100644
index 000000000..b00a6da29
--- /dev/null
+++ b/tcg/i386/tcg-target.h
@@ -0,0 +1,240 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef I386_TCG_TARGET_H
+#define I386_TCG_TARGET_H
+
+#define TCG_TARGET_INSN_UNIT_SIZE 1
+#define TCG_TARGET_TLB_DISPLACEMENT_BITS 31
+
+#ifdef __x86_64__
+# define TCG_TARGET_REG_BITS 64
+# define TCG_TARGET_NB_REGS 32
+# define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB)
+#else
+# define TCG_TARGET_REG_BITS 32
+# define TCG_TARGET_NB_REGS 24
+# define MAX_CODE_GEN_BUFFER_SIZE UINT32_MAX
+#endif
+
+typedef enum {
+ TCG_REG_EAX = 0,
+ TCG_REG_ECX,
+ TCG_REG_EDX,
+ TCG_REG_EBX,
+ TCG_REG_ESP,
+ TCG_REG_EBP,
+ TCG_REG_ESI,
+ TCG_REG_EDI,
+
+ /* 64-bit registers; always define the symbols to avoid
+ too much if-deffing. */
+ TCG_REG_R8,
+ TCG_REG_R9,
+ TCG_REG_R10,
+ TCG_REG_R11,
+ TCG_REG_R12,
+ TCG_REG_R13,
+ TCG_REG_R14,
+ TCG_REG_R15,
+
+ TCG_REG_XMM0,
+ TCG_REG_XMM1,
+ TCG_REG_XMM2,
+ TCG_REG_XMM3,
+ TCG_REG_XMM4,
+ TCG_REG_XMM5,
+ TCG_REG_XMM6,
+ TCG_REG_XMM7,
+
+ /* 64-bit registers; likewise always define. */
+ TCG_REG_XMM8,
+ TCG_REG_XMM9,
+ TCG_REG_XMM10,
+ TCG_REG_XMM11,
+ TCG_REG_XMM12,
+ TCG_REG_XMM13,
+ TCG_REG_XMM14,
+ TCG_REG_XMM15,
+
+ TCG_REG_RAX = TCG_REG_EAX,
+ TCG_REG_RCX = TCG_REG_ECX,
+ TCG_REG_RDX = TCG_REG_EDX,
+ TCG_REG_RBX = TCG_REG_EBX,
+ TCG_REG_RSP = TCG_REG_ESP,
+ TCG_REG_RBP = TCG_REG_EBP,
+ TCG_REG_RSI = TCG_REG_ESI,
+ TCG_REG_RDI = TCG_REG_EDI,
+
+ TCG_AREG0 = TCG_REG_EBP,
+ TCG_REG_CALL_STACK = TCG_REG_ESP
+} TCGReg;
+
+/* used for function call generation */
+#define TCG_TARGET_STACK_ALIGN 16
+#if defined(_WIN64)
+#define TCG_TARGET_CALL_STACK_OFFSET 32
+#else
+#define TCG_TARGET_CALL_STACK_OFFSET 0
+#endif
+
+extern bool have_bmi1;
+extern bool have_popcnt;
+extern bool have_avx1;
+extern bool have_avx2;
+extern bool have_movbe;
+
+/* optional instructions */
+#define TCG_TARGET_HAS_div2_i32 1
+#define TCG_TARGET_HAS_rot_i32 1
+#define TCG_TARGET_HAS_ext8s_i32 1
+#define TCG_TARGET_HAS_ext16s_i32 1
+#define TCG_TARGET_HAS_ext8u_i32 1
+#define TCG_TARGET_HAS_ext16u_i32 1
+#define TCG_TARGET_HAS_bswap16_i32 1
+#define TCG_TARGET_HAS_bswap32_i32 1
+#define TCG_TARGET_HAS_neg_i32 1
+#define TCG_TARGET_HAS_not_i32 1
+#define TCG_TARGET_HAS_andc_i32 have_bmi1
+#define TCG_TARGET_HAS_orc_i32 0
+#define TCG_TARGET_HAS_eqv_i32 0
+#define TCG_TARGET_HAS_nand_i32 0
+#define TCG_TARGET_HAS_nor_i32 0
+#define TCG_TARGET_HAS_clz_i32 1
+#define TCG_TARGET_HAS_ctz_i32 1
+#define TCG_TARGET_HAS_ctpop_i32 have_popcnt
+#define TCG_TARGET_HAS_deposit_i32 1
+#define TCG_TARGET_HAS_extract_i32 1
+#define TCG_TARGET_HAS_sextract_i32 1
+#define TCG_TARGET_HAS_extract2_i32 1
+#define TCG_TARGET_HAS_movcond_i32 1
+#define TCG_TARGET_HAS_add2_i32 1
+#define TCG_TARGET_HAS_sub2_i32 1
+#define TCG_TARGET_HAS_mulu2_i32 1
+#define TCG_TARGET_HAS_muls2_i32 1
+#define TCG_TARGET_HAS_muluh_i32 0
+#define TCG_TARGET_HAS_mulsh_i32 0
+#define TCG_TARGET_HAS_direct_jump 1
+
+#if TCG_TARGET_REG_BITS == 64
+/* Keep target addresses zero-extended in a register. */
+#define TCG_TARGET_HAS_extrl_i64_i32 (TARGET_LONG_BITS == 32)
+#define TCG_TARGET_HAS_extrh_i64_i32 (TARGET_LONG_BITS == 32)
+#define TCG_TARGET_HAS_div2_i64 1
+#define TCG_TARGET_HAS_rot_i64 1
+#define TCG_TARGET_HAS_ext8s_i64 1
+#define TCG_TARGET_HAS_ext16s_i64 1
+#define TCG_TARGET_HAS_ext32s_i64 1
+#define TCG_TARGET_HAS_ext8u_i64 1
+#define TCG_TARGET_HAS_ext16u_i64 1
+#define TCG_TARGET_HAS_ext32u_i64 1
+#define TCG_TARGET_HAS_bswap16_i64 1
+#define TCG_TARGET_HAS_bswap32_i64 1
+#define TCG_TARGET_HAS_bswap64_i64 1
+#define TCG_TARGET_HAS_neg_i64 1
+#define TCG_TARGET_HAS_not_i64 1
+#define TCG_TARGET_HAS_andc_i64 have_bmi1
+#define TCG_TARGET_HAS_orc_i64 0
+#define TCG_TARGET_HAS_eqv_i64 0
+#define TCG_TARGET_HAS_nand_i64 0
+#define TCG_TARGET_HAS_nor_i64 0
+#define TCG_TARGET_HAS_clz_i64 1
+#define TCG_TARGET_HAS_ctz_i64 1
+#define TCG_TARGET_HAS_ctpop_i64 have_popcnt
+#define TCG_TARGET_HAS_deposit_i64 1
+#define TCG_TARGET_HAS_extract_i64 1
+#define TCG_TARGET_HAS_sextract_i64 0
+#define TCG_TARGET_HAS_extract2_i64 1
+#define TCG_TARGET_HAS_movcond_i64 1
+#define TCG_TARGET_HAS_add2_i64 1
+#define TCG_TARGET_HAS_sub2_i64 1
+#define TCG_TARGET_HAS_mulu2_i64 1
+#define TCG_TARGET_HAS_muls2_i64 1
+#define TCG_TARGET_HAS_muluh_i64 0
+#define TCG_TARGET_HAS_mulsh_i64 0
+#define TCG_TARGET_HAS_qemu_st8_i32 0
+#else
+#define TCG_TARGET_HAS_qemu_st8_i32 1
+#endif
+
+/* We do not support older SSE systems; host vector support begins with AVX1. */
+#define TCG_TARGET_HAS_v64 have_avx1
+#define TCG_TARGET_HAS_v128 have_avx1
+#define TCG_TARGET_HAS_v256 have_avx2
+
+#define TCG_TARGET_HAS_andc_vec 1
+#define TCG_TARGET_HAS_orc_vec 0
+#define TCG_TARGET_HAS_not_vec 0
+#define TCG_TARGET_HAS_neg_vec 0
+#define TCG_TARGET_HAS_abs_vec 1
+#define TCG_TARGET_HAS_roti_vec 0
+#define TCG_TARGET_HAS_rots_vec 0
+#define TCG_TARGET_HAS_rotv_vec 0
+#define TCG_TARGET_HAS_shi_vec 1
+#define TCG_TARGET_HAS_shs_vec 1
+#define TCG_TARGET_HAS_shv_vec have_avx2
+#define TCG_TARGET_HAS_mul_vec 1
+#define TCG_TARGET_HAS_sat_vec 1
+#define TCG_TARGET_HAS_minmax_vec 1
+#define TCG_TARGET_HAS_bitsel_vec 0
+#define TCG_TARGET_HAS_cmpsel_vec -1
+
+#define TCG_TARGET_deposit_i32_valid(ofs, len) \
+ (((ofs) == 0 && (len) == 8) || ((ofs) == 8 && (len) == 8) || \
+ ((ofs) == 0 && (len) == 16))
+#define TCG_TARGET_deposit_i64_valid TCG_TARGET_deposit_i32_valid
+
+/* Check for the possibility of high-byte extraction and, for 64-bit,
+ zero-extending 32-bit right-shift. */
+#define TCG_TARGET_extract_i32_valid(ofs, len) ((ofs) == 8 && (len) == 8)
+#define TCG_TARGET_extract_i64_valid(ofs, len) \
+ (((ofs) == 8 && (len) == 8) || ((ofs) + (len)) == 32)
+
+static inline void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx,
+ uintptr_t jmp_rw, uintptr_t addr)
+{
+ /* patch the branch destination */
+ qatomic_set((int32_t *)jmp_rw, addr - (jmp_rx + 4));
+ /* no need to flush icache explicitly */
+}
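The patched word is the rel32 operand of a near JMP, and x86 measures the displacement from the end of the instruction; since jmp_rx points at the 4-byte displacement itself, that end is jmp_rx + 4. A small check of the arithmetic:

    #include <assert.h>
    #include <stdint.h>

    /* rel32 for a jump whose 4-byte displacement starts at disp_addr. */
    static int32_t rel32_for_jmp(uintptr_t disp_addr, uintptr_t target)
    {
        return (int32_t)((intptr_t)target - (intptr_t)(disp_addr + 4));
    }

    int main(void)
    {
        uintptr_t disp = 0x1000;
        /* Jumping forward 0x100 bytes past the end of the displacement. */
        assert(rel32_for_jmp(disp, disp + 4 + 0x100) == 0x100);
        /* Jumping back to the displacement's own start is -4. */
        assert(rel32_for_jmp(disp, disp) == -4);
        return 0;
    }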
+
+/* This defines the natural memory order supported by this
+ * architecture before guarantees made by various barrier
+ * instructions.
+ *
+ * The x86 has a fairly strong memory model in which the only
+ * reordering really permitted is an earlier store becoming
+ * visible after a later load (store-load reordering).
+ */
+#include "tcg/tcg-mo.h"
+
+#define TCG_TARGET_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD)
+
+#define TCG_TARGET_HAS_MEMORY_BSWAP have_movbe
+
+#ifdef CONFIG_SOFTMMU
+#define TCG_TARGET_NEED_LDST_LABELS
+#endif
+#define TCG_TARGET_NEED_POOL_LABELS
+
+#endif
diff --git a/tcg/i386/tcg-target.opc.h b/tcg/i386/tcg-target.opc.h
new file mode 100644
index 000000000..131294180
--- /dev/null
+++ b/tcg/i386/tcg-target.opc.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2019 Linaro
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ * Target-specific opcodes for host vector expansion. These will be
+ * emitted by tcg_expand_vec_op. For those familiar with GCC internals,
+ * consider these to be UNSPEC with names.
+ */
+
+DEF(x86_shufps_vec, 1, 2, 1, IMPLVEC)
+DEF(x86_vpblendvb_vec, 1, 3, 0, IMPLVEC)
+DEF(x86_blend_vec, 1, 2, 1, IMPLVEC)
+DEF(x86_packss_vec, 1, 2, 0, IMPLVEC)
+DEF(x86_packus_vec, 1, 2, 0, IMPLVEC)
+DEF(x86_psrldq_vec, 1, 1, 1, IMPLVEC)
+DEF(x86_vperm2i128_vec, 1, 2, 1, IMPLVEC)
+DEF(x86_punpckl_vec, 1, 2, 0, IMPLVEC)
+DEF(x86_punpckh_vec, 1, 2, 0, IMPLVEC)
diff --git a/tcg/meson.build b/tcg/meson.build
new file mode 100644
index 000000000..c4c63b19d
--- /dev/null
+++ b/tcg/meson.build
@@ -0,0 +1,20 @@
+tcg_ss = ss.source_set()
+
+tcg_ss.add(files(
+ 'optimize.c',
+ 'region.c',
+ 'tcg.c',
+ 'tcg-common.c',
+ 'tcg-op.c',
+ 'tcg-op-gvec.c',
+ 'tcg-op-vec.c',
+))
+
+if get_option('tcg_interpreter')
+ libffi = dependency('libffi', version: '>=3.0', required: true,
+ method: 'pkg-config', kwargs: static_kwargs)
+ specific_ss.add(libffi)
+ specific_ss.add(files('tci.c'))
+endif
+
+specific_ss.add_all(when: 'CONFIG_TCG', if_true: tcg_ss)
diff --git a/tcg/mips/tcg-target-con-set.h b/tcg/mips/tcg-target-con-set.h
new file mode 100644
index 000000000..fe3e868a2
--- /dev/null
+++ b/tcg/mips/tcg-target-con-set.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define MIPS target-specific constraint sets.
+ * Copyright (c) 2021 Linaro
+ */
+
+/*
+ * C_On_Im(...) defines a constraint set with <n> outputs and <m> inputs.
+ * Each operand should be a sequence of constraint letters as defined by
+ * tcg-target-con-str.h; the constraint combination is inclusive or.
+ */
+C_O0_I1(r)
+C_O0_I2(rZ, r)
+C_O0_I2(rZ, rZ)
+C_O0_I2(SZ, S)
+C_O0_I3(SZ, S, S)
+C_O0_I3(SZ, SZ, S)
+C_O0_I4(rZ, rZ, rZ, rZ)
+C_O0_I4(SZ, SZ, S, S)
+C_O1_I1(r, L)
+C_O1_I1(r, r)
+C_O1_I2(r, 0, rZ)
+C_O1_I2(r, L, L)
+C_O1_I2(r, r, ri)
+C_O1_I2(r, r, rI)
+C_O1_I2(r, r, rIK)
+C_O1_I2(r, r, rJ)
+C_O1_I2(r, r, rWZ)
+C_O1_I2(r, rZ, rN)
+C_O1_I2(r, rZ, rZ)
+C_O1_I4(r, rZ, rZ, rZ, 0)
+C_O1_I4(r, rZ, rZ, rZ, rZ)
+C_O2_I1(r, r, L)
+C_O2_I2(r, r, L, L)
+C_O2_I2(r, r, r, r)
+C_O2_I4(r, r, rZ, rZ, rN, rN)
diff --git a/tcg/mips/tcg-target-con-str.h b/tcg/mips/tcg-target-con-str.h
new file mode 100644
index 000000000..e4b2965c7
--- /dev/null
+++ b/tcg/mips/tcg-target-con-str.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define MIPS target-specific operand constraints.
+ * Copyright (c) 2021 Linaro
+ */
+
+/*
+ * Define constraint letters for register sets:
+ * REGS(letter, register_mask)
+ */
+REGS('r', ALL_GENERAL_REGS)
+REGS('L', ALL_QLOAD_REGS)
+REGS('S', ALL_QSTORE_REGS)
+
+/*
+ * Define constraint letters for constants:
+ * CONST(letter, TCG_CT_CONST_* bit set)
+ */
+CONST('I', TCG_CT_CONST_U16)
+CONST('J', TCG_CT_CONST_S16)
+CONST('K', TCG_CT_CONST_P2M1)
+CONST('N', TCG_CT_CONST_N16)
+CONST('W', TCG_CT_CONST_WSZ)
+CONST('Z', TCG_CT_CONST_ZERO)
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
new file mode 100644
index 000000000..d8f6914f0
--- /dev/null
+++ b/tcg/mips/tcg-target.c.inc
@@ -0,0 +1,2577 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2008-2009 Arnaud Patard <arnaud.patard@rtp-net.org>
+ * Copyright (c) 2009 Aurelien Jarno <aurelien@aurel32.net>
+ * Based on i386/tcg-target.c - Copyright (c) 2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifdef HOST_WORDS_BIGENDIAN
+# define MIPS_BE 1
+#else
+# define MIPS_BE 0
+#endif
+
+#if TCG_TARGET_REG_BITS == 32
+# define LO_OFF (MIPS_BE * 4)
+# define HI_OFF (4 - LO_OFF)
+#else
+/* Provoke a link-time error, via the never-defined link_error(), if
+ these values are ever used for TCG_TARGET_REG_BITS == 64. */
+int link_error(void);
+# define LO_OFF link_error()
+# define HI_OFF link_error()
+#endif
+
+#ifdef CONFIG_DEBUG_TCG
+static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
+ "zero",
+ "at",
+ "v0",
+ "v1",
+ "a0",
+ "a1",
+ "a2",
+ "a3",
+ "t0",
+ "t1",
+ "t2",
+ "t3",
+ "t4",
+ "t5",
+ "t6",
+ "t7",
+ "s0",
+ "s1",
+ "s2",
+ "s3",
+ "s4",
+ "s5",
+ "s6",
+ "s7",
+ "t8",
+ "t9",
+ "k0",
+ "k1",
+ "gp",
+ "sp",
+ "s8",
+ "ra",
+};
+#endif
+
+#define TCG_TMP0 TCG_REG_AT
+#define TCG_TMP1 TCG_REG_T9
+#define TCG_TMP2 TCG_REG_T8
+#define TCG_TMP3 TCG_REG_T7
+
+#ifndef CONFIG_SOFTMMU
+#define TCG_GUEST_BASE_REG TCG_REG_S1
+#endif
+
+/* check if we really need so many registers :P */
+static const int tcg_target_reg_alloc_order[] = {
+ /* Call saved registers. */
+ TCG_REG_S0,
+ TCG_REG_S1,
+ TCG_REG_S2,
+ TCG_REG_S3,
+ TCG_REG_S4,
+ TCG_REG_S5,
+ TCG_REG_S6,
+ TCG_REG_S7,
+ TCG_REG_S8,
+
+ /* Call clobbered registers. */
+ TCG_REG_T4,
+ TCG_REG_T5,
+ TCG_REG_T6,
+ TCG_REG_T7,
+ TCG_REG_T8,
+ TCG_REG_T9,
+ TCG_REG_V1,
+ TCG_REG_V0,
+
+ /* Argument registers, opposite order of allocation. */
+ TCG_REG_T3,
+ TCG_REG_T2,
+ TCG_REG_T1,
+ TCG_REG_T0,
+ TCG_REG_A3,
+ TCG_REG_A2,
+ TCG_REG_A1,
+ TCG_REG_A0,
+};
+
+static const TCGReg tcg_target_call_iarg_regs[] = {
+ TCG_REG_A0,
+ TCG_REG_A1,
+ TCG_REG_A2,
+ TCG_REG_A3,
+#if _MIPS_SIM == _ABIN32 || _MIPS_SIM == _ABI64
+ TCG_REG_T0,
+ TCG_REG_T1,
+ TCG_REG_T2,
+ TCG_REG_T3,
+#endif
+};
+
+static const TCGReg tcg_target_call_oarg_regs[2] = {
+ TCG_REG_V0,
+ TCG_REG_V1
+};
+
+static const tcg_insn_unit *tb_ret_addr;
+static const tcg_insn_unit *bswap32_addr;
+static const tcg_insn_unit *bswap32u_addr;
+static const tcg_insn_unit *bswap64_addr;
+
+static bool reloc_pc16(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
+{
+ /* Let the compiler perform the right-shift as part of the arithmetic. */
+ const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
+ ptrdiff_t disp = target - (src_rx + 1);
+ if (disp == (int16_t)disp) {
+ *src_rw = deposit32(*src_rw, 0, 16, disp);
+ return true;
+ }
+ return false;
+}
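+/*
+ * Worked example (illustrative): for a branch at byte address 0x1000
+ * targeting 0x1010, the delay slot sits at 0x1004, so the pointer
+ * subtraction above yields disp = (0x1010 - 0x1004) / 4 = 3 insn
+ * units, which fits the signed 16-bit field deposited into bits 15:0.
+ */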
+
+static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
+ intptr_t value, intptr_t addend)
+{
+ tcg_debug_assert(type == R_MIPS_PC16);
+ tcg_debug_assert(addend == 0);
+ return reloc_pc16(code_ptr, (const tcg_insn_unit *)value);
+}
+
+#define TCG_CT_CONST_ZERO 0x100
+#define TCG_CT_CONST_U16 0x200 /* Unsigned 16-bit: 0 - 0xffff. */
+#define TCG_CT_CONST_S16 0x400 /* Signed 16-bit: -32768 - 32767 */
+#define TCG_CT_CONST_P2M1 0x800 /* Power of 2 minus 1. */
+#define TCG_CT_CONST_N16 0x1000 /* "Negatable" 16-bit: -32767 - 32767 */
+#define TCG_CT_CONST_WSZ 0x2000 /* word size */
+
+#define ALL_GENERAL_REGS 0xffffffffu
+#define NOA0_REGS (ALL_GENERAL_REGS & ~(1 << TCG_REG_A0))
+
+#ifdef CONFIG_SOFTMMU
+#define ALL_QLOAD_REGS \
+ (NOA0_REGS & ~((TCG_TARGET_REG_BITS < TARGET_LONG_BITS) << TCG_REG_A2))
+#define ALL_QSTORE_REGS \
+ (NOA0_REGS & ~(TCG_TARGET_REG_BITS < TARGET_LONG_BITS \
+ ? (1 << TCG_REG_A2) | (1 << TCG_REG_A3) \
+ : (1 << TCG_REG_A1)))
+#else
+#define ALL_QLOAD_REGS NOA0_REGS
+#define ALL_QSTORE_REGS NOA0_REGS
+#endif
+
+
+static bool is_p2m1(tcg_target_long val)
+{
+ return val && ((val + 1) & val) == 0;
+}
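+/*
+ * Illustrative: 0x00ff is accepted, since (0x00ff + 1) & 0x00ff == 0,
+ * while 0x00fe is rejected because 0x00ff & 0x00fe != 0; zero is
+ * excluded explicitly by the first operand of the &&.
+ */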
+
+/* test if a constant matches the constraint */
+static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
+{
+ if (ct & TCG_CT_CONST) {
+ return 1;
+ } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
+ return 1;
+ } else if ((ct & TCG_CT_CONST_U16) && val == (uint16_t)val) {
+ return 1;
+ } else if ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) {
+ return 1;
+ } else if ((ct & TCG_CT_CONST_N16) && val >= -32767 && val <= 32767) {
+ return 1;
+ } else if ((ct & TCG_CT_CONST_P2M1)
+ && use_mips32r2_instructions && is_p2m1(val)) {
+ return 1;
+ } else if ((ct & TCG_CT_CONST_WSZ)
+ && val == (type == TCG_TYPE_I32 ? 32 : 64)) {
+ return 1;
+ }
+ return 0;
+}
+
+/* instruction opcodes */
+typedef enum {
+ OPC_J = 002 << 26,
+ OPC_JAL = 003 << 26,
+ OPC_BEQ = 004 << 26,
+ OPC_BNE = 005 << 26,
+ OPC_BLEZ = 006 << 26,
+ OPC_BGTZ = 007 << 26,
+ OPC_ADDIU = 011 << 26,
+ OPC_SLTI = 012 << 26,
+ OPC_SLTIU = 013 << 26,
+ OPC_ANDI = 014 << 26,
+ OPC_ORI = 015 << 26,
+ OPC_XORI = 016 << 26,
+ OPC_LUI = 017 << 26,
+ OPC_DADDIU = 031 << 26,
+ OPC_LB = 040 << 26,
+ OPC_LH = 041 << 26,
+ OPC_LW = 043 << 26,
+ OPC_LBU = 044 << 26,
+ OPC_LHU = 045 << 26,
+ OPC_LWU = 047 << 26,
+ OPC_SB = 050 << 26,
+ OPC_SH = 051 << 26,
+ OPC_SW = 053 << 26,
+ OPC_LD = 067 << 26,
+ OPC_SD = 077 << 26,
+
+ OPC_SPECIAL = 000 << 26,
+ OPC_SLL = OPC_SPECIAL | 000,
+ OPC_SRL = OPC_SPECIAL | 002,
+ OPC_ROTR = OPC_SPECIAL | 002 | (1 << 21),
+ OPC_SRA = OPC_SPECIAL | 003,
+ OPC_SLLV = OPC_SPECIAL | 004,
+ OPC_SRLV = OPC_SPECIAL | 006,
+ OPC_ROTRV = OPC_SPECIAL | 006 | 0100,
+ OPC_SRAV = OPC_SPECIAL | 007,
+ OPC_JR_R5 = OPC_SPECIAL | 010,
+ OPC_JALR = OPC_SPECIAL | 011,
+ OPC_MOVZ = OPC_SPECIAL | 012,
+ OPC_MOVN = OPC_SPECIAL | 013,
+ OPC_SYNC = OPC_SPECIAL | 017,
+ OPC_MFHI = OPC_SPECIAL | 020,
+ OPC_MFLO = OPC_SPECIAL | 022,
+ OPC_DSLLV = OPC_SPECIAL | 024,
+ OPC_DSRLV = OPC_SPECIAL | 026,
+ OPC_DROTRV = OPC_SPECIAL | 026 | 0100,
+ OPC_DSRAV = OPC_SPECIAL | 027,
+ OPC_MULT = OPC_SPECIAL | 030,
+ OPC_MUL_R6 = OPC_SPECIAL | 030 | 0200,
+ OPC_MUH = OPC_SPECIAL | 030 | 0300,
+ OPC_MULTU = OPC_SPECIAL | 031,
+ OPC_MULU = OPC_SPECIAL | 031 | 0200,
+ OPC_MUHU = OPC_SPECIAL | 031 | 0300,
+ OPC_DIV = OPC_SPECIAL | 032,
+ OPC_DIV_R6 = OPC_SPECIAL | 032 | 0200,
+ OPC_MOD = OPC_SPECIAL | 032 | 0300,
+ OPC_DIVU = OPC_SPECIAL | 033,
+ OPC_DIVU_R6 = OPC_SPECIAL | 033 | 0200,
+ OPC_MODU = OPC_SPECIAL | 033 | 0300,
+ OPC_DMULT = OPC_SPECIAL | 034,
+ OPC_DMUL = OPC_SPECIAL | 034 | 0200,
+ OPC_DMUH = OPC_SPECIAL | 034 | 0300,
+ OPC_DMULTU = OPC_SPECIAL | 035,
+ OPC_DMULU = OPC_SPECIAL | 035 | 0200,
+ OPC_DMUHU = OPC_SPECIAL | 035 | 0300,
+ OPC_DDIV = OPC_SPECIAL | 036,
+ OPC_DDIV_R6 = OPC_SPECIAL | 036 | 0200,
+ OPC_DMOD = OPC_SPECIAL | 036 | 0300,
+ OPC_DDIVU = OPC_SPECIAL | 037,
+ OPC_DDIVU_R6 = OPC_SPECIAL | 037 | 0200,
+ OPC_DMODU = OPC_SPECIAL | 037 | 0300,
+ OPC_ADDU = OPC_SPECIAL | 041,
+ OPC_SUBU = OPC_SPECIAL | 043,
+ OPC_AND = OPC_SPECIAL | 044,
+ OPC_OR = OPC_SPECIAL | 045,
+ OPC_XOR = OPC_SPECIAL | 046,
+ OPC_NOR = OPC_SPECIAL | 047,
+ OPC_SLT = OPC_SPECIAL | 052,
+ OPC_SLTU = OPC_SPECIAL | 053,
+ OPC_DADDU = OPC_SPECIAL | 055,
+ OPC_DSUBU = OPC_SPECIAL | 057,
+ OPC_SELEQZ = OPC_SPECIAL | 065,
+ OPC_SELNEZ = OPC_SPECIAL | 067,
+ OPC_DSLL = OPC_SPECIAL | 070,
+ OPC_DSRL = OPC_SPECIAL | 072,
+ OPC_DROTR = OPC_SPECIAL | 072 | (1 << 21),
+ OPC_DSRA = OPC_SPECIAL | 073,
+ OPC_DSLL32 = OPC_SPECIAL | 074,
+ OPC_DSRL32 = OPC_SPECIAL | 076,
+ OPC_DROTR32 = OPC_SPECIAL | 076 | (1 << 21),
+ OPC_DSRA32 = OPC_SPECIAL | 077,
+ OPC_CLZ_R6 = OPC_SPECIAL | 0120,
+ OPC_DCLZ_R6 = OPC_SPECIAL | 0122,
+
+ OPC_REGIMM = 001 << 26,
+ OPC_BLTZ = OPC_REGIMM | (000 << 16),
+ OPC_BGEZ = OPC_REGIMM | (001 << 16),
+
+ OPC_SPECIAL2 = 034 << 26,
+ OPC_MUL_R5 = OPC_SPECIAL2 | 002,
+ OPC_CLZ = OPC_SPECIAL2 | 040,
+ OPC_DCLZ = OPC_SPECIAL2 | 044,
+
+ OPC_SPECIAL3 = 037 << 26,
+ OPC_EXT = OPC_SPECIAL3 | 000,
+ OPC_DEXTM = OPC_SPECIAL3 | 001,
+ OPC_DEXTU = OPC_SPECIAL3 | 002,
+ OPC_DEXT = OPC_SPECIAL3 | 003,
+ OPC_INS = OPC_SPECIAL3 | 004,
+ OPC_DINSM = OPC_SPECIAL3 | 005,
+ OPC_DINSU = OPC_SPECIAL3 | 006,
+ OPC_DINS = OPC_SPECIAL3 | 007,
+ OPC_WSBH = OPC_SPECIAL3 | 00240,
+ OPC_DSBH = OPC_SPECIAL3 | 00244,
+ OPC_DSHD = OPC_SPECIAL3 | 00544,
+ OPC_SEB = OPC_SPECIAL3 | 02040,
+ OPC_SEH = OPC_SPECIAL3 | 03040,
+
+ /* MIPS r6 doesn't have JR; JALR should be used instead */
+ OPC_JR = use_mips32r6_instructions ? OPC_JALR : OPC_JR_R5,
+
+ /*
+ * MIPS r6 replaces MUL with an alternative encoding which is
+ * backwards-compatible at the assembly level.
+ */
+ OPC_MUL = use_mips32r6_instructions ? OPC_MUL_R6 : OPC_MUL_R5,
+
+ /* MIPS r6 introduced names for weaker variants of SYNC. These are
+ backward compatible with previous architecture revisions. */
+ OPC_SYNC_WMB = OPC_SYNC | 0x04 << 6,
+ OPC_SYNC_MB = OPC_SYNC | 0x10 << 6,
+ OPC_SYNC_ACQUIRE = OPC_SYNC | 0x11 << 6,
+ OPC_SYNC_RELEASE = OPC_SYNC | 0x12 << 6,
+ OPC_SYNC_RMB = OPC_SYNC | 0x13 << 6,
+
+ /* Aliases for convenience. */
+ ALIAS_PADD = sizeof(void *) == 4 ? OPC_ADDU : OPC_DADDU,
+ ALIAS_PADDI = sizeof(void *) == 4 ? OPC_ADDIU : OPC_DADDIU,
+ ALIAS_TSRL = TARGET_LONG_BITS == 32 || TCG_TARGET_REG_BITS == 32
+ ? OPC_SRL : OPC_DSRL,
+} MIPSInsn;
+
+/*
+ * Type reg
+ */
+static void tcg_out_opc_reg(TCGContext *s, MIPSInsn opc,
+ TCGReg rd, TCGReg rs, TCGReg rt)
+{
+ int32_t inst;
+
+ inst = opc;
+ inst |= (rs & 0x1F) << 21;
+ inst |= (rt & 0x1F) << 16;
+ inst |= (rd & 0x1F) << 11;
+ tcg_out32(s, inst);
+}
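+/*
+ * Encoding example (illustrative): ADDU $v0, $a0, $a1 assembles to
+ * OPC_ADDU (0x00000021) | 4 << 21 | 5 << 16 | 2 << 11 = 0x00851021,
+ * with rs = $a0 (4), rt = $a1 (5) and rd = $v0 (2).
+ */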
+
+/*
+ * Type immediate
+ */
+static void tcg_out_opc_imm(TCGContext *s, MIPSInsn opc,
+ TCGReg rt, TCGReg rs, TCGArg imm)
+{
+ int32_t inst;
+
+ inst = opc;
+ inst |= (rs & 0x1F) << 21;
+ inst |= (rt & 0x1F) << 16;
+ inst |= (imm & 0xffff);
+ tcg_out32(s, inst);
+}
+
+/*
+ * Type bitfield
+ */
+static void tcg_out_opc_bf(TCGContext *s, MIPSInsn opc, TCGReg rt,
+ TCGReg rs, int msb, int lsb)
+{
+ int32_t inst;
+
+ inst = opc;
+ inst |= (rs & 0x1F) << 21;
+ inst |= (rt & 0x1F) << 16;
+ inst |= (msb & 0x1F) << 11;
+ inst |= (lsb & 0x1F) << 6;
+ tcg_out32(s, inst);
+}
+
+static void tcg_out_opc_bf64(TCGContext *s, MIPSInsn opc, MIPSInsn opm,
+ MIPSInsn oph, TCGReg rt, TCGReg rs,
+ int msb, int lsb)
+{
+ if (lsb >= 32) {
+ opc = oph;
+ msb -= 32;
+ lsb -= 32;
+ } else if (msb >= 32) {
+ opc = opm;
+ msb -= 32;
+ }
+ tcg_out_opc_bf(s, opc, rt, rs, msb, lsb);
+}
+
+/*
+ * Type branch
+ */
+static void tcg_out_opc_br(TCGContext *s, MIPSInsn opc, TCGReg rt, TCGReg rs)
+{
+ tcg_out_opc_imm(s, opc, rt, rs, 0);
+}
+
+/*
+ * Type sa
+ */
+static void tcg_out_opc_sa(TCGContext *s, MIPSInsn opc,
+ TCGReg rd, TCGReg rt, TCGArg sa)
+{
+ int32_t inst;
+
+ inst = opc;
+ inst |= (rt & 0x1F) << 16;
+ inst |= (rd & 0x1F) << 11;
+ inst |= (sa & 0x1F) << 6;
+ tcg_out32(s, inst);
+}
+
+static void tcg_out_opc_sa64(TCGContext *s, MIPSInsn opc1, MIPSInsn opc2,
+ TCGReg rd, TCGReg rt, TCGArg sa)
+{
+ int32_t inst;
+
+ inst = (sa & 32 ? opc2 : opc1);
+ inst |= (rt & 0x1F) << 16;
+ inst |= (rd & 0x1F) << 11;
+ inst |= (sa & 0x1F) << 6;
+ tcg_out32(s, inst);
+}
+
+/*
+ * Type jump.
+ * Returns true if the branch was in range and the insn was emitted.
+ */
+static bool tcg_out_opc_jmp(TCGContext *s, MIPSInsn opc, const void *target)
+{
+ uintptr_t dest = (uintptr_t)target;
+ uintptr_t from = (uintptr_t)tcg_splitwx_to_rx(s->code_ptr) + 4;
+ int32_t inst;
+
+ /* A pc-region branch can only reach targets in the same 256MB
+ region as the address of the delay slot (thus the +4 above). */
+ if ((from ^ dest) & -(1 << 28)) {
+ return false;
+ }
+ tcg_debug_assert((dest & 3) == 0);
+
+ inst = opc;
+ inst |= (dest >> 2) & 0x3ffffff;
+ tcg_out32(s, inst);
+ return true;
+}
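+/*
+ * Illustrative: with the delay slot at 0x10000004 and dest 0x10002000,
+ * (from ^ dest) & -(1 << 28) is zero, so a J/JAL is emitted with
+ * (dest >> 2) & 0x3ffffff in its 26-bit field; a destination in a
+ * different 256MB region makes the function return false instead.
+ */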
+
+static void tcg_out_nop(TCGContext *s)
+{
+ tcg_out32(s, 0);
+}
+
+static void tcg_out_dsll(TCGContext *s, TCGReg rd, TCGReg rt, TCGArg sa)
+{
+ tcg_out_opc_sa64(s, OPC_DSLL, OPC_DSLL32, rd, rt, sa);
+}
+
+static void tcg_out_dsrl(TCGContext *s, TCGReg rd, TCGReg rt, TCGArg sa)
+{
+ tcg_out_opc_sa64(s, OPC_DSRL, OPC_DSRL32, rd, rt, sa);
+}
+
+static void tcg_out_dsra(TCGContext *s, TCGReg rd, TCGReg rt, TCGArg sa)
+{
+ tcg_out_opc_sa64(s, OPC_DSRA, OPC_DSRA32, rd, rt, sa);
+}
+
+static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
+{
+ /* Simple reg-reg move, optimising out the 'do nothing' case */
+ if (ret != arg) {
+ tcg_out_opc_reg(s, OPC_OR, ret, arg, TCG_REG_ZERO);
+ }
+ return true;
+}
+
+static void tcg_out_movi(TCGContext *s, TCGType type,
+ TCGReg ret, tcg_target_long arg)
+{
+ if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
+ arg = (int32_t)arg;
+ }
+ if (arg == (int16_t)arg) {
+ tcg_out_opc_imm(s, OPC_ADDIU, ret, TCG_REG_ZERO, arg);
+ return;
+ }
+ if (arg == (uint16_t)arg) {
+ tcg_out_opc_imm(s, OPC_ORI, ret, TCG_REG_ZERO, arg);
+ return;
+ }
+ if (TCG_TARGET_REG_BITS == 32 || arg == (int32_t)arg) {
+ tcg_out_opc_imm(s, OPC_LUI, ret, TCG_REG_ZERO, arg >> 16);
+ } else {
+ tcg_out_movi(s, TCG_TYPE_I32, ret, arg >> 31 >> 1);
+ if (arg & 0xffff0000ull) {
+ tcg_out_dsll(s, ret, ret, 16);
+ tcg_out_opc_imm(s, OPC_ORI, ret, ret, arg >> 16);
+ tcg_out_dsll(s, ret, ret, 16);
+ } else {
+ tcg_out_dsll(s, ret, ret, 32);
+ }
+ }
+ if (arg & 0xffff) {
+ tcg_out_opc_imm(s, OPC_ORI, ret, ret, arg & 0xffff);
+ }
+}
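+/*
+ * Illustrative decompositions: 0x12345678 becomes LUI ret, 0x1234
+ * followed by ORI ret, ret, 0x5678, while any value representable as
+ * a signed or unsigned 16-bit immediate collapses to a single ADDIU
+ * or ORI from $zero; larger 64-bit constants are built 16 bits at a
+ * time, interleaved with DSLL shifts.
+ */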
+
+static void tcg_out_bswap16(TCGContext *s, TCGReg ret, TCGReg arg, int flags)
+{
+ /* ret and arg can't be register tmp0 */
+ tcg_debug_assert(ret != TCG_TMP0);
+ tcg_debug_assert(arg != TCG_TMP0);
+
+ /* With arg = abcd: */
+ if (use_mips32r2_instructions) {
+ tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg); /* badc */
+ if (flags & TCG_BSWAP_OS) {
+ tcg_out_opc_reg(s, OPC_SEH, ret, 0, ret); /* ssdc */
+ } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
+ tcg_out_opc_imm(s, OPC_ANDI, ret, ret, 0xffff); /* 00dc */
+ }
+ return;
+ }
+
+ tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, arg, 8); /* 0abc */
+ if (!(flags & TCG_BSWAP_IZ)) {
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, TCG_TMP0, 0x00ff); /* 000c */
+ }
+ if (flags & TCG_BSWAP_OS) {
+ tcg_out_opc_sa(s, OPC_SLL, ret, arg, 24); /* d000 */
+ tcg_out_opc_sa(s, OPC_SRA, ret, ret, 16); /* ssd0 */
+ } else {
+ tcg_out_opc_sa(s, OPC_SLL, ret, arg, 8); /* bcd0 */
+ if (flags & TCG_BSWAP_OZ) {
+ tcg_out_opc_imm(s, OPC_ANDI, ret, ret, 0xff00); /* 00d0 */
+ }
+ }
+ tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0); /* ssdc */
+}
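+/*
+ * Illustrative: with flags = TCG_BSWAP_IZ | TCG_BSWAP_OZ and
+ * arg = 0x00001234, both the WSBH path and the shift-and-mask
+ * fallback above leave ret = 0x00003412.
+ */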
+
+static void tcg_out_bswap_subr(TCGContext *s, const tcg_insn_unit *sub)
+{
+ if (!tcg_out_opc_jmp(s, OPC_JAL, sub)) {
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP1, (uintptr_t)sub);
+ tcg_out_opc_reg(s, OPC_JALR, TCG_REG_RA, TCG_TMP1, 0);
+ }
+}
+
+static void tcg_out_bswap32(TCGContext *s, TCGReg ret, TCGReg arg, int flags)
+{
+ if (use_mips32r2_instructions) {
+ tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg);
+ tcg_out_opc_sa(s, OPC_ROTR, ret, ret, 16);
+ if (flags & TCG_BSWAP_OZ) {
+ tcg_out_opc_bf(s, OPC_DEXT, ret, ret, 31, 0);
+ }
+ } else {
+ if (flags & TCG_BSWAP_OZ) {
+ tcg_out_bswap_subr(s, bswap32u_addr);
+ } else {
+ tcg_out_bswap_subr(s, bswap32_addr);
+ }
+ /* delay slot -- never omit the insn, like tcg_out_mov might. */
+ tcg_out_opc_reg(s, OPC_OR, TCG_TMP0, arg, TCG_REG_ZERO);
+ tcg_out_mov(s, TCG_TYPE_I32, ret, TCG_TMP3);
+ }
+}
+
+static void tcg_out_bswap64(TCGContext *s, TCGReg ret, TCGReg arg)
+{
+ if (use_mips32r2_instructions) {
+ tcg_out_opc_reg(s, OPC_DSBH, ret, 0, arg);
+ tcg_out_opc_reg(s, OPC_DSHD, ret, 0, ret);
+ } else {
+ tcg_out_bswap_subr(s, bswap64_addr);
+ /* delay slot -- never omit the insn, like tcg_out_mov might. */
+ tcg_out_opc_reg(s, OPC_OR, TCG_TMP0, arg, TCG_REG_ZERO);
+ tcg_out_mov(s, TCG_TYPE_I32, ret, TCG_TMP3);
+ }
+}
+
+static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg)
+{
+ if (use_mips32r2_instructions) {
+ tcg_out_opc_bf(s, OPC_DEXT, ret, arg, 31, 0);
+ } else {
+ tcg_out_dsll(s, ret, arg, 32);
+ tcg_out_dsrl(s, ret, ret, 32);
+ }
+}
+
+static void tcg_out_ldst(TCGContext *s, MIPSInsn opc, TCGReg data,
+ TCGReg addr, intptr_t ofs)
+{
+ int16_t lo = ofs;
+ if (ofs != lo) {
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs - lo);
+ if (addr != TCG_REG_ZERO) {
+ tcg_out_opc_reg(s, ALIAS_PADD, TCG_TMP0, TCG_TMP0, addr);
+ }
+ addr = TCG_TMP0;
+ }
+ tcg_out_opc_imm(s, opc, data, addr, lo);
+}
+
+static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
+ TCGReg arg1, intptr_t arg2)
+{
+ MIPSInsn opc = OPC_LD;
+ if (TCG_TARGET_REG_BITS == 32 || type == TCG_TYPE_I32) {
+ opc = OPC_LW;
+ }
+ tcg_out_ldst(s, opc, arg, arg1, arg2);
+}
+
+static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
+ TCGReg arg1, intptr_t arg2)
+{
+ MIPSInsn opc = OPC_SD;
+ if (TCG_TARGET_REG_BITS == 32 || type == TCG_TYPE_I32) {
+ opc = OPC_SW;
+ }
+ tcg_out_ldst(s, opc, arg, arg1, arg2);
+}
+
+static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
+ TCGReg base, intptr_t ofs)
+{
+ if (val == 0) {
+ tcg_out_st(s, type, TCG_REG_ZERO, base, ofs);
+ return true;
+ }
+ return false;
+}
+
+static void tcg_out_addsub2(TCGContext *s, TCGReg rl, TCGReg rh, TCGReg al,
+ TCGReg ah, TCGArg bl, TCGArg bh, bool cbl,
+ bool cbh, bool is_sub)
+{
+ TCGReg th = TCG_TMP1;
+
+ /* If we have a negative constant such that negating it would
+ make the high part zero, we can (usually) eliminate one insn. */
+ if (cbl && cbh && bh == -1 && bl != 0) {
+ bl = -bl;
+ bh = 0;
+ is_sub = !is_sub;
+ }
+
+ /* By operating on the high part first, we get to use the final
+ carry operation to move back from the temporary. */
+ if (!cbh) {
+ tcg_out_opc_reg(s, (is_sub ? OPC_SUBU : OPC_ADDU), th, ah, bh);
+ } else if (bh != 0 || ah == rl) {
+ tcg_out_opc_imm(s, OPC_ADDIU, th, ah, (is_sub ? -bh : bh));
+ } else {
+ th = ah;
+ }
+
+ /* Note that tcg optimization should eliminate the bl == 0 case. */
+ if (is_sub) {
+ if (cbl) {
+ tcg_out_opc_imm(s, OPC_SLTIU, TCG_TMP0, al, bl);
+ tcg_out_opc_imm(s, OPC_ADDIU, rl, al, -bl);
+ } else {
+ tcg_out_opc_reg(s, OPC_SLTU, TCG_TMP0, al, bl);
+ tcg_out_opc_reg(s, OPC_SUBU, rl, al, bl);
+ }
+ tcg_out_opc_reg(s, OPC_SUBU, rh, th, TCG_TMP0);
+ } else {
+ if (cbl) {
+ tcg_out_opc_imm(s, OPC_ADDIU, rl, al, bl);
+ tcg_out_opc_imm(s, OPC_SLTIU, TCG_TMP0, rl, bl);
+ } else if (rl == al && rl == bl) {
+ tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, al, TCG_TARGET_REG_BITS - 1);
+ tcg_out_opc_reg(s, OPC_ADDU, rl, al, bl);
+ } else {
+ tcg_out_opc_reg(s, OPC_ADDU, rl, al, bl);
+ tcg_out_opc_reg(s, OPC_SLTU, TCG_TMP0, rl, (rl == bl ? al : bl));
+ }
+ tcg_out_opc_reg(s, OPC_ADDU, rh, th, TCG_TMP0);
+ }
+}
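+/*
+ * Illustrative carry handling for the register-register add case:
+ * ADDU rl, al, bl followed by SLTU TMP0, rl, bl sets TMP0 to 1
+ * exactly when the low-part sum wrapped, and the final ADDU rh, th,
+ * TMP0 folds that carry into the high part computed earlier.
+ */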
+
+/* Bit 0 set if inversion required; bit 1 set if swapping required. */
+#define MIPS_CMP_INV 1
+#define MIPS_CMP_SWAP 2
+
+static const uint8_t mips_cmp_map[16] = {
+ [TCG_COND_LT] = 0,
+ [TCG_COND_LTU] = 0,
+ [TCG_COND_GE] = MIPS_CMP_INV,
+ [TCG_COND_GEU] = MIPS_CMP_INV,
+ [TCG_COND_LE] = MIPS_CMP_INV | MIPS_CMP_SWAP,
+ [TCG_COND_LEU] = MIPS_CMP_INV | MIPS_CMP_SWAP,
+ [TCG_COND_GT] = MIPS_CMP_SWAP,
+ [TCG_COND_GTU] = MIPS_CMP_SWAP,
+};
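+/*
+ * Illustrative: TCG_COND_LE maps to MIPS_CMP_INV | MIPS_CMP_SWAP, so
+ * "a <= b" is evaluated as the inverse of "b < a": SLT tmp, b, a
+ * followed by XORI tmp, tmp, 1 in setcond, or a BEQ against $zero in
+ * brcond.
+ */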
+
+static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret,
+ TCGReg arg1, TCGReg arg2)
+{
+ MIPSInsn s_opc = OPC_SLTU;
+ int cmp_map;
+
+ switch (cond) {
+ case TCG_COND_EQ:
+ if (arg2 != 0) {
+ tcg_out_opc_reg(s, OPC_XOR, ret, arg1, arg2);
+ arg1 = ret;
+ }
+ tcg_out_opc_imm(s, OPC_SLTIU, ret, arg1, 1);
+ break;
+
+ case TCG_COND_NE:
+ if (arg2 != 0) {
+ tcg_out_opc_reg(s, OPC_XOR, ret, arg1, arg2);
+ arg1 = ret;
+ }
+ tcg_out_opc_reg(s, OPC_SLTU, ret, TCG_REG_ZERO, arg1);
+ break;
+
+ case TCG_COND_LT:
+ case TCG_COND_GE:
+ case TCG_COND_LE:
+ case TCG_COND_GT:
+ s_opc = OPC_SLT;
+ /* FALLTHRU */
+
+ case TCG_COND_LTU:
+ case TCG_COND_GEU:
+ case TCG_COND_LEU:
+ case TCG_COND_GTU:
+ cmp_map = mips_cmp_map[cond];
+ if (cmp_map & MIPS_CMP_SWAP) {
+ TCGReg t = arg1;
+ arg1 = arg2;
+ arg2 = t;
+ }
+ tcg_out_opc_reg(s, s_opc, ret, arg1, arg2);
+ if (cmp_map & MIPS_CMP_INV) {
+ tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1);
+ }
+ break;
+
+ default:
+ tcg_abort();
+ break;
+ }
+}
+
+static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1,
+ TCGReg arg2, TCGLabel *l)
+{
+ static const MIPSInsn b_zero[16] = {
+ [TCG_COND_LT] = OPC_BLTZ,
+ [TCG_COND_GT] = OPC_BGTZ,
+ [TCG_COND_LE] = OPC_BLEZ,
+ [TCG_COND_GE] = OPC_BGEZ,
+ };
+
+ MIPSInsn s_opc = OPC_SLTU;
+ MIPSInsn b_opc;
+ int cmp_map;
+
+ switch (cond) {
+ case TCG_COND_EQ:
+ b_opc = OPC_BEQ;
+ break;
+ case TCG_COND_NE:
+ b_opc = OPC_BNE;
+ break;
+
+ case TCG_COND_LT:
+ case TCG_COND_GT:
+ case TCG_COND_LE:
+ case TCG_COND_GE:
+ if (arg2 == 0) {
+ b_opc = b_zero[cond];
+ arg2 = arg1;
+ arg1 = 0;
+ break;
+ }
+ s_opc = OPC_SLT;
+ /* FALLTHRU */
+
+ case TCG_COND_LTU:
+ case TCG_COND_GTU:
+ case TCG_COND_LEU:
+ case TCG_COND_GEU:
+ cmp_map = mips_cmp_map[cond];
+ if (cmp_map & MIPS_CMP_SWAP) {
+ TCGReg t = arg1;
+ arg1 = arg2;
+ arg2 = t;
+ }
+ tcg_out_opc_reg(s, s_opc, TCG_TMP0, arg1, arg2);
+ b_opc = (cmp_map & MIPS_CMP_INV ? OPC_BEQ : OPC_BNE);
+ arg1 = TCG_TMP0;
+ arg2 = TCG_REG_ZERO;
+ break;
+
+ default:
+ tcg_abort();
+ break;
+ }
+
+ tcg_out_opc_br(s, b_opc, arg1, arg2);
+ tcg_out_reloc(s, s->code_ptr - 1, R_MIPS_PC16, l, 0);
+ tcg_out_nop(s);
+}
+
+static TCGReg tcg_out_reduce_eq2(TCGContext *s, TCGReg tmp0, TCGReg tmp1,
+ TCGReg al, TCGReg ah,
+ TCGReg bl, TCGReg bh)
+{
+ /* Merge highpart comparison into AH. */
+ if (bh != 0) {
+ if (ah != 0) {
+ tcg_out_opc_reg(s, OPC_XOR, tmp0, ah, bh);
+ ah = tmp0;
+ } else {
+ ah = bh;
+ }
+ }
+ /* Merge lowpart comparison into AL. */
+ if (bl != 0) {
+ if (al != 0) {
+ tcg_out_opc_reg(s, OPC_XOR, tmp1, al, bl);
+ al = tmp1;
+ } else {
+ al = bl;
+ }
+ }
+ /* Merge high and low part comparisons into AL. */
+ if (ah != 0) {
+ if (al != 0) {
+ tcg_out_opc_reg(s, OPC_OR, tmp0, ah, al);
+ al = tmp0;
+ } else {
+ al = ah;
+ }
+ }
+ return al;
+}
+
+static void tcg_out_setcond2(TCGContext *s, TCGCond cond, TCGReg ret,
+ TCGReg al, TCGReg ah, TCGReg bl, TCGReg bh)
+{
+ TCGReg tmp0 = TCG_TMP0;
+ TCGReg tmp1 = ret;
+
+ tcg_debug_assert(ret != TCG_TMP0);
+ if (ret == ah || ret == bh) {
+ tcg_debug_assert(ret != TCG_TMP1);
+ tmp1 = TCG_TMP1;
+ }
+
+ switch (cond) {
+ case TCG_COND_EQ:
+ case TCG_COND_NE:
+ tmp1 = tcg_out_reduce_eq2(s, tmp0, tmp1, al, ah, bl, bh);
+ tcg_out_setcond(s, cond, ret, tmp1, TCG_REG_ZERO);
+ break;
+
+ default:
+ tcg_out_setcond(s, TCG_COND_EQ, tmp0, ah, bh);
+ tcg_out_setcond(s, tcg_unsigned_cond(cond), tmp1, al, bl);
+ tcg_out_opc_reg(s, OPC_AND, tmp1, tmp1, tmp0);
+ tcg_out_setcond(s, tcg_high_cond(cond), tmp0, ah, bh);
+ tcg_out_opc_reg(s, OPC_OR, ret, tmp1, tmp0);
+ break;
+ }
+}
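+/*
+ * Illustrative: a double-word signed "less than" is decomposed above
+ * into (ah == bh && al <u bl) || (ah <s bh), i.e. two setconds joined
+ * by AND, then OR-ed with a third setcond on the high parts.
+ */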
+
+static void tcg_out_brcond2(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah,
+ TCGReg bl, TCGReg bh, TCGLabel *l)
+{
+ TCGCond b_cond = TCG_COND_NE;
+ TCGReg tmp = TCG_TMP1;
+
+ /* With branches, we emit between 4 and 9 insns with 2 or 3 branches.
+ With setcond, we emit between 3 and 10 insns and only 1 branch,
+ which ought to get better branch prediction. */
+ switch (cond) {
+ case TCG_COND_EQ:
+ case TCG_COND_NE:
+ b_cond = cond;
+ tmp = tcg_out_reduce_eq2(s, TCG_TMP0, TCG_TMP1, al, ah, bl, bh);
+ break;
+
+ default:
+ /* Minimize code size by preferring a compare not requiring INV. */
+ if (mips_cmp_map[cond] & MIPS_CMP_INV) {
+ cond = tcg_invert_cond(cond);
+ b_cond = TCG_COND_EQ;
+ }
+ tcg_out_setcond2(s, cond, tmp, al, ah, bl, bh);
+ break;
+ }
+
+ tcg_out_brcond(s, b_cond, tmp, TCG_REG_ZERO, l);
+}
+
+static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret,
+ TCGReg c1, TCGReg c2, TCGReg v1, TCGReg v2)
+{
+ bool eqz = false;
+
+ /* If one of the values is zero, put it last to match SEL*Z instructions */
+ if (use_mips32r6_instructions && v1 == 0) {
+ v1 = v2;
+ v2 = 0;
+ cond = tcg_invert_cond(cond);
+ }
+
+ switch (cond) {
+ case TCG_COND_EQ:
+ eqz = true;
+ /* FALLTHRU */
+ case TCG_COND_NE:
+ if (c2 != 0) {
+ tcg_out_opc_reg(s, OPC_XOR, TCG_TMP0, c1, c2);
+ c1 = TCG_TMP0;
+ }
+ break;
+
+ default:
+ /* Minimize code size by preferring a compare not requiring INV. */
+ if (mips_cmp_map[cond] & MIPS_CMP_INV) {
+ cond = tcg_invert_cond(cond);
+ eqz = true;
+ }
+ tcg_out_setcond(s, cond, TCG_TMP0, c1, c2);
+ c1 = TCG_TMP0;
+ break;
+ }
+
+ if (use_mips32r6_instructions) {
+ MIPSInsn m_opc_t = eqz ? OPC_SELEQZ : OPC_SELNEZ;
+ MIPSInsn m_opc_f = eqz ? OPC_SELNEZ : OPC_SELEQZ;
+
+ if (v2 != 0) {
+ tcg_out_opc_reg(s, m_opc_f, TCG_TMP1, v2, c1);
+ }
+ tcg_out_opc_reg(s, m_opc_t, ret, v1, c1);
+ if (v2 != 0) {
+ tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP1);
+ }
+ } else {
+ MIPSInsn m_opc = eqz ? OPC_MOVZ : OPC_MOVN;
+
+ tcg_out_opc_reg(s, m_opc, ret, v1, c1);
+
+ /* This should be guaranteed via constraints */
+ tcg_debug_assert(v2 == ret);
+ }
+}
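+/*
+ * Illustrative r6 expansion for a movcond on NE with both values
+ * live: SELEQZ TMP1, v2, c1; SELNEZ ret, v1, c1; OR ret, ret, TMP1.
+ * One of the two selects always yields zero, so the OR keeps the
+ * other value.
+ */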
+
+static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *arg, bool tail)
+{
+ /* Note that the ABI requires the called function's address to be
+ loaded into T9, even if a direct branch is in range. */
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T9, (uintptr_t)arg);
+
+ /* But do try a direct branch, allowing the cpu better insn prefetch. */
+ if (tail) {
+ if (!tcg_out_opc_jmp(s, OPC_J, arg)) {
+ tcg_out_opc_reg(s, OPC_JR, 0, TCG_REG_T9, 0);
+ }
+ } else {
+ if (!tcg_out_opc_jmp(s, OPC_JAL, arg)) {
+ tcg_out_opc_reg(s, OPC_JALR, TCG_REG_RA, TCG_REG_T9, 0);
+ }
+ }
+}
+
+static void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg)
+{
+ tcg_out_call_int(s, arg, false);
+ tcg_out_nop(s);
+}
+
+#if defined(CONFIG_SOFTMMU)
+#include "../tcg-ldst.c.inc"
+
+static void * const qemu_ld_helpers[(MO_SSIZE | MO_BSWAP) + 1] = {
+ [MO_UB] = helper_ret_ldub_mmu,
+ [MO_SB] = helper_ret_ldsb_mmu,
+ [MO_LEUW] = helper_le_lduw_mmu,
+ [MO_LESW] = helper_le_ldsw_mmu,
+ [MO_LEUL] = helper_le_ldul_mmu,
+ [MO_LEQ] = helper_le_ldq_mmu,
+ [MO_BEUW] = helper_be_lduw_mmu,
+ [MO_BESW] = helper_be_ldsw_mmu,
+ [MO_BEUL] = helper_be_ldul_mmu,
+ [MO_BEQ] = helper_be_ldq_mmu,
+#if TCG_TARGET_REG_BITS == 64
+ [MO_LESL] = helper_le_ldsl_mmu,
+ [MO_BESL] = helper_be_ldsl_mmu,
+#endif
+};
+
+static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
+ [MO_UB] = helper_ret_stb_mmu,
+ [MO_LEUW] = helper_le_stw_mmu,
+ [MO_LEUL] = helper_le_stl_mmu,
+ [MO_LEQ] = helper_le_stq_mmu,
+ [MO_BEUW] = helper_be_stw_mmu,
+ [MO_BEUL] = helper_be_stl_mmu,
+ [MO_BEQ] = helper_be_stq_mmu,
+};
+
+/* Helper routines for marshalling helper function arguments into
+ * the correct registers and stack.
+ * I is where we want to put this argument, and is updated and returned
+ * for the next call. ARG is the argument itself.
+ *
+ * We provide routines for arguments which are: immediate, 32 bit
+ * value in register, 16 and 8 bit values in register (which must be zero
+ * extended before use) and 64 bit value in a lo:hi register pair.
+ */
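+/*
+ * Illustrative O32 layout: the first four word-sized arguments go to
+ * A0..A3 and later ones to 4*i(SP), while a 64-bit value is first
+ * aligned to an even argument index and then split into two words
+ * whose order follows MIPS_BE.
+ */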
+
+static int tcg_out_call_iarg_reg(TCGContext *s, int i, TCGReg arg)
+{
+ if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) {
+ tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[i], arg);
+ } else {
+ /* For N32 and N64, the initial offset is different, but there
+ we also have 8 argument registers, so we don't run out here. */
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
+ tcg_out_st(s, TCG_TYPE_REG, arg, TCG_REG_SP, 4 * i);
+ }
+ return i + 1;
+}
+
+static int tcg_out_call_iarg_reg8(TCGContext *s, int i, TCGReg arg)
+{
+ TCGReg tmp = TCG_TMP0;
+ if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) {
+ tmp = tcg_target_call_iarg_regs[i];
+ }
+ tcg_out_opc_imm(s, OPC_ANDI, tmp, arg, 0xff);
+ return tcg_out_call_iarg_reg(s, i, tmp);
+}
+
+static int tcg_out_call_iarg_reg16(TCGContext *s, int i, TCGReg arg)
+{
+ TCGReg tmp = TCG_TMP0;
+ if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) {
+ tmp = tcg_target_call_iarg_regs[i];
+ }
+ tcg_out_opc_imm(s, OPC_ANDI, tmp, arg, 0xffff);
+ return tcg_out_call_iarg_reg(s, i, tmp);
+}
+
+static int tcg_out_call_iarg_imm(TCGContext *s, int i, TCGArg arg)
+{
+ TCGReg tmp = TCG_TMP0;
+ if (arg == 0) {
+ tmp = TCG_REG_ZERO;
+ } else {
+ if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) {
+ tmp = tcg_target_call_iarg_regs[i];
+ }
+ tcg_out_movi(s, TCG_TYPE_REG, tmp, arg);
+ }
+ return tcg_out_call_iarg_reg(s, i, tmp);
+}
+
+static int tcg_out_call_iarg_reg2(TCGContext *s, int i, TCGReg al, TCGReg ah)
+{
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
+ i = (i + 1) & ~1;
+ i = tcg_out_call_iarg_reg(s, i, (MIPS_BE ? ah : al));
+ i = tcg_out_call_iarg_reg(s, i, (MIPS_BE ? al : ah));
+ return i;
+}
+
+/* We expect to use a 16-bit negative offset from ENV. */
+QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
+QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -32768);
+
+/*
+ * Perform the tlb comparison operation.
+ * The complete host address is placed in BASE.
+ * Clobbers TMP0, TMP1, TMP2, TMP3.
+ */
+static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl,
+ TCGReg addrh, MemOpIdx oi,
+ tcg_insn_unit *label_ptr[2], bool is_load)
+{
+ MemOp opc = get_memop(oi);
+ unsigned s_bits = opc & MO_SIZE;
+ unsigned a_bits = get_alignment_bits(opc);
+ int mem_index = get_mmuidx(oi);
+ int fast_off = TLB_MASK_TABLE_OFS(mem_index);
+ int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
+ int table_off = fast_off + offsetof(CPUTLBDescFast, table);
+ int add_off = offsetof(CPUTLBEntry, addend);
+ int cmp_off = (is_load ? offsetof(CPUTLBEntry, addr_read)
+ : offsetof(CPUTLBEntry, addr_write));
+ target_ulong mask;
+
+ /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP0, TCG_AREG0, mask_off);
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP1, TCG_AREG0, table_off);
+
+ /* Extract the TLB index from the address into TMP3. */
+ tcg_out_opc_sa(s, ALIAS_TSRL, TCG_TMP3, addrl,
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
+ tcg_out_opc_reg(s, OPC_AND, TCG_TMP3, TCG_TMP3, TCG_TMP0);
+
+ /* Add the tlb_table pointer, creating the CPUTLBEntry address in TMP3. */
+ tcg_out_opc_reg(s, ALIAS_PADD, TCG_TMP3, TCG_TMP3, TCG_TMP1);
+
+ /* We don't currently support unaligned accesses.
+ We could do so with mips32r6. */
+ if (a_bits < s_bits) {
+ a_bits = s_bits;
+ }
+
+ /* Mask the page bits, keeping the alignment bits to compare against. */
+ mask = (target_ulong)TARGET_PAGE_MASK | ((1 << a_bits) - 1);
+
+ /* Load the (low-half) tlb comparator. */
+ if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
+ tcg_out_ld(s, TCG_TYPE_I32, TCG_TMP0, TCG_TMP3, cmp_off + LO_OFF);
+ tcg_out_movi(s, TCG_TYPE_I32, TCG_TMP1, mask);
+ } else {
+ tcg_out_ldst(s, (TARGET_LONG_BITS == 64 ? OPC_LD
+ : TCG_TARGET_REG_BITS == 64 ? OPC_LWU : OPC_LW),
+ TCG_TMP0, TCG_TMP3, cmp_off);
+ tcg_out_movi(s, TCG_TYPE_TL, TCG_TMP1, mask);
+ /* No second compare is required here;
+ load the tlb addend for the fast path. */
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP2, TCG_TMP3, add_off);
+ }
+
+ /* Zero extend a 32-bit guest address for a 64-bit host. */
+ if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
+ tcg_out_ext32u(s, base, addrl);
+ addrl = base;
+ }
+ tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, addrl);
+
+ label_ptr[0] = s->code_ptr;
+ tcg_out_opc_br(s, OPC_BNE, TCG_TMP1, TCG_TMP0);
+
+ /* Load and test the high half tlb comparator. */
+ if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
+ /* delay slot */
+ tcg_out_ld(s, TCG_TYPE_I32, TCG_TMP0, TCG_TMP3, cmp_off + HI_OFF);
+
+ /* Load the tlb addend for the fast path. */
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP2, TCG_TMP3, add_off);
+
+ label_ptr[1] = s->code_ptr;
+ tcg_out_opc_br(s, OPC_BNE, addrh, TCG_TMP0);
+ }
+
+ /* delay slot */
+ tcg_out_opc_reg(s, ALIAS_PADD, base, TCG_TMP2, addrl);
+}
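+/*
+ * Illustrative fast-path shape for matching guest/host widths (the
+ * exact load opcodes and offsets depend on the configuration):
+ *   load the mask and table pointers from env;
+ *   TMP3 = ((addrl >> (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS)) & mask) + table;
+ *   TMP0 = comparator at cmp_off(TMP3); TMP2 = addend at add_off(TMP3);
+ *   TMP1 = addrl & (page mask | alignment bits);
+ *   BNE TMP1, TMP0 -> slow path; delay slot: base = TMP2 + addrl.
+ */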
+
+static void add_qemu_ldst_label(TCGContext *s, int is_ld, MemOpIdx oi,
+ TCGType ext,
+ TCGReg datalo, TCGReg datahi,
+ TCGReg addrlo, TCGReg addrhi,
+ void *raddr, tcg_insn_unit *label_ptr[2])
+{
+ TCGLabelQemuLdst *label = new_ldst_label(s);
+
+ label->is_ld = is_ld;
+ label->oi = oi;
+ label->type = ext;
+ label->datalo_reg = datalo;
+ label->datahi_reg = datahi;
+ label->addrlo_reg = addrlo;
+ label->addrhi_reg = addrhi;
+ label->raddr = tcg_splitwx_to_rx(raddr);
+ label->label_ptr[0] = label_ptr[0];
+ if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
+ label->label_ptr[1] = label_ptr[1];
+ }
+}
+
+static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
+{
+ const tcg_insn_unit *tgt_rx = tcg_splitwx_to_rx(s->code_ptr);
+ MemOpIdx oi = l->oi;
+ MemOp opc = get_memop(oi);
+ TCGReg v0;
+ int i;
+
+ /* resolve label address */
+ if (!reloc_pc16(l->label_ptr[0], tgt_rx)
+ || (TCG_TARGET_REG_BITS < TARGET_LONG_BITS
+ && !reloc_pc16(l->label_ptr[1], tgt_rx))) {
+ return false;
+ }
+
+ i = 1;
+ if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
+ i = tcg_out_call_iarg_reg2(s, i, l->addrlo_reg, l->addrhi_reg);
+ } else {
+ i = tcg_out_call_iarg_reg(s, i, l->addrlo_reg);
+ }
+ i = tcg_out_call_iarg_imm(s, i, oi);
+ i = tcg_out_call_iarg_imm(s, i, (intptr_t)l->raddr);
+ tcg_out_call_int(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)], false);
+ /* delay slot */
+ tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
+
+ v0 = l->datalo_reg;
+ if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
+ /* We eliminated V0 from the possible output registers, so it
+ cannot be clobbered here. So we must move V1 first. */
+ if (MIPS_BE) {
+ tcg_out_mov(s, TCG_TYPE_I32, v0, TCG_REG_V1);
+ v0 = l->datahi_reg;
+ } else {
+ tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_V1);
+ }
+ }
+
+ tcg_out_opc_br(s, OPC_BEQ, TCG_REG_ZERO, TCG_REG_ZERO);
+ if (!reloc_pc16(s->code_ptr - 1, l->raddr)) {
+ return false;
+ }
+
+ /* delay slot */
+ if (TCG_TARGET_REG_BITS == 64 && l->type == TCG_TYPE_I32) {
+ /* we always sign-extend 32-bit loads */
+ tcg_out_opc_sa(s, OPC_SLL, v0, TCG_REG_V0, 0);
+ } else {
+ tcg_out_opc_reg(s, OPC_OR, v0, TCG_REG_V0, TCG_REG_ZERO);
+ }
+ return true;
+}
+
+static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
+{
+ const tcg_insn_unit *tgt_rx = tcg_splitwx_to_rx(s->code_ptr);
+ MemOpIdx oi = l->oi;
+ MemOp opc = get_memop(oi);
+ MemOp s_bits = opc & MO_SIZE;
+ int i;
+
+ /* resolve label address */
+ if (!reloc_pc16(l->label_ptr[0], tgt_rx)
+ || (TCG_TARGET_REG_BITS < TARGET_LONG_BITS
+ && !reloc_pc16(l->label_ptr[1], tgt_rx))) {
+ return false;
+ }
+
+ i = 1;
+ if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
+ i = tcg_out_call_iarg_reg2(s, i, l->addrlo_reg, l->addrhi_reg);
+ } else {
+ i = tcg_out_call_iarg_reg(s, i, l->addrlo_reg);
+ }
+ switch (s_bits) {
+ case MO_8:
+ i = tcg_out_call_iarg_reg8(s, i, l->datalo_reg);
+ break;
+ case MO_16:
+ i = tcg_out_call_iarg_reg16(s, i, l->datalo_reg);
+ break;
+ case MO_32:
+ i = tcg_out_call_iarg_reg(s, i, l->datalo_reg);
+ break;
+ case MO_64:
+ if (TCG_TARGET_REG_BITS == 32) {
+ i = tcg_out_call_iarg_reg2(s, i, l->datalo_reg, l->datahi_reg);
+ } else {
+ i = tcg_out_call_iarg_reg(s, i, l->datalo_reg);
+ }
+ break;
+ default:
+ tcg_abort();
+ }
+ i = tcg_out_call_iarg_imm(s, i, oi);
+
+ /* Tail call to the store helper. Thus force the return address
+ computation to take place in the return address register. */
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RA, (intptr_t)l->raddr);
+ i = tcg_out_call_iarg_reg(s, i, TCG_REG_RA);
+ tcg_out_call_int(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)], true);
+ /* delay slot */
+ tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
+ return true;
+}
+#endif
+
+static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi,
+ TCGReg base, MemOp opc, bool is_64)
+{
+ switch (opc & (MO_SSIZE | MO_BSWAP)) {
+ case MO_UB:
+ tcg_out_opc_imm(s, OPC_LBU, lo, base, 0);
+ break;
+ case MO_SB:
+ tcg_out_opc_imm(s, OPC_LB, lo, base, 0);
+ break;
+ case MO_UW | MO_BSWAP:
+ tcg_out_opc_imm(s, OPC_LHU, TCG_TMP1, base, 0);
+ tcg_out_bswap16(s, lo, TCG_TMP1, TCG_BSWAP_IZ | TCG_BSWAP_OZ);
+ break;
+ case MO_UW:
+ tcg_out_opc_imm(s, OPC_LHU, lo, base, 0);
+ break;
+ case MO_SW | MO_BSWAP:
+ tcg_out_opc_imm(s, OPC_LHU, TCG_TMP1, base, 0);
+ tcg_out_bswap16(s, lo, TCG_TMP1, TCG_BSWAP_IZ | TCG_BSWAP_OS);
+ break;
+ case MO_SW:
+ tcg_out_opc_imm(s, OPC_LH, lo, base, 0);
+ break;
+ case MO_UL | MO_BSWAP:
+ if (TCG_TARGET_REG_BITS == 64 && is_64) {
+ if (use_mips32r2_instructions) {
+ tcg_out_opc_imm(s, OPC_LWU, lo, base, 0);
+ tcg_out_bswap32(s, lo, lo, TCG_BSWAP_IZ | TCG_BSWAP_OZ);
+ } else {
+ tcg_out_bswap_subr(s, bswap32u_addr);
+ /* delay slot */
+ tcg_out_opc_imm(s, OPC_LWU, TCG_TMP0, base, 0);
+ tcg_out_mov(s, TCG_TYPE_I64, lo, TCG_TMP3);
+ }
+ break;
+ }
+ /* FALLTHRU */
+ case MO_SL | MO_BSWAP:
+ if (use_mips32r2_instructions) {
+ tcg_out_opc_imm(s, OPC_LW, lo, base, 0);
+ tcg_out_bswap32(s, lo, lo, 0);
+ } else {
+ tcg_out_bswap_subr(s, bswap32_addr);
+ /* delay slot */
+ tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, base, 0);
+ tcg_out_mov(s, TCG_TYPE_I32, lo, TCG_TMP3);
+ }
+ break;
+ case MO_UL:
+ if (TCG_TARGET_REG_BITS == 64 && is_64) {
+ tcg_out_opc_imm(s, OPC_LWU, lo, base, 0);
+ break;
+ }
+ /* FALLTHRU */
+ case MO_SL:
+ tcg_out_opc_imm(s, OPC_LW, lo, base, 0);
+ break;
+ case MO_Q | MO_BSWAP:
+ if (TCG_TARGET_REG_BITS == 64) {
+ if (use_mips32r2_instructions) {
+ tcg_out_opc_imm(s, OPC_LD, lo, base, 0);
+ tcg_out_bswap64(s, lo, lo);
+ } else {
+ tcg_out_bswap_subr(s, bswap64_addr);
+ /* delay slot */
+ tcg_out_opc_imm(s, OPC_LD, TCG_TMP0, base, 0);
+ tcg_out_mov(s, TCG_TYPE_I64, lo, TCG_TMP3);
+ }
+ } else if (use_mips32r2_instructions) {
+ tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, base, 0);
+ tcg_out_opc_imm(s, OPC_LW, TCG_TMP1, base, 4);
+ tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP0, 0, TCG_TMP0);
+ tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP1, 0, TCG_TMP1);
+ tcg_out_opc_sa(s, OPC_ROTR, MIPS_BE ? lo : hi, TCG_TMP0, 16);
+ tcg_out_opc_sa(s, OPC_ROTR, MIPS_BE ? hi : lo, TCG_TMP1, 16);
+ } else {
+ tcg_out_bswap_subr(s, bswap32_addr);
+ /* delay slot */
+ tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, base, 0);
+ tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, base, 4);
+ tcg_out_bswap_subr(s, bswap32_addr);
+ /* delay slot */
+ tcg_out_mov(s, TCG_TYPE_I32, MIPS_BE ? lo : hi, TCG_TMP3);
+ tcg_out_mov(s, TCG_TYPE_I32, MIPS_BE ? hi : lo, TCG_TMP3);
+ }
+ break;
+ case MO_Q:
+ /* Prefer to load from offset 0 first, but allow for overlap. */
+ if (TCG_TARGET_REG_BITS == 64) {
+ tcg_out_opc_imm(s, OPC_LD, lo, base, 0);
+ } else if (MIPS_BE ? hi != base : lo == base) {
+ tcg_out_opc_imm(s, OPC_LW, hi, base, HI_OFF);
+ tcg_out_opc_imm(s, OPC_LW, lo, base, LO_OFF);
+ } else {
+ tcg_out_opc_imm(s, OPC_LW, lo, base, LO_OFF);
+ tcg_out_opc_imm(s, OPC_LW, hi, base, HI_OFF);
+ }
+ break;
+ default:
+ tcg_abort();
+ }
+}
+
+static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
+{
+ TCGReg addr_regl, addr_regh __attribute__((unused));
+ TCGReg data_regl, data_regh;
+ MemOpIdx oi;
+ MemOp opc;
+#if defined(CONFIG_SOFTMMU)
+ tcg_insn_unit *label_ptr[2];
+#endif
+ TCGReg base = TCG_REG_A0;
+
+ data_regl = *args++;
+ data_regh = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0);
+ addr_regl = *args++;
+ addr_regh = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0);
+ oi = *args++;
+ opc = get_memop(oi);
+
+#if defined(CONFIG_SOFTMMU)
+ tcg_out_tlb_load(s, base, addr_regl, addr_regh, oi, label_ptr, 1);
+ tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc, is_64);
+ add_qemu_ldst_label(s, 1, oi,
+ (is_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
+ data_regl, data_regh, addr_regl, addr_regh,
+ s->code_ptr, label_ptr);
+#else
+ if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
+ tcg_out_ext32u(s, base, addr_regl);
+ addr_regl = base;
+ }
+ if (guest_base == 0 && data_regl != addr_regl) {
+ base = addr_regl;
+ } else if (guest_base == (int16_t)guest_base) {
+ tcg_out_opc_imm(s, ALIAS_PADDI, base, addr_regl, guest_base);
+ } else {
+ tcg_out_opc_reg(s, ALIAS_PADD, base, TCG_GUEST_BASE_REG, addr_regl);
+ }
+ tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc, is_64);
+#endif
+}
+
+static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg lo, TCGReg hi,
+ TCGReg base, MemOp opc)
+{
+ /* Don't clutter the code below with checks to avoid bswapping ZERO. */
+ if ((lo | hi) == 0) {
+ opc &= ~MO_BSWAP;
+ }
+
+ switch (opc & (MO_SIZE | MO_BSWAP)) {
+ case MO_8:
+ tcg_out_opc_imm(s, OPC_SB, lo, base, 0);
+ break;
+
+ case MO_16 | MO_BSWAP:
+ tcg_out_bswap16(s, TCG_TMP1, lo, 0);
+ lo = TCG_TMP1;
+ /* FALLTHRU */
+ case MO_16:
+ tcg_out_opc_imm(s, OPC_SH, lo, base, 0);
+ break;
+
+ case MO_32 | MO_BSWAP:
+ tcg_out_bswap32(s, TCG_TMP3, lo, 0);
+ lo = TCG_TMP3;
+ /* FALLTHRU */
+ case MO_32:
+ tcg_out_opc_imm(s, OPC_SW, lo, base, 0);
+ break;
+
+ case MO_64 | MO_BSWAP:
+ if (TCG_TARGET_REG_BITS == 64) {
+ tcg_out_bswap64(s, TCG_TMP3, lo);
+ tcg_out_opc_imm(s, OPC_SD, TCG_TMP3, base, 0);
+ } else if (use_mips32r2_instructions) {
+ tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP0, 0, MIPS_BE ? lo : hi);
+ tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP1, 0, MIPS_BE ? hi : lo);
+ tcg_out_opc_sa(s, OPC_ROTR, TCG_TMP0, TCG_TMP0, 16);
+ tcg_out_opc_sa(s, OPC_ROTR, TCG_TMP1, TCG_TMP1, 16);
+ tcg_out_opc_imm(s, OPC_SW, TCG_TMP0, base, 0);
+ tcg_out_opc_imm(s, OPC_SW, TCG_TMP1, base, 4);
+ } else {
+ tcg_out_bswap32(s, TCG_TMP3, MIPS_BE ? lo : hi, 0);
+ tcg_out_opc_imm(s, OPC_SW, TCG_TMP3, base, 0);
+ tcg_out_bswap32(s, TCG_TMP3, MIPS_BE ? hi : lo, 0);
+ tcg_out_opc_imm(s, OPC_SW, TCG_TMP3, base, 4);
+ }
+ break;
+ case MO_64:
+ if (TCG_TARGET_REG_BITS == 64) {
+ tcg_out_opc_imm(s, OPC_SD, lo, base, 0);
+ } else {
+ tcg_out_opc_imm(s, OPC_SW, MIPS_BE ? hi : lo, base, 0);
+ tcg_out_opc_imm(s, OPC_SW, MIPS_BE ? lo : hi, base, 4);
+ }
+ break;
+
+ default:
+ tcg_abort();
+ }
+}
+
+static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
+{
+ TCGReg addr_regl, addr_regh __attribute__((unused));
+ TCGReg data_regl, data_regh;
+ MemOpIdx oi;
+ MemOp opc;
+#if defined(CONFIG_SOFTMMU)
+ tcg_insn_unit *label_ptr[2];
+#endif
+ TCGReg base = TCG_REG_A0;
+
+ data_regl = *args++;
+ data_regh = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0);
+ addr_regl = *args++;
+ addr_regh = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0);
+ oi = *args++;
+ opc = get_memop(oi);
+
+#if defined(CONFIG_SOFTMMU)
+ tcg_out_tlb_load(s, base, addr_regl, addr_regh, oi, label_ptr, 0);
+ tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc);
+ add_qemu_ldst_label(s, 0, oi,
+ (is_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
+ data_regl, data_regh, addr_regl, addr_regh,
+ s->code_ptr, label_ptr);
+#else
+ base = TCG_REG_A0;
+ if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
+ tcg_out_ext32u(s, base, addr_regl);
+ addr_regl = base;
+ }
+ if (guest_base == 0) {
+ base = addr_regl;
+ } else if (guest_base == (int16_t)guest_base) {
+ tcg_out_opc_imm(s, ALIAS_PADDI, base, addr_regl, guest_base);
+ } else {
+ tcg_out_opc_reg(s, ALIAS_PADD, base, TCG_GUEST_BASE_REG, addr_regl);
+ }
+ tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc);
+#endif
+}
+
+static void tcg_out_mb(TCGContext *s, TCGArg a0)
+{
+ static const MIPSInsn sync[] = {
+ /* Note that SYNC_MB is slightly weaker than SYNC 0,
+ as the former is an ordering barrier and the latter
+ is a completion barrier. */
+ [0 ... TCG_MO_ALL] = OPC_SYNC_MB,
+ [TCG_MO_LD_LD] = OPC_SYNC_RMB,
+ [TCG_MO_ST_ST] = OPC_SYNC_WMB,
+ [TCG_MO_LD_ST] = OPC_SYNC_RELEASE,
+ [TCG_MO_LD_ST | TCG_MO_ST_ST] = OPC_SYNC_RELEASE,
+ [TCG_MO_LD_ST | TCG_MO_LD_LD] = OPC_SYNC_ACQUIRE,
+ };
+ tcg_out32(s, sync[a0 & TCG_MO_ALL]);
+}
+
+static void tcg_out_clz(TCGContext *s, MIPSInsn opcv2, MIPSInsn opcv6,
+ int width, TCGReg a0, TCGReg a1, TCGArg a2)
+{
+ if (use_mips32r6_instructions) {
+ if (a2 == width) {
+ tcg_out_opc_reg(s, opcv6, a0, a1, 0);
+ } else {
+ tcg_out_opc_reg(s, opcv6, TCG_TMP0, a1, 0);
+ tcg_out_movcond(s, TCG_COND_EQ, a0, a1, 0, a2, TCG_TMP0);
+ }
+ } else {
+ if (a2 == width) {
+ tcg_out_opc_reg(s, opcv2, a0, a1, a1);
+ } else if (a0 == a2) {
+ tcg_out_opc_reg(s, opcv2, TCG_TMP0, a1, a1);
+ tcg_out_opc_reg(s, OPC_MOVN, a0, TCG_TMP0, a1);
+ } else if (a0 != a1) {
+ tcg_out_opc_reg(s, opcv2, a0, a1, a1);
+ tcg_out_opc_reg(s, OPC_MOVZ, a0, a2, a1);
+ } else {
+ tcg_out_opc_reg(s, opcv2, TCG_TMP0, a1, a1);
+ tcg_out_opc_reg(s, OPC_MOVZ, TCG_TMP0, a2, a1);
+ tcg_out_mov(s, TCG_TYPE_REG, a0, TCG_TMP0);
+ }
+ }
+}
+
+static void tcg_out_op(TCGContext *s, TCGOpcode opc,
+ const TCGArg args[TCG_MAX_OP_ARGS],
+ const int const_args[TCG_MAX_OP_ARGS])
+{
+ MIPSInsn i1, i2;
+ TCGArg a0, a1, a2;
+ int c2;
+
+ /*
+ * Note that many operands use the constraint set "rZ".
+ * We make use of the fact that 0 is the ZERO register,
+ * and hence such cases need not check for const_args.
+ */
+ a0 = args[0];
+ a1 = args[1];
+ a2 = args[2];
+ c2 = const_args[2];
+
+ switch (opc) {
+ case INDEX_op_exit_tb:
+ {
+ TCGReg b0 = TCG_REG_ZERO;
+
+ a0 = (intptr_t)a0;
+ if (a0 & ~0xffff) {
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_V0, a0 & ~0xffff);
+ b0 = TCG_REG_V0;
+ }
+ if (!tcg_out_opc_jmp(s, OPC_J, tb_ret_addr)) {
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0,
+ (uintptr_t)tb_ret_addr);
+ tcg_out_opc_reg(s, OPC_JR, 0, TCG_TMP0, 0);
+ }
+ tcg_out_opc_imm(s, OPC_ORI, TCG_REG_V0, b0, a0 & 0xffff);
+ }
+ break;
+ case INDEX_op_goto_tb:
+ /* indirect jump method */
+ tcg_debug_assert(s->tb_jmp_insn_offset == 0);
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP0, TCG_REG_ZERO,
+ (uintptr_t)(s->tb_jmp_target_addr + a0));
+ tcg_out_opc_reg(s, OPC_JR, 0, TCG_TMP0, 0);
+ tcg_out_nop(s);
+ set_jmp_reset_offset(s, a0);
+ break;
+ case INDEX_op_goto_ptr:
+ /* jmp to the given host address (could be epilogue) */
+ tcg_out_opc_reg(s, OPC_JR, 0, a0, 0);
+ tcg_out_nop(s);
+ break;
+ case INDEX_op_br:
+ tcg_out_brcond(s, TCG_COND_EQ, TCG_REG_ZERO, TCG_REG_ZERO,
+ arg_label(a0));
+ break;
+
+ case INDEX_op_ld8u_i32:
+ case INDEX_op_ld8u_i64:
+ i1 = OPC_LBU;
+ goto do_ldst;
+ case INDEX_op_ld8s_i32:
+ case INDEX_op_ld8s_i64:
+ i1 = OPC_LB;
+ goto do_ldst;
+ case INDEX_op_ld16u_i32:
+ case INDEX_op_ld16u_i64:
+ i1 = OPC_LHU;
+ goto do_ldst;
+ case INDEX_op_ld16s_i32:
+ case INDEX_op_ld16s_i64:
+ i1 = OPC_LH;
+ goto do_ldst;
+ case INDEX_op_ld_i32:
+ case INDEX_op_ld32s_i64:
+ i1 = OPC_LW;
+ goto do_ldst;
+ case INDEX_op_ld32u_i64:
+ i1 = OPC_LWU;
+ goto do_ldst;
+ case INDEX_op_ld_i64:
+ i1 = OPC_LD;
+ goto do_ldst;
+ case INDEX_op_st8_i32:
+ case INDEX_op_st8_i64:
+ i1 = OPC_SB;
+ goto do_ldst;
+ case INDEX_op_st16_i32:
+ case INDEX_op_st16_i64:
+ i1 = OPC_SH;
+ goto do_ldst;
+ case INDEX_op_st_i32:
+ case INDEX_op_st32_i64:
+ i1 = OPC_SW;
+ goto do_ldst;
+ case INDEX_op_st_i64:
+ i1 = OPC_SD;
+ do_ldst:
+ tcg_out_ldst(s, i1, a0, a1, a2);
+ break;
+
+ case INDEX_op_add_i32:
+ i1 = OPC_ADDU, i2 = OPC_ADDIU;
+ goto do_binary;
+ case INDEX_op_add_i64:
+ i1 = OPC_DADDU, i2 = OPC_DADDIU;
+ goto do_binary;
+ case INDEX_op_or_i32:
+ case INDEX_op_or_i64:
+ i1 = OPC_OR, i2 = OPC_ORI;
+ goto do_binary;
+ case INDEX_op_xor_i32:
+ case INDEX_op_xor_i64:
+ i1 = OPC_XOR, i2 = OPC_XORI;
+ do_binary:
+ if (c2) {
+ tcg_out_opc_imm(s, i2, a0, a1, a2);
+ break;
+ }
+ do_binaryv:
+ tcg_out_opc_reg(s, i1, a0, a1, a2);
+ break;
+
+ case INDEX_op_sub_i32:
+ i1 = OPC_SUBU, i2 = OPC_ADDIU;
+ goto do_subtract;
+ case INDEX_op_sub_i64:
+ i1 = OPC_DSUBU, i2 = OPC_DADDIU;
+ do_subtract:
+ if (c2) {
+ tcg_out_opc_imm(s, i2, a0, a1, -a2);
+ break;
+ }
+ goto do_binaryv;
+ case INDEX_op_and_i32:
+ if (c2 && a2 != (uint16_t)a2) {
+ int msb = ctz32(~a2) - 1;
+ tcg_debug_assert(use_mips32r2_instructions);
+ tcg_debug_assert(is_p2m1(a2));
+ tcg_out_opc_bf(s, OPC_EXT, a0, a1, msb, 0);
+ break;
+ }
+ i1 = OPC_AND, i2 = OPC_ANDI;
+ goto do_binary;
+ case INDEX_op_and_i64:
+ if (c2 && a2 != (uint16_t)a2) {
+ int msb = ctz64(~a2) - 1;
+ tcg_debug_assert(use_mips32r2_instructions);
+ tcg_debug_assert(is_p2m1(a2));
+ tcg_out_opc_bf64(s, OPC_DEXT, OPC_DEXTM, OPC_DEXTU, a0, a1, msb, 0);
+ break;
+ }
+ i1 = OPC_AND, i2 = OPC_ANDI;
+ goto do_binary;
+ case INDEX_op_nor_i32:
+ case INDEX_op_nor_i64:
+ i1 = OPC_NOR;
+ goto do_binaryv;
+
+ case INDEX_op_mul_i32:
+ if (use_mips32_instructions) {
+ tcg_out_opc_reg(s, OPC_MUL, a0, a1, a2);
+ break;
+ }
+ i1 = OPC_MULT, i2 = OPC_MFLO;
+ goto do_hilo1;
+ case INDEX_op_mulsh_i32:
+ if (use_mips32r6_instructions) {
+ tcg_out_opc_reg(s, OPC_MUH, a0, a1, a2);
+ break;
+ }
+ i1 = OPC_MULT, i2 = OPC_MFHI;
+ goto do_hilo1;
+ case INDEX_op_muluh_i32:
+ if (use_mips32r6_instructions) {
+ tcg_out_opc_reg(s, OPC_MUHU, a0, a1, a2);
+ break;
+ }
+ i1 = OPC_MULTU, i2 = OPC_MFHI;
+ goto do_hilo1;
+ case INDEX_op_div_i32:
+ if (use_mips32r6_instructions) {
+ tcg_out_opc_reg(s, OPC_DIV_R6, a0, a1, a2);
+ break;
+ }
+ i1 = OPC_DIV, i2 = OPC_MFLO;
+ goto do_hilo1;
+ case INDEX_op_divu_i32:
+ if (use_mips32r6_instructions) {
+ tcg_out_opc_reg(s, OPC_DIVU_R6, a0, a1, a2);
+ break;
+ }
+ i1 = OPC_DIVU, i2 = OPC_MFLO;
+ goto do_hilo1;
+ case INDEX_op_rem_i32:
+ if (use_mips32r6_instructions) {
+ tcg_out_opc_reg(s, OPC_MOD, a0, a1, a2);
+ break;
+ }
+ i1 = OPC_DIV, i2 = OPC_MFHI;
+ goto do_hilo1;
+ case INDEX_op_remu_i32:
+ if (use_mips32r6_instructions) {
+ tcg_out_opc_reg(s, OPC_MODU, a0, a1, a2);
+ break;
+ }
+ i1 = OPC_DIVU, i2 = OPC_MFHI;
+ goto do_hilo1;
+ case INDEX_op_mul_i64:
+ if (use_mips32r6_instructions) {
+ tcg_out_opc_reg(s, OPC_DMUL, a0, a1, a2);
+ break;
+ }
+ i1 = OPC_DMULT, i2 = OPC_MFLO;
+ goto do_hilo1;
+ case INDEX_op_mulsh_i64:
+ if (use_mips32r6_instructions) {
+ tcg_out_opc_reg(s, OPC_DMUH, a0, a1, a2);
+ break;
+ }
+ i1 = OPC_DMULT, i2 = OPC_MFHI;
+ goto do_hilo1;
+ case INDEX_op_muluh_i64:
+ if (use_mips32r6_instructions) {
+ tcg_out_opc_reg(s, OPC_DMUHU, a0, a1, a2);
+ break;
+ }
+ i1 = OPC_DMULTU, i2 = OPC_MFHI;
+ goto do_hilo1;
+ case INDEX_op_div_i64:
+ if (use_mips32r6_instructions) {
+ tcg_out_opc_reg(s, OPC_DDIV_R6, a0, a1, a2);
+ break;
+ }
+ i1 = OPC_DDIV, i2 = OPC_MFLO;
+ goto do_hilo1;
+ case INDEX_op_divu_i64:
+ if (use_mips32r6_instructions) {
+ tcg_out_opc_reg(s, OPC_DDIVU_R6, a0, a1, a2);
+ break;
+ }
+ i1 = OPC_DDIVU, i2 = OPC_MFLO;
+ goto do_hilo1;
+ case INDEX_op_rem_i64:
+ if (use_mips32r6_instructions) {
+ tcg_out_opc_reg(s, OPC_DMOD, a0, a1, a2);
+ break;
+ }
+ i1 = OPC_DDIV, i2 = OPC_MFHI;
+ goto do_hilo1;
+ case INDEX_op_remu_i64:
+ if (use_mips32r6_instructions) {
+ tcg_out_opc_reg(s, OPC_DMODU, a0, a1, a2);
+ break;
+ }
+ i1 = OPC_DDIVU, i2 = OPC_MFHI;
+ do_hilo1:
+ tcg_out_opc_reg(s, i1, 0, a1, a2);
+ tcg_out_opc_reg(s, i2, a0, 0, 0);
+ break;
+
+ case INDEX_op_muls2_i32:
+ i1 = OPC_MULT;
+ goto do_hilo2;
+ case INDEX_op_mulu2_i32:
+ i1 = OPC_MULTU;
+ goto do_hilo2;
+ case INDEX_op_muls2_i64:
+ i1 = OPC_DMULT;
+ goto do_hilo2;
+ case INDEX_op_mulu2_i64:
+ i1 = OPC_DMULTU;
+ do_hilo2:
+ tcg_out_opc_reg(s, i1, 0, a2, args[3]);
+ tcg_out_opc_reg(s, OPC_MFLO, a0, 0, 0);
+ tcg_out_opc_reg(s, OPC_MFHI, a1, 0, 0);
+ break;
+
+ case INDEX_op_not_i32:
+ case INDEX_op_not_i64:
+ i1 = OPC_NOR;
+ goto do_unary;
+ case INDEX_op_ext8s_i32:
+ case INDEX_op_ext8s_i64:
+ i1 = OPC_SEB;
+ goto do_unary;
+ case INDEX_op_ext16s_i32:
+ case INDEX_op_ext16s_i64:
+ i1 = OPC_SEH;
+ do_unary:
+ tcg_out_opc_reg(s, i1, a0, TCG_REG_ZERO, a1);
+ break;
+
+ case INDEX_op_bswap16_i32:
+ case INDEX_op_bswap16_i64:
+ tcg_out_bswap16(s, a0, a1, a2);
+ break;
+ case INDEX_op_bswap32_i32:
+ tcg_out_bswap32(s, a0, a1, 0);
+ break;
+ case INDEX_op_bswap32_i64:
+ tcg_out_bswap32(s, a0, a1, a2);
+ break;
+ case INDEX_op_bswap64_i64:
+ tcg_out_bswap64(s, a0, a1);
+ break;
+ case INDEX_op_extrh_i64_i32:
+ tcg_out_dsra(s, a0, a1, 32);
+ break;
+ case INDEX_op_ext32s_i64:
+ case INDEX_op_ext_i32_i64:
+ case INDEX_op_extrl_i64_i32:
+ tcg_out_opc_sa(s, OPC_SLL, a0, a1, 0);
+ break;
+ case INDEX_op_ext32u_i64:
+ case INDEX_op_extu_i32_i64:
+ tcg_out_ext32u(s, a0, a1);
+ break;
+
+ case INDEX_op_sar_i32:
+ i1 = OPC_SRAV, i2 = OPC_SRA;
+ goto do_shift;
+ case INDEX_op_shl_i32:
+ i1 = OPC_SLLV, i2 = OPC_SLL;
+ goto do_shift;
+ case INDEX_op_shr_i32:
+ i1 = OPC_SRLV, i2 = OPC_SRL;
+ goto do_shift;
+ case INDEX_op_rotr_i32:
+ i1 = OPC_ROTRV, i2 = OPC_ROTR;
+ do_shift:
+ if (c2) {
+ tcg_out_opc_sa(s, i2, a0, a1, a2);
+ break;
+ }
+ do_shiftv:
+ tcg_out_opc_reg(s, i1, a0, a2, a1);
+ break;
+ case INDEX_op_rotl_i32:
+ if (c2) {
+ tcg_out_opc_sa(s, OPC_ROTR, a0, a1, 32 - a2);
+ } else {
+ tcg_out_opc_reg(s, OPC_SUBU, TCG_TMP0, TCG_REG_ZERO, a2);
+ tcg_out_opc_reg(s, OPC_ROTRV, a0, TCG_TMP0, a1);
+ }
+ break;
+ case INDEX_op_sar_i64:
+ if (c2) {
+ tcg_out_dsra(s, a0, a1, a2);
+ break;
+ }
+ i1 = OPC_DSRAV;
+ goto do_shiftv;
+ case INDEX_op_shl_i64:
+ if (c2) {
+ tcg_out_dsll(s, a0, a1, a2);
+ break;
+ }
+ i1 = OPC_DSLLV;
+ goto do_shiftv;
+ case INDEX_op_shr_i64:
+ if (c2) {
+ tcg_out_dsrl(s, a0, a1, a2);
+ break;
+ }
+ i1 = OPC_DSRLV;
+ goto do_shiftv;
+ case INDEX_op_rotr_i64:
+ if (c2) {
+ tcg_out_opc_sa64(s, OPC_DROTR, OPC_DROTR32, a0, a1, a2);
+ break;
+ }
+ i1 = OPC_DROTRV;
+ goto do_shiftv;
+ case INDEX_op_rotl_i64:
+ if (c2) {
+ tcg_out_opc_sa64(s, OPC_DROTR, OPC_DROTR32, a0, a1, 64 - a2);
+ } else {
+ tcg_out_opc_reg(s, OPC_DSUBU, TCG_TMP0, TCG_REG_ZERO, a2);
+ tcg_out_opc_reg(s, OPC_DROTRV, a0, TCG_TMP0, a1);
+ }
+ break;
+
+ case INDEX_op_clz_i32:
+ tcg_out_clz(s, OPC_CLZ, OPC_CLZ_R6, 32, a0, a1, a2);
+ break;
+ case INDEX_op_clz_i64:
+ tcg_out_clz(s, OPC_DCLZ, OPC_DCLZ_R6, 64, a0, a1, a2);
+ break;
+
+ case INDEX_op_deposit_i32:
+ tcg_out_opc_bf(s, OPC_INS, a0, a2, args[3] + args[4] - 1, args[3]);
+ break;
+ case INDEX_op_deposit_i64:
+ tcg_out_opc_bf64(s, OPC_DINS, OPC_DINSM, OPC_DINSU, a0, a2,
+ args[3] + args[4] - 1, args[3]);
+ break;
+ case INDEX_op_extract_i32:
+ tcg_out_opc_bf(s, OPC_EXT, a0, a1, args[3] - 1, a2);
+ break;
+ case INDEX_op_extract_i64:
+ tcg_out_opc_bf64(s, OPC_DEXT, OPC_DEXTM, OPC_DEXTU, a0, a1,
+ args[3] - 1, a2);
+ break;
+
+ case INDEX_op_brcond_i32:
+ case INDEX_op_brcond_i64:
+ tcg_out_brcond(s, a2, a0, a1, arg_label(args[3]));
+ break;
+ case INDEX_op_brcond2_i32:
+ tcg_out_brcond2(s, args[4], a0, a1, a2, args[3], arg_label(args[5]));
+ break;
+
+ case INDEX_op_movcond_i32:
+ case INDEX_op_movcond_i64:
+ tcg_out_movcond(s, args[5], a0, a1, a2, args[3], args[4]);
+ break;
+
+ case INDEX_op_setcond_i32:
+ case INDEX_op_setcond_i64:
+ tcg_out_setcond(s, args[3], a0, a1, a2);
+ break;
+ case INDEX_op_setcond2_i32:
+ tcg_out_setcond2(s, args[5], a0, a1, a2, args[3], args[4]);
+ break;
+
+ case INDEX_op_qemu_ld_i32:
+ tcg_out_qemu_ld(s, args, false);
+ break;
+ case INDEX_op_qemu_ld_i64:
+ tcg_out_qemu_ld(s, args, true);
+ break;
+ case INDEX_op_qemu_st_i32:
+ tcg_out_qemu_st(s, args, false);
+ break;
+ case INDEX_op_qemu_st_i64:
+ tcg_out_qemu_st(s, args, true);
+ break;
+
+ case INDEX_op_add2_i32:
+ tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5],
+ const_args[4], const_args[5], false);
+ break;
+ case INDEX_op_sub2_i32:
+ tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5],
+ const_args[4], const_args[5], true);
+ break;
+
+ case INDEX_op_mb:
+ tcg_out_mb(s, a0);
+ break;
+ case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
+ case INDEX_op_mov_i64:
+ case INDEX_op_call: /* Always emitted via tcg_out_call. */
+ default:
+ tcg_abort();
+ }
+}
+
+static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
+{
+ switch (op) {
+ case INDEX_op_goto_ptr:
+ return C_O0_I1(r);
+
+ case INDEX_op_ld8u_i32:
+ case INDEX_op_ld8s_i32:
+ case INDEX_op_ld16u_i32:
+ case INDEX_op_ld16s_i32:
+ case INDEX_op_ld_i32:
+ case INDEX_op_not_i32:
+ case INDEX_op_bswap16_i32:
+ case INDEX_op_bswap32_i32:
+ case INDEX_op_ext8s_i32:
+ case INDEX_op_ext16s_i32:
+ case INDEX_op_extract_i32:
+ case INDEX_op_ld8u_i64:
+ case INDEX_op_ld8s_i64:
+ case INDEX_op_ld16u_i64:
+ case INDEX_op_ld16s_i64:
+ case INDEX_op_ld32s_i64:
+ case INDEX_op_ld32u_i64:
+ case INDEX_op_ld_i64:
+ case INDEX_op_not_i64:
+ case INDEX_op_bswap16_i64:
+ case INDEX_op_bswap32_i64:
+ case INDEX_op_bswap64_i64:
+ case INDEX_op_ext8s_i64:
+ case INDEX_op_ext16s_i64:
+ case INDEX_op_ext32s_i64:
+ case INDEX_op_ext32u_i64:
+ case INDEX_op_ext_i32_i64:
+ case INDEX_op_extu_i32_i64:
+ case INDEX_op_extrl_i64_i32:
+ case INDEX_op_extrh_i64_i32:
+ case INDEX_op_extract_i64:
+ return C_O1_I1(r, r);
+
+ case INDEX_op_st8_i32:
+ case INDEX_op_st16_i32:
+ case INDEX_op_st_i32:
+ case INDEX_op_st8_i64:
+ case INDEX_op_st16_i64:
+ case INDEX_op_st32_i64:
+ case INDEX_op_st_i64:
+ return C_O0_I2(rZ, r);
+
+ case INDEX_op_add_i32:
+ case INDEX_op_add_i64:
+ return C_O1_I2(r, r, rJ);
+ case INDEX_op_sub_i32:
+ case INDEX_op_sub_i64:
+ return C_O1_I2(r, rZ, rN);
+ case INDEX_op_mul_i32:
+ case INDEX_op_mulsh_i32:
+ case INDEX_op_muluh_i32:
+ case INDEX_op_div_i32:
+ case INDEX_op_divu_i32:
+ case INDEX_op_rem_i32:
+ case INDEX_op_remu_i32:
+ case INDEX_op_nor_i32:
+ case INDEX_op_setcond_i32:
+ case INDEX_op_mul_i64:
+ case INDEX_op_mulsh_i64:
+ case INDEX_op_muluh_i64:
+ case INDEX_op_div_i64:
+ case INDEX_op_divu_i64:
+ case INDEX_op_rem_i64:
+ case INDEX_op_remu_i64:
+ case INDEX_op_nor_i64:
+ case INDEX_op_setcond_i64:
+ return C_O1_I2(r, rZ, rZ);
+ case INDEX_op_muls2_i32:
+ case INDEX_op_mulu2_i32:
+ case INDEX_op_muls2_i64:
+ case INDEX_op_mulu2_i64:
+ return C_O2_I2(r, r, r, r);
+ case INDEX_op_and_i32:
+ case INDEX_op_and_i64:
+ return C_O1_I2(r, r, rIK);
+ case INDEX_op_or_i32:
+ case INDEX_op_xor_i32:
+ case INDEX_op_or_i64:
+ case INDEX_op_xor_i64:
+ return C_O1_I2(r, r, rI);
+ case INDEX_op_shl_i32:
+ case INDEX_op_shr_i32:
+ case INDEX_op_sar_i32:
+ case INDEX_op_rotr_i32:
+ case INDEX_op_rotl_i32:
+ case INDEX_op_shl_i64:
+ case INDEX_op_shr_i64:
+ case INDEX_op_sar_i64:
+ case INDEX_op_rotr_i64:
+ case INDEX_op_rotl_i64:
+ return C_O1_I2(r, r, ri);
+ case INDEX_op_clz_i32:
+ case INDEX_op_clz_i64:
+ return C_O1_I2(r, r, rWZ);
+
+ case INDEX_op_deposit_i32:
+ case INDEX_op_deposit_i64:
+ return C_O1_I2(r, 0, rZ);
+ case INDEX_op_brcond_i32:
+ case INDEX_op_brcond_i64:
+ return C_O0_I2(rZ, rZ);
+ case INDEX_op_movcond_i32:
+ case INDEX_op_movcond_i64:
+ return (use_mips32r6_instructions
+ ? C_O1_I4(r, rZ, rZ, rZ, rZ)
+ : C_O1_I4(r, rZ, rZ, rZ, 0));
+ case INDEX_op_add2_i32:
+ case INDEX_op_sub2_i32:
+ return C_O2_I4(r, r, rZ, rZ, rN, rN);
+ case INDEX_op_setcond2_i32:
+ return C_O1_I4(r, rZ, rZ, rZ, rZ);
+ case INDEX_op_brcond2_i32:
+ return C_O0_I4(rZ, rZ, rZ, rZ);
+
+ case INDEX_op_qemu_ld_i32:
+ return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
+ ? C_O1_I1(r, L) : C_O1_I2(r, L, L));
+ case INDEX_op_qemu_st_i32:
+ return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
+ ? C_O0_I2(SZ, S) : C_O0_I3(SZ, S, S));
+ case INDEX_op_qemu_ld_i64:
+ return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L)
+ : TARGET_LONG_BITS == 32 ? C_O2_I1(r, r, L)
+ : C_O2_I2(r, r, L, L));
+ case INDEX_op_qemu_st_i64:
+ return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(SZ, S)
+ : TARGET_LONG_BITS == 32 ? C_O0_I3(SZ, SZ, S)
+ : C_O0_I4(SZ, SZ, S, S));
+
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static const int tcg_target_callee_save_regs[] = {
+ TCG_REG_S0, /* used for the global env (TCG_AREG0) */
+ TCG_REG_S1,
+ TCG_REG_S2,
+ TCG_REG_S3,
+ TCG_REG_S4,
+ TCG_REG_S5,
+ TCG_REG_S6,
+ TCG_REG_S7,
+ TCG_REG_S8,
+ TCG_REG_RA, /* should be last for ABI compliance */
+};
+
+/* The Linux kernel doesn't provide any information about the available
+ instruction set. Probe it using a signal handler. */
+
+
+#ifndef use_movnz_instructions
+bool use_movnz_instructions = false;
+#endif
+
+#ifndef use_mips32_instructions
+bool use_mips32_instructions = false;
+#endif
+
+#ifndef use_mips32r2_instructions
+bool use_mips32r2_instructions = false;
+#endif
+
+static volatile sig_atomic_t got_sigill;
+
+static void sigill_handler(int signo, siginfo_t *si, void *data)
+{
+ /* Skip the faulty instruction */
+ ucontext_t *uc = (ucontext_t *)data;
+ uc->uc_mcontext.pc += 4;
+
+ got_sigill = 1;
+}
+
+static void tcg_target_detect_isa(void)
+{
+ struct sigaction sa_old, sa_new;
+
+ memset(&sa_new, 0, sizeof(sa_new));
+ sa_new.sa_flags = SA_SIGINFO;
+ sa_new.sa_sigaction = sigill_handler;
+ sigaction(SIGILL, &sa_new, &sa_old);
+
+ /* Probe for movn/movz, necessary to implement movcond. */
+#ifndef use_movnz_instructions
+ got_sigill = 0;
+ asm volatile(".set push\n"
+ ".set mips32\n"
+ "movn $zero, $zero, $zero\n"
+ "movz $zero, $zero, $zero\n"
+ ".set pop\n"
+ : : : );
+ use_movnz_instructions = !got_sigill;
+#endif
+
+ /* Probe for MIPS32 instructions. As no subsetting is allowed
+ by the specification, it is only necessary to probe for one
+ of the instructions. */
+#ifndef use_mips32_instructions
+ got_sigill = 0;
+ asm volatile(".set push\n"
+ ".set mips32\n"
+ "mul $zero, $zero\n"
+ ".set pop\n"
+ : : : );
+ use_mips32_instructions = !got_sigill;
+#endif
+
+ /* Probe for MIPS32r2 instructions if MIPS32 instructions are
+ available. As no subsetting is allowed by the specification,
+ it is only necessary to probe for one of the instructions. */
+#ifndef use_mips32r2_instructions
+ if (use_mips32_instructions) {
+ got_sigill = 0;
+ asm volatile(".set push\n"
+ ".set mips32r2\n"
+ "seb $zero, $zero\n"
+ ".set pop\n"
+ : : : );
+ use_mips32r2_instructions = !got_sigill;
+ }
+#endif
+
+ sigaction(SIGILL, &sa_old, NULL);
+}
+
+static tcg_insn_unit *align_code_ptr(TCGContext *s)
+{
+ uintptr_t p = (uintptr_t)s->code_ptr;
+ if (p & 15) {
+ p = (p + 15) & -16;
+ s->code_ptr = (void *)p;
+ }
+ return s->code_ptr;
+}
+
+/* Stack frame parameters. */
+#define REG_SIZE (TCG_TARGET_REG_BITS / 8)
+#define SAVE_SIZE ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * REG_SIZE)
+#define TEMP_SIZE (CPU_TEMP_BUF_NLONGS * (int)sizeof(long))
+
+#define FRAME_SIZE ((TCG_STATIC_CALL_ARGS_SIZE + TEMP_SIZE + SAVE_SIZE \
+ + TCG_TARGET_STACK_ALIGN - 1) \
+ & -TCG_TARGET_STACK_ALIGN)
+#define SAVE_OFS (TCG_STATIC_CALL_ARGS_SIZE + TEMP_SIZE)
+
+/* We're expecting to be able to use an immediate for frame allocation. */
+QEMU_BUILD_BUG_ON(FRAME_SIZE > 0x7fff);
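+
+/*
+ * Worked example (illustrative only; the real values depend on the build
+ * configuration): assuming TCG_STATIC_CALL_ARGS_SIZE == 128 and
+ * CPU_TEMP_BUF_NLONGS == 128 on an o32 host (REG_SIZE == 4, 4-byte long):
+ *   SAVE_SIZE  = 10 * 4  = 40
+ *   TEMP_SIZE  = 128 * 4 = 512
+ *   FRAME_SIZE = (128 + 512 + 40 + 15) & -16 = 688
+ *   SAVE_OFS   = 128 + 512 = 640
+ * which fits comfortably within the 16-bit signed immediate checked above.
+ */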
+
+/* Generate global QEMU prologue and epilogue code */
+static void tcg_target_qemu_prologue(TCGContext *s)
+{
+ int i;
+
+ tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE, TEMP_SIZE);
+
+ /* TB prologue */
+ tcg_out_opc_imm(s, ALIAS_PADDI, TCG_REG_SP, TCG_REG_SP, -FRAME_SIZE);
+ for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
+ tcg_out_st(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
+ TCG_REG_SP, SAVE_OFS + i * REG_SIZE);
+ }
+
+#ifndef CONFIG_SOFTMMU
+ if (guest_base) {
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base);
+ tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
+ }
+#endif
+
+ /* Call generated code */
+ tcg_out_opc_reg(s, OPC_JR, 0, tcg_target_call_iarg_regs[1], 0);
+ /* delay slot */
+ tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
+
+ /*
+ * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
+ * and fall through to the rest of the epilogue.
+ */
+ tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
+ tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_V0, TCG_REG_ZERO);
+
+ /* TB epilogue */
+ tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
+ for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
+ tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
+ TCG_REG_SP, SAVE_OFS + i * REG_SIZE);
+ }
+
+ tcg_out_opc_reg(s, OPC_JR, 0, TCG_REG_RA, 0);
+ /* delay slot */
+ tcg_out_opc_imm(s, ALIAS_PADDI, TCG_REG_SP, TCG_REG_SP, FRAME_SIZE);
+
+ if (use_mips32r2_instructions) {
+ return;
+ }
+
+ /* Bswap subroutines: Input in TCG_TMP0, output in TCG_TMP3;
+ clobbers TCG_TMP1, TCG_TMP2. */
+
+ /*
+ * bswap32 -- 32-bit swap (signed result for mips64). a0 = abcd.
+ */
+ bswap32_addr = tcg_splitwx_to_rx(align_code_ptr(s));
+ /* t3 = (ssss)d000 */
+ tcg_out_opc_sa(s, OPC_SLL, TCG_TMP3, TCG_TMP0, 24);
+ /* t1 = 000a */
+ tcg_out_opc_sa(s, OPC_SRL, TCG_TMP1, TCG_TMP0, 24);
+ /* t2 = 00c0 */
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP2, TCG_TMP0, 0xff00);
+ /* t3 = d00a */
+ tcg_out_opc_reg(s, OPC_OR, TCG_TMP3, TCG_TMP3, TCG_TMP1);
+ /* t1 = 0abc */
+ tcg_out_opc_sa(s, OPC_SRL, TCG_TMP1, TCG_TMP0, 8);
+ /* t2 = 0c00 */
+ tcg_out_opc_sa(s, OPC_SLL, TCG_TMP2, TCG_TMP2, 8);
+ /* t1 = 00b0 */
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, TCG_TMP1, 0xff00);
+ /* t3 = dc0a */
+ tcg_out_opc_reg(s, OPC_OR, TCG_TMP3, TCG_TMP3, TCG_TMP2);
+ tcg_out_opc_reg(s, OPC_JR, 0, TCG_REG_RA, 0);
+ /* t3 = dcba -- delay slot */
+ tcg_out_opc_reg(s, OPC_OR, TCG_TMP3, TCG_TMP3, TCG_TMP1);
+
+ if (TCG_TARGET_REG_BITS == 32) {
+ return;
+ }
+
+ /*
+ * bswap32u -- unsigned 32-bit swap. a0 = ....abcd.
+ */
+ bswap32u_addr = tcg_splitwx_to_rx(align_code_ptr(s));
+ /* t1 = (0000)000d */
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, TCG_TMP0, 0xff);
+ /* t3 = 000a */
+ tcg_out_opc_sa(s, OPC_SRL, TCG_TMP3, TCG_TMP0, 24);
+ /* t1 = (0000)d000 */
+ tcg_out_dsll(s, TCG_TMP1, TCG_TMP1, 24);
+ /* t2 = 00c0 */
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP2, TCG_TMP0, 0xff00);
+ /* t3 = d00a */
+ tcg_out_opc_reg(s, OPC_OR, TCG_TMP3, TCG_TMP3, TCG_TMP1);
+ /* t1 = 0abc */
+ tcg_out_opc_sa(s, OPC_SRL, TCG_TMP1, TCG_TMP0, 8);
+ /* t2 = 0c00 */
+ tcg_out_opc_sa(s, OPC_SLL, TCG_TMP2, TCG_TMP2, 8);
+ /* t1 = 00b0 */
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, TCG_TMP1, 0xff00);
+ /* t3 = dc0a */
+ tcg_out_opc_reg(s, OPC_OR, TCG_TMP3, TCG_TMP3, TCG_TMP2);
+ tcg_out_opc_reg(s, OPC_JR, 0, TCG_REG_RA, 0);
+ /* t3 = dcba -- delay slot */
+ tcg_out_opc_reg(s, OPC_OR, TCG_TMP3, TCG_TMP3, TCG_TMP1);
+
+ /*
+ * bswap64 -- 64-bit swap. a0 = abcdefgh
+ */
+ bswap64_addr = tcg_splitwx_to_rx(align_code_ptr(s));
+ /* t3 = h0000000 */
+ tcg_out_dsll(s, TCG_TMP3, TCG_TMP0, 56);
+ /* t1 = 0000000a */
+ tcg_out_dsrl(s, TCG_TMP1, TCG_TMP0, 56);
+
+ /* t2 = 000000g0 */
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP2, TCG_TMP0, 0xff00);
+ /* t3 = h000000a */
+ tcg_out_opc_reg(s, OPC_OR, TCG_TMP3, TCG_TMP3, TCG_TMP1);
+ /* t1 = 00000abc */
+ tcg_out_dsrl(s, TCG_TMP1, TCG_TMP0, 40);
+ /* t2 = 0g000000 */
+ tcg_out_dsll(s, TCG_TMP2, TCG_TMP2, 40);
+ /* t1 = 000000b0 */
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, TCG_TMP1, 0xff00);
+
+ /* t3 = hg00000a */
+ tcg_out_opc_reg(s, OPC_OR, TCG_TMP3, TCG_TMP3, TCG_TMP2);
+ /* t2 = 0000abcd */
+ tcg_out_dsrl(s, TCG_TMP2, TCG_TMP0, 32);
+ /* t3 = hg0000ba */
+ tcg_out_opc_reg(s, OPC_OR, TCG_TMP3, TCG_TMP3, TCG_TMP1);
+
+ /* t1 = 000000c0 */
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, TCG_TMP2, 0xff00);
+ /* t2 = 0000000d */
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP2, TCG_TMP2, 0x00ff);
+ /* t1 = 00000c00 */
+ tcg_out_dsll(s, TCG_TMP1, TCG_TMP1, 8);
+ /* t2 = 0000d000 */
+ tcg_out_dsll(s, TCG_TMP2, TCG_TMP2, 24);
+
+ /* t3 = hg000cba */
+ tcg_out_opc_reg(s, OPC_OR, TCG_TMP3, TCG_TMP3, TCG_TMP1);
+ /* t1 = 00abcdef */
+ tcg_out_dsrl(s, TCG_TMP1, TCG_TMP0, 16);
+ /* t3 = hg00dcba */
+ tcg_out_opc_reg(s, OPC_OR, TCG_TMP3, TCG_TMP3, TCG_TMP2);
+
+ /* t2 = 0000000f */
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP2, TCG_TMP1, 0x00ff);
+ /* t1 = 000000e0 */
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, TCG_TMP1, 0xff00);
+ /* t2 = 00f00000 */
+ tcg_out_dsll(s, TCG_TMP2, TCG_TMP2, 40);
+ /* t1 = 000e0000 */
+ tcg_out_dsll(s, TCG_TMP1, TCG_TMP1, 24);
+
+ /* t3 = hgf0dcba */
+ tcg_out_opc_reg(s, OPC_OR, TCG_TMP3, TCG_TMP3, TCG_TMP2);
+ tcg_out_opc_reg(s, OPC_JR, 0, TCG_REG_RA, 0);
+ /* t3 = hgfedcba -- delay slot */
+ tcg_out_opc_reg(s, OPC_OR, TCG_TMP3, TCG_TMP3, TCG_TMP1);
+}
+
+static void tcg_target_init(TCGContext *s)
+{
+ tcg_target_detect_isa();
+ tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff;
+ if (TCG_TARGET_REG_BITS == 64) {
+ tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff;
+ }
+
+ tcg_target_call_clobber_regs = 0;
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V0);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V1);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_A0);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_A1);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_A2);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_A3);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_T0);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_T1);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_T2);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_T3);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_T4);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_T5);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_T6);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_T7);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_T8);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_T9);
+
+ s->reserved_regs = 0;
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_ZERO); /* zero register */
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_K0); /* kernel use only */
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_K1); /* kernel use only */
+ tcg_regset_set_reg(s->reserved_regs, TCG_TMP0); /* internal use */
+ tcg_regset_set_reg(s->reserved_regs, TCG_TMP1); /* internal use */
+ tcg_regset_set_reg(s->reserved_regs, TCG_TMP2); /* internal use */
+ tcg_regset_set_reg(s->reserved_regs, TCG_TMP3); /* internal use */
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_RA); /* return address */
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP); /* stack pointer */
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_GP); /* global pointer */
+}
+
+typedef struct {
+ DebugFrameHeader h;
+ uint8_t fde_def_cfa[4];
+ uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2];
+} DebugFrame;
+
+#define ELF_HOST_MACHINE EM_MIPS
+/* GDB doesn't appear to require proper setting of ELF_HOST_FLAGS,
+ which is good because they're really quite complicated for MIPS. */
+
+static const DebugFrame debug_frame = {
+ .h.cie.len = sizeof(DebugFrameCIE) - 4, /* length after .len member */
+ .h.cie.id = -1,
+ .h.cie.version = 1,
+ .h.cie.code_align = 1,
+ .h.cie.data_align = -(TCG_TARGET_REG_BITS / 8) & 0x7f, /* sleb128 */
+ .h.cie.return_column = TCG_REG_RA,
+
+ /* Total FDE size does not include the "len" member. */
+ .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
+
+ .fde_def_cfa = {
+ 12, TCG_REG_SP, /* DW_CFA_def_cfa sp, ... */
+ (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
+ (FRAME_SIZE >> 7)
+ },
+ .fde_reg_ofs = {
+ 0x80 + 16, 9, /* DW_CFA_offset, s0, -72 */
+        0x80 + 17, 8,               /* DW_CFA_offset, s1, -64 */
+        0x80 + 18, 7,               /* DW_CFA_offset, s2, -56 */
+        0x80 + 19, 6,               /* DW_CFA_offset, s3, -48 */
+        0x80 + 20, 5,               /* DW_CFA_offset, s4, -40 */
+        0x80 + 21, 4,               /* DW_CFA_offset, s5, -32 */
+        0x80 + 22, 3,               /* DW_CFA_offset, s6, -24 */
+ 0x80 + 30, 2, /* DW_CFA_offset, s8, -16 */
+ 0x80 + 31, 1, /* DW_CFA_offset, ra, -8 */
+ }
+};
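+
+/*
+ * Note on fde_def_cfa above: 12 is DW_CFA_def_cfa, followed by the CFA
+ * register and FRAME_SIZE as a ULEB128 split across two bytes (low seven
+ * bits with the continuation bit set, then the remaining bits).  For the
+ * illustrative FRAME_SIZE of 688 assumed earlier, that would encode as
+ * 0xb0, 0x05, since 0x30 + 5 * 128 == 688.
+ */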
+
+void tcg_register_jit(const void *buf, size_t buf_size)
+{
+ tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
+}
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
new file mode 100644
index 000000000..c366fdf74
--- /dev/null
+++ b/tcg/mips/tcg-target.h
@@ -0,0 +1,214 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2008-2009 Arnaud Patard <arnaud.patard@rtp-net.org>
+ * Copyright (c) 2009 Aurelien Jarno <aurelien@aurel32.net>
+ * Based on i386/tcg-target.c - Copyright (c) 2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef MIPS_TCG_TARGET_H
+#define MIPS_TCG_TARGET_H
+
+#if _MIPS_SIM == _ABIO32
+# define TCG_TARGET_REG_BITS 32
+#elif _MIPS_SIM == _ABIN32 || _MIPS_SIM == _ABI64
+# define TCG_TARGET_REG_BITS 64
+#else
+# error "Unknown ABI"
+#endif
+
+#define TCG_TARGET_INSN_UNIT_SIZE 4
+#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16
+#define TCG_TARGET_NB_REGS 32
+
+#define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)
+
+typedef enum {
+ TCG_REG_ZERO = 0,
+ TCG_REG_AT,
+ TCG_REG_V0,
+ TCG_REG_V1,
+ TCG_REG_A0,
+ TCG_REG_A1,
+ TCG_REG_A2,
+ TCG_REG_A3,
+ TCG_REG_T0,
+ TCG_REG_T1,
+ TCG_REG_T2,
+ TCG_REG_T3,
+ TCG_REG_T4,
+ TCG_REG_T5,
+ TCG_REG_T6,
+ TCG_REG_T7,
+ TCG_REG_S0,
+ TCG_REG_S1,
+ TCG_REG_S2,
+ TCG_REG_S3,
+ TCG_REG_S4,
+ TCG_REG_S5,
+ TCG_REG_S6,
+ TCG_REG_S7,
+ TCG_REG_T8,
+ TCG_REG_T9,
+ TCG_REG_K0,
+ TCG_REG_K1,
+ TCG_REG_GP,
+ TCG_REG_SP,
+ TCG_REG_S8,
+ TCG_REG_RA,
+
+ TCG_REG_CALL_STACK = TCG_REG_SP,
+ TCG_AREG0 = TCG_REG_S0,
+} TCGReg;
+
+/* used for function call generation */
+#define TCG_TARGET_STACK_ALIGN 16
+#if _MIPS_SIM == _ABIO32
+# define TCG_TARGET_CALL_STACK_OFFSET 16
+#else
+# define TCG_TARGET_CALL_STACK_OFFSET 0
+#endif
+#define TCG_TARGET_CALL_ALIGN_ARGS 1
+
+/* MOVN/MOVZ instructions detection */
+#if (defined(__mips_isa_rev) && (__mips_isa_rev >= 1)) || \
+ defined(_MIPS_ARCH_LOONGSON2E) || defined(_MIPS_ARCH_LOONGSON2F) || \
+ defined(_MIPS_ARCH_MIPS4)
+#define use_movnz_instructions 1
+#else
+extern bool use_movnz_instructions;
+#endif
+
+/* MIPS32 instruction set detection */
+#if defined(__mips_isa_rev) && (__mips_isa_rev >= 1)
+#define use_mips32_instructions 1
+#else
+extern bool use_mips32_instructions;
+#endif
+
+/* MIPS32R2 instruction set detection */
+#if defined(__mips_isa_rev) && (__mips_isa_rev >= 2)
+#define use_mips32r2_instructions 1
+#else
+extern bool use_mips32r2_instructions;
+#endif
+
+/* MIPS32R6 instruction set detection */
+#if defined(__mips_isa_rev) && (__mips_isa_rev >= 6)
+#define use_mips32r6_instructions 1
+#else
+#define use_mips32r6_instructions 0
+#endif
+
+/* optional instructions */
+#define TCG_TARGET_HAS_div_i32 1
+#define TCG_TARGET_HAS_rem_i32 1
+#define TCG_TARGET_HAS_not_i32 1
+#define TCG_TARGET_HAS_nor_i32 1
+#define TCG_TARGET_HAS_andc_i32 0
+#define TCG_TARGET_HAS_orc_i32 0
+#define TCG_TARGET_HAS_eqv_i32 0
+#define TCG_TARGET_HAS_nand_i32 0
+#define TCG_TARGET_HAS_mulu2_i32 (!use_mips32r6_instructions)
+#define TCG_TARGET_HAS_muls2_i32 (!use_mips32r6_instructions)
+#define TCG_TARGET_HAS_muluh_i32 1
+#define TCG_TARGET_HAS_mulsh_i32 1
+#define TCG_TARGET_HAS_bswap32_i32 1
+#define TCG_TARGET_HAS_direct_jump 0
+
+#if TCG_TARGET_REG_BITS == 64
+#define TCG_TARGET_HAS_add2_i32 0
+#define TCG_TARGET_HAS_sub2_i32 0
+#define TCG_TARGET_HAS_extrl_i64_i32 1
+#define TCG_TARGET_HAS_extrh_i64_i32 1
+#define TCG_TARGET_HAS_div_i64 1
+#define TCG_TARGET_HAS_rem_i64 1
+#define TCG_TARGET_HAS_not_i64 1
+#define TCG_TARGET_HAS_nor_i64 1
+#define TCG_TARGET_HAS_andc_i64 0
+#define TCG_TARGET_HAS_orc_i64 0
+#define TCG_TARGET_HAS_eqv_i64 0
+#define TCG_TARGET_HAS_nand_i64 0
+#define TCG_TARGET_HAS_add2_i64 0
+#define TCG_TARGET_HAS_sub2_i64 0
+#define TCG_TARGET_HAS_mulu2_i64 (!use_mips32r6_instructions)
+#define TCG_TARGET_HAS_muls2_i64 (!use_mips32r6_instructions)
+#define TCG_TARGET_HAS_muluh_i64 1
+#define TCG_TARGET_HAS_mulsh_i64 1
+#define TCG_TARGET_HAS_ext32s_i64 1
+#define TCG_TARGET_HAS_ext32u_i64 1
+#endif
+
+/* optional instructions detected at runtime */
+#define TCG_TARGET_HAS_movcond_i32 use_movnz_instructions
+#define TCG_TARGET_HAS_bswap16_i32 use_mips32r2_instructions
+#define TCG_TARGET_HAS_deposit_i32 use_mips32r2_instructions
+#define TCG_TARGET_HAS_extract_i32 use_mips32r2_instructions
+#define TCG_TARGET_HAS_sextract_i32 0
+#define TCG_TARGET_HAS_extract2_i32 0
+#define TCG_TARGET_HAS_ext8s_i32 use_mips32r2_instructions
+#define TCG_TARGET_HAS_ext16s_i32 use_mips32r2_instructions
+#define TCG_TARGET_HAS_rot_i32 use_mips32r2_instructions
+#define TCG_TARGET_HAS_clz_i32 use_mips32r2_instructions
+#define TCG_TARGET_HAS_ctz_i32 0
+#define TCG_TARGET_HAS_ctpop_i32 0
+#define TCG_TARGET_HAS_qemu_st8_i32 0
+
+#if TCG_TARGET_REG_BITS == 64
+#define TCG_TARGET_HAS_movcond_i64 use_movnz_instructions
+#define TCG_TARGET_HAS_bswap16_i64 use_mips32r2_instructions
+#define TCG_TARGET_HAS_bswap32_i64 use_mips32r2_instructions
+#define TCG_TARGET_HAS_bswap64_i64 use_mips32r2_instructions
+#define TCG_TARGET_HAS_deposit_i64 use_mips32r2_instructions
+#define TCG_TARGET_HAS_extract_i64 use_mips32r2_instructions
+#define TCG_TARGET_HAS_sextract_i64 0
+#define TCG_TARGET_HAS_extract2_i64 0
+#define TCG_TARGET_HAS_ext8s_i64 use_mips32r2_instructions
+#define TCG_TARGET_HAS_ext16s_i64 use_mips32r2_instructions
+#define TCG_TARGET_HAS_rot_i64 use_mips32r2_instructions
+#define TCG_TARGET_HAS_clz_i64 use_mips32r2_instructions
+#define TCG_TARGET_HAS_ctz_i64 0
+#define TCG_TARGET_HAS_ctpop_i64 0
+#endif
+
+/* optional instructions automatically implemented */
+#define TCG_TARGET_HAS_neg_i32 0 /* sub rd, zero, rt */
+#define TCG_TARGET_HAS_ext8u_i32 0 /* andi rt, rs, 0xff */
+#define TCG_TARGET_HAS_ext16u_i32 0 /* andi rt, rs, 0xffff */
+
+#if TCG_TARGET_REG_BITS == 64
+#define TCG_TARGET_HAS_neg_i64 0 /* sub rd, zero, rt */
+#define TCG_TARGET_HAS_ext8u_i64 0 /* andi rt, rs, 0xff */
+#define TCG_TARGET_HAS_ext16u_i64 0 /* andi rt, rs, 0xffff */
+#endif
+
+#define TCG_TARGET_DEFAULT_MO (0)
+#define TCG_TARGET_HAS_MEMORY_BSWAP 1
+
+/* not defined -- call should be eliminated at compile time */
+void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t)
+ QEMU_ERROR("code path is reachable");
+
+#ifdef CONFIG_SOFTMMU
+#define TCG_TARGET_NEED_LDST_LABELS
+#endif
+
+#endif
diff --git a/tcg/optimize.c b/tcg/optimize.c
new file mode 100644
index 000000000..2397f2cf9
--- /dev/null
+++ b/tcg/optimize.c
@@ -0,0 +1,2213 @@
+/*
+ * Optimizations for Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2010 Samsung Electronics.
+ * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/int128.h"
+#include "tcg/tcg-op.h"
+#include "tcg-internal.h"
+
+#define CASE_OP_32_64(x) \
+ glue(glue(case INDEX_op_, x), _i32): \
+ glue(glue(case INDEX_op_, x), _i64)
+
+#define CASE_OP_32_64_VEC(x) \
+ glue(glue(case INDEX_op_, x), _i32): \
+ glue(glue(case INDEX_op_, x), _i64): \
+ glue(glue(case INDEX_op_, x), _vec)
+
+typedef struct TempOptInfo {
+ bool is_const;
+ TCGTemp *prev_copy;
+ TCGTemp *next_copy;
+ uint64_t val;
+ uint64_t z_mask; /* mask bit is 0 if and only if value bit is 0 */
+ uint64_t s_mask; /* a left-aligned mask of clrsb(value) bits. */
+} TempOptInfo;
+
+typedef struct OptContext {
+ TCGContext *tcg;
+ TCGOp *prev_mb;
+ TCGTempSet temps_used;
+
+ /* In flight values from optimization. */
+ uint64_t a_mask; /* mask bit is 0 iff value identical to first input */
+ uint64_t z_mask; /* mask bit is 0 iff value bit is 0 */
+ uint64_t s_mask; /* mask of clrsb(value) bits */
+ TCGType type;
+} OptContext;
+
+/* Calculate the smask for a specific value. */
+static uint64_t smask_from_value(uint64_t value)
+{
+ int rep = clrsb64(value);
+ return ~(~0ull >> rep);
+}
+
+/*
+ * Calculate the smask for a given set of known-zeros.
+ * If there are lots of zeros on the left, we can consider the remainder
+ * an unsigned field, and thus the corresponding signed field is one bit
+ * larger.
+ */
+static uint64_t smask_from_zmask(uint64_t zmask)
+{
+ /*
+ * Only the 0 bits are significant for zmask, thus the msb itself
+ * must be zero, else we have no sign information.
+ */
+ int rep = clz64(zmask);
+ if (rep == 0) {
+ return 0;
+ }
+ rep -= 1;
+ return ~(~0ull >> rep);
+}
+
+/*
+ * Recreate a properly left-aligned smask after manipulation.
+ * Some bit-shuffling, particularly shifts and rotates, may
+ * retain sign bits on the left, but may scatter disconnected
+ * sign bits on the right. Retain only what remains to the left.
+ */
+static uint64_t smask_from_smask(int64_t smask)
+{
+ /* Only the 1 bits are significant for smask */
+ return smask_from_zmask(~smask);
+}
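+
+/*
+ * Illustrative examples for the helpers above (arbitrary values):
+ *   smask_from_value(0xff): clrsb64(0xff) == 55, so the result has the
+ *       top 55 bits set, i.e. 0xfffffffffffffe00.
+ *   smask_from_zmask(0xff): clz64(0xff) == 56, rep == 55, giving the same
+ *       mask -- a value known to fit in 8 unsigned bits has at least 55
+ *       redundant sign bits.
+ */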
+
+static inline TempOptInfo *ts_info(TCGTemp *ts)
+{
+ return ts->state_ptr;
+}
+
+static inline TempOptInfo *arg_info(TCGArg arg)
+{
+ return ts_info(arg_temp(arg));
+}
+
+static inline bool ts_is_const(TCGTemp *ts)
+{
+ return ts_info(ts)->is_const;
+}
+
+static inline bool arg_is_const(TCGArg arg)
+{
+ return ts_is_const(arg_temp(arg));
+}
+
+static inline bool ts_is_copy(TCGTemp *ts)
+{
+ return ts_info(ts)->next_copy != ts;
+}
+
+/* Reset TEMP's state, possibly removing the temp from the list of copies. */
+static void reset_ts(TCGTemp *ts)
+{
+ TempOptInfo *ti = ts_info(ts);
+ TempOptInfo *pi = ts_info(ti->prev_copy);
+ TempOptInfo *ni = ts_info(ti->next_copy);
+
+ ni->prev_copy = ti->prev_copy;
+ pi->next_copy = ti->next_copy;
+ ti->next_copy = ts;
+ ti->prev_copy = ts;
+ ti->is_const = false;
+ ti->z_mask = -1;
+ ti->s_mask = 0;
+}
+
+static void reset_temp(TCGArg arg)
+{
+ reset_ts(arg_temp(arg));
+}
+
+/* Initialize and activate a temporary. */
+static void init_ts_info(OptContext *ctx, TCGTemp *ts)
+{
+ size_t idx = temp_idx(ts);
+ TempOptInfo *ti;
+
+ if (test_bit(idx, ctx->temps_used.l)) {
+ return;
+ }
+ set_bit(idx, ctx->temps_used.l);
+
+ ti = ts->state_ptr;
+ if (ti == NULL) {
+ ti = tcg_malloc(sizeof(TempOptInfo));
+ ts->state_ptr = ti;
+ }
+
+ ti->next_copy = ts;
+ ti->prev_copy = ts;
+ if (ts->kind == TEMP_CONST) {
+ ti->is_const = true;
+ ti->val = ts->val;
+ ti->z_mask = ts->val;
+ ti->s_mask = smask_from_value(ts->val);
+ } else {
+ ti->is_const = false;
+ ti->z_mask = -1;
+ ti->s_mask = 0;
+ }
+}
+
+static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
+{
+ TCGTemp *i, *g, *l;
+
+ /* If this is already readonly, we can't do better. */
+ if (temp_readonly(ts)) {
+ return ts;
+ }
+
+ g = l = NULL;
+ for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
+ if (temp_readonly(i)) {
+ return i;
+ } else if (i->kind > ts->kind) {
+ if (i->kind == TEMP_GLOBAL) {
+ g = i;
+ } else if (i->kind == TEMP_LOCAL) {
+ l = i;
+ }
+ }
+ }
+
+ /* If we didn't find a better representation, return the same temp. */
+ return g ? g : l ? l : ts;
+}
+
+static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2)
+{
+ TCGTemp *i;
+
+ if (ts1 == ts2) {
+ return true;
+ }
+
+ if (!ts_is_copy(ts1) || !ts_is_copy(ts2)) {
+ return false;
+ }
+
+ for (i = ts_info(ts1)->next_copy; i != ts1; i = ts_info(i)->next_copy) {
+ if (i == ts2) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static bool args_are_copies(TCGArg arg1, TCGArg arg2)
+{
+ return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
+}
+
+static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
+{
+ TCGTemp *dst_ts = arg_temp(dst);
+ TCGTemp *src_ts = arg_temp(src);
+ TempOptInfo *di;
+ TempOptInfo *si;
+ TCGOpcode new_op;
+
+ if (ts_are_copies(dst_ts, src_ts)) {
+ tcg_op_remove(ctx->tcg, op);
+ return true;
+ }
+
+ reset_ts(dst_ts);
+ di = ts_info(dst_ts);
+ si = ts_info(src_ts);
+
+ switch (ctx->type) {
+ case TCG_TYPE_I32:
+ new_op = INDEX_op_mov_i32;
+ break;
+ case TCG_TYPE_I64:
+ new_op = INDEX_op_mov_i64;
+ break;
+ case TCG_TYPE_V64:
+ case TCG_TYPE_V128:
+ case TCG_TYPE_V256:
+ /* TCGOP_VECL and TCGOP_VECE remain unchanged. */
+ new_op = INDEX_op_mov_vec;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ op->opc = new_op;
+ op->args[0] = dst;
+ op->args[1] = src;
+
+ di->z_mask = si->z_mask;
+ di->s_mask = si->s_mask;
+
+ if (src_ts->type == dst_ts->type) {
+ TempOptInfo *ni = ts_info(si->next_copy);
+
+ di->next_copy = si->next_copy;
+ di->prev_copy = src_ts;
+ ni->prev_copy = dst_ts;
+ si->next_copy = dst_ts;
+ di->is_const = si->is_const;
+ di->val = si->val;
+ }
+ return true;
+}
+
+static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
+ TCGArg dst, uint64_t val)
+{
+ TCGTemp *tv;
+
+ if (ctx->type == TCG_TYPE_I32) {
+ val = (int32_t)val;
+ }
+
+ /* Convert movi to mov with constant temp. */
+ tv = tcg_constant_internal(ctx->type, val);
+ init_ts_info(ctx, tv);
+ return tcg_opt_gen_mov(ctx, op, dst, temp_arg(tv));
+}
+
+static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
+{
+ uint64_t l64, h64;
+
+ switch (op) {
+ CASE_OP_32_64(add):
+ return x + y;
+
+ CASE_OP_32_64(sub):
+ return x - y;
+
+ CASE_OP_32_64(mul):
+ return x * y;
+
+ CASE_OP_32_64(and):
+ return x & y;
+
+ CASE_OP_32_64(or):
+ return x | y;
+
+ CASE_OP_32_64(xor):
+ return x ^ y;
+
+ case INDEX_op_shl_i32:
+ return (uint32_t)x << (y & 31);
+
+ case INDEX_op_shl_i64:
+ return (uint64_t)x << (y & 63);
+
+ case INDEX_op_shr_i32:
+ return (uint32_t)x >> (y & 31);
+
+ case INDEX_op_shr_i64:
+ return (uint64_t)x >> (y & 63);
+
+ case INDEX_op_sar_i32:
+ return (int32_t)x >> (y & 31);
+
+ case INDEX_op_sar_i64:
+ return (int64_t)x >> (y & 63);
+
+ case INDEX_op_rotr_i32:
+ return ror32(x, y & 31);
+
+ case INDEX_op_rotr_i64:
+ return ror64(x, y & 63);
+
+ case INDEX_op_rotl_i32:
+ return rol32(x, y & 31);
+
+ case INDEX_op_rotl_i64:
+ return rol64(x, y & 63);
+
+ CASE_OP_32_64(not):
+ return ~x;
+
+ CASE_OP_32_64(neg):
+ return -x;
+
+ CASE_OP_32_64(andc):
+ return x & ~y;
+
+ CASE_OP_32_64(orc):
+ return x | ~y;
+
+ CASE_OP_32_64(eqv):
+ return ~(x ^ y);
+
+ CASE_OP_32_64(nand):
+ return ~(x & y);
+
+ CASE_OP_32_64(nor):
+ return ~(x | y);
+
+ case INDEX_op_clz_i32:
+ return (uint32_t)x ? clz32(x) : y;
+
+ case INDEX_op_clz_i64:
+ return x ? clz64(x) : y;
+
+ case INDEX_op_ctz_i32:
+ return (uint32_t)x ? ctz32(x) : y;
+
+ case INDEX_op_ctz_i64:
+ return x ? ctz64(x) : y;
+
+ case INDEX_op_ctpop_i32:
+ return ctpop32(x);
+
+ case INDEX_op_ctpop_i64:
+ return ctpop64(x);
+
+ CASE_OP_32_64(ext8s):
+ return (int8_t)x;
+
+ CASE_OP_32_64(ext16s):
+ return (int16_t)x;
+
+ CASE_OP_32_64(ext8u):
+ return (uint8_t)x;
+
+ CASE_OP_32_64(ext16u):
+ return (uint16_t)x;
+
+ CASE_OP_32_64(bswap16):
+ x = bswap16(x);
+ return y & TCG_BSWAP_OS ? (int16_t)x : x;
+
+ CASE_OP_32_64(bswap32):
+ x = bswap32(x);
+ return y & TCG_BSWAP_OS ? (int32_t)x : x;
+
+ case INDEX_op_bswap64_i64:
+ return bswap64(x);
+
+ case INDEX_op_ext_i32_i64:
+ case INDEX_op_ext32s_i64:
+ return (int32_t)x;
+
+ case INDEX_op_extu_i32_i64:
+ case INDEX_op_extrl_i64_i32:
+ case INDEX_op_ext32u_i64:
+ return (uint32_t)x;
+
+ case INDEX_op_extrh_i64_i32:
+ return (uint64_t)x >> 32;
+
+ case INDEX_op_muluh_i32:
+ return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
+ case INDEX_op_mulsh_i32:
+ return ((int64_t)(int32_t)x * (int32_t)y) >> 32;
+
+ case INDEX_op_muluh_i64:
+ mulu64(&l64, &h64, x, y);
+ return h64;
+ case INDEX_op_mulsh_i64:
+ muls64(&l64, &h64, x, y);
+ return h64;
+
+ case INDEX_op_div_i32:
+ /* Avoid crashing on divide by zero, otherwise undefined. */
+ return (int32_t)x / ((int32_t)y ? : 1);
+ case INDEX_op_divu_i32:
+ return (uint32_t)x / ((uint32_t)y ? : 1);
+ case INDEX_op_div_i64:
+ return (int64_t)x / ((int64_t)y ? : 1);
+ case INDEX_op_divu_i64:
+ return (uint64_t)x / ((uint64_t)y ? : 1);
+
+ case INDEX_op_rem_i32:
+ return (int32_t)x % ((int32_t)y ? : 1);
+ case INDEX_op_remu_i32:
+ return (uint32_t)x % ((uint32_t)y ? : 1);
+ case INDEX_op_rem_i64:
+ return (int64_t)x % ((int64_t)y ? : 1);
+ case INDEX_op_remu_i64:
+ return (uint64_t)x % ((uint64_t)y ? : 1);
+
+ default:
+ fprintf(stderr,
+ "Unrecognized operation %d in do_constant_folding.\n", op);
+ tcg_abort();
+ }
+}
+
+static uint64_t do_constant_folding(TCGOpcode op, TCGType type,
+ uint64_t x, uint64_t y)
+{
+ uint64_t res = do_constant_folding_2(op, x, y);
+ if (type == TCG_TYPE_I32) {
+ res = (int32_t)res;
+ }
+ return res;
+}
+
+static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
+{
+ switch (c) {
+ case TCG_COND_EQ:
+ return x == y;
+ case TCG_COND_NE:
+ return x != y;
+ case TCG_COND_LT:
+ return (int32_t)x < (int32_t)y;
+ case TCG_COND_GE:
+ return (int32_t)x >= (int32_t)y;
+ case TCG_COND_LE:
+ return (int32_t)x <= (int32_t)y;
+ case TCG_COND_GT:
+ return (int32_t)x > (int32_t)y;
+ case TCG_COND_LTU:
+ return x < y;
+ case TCG_COND_GEU:
+ return x >= y;
+ case TCG_COND_LEU:
+ return x <= y;
+ case TCG_COND_GTU:
+ return x > y;
+ default:
+ tcg_abort();
+ }
+}
+
+static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
+{
+ switch (c) {
+ case TCG_COND_EQ:
+ return x == y;
+ case TCG_COND_NE:
+ return x != y;
+ case TCG_COND_LT:
+ return (int64_t)x < (int64_t)y;
+ case TCG_COND_GE:
+ return (int64_t)x >= (int64_t)y;
+ case TCG_COND_LE:
+ return (int64_t)x <= (int64_t)y;
+ case TCG_COND_GT:
+ return (int64_t)x > (int64_t)y;
+ case TCG_COND_LTU:
+ return x < y;
+ case TCG_COND_GEU:
+ return x >= y;
+ case TCG_COND_LEU:
+ return x <= y;
+ case TCG_COND_GTU:
+ return x > y;
+ default:
+ tcg_abort();
+ }
+}
+
+static bool do_constant_folding_cond_eq(TCGCond c)
+{
+ switch (c) {
+ case TCG_COND_GT:
+ case TCG_COND_LTU:
+ case TCG_COND_LT:
+ case TCG_COND_GTU:
+ case TCG_COND_NE:
+ return 0;
+ case TCG_COND_GE:
+ case TCG_COND_GEU:
+ case TCG_COND_LE:
+ case TCG_COND_LEU:
+ case TCG_COND_EQ:
+ return 1;
+ default:
+ tcg_abort();
+ }
+}
+
+/*
+ * Return -1 if the condition can't be simplified,
+ * and the result of the condition (0 or 1) if it can.
+ */
+static int do_constant_folding_cond(TCGType type, TCGArg x,
+ TCGArg y, TCGCond c)
+{
+ uint64_t xv = arg_info(x)->val;
+ uint64_t yv = arg_info(y)->val;
+
+ if (arg_is_const(x) && arg_is_const(y)) {
+ switch (type) {
+ case TCG_TYPE_I32:
+ return do_constant_folding_cond_32(xv, yv, c);
+ case TCG_TYPE_I64:
+ return do_constant_folding_cond_64(xv, yv, c);
+ default:
+ /* Only scalar comparisons are optimizable */
+ return -1;
+ }
+ } else if (args_are_copies(x, y)) {
+ return do_constant_folding_cond_eq(c);
+ } else if (arg_is_const(y) && yv == 0) {
+ switch (c) {
+ case TCG_COND_LTU:
+ return 0;
+ case TCG_COND_GEU:
+ return 1;
+ default:
+ return -1;
+ }
+ }
+ return -1;
+}
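+
+/*
+ * For example (hypothetical arguments): two constants such as 2 LTU 3
+ * fold directly to 1; x LTU 0 folds to 0 and x GEU 0 folds to 1 for any
+ * x; and x EQ x folds to 1 via do_constant_folding_cond_eq().
+ */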
+
+/*
+ * Return -1 if the condition can't be simplified,
+ * and the result of the condition (0 or 1) if it can.
+ */
+static int do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
+{
+ TCGArg al = p1[0], ah = p1[1];
+ TCGArg bl = p2[0], bh = p2[1];
+
+ if (arg_is_const(bl) && arg_is_const(bh)) {
+ tcg_target_ulong blv = arg_info(bl)->val;
+ tcg_target_ulong bhv = arg_info(bh)->val;
+ uint64_t b = deposit64(blv, 32, 32, bhv);
+
+ if (arg_is_const(al) && arg_is_const(ah)) {
+ tcg_target_ulong alv = arg_info(al)->val;
+ tcg_target_ulong ahv = arg_info(ah)->val;
+ uint64_t a = deposit64(alv, 32, 32, ahv);
+ return do_constant_folding_cond_64(a, b, c);
+ }
+ if (b == 0) {
+ switch (c) {
+ case TCG_COND_LTU:
+ return 0;
+ case TCG_COND_GEU:
+ return 1;
+ default:
+ break;
+ }
+ }
+ }
+ if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
+ return do_constant_folding_cond_eq(c);
+ }
+ return -1;
+}
+
+/**
+ * swap_commutative:
+ * @dest: TCGArg of the destination argument, or NO_DEST.
+ * @p1: first paired argument
+ * @p2: second paired argument
+ *
+ * If *@p1 is a constant and *@p2 is not, swap.
+ * If *@p2 matches @dest, swap.
+ * Return true if a swap was performed.
+ */
+
+#define NO_DEST temp_arg(NULL)
+
+static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
+{
+ TCGArg a1 = *p1, a2 = *p2;
+ int sum = 0;
+ sum += arg_is_const(a1);
+ sum -= arg_is_const(a2);
+
+ /* Prefer the constant in second argument, and then the form
+ op a, a, b, which is better handled on non-RISC hosts. */
+ if (sum > 0 || (sum == 0 && dest == a2)) {
+ *p1 = a2;
+ *p2 = a1;
+ return true;
+ }
+ return false;
+}
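+
+/*
+ * Example (hypothetical IR): for "add_i32 t0, $5, t1" swap_commutative()
+ * moves the constant to the second operand, giving "add_i32 t0, t1, $5".
+ * For "and_i32 t0, t1, t0" the destination matches the second input, so
+ * the inputs are swapped to the preferred "op a, a, b" form.
+ */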
+
+static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
+{
+ int sum = 0;
+ sum += arg_is_const(p1[0]);
+ sum += arg_is_const(p1[1]);
+ sum -= arg_is_const(p2[0]);
+ sum -= arg_is_const(p2[1]);
+ if (sum > 0) {
+ TCGArg t;
+ t = p1[0], p1[0] = p2[0], p2[0] = t;
+ t = p1[1], p1[1] = p2[1], p2[1] = t;
+ return true;
+ }
+ return false;
+}
+
+static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args)
+{
+ for (int i = 0; i < nb_args; i++) {
+ TCGTemp *ts = arg_temp(op->args[i]);
+ if (ts) {
+ init_ts_info(ctx, ts);
+ }
+ }
+}
+
+static void copy_propagate(OptContext *ctx, TCGOp *op,
+ int nb_oargs, int nb_iargs)
+{
+ TCGContext *s = ctx->tcg;
+
+ for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
+ TCGTemp *ts = arg_temp(op->args[i]);
+ if (ts && ts_is_copy(ts)) {
+ op->args[i] = temp_arg(find_better_copy(s, ts));
+ }
+ }
+}
+
+static void finish_folding(OptContext *ctx, TCGOp *op)
+{
+ const TCGOpDef *def = &tcg_op_defs[op->opc];
+ int i, nb_oargs;
+
+ /*
+ * For an opcode that ends a BB, reset all temp data.
+ * We do no cross-BB optimization.
+ */
+ if (def->flags & TCG_OPF_BB_END) {
+ memset(&ctx->temps_used, 0, sizeof(ctx->temps_used));
+ ctx->prev_mb = NULL;
+ return;
+ }
+
+ nb_oargs = def->nb_oargs;
+ for (i = 0; i < nb_oargs; i++) {
+ TCGTemp *ts = arg_temp(op->args[i]);
+ reset_ts(ts);
+ /*
+ * Save the corresponding known-zero/sign bits mask for the
+ * first output argument (only one supported so far).
+ */
+ if (i == 0) {
+ ts_info(ts)->z_mask = ctx->z_mask;
+ ts_info(ts)->s_mask = ctx->s_mask;
+ }
+ }
+}
+
+/*
+ * The fold_* functions return true when processing is complete,
+ * usually by folding the operation to a constant or to a copy,
+ * and calling tcg_opt_gen_{mov,movi}. They may do other things,
+ * like collect information about the value produced, for use in
+ * optimizing a subsequent operation.
+ *
+ * These first fold_* functions are all helpers, used by other
+ * folders for more specific operations.
+ */
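+
+/*
+ * For example (hypothetical IR): "not_i32 t0, $5" is constant-folded by
+ * fold_const1() into "mov_i32 t0, $-6" (0xfffffffa as a 32-bit value),
+ * and the folder returns true to signal that processing is complete.
+ */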
+
+static bool fold_const1(OptContext *ctx, TCGOp *op)
+{
+ if (arg_is_const(op->args[1])) {
+ uint64_t t;
+
+ t = arg_info(op->args[1])->val;
+ t = do_constant_folding(op->opc, ctx->type, t, 0);
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t);
+ }
+ return false;
+}
+
+static bool fold_const2(OptContext *ctx, TCGOp *op)
+{
+ if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
+ uint64_t t1 = arg_info(op->args[1])->val;
+ uint64_t t2 = arg_info(op->args[2])->val;
+
+ t1 = do_constant_folding(op->opc, ctx->type, t1, t2);
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
+ }
+ return false;
+}
+
+static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
+{
+ swap_commutative(op->args[0], &op->args[1], &op->args[2]);
+ return fold_const2(ctx, op);
+}
+
+static bool fold_masks(OptContext *ctx, TCGOp *op)
+{
+ uint64_t a_mask = ctx->a_mask;
+ uint64_t z_mask = ctx->z_mask;
+ uint64_t s_mask = ctx->s_mask;
+
+ /*
+ * 32-bit ops generate 32-bit results, which for the purpose of
+ * simplifying tcg are sign-extended. Certainly that's how we
+ * represent our constants elsewhere. Note that the bits will
+ * be reset properly for a 64-bit value when encountering the
+ * type changing opcodes.
+ */
+ if (ctx->type == TCG_TYPE_I32) {
+ a_mask = (int32_t)a_mask;
+ z_mask = (int32_t)z_mask;
+ s_mask |= MAKE_64BIT_MASK(32, 32);
+ ctx->z_mask = z_mask;
+ ctx->s_mask = s_mask;
+ }
+
+ if (z_mask == 0) {
+ return tcg_opt_gen_movi(ctx, op, op->args[0], 0);
+ }
+ if (a_mask == 0) {
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
+ }
+ return false;
+}
+
+/*
+ * Convert @op to NOT, if NOT is supported by the host.
+ * Return true if the conversion is successful, which will still
+ * indicate that the processing is complete.
+ */
+static bool fold_not(OptContext *ctx, TCGOp *op);
+static bool fold_to_not(OptContext *ctx, TCGOp *op, int idx)
+{
+ TCGOpcode not_op;
+ bool have_not;
+
+ switch (ctx->type) {
+ case TCG_TYPE_I32:
+ not_op = INDEX_op_not_i32;
+ have_not = TCG_TARGET_HAS_not_i32;
+ break;
+ case TCG_TYPE_I64:
+ not_op = INDEX_op_not_i64;
+ have_not = TCG_TARGET_HAS_not_i64;
+ break;
+ case TCG_TYPE_V64:
+ case TCG_TYPE_V128:
+ case TCG_TYPE_V256:
+ not_op = INDEX_op_not_vec;
+ have_not = TCG_TARGET_HAS_not_vec;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ if (have_not) {
+ op->opc = not_op;
+ op->args[1] = op->args[idx];
+ return fold_not(ctx, op);
+ }
+ return false;
+}
+
+/* If the binary operation has first argument @i, fold to @i. */
+static bool fold_ix_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
+{
+ if (arg_is_const(op->args[1]) && arg_info(op->args[1])->val == i) {
+ return tcg_opt_gen_movi(ctx, op, op->args[0], i);
+ }
+ return false;
+}
+
+/* If the binary operation has first argument @i, fold to NOT. */
+static bool fold_ix_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
+{
+ if (arg_is_const(op->args[1]) && arg_info(op->args[1])->val == i) {
+ return fold_to_not(ctx, op, 2);
+ }
+ return false;
+}
+
+/* If the binary operation has second argument @i, fold to @i. */
+static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
+{
+ if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
+ return tcg_opt_gen_movi(ctx, op, op->args[0], i);
+ }
+ return false;
+}
+
+/* If the binary operation has second argument @i, fold to identity. */
+static bool fold_xi_to_x(OptContext *ctx, TCGOp *op, uint64_t i)
+{
+ if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
+ }
+ return false;
+}
+
+/* If the binary operation has second argument @i, fold to NOT. */
+static bool fold_xi_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
+{
+ if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
+ return fold_to_not(ctx, op, 1);
+ }
+ return false;
+}
+
+/* If the binary operation has both arguments equal, fold to @i. */
+static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
+{
+ if (args_are_copies(op->args[1], op->args[2])) {
+ return tcg_opt_gen_movi(ctx, op, op->args[0], i);
+ }
+ return false;
+}
+
+/* If the binary operation has both arguments equal, fold to identity. */
+static bool fold_xx_to_x(OptContext *ctx, TCGOp *op)
+{
+ if (args_are_copies(op->args[1], op->args[2])) {
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
+ }
+ return false;
+}
+
+/*
+ * These outermost fold_<op> functions are sorted alphabetically.
+ *
+ * The ordering of the transformations should be:
+ * 1) those that produce a constant
+ * 2) those that produce a copy
+ * 3) those that produce information about the result value.
+ */
+
+static bool fold_add(OptContext *ctx, TCGOp *op)
+{
+ if (fold_const2_commutative(ctx, op) ||
+ fold_xi_to_x(ctx, op, 0)) {
+ return true;
+ }
+ return false;
+}
+
+static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add)
+{
+ if (arg_is_const(op->args[2]) && arg_is_const(op->args[3]) &&
+ arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
+ uint64_t al = arg_info(op->args[2])->val;
+ uint64_t ah = arg_info(op->args[3])->val;
+ uint64_t bl = arg_info(op->args[4])->val;
+ uint64_t bh = arg_info(op->args[5])->val;
+ TCGArg rl, rh;
+ TCGOp *op2;
+
+ if (ctx->type == TCG_TYPE_I32) {
+ uint64_t a = deposit64(al, 32, 32, ah);
+ uint64_t b = deposit64(bl, 32, 32, bh);
+
+ if (add) {
+ a += b;
+ } else {
+ a -= b;
+ }
+
+ al = sextract64(a, 0, 32);
+ ah = sextract64(a, 32, 32);
+ } else {
+ Int128 a = int128_make128(al, ah);
+ Int128 b = int128_make128(bl, bh);
+
+ if (add) {
+ a = int128_add(a, b);
+ } else {
+ a = int128_sub(a, b);
+ }
+
+ al = int128_getlo(a);
+ ah = int128_gethi(a);
+ }
+
+ rl = op->args[0];
+ rh = op->args[1];
+
+ /* The proper opcode is supplied by tcg_opt_gen_mov. */
+ op2 = tcg_op_insert_before(ctx->tcg, op, 0);
+
+ tcg_opt_gen_movi(ctx, op, rl, al);
+ tcg_opt_gen_movi(ctx, op2, rh, ah);
+ return true;
+ }
+ return false;
+}
+
+static bool fold_add2(OptContext *ctx, TCGOp *op)
+{
+ /* Note that the high and low parts may be independently swapped. */
+ swap_commutative(op->args[0], &op->args[2], &op->args[4]);
+ swap_commutative(op->args[1], &op->args[3], &op->args[5]);
+
+ return fold_addsub2(ctx, op, true);
+}
+
+static bool fold_and(OptContext *ctx, TCGOp *op)
+{
+ uint64_t z1, z2;
+
+ if (fold_const2_commutative(ctx, op) ||
+ fold_xi_to_i(ctx, op, 0) ||
+ fold_xi_to_x(ctx, op, -1) ||
+ fold_xx_to_x(ctx, op)) {
+ return true;
+ }
+
+ z1 = arg_info(op->args[1])->z_mask;
+ z2 = arg_info(op->args[2])->z_mask;
+ ctx->z_mask = z1 & z2;
+
+ /*
+ * Sign repetitions are perforce all identical, whether they are 1 or 0.
+ * Bitwise operations preserve the relative quantity of the repetitions.
+ */
+ ctx->s_mask = arg_info(op->args[1])->s_mask
+ & arg_info(op->args[2])->s_mask;
+
+ /*
+ * Known-zeros does not imply known-ones. Therefore unless
+ * arg2 is constant, we can't infer affected bits from it.
+ */
+ if (arg_is_const(op->args[2])) {
+ ctx->a_mask = z1 & ~z2;
+ }
+
+ return fold_masks(ctx, op);
+}
+
+static bool fold_andc(OptContext *ctx, TCGOp *op)
+{
+ uint64_t z1;
+
+ if (fold_const2(ctx, op) ||
+ fold_xx_to_i(ctx, op, 0) ||
+ fold_xi_to_x(ctx, op, 0) ||
+ fold_ix_to_not(ctx, op, -1)) {
+ return true;
+ }
+
+ z1 = arg_info(op->args[1])->z_mask;
+
+ /*
+ * Known-zeros does not imply known-ones. Therefore unless
+ * arg2 is constant, we can't infer anything from it.
+ */
+ if (arg_is_const(op->args[2])) {
+ uint64_t z2 = ~arg_info(op->args[2])->z_mask;
+ ctx->a_mask = z1 & ~z2;
+ z1 &= z2;
+ }
+ ctx->z_mask = z1;
+
+ ctx->s_mask = arg_info(op->args[1])->s_mask
+ & arg_info(op->args[2])->s_mask;
+ return fold_masks(ctx, op);
+}
+
+static bool fold_brcond(OptContext *ctx, TCGOp *op)
+{
+ TCGCond cond = op->args[2];
+ int i;
+
+ if (swap_commutative(NO_DEST, &op->args[0], &op->args[1])) {
+ op->args[2] = cond = tcg_swap_cond(cond);
+ }
+
+ i = do_constant_folding_cond(ctx->type, op->args[0], op->args[1], cond);
+ if (i == 0) {
+ tcg_op_remove(ctx->tcg, op);
+ return true;
+ }
+ if (i > 0) {
+ op->opc = INDEX_op_br;
+ op->args[0] = op->args[3];
+ }
+ return false;
+}
+
+static bool fold_brcond2(OptContext *ctx, TCGOp *op)
+{
+ TCGCond cond = op->args[4];
+ TCGArg label = op->args[5];
+ int i, inv = 0;
+
+ if (swap_commutative2(&op->args[0], &op->args[2])) {
+ op->args[4] = cond = tcg_swap_cond(cond);
+ }
+
+ i = do_constant_folding_cond2(&op->args[0], &op->args[2], cond);
+ if (i >= 0) {
+ goto do_brcond_const;
+ }
+
+ switch (cond) {
+ case TCG_COND_LT:
+ case TCG_COND_GE:
+ /*
+ * Simplify LT/GE comparisons vs zero to a single compare
+ * vs the high word of the input.
+ */
+ if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == 0 &&
+ arg_is_const(op->args[3]) && arg_info(op->args[3])->val == 0) {
+ goto do_brcond_high;
+ }
+ break;
+
+ case TCG_COND_NE:
+ inv = 1;
+ QEMU_FALLTHROUGH;
+ case TCG_COND_EQ:
+ /*
+ * Simplify EQ/NE comparisons where one of the pairs
+ * can be simplified.
+ */
+ i = do_constant_folding_cond(TCG_TYPE_I32, op->args[0],
+ op->args[2], cond);
+ switch (i ^ inv) {
+ case 0:
+ goto do_brcond_const;
+ case 1:
+ goto do_brcond_high;
+ }
+
+ i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
+ op->args[3], cond);
+ switch (i ^ inv) {
+ case 0:
+ goto do_brcond_const;
+ case 1:
+ op->opc = INDEX_op_brcond_i32;
+ op->args[1] = op->args[2];
+ op->args[2] = cond;
+ op->args[3] = label;
+ break;
+ }
+ break;
+
+ default:
+ break;
+
+ do_brcond_high:
+ op->opc = INDEX_op_brcond_i32;
+ op->args[0] = op->args[1];
+ op->args[1] = op->args[3];
+ op->args[2] = cond;
+ op->args[3] = label;
+ break;
+
+ do_brcond_const:
+ if (i == 0) {
+ tcg_op_remove(ctx->tcg, op);
+ return true;
+ }
+ op->opc = INDEX_op_br;
+ op->args[0] = label;
+ break;
+ }
+ return false;
+}
+
+static bool fold_bswap(OptContext *ctx, TCGOp *op)
+{
+ uint64_t z_mask, s_mask, sign;
+
+ if (arg_is_const(op->args[1])) {
+ uint64_t t = arg_info(op->args[1])->val;
+
+ t = do_constant_folding(op->opc, ctx->type, t, op->args[2]);
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t);
+ }
+
+ z_mask = arg_info(op->args[1])->z_mask;
+
+ switch (op->opc) {
+ case INDEX_op_bswap16_i32:
+ case INDEX_op_bswap16_i64:
+ z_mask = bswap16(z_mask);
+ sign = INT16_MIN;
+ break;
+ case INDEX_op_bswap32_i32:
+ case INDEX_op_bswap32_i64:
+ z_mask = bswap32(z_mask);
+ sign = INT32_MIN;
+ break;
+ case INDEX_op_bswap64_i64:
+ z_mask = bswap64(z_mask);
+ sign = INT64_MIN;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ s_mask = smask_from_zmask(z_mask);
+
+ switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
+ case TCG_BSWAP_OZ:
+ break;
+ case TCG_BSWAP_OS:
+ /* If the sign bit may be 1, force all the bits above to 1. */
+ if (z_mask & sign) {
+ z_mask |= sign;
+ s_mask = sign << 1;
+ }
+ break;
+ default:
+ /* The high bits are undefined: force all bits above the sign to 1. */
+ z_mask |= sign << 1;
+ s_mask = 0;
+ break;
+ }
+ ctx->z_mask = z_mask;
+ ctx->s_mask = s_mask;
+
+ return fold_masks(ctx, op);
+}
+
+static bool fold_call(OptContext *ctx, TCGOp *op)
+{
+ TCGContext *s = ctx->tcg;
+ int nb_oargs = TCGOP_CALLO(op);
+ int nb_iargs = TCGOP_CALLI(op);
+ int flags, i;
+
+ init_arguments(ctx, op, nb_oargs + nb_iargs);
+ copy_propagate(ctx, op, nb_oargs, nb_iargs);
+
+ /* If the function reads or writes globals, reset temp data. */
+ flags = tcg_call_flags(op);
+ if (!(flags & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
+ int nb_globals = s->nb_globals;
+
+ for (i = 0; i < nb_globals; i++) {
+ if (test_bit(i, ctx->temps_used.l)) {
+ reset_ts(&ctx->tcg->temps[i]);
+ }
+ }
+ }
+
+ /* Reset temp data for outputs. */
+ for (i = 0; i < nb_oargs; i++) {
+ reset_temp(op->args[i]);
+ }
+
+ /* Stop optimizing MB across calls. */
+ ctx->prev_mb = NULL;
+ return true;
+}
+
+static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
+{
+ uint64_t z_mask;
+
+ if (arg_is_const(op->args[1])) {
+ uint64_t t = arg_info(op->args[1])->val;
+
+ if (t != 0) {
+ t = do_constant_folding(op->opc, ctx->type, t, 0);
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t);
+ }
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
+ }
+
+ switch (ctx->type) {
+ case TCG_TYPE_I32:
+ z_mask = 31;
+ break;
+ case TCG_TYPE_I64:
+ z_mask = 63;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ ctx->z_mask = arg_info(op->args[2])->z_mask | z_mask;
+ ctx->s_mask = smask_from_zmask(ctx->z_mask);
+ return false;
+}
+
+static bool fold_ctpop(OptContext *ctx, TCGOp *op)
+{
+ if (fold_const1(ctx, op)) {
+ return true;
+ }
+
+ switch (ctx->type) {
+ case TCG_TYPE_I32:
+ ctx->z_mask = 32 | 31;
+ break;
+ case TCG_TYPE_I64:
+ ctx->z_mask = 64 | 63;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ ctx->s_mask = smask_from_zmask(ctx->z_mask);
+ return false;
+}
+
+static bool fold_deposit(OptContext *ctx, TCGOp *op)
+{
+ if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
+ uint64_t t1 = arg_info(op->args[1])->val;
+ uint64_t t2 = arg_info(op->args[2])->val;
+
+ t1 = deposit64(t1, op->args[3], op->args[4], t2);
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
+ }
+
+ ctx->z_mask = deposit64(arg_info(op->args[1])->z_mask,
+ op->args[3], op->args[4],
+ arg_info(op->args[2])->z_mask);
+ return false;
+}
+
+static bool fold_divide(OptContext *ctx, TCGOp *op)
+{
+ if (fold_const2(ctx, op) ||
+ fold_xi_to_x(ctx, op, 1)) {
+ return true;
+ }
+ return false;
+}
+
+static bool fold_dup(OptContext *ctx, TCGOp *op)
+{
+ if (arg_is_const(op->args[1])) {
+ uint64_t t = arg_info(op->args[1])->val;
+ t = dup_const(TCGOP_VECE(op), t);
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t);
+ }
+ return false;
+}
+
+static bool fold_dup2(OptContext *ctx, TCGOp *op)
+{
+ if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
+ uint64_t t = deposit64(arg_info(op->args[1])->val, 32, 32,
+ arg_info(op->args[2])->val);
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t);
+ }
+
+ if (args_are_copies(op->args[1], op->args[2])) {
+ op->opc = INDEX_op_dup_vec;
+ TCGOP_VECE(op) = MO_32;
+ }
+ return false;
+}
+
+static bool fold_eqv(OptContext *ctx, TCGOp *op)
+{
+ if (fold_const2_commutative(ctx, op) ||
+ fold_xi_to_x(ctx, op, -1) ||
+ fold_xi_to_not(ctx, op, 0)) {
+ return true;
+ }
+
+ ctx->s_mask = arg_info(op->args[1])->s_mask
+ & arg_info(op->args[2])->s_mask;
+ return false;
+}
+
+static bool fold_extract(OptContext *ctx, TCGOp *op)
+{
+ uint64_t z_mask_old, z_mask;
+ int pos = op->args[2];
+ int len = op->args[3];
+
+ if (arg_is_const(op->args[1])) {
+ uint64_t t;
+
+ t = arg_info(op->args[1])->val;
+ t = extract64(t, pos, len);
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t);
+ }
+
+ z_mask_old = arg_info(op->args[1])->z_mask;
+ z_mask = extract64(z_mask_old, pos, len);
+ if (pos == 0) {
+ ctx->a_mask = z_mask_old ^ z_mask;
+ }
+ ctx->z_mask = z_mask;
+ ctx->s_mask = smask_from_zmask(z_mask);
+
+ return fold_masks(ctx, op);
+}
+
+static bool fold_extract2(OptContext *ctx, TCGOp *op)
+{
+ if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
+ uint64_t v1 = arg_info(op->args[1])->val;
+ uint64_t v2 = arg_info(op->args[2])->val;
+ int shr = op->args[3];
+
+ if (op->opc == INDEX_op_extract2_i64) {
+ v1 >>= shr;
+ v2 <<= 64 - shr;
+ } else {
+ v1 = (uint32_t)v1 >> shr;
+ v2 = (uint64_t)((int32_t)v2 << (32 - shr));
+ }
+ return tcg_opt_gen_movi(ctx, op, op->args[0], v1 | v2);
+ }
+ return false;
+}
+
+static bool fold_exts(OptContext *ctx, TCGOp *op)
+{
+ uint64_t s_mask_old, s_mask, z_mask, sign;
+ bool type_change = false;
+
+ if (fold_const1(ctx, op)) {
+ return true;
+ }
+
+ z_mask = arg_info(op->args[1])->z_mask;
+ s_mask = arg_info(op->args[1])->s_mask;
+ s_mask_old = s_mask;
+
+ switch (op->opc) {
+ CASE_OP_32_64(ext8s):
+ sign = INT8_MIN;
+ z_mask = (uint8_t)z_mask;
+ break;
+ CASE_OP_32_64(ext16s):
+ sign = INT16_MIN;
+ z_mask = (uint16_t)z_mask;
+ break;
+ case INDEX_op_ext_i32_i64:
+ type_change = true;
+ QEMU_FALLTHROUGH;
+ case INDEX_op_ext32s_i64:
+ sign = INT32_MIN;
+ z_mask = (uint32_t)z_mask;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ if (z_mask & sign) {
+ z_mask |= sign;
+ }
+ s_mask |= sign << 1;
+
+ ctx->z_mask = z_mask;
+ ctx->s_mask = s_mask;
+ if (!type_change) {
+ ctx->a_mask = s_mask & ~s_mask_old;
+ }
+
+ return fold_masks(ctx, op);
+}
+
+static bool fold_extu(OptContext *ctx, TCGOp *op)
+{
+ uint64_t z_mask_old, z_mask;
+ bool type_change = false;
+
+ if (fold_const1(ctx, op)) {
+ return true;
+ }
+
+ z_mask_old = z_mask = arg_info(op->args[1])->z_mask;
+
+ switch (op->opc) {
+ CASE_OP_32_64(ext8u):
+ z_mask = (uint8_t)z_mask;
+ break;
+ CASE_OP_32_64(ext16u):
+ z_mask = (uint16_t)z_mask;
+ break;
+ case INDEX_op_extrl_i64_i32:
+ case INDEX_op_extu_i32_i64:
+ type_change = true;
+ QEMU_FALLTHROUGH;
+ case INDEX_op_ext32u_i64:
+ z_mask = (uint32_t)z_mask;
+ break;
+ case INDEX_op_extrh_i64_i32:
+ type_change = true;
+ z_mask >>= 32;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ ctx->z_mask = z_mask;
+ ctx->s_mask = smask_from_zmask(z_mask);
+ if (!type_change) {
+ ctx->a_mask = z_mask_old ^ z_mask;
+ }
+ return fold_masks(ctx, op);
+}
+
+static bool fold_mb(OptContext *ctx, TCGOp *op)
+{
+ /* Eliminate duplicate and redundant fence instructions. */
+ if (ctx->prev_mb) {
+ /*
+ * Merge two barriers of the same type into one,
+ * or a weaker barrier into a stronger one,
+ * or two weaker barriers into a stronger one.
+ * mb X; mb Y => mb X|Y
+ * mb; strl => mb; st
+ * ldaq; mb => ld; mb
+ * ldaq; strl => ld; mb; st
+ * Other combinations are also merged into a strong
+ * barrier. This is stricter than specified but for
+ * the purposes of TCG is better than not optimizing.
+ */
+ ctx->prev_mb->args[0] |= op->args[0];
+ tcg_op_remove(ctx->tcg, op);
+ } else {
+ ctx->prev_mb = op;
+ }
+ return true;
+}
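+
+/*
+ * Example (hypothetical sequence): "mb TCG_BAR_LDAQ|TCG_MO_LD_LD" followed
+ * by "mb TCG_BAR_STRL|TCG_MO_ST_ST" is merged into a single
+ * "mb TCG_BAR_LDAQ|TCG_BAR_STRL|TCG_MO_LD_LD|TCG_MO_ST_ST", since
+ * op->args[0] holds the barrier flag bits and merging is a bitwise OR.
+ */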
+
+static bool fold_mov(OptContext *ctx, TCGOp *op)
+{
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
+}
+
+static bool fold_movcond(OptContext *ctx, TCGOp *op)
+{
+ TCGCond cond = op->args[5];
+ int i;
+
+ if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
+ op->args[5] = cond = tcg_swap_cond(cond);
+ }
+ /*
+ * Canonicalize the "false" input reg to match the destination reg so
+ * that the tcg backend can implement a "move if true" operation.
+ */
+ if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
+ op->args[5] = cond = tcg_invert_cond(cond);
+ }
+
+ i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
+ if (i >= 0) {
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
+ }
+
+ ctx->z_mask = arg_info(op->args[3])->z_mask
+ | arg_info(op->args[4])->z_mask;
+ ctx->s_mask = arg_info(op->args[3])->s_mask
+ & arg_info(op->args[4])->s_mask;
+
+ if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
+ uint64_t tv = arg_info(op->args[3])->val;
+ uint64_t fv = arg_info(op->args[4])->val;
+ TCGOpcode opc;
+
+ switch (ctx->type) {
+ case TCG_TYPE_I32:
+ opc = INDEX_op_setcond_i32;
+ break;
+ case TCG_TYPE_I64:
+ opc = INDEX_op_setcond_i64;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ if (tv == 1 && fv == 0) {
+ op->opc = opc;
+ op->args[3] = cond;
+ } else if (fv == 1 && tv == 0) {
+ op->opc = opc;
+ op->args[3] = tcg_invert_cond(cond);
+ }
+ }
+ return false;
+}
+
+static bool fold_mul(OptContext *ctx, TCGOp *op)
+{
+ if (fold_const2(ctx, op) ||
+ fold_xi_to_i(ctx, op, 0) ||
+ fold_xi_to_x(ctx, op, 1)) {
+ return true;
+ }
+ return false;
+}
+
+static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
+{
+ if (fold_const2_commutative(ctx, op) ||
+ fold_xi_to_i(ctx, op, 0)) {
+ return true;
+ }
+ return false;
+}
+
+static bool fold_multiply2(OptContext *ctx, TCGOp *op)
+{
+ swap_commutative(op->args[0], &op->args[2], &op->args[3]);
+
+ if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
+ uint64_t a = arg_info(op->args[2])->val;
+ uint64_t b = arg_info(op->args[3])->val;
+ uint64_t h, l;
+ TCGArg rl, rh;
+ TCGOp *op2;
+
+ switch (op->opc) {
+ case INDEX_op_mulu2_i32:
+ l = (uint64_t)(uint32_t)a * (uint32_t)b;
+ h = (int32_t)(l >> 32);
+ l = (int32_t)l;
+ break;
+ case INDEX_op_muls2_i32:
+ l = (int64_t)(int32_t)a * (int32_t)b;
+ h = l >> 32;
+ l = (int32_t)l;
+ break;
+ case INDEX_op_mulu2_i64:
+ mulu64(&l, &h, a, b);
+ break;
+ case INDEX_op_muls2_i64:
+ muls64(&l, &h, a, b);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ rl = op->args[0];
+ rh = op->args[1];
+
+ /* The proper opcode is supplied by tcg_opt_gen_mov. */
+ op2 = tcg_op_insert_before(ctx->tcg, op, 0);
+
+ tcg_opt_gen_movi(ctx, op, rl, l);
+ tcg_opt_gen_movi(ctx, op2, rh, h);
+ return true;
+ }
+ return false;
+}
+
+static bool fold_nand(OptContext *ctx, TCGOp *op)
+{
+ if (fold_const2_commutative(ctx, op) ||
+ fold_xi_to_not(ctx, op, -1)) {
+ return true;
+ }
+
+ ctx->s_mask = arg_info(op->args[1])->s_mask
+ & arg_info(op->args[2])->s_mask;
+ return false;
+}
+
+static bool fold_neg(OptContext *ctx, TCGOp *op)
+{
+ uint64_t z_mask;
+
+ if (fold_const1(ctx, op)) {
+ return true;
+ }
+
+    /* Set to 1 all bits from the rightmost possibly-set input bit
+       upward; negation cannot produce set bits below it. */
+ z_mask = arg_info(op->args[1])->z_mask;
+ ctx->z_mask = -(z_mask & -z_mask);
+
+ /*
+ * Because of fold_sub_to_neg, we want to always return true,
+ * via finish_folding.
+ */
+ finish_folding(ctx, op);
+ return true;
+}
+
+static bool fold_nor(OptContext *ctx, TCGOp *op)
+{
+ if (fold_const2_commutative(ctx, op) ||
+ fold_xi_to_not(ctx, op, 0)) {
+ return true;
+ }
+
+ ctx->s_mask = arg_info(op->args[1])->s_mask
+ & arg_info(op->args[2])->s_mask;
+ return false;
+}
+
+static bool fold_not(OptContext *ctx, TCGOp *op)
+{
+ if (fold_const1(ctx, op)) {
+ return true;
+ }
+
+ ctx->s_mask = arg_info(op->args[1])->s_mask;
+
+ /* Because of fold_to_not, we want to always return true, via finish. */
+ finish_folding(ctx, op);
+ return true;
+}
+
+static bool fold_or(OptContext *ctx, TCGOp *op)
+{
+ if (fold_const2_commutative(ctx, op) ||
+ fold_xi_to_x(ctx, op, 0) ||
+ fold_xx_to_x(ctx, op)) {
+ return true;
+ }
+
+ ctx->z_mask = arg_info(op->args[1])->z_mask
+ | arg_info(op->args[2])->z_mask;
+ ctx->s_mask = arg_info(op->args[1])->s_mask
+ & arg_info(op->args[2])->s_mask;
+ return fold_masks(ctx, op);
+}
+
+static bool fold_orc(OptContext *ctx, TCGOp *op)
+{
+ if (fold_const2(ctx, op) ||
+ fold_xx_to_i(ctx, op, -1) ||
+ fold_xi_to_x(ctx, op, -1) ||
+ fold_ix_to_not(ctx, op, 0)) {
+ return true;
+ }
+
+ ctx->s_mask = arg_info(op->args[1])->s_mask
+ & arg_info(op->args[2])->s_mask;
+ return false;
+}
+
+static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
+{
+ const TCGOpDef *def = &tcg_op_defs[op->opc];
+ MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs];
+ MemOp mop = get_memop(oi);
+ int width = 8 * memop_size(mop);
+
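+    /*
+     * A load narrower than 64 bits determines the upper bits of the
+     * result: copies of the sign bit for MO_SIGN, zeros otherwise.
+     */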
+ if (width < 64) {
+ ctx->s_mask = MAKE_64BIT_MASK(width, 64 - width);
+ if (!(mop & MO_SIGN)) {
+ ctx->z_mask = MAKE_64BIT_MASK(0, width);
+ ctx->s_mask <<= 1;
+ }
+ }
+
+ /* Opcodes that touch guest memory stop the mb optimization. */
+ ctx->prev_mb = NULL;
+ return false;
+}
+
+static bool fold_qemu_st(OptContext *ctx, TCGOp *op)
+{
+ /* Opcodes that touch guest memory stop the mb optimization. */
+ ctx->prev_mb = NULL;
+ return false;
+}
+
+static bool fold_remainder(OptContext *ctx, TCGOp *op)
+{
+ if (fold_const2(ctx, op) ||
+ fold_xx_to_i(ctx, op, 0)) {
+ return true;
+ }
+ return false;
+}
+
+static bool fold_setcond(OptContext *ctx, TCGOp *op)
+{
+ TCGCond cond = op->args[3];
+ int i;
+
+ if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) {
+ op->args[3] = cond = tcg_swap_cond(cond);
+ }
+
+ i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
+ if (i >= 0) {
+ return tcg_opt_gen_movi(ctx, op, op->args[0], i);
+ }
+
+ ctx->z_mask = 1;
+ ctx->s_mask = smask_from_zmask(1);
+ return false;
+}
+
+static bool fold_setcond2(OptContext *ctx, TCGOp *op)
+{
+ TCGCond cond = op->args[5];
+ int i, inv = 0;
+
+ if (swap_commutative2(&op->args[1], &op->args[3])) {
+ op->args[5] = cond = tcg_swap_cond(cond);
+ }
+
+ i = do_constant_folding_cond2(&op->args[1], &op->args[3], cond);
+ if (i >= 0) {
+ goto do_setcond_const;
+ }
+
+ switch (cond) {
+ case TCG_COND_LT:
+ case TCG_COND_GE:
+ /*
+ * Simplify LT/GE comparisons vs zero to a single compare
+ * vs the high word of the input.
+ */
+ if (arg_is_const(op->args[3]) && arg_info(op->args[3])->val == 0 &&
+ arg_is_const(op->args[4]) && arg_info(op->args[4])->val == 0) {
+ goto do_setcond_high;
+ }
+ break;
+
+ case TCG_COND_NE:
+ inv = 1;
+ QEMU_FALLTHROUGH;
+ case TCG_COND_EQ:
+ /*
+ * Simplify EQ/NE comparisons where one of the pairs
+ * can be simplified.
+ */
+ i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
+ op->args[3], cond);
+ switch (i ^ inv) {
+ case 0:
+ goto do_setcond_const;
+ case 1:
+ goto do_setcond_high;
+ }
+
+ i = do_constant_folding_cond(TCG_TYPE_I32, op->args[2],
+ op->args[4], cond);
+ switch (i ^ inv) {
+ case 0:
+ goto do_setcond_const;
+ case 1:
+ op->args[2] = op->args[3];
+ op->args[3] = cond;
+ op->opc = INDEX_op_setcond_i32;
+ break;
+ }
+ break;
+
+ default:
+ break;
+
+ do_setcond_high:
+ op->args[1] = op->args[2];
+ op->args[2] = op->args[4];
+ op->args[3] = cond;
+ op->opc = INDEX_op_setcond_i32;
+ break;
+ }
+
+ ctx->z_mask = 1;
+ ctx->s_mask = smask_from_zmask(1);
+ return false;
+
+ do_setcond_const:
+ return tcg_opt_gen_movi(ctx, op, op->args[0], i);
+}
+
+static bool fold_sextract(OptContext *ctx, TCGOp *op)
+{
+ uint64_t z_mask, s_mask, s_mask_old;
+ int pos = op->args[2];
+ int len = op->args[3];
+
+ if (arg_is_const(op->args[1])) {
+ uint64_t t;
+
+ t = arg_info(op->args[1])->val;
+ t = sextract64(t, pos, len);
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t);
+ }
+
+ z_mask = arg_info(op->args[1])->z_mask;
+ z_mask = sextract64(z_mask, pos, len);
+ ctx->z_mask = z_mask;
+
+ s_mask_old = arg_info(op->args[1])->s_mask;
+ s_mask = sextract64(s_mask_old, pos, len);
+ s_mask |= MAKE_64BIT_MASK(len, 64 - len);
+ ctx->s_mask = s_mask;
+
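+    /*
+     * For an extract starting at bit 0, the operation only matters if
+     * it adds sign copies that the input did not already guarantee;
+     * a_mask records those newly claimed bits.
+     */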
+ if (pos == 0) {
+ ctx->a_mask = s_mask & ~s_mask_old;
+ }
+
+ return fold_masks(ctx, op);
+}
+
+static bool fold_shift(OptContext *ctx, TCGOp *op)
+{
+ uint64_t s_mask, z_mask, sign;
+
+ if (fold_const2(ctx, op) ||
+ fold_ix_to_i(ctx, op, 0) ||
+ fold_xi_to_x(ctx, op, 0)) {
+ return true;
+ }
+
+ s_mask = arg_info(op->args[1])->s_mask;
+ z_mask = arg_info(op->args[1])->z_mask;
+
+ if (arg_is_const(op->args[2])) {
+ int sh = arg_info(op->args[2])->val;
+
+ ctx->z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh);
+
+ s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh);
+ ctx->s_mask = smask_from_smask(s_mask);
+
+ return fold_masks(ctx, op);
+ }
+
+ switch (op->opc) {
+ CASE_OP_32_64(sar):
+ /*
+ * Arithmetic right shift will not reduce the number of
+ * input sign repetitions.
+ */
+ ctx->s_mask = s_mask;
+ break;
+ CASE_OP_32_64(shr):
+ /*
+ * If the sign bit is known zero, then logical right shift
+             * will not reduce the number of input sign repetitions.
+ */
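+            /*
+             * "sign" is the lowest bit position that is guaranteed to
+             * be a copy of the sign bit.
+             */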
+ sign = (s_mask & -s_mask) >> 1;
+ if (!(z_mask & sign)) {
+ ctx->s_mask = s_mask;
+ }
+ break;
+ default:
+ break;
+ }
+
+ return false;
+}
+
+static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op)
+{
+ TCGOpcode neg_op;
+ bool have_neg;
+
+ if (!arg_is_const(op->args[1]) || arg_info(op->args[1])->val != 0) {
+ return false;
+ }
+
+ switch (ctx->type) {
+ case TCG_TYPE_I32:
+ neg_op = INDEX_op_neg_i32;
+ have_neg = TCG_TARGET_HAS_neg_i32;
+ break;
+ case TCG_TYPE_I64:
+ neg_op = INDEX_op_neg_i64;
+ have_neg = TCG_TARGET_HAS_neg_i64;
+ break;
+ case TCG_TYPE_V64:
+ case TCG_TYPE_V128:
+ case TCG_TYPE_V256:
+ neg_op = INDEX_op_neg_vec;
+ have_neg = (TCG_TARGET_HAS_neg_vec &&
+ tcg_can_emit_vec_op(neg_op, ctx->type, TCGOP_VECE(op)) > 0);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ if (have_neg) {
+ op->opc = neg_op;
+ op->args[1] = op->args[2];
+ return fold_neg(ctx, op);
+ }
+ return false;
+}
+
+static bool fold_sub(OptContext *ctx, TCGOp *op)
+{
+ if (fold_const2(ctx, op) ||
+ fold_xx_to_i(ctx, op, 0) ||
+ fold_xi_to_x(ctx, op, 0) ||
+ fold_sub_to_neg(ctx, op)) {
+ return true;
+ }
+ return false;
+}
+
+static bool fold_sub2(OptContext *ctx, TCGOp *op)
+{
+ return fold_addsub2(ctx, op, false);
+}
+
+static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
+{
+ /* We can't do any folding with a load, but we can record bits. */
+ switch (op->opc) {
+ CASE_OP_32_64(ld8s):
+ ctx->s_mask = MAKE_64BIT_MASK(8, 56);
+ break;
+ CASE_OP_32_64(ld8u):
+ ctx->z_mask = MAKE_64BIT_MASK(0, 8);
+ ctx->s_mask = MAKE_64BIT_MASK(9, 55);
+ break;
+ CASE_OP_32_64(ld16s):
+ ctx->s_mask = MAKE_64BIT_MASK(16, 48);
+ break;
+ CASE_OP_32_64(ld16u):
+ ctx->z_mask = MAKE_64BIT_MASK(0, 16);
+ ctx->s_mask = MAKE_64BIT_MASK(17, 47);
+ break;
+ case INDEX_op_ld32s_i64:
+ ctx->s_mask = MAKE_64BIT_MASK(32, 32);
+ break;
+ case INDEX_op_ld32u_i64:
+ ctx->z_mask = MAKE_64BIT_MASK(0, 32);
+ ctx->s_mask = MAKE_64BIT_MASK(33, 31);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ return false;
+}
+
+static bool fold_xor(OptContext *ctx, TCGOp *op)
+{
+ if (fold_const2_commutative(ctx, op) ||
+ fold_xx_to_i(ctx, op, 0) ||
+ fold_xi_to_x(ctx, op, 0) ||
+ fold_xi_to_not(ctx, op, -1)) {
+ return true;
+ }
+
+ ctx->z_mask = arg_info(op->args[1])->z_mask
+ | arg_info(op->args[2])->z_mask;
+ ctx->s_mask = arg_info(op->args[1])->s_mask
+ & arg_info(op->args[2])->s_mask;
+ return fold_masks(ctx, op);
+}
+
+/* Propagate constants and copies, fold constant expressions. */
+void tcg_optimize(TCGContext *s)
+{
+ int nb_temps, i;
+ TCGOp *op, *op_next;
+ OptContext ctx = { .tcg = s };
+
+    /*
+     * Each temp has optimizer state attached via state_ptr: if the temp
+     * holds a constant, its value is recorded there; if the temp is a
+     * copy of other ones, the copies are linked through a doubly linked
+     * circular list.
+     */
+
+ nb_temps = s->nb_temps;
+ for (i = 0; i < nb_temps; ++i) {
+ s->temps[i].state_ptr = NULL;
+ }
+
+ QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
+ TCGOpcode opc = op->opc;
+ const TCGOpDef *def;
+ bool done = false;
+
+ /* Calls are special. */
+ if (opc == INDEX_op_call) {
+ fold_call(&ctx, op);
+ continue;
+ }
+
+ def = &tcg_op_defs[opc];
+ init_arguments(&ctx, op, def->nb_oargs + def->nb_iargs);
+ copy_propagate(&ctx, op, def->nb_oargs, def->nb_iargs);
+
+ /* Pre-compute the type of the operation. */
+ if (def->flags & TCG_OPF_VECTOR) {
+ ctx.type = TCG_TYPE_V64 + TCGOP_VECL(op);
+ } else if (def->flags & TCG_OPF_64BIT) {
+ ctx.type = TCG_TYPE_I64;
+ } else {
+ ctx.type = TCG_TYPE_I32;
+ }
+
+ /* Assume all bits affected, no bits known zero, no sign reps. */
+ ctx.a_mask = -1;
+ ctx.z_mask = -1;
+ ctx.s_mask = 0;
+
+ /*
+ * Process each opcode.
+ * Sorted alphabetically by opcode as much as possible.
+ */
+ switch (opc) {
+ CASE_OP_32_64_VEC(add):
+ done = fold_add(&ctx, op);
+ break;
+ CASE_OP_32_64(add2):
+ done = fold_add2(&ctx, op);
+ break;
+ CASE_OP_32_64_VEC(and):
+ done = fold_and(&ctx, op);
+ break;
+ CASE_OP_32_64_VEC(andc):
+ done = fold_andc(&ctx, op);
+ break;
+ CASE_OP_32_64(brcond):
+ done = fold_brcond(&ctx, op);
+ break;
+ case INDEX_op_brcond2_i32:
+ done = fold_brcond2(&ctx, op);
+ break;
+ CASE_OP_32_64(bswap16):
+ CASE_OP_32_64(bswap32):
+ case INDEX_op_bswap64_i64:
+ done = fold_bswap(&ctx, op);
+ break;
+ CASE_OP_32_64(clz):
+ CASE_OP_32_64(ctz):
+ done = fold_count_zeros(&ctx, op);
+ break;
+ CASE_OP_32_64(ctpop):
+ done = fold_ctpop(&ctx, op);
+ break;
+ CASE_OP_32_64(deposit):
+ done = fold_deposit(&ctx, op);
+ break;
+ CASE_OP_32_64(div):
+ CASE_OP_32_64(divu):
+ done = fold_divide(&ctx, op);
+ break;
+ case INDEX_op_dup_vec:
+ done = fold_dup(&ctx, op);
+ break;
+ case INDEX_op_dup2_vec:
+ done = fold_dup2(&ctx, op);
+ break;
+ CASE_OP_32_64(eqv):
+ done = fold_eqv(&ctx, op);
+ break;
+ CASE_OP_32_64(extract):
+ done = fold_extract(&ctx, op);
+ break;
+ CASE_OP_32_64(extract2):
+ done = fold_extract2(&ctx, op);
+ break;
+ CASE_OP_32_64(ext8s):
+ CASE_OP_32_64(ext16s):
+ case INDEX_op_ext32s_i64:
+ case INDEX_op_ext_i32_i64:
+ done = fold_exts(&ctx, op);
+ break;
+ CASE_OP_32_64(ext8u):
+ CASE_OP_32_64(ext16u):
+ case INDEX_op_ext32u_i64:
+ case INDEX_op_extu_i32_i64:
+ case INDEX_op_extrl_i64_i32:
+ case INDEX_op_extrh_i64_i32:
+ done = fold_extu(&ctx, op);
+ break;
+ CASE_OP_32_64(ld8s):
+ CASE_OP_32_64(ld8u):
+ CASE_OP_32_64(ld16s):
+ CASE_OP_32_64(ld16u):
+ case INDEX_op_ld32s_i64:
+ case INDEX_op_ld32u_i64:
+ done = fold_tcg_ld(&ctx, op);
+ break;
+ case INDEX_op_mb:
+ done = fold_mb(&ctx, op);
+ break;
+ CASE_OP_32_64_VEC(mov):
+ done = fold_mov(&ctx, op);
+ break;
+ CASE_OP_32_64(movcond):
+ done = fold_movcond(&ctx, op);
+ break;
+ CASE_OP_32_64(mul):
+ done = fold_mul(&ctx, op);
+ break;
+ CASE_OP_32_64(mulsh):
+ CASE_OP_32_64(muluh):
+ done = fold_mul_highpart(&ctx, op);
+ break;
+ CASE_OP_32_64(muls2):
+ CASE_OP_32_64(mulu2):
+ done = fold_multiply2(&ctx, op);
+ break;
+ CASE_OP_32_64(nand):
+ done = fold_nand(&ctx, op);
+ break;
+ CASE_OP_32_64(neg):
+ done = fold_neg(&ctx, op);
+ break;
+ CASE_OP_32_64(nor):
+ done = fold_nor(&ctx, op);
+ break;
+ CASE_OP_32_64_VEC(not):
+ done = fold_not(&ctx, op);
+ break;
+ CASE_OP_32_64_VEC(or):
+ done = fold_or(&ctx, op);
+ break;
+ CASE_OP_32_64_VEC(orc):
+ done = fold_orc(&ctx, op);
+ break;
+ case INDEX_op_qemu_ld_i32:
+ case INDEX_op_qemu_ld_i64:
+ done = fold_qemu_ld(&ctx, op);
+ break;
+ case INDEX_op_qemu_st_i32:
+ case INDEX_op_qemu_st8_i32:
+ case INDEX_op_qemu_st_i64:
+ done = fold_qemu_st(&ctx, op);
+ break;
+ CASE_OP_32_64(rem):
+ CASE_OP_32_64(remu):
+ done = fold_remainder(&ctx, op);
+ break;
+ CASE_OP_32_64(rotl):
+ CASE_OP_32_64(rotr):
+ CASE_OP_32_64(sar):
+ CASE_OP_32_64(shl):
+ CASE_OP_32_64(shr):
+ done = fold_shift(&ctx, op);
+ break;
+ CASE_OP_32_64(setcond):
+ done = fold_setcond(&ctx, op);
+ break;
+ case INDEX_op_setcond2_i32:
+ done = fold_setcond2(&ctx, op);
+ break;
+ CASE_OP_32_64(sextract):
+ done = fold_sextract(&ctx, op);
+ break;
+ CASE_OP_32_64_VEC(sub):
+ done = fold_sub(&ctx, op);
+ break;
+ CASE_OP_32_64(sub2):
+ done = fold_sub2(&ctx, op);
+ break;
+ CASE_OP_32_64_VEC(xor):
+ done = fold_xor(&ctx, op);
+ break;
+ default:
+ break;
+ }
+
+ if (!done) {
+ finish_folding(&ctx, op);
+ }
+ }
+}
diff --git a/tcg/ppc/tcg-target-con-set.h b/tcg/ppc/tcg-target-con-set.h
new file mode 100644
index 000000000..a1a345883
--- /dev/null
+++ b/tcg/ppc/tcg-target-con-set.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define PowerPC target-specific constraint sets.
+ * Copyright (c) 2021 Linaro
+ */
+
+/*
+ * C_On_Im(...) defines a constraint set with <n> outputs and <m> inputs.
+ * Each operand should be a sequence of constraint letters as defined by
+ * tcg-target-con-str.h; the constraint combination is inclusive or.
+ */
+C_O0_I1(r)
+C_O0_I2(r, r)
+C_O0_I2(r, ri)
+C_O0_I2(S, S)
+C_O0_I2(v, r)
+C_O0_I3(S, S, S)
+C_O0_I4(r, r, ri, ri)
+C_O0_I4(S, S, S, S)
+C_O1_I1(r, L)
+C_O1_I1(r, r)
+C_O1_I1(v, r)
+C_O1_I1(v, v)
+C_O1_I1(v, vr)
+C_O1_I2(r, 0, rZ)
+C_O1_I2(r, L, L)
+C_O1_I2(r, rI, ri)
+C_O1_I2(r, rI, rT)
+C_O1_I2(r, r, r)
+C_O1_I2(r, r, ri)
+C_O1_I2(r, r, rI)
+C_O1_I2(r, r, rT)
+C_O1_I2(r, r, rU)
+C_O1_I2(r, r, rZW)
+C_O1_I2(v, v, v)
+C_O1_I3(v, v, v, v)
+C_O1_I4(r, r, ri, rZ, rZ)
+C_O1_I4(r, r, r, ri, ri)
+C_O2_I1(L, L, L)
+C_O2_I2(L, L, L, L)
+C_O2_I4(r, r, rI, rZM, r, r)
+C_O2_I4(r, r, r, r, rI, rZM)
diff --git a/tcg/ppc/tcg-target-con-str.h b/tcg/ppc/tcg-target-con-str.h
new file mode 100644
index 000000000..298ca20d5
--- /dev/null
+++ b/tcg/ppc/tcg-target-con-str.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define PowerPC target-specific operand constraints.
+ * Copyright (c) 2021 Linaro
+ */
+
+/*
+ * Define constraint letters for register sets:
+ * REGS(letter, register_mask)
+ */
+REGS('r', ALL_GENERAL_REGS)
+REGS('v', ALL_VECTOR_REGS)
+REGS('A', 1u << TCG_REG_R3)
+REGS('B', 1u << TCG_REG_R4)
+REGS('C', 1u << TCG_REG_R5)
+REGS('D', 1u << TCG_REG_R6)
+REGS('L', ALL_QLOAD_REGS)
+REGS('S', ALL_QSTORE_REGS)
+
+/*
+ * Define constraint letters for constants:
+ * CONST(letter, TCG_CT_CONST_* bit set)
+ */
+CONST('I', TCG_CT_CONST_S16)
+CONST('J', TCG_CT_CONST_U16)
+CONST('M', TCG_CT_CONST_MONE)
+CONST('T', TCG_CT_CONST_S32)
+CONST('U', TCG_CT_CONST_U32)
+CONST('W', TCG_CT_CONST_WSZ)
+CONST('Z', TCG_CT_CONST_ZERO)
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
new file mode 100644
index 000000000..3e4ca2be8
--- /dev/null
+++ b/tcg/ppc/tcg-target.c.inc
@@ -0,0 +1,3907 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "elf.h"
+#include "../tcg-pool.c.inc"
+
+/*
+ * Standardize on the _CALL_FOO symbols used by GCC:
+ * Apple XCode does not define _CALL_DARWIN.
+ * Clang defines _CALL_ELF (64-bit) but not _CALL_SYSV (32-bit).
+ */
+#if !defined(_CALL_SYSV) && \
+ !defined(_CALL_DARWIN) && \
+ !defined(_CALL_AIX) && \
+ !defined(_CALL_ELF)
+# if defined(__APPLE__)
+# define _CALL_DARWIN
+# elif defined(__ELF__) && TCG_TARGET_REG_BITS == 32
+# define _CALL_SYSV
+# else
+# error "Unknown ABI"
+# endif
+#endif
+
+#ifdef _CALL_SYSV
+# define TCG_TARGET_CALL_ALIGN_ARGS 1
+#endif
+
+/* For some memory operations, we need a scratch that isn't R0. For the AIX
+ calling convention, we can re-use the TOC register since we'll be reloading
+ it at every call. Otherwise R12 will do nicely as neither a call-saved
+ register nor a parameter register. */
+#ifdef _CALL_AIX
+# define TCG_REG_TMP1 TCG_REG_R2
+#else
+# define TCG_REG_TMP1 TCG_REG_R12
+#endif
+
+#define TCG_VEC_TMP1 TCG_REG_V0
+#define TCG_VEC_TMP2 TCG_REG_V1
+
+#define TCG_REG_TB TCG_REG_R31
+#define USE_REG_TB (TCG_TARGET_REG_BITS == 64)
+
+/* Shorthand for size of a pointer. Avoid promotion to unsigned. */
+#define SZP ((int)sizeof(void *))
+
+/* Shorthand for size of a register. */
+#define SZR (TCG_TARGET_REG_BITS / 8)
+
+#define TCG_CT_CONST_S16 0x100
+#define TCG_CT_CONST_U16 0x200
+#define TCG_CT_CONST_S32 0x400
+#define TCG_CT_CONST_U32 0x800
+#define TCG_CT_CONST_ZERO 0x1000
+#define TCG_CT_CONST_MONE 0x2000
+#define TCG_CT_CONST_WSZ 0x4000
+
+#define ALL_GENERAL_REGS 0xffffffffu
+#define ALL_VECTOR_REGS 0xffffffff00000000ull
+
+#ifdef CONFIG_SOFTMMU
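+/*
+ * The softmmu TLB lookup and helper call sequence clobber the low
+ * argument registers, so exclude them from the qemu_ld/st operand
+ * constraints.
+ */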
+#define ALL_QLOAD_REGS \
+ (ALL_GENERAL_REGS & \
+ ~((1 << TCG_REG_R3) | (1 << TCG_REG_R4) | (1 << TCG_REG_R5)))
+#define ALL_QSTORE_REGS \
+ (ALL_GENERAL_REGS & ~((1 << TCG_REG_R3) | (1 << TCG_REG_R4) | \
+ (1 << TCG_REG_R5) | (1 << TCG_REG_R6)))
+#else
+#define ALL_QLOAD_REGS (ALL_GENERAL_REGS & ~(1 << TCG_REG_R3))
+#define ALL_QSTORE_REGS ALL_QLOAD_REGS
+#endif
+
+TCGPowerISA have_isa;
+static bool have_isel;
+bool have_altivec;
+bool have_vsx;
+
+#ifndef CONFIG_SOFTMMU
+#define TCG_GUEST_BASE_REG 30
+#endif
+
+#ifdef CONFIG_DEBUG_TCG
+static const char tcg_target_reg_names[TCG_TARGET_NB_REGS][4] = {
+ "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
+ "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
+ "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
+ "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
+ "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
+ "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
+ "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
+ "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
+};
+#endif
+
+static const int tcg_target_reg_alloc_order[] = {
+ TCG_REG_R14, /* call saved registers */
+ TCG_REG_R15,
+ TCG_REG_R16,
+ TCG_REG_R17,
+ TCG_REG_R18,
+ TCG_REG_R19,
+ TCG_REG_R20,
+ TCG_REG_R21,
+ TCG_REG_R22,
+ TCG_REG_R23,
+ TCG_REG_R24,
+ TCG_REG_R25,
+ TCG_REG_R26,
+ TCG_REG_R27,
+ TCG_REG_R28,
+ TCG_REG_R29,
+ TCG_REG_R30,
+ TCG_REG_R31,
+ TCG_REG_R12, /* call clobbered, non-arguments */
+ TCG_REG_R11,
+ TCG_REG_R2,
+ TCG_REG_R13,
+ TCG_REG_R10, /* call clobbered, arguments */
+ TCG_REG_R9,
+ TCG_REG_R8,
+ TCG_REG_R7,
+ TCG_REG_R6,
+ TCG_REG_R5,
+ TCG_REG_R4,
+ TCG_REG_R3,
+
+ /* V0 and V1 reserved as temporaries; V20 - V31 are call-saved */
+ TCG_REG_V2, /* call clobbered, vectors */
+ TCG_REG_V3,
+ TCG_REG_V4,
+ TCG_REG_V5,
+ TCG_REG_V6,
+ TCG_REG_V7,
+ TCG_REG_V8,
+ TCG_REG_V9,
+ TCG_REG_V10,
+ TCG_REG_V11,
+ TCG_REG_V12,
+ TCG_REG_V13,
+ TCG_REG_V14,
+ TCG_REG_V15,
+ TCG_REG_V16,
+ TCG_REG_V17,
+ TCG_REG_V18,
+ TCG_REG_V19,
+};
+
+static const int tcg_target_call_iarg_regs[] = {
+ TCG_REG_R3,
+ TCG_REG_R4,
+ TCG_REG_R5,
+ TCG_REG_R6,
+ TCG_REG_R7,
+ TCG_REG_R8,
+ TCG_REG_R9,
+ TCG_REG_R10
+};
+
+static const int tcg_target_call_oarg_regs[] = {
+ TCG_REG_R3,
+ TCG_REG_R4
+};
+
+static const int tcg_target_callee_save_regs[] = {
+#ifdef _CALL_DARWIN
+ TCG_REG_R11,
+#endif
+ TCG_REG_R14,
+ TCG_REG_R15,
+ TCG_REG_R16,
+ TCG_REG_R17,
+ TCG_REG_R18,
+ TCG_REG_R19,
+ TCG_REG_R20,
+ TCG_REG_R21,
+ TCG_REG_R22,
+ TCG_REG_R23,
+ TCG_REG_R24,
+ TCG_REG_R25,
+ TCG_REG_R26,
+ TCG_REG_R27, /* currently used for the global env */
+ TCG_REG_R28,
+ TCG_REG_R29,
+ TCG_REG_R30,
+ TCG_REG_R31
+};
+
+static inline bool in_range_b(tcg_target_long target)
+{
+ return target == sextract64(target, 0, 26);
+}
+
+static uint32_t reloc_pc24_val(const tcg_insn_unit *pc,
+ const tcg_insn_unit *target)
+{
+ ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
+ tcg_debug_assert(in_range_b(disp));
+ return disp & 0x3fffffc;
+}
+
+static bool reloc_pc24(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
+{
+ const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
+ ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);
+
+ if (in_range_b(disp)) {
+ *src_rw = (*src_rw & ~0x3fffffc) | (disp & 0x3fffffc);
+ return true;
+ }
+ return false;
+}
+
+static uint16_t reloc_pc14_val(const tcg_insn_unit *pc,
+ const tcg_insn_unit *target)
+{
+ ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
+ tcg_debug_assert(disp == (int16_t) disp);
+ return disp & 0xfffc;
+}
+
+static bool reloc_pc14(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
+{
+ const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
+ ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);
+
+ if (disp == (int16_t) disp) {
+ *src_rw = (*src_rw & ~0xfffc) | (disp & 0xfffc);
+ return true;
+ }
+ return false;
+}
+
+/* test if a constant matches the constraint */
+static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
+{
+ if (ct & TCG_CT_CONST) {
+ return 1;
+ }
+
+ /* The only 32-bit constraint we use aside from
+ TCG_CT_CONST is TCG_CT_CONST_S16. */
+ if (type == TCG_TYPE_I32) {
+ val = (int32_t)val;
+ }
+
+ if ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) {
+ return 1;
+ } else if ((ct & TCG_CT_CONST_U16) && val == (uint16_t)val) {
+ return 1;
+ } else if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
+ return 1;
+ } else if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
+ return 1;
+ } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
+ return 1;
+ } else if ((ct & TCG_CT_CONST_MONE) && val == -1) {
+ return 1;
+ } else if ((ct & TCG_CT_CONST_WSZ)
+ && val == (type == TCG_TYPE_I32 ? 32 : 64)) {
+ return 1;
+ }
+ return 0;
+}
+
+#define OPCD(opc) ((opc)<<26)
+#define XO19(opc) (OPCD(19)|((opc)<<1))
+#define MD30(opc) (OPCD(30)|((opc)<<2))
+#define MDS30(opc) (OPCD(30)|((opc)<<1))
+#define XO31(opc) (OPCD(31)|((opc)<<1))
+#define XO58(opc) (OPCD(58)|(opc))
+#define XO62(opc) (OPCD(62)|(opc))
+#define VX4(opc) (OPCD(4)|(opc))
+
+#define B OPCD( 18)
+#define BC OPCD( 16)
+#define LBZ OPCD( 34)
+#define LHZ OPCD( 40)
+#define LHA OPCD( 42)
+#define LWZ OPCD( 32)
+#define LWZUX XO31( 55)
+#define STB OPCD( 38)
+#define STH OPCD( 44)
+#define STW OPCD( 36)
+
+#define STD XO62( 0)
+#define STDU XO62( 1)
+#define STDX XO31(149)
+
+#define LD XO58( 0)
+#define LDX XO31( 21)
+#define LDU XO58( 1)
+#define LDUX XO31( 53)
+#define LWA XO58( 2)
+#define LWAX XO31(341)
+
+#define ADDIC OPCD( 12)
+#define ADDI OPCD( 14)
+#define ADDIS OPCD( 15)
+#define ORI OPCD( 24)
+#define ORIS OPCD( 25)
+#define XORI OPCD( 26)
+#define XORIS OPCD( 27)
+#define ANDI OPCD( 28)
+#define ANDIS OPCD( 29)
+#define MULLI OPCD( 7)
+#define CMPLI OPCD( 10)
+#define CMPI OPCD( 11)
+#define SUBFIC OPCD( 8)
+
+#define LWZU OPCD( 33)
+#define STWU OPCD( 37)
+
+#define RLWIMI OPCD( 20)
+#define RLWINM OPCD( 21)
+#define RLWNM OPCD( 23)
+
+#define RLDICL MD30( 0)
+#define RLDICR MD30( 1)
+#define RLDIMI MD30( 3)
+#define RLDCL MDS30( 8)
+
+#define BCLR XO19( 16)
+#define BCCTR XO19(528)
+#define CRAND XO19(257)
+#define CRANDC XO19(129)
+#define CRNAND XO19(225)
+#define CROR XO19(449)
+#define CRNOR XO19( 33)
+
+#define EXTSB XO31(954)
+#define EXTSH XO31(922)
+#define EXTSW XO31(986)
+#define ADD XO31(266)
+#define ADDE XO31(138)
+#define ADDME XO31(234)
+#define ADDZE XO31(202)
+#define ADDC XO31( 10)
+#define AND XO31( 28)
+#define SUBF XO31( 40)
+#define SUBFC XO31( 8)
+#define SUBFE XO31(136)
+#define SUBFME XO31(232)
+#define SUBFZE XO31(200)
+#define OR XO31(444)
+#define XOR XO31(316)
+#define MULLW XO31(235)
+#define MULHW XO31( 75)
+#define MULHWU XO31( 11)
+#define DIVW XO31(491)
+#define DIVWU XO31(459)
+#define CMP XO31( 0)
+#define CMPL XO31( 32)
+#define LHBRX XO31(790)
+#define LWBRX XO31(534)
+#define LDBRX XO31(532)
+#define STHBRX XO31(918)
+#define STWBRX XO31(662)
+#define STDBRX XO31(660)
+#define MFSPR XO31(339)
+#define MTSPR XO31(467)
+#define SRAWI XO31(824)
+#define NEG XO31(104)
+#define MFCR XO31( 19)
+#define MFOCRF (MFCR | (1u << 20))
+#define NOR XO31(124)
+#define CNTLZW XO31( 26)
+#define CNTLZD XO31( 58)
+#define CNTTZW XO31(538)
+#define CNTTZD XO31(570)
+#define CNTPOPW XO31(378)
+#define CNTPOPD XO31(506)
+#define ANDC XO31( 60)
+#define ORC XO31(412)
+#define EQV XO31(284)
+#define NAND XO31(476)
+#define ISEL XO31( 15)
+
+#define MULLD XO31(233)
+#define MULHD XO31( 73)
+#define MULHDU XO31( 9)
+#define DIVD XO31(489)
+#define DIVDU XO31(457)
+
+#define LBZX XO31( 87)
+#define LHZX XO31(279)
+#define LHAX XO31(343)
+#define LWZX XO31( 23)
+#define STBX XO31(215)
+#define STHX XO31(407)
+#define STWX XO31(151)
+
+#define EIEIO XO31(854)
+#define HWSYNC XO31(598)
+#define LWSYNC (HWSYNC | (1u << 21))
+
+#define SPR(a, b) ((((a)<<5)|(b))<<11)
+#define LR SPR(8, 0)
+#define CTR SPR(9, 0)
+
+#define SLW XO31( 24)
+#define SRW XO31(536)
+#define SRAW XO31(792)
+
+#define SLD XO31( 27)
+#define SRD XO31(539)
+#define SRAD XO31(794)
+#define SRADI XO31(413<<1)
+
+#define BRH XO31(219)
+#define BRW XO31(155)
+#define BRD XO31(187)
+
+#define TW XO31( 4)
+#define TRAP (TW | TO(31))
+
+#define NOP ORI /* ori 0,0,0 */
+
+#define LVX XO31(103)
+#define LVEBX XO31(7)
+#define LVEHX XO31(39)
+#define LVEWX XO31(71)
+#define LXSDX (XO31(588) | 1) /* v2.06, force tx=1 */
+#define LXVDSX (XO31(332) | 1) /* v2.06, force tx=1 */
+#define LXSIWZX (XO31(12) | 1) /* v2.07, force tx=1 */
+#define LXV (OPCD(61) | 8 | 1) /* v3.00, force tx=1 */
+#define LXSD (OPCD(57) | 2) /* v3.00 */
+#define LXVWSX (XO31(364) | 1) /* v3.00, force tx=1 */
+
+#define STVX XO31(231)
+#define STVEWX XO31(199)
+#define STXSDX (XO31(716) | 1) /* v2.06, force sx=1 */
+#define STXSIWX (XO31(140) | 1) /* v2.07, force sx=1 */
+#define STXV (OPCD(61) | 8 | 5) /* v3.00, force sx=1 */
+#define STXSD (OPCD(61) | 2) /* v3.00 */
+
+#define VADDSBS VX4(768)
+#define VADDUBS VX4(512)
+#define VADDUBM VX4(0)
+#define VADDSHS VX4(832)
+#define VADDUHS VX4(576)
+#define VADDUHM VX4(64)
+#define VADDSWS VX4(896)
+#define VADDUWS VX4(640)
+#define VADDUWM VX4(128)
+#define VADDUDM VX4(192) /* v2.07 */
+
+#define VSUBSBS VX4(1792)
+#define VSUBUBS VX4(1536)
+#define VSUBUBM VX4(1024)
+#define VSUBSHS VX4(1856)
+#define VSUBUHS VX4(1600)
+#define VSUBUHM VX4(1088)
+#define VSUBSWS VX4(1920)
+#define VSUBUWS VX4(1664)
+#define VSUBUWM VX4(1152)
+#define VSUBUDM VX4(1216) /* v2.07 */
+
+#define VNEGW (VX4(1538) | (6 << 16)) /* v3.00 */
+#define VNEGD (VX4(1538) | (7 << 16)) /* v3.00 */
+
+#define VMAXSB VX4(258)
+#define VMAXSH VX4(322)
+#define VMAXSW VX4(386)
+#define VMAXSD VX4(450) /* v2.07 */
+#define VMAXUB VX4(2)
+#define VMAXUH VX4(66)
+#define VMAXUW VX4(130)
+#define VMAXUD VX4(194) /* v2.07 */
+#define VMINSB VX4(770)
+#define VMINSH VX4(834)
+#define VMINSW VX4(898)
+#define VMINSD VX4(962) /* v2.07 */
+#define VMINUB VX4(514)
+#define VMINUH VX4(578)
+#define VMINUW VX4(642)
+#define VMINUD VX4(706) /* v2.07 */
+
+#define VCMPEQUB VX4(6)
+#define VCMPEQUH VX4(70)
+#define VCMPEQUW VX4(134)
+#define VCMPEQUD VX4(199) /* v2.07 */
+#define VCMPGTSB VX4(774)
+#define VCMPGTSH VX4(838)
+#define VCMPGTSW VX4(902)
+#define VCMPGTSD VX4(967) /* v2.07 */
+#define VCMPGTUB VX4(518)
+#define VCMPGTUH VX4(582)
+#define VCMPGTUW VX4(646)
+#define VCMPGTUD VX4(711) /* v2.07 */
+#define VCMPNEB VX4(7) /* v3.00 */
+#define VCMPNEH VX4(71) /* v3.00 */
+#define VCMPNEW VX4(135) /* v3.00 */
+
+#define VSLB VX4(260)
+#define VSLH VX4(324)
+#define VSLW VX4(388)
+#define VSLD VX4(1476) /* v2.07 */
+#define VSRB VX4(516)
+#define VSRH VX4(580)
+#define VSRW VX4(644)
+#define VSRD VX4(1732) /* v2.07 */
+#define VSRAB VX4(772)
+#define VSRAH VX4(836)
+#define VSRAW VX4(900)
+#define VSRAD VX4(964) /* v2.07 */
+#define VRLB VX4(4)
+#define VRLH VX4(68)
+#define VRLW VX4(132)
+#define VRLD VX4(196) /* v2.07 */
+
+#define VMULEUB VX4(520)
+#define VMULEUH VX4(584)
+#define VMULEUW VX4(648) /* v2.07 */
+#define VMULOUB VX4(8)
+#define VMULOUH VX4(72)
+#define VMULOUW VX4(136) /* v2.07 */
+#define VMULUWM VX4(137) /* v2.07 */
+#define VMULLD VX4(457) /* v3.10 */
+#define VMSUMUHM VX4(38)
+
+#define VMRGHB VX4(12)
+#define VMRGHH VX4(76)
+#define VMRGHW VX4(140)
+#define VMRGLB VX4(268)
+#define VMRGLH VX4(332)
+#define VMRGLW VX4(396)
+
+#define VPKUHUM VX4(14)
+#define VPKUWUM VX4(78)
+
+#define VAND VX4(1028)
+#define VANDC VX4(1092)
+#define VNOR VX4(1284)
+#define VOR VX4(1156)
+#define VXOR VX4(1220)
+#define VEQV VX4(1668) /* v2.07 */
+#define VNAND VX4(1412) /* v2.07 */
+#define VORC VX4(1348) /* v2.07 */
+
+#define VSPLTB VX4(524)
+#define VSPLTH VX4(588)
+#define VSPLTW VX4(652)
+#define VSPLTISB VX4(780)
+#define VSPLTISH VX4(844)
+#define VSPLTISW VX4(908)
+
+#define VSLDOI VX4(44)
+
+#define XXPERMDI (OPCD(60) | (10 << 3) | 7) /* v2.06, force ax=bx=tx=1 */
+#define XXSEL (OPCD(60) | (3 << 4) | 0xf) /* v2.06, force ax=bx=cx=tx=1 */
+#define XXSPLTIB (OPCD(60) | (360 << 1) | 1) /* v3.00, force tx=1 */
+
+#define MFVSRD (XO31(51) | 1) /* v2.07, force sx=1 */
+#define MFVSRWZ (XO31(115) | 1) /* v2.07, force sx=1 */
+#define MTVSRD (XO31(179) | 1) /* v2.07, force tx=1 */
+#define MTVSRWZ (XO31(243) | 1) /* v2.07, force tx=1 */
+#define MTVSRDD (XO31(435) | 1) /* v3.00, force tx=1 */
+#define MTVSRWS (XO31(403) | 1) /* v3.00, force tx=1 */
+
+#define RT(r) ((r)<<21)
+#define RS(r) ((r)<<21)
+#define RA(r) ((r)<<16)
+#define RB(r) ((r)<<11)
+#define TO(t) ((t)<<21)
+#define SH(s) ((s)<<11)
+#define MB(b) ((b)<<6)
+#define ME(e) ((e)<<1)
+#define BO(o) ((o)<<21)
+#define MB64(b) ((b)<<5)
+#define FXM(b) (1 << (19 - (b)))
+
+#define VRT(r) (((r) & 31) << 21)
+#define VRA(r) (((r) & 31) << 16)
+#define VRB(r) (((r) & 31) << 11)
+#define VRC(r) (((r) & 31) << 6)
+
+#define LK 1
+
+#define TAB(t, a, b) (RT(t) | RA(a) | RB(b))
+#define SAB(s, a, b) (RS(s) | RA(a) | RB(b))
+#define TAI(s, a, i) (RT(s) | RA(a) | ((i) & 0xffff))
+#define SAI(s, a, i) (RS(s) | RA(a) | ((i) & 0xffff))
+
+#define BF(n) ((n)<<23)
+#define BI(n, c) (((c)+((n)*4))<<16)
+#define BT(n, c) (((c)+((n)*4))<<21)
+#define BA(n, c) (((c)+((n)*4))<<16)
+#define BB(n, c) (((c)+((n)*4))<<11)
+#define BC_(n, c) (((c)+((n)*4))<<6)
+
+#define BO_COND_TRUE BO(12)
+#define BO_COND_FALSE BO( 4)
+#define BO_ALWAYS BO(20)
+
+enum {
+ CR_LT,
+ CR_GT,
+ CR_EQ,
+ CR_SO
+};
+
+static const uint32_t tcg_to_bc[] = {
+ [TCG_COND_EQ] = BC | BI(7, CR_EQ) | BO_COND_TRUE,
+ [TCG_COND_NE] = BC | BI(7, CR_EQ) | BO_COND_FALSE,
+ [TCG_COND_LT] = BC | BI(7, CR_LT) | BO_COND_TRUE,
+ [TCG_COND_GE] = BC | BI(7, CR_LT) | BO_COND_FALSE,
+ [TCG_COND_LE] = BC | BI(7, CR_GT) | BO_COND_FALSE,
+ [TCG_COND_GT] = BC | BI(7, CR_GT) | BO_COND_TRUE,
+ [TCG_COND_LTU] = BC | BI(7, CR_LT) | BO_COND_TRUE,
+ [TCG_COND_GEU] = BC | BI(7, CR_LT) | BO_COND_FALSE,
+ [TCG_COND_LEU] = BC | BI(7, CR_GT) | BO_COND_FALSE,
+ [TCG_COND_GTU] = BC | BI(7, CR_GT) | BO_COND_TRUE,
+};
+
+/* The low bit here is set if the RA and RB fields must be inverted. */
+static const uint32_t tcg_to_isel[] = {
+ [TCG_COND_EQ] = ISEL | BC_(7, CR_EQ),
+ [TCG_COND_NE] = ISEL | BC_(7, CR_EQ) | 1,
+ [TCG_COND_LT] = ISEL | BC_(7, CR_LT),
+ [TCG_COND_GE] = ISEL | BC_(7, CR_LT) | 1,
+ [TCG_COND_LE] = ISEL | BC_(7, CR_GT) | 1,
+ [TCG_COND_GT] = ISEL | BC_(7, CR_GT),
+ [TCG_COND_LTU] = ISEL | BC_(7, CR_LT),
+ [TCG_COND_GEU] = ISEL | BC_(7, CR_LT) | 1,
+ [TCG_COND_LEU] = ISEL | BC_(7, CR_GT) | 1,
+ [TCG_COND_GTU] = ISEL | BC_(7, CR_GT),
+};
+
+static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
+ intptr_t value, intptr_t addend)
+{
+ const tcg_insn_unit *target;
+ int16_t lo;
+ int32_t hi;
+
+ value += addend;
+ target = (const tcg_insn_unit *)value;
+
+ switch (type) {
+ case R_PPC_REL14:
+ return reloc_pc14(code_ptr, target);
+ case R_PPC_REL24:
+ return reloc_pc24(code_ptr, target);
+ case R_PPC_ADDR16:
+ /*
+ * We are (slightly) abusing this relocation type. In particular,
+ * assert that the low 2 bits are zero, and do not modify them.
+ * That way we can use this with LD et al that have opcode bits
+ * in the low 2 bits of the insn.
+ */
+ if ((value & 3) || value != (int16_t)value) {
+ return false;
+ }
+ *code_ptr = (*code_ptr & ~0xfffc) | (value & 0xfffc);
+ break;
+ case R_PPC_ADDR32:
+ /*
+ * We are abusing this relocation type. Again, this points to
+ * a pair of insns, lis + load. This is an absolute address
+ * relocation for PPC32 so the lis cannot be removed.
+ */
+ lo = value;
+ hi = value - lo;
+ if (hi + lo != value) {
+ return false;
+ }
+ code_ptr[0] = deposit32(code_ptr[0], 0, 16, hi >> 16);
+ code_ptr[1] = deposit32(code_ptr[1], 0, 16, lo);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ return true;
+}
+
+static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
+ TCGReg base, tcg_target_long offset);
+
+static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
+{
+ if (ret == arg) {
+ return true;
+ }
+ switch (type) {
+ case TCG_TYPE_I64:
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
+ /* fallthru */
+ case TCG_TYPE_I32:
+ if (ret < TCG_REG_V0) {
+ if (arg < TCG_REG_V0) {
+ tcg_out32(s, OR | SAB(arg, ret, arg));
+ break;
+ } else if (have_isa_2_07) {
+ tcg_out32(s, (type == TCG_TYPE_I32 ? MFVSRWZ : MFVSRD)
+ | VRT(arg) | RA(ret));
+ break;
+ } else {
+ /* Altivec does not support vector->integer moves. */
+ return false;
+ }
+ } else if (arg < TCG_REG_V0) {
+ if (have_isa_2_07) {
+ tcg_out32(s, (type == TCG_TYPE_I32 ? MTVSRWZ : MTVSRD)
+ | VRT(ret) | RA(arg));
+ break;
+ } else {
+ /* Altivec does not support integer->vector moves. */
+ return false;
+ }
+ }
+ /* fallthru */
+ case TCG_TYPE_V64:
+ case TCG_TYPE_V128:
+ tcg_debug_assert(ret >= TCG_REG_V0 && arg >= TCG_REG_V0);
+ tcg_out32(s, VOR | VRT(ret) | VRA(arg) | VRB(arg));
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ return true;
+}
+
+static inline void tcg_out_rld(TCGContext *s, int op, TCGReg ra, TCGReg rs,
+ int sh, int mb)
+{
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
+ sh = SH(sh & 0x1f) | (((sh >> 5) & 1) << 1);
+ mb = MB64((mb >> 5) | ((mb << 1) & 0x3f));
+ tcg_out32(s, op | RA(ra) | RS(rs) | sh | mb);
+}
+
+static inline void tcg_out_rlw(TCGContext *s, int op, TCGReg ra, TCGReg rs,
+ int sh, int mb, int me)
+{
+ tcg_out32(s, op | RA(ra) | RS(rs) | SH(sh) | MB(mb) | ME(me));
+}
+
+static inline void tcg_out_ext8s(TCGContext *s, TCGReg dst, TCGReg src)
+{
+ tcg_out32(s, EXTSB | RA(dst) | RS(src));
+}
+
+static inline void tcg_out_ext16s(TCGContext *s, TCGReg dst, TCGReg src)
+{
+ tcg_out32(s, EXTSH | RA(dst) | RS(src));
+}
+
+static inline void tcg_out_ext16u(TCGContext *s, TCGReg dst, TCGReg src)
+{
+ tcg_out32(s, ANDI | SAI(src, dst, 0xffff));
+}
+
+static inline void tcg_out_ext32s(TCGContext *s, TCGReg dst, TCGReg src)
+{
+ tcg_out32(s, EXTSW | RA(dst) | RS(src));
+}
+
+static inline void tcg_out_ext32u(TCGContext *s, TCGReg dst, TCGReg src)
+{
+ tcg_out_rld(s, RLDICL, dst, src, 0, 32);
+}
+
+static inline void tcg_out_shli32(TCGContext *s, TCGReg dst, TCGReg src, int c)
+{
+ tcg_out_rlw(s, RLWINM, dst, src, c, 0, 31 - c);
+}
+
+static inline void tcg_out_shli64(TCGContext *s, TCGReg dst, TCGReg src, int c)
+{
+ tcg_out_rld(s, RLDICR, dst, src, c, 63 - c);
+}
+
+static inline void tcg_out_sari32(TCGContext *s, TCGReg dst, TCGReg src, int c)
+{
+ /* Limit immediate shift count lest we create an illegal insn. */
+ tcg_out32(s, SRAWI | RA(dst) | RS(src) | SH(c & 31));
+}
+
+static inline void tcg_out_shri32(TCGContext *s, TCGReg dst, TCGReg src, int c)
+{
+ tcg_out_rlw(s, RLWINM, dst, src, 32 - c, c, 31);
+}
+
+static inline void tcg_out_shri64(TCGContext *s, TCGReg dst, TCGReg src, int c)
+{
+ tcg_out_rld(s, RLDICL, dst, src, 64 - c, c);
+}
+
+static inline void tcg_out_sari64(TCGContext *s, TCGReg dst, TCGReg src, int c)
+{
+ tcg_out32(s, SRADI | RA(dst) | RS(src) | SH(c & 0x1f) | ((c >> 4) & 2));
+}
+
+static void tcg_out_bswap16(TCGContext *s, TCGReg dst, TCGReg src, int flags)
+{
+ TCGReg tmp = dst == src ? TCG_REG_R0 : dst;
+
+ if (have_isa_3_10) {
+ tcg_out32(s, BRH | RA(dst) | RS(src));
+ if (flags & TCG_BSWAP_OS) {
+ tcg_out_ext16s(s, dst, dst);
+ } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
+ tcg_out_ext16u(s, dst, dst);
+ }
+ return;
+ }
+
+ /*
+ * In the following,
+ * dep(a, b, m) -> (a & ~m) | (b & m)
+ *
+ * Begin with: src = xxxxabcd
+ */
+ /* tmp = rol32(src, 24) & 0x000000ff = 0000000c */
+ tcg_out_rlw(s, RLWINM, tmp, src, 24, 24, 31);
+ /* tmp = dep(tmp, rol32(src, 8), 0x0000ff00) = 000000dc */
+ tcg_out_rlw(s, RLWIMI, tmp, src, 8, 16, 23);
+
+ if (flags & TCG_BSWAP_OS) {
+ tcg_out_ext16s(s, dst, tmp);
+ } else {
+ tcg_out_mov(s, TCG_TYPE_REG, dst, tmp);
+ }
+}
+
+static void tcg_out_bswap32(TCGContext *s, TCGReg dst, TCGReg src, int flags)
+{
+ TCGReg tmp = dst == src ? TCG_REG_R0 : dst;
+
+ if (have_isa_3_10) {
+ tcg_out32(s, BRW | RA(dst) | RS(src));
+ if (flags & TCG_BSWAP_OS) {
+ tcg_out_ext32s(s, dst, dst);
+ } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
+ tcg_out_ext32u(s, dst, dst);
+ }
+ return;
+ }
+
+ /*
+ * Stolen from gcc's builtin_bswap32.
+ * In the following,
+ * dep(a, b, m) -> (a & ~m) | (b & m)
+ *
+ * Begin with: src = xxxxabcd
+ */
+ /* tmp = rol32(src, 8) & 0xffffffff = 0000bcda */
+ tcg_out_rlw(s, RLWINM, tmp, src, 8, 0, 31);
+ /* tmp = dep(tmp, rol32(src, 24), 0xff000000) = 0000dcda */
+ tcg_out_rlw(s, RLWIMI, tmp, src, 24, 0, 7);
+ /* tmp = dep(tmp, rol32(src, 24), 0x0000ff00) = 0000dcba */
+ tcg_out_rlw(s, RLWIMI, tmp, src, 24, 16, 23);
+
+ if (flags & TCG_BSWAP_OS) {
+ tcg_out_ext32s(s, dst, tmp);
+ } else {
+ tcg_out_mov(s, TCG_TYPE_REG, dst, tmp);
+ }
+}
+
+static void tcg_out_bswap64(TCGContext *s, TCGReg dst, TCGReg src)
+{
+ TCGReg t0 = dst == src ? TCG_REG_R0 : dst;
+ TCGReg t1 = dst == src ? dst : TCG_REG_R0;
+
+ if (have_isa_3_10) {
+ tcg_out32(s, BRD | RA(dst) | RS(src));
+ return;
+ }
+
+ /*
+ * In the following,
+ * dep(a, b, m) -> (a & ~m) | (b & m)
+ *
+ * Begin with: src = abcdefgh
+ */
+ /* t0 = rol32(src, 8) & 0xffffffff = 0000fghe */
+ tcg_out_rlw(s, RLWINM, t0, src, 8, 0, 31);
+ /* t0 = dep(t0, rol32(src, 24), 0xff000000) = 0000hghe */
+ tcg_out_rlw(s, RLWIMI, t0, src, 24, 0, 7);
+ /* t0 = dep(t0, rol32(src, 24), 0x0000ff00) = 0000hgfe */
+ tcg_out_rlw(s, RLWIMI, t0, src, 24, 16, 23);
+
+ /* t0 = rol64(t0, 32) = hgfe0000 */
+ tcg_out_rld(s, RLDICL, t0, t0, 32, 0);
+ /* t1 = rol64(src, 32) = efghabcd */
+ tcg_out_rld(s, RLDICL, t1, src, 32, 0);
+
+    /* t0 = dep(t0, rol32(t1, 8), 0xffffffff) = hgfebcda */
+ tcg_out_rlw(s, RLWIMI, t0, t1, 8, 0, 31);
+ /* t0 = dep(t0, rol32(t1, 24), 0xff000000) = hgfedcda */
+ tcg_out_rlw(s, RLWIMI, t0, t1, 24, 0, 7);
+ /* t0 = dep(t0, rol32(t1, 24), 0x0000ff00) = hgfedcba */
+ tcg_out_rlw(s, RLWIMI, t0, t1, 24, 16, 23);
+
+ tcg_out_mov(s, TCG_TYPE_REG, dst, t0);
+}
+
+/* Emit a move into ret of arg, if it can be done in one insn. */
+static bool tcg_out_movi_one(TCGContext *s, TCGReg ret, tcg_target_long arg)
+{
+ if (arg == (int16_t)arg) {
+ tcg_out32(s, ADDI | TAI(ret, 0, arg));
+ return true;
+ }
+ if (arg == (int32_t)arg && (arg & 0xffff) == 0) {
+ tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));
+ return true;
+ }
+ return false;
+}
+
+static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
+ tcg_target_long arg, bool in_prologue)
+{
+ intptr_t tb_diff;
+ tcg_target_long tmp;
+ int shift;
+
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
+
+ if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
+ arg = (int32_t)arg;
+ }
+
+ /* Load 16-bit immediates with one insn. */
+ if (tcg_out_movi_one(s, ret, arg)) {
+ return;
+ }
+
+ /* Load addresses within the TB with one insn. */
+ tb_diff = tcg_tbrel_diff(s, (void *)arg);
+ if (!in_prologue && USE_REG_TB && tb_diff == (int16_t)tb_diff) {
+ tcg_out32(s, ADDI | TAI(ret, TCG_REG_TB, tb_diff));
+ return;
+ }
+
+ /* Load 32-bit immediates with two insns. Note that we've already
+ eliminated bare ADDIS, so we know both insns are required. */
+ if (TCG_TARGET_REG_BITS == 32 || arg == (int32_t)arg) {
+ tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));
+ tcg_out32(s, ORI | SAI(ret, ret, arg));
+ return;
+ }
+ if (arg == (uint32_t)arg && !(arg & 0x8000)) {
+ tcg_out32(s, ADDI | TAI(ret, 0, arg));
+ tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
+ return;
+ }
+
+ /* Load masked 16-bit value. */
+ if (arg > 0 && (arg & 0x8000)) {
+ tmp = arg | 0x7fff;
+ if ((tmp & (tmp + 1)) == 0) {
+ int mb = clz64(tmp + 1) + 1;
+ tcg_out32(s, ADDI | TAI(ret, 0, arg));
+ tcg_out_rld(s, RLDICL, ret, ret, 0, mb);
+ return;
+ }
+ }
+
+ /* Load common masks with 2 insns. */
+ shift = ctz64(arg);
+ tmp = arg >> shift;
+ if (tmp == (int16_t)tmp) {
+ tcg_out32(s, ADDI | TAI(ret, 0, tmp));
+ tcg_out_shli64(s, ret, ret, shift);
+ return;
+ }
+ shift = clz64(arg);
+ if (tcg_out_movi_one(s, ret, arg << shift)) {
+ tcg_out_shri64(s, ret, ret, shift);
+ return;
+ }
+
+ /* Load addresses within 2GB of TB with 2 (or rarely 3) insns. */
+ if (!in_prologue && USE_REG_TB && tb_diff == (int32_t)tb_diff) {
+ tcg_out_mem_long(s, ADDI, ADD, ret, TCG_REG_TB, tb_diff);
+ return;
+ }
+
+ /* Use the constant pool, if possible. */
+ if (!in_prologue && USE_REG_TB) {
+ new_pool_label(s, arg, R_PPC_ADDR16, s->code_ptr,
+ tcg_tbrel_diff(s, NULL));
+ tcg_out32(s, LD | TAI(ret, TCG_REG_TB, 0));
+ return;
+ }
+
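+    /*
+     * Full 64-bit constant: load the high 32 bits, shift them into
+     * place, then OR in the low 16-bit halves (up to 5 insns total).
+     */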
+ tmp = arg >> 31 >> 1;
+ tcg_out_movi(s, TCG_TYPE_I32, ret, tmp);
+ if (tmp) {
+ tcg_out_shli64(s, ret, ret, 32);
+ }
+ if (arg & 0xffff0000) {
+ tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
+ }
+ if (arg & 0xffff) {
+ tcg_out32(s, ORI | SAI(ret, ret, arg));
+ }
+}
+
+static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
+ TCGReg ret, int64_t val)
+{
+ uint32_t load_insn;
+ int rel, low;
+ intptr_t add;
+
+ switch (vece) {
+ case MO_8:
+ low = (int8_t)val;
+ if (low >= -16 && low < 16) {
+ tcg_out32(s, VSPLTISB | VRT(ret) | ((val & 31) << 16));
+ return;
+ }
+ if (have_isa_3_00) {
+ tcg_out32(s, XXSPLTIB | VRT(ret) | ((val & 0xff) << 11));
+ return;
+ }
+ break;
+
+ case MO_16:
+ low = (int16_t)val;
+ if (low >= -16 && low < 16) {
+ tcg_out32(s, VSPLTISH | VRT(ret) | ((val & 31) << 16));
+ return;
+ }
+ break;
+
+ case MO_32:
+ low = (int32_t)val;
+ if (low >= -16 && low < 16) {
+ tcg_out32(s, VSPLTISW | VRT(ret) | ((val & 31) << 16));
+ return;
+ }
+ break;
+ }
+
+ /*
+ * Otherwise we must load the value from the constant pool.
+ */
+ if (USE_REG_TB) {
+ rel = R_PPC_ADDR16;
+ add = tcg_tbrel_diff(s, NULL);
+ } else {
+ rel = R_PPC_ADDR32;
+ add = 0;
+ }
+
+ if (have_vsx) {
+ load_insn = type == TCG_TYPE_V64 ? LXSDX : LXVDSX;
+ load_insn |= VRT(ret) | RB(TCG_REG_TMP1);
+ if (TCG_TARGET_REG_BITS == 64) {
+ new_pool_label(s, val, rel, s->code_ptr, add);
+ } else {
+ new_pool_l2(s, rel, s->code_ptr, add, val >> 32, val);
+ }
+ } else {
+ load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1);
+ if (TCG_TARGET_REG_BITS == 64) {
+ new_pool_l2(s, rel, s->code_ptr, add, val, val);
+ } else {
+ new_pool_l4(s, rel, s->code_ptr, add,
+ val >> 32, val, val >> 32, val);
+ }
+ }
+
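+    /*
+     * The zero displacements below are placeholders; they are patched
+     * when the relocation recorded by new_pool_* above is resolved to
+     * the constant pool entry.
+     */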
+ if (USE_REG_TB) {
+ tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, 0, 0));
+ load_insn |= RA(TCG_REG_TB);
+ } else {
+ tcg_out32(s, ADDIS | TAI(TCG_REG_TMP1, 0, 0));
+ tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, 0));
+ }
+ tcg_out32(s, load_insn);
+}
+
+static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
+ tcg_target_long arg)
+{
+ switch (type) {
+ case TCG_TYPE_I32:
+ case TCG_TYPE_I64:
+ tcg_debug_assert(ret < TCG_REG_V0);
+ tcg_out_movi_int(s, type, ret, arg, false);
+ break;
+
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static bool mask_operand(uint32_t c, int *mb, int *me)
+{
+ uint32_t lsb, test;
+
+ /* Accept a bit pattern like:
+ 0....01....1
+ 1....10....0
+ 0..01..10..0
+ Keep track of the transitions. */
+ if (c == 0 || c == -1) {
+ return false;
+ }
+ test = c;
+ lsb = test & -test;
+ test += lsb;
+ if (test & (test - 1)) {
+ return false;
+ }
+
+ *me = clz32(lsb);
+ *mb = test ? clz32(test & -test) + 1 : 0;
+ return true;
+}
+
+static bool mask64_operand(uint64_t c, int *mb, int *me)
+{
+ uint64_t lsb;
+
+ if (c == 0) {
+ return false;
+ }
+
+ lsb = c & -c;
+ /* Accept 1..10..0. */
+ if (c == -lsb) {
+ *mb = 0;
+ *me = clz64(lsb);
+ return true;
+ }
+ /* Accept 0..01..1. */
+ if (lsb == 1 && (c & (c + 1)) == 0) {
+ *mb = clz64(c + 1) + 1;
+ *me = 63;
+ return true;
+ }
+ return false;
+}
+
+static void tcg_out_andi32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
+{
+ int mb, me;
+
+ if (mask_operand(c, &mb, &me)) {
+ tcg_out_rlw(s, RLWINM, dst, src, 0, mb, me);
+ } else if ((c & 0xffff) == c) {
+ tcg_out32(s, ANDI | SAI(src, dst, c));
+ return;
+ } else if ((c & 0xffff0000) == c) {
+ tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
+ return;
+ } else {
+ tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R0, c);
+ tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
+ }
+}
+
+static void tcg_out_andi64(TCGContext *s, TCGReg dst, TCGReg src, uint64_t c)
+{
+ int mb, me;
+
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
+ if (mask64_operand(c, &mb, &me)) {
+ if (mb == 0) {
+ tcg_out_rld(s, RLDICR, dst, src, 0, me);
+ } else {
+ tcg_out_rld(s, RLDICL, dst, src, 0, mb);
+ }
+ } else if ((c & 0xffff) == c) {
+ tcg_out32(s, ANDI | SAI(src, dst, c));
+ return;
+ } else if ((c & 0xffff0000) == c) {
+ tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
+ return;
+ } else {
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, c);
+ tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
+ }
+}
+
+static void tcg_out_zori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c,
+ int op_lo, int op_hi)
+{
+ if (c >> 16) {
+ tcg_out32(s, op_hi | SAI(src, dst, c >> 16));
+ src = dst;
+ }
+ if (c & 0xffff) {
+ tcg_out32(s, op_lo | SAI(src, dst, c));
+ src = dst;
+ }
+}
+
+static void tcg_out_ori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
+{
+ tcg_out_zori32(s, dst, src, c, ORI, ORIS);
+}
+
+static void tcg_out_xori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
+{
+ tcg_out_zori32(s, dst, src, c, XORI, XORIS);
+}
+
+static void tcg_out_b(TCGContext *s, int mask, const tcg_insn_unit *target)
+{
+ ptrdiff_t disp = tcg_pcrel_diff(s, target);
+ if (in_range_b(disp)) {
+ tcg_out32(s, B | (disp & 0x3fffffc) | mask);
+ } else {
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, (uintptr_t)target);
+ tcg_out32(s, MTSPR | RS(TCG_REG_R0) | CTR);
+ tcg_out32(s, BCCTR | BO_ALWAYS | mask);
+ }
+}
+
+static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
+ TCGReg base, tcg_target_long offset)
+{
+ tcg_target_long orig = offset, l0, l1, extra = 0, align = 0;
+ bool is_int_store = false;
+ TCGReg rs = TCG_REG_TMP1;
+
+ switch (opi) {
+ case LD: case LWA:
+ align = 3;
+ /* FALLTHRU */
+ default:
+ if (rt > TCG_REG_R0 && rt < TCG_REG_V0) {
+ rs = rt;
+ break;
+ }
+ break;
+ case LXSD:
+ case STXSD:
+ align = 3;
+ break;
+ case LXV:
+ case STXV:
+ align = 15;
+ break;
+ case STD:
+ align = 3;
+ /* FALLTHRU */
+ case STB: case STH: case STW:
+ is_int_store = true;
+ break;
+ }
+
+ /* For unaligned, or very large offsets, use the indexed form. */
+ if (offset & align || offset != (int32_t)offset || opi == 0) {
+ if (rs == base) {
+ rs = TCG_REG_R0;
+ }
+ tcg_debug_assert(!is_int_store || rs != rt);
+ tcg_out_movi(s, TCG_TYPE_PTR, rs, orig);
+ tcg_out32(s, opx | TAB(rt & 31, base, rs));
+ return;
+ }
+
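+    /*
+     * Split the offset into a signed 16-bit low part for the final
+     * D-form insn and a high part for ADDIS.  If the low part borrows
+     * enough to push the high immediate negative while the original
+     * offset was non-negative, emit the high part as two ADDIS steps so
+     * each immediate stays in signed 16-bit range,
+     * e.g. 0x7fff8000 -> addis 0x4000, addis 0x4000, disp -0x8000.
+     */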
+ l0 = (int16_t)offset;
+ offset = (offset - l0) >> 16;
+ l1 = (int16_t)offset;
+
+ if (l1 < 0 && orig >= 0) {
+ extra = 0x4000;
+ l1 = (int16_t)(offset - 0x4000);
+ }
+ if (l1) {
+ tcg_out32(s, ADDIS | TAI(rs, base, l1));
+ base = rs;
+ }
+ if (extra) {
+ tcg_out32(s, ADDIS | TAI(rs, base, extra));
+ base = rs;
+ }
+ if (opi != ADDI || base != rt || l0 != 0) {
+ tcg_out32(s, opi | TAI(rt & 31, base, l0));
+ }
+}
+
+static void tcg_out_vsldoi(TCGContext *s, TCGReg ret,
+ TCGReg va, TCGReg vb, int shb)
+{
+ tcg_out32(s, VSLDOI | VRT(ret) | VRA(va) | VRB(vb) | (shb << 6));
+}
+
+static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
+ TCGReg base, intptr_t offset)
+{
+ int shift;
+
+ switch (type) {
+ case TCG_TYPE_I32:
+ if (ret < TCG_REG_V0) {
+ tcg_out_mem_long(s, LWZ, LWZX, ret, base, offset);
+ break;
+ }
+ if (have_isa_2_07 && have_vsx) {
+ tcg_out_mem_long(s, 0, LXSIWZX, ret, base, offset);
+ break;
+ }
+ tcg_debug_assert((offset & 3) == 0);
+ tcg_out_mem_long(s, 0, LVEWX, ret, base, offset);
+ shift = (offset - 4) & 0xc;
+ if (shift) {
+ tcg_out_vsldoi(s, ret, ret, ret, shift);
+ }
+ break;
+ case TCG_TYPE_I64:
+ if (ret < TCG_REG_V0) {
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
+ tcg_out_mem_long(s, LD, LDX, ret, base, offset);
+ break;
+ }
+ /* fallthru */
+ case TCG_TYPE_V64:
+ tcg_debug_assert(ret >= TCG_REG_V0);
+ if (have_vsx) {
+ tcg_out_mem_long(s, have_isa_3_00 ? LXSD : 0, LXSDX,
+ ret, base, offset);
+ break;
+ }
+ tcg_debug_assert((offset & 7) == 0);
+ tcg_out_mem_long(s, 0, LVX, ret, base, offset & -16);
+ if (offset & 8) {
+ tcg_out_vsldoi(s, ret, ret, ret, 8);
+ }
+ break;
+ case TCG_TYPE_V128:
+ tcg_debug_assert(ret >= TCG_REG_V0);
+ tcg_debug_assert((offset & 15) == 0);
+ tcg_out_mem_long(s, have_isa_3_00 ? LXV : 0,
+ LVX, ret, base, offset);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
+ TCGReg base, intptr_t offset)
+{
+ int shift;
+
+ switch (type) {
+ case TCG_TYPE_I32:
+ if (arg < TCG_REG_V0) {
+ tcg_out_mem_long(s, STW, STWX, arg, base, offset);
+ break;
+ }
+ if (have_isa_2_07 && have_vsx) {
+ tcg_out_mem_long(s, 0, STXSIWX, arg, base, offset);
+ break;
+ }
+        tcg_debug_assert((offset & 3) == 0);
+ shift = (offset - 4) & 0xc;
+ if (shift) {
+ tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, shift);
+ arg = TCG_VEC_TMP1;
+ }
+ tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
+ break;
+ case TCG_TYPE_I64:
+ if (arg < TCG_REG_V0) {
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
+ tcg_out_mem_long(s, STD, STDX, arg, base, offset);
+ break;
+ }
+ /* fallthru */
+ case TCG_TYPE_V64:
+ tcg_debug_assert(arg >= TCG_REG_V0);
+ if (have_vsx) {
+ tcg_out_mem_long(s, have_isa_3_00 ? STXSD : 0,
+ STXSDX, arg, base, offset);
+ break;
+ }
+ tcg_debug_assert((offset & 7) == 0);
+ if (offset & 8) {
+ tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, 8);
+ arg = TCG_VEC_TMP1;
+ }
+ tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
+ tcg_out_mem_long(s, 0, STVEWX, arg, base, offset + 4);
+ break;
+ case TCG_TYPE_V128:
+ tcg_debug_assert(arg >= TCG_REG_V0);
+ tcg_out_mem_long(s, have_isa_3_00 ? STXV : 0,
+ STVX, arg, base, offset);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
+ TCGReg base, intptr_t ofs)
+{
+ return false;
+}
+
+static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2,
+ int const_arg2, int cr, TCGType type)
+{
+ int imm;
+ uint32_t op;
+
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
+
+ /* Simplify the comparisons below wrt CMPI. */
+ if (type == TCG_TYPE_I32) {
+ arg2 = (int32_t)arg2;
+ }
+
+ switch (cond) {
+ case TCG_COND_EQ:
+ case TCG_COND_NE:
+ if (const_arg2) {
+ if ((int16_t) arg2 == arg2) {
+ op = CMPI;
+ imm = 1;
+ break;
+ } else if ((uint16_t) arg2 == arg2) {
+ op = CMPLI;
+ imm = 1;
+ break;
+ }
+ }
+ op = CMPL;
+ imm = 0;
+ break;
+
+ case TCG_COND_LT:
+ case TCG_COND_GE:
+ case TCG_COND_LE:
+ case TCG_COND_GT:
+ if (const_arg2) {
+ if ((int16_t) arg2 == arg2) {
+ op = CMPI;
+ imm = 1;
+ break;
+ }
+ }
+ op = CMP;
+ imm = 0;
+ break;
+
+ case TCG_COND_LTU:
+ case TCG_COND_GEU:
+ case TCG_COND_LEU:
+ case TCG_COND_GTU:
+ if (const_arg2) {
+ if ((uint16_t) arg2 == arg2) {
+ op = CMPLI;
+ imm = 1;
+ break;
+ }
+ }
+ op = CMPL;
+ imm = 0;
+ break;
+
+ default:
+ tcg_abort();
+ }
+ op |= BF(cr) | ((type == TCG_TYPE_I64) << 21);
+
+ if (imm) {
+ tcg_out32(s, op | RA(arg1) | (arg2 & 0xffff));
+ } else {
+ if (const_arg2) {
+ tcg_out_movi(s, type, TCG_REG_R0, arg2);
+ arg2 = TCG_REG_R0;
+ }
+ tcg_out32(s, op | RA(arg1) | RB(arg2));
+ }
+}
+
+static void tcg_out_setcond_eq0(TCGContext *s, TCGType type,
+ TCGReg dst, TCGReg src)
+{
+ if (type == TCG_TYPE_I32) {
+ tcg_out32(s, CNTLZW | RS(src) | RA(dst));
+ tcg_out_shri32(s, dst, dst, 5);
+ } else {
+ tcg_out32(s, CNTLZD | RS(src) | RA(dst));
+ tcg_out_shri64(s, dst, dst, 6);
+ }
+}
+
+static void tcg_out_setcond_ne0(TCGContext *s, TCGReg dst, TCGReg src)
+{
+    /* X != 0 implies X + -1 generates a carry.  SUBFE then computes
+       R = X + ~(X-1) + CA = X - (X-1) - 1 + CA = CA. */
+ if (dst != src) {
+ tcg_out32(s, ADDIC | TAI(dst, src, -1));
+ tcg_out32(s, SUBFE | TAB(dst, dst, src));
+ } else {
+ tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1));
+ tcg_out32(s, SUBFE | TAB(dst, TCG_REG_R0, src));
+ }
+}
+
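+/*
+ * Reduce a comparison of ARG1 with ARG2 to a comparison of R0 with
+ * zero, by computing R0 = ARG1 ^ ARG2.
+ */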
+static TCGReg tcg_gen_setcond_xor(TCGContext *s, TCGReg arg1, TCGArg arg2,
+ bool const_arg2)
+{
+ if (const_arg2) {
+ if ((uint32_t)arg2 == arg2) {
+ tcg_out_xori32(s, TCG_REG_R0, arg1, arg2);
+ } else {
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, arg2);
+ tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, TCG_REG_R0));
+ }
+ } else {
+ tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, arg2));
+ }
+ return TCG_REG_R0;
+}
+
+static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond,
+ TCGArg arg0, TCGArg arg1, TCGArg arg2,
+ int const_arg2)
+{
+ int crop, sh;
+
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
+
+ /* Ignore high bits of a potential constant arg2. */
+ if (type == TCG_TYPE_I32) {
+ arg2 = (uint32_t)arg2;
+ }
+
+ /* Handle common and trivial cases before handling anything else. */
+ if (arg2 == 0) {
+ switch (cond) {
+ case TCG_COND_EQ:
+ tcg_out_setcond_eq0(s, type, arg0, arg1);
+ return;
+ case TCG_COND_NE:
+ if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
+ tcg_out_ext32u(s, TCG_REG_R0, arg1);
+ arg1 = TCG_REG_R0;
+ }
+ tcg_out_setcond_ne0(s, arg0, arg1);
+ return;
+ case TCG_COND_GE:
+ tcg_out32(s, NOR | SAB(arg1, arg0, arg1));
+ arg1 = arg0;
+ /* FALLTHRU */
+ case TCG_COND_LT:
+ /* Extract the sign bit. */
+ if (type == TCG_TYPE_I32) {
+ tcg_out_shri32(s, arg0, arg1, 31);
+ } else {
+ tcg_out_shri64(s, arg0, arg1, 63);
+ }
+ return;
+ default:
+ break;
+ }
+ }
+
+ /* If we have ISEL, we can implement everything with 3 or 4 insns.
+ All other cases below are also at least 3 insns, so speed up the
+ code generator by not considering them and always using ISEL. */
+ if (have_isel) {
+ int isel, tab;
+
+ tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
+
+ isel = tcg_to_isel[cond];
+
+ tcg_out_movi(s, type, arg0, 1);
+ if (isel & 1) {
+ /* arg0 = (bc ? 0 : 1) */
+ tab = TAB(arg0, 0, arg0);
+ isel &= ~1;
+ } else {
+ /* arg0 = (bc ? 1 : 0) */
+ tcg_out_movi(s, type, TCG_REG_R0, 0);
+ tab = TAB(arg0, arg0, TCG_REG_R0);
+ }
+ tcg_out32(s, isel | tab);
+ return;
+ }
+
+ switch (cond) {
+ case TCG_COND_EQ:
+ arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
+ tcg_out_setcond_eq0(s, type, arg0, arg1);
+ return;
+
+ case TCG_COND_NE:
+ arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
+ /* Discard the high bits only once, rather than both inputs. */
+ if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
+ tcg_out_ext32u(s, TCG_REG_R0, arg1);
+ arg1 = TCG_REG_R0;
+ }
+ tcg_out_setcond_ne0(s, arg0, arg1);
+ return;
+
+ case TCG_COND_GT:
+ case TCG_COND_GTU:
+ sh = 30;
+ crop = 0;
+ goto crtest;
+
+ case TCG_COND_LT:
+ case TCG_COND_LTU:
+ sh = 29;
+ crop = 0;
+ goto crtest;
+
+ case TCG_COND_GE:
+ case TCG_COND_GEU:
+ sh = 31;
+ crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_LT) | BB(7, CR_LT);
+ goto crtest;
+
+ case TCG_COND_LE:
+ case TCG_COND_LEU:
+ sh = 31;
+ crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_GT) | BB(7, CR_GT);
+ crtest:
+ tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
+ if (crop) {
+ tcg_out32(s, crop);
+ }
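+        /*
+         * Roughly, in LSB-0 terms: MFOCRF leaves CR7 in the low four bits
+         * of R0 as LT:GT:EQ:SO = bits 3:2:1:0, and the RLWINM below rotates
+         * left by SH and keeps only the lowest bit, so SH = 29, 30 and 31
+         * extract LT, GT and EQ respectively.
+         */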
+ tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7));
+ tcg_out_rlw(s, RLWINM, arg0, TCG_REG_R0, sh, 31, 31);
+ break;
+
+ default:
+ tcg_abort();
+ }
+}
+
+static void tcg_out_bc(TCGContext *s, int bc, TCGLabel *l)
+{
+ if (l->has_value) {
+ bc |= reloc_pc14_val(tcg_splitwx_to_rx(s->code_ptr), l->u.value_ptr);
+ } else {
+ tcg_out_reloc(s, s->code_ptr, R_PPC_REL14, l, 0);
+ }
+ tcg_out32(s, bc);
+}
+
+static void tcg_out_brcond(TCGContext *s, TCGCond cond,
+ TCGArg arg1, TCGArg arg2, int const_arg2,
+ TCGLabel *l, TCGType type)
+{
+ tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
+ tcg_out_bc(s, tcg_to_bc[cond], l);
+}
+
+static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond,
+ TCGArg dest, TCGArg c1, TCGArg c2, TCGArg v1,
+ TCGArg v2, bool const_c2)
+{
+ /* If for some reason both inputs are zero, don't produce bad code. */
+ if (v1 == 0 && v2 == 0) {
+ tcg_out_movi(s, type, dest, 0);
+ return;
+ }
+
+ tcg_out_cmp(s, cond, c1, c2, const_c2, 7, type);
+
+ if (have_isel) {
+ int isel = tcg_to_isel[cond];
+
+ /* Swap the V operands if the operation indicates inversion. */
+ if (isel & 1) {
+ int t = v1;
+ v1 = v2;
+ v2 = t;
+ isel &= ~1;
+ }
+ /* V1 == 0 is handled by isel; V2 == 0 must be handled by hand. */
+ if (v2 == 0) {
+ tcg_out_movi(s, type, TCG_REG_R0, 0);
+ }
+ tcg_out32(s, isel | TAB(dest, v1, v2));
+ } else {
+ if (dest == v2) {
+ cond = tcg_invert_cond(cond);
+ v2 = v1;
+ } else if (dest != v1) {
+ if (v1 == 0) {
+ tcg_out_movi(s, type, dest, 0);
+ } else {
+ tcg_out_mov(s, type, dest, v1);
+ }
+ }
+ /* Branch forward over one insn */
+ tcg_out32(s, tcg_to_bc[cond] | 8);
+ if (v2 == 0) {
+ tcg_out_movi(s, type, dest, 0);
+ } else {
+ tcg_out_mov(s, type, dest, v2);
+ }
+ }
+}
+
+static void tcg_out_cntxz(TCGContext *s, TCGType type, uint32_t opc,
+ TCGArg a0, TCGArg a1, TCGArg a2, bool const_a2)
+{
+ if (const_a2 && a2 == (type == TCG_TYPE_I32 ? 32 : 64)) {
+ tcg_out32(s, opc | RA(a0) | RS(a1));
+ } else {
+ tcg_out_cmp(s, TCG_COND_EQ, a1, 0, 1, 7, type);
+ /* Note that the only other valid constant for a2 is 0. */
+ if (have_isel) {
+ tcg_out32(s, opc | RA(TCG_REG_R0) | RS(a1));
+ tcg_out32(s, tcg_to_isel[TCG_COND_EQ] | TAB(a0, a2, TCG_REG_R0));
+ } else if (!const_a2 && a0 == a2) {
+ tcg_out32(s, tcg_to_bc[TCG_COND_EQ] | 8);
+ tcg_out32(s, opc | RA(a0) | RS(a1));
+ } else {
+ tcg_out32(s, opc | RA(a0) | RS(a1));
+ tcg_out32(s, tcg_to_bc[TCG_COND_NE] | 8);
+ if (const_a2) {
+ tcg_out_movi(s, type, a0, 0);
+ } else {
+ tcg_out_mov(s, type, a0, a2);
+ }
+ }
+ }
+}
+
+static void tcg_out_cmp2(TCGContext *s, const TCGArg *args,
+ const int *const_args)
+{
+ static const struct { uint8_t bit1, bit2; } bits[] = {
+ [TCG_COND_LT ] = { CR_LT, CR_LT },
+ [TCG_COND_LE ] = { CR_LT, CR_GT },
+ [TCG_COND_GT ] = { CR_GT, CR_GT },
+ [TCG_COND_GE ] = { CR_GT, CR_LT },
+ [TCG_COND_LTU] = { CR_LT, CR_LT },
+ [TCG_COND_LEU] = { CR_LT, CR_GT },
+ [TCG_COND_GTU] = { CR_GT, CR_GT },
+ [TCG_COND_GEU] = { CR_GT, CR_LT },
+ };
+
+ TCGCond cond = args[4], cond2;
+ TCGArg al, ah, bl, bh;
+ int blconst, bhconst;
+ int op, bit1, bit2;
+
+ al = args[0];
+ ah = args[1];
+ bl = args[2];
+ bh = args[3];
+ blconst = const_args[2];
+ bhconst = const_args[3];
+
+ switch (cond) {
+ case TCG_COND_EQ:
+ op = CRAND;
+ goto do_equality;
+ case TCG_COND_NE:
+ op = CRNAND;
+ do_equality:
+ tcg_out_cmp(s, cond, al, bl, blconst, 6, TCG_TYPE_I32);
+ tcg_out_cmp(s, cond, ah, bh, bhconst, 7, TCG_TYPE_I32);
+ tcg_out32(s, op | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
+ break;
+
+ case TCG_COND_LT:
+ case TCG_COND_LE:
+ case TCG_COND_GT:
+ case TCG_COND_GE:
+ case TCG_COND_LTU:
+ case TCG_COND_LEU:
+ case TCG_COND_GTU:
+ case TCG_COND_GEU:
+ bit1 = bits[cond].bit1;
+ bit2 = bits[cond].bit2;
+ op = (bit1 != bit2 ? CRANDC : CRAND);
+ cond2 = tcg_unsigned_cond(cond);
+
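+        /*
+         * Sketch for, e.g., LT: the two-word comparison is
+         * (ah < bh) || (ah == bh && al <u bl).  CR6 holds the signed
+         * high-part compare and CR7 the unsigned low-part compare; the
+         * CRAND/CRANDC plus CROR below leave that combined predicate in
+         * CR7[EQ] for the caller to branch or extract on.
+         */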
+ tcg_out_cmp(s, cond, ah, bh, bhconst, 6, TCG_TYPE_I32);
+ tcg_out_cmp(s, cond2, al, bl, blconst, 7, TCG_TYPE_I32);
+ tcg_out32(s, op | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, bit2));
+ tcg_out32(s, CROR | BT(7, CR_EQ) | BA(6, bit1) | BB(7, CR_EQ));
+ break;
+
+ default:
+ tcg_abort();
+ }
+}
+
+static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
+ const int *const_args)
+{
+ tcg_out_cmp2(s, args + 1, const_args + 1);
+ tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7));
+ tcg_out_rlw(s, RLWINM, args[0], TCG_REG_R0, 31, 31, 31);
+}
+
+static void tcg_out_brcond2 (TCGContext *s, const TCGArg *args,
+ const int *const_args)
+{
+ tcg_out_cmp2(s, args, const_args);
+ tcg_out_bc(s, BC | BI(7, CR_EQ) | BO_COND_TRUE, arg_label(args[5]));
+}
+
+static void tcg_out_mb(TCGContext *s, TCGArg a0)
+{
+ uint32_t insn = HWSYNC;
+ a0 &= TCG_MO_ALL;
+ if (a0 == TCG_MO_LD_LD) {
+ insn = LWSYNC;
+ } else if (a0 == TCG_MO_ST_ST) {
+ insn = EIEIO;
+ }
+ tcg_out32(s, insn);
+}
+
+void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx,
+ uintptr_t jmp_rw, uintptr_t addr)
+{
+ if (TCG_TARGET_REG_BITS == 64) {
+ tcg_insn_unit i1, i2;
+ intptr_t tb_diff = addr - tc_ptr;
+ intptr_t br_diff = addr - (jmp_rx + 4);
+ uint64_t pair;
+
+        /* The 16-bit case does not exercise the full range of the branch,
+           but we still need to reload TCG_REG_TB with its new value, and
+           this case does occur quite often.  */
+ if (tb_diff == (int16_t)tb_diff) {
+ i1 = ADDI | TAI(TCG_REG_TB, TCG_REG_TB, tb_diff);
+ i2 = B | (br_diff & 0x3fffffc);
+ } else {
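+            /*
+             * Example of the 16-bit split: for tb_diff = 0x12348000,
+             * lo = (int16_t)0x8000 = -0x8000 and hi = 0x12350000, so
+             * ADDIS with hi >> 16 followed by the sign-extending ADDI
+             * with lo reconstructs 0x12350000 - 0x8000 = 0x12348000.
+             */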
+ intptr_t lo = (int16_t)tb_diff;
+ intptr_t hi = (int32_t)(tb_diff - lo);
+ assert(tb_diff == hi + lo);
+ i1 = ADDIS | TAI(TCG_REG_TB, TCG_REG_TB, hi >> 16);
+ i2 = ADDI | TAI(TCG_REG_TB, TCG_REG_TB, lo);
+ }
+#ifdef HOST_WORDS_BIGENDIAN
+ pair = (uint64_t)i1 << 32 | i2;
+#else
+ pair = (uint64_t)i2 << 32 | i1;
+#endif
+
+ /* As per the enclosing if, this is ppc64. Avoid the _Static_assert
+ within qatomic_set that would fail to build a ppc32 host. */
+ qatomic_set__nocheck((uint64_t *)jmp_rw, pair);
+ flush_idcache_range(jmp_rx, jmp_rw, 8);
+ } else {
+ intptr_t diff = addr - jmp_rx;
+ tcg_debug_assert(in_range_b(diff));
+ qatomic_set((uint32_t *)jmp_rw, B | (diff & 0x3fffffc));
+ flush_idcache_range(jmp_rx, jmp_rw, 4);
+ }
+}
+
+static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target)
+{
+#ifdef _CALL_AIX
+    /* Look through the function descriptor.  If the branch is in range
+       and the TOC value is cheap to build, branch directly; otherwise
+       load the entry point and TOC from the descriptor and go via CTR.  */
+ const void *tgt = ((const void * const *)target)[0];
+ uintptr_t toc = ((const uintptr_t *)target)[1];
+ intptr_t diff = tcg_pcrel_diff(s, tgt);
+
+ if (in_range_b(diff) && toc == (uint32_t)toc) {
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, toc);
+ tcg_out_b(s, LK, tgt);
+ } else {
+ /* Fold the low bits of the constant into the addresses below. */
+ intptr_t arg = (intptr_t)target;
+ int ofs = (int16_t)arg;
+
+ if (ofs + 8 < 0x8000) {
+ arg -= ofs;
+ } else {
+ ofs = 0;
+ }
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, arg);
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_TMP1, ofs);
+ tcg_out32(s, MTSPR | RA(TCG_REG_R0) | CTR);
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_REG_TMP1, ofs + SZP);
+ tcg_out32(s, BCCTR | BO_ALWAYS | LK);
+ }
+#elif defined(_CALL_ELF) && _CALL_ELF == 2
+ intptr_t diff;
+
+ /* In the ELFv2 ABI, we have to set up r12 to contain the destination
+ address, which the callee uses to compute its TOC address. */
+ /* FIXME: when the branch is in range, we could avoid r12 load if we
+ knew that the destination uses the same TOC, and what its local
+ entry point offset is. */
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R12, (intptr_t)target);
+
+ diff = tcg_pcrel_diff(s, target);
+ if (in_range_b(diff)) {
+ tcg_out_b(s, LK, target);
+ } else {
+ tcg_out32(s, MTSPR | RS(TCG_REG_R12) | CTR);
+ tcg_out32(s, BCCTR | BO_ALWAYS | LK);
+ }
+#else
+ tcg_out_b(s, LK, target);
+#endif
+}
+
+static const uint32_t qemu_ldx_opc[(MO_SSIZE + MO_BSWAP) + 1] = {
+ [MO_UB] = LBZX,
+ [MO_UW] = LHZX,
+ [MO_UL] = LWZX,
+ [MO_Q] = LDX,
+ [MO_SW] = LHAX,
+ [MO_SL] = LWAX,
+ [MO_BSWAP | MO_UB] = LBZX,
+ [MO_BSWAP | MO_UW] = LHBRX,
+ [MO_BSWAP | MO_UL] = LWBRX,
+ [MO_BSWAP | MO_Q] = LDBRX,
+};
+
+static const uint32_t qemu_stx_opc[(MO_SIZE + MO_BSWAP) + 1] = {
+ [MO_UB] = STBX,
+ [MO_UW] = STHX,
+ [MO_UL] = STWX,
+ [MO_Q] = STDX,
+ [MO_BSWAP | MO_UB] = STBX,
+ [MO_BSWAP | MO_UW] = STHBRX,
+ [MO_BSWAP | MO_UL] = STWBRX,
+ [MO_BSWAP | MO_Q] = STDBRX,
+};
+
+static const uint32_t qemu_exts_opc[4] = {
+ EXTSB, EXTSH, EXTSW, 0
+};
+
+#if defined (CONFIG_SOFTMMU)
+#include "../tcg-ldst.c.inc"
+
+/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
+ * int mmu_idx, uintptr_t ra)
+ */
+static void * const qemu_ld_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
+ [MO_UB] = helper_ret_ldub_mmu,
+ [MO_LEUW] = helper_le_lduw_mmu,
+ [MO_LEUL] = helper_le_ldul_mmu,
+ [MO_LEQ] = helper_le_ldq_mmu,
+ [MO_BEUW] = helper_be_lduw_mmu,
+ [MO_BEUL] = helper_be_ldul_mmu,
+ [MO_BEQ] = helper_be_ldq_mmu,
+};
+
+/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
+ * uintxx_t val, int mmu_idx, uintptr_t ra)
+ */
+static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
+ [MO_UB] = helper_ret_stb_mmu,
+ [MO_LEUW] = helper_le_stw_mmu,
+ [MO_LEUL] = helper_le_stl_mmu,
+ [MO_LEQ] = helper_le_stq_mmu,
+ [MO_BEUW] = helper_be_stw_mmu,
+ [MO_BEUL] = helper_be_stl_mmu,
+ [MO_BEQ] = helper_be_stq_mmu,
+};
+
+/* We expect to use a 16-bit negative offset from ENV. */
+QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
+QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -32768);
+
+/* Perform the TLB load and compare. Places the result of the comparison
+ in CR7, loads the addend of the TLB into R3, and returns the register
+ containing the guest address (zero-extended into R4). Clobbers R0 and R2. */
+
+static TCGReg tcg_out_tlb_read(TCGContext *s, MemOp opc,
+ TCGReg addrlo, TCGReg addrhi,
+ int mem_index, bool is_read)
+{
+ int cmp_off
+ = (is_read
+ ? offsetof(CPUTLBEntry, addr_read)
+ : offsetof(CPUTLBEntry, addr_write));
+ int fast_off = TLB_MASK_TABLE_OFS(mem_index);
+ int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
+ int table_off = fast_off + offsetof(CPUTLBDescFast, table);
+ unsigned s_bits = opc & MO_SIZE;
+ unsigned a_bits = get_alignment_bits(opc);
+
+ /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_AREG0, mask_off);
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R4, TCG_AREG0, table_off);
+
+ /* Extract the page index, shifted into place for tlb index. */
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_out_shri32(s, TCG_REG_TMP1, addrlo,
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
+ } else {
+ tcg_out_shri64(s, TCG_REG_TMP1, addrlo,
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
+ }
+ tcg_out32(s, AND | SAB(TCG_REG_R3, TCG_REG_R3, TCG_REG_TMP1));
+
+ /* Load the TLB comparator. */
+ if (cmp_off == 0 && TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
+ uint32_t lxu = (TCG_TARGET_REG_BITS == 32 || TARGET_LONG_BITS == 32
+ ? LWZUX : LDUX);
+ tcg_out32(s, lxu | TAB(TCG_REG_TMP1, TCG_REG_R3, TCG_REG_R4));
+ } else {
+ tcg_out32(s, ADD | TAB(TCG_REG_R3, TCG_REG_R3, TCG_REG_R4));
+ if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
+ tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP1, TCG_REG_R3, cmp_off + 4);
+ tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R4, TCG_REG_R3, cmp_off);
+ } else {
+ tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP1, TCG_REG_R3, cmp_off);
+ }
+ }
+
+    /* Load the TLB addend for use on the fast path.  Do this as early as
+       possible to minimize the load-use delay.  */
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_REG_R3,
+ offsetof(CPUTLBEntry, addend));
+
+ /* Clear the non-page, non-alignment bits from the address */
+ if (TCG_TARGET_REG_BITS == 32) {
+ /* We don't support unaligned accesses on 32-bits.
+ * Preserve the bottom bits and thus trigger a comparison
+ * failure on unaligned accesses.
+ */
+ if (a_bits < s_bits) {
+ a_bits = s_bits;
+ }
+ tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0,
+ (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
+ } else {
+ TCGReg t = addrlo;
+
+        /* If the access is unaligned, we need to make sure we fail if we
+         * cross a page boundary.  The trick is to add the access size
+         * minus the guaranteed alignment (s_mask - a_mask) to the address
+         * before masking the low bits.  That will make the address overflow
+         * to the next page if we cross a page boundary, which will then
+         * force a mismatch of the TLB compare.
+         */
+ if (a_bits < s_bits) {
+ unsigned a_mask = (1 << a_bits) - 1;
+ unsigned s_mask = (1 << s_bits) - 1;
+ tcg_out32(s, ADDI | TAI(TCG_REG_R0, t, s_mask - a_mask));
+ t = TCG_REG_R0;
+ }
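+        /*
+         * For instance, a 4-byte access with no guaranteed alignment adds
+         * s_mask - a_mask = 3; an address two bytes short of a page
+         * boundary then rolls over into the next page, so the masked
+         * compare against the TLB entry fails and we take the slow path.
+         */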
+
+ /* Mask the address for the requested alignment. */
+ if (TARGET_LONG_BITS == 32) {
+ tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0,
+ (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
+            /* Zero-extend the address for use in the final host address. */
+ tcg_out_ext32u(s, TCG_REG_R4, addrlo);
+ addrlo = TCG_REG_R4;
+ } else if (a_bits == 0) {
+ tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - TARGET_PAGE_BITS);
+ } else {
+ tcg_out_rld(s, RLDICL, TCG_REG_R0, t,
+ 64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - a_bits);
+ tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0);
+ }
+ }
+
+ if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
+ tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1,
+ 0, 7, TCG_TYPE_I32);
+ tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_R4, 0, 6, TCG_TYPE_I32);
+ tcg_out32(s, CRAND | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
+ } else {
+ tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1,
+ 0, 7, TCG_TYPE_TL);
+ }
+
+ return addrlo;
+}
+
+/* Record the context of a call to the out of line helper code for the slow
+ path for a load or store, so that we can later generate the correct
+ helper code. */
+static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,
+ TCGReg datalo_reg, TCGReg datahi_reg,
+ TCGReg addrlo_reg, TCGReg addrhi_reg,
+ tcg_insn_unit *raddr, tcg_insn_unit *lptr)
+{
+ TCGLabelQemuLdst *label = new_ldst_label(s);
+
+ label->is_ld = is_ld;
+ label->oi = oi;
+ label->datalo_reg = datalo_reg;
+ label->datahi_reg = datahi_reg;
+ label->addrlo_reg = addrlo_reg;
+ label->addrhi_reg = addrhi_reg;
+ label->raddr = tcg_splitwx_to_rx(raddr);
+ label->label_ptr[0] = lptr;
+}
+
+static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
+{
+ MemOpIdx oi = lb->oi;
+ MemOp opc = get_memop(oi);
+ TCGReg hi, lo, arg = TCG_REG_R3;
+
+ if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
+ return false;
+ }
+
+ tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0);
+
+ lo = lb->addrlo_reg;
+ hi = lb->addrhi_reg;
+ if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
+#ifdef TCG_TARGET_CALL_ALIGN_ARGS
+ arg |= 1;
+#endif
+ tcg_out_mov(s, TCG_TYPE_I32, arg++, hi);
+ tcg_out_mov(s, TCG_TYPE_I32, arg++, lo);
+ } else {
+ /* If the address needed to be zero-extended, we'll have already
+ placed it in R4. The only remaining case is 64-bit guest. */
+ tcg_out_mov(s, TCG_TYPE_TL, arg++, lo);
+ }
+
+ tcg_out_movi(s, TCG_TYPE_I32, arg++, oi);
+ tcg_out32(s, MFSPR | RT(arg) | LR);
+
+ tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
+
+ lo = lb->datalo_reg;
+ hi = lb->datahi_reg;
+ if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
+ tcg_out_mov(s, TCG_TYPE_I32, lo, TCG_REG_R4);
+ tcg_out_mov(s, TCG_TYPE_I32, hi, TCG_REG_R3);
+ } else if (opc & MO_SIGN) {
+ uint32_t insn = qemu_exts_opc[opc & MO_SIZE];
+ tcg_out32(s, insn | RA(lo) | RS(TCG_REG_R3));
+ } else {
+ tcg_out_mov(s, TCG_TYPE_REG, lo, TCG_REG_R3);
+ }
+
+ tcg_out_b(s, 0, lb->raddr);
+ return true;
+}
+
+static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
+{
+ MemOpIdx oi = lb->oi;
+ MemOp opc = get_memop(oi);
+ MemOp s_bits = opc & MO_SIZE;
+ TCGReg hi, lo, arg = TCG_REG_R3;
+
+ if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
+ return false;
+ }
+
+ tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0);
+
+ lo = lb->addrlo_reg;
+ hi = lb->addrhi_reg;
+ if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
+#ifdef TCG_TARGET_CALL_ALIGN_ARGS
+ arg |= 1;
+#endif
+ tcg_out_mov(s, TCG_TYPE_I32, arg++, hi);
+ tcg_out_mov(s, TCG_TYPE_I32, arg++, lo);
+ } else {
+ /* If the address needed to be zero-extended, we'll have already
+ placed it in R4. The only remaining case is 64-bit guest. */
+ tcg_out_mov(s, TCG_TYPE_TL, arg++, lo);
+ }
+
+ lo = lb->datalo_reg;
+ hi = lb->datahi_reg;
+ if (TCG_TARGET_REG_BITS == 32) {
+ switch (s_bits) {
+ case MO_64:
+#ifdef TCG_TARGET_CALL_ALIGN_ARGS
+ arg |= 1;
+#endif
+ tcg_out_mov(s, TCG_TYPE_I32, arg++, hi);
+ /* FALLTHRU */
+ case MO_32:
+ tcg_out_mov(s, TCG_TYPE_I32, arg++, lo);
+ break;
+ default:
+ tcg_out_rlw(s, RLWINM, arg++, lo, 0, 32 - (8 << s_bits), 31);
+ break;
+ }
+ } else {
+ if (s_bits == MO_64) {
+ tcg_out_mov(s, TCG_TYPE_I64, arg++, lo);
+ } else {
+ tcg_out_rld(s, RLDICL, arg++, lo, 0, 64 - (8 << s_bits));
+ }
+ }
+
+ tcg_out_movi(s, TCG_TYPE_I32, arg++, oi);
+ tcg_out32(s, MFSPR | RT(arg) | LR);
+
+ tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
+
+ tcg_out_b(s, 0, lb->raddr);
+ return true;
+}
+#endif /* SOFTMMU */
+
+static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
+{
+ TCGReg datalo, datahi, addrlo, rbase;
+ TCGReg addrhi __attribute__((unused));
+ MemOpIdx oi;
+ MemOp opc, s_bits;
+#ifdef CONFIG_SOFTMMU
+ int mem_index;
+ tcg_insn_unit *label_ptr;
+#endif
+
+ datalo = *args++;
+ datahi = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0);
+ addrlo = *args++;
+ addrhi = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0);
+ oi = *args++;
+ opc = get_memop(oi);
+ s_bits = opc & MO_SIZE;
+
+#ifdef CONFIG_SOFTMMU
+ mem_index = get_mmuidx(oi);
+ addrlo = tcg_out_tlb_read(s, opc, addrlo, addrhi, mem_index, true);
+
+    /* Conditional branch-and-link to the slow path (relocated later);
+       the link register then points at the fast-path code, which the
+       slow path passes to the helper as its return address.  */
+ label_ptr = s->code_ptr;
+ tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
+
+ rbase = TCG_REG_R3;
+#else /* !CONFIG_SOFTMMU */
+ rbase = guest_base ? TCG_GUEST_BASE_REG : 0;
+ if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
+ tcg_out_ext32u(s, TCG_REG_TMP1, addrlo);
+ addrlo = TCG_REG_TMP1;
+ }
+#endif
+
+ if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) {
+ if (opc & MO_BSWAP) {
+ tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
+ tcg_out32(s, LWBRX | TAB(datalo, rbase, addrlo));
+ tcg_out32(s, LWBRX | TAB(datahi, rbase, TCG_REG_R0));
+ } else if (rbase != 0) {
+ tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
+ tcg_out32(s, LWZX | TAB(datahi, rbase, addrlo));
+ tcg_out32(s, LWZX | TAB(datalo, rbase, TCG_REG_R0));
+ } else if (addrlo == datahi) {
+ tcg_out32(s, LWZ | TAI(datalo, addrlo, 4));
+ tcg_out32(s, LWZ | TAI(datahi, addrlo, 0));
+ } else {
+ tcg_out32(s, LWZ | TAI(datahi, addrlo, 0));
+ tcg_out32(s, LWZ | TAI(datalo, addrlo, 4));
+ }
+ } else {
+ uint32_t insn = qemu_ldx_opc[opc & (MO_BSWAP | MO_SSIZE)];
+ if (!have_isa_2_06 && insn == LDBRX) {
+ tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
+ tcg_out32(s, LWBRX | TAB(datalo, rbase, addrlo));
+ tcg_out32(s, LWBRX | TAB(TCG_REG_R0, rbase, TCG_REG_R0));
+ tcg_out_rld(s, RLDIMI, datalo, TCG_REG_R0, 32, 0);
+ } else if (insn) {
+ tcg_out32(s, insn | TAB(datalo, rbase, addrlo));
+ } else {
+ insn = qemu_ldx_opc[opc & (MO_SIZE | MO_BSWAP)];
+ tcg_out32(s, insn | TAB(datalo, rbase, addrlo));
+ insn = qemu_exts_opc[s_bits];
+ tcg_out32(s, insn | RA(datalo) | RS(datalo));
+ }
+ }
+
+#ifdef CONFIG_SOFTMMU
+ add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
+ s->code_ptr, label_ptr);
+#endif
+}
+
+static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
+{
+ TCGReg datalo, datahi, addrlo, rbase;
+ TCGReg addrhi __attribute__((unused));
+ MemOpIdx oi;
+ MemOp opc, s_bits;
+#ifdef CONFIG_SOFTMMU
+ int mem_index;
+ tcg_insn_unit *label_ptr;
+#endif
+
+ datalo = *args++;
+ datahi = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0);
+ addrlo = *args++;
+ addrhi = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0);
+ oi = *args++;
+ opc = get_memop(oi);
+ s_bits = opc & MO_SIZE;
+
+#ifdef CONFIG_SOFTMMU
+ mem_index = get_mmuidx(oi);
+ addrlo = tcg_out_tlb_read(s, opc, addrlo, addrhi, mem_index, false);
+
+    /* Conditional branch-and-link to the slow path (relocated later);
+       the link register then points at the fast-path code, which the
+       slow path passes to the helper as its return address.  */
+ label_ptr = s->code_ptr;
+ tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
+
+ rbase = TCG_REG_R3;
+#else /* !CONFIG_SOFTMMU */
+ rbase = guest_base ? TCG_GUEST_BASE_REG : 0;
+ if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
+ tcg_out_ext32u(s, TCG_REG_TMP1, addrlo);
+ addrlo = TCG_REG_TMP1;
+ }
+#endif
+
+ if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) {
+ if (opc & MO_BSWAP) {
+ tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
+ tcg_out32(s, STWBRX | SAB(datalo, rbase, addrlo));
+ tcg_out32(s, STWBRX | SAB(datahi, rbase, TCG_REG_R0));
+ } else if (rbase != 0) {
+ tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
+ tcg_out32(s, STWX | SAB(datahi, rbase, addrlo));
+ tcg_out32(s, STWX | SAB(datalo, rbase, TCG_REG_R0));
+ } else {
+ tcg_out32(s, STW | TAI(datahi, addrlo, 0));
+ tcg_out32(s, STW | TAI(datalo, addrlo, 4));
+ }
+ } else {
+ uint32_t insn = qemu_stx_opc[opc & (MO_BSWAP | MO_SIZE)];
+ if (!have_isa_2_06 && insn == STDBRX) {
+ tcg_out32(s, STWBRX | SAB(datalo, rbase, addrlo));
+ tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, addrlo, 4));
+ tcg_out_shri64(s, TCG_REG_R0, datalo, 32);
+ tcg_out32(s, STWBRX | SAB(TCG_REG_R0, rbase, TCG_REG_TMP1));
+ } else {
+ tcg_out32(s, insn | SAB(datalo, rbase, addrlo));
+ }
+ }
+
+#ifdef CONFIG_SOFTMMU
+ add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi,
+ s->code_ptr, label_ptr);
+#endif
+}
+
+static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
+{
+ int i;
+ for (i = 0; i < count; ++i) {
+ p[i] = NOP;
+ }
+}
+
+/* Parameters for function call generation, used in tcg.c. */
+#define TCG_TARGET_STACK_ALIGN 16
+#define TCG_TARGET_EXTEND_ARGS 1
+
+#ifdef _CALL_AIX
+# define LINK_AREA_SIZE (6 * SZR)
+# define LR_OFFSET (1 * SZR)
+# define TCG_TARGET_CALL_STACK_OFFSET (LINK_AREA_SIZE + 8 * SZR)
+#elif defined(_CALL_DARWIN)
+# define LINK_AREA_SIZE (6 * SZR)
+# define LR_OFFSET (2 * SZR)
+#elif TCG_TARGET_REG_BITS == 64
+# if defined(_CALL_ELF) && _CALL_ELF == 2
+# define LINK_AREA_SIZE (4 * SZR)
+# define LR_OFFSET (1 * SZR)
+# endif
+#else /* TCG_TARGET_REG_BITS == 32 */
+# if defined(_CALL_SYSV)
+# define LINK_AREA_SIZE (2 * SZR)
+# define LR_OFFSET (1 * SZR)
+# endif
+#endif
+#ifndef LR_OFFSET
+# error "Unhandled abi"
+#endif
+#ifndef TCG_TARGET_CALL_STACK_OFFSET
+# define TCG_TARGET_CALL_STACK_OFFSET LINK_AREA_SIZE
+#endif
+
+#define CPU_TEMP_BUF_SIZE (CPU_TEMP_BUF_NLONGS * (int)sizeof(long))
+#define REG_SAVE_SIZE ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * SZR)
+
+#define FRAME_SIZE ((TCG_TARGET_CALL_STACK_OFFSET \
+ + TCG_STATIC_CALL_ARGS_SIZE \
+ + CPU_TEMP_BUF_SIZE \
+ + REG_SAVE_SIZE \
+ + TCG_TARGET_STACK_ALIGN - 1) \
+ & -TCG_TARGET_STACK_ALIGN)
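+/* The "+ TCG_TARGET_STACK_ALIGN - 1, & -TCG_TARGET_STACK_ALIGN" idiom
+   rounds the total up to the stack alignment; e.g. a raw total of 200
+   bytes becomes 208 with 16-byte alignment.  */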
+
+#define REG_SAVE_BOT (FRAME_SIZE - REG_SAVE_SIZE)
+
+static void tcg_target_qemu_prologue(TCGContext *s)
+{
+ int i;
+
+#ifdef _CALL_AIX
+ const void **desc = (const void **)s->code_ptr;
+ desc[0] = tcg_splitwx_to_rx(desc + 2); /* entry point */
+ desc[1] = 0; /* environment pointer */
+ s->code_ptr = (void *)(desc + 2); /* skip over descriptor */
+#endif
+
+ tcg_set_frame(s, TCG_REG_CALL_STACK, REG_SAVE_BOT - CPU_TEMP_BUF_SIZE,
+ CPU_TEMP_BUF_SIZE);
+
+ /* Prologue */
+ tcg_out32(s, MFSPR | RT(TCG_REG_R0) | LR);
+ tcg_out32(s, (SZR == 8 ? STDU : STWU)
+ | SAI(TCG_REG_R1, TCG_REG_R1, -FRAME_SIZE));
+
+ for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
+ tcg_out_st(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
+ TCG_REG_R1, REG_SAVE_BOT + i * SZR);
+ }
+ tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);
+
+#ifndef CONFIG_SOFTMMU
+ if (guest_base) {
+ tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base, true);
+ tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
+ }
+#endif
+
+ tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
+ tcg_out32(s, MTSPR | RS(tcg_target_call_iarg_regs[1]) | CTR);
+ if (USE_REG_TB) {
+ tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, tcg_target_call_iarg_regs[1]);
+ }
+ tcg_out32(s, BCCTR | BO_ALWAYS);
+
+ /* Epilogue */
+ tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
+
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);
+ for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
+ tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
+ TCG_REG_R1, REG_SAVE_BOT + i * SZR);
+ }
+ tcg_out32(s, MTSPR | RS(TCG_REG_R0) | LR);
+ tcg_out32(s, ADDI | TAI(TCG_REG_R1, TCG_REG_R1, FRAME_SIZE));
+ tcg_out32(s, BCLR | BO_ALWAYS);
+}
+
+static void tcg_out_op(TCGContext *s, TCGOpcode opc,
+ const TCGArg args[TCG_MAX_OP_ARGS],
+ const int const_args[TCG_MAX_OP_ARGS])
+{
+ TCGArg a0, a1, a2;
+
+ switch (opc) {
+ case INDEX_op_exit_tb:
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, args[0]);
+ tcg_out_b(s, 0, tcg_code_gen_epilogue);
+ break;
+ case INDEX_op_goto_tb:
+ if (s->tb_jmp_insn_offset) {
+ /* Direct jump. */
+ if (TCG_TARGET_REG_BITS == 64) {
+ /* Ensure the next insns are 8-byte aligned. */
+ if ((uintptr_t)s->code_ptr & 7) {
+ tcg_out32(s, NOP);
+ }
+ s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s);
+ tcg_out32(s, ADDIS | TAI(TCG_REG_TB, TCG_REG_TB, 0));
+ tcg_out32(s, ADDI | TAI(TCG_REG_TB, TCG_REG_TB, 0));
+ } else {
+ s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s);
+ tcg_out32(s, B);
+ s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s);
+ break;
+ }
+ } else {
+ /* Indirect jump. */
+ tcg_debug_assert(s->tb_jmp_insn_offset == NULL);
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TB, 0,
+ (intptr_t)(s->tb_jmp_insn_offset + args[0]));
+ }
+ tcg_out32(s, MTSPR | RS(TCG_REG_TB) | CTR);
+ tcg_out32(s, BCCTR | BO_ALWAYS);
+ set_jmp_reset_offset(s, args[0]);
+ if (USE_REG_TB) {
+ /* For the unlinked case, need to reset TCG_REG_TB. */
+ tcg_out_mem_long(s, ADDI, ADD, TCG_REG_TB, TCG_REG_TB,
+ -tcg_current_code_size(s));
+ }
+ break;
+ case INDEX_op_goto_ptr:
+ tcg_out32(s, MTSPR | RS(args[0]) | CTR);
+ if (USE_REG_TB) {
+ tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, args[0]);
+ }
+ tcg_out32(s, ADDI | TAI(TCG_REG_R3, 0, 0));
+ tcg_out32(s, BCCTR | BO_ALWAYS);
+ break;
+ case INDEX_op_br:
+ {
+ TCGLabel *l = arg_label(args[0]);
+ uint32_t insn = B;
+
+ if (l->has_value) {
+ insn |= reloc_pc24_val(tcg_splitwx_to_rx(s->code_ptr),
+ l->u.value_ptr);
+ } else {
+ tcg_out_reloc(s, s->code_ptr, R_PPC_REL24, l, 0);
+ }
+ tcg_out32(s, insn);
+ }
+ break;
+ case INDEX_op_ld8u_i32:
+ case INDEX_op_ld8u_i64:
+ tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]);
+ break;
+ case INDEX_op_ld8s_i32:
+ case INDEX_op_ld8s_i64:
+ tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]);
+ tcg_out_ext8s(s, args[0], args[0]);
+ break;
+ case INDEX_op_ld16u_i32:
+ case INDEX_op_ld16u_i64:
+ tcg_out_mem_long(s, LHZ, LHZX, args[0], args[1], args[2]);
+ break;
+ case INDEX_op_ld16s_i32:
+ case INDEX_op_ld16s_i64:
+ tcg_out_mem_long(s, LHA, LHAX, args[0], args[1], args[2]);
+ break;
+ case INDEX_op_ld_i32:
+ case INDEX_op_ld32u_i64:
+ tcg_out_mem_long(s, LWZ, LWZX, args[0], args[1], args[2]);
+ break;
+ case INDEX_op_ld32s_i64:
+ tcg_out_mem_long(s, LWA, LWAX, args[0], args[1], args[2]);
+ break;
+ case INDEX_op_ld_i64:
+ tcg_out_mem_long(s, LD, LDX, args[0], args[1], args[2]);
+ break;
+ case INDEX_op_st8_i32:
+ case INDEX_op_st8_i64:
+ tcg_out_mem_long(s, STB, STBX, args[0], args[1], args[2]);
+ break;
+ case INDEX_op_st16_i32:
+ case INDEX_op_st16_i64:
+ tcg_out_mem_long(s, STH, STHX, args[0], args[1], args[2]);
+ break;
+ case INDEX_op_st_i32:
+ case INDEX_op_st32_i64:
+ tcg_out_mem_long(s, STW, STWX, args[0], args[1], args[2]);
+ break;
+ case INDEX_op_st_i64:
+ tcg_out_mem_long(s, STD, STDX, args[0], args[1], args[2]);
+ break;
+
+ case INDEX_op_add_i32:
+ a0 = args[0], a1 = args[1], a2 = args[2];
+ if (const_args[2]) {
+ do_addi_32:
+ tcg_out_mem_long(s, ADDI, ADD, a0, a1, (int32_t)a2);
+ } else {
+ tcg_out32(s, ADD | TAB(a0, a1, a2));
+ }
+ break;
+ case INDEX_op_sub_i32:
+ a0 = args[0], a1 = args[1], a2 = args[2];
+ if (const_args[1]) {
+ if (const_args[2]) {
+ tcg_out_movi(s, TCG_TYPE_I32, a0, a1 - a2);
+ } else {
+ tcg_out32(s, SUBFIC | TAI(a0, a2, a1));
+ }
+ } else if (const_args[2]) {
+ a2 = -a2;
+ goto do_addi_32;
+ } else {
+ tcg_out32(s, SUBF | TAB(a0, a2, a1));
+ }
+ break;
+
+ case INDEX_op_and_i32:
+ a0 = args[0], a1 = args[1], a2 = args[2];
+ if (const_args[2]) {
+ tcg_out_andi32(s, a0, a1, a2);
+ } else {
+ tcg_out32(s, AND | SAB(a1, a0, a2));
+ }
+ break;
+ case INDEX_op_and_i64:
+ a0 = args[0], a1 = args[1], a2 = args[2];
+ if (const_args[2]) {
+ tcg_out_andi64(s, a0, a1, a2);
+ } else {
+ tcg_out32(s, AND | SAB(a1, a0, a2));
+ }
+ break;
+ case INDEX_op_or_i64:
+ case INDEX_op_or_i32:
+ a0 = args[0], a1 = args[1], a2 = args[2];
+ if (const_args[2]) {
+ tcg_out_ori32(s, a0, a1, a2);
+ } else {
+ tcg_out32(s, OR | SAB(a1, a0, a2));
+ }
+ break;
+ case INDEX_op_xor_i64:
+ case INDEX_op_xor_i32:
+ a0 = args[0], a1 = args[1], a2 = args[2];
+ if (const_args[2]) {
+ tcg_out_xori32(s, a0, a1, a2);
+ } else {
+ tcg_out32(s, XOR | SAB(a1, a0, a2));
+ }
+ break;
+ case INDEX_op_andc_i32:
+ a0 = args[0], a1 = args[1], a2 = args[2];
+ if (const_args[2]) {
+ tcg_out_andi32(s, a0, a1, ~a2);
+ } else {
+ tcg_out32(s, ANDC | SAB(a1, a0, a2));
+ }
+ break;
+ case INDEX_op_andc_i64:
+ a0 = args[0], a1 = args[1], a2 = args[2];
+ if (const_args[2]) {
+ tcg_out_andi64(s, a0, a1, ~a2);
+ } else {
+ tcg_out32(s, ANDC | SAB(a1, a0, a2));
+ }
+ break;
+ case INDEX_op_orc_i32:
+ if (const_args[2]) {
+ tcg_out_ori32(s, args[0], args[1], ~args[2]);
+ break;
+ }
+ /* FALLTHRU */
+ case INDEX_op_orc_i64:
+ tcg_out32(s, ORC | SAB(args[1], args[0], args[2]));
+ break;
+ case INDEX_op_eqv_i32:
+ if (const_args[2]) {
+ tcg_out_xori32(s, args[0], args[1], ~args[2]);
+ break;
+ }
+ /* FALLTHRU */
+ case INDEX_op_eqv_i64:
+ tcg_out32(s, EQV | SAB(args[1], args[0], args[2]));
+ break;
+ case INDEX_op_nand_i32:
+ case INDEX_op_nand_i64:
+ tcg_out32(s, NAND | SAB(args[1], args[0], args[2]));
+ break;
+ case INDEX_op_nor_i32:
+ case INDEX_op_nor_i64:
+ tcg_out32(s, NOR | SAB(args[1], args[0], args[2]));
+ break;
+
+ case INDEX_op_clz_i32:
+ tcg_out_cntxz(s, TCG_TYPE_I32, CNTLZW, args[0], args[1],
+ args[2], const_args[2]);
+ break;
+ case INDEX_op_ctz_i32:
+ tcg_out_cntxz(s, TCG_TYPE_I32, CNTTZW, args[0], args[1],
+ args[2], const_args[2]);
+ break;
+ case INDEX_op_ctpop_i32:
+ tcg_out32(s, CNTPOPW | SAB(args[1], args[0], 0));
+ break;
+
+ case INDEX_op_clz_i64:
+ tcg_out_cntxz(s, TCG_TYPE_I64, CNTLZD, args[0], args[1],
+ args[2], const_args[2]);
+ break;
+ case INDEX_op_ctz_i64:
+ tcg_out_cntxz(s, TCG_TYPE_I64, CNTTZD, args[0], args[1],
+ args[2], const_args[2]);
+ break;
+ case INDEX_op_ctpop_i64:
+ tcg_out32(s, CNTPOPD | SAB(args[1], args[0], 0));
+ break;
+
+ case INDEX_op_mul_i32:
+ a0 = args[0], a1 = args[1], a2 = args[2];
+ if (const_args[2]) {
+ tcg_out32(s, MULLI | TAI(a0, a1, a2));
+ } else {
+ tcg_out32(s, MULLW | TAB(a0, a1, a2));
+ }
+ break;
+
+ case INDEX_op_div_i32:
+ tcg_out32(s, DIVW | TAB(args[0], args[1], args[2]));
+ break;
+
+ case INDEX_op_divu_i32:
+ tcg_out32(s, DIVWU | TAB(args[0], args[1], args[2]));
+ break;
+
+ case INDEX_op_shl_i32:
+ if (const_args[2]) {
+ /* Limit immediate shift count lest we create an illegal insn. */
+ tcg_out_shli32(s, args[0], args[1], args[2] & 31);
+ } else {
+ tcg_out32(s, SLW | SAB(args[1], args[0], args[2]));
+ }
+ break;
+ case INDEX_op_shr_i32:
+ if (const_args[2]) {
+ /* Limit immediate shift count lest we create an illegal insn. */
+ tcg_out_shri32(s, args[0], args[1], args[2] & 31);
+ } else {
+ tcg_out32(s, SRW | SAB(args[1], args[0], args[2]));
+ }
+ break;
+ case INDEX_op_sar_i32:
+ if (const_args[2]) {
+ tcg_out_sari32(s, args[0], args[1], args[2]);
+ } else {
+ tcg_out32(s, SRAW | SAB(args[1], args[0], args[2]));
+ }
+ break;
+ case INDEX_op_rotl_i32:
+ if (const_args[2]) {
+ tcg_out_rlw(s, RLWINM, args[0], args[1], args[2], 0, 31);
+ } else {
+ tcg_out32(s, RLWNM | SAB(args[1], args[0], args[2])
+ | MB(0) | ME(31));
+ }
+ break;
+ case INDEX_op_rotr_i32:
+ if (const_args[2]) {
+ tcg_out_rlw(s, RLWINM, args[0], args[1], 32 - args[2], 0, 31);
+ } else {
+ tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 32));
+ tcg_out32(s, RLWNM | SAB(args[1], args[0], TCG_REG_R0)
+ | MB(0) | ME(31));
+ }
+ break;
+
+ case INDEX_op_brcond_i32:
+ tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
+ arg_label(args[3]), TCG_TYPE_I32);
+ break;
+ case INDEX_op_brcond_i64:
+ tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
+ arg_label(args[3]), TCG_TYPE_I64);
+ break;
+ case INDEX_op_brcond2_i32:
+ tcg_out_brcond2(s, args, const_args);
+ break;
+
+ case INDEX_op_neg_i32:
+ case INDEX_op_neg_i64:
+ tcg_out32(s, NEG | RT(args[0]) | RA(args[1]));
+ break;
+
+ case INDEX_op_not_i32:
+ case INDEX_op_not_i64:
+ tcg_out32(s, NOR | SAB(args[1], args[0], args[1]));
+ break;
+
+ case INDEX_op_add_i64:
+ a0 = args[0], a1 = args[1], a2 = args[2];
+ if (const_args[2]) {
+ do_addi_64:
+ tcg_out_mem_long(s, ADDI, ADD, a0, a1, a2);
+ } else {
+ tcg_out32(s, ADD | TAB(a0, a1, a2));
+ }
+ break;
+ case INDEX_op_sub_i64:
+ a0 = args[0], a1 = args[1], a2 = args[2];
+ if (const_args[1]) {
+ if (const_args[2]) {
+ tcg_out_movi(s, TCG_TYPE_I64, a0, a1 - a2);
+ } else {
+ tcg_out32(s, SUBFIC | TAI(a0, a2, a1));
+ }
+ } else if (const_args[2]) {
+ a2 = -a2;
+ goto do_addi_64;
+ } else {
+ tcg_out32(s, SUBF | TAB(a0, a2, a1));
+ }
+ break;
+
+ case INDEX_op_shl_i64:
+ if (const_args[2]) {
+ /* Limit immediate shift count lest we create an illegal insn. */
+ tcg_out_shli64(s, args[0], args[1], args[2] & 63);
+ } else {
+ tcg_out32(s, SLD | SAB(args[1], args[0], args[2]));
+ }
+ break;
+ case INDEX_op_shr_i64:
+ if (const_args[2]) {
+ /* Limit immediate shift count lest we create an illegal insn. */
+ tcg_out_shri64(s, args[0], args[1], args[2] & 63);
+ } else {
+ tcg_out32(s, SRD | SAB(args[1], args[0], args[2]));
+ }
+ break;
+ case INDEX_op_sar_i64:
+ if (const_args[2]) {
+ tcg_out_sari64(s, args[0], args[1], args[2]);
+ } else {
+ tcg_out32(s, SRAD | SAB(args[1], args[0], args[2]));
+ }
+ break;
+ case INDEX_op_rotl_i64:
+ if (const_args[2]) {
+ tcg_out_rld(s, RLDICL, args[0], args[1], args[2], 0);
+ } else {
+ tcg_out32(s, RLDCL | SAB(args[1], args[0], args[2]) | MB64(0));
+ }
+ break;
+ case INDEX_op_rotr_i64:
+ if (const_args[2]) {
+ tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 0);
+ } else {
+ tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 64));
+ tcg_out32(s, RLDCL | SAB(args[1], args[0], TCG_REG_R0) | MB64(0));
+ }
+ break;
+
+ case INDEX_op_mul_i64:
+ a0 = args[0], a1 = args[1], a2 = args[2];
+ if (const_args[2]) {
+ tcg_out32(s, MULLI | TAI(a0, a1, a2));
+ } else {
+ tcg_out32(s, MULLD | TAB(a0, a1, a2));
+ }
+ break;
+ case INDEX_op_div_i64:
+ tcg_out32(s, DIVD | TAB(args[0], args[1], args[2]));
+ break;
+ case INDEX_op_divu_i64:
+ tcg_out32(s, DIVDU | TAB(args[0], args[1], args[2]));
+ break;
+
+ case INDEX_op_qemu_ld_i32:
+ tcg_out_qemu_ld(s, args, false);
+ break;
+ case INDEX_op_qemu_ld_i64:
+ tcg_out_qemu_ld(s, args, true);
+ break;
+ case INDEX_op_qemu_st_i32:
+ tcg_out_qemu_st(s, args, false);
+ break;
+ case INDEX_op_qemu_st_i64:
+ tcg_out_qemu_st(s, args, true);
+ break;
+
+ case INDEX_op_ext8s_i32:
+ case INDEX_op_ext8s_i64:
+ tcg_out_ext8s(s, args[0], args[1]);
+ break;
+ case INDEX_op_ext16s_i32:
+ case INDEX_op_ext16s_i64:
+ tcg_out_ext16s(s, args[0], args[1]);
+ break;
+ case INDEX_op_ext_i32_i64:
+ case INDEX_op_ext32s_i64:
+ tcg_out_ext32s(s, args[0], args[1]);
+ break;
+ case INDEX_op_extu_i32_i64:
+ tcg_out_ext32u(s, args[0], args[1]);
+ break;
+
+ case INDEX_op_setcond_i32:
+ tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2],
+ const_args[2]);
+ break;
+ case INDEX_op_setcond_i64:
+ tcg_out_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], args[2],
+ const_args[2]);
+ break;
+ case INDEX_op_setcond2_i32:
+ tcg_out_setcond2(s, args, const_args);
+ break;
+
+ case INDEX_op_bswap16_i32:
+ case INDEX_op_bswap16_i64:
+ tcg_out_bswap16(s, args[0], args[1], args[2]);
+ break;
+ case INDEX_op_bswap32_i32:
+ tcg_out_bswap32(s, args[0], args[1], 0);
+ break;
+ case INDEX_op_bswap32_i64:
+ tcg_out_bswap32(s, args[0], args[1], args[2]);
+ break;
+ case INDEX_op_bswap64_i64:
+ tcg_out_bswap64(s, args[0], args[1]);
+ break;
+
+ case INDEX_op_deposit_i32:
+ if (const_args[2]) {
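+            /*
+             * The rZ constraint only allows zero as a constant here, so a
+             * constant deposit clears the field: e.g. len = 8, pos = 16
+             * gives mask = ((2u << 7) - 1) << 16 = 0x00ff0000.
+             */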
+ uint32_t mask = ((2u << (args[4] - 1)) - 1) << args[3];
+ tcg_out_andi32(s, args[0], args[0], ~mask);
+ } else {
+ tcg_out_rlw(s, RLWIMI, args[0], args[2], args[3],
+ 32 - args[3] - args[4], 31 - args[3]);
+ }
+ break;
+ case INDEX_op_deposit_i64:
+ if (const_args[2]) {
+ uint64_t mask = ((2ull << (args[4] - 1)) - 1) << args[3];
+ tcg_out_andi64(s, args[0], args[0], ~mask);
+ } else {
+ tcg_out_rld(s, RLDIMI, args[0], args[2], args[3],
+ 64 - args[3] - args[4]);
+ }
+ break;
+
+ case INDEX_op_extract_i32:
+ tcg_out_rlw(s, RLWINM, args[0], args[1],
+ 32 - args[2], 32 - args[3], 31);
+ break;
+ case INDEX_op_extract_i64:
+ tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 64 - args[3]);
+ break;
+
+ case INDEX_op_movcond_i32:
+ tcg_out_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1], args[2],
+ args[3], args[4], const_args[2]);
+ break;
+ case INDEX_op_movcond_i64:
+ tcg_out_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1], args[2],
+ args[3], args[4], const_args[2]);
+ break;
+
+#if TCG_TARGET_REG_BITS == 64
+ case INDEX_op_add2_i64:
+#else
+ case INDEX_op_add2_i32:
+#endif
+ /* Note that the CA bit is defined based on the word size of the
+ environment. So in 64-bit mode it's always carry-out of bit 63.
+ The fallback code using deposit works just as well for 32-bit. */
+ a0 = args[0], a1 = args[1];
+ if (a0 == args[3] || (!const_args[5] && a0 == args[5])) {
+ a0 = TCG_REG_R0;
+ }
+ if (const_args[4]) {
+ tcg_out32(s, ADDIC | TAI(a0, args[2], args[4]));
+ } else {
+ tcg_out32(s, ADDC | TAB(a0, args[2], args[4]));
+ }
+ if (const_args[5]) {
+ tcg_out32(s, (args[5] ? ADDME : ADDZE) | RT(a1) | RA(args[3]));
+ } else {
+ tcg_out32(s, ADDE | TAB(a1, args[3], args[5]));
+ }
+ if (a0 != args[0]) {
+ tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
+ }
+ break;
+
+#if TCG_TARGET_REG_BITS == 64
+ case INDEX_op_sub2_i64:
+#else
+ case INDEX_op_sub2_i32:
+#endif
+ a0 = args[0], a1 = args[1];
+ if (a0 == args[5] || (!const_args[3] && a0 == args[3])) {
+ a0 = TCG_REG_R0;
+ }
+ if (const_args[2]) {
+ tcg_out32(s, SUBFIC | TAI(a0, args[4], args[2]));
+ } else {
+ tcg_out32(s, SUBFC | TAB(a0, args[4], args[2]));
+ }
+ if (const_args[3]) {
+ tcg_out32(s, (args[3] ? SUBFME : SUBFZE) | RT(a1) | RA(args[5]));
+ } else {
+ tcg_out32(s, SUBFE | TAB(a1, args[5], args[3]));
+ }
+ if (a0 != args[0]) {
+ tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
+ }
+ break;
+
+ case INDEX_op_muluh_i32:
+ tcg_out32(s, MULHWU | TAB(args[0], args[1], args[2]));
+ break;
+ case INDEX_op_mulsh_i32:
+ tcg_out32(s, MULHW | TAB(args[0], args[1], args[2]));
+ break;
+ case INDEX_op_muluh_i64:
+ tcg_out32(s, MULHDU | TAB(args[0], args[1], args[2]));
+ break;
+ case INDEX_op_mulsh_i64:
+ tcg_out32(s, MULHD | TAB(args[0], args[1], args[2]));
+ break;
+
+ case INDEX_op_mb:
+ tcg_out_mb(s, args[0]);
+ break;
+
+ case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
+ case INDEX_op_mov_i64:
+ case INDEX_op_call: /* Always emitted via tcg_out_call. */
+ default:
+ tcg_abort();
+ }
+}
+
+int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
+{
+ switch (opc) {
+ case INDEX_op_and_vec:
+ case INDEX_op_or_vec:
+ case INDEX_op_xor_vec:
+ case INDEX_op_andc_vec:
+ case INDEX_op_not_vec:
+ return 1;
+ case INDEX_op_orc_vec:
+ return have_isa_2_07;
+ case INDEX_op_add_vec:
+ case INDEX_op_sub_vec:
+ case INDEX_op_smax_vec:
+ case INDEX_op_smin_vec:
+ case INDEX_op_umax_vec:
+ case INDEX_op_umin_vec:
+ case INDEX_op_shlv_vec:
+ case INDEX_op_shrv_vec:
+ case INDEX_op_sarv_vec:
+ case INDEX_op_rotlv_vec:
+ return vece <= MO_32 || have_isa_2_07;
+ case INDEX_op_ssadd_vec:
+ case INDEX_op_sssub_vec:
+ case INDEX_op_usadd_vec:
+ case INDEX_op_ussub_vec:
+ return vece <= MO_32;
+ case INDEX_op_cmp_vec:
+ case INDEX_op_shli_vec:
+ case INDEX_op_shri_vec:
+ case INDEX_op_sari_vec:
+ case INDEX_op_rotli_vec:
+ return vece <= MO_32 || have_isa_2_07 ? -1 : 0;
+ case INDEX_op_neg_vec:
+ return vece >= MO_32 && have_isa_3_00;
+ case INDEX_op_mul_vec:
+ switch (vece) {
+ case MO_8:
+ case MO_16:
+ return -1;
+ case MO_32:
+ return have_isa_2_07 ? 1 : -1;
+ case MO_64:
+ return have_isa_3_10;
+ }
+ return 0;
+ case INDEX_op_bitsel_vec:
+ return have_vsx;
+ case INDEX_op_rotrv_vec:
+ return -1;
+ default:
+ return 0;
+ }
+}
+
+static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
+ TCGReg dst, TCGReg src)
+{
+ tcg_debug_assert(dst >= TCG_REG_V0);
+
+    /* Splatting from an integer register is only allowed by the
+       constraints when ISA v3.00 is available.  */
+ if (src < TCG_REG_V0) {
+ tcg_debug_assert(have_isa_3_00);
+ switch (vece) {
+ case MO_64:
+ tcg_out32(s, MTVSRDD | VRT(dst) | RA(src) | RB(src));
+ return true;
+ case MO_32:
+ tcg_out32(s, MTVSRWS | VRT(dst) | RA(src));
+ return true;
+ default:
+ /* Fail, so that we fall back on either dupm or mov+dup. */
+ return false;
+ }
+ }
+
+ /*
+ * Recall we use (or emulate) VSX integer loads, so the integer is
+ * right justified within the left (zero-index) double-word.
+ */
+ switch (vece) {
+ case MO_8:
+ tcg_out32(s, VSPLTB | VRT(dst) | VRB(src) | (7 << 16));
+ break;
+ case MO_16:
+ tcg_out32(s, VSPLTH | VRT(dst) | VRB(src) | (3 << 16));
+ break;
+ case MO_32:
+ tcg_out32(s, VSPLTW | VRT(dst) | VRB(src) | (1 << 16));
+ break;
+ case MO_64:
+ if (have_vsx) {
+ tcg_out32(s, XXPERMDI | VRT(dst) | VRA(src) | VRB(src));
+ break;
+ }
+ tcg_out_vsldoi(s, TCG_VEC_TMP1, src, src, 8);
+ tcg_out_vsldoi(s, dst, TCG_VEC_TMP1, src, 8);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ return true;
+}
+
+static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
+ TCGReg out, TCGReg base, intptr_t offset)
+{
+ int elt;
+
+ tcg_debug_assert(out >= TCG_REG_V0);
+ switch (vece) {
+ case MO_8:
+ if (have_isa_3_00) {
+ tcg_out_mem_long(s, LXV, LVX, out, base, offset & -16);
+ } else {
+ tcg_out_mem_long(s, 0, LVEBX, out, base, offset);
+ }
+ elt = extract32(offset, 0, 4);
+#ifndef HOST_WORDS_BIGENDIAN
+ elt ^= 15;
+#endif
+ tcg_out32(s, VSPLTB | VRT(out) | VRB(out) | (elt << 16));
+ break;
+ case MO_16:
+ tcg_debug_assert((offset & 1) == 0);
+ if (have_isa_3_00) {
+ tcg_out_mem_long(s, LXV | 8, LVX, out, base, offset & -16);
+ } else {
+ tcg_out_mem_long(s, 0, LVEHX, out, base, offset);
+ }
+ elt = extract32(offset, 1, 3);
+#ifndef HOST_WORDS_BIGENDIAN
+ elt ^= 7;
+#endif
+ tcg_out32(s, VSPLTH | VRT(out) | VRB(out) | (elt << 16));
+ break;
+ case MO_32:
+ if (have_isa_3_00) {
+ tcg_out_mem_long(s, 0, LXVWSX, out, base, offset);
+ break;
+ }
+ tcg_debug_assert((offset & 3) == 0);
+ tcg_out_mem_long(s, 0, LVEWX, out, base, offset);
+ elt = extract32(offset, 2, 2);
+#ifndef HOST_WORDS_BIGENDIAN
+ elt ^= 3;
+#endif
+ tcg_out32(s, VSPLTW | VRT(out) | VRB(out) | (elt << 16));
+ break;
+ case MO_64:
+ if (have_vsx) {
+ tcg_out_mem_long(s, 0, LXVDSX, out, base, offset);
+ break;
+ }
+ tcg_debug_assert((offset & 7) == 0);
+ tcg_out_mem_long(s, 0, LVX, out, base, offset & -16);
+ tcg_out_vsldoi(s, TCG_VEC_TMP1, out, out, 8);
+ elt = extract32(offset, 3, 1);
+#ifndef HOST_WORDS_BIGENDIAN
+ elt = !elt;
+#endif
+ if (elt) {
+ tcg_out_vsldoi(s, out, out, TCG_VEC_TMP1, 8);
+ } else {
+ tcg_out_vsldoi(s, out, TCG_VEC_TMP1, out, 8);
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ return true;
+}
+
+static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
+ unsigned vecl, unsigned vece,
+ const TCGArg args[TCG_MAX_OP_ARGS],
+ const int const_args[TCG_MAX_OP_ARGS])
+{
+ static const uint32_t
+ add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM },
+ sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, VSUBUDM },
+ mul_op[4] = { 0, 0, VMULUWM, VMULLD },
+ neg_op[4] = { 0, 0, VNEGW, VNEGD },
+ eq_op[4] = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD },
+ ne_op[4] = { VCMPNEB, VCMPNEH, VCMPNEW, 0 },
+ gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, VCMPGTSD },
+ gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, VCMPGTUD },
+ ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 },
+ usadd_op[4] = { VADDUBS, VADDUHS, VADDUWS, 0 },
+ sssub_op[4] = { VSUBSBS, VSUBSHS, VSUBSWS, 0 },
+ ussub_op[4] = { VSUBUBS, VSUBUHS, VSUBUWS, 0 },
+ umin_op[4] = { VMINUB, VMINUH, VMINUW, VMINUD },
+ smin_op[4] = { VMINSB, VMINSH, VMINSW, VMINSD },
+ umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, VMAXUD },
+ smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, VMAXSD },
+ shlv_op[4] = { VSLB, VSLH, VSLW, VSLD },
+ shrv_op[4] = { VSRB, VSRH, VSRW, VSRD },
+ sarv_op[4] = { VSRAB, VSRAH, VSRAW, VSRAD },
+ mrgh_op[4] = { VMRGHB, VMRGHH, VMRGHW, 0 },
+ mrgl_op[4] = { VMRGLB, VMRGLH, VMRGLW, 0 },
+ muleu_op[4] = { VMULEUB, VMULEUH, VMULEUW, 0 },
+ mulou_op[4] = { VMULOUB, VMULOUH, VMULOUW, 0 },
+ pkum_op[4] = { VPKUHUM, VPKUWUM, 0, 0 },
+ rotl_op[4] = { VRLB, VRLH, VRLW, VRLD };
+
+ TCGType type = vecl + TCG_TYPE_V64;
+ TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
+ uint32_t insn;
+
+ switch (opc) {
+ case INDEX_op_ld_vec:
+ tcg_out_ld(s, type, a0, a1, a2);
+ return;
+ case INDEX_op_st_vec:
+ tcg_out_st(s, type, a0, a1, a2);
+ return;
+ case INDEX_op_dupm_vec:
+ tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
+ return;
+
+ case INDEX_op_add_vec:
+ insn = add_op[vece];
+ break;
+ case INDEX_op_sub_vec:
+ insn = sub_op[vece];
+ break;
+ case INDEX_op_neg_vec:
+ insn = neg_op[vece];
+ a2 = a1;
+ a1 = 0;
+ break;
+ case INDEX_op_mul_vec:
+ insn = mul_op[vece];
+ break;
+ case INDEX_op_ssadd_vec:
+ insn = ssadd_op[vece];
+ break;
+ case INDEX_op_sssub_vec:
+ insn = sssub_op[vece];
+ break;
+ case INDEX_op_usadd_vec:
+ insn = usadd_op[vece];
+ break;
+ case INDEX_op_ussub_vec:
+ insn = ussub_op[vece];
+ break;
+ case INDEX_op_smin_vec:
+ insn = smin_op[vece];
+ break;
+ case INDEX_op_umin_vec:
+ insn = umin_op[vece];
+ break;
+ case INDEX_op_smax_vec:
+ insn = smax_op[vece];
+ break;
+ case INDEX_op_umax_vec:
+ insn = umax_op[vece];
+ break;
+ case INDEX_op_shlv_vec:
+ insn = shlv_op[vece];
+ break;
+ case INDEX_op_shrv_vec:
+ insn = shrv_op[vece];
+ break;
+ case INDEX_op_sarv_vec:
+ insn = sarv_op[vece];
+ break;
+ case INDEX_op_and_vec:
+ insn = VAND;
+ break;
+ case INDEX_op_or_vec:
+ insn = VOR;
+ break;
+ case INDEX_op_xor_vec:
+ insn = VXOR;
+ break;
+ case INDEX_op_andc_vec:
+ insn = VANDC;
+ break;
+ case INDEX_op_not_vec:
+ insn = VNOR;
+ a2 = a1;
+ break;
+ case INDEX_op_orc_vec:
+ insn = VORC;
+ break;
+
+ case INDEX_op_cmp_vec:
+ switch (args[3]) {
+ case TCG_COND_EQ:
+ insn = eq_op[vece];
+ break;
+ case TCG_COND_NE:
+ insn = ne_op[vece];
+ break;
+ case TCG_COND_GT:
+ insn = gts_op[vece];
+ break;
+ case TCG_COND_GTU:
+ insn = gtu_op[vece];
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ break;
+
+ case INDEX_op_bitsel_vec:
+ tcg_out32(s, XXSEL | VRT(a0) | VRC(a1) | VRB(a2) | VRA(args[3]));
+ return;
+
+ case INDEX_op_dup2_vec:
+ assert(TCG_TARGET_REG_BITS == 32);
+ /* With inputs a1 = xLxx, a2 = xHxx */
+ tcg_out32(s, VMRGHW | VRT(a0) | VRA(a2) | VRB(a1)); /* a0 = xxHL */
+ tcg_out_vsldoi(s, TCG_VEC_TMP1, a0, a0, 8); /* tmp = HLxx */
+ tcg_out_vsldoi(s, a0, a0, TCG_VEC_TMP1, 8); /* a0 = HLHL */
+ return;
+
+ case INDEX_op_ppc_mrgh_vec:
+ insn = mrgh_op[vece];
+ break;
+ case INDEX_op_ppc_mrgl_vec:
+ insn = mrgl_op[vece];
+ break;
+ case INDEX_op_ppc_muleu_vec:
+ insn = muleu_op[vece];
+ break;
+ case INDEX_op_ppc_mulou_vec:
+ insn = mulou_op[vece];
+ break;
+ case INDEX_op_ppc_pkum_vec:
+ insn = pkum_op[vece];
+ break;
+ case INDEX_op_rotlv_vec:
+ insn = rotl_op[vece];
+ break;
+ case INDEX_op_ppc_msum_vec:
+ tcg_debug_assert(vece == MO_16);
+ tcg_out32(s, VMSUMUHM | VRT(a0) | VRA(a1) | VRB(a2) | VRC(args[3]));
+ return;
+
+ case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */
+ case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */
+ default:
+ g_assert_not_reached();
+ }
+
+ tcg_debug_assert(insn != 0);
+ tcg_out32(s, insn | VRT(a0) | VRA(a1) | VRB(a2));
+}
+
+static void expand_vec_shi(TCGType type, unsigned vece, TCGv_vec v0,
+ TCGv_vec v1, TCGArg imm, TCGOpcode opci)
+{
+ TCGv_vec t1;
+
+ if (vece == MO_32) {
+ /*
+ * Only 5 bits are significant, and VSPLTISB can represent -16..15.
+         * Using negative immediates therefore covers shift counts 16..31.
+ */
+ imm = sextract32(imm, 0, 5);
+ } else {
+ imm &= (8 << vece) - 1;
+ }
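+    /*
+     * E.g. for MO_32 and imm = 20: sextract32(20, 0, 5) = -12, which
+     * VSPLTISB can encode, and the low five bits of -12 are still 20,
+     * which is all the per-element shift consumes.
+     */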
+
+ /* Splat w/bytes for xxspltib when 2.07 allows MO_64. */
+ t1 = tcg_constant_vec(type, MO_8, imm);
+ vec_gen_3(opci, type, vece, tcgv_vec_arg(v0),
+ tcgv_vec_arg(v1), tcgv_vec_arg(t1));
+}
+
+static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
+ TCGv_vec v1, TCGv_vec v2, TCGCond cond)
+{
+ bool need_swap = false, need_inv = false;
+
+ tcg_debug_assert(vece <= MO_32 || have_isa_2_07);
+
+ switch (cond) {
+ case TCG_COND_EQ:
+ case TCG_COND_GT:
+ case TCG_COND_GTU:
+ break;
+ case TCG_COND_NE:
+ if (have_isa_3_00 && vece <= MO_32) {
+ break;
+ }
+ /* fall through */
+ case TCG_COND_LE:
+ case TCG_COND_LEU:
+ need_inv = true;
+ break;
+ case TCG_COND_LT:
+ case TCG_COND_LTU:
+ need_swap = true;
+ break;
+ case TCG_COND_GE:
+ case TCG_COND_GEU:
+ need_swap = need_inv = true;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ if (need_inv) {
+ cond = tcg_invert_cond(cond);
+ }
+ if (need_swap) {
+ TCGv_vec t1;
+ t1 = v1, v1 = v2, v2 = t1;
+ cond = tcg_swap_cond(cond);
+ }
+
+ vec_gen_4(INDEX_op_cmp_vec, type, vece, tcgv_vec_arg(v0),
+ tcgv_vec_arg(v1), tcgv_vec_arg(v2), cond);
+
+ if (need_inv) {
+ tcg_gen_not_vec(vece, v0, v0);
+ }
+}
+
+static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0,
+ TCGv_vec v1, TCGv_vec v2)
+{
+ TCGv_vec t1 = tcg_temp_new_vec(type);
+ TCGv_vec t2 = tcg_temp_new_vec(type);
+ TCGv_vec c0, c16;
+
+ switch (vece) {
+ case MO_8:
+ case MO_16:
+ vec_gen_3(INDEX_op_ppc_muleu_vec, type, vece, tcgv_vec_arg(t1),
+ tcgv_vec_arg(v1), tcgv_vec_arg(v2));
+ vec_gen_3(INDEX_op_ppc_mulou_vec, type, vece, tcgv_vec_arg(t2),
+ tcgv_vec_arg(v1), tcgv_vec_arg(v2));
+ vec_gen_3(INDEX_op_ppc_mrgh_vec, type, vece + 1, tcgv_vec_arg(v0),
+ tcgv_vec_arg(t1), tcgv_vec_arg(t2));
+ vec_gen_3(INDEX_op_ppc_mrgl_vec, type, vece + 1, tcgv_vec_arg(t1),
+ tcgv_vec_arg(t1), tcgv_vec_arg(t2));
+ vec_gen_3(INDEX_op_ppc_pkum_vec, type, vece, tcgv_vec_arg(v0),
+ tcgv_vec_arg(v0), tcgv_vec_arg(t1));
+ break;
+
+ case MO_32:
+ tcg_debug_assert(!have_isa_2_07);
+ /*
+ * Only 5 bits are significant, and VSPLTISB can represent -16..15.
+ * So using -16 is a quick way to represent 16.
+ */
+ c16 = tcg_constant_vec(type, MO_8, -16);
+ c0 = tcg_constant_vec(type, MO_8, 0);
+
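+        /*
+         * Per 32-bit lane, with a and b split into 16-bit halves, this
+         * computes a * b mod 2^32 as
+         *   ((a_hi * b_lo + a_lo * b_hi) << 16) + a_lo * b_lo,
+         * where the msum of v1 with rotl(v2, 16) yields the parenthesized
+         * sum and mulou yields a_lo * b_lo.
+         */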
+ vec_gen_3(INDEX_op_rotlv_vec, type, MO_32, tcgv_vec_arg(t1),
+ tcgv_vec_arg(v2), tcgv_vec_arg(c16));
+ vec_gen_3(INDEX_op_ppc_mulou_vec, type, MO_16, tcgv_vec_arg(t2),
+ tcgv_vec_arg(v1), tcgv_vec_arg(v2));
+ vec_gen_4(INDEX_op_ppc_msum_vec, type, MO_16, tcgv_vec_arg(t1),
+ tcgv_vec_arg(v1), tcgv_vec_arg(t1), tcgv_vec_arg(c0));
+ vec_gen_3(INDEX_op_shlv_vec, type, MO_32, tcgv_vec_arg(t1),
+ tcgv_vec_arg(t1), tcgv_vec_arg(c16));
+ tcg_gen_add_vec(MO_32, v0, t1, t2);
+ break;
+
+ default:
+ g_assert_not_reached();
+ }
+ tcg_temp_free_vec(t1);
+ tcg_temp_free_vec(t2);
+}
+
+void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
+ TCGArg a0, ...)
+{
+ va_list va;
+ TCGv_vec v0, v1, v2, t0;
+ TCGArg a2;
+
+ va_start(va, a0);
+ v0 = temp_tcgv_vec(arg_temp(a0));
+ v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
+ a2 = va_arg(va, TCGArg);
+
+ switch (opc) {
+ case INDEX_op_shli_vec:
+ expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shlv_vec);
+ break;
+ case INDEX_op_shri_vec:
+ expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shrv_vec);
+ break;
+ case INDEX_op_sari_vec:
+ expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_sarv_vec);
+ break;
+ case INDEX_op_rotli_vec:
+ expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_rotlv_vec);
+ break;
+ case INDEX_op_cmp_vec:
+ v2 = temp_tcgv_vec(arg_temp(a2));
+ expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
+ break;
+ case INDEX_op_mul_vec:
+ v2 = temp_tcgv_vec(arg_temp(a2));
+ expand_vec_mul(type, vece, v0, v1, v2);
+ break;
+ case INDEX_op_rotlv_vec:
+ v2 = temp_tcgv_vec(arg_temp(a2));
+ t0 = tcg_temp_new_vec(type);
+ tcg_gen_neg_vec(vece, t0, v2);
+ tcg_gen_rotlv_vec(vece, v0, v1, t0);
+ tcg_temp_free_vec(t0);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ va_end(va);
+}
+
+static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
+{
+ switch (op) {
+ case INDEX_op_goto_ptr:
+ return C_O0_I1(r);
+
+ case INDEX_op_ld8u_i32:
+ case INDEX_op_ld8s_i32:
+ case INDEX_op_ld16u_i32:
+ case INDEX_op_ld16s_i32:
+ case INDEX_op_ld_i32:
+ case INDEX_op_ctpop_i32:
+ case INDEX_op_neg_i32:
+ case INDEX_op_not_i32:
+ case INDEX_op_ext8s_i32:
+ case INDEX_op_ext16s_i32:
+ case INDEX_op_bswap16_i32:
+ case INDEX_op_bswap32_i32:
+ case INDEX_op_extract_i32:
+ case INDEX_op_ld8u_i64:
+ case INDEX_op_ld8s_i64:
+ case INDEX_op_ld16u_i64:
+ case INDEX_op_ld16s_i64:
+ case INDEX_op_ld32u_i64:
+ case INDEX_op_ld32s_i64:
+ case INDEX_op_ld_i64:
+ case INDEX_op_ctpop_i64:
+ case INDEX_op_neg_i64:
+ case INDEX_op_not_i64:
+ case INDEX_op_ext8s_i64:
+ case INDEX_op_ext16s_i64:
+ case INDEX_op_ext32s_i64:
+ case INDEX_op_ext_i32_i64:
+ case INDEX_op_extu_i32_i64:
+ case INDEX_op_bswap16_i64:
+ case INDEX_op_bswap32_i64:
+ case INDEX_op_bswap64_i64:
+ case INDEX_op_extract_i64:
+ return C_O1_I1(r, r);
+
+ case INDEX_op_st8_i32:
+ case INDEX_op_st16_i32:
+ case INDEX_op_st_i32:
+ case INDEX_op_st8_i64:
+ case INDEX_op_st16_i64:
+ case INDEX_op_st32_i64:
+ case INDEX_op_st_i64:
+ return C_O0_I2(r, r);
+
+ case INDEX_op_add_i32:
+ case INDEX_op_and_i32:
+ case INDEX_op_or_i32:
+ case INDEX_op_xor_i32:
+ case INDEX_op_andc_i32:
+ case INDEX_op_orc_i32:
+ case INDEX_op_eqv_i32:
+ case INDEX_op_shl_i32:
+ case INDEX_op_shr_i32:
+ case INDEX_op_sar_i32:
+ case INDEX_op_rotl_i32:
+ case INDEX_op_rotr_i32:
+ case INDEX_op_setcond_i32:
+ case INDEX_op_and_i64:
+ case INDEX_op_andc_i64:
+ case INDEX_op_shl_i64:
+ case INDEX_op_shr_i64:
+ case INDEX_op_sar_i64:
+ case INDEX_op_rotl_i64:
+ case INDEX_op_rotr_i64:
+ case INDEX_op_setcond_i64:
+ return C_O1_I2(r, r, ri);
+
+ case INDEX_op_mul_i32:
+ case INDEX_op_mul_i64:
+ return C_O1_I2(r, r, rI);
+
+ case INDEX_op_div_i32:
+ case INDEX_op_divu_i32:
+ case INDEX_op_nand_i32:
+ case INDEX_op_nor_i32:
+ case INDEX_op_muluh_i32:
+ case INDEX_op_mulsh_i32:
+ case INDEX_op_orc_i64:
+ case INDEX_op_eqv_i64:
+ case INDEX_op_nand_i64:
+ case INDEX_op_nor_i64:
+ case INDEX_op_div_i64:
+ case INDEX_op_divu_i64:
+ case INDEX_op_mulsh_i64:
+ case INDEX_op_muluh_i64:
+ return C_O1_I2(r, r, r);
+
+ case INDEX_op_sub_i32:
+ return C_O1_I2(r, rI, ri);
+ case INDEX_op_add_i64:
+ return C_O1_I2(r, r, rT);
+ case INDEX_op_or_i64:
+ case INDEX_op_xor_i64:
+ return C_O1_I2(r, r, rU);
+ case INDEX_op_sub_i64:
+ return C_O1_I2(r, rI, rT);
+ case INDEX_op_clz_i32:
+ case INDEX_op_ctz_i32:
+ case INDEX_op_clz_i64:
+ case INDEX_op_ctz_i64:
+ return C_O1_I2(r, r, rZW);
+
+ case INDEX_op_brcond_i32:
+ case INDEX_op_brcond_i64:
+ return C_O0_I2(r, ri);
+
+ case INDEX_op_movcond_i32:
+ case INDEX_op_movcond_i64:
+ return C_O1_I4(r, r, ri, rZ, rZ);
+ case INDEX_op_deposit_i32:
+ case INDEX_op_deposit_i64:
+ return C_O1_I2(r, 0, rZ);
+ case INDEX_op_brcond2_i32:
+ return C_O0_I4(r, r, ri, ri);
+ case INDEX_op_setcond2_i32:
+ return C_O1_I4(r, r, r, ri, ri);
+ case INDEX_op_add2_i64:
+ case INDEX_op_add2_i32:
+ return C_O2_I4(r, r, r, r, rI, rZM);
+ case INDEX_op_sub2_i64:
+ case INDEX_op_sub2_i32:
+ return C_O2_I4(r, r, rI, rZM, r, r);
+
+ case INDEX_op_qemu_ld_i32:
+ return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
+ ? C_O1_I1(r, L)
+ : C_O1_I2(r, L, L));
+
+ case INDEX_op_qemu_st_i32:
+ return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
+ ? C_O0_I2(S, S)
+ : C_O0_I3(S, S, S));
+
+ case INDEX_op_qemu_ld_i64:
+ return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L)
+ : TARGET_LONG_BITS == 32 ? C_O2_I1(L, L, L)
+ : C_O2_I2(L, L, L, L));
+
+ case INDEX_op_qemu_st_i64:
+ return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(S, S)
+ : TARGET_LONG_BITS == 32 ? C_O0_I3(S, S, S)
+ : C_O0_I4(S, S, S, S));
+
+ case INDEX_op_add_vec:
+ case INDEX_op_sub_vec:
+ case INDEX_op_mul_vec:
+ case INDEX_op_and_vec:
+ case INDEX_op_or_vec:
+ case INDEX_op_xor_vec:
+ case INDEX_op_andc_vec:
+ case INDEX_op_orc_vec:
+ case INDEX_op_cmp_vec:
+ case INDEX_op_ssadd_vec:
+ case INDEX_op_sssub_vec:
+ case INDEX_op_usadd_vec:
+ case INDEX_op_ussub_vec:
+ case INDEX_op_smax_vec:
+ case INDEX_op_smin_vec:
+ case INDEX_op_umax_vec:
+ case INDEX_op_umin_vec:
+ case INDEX_op_shlv_vec:
+ case INDEX_op_shrv_vec:
+ case INDEX_op_sarv_vec:
+ case INDEX_op_rotlv_vec:
+ case INDEX_op_rotrv_vec:
+ case INDEX_op_ppc_mrgh_vec:
+ case INDEX_op_ppc_mrgl_vec:
+ case INDEX_op_ppc_muleu_vec:
+ case INDEX_op_ppc_mulou_vec:
+ case INDEX_op_ppc_pkum_vec:
+ case INDEX_op_dup2_vec:
+ return C_O1_I2(v, v, v);
+
+ case INDEX_op_not_vec:
+ case INDEX_op_neg_vec:
+ return C_O1_I1(v, v);
+
+ case INDEX_op_dup_vec:
+ return have_isa_3_00 ? C_O1_I1(v, vr) : C_O1_I1(v, v);
+
+ case INDEX_op_ld_vec:
+ case INDEX_op_dupm_vec:
+ return C_O1_I1(v, r);
+
+ case INDEX_op_st_vec:
+ return C_O0_I2(v, r);
+
+ case INDEX_op_bitsel_vec:
+ case INDEX_op_ppc_msum_vec:
+ return C_O1_I3(v, v, v, v);
+
+ default:
+ g_assert_not_reached();
+ }
+}
+
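As a reading aid for the switch above: each C_On_Im(...) identifier names a constraint set with <n> output and <m> input operands, and the letters are the ones declared in tcg/ppc/tcg-target-con-str.h ('r' a general register, 'v' a vector register, 'Z' the constant zero, '0' an input tied to output 0, the remaining capital letters various immediate classes). The table below is an editorial sketch, not QEMU code; it only spells out a few of the mappings the way a reader might decode them.

#include <stdio.h>

/* Editorial sketch only: a few of the PPC mappings above, spelled out. */
static const struct {
    const char *op;
    const char *outputs;
    const char *inputs;
} examples[] = {
    { "add_i32",     "r", "r, ri"         },   /* C_O1_I2(r, r, ri)         */
    { "sub_i64",     "r", "rI, rT"        },   /* C_O1_I2(r, rI, rT)        */
    { "deposit_i32", "r", "0, rZ"         },   /* C_O1_I2(r, 0, rZ)         */
    { "movcond_i32", "r", "r, ri, rZ, rZ" },   /* C_O1_I4(r, r, ri, rZ, rZ) */
    { "st_i32",      "-", "r, r"          },   /* C_O0_I2(r, r)             */
};

int main(void)
{
    for (size_t i = 0; i < sizeof(examples) / sizeof(examples[0]); i++) {
        printf("%-12s outputs: %-3s inputs: %s\n",
               examples[i].op, examples[i].outputs, examples[i].inputs);
    }
    return 0;
}
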
+static void tcg_target_init(TCGContext *s)
+{
+ unsigned long hwcap = qemu_getauxval(AT_HWCAP);
+ unsigned long hwcap2 = qemu_getauxval(AT_HWCAP2);
+
+ have_isa = tcg_isa_base;
+ if (hwcap & PPC_FEATURE_ARCH_2_06) {
+ have_isa = tcg_isa_2_06;
+ }
+#ifdef PPC_FEATURE2_ARCH_2_07
+ if (hwcap2 & PPC_FEATURE2_ARCH_2_07) {
+ have_isa = tcg_isa_2_07;
+ }
+#endif
+#ifdef PPC_FEATURE2_ARCH_3_00
+ if (hwcap2 & PPC_FEATURE2_ARCH_3_00) {
+ have_isa = tcg_isa_3_00;
+ }
+#endif
+#ifdef PPC_FEATURE2_ARCH_3_10
+ if (hwcap2 & PPC_FEATURE2_ARCH_3_10) {
+ have_isa = tcg_isa_3_10;
+ }
+#endif
+
+#ifdef PPC_FEATURE2_HAS_ISEL
+ /* Prefer explicit instruction from the kernel. */
+ have_isel = (hwcap2 & PPC_FEATURE2_HAS_ISEL) != 0;
+#else
+ /* Fall back to knowing Power7 (2.06) has ISEL. */
+ have_isel = have_isa_2_06;
+#endif
+
+ if (hwcap & PPC_FEATURE_HAS_ALTIVEC) {
+ have_altivec = true;
+ /* We only care about the portion of VSX that overlaps Altivec. */
+ if (hwcap & PPC_FEATURE_HAS_VSX) {
+ have_vsx = true;
+ }
+ }
+
+ tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff;
+ tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff;
+ if (have_altivec) {
+ tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
+ tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
+ }
+
+ tcg_target_call_clobber_regs = 0;
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R2);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R4);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R5);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R6);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R7);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R12);
+
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V0);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V1);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V2);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V3);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V4);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V5);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V6);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V7);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V16);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V17);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V18);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V19);
+
+ s->reserved_regs = 0;
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); /* tcg temp */
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1); /* stack pointer */
+#if defined(_CALL_SYSV)
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_R2); /* toc pointer */
+#endif
+#if defined(_CALL_SYSV) || TCG_TARGET_REG_BITS == 64
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */
+#endif
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1); /* mem temp */
+ tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP1);
+ tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP2);
+ if (USE_REG_TB) {
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB); /* tb->tc_ptr */
+ }
+}
+
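tcg_target_init() derives the host ISA level and vector support purely from the AT_HWCAP/AT_HWCAP2 auxiliary-vector bits. Here is a minimal standalone sketch of the same probe, assuming Linux with glibc's <sys/auxv.h>; the PPC_FEATURE* bits come from <asm/cputable.h> and only exist on powerpc builds, hence the guards.

#include <stdio.h>
#include <sys/auxv.h>
#ifdef __powerpc__
#include <asm/cputable.h>
#endif

int main(void)
{
    const char *isa = "base (pre-2.06)";

#ifdef __powerpc__
    unsigned long hwcap = getauxval(AT_HWCAP);
    unsigned long hwcap2 = getauxval(AT_HWCAP2);

    if (hwcap & PPC_FEATURE_ARCH_2_06) {
        isa = "2.06";
    }
#ifdef PPC_FEATURE2_ARCH_2_07
    if (hwcap2 & PPC_FEATURE2_ARCH_2_07) {
        isa = "2.07";
    }
#endif
#ifdef PPC_FEATURE2_ARCH_3_00
    if (hwcap2 & PPC_FEATURE2_ARCH_3_00) {
        isa = "3.00";
    }
#endif
#ifdef PPC_FEATURE2_ARCH_3_10
    if (hwcap2 & PPC_FEATURE2_ARCH_3_10) {
        isa = "3.10";
    }
#endif
    printf("AT_HWCAP=%#lx AT_HWCAP2=%#lx\n", hwcap, hwcap2);
#endif
    printf("Power ISA level: %s\n", isa);
    return 0;
}
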
+#ifdef __ELF__
+typedef struct {
+ DebugFrameCIE cie;
+ DebugFrameFDEHeader fde;
+ uint8_t fde_def_cfa[4];
+ uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2 + 3];
+} DebugFrame;
+
+/* We're expecting a 2 byte uleb128 encoded value. */
+QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
+
+#if TCG_TARGET_REG_BITS == 64
+# define ELF_HOST_MACHINE EM_PPC64
+#else
+# define ELF_HOST_MACHINE EM_PPC
+#endif
+
+static DebugFrame debug_frame = {
+ .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
+ .cie.id = -1,
+ .cie.version = 1,
+ .cie.code_align = 1,
+ .cie.data_align = (-SZR & 0x7f), /* sleb128 -SZR */
+ .cie.return_column = 65,
+
+ /* Total FDE size does not include the "len" member. */
+ .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),
+
+ .fde_def_cfa = {
+ 12, TCG_REG_R1, /* DW_CFA_def_cfa r1, ... */
+ (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
+ (FRAME_SIZE >> 7)
+ },
+ .fde_reg_ofs = {
+ /* DW_CFA_offset_extended_sf, lr, LR_OFFSET */
+ 0x11, 65, (LR_OFFSET / -SZR) & 0x7f,
+ }
+};
+
+void tcg_register_jit(const void *buf, size_t buf_size)
+{
+ uint8_t *p = &debug_frame.fde_reg_ofs[3];
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i, p += 2) {
+ p[0] = 0x80 + tcg_target_callee_save_regs[i];
+ p[1] = (FRAME_SIZE - (REG_SAVE_BOT + i * SZR)) / SZR;
+ }
+
+ debug_frame.fde.func_start = (uintptr_t)buf;
+ debug_frame.fde.func_len = buf_size;
+
+ tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
+}
+#endif /* __ELF__ */
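The .fde_def_cfa initializer above hand-encodes FRAME_SIZE as a two-byte ULEB128, which is why the build-time check insists on FRAME_SIZE < 1 << 14. For readers unfamiliar with the encoding, here is a small generic encoder; it is an editorial sketch rather than anything taken from QEMU, and 0x240 is just an arbitrary example value.

#include <stdint.h>
#include <stdio.h>

/* Standard DWARF ULEB128: 7 data bits per byte, MSB set on all but the last. */
static size_t encode_uleb128(uint32_t value, uint8_t *out)
{
    size_t n = 0;

    do {
        uint8_t byte = value & 0x7f;
        value >>= 7;
        if (value) {
            byte |= 0x80;       /* continuation bit: more bytes follow */
        }
        out[n++] = byte;
    } while (value);
    return n;
}

int main(void)
{
    uint8_t buf[5];
    /* Values in [0x80, 1 << 14) take exactly two bytes, matching the
       "(FRAME_SIZE & 0x7f) | 0x80, FRAME_SIZE >> 7" pair above. */
    size_t n = encode_uleb128(0x240, buf);

    printf("uleb128(0x240) =");
    for (size_t i = 0; i < n; i++) {
        printf(" 0x%02x", buf[i]);
    }
    printf("\n");
    return 0;
}
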
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
new file mode 100644
index 000000000..0943192cd
--- /dev/null
+++ b/tcg/ppc/tcg-target.h
@@ -0,0 +1,190 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef PPC_TCG_TARGET_H
+#define PPC_TCG_TARGET_H
+
+#ifdef _ARCH_PPC64
+# define TCG_TARGET_REG_BITS 64
+# define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB)
+#else
+# define TCG_TARGET_REG_BITS 32
+# define MAX_CODE_GEN_BUFFER_SIZE (32 * MiB)
+#endif
+
+#define TCG_TARGET_NB_REGS 64
+#define TCG_TARGET_INSN_UNIT_SIZE 4
+#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16
+
+typedef enum {
+ TCG_REG_R0, TCG_REG_R1, TCG_REG_R2, TCG_REG_R3,
+ TCG_REG_R4, TCG_REG_R5, TCG_REG_R6, TCG_REG_R7,
+ TCG_REG_R8, TCG_REG_R9, TCG_REG_R10, TCG_REG_R11,
+ TCG_REG_R12, TCG_REG_R13, TCG_REG_R14, TCG_REG_R15,
+ TCG_REG_R16, TCG_REG_R17, TCG_REG_R18, TCG_REG_R19,
+ TCG_REG_R20, TCG_REG_R21, TCG_REG_R22, TCG_REG_R23,
+ TCG_REG_R24, TCG_REG_R25, TCG_REG_R26, TCG_REG_R27,
+ TCG_REG_R28, TCG_REG_R29, TCG_REG_R30, TCG_REG_R31,
+
+ TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
+ TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
+ TCG_REG_V8, TCG_REG_V9, TCG_REG_V10, TCG_REG_V11,
+ TCG_REG_V12, TCG_REG_V13, TCG_REG_V14, TCG_REG_V15,
+ TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
+ TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
+ TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
+ TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
+
+ TCG_REG_CALL_STACK = TCG_REG_R1,
+ TCG_AREG0 = TCG_REG_R27
+} TCGReg;
+
+typedef enum {
+ tcg_isa_base,
+ tcg_isa_2_06,
+ tcg_isa_2_07,
+ tcg_isa_3_00,
+ tcg_isa_3_10,
+} TCGPowerISA;
+
+extern TCGPowerISA have_isa;
+extern bool have_altivec;
+extern bool have_vsx;
+
+#define have_isa_2_06 (have_isa >= tcg_isa_2_06)
+#define have_isa_2_07 (have_isa >= tcg_isa_2_07)
+#define have_isa_3_00 (have_isa >= tcg_isa_3_00)
+#define have_isa_3_10 (have_isa >= tcg_isa_3_10)
+
+/* optional instructions automatically implemented */
+#define TCG_TARGET_HAS_ext8u_i32 0 /* andi */
+#define TCG_TARGET_HAS_ext16u_i32 0
+
+/* optional instructions */
+#define TCG_TARGET_HAS_div_i32 1
+#define TCG_TARGET_HAS_rem_i32 0
+#define TCG_TARGET_HAS_rot_i32 1
+#define TCG_TARGET_HAS_ext8s_i32 1
+#define TCG_TARGET_HAS_ext16s_i32 1
+#define TCG_TARGET_HAS_bswap16_i32 1
+#define TCG_TARGET_HAS_bswap32_i32 1
+#define TCG_TARGET_HAS_not_i32 1
+#define TCG_TARGET_HAS_neg_i32 1
+#define TCG_TARGET_HAS_andc_i32 1
+#define TCG_TARGET_HAS_orc_i32 1
+#define TCG_TARGET_HAS_eqv_i32 1
+#define TCG_TARGET_HAS_nand_i32 1
+#define TCG_TARGET_HAS_nor_i32 1
+#define TCG_TARGET_HAS_clz_i32 1
+#define TCG_TARGET_HAS_ctz_i32 have_isa_3_00
+#define TCG_TARGET_HAS_ctpop_i32 have_isa_2_06
+#define TCG_TARGET_HAS_deposit_i32 1
+#define TCG_TARGET_HAS_extract_i32 1
+#define TCG_TARGET_HAS_sextract_i32 0
+#define TCG_TARGET_HAS_extract2_i32 0
+#define TCG_TARGET_HAS_movcond_i32 1
+#define TCG_TARGET_HAS_mulu2_i32 0
+#define TCG_TARGET_HAS_muls2_i32 0
+#define TCG_TARGET_HAS_muluh_i32 1
+#define TCG_TARGET_HAS_mulsh_i32 1
+#define TCG_TARGET_HAS_direct_jump 1
+#define TCG_TARGET_HAS_qemu_st8_i32 0
+
+#if TCG_TARGET_REG_BITS == 64
+#define TCG_TARGET_HAS_add2_i32 0
+#define TCG_TARGET_HAS_sub2_i32 0
+#define TCG_TARGET_HAS_extrl_i64_i32 0
+#define TCG_TARGET_HAS_extrh_i64_i32 0
+#define TCG_TARGET_HAS_div_i64 1
+#define TCG_TARGET_HAS_rem_i64 0
+#define TCG_TARGET_HAS_rot_i64 1
+#define TCG_TARGET_HAS_ext8s_i64 1
+#define TCG_TARGET_HAS_ext16s_i64 1
+#define TCG_TARGET_HAS_ext32s_i64 1
+#define TCG_TARGET_HAS_ext8u_i64 0
+#define TCG_TARGET_HAS_ext16u_i64 0
+#define TCG_TARGET_HAS_ext32u_i64 0
+#define TCG_TARGET_HAS_bswap16_i64 1
+#define TCG_TARGET_HAS_bswap32_i64 1
+#define TCG_TARGET_HAS_bswap64_i64 1
+#define TCG_TARGET_HAS_not_i64 1
+#define TCG_TARGET_HAS_neg_i64 1
+#define TCG_TARGET_HAS_andc_i64 1
+#define TCG_TARGET_HAS_orc_i64 1
+#define TCG_TARGET_HAS_eqv_i64 1
+#define TCG_TARGET_HAS_nand_i64 1
+#define TCG_TARGET_HAS_nor_i64 1
+#define TCG_TARGET_HAS_clz_i64 1
+#define TCG_TARGET_HAS_ctz_i64 have_isa_3_00
+#define TCG_TARGET_HAS_ctpop_i64 have_isa_2_06
+#define TCG_TARGET_HAS_deposit_i64 1
+#define TCG_TARGET_HAS_extract_i64 1
+#define TCG_TARGET_HAS_sextract_i64 0
+#define TCG_TARGET_HAS_extract2_i64 0
+#define TCG_TARGET_HAS_movcond_i64 1
+#define TCG_TARGET_HAS_add2_i64 1
+#define TCG_TARGET_HAS_sub2_i64 1
+#define TCG_TARGET_HAS_mulu2_i64 0
+#define TCG_TARGET_HAS_muls2_i64 0
+#define TCG_TARGET_HAS_muluh_i64 1
+#define TCG_TARGET_HAS_mulsh_i64 1
+#endif
+
+/*
+ * While technically Altivec could support V64, it has no 64-bit store
+ * instruction and substituting two 32-bit stores makes the generated
+ * code quite large.
+ */
+#define TCG_TARGET_HAS_v64 have_vsx
+#define TCG_TARGET_HAS_v128 have_altivec
+#define TCG_TARGET_HAS_v256 0
+
+#define TCG_TARGET_HAS_andc_vec 1
+#define TCG_TARGET_HAS_orc_vec have_isa_2_07
+#define TCG_TARGET_HAS_not_vec 1
+#define TCG_TARGET_HAS_neg_vec have_isa_3_00
+#define TCG_TARGET_HAS_abs_vec 0
+#define TCG_TARGET_HAS_roti_vec 0
+#define TCG_TARGET_HAS_rots_vec 0
+#define TCG_TARGET_HAS_rotv_vec 1
+#define TCG_TARGET_HAS_shi_vec 0
+#define TCG_TARGET_HAS_shs_vec 0
+#define TCG_TARGET_HAS_shv_vec 1
+#define TCG_TARGET_HAS_mul_vec 1
+#define TCG_TARGET_HAS_sat_vec 1
+#define TCG_TARGET_HAS_minmax_vec 1
+#define TCG_TARGET_HAS_bitsel_vec have_vsx
+#define TCG_TARGET_HAS_cmpsel_vec 0
+
+void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+
+#define TCG_TARGET_DEFAULT_MO (0)
+#define TCG_TARGET_HAS_MEMORY_BSWAP 1
+
+#ifdef CONFIG_SOFTMMU
+#define TCG_TARGET_NEED_LDST_LABELS
+#endif
+#define TCG_TARGET_NEED_POOL_LABELS
+
+#endif
diff --git a/tcg/ppc/tcg-target.opc.h b/tcg/ppc/tcg-target.opc.h
new file mode 100644
index 000000000..db514403c
--- /dev/null
+++ b/tcg/ppc/tcg-target.opc.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2019 Linaro Limited
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ * Target-specific opcodes for host vector expansion. These will be
+ * emitted by tcg_expand_vec_op. For those familiar with GCC internals,
+ * consider these to be UNSPEC with names.
+ */
+
+DEF(ppc_mrgh_vec, 1, 2, 0, IMPLVEC)
+DEF(ppc_mrgl_vec, 1, 2, 0, IMPLVEC)
+DEF(ppc_msum_vec, 1, 3, 0, IMPLVEC)
+DEF(ppc_muleu_vec, 1, 2, 0, IMPLVEC)
+DEF(ppc_mulou_vec, 1, 2, 0, IMPLVEC)
+DEF(ppc_pkum_vec, 1, 2, 0, IMPLVEC)
diff --git a/tcg/region.c b/tcg/region.c
new file mode 100644
index 000000000..9cc30d492
--- /dev/null
+++ b/tcg/region.c
@@ -0,0 +1,891 @@
+/*
+ * Memory region management for Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/units.h"
+#include "qapi/error.h"
+#include "exec/exec-all.h"
+#include "tcg/tcg.h"
+#include "tcg-internal.h"
+
+
+struct tcg_region_tree {
+ QemuMutex lock;
+ GTree *tree;
+ /* padding to avoid false sharing is computed at run-time */
+};
+
+/*
+ * We divide code_gen_buffer into equally-sized "regions" that TCG threads
+ * dynamically allocate from as demand dictates. Given appropriate region
+ * sizing, this minimizes flushes even when some TCG threads generate a lot
+ * more code than others.
+ */
+struct tcg_region_state {
+ QemuMutex lock;
+
+ /* fields set at init time */
+ void *start_aligned;
+ void *after_prologue;
+ size_t n;
+ size_t size; /* size of one region */
+ size_t stride; /* .size + guard size */
+ size_t total_size; /* size of entire buffer, >= n * stride */
+
+ /* fields protected by the lock */
+ size_t current; /* current region index */
+ size_t agg_size_full; /* aggregate size of full regions */
+};
+
+static struct tcg_region_state region;
+
+/*
+ * This is an array of struct tcg_region_tree's, with padding.
+ * We use void * to simplify the computation of region_trees[i]; each
+ * struct is found every tree_size bytes.
+ */
+static void *region_trees;
+static size_t tree_size;
+
+bool in_code_gen_buffer(const void *p)
+{
+ /*
+ * Much like it is valid to have a pointer to the byte past the
+ * end of an array (so long as you don't dereference it), allow
+ * a pointer to the byte past the end of the code gen buffer.
+ */
+ return (size_t)(p - region.start_aligned) <= region.total_size;
+}
+
+#ifdef CONFIG_DEBUG_TCG
+const void *tcg_splitwx_to_rx(void *rw)
+{
+ /* Pass NULL pointers unchanged. */
+ if (rw) {
+ g_assert(in_code_gen_buffer(rw));
+ rw += tcg_splitwx_diff;
+ }
+ return rw;
+}
+
+void *tcg_splitwx_to_rw(const void *rx)
+{
+ /* Pass NULL pointers unchanged. */
+ if (rx) {
+ rx -= tcg_splitwx_diff;
+ /* Assert that we end with a pointer in the rw region. */
+ g_assert(in_code_gen_buffer(rx));
+ }
+ return (void *)rx;
+}
+#endif /* CONFIG_DEBUG_TCG */
+
+/* compare a pointer @ptr and a tb_tc @s */
+static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
+{
+ if (ptr >= s->ptr + s->size) {
+ return 1;
+ } else if (ptr < s->ptr) {
+ return -1;
+ }
+ return 0;
+}
+
+static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp, gpointer userdata)
+{
+ const struct tb_tc *a = ap;
+ const struct tb_tc *b = bp;
+
+ /*
+ * When both sizes are set, we know this isn't a lookup.
+ * This is the most likely case: every TB must be inserted; lookups
+ * are a lot less frequent.
+ */
+ if (likely(a->size && b->size)) {
+ if (a->ptr > b->ptr) {
+ return 1;
+ } else if (a->ptr < b->ptr) {
+ return -1;
+ }
+ /* a->ptr == b->ptr should happen only on deletions */
+ g_assert(a->size == b->size);
+ return 0;
+ }
+ /*
+ * All lookups use a key with the .size field set to 0.
+ * From the glib sources we see that @ap is always the lookup key. However
+ * the docs provide no guarantee, so we just mark this case as likely.
+ */
+ if (likely(a->size == 0)) {
+ return ptr_cmp_tb_tc(a->ptr, b);
+ }
+ return ptr_cmp_tb_tc(b->ptr, a);
+}
+
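The comparator above does double duty: for inserts and removals both operands are real tb_tc entries and it orders them by start pointer, while a lookup passes a key whose .size is 0 and expects to match the entry whose [ptr, ptr + size) range contains it. The following self-contained illustration of that trick requires GLib; "struct range" is a stand-in for tb_tc, not the QEMU type.

#include <glib.h>
#include <stdio.h>

/* Stand-in for struct tb_tc: a half-open range [ptr, ptr + size). */
struct range { guintptr ptr; gsize size; };

static gint range_cmp(gconstpointer ap, gconstpointer bp, gpointer u)
{
    const struct range *a = ap, *b = bp;

    if (a->size && b->size) {            /* both real entries: insert/remove */
        return (a->ptr > b->ptr) - (a->ptr < b->ptr);
    }
    if (a->size == 0) {                  /* a is a zero-sized lookup key */
        return (a->ptr >= b->ptr + b->size) - (a->ptr < b->ptr);
    }
    /* b is the lookup key: same test, result negated */
    return -((b->ptr >= a->ptr + a->size) - (b->ptr < a->ptr));
}

int main(void)
{
    GTree *t = g_tree_new_full(range_cmp, NULL, NULL, NULL);
    struct range tb1 = { 0x1000, 0x80 }, tb2 = { 0x2000, 0x40 };
    struct range key = { 0x1010, 0 };    /* a "host pc" inside tb1 */

    g_tree_insert(t, &tb1, "tb1");
    g_tree_insert(t, &tb2, "tb2");
    printf("host pc 0x%lx falls in %s\n",
           (unsigned long)key.ptr, (char *)g_tree_lookup(t, &key));
    g_tree_destroy(t);
    return 0;
}
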
+static void tb_destroy(gpointer value)
+{
+ TranslationBlock *tb = value;
+ qemu_spin_destroy(&tb->jmp_lock);
+}
+
+static void tcg_region_trees_init(void)
+{
+ size_t i;
+
+ tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
+ region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
+ for (i = 0; i < region.n; i++) {
+ struct tcg_region_tree *rt = region_trees + i * tree_size;
+
+ qemu_mutex_init(&rt->lock);
+ rt->tree = g_tree_new_full(tb_tc_cmp, NULL, NULL, tb_destroy);
+ }
+}
+
+static struct tcg_region_tree *tc_ptr_to_region_tree(const void *p)
+{
+ size_t region_idx;
+
+ /*
+ * Like tcg_splitwx_to_rw, with no assert. The pc may come from
+ * a signal handler over which the caller has no control.
+ */
+ if (!in_code_gen_buffer(p)) {
+ p -= tcg_splitwx_diff;
+ if (!in_code_gen_buffer(p)) {
+ return NULL;
+ }
+ }
+
+ if (p < region.start_aligned) {
+ region_idx = 0;
+ } else {
+ ptrdiff_t offset = p - region.start_aligned;
+
+ if (offset > region.stride * (region.n - 1)) {
+ region_idx = region.n - 1;
+ } else {
+ region_idx = offset / region.stride;
+ }
+ }
+ return region_trees + region_idx * tree_size;
+}
+
+void tcg_tb_insert(TranslationBlock *tb)
+{
+ struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
+
+ g_assert(rt != NULL);
+ qemu_mutex_lock(&rt->lock);
+ g_tree_insert(rt->tree, &tb->tc, tb);
+ qemu_mutex_unlock(&rt->lock);
+}
+
+void tcg_tb_remove(TranslationBlock *tb)
+{
+ struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
+
+ g_assert(rt != NULL);
+ qemu_mutex_lock(&rt->lock);
+ g_tree_remove(rt->tree, &tb->tc);
+ qemu_mutex_unlock(&rt->lock);
+}
+
+/*
+ * Find the TB 'tb' such that
+ * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
+ * Return NULL if not found.
+ */
+TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
+{
+ struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
+ TranslationBlock *tb;
+ struct tb_tc s = { .ptr = (void *)tc_ptr };
+
+ if (rt == NULL) {
+ return NULL;
+ }
+
+ qemu_mutex_lock(&rt->lock);
+ tb = g_tree_lookup(rt->tree, &s);
+ qemu_mutex_unlock(&rt->lock);
+ return tb;
+}
+
+static void tcg_region_tree_lock_all(void)
+{
+ size_t i;
+
+ for (i = 0; i < region.n; i++) {
+ struct tcg_region_tree *rt = region_trees + i * tree_size;
+
+ qemu_mutex_lock(&rt->lock);
+ }
+}
+
+static void tcg_region_tree_unlock_all(void)
+{
+ size_t i;
+
+ for (i = 0; i < region.n; i++) {
+ struct tcg_region_tree *rt = region_trees + i * tree_size;
+
+ qemu_mutex_unlock(&rt->lock);
+ }
+}
+
+void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
+{
+ size_t i;
+
+ tcg_region_tree_lock_all();
+ for (i = 0; i < region.n; i++) {
+ struct tcg_region_tree *rt = region_trees + i * tree_size;
+
+ g_tree_foreach(rt->tree, func, user_data);
+ }
+ tcg_region_tree_unlock_all();
+}
+
+size_t tcg_nb_tbs(void)
+{
+ size_t nb_tbs = 0;
+ size_t i;
+
+ tcg_region_tree_lock_all();
+ for (i = 0; i < region.n; i++) {
+ struct tcg_region_tree *rt = region_trees + i * tree_size;
+
+ nb_tbs += g_tree_nnodes(rt->tree);
+ }
+ tcg_region_tree_unlock_all();
+ return nb_tbs;
+}
+
+static void tcg_region_tree_reset_all(void)
+{
+ size_t i;
+
+ tcg_region_tree_lock_all();
+ for (i = 0; i < region.n; i++) {
+ struct tcg_region_tree *rt = region_trees + i * tree_size;
+
+ /* Increment the refcount first so that destroy acts as a reset */
+ g_tree_ref(rt->tree);
+ g_tree_destroy(rt->tree);
+ }
+ tcg_region_tree_unlock_all();
+}
+
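The ref-then-destroy pair above leans on GLib reference counting: g_tree_destroy() removes every node and drops one reference, so taking an extra reference first empties the tree while keeping it usable. A tiny standalone illustration of that behaviour follows (again requires GLib; not QEMU code).

#include <glib.h>
#include <stdio.h>

static gint int_cmp(gconstpointer a, gconstpointer b, gpointer u)
{
    return GPOINTER_TO_INT(a) - GPOINTER_TO_INT(b);
}

int main(void)
{
    GTree *t = g_tree_new_full(int_cmp, NULL, NULL, NULL);

    g_tree_insert(t, GINT_TO_POINTER(1), "one");
    g_tree_insert(t, GINT_TO_POINTER(2), "two");
    printf("before: %d nodes\n", g_tree_nnodes(t));

    /* ref + destroy drops every node but keeps the tree itself alive */
    g_tree_ref(t);
    g_tree_destroy(t);
    printf("after:  %d nodes\n", g_tree_nnodes(t));

    g_tree_unref(t);    /* final release */
    return 0;
}
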
+static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
+{
+ void *start, *end;
+
+ start = region.start_aligned + curr_region * region.stride;
+ end = start + region.size;
+
+ if (curr_region == 0) {
+ start = region.after_prologue;
+ }
+ /* The final region may have a few extra pages due to earlier rounding. */
+ if (curr_region == region.n - 1) {
+ end = region.start_aligned + region.total_size;
+ }
+
+ *pstart = start;
+ *pend = end;
+}
+
+static void tcg_region_assign(TCGContext *s, size_t curr_region)
+{
+ void *start, *end;
+
+ tcg_region_bounds(curr_region, &start, &end);
+
+ s->code_gen_buffer = start;
+ s->code_gen_ptr = start;
+ s->code_gen_buffer_size = end - start;
+ s->code_gen_highwater = end - TCG_HIGHWATER;
+}
+
+static bool tcg_region_alloc__locked(TCGContext *s)
+{
+ if (region.current == region.n) {
+ return true;
+ }
+ tcg_region_assign(s, region.current);
+ region.current++;
+ return false;
+}
+
+/*
+ * Request a new region once the one in use has filled up.
+ * Returns true on error.
+ */
+bool tcg_region_alloc(TCGContext *s)
+{
+ bool err;
+ /* read the region size now; alloc__locked will overwrite it on success */
+ size_t size_full = s->code_gen_buffer_size;
+
+ qemu_mutex_lock(&region.lock);
+ err = tcg_region_alloc__locked(s);
+ if (!err) {
+ region.agg_size_full += size_full - TCG_HIGHWATER;
+ }
+ qemu_mutex_unlock(&region.lock);
+ return err;
+}
+
+/*
+ * Perform a context's first region allocation.
+ * This function does _not_ increment region.agg_size_full.
+ */
+static void tcg_region_initial_alloc__locked(TCGContext *s)
+{
+ bool err = tcg_region_alloc__locked(s);
+ g_assert(!err);
+}
+
+void tcg_region_initial_alloc(TCGContext *s)
+{
+ qemu_mutex_lock(&region.lock);
+ tcg_region_initial_alloc__locked(s);
+ qemu_mutex_unlock(&region.lock);
+}
+
+/* Call from a safe-work context */
+void tcg_region_reset_all(void)
+{
+ unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
+ unsigned int i;
+
+ qemu_mutex_lock(&region.lock);
+ region.current = 0;
+ region.agg_size_full = 0;
+
+ for (i = 0; i < n_ctxs; i++) {
+ TCGContext *s = qatomic_read(&tcg_ctxs[i]);
+ tcg_region_initial_alloc__locked(s);
+ }
+ qemu_mutex_unlock(&region.lock);
+
+ tcg_region_tree_reset_all();
+}
+
+static size_t tcg_n_regions(size_t tb_size, unsigned max_cpus)
+{
+#ifdef CONFIG_USER_ONLY
+ return 1;
+#else
+ size_t n_regions;
+
+ /*
+ * It is likely that some vCPUs will translate more code than others,
+ * so we first try to set more regions than max_cpus, with those regions
+ * being of reasonable size. If that's not possible we make do by evenly
+ * dividing the code_gen_buffer among the vCPUs.
+ */
+ /* Use a single region if all we have is one vCPU thread */
+ if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
+ return 1;
+ }
+
+ /*
+ * Try to have more regions than max_cpus, with each region being >= 2 MB.
+ * If we can't, then just allocate one region per vCPU thread.
+ */
+ n_regions = tb_size / (2 * MiB);
+ if (n_regions <= max_cpus) {
+ return max_cpus;
+ }
+ return MIN(n_regions, max_cpus * 8);
+#endif
+}
+
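To make the sizing heuristic concrete, here is an editorial restatement of the softmmu branch (the single-region !MTTCG case is left out) together with two example inputs; the numbers are purely illustrative.

#include <stdio.h>

#define MiB (1024 * 1024)

/* Standalone restatement of tcg_n_regions()'s heuristic, for illustration. */
static size_t n_regions(size_t tb_size, unsigned max_cpus)
{
    size_t n = tb_size / (2 * MiB);      /* candidate regions of >= 2 MiB */

    if (max_cpus == 1) {
        return 1;
    }
    if (n <= max_cpus) {
        return max_cpus;                 /* fall back to one per vCPU */
    }
    return n < max_cpus * 8 ? n : max_cpus * 8;
}

int main(void)
{
    /* 1 GiB buffer, 8 vCPUs: 512 candidates, capped at 8 * 8 = 64. */
    printf("%zu regions\n", n_regions(1024 * (size_t)MiB, 8));
    /* 64 MiB buffer, 32 vCPUs: only 32 candidates, so one per vCPU. */
    printf("%zu regions\n", n_regions(64 * (size_t)MiB, 32));
    return 0;
}
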
+/*
+ * Minimum size of the code gen buffer. This number is randomly chosen,
+ * but not so small that we can't have a fair number of TBs live.
+ *
+ * Maximum size, MAX_CODE_GEN_BUFFER_SIZE, is defined in tcg-target.h.
+ * Unless otherwise indicated, this is constrained by the range of
+ * direct branches on the host cpu, as used by the TCG implementation
+ * of goto_tb.
+ */
+#define MIN_CODE_GEN_BUFFER_SIZE (1 * MiB)
+
+#if TCG_TARGET_REG_BITS == 32
+#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32 * MiB)
+#ifdef CONFIG_USER_ONLY
+/*
+ * For user mode on smaller 32 bit systems we may run into trouble
+ * allocating big chunks of data in the right place. On these systems
+ * we utilise a static code generation buffer directly in the binary.
+ */
+#define USE_STATIC_CODE_GEN_BUFFER
+#endif
+#else /* TCG_TARGET_REG_BITS == 64 */
+#ifdef CONFIG_USER_ONLY
+/*
+ * As user-mode emulation typically means running multiple instances
+ * of the translator, don't go too nuts with our default code gen
+ * buffer lest we make things too hard for the OS.
+ */
+#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (128 * MiB)
+#else
+/*
+ * We expect most system emulation to run one or two guests per host.
+ * Users running large scale system emulation may want to tweak their
+ * runtime setup via the tb-size control on the command line.
+ */
+#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (1 * GiB)
+#endif
+#endif
+
+#define DEFAULT_CODE_GEN_BUFFER_SIZE \
+ (DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \
+ ? DEFAULT_CODE_GEN_BUFFER_SIZE_1 : MAX_CODE_GEN_BUFFER_SIZE)
+
+#ifdef USE_STATIC_CODE_GEN_BUFFER
+static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
+ __attribute__((aligned(CODE_GEN_ALIGN)));
+
+static int alloc_code_gen_buffer(size_t tb_size, int splitwx, Error **errp)
+{
+ void *buf, *end;
+ size_t size;
+
+ if (splitwx > 0) {
+ error_setg(errp, "jit split-wx not supported");
+ return -1;
+ }
+
+ /* page-align the beginning and end of the buffer */
+ buf = static_code_gen_buffer;
+ end = static_code_gen_buffer + sizeof(static_code_gen_buffer);
+ buf = QEMU_ALIGN_PTR_UP(buf, qemu_real_host_page_size);
+ end = QEMU_ALIGN_PTR_DOWN(end, qemu_real_host_page_size);
+
+ size = end - buf;
+
+ /* Honor a command-line option limiting the size of the buffer. */
+ if (size > tb_size) {
+ size = QEMU_ALIGN_DOWN(tb_size, qemu_real_host_page_size);
+ }
+
+ region.start_aligned = buf;
+ region.total_size = size;
+
+ return PROT_READ | PROT_WRITE;
+}
+#elif defined(_WIN32)
+static int alloc_code_gen_buffer(size_t size, int splitwx, Error **errp)
+{
+ void *buf;
+
+ if (splitwx > 0) {
+ error_setg(errp, "jit split-wx not supported");
+ return -1;
+ }
+
+ buf = VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT,
+ PAGE_EXECUTE_READWRITE);
+ if (buf == NULL) {
+ error_setg_win32(errp, GetLastError(),
+ "allocate %zu bytes for jit buffer", size);
+ return -1;
+ }
+
+ region.start_aligned = buf;
+ region.total_size = size;
+
+ return PAGE_READ | PAGE_WRITE | PAGE_EXEC;
+}
+#else
+static int alloc_code_gen_buffer_anon(size_t size, int prot,
+ int flags, Error **errp)
+{
+ void *buf;
+
+ buf = mmap(NULL, size, prot, flags, -1, 0);
+ if (buf == MAP_FAILED) {
+ error_setg_errno(errp, errno,
+ "allocate %zu bytes for jit buffer", size);
+ return -1;
+ }
+
+ region.start_aligned = buf;
+ region.total_size = size;
+ return prot;
+}
+
+#ifndef CONFIG_TCG_INTERPRETER
+#ifdef CONFIG_POSIX
+#include "qemu/memfd.h"
+
+static bool alloc_code_gen_buffer_splitwx_memfd(size_t size, Error **errp)
+{
+ void *buf_rw = NULL, *buf_rx = MAP_FAILED;
+ int fd = -1;
+
+ buf_rw = qemu_memfd_alloc("tcg-jit", size, 0, &fd, errp);
+ if (buf_rw == NULL) {
+ goto fail;
+ }
+
+ buf_rx = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0);
+ if (buf_rx == MAP_FAILED) {
+ goto fail_rx;
+ }
+
+ close(fd);
+ region.start_aligned = buf_rw;
+ region.total_size = size;
+ tcg_splitwx_diff = buf_rx - buf_rw;
+
+ return PROT_READ | PROT_WRITE;
+
+ fail_rx:
+ error_setg_errno(errp, errno, "failed to map shared memory for execute");
+ fail:
+ if (buf_rx != MAP_FAILED) {
+ munmap(buf_rx, size);
+ }
+ if (buf_rw) {
+ munmap(buf_rw, size);
+ }
+ if (fd >= 0) {
+ close(fd);
+ }
+ return -1;
+}
+#endif /* CONFIG_POSIX */
+
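The memfd variant above is the heart of split-wx on Linux: the same physical pages are mapped twice, once writable for the code generator and once executable for execution, with tcg_splitwx_diff recording the constant offset between the two views. Below is a stripped-down sketch of the same idea; it assumes Linux with glibc 2.27+ for memfd_create(), error handling is minimal, and the 'ret' byte is only written as a marker, never executed.

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
    size_t size = 4096;
    int fd = memfd_create("jit-demo", 0);

    if (fd < 0 || ftruncate(fd, size) < 0) {
        perror("memfd");
        return 1;
    }

    /* One writable view and one executable view of the same pages. */
    void *rw = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    void *rx = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0);
    if (rw == MAP_FAILED || rx == MAP_FAILED) {
        perror("mmap");
        return 1;
    }
    close(fd);

    /* Whatever the generator writes through 'rw' ... */
    memcpy(rw, "\xc3", 1);              /* x86 'ret', purely as a marker */

    /* ... is immediately visible through the executable alias. */
    printf("rw=%p rx=%p diff=%td first byte via rx: 0x%02x\n",
           rw, rx, (char *)rx - (char *)rw, *(unsigned char *)rx);
    return 0;
}
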
+#ifdef CONFIG_DARWIN
+#include <mach/mach.h>
+
+extern kern_return_t mach_vm_remap(vm_map_t target_task,
+ mach_vm_address_t *target_address,
+ mach_vm_size_t size,
+ mach_vm_offset_t mask,
+ int flags,
+ vm_map_t src_task,
+ mach_vm_address_t src_address,
+ boolean_t copy,
+ vm_prot_t *cur_protection,
+ vm_prot_t *max_protection,
+ vm_inherit_t inheritance);
+
+static int alloc_code_gen_buffer_splitwx_vmremap(size_t size, Error **errp)
+{
+ kern_return_t ret;
+ mach_vm_address_t buf_rw, buf_rx;
+ vm_prot_t cur_prot, max_prot;
+
+ /* Map the read-write portion via normal anon memory. */
+ if (alloc_code_gen_buffer_anon(size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, errp) < 0) {
+ return -1;
+ }
+
+ buf_rw = (mach_vm_address_t)region.start_aligned;
+ buf_rx = 0;
+ ret = mach_vm_remap(mach_task_self(),
+ &buf_rx,
+ size,
+ 0,
+ VM_FLAGS_ANYWHERE,
+ mach_task_self(),
+ buf_rw,
+ false,
+ &cur_prot,
+ &max_prot,
+ VM_INHERIT_NONE);
+ if (ret != KERN_SUCCESS) {
+ /* TODO: Convert "ret" to a human readable error message. */
+ error_setg(errp, "vm_remap for jit splitwx failed");
+ munmap((void *)buf_rw, size);
+ return -1;
+ }
+
+ if (mprotect((void *)buf_rx, size, PROT_READ | PROT_EXEC) != 0) {
+ error_setg_errno(errp, errno, "mprotect for jit splitwx");
+ munmap((void *)buf_rx, size);
+ munmap((void *)buf_rw, size);
+ return -1;
+ }
+
+ tcg_splitwx_diff = buf_rx - buf_rw;
+ return PROT_READ | PROT_WRITE;
+}
+#endif /* CONFIG_DARWIN */
+#endif /* CONFIG_TCG_INTERPRETER */
+
+static int alloc_code_gen_buffer_splitwx(size_t size, Error **errp)
+{
+#ifndef CONFIG_TCG_INTERPRETER
+# ifdef CONFIG_DARWIN
+ return alloc_code_gen_buffer_splitwx_vmremap(size, errp);
+# endif
+# ifdef CONFIG_POSIX
+ return alloc_code_gen_buffer_splitwx_memfd(size, errp);
+# endif
+#endif
+ error_setg(errp, "jit split-wx not supported");
+ return -1;
+}
+
+static int alloc_code_gen_buffer(size_t size, int splitwx, Error **errp)
+{
+ ERRP_GUARD();
+ int prot, flags;
+
+ if (splitwx) {
+ prot = alloc_code_gen_buffer_splitwx(size, errp);
+ if (prot >= 0) {
+ return prot;
+ }
+ /*
+ * If splitwx force-on (1), fail;
+ * if splitwx default-on (-1), fall through to splitwx off.
+ */
+ if (splitwx > 0) {
+ return -1;
+ }
+ error_free_or_abort(errp);
+ }
+
+ /*
+ * macOS 11.2 has a bug (Apple Feedback FB8994773) in which mprotect
+ * rejects a permission change from RWX -> NONE when reserving the
+ * guard pages later. We can go the other way with the same number
+ * of syscalls, so always begin with PROT_NONE.
+ */
+ prot = PROT_NONE;
+ flags = MAP_PRIVATE | MAP_ANONYMOUS;
+#ifdef CONFIG_DARWIN
+ /* Applicable to both iOS and macOS (Apple Silicon). */
+ if (!splitwx) {
+ flags |= MAP_JIT;
+ }
+#endif
+
+ return alloc_code_gen_buffer_anon(size, prot, flags, errp);
+}
+#endif /* USE_STATIC_CODE_GEN_BUFFER, WIN32, POSIX */
+
+/*
+ * Initializes region partitioning.
+ *
+ * Called at init time from the parent thread (i.e. the one calling
+ * tcg_context_init), after the target's TCG globals have been set.
+ *
+ * Region partitioning works by splitting code_gen_buffer into separate regions,
+ * and then assigning regions to TCG threads so that the threads can translate
+ * code in parallel without synchronization.
+ *
+ * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
+ * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
+ * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
+ * must have been parsed before calling this function, since it calls
+ * qemu_tcg_mttcg_enabled().
+ *
+ * In user-mode we use a single region. Having multiple regions in user-mode
+ * is not supported, because the number of vCPU threads (recall that each thread
+ * spawned by the guest corresponds to a vCPU thread) is only bounded by the
+ * OS, and usually this number is huge (tens of thousands is not uncommon).
+ * Thus, given this large bound on the number of vCPU threads and the fact
+ * that code_gen_buffer is allocated at compile-time, we cannot guarantee
+ * the availability of at least one region per vCPU thread.
+ *
+ * However, this user-mode limitation is unlikely to be a significant problem
+ * in practice. Multi-threaded guests share most if not all of their translated
+ * code, which makes parallel code generation less appealing than in softmmu.
+ */
+void tcg_region_init(size_t tb_size, int splitwx, unsigned max_cpus)
+{
+ const size_t page_size = qemu_real_host_page_size;
+ size_t region_size;
+ int have_prot, need_prot;
+
+ /* Size the buffer. */
+ if (tb_size == 0) {
+ size_t phys_mem = qemu_get_host_physmem();
+ if (phys_mem == 0) {
+ tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
+ } else {
+ tb_size = QEMU_ALIGN_DOWN(phys_mem / 8, page_size);
+ tb_size = MIN(DEFAULT_CODE_GEN_BUFFER_SIZE, tb_size);
+ }
+ }
+ if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) {
+ tb_size = MIN_CODE_GEN_BUFFER_SIZE;
+ }
+ if (tb_size > MAX_CODE_GEN_BUFFER_SIZE) {
+ tb_size = MAX_CODE_GEN_BUFFER_SIZE;
+ }
+
+ have_prot = alloc_code_gen_buffer(tb_size, splitwx, &error_fatal);
+ assert(have_prot >= 0);
+
+ /* Request large pages for the buffer and the splitwx. */
+ qemu_madvise(region.start_aligned, region.total_size, QEMU_MADV_HUGEPAGE);
+ if (tcg_splitwx_diff) {
+ qemu_madvise(region.start_aligned + tcg_splitwx_diff,
+ region.total_size, QEMU_MADV_HUGEPAGE);
+ }
+
+ /*
+ * Make region_size a multiple of page_size, using aligned as the start.
+ * As a result of this we might end up with a few extra pages at the end of
+ * the buffer; we will assign those to the last region.
+ */
+ region.n = tcg_n_regions(tb_size, max_cpus);
+ region_size = tb_size / region.n;
+ region_size = QEMU_ALIGN_DOWN(region_size, page_size);
+
+ /* A region must have at least 2 pages; one code, one guard */
+ g_assert(region_size >= 2 * page_size);
+ region.stride = region_size;
+
+ /* Reserve space for guard pages. */
+ region.size = region_size - page_size;
+ region.total_size -= page_size;
+
+ /*
+ * The first region will be smaller than the others, via the prologue,
+ * which has yet to be allocated. For now, the first region begins at
+ * the page boundary.
+ */
+ region.after_prologue = region.start_aligned;
+
+ /* init the region struct */
+ qemu_mutex_init(&region.lock);
+
+ /*
+ * Set guard pages in the rw buffer, as that's the one into which
+ * buffer overruns could occur. Do not set guard pages in the rx
+ * buffer -- let that one use hugepages throughout.
+ * Work with the page protections set up with the initial mapping.
+ */
+ need_prot = PAGE_READ | PAGE_WRITE;
+#ifndef CONFIG_TCG_INTERPRETER
+ if (tcg_splitwx_diff == 0) {
+ need_prot |= PAGE_EXEC;
+ }
+#endif
+ for (size_t i = 0, n = region.n; i < n; i++) {
+ void *start, *end;
+
+ tcg_region_bounds(i, &start, &end);
+ if (have_prot != need_prot) {
+ int rc;
+
+ if (need_prot == (PAGE_READ | PAGE_WRITE | PAGE_EXEC)) {
+ rc = qemu_mprotect_rwx(start, end - start);
+ } else if (need_prot == (PAGE_READ | PAGE_WRITE)) {
+ rc = qemu_mprotect_rw(start, end - start);
+ } else {
+ g_assert_not_reached();
+ }
+ if (rc) {
+ error_setg_errno(&error_fatal, errno,
+ "mprotect of jit buffer");
+ }
+ }
+ if (have_prot != 0) {
+ /* Guard pages are nice for bug detection but are not essential. */
+ (void)qemu_mprotect_none(end, page_size);
+ }
+ }
+
+ tcg_region_trees_init();
+
+ /*
+ * Leave the initial context initialized to the first region.
+ * This will be the context into which we generate the prologue.
+ * It is also the only context for CONFIG_USER_ONLY.
+ */
+ tcg_region_initial_alloc__locked(&tcg_init_ctx);
+}
+
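The layout arithmetic near the end of tcg_region_init() is easier to see with numbers plugged in. The following editorial calculation uses made-up inputs (256 MiB buffer, 64 regions, 4 KiB pages) to show the stride, the per-region usable size, and the total space given up to the one guard page each region reserves.

#include <stdio.h>

#define MiB (1024 * 1024)

int main(void)
{
    /* Made-up inputs, purely for illustration. */
    size_t page_size = 4096;
    size_t tb_size = 256 * (size_t)MiB;
    size_t n = 64;

    size_t region_size = (tb_size / n) & ~(page_size - 1);  /* ALIGN_DOWN */
    size_t stride = region_size;               /* code plus trailing guard */
    size_t usable = region_size - page_size;   /* code only */

    printf("stride = %zu bytes\n", stride);
    printf("usable = %zu bytes per region (one guard page reserved)\n",
           usable);
    printf("guards = %zu bytes total\n", n * page_size);
    return 0;
}
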
+void tcg_region_prologue_set(TCGContext *s)
+{
+ /* Deduct the prologue from the first region. */
+ g_assert(region.start_aligned == s->code_gen_buffer);
+ region.after_prologue = s->code_ptr;
+
+ /* Recompute boundaries of the first region. */
+ tcg_region_assign(s, 0);
+
+ /* Register the balance of the buffer with gdb. */
+ tcg_register_jit(tcg_splitwx_to_rx(region.after_prologue),
+ region.start_aligned + region.total_size -
+ region.after_prologue);
+}
+
+/*
+ * Returns the size (in bytes) of all translated code (i.e. from all regions)
+ * currently in the cache.
+ * See also: tcg_code_capacity()
+ * Do not confuse with tcg_current_code_size(); that one applies to a single
+ * TCG context.
+ */
+size_t tcg_code_size(void)
+{
+ unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
+ unsigned int i;
+ size_t total;
+
+ qemu_mutex_lock(&region.lock);
+ total = region.agg_size_full;
+ for (i = 0; i < n_ctxs; i++) {
+ const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
+ size_t size;
+
+ size = qatomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
+ g_assert(size <= s->code_gen_buffer_size);
+ total += size;
+ }
+ qemu_mutex_unlock(&region.lock);
+ return total;
+}
+
+/*
+ * Returns the code capacity (in bytes) of the entire cache, i.e. including all
+ * regions.
+ * See also: tcg_code_size()
+ */
+size_t tcg_code_capacity(void)
+{
+ size_t guard_size, capacity;
+
+ /* no need for synchronization; these variables are set at init time */
+ guard_size = region.stride - region.size;
+ capacity = region.total_size;
+ capacity -= (region.n - 1) * guard_size;
+ capacity -= region.n * TCG_HIGHWATER;
+
+ return capacity;
+}
diff --git a/tcg/riscv/tcg-target-con-set.h b/tcg/riscv/tcg-target-con-set.h
new file mode 100644
index 000000000..cf0ac4d75
--- /dev/null
+++ b/tcg/riscv/tcg-target-con-set.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define RISC-V target-specific constraint sets.
+ * Copyright (c) 2021 Linaro
+ */
+
+/*
+ * C_On_Im(...) defines a constraint set with <n> outputs and <m> inputs.
+ * Each operand should be a sequence of constraint letters as defined by
+ * tcg-target-con-str.h; the constraint combination is inclusive or.
+ */
+C_O0_I1(r)
+C_O0_I2(LZ, L)
+C_O0_I2(rZ, r)
+C_O0_I2(rZ, rZ)
+C_O0_I3(LZ, L, L)
+C_O0_I3(LZ, LZ, L)
+C_O0_I4(LZ, LZ, L, L)
+C_O0_I4(rZ, rZ, rZ, rZ)
+C_O1_I1(r, L)
+C_O1_I1(r, r)
+C_O1_I2(r, L, L)
+C_O1_I2(r, r, ri)
+C_O1_I2(r, r, rI)
+C_O1_I2(r, rZ, rN)
+C_O1_I2(r, rZ, rZ)
+C_O1_I4(r, rZ, rZ, rZ, rZ)
+C_O2_I1(r, r, L)
+C_O2_I2(r, r, L, L)
+C_O2_I4(r, r, rZ, rZ, rM, rM)
diff --git a/tcg/riscv/tcg-target-con-str.h b/tcg/riscv/tcg-target-con-str.h
new file mode 100644
index 000000000..8d8afaee5
--- /dev/null
+++ b/tcg/riscv/tcg-target-con-str.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define RISC-V target-specific operand constraints.
+ * Copyright (c) 2021 Linaro
+ */
+
+/*
+ * Define constraint letters for register sets:
+ * REGS(letter, register_mask)
+ */
+REGS('r', ALL_GENERAL_REGS)
+REGS('L', ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS)
+
+/*
+ * Define constraint letters for constants:
+ * CONST(letter, TCG_CT_CONST_* bit set)
+ */
+CONST('I', TCG_CT_CONST_S12)
+CONST('N', TCG_CT_CONST_N12)
+CONST('M', TCG_CT_CONST_M12)
+CONST('Z', TCG_CT_CONST_ZERO)
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
new file mode 100644
index 000000000..9b13a46fb
--- /dev/null
+++ b/tcg/riscv/tcg-target.c.inc
@@ -0,0 +1,1804 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2018 SiFive, Inc
+ * Copyright (c) 2008-2009 Arnaud Patard <arnaud.patard@rtp-net.org>
+ * Copyright (c) 2009 Aurelien Jarno <aurelien@aurel32.net>
+ * Copyright (c) 2008 Fabrice Bellard
+ *
+ * Based on i386/tcg-target.c and mips/tcg-target.c
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "../tcg-pool.c.inc"
+
+#ifdef CONFIG_DEBUG_TCG
+static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
+ "zero",
+ "ra",
+ "sp",
+ "gp",
+ "tp",
+ "t0",
+ "t1",
+ "t2",
+ "s0",
+ "s1",
+ "a0",
+ "a1",
+ "a2",
+ "a3",
+ "a4",
+ "a5",
+ "a6",
+ "a7",
+ "s2",
+ "s3",
+ "s4",
+ "s5",
+ "s6",
+ "s7",
+ "s8",
+ "s9",
+ "s10",
+ "s11",
+ "t3",
+ "t4",
+ "t5",
+ "t6"
+};
+#endif
+
+static const int tcg_target_reg_alloc_order[] = {
+ /* Call saved registers */
+ /* TCG_REG_S0 reserved for TCG_AREG0 */
+ TCG_REG_S1,
+ TCG_REG_S2,
+ TCG_REG_S3,
+ TCG_REG_S4,
+ TCG_REG_S5,
+ TCG_REG_S6,
+ TCG_REG_S7,
+ TCG_REG_S8,
+ TCG_REG_S9,
+ TCG_REG_S10,
+ TCG_REG_S11,
+
+ /* Call clobbered registers */
+ TCG_REG_T0,
+ TCG_REG_T1,
+ TCG_REG_T2,
+ TCG_REG_T3,
+ TCG_REG_T4,
+ TCG_REG_T5,
+ TCG_REG_T6,
+
+ /* Argument registers */
+ TCG_REG_A0,
+ TCG_REG_A1,
+ TCG_REG_A2,
+ TCG_REG_A3,
+ TCG_REG_A4,
+ TCG_REG_A5,
+ TCG_REG_A6,
+ TCG_REG_A7,
+};
+
+static const int tcg_target_call_iarg_regs[] = {
+ TCG_REG_A0,
+ TCG_REG_A1,
+ TCG_REG_A2,
+ TCG_REG_A3,
+ TCG_REG_A4,
+ TCG_REG_A5,
+ TCG_REG_A6,
+ TCG_REG_A7,
+};
+
+static const int tcg_target_call_oarg_regs[] = {
+ TCG_REG_A0,
+ TCG_REG_A1,
+};
+
+#define TCG_CT_CONST_ZERO 0x100
+#define TCG_CT_CONST_S12 0x200
+#define TCG_CT_CONST_N12 0x400
+#define TCG_CT_CONST_M12 0x800
+
+#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32)
+/*
+ * For softmmu, we need to avoid conflicts with the first 5
+ * argument registers to call the helper. Some of these are
+ * also used for the tlb lookup.
+ */
+#ifdef CONFIG_SOFTMMU
+#define SOFTMMU_RESERVE_REGS MAKE_64BIT_MASK(TCG_REG_A0, 5)
+#else
+#define SOFTMMU_RESERVE_REGS 0
+#endif
+
+
+static inline tcg_target_long sextreg(tcg_target_long val, int pos, int len)
+{
+ if (TCG_TARGET_REG_BITS == 32) {
+ return sextract32(val, pos, len);
+ } else {
+ return sextract64(val, pos, len);
+ }
+}
+
+/* test if a constant matches the constraint */
+static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
+{
+ if (ct & TCG_CT_CONST) {
+ return 1;
+ }
+ if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
+ return 1;
+ }
+ if ((ct & TCG_CT_CONST_S12) && val == sextreg(val, 0, 12)) {
+ return 1;
+ }
+ if ((ct & TCG_CT_CONST_N12) && -val == sextreg(-val, 0, 12)) {
+ return 1;
+ }
+ if ((ct & TCG_CT_CONST_M12) && val >= -0xfff && val <= 0xfff) {
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * RISC-V Base ISA opcodes (IM)
+ */
+
+typedef enum {
+ OPC_ADD = 0x33,
+ OPC_ADDI = 0x13,
+ OPC_AND = 0x7033,
+ OPC_ANDI = 0x7013,
+ OPC_AUIPC = 0x17,
+ OPC_BEQ = 0x63,
+ OPC_BGE = 0x5063,
+ OPC_BGEU = 0x7063,
+ OPC_BLT = 0x4063,
+ OPC_BLTU = 0x6063,
+ OPC_BNE = 0x1063,
+ OPC_DIV = 0x2004033,
+ OPC_DIVU = 0x2005033,
+ OPC_JAL = 0x6f,
+ OPC_JALR = 0x67,
+ OPC_LB = 0x3,
+ OPC_LBU = 0x4003,
+ OPC_LD = 0x3003,
+ OPC_LH = 0x1003,
+ OPC_LHU = 0x5003,
+ OPC_LUI = 0x37,
+ OPC_LW = 0x2003,
+ OPC_LWU = 0x6003,
+ OPC_MUL = 0x2000033,
+ OPC_MULH = 0x2001033,
+ OPC_MULHSU = 0x2002033,
+ OPC_MULHU = 0x2003033,
+ OPC_OR = 0x6033,
+ OPC_ORI = 0x6013,
+ OPC_REM = 0x2006033,
+ OPC_REMU = 0x2007033,
+ OPC_SB = 0x23,
+ OPC_SD = 0x3023,
+ OPC_SH = 0x1023,
+ OPC_SLL = 0x1033,
+ OPC_SLLI = 0x1013,
+ OPC_SLT = 0x2033,
+ OPC_SLTI = 0x2013,
+ OPC_SLTIU = 0x3013,
+ OPC_SLTU = 0x3033,
+ OPC_SRA = 0x40005033,
+ OPC_SRAI = 0x40005013,
+ OPC_SRL = 0x5033,
+ OPC_SRLI = 0x5013,
+ OPC_SUB = 0x40000033,
+ OPC_SW = 0x2023,
+ OPC_XOR = 0x4033,
+ OPC_XORI = 0x4013,
+
+#if TCG_TARGET_REG_BITS == 64
+ OPC_ADDIW = 0x1b,
+ OPC_ADDW = 0x3b,
+ OPC_DIVUW = 0x200503b,
+ OPC_DIVW = 0x200403b,
+ OPC_MULW = 0x200003b,
+ OPC_REMUW = 0x200703b,
+ OPC_REMW = 0x200603b,
+ OPC_SLLIW = 0x101b,
+ OPC_SLLW = 0x103b,
+ OPC_SRAIW = 0x4000501b,
+ OPC_SRAW = 0x4000503b,
+ OPC_SRLIW = 0x501b,
+ OPC_SRLW = 0x503b,
+ OPC_SUBW = 0x4000003b,
+#else
+ /* Simplify code throughout by defining aliases for RV32. */
+ OPC_ADDIW = OPC_ADDI,
+ OPC_ADDW = OPC_ADD,
+ OPC_DIVUW = OPC_DIVU,
+ OPC_DIVW = OPC_DIV,
+ OPC_MULW = OPC_MUL,
+ OPC_REMUW = OPC_REMU,
+ OPC_REMW = OPC_REM,
+ OPC_SLLIW = OPC_SLLI,
+ OPC_SLLW = OPC_SLL,
+ OPC_SRAIW = OPC_SRAI,
+ OPC_SRAW = OPC_SRA,
+ OPC_SRLIW = OPC_SRLI,
+ OPC_SRLW = OPC_SRL,
+ OPC_SUBW = OPC_SUB,
+#endif
+
+ OPC_FENCE = 0x0000000f,
+} RISCVInsn;
+
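The RISCVInsn constants pack the fixed instruction fields (opcode, funct3, funct7) into the positions they occupy in the final word, so the emitters below only have to OR in the register and immediate fields. A short editorial decoder that pulls those fields back out of a few of the constants, as a sanity check on how to read them:

#include <stdint.h>
#include <stdio.h>

static void explain(const char *name, uint32_t opc)
{
    printf("%-4s opcode=0x%02x funct3=%u funct7=0x%02x\n",
           name, (unsigned)(opc & 0x7f), (unsigned)((opc >> 12) & 0x7),
           (unsigned)((opc >> 25) & 0x7f));
}

int main(void)
{
    explain("ADD", 0x33);         /* opcode 0x33, funct3 0, funct7 0x00 */
    explain("SRA", 0x40005033);   /* opcode 0x33, funct3 5, funct7 0x20 */
    explain("MUL", 0x2000033);    /* opcode 0x33, funct3 0, funct7 0x01 */
    return 0;
}
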
+/*
+ * RISC-V immediate and instruction encoders (excludes 16-bit RVC)
+ */
+
+/* Type-R */
+
+static int32_t encode_r(RISCVInsn opc, TCGReg rd, TCGReg rs1, TCGReg rs2)
+{
+ return opc | (rd & 0x1f) << 7 | (rs1 & 0x1f) << 15 | (rs2 & 0x1f) << 20;
+}
+
+/* Type-I */
+
+static int32_t encode_imm12(uint32_t imm)
+{
+ return (imm & 0xfff) << 20;
+}
+
+static int32_t encode_i(RISCVInsn opc, TCGReg rd, TCGReg rs1, uint32_t imm)
+{
+ return opc | (rd & 0x1f) << 7 | (rs1 & 0x1f) << 15 | encode_imm12(imm);
+}
+
+/* Type-S */
+
+static int32_t encode_simm12(uint32_t imm)
+{
+ int32_t ret = 0;
+
+ ret |= (imm & 0xFE0) << 20;
+ ret |= (imm & 0x1F) << 7;
+
+ return ret;
+}
+
+static int32_t encode_s(RISCVInsn opc, TCGReg rs1, TCGReg rs2, uint32_t imm)
+{
+ return opc | (rs1 & 0x1f) << 15 | (rs2 & 0x1f) << 20 | encode_simm12(imm);
+}
+
+/* Type-SB */
+
+static int32_t encode_sbimm12(uint32_t imm)
+{
+ int32_t ret = 0;
+
+ ret |= (imm & 0x1000) << 19;
+ ret |= (imm & 0x7e0) << 20;
+ ret |= (imm & 0x1e) << 7;
+ ret |= (imm & 0x800) >> 4;
+
+ return ret;
+}
+
+static int32_t encode_sb(RISCVInsn opc, TCGReg rs1, TCGReg rs2, uint32_t imm)
+{
+ return opc | (rs1 & 0x1f) << 15 | (rs2 & 0x1f) << 20 | encode_sbimm12(imm);
+}
+
+/* Type-U */
+
+static int32_t encode_uimm20(uint32_t imm)
+{
+ return imm & 0xfffff000;
+}
+
+static int32_t encode_u(RISCVInsn opc, TCGReg rd, uint32_t imm)
+{
+ return opc | (rd & 0x1f) << 7 | encode_uimm20(imm);
+}
+
+/* Type-UJ */
+
+static int32_t encode_ujimm20(uint32_t imm)
+{
+ int32_t ret = 0;
+
+ ret |= (imm & 0x0007fe) << (21 - 1);
+ ret |= (imm & 0x000800) << (20 - 11);
+ ret |= (imm & 0x0ff000) << (12 - 12);
+ ret |= (imm & 0x100000) << (31 - 20);
+
+ return ret;
+}
+
+static int32_t encode_uj(RISCVInsn opc, TCGReg rd, uint32_t imm)
+{
+ return opc | (rd & 0x1f) << 7 | encode_ujimm20(imm);
+}
+
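The B- and J-type encoders scatter immediate bits into non-contiguous fields, which is easy to get wrong; a round-trip test is a handy way to convince yourself the shifts are right. The sketch below copies the SB-type scatter from above, adds an inverse gather of my own (not from QEMU), and checks every legal branch offset.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Same bit scatter as encode_sbimm12() above (return type widened). */
static uint32_t scatter_sbimm12(uint32_t imm)
{
    return ((imm & 0x1000) << 19) | ((imm & 0x7e0) << 20)
         | ((imm & 0x1e) << 7)    | ((imm & 0x800) >> 4);
}

/* Inverse gather: instruction bits back to a signed 13-bit offset. */
static int32_t gather_sbimm12(uint32_t insn)
{
    uint32_t imm = ((insn >> 19) & 0x1000)   /* insn[31]    -> imm[12]   */
                 | ((insn >> 20) & 0x7e0)    /* insn[30:25] -> imm[10:5] */
                 | ((insn >> 7)  & 0x1e)     /* insn[11:8]  -> imm[4:1]  */
                 | ((insn << 4)  & 0x800);   /* insn[7]     -> imm[11]   */

    return imm & 0x1000 ? (int32_t)imm - 0x2000 : (int32_t)imm;
}

int main(void)
{
    /* Conditional branches reach even offsets in [-4096, +4094]. */
    for (int32_t ofs = -4096; ofs <= 4094; ofs += 2) {
        assert(gather_sbimm12(scatter_sbimm12((uint32_t)ofs)) == ofs);
    }
    printf("SB-type immediate round-trips for every legal offset\n");
    return 0;
}
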
+/*
+ * RISC-V instruction emitters
+ */
+
+static void tcg_out_opc_reg(TCGContext *s, RISCVInsn opc,
+ TCGReg rd, TCGReg rs1, TCGReg rs2)
+{
+ tcg_out32(s, encode_r(opc, rd, rs1, rs2));
+}
+
+static void tcg_out_opc_imm(TCGContext *s, RISCVInsn opc,
+ TCGReg rd, TCGReg rs1, TCGArg imm)
+{
+ tcg_out32(s, encode_i(opc, rd, rs1, imm));
+}
+
+static void tcg_out_opc_store(TCGContext *s, RISCVInsn opc,
+ TCGReg rs1, TCGReg rs2, uint32_t imm)
+{
+ tcg_out32(s, encode_s(opc, rs1, rs2, imm));
+}
+
+static void tcg_out_opc_branch(TCGContext *s, RISCVInsn opc,
+ TCGReg rs1, TCGReg rs2, uint32_t imm)
+{
+ tcg_out32(s, encode_sb(opc, rs1, rs2, imm));
+}
+
+static void tcg_out_opc_upper(TCGContext *s, RISCVInsn opc,
+ TCGReg rd, uint32_t imm)
+{
+ tcg_out32(s, encode_u(opc, rd, imm));
+}
+
+static void tcg_out_opc_jump(TCGContext *s, RISCVInsn opc,
+ TCGReg rd, uint32_t imm)
+{
+ tcg_out32(s, encode_uj(opc, rd, imm));
+}
+
+static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
+{
+ int i;
+ for (i = 0; i < count; ++i) {
+ p[i] = encode_i(OPC_ADDI, TCG_REG_ZERO, TCG_REG_ZERO, 0);
+ }
+}
+
+/*
+ * Relocations
+ */
+
+static bool reloc_sbimm12(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
+{
+ const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
+ intptr_t offset = (intptr_t)target - (intptr_t)src_rx;
+
+ tcg_debug_assert((offset & 1) == 0);
+ if (offset == sextreg(offset, 0, 12)) {
+ *src_rw |= encode_sbimm12(offset);
+ return true;
+ }
+
+ return false;
+}
+
+static bool reloc_jimm20(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
+{
+ const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
+ intptr_t offset = (intptr_t)target - (intptr_t)src_rx;
+
+ tcg_debug_assert((offset & 1) == 0);
+ if (offset == sextreg(offset, 0, 20)) {
+ *src_rw |= encode_ujimm20(offset);
+ return true;
+ }
+
+ return false;
+}
+
+static bool reloc_call(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
+{
+ const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
+ intptr_t offset = (intptr_t)target - (intptr_t)src_rx;
+ int32_t lo = sextreg(offset, 0, 12);
+ int32_t hi = offset - lo;
+
+ if (offset == hi + lo) {
+ src_rw[0] |= encode_uimm20(hi);
+ src_rw[1] |= encode_imm12(lo);
+ return true;
+ }
+
+ return false;
+}
+
+static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
+ intptr_t value, intptr_t addend)
+{
+ tcg_debug_assert(addend == 0);
+ switch (type) {
+ case R_RISCV_BRANCH:
+ return reloc_sbimm12(code_ptr, (tcg_insn_unit *)value);
+ case R_RISCV_JAL:
+ return reloc_jimm20(code_ptr, (tcg_insn_unit *)value);
+ case R_RISCV_CALL:
+ return reloc_call(code_ptr, (tcg_insn_unit *)value);
+ default:
+ g_assert_not_reached();
+ }
+}
+
+/*
+ * TCG intrinsics
+ */
+
+static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
+{
+ if (ret == arg) {
+ return true;
+ }
+ switch (type) {
+ case TCG_TYPE_I32:
+ case TCG_TYPE_I64:
+ tcg_out_opc_imm(s, OPC_ADDI, ret, arg, 0);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ return true;
+}
+
+static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
+ tcg_target_long val)
+{
+ tcg_target_long lo, hi, tmp;
+ int shift, ret;
+
+ if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
+ val = (int32_t)val;
+ }
+
+ lo = sextreg(val, 0, 12);
+ if (val == lo) {
+ tcg_out_opc_imm(s, OPC_ADDI, rd, TCG_REG_ZERO, lo);
+ return;
+ }
+
+ hi = val - lo;
+ if (TCG_TARGET_REG_BITS == 32 || val == (int32_t)val) {
+ tcg_out_opc_upper(s, OPC_LUI, rd, hi);
+ if (lo != 0) {
+ tcg_out_opc_imm(s, OPC_ADDIW, rd, rd, lo);
+ }
+ return;
+ }
+
+ /* We can only be here if TCG_TARGET_REG_BITS != 32 */
+ tmp = tcg_pcrel_diff(s, (void *)val);
+ if (tmp == (int32_t)tmp) {
+ tcg_out_opc_upper(s, OPC_AUIPC, rd, 0);
+ tcg_out_opc_imm(s, OPC_ADDI, rd, rd, 0);
+ ret = reloc_call(s->code_ptr - 2, (const tcg_insn_unit *)val);
+ tcg_debug_assert(ret == true);
+ return;
+ }
+
+ /* Look for a single 20-bit section. */
+ shift = ctz64(val);
+ tmp = val >> shift;
+ if (tmp == sextreg(tmp, 0, 20)) {
+ tcg_out_opc_upper(s, OPC_LUI, rd, tmp << 12);
+ if (shift > 12) {
+ tcg_out_opc_imm(s, OPC_SLLI, rd, rd, shift - 12);
+ } else {
+ tcg_out_opc_imm(s, OPC_SRAI, rd, rd, 12 - shift);
+ }
+ return;
+ }
+
+ /* Look for a few high zero bits, with lots of bits set in the middle. */
+ shift = clz64(val);
+ tmp = val << shift;
+ if (tmp == sextreg(tmp, 12, 20) << 12) {
+ tcg_out_opc_upper(s, OPC_LUI, rd, tmp);
+ tcg_out_opc_imm(s, OPC_SRLI, rd, rd, shift);
+ return;
+ } else if (tmp == sextreg(tmp, 0, 12)) {
+ tcg_out_opc_imm(s, OPC_ADDI, rd, TCG_REG_ZERO, tmp);
+ tcg_out_opc_imm(s, OPC_SRLI, rd, rd, shift);
+ return;
+ }
+
+ /* Drop into the constant pool. */
+ new_pool_label(s, val, R_RISCV_CALL, s->code_ptr, 0);
+ tcg_out_opc_upper(s, OPC_AUIPC, rd, 0);
+ tcg_out_opc_imm(s, OPC_LD, rd, rd, 0);
+}
+
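For constants that do not fit in 12 bits, the first strategy above splits the value into LUI + ADDI(W) halves, with the 12-bit low part sign-extended so that the adjusted high part still lands on a LUI boundary. A small editorial sketch of that split, with two worked values (0x12345678 and 0x12345fff are arbitrary examples):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Low 12 bits, sign-extended -- what sextreg(val, 0, 12) computes above. */
static int64_t sext12(int64_t val)
{
    int64_t lo = val & 0xfff;
    return lo >= 0x800 ? lo - 0x1000 : lo;
}

static void show_split(int64_t val)
{
    int64_t lo = sext12(val);
    int64_t hi = val - lo;

    printf("0x%" PRIx64 " -> LUI 0x%" PRIx64 " + ADDI(W) %" PRId64 "\n",
           (uint64_t)val, (uint64_t)hi, lo);
}

int main(void)
{
    show_split(0x12345678);   /* lo = 0x678, hi = 0x12345000 */
    show_split(0x12345fff);   /* lo = -1,    hi = 0x12346000 */
    return 0;
}
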
+static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg)
+{
+ tcg_out_opc_imm(s, OPC_ANDI, ret, arg, 0xff);
+}
+
+static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg)
+{
+ tcg_out_opc_imm(s, OPC_SLLIW, ret, arg, 16);
+ tcg_out_opc_imm(s, OPC_SRLIW, ret, ret, 16);
+}
+
+static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg)
+{
+ tcg_out_opc_imm(s, OPC_SLLI, ret, arg, 32);
+ tcg_out_opc_imm(s, OPC_SRLI, ret, ret, 32);
+}
+
+static void tcg_out_ext8s(TCGContext *s, TCGReg ret, TCGReg arg)
+{
+ tcg_out_opc_imm(s, OPC_SLLIW, ret, arg, 24);
+ tcg_out_opc_imm(s, OPC_SRAIW, ret, ret, 24);
+}
+
+static void tcg_out_ext16s(TCGContext *s, TCGReg ret, TCGReg arg)
+{
+ tcg_out_opc_imm(s, OPC_SLLIW, ret, arg, 16);
+ tcg_out_opc_imm(s, OPC_SRAIW, ret, ret, 16);
+}
+
+static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg)
+{
+ tcg_out_opc_imm(s, OPC_ADDIW, ret, arg, 0);
+}
+
+static void tcg_out_ldst(TCGContext *s, RISCVInsn opc, TCGReg data,
+ TCGReg addr, intptr_t offset)
+{
+ intptr_t imm12 = sextreg(offset, 0, 12);
+
+ if (offset != imm12) {
+ intptr_t diff = offset - (uintptr_t)s->code_ptr;
+
+ if (addr == TCG_REG_ZERO && diff == (int32_t)diff) {
+ imm12 = sextreg(diff, 0, 12);
+ tcg_out_opc_upper(s, OPC_AUIPC, TCG_REG_TMP2, diff - imm12);
+ } else {
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP2, offset - imm12);
+ if (addr != TCG_REG_ZERO) {
+ tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP2, TCG_REG_TMP2, addr);
+ }
+ }
+ addr = TCG_REG_TMP2;
+ }
+
+ switch (opc) {
+ case OPC_SB:
+ case OPC_SH:
+ case OPC_SW:
+ case OPC_SD:
+ tcg_out_opc_store(s, opc, addr, data, imm12);
+ break;
+ case OPC_LB:
+ case OPC_LBU:
+ case OPC_LH:
+ case OPC_LHU:
+ case OPC_LW:
+ case OPC_LWU:
+ case OPC_LD:
+ tcg_out_opc_imm(s, opc, data, addr, imm12);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
+ TCGReg arg1, intptr_t arg2)
+{
+ bool is32bit = (TCG_TARGET_REG_BITS == 32 || type == TCG_TYPE_I32);
+ tcg_out_ldst(s, is32bit ? OPC_LW : OPC_LD, arg, arg1, arg2);
+}
+
+static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
+ TCGReg arg1, intptr_t arg2)
+{
+ bool is32bit = (TCG_TARGET_REG_BITS == 32 || type == TCG_TYPE_I32);
+ tcg_out_ldst(s, is32bit ? OPC_SW : OPC_SD, arg, arg1, arg2);
+}
+
+static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
+ TCGReg base, intptr_t ofs)
+{
+ if (val == 0) {
+ tcg_out_st(s, type, TCG_REG_ZERO, base, ofs);
+ return true;
+ }
+ return false;
+}
+
+static void tcg_out_addsub2(TCGContext *s,
+ TCGReg rl, TCGReg rh,
+ TCGReg al, TCGReg ah,
+ TCGArg bl, TCGArg bh,
+ bool cbl, bool cbh, bool is_sub, bool is32bit)
+{
+ const RISCVInsn opc_add = is32bit ? OPC_ADDW : OPC_ADD;
+ const RISCVInsn opc_addi = is32bit ? OPC_ADDIW : OPC_ADDI;
+ const RISCVInsn opc_sub = is32bit ? OPC_SUBW : OPC_SUB;
+ TCGReg th = TCG_REG_TMP1;
+
+ /* If we have a negative constant such that negating it would
+ make the high part zero, we can (usually) eliminate one insn. */
+ if (cbl && cbh && bh == -1 && bl != 0) {
+ bl = -bl;
+ bh = 0;
+ is_sub = !is_sub;
+ }
+
+ /* By operating on the high part first, we get to use the final
+ carry operation to move back from the temporary. */
+ if (!cbh) {
+ tcg_out_opc_reg(s, (is_sub ? opc_sub : opc_add), th, ah, bh);
+ } else if (bh != 0 || ah == rl) {
+ tcg_out_opc_imm(s, opc_addi, th, ah, (is_sub ? -bh : bh));
+ } else {
+ th = ah;
+ }
+
+ /* Note that tcg optimization should eliminate the bl == 0 case. */
+ if (is_sub) {
+ if (cbl) {
+ tcg_out_opc_imm(s, OPC_SLTIU, TCG_REG_TMP0, al, bl);
+ tcg_out_opc_imm(s, opc_addi, rl, al, -bl);
+ } else {
+ tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_TMP0, al, bl);
+ tcg_out_opc_reg(s, opc_sub, rl, al, bl);
+ }
+ tcg_out_opc_reg(s, opc_sub, rh, th, TCG_REG_TMP0);
+ } else {
+ if (cbl) {
+ tcg_out_opc_imm(s, opc_addi, rl, al, bl);
+ tcg_out_opc_imm(s, OPC_SLTIU, TCG_REG_TMP0, rl, bl);
+ } else if (rl == al && rl == bl) {
+ tcg_out_opc_imm(s, OPC_SLTI, TCG_REG_TMP0, al, 0);
+ tcg_out_opc_reg(s, opc_addi, rl, al, bl);
+ } else {
+ tcg_out_opc_reg(s, opc_add, rl, al, bl);
+ tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_TMP0,
+ rl, (rl == bl ? al : bl));
+ }
+ tcg_out_opc_reg(s, opc_add, rh, th, TCG_REG_TMP0);
+ }
+}
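
The carry and borrow in the expansion above come from unsigned comparisons (SLTU/SLTIU), exactly as one would write double-word arithmetic in portable C; a hedged sketch of the 64-bit case:

#include <stdint.h>

/* (rh:rl) = (ah:al) + (bh:bl); the carry is the SLTU result used above. */
static void add2(uint64_t *rl, uint64_t *rh,
                 uint64_t al, uint64_t ah, uint64_t bl, uint64_t bh)
{
    uint64_t lo = al + bl;
    uint64_t carry = lo < bl;        /* unsigned overflow of the low half */
    *rl = lo;
    *rh = ah + bh + carry;
}

/* (rh:rl) = (ah:al) - (bh:bl); the borrow is the SLTU/SLTIU al, bl above. */
static void sub2(uint64_t *rl, uint64_t *rh,
                 uint64_t al, uint64_t ah, uint64_t bl, uint64_t bh)
{
    uint64_t borrow = al < bl;
    *rl = al - bl;
    *rh = ah - bh - borrow;
}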
+
+static const struct {
+ RISCVInsn op;
+ bool swap;
+} tcg_brcond_to_riscv[] = {
+ [TCG_COND_EQ] = { OPC_BEQ, false },
+ [TCG_COND_NE] = { OPC_BNE, false },
+ [TCG_COND_LT] = { OPC_BLT, false },
+ [TCG_COND_GE] = { OPC_BGE, false },
+ [TCG_COND_LE] = { OPC_BGE, true },
+ [TCG_COND_GT] = { OPC_BLT, true },
+ [TCG_COND_LTU] = { OPC_BLTU, false },
+ [TCG_COND_GEU] = { OPC_BGEU, false },
+ [TCG_COND_LEU] = { OPC_BGEU, true },
+ [TCG_COND_GTU] = { OPC_BLTU, true }
+};
+
+static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1,
+ TCGReg arg2, TCGLabel *l)
+{
+ RISCVInsn op = tcg_brcond_to_riscv[cond].op;
+
+ tcg_debug_assert(op != 0);
+
+ if (tcg_brcond_to_riscv[cond].swap) {
+ TCGReg t = arg1;
+ arg1 = arg2;
+ arg2 = t;
+ }
+
+ tcg_out_reloc(s, s->code_ptr, R_RISCV_BRANCH, l, 0);
+ tcg_out_opc_branch(s, op, arg1, arg2, 0);
+}
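
RISC-V only encodes BEQ, BNE, BLT[U] and BGE[U], so the swapped table entries above are what make the remaining conditions reachable; for instance:

/* Examples of the operand swap applied above:
 *   brcond LE  a1, a2, L   ->   BGE  a2, a1, L
 *   brcond GTU a1, a2, L   ->   BLTU a2, a1, L
 */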
+
+static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret,
+ TCGReg arg1, TCGReg arg2)
+{
+ switch (cond) {
+ case TCG_COND_EQ:
+ tcg_out_opc_reg(s, OPC_SUB, ret, arg1, arg2);
+ tcg_out_opc_imm(s, OPC_SLTIU, ret, ret, 1);
+ break;
+ case TCG_COND_NE:
+ tcg_out_opc_reg(s, OPC_SUB, ret, arg1, arg2);
+ tcg_out_opc_reg(s, OPC_SLTU, ret, TCG_REG_ZERO, ret);
+ break;
+ case TCG_COND_LT:
+ tcg_out_opc_reg(s, OPC_SLT, ret, arg1, arg2);
+ break;
+ case TCG_COND_GE:
+ tcg_out_opc_reg(s, OPC_SLT, ret, arg1, arg2);
+ tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1);
+ break;
+ case TCG_COND_LE:
+ tcg_out_opc_reg(s, OPC_SLT, ret, arg2, arg1);
+ tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1);
+ break;
+ case TCG_COND_GT:
+ tcg_out_opc_reg(s, OPC_SLT, ret, arg2, arg1);
+ break;
+ case TCG_COND_LTU:
+ tcg_out_opc_reg(s, OPC_SLTU, ret, arg1, arg2);
+ break;
+ case TCG_COND_GEU:
+ tcg_out_opc_reg(s, OPC_SLTU, ret, arg1, arg2);
+ tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1);
+ break;
+ case TCG_COND_LEU:
+ tcg_out_opc_reg(s, OPC_SLTU, ret, arg2, arg1);
+ tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1);
+ break;
+ case TCG_COND_GTU:
+ tcg_out_opc_reg(s, OPC_SLTU, ret, arg2, arg1);
+ break;
+ default:
+ g_assert_not_reached();
+ break;
+ }
+}
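
Every condition above is reduced to SLT/SLTU plus at most one extra instruction; in C terms the reductions look like this (sketch only):

#include <stdint.h>

/* EQ:  SUB, then SLTIU ret, ret, 1 ("is the difference unsigned < 1?"). */
static int setcond_eq(uint64_t a, uint64_t b)  { return (a - b) < 1; }
/* NE:  SUB, then SLTU ret, zero, ret. */
static int setcond_ne(uint64_t a, uint64_t b)  { return 0 < (a - b); }
/* GE:  SLT, then XORI ret, ret, 1 (logical inversion of LT). */
static int setcond_ge(int64_t a, int64_t b)    { return !(a < b); }
/* LEU: swapped SLTU, then XORI ret, ret, 1. */
static int setcond_leu(uint64_t a, uint64_t b) { return !(b < a); }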
+
+static void tcg_out_brcond2(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah,
+ TCGReg bl, TCGReg bh, TCGLabel *l)
+{
+ /* todo */
+ g_assert_not_reached();
+}
+
+static void tcg_out_setcond2(TCGContext *s, TCGCond cond, TCGReg ret,
+ TCGReg al, TCGReg ah, TCGReg bl, TCGReg bh)
+{
+ /* todo */
+ g_assert_not_reached();
+}
+
+static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *arg, bool tail)
+{
+ TCGReg link = tail ? TCG_REG_ZERO : TCG_REG_RA;
+ ptrdiff_t offset = tcg_pcrel_diff(s, arg);
+ int ret;
+
+ tcg_debug_assert((offset & 1) == 0);
+ if (offset == sextreg(offset, 0, 20)) {
+        /* short jump: offset fits in a signed 20-bit byte offset (~±512 KiB) */
+ tcg_out_opc_jump(s, OPC_JAL, link, offset);
+ } else if (TCG_TARGET_REG_BITS == 32 || offset == (int32_t)offset) {
+        /* long jump: offset fits in a signed 32-bit byte offset (~±2 GiB) */
+ tcg_out_opc_upper(s, OPC_AUIPC, TCG_REG_TMP0, 0);
+ tcg_out_opc_imm(s, OPC_JALR, link, TCG_REG_TMP0, 0);
+ ret = reloc_call(s->code_ptr - 2, arg);
+ tcg_debug_assert(ret == true);
+ } else if (TCG_TARGET_REG_BITS == 64) {
+ /* far jump: 64-bit */
+ tcg_target_long imm = sextreg((tcg_target_long)arg, 0, 12);
+ tcg_target_long base = (tcg_target_long)arg - imm;
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, base);
+ tcg_out_opc_imm(s, OPC_JALR, link, TCG_REG_TMP0, imm);
+ } else {
+ g_assert_not_reached();
+ }
+}
+
+static void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg)
+{
+ tcg_out_call_int(s, arg, false);
+}
+
+static void tcg_out_mb(TCGContext *s, TCGArg a0)
+{
+ tcg_insn_unit insn = OPC_FENCE;
+
+    if (a0 & TCG_MO_LD_LD) {
+        insn |= 0x02200000;   /* fence r, r */
+    }
+    if (a0 & TCG_MO_ST_LD) {
+        insn |= 0x01200000;   /* fence w, r */
+    }
+    if (a0 & TCG_MO_LD_ST) {
+        insn |= 0x02100000;   /* fence r, w */
+    }
+    if (a0 & TCG_MO_ST_ST) {
+        insn |= 0x01100000;   /* fence w, w */
+    }
+ tcg_out32(s, insn);
+}
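
The magic numbers above are just the FENCE predecessor/successor sets: bits 27:24 name the prior accesses (I, O, R, W from high to low) and bits 23:20 the subsequent ones. A tiny sketch that rebuilds the same masks (illustration only):

#include <stdint.h>

enum { FENCE_W = 1, FENCE_R = 2, FENCE_O = 4, FENCE_I = 8 };

/* Compose a FENCE instruction word from predecessor/successor access sets. */
static uint32_t fence_insn(unsigned pred, unsigned succ)
{
    const uint32_t opc_fence = 0x0000000f;
    return opc_fence | (pred << 24) | (succ << 20);
}
/* e.g. fence_insn(FENCE_R, FENCE_R) == OPC_FENCE | 0x02200000 (TCG_MO_LD_LD). */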
+
+/*
+ * Load/store and TLB
+ */
+
+#if defined(CONFIG_SOFTMMU)
+#include "../tcg-ldst.c.inc"
+
+/* helper signature: helper_ret_ld_mmu(CPUArchState *env, target_ulong addr,
+ * MemOpIdx oi, uintptr_t ra)
+ */
+static void * const qemu_ld_helpers[MO_SSIZE + 1] = {
+ [MO_UB] = helper_ret_ldub_mmu,
+ [MO_SB] = helper_ret_ldsb_mmu,
+#ifdef HOST_WORDS_BIGENDIAN
+ [MO_UW] = helper_be_lduw_mmu,
+ [MO_SW] = helper_be_ldsw_mmu,
+ [MO_UL] = helper_be_ldul_mmu,
+#if TCG_TARGET_REG_BITS == 64
+ [MO_SL] = helper_be_ldsl_mmu,
+#endif
+ [MO_Q] = helper_be_ldq_mmu,
+#else
+ [MO_UW] = helper_le_lduw_mmu,
+ [MO_SW] = helper_le_ldsw_mmu,
+ [MO_UL] = helper_le_ldul_mmu,
+#if TCG_TARGET_REG_BITS == 64
+ [MO_SL] = helper_le_ldsl_mmu,
+#endif
+ [MO_Q] = helper_le_ldq_mmu,
+#endif
+};
+
+/* helper signature: helper_ret_st_mmu(CPUArchState *env, target_ulong addr,
+ * uintxx_t val, MemOpIdx oi,
+ * uintptr_t ra)
+ */
+static void * const qemu_st_helpers[MO_SIZE + 1] = {
+ [MO_8] = helper_ret_stb_mmu,
+#ifdef HOST_WORDS_BIGENDIAN
+ [MO_16] = helper_be_stw_mmu,
+ [MO_32] = helper_be_stl_mmu,
+ [MO_64] = helper_be_stq_mmu,
+#else
+ [MO_16] = helper_le_stw_mmu,
+ [MO_32] = helper_le_stl_mmu,
+ [MO_64] = helper_le_stq_mmu,
+#endif
+};
+
+/* We don't support oversize guests */
+QEMU_BUILD_BUG_ON(TCG_TARGET_REG_BITS < TARGET_LONG_BITS);
+
+/* We expect to use a 12-bit negative offset from ENV. */
+QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
+QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 11));
+
+static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
+{
+ tcg_out_opc_jump(s, OPC_JAL, TCG_REG_ZERO, 0);
+ bool ok = reloc_jimm20(s->code_ptr - 1, target);
+ tcg_debug_assert(ok);
+}
+
+static void tcg_out_tlb_load(TCGContext *s, TCGReg addrl,
+ TCGReg addrh, MemOpIdx oi,
+ tcg_insn_unit **label_ptr, bool is_load)
+{
+ MemOp opc = get_memop(oi);
+ unsigned s_bits = opc & MO_SIZE;
+ unsigned a_bits = get_alignment_bits(opc);
+ tcg_target_long compare_mask;
+ int mem_index = get_mmuidx(oi);
+ int fast_ofs = TLB_MASK_TABLE_OFS(mem_index);
+ int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask);
+ int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table);
+ TCGReg mask_base = TCG_AREG0, table_base = TCG_AREG0;
+
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, mask_base, mask_ofs);
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, table_base, table_ofs);
+
+ tcg_out_opc_imm(s, OPC_SRLI, TCG_REG_TMP2, addrl,
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
+ tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0);
+ tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1);
+
+ /* Load the tlb comparator and the addend. */
+ tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP0, TCG_REG_TMP2,
+ is_load ? offsetof(CPUTLBEntry, addr_read)
+ : offsetof(CPUTLBEntry, addr_write));
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2,
+ offsetof(CPUTLBEntry, addend));
+
+ /* We don't support unaligned accesses. */
+ if (a_bits < s_bits) {
+ a_bits = s_bits;
+ }
+ /* Clear the non-page, non-alignment bits from the address. */
+ compare_mask = (tcg_target_long)TARGET_PAGE_MASK | ((1 << a_bits) - 1);
+ if (compare_mask == sextreg(compare_mask, 0, 12)) {
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addrl, compare_mask);
+ } else {
+ tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_TMP1, compare_mask);
+ tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP1, TCG_REG_TMP1, addrl);
+ }
+
+ /* Compare masked address with the TLB entry. */
+ label_ptr[0] = s->code_ptr;
+ tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP0, TCG_REG_TMP1, 0);
+
+ /* TLB Hit - translate address using addend. */
+ if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
+ tcg_out_ext32u(s, TCG_REG_TMP0, addrl);
+ addrl = TCG_REG_TMP0;
+ }
+ tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_REG_TMP2, addrl);
+}
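
The SRLI/AND/ADD sequence above implements QEMU's softmmu fast path: the page-number bits of the guest address, masked by CPUTLBDescFast.mask, give a byte offset into the TLB table, and the selected CPUTLBEntry supplies both the comparator and the host-address addend. A rough C restatement (sketch; the parameters stand in for the TARGET_PAGE_BITS and CPU_TLB_ENTRY_BITS constants used above):

#include <stdint.h>

/* Compute the address of the CPUTLBEntry the emitted code compares against. */
static uintptr_t tlb_entry_addr(uint64_t guest_addr, uintptr_t mask,
                                uintptr_t table, int page_bits, int entry_bits)
{
    uintptr_t ofs = (uintptr_t)(guest_addr >> (page_bits - entry_bits)) & mask;
    return table + ofs;     /* entry holds addr_read/addr_write and addend */
}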
+
+static void add_qemu_ldst_label(TCGContext *s, int is_ld, MemOpIdx oi,
+ TCGType ext,
+ TCGReg datalo, TCGReg datahi,
+ TCGReg addrlo, TCGReg addrhi,
+ void *raddr, tcg_insn_unit **label_ptr)
+{
+ TCGLabelQemuLdst *label = new_ldst_label(s);
+
+ label->is_ld = is_ld;
+ label->oi = oi;
+ label->type = ext;
+ label->datalo_reg = datalo;
+ label->datahi_reg = datahi;
+ label->addrlo_reg = addrlo;
+ label->addrhi_reg = addrhi;
+ label->raddr = tcg_splitwx_to_rx(raddr);
+ label->label_ptr[0] = label_ptr[0];
+}
+
+static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
+{
+ MemOpIdx oi = l->oi;
+ MemOp opc = get_memop(oi);
+ TCGReg a0 = tcg_target_call_iarg_regs[0];
+ TCGReg a1 = tcg_target_call_iarg_regs[1];
+ TCGReg a2 = tcg_target_call_iarg_regs[2];
+ TCGReg a3 = tcg_target_call_iarg_regs[3];
+
+ /* We don't support oversize guests */
+ if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
+ g_assert_not_reached();
+ }
+
+ /* resolve label address */
+ if (!reloc_sbimm12(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
+ return false;
+ }
+
+ /* call load helper */
+ tcg_out_mov(s, TCG_TYPE_PTR, a0, TCG_AREG0);
+ tcg_out_mov(s, TCG_TYPE_PTR, a1, l->addrlo_reg);
+ tcg_out_movi(s, TCG_TYPE_PTR, a2, oi);
+ tcg_out_movi(s, TCG_TYPE_PTR, a3, (tcg_target_long)l->raddr);
+
+ tcg_out_call(s, qemu_ld_helpers[opc & MO_SSIZE]);
+ tcg_out_mov(s, (opc & MO_SIZE) == MO_64, l->datalo_reg, a0);
+
+ tcg_out_goto(s, l->raddr);
+ return true;
+}
+
+static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
+{
+ MemOpIdx oi = l->oi;
+ MemOp opc = get_memop(oi);
+ MemOp s_bits = opc & MO_SIZE;
+ TCGReg a0 = tcg_target_call_iarg_regs[0];
+ TCGReg a1 = tcg_target_call_iarg_regs[1];
+ TCGReg a2 = tcg_target_call_iarg_regs[2];
+ TCGReg a3 = tcg_target_call_iarg_regs[3];
+ TCGReg a4 = tcg_target_call_iarg_regs[4];
+
+ /* We don't support oversize guests */
+ if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
+ g_assert_not_reached();
+ }
+
+ /* resolve label address */
+ if (!reloc_sbimm12(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
+ return false;
+ }
+
+ /* call store helper */
+ tcg_out_mov(s, TCG_TYPE_PTR, a0, TCG_AREG0);
+ tcg_out_mov(s, TCG_TYPE_PTR, a1, l->addrlo_reg);
+ tcg_out_mov(s, TCG_TYPE_PTR, a2, l->datalo_reg);
+ switch (s_bits) {
+ case MO_8:
+ tcg_out_ext8u(s, a2, a2);
+ break;
+ case MO_16:
+ tcg_out_ext16u(s, a2, a2);
+ break;
+ default:
+ break;
+ }
+ tcg_out_movi(s, TCG_TYPE_PTR, a3, oi);
+ tcg_out_movi(s, TCG_TYPE_PTR, a4, (tcg_target_long)l->raddr);
+
+ tcg_out_call(s, qemu_st_helpers[opc & MO_SIZE]);
+
+ tcg_out_goto(s, l->raddr);
+ return true;
+}
+#endif /* CONFIG_SOFTMMU */
+
+static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi,
+ TCGReg base, MemOp opc, bool is_64)
+{
+ /* Byte swapping is left to middle-end expansion. */
+ tcg_debug_assert((opc & MO_BSWAP) == 0);
+
+ switch (opc & (MO_SSIZE)) {
+ case MO_UB:
+ tcg_out_opc_imm(s, OPC_LBU, lo, base, 0);
+ break;
+ case MO_SB:
+ tcg_out_opc_imm(s, OPC_LB, lo, base, 0);
+ break;
+ case MO_UW:
+ tcg_out_opc_imm(s, OPC_LHU, lo, base, 0);
+ break;
+ case MO_SW:
+ tcg_out_opc_imm(s, OPC_LH, lo, base, 0);
+ break;
+ case MO_UL:
+ if (TCG_TARGET_REG_BITS == 64 && is_64) {
+ tcg_out_opc_imm(s, OPC_LWU, lo, base, 0);
+ break;
+ }
+ /* FALLTHRU */
+ case MO_SL:
+ tcg_out_opc_imm(s, OPC_LW, lo, base, 0);
+ break;
+ case MO_Q:
+ /* Prefer to load from offset 0 first, but allow for overlap. */
+ if (TCG_TARGET_REG_BITS == 64) {
+ tcg_out_opc_imm(s, OPC_LD, lo, base, 0);
+ } else if (lo != base) {
+ tcg_out_opc_imm(s, OPC_LW, lo, base, 0);
+ tcg_out_opc_imm(s, OPC_LW, hi, base, 4);
+ } else {
+ tcg_out_opc_imm(s, OPC_LW, hi, base, 4);
+ tcg_out_opc_imm(s, OPC_LW, lo, base, 0);
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
+{
+ TCGReg addr_regl, addr_regh __attribute__((unused));
+ TCGReg data_regl, data_regh;
+ MemOpIdx oi;
+ MemOp opc;
+#if defined(CONFIG_SOFTMMU)
+ tcg_insn_unit *label_ptr[1];
+#endif
+ TCGReg base = TCG_REG_TMP0;
+
+ data_regl = *args++;
+ data_regh = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0);
+ addr_regl = *args++;
+ addr_regh = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0);
+ oi = *args++;
+ opc = get_memop(oi);
+
+#if defined(CONFIG_SOFTMMU)
+ tcg_out_tlb_load(s, addr_regl, addr_regh, oi, label_ptr, 1);
+ tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc, is_64);
+ add_qemu_ldst_label(s, 1, oi,
+ (is_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
+ data_regl, data_regh, addr_regl, addr_regh,
+ s->code_ptr, label_ptr);
+#else
+ if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
+ tcg_out_ext32u(s, base, addr_regl);
+ addr_regl = base;
+ }
+ if (guest_base != 0) {
+ tcg_out_opc_reg(s, OPC_ADD, base, TCG_GUEST_BASE_REG, addr_regl);
+ }
+ tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc, is_64);
+#endif
+}
+
+static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg lo, TCGReg hi,
+ TCGReg base, MemOp opc)
+{
+ /* Byte swapping is left to middle-end expansion. */
+ tcg_debug_assert((opc & MO_BSWAP) == 0);
+
+ switch (opc & (MO_SSIZE)) {
+ case MO_8:
+ tcg_out_opc_store(s, OPC_SB, base, lo, 0);
+ break;
+ case MO_16:
+ tcg_out_opc_store(s, OPC_SH, base, lo, 0);
+ break;
+ case MO_32:
+ tcg_out_opc_store(s, OPC_SW, base, lo, 0);
+ break;
+ case MO_64:
+ if (TCG_TARGET_REG_BITS == 64) {
+ tcg_out_opc_store(s, OPC_SD, base, lo, 0);
+ } else {
+ tcg_out_opc_store(s, OPC_SW, base, lo, 0);
+ tcg_out_opc_store(s, OPC_SW, base, hi, 4);
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
+{
+ TCGReg addr_regl, addr_regh __attribute__((unused));
+ TCGReg data_regl, data_regh;
+ MemOpIdx oi;
+ MemOp opc;
+#if defined(CONFIG_SOFTMMU)
+ tcg_insn_unit *label_ptr[1];
+#endif
+ TCGReg base = TCG_REG_TMP0;
+
+ data_regl = *args++;
+ data_regh = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0);
+ addr_regl = *args++;
+ addr_regh = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0);
+ oi = *args++;
+ opc = get_memop(oi);
+
+#if defined(CONFIG_SOFTMMU)
+ tcg_out_tlb_load(s, addr_regl, addr_regh, oi, label_ptr, 0);
+ tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc);
+ add_qemu_ldst_label(s, 0, oi,
+ (is_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
+ data_regl, data_regh, addr_regl, addr_regh,
+ s->code_ptr, label_ptr);
+#else
+ if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
+ tcg_out_ext32u(s, base, addr_regl);
+ addr_regl = base;
+ }
+ if (guest_base != 0) {
+ tcg_out_opc_reg(s, OPC_ADD, base, TCG_GUEST_BASE_REG, addr_regl);
+ }
+ tcg_out_qemu_st_direct(s, data_regl, data_regh, base, opc);
+#endif
+}
+
+static const tcg_insn_unit *tb_ret_addr;
+
+static void tcg_out_op(TCGContext *s, TCGOpcode opc,
+ const TCGArg args[TCG_MAX_OP_ARGS],
+ const int const_args[TCG_MAX_OP_ARGS])
+{
+ TCGArg a0 = args[0];
+ TCGArg a1 = args[1];
+ TCGArg a2 = args[2];
+ int c2 = const_args[2];
+
+ switch (opc) {
+ case INDEX_op_exit_tb:
+ /* Reuse the zeroing that exists for goto_ptr. */
+ if (a0 == 0) {
+ tcg_out_call_int(s, tcg_code_gen_epilogue, true);
+ } else {
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A0, a0);
+ tcg_out_call_int(s, tb_ret_addr, true);
+ }
+ break;
+
+ case INDEX_op_goto_tb:
+ assert(s->tb_jmp_insn_offset == 0);
+ /* indirect jump method */
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_REG_ZERO,
+ (uintptr_t)(s->tb_jmp_target_addr + a0));
+ tcg_out_opc_imm(s, OPC_JALR, TCG_REG_ZERO, TCG_REG_TMP0, 0);
+ set_jmp_reset_offset(s, a0);
+ break;
+
+ case INDEX_op_goto_ptr:
+ tcg_out_opc_imm(s, OPC_JALR, TCG_REG_ZERO, a0, 0);
+ break;
+
+ case INDEX_op_br:
+ tcg_out_reloc(s, s->code_ptr, R_RISCV_JAL, arg_label(a0), 0);
+ tcg_out_opc_jump(s, OPC_JAL, TCG_REG_ZERO, 0);
+ break;
+
+ case INDEX_op_ld8u_i32:
+ case INDEX_op_ld8u_i64:
+ tcg_out_ldst(s, OPC_LBU, a0, a1, a2);
+ break;
+ case INDEX_op_ld8s_i32:
+ case INDEX_op_ld8s_i64:
+ tcg_out_ldst(s, OPC_LB, a0, a1, a2);
+ break;
+ case INDEX_op_ld16u_i32:
+ case INDEX_op_ld16u_i64:
+ tcg_out_ldst(s, OPC_LHU, a0, a1, a2);
+ break;
+ case INDEX_op_ld16s_i32:
+ case INDEX_op_ld16s_i64:
+ tcg_out_ldst(s, OPC_LH, a0, a1, a2);
+ break;
+ case INDEX_op_ld32u_i64:
+ tcg_out_ldst(s, OPC_LWU, a0, a1, a2);
+ break;
+ case INDEX_op_ld_i32:
+ case INDEX_op_ld32s_i64:
+ tcg_out_ldst(s, OPC_LW, a0, a1, a2);
+ break;
+ case INDEX_op_ld_i64:
+ tcg_out_ldst(s, OPC_LD, a0, a1, a2);
+ break;
+
+ case INDEX_op_st8_i32:
+ case INDEX_op_st8_i64:
+ tcg_out_ldst(s, OPC_SB, a0, a1, a2);
+ break;
+ case INDEX_op_st16_i32:
+ case INDEX_op_st16_i64:
+ tcg_out_ldst(s, OPC_SH, a0, a1, a2);
+ break;
+ case INDEX_op_st_i32:
+ case INDEX_op_st32_i64:
+ tcg_out_ldst(s, OPC_SW, a0, a1, a2);
+ break;
+ case INDEX_op_st_i64:
+ tcg_out_ldst(s, OPC_SD, a0, a1, a2);
+ break;
+
+ case INDEX_op_add_i32:
+ if (c2) {
+ tcg_out_opc_imm(s, OPC_ADDIW, a0, a1, a2);
+ } else {
+ tcg_out_opc_reg(s, OPC_ADDW, a0, a1, a2);
+ }
+ break;
+ case INDEX_op_add_i64:
+ if (c2) {
+ tcg_out_opc_imm(s, OPC_ADDI, a0, a1, a2);
+ } else {
+ tcg_out_opc_reg(s, OPC_ADD, a0, a1, a2);
+ }
+ break;
+
+ case INDEX_op_sub_i32:
+ if (c2) {
+ tcg_out_opc_imm(s, OPC_ADDIW, a0, a1, -a2);
+ } else {
+ tcg_out_opc_reg(s, OPC_SUBW, a0, a1, a2);
+ }
+ break;
+ case INDEX_op_sub_i64:
+ if (c2) {
+ tcg_out_opc_imm(s, OPC_ADDI, a0, a1, -a2);
+ } else {
+ tcg_out_opc_reg(s, OPC_SUB, a0, a1, a2);
+ }
+ break;
+
+ case INDEX_op_and_i32:
+ case INDEX_op_and_i64:
+ if (c2) {
+ tcg_out_opc_imm(s, OPC_ANDI, a0, a1, a2);
+ } else {
+ tcg_out_opc_reg(s, OPC_AND, a0, a1, a2);
+ }
+ break;
+
+ case INDEX_op_or_i32:
+ case INDEX_op_or_i64:
+ if (c2) {
+ tcg_out_opc_imm(s, OPC_ORI, a0, a1, a2);
+ } else {
+ tcg_out_opc_reg(s, OPC_OR, a0, a1, a2);
+ }
+ break;
+
+ case INDEX_op_xor_i32:
+ case INDEX_op_xor_i64:
+ if (c2) {
+ tcg_out_opc_imm(s, OPC_XORI, a0, a1, a2);
+ } else {
+ tcg_out_opc_reg(s, OPC_XOR, a0, a1, a2);
+ }
+ break;
+
+ case INDEX_op_not_i32:
+ case INDEX_op_not_i64:
+ tcg_out_opc_imm(s, OPC_XORI, a0, a1, -1);
+ break;
+
+ case INDEX_op_neg_i32:
+ tcg_out_opc_reg(s, OPC_SUBW, a0, TCG_REG_ZERO, a1);
+ break;
+ case INDEX_op_neg_i64:
+ tcg_out_opc_reg(s, OPC_SUB, a0, TCG_REG_ZERO, a1);
+ break;
+
+ case INDEX_op_mul_i32:
+ tcg_out_opc_reg(s, OPC_MULW, a0, a1, a2);
+ break;
+ case INDEX_op_mul_i64:
+ tcg_out_opc_reg(s, OPC_MUL, a0, a1, a2);
+ break;
+
+ case INDEX_op_div_i32:
+ tcg_out_opc_reg(s, OPC_DIVW, a0, a1, a2);
+ break;
+ case INDEX_op_div_i64:
+ tcg_out_opc_reg(s, OPC_DIV, a0, a1, a2);
+ break;
+
+ case INDEX_op_divu_i32:
+ tcg_out_opc_reg(s, OPC_DIVUW, a0, a1, a2);
+ break;
+ case INDEX_op_divu_i64:
+ tcg_out_opc_reg(s, OPC_DIVU, a0, a1, a2);
+ break;
+
+ case INDEX_op_rem_i32:
+ tcg_out_opc_reg(s, OPC_REMW, a0, a1, a2);
+ break;
+ case INDEX_op_rem_i64:
+ tcg_out_opc_reg(s, OPC_REM, a0, a1, a2);
+ break;
+
+ case INDEX_op_remu_i32:
+ tcg_out_opc_reg(s, OPC_REMUW, a0, a1, a2);
+ break;
+ case INDEX_op_remu_i64:
+ tcg_out_opc_reg(s, OPC_REMU, a0, a1, a2);
+ break;
+
+ case INDEX_op_shl_i32:
+ if (c2) {
+ tcg_out_opc_imm(s, OPC_SLLIW, a0, a1, a2 & 0x1f);
+ } else {
+ tcg_out_opc_reg(s, OPC_SLLW, a0, a1, a2);
+ }
+ break;
+ case INDEX_op_shl_i64:
+ if (c2) {
+ tcg_out_opc_imm(s, OPC_SLLI, a0, a1, a2 & 0x3f);
+ } else {
+ tcg_out_opc_reg(s, OPC_SLL, a0, a1, a2);
+ }
+ break;
+
+ case INDEX_op_shr_i32:
+ if (c2) {
+ tcg_out_opc_imm(s, OPC_SRLIW, a0, a1, a2 & 0x1f);
+ } else {
+ tcg_out_opc_reg(s, OPC_SRLW, a0, a1, a2);
+ }
+ break;
+ case INDEX_op_shr_i64:
+ if (c2) {
+ tcg_out_opc_imm(s, OPC_SRLI, a0, a1, a2 & 0x3f);
+ } else {
+ tcg_out_opc_reg(s, OPC_SRL, a0, a1, a2);
+ }
+ break;
+
+ case INDEX_op_sar_i32:
+ if (c2) {
+ tcg_out_opc_imm(s, OPC_SRAIW, a0, a1, a2 & 0x1f);
+ } else {
+ tcg_out_opc_reg(s, OPC_SRAW, a0, a1, a2);
+ }
+ break;
+ case INDEX_op_sar_i64:
+ if (c2) {
+ tcg_out_opc_imm(s, OPC_SRAI, a0, a1, a2 & 0x3f);
+ } else {
+ tcg_out_opc_reg(s, OPC_SRA, a0, a1, a2);
+ }
+ break;
+
+ case INDEX_op_add2_i32:
+ tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5],
+ const_args[4], const_args[5], false, true);
+ break;
+ case INDEX_op_add2_i64:
+ tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5],
+ const_args[4], const_args[5], false, false);
+ break;
+ case INDEX_op_sub2_i32:
+ tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5],
+ const_args[4], const_args[5], true, true);
+ break;
+ case INDEX_op_sub2_i64:
+ tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5],
+ const_args[4], const_args[5], true, false);
+ break;
+
+ case INDEX_op_brcond_i32:
+ case INDEX_op_brcond_i64:
+ tcg_out_brcond(s, a2, a0, a1, arg_label(args[3]));
+ break;
+ case INDEX_op_brcond2_i32:
+ tcg_out_brcond2(s, args[4], a0, a1, a2, args[3], arg_label(args[5]));
+ break;
+
+ case INDEX_op_setcond_i32:
+ case INDEX_op_setcond_i64:
+ tcg_out_setcond(s, args[3], a0, a1, a2);
+ break;
+ case INDEX_op_setcond2_i32:
+ tcg_out_setcond2(s, args[5], a0, a1, a2, args[3], args[4]);
+ break;
+
+ case INDEX_op_qemu_ld_i32:
+ tcg_out_qemu_ld(s, args, false);
+ break;
+ case INDEX_op_qemu_ld_i64:
+ tcg_out_qemu_ld(s, args, true);
+ break;
+ case INDEX_op_qemu_st_i32:
+ tcg_out_qemu_st(s, args, false);
+ break;
+ case INDEX_op_qemu_st_i64:
+ tcg_out_qemu_st(s, args, true);
+ break;
+
+ case INDEX_op_ext8u_i32:
+ case INDEX_op_ext8u_i64:
+ tcg_out_ext8u(s, a0, a1);
+ break;
+
+ case INDEX_op_ext16u_i32:
+ case INDEX_op_ext16u_i64:
+ tcg_out_ext16u(s, a0, a1);
+ break;
+
+ case INDEX_op_ext32u_i64:
+ case INDEX_op_extu_i32_i64:
+ tcg_out_ext32u(s, a0, a1);
+ break;
+
+ case INDEX_op_ext8s_i32:
+ case INDEX_op_ext8s_i64:
+ tcg_out_ext8s(s, a0, a1);
+ break;
+
+ case INDEX_op_ext16s_i32:
+ case INDEX_op_ext16s_i64:
+ tcg_out_ext16s(s, a0, a1);
+ break;
+
+ case INDEX_op_ext32s_i64:
+ case INDEX_op_extrl_i64_i32:
+ case INDEX_op_ext_i32_i64:
+ tcg_out_ext32s(s, a0, a1);
+ break;
+
+ case INDEX_op_extrh_i64_i32:
+ tcg_out_opc_imm(s, OPC_SRAI, a0, a1, 32);
+ break;
+
+ case INDEX_op_mulsh_i32:
+ case INDEX_op_mulsh_i64:
+ tcg_out_opc_reg(s, OPC_MULH, a0, a1, a2);
+ break;
+
+ case INDEX_op_muluh_i32:
+ case INDEX_op_muluh_i64:
+ tcg_out_opc_reg(s, OPC_MULHU, a0, a1, a2);
+ break;
+
+ case INDEX_op_mb:
+ tcg_out_mb(s, a0);
+ break;
+
+ case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
+ case INDEX_op_mov_i64:
+ case INDEX_op_call: /* Always emitted via tcg_out_call. */
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
+{
+ switch (op) {
+ case INDEX_op_goto_ptr:
+ return C_O0_I1(r);
+
+ case INDEX_op_ld8u_i32:
+ case INDEX_op_ld8s_i32:
+ case INDEX_op_ld16u_i32:
+ case INDEX_op_ld16s_i32:
+ case INDEX_op_ld_i32:
+ case INDEX_op_not_i32:
+ case INDEX_op_neg_i32:
+ case INDEX_op_ld8u_i64:
+ case INDEX_op_ld8s_i64:
+ case INDEX_op_ld16u_i64:
+ case INDEX_op_ld16s_i64:
+ case INDEX_op_ld32s_i64:
+ case INDEX_op_ld32u_i64:
+ case INDEX_op_ld_i64:
+ case INDEX_op_not_i64:
+ case INDEX_op_neg_i64:
+ case INDEX_op_ext8u_i32:
+ case INDEX_op_ext8u_i64:
+ case INDEX_op_ext16u_i32:
+ case INDEX_op_ext16u_i64:
+ case INDEX_op_ext32u_i64:
+ case INDEX_op_extu_i32_i64:
+ case INDEX_op_ext8s_i32:
+ case INDEX_op_ext8s_i64:
+ case INDEX_op_ext16s_i32:
+ case INDEX_op_ext16s_i64:
+ case INDEX_op_ext32s_i64:
+ case INDEX_op_extrl_i64_i32:
+ case INDEX_op_extrh_i64_i32:
+ case INDEX_op_ext_i32_i64:
+ return C_O1_I1(r, r);
+
+ case INDEX_op_st8_i32:
+ case INDEX_op_st16_i32:
+ case INDEX_op_st_i32:
+ case INDEX_op_st8_i64:
+ case INDEX_op_st16_i64:
+ case INDEX_op_st32_i64:
+ case INDEX_op_st_i64:
+ return C_O0_I2(rZ, r);
+
+ case INDEX_op_add_i32:
+ case INDEX_op_and_i32:
+ case INDEX_op_or_i32:
+ case INDEX_op_xor_i32:
+ case INDEX_op_add_i64:
+ case INDEX_op_and_i64:
+ case INDEX_op_or_i64:
+ case INDEX_op_xor_i64:
+ return C_O1_I2(r, r, rI);
+
+ case INDEX_op_sub_i32:
+ case INDEX_op_sub_i64:
+ return C_O1_I2(r, rZ, rN);
+
+ case INDEX_op_mul_i32:
+ case INDEX_op_mulsh_i32:
+ case INDEX_op_muluh_i32:
+ case INDEX_op_div_i32:
+ case INDEX_op_divu_i32:
+ case INDEX_op_rem_i32:
+ case INDEX_op_remu_i32:
+ case INDEX_op_setcond_i32:
+ case INDEX_op_mul_i64:
+ case INDEX_op_mulsh_i64:
+ case INDEX_op_muluh_i64:
+ case INDEX_op_div_i64:
+ case INDEX_op_divu_i64:
+ case INDEX_op_rem_i64:
+ case INDEX_op_remu_i64:
+ case INDEX_op_setcond_i64:
+ return C_O1_I2(r, rZ, rZ);
+
+ case INDEX_op_shl_i32:
+ case INDEX_op_shr_i32:
+ case INDEX_op_sar_i32:
+ case INDEX_op_shl_i64:
+ case INDEX_op_shr_i64:
+ case INDEX_op_sar_i64:
+ return C_O1_I2(r, r, ri);
+
+ case INDEX_op_brcond_i32:
+ case INDEX_op_brcond_i64:
+ return C_O0_I2(rZ, rZ);
+
+ case INDEX_op_add2_i32:
+ case INDEX_op_add2_i64:
+ case INDEX_op_sub2_i32:
+ case INDEX_op_sub2_i64:
+ return C_O2_I4(r, r, rZ, rZ, rM, rM);
+
+ case INDEX_op_brcond2_i32:
+ return C_O0_I4(rZ, rZ, rZ, rZ);
+
+ case INDEX_op_setcond2_i32:
+ return C_O1_I4(r, rZ, rZ, rZ, rZ);
+
+ case INDEX_op_qemu_ld_i32:
+ return (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
+ ? C_O1_I1(r, L) : C_O1_I2(r, L, L));
+ case INDEX_op_qemu_st_i32:
+ return (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
+ ? C_O0_I2(LZ, L) : C_O0_I3(LZ, L, L));
+ case INDEX_op_qemu_ld_i64:
+ return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L)
+ : TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? C_O2_I1(r, r, L)
+ : C_O2_I2(r, r, L, L));
+ case INDEX_op_qemu_st_i64:
+ return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(LZ, L)
+ : TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? C_O0_I3(LZ, LZ, L)
+ : C_O0_I4(LZ, LZ, L, L));
+
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static const int tcg_target_callee_save_regs[] = {
+ TCG_REG_S0, /* used for the global env (TCG_AREG0) */
+ TCG_REG_S1,
+ TCG_REG_S2,
+ TCG_REG_S3,
+ TCG_REG_S4,
+ TCG_REG_S5,
+ TCG_REG_S6,
+ TCG_REG_S7,
+ TCG_REG_S8,
+ TCG_REG_S9,
+ TCG_REG_S10,
+ TCG_REG_S11,
+ TCG_REG_RA, /* should be last for ABI compliance */
+};
+
+/* Stack frame parameters. */
+#define REG_SIZE (TCG_TARGET_REG_BITS / 8)
+#define SAVE_SIZE ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * REG_SIZE)
+#define TEMP_SIZE (CPU_TEMP_BUF_NLONGS * (int)sizeof(long))
+#define FRAME_SIZE ((TCG_STATIC_CALL_ARGS_SIZE + TEMP_SIZE + SAVE_SIZE \
+ + TCG_TARGET_STACK_ALIGN - 1) \
+ & -TCG_TARGET_STACK_ALIGN)
+#define SAVE_OFS (TCG_STATIC_CALL_ARGS_SIZE + TEMP_SIZE)
+
+/* We're expecting to be able to use an immediate for frame allocation. */
+QEMU_BUILD_BUG_ON(FRAME_SIZE > 0x7ff);
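
A worked example of why that assertion holds on RV64, assuming the usual values TCG_STATIC_CALL_ARGS_SIZE == 128 and CPU_TEMP_BUF_NLONGS == 128 (both defined elsewhere in the tree):

/*   SAVE_SIZE  = 13 saved regs * 8 bytes   = 104
 *   TEMP_SIZE  = 128 longs     * 8 bytes   = 1024
 *   FRAME_SIZE = align16(128 + 1024 + 104) = 1264
 * which stays below 0x7ff, so the ADDI that adjusts SP in the prologue can
 * encode the whole frame in its signed 12-bit immediate. */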
+
+/* Generate global QEMU prologue and epilogue code */
+static void tcg_target_qemu_prologue(TCGContext *s)
+{
+ int i;
+
+ tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE, TEMP_SIZE);
+
+ /* TB prologue */
+ tcg_out_opc_imm(s, OPC_ADDI, TCG_REG_SP, TCG_REG_SP, -FRAME_SIZE);
+ for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
+ tcg_out_st(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
+ TCG_REG_SP, SAVE_OFS + i * REG_SIZE);
+ }
+
+#if !defined(CONFIG_SOFTMMU)
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base);
+ tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
+#endif
+
+ /* Call generated code */
+ tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
+ tcg_out_opc_imm(s, OPC_JALR, TCG_REG_ZERO, tcg_target_call_iarg_regs[1], 0);
+
+ /* Return path for goto_ptr. Set return value to 0 */
+ tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
+ tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_A0, TCG_REG_ZERO);
+
+ /* TB epilogue */
+ tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
+ for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
+ tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
+ TCG_REG_SP, SAVE_OFS + i * REG_SIZE);
+ }
+
+ tcg_out_opc_imm(s, OPC_ADDI, TCG_REG_SP, TCG_REG_SP, FRAME_SIZE);
+ tcg_out_opc_imm(s, OPC_JALR, TCG_REG_ZERO, TCG_REG_RA, 0);
+}
+
+static void tcg_target_init(TCGContext *s)
+{
+ tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff;
+ if (TCG_TARGET_REG_BITS == 64) {
+ tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff;
+ }
+
+ tcg_target_call_clobber_regs = -1u;
+ tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S0);
+ tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S1);
+ tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S2);
+ tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S3);
+ tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S4);
+ tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S5);
+ tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S6);
+ tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S7);
+ tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S8);
+ tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S9);
+ tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S10);
+ tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S11);
+
+ s->reserved_regs = 0;
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_ZERO);
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0);
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2);
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_GP);
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_TP);
+}
+
+typedef struct {
+ DebugFrameHeader h;
+ uint8_t fde_def_cfa[4];
+ uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2];
+} DebugFrame;
+
+#define ELF_HOST_MACHINE EM_RISCV
+
+static const DebugFrame debug_frame = {
+ .h.cie.len = sizeof(DebugFrameCIE) - 4, /* length after .len member */
+ .h.cie.id = -1,
+ .h.cie.version = 1,
+ .h.cie.code_align = 1,
+ .h.cie.data_align = -(TCG_TARGET_REG_BITS / 8) & 0x7f, /* sleb128 */
+ .h.cie.return_column = TCG_REG_RA,
+
+ /* Total FDE size does not include the "len" member. */
+ .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
+
+ .fde_def_cfa = {
+ 12, TCG_REG_SP, /* DW_CFA_def_cfa sp, ... */
+ (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
+ (FRAME_SIZE >> 7)
+ },
+ .fde_reg_ofs = {
+ 0x80 + 9, 12, /* DW_CFA_offset, s1, -96 */
+ 0x80 + 18, 11, /* DW_CFA_offset, s2, -88 */
+ 0x80 + 19, 10, /* DW_CFA_offset, s3, -80 */
+ 0x80 + 20, 9, /* DW_CFA_offset, s4, -72 */
+ 0x80 + 21, 8, /* DW_CFA_offset, s5, -64 */
+ 0x80 + 22, 7, /* DW_CFA_offset, s6, -56 */
+ 0x80 + 23, 6, /* DW_CFA_offset, s7, -48 */
+ 0x80 + 24, 5, /* DW_CFA_offset, s8, -40 */
+ 0x80 + 25, 4, /* DW_CFA_offset, s9, -32 */
+ 0x80 + 26, 3, /* DW_CFA_offset, s10, -24 */
+ 0x80 + 27, 2, /* DW_CFA_offset, s11, -16 */
+ 0x80 + 1 , 1, /* DW_CFA_offset, ra, -8 */
+ }
+};
+
+void tcg_register_jit(const void *buf, size_t buf_size)
+{
+ tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
+}
diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h
new file mode 100644
index 000000000..ef78b99e9
--- /dev/null
+++ b/tcg/riscv/tcg-target.h
@@ -0,0 +1,175 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2018 SiFive, Inc
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef RISCV_TCG_TARGET_H
+#define RISCV_TCG_TARGET_H
+
+#if __riscv_xlen == 32
+# define TCG_TARGET_REG_BITS 32
+#elif __riscv_xlen == 64
+# define TCG_TARGET_REG_BITS 64
+#endif
+
+#define TCG_TARGET_INSN_UNIT_SIZE 4
+#define TCG_TARGET_TLB_DISPLACEMENT_BITS 20
+#define TCG_TARGET_NB_REGS 32
+#define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)
+
+typedef enum {
+ TCG_REG_ZERO,
+ TCG_REG_RA,
+ TCG_REG_SP,
+ TCG_REG_GP,
+ TCG_REG_TP,
+ TCG_REG_T0,
+ TCG_REG_T1,
+ TCG_REG_T2,
+ TCG_REG_S0,
+ TCG_REG_S1,
+ TCG_REG_A0,
+ TCG_REG_A1,
+ TCG_REG_A2,
+ TCG_REG_A3,
+ TCG_REG_A4,
+ TCG_REG_A5,
+ TCG_REG_A6,
+ TCG_REG_A7,
+ TCG_REG_S2,
+ TCG_REG_S3,
+ TCG_REG_S4,
+ TCG_REG_S5,
+ TCG_REG_S6,
+ TCG_REG_S7,
+ TCG_REG_S8,
+ TCG_REG_S9,
+ TCG_REG_S10,
+ TCG_REG_S11,
+ TCG_REG_T3,
+ TCG_REG_T4,
+ TCG_REG_T5,
+ TCG_REG_T6,
+
+ /* aliases */
+ TCG_AREG0 = TCG_REG_S0,
+ TCG_GUEST_BASE_REG = TCG_REG_S1,
+ TCG_REG_TMP0 = TCG_REG_T6,
+ TCG_REG_TMP1 = TCG_REG_T5,
+ TCG_REG_TMP2 = TCG_REG_T4,
+} TCGReg;
+
+/* used for function call generation */
+#define TCG_REG_CALL_STACK TCG_REG_SP
+#define TCG_TARGET_STACK_ALIGN 16
+#define TCG_TARGET_CALL_ALIGN_ARGS 1
+#define TCG_TARGET_CALL_STACK_OFFSET 0
+
+/* optional instructions */
+#define TCG_TARGET_HAS_movcond_i32 0
+#define TCG_TARGET_HAS_div_i32 1
+#define TCG_TARGET_HAS_rem_i32 1
+#define TCG_TARGET_HAS_div2_i32 0
+#define TCG_TARGET_HAS_rot_i32 0
+#define TCG_TARGET_HAS_deposit_i32 0
+#define TCG_TARGET_HAS_extract_i32 0
+#define TCG_TARGET_HAS_sextract_i32 0
+#define TCG_TARGET_HAS_extract2_i32 0
+#define TCG_TARGET_HAS_add2_i32 1
+#define TCG_TARGET_HAS_sub2_i32 1
+#define TCG_TARGET_HAS_mulu2_i32 0
+#define TCG_TARGET_HAS_muls2_i32 0
+#define TCG_TARGET_HAS_muluh_i32 (TCG_TARGET_REG_BITS == 32)
+#define TCG_TARGET_HAS_mulsh_i32 (TCG_TARGET_REG_BITS == 32)
+#define TCG_TARGET_HAS_ext8s_i32 1
+#define TCG_TARGET_HAS_ext16s_i32 1
+#define TCG_TARGET_HAS_ext8u_i32 1
+#define TCG_TARGET_HAS_ext16u_i32 1
+#define TCG_TARGET_HAS_bswap16_i32 0
+#define TCG_TARGET_HAS_bswap32_i32 0
+#define TCG_TARGET_HAS_not_i32 1
+#define TCG_TARGET_HAS_neg_i32 1
+#define TCG_TARGET_HAS_andc_i32 0
+#define TCG_TARGET_HAS_orc_i32 0
+#define TCG_TARGET_HAS_eqv_i32 0
+#define TCG_TARGET_HAS_nand_i32 0
+#define TCG_TARGET_HAS_nor_i32 0
+#define TCG_TARGET_HAS_clz_i32 0
+#define TCG_TARGET_HAS_ctz_i32 0
+#define TCG_TARGET_HAS_ctpop_i32 0
+#define TCG_TARGET_HAS_direct_jump 0
+#define TCG_TARGET_HAS_brcond2 1
+#define TCG_TARGET_HAS_setcond2 1
+#define TCG_TARGET_HAS_qemu_st8_i32 0
+
+#if TCG_TARGET_REG_BITS == 64
+#define TCG_TARGET_HAS_movcond_i64 0
+#define TCG_TARGET_HAS_div_i64 1
+#define TCG_TARGET_HAS_rem_i64 1
+#define TCG_TARGET_HAS_div2_i64 0
+#define TCG_TARGET_HAS_rot_i64 0
+#define TCG_TARGET_HAS_deposit_i64 0
+#define TCG_TARGET_HAS_extract_i64 0
+#define TCG_TARGET_HAS_sextract_i64 0
+#define TCG_TARGET_HAS_extract2_i64 0
+#define TCG_TARGET_HAS_extrl_i64_i32 1
+#define TCG_TARGET_HAS_extrh_i64_i32 1
+#define TCG_TARGET_HAS_ext8s_i64 1
+#define TCG_TARGET_HAS_ext16s_i64 1
+#define TCG_TARGET_HAS_ext32s_i64 1
+#define TCG_TARGET_HAS_ext8u_i64 1
+#define TCG_TARGET_HAS_ext16u_i64 1
+#define TCG_TARGET_HAS_ext32u_i64 1
+#define TCG_TARGET_HAS_bswap16_i64 0
+#define TCG_TARGET_HAS_bswap32_i64 0
+#define TCG_TARGET_HAS_bswap64_i64 0
+#define TCG_TARGET_HAS_not_i64 1
+#define TCG_TARGET_HAS_neg_i64 1
+#define TCG_TARGET_HAS_andc_i64 0
+#define TCG_TARGET_HAS_orc_i64 0
+#define TCG_TARGET_HAS_eqv_i64 0
+#define TCG_TARGET_HAS_nand_i64 0
+#define TCG_TARGET_HAS_nor_i64 0
+#define TCG_TARGET_HAS_clz_i64 0
+#define TCG_TARGET_HAS_ctz_i64 0
+#define TCG_TARGET_HAS_ctpop_i64 0
+#define TCG_TARGET_HAS_add2_i64 1
+#define TCG_TARGET_HAS_sub2_i64 1
+#define TCG_TARGET_HAS_mulu2_i64 0
+#define TCG_TARGET_HAS_muls2_i64 0
+#define TCG_TARGET_HAS_muluh_i64 1
+#define TCG_TARGET_HAS_mulsh_i64 1
+#endif
+
+/* not defined -- call should be eliminated at compile time */
+void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+
+#define TCG_TARGET_DEFAULT_MO (0)
+
+#ifdef CONFIG_SOFTMMU
+#define TCG_TARGET_NEED_LDST_LABELS
+#endif
+#define TCG_TARGET_NEED_POOL_LABELS
+
+#define TCG_TARGET_HAS_MEMORY_BSWAP 0
+
+#endif
diff --git a/tcg/s390x/tcg-target-con-set.h b/tcg/s390x/tcg-target-con-set.h
new file mode 100644
index 000000000..426dd92e5
--- /dev/null
+++ b/tcg/s390x/tcg-target-con-set.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define S390 target-specific constraint sets.
+ * Copyright (c) 2021 Linaro
+ */
+
+/*
+ * C_On_Im(...) defines a constraint set with <n> outputs and <m> inputs.
+ * Each operand should be a sequence of constraint letters as defined by
+ * tcg-target-con-str.h; the constraint combination is inclusive or.
+ */
+C_O0_I1(r)
+C_O0_I2(L, L)
+C_O0_I2(r, r)
+C_O0_I2(r, ri)
+C_O0_I2(v, r)
+C_O1_I1(r, L)
+C_O1_I1(r, r)
+C_O1_I1(v, r)
+C_O1_I1(v, v)
+C_O1_I1(v, vr)
+C_O1_I2(r, 0, ri)
+C_O1_I2(r, 0, rI)
+C_O1_I2(r, 0, rJ)
+C_O1_I2(r, r, ri)
+C_O1_I2(r, rZ, r)
+C_O1_I2(v, v, r)
+C_O1_I2(v, v, v)
+C_O1_I3(v, v, v, v)
+C_O1_I4(r, r, ri, r, 0)
+C_O1_I4(r, r, ri, rI, 0)
+C_O2_I2(b, a, 0, r)
+C_O2_I3(b, a, 0, 1, r)
+C_O2_I4(r, r, 0, 1, rA, r)
+C_O2_I4(r, r, 0, 1, ri, r)
+C_O2_I4(r, r, 0, 1, r, r)
diff --git a/tcg/s390x/tcg-target-con-str.h b/tcg/s390x/tcg-target-con-str.h
new file mode 100644
index 000000000..8bb0358ae
--- /dev/null
+++ b/tcg/s390x/tcg-target-con-str.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define S390 target-specific operand constraints.
+ * Copyright (c) 2021 Linaro
+ */
+
+/*
+ * Define constraint letters for register sets:
+ * REGS(letter, register_mask)
+ */
+REGS('r', ALL_GENERAL_REGS)
+REGS('L', ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS)
+REGS('v', ALL_VECTOR_REGS)
+/*
+ * A (single) even/odd pair for division.
+ * TODO: Add something to the register allocator to allow
+ * this kind of regno+1 pairing to be done more generally.
+ */
+REGS('a', 1u << TCG_REG_R2)
+REGS('b', 1u << TCG_REG_R3)
+
+/*
+ * Define constraint letters for constants:
+ * CONST(letter, TCG_CT_CONST_* bit set)
+ */
+CONST('A', TCG_CT_CONST_S33)
+CONST('I', TCG_CT_CONST_S16)
+CONST('J', TCG_CT_CONST_S32)
+CONST('Z', TCG_CT_CONST_ZERO)
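
Read together with tcg-target-con-set.h, these letters decode one operand at a time; for example, the set C_O2_I4(r, r, 0, 1, rA, r) listed above reads as follows (an illustrative gloss using the definitions in this file):

/* C_O2_I4(r, r, 0, 1, rA, r):
 *   out0 'r'  -- any general register
 *   out1 'r'  -- any general register
 *   in0  '0'  -- must be allocated to the same register as out0
 *   in1  '1'  -- must be allocated to the same register as out1
 *   in2  'rA' -- a general register, or a signed 33-bit constant ('A' above)
 *   in3  'r'  -- any general register
 */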
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
new file mode 100644
index 000000000..57e803e33
--- /dev/null
+++ b/tcg/s390x/tcg-target.c.inc
@@ -0,0 +1,3411 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2009 Ulrich Hecht <uli@suse.de>
+ * Copyright (c) 2009 Alexander Graf <agraf@suse.de>
+ * Copyright (c) 2010 Richard Henderson <rth@twiddle.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+/* We only support generating code for 64-bit mode. */
+#if TCG_TARGET_REG_BITS != 64
+#error "unsupported code generation mode"
+#endif
+
+#include "../tcg-pool.c.inc"
+#include "elf.h"
+
+/* ??? The translation blocks produced by TCG are generally small enough to
+ be entirely reachable with a 16-bit displacement. Leaving the option for
+ a 32-bit displacement here Just In Case. */
+#define USE_LONG_BRANCHES 0
+
+#define TCG_CT_CONST_S16 0x100
+#define TCG_CT_CONST_S32 0x200
+#define TCG_CT_CONST_S33 0x400
+#define TCG_CT_CONST_ZERO 0x800
+
+#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 16)
+#define ALL_VECTOR_REGS MAKE_64BIT_MASK(32, 32)
+
+/*
+ * For softmmu, we need to avoid conflicts with the first 3
+ * argument registers to perform the tlb lookup, and to call
+ * the helper function.
+ */
+#ifdef CONFIG_SOFTMMU
+#define SOFTMMU_RESERVE_REGS MAKE_64BIT_MASK(TCG_REG_R2, 3)
+#else
+#define SOFTMMU_RESERVE_REGS 0
+#endif
+
+
+/* Several places within the instruction set 0 means "no register"
+ rather than TCG_REG_R0. */
+#define TCG_REG_NONE 0
+
+/* A scratch register that may be used throughout the backend. */
+#define TCG_TMP0 TCG_REG_R1
+
+/* A scratch register that holds a pointer to the beginning of the TB.
+ We don't need this when we have pc-relative loads with the general
+ instructions extension facility. */
+#define TCG_REG_TB TCG_REG_R12
+#define USE_REG_TB (!HAVE_FACILITY(GEN_INST_EXT))
+
+#ifndef CONFIG_SOFTMMU
+#define TCG_GUEST_BASE_REG TCG_REG_R13
+#endif
+
+/* All of the following instructions are prefixed with their instruction
+ format, and are defined as 8- or 16-bit quantities, even when the two
+ halves of the 16-bit quantity may appear 32 bits apart in the insn.
+ This makes it easy to copy the values from the tables in Appendix B. */
+typedef enum S390Opcode {
+ RIL_AFI = 0xc209,
+ RIL_AGFI = 0xc208,
+ RIL_ALFI = 0xc20b,
+ RIL_ALGFI = 0xc20a,
+ RIL_BRASL = 0xc005,
+ RIL_BRCL = 0xc004,
+ RIL_CFI = 0xc20d,
+ RIL_CGFI = 0xc20c,
+ RIL_CLFI = 0xc20f,
+ RIL_CLGFI = 0xc20e,
+ RIL_CLRL = 0xc60f,
+ RIL_CLGRL = 0xc60a,
+ RIL_CRL = 0xc60d,
+ RIL_CGRL = 0xc608,
+ RIL_IIHF = 0xc008,
+ RIL_IILF = 0xc009,
+ RIL_LARL = 0xc000,
+ RIL_LGFI = 0xc001,
+ RIL_LGRL = 0xc408,
+ RIL_LLIHF = 0xc00e,
+ RIL_LLILF = 0xc00f,
+ RIL_LRL = 0xc40d,
+ RIL_MSFI = 0xc201,
+ RIL_MSGFI = 0xc200,
+ RIL_NIHF = 0xc00a,
+ RIL_NILF = 0xc00b,
+ RIL_OIHF = 0xc00c,
+ RIL_OILF = 0xc00d,
+ RIL_SLFI = 0xc205,
+ RIL_SLGFI = 0xc204,
+ RIL_XIHF = 0xc006,
+ RIL_XILF = 0xc007,
+
+ RI_AGHI = 0xa70b,
+ RI_AHI = 0xa70a,
+ RI_BRC = 0xa704,
+ RI_CHI = 0xa70e,
+ RI_CGHI = 0xa70f,
+ RI_IIHH = 0xa500,
+ RI_IIHL = 0xa501,
+ RI_IILH = 0xa502,
+ RI_IILL = 0xa503,
+ RI_LGHI = 0xa709,
+ RI_LLIHH = 0xa50c,
+ RI_LLIHL = 0xa50d,
+ RI_LLILH = 0xa50e,
+ RI_LLILL = 0xa50f,
+ RI_MGHI = 0xa70d,
+ RI_MHI = 0xa70c,
+ RI_NIHH = 0xa504,
+ RI_NIHL = 0xa505,
+ RI_NILH = 0xa506,
+ RI_NILL = 0xa507,
+ RI_OIHH = 0xa508,
+ RI_OIHL = 0xa509,
+ RI_OILH = 0xa50a,
+ RI_OILL = 0xa50b,
+
+ RIE_CGIJ = 0xec7c,
+ RIE_CGRJ = 0xec64,
+ RIE_CIJ = 0xec7e,
+ RIE_CLGRJ = 0xec65,
+ RIE_CLIJ = 0xec7f,
+ RIE_CLGIJ = 0xec7d,
+ RIE_CLRJ = 0xec77,
+ RIE_CRJ = 0xec76,
+ RIE_LOCGHI = 0xec46,
+ RIE_RISBG = 0xec55,
+
+ RRE_AGR = 0xb908,
+ RRE_ALGR = 0xb90a,
+ RRE_ALCR = 0xb998,
+ RRE_ALCGR = 0xb988,
+ RRE_CGR = 0xb920,
+ RRE_CLGR = 0xb921,
+ RRE_DLGR = 0xb987,
+ RRE_DLR = 0xb997,
+ RRE_DSGFR = 0xb91d,
+ RRE_DSGR = 0xb90d,
+ RRE_FLOGR = 0xb983,
+ RRE_LGBR = 0xb906,
+ RRE_LCGR = 0xb903,
+ RRE_LGFR = 0xb914,
+ RRE_LGHR = 0xb907,
+ RRE_LGR = 0xb904,
+ RRE_LLGCR = 0xb984,
+ RRE_LLGFR = 0xb916,
+ RRE_LLGHR = 0xb985,
+ RRE_LRVR = 0xb91f,
+ RRE_LRVGR = 0xb90f,
+ RRE_LTGR = 0xb902,
+ RRE_MLGR = 0xb986,
+ RRE_MSGR = 0xb90c,
+ RRE_MSR = 0xb252,
+ RRE_NGR = 0xb980,
+ RRE_OGR = 0xb981,
+ RRE_SGR = 0xb909,
+ RRE_SLGR = 0xb90b,
+ RRE_SLBR = 0xb999,
+ RRE_SLBGR = 0xb989,
+ RRE_XGR = 0xb982,
+
+ RRF_LOCR = 0xb9f2,
+ RRF_LOCGR = 0xb9e2,
+ RRF_NRK = 0xb9f4,
+ RRF_NGRK = 0xb9e4,
+ RRF_ORK = 0xb9f6,
+ RRF_OGRK = 0xb9e6,
+ RRF_SRK = 0xb9f9,
+ RRF_SGRK = 0xb9e9,
+ RRF_SLRK = 0xb9fb,
+ RRF_SLGRK = 0xb9eb,
+ RRF_XRK = 0xb9f7,
+ RRF_XGRK = 0xb9e7,
+
+ RR_AR = 0x1a,
+ RR_ALR = 0x1e,
+ RR_BASR = 0x0d,
+ RR_BCR = 0x07,
+ RR_CLR = 0x15,
+ RR_CR = 0x19,
+ RR_DR = 0x1d,
+ RR_LCR = 0x13,
+ RR_LR = 0x18,
+ RR_LTR = 0x12,
+ RR_NR = 0x14,
+ RR_OR = 0x16,
+ RR_SR = 0x1b,
+ RR_SLR = 0x1f,
+ RR_XR = 0x17,
+
+ RSY_RLL = 0xeb1d,
+ RSY_RLLG = 0xeb1c,
+ RSY_SLLG = 0xeb0d,
+ RSY_SLLK = 0xebdf,
+ RSY_SRAG = 0xeb0a,
+ RSY_SRAK = 0xebdc,
+ RSY_SRLG = 0xeb0c,
+ RSY_SRLK = 0xebde,
+
+ RS_SLL = 0x89,
+ RS_SRA = 0x8a,
+ RS_SRL = 0x88,
+
+ RXY_AG = 0xe308,
+ RXY_AY = 0xe35a,
+ RXY_CG = 0xe320,
+ RXY_CLG = 0xe321,
+ RXY_CLY = 0xe355,
+ RXY_CY = 0xe359,
+ RXY_LAY = 0xe371,
+ RXY_LB = 0xe376,
+ RXY_LG = 0xe304,
+ RXY_LGB = 0xe377,
+ RXY_LGF = 0xe314,
+ RXY_LGH = 0xe315,
+ RXY_LHY = 0xe378,
+ RXY_LLGC = 0xe390,
+ RXY_LLGF = 0xe316,
+ RXY_LLGH = 0xe391,
+ RXY_LMG = 0xeb04,
+ RXY_LRV = 0xe31e,
+ RXY_LRVG = 0xe30f,
+ RXY_LRVH = 0xe31f,
+ RXY_LY = 0xe358,
+ RXY_NG = 0xe380,
+ RXY_OG = 0xe381,
+ RXY_STCY = 0xe372,
+ RXY_STG = 0xe324,
+ RXY_STHY = 0xe370,
+ RXY_STMG = 0xeb24,
+ RXY_STRV = 0xe33e,
+ RXY_STRVG = 0xe32f,
+ RXY_STRVH = 0xe33f,
+ RXY_STY = 0xe350,
+ RXY_XG = 0xe382,
+
+ RX_A = 0x5a,
+ RX_C = 0x59,
+ RX_L = 0x58,
+ RX_LA = 0x41,
+ RX_LH = 0x48,
+ RX_ST = 0x50,
+ RX_STC = 0x42,
+ RX_STH = 0x40,
+
+ VRIa_VGBM = 0xe744,
+ VRIa_VREPI = 0xe745,
+ VRIb_VGM = 0xe746,
+ VRIc_VREP = 0xe74d,
+
+ VRRa_VLC = 0xe7de,
+ VRRa_VLP = 0xe7df,
+ VRRa_VLR = 0xe756,
+ VRRc_VA = 0xe7f3,
+ VRRc_VCEQ = 0xe7f8, /* we leave the m5 cs field 0 */
+ VRRc_VCH = 0xe7fb, /* " */
+ VRRc_VCHL = 0xe7f9, /* " */
+ VRRc_VERLLV = 0xe773,
+ VRRc_VESLV = 0xe770,
+ VRRc_VESRAV = 0xe77a,
+ VRRc_VESRLV = 0xe778,
+ VRRc_VML = 0xe7a2,
+ VRRc_VMN = 0xe7fe,
+ VRRc_VMNL = 0xe7fc,
+ VRRc_VMX = 0xe7ff,
+ VRRc_VMXL = 0xe7fd,
+ VRRc_VN = 0xe768,
+ VRRc_VNC = 0xe769,
+ VRRc_VNO = 0xe76b,
+ VRRc_VO = 0xe76a,
+ VRRc_VOC = 0xe76f,
+ VRRc_VPKS = 0xe797, /* we leave the m5 cs field 0 */
+ VRRc_VS = 0xe7f7,
+ VRRa_VUPH = 0xe7d7,
+ VRRa_VUPL = 0xe7d6,
+ VRRc_VX = 0xe76d,
+ VRRe_VSEL = 0xe78d,
+ VRRf_VLVGP = 0xe762,
+
+ VRSa_VERLL = 0xe733,
+ VRSa_VESL = 0xe730,
+ VRSa_VESRA = 0xe73a,
+ VRSa_VESRL = 0xe738,
+ VRSb_VLVG = 0xe722,
+ VRSc_VLGV = 0xe721,
+
+ VRX_VL = 0xe706,
+ VRX_VLLEZ = 0xe704,
+ VRX_VLREP = 0xe705,
+ VRX_VST = 0xe70e,
+ VRX_VSTEF = 0xe70b,
+ VRX_VSTEG = 0xe70a,
+
+ NOP = 0x0707,
+} S390Opcode;
+
+#ifdef CONFIG_DEBUG_TCG
+static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
+ "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
+ "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
+ "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
+ "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
+ "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
+};
+#endif
+
+/* Since R6 is a potential argument register, choose it last of the
+ call-saved registers. Likewise prefer the call-clobbered registers
+ in reverse order to maximize the chance of avoiding the arguments. */
+static const int tcg_target_reg_alloc_order[] = {
+ /* Call saved registers. */
+ TCG_REG_R13,
+ TCG_REG_R12,
+ TCG_REG_R11,
+ TCG_REG_R10,
+ TCG_REG_R9,
+ TCG_REG_R8,
+ TCG_REG_R7,
+ TCG_REG_R6,
+ /* Call clobbered registers. */
+ TCG_REG_R14,
+ TCG_REG_R0,
+ TCG_REG_R1,
+ /* Argument registers, in reverse order of allocation. */
+ TCG_REG_R5,
+ TCG_REG_R4,
+ TCG_REG_R3,
+ TCG_REG_R2,
+
+ /* V8-V15 are call saved, and omitted. */
+ TCG_REG_V0,
+ TCG_REG_V1,
+ TCG_REG_V2,
+ TCG_REG_V3,
+ TCG_REG_V4,
+ TCG_REG_V5,
+ TCG_REG_V6,
+ TCG_REG_V7,
+ TCG_REG_V16,
+ TCG_REG_V17,
+ TCG_REG_V18,
+ TCG_REG_V19,
+ TCG_REG_V20,
+ TCG_REG_V21,
+ TCG_REG_V22,
+ TCG_REG_V23,
+ TCG_REG_V24,
+ TCG_REG_V25,
+ TCG_REG_V26,
+ TCG_REG_V27,
+ TCG_REG_V28,
+ TCG_REG_V29,
+ TCG_REG_V30,
+ TCG_REG_V31,
+};
+
+static const int tcg_target_call_iarg_regs[] = {
+ TCG_REG_R2,
+ TCG_REG_R3,
+ TCG_REG_R4,
+ TCG_REG_R5,
+ TCG_REG_R6,
+};
+
+static const int tcg_target_call_oarg_regs[] = {
+ TCG_REG_R2,
+};
+
+#define S390_CC_EQ 8
+#define S390_CC_LT 4
+#define S390_CC_GT 2
+#define S390_CC_OV 1
+#define S390_CC_NE (S390_CC_LT | S390_CC_GT)
+#define S390_CC_LE (S390_CC_LT | S390_CC_EQ)
+#define S390_CC_GE (S390_CC_GT | S390_CC_EQ)
+#define S390_CC_NEVER 0
+#define S390_CC_ALWAYS 15
+
+/* Condition codes that result from a COMPARE and COMPARE LOGICAL. */
+static const uint8_t tcg_cond_to_s390_cond[] = {
+ [TCG_COND_EQ] = S390_CC_EQ,
+ [TCG_COND_NE] = S390_CC_NE,
+ [TCG_COND_LT] = S390_CC_LT,
+ [TCG_COND_LE] = S390_CC_LE,
+ [TCG_COND_GT] = S390_CC_GT,
+ [TCG_COND_GE] = S390_CC_GE,
+ [TCG_COND_LTU] = S390_CC_LT,
+ [TCG_COND_LEU] = S390_CC_LE,
+ [TCG_COND_GTU] = S390_CC_GT,
+ [TCG_COND_GEU] = S390_CC_GE,
+};
+
+/* Condition codes that result from a LOAD AND TEST. Here, we have no
+ unsigned instruction variation, however since the test is vs zero we
+ can re-map the outcomes appropriately. */
+static const uint8_t tcg_cond_to_ltr_cond[] = {
+ [TCG_COND_EQ] = S390_CC_EQ,
+ [TCG_COND_NE] = S390_CC_NE,
+ [TCG_COND_LT] = S390_CC_LT,
+ [TCG_COND_LE] = S390_CC_LE,
+ [TCG_COND_GT] = S390_CC_GT,
+ [TCG_COND_GE] = S390_CC_GE,
+ [TCG_COND_LTU] = S390_CC_NEVER,
+ [TCG_COND_LEU] = S390_CC_EQ,
+ [TCG_COND_GTU] = S390_CC_NE,
+ [TCG_COND_GEU] = S390_CC_ALWAYS,
+};
+
+#ifdef CONFIG_SOFTMMU
+static void * const qemu_ld_helpers[(MO_SSIZE | MO_BSWAP) + 1] = {
+ [MO_UB] = helper_ret_ldub_mmu,
+ [MO_SB] = helper_ret_ldsb_mmu,
+ [MO_LEUW] = helper_le_lduw_mmu,
+ [MO_LESW] = helper_le_ldsw_mmu,
+ [MO_LEUL] = helper_le_ldul_mmu,
+ [MO_LESL] = helper_le_ldsl_mmu,
+ [MO_LEQ] = helper_le_ldq_mmu,
+ [MO_BEUW] = helper_be_lduw_mmu,
+ [MO_BESW] = helper_be_ldsw_mmu,
+ [MO_BEUL] = helper_be_ldul_mmu,
+ [MO_BESL] = helper_be_ldsl_mmu,
+ [MO_BEQ] = helper_be_ldq_mmu,
+};
+
+static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
+ [MO_UB] = helper_ret_stb_mmu,
+ [MO_LEUW] = helper_le_stw_mmu,
+ [MO_LEUL] = helper_le_stl_mmu,
+ [MO_LEQ] = helper_le_stq_mmu,
+ [MO_BEUW] = helper_be_stw_mmu,
+ [MO_BEUL] = helper_be_stl_mmu,
+ [MO_BEQ] = helper_be_stq_mmu,
+};
+#endif
+
+static const tcg_insn_unit *tb_ret_addr;
+uint64_t s390_facilities[3];
+
+static inline bool is_general_reg(TCGReg r)
+{
+ return r <= TCG_REG_R15;
+}
+
+static inline bool is_vector_reg(TCGReg r)
+{
+ return r >= TCG_REG_V0 && r <= TCG_REG_V31;
+}
+
+static bool patch_reloc(tcg_insn_unit *src_rw, int type,
+ intptr_t value, intptr_t addend)
+{
+ const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
+ intptr_t pcrel2;
+ uint32_t old;
+
+ value += addend;
+ pcrel2 = (tcg_insn_unit *)value - src_rx;
+
+ switch (type) {
+ case R_390_PC16DBL:
+ if (pcrel2 == (int16_t)pcrel2) {
+ tcg_patch16(src_rw, pcrel2);
+ return true;
+ }
+ break;
+ case R_390_PC32DBL:
+ if (pcrel2 == (int32_t)pcrel2) {
+ tcg_patch32(src_rw, pcrel2);
+ return true;
+ }
+ break;
+ case R_390_20:
+ if (value == sextract64(value, 0, 20)) {
+ old = *(uint32_t *)src_rw & 0xf00000ff;
+ old |= ((value & 0xfff) << 16) | ((value & 0xff000) >> 4);
+ tcg_patch32(src_rw, old);
+ return true;
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ return false;
+}
+
+/* Test if a constant matches the constraint. */
+static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
+{
+ if (ct & TCG_CT_CONST) {
+ return 1;
+ }
+
+ if (type == TCG_TYPE_I32) {
+ val = (int32_t)val;
+ }
+
+ /* The following are mutually exclusive. */
+ if (ct & TCG_CT_CONST_S16) {
+ return val == (int16_t)val;
+ } else if (ct & TCG_CT_CONST_S32) {
+ return val == (int32_t)val;
+ } else if (ct & TCG_CT_CONST_S33) {
+ return val >= -0xffffffffll && val <= 0xffffffffll;
+ } else if (ct & TCG_CT_CONST_ZERO) {
+ return val == 0;
+ }
+
+ return 0;
+}
+
+/* Emit instructions according to the given instruction format. */
+
+static void tcg_out_insn_RR(TCGContext *s, S390Opcode op, TCGReg r1, TCGReg r2)
+{
+ tcg_out16(s, (op << 8) | (r1 << 4) | r2);
+}
+
+static void tcg_out_insn_RRE(TCGContext *s, S390Opcode op,
+ TCGReg r1, TCGReg r2)
+{
+ tcg_out32(s, (op << 16) | (r1 << 4) | r2);
+}
+
+static void tcg_out_insn_RRF(TCGContext *s, S390Opcode op,
+ TCGReg r1, TCGReg r2, int m3)
+{
+ tcg_out32(s, (op << 16) | (m3 << 12) | (r1 << 4) | r2);
+}
+
+static void tcg_out_insn_RI(TCGContext *s, S390Opcode op, TCGReg r1, int i2)
+{
+ tcg_out32(s, (op << 16) | (r1 << 20) | (i2 & 0xffff));
+}
+
+static void tcg_out_insn_RIE(TCGContext *s, S390Opcode op, TCGReg r1,
+ int i2, int m3)
+{
+ tcg_out16(s, (op & 0xff00) | (r1 << 4) | m3);
+ tcg_out32(s, (i2 << 16) | (op & 0xff));
+}
+
+static void tcg_out_insn_RIL(TCGContext *s, S390Opcode op, TCGReg r1, int i2)
+{
+ tcg_out16(s, op | (r1 << 4));
+ tcg_out32(s, i2);
+}
+
+static void tcg_out_insn_RS(TCGContext *s, S390Opcode op, TCGReg r1,
+ TCGReg b2, TCGReg r3, int disp)
+{
+ tcg_out32(s, (op << 24) | (r1 << 20) | (r3 << 16) | (b2 << 12)
+ | (disp & 0xfff));
+}
+
+static void tcg_out_insn_RSY(TCGContext *s, S390Opcode op, TCGReg r1,
+ TCGReg b2, TCGReg r3, int disp)
+{
+ tcg_out16(s, (op & 0xff00) | (r1 << 4) | r3);
+ tcg_out32(s, (op & 0xff) | (b2 << 28)
+ | ((disp & 0xfff) << 16) | ((disp & 0xff000) >> 4));
+}
+
+#define tcg_out_insn_RX tcg_out_insn_RS
+#define tcg_out_insn_RXY tcg_out_insn_RSY
+
+static int RXB(TCGReg v1, TCGReg v2, TCGReg v3, TCGReg v4)
+{
+ /*
+ * Shift bit 4 of each regno to its corresponding bit of RXB.
+ * RXB itself begins at bit 8 of the instruction so 8 - 4 = 4
+ * is the left-shift of the 4th operand.
+ */
+ return ((v1 & 0x10) << (4 + 3))
+ | ((v2 & 0x10) << (4 + 2))
+ | ((v3 & 0x10) << (4 + 1))
+ | ((v4 & 0x10) << (4 + 0));
+}
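
A quick worked example of those shifts (register numbering per tcg-target.h, which is outside this hunk):

/* An operand in %v20 (register number 20, 0b10100) has bit 4 set, so in the
 * v1 slot it contributes (0x10 << (4 + 3)) = 0x800 -- the most significant
 * RXB bit of the trailing halfword, alongside (m << 12) and the low opcode
 * byte.  Operands in %v0..%v15 contribute nothing; the low four bits of any
 * vector register number go into the ordinary register fields via (v & 0xf). */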
+
+static void tcg_out_insn_VRIa(TCGContext *s, S390Opcode op,
+ TCGReg v1, uint16_t i2, int m3)
+{
+ tcg_debug_assert(is_vector_reg(v1));
+ tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4));
+ tcg_out16(s, i2);
+ tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m3 << 12));
+}
+
+static void tcg_out_insn_VRIb(TCGContext *s, S390Opcode op,
+ TCGReg v1, uint8_t i2, uint8_t i3, int m4)
+{
+ tcg_debug_assert(is_vector_reg(v1));
+ tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4));
+ tcg_out16(s, (i2 << 8) | (i3 & 0xff));
+ tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m4 << 12));
+}
+
+static void tcg_out_insn_VRIc(TCGContext *s, S390Opcode op,
+ TCGReg v1, uint16_t i2, TCGReg v3, int m4)
+{
+ tcg_debug_assert(is_vector_reg(v1));
+ tcg_debug_assert(is_vector_reg(v3));
+ tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v3 & 0xf));
+ tcg_out16(s, i2);
+ tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, v3, 0) | (m4 << 12));
+}
+
+static void tcg_out_insn_VRRa(TCGContext *s, S390Opcode op,
+ TCGReg v1, TCGReg v2, int m3)
+{
+ tcg_debug_assert(is_vector_reg(v1));
+ tcg_debug_assert(is_vector_reg(v2));
+ tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v2 & 0xf));
+ tcg_out32(s, (op & 0x00ff) | RXB(v1, v2, 0, 0) | (m3 << 12));
+}
+
+static void tcg_out_insn_VRRc(TCGContext *s, S390Opcode op,
+ TCGReg v1, TCGReg v2, TCGReg v3, int m4)
+{
+ tcg_debug_assert(is_vector_reg(v1));
+ tcg_debug_assert(is_vector_reg(v2));
+ tcg_debug_assert(is_vector_reg(v3));
+ tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v2 & 0xf));
+ tcg_out16(s, v3 << 12);
+ tcg_out16(s, (op & 0x00ff) | RXB(v1, v2, v3, 0) | (m4 << 12));
+}
+
+static void tcg_out_insn_VRRe(TCGContext *s, S390Opcode op,
+ TCGReg v1, TCGReg v2, TCGReg v3, TCGReg v4)
+{
+ tcg_debug_assert(is_vector_reg(v1));
+ tcg_debug_assert(is_vector_reg(v2));
+ tcg_debug_assert(is_vector_reg(v3));
+ tcg_debug_assert(is_vector_reg(v4));
+ tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v2 & 0xf));
+ tcg_out16(s, v3 << 12);
+ tcg_out16(s, (op & 0x00ff) | RXB(v1, v2, v3, v4) | (v4 << 12));
+}
+
+static void tcg_out_insn_VRRf(TCGContext *s, S390Opcode op,
+ TCGReg v1, TCGReg r2, TCGReg r3)
+{
+ tcg_debug_assert(is_vector_reg(v1));
+ tcg_debug_assert(is_general_reg(r2));
+ tcg_debug_assert(is_general_reg(r3));
+ tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | r2);
+ tcg_out16(s, r3 << 12);
+ tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0));
+}
+
+static void tcg_out_insn_VRSa(TCGContext *s, S390Opcode op, TCGReg v1,
+ intptr_t d2, TCGReg b2, TCGReg v3, int m4)
+{
+ tcg_debug_assert(is_vector_reg(v1));
+ tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
+ tcg_debug_assert(is_general_reg(b2));
+ tcg_debug_assert(is_vector_reg(v3));
+ tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v3 & 0xf));
+ tcg_out16(s, b2 << 12 | d2);
+ tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, v3, 0) | (m4 << 12));
+}
+
+static void tcg_out_insn_VRSb(TCGContext *s, S390Opcode op, TCGReg v1,
+ intptr_t d2, TCGReg b2, TCGReg r3, int m4)
+{
+ tcg_debug_assert(is_vector_reg(v1));
+ tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
+ tcg_debug_assert(is_general_reg(b2));
+ tcg_debug_assert(is_general_reg(r3));
+ tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | r3);
+ tcg_out16(s, b2 << 12 | d2);
+ tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m4 << 12));
+}
+
+static void tcg_out_insn_VRSc(TCGContext *s, S390Opcode op, TCGReg r1,
+ intptr_t d2, TCGReg b2, TCGReg v3, int m4)
+{
+ tcg_debug_assert(is_general_reg(r1));
+ tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
+ tcg_debug_assert(is_general_reg(b2));
+ tcg_debug_assert(is_vector_reg(v3));
+ tcg_out16(s, (op & 0xff00) | (r1 << 4) | (v3 & 0xf));
+ tcg_out16(s, b2 << 12 | d2);
+ tcg_out16(s, (op & 0x00ff) | RXB(0, 0, v3, 0) | (m4 << 12));
+}
+
+static void tcg_out_insn_VRX(TCGContext *s, S390Opcode op, TCGReg v1,
+ TCGReg b2, TCGReg x2, intptr_t d2, int m3)
+{
+ tcg_debug_assert(is_vector_reg(v1));
+ tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
+ tcg_debug_assert(is_general_reg(x2));
+ tcg_debug_assert(is_general_reg(b2));
+ tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | x2);
+ tcg_out16(s, (b2 << 12) | d2);
+ tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m3 << 12));
+}
+
+/* Emit an opcode with "type-checking" of the format. */
+#define tcg_out_insn(S, FMT, OP, ...) \
+ glue(tcg_out_insn_,FMT)(S, glue(glue(FMT,_),OP), ## __VA_ARGS__)
+
+
+/* emit 64-bit shifts */
+static void tcg_out_sh64(TCGContext* s, S390Opcode op, TCGReg dest,
+ TCGReg src, TCGReg sh_reg, int sh_imm)
+{
+ tcg_out_insn_RSY(s, op, dest, sh_reg, src, sh_imm);
+}
+
+/* emit 32-bit shifts */
+static void tcg_out_sh32(TCGContext* s, S390Opcode op, TCGReg dest,
+ TCGReg sh_reg, int sh_imm)
+{
+ tcg_out_insn_RS(s, op, dest, sh_reg, 0, sh_imm);
+}
+
+static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
+{
+ if (src == dst) {
+ return true;
+ }
+ switch (type) {
+ case TCG_TYPE_I32:
+ if (likely(is_general_reg(dst) && is_general_reg(src))) {
+ tcg_out_insn(s, RR, LR, dst, src);
+ break;
+ }
+ /* fallthru */
+
+ case TCG_TYPE_I64:
+ if (likely(is_general_reg(dst))) {
+ if (likely(is_general_reg(src))) {
+ tcg_out_insn(s, RRE, LGR, dst, src);
+ } else {
+ tcg_out_insn(s, VRSc, VLGV, dst, 0, 0, src, 3);
+ }
+ break;
+ } else if (is_general_reg(src)) {
+ tcg_out_insn(s, VRSb, VLVG, dst, 0, 0, src, 3);
+ break;
+ }
+ /* fallthru */
+
+ case TCG_TYPE_V64:
+ case TCG_TYPE_V128:
+ tcg_out_insn(s, VRRa, VLR, dst, src, 0);
+ break;
+
+ default:
+ g_assert_not_reached();
+ }
+ return true;
+}
+
+static const S390Opcode lli_insns[4] = {
+ RI_LLILL, RI_LLILH, RI_LLIHL, RI_LLIHH
+};
+
+static bool maybe_out_small_movi(TCGContext *s, TCGType type,
+ TCGReg ret, tcg_target_long sval)
+{
+ tcg_target_ulong uval = sval;
+ int i;
+
+ if (type == TCG_TYPE_I32) {
+ uval = (uint32_t)sval;
+ sval = (int32_t)sval;
+ }
+
+ /* Try all 32-bit insns that can load it in one go. */
+ if (sval >= -0x8000 && sval < 0x8000) {
+ tcg_out_insn(s, RI, LGHI, ret, sval);
+ return true;
+ }
+
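+ /* Each "load logical immediate" insn sets one 16-bit halfword and
+ zeroes the remainder; e.g. 0x00120000 is handled by LLILH 0x0012. */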
+ for (i = 0; i < 4; i++) {
+ tcg_target_long mask = 0xffffull << i*16;
+ if ((uval & mask) == uval) {
+ tcg_out_insn_RI(s, lli_insns[i], ret, uval >> i*16);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/* load a register with an immediate value */
+static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
+ tcg_target_long sval, bool in_prologue)
+{
+ tcg_target_ulong uval;
+
+ /* Try all 32-bit insns that can load it in one go. */
+ if (maybe_out_small_movi(s, type, ret, sval)) {
+ return;
+ }
+
+ uval = sval;
+ if (type == TCG_TYPE_I32) {
+ uval = (uint32_t)sval;
+ sval = (int32_t)sval;
+ }
+
+ /* Try all 48-bit insns that can load it in one go. */
+ if (HAVE_FACILITY(EXT_IMM)) {
+ if (sval == (int32_t)sval) {
+ tcg_out_insn(s, RIL, LGFI, ret, sval);
+ return;
+ }
+ if (uval <= 0xffffffff) {
+ tcg_out_insn(s, RIL, LLILF, ret, uval);
+ return;
+ }
+ if ((uval & 0xffffffff) == 0) {
+ tcg_out_insn(s, RIL, LLIHF, ret, uval >> 32);
+ return;
+ }
+ }
+
+ /* Try for PC-relative address load. For odd addresses,
+ attempt to use an offset from the start of the TB. */
+ if ((sval & 1) == 0) {
+ ptrdiff_t off = tcg_pcrel_diff(s, (void *)sval) >> 1;
+ if (off == (int32_t)off) {
+ tcg_out_insn(s, RIL, LARL, ret, off);
+ return;
+ }
+ } else if (USE_REG_TB && !in_prologue) {
+ ptrdiff_t off = tcg_tbrel_diff(s, (void *)sval);
+ if (off == sextract64(off, 0, 20)) {
+ /* This is certain to be an address within TB, and therefore
+ OFF will be negative; don't try RX_LA. */
+ tcg_out_insn(s, RXY, LAY, ret, TCG_REG_TB, TCG_REG_NONE, off);
+ return;
+ }
+ }
+
+ /* A 32-bit unsigned value can be loaded in 2 insns. And given
+ that LLILL, LLIHL, LLILF above did not succeed, we know that
+ both insns are required. */
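+ /* For example, 0x89abcdef is loaded as LLILL 0xcdef; IILH 0x89ab. */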
+ if (uval <= 0xffffffff) {
+ tcg_out_insn(s, RI, LLILL, ret, uval);
+ tcg_out_insn(s, RI, IILH, ret, uval >> 16);
+ return;
+ }
+
+ /* Otherwise, stuff it in the constant pool. */
+ if (HAVE_FACILITY(GEN_INST_EXT)) {
+ tcg_out_insn(s, RIL, LGRL, ret, 0);
+ new_pool_label(s, sval, R_390_PC32DBL, s->code_ptr - 2, 2);
+ } else if (USE_REG_TB && !in_prologue) {
+ tcg_out_insn(s, RXY, LG, ret, TCG_REG_TB, TCG_REG_NONE, 0);
+ new_pool_label(s, sval, R_390_20, s->code_ptr - 2,
+ tcg_tbrel_diff(s, NULL));
+ } else {
+ TCGReg base = ret ? ret : TCG_TMP0;
+ tcg_out_insn(s, RIL, LARL, base, 0);
+ new_pool_label(s, sval, R_390_PC32DBL, s->code_ptr - 2, 2);
+ tcg_out_insn(s, RXY, LG, ret, base, TCG_REG_NONE, 0);
+ }
+}
+
+static void tcg_out_movi(TCGContext *s, TCGType type,
+ TCGReg ret, tcg_target_long sval)
+{
+ tcg_out_movi_int(s, type, ret, sval, false);
+}
+
+/* Emit a load/store type instruction. Inputs are:
+ DATA: The register to be loaded or stored.
+ BASE+OFS: The effective address.
+ OPC_RX: If the operation has an RX format opcode (e.g. STC), otherwise 0.
+ OPC_RXY: The RXY format opcode for the operation (e.g. STCY). */
+
+static void tcg_out_mem(TCGContext *s, S390Opcode opc_rx, S390Opcode opc_rxy,
+ TCGReg data, TCGReg base, TCGReg index,
+ tcg_target_long ofs)
+{
+ if (ofs < -0x80000 || ofs >= 0x80000) {
+ /* Combine the low 20 bits of the offset with the actual load insn;
+ the high 44 bits must come from an immediate load. */
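+ /* The 20-bit displacement is signed, so e.g. ofs 0xfffff splits
+ into low = -1 plus an immediate load of 0x100000. */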
+ tcg_target_long low = ((ofs & 0xfffff) ^ 0x80000) - 0x80000;
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs - low);
+ ofs = low;
+
+ /* If we were already given an index register, add it in. */
+ if (index != TCG_REG_NONE) {
+ tcg_out_insn(s, RRE, AGR, TCG_TMP0, index);
+ }
+ index = TCG_TMP0;
+ }
+
+ if (opc_rx && ofs >= 0 && ofs < 0x1000) {
+ tcg_out_insn_RX(s, opc_rx, data, base, index, ofs);
+ } else {
+ tcg_out_insn_RXY(s, opc_rxy, data, base, index, ofs);
+ }
+}
+
+static void tcg_out_vrx_mem(TCGContext *s, S390Opcode opc_vrx,
+ TCGReg data, TCGReg base, TCGReg index,
+ tcg_target_long ofs, int m3)
+{
+ if (ofs < 0 || ofs >= 0x1000) {
+ if (ofs >= -0x80000 && ofs < 0x80000) {
+ tcg_out_insn(s, RXY, LAY, TCG_TMP0, base, index, ofs);
+ base = TCG_TMP0;
+ index = TCG_REG_NONE;
+ ofs = 0;
+ } else {
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs);
+ if (index != TCG_REG_NONE) {
+ tcg_out_insn(s, RRE, AGR, TCG_TMP0, index);
+ }
+ index = TCG_TMP0;
+ ofs = 0;
+ }
+ }
+ tcg_out_insn_VRX(s, opc_vrx, data, base, index, ofs, m3);
+}
+
+/* load data without address translation or endianness conversion */
+static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg data,
+ TCGReg base, intptr_t ofs)
+{
+ switch (type) {
+ case TCG_TYPE_I32:
+ if (likely(is_general_reg(data))) {
+ tcg_out_mem(s, RX_L, RXY_LY, data, base, TCG_REG_NONE, ofs);
+ break;
+ }
+ tcg_out_vrx_mem(s, VRX_VLLEZ, data, base, TCG_REG_NONE, ofs, MO_32);
+ break;
+
+ case TCG_TYPE_I64:
+ if (likely(is_general_reg(data))) {
+ tcg_out_mem(s, 0, RXY_LG, data, base, TCG_REG_NONE, ofs);
+ break;
+ }
+ /* fallthru */
+
+ case TCG_TYPE_V64:
+ tcg_out_vrx_mem(s, VRX_VLLEZ, data, base, TCG_REG_NONE, ofs, MO_64);
+ break;
+
+ case TCG_TYPE_V128:
+ /* Hint quadword aligned. */
+ tcg_out_vrx_mem(s, VRX_VL, data, base, TCG_REG_NONE, ofs, 4);
+ break;
+
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static void tcg_out_st(TCGContext *s, TCGType type, TCGReg data,
+ TCGReg base, intptr_t ofs)
+{
+ switch (type) {
+ case TCG_TYPE_I32:
+ if (likely(is_general_reg(data))) {
+ tcg_out_mem(s, RX_ST, RXY_STY, data, base, TCG_REG_NONE, ofs);
+ } else {
+ tcg_out_vrx_mem(s, VRX_VSTEF, data, base, TCG_REG_NONE, ofs, 1);
+ }
+ break;
+
+ case TCG_TYPE_I64:
+ if (likely(is_general_reg(data))) {
+ tcg_out_mem(s, 0, RXY_STG, data, base, TCG_REG_NONE, ofs);
+ break;
+ }
+ /* fallthru */
+
+ case TCG_TYPE_V64:
+ tcg_out_vrx_mem(s, VRX_VSTEG, data, base, TCG_REG_NONE, ofs, 0);
+ break;
+
+ case TCG_TYPE_V128:
+ /* Hint quadword aligned. */
+ tcg_out_vrx_mem(s, VRX_VST, data, base, TCG_REG_NONE, ofs, 4);
+ break;
+
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
+ TCGReg base, intptr_t ofs)
+{
+ return false;
+}
+
+/* load data from an absolute host address */
+static void tcg_out_ld_abs(TCGContext *s, TCGType type,
+ TCGReg dest, const void *abs)
+{
+ intptr_t addr = (intptr_t)abs;
+
+ if (HAVE_FACILITY(GEN_INST_EXT) && !(addr & 1)) {
+ ptrdiff_t disp = tcg_pcrel_diff(s, abs) >> 1;
+ if (disp == (int32_t)disp) {
+ if (type == TCG_TYPE_I32) {
+ tcg_out_insn(s, RIL, LRL, dest, disp);
+ } else {
+ tcg_out_insn(s, RIL, LGRL, dest, disp);
+ }
+ return;
+ }
+ }
+ if (USE_REG_TB) {
+ ptrdiff_t disp = tcg_tbrel_diff(s, abs);
+ if (disp == sextract64(disp, 0, 20)) {
+ tcg_out_ld(s, type, dest, TCG_REG_TB, disp);
+ return;
+ }
+ }
+
+ tcg_out_movi(s, TCG_TYPE_PTR, dest, addr & ~0xffff);
+ tcg_out_ld(s, type, dest, dest, addr & 0xffff);
+}
+
+static inline void tcg_out_risbg(TCGContext *s, TCGReg dest, TCGReg src,
+ int msb, int lsb, int ofs, int z)
+{
+ /* Format RIE-f */
+ tcg_out16(s, (RIE_RISBG & 0xff00) | (dest << 4) | src);
+ tcg_out16(s, (msb << 8) | (z << 7) | lsb);
+ tcg_out16(s, (ofs << 8) | (RIE_RISBG & 0xff));
+}
+
+static void tgen_ext8s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
+{
+ if (HAVE_FACILITY(EXT_IMM)) {
+ tcg_out_insn(s, RRE, LGBR, dest, src);
+ return;
+ }
+
+ if (type == TCG_TYPE_I32) {
+ if (dest == src) {
+ tcg_out_sh32(s, RS_SLL, dest, TCG_REG_NONE, 24);
+ } else {
+ tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 24);
+ }
+ tcg_out_sh32(s, RS_SRA, dest, TCG_REG_NONE, 24);
+ } else {
+ tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 56);
+ tcg_out_sh64(s, RSY_SRAG, dest, dest, TCG_REG_NONE, 56);
+ }
+}
+
+static void tgen_ext8u(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
+{
+ if (HAVE_FACILITY(EXT_IMM)) {
+ tcg_out_insn(s, RRE, LLGCR, dest, src);
+ return;
+ }
+
+ if (dest == src) {
+ tcg_out_movi(s, type, TCG_TMP0, 0xff);
+ src = TCG_TMP0;
+ } else {
+ tcg_out_movi(s, type, dest, 0xff);
+ }
+ if (type == TCG_TYPE_I32) {
+ tcg_out_insn(s, RR, NR, dest, src);
+ } else {
+ tcg_out_insn(s, RRE, NGR, dest, src);
+ }
+}
+
+static void tgen_ext16s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
+{
+ if (HAVE_FACILITY(EXT_IMM)) {
+ tcg_out_insn(s, RRE, LGHR, dest, src);
+ return;
+ }
+
+ if (type == TCG_TYPE_I32) {
+ if (dest == src) {
+ tcg_out_sh32(s, RS_SLL, dest, TCG_REG_NONE, 16);
+ } else {
+ tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 16);
+ }
+ tcg_out_sh32(s, RS_SRA, dest, TCG_REG_NONE, 16);
+ } else {
+ tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 48);
+ tcg_out_sh64(s, RSY_SRAG, dest, dest, TCG_REG_NONE, 48);
+ }
+}
+
+static void tgen_ext16u(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
+{
+ if (HAVE_FACILITY(EXT_IMM)) {
+ tcg_out_insn(s, RRE, LLGHR, dest, src);
+ return;
+ }
+
+ if (dest == src) {
+ tcg_out_movi(s, type, TCG_TMP0, 0xffff);
+ src = TCG_TMP0;
+ } else {
+ tcg_out_movi(s, type, dest, 0xffff);
+ }
+ if (type == TCG_TYPE_I32) {
+ tcg_out_insn(s, RR, NR, dest, src);
+ } else {
+ tcg_out_insn(s, RRE, NGR, dest, src);
+ }
+}
+
+static inline void tgen_ext32s(TCGContext *s, TCGReg dest, TCGReg src)
+{
+ tcg_out_insn(s, RRE, LGFR, dest, src);
+}
+
+static inline void tgen_ext32u(TCGContext *s, TCGReg dest, TCGReg src)
+{
+ tcg_out_insn(s, RRE, LLGFR, dest, src);
+}
+
+/* Accept bit patterns like these:
+ 0....01....1
+ 1....10....0
+ 1..10..01..1
+ 0..01..10..0
+ Copied from gcc sources. */
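+ /* E.g. 0x0ff0 is accepted (a single run of ones), while 0x0f0f is
+ rejected (two separate runs). */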
+static inline bool risbg_mask(uint64_t c)
+{
+ uint64_t lsb;
+ /* We don't change the number of transitions by inverting,
+ so make sure we start with the LSB zero. */
+ if (c & 1) {
+ c = ~c;
+ }
+ /* Reject all zeros or all ones. */
+ if (c == 0) {
+ return false;
+ }
+ /* Find the first transition. */
+ lsb = c & -c;
+ /* Invert to look for a second transition. */
+ c = ~c;
+ /* Erase the first transition. */
+ c &= -lsb;
+ /* Find the second transition, if any. */
+ lsb = c & -c;
+ /* Match if all the bits are 1's, or if c is zero. */
+ return c == -lsb;
+}
+
+static void tgen_andi_risbg(TCGContext *s, TCGReg out, TCGReg in, uint64_t val)
+{
+ int msb, lsb;
+ if ((val & 0x8000000000000001ull) == 0x8000000000000001ull) {
+ /* Achieve wraparound by swapping msb and lsb. */
+ msb = 64 - ctz64(~val);
+ lsb = clz64(~val) - 1;
+ } else {
+ msb = clz64(val);
+ lsb = 63 - ctz64(val);
+ }
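+ /* E.g. val 0x00ffff00 yields msb 40, lsb 55 (bits numbered from the
+ MSB); the wrapping mask 0xf00000000000000f yields msb 60, lsb 3. */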
+ tcg_out_risbg(s, out, in, msb, lsb, 0, 1);
+}
+
+static void tgen_andi(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
+{
+ static const S390Opcode ni_insns[4] = {
+ RI_NILL, RI_NILH, RI_NIHL, RI_NIHH
+ };
+ static const S390Opcode nif_insns[2] = {
+ RIL_NILF, RIL_NIHF
+ };
+ uint64_t valid = (type == TCG_TYPE_I32 ? 0xffffffffull : -1ull);
+ int i;
+
+ /* Look for the zero-extensions. */
+ if ((val & valid) == 0xffffffff) {
+ tgen_ext32u(s, dest, dest);
+ return;
+ }
+ if (HAVE_FACILITY(EXT_IMM)) {
+ if ((val & valid) == 0xff) {
+ tgen_ext8u(s, TCG_TYPE_I64, dest, dest);
+ return;
+ }
+ if ((val & valid) == 0xffff) {
+ tgen_ext16u(s, TCG_TYPE_I64, dest, dest);
+ return;
+ }
+ }
+
+ /* Try all 32-bit insns that can perform it in one go. */
+ for (i = 0; i < 4; i++) {
+ tcg_target_ulong mask = ~(0xffffull << i*16);
+ if (((val | ~valid) & mask) == mask) {
+ tcg_out_insn_RI(s, ni_insns[i], dest, val >> i*16);
+ return;
+ }
+ }
+
+ /* Try all 48-bit insns that can perform it in one go. */
+ if (HAVE_FACILITY(EXT_IMM)) {
+ for (i = 0; i < 2; i++) {
+ tcg_target_ulong mask = ~(0xffffffffull << i*32);
+ if (((val | ~valid) & mask) == mask) {
+ tcg_out_insn_RIL(s, nif_insns[i], dest, val >> i*32);
+ return;
+ }
+ }
+ }
+ if (HAVE_FACILITY(GEN_INST_EXT) && risbg_mask(val)) {
+ tgen_andi_risbg(s, dest, dest, val);
+ return;
+ }
+
+ /* Use the constant pool if USE_REG_TB, but not for small constants. */
+ if (USE_REG_TB) {
+ if (!maybe_out_small_movi(s, type, TCG_TMP0, val)) {
+ tcg_out_insn(s, RXY, NG, dest, TCG_REG_TB, TCG_REG_NONE, 0);
+ new_pool_label(s, val & valid, R_390_20, s->code_ptr - 2,
+ tcg_tbrel_diff(s, NULL));
+ return;
+ }
+ } else {
+ tcg_out_movi(s, type, TCG_TMP0, val);
+ }
+ if (type == TCG_TYPE_I32) {
+ tcg_out_insn(s, RR, NR, dest, TCG_TMP0);
+ } else {
+ tcg_out_insn(s, RRE, NGR, dest, TCG_TMP0);
+ }
+}
+
+static void tgen_ori(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
+{
+ static const S390Opcode oi_insns[4] = {
+ RI_OILL, RI_OILH, RI_OIHL, RI_OIHH
+ };
+ static const S390Opcode oif_insns[2] = {
+ RIL_OILF, RIL_OIHF
+ };
+
+ int i;
+
+ /* Look for no-op. */
+ if (unlikely(val == 0)) {
+ return;
+ }
+
+ /* Try all 32-bit insns that can perform it in one go. */
+ for (i = 0; i < 4; i++) {
+ tcg_target_ulong mask = (0xffffull << i*16);
+ if ((val & mask) != 0 && (val & ~mask) == 0) {
+ tcg_out_insn_RI(s, oi_insns[i], dest, val >> i*16);
+ return;
+ }
+ }
+
+ /* Try all 48-bit insns that can perform it in one go. */
+ if (HAVE_FACILITY(EXT_IMM)) {
+ for (i = 0; i < 2; i++) {
+ tcg_target_ulong mask = (0xffffffffull << i*32);
+ if ((val & mask) != 0 && (val & ~mask) == 0) {
+ tcg_out_insn_RIL(s, oif_insns[i], dest, val >> i*32);
+ return;
+ }
+ }
+ }
+
+ /* Use the constant pool if USE_REG_TB, but not for small constants. */
+ if (maybe_out_small_movi(s, type, TCG_TMP0, val)) {
+ if (type == TCG_TYPE_I32) {
+ tcg_out_insn(s, RR, OR, dest, TCG_TMP0);
+ } else {
+ tcg_out_insn(s, RRE, OGR, dest, TCG_TMP0);
+ }
+ } else if (USE_REG_TB) {
+ tcg_out_insn(s, RXY, OG, dest, TCG_REG_TB, TCG_REG_NONE, 0);
+ new_pool_label(s, val, R_390_20, s->code_ptr - 2,
+ tcg_tbrel_diff(s, NULL));
+ } else {
+ /* Perform the OR via sequential modifications to the high and
+ low parts. Do this via recursion to handle 16-bit vs 32-bit
+ masks in each half. */
+ tcg_debug_assert(HAVE_FACILITY(EXT_IMM));
+ tgen_ori(s, type, dest, val & 0x00000000ffffffffull);
+ tgen_ori(s, type, dest, val & 0xffffffff00000000ull);
+ }
+}
+
+static void tgen_xori(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
+{
+ /* Try all 48-bit insns that can perform it in one go. */
+ if (HAVE_FACILITY(EXT_IMM)) {
+ if ((val & 0xffffffff00000000ull) == 0) {
+ tcg_out_insn(s, RIL, XILF, dest, val);
+ return;
+ }
+ if ((val & 0x00000000ffffffffull) == 0) {
+ tcg_out_insn(s, RIL, XIHF, dest, val >> 32);
+ return;
+ }
+ }
+
+ /* Use the constant pool if USE_REG_TB, but not for small constants. */
+ if (maybe_out_small_movi(s, type, TCG_TMP0, val)) {
+ if (type == TCG_TYPE_I32) {
+ tcg_out_insn(s, RR, XR, dest, TCG_TMP0);
+ } else {
+ tcg_out_insn(s, RRE, XGR, dest, TCG_TMP0);
+ }
+ } else if (USE_REG_TB) {
+ tcg_out_insn(s, RXY, XG, dest, TCG_REG_TB, TCG_REG_NONE, 0);
+ new_pool_label(s, val, R_390_20, s->code_ptr - 2,
+ tcg_tbrel_diff(s, NULL));
+ } else {
+ /* Perform the xor by parts. */
+ tcg_debug_assert(HAVE_FACILITY(EXT_IMM));
+ if (val & 0xffffffff) {
+ tcg_out_insn(s, RIL, XILF, dest, val);
+ }
+ if (val > 0xffffffff) {
+ tcg_out_insn(s, RIL, XIHF, dest, val >> 32);
+ }
+ }
+}
+
+static int tgen_cmp(TCGContext *s, TCGType type, TCGCond c, TCGReg r1,
+ TCGArg c2, bool c2const, bool need_carry)
+{
+ bool is_unsigned = is_unsigned_cond(c);
+ S390Opcode op;
+
+ if (c2const) {
+ if (c2 == 0) {
+ if (!(is_unsigned && need_carry)) {
+ if (type == TCG_TYPE_I32) {
+ tcg_out_insn(s, RR, LTR, r1, r1);
+ } else {
+ tcg_out_insn(s, RRE, LTGR, r1, r1);
+ }
+ return tcg_cond_to_ltr_cond[c];
+ }
+ }
+
+ if (!is_unsigned && c2 == (int16_t)c2) {
+ op = (type == TCG_TYPE_I32 ? RI_CHI : RI_CGHI);
+ tcg_out_insn_RI(s, op, r1, c2);
+ goto exit;
+ }
+
+ if (HAVE_FACILITY(EXT_IMM)) {
+ if (type == TCG_TYPE_I32) {
+ op = (is_unsigned ? RIL_CLFI : RIL_CFI);
+ tcg_out_insn_RIL(s, op, r1, c2);
+ goto exit;
+ } else if (c2 == (is_unsigned ? (TCGArg)(uint32_t)c2 : (TCGArg)(int32_t)c2)) {
+ op = (is_unsigned ? RIL_CLGFI : RIL_CGFI);
+ tcg_out_insn_RIL(s, op, r1, c2);
+ goto exit;
+ }
+ }
+
+ /* Use the constant pool, but not for small constants. */
+ if (maybe_out_small_movi(s, type, TCG_TMP0, c2)) {
+ c2 = TCG_TMP0;
+ /* fall through to reg-reg */
+ } else if (USE_REG_TB) {
+ if (type == TCG_TYPE_I32) {
+ op = (is_unsigned ? RXY_CLY : RXY_CY);
+ tcg_out_insn_RXY(s, op, r1, TCG_REG_TB, TCG_REG_NONE, 0);
+ new_pool_label(s, (uint32_t)c2, R_390_20, s->code_ptr - 2,
+ 4 - tcg_tbrel_diff(s, NULL));
+ } else {
+ op = (is_unsigned ? RXY_CLG : RXY_CG);
+ tcg_out_insn_RXY(s, op, r1, TCG_REG_TB, TCG_REG_NONE, 0);
+ new_pool_label(s, c2, R_390_20, s->code_ptr - 2,
+ tcg_tbrel_diff(s, NULL));
+ }
+ goto exit;
+ } else {
+ if (type == TCG_TYPE_I32) {
+ op = (is_unsigned ? RIL_CLRL : RIL_CRL);
+ tcg_out_insn_RIL(s, op, r1, 0);
+ new_pool_label(s, (uint32_t)c2, R_390_PC32DBL,
+ s->code_ptr - 2, 2 + 4);
+ } else {
+ op = (is_unsigned ? RIL_CLGRL : RIL_CGRL);
+ tcg_out_insn_RIL(s, op, r1, 0);
+ new_pool_label(s, c2, R_390_PC32DBL, s->code_ptr - 2, 2);
+ }
+ goto exit;
+ }
+ }
+
+ if (type == TCG_TYPE_I32) {
+ op = (is_unsigned ? RR_CLR : RR_CR);
+ tcg_out_insn_RR(s, op, r1, c2);
+ } else {
+ op = (is_unsigned ? RRE_CLGR : RRE_CGR);
+ tcg_out_insn_RRE(s, op, r1, c2);
+ }
+
+ exit:
+ return tcg_cond_to_s390_cond[c];
+}
+
+static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond,
+ TCGReg dest, TCGReg c1, TCGArg c2, int c2const)
+{
+ int cc;
+ bool have_loc;
+
+ /* With LOC2, we can always emit the minimum 3 insns. */
+ if (HAVE_FACILITY(LOAD_ON_COND2)) {
+ /* Emit: d = 0, d = (cc ? 1 : d). */
+ cc = tgen_cmp(s, type, cond, c1, c2, c2const, false);
+ tcg_out_movi(s, TCG_TYPE_I64, dest, 0);
+ tcg_out_insn(s, RIE, LOCGHI, dest, 1, cc);
+ return;
+ }
+
+ have_loc = HAVE_FACILITY(LOAD_ON_COND);
+
+ /* For HAVE_LOC, only the paths through GTU/GT/LEU/LE are smaller. */
+ restart:
+ switch (cond) {
+ case TCG_COND_NE:
+ /* X != 0 is X > 0. */
+ if (c2const && c2 == 0) {
+ cond = TCG_COND_GTU;
+ } else {
+ break;
+ }
+ /* fallthru */
+
+ case TCG_COND_GTU:
+ case TCG_COND_GT:
+ /* The result of a compare has CC=2 for GT and CC=3 unused.
+ ADD LOGICAL WITH CARRY considers (CC & 2) the carry bit. */
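+ /* Thus, after clearing DEST, the ALCGR below computes 0 + 0 + carry,
+ which is exactly the required 0/1 result. */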
+ tgen_cmp(s, type, cond, c1, c2, c2const, true);
+ tcg_out_movi(s, type, dest, 0);
+ tcg_out_insn(s, RRE, ALCGR, dest, dest);
+ return;
+
+ case TCG_COND_EQ:
+ /* X == 0 is X <= 0. */
+ if (c2const && c2 == 0) {
+ cond = TCG_COND_LEU;
+ } else {
+ break;
+ }
+ /* fallthru */
+
+ case TCG_COND_LEU:
+ case TCG_COND_LE:
+ /* As above, but we're looking for borrow, or !carry.
+ The second insn computes d - d - borrow, or -1 for true
+ and 0 for false. So we must mask to 1 bit afterward. */
+ tgen_cmp(s, type, cond, c1, c2, c2const, true);
+ tcg_out_insn(s, RRE, SLBGR, dest, dest);
+ tgen_andi(s, type, dest, 1);
+ return;
+
+ case TCG_COND_GEU:
+ case TCG_COND_LTU:
+ case TCG_COND_LT:
+ case TCG_COND_GE:
+ /* Swap operands so that we can use LEU/GTU/GT/LE. */
+ if (c2const) {
+ if (have_loc) {
+ break;
+ }
+ tcg_out_movi(s, type, TCG_TMP0, c2);
+ c2 = c1;
+ c2const = 0;
+ c1 = TCG_TMP0;
+ } else {
+ TCGReg t = c1;
+ c1 = c2;
+ c2 = t;
+ }
+ cond = tcg_swap_cond(cond);
+ goto restart;
+
+ default:
+ g_assert_not_reached();
+ }
+
+ cc = tgen_cmp(s, type, cond, c1, c2, c2const, false);
+ if (have_loc) {
+ /* Emit: d = 0, t = 1, d = (cc ? t : d). */
+ tcg_out_movi(s, TCG_TYPE_I64, dest, 0);
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, 1);
+ tcg_out_insn(s, RRF, LOCGR, dest, TCG_TMP0, cc);
+ } else {
+ /* Emit: d = 1; if (cc) goto over; d = 0; over: */
+ tcg_out_movi(s, type, dest, 1);
+ tcg_out_insn(s, RI, BRC, cc, (4 + 4) >> 1);
+ tcg_out_movi(s, type, dest, 0);
+ }
+}
+
+static void tgen_movcond(TCGContext *s, TCGType type, TCGCond c, TCGReg dest,
+ TCGReg c1, TCGArg c2, int c2const,
+ TCGArg v3, int v3const)
+{
+ int cc;
+ if (HAVE_FACILITY(LOAD_ON_COND)) {
+ cc = tgen_cmp(s, type, c, c1, c2, c2const, false);
+ if (v3const) {
+ tcg_out_insn(s, RIE, LOCGHI, dest, v3, cc);
+ } else {
+ tcg_out_insn(s, RRF, LOCGR, dest, v3, cc);
+ }
+ } else {
+ c = tcg_invert_cond(c);
+ cc = tgen_cmp(s, type, c, c1, c2, c2const, false);
+
+ /* Emit: if (cc) goto over; dest = r3; over: */
+ tcg_out_insn(s, RI, BRC, cc, (4 + 4) >> 1);
+ tcg_out_insn(s, RRE, LGR, dest, v3);
+ }
+}
+
+static void tgen_clz(TCGContext *s, TCGReg dest, TCGReg a1,
+ TCGArg a2, int a2const)
+{
+ /* Since this sets both R and R+1, we have no choice but to store the
+ result into R0, allowing R1 == TCG_TMP0 to be clobbered as well. */
+ QEMU_BUILD_BUG_ON(TCG_TMP0 != TCG_REG_R1);
+ tcg_out_insn(s, RRE, FLOGR, TCG_REG_R0, a1);
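+ /* FLOGR leaves the number of leading zeros in R0, or 64 if A1 is
+ zero, so the a2 == 64 case below reduces to a plain move. */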
+
+ if (a2const && a2 == 64) {
+ tcg_out_mov(s, TCG_TYPE_I64, dest, TCG_REG_R0);
+ } else {
+ if (a2const) {
+ tcg_out_movi(s, TCG_TYPE_I64, dest, a2);
+ } else {
+ tcg_out_mov(s, TCG_TYPE_I64, dest, a2);
+ }
+ if (HAVE_FACILITY(LOAD_ON_COND)) {
+ /* Emit: if (one bit found) dest = r0. */
+ tcg_out_insn(s, RRF, LOCGR, dest, TCG_REG_R0, 2);
+ } else {
+ /* Emit: if (no one bit found) goto over; dest = r0; over: */
+ tcg_out_insn(s, RI, BRC, 8, (4 + 4) >> 1);
+ tcg_out_insn(s, RRE, LGR, dest, TCG_REG_R0);
+ }
+ }
+}
+
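+ /* Deposit LEN bits of SRC at bit OFS of DEST. RISBG numbers bits
+ from the MSB, so the field maps to positions 63-ofs-(len-1)..63-ofs,
+ with SRC rotated left by OFS into place. */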
+static void tgen_deposit(TCGContext *s, TCGReg dest, TCGReg src,
+ int ofs, int len, int z)
+{
+ int lsb = (63 - ofs);
+ int msb = lsb - (len - 1);
+ tcg_out_risbg(s, dest, src, msb, lsb, ofs, z);
+}
+
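+ /* Unsigned extract: rotate the field down to bit 0 and zero the
+ remaining bits. */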
+static void tgen_extract(TCGContext *s, TCGReg dest, TCGReg src,
+ int ofs, int len)
+{
+ tcg_out_risbg(s, dest, src, 64 - len, 63, 64 - ofs, 1);
+}
+
+static void tgen_gotoi(TCGContext *s, int cc, const tcg_insn_unit *dest)
+{
+ ptrdiff_t off = tcg_pcrel_diff(s, dest) >> 1;
+ if (off == (int16_t)off) {
+ tcg_out_insn(s, RI, BRC, cc, off);
+ } else if (off == (int32_t)off) {
+ tcg_out_insn(s, RIL, BRCL, cc, off);
+ } else {
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, (uintptr_t)dest);
+ tcg_out_insn(s, RR, BCR, cc, TCG_TMP0);
+ }
+}
+
+static void tgen_branch(TCGContext *s, int cc, TCGLabel *l)
+{
+ if (l->has_value) {
+ tgen_gotoi(s, cc, l->u.value_ptr);
+ } else if (USE_LONG_BRANCHES) {
+ tcg_out16(s, RIL_BRCL | (cc << 4));
+ tcg_out_reloc(s, s->code_ptr, R_390_PC32DBL, l, 2);
+ s->code_ptr += 2;
+ } else {
+ tcg_out16(s, RI_BRC | (cc << 4));
+ tcg_out_reloc(s, s->code_ptr, R_390_PC16DBL, l, 2);
+ s->code_ptr += 1;
+ }
+}
+
+static void tgen_compare_branch(TCGContext *s, S390Opcode opc, int cc,
+ TCGReg r1, TCGReg r2, TCGLabel *l)
+{
+ tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, 2);
+ tcg_out16(s, (opc & 0xff00) | (r1 << 4) | r2);
+ tcg_out16(s, 0);
+ tcg_out16(s, cc << 12 | (opc & 0xff));
+}
+
+static void tgen_compare_imm_branch(TCGContext *s, S390Opcode opc, int cc,
+ TCGReg r1, int i2, TCGLabel *l)
+{
+ tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, 2);
+ tcg_out16(s, (opc & 0xff00) | (r1 << 4) | cc);
+ tcg_out16(s, 0);
+ tcg_out16(s, (i2 << 8) | (opc & 0xff));
+}
+
+static void tgen_brcond(TCGContext *s, TCGType type, TCGCond c,
+ TCGReg r1, TCGArg c2, int c2const, TCGLabel *l)
+{
+ int cc;
+
+ if (HAVE_FACILITY(GEN_INST_EXT)) {
+ bool is_unsigned = is_unsigned_cond(c);
+ bool in_range;
+ S390Opcode opc;
+
+ cc = tcg_cond_to_s390_cond[c];
+
+ if (!c2const) {
+ opc = (type == TCG_TYPE_I32
+ ? (is_unsigned ? RIE_CLRJ : RIE_CRJ)
+ : (is_unsigned ? RIE_CLGRJ : RIE_CGRJ));
+ tgen_compare_branch(s, opc, cc, r1, c2, l);
+ return;
+ }
+
+ /* COMPARE IMMEDIATE AND BRANCH RELATIVE has an 8-bit immediate field.
+ If the immediate we've been given does not fit that range, we'll
+ fall back to separate compare and branch instructions using the
+ larger comparison range afforded by COMPARE IMMEDIATE. */
+ if (type == TCG_TYPE_I32) {
+ if (is_unsigned) {
+ opc = RIE_CLIJ;
+ in_range = (uint32_t)c2 == (uint8_t)c2;
+ } else {
+ opc = RIE_CIJ;
+ in_range = (int32_t)c2 == (int8_t)c2;
+ }
+ } else {
+ if (is_unsigned) {
+ opc = RIE_CLGIJ;
+ in_range = (uint64_t)c2 == (uint8_t)c2;
+ } else {
+ opc = RIE_CGIJ;
+ in_range = (int64_t)c2 == (int8_t)c2;
+ }
+ }
+ if (in_range) {
+ tgen_compare_imm_branch(s, opc, cc, r1, c2, l);
+ return;
+ }
+ }
+
+ cc = tgen_cmp(s, type, c, r1, c2, c2const, false);
+ tgen_branch(s, cc, l);
+}
+
+static void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest)
+{
+ ptrdiff_t off = tcg_pcrel_diff(s, dest) >> 1;
+ if (off == (int32_t)off) {
+ tcg_out_insn(s, RIL, BRASL, TCG_REG_R14, off);
+ } else {
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, (uintptr_t)dest);
+ tcg_out_insn(s, RR, BASR, TCG_REG_R14, TCG_TMP0);
+ }
+}
+
+static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg data,
+ TCGReg base, TCGReg index, int disp)
+{
+ switch (opc & (MO_SSIZE | MO_BSWAP)) {
+ case MO_UB:
+ tcg_out_insn(s, RXY, LLGC, data, base, index, disp);
+ break;
+ case MO_SB:
+ tcg_out_insn(s, RXY, LGB, data, base, index, disp);
+ break;
+
+ case MO_UW | MO_BSWAP:
+ /* swapped unsigned halfword load with upper bits zeroed */
+ tcg_out_insn(s, RXY, LRVH, data, base, index, disp);
+ tgen_ext16u(s, TCG_TYPE_I64, data, data);
+ break;
+ case MO_UW:
+ tcg_out_insn(s, RXY, LLGH, data, base, index, disp);
+ break;
+
+ case MO_SW | MO_BSWAP:
+ /* swapped sign-extended halfword load */
+ tcg_out_insn(s, RXY, LRVH, data, base, index, disp);
+ tgen_ext16s(s, TCG_TYPE_I64, data, data);
+ break;
+ case MO_SW:
+ tcg_out_insn(s, RXY, LGH, data, base, index, disp);
+ break;
+
+ case MO_UL | MO_BSWAP:
+ /* swapped unsigned int load with upper bits zeroed */
+ tcg_out_insn(s, RXY, LRV, data, base, index, disp);
+ tgen_ext32u(s, data, data);
+ break;
+ case MO_UL:
+ tcg_out_insn(s, RXY, LLGF, data, base, index, disp);
+ break;
+
+ case MO_SL | MO_BSWAP:
+ /* swapped sign-extended int load */
+ tcg_out_insn(s, RXY, LRV, data, base, index, disp);
+ tgen_ext32s(s, data, data);
+ break;
+ case MO_SL:
+ tcg_out_insn(s, RXY, LGF, data, base, index, disp);
+ break;
+
+ case MO_Q | MO_BSWAP:
+ tcg_out_insn(s, RXY, LRVG, data, base, index, disp);
+ break;
+ case MO_Q:
+ tcg_out_insn(s, RXY, LG, data, base, index, disp);
+ break;
+
+ default:
+ tcg_abort();
+ }
+}
+
+static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg data,
+ TCGReg base, TCGReg index, int disp)
+{
+ switch (opc & (MO_SIZE | MO_BSWAP)) {
+ case MO_UB:
+ if (disp >= 0 && disp < 0x1000) {
+ tcg_out_insn(s, RX, STC, data, base, index, disp);
+ } else {
+ tcg_out_insn(s, RXY, STCY, data, base, index, disp);
+ }
+ break;
+
+ case MO_UW | MO_BSWAP:
+ tcg_out_insn(s, RXY, STRVH, data, base, index, disp);
+ break;
+ case MO_UW:
+ if (disp >= 0 && disp < 0x1000) {
+ tcg_out_insn(s, RX, STH, data, base, index, disp);
+ } else {
+ tcg_out_insn(s, RXY, STHY, data, base, index, disp);
+ }
+ break;
+
+ case MO_UL | MO_BSWAP:
+ tcg_out_insn(s, RXY, STRV, data, base, index, disp);
+ break;
+ case MO_UL:
+ if (disp >= 0 && disp < 0x1000) {
+ tcg_out_insn(s, RX, ST, data, base, index, disp);
+ } else {
+ tcg_out_insn(s, RXY, STY, data, base, index, disp);
+ }
+ break;
+
+ case MO_Q | MO_BSWAP:
+ tcg_out_insn(s, RXY, STRVG, data, base, index, disp);
+ break;
+ case MO_Q:
+ tcg_out_insn(s, RXY, STG, data, base, index, disp);
+ break;
+
+ default:
+ tcg_abort();
+ }
+}
+
+#if defined(CONFIG_SOFTMMU)
+#include "../tcg-ldst.c.inc"
+
+/* We're expecting to use a 20-bit negative offset on the tlb memory ops. */
+QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
+QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 19));
+
+/* Load and compare a TLB entry, leaving the flags set. Loads the TLB
+ addend into R2. Returns a register with the sanitized guest address. */
+static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc,
+ int mem_index, bool is_ld)
+{
+ unsigned s_bits = opc & MO_SIZE;
+ unsigned a_bits = get_alignment_bits(opc);
+ unsigned s_mask = (1 << s_bits) - 1;
+ unsigned a_mask = (1 << a_bits) - 1;
+ int fast_off = TLB_MASK_TABLE_OFS(mem_index);
+ int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
+ int table_off = fast_off + offsetof(CPUTLBDescFast, table);
+ int ofs, a_off;
+ uint64_t tlb_mask;
+
+ tcg_out_sh64(s, RSY_SRLG, TCG_REG_R2, addr_reg, TCG_REG_NONE,
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
+ tcg_out_insn(s, RXY, NG, TCG_REG_R2, TCG_AREG0, TCG_REG_NONE, mask_off);
+ tcg_out_insn(s, RXY, AG, TCG_REG_R2, TCG_AREG0, TCG_REG_NONE, table_off);
+
+ /* For aligned accesses, we check the first byte and include the alignment
+ bits within the address. For unaligned access, we check that we don't
+ cross pages using the address of the last byte of the access. */
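+ /* E.g. an 8-byte load with byte alignment uses a_off = 7, so an
+ access that crosses a page boundary fails the comparison below. */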
+ a_off = (a_bits >= s_bits ? 0 : s_mask - a_mask);
+ tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
+ if (HAVE_FACILITY(GEN_INST_EXT) && a_off == 0) {
+ tgen_andi_risbg(s, TCG_REG_R3, addr_reg, tlb_mask);
+ } else {
+ tcg_out_insn(s, RX, LA, TCG_REG_R3, addr_reg, TCG_REG_NONE, a_off);
+ tgen_andi(s, TCG_TYPE_TL, TCG_REG_R3, tlb_mask);
+ }
+
+ if (is_ld) {
+ ofs = offsetof(CPUTLBEntry, addr_read);
+ } else {
+ ofs = offsetof(CPUTLBEntry, addr_write);
+ }
+ if (TARGET_LONG_BITS == 32) {
+ tcg_out_insn(s, RX, C, TCG_REG_R3, TCG_REG_R2, TCG_REG_NONE, ofs);
+ } else {
+ tcg_out_insn(s, RXY, CG, TCG_REG_R3, TCG_REG_R2, TCG_REG_NONE, ofs);
+ }
+
+ tcg_out_insn(s, RXY, LG, TCG_REG_R2, TCG_REG_R2, TCG_REG_NONE,
+ offsetof(CPUTLBEntry, addend));
+
+ if (TARGET_LONG_BITS == 32) {
+ tgen_ext32u(s, TCG_REG_R3, addr_reg);
+ return TCG_REG_R3;
+ }
+ return addr_reg;
+}
+
+static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,
+ TCGReg data, TCGReg addr,
+ tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
+{
+ TCGLabelQemuLdst *label = new_ldst_label(s);
+
+ label->is_ld = is_ld;
+ label->oi = oi;
+ label->datalo_reg = data;
+ label->addrlo_reg = addr;
+ label->raddr = tcg_splitwx_to_rx(raddr);
+ label->label_ptr[0] = label_ptr;
+}
+
+static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
+{
+ TCGReg addr_reg = lb->addrlo_reg;
+ TCGReg data_reg = lb->datalo_reg;
+ MemOpIdx oi = lb->oi;
+ MemOp opc = get_memop(oi);
+
+ if (!patch_reloc(lb->label_ptr[0], R_390_PC16DBL,
+ (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) {
+ return false;
+ }
+
+ tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0);
+ if (TARGET_LONG_BITS == 64) {
+ tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg);
+ }
+ tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R4, oi);
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R5, (uintptr_t)lb->raddr);
+ tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)]);
+ tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_R2);
+
+ tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
+ return true;
+}
+
+static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
+{
+ TCGReg addr_reg = lb->addrlo_reg;
+ TCGReg data_reg = lb->datalo_reg;
+ MemOpIdx oi = lb->oi;
+ MemOp opc = get_memop(oi);
+
+ if (!patch_reloc(lb->label_ptr[0], R_390_PC16DBL,
+ (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) {
+ return false;
+ }
+
+ tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0);
+ if (TARGET_LONG_BITS == 64) {
+ tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg);
+ }
+ switch (opc & MO_SIZE) {
+ case MO_UB:
+ tgen_ext8u(s, TCG_TYPE_I64, TCG_REG_R4, data_reg);
+ break;
+ case MO_UW:
+ tgen_ext16u(s, TCG_TYPE_I64, TCG_REG_R4, data_reg);
+ break;
+ case MO_UL:
+ tgen_ext32u(s, TCG_REG_R4, data_reg);
+ break;
+ case MO_Q:
+ tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R4, data_reg);
+ break;
+ default:
+ tcg_abort();
+ }
+ tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R5, oi);
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R6, (uintptr_t)lb->raddr);
+ tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
+
+ tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
+ return true;
+}
+#else
+static void tcg_prepare_user_ldst(TCGContext *s, TCGReg *addr_reg,
+ TCGReg *index_reg, tcg_target_long *disp)
+{
+ if (TARGET_LONG_BITS == 32) {
+ tgen_ext32u(s, TCG_TMP0, *addr_reg);
+ *addr_reg = TCG_TMP0;
+ }
+ if (guest_base < 0x80000) {
+ *index_reg = TCG_REG_NONE;
+ *disp = guest_base;
+ } else {
+ *index_reg = TCG_GUEST_BASE_REG;
+ *disp = 0;
+ }
+}
+#endif /* CONFIG_SOFTMMU */
+
+static void tcg_out_qemu_ld(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
+ MemOpIdx oi)
+{
+ MemOp opc = get_memop(oi);
+#ifdef CONFIG_SOFTMMU
+ unsigned mem_index = get_mmuidx(oi);
+ tcg_insn_unit *label_ptr;
+ TCGReg base_reg;
+
+ base_reg = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 1);
+
+ tcg_out16(s, RI_BRC | (S390_CC_NE << 4));
+ label_ptr = s->code_ptr;
+ s->code_ptr += 1;
+
+ tcg_out_qemu_ld_direct(s, opc, data_reg, base_reg, TCG_REG_R2, 0);
+
+ add_qemu_ldst_label(s, 1, oi, data_reg, addr_reg, s->code_ptr, label_ptr);
+#else
+ TCGReg index_reg;
+ tcg_target_long disp;
+
+ tcg_prepare_user_ldst(s, &addr_reg, &index_reg, &disp);
+ tcg_out_qemu_ld_direct(s, opc, data_reg, addr_reg, index_reg, disp);
+#endif
+}
+
+static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
+ MemOpIdx oi)
+{
+ MemOp opc = get_memop(oi);
+#ifdef CONFIG_SOFTMMU
+ unsigned mem_index = get_mmuidx(oi);
+ tcg_insn_unit *label_ptr;
+ TCGReg base_reg;
+
+ base_reg = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 0);
+
+ tcg_out16(s, RI_BRC | (S390_CC_NE << 4));
+ label_ptr = s->code_ptr;
+ s->code_ptr += 1;
+
+ tcg_out_qemu_st_direct(s, opc, data_reg, base_reg, TCG_REG_R2, 0);
+
+ add_qemu_ldst_label(s, 0, oi, data_reg, addr_reg, s->code_ptr, label_ptr);
+#else
+ TCGReg index_reg;
+ tcg_target_long disp;
+
+ tcg_prepare_user_ldst(s, &addr_reg, &index_reg, &disp);
+ tcg_out_qemu_st_direct(s, opc, data_reg, addr_reg, index_reg, disp);
+#endif
+}
+
+# define OP_32_64(x) \
+ case glue(glue(INDEX_op_,x),_i32): \
+ case glue(glue(INDEX_op_,x),_i64)
+
+static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
+ const TCGArg args[TCG_MAX_OP_ARGS],
+ const int const_args[TCG_MAX_OP_ARGS])
+{
+ S390Opcode op, op2;
+ TCGArg a0, a1, a2;
+
+ switch (opc) {
+ case INDEX_op_exit_tb:
+ /* Reuse the zeroing that exists for goto_ptr. */
+ a0 = args[0];
+ if (a0 == 0) {
+ tgen_gotoi(s, S390_CC_ALWAYS, tcg_code_gen_epilogue);
+ } else {
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, a0);
+ tgen_gotoi(s, S390_CC_ALWAYS, tb_ret_addr);
+ }
+ break;
+
+ case INDEX_op_goto_tb:
+ a0 = args[0];
+ if (s->tb_jmp_insn_offset) {
+ /*
+ * branch displacement must be aligned for atomic patching;
+ * see if we need to add extra nop before branch
+ */
+ if (!QEMU_PTR_IS_ALIGNED(s->code_ptr + 1, 4)) {
+ tcg_out16(s, NOP);
+ }
+ tcg_debug_assert(!USE_REG_TB);
+ tcg_out16(s, RIL_BRCL | (S390_CC_ALWAYS << 4));
+ s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
+ s->code_ptr += 2;
+ } else {
+ /* load address stored at s->tb_jmp_target_addr + a0 */
+ tcg_out_ld_abs(s, TCG_TYPE_PTR, TCG_REG_TB,
+ tcg_splitwx_to_rx(s->tb_jmp_target_addr + a0));
+ /* and go there */
+ tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_REG_TB);
+ }
+ set_jmp_reset_offset(s, a0);
+
+ /* For the unlinked path of goto_tb, we need to reset
+ TCG_REG_TB to the beginning of this TB. */
+ if (USE_REG_TB) {
+ int ofs = -tcg_current_code_size(s);
+ /* All TB are restricted to 64KiB by unwind info. */
+ tcg_debug_assert(ofs == sextract64(ofs, 0, 20));
+ tcg_out_insn(s, RXY, LAY, TCG_REG_TB,
+ TCG_REG_TB, TCG_REG_NONE, ofs);
+ }
+ break;
+
+ case INDEX_op_goto_ptr:
+ a0 = args[0];
+ if (USE_REG_TB) {
+ tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, a0);
+ }
+ tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, a0);
+ break;
+
+ OP_32_64(ld8u):
+ /* ??? LLC (RXY format) is only present with the extended-immediate
+ facility, whereas LLGC is always present. */
+ tcg_out_mem(s, 0, RXY_LLGC, args[0], args[1], TCG_REG_NONE, args[2]);
+ break;
+
+ OP_32_64(ld8s):
+ /* ??? LB is no smaller than LGB, so no point to using it. */
+ tcg_out_mem(s, 0, RXY_LGB, args[0], args[1], TCG_REG_NONE, args[2]);
+ break;
+
+ OP_32_64(ld16u):
+ /* ??? LLH (RXY format) is only present with the extended-immediate
+ facility, whereas LLGH is always present. */
+ tcg_out_mem(s, 0, RXY_LLGH, args[0], args[1], TCG_REG_NONE, args[2]);
+ break;
+
+ case INDEX_op_ld16s_i32:
+ tcg_out_mem(s, RX_LH, RXY_LHY, args[0], args[1], TCG_REG_NONE, args[2]);
+ break;
+
+ case INDEX_op_ld_i32:
+ tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
+ break;
+
+ OP_32_64(st8):
+ tcg_out_mem(s, RX_STC, RXY_STCY, args[0], args[1],
+ TCG_REG_NONE, args[2]);
+ break;
+
+ OP_32_64(st16):
+ tcg_out_mem(s, RX_STH, RXY_STHY, args[0], args[1],
+ TCG_REG_NONE, args[2]);
+ break;
+
+ case INDEX_op_st_i32:
+ tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
+ break;
+
+ case INDEX_op_add_i32:
+ a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
+ if (const_args[2]) {
+ do_addi_32:
+ if (a0 == a1) {
+ if (a2 == (int16_t)a2) {
+ tcg_out_insn(s, RI, AHI, a0, a2);
+ break;
+ }
+ if (HAVE_FACILITY(EXT_IMM)) {
+ tcg_out_insn(s, RIL, AFI, a0, a2);
+ break;
+ }
+ }
+ tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2);
+ } else if (a0 == a1) {
+ tcg_out_insn(s, RR, AR, a0, a2);
+ } else {
+ tcg_out_insn(s, RX, LA, a0, a1, a2, 0);
+ }
+ break;
+ case INDEX_op_sub_i32:
+ a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
+ if (const_args[2]) {
+ a2 = -a2;
+ goto do_addi_32;
+ } else if (a0 == a1) {
+ tcg_out_insn(s, RR, SR, a0, a2);
+ } else {
+ tcg_out_insn(s, RRF, SRK, a0, a1, a2);
+ }
+ break;
+
+ case INDEX_op_and_i32:
+ a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
+ if (const_args[2]) {
+ tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
+ tgen_andi(s, TCG_TYPE_I32, a0, a2);
+ } else if (a0 == a1) {
+ tcg_out_insn(s, RR, NR, a0, a2);
+ } else {
+ tcg_out_insn(s, RRF, NRK, a0, a1, a2);
+ }
+ break;
+ case INDEX_op_or_i32:
+ a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
+ if (const_args[2]) {
+ tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
+ tgen_ori(s, TCG_TYPE_I32, a0, a2);
+ } else if (a0 == a1) {
+ tcg_out_insn(s, RR, OR, a0, a2);
+ } else {
+ tcg_out_insn(s, RRF, ORK, a0, a1, a2);
+ }
+ break;
+ case INDEX_op_xor_i32:
+ a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
+ if (const_args[2]) {
+ tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
+ tgen_xori(s, TCG_TYPE_I32, a0, a2);
+ } else if (a0 == a1) {
+ tcg_out_insn(s, RR, XR, args[0], args[2]);
+ } else {
+ tcg_out_insn(s, RRF, XRK, a0, a1, a2);
+ }
+ break;
+
+ case INDEX_op_neg_i32:
+ tcg_out_insn(s, RR, LCR, args[0], args[1]);
+ break;
+
+ case INDEX_op_mul_i32:
+ if (const_args[2]) {
+ if ((int32_t)args[2] == (int16_t)args[2]) {
+ tcg_out_insn(s, RI, MHI, args[0], args[2]);
+ } else {
+ tcg_out_insn(s, RIL, MSFI, args[0], args[2]);
+ }
+ } else {
+ tcg_out_insn(s, RRE, MSR, args[0], args[2]);
+ }
+ break;
+
+ case INDEX_op_div2_i32:
+ tcg_out_insn(s, RR, DR, TCG_REG_R2, args[4]);
+ break;
+ case INDEX_op_divu2_i32:
+ tcg_out_insn(s, RRE, DLR, TCG_REG_R2, args[4]);
+ break;
+
+ case INDEX_op_shl_i32:
+ op = RS_SLL;
+ op2 = RSY_SLLK;
+ do_shift32:
+ a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
+ if (a0 == a1) {
+ if (const_args[2]) {
+ tcg_out_sh32(s, op, a0, TCG_REG_NONE, a2);
+ } else {
+ tcg_out_sh32(s, op, a0, a2, 0);
+ }
+ } else {
+ /* Using tcg_out_sh64 here for the format; it is a 32-bit shift. */
+ if (const_args[2]) {
+ tcg_out_sh64(s, op2, a0, a1, TCG_REG_NONE, a2);
+ } else {
+ tcg_out_sh64(s, op2, a0, a1, a2, 0);
+ }
+ }
+ break;
+ case INDEX_op_shr_i32:
+ op = RS_SRL;
+ op2 = RSY_SRLK;
+ goto do_shift32;
+ case INDEX_op_sar_i32:
+ op = RS_SRA;
+ op2 = RSY_SRAK;
+ goto do_shift32;
+
+ case INDEX_op_rotl_i32:
+ /* ??? Using tcg_out_sh64 here for the format; it is a 32-bit rol. */
+ if (const_args[2]) {
+ tcg_out_sh64(s, RSY_RLL, args[0], args[1], TCG_REG_NONE, args[2]);
+ } else {
+ tcg_out_sh64(s, RSY_RLL, args[0], args[1], args[2], 0);
+ }
+ break;
+ case INDEX_op_rotr_i32:
+ if (const_args[2]) {
+ tcg_out_sh64(s, RSY_RLL, args[0], args[1],
+ TCG_REG_NONE, (32 - args[2]) & 31);
+ } else {
+ tcg_out_insn(s, RR, LCR, TCG_TMP0, args[2]);
+ tcg_out_sh64(s, RSY_RLL, args[0], args[1], TCG_TMP0, 0);
+ }
+ break;
+
+ case INDEX_op_ext8s_i32:
+ tgen_ext8s(s, TCG_TYPE_I32, args[0], args[1]);
+ break;
+ case INDEX_op_ext16s_i32:
+ tgen_ext16s(s, TCG_TYPE_I32, args[0], args[1]);
+ break;
+ case INDEX_op_ext8u_i32:
+ tgen_ext8u(s, TCG_TYPE_I32, args[0], args[1]);
+ break;
+ case INDEX_op_ext16u_i32:
+ tgen_ext16u(s, TCG_TYPE_I32, args[0], args[1]);
+ break;
+
+ case INDEX_op_bswap16_i32:
+ a0 = args[0], a1 = args[1], a2 = args[2];
+ tcg_out_insn(s, RRE, LRVR, a0, a1);
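+ /* LRVR reverses all four bytes, leaving the swapped halfword in
+ bits 16..31; shift it back down with the requested extension. */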
+ if (a2 & TCG_BSWAP_OS) {
+ tcg_out_sh32(s, RS_SRA, a0, TCG_REG_NONE, 16);
+ } else {
+ tcg_out_sh32(s, RS_SRL, a0, TCG_REG_NONE, 16);
+ }
+ break;
+ case INDEX_op_bswap16_i64:
+ a0 = args[0], a1 = args[1], a2 = args[2];
+ tcg_out_insn(s, RRE, LRVGR, a0, a1);
+ if (a2 & TCG_BSWAP_OS) {
+ tcg_out_sh64(s, RSY_SRAG, a0, a0, TCG_REG_NONE, 48);
+ } else {
+ tcg_out_sh64(s, RSY_SRLG, a0, a0, TCG_REG_NONE, 48);
+ }
+ break;
+
+ case INDEX_op_bswap32_i32:
+ tcg_out_insn(s, RRE, LRVR, args[0], args[1]);
+ break;
+ case INDEX_op_bswap32_i64:
+ a0 = args[0], a1 = args[1], a2 = args[2];
+ tcg_out_insn(s, RRE, LRVR, a0, a1);
+ if (a2 & TCG_BSWAP_OS) {
+ tgen_ext32s(s, a0, a0);
+ } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
+ tgen_ext32u(s, a0, a0);
+ }
+ break;
+
+ case INDEX_op_add2_i32:
+ if (const_args[4]) {
+ tcg_out_insn(s, RIL, ALFI, args[0], args[4]);
+ } else {
+ tcg_out_insn(s, RR, ALR, args[0], args[4]);
+ }
+ tcg_out_insn(s, RRE, ALCR, args[1], args[5]);
+ break;
+ case INDEX_op_sub2_i32:
+ if (const_args[4]) {
+ tcg_out_insn(s, RIL, SLFI, args[0], args[4]);
+ } else {
+ tcg_out_insn(s, RR, SLR, args[0], args[4]);
+ }
+ tcg_out_insn(s, RRE, SLBR, args[1], args[5]);
+ break;
+
+ case INDEX_op_br:
+ tgen_branch(s, S390_CC_ALWAYS, arg_label(args[0]));
+ break;
+
+ case INDEX_op_brcond_i32:
+ tgen_brcond(s, TCG_TYPE_I32, args[2], args[0],
+ args[1], const_args[1], arg_label(args[3]));
+ break;
+ case INDEX_op_setcond_i32:
+ tgen_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1],
+ args[2], const_args[2]);
+ break;
+ case INDEX_op_movcond_i32:
+ tgen_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1],
+ args[2], const_args[2], args[3], const_args[3]);
+ break;
+
+ case INDEX_op_qemu_ld_i32:
+ /* ??? Technically we can use a non-extending instruction. */
+ case INDEX_op_qemu_ld_i64:
+ tcg_out_qemu_ld(s, args[0], args[1], args[2]);
+ break;
+ case INDEX_op_qemu_st_i32:
+ case INDEX_op_qemu_st_i64:
+ tcg_out_qemu_st(s, args[0], args[1], args[2]);
+ break;
+
+ case INDEX_op_ld16s_i64:
+ tcg_out_mem(s, 0, RXY_LGH, args[0], args[1], TCG_REG_NONE, args[2]);
+ break;
+ case INDEX_op_ld32u_i64:
+ tcg_out_mem(s, 0, RXY_LLGF, args[0], args[1], TCG_REG_NONE, args[2]);
+ break;
+ case INDEX_op_ld32s_i64:
+ tcg_out_mem(s, 0, RXY_LGF, args[0], args[1], TCG_REG_NONE, args[2]);
+ break;
+ case INDEX_op_ld_i64:
+ tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
+ break;
+
+ case INDEX_op_st32_i64:
+ tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
+ break;
+ case INDEX_op_st_i64:
+ tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
+ break;
+
+ case INDEX_op_add_i64:
+ a0 = args[0], a1 = args[1], a2 = args[2];
+ if (const_args[2]) {
+ do_addi_64:
+ if (a0 == a1) {
+ if (a2 == (int16_t)a2) {
+ tcg_out_insn(s, RI, AGHI, a0, a2);
+ break;
+ }
+ if (HAVE_FACILITY(EXT_IMM)) {
+ if (a2 == (int32_t)a2) {
+ tcg_out_insn(s, RIL, AGFI, a0, a2);
+ break;
+ } else if (a2 == (uint32_t)a2) {
+ tcg_out_insn(s, RIL, ALGFI, a0, a2);
+ break;
+ } else if (-a2 == (uint32_t)-a2) {
+ tcg_out_insn(s, RIL, SLGFI, a0, -a2);
+ break;
+ }
+ }
+ }
+ tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2);
+ } else if (a0 == a1) {
+ tcg_out_insn(s, RRE, AGR, a0, a2);
+ } else {
+ tcg_out_insn(s, RX, LA, a0, a1, a2, 0);
+ }
+ break;
+ case INDEX_op_sub_i64:
+ a0 = args[0], a1 = args[1], a2 = args[2];
+ if (const_args[2]) {
+ a2 = -a2;
+ goto do_addi_64;
+ } else if (a0 == a1) {
+ tcg_out_insn(s, RRE, SGR, a0, a2);
+ } else {
+ tcg_out_insn(s, RRF, SGRK, a0, a1, a2);
+ }
+ break;
+
+ case INDEX_op_and_i64:
+ a0 = args[0], a1 = args[1], a2 = args[2];
+ if (const_args[2]) {
+ tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
+ tgen_andi(s, TCG_TYPE_I64, args[0], args[2]);
+ } else if (a0 == a1) {
+ tcg_out_insn(s, RRE, NGR, args[0], args[2]);
+ } else {
+ tcg_out_insn(s, RRF, NGRK, a0, a1, a2);
+ }
+ break;
+ case INDEX_op_or_i64:
+ a0 = args[0], a1 = args[1], a2 = args[2];
+ if (const_args[2]) {
+ tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
+ tgen_ori(s, TCG_TYPE_I64, a0, a2);
+ } else if (a0 == a1) {
+ tcg_out_insn(s, RRE, OGR, a0, a2);
+ } else {
+ tcg_out_insn(s, RRF, OGRK, a0, a1, a2);
+ }
+ break;
+ case INDEX_op_xor_i64:
+ a0 = args[0], a1 = args[1], a2 = args[2];
+ if (const_args[2]) {
+ tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
+ tgen_xori(s, TCG_TYPE_I64, a0, a2);
+ } else if (a0 == a1) {
+ tcg_out_insn(s, RRE, XGR, a0, a2);
+ } else {
+ tcg_out_insn(s, RRF, XGRK, a0, a1, a2);
+ }
+ break;
+
+ case INDEX_op_neg_i64:
+ tcg_out_insn(s, RRE, LCGR, args[0], args[1]);
+ break;
+ case INDEX_op_bswap64_i64:
+ tcg_out_insn(s, RRE, LRVGR, args[0], args[1]);
+ break;
+
+ case INDEX_op_mul_i64:
+ if (const_args[2]) {
+ if (args[2] == (int16_t)args[2]) {
+ tcg_out_insn(s, RI, MGHI, args[0], args[2]);
+ } else {
+ tcg_out_insn(s, RIL, MSGFI, args[0], args[2]);
+ }
+ } else {
+ tcg_out_insn(s, RRE, MSGR, args[0], args[2]);
+ }
+ break;
+
+ case INDEX_op_div2_i64:
+ /* ??? We get an unnecessary sign-extension of the dividend
+ into R3 with this definition, but since we always produce both
+ quotient and remainder, using INDEX_op_div_i64 instead would
+ require jumping through even more hoops. */
+ tcg_out_insn(s, RRE, DSGR, TCG_REG_R2, args[4]);
+ break;
+ case INDEX_op_divu2_i64:
+ tcg_out_insn(s, RRE, DLGR, TCG_REG_R2, args[4]);
+ break;
+ case INDEX_op_mulu2_i64:
+ tcg_out_insn(s, RRE, MLGR, TCG_REG_R2, args[3]);
+ break;
+
+ case INDEX_op_shl_i64:
+ op = RSY_SLLG;
+ do_shift64:
+ if (const_args[2]) {
+ tcg_out_sh64(s, op, args[0], args[1], TCG_REG_NONE, args[2]);
+ } else {
+ tcg_out_sh64(s, op, args[0], args[1], args[2], 0);
+ }
+ break;
+ case INDEX_op_shr_i64:
+ op = RSY_SRLG;
+ goto do_shift64;
+ case INDEX_op_sar_i64:
+ op = RSY_SRAG;
+ goto do_shift64;
+
+ case INDEX_op_rotl_i64:
+ if (const_args[2]) {
+ tcg_out_sh64(s, RSY_RLLG, args[0], args[1],
+ TCG_REG_NONE, args[2]);
+ } else {
+ tcg_out_sh64(s, RSY_RLLG, args[0], args[1], args[2], 0);
+ }
+ break;
+ case INDEX_op_rotr_i64:
+ if (const_args[2]) {
+ tcg_out_sh64(s, RSY_RLLG, args[0], args[1],
+ TCG_REG_NONE, (64 - args[2]) & 63);
+ } else {
+ /* We can use the smaller 32-bit negate because only the
+ low 6 bits are examined for the rotate. */
+ tcg_out_insn(s, RR, LCR, TCG_TMP0, args[2]);
+ tcg_out_sh64(s, RSY_RLLG, args[0], args[1], TCG_TMP0, 0);
+ }
+ break;
+
+ case INDEX_op_ext8s_i64:
+ tgen_ext8s(s, TCG_TYPE_I64, args[0], args[1]);
+ break;
+ case INDEX_op_ext16s_i64:
+ tgen_ext16s(s, TCG_TYPE_I64, args[0], args[1]);
+ break;
+ case INDEX_op_ext_i32_i64:
+ case INDEX_op_ext32s_i64:
+ tgen_ext32s(s, args[0], args[1]);
+ break;
+ case INDEX_op_ext8u_i64:
+ tgen_ext8u(s, TCG_TYPE_I64, args[0], args[1]);
+ break;
+ case INDEX_op_ext16u_i64:
+ tgen_ext16u(s, TCG_TYPE_I64, args[0], args[1]);
+ break;
+ case INDEX_op_extu_i32_i64:
+ case INDEX_op_ext32u_i64:
+ tgen_ext32u(s, args[0], args[1]);
+ break;
+
+ case INDEX_op_add2_i64:
+ if (const_args[4]) {
+ if ((int64_t)args[4] >= 0) {
+ tcg_out_insn(s, RIL, ALGFI, args[0], args[4]);
+ } else {
+ tcg_out_insn(s, RIL, SLGFI, args[0], -args[4]);
+ }
+ } else {
+ tcg_out_insn(s, RRE, ALGR, args[0], args[4]);
+ }
+ tcg_out_insn(s, RRE, ALCGR, args[1], args[5]);
+ break;
+ case INDEX_op_sub2_i64:
+ if (const_args[4]) {
+ if ((int64_t)args[4] >= 0) {
+ tcg_out_insn(s, RIL, SLGFI, args[0], args[4]);
+ } else {
+ tcg_out_insn(s, RIL, ALGFI, args[0], -args[4]);
+ }
+ } else {
+ tcg_out_insn(s, RRE, SLGR, args[0], args[4]);
+ }
+ tcg_out_insn(s, RRE, SLBGR, args[1], args[5]);
+ break;
+
+ case INDEX_op_brcond_i64:
+ tgen_brcond(s, TCG_TYPE_I64, args[2], args[0],
+ args[1], const_args[1], arg_label(args[3]));
+ break;
+ case INDEX_op_setcond_i64:
+ tgen_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1],
+ args[2], const_args[2]);
+ break;
+ case INDEX_op_movcond_i64:
+ tgen_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1],
+ args[2], const_args[2], args[3], const_args[3]);
+ break;
+
+ OP_32_64(deposit):
+ a0 = args[0], a1 = args[1], a2 = args[2];
+ if (const_args[1]) {
+ tgen_deposit(s, a0, a2, args[3], args[4], 1);
+ } else {
+ /* Since we can't support "0Z" as a constraint, we allow a1 in
+ any register. Fix things up as if we had a matching constraint. */
+ if (a0 != a1) {
+ TCGType type = (opc == INDEX_op_deposit_i64);
+ if (a0 == a2) {
+ tcg_out_mov(s, type, TCG_TMP0, a2);
+ a2 = TCG_TMP0;
+ }
+ tcg_out_mov(s, type, a0, a1);
+ }
+ tgen_deposit(s, a0, a2, args[3], args[4], 0);
+ }
+ break;
+
+ OP_32_64(extract):
+ tgen_extract(s, args[0], args[1], args[2], args[3]);
+ break;
+
+ case INDEX_op_clz_i64:
+ tgen_clz(s, args[0], args[1], args[2], const_args[2]);
+ break;
+
+ case INDEX_op_mb:
+ /* The host memory model is quite strong; we simply need to
+ serialize the instruction stream. */
+ if (args[0] & TCG_MO_ST_LD) {
+ tcg_out_insn(s, RR, BCR, HAVE_FACILITY(FAST_BCR_SER) ? 14 : 15, 0);
+ }
+ break;
+
+ case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
+ case INDEX_op_mov_i64:
+ case INDEX_op_call: /* Always emitted via tcg_out_call. */
+ default:
+ tcg_abort();
+ }
+}
+
+static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
+ TCGReg dst, TCGReg src)
+{
+ if (is_general_reg(src)) {
+ /* Replicate general register into two MO_64. */
+ tcg_out_insn(s, VRRf, VLVGP, dst, src, src);
+ if (vece == MO_64) {
+ return true;
+ }
+ }
+
+ /*
+ * Recall that the "standard" integer, within a vector, is the
+ * rightmost element of the leftmost doubleword, a-la VLLEZ.
+ */
+ tcg_out_insn(s, VRIc, VREP, dst, (8 >> vece) - 1, src, vece);
+ return true;
+}
+
+static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
+ TCGReg dst, TCGReg base, intptr_t offset)
+{
+ tcg_out_vrx_mem(s, VRX_VLREP, dst, base, TCG_REG_NONE, offset, vece);
+ return true;
+}
+
+static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
+ TCGReg dst, int64_t val)
+{
+ int i, mask, msb, lsb;
+
+ /* Look for int16_t elements. */
+ if (vece <= MO_16 ||
+ (vece == MO_32 ? (int32_t)val : val) == (int16_t)val) {
+ tcg_out_insn(s, VRIa, VREPI, dst, val, vece);
+ return;
+ }
+
+ /* Look for bit masks. */
+ if (vece == MO_32) {
+ if (risbg_mask((int32_t)val)) {
+ /* Handle wraparound by swapping msb and lsb. */
+ if ((val & 0x80000001u) == 0x80000001u) {
+ msb = 32 - ctz32(~val);
+ lsb = clz32(~val) - 1;
+ } else {
+ msb = clz32(val);
+ lsb = 31 - ctz32(val);
+ }
+ tcg_out_insn(s, VRIb, VGM, dst, lsb, msb, MO_32);
+ return;
+ }
+ } else {
+ if (risbg_mask(val)) {
+ /* Handle wraparound by swapping msb and lsb. */
+ if ((val & 0x8000000000000001ull) == 0x8000000000000001ull) {
+ msb = 64 - ctz64(~val);
+ lsb = clz64(~val) - 1;
+ } else {
+ msb = clz64(val);
+ lsb = 63 - ctz64(val);
+ }
+ tcg_out_insn(s, VRIb, VGM, dst, lsb, msb, MO_64);
+ return;
+ }
+ }
+
+ /* Look for all bytes 0x00 or 0xff. */
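+ /* VGBM expands one mask bit per vector byte; multiplying the 8-bit
+ mask by 0x0101 replicates the pattern into both doublewords. */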
+ for (i = mask = 0; i < 8; i++) {
+ uint8_t byte = val >> (i * 8);
+ if (byte == 0xff) {
+ mask |= 1 << i;
+ } else if (byte != 0) {
+ break;
+ }
+ }
+ if (i == 8) {
+ tcg_out_insn(s, VRIa, VGBM, dst, mask * 0x0101, 0);
+ return;
+ }
+
+ /* Otherwise, stuff it in the constant pool. */
+ tcg_out_insn(s, RIL, LARL, TCG_TMP0, 0);
+ new_pool_label(s, val, R_390_PC32DBL, s->code_ptr - 2, 2);
+ tcg_out_insn(s, VRX, VLREP, dst, TCG_TMP0, TCG_REG_NONE, 0, MO_64);
+}
+
+static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
+ unsigned vecl, unsigned vece,
+ const TCGArg args[TCG_MAX_OP_ARGS],
+ const int const_args[TCG_MAX_OP_ARGS])
+{
+ TCGType type = vecl + TCG_TYPE_V64;
+ TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
+
+ switch (opc) {
+ case INDEX_op_ld_vec:
+ tcg_out_ld(s, type, a0, a1, a2);
+ break;
+ case INDEX_op_st_vec:
+ tcg_out_st(s, type, a0, a1, a2);
+ break;
+ case INDEX_op_dupm_vec:
+ tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
+ break;
+
+ case INDEX_op_abs_vec:
+ tcg_out_insn(s, VRRa, VLP, a0, a1, vece);
+ break;
+ case INDEX_op_neg_vec:
+ tcg_out_insn(s, VRRa, VLC, a0, a1, vece);
+ break;
+ case INDEX_op_not_vec:
+ tcg_out_insn(s, VRRc, VNO, a0, a1, a1, 0);
+ break;
+
+ case INDEX_op_add_vec:
+ tcg_out_insn(s, VRRc, VA, a0, a1, a2, vece);
+ break;
+ case INDEX_op_sub_vec:
+ tcg_out_insn(s, VRRc, VS, a0, a1, a2, vece);
+ break;
+ case INDEX_op_and_vec:
+ tcg_out_insn(s, VRRc, VN, a0, a1, a2, 0);
+ break;
+ case INDEX_op_andc_vec:
+ tcg_out_insn(s, VRRc, VNC, a0, a1, a2, 0);
+ break;
+ case INDEX_op_mul_vec:
+ tcg_out_insn(s, VRRc, VML, a0, a1, a2, vece);
+ break;
+ case INDEX_op_or_vec:
+ tcg_out_insn(s, VRRc, VO, a0, a1, a2, 0);
+ break;
+ case INDEX_op_orc_vec:
+ tcg_out_insn(s, VRRc, VOC, a0, a1, a2, 0);
+ break;
+ case INDEX_op_xor_vec:
+ tcg_out_insn(s, VRRc, VX, a0, a1, a2, 0);
+ break;
+
+ case INDEX_op_shli_vec:
+ tcg_out_insn(s, VRSa, VESL, a0, a2, TCG_REG_NONE, a1, vece);
+ break;
+ case INDEX_op_shri_vec:
+ tcg_out_insn(s, VRSa, VESRL, a0, a2, TCG_REG_NONE, a1, vece);
+ break;
+ case INDEX_op_sari_vec:
+ tcg_out_insn(s, VRSa, VESRA, a0, a2, TCG_REG_NONE, a1, vece);
+ break;
+ case INDEX_op_rotli_vec:
+ tcg_out_insn(s, VRSa, VERLL, a0, a2, TCG_REG_NONE, a1, vece);
+ break;
+ case INDEX_op_shls_vec:
+ tcg_out_insn(s, VRSa, VESL, a0, 0, a2, a1, vece);
+ break;
+ case INDEX_op_shrs_vec:
+ tcg_out_insn(s, VRSa, VESRL, a0, 0, a2, a1, vece);
+ break;
+ case INDEX_op_sars_vec:
+ tcg_out_insn(s, VRSa, VESRA, a0, 0, a2, a1, vece);
+ break;
+ case INDEX_op_rotls_vec:
+ tcg_out_insn(s, VRSa, VERLL, a0, 0, a2, a1, vece);
+ break;
+ case INDEX_op_shlv_vec:
+ tcg_out_insn(s, VRRc, VESLV, a0, a1, a2, vece);
+ break;
+ case INDEX_op_shrv_vec:
+ tcg_out_insn(s, VRRc, VESRLV, a0, a1, a2, vece);
+ break;
+ case INDEX_op_sarv_vec:
+ tcg_out_insn(s, VRRc, VESRAV, a0, a1, a2, vece);
+ break;
+ case INDEX_op_rotlv_vec:
+ tcg_out_insn(s, VRRc, VERLLV, a0, a1, a2, vece);
+ break;
+
+ case INDEX_op_smin_vec:
+ tcg_out_insn(s, VRRc, VMN, a0, a1, a2, vece);
+ break;
+ case INDEX_op_smax_vec:
+ tcg_out_insn(s, VRRc, VMX, a0, a1, a2, vece);
+ break;
+ case INDEX_op_umin_vec:
+ tcg_out_insn(s, VRRc, VMNL, a0, a1, a2, vece);
+ break;
+ case INDEX_op_umax_vec:
+ tcg_out_insn(s, VRRc, VMXL, a0, a1, a2, vece);
+ break;
+
+ case INDEX_op_bitsel_vec:
+ tcg_out_insn(s, VRRe, VSEL, a0, a1, a2, args[3]);
+ break;
+
+ case INDEX_op_cmp_vec:
+ switch ((TCGCond)args[3]) {
+ case TCG_COND_EQ:
+ tcg_out_insn(s, VRRc, VCEQ, a0, a1, a2, vece);
+ break;
+ case TCG_COND_GT:
+ tcg_out_insn(s, VRRc, VCH, a0, a1, a2, vece);
+ break;
+ case TCG_COND_GTU:
+ tcg_out_insn(s, VRRc, VCHL, a0, a1, a2, vece);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ break;
+
+ case INDEX_op_s390_vuph_vec:
+ tcg_out_insn(s, VRRa, VUPH, a0, a1, vece);
+ break;
+ case INDEX_op_s390_vupl_vec:
+ tcg_out_insn(s, VRRa, VUPL, a0, a1, vece);
+ break;
+ case INDEX_op_s390_vpks_vec:
+ tcg_out_insn(s, VRRc, VPKS, a0, a1, a2, vece);
+ break;
+
+ case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */
+ case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */
+ default:
+ g_assert_not_reached();
+ }
+}
+
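+/*
+ * Report which vector opcodes are supported for a given element size:
+ * 1 means the opcode is emitted directly, -1 means it is supported via
+ * expansion in tcg_expand_vec_op below, and 0 means not supported.
+ */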
+int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
+{
+ switch (opc) {
+ case INDEX_op_abs_vec:
+ case INDEX_op_add_vec:
+ case INDEX_op_and_vec:
+ case INDEX_op_andc_vec:
+ case INDEX_op_bitsel_vec:
+ case INDEX_op_neg_vec:
+ case INDEX_op_not_vec:
+ case INDEX_op_or_vec:
+ case INDEX_op_orc_vec:
+ case INDEX_op_rotli_vec:
+ case INDEX_op_rotls_vec:
+ case INDEX_op_rotlv_vec:
+ case INDEX_op_sari_vec:
+ case INDEX_op_sars_vec:
+ case INDEX_op_sarv_vec:
+ case INDEX_op_shli_vec:
+ case INDEX_op_shls_vec:
+ case INDEX_op_shlv_vec:
+ case INDEX_op_shri_vec:
+ case INDEX_op_shrs_vec:
+ case INDEX_op_shrv_vec:
+ case INDEX_op_smax_vec:
+ case INDEX_op_smin_vec:
+ case INDEX_op_sub_vec:
+ case INDEX_op_umax_vec:
+ case INDEX_op_umin_vec:
+ case INDEX_op_xor_vec:
+ return 1;
+ case INDEX_op_cmp_vec:
+ case INDEX_op_cmpsel_vec:
+ case INDEX_op_rotrv_vec:
+ return -1;
+ case INDEX_op_mul_vec:
+ return vece < MO_64;
+ case INDEX_op_ssadd_vec:
+ case INDEX_op_sssub_vec:
+ return vece < MO_64 ? -1 : 0;
+ default:
+ return 0;
+ }
+}
+
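+/*
+ * Emit a vector comparison using only the EQ/GT/GTU forms the hardware
+ * provides, swapping the operands where necessary.  Returns true if the
+ * caller must still invert the result to realize the requested condition.
+ */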
+static bool expand_vec_cmp_noinv(TCGType type, unsigned vece, TCGv_vec v0,
+ TCGv_vec v1, TCGv_vec v2, TCGCond cond)
+{
+ bool need_swap = false, need_inv = false;
+
+ switch (cond) {
+ case TCG_COND_EQ:
+ case TCG_COND_GT:
+ case TCG_COND_GTU:
+ break;
+ case TCG_COND_NE:
+ case TCG_COND_LE:
+ case TCG_COND_LEU:
+ need_inv = true;
+ break;
+ case TCG_COND_LT:
+ case TCG_COND_LTU:
+ need_swap = true;
+ break;
+ case TCG_COND_GE:
+ case TCG_COND_GEU:
+ need_swap = need_inv = true;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ if (need_inv) {
+ cond = tcg_invert_cond(cond);
+ }
+ if (need_swap) {
+ TCGv_vec t1;
+ t1 = v1, v1 = v2, v2 = t1;
+ cond = tcg_swap_cond(cond);
+ }
+
+ vec_gen_4(INDEX_op_cmp_vec, type, vece, tcgv_vec_arg(v0),
+ tcgv_vec_arg(v1), tcgv_vec_arg(v2), cond);
+
+ return need_inv;
+}
+
+static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
+ TCGv_vec v1, TCGv_vec v2, TCGCond cond)
+{
+ if (expand_vec_cmp_noinv(type, vece, v0, v1, v2, cond)) {
+ tcg_gen_not_vec(vece, v0, v0);
+ }
+}
+
+static void expand_vec_cmpsel(TCGType type, unsigned vece, TCGv_vec v0,
+ TCGv_vec c1, TCGv_vec c2,
+ TCGv_vec v3, TCGv_vec v4, TCGCond cond)
+{
+ TCGv_vec t = tcg_temp_new_vec(type);
+
+ if (expand_vec_cmp_noinv(type, vece, t, c1, c2, cond)) {
+ /* Invert the sense of the compare by swapping arguments. */
+ tcg_gen_bitsel_vec(vece, v0, t, v4, v3);
+ } else {
+ tcg_gen_bitsel_vec(vece, v0, t, v3, v4);
+ }
+ tcg_temp_free_vec(t);
+}
+
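+/*
+ * Expand saturating add/sub for elements narrower than 64 bits: unpack
+ * both inputs with sign-extension to the next wider element size, do the
+ * arithmetic there, and repack with the saturating VECTOR PACK SATURATE.
+ */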
+static void expand_vec_sat(TCGType type, unsigned vece, TCGv_vec v0,
+ TCGv_vec v1, TCGv_vec v2, TCGOpcode add_sub_opc)
+{
+ TCGv_vec h1 = tcg_temp_new_vec(type);
+ TCGv_vec h2 = tcg_temp_new_vec(type);
+ TCGv_vec l1 = tcg_temp_new_vec(type);
+ TCGv_vec l2 = tcg_temp_new_vec(type);
+
+    tcg_debug_assert(vece < MO_64);
+
+ /* Unpack with sign-extension. */
+ vec_gen_2(INDEX_op_s390_vuph_vec, type, vece,
+ tcgv_vec_arg(h1), tcgv_vec_arg(v1));
+ vec_gen_2(INDEX_op_s390_vuph_vec, type, vece,
+ tcgv_vec_arg(h2), tcgv_vec_arg(v2));
+
+ vec_gen_2(INDEX_op_s390_vupl_vec, type, vece,
+ tcgv_vec_arg(l1), tcgv_vec_arg(v1));
+ vec_gen_2(INDEX_op_s390_vupl_vec, type, vece,
+ tcgv_vec_arg(l2), tcgv_vec_arg(v2));
+
+ /* Arithmetic on a wider element size. */
+ vec_gen_3(add_sub_opc, type, vece + 1, tcgv_vec_arg(h1),
+ tcgv_vec_arg(h1), tcgv_vec_arg(h2));
+ vec_gen_3(add_sub_opc, type, vece + 1, tcgv_vec_arg(l1),
+ tcgv_vec_arg(l1), tcgv_vec_arg(l2));
+
+ /* Pack with saturation. */
+ vec_gen_3(INDEX_op_s390_vpks_vec, type, vece + 1,
+ tcgv_vec_arg(v0), tcgv_vec_arg(h1), tcgv_vec_arg(l1));
+
+ tcg_temp_free_vec(h1);
+ tcg_temp_free_vec(h2);
+ tcg_temp_free_vec(l1);
+ tcg_temp_free_vec(l2);
+}
+
+void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
+ TCGArg a0, ...)
+{
+ va_list va;
+ TCGv_vec v0, v1, v2, v3, v4, t0;
+
+ va_start(va, a0);
+ v0 = temp_tcgv_vec(arg_temp(a0));
+ v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
+ v2 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
+
+ switch (opc) {
+ case INDEX_op_cmp_vec:
+ expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
+ break;
+
+ case INDEX_op_cmpsel_vec:
+ v3 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
+ v4 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
+ expand_vec_cmpsel(type, vece, v0, v1, v2, v3, v4, va_arg(va, TCGArg));
+ break;
+
+ case INDEX_op_rotrv_vec:
+ t0 = tcg_temp_new_vec(type);
+ tcg_gen_neg_vec(vece, t0, v2);
+ tcg_gen_rotlv_vec(vece, v0, v1, t0);
+ tcg_temp_free_vec(t0);
+ break;
+
+ case INDEX_op_ssadd_vec:
+ expand_vec_sat(type, vece, v0, v1, v2, INDEX_op_add_vec);
+ break;
+ case INDEX_op_sssub_vec:
+ expand_vec_sat(type, vece, v0, v1, v2, INDEX_op_sub_vec);
+ break;
+
+ default:
+ g_assert_not_reached();
+ }
+ va_end(va);
+}
+
+static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
+{
+ switch (op) {
+ case INDEX_op_goto_ptr:
+ return C_O0_I1(r);
+
+ case INDEX_op_ld8u_i32:
+ case INDEX_op_ld8u_i64:
+ case INDEX_op_ld8s_i32:
+ case INDEX_op_ld8s_i64:
+ case INDEX_op_ld16u_i32:
+ case INDEX_op_ld16u_i64:
+ case INDEX_op_ld16s_i32:
+ case INDEX_op_ld16s_i64:
+ case INDEX_op_ld_i32:
+ case INDEX_op_ld32u_i64:
+ case INDEX_op_ld32s_i64:
+ case INDEX_op_ld_i64:
+ return C_O1_I1(r, r);
+
+ case INDEX_op_st8_i32:
+ case INDEX_op_st8_i64:
+ case INDEX_op_st16_i32:
+ case INDEX_op_st16_i64:
+ case INDEX_op_st_i32:
+ case INDEX_op_st32_i64:
+ case INDEX_op_st_i64:
+ return C_O0_I2(r, r);
+
+ case INDEX_op_add_i32:
+ case INDEX_op_add_i64:
+ case INDEX_op_shl_i64:
+ case INDEX_op_shr_i64:
+ case INDEX_op_sar_i64:
+ case INDEX_op_rotl_i32:
+ case INDEX_op_rotl_i64:
+ case INDEX_op_rotr_i32:
+ case INDEX_op_rotr_i64:
+ case INDEX_op_clz_i64:
+ case INDEX_op_setcond_i32:
+ case INDEX_op_setcond_i64:
+ return C_O1_I2(r, r, ri);
+
+ case INDEX_op_sub_i32:
+ case INDEX_op_sub_i64:
+ case INDEX_op_and_i32:
+ case INDEX_op_and_i64:
+ case INDEX_op_or_i32:
+ case INDEX_op_or_i64:
+ case INDEX_op_xor_i32:
+ case INDEX_op_xor_i64:
+ return (HAVE_FACILITY(DISTINCT_OPS)
+ ? C_O1_I2(r, r, ri)
+ : C_O1_I2(r, 0, ri));
+
+ case INDEX_op_mul_i32:
+ /* If we have the general-instruction-extensions, then we have
+ MULTIPLY SINGLE IMMEDIATE with a signed 32-bit, otherwise we
+ have only MULTIPLY HALFWORD IMMEDIATE, with a signed 16-bit. */
+ return (HAVE_FACILITY(GEN_INST_EXT)
+ ? C_O1_I2(r, 0, ri)
+ : C_O1_I2(r, 0, rI));
+
+ case INDEX_op_mul_i64:
+ return (HAVE_FACILITY(GEN_INST_EXT)
+ ? C_O1_I2(r, 0, rJ)
+ : C_O1_I2(r, 0, rI));
+
+ case INDEX_op_shl_i32:
+ case INDEX_op_shr_i32:
+ case INDEX_op_sar_i32:
+ return (HAVE_FACILITY(DISTINCT_OPS)
+ ? C_O1_I2(r, r, ri)
+ : C_O1_I2(r, 0, ri));
+
+ case INDEX_op_brcond_i32:
+ case INDEX_op_brcond_i64:
+ return C_O0_I2(r, ri);
+
+ case INDEX_op_bswap16_i32:
+ case INDEX_op_bswap16_i64:
+ case INDEX_op_bswap32_i32:
+ case INDEX_op_bswap32_i64:
+ case INDEX_op_bswap64_i64:
+ case INDEX_op_neg_i32:
+ case INDEX_op_neg_i64:
+ case INDEX_op_ext8s_i32:
+ case INDEX_op_ext8s_i64:
+ case INDEX_op_ext8u_i32:
+ case INDEX_op_ext8u_i64:
+ case INDEX_op_ext16s_i32:
+ case INDEX_op_ext16s_i64:
+ case INDEX_op_ext16u_i32:
+ case INDEX_op_ext16u_i64:
+ case INDEX_op_ext32s_i64:
+ case INDEX_op_ext32u_i64:
+ case INDEX_op_ext_i32_i64:
+ case INDEX_op_extu_i32_i64:
+ case INDEX_op_extract_i32:
+ case INDEX_op_extract_i64:
+ return C_O1_I1(r, r);
+
+ case INDEX_op_qemu_ld_i32:
+ case INDEX_op_qemu_ld_i64:
+ return C_O1_I1(r, L);
+ case INDEX_op_qemu_st_i64:
+ case INDEX_op_qemu_st_i32:
+ return C_O0_I2(L, L);
+
+ case INDEX_op_deposit_i32:
+ case INDEX_op_deposit_i64:
+ return C_O1_I2(r, rZ, r);
+
+ case INDEX_op_movcond_i32:
+ case INDEX_op_movcond_i64:
+ return (HAVE_FACILITY(LOAD_ON_COND2)
+ ? C_O1_I4(r, r, ri, rI, 0)
+ : C_O1_I4(r, r, ri, r, 0));
+
+ case INDEX_op_div2_i32:
+ case INDEX_op_div2_i64:
+ case INDEX_op_divu2_i32:
+ case INDEX_op_divu2_i64:
+ return C_O2_I3(b, a, 0, 1, r);
+
+ case INDEX_op_mulu2_i64:
+ return C_O2_I2(b, a, 0, r);
+
+ case INDEX_op_add2_i32:
+ case INDEX_op_sub2_i32:
+ return (HAVE_FACILITY(EXT_IMM)
+ ? C_O2_I4(r, r, 0, 1, ri, r)
+ : C_O2_I4(r, r, 0, 1, r, r));
+
+ case INDEX_op_add2_i64:
+ case INDEX_op_sub2_i64:
+ return (HAVE_FACILITY(EXT_IMM)
+ ? C_O2_I4(r, r, 0, 1, rA, r)
+ : C_O2_I4(r, r, 0, 1, r, r));
+
+ case INDEX_op_st_vec:
+ return C_O0_I2(v, r);
+ case INDEX_op_ld_vec:
+ case INDEX_op_dupm_vec:
+ return C_O1_I1(v, r);
+ case INDEX_op_dup_vec:
+ return C_O1_I1(v, vr);
+ case INDEX_op_abs_vec:
+ case INDEX_op_neg_vec:
+ case INDEX_op_not_vec:
+ case INDEX_op_rotli_vec:
+ case INDEX_op_sari_vec:
+ case INDEX_op_shli_vec:
+ case INDEX_op_shri_vec:
+ case INDEX_op_s390_vuph_vec:
+ case INDEX_op_s390_vupl_vec:
+ return C_O1_I1(v, v);
+ case INDEX_op_add_vec:
+ case INDEX_op_sub_vec:
+ case INDEX_op_and_vec:
+ case INDEX_op_andc_vec:
+ case INDEX_op_or_vec:
+ case INDEX_op_orc_vec:
+ case INDEX_op_xor_vec:
+ case INDEX_op_cmp_vec:
+ case INDEX_op_mul_vec:
+ case INDEX_op_rotlv_vec:
+ case INDEX_op_rotrv_vec:
+ case INDEX_op_shlv_vec:
+ case INDEX_op_shrv_vec:
+ case INDEX_op_sarv_vec:
+ case INDEX_op_smax_vec:
+ case INDEX_op_smin_vec:
+ case INDEX_op_umax_vec:
+ case INDEX_op_umin_vec:
+ case INDEX_op_s390_vpks_vec:
+ return C_O1_I2(v, v, v);
+ case INDEX_op_rotls_vec:
+ case INDEX_op_shls_vec:
+ case INDEX_op_shrs_vec:
+ case INDEX_op_sars_vec:
+ return C_O1_I2(v, v, r);
+ case INDEX_op_bitsel_vec:
+ return C_O1_I3(v, v, v, v);
+
+ default:
+ g_assert_not_reached();
+ }
+}
+
+/*
+ * Mainline glibc added HWCAP_S390_VX before it was kernel abi.
+ * Some distros have fixed this up locally, others have not.
+ */
+#ifndef HWCAP_S390_VXRS
+#define HWCAP_S390_VXRS 2048
+#endif
+
+static void query_s390_facilities(void)
+{
+ unsigned long hwcap = qemu_getauxval(AT_HWCAP);
+
+ /* Is STORE FACILITY LIST EXTENDED available? Honestly, I believe this
+ is present on all 64-bit systems, but let's check for it anyway. */
+ if (hwcap & HWCAP_S390_STFLE) {
+ register int r0 __asm__("0") = ARRAY_SIZE(s390_facilities) - 1;
+ register void *r1 __asm__("1") = s390_facilities;
+
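+        /*
+         * On input r0 holds the number of available doublewords minus one;
+         * STFLE fills them at 0(%r1) and leaves the number of doublewords
+         * needed, minus one, in r0.
+         */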
+ /* stfle 0(%r1) */
+ asm volatile(".word 0xb2b0,0x1000"
+ : "=r"(r0) : "r"(r0), "r"(r1) : "memory", "cc");
+ }
+
+ /*
+ * Use of vector registers requires os support beyond the facility bit.
+ * If the kernel does not advertise support, disable the facility bits.
+ * There is nothing else we currently care about in the 3rd word, so
+ * disable VECTOR with one store.
+ */
+ if (!(hwcap & HWCAP_S390_VXRS)) {
+ s390_facilities[2] = 0;
+ }
+}
+
+static void tcg_target_init(TCGContext *s)
+{
+ query_s390_facilities();
+
+ tcg_target_available_regs[TCG_TYPE_I32] = 0xffff;
+ tcg_target_available_regs[TCG_TYPE_I64] = 0xffff;
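+    /*
+     * Register numbers 32..63 are the vector registers, hence the upper
+     * half of the 64-bit register mask.
+     */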
+ if (HAVE_FACILITY(VECTOR)) {
+ tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
+ tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
+ }
+
+ tcg_target_call_clobber_regs = 0;
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R1);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R2);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R4);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R5);
+ /* The r6 register is technically call-saved, but it's also a parameter
+ register, so it can get killed by setup for the qemu_st helper. */
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R6);
+ /* The return register can be considered call-clobbered. */
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R14);
+
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V0);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V1);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V2);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V3);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V4);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V5);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V6);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V7);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V16);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V17);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V18);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V19);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V20);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V21);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V22);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V23);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V24);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V25);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V26);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V27);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V28);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V29);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V30);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V31);
+
+ s->reserved_regs = 0;
+ tcg_regset_set_reg(s->reserved_regs, TCG_TMP0);
+ /* XXX many insns can't be used with R0, so we better avoid it for now */
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0);
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
+ if (USE_REG_TB) {
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB);
+ }
+}
+
+#define FRAME_SIZE ((int)(TCG_TARGET_CALL_STACK_OFFSET \
+ + TCG_STATIC_CALL_ARGS_SIZE \
+ + CPU_TEMP_BUF_NLONGS * sizeof(long)))
+
+static void tcg_target_qemu_prologue(TCGContext *s)
+{
+ /* stmg %r6,%r15,48(%r15) (save registers) */
+ tcg_out_insn(s, RXY, STMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15, 48);
+
+ /* aghi %r15,-frame_size */
+ tcg_out_insn(s, RI, AGHI, TCG_REG_R15, -FRAME_SIZE);
+
+ tcg_set_frame(s, TCG_REG_CALL_STACK,
+ TCG_STATIC_CALL_ARGS_SIZE + TCG_TARGET_CALL_STACK_OFFSET,
+ CPU_TEMP_BUF_NLONGS * sizeof(long));
+
+#ifndef CONFIG_SOFTMMU
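+    /*
+     * A guest_base below 0x80000 can be folded into the signed 20-bit
+     * displacement of the qemu_ld/st memory accesses; only larger values
+     * need a dedicated base register.
+     */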
+ if (guest_base >= 0x80000) {
+ tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base, true);
+ tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
+ }
+#endif
+
+ tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
+ if (USE_REG_TB) {
+ tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB,
+ tcg_target_call_iarg_regs[1]);
+ }
+
+ /* br %r3 (go to TB) */
+ tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, tcg_target_call_iarg_regs[1]);
+
+ /*
+ * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
+ * and fall through to the rest of the epilogue.
+ */
+ tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, 0);
+
+ /* TB epilogue */
+ tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
+
+ /* lmg %r6,%r15,fs+48(%r15) (restore registers) */
+ tcg_out_insn(s, RXY, LMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15,
+ FRAME_SIZE + 48);
+
+ /* br %r14 (return) */
+ tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_REG_R14);
+}
+
+static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
+{
+ memset(p, 0x07, count * sizeof(tcg_insn_unit));
+}
+
+typedef struct {
+ DebugFrameHeader h;
+ uint8_t fde_def_cfa[4];
+ uint8_t fde_reg_ofs[18];
+} DebugFrame;
+
+/* We're expecting a 2-byte uleb128 encoded value. */
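+/*
+ * A uleb128 below 1 << 14 is encoded as the low seven bits with the
+ * continuation bit set, followed by the remaining bits; that is exactly
+ * how fde_def_cfa emits FRAME_SIZE below.
+ */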
+QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
+
+#define ELF_HOST_MACHINE EM_S390
+
+static const DebugFrame debug_frame = {
+ .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
+ .h.cie.id = -1,
+ .h.cie.version = 1,
+ .h.cie.code_align = 1,
+ .h.cie.data_align = 8, /* sleb128 8 */
+ .h.cie.return_column = TCG_REG_R14,
+
+ /* Total FDE size does not include the "len" member. */
+ .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
+
+ .fde_def_cfa = {
+ 12, TCG_REG_CALL_STACK, /* DW_CFA_def_cfa %r15, ... */
+ (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
+ (FRAME_SIZE >> 7)
+ },
+ .fde_reg_ofs = {
+ 0x86, 6, /* DW_CFA_offset, %r6, 48 */
+ 0x87, 7, /* DW_CFA_offset, %r7, 56 */
+ 0x88, 8, /* DW_CFA_offset, %r8, 64 */
+        0x89, 9,                        /* DW_CFA_offset, %r9, 72 */
+ 0x8a, 10, /* DW_CFA_offset, %r10, 80 */
+ 0x8b, 11, /* DW_CFA_offset, %r11, 88 */
+ 0x8c, 12, /* DW_CFA_offset, %r12, 96 */
+ 0x8d, 13, /* DW_CFA_offset, %r13, 104 */
+ 0x8e, 14, /* DW_CFA_offset, %r14, 112 */
+ }
+};
+
+void tcg_register_jit(const void *buf, size_t buf_size)
+{
+ tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
+}
diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
new file mode 100644
index 000000000..527ada0f6
--- /dev/null
+++ b/tcg/s390x/tcg-target.h
@@ -0,0 +1,186 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2009 Ulrich Hecht <uli@suse.de>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef S390_TCG_TARGET_H
+#define S390_TCG_TARGET_H
+
+#define TCG_TARGET_INSN_UNIT_SIZE 2
+#define TCG_TARGET_TLB_DISPLACEMENT_BITS 19
+
+/* We have a +- 4GB range on the branches; leave some slop. */
+#define MAX_CODE_GEN_BUFFER_SIZE (3 * GiB)
+
+typedef enum TCGReg {
+ TCG_REG_R0, TCG_REG_R1, TCG_REG_R2, TCG_REG_R3,
+ TCG_REG_R4, TCG_REG_R5, TCG_REG_R6, TCG_REG_R7,
+ TCG_REG_R8, TCG_REG_R9, TCG_REG_R10, TCG_REG_R11,
+ TCG_REG_R12, TCG_REG_R13, TCG_REG_R14, TCG_REG_R15,
+
+ TCG_REG_V0 = 32, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
+ TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
+ TCG_REG_V8, TCG_REG_V9, TCG_REG_V10, TCG_REG_V11,
+ TCG_REG_V12, TCG_REG_V13, TCG_REG_V14, TCG_REG_V15,
+ TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
+ TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
+ TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
+ TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
+
+ TCG_AREG0 = TCG_REG_R10,
+ TCG_REG_CALL_STACK = TCG_REG_R15
+} TCGReg;
+
+#define TCG_TARGET_NB_REGS 64
+
+/* A list of relevant facilities used by this translator. Some of these
+ are required for proper operation, and these are checked at startup. */
+
+#define FACILITY_ZARCH_ACTIVE 2
+#define FACILITY_LONG_DISP 18
+#define FACILITY_EXT_IMM 21
+#define FACILITY_GEN_INST_EXT 34
+#define FACILITY_LOAD_ON_COND 45
+#define FACILITY_FAST_BCR_SER FACILITY_LOAD_ON_COND
+#define FACILITY_DISTINCT_OPS FACILITY_LOAD_ON_COND
+#define FACILITY_LOAD_ON_COND2 53
+#define FACILITY_VECTOR 129
+#define FACILITY_VECTOR_ENH1 135
+
+extern uint64_t s390_facilities[3];
+
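+/*
+ * Facility bits are numbered from the most-significant bit of each
+ * doubleword, as stored by STORE FACILITY LIST EXTENDED; for example,
+ * FACILITY_VECTOR (129) is bit 63 - (129 % 64) = 62 of s390_facilities[2].
+ */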
+#define HAVE_FACILITY(X) \
+ ((s390_facilities[FACILITY_##X / 64] >> (63 - FACILITY_##X % 64)) & 1)
+
+/* optional instructions */
+#define TCG_TARGET_HAS_div2_i32 1
+#define TCG_TARGET_HAS_rot_i32 1
+#define TCG_TARGET_HAS_ext8s_i32 1
+#define TCG_TARGET_HAS_ext16s_i32 1
+#define TCG_TARGET_HAS_ext8u_i32 1
+#define TCG_TARGET_HAS_ext16u_i32 1
+#define TCG_TARGET_HAS_bswap16_i32 1
+#define TCG_TARGET_HAS_bswap32_i32 1
+#define TCG_TARGET_HAS_not_i32 0
+#define TCG_TARGET_HAS_neg_i32 1
+#define TCG_TARGET_HAS_andc_i32 0
+#define TCG_TARGET_HAS_orc_i32 0
+#define TCG_TARGET_HAS_eqv_i32 0
+#define TCG_TARGET_HAS_nand_i32 0
+#define TCG_TARGET_HAS_nor_i32 0
+#define TCG_TARGET_HAS_clz_i32 0
+#define TCG_TARGET_HAS_ctz_i32 0
+#define TCG_TARGET_HAS_ctpop_i32 0
+#define TCG_TARGET_HAS_deposit_i32 HAVE_FACILITY(GEN_INST_EXT)
+#define TCG_TARGET_HAS_extract_i32 HAVE_FACILITY(GEN_INST_EXT)
+#define TCG_TARGET_HAS_sextract_i32 0
+#define TCG_TARGET_HAS_extract2_i32 0
+#define TCG_TARGET_HAS_movcond_i32 1
+#define TCG_TARGET_HAS_add2_i32 1
+#define TCG_TARGET_HAS_sub2_i32 1
+#define TCG_TARGET_HAS_mulu2_i32 0
+#define TCG_TARGET_HAS_muls2_i32 0
+#define TCG_TARGET_HAS_muluh_i32 0
+#define TCG_TARGET_HAS_mulsh_i32 0
+#define TCG_TARGET_HAS_extrl_i64_i32 0
+#define TCG_TARGET_HAS_extrh_i64_i32 0
+#define TCG_TARGET_HAS_direct_jump HAVE_FACILITY(GEN_INST_EXT)
+#define TCG_TARGET_HAS_qemu_st8_i32 0
+
+#define TCG_TARGET_HAS_div2_i64 1
+#define TCG_TARGET_HAS_rot_i64 1
+#define TCG_TARGET_HAS_ext8s_i64 1
+#define TCG_TARGET_HAS_ext16s_i64 1
+#define TCG_TARGET_HAS_ext32s_i64 1
+#define TCG_TARGET_HAS_ext8u_i64 1
+#define TCG_TARGET_HAS_ext16u_i64 1
+#define TCG_TARGET_HAS_ext32u_i64 1
+#define TCG_TARGET_HAS_bswap16_i64 1
+#define TCG_TARGET_HAS_bswap32_i64 1
+#define TCG_TARGET_HAS_bswap64_i64 1
+#define TCG_TARGET_HAS_not_i64 0
+#define TCG_TARGET_HAS_neg_i64 1
+#define TCG_TARGET_HAS_andc_i64 0
+#define TCG_TARGET_HAS_orc_i64 0
+#define TCG_TARGET_HAS_eqv_i64 0
+#define TCG_TARGET_HAS_nand_i64 0
+#define TCG_TARGET_HAS_nor_i64 0
+#define TCG_TARGET_HAS_clz_i64 HAVE_FACILITY(EXT_IMM)
+#define TCG_TARGET_HAS_ctz_i64 0
+#define TCG_TARGET_HAS_ctpop_i64 0
+#define TCG_TARGET_HAS_deposit_i64 HAVE_FACILITY(GEN_INST_EXT)
+#define TCG_TARGET_HAS_extract_i64 HAVE_FACILITY(GEN_INST_EXT)
+#define TCG_TARGET_HAS_sextract_i64 0
+#define TCG_TARGET_HAS_extract2_i64 0
+#define TCG_TARGET_HAS_movcond_i64 1
+#define TCG_TARGET_HAS_add2_i64 1
+#define TCG_TARGET_HAS_sub2_i64 1
+#define TCG_TARGET_HAS_mulu2_i64 1
+#define TCG_TARGET_HAS_muls2_i64 0
+#define TCG_TARGET_HAS_muluh_i64 0
+#define TCG_TARGET_HAS_mulsh_i64 0
+
+#define TCG_TARGET_HAS_v64 HAVE_FACILITY(VECTOR)
+#define TCG_TARGET_HAS_v128 HAVE_FACILITY(VECTOR)
+#define TCG_TARGET_HAS_v256 0
+
+#define TCG_TARGET_HAS_andc_vec 1
+#define TCG_TARGET_HAS_orc_vec HAVE_FACILITY(VECTOR_ENH1)
+#define TCG_TARGET_HAS_not_vec 1
+#define TCG_TARGET_HAS_neg_vec 1
+#define TCG_TARGET_HAS_abs_vec 1
+#define TCG_TARGET_HAS_roti_vec 1
+#define TCG_TARGET_HAS_rots_vec 1
+#define TCG_TARGET_HAS_rotv_vec 1
+#define TCG_TARGET_HAS_shi_vec 1
+#define TCG_TARGET_HAS_shs_vec 1
+#define TCG_TARGET_HAS_shv_vec 1
+#define TCG_TARGET_HAS_mul_vec 1
+#define TCG_TARGET_HAS_sat_vec 0
+#define TCG_TARGET_HAS_minmax_vec 1
+#define TCG_TARGET_HAS_bitsel_vec 1
+#define TCG_TARGET_HAS_cmpsel_vec 0
+
+/* used for function call generation */
+#define TCG_TARGET_STACK_ALIGN 8
+#define TCG_TARGET_CALL_STACK_OFFSET 160
+
+#define TCG_TARGET_EXTEND_ARGS 1
+#define TCG_TARGET_HAS_MEMORY_BSWAP 1
+
+#define TCG_TARGET_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD)
+
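+/*
+ * The goto_tb branch is a branch-relative-long whose 32-bit immediate
+ * counts halfwords from the start of the instruction.  jmp_rx points at
+ * the immediate field, two bytes past the opcode, hence the -2 and the
+ * division by 2 below.
+ */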
+static inline void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx,
+ uintptr_t jmp_rw, uintptr_t addr)
+{
+ /* patch the branch destination */
+ intptr_t disp = addr - (jmp_rx - 2);
+ qatomic_set((int32_t *)jmp_rw, disp / 2);
+ /* no need to flush icache explicitly */
+}
+
+#ifdef CONFIG_SOFTMMU
+#define TCG_TARGET_NEED_LDST_LABELS
+#endif
+#define TCG_TARGET_NEED_POOL_LABELS
+
+#endif
diff --git a/tcg/s390x/tcg-target.opc.h b/tcg/s390x/tcg-target.opc.h
new file mode 100644
index 000000000..0eb2350fb
--- /dev/null
+++ b/tcg/s390x/tcg-target.opc.h
@@ -0,0 +1,15 @@
+/*
+ * Copyright (c) 2021 Linaro
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.
+ *
+ * See the COPYING file in the top-level directory for details.
+ *
+ * Target-specific opcodes for host vector expansion. These will be
+ * emitted by tcg_expand_vec_op. For those familiar with GCC internals,
+ * consider these to be UNSPEC with names.
+ */
+DEF(s390_vuph_vec, 1, 1, 0, IMPLVEC)
+DEF(s390_vupl_vec, 1, 1, 0, IMPLVEC)
+DEF(s390_vpks_vec, 1, 2, 0, IMPLVEC)
diff --git a/tcg/sparc/tcg-target-con-set.h b/tcg/sparc/tcg-target-con-set.h
new file mode 100644
index 000000000..3b751dc3f
--- /dev/null
+++ b/tcg/sparc/tcg-target-con-set.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define Sparc target-specific constraint sets.
+ * Copyright (c) 2021 Linaro
+ */
+
+/*
+ * C_On_Im(...) defines a constraint set with <n> outputs and <m> inputs.
+ * Each operand should be a sequence of constraint letters as defined by
+ * tcg-target-con-str.h; the constraint combination is inclusive or.
+ */
+C_O0_I1(r)
+C_O0_I2(rZ, r)
+C_O0_I2(RZ, r)
+C_O0_I2(rZ, rJ)
+C_O0_I2(RZ, RJ)
+C_O0_I2(sZ, A)
+C_O0_I2(SZ, A)
+C_O1_I1(r, A)
+C_O1_I1(R, A)
+C_O1_I1(r, r)
+C_O1_I1(r, R)
+C_O1_I1(R, r)
+C_O1_I1(R, R)
+C_O1_I2(R, R, R)
+C_O1_I2(r, rZ, rJ)
+C_O1_I2(R, RZ, RJ)
+C_O1_I4(r, rZ, rJ, rI, 0)
+C_O1_I4(R, RZ, RJ, RI, 0)
+C_O2_I2(r, r, rZ, rJ)
+C_O2_I4(R, R, RZ, RZ, RJ, RI)
+C_O2_I4(r, r, rZ, rZ, rJ, rJ)
diff --git a/tcg/sparc/tcg-target-con-str.h b/tcg/sparc/tcg-target-con-str.h
new file mode 100644
index 000000000..fdb25d931
--- /dev/null
+++ b/tcg/sparc/tcg-target-con-str.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define Sparc target-specific operand constraints.
+ * Copyright (c) 2021 Linaro
+ */
+
+/*
+ * Define constraint letters for register sets:
+ * REGS(letter, register_mask)
+ */
+REGS('r', ALL_GENERAL_REGS)
+REGS('R', ALL_GENERAL_REGS64)
+REGS('s', ALL_QLDST_REGS)
+REGS('S', ALL_QLDST_REGS64)
+REGS('A', TARGET_LONG_BITS == 64 ? ALL_QLDST_REGS64 : ALL_QLDST_REGS)
+
+/*
+ * Define constraint letters for constants:
+ * CONST(letter, TCG_CT_CONST_* bit set)
+ */
+CONST('I', TCG_CT_CONST_S11)
+CONST('J', TCG_CT_CONST_S13)
+CONST('Z', TCG_CT_CONST_ZERO)
diff --git a/tcg/sparc/tcg-target.c.inc b/tcg/sparc/tcg-target.c.inc
new file mode 100644
index 000000000..9dd32ef95
--- /dev/null
+++ b/tcg/sparc/tcg-target.c.inc
@@ -0,0 +1,1825 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "../tcg-pool.c.inc"
+
+#ifdef CONFIG_DEBUG_TCG
+static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
+ "%g0",
+ "%g1",
+ "%g2",
+ "%g3",
+ "%g4",
+ "%g5",
+ "%g6",
+ "%g7",
+ "%o0",
+ "%o1",
+ "%o2",
+ "%o3",
+ "%o4",
+ "%o5",
+ "%o6",
+ "%o7",
+ "%l0",
+ "%l1",
+ "%l2",
+ "%l3",
+ "%l4",
+ "%l5",
+ "%l6",
+ "%l7",
+ "%i0",
+ "%i1",
+ "%i2",
+ "%i3",
+ "%i4",
+ "%i5",
+ "%i6",
+ "%i7",
+};
+#endif
+
+#ifdef __arch64__
+# define SPARC64 1
+#else
+# define SPARC64 0
+#endif
+
+#define TCG_CT_CONST_S11 0x100
+#define TCG_CT_CONST_S13 0x200
+#define TCG_CT_CONST_ZERO 0x400
+
+/*
+ * For softmmu, we need to avoid conflicts with the first 3
+ * argument registers to perform the tlb lookup, and to call
+ * the helper function.
+ */
+#ifdef CONFIG_SOFTMMU
+#define SOFTMMU_RESERVE_REGS MAKE_64BIT_MASK(TCG_REG_O0, 3)
+#else
+#define SOFTMMU_RESERVE_REGS 0
+#endif
+
+/*
+ * Note that sparcv8plus can only hold 64 bit quantities in %g and %o
+ * registers. These are saved manually by the kernel in full 64-bit
+ * slots. The %i and %l registers are saved by the register window
+ * mechanism, which only allocates space for 32 bits. Given that this
+ * window spill/fill can happen on any signal, we must consider the
+ * high bits of the %i and %l registers garbage at all times.
+ */
+#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32)
+#if SPARC64
+# define ALL_GENERAL_REGS64 ALL_GENERAL_REGS
+#else
+# define ALL_GENERAL_REGS64 MAKE_64BIT_MASK(0, 16)
+#endif
+#define ALL_QLDST_REGS (ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS)
+#define ALL_QLDST_REGS64 (ALL_GENERAL_REGS64 & ~SOFTMMU_RESERVE_REGS)
+
+/* Define some temporary registers. T2 is used for constant generation. */
+#define TCG_REG_T1 TCG_REG_G1
+#define TCG_REG_T2 TCG_REG_O7
+
+#ifndef CONFIG_SOFTMMU
+# define TCG_GUEST_BASE_REG TCG_REG_I5
+#endif
+
+#define TCG_REG_TB TCG_REG_I1
+#define USE_REG_TB (sizeof(void *) > 4)
+
+static const int tcg_target_reg_alloc_order[] = {
+ TCG_REG_L0,
+ TCG_REG_L1,
+ TCG_REG_L2,
+ TCG_REG_L3,
+ TCG_REG_L4,
+ TCG_REG_L5,
+ TCG_REG_L6,
+ TCG_REG_L7,
+
+ TCG_REG_I0,
+ TCG_REG_I1,
+ TCG_REG_I2,
+ TCG_REG_I3,
+ TCG_REG_I4,
+ TCG_REG_I5,
+
+ TCG_REG_G2,
+ TCG_REG_G3,
+ TCG_REG_G4,
+ TCG_REG_G5,
+
+ TCG_REG_O0,
+ TCG_REG_O1,
+ TCG_REG_O2,
+ TCG_REG_O3,
+ TCG_REG_O4,
+ TCG_REG_O5,
+};
+
+static const int tcg_target_call_iarg_regs[6] = {
+ TCG_REG_O0,
+ TCG_REG_O1,
+ TCG_REG_O2,
+ TCG_REG_O3,
+ TCG_REG_O4,
+ TCG_REG_O5,
+};
+
+static const int tcg_target_call_oarg_regs[] = {
+ TCG_REG_O0,
+ TCG_REG_O1,
+ TCG_REG_O2,
+ TCG_REG_O3,
+};
+
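+/*
+ * Field helpers for the fixed 32-bit SPARC instruction word: op in bits
+ * 31:30, rd in 29:25, op3 in 24:19, rs1 in 18:14, with bit 13 (the i bit)
+ * selecting a 13-bit signed immediate in place of rs2 in the low bits.
+ */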
+#define INSN_OP(x) ((x) << 30)
+#define INSN_OP2(x) ((x) << 22)
+#define INSN_OP3(x) ((x) << 19)
+#define INSN_OPF(x) ((x) << 5)
+#define INSN_RD(x) ((x) << 25)
+#define INSN_RS1(x) ((x) << 14)
+#define INSN_RS2(x) (x)
+#define INSN_ASI(x) ((x) << 5)
+
+#define INSN_IMM10(x) ((1 << 13) | ((x) & 0x3ff))
+#define INSN_IMM11(x) ((1 << 13) | ((x) & 0x7ff))
+#define INSN_IMM13(x) ((1 << 13) | ((x) & 0x1fff))
+#define INSN_OFF16(x) ((((x) >> 2) & 0x3fff) | ((((x) >> 16) & 3) << 20))
+#define INSN_OFF19(x) (((x) >> 2) & 0x07ffff)
+#define INSN_COND(x) ((x) << 25)
+
+#define COND_N 0x0
+#define COND_E 0x1
+#define COND_LE 0x2
+#define COND_L 0x3
+#define COND_LEU 0x4
+#define COND_CS 0x5
+#define COND_NEG 0x6
+#define COND_VS 0x7
+#define COND_A 0x8
+#define COND_NE 0x9
+#define COND_G 0xa
+#define COND_GE 0xb
+#define COND_GU 0xc
+#define COND_CC 0xd
+#define COND_POS 0xe
+#define COND_VC 0xf
+#define BA (INSN_OP(0) | INSN_COND(COND_A) | INSN_OP2(0x2))
+
+#define RCOND_Z 1
+#define RCOND_LEZ 2
+#define RCOND_LZ 3
+#define RCOND_NZ 5
+#define RCOND_GZ 6
+#define RCOND_GEZ 7
+
+#define MOVCC_ICC (1 << 18)
+#define MOVCC_XCC (1 << 18 | 1 << 12)
+
+#define BPCC_ICC 0
+#define BPCC_XCC (2 << 20)
+#define BPCC_PT (1 << 19)
+#define BPCC_PN 0
+#define BPCC_A (1 << 29)
+
+#define BPR_PT BPCC_PT
+
+#define ARITH_ADD (INSN_OP(2) | INSN_OP3(0x00))
+#define ARITH_ADDCC (INSN_OP(2) | INSN_OP3(0x10))
+#define ARITH_AND (INSN_OP(2) | INSN_OP3(0x01))
+#define ARITH_ANDN (INSN_OP(2) | INSN_OP3(0x05))
+#define ARITH_OR (INSN_OP(2) | INSN_OP3(0x02))
+#define ARITH_ORCC (INSN_OP(2) | INSN_OP3(0x12))
+#define ARITH_ORN (INSN_OP(2) | INSN_OP3(0x06))
+#define ARITH_XOR (INSN_OP(2) | INSN_OP3(0x03))
+#define ARITH_SUB (INSN_OP(2) | INSN_OP3(0x04))
+#define ARITH_SUBCC (INSN_OP(2) | INSN_OP3(0x14))
+#define ARITH_ADDC (INSN_OP(2) | INSN_OP3(0x08))
+#define ARITH_SUBC (INSN_OP(2) | INSN_OP3(0x0c))
+#define ARITH_UMUL (INSN_OP(2) | INSN_OP3(0x0a))
+#define ARITH_SMUL (INSN_OP(2) | INSN_OP3(0x0b))
+#define ARITH_UDIV (INSN_OP(2) | INSN_OP3(0x0e))
+#define ARITH_SDIV (INSN_OP(2) | INSN_OP3(0x0f))
+#define ARITH_MULX (INSN_OP(2) | INSN_OP3(0x09))
+#define ARITH_UDIVX (INSN_OP(2) | INSN_OP3(0x0d))
+#define ARITH_SDIVX (INSN_OP(2) | INSN_OP3(0x2d))
+#define ARITH_MOVCC (INSN_OP(2) | INSN_OP3(0x2c))
+#define ARITH_MOVR (INSN_OP(2) | INSN_OP3(0x2f))
+
+#define ARITH_ADDXC (INSN_OP(2) | INSN_OP3(0x36) | INSN_OPF(0x11))
+#define ARITH_UMULXHI (INSN_OP(2) | INSN_OP3(0x36) | INSN_OPF(0x16))
+
+#define SHIFT_SLL (INSN_OP(2) | INSN_OP3(0x25))
+#define SHIFT_SRL (INSN_OP(2) | INSN_OP3(0x26))
+#define SHIFT_SRA (INSN_OP(2) | INSN_OP3(0x27))
+
+#define SHIFT_SLLX (INSN_OP(2) | INSN_OP3(0x25) | (1 << 12))
+#define SHIFT_SRLX (INSN_OP(2) | INSN_OP3(0x26) | (1 << 12))
+#define SHIFT_SRAX (INSN_OP(2) | INSN_OP3(0x27) | (1 << 12))
+
+#define RDY (INSN_OP(2) | INSN_OP3(0x28) | INSN_RS1(0))
+#define WRY (INSN_OP(2) | INSN_OP3(0x30) | INSN_RD(0))
+#define JMPL (INSN_OP(2) | INSN_OP3(0x38))
+#define RETURN (INSN_OP(2) | INSN_OP3(0x39))
+#define SAVE (INSN_OP(2) | INSN_OP3(0x3c))
+#define RESTORE (INSN_OP(2) | INSN_OP3(0x3d))
+#define SETHI (INSN_OP(0) | INSN_OP2(0x4))
+#define CALL INSN_OP(1)
+#define LDUB (INSN_OP(3) | INSN_OP3(0x01))
+#define LDSB (INSN_OP(3) | INSN_OP3(0x09))
+#define LDUH (INSN_OP(3) | INSN_OP3(0x02))
+#define LDSH (INSN_OP(3) | INSN_OP3(0x0a))
+#define LDUW (INSN_OP(3) | INSN_OP3(0x00))
+#define LDSW (INSN_OP(3) | INSN_OP3(0x08))
+#define LDX (INSN_OP(3) | INSN_OP3(0x0b))
+#define STB (INSN_OP(3) | INSN_OP3(0x05))
+#define STH (INSN_OP(3) | INSN_OP3(0x06))
+#define STW (INSN_OP(3) | INSN_OP3(0x04))
+#define STX (INSN_OP(3) | INSN_OP3(0x0e))
+#define LDUBA (INSN_OP(3) | INSN_OP3(0x11))
+#define LDSBA (INSN_OP(3) | INSN_OP3(0x19))
+#define LDUHA (INSN_OP(3) | INSN_OP3(0x12))
+#define LDSHA (INSN_OP(3) | INSN_OP3(0x1a))
+#define LDUWA (INSN_OP(3) | INSN_OP3(0x10))
+#define LDSWA (INSN_OP(3) | INSN_OP3(0x18))
+#define LDXA (INSN_OP(3) | INSN_OP3(0x1b))
+#define STBA (INSN_OP(3) | INSN_OP3(0x15))
+#define STHA (INSN_OP(3) | INSN_OP3(0x16))
+#define STWA (INSN_OP(3) | INSN_OP3(0x14))
+#define STXA (INSN_OP(3) | INSN_OP3(0x1e))
+
+#define MEMBAR (INSN_OP(2) | INSN_OP3(0x28) | INSN_RS1(15) | (1 << 13))
+
+#define NOP (SETHI | INSN_RD(TCG_REG_G0) | 0)
+
+#ifndef ASI_PRIMARY_LITTLE
+#define ASI_PRIMARY_LITTLE 0x88
+#endif
+
+#define LDUH_LE (LDUHA | INSN_ASI(ASI_PRIMARY_LITTLE))
+#define LDSH_LE (LDSHA | INSN_ASI(ASI_PRIMARY_LITTLE))
+#define LDUW_LE (LDUWA | INSN_ASI(ASI_PRIMARY_LITTLE))
+#define LDSW_LE (LDSWA | INSN_ASI(ASI_PRIMARY_LITTLE))
+#define LDX_LE (LDXA | INSN_ASI(ASI_PRIMARY_LITTLE))
+
+#define STH_LE (STHA | INSN_ASI(ASI_PRIMARY_LITTLE))
+#define STW_LE (STWA | INSN_ASI(ASI_PRIMARY_LITTLE))
+#define STX_LE (STXA | INSN_ASI(ASI_PRIMARY_LITTLE))
+
+#ifndef use_vis3_instructions
+bool use_vis3_instructions;
+#endif
+
+static bool check_fit_i64(int64_t val, unsigned int bits)
+{
+ return val == sextract64(val, 0, bits);
+}
+
+static bool check_fit_i32(int32_t val, unsigned int bits)
+{
+ return val == sextract32(val, 0, bits);
+}
+
+#define check_fit_tl check_fit_i64
+#if SPARC64
+# define check_fit_ptr check_fit_i64
+#else
+# define check_fit_ptr check_fit_i32
+#endif
+
+static bool patch_reloc(tcg_insn_unit *src_rw, int type,
+ intptr_t value, intptr_t addend)
+{
+ const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
+ uint32_t insn = *src_rw;
+ intptr_t pcrel;
+
+ value += addend;
+ pcrel = tcg_ptr_byte_diff((tcg_insn_unit *)value, src_rx);
+
+ switch (type) {
+ case R_SPARC_WDISP16:
+ assert(check_fit_ptr(pcrel >> 2, 16));
+ insn &= ~INSN_OFF16(-1);
+ insn |= INSN_OFF16(pcrel);
+ break;
+ case R_SPARC_WDISP19:
+ assert(check_fit_ptr(pcrel >> 2, 19));
+ insn &= ~INSN_OFF19(-1);
+ insn |= INSN_OFF19(pcrel);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ *src_rw = insn;
+ return true;
+}
+
+/* test if a constant matches the constraint */
+static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
+{
+ if (ct & TCG_CT_CONST) {
+ return 1;
+ }
+
+ if (type == TCG_TYPE_I32) {
+ val = (int32_t)val;
+ }
+
+ if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
+ return 1;
+ } else if ((ct & TCG_CT_CONST_S11) && check_fit_tl(val, 11)) {
+ return 1;
+ } else if ((ct & TCG_CT_CONST_S13) && check_fit_tl(val, 13)) {
+ return 1;
+ } else {
+ return 0;
+ }
+}
+
+static void tcg_out_nop(TCGContext *s)
+{
+ tcg_out32(s, NOP);
+}
+
+static void tcg_out_arith(TCGContext *s, TCGReg rd, TCGReg rs1,
+ TCGReg rs2, int op)
+{
+ tcg_out32(s, op | INSN_RD(rd) | INSN_RS1(rs1) | INSN_RS2(rs2));
+}
+
+static void tcg_out_arithi(TCGContext *s, TCGReg rd, TCGReg rs1,
+ int32_t offset, int op)
+{
+ tcg_out32(s, op | INSN_RD(rd) | INSN_RS1(rs1) | INSN_IMM13(offset));
+}
+
+static void tcg_out_arithc(TCGContext *s, TCGReg rd, TCGReg rs1,
+ int32_t val2, int val2const, int op)
+{
+ tcg_out32(s, op | INSN_RD(rd) | INSN_RS1(rs1)
+ | (val2const ? INSN_IMM13(val2) : INSN_RS2(val2)));
+}
+
+static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
+{
+ if (ret != arg) {
+ tcg_out_arith(s, ret, arg, TCG_REG_G0, ARITH_OR);
+ }
+ return true;
+}
+
+static void tcg_out_mov_delay(TCGContext *s, TCGReg ret, TCGReg arg)
+{
+ if (ret != arg) {
+ tcg_out_arith(s, ret, arg, TCG_REG_G0, ARITH_OR);
+ } else {
+ tcg_out_nop(s);
+ }
+}
+
+static void tcg_out_sethi(TCGContext *s, TCGReg ret, uint32_t arg)
+{
+ tcg_out32(s, SETHI | INSN_RD(ret) | ((arg & 0xfffffc00) >> 10));
+}
+
+static void tcg_out_movi_imm13(TCGContext *s, TCGReg ret, int32_t arg)
+{
+ tcg_out_arithi(s, ret, TCG_REG_G0, arg, ARITH_OR);
+}
+
+static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
+ tcg_target_long arg, bool in_prologue)
+{
+ tcg_target_long hi, lo = (int32_t)arg;
+ tcg_target_long test, lsb;
+
+ /* Make sure we test 32-bit constants for imm13 properly. */
+ if (type == TCG_TYPE_I32) {
+ arg = lo;
+ }
+
+ /* A 13-bit constant sign-extended to 64-bits. */
+ if (check_fit_tl(arg, 13)) {
+ tcg_out_movi_imm13(s, ret, arg);
+ return;
+ }
+
+ /* A 13-bit constant relative to the TB. */
+ if (!in_prologue && USE_REG_TB) {
+ test = tcg_tbrel_diff(s, (void *)arg);
+ if (check_fit_ptr(test, 13)) {
+ tcg_out_arithi(s, ret, TCG_REG_TB, test, ARITH_ADD);
+ return;
+ }
+ }
+
+ /* A 32-bit constant, or 32-bit zero-extended to 64-bits. */
+ if (type == TCG_TYPE_I32 || arg == (uint32_t)arg) {
+ tcg_out_sethi(s, ret, arg);
+ if (arg & 0x3ff) {
+ tcg_out_arithi(s, ret, ret, arg & 0x3ff, ARITH_OR);
+ }
+ return;
+ }
+
+ /* A 32-bit constant sign-extended to 64-bits. */
+ if (arg == lo) {
+ tcg_out_sethi(s, ret, ~arg);
+ tcg_out_arithi(s, ret, ret, (arg & 0x3ff) | -0x400, ARITH_XOR);
+ return;
+ }
+
+ /* A 21-bit constant, shifted. */
+ lsb = ctz64(arg);
+ test = (tcg_target_long)arg >> lsb;
+ if (check_fit_tl(test, 13)) {
+ tcg_out_movi_imm13(s, ret, test);
+ tcg_out_arithi(s, ret, ret, lsb, SHIFT_SLLX);
+ return;
+ } else if (lsb > 10 && test == extract64(test, 0, 21)) {
+ tcg_out_sethi(s, ret, test << 10);
+ tcg_out_arithi(s, ret, ret, lsb - 10, SHIFT_SLLX);
+ return;
+ }
+
+ /* A 64-bit constant decomposed into 2 32-bit pieces. */
+ if (check_fit_i32(lo, 13)) {
+ hi = (arg - lo) >> 32;
+ tcg_out_movi(s, TCG_TYPE_I32, ret, hi);
+ tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX);
+ tcg_out_arithi(s, ret, ret, lo, ARITH_ADD);
+ } else {
+ hi = arg >> 32;
+ tcg_out_movi(s, TCG_TYPE_I32, ret, hi);
+ tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_T2, lo);
+ tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX);
+ tcg_out_arith(s, ret, ret, TCG_REG_T2, ARITH_OR);
+ }
+}
+
+static void tcg_out_movi(TCGContext *s, TCGType type,
+ TCGReg ret, tcg_target_long arg)
+{
+ tcg_out_movi_int(s, type, ret, arg, false);
+}
+
+static void tcg_out_ldst_rr(TCGContext *s, TCGReg data, TCGReg a1,
+ TCGReg a2, int op)
+{
+ tcg_out32(s, op | INSN_RD(data) | INSN_RS1(a1) | INSN_RS2(a2));
+}
+
+static void tcg_out_ldst(TCGContext *s, TCGReg ret, TCGReg addr,
+ intptr_t offset, int op)
+{
+ if (check_fit_ptr(offset, 13)) {
+ tcg_out32(s, op | INSN_RD(ret) | INSN_RS1(addr) |
+ INSN_IMM13(offset));
+ } else {
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, offset);
+ tcg_out_ldst_rr(s, ret, addr, TCG_REG_T1, op);
+ }
+}
+
+static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
+ TCGReg arg1, intptr_t arg2)
+{
+ tcg_out_ldst(s, ret, arg1, arg2, (type == TCG_TYPE_I32 ? LDUW : LDX));
+}
+
+static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
+ TCGReg arg1, intptr_t arg2)
+{
+ tcg_out_ldst(s, arg, arg1, arg2, (type == TCG_TYPE_I32 ? STW : STX));
+}
+
+static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
+ TCGReg base, intptr_t ofs)
+{
+ if (val == 0) {
+ tcg_out_st(s, type, TCG_REG_G0, base, ofs);
+ return true;
+ }
+ return false;
+}
+
+static void tcg_out_ld_ptr(TCGContext *s, TCGReg ret, const void *arg)
+{
+ intptr_t diff = tcg_tbrel_diff(s, arg);
+ if (USE_REG_TB && check_fit_ptr(diff, 13)) {
+ tcg_out_ld(s, TCG_TYPE_PTR, ret, TCG_REG_TB, diff);
+ return;
+ }
+ tcg_out_movi(s, TCG_TYPE_PTR, ret, (uintptr_t)arg & ~0x3ff);
+ tcg_out_ld(s, TCG_TYPE_PTR, ret, ret, (uintptr_t)arg & 0x3ff);
+}
+
+static void tcg_out_sety(TCGContext *s, TCGReg rs)
+{
+ tcg_out32(s, WRY | INSN_RS1(TCG_REG_G0) | INSN_RS2(rs));
+}
+
+static void tcg_out_rdy(TCGContext *s, TCGReg rd)
+{
+ tcg_out32(s, RDY | INSN_RD(rd));
+}
+
+static void tcg_out_div32(TCGContext *s, TCGReg rd, TCGReg rs1,
+ int32_t val2, int val2const, int uns)
+{
+ /* Load Y with the sign/zero extension of RS1 to 64-bits. */
+ if (uns) {
+ tcg_out_sety(s, TCG_REG_G0);
+ } else {
+ tcg_out_arithi(s, TCG_REG_T1, rs1, 31, SHIFT_SRA);
+ tcg_out_sety(s, TCG_REG_T1);
+ }
+
+ tcg_out_arithc(s, rd, rs1, val2, val2const,
+ uns ? ARITH_UDIV : ARITH_SDIV);
+}
+
+static const uint8_t tcg_cond_to_bcond[] = {
+ [TCG_COND_EQ] = COND_E,
+ [TCG_COND_NE] = COND_NE,
+ [TCG_COND_LT] = COND_L,
+ [TCG_COND_GE] = COND_GE,
+ [TCG_COND_LE] = COND_LE,
+ [TCG_COND_GT] = COND_G,
+ [TCG_COND_LTU] = COND_CS,
+ [TCG_COND_GEU] = COND_CC,
+ [TCG_COND_LEU] = COND_LEU,
+ [TCG_COND_GTU] = COND_GU,
+};
+
+static const uint8_t tcg_cond_to_rcond[] = {
+ [TCG_COND_EQ] = RCOND_Z,
+ [TCG_COND_NE] = RCOND_NZ,
+ [TCG_COND_LT] = RCOND_LZ,
+ [TCG_COND_GT] = RCOND_GZ,
+ [TCG_COND_LE] = RCOND_LEZ,
+ [TCG_COND_GE] = RCOND_GEZ
+};
+
+static void tcg_out_bpcc0(TCGContext *s, int scond, int flags, int off19)
+{
+ tcg_out32(s, INSN_OP(0) | INSN_OP2(1) | INSN_COND(scond) | flags | off19);
+}
+
+static void tcg_out_bpcc(TCGContext *s, int scond, int flags, TCGLabel *l)
+{
+ int off19 = 0;
+
+ if (l->has_value) {
+ off19 = INSN_OFF19(tcg_pcrel_diff(s, l->u.value_ptr));
+ } else {
+ tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP19, l, 0);
+ }
+ tcg_out_bpcc0(s, scond, flags, off19);
+}
+
+static void tcg_out_cmp(TCGContext *s, TCGReg c1, int32_t c2, int c2const)
+{
+ tcg_out_arithc(s, TCG_REG_G0, c1, c2, c2const, ARITH_SUBCC);
+}
+
+static void tcg_out_brcond_i32(TCGContext *s, TCGCond cond, TCGReg arg1,
+ int32_t arg2, int const_arg2, TCGLabel *l)
+{
+ tcg_out_cmp(s, arg1, arg2, const_arg2);
+ tcg_out_bpcc(s, tcg_cond_to_bcond[cond], BPCC_ICC | BPCC_PT, l);
+ tcg_out_nop(s);
+}
+
+static void tcg_out_movcc(TCGContext *s, TCGCond cond, int cc, TCGReg ret,
+ int32_t v1, int v1const)
+{
+ tcg_out32(s, ARITH_MOVCC | cc | INSN_RD(ret)
+ | INSN_RS1(tcg_cond_to_bcond[cond])
+ | (v1const ? INSN_IMM11(v1) : INSN_RS2(v1)));
+}
+
+static void tcg_out_movcond_i32(TCGContext *s, TCGCond cond, TCGReg ret,
+ TCGReg c1, int32_t c2, int c2const,
+ int32_t v1, int v1const)
+{
+ tcg_out_cmp(s, c1, c2, c2const);
+ tcg_out_movcc(s, cond, MOVCC_ICC, ret, v1, v1const);
+}
+
+static void tcg_out_brcond_i64(TCGContext *s, TCGCond cond, TCGReg arg1,
+ int32_t arg2, int const_arg2, TCGLabel *l)
+{
+ /* For 64-bit signed comparisons vs zero, we can avoid the compare. */
+ if (arg2 == 0 && !is_unsigned_cond(cond)) {
+ int off16 = 0;
+
+ if (l->has_value) {
+ off16 = INSN_OFF16(tcg_pcrel_diff(s, l->u.value_ptr));
+ } else {
+ tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP16, l, 0);
+ }
+ tcg_out32(s, INSN_OP(0) | INSN_OP2(3) | BPR_PT | INSN_RS1(arg1)
+ | INSN_COND(tcg_cond_to_rcond[cond]) | off16);
+ } else {
+ tcg_out_cmp(s, arg1, arg2, const_arg2);
+ tcg_out_bpcc(s, tcg_cond_to_bcond[cond], BPCC_XCC | BPCC_PT, l);
+ }
+ tcg_out_nop(s);
+}
+
+static void tcg_out_movr(TCGContext *s, TCGCond cond, TCGReg ret, TCGReg c1,
+ int32_t v1, int v1const)
+{
+ tcg_out32(s, ARITH_MOVR | INSN_RD(ret) | INSN_RS1(c1)
+ | (tcg_cond_to_rcond[cond] << 10)
+ | (v1const ? INSN_IMM10(v1) : INSN_RS2(v1)));
+}
+
+static void tcg_out_movcond_i64(TCGContext *s, TCGCond cond, TCGReg ret,
+ TCGReg c1, int32_t c2, int c2const,
+ int32_t v1, int v1const)
+{
+ /* For 64-bit signed comparisons vs zero, we can avoid the compare.
+ Note that the immediate range is one bit smaller, so we must check
+ for that as well. */
+ if (c2 == 0 && !is_unsigned_cond(cond)
+ && (!v1const || check_fit_i32(v1, 10))) {
+ tcg_out_movr(s, cond, ret, c1, v1, v1const);
+ } else {
+ tcg_out_cmp(s, c1, c2, c2const);
+ tcg_out_movcc(s, cond, MOVCC_XCC, ret, v1, v1const);
+ }
+}
+
+static void tcg_out_setcond_i32(TCGContext *s, TCGCond cond, TCGReg ret,
+ TCGReg c1, int32_t c2, int c2const)
+{
+ /* For 32-bit comparisons, we can play games with ADDC/SUBC. */
+ switch (cond) {
+ case TCG_COND_LTU:
+ case TCG_COND_GEU:
+ /* The result of the comparison is in the carry bit. */
+ break;
+
+ case TCG_COND_EQ:
+ case TCG_COND_NE:
+ /* For equality, we can transform to inequality vs zero. */
+ if (c2 != 0) {
+ tcg_out_arithc(s, TCG_REG_T1, c1, c2, c2const, ARITH_XOR);
+ c2 = TCG_REG_T1;
+ } else {
+ c2 = c1;
+ }
+ c1 = TCG_REG_G0, c2const = 0;
+ cond = (cond == TCG_COND_EQ ? TCG_COND_GEU : TCG_COND_LTU);
+ break;
+
+ case TCG_COND_GTU:
+ case TCG_COND_LEU:
+ /* If we don't need to load a constant into a register, we can
+ swap the operands on GTU/LEU. There's no benefit to loading
+ the constant into a temporary register. */
+ if (!c2const || c2 == 0) {
+ TCGReg t = c1;
+ c1 = c2;
+ c2 = t;
+ c2const = 0;
+ cond = tcg_swap_cond(cond);
+ break;
+ }
+ /* FALLTHRU */
+
+ default:
+ tcg_out_cmp(s, c1, c2, c2const);
+ tcg_out_movi_imm13(s, ret, 0);
+ tcg_out_movcc(s, cond, MOVCC_ICC, ret, 1, 1);
+ return;
+ }
+
+ tcg_out_cmp(s, c1, c2, c2const);
+ if (cond == TCG_COND_LTU) {
+ tcg_out_arithi(s, ret, TCG_REG_G0, 0, ARITH_ADDC);
+ } else {
+ tcg_out_arithi(s, ret, TCG_REG_G0, -1, ARITH_SUBC);
+ }
+}
+
+static void tcg_out_setcond_i64(TCGContext *s, TCGCond cond, TCGReg ret,
+ TCGReg c1, int32_t c2, int c2const)
+{
+ if (use_vis3_instructions) {
+ switch (cond) {
+ case TCG_COND_NE:
+ if (c2 != 0) {
+ break;
+ }
+ c2 = c1, c2const = 0, c1 = TCG_REG_G0;
+ /* FALLTHRU */
+ case TCG_COND_LTU:
+ tcg_out_cmp(s, c1, c2, c2const);
+ tcg_out_arith(s, ret, TCG_REG_G0, TCG_REG_G0, ARITH_ADDXC);
+ return;
+ default:
+ break;
+ }
+ }
+
+ /* For 64-bit signed comparisons vs zero, we can avoid the compare
+ if the input does not overlap the output. */
+ if (c2 == 0 && !is_unsigned_cond(cond) && c1 != ret) {
+ tcg_out_movi_imm13(s, ret, 0);
+ tcg_out_movr(s, cond, ret, c1, 1, 1);
+ } else {
+ tcg_out_cmp(s, c1, c2, c2const);
+ tcg_out_movi_imm13(s, ret, 0);
+ tcg_out_movcc(s, cond, MOVCC_XCC, ret, 1, 1);
+ }
+}
+
+static void tcg_out_addsub2_i32(TCGContext *s, TCGReg rl, TCGReg rh,
+ TCGReg al, TCGReg ah, int32_t bl, int blconst,
+ int32_t bh, int bhconst, int opl, int oph)
+{
+ TCGReg tmp = TCG_REG_T1;
+
+ /* Note that the low parts are fully consumed before tmp is set. */
+ if (rl != ah && (bhconst || rl != bh)) {
+ tmp = rl;
+ }
+
+ tcg_out_arithc(s, tmp, al, bl, blconst, opl);
+ tcg_out_arithc(s, rh, ah, bh, bhconst, oph);
+ tcg_out_mov(s, TCG_TYPE_I32, rl, tmp);
+}
+
+static void tcg_out_addsub2_i64(TCGContext *s, TCGReg rl, TCGReg rh,
+ TCGReg al, TCGReg ah, int32_t bl, int blconst,
+ int32_t bh, int bhconst, bool is_sub)
+{
+ TCGReg tmp = TCG_REG_T1;
+
+ /* Note that the low parts are fully consumed before tmp is set. */
+ if (rl != ah && (bhconst || rl != bh)) {
+ tmp = rl;
+ }
+
+ tcg_out_arithc(s, tmp, al, bl, blconst, is_sub ? ARITH_SUBCC : ARITH_ADDCC);
+
+ if (use_vis3_instructions && !is_sub) {
+ /* Note that ADDXC doesn't accept immediates. */
+ if (bhconst && bh != 0) {
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_T2, bh);
+ bh = TCG_REG_T2;
+ }
+ tcg_out_arith(s, rh, ah, bh, ARITH_ADDXC);
+ } else if (bh == TCG_REG_G0) {
+ /* If we have a zero, we can perform the operation in two insns,
+ with the arithmetic first, and a conditional move into place. */
+ if (rh == ah) {
+ tcg_out_arithi(s, TCG_REG_T2, ah, 1,
+ is_sub ? ARITH_SUB : ARITH_ADD);
+ tcg_out_movcc(s, TCG_COND_LTU, MOVCC_XCC, rh, TCG_REG_T2, 0);
+ } else {
+ tcg_out_arithi(s, rh, ah, 1, is_sub ? ARITH_SUB : ARITH_ADD);
+ tcg_out_movcc(s, TCG_COND_GEU, MOVCC_XCC, rh, ah, 0);
+ }
+ } else {
+ /* Otherwise adjust BH as if there is carry into T2 ... */
+ if (bhconst) {
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_T2, bh + (is_sub ? -1 : 1));
+ } else {
+ tcg_out_arithi(s, TCG_REG_T2, bh, 1,
+ is_sub ? ARITH_SUB : ARITH_ADD);
+ }
+ /* ... smoosh T2 back to original BH if carry is clear ... */
+ tcg_out_movcc(s, TCG_COND_GEU, MOVCC_XCC, TCG_REG_T2, bh, bhconst);
+ /* ... and finally perform the arithmetic with the new operand. */
+ tcg_out_arith(s, rh, ah, TCG_REG_T2, is_sub ? ARITH_SUB : ARITH_ADD);
+ }
+
+ tcg_out_mov(s, TCG_TYPE_I64, rl, tmp);
+}
+
+static void tcg_out_call_nodelay(TCGContext *s, const tcg_insn_unit *dest,
+ bool in_prologue)
+{
+ ptrdiff_t disp = tcg_pcrel_diff(s, dest);
+
+ if (disp == (int32_t)disp) {
+ tcg_out32(s, CALL | (uint32_t)disp >> 2);
+ } else {
+ uintptr_t desti = (uintptr_t)dest;
+ tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_REG_T1,
+ desti & ~0xfff, in_prologue);
+ tcg_out_arithi(s, TCG_REG_O7, TCG_REG_T1, desti & 0xfff, JMPL);
+ }
+}
+
+static void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest)
+{
+ tcg_out_call_nodelay(s, dest, false);
+ tcg_out_nop(s);
+}
+
+static void tcg_out_mb(TCGContext *s, TCGArg a0)
+{
+ /* Note that the TCG memory order constants mirror the Sparc MEMBAR. */
+ tcg_out32(s, MEMBAR | (a0 & TCG_MO_ALL));
+}
+
+#ifdef CONFIG_SOFTMMU
+static const tcg_insn_unit *qemu_ld_trampoline[(MO_SSIZE | MO_BSWAP) + 1];
+static const tcg_insn_unit *qemu_st_trampoline[(MO_SIZE | MO_BSWAP) + 1];
+
+static void emit_extend(TCGContext *s, TCGReg r, int op)
+{
+ /* Emit zero extend of 8, 16 or 32 bit data as
+ * required by the MO_* value op; do nothing for 64 bit.
+ */
+ switch (op & MO_SIZE) {
+ case MO_8:
+ tcg_out_arithi(s, r, r, 0xff, ARITH_AND);
+ break;
+ case MO_16:
+ tcg_out_arithi(s, r, r, 16, SHIFT_SLL);
+ tcg_out_arithi(s, r, r, 16, SHIFT_SRL);
+ break;
+ case MO_32:
+ if (SPARC64) {
+ tcg_out_arith(s, r, r, 0, SHIFT_SRL);
+ }
+ break;
+ case MO_64:
+ break;
+ }
+}
+
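+/*
+ * Build one out-of-line thunk per memory operation: it marshals the
+ * address and data into the helper argument registers, installs the
+ * return address and env, and tail-calls the softmmu helper, so the
+ * inline slow path only needs a single call.
+ */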
+static void build_trampolines(TCGContext *s)
+{
+ static void * const qemu_ld_helpers[] = {
+ [MO_UB] = helper_ret_ldub_mmu,
+ [MO_SB] = helper_ret_ldsb_mmu,
+ [MO_LEUW] = helper_le_lduw_mmu,
+ [MO_LESW] = helper_le_ldsw_mmu,
+ [MO_LEUL] = helper_le_ldul_mmu,
+ [MO_LEQ] = helper_le_ldq_mmu,
+ [MO_BEUW] = helper_be_lduw_mmu,
+ [MO_BESW] = helper_be_ldsw_mmu,
+ [MO_BEUL] = helper_be_ldul_mmu,
+ [MO_BEQ] = helper_be_ldq_mmu,
+ };
+ static void * const qemu_st_helpers[] = {
+ [MO_UB] = helper_ret_stb_mmu,
+ [MO_LEUW] = helper_le_stw_mmu,
+ [MO_LEUL] = helper_le_stl_mmu,
+ [MO_LEQ] = helper_le_stq_mmu,
+ [MO_BEUW] = helper_be_stw_mmu,
+ [MO_BEUL] = helper_be_stl_mmu,
+ [MO_BEQ] = helper_be_stq_mmu,
+ };
+
+ int i;
+ TCGReg ra;
+
+ for (i = 0; i < ARRAY_SIZE(qemu_ld_helpers); ++i) {
+ if (qemu_ld_helpers[i] == NULL) {
+ continue;
+ }
+
+ /* May as well align the trampoline. */
+ while ((uintptr_t)s->code_ptr & 15) {
+ tcg_out_nop(s);
+ }
+ qemu_ld_trampoline[i] = tcg_splitwx_to_rx(s->code_ptr);
+
+ if (SPARC64 || TARGET_LONG_BITS == 32) {
+ ra = TCG_REG_O3;
+ } else {
+ /* Install the high part of the address. */
+ tcg_out_arithi(s, TCG_REG_O1, TCG_REG_O2, 32, SHIFT_SRLX);
+ ra = TCG_REG_O4;
+ }
+
+ /* Set the retaddr operand. */
+ tcg_out_mov(s, TCG_TYPE_PTR, ra, TCG_REG_O7);
+ /* Set the env operand. */
+ tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O0, TCG_AREG0);
+ /* Tail call. */
+ tcg_out_call_nodelay(s, qemu_ld_helpers[i], true);
+ tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O7, ra);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(qemu_st_helpers); ++i) {
+ if (qemu_st_helpers[i] == NULL) {
+ continue;
+ }
+
+ /* May as well align the trampoline. */
+ while ((uintptr_t)s->code_ptr & 15) {
+ tcg_out_nop(s);
+ }
+ qemu_st_trampoline[i] = tcg_splitwx_to_rx(s->code_ptr);
+
+ if (SPARC64) {
+ emit_extend(s, TCG_REG_O2, i);
+ ra = TCG_REG_O4;
+ } else {
+ ra = TCG_REG_O1;
+ if (TARGET_LONG_BITS == 64) {
+ /* Install the high part of the address. */
+ tcg_out_arithi(s, ra, ra + 1, 32, SHIFT_SRLX);
+ ra += 2;
+ } else {
+ ra += 1;
+ }
+ if ((i & MO_SIZE) == MO_64) {
+ /* Install the high part of the data. */
+ tcg_out_arithi(s, ra, ra + 1, 32, SHIFT_SRLX);
+ ra += 2;
+ } else {
+ emit_extend(s, ra, i);
+ ra += 1;
+ }
+ /* Skip the oi argument. */
+ ra += 1;
+ }
+
+ /* Set the retaddr operand. */
+ if (ra >= TCG_REG_O6) {
+ tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_O7, TCG_REG_CALL_STACK,
+ TCG_TARGET_CALL_STACK_OFFSET);
+ ra = TCG_REG_G1;
+ }
+ tcg_out_mov(s, TCG_TYPE_PTR, ra, TCG_REG_O7);
+ /* Set the env operand. */
+ tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O0, TCG_AREG0);
+ /* Tail call. */
+ tcg_out_call_nodelay(s, qemu_st_helpers[i], true);
+ tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O7, ra);
+ }
+}
+#endif
+
+/* Generate global QEMU prologue and epilogue code */
+static void tcg_target_qemu_prologue(TCGContext *s)
+{
+ int tmp_buf_size, frame_size;
+
+ /*
+ * The TCG temp buffer is at the top of the frame, immediately
+ * below the frame pointer. Use the logical (aligned) offset here;
+ * the stack bias is applied in temp_allocate_frame().
+ */
+ tmp_buf_size = CPU_TEMP_BUF_NLONGS * (int)sizeof(long);
+ tcg_set_frame(s, TCG_REG_I6, -tmp_buf_size, tmp_buf_size);
+
+ /*
+ * TCG_TARGET_CALL_STACK_OFFSET includes the stack bias, but is
+ * otherwise the minimal frame usable by callees.
+ */
+ frame_size = TCG_TARGET_CALL_STACK_OFFSET - TCG_TARGET_STACK_BIAS;
+ frame_size += TCG_STATIC_CALL_ARGS_SIZE + tmp_buf_size;
+ frame_size += TCG_TARGET_STACK_ALIGN - 1;
+ frame_size &= -TCG_TARGET_STACK_ALIGN;
+ tcg_out32(s, SAVE | INSN_RD(TCG_REG_O6) | INSN_RS1(TCG_REG_O6) |
+ INSN_IMM13(-frame_size));
+
+#ifndef CONFIG_SOFTMMU
+ if (guest_base != 0) {
+ tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base, true);
+ tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
+ }
+#endif
+
+ /* We choose TCG_REG_TB such that no move is required. */
+ if (USE_REG_TB) {
+ QEMU_BUILD_BUG_ON(TCG_REG_TB != TCG_REG_I1);
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB);
+ }
+
+ tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I1, 0, JMPL);
+ /* delay slot */
+ tcg_out_nop(s);
+
+ /* Epilogue for goto_ptr. */
+ tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
+ tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN);
+ /* delay slot */
+ tcg_out_movi_imm13(s, TCG_REG_O0, 0);
+
+#ifdef CONFIG_SOFTMMU
+ build_trampolines(s);
+#endif
+}
+
+static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
+{
+ int i;
+ for (i = 0; i < count; ++i) {
+ p[i] = NOP;
+ }
+}
+
+#if defined(CONFIG_SOFTMMU)
+
+/* We expect to use a 13-bit negative offset from ENV. */
+QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
+QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 12));
+
+/*
+ * Perform the TLB load and compare.
+ *
+ * Inputs:
+ * ADDR contains the guest virtual address.
+ *
+ * MEM_INDEX and OPC are the mmu index and memory operation of the access;
+ * the access size and required alignment are taken from OPC.
+ *
+ * WHICH is the offset into the CPUTLBEntry structure of the slot to read.
+ * This should be offsetof addr_read or addr_write.
+ *
+ * The result of the TLB comparison is in %[ix]cc.  The sanitized address
+ * is in the returned register, maybe %o0.  The TLB addend is in %o1.
+ */
+
+static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addr, int mem_index,
+ MemOp opc, int which)
+{
+ int fast_off = TLB_MASK_TABLE_OFS(mem_index);
+ int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
+ int table_off = fast_off + offsetof(CPUTLBDescFast, table);
+ const TCGReg r0 = TCG_REG_O0;
+ const TCGReg r1 = TCG_REG_O1;
+ const TCGReg r2 = TCG_REG_O2;
+ unsigned s_bits = opc & MO_SIZE;
+ unsigned a_bits = get_alignment_bits(opc);
+ tcg_target_long compare_mask;
+
+ /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */
+ tcg_out_ld(s, TCG_TYPE_PTR, r0, TCG_AREG0, mask_off);
+ tcg_out_ld(s, TCG_TYPE_PTR, r1, TCG_AREG0, table_off);
+
+ /* Extract the page index, shifted into place for tlb index. */
+ tcg_out_arithi(s, r2, addr, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS,
+ SHIFT_SRL);
+ tcg_out_arith(s, r2, r2, r0, ARITH_AND);
+
+ /* Add the tlb_table pointer, creating the CPUTLBEntry address into R2. */
+ tcg_out_arith(s, r2, r2, r1, ARITH_ADD);
+
+ /* Load the tlb comparator and the addend. */
+ tcg_out_ld(s, TCG_TYPE_TL, r0, r2, which);
+ tcg_out_ld(s, TCG_TYPE_PTR, r1, r2, offsetof(CPUTLBEntry, addend));
+
+    /* Mask out the page offset, except for the required alignment;
+       the fast path does not handle unaligned accesses, so they fail
+       the comparison and take the slow path. */
+ if (a_bits < s_bits) {
+ a_bits = s_bits;
+ }
+ compare_mask = (tcg_target_ulong)TARGET_PAGE_MASK | ((1 << a_bits) - 1);
+ if (check_fit_tl(compare_mask, 13)) {
+ tcg_out_arithi(s, r2, addr, compare_mask, ARITH_AND);
+ } else {
+ tcg_out_movi(s, TCG_TYPE_TL, r2, compare_mask);
+ tcg_out_arith(s, r2, addr, r2, ARITH_AND);
+ }
+ tcg_out_cmp(s, r0, r2, 0);
+
+ /* If the guest address must be zero-extended, do so now. */
+ if (SPARC64 && TARGET_LONG_BITS == 32) {
+ tcg_out_arithi(s, r0, addr, 0, SHIFT_SRL);
+ return r0;
+ }
+ return addr;
+}
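+
+/*
+ * Worked example of the comparison mask (illustrative; the constants are
+ * assumptions for a typical 64-bit guest, not fixed by this file): with
+ * TARGET_PAGE_BITS == 12 and an aligned 8-byte access, a_bits = s_bits = 3
+ * and compare_mask = TARGET_PAGE_MASK | 7 = 0xfffffffffffff007.  That value
+ * fits in a signed 13-bit immediate, so the single ARITH_AND form is used.
+ * A misaligned address leaves one of the low three bits set after the AND
+ * and therefore fails the compare, forcing the slow path.
+ */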
+#endif /* CONFIG_SOFTMMU */
+
+static const int qemu_ld_opc[(MO_SSIZE | MO_BSWAP) + 1] = {
+ [MO_UB] = LDUB,
+ [MO_SB] = LDSB,
+
+ [MO_BEUW] = LDUH,
+ [MO_BESW] = LDSH,
+ [MO_BEUL] = LDUW,
+ [MO_BESL] = LDSW,
+ [MO_BEQ] = LDX,
+
+ [MO_LEUW] = LDUH_LE,
+ [MO_LESW] = LDSH_LE,
+ [MO_LEUL] = LDUW_LE,
+ [MO_LESL] = LDSW_LE,
+ [MO_LEQ] = LDX_LE,
+};
+
+static const int qemu_st_opc[(MO_SIZE | MO_BSWAP) + 1] = {
+ [MO_UB] = STB,
+
+ [MO_BEUW] = STH,
+ [MO_BEUL] = STW,
+ [MO_BEQ] = STX,
+
+ [MO_LEUW] = STH_LE,
+ [MO_LEUL] = STW_LE,
+ [MO_LEQ] = STX_LE,
+};
+
+static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr,
+ MemOpIdx oi, bool is_64)
+{
+ MemOp memop = get_memop(oi);
+#ifdef CONFIG_SOFTMMU
+ unsigned memi = get_mmuidx(oi);
+ TCGReg addrz, param;
+ const tcg_insn_unit *func;
+ tcg_insn_unit *label_ptr;
+
+ addrz = tcg_out_tlb_load(s, addr, memi, memop,
+ offsetof(CPUTLBEntry, addr_read));
+
+ /* The fast path is exactly one insn. Thus we can perform the
+ entire TLB Hit in the (annulled) delay slot of the branch
+ over the TLB Miss case. */
+
+ /* beq,a,pt %[xi]cc, label0 */
+ label_ptr = s->code_ptr;
+ tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT
+ | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0);
+ /* delay slot */
+ tcg_out_ldst_rr(s, data, addrz, TCG_REG_O1,
+ qemu_ld_opc[memop & (MO_BSWAP | MO_SSIZE)]);
+
+ /* TLB Miss. */
+
+ param = TCG_REG_O1;
+ if (!SPARC64 && TARGET_LONG_BITS == 64) {
+ /* Skip the high-part; we'll perform the extract in the trampoline. */
+ param++;
+ }
+ tcg_out_mov(s, TCG_TYPE_REG, param++, addrz);
+
+    /* We let the helpers sign-extend SB and SW data; only the SL case
+       needs explicit extension below. */
+ if ((memop & MO_SSIZE) == MO_SL) {
+ func = qemu_ld_trampoline[memop & (MO_BSWAP | MO_SIZE)];
+ } else {
+ func = qemu_ld_trampoline[memop & (MO_BSWAP | MO_SSIZE)];
+ }
+ tcg_debug_assert(func != NULL);
+ tcg_out_call_nodelay(s, func, false);
+ /* delay slot */
+ tcg_out_movi(s, TCG_TYPE_I32, param, oi);
+
+    /* Recall that all of the helpers return 64-bit results,
+       which complicates things for sparcv8plus. */
+ if (SPARC64) {
+ /* We let the helper sign-extend SB and SW, but leave SL for here. */
+ if (is_64 && (memop & MO_SSIZE) == MO_SL) {
+ tcg_out_arithi(s, data, TCG_REG_O0, 0, SHIFT_SRA);
+ } else {
+ tcg_out_mov(s, TCG_TYPE_REG, data, TCG_REG_O0);
+ }
+ } else {
+ if ((memop & MO_SIZE) == MO_64) {
+ tcg_out_arithi(s, TCG_REG_O0, TCG_REG_O0, 32, SHIFT_SLLX);
+ tcg_out_arithi(s, TCG_REG_O1, TCG_REG_O1, 0, SHIFT_SRL);
+ tcg_out_arith(s, data, TCG_REG_O0, TCG_REG_O1, ARITH_OR);
+ } else if (is_64) {
+ /* Re-extend from 32-bit rather than reassembling when we
+ know the high register must be an extension. */
+ tcg_out_arithi(s, data, TCG_REG_O1, 0,
+ memop & MO_SIGN ? SHIFT_SRA : SHIFT_SRL);
+ } else {
+ tcg_out_mov(s, TCG_TYPE_I32, data, TCG_REG_O1);
+ }
+ }
+
+ *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr));
+#else
+ if (SPARC64 && TARGET_LONG_BITS == 32) {
+ tcg_out_arithi(s, TCG_REG_T1, addr, 0, SHIFT_SRL);
+ addr = TCG_REG_T1;
+ }
+ tcg_out_ldst_rr(s, data, addr,
+ (guest_base ? TCG_GUEST_BASE_REG : TCG_REG_G0),
+ qemu_ld_opc[memop & (MO_BSWAP | MO_SSIZE)]);
+#endif /* CONFIG_SOFTMMU */
+}
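+
+/*
+ * For reference, the softmmu load fast path emitted above boils down to the
+ * following shape (illustrative only; the exact opcodes and registers depend
+ * on the memory operation and on register allocation):
+ *
+ *     ...TLB load and compare...
+ *     be,a,pt  %xcc, .Lhit            ! annulled branch over the miss path
+ *      ldx     [%addrz + %o1], %data  ! delay slot, executes only on a hit
+ *     ...TLB miss: marshal arguments, call the load trampoline...
+ * .Lhit:
+ */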
+
+static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr,
+ MemOpIdx oi)
+{
+ MemOp memop = get_memop(oi);
+#ifdef CONFIG_SOFTMMU
+ unsigned memi = get_mmuidx(oi);
+ TCGReg addrz, param;
+ const tcg_insn_unit *func;
+ tcg_insn_unit *label_ptr;
+
+ addrz = tcg_out_tlb_load(s, addr, memi, memop,
+ offsetof(CPUTLBEntry, addr_write));
+
+ /* The fast path is exactly one insn. Thus we can perform the entire
+ TLB Hit in the (annulled) delay slot of the branch over TLB Miss. */
+ /* beq,a,pt %[xi]cc, label0 */
+ label_ptr = s->code_ptr;
+ tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT
+ | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0);
+ /* delay slot */
+ tcg_out_ldst_rr(s, data, addrz, TCG_REG_O1,
+ qemu_st_opc[memop & (MO_BSWAP | MO_SIZE)]);
+
+ /* TLB Miss. */
+
+ param = TCG_REG_O1;
+ if (!SPARC64 && TARGET_LONG_BITS == 64) {
+ /* Skip the high-part; we'll perform the extract in the trampoline. */
+ param++;
+ }
+ tcg_out_mov(s, TCG_TYPE_REG, param++, addrz);
+ if (!SPARC64 && (memop & MO_SIZE) == MO_64) {
+ /* Skip the high-part; we'll perform the extract in the trampoline. */
+ param++;
+ }
+ tcg_out_mov(s, TCG_TYPE_REG, param++, data);
+
+ func = qemu_st_trampoline[memop & (MO_BSWAP | MO_SIZE)];
+ tcg_debug_assert(func != NULL);
+ tcg_out_call_nodelay(s, func, false);
+ /* delay slot */
+ tcg_out_movi(s, TCG_TYPE_I32, param, oi);
+
+ *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr));
+#else
+ if (SPARC64 && TARGET_LONG_BITS == 32) {
+ tcg_out_arithi(s, TCG_REG_T1, addr, 0, SHIFT_SRL);
+ addr = TCG_REG_T1;
+ }
+ tcg_out_ldst_rr(s, data, addr,
+ (guest_base ? TCG_GUEST_BASE_REG : TCG_REG_G0),
+ qemu_st_opc[memop & (MO_BSWAP | MO_SIZE)]);
+#endif /* CONFIG_SOFTMMU */
+}
+
+static void tcg_out_op(TCGContext *s, TCGOpcode opc,
+ const TCGArg args[TCG_MAX_OP_ARGS],
+ const int const_args[TCG_MAX_OP_ARGS])
+{
+ TCGArg a0, a1, a2;
+ int c, c2;
+
+ /* Hoist the loads of the most common arguments. */
+ a0 = args[0];
+ a1 = args[1];
+ a2 = args[2];
+ c2 = const_args[2];
+
+ switch (opc) {
+ case INDEX_op_exit_tb:
+ if (check_fit_ptr(a0, 13)) {
+ tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN);
+ tcg_out_movi_imm13(s, TCG_REG_O0, a0);
+ break;
+ } else if (USE_REG_TB) {
+ intptr_t tb_diff = tcg_tbrel_diff(s, (void *)a0);
+ if (check_fit_ptr(tb_diff, 13)) {
+ tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN);
+ /* Note that TCG_REG_TB has been unwound to O1. */
+ tcg_out_arithi(s, TCG_REG_O0, TCG_REG_O1, tb_diff, ARITH_ADD);
+ break;
+ }
+ }
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_I0, a0 & ~0x3ff);
+ tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN);
+ tcg_out_arithi(s, TCG_REG_O0, TCG_REG_O0, a0 & 0x3ff, ARITH_OR);
+ break;
+ case INDEX_op_goto_tb:
+ if (s->tb_jmp_insn_offset) {
+ /* direct jump method */
+ if (USE_REG_TB) {
+ /* make sure the patch is 8-byte aligned. */
+ if ((intptr_t)s->code_ptr & 4) {
+ tcg_out_nop(s);
+ }
+ s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
+ tcg_out_sethi(s, TCG_REG_T1, 0);
+ tcg_out_arithi(s, TCG_REG_T1, TCG_REG_T1, 0, ARITH_OR);
+ tcg_out_arith(s, TCG_REG_G0, TCG_REG_TB, TCG_REG_T1, JMPL);
+ tcg_out_arith(s, TCG_REG_TB, TCG_REG_TB, TCG_REG_T1, ARITH_ADD);
+ } else {
+ s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
+ tcg_out32(s, CALL);
+ tcg_out_nop(s);
+ }
+ } else {
+ /* indirect jump method */
+ tcg_out_ld_ptr(s, TCG_REG_TB, s->tb_jmp_target_addr + a0);
+ tcg_out_arithi(s, TCG_REG_G0, TCG_REG_TB, 0, JMPL);
+ tcg_out_nop(s);
+ }
+ set_jmp_reset_offset(s, a0);
+
+ /* For the unlinked path of goto_tb, we need to reset
+ TCG_REG_TB to the beginning of this TB. */
+ if (USE_REG_TB) {
+ c = -tcg_current_code_size(s);
+ if (check_fit_i32(c, 13)) {
+ tcg_out_arithi(s, TCG_REG_TB, TCG_REG_TB, c, ARITH_ADD);
+ } else {
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, c);
+ tcg_out_arith(s, TCG_REG_TB, TCG_REG_TB,
+ TCG_REG_T1, ARITH_ADD);
+ }
+ }
+ break;
+ case INDEX_op_goto_ptr:
+ tcg_out_arithi(s, TCG_REG_G0, a0, 0, JMPL);
+ if (USE_REG_TB) {
+ tcg_out_mov_delay(s, TCG_REG_TB, a0);
+ } else {
+ tcg_out_nop(s);
+ }
+ break;
+ case INDEX_op_br:
+ tcg_out_bpcc(s, COND_A, BPCC_PT, arg_label(a0));
+ tcg_out_nop(s);
+ break;
+
+#define OP_32_64(x) \
+ glue(glue(case INDEX_op_, x), _i32): \
+ glue(glue(case INDEX_op_, x), _i64)
+
+ OP_32_64(ld8u):
+ tcg_out_ldst(s, a0, a1, a2, LDUB);
+ break;
+ OP_32_64(ld8s):
+ tcg_out_ldst(s, a0, a1, a2, LDSB);
+ break;
+ OP_32_64(ld16u):
+ tcg_out_ldst(s, a0, a1, a2, LDUH);
+ break;
+ OP_32_64(ld16s):
+ tcg_out_ldst(s, a0, a1, a2, LDSH);
+ break;
+ case INDEX_op_ld_i32:
+ case INDEX_op_ld32u_i64:
+ tcg_out_ldst(s, a0, a1, a2, LDUW);
+ break;
+ OP_32_64(st8):
+ tcg_out_ldst(s, a0, a1, a2, STB);
+ break;
+ OP_32_64(st16):
+ tcg_out_ldst(s, a0, a1, a2, STH);
+ break;
+ case INDEX_op_st_i32:
+ case INDEX_op_st32_i64:
+ tcg_out_ldst(s, a0, a1, a2, STW);
+ break;
+ OP_32_64(add):
+ c = ARITH_ADD;
+ goto gen_arith;
+ OP_32_64(sub):
+ c = ARITH_SUB;
+ goto gen_arith;
+ OP_32_64(and):
+ c = ARITH_AND;
+ goto gen_arith;
+ OP_32_64(andc):
+ c = ARITH_ANDN;
+ goto gen_arith;
+ OP_32_64(or):
+ c = ARITH_OR;
+ goto gen_arith;
+ OP_32_64(orc):
+ c = ARITH_ORN;
+ goto gen_arith;
+ OP_32_64(xor):
+ c = ARITH_XOR;
+ goto gen_arith;
+ case INDEX_op_shl_i32:
+ c = SHIFT_SLL;
+ do_shift32:
+ /* Limit immediate shift count lest we create an illegal insn. */
+ tcg_out_arithc(s, a0, a1, a2 & 31, c2, c);
+ break;
+ case INDEX_op_shr_i32:
+ c = SHIFT_SRL;
+ goto do_shift32;
+ case INDEX_op_sar_i32:
+ c = SHIFT_SRA;
+ goto do_shift32;
+ case INDEX_op_mul_i32:
+ c = ARITH_UMUL;
+ goto gen_arith;
+
+ OP_32_64(neg):
+ c = ARITH_SUB;
+ goto gen_arith1;
+ OP_32_64(not):
+ c = ARITH_ORN;
+ goto gen_arith1;
+
+ case INDEX_op_div_i32:
+ tcg_out_div32(s, a0, a1, a2, c2, 0);
+ break;
+ case INDEX_op_divu_i32:
+ tcg_out_div32(s, a0, a1, a2, c2, 1);
+ break;
+
+ case INDEX_op_brcond_i32:
+ tcg_out_brcond_i32(s, a2, a0, a1, const_args[1], arg_label(args[3]));
+ break;
+ case INDEX_op_setcond_i32:
+ tcg_out_setcond_i32(s, args[3], a0, a1, a2, c2);
+ break;
+ case INDEX_op_movcond_i32:
+ tcg_out_movcond_i32(s, args[5], a0, a1, a2, c2, args[3], const_args[3]);
+ break;
+
+ case INDEX_op_add2_i32:
+ tcg_out_addsub2_i32(s, args[0], args[1], args[2], args[3],
+ args[4], const_args[4], args[5], const_args[5],
+ ARITH_ADDCC, ARITH_ADDC);
+ break;
+ case INDEX_op_sub2_i32:
+ tcg_out_addsub2_i32(s, args[0], args[1], args[2], args[3],
+ args[4], const_args[4], args[5], const_args[5],
+ ARITH_SUBCC, ARITH_SUBC);
+ break;
+ case INDEX_op_mulu2_i32:
+ c = ARITH_UMUL;
+ goto do_mul2;
+ case INDEX_op_muls2_i32:
+ c = ARITH_SMUL;
+ do_mul2:
+ /* The 32-bit multiply insns produce a full 64-bit result. If the
+ destination register can hold it, we can avoid the slower RDY. */
+ tcg_out_arithc(s, a0, a2, args[3], const_args[3], c);
+ if (SPARC64 || a0 <= TCG_REG_O7) {
+ tcg_out_arithi(s, a1, a0, 32, SHIFT_SRLX);
+ } else {
+ tcg_out_rdy(s, a1);
+ }
+ break;
+
+ case INDEX_op_qemu_ld_i32:
+ tcg_out_qemu_ld(s, a0, a1, a2, false);
+ break;
+ case INDEX_op_qemu_ld_i64:
+ tcg_out_qemu_ld(s, a0, a1, a2, true);
+ break;
+ case INDEX_op_qemu_st_i32:
+ case INDEX_op_qemu_st_i64:
+ tcg_out_qemu_st(s, a0, a1, a2);
+ break;
+
+ case INDEX_op_ld32s_i64:
+ tcg_out_ldst(s, a0, a1, a2, LDSW);
+ break;
+ case INDEX_op_ld_i64:
+ tcg_out_ldst(s, a0, a1, a2, LDX);
+ break;
+ case INDEX_op_st_i64:
+ tcg_out_ldst(s, a0, a1, a2, STX);
+ break;
+ case INDEX_op_shl_i64:
+ c = SHIFT_SLLX;
+ do_shift64:
+ /* Limit immediate shift count lest we create an illegal insn. */
+ tcg_out_arithc(s, a0, a1, a2 & 63, c2, c);
+ break;
+ case INDEX_op_shr_i64:
+ c = SHIFT_SRLX;
+ goto do_shift64;
+ case INDEX_op_sar_i64:
+ c = SHIFT_SRAX;
+ goto do_shift64;
+ case INDEX_op_mul_i64:
+ c = ARITH_MULX;
+ goto gen_arith;
+ case INDEX_op_div_i64:
+ c = ARITH_SDIVX;
+ goto gen_arith;
+ case INDEX_op_divu_i64:
+ c = ARITH_UDIVX;
+ goto gen_arith;
+ case INDEX_op_ext_i32_i64:
+ case INDEX_op_ext32s_i64:
+ tcg_out_arithi(s, a0, a1, 0, SHIFT_SRA);
+ break;
+ case INDEX_op_extu_i32_i64:
+ case INDEX_op_ext32u_i64:
+ tcg_out_arithi(s, a0, a1, 0, SHIFT_SRL);
+ break;
+ case INDEX_op_extrl_i64_i32:
+ tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
+ break;
+ case INDEX_op_extrh_i64_i32:
+ tcg_out_arithi(s, a0, a1, 32, SHIFT_SRLX);
+ break;
+
+ case INDEX_op_brcond_i64:
+ tcg_out_brcond_i64(s, a2, a0, a1, const_args[1], arg_label(args[3]));
+ break;
+ case INDEX_op_setcond_i64:
+ tcg_out_setcond_i64(s, args[3], a0, a1, a2, c2);
+ break;
+ case INDEX_op_movcond_i64:
+ tcg_out_movcond_i64(s, args[5], a0, a1, a2, c2, args[3], const_args[3]);
+ break;
+ case INDEX_op_add2_i64:
+ tcg_out_addsub2_i64(s, args[0], args[1], args[2], args[3], args[4],
+ const_args[4], args[5], const_args[5], false);
+ break;
+ case INDEX_op_sub2_i64:
+ tcg_out_addsub2_i64(s, args[0], args[1], args[2], args[3], args[4],
+ const_args[4], args[5], const_args[5], true);
+ break;
+ case INDEX_op_muluh_i64:
+ tcg_out_arith(s, args[0], args[1], args[2], ARITH_UMULXHI);
+ break;
+
+ gen_arith:
+ tcg_out_arithc(s, a0, a1, a2, c2, c);
+ break;
+
+ gen_arith1:
+ tcg_out_arithc(s, a0, TCG_REG_G0, a1, const_args[1], c);
+ break;
+
+ case INDEX_op_mb:
+ tcg_out_mb(s, a0);
+ break;
+
+ case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
+ case INDEX_op_mov_i64:
+ case INDEX_op_call: /* Always emitted via tcg_out_call. */
+ default:
+ tcg_abort();
+ }
+}
+
+static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
+{
+ switch (op) {
+ case INDEX_op_goto_ptr:
+ return C_O0_I1(r);
+
+ case INDEX_op_ld8u_i32:
+ case INDEX_op_ld8s_i32:
+ case INDEX_op_ld16u_i32:
+ case INDEX_op_ld16s_i32:
+ case INDEX_op_ld_i32:
+ case INDEX_op_neg_i32:
+ case INDEX_op_not_i32:
+ return C_O1_I1(r, r);
+
+ case INDEX_op_st8_i32:
+ case INDEX_op_st16_i32:
+ case INDEX_op_st_i32:
+ return C_O0_I2(rZ, r);
+
+ case INDEX_op_add_i32:
+ case INDEX_op_mul_i32:
+ case INDEX_op_div_i32:
+ case INDEX_op_divu_i32:
+ case INDEX_op_sub_i32:
+ case INDEX_op_and_i32:
+ case INDEX_op_andc_i32:
+ case INDEX_op_or_i32:
+ case INDEX_op_orc_i32:
+ case INDEX_op_xor_i32:
+ case INDEX_op_shl_i32:
+ case INDEX_op_shr_i32:
+ case INDEX_op_sar_i32:
+ case INDEX_op_setcond_i32:
+ return C_O1_I2(r, rZ, rJ);
+
+ case INDEX_op_brcond_i32:
+ return C_O0_I2(rZ, rJ);
+ case INDEX_op_movcond_i32:
+ return C_O1_I4(r, rZ, rJ, rI, 0);
+ case INDEX_op_add2_i32:
+ case INDEX_op_sub2_i32:
+ return C_O2_I4(r, r, rZ, rZ, rJ, rJ);
+ case INDEX_op_mulu2_i32:
+ case INDEX_op_muls2_i32:
+ return C_O2_I2(r, r, rZ, rJ);
+
+ case INDEX_op_ld8u_i64:
+ case INDEX_op_ld8s_i64:
+ case INDEX_op_ld16u_i64:
+ case INDEX_op_ld16s_i64:
+ case INDEX_op_ld32u_i64:
+ case INDEX_op_ld32s_i64:
+ case INDEX_op_ld_i64:
+ case INDEX_op_ext_i32_i64:
+ case INDEX_op_extu_i32_i64:
+ return C_O1_I1(R, r);
+
+ case INDEX_op_st8_i64:
+ case INDEX_op_st16_i64:
+ case INDEX_op_st32_i64:
+ case INDEX_op_st_i64:
+ return C_O0_I2(RZ, r);
+
+ case INDEX_op_add_i64:
+ case INDEX_op_mul_i64:
+ case INDEX_op_div_i64:
+ case INDEX_op_divu_i64:
+ case INDEX_op_sub_i64:
+ case INDEX_op_and_i64:
+ case INDEX_op_andc_i64:
+ case INDEX_op_or_i64:
+ case INDEX_op_orc_i64:
+ case INDEX_op_xor_i64:
+ case INDEX_op_shl_i64:
+ case INDEX_op_shr_i64:
+ case INDEX_op_sar_i64:
+ case INDEX_op_setcond_i64:
+ return C_O1_I2(R, RZ, RJ);
+
+ case INDEX_op_neg_i64:
+ case INDEX_op_not_i64:
+ case INDEX_op_ext32s_i64:
+ case INDEX_op_ext32u_i64:
+ return C_O1_I1(R, R);
+
+ case INDEX_op_extrl_i64_i32:
+ case INDEX_op_extrh_i64_i32:
+ return C_O1_I1(r, R);
+
+ case INDEX_op_brcond_i64:
+ return C_O0_I2(RZ, RJ);
+ case INDEX_op_movcond_i64:
+ return C_O1_I4(R, RZ, RJ, RI, 0);
+ case INDEX_op_add2_i64:
+ case INDEX_op_sub2_i64:
+ return C_O2_I4(R, R, RZ, RZ, RJ, RI);
+ case INDEX_op_muluh_i64:
+ return C_O1_I2(R, R, R);
+
+ case INDEX_op_qemu_ld_i32:
+ return C_O1_I1(r, A);
+ case INDEX_op_qemu_ld_i64:
+ return C_O1_I1(R, A);
+ case INDEX_op_qemu_st_i32:
+ return C_O0_I2(sZ, A);
+ case INDEX_op_qemu_st_i64:
+ return C_O0_I2(SZ, A);
+
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static void tcg_target_init(TCGContext *s)
+{
+ /*
+ * Only probe for the platform and capabilities if we haven't already
+ * determined maximum values at compile time.
+ */
+#ifndef use_vis3_instructions
+ {
+ unsigned long hwcap = qemu_getauxval(AT_HWCAP);
+ use_vis3_instructions = (hwcap & HWCAP_SPARC_VIS3) != 0;
+ }
+#endif
+
+ tcg_target_available_regs[TCG_TYPE_I32] = ALL_GENERAL_REGS;
+ tcg_target_available_regs[TCG_TYPE_I64] = ALL_GENERAL_REGS64;
+
+ tcg_target_call_clobber_regs = 0;
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_G1);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_G2);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_G3);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_G4);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_G5);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_G6);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_G7);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_O0);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_O1);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_O2);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_O3);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_O4);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_O5);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_O6);
+ tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_O7);
+
+ s->reserved_regs = 0;
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_G0); /* zero */
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_G6); /* reserved for os */
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_G7); /* thread pointer */
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_I6); /* frame pointer */
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_I7); /* return address */
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_O6); /* stack pointer */
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_T1); /* for internal use */
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_T2); /* for internal use */
+}
+
+#if SPARC64
+# define ELF_HOST_MACHINE EM_SPARCV9
+#else
+# define ELF_HOST_MACHINE EM_SPARC32PLUS
+# define ELF_HOST_FLAGS EF_SPARC_32PLUS
+#endif
+
+typedef struct {
+ DebugFrameHeader h;
+ uint8_t fde_def_cfa[SPARC64 ? 4 : 2];
+ uint8_t fde_win_save;
+ uint8_t fde_ret_save[3];
+} DebugFrame;
+
+static const DebugFrame debug_frame = {
+ .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
+ .h.cie.id = -1,
+ .h.cie.version = 1,
+ .h.cie.code_align = 1,
+ .h.cie.data_align = -sizeof(void *) & 0x7f,
+ .h.cie.return_column = 15, /* o7 */
+
+ /* Total FDE size does not include the "len" member. */
+ .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
+
+ .fde_def_cfa = {
+#if SPARC64
+ 12, 30, /* DW_CFA_def_cfa i6, 2047 */
+ (2047 & 0x7f) | 0x80, (2047 >> 7)
+#else
+ 13, 30 /* DW_CFA_def_cfa_register i6 */
+#endif
+ },
+ .fde_win_save = 0x2d, /* DW_CFA_GNU_window_save */
+ .fde_ret_save = { 9, 15, 31 }, /* DW_CFA_register o7, i7 */
+};
+
+void tcg_register_jit(const void *buf, size_t buf_size)
+{
+ tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
+}
+
+void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx,
+ uintptr_t jmp_rw, uintptr_t addr)
+{
+ intptr_t tb_disp = addr - tc_ptr;
+ intptr_t br_disp = addr - jmp_rx;
+ tcg_insn_unit i1, i2;
+
+ /* We can reach the entire address space for ILP32.
+ For LP64, the code_gen_buffer can't be larger than 2GB. */
+ tcg_debug_assert(tb_disp == (int32_t)tb_disp);
+ tcg_debug_assert(br_disp == (int32_t)br_disp);
+
+ if (!USE_REG_TB) {
+ qatomic_set((uint32_t *)jmp_rw,
+ deposit32(CALL, 0, 30, br_disp >> 2));
+ flush_idcache_range(jmp_rx, jmp_rw, 4);
+ return;
+ }
+
+    /* The small-displacement case does not exercise the range of the
+       branch, but we still need to be able to load the new value of
+       TCG_REG_TB; and this case does happen quite often.  */
+ if (check_fit_ptr(tb_disp, 13)) {
+ /* ba,pt %icc, addr */
+ i1 = (INSN_OP(0) | INSN_OP2(1) | INSN_COND(COND_A)
+ | BPCC_ICC | BPCC_PT | INSN_OFF19(br_disp));
+ i2 = (ARITH_ADD | INSN_RD(TCG_REG_TB) | INSN_RS1(TCG_REG_TB)
+ | INSN_IMM13(tb_disp));
+ } else if (tb_disp >= 0) {
+ i1 = SETHI | INSN_RD(TCG_REG_T1) | ((tb_disp & 0xfffffc00) >> 10);
+ i2 = (ARITH_OR | INSN_RD(TCG_REG_T1) | INSN_RS1(TCG_REG_T1)
+ | INSN_IMM13(tb_disp & 0x3ff));
+ } else {
+ i1 = SETHI | INSN_RD(TCG_REG_T1) | ((~tb_disp & 0xfffffc00) >> 10);
+ i2 = (ARITH_XOR | INSN_RD(TCG_REG_T1) | INSN_RS1(TCG_REG_T1)
+ | INSN_IMM13((tb_disp & 0x3ff) | -0x400));
+ }
+
+ qatomic_set((uint64_t *)jmp_rw, deposit64(i2, 32, 32, i1));
+ flush_idcache_range(jmp_rx, jmp_rw, 8);
+}
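+
+/*
+ * Summary of the patching above (an illustrative sketch, with %tb standing
+ * for TCG_REG_TB): without USE_REG_TB a single CALL insn is rewritten and
+ * flushed; with USE_REG_TB the branch and the TCG_REG_TB adjustment are
+ * rewritten together as one aligned 64-bit store, e.g. for a small tb_disp
+ *
+ *     ba,pt   %icc, <addr>
+ *      add    %tb, tb_disp, %tb       ! delay slot
+ *
+ * which is why INDEX_op_goto_tb aligns the patch site to 8 bytes.
+ */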
diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h
new file mode 100644
index 000000000..c05076304
--- /dev/null
+++ b/tcg/sparc/tcg-target.h
@@ -0,0 +1,172 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef SPARC_TCG_TARGET_H
+#define SPARC_TCG_TARGET_H
+
+#define TCG_TARGET_REG_BITS 64
+
+#define TCG_TARGET_INSN_UNIT_SIZE 4
+#define TCG_TARGET_TLB_DISPLACEMENT_BITS 32
+#define TCG_TARGET_NB_REGS 32
+#define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB)
+
+typedef enum {
+ TCG_REG_G0 = 0,
+ TCG_REG_G1,
+ TCG_REG_G2,
+ TCG_REG_G3,
+ TCG_REG_G4,
+ TCG_REG_G5,
+ TCG_REG_G6,
+ TCG_REG_G7,
+ TCG_REG_O0,
+ TCG_REG_O1,
+ TCG_REG_O2,
+ TCG_REG_O3,
+ TCG_REG_O4,
+ TCG_REG_O5,
+ TCG_REG_O6,
+ TCG_REG_O7,
+ TCG_REG_L0,
+ TCG_REG_L1,
+ TCG_REG_L2,
+ TCG_REG_L3,
+ TCG_REG_L4,
+ TCG_REG_L5,
+ TCG_REG_L6,
+ TCG_REG_L7,
+ TCG_REG_I0,
+ TCG_REG_I1,
+ TCG_REG_I2,
+ TCG_REG_I3,
+ TCG_REG_I4,
+ TCG_REG_I5,
+ TCG_REG_I6,
+ TCG_REG_I7,
+} TCGReg;
+
+/* used for function call generation */
+#define TCG_REG_CALL_STACK TCG_REG_O6
+
+#ifdef __arch64__
+#define TCG_TARGET_STACK_BIAS 2047
+#define TCG_TARGET_STACK_ALIGN 16
+#define TCG_TARGET_CALL_STACK_OFFSET (128 + 6*8 + TCG_TARGET_STACK_BIAS)
+#else
+#define TCG_TARGET_STACK_BIAS 0
+#define TCG_TARGET_STACK_ALIGN 8
+#define TCG_TARGET_CALL_STACK_OFFSET (64 + 4 + 6*4)
+#endif
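+
+/*
+ * For reference (derived from the SPARC calling conventions these values
+ * encode): the 64-bit offset is the 16-register window save area (128 bytes)
+ * plus six outgoing argument slots (6 * 8 bytes), addressed through the
+ * 2047-byte stack bias, i.e. 128 + 48 + 2047 = 2223.  The 32-bit offset is
+ * the 64-byte window save area, the 4-byte hidden struct-return slot and
+ * six 4-byte argument slots, i.e. 92, with no bias.
+ */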
+
+#ifdef __arch64__
+#define TCG_TARGET_EXTEND_ARGS 1
+#endif
+
+#if defined(__VIS__) && __VIS__ >= 0x300
+#define use_vis3_instructions 1
+#else
+extern bool use_vis3_instructions;
+#endif
+
+/* optional instructions */
+#define TCG_TARGET_HAS_div_i32 1
+#define TCG_TARGET_HAS_rem_i32 0
+#define TCG_TARGET_HAS_rot_i32 0
+#define TCG_TARGET_HAS_ext8s_i32 0
+#define TCG_TARGET_HAS_ext16s_i32 0
+#define TCG_TARGET_HAS_ext8u_i32 0
+#define TCG_TARGET_HAS_ext16u_i32 0
+#define TCG_TARGET_HAS_bswap16_i32 0
+#define TCG_TARGET_HAS_bswap32_i32 0
+#define TCG_TARGET_HAS_neg_i32 1
+#define TCG_TARGET_HAS_not_i32 1
+#define TCG_TARGET_HAS_andc_i32 1
+#define TCG_TARGET_HAS_orc_i32 1
+#define TCG_TARGET_HAS_eqv_i32 0
+#define TCG_TARGET_HAS_nand_i32 0
+#define TCG_TARGET_HAS_nor_i32 0
+#define TCG_TARGET_HAS_clz_i32 0
+#define TCG_TARGET_HAS_ctz_i32 0
+#define TCG_TARGET_HAS_ctpop_i32 0
+#define TCG_TARGET_HAS_deposit_i32 0
+#define TCG_TARGET_HAS_extract_i32 0
+#define TCG_TARGET_HAS_sextract_i32 0
+#define TCG_TARGET_HAS_extract2_i32 0
+#define TCG_TARGET_HAS_movcond_i32 1
+#define TCG_TARGET_HAS_add2_i32 1
+#define TCG_TARGET_HAS_sub2_i32 1
+#define TCG_TARGET_HAS_mulu2_i32 1
+#define TCG_TARGET_HAS_muls2_i32 1
+#define TCG_TARGET_HAS_muluh_i32 0
+#define TCG_TARGET_HAS_mulsh_i32 0
+#define TCG_TARGET_HAS_direct_jump 1
+#define TCG_TARGET_HAS_qemu_st8_i32 0
+
+#define TCG_TARGET_HAS_extrl_i64_i32 1
+#define TCG_TARGET_HAS_extrh_i64_i32 1
+#define TCG_TARGET_HAS_div_i64 1
+#define TCG_TARGET_HAS_rem_i64 0
+#define TCG_TARGET_HAS_rot_i64 0
+#define TCG_TARGET_HAS_ext8s_i64 0
+#define TCG_TARGET_HAS_ext16s_i64 0
+#define TCG_TARGET_HAS_ext32s_i64 1
+#define TCG_TARGET_HAS_ext8u_i64 0
+#define TCG_TARGET_HAS_ext16u_i64 0
+#define TCG_TARGET_HAS_ext32u_i64 1
+#define TCG_TARGET_HAS_bswap16_i64 0
+#define TCG_TARGET_HAS_bswap32_i64 0
+#define TCG_TARGET_HAS_bswap64_i64 0
+#define TCG_TARGET_HAS_neg_i64 1
+#define TCG_TARGET_HAS_not_i64 1
+#define TCG_TARGET_HAS_andc_i64 1
+#define TCG_TARGET_HAS_orc_i64 1
+#define TCG_TARGET_HAS_eqv_i64 0
+#define TCG_TARGET_HAS_nand_i64 0
+#define TCG_TARGET_HAS_nor_i64 0
+#define TCG_TARGET_HAS_clz_i64 0
+#define TCG_TARGET_HAS_ctz_i64 0
+#define TCG_TARGET_HAS_ctpop_i64 0
+#define TCG_TARGET_HAS_deposit_i64 0
+#define TCG_TARGET_HAS_extract_i64 0
+#define TCG_TARGET_HAS_sextract_i64 0
+#define TCG_TARGET_HAS_extract2_i64 0
+#define TCG_TARGET_HAS_movcond_i64 1
+#define TCG_TARGET_HAS_add2_i64 1
+#define TCG_TARGET_HAS_sub2_i64 1
+#define TCG_TARGET_HAS_mulu2_i64 0
+#define TCG_TARGET_HAS_muls2_i64 0
+#define TCG_TARGET_HAS_muluh_i64 use_vis3_instructions
+#define TCG_TARGET_HAS_mulsh_i64 0
+
+#define TCG_AREG0 TCG_REG_I0
+
+#define TCG_TARGET_DEFAULT_MO (0)
+#define TCG_TARGET_HAS_MEMORY_BSWAP 1
+
+void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+
+#define TCG_TARGET_NEED_POOL_LABELS
+
+#endif
diff --git a/tcg/tcg-common.c b/tcg/tcg-common.c
new file mode 100644
index 000000000..aa0c4f60c
--- /dev/null
+++ b/tcg/tcg-common.c
@@ -0,0 +1,34 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "tcg/tcg.h"
+
+TCGOpDef tcg_op_defs[] = {
+#define DEF(s, oargs, iargs, cargs, flags) \
+ { #s, oargs, iargs, cargs, iargs + oargs + cargs, flags },
+#include "tcg/tcg-opc.h"
+#undef DEF
+};
+const size_t tcg_op_defs_max = ARRAY_SIZE(tcg_op_defs);
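+
+/*
+ * Illustrative expansion of the DEF macro above: a hypothetical
+ * DEF(add_i32, 1, 2, 0, 0) line in tcg-opc.h becomes
+ *
+ *     { "add_i32", 1, 2, 0, 3, 0 },
+ *
+ * i.e. the total argument count is derived as iargs + oargs + cargs.
+ */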
diff --git a/tcg/tcg-internal.h b/tcg/tcg-internal.h
new file mode 100644
index 000000000..92c91dcde
--- /dev/null
+++ b/tcg/tcg-internal.h
@@ -0,0 +1,62 @@
+/*
+ * Internal declarations for Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef TCG_INTERNAL_H
+#define TCG_INTERNAL_H 1
+
+#define TCG_HIGHWATER 1024
+
+typedef struct TCGHelperInfo {
+ void *func;
+ const char *name;
+ unsigned flags;
+ unsigned typemask;
+} TCGHelperInfo;
+
+extern TCGContext tcg_init_ctx;
+extern TCGContext **tcg_ctxs;
+extern unsigned int tcg_cur_ctxs;
+extern unsigned int tcg_max_ctxs;
+
+void tcg_region_init(size_t tb_size, int splitwx, unsigned max_cpus);
+bool tcg_region_alloc(TCGContext *s);
+void tcg_region_initial_alloc(TCGContext *s);
+void tcg_region_prologue_set(TCGContext *s);
+
+static inline void *tcg_call_func(TCGOp *op)
+{
+ return (void *)(uintptr_t)op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op)];
+}
+
+static inline const TCGHelperInfo *tcg_call_info(TCGOp *op)
+{
+ return (void *)(uintptr_t)op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1];
+}
+
+static inline unsigned tcg_call_flags(TCGOp *op)
+{
+ return tcg_call_info(op)->flags;
+}
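+
+/*
+ * Layout assumed by the three helpers above: a call op stores its output and
+ * input arguments first, then two extra words, the raw function pointer
+ * followed by the TCGHelperInfo pointer, hence the indices
+ * TCGOP_CALLO(op) + TCGOP_CALLI(op) and that value plus one.
+ */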
+
+#endif /* TCG_INTERNAL_H */
diff --git a/tcg/tcg-ldst.c.inc b/tcg/tcg-ldst.c.inc
new file mode 100644
index 000000000..6c6848d03
--- /dev/null
+++ b/tcg/tcg-ldst.c.inc
@@ -0,0 +1,78 @@
+/*
+ * TCG Backend Data: load-store optimization only.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+typedef struct TCGLabelQemuLdst {
+ bool is_ld; /* qemu_ld: true, qemu_st: false */
+ MemOpIdx oi;
+ TCGType type; /* result type of a load */
+ TCGReg addrlo_reg; /* reg index for low word of guest virtual addr */
+ TCGReg addrhi_reg; /* reg index for high word of guest virtual addr */
+ TCGReg datalo_reg; /* reg index for low word to be loaded or stored */
+ TCGReg datahi_reg; /* reg index for high word to be loaded or stored */
+ const tcg_insn_unit *raddr; /* addr of the next IR of qemu_ld/st IR */
+ tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
+ QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
+} TCGLabelQemuLdst;
+
+
+/*
+ * Generate TB finalization at the end of block
+ */
+
+static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
+static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
+
+static int tcg_out_ldst_finalize(TCGContext *s)
+{
+ TCGLabelQemuLdst *lb;
+
+ /* qemu_ld/st slow paths */
+ QSIMPLEQ_FOREACH(lb, &s->ldst_labels, next) {
+ if (lb->is_ld
+ ? !tcg_out_qemu_ld_slow_path(s, lb)
+ : !tcg_out_qemu_st_slow_path(s, lb)) {
+ return -2;
+ }
+
+ /* Test for (pending) buffer overflow. The assumption is that any
+ one operation beginning below the high water mark cannot overrun
+ the buffer completely. Thus we can test for overflow after
+ generating code without having to check during generation. */
+ if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
+ return -1;
+ }
+ }
+ return 0;
+}
+
+/*
+ * Allocate a new TCGLabelQemuLdst entry.
+ */
+
+static inline TCGLabelQemuLdst *new_ldst_label(TCGContext *s)
+{
+ TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l));
+
+ QSIMPLEQ_INSERT_TAIL(&s->ldst_labels, l, next);
+
+ return l;
+}
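+
+/*
+ * Typical backend usage (a sketch; the surrounding variable names are
+ * assumptions, not part of this file):
+ *
+ *     TCGLabelQemuLdst *label = new_ldst_label(s);
+ *     label->is_ld = true;
+ *     label->oi = oi;
+ *     label->datalo_reg = data_reg;
+ *     label->addrlo_reg = addr_reg;
+ *     label->raddr = tcg_splitwx_to_rx(s->code_ptr);
+ *     label->label_ptr[0] = branch_to_patch;
+ *
+ * tcg_out_ldst_finalize() then walks the list and emits the slow paths at
+ * the end of the translation block.
+ */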
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
new file mode 100644
index 000000000..ffe55e908
--- /dev/null
+++ b/tcg/tcg-op-gvec.c
@@ -0,0 +1,3699 @@
+/*
+ * Generic vector operation expansion
+ *
+ * Copyright (c) 2018 Linaro
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "tcg/tcg.h"
+#include "tcg/tcg-op.h"
+#include "tcg/tcg-op-gvec.h"
+#include "qemu/main-loop.h"
+#include "tcg/tcg-gvec-desc.h"
+
+#define MAX_UNROLL 4
+
+#ifdef CONFIG_DEBUG_TCG
+static const TCGOpcode vecop_list_empty[1] = { 0 };
+#else
+#define vecop_list_empty NULL
+#endif
+
+
+/* Verify vector size and alignment rules. OFS should be the OR of all
+ of the operand offsets so that we can check them all at once. */
+static void check_size_align(uint32_t oprsz, uint32_t maxsz, uint32_t ofs)
+{
+ uint32_t max_align;
+
+ switch (oprsz) {
+ case 8:
+ case 16:
+ case 32:
+ tcg_debug_assert(oprsz <= maxsz);
+ break;
+ default:
+ tcg_debug_assert(oprsz == maxsz);
+ break;
+ }
+ tcg_debug_assert(maxsz <= (8 << SIMD_MAXSZ_BITS));
+
+ max_align = maxsz >= 16 ? 15 : 7;
+ tcg_debug_assert((maxsz & max_align) == 0);
+ tcg_debug_assert((ofs & max_align) == 0);
+}
+
+/* Verify vector overlap rules for two operands. */
+static void check_overlap_2(uint32_t d, uint32_t a, uint32_t s)
+{
+ tcg_debug_assert(d == a || d + s <= a || a + s <= d);
+}
+
+/* Verify vector overlap rules for three operands. */
+static void check_overlap_3(uint32_t d, uint32_t a, uint32_t b, uint32_t s)
+{
+ check_overlap_2(d, a, s);
+ check_overlap_2(d, b, s);
+ check_overlap_2(a, b, s);
+}
+
+/* Verify vector overlap rules for four operands. */
+static void check_overlap_4(uint32_t d, uint32_t a, uint32_t b,
+ uint32_t c, uint32_t s)
+{
+ check_overlap_2(d, a, s);
+ check_overlap_2(d, b, s);
+ check_overlap_2(d, c, s);
+ check_overlap_2(a, b, s);
+ check_overlap_2(a, c, s);
+ check_overlap_2(b, c, s);
+}
+
+/* Create a descriptor from components. */
+uint32_t simd_desc(uint32_t oprsz, uint32_t maxsz, int32_t data)
+{
+ uint32_t desc = 0;
+
+ check_size_align(oprsz, maxsz, 0);
+ tcg_debug_assert(data == sextract32(data, 0, SIMD_DATA_BITS));
+
+ oprsz = (oprsz / 8) - 1;
+ maxsz = (maxsz / 8) - 1;
+
+ /*
+ * We have just asserted in check_size_align that either
+ * oprsz is {8,16,32} or matches maxsz. Encode the final
+ * case with '2', as that would otherwise map to 24.
+ */
+ if (oprsz == maxsz) {
+ oprsz = 2;
+ }
+
+ desc = deposit32(desc, SIMD_OPRSZ_SHIFT, SIMD_OPRSZ_BITS, oprsz);
+ desc = deposit32(desc, SIMD_MAXSZ_SHIFT, SIMD_MAXSZ_BITS, maxsz);
+ desc = deposit32(desc, SIMD_DATA_SHIFT, SIMD_DATA_BITS, data);
+
+ return desc;
+}
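+
+/*
+ * Example encodings (illustrative): simd_desc(8, 32, 0) stores the oprsz
+ * field as 8/8 - 1 = 0 and the maxsz field as 32/8 - 1 = 3, while
+ * simd_desc(80, 80, 0), an SVE-style size that is not one of {8, 16, 32},
+ * must have oprsz == maxsz and stores the special oprsz value 2.
+ */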
+
+/* Generate a call to a gvec-style helper with two vector operands. */
+void tcg_gen_gvec_2_ool(uint32_t dofs, uint32_t aofs,
+ uint32_t oprsz, uint32_t maxsz, int32_t data,
+ gen_helper_gvec_2 *fn)
+{
+ TCGv_ptr a0, a1;
+ TCGv_i32 desc = tcg_constant_i32(simd_desc(oprsz, maxsz, data));
+
+ a0 = tcg_temp_new_ptr();
+ a1 = tcg_temp_new_ptr();
+
+ tcg_gen_addi_ptr(a0, cpu_env, dofs);
+ tcg_gen_addi_ptr(a1, cpu_env, aofs);
+
+ fn(a0, a1, desc);
+
+ tcg_temp_free_ptr(a0);
+ tcg_temp_free_ptr(a1);
+}
+
+/* Generate a call to a gvec-style helper with two vector operands
+ and one scalar operand. */
+void tcg_gen_gvec_2i_ool(uint32_t dofs, uint32_t aofs, TCGv_i64 c,
+ uint32_t oprsz, uint32_t maxsz, int32_t data,
+ gen_helper_gvec_2i *fn)
+{
+ TCGv_ptr a0, a1;
+ TCGv_i32 desc = tcg_constant_i32(simd_desc(oprsz, maxsz, data));
+
+ a0 = tcg_temp_new_ptr();
+ a1 = tcg_temp_new_ptr();
+
+ tcg_gen_addi_ptr(a0, cpu_env, dofs);
+ tcg_gen_addi_ptr(a1, cpu_env, aofs);
+
+ fn(a0, a1, c, desc);
+
+ tcg_temp_free_ptr(a0);
+ tcg_temp_free_ptr(a1);
+}
+
+/* Generate a call to a gvec-style helper with three vector operands. */
+void tcg_gen_gvec_3_ool(uint32_t dofs, uint32_t aofs, uint32_t bofs,
+ uint32_t oprsz, uint32_t maxsz, int32_t data,
+ gen_helper_gvec_3 *fn)
+{
+ TCGv_ptr a0, a1, a2;
+ TCGv_i32 desc = tcg_constant_i32(simd_desc(oprsz, maxsz, data));
+
+ a0 = tcg_temp_new_ptr();
+ a1 = tcg_temp_new_ptr();
+ a2 = tcg_temp_new_ptr();
+
+ tcg_gen_addi_ptr(a0, cpu_env, dofs);
+ tcg_gen_addi_ptr(a1, cpu_env, aofs);
+ tcg_gen_addi_ptr(a2, cpu_env, bofs);
+
+ fn(a0, a1, a2, desc);
+
+ tcg_temp_free_ptr(a0);
+ tcg_temp_free_ptr(a1);
+ tcg_temp_free_ptr(a2);
+}
+
+/* Generate a call to a gvec-style helper with four vector operands. */
+void tcg_gen_gvec_4_ool(uint32_t dofs, uint32_t aofs, uint32_t bofs,
+ uint32_t cofs, uint32_t oprsz, uint32_t maxsz,
+ int32_t data, gen_helper_gvec_4 *fn)
+{
+ TCGv_ptr a0, a1, a2, a3;
+ TCGv_i32 desc = tcg_constant_i32(simd_desc(oprsz, maxsz, data));
+
+ a0 = tcg_temp_new_ptr();
+ a1 = tcg_temp_new_ptr();
+ a2 = tcg_temp_new_ptr();
+ a3 = tcg_temp_new_ptr();
+
+ tcg_gen_addi_ptr(a0, cpu_env, dofs);
+ tcg_gen_addi_ptr(a1, cpu_env, aofs);
+ tcg_gen_addi_ptr(a2, cpu_env, bofs);
+ tcg_gen_addi_ptr(a3, cpu_env, cofs);
+
+ fn(a0, a1, a2, a3, desc);
+
+ tcg_temp_free_ptr(a0);
+ tcg_temp_free_ptr(a1);
+ tcg_temp_free_ptr(a2);
+ tcg_temp_free_ptr(a3);
+}
+
+/* Generate a call to a gvec-style helper with five vector operands. */
+void tcg_gen_gvec_5_ool(uint32_t dofs, uint32_t aofs, uint32_t bofs,
+ uint32_t cofs, uint32_t xofs, uint32_t oprsz,
+ uint32_t maxsz, int32_t data, gen_helper_gvec_5 *fn)
+{
+ TCGv_ptr a0, a1, a2, a3, a4;
+ TCGv_i32 desc = tcg_constant_i32(simd_desc(oprsz, maxsz, data));
+
+ a0 = tcg_temp_new_ptr();
+ a1 = tcg_temp_new_ptr();
+ a2 = tcg_temp_new_ptr();
+ a3 = tcg_temp_new_ptr();
+ a4 = tcg_temp_new_ptr();
+
+ tcg_gen_addi_ptr(a0, cpu_env, dofs);
+ tcg_gen_addi_ptr(a1, cpu_env, aofs);
+ tcg_gen_addi_ptr(a2, cpu_env, bofs);
+ tcg_gen_addi_ptr(a3, cpu_env, cofs);
+ tcg_gen_addi_ptr(a4, cpu_env, xofs);
+
+ fn(a0, a1, a2, a3, a4, desc);
+
+ tcg_temp_free_ptr(a0);
+ tcg_temp_free_ptr(a1);
+ tcg_temp_free_ptr(a2);
+ tcg_temp_free_ptr(a3);
+ tcg_temp_free_ptr(a4);
+}
+
+/* Generate a call to a gvec-style helper with two vector operands
+   and an extra pointer operand. */
+void tcg_gen_gvec_2_ptr(uint32_t dofs, uint32_t aofs,
+ TCGv_ptr ptr, uint32_t oprsz, uint32_t maxsz,
+ int32_t data, gen_helper_gvec_2_ptr *fn)
+{
+ TCGv_ptr a0, a1;
+ TCGv_i32 desc = tcg_constant_i32(simd_desc(oprsz, maxsz, data));
+
+ a0 = tcg_temp_new_ptr();
+ a1 = tcg_temp_new_ptr();
+
+ tcg_gen_addi_ptr(a0, cpu_env, dofs);
+ tcg_gen_addi_ptr(a1, cpu_env, aofs);
+
+ fn(a0, a1, ptr, desc);
+
+ tcg_temp_free_ptr(a0);
+ tcg_temp_free_ptr(a1);
+}
+
+/* Generate a call to a gvec-style helper with three vector operands
+ and an extra pointer operand. */
+void tcg_gen_gvec_3_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs,
+ TCGv_ptr ptr, uint32_t oprsz, uint32_t maxsz,
+ int32_t data, gen_helper_gvec_3_ptr *fn)
+{
+ TCGv_ptr a0, a1, a2;
+ TCGv_i32 desc = tcg_constant_i32(simd_desc(oprsz, maxsz, data));
+
+ a0 = tcg_temp_new_ptr();
+ a1 = tcg_temp_new_ptr();
+ a2 = tcg_temp_new_ptr();
+
+ tcg_gen_addi_ptr(a0, cpu_env, dofs);
+ tcg_gen_addi_ptr(a1, cpu_env, aofs);
+ tcg_gen_addi_ptr(a2, cpu_env, bofs);
+
+ fn(a0, a1, a2, ptr, desc);
+
+ tcg_temp_free_ptr(a0);
+ tcg_temp_free_ptr(a1);
+ tcg_temp_free_ptr(a2);
+}
+
+/* Generate a call to a gvec-style helper with four vector operands
+ and an extra pointer operand. */
+void tcg_gen_gvec_4_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs,
+ uint32_t cofs, TCGv_ptr ptr, uint32_t oprsz,
+ uint32_t maxsz, int32_t data,
+ gen_helper_gvec_4_ptr *fn)
+{
+ TCGv_ptr a0, a1, a2, a3;
+ TCGv_i32 desc = tcg_constant_i32(simd_desc(oprsz, maxsz, data));
+
+ a0 = tcg_temp_new_ptr();
+ a1 = tcg_temp_new_ptr();
+ a2 = tcg_temp_new_ptr();
+ a3 = tcg_temp_new_ptr();
+
+ tcg_gen_addi_ptr(a0, cpu_env, dofs);
+ tcg_gen_addi_ptr(a1, cpu_env, aofs);
+ tcg_gen_addi_ptr(a2, cpu_env, bofs);
+ tcg_gen_addi_ptr(a3, cpu_env, cofs);
+
+ fn(a0, a1, a2, a3, ptr, desc);
+
+ tcg_temp_free_ptr(a0);
+ tcg_temp_free_ptr(a1);
+ tcg_temp_free_ptr(a2);
+ tcg_temp_free_ptr(a3);
+}
+
+/* Generate a call to a gvec-style helper with five vector operands
+ and an extra pointer operand. */
+void tcg_gen_gvec_5_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs,
+ uint32_t cofs, uint32_t eofs, TCGv_ptr ptr,
+ uint32_t oprsz, uint32_t maxsz, int32_t data,
+ gen_helper_gvec_5_ptr *fn)
+{
+ TCGv_ptr a0, a1, a2, a3, a4;
+ TCGv_i32 desc = tcg_constant_i32(simd_desc(oprsz, maxsz, data));
+
+ a0 = tcg_temp_new_ptr();
+ a1 = tcg_temp_new_ptr();
+ a2 = tcg_temp_new_ptr();
+ a3 = tcg_temp_new_ptr();
+ a4 = tcg_temp_new_ptr();
+
+ tcg_gen_addi_ptr(a0, cpu_env, dofs);
+ tcg_gen_addi_ptr(a1, cpu_env, aofs);
+ tcg_gen_addi_ptr(a2, cpu_env, bofs);
+ tcg_gen_addi_ptr(a3, cpu_env, cofs);
+ tcg_gen_addi_ptr(a4, cpu_env, eofs);
+
+ fn(a0, a1, a2, a3, a4, ptr, desc);
+
+ tcg_temp_free_ptr(a0);
+ tcg_temp_free_ptr(a1);
+ tcg_temp_free_ptr(a2);
+ tcg_temp_free_ptr(a3);
+ tcg_temp_free_ptr(a4);
+}
+
+/* Return true if we want to implement something of OPRSZ bytes
+ in units of LNSZ. This limits the expansion of inline code. */
+static inline bool check_size_impl(uint32_t oprsz, uint32_t lnsz)
+{
+ uint32_t q, r;
+
+ if (oprsz < lnsz) {
+ return false;
+ }
+
+ q = oprsz / lnsz;
+ r = oprsz % lnsz;
+ tcg_debug_assert((r & 7) == 0);
+
+ if (lnsz < 16) {
+ /* For sizes below 16, accept no remainder. */
+ if (r != 0) {
+ return false;
+ }
+ } else {
+ /*
+ * Recall that ARM SVE allows vector sizes that are not a
+ * power of 2, but always a multiple of 16. The intent is
+ * that e.g. size == 80 would be expanded with 2x32 + 1x16.
+ * In addition, expand_clr needs to handle a multiple of 8.
+ * Thus we can handle the tail with one more operation per
+ * diminishing power of 2.
+ */
+ q += ctpop32(r);
+ }
+
+ return q <= MAX_UNROLL;
+}
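+
+/*
+ * For example, check_size_impl(80, 32) yields q = 2 and r = 16; since
+ * lnsz >= 16 the remainder costs ctpop32(16) = 1 extra operation, for a
+ * total of 3 <= MAX_UNROLL, so an 80-byte operation is expanded inline as
+ * 2x32 + 1x16.  check_size_impl(80, 16) would need 5 operations and is
+ * rejected.
+ */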
+
+static void expand_clr(uint32_t dofs, uint32_t maxsz);
+
+/* Duplicate C as per VECE. */
+uint64_t (dup_const)(unsigned vece, uint64_t c)
+{
+ switch (vece) {
+ case MO_8:
+ return 0x0101010101010101ull * (uint8_t)c;
+ case MO_16:
+ return 0x0001000100010001ull * (uint16_t)c;
+ case MO_32:
+ return 0x0000000100000001ull * (uint32_t)c;
+ case MO_64:
+ return c;
+ default:
+ g_assert_not_reached();
+ }
+}
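+
+/*
+ * For example, dup_const(MO_8, 0x3344ab) == 0xababababababababull and
+ * dup_const(MO_16, 0x1234) == 0x1234123412341234ull: the constant is
+ * truncated to the element size and then replicated across 64 bits.
+ */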
+
+/* Duplicate IN into OUT as per VECE. */
+void tcg_gen_dup_i32(unsigned vece, TCGv_i32 out, TCGv_i32 in)
+{
+ switch (vece) {
+ case MO_8:
+ tcg_gen_ext8u_i32(out, in);
+ tcg_gen_muli_i32(out, out, 0x01010101);
+ break;
+ case MO_16:
+ tcg_gen_deposit_i32(out, in, in, 16, 16);
+ break;
+ case MO_32:
+ tcg_gen_mov_i32(out, in);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+}
+
+void tcg_gen_dup_i64(unsigned vece, TCGv_i64 out, TCGv_i64 in)
+{
+ switch (vece) {
+ case MO_8:
+ tcg_gen_ext8u_i64(out, in);
+ tcg_gen_muli_i64(out, out, 0x0101010101010101ull);
+ break;
+ case MO_16:
+ tcg_gen_ext16u_i64(out, in);
+ tcg_gen_muli_i64(out, out, 0x0001000100010001ull);
+ break;
+ case MO_32:
+ tcg_gen_deposit_i64(out, in, in, 32, 32);
+ break;
+ case MO_64:
+ tcg_gen_mov_i64(out, in);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+}
+
+/* Select a supported vector type for implementing an operation on SIZE
+ * bytes. If OP is 0, assume that the real operation to be performed is
+ * required by all backends. Otherwise, make sure that OP can be performed
+ * on elements of size VECE in the selected type. Do not select V64 if
+ * PREFER_I64 is true. Return 0 if no vector type is selected.
+ */
+static TCGType choose_vector_type(const TCGOpcode *list, unsigned vece,
+ uint32_t size, bool prefer_i64)
+{
+ /*
+ * Recall that ARM SVE allows vector sizes that are not a
+ * power of 2, but always a multiple of 16. The intent is
+ * that e.g. size == 80 would be expanded with 2x32 + 1x16.
+ * It is hard to imagine a case in which v256 is supported
+ * but v128 is not, but check anyway.
+ * In addition, expand_clr needs to handle a multiple of 8.
+ */
+ if (TCG_TARGET_HAS_v256 &&
+ check_size_impl(size, 32) &&
+ tcg_can_emit_vecop_list(list, TCG_TYPE_V256, vece) &&
+ (!(size & 16) ||
+ (TCG_TARGET_HAS_v128 &&
+ tcg_can_emit_vecop_list(list, TCG_TYPE_V128, vece))) &&
+ (!(size & 8) ||
+ (TCG_TARGET_HAS_v64 &&
+ tcg_can_emit_vecop_list(list, TCG_TYPE_V64, vece)))) {
+ return TCG_TYPE_V256;
+ }
+ if (TCG_TARGET_HAS_v128 &&
+ check_size_impl(size, 16) &&
+ tcg_can_emit_vecop_list(list, TCG_TYPE_V128, vece) &&
+ (!(size & 8) ||
+ (TCG_TARGET_HAS_v64 &&
+ tcg_can_emit_vecop_list(list, TCG_TYPE_V64, vece)))) {
+ return TCG_TYPE_V128;
+ }
+ if (TCG_TARGET_HAS_v64 && !prefer_i64 && check_size_impl(size, 8)
+ && tcg_can_emit_vecop_list(list, TCG_TYPE_V64, vece)) {
+ return TCG_TYPE_V64;
+ }
+ return 0;
+}
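+
+/*
+ * For example, on a host that supports v128 and v64 but not v256 (an
+ * assumption for illustration), a 32-byte operation whose opcodes are
+ * available at the requested vece returns TCG_TYPE_V128 and is expanded as
+ * two 16-byte operations, while a 24-byte operation additionally requires
+ * v64 support for its 8-byte tail.
+ */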
+
+static void do_dup_store(TCGType type, uint32_t dofs, uint32_t oprsz,
+ uint32_t maxsz, TCGv_vec t_vec)
+{
+ uint32_t i = 0;
+
+ tcg_debug_assert(oprsz >= 8);
+
+ /*
+ * This may be expand_clr for the tail of an operation, e.g.
+ * oprsz == 8 && maxsz == 64. The first 8 bytes of this store
+ * are misaligned wrt the maximum vector size, so do that first.
+ */
+ if (dofs & 8) {
+ tcg_gen_stl_vec(t_vec, cpu_env, dofs + i, TCG_TYPE_V64);
+ i += 8;
+ }
+
+ switch (type) {
+ case TCG_TYPE_V256:
+ /*
+ * Recall that ARM SVE allows vector sizes that are not a
+ * power of 2, but always a multiple of 16. The intent is
+ * that e.g. size == 80 would be expanded with 2x32 + 1x16.
+ */
+ for (; i + 32 <= oprsz; i += 32) {
+ tcg_gen_stl_vec(t_vec, cpu_env, dofs + i, TCG_TYPE_V256);
+ }
+ /* fallthru */
+ case TCG_TYPE_V128:
+ for (; i + 16 <= oprsz; i += 16) {
+ tcg_gen_stl_vec(t_vec, cpu_env, dofs + i, TCG_TYPE_V128);
+ }
+ break;
+ case TCG_TYPE_V64:
+ for (; i < oprsz; i += 8) {
+ tcg_gen_stl_vec(t_vec, cpu_env, dofs + i, TCG_TYPE_V64);
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ if (oprsz < maxsz) {
+ expand_clr(dofs + oprsz, maxsz - oprsz);
+ }
+}
+
+/* Set OPRSZ bytes at DOFS to replications of IN_32, IN_64 or IN_C.
+ * Only one of IN_32 or IN_64 may be set;
+ * IN_C is used if IN_32 and IN_64 are unset.
+ */
+static void do_dup(unsigned vece, uint32_t dofs, uint32_t oprsz,
+ uint32_t maxsz, TCGv_i32 in_32, TCGv_i64 in_64,
+ uint64_t in_c)
+{
+ TCGType type;
+ TCGv_i64 t_64;
+ TCGv_i32 t_32, t_desc;
+ TCGv_ptr t_ptr;
+ uint32_t i;
+
+ assert(vece <= (in_32 ? MO_32 : MO_64));
+ assert(in_32 == NULL || in_64 == NULL);
+
+ /* If we're storing 0, expand oprsz to maxsz. */
+ if (in_32 == NULL && in_64 == NULL) {
+ in_c = dup_const(vece, in_c);
+ if (in_c == 0) {
+ oprsz = maxsz;
+ vece = MO_8;
+ } else if (in_c == dup_const(MO_8, in_c)) {
+ vece = MO_8;
+ }
+ }
+
+ /* Implement inline with a vector type, if possible.
+ * Prefer integer when 64-bit host and no variable dup.
+ */
+ type = choose_vector_type(NULL, vece, oprsz,
+ (TCG_TARGET_REG_BITS == 64 && in_32 == NULL
+ && (in_64 == NULL || vece == MO_64)));
+ if (type != 0) {
+ TCGv_vec t_vec = tcg_temp_new_vec(type);
+
+ if (in_32) {
+ tcg_gen_dup_i32_vec(vece, t_vec, in_32);
+ } else if (in_64) {
+ tcg_gen_dup_i64_vec(vece, t_vec, in_64);
+ } else {
+ tcg_gen_dupi_vec(vece, t_vec, in_c);
+ }
+ do_dup_store(type, dofs, oprsz, maxsz, t_vec);
+ tcg_temp_free_vec(t_vec);
+ return;
+ }
+
+ /* Otherwise, inline with an integer type, unless "large". */
+ if (check_size_impl(oprsz, TCG_TARGET_REG_BITS / 8)) {
+ t_64 = NULL;
+ t_32 = NULL;
+
+ if (in_32) {
+ /* We are given a 32-bit variable input. For a 64-bit host,
+ use a 64-bit operation unless the 32-bit operation would
+ be simple enough. */
+ if (TCG_TARGET_REG_BITS == 64
+ && (vece != MO_32 || !check_size_impl(oprsz, 4))) {
+ t_64 = tcg_temp_new_i64();
+ tcg_gen_extu_i32_i64(t_64, in_32);
+ tcg_gen_dup_i64(vece, t_64, t_64);
+ } else {
+ t_32 = tcg_temp_new_i32();
+ tcg_gen_dup_i32(vece, t_32, in_32);
+ }
+ } else if (in_64) {
+ /* We are given a 64-bit variable input. */
+ t_64 = tcg_temp_new_i64();
+ tcg_gen_dup_i64(vece, t_64, in_64);
+ } else {
+ /* We are given a constant input. */
+ /* For 64-bit hosts, use 64-bit constants for "simple" constants
+ or when we'd need too many 32-bit stores, or when a 64-bit
+ constant is really required. */
+ if (vece == MO_64
+ || (TCG_TARGET_REG_BITS == 64
+ && (in_c == 0 || in_c == -1
+ || !check_size_impl(oprsz, 4)))) {
+ t_64 = tcg_constant_i64(in_c);
+ } else {
+ t_32 = tcg_constant_i32(in_c);
+ }
+ }
+
+ /* Implement inline if we picked an implementation size above. */
+ if (t_32) {
+ for (i = 0; i < oprsz; i += 4) {
+ tcg_gen_st_i32(t_32, cpu_env, dofs + i);
+ }
+ tcg_temp_free_i32(t_32);
+ goto done;
+ }
+ if (t_64) {
+ for (i = 0; i < oprsz; i += 8) {
+ tcg_gen_st_i64(t_64, cpu_env, dofs + i);
+ }
+ tcg_temp_free_i64(t_64);
+ goto done;
+ }
+ }
+
+ /* Otherwise implement out of line. */
+ t_ptr = tcg_temp_new_ptr();
+ tcg_gen_addi_ptr(t_ptr, cpu_env, dofs);
+
+ /*
+ * This may be expand_clr for the tail of an operation, e.g.
+ * oprsz == 8 && maxsz == 64. The size of the clear is misaligned
+ * wrt simd_desc and will assert. Simply pass all replicated byte
+ * stores through to memset.
+ */
+ if (oprsz == maxsz && vece == MO_8) {
+ TCGv_ptr t_size = tcg_const_ptr(oprsz);
+ TCGv_i32 t_val;
+
+ if (in_32) {
+ t_val = in_32;
+ } else if (in_64) {
+ t_val = tcg_temp_new_i32();
+ tcg_gen_extrl_i64_i32(t_val, in_64);
+ } else {
+ t_val = tcg_constant_i32(in_c);
+ }
+ gen_helper_memset(t_ptr, t_ptr, t_val, t_size);
+
+ if (in_64) {
+ tcg_temp_free_i32(t_val);
+ }
+ tcg_temp_free_ptr(t_size);
+ tcg_temp_free_ptr(t_ptr);
+ return;
+ }
+
+ t_desc = tcg_constant_i32(simd_desc(oprsz, maxsz, 0));
+
+ if (vece == MO_64) {
+ if (in_64) {
+ gen_helper_gvec_dup64(t_ptr, t_desc, in_64);
+ } else {
+ t_64 = tcg_constant_i64(in_c);
+ gen_helper_gvec_dup64(t_ptr, t_desc, t_64);
+ }
+ } else {
+ typedef void dup_fn(TCGv_ptr, TCGv_i32, TCGv_i32);
+ static dup_fn * const fns[3] = {
+ gen_helper_gvec_dup8,
+ gen_helper_gvec_dup16,
+ gen_helper_gvec_dup32
+ };
+
+ if (in_32) {
+ fns[vece](t_ptr, t_desc, in_32);
+ } else if (in_64) {
+ t_32 = tcg_temp_new_i32();
+ tcg_gen_extrl_i64_i32(t_32, in_64);
+ fns[vece](t_ptr, t_desc, t_32);
+ tcg_temp_free_i32(t_32);
+ } else {
+ if (vece == MO_8) {
+ in_c &= 0xff;
+ } else if (vece == MO_16) {
+ in_c &= 0xffff;
+ }
+ t_32 = tcg_constant_i32(in_c);
+ fns[vece](t_ptr, t_desc, t_32);
+ }
+ }
+
+ tcg_temp_free_ptr(t_ptr);
+ return;
+
+ done:
+ if (oprsz < maxsz) {
+ expand_clr(dofs + oprsz, maxsz - oprsz);
+ }
+}
+
+/* As do_dup above, but storing zero into MAXSZ bytes at DOFS. */
+static void expand_clr(uint32_t dofs, uint32_t maxsz)
+{
+ do_dup(MO_8, dofs, maxsz, maxsz, NULL, NULL, 0);
+}
+
+/* Expand OPSZ bytes worth of two-operand operations using i32 elements. */
+static void expand_2_i32(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
+ bool load_dest, void (*fni)(TCGv_i32, TCGv_i32))
+{
+ TCGv_i32 t0 = tcg_temp_new_i32();
+ TCGv_i32 t1 = tcg_temp_new_i32();
+ uint32_t i;
+
+ for (i = 0; i < oprsz; i += 4) {
+ tcg_gen_ld_i32(t0, cpu_env, aofs + i);
+ if (load_dest) {
+ tcg_gen_ld_i32(t1, cpu_env, dofs + i);
+ }
+ fni(t1, t0);
+ tcg_gen_st_i32(t1, cpu_env, dofs + i);
+ }
+ tcg_temp_free_i32(t0);
+ tcg_temp_free_i32(t1);
+}
+
+static void expand_2i_i32(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
+ int32_t c, bool load_dest,
+ void (*fni)(TCGv_i32, TCGv_i32, int32_t))
+{
+ TCGv_i32 t0 = tcg_temp_new_i32();
+ TCGv_i32 t1 = tcg_temp_new_i32();
+ uint32_t i;
+
+ for (i = 0; i < oprsz; i += 4) {
+ tcg_gen_ld_i32(t0, cpu_env, aofs + i);
+ if (load_dest) {
+ tcg_gen_ld_i32(t1, cpu_env, dofs + i);
+ }
+ fni(t1, t0, c);
+ tcg_gen_st_i32(t1, cpu_env, dofs + i);
+ }
+ tcg_temp_free_i32(t0);
+ tcg_temp_free_i32(t1);
+}
+
+static void expand_2s_i32(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
+ TCGv_i32 c, bool scalar_first,
+ void (*fni)(TCGv_i32, TCGv_i32, TCGv_i32))
+{
+ TCGv_i32 t0 = tcg_temp_new_i32();
+ TCGv_i32 t1 = tcg_temp_new_i32();
+ uint32_t i;
+
+ for (i = 0; i < oprsz; i += 4) {
+ tcg_gen_ld_i32(t0, cpu_env, aofs + i);
+ if (scalar_first) {
+ fni(t1, c, t0);
+ } else {
+ fni(t1, t0, c);
+ }
+ tcg_gen_st_i32(t1, cpu_env, dofs + i);
+ }
+ tcg_temp_free_i32(t0);
+ tcg_temp_free_i32(t1);
+}
+
+/* Expand OPSZ bytes worth of three-operand operations using i32 elements. */
+static void expand_3_i32(uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, bool load_dest,
+ void (*fni)(TCGv_i32, TCGv_i32, TCGv_i32))
+{
+ TCGv_i32 t0 = tcg_temp_new_i32();
+ TCGv_i32 t1 = tcg_temp_new_i32();
+ TCGv_i32 t2 = tcg_temp_new_i32();
+ uint32_t i;
+
+ for (i = 0; i < oprsz; i += 4) {
+ tcg_gen_ld_i32(t0, cpu_env, aofs + i);
+ tcg_gen_ld_i32(t1, cpu_env, bofs + i);
+ if (load_dest) {
+ tcg_gen_ld_i32(t2, cpu_env, dofs + i);
+ }
+ fni(t2, t0, t1);
+ tcg_gen_st_i32(t2, cpu_env, dofs + i);
+ }
+ tcg_temp_free_i32(t2);
+ tcg_temp_free_i32(t1);
+ tcg_temp_free_i32(t0);
+}
+
+static void expand_3i_i32(uint32_t dofs, uint32_t aofs, uint32_t bofs,
+ uint32_t oprsz, int32_t c, bool load_dest,
+ void (*fni)(TCGv_i32, TCGv_i32, TCGv_i32, int32_t))
+{
+ TCGv_i32 t0 = tcg_temp_new_i32();
+ TCGv_i32 t1 = tcg_temp_new_i32();
+ TCGv_i32 t2 = tcg_temp_new_i32();
+ uint32_t i;
+
+ for (i = 0; i < oprsz; i += 4) {
+ tcg_gen_ld_i32(t0, cpu_env, aofs + i);
+ tcg_gen_ld_i32(t1, cpu_env, bofs + i);
+ if (load_dest) {
+ tcg_gen_ld_i32(t2, cpu_env, dofs + i);
+ }
+ fni(t2, t0, t1, c);
+ tcg_gen_st_i32(t2, cpu_env, dofs + i);
+ }
+ tcg_temp_free_i32(t0);
+ tcg_temp_free_i32(t1);
+ tcg_temp_free_i32(t2);
+}
+
+/* Expand OPSZ bytes worth of four-operand operations using i32 elements. */
+static void expand_4_i32(uint32_t dofs, uint32_t aofs, uint32_t bofs,
+ uint32_t cofs, uint32_t oprsz, bool write_aofs,
+ void (*fni)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32))
+{
+ TCGv_i32 t0 = tcg_temp_new_i32();
+ TCGv_i32 t1 = tcg_temp_new_i32();
+ TCGv_i32 t2 = tcg_temp_new_i32();
+ TCGv_i32 t3 = tcg_temp_new_i32();
+ uint32_t i;
+
+ for (i = 0; i < oprsz; i += 4) {
+ tcg_gen_ld_i32(t1, cpu_env, aofs + i);
+ tcg_gen_ld_i32(t2, cpu_env, bofs + i);
+ tcg_gen_ld_i32(t3, cpu_env, cofs + i);
+ fni(t0, t1, t2, t3);
+ tcg_gen_st_i32(t0, cpu_env, dofs + i);
+ if (write_aofs) {
+ tcg_gen_st_i32(t1, cpu_env, aofs + i);
+ }
+ }
+ tcg_temp_free_i32(t3);
+ tcg_temp_free_i32(t2);
+ tcg_temp_free_i32(t1);
+ tcg_temp_free_i32(t0);
+}
+
+/* Expand OPSZ bytes worth of two-operand operations using i64 elements. */
+static void expand_2_i64(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
+ bool load_dest, void (*fni)(TCGv_i64, TCGv_i64))
+{
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ uint32_t i;
+
+ for (i = 0; i < oprsz; i += 8) {
+ tcg_gen_ld_i64(t0, cpu_env, aofs + i);
+ if (load_dest) {
+ tcg_gen_ld_i64(t1, cpu_env, dofs + i);
+ }
+ fni(t1, t0);
+ tcg_gen_st_i64(t1, cpu_env, dofs + i);
+ }
+ tcg_temp_free_i64(t0);
+ tcg_temp_free_i64(t1);
+}
+
+static void expand_2i_i64(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
+ int64_t c, bool load_dest,
+ void (*fni)(TCGv_i64, TCGv_i64, int64_t))
+{
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ uint32_t i;
+
+ for (i = 0; i < oprsz; i += 8) {
+ tcg_gen_ld_i64(t0, cpu_env, aofs + i);
+ if (load_dest) {
+ tcg_gen_ld_i64(t1, cpu_env, dofs + i);
+ }
+ fni(t1, t0, c);
+ tcg_gen_st_i64(t1, cpu_env, dofs + i);
+ }
+ tcg_temp_free_i64(t0);
+ tcg_temp_free_i64(t1);
+}
+
+static void expand_2s_i64(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
+ TCGv_i64 c, bool scalar_first,
+ void (*fni)(TCGv_i64, TCGv_i64, TCGv_i64))
+{
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ uint32_t i;
+
+ for (i = 0; i < oprsz; i += 8) {
+ tcg_gen_ld_i64(t0, cpu_env, aofs + i);
+ if (scalar_first) {
+ fni(t1, c, t0);
+ } else {
+ fni(t1, t0, c);
+ }
+ tcg_gen_st_i64(t1, cpu_env, dofs + i);
+ }
+ tcg_temp_free_i64(t0);
+ tcg_temp_free_i64(t1);
+}
+
+/* Expand OPSZ bytes worth of three-operand operations using i64 elements. */
+static void expand_3_i64(uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, bool load_dest,
+ void (*fni)(TCGv_i64, TCGv_i64, TCGv_i64))
+{
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ TCGv_i64 t2 = tcg_temp_new_i64();
+ uint32_t i;
+
+ for (i = 0; i < oprsz; i += 8) {
+ tcg_gen_ld_i64(t0, cpu_env, aofs + i);
+ tcg_gen_ld_i64(t1, cpu_env, bofs + i);
+ if (load_dest) {
+ tcg_gen_ld_i64(t2, cpu_env, dofs + i);
+ }
+ fni(t2, t0, t1);
+ tcg_gen_st_i64(t2, cpu_env, dofs + i);
+ }
+ tcg_temp_free_i64(t2);
+ tcg_temp_free_i64(t1);
+ tcg_temp_free_i64(t0);
+}
+
+static void expand_3i_i64(uint32_t dofs, uint32_t aofs, uint32_t bofs,
+ uint32_t oprsz, int64_t c, bool load_dest,
+ void (*fni)(TCGv_i64, TCGv_i64, TCGv_i64, int64_t))
+{
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ TCGv_i64 t2 = tcg_temp_new_i64();
+ uint32_t i;
+
+ for (i = 0; i < oprsz; i += 8) {
+ tcg_gen_ld_i64(t0, cpu_env, aofs + i);
+ tcg_gen_ld_i64(t1, cpu_env, bofs + i);
+ if (load_dest) {
+ tcg_gen_ld_i64(t2, cpu_env, dofs + i);
+ }
+ fni(t2, t0, t1, c);
+ tcg_gen_st_i64(t2, cpu_env, dofs + i);
+ }
+ tcg_temp_free_i64(t0);
+ tcg_temp_free_i64(t1);
+ tcg_temp_free_i64(t2);
+}
+
+/* Expand OPSZ bytes worth of four-operand operations using i64 elements. */
+static void expand_4_i64(uint32_t dofs, uint32_t aofs, uint32_t bofs,
+ uint32_t cofs, uint32_t oprsz, bool write_aofs,
+ void (*fni)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
+{
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ TCGv_i64 t2 = tcg_temp_new_i64();
+ TCGv_i64 t3 = tcg_temp_new_i64();
+ uint32_t i;
+
+ for (i = 0; i < oprsz; i += 8) {
+ tcg_gen_ld_i64(t1, cpu_env, aofs + i);
+ tcg_gen_ld_i64(t2, cpu_env, bofs + i);
+ tcg_gen_ld_i64(t3, cpu_env, cofs + i);
+ fni(t0, t1, t2, t3);
+ tcg_gen_st_i64(t0, cpu_env, dofs + i);
+ if (write_aofs) {
+ tcg_gen_st_i64(t1, cpu_env, aofs + i);
+ }
+ }
+ tcg_temp_free_i64(t3);
+ tcg_temp_free_i64(t2);
+ tcg_temp_free_i64(t1);
+ tcg_temp_free_i64(t0);
+}
+
+/* Expand OPSZ bytes worth of two-operand operations using host vectors. */
+static void expand_2_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t oprsz, uint32_t tysz, TCGType type,
+ bool load_dest,
+ void (*fni)(unsigned, TCGv_vec, TCGv_vec))
+{
+ TCGv_vec t0 = tcg_temp_new_vec(type);
+ TCGv_vec t1 = tcg_temp_new_vec(type);
+ uint32_t i;
+
+ for (i = 0; i < oprsz; i += tysz) {
+ tcg_gen_ld_vec(t0, cpu_env, aofs + i);
+ if (load_dest) {
+ tcg_gen_ld_vec(t1, cpu_env, dofs + i);
+ }
+ fni(vece, t1, t0);
+ tcg_gen_st_vec(t1, cpu_env, dofs + i);
+ }
+ tcg_temp_free_vec(t0);
+ tcg_temp_free_vec(t1);
+}
+
+/* Expand OPSZ bytes worth of two-vector operands and an immediate operand
+ using host vectors. */
+static void expand_2i_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t oprsz, uint32_t tysz, TCGType type,
+ int64_t c, bool load_dest,
+ void (*fni)(unsigned, TCGv_vec, TCGv_vec, int64_t))
+{
+ TCGv_vec t0 = tcg_temp_new_vec(type);
+ TCGv_vec t1 = tcg_temp_new_vec(type);
+ uint32_t i;
+
+ for (i = 0; i < oprsz; i += tysz) {
+ tcg_gen_ld_vec(t0, cpu_env, aofs + i);
+ if (load_dest) {
+ tcg_gen_ld_vec(t1, cpu_env, dofs + i);
+ }
+ fni(vece, t1, t0, c);
+ tcg_gen_st_vec(t1, cpu_env, dofs + i);
+ }
+ tcg_temp_free_vec(t0);
+ tcg_temp_free_vec(t1);
+}
+
+static void expand_2s_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t oprsz, uint32_t tysz, TCGType type,
+ TCGv_vec c, bool scalar_first,
+ void (*fni)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec))
+{
+ TCGv_vec t0 = tcg_temp_new_vec(type);
+ TCGv_vec t1 = tcg_temp_new_vec(type);
+ uint32_t i;
+
+ for (i = 0; i < oprsz; i += tysz) {
+ tcg_gen_ld_vec(t0, cpu_env, aofs + i);
+ if (scalar_first) {
+ fni(vece, t1, c, t0);
+ } else {
+ fni(vece, t1, t0, c);
+ }
+ tcg_gen_st_vec(t1, cpu_env, dofs + i);
+ }
+ tcg_temp_free_vec(t0);
+ tcg_temp_free_vec(t1);
+}
+
+/* Expand OPSZ bytes worth of three-operand operations using host vectors. */
+static void expand_3_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz,
+ uint32_t tysz, TCGType type, bool load_dest,
+ void (*fni)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec))
+{
+ TCGv_vec t0 = tcg_temp_new_vec(type);
+ TCGv_vec t1 = tcg_temp_new_vec(type);
+ TCGv_vec t2 = tcg_temp_new_vec(type);
+ uint32_t i;
+
+ for (i = 0; i < oprsz; i += tysz) {
+ tcg_gen_ld_vec(t0, cpu_env, aofs + i);
+ tcg_gen_ld_vec(t1, cpu_env, bofs + i);
+ if (load_dest) {
+ tcg_gen_ld_vec(t2, cpu_env, dofs + i);
+ }
+ fni(vece, t2, t0, t1);
+ tcg_gen_st_vec(t2, cpu_env, dofs + i);
+ }
+ tcg_temp_free_vec(t2);
+ tcg_temp_free_vec(t1);
+ tcg_temp_free_vec(t0);
+}
+
+/*
+ * Expand OPSZ bytes worth of three-vector operands and an immediate operand
+ * using host vectors.
+ */
+static void expand_3i_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t tysz,
+ TCGType type, int64_t c, bool load_dest,
+ void (*fni)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec,
+ int64_t))
+{
+ TCGv_vec t0 = tcg_temp_new_vec(type);
+ TCGv_vec t1 = tcg_temp_new_vec(type);
+ TCGv_vec t2 = tcg_temp_new_vec(type);
+ uint32_t i;
+
+ for (i = 0; i < oprsz; i += tysz) {
+ tcg_gen_ld_vec(t0, cpu_env, aofs + i);
+ tcg_gen_ld_vec(t1, cpu_env, bofs + i);
+ if (load_dest) {
+ tcg_gen_ld_vec(t2, cpu_env, dofs + i);
+ }
+ fni(vece, t2, t0, t1, c);
+ tcg_gen_st_vec(t2, cpu_env, dofs + i);
+ }
+ tcg_temp_free_vec(t0);
+ tcg_temp_free_vec(t1);
+ tcg_temp_free_vec(t2);
+}
+
+/* Expand OPSZ bytes worth of four-operand operations using host vectors. */
+static void expand_4_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t cofs, uint32_t oprsz,
+ uint32_t tysz, TCGType type, bool write_aofs,
+ void (*fni)(unsigned, TCGv_vec, TCGv_vec,
+ TCGv_vec, TCGv_vec))
+{
+ TCGv_vec t0 = tcg_temp_new_vec(type);
+ TCGv_vec t1 = tcg_temp_new_vec(type);
+ TCGv_vec t2 = tcg_temp_new_vec(type);
+ TCGv_vec t3 = tcg_temp_new_vec(type);
+ uint32_t i;
+
+ for (i = 0; i < oprsz; i += tysz) {
+ tcg_gen_ld_vec(t1, cpu_env, aofs + i);
+ tcg_gen_ld_vec(t2, cpu_env, bofs + i);
+ tcg_gen_ld_vec(t3, cpu_env, cofs + i);
+ fni(vece, t0, t1, t2, t3);
+ tcg_gen_st_vec(t0, cpu_env, dofs + i);
+ if (write_aofs) {
+ tcg_gen_st_vec(t1, cpu_env, aofs + i);
+ }
+ }
+ tcg_temp_free_vec(t3);
+ tcg_temp_free_vec(t2);
+ tcg_temp_free_vec(t1);
+ tcg_temp_free_vec(t0);
+}
+
+/* Expand a vector two-operand operation. */
+void tcg_gen_gvec_2(uint32_t dofs, uint32_t aofs,
+ uint32_t oprsz, uint32_t maxsz, const GVecGen2 *g)
+{
+ const TCGOpcode *this_list = g->opt_opc ? : vecop_list_empty;
+ const TCGOpcode *hold_list = tcg_swap_vecop_list(this_list);
+ TCGType type;
+ uint32_t some;
+
+ check_size_align(oprsz, maxsz, dofs | aofs);
+ check_overlap_2(dofs, aofs, maxsz);
+
+ type = 0;
+ if (g->fniv) {
+ type = choose_vector_type(g->opt_opc, g->vece, oprsz, g->prefer_i64);
+ }
+ switch (type) {
+ case TCG_TYPE_V256:
+ /* Recall that ARM SVE allows vector sizes that are not a
+ * power of 2, but always a multiple of 16. The intent is
+ * that e.g. size == 80 would be expanded with 2x32 + 1x16.
+ */
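+ /* Worked example: for oprsz == 80, some == 64, so two 32-byte V256
+ * steps are emitted here and the remaining 16 bytes fall through to
+ * the V128 case below.
+ */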
+ some = QEMU_ALIGN_DOWN(oprsz, 32);
+ expand_2_vec(g->vece, dofs, aofs, some, 32, TCG_TYPE_V256,
+ g->load_dest, g->fniv);
+ if (some == oprsz) {
+ break;
+ }
+ dofs += some;
+ aofs += some;
+ oprsz -= some;
+ maxsz -= some;
+ /* fallthru */
+ case TCG_TYPE_V128:
+ expand_2_vec(g->vece, dofs, aofs, oprsz, 16, TCG_TYPE_V128,
+ g->load_dest, g->fniv);
+ break;
+ case TCG_TYPE_V64:
+ expand_2_vec(g->vece, dofs, aofs, oprsz, 8, TCG_TYPE_V64,
+ g->load_dest, g->fniv);
+ break;
+
+ case 0:
+ if (g->fni8 && check_size_impl(oprsz, 8)) {
+ expand_2_i64(dofs, aofs, oprsz, g->load_dest, g->fni8);
+ } else if (g->fni4 && check_size_impl(oprsz, 4)) {
+ expand_2_i32(dofs, aofs, oprsz, g->load_dest, g->fni4);
+ } else {
+ assert(g->fno != NULL);
+ tcg_gen_gvec_2_ool(dofs, aofs, oprsz, maxsz, g->data, g->fno);
+ oprsz = maxsz;
+ }
+ break;
+
+ default:
+ g_assert_not_reached();
+ }
+ tcg_swap_vecop_list(hold_list);
+
+ if (oprsz < maxsz) {
+ expand_clr(dofs + oprsz, maxsz - oprsz);
+ }
+}
+
+/* Expand a vector operation with two vectors and an immediate. */
+void tcg_gen_gvec_2i(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
+ uint32_t maxsz, int64_t c, const GVecGen2i *g)
+{
+ const TCGOpcode *this_list = g->opt_opc ? : vecop_list_empty;
+ const TCGOpcode *hold_list = tcg_swap_vecop_list(this_list);
+ TCGType type;
+ uint32_t some;
+
+ check_size_align(oprsz, maxsz, dofs | aofs);
+ check_overlap_2(dofs, aofs, maxsz);
+
+ type = 0;
+ if (g->fniv) {
+ type = choose_vector_type(g->opt_opc, g->vece, oprsz, g->prefer_i64);
+ }
+ switch (type) {
+ case TCG_TYPE_V256:
+ /* Recall that ARM SVE allows vector sizes that are not a
+ * power of 2, but always a multiple of 16. The intent is
+ * that e.g. size == 80 would be expanded with 2x32 + 1x16.
+ */
+ some = QEMU_ALIGN_DOWN(oprsz, 32);
+ expand_2i_vec(g->vece, dofs, aofs, some, 32, TCG_TYPE_V256,
+ c, g->load_dest, g->fniv);
+ if (some == oprsz) {
+ break;
+ }
+ dofs += some;
+ aofs += some;
+ oprsz -= some;
+ maxsz -= some;
+ /* fallthru */
+ case TCG_TYPE_V128:
+ expand_2i_vec(g->vece, dofs, aofs, oprsz, 16, TCG_TYPE_V128,
+ c, g->load_dest, g->fniv);
+ break;
+ case TCG_TYPE_V64:
+ expand_2i_vec(g->vece, dofs, aofs, oprsz, 8, TCG_TYPE_V64,
+ c, g->load_dest, g->fniv);
+ break;
+
+ case 0:
+ if (g->fni8 && check_size_impl(oprsz, 8)) {
+ expand_2i_i64(dofs, aofs, oprsz, c, g->load_dest, g->fni8);
+ } else if (g->fni4 && check_size_impl(oprsz, 4)) {
+ expand_2i_i32(dofs, aofs, oprsz, c, g->load_dest, g->fni4);
+ } else {
+ if (g->fno) {
+ tcg_gen_gvec_2_ool(dofs, aofs, oprsz, maxsz, c, g->fno);
+ } else {
+ TCGv_i64 tcg_c = tcg_constant_i64(c);
+ tcg_gen_gvec_2i_ool(dofs, aofs, tcg_c, oprsz,
+ maxsz, c, g->fnoi);
+ }
+ oprsz = maxsz;
+ }
+ break;
+
+ default:
+ g_assert_not_reached();
+ }
+ tcg_swap_vecop_list(hold_list);
+
+ if (oprsz < maxsz) {
+ expand_clr(dofs + oprsz, maxsz - oprsz);
+ }
+}
+
+/* Expand a vector operation with two vectors and a scalar. */
+void tcg_gen_gvec_2s(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
+ uint32_t maxsz, TCGv_i64 c, const GVecGen2s *g)
+{
+ TCGType type;
+
+ check_size_align(oprsz, maxsz, dofs | aofs);
+ check_overlap_2(dofs, aofs, maxsz);
+
+ type = 0;
+ if (g->fniv) {
+ type = choose_vector_type(g->opt_opc, g->vece, oprsz, g->prefer_i64);
+ }
+ if (type != 0) {
+ const TCGOpcode *this_list = g->opt_opc ? : vecop_list_empty;
+ const TCGOpcode *hold_list = tcg_swap_vecop_list(this_list);
+ TCGv_vec t_vec = tcg_temp_new_vec(type);
+ uint32_t some;
+
+ tcg_gen_dup_i64_vec(g->vece, t_vec, c);
+
+ switch (type) {
+ case TCG_TYPE_V256:
+ /* Recall that ARM SVE allows vector sizes that are not a
+ * power of 2, but always a multiple of 16. The intent is
+ * that e.g. size == 80 would be expanded with 2x32 + 1x16.
+ */
+ some = QEMU_ALIGN_DOWN(oprsz, 32);
+ expand_2s_vec(g->vece, dofs, aofs, some, 32, TCG_TYPE_V256,
+ t_vec, g->scalar_first, g->fniv);
+ if (some == oprsz) {
+ break;
+ }
+ dofs += some;
+ aofs += some;
+ oprsz -= some;
+ maxsz -= some;
+ /* fallthru */
+
+ case TCG_TYPE_V128:
+ expand_2s_vec(g->vece, dofs, aofs, oprsz, 16, TCG_TYPE_V128,
+ t_vec, g->scalar_first, g->fniv);
+ break;
+
+ case TCG_TYPE_V64:
+ expand_2s_vec(g->vece, dofs, aofs, oprsz, 8, TCG_TYPE_V64,
+ t_vec, g->scalar_first, g->fniv);
+ break;
+
+ default:
+ g_assert_not_reached();
+ }
+ tcg_temp_free_vec(t_vec);
+ tcg_swap_vecop_list(hold_list);
+ } else if (g->fni8 && check_size_impl(oprsz, 8)) {
+ TCGv_i64 t64 = tcg_temp_new_i64();
+
+ tcg_gen_dup_i64(g->vece, t64, c);
+ expand_2s_i64(dofs, aofs, oprsz, t64, g->scalar_first, g->fni8);
+ tcg_temp_free_i64(t64);
+ } else if (g->fni4 && check_size_impl(oprsz, 4)) {
+ TCGv_i32 t32 = tcg_temp_new_i32();
+
+ tcg_gen_extrl_i64_i32(t32, c);
+ tcg_gen_dup_i32(g->vece, t32, t32);
+ expand_2s_i32(dofs, aofs, oprsz, t32, g->scalar_first, g->fni4);
+ tcg_temp_free_i32(t32);
+ } else {
+ tcg_gen_gvec_2i_ool(dofs, aofs, c, oprsz, maxsz, 0, g->fno);
+ return;
+ }
+
+ if (oprsz < maxsz) {
+ expand_clr(dofs + oprsz, maxsz - oprsz);
+ }
+}
+
+/* Expand a vector three-operand operation. */
+void tcg_gen_gvec_3(uint32_t dofs, uint32_t aofs, uint32_t bofs,
+ uint32_t oprsz, uint32_t maxsz, const GVecGen3 *g)
+{
+ const TCGOpcode *this_list = g->opt_opc ? : vecop_list_empty;
+ const TCGOpcode *hold_list = tcg_swap_vecop_list(this_list);
+ TCGType type;
+ uint32_t some;
+
+ check_size_align(oprsz, maxsz, dofs | aofs | bofs);
+ check_overlap_3(dofs, aofs, bofs, maxsz);
+
+ type = 0;
+ if (g->fniv) {
+ type = choose_vector_type(g->opt_opc, g->vece, oprsz, g->prefer_i64);
+ }
+ switch (type) {
+ case TCG_TYPE_V256:
+ /* Recall that ARM SVE allows vector sizes that are not a
+ * power of 2, but always a multiple of 16. The intent is
+ * that e.g. size == 80 would be expanded with 2x32 + 1x16.
+ */
+ some = QEMU_ALIGN_DOWN(oprsz, 32);
+ expand_3_vec(g->vece, dofs, aofs, bofs, some, 32, TCG_TYPE_V256,
+ g->load_dest, g->fniv);
+ if (some == oprsz) {
+ break;
+ }
+ dofs += some;
+ aofs += some;
+ bofs += some;
+ oprsz -= some;
+ maxsz -= some;
+ /* fallthru */
+ case TCG_TYPE_V128:
+ expand_3_vec(g->vece, dofs, aofs, bofs, oprsz, 16, TCG_TYPE_V128,
+ g->load_dest, g->fniv);
+ break;
+ case TCG_TYPE_V64:
+ expand_3_vec(g->vece, dofs, aofs, bofs, oprsz, 8, TCG_TYPE_V64,
+ g->load_dest, g->fniv);
+ break;
+
+ case 0:
+ if (g->fni8 && check_size_impl(oprsz, 8)) {
+ expand_3_i64(dofs, aofs, bofs, oprsz, g->load_dest, g->fni8);
+ } else if (g->fni4 && check_size_impl(oprsz, 4)) {
+ expand_3_i32(dofs, aofs, bofs, oprsz, g->load_dest, g->fni4);
+ } else {
+ assert(g->fno != NULL);
+ tcg_gen_gvec_3_ool(dofs, aofs, bofs, oprsz,
+ maxsz, g->data, g->fno);
+ oprsz = maxsz;
+ }
+ break;
+
+ default:
+ g_assert_not_reached();
+ }
+ tcg_swap_vecop_list(hold_list);
+
+ if (oprsz < maxsz) {
+ expand_clr(dofs + oprsz, maxsz - oprsz);
+ }
+}
+
+/* Expand a vector operation with three vectors and an immediate. */
+void tcg_gen_gvec_3i(uint32_t dofs, uint32_t aofs, uint32_t bofs,
+ uint32_t oprsz, uint32_t maxsz, int64_t c,
+ const GVecGen3i *g)
+{
+ const TCGOpcode *this_list = g->opt_opc ? : vecop_list_empty;
+ const TCGOpcode *hold_list = tcg_swap_vecop_list(this_list);
+ TCGType type;
+ uint32_t some;
+
+ check_size_align(oprsz, maxsz, dofs | aofs | bofs);
+ check_overlap_3(dofs, aofs, bofs, maxsz);
+
+ type = 0;
+ if (g->fniv) {
+ type = choose_vector_type(g->opt_opc, g->vece, oprsz, g->prefer_i64);
+ }
+ switch (type) {
+ case TCG_TYPE_V256:
+ /*
+ * Recall that ARM SVE allows vector sizes that are not a
+ * power of 2, but always a multiple of 16. The intent is
+ * that e.g. size == 80 would be expanded with 2x32 + 1x16.
+ */
+ some = QEMU_ALIGN_DOWN(oprsz, 32);
+ expand_3i_vec(g->vece, dofs, aofs, bofs, some, 32, TCG_TYPE_V256,
+ c, g->load_dest, g->fniv);
+ if (some == oprsz) {
+ break;
+ }
+ dofs += some;
+ aofs += some;
+ bofs += some;
+ oprsz -= some;
+ maxsz -= some;
+ /* fallthru */
+ case TCG_TYPE_V128:
+ expand_3i_vec(g->vece, dofs, aofs, bofs, oprsz, 16, TCG_TYPE_V128,
+ c, g->load_dest, g->fniv);
+ break;
+ case TCG_TYPE_V64:
+ expand_3i_vec(g->vece, dofs, aofs, bofs, oprsz, 8, TCG_TYPE_V64,
+ c, g->load_dest, g->fniv);
+ break;
+
+ case 0:
+ if (g->fni8 && check_size_impl(oprsz, 8)) {
+ expand_3i_i64(dofs, aofs, bofs, oprsz, c, g->load_dest, g->fni8);
+ } else if (g->fni4 && check_size_impl(oprsz, 4)) {
+ expand_3i_i32(dofs, aofs, bofs, oprsz, c, g->load_dest, g->fni4);
+ } else {
+ assert(g->fno != NULL);
+ tcg_gen_gvec_3_ool(dofs, aofs, bofs, oprsz, maxsz, c, g->fno);
+ oprsz = maxsz;
+ }
+ break;
+
+ default:
+ g_assert_not_reached();
+ }
+ tcg_swap_vecop_list(hold_list);
+
+ if (oprsz < maxsz) {
+ expand_clr(dofs + oprsz, maxsz - oprsz);
+ }
+}
+
+/* Expand a vector four-operand operation. */
+void tcg_gen_gvec_4(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t cofs,
+ uint32_t oprsz, uint32_t maxsz, const GVecGen4 *g)
+{
+ const TCGOpcode *this_list = g->opt_opc ? : vecop_list_empty;
+ const TCGOpcode *hold_list = tcg_swap_vecop_list(this_list);
+ TCGType type;
+ uint32_t some;
+
+ check_size_align(oprsz, maxsz, dofs | aofs | bofs | cofs);
+ check_overlap_4(dofs, aofs, bofs, cofs, maxsz);
+
+ type = 0;
+ if (g->fniv) {
+ type = choose_vector_type(g->opt_opc, g->vece, oprsz, g->prefer_i64);
+ }
+ switch (type) {
+ case TCG_TYPE_V256:
+ /* Recall that ARM SVE allows vector sizes that are not a
+ * power of 2, but always a multiple of 16. The intent is
+ * that e.g. size == 80 would be expanded with 2x32 + 1x16.
+ */
+ some = QEMU_ALIGN_DOWN(oprsz, 32);
+ expand_4_vec(g->vece, dofs, aofs, bofs, cofs, some,
+ 32, TCG_TYPE_V256, g->write_aofs, g->fniv);
+ if (some == oprsz) {
+ break;
+ }
+ dofs += some;
+ aofs += some;
+ bofs += some;
+ cofs += some;
+ oprsz -= some;
+ maxsz -= some;
+ /* fallthru */
+ case TCG_TYPE_V128:
+ expand_4_vec(g->vece, dofs, aofs, bofs, cofs, oprsz,
+ 16, TCG_TYPE_V128, g->write_aofs, g->fniv);
+ break;
+ case TCG_TYPE_V64:
+ expand_4_vec(g->vece, dofs, aofs, bofs, cofs, oprsz,
+ 8, TCG_TYPE_V64, g->write_aofs, g->fniv);
+ break;
+
+ case 0:
+ if (g->fni8 && check_size_impl(oprsz, 8)) {
+ expand_4_i64(dofs, aofs, bofs, cofs, oprsz,
+ g->write_aofs, g->fni8);
+ } else if (g->fni4 && check_size_impl(oprsz, 4)) {
+ expand_4_i32(dofs, aofs, bofs, cofs, oprsz,
+ g->write_aofs, g->fni4);
+ } else {
+ assert(g->fno != NULL);
+ tcg_gen_gvec_4_ool(dofs, aofs, bofs, cofs,
+ oprsz, maxsz, g->data, g->fno);
+ oprsz = maxsz;
+ }
+ break;
+
+ default:
+ g_assert_not_reached();
+ }
+ tcg_swap_vecop_list(hold_list);
+
+ if (oprsz < maxsz) {
+ expand_clr(dofs + oprsz, maxsz - oprsz);
+ }
+}
+
+/*
+ * Expand specific vector operations.
+ */
+
+static void vec_mov2(unsigned vece, TCGv_vec a, TCGv_vec b)
+{
+ tcg_gen_mov_vec(a, b);
+}
+
+void tcg_gen_gvec_mov(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t oprsz, uint32_t maxsz)
+{
+ static const GVecGen2 g = {
+ .fni8 = tcg_gen_mov_i64,
+ .fniv = vec_mov2,
+ .fno = gen_helper_gvec_mov,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ };
+ if (dofs != aofs) {
+ tcg_gen_gvec_2(dofs, aofs, oprsz, maxsz, &g);
+ } else {
+ check_size_align(oprsz, maxsz, dofs);
+ if (oprsz < maxsz) {
+ expand_clr(dofs + oprsz, maxsz - oprsz);
+ }
+ }
+}
+
+void tcg_gen_gvec_dup_i32(unsigned vece, uint32_t dofs, uint32_t oprsz,
+ uint32_t maxsz, TCGv_i32 in)
+{
+ check_size_align(oprsz, maxsz, dofs);
+ tcg_debug_assert(vece <= MO_32);
+ do_dup(vece, dofs, oprsz, maxsz, in, NULL, 0);
+}
+
+void tcg_gen_gvec_dup_i64(unsigned vece, uint32_t dofs, uint32_t oprsz,
+ uint32_t maxsz, TCGv_i64 in)
+{
+ check_size_align(oprsz, maxsz, dofs);
+ tcg_debug_assert(vece <= MO_64);
+ do_dup(vece, dofs, oprsz, maxsz, NULL, in, 0);
+}
+
+void tcg_gen_gvec_dup_mem(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t oprsz, uint32_t maxsz)
+{
+ check_size_align(oprsz, maxsz, dofs);
+ if (vece <= MO_64) {
+ TCGType type = choose_vector_type(NULL, vece, oprsz, 0);
+ if (type != 0) {
+ TCGv_vec t_vec = tcg_temp_new_vec(type);
+ tcg_gen_dup_mem_vec(vece, t_vec, cpu_env, aofs);
+ do_dup_store(type, dofs, oprsz, maxsz, t_vec);
+ tcg_temp_free_vec(t_vec);
+ } else if (vece <= MO_32) {
+ TCGv_i32 in = tcg_temp_new_i32();
+ switch (vece) {
+ case MO_8:
+ tcg_gen_ld8u_i32(in, cpu_env, aofs);
+ break;
+ case MO_16:
+ tcg_gen_ld16u_i32(in, cpu_env, aofs);
+ break;
+ default:
+ tcg_gen_ld_i32(in, cpu_env, aofs);
+ break;
+ }
+ do_dup(vece, dofs, oprsz, maxsz, in, NULL, 0);
+ tcg_temp_free_i32(in);
+ } else {
+ TCGv_i64 in = tcg_temp_new_i64();
+ tcg_gen_ld_i64(in, cpu_env, aofs);
+ do_dup(vece, dofs, oprsz, maxsz, NULL, in, 0);
+ tcg_temp_free_i64(in);
+ }
+ } else if (vece == 4) {
+ /* 128-bit duplicate. */
+ int i;
+
+ tcg_debug_assert(oprsz >= 16);
+ if (TCG_TARGET_HAS_v128) {
+ TCGv_vec in = tcg_temp_new_vec(TCG_TYPE_V128);
+
+ tcg_gen_ld_vec(in, cpu_env, aofs);
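+ /* If aofs == dofs, the first 16 bytes are already in place; start at 16. */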
+ for (i = (aofs == dofs) * 16; i < oprsz; i += 16) {
+ tcg_gen_st_vec(in, cpu_env, dofs + i);
+ }
+ tcg_temp_free_vec(in);
+ } else {
+ TCGv_i64 in0 = tcg_temp_new_i64();
+ TCGv_i64 in1 = tcg_temp_new_i64();
+
+ tcg_gen_ld_i64(in0, cpu_env, aofs);
+ tcg_gen_ld_i64(in1, cpu_env, aofs + 8);
+ for (i = (aofs == dofs) * 16; i < oprsz; i += 16) {
+ tcg_gen_st_i64(in0, cpu_env, dofs + i);
+ tcg_gen_st_i64(in1, cpu_env, dofs + i + 8);
+ }
+ tcg_temp_free_i64(in0);
+ tcg_temp_free_i64(in1);
+ }
+ if (oprsz < maxsz) {
+ expand_clr(dofs + oprsz, maxsz - oprsz);
+ }
+ } else if (vece == 5) {
+ /* 256-bit duplicate. */
+ int i;
+
+ tcg_debug_assert(oprsz >= 32);
+ tcg_debug_assert(oprsz % 32 == 0);
+ if (TCG_TARGET_HAS_v256) {
+ TCGv_vec in = tcg_temp_new_vec(TCG_TYPE_V256);
+
+ tcg_gen_ld_vec(in, cpu_env, aofs);
+ for (i = (aofs == dofs) * 32; i < oprsz; i += 32) {
+ tcg_gen_st_vec(in, cpu_env, dofs + i);
+ }
+ tcg_temp_free_vec(in);
+ } else if (TCG_TARGET_HAS_v128) {
+ TCGv_vec in0 = tcg_temp_new_vec(TCG_TYPE_V128);
+ TCGv_vec in1 = tcg_temp_new_vec(TCG_TYPE_V128);
+
+ tcg_gen_ld_vec(in0, cpu_env, aofs);
+ tcg_gen_ld_vec(in1, cpu_env, aofs + 16);
+ for (i = (aofs == dofs) * 32; i < oprsz; i += 32) {
+ tcg_gen_st_vec(in0, cpu_env, dofs + i);
+ tcg_gen_st_vec(in1, cpu_env, dofs + i + 16);
+ }
+ tcg_temp_free_vec(in0);
+ tcg_temp_free_vec(in1);
+ } else {
+ TCGv_i64 in[4];
+ int j;
+
+ for (j = 0; j < 4; ++j) {
+ in[j] = tcg_temp_new_i64();
+ tcg_gen_ld_i64(in[j], cpu_env, aofs + j * 8);
+ }
+ for (i = (aofs == dofs) * 32; i < oprsz; i += 32) {
+ for (j = 0; j < 4; ++j) {
+ tcg_gen_st_i64(in[j], cpu_env, dofs + i + j * 8);
+ }
+ }
+ for (j = 0; j < 4; ++j) {
+ tcg_temp_free_i64(in[j]);
+ }
+ }
+ if (oprsz < maxsz) {
+ expand_clr(dofs + oprsz, maxsz - oprsz);
+ }
+ } else {
+ g_assert_not_reached();
+ }
+}
+
+void tcg_gen_gvec_dup_imm(unsigned vece, uint32_t dofs, uint32_t oprsz,
+ uint32_t maxsz, uint64_t x)
+{
+ check_size_align(oprsz, maxsz, dofs);
+ do_dup(vece, dofs, oprsz, maxsz, NULL, NULL, x);
+}
+
+void tcg_gen_gvec_not(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t oprsz, uint32_t maxsz)
+{
+ static const GVecGen2 g = {
+ .fni8 = tcg_gen_not_i64,
+ .fniv = tcg_gen_not_vec,
+ .fno = gen_helper_gvec_not,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ };
+ tcg_gen_gvec_2(dofs, aofs, oprsz, maxsz, &g);
+}
+
+/* Perform a vector addition using normal addition and a mask. The mask
+ should be the sign bit of each lane. This 6-operation form is more
+ efficient than separate additions when there are 4 or more lanes in
+ the 64-bit operation. */
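+/* To see why this works: with the per-lane sign bits cleared, the add
+ below cannot carry out of any lane; the discarded top bit of each lane
+ is then recomputed as (a ^ b ^ carry-in) by xor-ing back (a ^ b) & m. */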
+static void gen_addv_mask(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, TCGv_i64 m)
+{
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ TCGv_i64 t2 = tcg_temp_new_i64();
+ TCGv_i64 t3 = tcg_temp_new_i64();
+
+ tcg_gen_andc_i64(t1, a, m);
+ tcg_gen_andc_i64(t2, b, m);
+ tcg_gen_xor_i64(t3, a, b);
+ tcg_gen_add_i64(d, t1, t2);
+ tcg_gen_and_i64(t3, t3, m);
+ tcg_gen_xor_i64(d, d, t3);
+
+ tcg_temp_free_i64(t1);
+ tcg_temp_free_i64(t2);
+ tcg_temp_free_i64(t3);
+}
+
+void tcg_gen_vec_add8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 m = tcg_constant_i64(dup_const(MO_8, 0x80));
+ gen_addv_mask(d, a, b, m);
+}
+
+void tcg_gen_vec_add8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 m = tcg_constant_i32((int32_t)dup_const(MO_8, 0x80));
+ TCGv_i32 t1 = tcg_temp_new_i32();
+ TCGv_i32 t2 = tcg_temp_new_i32();
+ TCGv_i32 t3 = tcg_temp_new_i32();
+
+ tcg_gen_andc_i32(t1, a, m);
+ tcg_gen_andc_i32(t2, b, m);
+ tcg_gen_xor_i32(t3, a, b);
+ tcg_gen_add_i32(d, t1, t2);
+ tcg_gen_and_i32(t3, t3, m);
+ tcg_gen_xor_i32(d, d, t3);
+
+ tcg_temp_free_i32(t1);
+ tcg_temp_free_i32(t2);
+ tcg_temp_free_i32(t3);
+}
+
+void tcg_gen_vec_add16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 m = tcg_constant_i64(dup_const(MO_16, 0x8000));
+ gen_addv_mask(d, a, b, m);
+}
+
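+/* Two 16-bit lanes in an i32: t1 adds b to a with a's low half cleared,
+ so the low-half carry cannot reach the high lane; the low lane is then
+ taken from the full-width add via deposit. The 32-in-64 variant below
+ uses the same construction. */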
+void tcg_gen_vec_add16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 t1 = tcg_temp_new_i32();
+ TCGv_i32 t2 = tcg_temp_new_i32();
+
+ tcg_gen_andi_i32(t1, a, ~0xffff);
+ tcg_gen_add_i32(t2, a, b);
+ tcg_gen_add_i32(t1, t1, b);
+ tcg_gen_deposit_i32(d, t1, t2, 0, 16);
+
+ tcg_temp_free_i32(t1);
+ tcg_temp_free_i32(t2);
+}
+
+void tcg_gen_vec_add32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ TCGv_i64 t2 = tcg_temp_new_i64();
+
+ tcg_gen_andi_i64(t1, a, ~0xffffffffull);
+ tcg_gen_add_i64(t2, a, b);
+ tcg_gen_add_i64(t1, t1, b);
+ tcg_gen_deposit_i64(d, t1, t2, 0, 32);
+
+ tcg_temp_free_i64(t1);
+ tcg_temp_free_i64(t2);
+}
+
+static const TCGOpcode vecop_list_add[] = { INDEX_op_add_vec, 0 };
+
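+/*
+ * Usage sketch (illustrative only; CPUFooState and its vreg[] layout are
+ * hypothetical, not defined by this patch): a front end keeping 128-bit
+ * guest vector registers in env could emit a lane-wise 32-bit add as
+ *
+ *     uint32_t d = offsetof(CPUFooState, vreg[rd]);
+ *     uint32_t a = offsetof(CPUFooState, vreg[rn]);
+ *     uint32_t b = offsetof(CPUFooState, vreg[rm]);
+ *     tcg_gen_gvec_add(MO_32, d, a, b, 16, 16);
+ */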
+void tcg_gen_gvec_add(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const GVecGen3 g[4] = {
+ { .fni8 = tcg_gen_vec_add8_i64,
+ .fniv = tcg_gen_add_vec,
+ .fno = gen_helper_gvec_add8,
+ .opt_opc = vecop_list_add,
+ .vece = MO_8 },
+ { .fni8 = tcg_gen_vec_add16_i64,
+ .fniv = tcg_gen_add_vec,
+ .fno = gen_helper_gvec_add16,
+ .opt_opc = vecop_list_add,
+ .vece = MO_16 },
+ { .fni4 = tcg_gen_add_i32,
+ .fniv = tcg_gen_add_vec,
+ .fno = gen_helper_gvec_add32,
+ .opt_opc = vecop_list_add,
+ .vece = MO_32 },
+ { .fni8 = tcg_gen_add_i64,
+ .fniv = tcg_gen_add_vec,
+ .fno = gen_helper_gvec_add64,
+ .opt_opc = vecop_list_add,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .vece = MO_64 },
+ };
+
+ tcg_debug_assert(vece <= MO_64);
+ tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
+}
+
+void tcg_gen_gvec_adds(unsigned vece, uint32_t dofs, uint32_t aofs,
+ TCGv_i64 c, uint32_t oprsz, uint32_t maxsz)
+{
+ static const GVecGen2s g[4] = {
+ { .fni8 = tcg_gen_vec_add8_i64,
+ .fniv = tcg_gen_add_vec,
+ .fno = gen_helper_gvec_adds8,
+ .opt_opc = vecop_list_add,
+ .vece = MO_8 },
+ { .fni8 = tcg_gen_vec_add16_i64,
+ .fniv = tcg_gen_add_vec,
+ .fno = gen_helper_gvec_adds16,
+ .opt_opc = vecop_list_add,
+ .vece = MO_16 },
+ { .fni4 = tcg_gen_add_i32,
+ .fniv = tcg_gen_add_vec,
+ .fno = gen_helper_gvec_adds32,
+ .opt_opc = vecop_list_add,
+ .vece = MO_32 },
+ { .fni8 = tcg_gen_add_i64,
+ .fniv = tcg_gen_add_vec,
+ .fno = gen_helper_gvec_adds64,
+ .opt_opc = vecop_list_add,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .vece = MO_64 },
+ };
+
+ tcg_debug_assert(vece <= MO_64);
+ tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, c, &g[vece]);
+}
+
+void tcg_gen_gvec_addi(unsigned vece, uint32_t dofs, uint32_t aofs,
+ int64_t c, uint32_t oprsz, uint32_t maxsz)
+{
+ TCGv_i64 tmp = tcg_constant_i64(c);
+ tcg_gen_gvec_adds(vece, dofs, aofs, tmp, oprsz, maxsz);
+}
+
+static const TCGOpcode vecop_list_sub[] = { INDEX_op_sub_vec, 0 };
+
+void tcg_gen_gvec_subs(unsigned vece, uint32_t dofs, uint32_t aofs,
+ TCGv_i64 c, uint32_t oprsz, uint32_t maxsz)
+{
+ static const GVecGen2s g[4] = {
+ { .fni8 = tcg_gen_vec_sub8_i64,
+ .fniv = tcg_gen_sub_vec,
+ .fno = gen_helper_gvec_subs8,
+ .opt_opc = vecop_list_sub,
+ .vece = MO_8 },
+ { .fni8 = tcg_gen_vec_sub16_i64,
+ .fniv = tcg_gen_sub_vec,
+ .fno = gen_helper_gvec_subs16,
+ .opt_opc = vecop_list_sub,
+ .vece = MO_16 },
+ { .fni4 = tcg_gen_sub_i32,
+ .fniv = tcg_gen_sub_vec,
+ .fno = gen_helper_gvec_subs32,
+ .opt_opc = vecop_list_sub,
+ .vece = MO_32 },
+ { .fni8 = tcg_gen_sub_i64,
+ .fniv = tcg_gen_sub_vec,
+ .fno = gen_helper_gvec_subs64,
+ .opt_opc = vecop_list_sub,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .vece = MO_64 },
+ };
+
+ tcg_debug_assert(vece <= MO_64);
+ tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, c, &g[vece]);
+}
+
+/* Perform a vector subtraction using normal subtraction and a mask.
+ Compare gen_addv_mask above. */
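+/* Here a | m forces each lane's top bit to 1 and b & ~m forces it to 0,
+ so the subtract below cannot borrow across a lane boundary; the lane's
+ top bit is then corrected by xor-ing with eqv(a, b) & m. */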
+static void gen_subv_mask(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, TCGv_i64 m)
+{
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ TCGv_i64 t2 = tcg_temp_new_i64();
+ TCGv_i64 t3 = tcg_temp_new_i64();
+
+ tcg_gen_or_i64(t1, a, m);
+ tcg_gen_andc_i64(t2, b, m);
+ tcg_gen_eqv_i64(t3, a, b);
+ tcg_gen_sub_i64(d, t1, t2);
+ tcg_gen_and_i64(t3, t3, m);
+ tcg_gen_xor_i64(d, d, t3);
+
+ tcg_temp_free_i64(t1);
+ tcg_temp_free_i64(t2);
+ tcg_temp_free_i64(t3);
+}
+
+void tcg_gen_vec_sub8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 m = tcg_constant_i64(dup_const(MO_8, 0x80));
+ gen_subv_mask(d, a, b, m);
+}
+
+void tcg_gen_vec_sub8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 m = tcg_constant_i32((int32_t)dup_const(MO_8, 0x80));
+ TCGv_i32 t1 = tcg_temp_new_i32();
+ TCGv_i32 t2 = tcg_temp_new_i32();
+ TCGv_i32 t3 = tcg_temp_new_i32();
+
+ tcg_gen_or_i32(t1, a, m);
+ tcg_gen_andc_i32(t2, b, m);
+ tcg_gen_eqv_i32(t3, a, b);
+ tcg_gen_sub_i32(d, t1, t2);
+ tcg_gen_and_i32(t3, t3, m);
+ tcg_gen_xor_i32(d, d, t3);
+
+ tcg_temp_free_i32(t1);
+ tcg_temp_free_i32(t2);
+ tcg_temp_free_i32(t3);
+}
+
+void tcg_gen_vec_sub16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 m = tcg_constant_i64(dup_const(MO_16, 0x8000));
+ gen_subv_mask(d, a, b, m);
+}
+
+void tcg_gen_vec_sub16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 t1 = tcg_temp_new_i32();
+ TCGv_i32 t2 = tcg_temp_new_i32();
+
+ tcg_gen_andi_i32(t1, b, ~0xffff);
+ tcg_gen_sub_i32(t2, a, b);
+ tcg_gen_sub_i32(t1, a, t1);
+ tcg_gen_deposit_i32(d, t1, t2, 0, 16);
+
+ tcg_temp_free_i32(t1);
+ tcg_temp_free_i32(t2);
+}
+
+void tcg_gen_vec_sub32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ TCGv_i64 t2 = tcg_temp_new_i64();
+
+ tcg_gen_andi_i64(t1, b, ~0xffffffffull);
+ tcg_gen_sub_i64(t2, a, b);
+ tcg_gen_sub_i64(t1, a, t1);
+ tcg_gen_deposit_i64(d, t1, t2, 0, 32);
+
+ tcg_temp_free_i64(t1);
+ tcg_temp_free_i64(t2);
+}
+
+void tcg_gen_gvec_sub(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const GVecGen3 g[4] = {
+ { .fni8 = tcg_gen_vec_sub8_i64,
+ .fniv = tcg_gen_sub_vec,
+ .fno = gen_helper_gvec_sub8,
+ .opt_opc = vecop_list_sub,
+ .vece = MO_8 },
+ { .fni8 = tcg_gen_vec_sub16_i64,
+ .fniv = tcg_gen_sub_vec,
+ .fno = gen_helper_gvec_sub16,
+ .opt_opc = vecop_list_sub,
+ .vece = MO_16 },
+ { .fni4 = tcg_gen_sub_i32,
+ .fniv = tcg_gen_sub_vec,
+ .fno = gen_helper_gvec_sub32,
+ .opt_opc = vecop_list_sub,
+ .vece = MO_32 },
+ { .fni8 = tcg_gen_sub_i64,
+ .fniv = tcg_gen_sub_vec,
+ .fno = gen_helper_gvec_sub64,
+ .opt_opc = vecop_list_sub,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .vece = MO_64 },
+ };
+
+ tcg_debug_assert(vece <= MO_64);
+ tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
+}
+
+static const TCGOpcode vecop_list_mul[] = { INDEX_op_mul_vec, 0 };
+
+void tcg_gen_gvec_mul(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const GVecGen3 g[4] = {
+ { .fniv = tcg_gen_mul_vec,
+ .fno = gen_helper_gvec_mul8,
+ .opt_opc = vecop_list_mul,
+ .vece = MO_8 },
+ { .fniv = tcg_gen_mul_vec,
+ .fno = gen_helper_gvec_mul16,
+ .opt_opc = vecop_list_mul,
+ .vece = MO_16 },
+ { .fni4 = tcg_gen_mul_i32,
+ .fniv = tcg_gen_mul_vec,
+ .fno = gen_helper_gvec_mul32,
+ .opt_opc = vecop_list_mul,
+ .vece = MO_32 },
+ { .fni8 = tcg_gen_mul_i64,
+ .fniv = tcg_gen_mul_vec,
+ .fno = gen_helper_gvec_mul64,
+ .opt_opc = vecop_list_mul,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .vece = MO_64 },
+ };
+
+ tcg_debug_assert(vece <= MO_64);
+ tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
+}
+
+void tcg_gen_gvec_muls(unsigned vece, uint32_t dofs, uint32_t aofs,
+ TCGv_i64 c, uint32_t oprsz, uint32_t maxsz)
+{
+ static const GVecGen2s g[4] = {
+ { .fniv = tcg_gen_mul_vec,
+ .fno = gen_helper_gvec_muls8,
+ .opt_opc = vecop_list_mul,
+ .vece = MO_8 },
+ { .fniv = tcg_gen_mul_vec,
+ .fno = gen_helper_gvec_muls16,
+ .opt_opc = vecop_list_mul,
+ .vece = MO_16 },
+ { .fni4 = tcg_gen_mul_i32,
+ .fniv = tcg_gen_mul_vec,
+ .fno = gen_helper_gvec_muls32,
+ .opt_opc = vecop_list_mul,
+ .vece = MO_32 },
+ { .fni8 = tcg_gen_mul_i64,
+ .fniv = tcg_gen_mul_vec,
+ .fno = gen_helper_gvec_muls64,
+ .opt_opc = vecop_list_mul,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .vece = MO_64 },
+ };
+
+ tcg_debug_assert(vece <= MO_64);
+ tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, c, &g[vece]);
+}
+
+void tcg_gen_gvec_muli(unsigned vece, uint32_t dofs, uint32_t aofs,
+ int64_t c, uint32_t oprsz, uint32_t maxsz)
+{
+ TCGv_i64 tmp = tcg_constant_i64(c);
+ tcg_gen_gvec_muls(vece, dofs, aofs, tmp, oprsz, maxsz);
+}
+
+void tcg_gen_gvec_ssadd(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = { INDEX_op_ssadd_vec, 0 };
+ static const GVecGen3 g[4] = {
+ { .fniv = tcg_gen_ssadd_vec,
+ .fno = gen_helper_gvec_ssadd8,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fniv = tcg_gen_ssadd_vec,
+ .fno = gen_helper_gvec_ssadd16,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fniv = tcg_gen_ssadd_vec,
+ .fno = gen_helper_gvec_ssadd32,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fniv = tcg_gen_ssadd_vec,
+ .fno = gen_helper_gvec_ssadd64,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+ tcg_debug_assert(vece <= MO_64);
+ tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
+}
+
+void tcg_gen_gvec_sssub(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = { INDEX_op_sssub_vec, 0 };
+ static const GVecGen3 g[4] = {
+ { .fniv = tcg_gen_sssub_vec,
+ .fno = gen_helper_gvec_sssub8,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fniv = tcg_gen_sssub_vec,
+ .fno = gen_helper_gvec_sssub16,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fniv = tcg_gen_sssub_vec,
+ .fno = gen_helper_gvec_sssub32,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fniv = tcg_gen_sssub_vec,
+ .fno = gen_helper_gvec_sssub64,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+ tcg_debug_assert(vece <= MO_64);
+ tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
+}
+
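+/* Unsigned saturating addition: perform the add, then if the result
+ wrapped around (d < a unsigned), replace it with all-ones. */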
+static void tcg_gen_usadd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 max = tcg_constant_i32(-1);
+ tcg_gen_add_i32(d, a, b);
+ tcg_gen_movcond_i32(TCG_COND_LTU, d, d, a, max, d);
+}
+
+static void tcg_gen_usadd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 max = tcg_constant_i64(-1);
+ tcg_gen_add_i64(d, a, b);
+ tcg_gen_movcond_i64(TCG_COND_LTU, d, d, a, max, d);
+}
+
+void tcg_gen_gvec_usadd(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = { INDEX_op_usadd_vec, 0 };
+ static const GVecGen3 g[4] = {
+ { .fniv = tcg_gen_usadd_vec,
+ .fno = gen_helper_gvec_usadd8,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fniv = tcg_gen_usadd_vec,
+ .fno = gen_helper_gvec_usadd16,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = tcg_gen_usadd_i32,
+ .fniv = tcg_gen_usadd_vec,
+ .fno = gen_helper_gvec_usadd32,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = tcg_gen_usadd_i64,
+ .fniv = tcg_gen_usadd_vec,
+ .fno = gen_helper_gvec_usadd64,
+ .opt_opc = vecop_list,
+ .vece = MO_64 }
+ };
+ tcg_debug_assert(vece <= MO_64);
+ tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
+}
+
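+/* Unsigned saturating subtraction: if a < b the true result would be
+ negative, so clamp it to zero. */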
+static void tcg_gen_ussub_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 min = tcg_constant_i32(0);
+ tcg_gen_sub_i32(d, a, b);
+ tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, min, d);
+}
+
+static void tcg_gen_ussub_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 min = tcg_constant_i64(0);
+ tcg_gen_sub_i64(d, a, b);
+ tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, min, d);
+}
+
+void tcg_gen_gvec_ussub(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = { INDEX_op_ussub_vec, 0 };
+ static const GVecGen3 g[4] = {
+ { .fniv = tcg_gen_ussub_vec,
+ .fno = gen_helper_gvec_ussub8,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fniv = tcg_gen_ussub_vec,
+ .fno = gen_helper_gvec_ussub16,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = tcg_gen_ussub_i32,
+ .fniv = tcg_gen_ussub_vec,
+ .fno = gen_helper_gvec_ussub32,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = tcg_gen_ussub_i64,
+ .fniv = tcg_gen_ussub_vec,
+ .fno = gen_helper_gvec_ussub64,
+ .opt_opc = vecop_list,
+ .vece = MO_64 }
+ };
+ tcg_debug_assert(vece <= MO_64);
+ tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
+}
+
+void tcg_gen_gvec_smin(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = { INDEX_op_smin_vec, 0 };
+ static const GVecGen3 g[4] = {
+ { .fniv = tcg_gen_smin_vec,
+ .fno = gen_helper_gvec_smin8,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fniv = tcg_gen_smin_vec,
+ .fno = gen_helper_gvec_smin16,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = tcg_gen_smin_i32,
+ .fniv = tcg_gen_smin_vec,
+ .fno = gen_helper_gvec_smin32,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = tcg_gen_smin_i64,
+ .fniv = tcg_gen_smin_vec,
+ .fno = gen_helper_gvec_smin64,
+ .opt_opc = vecop_list,
+ .vece = MO_64 }
+ };
+ tcg_debug_assert(vece <= MO_64);
+ tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
+}
+
+void tcg_gen_gvec_umin(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = { INDEX_op_umin_vec, 0 };
+ static const GVecGen3 g[4] = {
+ { .fniv = tcg_gen_umin_vec,
+ .fno = gen_helper_gvec_umin8,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fniv = tcg_gen_umin_vec,
+ .fno = gen_helper_gvec_umin16,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = tcg_gen_umin_i32,
+ .fniv = tcg_gen_umin_vec,
+ .fno = gen_helper_gvec_umin32,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = tcg_gen_umin_i64,
+ .fniv = tcg_gen_umin_vec,
+ .fno = gen_helper_gvec_umin64,
+ .opt_opc = vecop_list,
+ .vece = MO_64 }
+ };
+ tcg_debug_assert(vece <= MO_64);
+ tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
+}
+
+void tcg_gen_gvec_smax(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = { INDEX_op_smax_vec, 0 };
+ static const GVecGen3 g[4] = {
+ { .fniv = tcg_gen_smax_vec,
+ .fno = gen_helper_gvec_smax8,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fniv = tcg_gen_smax_vec,
+ .fno = gen_helper_gvec_smax16,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = tcg_gen_smax_i32,
+ .fniv = tcg_gen_smax_vec,
+ .fno = gen_helper_gvec_smax32,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = tcg_gen_smax_i64,
+ .fniv = tcg_gen_smax_vec,
+ .fno = gen_helper_gvec_smax64,
+ .opt_opc = vecop_list,
+ .vece = MO_64 }
+ };
+ tcg_debug_assert(vece <= MO_64);
+ tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
+}
+
+void tcg_gen_gvec_umax(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = { INDEX_op_umax_vec, 0 };
+ static const GVecGen3 g[4] = {
+ { .fniv = tcg_gen_umax_vec,
+ .fno = gen_helper_gvec_umax8,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fniv = tcg_gen_umax_vec,
+ .fno = gen_helper_gvec_umax16,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = tcg_gen_umax_i32,
+ .fniv = tcg_gen_umax_vec,
+ .fno = gen_helper_gvec_umax32,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = tcg_gen_umax_i64,
+ .fniv = tcg_gen_umax_vec,
+ .fno = gen_helper_gvec_umax64,
+ .opt_opc = vecop_list,
+ .vece = MO_64 }
+ };
+ tcg_debug_assert(vece <= MO_64);
+ tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
+}
+
+/* Perform a vector negation using normal negation and a mask.
+ Compare gen_subv_mask above; this is that construction specialized for
+ a == 0, where a | m reduces to m and (eqv(a, b) & m) to andc(m, b). */
+static void gen_negv_mask(TCGv_i64 d, TCGv_i64 b, TCGv_i64 m)
+{
+ TCGv_i64 t2 = tcg_temp_new_i64();
+ TCGv_i64 t3 = tcg_temp_new_i64();
+
+ tcg_gen_andc_i64(t3, m, b);
+ tcg_gen_andc_i64(t2, b, m);
+ tcg_gen_sub_i64(d, m, t2);
+ tcg_gen_xor_i64(d, d, t3);
+
+ tcg_temp_free_i64(t2);
+ tcg_temp_free_i64(t3);
+}
+
+void tcg_gen_vec_neg8_i64(TCGv_i64 d, TCGv_i64 b)
+{
+ TCGv_i64 m = tcg_constant_i64(dup_const(MO_8, 0x80));
+ gen_negv_mask(d, b, m);
+}
+
+void tcg_gen_vec_neg16_i64(TCGv_i64 d, TCGv_i64 b)
+{
+ TCGv_i64 m = tcg_constant_i64(dup_const(MO_16, 0x8000));
+ gen_negv_mask(d, b, m);
+}
+
+void tcg_gen_vec_neg32_i64(TCGv_i64 d, TCGv_i64 b)
+{
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ TCGv_i64 t2 = tcg_temp_new_i64();
+
+ tcg_gen_andi_i64(t1, b, ~0xffffffffull);
+ tcg_gen_neg_i64(t2, b);
+ tcg_gen_neg_i64(t1, t1);
+ tcg_gen_deposit_i64(d, t1, t2, 0, 32);
+
+ tcg_temp_free_i64(t1);
+ tcg_temp_free_i64(t2);
+}
+
+void tcg_gen_gvec_neg(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = { INDEX_op_neg_vec, 0 };
+ static const GVecGen2 g[4] = {
+ { .fni8 = tcg_gen_vec_neg8_i64,
+ .fniv = tcg_gen_neg_vec,
+ .fno = gen_helper_gvec_neg8,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fni8 = tcg_gen_vec_neg16_i64,
+ .fniv = tcg_gen_neg_vec,
+ .fno = gen_helper_gvec_neg16,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = tcg_gen_neg_i32,
+ .fniv = tcg_gen_neg_vec,
+ .fno = gen_helper_gvec_neg32,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = tcg_gen_neg_i64,
+ .fniv = tcg_gen_neg_vec,
+ .fno = gen_helper_gvec_neg64,
+ .opt_opc = vecop_list,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .vece = MO_64 },
+ };
+
+ tcg_debug_assert(vece <= MO_64);
+ tcg_gen_gvec_2(dofs, aofs, oprsz, maxsz, &g[vece]);
+}
+
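+/* Absolute value per lane via the two's complement identity: complement
+ the negative lanes and add one. Worked example for MO_8: a lane holding
+ 0xfe (-2) gets t == 0xff, so d = 0xfe ^ 0xff = 0x01, then + 1 = 0x02. */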
+static void gen_absv_mask(TCGv_i64 d, TCGv_i64 b, unsigned vece)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+ int nbit = 8 << vece;
+
+ /* Create -1 for each negative element. */
+ tcg_gen_shri_i64(t, b, nbit - 1);
+ tcg_gen_andi_i64(t, t, dup_const(vece, 1));
+ tcg_gen_muli_i64(t, t, (1 << nbit) - 1);
+
+ /*
+ * Invert (via xor -1) and add one.
+ * Because of the ordering the msb is cleared,
+ * so we never have carry into the next element.
+ */
+ tcg_gen_xor_i64(d, b, t);
+ tcg_gen_andi_i64(t, t, dup_const(vece, 1));
+ tcg_gen_add_i64(d, d, t);
+
+ tcg_temp_free_i64(t);
+}
+
+static void tcg_gen_vec_abs8_i64(TCGv_i64 d, TCGv_i64 b)
+{
+ gen_absv_mask(d, b, MO_8);
+}
+
+static void tcg_gen_vec_abs16_i64(TCGv_i64 d, TCGv_i64 b)
+{
+ gen_absv_mask(d, b, MO_16);
+}
+
+void tcg_gen_gvec_abs(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = { INDEX_op_abs_vec, 0 };
+ static const GVecGen2 g[4] = {
+ { .fni8 = tcg_gen_vec_abs8_i64,
+ .fniv = tcg_gen_abs_vec,
+ .fno = gen_helper_gvec_abs8,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fni8 = tcg_gen_vec_abs16_i64,
+ .fniv = tcg_gen_abs_vec,
+ .fno = gen_helper_gvec_abs16,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = tcg_gen_abs_i32,
+ .fniv = tcg_gen_abs_vec,
+ .fno = gen_helper_gvec_abs32,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = tcg_gen_abs_i64,
+ .fniv = tcg_gen_abs_vec,
+ .fno = gen_helper_gvec_abs64,
+ .opt_opc = vecop_list,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .vece = MO_64 },
+ };
+
+ tcg_debug_assert(vece <= MO_64);
+ tcg_gen_gvec_2(dofs, aofs, oprsz, maxsz, &g[vece]);
+}
+
+void tcg_gen_gvec_and(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const GVecGen3 g = {
+ .fni8 = tcg_gen_and_i64,
+ .fniv = tcg_gen_and_vec,
+ .fno = gen_helper_gvec_and,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ };
+
+ if (aofs == bofs) {
+ tcg_gen_gvec_mov(vece, dofs, aofs, oprsz, maxsz);
+ } else {
+ tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g);
+ }
+}
+
+void tcg_gen_gvec_or(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const GVecGen3 g = {
+ .fni8 = tcg_gen_or_i64,
+ .fniv = tcg_gen_or_vec,
+ .fno = gen_helper_gvec_or,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ };
+
+ if (aofs == bofs) {
+ tcg_gen_gvec_mov(vece, dofs, aofs, oprsz, maxsz);
+ } else {
+ tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g);
+ }
+}
+
+void tcg_gen_gvec_xor(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const GVecGen3 g = {
+ .fni8 = tcg_gen_xor_i64,
+ .fniv = tcg_gen_xor_vec,
+ .fno = gen_helper_gvec_xor,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ };
+
+ if (aofs == bofs) {
+ tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, 0);
+ } else {
+ tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g);
+ }
+}
+
+void tcg_gen_gvec_andc(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const GVecGen3 g = {
+ .fni8 = tcg_gen_andc_i64,
+ .fniv = tcg_gen_andc_vec,
+ .fno = gen_helper_gvec_andc,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ };
+
+ if (aofs == bofs) {
+ tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, 0);
+ } else {
+ tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g);
+ }
+}
+
+void tcg_gen_gvec_orc(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const GVecGen3 g = {
+ .fni8 = tcg_gen_orc_i64,
+ .fniv = tcg_gen_orc_vec,
+ .fno = gen_helper_gvec_orc,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ };
+
+ if (aofs == bofs) {
+ tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, -1);
+ } else {
+ tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g);
+ }
+}
+
+void tcg_gen_gvec_nand(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const GVecGen3 g = {
+ .fni8 = tcg_gen_nand_i64,
+ .fniv = tcg_gen_nand_vec,
+ .fno = gen_helper_gvec_nand,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ };
+
+ if (aofs == bofs) {
+ tcg_gen_gvec_not(vece, dofs, aofs, oprsz, maxsz);
+ } else {
+ tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g);
+ }
+}
+
+void tcg_gen_gvec_nor(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const GVecGen3 g = {
+ .fni8 = tcg_gen_nor_i64,
+ .fniv = tcg_gen_nor_vec,
+ .fno = gen_helper_gvec_nor,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ };
+
+ if (aofs == bofs) {
+ tcg_gen_gvec_not(vece, dofs, aofs, oprsz, maxsz);
+ } else {
+ tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g);
+ }
+}
+
+void tcg_gen_gvec_eqv(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const GVecGen3 g = {
+ .fni8 = tcg_gen_eqv_i64,
+ .fniv = tcg_gen_eqv_vec,
+ .fno = gen_helper_gvec_eqv,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ };
+
+ if (aofs == bofs) {
+ tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, -1);
+ } else {
+ tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g);
+ }
+}
+
+static const GVecGen2s gop_ands = {
+ .fni8 = tcg_gen_and_i64,
+ .fniv = tcg_gen_and_vec,
+ .fno = gen_helper_gvec_ands,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .vece = MO_64
+};
+
+void tcg_gen_gvec_ands(unsigned vece, uint32_t dofs, uint32_t aofs,
+ TCGv_i64 c, uint32_t oprsz, uint32_t maxsz)
+{
+ TCGv_i64 tmp = tcg_temp_new_i64();
+ tcg_gen_dup_i64(vece, tmp, c);
+ tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, tmp, &gop_ands);
+ tcg_temp_free_i64(tmp);
+}
+
+void tcg_gen_gvec_andi(unsigned vece, uint32_t dofs, uint32_t aofs,
+ int64_t c, uint32_t oprsz, uint32_t maxsz)
+{
+ TCGv_i64 tmp = tcg_constant_i64(dup_const(vece, c));
+ tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, tmp, &gop_ands);
+}
+
+static const GVecGen2s gop_xors = {
+ .fni8 = tcg_gen_xor_i64,
+ .fniv = tcg_gen_xor_vec,
+ .fno = gen_helper_gvec_xors,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .vece = MO_64
+};
+
+void tcg_gen_gvec_xors(unsigned vece, uint32_t dofs, uint32_t aofs,
+ TCGv_i64 c, uint32_t oprsz, uint32_t maxsz)
+{
+ TCGv_i64 tmp = tcg_temp_new_i64();
+ tcg_gen_dup_i64(vece, tmp, c);
+ tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, tmp, &gop_xors);
+ tcg_temp_free_i64(tmp);
+}
+
+void tcg_gen_gvec_xori(unsigned vece, uint32_t dofs, uint32_t aofs,
+ int64_t c, uint32_t oprsz, uint32_t maxsz)
+{
+ TCGv_i64 tmp = tcg_constant_i64(dup_const(vece, c));
+ tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, tmp, &gop_xors);
+}
+
+static const GVecGen2s gop_ors = {
+ .fni8 = tcg_gen_or_i64,
+ .fniv = tcg_gen_or_vec,
+ .fno = gen_helper_gvec_ors,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .vece = MO_64
+};
+
+void tcg_gen_gvec_ors(unsigned vece, uint32_t dofs, uint32_t aofs,
+ TCGv_i64 c, uint32_t oprsz, uint32_t maxsz)
+{
+ TCGv_i64 tmp = tcg_temp_new_i64();
+ tcg_gen_dup_i64(vece, tmp, c);
+ tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, tmp, &gop_ors);
+ tcg_temp_free_i64(tmp);
+}
+
+void tcg_gen_gvec_ori(unsigned vece, uint32_t dofs, uint32_t aofs,
+ int64_t c, uint32_t oprsz, uint32_t maxsz)
+{
+ TCGv_i64 tmp = tcg_constant_i64(dup_const(vece, c));
+ tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, tmp, &gop_ors);
+}
+
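+/* Per-lane immediate shifts on an integer register: shift the whole word,
+ then mask away any bits that crossed a lane boundary. The right-shift
+ and arithmetic-shift helpers below use the same mask trick. */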
+void tcg_gen_vec_shl8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
+{
+ uint64_t mask = dup_const(MO_8, 0xff << c);
+ tcg_gen_shli_i64(d, a, c);
+ tcg_gen_andi_i64(d, d, mask);
+}
+
+void tcg_gen_vec_shl16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
+{
+ uint64_t mask = dup_const(MO_16, 0xffff << c);
+ tcg_gen_shli_i64(d, a, c);
+ tcg_gen_andi_i64(d, d, mask);
+}
+
+void tcg_gen_vec_shl8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c)
+{
+ uint32_t mask = dup_const(MO_8, 0xff << c);
+ tcg_gen_shli_i32(d, a, c);
+ tcg_gen_andi_i32(d, d, mask);
+}
+
+void tcg_gen_vec_shl16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c)
+{
+ uint32_t mask = dup_const(MO_16, 0xffff << c);
+ tcg_gen_shli_i32(d, a, c);
+ tcg_gen_andi_i32(d, d, mask);
+}
+
+void tcg_gen_gvec_shli(unsigned vece, uint32_t dofs, uint32_t aofs,
+ int64_t shift, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
+ static const GVecGen2i g[4] = {
+ { .fni8 = tcg_gen_vec_shl8i_i64,
+ .fniv = tcg_gen_shli_vec,
+ .fno = gen_helper_gvec_shl8i,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fni8 = tcg_gen_vec_shl16i_i64,
+ .fniv = tcg_gen_shli_vec,
+ .fno = gen_helper_gvec_shl16i,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = tcg_gen_shli_i32,
+ .fniv = tcg_gen_shli_vec,
+ .fno = gen_helper_gvec_shl32i,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = tcg_gen_shli_i64,
+ .fniv = tcg_gen_shli_vec,
+ .fno = gen_helper_gvec_shl64i,
+ .opt_opc = vecop_list,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .vece = MO_64 },
+ };
+
+ tcg_debug_assert(vece <= MO_64);
+ tcg_debug_assert(shift >= 0 && shift < (8 << vece));
+ if (shift == 0) {
+ tcg_gen_gvec_mov(vece, dofs, aofs, oprsz, maxsz);
+ } else {
+ tcg_gen_gvec_2i(dofs, aofs, oprsz, maxsz, shift, &g[vece]);
+ }
+}
+
+void tcg_gen_vec_shr8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
+{
+ uint64_t mask = dup_const(MO_8, 0xff >> c);
+ tcg_gen_shri_i64(d, a, c);
+ tcg_gen_andi_i64(d, d, mask);
+}
+
+void tcg_gen_vec_shr16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
+{
+ uint64_t mask = dup_const(MO_16, 0xffff >> c);
+ tcg_gen_shri_i64(d, a, c);
+ tcg_gen_andi_i64(d, d, mask);
+}
+
+void tcg_gen_vec_shr8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c)
+{
+ uint32_t mask = dup_const(MO_8, 0xff >> c);
+ tcg_gen_shri_i32(d, a, c);
+ tcg_gen_andi_i32(d, d, mask);
+}
+
+void tcg_gen_vec_shr16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c)
+{
+ uint32_t mask = dup_const(MO_16, 0xffff >> c);
+ tcg_gen_shri_i32(d, a, c);
+ tcg_gen_andi_i32(d, d, mask);
+}
+
+void tcg_gen_gvec_shri(unsigned vece, uint32_t dofs, uint32_t aofs,
+ int64_t shift, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
+ static const GVecGen2i g[4] = {
+ { .fni8 = tcg_gen_vec_shr8i_i64,
+ .fniv = tcg_gen_shri_vec,
+ .fno = gen_helper_gvec_shr8i,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fni8 = tcg_gen_vec_shr16i_i64,
+ .fniv = tcg_gen_shri_vec,
+ .fno = gen_helper_gvec_shr16i,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = tcg_gen_shri_i32,
+ .fniv = tcg_gen_shri_vec,
+ .fno = gen_helper_gvec_shr32i,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = tcg_gen_shri_i64,
+ .fniv = tcg_gen_shri_vec,
+ .fno = gen_helper_gvec_shr64i,
+ .opt_opc = vecop_list,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .vece = MO_64 },
+ };
+
+ tcg_debug_assert(vece <= MO_64);
+ tcg_debug_assert(shift >= 0 && shift < (8 << vece));
+ if (shift == 0) {
+ tcg_gen_gvec_mov(vece, dofs, aofs, oprsz, maxsz);
+ } else {
+ tcg_gen_gvec_2i(dofs, aofs, oprsz, maxsz, shift, &g[vece]);
+ }
+}
+
+void tcg_gen_vec_sar8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
+{
+ uint64_t s_mask = dup_const(MO_8, 0x80 >> c);
+ uint64_t c_mask = dup_const(MO_8, 0xff >> c);
+ TCGv_i64 s = tcg_temp_new_i64();
+
+ tcg_gen_shri_i64(d, a, c);
+ tcg_gen_andi_i64(s, d, s_mask); /* isolate (shifted) sign bit */
+ tcg_gen_muli_i64(s, s, (2 << c) - 2); /* replicate isolated signs */
+ tcg_gen_andi_i64(d, d, c_mask); /* clear out bits above sign */
+ tcg_gen_or_i64(d, d, s); /* include sign extension */
+ tcg_temp_free_i64(s);
+}
+
+void tcg_gen_vec_sar16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
+{
+ uint64_t s_mask = dup_const(MO_16, 0x8000 >> c);
+ uint64_t c_mask = dup_const(MO_16, 0xffff >> c);
+ TCGv_i64 s = tcg_temp_new_i64();
+
+ tcg_gen_shri_i64(d, a, c);
+ tcg_gen_andi_i64(s, d, s_mask); /* isolate (shifted) sign bit */
+ tcg_gen_andi_i64(d, d, c_mask); /* clear out bits above sign */
+ tcg_gen_muli_i64(s, s, (2 << c) - 2); /* replicate isolated signs */
+ tcg_gen_or_i64(d, d, s); /* include sign extension */
+ tcg_temp_free_i64(s);
+}
+
+void tcg_gen_vec_sar8i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c)
+{
+ uint32_t s_mask = dup_const(MO_8, 0x80 >> c);
+ uint32_t c_mask = dup_const(MO_8, 0xff >> c);
+ TCGv_i32 s = tcg_temp_new_i32();
+
+ tcg_gen_shri_i32(d, a, c);
+ tcg_gen_andi_i32(s, d, s_mask); /* isolate (shifted) sign bit */
+ tcg_gen_muli_i32(s, s, (2 << c) - 2); /* replicate isolated signs */
+ tcg_gen_andi_i32(d, d, c_mask); /* clear out bits above sign */
+ tcg_gen_or_i32(d, d, s); /* include sign extension */
+ tcg_temp_free_i32(s);
+}
+
+void tcg_gen_vec_sar16i_i32(TCGv_i32 d, TCGv_i32 a, int32_t c)
+{
+ uint32_t s_mask = dup_const(MO_16, 0x8000 >> c);
+ uint32_t c_mask = dup_const(MO_16, 0xffff >> c);
+ TCGv_i32 s = tcg_temp_new_i32();
+
+ tcg_gen_shri_i32(d, a, c);
+ tcg_gen_andi_i32(s, d, s_mask); /* isolate (shifted) sign bit */
+ tcg_gen_andi_i32(d, d, c_mask); /* clear out bits above sign */
+ tcg_gen_muli_i32(s, s, (2 << c) - 2); /* replicate isolated signs */
+ tcg_gen_or_i32(d, d, s); /* include sign extension */
+ tcg_temp_free_i32(s);
+}
+
+void tcg_gen_gvec_sari(unsigned vece, uint32_t dofs, uint32_t aofs,
+ int64_t shift, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = { INDEX_op_sari_vec, 0 };
+ static const GVecGen2i g[4] = {
+ { .fni8 = tcg_gen_vec_sar8i_i64,
+ .fniv = tcg_gen_sari_vec,
+ .fno = gen_helper_gvec_sar8i,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fni8 = tcg_gen_vec_sar16i_i64,
+ .fniv = tcg_gen_sari_vec,
+ .fno = gen_helper_gvec_sar16i,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = tcg_gen_sari_i32,
+ .fniv = tcg_gen_sari_vec,
+ .fno = gen_helper_gvec_sar32i,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = tcg_gen_sari_i64,
+ .fniv = tcg_gen_sari_vec,
+ .fno = gen_helper_gvec_sar64i,
+ .opt_opc = vecop_list,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .vece = MO_64 },
+ };
+
+ tcg_debug_assert(vece <= MO_64);
+ tcg_debug_assert(shift >= 0 && shift < (8 << vece));
+ if (shift == 0) {
+ tcg_gen_gvec_mov(vece, dofs, aofs, oprsz, maxsz);
+ } else {
+ tcg_gen_gvec_2i(dofs, aofs, oprsz, maxsz, shift, &g[vece]);
+ }
+}
+
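+/* Per-lane immediate rotates: combine the left-shifted and right-shifted
+ copies, masking each so no bits cross a lane boundary. Note that these
+ helpers clobber A; callers must treat it as a scratch value, as the
+ gvec expanders below do. */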
+void tcg_gen_vec_rotl8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
+{
+ uint64_t mask = dup_const(MO_8, 0xff << c);
+
+ tcg_gen_shli_i64(d, a, c);
+ tcg_gen_shri_i64(a, a, 8 - c);
+ tcg_gen_andi_i64(d, d, mask);
+ tcg_gen_andi_i64(a, a, ~mask);
+ tcg_gen_or_i64(d, d, a);
+}
+
+void tcg_gen_vec_rotl16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t c)
+{
+ uint64_t mask = dup_const(MO_16, 0xffff << c);
+
+ tcg_gen_shli_i64(d, a, c);
+ tcg_gen_shri_i64(a, a, 16 - c);
+ tcg_gen_andi_i64(d, d, mask);
+ tcg_gen_andi_i64(a, a, ~mask);
+ tcg_gen_or_i64(d, d, a);
+}
+
+void tcg_gen_gvec_rotli(unsigned vece, uint32_t dofs, uint32_t aofs,
+ int64_t shift, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = { INDEX_op_rotli_vec, 0 };
+ static const GVecGen2i g[4] = {
+ { .fni8 = tcg_gen_vec_rotl8i_i64,
+ .fniv = tcg_gen_rotli_vec,
+ .fno = gen_helper_gvec_rotl8i,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fni8 = tcg_gen_vec_rotl16i_i64,
+ .fniv = tcg_gen_rotli_vec,
+ .fno = gen_helper_gvec_rotl16i,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = tcg_gen_rotli_i32,
+ .fniv = tcg_gen_rotli_vec,
+ .fno = gen_helper_gvec_rotl32i,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = tcg_gen_rotli_i64,
+ .fniv = tcg_gen_rotli_vec,
+ .fno = gen_helper_gvec_rotl64i,
+ .opt_opc = vecop_list,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .vece = MO_64 },
+ };
+
+ tcg_debug_assert(vece <= MO_64);
+ tcg_debug_assert(shift >= 0 && shift < (8 << vece));
+ if (shift == 0) {
+ tcg_gen_gvec_mov(vece, dofs, aofs, oprsz, maxsz);
+ } else {
+ tcg_gen_gvec_2i(dofs, aofs, oprsz, maxsz, shift, &g[vece]);
+ }
+}
+
+void tcg_gen_gvec_rotri(unsigned vece, uint32_t dofs, uint32_t aofs,
+ int64_t shift, uint32_t oprsz, uint32_t maxsz)
+{
+ tcg_debug_assert(vece <= MO_64);
+ tcg_debug_assert(shift >= 0 && shift < (8 << vece));
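+ /*
+ * A right rotate is a left rotate by the negated count, modulo the
+ * element width: e.g. for MO_16, a rotate right by 3 becomes a
+ * rotate left by (-3 & 15) == 13.
+ */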
+ tcg_gen_gvec_rotli(vece, dofs, aofs, -shift & ((8 << vece) - 1),
+ oprsz, maxsz);
+}
+
+/*
+ * Specialized generation of vector shifts by a non-constant scalar.
+ */
+
+typedef struct {
+ void (*fni4)(TCGv_i32, TCGv_i32, TCGv_i32);
+ void (*fni8)(TCGv_i64, TCGv_i64, TCGv_i64);
+ void (*fniv_s)(unsigned, TCGv_vec, TCGv_vec, TCGv_i32);
+ void (*fniv_v)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec);
+ gen_helper_gvec_2 *fno[4];
+ TCGOpcode s_list[2];
+ TCGOpcode v_list[2];
+} GVecGen2sh;
+
+static void expand_2sh_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t oprsz, uint32_t tysz, TCGType type,
+ TCGv_i32 shift,
+ void (*fni)(unsigned, TCGv_vec, TCGv_vec, TCGv_i32))
+{
+ TCGv_vec t0 = tcg_temp_new_vec(type);
+ uint32_t i;
+
+ for (i = 0; i < oprsz; i += tysz) {
+ tcg_gen_ld_vec(t0, cpu_env, aofs + i);
+ fni(vece, t0, t0, shift);
+ tcg_gen_st_vec(t0, cpu_env, dofs + i);
+ }
+ tcg_temp_free_vec(t0);
+}
+
+static void
+do_gvec_shifts(unsigned vece, uint32_t dofs, uint32_t aofs, TCGv_i32 shift,
+ uint32_t oprsz, uint32_t maxsz, const GVecGen2sh *g)
+{
+ TCGType type;
+ uint32_t some;
+
+ check_size_align(oprsz, maxsz, dofs | aofs);
+ check_overlap_2(dofs, aofs, maxsz);
+
+ /* If the backend has a scalar expansion, great. */
+ type = choose_vector_type(g->s_list, vece, oprsz, vece == MO_64);
+ if (type) {
+ const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
+ switch (type) {
+ case TCG_TYPE_V256:
+ some = QEMU_ALIGN_DOWN(oprsz, 32);
+ expand_2sh_vec(vece, dofs, aofs, some, 32,
+ TCG_TYPE_V256, shift, g->fniv_s);
+ if (some == oprsz) {
+ break;
+ }
+ dofs += some;
+ aofs += some;
+ oprsz -= some;
+ maxsz -= some;
+ /* fallthru */
+ case TCG_TYPE_V128:
+ expand_2sh_vec(vece, dofs, aofs, oprsz, 16,
+ TCG_TYPE_V128, shift, g->fniv_s);
+ break;
+ case TCG_TYPE_V64:
+ expand_2sh_vec(vece, dofs, aofs, oprsz, 8,
+ TCG_TYPE_V64, shift, g->fniv_s);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ tcg_swap_vecop_list(hold_list);
+ goto clear_tail;
+ }
+
+ /* If the backend supports variable vector shifts, also cool. */
+ type = choose_vector_type(g->v_list, vece, oprsz, vece == MO_64);
+ if (type) {
+ const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
+ TCGv_vec v_shift = tcg_temp_new_vec(type);
+
+ if (vece == MO_64) {
+ TCGv_i64 sh64 = tcg_temp_new_i64();
+ tcg_gen_extu_i32_i64(sh64, shift);
+ tcg_gen_dup_i64_vec(MO_64, v_shift, sh64);
+ tcg_temp_free_i64(sh64);
+ } else {
+ tcg_gen_dup_i32_vec(vece, v_shift, shift);
+ }
+
+ switch (type) {
+ case TCG_TYPE_V256:
+ some = QEMU_ALIGN_DOWN(oprsz, 32);
+ expand_2s_vec(vece, dofs, aofs, some, 32, TCG_TYPE_V256,
+ v_shift, false, g->fniv_v);
+ if (some == oprsz) {
+ break;
+ }
+ dofs += some;
+ aofs += some;
+ oprsz -= some;
+ maxsz -= some;
+ /* fallthru */
+ case TCG_TYPE_V128:
+ expand_2s_vec(vece, dofs, aofs, oprsz, 16, TCG_TYPE_V128,
+ v_shift, false, g->fniv_v);
+ break;
+ case TCG_TYPE_V64:
+ expand_2s_vec(vece, dofs, aofs, oprsz, 8, TCG_TYPE_V64,
+ v_shift, false, g->fniv_v);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ tcg_temp_free_vec(v_shift);
+ tcg_swap_vecop_list(hold_list);
+ goto clear_tail;
+ }
+
+ /* Otherwise fall back to integral... */
+ if (vece == MO_32 && check_size_impl(oprsz, 4)) {
+ expand_2s_i32(dofs, aofs, oprsz, shift, false, g->fni4);
+ } else if (vece == MO_64 && check_size_impl(oprsz, 8)) {
+ TCGv_i64 sh64 = tcg_temp_new_i64();
+ tcg_gen_extu_i32_i64(sh64, shift);
+ expand_2s_i64(dofs, aofs, oprsz, sh64, false, g->fni8);
+ tcg_temp_free_i64(sh64);
+ } else {
+ TCGv_ptr a0 = tcg_temp_new_ptr();
+ TCGv_ptr a1 = tcg_temp_new_ptr();
+ TCGv_i32 desc = tcg_temp_new_i32();
+
+ tcg_gen_shli_i32(desc, shift, SIMD_DATA_SHIFT);
+ tcg_gen_ori_i32(desc, desc, simd_desc(oprsz, maxsz, 0));
+ tcg_gen_addi_ptr(a0, cpu_env, dofs);
+ tcg_gen_addi_ptr(a1, cpu_env, aofs);
+
+ g->fno[vece](a0, a1, desc);
+
+ tcg_temp_free_ptr(a0);
+ tcg_temp_free_ptr(a1);
+ tcg_temp_free_i32(desc);
+ return;
+ }
+
+ clear_tail:
+ if (oprsz < maxsz) {
+ expand_clr(dofs + oprsz, maxsz - oprsz);
+ }
+}
+
+void tcg_gen_gvec_shls(unsigned vece, uint32_t dofs, uint32_t aofs,
+ TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz)
+{
+ static const GVecGen2sh g = {
+ .fni4 = tcg_gen_shl_i32,
+ .fni8 = tcg_gen_shl_i64,
+ .fniv_s = tcg_gen_shls_vec,
+ .fniv_v = tcg_gen_shlv_vec,
+ .fno = {
+ gen_helper_gvec_shl8i,
+ gen_helper_gvec_shl16i,
+ gen_helper_gvec_shl32i,
+ gen_helper_gvec_shl64i,
+ },
+ .s_list = { INDEX_op_shls_vec, 0 },
+ .v_list = { INDEX_op_shlv_vec, 0 },
+ };
+
+ tcg_debug_assert(vece <= MO_64);
+ do_gvec_shifts(vece, dofs, aofs, shift, oprsz, maxsz, &g);
+}
+
+void tcg_gen_gvec_shrs(unsigned vece, uint32_t dofs, uint32_t aofs,
+ TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz)
+{
+ static const GVecGen2sh g = {
+ .fni4 = tcg_gen_shr_i32,
+ .fni8 = tcg_gen_shr_i64,
+ .fniv_s = tcg_gen_shrs_vec,
+ .fniv_v = tcg_gen_shrv_vec,
+ .fno = {
+ gen_helper_gvec_shr8i,
+ gen_helper_gvec_shr16i,
+ gen_helper_gvec_shr32i,
+ gen_helper_gvec_shr64i,
+ },
+ .s_list = { INDEX_op_shrs_vec, 0 },
+ .v_list = { INDEX_op_shrv_vec, 0 },
+ };
+
+ tcg_debug_assert(vece <= MO_64);
+ do_gvec_shifts(vece, dofs, aofs, shift, oprsz, maxsz, &g);
+}
+
+void tcg_gen_gvec_sars(unsigned vece, uint32_t dofs, uint32_t aofs,
+ TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz)
+{
+ static const GVecGen2sh g = {
+ .fni4 = tcg_gen_sar_i32,
+ .fni8 = tcg_gen_sar_i64,
+ .fniv_s = tcg_gen_sars_vec,
+ .fniv_v = tcg_gen_sarv_vec,
+ .fno = {
+ gen_helper_gvec_sar8i,
+ gen_helper_gvec_sar16i,
+ gen_helper_gvec_sar32i,
+ gen_helper_gvec_sar64i,
+ },
+ .s_list = { INDEX_op_sars_vec, 0 },
+ .v_list = { INDEX_op_sarv_vec, 0 },
+ };
+
+ tcg_debug_assert(vece <= MO_64);
+ do_gvec_shifts(vece, dofs, aofs, shift, oprsz, maxsz, &g);
+}
+
+void tcg_gen_gvec_rotls(unsigned vece, uint32_t dofs, uint32_t aofs,
+ TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz)
+{
+ static const GVecGen2sh g = {
+ .fni4 = tcg_gen_rotl_i32,
+ .fni8 = tcg_gen_rotl_i64,
+ .fniv_s = tcg_gen_rotls_vec,
+ .fniv_v = tcg_gen_rotlv_vec,
+ .fno = {
+ gen_helper_gvec_rotl8i,
+ gen_helper_gvec_rotl16i,
+ gen_helper_gvec_rotl32i,
+ gen_helper_gvec_rotl64i,
+ },
+ .s_list = { INDEX_op_rotls_vec, 0 },
+ .v_list = { INDEX_op_rotlv_vec, 0 },
+ };
+
+ tcg_debug_assert(vece <= MO_64);
+ do_gvec_shifts(vece, dofs, aofs, shift, oprsz, maxsz, &g);
+}
+
+/*
+ * Expand D = A << (B % element bits)
+ *
+ * Unlike scalar shifts, where it is easy for the target front end to
+ * include the modulo as part of its own expansion, here the modulo is
+ * folded into the generated vector operation.  If the target naturally
+ * includes the modulo as part of the operation, great!  If the target
+ * has some other behaviour for out-of-range shifts, then it could not
+ * use this function anyway, and would need to do its own expansion
+ * with custom functions.
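+ *
+ * For example, with MO_8 elements the shift count is masked with 7,
+ * so a per-element count of 9 behaves as a shift by 1.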
+ */
+static void tcg_gen_shlv_mod_vec(unsigned vece, TCGv_vec d,
+ TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+ TCGv_vec m = tcg_constant_vec_matching(d, vece, (8 << vece) - 1);
+
+ tcg_gen_and_vec(vece, t, b, m);
+ tcg_gen_shlv_vec(vece, d, a, t);
+ tcg_temp_free_vec(t);
+}
+
+static void tcg_gen_shl_mod_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 t = tcg_temp_new_i32();
+
+ tcg_gen_andi_i32(t, b, 31);
+ tcg_gen_shl_i32(d, a, t);
+ tcg_temp_free_i32(t);
+}
+
+static void tcg_gen_shl_mod_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_andi_i64(t, b, 63);
+ tcg_gen_shl_i64(d, a, t);
+ tcg_temp_free_i64(t);
+}
+
+void tcg_gen_gvec_shlv(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = { INDEX_op_shlv_vec, 0 };
+ static const GVecGen3 g[4] = {
+ { .fniv = tcg_gen_shlv_mod_vec,
+ .fno = gen_helper_gvec_shl8v,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fniv = tcg_gen_shlv_mod_vec,
+ .fno = gen_helper_gvec_shl16v,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = tcg_gen_shl_mod_i32,
+ .fniv = tcg_gen_shlv_mod_vec,
+ .fno = gen_helper_gvec_shl32v,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = tcg_gen_shl_mod_i64,
+ .fniv = tcg_gen_shlv_mod_vec,
+ .fno = gen_helper_gvec_shl64v,
+ .opt_opc = vecop_list,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .vece = MO_64 },
+ };
+
+ tcg_debug_assert(vece <= MO_64);
+ tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
+}
+
+/*
+ * Similarly for logical right shifts.
+ */
+
+static void tcg_gen_shrv_mod_vec(unsigned vece, TCGv_vec d,
+ TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+ TCGv_vec m = tcg_constant_vec_matching(d, vece, (8 << vece) - 1);
+
+ tcg_gen_and_vec(vece, t, b, m);
+ tcg_gen_shrv_vec(vece, d, a, t);
+ tcg_temp_free_vec(t);
+}
+
+static void tcg_gen_shr_mod_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 t = tcg_temp_new_i32();
+
+ tcg_gen_andi_i32(t, b, 31);
+ tcg_gen_shr_i32(d, a, t);
+ tcg_temp_free_i32(t);
+}
+
+static void tcg_gen_shr_mod_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_andi_i64(t, b, 63);
+ tcg_gen_shr_i64(d, a, t);
+ tcg_temp_free_i64(t);
+}
+
+void tcg_gen_gvec_shrv(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = { INDEX_op_shrv_vec, 0 };
+ static const GVecGen3 g[4] = {
+ { .fniv = tcg_gen_shrv_mod_vec,
+ .fno = gen_helper_gvec_shr8v,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fniv = tcg_gen_shrv_mod_vec,
+ .fno = gen_helper_gvec_shr16v,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = tcg_gen_shr_mod_i32,
+ .fniv = tcg_gen_shrv_mod_vec,
+ .fno = gen_helper_gvec_shr32v,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = tcg_gen_shr_mod_i64,
+ .fniv = tcg_gen_shrv_mod_vec,
+ .fno = gen_helper_gvec_shr64v,
+ .opt_opc = vecop_list,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .vece = MO_64 },
+ };
+
+ tcg_debug_assert(vece <= MO_64);
+ tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
+}
+
+/*
+ * Similarly for arithmetic right shifts.
+ */
+
+static void tcg_gen_sarv_mod_vec(unsigned vece, TCGv_vec d,
+ TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+ TCGv_vec m = tcg_constant_vec_matching(d, vece, (8 << vece) - 1);
+
+ tcg_gen_and_vec(vece, t, b, m);
+ tcg_gen_sarv_vec(vece, d, a, t);
+ tcg_temp_free_vec(t);
+}
+
+static void tcg_gen_sar_mod_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 t = tcg_temp_new_i32();
+
+ tcg_gen_andi_i32(t, b, 31);
+ tcg_gen_sar_i32(d, a, t);
+ tcg_temp_free_i32(t);
+}
+
+static void tcg_gen_sar_mod_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_andi_i64(t, b, 63);
+ tcg_gen_sar_i64(d, a, t);
+ tcg_temp_free_i64(t);
+}
+
+void tcg_gen_gvec_sarv(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = { INDEX_op_sarv_vec, 0 };
+ static const GVecGen3 g[4] = {
+ { .fniv = tcg_gen_sarv_mod_vec,
+ .fno = gen_helper_gvec_sar8v,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fniv = tcg_gen_sarv_mod_vec,
+ .fno = gen_helper_gvec_sar16v,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = tcg_gen_sar_mod_i32,
+ .fniv = tcg_gen_sarv_mod_vec,
+ .fno = gen_helper_gvec_sar32v,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = tcg_gen_sar_mod_i64,
+ .fniv = tcg_gen_sarv_mod_vec,
+ .fno = gen_helper_gvec_sar64v,
+ .opt_opc = vecop_list,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .vece = MO_64 },
+ };
+
+ tcg_debug_assert(vece <= MO_64);
+ tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
+}
+
+/*
+ * Similarly for rotates.
+ */
+
+static void tcg_gen_rotlv_mod_vec(unsigned vece, TCGv_vec d,
+ TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+ TCGv_vec m = tcg_constant_vec_matching(d, vece, (8 << vece) - 1);
+
+ tcg_gen_and_vec(vece, t, b, m);
+ tcg_gen_rotlv_vec(vece, d, a, t);
+ tcg_temp_free_vec(t);
+}
+
+static void tcg_gen_rotl_mod_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 t = tcg_temp_new_i32();
+
+ tcg_gen_andi_i32(t, b, 31);
+ tcg_gen_rotl_i32(d, a, t);
+ tcg_temp_free_i32(t);
+}
+
+static void tcg_gen_rotl_mod_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_andi_i64(t, b, 63);
+ tcg_gen_rotl_i64(d, a, t);
+ tcg_temp_free_i64(t);
+}
+
+void tcg_gen_gvec_rotlv(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = { INDEX_op_rotlv_vec, 0 };
+ static const GVecGen3 g[4] = {
+ { .fniv = tcg_gen_rotlv_mod_vec,
+ .fno = gen_helper_gvec_rotl8v,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fniv = tcg_gen_rotlv_mod_vec,
+ .fno = gen_helper_gvec_rotl16v,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = tcg_gen_rotl_mod_i32,
+ .fniv = tcg_gen_rotlv_mod_vec,
+ .fno = gen_helper_gvec_rotl32v,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = tcg_gen_rotl_mod_i64,
+ .fniv = tcg_gen_rotlv_mod_vec,
+ .fno = gen_helper_gvec_rotl64v,
+ .opt_opc = vecop_list,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .vece = MO_64 },
+ };
+
+ tcg_debug_assert(vece <= MO_64);
+ tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
+}
+
+static void tcg_gen_rotrv_mod_vec(unsigned vece, TCGv_vec d,
+ TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+ TCGv_vec m = tcg_constant_vec_matching(d, vece, (8 << vece) - 1);
+
+ tcg_gen_and_vec(vece, t, b, m);
+ tcg_gen_rotrv_vec(vece, d, a, t);
+ tcg_temp_free_vec(t);
+}
+
+static void tcg_gen_rotr_mod_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 t = tcg_temp_new_i32();
+
+ tcg_gen_andi_i32(t, b, 31);
+ tcg_gen_rotr_i32(d, a, t);
+ tcg_temp_free_i32(t);
+}
+
+static void tcg_gen_rotr_mod_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_andi_i64(t, b, 63);
+ tcg_gen_rotr_i64(d, a, t);
+ tcg_temp_free_i64(t);
+}
+
+void tcg_gen_gvec_rotrv(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = { INDEX_op_rotrv_vec, 0 };
+ static const GVecGen3 g[4] = {
+ { .fniv = tcg_gen_rotrv_mod_vec,
+ .fno = gen_helper_gvec_rotr8v,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fniv = tcg_gen_rotrv_mod_vec,
+ .fno = gen_helper_gvec_rotr16v,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = tcg_gen_rotr_mod_i32,
+ .fniv = tcg_gen_rotrv_mod_vec,
+ .fno = gen_helper_gvec_rotr32v,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = tcg_gen_rotr_mod_i64,
+ .fniv = tcg_gen_rotrv_mod_vec,
+ .fno = gen_helper_gvec_rotr64v,
+ .opt_opc = vecop_list,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .vece = MO_64 },
+ };
+
+ tcg_debug_assert(vece <= MO_64);
+ tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
+}
+
+/* Expand OPSZ bytes worth of comparison operations using i32 elements. */
+static void expand_cmp_i32(uint32_t dofs, uint32_t aofs, uint32_t bofs,
+ uint32_t oprsz, TCGCond cond)
+{
+ TCGv_i32 t0 = tcg_temp_new_i32();
+ TCGv_i32 t1 = tcg_temp_new_i32();
+ uint32_t i;
+
+ for (i = 0; i < oprsz; i += 4) {
+ tcg_gen_ld_i32(t0, cpu_env, aofs + i);
+ tcg_gen_ld_i32(t1, cpu_env, bofs + i);
+ tcg_gen_setcond_i32(cond, t0, t0, t1);
+ tcg_gen_neg_i32(t0, t0);
+ tcg_gen_st_i32(t0, cpu_env, dofs + i);
+ }
+ tcg_temp_free_i32(t1);
+ tcg_temp_free_i32(t0);
+}
+
+static void expand_cmp_i64(uint32_t dofs, uint32_t aofs, uint32_t bofs,
+ uint32_t oprsz, TCGCond cond)
+{
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ uint32_t i;
+
+ for (i = 0; i < oprsz; i += 8) {
+ tcg_gen_ld_i64(t0, cpu_env, aofs + i);
+ tcg_gen_ld_i64(t1, cpu_env, bofs + i);
+ tcg_gen_setcond_i64(cond, t0, t0, t1);
+ tcg_gen_neg_i64(t0, t0);
+ tcg_gen_st_i64(t0, cpu_env, dofs + i);
+ }
+ tcg_temp_free_i64(t1);
+ tcg_temp_free_i64(t0);
+}
+
+static void expand_cmp_vec(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t oprsz, uint32_t tysz,
+ TCGType type, TCGCond cond)
+{
+ TCGv_vec t0 = tcg_temp_new_vec(type);
+ TCGv_vec t1 = tcg_temp_new_vec(type);
+ uint32_t i;
+
+ for (i = 0; i < oprsz; i += tysz) {
+ tcg_gen_ld_vec(t0, cpu_env, aofs + i);
+ tcg_gen_ld_vec(t1, cpu_env, bofs + i);
+ tcg_gen_cmp_vec(cond, vece, t0, t0, t1);
+ tcg_gen_st_vec(t0, cpu_env, dofs + i);
+ }
+ tcg_temp_free_vec(t1);
+ tcg_temp_free_vec(t0);
+}
+
+void tcg_gen_gvec_cmp(TCGCond cond, unsigned vece, uint32_t dofs,
+ uint32_t aofs, uint32_t bofs,
+ uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode cmp_list[] = { INDEX_op_cmp_vec, 0 };
+ static gen_helper_gvec_3 * const eq_fn[4] = {
+ gen_helper_gvec_eq8, gen_helper_gvec_eq16,
+ gen_helper_gvec_eq32, gen_helper_gvec_eq64
+ };
+ static gen_helper_gvec_3 * const ne_fn[4] = {
+ gen_helper_gvec_ne8, gen_helper_gvec_ne16,
+ gen_helper_gvec_ne32, gen_helper_gvec_ne64
+ };
+ static gen_helper_gvec_3 * const lt_fn[4] = {
+ gen_helper_gvec_lt8, gen_helper_gvec_lt16,
+ gen_helper_gvec_lt32, gen_helper_gvec_lt64
+ };
+ static gen_helper_gvec_3 * const le_fn[4] = {
+ gen_helper_gvec_le8, gen_helper_gvec_le16,
+ gen_helper_gvec_le32, gen_helper_gvec_le64
+ };
+ static gen_helper_gvec_3 * const ltu_fn[4] = {
+ gen_helper_gvec_ltu8, gen_helper_gvec_ltu16,
+ gen_helper_gvec_ltu32, gen_helper_gvec_ltu64
+ };
+ static gen_helper_gvec_3 * const leu_fn[4] = {
+ gen_helper_gvec_leu8, gen_helper_gvec_leu16,
+ gen_helper_gvec_leu32, gen_helper_gvec_leu64
+ };
+ static gen_helper_gvec_3 * const * const fns[16] = {
+ [TCG_COND_EQ] = eq_fn,
+ [TCG_COND_NE] = ne_fn,
+ [TCG_COND_LT] = lt_fn,
+ [TCG_COND_LE] = le_fn,
+ [TCG_COND_LTU] = ltu_fn,
+ [TCG_COND_LEU] = leu_fn,
+ };
+
+ const TCGOpcode *hold_list;
+ TCGType type;
+ uint32_t some;
+
+ check_size_align(oprsz, maxsz, dofs | aofs | bofs);
+ check_overlap_3(dofs, aofs, bofs, maxsz);
+
+ if (cond == TCG_COND_NEVER || cond == TCG_COND_ALWAYS) {
+ do_dup(MO_8, dofs, oprsz, maxsz,
+ NULL, NULL, -(cond == TCG_COND_ALWAYS));
+ return;
+ }
+
+ /*
+ * Implement inline with a vector type, if possible.
+ * Prefer integer when 64-bit host and 64-bit comparison.
+ */
+ hold_list = tcg_swap_vecop_list(cmp_list);
+ type = choose_vector_type(cmp_list, vece, oprsz,
+ TCG_TARGET_REG_BITS == 64 && vece == MO_64);
+ switch (type) {
+ case TCG_TYPE_V256:
+ /* Recall that ARM SVE allows vector sizes that are not a
+ * power of 2, but always a multiple of 16. The intent is
+ * that e.g. size == 80 would be expanded with 2x32 + 1x16.
+ */
+ some = QEMU_ALIGN_DOWN(oprsz, 32);
+ expand_cmp_vec(vece, dofs, aofs, bofs, some, 32, TCG_TYPE_V256, cond);
+ if (some == oprsz) {
+ break;
+ }
+ dofs += some;
+ aofs += some;
+ bofs += some;
+ oprsz -= some;
+ maxsz -= some;
+ /* fallthru */
+ case TCG_TYPE_V128:
+ expand_cmp_vec(vece, dofs, aofs, bofs, oprsz, 16, TCG_TYPE_V128, cond);
+ break;
+ case TCG_TYPE_V64:
+ expand_cmp_vec(vece, dofs, aofs, bofs, oprsz, 8, TCG_TYPE_V64, cond);
+ break;
+
+ case 0:
+ if (vece == MO_64 && check_size_impl(oprsz, 8)) {
+ expand_cmp_i64(dofs, aofs, bofs, oprsz, cond);
+ } else if (vece == MO_32 && check_size_impl(oprsz, 4)) {
+ expand_cmp_i32(dofs, aofs, bofs, oprsz, cond);
+ } else {
+ gen_helper_gvec_3 * const *fn = fns[cond];
+
+ if (fn == NULL) {
+ uint32_t tmp;
+ tmp = aofs, aofs = bofs, bofs = tmp;
+ cond = tcg_swap_cond(cond);
+ fn = fns[cond];
+ assert(fn != NULL);
+ }
+ tcg_gen_gvec_3_ool(dofs, aofs, bofs, oprsz, maxsz, 0, fn[vece]);
+ oprsz = maxsz;
+ }
+ break;
+
+ default:
+ g_assert_not_reached();
+ }
+ tcg_swap_vecop_list(hold_list);
+
+ if (oprsz < maxsz) {
+ expand_clr(dofs + oprsz, maxsz - oprsz);
+ }
+}
+
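+/* Bitwise select: d = (b & a) | (c & ~a), with a acting as the per-bit mask. */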
+static void tcg_gen_bitsel_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, TCGv_i64 c)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_and_i64(t, b, a);
+ tcg_gen_andc_i64(d, c, a);
+ tcg_gen_or_i64(d, d, t);
+ tcg_temp_free_i64(t);
+}
+
+void tcg_gen_gvec_bitsel(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t bofs, uint32_t cofs,
+ uint32_t oprsz, uint32_t maxsz)
+{
+ static const GVecGen4 g = {
+ .fni8 = tcg_gen_bitsel_i64,
+ .fniv = tcg_gen_bitsel_vec,
+ .fno = gen_helper_gvec_bitsel,
+ };
+
+ tcg_gen_gvec_4(dofs, aofs, bofs, cofs, oprsz, maxsz, &g);
+}
diff --git a/tcg/tcg-op-vec.c b/tcg/tcg-op-vec.c
new file mode 100644
index 000000000..faf30f9cd
--- /dev/null
+++ b/tcg/tcg-op-vec.c
@@ -0,0 +1,816 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2018 Linaro, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "tcg/tcg.h"
+#include "tcg/tcg-op.h"
+#include "tcg/tcg-mo.h"
+
+/* Reduce the number of ifdefs below. This assumes that all uses of
+ TCGV_HIGH and TCGV_LOW are properly protected by a conditional that
+ the compiler can eliminate. */
+#if TCG_TARGET_REG_BITS == 64
+extern TCGv_i32 TCGV_LOW_link_error(TCGv_i64);
+extern TCGv_i32 TCGV_HIGH_link_error(TCGv_i64);
+#define TCGV_LOW TCGV_LOW_link_error
+#define TCGV_HIGH TCGV_HIGH_link_error
+#endif
+
+/*
+ * Vector optional opcode tracking.
+ * Except for the basic logical operations (and, or, xor), and
+ * data movement (mov, ld, st, dupi), many vector opcodes are
+ * optional and may not be supported on the host. Thank Intel
+ * for the irregularity in their instruction set.
+ *
+ * The gvec expanders allow custom vector operations to be composed,
+ * generally via the .fniv callback in the GVecGen* structures. At
+ * the same time, in deciding whether to use this hook we need to
+ * know if the host supports the required operations. This is
+ * presented as an array of opcodes, terminated by 0. Each opcode
+ * is assumed to be expanded with the given VECE.
+ *
+ * For debugging, we want to validate this array. Therefore, when
+ * tcg_ctx->vecop_list is non-NULL, the tcg_gen_*_vec expanders
+ * will validate that their opcode is present in the list.
+ */
+#ifdef CONFIG_DEBUG_TCG
+void tcg_assert_listed_vecop(TCGOpcode op)
+{
+ const TCGOpcode *p = tcg_ctx->vecop_list;
+ if (p) {
+ for (; *p; ++p) {
+ if (*p == op) {
+ return;
+ }
+ }
+ g_assert_not_reached();
+ }
+}
+#endif
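+
+/*
+ * Hypothetical usage sketch (the example_* names below do not exist in
+ * the tree; they only illustrate how a front end pairs a custom .fniv
+ * hook with the list of optional opcodes that hook may emit):
+ *
+ *     static void example_fniv(unsigned vece, TCGv_vec d, TCGv_vec a)
+ *     {
+ *         TCGv_vec zero = tcg_constant_vec_matching(d, vece, 0);
+ *         tcg_gen_smax_vec(vece, d, a, zero);   // may emit smax_vec
+ *     }
+ *     static const TCGOpcode example_list[] = { INDEX_op_smax_vec, 0 };
+ *     static const GVecGen2 example_op = {
+ *         .fniv = example_fniv,
+ *         .fno = gen_helper_example,            // hypothetical helper
+ *         .opt_opc = example_list,
+ *         .vece = MO_16,
+ *     };
+ *
+ * Roughly, tcg_gen_gvec_2(dofs, aofs, oprsz, maxsz, &example_op) uses
+ * .fniv only when tcg_can_emit_vecop_list() accepts example_list for
+ * the chosen vector type, and otherwise falls back to the helper.
+ */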
+
+bool tcg_can_emit_vecop_list(const TCGOpcode *list,
+ TCGType type, unsigned vece)
+{
+ if (list == NULL) {
+ return true;
+ }
+
+ for (; *list; ++list) {
+ TCGOpcode opc = *list;
+
+#ifdef CONFIG_DEBUG_TCG
+ switch (opc) {
+ case INDEX_op_and_vec:
+ case INDEX_op_or_vec:
+ case INDEX_op_xor_vec:
+ case INDEX_op_mov_vec:
+ case INDEX_op_dup_vec:
+ case INDEX_op_dup2_vec:
+ case INDEX_op_ld_vec:
+ case INDEX_op_st_vec:
+ case INDEX_op_bitsel_vec:
+ /* These opcodes are mandatory and should not be listed. */
+ g_assert_not_reached();
+ case INDEX_op_not_vec:
+ /* These opcodes have generic expansions using the above. */
+ g_assert_not_reached();
+ default:
+ break;
+ }
+#endif
+
+ if (tcg_can_emit_vec_op(opc, type, vece)) {
+ continue;
+ }
+
+ /*
+ * The opcode list is created by front ends based on what they
+ * actually invoke. We must mirror the logic in the routines
+ * below for generic expansions using other opcodes.
+ */
+ switch (opc) {
+ case INDEX_op_neg_vec:
+ if (tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece)) {
+ continue;
+ }
+ break;
+ case INDEX_op_abs_vec:
+ if (tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece)
+ && (tcg_can_emit_vec_op(INDEX_op_smax_vec, type, vece) > 0
+ || tcg_can_emit_vec_op(INDEX_op_sari_vec, type, vece) > 0
+ || tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece))) {
+ continue;
+ }
+ break;
+ case INDEX_op_usadd_vec:
+ if (tcg_can_emit_vec_op(INDEX_op_umin_vec, type, vece) ||
+ tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece)) {
+ continue;
+ }
+ break;
+ case INDEX_op_ussub_vec:
+ if (tcg_can_emit_vec_op(INDEX_op_umax_vec, type, vece) ||
+ tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece)) {
+ continue;
+ }
+ break;
+ case INDEX_op_cmpsel_vec:
+ case INDEX_op_smin_vec:
+ case INDEX_op_smax_vec:
+ case INDEX_op_umin_vec:
+ case INDEX_op_umax_vec:
+ if (tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece)) {
+ continue;
+ }
+ break;
+ default:
+ break;
+ }
+ return false;
+ }
+ return true;
+}
+
+void vec_gen_2(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r, TCGArg a)
+{
+ TCGOp *op = tcg_emit_op(opc);
+ TCGOP_VECL(op) = type - TCG_TYPE_V64;
+ TCGOP_VECE(op) = vece;
+ op->args[0] = r;
+ op->args[1] = a;
+}
+
+void vec_gen_3(TCGOpcode opc, TCGType type, unsigned vece,
+ TCGArg r, TCGArg a, TCGArg b)
+{
+ TCGOp *op = tcg_emit_op(opc);
+ TCGOP_VECL(op) = type - TCG_TYPE_V64;
+ TCGOP_VECE(op) = vece;
+ op->args[0] = r;
+ op->args[1] = a;
+ op->args[2] = b;
+}
+
+void vec_gen_4(TCGOpcode opc, TCGType type, unsigned vece,
+ TCGArg r, TCGArg a, TCGArg b, TCGArg c)
+{
+ TCGOp *op = tcg_emit_op(opc);
+ TCGOP_VECL(op) = type - TCG_TYPE_V64;
+ TCGOP_VECE(op) = vece;
+ op->args[0] = r;
+ op->args[1] = a;
+ op->args[2] = b;
+ op->args[3] = c;
+}
+
+static void vec_gen_6(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r,
+ TCGArg a, TCGArg b, TCGArg c, TCGArg d, TCGArg e)
+{
+ TCGOp *op = tcg_emit_op(opc);
+ TCGOP_VECL(op) = type - TCG_TYPE_V64;
+ TCGOP_VECE(op) = vece;
+ op->args[0] = r;
+ op->args[1] = a;
+ op->args[2] = b;
+ op->args[3] = c;
+ op->args[4] = d;
+ op->args[5] = e;
+}
+
+static void vec_gen_op2(TCGOpcode opc, unsigned vece, TCGv_vec r, TCGv_vec a)
+{
+ TCGTemp *rt = tcgv_vec_temp(r);
+ TCGTemp *at = tcgv_vec_temp(a);
+ TCGType type = rt->base_type;
+
+ /* Must have enough inputs for the output. */
+ tcg_debug_assert(at->base_type >= type);
+ vec_gen_2(opc, type, vece, temp_arg(rt), temp_arg(at));
+}
+
+static void vec_gen_op3(TCGOpcode opc, unsigned vece,
+ TCGv_vec r, TCGv_vec a, TCGv_vec b)
+{
+ TCGTemp *rt = tcgv_vec_temp(r);
+ TCGTemp *at = tcgv_vec_temp(a);
+ TCGTemp *bt = tcgv_vec_temp(b);
+ TCGType type = rt->base_type;
+
+ /* Must have enough inputs for the output. */
+ tcg_debug_assert(at->base_type >= type);
+ tcg_debug_assert(bt->base_type >= type);
+ vec_gen_3(opc, type, vece, temp_arg(rt), temp_arg(at), temp_arg(bt));
+}
+
+void tcg_gen_mov_vec(TCGv_vec r, TCGv_vec a)
+{
+ if (r != a) {
+ vec_gen_op2(INDEX_op_mov_vec, 0, r, a);
+ }
+}
+
+TCGv_vec tcg_const_zeros_vec(TCGType type)
+{
+ TCGv_vec ret = tcg_temp_new_vec(type);
+ tcg_gen_dupi_vec(MO_64, ret, 0);
+ return ret;
+}
+
+TCGv_vec tcg_const_ones_vec(TCGType type)
+{
+ TCGv_vec ret = tcg_temp_new_vec(type);
+ tcg_gen_dupi_vec(MO_64, ret, -1);
+ return ret;
+}
+
+TCGv_vec tcg_const_zeros_vec_matching(TCGv_vec m)
+{
+ TCGTemp *t = tcgv_vec_temp(m);
+ return tcg_const_zeros_vec(t->base_type);
+}
+
+TCGv_vec tcg_const_ones_vec_matching(TCGv_vec m)
+{
+ TCGTemp *t = tcgv_vec_temp(m);
+ return tcg_const_ones_vec(t->base_type);
+}
+
+void tcg_gen_dupi_vec(unsigned vece, TCGv_vec r, uint64_t a)
+{
+ TCGTemp *rt = tcgv_vec_temp(r);
+ tcg_gen_mov_vec(r, tcg_constant_vec(rt->base_type, vece, a));
+}
+
+void tcg_gen_dup_i64_vec(unsigned vece, TCGv_vec r, TCGv_i64 a)
+{
+ TCGArg ri = tcgv_vec_arg(r);
+ TCGTemp *rt = arg_temp(ri);
+ TCGType type = rt->base_type;
+
+ if (TCG_TARGET_REG_BITS == 64) {
+ TCGArg ai = tcgv_i64_arg(a);
+ vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
+ } else if (vece == MO_64) {
+ TCGArg al = tcgv_i32_arg(TCGV_LOW(a));
+ TCGArg ah = tcgv_i32_arg(TCGV_HIGH(a));
+ vec_gen_3(INDEX_op_dup2_vec, type, MO_64, ri, al, ah);
+ } else {
+ TCGArg ai = tcgv_i32_arg(TCGV_LOW(a));
+ vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
+ }
+}
+
+void tcg_gen_dup_i32_vec(unsigned vece, TCGv_vec r, TCGv_i32 a)
+{
+ TCGArg ri = tcgv_vec_arg(r);
+ TCGArg ai = tcgv_i32_arg(a);
+ TCGTemp *rt = arg_temp(ri);
+ TCGType type = rt->base_type;
+
+ vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
+}
+
+void tcg_gen_dup_mem_vec(unsigned vece, TCGv_vec r, TCGv_ptr b,
+ tcg_target_long ofs)
+{
+ TCGArg ri = tcgv_vec_arg(r);
+ TCGArg bi = tcgv_ptr_arg(b);
+ TCGTemp *rt = arg_temp(ri);
+ TCGType type = rt->base_type;
+
+ vec_gen_3(INDEX_op_dupm_vec, type, vece, ri, bi, ofs);
+}
+
+static void vec_gen_ldst(TCGOpcode opc, TCGv_vec r, TCGv_ptr b, TCGArg o)
+{
+ TCGArg ri = tcgv_vec_arg(r);
+ TCGArg bi = tcgv_ptr_arg(b);
+ TCGTemp *rt = arg_temp(ri);
+ TCGType type = rt->base_type;
+
+ vec_gen_3(opc, type, 0, ri, bi, o);
+}
+
+void tcg_gen_ld_vec(TCGv_vec r, TCGv_ptr b, TCGArg o)
+{
+ vec_gen_ldst(INDEX_op_ld_vec, r, b, o);
+}
+
+void tcg_gen_st_vec(TCGv_vec r, TCGv_ptr b, TCGArg o)
+{
+ vec_gen_ldst(INDEX_op_st_vec, r, b, o);
+}
+
+void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr b, TCGArg o, TCGType low_type)
+{
+ TCGArg ri = tcgv_vec_arg(r);
+ TCGArg bi = tcgv_ptr_arg(b);
+ TCGTemp *rt = arg_temp(ri);
+ TCGType type = rt->base_type;
+
+ tcg_debug_assert(low_type >= TCG_TYPE_V64);
+ tcg_debug_assert(low_type <= type);
+ vec_gen_3(INDEX_op_st_vec, low_type, 0, ri, bi, o);
+}
+
+void tcg_gen_and_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
+{
+ vec_gen_op3(INDEX_op_and_vec, 0, r, a, b);
+}
+
+void tcg_gen_or_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
+{
+ vec_gen_op3(INDEX_op_or_vec, 0, r, a, b);
+}
+
+void tcg_gen_xor_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
+{
+ vec_gen_op3(INDEX_op_xor_vec, 0, r, a, b);
+}
+
+void tcg_gen_andc_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
+{
+ if (TCG_TARGET_HAS_andc_vec) {
+ vec_gen_op3(INDEX_op_andc_vec, 0, r, a, b);
+ } else {
+ TCGv_vec t = tcg_temp_new_vec_matching(r);
+ tcg_gen_not_vec(0, t, b);
+ tcg_gen_and_vec(0, r, a, t);
+ tcg_temp_free_vec(t);
+ }
+}
+
+void tcg_gen_orc_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
+{
+ if (TCG_TARGET_HAS_orc_vec) {
+ vec_gen_op3(INDEX_op_orc_vec, 0, r, a, b);
+ } else {
+ TCGv_vec t = tcg_temp_new_vec_matching(r);
+ tcg_gen_not_vec(0, t, b);
+ tcg_gen_or_vec(0, r, a, t);
+ tcg_temp_free_vec(t);
+ }
+}
+
+void tcg_gen_nand_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
+{
+ /* TODO: Add TCG_TARGET_HAS_nand_vec when adding a backend that supports it. */
+ tcg_gen_and_vec(0, r, a, b);
+ tcg_gen_not_vec(0, r, r);
+}
+
+void tcg_gen_nor_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
+{
+ /* TODO: Add TCG_TARGET_HAS_nor_vec when adding a backend that supports it. */
+ tcg_gen_or_vec(0, r, a, b);
+ tcg_gen_not_vec(0, r, r);
+}
+
+void tcg_gen_eqv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
+{
+ /* TODO: Add TCG_TARGET_HAS_eqv_vec when adding a backend that supports it. */
+ tcg_gen_xor_vec(0, r, a, b);
+ tcg_gen_not_vec(0, r, r);
+}
+
+static bool do_op2(unsigned vece, TCGv_vec r, TCGv_vec a, TCGOpcode opc)
+{
+ TCGTemp *rt = tcgv_vec_temp(r);
+ TCGTemp *at = tcgv_vec_temp(a);
+ TCGArg ri = temp_arg(rt);
+ TCGArg ai = temp_arg(at);
+ TCGType type = rt->base_type;
+ int can;
+
+ tcg_debug_assert(at->base_type >= type);
+ tcg_assert_listed_vecop(opc);
+ can = tcg_can_emit_vec_op(opc, type, vece);
+ if (can > 0) {
+ vec_gen_2(opc, type, vece, ri, ai);
+ } else if (can < 0) {
+ const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
+ tcg_expand_vec_op(opc, type, vece, ri, ai);
+ tcg_swap_vecop_list(hold_list);
+ } else {
+ return false;
+ }
+ return true;
+}
+
+void tcg_gen_not_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
+{
+ const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
+
+ if (!TCG_TARGET_HAS_not_vec || !do_op2(vece, r, a, INDEX_op_not_vec)) {
+ TCGv_vec t = tcg_const_ones_vec_matching(r);
+ tcg_gen_xor_vec(0, r, a, t);
+ tcg_temp_free_vec(t);
+ }
+ tcg_swap_vecop_list(hold_list);
+}
+
+void tcg_gen_neg_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
+{
+ const TCGOpcode *hold_list;
+
+ tcg_assert_listed_vecop(INDEX_op_neg_vec);
+ hold_list = tcg_swap_vecop_list(NULL);
+
+ if (!TCG_TARGET_HAS_neg_vec || !do_op2(vece, r, a, INDEX_op_neg_vec)) {
+ TCGv_vec t = tcg_const_zeros_vec_matching(r);
+ tcg_gen_sub_vec(vece, r, t, a);
+ tcg_temp_free_vec(t);
+ }
+ tcg_swap_vecop_list(hold_list);
+}
+
+void tcg_gen_abs_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
+{
+ const TCGOpcode *hold_list;
+
+ tcg_assert_listed_vecop(INDEX_op_abs_vec);
+ hold_list = tcg_swap_vecop_list(NULL);
+
+ if (!do_op2(vece, r, a, INDEX_op_abs_vec)) {
+ TCGType type = tcgv_vec_temp(r)->base_type;
+ TCGv_vec t = tcg_temp_new_vec(type);
+
+ tcg_debug_assert(tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece));
+ if (tcg_can_emit_vec_op(INDEX_op_smax_vec, type, vece) > 0) {
+ tcg_gen_neg_vec(vece, t, a);
+ tcg_gen_smax_vec(vece, r, a, t);
+ } else {
+ if (tcg_can_emit_vec_op(INDEX_op_sari_vec, type, vece) > 0) {
+ tcg_gen_sari_vec(vece, t, a, (8 << vece) - 1);
+ } else {
+ tcg_gen_cmp_vec(TCG_COND_LT, vece, t, a,
+ tcg_constant_vec(type, vece, 0));
+ }
+ tcg_gen_xor_vec(vece, r, a, t);
+ tcg_gen_sub_vec(vece, r, r, t);
+ }
+
+ tcg_temp_free_vec(t);
+ }
+ tcg_swap_vecop_list(hold_list);
+}
+
+static void do_shifti(TCGOpcode opc, unsigned vece,
+ TCGv_vec r, TCGv_vec a, int64_t i)
+{
+ TCGTemp *rt = tcgv_vec_temp(r);
+ TCGTemp *at = tcgv_vec_temp(a);
+ TCGArg ri = temp_arg(rt);
+ TCGArg ai = temp_arg(at);
+ TCGType type = rt->base_type;
+ int can;
+
+ tcg_debug_assert(at->base_type == type);
+ tcg_debug_assert(i >= 0 && i < (8 << vece));
+ tcg_assert_listed_vecop(opc);
+
+ if (i == 0) {
+ tcg_gen_mov_vec(r, a);
+ return;
+ }
+
+ can = tcg_can_emit_vec_op(opc, type, vece);
+ if (can > 0) {
+ vec_gen_3(opc, type, vece, ri, ai, i);
+ } else {
+ /* We leave the choice of expansion via scalar or vector shift
+ to the target.  Often, but not always, dupi can feed a vector
+ shift more easily than a scalar.  */
+ const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
+ tcg_debug_assert(can < 0);
+ tcg_expand_vec_op(opc, type, vece, ri, ai, i);
+ tcg_swap_vecop_list(hold_list);
+ }
+}
+
+void tcg_gen_shli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
+{
+ do_shifti(INDEX_op_shli_vec, vece, r, a, i);
+}
+
+void tcg_gen_shri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
+{
+ do_shifti(INDEX_op_shri_vec, vece, r, a, i);
+}
+
+void tcg_gen_sari_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
+{
+ do_shifti(INDEX_op_sari_vec, vece, r, a, i);
+}
+
+void tcg_gen_rotli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
+{
+ do_shifti(INDEX_op_rotli_vec, vece, r, a, i);
+}
+
+void tcg_gen_rotri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
+{
+ int bits = 8 << vece;
+ tcg_debug_assert(i >= 0 && i < bits);
+ do_shifti(INDEX_op_rotli_vec, vece, r, a, -i & (bits - 1));
+}
+
+void tcg_gen_cmp_vec(TCGCond cond, unsigned vece,
+ TCGv_vec r, TCGv_vec a, TCGv_vec b)
+{
+ TCGTemp *rt = tcgv_vec_temp(r);
+ TCGTemp *at = tcgv_vec_temp(a);
+ TCGTemp *bt = tcgv_vec_temp(b);
+ TCGArg ri = temp_arg(rt);
+ TCGArg ai = temp_arg(at);
+ TCGArg bi = temp_arg(bt);
+ TCGType type = rt->base_type;
+ int can;
+
+ tcg_debug_assert(at->base_type >= type);
+ tcg_debug_assert(bt->base_type >= type);
+ tcg_assert_listed_vecop(INDEX_op_cmp_vec);
+ can = tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece);
+ if (can > 0) {
+ vec_gen_4(INDEX_op_cmp_vec, type, vece, ri, ai, bi, cond);
+ } else {
+ const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
+ tcg_debug_assert(can < 0);
+ tcg_expand_vec_op(INDEX_op_cmp_vec, type, vece, ri, ai, bi, cond);
+ tcg_swap_vecop_list(hold_list);
+ }
+}
+
+static bool do_op3(unsigned vece, TCGv_vec r, TCGv_vec a,
+ TCGv_vec b, TCGOpcode opc)
+{
+ TCGTemp *rt = tcgv_vec_temp(r);
+ TCGTemp *at = tcgv_vec_temp(a);
+ TCGTemp *bt = tcgv_vec_temp(b);
+ TCGArg ri = temp_arg(rt);
+ TCGArg ai = temp_arg(at);
+ TCGArg bi = temp_arg(bt);
+ TCGType type = rt->base_type;
+ int can;
+
+ tcg_debug_assert(at->base_type >= type);
+ tcg_debug_assert(bt->base_type >= type);
+ tcg_assert_listed_vecop(opc);
+ can = tcg_can_emit_vec_op(opc, type, vece);
+ if (can > 0) {
+ vec_gen_3(opc, type, vece, ri, ai, bi);
+ } else if (can < 0) {
+ const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
+ tcg_expand_vec_op(opc, type, vece, ri, ai, bi);
+ tcg_swap_vecop_list(hold_list);
+ } else {
+ return false;
+ }
+ return true;
+}
+
+static void do_op3_nofail(unsigned vece, TCGv_vec r, TCGv_vec a,
+ TCGv_vec b, TCGOpcode opc)
+{
+ bool ok = do_op3(vece, r, a, b, opc);
+ tcg_debug_assert(ok);
+}
+
+void tcg_gen_add_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
+{
+ do_op3_nofail(vece, r, a, b, INDEX_op_add_vec);
+}
+
+void tcg_gen_sub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
+{
+ do_op3_nofail(vece, r, a, b, INDEX_op_sub_vec);
+}
+
+void tcg_gen_mul_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
+{
+ do_op3_nofail(vece, r, a, b, INDEX_op_mul_vec);
+}
+
+void tcg_gen_ssadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
+{
+ do_op3_nofail(vece, r, a, b, INDEX_op_ssadd_vec);
+}
+
+void tcg_gen_usadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
+{
+ if (!do_op3(vece, r, a, b, INDEX_op_usadd_vec)) {
+ const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
+ TCGv_vec t = tcg_temp_new_vec_matching(r);
+
+ /* usadd(a, b) = min(a, ~b) + b */
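+ /*
+ * Why the identity holds: ~b == UMAX - b, so min(a, ~b) clamps a to
+ * the largest addend that does not overflow; adding b then yields
+ * either a + b (no overflow) or UMAX (saturated).  E.g. for MO_8,
+ * a = 0xf0, b = 0x20: min(0xf0, 0xdf) + 0x20 = 0xff.
+ */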
+ tcg_gen_not_vec(vece, t, b);
+ tcg_gen_umin_vec(vece, t, t, a);
+ tcg_gen_add_vec(vece, r, t, b);
+
+ tcg_temp_free_vec(t);
+ tcg_swap_vecop_list(hold_list);
+ }
+}
+
+void tcg_gen_sssub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
+{
+ do_op3_nofail(vece, r, a, b, INDEX_op_sssub_vec);
+}
+
+void tcg_gen_ussub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
+{
+ if (!do_op3(vece, r, a, b, INDEX_op_ussub_vec)) {
+ const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
+ TCGv_vec t = tcg_temp_new_vec_matching(r);
+
+ /* ussub(a, b) = max(a, b) - b */
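+ /*
+ * Why the identity holds: max(a, b) is a when a >= b (giving a - b)
+ * and b when a < b (giving 0, the saturated result).  E.g. for MO_8,
+ * a = 0x20, b = 0xf0: max(0x20, 0xf0) - 0xf0 = 0.
+ */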
+ tcg_gen_umax_vec(vece, t, a, b);
+ tcg_gen_sub_vec(vece, r, t, b);
+
+ tcg_temp_free_vec(t);
+ tcg_swap_vecop_list(hold_list);
+ }
+}
+
+static void do_minmax(unsigned vece, TCGv_vec r, TCGv_vec a,
+ TCGv_vec b, TCGOpcode opc, TCGCond cond)
+{
+ if (!do_op3(vece, r, a, b, opc)) {
+ const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
+ tcg_gen_cmpsel_vec(cond, vece, r, a, b, a, b);
+ tcg_swap_vecop_list(hold_list);
+ }
+}
+
+void tcg_gen_smin_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
+{
+ do_minmax(vece, r, a, b, INDEX_op_smin_vec, TCG_COND_LT);
+}
+
+void tcg_gen_umin_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
+{
+ do_minmax(vece, r, a, b, INDEX_op_umin_vec, TCG_COND_LTU);
+}
+
+void tcg_gen_smax_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
+{
+ do_minmax(vece, r, a, b, INDEX_op_smax_vec, TCG_COND_GT);
+}
+
+void tcg_gen_umax_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
+{
+ do_minmax(vece, r, a, b, INDEX_op_umax_vec, TCG_COND_GTU);
+}
+
+void tcg_gen_shlv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
+{
+ do_op3_nofail(vece, r, a, b, INDEX_op_shlv_vec);
+}
+
+void tcg_gen_shrv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
+{
+ do_op3_nofail(vece, r, a, b, INDEX_op_shrv_vec);
+}
+
+void tcg_gen_sarv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
+{
+ do_op3_nofail(vece, r, a, b, INDEX_op_sarv_vec);
+}
+
+void tcg_gen_rotlv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
+{
+ do_op3_nofail(vece, r, a, b, INDEX_op_rotlv_vec);
+}
+
+void tcg_gen_rotrv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
+{
+ do_op3_nofail(vece, r, a, b, INDEX_op_rotrv_vec);
+}
+
+static void do_shifts(unsigned vece, TCGv_vec r, TCGv_vec a,
+ TCGv_i32 s, TCGOpcode opc)
+{
+ TCGTemp *rt = tcgv_vec_temp(r);
+ TCGTemp *at = tcgv_vec_temp(a);
+ TCGTemp *st = tcgv_i32_temp(s);
+ TCGArg ri = temp_arg(rt);
+ TCGArg ai = temp_arg(at);
+ TCGArg si = temp_arg(st);
+ TCGType type = rt->base_type;
+ int can;
+
+ tcg_debug_assert(at->base_type >= type);
+ tcg_assert_listed_vecop(opc);
+ can = tcg_can_emit_vec_op(opc, type, vece);
+ if (can > 0) {
+ vec_gen_3(opc, type, vece, ri, ai, si);
+ } else if (can < 0) {
+ const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
+ tcg_expand_vec_op(opc, type, vece, ri, ai, si);
+ tcg_swap_vecop_list(hold_list);
+ } else {
+ g_assert_not_reached();
+ }
+}
+
+void tcg_gen_shls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
+{
+ do_shifts(vece, r, a, b, INDEX_op_shls_vec);
+}
+
+void tcg_gen_shrs_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
+{
+ do_shifts(vece, r, a, b, INDEX_op_shrs_vec);
+}
+
+void tcg_gen_sars_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
+{
+ do_shifts(vece, r, a, b, INDEX_op_sars_vec);
+}
+
+void tcg_gen_rotls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s)
+{
+ do_shifts(vece, r, a, s, INDEX_op_rotls_vec);
+}
+
+void tcg_gen_bitsel_vec(unsigned vece, TCGv_vec r, TCGv_vec a,
+ TCGv_vec b, TCGv_vec c)
+{
+ TCGTemp *rt = tcgv_vec_temp(r);
+ TCGTemp *at = tcgv_vec_temp(a);
+ TCGTemp *bt = tcgv_vec_temp(b);
+ TCGTemp *ct = tcgv_vec_temp(c);
+ TCGType type = rt->base_type;
+
+ tcg_debug_assert(at->base_type >= type);
+ tcg_debug_assert(bt->base_type >= type);
+ tcg_debug_assert(ct->base_type >= type);
+
+ if (TCG_TARGET_HAS_bitsel_vec) {
+ vec_gen_4(INDEX_op_bitsel_vec, type, MO_8,
+ temp_arg(rt), temp_arg(at), temp_arg(bt), temp_arg(ct));
+ } else {
+ TCGv_vec t = tcg_temp_new_vec(type);
+ tcg_gen_and_vec(MO_8, t, a, b);
+ tcg_gen_andc_vec(MO_8, r, c, a);
+ tcg_gen_or_vec(MO_8, r, r, t);
+ tcg_temp_free_vec(t);
+ }
+}
+
+void tcg_gen_cmpsel_vec(TCGCond cond, unsigned vece, TCGv_vec r,
+ TCGv_vec a, TCGv_vec b, TCGv_vec c, TCGv_vec d)
+{
+ TCGTemp *rt = tcgv_vec_temp(r);
+ TCGTemp *at = tcgv_vec_temp(a);
+ TCGTemp *bt = tcgv_vec_temp(b);
+ TCGTemp *ct = tcgv_vec_temp(c);
+ TCGTemp *dt = tcgv_vec_temp(d);
+ TCGArg ri = temp_arg(rt);
+ TCGArg ai = temp_arg(at);
+ TCGArg bi = temp_arg(bt);
+ TCGArg ci = temp_arg(ct);
+ TCGArg di = temp_arg(dt);
+ TCGType type = rt->base_type;
+ const TCGOpcode *hold_list;
+ int can;
+
+ tcg_debug_assert(at->base_type >= type);
+ tcg_debug_assert(bt->base_type >= type);
+ tcg_debug_assert(ct->base_type >= type);
+ tcg_debug_assert(dt->base_type >= type);
+
+ tcg_assert_listed_vecop(INDEX_op_cmpsel_vec);
+ hold_list = tcg_swap_vecop_list(NULL);
+ can = tcg_can_emit_vec_op(INDEX_op_cmpsel_vec, type, vece);
+
+ if (can > 0) {
+ vec_gen_6(INDEX_op_cmpsel_vec, type, vece, ri, ai, bi, ci, di, cond);
+ } else if (can < 0) {
+ tcg_expand_vec_op(INDEX_op_cmpsel_vec, type, vece,
+ ri, ai, bi, ci, di, cond);
+ } else {
+ TCGv_vec t = tcg_temp_new_vec(type);
+ tcg_gen_cmp_vec(cond, vece, t, a, b);
+ tcg_gen_bitsel_vec(vece, r, t, c, d);
+ tcg_temp_free_vec(t);
+ }
+ tcg_swap_vecop_list(hold_list);
+}
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
new file mode 100644
index 000000000..61b492d89
--- /dev/null
+++ b/tcg/tcg-op.c
@@ -0,0 +1,3371 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "exec/exec-all.h"
+#include "tcg/tcg.h"
+#include "tcg/tcg-op.h"
+#include "tcg/tcg-mo.h"
+#include "trace-tcg.h"
+#include "exec/plugin-gen.h"
+
+/* Reduce the number of ifdefs below. This assumes that all uses of
+ TCGV_HIGH and TCGV_LOW are properly protected by a conditional that
+ the compiler can eliminate. */
+#if TCG_TARGET_REG_BITS == 64
+extern TCGv_i32 TCGV_LOW_link_error(TCGv_i64);
+extern TCGv_i32 TCGV_HIGH_link_error(TCGv_i64);
+#define TCGV_LOW TCGV_LOW_link_error
+#define TCGV_HIGH TCGV_HIGH_link_error
+#endif
+
+void tcg_gen_op1(TCGOpcode opc, TCGArg a1)
+{
+ TCGOp *op = tcg_emit_op(opc);
+ op->args[0] = a1;
+}
+
+void tcg_gen_op2(TCGOpcode opc, TCGArg a1, TCGArg a2)
+{
+ TCGOp *op = tcg_emit_op(opc);
+ op->args[0] = a1;
+ op->args[1] = a2;
+}
+
+void tcg_gen_op3(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3)
+{
+ TCGOp *op = tcg_emit_op(opc);
+ op->args[0] = a1;
+ op->args[1] = a2;
+ op->args[2] = a3;
+}
+
+void tcg_gen_op4(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3, TCGArg a4)
+{
+ TCGOp *op = tcg_emit_op(opc);
+ op->args[0] = a1;
+ op->args[1] = a2;
+ op->args[2] = a3;
+ op->args[3] = a4;
+}
+
+void tcg_gen_op5(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3,
+ TCGArg a4, TCGArg a5)
+{
+ TCGOp *op = tcg_emit_op(opc);
+ op->args[0] = a1;
+ op->args[1] = a2;
+ op->args[2] = a3;
+ op->args[3] = a4;
+ op->args[4] = a5;
+}
+
+void tcg_gen_op6(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3,
+ TCGArg a4, TCGArg a5, TCGArg a6)
+{
+ TCGOp *op = tcg_emit_op(opc);
+ op->args[0] = a1;
+ op->args[1] = a2;
+ op->args[2] = a3;
+ op->args[3] = a4;
+ op->args[4] = a5;
+ op->args[5] = a6;
+}
+
+void tcg_gen_mb(TCGBar mb_type)
+{
+ if (tcg_ctx->tb_cflags & CF_PARALLEL) {
+ tcg_gen_op1(INDEX_op_mb, mb_type);
+ }
+}
+
+/* 32 bit ops */
+
+void tcg_gen_movi_i32(TCGv_i32 ret, int32_t arg)
+{
+ tcg_gen_mov_i32(ret, tcg_constant_i32(arg));
+}
+
+void tcg_gen_addi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
+{
+ /* some cases can be optimized here */
+ if (arg2 == 0) {
+ tcg_gen_mov_i32(ret, arg1);
+ } else {
+ tcg_gen_add_i32(ret, arg1, tcg_constant_i32(arg2));
+ }
+}
+
+void tcg_gen_subfi_i32(TCGv_i32 ret, int32_t arg1, TCGv_i32 arg2)
+{
+ if (arg1 == 0 && TCG_TARGET_HAS_neg_i32) {
+ /* Don't recurse with tcg_gen_neg_i32. */
+ tcg_gen_op2_i32(INDEX_op_neg_i32, ret, arg2);
+ } else {
+ tcg_gen_sub_i32(ret, tcg_constant_i32(arg1), arg2);
+ }
+}
+
+void tcg_gen_subi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
+{
+ /* some cases can be optimized here */
+ if (arg2 == 0) {
+ tcg_gen_mov_i32(ret, arg1);
+ } else {
+ tcg_gen_sub_i32(ret, arg1, tcg_constant_i32(arg2));
+ }
+}
+
+void tcg_gen_andi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
+{
+ /* Some cases can be optimized here. */
+ switch (arg2) {
+ case 0:
+ tcg_gen_movi_i32(ret, 0);
+ return;
+ case -1:
+ tcg_gen_mov_i32(ret, arg1);
+ return;
+ case 0xff:
+ /* Don't recurse with tcg_gen_ext8u_i32. */
+ if (TCG_TARGET_HAS_ext8u_i32) {
+ tcg_gen_op2_i32(INDEX_op_ext8u_i32, ret, arg1);
+ return;
+ }
+ break;
+ case 0xffff:
+ if (TCG_TARGET_HAS_ext16u_i32) {
+ tcg_gen_op2_i32(INDEX_op_ext16u_i32, ret, arg1);
+ return;
+ }
+ break;
+ }
+
+ tcg_gen_and_i32(ret, arg1, tcg_constant_i32(arg2));
+}
+
+void tcg_gen_ori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
+{
+ /* Some cases can be optimized here. */
+ if (arg2 == -1) {
+ tcg_gen_movi_i32(ret, -1);
+ } else if (arg2 == 0) {
+ tcg_gen_mov_i32(ret, arg1);
+ } else {
+ tcg_gen_or_i32(ret, arg1, tcg_constant_i32(arg2));
+ }
+}
+
+void tcg_gen_xori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
+{
+ /* Some cases can be optimized here. */
+ if (arg2 == 0) {
+ tcg_gen_mov_i32(ret, arg1);
+ } else if (arg2 == -1 && TCG_TARGET_HAS_not_i32) {
+ /* Don't recurse with tcg_gen_not_i32. */
+ tcg_gen_op2_i32(INDEX_op_not_i32, ret, arg1);
+ } else {
+ tcg_gen_xor_i32(ret, arg1, tcg_constant_i32(arg2));
+ }
+}
+
+void tcg_gen_shli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
+{
+ tcg_debug_assert(arg2 >= 0 && arg2 < 32);
+ if (arg2 == 0) {
+ tcg_gen_mov_i32(ret, arg1);
+ } else {
+ tcg_gen_shl_i32(ret, arg1, tcg_constant_i32(arg2));
+ }
+}
+
+void tcg_gen_shri_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
+{
+ tcg_debug_assert(arg2 >= 0 && arg2 < 32);
+ if (arg2 == 0) {
+ tcg_gen_mov_i32(ret, arg1);
+ } else {
+ tcg_gen_shr_i32(ret, arg1, tcg_constant_i32(arg2));
+ }
+}
+
+void tcg_gen_sari_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
+{
+ tcg_debug_assert(arg2 >= 0 && arg2 < 32);
+ if (arg2 == 0) {
+ tcg_gen_mov_i32(ret, arg1);
+ } else {
+ tcg_gen_sar_i32(ret, arg1, tcg_constant_i32(arg2));
+ }
+}
+
+void tcg_gen_brcond_i32(TCGCond cond, TCGv_i32 arg1, TCGv_i32 arg2, TCGLabel *l)
+{
+ if (cond == TCG_COND_ALWAYS) {
+ tcg_gen_br(l);
+ } else if (cond != TCG_COND_NEVER) {
+ l->refs++;
+ tcg_gen_op4ii_i32(INDEX_op_brcond_i32, arg1, arg2, cond, label_arg(l));
+ }
+}
+
+void tcg_gen_brcondi_i32(TCGCond cond, TCGv_i32 arg1, int32_t arg2, TCGLabel *l)
+{
+ if (cond == TCG_COND_ALWAYS) {
+ tcg_gen_br(l);
+ } else if (cond != TCG_COND_NEVER) {
+ tcg_gen_brcond_i32(cond, arg1, tcg_constant_i32(arg2), l);
+ }
+}
+
+void tcg_gen_setcond_i32(TCGCond cond, TCGv_i32 ret,
+ TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ if (cond == TCG_COND_ALWAYS) {
+ tcg_gen_movi_i32(ret, 1);
+ } else if (cond == TCG_COND_NEVER) {
+ tcg_gen_movi_i32(ret, 0);
+ } else {
+ tcg_gen_op4i_i32(INDEX_op_setcond_i32, ret, arg1, arg2, cond);
+ }
+}
+
+void tcg_gen_setcondi_i32(TCGCond cond, TCGv_i32 ret,
+ TCGv_i32 arg1, int32_t arg2)
+{
+ tcg_gen_setcond_i32(cond, ret, arg1, tcg_constant_i32(arg2));
+}
+
+void tcg_gen_muli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
+{
+ if (arg2 == 0) {
+ tcg_gen_movi_i32(ret, 0);
+ } else if (is_power_of_2(arg2)) {
+ tcg_gen_shli_i32(ret, arg1, ctz32(arg2));
+ } else {
+ tcg_gen_mul_i32(ret, arg1, tcg_constant_i32(arg2));
+ }
+}
+
+void tcg_gen_div_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ if (TCG_TARGET_HAS_div_i32) {
+ tcg_gen_op3_i32(INDEX_op_div_i32, ret, arg1, arg2);
+ } else if (TCG_TARGET_HAS_div2_i32) {
+ TCGv_i32 t0 = tcg_temp_new_i32();
+ tcg_gen_sari_i32(t0, arg1, 31);
+ tcg_gen_op5_i32(INDEX_op_div2_i32, ret, t0, arg1, t0, arg2);
+ tcg_temp_free_i32(t0);
+ } else {
+ gen_helper_div_i32(ret, arg1, arg2);
+ }
+}
+
+void tcg_gen_rem_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ if (TCG_TARGET_HAS_rem_i32) {
+ tcg_gen_op3_i32(INDEX_op_rem_i32, ret, arg1, arg2);
+ } else if (TCG_TARGET_HAS_div_i32) {
+ TCGv_i32 t0 = tcg_temp_new_i32();
+ tcg_gen_op3_i32(INDEX_op_div_i32, t0, arg1, arg2);
+ tcg_gen_mul_i32(t0, t0, arg2);
+ tcg_gen_sub_i32(ret, arg1, t0);
+ tcg_temp_free_i32(t0);
+ } else if (TCG_TARGET_HAS_div2_i32) {
+ TCGv_i32 t0 = tcg_temp_new_i32();
+ tcg_gen_sari_i32(t0, arg1, 31);
+ tcg_gen_op5_i32(INDEX_op_div2_i32, t0, ret, arg1, t0, arg2);
+ tcg_temp_free_i32(t0);
+ } else {
+ gen_helper_rem_i32(ret, arg1, arg2);
+ }
+}
+
+void tcg_gen_divu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ if (TCG_TARGET_HAS_div_i32) {
+ tcg_gen_op3_i32(INDEX_op_divu_i32, ret, arg1, arg2);
+ } else if (TCG_TARGET_HAS_div2_i32) {
+ TCGv_i32 t0 = tcg_temp_new_i32();
+ tcg_gen_movi_i32(t0, 0);
+ tcg_gen_op5_i32(INDEX_op_divu2_i32, ret, t0, arg1, t0, arg2);
+ tcg_temp_free_i32(t0);
+ } else {
+ gen_helper_divu_i32(ret, arg1, arg2);
+ }
+}
+
+void tcg_gen_remu_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ if (TCG_TARGET_HAS_rem_i32) {
+ tcg_gen_op3_i32(INDEX_op_remu_i32, ret, arg1, arg2);
+ } else if (TCG_TARGET_HAS_div_i32) {
+ TCGv_i32 t0 = tcg_temp_new_i32();
+ tcg_gen_op3_i32(INDEX_op_divu_i32, t0, arg1, arg2);
+ tcg_gen_mul_i32(t0, t0, arg2);
+ tcg_gen_sub_i32(ret, arg1, t0);
+ tcg_temp_free_i32(t0);
+ } else if (TCG_TARGET_HAS_div2_i32) {
+ TCGv_i32 t0 = tcg_temp_new_i32();
+ tcg_gen_movi_i32(t0, 0);
+ tcg_gen_op5_i32(INDEX_op_divu2_i32, t0, ret, arg1, t0, arg2);
+ tcg_temp_free_i32(t0);
+ } else {
+ gen_helper_remu_i32(ret, arg1, arg2);
+ }
+}
+
+void tcg_gen_andc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ if (TCG_TARGET_HAS_andc_i32) {
+ tcg_gen_op3_i32(INDEX_op_andc_i32, ret, arg1, arg2);
+ } else {
+ TCGv_i32 t0 = tcg_temp_new_i32();
+ tcg_gen_not_i32(t0, arg2);
+ tcg_gen_and_i32(ret, arg1, t0);
+ tcg_temp_free_i32(t0);
+ }
+}
+
+void tcg_gen_eqv_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ if (TCG_TARGET_HAS_eqv_i32) {
+ tcg_gen_op3_i32(INDEX_op_eqv_i32, ret, arg1, arg2);
+ } else {
+ tcg_gen_xor_i32(ret, arg1, arg2);
+ tcg_gen_not_i32(ret, ret);
+ }
+}
+
+void tcg_gen_nand_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ if (TCG_TARGET_HAS_nand_i32) {
+ tcg_gen_op3_i32(INDEX_op_nand_i32, ret, arg1, arg2);
+ } else {
+ tcg_gen_and_i32(ret, arg1, arg2);
+ tcg_gen_not_i32(ret, ret);
+ }
+}
+
+void tcg_gen_nor_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ if (TCG_TARGET_HAS_nor_i32) {
+ tcg_gen_op3_i32(INDEX_op_nor_i32, ret, arg1, arg2);
+ } else {
+ tcg_gen_or_i32(ret, arg1, arg2);
+ tcg_gen_not_i32(ret, ret);
+ }
+}
+
+void tcg_gen_orc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ if (TCG_TARGET_HAS_orc_i32) {
+ tcg_gen_op3_i32(INDEX_op_orc_i32, ret, arg1, arg2);
+ } else {
+ TCGv_i32 t0 = tcg_temp_new_i32();
+ tcg_gen_not_i32(t0, arg2);
+ tcg_gen_or_i32(ret, arg1, t0);
+ tcg_temp_free_i32(t0);
+ }
+}
+
+void tcg_gen_clz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ if (TCG_TARGET_HAS_clz_i32) {
+ tcg_gen_op3_i32(INDEX_op_clz_i32, ret, arg1, arg2);
+ } else if (TCG_TARGET_HAS_clz_i64) {
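+ /*
+ * Zero-extend to 64 bits and bias the zero-input result by 32:
+ * for non-zero input, clz_i64 of the zero-extended value is
+ * 32 + clz_i32, so the subtraction of 32 below recovers clz_i32;
+ * a zero input returns arg2 + 32, which maps back to arg2.
+ */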
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ TCGv_i64 t2 = tcg_temp_new_i64();
+ tcg_gen_extu_i32_i64(t1, arg1);
+ tcg_gen_extu_i32_i64(t2, arg2);
+ tcg_gen_addi_i64(t2, t2, 32);
+ tcg_gen_clz_i64(t1, t1, t2);
+ tcg_gen_extrl_i64_i32(ret, t1);
+ tcg_temp_free_i64(t1);
+ tcg_temp_free_i64(t2);
+ tcg_gen_subi_i32(ret, ret, 32);
+ } else {
+ gen_helper_clz_i32(ret, arg1, arg2);
+ }
+}
+
+void tcg_gen_clzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2)
+{
+ tcg_gen_clz_i32(ret, arg1, tcg_constant_i32(arg2));
+}
+
+void tcg_gen_ctz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ if (TCG_TARGET_HAS_ctz_i32) {
+ tcg_gen_op3_i32(INDEX_op_ctz_i32, ret, arg1, arg2);
+ } else if (TCG_TARGET_HAS_ctz_i64) {
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ TCGv_i64 t2 = tcg_temp_new_i64();
+ tcg_gen_extu_i32_i64(t1, arg1);
+ tcg_gen_extu_i32_i64(t2, arg2);
+ tcg_gen_ctz_i64(t1, t1, t2);
+ tcg_gen_extrl_i64_i32(ret, t1);
+ tcg_temp_free_i64(t1);
+ tcg_temp_free_i64(t2);
+ } else if (TCG_TARGET_HAS_ctpop_i32
+ || TCG_TARGET_HAS_ctpop_i64
+ || TCG_TARGET_HAS_clz_i32
+ || TCG_TARGET_HAS_clz_i64) {
+ TCGv_i32 z, t = tcg_temp_new_i32();
+
+ if (TCG_TARGET_HAS_ctpop_i32 || TCG_TARGET_HAS_ctpop_i64) {
+ tcg_gen_subi_i32(t, arg1, 1);
+ tcg_gen_andc_i32(t, t, arg1);
+ tcg_gen_ctpop_i32(t, t);
+ } else {
+ /* Since all non-x86 hosts have clz(0) == 32, don't fight it. */
+ tcg_gen_neg_i32(t, arg1);
+ tcg_gen_and_i32(t, t, arg1);
+ tcg_gen_clzi_i32(t, t, 32);
+ tcg_gen_xori_i32(t, t, 31);
+ }
+ z = tcg_constant_i32(0);
+ tcg_gen_movcond_i32(TCG_COND_EQ, ret, arg1, z, arg2, t);
+ tcg_temp_free_i32(t);
+ } else {
+ gen_helper_ctz_i32(ret, arg1, arg2);
+ }
+}
+
+void tcg_gen_ctzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2)
+{
+ if (!TCG_TARGET_HAS_ctz_i32 && TCG_TARGET_HAS_ctpop_i32 && arg2 == 32) {
+ /* This equivalence has the advantage of not requiring a fixup. */
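+ /*
+ * (x - 1) & ~x sets exactly the bits below the least significant
+ * set bit of x, so its population count is ctz(x); e.g. for
+ * x = 0x68, (0x68 - 1) & ~0x68 == 0x07 and ctpop(0x07) == 3.
+ * For x == 0 the expression is all-ones and ctpop returns 32,
+ * which equals the requested arg2 here, hence no fixup.
+ */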
+ TCGv_i32 t = tcg_temp_new_i32();
+ tcg_gen_subi_i32(t, arg1, 1);
+ tcg_gen_andc_i32(t, t, arg1);
+ tcg_gen_ctpop_i32(ret, t);
+ tcg_temp_free_i32(t);
+ } else {
+ tcg_gen_ctz_i32(ret, arg1, tcg_constant_i32(arg2));
+ }
+}
+
+void tcg_gen_clrsb_i32(TCGv_i32 ret, TCGv_i32 arg)
+{
+ if (TCG_TARGET_HAS_clz_i32) {
+ TCGv_i32 t = tcg_temp_new_i32();
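+ /* XOR with the broadcast sign bit turns the redundant sign bits into
+ leading zeros; count them and subtract 1 for the sign bit itself. */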
+ tcg_gen_sari_i32(t, arg, 31);
+ tcg_gen_xor_i32(t, t, arg);
+ tcg_gen_clzi_i32(t, t, 32);
+ tcg_gen_subi_i32(ret, t, 1);
+ tcg_temp_free_i32(t);
+ } else {
+ gen_helper_clrsb_i32(ret, arg);
+ }
+}
+
+void tcg_gen_ctpop_i32(TCGv_i32 ret, TCGv_i32 arg1)
+{
+ if (TCG_TARGET_HAS_ctpop_i32) {
+ tcg_gen_op2_i32(INDEX_op_ctpop_i32, ret, arg1);
+ } else if (TCG_TARGET_HAS_ctpop_i64) {
+ TCGv_i64 t = tcg_temp_new_i64();
+ tcg_gen_extu_i32_i64(t, arg1);
+ tcg_gen_ctpop_i64(t, t);
+ tcg_gen_extrl_i64_i32(ret, t);
+ tcg_temp_free_i64(t);
+ } else {
+ gen_helper_ctpop_i32(ret, arg1);
+ }
+}
+
+void tcg_gen_rotl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ if (TCG_TARGET_HAS_rot_i32) {
+ tcg_gen_op3_i32(INDEX_op_rotl_i32, ret, arg1, arg2);
+ } else {
+ TCGv_i32 t0, t1;
+
+ t0 = tcg_temp_new_i32();
+ t1 = tcg_temp_new_i32();
+ tcg_gen_shl_i32(t0, arg1, arg2);
+ tcg_gen_subfi_i32(t1, 32, arg2);
+ tcg_gen_shr_i32(t1, arg1, t1);
+ tcg_gen_or_i32(ret, t0, t1);
+ tcg_temp_free_i32(t0);
+ tcg_temp_free_i32(t1);
+ }
+}
+
+void tcg_gen_rotli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
+{
+ tcg_debug_assert(arg2 >= 0 && arg2 < 32);
+ /* some cases can be optimized here */
+ if (arg2 == 0) {
+ tcg_gen_mov_i32(ret, arg1);
+ } else if (TCG_TARGET_HAS_rot_i32) {
+ tcg_gen_rotl_i32(ret, arg1, tcg_constant_i32(arg2));
+ } else {
+ TCGv_i32 t0, t1;
+ t0 = tcg_temp_new_i32();
+ t1 = tcg_temp_new_i32();
+ tcg_gen_shli_i32(t0, arg1, arg2);
+ tcg_gen_shri_i32(t1, arg1, 32 - arg2);
+ tcg_gen_or_i32(ret, t0, t1);
+ tcg_temp_free_i32(t0);
+ tcg_temp_free_i32(t1);
+ }
+}
+
+void tcg_gen_rotr_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ if (TCG_TARGET_HAS_rot_i32) {
+ tcg_gen_op3_i32(INDEX_op_rotr_i32, ret, arg1, arg2);
+ } else {
+ TCGv_i32 t0, t1;
+
+ t0 = tcg_temp_new_i32();
+ t1 = tcg_temp_new_i32();
+ tcg_gen_shr_i32(t0, arg1, arg2);
+ tcg_gen_subfi_i32(t1, 32, arg2);
+ tcg_gen_shl_i32(t1, arg1, t1);
+ tcg_gen_or_i32(ret, t0, t1);
+ tcg_temp_free_i32(t0);
+ tcg_temp_free_i32(t1);
+ }
+}
+
+void tcg_gen_rotri_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
+{
+ tcg_debug_assert(arg2 >= 0 && arg2 < 32);
+ /* some cases can be optimized here */
+ if (arg2 == 0) {
+ tcg_gen_mov_i32(ret, arg1);
+ } else {
+ tcg_gen_rotli_i32(ret, arg1, 32 - arg2);
+ }
+}
+
+void tcg_gen_deposit_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2,
+ unsigned int ofs, unsigned int len)
+{
+ uint32_t mask;
+ TCGv_i32 t1;
+
+ tcg_debug_assert(ofs < 32);
+ tcg_debug_assert(len > 0);
+ tcg_debug_assert(len <= 32);
+ tcg_debug_assert(ofs + len <= 32);
+
+ if (len == 32) {
+ tcg_gen_mov_i32(ret, arg2);
+ return;
+ }
+ if (TCG_TARGET_HAS_deposit_i32 && TCG_TARGET_deposit_i32_valid(ofs, len)) {
+ tcg_gen_op5ii_i32(INDEX_op_deposit_i32, ret, arg1, arg2, ofs, len);
+ return;
+ }
+
+ t1 = tcg_temp_new_i32();
+
+ if (TCG_TARGET_HAS_extract2_i32) {
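+ /* A field that touches bit 31 or bit 0 can be built with one funnel
+ shift (extract2) of the two inputs, plus a rotate for ofs == 0. */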
+ if (ofs + len == 32) {
+ tcg_gen_shli_i32(t1, arg1, len);
+ tcg_gen_extract2_i32(ret, t1, arg2, len);
+ goto done;
+ }
+ if (ofs == 0) {
+ tcg_gen_extract2_i32(ret, arg1, arg2, len);
+ tcg_gen_rotli_i32(ret, ret, len);
+ goto done;
+ }
+ }
+
+ mask = (1u << len) - 1;
+ if (ofs + len < 32) {
+ tcg_gen_andi_i32(t1, arg2, mask);
+ tcg_gen_shli_i32(t1, t1, ofs);
+ } else {
+ tcg_gen_shli_i32(t1, arg2, ofs);
+ }
+ tcg_gen_andi_i32(ret, arg1, ~(mask << ofs));
+ tcg_gen_or_i32(ret, ret, t1);
+ done:
+ tcg_temp_free_i32(t1);
+}
+
+void tcg_gen_deposit_z_i32(TCGv_i32 ret, TCGv_i32 arg,
+ unsigned int ofs, unsigned int len)
+{
+ tcg_debug_assert(ofs < 32);
+ tcg_debug_assert(len > 0);
+ tcg_debug_assert(len <= 32);
+ tcg_debug_assert(ofs + len <= 32);
+
+ if (ofs + len == 32) {
+ tcg_gen_shli_i32(ret, arg, ofs);
+ } else if (ofs == 0) {
+ tcg_gen_andi_i32(ret, arg, (1u << len) - 1);
+ } else if (TCG_TARGET_HAS_deposit_i32
+ && TCG_TARGET_deposit_i32_valid(ofs, len)) {
+ TCGv_i32 zero = tcg_constant_i32(0);
+ tcg_gen_op5ii_i32(INDEX_op_deposit_i32, ret, zero, arg, ofs, len);
+ } else {
+ /* To help two-operand hosts we prefer to zero-extend first,
+ which allows ARG to stay live. */
+ switch (len) {
+ case 16:
+ if (TCG_TARGET_HAS_ext16u_i32) {
+ tcg_gen_ext16u_i32(ret, arg);
+ tcg_gen_shli_i32(ret, ret, ofs);
+ return;
+ }
+ break;
+ case 8:
+ if (TCG_TARGET_HAS_ext8u_i32) {
+ tcg_gen_ext8u_i32(ret, arg);
+ tcg_gen_shli_i32(ret, ret, ofs);
+ return;
+ }
+ break;
+ }
+ /* Otherwise prefer zero-extension over AND for code size. */
+ switch (ofs + len) {
+ case 16:
+ if (TCG_TARGET_HAS_ext16u_i32) {
+ tcg_gen_shli_i32(ret, arg, ofs);
+ tcg_gen_ext16u_i32(ret, ret);
+ return;
+ }
+ break;
+ case 8:
+ if (TCG_TARGET_HAS_ext8u_i32) {
+ tcg_gen_shli_i32(ret, arg, ofs);
+ tcg_gen_ext8u_i32(ret, ret);
+ return;
+ }
+ break;
+ }
+ tcg_gen_andi_i32(ret, arg, (1u << len) - 1);
+ tcg_gen_shli_i32(ret, ret, ofs);
+ }
+}
+
+void tcg_gen_extract_i32(TCGv_i32 ret, TCGv_i32 arg,
+ unsigned int ofs, unsigned int len)
+{
+ tcg_debug_assert(ofs < 32);
+ tcg_debug_assert(len > 0);
+ tcg_debug_assert(len <= 32);
+ tcg_debug_assert(ofs + len <= 32);
+
+ /* Canonicalize certain special cases, even if extract is supported. */
+ if (ofs + len == 32) {
+ tcg_gen_shri_i32(ret, arg, 32 - len);
+ return;
+ }
+ if (ofs == 0) {
+ tcg_gen_andi_i32(ret, arg, (1u << len) - 1);
+ return;
+ }
+
+ if (TCG_TARGET_HAS_extract_i32
+ && TCG_TARGET_extract_i32_valid(ofs, len)) {
+ tcg_gen_op4ii_i32(INDEX_op_extract_i32, ret, arg, ofs, len);
+ return;
+ }
+
+ /* Assume that zero-extension, if available, is cheaper than a shift. */
+ switch (ofs + len) {
+ case 16:
+ if (TCG_TARGET_HAS_ext16u_i32) {
+ tcg_gen_ext16u_i32(ret, arg);
+ tcg_gen_shri_i32(ret, ret, ofs);
+ return;
+ }
+ break;
+ case 8:
+ if (TCG_TARGET_HAS_ext8u_i32) {
+ tcg_gen_ext8u_i32(ret, arg);
+ tcg_gen_shri_i32(ret, ret, ofs);
+ return;
+ }
+ break;
+ }
+
+ /* ??? Ideally we'd know what values are available for immediate AND.
+ Assume that 8 bits are available, plus the special case of 16,
+ so that we get ext8u, ext16u. */
+ switch (len) {
+ case 1 ... 8: case 16:
+ tcg_gen_shri_i32(ret, arg, ofs);
+ tcg_gen_andi_i32(ret, ret, (1u << len) - 1);
+ break;
+ default:
+ tcg_gen_shli_i32(ret, arg, 32 - len - ofs);
+ tcg_gen_shri_i32(ret, ret, 32 - len);
+ break;
+ }
+}
+
+void tcg_gen_sextract_i32(TCGv_i32 ret, TCGv_i32 arg,
+ unsigned int ofs, unsigned int len)
+{
+ tcg_debug_assert(ofs < 32);
+ tcg_debug_assert(len > 0);
+ tcg_debug_assert(len <= 32);
+ tcg_debug_assert(ofs + len <= 32);
+
+ /* Canonicalize certain special cases, even if extract is supported. */
+ if (ofs + len == 32) {
+ tcg_gen_sari_i32(ret, arg, 32 - len);
+ return;
+ }
+ if (ofs == 0) {
+ switch (len) {
+ case 16:
+ tcg_gen_ext16s_i32(ret, arg);
+ return;
+ case 8:
+ tcg_gen_ext8s_i32(ret, arg);
+ return;
+ }
+ }
+
+ if (TCG_TARGET_HAS_sextract_i32
+ && TCG_TARGET_extract_i32_valid(ofs, len)) {
+ tcg_gen_op4ii_i32(INDEX_op_sextract_i32, ret, arg, ofs, len);
+ return;
+ }
+
+ /* Assume that sign-extension, if available, is cheaper than a shift. */
+ switch (ofs + len) {
+ case 16:
+ if (TCG_TARGET_HAS_ext16s_i32) {
+ tcg_gen_ext16s_i32(ret, arg);
+ tcg_gen_sari_i32(ret, ret, ofs);
+ return;
+ }
+ break;
+ case 8:
+ if (TCG_TARGET_HAS_ext8s_i32) {
+ tcg_gen_ext8s_i32(ret, arg);
+ tcg_gen_sari_i32(ret, ret, ofs);
+ return;
+ }
+ break;
+ }
+ switch (len) {
+ case 16:
+ if (TCG_TARGET_HAS_ext16s_i32) {
+ tcg_gen_shri_i32(ret, arg, ofs);
+ tcg_gen_ext16s_i32(ret, ret);
+ return;
+ }
+ break;
+ case 8:
+ if (TCG_TARGET_HAS_ext8s_i32) {
+ tcg_gen_shri_i32(ret, arg, ofs);
+ tcg_gen_ext8s_i32(ret, ret);
+ return;
+ }
+ break;
+ }
+
+ tcg_gen_shli_i32(ret, arg, 32 - len - ofs);
+ tcg_gen_sari_i32(ret, ret, 32 - len);
+}
+
+/*
+ * Extract 32 bits from a 64-bit input, ah:al, starting from ofs.
+ * Unlike tcg_gen_extract_i32 above, len is fixed at 32.
+ */
+void tcg_gen_extract2_i32(TCGv_i32 ret, TCGv_i32 al, TCGv_i32 ah,
+ unsigned int ofs)
+{
+ tcg_debug_assert(ofs <= 32);
+ if (ofs == 0) {
+ tcg_gen_mov_i32(ret, al);
+ } else if (ofs == 32) {
+ tcg_gen_mov_i32(ret, ah);
+ } else if (al == ah) {
+ tcg_gen_rotri_i32(ret, al, ofs);
+ } else if (TCG_TARGET_HAS_extract2_i32) {
+ tcg_gen_op4i_i32(INDEX_op_extract2_i32, ret, al, ah, ofs);
+ } else {
+ TCGv_i32 t0 = tcg_temp_new_i32();
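+ /* Generic fallback: take the low bits from al >> ofs and deposit the
+ low ofs bits of ah into the top of the result. */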
+ tcg_gen_shri_i32(t0, al, ofs);
+ tcg_gen_deposit_i32(ret, t0, ah, 32 - ofs, ofs);
+ tcg_temp_free_i32(t0);
+ }
+}
+
+void tcg_gen_movcond_i32(TCGCond cond, TCGv_i32 ret, TCGv_i32 c1,
+ TCGv_i32 c2, TCGv_i32 v1, TCGv_i32 v2)
+{
+ if (cond == TCG_COND_ALWAYS) {
+ tcg_gen_mov_i32(ret, v1);
+ } else if (cond == TCG_COND_NEVER) {
+ tcg_gen_mov_i32(ret, v2);
+ } else if (TCG_TARGET_HAS_movcond_i32) {
+ tcg_gen_op6i_i32(INDEX_op_movcond_i32, ret, c1, c2, v1, v2, cond);
+ } else {
+ TCGv_i32 t0 = tcg_temp_new_i32();
+ TCGv_i32 t1 = tcg_temp_new_i32();
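+ /* Without movcond, turn the setcond result into an all-ones/all-zero
+ mask and select between v1 and v2 with and/andc/or. */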
+ tcg_gen_setcond_i32(cond, t0, c1, c2);
+ tcg_gen_neg_i32(t0, t0);
+ tcg_gen_and_i32(t1, v1, t0);
+ tcg_gen_andc_i32(ret, v2, t0);
+ tcg_gen_or_i32(ret, ret, t1);
+ tcg_temp_free_i32(t0);
+ tcg_temp_free_i32(t1);
+ }
+}
+
+void tcg_gen_add2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al,
+ TCGv_i32 ah, TCGv_i32 bl, TCGv_i32 bh)
+{
+ if (TCG_TARGET_HAS_add2_i32) {
+ tcg_gen_op6_i32(INDEX_op_add2_i32, rl, rh, al, ah, bl, bh);
+ } else {
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ tcg_gen_concat_i32_i64(t0, al, ah);
+ tcg_gen_concat_i32_i64(t1, bl, bh);
+ tcg_gen_add_i64(t0, t0, t1);
+ tcg_gen_extr_i64_i32(rl, rh, t0);
+ tcg_temp_free_i64(t0);
+ tcg_temp_free_i64(t1);
+ }
+}
+
+void tcg_gen_sub2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al,
+ TCGv_i32 ah, TCGv_i32 bl, TCGv_i32 bh)
+{
+ if (TCG_TARGET_HAS_sub2_i32) {
+ tcg_gen_op6_i32(INDEX_op_sub2_i32, rl, rh, al, ah, bl, bh);
+ } else {
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ tcg_gen_concat_i32_i64(t0, al, ah);
+ tcg_gen_concat_i32_i64(t1, bl, bh);
+ tcg_gen_sub_i64(t0, t0, t1);
+ tcg_gen_extr_i64_i32(rl, rh, t0);
+ tcg_temp_free_i64(t0);
+ tcg_temp_free_i64(t1);
+ }
+}
+
+void tcg_gen_mulu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ if (TCG_TARGET_HAS_mulu2_i32) {
+ tcg_gen_op4_i32(INDEX_op_mulu2_i32, rl, rh, arg1, arg2);
+ } else if (TCG_TARGET_HAS_muluh_i32) {
+ TCGv_i32 t = tcg_temp_new_i32();
+ tcg_gen_op3_i32(INDEX_op_mul_i32, t, arg1, arg2);
+ tcg_gen_op3_i32(INDEX_op_muluh_i32, rh, arg1, arg2);
+ tcg_gen_mov_i32(rl, t);
+ tcg_temp_free_i32(t);
+ } else {
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ tcg_gen_extu_i32_i64(t0, arg1);
+ tcg_gen_extu_i32_i64(t1, arg2);
+ tcg_gen_mul_i64(t0, t0, t1);
+ tcg_gen_extr_i64_i32(rl, rh, t0);
+ tcg_temp_free_i64(t0);
+ tcg_temp_free_i64(t1);
+ }
+}
+
+void tcg_gen_muls2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ if (TCG_TARGET_HAS_muls2_i32) {
+ tcg_gen_op4_i32(INDEX_op_muls2_i32, rl, rh, arg1, arg2);
+ } else if (TCG_TARGET_HAS_mulsh_i32) {
+ TCGv_i32 t = tcg_temp_new_i32();
+ tcg_gen_op3_i32(INDEX_op_mul_i32, t, arg1, arg2);
+ tcg_gen_op3_i32(INDEX_op_mulsh_i32, rh, arg1, arg2);
+ tcg_gen_mov_i32(rl, t);
+ tcg_temp_free_i32(t);
+ } else if (TCG_TARGET_REG_BITS == 32) {
+ TCGv_i32 t0 = tcg_temp_new_i32();
+ TCGv_i32 t1 = tcg_temp_new_i32();
+ TCGv_i32 t2 = tcg_temp_new_i32();
+ TCGv_i32 t3 = tcg_temp_new_i32();
+ tcg_gen_mulu2_i32(t0, t1, arg1, arg2);
+ /* Adjust for negative inputs. */
+ tcg_gen_sari_i32(t2, arg1, 31);
+ tcg_gen_sari_i32(t3, arg2, 31);
+ tcg_gen_and_i32(t2, t2, arg2);
+ tcg_gen_and_i32(t3, t3, arg1);
+ tcg_gen_sub_i32(rh, t1, t2);
+ tcg_gen_sub_i32(rh, rh, t3);
+ tcg_gen_mov_i32(rl, t0);
+ tcg_temp_free_i32(t0);
+ tcg_temp_free_i32(t1);
+ tcg_temp_free_i32(t2);
+ tcg_temp_free_i32(t3);
+ } else {
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ tcg_gen_ext_i32_i64(t0, arg1);
+ tcg_gen_ext_i32_i64(t1, arg2);
+ tcg_gen_mul_i64(t0, t0, t1);
+ tcg_gen_extr_i64_i32(rl, rh, t0);
+ tcg_temp_free_i64(t0);
+ tcg_temp_free_i64(t1);
+ }
+}
+
+void tcg_gen_mulsu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ if (TCG_TARGET_REG_BITS == 32) {
+ TCGv_i32 t0 = tcg_temp_new_i32();
+ TCGv_i32 t1 = tcg_temp_new_i32();
+ TCGv_i32 t2 = tcg_temp_new_i32();
+ tcg_gen_mulu2_i32(t0, t1, arg1, arg2);
+ /* Adjust for negative input for the signed arg1. */
+ tcg_gen_sari_i32(t2, arg1, 31);
+ tcg_gen_and_i32(t2, t2, arg2);
+ tcg_gen_sub_i32(rh, t1, t2);
+ tcg_gen_mov_i32(rl, t0);
+ tcg_temp_free_i32(t0);
+ tcg_temp_free_i32(t1);
+ tcg_temp_free_i32(t2);
+ } else {
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ tcg_gen_ext_i32_i64(t0, arg1);
+ tcg_gen_extu_i32_i64(t1, arg2);
+ tcg_gen_mul_i64(t0, t0, t1);
+ tcg_gen_extr_i64_i32(rl, rh, t0);
+ tcg_temp_free_i64(t0);
+ tcg_temp_free_i64(t1);
+ }
+}
+
+void tcg_gen_ext8s_i32(TCGv_i32 ret, TCGv_i32 arg)
+{
+ if (TCG_TARGET_HAS_ext8s_i32) {
+ tcg_gen_op2_i32(INDEX_op_ext8s_i32, ret, arg);
+ } else {
+ tcg_gen_shli_i32(ret, arg, 24);
+ tcg_gen_sari_i32(ret, ret, 24);
+ }
+}
+
+void tcg_gen_ext16s_i32(TCGv_i32 ret, TCGv_i32 arg)
+{
+ if (TCG_TARGET_HAS_ext16s_i32) {
+ tcg_gen_op2_i32(INDEX_op_ext16s_i32, ret, arg);
+ } else {
+ tcg_gen_shli_i32(ret, arg, 16);
+ tcg_gen_sari_i32(ret, ret, 16);
+ }
+}
+
+void tcg_gen_ext8u_i32(TCGv_i32 ret, TCGv_i32 arg)
+{
+ if (TCG_TARGET_HAS_ext8u_i32) {
+ tcg_gen_op2_i32(INDEX_op_ext8u_i32, ret, arg);
+ } else {
+ tcg_gen_andi_i32(ret, arg, 0xffu);
+ }
+}
+
+void tcg_gen_ext16u_i32(TCGv_i32 ret, TCGv_i32 arg)
+{
+ if (TCG_TARGET_HAS_ext16u_i32) {
+ tcg_gen_op2_i32(INDEX_op_ext16u_i32, ret, arg);
+ } else {
+ tcg_gen_andi_i32(ret, arg, 0xffffu);
+ }
+}
+
+void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg, int flags)
+{
+ /* Only one extension flag may be present. */
+ tcg_debug_assert(!(flags & TCG_BSWAP_OS) || !(flags & TCG_BSWAP_OZ));
+
+ if (TCG_TARGET_HAS_bswap16_i32) {
+ tcg_gen_op3i_i32(INDEX_op_bswap16_i32, ret, arg, flags);
+ } else {
+ TCGv_i32 t0 = tcg_temp_new_i32();
+ TCGv_i32 t1 = tcg_temp_new_i32();
+
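+ /* t0 gets the high byte in bits 7:0 (clearing the bits shifted down
+ from above, unless the input is known zero-extended); t1 gets the
+ low byte in bits 15:8, extended according to the output flags. */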
+ tcg_gen_shri_i32(t0, arg, 8);
+ if (!(flags & TCG_BSWAP_IZ)) {
+ tcg_gen_ext8u_i32(t0, t0);
+ }
+
+ if (flags & TCG_BSWAP_OS) {
+ tcg_gen_shli_i32(t1, arg, 24);
+ tcg_gen_sari_i32(t1, t1, 16);
+ } else if (flags & TCG_BSWAP_OZ) {
+ tcg_gen_ext8u_i32(t1, arg);
+ tcg_gen_shli_i32(t1, t1, 8);
+ } else {
+ tcg_gen_shli_i32(t1, arg, 8);
+ }
+
+ tcg_gen_or_i32(ret, t0, t1);
+ tcg_temp_free_i32(t0);
+ tcg_temp_free_i32(t1);
+ }
+}
+
+void tcg_gen_bswap32_i32(TCGv_i32 ret, TCGv_i32 arg)
+{
+ if (TCG_TARGET_HAS_bswap32_i32) {
+ tcg_gen_op3i_i32(INDEX_op_bswap32_i32, ret, arg, 0);
+ } else {
+ TCGv_i32 t0 = tcg_temp_new_i32();
+ TCGv_i32 t1 = tcg_temp_new_i32();
+ TCGv_i32 t2 = tcg_constant_i32(0x00ff00ff);
+
+ /* arg = abcd */
+ tcg_gen_shri_i32(t0, arg, 8); /* t0 = .abc */
+ tcg_gen_and_i32(t1, arg, t2); /* t1 = .b.d */
+ tcg_gen_and_i32(t0, t0, t2); /* t0 = .a.c */
+ tcg_gen_shli_i32(t1, t1, 8); /* t1 = b.d. */
+ tcg_gen_or_i32(ret, t0, t1); /* ret = badc */
+
+ tcg_gen_shri_i32(t0, ret, 16); /* t0 = ..ba */
+ tcg_gen_shli_i32(t1, ret, 16); /* t1 = dc.. */
+ tcg_gen_or_i32(ret, t0, t1); /* ret = dcba */
+
+ tcg_temp_free_i32(t0);
+ tcg_temp_free_i32(t1);
+ }
+}
+
+void tcg_gen_smin_i32(TCGv_i32 ret, TCGv_i32 a, TCGv_i32 b)
+{
+ tcg_gen_movcond_i32(TCG_COND_LT, ret, a, b, a, b);
+}
+
+void tcg_gen_umin_i32(TCGv_i32 ret, TCGv_i32 a, TCGv_i32 b)
+{
+ tcg_gen_movcond_i32(TCG_COND_LTU, ret, a, b, a, b);
+}
+
+void tcg_gen_smax_i32(TCGv_i32 ret, TCGv_i32 a, TCGv_i32 b)
+{
+ tcg_gen_movcond_i32(TCG_COND_LT, ret, a, b, b, a);
+}
+
+void tcg_gen_umax_i32(TCGv_i32 ret, TCGv_i32 a, TCGv_i32 b)
+{
+ tcg_gen_movcond_i32(TCG_COND_LTU, ret, a, b, b, a);
+}
+
+void tcg_gen_abs_i32(TCGv_i32 ret, TCGv_i32 a)
+{
+ TCGv_i32 t = tcg_temp_new_i32();
+
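+ /* abs(a) = (a ^ (a >> 31)) - (a >> 31): the arithmetic shift yields
+ 0 for non-negative a and -1 for negative a. */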
+ tcg_gen_sari_i32(t, a, 31);
+ tcg_gen_xor_i32(ret, a, t);
+ tcg_gen_sub_i32(ret, ret, t);
+ tcg_temp_free_i32(t);
+}
+
+/* 64-bit ops */
+
+#if TCG_TARGET_REG_BITS == 32
+/* These are all inline for TCG_TARGET_REG_BITS == 64. */
+
+void tcg_gen_discard_i64(TCGv_i64 arg)
+{
+ tcg_gen_discard_i32(TCGV_LOW(arg));
+ tcg_gen_discard_i32(TCGV_HIGH(arg));
+}
+
+void tcg_gen_mov_i64(TCGv_i64 ret, TCGv_i64 arg)
+{
+ TCGTemp *ts = tcgv_i64_temp(arg);
+
+ /* Canonicalize TCGv_i64 TEMP_CONST into TCGv_i32 TEMP_CONST. */
+ if (ts->kind == TEMP_CONST) {
+ tcg_gen_movi_i64(ret, ts->val);
+ } else {
+ tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg));
+ tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_HIGH(arg));
+ }
+}
+
+void tcg_gen_movi_i64(TCGv_i64 ret, int64_t arg)
+{
+ tcg_gen_movi_i32(TCGV_LOW(ret), arg);
+ tcg_gen_movi_i32(TCGV_HIGH(ret), arg >> 32);
+}
+
+void tcg_gen_ld8u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
+{
+ tcg_gen_ld8u_i32(TCGV_LOW(ret), arg2, offset);
+ tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+}
+
+void tcg_gen_ld8s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
+{
+ tcg_gen_ld8s_i32(TCGV_LOW(ret), arg2, offset);
+ tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
+}
+
+void tcg_gen_ld16u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
+{
+ tcg_gen_ld16u_i32(TCGV_LOW(ret), arg2, offset);
+ tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+}
+
+void tcg_gen_ld16s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
+{
+ tcg_gen_ld16s_i32(TCGV_LOW(ret), arg2, offset);
+ tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
+}
+
+void tcg_gen_ld32u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
+{
+ tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset);
+ tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+}
+
+void tcg_gen_ld32s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
+{
+ tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset);
+ tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
+}
+
+void tcg_gen_ld_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset)
+{
+ /* Since arg2 and ret have different types,
+ they cannot be the same temporary */
+#ifdef HOST_WORDS_BIGENDIAN
+ tcg_gen_ld_i32(TCGV_HIGH(ret), arg2, offset);
+ tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset + 4);
+#else
+ tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset);
+ tcg_gen_ld_i32(TCGV_HIGH(ret), arg2, offset + 4);
+#endif
+}
+
+void tcg_gen_st_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset)
+{
+#ifdef HOST_WORDS_BIGENDIAN
+ tcg_gen_st_i32(TCGV_HIGH(arg1), arg2, offset);
+ tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset + 4);
+#else
+ tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset);
+ tcg_gen_st_i32(TCGV_HIGH(arg1), arg2, offset + 4);
+#endif
+}
+
+void tcg_gen_and_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ tcg_gen_and_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
+ tcg_gen_and_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
+}
+
+void tcg_gen_or_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ tcg_gen_or_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
+ tcg_gen_or_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
+}
+
+void tcg_gen_xor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ tcg_gen_xor_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
+ tcg_gen_xor_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
+}
+
+void tcg_gen_shl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ gen_helper_shl_i64(ret, arg1, arg2);
+}
+
+void tcg_gen_shr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ gen_helper_shr_i64(ret, arg1, arg2);
+}
+
+void tcg_gen_sar_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ gen_helper_sar_i64(ret, arg1, arg2);
+}
+
+void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ TCGv_i64 t0;
+ TCGv_i32 t1;
+
+ t0 = tcg_temp_new_i64();
+ t1 = tcg_temp_new_i32();
+
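+ /* Schoolbook multiplication: the 64-bit product of the low halves,
+ plus the two 32-bit cross products added into the high half; the
+ high*high term only affects bits above 63 and is dropped. */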
+ tcg_gen_mulu2_i32(TCGV_LOW(t0), TCGV_HIGH(t0),
+ TCGV_LOW(arg1), TCGV_LOW(arg2));
+
+ tcg_gen_mul_i32(t1, TCGV_LOW(arg1), TCGV_HIGH(arg2));
+ tcg_gen_add_i32(TCGV_HIGH(t0), TCGV_HIGH(t0), t1);
+ tcg_gen_mul_i32(t1, TCGV_HIGH(arg1), TCGV_LOW(arg2));
+ tcg_gen_add_i32(TCGV_HIGH(t0), TCGV_HIGH(t0), t1);
+
+ tcg_gen_mov_i64(ret, t0);
+ tcg_temp_free_i64(t0);
+ tcg_temp_free_i32(t1);
+}
+
+#else
+
+void tcg_gen_movi_i64(TCGv_i64 ret, int64_t arg)
+{
+ tcg_gen_mov_i64(ret, tcg_constant_i64(arg));
+}
+
+#endif /* TCG_TARGET_REG_BITS == 32 */
+
+void tcg_gen_addi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
+{
+ /* some cases can be optimized here */
+ if (arg2 == 0) {
+ tcg_gen_mov_i64(ret, arg1);
+ } else if (TCG_TARGET_REG_BITS == 64) {
+ tcg_gen_add_i64(ret, arg1, tcg_constant_i64(arg2));
+ } else {
+ tcg_gen_add2_i32(TCGV_LOW(ret), TCGV_HIGH(ret),
+ TCGV_LOW(arg1), TCGV_HIGH(arg1),
+ tcg_constant_i32(arg2), tcg_constant_i32(arg2 >> 32));
+ }
+}
+
+void tcg_gen_subfi_i64(TCGv_i64 ret, int64_t arg1, TCGv_i64 arg2)
+{
+ if (arg1 == 0 && TCG_TARGET_HAS_neg_i64) {
+ /* Don't recurse with tcg_gen_neg_i64. */
+ tcg_gen_op2_i64(INDEX_op_neg_i64, ret, arg2);
+ } else if (TCG_TARGET_REG_BITS == 64) {
+ tcg_gen_sub_i64(ret, tcg_constant_i64(arg1), arg2);
+ } else {
+ tcg_gen_sub2_i32(TCGV_LOW(ret), TCGV_HIGH(ret),
+ tcg_constant_i32(arg1), tcg_constant_i32(arg1 >> 32),
+ TCGV_LOW(arg2), TCGV_HIGH(arg2));
+ }
+}
+
+void tcg_gen_subi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
+{
+ /* some cases can be optimized here */
+ if (arg2 == 0) {
+ tcg_gen_mov_i64(ret, arg1);
+ } else if (TCG_TARGET_REG_BITS == 64) {
+ tcg_gen_sub_i64(ret, arg1, tcg_constant_i64(arg2));
+ } else {
+ tcg_gen_sub2_i32(TCGV_LOW(ret), TCGV_HIGH(ret),
+ TCGV_LOW(arg1), TCGV_HIGH(arg1),
+ tcg_constant_i32(arg2), tcg_constant_i32(arg2 >> 32));
+ }
+}
+
+void tcg_gen_andi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
+{
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_gen_andi_i32(TCGV_LOW(ret), TCGV_LOW(arg1), arg2);
+ tcg_gen_andi_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), arg2 >> 32);
+ return;
+ }
+
+ /* Some cases can be optimized here. */
+ switch (arg2) {
+ case 0:
+ tcg_gen_movi_i64(ret, 0);
+ return;
+ case -1:
+ tcg_gen_mov_i64(ret, arg1);
+ return;
+ case 0xff:
+ /* Don't recurse with tcg_gen_ext8u_i64. */
+ if (TCG_TARGET_HAS_ext8u_i64) {
+ tcg_gen_op2_i64(INDEX_op_ext8u_i64, ret, arg1);
+ return;
+ }
+ break;
+ case 0xffff:
+ if (TCG_TARGET_HAS_ext16u_i64) {
+ tcg_gen_op2_i64(INDEX_op_ext16u_i64, ret, arg1);
+ return;
+ }
+ break;
+ case 0xffffffffu:
+ if (TCG_TARGET_HAS_ext32u_i64) {
+ tcg_gen_op2_i64(INDEX_op_ext32u_i64, ret, arg1);
+ return;
+ }
+ break;
+ }
+
+ tcg_gen_and_i64(ret, arg1, tcg_constant_i64(arg2));
+}
+
+void tcg_gen_ori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
+{
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_gen_ori_i32(TCGV_LOW(ret), TCGV_LOW(arg1), arg2);
+ tcg_gen_ori_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), arg2 >> 32);
+ return;
+ }
+ /* Some cases can be optimized here. */
+ if (arg2 == -1) {
+ tcg_gen_movi_i64(ret, -1);
+ } else if (arg2 == 0) {
+ tcg_gen_mov_i64(ret, arg1);
+ } else {
+ tcg_gen_or_i64(ret, arg1, tcg_constant_i64(arg2));
+ }
+}
+
+void tcg_gen_xori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
+{
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_gen_xori_i32(TCGV_LOW(ret), TCGV_LOW(arg1), arg2);
+ tcg_gen_xori_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), arg2 >> 32);
+ return;
+ }
+ /* Some cases can be optimized here. */
+ if (arg2 == 0) {
+ tcg_gen_mov_i64(ret, arg1);
+ } else if (arg2 == -1 && TCG_TARGET_HAS_not_i64) {
+ /* Don't recurse with tcg_gen_not_i64. */
+ tcg_gen_op2_i64(INDEX_op_not_i64, ret, arg1);
+ } else {
+ tcg_gen_xor_i64(ret, arg1, tcg_constant_i64(arg2));
+ }
+}
+
+static inline void tcg_gen_shifti_i64(TCGv_i64 ret, TCGv_i64 arg1,
+ unsigned c, bool right, bool arith)
+{
+ tcg_debug_assert(c < 64);
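+ /* Double-word shift on a 32-bit host: counts of 32 or more move whole
+ words, smaller counts combine the two halves via extract2/deposit. */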
+ if (c == 0) {
+ tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg1));
+ tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1));
+ } else if (c >= 32) {
+ c -= 32;
+ if (right) {
+ if (arith) {
+ tcg_gen_sari_i32(TCGV_LOW(ret), TCGV_HIGH(arg1), c);
+ tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), 31);
+ } else {
+ tcg_gen_shri_i32(TCGV_LOW(ret), TCGV_HIGH(arg1), c);
+ tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+ }
+ } else {
+ tcg_gen_shli_i32(TCGV_HIGH(ret), TCGV_LOW(arg1), c);
+ tcg_gen_movi_i32(TCGV_LOW(ret), 0);
+ }
+ } else if (right) {
+ if (TCG_TARGET_HAS_extract2_i32) {
+ tcg_gen_extract2_i32(TCGV_LOW(ret),
+ TCGV_LOW(arg1), TCGV_HIGH(arg1), c);
+ } else {
+ tcg_gen_shri_i32(TCGV_LOW(ret), TCGV_LOW(arg1), c);
+ tcg_gen_deposit_i32(TCGV_LOW(ret), TCGV_LOW(ret),
+ TCGV_HIGH(arg1), 32 - c, c);
+ }
+ if (arith) {
+ tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), c);
+ } else {
+ tcg_gen_shri_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), c);
+ }
+ } else {
+ if (TCG_TARGET_HAS_extract2_i32) {
+ tcg_gen_extract2_i32(TCGV_HIGH(ret),
+ TCGV_LOW(arg1), TCGV_HIGH(arg1), 32 - c);
+ } else {
+ TCGv_i32 t0 = tcg_temp_new_i32();
+ tcg_gen_shri_i32(t0, TCGV_LOW(arg1), 32 - c);
+ tcg_gen_deposit_i32(TCGV_HIGH(ret), t0,
+ TCGV_HIGH(arg1), c, 32 - c);
+ tcg_temp_free_i32(t0);
+ }
+ tcg_gen_shli_i32(TCGV_LOW(ret), TCGV_LOW(arg1), c);
+ }
+}
+
+void tcg_gen_shli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
+{
+ tcg_debug_assert(arg2 >= 0 && arg2 < 64);
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_gen_shifti_i64(ret, arg1, arg2, 0, 0);
+ } else if (arg2 == 0) {
+ tcg_gen_mov_i64(ret, arg1);
+ } else {
+ tcg_gen_shl_i64(ret, arg1, tcg_constant_i64(arg2));
+ }
+}
+
+void tcg_gen_shri_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
+{
+ tcg_debug_assert(arg2 >= 0 && arg2 < 64);
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_gen_shifti_i64(ret, arg1, arg2, 1, 0);
+ } else if (arg2 == 0) {
+ tcg_gen_mov_i64(ret, arg1);
+ } else {
+ tcg_gen_shr_i64(ret, arg1, tcg_constant_i64(arg2));
+ }
+}
+
+void tcg_gen_sari_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
+{
+ tcg_debug_assert(arg2 >= 0 && arg2 < 64);
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_gen_shifti_i64(ret, arg1, arg2, 1, 1);
+ } else if (arg2 == 0) {
+ tcg_gen_mov_i64(ret, arg1);
+ } else {
+ tcg_gen_sar_i64(ret, arg1, tcg_constant_i64(arg2));
+ }
+}
+
+void tcg_gen_brcond_i64(TCGCond cond, TCGv_i64 arg1, TCGv_i64 arg2, TCGLabel *l)
+{
+ if (cond == TCG_COND_ALWAYS) {
+ tcg_gen_br(l);
+ } else if (cond != TCG_COND_NEVER) {
+ l->refs++;
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_gen_op6ii_i32(INDEX_op_brcond2_i32, TCGV_LOW(arg1),
+ TCGV_HIGH(arg1), TCGV_LOW(arg2),
+ TCGV_HIGH(arg2), cond, label_arg(l));
+ } else {
+ tcg_gen_op4ii_i64(INDEX_op_brcond_i64, arg1, arg2, cond,
+ label_arg(l));
+ }
+ }
+}
+
+void tcg_gen_brcondi_i64(TCGCond cond, TCGv_i64 arg1, int64_t arg2, TCGLabel *l)
+{
+ if (TCG_TARGET_REG_BITS == 64) {
+ tcg_gen_brcond_i64(cond, arg1, tcg_constant_i64(arg2), l);
+ } else if (cond == TCG_COND_ALWAYS) {
+ tcg_gen_br(l);
+ } else if (cond != TCG_COND_NEVER) {
+ l->refs++;
+ tcg_gen_op6ii_i32(INDEX_op_brcond2_i32,
+ TCGV_LOW(arg1), TCGV_HIGH(arg1),
+ tcg_constant_i32(arg2),
+ tcg_constant_i32(arg2 >> 32),
+ cond, label_arg(l));
+ }
+}
+
+void tcg_gen_setcond_i64(TCGCond cond, TCGv_i64 ret,
+ TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ if (cond == TCG_COND_ALWAYS) {
+ tcg_gen_movi_i64(ret, 1);
+ } else if (cond == TCG_COND_NEVER) {
+ tcg_gen_movi_i64(ret, 0);
+ } else {
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_gen_op6i_i32(INDEX_op_setcond2_i32, TCGV_LOW(ret),
+ TCGV_LOW(arg1), TCGV_HIGH(arg1),
+ TCGV_LOW(arg2), TCGV_HIGH(arg2), cond);
+ tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+ } else {
+ tcg_gen_op4i_i64(INDEX_op_setcond_i64, ret, arg1, arg2, cond);
+ }
+ }
+}
+
+void tcg_gen_setcondi_i64(TCGCond cond, TCGv_i64 ret,
+ TCGv_i64 arg1, int64_t arg2)
+{
+ if (TCG_TARGET_REG_BITS == 64) {
+ tcg_gen_setcond_i64(cond, ret, arg1, tcg_constant_i64(arg2));
+ } else if (cond == TCG_COND_ALWAYS) {
+ tcg_gen_movi_i64(ret, 1);
+ } else if (cond == TCG_COND_NEVER) {
+ tcg_gen_movi_i64(ret, 0);
+ } else {
+ tcg_gen_op6i_i32(INDEX_op_setcond2_i32, TCGV_LOW(ret),
+ TCGV_LOW(arg1), TCGV_HIGH(arg1),
+ tcg_constant_i32(arg2),
+ tcg_constant_i32(arg2 >> 32), cond);
+ tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+ }
+}
+
+void tcg_gen_muli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
+{
+ if (arg2 == 0) {
+ tcg_gen_movi_i64(ret, 0);
+ } else if (is_power_of_2(arg2)) {
+ tcg_gen_shli_i64(ret, arg1, ctz64(arg2));
+ } else {
+ TCGv_i64 t0 = tcg_const_i64(arg2);
+ tcg_gen_mul_i64(ret, arg1, t0);
+ tcg_temp_free_i64(t0);
+ }
+}
+
+void tcg_gen_div_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ if (TCG_TARGET_HAS_div_i64) {
+ tcg_gen_op3_i64(INDEX_op_div_i64, ret, arg1, arg2);
+ } else if (TCG_TARGET_HAS_div2_i64) {
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ tcg_gen_sari_i64(t0, arg1, 63);
+ tcg_gen_op5_i64(INDEX_op_div2_i64, ret, t0, arg1, t0, arg2);
+ tcg_temp_free_i64(t0);
+ } else {
+ gen_helper_div_i64(ret, arg1, arg2);
+ }
+}
+
+void tcg_gen_rem_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ if (TCG_TARGET_HAS_rem_i64) {
+ tcg_gen_op3_i64(INDEX_op_rem_i64, ret, arg1, arg2);
+ } else if (TCG_TARGET_HAS_div_i64) {
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ tcg_gen_op3_i64(INDEX_op_div_i64, t0, arg1, arg2);
+ tcg_gen_mul_i64(t0, t0, arg2);
+ tcg_gen_sub_i64(ret, arg1, t0);
+ tcg_temp_free_i64(t0);
+ } else if (TCG_TARGET_HAS_div2_i64) {
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ tcg_gen_sari_i64(t0, arg1, 63);
+ tcg_gen_op5_i64(INDEX_op_div2_i64, t0, ret, arg1, t0, arg2);
+ tcg_temp_free_i64(t0);
+ } else {
+ gen_helper_rem_i64(ret, arg1, arg2);
+ }
+}
+
+void tcg_gen_divu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ if (TCG_TARGET_HAS_div_i64) {
+ tcg_gen_op3_i64(INDEX_op_divu_i64, ret, arg1, arg2);
+ } else if (TCG_TARGET_HAS_div2_i64) {
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ tcg_gen_movi_i64(t0, 0);
+ tcg_gen_op5_i64(INDEX_op_divu2_i64, ret, t0, arg1, t0, arg2);
+ tcg_temp_free_i64(t0);
+ } else {
+ gen_helper_divu_i64(ret, arg1, arg2);
+ }
+}
+
+void tcg_gen_remu_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ if (TCG_TARGET_HAS_rem_i64) {
+ tcg_gen_op3_i64(INDEX_op_remu_i64, ret, arg1, arg2);
+ } else if (TCG_TARGET_HAS_div_i64) {
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ tcg_gen_op3_i64(INDEX_op_divu_i64, t0, arg1, arg2);
+ tcg_gen_mul_i64(t0, t0, arg2);
+ tcg_gen_sub_i64(ret, arg1, t0);
+ tcg_temp_free_i64(t0);
+ } else if (TCG_TARGET_HAS_div2_i64) {
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ tcg_gen_movi_i64(t0, 0);
+ tcg_gen_op5_i64(INDEX_op_divu2_i64, t0, ret, arg1, t0, arg2);
+ tcg_temp_free_i64(t0);
+ } else {
+ gen_helper_remu_i64(ret, arg1, arg2);
+ }
+}
+
+void tcg_gen_ext8s_i64(TCGv_i64 ret, TCGv_i64 arg)
+{
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_gen_ext8s_i32(TCGV_LOW(ret), TCGV_LOW(arg));
+ tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
+ } else if (TCG_TARGET_HAS_ext8s_i64) {
+ tcg_gen_op2_i64(INDEX_op_ext8s_i64, ret, arg);
+ } else {
+ tcg_gen_shli_i64(ret, arg, 56);
+ tcg_gen_sari_i64(ret, ret, 56);
+ }
+}
+
+void tcg_gen_ext16s_i64(TCGv_i64 ret, TCGv_i64 arg)
+{
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_gen_ext16s_i32(TCGV_LOW(ret), TCGV_LOW(arg));
+ tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
+ } else if (TCG_TARGET_HAS_ext16s_i64) {
+ tcg_gen_op2_i64(INDEX_op_ext16s_i64, ret, arg);
+ } else {
+ tcg_gen_shli_i64(ret, arg, 48);
+ tcg_gen_sari_i64(ret, ret, 48);
+ }
+}
+
+void tcg_gen_ext32s_i64(TCGv_i64 ret, TCGv_i64 arg)
+{
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg));
+ tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
+ } else if (TCG_TARGET_HAS_ext32s_i64) {
+ tcg_gen_op2_i64(INDEX_op_ext32s_i64, ret, arg);
+ } else {
+ tcg_gen_shli_i64(ret, arg, 32);
+ tcg_gen_sari_i64(ret, ret, 32);
+ }
+}
+
+void tcg_gen_ext8u_i64(TCGv_i64 ret, TCGv_i64 arg)
+{
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_gen_ext8u_i32(TCGV_LOW(ret), TCGV_LOW(arg));
+ tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+ } else if (TCG_TARGET_HAS_ext8u_i64) {
+ tcg_gen_op2_i64(INDEX_op_ext8u_i64, ret, arg);
+ } else {
+ tcg_gen_andi_i64(ret, arg, 0xffu);
+ }
+}
+
+void tcg_gen_ext16u_i64(TCGv_i64 ret, TCGv_i64 arg)
+{
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_gen_ext16u_i32(TCGV_LOW(ret), TCGV_LOW(arg));
+ tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+ } else if (TCG_TARGET_HAS_ext16u_i64) {
+ tcg_gen_op2_i64(INDEX_op_ext16u_i64, ret, arg);
+ } else {
+ tcg_gen_andi_i64(ret, arg, 0xffffu);
+ }
+}
+
+void tcg_gen_ext32u_i64(TCGv_i64 ret, TCGv_i64 arg)
+{
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg));
+ tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+ } else if (TCG_TARGET_HAS_ext32u_i64) {
+ tcg_gen_op2_i64(INDEX_op_ext32u_i64, ret, arg);
+ } else {
+ tcg_gen_andi_i64(ret, arg, 0xffffffffu);
+ }
+}
+
+void tcg_gen_bswap16_i64(TCGv_i64 ret, TCGv_i64 arg, int flags)
+{
+ /* Only one extension flag may be present. */
+ tcg_debug_assert(!(flags & TCG_BSWAP_OS) || !(flags & TCG_BSWAP_OZ));
+
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_gen_bswap16_i32(TCGV_LOW(ret), TCGV_LOW(arg), flags);
+ if (flags & TCG_BSWAP_OS) {
+ tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
+ } else {
+ tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+ }
+ } else if (TCG_TARGET_HAS_bswap16_i64) {
+ tcg_gen_op3i_i64(INDEX_op_bswap16_i64, ret, arg, flags);
+ } else {
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ TCGv_i64 t1 = tcg_temp_new_i64();
+
+ tcg_gen_shri_i64(t0, arg, 8);
+ if (!(flags & TCG_BSWAP_IZ)) {
+ tcg_gen_ext8u_i64(t0, t0);
+ }
+
+ if (flags & TCG_BSWAP_OS) {
+ tcg_gen_shli_i64(t1, arg, 56);
+ tcg_gen_sari_i64(t1, t1, 48);
+ } else if (flags & TCG_BSWAP_OZ) {
+ tcg_gen_ext8u_i64(t1, arg);
+ tcg_gen_shli_i64(t1, t1, 8);
+ } else {
+ tcg_gen_shli_i64(t1, arg, 8);
+ }
+
+ tcg_gen_or_i64(ret, t0, t1);
+ tcg_temp_free_i64(t0);
+ tcg_temp_free_i64(t1);
+ }
+}
+
+void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg, int flags)
+{
+ /* Only one extension flag may be present. */
+ tcg_debug_assert(!(flags & TCG_BSWAP_OS) || !(flags & TCG_BSWAP_OZ));
+
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_gen_bswap32_i32(TCGV_LOW(ret), TCGV_LOW(arg));
+ if (flags & TCG_BSWAP_OS) {
+ tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
+ } else {
+ tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+ }
+ } else if (TCG_TARGET_HAS_bswap32_i64) {
+ tcg_gen_op3i_i64(INDEX_op_bswap32_i64, ret, arg, flags);
+ } else {
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ TCGv_i64 t2 = tcg_constant_i64(0x00ff00ff);
+
+ /* arg = xxxxabcd */
+ tcg_gen_shri_i64(t0, arg, 8); /* t0 = .xxxxabc */
+ tcg_gen_and_i64(t1, arg, t2); /* t1 = .....b.d */
+ tcg_gen_and_i64(t0, t0, t2); /* t0 = .....a.c */
+ tcg_gen_shli_i64(t1, t1, 8); /* t1 = ....b.d. */
+ tcg_gen_or_i64(ret, t0, t1); /* ret = ....badc */
+
+ tcg_gen_shli_i64(t1, ret, 48); /* t1 = dc...... */
+ tcg_gen_shri_i64(t0, ret, 16); /* t0 = ......ba */
+ if (flags & TCG_BSWAP_OS) {
+ tcg_gen_sari_i64(t1, t1, 32); /* t1 = ssssdc.. */
+ } else {
+ tcg_gen_shri_i64(t1, t1, 32); /* t1 = ....dc.. */
+ }
+ tcg_gen_or_i64(ret, t0, t1); /* ret = ssssdcba */
+
+ tcg_temp_free_i64(t0);
+ tcg_temp_free_i64(t1);
+ }
+}
+
+void tcg_gen_bswap64_i64(TCGv_i64 ret, TCGv_i64 arg)
+{
+ if (TCG_TARGET_REG_BITS == 32) {
+ TCGv_i32 t0, t1;
+ t0 = tcg_temp_new_i32();
+ t1 = tcg_temp_new_i32();
+
+ tcg_gen_bswap32_i32(t0, TCGV_LOW(arg));
+ tcg_gen_bswap32_i32(t1, TCGV_HIGH(arg));
+ tcg_gen_mov_i32(TCGV_LOW(ret), t1);
+ tcg_gen_mov_i32(TCGV_HIGH(ret), t0);
+ tcg_temp_free_i32(t0);
+ tcg_temp_free_i32(t1);
+ } else if (TCG_TARGET_HAS_bswap64_i64) {
+ tcg_gen_op3i_i64(INDEX_op_bswap64_i64, ret, arg, 0);
+ } else {
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ TCGv_i64 t2 = tcg_temp_new_i64();
+
+ /* arg = abcdefgh */
+ tcg_gen_movi_i64(t2, 0x00ff00ff00ff00ffull);
+ tcg_gen_shri_i64(t0, arg, 8); /* t0 = .abcdefg */
+ tcg_gen_and_i64(t1, arg, t2); /* t1 = .b.d.f.h */
+ tcg_gen_and_i64(t0, t0, t2); /* t0 = .a.c.e.g */
+ tcg_gen_shli_i64(t1, t1, 8); /* t1 = b.d.f.h. */
+ tcg_gen_or_i64(ret, t0, t1); /* ret = badcfehg */
+
+ tcg_gen_movi_i64(t2, 0x0000ffff0000ffffull);
+ tcg_gen_shri_i64(t0, ret, 16); /* t0 = ..badcfe */
+ tcg_gen_and_i64(t1, ret, t2); /* t1 = ..dc..hg */
+ tcg_gen_and_i64(t0, t0, t2); /* t0 = ..ba..fe */
+ tcg_gen_shli_i64(t1, t1, 16); /* t1 = dc..hg.. */
+ tcg_gen_or_i64(ret, t0, t1); /* ret = dcbahgfe */
+
+ tcg_gen_shri_i64(t0, ret, 32); /* t0 = ....dcba */
+ tcg_gen_shli_i64(t1, ret, 32); /* t1 = hgfe.... */
+ tcg_gen_or_i64(ret, t0, t1); /* ret = hgfedcba */
+
+ tcg_temp_free_i64(t0);
+ tcg_temp_free_i64(t1);
+ tcg_temp_free_i64(t2);
+ }
+}
+
+void tcg_gen_not_i64(TCGv_i64 ret, TCGv_i64 arg)
+{
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_gen_not_i32(TCGV_LOW(ret), TCGV_LOW(arg));
+ tcg_gen_not_i32(TCGV_HIGH(ret), TCGV_HIGH(arg));
+ } else if (TCG_TARGET_HAS_not_i64) {
+ tcg_gen_op2_i64(INDEX_op_not_i64, ret, arg);
+ } else {
+ tcg_gen_xori_i64(ret, arg, -1);
+ }
+}
+
+void tcg_gen_andc_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_gen_andc_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
+ tcg_gen_andc_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
+ } else if (TCG_TARGET_HAS_andc_i64) {
+ tcg_gen_op3_i64(INDEX_op_andc_i64, ret, arg1, arg2);
+ } else {
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ tcg_gen_not_i64(t0, arg2);
+ tcg_gen_and_i64(ret, arg1, t0);
+ tcg_temp_free_i64(t0);
+ }
+}
+
+void tcg_gen_eqv_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_gen_eqv_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
+ tcg_gen_eqv_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
+ } else if (TCG_TARGET_HAS_eqv_i64) {
+ tcg_gen_op3_i64(INDEX_op_eqv_i64, ret, arg1, arg2);
+ } else {
+ tcg_gen_xor_i64(ret, arg1, arg2);
+ tcg_gen_not_i64(ret, ret);
+ }
+}
+
+void tcg_gen_nand_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_gen_nand_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
+ tcg_gen_nand_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
+ } else if (TCG_TARGET_HAS_nand_i64) {
+ tcg_gen_op3_i64(INDEX_op_nand_i64, ret, arg1, arg2);
+ } else {
+ tcg_gen_and_i64(ret, arg1, arg2);
+ tcg_gen_not_i64(ret, ret);
+ }
+}
+
+void tcg_gen_nor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_gen_nor_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
+ tcg_gen_nor_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
+ } else if (TCG_TARGET_HAS_nor_i64) {
+ tcg_gen_op3_i64(INDEX_op_nor_i64, ret, arg1, arg2);
+ } else {
+ tcg_gen_or_i64(ret, arg1, arg2);
+ tcg_gen_not_i64(ret, ret);
+ }
+}
+
+void tcg_gen_orc_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_gen_orc_i32(TCGV_LOW(ret), TCGV_LOW(arg1), TCGV_LOW(arg2));
+ tcg_gen_orc_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), TCGV_HIGH(arg2));
+ } else if (TCG_TARGET_HAS_orc_i64) {
+ tcg_gen_op3_i64(INDEX_op_orc_i64, ret, arg1, arg2);
+ } else {
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ tcg_gen_not_i64(t0, arg2);
+ tcg_gen_or_i64(ret, arg1, t0);
+ tcg_temp_free_i64(t0);
+ }
+}
+
+void tcg_gen_clz_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ if (TCG_TARGET_HAS_clz_i64) {
+ tcg_gen_op3_i64(INDEX_op_clz_i64, ret, arg1, arg2);
+ } else {
+ gen_helper_clz_i64(ret, arg1, arg2);
+ }
+}
+
+void tcg_gen_clzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2)
+{
+ if (TCG_TARGET_REG_BITS == 32
+ && TCG_TARGET_HAS_clz_i32
+ && arg2 <= 0xffffffffu) {
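+ /* Count the high word, using 32 plus the count of the low word as its
+ "zero" default; the low word's own default is arg2 - 32 so that the
+ final +32 produces arg2 when the whole input is zero. */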
+ TCGv_i32 t = tcg_temp_new_i32();
+ tcg_gen_clzi_i32(t, TCGV_LOW(arg1), arg2 - 32);
+ tcg_gen_addi_i32(t, t, 32);
+ tcg_gen_clz_i32(TCGV_LOW(ret), TCGV_HIGH(arg1), t);
+ tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+ tcg_temp_free_i32(t);
+ } else {
+ TCGv_i64 t0 = tcg_const_i64(arg2);
+ tcg_gen_clz_i64(ret, arg1, t0);
+ tcg_temp_free_i64(t0);
+ }
+}
+
+void tcg_gen_ctz_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ if (TCG_TARGET_HAS_ctz_i64) {
+ tcg_gen_op3_i64(INDEX_op_ctz_i64, ret, arg1, arg2);
+ } else if (TCG_TARGET_HAS_ctpop_i64 || TCG_TARGET_HAS_clz_i64) {
+ TCGv_i64 z, t = tcg_temp_new_i64();
+
+ if (TCG_TARGET_HAS_ctpop_i64) {
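+ /* As for ctz_i32: popcount of (arg1 - 1) & ~arg1 gives the trailing
+ zero count; arg1 == 0 is handled by the movcond below. */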
+ tcg_gen_subi_i64(t, arg1, 1);
+ tcg_gen_andc_i64(t, t, arg1);
+ tcg_gen_ctpop_i64(t, t);
+ } else {
+ /* Since all non-x86 hosts have clz(0) == 64, don't fight it. */
+ tcg_gen_neg_i64(t, arg1);
+ tcg_gen_and_i64(t, t, arg1);
+ tcg_gen_clzi_i64(t, t, 64);
+ tcg_gen_xori_i64(t, t, 63);
+ }
+ z = tcg_constant_i64(0);
+ tcg_gen_movcond_i64(TCG_COND_EQ, ret, arg1, z, arg2, t);
+ tcg_temp_free_i64(t);
+ tcg_temp_free_i64(z);
+ } else {
+ gen_helper_ctz_i64(ret, arg1, arg2);
+ }
+}
+
+void tcg_gen_ctzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2)
+{
+ if (TCG_TARGET_REG_BITS == 32
+ && TCG_TARGET_HAS_ctz_i32
+ && arg2 <= 0xffffffffu) {
+ TCGv_i32 t32 = tcg_temp_new_i32();
+ tcg_gen_ctzi_i32(t32, TCGV_HIGH(arg1), arg2 - 32);
+ tcg_gen_addi_i32(t32, t32, 32);
+ tcg_gen_ctz_i32(TCGV_LOW(ret), TCGV_LOW(arg1), t32);
+ tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+ tcg_temp_free_i32(t32);
+ } else if (!TCG_TARGET_HAS_ctz_i64
+ && TCG_TARGET_HAS_ctpop_i64
+ && arg2 == 64) {
+ /* This equivalence has the advantage of not requiring a fixup. */
+ TCGv_i64 t = tcg_temp_new_i64();
+ tcg_gen_subi_i64(t, arg1, 1);
+ tcg_gen_andc_i64(t, t, arg1);
+ tcg_gen_ctpop_i64(ret, t);
+ tcg_temp_free_i64(t);
+ } else {
+ TCGv_i64 t0 = tcg_const_i64(arg2);
+ tcg_gen_ctz_i64(ret, arg1, t0);
+ tcg_temp_free_i64(t0);
+ }
+}
+
+void tcg_gen_clrsb_i64(TCGv_i64 ret, TCGv_i64 arg)
+{
+ if (TCG_TARGET_HAS_clz_i64 || TCG_TARGET_HAS_clz_i32) {
+ TCGv_i64 t = tcg_temp_new_i64();
+ tcg_gen_sari_i64(t, arg, 63);
+ tcg_gen_xor_i64(t, t, arg);
+ tcg_gen_clzi_i64(t, t, 64);
+ tcg_gen_subi_i64(ret, t, 1);
+ tcg_temp_free_i64(t);
+ } else {
+ gen_helper_clrsb_i64(ret, arg);
+ }
+}
+
+void tcg_gen_ctpop_i64(TCGv_i64 ret, TCGv_i64 arg1)
+{
+ if (TCG_TARGET_HAS_ctpop_i64) {
+ tcg_gen_op2_i64(INDEX_op_ctpop_i64, ret, arg1);
+ } else if (TCG_TARGET_REG_BITS == 32 && TCG_TARGET_HAS_ctpop_i32) {
+ tcg_gen_ctpop_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1));
+ tcg_gen_ctpop_i32(TCGV_LOW(ret), TCGV_LOW(arg1));
+ tcg_gen_add_i32(TCGV_LOW(ret), TCGV_LOW(ret), TCGV_HIGH(ret));
+ tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+ } else {
+ gen_helper_ctpop_i64(ret, arg1);
+ }
+}
+
+void tcg_gen_rotl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ if (TCG_TARGET_HAS_rot_i64) {
+ tcg_gen_op3_i64(INDEX_op_rotl_i64, ret, arg1, arg2);
+ } else {
+ TCGv_i64 t0, t1;
+ t0 = tcg_temp_new_i64();
+ t1 = tcg_temp_new_i64();
+ tcg_gen_shl_i64(t0, arg1, arg2);
+ tcg_gen_subfi_i64(t1, 64, arg2);
+ tcg_gen_shr_i64(t1, arg1, t1);
+ tcg_gen_or_i64(ret, t0, t1);
+ tcg_temp_free_i64(t0);
+ tcg_temp_free_i64(t1);
+ }
+}
+
+void tcg_gen_rotli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
+{
+ tcg_debug_assert(arg2 >= 0 && arg2 < 64);
+ /* some cases can be optimized here */
+ if (arg2 == 0) {
+ tcg_gen_mov_i64(ret, arg1);
+ } else if (TCG_TARGET_HAS_rot_i64) {
+ tcg_gen_rotl_i64(ret, arg1, tcg_constant_i64(arg2));
+ } else {
+ TCGv_i64 t0, t1;
+ t0 = tcg_temp_new_i64();
+ t1 = tcg_temp_new_i64();
+ tcg_gen_shli_i64(t0, arg1, arg2);
+ tcg_gen_shri_i64(t1, arg1, 64 - arg2);
+ tcg_gen_or_i64(ret, t0, t1);
+ tcg_temp_free_i64(t0);
+ tcg_temp_free_i64(t1);
+ }
+}
+
+void tcg_gen_rotr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ if (TCG_TARGET_HAS_rot_i64) {
+ tcg_gen_op3_i64(INDEX_op_rotr_i64, ret, arg1, arg2);
+ } else {
+ TCGv_i64 t0, t1;
+ t0 = tcg_temp_new_i64();
+ t1 = tcg_temp_new_i64();
+ tcg_gen_shr_i64(t0, arg1, arg2);
+ tcg_gen_subfi_i64(t1, 64, arg2);
+ tcg_gen_shl_i64(t1, arg1, t1);
+ tcg_gen_or_i64(ret, t0, t1);
+ tcg_temp_free_i64(t0);
+ tcg_temp_free_i64(t1);
+ }
+}
+
+void tcg_gen_rotri_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
+{
+ tcg_debug_assert(arg2 >= 0 && arg2 < 64);
+ /* some cases can be optimized here */
+ if (arg2 == 0) {
+ tcg_gen_mov_i64(ret, arg1);
+ } else {
+ tcg_gen_rotli_i64(ret, arg1, 64 - arg2);
+ }
+}
+
+void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2,
+ unsigned int ofs, unsigned int len)
+{
+ uint64_t mask;
+ TCGv_i64 t1;
+
+ tcg_debug_assert(ofs < 64);
+ tcg_debug_assert(len > 0);
+ tcg_debug_assert(len <= 64);
+ tcg_debug_assert(ofs + len <= 64);
+
+ if (len == 64) {
+ tcg_gen_mov_i64(ret, arg2);
+ return;
+ }
+ if (TCG_TARGET_HAS_deposit_i64 && TCG_TARGET_deposit_i64_valid(ofs, len)) {
+ tcg_gen_op5ii_i64(INDEX_op_deposit_i64, ret, arg1, arg2, ofs, len);
+ return;
+ }
+
+ if (TCG_TARGET_REG_BITS == 32) {
+ if (ofs >= 32) {
+ tcg_gen_deposit_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1),
+ TCGV_LOW(arg2), ofs - 32, len);
+ tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg1));
+ return;
+ }
+ if (ofs + len <= 32) {
+ tcg_gen_deposit_i32(TCGV_LOW(ret), TCGV_LOW(arg1),
+ TCGV_LOW(arg2), ofs, len);
+ tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1));
+ return;
+ }
+ }
+
+ t1 = tcg_temp_new_i64();
+
+ if (TCG_TARGET_HAS_extract2_i64) {
+ if (ofs + len == 64) {
+ tcg_gen_shli_i64(t1, arg1, len);
+ tcg_gen_extract2_i64(ret, t1, arg2, len);
+ goto done;
+ }
+ if (ofs == 0) {
+ tcg_gen_extract2_i64(ret, arg1, arg2, len);
+ tcg_gen_rotli_i64(ret, ret, len);
+ goto done;
+ }
+ }
+
+ mask = (1ull << len) - 1;
+ if (ofs + len < 64) {
+ tcg_gen_andi_i64(t1, arg2, mask);
+ tcg_gen_shli_i64(t1, t1, ofs);
+ } else {
+ tcg_gen_shli_i64(t1, arg2, ofs);
+ }
+ tcg_gen_andi_i64(ret, arg1, ~(mask << ofs));
+ tcg_gen_or_i64(ret, ret, t1);
+ done:
+ tcg_temp_free_i64(t1);
+}
+
+void tcg_gen_deposit_z_i64(TCGv_i64 ret, TCGv_i64 arg,
+ unsigned int ofs, unsigned int len)
+{
+ tcg_debug_assert(ofs < 64);
+ tcg_debug_assert(len > 0);
+ tcg_debug_assert(len <= 64);
+ tcg_debug_assert(ofs + len <= 64);
+
+ if (ofs + len == 64) {
+ tcg_gen_shli_i64(ret, arg, ofs);
+ } else if (ofs == 0) {
+ tcg_gen_andi_i64(ret, arg, (1ull << len) - 1);
+ } else if (TCG_TARGET_HAS_deposit_i64
+ && TCG_TARGET_deposit_i64_valid(ofs, len)) {
+ TCGv_i64 zero = tcg_constant_i64(0);
+ tcg_gen_op5ii_i64(INDEX_op_deposit_i64, ret, zero, arg, ofs, len);
+ } else {
+ if (TCG_TARGET_REG_BITS == 32) {
+ if (ofs >= 32) {
+ tcg_gen_deposit_z_i32(TCGV_HIGH(ret), TCGV_LOW(arg),
+ ofs - 32, len);
+ tcg_gen_movi_i32(TCGV_LOW(ret), 0);
+ return;
+ }
+ if (ofs + len <= 32) {
+ tcg_gen_deposit_z_i32(TCGV_LOW(ret), TCGV_LOW(arg), ofs, len);
+ tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+ return;
+ }
+ }
+ /* To help two-operand hosts we prefer to zero-extend first,
+ which allows ARG to stay live. */
+ switch (len) {
+ case 32:
+ if (TCG_TARGET_HAS_ext32u_i64) {
+ tcg_gen_ext32u_i64(ret, arg);
+ tcg_gen_shli_i64(ret, ret, ofs);
+ return;
+ }
+ break;
+ case 16:
+ if (TCG_TARGET_HAS_ext16u_i64) {
+ tcg_gen_ext16u_i64(ret, arg);
+ tcg_gen_shli_i64(ret, ret, ofs);
+ return;
+ }
+ break;
+ case 8:
+ if (TCG_TARGET_HAS_ext8u_i64) {
+ tcg_gen_ext8u_i64(ret, arg);
+ tcg_gen_shli_i64(ret, ret, ofs);
+ return;
+ }
+ break;
+ }
+ /* Otherwise prefer zero-extension over AND for code size. */
+ switch (ofs + len) {
+ case 32:
+ if (TCG_TARGET_HAS_ext32u_i64) {
+ tcg_gen_shli_i64(ret, arg, ofs);
+ tcg_gen_ext32u_i64(ret, ret);
+ return;
+ }
+ break;
+ case 16:
+ if (TCG_TARGET_HAS_ext16u_i64) {
+ tcg_gen_shli_i64(ret, arg, ofs);
+ tcg_gen_ext16u_i64(ret, ret);
+ return;
+ }
+ break;
+ case 8:
+ if (TCG_TARGET_HAS_ext8u_i64) {
+ tcg_gen_shli_i64(ret, arg, ofs);
+ tcg_gen_ext8u_i64(ret, ret);
+ return;
+ }
+ break;
+ }
+ tcg_gen_andi_i64(ret, arg, (1ull << len) - 1);
+ tcg_gen_shli_i64(ret, ret, ofs);
+ }
+}
+
+void tcg_gen_extract_i64(TCGv_i64 ret, TCGv_i64 arg,
+ unsigned int ofs, unsigned int len)
+{
+ tcg_debug_assert(ofs < 64);
+ tcg_debug_assert(len > 0);
+ tcg_debug_assert(len <= 64);
+ tcg_debug_assert(ofs + len <= 64);
+
+ /* Canonicalize certain special cases, even if extract is supported. */
+ if (ofs + len == 64) {
+ tcg_gen_shri_i64(ret, arg, 64 - len);
+ return;
+ }
+ if (ofs == 0) {
+ tcg_gen_andi_i64(ret, arg, (1ull << len) - 1);
+ return;
+ }
+
+ if (TCG_TARGET_REG_BITS == 32) {
+ /* Look for a 32-bit extract within one of the two words. */
+ if (ofs >= 32) {
+ tcg_gen_extract_i32(TCGV_LOW(ret), TCGV_HIGH(arg), ofs - 32, len);
+ tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+ return;
+ }
+ if (ofs + len <= 32) {
+ tcg_gen_extract_i32(TCGV_LOW(ret), TCGV_LOW(arg), ofs, len);
+ tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+ return;
+ }
+ /* The field is split across two words. One double-word
+ shift is better than two double-word shifts. */
+ goto do_shift_and;
+ }
+
+ if (TCG_TARGET_HAS_extract_i64
+ && TCG_TARGET_extract_i64_valid(ofs, len)) {
+ tcg_gen_op4ii_i64(INDEX_op_extract_i64, ret, arg, ofs, len);
+ return;
+ }
+
+ /* Assume that zero-extension, if available, is cheaper than a shift. */
+ switch (ofs + len) {
+ case 32:
+ if (TCG_TARGET_HAS_ext32u_i64) {
+ tcg_gen_ext32u_i64(ret, arg);
+ tcg_gen_shri_i64(ret, ret, ofs);
+ return;
+ }
+ break;
+ case 16:
+ if (TCG_TARGET_HAS_ext16u_i64) {
+ tcg_gen_ext16u_i64(ret, arg);
+ tcg_gen_shri_i64(ret, ret, ofs);
+ return;
+ }
+ break;
+ case 8:
+ if (TCG_TARGET_HAS_ext8u_i64) {
+ tcg_gen_ext8u_i64(ret, arg);
+ tcg_gen_shri_i64(ret, ret, ofs);
+ return;
+ }
+ break;
+ }
+
+ /* ??? Ideally we'd know what values are available for immediate AND.
+ Assume that 8 bits are available, plus the special cases of 16 and 32,
+ so that we get ext8u, ext16u, and ext32u. */
+ switch (len) {
+ case 1 ... 8: case 16: case 32:
+ do_shift_and:
+ tcg_gen_shri_i64(ret, arg, ofs);
+ tcg_gen_andi_i64(ret, ret, (1ull << len) - 1);
+ break;
+ default:
+ tcg_gen_shli_i64(ret, arg, 64 - len - ofs);
+ tcg_gen_shri_i64(ret, ret, 64 - len);
+ break;
+ }
+}
+
+void tcg_gen_sextract_i64(TCGv_i64 ret, TCGv_i64 arg,
+ unsigned int ofs, unsigned int len)
+{
+ tcg_debug_assert(ofs < 64);
+ tcg_debug_assert(len > 0);
+ tcg_debug_assert(len <= 64);
+ tcg_debug_assert(ofs + len <= 64);
+
+ /* Canonicalize certain special cases, even if sextract is supported. */
+ if (ofs + len == 64) {
+ tcg_gen_sari_i64(ret, arg, 64 - len);
+ return;
+ }
+ if (ofs == 0) {
+ switch (len) {
+ case 32:
+ tcg_gen_ext32s_i64(ret, arg);
+ return;
+ case 16:
+ tcg_gen_ext16s_i64(ret, arg);
+ return;
+ case 8:
+ tcg_gen_ext8s_i64(ret, arg);
+ return;
+ }
+ }
+
+ if (TCG_TARGET_REG_BITS == 32) {
+ /* Look for a 32-bit extract within one of the two words. */
+ if (ofs >= 32) {
+ tcg_gen_sextract_i32(TCGV_LOW(ret), TCGV_HIGH(arg), ofs - 32, len);
+ } else if (ofs + len <= 32) {
+ tcg_gen_sextract_i32(TCGV_LOW(ret), TCGV_LOW(arg), ofs, len);
+ } else if (ofs == 0) {
+ tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg));
+ tcg_gen_sextract_i32(TCGV_HIGH(ret), TCGV_HIGH(arg), 0, len - 32);
+ return;
+ } else if (len > 32) {
+ TCGv_i32 t = tcg_temp_new_i32();
+ /* Extract the bits for the high word normally; within the high
+ word the field begins at bit ofs. */
+ tcg_gen_sextract_i32(t, TCGV_HIGH(arg), ofs, len - 32);
+ /* Shift the field down for the low part. */
+ tcg_gen_shri_i64(ret, arg, ofs);
+ /* Overwrite the shift into the high part. */
+ tcg_gen_mov_i32(TCGV_HIGH(ret), t);
+ tcg_temp_free_i32(t);
+ return;
+ } else {
+ /* Shift the field down for the low part, such that the
+ field sits at the MSB. */
+ tcg_gen_shri_i64(ret, arg, ofs + len - 32);
+ /* Shift the field down from the MSB, sign extending. */
+ tcg_gen_sari_i32(TCGV_LOW(ret), TCGV_LOW(ret), 32 - len);
+ }
+ /* Sign-extend the field from 32 bits. */
+ tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
+ return;
+ }
+
+ if (TCG_TARGET_HAS_sextract_i64
+ && TCG_TARGET_extract_i64_valid(ofs, len)) {
+ tcg_gen_op4ii_i64(INDEX_op_sextract_i64, ret, arg, ofs, len);
+ return;
+ }
+
+ /* Assume that sign-extension, if available, is cheaper than a shift. */
+ switch (ofs + len) {
+ case 32:
+ if (TCG_TARGET_HAS_ext32s_i64) {
+ tcg_gen_ext32s_i64(ret, arg);
+ tcg_gen_sari_i64(ret, ret, ofs);
+ return;
+ }
+ break;
+ case 16:
+ if (TCG_TARGET_HAS_ext16s_i64) {
+ tcg_gen_ext16s_i64(ret, arg);
+ tcg_gen_sari_i64(ret, ret, ofs);
+ return;
+ }
+ break;
+ case 8:
+ if (TCG_TARGET_HAS_ext8s_i64) {
+ tcg_gen_ext8s_i64(ret, arg);
+ tcg_gen_sari_i64(ret, ret, ofs);
+ return;
+ }
+ break;
+ }
+ switch (len) {
+ case 32:
+ if (TCG_TARGET_HAS_ext32s_i64) {
+ tcg_gen_shri_i64(ret, arg, ofs);
+ tcg_gen_ext32s_i64(ret, ret);
+ return;
+ }
+ break;
+ case 16:
+ if (TCG_TARGET_HAS_ext16s_i64) {
+ tcg_gen_shri_i64(ret, arg, ofs);
+ tcg_gen_ext16s_i64(ret, ret);
+ return;
+ }
+ break;
+ case 8:
+ if (TCG_TARGET_HAS_ext8s_i64) {
+ tcg_gen_shri_i64(ret, arg, ofs);
+ tcg_gen_ext8s_i64(ret, ret);
+ return;
+ }
+ break;
+ }
+ tcg_gen_shli_i64(ret, arg, 64 - len - ofs);
+ tcg_gen_sari_i64(ret, ret, 64 - len);
+}
+
+/*
+ * Extract 64 bits from a 128-bit input, ah:al, starting from ofs.
+ * Unlike tcg_gen_extract_i64 above, len is fixed at 64.
+ */
+void tcg_gen_extract2_i64(TCGv_i64 ret, TCGv_i64 al, TCGv_i64 ah,
+ unsigned int ofs)
+{
+ tcg_debug_assert(ofs <= 64);
+ if (ofs == 0) {
+ tcg_gen_mov_i64(ret, al);
+ } else if (ofs == 64) {
+ tcg_gen_mov_i64(ret, ah);
+ } else if (al == ah) {
+ tcg_gen_rotri_i64(ret, al, ofs);
+ } else if (TCG_TARGET_HAS_extract2_i64) {
+ tcg_gen_op4i_i64(INDEX_op_extract2_i64, ret, al, ah, ofs);
+ } else {
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ tcg_gen_shri_i64(t0, al, ofs);
+ tcg_gen_deposit_i64(ret, t0, ah, 64 - ofs, ofs);
+ tcg_temp_free_i64(t0);
+ }
+}
+
+void tcg_gen_movcond_i64(TCGCond cond, TCGv_i64 ret, TCGv_i64 c1,
+ TCGv_i64 c2, TCGv_i64 v1, TCGv_i64 v2)
+{
+ if (cond == TCG_COND_ALWAYS) {
+ tcg_gen_mov_i64(ret, v1);
+ } else if (cond == TCG_COND_NEVER) {
+ tcg_gen_mov_i64(ret, v2);
+ } else if (TCG_TARGET_REG_BITS == 32) {
+ TCGv_i32 t0 = tcg_temp_new_i32();
+ TCGv_i32 t1 = tcg_temp_new_i32();
+ tcg_gen_op6i_i32(INDEX_op_setcond2_i32, t0,
+ TCGV_LOW(c1), TCGV_HIGH(c1),
+ TCGV_LOW(c2), TCGV_HIGH(c2), cond);
+
+ if (TCG_TARGET_HAS_movcond_i32) {
+ tcg_gen_movi_i32(t1, 0);
+ tcg_gen_movcond_i32(TCG_COND_NE, TCGV_LOW(ret), t0, t1,
+ TCGV_LOW(v1), TCGV_LOW(v2));
+ tcg_gen_movcond_i32(TCG_COND_NE, TCGV_HIGH(ret), t0, t1,
+ TCGV_HIGH(v1), TCGV_HIGH(v2));
+ } else {
+ tcg_gen_neg_i32(t0, t0);
+
+ tcg_gen_and_i32(t1, TCGV_LOW(v1), t0);
+ tcg_gen_andc_i32(TCGV_LOW(ret), TCGV_LOW(v2), t0);
+ tcg_gen_or_i32(TCGV_LOW(ret), TCGV_LOW(ret), t1);
+
+ tcg_gen_and_i32(t1, TCGV_HIGH(v1), t0);
+ tcg_gen_andc_i32(TCGV_HIGH(ret), TCGV_HIGH(v2), t0);
+ tcg_gen_or_i32(TCGV_HIGH(ret), TCGV_HIGH(ret), t1);
+ }
+ tcg_temp_free_i32(t0);
+ tcg_temp_free_i32(t1);
+ } else if (TCG_TARGET_HAS_movcond_i64) {
+ tcg_gen_op6i_i64(INDEX_op_movcond_i64, ret, c1, c2, v1, v2, cond);
+ } else {
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ tcg_gen_setcond_i64(cond, t0, c1, c2);
+ tcg_gen_neg_i64(t0, t0);
+ tcg_gen_and_i64(t1, v1, t0);
+ tcg_gen_andc_i64(ret, v2, t0);
+ tcg_gen_or_i64(ret, ret, t1);
+ tcg_temp_free_i64(t0);
+ tcg_temp_free_i64(t1);
+ }
+}
+
+void tcg_gen_add2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al,
+ TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh)
+{
+ if (TCG_TARGET_HAS_add2_i64) {
+ tcg_gen_op6_i64(INDEX_op_add2_i64, rl, rh, al, ah, bl, bh);
+ } else {
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ TCGv_i64 t1 = tcg_temp_new_i64();
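+ /* Generic double-word add: the carry out of the low half is
+ (al + bl) < al, computed with an unsigned setcond and added into
+ the high half. */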
+ tcg_gen_add_i64(t0, al, bl);
+ tcg_gen_setcond_i64(TCG_COND_LTU, t1, t0, al);
+ tcg_gen_add_i64(rh, ah, bh);
+ tcg_gen_add_i64(rh, rh, t1);
+ tcg_gen_mov_i64(rl, t0);
+ tcg_temp_free_i64(t0);
+ tcg_temp_free_i64(t1);
+ }
+}
+
+void tcg_gen_sub2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al,
+ TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh)
+{
+ if (TCG_TARGET_HAS_sub2_i64) {
+ tcg_gen_op6_i64(INDEX_op_sub2_i64, rl, rh, al, ah, bl, bh);
+ } else {
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ TCGv_i64 t1 = tcg_temp_new_i64();
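+ /* Generic double-word subtract: the borrow is al < bl (unsigned),
+ subtracted from the high half. */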
+ tcg_gen_sub_i64(t0, al, bl);
+ tcg_gen_setcond_i64(TCG_COND_LTU, t1, al, bl);
+ tcg_gen_sub_i64(rh, ah, bh);
+ tcg_gen_sub_i64(rh, rh, t1);
+ tcg_gen_mov_i64(rl, t0);
+ tcg_temp_free_i64(t0);
+ tcg_temp_free_i64(t1);
+ }
+}
+
+void tcg_gen_mulu2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ if (TCG_TARGET_HAS_mulu2_i64) {
+ tcg_gen_op4_i64(INDEX_op_mulu2_i64, rl, rh, arg1, arg2);
+ } else if (TCG_TARGET_HAS_muluh_i64) {
+ TCGv_i64 t = tcg_temp_new_i64();
+ tcg_gen_op3_i64(INDEX_op_mul_i64, t, arg1, arg2);
+ tcg_gen_op3_i64(INDEX_op_muluh_i64, rh, arg1, arg2);
+ tcg_gen_mov_i64(rl, t);
+ tcg_temp_free_i64(t);
+ } else {
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ tcg_gen_mul_i64(t0, arg1, arg2);
+ gen_helper_muluh_i64(rh, arg1, arg2);
+ tcg_gen_mov_i64(rl, t0);
+ tcg_temp_free_i64(t0);
+ }
+}
+
+void tcg_gen_muls2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ if (TCG_TARGET_HAS_muls2_i64) {
+ tcg_gen_op4_i64(INDEX_op_muls2_i64, rl, rh, arg1, arg2);
+ } else if (TCG_TARGET_HAS_mulsh_i64) {
+ TCGv_i64 t = tcg_temp_new_i64();
+ tcg_gen_op3_i64(INDEX_op_mul_i64, t, arg1, arg2);
+ tcg_gen_op3_i64(INDEX_op_mulsh_i64, rh, arg1, arg2);
+ tcg_gen_mov_i64(rl, t);
+ tcg_temp_free_i64(t);
+ } else if (TCG_TARGET_HAS_mulu2_i64 || TCG_TARGET_HAS_muluh_i64) {
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ TCGv_i64 t2 = tcg_temp_new_i64();
+ TCGv_i64 t3 = tcg_temp_new_i64();
+ tcg_gen_mulu2_i64(t0, t1, arg1, arg2);
+ /* Adjust for negative inputs. */
+ tcg_gen_sari_i64(t2, arg1, 63);
+ tcg_gen_sari_i64(t3, arg2, 63);
+ tcg_gen_and_i64(t2, t2, arg2);
+ tcg_gen_and_i64(t3, t3, arg1);
+ tcg_gen_sub_i64(rh, t1, t2);
+ tcg_gen_sub_i64(rh, rh, t3);
+ tcg_gen_mov_i64(rl, t0);
+ tcg_temp_free_i64(t0);
+ tcg_temp_free_i64(t1);
+ tcg_temp_free_i64(t2);
+ tcg_temp_free_i64(t3);
+ } else {
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ tcg_gen_mul_i64(t0, arg1, arg2);
+ gen_helper_mulsh_i64(rh, arg1, arg2);
+ tcg_gen_mov_i64(rl, t0);
+ tcg_temp_free_i64(t0);
+ }
+}
+
+void tcg_gen_mulsu2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ TCGv_i64 t2 = tcg_temp_new_i64();
+ tcg_gen_mulu2_i64(t0, t1, arg1, arg2);
+ /* Adjust for negative input for the signed arg1. */
+ tcg_gen_sari_i64(t2, arg1, 63);
+ tcg_gen_and_i64(t2, t2, arg2);
+ tcg_gen_sub_i64(rh, t1, t2);
+ tcg_gen_mov_i64(rl, t0);
+ tcg_temp_free_i64(t0);
+ tcg_temp_free_i64(t1);
+ tcg_temp_free_i64(t2);
+}
+
+void tcg_gen_smin_i64(TCGv_i64 ret, TCGv_i64 a, TCGv_i64 b)
+{
+ tcg_gen_movcond_i64(TCG_COND_LT, ret, a, b, a, b);
+}
+
+void tcg_gen_umin_i64(TCGv_i64 ret, TCGv_i64 a, TCGv_i64 b)
+{
+ tcg_gen_movcond_i64(TCG_COND_LTU, ret, a, b, a, b);
+}
+
+void tcg_gen_smax_i64(TCGv_i64 ret, TCGv_i64 a, TCGv_i64 b)
+{
+ tcg_gen_movcond_i64(TCG_COND_LT, ret, a, b, b, a);
+}
+
+void tcg_gen_umax_i64(TCGv_i64 ret, TCGv_i64 a, TCGv_i64 b)
+{
+ tcg_gen_movcond_i64(TCG_COND_LTU, ret, a, b, b, a);
+}
+
+void tcg_gen_abs_i64(TCGv_i64 ret, TCGv_i64 a)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
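+    /* t is 0 for non-negative a and -1 otherwise, so (a ^ t) - t
+       yields either a unchanged or its two's complement negation. */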
+ tcg_gen_sari_i64(t, a, 63);
+ tcg_gen_xor_i64(ret, a, t);
+ tcg_gen_sub_i64(ret, ret, t);
+ tcg_temp_free_i64(t);
+}
+
+/* Size changing operations. */
+
+void tcg_gen_extrl_i64_i32(TCGv_i32 ret, TCGv_i64 arg)
+{
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_gen_mov_i32(ret, TCGV_LOW(arg));
+ } else if (TCG_TARGET_HAS_extrl_i64_i32) {
+ tcg_gen_op2(INDEX_op_extrl_i64_i32,
+ tcgv_i32_arg(ret), tcgv_i64_arg(arg));
+ } else {
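+        /* On a 64-bit host without an explicit extrl op, the i64 temp
+           can be reinterpreted directly as its low 32 bits. */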
+ tcg_gen_mov_i32(ret, (TCGv_i32)arg);
+ }
+}
+
+void tcg_gen_extrh_i64_i32(TCGv_i32 ret, TCGv_i64 arg)
+{
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_gen_mov_i32(ret, TCGV_HIGH(arg));
+ } else if (TCG_TARGET_HAS_extrh_i64_i32) {
+ tcg_gen_op2(INDEX_op_extrh_i64_i32,
+ tcgv_i32_arg(ret), tcgv_i64_arg(arg));
+ } else {
+ TCGv_i64 t = tcg_temp_new_i64();
+ tcg_gen_shri_i64(t, arg, 32);
+ tcg_gen_mov_i32(ret, (TCGv_i32)t);
+ tcg_temp_free_i64(t);
+ }
+}
+
+void tcg_gen_extu_i32_i64(TCGv_i64 ret, TCGv_i32 arg)
+{
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_gen_mov_i32(TCGV_LOW(ret), arg);
+ tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+ } else {
+ tcg_gen_op2(INDEX_op_extu_i32_i64,
+ tcgv_i64_arg(ret), tcgv_i32_arg(arg));
+ }
+}
+
+void tcg_gen_ext_i32_i64(TCGv_i64 ret, TCGv_i32 arg)
+{
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_gen_mov_i32(TCGV_LOW(ret), arg);
+ tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
+ } else {
+ tcg_gen_op2(INDEX_op_ext_i32_i64,
+ tcgv_i64_arg(ret), tcgv_i32_arg(arg));
+ }
+}
+
+void tcg_gen_concat_i32_i64(TCGv_i64 dest, TCGv_i32 low, TCGv_i32 high)
+{
+ TCGv_i64 tmp;
+
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_gen_mov_i32(TCGV_LOW(dest), low);
+ tcg_gen_mov_i32(TCGV_HIGH(dest), high);
+ return;
+ }
+
+ tmp = tcg_temp_new_i64();
+ /* These extensions are only needed for type correctness.
+ We may be able to do better given target specific information. */
+ tcg_gen_extu_i32_i64(tmp, high);
+ tcg_gen_extu_i32_i64(dest, low);
+ /* If deposit is available, use it. Otherwise use the extra
+ knowledge that we have of the zero-extensions above. */
+ if (TCG_TARGET_HAS_deposit_i64 && TCG_TARGET_deposit_i64_valid(32, 32)) {
+ tcg_gen_deposit_i64(dest, dest, tmp, 32, 32);
+ } else {
+ tcg_gen_shli_i64(tmp, tmp, 32);
+ tcg_gen_or_i64(dest, dest, tmp);
+ }
+ tcg_temp_free_i64(tmp);
+}
+
+void tcg_gen_extr_i64_i32(TCGv_i32 lo, TCGv_i32 hi, TCGv_i64 arg)
+{
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_gen_mov_i32(lo, TCGV_LOW(arg));
+ tcg_gen_mov_i32(hi, TCGV_HIGH(arg));
+ } else {
+ tcg_gen_extrl_i64_i32(lo, arg);
+ tcg_gen_extrh_i64_i32(hi, arg);
+ }
+}
+
+void tcg_gen_extr32_i64(TCGv_i64 lo, TCGv_i64 hi, TCGv_i64 arg)
+{
+ tcg_gen_ext32u_i64(lo, arg);
+ tcg_gen_shri_i64(hi, arg, 32);
+}
+
+/* QEMU specific operations. */
+
+void tcg_gen_exit_tb(const TranslationBlock *tb, unsigned idx)
+{
+ /*
+ * Let the jit code return the read-only version of the
+ * TranslationBlock, so that we minimize the pc-relative
+ * distance of the address of the exit_tb code to TB.
+ * This will improve utilization of pc-relative address loads.
+ *
+ * TODO: Move this to translator_loop, so that all const
+ * TranslationBlock pointers refer to read-only memory.
+ * This requires coordination with targets that do not use
+ * the translator_loop.
+ */
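+    /* The TB pointer is sufficiently aligned that its low bits are
+       free to carry the exit index back to the caller of the
+       generated code. */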
+ uintptr_t val = (uintptr_t)tcg_splitwx_to_rx((void *)tb) + idx;
+
+ if (tb == NULL) {
+ tcg_debug_assert(idx == 0);
+ } else if (idx <= TB_EXIT_IDXMAX) {
+#ifdef CONFIG_DEBUG_TCG
+ /* This is an exit following a goto_tb. Verify that we have
+ seen this numbered exit before, via tcg_gen_goto_tb. */
+ tcg_debug_assert(tcg_ctx->goto_tb_issue_mask & (1 << idx));
+#endif
+ } else {
+ /* This is an exit via the exitreq label. */
+ tcg_debug_assert(idx == TB_EXIT_REQUESTED);
+ }
+
+ plugin_gen_disable_mem_helpers();
+ tcg_gen_op1i(INDEX_op_exit_tb, val);
+}
+
+void tcg_gen_goto_tb(unsigned idx)
+{
+ /* We tested CF_NO_GOTO_TB in translator_use_goto_tb. */
+ tcg_debug_assert(!(tcg_ctx->tb_cflags & CF_NO_GOTO_TB));
+ /* We only support two chained exits. */
+ tcg_debug_assert(idx <= TB_EXIT_IDXMAX);
+#ifdef CONFIG_DEBUG_TCG
+ /* Verify that we haven't seen this numbered exit before. */
+ tcg_debug_assert((tcg_ctx->goto_tb_issue_mask & (1 << idx)) == 0);
+ tcg_ctx->goto_tb_issue_mask |= 1 << idx;
+#endif
+ plugin_gen_disable_mem_helpers();
+ tcg_gen_op1i(INDEX_op_goto_tb, idx);
+}
+
+void tcg_gen_lookup_and_goto_ptr(void)
+{
+ TCGv_ptr ptr;
+
+ if (tcg_ctx->tb_cflags & CF_NO_GOTO_PTR) {
+ tcg_gen_exit_tb(NULL, 0);
+ return;
+ }
+
+ plugin_gen_disable_mem_helpers();
+ ptr = tcg_temp_new_ptr();
+ gen_helper_lookup_tb_ptr(ptr, cpu_env);
+ tcg_gen_op1i(INDEX_op_goto_ptr, tcgv_ptr_arg(ptr));
+ tcg_temp_free_ptr(ptr);
+}
+
+static inline MemOp tcg_canonicalize_memop(MemOp op, bool is64, bool st)
+{
+ /* Trigger the asserts within as early as possible. */
+ unsigned a_bits = get_alignment_bits(op);
+
+ /* Prefer MO_ALIGN+MO_XX over MO_ALIGN_XX+MO_XX */
+ if (a_bits == (op & MO_SIZE)) {
+ op = (op & ~MO_AMASK) | MO_ALIGN;
+ }
+
+ switch (op & MO_SIZE) {
+ case MO_8:
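+        /* A single byte has no endianness to swap. */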
+ op &= ~MO_BSWAP;
+ break;
+ case MO_16:
+ break;
+ case MO_32:
+ if (!is64) {
+ op &= ~MO_SIGN;
+ }
+ break;
+ case MO_64:
+ if (is64) {
+ op &= ~MO_SIGN;
+ break;
+ }
+ /* fall through */
+ default:
+ g_assert_not_reached();
+ }
+ if (st) {
+ op &= ~MO_SIGN;
+ }
+ return op;
+}
+
+static void gen_ldst_i32(TCGOpcode opc, TCGv_i32 val, TCGv addr,
+ MemOp memop, TCGArg idx)
+{
+ MemOpIdx oi = make_memop_idx(memop, idx);
+#if TARGET_LONG_BITS == 32
+ tcg_gen_op3i_i32(opc, val, addr, oi);
+#else
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_gen_op4i_i32(opc, val, TCGV_LOW(addr), TCGV_HIGH(addr), oi);
+ } else {
+ tcg_gen_op3(opc, tcgv_i32_arg(val), tcgv_i64_arg(addr), oi);
+ }
+#endif
+}
+
+static void gen_ldst_i64(TCGOpcode opc, TCGv_i64 val, TCGv addr,
+ MemOp memop, TCGArg idx)
+{
+ MemOpIdx oi = make_memop_idx(memop, idx);
+#if TARGET_LONG_BITS == 32
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_gen_op4i_i32(opc, TCGV_LOW(val), TCGV_HIGH(val), addr, oi);
+ } else {
+ tcg_gen_op3(opc, tcgv_i64_arg(val), tcgv_i32_arg(addr), oi);
+ }
+#else
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_gen_op5i_i32(opc, TCGV_LOW(val), TCGV_HIGH(val),
+ TCGV_LOW(addr), TCGV_HIGH(addr), oi);
+ } else {
+ tcg_gen_op3i_i64(opc, val, addr, oi);
+ }
+#endif
+}
+
+static void tcg_gen_req_mo(TCGBar type)
+{
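+    /* Emit a fence only for orderings that the guest memory model
+       requires and the host does not already provide by default. */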
+#ifdef TCG_GUEST_DEFAULT_MO
+ type &= TCG_GUEST_DEFAULT_MO;
+#endif
+ type &= ~TCG_TARGET_DEFAULT_MO;
+ if (type) {
+ tcg_gen_mb(type | TCG_BAR_SC);
+ }
+}
+
+static inline TCGv plugin_prep_mem_callbacks(TCGv vaddr)
+{
+#ifdef CONFIG_PLUGIN
+ if (tcg_ctx->plugin_insn != NULL) {
+ /* Save a copy of the vaddr for use after a load. */
+ TCGv temp = tcg_temp_new();
+ tcg_gen_mov_tl(temp, vaddr);
+ return temp;
+ }
+#endif
+ return vaddr;
+}
+
+static void plugin_gen_mem_callbacks(TCGv vaddr, MemOpIdx oi,
+ enum qemu_plugin_mem_rw rw)
+{
+#ifdef CONFIG_PLUGIN
+ if (tcg_ctx->plugin_insn != NULL) {
+ qemu_plugin_meminfo_t info = make_plugin_meminfo(oi, rw);
+ plugin_gen_empty_mem_callback(vaddr, info);
+ tcg_temp_free(vaddr);
+ }
+#endif
+}
+
+void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
+{
+ MemOp orig_memop;
+ MemOpIdx oi;
+
+ tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
+ memop = tcg_canonicalize_memop(memop, 0, 0);
+ oi = make_memop_idx(memop, idx);
+ trace_guest_ld_before_tcg(tcg_ctx->cpu, cpu_env, addr, oi);
+
+ orig_memop = memop;
+ if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
+ memop &= ~MO_BSWAP;
+ /* The bswap primitive benefits from zero-extended input. */
+ if ((memop & MO_SSIZE) == MO_SW) {
+ memop &= ~MO_SIGN;
+ }
+ }
+
+ addr = plugin_prep_mem_callbacks(addr);
+ gen_ldst_i32(INDEX_op_qemu_ld_i32, val, addr, memop, idx);
+ plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_R);
+
+ if ((orig_memop ^ memop) & MO_BSWAP) {
+ switch (orig_memop & MO_SIZE) {
+ case MO_16:
+ tcg_gen_bswap16_i32(val, val, (orig_memop & MO_SIGN
+ ? TCG_BSWAP_IZ | TCG_BSWAP_OS
+ : TCG_BSWAP_IZ | TCG_BSWAP_OZ));
+ break;
+ case MO_32:
+ tcg_gen_bswap32_i32(val, val);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ }
+}
+
+void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
+{
+ TCGv_i32 swap = NULL;
+ MemOpIdx oi;
+
+ tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
+ memop = tcg_canonicalize_memop(memop, 0, 1);
+ oi = make_memop_idx(memop, idx);
+ trace_guest_st_before_tcg(tcg_ctx->cpu, cpu_env, addr, oi);
+
+ if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
+ swap = tcg_temp_new_i32();
+ switch (memop & MO_SIZE) {
+ case MO_16:
+ tcg_gen_bswap16_i32(swap, val, 0);
+ break;
+ case MO_32:
+ tcg_gen_bswap32_i32(swap, val);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ val = swap;
+ memop &= ~MO_BSWAP;
+ }
+
+ addr = plugin_prep_mem_callbacks(addr);
+ if (TCG_TARGET_HAS_qemu_st8_i32 && (memop & MO_SIZE) == MO_8) {
+ gen_ldst_i32(INDEX_op_qemu_st8_i32, val, addr, memop, idx);
+ } else {
+ gen_ldst_i32(INDEX_op_qemu_st_i32, val, addr, memop, idx);
+ }
+ plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_W);
+
+ if (swap) {
+ tcg_temp_free_i32(swap);
+ }
+}
+
+void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
+{
+ MemOp orig_memop;
+ MemOpIdx oi;
+
+ if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
+ tcg_gen_qemu_ld_i32(TCGV_LOW(val), addr, idx, memop);
+ if (memop & MO_SIGN) {
+ tcg_gen_sari_i32(TCGV_HIGH(val), TCGV_LOW(val), 31);
+ } else {
+ tcg_gen_movi_i32(TCGV_HIGH(val), 0);
+ }
+ return;
+ }
+
+ tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
+ memop = tcg_canonicalize_memop(memop, 1, 0);
+ oi = make_memop_idx(memop, idx);
+ trace_guest_ld_before_tcg(tcg_ctx->cpu, cpu_env, addr, oi);
+
+ orig_memop = memop;
+ if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
+ memop &= ~MO_BSWAP;
+ /* The bswap primitive benefits from zero-extended input. */
+ if ((memop & MO_SIGN) && (memop & MO_SIZE) < MO_64) {
+ memop &= ~MO_SIGN;
+ }
+ }
+
+ addr = plugin_prep_mem_callbacks(addr);
+ gen_ldst_i64(INDEX_op_qemu_ld_i64, val, addr, memop, idx);
+ plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_R);
+
+ if ((orig_memop ^ memop) & MO_BSWAP) {
+ int flags = (orig_memop & MO_SIGN
+ ? TCG_BSWAP_IZ | TCG_BSWAP_OS
+ : TCG_BSWAP_IZ | TCG_BSWAP_OZ);
+ switch (orig_memop & MO_SIZE) {
+ case MO_16:
+ tcg_gen_bswap16_i64(val, val, flags);
+ break;
+ case MO_32:
+ tcg_gen_bswap32_i64(val, val, flags);
+ break;
+ case MO_64:
+ tcg_gen_bswap64_i64(val, val);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ }
+}
+
+void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
+{
+ TCGv_i64 swap = NULL;
+ MemOpIdx oi;
+
+ if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
+ tcg_gen_qemu_st_i32(TCGV_LOW(val), addr, idx, memop);
+ return;
+ }
+
+ tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
+ memop = tcg_canonicalize_memop(memop, 1, 1);
+ oi = make_memop_idx(memop, idx);
+ trace_guest_st_before_tcg(tcg_ctx->cpu, cpu_env, addr, oi);
+
+ if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
+ swap = tcg_temp_new_i64();
+ switch (memop & MO_SIZE) {
+ case MO_16:
+ tcg_gen_bswap16_i64(swap, val, 0);
+ break;
+ case MO_32:
+ tcg_gen_bswap32_i64(swap, val, 0);
+ break;
+ case MO_64:
+ tcg_gen_bswap64_i64(swap, val);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ val = swap;
+ memop &= ~MO_BSWAP;
+ }
+
+ addr = plugin_prep_mem_callbacks(addr);
+ gen_ldst_i64(INDEX_op_qemu_st_i64, val, addr, memop, idx);
+ plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_W);
+
+ if (swap) {
+ tcg_temp_free_i64(swap);
+ }
+}
+
+static void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, MemOp opc)
+{
+ switch (opc & MO_SSIZE) {
+ case MO_SB:
+ tcg_gen_ext8s_i32(ret, val);
+ break;
+ case MO_UB:
+ tcg_gen_ext8u_i32(ret, val);
+ break;
+ case MO_SW:
+ tcg_gen_ext16s_i32(ret, val);
+ break;
+ case MO_UW:
+ tcg_gen_ext16u_i32(ret, val);
+ break;
+ default:
+ tcg_gen_mov_i32(ret, val);
+ break;
+ }
+}
+
+static void tcg_gen_ext_i64(TCGv_i64 ret, TCGv_i64 val, MemOp opc)
+{
+ switch (opc & MO_SSIZE) {
+ case MO_SB:
+ tcg_gen_ext8s_i64(ret, val);
+ break;
+ case MO_UB:
+ tcg_gen_ext8u_i64(ret, val);
+ break;
+ case MO_SW:
+ tcg_gen_ext16s_i64(ret, val);
+ break;
+ case MO_UW:
+ tcg_gen_ext16u_i64(ret, val);
+ break;
+ case MO_SL:
+ tcg_gen_ext32s_i64(ret, val);
+ break;
+ case MO_UL:
+ tcg_gen_ext32u_i64(ret, val);
+ break;
+ default:
+ tcg_gen_mov_i64(ret, val);
+ break;
+ }
+}
+
+typedef void (*gen_atomic_cx_i32)(TCGv_i32, TCGv_env, TCGv,
+ TCGv_i32, TCGv_i32, TCGv_i32);
+typedef void (*gen_atomic_cx_i64)(TCGv_i64, TCGv_env, TCGv,
+ TCGv_i64, TCGv_i64, TCGv_i32);
+typedef void (*gen_atomic_op_i32)(TCGv_i32, TCGv_env, TCGv,
+ TCGv_i32, TCGv_i32);
+typedef void (*gen_atomic_op_i64)(TCGv_i64, TCGv_env, TCGv,
+ TCGv_i64, TCGv_i32);
+
+#ifdef CONFIG_ATOMIC64
+# define WITH_ATOMIC64(X) X,
+#else
+# define WITH_ATOMIC64(X)
+#endif
+
+static void * const table_cmpxchg[(MO_SIZE | MO_BSWAP) + 1] = {
+ [MO_8] = gen_helper_atomic_cmpxchgb,
+ [MO_16 | MO_LE] = gen_helper_atomic_cmpxchgw_le,
+ [MO_16 | MO_BE] = gen_helper_atomic_cmpxchgw_be,
+ [MO_32 | MO_LE] = gen_helper_atomic_cmpxchgl_le,
+ [MO_32 | MO_BE] = gen_helper_atomic_cmpxchgl_be,
+ WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_cmpxchgq_le)
+ WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_cmpxchgq_be)
+};
+
+void tcg_gen_atomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
+ TCGv_i32 newv, TCGArg idx, MemOp memop)
+{
+ memop = tcg_canonicalize_memop(memop, 0, 0);
+
+ if (!(tcg_ctx->tb_cflags & CF_PARALLEL)) {
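+        /* Serial execution: no other vCPU can observe the intermediate
+           state, so a plain load/compare/store sequence suffices. */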
+ TCGv_i32 t1 = tcg_temp_new_i32();
+ TCGv_i32 t2 = tcg_temp_new_i32();
+
+ tcg_gen_ext_i32(t2, cmpv, memop & MO_SIZE);
+
+ tcg_gen_qemu_ld_i32(t1, addr, idx, memop & ~MO_SIGN);
+ tcg_gen_movcond_i32(TCG_COND_EQ, t2, t1, t2, newv, t1);
+ tcg_gen_qemu_st_i32(t2, addr, idx, memop);
+ tcg_temp_free_i32(t2);
+
+ if (memop & MO_SIGN) {
+ tcg_gen_ext_i32(retv, t1, memop);
+ } else {
+ tcg_gen_mov_i32(retv, t1);
+ }
+ tcg_temp_free_i32(t1);
+ } else {
+ gen_atomic_cx_i32 gen;
+ MemOpIdx oi;
+
+ gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
+ tcg_debug_assert(gen != NULL);
+
+ oi = make_memop_idx(memop & ~MO_SIGN, idx);
+ gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
+
+ if (memop & MO_SIGN) {
+ tcg_gen_ext_i32(retv, retv, memop);
+ }
+ }
+}
+
+void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
+ TCGv_i64 newv, TCGArg idx, MemOp memop)
+{
+ memop = tcg_canonicalize_memop(memop, 1, 0);
+
+ if (!(tcg_ctx->tb_cflags & CF_PARALLEL)) {
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ TCGv_i64 t2 = tcg_temp_new_i64();
+
+ tcg_gen_ext_i64(t2, cmpv, memop & MO_SIZE);
+
+ tcg_gen_qemu_ld_i64(t1, addr, idx, memop & ~MO_SIGN);
+ tcg_gen_movcond_i64(TCG_COND_EQ, t2, t1, t2, newv, t1);
+ tcg_gen_qemu_st_i64(t2, addr, idx, memop);
+ tcg_temp_free_i64(t2);
+
+ if (memop & MO_SIGN) {
+ tcg_gen_ext_i64(retv, t1, memop);
+ } else {
+ tcg_gen_mov_i64(retv, t1);
+ }
+ tcg_temp_free_i64(t1);
+ } else if ((memop & MO_SIZE) == MO_64) {
+#ifdef CONFIG_ATOMIC64
+ gen_atomic_cx_i64 gen;
+ MemOpIdx oi;
+
+ gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
+ tcg_debug_assert(gen != NULL);
+
+ oi = make_memop_idx(memop, idx);
+ gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
+#else
+ gen_helper_exit_atomic(cpu_env);
+ /* Produce a result, so that we have a well-formed opcode stream
+ with respect to uses of the result in the (dead) code following. */
+ tcg_gen_movi_i64(retv, 0);
+#endif /* CONFIG_ATOMIC64 */
+ } else {
+ TCGv_i32 c32 = tcg_temp_new_i32();
+ TCGv_i32 n32 = tcg_temp_new_i32();
+ TCGv_i32 r32 = tcg_temp_new_i32();
+
+ tcg_gen_extrl_i64_i32(c32, cmpv);
+ tcg_gen_extrl_i64_i32(n32, newv);
+ tcg_gen_atomic_cmpxchg_i32(r32, addr, c32, n32, idx, memop & ~MO_SIGN);
+ tcg_temp_free_i32(c32);
+ tcg_temp_free_i32(n32);
+
+ tcg_gen_extu_i32_i64(retv, r32);
+ tcg_temp_free_i32(r32);
+
+ if (memop & MO_SIGN) {
+ tcg_gen_ext_i64(retv, retv, memop);
+ }
+ }
+}
+
+static void do_nonatomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
+ TCGArg idx, MemOp memop, bool new_val,
+ void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
+{
+ TCGv_i32 t1 = tcg_temp_new_i32();
+ TCGv_i32 t2 = tcg_temp_new_i32();
+
+ memop = tcg_canonicalize_memop(memop, 0, 0);
+
+ tcg_gen_qemu_ld_i32(t1, addr, idx, memop);
+ tcg_gen_ext_i32(t2, val, memop);
+ gen(t2, t1, t2);
+ tcg_gen_qemu_st_i32(t2, addr, idx, memop);
+
+ tcg_gen_ext_i32(ret, (new_val ? t2 : t1), memop);
+ tcg_temp_free_i32(t1);
+ tcg_temp_free_i32(t2);
+}
+
+static void do_atomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
+ TCGArg idx, MemOp memop, void * const table[])
+{
+ gen_atomic_op_i32 gen;
+ MemOpIdx oi;
+
+ memop = tcg_canonicalize_memop(memop, 0, 0);
+
+ gen = table[memop & (MO_SIZE | MO_BSWAP)];
+ tcg_debug_assert(gen != NULL);
+
+ oi = make_memop_idx(memop & ~MO_SIGN, idx);
+ gen(ret, cpu_env, addr, val, tcg_constant_i32(oi));
+
+ if (memop & MO_SIGN) {
+ tcg_gen_ext_i32(ret, ret, memop);
+ }
+}
+
+static void do_nonatomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
+ TCGArg idx, MemOp memop, bool new_val,
+ void (*gen)(TCGv_i64, TCGv_i64, TCGv_i64))
+{
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ TCGv_i64 t2 = tcg_temp_new_i64();
+
+ memop = tcg_canonicalize_memop(memop, 1, 0);
+
+ tcg_gen_qemu_ld_i64(t1, addr, idx, memop);
+ tcg_gen_ext_i64(t2, val, memop);
+ gen(t2, t1, t2);
+ tcg_gen_qemu_st_i64(t2, addr, idx, memop);
+
+ tcg_gen_ext_i64(ret, (new_val ? t2 : t1), memop);
+ tcg_temp_free_i64(t1);
+ tcg_temp_free_i64(t2);
+}
+
+static void do_atomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
+ TCGArg idx, MemOp memop, void * const table[])
+{
+ memop = tcg_canonicalize_memop(memop, 1, 0);
+
+ if ((memop & MO_SIZE) == MO_64) {
+#ifdef CONFIG_ATOMIC64
+ gen_atomic_op_i64 gen;
+ MemOpIdx oi;
+
+ gen = table[memop & (MO_SIZE | MO_BSWAP)];
+ tcg_debug_assert(gen != NULL);
+
+ oi = make_memop_idx(memop & ~MO_SIGN, idx);
+ gen(ret, cpu_env, addr, val, tcg_constant_i32(oi));
+#else
+ gen_helper_exit_atomic(cpu_env);
+ /* Produce a result, so that we have a well-formed opcode stream
+ with respect to uses of the result in the (dead) code following. */
+ tcg_gen_movi_i64(ret, 0);
+#endif /* CONFIG_ATOMIC64 */
+ } else {
+ TCGv_i32 v32 = tcg_temp_new_i32();
+ TCGv_i32 r32 = tcg_temp_new_i32();
+
+ tcg_gen_extrl_i64_i32(v32, val);
+ do_atomic_op_i32(r32, addr, v32, idx, memop & ~MO_SIGN, table);
+ tcg_temp_free_i32(v32);
+
+ tcg_gen_extu_i32_i64(ret, r32);
+ tcg_temp_free_i32(r32);
+
+ if (memop & MO_SIGN) {
+ tcg_gen_ext_i64(ret, ret, memop);
+ }
+ }
+}
+
+#define GEN_ATOMIC_HELPER(NAME, OP, NEW) \
+static void * const table_##NAME[(MO_SIZE | MO_BSWAP) + 1] = { \
+ [MO_8] = gen_helper_atomic_##NAME##b, \
+ [MO_16 | MO_LE] = gen_helper_atomic_##NAME##w_le, \
+ [MO_16 | MO_BE] = gen_helper_atomic_##NAME##w_be, \
+ [MO_32 | MO_LE] = gen_helper_atomic_##NAME##l_le, \
+ [MO_32 | MO_BE] = gen_helper_atomic_##NAME##l_be, \
+ WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_##NAME##q_le) \
+ WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_##NAME##q_be) \
+}; \
+void tcg_gen_atomic_##NAME##_i32 \
+ (TCGv_i32 ret, TCGv addr, TCGv_i32 val, TCGArg idx, MemOp memop) \
+{ \
+ if (tcg_ctx->tb_cflags & CF_PARALLEL) { \
+ do_atomic_op_i32(ret, addr, val, idx, memop, table_##NAME); \
+ } else { \
+ do_nonatomic_op_i32(ret, addr, val, idx, memop, NEW, \
+ tcg_gen_##OP##_i32); \
+ } \
+} \
+void tcg_gen_atomic_##NAME##_i64 \
+ (TCGv_i64 ret, TCGv addr, TCGv_i64 val, TCGArg idx, MemOp memop) \
+{ \
+ if (tcg_ctx->tb_cflags & CF_PARALLEL) { \
+ do_atomic_op_i64(ret, addr, val, idx, memop, table_##NAME); \
+ } else { \
+ do_nonatomic_op_i64(ret, addr, val, idx, memop, NEW, \
+ tcg_gen_##OP##_i64); \
+ } \
+}
+
+GEN_ATOMIC_HELPER(fetch_add, add, 0)
+GEN_ATOMIC_HELPER(fetch_and, and, 0)
+GEN_ATOMIC_HELPER(fetch_or, or, 0)
+GEN_ATOMIC_HELPER(fetch_xor, xor, 0)
+GEN_ATOMIC_HELPER(fetch_smin, smin, 0)
+GEN_ATOMIC_HELPER(fetch_umin, umin, 0)
+GEN_ATOMIC_HELPER(fetch_smax, smax, 0)
+GEN_ATOMIC_HELPER(fetch_umax, umax, 0)
+
+GEN_ATOMIC_HELPER(add_fetch, add, 1)
+GEN_ATOMIC_HELPER(and_fetch, and, 1)
+GEN_ATOMIC_HELPER(or_fetch, or, 1)
+GEN_ATOMIC_HELPER(xor_fetch, xor, 1)
+GEN_ATOMIC_HELPER(smin_fetch, smin, 1)
+GEN_ATOMIC_HELPER(umin_fetch, umin, 1)
+GEN_ATOMIC_HELPER(smax_fetch, smax, 1)
+GEN_ATOMIC_HELPER(umax_fetch, umax, 1)
+
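+/* The mov2 wrappers ignore the old value; their three-operand signature
+   only exists to match the callback type used by GEN_ATOMIC_HELPER. */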
+static void tcg_gen_mov2_i32(TCGv_i32 r, TCGv_i32 a, TCGv_i32 b)
+{
+ tcg_gen_mov_i32(r, b);
+}
+
+static void tcg_gen_mov2_i64(TCGv_i64 r, TCGv_i64 a, TCGv_i64 b)
+{
+ tcg_gen_mov_i64(r, b);
+}
+
+GEN_ATOMIC_HELPER(xchg, mov2, 0)
+
+#undef GEN_ATOMIC_HELPER
diff --git a/tcg/tcg-pool.c.inc b/tcg/tcg-pool.c.inc
new file mode 100644
index 000000000..90c2e63b7
--- /dev/null
+++ b/tcg/tcg-pool.c.inc
@@ -0,0 +1,162 @@
+/*
+ * TCG Backend Data: constant pool.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+typedef struct TCGLabelPoolData {
+ struct TCGLabelPoolData *next;
+ tcg_insn_unit *label;
+ intptr_t addend;
+ int rtype;
+ unsigned nlong;
+ tcg_target_ulong data[];
+} TCGLabelPoolData;
+
+
+static TCGLabelPoolData *new_pool_alloc(TCGContext *s, int nlong, int rtype,
+ tcg_insn_unit *label, intptr_t addend)
+{
+ TCGLabelPoolData *n = tcg_malloc(sizeof(TCGLabelPoolData)
+ + sizeof(tcg_target_ulong) * nlong);
+
+ n->label = label;
+ n->addend = addend;
+ n->rtype = rtype;
+ n->nlong = nlong;
+ return n;
+}
+
+static void new_pool_insert(TCGContext *s, TCGLabelPoolData *n)
+{
+ TCGLabelPoolData *i, **pp;
+ int nlong = n->nlong;
+
+ /* Insertion sort on the pool. */
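+    /* Order by descending nlong, then by descending data, so that
+       identical constants become adjacent and can share a single pool
+       slot in tcg_out_pool_finalize. */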
+ for (pp = &s->pool_labels; (i = *pp) != NULL; pp = &i->next) {
+ if (nlong > i->nlong) {
+ break;
+ }
+ if (nlong < i->nlong) {
+ continue;
+ }
+ if (memcmp(n->data, i->data, sizeof(tcg_target_ulong) * nlong) >= 0) {
+ break;
+ }
+ }
+ n->next = *pp;
+ *pp = n;
+}
+
+/* The "usual" for generic integer code. */
+static inline void new_pool_label(TCGContext *s, tcg_target_ulong d, int rtype,
+ tcg_insn_unit *label, intptr_t addend)
+{
+ TCGLabelPoolData *n = new_pool_alloc(s, 1, rtype, label, addend);
+ n->data[0] = d;
+ new_pool_insert(s, n);
+}
+
+/* For v64 or v128, depending on the host. */
+static inline void new_pool_l2(TCGContext *s, int rtype, tcg_insn_unit *label,
+ intptr_t addend, tcg_target_ulong d0,
+ tcg_target_ulong d1)
+{
+ TCGLabelPoolData *n = new_pool_alloc(s, 2, rtype, label, addend);
+ n->data[0] = d0;
+ n->data[1] = d1;
+ new_pool_insert(s, n);
+}
+
+/* For v128 or v256, depending on the host. */
+static inline void new_pool_l4(TCGContext *s, int rtype, tcg_insn_unit *label,
+ intptr_t addend, tcg_target_ulong d0,
+ tcg_target_ulong d1, tcg_target_ulong d2,
+ tcg_target_ulong d3)
+{
+ TCGLabelPoolData *n = new_pool_alloc(s, 4, rtype, label, addend);
+ n->data[0] = d0;
+ n->data[1] = d1;
+ n->data[2] = d2;
+ n->data[3] = d3;
+ new_pool_insert(s, n);
+}
+
+/* For v256, for 32-bit host. */
+static inline void new_pool_l8(TCGContext *s, int rtype, tcg_insn_unit *label,
+ intptr_t addend, tcg_target_ulong d0,
+ tcg_target_ulong d1, tcg_target_ulong d2,
+ tcg_target_ulong d3, tcg_target_ulong d4,
+ tcg_target_ulong d5, tcg_target_ulong d6,
+ tcg_target_ulong d7)
+{
+ TCGLabelPoolData *n = new_pool_alloc(s, 8, rtype, label, addend);
+ n->data[0] = d0;
+ n->data[1] = d1;
+ n->data[2] = d2;
+ n->data[3] = d3;
+ n->data[4] = d4;
+ n->data[5] = d5;
+ n->data[6] = d6;
+ n->data[7] = d7;
+ new_pool_insert(s, n);
+}
+
+/* To be provided by cpu/tcg-target.c.inc. */
+static void tcg_out_nop_fill(tcg_insn_unit *p, int count);
+
+static int tcg_out_pool_finalize(TCGContext *s)
+{
+ TCGLabelPoolData *p = s->pool_labels;
+ TCGLabelPoolData *l = NULL;
+ void *a;
+
+ if (p == NULL) {
+ return 0;
+ }
+
+ /* ??? Round up to qemu_icache_linesize, but then do not round
+ again when allocating the next TranslationBlock structure. */
+ a = (void *)ROUND_UP((uintptr_t)s->code_ptr,
+ sizeof(tcg_target_ulong) * p->nlong);
+ tcg_out_nop_fill(s->code_ptr, (tcg_insn_unit *)a - s->code_ptr);
+ s->data_gen_ptr = a;
+
+ for (; p != NULL; p = p->next) {
+ size_t size = sizeof(tcg_target_ulong) * p->nlong;
+ uintptr_t value;
+
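+        /* The sort above makes duplicates consecutive: emit each
+           distinct constant once and let later users reference it. */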
+ if (!l || l->nlong != p->nlong || memcmp(l->data, p->data, size)) {
+ if (unlikely(a > s->code_gen_highwater)) {
+ return -1;
+ }
+ memcpy(a, p->data, size);
+ a += size;
+ l = p;
+ }
+
+ value = (uintptr_t)tcg_splitwx_to_rx(a) - size;
+ if (!patch_reloc(p->label, p->rtype, value, p->addend)) {
+ return -2;
+ }
+ }
+
+ s->code_ptr = a;
+ return 0;
+}
diff --git a/tcg/tcg.c b/tcg/tcg.c
new file mode 100644
index 000000000..934aa8510
--- /dev/null
+++ b/tcg/tcg.c
@@ -0,0 +1,4738 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+/* define it to use liveness analysis (better code) */
+#define USE_TCG_OPTIMIZATIONS
+
+#include "qemu/osdep.h"
+
+/* Define to dump the ELF file used to communicate with GDB. */
+#undef DEBUG_JIT
+
+#include "qemu/error-report.h"
+#include "qemu/cutils.h"
+#include "qemu/host-utils.h"
+#include "qemu/qemu-print.h"
+#include "qemu/timer.h"
+#include "qemu/cacheflush.h"
+
+/* Note: the long-term plan is to reduce the dependencies on the QEMU
+   CPU definitions. Currently they are used for qemu_ld/st
+   instructions. */
+#define NO_CPU_IO_DEFS
+
+#include "exec/exec-all.h"
+#include "tcg/tcg-op.h"
+
+#if UINTPTR_MAX == UINT32_MAX
+# define ELF_CLASS ELFCLASS32
+#else
+# define ELF_CLASS ELFCLASS64
+#endif
+#ifdef HOST_WORDS_BIGENDIAN
+# define ELF_DATA ELFDATA2MSB
+#else
+# define ELF_DATA ELFDATA2LSB
+#endif
+
+#include "elf.h"
+#include "exec/log.h"
+#include "tcg/tcg-ldst.h"
+#include "tcg-internal.h"
+
+#ifdef CONFIG_TCG_INTERPRETER
+#include <ffi.h>
+#endif
+
+/* Forward declarations for functions declared in tcg-target.c.inc and
+ used here. */
+static void tcg_target_init(TCGContext *s);
+static void tcg_target_qemu_prologue(TCGContext *s);
+static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
+ intptr_t value, intptr_t addend);
+
+/* The CIE and FDE header definitions will be common to all hosts. */
+typedef struct {
+ uint32_t len __attribute__((aligned((sizeof(void *)))));
+ uint32_t id;
+ uint8_t version;
+ char augmentation[1];
+ uint8_t code_align;
+ uint8_t data_align;
+ uint8_t return_column;
+} DebugFrameCIE;
+
+typedef struct QEMU_PACKED {
+ uint32_t len __attribute__((aligned((sizeof(void *)))));
+ uint32_t cie_offset;
+ uintptr_t func_start;
+ uintptr_t func_len;
+} DebugFrameFDEHeader;
+
+typedef struct QEMU_PACKED {
+ DebugFrameCIE cie;
+ DebugFrameFDEHeader fde;
+} DebugFrameHeader;
+
+static void tcg_register_jit_int(const void *buf, size_t size,
+ const void *debug_frame,
+ size_t debug_frame_size)
+ __attribute__((unused));
+
+/* Forward declarations for functions declared and used in tcg-target.c.inc. */
+static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
+ intptr_t arg2);
+static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
+static void tcg_out_movi(TCGContext *s, TCGType type,
+ TCGReg ret, tcg_target_long arg);
+static void tcg_out_op(TCGContext *s, TCGOpcode opc,
+ const TCGArg args[TCG_MAX_OP_ARGS],
+ const int const_args[TCG_MAX_OP_ARGS]);
+#if TCG_TARGET_MAYBE_vec
+static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
+ TCGReg dst, TCGReg src);
+static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
+ TCGReg dst, TCGReg base, intptr_t offset);
+static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
+ TCGReg dst, int64_t arg);
+static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
+ unsigned vecl, unsigned vece,
+ const TCGArg args[TCG_MAX_OP_ARGS],
+ const int const_args[TCG_MAX_OP_ARGS]);
+#else
+static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
+ TCGReg dst, TCGReg src)
+{
+ g_assert_not_reached();
+}
+static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
+ TCGReg dst, TCGReg base, intptr_t offset)
+{
+ g_assert_not_reached();
+}
+static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
+ TCGReg dst, int64_t arg)
+{
+ g_assert_not_reached();
+}
+static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
+ unsigned vecl, unsigned vece,
+ const TCGArg args[TCG_MAX_OP_ARGS],
+ const int const_args[TCG_MAX_OP_ARGS])
+{
+ g_assert_not_reached();
+}
+#endif
+static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
+ intptr_t arg2);
+static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
+ TCGReg base, intptr_t ofs);
+#ifdef CONFIG_TCG_INTERPRETER
+static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
+ ffi_cif *cif);
+#else
+static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target);
+#endif
+static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
+#ifdef TCG_TARGET_NEED_LDST_LABELS
+static int tcg_out_ldst_finalize(TCGContext *s);
+#endif
+
+TCGContext tcg_init_ctx;
+__thread TCGContext *tcg_ctx;
+
+TCGContext **tcg_ctxs;
+unsigned int tcg_cur_ctxs;
+unsigned int tcg_max_ctxs;
+TCGv_env cpu_env = 0;
+const void *tcg_code_gen_epilogue;
+uintptr_t tcg_splitwx_diff;
+
+#ifndef CONFIG_TCG_INTERPRETER
+tcg_prologue_fn *tcg_qemu_tb_exec;
+#endif
+
+static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
+static TCGRegSet tcg_target_call_clobber_regs;
+
+#if TCG_TARGET_INSN_UNIT_SIZE == 1
+static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
+{
+ *s->code_ptr++ = v;
+}
+
+static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
+ uint8_t v)
+{
+ *p = v;
+}
+#endif
+
+#if TCG_TARGET_INSN_UNIT_SIZE <= 2
+static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
+{
+ if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
+ *s->code_ptr++ = v;
+ } else {
+ tcg_insn_unit *p = s->code_ptr;
+ memcpy(p, &v, sizeof(v));
+ s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
+ }
+}
+
+static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
+ uint16_t v)
+{
+ if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
+ *p = v;
+ } else {
+ memcpy(p, &v, sizeof(v));
+ }
+}
+#endif
+
+#if TCG_TARGET_INSN_UNIT_SIZE <= 4
+static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
+{
+ if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
+ *s->code_ptr++ = v;
+ } else {
+ tcg_insn_unit *p = s->code_ptr;
+ memcpy(p, &v, sizeof(v));
+ s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
+ }
+}
+
+static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
+ uint32_t v)
+{
+ if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
+ *p = v;
+ } else {
+ memcpy(p, &v, sizeof(v));
+ }
+}
+#endif
+
+#if TCG_TARGET_INSN_UNIT_SIZE <= 8
+static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
+{
+ if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
+ *s->code_ptr++ = v;
+ } else {
+ tcg_insn_unit *p = s->code_ptr;
+ memcpy(p, &v, sizeof(v));
+ s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
+ }
+}
+
+static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
+ uint64_t v)
+{
+ if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
+ *p = v;
+ } else {
+ memcpy(p, &v, sizeof(v));
+ }
+}
+#endif
+
+/* label relocation processing */
+
+static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
+ TCGLabel *l, intptr_t addend)
+{
+ TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
+
+ r->type = type;
+ r->ptr = code_ptr;
+ r->addend = addend;
+ QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
+}
+
+static void tcg_out_label(TCGContext *s, TCGLabel *l)
+{
+ tcg_debug_assert(!l->has_value);
+ l->has_value = 1;
+ l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
+}
+
+TCGLabel *gen_new_label(void)
+{
+ TCGContext *s = tcg_ctx;
+ TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
+
+ memset(l, 0, sizeof(TCGLabel));
+ l->id = s->nb_labels++;
+ QSIMPLEQ_INIT(&l->relocs);
+
+ QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
+
+ return l;
+}
+
+static bool tcg_resolve_relocs(TCGContext *s)
+{
+ TCGLabel *l;
+
+ QSIMPLEQ_FOREACH(l, &s->labels, next) {
+ TCGRelocation *r;
+ uintptr_t value = l->u.value;
+
+ QSIMPLEQ_FOREACH(r, &l->relocs, next) {
+ if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+static void set_jmp_reset_offset(TCGContext *s, int which)
+{
+ /*
+ * We will check for overflow at the end of the opcode loop in
+ * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
+ */
+ s->tb_jmp_reset_offset[which] = tcg_current_code_size(s);
+}
+
+/* Signal overflow, starting over with fewer guest insns. */
+static void QEMU_NORETURN tcg_raise_tb_overflow(TCGContext *s)
+{
+ siglongjmp(s->jmp_trans, -2);
+}
+
+#define C_PFX1(P, A) P##A
+#define C_PFX2(P, A, B) P##A##_##B
+#define C_PFX3(P, A, B, C) P##A##_##B##_##C
+#define C_PFX4(P, A, B, C, D) P##A##_##B##_##C##_##D
+#define C_PFX5(P, A, B, C, D, E) P##A##_##B##_##C##_##D##_##E
+#define C_PFX6(P, A, B, C, D, E, F) P##A##_##B##_##C##_##D##_##E##_##F
+
+/* Define an enumeration for the various combinations. */
+
+#define C_O0_I1(I1) C_PFX1(c_o0_i1_, I1),
+#define C_O0_I2(I1, I2) C_PFX2(c_o0_i2_, I1, I2),
+#define C_O0_I3(I1, I2, I3) C_PFX3(c_o0_i3_, I1, I2, I3),
+#define C_O0_I4(I1, I2, I3, I4) C_PFX4(c_o0_i4_, I1, I2, I3, I4),
+
+#define C_O1_I1(O1, I1) C_PFX2(c_o1_i1_, O1, I1),
+#define C_O1_I2(O1, I1, I2) C_PFX3(c_o1_i2_, O1, I1, I2),
+#define C_O1_I3(O1, I1, I2, I3) C_PFX4(c_o1_i3_, O1, I1, I2, I3),
+#define C_O1_I4(O1, I1, I2, I3, I4) C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),
+
+#define C_N1_I2(O1, I1, I2) C_PFX3(c_n1_i2_, O1, I1, I2),
+
+#define C_O2_I1(O1, O2, I1) C_PFX3(c_o2_i1_, O1, O2, I1),
+#define C_O2_I2(O1, O2, I1, I2) C_PFX4(c_o2_i2_, O1, O2, I1, I2),
+#define C_O2_I3(O1, O2, I1, I2, I3) C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
+#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
+
+typedef enum {
+#include "tcg-target-con-set.h"
+} TCGConstraintSetIndex;
+
+static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);
+
+#undef C_O0_I1
+#undef C_O0_I2
+#undef C_O0_I3
+#undef C_O0_I4
+#undef C_O1_I1
+#undef C_O1_I2
+#undef C_O1_I3
+#undef C_O1_I4
+#undef C_N1_I2
+#undef C_O2_I1
+#undef C_O2_I2
+#undef C_O2_I3
+#undef C_O2_I4
+
+/* Put all of the constraint sets into an array, indexed by the enum. */
+
+#define C_O0_I1(I1) { .args_ct_str = { #I1 } },
+#define C_O0_I2(I1, I2) { .args_ct_str = { #I1, #I2 } },
+#define C_O0_I3(I1, I2, I3) { .args_ct_str = { #I1, #I2, #I3 } },
+#define C_O0_I4(I1, I2, I3, I4) { .args_ct_str = { #I1, #I2, #I3, #I4 } },
+
+#define C_O1_I1(O1, I1) { .args_ct_str = { #O1, #I1 } },
+#define C_O1_I2(O1, I1, I2) { .args_ct_str = { #O1, #I1, #I2 } },
+#define C_O1_I3(O1, I1, I2, I3) { .args_ct_str = { #O1, #I1, #I2, #I3 } },
+#define C_O1_I4(O1, I1, I2, I3, I4) { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },
+
+#define C_N1_I2(O1, I1, I2) { .args_ct_str = { "&" #O1, #I1, #I2 } },
+
+#define C_O2_I1(O1, O2, I1) { .args_ct_str = { #O1, #O2, #I1 } },
+#define C_O2_I2(O1, O2, I1, I2) { .args_ct_str = { #O1, #O2, #I1, #I2 } },
+#define C_O2_I3(O1, O2, I1, I2, I3) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
+#define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },
+
+static const TCGTargetOpDef constraint_sets[] = {
+#include "tcg-target-con-set.h"
+};
+
+
+#undef C_O0_I1
+#undef C_O0_I2
+#undef C_O0_I3
+#undef C_O0_I4
+#undef C_O1_I1
+#undef C_O1_I2
+#undef C_O1_I3
+#undef C_O1_I4
+#undef C_N1_I2
+#undef C_O2_I1
+#undef C_O2_I2
+#undef C_O2_I3
+#undef C_O2_I4
+
+/* Expand the enumerator to be returned from tcg_target_op_def(). */
+
+#define C_O0_I1(I1) C_PFX1(c_o0_i1_, I1)
+#define C_O0_I2(I1, I2) C_PFX2(c_o0_i2_, I1, I2)
+#define C_O0_I3(I1, I2, I3) C_PFX3(c_o0_i3_, I1, I2, I3)
+#define C_O0_I4(I1, I2, I3, I4) C_PFX4(c_o0_i4_, I1, I2, I3, I4)
+
+#define C_O1_I1(O1, I1) C_PFX2(c_o1_i1_, O1, I1)
+#define C_O1_I2(O1, I1, I2) C_PFX3(c_o1_i2_, O1, I1, I2)
+#define C_O1_I3(O1, I1, I2, I3) C_PFX4(c_o1_i3_, O1, I1, I2, I3)
+#define C_O1_I4(O1, I1, I2, I3, I4) C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)
+
+#define C_N1_I2(O1, I1, I2) C_PFX3(c_n1_i2_, O1, I1, I2)
+
+#define C_O2_I1(O1, O2, I1) C_PFX3(c_o2_i1_, O1, O2, I1)
+#define C_O2_I2(O1, O2, I1, I2) C_PFX4(c_o2_i2_, O1, O2, I1, I2)
+#define C_O2_I3(O1, O2, I1, I2, I3) C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
+#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
+
+#include "tcg-target.c.inc"
+
+static void alloc_tcg_plugin_context(TCGContext *s)
+{
+#ifdef CONFIG_PLUGIN
+ s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
+ s->plugin_tb->insns =
+ g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
+#endif
+}
+
+/*
+ * All TCG threads except the parent (i.e. the one that called tcg_context_init
+ * and registered the target's TCG globals) must register with this function
+ * before initiating translation.
+ *
+ * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
+ * of tcg_region_init() for the reasoning behind this.
+ *
+ * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
+ * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
+ * is not used anymore for translation once this function is called.
+ *
+ * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
+ * over the array (e.g. tcg_code_size()) the same for both softmmu and user-mode.
+ */
+#ifdef CONFIG_USER_ONLY
+void tcg_register_thread(void)
+{
+ tcg_ctx = &tcg_init_ctx;
+}
+#else
+void tcg_register_thread(void)
+{
+ TCGContext *s = g_malloc(sizeof(*s));
+ unsigned int i, n;
+
+ *s = tcg_init_ctx;
+
+ /* Relink mem_base. */
+ for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
+ if (tcg_init_ctx.temps[i].mem_base) {
+ ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
+ tcg_debug_assert(b >= 0 && b < n);
+ s->temps[i].mem_base = &s->temps[b];
+ }
+ }
+
+ /* Claim an entry in tcg_ctxs */
+ n = qatomic_fetch_inc(&tcg_cur_ctxs);
+ g_assert(n < tcg_max_ctxs);
+ qatomic_set(&tcg_ctxs[n], s);
+
+ if (n > 0) {
+ alloc_tcg_plugin_context(s);
+ tcg_region_initial_alloc(s);
+ }
+
+ tcg_ctx = s;
+}
+#endif /* !CONFIG_USER_ONLY */
+
+/* pool based memory allocation */
+void *tcg_malloc_internal(TCGContext *s, int size)
+{
+ TCGPool *p;
+ int pool_size;
+
+ if (size > TCG_POOL_CHUNK_SIZE) {
+ /* big malloc: insert a new pool (XXX: could optimize) */
+ p = g_malloc(sizeof(TCGPool) + size);
+ p->size = size;
+ p->next = s->pool_first_large;
+ s->pool_first_large = p;
+ return p->data;
+ } else {
+ p = s->pool_current;
+ if (!p) {
+ p = s->pool_first;
+ if (!p)
+ goto new_pool;
+ } else {
+ if (!p->next) {
+ new_pool:
+ pool_size = TCG_POOL_CHUNK_SIZE;
+ p = g_malloc(sizeof(TCGPool) + pool_size);
+ p->size = pool_size;
+ p->next = NULL;
+ if (s->pool_current)
+ s->pool_current->next = p;
+ else
+ s->pool_first = p;
+ } else {
+ p = p->next;
+ }
+ }
+ }
+ s->pool_current = p;
+ s->pool_cur = p->data + size;
+ s->pool_end = p->data + p->size;
+ return p->data;
+}
+
+void tcg_pool_reset(TCGContext *s)
+{
+ TCGPool *p, *t;
+ for (p = s->pool_first_large; p; p = t) {
+ t = p->next;
+ g_free(p);
+ }
+ s->pool_first_large = NULL;
+ s->pool_cur = s->pool_end = NULL;
+ s->pool_current = NULL;
+}
+
+#include "exec/helper-proto.h"
+
+static const TCGHelperInfo all_helpers[] = {
+#include "exec/helper-tcg.h"
+};
+static GHashTable *helper_table;
+
+#ifdef CONFIG_TCG_INTERPRETER
+static GHashTable *ffi_table;
+
+static ffi_type * const typecode_to_ffi[8] = {
+ [dh_typecode_void] = &ffi_type_void,
+ [dh_typecode_i32] = &ffi_type_uint32,
+ [dh_typecode_s32] = &ffi_type_sint32,
+ [dh_typecode_i64] = &ffi_type_uint64,
+ [dh_typecode_s64] = &ffi_type_sint64,
+ [dh_typecode_ptr] = &ffi_type_pointer,
+};
+#endif
+
+static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
+static void process_op_defs(TCGContext *s);
+static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
+ TCGReg reg, const char *name);
+
+static void tcg_context_init(unsigned max_cpus)
+{
+ TCGContext *s = &tcg_init_ctx;
+ int op, total_args, n, i;
+ TCGOpDef *def;
+ TCGArgConstraint *args_ct;
+ TCGTemp *ts;
+
+ memset(s, 0, sizeof(*s));
+ s->nb_globals = 0;
+
+ /* Count total number of arguments and allocate the corresponding
+ space */
+ total_args = 0;
+ for(op = 0; op < NB_OPS; op++) {
+ def = &tcg_op_defs[op];
+ n = def->nb_iargs + def->nb_oargs;
+ total_args += n;
+ }
+
+ args_ct = g_new0(TCGArgConstraint, total_args);
+
+ for(op = 0; op < NB_OPS; op++) {
+ def = &tcg_op_defs[op];
+ def->args_ct = args_ct;
+ n = def->nb_iargs + def->nb_oargs;
+ args_ct += n;
+ }
+
+ /* Register helpers. */
+ /* Use g_direct_hash/equal for direct pointer comparisons on func. */
+ helper_table = g_hash_table_new(NULL, NULL);
+
+ for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
+ g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
+ (gpointer)&all_helpers[i]);
+ }
+
+#ifdef CONFIG_TCG_INTERPRETER
+ /* g_direct_hash/equal for direct comparisons on uint32_t. */
+ ffi_table = g_hash_table_new(NULL, NULL);
+ for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
+ struct {
+ ffi_cif cif;
+ ffi_type *args[];
+ } *ca;
+ uint32_t typemask = all_helpers[i].typemask;
+ gpointer hash = (gpointer)(uintptr_t)typemask;
+ ffi_status status;
+ int nargs;
+
+ if (g_hash_table_lookup(ffi_table, hash)) {
+ continue;
+ }
+
+ /* Ignoring the return type, find the last non-zero field. */
+ nargs = 32 - clz32(typemask >> 3);
+ nargs = DIV_ROUND_UP(nargs, 3);
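+        /* typemask holds the 3-bit return type in its low bits followed
+           by one 3-bit field per argument, hence the division by 3. */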
+
+ ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
+ ca->cif.rtype = typecode_to_ffi[typemask & 7];
+ ca->cif.nargs = nargs;
+
+ if (nargs != 0) {
+ ca->cif.arg_types = ca->args;
+            for (int j = 0; j < nargs; ++j) {
+                int typecode = extract32(typemask, (j + 1) * 3, 3);
+                ca->args[j] = typecode_to_ffi[typecode];
+ }
+ }
+
+ status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
+ ca->cif.rtype, ca->cif.arg_types);
+ assert(status == FFI_OK);
+
+ g_hash_table_insert(ffi_table, hash, (gpointer)&ca->cif);
+ }
+#endif
+
+ tcg_target_init(s);
+ process_op_defs(s);
+
+ /* Reverse the order of the saved registers, assuming they're all at
+ the start of tcg_target_reg_alloc_order. */
+ for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
+ int r = tcg_target_reg_alloc_order[n];
+ if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
+ break;
+ }
+ }
+ for (i = 0; i < n; ++i) {
+ indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
+ }
+ for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
+ indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
+ }
+
+ alloc_tcg_plugin_context(s);
+
+ tcg_ctx = s;
+ /*
+ * In user-mode we simply share the init context among threads, since we
+     * use a single region. See the documentation of tcg_region_init() for the
+ * reasoning behind this.
+ * In softmmu we will have at most max_cpus TCG threads.
+ */
+#ifdef CONFIG_USER_ONLY
+ tcg_ctxs = &tcg_ctx;
+ tcg_cur_ctxs = 1;
+ tcg_max_ctxs = 1;
+#else
+ tcg_max_ctxs = max_cpus;
+ tcg_ctxs = g_new0(TCGContext *, max_cpus);
+#endif
+
+ tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
+ ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
+ cpu_env = temp_tcgv_ptr(ts);
+}
+
+void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
+{
+ tcg_context_init(max_cpus);
+ tcg_region_init(tb_size, splitwx, max_cpus);
+}
+
+/*
+ * Allocate TBs right before their corresponding translated code, making
+ * sure that TBs and code are on different cache lines.
+ */
+TranslationBlock *tcg_tb_alloc(TCGContext *s)
+{
+ uintptr_t align = qemu_icache_linesize;
+ TranslationBlock *tb;
+ void *next;
+
+ retry:
+ tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
+ next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
+
+ if (unlikely(next > s->code_gen_highwater)) {
+ if (tcg_region_alloc(s)) {
+ return NULL;
+ }
+ goto retry;
+ }
+ qatomic_set(&s->code_gen_ptr, next);
+ s->data_gen_ptr = NULL;
+ return tb;
+}
+
+void tcg_prologue_init(TCGContext *s)
+{
+ size_t prologue_size;
+
+ s->code_ptr = s->code_gen_ptr;
+ s->code_buf = s->code_gen_ptr;
+ s->data_gen_ptr = NULL;
+
+#ifndef CONFIG_TCG_INTERPRETER
+ tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
+#endif
+
+#ifdef TCG_TARGET_NEED_POOL_LABELS
+ s->pool_labels = NULL;
+#endif
+
+ qemu_thread_jit_write();
+ /* Generate the prologue. */
+ tcg_target_qemu_prologue(s);
+
+#ifdef TCG_TARGET_NEED_POOL_LABELS
+ /* Allow the prologue to put e.g. guest_base into a pool entry. */
+ {
+ int result = tcg_out_pool_finalize(s);
+ tcg_debug_assert(result == 0);
+ }
+#endif
+
+ prologue_size = tcg_current_code_size(s);
+
+#ifndef CONFIG_TCG_INTERPRETER
+ flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
+ (uintptr_t)s->code_buf, prologue_size);
+#endif
+
+#ifdef DEBUG_DISAS
+ if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
+ FILE *logfile = qemu_log_lock();
+ qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
+ if (s->data_gen_ptr) {
+ size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
+ size_t data_size = prologue_size - code_size;
+ size_t i;
+
+ log_disas(s->code_gen_ptr, code_size);
+
+ for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
+ if (sizeof(tcg_target_ulong) == 8) {
+ qemu_log("0x%08" PRIxPTR ": .quad 0x%016" PRIx64 "\n",
+ (uintptr_t)s->data_gen_ptr + i,
+ *(uint64_t *)(s->data_gen_ptr + i));
+ } else {
+ qemu_log("0x%08" PRIxPTR ": .long 0x%08x\n",
+ (uintptr_t)s->data_gen_ptr + i,
+ *(uint32_t *)(s->data_gen_ptr + i));
+ }
+ }
+ } else {
+ log_disas(s->code_gen_ptr, prologue_size);
+ }
+ qemu_log("\n");
+ qemu_log_flush();
+ qemu_log_unlock(logfile);
+ }
+#endif
+
+#ifndef CONFIG_TCG_INTERPRETER
+ /*
+ * Assert that goto_ptr is implemented completely, setting an epilogue.
+ * For tci, we use NULL as the signal to return from the interpreter,
+ * so skip this check.
+ */
+ tcg_debug_assert(tcg_code_gen_epilogue != NULL);
+#endif
+
+ tcg_region_prologue_set(s);
+}
+
+void tcg_func_start(TCGContext *s)
+{
+ tcg_pool_reset(s);
+ s->nb_temps = s->nb_globals;
+
+ /* No temps have been previously allocated for size or locality. */
+ memset(s->free_temps, 0, sizeof(s->free_temps));
+
+ /* No constant temps have been previously allocated. */
+ for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
+ if (s->const_table[i]) {
+ g_hash_table_remove_all(s->const_table[i]);
+ }
+ }
+
+ s->nb_ops = 0;
+ s->nb_labels = 0;
+ s->current_frame_offset = s->frame_start;
+
+#ifdef CONFIG_DEBUG_TCG
+ s->goto_tb_issue_mask = 0;
+#endif
+
+ QTAILQ_INIT(&s->ops);
+ QTAILQ_INIT(&s->free_ops);
+ QSIMPLEQ_INIT(&s->labels);
+}
+
+static TCGTemp *tcg_temp_alloc(TCGContext *s)
+{
+ int n = s->nb_temps++;
+
+ if (n >= TCG_MAX_TEMPS) {
+ tcg_raise_tb_overflow(s);
+ }
+ return memset(&s->temps[n], 0, sizeof(TCGTemp));
+}
+
+static TCGTemp *tcg_global_alloc(TCGContext *s)
+{
+ TCGTemp *ts;
+
+ tcg_debug_assert(s->nb_globals == s->nb_temps);
+ tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
+ s->nb_globals++;
+ ts = tcg_temp_alloc(s);
+ ts->kind = TEMP_GLOBAL;
+
+ return ts;
+}
+
+static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
+ TCGReg reg, const char *name)
+{
+ TCGTemp *ts;
+
+ if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
+ tcg_abort();
+ }
+
+ ts = tcg_global_alloc(s);
+ ts->base_type = type;
+ ts->type = type;
+ ts->kind = TEMP_FIXED;
+ ts->reg = reg;
+ ts->name = name;
+ tcg_regset_set_reg(s->reserved_regs, reg);
+
+ return ts;
+}
+
+void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
+{
+ s->frame_start = start;
+ s->frame_end = start + size;
+ s->frame_temp
+ = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
+}
+
+TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
+ intptr_t offset, const char *name)
+{
+ TCGContext *s = tcg_ctx;
+ TCGTemp *base_ts = tcgv_ptr_temp(base);
+ TCGTemp *ts = tcg_global_alloc(s);
+ int indirect_reg = 0, bigendian = 0;
+#ifdef HOST_WORDS_BIGENDIAN
+ bigendian = 1;
+#endif
+
+ switch (base_ts->kind) {
+ case TEMP_FIXED:
+ break;
+ case TEMP_GLOBAL:
+ /* We do not support double-indirect registers. */
+ tcg_debug_assert(!base_ts->indirect_reg);
+ base_ts->indirect_base = 1;
+ s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
+ ? 2 : 1);
+ indirect_reg = 1;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
+ TCGTemp *ts2 = tcg_global_alloc(s);
+ char buf[64];
+
+ ts->base_type = TCG_TYPE_I64;
+ ts->type = TCG_TYPE_I32;
+ ts->indirect_reg = indirect_reg;
+ ts->mem_allocated = 1;
+ ts->mem_base = base_ts;
+ ts->mem_offset = offset + bigendian * 4;
+ pstrcpy(buf, sizeof(buf), name);
+ pstrcat(buf, sizeof(buf), "_0");
+ ts->name = strdup(buf);
+
+ tcg_debug_assert(ts2 == ts + 1);
+ ts2->base_type = TCG_TYPE_I64;
+ ts2->type = TCG_TYPE_I32;
+ ts2->indirect_reg = indirect_reg;
+ ts2->mem_allocated = 1;
+ ts2->mem_base = base_ts;
+ ts2->mem_offset = offset + (1 - bigendian) * 4;
+ pstrcpy(buf, sizeof(buf), name);
+ pstrcat(buf, sizeof(buf), "_1");
+ ts2->name = strdup(buf);
+ } else {
+ ts->base_type = type;
+ ts->type = type;
+ ts->indirect_reg = indirect_reg;
+ ts->mem_allocated = 1;
+ ts->mem_base = base_ts;
+ ts->mem_offset = offset;
+ ts->name = name;
+ }
+ return ts;
+}
+
+TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
+{
+ TCGContext *s = tcg_ctx;
+ TCGTempKind kind = temp_local ? TEMP_LOCAL : TEMP_NORMAL;
+ TCGTemp *ts;
+ int idx, k;
+
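+    /* Free temps live in per-(type, locality) buckets of free_temps;
+       local temps use the upper half of the index space. */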
+ k = type + (temp_local ? TCG_TYPE_COUNT : 0);
+ idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
+ if (idx < TCG_MAX_TEMPS) {
+ /* There is already an available temp with the right type. */
+ clear_bit(idx, s->free_temps[k].l);
+
+ ts = &s->temps[idx];
+ ts->temp_allocated = 1;
+ tcg_debug_assert(ts->base_type == type);
+ tcg_debug_assert(ts->kind == kind);
+ } else {
+ ts = tcg_temp_alloc(s);
+ if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
+ TCGTemp *ts2 = tcg_temp_alloc(s);
+
+ ts->base_type = type;
+ ts->type = TCG_TYPE_I32;
+ ts->temp_allocated = 1;
+ ts->kind = kind;
+
+ tcg_debug_assert(ts2 == ts + 1);
+ ts2->base_type = TCG_TYPE_I64;
+ ts2->type = TCG_TYPE_I32;
+ ts2->temp_allocated = 1;
+ ts2->kind = kind;
+ } else {
+ ts->base_type = type;
+ ts->type = type;
+ ts->temp_allocated = 1;
+ ts->kind = kind;
+ }
+ }
+
+#if defined(CONFIG_DEBUG_TCG)
+ s->temps_in_use++;
+#endif
+ return ts;
+}
+
+TCGv_vec tcg_temp_new_vec(TCGType type)
+{
+ TCGTemp *t;
+
+#ifdef CONFIG_DEBUG_TCG
+ switch (type) {
+ case TCG_TYPE_V64:
+ assert(TCG_TARGET_HAS_v64);
+ break;
+ case TCG_TYPE_V128:
+ assert(TCG_TARGET_HAS_v128);
+ break;
+ case TCG_TYPE_V256:
+ assert(TCG_TARGET_HAS_v256);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+#endif
+
+ t = tcg_temp_new_internal(type, 0);
+ return temp_tcgv_vec(t);
+}
+
+/* Create a new temp of the same type as an existing temp. */
+TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
+{
+ TCGTemp *t = tcgv_vec_temp(match);
+
+ tcg_debug_assert(t->temp_allocated != 0);
+
+ t = tcg_temp_new_internal(t->base_type, 0);
+ return temp_tcgv_vec(t);
+}
+
+void tcg_temp_free_internal(TCGTemp *ts)
+{
+ TCGContext *s = tcg_ctx;
+ int k, idx;
+
+ /* In order to simplify users of tcg_constant_*, silently ignore free. */
+ if (ts->kind == TEMP_CONST) {
+ return;
+ }
+
+#if defined(CONFIG_DEBUG_TCG)
+ s->temps_in_use--;
+ if (s->temps_in_use < 0) {
+ fprintf(stderr, "More temporaries freed than allocated!\n");
+ }
+#endif
+
+ tcg_debug_assert(ts->kind < TEMP_GLOBAL);
+ tcg_debug_assert(ts->temp_allocated != 0);
+ ts->temp_allocated = 0;
+
+ idx = temp_idx(ts);
+ k = ts->base_type + (ts->kind == TEMP_NORMAL ? 0 : TCG_TYPE_COUNT);
+ set_bit(idx, s->free_temps[k].l);
+}
+
+TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
+{
+ TCGContext *s = tcg_ctx;
+ GHashTable *h = s->const_table[type];
+ TCGTemp *ts;
+
+ if (h == NULL) {
+ h = g_hash_table_new(g_int64_hash, g_int64_equal);
+ s->const_table[type] = h;
+ }
+
+ ts = g_hash_table_lookup(h, &val);
+ if (ts == NULL) {
+ ts = tcg_temp_alloc(s);
+
+ if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
+ TCGTemp *ts2 = tcg_temp_alloc(s);
+
+ ts->base_type = TCG_TYPE_I64;
+ ts->type = TCG_TYPE_I32;
+ ts->kind = TEMP_CONST;
+ ts->temp_allocated = 1;
+ /*
+ * Retain the full value of the 64-bit constant in the low
+ * part, so that the hash table works. Actual uses will
+ * truncate the value to the low part.
+ */
+ ts->val = val;
+
+ tcg_debug_assert(ts2 == ts + 1);
+ ts2->base_type = TCG_TYPE_I64;
+ ts2->type = TCG_TYPE_I32;
+ ts2->kind = TEMP_CONST;
+ ts2->temp_allocated = 1;
+ ts2->val = val >> 32;
+ } else {
+ ts->base_type = type;
+ ts->type = type;
+ ts->kind = TEMP_CONST;
+ ts->temp_allocated = 1;
+ ts->val = val;
+ }
+ g_hash_table_insert(h, &ts->val, ts);
+ }
+
+ return ts;
+}
+
+TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
+{
+ val = dup_const(vece, val);
+ return temp_tcgv_vec(tcg_constant_internal(type, val));
+}
+
+TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
+{
+ TCGTemp *t = tcgv_vec_temp(match);
+
+ tcg_debug_assert(t->temp_allocated != 0);
+ return tcg_constant_vec(t->base_type, vece, val);
+}
+
+TCGv_i32 tcg_const_i32(int32_t val)
+{
+ TCGv_i32 t0;
+ t0 = tcg_temp_new_i32();
+ tcg_gen_movi_i32(t0, val);
+ return t0;
+}
+
+TCGv_i64 tcg_const_i64(int64_t val)
+{
+ TCGv_i64 t0;
+ t0 = tcg_temp_new_i64();
+ tcg_gen_movi_i64(t0, val);
+ return t0;
+}
+
+TCGv_i32 tcg_const_local_i32(int32_t val)
+{
+ TCGv_i32 t0;
+ t0 = tcg_temp_local_new_i32();
+ tcg_gen_movi_i32(t0, val);
+ return t0;
+}
+
+TCGv_i64 tcg_const_local_i64(int64_t val)
+{
+ TCGv_i64 t0;
+ t0 = tcg_temp_local_new_i64();
+ tcg_gen_movi_i64(t0, val);
+ return t0;
+}
+
+#if defined(CONFIG_DEBUG_TCG)
+void tcg_clear_temp_count(void)
+{
+ TCGContext *s = tcg_ctx;
+ s->temps_in_use = 0;
+}
+
+int tcg_check_temp_count(void)
+{
+ TCGContext *s = tcg_ctx;
+ if (s->temps_in_use) {
+ /* Clear the count so that we don't give another
+ * warning immediately next time around.
+ */
+ s->temps_in_use = 0;
+ return 1;
+ }
+ return 0;
+}
+#endif
+
+/* Return true if OP may appear in the opcode stream.
+ Test the runtime variable that controls each opcode. */
+bool tcg_op_supported(TCGOpcode op)
+{
+ const bool have_vec
+ = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;
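+    /* Non-zero iff the backend provides at least one vector type. */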
+
+ switch (op) {
+ case INDEX_op_discard:
+ case INDEX_op_set_label:
+ case INDEX_op_call:
+ case INDEX_op_br:
+ case INDEX_op_mb:
+ case INDEX_op_insn_start:
+ case INDEX_op_exit_tb:
+ case INDEX_op_goto_tb:
+ case INDEX_op_goto_ptr:
+ case INDEX_op_qemu_ld_i32:
+ case INDEX_op_qemu_st_i32:
+ case INDEX_op_qemu_ld_i64:
+ case INDEX_op_qemu_st_i64:
+ return true;
+
+ case INDEX_op_qemu_st8_i32:
+ return TCG_TARGET_HAS_qemu_st8_i32;
+
+ case INDEX_op_mov_i32:
+ case INDEX_op_setcond_i32:
+ case INDEX_op_brcond_i32:
+ case INDEX_op_ld8u_i32:
+ case INDEX_op_ld8s_i32:
+ case INDEX_op_ld16u_i32:
+ case INDEX_op_ld16s_i32:
+ case INDEX_op_ld_i32:
+ case INDEX_op_st8_i32:
+ case INDEX_op_st16_i32:
+ case INDEX_op_st_i32:
+ case INDEX_op_add_i32:
+ case INDEX_op_sub_i32:
+ case INDEX_op_mul_i32:
+ case INDEX_op_and_i32:
+ case INDEX_op_or_i32:
+ case INDEX_op_xor_i32:
+ case INDEX_op_shl_i32:
+ case INDEX_op_shr_i32:
+ case INDEX_op_sar_i32:
+ return true;
+
+ case INDEX_op_movcond_i32:
+ return TCG_TARGET_HAS_movcond_i32;
+ case INDEX_op_div_i32:
+ case INDEX_op_divu_i32:
+ return TCG_TARGET_HAS_div_i32;
+ case INDEX_op_rem_i32:
+ case INDEX_op_remu_i32:
+ return TCG_TARGET_HAS_rem_i32;
+ case INDEX_op_div2_i32:
+ case INDEX_op_divu2_i32:
+ return TCG_TARGET_HAS_div2_i32;
+ case INDEX_op_rotl_i32:
+ case INDEX_op_rotr_i32:
+ return TCG_TARGET_HAS_rot_i32;
+ case INDEX_op_deposit_i32:
+ return TCG_TARGET_HAS_deposit_i32;
+ case INDEX_op_extract_i32:
+ return TCG_TARGET_HAS_extract_i32;
+ case INDEX_op_sextract_i32:
+ return TCG_TARGET_HAS_sextract_i32;
+ case INDEX_op_extract2_i32:
+ return TCG_TARGET_HAS_extract2_i32;
+ case INDEX_op_add2_i32:
+ return TCG_TARGET_HAS_add2_i32;
+ case INDEX_op_sub2_i32:
+ return TCG_TARGET_HAS_sub2_i32;
+ case INDEX_op_mulu2_i32:
+ return TCG_TARGET_HAS_mulu2_i32;
+ case INDEX_op_muls2_i32:
+ return TCG_TARGET_HAS_muls2_i32;
+ case INDEX_op_muluh_i32:
+ return TCG_TARGET_HAS_muluh_i32;
+ case INDEX_op_mulsh_i32:
+ return TCG_TARGET_HAS_mulsh_i32;
+ case INDEX_op_ext8s_i32:
+ return TCG_TARGET_HAS_ext8s_i32;
+ case INDEX_op_ext16s_i32:
+ return TCG_TARGET_HAS_ext16s_i32;
+ case INDEX_op_ext8u_i32:
+ return TCG_TARGET_HAS_ext8u_i32;
+ case INDEX_op_ext16u_i32:
+ return TCG_TARGET_HAS_ext16u_i32;
+ case INDEX_op_bswap16_i32:
+ return TCG_TARGET_HAS_bswap16_i32;
+ case INDEX_op_bswap32_i32:
+ return TCG_TARGET_HAS_bswap32_i32;
+ case INDEX_op_not_i32:
+ return TCG_TARGET_HAS_not_i32;
+ case INDEX_op_neg_i32:
+ return TCG_TARGET_HAS_neg_i32;
+ case INDEX_op_andc_i32:
+ return TCG_TARGET_HAS_andc_i32;
+ case INDEX_op_orc_i32:
+ return TCG_TARGET_HAS_orc_i32;
+ case INDEX_op_eqv_i32:
+ return TCG_TARGET_HAS_eqv_i32;
+ case INDEX_op_nand_i32:
+ return TCG_TARGET_HAS_nand_i32;
+ case INDEX_op_nor_i32:
+ return TCG_TARGET_HAS_nor_i32;
+ case INDEX_op_clz_i32:
+ return TCG_TARGET_HAS_clz_i32;
+ case INDEX_op_ctz_i32:
+ return TCG_TARGET_HAS_ctz_i32;
+ case INDEX_op_ctpop_i32:
+ return TCG_TARGET_HAS_ctpop_i32;
+
+ case INDEX_op_brcond2_i32:
+ case INDEX_op_setcond2_i32:
+ return TCG_TARGET_REG_BITS == 32;
+
+ case INDEX_op_mov_i64:
+ case INDEX_op_setcond_i64:
+ case INDEX_op_brcond_i64:
+ case INDEX_op_ld8u_i64:
+ case INDEX_op_ld8s_i64:
+ case INDEX_op_ld16u_i64:
+ case INDEX_op_ld16s_i64:
+ case INDEX_op_ld32u_i64:
+ case INDEX_op_ld32s_i64:
+ case INDEX_op_ld_i64:
+ case INDEX_op_st8_i64:
+ case INDEX_op_st16_i64:
+ case INDEX_op_st32_i64:
+ case INDEX_op_st_i64:
+ case INDEX_op_add_i64:
+ case INDEX_op_sub_i64:
+ case INDEX_op_mul_i64:
+ case INDEX_op_and_i64:
+ case INDEX_op_or_i64:
+ case INDEX_op_xor_i64:
+ case INDEX_op_shl_i64:
+ case INDEX_op_shr_i64:
+ case INDEX_op_sar_i64:
+ case INDEX_op_ext_i32_i64:
+ case INDEX_op_extu_i32_i64:
+ return TCG_TARGET_REG_BITS == 64;
+
+ case INDEX_op_movcond_i64:
+ return TCG_TARGET_HAS_movcond_i64;
+ case INDEX_op_div_i64:
+ case INDEX_op_divu_i64:
+ return TCG_TARGET_HAS_div_i64;
+ case INDEX_op_rem_i64:
+ case INDEX_op_remu_i64:
+ return TCG_TARGET_HAS_rem_i64;
+ case INDEX_op_div2_i64:
+ case INDEX_op_divu2_i64:
+ return TCG_TARGET_HAS_div2_i64;
+ case INDEX_op_rotl_i64:
+ case INDEX_op_rotr_i64:
+ return TCG_TARGET_HAS_rot_i64;
+ case INDEX_op_deposit_i64:
+ return TCG_TARGET_HAS_deposit_i64;
+ case INDEX_op_extract_i64:
+ return TCG_TARGET_HAS_extract_i64;
+ case INDEX_op_sextract_i64:
+ return TCG_TARGET_HAS_sextract_i64;
+ case INDEX_op_extract2_i64:
+ return TCG_TARGET_HAS_extract2_i64;
+ case INDEX_op_extrl_i64_i32:
+ return TCG_TARGET_HAS_extrl_i64_i32;
+ case INDEX_op_extrh_i64_i32:
+ return TCG_TARGET_HAS_extrh_i64_i32;
+ case INDEX_op_ext8s_i64:
+ return TCG_TARGET_HAS_ext8s_i64;
+ case INDEX_op_ext16s_i64:
+ return TCG_TARGET_HAS_ext16s_i64;
+ case INDEX_op_ext32s_i64:
+ return TCG_TARGET_HAS_ext32s_i64;
+ case INDEX_op_ext8u_i64:
+ return TCG_TARGET_HAS_ext8u_i64;
+ case INDEX_op_ext16u_i64:
+ return TCG_TARGET_HAS_ext16u_i64;
+ case INDEX_op_ext32u_i64:
+ return TCG_TARGET_HAS_ext32u_i64;
+ case INDEX_op_bswap16_i64:
+ return TCG_TARGET_HAS_bswap16_i64;
+ case INDEX_op_bswap32_i64:
+ return TCG_TARGET_HAS_bswap32_i64;
+ case INDEX_op_bswap64_i64:
+ return TCG_TARGET_HAS_bswap64_i64;
+ case INDEX_op_not_i64:
+ return TCG_TARGET_HAS_not_i64;
+ case INDEX_op_neg_i64:
+ return TCG_TARGET_HAS_neg_i64;
+ case INDEX_op_andc_i64:
+ return TCG_TARGET_HAS_andc_i64;
+ case INDEX_op_orc_i64:
+ return TCG_TARGET_HAS_orc_i64;
+ case INDEX_op_eqv_i64:
+ return TCG_TARGET_HAS_eqv_i64;
+ case INDEX_op_nand_i64:
+ return TCG_TARGET_HAS_nand_i64;
+ case INDEX_op_nor_i64:
+ return TCG_TARGET_HAS_nor_i64;
+ case INDEX_op_clz_i64:
+ return TCG_TARGET_HAS_clz_i64;
+ case INDEX_op_ctz_i64:
+ return TCG_TARGET_HAS_ctz_i64;
+ case INDEX_op_ctpop_i64:
+ return TCG_TARGET_HAS_ctpop_i64;
+ case INDEX_op_add2_i64:
+ return TCG_TARGET_HAS_add2_i64;
+ case INDEX_op_sub2_i64:
+ return TCG_TARGET_HAS_sub2_i64;
+ case INDEX_op_mulu2_i64:
+ return TCG_TARGET_HAS_mulu2_i64;
+ case INDEX_op_muls2_i64:
+ return TCG_TARGET_HAS_muls2_i64;
+ case INDEX_op_muluh_i64:
+ return TCG_TARGET_HAS_muluh_i64;
+ case INDEX_op_mulsh_i64:
+ return TCG_TARGET_HAS_mulsh_i64;
+
+ case INDEX_op_mov_vec:
+ case INDEX_op_dup_vec:
+ case INDEX_op_dupm_vec:
+ case INDEX_op_ld_vec:
+ case INDEX_op_st_vec:
+ case INDEX_op_add_vec:
+ case INDEX_op_sub_vec:
+ case INDEX_op_and_vec:
+ case INDEX_op_or_vec:
+ case INDEX_op_xor_vec:
+ case INDEX_op_cmp_vec:
+ return have_vec;
+ case INDEX_op_dup2_vec:
+ return have_vec && TCG_TARGET_REG_BITS == 32;
+ case INDEX_op_not_vec:
+ return have_vec && TCG_TARGET_HAS_not_vec;
+ case INDEX_op_neg_vec:
+ return have_vec && TCG_TARGET_HAS_neg_vec;
+ case INDEX_op_abs_vec:
+ return have_vec && TCG_TARGET_HAS_abs_vec;
+ case INDEX_op_andc_vec:
+ return have_vec && TCG_TARGET_HAS_andc_vec;
+ case INDEX_op_orc_vec:
+ return have_vec && TCG_TARGET_HAS_orc_vec;
+ case INDEX_op_mul_vec:
+ return have_vec && TCG_TARGET_HAS_mul_vec;
+ case INDEX_op_shli_vec:
+ case INDEX_op_shri_vec:
+ case INDEX_op_sari_vec:
+ return have_vec && TCG_TARGET_HAS_shi_vec;
+ case INDEX_op_shls_vec:
+ case INDEX_op_shrs_vec:
+ case INDEX_op_sars_vec:
+ return have_vec && TCG_TARGET_HAS_shs_vec;
+ case INDEX_op_shlv_vec:
+ case INDEX_op_shrv_vec:
+ case INDEX_op_sarv_vec:
+ return have_vec && TCG_TARGET_HAS_shv_vec;
+ case INDEX_op_rotli_vec:
+ return have_vec && TCG_TARGET_HAS_roti_vec;
+ case INDEX_op_rotls_vec:
+ return have_vec && TCG_TARGET_HAS_rots_vec;
+ case INDEX_op_rotlv_vec:
+ case INDEX_op_rotrv_vec:
+ return have_vec && TCG_TARGET_HAS_rotv_vec;
+ case INDEX_op_ssadd_vec:
+ case INDEX_op_usadd_vec:
+ case INDEX_op_sssub_vec:
+ case INDEX_op_ussub_vec:
+ return have_vec && TCG_TARGET_HAS_sat_vec;
+ case INDEX_op_smin_vec:
+ case INDEX_op_umin_vec:
+ case INDEX_op_smax_vec:
+ case INDEX_op_umax_vec:
+ return have_vec && TCG_TARGET_HAS_minmax_vec;
+ case INDEX_op_bitsel_vec:
+ return have_vec && TCG_TARGET_HAS_bitsel_vec;
+ case INDEX_op_cmpsel_vec:
+ return have_vec && TCG_TARGET_HAS_cmpsel_vec;
+
+ default:
+ tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
+ return true;
+ }
+}
+
+/* Note: we convert the 64-bit args to 32-bit and do some alignment
+   and endian swapping. Maybe it would be better to do the alignment
+   and endian swapping in tcg_reg_alloc_call(). */
+void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
+{
+ int i, real_args, nb_rets, pi;
+ unsigned typemask;
+ const TCGHelperInfo *info;
+ TCGOp *op;
+
+ info = g_hash_table_lookup(helper_table, (gpointer)func);
+ typemask = info->typemask;
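+    /*
+     * typemask packs 3-bit dh_typecode fields: bits [2:0] describe the
+     * return value and bits starting at 3 * (i + 1) describe argument i;
+     * the low bit of each field is the signedness flag tested below.
+     */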
+
+#ifdef CONFIG_PLUGIN
+ /* detect non-plugin helpers */
+ if (tcg_ctx->plugin_insn && unlikely(strncmp(info->name, "plugin_", 7))) {
+ tcg_ctx->plugin_insn->calls_helpers = true;
+ }
+#endif
+
+#if defined(__sparc__) && !defined(__arch64__) \
+ && !defined(CONFIG_TCG_INTERPRETER)
+ /* We have 64-bit values in one register, but need to pass as two
+ separate parameters. Split them. */
+ int orig_typemask = typemask;
+ int orig_nargs = nargs;
+ TCGv_i64 retl, reth;
+ TCGTemp *split_args[MAX_OPC_PARAM];
+
+ retl = NULL;
+ reth = NULL;
+ typemask = 0;
+ for (i = real_args = 0; i < nargs; ++i) {
+ int argtype = extract32(orig_typemask, (i + 1) * 3, 3);
+ bool is_64bit = (argtype & ~1) == dh_typecode_i64;
+
+ if (is_64bit) {
+ TCGv_i64 orig = temp_tcgv_i64(args[i]);
+ TCGv_i32 h = tcg_temp_new_i32();
+ TCGv_i32 l = tcg_temp_new_i32();
+ tcg_gen_extr_i64_i32(l, h, orig);
+ split_args[real_args++] = tcgv_i32_temp(h);
+ typemask |= dh_typecode_i32 << (real_args * 3);
+ split_args[real_args++] = tcgv_i32_temp(l);
+ typemask |= dh_typecode_i32 << (real_args * 3);
+ } else {
+ split_args[real_args++] = args[i];
+ typemask |= argtype << (real_args * 3);
+ }
+ }
+ nargs = real_args;
+ args = split_args;
+#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
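+    /* Hosts defining TCG_TARGET_EXTEND_ARGS want 32-bit arguments
+       sign- or zero-extended to 64 bits before the call. */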
+ for (i = 0; i < nargs; ++i) {
+ int argtype = extract32(typemask, (i + 1) * 3, 3);
+ bool is_32bit = (argtype & ~1) == dh_typecode_i32;
+ bool is_signed = argtype & 1;
+
+ if (is_32bit) {
+ TCGv_i64 temp = tcg_temp_new_i64();
+ TCGv_i32 orig = temp_tcgv_i32(args[i]);
+ if (is_signed) {
+ tcg_gen_ext_i32_i64(temp, orig);
+ } else {
+ tcg_gen_extu_i32_i64(temp, orig);
+ }
+ args[i] = tcgv_i64_temp(temp);
+ }
+ }
+#endif /* TCG_TARGET_EXTEND_ARGS */
+
+ op = tcg_emit_op(INDEX_op_call);
+
+ pi = 0;
+ if (ret != NULL) {
+#if defined(__sparc__) && !defined(__arch64__) \
+ && !defined(CONFIG_TCG_INTERPRETER)
+ if ((typemask & 6) == dh_typecode_i64) {
+ /* The 32-bit ABI is going to return the 64-bit value in
+ the %o0/%o1 register pair. Prepare for this by using
+ two return temporaries, and reassemble below. */
+ retl = tcg_temp_new_i64();
+ reth = tcg_temp_new_i64();
+ op->args[pi++] = tcgv_i64_arg(reth);
+ op->args[pi++] = tcgv_i64_arg(retl);
+ nb_rets = 2;
+ } else {
+ op->args[pi++] = temp_arg(ret);
+ nb_rets = 1;
+ }
+#else
+ if (TCG_TARGET_REG_BITS < 64 && (typemask & 6) == dh_typecode_i64) {
+#ifdef HOST_WORDS_BIGENDIAN
+ op->args[pi++] = temp_arg(ret + 1);
+ op->args[pi++] = temp_arg(ret);
+#else
+ op->args[pi++] = temp_arg(ret);
+ op->args[pi++] = temp_arg(ret + 1);
+#endif
+ nb_rets = 2;
+ } else {
+ op->args[pi++] = temp_arg(ret);
+ nb_rets = 1;
+ }
+#endif
+ } else {
+ nb_rets = 0;
+ }
+ TCGOP_CALLO(op) = nb_rets;
+
+ real_args = 0;
+ for (i = 0; i < nargs; i++) {
+ int argtype = extract32(typemask, (i + 1) * 3, 3);
+ bool is_64bit = (argtype & ~1) == dh_typecode_i64;
+ bool want_align = false;
+
+#if defined(CONFIG_TCG_INTERPRETER)
+ /*
+ * Align all arguments, so that they land in predictable places
+ * for passing off to ffi_call.
+ */
+ want_align = true;
+#elif defined(TCG_TARGET_CALL_ALIGN_ARGS)
+    /* Some targets want 64-bit args aligned to an even slot */
+ want_align = is_64bit;
+#endif
+
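+        /* Pad with a dummy argument so the aligned argument starts at
+           an even index. */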
+ if (TCG_TARGET_REG_BITS < 64 && want_align && (real_args & 1)) {
+ op->args[pi++] = TCG_CALL_DUMMY_ARG;
+ real_args++;
+ }
+
+ if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
+ /*
+ * If stack grows up, then we will be placing successive
+ * arguments at lower addresses, which means we need to
+ * reverse the order compared to how we would normally
+ * treat either big or little-endian. For those arguments
+ * that will wind up in registers, this still works for
+ * HPPA (the only current STACK_GROWSUP target) since the
+ * argument registers are *also* allocated in decreasing
+ * order. If another such target is added, this logic may
+ * have to get more complicated to differentiate between
+ * stack arguments and register arguments.
+ */
+#if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
+ op->args[pi++] = temp_arg(args[i] + 1);
+ op->args[pi++] = temp_arg(args[i]);
+#else
+ op->args[pi++] = temp_arg(args[i]);
+ op->args[pi++] = temp_arg(args[i] + 1);
+#endif
+ real_args += 2;
+ continue;
+ }
+
+ op->args[pi++] = temp_arg(args[i]);
+ real_args++;
+ }
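+    /* The raw function pointer and its TCGHelperInfo are stored as the
+       final two arguments of the call op. */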
+ op->args[pi++] = (uintptr_t)func;
+ op->args[pi++] = (uintptr_t)info;
+ TCGOP_CALLI(op) = real_args;
+
+ /* Make sure the fields didn't overflow. */
+ tcg_debug_assert(TCGOP_CALLI(op) == real_args);
+ tcg_debug_assert(pi <= ARRAY_SIZE(op->args));
+
+#if defined(__sparc__) && !defined(__arch64__) \
+ && !defined(CONFIG_TCG_INTERPRETER)
+ /* Free all of the parts we allocated above. */
+ for (i = real_args = 0; i < orig_nargs; ++i) {
+ int argtype = extract32(orig_typemask, (i + 1) * 3, 3);
+ bool is_64bit = (argtype & ~1) == dh_typecode_i64;
+
+ if (is_64bit) {
+ tcg_temp_free_internal(args[real_args++]);
+ tcg_temp_free_internal(args[real_args++]);
+ } else {
+ real_args++;
+ }
+ }
+ if ((orig_typemask & 6) == dh_typecode_i64) {
+ /* The 32-bit ABI returned two 32-bit pieces. Re-assemble them.
+ Note that describing these as TCGv_i64 eliminates an unnecessary
+ zero-extension that tcg_gen_concat_i32_i64 would create. */
+ tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
+ tcg_temp_free_i64(retl);
+ tcg_temp_free_i64(reth);
+ }
+#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
+ for (i = 0; i < nargs; ++i) {
+ int argtype = extract32(typemask, (i + 1) * 3, 3);
+ bool is_32bit = (argtype & ~1) == dh_typecode_i32;
+
+ if (is_32bit) {
+ tcg_temp_free_internal(args[i]);
+ }
+ }
+#endif /* TCG_TARGET_EXTEND_ARGS */
+}
+
+static void tcg_reg_alloc_start(TCGContext *s)
+{
+ int i, n;
+
+ for (i = 0, n = s->nb_temps; i < n; i++) {
+ TCGTemp *ts = &s->temps[i];
+ TCGTempVal val = TEMP_VAL_MEM;
+
+ switch (ts->kind) {
+ case TEMP_CONST:
+ val = TEMP_VAL_CONST;
+ break;
+ case TEMP_FIXED:
+ val = TEMP_VAL_REG;
+ break;
+ case TEMP_GLOBAL:
+ break;
+ case TEMP_NORMAL:
+ val = TEMP_VAL_DEAD;
+ /* fall through */
+ case TEMP_LOCAL:
+ ts->mem_allocated = 0;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ ts->val_type = val;
+ }
+
+ memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
+}
+
+static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
+ TCGTemp *ts)
+{
+ int idx = temp_idx(ts);
+
+ switch (ts->kind) {
+ case TEMP_FIXED:
+ case TEMP_GLOBAL:
+ pstrcpy(buf, buf_size, ts->name);
+ break;
+ case TEMP_LOCAL:
+ snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
+ break;
+ case TEMP_NORMAL:
+ snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
+ break;
+ case TEMP_CONST:
+ switch (ts->type) {
+ case TCG_TYPE_I32:
+ snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
+ break;
+#if TCG_TARGET_REG_BITS > 32
+ case TCG_TYPE_I64:
+ snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
+ break;
+#endif
+ case TCG_TYPE_V64:
+ case TCG_TYPE_V128:
+ case TCG_TYPE_V256:
+ snprintf(buf, buf_size, "v%d$0x%" PRIx64,
+ 64 << (ts->type - TCG_TYPE_V64), ts->val);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ break;
+ }
+ return buf;
+}
+
+static char *tcg_get_arg_str(TCGContext *s, char *buf,
+ int buf_size, TCGArg arg)
+{
+ return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
+}
+
+static const char * const cond_name[] =
+{
+ [TCG_COND_NEVER] = "never",
+ [TCG_COND_ALWAYS] = "always",
+ [TCG_COND_EQ] = "eq",
+ [TCG_COND_NE] = "ne",
+ [TCG_COND_LT] = "lt",
+ [TCG_COND_GE] = "ge",
+ [TCG_COND_LE] = "le",
+ [TCG_COND_GT] = "gt",
+ [TCG_COND_LTU] = "ltu",
+ [TCG_COND_GEU] = "geu",
+ [TCG_COND_LEU] = "leu",
+ [TCG_COND_GTU] = "gtu"
+};
+
+static const char * const ldst_name[] =
+{
+ [MO_UB] = "ub",
+ [MO_SB] = "sb",
+ [MO_LEUW] = "leuw",
+ [MO_LESW] = "lesw",
+ [MO_LEUL] = "leul",
+ [MO_LESL] = "lesl",
+ [MO_LEQ] = "leq",
+ [MO_BEUW] = "beuw",
+ [MO_BESW] = "besw",
+ [MO_BEUL] = "beul",
+ [MO_BESL] = "besl",
+ [MO_BEQ] = "beq",
+};
+
+static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
+#ifdef TARGET_ALIGNED_ONLY
+ [MO_UNALN >> MO_ASHIFT] = "un+",
+ [MO_ALIGN >> MO_ASHIFT] = "",
+#else
+ [MO_UNALN >> MO_ASHIFT] = "",
+ [MO_ALIGN >> MO_ASHIFT] = "al+",
+#endif
+ [MO_ALIGN_2 >> MO_ASHIFT] = "al2+",
+ [MO_ALIGN_4 >> MO_ASHIFT] = "al4+",
+ [MO_ALIGN_8 >> MO_ASHIFT] = "al8+",
+ [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
+ [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
+ [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
+};
+
+static const char bswap_flag_name[][6] = {
+ [TCG_BSWAP_IZ] = "iz",
+ [TCG_BSWAP_OZ] = "oz",
+ [TCG_BSWAP_OS] = "os",
+ [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
+ [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
+};
+
+static inline bool tcg_regset_single(TCGRegSet d)
+{
+ return (d & (d - 1)) == 0;
+}
+
+static inline TCGReg tcg_regset_first(TCGRegSet d)
+{
+ if (TCG_TARGET_NB_REGS <= 32) {
+ return ctz32(d);
+ } else {
+ return ctz64(d);
+ }
+}
+
+static void tcg_dump_ops(TCGContext *s, bool have_prefs)
+{
+ char buf[128];
+ TCGOp *op;
+
+ QTAILQ_FOREACH(op, &s->ops, link) {
+ int i, k, nb_oargs, nb_iargs, nb_cargs;
+ const TCGOpDef *def;
+ TCGOpcode c;
+ int col = 0;
+
+ c = op->opc;
+ def = &tcg_op_defs[c];
+
+ if (c == INDEX_op_insn_start) {
+ nb_oargs = 0;
+ col += qemu_log("\n ----");
+
+ for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
+ target_ulong a;
+#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
+ a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
+#else
+ a = op->args[i];
+#endif
+ col += qemu_log(" " TARGET_FMT_lx, a);
+ }
+ } else if (c == INDEX_op_call) {
+ const TCGHelperInfo *info = tcg_call_info(op);
+ void *func = tcg_call_func(op);
+
+ /* variable number of arguments */
+ nb_oargs = TCGOP_CALLO(op);
+ nb_iargs = TCGOP_CALLI(op);
+ nb_cargs = def->nb_cargs;
+
+ col += qemu_log(" %s ", def->name);
+
+ /*
+ * Print the function name from TCGHelperInfo, if available.
+ * Note that plugins have a template function for the info,
+ * but the actual function pointer comes from the plugin.
+ */
+ if (func == info->func) {
+ col += qemu_log("%s", info->name);
+ } else {
+ col += qemu_log("plugin(%p)", func);
+ }
+
+ col += qemu_log(",$0x%x,$%d", info->flags, nb_oargs);
+ for (i = 0; i < nb_oargs; i++) {
+ col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
+ op->args[i]));
+ }
+ for (i = 0; i < nb_iargs; i++) {
+ TCGArg arg = op->args[nb_oargs + i];
+ const char *t = "<dummy>";
+ if (arg != TCG_CALL_DUMMY_ARG) {
+ t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
+ }
+ col += qemu_log(",%s", t);
+ }
+ } else {
+ col += qemu_log(" %s ", def->name);
+
+ nb_oargs = def->nb_oargs;
+ nb_iargs = def->nb_iargs;
+ nb_cargs = def->nb_cargs;
+
+ if (def->flags & TCG_OPF_VECTOR) {
+ col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op),
+ 8 << TCGOP_VECE(op));
+ }
+
+ k = 0;
+ for (i = 0; i < nb_oargs; i++) {
+ if (k != 0) {
+ col += qemu_log(",");
+ }
+ col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
+ op->args[k++]));
+ }
+ for (i = 0; i < nb_iargs; i++) {
+ if (k != 0) {
+ col += qemu_log(",");
+ }
+ col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
+ op->args[k++]));
+ }
+ switch (c) {
+ case INDEX_op_brcond_i32:
+ case INDEX_op_setcond_i32:
+ case INDEX_op_movcond_i32:
+ case INDEX_op_brcond2_i32:
+ case INDEX_op_setcond2_i32:
+ case INDEX_op_brcond_i64:
+ case INDEX_op_setcond_i64:
+ case INDEX_op_movcond_i64:
+ case INDEX_op_cmp_vec:
+ case INDEX_op_cmpsel_vec:
+ if (op->args[k] < ARRAY_SIZE(cond_name)
+ && cond_name[op->args[k]]) {
+ col += qemu_log(",%s", cond_name[op->args[k++]]);
+ } else {
+ col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
+ }
+ i = 1;
+ break;
+ case INDEX_op_qemu_ld_i32:
+ case INDEX_op_qemu_st_i32:
+ case INDEX_op_qemu_st8_i32:
+ case INDEX_op_qemu_ld_i64:
+ case INDEX_op_qemu_st_i64:
+ {
+ MemOpIdx oi = op->args[k++];
+ MemOp op = get_memop(oi);
+ unsigned ix = get_mmuidx(oi);
+
+ if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
+ col += qemu_log(",$0x%x,%u", op, ix);
+ } else {
+ const char *s_al, *s_op;
+ s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
+ s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
+ col += qemu_log(",%s%s,%u", s_al, s_op, ix);
+ }
+ i = 1;
+ }
+ break;
+ case INDEX_op_bswap16_i32:
+ case INDEX_op_bswap16_i64:
+ case INDEX_op_bswap32_i32:
+ case INDEX_op_bswap32_i64:
+ case INDEX_op_bswap64_i64:
+ {
+ TCGArg flags = op->args[k];
+ const char *name = NULL;
+
+ if (flags < ARRAY_SIZE(bswap_flag_name)) {
+ name = bswap_flag_name[flags];
+ }
+ if (name) {
+ col += qemu_log(",%s", name);
+ } else {
+ col += qemu_log(",$0x%" TCG_PRIlx, flags);
+ }
+ i = k = 1;
+ }
+ break;
+ default:
+ i = 0;
+ break;
+ }
+ switch (c) {
+ case INDEX_op_set_label:
+ case INDEX_op_br:
+ case INDEX_op_brcond_i32:
+ case INDEX_op_brcond_i64:
+ case INDEX_op_brcond2_i32:
+ col += qemu_log("%s$L%d", k ? "," : "",
+ arg_label(op->args[k])->id);
+ i++, k++;
+ break;
+ default:
+ break;
+ }
+ for (; i < nb_cargs; i++, k++) {
+ col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
+ }
+ }
+
+ if (have_prefs || op->life) {
+
+ QemuLogFile *logfile;
+
+ rcu_read_lock();
+ logfile = qatomic_rcu_read(&qemu_logfile);
+ if (logfile) {
+ for (; col < 40; ++col) {
+ putc(' ', logfile->fd);
+ }
+ }
+ rcu_read_unlock();
+ }
+
+ if (op->life) {
+ unsigned life = op->life;
+
+ if (life & (SYNC_ARG * 3)) {
+ qemu_log(" sync:");
+ for (i = 0; i < 2; ++i) {
+ if (life & (SYNC_ARG << i)) {
+ qemu_log(" %d", i);
+ }
+ }
+ }
+ life /= DEAD_ARG;
+ if (life) {
+ qemu_log(" dead:");
+ for (i = 0; life; ++i, life >>= 1) {
+ if (life & 1) {
+ qemu_log(" %d", i);
+ }
+ }
+ }
+ }
+
+ if (have_prefs) {
+ for (i = 0; i < nb_oargs; ++i) {
+ TCGRegSet set = op->output_pref[i];
+
+ if (i == 0) {
+ qemu_log(" pref=");
+ } else {
+ qemu_log(",");
+ }
+ if (set == 0) {
+ qemu_log("none");
+ } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
+ qemu_log("all");
+#ifdef CONFIG_DEBUG_TCG
+ } else if (tcg_regset_single(set)) {
+ TCGReg reg = tcg_regset_first(set);
+ qemu_log("%s", tcg_target_reg_names[reg]);
+#endif
+ } else if (TCG_TARGET_NB_REGS <= 32) {
+ qemu_log("%#x", (uint32_t)set);
+ } else {
+ qemu_log("%#" PRIx64, (uint64_t)set);
+ }
+ }
+ }
+
+ qemu_log("\n");
+ }
+}
+
+/* we give more priority to constraints with fewer registers */
+static int get_constraint_priority(const TCGOpDef *def, int k)
+{
+ const TCGArgConstraint *arg_ct = &def->args_ct[k];
+ int n;
+
+ if (arg_ct->oalias) {
+ /* an alias is equivalent to a single register */
+ n = 1;
+ } else {
+ n = ctpop64(arg_ct->regs);
+ }
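+    /* Fewer allowed registers yields a larger value, i.e. higher priority. */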
+ return TCG_TARGET_NB_REGS - n + 1;
+}
+
+/* sort from highest priority to lowest */
+static void sort_constraints(TCGOpDef *def, int start, int n)
+{
+ int i, j;
+ TCGArgConstraint *a = def->args_ct;
+
+ for (i = 0; i < n; i++) {
+ a[start + i].sort_index = start + i;
+ }
+ if (n <= 1) {
+ return;
+ }
+ for (i = 0; i < n - 1; i++) {
+ for (j = i + 1; j < n; j++) {
+ int p1 = get_constraint_priority(def, a[start + i].sort_index);
+ int p2 = get_constraint_priority(def, a[start + j].sort_index);
+ if (p1 < p2) {
+ int tmp = a[start + i].sort_index;
+ a[start + i].sort_index = a[start + j].sort_index;
+ a[start + j].sort_index = tmp;
+ }
+ }
+ }
+}
+
+static void process_op_defs(TCGContext *s)
+{
+ TCGOpcode op;
+
+ for (op = 0; op < NB_OPS; op++) {
+ TCGOpDef *def = &tcg_op_defs[op];
+ const TCGTargetOpDef *tdefs;
+ int i, nb_args;
+
+ if (def->flags & TCG_OPF_NOT_PRESENT) {
+ continue;
+ }
+
+ nb_args = def->nb_iargs + def->nb_oargs;
+ if (nb_args == 0) {
+ continue;
+ }
+
+ /*
+ * Macro magic should make it impossible, but double-check that
+         * the array index is in range.  Since the signedness of an enum
+ * is implementation defined, force the result to unsigned.
+ */
+ unsigned con_set = tcg_target_op_def(op);
+ tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
+ tdefs = &constraint_sets[con_set];
+
+ for (i = 0; i < nb_args; i++) {
+ const char *ct_str = tdefs->args_ct_str[i];
+ /* Incomplete TCGTargetOpDef entry. */
+ tcg_debug_assert(ct_str != NULL);
+
+ while (*ct_str != '\0') {
+ switch(*ct_str) {
+ case '0' ... '9':
+ {
+ int oarg = *ct_str - '0';
+ tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
+ tcg_debug_assert(oarg < def->nb_oargs);
+ tcg_debug_assert(def->args_ct[oarg].regs != 0);
+ def->args_ct[i] = def->args_ct[oarg];
+ /* The output sets oalias. */
+ def->args_ct[oarg].oalias = true;
+ def->args_ct[oarg].alias_index = i;
+ /* The input sets ialias. */
+ def->args_ct[i].ialias = true;
+ def->args_ct[i].alias_index = oarg;
+ }
+ ct_str++;
+ break;
+ case '&':
+ def->args_ct[i].newreg = true;
+ ct_str++;
+ break;
+ case 'i':
+ def->args_ct[i].ct |= TCG_CT_CONST;
+ ct_str++;
+ break;
+
+ /* Include all of the target-specific constraints. */
+
+#undef CONST
+#define CONST(CASE, MASK) \
+ case CASE: def->args_ct[i].ct |= MASK; ct_str++; break;
+#define REGS(CASE, MASK) \
+ case CASE: def->args_ct[i].regs |= MASK; ct_str++; break;
+
+#include "tcg-target-con-str.h"
+
+#undef REGS
+#undef CONST
+ default:
+ /* Typo in TCGTargetOpDef constraint. */
+ g_assert_not_reached();
+ }
+ }
+ }
+
+ /* TCGTargetOpDef entry with too much information? */
+ tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
+
+    /* sort the constraints (XXX: this is just a heuristic) */
+ sort_constraints(def, 0, def->nb_oargs);
+ sort_constraints(def, def->nb_oargs, def->nb_iargs);
+ }
+}
+
+void tcg_op_remove(TCGContext *s, TCGOp *op)
+{
+ TCGLabel *label;
+
+ switch (op->opc) {
+ case INDEX_op_br:
+ label = arg_label(op->args[0]);
+ label->refs--;
+ break;
+ case INDEX_op_brcond_i32:
+ case INDEX_op_brcond_i64:
+ label = arg_label(op->args[3]);
+ label->refs--;
+ break;
+ case INDEX_op_brcond2_i32:
+ label = arg_label(op->args[5]);
+ label->refs--;
+ break;
+ default:
+ break;
+ }
+
+ QTAILQ_REMOVE(&s->ops, op, link);
+ QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
+ s->nb_ops--;
+
+#ifdef CONFIG_PROFILER
+ qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
+#endif
+}
+
+void tcg_remove_ops_after(TCGOp *op)
+{
+ TCGContext *s = tcg_ctx;
+
+ while (true) {
+ TCGOp *last = tcg_last_op();
+ if (last == op) {
+ return;
+ }
+ tcg_op_remove(s, last);
+ }
+}
+
+static TCGOp *tcg_op_alloc(TCGOpcode opc)
+{
+ TCGContext *s = tcg_ctx;
+ TCGOp *op;
+
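+    /* Reuse an op recycled by tcg_op_remove() if one is available,
+       otherwise allocate a new one with tcg_malloc(). */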
+ if (likely(QTAILQ_EMPTY(&s->free_ops))) {
+ op = tcg_malloc(sizeof(TCGOp));
+ } else {
+ op = QTAILQ_FIRST(&s->free_ops);
+ QTAILQ_REMOVE(&s->free_ops, op, link);
+ }
+ memset(op, 0, offsetof(TCGOp, link));
+ op->opc = opc;
+ s->nb_ops++;
+
+ return op;
+}
+
+TCGOp *tcg_emit_op(TCGOpcode opc)
+{
+ TCGOp *op = tcg_op_alloc(opc);
+ QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
+ return op;
+}
+
+TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
+{
+ TCGOp *new_op = tcg_op_alloc(opc);
+ QTAILQ_INSERT_BEFORE(old_op, new_op, link);
+ return new_op;
+}
+
+TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
+{
+ TCGOp *new_op = tcg_op_alloc(opc);
+ QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
+ return new_op;
+}
+
+/* Reachability analysis: remove unreachable code. */
+static void reachable_code_pass(TCGContext *s)
+{
+ TCGOp *op, *op_next;
+ bool dead = false;
+
+ QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
+ bool remove = dead;
+ TCGLabel *label;
+
+ switch (op->opc) {
+ case INDEX_op_set_label:
+ label = arg_label(op->args[0]);
+ if (label->refs == 0) {
+ /*
+ * While there is an occasional backward branch, virtually
+ * all branches generated by the translators are forward.
+ * Which means that generally we will have already removed
+ * all references to the label that will be, and there is
+ * little to be gained by iterating.
+ */
+ remove = true;
+ } else {
+ /* Once we see a label, insns become live again. */
+ dead = false;
+ remove = false;
+
+ /*
+ * Optimization can fold conditional branches to unconditional.
+ * If we find a label with one reference which is preceded by
+ * an unconditional branch to it, remove both. This needed to
+ * wait until the dead code in between them was removed.
+ */
+ if (label->refs == 1) {
+ TCGOp *op_prev = QTAILQ_PREV(op, link);
+ if (op_prev->opc == INDEX_op_br &&
+ label == arg_label(op_prev->args[0])) {
+ tcg_op_remove(s, op_prev);
+ remove = true;
+ }
+ }
+ }
+ break;
+
+ case INDEX_op_br:
+ case INDEX_op_exit_tb:
+ case INDEX_op_goto_ptr:
+ /* Unconditional branches; everything following is dead. */
+ dead = true;
+ break;
+
+ case INDEX_op_call:
+ /* Notice noreturn helper calls, raising exceptions. */
+ if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
+ dead = true;
+ }
+ break;
+
+ case INDEX_op_insn_start:
+ /* Never remove -- we need to keep these for unwind. */
+ remove = false;
+ break;
+
+ default:
+ break;
+ }
+
+ if (remove) {
+ tcg_op_remove(s, op);
+ }
+ }
+}
+
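+/* Liveness state bits: TS_DEAD means the value is not live; TS_MEM
+   means the value must also be present in its memory slot. */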
+#define TS_DEAD 1
+#define TS_MEM 2
+
+#define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n)))
+#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
+
+/* For liveness_pass_1, the register preferences for a given temp. */
+static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
+{
+ return ts->state_ptr;
+}
+
+/* For liveness_pass_1, reset the preferences for a given temp to the
+ * maximal regset for its type.
+ */
+static inline void la_reset_pref(TCGTemp *ts)
+{
+ *la_temp_pref(ts)
+ = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
+}
+
+/* liveness analysis: end of function: all temps are dead, and globals
+ should be in memory. */
+static void la_func_end(TCGContext *s, int ng, int nt)
+{
+ int i;
+
+ for (i = 0; i < ng; ++i) {
+ s->temps[i].state = TS_DEAD | TS_MEM;
+ la_reset_pref(&s->temps[i]);
+ }
+ for (i = ng; i < nt; ++i) {
+ s->temps[i].state = TS_DEAD;
+ la_reset_pref(&s->temps[i]);
+ }
+}
+
+/* liveness analysis: end of basic block: all temps are dead, globals
+ and local temps should be in memory. */
+static void la_bb_end(TCGContext *s, int ng, int nt)
+{
+ int i;
+
+ for (i = 0; i < nt; ++i) {
+ TCGTemp *ts = &s->temps[i];
+ int state;
+
+ switch (ts->kind) {
+ case TEMP_FIXED:
+ case TEMP_GLOBAL:
+ case TEMP_LOCAL:
+ state = TS_DEAD | TS_MEM;
+ break;
+ case TEMP_NORMAL:
+ case TEMP_CONST:
+ state = TS_DEAD;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ ts->state = state;
+ la_reset_pref(ts);
+ }
+}
+
+/* liveness analysis: sync globals back to memory. */
+static void la_global_sync(TCGContext *s, int ng)
+{
+ int i;
+
+ for (i = 0; i < ng; ++i) {
+ int state = s->temps[i].state;
+ s->temps[i].state = state | TS_MEM;
+ if (state == TS_DEAD) {
+ /* If the global was previously dead, reset prefs. */
+ la_reset_pref(&s->temps[i]);
+ }
+ }
+}
+
+/*
+ * liveness analysis: conditional branch: all temps are dead,
+ * globals and local temps should be synced.
+ */
+static void la_bb_sync(TCGContext *s, int ng, int nt)
+{
+ la_global_sync(s, ng);
+
+ for (int i = ng; i < nt; ++i) {
+ TCGTemp *ts = &s->temps[i];
+ int state;
+
+ switch (ts->kind) {
+ case TEMP_LOCAL:
+ state = ts->state;
+ ts->state = state | TS_MEM;
+ if (state != TS_DEAD) {
+ continue;
+ }
+ break;
+ case TEMP_NORMAL:
+ s->temps[i].state = TS_DEAD;
+ break;
+ case TEMP_CONST:
+ continue;
+ default:
+ g_assert_not_reached();
+ }
+ la_reset_pref(&s->temps[i]);
+ }
+}
+
+/* liveness analysis: sync globals back to memory and kill. */
+static void la_global_kill(TCGContext *s, int ng)
+{
+ int i;
+
+ for (i = 0; i < ng; i++) {
+ s->temps[i].state = TS_DEAD | TS_MEM;
+ la_reset_pref(&s->temps[i]);
+ }
+}
+
+/* liveness analysis: note live globals crossing calls. */
+static void la_cross_call(TCGContext *s, int nt)
+{
+ TCGRegSet mask = ~tcg_target_call_clobber_regs;
+ int i;
+
+ for (i = 0; i < nt; i++) {
+ TCGTemp *ts = &s->temps[i];
+ if (!(ts->state & TS_DEAD)) {
+ TCGRegSet *pset = la_temp_pref(ts);
+ TCGRegSet set = *pset;
+
+ set &= mask;
+ /* If the combination is not possible, restart. */
+ if (set == 0) {
+ set = tcg_target_available_regs[ts->type] & mask;
+ }
+ *pset = set;
+ }
+ }
+}
+
+/* Liveness analysis: update the opc_arg_life array to tell if a
+   given input argument is dead. Instructions updating dead
+ temporaries are removed. */
+static void liveness_pass_1(TCGContext *s)
+{
+ int nb_globals = s->nb_globals;
+ int nb_temps = s->nb_temps;
+ TCGOp *op, *op_prev;
+ TCGRegSet *prefs;
+ int i;
+
+ prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
+ for (i = 0; i < nb_temps; ++i) {
+ s->temps[i].state_ptr = prefs + i;
+ }
+
+ /* ??? Should be redundant with the exit_tb that ends the TB. */
+ la_func_end(s, nb_globals, nb_temps);
+
+ QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
+ int nb_iargs, nb_oargs;
+ TCGOpcode opc_new, opc_new2;
+ bool have_opc_new2;
+ TCGLifeData arg_life = 0;
+ TCGTemp *ts;
+ TCGOpcode opc = op->opc;
+ const TCGOpDef *def = &tcg_op_defs[opc];
+
+ switch (opc) {
+ case INDEX_op_call:
+ {
+ int call_flags;
+ int nb_call_regs;
+
+ nb_oargs = TCGOP_CALLO(op);
+ nb_iargs = TCGOP_CALLI(op);
+ call_flags = tcg_call_flags(op);
+
+ /* pure functions can be removed if their result is unused */
+ if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
+ for (i = 0; i < nb_oargs; i++) {
+ ts = arg_temp(op->args[i]);
+ if (ts->state != TS_DEAD) {
+ goto do_not_remove_call;
+ }
+ }
+ goto do_remove;
+ }
+ do_not_remove_call:
+
+ /* Output args are dead. */
+ for (i = 0; i < nb_oargs; i++) {
+ ts = arg_temp(op->args[i]);
+ if (ts->state & TS_DEAD) {
+ arg_life |= DEAD_ARG << i;
+ }
+ if (ts->state & TS_MEM) {
+ arg_life |= SYNC_ARG << i;
+ }
+ ts->state = TS_DEAD;
+ la_reset_pref(ts);
+
+ /* Not used -- it will be tcg_target_call_oarg_regs[i]. */
+ op->output_pref[i] = 0;
+ }
+
+ if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
+ TCG_CALL_NO_READ_GLOBALS))) {
+ la_global_kill(s, nb_globals);
+ } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
+ la_global_sync(s, nb_globals);
+ }
+
+ /* Record arguments that die in this helper. */
+ for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
+ ts = arg_temp(op->args[i]);
+ if (ts && ts->state & TS_DEAD) {
+ arg_life |= DEAD_ARG << i;
+ }
+ }
+
+ /* For all live registers, remove call-clobbered prefs. */
+ la_cross_call(s, nb_temps);
+
+ nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
+
+ /* Input arguments are live for preceding opcodes. */
+ for (i = 0; i < nb_iargs; i++) {
+ ts = arg_temp(op->args[i + nb_oargs]);
+ if (ts && ts->state & TS_DEAD) {
+ /* For those arguments that die, and will be allocated
+ * in registers, clear the register set for that arg,
+ * to be filled in below. For args that will be on
+ * the stack, reset to any available reg.
+ */
+ *la_temp_pref(ts)
+ = (i < nb_call_regs ? 0 :
+ tcg_target_available_regs[ts->type]);
+ ts->state &= ~TS_DEAD;
+ }
+ }
+
+ /* For each input argument, add its input register to prefs.
+ If a temp is used once, this produces a single set bit. */
+ for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
+ ts = arg_temp(op->args[i + nb_oargs]);
+ if (ts) {
+ tcg_regset_set_reg(*la_temp_pref(ts),
+ tcg_target_call_iarg_regs[i]);
+ }
+ }
+ }
+ break;
+ case INDEX_op_insn_start:
+ break;
+ case INDEX_op_discard:
+ /* mark the temporary as dead */
+ ts = arg_temp(op->args[0]);
+ ts->state = TS_DEAD;
+ la_reset_pref(ts);
+ break;
+
+ case INDEX_op_add2_i32:
+ opc_new = INDEX_op_add_i32;
+ goto do_addsub2;
+ case INDEX_op_sub2_i32:
+ opc_new = INDEX_op_sub_i32;
+ goto do_addsub2;
+ case INDEX_op_add2_i64:
+ opc_new = INDEX_op_add_i64;
+ goto do_addsub2;
+ case INDEX_op_sub2_i64:
+ opc_new = INDEX_op_sub_i64;
+ do_addsub2:
+ nb_iargs = 4;
+ nb_oargs = 2;
+ /* Test if the high part of the operation is dead, but not
+ the low part. The result can be optimized to a simple
+               add or sub. This happens often for an x86_64 guest when
+               the CPU mode is set to 32 bit. */
+ if (arg_temp(op->args[1])->state == TS_DEAD) {
+ if (arg_temp(op->args[0])->state == TS_DEAD) {
+ goto do_remove;
+ }
+ /* Replace the opcode and adjust the args in place,
+ leaving 3 unused args at the end. */
+ op->opc = opc = opc_new;
+ op->args[1] = op->args[2];
+ op->args[2] = op->args[4];
+ /* Fall through and mark the single-word operation live. */
+ nb_iargs = 2;
+ nb_oargs = 1;
+ }
+ goto do_not_remove;
+
+ case INDEX_op_mulu2_i32:
+ opc_new = INDEX_op_mul_i32;
+ opc_new2 = INDEX_op_muluh_i32;
+ have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
+ goto do_mul2;
+ case INDEX_op_muls2_i32:
+ opc_new = INDEX_op_mul_i32;
+ opc_new2 = INDEX_op_mulsh_i32;
+ have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
+ goto do_mul2;
+ case INDEX_op_mulu2_i64:
+ opc_new = INDEX_op_mul_i64;
+ opc_new2 = INDEX_op_muluh_i64;
+ have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
+ goto do_mul2;
+ case INDEX_op_muls2_i64:
+ opc_new = INDEX_op_mul_i64;
+ opc_new2 = INDEX_op_mulsh_i64;
+ have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
+ goto do_mul2;
+ do_mul2:
+ nb_iargs = 2;
+ nb_oargs = 2;
+ if (arg_temp(op->args[1])->state == TS_DEAD) {
+ if (arg_temp(op->args[0])->state == TS_DEAD) {
+ /* Both parts of the operation are dead. */
+ goto do_remove;
+ }
+ /* The high part of the operation is dead; generate the low. */
+ op->opc = opc = opc_new;
+ op->args[1] = op->args[2];
+ op->args[2] = op->args[3];
+ } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
+ /* The low part of the operation is dead; generate the high. */
+ op->opc = opc = opc_new2;
+ op->args[0] = op->args[1];
+ op->args[1] = op->args[2];
+ op->args[2] = op->args[3];
+ } else {
+ goto do_not_remove;
+ }
+ /* Mark the single-word operation live. */
+ nb_oargs = 1;
+ goto do_not_remove;
+
+ default:
+ /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
+ nb_iargs = def->nb_iargs;
+ nb_oargs = def->nb_oargs;
+
+ /* Test if the operation can be removed because all
+ its outputs are dead. We assume that nb_oargs == 0
+               implies side effects. */
+ if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
+ for (i = 0; i < nb_oargs; i++) {
+ if (arg_temp(op->args[i])->state != TS_DEAD) {
+ goto do_not_remove;
+ }
+ }
+ goto do_remove;
+ }
+ goto do_not_remove;
+
+ do_remove:
+ tcg_op_remove(s, op);
+ break;
+
+ do_not_remove:
+ for (i = 0; i < nb_oargs; i++) {
+ ts = arg_temp(op->args[i]);
+
+ /* Remember the preference of the uses that followed. */
+ op->output_pref[i] = *la_temp_pref(ts);
+
+ /* Output args are dead. */
+ if (ts->state & TS_DEAD) {
+ arg_life |= DEAD_ARG << i;
+ }
+ if (ts->state & TS_MEM) {
+ arg_life |= SYNC_ARG << i;
+ }
+ ts->state = TS_DEAD;
+ la_reset_pref(ts);
+ }
+
+ /* If end of basic block, update. */
+ if (def->flags & TCG_OPF_BB_EXIT) {
+ la_func_end(s, nb_globals, nb_temps);
+ } else if (def->flags & TCG_OPF_COND_BRANCH) {
+ la_bb_sync(s, nb_globals, nb_temps);
+ } else if (def->flags & TCG_OPF_BB_END) {
+ la_bb_end(s, nb_globals, nb_temps);
+ } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
+ la_global_sync(s, nb_globals);
+ if (def->flags & TCG_OPF_CALL_CLOBBER) {
+ la_cross_call(s, nb_temps);
+ }
+ }
+
+ /* Record arguments that die in this opcode. */
+ for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
+ ts = arg_temp(op->args[i]);
+ if (ts->state & TS_DEAD) {
+ arg_life |= DEAD_ARG << i;
+ }
+ }
+
+ /* Input arguments are live for preceding opcodes. */
+ for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
+ ts = arg_temp(op->args[i]);
+ if (ts->state & TS_DEAD) {
+ /* For operands that were dead, initially allow
+ all regs for the type. */
+ *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
+ ts->state &= ~TS_DEAD;
+ }
+ }
+
+ /* Incorporate constraints for this operand. */
+ switch (opc) {
+ case INDEX_op_mov_i32:
+ case INDEX_op_mov_i64:
+ /* Note that these are TCG_OPF_NOT_PRESENT and do not
+ have proper constraints. That said, special case
+ moves to propagate preferences backward. */
+ if (IS_DEAD_ARG(1)) {
+ *la_temp_pref(arg_temp(op->args[0]))
+ = *la_temp_pref(arg_temp(op->args[1]));
+ }
+ break;
+
+ default:
+ for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
+ const TCGArgConstraint *ct = &def->args_ct[i];
+ TCGRegSet set, *pset;
+
+ ts = arg_temp(op->args[i]);
+ pset = la_temp_pref(ts);
+ set = *pset;
+
+ set &= ct->regs;
+ if (ct->ialias) {
+ set &= op->output_pref[ct->alias_index];
+ }
+ /* If the combination is not possible, restart. */
+ if (set == 0) {
+ set = ct->regs;
+ }
+ *pset = set;
+ }
+ break;
+ }
+ break;
+ }
+ op->life = arg_life;
+ }
+}
+
+/* Liveness analysis: Convert indirect regs to direct temporaries. */
+static bool liveness_pass_2(TCGContext *s)
+{
+ int nb_globals = s->nb_globals;
+ int nb_temps, i;
+ bool changes = false;
+ TCGOp *op, *op_next;
+
+ /* Create a temporary for each indirect global. */
+ for (i = 0; i < nb_globals; ++i) {
+ TCGTemp *its = &s->temps[i];
+ if (its->indirect_reg) {
+ TCGTemp *dts = tcg_temp_alloc(s);
+ dts->type = its->type;
+ dts->base_type = its->base_type;
+ its->state_ptr = dts;
+ } else {
+ its->state_ptr = NULL;
+ }
+ /* All globals begin dead. */
+ its->state = TS_DEAD;
+ }
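+    /* The remaining (non-global) temps get no shadow temporary; note
+       that 'i' continues from nb_globals here. */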
+ for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
+ TCGTemp *its = &s->temps[i];
+ its->state_ptr = NULL;
+ its->state = TS_DEAD;
+ }
+
+ QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
+ TCGOpcode opc = op->opc;
+ const TCGOpDef *def = &tcg_op_defs[opc];
+ TCGLifeData arg_life = op->life;
+ int nb_iargs, nb_oargs, call_flags;
+ TCGTemp *arg_ts, *dir_ts;
+
+ if (opc == INDEX_op_call) {
+ nb_oargs = TCGOP_CALLO(op);
+ nb_iargs = TCGOP_CALLI(op);
+ call_flags = tcg_call_flags(op);
+ } else {
+ nb_iargs = def->nb_iargs;
+ nb_oargs = def->nb_oargs;
+
+            /* Set flags the same way a call would require. */
+ if (def->flags & TCG_OPF_COND_BRANCH) {
+ /* Like reading globals: sync_globals */
+ call_flags = TCG_CALL_NO_WRITE_GLOBALS;
+ } else if (def->flags & TCG_OPF_BB_END) {
+ /* Like writing globals: save_globals */
+ call_flags = 0;
+ } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
+ /* Like reading globals: sync_globals */
+ call_flags = TCG_CALL_NO_WRITE_GLOBALS;
+ } else {
+ /* No effect on globals. */
+ call_flags = (TCG_CALL_NO_READ_GLOBALS |
+ TCG_CALL_NO_WRITE_GLOBALS);
+ }
+ }
+
+ /* Make sure that input arguments are available. */
+ for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
+ arg_ts = arg_temp(op->args[i]);
+ if (arg_ts) {
+ dir_ts = arg_ts->state_ptr;
+ if (dir_ts && arg_ts->state == TS_DEAD) {
+ TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
+ ? INDEX_op_ld_i32
+ : INDEX_op_ld_i64);
+ TCGOp *lop = tcg_op_insert_before(s, op, lopc);
+
+ lop->args[0] = temp_arg(dir_ts);
+ lop->args[1] = temp_arg(arg_ts->mem_base);
+ lop->args[2] = arg_ts->mem_offset;
+
+ /* Loaded, but synced with memory. */
+ arg_ts->state = TS_MEM;
+ }
+ }
+ }
+
+ /* Perform input replacement, and mark inputs that became dead.
+ No action is required except keeping temp_state up to date
+ so that we reload when needed. */
+ for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
+ arg_ts = arg_temp(op->args[i]);
+ if (arg_ts) {
+ dir_ts = arg_ts->state_ptr;
+ if (dir_ts) {
+ op->args[i] = temp_arg(dir_ts);
+ changes = true;
+ if (IS_DEAD_ARG(i)) {
+ arg_ts->state = TS_DEAD;
+ }
+ }
+ }
+ }
+
+ /* Liveness analysis should ensure that the following are
+ all correct, for call sites and basic block end points. */
+ if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
+ /* Nothing to do */
+ } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
+ for (i = 0; i < nb_globals; ++i) {
+ /* Liveness should see that globals are synced back,
+ that is, either TS_DEAD or TS_MEM. */
+ arg_ts = &s->temps[i];
+ tcg_debug_assert(arg_ts->state_ptr == 0
+ || arg_ts->state != 0);
+ }
+ } else {
+ for (i = 0; i < nb_globals; ++i) {
+ /* Liveness should see that globals are saved back,
+ that is, TS_DEAD, waiting to be reloaded. */
+ arg_ts = &s->temps[i];
+ tcg_debug_assert(arg_ts->state_ptr == 0
+ || arg_ts->state == TS_DEAD);
+ }
+ }
+
+ /* Outputs become available. */
+ if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
+ arg_ts = arg_temp(op->args[0]);
+ dir_ts = arg_ts->state_ptr;
+ if (dir_ts) {
+ op->args[0] = temp_arg(dir_ts);
+ changes = true;
+
+ /* The output is now live and modified. */
+ arg_ts->state = 0;
+
+ if (NEED_SYNC_ARG(0)) {
+ TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
+ ? INDEX_op_st_i32
+ : INDEX_op_st_i64);
+ TCGOp *sop = tcg_op_insert_after(s, op, sopc);
+ TCGTemp *out_ts = dir_ts;
+
+ if (IS_DEAD_ARG(0)) {
+ out_ts = arg_temp(op->args[1]);
+ arg_ts->state = TS_DEAD;
+ tcg_op_remove(s, op);
+ } else {
+ arg_ts->state = TS_MEM;
+ }
+
+ sop->args[0] = temp_arg(out_ts);
+ sop->args[1] = temp_arg(arg_ts->mem_base);
+ sop->args[2] = arg_ts->mem_offset;
+ } else {
+ tcg_debug_assert(!IS_DEAD_ARG(0));
+ }
+ }
+ } else {
+ for (i = 0; i < nb_oargs; i++) {
+ arg_ts = arg_temp(op->args[i]);
+ dir_ts = arg_ts->state_ptr;
+ if (!dir_ts) {
+ continue;
+ }
+ op->args[i] = temp_arg(dir_ts);
+ changes = true;
+
+ /* The output is now live and modified. */
+ arg_ts->state = 0;
+
+ /* Sync outputs upon their last write. */
+ if (NEED_SYNC_ARG(i)) {
+ TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
+ ? INDEX_op_st_i32
+ : INDEX_op_st_i64);
+ TCGOp *sop = tcg_op_insert_after(s, op, sopc);
+
+ sop->args[0] = temp_arg(dir_ts);
+ sop->args[1] = temp_arg(arg_ts->mem_base);
+ sop->args[2] = arg_ts->mem_offset;
+
+ arg_ts->state = TS_MEM;
+ }
+ /* Drop outputs that are dead. */
+ if (IS_DEAD_ARG(i)) {
+ arg_ts->state = TS_DEAD;
+ }
+ }
+ }
+ }
+
+ return changes;
+}
+
+#ifdef CONFIG_DEBUG_TCG
+static void dump_regs(TCGContext *s)
+{
+ TCGTemp *ts;
+ int i;
+ char buf[64];
+
+ for(i = 0; i < s->nb_temps; i++) {
+ ts = &s->temps[i];
+ printf(" %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
+ switch(ts->val_type) {
+ case TEMP_VAL_REG:
+ printf("%s", tcg_target_reg_names[ts->reg]);
+ break;
+ case TEMP_VAL_MEM:
+ printf("%d(%s)", (int)ts->mem_offset,
+ tcg_target_reg_names[ts->mem_base->reg]);
+ break;
+ case TEMP_VAL_CONST:
+ printf("$0x%" PRIx64, ts->val);
+ break;
+ case TEMP_VAL_DEAD:
+ printf("D");
+ break;
+ default:
+ printf("???");
+ break;
+ }
+ printf("\n");
+ }
+
+ for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
+ if (s->reg_to_temp[i] != NULL) {
+ printf("%s: %s\n",
+ tcg_target_reg_names[i],
+ tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
+ }
+ }
+}
+
+static void check_regs(TCGContext *s)
+{
+ int reg;
+ int k;
+ TCGTemp *ts;
+ char buf[64];
+
+ for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
+ ts = s->reg_to_temp[reg];
+ if (ts != NULL) {
+ if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
+ printf("Inconsistency for register %s:\n",
+ tcg_target_reg_names[reg]);
+ goto fail;
+ }
+ }
+ }
+ for (k = 0; k < s->nb_temps; k++) {
+ ts = &s->temps[k];
+ if (ts->val_type == TEMP_VAL_REG
+ && ts->kind != TEMP_FIXED
+ && s->reg_to_temp[ts->reg] != ts) {
+ printf("Inconsistency for temp %s:\n",
+ tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
+ fail:
+ printf("reg state:\n");
+ dump_regs(s);
+ tcg_abort();
+ }
+ }
+}
+#endif
+
+static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
+{
+ intptr_t off, size, align;
+
+ switch (ts->type) {
+ case TCG_TYPE_I32:
+ size = align = 4;
+ break;
+ case TCG_TYPE_I64:
+ case TCG_TYPE_V64:
+ size = align = 8;
+ break;
+ case TCG_TYPE_V128:
+ size = align = 16;
+ break;
+ case TCG_TYPE_V256:
+ /* Note that we do not require aligned storage for V256. */
+ size = 32, align = 16;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ /*
+ * Assume the stack is sufficiently aligned.
+ * This affects e.g. ARM NEON, where we have 8 byte stack alignment
+ * and do not require 16 byte vector alignment. This seems slightly
+ * easier than fully parameterizing the above switch statement.
+ */
+ align = MIN(TCG_TARGET_STACK_ALIGN, align);
+ off = ROUND_UP(s->current_frame_offset, align);
+
+ /* If we've exhausted the stack frame, restart with a smaller TB. */
+ if (off + size > s->frame_end) {
+ tcg_raise_tb_overflow(s);
+ }
+ s->current_frame_offset = off + size;
+
+ ts->mem_offset = off;
+#if defined(__sparc__)
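+    /* Include the stack bias required by the SPARC ABI. */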
+ ts->mem_offset += TCG_TARGET_STACK_BIAS;
+#endif
+ ts->mem_base = s->frame_temp;
+ ts->mem_allocated = 1;
+}
+
+static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
+
+/* Mark a temporary as free or dead. If 'free_or_dead' is negative,
+ mark it free; otherwise mark it dead. */
+static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
+{
+ TCGTempVal new_type;
+
+ switch (ts->kind) {
+ case TEMP_FIXED:
+ return;
+ case TEMP_GLOBAL:
+ case TEMP_LOCAL:
+ new_type = TEMP_VAL_MEM;
+ break;
+ case TEMP_NORMAL:
+ new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
+ break;
+ case TEMP_CONST:
+ new_type = TEMP_VAL_CONST;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ if (ts->val_type == TEMP_VAL_REG) {
+ s->reg_to_temp[ts->reg] = NULL;
+ }
+ ts->val_type = new_type;
+}
+
+/* Mark a temporary as dead. */
+static inline void temp_dead(TCGContext *s, TCGTemp *ts)
+{
+ temp_free_or_dead(s, ts, 1);
+}
+
+/* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
+   register needs to be allocated to store a constant. If 'free_or_dead'
+ is non-zero, subsequently release the temporary; if it is positive, the
+ temp is dead; if it is negative, the temp is free. */
+static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
+ TCGRegSet preferred_regs, int free_or_dead)
+{
+ if (!temp_readonly(ts) && !ts->mem_coherent) {
+ if (!ts->mem_allocated) {
+ temp_allocate_frame(s, ts);
+ }
+ switch (ts->val_type) {
+ case TEMP_VAL_CONST:
+ /* If we're going to free the temp immediately, then we won't
+ require it later in a register, so attempt to store the
+ constant to memory directly. */
+ if (free_or_dead
+ && tcg_out_sti(s, ts->type, ts->val,
+ ts->mem_base->reg, ts->mem_offset)) {
+ break;
+ }
+ temp_load(s, ts, tcg_target_available_regs[ts->type],
+ allocated_regs, preferred_regs);
+ /* fallthrough */
+
+ case TEMP_VAL_REG:
+ tcg_out_st(s, ts->type, ts->reg,
+ ts->mem_base->reg, ts->mem_offset);
+ break;
+
+ case TEMP_VAL_MEM:
+ break;
+
+ case TEMP_VAL_DEAD:
+ default:
+ tcg_abort();
+ }
+ ts->mem_coherent = 1;
+ }
+ if (free_or_dead) {
+ temp_free_or_dead(s, ts, free_or_dead);
+ }
+}
+
+/* free register 'reg' by spilling the corresponding temporary if necessary */
+static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
+{
+ TCGTemp *ts = s->reg_to_temp[reg];
+ if (ts != NULL) {
+ temp_sync(s, ts, allocated_regs, 0, -1);
+ }
+}
+
+/**
+ * tcg_reg_alloc:
+ * @required_regs: Set of registers in which we must allocate.
+ * @allocated_regs: Set of registers which must be avoided.
+ * @preferred_regs: Set of registers we should prefer.
+ * @rev: True if we search the registers in "indirect" order.
+ *
+ * The allocated register must be in @required_regs & ~@allocated_regs,
+ * but if we can put it in @preferred_regs we may save a move later.
+ */
+static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
+ TCGRegSet allocated_regs,
+ TCGRegSet preferred_regs, bool rev)
+{
+ int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
+ TCGRegSet reg_ct[2];
+ const int *order;
+
+ reg_ct[1] = required_regs & ~allocated_regs;
+ tcg_debug_assert(reg_ct[1] != 0);
+ reg_ct[0] = reg_ct[1] & preferred_regs;
+
+ /* Skip the preferred_regs option if it cannot be satisfied,
+ or if the preference made no difference. */
+ f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
+
+ order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
+
+ /* Try free registers, preferences first. */
+ for (j = f; j < 2; j++) {
+ TCGRegSet set = reg_ct[j];
+
+ if (tcg_regset_single(set)) {
+ /* One register in the set. */
+ TCGReg reg = tcg_regset_first(set);
+ if (s->reg_to_temp[reg] == NULL) {
+ return reg;
+ }
+ } else {
+ for (i = 0; i < n; i++) {
+ TCGReg reg = order[i];
+ if (s->reg_to_temp[reg] == NULL &&
+ tcg_regset_test_reg(set, reg)) {
+ return reg;
+ }
+ }
+ }
+ }
+
+ /* We must spill something. */
+ for (j = f; j < 2; j++) {
+ TCGRegSet set = reg_ct[j];
+
+ if (tcg_regset_single(set)) {
+ /* One register in the set. */
+ TCGReg reg = tcg_regset_first(set);
+ tcg_reg_free(s, reg, allocated_regs);
+ return reg;
+ } else {
+ for (i = 0; i < n; i++) {
+ TCGReg reg = order[i];
+ if (tcg_regset_test_reg(set, reg)) {
+ tcg_reg_free(s, reg, allocated_regs);
+ return reg;
+ }
+ }
+ }
+ }
+
+ tcg_abort();
+}
+
+/* Make sure the temporary is in a register. If needed, allocate the register
+ from DESIRED while avoiding ALLOCATED. */
+static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
+ TCGRegSet allocated_regs, TCGRegSet preferred_regs)
+{
+ TCGReg reg;
+
+ switch (ts->val_type) {
+ case TEMP_VAL_REG:
+ return;
+ case TEMP_VAL_CONST:
+ reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
+ preferred_regs, ts->indirect_base);
+ if (ts->type <= TCG_TYPE_I64) {
+ tcg_out_movi(s, ts->type, reg, ts->val);
+ } else {
+ uint64_t val = ts->val;
+ MemOp vece = MO_64;
+
+ /*
+ * Find the minimal vector element that matches the constant.
+             * The targets will, in general, have to do this search anyway,
+             * so do it generically here.
+ */
+ if (val == dup_const(MO_8, val)) {
+ vece = MO_8;
+ } else if (val == dup_const(MO_16, val)) {
+ vece = MO_16;
+ } else if (val == dup_const(MO_32, val)) {
+ vece = MO_32;
+ }
+
+ tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
+ }
+ ts->mem_coherent = 0;
+ break;
+ case TEMP_VAL_MEM:
+ reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
+ preferred_regs, ts->indirect_base);
+ tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
+ ts->mem_coherent = 1;
+ break;
+ case TEMP_VAL_DEAD:
+ default:
+ tcg_abort();
+ }
+ ts->reg = reg;
+ ts->val_type = TEMP_VAL_REG;
+ s->reg_to_temp[reg] = ts;
+}
+
+/* Save a temporary to memory. 'allocated_regs' is used in case a
+   temporary register needs to be allocated to store a constant. */
+static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
+{
+ /* The liveness analysis already ensures that globals are back
+       in memory. Keep a tcg_debug_assert for safety. */
+ tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
+}
+
+/* save globals to their canonical location and assume they can be
+   modified by the following code. 'allocated_regs' is used in case a
+   temporary register needs to be allocated to store a constant. */
+static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
+{
+ int i, n;
+
+ for (i = 0, n = s->nb_globals; i < n; i++) {
+ temp_save(s, &s->temps[i], allocated_regs);
+ }
+}
+
+/* sync globals to their canonical location and assume they can be
+ read by the following code. 'allocated_regs' is used in case a
+   temporary register needs to be allocated to store a constant. */
+static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
+{
+ int i, n;
+
+ for (i = 0, n = s->nb_globals; i < n; i++) {
+ TCGTemp *ts = &s->temps[i];
+ tcg_debug_assert(ts->val_type != TEMP_VAL_REG
+ || ts->kind == TEMP_FIXED
+ || ts->mem_coherent);
+ }
+}
+
+/* at the end of a basic block, we assume all temporaries are dead and
+ all globals are stored at their canonical location. */
+static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
+{
+ int i;
+
+ for (i = s->nb_globals; i < s->nb_temps; i++) {
+ TCGTemp *ts = &s->temps[i];
+
+ switch (ts->kind) {
+ case TEMP_LOCAL:
+ temp_save(s, ts, allocated_regs);
+ break;
+ case TEMP_NORMAL:
+ /* The liveness analysis already ensures that temps are dead.
+               Keep a tcg_debug_assert for safety. */
+ tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
+ break;
+ case TEMP_CONST:
+ /* Similarly, we should have freed any allocated register. */
+ tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ }
+
+ save_globals(s, allocated_regs);
+}
+
+/*
+ * At a conditional branch, we assume all temporaries are dead and
+ * all globals and local temps are synced to their location.
+ */
+static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
+{
+ sync_globals(s, allocated_regs);
+
+ for (int i = s->nb_globals; i < s->nb_temps; i++) {
+ TCGTemp *ts = &s->temps[i];
+ /*
+ * The liveness analysis already ensures that temps are dead.
+ * Keep tcg_debug_asserts for safety.
+ */
+ switch (ts->kind) {
+ case TEMP_LOCAL:
+ tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
+ break;
+ case TEMP_NORMAL:
+ tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
+ break;
+ case TEMP_CONST:
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ }
+}
+
+/*
+ * Specialized code generation for INDEX_op_mov_* with a constant.
+ */
+static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
+ tcg_target_ulong val, TCGLifeData arg_life,
+ TCGRegSet preferred_regs)
+{
+ /* ENV should not be modified. */
+ tcg_debug_assert(!temp_readonly(ots));
+
+ /* The movi is not explicitly generated here. */
+ if (ots->val_type == TEMP_VAL_REG) {
+ s->reg_to_temp[ots->reg] = NULL;
+ }
+ ots->val_type = TEMP_VAL_CONST;
+ ots->val = val;
+ ots->mem_coherent = 0;
+ if (NEED_SYNC_ARG(0)) {
+ temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
+ } else if (IS_DEAD_ARG(0)) {
+ temp_dead(s, ots);
+ }
+}
+
+/*
+ * Specialized code generation for INDEX_op_mov_*.
+ */
+static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
+{
+ const TCGLifeData arg_life = op->life;
+ TCGRegSet allocated_regs, preferred_regs;
+ TCGTemp *ts, *ots;
+ TCGType otype, itype;
+
+ allocated_regs = s->reserved_regs;
+ preferred_regs = op->output_pref[0];
+ ots = arg_temp(op->args[0]);
+ ts = arg_temp(op->args[1]);
+
+ /* ENV should not be modified. */
+ tcg_debug_assert(!temp_readonly(ots));
+
+ /* Note that otype != itype for no-op truncation. */
+ otype = ots->type;
+ itype = ts->type;
+
+ if (ts->val_type == TEMP_VAL_CONST) {
+ /* propagate constant or generate sti */
+ tcg_target_ulong val = ts->val;
+ if (IS_DEAD_ARG(1)) {
+ temp_dead(s, ts);
+ }
+ tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
+ return;
+ }
+
+ /* If the source value is in memory we're going to be forced
+ to have it in a register in order to perform the copy. Copy
+       the SOURCE value into its own register first; that way we
+ don't have to reload SOURCE the next time it is used. */
+ if (ts->val_type == TEMP_VAL_MEM) {
+ temp_load(s, ts, tcg_target_available_regs[itype],
+ allocated_regs, preferred_regs);
+ }
+
+ tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
+ if (IS_DEAD_ARG(0)) {
+ /* mov to a non-saved dead register makes no sense (even with
+ liveness analysis disabled). */
+ tcg_debug_assert(NEED_SYNC_ARG(0));
+ if (!ots->mem_allocated) {
+ temp_allocate_frame(s, ots);
+ }
+ tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
+ if (IS_DEAD_ARG(1)) {
+ temp_dead(s, ts);
+ }
+ temp_dead(s, ots);
+ } else {
+ if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
+ /* the mov can be suppressed */
+ if (ots->val_type == TEMP_VAL_REG) {
+ s->reg_to_temp[ots->reg] = NULL;
+ }
+ ots->reg = ts->reg;
+ temp_dead(s, ts);
+ } else {
+ if (ots->val_type != TEMP_VAL_REG) {
+ /* When allocating a new register, make sure to not spill the
+ input one. */
+ tcg_regset_set_reg(allocated_regs, ts->reg);
+ ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
+ allocated_regs, preferred_regs,
+ ots->indirect_base);
+ }
+ if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) {
+ /*
+ * Cross register class move not supported.
+ * Store the source register into the destination slot
+ * and leave the destination temp as TEMP_VAL_MEM.
+ */
+ assert(!temp_readonly(ots));
+                if (!ots->mem_allocated) {
+ temp_allocate_frame(s, ots);
+ }
+ tcg_out_st(s, ts->type, ts->reg,
+ ots->mem_base->reg, ots->mem_offset);
+ ots->mem_coherent = 1;
+ temp_free_or_dead(s, ots, -1);
+ return;
+ }
+ }
+ ots->val_type = TEMP_VAL_REG;
+ ots->mem_coherent = 0;
+ s->reg_to_temp[ots->reg] = ots;
+ if (NEED_SYNC_ARG(0)) {
+ temp_sync(s, ots, allocated_regs, 0, 0);
+ }
+ }
+}
+
+/*
+ * Specialized code generation for INDEX_op_dup_vec.
+ */
+static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
+{
+ const TCGLifeData arg_life = op->life;
+ TCGRegSet dup_out_regs, dup_in_regs;
+ TCGTemp *its, *ots;
+ TCGType itype, vtype;
+ intptr_t endian_fixup;
+ unsigned vece;
+ bool ok;
+
+ ots = arg_temp(op->args[0]);
+ its = arg_temp(op->args[1]);
+
+ /* ENV should not be modified. */
+ tcg_debug_assert(!temp_readonly(ots));
+
+ itype = its->type;
+ vece = TCGOP_VECE(op);
+ vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
+
+ if (its->val_type == TEMP_VAL_CONST) {
+ /* Propagate constant via movi -> dupi. */
+ tcg_target_ulong val = its->val;
+ if (IS_DEAD_ARG(1)) {
+ temp_dead(s, its);
+ }
+ tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]);
+ return;
+ }
+
+ dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
+ dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
+
+ /* Allocate the output register now. */
+ if (ots->val_type != TEMP_VAL_REG) {
+ TCGRegSet allocated_regs = s->reserved_regs;
+
+ if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
+ /* Make sure to not spill the input register. */
+ tcg_regset_set_reg(allocated_regs, its->reg);
+ }
+ ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
+ op->output_pref[0], ots->indirect_base);
+ ots->val_type = TEMP_VAL_REG;
+ ots->mem_coherent = 0;
+ s->reg_to_temp[ots->reg] = ots;
+ }
+
+ switch (its->val_type) {
+ case TEMP_VAL_REG:
+ /*
+         * The dup constraints must be broad, covering all possible VECE.
+         * However, tcg_out_dup_vec() gets to see the VECE and we allow it
+ * to fail, indicating that extra moves are required for that case.
+ */
+ if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
+ if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
+ goto done;
+ }
+ /* Try again from memory or a vector input register. */
+ }
+ if (!its->mem_coherent) {
+ /*
+ * The input register is not synced, and so an extra store
+ * would be required to use memory. Attempt an integer-vector
+ * register move first. We do not have a TCGRegSet for this.
+ */
+ if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
+ break;
+ }
+ /* Sync the temp back to its slot and load from there. */
+ temp_sync(s, its, s->reserved_regs, 0, 0);
+ }
+ /* fall through */
+
+ case TEMP_VAL_MEM:
+#ifdef HOST_WORDS_BIGENDIAN
+ endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8;
+ endian_fixup -= 1 << vece;
+#else
+ endian_fixup = 0;
+#endif
+ if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
+ its->mem_offset + endian_fixup)) {
+ goto done;
+ }
+ tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
+ break;
+
+ default:
+ g_assert_not_reached();
+ }
+
+ /* We now have a vector input register, so dup must succeed. */
+ ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
+ tcg_debug_assert(ok);
+
+ done:
+ if (IS_DEAD_ARG(1)) {
+ temp_dead(s, its);
+ }
+ if (NEED_SYNC_ARG(0)) {
+ temp_sync(s, ots, s->reserved_regs, 0, 0);
+ }
+ if (IS_DEAD_ARG(0)) {
+ temp_dead(s, ots);
+ }
+}
+
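+/*
+ * Illustration of the endian_fixup above, with hypothetical values: on a
+ * big-endian host the least significant byte of a TCG_TYPE_I64 temp lives
+ * at mem_offset + 7, so a dup with vece == MO_8 loads from
+ * mem_offset + (8 - (1 << MO_8)) = mem_offset + 7; on a little-endian host
+ * the low-order element is already at mem_offset and no fixup is needed.
+ */
+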
+static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
+{
+ const TCGLifeData arg_life = op->life;
+ const TCGOpDef * const def = &tcg_op_defs[op->opc];
+ TCGRegSet i_allocated_regs;
+ TCGRegSet o_allocated_regs;
+ int i, k, nb_iargs, nb_oargs;
+ TCGReg reg;
+ TCGArg arg;
+ const TCGArgConstraint *arg_ct;
+ TCGTemp *ts;
+ TCGArg new_args[TCG_MAX_OP_ARGS];
+ int const_args[TCG_MAX_OP_ARGS];
+
+ nb_oargs = def->nb_oargs;
+ nb_iargs = def->nb_iargs;
+
+ /* copy constants */
+ memcpy(new_args + nb_oargs + nb_iargs,
+ op->args + nb_oargs + nb_iargs,
+ sizeof(TCGArg) * def->nb_cargs);
+
+ i_allocated_regs = s->reserved_regs;
+ o_allocated_regs = s->reserved_regs;
+
+ /* satisfy input constraints */
+ for (k = 0; k < nb_iargs; k++) {
+ TCGRegSet i_preferred_regs, o_preferred_regs;
+
+ i = def->args_ct[nb_oargs + k].sort_index;
+ arg = op->args[i];
+ arg_ct = &def->args_ct[i];
+ ts = arg_temp(arg);
+
+ if (ts->val_type == TEMP_VAL_CONST
+ && tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) {
+ /* constant is OK for instruction */
+ const_args[i] = 1;
+ new_args[i] = ts->val;
+ continue;
+ }
+
+ i_preferred_regs = o_preferred_regs = 0;
+ if (arg_ct->ialias) {
+ o_preferred_regs = op->output_pref[arg_ct->alias_index];
+
+ /*
+ * If the input is readonly, then it cannot also be an
+ * output and aliased to itself. If the input is not
+ * dead after the instruction, we must allocate a new
+ * register and move it.
+ */
+ if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
+ goto allocate_in_reg;
+ }
+
+ /*
+ * Check if the current register has already been allocated
+ * for another input aliased to an output.
+ */
+ if (ts->val_type == TEMP_VAL_REG) {
+ reg = ts->reg;
+ for (int k2 = 0; k2 < k; k2++) {
+ int i2 = def->args_ct[nb_oargs + k2].sort_index;
+ if (def->args_ct[i2].ialias && reg == new_args[i2]) {
+ goto allocate_in_reg;
+ }
+ }
+ }
+ i_preferred_regs = o_preferred_regs;
+ }
+
+ temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs);
+ reg = ts->reg;
+
+ if (!tcg_regset_test_reg(arg_ct->regs, reg)) {
+ allocate_in_reg:
+ /*
+ * Allocate a new register matching the constraint
+ * and move the temporary register into it.
+ */
+ temp_load(s, ts, tcg_target_available_regs[ts->type],
+ i_allocated_regs, 0);
+ reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs,
+ o_preferred_regs, ts->indirect_base);
+ if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
+ /*
+ * Cross register class move not supported. Sync the
+ * temp back to its slot and load from there.
+ */
+ temp_sync(s, ts, i_allocated_regs, 0, 0);
+ tcg_out_ld(s, ts->type, reg,
+ ts->mem_base->reg, ts->mem_offset);
+ }
+ }
+ new_args[i] = reg;
+ const_args[i] = 0;
+ tcg_regset_set_reg(i_allocated_regs, reg);
+ }
+
+ /* mark dead temporaries and free the associated registers */
+ for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
+ if (IS_DEAD_ARG(i)) {
+ temp_dead(s, arg_temp(op->args[i]));
+ }
+ }
+
+ if (def->flags & TCG_OPF_COND_BRANCH) {
+ tcg_reg_alloc_cbranch(s, i_allocated_regs);
+ } else if (def->flags & TCG_OPF_BB_END) {
+ tcg_reg_alloc_bb_end(s, i_allocated_regs);
+ } else {
+ if (def->flags & TCG_OPF_CALL_CLOBBER) {
+ /* XXX: permit generic clobber register list ? */
+ for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
+ if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
+ tcg_reg_free(s, i, i_allocated_regs);
+ }
+ }
+ }
+ if (def->flags & TCG_OPF_SIDE_EFFECTS) {
+ /* sync globals if the op has side effects and might trigger
+ an exception. */
+ sync_globals(s, i_allocated_regs);
+ }
+
+ /* satisfy the output constraints */
+        for (k = 0; k < nb_oargs; k++) {
+ i = def->args_ct[k].sort_index;
+ arg = op->args[i];
+ arg_ct = &def->args_ct[i];
+ ts = arg_temp(arg);
+
+ /* ENV should not be modified. */
+ tcg_debug_assert(!temp_readonly(ts));
+
+ if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
+ reg = new_args[arg_ct->alias_index];
+ } else if (arg_ct->newreg) {
+ reg = tcg_reg_alloc(s, arg_ct->regs,
+ i_allocated_regs | o_allocated_regs,
+ op->output_pref[k], ts->indirect_base);
+ } else {
+ reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
+ op->output_pref[k], ts->indirect_base);
+ }
+ tcg_regset_set_reg(o_allocated_regs, reg);
+ if (ts->val_type == TEMP_VAL_REG) {
+ s->reg_to_temp[ts->reg] = NULL;
+ }
+ ts->val_type = TEMP_VAL_REG;
+ ts->reg = reg;
+ /*
+ * Temp value is modified, so the value kept in memory is
+ * potentially not the same.
+ */
+ ts->mem_coherent = 0;
+ s->reg_to_temp[reg] = ts;
+ new_args[i] = reg;
+ }
+ }
+
+ /* emit instruction */
+ if (def->flags & TCG_OPF_VECTOR) {
+ tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
+ new_args, const_args);
+ } else {
+ tcg_out_op(s, op->opc, new_args, const_args);
+ }
+
+    /* move the outputs into the correct registers if needed */
+    for (i = 0; i < nb_oargs; i++) {
+ ts = arg_temp(op->args[i]);
+
+ /* ENV should not be modified. */
+ tcg_debug_assert(!temp_readonly(ts));
+
+ if (NEED_SYNC_ARG(i)) {
+ temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
+ } else if (IS_DEAD_ARG(i)) {
+ temp_dead(s, ts);
+ }
+ }
+}
+
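+/*
+ * Illustration of the ialias handling above, for a hypothetical two-operand
+ * op whose output 0 must alias input 1 (an x86-style "0" constraint): if
+ * that input temp is read-only or still live after the op, the
+ * allocate_in_reg path moves its value into a fresh register matching the
+ * constraint, so that writing the aliased output cannot clobber a value
+ * that is needed again later.
+ */
+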
+static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
+{
+ const TCGLifeData arg_life = op->life;
+ TCGTemp *ots, *itsl, *itsh;
+ TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
+
+ /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
+ tcg_debug_assert(TCGOP_VECE(op) == MO_64);
+
+ ots = arg_temp(op->args[0]);
+ itsl = arg_temp(op->args[1]);
+ itsh = arg_temp(op->args[2]);
+
+ /* ENV should not be modified. */
+ tcg_debug_assert(!temp_readonly(ots));
+
+ /* Allocate the output register now. */
+ if (ots->val_type != TEMP_VAL_REG) {
+ TCGRegSet allocated_regs = s->reserved_regs;
+ TCGRegSet dup_out_regs =
+ tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
+
+ /* Make sure to not spill the input registers. */
+ if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
+ tcg_regset_set_reg(allocated_regs, itsl->reg);
+ }
+ if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
+ tcg_regset_set_reg(allocated_regs, itsh->reg);
+ }
+
+ ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
+ op->output_pref[0], ots->indirect_base);
+ ots->val_type = TEMP_VAL_REG;
+ ots->mem_coherent = 0;
+ s->reg_to_temp[ots->reg] = ots;
+ }
+
+ /* Promote dup2 of immediates to dupi_vec. */
+ if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
+ uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
+ MemOp vece = MO_64;
+
+ if (val == dup_const(MO_8, val)) {
+ vece = MO_8;
+ } else if (val == dup_const(MO_16, val)) {
+ vece = MO_16;
+ } else if (val == dup_const(MO_32, val)) {
+ vece = MO_32;
+ }
+
+ tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
+ goto done;
+ }
+
+ /* If the two inputs form one 64-bit value, try dupm_vec. */
+ if (itsl + 1 == itsh && itsl->base_type == TCG_TYPE_I64) {
+ if (!itsl->mem_coherent) {
+ temp_sync(s, itsl, s->reserved_regs, 0, 0);
+ }
+ if (!itsh->mem_coherent) {
+ temp_sync(s, itsh, s->reserved_regs, 0, 0);
+ }
+#ifdef HOST_WORDS_BIGENDIAN
+ TCGTemp *its = itsh;
+#else
+ TCGTemp *its = itsl;
+#endif
+ if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
+ its->mem_base->reg, its->mem_offset)) {
+ goto done;
+ }
+ }
+
+ /* Fall back to generic expansion. */
+ return false;
+
+ done:
+ if (IS_DEAD_ARG(1)) {
+ temp_dead(s, itsl);
+ }
+ if (IS_DEAD_ARG(2)) {
+ temp_dead(s, itsh);
+ }
+ if (NEED_SYNC_ARG(0)) {
+ temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
+ } else if (IS_DEAD_ARG(0)) {
+ temp_dead(s, ots);
+ }
+ return true;
+}
+
+#ifdef TCG_TARGET_STACK_GROWSUP
+#define STACK_DIR(x) (-(x))
+#else
+#define STACK_DIR(x) (x)
+#endif
+
+static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
+{
+ const int nb_oargs = TCGOP_CALLO(op);
+ const int nb_iargs = TCGOP_CALLI(op);
+ const TCGLifeData arg_life = op->life;
+ const TCGHelperInfo *info;
+ int flags, nb_regs, i;
+ TCGReg reg;
+ TCGArg arg;
+ TCGTemp *ts;
+ intptr_t stack_offset;
+ size_t call_stack_size;
+ tcg_insn_unit *func_addr;
+ int allocate_args;
+ TCGRegSet allocated_regs;
+
+ func_addr = tcg_call_func(op);
+ info = tcg_call_info(op);
+ flags = info->flags;
+
+ nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
+ if (nb_regs > nb_iargs) {
+ nb_regs = nb_iargs;
+ }
+
+ /* assign stack slots first */
+ call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
+ call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
+ ~(TCG_TARGET_STACK_ALIGN - 1);
+ allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
+ if (allocate_args) {
+ /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
+ preallocate call stack */
+ tcg_abort();
+ }
+
+ stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
+ for (i = nb_regs; i < nb_iargs; i++) {
+ arg = op->args[nb_oargs + i];
+#ifdef TCG_TARGET_STACK_GROWSUP
+ stack_offset -= sizeof(tcg_target_long);
+#endif
+ if (arg != TCG_CALL_DUMMY_ARG) {
+ ts = arg_temp(arg);
+ temp_load(s, ts, tcg_target_available_regs[ts->type],
+ s->reserved_regs, 0);
+ tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
+ }
+#ifndef TCG_TARGET_STACK_GROWSUP
+ stack_offset += sizeof(tcg_target_long);
+#endif
+ }
+
+ /* assign input registers */
+ allocated_regs = s->reserved_regs;
+ for (i = 0; i < nb_regs; i++) {
+ arg = op->args[nb_oargs + i];
+ if (arg != TCG_CALL_DUMMY_ARG) {
+ ts = arg_temp(arg);
+ reg = tcg_target_call_iarg_regs[i];
+
+ if (ts->val_type == TEMP_VAL_REG) {
+ if (ts->reg != reg) {
+ tcg_reg_free(s, reg, allocated_regs);
+ if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
+ /*
+ * Cross register class move not supported. Sync the
+ * temp back to its slot and load from there.
+ */
+ temp_sync(s, ts, allocated_regs, 0, 0);
+ tcg_out_ld(s, ts->type, reg,
+ ts->mem_base->reg, ts->mem_offset);
+ }
+ }
+ } else {
+ TCGRegSet arg_set = 0;
+
+ tcg_reg_free(s, reg, allocated_regs);
+ tcg_regset_set_reg(arg_set, reg);
+ temp_load(s, ts, arg_set, allocated_regs, 0);
+ }
+
+ tcg_regset_set_reg(allocated_regs, reg);
+ }
+ }
+
+ /* mark dead temporaries and free the associated registers */
+ for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
+ if (IS_DEAD_ARG(i)) {
+ temp_dead(s, arg_temp(op->args[i]));
+ }
+ }
+
+ /* clobber call registers */
+ for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
+ if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
+ tcg_reg_free(s, i, allocated_regs);
+ }
+ }
+
+ /* Save globals if they might be written by the helper, sync them if
+ they might be read. */
+ if (flags & TCG_CALL_NO_READ_GLOBALS) {
+ /* Nothing to do */
+ } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
+ sync_globals(s, allocated_regs);
+ } else {
+ save_globals(s, allocated_regs);
+ }
+
+#ifdef CONFIG_TCG_INTERPRETER
+ {
+ gpointer hash = (gpointer)(uintptr_t)info->typemask;
+ ffi_cif *cif = g_hash_table_lookup(ffi_table, hash);
+ assert(cif != NULL);
+ tcg_out_call(s, func_addr, cif);
+ }
+#else
+ tcg_out_call(s, func_addr);
+#endif
+
+ /* assign output registers and emit moves if needed */
+    for (i = 0; i < nb_oargs; i++) {
+ arg = op->args[i];
+ ts = arg_temp(arg);
+
+ /* ENV should not be modified. */
+ tcg_debug_assert(!temp_readonly(ts));
+
+ reg = tcg_target_call_oarg_regs[i];
+ tcg_debug_assert(s->reg_to_temp[reg] == NULL);
+ if (ts->val_type == TEMP_VAL_REG) {
+ s->reg_to_temp[ts->reg] = NULL;
+ }
+ ts->val_type = TEMP_VAL_REG;
+ ts->reg = reg;
+ ts->mem_coherent = 0;
+ s->reg_to_temp[reg] = ts;
+ if (NEED_SYNC_ARG(i)) {
+ temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
+ } else if (IS_DEAD_ARG(i)) {
+ temp_dead(s, ts);
+ }
+ }
+}
+
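+/*
+ * Worked example for the stack-slot sizing in tcg_reg_alloc_call above,
+ * using hypothetical target parameters: on a 64-bit host with 6 argument
+ * registers and TCG_TARGET_STACK_ALIGN == 16, a call with 9 inputs spills
+ * 3 of them, i.e. 3 * 8 = 24 bytes, rounded up to 32.  As long as that
+ * stays within TCG_STATIC_CALL_ARGS_SIZE, allocate_args remains false and
+ * the statically reserved call area is used as-is.
+ */
+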
+#ifdef CONFIG_PROFILER
+
+/* avoid copy/paste errors */
+#define PROF_ADD(to, from, field) \
+ do { \
+ (to)->field += qatomic_read(&((from)->field)); \
+ } while (0)
+
+#define PROF_MAX(to, from, field) \
+ do { \
+ typeof((from)->field) val__ = qatomic_read(&((from)->field)); \
+ if (val__ > (to)->field) { \
+ (to)->field = val__; \
+ } \
+ } while (0)
+
+/* Pass in a zeroed @prof */
+static inline
+void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
+{
+ unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
+ unsigned int i;
+
+ for (i = 0; i < n_ctxs; i++) {
+ TCGContext *s = qatomic_read(&tcg_ctxs[i]);
+ const TCGProfile *orig = &s->prof;
+
+ if (counters) {
+ PROF_ADD(prof, orig, cpu_exec_time);
+ PROF_ADD(prof, orig, tb_count1);
+ PROF_ADD(prof, orig, tb_count);
+ PROF_ADD(prof, orig, op_count);
+ PROF_MAX(prof, orig, op_count_max);
+ PROF_ADD(prof, orig, temp_count);
+ PROF_MAX(prof, orig, temp_count_max);
+ PROF_ADD(prof, orig, del_op_count);
+ PROF_ADD(prof, orig, code_in_len);
+ PROF_ADD(prof, orig, code_out_len);
+ PROF_ADD(prof, orig, search_out_len);
+ PROF_ADD(prof, orig, interm_time);
+ PROF_ADD(prof, orig, code_time);
+ PROF_ADD(prof, orig, la_time);
+ PROF_ADD(prof, orig, opt_time);
+ PROF_ADD(prof, orig, restore_count);
+ PROF_ADD(prof, orig, restore_time);
+ }
+ if (table) {
+ int i;
+
+ for (i = 0; i < NB_OPS; i++) {
+ PROF_ADD(prof, orig, table_op_count[i]);
+ }
+ }
+ }
+}
+
+#undef PROF_ADD
+#undef PROF_MAX
+
+static void tcg_profile_snapshot_counters(TCGProfile *prof)
+{
+ tcg_profile_snapshot(prof, true, false);
+}
+
+static void tcg_profile_snapshot_table(TCGProfile *prof)
+{
+ tcg_profile_snapshot(prof, false, true);
+}
+
+void tcg_dump_op_count(GString *buf)
+{
+ TCGProfile prof = {};
+ int i;
+
+ tcg_profile_snapshot_table(&prof);
+ for (i = 0; i < NB_OPS; i++) {
+ g_string_append_printf(buf, "%s %" PRId64 "\n", tcg_op_defs[i].name,
+ prof.table_op_count[i]);
+ }
+}
+
+int64_t tcg_cpu_exec_time(void)
+{
+ unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
+ unsigned int i;
+ int64_t ret = 0;
+
+ for (i = 0; i < n_ctxs; i++) {
+ const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
+ const TCGProfile *prof = &s->prof;
+
+ ret += qatomic_read(&prof->cpu_exec_time);
+ }
+ return ret;
+}
+#else
+void tcg_dump_op_count(GString *buf)
+{
+ g_string_append_printf(buf, "[TCG profiler not compiled]\n");
+}
+
+int64_t tcg_cpu_exec_time(void)
+{
+ error_report("%s: TCG profiler not compiled", __func__);
+ exit(EXIT_FAILURE);
+}
+#endif
+
+
+int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
+{
+#ifdef CONFIG_PROFILER
+ TCGProfile *prof = &s->prof;
+#endif
+ int i, num_insns;
+ TCGOp *op;
+
+#ifdef CONFIG_PROFILER
+ {
+ int n = 0;
+
+ QTAILQ_FOREACH(op, &s->ops, link) {
+ n++;
+ }
+ qatomic_set(&prof->op_count, prof->op_count + n);
+ if (n > prof->op_count_max) {
+ qatomic_set(&prof->op_count_max, n);
+ }
+
+ n = s->nb_temps;
+ qatomic_set(&prof->temp_count, prof->temp_count + n);
+ if (n > prof->temp_count_max) {
+ qatomic_set(&prof->temp_count_max, n);
+ }
+ }
+#endif
+
+#ifdef DEBUG_DISAS
+ if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
+ && qemu_log_in_addr_range(tb->pc))) {
+ FILE *logfile = qemu_log_lock();
+ qemu_log("OP:\n");
+ tcg_dump_ops(s, false);
+ qemu_log("\n");
+ qemu_log_unlock(logfile);
+ }
+#endif
+
+#ifdef CONFIG_DEBUG_TCG
+ /* Ensure all labels referenced have been emitted. */
+ {
+ TCGLabel *l;
+ bool error = false;
+
+ QSIMPLEQ_FOREACH(l, &s->labels, next) {
+ if (unlikely(!l->present) && l->refs) {
+ qemu_log_mask(CPU_LOG_TB_OP,
+ "$L%d referenced but not present.\n", l->id);
+ error = true;
+ }
+ }
+ assert(!error);
+ }
+#endif
+
+#ifdef CONFIG_PROFILER
+ qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
+#endif
+
+#ifdef USE_TCG_OPTIMIZATIONS
+ tcg_optimize(s);
+#endif
+
+#ifdef CONFIG_PROFILER
+ qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
+ qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
+#endif
+
+ reachable_code_pass(s);
+ liveness_pass_1(s);
+
+ if (s->nb_indirects > 0) {
+#ifdef DEBUG_DISAS
+ if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
+ && qemu_log_in_addr_range(tb->pc))) {
+ FILE *logfile = qemu_log_lock();
+ qemu_log("OP before indirect lowering:\n");
+ tcg_dump_ops(s, false);
+ qemu_log("\n");
+ qemu_log_unlock(logfile);
+ }
+#endif
+ /* Replace indirect temps with direct temps. */
+ if (liveness_pass_2(s)) {
+ /* If changes were made, re-run liveness. */
+ liveness_pass_1(s);
+ }
+ }
+
+#ifdef CONFIG_PROFILER
+ qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
+#endif
+
+#ifdef DEBUG_DISAS
+ if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
+ && qemu_log_in_addr_range(tb->pc))) {
+ FILE *logfile = qemu_log_lock();
+ qemu_log("OP after optimization and liveness analysis:\n");
+ tcg_dump_ops(s, true);
+ qemu_log("\n");
+ qemu_log_unlock(logfile);
+ }
+#endif
+
+ tcg_reg_alloc_start(s);
+
+ /*
+ * Reset the buffer pointers when restarting after overflow.
+ * TODO: Move this into translate-all.c with the rest of the
+ * buffer management. Having only this done here is confusing.
+ */
+ s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
+ s->code_ptr = s->code_buf;
+
+#ifdef TCG_TARGET_NEED_LDST_LABELS
+ QSIMPLEQ_INIT(&s->ldst_labels);
+#endif
+#ifdef TCG_TARGET_NEED_POOL_LABELS
+ s->pool_labels = NULL;
+#endif
+
+ num_insns = -1;
+ QTAILQ_FOREACH(op, &s->ops, link) {
+ TCGOpcode opc = op->opc;
+
+#ifdef CONFIG_PROFILER
+ qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
+#endif
+
+ switch (opc) {
+ case INDEX_op_mov_i32:
+ case INDEX_op_mov_i64:
+ case INDEX_op_mov_vec:
+ tcg_reg_alloc_mov(s, op);
+ break;
+ case INDEX_op_dup_vec:
+ tcg_reg_alloc_dup(s, op);
+ break;
+ case INDEX_op_insn_start:
+ if (num_insns >= 0) {
+ size_t off = tcg_current_code_size(s);
+ s->gen_insn_end_off[num_insns] = off;
+ /* Assert that we do not overflow our stored offset. */
+ assert(s->gen_insn_end_off[num_insns] == off);
+ }
+ num_insns++;
+ for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
+ target_ulong a;
+#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
+ a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
+#else
+ a = op->args[i];
+#endif
+ s->gen_insn_data[num_insns][i] = a;
+ }
+ break;
+ case INDEX_op_discard:
+ temp_dead(s, arg_temp(op->args[0]));
+ break;
+ case INDEX_op_set_label:
+ tcg_reg_alloc_bb_end(s, s->reserved_regs);
+ tcg_out_label(s, arg_label(op->args[0]));
+ break;
+ case INDEX_op_call:
+ tcg_reg_alloc_call(s, op);
+ break;
+ case INDEX_op_dup2_vec:
+ if (tcg_reg_alloc_dup2(s, op)) {
+ break;
+ }
+ /* fall through */
+ default:
+ /* Sanity check that we've not introduced any unhandled opcodes. */
+ tcg_debug_assert(tcg_op_supported(opc));
+            /* Note: it would be much faster to have specialized register
+               allocator functions for some common argument patterns. */
+ tcg_reg_alloc_op(s, op);
+ break;
+ }
+#ifdef CONFIG_DEBUG_TCG
+ check_regs(s);
+#endif
+ /* Test for (pending) buffer overflow. The assumption is that any
+ one operation beginning below the high water mark cannot overrun
+ the buffer completely. Thus we can test for overflow after
+ generating code without having to check during generation. */
+ if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
+ return -1;
+ }
+ /* Test for TB overflow, as seen by gen_insn_end_off. */
+ if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
+ return -2;
+ }
+ }
+ tcg_debug_assert(num_insns >= 0);
+ s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
+
+ /* Generate TB finalization at the end of block */
+#ifdef TCG_TARGET_NEED_LDST_LABELS
+ i = tcg_out_ldst_finalize(s);
+ if (i < 0) {
+ return i;
+ }
+#endif
+#ifdef TCG_TARGET_NEED_POOL_LABELS
+ i = tcg_out_pool_finalize(s);
+ if (i < 0) {
+ return i;
+ }
+#endif
+ if (!tcg_resolve_relocs(s)) {
+ return -2;
+ }
+
+#ifndef CONFIG_TCG_INTERPRETER
+ /* flush instruction cache */
+ flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
+ (uintptr_t)s->code_buf,
+ tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
+#endif
+
+ return tcg_current_code_size(s);
+}
+
+#ifdef CONFIG_PROFILER
+void tcg_dump_info(GString *buf)
+{
+ TCGProfile prof = {};
+ const TCGProfile *s;
+ int64_t tb_count;
+ int64_t tb_div_count;
+ int64_t tot;
+
+ tcg_profile_snapshot_counters(&prof);
+ s = &prof;
+ tb_count = s->tb_count;
+ tb_div_count = tb_count ? tb_count : 1;
+ tot = s->interm_time + s->code_time;
+
+ g_string_append_printf(buf, "JIT cycles %" PRId64
+ " (%0.3f s at 2.4 GHz)\n",
+ tot, tot / 2.4e9);
+ g_string_append_printf(buf, "translated TBs %" PRId64
+ " (aborted=%" PRId64 " %0.1f%%)\n",
+ tb_count, s->tb_count1 - tb_count,
+ (double)(s->tb_count1 - s->tb_count)
+ / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
+ g_string_append_printf(buf, "avg ops/TB %0.1f max=%d\n",
+ (double)s->op_count / tb_div_count, s->op_count_max);
+ g_string_append_printf(buf, "deleted ops/TB %0.2f\n",
+ (double)s->del_op_count / tb_div_count);
+ g_string_append_printf(buf, "avg temps/TB %0.2f max=%d\n",
+ (double)s->temp_count / tb_div_count,
+ s->temp_count_max);
+ g_string_append_printf(buf, "avg host code/TB %0.1f\n",
+ (double)s->code_out_len / tb_div_count);
+ g_string_append_printf(buf, "avg search data/TB %0.1f\n",
+ (double)s->search_out_len / tb_div_count);
+
+ g_string_append_printf(buf, "cycles/op %0.1f\n",
+ s->op_count ? (double)tot / s->op_count : 0);
+ g_string_append_printf(buf, "cycles/in byte %0.1f\n",
+ s->code_in_len ? (double)tot / s->code_in_len : 0);
+ g_string_append_printf(buf, "cycles/out byte %0.1f\n",
+ s->code_out_len ? (double)tot / s->code_out_len : 0);
+ g_string_append_printf(buf, "cycles/search byte %0.1f\n",
+ s->search_out_len ?
+ (double)tot / s->search_out_len : 0);
+ if (tot == 0) {
+ tot = 1;
+ }
+ g_string_append_printf(buf, " gen_interm time %0.1f%%\n",
+ (double)s->interm_time / tot * 100.0);
+ g_string_append_printf(buf, " gen_code time %0.1f%%\n",
+ (double)s->code_time / tot * 100.0);
+ g_string_append_printf(buf, "optim./code time %0.1f%%\n",
+ (double)s->opt_time / (s->code_time ?
+ s->code_time : 1)
+ * 100.0);
+ g_string_append_printf(buf, "liveness/code time %0.1f%%\n",
+ (double)s->la_time / (s->code_time ?
+ s->code_time : 1) * 100.0);
+ g_string_append_printf(buf, "cpu_restore count %" PRId64 "\n",
+ s->restore_count);
+ g_string_append_printf(buf, " avg cycles %0.1f\n",
+ s->restore_count ?
+ (double)s->restore_time / s->restore_count : 0);
+}
+#else
+void tcg_dump_info(GString *buf)
+{
+ g_string_append_printf(buf, "[TCG profiler not compiled]\n");
+}
+#endif
+
+#ifdef ELF_HOST_MACHINE
+/* In order to use this feature, the backend needs to do three things:
+
+ (1) Define ELF_HOST_MACHINE to indicate both what value to
+ put into the ELF image and to indicate support for the feature.
+
+ (2) Define tcg_register_jit. This should create a buffer containing
+ the contents of a .debug_frame section that describes the post-
+ prologue unwind info for the tcg machine.
+
+ (3) Call tcg_register_jit_int, with the constructed .debug_frame.
+*/
+
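+/*
+ * A minimal sketch of steps (2) and (3) above, assuming a backend-defined
+ * DebugFrame structure that holds the target's CIE/FDE data (its layout is
+ * backend specific and shown here only for illustration):
+ *
+ *     static const DebugFrame debug_frame = { ... };
+ *
+ *     void tcg_register_jit(const void *buf, size_t buf_size)
+ *     {
+ *         tcg_register_jit_int(buf, buf_size,
+ *                              &debug_frame, sizeof(debug_frame));
+ *     }
+ */
+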
+/* Begin GDB interface. THE FOLLOWING MUST MATCH GDB DOCS. */
+typedef enum {
+ JIT_NOACTION = 0,
+ JIT_REGISTER_FN,
+ JIT_UNREGISTER_FN
+} jit_actions_t;
+
+struct jit_code_entry {
+ struct jit_code_entry *next_entry;
+ struct jit_code_entry *prev_entry;
+ const void *symfile_addr;
+ uint64_t symfile_size;
+};
+
+struct jit_descriptor {
+ uint32_t version;
+ uint32_t action_flag;
+ struct jit_code_entry *relevant_entry;
+ struct jit_code_entry *first_entry;
+};
+
+void __jit_debug_register_code(void) __attribute__((noinline));
+void __jit_debug_register_code(void)
+{
+ asm("");
+}
+
+/* Must statically initialize the version, because GDB may check
+ the version before we can set it. */
+struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
+
+/* End GDB interface. */
+
+static int find_string(const char *strtab, const char *str)
+{
+ const char *p = strtab + 1;
+
+ while (1) {
+ if (strcmp(p, str) == 0) {
+ return p - strtab;
+ }
+ p += strlen(p) + 1;
+ }
+}
+
+static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
+ const void *debug_frame,
+ size_t debug_frame_size)
+{
+ struct __attribute__((packed)) DebugInfo {
+ uint32_t len;
+ uint16_t version;
+ uint32_t abbrev;
+ uint8_t ptr_size;
+ uint8_t cu_die;
+ uint16_t cu_lang;
+ uintptr_t cu_low_pc;
+ uintptr_t cu_high_pc;
+ uint8_t fn_die;
+ char fn_name[16];
+ uintptr_t fn_low_pc;
+ uintptr_t fn_high_pc;
+ uint8_t cu_eoc;
+ };
+
+ struct ElfImage {
+ ElfW(Ehdr) ehdr;
+ ElfW(Phdr) phdr;
+ ElfW(Shdr) shdr[7];
+ ElfW(Sym) sym[2];
+ struct DebugInfo di;
+ uint8_t da[24];
+ char str[80];
+ };
+
+ struct ElfImage *img;
+
+ static const struct ElfImage img_template = {
+ .ehdr = {
+ .e_ident[EI_MAG0] = ELFMAG0,
+ .e_ident[EI_MAG1] = ELFMAG1,
+ .e_ident[EI_MAG2] = ELFMAG2,
+ .e_ident[EI_MAG3] = ELFMAG3,
+ .e_ident[EI_CLASS] = ELF_CLASS,
+ .e_ident[EI_DATA] = ELF_DATA,
+ .e_ident[EI_VERSION] = EV_CURRENT,
+ .e_type = ET_EXEC,
+ .e_machine = ELF_HOST_MACHINE,
+ .e_version = EV_CURRENT,
+ .e_phoff = offsetof(struct ElfImage, phdr),
+ .e_shoff = offsetof(struct ElfImage, shdr),
+ .e_ehsize = sizeof(ElfW(Shdr)),
+ .e_phentsize = sizeof(ElfW(Phdr)),
+ .e_phnum = 1,
+ .e_shentsize = sizeof(ElfW(Shdr)),
+ .e_shnum = ARRAY_SIZE(img->shdr),
+ .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
+#ifdef ELF_HOST_FLAGS
+ .e_flags = ELF_HOST_FLAGS,
+#endif
+#ifdef ELF_OSABI
+ .e_ident[EI_OSABI] = ELF_OSABI,
+#endif
+ },
+ .phdr = {
+ .p_type = PT_LOAD,
+ .p_flags = PF_X,
+ },
+ .shdr = {
+ [0] = { .sh_type = SHT_NULL },
+ /* Trick: The contents of code_gen_buffer are not present in
+ this fake ELF file; that got allocated elsewhere. Therefore
+ we mark .text as SHT_NOBITS (similar to .bss) so that readers
+ will not look for contents. We can record any address. */
+ [1] = { /* .text */
+ .sh_type = SHT_NOBITS,
+ .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
+ },
+ [2] = { /* .debug_info */
+ .sh_type = SHT_PROGBITS,
+ .sh_offset = offsetof(struct ElfImage, di),
+ .sh_size = sizeof(struct DebugInfo),
+ },
+ [3] = { /* .debug_abbrev */
+ .sh_type = SHT_PROGBITS,
+ .sh_offset = offsetof(struct ElfImage, da),
+ .sh_size = sizeof(img->da),
+ },
+ [4] = { /* .debug_frame */
+ .sh_type = SHT_PROGBITS,
+ .sh_offset = sizeof(struct ElfImage),
+ },
+ [5] = { /* .symtab */
+ .sh_type = SHT_SYMTAB,
+ .sh_offset = offsetof(struct ElfImage, sym),
+ .sh_size = sizeof(img->sym),
+ .sh_info = 1,
+ .sh_link = ARRAY_SIZE(img->shdr) - 1,
+ .sh_entsize = sizeof(ElfW(Sym)),
+ },
+ [6] = { /* .strtab */
+ .sh_type = SHT_STRTAB,
+ .sh_offset = offsetof(struct ElfImage, str),
+ .sh_size = sizeof(img->str),
+ }
+ },
+ .sym = {
+ [1] = { /* code_gen_buffer */
+ .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
+ .st_shndx = 1,
+ }
+ },
+ .di = {
+ .len = sizeof(struct DebugInfo) - 4,
+ .version = 2,
+ .ptr_size = sizeof(void *),
+ .cu_die = 1,
+ .cu_lang = 0x8001, /* DW_LANG_Mips_Assembler */
+ .fn_die = 2,
+ .fn_name = "code_gen_buffer"
+ },
+ .da = {
+ 1, /* abbrev number (the cu) */
+ 0x11, 1, /* DW_TAG_compile_unit, has children */
+ 0x13, 0x5, /* DW_AT_language, DW_FORM_data2 */
+ 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */
+ 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */
+ 0, 0, /* end of abbrev */
+ 2, /* abbrev number (the fn) */
+ 0x2e, 0, /* DW_TAG_subprogram, no children */
+ 0x3, 0x8, /* DW_AT_name, DW_FORM_string */
+ 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */
+ 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */
+ 0, 0, /* end of abbrev */
+ 0 /* no more abbrev */
+ },
+ .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
+ ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
+ };
+
+ /* We only need a single jit entry; statically allocate it. */
+ static struct jit_code_entry one_entry;
+
+ uintptr_t buf = (uintptr_t)buf_ptr;
+ size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
+ DebugFrameHeader *dfh;
+
+ img = g_malloc(img_size);
+ *img = img_template;
+
+ img->phdr.p_vaddr = buf;
+ img->phdr.p_paddr = buf;
+ img->phdr.p_memsz = buf_size;
+
+ img->shdr[1].sh_name = find_string(img->str, ".text");
+ img->shdr[1].sh_addr = buf;
+ img->shdr[1].sh_size = buf_size;
+
+ img->shdr[2].sh_name = find_string(img->str, ".debug_info");
+ img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
+
+ img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
+ img->shdr[4].sh_size = debug_frame_size;
+
+ img->shdr[5].sh_name = find_string(img->str, ".symtab");
+ img->shdr[6].sh_name = find_string(img->str, ".strtab");
+
+ img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
+ img->sym[1].st_value = buf;
+ img->sym[1].st_size = buf_size;
+
+ img->di.cu_low_pc = buf;
+ img->di.cu_high_pc = buf + buf_size;
+ img->di.fn_low_pc = buf;
+ img->di.fn_high_pc = buf + buf_size;
+
+ dfh = (DebugFrameHeader *)(img + 1);
+ memcpy(dfh, debug_frame, debug_frame_size);
+ dfh->fde.func_start = buf;
+ dfh->fde.func_len = buf_size;
+
+#ifdef DEBUG_JIT
+ /* Enable this block to be able to debug the ELF image file creation.
+ One can use readelf, objdump, or other inspection utilities. */
+ {
+ FILE *f = fopen("/tmp/qemu.jit", "w+b");
+ if (f) {
+            if (fwrite(img, img_size, 1, f) != 1) {
+ /* Avoid stupid unused return value warning for fwrite. */
+ }
+ fclose(f);
+ }
+ }
+#endif
+
+ one_entry.symfile_addr = img;
+ one_entry.symfile_size = img_size;
+
+ __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
+ __jit_debug_descriptor.relevant_entry = &one_entry;
+ __jit_debug_descriptor.first_entry = &one_entry;
+ __jit_debug_register_code();
+}
+#else
+/* No support for the feature. Provide the entry point expected by exec.c,
+ and implement the internal function we declared earlier. */
+
+static void tcg_register_jit_int(const void *buf, size_t size,
+ const void *debug_frame,
+ size_t debug_frame_size)
+{
+}
+
+void tcg_register_jit(const void *buf, size_t buf_size)
+{
+}
+#endif /* ELF_HOST_MACHINE */
+
+#if !TCG_TARGET_MAYBE_vec
+void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
+{
+ g_assert_not_reached();
+}
+#endif
diff --git a/tcg/tci.c b/tcg/tci.c
new file mode 100644
index 000000000..e76087cca
--- /dev/null
+++ b/tcg/tci.c
@@ -0,0 +1,1401 @@
+/*
+ * Tiny Code Interpreter for QEMU
+ *
+ * Copyright (c) 2009, 2011, 2016 Stefan Weil
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "tcg/tcg.h" /* MAX_OPC_PARAM_IARGS */
+#include "exec/cpu_ldst.h"
+#include "tcg/tcg-op.h"
+#include "tcg/tcg-ldst.h"
+#include "qemu/compiler.h"
+#include <ffi.h>
+
+
+/*
+ * Enable TCI assertions only when debugging TCG (and without NDEBUG defined).
+ * Without assertions, the interpreter runs much faster.
+ */
+#if defined(CONFIG_DEBUG_TCG)
+# define tci_assert(cond) assert(cond)
+#else
+# define tci_assert(cond) ((void)(cond))
+#endif
+
+__thread uintptr_t tci_tb_ptr;
+
+static void tci_write_reg64(tcg_target_ulong *regs, uint32_t high_index,
+ uint32_t low_index, uint64_t value)
+{
+ regs[low_index] = (uint32_t)value;
+ regs[high_index] = value >> 32;
+}
+
+/* Create a 64 bit value from two 32 bit values. */
+static uint64_t tci_uint64(uint32_t high, uint32_t low)
+{
+ return ((uint64_t)high << 32) + low;
+}
+
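+/*
+ * E.g. tci_uint64(0x00000001, 0x00000002) == 0x0000000100000002, while
+ * tci_write_reg64(regs, hi, lo, 0x0000000100000002) splits it back into
+ * regs[hi] = 1 and regs[lo] = 2 (the register indices here are arbitrary).
+ */
+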
+/*
+ * Load sets of arguments all at once. The naming convention is:
+ * tci_args_<arguments>
+ * where arguments is a sequence of
+ *
+ * b = immediate (bit position)
+ * c = condition (TCGCond)
+ * i = immediate (uint32_t)
+ * I = immediate (tcg_target_ulong)
+ * l = label or pointer
+ * m = immediate (MemOpIdx)
+ * n = immediate (call return length)
+ * r = register
+ * s = signed ldst offset
+ */
+
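+/*
+ * Decoding example with a made-up instruction word: for insn == 0x00342108,
+ * tci_args_rrs() below yields r0 = extract32(insn, 8, 4) = 1,
+ * r1 = extract32(insn, 12, 4) = 2 and ofs = sextract32(insn, 16, 16) = 0x34,
+ * while bits [0, 8) carry the opcode number itself.
+ */
+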
+static void tci_args_l(uint32_t insn, const void *tb_ptr, void **l0)
+{
+ int diff = sextract32(insn, 12, 20);
+ *l0 = diff ? (void *)tb_ptr + diff : NULL;
+}
+
+static void tci_args_r(uint32_t insn, TCGReg *r0)
+{
+ *r0 = extract32(insn, 8, 4);
+}
+
+static void tci_args_nl(uint32_t insn, const void *tb_ptr,
+ uint8_t *n0, void **l1)
+{
+ *n0 = extract32(insn, 8, 4);
+ *l1 = sextract32(insn, 12, 20) + (void *)tb_ptr;
+}
+
+static void tci_args_rl(uint32_t insn, const void *tb_ptr,
+ TCGReg *r0, void **l1)
+{
+ *r0 = extract32(insn, 8, 4);
+ *l1 = sextract32(insn, 12, 20) + (void *)tb_ptr;
+}
+
+static void tci_args_rr(uint32_t insn, TCGReg *r0, TCGReg *r1)
+{
+ *r0 = extract32(insn, 8, 4);
+ *r1 = extract32(insn, 12, 4);
+}
+
+static void tci_args_ri(uint32_t insn, TCGReg *r0, tcg_target_ulong *i1)
+{
+ *r0 = extract32(insn, 8, 4);
+ *i1 = sextract32(insn, 12, 20);
+}
+
+static void tci_args_rrm(uint32_t insn, TCGReg *r0,
+ TCGReg *r1, MemOpIdx *m2)
+{
+ *r0 = extract32(insn, 8, 4);
+ *r1 = extract32(insn, 12, 4);
+ *m2 = extract32(insn, 20, 12);
+}
+
+static void tci_args_rrr(uint32_t insn, TCGReg *r0, TCGReg *r1, TCGReg *r2)
+{
+ *r0 = extract32(insn, 8, 4);
+ *r1 = extract32(insn, 12, 4);
+ *r2 = extract32(insn, 16, 4);
+}
+
+static void tci_args_rrs(uint32_t insn, TCGReg *r0, TCGReg *r1, int32_t *i2)
+{
+ *r0 = extract32(insn, 8, 4);
+ *r1 = extract32(insn, 12, 4);
+ *i2 = sextract32(insn, 16, 16);
+}
+
+static void tci_args_rrbb(uint32_t insn, TCGReg *r0, TCGReg *r1,
+ uint8_t *i2, uint8_t *i3)
+{
+ *r0 = extract32(insn, 8, 4);
+ *r1 = extract32(insn, 12, 4);
+ *i2 = extract32(insn, 16, 6);
+ *i3 = extract32(insn, 22, 6);
+}
+
+static void tci_args_rrrc(uint32_t insn,
+ TCGReg *r0, TCGReg *r1, TCGReg *r2, TCGCond *c3)
+{
+ *r0 = extract32(insn, 8, 4);
+ *r1 = extract32(insn, 12, 4);
+ *r2 = extract32(insn, 16, 4);
+ *c3 = extract32(insn, 20, 4);
+}
+
+static void tci_args_rrrm(uint32_t insn,
+ TCGReg *r0, TCGReg *r1, TCGReg *r2, MemOpIdx *m3)
+{
+ *r0 = extract32(insn, 8, 4);
+ *r1 = extract32(insn, 12, 4);
+ *r2 = extract32(insn, 16, 4);
+ *m3 = extract32(insn, 20, 12);
+}
+
+static void tci_args_rrrbb(uint32_t insn, TCGReg *r0, TCGReg *r1,
+ TCGReg *r2, uint8_t *i3, uint8_t *i4)
+{
+ *r0 = extract32(insn, 8, 4);
+ *r1 = extract32(insn, 12, 4);
+ *r2 = extract32(insn, 16, 4);
+ *i3 = extract32(insn, 20, 6);
+ *i4 = extract32(insn, 26, 6);
+}
+
+static void tci_args_rrrrr(uint32_t insn, TCGReg *r0, TCGReg *r1,
+ TCGReg *r2, TCGReg *r3, TCGReg *r4)
+{
+ *r0 = extract32(insn, 8, 4);
+ *r1 = extract32(insn, 12, 4);
+ *r2 = extract32(insn, 16, 4);
+ *r3 = extract32(insn, 20, 4);
+ *r4 = extract32(insn, 24, 4);
+}
+
+static void tci_args_rrrr(uint32_t insn,
+ TCGReg *r0, TCGReg *r1, TCGReg *r2, TCGReg *r3)
+{
+ *r0 = extract32(insn, 8, 4);
+ *r1 = extract32(insn, 12, 4);
+ *r2 = extract32(insn, 16, 4);
+ *r3 = extract32(insn, 20, 4);
+}
+
+static void tci_args_rrrrrc(uint32_t insn, TCGReg *r0, TCGReg *r1,
+ TCGReg *r2, TCGReg *r3, TCGReg *r4, TCGCond *c5)
+{
+ *r0 = extract32(insn, 8, 4);
+ *r1 = extract32(insn, 12, 4);
+ *r2 = extract32(insn, 16, 4);
+ *r3 = extract32(insn, 20, 4);
+ *r4 = extract32(insn, 24, 4);
+ *c5 = extract32(insn, 28, 4);
+}
+
+static void tci_args_rrrrrr(uint32_t insn, TCGReg *r0, TCGReg *r1,
+ TCGReg *r2, TCGReg *r3, TCGReg *r4, TCGReg *r5)
+{
+ *r0 = extract32(insn, 8, 4);
+ *r1 = extract32(insn, 12, 4);
+ *r2 = extract32(insn, 16, 4);
+ *r3 = extract32(insn, 20, 4);
+ *r4 = extract32(insn, 24, 4);
+ *r5 = extract32(insn, 28, 4);
+}
+
+static bool tci_compare32(uint32_t u0, uint32_t u1, TCGCond condition)
+{
+ bool result = false;
+ int32_t i0 = u0;
+ int32_t i1 = u1;
+ switch (condition) {
+ case TCG_COND_EQ:
+ result = (u0 == u1);
+ break;
+ case TCG_COND_NE:
+ result = (u0 != u1);
+ break;
+ case TCG_COND_LT:
+ result = (i0 < i1);
+ break;
+ case TCG_COND_GE:
+ result = (i0 >= i1);
+ break;
+ case TCG_COND_LE:
+ result = (i0 <= i1);
+ break;
+ case TCG_COND_GT:
+ result = (i0 > i1);
+ break;
+ case TCG_COND_LTU:
+ result = (u0 < u1);
+ break;
+ case TCG_COND_GEU:
+ result = (u0 >= u1);
+ break;
+ case TCG_COND_LEU:
+ result = (u0 <= u1);
+ break;
+ case TCG_COND_GTU:
+ result = (u0 > u1);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ return result;
+}
+
+static bool tci_compare64(uint64_t u0, uint64_t u1, TCGCond condition)
+{
+ bool result = false;
+ int64_t i0 = u0;
+ int64_t i1 = u1;
+ switch (condition) {
+ case TCG_COND_EQ:
+ result = (u0 == u1);
+ break;
+ case TCG_COND_NE:
+ result = (u0 != u1);
+ break;
+ case TCG_COND_LT:
+ result = (i0 < i1);
+ break;
+ case TCG_COND_GE:
+ result = (i0 >= i1);
+ break;
+ case TCG_COND_LE:
+ result = (i0 <= i1);
+ break;
+ case TCG_COND_GT:
+ result = (i0 > i1);
+ break;
+ case TCG_COND_LTU:
+ result = (u0 < u1);
+ break;
+ case TCG_COND_GEU:
+ result = (u0 >= u1);
+ break;
+ case TCG_COND_LEU:
+ result = (u0 <= u1);
+ break;
+ case TCG_COND_GTU:
+ result = (u0 > u1);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ return result;
+}
+
+static uint64_t tci_qemu_ld(CPUArchState *env, target_ulong taddr,
+ MemOpIdx oi, const void *tb_ptr)
+{
+ MemOp mop = get_memop(oi) & (MO_BSWAP | MO_SSIZE);
+ uintptr_t ra = (uintptr_t)tb_ptr;
+
+#ifdef CONFIG_SOFTMMU
+ switch (mop) {
+ case MO_UB:
+ return helper_ret_ldub_mmu(env, taddr, oi, ra);
+ case MO_SB:
+ return helper_ret_ldsb_mmu(env, taddr, oi, ra);
+ case MO_LEUW:
+ return helper_le_lduw_mmu(env, taddr, oi, ra);
+ case MO_LESW:
+ return helper_le_ldsw_mmu(env, taddr, oi, ra);
+ case MO_LEUL:
+ return helper_le_ldul_mmu(env, taddr, oi, ra);
+ case MO_LESL:
+ return helper_le_ldsl_mmu(env, taddr, oi, ra);
+ case MO_LEQ:
+ return helper_le_ldq_mmu(env, taddr, oi, ra);
+ case MO_BEUW:
+ return helper_be_lduw_mmu(env, taddr, oi, ra);
+ case MO_BESW:
+ return helper_be_ldsw_mmu(env, taddr, oi, ra);
+ case MO_BEUL:
+ return helper_be_ldul_mmu(env, taddr, oi, ra);
+ case MO_BESL:
+ return helper_be_ldsl_mmu(env, taddr, oi, ra);
+ case MO_BEQ:
+ return helper_be_ldq_mmu(env, taddr, oi, ra);
+ default:
+ g_assert_not_reached();
+ }
+#else
+ void *haddr = g2h(env_cpu(env), taddr);
+ uint64_t ret;
+
+ set_helper_retaddr(ra);
+ switch (mop) {
+ case MO_UB:
+ ret = ldub_p(haddr);
+ break;
+ case MO_SB:
+ ret = ldsb_p(haddr);
+ break;
+ case MO_LEUW:
+ ret = lduw_le_p(haddr);
+ break;
+ case MO_LESW:
+ ret = ldsw_le_p(haddr);
+ break;
+ case MO_LEUL:
+ ret = (uint32_t)ldl_le_p(haddr);
+ break;
+ case MO_LESL:
+ ret = (int32_t)ldl_le_p(haddr);
+ break;
+ case MO_LEQ:
+ ret = ldq_le_p(haddr);
+ break;
+ case MO_BEUW:
+ ret = lduw_be_p(haddr);
+ break;
+ case MO_BESW:
+ ret = ldsw_be_p(haddr);
+ break;
+ case MO_BEUL:
+ ret = (uint32_t)ldl_be_p(haddr);
+ break;
+ case MO_BESL:
+ ret = (int32_t)ldl_be_p(haddr);
+ break;
+ case MO_BEQ:
+ ret = ldq_be_p(haddr);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ clear_helper_retaddr();
+ return ret;
+#endif
+}
+
+static void tci_qemu_st(CPUArchState *env, target_ulong taddr, uint64_t val,
+ MemOpIdx oi, const void *tb_ptr)
+{
+ MemOp mop = get_memop(oi) & (MO_BSWAP | MO_SSIZE);
+ uintptr_t ra = (uintptr_t)tb_ptr;
+
+#ifdef CONFIG_SOFTMMU
+ switch (mop) {
+ case MO_UB:
+ helper_ret_stb_mmu(env, taddr, val, oi, ra);
+ break;
+ case MO_LEUW:
+ helper_le_stw_mmu(env, taddr, val, oi, ra);
+ break;
+ case MO_LEUL:
+ helper_le_stl_mmu(env, taddr, val, oi, ra);
+ break;
+ case MO_LEQ:
+ helper_le_stq_mmu(env, taddr, val, oi, ra);
+ break;
+ case MO_BEUW:
+ helper_be_stw_mmu(env, taddr, val, oi, ra);
+ break;
+ case MO_BEUL:
+ helper_be_stl_mmu(env, taddr, val, oi, ra);
+ break;
+ case MO_BEQ:
+ helper_be_stq_mmu(env, taddr, val, oi, ra);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+#else
+ void *haddr = g2h(env_cpu(env), taddr);
+
+ set_helper_retaddr(ra);
+ switch (mop) {
+ case MO_UB:
+ stb_p(haddr, val);
+ break;
+ case MO_LEUW:
+ stw_le_p(haddr, val);
+ break;
+ case MO_LEUL:
+ stl_le_p(haddr, val);
+ break;
+ case MO_LEQ:
+ stq_le_p(haddr, val);
+ break;
+ case MO_BEUW:
+ stw_be_p(haddr, val);
+ break;
+ case MO_BEUL:
+ stl_be_p(haddr, val);
+ break;
+ case MO_BEQ:
+ stq_be_p(haddr, val);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ clear_helper_retaddr();
+#endif
+}
+
+#if TCG_TARGET_REG_BITS == 64
+# define CASE_32_64(x) \
+ case glue(glue(INDEX_op_, x), _i64): \
+ case glue(glue(INDEX_op_, x), _i32):
+# define CASE_64(x) \
+ case glue(glue(INDEX_op_, x), _i64):
+#else
+# define CASE_32_64(x) \
+ case glue(glue(INDEX_op_, x), _i32):
+# define CASE_64(x)
+#endif
+
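+/*
+ * For example, on a 64-bit host CASE_32_64(add) expands to
+ *     case INDEX_op_add_i64:
+ *     case INDEX_op_add_i32:
+ * while on a 32-bit host only the _i32 label is emitted and CASE_64(x)
+ * expands to nothing.
+ */
+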
+/* Interpret pseudo code in tb. */
+/*
+ * Disable CFI checks.
+ * One possible operation in the pseudo code is a call to binary code.
+ * Therefore, disable CFI checks in the interpreter function.
+ */
+uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
+ const void *v_tb_ptr)
+{
+ const uint32_t *tb_ptr = v_tb_ptr;
+ tcg_target_ulong regs[TCG_TARGET_NB_REGS];
+ uint64_t stack[(TCG_STATIC_CALL_ARGS_SIZE + TCG_STATIC_FRAME_SIZE)
+ / sizeof(uint64_t)];
+ void *call_slots[TCG_STATIC_CALL_ARGS_SIZE / sizeof(uint64_t)];
+
+ regs[TCG_AREG0] = (tcg_target_ulong)env;
+ regs[TCG_REG_CALL_STACK] = (uintptr_t)stack;
+ /* Other call_slots entries initialized at first use (see below). */
+ call_slots[0] = NULL;
+ tci_assert(tb_ptr);
+
+ for (;;) {
+ uint32_t insn;
+ TCGOpcode opc;
+ TCGReg r0, r1, r2, r3, r4, r5;
+ tcg_target_ulong t1;
+ TCGCond condition;
+ target_ulong taddr;
+ uint8_t pos, len;
+ uint32_t tmp32;
+ uint64_t tmp64;
+ uint64_t T1, T2;
+ MemOpIdx oi;
+ int32_t ofs;
+ void *ptr;
+
+ insn = *tb_ptr++;
+ opc = extract32(insn, 0, 8);
+
+ switch (opc) {
+ case INDEX_op_call:
+ /*
+ * Set up the ffi_avalue array once, delayed until now
+             * because many TBs do not make any calls. In tcg_gen_callN,
+ * we arranged for every real argument to be "left-aligned"
+ * in each 64-bit slot.
+ */
+ if (unlikely(call_slots[0] == NULL)) {
+ for (int i = 0; i < ARRAY_SIZE(call_slots); ++i) {
+ call_slots[i] = &stack[i];
+ }
+ }
+
+ tci_args_nl(insn, tb_ptr, &len, &ptr);
+
+ /* Helper functions may need to access the "return address" */
+ tci_tb_ptr = (uintptr_t)tb_ptr;
+
+ {
+ void **pptr = ptr;
+ ffi_call(pptr[1], pptr[0], stack, call_slots);
+ }
+
+ /* Any result winds up "left-aligned" in the stack[0] slot. */
+ switch (len) {
+ case 0: /* void */
+ break;
+ case 1: /* uint32_t */
+ /*
+ * Note that libffi has an odd special case in that it will
+ * always widen an integral result to ffi_arg.
+ */
+ if (sizeof(ffi_arg) == 4) {
+ regs[TCG_REG_R0] = *(uint32_t *)stack;
+ break;
+ }
+ /* fall through */
+ case 2: /* uint64_t */
+ if (TCG_TARGET_REG_BITS == 32) {
+ tci_write_reg64(regs, TCG_REG_R1, TCG_REG_R0, stack[0]);
+ } else {
+ regs[TCG_REG_R0] = stack[0];
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ break;
+
+ case INDEX_op_br:
+ tci_args_l(insn, tb_ptr, &ptr);
+ tb_ptr = ptr;
+ continue;
+ case INDEX_op_setcond_i32:
+ tci_args_rrrc(insn, &r0, &r1, &r2, &condition);
+ regs[r0] = tci_compare32(regs[r1], regs[r2], condition);
+ break;
+ case INDEX_op_movcond_i32:
+ tci_args_rrrrrc(insn, &r0, &r1, &r2, &r3, &r4, &condition);
+ tmp32 = tci_compare32(regs[r1], regs[r2], condition);
+ regs[r0] = regs[tmp32 ? r3 : r4];
+ break;
+#if TCG_TARGET_REG_BITS == 32
+ case INDEX_op_setcond2_i32:
+ tci_args_rrrrrc(insn, &r0, &r1, &r2, &r3, &r4, &condition);
+ T1 = tci_uint64(regs[r2], regs[r1]);
+ T2 = tci_uint64(regs[r4], regs[r3]);
+ regs[r0] = tci_compare64(T1, T2, condition);
+ break;
+#elif TCG_TARGET_REG_BITS == 64
+ case INDEX_op_setcond_i64:
+ tci_args_rrrc(insn, &r0, &r1, &r2, &condition);
+ regs[r0] = tci_compare64(regs[r1], regs[r2], condition);
+ break;
+ case INDEX_op_movcond_i64:
+ tci_args_rrrrrc(insn, &r0, &r1, &r2, &r3, &r4, &condition);
+ tmp32 = tci_compare64(regs[r1], regs[r2], condition);
+ regs[r0] = regs[tmp32 ? r3 : r4];
+ break;
+#endif
+ CASE_32_64(mov)
+ tci_args_rr(insn, &r0, &r1);
+ regs[r0] = regs[r1];
+ break;
+ case INDEX_op_tci_movi:
+ tci_args_ri(insn, &r0, &t1);
+ regs[r0] = t1;
+ break;
+ case INDEX_op_tci_movl:
+ tci_args_rl(insn, tb_ptr, &r0, &ptr);
+ regs[r0] = *(tcg_target_ulong *)ptr;
+ break;
+
+ /* Load/store operations (32 bit). */
+
+ CASE_32_64(ld8u)
+ tci_args_rrs(insn, &r0, &r1, &ofs);
+ ptr = (void *)(regs[r1] + ofs);
+ regs[r0] = *(uint8_t *)ptr;
+ break;
+ CASE_32_64(ld8s)
+ tci_args_rrs(insn, &r0, &r1, &ofs);
+ ptr = (void *)(regs[r1] + ofs);
+ regs[r0] = *(int8_t *)ptr;
+ break;
+ CASE_32_64(ld16u)
+ tci_args_rrs(insn, &r0, &r1, &ofs);
+ ptr = (void *)(regs[r1] + ofs);
+ regs[r0] = *(uint16_t *)ptr;
+ break;
+ CASE_32_64(ld16s)
+ tci_args_rrs(insn, &r0, &r1, &ofs);
+ ptr = (void *)(regs[r1] + ofs);
+ regs[r0] = *(int16_t *)ptr;
+ break;
+ case INDEX_op_ld_i32:
+ CASE_64(ld32u)
+ tci_args_rrs(insn, &r0, &r1, &ofs);
+ ptr = (void *)(regs[r1] + ofs);
+ regs[r0] = *(uint32_t *)ptr;
+ break;
+ CASE_32_64(st8)
+ tci_args_rrs(insn, &r0, &r1, &ofs);
+ ptr = (void *)(regs[r1] + ofs);
+ *(uint8_t *)ptr = regs[r0];
+ break;
+ CASE_32_64(st16)
+ tci_args_rrs(insn, &r0, &r1, &ofs);
+ ptr = (void *)(regs[r1] + ofs);
+ *(uint16_t *)ptr = regs[r0];
+ break;
+ case INDEX_op_st_i32:
+ CASE_64(st32)
+ tci_args_rrs(insn, &r0, &r1, &ofs);
+ ptr = (void *)(regs[r1] + ofs);
+ *(uint32_t *)ptr = regs[r0];
+ break;
+
+ /* Arithmetic operations (mixed 32/64 bit). */
+
+ CASE_32_64(add)
+ tci_args_rrr(insn, &r0, &r1, &r2);
+ regs[r0] = regs[r1] + regs[r2];
+ break;
+ CASE_32_64(sub)
+ tci_args_rrr(insn, &r0, &r1, &r2);
+ regs[r0] = regs[r1] - regs[r2];
+ break;
+ CASE_32_64(mul)
+ tci_args_rrr(insn, &r0, &r1, &r2);
+ regs[r0] = regs[r1] * regs[r2];
+ break;
+ CASE_32_64(and)
+ tci_args_rrr(insn, &r0, &r1, &r2);
+ regs[r0] = regs[r1] & regs[r2];
+ break;
+ CASE_32_64(or)
+ tci_args_rrr(insn, &r0, &r1, &r2);
+ regs[r0] = regs[r1] | regs[r2];
+ break;
+ CASE_32_64(xor)
+ tci_args_rrr(insn, &r0, &r1, &r2);
+ regs[r0] = regs[r1] ^ regs[r2];
+ break;
+#if TCG_TARGET_HAS_andc_i32 || TCG_TARGET_HAS_andc_i64
+ CASE_32_64(andc)
+ tci_args_rrr(insn, &r0, &r1, &r2);
+ regs[r0] = regs[r1] & ~regs[r2];
+ break;
+#endif
+#if TCG_TARGET_HAS_orc_i32 || TCG_TARGET_HAS_orc_i64
+ CASE_32_64(orc)
+ tci_args_rrr(insn, &r0, &r1, &r2);
+ regs[r0] = regs[r1] | ~regs[r2];
+ break;
+#endif
+#if TCG_TARGET_HAS_eqv_i32 || TCG_TARGET_HAS_eqv_i64
+ CASE_32_64(eqv)
+ tci_args_rrr(insn, &r0, &r1, &r2);
+ regs[r0] = ~(regs[r1] ^ regs[r2]);
+ break;
+#endif
+#if TCG_TARGET_HAS_nand_i32 || TCG_TARGET_HAS_nand_i64
+ CASE_32_64(nand)
+ tci_args_rrr(insn, &r0, &r1, &r2);
+ regs[r0] = ~(regs[r1] & regs[r2]);
+ break;
+#endif
+#if TCG_TARGET_HAS_nor_i32 || TCG_TARGET_HAS_nor_i64
+ CASE_32_64(nor)
+ tci_args_rrr(insn, &r0, &r1, &r2);
+ regs[r0] = ~(regs[r1] | regs[r2]);
+ break;
+#endif
+
+ /* Arithmetic operations (32 bit). */
+
+ case INDEX_op_div_i32:
+ tci_args_rrr(insn, &r0, &r1, &r2);
+ regs[r0] = (int32_t)regs[r1] / (int32_t)regs[r2];
+ break;
+ case INDEX_op_divu_i32:
+ tci_args_rrr(insn, &r0, &r1, &r2);
+ regs[r0] = (uint32_t)regs[r1] / (uint32_t)regs[r2];
+ break;
+ case INDEX_op_rem_i32:
+ tci_args_rrr(insn, &r0, &r1, &r2);
+ regs[r0] = (int32_t)regs[r1] % (int32_t)regs[r2];
+ break;
+ case INDEX_op_remu_i32:
+ tci_args_rrr(insn, &r0, &r1, &r2);
+ regs[r0] = (uint32_t)regs[r1] % (uint32_t)regs[r2];
+ break;
+#if TCG_TARGET_HAS_clz_i32
+ case INDEX_op_clz_i32:
+ tci_args_rrr(insn, &r0, &r1, &r2);
+ tmp32 = regs[r1];
+ regs[r0] = tmp32 ? clz32(tmp32) : regs[r2];
+ break;
+#endif
+#if TCG_TARGET_HAS_ctz_i32
+ case INDEX_op_ctz_i32:
+ tci_args_rrr(insn, &r0, &r1, &r2);
+ tmp32 = regs[r1];
+ regs[r0] = tmp32 ? ctz32(tmp32) : regs[r2];
+ break;
+#endif
+#if TCG_TARGET_HAS_ctpop_i32
+ case INDEX_op_ctpop_i32:
+ tci_args_rr(insn, &r0, &r1);
+ regs[r0] = ctpop32(regs[r1]);
+ break;
+#endif
+
+ /* Shift/rotate operations (32 bit). */
+
+ case INDEX_op_shl_i32:
+ tci_args_rrr(insn, &r0, &r1, &r2);
+ regs[r0] = (uint32_t)regs[r1] << (regs[r2] & 31);
+ break;
+ case INDEX_op_shr_i32:
+ tci_args_rrr(insn, &r0, &r1, &r2);
+ regs[r0] = (uint32_t)regs[r1] >> (regs[r2] & 31);
+ break;
+ case INDEX_op_sar_i32:
+ tci_args_rrr(insn, &r0, &r1, &r2);
+ regs[r0] = (int32_t)regs[r1] >> (regs[r2] & 31);
+ break;
+#if TCG_TARGET_HAS_rot_i32
+ case INDEX_op_rotl_i32:
+ tci_args_rrr(insn, &r0, &r1, &r2);
+ regs[r0] = rol32(regs[r1], regs[r2] & 31);
+ break;
+ case INDEX_op_rotr_i32:
+ tci_args_rrr(insn, &r0, &r1, &r2);
+ regs[r0] = ror32(regs[r1], regs[r2] & 31);
+ break;
+#endif
+#if TCG_TARGET_HAS_deposit_i32
+ case INDEX_op_deposit_i32:
+ tci_args_rrrbb(insn, &r0, &r1, &r2, &pos, &len);
+ regs[r0] = deposit32(regs[r1], pos, len, regs[r2]);
+ break;
+#endif
+#if TCG_TARGET_HAS_extract_i32
+ case INDEX_op_extract_i32:
+ tci_args_rrbb(insn, &r0, &r1, &pos, &len);
+ regs[r0] = extract32(regs[r1], pos, len);
+ break;
+#endif
+#if TCG_TARGET_HAS_sextract_i32
+ case INDEX_op_sextract_i32:
+ tci_args_rrbb(insn, &r0, &r1, &pos, &len);
+ regs[r0] = sextract32(regs[r1], pos, len);
+ break;
+#endif
+ case INDEX_op_brcond_i32:
+ tci_args_rl(insn, tb_ptr, &r0, &ptr);
+ if ((uint32_t)regs[r0]) {
+ tb_ptr = ptr;
+ }
+ break;
+#if TCG_TARGET_REG_BITS == 32 || TCG_TARGET_HAS_add2_i32
+ case INDEX_op_add2_i32:
+ tci_args_rrrrrr(insn, &r0, &r1, &r2, &r3, &r4, &r5);
+ T1 = tci_uint64(regs[r3], regs[r2]);
+ T2 = tci_uint64(regs[r5], regs[r4]);
+ tci_write_reg64(regs, r1, r0, T1 + T2);
+ break;
+#endif
+#if TCG_TARGET_REG_BITS == 32 || TCG_TARGET_HAS_sub2_i32
+ case INDEX_op_sub2_i32:
+ tci_args_rrrrrr(insn, &r0, &r1, &r2, &r3, &r4, &r5);
+ T1 = tci_uint64(regs[r3], regs[r2]);
+ T2 = tci_uint64(regs[r5], regs[r4]);
+ tci_write_reg64(regs, r1, r0, T1 - T2);
+ break;
+#endif
+#if TCG_TARGET_HAS_mulu2_i32
+ case INDEX_op_mulu2_i32:
+ tci_args_rrrr(insn, &r0, &r1, &r2, &r3);
+ tmp64 = (uint64_t)(uint32_t)regs[r2] * (uint32_t)regs[r3];
+ tci_write_reg64(regs, r1, r0, tmp64);
+ break;
+#endif
+#if TCG_TARGET_HAS_muls2_i32
+ case INDEX_op_muls2_i32:
+ tci_args_rrrr(insn, &r0, &r1, &r2, &r3);
+ tmp64 = (int64_t)(int32_t)regs[r2] * (int32_t)regs[r3];
+ tci_write_reg64(regs, r1, r0, tmp64);
+ break;
+#endif
+#if TCG_TARGET_HAS_ext8s_i32 || TCG_TARGET_HAS_ext8s_i64
+ CASE_32_64(ext8s)
+ tci_args_rr(insn, &r0, &r1);
+ regs[r0] = (int8_t)regs[r1];
+ break;
+#endif
+#if TCG_TARGET_HAS_ext16s_i32 || TCG_TARGET_HAS_ext16s_i64 || \
+ TCG_TARGET_HAS_bswap16_i32 || TCG_TARGET_HAS_bswap16_i64
+ CASE_32_64(ext16s)
+ tci_args_rr(insn, &r0, &r1);
+ regs[r0] = (int16_t)regs[r1];
+ break;
+#endif
+#if TCG_TARGET_HAS_ext8u_i32 || TCG_TARGET_HAS_ext8u_i64
+ CASE_32_64(ext8u)
+ tci_args_rr(insn, &r0, &r1);
+ regs[r0] = (uint8_t)regs[r1];
+ break;
+#endif
+#if TCG_TARGET_HAS_ext16u_i32 || TCG_TARGET_HAS_ext16u_i64
+ CASE_32_64(ext16u)
+ tci_args_rr(insn, &r0, &r1);
+ regs[r0] = (uint16_t)regs[r1];
+ break;
+#endif
+#if TCG_TARGET_HAS_bswap16_i32 || TCG_TARGET_HAS_bswap16_i64
+ CASE_32_64(bswap16)
+ tci_args_rr(insn, &r0, &r1);
+ regs[r0] = bswap16(regs[r1]);
+ break;
+#endif
+#if TCG_TARGET_HAS_bswap32_i32 || TCG_TARGET_HAS_bswap32_i64
+ CASE_32_64(bswap32)
+ tci_args_rr(insn, &r0, &r1);
+ regs[r0] = bswap32(regs[r1]);
+ break;
+#endif
+#if TCG_TARGET_HAS_not_i32 || TCG_TARGET_HAS_not_i64
+ CASE_32_64(not)
+ tci_args_rr(insn, &r0, &r1);
+ regs[r0] = ~regs[r1];
+ break;
+#endif
+#if TCG_TARGET_HAS_neg_i32 || TCG_TARGET_HAS_neg_i64
+ CASE_32_64(neg)
+ tci_args_rr(insn, &r0, &r1);
+ regs[r0] = -regs[r1];
+ break;
+#endif
+#if TCG_TARGET_REG_BITS == 64
+ /* Load/store operations (64 bit). */
+
+ case INDEX_op_ld32s_i64:
+ tci_args_rrs(insn, &r0, &r1, &ofs);
+ ptr = (void *)(regs[r1] + ofs);
+ regs[r0] = *(int32_t *)ptr;
+ break;
+ case INDEX_op_ld_i64:
+ tci_args_rrs(insn, &r0, &r1, &ofs);
+ ptr = (void *)(regs[r1] + ofs);
+ regs[r0] = *(uint64_t *)ptr;
+ break;
+ case INDEX_op_st_i64:
+ tci_args_rrs(insn, &r0, &r1, &ofs);
+ ptr = (void *)(regs[r1] + ofs);
+ *(uint64_t *)ptr = regs[r0];
+ break;
+
+ /* Arithmetic operations (64 bit). */
+
+ case INDEX_op_div_i64:
+ tci_args_rrr(insn, &r0, &r1, &r2);
+ regs[r0] = (int64_t)regs[r1] / (int64_t)regs[r2];
+ break;
+ case INDEX_op_divu_i64:
+ tci_args_rrr(insn, &r0, &r1, &r2);
+ regs[r0] = (uint64_t)regs[r1] / (uint64_t)regs[r2];
+ break;
+ case INDEX_op_rem_i64:
+ tci_args_rrr(insn, &r0, &r1, &r2);
+ regs[r0] = (int64_t)regs[r1] % (int64_t)regs[r2];
+ break;
+ case INDEX_op_remu_i64:
+ tci_args_rrr(insn, &r0, &r1, &r2);
+ regs[r0] = (uint64_t)regs[r1] % (uint64_t)regs[r2];
+ break;
+#if TCG_TARGET_HAS_clz_i64
+ case INDEX_op_clz_i64:
+ tci_args_rrr(insn, &r0, &r1, &r2);
+ regs[r0] = regs[r1] ? clz64(regs[r1]) : regs[r2];
+ break;
+#endif
+#if TCG_TARGET_HAS_ctz_i64
+ case INDEX_op_ctz_i64:
+ tci_args_rrr(insn, &r0, &r1, &r2);
+ regs[r0] = regs[r1] ? ctz64(regs[r1]) : regs[r2];
+ break;
+#endif
+#if TCG_TARGET_HAS_ctpop_i64
+ case INDEX_op_ctpop_i64:
+ tci_args_rr(insn, &r0, &r1);
+ regs[r0] = ctpop64(regs[r1]);
+ break;
+#endif
+#if TCG_TARGET_HAS_mulu2_i64
+ case INDEX_op_mulu2_i64:
+ tci_args_rrrr(insn, &r0, &r1, &r2, &r3);
+ mulu64(&regs[r0], &regs[r1], regs[r2], regs[r3]);
+ break;
+#endif
+#if TCG_TARGET_HAS_muls2_i64
+ case INDEX_op_muls2_i64:
+ tci_args_rrrr(insn, &r0, &r1, &r2, &r3);
+ muls64(&regs[r0], &regs[r1], regs[r2], regs[r3]);
+ break;
+#endif
+#if TCG_TARGET_HAS_add2_i64
+ case INDEX_op_add2_i64:
+ tci_args_rrrrrr(insn, &r0, &r1, &r2, &r3, &r4, &r5);
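+            /* 128-bit add of the (low,high) pairs {r2,r3} and {r4,r5};
+               (T1 < regs[r2]) is the carry out of the low-half addition. */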
+ T1 = regs[r2] + regs[r4];
+ T2 = regs[r3] + regs[r5] + (T1 < regs[r2]);
+ regs[r0] = T1;
+ regs[r1] = T2;
+ break;
+#endif
+#if TCG_TARGET_HAS_sub2_i64
+ case INDEX_op_sub2_i64:
+ tci_args_rrrrrr(insn, &r0, &r1, &r2, &r3, &r4, &r5);
+ T1 = regs[r2] - regs[r4];
+ T2 = regs[r3] - regs[r5] - (regs[r2] < regs[r4]);
+ regs[r0] = T1;
+ regs[r1] = T2;
+ break;
+#endif
+
+ /* Shift/rotate operations (64 bit). */
+
+ case INDEX_op_shl_i64:
+ tci_args_rrr(insn, &r0, &r1, &r2);
+ regs[r0] = regs[r1] << (regs[r2] & 63);
+ break;
+ case INDEX_op_shr_i64:
+ tci_args_rrr(insn, &r0, &r1, &r2);
+ regs[r0] = regs[r1] >> (regs[r2] & 63);
+ break;
+ case INDEX_op_sar_i64:
+ tci_args_rrr(insn, &r0, &r1, &r2);
+ regs[r0] = (int64_t)regs[r1] >> (regs[r2] & 63);
+ break;
+#if TCG_TARGET_HAS_rot_i64
+ case INDEX_op_rotl_i64:
+ tci_args_rrr(insn, &r0, &r1, &r2);
+ regs[r0] = rol64(regs[r1], regs[r2] & 63);
+ break;
+ case INDEX_op_rotr_i64:
+ tci_args_rrr(insn, &r0, &r1, &r2);
+ regs[r0] = ror64(regs[r1], regs[r2] & 63);
+ break;
+#endif
+#if TCG_TARGET_HAS_deposit_i64
+ case INDEX_op_deposit_i64:
+ tci_args_rrrbb(insn, &r0, &r1, &r2, &pos, &len);
+ regs[r0] = deposit64(regs[r1], pos, len, regs[r2]);
+ break;
+#endif
+#if TCG_TARGET_HAS_extract_i64
+ case INDEX_op_extract_i64:
+ tci_args_rrbb(insn, &r0, &r1, &pos, &len);
+ regs[r0] = extract64(regs[r1], pos, len);
+ break;
+#endif
+#if TCG_TARGET_HAS_sextract_i64
+ case INDEX_op_sextract_i64:
+ tci_args_rrbb(insn, &r0, &r1, &pos, &len);
+ regs[r0] = sextract64(regs[r1], pos, len);
+ break;
+#endif
+ case INDEX_op_brcond_i64:
+ tci_args_rl(insn, tb_ptr, &r0, &ptr);
+ if (regs[r0]) {
+ tb_ptr = ptr;
+ }
+ break;
+ case INDEX_op_ext32s_i64:
+ case INDEX_op_ext_i32_i64:
+ tci_args_rr(insn, &r0, &r1);
+ regs[r0] = (int32_t)regs[r1];
+ break;
+ case INDEX_op_ext32u_i64:
+ case INDEX_op_extu_i32_i64:
+ tci_args_rr(insn, &r0, &r1);
+ regs[r0] = (uint32_t)regs[r1];
+ break;
+#if TCG_TARGET_HAS_bswap64_i64
+ case INDEX_op_bswap64_i64:
+ tci_args_rr(insn, &r0, &r1);
+ regs[r0] = bswap64(regs[r1]);
+ break;
+#endif
+#endif /* TCG_TARGET_REG_BITS == 64 */
+
+ /* QEMU specific operations. */
+
+ case INDEX_op_exit_tb:
+ tci_args_l(insn, tb_ptr, &ptr);
+ return (uintptr_t)ptr;
+
+ case INDEX_op_goto_tb:
+ tci_args_l(insn, tb_ptr, &ptr);
+ tb_ptr = *(void **)ptr;
+ break;
+
+ case INDEX_op_goto_ptr:
+ tci_args_r(insn, &r0);
+ ptr = (void *)regs[r0];
+ if (!ptr) {
+ return 0;
+ }
+ tb_ptr = ptr;
+ break;
+
+ case INDEX_op_qemu_ld_i32:
+ if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) {
+ tci_args_rrm(insn, &r0, &r1, &oi);
+ taddr = regs[r1];
+ } else {
+ tci_args_rrrm(insn, &r0, &r1, &r2, &oi);
+ taddr = tci_uint64(regs[r2], regs[r1]);
+ }
+ tmp32 = tci_qemu_ld(env, taddr, oi, tb_ptr);
+ regs[r0] = tmp32;
+ break;
+
+ case INDEX_op_qemu_ld_i64:
+ if (TCG_TARGET_REG_BITS == 64) {
+ tci_args_rrm(insn, &r0, &r1, &oi);
+ taddr = regs[r1];
+ } else if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) {
+ tci_args_rrrm(insn, &r0, &r1, &r2, &oi);
+ taddr = regs[r2];
+ } else {
+ tci_args_rrrrr(insn, &r0, &r1, &r2, &r3, &r4);
+ taddr = tci_uint64(regs[r3], regs[r2]);
+ oi = regs[r4];
+ }
+ tmp64 = tci_qemu_ld(env, taddr, oi, tb_ptr);
+ if (TCG_TARGET_REG_BITS == 32) {
+ tci_write_reg64(regs, r1, r0, tmp64);
+ } else {
+ regs[r0] = tmp64;
+ }
+ break;
+
+ case INDEX_op_qemu_st_i32:
+ if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) {
+ tci_args_rrm(insn, &r0, &r1, &oi);
+ taddr = regs[r1];
+ } else {
+ tci_args_rrrm(insn, &r0, &r1, &r2, &oi);
+ taddr = tci_uint64(regs[r2], regs[r1]);
+ }
+ tmp32 = regs[r0];
+ tci_qemu_st(env, taddr, tmp32, oi, tb_ptr);
+ break;
+
+ case INDEX_op_qemu_st_i64:
+ if (TCG_TARGET_REG_BITS == 64) {
+ tci_args_rrm(insn, &r0, &r1, &oi);
+ taddr = regs[r1];
+ tmp64 = regs[r0];
+ } else {
+ if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) {
+ tci_args_rrrm(insn, &r0, &r1, &r2, &oi);
+ taddr = regs[r2];
+ } else {
+ tci_args_rrrrr(insn, &r0, &r1, &r2, &r3, &r4);
+ taddr = tci_uint64(regs[r3], regs[r2]);
+ oi = regs[r4];
+ }
+ tmp64 = tci_uint64(regs[r1], regs[r0]);
+ }
+ tci_qemu_st(env, taddr, tmp64, oi, tb_ptr);
+ break;
+
+ case INDEX_op_mb:
+ /* Ensure ordering for all kinds */
+ smp_mb();
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ }
+}
+
+/*
+ * Disassembler that matches the interpreter
+ */
+
+static const char *str_r(TCGReg r)
+{
+ static const char regs[TCG_TARGET_NB_REGS][4] = {
+ "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
+ "r8", "r9", "r10", "r11", "r12", "r13", "env", "sp"
+ };
+
+ QEMU_BUILD_BUG_ON(TCG_AREG0 != TCG_REG_R14);
+ QEMU_BUILD_BUG_ON(TCG_REG_CALL_STACK != TCG_REG_R15);
+
+ assert((unsigned)r < TCG_TARGET_NB_REGS);
+ return regs[r];
+}
+
+static const char *str_c(TCGCond c)
+{
+ static const char cond[16][8] = {
+ [TCG_COND_NEVER] = "never",
+ [TCG_COND_ALWAYS] = "always",
+ [TCG_COND_EQ] = "eq",
+ [TCG_COND_NE] = "ne",
+ [TCG_COND_LT] = "lt",
+ [TCG_COND_GE] = "ge",
+ [TCG_COND_LE] = "le",
+ [TCG_COND_GT] = "gt",
+ [TCG_COND_LTU] = "ltu",
+ [TCG_COND_GEU] = "geu",
+ [TCG_COND_LEU] = "leu",
+ [TCG_COND_GTU] = "gtu",
+ };
+
+ assert((unsigned)c < ARRAY_SIZE(cond));
+ assert(cond[c][0] != 0);
+ return cond[c];
+}
+
+/* Disassemble TCI bytecode. */
+int print_insn_tci(bfd_vma addr, disassemble_info *info)
+{
+ const uint32_t *tb_ptr = (const void *)(uintptr_t)addr;
+ const TCGOpDef *def;
+ const char *op_name;
+ uint32_t insn;
+ TCGOpcode op;
+ TCGReg r0, r1, r2, r3, r4, r5;
+ tcg_target_ulong i1;
+ int32_t s2;
+ TCGCond c;
+ MemOpIdx oi;
+ uint8_t pos, len;
+ void *ptr;
+
+ /* TCI is always the host, so we don't need to load indirect. */
+ insn = *tb_ptr++;
+
+ info->fprintf_func(info->stream, "%08x ", insn);
+
+ op = extract32(insn, 0, 8);
+ def = &tcg_op_defs[op];
+ op_name = def->name;
+
+ switch (op) {
+ case INDEX_op_br:
+ case INDEX_op_exit_tb:
+ case INDEX_op_goto_tb:
+ tci_args_l(insn, tb_ptr, &ptr);
+ info->fprintf_func(info->stream, "%-12s %p", op_name, ptr);
+ break;
+
+ case INDEX_op_goto_ptr:
+ tci_args_r(insn, &r0);
+ info->fprintf_func(info->stream, "%-12s %s", op_name, str_r(r0));
+ break;
+
+ case INDEX_op_call:
+ tci_args_nl(insn, tb_ptr, &len, &ptr);
+ info->fprintf_func(info->stream, "%-12s %d, %p", op_name, len, ptr);
+ break;
+
+ case INDEX_op_brcond_i32:
+ case INDEX_op_brcond_i64:
+ tci_args_rl(insn, tb_ptr, &r0, &ptr);
+ info->fprintf_func(info->stream, "%-12s %s, 0, ne, %p",
+ op_name, str_r(r0), ptr);
+ break;
+
+ case INDEX_op_setcond_i32:
+ case INDEX_op_setcond_i64:
+ tci_args_rrrc(insn, &r0, &r1, &r2, &c);
+ info->fprintf_func(info->stream, "%-12s %s, %s, %s, %s",
+ op_name, str_r(r0), str_r(r1), str_r(r2), str_c(c));
+ break;
+
+ case INDEX_op_tci_movi:
+ tci_args_ri(insn, &r0, &i1);
+ info->fprintf_func(info->stream, "%-12s %s, 0x%" TCG_PRIlx,
+ op_name, str_r(r0), i1);
+ break;
+
+ case INDEX_op_tci_movl:
+ tci_args_rl(insn, tb_ptr, &r0, &ptr);
+ info->fprintf_func(info->stream, "%-12s %s, %p",
+ op_name, str_r(r0), ptr);
+ break;
+
+ case INDEX_op_ld8u_i32:
+ case INDEX_op_ld8u_i64:
+ case INDEX_op_ld8s_i32:
+ case INDEX_op_ld8s_i64:
+ case INDEX_op_ld16u_i32:
+ case INDEX_op_ld16u_i64:
+ case INDEX_op_ld16s_i32:
+ case INDEX_op_ld16s_i64:
+ case INDEX_op_ld32u_i64:
+ case INDEX_op_ld32s_i64:
+ case INDEX_op_ld_i32:
+ case INDEX_op_ld_i64:
+ case INDEX_op_st8_i32:
+ case INDEX_op_st8_i64:
+ case INDEX_op_st16_i32:
+ case INDEX_op_st16_i64:
+ case INDEX_op_st32_i64:
+ case INDEX_op_st_i32:
+ case INDEX_op_st_i64:
+ tci_args_rrs(insn, &r0, &r1, &s2);
+ info->fprintf_func(info->stream, "%-12s %s, %s, %d",
+ op_name, str_r(r0), str_r(r1), s2);
+ break;
+
+ case INDEX_op_mov_i32:
+ case INDEX_op_mov_i64:
+ case INDEX_op_ext8s_i32:
+ case INDEX_op_ext8s_i64:
+ case INDEX_op_ext8u_i32:
+ case INDEX_op_ext8u_i64:
+ case INDEX_op_ext16s_i32:
+ case INDEX_op_ext16s_i64:
+ case INDEX_op_ext16u_i32:
+ case INDEX_op_ext32s_i64:
+ case INDEX_op_ext32u_i64:
+ case INDEX_op_ext_i32_i64:
+ case INDEX_op_extu_i32_i64:
+ case INDEX_op_bswap16_i32:
+ case INDEX_op_bswap16_i64:
+ case INDEX_op_bswap32_i32:
+ case INDEX_op_bswap32_i64:
+ case INDEX_op_bswap64_i64:
+ case INDEX_op_not_i32:
+ case INDEX_op_not_i64:
+ case INDEX_op_neg_i32:
+ case INDEX_op_neg_i64:
+ case INDEX_op_ctpop_i32:
+ case INDEX_op_ctpop_i64:
+ tci_args_rr(insn, &r0, &r1);
+ info->fprintf_func(info->stream, "%-12s %s, %s",
+ op_name, str_r(r0), str_r(r1));
+ break;
+
+ case INDEX_op_add_i32:
+ case INDEX_op_add_i64:
+ case INDEX_op_sub_i32:
+ case INDEX_op_sub_i64:
+ case INDEX_op_mul_i32:
+ case INDEX_op_mul_i64:
+ case INDEX_op_and_i32:
+ case INDEX_op_and_i64:
+ case INDEX_op_or_i32:
+ case INDEX_op_or_i64:
+ case INDEX_op_xor_i32:
+ case INDEX_op_xor_i64:
+ case INDEX_op_andc_i32:
+ case INDEX_op_andc_i64:
+ case INDEX_op_orc_i32:
+ case INDEX_op_orc_i64:
+ case INDEX_op_eqv_i32:
+ case INDEX_op_eqv_i64:
+ case INDEX_op_nand_i32:
+ case INDEX_op_nand_i64:
+ case INDEX_op_nor_i32:
+ case INDEX_op_nor_i64:
+ case INDEX_op_div_i32:
+ case INDEX_op_div_i64:
+ case INDEX_op_rem_i32:
+ case INDEX_op_rem_i64:
+ case INDEX_op_divu_i32:
+ case INDEX_op_divu_i64:
+ case INDEX_op_remu_i32:
+ case INDEX_op_remu_i64:
+ case INDEX_op_shl_i32:
+ case INDEX_op_shl_i64:
+ case INDEX_op_shr_i32:
+ case INDEX_op_shr_i64:
+ case INDEX_op_sar_i32:
+ case INDEX_op_sar_i64:
+ case INDEX_op_rotl_i32:
+ case INDEX_op_rotl_i64:
+ case INDEX_op_rotr_i32:
+ case INDEX_op_rotr_i64:
+ case INDEX_op_clz_i32:
+ case INDEX_op_clz_i64:
+ case INDEX_op_ctz_i32:
+ case INDEX_op_ctz_i64:
+ tci_args_rrr(insn, &r0, &r1, &r2);
+ info->fprintf_func(info->stream, "%-12s %s, %s, %s",
+ op_name, str_r(r0), str_r(r1), str_r(r2));
+ break;
+
+ case INDEX_op_deposit_i32:
+ case INDEX_op_deposit_i64:
+ tci_args_rrrbb(insn, &r0, &r1, &r2, &pos, &len);
+ info->fprintf_func(info->stream, "%-12s %s, %s, %s, %d, %d",
+ op_name, str_r(r0), str_r(r1), str_r(r2), pos, len);
+ break;
+
+ case INDEX_op_extract_i32:
+ case INDEX_op_extract_i64:
+ case INDEX_op_sextract_i32:
+ case INDEX_op_sextract_i64:
+ tci_args_rrbb(insn, &r0, &r1, &pos, &len);
+        info->fprintf_func(info->stream, "%-12s %s, %s, %d, %d",
+ op_name, str_r(r0), str_r(r1), pos, len);
+ break;
+
+ case INDEX_op_movcond_i32:
+ case INDEX_op_movcond_i64:
+ case INDEX_op_setcond2_i32:
+ tci_args_rrrrrc(insn, &r0, &r1, &r2, &r3, &r4, &c);
+ info->fprintf_func(info->stream, "%-12s %s, %s, %s, %s, %s, %s",
+ op_name, str_r(r0), str_r(r1), str_r(r2),
+ str_r(r3), str_r(r4), str_c(c));
+ break;
+
+ case INDEX_op_mulu2_i32:
+ case INDEX_op_mulu2_i64:
+ case INDEX_op_muls2_i32:
+ case INDEX_op_muls2_i64:
+ tci_args_rrrr(insn, &r0, &r1, &r2, &r3);
+ info->fprintf_func(info->stream, "%-12s %s, %s, %s, %s",
+ op_name, str_r(r0), str_r(r1),
+ str_r(r2), str_r(r3));
+ break;
+
+ case INDEX_op_add2_i32:
+ case INDEX_op_add2_i64:
+ case INDEX_op_sub2_i32:
+ case INDEX_op_sub2_i64:
+ tci_args_rrrrrr(insn, &r0, &r1, &r2, &r3, &r4, &r5);
+ info->fprintf_func(info->stream, "%-12s %s, %s, %s, %s, %s, %s",
+ op_name, str_r(r0), str_r(r1), str_r(r2),
+ str_r(r3), str_r(r4), str_r(r5));
+ break;
+
+ case INDEX_op_qemu_ld_i64:
+ case INDEX_op_qemu_st_i64:
+ len = DIV_ROUND_UP(64, TCG_TARGET_REG_BITS);
+ goto do_qemu_ldst;
+ case INDEX_op_qemu_ld_i32:
+ case INDEX_op_qemu_st_i32:
+ len = 1;
+ do_qemu_ldst:
+ len += DIV_ROUND_UP(TARGET_LONG_BITS, TCG_TARGET_REG_BITS);
+ switch (len) {
+ case 2:
+ tci_args_rrm(insn, &r0, &r1, &oi);
+ info->fprintf_func(info->stream, "%-12s %s, %s, %x",
+ op_name, str_r(r0), str_r(r1), oi);
+ break;
+ case 3:
+ tci_args_rrrm(insn, &r0, &r1, &r2, &oi);
+ info->fprintf_func(info->stream, "%-12s %s, %s, %s, %x",
+ op_name, str_r(r0), str_r(r1), str_r(r2), oi);
+ break;
+ case 4:
+ tci_args_rrrrr(insn, &r0, &r1, &r2, &r3, &r4);
+ info->fprintf_func(info->stream, "%-12s %s, %s, %s, %s, %s",
+ op_name, str_r(r0), str_r(r1),
+ str_r(r2), str_r(r3), str_r(r4));
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ break;
+
+ case 0:
+ /* tcg_out_nop_fill uses zeros */
+ if (insn == 0) {
+ info->fprintf_func(info->stream, "align");
+ break;
+ }
+ /* fall through */
+
+ default:
+ info->fprintf_func(info->stream, "illegal opcode %d", op);
+ break;
+ }
+
+ return sizeof(insn);
+}
diff --git a/tcg/tci/README b/tcg/tci/README
new file mode 100644
index 000000000..f72a40a39
--- /dev/null
+++ b/tcg/tci/README
@@ -0,0 +1,120 @@
+TCG Interpreter (TCI) - Copyright (c) 2011 Stefan Weil.
+
+This file is released under the BSD license.
+
+1) Introduction
+
+TCG (Tiny Code Generator) is a code generator which translates
+code fragments ("basic blocks") from target code (any of the
+targets supported by QEMU) to a code representation which
+can be run on a host.
+
+QEMU can create native code for some hosts (arm, i386, ia64, ppc, ppc64,
+s390, sparc, x86_64). For others, unofficial host support was written.
+
+By adding a code generator for a virtual machine and using an
+interpreter for the generated bytecode, it is possible to
+support (almost) any host.
+
+This is what TCI (Tiny Code Interpreter) does.
+
+2) Implementation
+
+Like any other TCG backend, TCI implements the code generator in
+tcg-target.c.inc and tcg-target.h. Both files are in the directory tcg/tci.
+
+The additional file tcg/tci.c adds the interpreter and disassembler.
+
+The bytecode consists of opcodes (which, with only a few exceptions,
+have the same numeric values and semantics as used by TCG) and up
+to six arguments packed into a 32-bit integer. See comments in tci.c
+for details on the encoding.
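+
+As a rough sketch (based on the instruction emitters in tcg-target.c.inc
+in this patch), a typical three-register operation packs its fields into
+the 32-bit instruction word as
+
+    bits  0..7    opcode
+    bits  8..11   register operand r0
+    bits 12..15   register operand r1
+    bits 16..19   register operand r2
+
+while loads and stores replace the third register field with a signed
+16-bit offset in bits 16..31.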
+
+3) Usage
+
+For hosts without native TCG, the interpreter TCI must be enabled by
+
+ configure --enable-tcg-interpreter
+
+If configure is called without --enable-tcg-interpreter, it will
+suggest using this option. Enabling it automatically would require
+additional code in configure, which would have to be updated whenever
+new native TCG implementations are added.
+
+For hosts with native TCG, the interpreter TCI can be enabled by
+
+ configure --enable-tcg-interpreter
+
+The only difference between running QEMU with TCI and running without
+TCI should be speed. Especially during development of TCI, it was very
+useful to compare runs with and without TCI. Create /tmp/qemu.log by
+
+ qemu-system-i386 -d in_asm,op_opt,cpu -D /tmp/qemu.log -singlestep
+
+once with interpreter and once without interpreter and compare the resulting
+qemu.log files. This is also useful to see the effects of additional
+registers or additional opcodes (it is easy to modify the virtual machine).
+It can also be used to verify native TCGs.
+
+Hosts with native TCG can also enable TCI by claiming to be unsupported:
+
+ configure --cpu=unknown --enable-tcg-interpreter
+
+configure then no longer uses the native linker script (*.ld) for
+user mode emulation.
+
+
+4) Status
+
+TCI needs a special implementation for 32 and 64 bit hosts, 32 and 64 bit
+targets, and for hosts and targets with the same or different endianness.
+
+ | host (le) host (be)
+ | 32 64 32 64
+------------+------------------------------------------------------------
+target (le) | s0, u0 s1, u1 s?, u? s?, u?
+32 bit |
+ |
+target (le) | sc, uc s1, u1 s?, u? s?, u?
+64 bit |
+ |
+target (be) | sc, u0 sc, uc s?, u? s?, u?
+32 bit |
+ |
+target (be) | sc, uc sc, uc s?, u? s?, u?
+64 bit |
+ |
+
+System emulation
+s? = untested
+sc = compiles
+s0 = bios works
+s1 = grub works
+s2 = Linux boots
+
+Linux user mode emulation
+u? = untested
+uc = compiles
+u0 = static hello works
+u1 = linux-user-test works
+
+5) Todo list
+
+* TCI is not widely tested. It was written and tested on an x86_64 host
+ running i386 and x86_64 system emulation and Linux user mode.
+ A cross compiled QEMU for i386 host also works with the same basic tests.
+ A cross compiled QEMU for mipsel host works, too. It is terribly slow
+ because I run it in a mips malta emulation, so it is an interpreted
+ emulation in an emulation.
+ A cross compiled QEMU for arm host works (tested with pc bios).
+ A cross compiled QEMU for ppc host works at least partially:
+ i386-linux-user/qemu-i386 can run a simple hello-world program
+ (tested in a ppc emulation).
+
+* Some TCG opcodes are missing in the code generator and/or in the
+  interpreter. These opcodes raise a runtime exception, so it is
+ possible to see where code must be added.
+
+* It might be useful to have a runtime option which selects either the
+  native TCG or TCI, although QEMU would then have to include two TCGs.
+  Today, selecting TCI is a configure option, so you need two compilations
+  of QEMU.
diff --git a/tcg/tci/tcg-target-con-set.h b/tcg/tci/tcg-target-con-set.h
new file mode 100644
index 000000000..ae2dc3b84
--- /dev/null
+++ b/tcg/tci/tcg-target-con-set.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * TCI target-specific constraint sets.
+ * Copyright (c) 2021 Linaro
+ */
+
+/*
+ * C_On_Im(...) defines a constraint set with <n> outputs and <m> inputs.
+ * Each operand should be a sequence of constraint letters as defined by
+ * tcg-target-con-str.h; the constraint combination is inclusive or.
+ */
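+/*
+ * For example, C_O1_I2(r, r, r) below describes an operation with one
+ * register output and two register inputs, each allowed in any register.
+ */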
+C_O0_I1(r)
+C_O0_I2(r, r)
+C_O0_I3(r, r, r)
+C_O0_I4(r, r, r, r)
+C_O1_I1(r, r)
+C_O1_I2(r, r, r)
+C_O1_I4(r, r, r, r, r)
+C_O2_I1(r, r, r)
+C_O2_I2(r, r, r, r)
+C_O2_I4(r, r, r, r, r, r)
diff --git a/tcg/tci/tcg-target-con-str.h b/tcg/tci/tcg-target-con-str.h
new file mode 100644
index 000000000..87c0f19e9
--- /dev/null
+++ b/tcg/tci/tcg-target-con-str.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define TCI target-specific operand constraints.
+ * Copyright (c) 2021 Linaro
+ */
+
+/*
+ * Define constraint letters for register sets:
+ * REGS(letter, register_mask)
+ */
+REGS('r', MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS))
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
new file mode 100644
index 000000000..0cb16aaa8
--- /dev/null
+++ b/tcg/tci/tcg-target.c.inc
@@ -0,0 +1,862 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2009, 2011 Stefan Weil
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "../tcg-pool.c.inc"
+
+static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
+{
+ switch (op) {
+ case INDEX_op_goto_ptr:
+ return C_O0_I1(r);
+
+ case INDEX_op_ld8u_i32:
+ case INDEX_op_ld8s_i32:
+ case INDEX_op_ld16u_i32:
+ case INDEX_op_ld16s_i32:
+ case INDEX_op_ld_i32:
+ case INDEX_op_ld8u_i64:
+ case INDEX_op_ld8s_i64:
+ case INDEX_op_ld16u_i64:
+ case INDEX_op_ld16s_i64:
+ case INDEX_op_ld32u_i64:
+ case INDEX_op_ld32s_i64:
+ case INDEX_op_ld_i64:
+ case INDEX_op_not_i32:
+ case INDEX_op_not_i64:
+ case INDEX_op_neg_i32:
+ case INDEX_op_neg_i64:
+ case INDEX_op_ext8s_i32:
+ case INDEX_op_ext8s_i64:
+ case INDEX_op_ext16s_i32:
+ case INDEX_op_ext16s_i64:
+ case INDEX_op_ext8u_i32:
+ case INDEX_op_ext8u_i64:
+ case INDEX_op_ext16u_i32:
+ case INDEX_op_ext16u_i64:
+ case INDEX_op_ext32s_i64:
+ case INDEX_op_ext32u_i64:
+ case INDEX_op_ext_i32_i64:
+ case INDEX_op_extu_i32_i64:
+ case INDEX_op_bswap16_i32:
+ case INDEX_op_bswap16_i64:
+ case INDEX_op_bswap32_i32:
+ case INDEX_op_bswap32_i64:
+ case INDEX_op_bswap64_i64:
+ case INDEX_op_extract_i32:
+ case INDEX_op_extract_i64:
+ case INDEX_op_sextract_i32:
+ case INDEX_op_sextract_i64:
+ case INDEX_op_ctpop_i32:
+ case INDEX_op_ctpop_i64:
+ return C_O1_I1(r, r);
+
+ case INDEX_op_st8_i32:
+ case INDEX_op_st16_i32:
+ case INDEX_op_st_i32:
+ case INDEX_op_st8_i64:
+ case INDEX_op_st16_i64:
+ case INDEX_op_st32_i64:
+ case INDEX_op_st_i64:
+ return C_O0_I2(r, r);
+
+ case INDEX_op_div_i32:
+ case INDEX_op_div_i64:
+ case INDEX_op_divu_i32:
+ case INDEX_op_divu_i64:
+ case INDEX_op_rem_i32:
+ case INDEX_op_rem_i64:
+ case INDEX_op_remu_i32:
+ case INDEX_op_remu_i64:
+ case INDEX_op_add_i32:
+ case INDEX_op_add_i64:
+ case INDEX_op_sub_i32:
+ case INDEX_op_sub_i64:
+ case INDEX_op_mul_i32:
+ case INDEX_op_mul_i64:
+ case INDEX_op_and_i32:
+ case INDEX_op_and_i64:
+ case INDEX_op_andc_i32:
+ case INDEX_op_andc_i64:
+ case INDEX_op_eqv_i32:
+ case INDEX_op_eqv_i64:
+ case INDEX_op_nand_i32:
+ case INDEX_op_nand_i64:
+ case INDEX_op_nor_i32:
+ case INDEX_op_nor_i64:
+ case INDEX_op_or_i32:
+ case INDEX_op_or_i64:
+ case INDEX_op_orc_i32:
+ case INDEX_op_orc_i64:
+ case INDEX_op_xor_i32:
+ case INDEX_op_xor_i64:
+ case INDEX_op_shl_i32:
+ case INDEX_op_shl_i64:
+ case INDEX_op_shr_i32:
+ case INDEX_op_shr_i64:
+ case INDEX_op_sar_i32:
+ case INDEX_op_sar_i64:
+ case INDEX_op_rotl_i32:
+ case INDEX_op_rotl_i64:
+ case INDEX_op_rotr_i32:
+ case INDEX_op_rotr_i64:
+ case INDEX_op_setcond_i32:
+ case INDEX_op_setcond_i64:
+ case INDEX_op_deposit_i32:
+ case INDEX_op_deposit_i64:
+ case INDEX_op_clz_i32:
+ case INDEX_op_clz_i64:
+ case INDEX_op_ctz_i32:
+ case INDEX_op_ctz_i64:
+ return C_O1_I2(r, r, r);
+
+ case INDEX_op_brcond_i32:
+ case INDEX_op_brcond_i64:
+ return C_O0_I2(r, r);
+
+ case INDEX_op_add2_i32:
+ case INDEX_op_add2_i64:
+ case INDEX_op_sub2_i32:
+ case INDEX_op_sub2_i64:
+ return C_O2_I4(r, r, r, r, r, r);
+
+#if TCG_TARGET_REG_BITS == 32
+ case INDEX_op_brcond2_i32:
+ return C_O0_I4(r, r, r, r);
+#endif
+
+ case INDEX_op_mulu2_i32:
+ case INDEX_op_mulu2_i64:
+ case INDEX_op_muls2_i32:
+ case INDEX_op_muls2_i64:
+ return C_O2_I2(r, r, r, r);
+
+ case INDEX_op_movcond_i32:
+ case INDEX_op_movcond_i64:
+ case INDEX_op_setcond2_i32:
+ return C_O1_I4(r, r, r, r, r);
+
+ case INDEX_op_qemu_ld_i32:
+ return (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
+ ? C_O1_I1(r, r)
+ : C_O1_I2(r, r, r));
+ case INDEX_op_qemu_ld_i64:
+ return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r)
+ : TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? C_O2_I1(r, r, r)
+ : C_O2_I2(r, r, r, r));
+ case INDEX_op_qemu_st_i32:
+ return (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
+ ? C_O0_I2(r, r)
+ : C_O0_I3(r, r, r));
+ case INDEX_op_qemu_st_i64:
+ return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r)
+ : TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? C_O0_I3(r, r, r)
+ : C_O0_I4(r, r, r, r));
+
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static const int tcg_target_reg_alloc_order[] = {
+ TCG_REG_R2,
+ TCG_REG_R3,
+ TCG_REG_R4,
+ TCG_REG_R5,
+ TCG_REG_R6,
+ TCG_REG_R7,
+ TCG_REG_R8,
+ TCG_REG_R9,
+ TCG_REG_R10,
+ TCG_REG_R11,
+ TCG_REG_R12,
+ TCG_REG_R13,
+ TCG_REG_R14,
+ TCG_REG_R15,
+ TCG_REG_R1,
+ TCG_REG_R0,
+};
+
+#if MAX_OPC_PARAM_IARGS != 6
+# error Fix needed, number of supported input arguments changed!
+#endif
+
+/* No call arguments via registers. All will be stored on the "stack". */
+static const int tcg_target_call_iarg_regs[] = { };
+
+static const int tcg_target_call_oarg_regs[] = {
+ TCG_REG_R0,
+#if TCG_TARGET_REG_BITS == 32
+ TCG_REG_R1
+#endif
+};
+
+#ifdef CONFIG_DEBUG_TCG
+static const char *const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
+ "r00",
+ "r01",
+ "r02",
+ "r03",
+ "r04",
+ "r05",
+ "r06",
+ "r07",
+ "r08",
+ "r09",
+ "r10",
+ "r11",
+ "r12",
+ "r13",
+ "r14",
+ "r15",
+};
+#endif
+
+static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
+ intptr_t value, intptr_t addend)
+{
+ intptr_t diff = value - (intptr_t)(code_ptr + 1);
+
+ tcg_debug_assert(addend == 0);
+ tcg_debug_assert(type == 20);
+
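+    /* The 20-bit pc-relative displacement is stored in bits [12, 31]
+       of the instruction word. */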
+ if (diff == sextract32(diff, 0, type)) {
+ tcg_patch32(code_ptr, deposit32(*code_ptr, 32 - type, type, diff));
+ return true;
+ }
+ return false;
+}
+
+static void stack_bounds_check(TCGReg base, target_long offset)
+{
+ if (base == TCG_REG_CALL_STACK) {
+ tcg_debug_assert(offset >= 0);
+ tcg_debug_assert(offset < (TCG_STATIC_CALL_ARGS_SIZE +
+ TCG_STATIC_FRAME_SIZE));
+ }
+}
+
+static void tcg_out_op_l(TCGContext *s, TCGOpcode op, TCGLabel *l0)
+{
+ tcg_insn_unit insn = 0;
+
+ tcg_out_reloc(s, s->code_ptr, 20, l0, 0);
+ insn = deposit32(insn, 0, 8, op);
+ tcg_out32(s, insn);
+}
+
+static void tcg_out_op_p(TCGContext *s, TCGOpcode op, void *p0)
+{
+ tcg_insn_unit insn = 0;
+ intptr_t diff;
+
+ /* Special case for exit_tb: map null -> 0. */
+ if (p0 == NULL) {
+ diff = 0;
+ } else {
+ diff = p0 - (void *)(s->code_ptr + 1);
+ tcg_debug_assert(diff != 0);
+ if (diff != sextract32(diff, 0, 20)) {
+ tcg_raise_tb_overflow(s);
+ }
+ }
+ insn = deposit32(insn, 0, 8, op);
+ insn = deposit32(insn, 12, 20, diff);
+ tcg_out32(s, insn);
+}
+
+static void tcg_out_op_r(TCGContext *s, TCGOpcode op, TCGReg r0)
+{
+ tcg_insn_unit insn = 0;
+
+ insn = deposit32(insn, 0, 8, op);
+ insn = deposit32(insn, 8, 4, r0);
+ tcg_out32(s, insn);
+}
+
+static void tcg_out_op_v(TCGContext *s, TCGOpcode op)
+{
+ tcg_out32(s, (uint8_t)op);
+}
+
+static void tcg_out_op_ri(TCGContext *s, TCGOpcode op, TCGReg r0, int32_t i1)
+{
+ tcg_insn_unit insn = 0;
+
+ tcg_debug_assert(i1 == sextract32(i1, 0, 20));
+ insn = deposit32(insn, 0, 8, op);
+ insn = deposit32(insn, 8, 4, r0);
+ insn = deposit32(insn, 12, 20, i1);
+ tcg_out32(s, insn);
+}
+
+static void tcg_out_op_rl(TCGContext *s, TCGOpcode op, TCGReg r0, TCGLabel *l1)
+{
+ tcg_insn_unit insn = 0;
+
+ tcg_out_reloc(s, s->code_ptr, 20, l1, 0);
+ insn = deposit32(insn, 0, 8, op);
+ insn = deposit32(insn, 8, 4, r0);
+ tcg_out32(s, insn);
+}
+
+static void tcg_out_op_rr(TCGContext *s, TCGOpcode op, TCGReg r0, TCGReg r1)
+{
+ tcg_insn_unit insn = 0;
+
+ insn = deposit32(insn, 0, 8, op);
+ insn = deposit32(insn, 8, 4, r0);
+ insn = deposit32(insn, 12, 4, r1);
+ tcg_out32(s, insn);
+}
+
+static void tcg_out_op_rrm(TCGContext *s, TCGOpcode op,
+ TCGReg r0, TCGReg r1, TCGArg m2)
+{
+ tcg_insn_unit insn = 0;
+
+ tcg_debug_assert(m2 == extract32(m2, 0, 12));
+ insn = deposit32(insn, 0, 8, op);
+ insn = deposit32(insn, 8, 4, r0);
+ insn = deposit32(insn, 12, 4, r1);
+ insn = deposit32(insn, 20, 12, m2);
+ tcg_out32(s, insn);
+}
+
+static void tcg_out_op_rrr(TCGContext *s, TCGOpcode op,
+ TCGReg r0, TCGReg r1, TCGReg r2)
+{
+ tcg_insn_unit insn = 0;
+
+ insn = deposit32(insn, 0, 8, op);
+ insn = deposit32(insn, 8, 4, r0);
+ insn = deposit32(insn, 12, 4, r1);
+ insn = deposit32(insn, 16, 4, r2);
+ tcg_out32(s, insn);
+}
+
+static void tcg_out_op_rrs(TCGContext *s, TCGOpcode op,
+ TCGReg r0, TCGReg r1, intptr_t i2)
+{
+ tcg_insn_unit insn = 0;
+
+ tcg_debug_assert(i2 == sextract32(i2, 0, 16));
+ insn = deposit32(insn, 0, 8, op);
+ insn = deposit32(insn, 8, 4, r0);
+ insn = deposit32(insn, 12, 4, r1);
+ insn = deposit32(insn, 16, 16, i2);
+ tcg_out32(s, insn);
+}
+
+static void tcg_out_op_rrbb(TCGContext *s, TCGOpcode op, TCGReg r0,
+ TCGReg r1, uint8_t b2, uint8_t b3)
+{
+ tcg_insn_unit insn = 0;
+
+ tcg_debug_assert(b2 == extract32(b2, 0, 6));
+ tcg_debug_assert(b3 == extract32(b3, 0, 6));
+ insn = deposit32(insn, 0, 8, op);
+ insn = deposit32(insn, 8, 4, r0);
+ insn = deposit32(insn, 12, 4, r1);
+ insn = deposit32(insn, 16, 6, b2);
+ insn = deposit32(insn, 22, 6, b3);
+ tcg_out32(s, insn);
+}
+
+static void tcg_out_op_rrrc(TCGContext *s, TCGOpcode op,
+ TCGReg r0, TCGReg r1, TCGReg r2, TCGCond c3)
+{
+ tcg_insn_unit insn = 0;
+
+ insn = deposit32(insn, 0, 8, op);
+ insn = deposit32(insn, 8, 4, r0);
+ insn = deposit32(insn, 12, 4, r1);
+ insn = deposit32(insn, 16, 4, r2);
+ insn = deposit32(insn, 20, 4, c3);
+ tcg_out32(s, insn);
+}
+
+static void tcg_out_op_rrrm(TCGContext *s, TCGOpcode op,
+ TCGReg r0, TCGReg r1, TCGReg r2, TCGArg m3)
+{
+ tcg_insn_unit insn = 0;
+
+ tcg_debug_assert(m3 == extract32(m3, 0, 12));
+ insn = deposit32(insn, 0, 8, op);
+ insn = deposit32(insn, 8, 4, r0);
+ insn = deposit32(insn, 12, 4, r1);
+ insn = deposit32(insn, 16, 4, r2);
+ insn = deposit32(insn, 20, 12, m3);
+ tcg_out32(s, insn);
+}
+
+static void tcg_out_op_rrrbb(TCGContext *s, TCGOpcode op, TCGReg r0,
+ TCGReg r1, TCGReg r2, uint8_t b3, uint8_t b4)
+{
+ tcg_insn_unit insn = 0;
+
+ tcg_debug_assert(b3 == extract32(b3, 0, 6));
+ tcg_debug_assert(b4 == extract32(b4, 0, 6));
+ insn = deposit32(insn, 0, 8, op);
+ insn = deposit32(insn, 8, 4, r0);
+ insn = deposit32(insn, 12, 4, r1);
+ insn = deposit32(insn, 16, 4, r2);
+ insn = deposit32(insn, 20, 6, b3);
+ insn = deposit32(insn, 26, 6, b4);
+ tcg_out32(s, insn);
+}
+
+static void tcg_out_op_rrrrr(TCGContext *s, TCGOpcode op, TCGReg r0,
+ TCGReg r1, TCGReg r2, TCGReg r3, TCGReg r4)
+{
+ tcg_insn_unit insn = 0;
+
+ insn = deposit32(insn, 0, 8, op);
+ insn = deposit32(insn, 8, 4, r0);
+ insn = deposit32(insn, 12, 4, r1);
+ insn = deposit32(insn, 16, 4, r2);
+ insn = deposit32(insn, 20, 4, r3);
+ insn = deposit32(insn, 24, 4, r4);
+ tcg_out32(s, insn);
+}
+
+static void tcg_out_op_rrrr(TCGContext *s, TCGOpcode op,
+ TCGReg r0, TCGReg r1, TCGReg r2, TCGReg r3)
+{
+ tcg_insn_unit insn = 0;
+
+ insn = deposit32(insn, 0, 8, op);
+ insn = deposit32(insn, 8, 4, r0);
+ insn = deposit32(insn, 12, 4, r1);
+ insn = deposit32(insn, 16, 4, r2);
+ insn = deposit32(insn, 20, 4, r3);
+ tcg_out32(s, insn);
+}
+
+static void tcg_out_op_rrrrrc(TCGContext *s, TCGOpcode op,
+ TCGReg r0, TCGReg r1, TCGReg r2,
+ TCGReg r3, TCGReg r4, TCGCond c5)
+{
+ tcg_insn_unit insn = 0;
+
+ insn = deposit32(insn, 0, 8, op);
+ insn = deposit32(insn, 8, 4, r0);
+ insn = deposit32(insn, 12, 4, r1);
+ insn = deposit32(insn, 16, 4, r2);
+ insn = deposit32(insn, 20, 4, r3);
+ insn = deposit32(insn, 24, 4, r4);
+ insn = deposit32(insn, 28, 4, c5);
+ tcg_out32(s, insn);
+}
+
+static void tcg_out_op_rrrrrr(TCGContext *s, TCGOpcode op,
+ TCGReg r0, TCGReg r1, TCGReg r2,
+ TCGReg r3, TCGReg r4, TCGReg r5)
+{
+ tcg_insn_unit insn = 0;
+
+ insn = deposit32(insn, 0, 8, op);
+ insn = deposit32(insn, 8, 4, r0);
+ insn = deposit32(insn, 12, 4, r1);
+ insn = deposit32(insn, 16, 4, r2);
+ insn = deposit32(insn, 20, 4, r3);
+ insn = deposit32(insn, 24, 4, r4);
+ insn = deposit32(insn, 28, 4, r5);
+ tcg_out32(s, insn);
+}
+
+static void tcg_out_ldst(TCGContext *s, TCGOpcode op, TCGReg val,
+ TCGReg base, intptr_t offset)
+{
+ stack_bounds_check(base, offset);
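+    /* Offsets that do not fit the signed 16-bit field are formed in
+       TCG_REG_TMP first, so the final load/store uses offset 0. */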
+ if (offset != sextract32(offset, 0, 16)) {
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, offset);
+ tcg_out_op_rrr(s, (TCG_TARGET_REG_BITS == 32
+ ? INDEX_op_add_i32 : INDEX_op_add_i64),
+ TCG_REG_TMP, TCG_REG_TMP, base);
+ base = TCG_REG_TMP;
+ offset = 0;
+ }
+ tcg_out_op_rrs(s, op, val, base, offset);
+}
+
+static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg val, TCGReg base,
+ intptr_t offset)
+{
+ switch (type) {
+ case TCG_TYPE_I32:
+ tcg_out_ldst(s, INDEX_op_ld_i32, val, base, offset);
+ break;
+#if TCG_TARGET_REG_BITS == 64
+ case TCG_TYPE_I64:
+ tcg_out_ldst(s, INDEX_op_ld_i64, val, base, offset);
+ break;
+#endif
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
+{
+ switch (type) {
+ case TCG_TYPE_I32:
+ tcg_out_op_rr(s, INDEX_op_mov_i32, ret, arg);
+ break;
+#if TCG_TARGET_REG_BITS == 64
+ case TCG_TYPE_I64:
+ tcg_out_op_rr(s, INDEX_op_mov_i64, ret, arg);
+ break;
+#endif
+ default:
+ g_assert_not_reached();
+ }
+ return true;
+}
+
+static void tcg_out_movi(TCGContext *s, TCGType type,
+ TCGReg ret, tcg_target_long arg)
+{
+ switch (type) {
+ case TCG_TYPE_I32:
+#if TCG_TARGET_REG_BITS == 64
+ arg = (int32_t)arg;
+ /* fall through */
+ case TCG_TYPE_I64:
+#endif
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
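+    /* Immediates that fit the signed 20-bit field are encoded inline;
+       larger values are loaded from the constant pool via tci_movl. */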
+ if (arg == sextract32(arg, 0, 20)) {
+ tcg_out_op_ri(s, INDEX_op_tci_movi, ret, arg);
+ } else {
+ tcg_insn_unit insn = 0;
+
+ new_pool_label(s, arg, 20, s->code_ptr, 0);
+ insn = deposit32(insn, 0, 8, INDEX_op_tci_movl);
+ insn = deposit32(insn, 8, 4, ret);
+ tcg_out32(s, insn);
+ }
+}
+
+static void tcg_out_call(TCGContext *s, const tcg_insn_unit *func,
+ ffi_cif *cif)
+{
+ tcg_insn_unit insn = 0;
+ uint8_t which;
+
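+    /* Record how the return value should be stored:
+       0 = void, 1 = 32-bit, 2 = 64-bit. */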
+ if (cif->rtype == &ffi_type_void) {
+ which = 0;
+ } else if (cif->rtype->size == 4) {
+ which = 1;
+ } else {
+ tcg_debug_assert(cif->rtype->size == 8);
+ which = 2;
+ }
+ new_pool_l2(s, 20, s->code_ptr, 0, (uintptr_t)func, (uintptr_t)cif);
+ insn = deposit32(insn, 0, 8, INDEX_op_call);
+ insn = deposit32(insn, 8, 4, which);
+ tcg_out32(s, insn);
+}
+
+#if TCG_TARGET_REG_BITS == 64
+# define CASE_32_64(x) \
+ case glue(glue(INDEX_op_, x), _i64): \
+ case glue(glue(INDEX_op_, x), _i32):
+# define CASE_64(x) \
+ case glue(glue(INDEX_op_, x), _i64):
+#else
+# define CASE_32_64(x) \
+ case glue(glue(INDEX_op_, x), _i32):
+# define CASE_64(x)
+#endif
+
+static void tcg_out_op(TCGContext *s, TCGOpcode opc,
+ const TCGArg args[TCG_MAX_OP_ARGS],
+ const int const_args[TCG_MAX_OP_ARGS])
+{
+ TCGOpcode exts;
+
+ switch (opc) {
+ case INDEX_op_exit_tb:
+ tcg_out_op_p(s, opc, (void *)args[0]);
+ break;
+
+ case INDEX_op_goto_tb:
+ tcg_debug_assert(s->tb_jmp_insn_offset == 0);
+ /* indirect jump method. */
+ tcg_out_op_p(s, opc, s->tb_jmp_target_addr + args[0]);
+ set_jmp_reset_offset(s, args[0]);
+ break;
+
+ case INDEX_op_goto_ptr:
+ tcg_out_op_r(s, opc, args[0]);
+ break;
+
+ case INDEX_op_br:
+ tcg_out_op_l(s, opc, arg_label(args[0]));
+ break;
+
+ CASE_32_64(setcond)
+ tcg_out_op_rrrc(s, opc, args[0], args[1], args[2], args[3]);
+ break;
+
+ CASE_32_64(movcond)
+ case INDEX_op_setcond2_i32:
+ tcg_out_op_rrrrrc(s, opc, args[0], args[1], args[2],
+ args[3], args[4], args[5]);
+ break;
+
+ CASE_32_64(ld8u)
+ CASE_32_64(ld8s)
+ CASE_32_64(ld16u)
+ CASE_32_64(ld16s)
+ case INDEX_op_ld_i32:
+ CASE_64(ld32u)
+ CASE_64(ld32s)
+ CASE_64(ld)
+ CASE_32_64(st8)
+ CASE_32_64(st16)
+ case INDEX_op_st_i32:
+ CASE_64(st32)
+ CASE_64(st)
+ tcg_out_ldst(s, opc, args[0], args[1], args[2]);
+ break;
+
+ CASE_32_64(add)
+ CASE_32_64(sub)
+ CASE_32_64(mul)
+ CASE_32_64(and)
+ CASE_32_64(or)
+ CASE_32_64(xor)
+ CASE_32_64(andc) /* Optional (TCG_TARGET_HAS_andc_*). */
+ CASE_32_64(orc) /* Optional (TCG_TARGET_HAS_orc_*). */
+ CASE_32_64(eqv) /* Optional (TCG_TARGET_HAS_eqv_*). */
+ CASE_32_64(nand) /* Optional (TCG_TARGET_HAS_nand_*). */
+ CASE_32_64(nor) /* Optional (TCG_TARGET_HAS_nor_*). */
+ CASE_32_64(shl)
+ CASE_32_64(shr)
+ CASE_32_64(sar)
+ CASE_32_64(rotl) /* Optional (TCG_TARGET_HAS_rot_*). */
+ CASE_32_64(rotr) /* Optional (TCG_TARGET_HAS_rot_*). */
+ CASE_32_64(div) /* Optional (TCG_TARGET_HAS_div_*). */
+ CASE_32_64(divu) /* Optional (TCG_TARGET_HAS_div_*). */
+ CASE_32_64(rem) /* Optional (TCG_TARGET_HAS_div_*). */
+ CASE_32_64(remu) /* Optional (TCG_TARGET_HAS_div_*). */
+ CASE_32_64(clz) /* Optional (TCG_TARGET_HAS_clz_*). */
+ CASE_32_64(ctz) /* Optional (TCG_TARGET_HAS_ctz_*). */
+ tcg_out_op_rrr(s, opc, args[0], args[1], args[2]);
+ break;
+
+ CASE_32_64(deposit) /* Optional (TCG_TARGET_HAS_deposit_*). */
+ {
+ TCGArg pos = args[3], len = args[4];
+ TCGArg max = opc == INDEX_op_deposit_i32 ? 32 : 64;
+
+ tcg_debug_assert(pos < max);
+ tcg_debug_assert(pos + len <= max);
+
+ tcg_out_op_rrrbb(s, opc, args[0], args[1], args[2], pos, len);
+ }
+ break;
+
+ CASE_32_64(extract) /* Optional (TCG_TARGET_HAS_extract_*). */
+ CASE_32_64(sextract) /* Optional (TCG_TARGET_HAS_sextract_*). */
+ {
+ TCGArg pos = args[2], len = args[3];
+ TCGArg max = tcg_op_defs[opc].flags & TCG_OPF_64BIT ? 64 : 32;
+
+ tcg_debug_assert(pos < max);
+ tcg_debug_assert(pos + len <= max);
+
+ tcg_out_op_rrbb(s, opc, args[0], args[1], pos, len);
+ }
+ break;
+
+ CASE_32_64(brcond)
+ tcg_out_op_rrrc(s, (opc == INDEX_op_brcond_i32
+ ? INDEX_op_setcond_i32 : INDEX_op_setcond_i64),
+ TCG_REG_TMP, args[0], args[1], args[2]);
+ tcg_out_op_rl(s, opc, TCG_REG_TMP, arg_label(args[3]));
+ break;
+
+ CASE_32_64(neg) /* Optional (TCG_TARGET_HAS_neg_*). */
+ CASE_32_64(not) /* Optional (TCG_TARGET_HAS_not_*). */
+ CASE_32_64(ext8s) /* Optional (TCG_TARGET_HAS_ext8s_*). */
+ CASE_32_64(ext8u) /* Optional (TCG_TARGET_HAS_ext8u_*). */
+ CASE_32_64(ext16s) /* Optional (TCG_TARGET_HAS_ext16s_*). */
+ CASE_32_64(ext16u) /* Optional (TCG_TARGET_HAS_ext16u_*). */
+ CASE_64(ext32s) /* Optional (TCG_TARGET_HAS_ext32s_i64). */
+ CASE_64(ext32u) /* Optional (TCG_TARGET_HAS_ext32u_i64). */
+ CASE_64(ext_i32)
+ CASE_64(extu_i32)
+ CASE_32_64(ctpop) /* Optional (TCG_TARGET_HAS_ctpop_*). */
+ case INDEX_op_bswap32_i32: /* Optional (TCG_TARGET_HAS_bswap32_i32). */
+ case INDEX_op_bswap64_i64: /* Optional (TCG_TARGET_HAS_bswap64_i64). */
+ tcg_out_op_rr(s, opc, args[0], args[1]);
+ break;
+
+ case INDEX_op_bswap16_i32: /* Optional (TCG_TARGET_HAS_bswap16_i32). */
+ exts = INDEX_op_ext16s_i32;
+ goto do_bswap;
+ case INDEX_op_bswap16_i64: /* Optional (TCG_TARGET_HAS_bswap16_i64). */
+ exts = INDEX_op_ext16s_i64;
+ goto do_bswap;
+ case INDEX_op_bswap32_i64: /* Optional (TCG_TARGET_HAS_bswap32_i64). */
+ exts = INDEX_op_ext32s_i64;
+ do_bswap:
+ /* The base tci bswaps zero-extend, and ignore high bits. */
+ tcg_out_op_rr(s, opc, args[0], args[1]);
+ if (args[2] & TCG_BSWAP_OS) {
+ tcg_out_op_rr(s, exts, args[0], args[0]);
+ }
+ break;
+
+ CASE_32_64(add2)
+ CASE_32_64(sub2)
+ tcg_out_op_rrrrrr(s, opc, args[0], args[1], args[2],
+ args[3], args[4], args[5]);
+ break;
+
+#if TCG_TARGET_REG_BITS == 32
+ case INDEX_op_brcond2_i32:
+ tcg_out_op_rrrrrc(s, INDEX_op_setcond2_i32, TCG_REG_TMP,
+ args[0], args[1], args[2], args[3], args[4]);
+ tcg_out_op_rl(s, INDEX_op_brcond_i32, TCG_REG_TMP, arg_label(args[5]));
+ break;
+#endif
+
+ CASE_32_64(mulu2)
+ CASE_32_64(muls2)
+ tcg_out_op_rrrr(s, opc, args[0], args[1], args[2], args[3]);
+ break;
+
+ case INDEX_op_qemu_ld_i32:
+ case INDEX_op_qemu_st_i32:
+ if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) {
+ tcg_out_op_rrm(s, opc, args[0], args[1], args[2]);
+ } else {
+ tcg_out_op_rrrm(s, opc, args[0], args[1], args[2], args[3]);
+ }
+ break;
+
+ case INDEX_op_qemu_ld_i64:
+ case INDEX_op_qemu_st_i64:
+ if (TCG_TARGET_REG_BITS == 64) {
+ tcg_out_op_rrm(s, opc, args[0], args[1], args[2]);
+ } else if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) {
+ tcg_out_op_rrrm(s, opc, args[0], args[1], args[2], args[3]);
+ } else {
+ tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_TMP, args[4]);
+ tcg_out_op_rrrrr(s, opc, args[0], args[1],
+ args[2], args[3], TCG_REG_TMP);
+ }
+ break;
+
+ case INDEX_op_mb:
+ tcg_out_op_v(s, opc);
+ break;
+
+ case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
+ case INDEX_op_mov_i64:
+ case INDEX_op_call: /* Always emitted via tcg_out_call. */
+ default:
+ tcg_abort();
+ }
+}
+
+static void tcg_out_st(TCGContext *s, TCGType type, TCGReg val, TCGReg base,
+ intptr_t offset)
+{
+ stack_bounds_check(base, offset);
+ switch (type) {
+ case TCG_TYPE_I32:
+ tcg_out_op_rrs(s, INDEX_op_st_i32, val, base, offset);
+ break;
+#if TCG_TARGET_REG_BITS == 64
+ case TCG_TYPE_I64:
+ tcg_out_op_rrs(s, INDEX_op_st_i64, val, base, offset);
+ break;
+#endif
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
+ TCGReg base, intptr_t ofs)
+{
+ return false;
+}
+
+/* Test if a constant matches the constraint. */
+static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
+{
+ return ct & TCG_CT_CONST;
+}
+
+static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
+{
+ memset(p, 0, sizeof(*p) * count);
+}
+
+static void tcg_target_init(TCGContext *s)
+{
+#if defined(CONFIG_DEBUG_TCG_INTERPRETER)
+ const char *envval = getenv("DEBUG_TCG");
+ if (envval) {
+ qemu_set_log(strtol(envval, NULL, 0));
+ }
+#endif
+
+ /* The current code uses uint8_t for tcg operations. */
+ tcg_debug_assert(tcg_op_defs_max <= UINT8_MAX);
+
+ /* Registers available for 32 bit operations. */
+ tcg_target_available_regs[TCG_TYPE_I32] = BIT(TCG_TARGET_NB_REGS) - 1;
+ /* Registers available for 64 bit operations. */
+ tcg_target_available_regs[TCG_TYPE_I64] = BIT(TCG_TARGET_NB_REGS) - 1;
+ /*
+ * The interpreter "registers" are in the local stack frame and
+ * cannot be clobbered by the called helper functions. However,
+ * the interpreter assumes a 64-bit return value and assigns to
+ * the return value registers.
+ */
+ tcg_target_call_clobber_regs =
+ MAKE_64BIT_MASK(TCG_REG_R0, 64 / TCG_TARGET_REG_BITS);
+
+ s->reserved_regs = 0;
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
+
+ /* The call arguments come first, followed by the temp storage. */
+ tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
+ TCG_STATIC_FRAME_SIZE);
+}
+
+/* Generate global QEMU prologue and epilogue code. */
+static inline void tcg_target_qemu_prologue(TCGContext *s)
+{
+}
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
new file mode 100644
index 000000000..033e613f2
--- /dev/null
+++ b/tcg/tci/tcg-target.h
@@ -0,0 +1,180 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2009, 2011 Stefan Weil
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+/*
+ * This code implements a TCG which does not generate machine code for some
+ * real target machine but which generates virtual machine code for an
+ * interpreter. Interpreted pseudo code is slow, but it works on any host.
+ *
+ * Some remarks might help in understanding the code:
+ *
+ * "target" or "TCG target" is the machine which runs the generated code.
+ * This is different from the usual meaning in QEMU, where "target" is the
+ * emulated machine. So normally the QEMU host is identical to the TCG target.
+ * Here the TCG target is a virtual machine, but this virtual machine must
+ * use the same word size as the real machine.
+ * Therefore, we need both 32 and 64 bit virtual machines (interpreters).
+ */
+
+#ifndef TCG_TARGET_H
+#define TCG_TARGET_H
+
+#define TCG_TARGET_INTERPRETER 1
+#define TCG_TARGET_INSN_UNIT_SIZE 4
+#define TCG_TARGET_TLB_DISPLACEMENT_BITS 32
+#define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)
+
+#if UINTPTR_MAX == UINT32_MAX
+# define TCG_TARGET_REG_BITS 32
+#elif UINTPTR_MAX == UINT64_MAX
+# define TCG_TARGET_REG_BITS 64
+#else
+# error Unknown pointer size for tci target
+#endif
+
+#ifdef CONFIG_DEBUG_TCG
+/* Enable debug output. */
+#define CONFIG_DEBUG_TCG_INTERPRETER
+#endif
+
+/* Optional instructions. */
+
+#define TCG_TARGET_HAS_bswap16_i32 1
+#define TCG_TARGET_HAS_bswap32_i32 1
+#define TCG_TARGET_HAS_div_i32 1
+#define TCG_TARGET_HAS_rem_i32 1
+#define TCG_TARGET_HAS_ext8s_i32 1
+#define TCG_TARGET_HAS_ext16s_i32 1
+#define TCG_TARGET_HAS_ext8u_i32 1
+#define TCG_TARGET_HAS_ext16u_i32 1
+#define TCG_TARGET_HAS_andc_i32 1
+#define TCG_TARGET_HAS_deposit_i32 1
+#define TCG_TARGET_HAS_extract_i32 1
+#define TCG_TARGET_HAS_sextract_i32 1
+#define TCG_TARGET_HAS_extract2_i32 0
+#define TCG_TARGET_HAS_eqv_i32 1
+#define TCG_TARGET_HAS_nand_i32 1
+#define TCG_TARGET_HAS_nor_i32 1
+#define TCG_TARGET_HAS_clz_i32 1
+#define TCG_TARGET_HAS_ctz_i32 1
+#define TCG_TARGET_HAS_ctpop_i32 1
+#define TCG_TARGET_HAS_neg_i32 1
+#define TCG_TARGET_HAS_not_i32 1
+#define TCG_TARGET_HAS_orc_i32 1
+#define TCG_TARGET_HAS_rot_i32 1
+#define TCG_TARGET_HAS_movcond_i32 1
+#define TCG_TARGET_HAS_muls2_i32 1
+#define TCG_TARGET_HAS_muluh_i32 0
+#define TCG_TARGET_HAS_mulsh_i32 0
+#define TCG_TARGET_HAS_direct_jump 0
+#define TCG_TARGET_HAS_qemu_st8_i32 0
+
+#if TCG_TARGET_REG_BITS == 64
+#define TCG_TARGET_HAS_extrl_i64_i32 0
+#define TCG_TARGET_HAS_extrh_i64_i32 0
+#define TCG_TARGET_HAS_bswap16_i64 1
+#define TCG_TARGET_HAS_bswap32_i64 1
+#define TCG_TARGET_HAS_bswap64_i64 1
+#define TCG_TARGET_HAS_deposit_i64 1
+#define TCG_TARGET_HAS_extract_i64 1
+#define TCG_TARGET_HAS_sextract_i64 1
+#define TCG_TARGET_HAS_extract2_i64 0
+#define TCG_TARGET_HAS_div_i64 1
+#define TCG_TARGET_HAS_rem_i64 1
+#define TCG_TARGET_HAS_ext8s_i64 1
+#define TCG_TARGET_HAS_ext16s_i64 1
+#define TCG_TARGET_HAS_ext32s_i64 1
+#define TCG_TARGET_HAS_ext8u_i64 1
+#define TCG_TARGET_HAS_ext16u_i64 1
+#define TCG_TARGET_HAS_ext32u_i64 1
+#define TCG_TARGET_HAS_andc_i64 1
+#define TCG_TARGET_HAS_eqv_i64 1
+#define TCG_TARGET_HAS_nand_i64 1
+#define TCG_TARGET_HAS_nor_i64 1
+#define TCG_TARGET_HAS_clz_i64 1
+#define TCG_TARGET_HAS_ctz_i64 1
+#define TCG_TARGET_HAS_ctpop_i64 1
+#define TCG_TARGET_HAS_neg_i64 1
+#define TCG_TARGET_HAS_not_i64 1
+#define TCG_TARGET_HAS_orc_i64 1
+#define TCG_TARGET_HAS_rot_i64 1
+#define TCG_TARGET_HAS_movcond_i64 1
+#define TCG_TARGET_HAS_muls2_i64 1
+#define TCG_TARGET_HAS_add2_i32 1
+#define TCG_TARGET_HAS_sub2_i32 1
+#define TCG_TARGET_HAS_mulu2_i32 1
+#define TCG_TARGET_HAS_add2_i64 1
+#define TCG_TARGET_HAS_sub2_i64 1
+#define TCG_TARGET_HAS_mulu2_i64 1
+#define TCG_TARGET_HAS_muluh_i64 0
+#define TCG_TARGET_HAS_mulsh_i64 0
+#else
+#define TCG_TARGET_HAS_mulu2_i32 1
+#endif /* TCG_TARGET_REG_BITS == 64 */
+
+/* Number of registers available. */
+#define TCG_TARGET_NB_REGS 16
+
+/* List of registers which are used by TCG. */
+typedef enum {
+ TCG_REG_R0 = 0,
+ TCG_REG_R1,
+ TCG_REG_R2,
+ TCG_REG_R3,
+ TCG_REG_R4,
+ TCG_REG_R5,
+ TCG_REG_R6,
+ TCG_REG_R7,
+ TCG_REG_R8,
+ TCG_REG_R9,
+ TCG_REG_R10,
+ TCG_REG_R11,
+ TCG_REG_R12,
+ TCG_REG_R13,
+ TCG_REG_R14,
+ TCG_REG_R15,
+
+ TCG_REG_TMP = TCG_REG_R13,
+ TCG_AREG0 = TCG_REG_R14,
+ TCG_REG_CALL_STACK = TCG_REG_R15,
+} TCGReg;
+
+/* Used for function call generation. */
+#define TCG_TARGET_CALL_STACK_OFFSET 0
+#define TCG_TARGET_STACK_ALIGN 8
+
+#define HAVE_TCG_QEMU_TB_EXEC
+#define TCG_TARGET_NEED_POOL_LABELS
+
+/* We could notice __i386__ or __s390x__ and reduce the barriers depending
+ on the host. But if you want performance, you use the normal backend.
+ We prefer consistency across hosts on this. */
+#define TCG_TARGET_DEFAULT_MO (0)
+
+#define TCG_TARGET_HAS_MEMORY_BSWAP 1
+
+/* not defined -- call should be eliminated at compile time */
+void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+
+#endif /* TCG_TARGET_H */