aboutsummaryrefslogtreecommitdiffstats
path: root/capstone/arch/BPF
diff options
context:
space:
mode:
Diffstat (limited to 'capstone/arch/BPF')
-rw-r--r--capstone/arch/BPF/BPFConstants.h88
-rw-r--r--capstone/arch/BPF/BPFDisassembler.c458
-rw-r--r--capstone/arch/BPF/BPFDisassembler.h27
-rw-r--r--capstone/arch/BPF/BPFInstPrinter.c280
-rw-r--r--capstone/arch/BPF/BPFInstPrinter.h16
-rw-r--r--capstone/arch/BPF/BPFMapping.c506
-rw-r--r--capstone/arch/BPF/BPFMapping.h21
-rw-r--r--capstone/arch/BPF/BPFModule.c34
-rw-r--r--capstone/arch/BPF/BPFModule.h12
9 files changed, 1442 insertions, 0 deletions
diff --git a/capstone/arch/BPF/BPFConstants.h b/capstone/arch/BPF/BPFConstants.h
new file mode 100644
index 000000000..d12590460
--- /dev/null
+++ b/capstone/arch/BPF/BPFConstants.h
@@ -0,0 +1,88 @@
+/* Capstone Disassembly Engine */
+/* BPF Backend by david942j <david942j@gmail.com>, 2019 */
+
+/* This file defines constants and macros used for parsing a BPF instruction */
+
+#ifndef CS_BPF_CONSTANTS_H
+#define CS_BPF_CONSTANTS_H
+
+#define BPF_CLASS(code) ((code) & 0x7) ///< instruction class: bits 0-2 of the opcode
+
+/* Instruction classes */
+#define BPF_CLASS_LD 0x00
+#define BPF_CLASS_LDX 0x01
+#define BPF_CLASS_ST 0x02
+#define BPF_CLASS_STX 0x03
+#define BPF_CLASS_ALU 0x04
+#define BPF_CLASS_JMP 0x05
+#define BPF_CLASS_RET 0x06 ///< cBPF only
+#define BPF_CLASS_MISC 0x07 ///< cBPF only
+#define BPF_CLASS_ALU64 0x07 ///< eBPF only (same value as BPF_CLASS_MISC)
+
+#define BPF_OP(code) ((code) & 0xf0) ///< ALU/JMP operation: bits 4-7 of the opcode
+
+/* Types of ALU instruction */
+#define BPF_ALU_ADD 0x00
+#define BPF_ALU_SUB 0x10
+#define BPF_ALU_MUL 0x20
+#define BPF_ALU_DIV 0x30
+#define BPF_ALU_OR 0x40
+#define BPF_ALU_AND 0x50
+#define BPF_ALU_LSH 0x60
+#define BPF_ALU_RSH 0x70
+#define BPF_ALU_NEG 0x80
+#define BPF_ALU_MOD 0x90
+#define BPF_ALU_XOR 0xa0
+#define BPF_ALU_MOV 0xb0 ///< eBPF only: mov reg to reg
+#define BPF_ALU_ARSH 0xc0 ///< eBPF only: sign extending shift right
+#define BPF_ALU_END 0xd0 ///< eBPF only: endianness conversion
+
+/* Types of jmp instruction */
+#define BPF_JUMP_JA 0x00 ///< goto
+#define BPF_JUMP_JEQ 0x10 ///< '=='
+#define BPF_JUMP_JGT 0x20 ///< unsigned '>'
+#define BPF_JUMP_JGE 0x30 ///< unsigned '>='
+#define BPF_JUMP_JSET 0x40 ///< '&'
+#define BPF_JUMP_JNE 0x50 ///< eBPF only: '!='
+#define BPF_JUMP_JSGT 0x60 ///< eBPF only: signed '>'
+#define BPF_JUMP_JSGE 0x70 ///< eBPF only: signed '>='
+#define BPF_JUMP_CALL 0x80 ///< eBPF only: function call
+#define BPF_JUMP_EXIT 0x90 ///< eBPF only: exit
+#define BPF_JUMP_JLT 0xa0 ///< eBPF only: unsigned '<'
+#define BPF_JUMP_JLE 0xb0 ///< eBPF only: unsigned '<='
+#define BPF_JUMP_JSLT 0xc0 ///< eBPF only: signed '<'
+#define BPF_JUMP_JSLE 0xd0 ///< eBPF only: signed '<='
+
+#define BPF_SRC(code) ((code) & 0x08) ///< source selector: bit 3 of the opcode
+#define BPF_RVAL(code) ((code) & 0x18) /* cBPF only: for return types */
+/* Source operand */
+#define BPF_SRC_K 0x00
+#define BPF_SRC_X 0x08
+#define BPF_SRC_A 0x10 /* cBPF only */
+
+#define BPF_SRC_LITTLE BPF_SRC_K ///< with BPF_ALU_END: selects the le<N> form
+#define BPF_SRC_BIG BPF_SRC_X ///< with BPF_ALU_END: selects the be<N> form
+
+#define BPF_SIZE(code) ((code) & 0x18) ///< load/store size: bits 3-4 of the opcode
+/* Size modifier */
+#define BPF_SIZE_W 0x00 ///< word
+#define BPF_SIZE_H 0x08 ///< half word
+#define BPF_SIZE_B 0x10 ///< byte
+#define BPF_SIZE_DW 0x18 ///< eBPF only: double word
+
+#define BPF_MODE(code) ((code) & 0xe0) ///< load/store mode: bits 5-7 of the opcode
+/* Mode modifier */
+#define BPF_MODE_IMM 0x00 ///< used for 32-bit mov in cBPF and 64-bit in eBPF
+#define BPF_MODE_ABS 0x20
+#define BPF_MODE_IND 0x40
+#define BPF_MODE_MEM 0x60
+#define BPF_MODE_LEN 0x80 ///< cBPF only, reserved in eBPF
+#define BPF_MODE_MSH 0xa0 ///< cBPF only, reserved in eBPF
+#define BPF_MODE_XADD 0xc0 ///< eBPF only: exclusive add
+
+#define BPF_MISCOP(code) ((code) & 0x80) ///< misc operation: bit 7 of the opcode
+/* Operation of misc */
+#define BPF_MISCOP_TAX 0x00
+#define BPF_MISCOP_TXA 0x80
+
+#endif
diff --git a/capstone/arch/BPF/BPFDisassembler.c b/capstone/arch/BPF/BPFDisassembler.c
new file mode 100644
index 000000000..cea475236
--- /dev/null
+++ b/capstone/arch/BPF/BPFDisassembler.c
@@ -0,0 +1,458 @@
+/* Capstone Disassembly Engine */
+/* BPF Backend by david942j <david942j@gmail.com>, 2019 */
+
+#ifdef CAPSTONE_HAS_BPF
+
+#include <string.h>
+#include <stddef.h> // offsetof macro
+
+#include "BPFConstants.h"
+#include "BPFDisassembler.h"
+#include "BPFMapping.h"
+#include "../../cs_priv.h"
+
+/* Read a 16-bit value from code[0..1] using the byte order selected by ud->mode. */
+static uint16_t read_u16(cs_struct *ud, const uint8_t *code)
+{
+	/* index of the most significant byte: 0 for big endian, 1 otherwise */
+	const unsigned msb = MODE_IS_BIG_ENDIAN(ud->mode) ? 0 : 1;
+
+	return (((uint16_t)code[msb] << 8) | code[1 - msb]);
+}
+
+/* Read a 32-bit value from code[0..3] as two 16-bit halves, honouring ud->mode. */
+static uint32_t read_u32(cs_struct *ud, const uint8_t *code)
+{
+	const uint16_t first = read_u16(ud, code);
+	const uint16_t second = read_u16(ud, code + 2);
+
+	/* big endian: the first half is the high one; little endian: the second */
+	if (MODE_IS_BIG_ENDIAN(ud->mode))
+		return ((uint32_t)first << 16) | second;
+	return ((uint32_t)second << 16) | first;
+}
+
+/*
+ * Allocate a bpf_internal for one instruction.
+ *
+ * Returns NULL when fewer than 8 bytes of code are available (8 is the
+ * default instruction size set below) or when the allocation fails.
+ * The returned structure is zero-filled, so fields that a particular fetch
+ * routine does not set are never left indeterminate; insn_size defaults to 8.
+ * The caller releases it with cs_mem_free().
+ */
+static bpf_internal *alloc_bpf_internal(size_t code_len)
+{
+	bpf_internal *bpf;
+
+	if (code_len < 8)
+		return NULL;
+	bpf = cs_mem_malloc(sizeof(*bpf));
+	if (bpf == NULL)
+		return NULL;
+	/* every fetch path fills only a subset of the fields */
+	memset(bpf, 0, sizeof(*bpf));
+	/* default value */
+	bpf->insn_size = 8;
+	return bpf;
+}
+
+/*
+ * Parse one cBPF instruction: a 16-bit opcode, the jt and jf bytes, and a
+ * 32-bit k. Returns NULL if alloc_bpf_internal() fails.
+ */
+static bpf_internal* fetch_cbpf(cs_struct *ud, const uint8_t *code,
+		size_t code_len)
+{
+	bpf_internal *insn = alloc_bpf_internal(code_len);
+
+	if (insn != NULL) {
+		insn->op = read_u16(ud, code);
+		insn->jt = code[2];
+		insn->jf = code[3];
+		insn->k = read_u32(ud, code + 4);
+	}
+	return insn;
+}
+
+/*
+ * Parse one eBPF instruction. The opcode is the first byte. Every opcode
+ * except BPF_LD | BPF_DW | BPF_IMM is 8 bytes long; that one is 16 bytes and
+ * its 64-bit immediate is built from the imm fields of both 8-byte halves.
+ * Returns NULL on allocation failure or when the code is too short.
+ */
+static bpf_internal* fetch_ebpf(cs_struct *ud, const uint8_t *code,
+		size_t code_len)
+{
+	bpf_internal *insn = alloc_bpf_internal(code_len);
+	uint64_t imm_low;
+	uint64_t imm_high;
+
+	if (insn == NULL)
+		return NULL;
+
+	insn->op = (uint16_t)code[0];
+
+	if (insn->op != (BPF_CLASS_LD | BPF_SIZE_DW | BPF_MODE_IMM)) {
+		/* regular 8-byte form: dst/src nibbles, 16-bit offset, 32-bit imm */
+		insn->dst = code[1] & 0xf;
+		insn->src = (code[1] & 0xf0) >> 4;
+		insn->offset = read_u16(ud, code + 2);
+		insn->k = read_u32(ud, code + 4);
+		return insn;
+	}
+
+	/* 16-byte form: the second block must be present as well */
+	if (code_len < 16) {
+		cs_mem_free(insn);
+		return NULL;
+	}
+	imm_low = read_u32(ud, code + 4);
+	imm_high = read_u32(ud, code + 12);
+	insn->k = imm_low | (imm_high << 32);
+	insn->insn_size = 16;
+	return insn;
+}
+
+/*
+ * Register validation helpers for the eBPF decoders.
+ * NOTE: each macro executes `return false;` in the enclosing function when
+ * the check fails, so they may only be used inside functions returning bool.
+ * The `ud` argument is accepted for symmetry but not used.
+ */
+
+/* r0..r10 may be read */
+#define CHECK_READABLE_REG(ud, reg) do { \
+		if (! ((reg) >= BPF_REG_R0 && (reg) <= BPF_REG_R10)) \
+			return false; \
+	} while (0)
+
+/* only r0..r9 may be written (r10 is excluded) */
+#define CHECK_WRITABLE_REG(ud, reg) do { \
+		if (! ((reg) >= BPF_REG_R0 && (reg) < BPF_REG_R10)) \
+			return false; \
+	} while (0)
+
+/* r is the raw register number from the instruction; it is rebased on BPF_REG_R0 */
+#define CHECK_READABLE_AND_PUSH(ud, MI, r) do { \
+		CHECK_READABLE_REG(ud, (r) + BPF_REG_R0); \
+		MCOperand_CreateReg0(MI, (r) + BPF_REG_R0); \
+	} while (0)
+
+#define CHECK_WRITABLE_AND_PUSH(ud, MI, r) do { \
+		CHECK_WRITABLE_REG(ud, (r) + BPF_REG_R0); \
+		MCOperand_CreateReg0(MI, (r) + BPF_REG_R0); \
+	} while (0)
+
+/*
+ * Validate a load instruction (class LD or LDX) and push its operands on MI.
+ * Returns false when the opcode is not a valid load for the current mode.
+ *
+ * Operands pushed, in order:
+ *  cBPF: #k / M[k] / [k] / 4*([k]&0xf) -> imm k
+ *        [x+k]                         -> reg X, imm k
+ *        #len                          -> nothing
+ *  eBPF: lddw / ld [k]                 -> imm k
+ *        ld [src+k]                    -> reg src, imm k
+ *        ldx dst, [src+off]            -> reg dst, reg src, imm off
+ */
+static bool decodeLoad(cs_struct *ud, MCInst *MI, bpf_internal *bpf)
+{
+	if (!EBPF_MODE(ud)) {
+		/*
+		 *  +------+-----------+--------------------+
+		 *  | ldb  |    [k]    |       [x+k]        |
+		 *  | ldh  |    [k]    |       [x+k]        |
+		 *  | ldxb |         4*([k]&0xf)            |
+		 *  +------+-----------+--------------------+
+		 */
+		if (BPF_SIZE(bpf->op) == BPF_SIZE_DW)
+			return false;
+		if (BPF_SIZE(bpf->op) == BPF_SIZE_B || BPF_SIZE(bpf->op) == BPF_SIZE_H) {
+			if (BPF_CLASS(bpf->op) != BPF_CLASS_LD) {
+				/*
+				 * The only sub-word ldx is BPF_LDX | BPF_B | BPF_MSH,
+				 * which is the encoding of "ldxb 4*([k]&0xf)".
+				 */
+				if (BPF_SIZE(bpf->op) == BPF_SIZE_B &&
+						BPF_MODE(bpf->op) == BPF_MODE_MSH) {
+					MCOperand_CreateImm0(MI, bpf->k);
+					return true;
+				}
+				return false;
+			}
+			/* can only be BPF_ABS and BPF_IND */
+			if (BPF_MODE(bpf->op) == BPF_MODE_ABS) {
+				MCOperand_CreateImm0(MI, bpf->k);
+				return true;
+			}
+			else if (BPF_MODE(bpf->op) == BPF_MODE_IND) {
+				MCOperand_CreateReg0(MI, BPF_REG_X);
+				MCOperand_CreateImm0(MI, bpf->k);
+				return true;
+			}
+			return false;
+		}
+		/*
+		 *  +-----+----+------+------+-----+-------+
+		 *  | ld  | #k | #len | M[k] | [k] | [x+k] |
+		 *  +-----+----+------+------+-----+-------+
+		 *  | ldx | #k | #len | M[k] | 4*([k]&0xf) |
+		 *  +-----+----+------+------+-------------+
+		 */
+		switch (BPF_MODE(bpf->op)) {
+		default:
+			break;
+		case BPF_MODE_IMM:
+			MCOperand_CreateImm0(MI, bpf->k);
+			return true;
+		case BPF_MODE_LEN:
+			return true;
+		case BPF_MODE_MEM:
+			MCOperand_CreateImm0(MI, bpf->k);
+			return true;
+		}
+		if (BPF_CLASS(bpf->op) == BPF_CLASS_LD) {
+			if (BPF_MODE(bpf->op) == BPF_MODE_ABS) {
+				MCOperand_CreateImm0(MI, bpf->k);
+				return true;
+			}
+			else if (BPF_MODE(bpf->op) == BPF_MODE_IND) {
+				MCOperand_CreateReg0(MI, BPF_REG_X);
+				MCOperand_CreateImm0(MI, bpf->k);
+				return true;
+			}
+		}
+		else { /* LDX */
+			/* word-sized MSH stays accepted so existing input keeps decoding */
+			if (BPF_MODE(bpf->op) == BPF_MODE_MSH) {
+				MCOperand_CreateImm0(MI, bpf->k);
+				return true;
+			}
+		}
+		return false;
+	}
+
+	/* eBPF mode */
+	/*
+	 * - IMM: lddw imm64
+	 * - ABS: ld{w,h,b,dw} [k]
+	 * - IND: ld{w,h,b,dw} [src+k]
+	 * - MEM: ldx{w,h,b,dw} dst, [src+off]
+	 */
+	if (BPF_CLASS(bpf->op) == BPF_CLASS_LD) {
+		switch (BPF_MODE(bpf->op)) {
+		case BPF_MODE_IMM:
+			/* the only immediate load is the 64-bit one */
+			if (bpf->op != (BPF_CLASS_LD | BPF_SIZE_DW | BPF_MODE_IMM))
+				return false;
+			MCOperand_CreateImm0(MI, bpf->k);
+			return true;
+		case BPF_MODE_ABS:
+			MCOperand_CreateImm0(MI, bpf->k);
+			return true;
+		case BPF_MODE_IND:
+			CHECK_READABLE_AND_PUSH(ud, MI, bpf->src);
+			MCOperand_CreateImm0(MI, bpf->k);
+			return true;
+		}
+		return false;
+
+	}
+	/* LDX */
+	if (BPF_MODE(bpf->op) == BPF_MODE_MEM) {
+		CHECK_WRITABLE_AND_PUSH(ud, MI, bpf->dst);
+		CHECK_READABLE_AND_PUSH(ud, MI, bpf->src);
+		MCOperand_CreateImm0(MI, bpf->offset);
+		return true;
+	}
+	return false;
+}
+
+/*
+ * Validate a store instruction (class ST or STX) and push its operands.
+ *
+ * cBPF accepts only BPF_ST* | BPF_MEM | BPF_W and pushes imm k.
+ * eBPF accepts
+ *   - BPF_STX | BPF_XADD | BPF_{W,DW}
+ *   - BPF_ST* | BPF_MEM  | BPF_{W,H,B,DW}
+ * and pushes reg dst, imm off, then imm k (class ST) or reg src (class STX).
+ */
+static bool decodeStore(cs_struct *ud, MCInst *MI, bpf_internal *bpf)
+{
+	const uint16_t op = bpf->op;
+
+	if (!EBPF_MODE(ud)) {
+		/* can only store to M[] */
+		if (op == (BPF_CLASS(op) | BPF_MODE_MEM | BPF_SIZE_W)) {
+			MCOperand_CreateImm0(MI, bpf->k);
+			return true;
+		}
+		return false;
+	}
+
+	/* eBPF: reject everything that is neither a valid xadd nor a MEM store */
+	switch (BPF_MODE(op)) {
+	case BPF_MODE_XADD:
+		if (BPF_CLASS(op) != BPF_CLASS_STX)
+			return false;
+		if (!(BPF_SIZE(op) == BPF_SIZE_W || BPF_SIZE(op) == BPF_SIZE_DW))
+			return false;
+		break;
+	case BPF_MODE_MEM:
+		break;
+	default:
+		return false;
+	}
+
+	/* both forms are "<op> [dst + off], <value>" */
+	CHECK_READABLE_AND_PUSH(ud, MI, bpf->dst);
+	MCOperand_CreateImm0(MI, bpf->offset);
+	if (BPF_CLASS(op) == BPF_CLASS_ST) {
+		/* only reachable in MEM mode: xadd was restricted to STX above */
+		MCOperand_CreateImm0(MI, bpf->k);
+	}
+	else {
+		CHECK_READABLE_AND_PUSH(ud, MI, bpf->src);
+	}
+	return true;
+}
+
+/*
+ * Validate an ALU instruction and push its operands.
+ *
+ * cBPF: operations up to XOR; neg has no operand, others take imm k or reg X.
+ * eBPF: operations up to END; forms are
+ *   - op dst, imm
+ *   - op dst, src
+ *   - neg dst
+ *   - le<imm> / be<imm> dst  (class ALU only, imm one of 16, 32, 64)
+ * For the endian form, k is folded into the MCInst opcode (k << 4) so that
+ * the opcode alone identifies the exact instruction.
+ */
+static bool decodeALU(cs_struct *ud, MCInst *MI, bpf_internal *bpf)
+{
+	const unsigned alu_op = BPF_OP(bpf->op);
+	const bool src_is_imm = (BPF_SRC(bpf->op) == BPF_SRC_K);
+
+	/* cBPF */
+	if (!EBPF_MODE(ud)) {
+		if (alu_op > BPF_ALU_XOR)
+			return false;
+		/* cBPF's NEG has no operands */
+		if (alu_op != BPF_ALU_NEG) {
+			if (src_is_imm)
+				MCOperand_CreateImm0(MI, bpf->k);
+			else /* BPF_SRC_X */
+				MCOperand_CreateReg0(MI, BPF_REG_X);
+		}
+		return true;
+	}
+
+	/* eBPF */
+	if (alu_op > BPF_ALU_END)
+		return false;
+
+	if (alu_op == BPF_ALU_END) {
+		/* ALU64 class doesn't have ENDian */
+		if (BPF_CLASS(bpf->op) == BPF_CLASS_ALU64)
+			return false;
+		/* ENDian's imm must be one of 16, 32, 64 */
+		switch (bpf->k) {
+		case 16:
+		case 32:
+		case 64:
+			break;
+		default:
+			return false;
+		}
+	}
+
+	/* every ALU instruction has a dst operand */
+	CHECK_WRITABLE_AND_PUSH(ud, MI, bpf->dst);
+
+	switch (alu_op) {
+	case BPF_ALU_NEG:
+		return true;
+	case BPF_ALU_END:
+		/* bpf->k was checked above to be one of 16, 32, 64 */
+		MCInst_setOpcode(MI, MCInst_getOpcode(MI) | ((uint32_t)bpf->k << 4));
+		return true;
+	default:
+		break;
+	}
+
+	/* normal cases */
+	if (src_is_imm) {
+		MCOperand_CreateImm0(MI, bpf->k);
+	}
+	else { /* BPF_SRC_X */
+		CHECK_READABLE_AND_PUSH(ud, MI, bpf->src);
+	}
+	return true;
+}
+
+/*
+ * Validate a jump-class instruction and push its operands.
+ *
+ * cBPF: ja -> imm k; conditional -> (imm k | reg X), imm jt, imm jf.
+ * eBPF: exit -> no operand; call -> imm k; ja -> imm off;
+ *       conditional -> reg dst, (imm k | reg src), imm off.
+ */
+static bool decodeJump(cs_struct *ud, MCInst *MI, bpf_internal *bpf)
+{
+	const unsigned jmp_op = BPF_OP(bpf->op);
+	const bool src_is_imm = (BPF_SRC(bpf->op) == BPF_SRC_K);
+
+	/* cBPF and eBPF are very different in class jump */
+	if (!EBPF_MODE(ud)) {
+		if (jmp_op > BPF_JUMP_JSET)
+			return false;
+
+		/* ja is a special case of jumps */
+		if (jmp_op == BPF_JUMP_JA) {
+			MCOperand_CreateImm0(MI, bpf->k);
+			return true;
+		}
+
+		if (src_is_imm)
+			MCOperand_CreateImm0(MI, bpf->k);
+		else /* BPF_SRC_X */
+			MCOperand_CreateReg0(MI, BPF_REG_X);
+		MCOperand_CreateImm0(MI, bpf->jt);
+		MCOperand_CreateImm0(MI, bpf->jf);
+		return true;
+	}
+
+	/* eBPF */
+	if (jmp_op > BPF_JUMP_JSLE)
+		return false;
+
+	switch (jmp_op) {
+	case BPF_JUMP_EXIT:
+		/* No operands for exit; no other opcode bit may be set */
+		return bpf->op == (BPF_CLASS_JMP | BPF_JUMP_EXIT);
+	case BPF_JUMP_CALL:
+		if (bpf->op != (BPF_CLASS_JMP | BPF_JUMP_CALL))
+			return false;
+		MCOperand_CreateImm0(MI, bpf->k);
+		return true;
+	case BPF_JUMP_JA:
+		/* ja is a special case of jumps */
+		if (!src_is_imm)
+			return false;
+		MCOperand_CreateImm0(MI, bpf->offset);
+		return true;
+	default:
+		break;
+	}
+
+	/* <j> dst, src, +off */
+	CHECK_READABLE_AND_PUSH(ud, MI, bpf->dst);
+	if (src_is_imm) {
+		MCOperand_CreateImm0(MI, bpf->k);
+	}
+	else {
+		CHECK_READABLE_AND_PUSH(ud, MI, bpf->src);
+	}
+	MCOperand_CreateImm0(MI, bpf->offset);
+	return true;
+}
+
+/*
+ * Decode the cBPF-only BPF_RET class: the return value is imm k, register X
+ * or register A depending on BPF_RVAL(op). Any other selector is invalid.
+ */
+static bool decodeReturn(cs_struct *ud, MCInst *MI, bpf_internal *bpf)
+{
+	const unsigned rval = BPF_RVAL(bpf->op);
+
+	if (rval == BPF_SRC_K)
+		MCOperand_CreateImm0(MI, bpf->k);
+	else if (rval == BPF_SRC_X)
+		MCOperand_CreateReg0(MI, BPF_REG_X);
+	else if (rval == BPF_SRC_A)
+		MCOperand_CreateReg0(MI, BPF_REG_A);
+	else
+		return false;
+	return true;
+}
+
+/*
+ * Decode the cBPF-only BPF_MISC class. After removing the class bits the
+ * opcode must be exactly tax or txa; neither takes an operand.
+ */
+static bool decodeMISC(cs_struct *ud, MCInst *MI, bpf_internal *bpf)
+{
+	switch (bpf->op ^ BPF_CLASS_MISC) {
+	case BPF_MISCOP_TAX:
+	case BPF_MISCOP_TXA:
+		return true;
+	default:
+		return false;
+	}
+}
+
+/*
+ * Turn a fetched bpf_internal into an MCInst:
+ *  1. check that the instruction is valid for the current mode,
+ *  2. set MI's opcode,
+ *  3. push MI's operands (done by the per-class decoders).
+ * Returns false for an invalid instruction.
+ */
+static bool getInstruction(cs_struct *ud, MCInst *MI, bpf_internal *bpf)
+{
+	cs_detail *const detail = MI->flat_insn->detail;
+
+	/* reset detail from its start up to and including the bpf member */
+	if (detail)
+		memset(detail, 0, offsetof(cs_detail, bpf) + sizeof(cs_bpf));
+
+	MCInst_clear(MI);
+	MCInst_setOpcode(MI, bpf->op);
+
+	switch (BPF_CLASS(bpf->op)) {
+	case BPF_CLASS_LD:
+	case BPF_CLASS_LDX:
+		return decodeLoad(ud, MI, bpf);
+	case BPF_CLASS_ST:
+	case BPF_CLASS_STX:
+		return decodeStore(ud, MI, bpf);
+	case BPF_CLASS_ALU:
+		return decodeALU(ud, MI, bpf);
+	case BPF_CLASS_JMP:
+		return decodeJump(ud, MI, bpf);
+	case BPF_CLASS_RET:
+		/* eBPF doesn't have this class */
+		return EBPF_MODE(ud) ? false : decodeReturn(ud, MI, bpf);
+	case BPF_CLASS_MISC:
+		/* same value as BPF_CLASS_ALU64, which is what it means in eBPF */
+		return EBPF_MODE(ud) ? decodeALU(ud, MI, bpf)
+				     : decodeMISC(ud, MI, bpf);
+	default: /* should never happen */
+		return false;
+	}
+}
+
+/*
+ * Disassembler entry point: decode one instruction from `code` into `instr`.
+ * On success *size receives the instruction length and true is returned;
+ * on failure *size is left untouched and false is returned.
+ * `address` and `info` are not used.
+ */
+bool BPF_getInstruction(csh ud, const uint8_t *code, size_t code_len,
+		MCInst *instr, uint16_t *size, uint64_t address, void *info)
+{
+	cs_struct *handle = (cs_struct*)ud;
+	bpf_internal *insn;
+	bool decoded;
+
+	insn = EBPF_MODE(handle) ? fetch_ebpf(handle, code, code_len)
+				 : fetch_cbpf(handle, code, code_len);
+	if (insn == NULL)
+		return false;
+
+	decoded = getInstruction(handle, instr, insn);
+	if (decoded)
+		*size = insn->insn_size;
+
+	/* the parsed form is only needed while decoding */
+	cs_mem_free(insn);
+	return decoded;
+}
+
+#endif
diff --git a/capstone/arch/BPF/BPFDisassembler.h b/capstone/arch/BPF/BPFDisassembler.h
new file mode 100644
index 000000000..9616b0816
--- /dev/null
+++ b/capstone/arch/BPF/BPFDisassembler.h
@@ -0,0 +1,27 @@
+/* Capstone Disassembly Engine */
+/* BPF Backend by david942j <david942j@gmail.com>, 2019 */
+
+#ifndef CS_BPF_DISASSEMBLER_H
+#define CS_BPF_DISASSEMBLER_H
+
+#include "../../MCInst.h"
+
+typedef struct bpf_internal {
+ uint16_t op; /* raw opcode: 16 bits read in cBPF, the first code byte in eBPF */
+ uint64_t k; /* immediate; only the 16-byte eBPF load fills more than 32 bits */
+ /* for cBPF */
+ uint8_t jt; /* byte 2 of the instruction */
+ uint8_t jf; /* byte 3 of the instruction */
+ /* for eBPF */
+ uint8_t dst; /* low nibble of byte 1 */
+ uint8_t src; /* high nibble of byte 1 */
+ uint16_t offset; /* 16-bit value at bytes 2-3 */
+
+ /* length of this bpf instruction */
+ uint8_t insn_size; /* 8, or 16 for the eBPF 64-bit immediate load */
+} bpf_internal;
+
+/* Decode one instruction from code into instr; on success *size is its length. */
+bool BPF_getInstruction(csh ud, const uint8_t *code, size_t code_len,
+ MCInst *instr, uint16_t *size, uint64_t address, void *info);
+
+#endif
diff --git a/capstone/arch/BPF/BPFInstPrinter.c b/capstone/arch/BPF/BPFInstPrinter.c
new file mode 100644
index 000000000..782d8cbc0
--- /dev/null
+++ b/capstone/arch/BPF/BPFInstPrinter.c
@@ -0,0 +1,280 @@
+/* Capstone Disassembly Engine */
+/* BPF Backend by david942j <david942j@gmail.com>, 2019 */
+
+#include <capstone/platform.h>
+
+#include "BPFConstants.h"
+#include "BPFInstPrinter.h"
+#include "BPFMapping.h"
+
+/*
+ * Claim the next free slot of bpf->operands and bump op_count.
+ * No bounds check is performed here.
+ */
+static cs_bpf_op *expand_bpf_operands(cs_bpf *bpf)
+{
+	/* assert(bpf->op_count < 3); */
+	cs_bpf_op *slot = &bpf->operands[bpf->op_count];
+
+	bpf->op_count++;
+	return slot;
+}
+
+/*
+ * Append a register operand.
+ * val is the register, ac_mode its access flags (CS_AC_READ / CS_AC_WRITE).
+ */
+static void push_op_reg(cs_bpf *bpf, bpf_reg val, uint8_t ac_mode)
+{
+	cs_bpf_op *op = expand_bpf_operands(bpf);
+
+	op->type = BPF_OP_REG;
+	op->reg = val;
+	op->access = ac_mode;
+}
+
+/* Append an immediate operand holding val. */
+static void push_op_imm(cs_bpf *bpf, uint64_t val)
+{
+	cs_bpf_op *slot = expand_bpf_operands(bpf);
+
+	slot->imm = val;
+	slot->type = BPF_OP_IMM;
+}
+
+/* Append a jump-offset operand holding val. */
+static void push_op_off(cs_bpf *bpf, uint32_t val)
+{
+	cs_bpf_op *slot = expand_bpf_operands(bpf);
+
+	slot->off = val;
+	slot->type = BPF_OP_OFF;
+}
+
+/*
+ * Append a memory operand [reg + val].
+ * Pass BPF_REG_INVALID as reg for an absolute address.
+ */
+static void push_op_mem(cs_bpf *bpf, bpf_reg reg, uint32_t val)
+{
+	cs_bpf_op *slot = expand_bpf_operands(bpf);
+
+	slot->mem.disp = val;
+	slot->mem.base = reg;
+	slot->type = BPF_OP_MEM;
+}
+
+/* Append a cBPF scratch-memory operand m[val]. */
+static void push_op_mmem(cs_bpf *bpf, uint32_t val)
+{
+	cs_bpf_op *slot = expand_bpf_operands(bpf);
+
+	slot->mmem = val;
+	slot->type = BPF_OP_MMEM;
+}
+
+/* Append a cBPF MSH operand, printed as 4*([val]&0xf). */
+static void push_op_msh(cs_bpf *bpf, uint32_t val)
+{
+	cs_bpf_op *slot = expand_bpf_operands(bpf);
+
+	slot->msh = val;
+	slot->type = BPF_OP_MSH;
+}
+
+/* Append an extension operand (e.g. BPF_EXT_LEN). */
+static void push_op_ext(cs_bpf *bpf, bpf_ext_type val)
+{
+	cs_bpf_op *slot = expand_bpf_operands(bpf);
+
+	slot->ext = val;
+	slot->type = BPF_OP_EXT;
+}
+
+/*
+ * Translate the MCInst operands pushed by the decoder into cs_bpf operands.
+ * bpf->op_count is reset to 0 first; operands are then appended according to
+ * the instruction class and addressing mode encoded in MI's opcode.
+ * The operand indices used below mirror the order in which the decode*
+ * functions push them, so the two must be kept in sync.
+ */
+static void convert_operands(MCInst *MI, cs_bpf *bpf)
+{
+ unsigned opcode = MCInst_getOpcode(MI);
+ unsigned mc_op_count = MCInst_getNumOperands(MI);
+ MCOperand *op;
+ MCOperand *op2;
+ unsigned i;
+
+ bpf->op_count = 0;
+ if (BPF_CLASS(opcode) == BPF_CLASS_LD || BPF_CLASS(opcode) == BPF_CLASS_LDX) {
+ switch (BPF_MODE(opcode)) {
+ case BPF_MODE_IMM:
+ push_op_imm(bpf, MCOperand_getImm(MCInst_getOperand(MI, 0)));
+ break;
+ case BPF_MODE_ABS:
+ /* absolute address: memory operand without a base register */
+ op = MCInst_getOperand(MI, 0);
+ push_op_mem(bpf, BPF_REG_INVALID, (uint32_t)MCOperand_getImm(op));
+ break;
+ case BPF_MODE_IND:
+ op = MCInst_getOperand(MI, 0);
+ op2 = MCInst_getOperand(MI, 1);
+ push_op_mem(bpf, MCOperand_getReg(op), (uint32_t)MCOperand_getImm(op2));
+ break;
+ case BPF_MODE_MEM:
+ if (EBPF_MODE(MI->csh)) {
+ /* ldx{w,h,b,dw} dst, [src+off] */
+ push_op_reg(bpf, MCOperand_getReg(MCInst_getOperand(MI, 0)), CS_AC_WRITE);
+ op = MCInst_getOperand(MI, 1);
+ op2 = MCInst_getOperand(MI, 2);
+ push_op_mem(bpf, MCOperand_getReg(op), (uint32_t)MCOperand_getImm(op2));
+ }
+ else {
+ push_op_mmem(bpf, (uint32_t)MCOperand_getImm(MCInst_getOperand(MI, 0)));
+ }
+ break;
+ case BPF_MODE_LEN:
+ push_op_ext(bpf, BPF_EXT_LEN);
+ break;
+ case BPF_MODE_MSH:
+ op = MCInst_getOperand(MI, 0);
+ push_op_msh(bpf, (uint32_t)MCOperand_getImm(op));
+ break;
+ /* case BPF_MODE_XADD: // not exists */
+ }
+ return;
+ }
+ if (BPF_CLASS(opcode) == BPF_CLASS_ST || BPF_CLASS(opcode) == BPF_CLASS_STX) {
+ if (!EBPF_MODE(MI->csh)) {
+ // cBPF has only one case - st* M[k]
+ push_op_mmem(bpf, (uint32_t)MCOperand_getImm(MCInst_getOperand(MI, 0)));
+ return;
+ }
+ /* eBPF has two cases:
+ * - st [dst + off], src
+ * - xadd [dst + off], src
+ * they have same form of operands.
+ */
+ op = MCInst_getOperand(MI, 0);
+ op2 = MCInst_getOperand(MI, 1);
+ push_op_mem(bpf, MCOperand_getReg(op), (uint32_t)MCOperand_getImm(op2));
+ op = MCInst_getOperand(MI, 2);
+ if (MCOperand_isImm(op))
+ push_op_imm(bpf, MCOperand_getImm(op));
+ else if (MCOperand_isReg(op))
+ push_op_reg(bpf, MCOperand_getReg(op), CS_AC_READ);
+ return;
+ }
+
+ if (BPF_CLASS(opcode) == BPF_CLASS_JMP) {
+ for (i = 0; i < mc_op_count; i++) {
+ op = MCInst_getOperand(MI, i);
+ if (MCOperand_isImm(op)) {
+ /* decide the imm is BPF_OP_IMM or BPF_OP_OFF type here */
+ /*
+ * 1. ja +off
+ * 2. j {x,k}, +jt, +jf // cBPF
+ * 3. j dst_reg, {src_reg, k}, +off // eBPF
+ */
+ if (BPF_OP(opcode) == BPF_JUMP_JA ||
+ (!EBPF_MODE(MI->csh) && i >= 1) ||
+ (EBPF_MODE(MI->csh) && i == 2))
+ push_op_off(bpf, (uint32_t)MCOperand_getImm(op));
+ else
+ push_op_imm(bpf, MCOperand_getImm(op));
+ }
+ else if (MCOperand_isReg(op)) {
+ push_op_reg(bpf, MCOperand_getReg(op), CS_AC_READ);
+ }
+ }
+ return;
+ }
+
+ if (!EBPF_MODE(MI->csh)) {
+ /* In cBPF mode, all registers in operands are accessed as read */
+ for (i = 0; i < mc_op_count; i++) {
+ op = MCInst_getOperand(MI, i);
+ if (MCOperand_isImm(op))
+ push_op_imm(bpf, MCOperand_getImm(op));
+ else if (MCOperand_isReg(op))
+ push_op_reg(bpf, MCOperand_getReg(op), CS_AC_READ);
+ }
+ return;
+ }
+
+ /* remain cases are: eBPF mode && ALU */
+ /* if (BPF_CLASS(opcode) == BPF_CLASS_ALU || BPF_CLASS(opcode) == BPF_CLASS_ALU64) */
+
+ /* We have three types:
+ * 1. {l,b}e dst // dst = byteswap(dst)
+ * 2. neg dst // dst = -dst
+ * 3. <op> dst, {src_reg, imm} // dst = dst <op> src
+ * so we can simply check the number of operands,
+ * exactly one operand means we are in case 1. and 2.,
+ * otherwise in case 3.
+ */
+ if (mc_op_count == 1) {
+ op = MCInst_getOperand(MI, 0);
+ push_op_reg(bpf, MCOperand_getReg(op), CS_AC_READ | CS_AC_WRITE);
+ }
+ else { // if (mc_op_count == 2)
+ op = MCInst_getOperand(MI, 0);
+ push_op_reg(bpf, MCOperand_getReg(op), CS_AC_READ | CS_AC_WRITE);
+
+ op = MCInst_getOperand(MI, 1);
+ if (MCOperand_isImm(op))
+ push_op_imm(bpf, MCOperand_getImm(op));
+ else if (MCOperand_isReg(op))
+ push_op_reg(bpf, MCOperand_getReg(op), CS_AC_READ);
+ }
+}
+
+/*
+ * Append the textual form of one operand to O:
+ *   register -> its name          immediate -> 0x<hex>
+ *   offset   -> +0x<hex>          memory    -> [base+0x<disp>]
+ *   M[k]     -> m[0x<hex>]        MSH       -> 4*([0x<hex>]&0xf)
+ *   length extension -> #len
+ */
+static void print_operand(MCInst *MI, struct SStream *O, const cs_bpf_op *op)
+{
+	switch (op->type) {
+	case BPF_OP_INVALID:
+		SStream_concat(O, "invalid");
+		break;
+	case BPF_OP_REG:
+		SStream_concat(O, BPF_reg_name((csh)MI->csh, op->reg));
+		break;
+	case BPF_OP_IMM:
+		SStream_concat(O, "0x%" PRIx64, op->imm);
+		break;
+	case BPF_OP_OFF:
+		SStream_concat(O, "+0x%x", op->off);
+		break;
+	case BPF_OP_MEM: {
+		const bool has_base = (op->mem.base != BPF_REG_INVALID);
+
+		SStream_concat(O, "[");
+		if (has_base)
+			SStream_concat(O, BPF_reg_name((csh)MI->csh, op->mem.base));
+		if (op->mem.disp == 0) {
+			/* special case: nothing printed yet, show an explicit zero */
+			if (!has_base)
+				SStream_concat(O, "0x0");
+		}
+		else {
+			if (has_base)
+				SStream_concat(O, "+");
+			SStream_concat(O, "0x%x", op->mem.disp);
+		}
+		SStream_concat(O, "]");
+		break;
+	}
+	case BPF_OP_MMEM:
+		SStream_concat(O, "m[0x%x]", op->mmem);
+		break;
+	case BPF_OP_MSH:
+		SStream_concat(O, "4*([0x%x]&0xf)", op->msh);
+		break;
+	case BPF_OP_EXT:
+		if (op->ext == BPF_EXT_LEN)
+			SStream_concat(O, "#len");
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * Printer entry point. For the decoded MI it
+ *  1. resolves the public instruction id (BPF_INS_*) and stores it as
+ *     MI's pubOpcode,
+ *  2. writes the mnemonic followed by the operands to O
+ *     (tab before the first operand, ", " between operands),
+ *  3. copies the converted operands into detail->bpf when detail is
+ *     available (not in CAPSTONE_DIET builds).
+ */
+void BPF_printInst(MCInst *MI, struct SStream *O, void *PrinterInfo)
+{
+	cs_insn scratch;
+	cs_bpf converted;
+	int idx;
+
+	/* only the id is wanted here, so no detail is attached */
+	scratch.detail = NULL;
+	BPF_get_insn_id((cs_struct*)MI->csh, &scratch, MCInst_getOpcode(MI));
+	MCInst_setOpcodePub(MI, scratch.id);
+
+	SStream_concat(O, BPF_insn_name((csh)MI->csh, scratch.id));
+	convert_operands(MI, &converted);
+	for (idx = 0; idx < converted.op_count; idx++) {
+		SStream_concat(O, idx == 0 ? "\t" : ", ");
+		print_operand(MI, O, &converted.operands[idx]);
+	}
+
+#ifndef CAPSTONE_DIET
+	if (MI->flat_insn->detail)
+		MI->flat_insn->detail->bpf = converted;
+#endif
+}
diff --git a/capstone/arch/BPF/BPFInstPrinter.h b/capstone/arch/BPF/BPFInstPrinter.h
new file mode 100644
index 000000000..685a8d656
--- /dev/null
+++ b/capstone/arch/BPF/BPFInstPrinter.h
@@ -0,0 +1,16 @@
+/* Capstone Disassembly Engine */
+/* BPF Backend by david942j <david942j@gmail.com>, 2019 */
+
+#ifndef CS_BPFINSTPRINTER_H
+#define CS_BPFINSTPRINTER_H
+
+#include <capstone/capstone.h>
+
+#include "../../MCInst.h"
+#include "../../SStream.h"
+
+struct SStream;
+
+/* Print the mnemonic and operands of MI into O. */
+void BPF_printInst(MCInst *MI, struct SStream *O, void *Info);
+
+#endif
diff --git a/capstone/arch/BPF/BPFMapping.c b/capstone/arch/BPF/BPFMapping.c
new file mode 100644
index 000000000..e333a5f1d
--- /dev/null
+++ b/capstone/arch/BPF/BPFMapping.c
@@ -0,0 +1,506 @@
+/* Capstone Disassembly Engine */
+/* BPF Backend by david942j <david942j@gmail.com>, 2019 */
+
+#include <string.h>
+
+#include "BPFConstants.h"
+#include "BPFMapping.h"
+#include "../../utils.h"
+
+#ifndef CAPSTONE_DIET
+/* Names of the BPF_GRP_* instruction groups, looked up by BPF_group_name(). */
+static const name_map group_name_maps[] = {
+ { BPF_GRP_INVALID, NULL },
+
+ { BPF_GRP_LOAD, "load" },
+ { BPF_GRP_STORE, "store" },
+ { BPF_GRP_ALU, "alu" },
+ { BPF_GRP_JUMP, "jump" },
+ { BPF_GRP_CALL, "call" },
+ { BPF_GRP_RETURN, "return" },
+ { BPF_GRP_MISC, "misc" },
+};
+#endif
+
+/* Name of instruction group `id`; always NULL in CAPSTONE_DIET builds. */
+const char *BPF_group_name(csh handle, unsigned int id)
+{
+#ifdef CAPSTONE_DIET
+	return NULL;
+#else
+	return id2name(group_name_maps, ARR_SIZE(group_name_maps), id);
+#endif
+}
+
+#ifndef CAPSTONE_DIET
+/* Mnemonic of every BPF_INS_* id, looked up by BPF_insn_name(). */
+static const name_map insn_name_maps[BPF_INS_ENDING] = {
+ { BPF_INS_INVALID, NULL },
+
+ /* ALU */
+ { BPF_INS_ADD, "add" },
+ { BPF_INS_SUB, "sub" },
+ { BPF_INS_MUL, "mul" },
+ { BPF_INS_DIV, "div" },
+ { BPF_INS_OR, "or" },
+ { BPF_INS_AND, "and" },
+ { BPF_INS_LSH, "lsh" },
+ { BPF_INS_RSH, "rsh" },
+ { BPF_INS_NEG, "neg" },
+ { BPF_INS_MOD, "mod" },
+ { BPF_INS_XOR, "xor" },
+ { BPF_INS_MOV, "mov" },
+ { BPF_INS_ARSH, "arsh" },
+
+ /* ALU64 */
+ { BPF_INS_ADD64, "add64" },
+ { BPF_INS_SUB64, "sub64" },
+ { BPF_INS_MUL64, "mul64" },
+ { BPF_INS_DIV64, "div64" },
+ { BPF_INS_OR64, "or64" },
+ { BPF_INS_AND64, "and64" },
+ { BPF_INS_LSH64, "lsh64" },
+ { BPF_INS_RSH64, "rsh64" },
+ { BPF_INS_NEG64, "neg64" },
+ { BPF_INS_MOD64, "mod64" },
+ { BPF_INS_XOR64, "xor64" },
+ { BPF_INS_MOV64, "mov64" },
+ { BPF_INS_ARSH64, "arsh64" },
+
+ /* endianness conversion */
+ { BPF_INS_LE16, "le16" },
+ { BPF_INS_LE32, "le32" },
+ { BPF_INS_LE64, "le64" },
+ { BPF_INS_BE16, "be16" },
+ { BPF_INS_BE32, "be32" },
+ { BPF_INS_BE64, "be64" },
+
+ /* loads */
+ { BPF_INS_LDW, "ldw" },
+ { BPF_INS_LDH, "ldh" },
+ { BPF_INS_LDB, "ldb" },
+ { BPF_INS_LDDW, "lddw" },
+ { BPF_INS_LDXW, "ldxw" },
+ { BPF_INS_LDXH, "ldxh" },
+ { BPF_INS_LDXB, "ldxb" },
+ { BPF_INS_LDXDW, "ldxdw" },
+
+ /* stores */
+ { BPF_INS_STW, "stw" },
+ { BPF_INS_STH, "sth" },
+ { BPF_INS_STB, "stb" },
+ { BPF_INS_STDW, "stdw" },
+ { BPF_INS_STXW, "stxw" },
+ { BPF_INS_STXH, "stxh" },
+ { BPF_INS_STXB, "stxb" },
+ { BPF_INS_STXDW, "stxdw" },
+ { BPF_INS_XADDW, "xaddw" },
+ { BPF_INS_XADDDW, "xadddw" },
+
+ /* jumps, call and exit */
+ { BPF_INS_JMP, "jmp" },
+ { BPF_INS_JEQ, "jeq" },
+ { BPF_INS_JGT, "jgt" },
+ { BPF_INS_JGE, "jge" },
+ { BPF_INS_JSET, "jset" },
+ { BPF_INS_JNE, "jne" },
+ { BPF_INS_JSGT, "jsgt" },
+ { BPF_INS_JSGE, "jsge" },
+ { BPF_INS_CALL, "call" },
+ { BPF_INS_EXIT, "exit" },
+ { BPF_INS_JLT, "jlt" },
+ { BPF_INS_JLE, "jle" },
+ { BPF_INS_JSLT, "jslt" },
+ { BPF_INS_JSLE, "jsle" },
+
+ /* cBPF return */
+ { BPF_INS_RET, "ret" },
+
+ /* cBPF misc */
+ { BPF_INS_TAX, "tax" },
+ { BPF_INS_TXA, "txa" },
+};
+#endif
+
+/*
+ * Mnemonic for instruction `id`; always NULL in CAPSTONE_DIET builds.
+ * In cBPF mode the word-sized load/store ids are spelled without a size
+ * suffix ("ld", "ldx", "st", "stx"), because cBPF's 'ld' is equivalent to
+ * eBPF's 'ldw' and 'ldw' should not appear in cBPF output.
+ */
+const char *BPF_insn_name(csh handle, unsigned int id)
+{
+#ifdef CAPSTONE_DIET
+	return NULL;
+#else
+	if (!EBPF_MODE(handle)) {
+		if (id == BPF_INS_LD)
+			return "ld";
+		if (id == BPF_INS_LDX)
+			return "ldx";
+		if (id == BPF_INS_ST)
+			return "st";
+		if (id == BPF_INS_STX)
+			return "stx";
+	}
+	return id2name(insn_name_maps, ARR_SIZE(insn_name_maps), id);
+#endif
+}
+
+/*
+ * Name of register `reg` for the current mode: "r0".."r10" in eBPF,
+ * "a" / "x" in cBPF. Returns NULL for any other register and always in
+ * CAPSTONE_DIET builds.
+ */
+const char *BPF_reg_name(csh handle, unsigned int reg)
+{
+#ifdef CAPSTONE_DIET
+	return NULL;
+#else
+	static const char ebpf_names[11][4] = {
+		"r0", "r1", "r2", "r3", "r4",
+		"r5", "r6", "r7", "r8", "r9",
+		"r10"
+	};
+
+	if (!EBPF_MODE(handle)) {
+		/* cBPF mode */
+		return reg == BPF_REG_A ? "a" : (reg == BPF_REG_X ? "x" : NULL);
+	}
+
+	if (reg < BPF_REG_R0 || reg > BPF_REG_R10)
+		return NULL;
+	return ebpf_names[reg - BPF_REG_R0];
+#endif
+}
+
+/*
+ * Map a load opcode to its instruction id: the size bits pick w/h/b/dw and
+ * the class picks the LD or the LDX family.
+ */
+static bpf_insn op2insn_ld(unsigned opcode)
+{
+	const int is_ld = (BPF_CLASS(opcode) == BPF_CLASS_LD);
+
+	switch (BPF_SIZE(opcode)) {
+	case BPF_SIZE_W:
+		return is_ld ? BPF_INS_LDW : BPF_INS_LDXW;
+	case BPF_SIZE_H:
+		return is_ld ? BPF_INS_LDH : BPF_INS_LDXH;
+	case BPF_SIZE_B:
+		return is_ld ? BPF_INS_LDB : BPF_INS_LDXB;
+	case BPF_SIZE_DW:
+		return is_ld ? BPF_INS_LDDW : BPF_INS_LDXDW;
+	}
+
+	return BPF_INS_INVALID;
+}
+
+/*
+ * Map a store opcode to its instruction id. Valid encodings are
+ *  - BPF_STX | BPF_XADD | BPF_{W,DW}      -> xaddw / xadddw
+ *  - BPF_ST* | BPF_MEM  | BPF_{W,H,B,DW}  -> st{w,h,b,dw} / stx{w,h,b,dw}
+ */
+static bpf_insn op2insn_st(unsigned opcode)
+{
+	const int is_st = (BPF_CLASS(opcode) == BPF_CLASS_ST);
+
+	/* the two exclusive-add encodings are matched exactly */
+	switch (opcode) {
+	case BPF_CLASS_STX | BPF_MODE_XADD | BPF_SIZE_W:
+		return BPF_INS_XADDW;
+	case BPF_CLASS_STX | BPF_MODE_XADD | BPF_SIZE_DW:
+		return BPF_INS_XADDDW;
+	default:
+		break;
+	}
+
+	/* should be BPF_MEM */
+	switch (BPF_SIZE(opcode)) {
+	case BPF_SIZE_W:
+		return is_st ? BPF_INS_STW : BPF_INS_STXW;
+	case BPF_SIZE_H:
+		return is_st ? BPF_INS_STH : BPF_INS_STXH;
+	case BPF_SIZE_B:
+		return is_st ? BPF_INS_STB : BPF_INS_STXB;
+	case BPF_SIZE_DW:
+		return is_st ? BPF_INS_STDW : BPF_INS_STXDW;
+	}
+
+	return BPF_INS_INVALID;
+}
+
+/*
+ * Map an ALU opcode to its instruction id. Class ALU yields the plain id,
+ * any other class the 64-bit id. For the endian operation the opcode is
+ * expected to carry the width above the low byte (width << 4) and the
+ * little/big selector in the source bit.
+ */
+static bpf_insn op2insn_alu(unsigned opcode)
+{
+	const int is_alu32 = (BPF_CLASS(opcode) == BPF_CLASS_ALU);
+
+	/* Endian is a special case */
+	if (BPF_OP(opcode) == BPF_ALU_END) {
+		/* strip class and operation: selector and width remain */
+		const unsigned variant = opcode ^ BPF_CLASS_ALU ^ BPF_ALU_END;
+
+		if (variant == (BPF_SRC_LITTLE | (16 << 4)))
+			return BPF_INS_LE16;
+		if (variant == (BPF_SRC_LITTLE | (32 << 4)))
+			return BPF_INS_LE32;
+		if (variant == (BPF_SRC_LITTLE | (64 << 4)))
+			return BPF_INS_LE64;
+		if (variant == (BPF_SRC_BIG | (16 << 4)))
+			return BPF_INS_BE16;
+		if (variant == (BPF_SRC_BIG | (32 << 4)))
+			return BPF_INS_BE32;
+		if (variant == (BPF_SRC_BIG | (64 << 4)))
+			return BPF_INS_BE64;
+		return BPF_INS_INVALID;
+	}
+
+#define ALU_CASE(c) case BPF_ALU_##c: \
+		return is_alu32 ? BPF_INS_##c : BPF_INS_##c##64
+
+	switch (BPF_OP(opcode)) {
+	ALU_CASE(ADD);
+	ALU_CASE(SUB);
+	ALU_CASE(MUL);
+	ALU_CASE(DIV);
+	ALU_CASE(OR);
+	ALU_CASE(AND);
+	ALU_CASE(LSH);
+	ALU_CASE(RSH);
+	ALU_CASE(NEG);
+	ALU_CASE(MOD);
+	ALU_CASE(XOR);
+	ALU_CASE(MOV);
+	ALU_CASE(ARSH);
+	}
+#undef ALU_CASE
+
+	return BPF_INS_INVALID;
+}
+
+/*
+ * Map a jump-class opcode to its instruction id, using the operation
+ * nibble (bits 4-7) as a table index. The two unassigned operation values
+ * map to BPF_INS_INVALID.
+ */
+static bpf_insn op2insn_jmp(unsigned opcode)
+{
+	static const bpf_insn jmp_ids[16] = {
+		[BPF_JUMP_JA >> 4] = BPF_INS_JMP,
+		[BPF_JUMP_JEQ >> 4] = BPF_INS_JEQ,
+		[BPF_JUMP_JGT >> 4] = BPF_INS_JGT,
+		[BPF_JUMP_JGE >> 4] = BPF_INS_JGE,
+		[BPF_JUMP_JSET >> 4] = BPF_INS_JSET,
+		[BPF_JUMP_JNE >> 4] = BPF_INS_JNE,
+		[BPF_JUMP_JSGT >> 4] = BPF_INS_JSGT,
+		[BPF_JUMP_JSGE >> 4] = BPF_INS_JSGE,
+		[BPF_JUMP_CALL >> 4] = BPF_INS_CALL,
+		[BPF_JUMP_EXIT >> 4] = BPF_INS_EXIT,
+		[BPF_JUMP_JLT >> 4] = BPF_INS_JLT,
+		[BPF_JUMP_JLE >> 4] = BPF_INS_JLE,
+		[BPF_JUMP_JSLT >> 4] = BPF_INS_JSLT,
+		[BPF_JUMP_JSLE >> 4] = BPF_INS_JSLE,
+		[0xe] = BPF_INS_INVALID,
+		[0xf] = BPF_INS_INVALID,
+	};
+
+	/* BPF_OP() keeps only bits 4-7, so the index is always 0..15 */
+	return jmp_ids[BPF_OP(opcode) >> 4];
+}
+
+/*
+ * Record the registers an instruction accesses implicitly (i.e. those that
+ * do not appear among its operands) in detail->regs_read / regs_write.
+ * Nothing is recorded for BPF_INS_INVALID. `detail` must not be NULL.
+ */
+static void update_regs_access(cs_struct *ud, cs_detail *detail,
+		bpf_insn insn_id, unsigned int opcode)
+{
+	if (insn_id == BPF_INS_INVALID)
+		return;
+#define PUSH_READ(r) do { \
+		detail->regs_read[detail->regs_read_count] = (r); \
+		detail->regs_read_count++; \
+	} while (0)
+#define PUSH_WRITE(r) do { \
+		detail->regs_write[detail->regs_write_count] = (r); \
+		detail->regs_write_count++; \
+	} while (0)
+	/*
+	 * In eBPF mode, only these instructions have implicit registers access:
+	 * - ld{w,h,b,dw} * // w: r0
+	 * - exit // r: r0
+	 */
+	if (EBPF_MODE(ud)) {
+		switch (insn_id) {
+		default:
+			break;
+		case BPF_INS_LDW:
+		case BPF_INS_LDH:
+		case BPF_INS_LDB:
+		case BPF_INS_LDDW:
+			PUSH_WRITE(BPF_REG_R0);
+			break;
+		case BPF_INS_EXIT:
+			PUSH_READ(BPF_REG_R0);
+			break;
+		}
+		return;
+	}
+
+	/* cBPF mode */
+	switch (BPF_CLASS(opcode)) {
+	default:
+		break;
+	case BPF_CLASS_LD:
+		PUSH_WRITE(BPF_REG_A);
+		break;
+	case BPF_CLASS_LDX:
+		PUSH_WRITE(BPF_REG_X);
+		break;
+	case BPF_CLASS_ST:
+		PUSH_READ(BPF_REG_A);
+		break;
+	case BPF_CLASS_STX:
+		PUSH_READ(BPF_REG_X);
+		break;
+	case BPF_CLASS_ALU:
+		PUSH_READ(BPF_REG_A);
+		PUSH_WRITE(BPF_REG_A);
+		break;
+	case BPF_CLASS_JMP:
+		if (insn_id != BPF_INS_JMP) // except the unconditional jump
+			PUSH_READ(BPF_REG_A);
+		break;
+	/* case BPF_CLASS_RET: */
+	case BPF_CLASS_MISC:
+		if (insn_id == BPF_INS_TAX) {
+			PUSH_READ(BPF_REG_A);
+			PUSH_WRITE(BPF_REG_X);
+		}
+		else {
+			PUSH_READ(BPF_REG_X);
+			PUSH_WRITE(BPF_REG_A);
+		}
+		break;
+	}
+	/* keep the helper macros local to this function */
+#undef PUSH_READ
+#undef PUSH_WRITE
+}
+
+/*
+ * 1. Convert opcode(id) to BPF_INS_* and store it in insn->id
+ * 2. When insn->detail is present (and not a CAPSTONE_DIET build), set
+ *    groups and the implicit regs_read/regs_write
+ */
+void BPF_get_insn_id(cs_struct *ud, cs_insn *insn, unsigned int opcode)
+{
+	// No need to care the mode (cBPF or eBPF) since all checks has be done in
+	// BPF_getInstruction, we can simply map opcode to BPF_INS_*.
+	cs_detail *detail;
+	bpf_insn id = BPF_INS_INVALID;
+	bpf_insn_group grp;
+
+	detail = insn->detail;
+#ifndef CAPSTONE_DIET
+	#define PUSH_GROUP(grp) do { \
+		if (detail) { \
+			detail->groups[detail->groups_count] = (grp); \
+			detail->groups_count++; \
+		} \
+	} while(0)
+#else
+	/* no detail in diet builds: consume the arguments, do nothing */
+	#define PUSH_GROUP(grp) do { \
+		(void)detail; \
+		(void)(grp); \
+	} while(0)
+#endif
+
+	switch (BPF_CLASS(opcode)) {
+	default: // will never happen
+		break;
+	case BPF_CLASS_LD:
+	case BPF_CLASS_LDX:
+		id = op2insn_ld(opcode);
+		PUSH_GROUP(BPF_GRP_LOAD);
+		break;
+	case BPF_CLASS_ST:
+	case BPF_CLASS_STX:
+		id = op2insn_st(opcode);
+		PUSH_GROUP(BPF_GRP_STORE);
+		break;
+	case BPF_CLASS_ALU:
+		id = op2insn_alu(opcode);
+		PUSH_GROUP(BPF_GRP_ALU);
+		break;
+	case BPF_CLASS_JMP:
+		/* call and exit live in the jump class but get their own group */
+		grp = BPF_GRP_JUMP;
+		id = op2insn_jmp(opcode);
+		if (id == BPF_INS_CALL)
+			grp = BPF_GRP_CALL;
+		else if (id == BPF_INS_EXIT)
+			grp = BPF_GRP_RETURN;
+		PUSH_GROUP(grp);
+		break;
+	case BPF_CLASS_RET:
+		id = BPF_INS_RET;
+		PUSH_GROUP(BPF_GRP_RETURN);
+		break;
+	// BPF_CLASS_MISC and BPF_CLASS_ALU64 have exactly same value
+	case BPF_CLASS_MISC:
+	/* case BPF_CLASS_ALU64: */
+		if (EBPF_MODE(ud)) {
+			// ALU64 in eBPF
+			id = op2insn_alu(opcode);
+			PUSH_GROUP(BPF_GRP_ALU);
+		}
+		else {
+			if (BPF_MISCOP(opcode) == BPF_MISCOP_TXA)
+				id = BPF_INS_TXA;
+			else
+				id = BPF_INS_TAX;
+			PUSH_GROUP(BPF_GRP_MISC);
+		}
+		break;
+	}
+
+	insn->id = id;
+#undef PUSH_GROUP
+
+#ifndef CAPSTONE_DIET
+	if (detail) {
+		update_regs_access(ud, detail, id, opcode);
+	}
+#endif
+}
+
+/*
+ * Sort arr[0 .. n-1] in ascending order and remove duplicates, in place.
+ * The number of distinct elements left at the front of arr is stored in
+ * *new_n.
+ */
+static void sort_and_uniq(cs_regs arr, uint8_t n, uint8_t *new_n)
+{
+	/* arr is always a tiny (usually n < 3) array,
+	 * a simple O(n^2) sort is efficient enough. */
+	int i;
+	int j;
+	int iMin;
+	int tmp;
+
+	/* a modified selection sort for sorting and making unique */
+	j = 0;
+	while (j < n) {
+		/* arr[iMin] will be min(arr[j .. n-1]) */
+		iMin = j;
+		for (i = j + 1; i < n; i++) {
+			if (arr[i] < arr[iMin])
+				iMin = i;
+		}
+		if (j != 0 && arr[iMin] == arr[j - 1]) { // duplicate ele found
+			/*
+			 * Drop it by moving the last element into its place.
+			 * Slot j has not been settled yet, so stay on it and
+			 * search arr[j .. n-1] again instead of advancing.
+			 */
+			arr[iMin] = arr[n - 1];
+			--n;
+			continue;
+		}
+		tmp = arr[iMin];
+		arr[iMin] = arr[j];
+		arr[j] = tmp;
+		j++;
+	}
+
+	*new_n = n;
+}
+/*
+ * Collect every register read and written by insn: the implicit ones stored
+ * in insn->detail plus those appearing in the operands (a register operand
+ * according to its access flags, a memory operand's base as a read).
+ * Both output lists are sorted and de-duplicated.
+ * insn->detail is dereferenced unconditionally.
+ */
+void BPF_reg_access(const cs_insn *insn,
+		cs_regs regs_read, uint8_t *regs_read_count,
+		cs_regs regs_write, uint8_t *regs_write_count)
+{
+	const cs_detail *detail = insn->detail;
+	const cs_bpf *bpf = &(detail->bpf);
+	uint8_t n_read = detail->regs_read_count;
+	uint8_t n_write = detail->regs_write_count;
+	unsigned idx;
+
+	// implicit registers
+	memcpy(regs_read, detail->regs_read, n_read * sizeof(detail->regs_read[0]));
+	memcpy(regs_write, detail->regs_write, n_write * sizeof(detail->regs_write[0]));
+
+	// explicit registers, taken from the operands
+	for (idx = 0; idx < bpf->op_count; idx++) {
+		const cs_bpf_op *operand = &(bpf->operands[idx]);
+
+		if (operand->type == BPF_OP_REG) {
+			if (operand->access & CS_AC_READ)
+				regs_read[n_read++] = operand->reg;
+			if (operand->access & CS_AC_WRITE)
+				regs_write[n_write++] = operand->reg;
+		}
+		else if (operand->type == BPF_OP_MEM) {
+			if (operand->mem.base != BPF_REG_INVALID)
+				regs_read[n_read++] = operand->mem.base;
+		}
+	}
+
+	sort_and_uniq(regs_read, n_read, regs_read_count);
+	sort_and_uniq(regs_write, n_write, regs_write_count);
+}
diff --git a/capstone/arch/BPF/BPFMapping.h b/capstone/arch/BPF/BPFMapping.h
new file mode 100644
index 000000000..1401ee865
--- /dev/null
+++ b/capstone/arch/BPF/BPFMapping.h
@@ -0,0 +1,21 @@
+/* Capstone Disassembly Engine */
+/* BPF Backend by david942j <david942j@gmail.com>, 2019 */
+
+#ifndef CS_BPFMAPPING_H
+#define CS_BPFMAPPING_H
+
+#include <capstone/capstone.h>
+
+#include "../../cs_priv.h"
+
+/* Non-zero when the CS_MODE_BPF_EXTENDED bit is set in the mode of the
+ * handle `ud`, which is cast to cs_struct *. The argument is parenthesized
+ * so that the cast applies to the whole expression passed in. */
+#define EBPF_MODE(ud) (((cs_struct*)(ud))->mode & CS_MODE_BPF_EXTENDED)
+
+const char *BPF_group_name(csh handle, unsigned int id);
+const char *BPF_insn_name(csh handle, unsigned int id);
+const char *BPF_reg_name(csh handle, unsigned int reg);
+void BPF_get_insn_id(cs_struct *h, cs_insn *insn, unsigned int id);
+/* Fills regs_read / regs_write with the register lists stored in
+ * insn->detail plus the registers used by the instruction's BPF operands,
+ * runs each array through the implementation's sort-and-unique pass, and
+ * stores the resulting lengths in *regs_read_count / *regs_write_count. */
+void BPF_reg_access(const cs_insn *insn,
+ cs_regs regs_read, uint8_t *regs_read_count,
+ cs_regs regs_write, uint8_t *regs_write_count);
+
+#endif
diff --git a/capstone/arch/BPF/BPFModule.c b/capstone/arch/BPF/BPFModule.c
new file mode 100644
index 000000000..d744b827a
--- /dev/null
+++ b/capstone/arch/BPF/BPFModule.c
@@ -0,0 +1,34 @@
+/* Capstone Disassembly Engine */
+/* BPF Backend by david942j <david942j@gmail.com>, 2019 */
+
+#ifdef CAPSTONE_HAS_BPF
+
+#include "BPFDisassembler.h"
+#include "BPFInstPrinter.h"
+#include "BPFMapping.h"
+#include "BPFModule.h"
+
+/*
+ * Install the BPF backend callbacks on the handle `ud`.
+ * Always returns CS_ERR_OK.
+ */
+cs_err BPF_global_init(cs_struct *ud)
+{
+	/* decoding and printing entry points */
+	ud->disasm = BPF_getInstruction;
+	ud->printer = BPF_printInst;
+
+	/* id and name lookups */
+	ud->insn_id = BPF_get_insn_id;
+	ud->insn_name = BPF_insn_name;
+	ud->reg_name = BPF_reg_name;
+	ud->group_name = BPF_group_name;
+
+	/* the register-access callback is only set in non-diet builds */
+#ifndef CAPSTONE_DIET
+	ud->reg_access = BPF_reg_access;
+#endif
+
+	return CS_ERR_OK;
+}
+
+/*
+ * Apply a runtime option to the handle.
+ * Only CS_OPT_MODE has an effect: `value` is stored as the handle's mode.
+ * Every other option type is ignored. Always returns CS_ERR_OK.
+ */
+cs_err BPF_option(cs_struct *handle, cs_opt_type type, size_t value)
+{
+	switch (type) {
+	case CS_OPT_MODE:
+		handle->mode = (cs_mode)value;
+		break;
+	default:
+		/* nothing to do for other option types */
+		break;
+	}
+
+	return CS_ERR_OK;
+}
+
+#endif
diff --git a/capstone/arch/BPF/BPFModule.h b/capstone/arch/BPF/BPFModule.h
new file mode 100644
index 000000000..0ff003382
--- /dev/null
+++ b/capstone/arch/BPF/BPFModule.h
@@ -0,0 +1,12 @@
+/* Capstone Disassembly Engine */
+/* BPF Backend by david942j <david942j@gmail.com>, 2019 */
+
+#ifndef CS_BPF_MODULE_H
+#define CS_BPF_MODULE_H
+
+#include "../../utils.h"
+
+/* Sets the BPF printer, disassembler and name/id lookup callbacks on ud
+ * (plus reg_access unless CAPSTONE_DIET is defined); returns CS_ERR_OK. */
+cs_err BPF_global_init(cs_struct *ud);
+/* Stores value as handle->mode when type is CS_OPT_MODE; other option
+ * types are ignored. Returns CS_ERR_OK. */
+cs_err BPF_option(cs_struct *handle, cs_opt_type type, size_t value);
+
+#endif