summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Maciej Żenczykowski <maze@google.com> 2024-04-24 15:24:34 -0700
committer: Maciej Żenczykowski <maze@google.com> 2024-04-24 18:07:31 -0700
commit: bb3f1b0a9a2c6d5e48e2076f8ed827576b935d10 (patch)
tree: 234ad927c965eab08232cb7d4024f82344cbcd5a
parent: 65079fbfd8d952b583da02895b64c0afd6dd36e4 (diff)
download: apf-bb3f1b0a9a2c6d5e48e2076f8ed827576b935d10.tar.gz
v5: optimize interpreter immediate decoding
Tested various approaches for converting len_field 0/1/2/3 into imm_len 0/1/2/4:
  (len_field * 5 + 1) / 4
  (len_field * 5 + 1) >> 2
  ((len_field + 1) >> 2) + len_field
  len_field + (len_field == 3)
  0x4210 >> (4 * len_field) & 0xF
  04210 >> (3 * len_field) & 7
  len_field ? 1 << (len_field - 1) : 0
and
  if (len_field) { const u32 imm_len = 1 << (len_field - 1); ... }
plus u32 array [0, 1, 2, 4] lookup

Before:
  text  data  bss   dec   hex  filename
  4148     0    0  4148  1034  apf_interpreter.arm.o
  text  data  bss   dec   hex  filename
  5184     0    0  5184  1440  apf_interpreter.x86.o

After:
  text  data  bss   dec   hex  filename
  4100     0    0  4100  1004  apf_interpreter.arm.o
  text  data  bss   dec   hex  filename
  5154     0    0  5154  1422  apf_interpreter.x86.o

Test: TreeHugger, manually
Signed-off-by: Maciej Żenczykowski <maze@google.com>
Change-Id: I96fc324bb96b60d702ff6f6c45f52741997726b9
-rw-r--r-- v5/apf_interpreter.c 51
-rw-r--r-- v5/apf_interpreter_source.c 51
2 files changed, 42 insertions, 60 deletions
diff --git a/v5/apf_interpreter.c b/v5/apf_interpreter.c
index 401e1cd..7347886 100644
--- a/v5/apf_interpreter.c
+++ b/v5/apf_interpreter.c
@@ -627,7 +627,7 @@ static int do_discard_buffer(apf_context* ctx) {
return apf_internal_do_transmit_buffer(ctx, 0 /* pkt_len */, 0 /* dscp */);
}
-/* Decode the imm length, does not do range checking. */
+/* Decode an immediate, lengths [0..4] all work, does not do range checking. */
/* But note that program is at least 20 bytes shorter than ram, so first few */
/* immediates can always be safely decoded without exceeding ram buffer. */
static u32 decode_imm(apf_context* ctx, u32 length) {
@@ -705,13 +705,13 @@ static int do_apf_run(apf_context* ctx) {
#define OTHER_REG (ctx->R[reg_num ^ 1])
/* All instructions have immediate fields, so load them now. */
const u32 len_field = EXTRACT_IMM_LENGTH(bytecode);
+ const u32 imm_len = ((len_field + 1u) >> 2) + len_field; /* 0,1,2,3 -> 0,1,2,4 */
u32 pktcopy_src_offset = 0; /* used for various pktdatacopy opcodes */
u32 imm = 0;
s32 signed_imm = 0;
u32 arith_imm;
s32 arith_signed_imm;
if (len_field != 0) {
- const u32 imm_len = 1 << (len_field - 1);
imm = decode_imm(ctx, imm_len); /* 1st imm, at worst bytes 1-4 past opcode/program_len */
/* Sign extend imm into signed_imm. */
signed_imm = (s32)(imm << ((4 - imm_len) * 8));
@@ -786,14 +786,11 @@ static int do_apf_run(apf_context* ctx) {
case JLT_OPCODE:
case JSET_OPCODE: {
u32 cmp_imm = 0;
- /* with len_field == 0, we have imm == 0 and thus a jmp +0, ie. a no-op */
- if (len_field == 0) break;
/* Load second immediate field. */
if (reg_num == 1) {
cmp_imm = ctx->R[1];
} else {
- u32 cmp_imm_len = 1 << (len_field - 1);
- cmp_imm = decode_imm(ctx, cmp_imm_len); /* 2nd imm, at worst 8 bytes past prog_len */
+ cmp_imm = decode_imm(ctx, imm_len); /* 2nd imm, at worst 8 bytes past prog_len */
}
switch (opcode) {
case JEQ_OPCODE: if (ctx->R[0] == cmp_imm) ctx->pc += imm; break;
@@ -805,28 +802,24 @@ static int do_apf_run(apf_context* ctx) {
break;
}
case JBSMATCH_OPCODE: {
- /* with len_field == 0, we have imm == cmp_imm == 0 and thus a jmp +0, ie. a no-op */
- if (len_field) {
- /* Load second immediate field. */
- u32 cmp_imm_len = 1 << (len_field - 1);
- u32 cmp_imm = decode_imm(ctx, cmp_imm_len); /* 2nd imm, at worst 8 bytes past prog_len */
- const u32 last_packet_offs = ctx->R[0] + cmp_imm - 1;
- Boolean do_jump = !reg_num;
- /* cmp_imm is size in bytes of data to compare. */
- /* pc is offset of program bytes to compare. */
- /* imm is jump target offset. */
- /* R0 is offset of packet bytes to compare. */
- if (cmp_imm > 0xFFFF) return EXCEPTION;
- /* pc < program_len < ram_len < 2GiB, thus pc + cmp_imm cannot wrap */
- if (!IN_RAM_BOUNDS(ctx->pc + cmp_imm - 1)) return EXCEPTION;
- ASSERT_IN_PACKET_BOUNDS(ctx->R[0]);
- ASSERT_RETURN(last_packet_offs >= ctx->R[0]);
- ASSERT_IN_PACKET_BOUNDS(last_packet_offs);
- do_jump ^= !memcmp(ctx->program + ctx->pc, ctx->packet + ctx->R[0], cmp_imm);
- /* skip past comparison bytes */
- ctx->pc += cmp_imm;
- if (do_jump) ctx->pc += imm;
- }
+ /* Load second immediate field. */
+ u32 cmp_imm = decode_imm(ctx, imm_len); /* 2nd imm, at worst 8 bytes past prog_len */
+ const u32 last_packet_offs = ctx->R[0] + cmp_imm - 1;
+ Boolean do_jump = !reg_num;
+ /* cmp_imm is size in bytes of data to compare. */
+ /* pc is offset of program bytes to compare. */
+ /* imm is jump target offset. */
+ /* R0 is offset of packet bytes to compare. */
+ if (cmp_imm > 0xFFFF) return EXCEPTION;
+ /* pc < program_len < ram_len < 2GiB, thus pc + cmp_imm cannot wrap */
+ if (!IN_RAM_BOUNDS(ctx->pc + cmp_imm - 1)) return EXCEPTION;
+ ASSERT_IN_PACKET_BOUNDS(ctx->R[0]);
+ ASSERT_RETURN(last_packet_offs >= ctx->R[0]);
+ ASSERT_IN_PACKET_BOUNDS(last_packet_offs);
+ do_jump ^= !memcmp(ctx->program + ctx->pc, ctx->packet + ctx->R[0], cmp_imm);
+ /* skip past comparison bytes */
+ ctx->pc += cmp_imm;
+ if (do_jump) ctx->pc += imm;
break;
}
/* There is a difference in APFv4 and APFv6 arithmetic behaviour! */
@@ -950,7 +943,6 @@ static int do_apf_run(apf_context* ctx) {
case JDNSAMATCH_EXT_OPCODE: /* 44 */
case JDNSQMATCHSAFE_EXT_OPCODE: /* 45 */
case JDNSAMATCHSAFE_EXT_OPCODE: { /* 46 */
- const u32 imm_len = 1 << (len_field - 1); /* EXT_OPCODE, thus len_field > 0 */
u32 jump_offs = decode_imm(ctx, imm_len); /* 2nd imm, at worst 8 B past prog_len */
int qtype = -1;
if (imm & 1) { /* JDNSQMATCH & JDNSQMATCHSAFE are *odd* extended opcodes */
@@ -992,7 +984,6 @@ static int do_apf_run(apf_context* ctx) {
break;
}
case JONEOF_EXT_OPCODE: {
- const u32 imm_len = 1 << (len_field - 1); /* ext opcode len_field guaranteed > 0 */
u32 jump_offs = decode_imm(ctx, imm_len); /* 2nd imm, at worst 8 B past prog_len */
u8 imm3 = DECODE_U8(); /* 3rd imm, at worst 9 bytes past prog_len */
Boolean jmp = imm3 & 1; /* =0 jmp on match, =1 jmp on no match */
diff --git a/v5/apf_interpreter_source.c b/v5/apf_interpreter_source.c
index 04fa51f..910c308 100644
--- a/v5/apf_interpreter_source.c
+++ b/v5/apf_interpreter_source.c
@@ -92,7 +92,7 @@ static int do_discard_buffer(apf_context* ctx) {
return do_transmit_buffer(ctx, 0 /* pkt_len */, 0 /* dscp */);
}
-// Decode the imm length, does not do range checking.
+// Decode an immediate, lengths [0..4] all work, does not do range checking.
// But note that program is at least 20 bytes shorter than ram, so first few
// immediates can always be safely decoded without exceeding ram buffer.
static u32 decode_imm(apf_context* ctx, u32 length) {
@@ -170,13 +170,13 @@ static int do_apf_run(apf_context* ctx) {
#define OTHER_REG (ctx->R[reg_num ^ 1])
// All instructions have immediate fields, so load them now.
const u32 len_field = EXTRACT_IMM_LENGTH(bytecode);
+ const u32 imm_len = ((len_field + 1u) >> 2) + len_field; // 0,1,2,3 -> 0,1,2,4
u32 pktcopy_src_offset = 0; // used for various pktdatacopy opcodes
u32 imm = 0;
s32 signed_imm = 0;
u32 arith_imm;
s32 arith_signed_imm;
if (len_field != 0) {
- const u32 imm_len = 1 << (len_field - 1);
imm = decode_imm(ctx, imm_len); // 1st imm, at worst bytes 1-4 past opcode/program_len
// Sign extend imm into signed_imm.
signed_imm = (s32)(imm << ((4 - imm_len) * 8));
@@ -251,14 +251,11 @@ static int do_apf_run(apf_context* ctx) {
case JLT_OPCODE:
case JSET_OPCODE: {
u32 cmp_imm = 0;
- // with len_field == 0, we have imm == 0 and thus a jmp +0, ie. a no-op
- if (len_field == 0) break;
// Load second immediate field.
if (reg_num == 1) {
cmp_imm = ctx->R[1];
} else {
- u32 cmp_imm_len = 1 << (len_field - 1);
- cmp_imm = decode_imm(ctx, cmp_imm_len); // 2nd imm, at worst 8 bytes past prog_len
+ cmp_imm = decode_imm(ctx, imm_len); // 2nd imm, at worst 8 bytes past prog_len
}
switch (opcode) {
case JEQ_OPCODE: if (ctx->R[0] == cmp_imm) ctx->pc += imm; break;
@@ -270,28 +267,24 @@ static int do_apf_run(apf_context* ctx) {
break;
}
case JBSMATCH_OPCODE: {
- // with len_field == 0, we have imm == cmp_imm == 0 and thus a jmp +0, ie. a no-op
- if (len_field) {
- // Load second immediate field.
- u32 cmp_imm_len = 1 << (len_field - 1);
- u32 cmp_imm = decode_imm(ctx, cmp_imm_len); // 2nd imm, at worst 8 bytes past prog_len
- const u32 last_packet_offs = ctx->R[0] + cmp_imm - 1;
- bool do_jump = !reg_num;
- // cmp_imm is size in bytes of data to compare.
- // pc is offset of program bytes to compare.
- // imm is jump target offset.
- // R0 is offset of packet bytes to compare.
- if (cmp_imm > 0xFFFF) return EXCEPTION;
- // pc < program_len < ram_len < 2GiB, thus pc + cmp_imm cannot wrap
- if (!IN_RAM_BOUNDS(ctx->pc + cmp_imm - 1)) return EXCEPTION;
- ASSERT_IN_PACKET_BOUNDS(ctx->R[0]);
- ASSERT_RETURN(last_packet_offs >= ctx->R[0]);
- ASSERT_IN_PACKET_BOUNDS(last_packet_offs);
- do_jump ^= !memcmp(ctx->program + ctx->pc, ctx->packet + ctx->R[0], cmp_imm);
- // skip past comparison bytes
- ctx->pc += cmp_imm;
- if (do_jump) ctx->pc += imm;
- }
+ // Load second immediate field.
+ u32 cmp_imm = decode_imm(ctx, imm_len); // 2nd imm, at worst 8 bytes past prog_len
+ const u32 last_packet_offs = ctx->R[0] + cmp_imm - 1;
+ bool do_jump = !reg_num;
+ // cmp_imm is size in bytes of data to compare.
+ // pc is offset of program bytes to compare.
+ // imm is jump target offset.
+ // R0 is offset of packet bytes to compare.
+ if (cmp_imm > 0xFFFF) return EXCEPTION;
+ // pc < program_len < ram_len < 2GiB, thus pc + cmp_imm cannot wrap
+ if (!IN_RAM_BOUNDS(ctx->pc + cmp_imm - 1)) return EXCEPTION;
+ ASSERT_IN_PACKET_BOUNDS(ctx->R[0]);
+ ASSERT_RETURN(last_packet_offs >= ctx->R[0]);
+ ASSERT_IN_PACKET_BOUNDS(last_packet_offs);
+ do_jump ^= !memcmp(ctx->program + ctx->pc, ctx->packet + ctx->R[0], cmp_imm);
+ // skip past comparison bytes
+ ctx->pc += cmp_imm;
+ if (do_jump) ctx->pc += imm;
break;
}
// There is a difference in APFv4 and APFv6 arithmetic behaviour!
@@ -415,7 +408,6 @@ static int do_apf_run(apf_context* ctx) {
case JDNSAMATCH_EXT_OPCODE: // 44
case JDNSQMATCHSAFE_EXT_OPCODE: // 45
case JDNSAMATCHSAFE_EXT_OPCODE: { // 46
- const u32 imm_len = 1 << (len_field - 1); // EXT_OPCODE, thus len_field > 0
u32 jump_offs = decode_imm(ctx, imm_len); // 2nd imm, at worst 8 B past prog_len
int qtype = -1;
if (imm & 1) { // JDNSQMATCH & JDNSQMATCHSAFE are *odd* extended opcodes
@@ -457,7 +449,6 @@ static int do_apf_run(apf_context* ctx) {
break;
}
case JONEOF_EXT_OPCODE: {
- const u32 imm_len = 1 << (len_field - 1); // ext opcode len_field guaranteed > 0
u32 jump_offs = decode_imm(ctx, imm_len); // 2nd imm, at worst 8 B past prog_len
u8 imm3 = DECODE_U8(); // 3rd imm, at worst 9 bytes past prog_len
bool jmp = imm3 & 1; // =0 jmp on match, =1 jmp on no match