aboutsummaryrefslogtreecommitdiff
path: root/arm_utils.h
diff options
context:
space:
mode:
authorSamuel Huang <huangs@chromium.org>2019-01-18 19:36:50 +0000
committerCopybara-Service <copybara-worker@google.com>2021-07-25 20:45:25 -0700
commit4d20d0012bf46cdf678355af1a7be0eb1f05c80f (patch)
treeece4194582afbfe38fd9545b6cb04968976a18f3 /arm_utils.h
parent3321a9e2309f02753ef88c1c96e999ac7a6fccfe (diff)
downloadzucchini-4d20d0012bf46cdf678355af1a7be0eb1f05c80f.tar.gz
[Zucchini] ARM code: Add alignment checks for Read*() / Write*(); add tests.
Read*() / Write*() functions for ARM code take |instr_rva|, and translate |code| <-> |target_rva|. Both |instr_rva| and |target_rva| must be properly aligned (2-bytes or 4-bytes), but previously such checks were only done on |target_rva - PC(instr_rva)|. This CL adds: * Alignment checks for input params of Read*() / Write*(). * Unit tests for Read*() / Write*(). Also update high-level comments in arm_utils.h. Bug: 918867 Change-Id: Ie37d2df621411920b54ef699edaed1d90eacba88 Reviewed-on: https://chromium-review.googlesource.com/c/1415753 Reviewed-by: Samuel Huang <huangs@chromium.org> Reviewed-by: Etienne Pierre-Doray <etiennep@chromium.org> Commit-Queue: Samuel Huang <huangs@chromium.org> Cr-Commit-Position: refs/heads/master@{#624236} NOKEYCHECK=True GitOrigin-RevId: 88de28e56330d97fe046755fee972495d5e5749d
Diffstat (limited to 'arm_utils.h')
-rw-r--r--arm_utils.h45
1 files changed, 29 insertions, 16 deletions
diff --git a/arm_utils.h b/arm_utils.h
index 7d95400..eb9b343 100644
--- a/arm_utils.h
+++ b/arm_utils.h
@@ -21,7 +21,7 @@ namespace zucchini {
// * AArch64 (64-bit ARM):
// https://static.docs.arm.com/ddi0487/da/DDI0487D_a_armv8_arm.pdf
-// Definitions (non-official, but used in this file):
+// Definitions (used in Zucchini):
// * |instr_rva|: Instruction RVA: The RVA where an instruction is located. In
// ARM mode and for AArch64 this is 4-byte aligned; in THUMB2 mode this is
// 2-byte aligned.
@@ -51,7 +51,7 @@ namespace zucchini {
// Example 2 (THUMB2 mode):
// 001030A2: 00 F0 01 FA BL 001034A8
// |instr_rva| = 0x001030A2 (2-byte aligned).
-// |code| = 0xF000FA01 (special THUMB2 mode data fetch method).
+// |code| = 0xF000FA01 (special THUMB2 mode data fetch).
// |disp| = 0x00000402 (decoded from |code| with T24 -> BL encoding T1).
// PC = |instr_rva| + 4 = 0x001030A6 (THUMB2 mode).
// |target_rva| = PC + |disp| = 0x001034A8.
@@ -64,20 +64,26 @@ namespace zucchini {
// PC = |instr_rva| = 0x00305070 (AArch64).
// |target_rva| = PC + |disp| = 0x0034587C.
-// BLX complication: BLX encoding T2 transits mode from THUMB2 to ARM. Therefore
-// |target_rva| must be 4-byte aligned; it's not just PC + |disp|. In THUMB2
-// encoding, |disp| is required to be multiple of 4 (so H = 0, where H is the
-// bit corresponding to 2). |target_rva| becomes PC + |disp| rounded down to the
-// nearest 4-byte aligned address. We have two alternatives to handle this
-// complexity (this affects |disp|'s definition):
-// (1) Handle in |code| <-> |disp|: Let |disp| be |target_rva| - PC. For
-// BLX encoding T2, we'd examine |instr_rva| % 4 and adjust accordingly.
-// (2) Handle in |disp| <-> |target_rva|: Let |disp| be the value stored in
-// |code|. Computation involving |target_rva| would require |instr_rva| and
-// prior |code| extraction, from which we deduce expected target alignment
-// (4 for ARM mode; 2 for THUMB2 mode except for 4 for BLX encoding T2) and
-// adjust accordingly.
-// We adopt (2) since |code| <-> |disp| is useful, and should be made simple.
+// BLX complication: BLX transits between ARM mode and THUMB2 mode, and branches
+// to an address. Therefore |instr_rva| must align by the "old" mode, and
+// |target_rva| must align by the "new" mode. In particular:
+// * BLX encoding A2 (ARM -> THUMB2): |instr_rva| is 4-byte aligned with
+// PC = |instr_rva| + 8; |target_rva| is 2-byte aligned, and so |disp| is
+// 2-byte aligned.
+// * BLX encoding T2 (THUMB2 -> ARM): |instr_rva| is 2-byte aligned with
+// PC = |instr_rva| + 4; |target_rva| is 4-byte aligned. Complication: BLX
+// encoding T2 stores a bit |H| that corresponds to "2" in binary, but |H|
+// must be set to 0. Thus the encoded value is effectively 4-byte aligned. So
+// when computing |target_rva| by adding PC (2-byte aligned) to the stored
+// value (4-byte aligned), the result must be rounded down to the nearest
+// 4-byte aligned address.
+// The last situation creates ambiguity in how |disp| is defined! Alternatives:
+// (1) |disp| := |target_rva| - PC: So |code| <-> |disp| for BLX encoding T2,
+// requires |instr_rva| % 4 to be determined, and adjustments made.
+// (2) |disp| := Value stored in |code|: So |disp| <-> |target_rva| for BLX
+// encoding T2 requires adjustment: |disp| -> |target_rva| needs to round
+// down, whereas |target_rva| -> |disp| needs to round up.
+// We adopt (2) to simplify |code| <-> |disp|, since that gets used.
using arm_disp_t = int32_t;
@@ -129,6 +135,13 @@ class Arm32Rel32Translator {
// Rel32 address types enumeration.
enum AddrType : uint8_t {
ADDR_NONE = 0xFF,
+ // Naming: Here "A24" represents ARM mode instructions where |code|
+ // dedicates 24 bits (including sign bit) to specify |disp|. Similarly, "T8"
+ // represents THUMB2 mode instructions with 8 bits for |disp|. Currently
+ // only {A24, T8, T11, T20, T24} are defined. These are not to be confused
+ // with "B encoding A1", "B encoding T3", etc., which are specific encoding
+ // schemes given by the manual for the "B" (or other) instructions (only
+ // {A1, A2, T1, T2, T3, T4} are seen).
ADDR_A24 = 0,
ADDR_T8,
ADDR_T11,