aboutsummaryrefslogtreecommitdiff
path: root/src/findmask.c
diff options
context:
space:
mode:
authorNick Sanders <nsanders@google.com>2015-09-15 12:41:37 -0700
committerNick Sanders <nsanders@google.com>2015-09-15 12:41:37 -0700
commit241f33a3e958842e3db803c03300764bd2ee9c19 (patch)
treed38035fadbfff8dbe02121f11658dd3fe1540df8 /src/findmask.c
parent279816931fafe7dbffb0169185e9ac360144aad7 (diff)
downloadstressapptest-241f33a3e958842e3db803c03300764bd2ee9c19.tar.gz
Update to stressapptest 1.0.7 from upstream
https://github.com/stressapptest/stressapptest Change-Id: I6307bcfad2e67392b4e0308680c708546e9a15a3 Signed-off-by: Nick Sanders <nsanders@google.com>
Diffstat (limited to 'src/findmask.c')
-rw-r--r--src/findmask.c140
1 files changed, 140 insertions, 0 deletions
diff --git a/src/findmask.c b/src/findmask.c
new file mode 100644
index 0000000..1b10988
--- /dev/null
+++ b/src/findmask.c
@@ -0,0 +1,140 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+/*
+ * This "tool" can be used to brute force the XOR bitmask that a memory
+ * controller uses to interleave addresses onto its two channels. To use it,
+ * you need to have a bunch of addresses that are known to go to only one
+ * of the memory channels... easiest way to get these is to run stressapptest on
+ * a machine while holding a soldering iron close to the chips of one channel.
+ * Generate about a thousand failures and extract their physical addresses
+ * from the output. Write them to findmask.inc in a way that forms a valid
+ * definition for the addrs array. Make and run on a big machine.
+ *
+ * The program iterates over all possible bitmasks within the lowest
+ * NUM_BITS + 1 bits, parallelizing execution over NUM_THREADS. Every mask is
+ * applied to all supplied addresses, counting the number of times this results
+ * in an odd or an even number of set bits. If fewer than NOISE addresses fall
+ * on the minority side, it will print that mask to stdout. Note that the
+ * program will always "find" the mask 0x0, and may also report masks such as
+ * 0x100000000 depending on your test machine's memory size... you will need to
+ * use your own judgement to interpret the results.
+ *
+ * As the program might run for a long time, you can send SIGUSR1 to it to
+ * output the last mask that was processed and get a rough idea of the
+ * current progress.
+ */
+
+#include <errno.h>
+#include <inttypes.h>
+#include <pthread.h>
+#include <signal.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+// A mask is reported when fewer than NOISE addresses land on the minority
+// side of the parity split; this tolerates a few stray entries in the input.
+#define NOISE 20
+// thread_func() tests every mask value below 2^(NUM_BITS + 1).
+#define NUM_BITS 32
+// Number of worker threads; worker i tests masks i, i + NUM_THREADS, ...
+#define NUM_THREADS 128 // keep this a power of two
+
+// Input addresses; the initializer list is supplied by findmask.inc.
+// The table is only ever read, so it is const-qualified.
+static const uint64_t addrs[] = {
+#include "findmask.inc"
+};
+// Progress marker: a recently processed mask, printed when SIGUSR1 arrives.
+static uint64_t lastmask;
+
+/*
+ * Worker thread body.
+ *
+ * arg carries the worker index (an integer smuggled through the pointer).
+ * The worker tests the masks index, index + NUM_THREADS, ... below
+ * 2^(NUM_BITS + 1). For each mask it ANDs the mask onto every entry of addrs
+ * and counts how many results have an even and how many an odd number of set
+ * bits. If the smaller of the two counts stays below NOISE, the mask is
+ * printed to stdout.
+ *
+ * Always returns NULL.
+ */
+__attribute__((optimize(3, "unroll-loops")))
+void* thread_func(void* arg) {
+  const uintptr_t num = (uintptr_t)arg;
+  const uint64_t limit = 1ULL << (NUM_BITS + 1);
+  const uint64_t* const end = addrs + sizeof(addrs) / sizeof(addrs[0]);
+  uint64_t mask;
+
+  for (mask = num; mask < limit; mask += NUM_THREADS) {
+    const uint64_t* cur;
+    int even = 0;  // addresses whose masked value has even parity
+    int odd = 0;   // addresses whose masked value has odd parity
+
+    for (cur = addrs; cur < end; cur++) {
+#ifdef __x86_64__
+      // GCC local register variables: pin the operands to the registers the
+      // hand-written template below names explicitly.
+      register uint64_t addr __asm__("rdx") = *cur & mask;
+      register uint32_t tmp __asm__("ebx");
+
+      // Behold: the dark bit counting magic!
+      __asm__ (
+        // Fold high and low 32 bits onto each other
+        "MOVl %%edx, %%ebx\n\t"
+        "SHRq $32, %%rdx\n\t"
+        "XORl %%ebx, %%edx\n\t"
+        // Fold high and low 16 bits onto each other
+        "MOVl %%edx, %%ebx\n\t"
+        "SHRl $16, %%edx\n\t"
+        "XORw %%bx, %%dx\n\t"
+        // Fold high and low 8 bits onto each other
+        "XORb %%dh, %%dl\n\t"
+        // Invoke ancient 8086 parity flag (only counts lowest byte)
+        "SETnp %%bl\n\t"
+        "SETp %%dl\n\t"
+        // Stupid SET instruction can only affect the lowest byte...
+        "ANDl $1, %%ebx\n\t"
+        "ANDl $1, %%edx\n\t"
+        // Increment either counter without needing another branch
+        "ADDl %%ebx, %2\n\t"
+        "ADDl %%edx, %1\n\t"
+        // rdx is overwritten by the template, so it must be an in/out
+        // operand ("+d"); an input-only operand may not be modified.
+        : "=b"(tmp), "+r"(even), "+r"(odd), "+d"(addr) : : "cc");
+
+#else // generic processor
+      const uint64_t addr = *cur & mask;
+
+      // Same classification as the x86-64 path: odd parity -> odd,
+      // even parity -> even.
+      if (__builtin_parityll(addr)) {
+        odd++;
+      } else {
+        even++;
+      }
+#endif
+
+      // Early abort: probably still the most valuable optimization in here
+      if (even >= NOISE && odd >= NOISE) break;
+    }
+
+    {
+      // The two sides are interchangeable; report whichever one is below the
+      // noise threshold (the even count takes precedence if both are).
+      const int deviations = (even < NOISE) ? even : odd;
+
+      if (deviations < NOISE) {
+        printf("Found mask with just %d deviations: 0x%" PRIx64 "\n",
+               deviations, mask);
+        fflush(stdout);
+      }
+    }
+
+    // I'm a little paranoid about performance: don't write to memory too
+    // often. Only multiples of 2048 are published; with NUM_THREADS a power of
+    // two no larger than 2048 that is only ever worker 0. The store is atomic
+    // because the signal handler may read lastmask from another thread.
+    if (!(mask & 0x7ff)) __atomic_store_n(&lastmask, mask, __ATOMIC_RELAXED);
+  }
+
+  return NULL;
+}
+
+/*
+ * SIGUSR1 handler: prints the most recently published progress mask.
+ *
+ * Only async-signal-safe operations are used here. printf()/fflush() are not
+ * async-signal-safe and could deadlock if the signal interrupts a worker that
+ * is in the middle of its own printf(), so the message is formatted by hand
+ * and emitted with write(). The output text is identical to
+ * "Received signal... currently evaluating mask 0x<hex>!\n".
+ *
+ * signum is unused; the handler is only installed for SIGUSR1.
+ */
+void signal_handler(int signum) {
+  static const char prefix[] =
+      "Received signal... currently evaluating mask 0x";
+  static const char digits[] = "0123456789abcdef";
+  // prefix (with its NUL slot) + up to 16 hex digits + "!\n"
+  char buf[sizeof(prefix) + 16 + 2];
+  char hex[16];
+  const int saved_errno = errno;  // write() may clobber errno
+  uint64_t value = __atomic_load_n(&lastmask, __ATOMIC_RELAXED);
+  size_t ndigits = 0;
+  size_t len = 0;
+  size_t off = 0;
+  size_t i;
+
+  (void)signum;
+
+  // Produce hex digits least-significant first; zero yields a single "0",
+  // matching what the PRIx64 conversion prints.
+  do {
+    hex[ndigits++] = digits[value & 0xf];
+    value >>= 4;
+  } while (value != 0);
+
+  for (i = 0; i < sizeof(prefix) - 1; i++) buf[len++] = prefix[i];
+  while (ndigits > 0) buf[len++] = hex[--ndigits];
+  buf[len++] = '!';
+  buf[len++] = '\n';
+
+  // Handle short writes and interruption; give up on any other error.
+  while (off < len) {
+    const ssize_t n = write(STDOUT_FILENO, buf + off, len - off);
+    if (n < 0) {
+      if (errno == EINTR) continue;
+      break;
+    }
+    off += (size_t)n;
+  }
+
+  errno = saved_errno;
+}
+
+/*
+ * Entry point: installs the SIGUSR1 progress handler, starts NUM_THREADS
+ * workers (worker i receives i as its argument) and waits for all of them.
+ *
+ * Command-line arguments are ignored.
+ *
+ * Returns 0 once every worker has finished, or EXIT_FAILURE if a worker
+ * could not be started or joined.
+ */
+int main(int argc, char** argv) {
+  uintptr_t i;
+  pthread_t threads[NUM_THREADS];
+  int status = 0;
+
+  (void)argc;
+  (void)argv;
+
+  // Progress reporting is optional, so a failure here is only a warning.
+  if (signal(SIGUSR1, signal_handler) == SIG_ERR)
+    fprintf(stderr, "warning: could not install SIGUSR1 handler\n");
+
+  for (i = 0; i < NUM_THREADS; i++) {
+    const int rc = pthread_create(&threads[i], NULL, thread_func, (void*)i);
+
+    if (rc != 0) {
+      // Each worker covers a distinct slice of the mask space, so running
+      // with a missing worker would silently skip masks. Returning from main
+      // ends the process, including any workers already started.
+      fprintf(stderr, "pthread_create failed for thread %" PRIuPTR
+              " (error %d)\n", i, rc);
+      return EXIT_FAILURE;
+    }
+  }
+
+  for (i = 0; i < NUM_THREADS; i++) {
+    const int rc = pthread_join(threads[i], NULL);
+
+    if (rc != 0) {
+      fprintf(stderr, "pthread_join failed for thread %" PRIuPTR
+              " (error %d)\n", i, rc);
+      status = EXIT_FAILURE;
+    }
+  }
+
+  return status;
+}