diff options
author | Nick Sanders <nsanders@google.com> | 2015-09-15 12:41:37 -0700 |
---|---|---|
committer | Nick Sanders <nsanders@google.com> | 2015-09-15 12:41:37 -0700 |
commit | 241f33a3e958842e3db803c03300764bd2ee9c19 (patch) | |
tree | d38035fadbfff8dbe02121f11658dd3fe1540df8 /src/findmask.c | |
parent | 279816931fafe7dbffb0169185e9ac360144aad7 (diff) | |
download | stressapptest-241f33a3e958842e3db803c03300764bd2ee9c19.tar.gz |
Update to stressapptest 1.0.7 from upstream
https://github.com/stressapptest/stressapptest
Change-Id: I6307bcfad2e67392b4e0308680c708546e9a15a3
Signed-off-by: Nick Sanders <nsanders@google.com>
Diffstat (limited to 'src/findmask.c')
-rw-r--r-- | src/findmask.c | 140 |
1 files changed, 140 insertions, 0 deletions
diff --git a/src/findmask.c b/src/findmask.c new file mode 100644 index 0000000..1b10988 --- /dev/null +++ b/src/findmask.c @@ -0,0 +1,140 @@ +/* Copyright 2013 Google Inc. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/* + * This "tool" can be used to brute force the XOR bitmask that a memory + * controller uses to interleave addresses onto its two channels. To use it, + * you need to have a bunch of addresses that are known to go to only one + * of the memory channels... easiest way to get these is to run stressapptest on + * a machine while holding a soldering iron close to the chips of one channel. + * Generate about a thousand failures and extract their physical addresses + * from the output. Write them to findmask.inc in a way that forms a valid + * definition for the addrs array. Make and run on a big machine. + * + * The program iterates over all possible bitmasks within the first NUM_BITS, + * parallelizing execution over NUM_THREADS. Every integer is masked + * onto all supplied addresses, counting the amount of times this results in + * an odd or even amount of bits. If all but NOISE addresses fall on one side, + * it will print that mask to stdout. Note that the script will always "find" + * the mask 0x0, and may also report masks such as 0x100000000 depending on + * your test machines memory size... you will need to use your own judgement to + * interpret the results. + * + * As the program might run for a long time, you can send SIGUSR1 to it to + * output the last mask that was processed and get a rough idea of the + * current progress. + */ + +#include <inttypes.h> +#include <pthread.h> +#include <signal.h> +#include <stdint.h> +#include <stdlib.h> +#include <stdio.h> + +#define NOISE 20 +#define NUM_BITS 32 +#define NUM_THREADS 128 // keep this a power of two + +static uint64_t addrs[] = { +#include "findmask.inc" +}; +static uint64_t lastmask; + +__attribute__((optimize(3, "unroll-loops"))) +void* thread_func(void* arg) { + register uint64_t mask; + register uintptr_t num = (uintptr_t)arg; + + for (mask = num; mask < (1ULL << (NUM_BITS + 1)); mask += NUM_THREADS) { + register const uint64_t* cur; + register int a = 0; + register int b = 0; + + for (cur = addrs; (char*)cur < (char*)addrs + sizeof(addrs); cur++) { +#ifdef __x86_64__ + register uint64_t addr asm("rdx") = *cur & mask; + register uint32_t tmp asm("ebx"); + + // Behold: the dark bit counting magic! + asm ( + // Fold high and low 32 bits onto each other + "MOVl %%edx, %%ebx\n\t" + "SHRq $32, %%rdx\n\t" + "XORl %%ebx, %%edx\n\t" + // Fold high and low 16 bits onto each other + "MOVl %%edx, %%ebx\n\t" + "SHRl $16, %%edx\n\t" + "XORw %%bx, %%dx\n\t" + // Fold high and low 8 bits onto each other + "XORb %%dh, %%dl\n\t" + // Invoke ancient 8086 parity flag (only counts lowest byte) + "SETnp %%bl\n\t" + "SETp %%dl\n\t" + // Stupid SET instruction can only affect the lowest byte... + "ANDl $1, %%ebx\n\t" + "ANDl $1, %%edx\n\t" + // Increment either 'a' or 'b' without needing another branch + "ADDl %%ebx, %2\n\t" + "ADDl %%edx, %1\n\t" + : "=b" (tmp), "+r"(a), "+r"(b) : "d"(addr) : "cc"); + +#else // generic processor + register uint64_t addr = *cur & mask; + register uint32_t low = (uint32_t)addr; + register uint32_t high = (uint32_t)(addr >> 32); + + // Takes about twice as long as the version above... take that GCC! + __builtin_parity(low) ^ __builtin_parity(high) ? a++ : b++; +#endif + + // Early abort: probably still the most valuable optimization in here + if (a >= NOISE && b >= NOISE) break; + } + + if (a < NOISE) b = a; + if (b < NOISE) { + printf("Found mask with just %d deviations: 0x%" PRIx64 "\n", b, mask); + fflush(stdout); + } + + // I'm a little paranoid about performance: don't write to memory too often + if (!(mask & 0x7ff)) lastmask = mask; + } + + return 0; +} + +void signal_handler(int signum) { + printf("Received signal... currently evaluating mask 0x%" PRIx64 "!\n", + lastmask); + fflush(stdout); +} + +int main(int argc, char** argv) { + uintptr_t i; + pthread_t threads[NUM_THREADS]; + + signal(SIGUSR1, signal_handler); + + for (i = 0; i < NUM_THREADS; i++) + pthread_create(&threads[i], 0, thread_func, (void*)i); + + for (i = 0; i < NUM_THREADS; i++) + pthread_join(threads[i], 0); + + return 0; +} |