diff options
Diffstat (limited to 'bcc')
-rwxr-xr-x | bcc/build-bcc.sh | 12 | ||||
-rwxr-xr-x | bcc/build-kheaders-targz.sh | 39 | ||||
-rw-r--r-- | bcc/misc/android-futex-contention-record | 2 | ||||
-rw-r--r-- | bcc/misc/android-futex-contention-report | 4 | ||||
-rw-r--r-- | bcc/misc/android-futex-contention.py | 115 | ||||
-rw-r--r-- | bcc/misc/futex-contention.py | 63 | ||||
-rw-r--r-- | bcc/misc/lockstat.py | 300 |
7 files changed, 535 insertions, 0 deletions
diff --git a/bcc/build-bcc.sh b/bcc/build-bcc.sh new file mode 100755 index 0000000..a3ddc83 --- /dev/null +++ b/bcc/build-bcc.sh @@ -0,0 +1,12 @@ +#!/bin/bash +# This script should run within a bcc checkout + +spath=$( cd "$(dirname "$0")" ; pwd -P ) +cd $spath + +rm -rf build && mkdir -p build && cd build +cmake .. -DCMAKE_INSTALL_PREFIX=/usr -DCMAKE_C_COMPILER=clang-6.0 -DCMAKE_CXX_COMPILER=clang++-6.0 +make -j4 +make install +cd .. +rm -rf build diff --git a/bcc/build-kheaders-targz.sh b/bcc/build-kheaders-targz.sh new file mode 100755 index 0000000..06eed93 --- /dev/null +++ b/bcc/build-kheaders-targz.sh @@ -0,0 +1,39 @@ +#!/bin/bash +script_full_path=$( cd "$(dirname "$0")" ; pwd -P ) + +if [ $# -ne 2 ]; then + echo "illegal number of parameters, usage: ./build KERNEL_PATH out.tar.gz" + exit 1 +fi + +# Please provide absolute paths +KERNEL_PATH=$1 +OUT_TAR=$2 + +KERNEL_PATH="$(dirname $(readlink -e $KERNEL_PATH))/$(basename $KERNEL_PATH)" +if [ ! -d "$KERNEL_PATH" ]; then + echo "Kernel directory couldn't be found" + exit 3 +fi + +# kdir=$(basename $KERNEL_PATH) + +cd $KERNEL_PATH +find arch -name include -type d -print | xargs -n1 -i: find : -type f > /tmp/kernel-headers.h +find include >> /tmp/kernel-headers.h + +grep "include/generated/autoconf.h" /tmp/kernel-headers.h > /dev/null 2>&1 +retgrep=$? +if [ $retgrep -ne 0 ]; then + >&2 echo "" + >&2 echo "The kernel sources at ${KERNEL_PATH} you pointed to aren't configured and built." + >&2 echo "Please atleast run in your kernel sources:" + >&2 echo $'make defconfig\nmake' + >&2 echo $'\nNote: You dont need to do the full build since headers are generated early on.\n' + >&2 echo "Note: Please build your kernel in tree (build and source should be in same directory)" + >&2 echo "" + exit $retgrep +fi + +cat /tmp/kernel-headers.h | tar -zcf $OUT_TAR -T - +rm /tmp/kernel-headers.h diff --git a/bcc/misc/android-futex-contention-record b/bcc/misc/android-futex-contention-record new file mode 100644 index 0000000..a12ff79 --- /dev/null +++ b/bcc/misc/android-futex-contention-record @@ -0,0 +1,2 @@ +#!/bin/bash +perf record -a -g -e syscalls:sys_enter_futex -e syscalls:sys_exit_futex -e sched:sched_waking $@ diff --git a/bcc/misc/android-futex-contention-report b/bcc/misc/android-futex-contention-report new file mode 100644 index 0000000..7a8c122 --- /dev/null +++ b/bcc/misc/android-futex-contention-report @@ -0,0 +1,4 @@ +#!/bin/bash +# description: futext contention measurement + +perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/android-futex-contention.py diff --git a/bcc/misc/android-futex-contention.py b/bcc/misc/android-futex-contention.py new file mode 100644 index 0000000..5ef3841 --- /dev/null +++ b/bcc/misc/android-futex-contention.py @@ -0,0 +1,115 @@ +# futex contention +# (c) 2010, Arnaldo Carvalho de Melo <acme@redhat.com> +# Licensed under the terms of the GNU GPL License version 2 +# +# Translation of: +# +# http://sourceware.org/systemtap/wiki/WSFutexContention +# +# to perf python scripting. +# +# Measures futex contention + +import os, sys +sys.path.append(os.environ['PERF_EXEC_PATH'] + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') +from Util import * + +process_names = {} +thread_thislock = {} +thread_blocktime = {} + +lock_waits = {} # long-lived stats on (tid,lock) blockage elapsed time +waker_wakee = {} # maps the futex waker to wakee +max_waits = {} # Details about a maximum contention like owner, owner chain +process_names = {} # long-lived pid-to-execname mapping + +def android_lock(callchain): + for c in callchain: + if 'sym' in c and 'name' in c['sym']: + name = c['sym']['name'] + else: + continue + + if 'art::Monitor::Lock' in name: + return True + return False + +def print_callchain(callchain): + for c in callchain: + if 'sym' in c and 'name' in c['sym']: + name = c['sym']['name'] + else: + continue + + print(" %s" % (name)) + +def sched__sched_waking(event_name, context, common_cpu, + common_secs, common_nsecs, common_pid, common_comm, + common_callchain, comm, pid, prio, success, + target_cpu): + waker_wakee[pid] = [common_pid, common_callchain] + +def syscalls__sys_enter_futex(event, ctxt, cpu, s, ns, tid, comm, callchain, + nr, uaddr, op, val, utime, uaddr2, val3): + + cmd = op & FUTEX_CMD_MASK + if cmd != FUTEX_WAIT or android_lock(callchain) == False: + return # we don't care about originators of WAKE events + # or futex uses that aren't android locks. + + process_names[tid] = comm + thread_thislock[tid] = uaddr + thread_blocktime[tid] = nsecs(s, ns) + +def syscalls__sys_exit_futex(event, ctxt, cpu, s, ns, tid, comm, callchain, + nr, ret): + + waker_pid = -1 + waker_chain = "[no call chain]" + + if thread_blocktime.has_key(tid): + # Gather stats about the contention (sum, min, max) + elapsed = nsecs(s, ns) - thread_blocktime[tid] + add_stats(lock_waits, (tid, thread_thislock[tid]), elapsed) + + # Track details about the maximum contention seen + # including owner and its callchain + if (tid, thread_thislock[tid]) in max_waits: + prev_wait = max_waits[(tid, thread_thislock[tid])][0] + else: + prev_wait = 0 + + if elapsed > prev_wait: + if tid in waker_wakee: + waker_pid = waker_wakee[tid][0] + waker_chain = waker_wakee[tid][1] + + max_waits[(tid, thread_thislock[tid])] = [elapsed, waker_pid, waker_chain, callchain] + + del thread_blocktime[tid] + del thread_thislock[tid] + +def trace_begin(): + print "Press control+C to stop and show the summary" + +def trace_end(): + for (tid, lock) in lock_waits: + print("\n==============================================================\n") + min, max, avg, count = lock_waits[tid, lock] + print "%s[%d] lock %x contended %d times, %d avg ns, %d max ns" % \ + (process_names[tid], tid, lock, count, avg, max) + print "" + + if not (tid, lock) in max_waits: + print"Max contention info not available" + continue + + print "Callstack of suffering task:" + print_callchain(max_waits[tid, lock][3]) + print "" + + waker_pid = max_waits[tid, lock][1] + waker_name = process_names[waker_pid] if waker_pid in process_names else "nameless-owner" + print "Owner %s caused this contention of %d ns. Owner's Call stack below:" % (waker_name, max_waits[tid, lock][0]) + print_callchain(max_waits[tid, lock][2]) + diff --git a/bcc/misc/futex-contention.py b/bcc/misc/futex-contention.py new file mode 100644 index 0000000..5c96780 --- /dev/null +++ b/bcc/misc/futex-contention.py @@ -0,0 +1,63 @@ +# futex contention +# (c) 2010, Arnaldo Carvalho de Melo <acme@redhat.com> +# Licensed under the terms of the GNU GPL License version 2 +# +# Translation of: +# +# http://sourceware.org/systemtap/wiki/WSFutexContention +# +# to perf python scripting. +# +# Measures futex contention + +import os, sys +sys.path.append(os.environ['PERF_EXEC_PATH'] + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') +from Util import * + +process_names = {} +thread_thislock = {} +thread_blocktime = {} + +lock_waits = {} # long-lived stats on (tid,lock) blockage elapsed time +process_names = {} # long-lived pid-to-execname mapping + +def android_lock(callchain): + for c in callchain: + if 'sym' in c and 'name' in c['sym']: + name = c['sym']['name'] + else: + continue + + if 'art::Monitor::Lock' in name: + return True + return False + +def syscalls__sys_enter_futex(event, ctxt, cpu, s, ns, tid, comm, callchain, + nr, uaddr, op, val, utime, uaddr2, val3): + + cmd = op & FUTEX_CMD_MASK + if cmd != FUTEX_WAIT or android_lock(callchain) == False: + return # we don't care about originators of WAKE events + # or futex uses that aren't android locks. + + process_names[tid] = comm + thread_thislock[tid] = uaddr + thread_blocktime[tid] = nsecs(s, ns) + +def syscalls__sys_exit_futex(event, ctxt, cpu, s, ns, tid, comm, callchain, + nr, ret): + if thread_blocktime.has_key(tid): + elapsed = nsecs(s, ns) - thread_blocktime[tid] + add_stats(lock_waits, (tid, thread_thislock[tid]), elapsed) + del thread_blocktime[tid] + del thread_thislock[tid] + +def trace_begin(): + print "Press control+C to stop and show the summary" + +def trace_end(): + for (tid, lock) in lock_waits: + min, max, avg, count = lock_waits[tid, lock] + print "%s[%d] lock %x contended %d times, %d avg ns" % \ + (process_names[tid], tid, lock, count, avg) + diff --git a/bcc/misc/lockstat.py b/bcc/misc/lockstat.py new file mode 100644 index 0000000..fa3edda --- /dev/null +++ b/bcc/misc/lockstat.py @@ -0,0 +1,300 @@ +#!/usr/bin/python +# +# lockstat Trace and display lock contention stats +# +# USAGE: lockstat + +# Licensed under the Apache License, Version 2.0 (the "License") +# 28-Jul-2017 Gisle Dankel Created this. + +from bcc import BPF +from ctypes import c_int +from time import sleep +from datetime import datetime +import argparse +import subprocess +import os + +# One Lock object per TGID and uaddr. +class Lock(object): + def __init__(self): + self.contention_count = 0 + self.elapsed_blocked = 0 + self.thread_count = 0 + self.last_stack_syms = [] + + def update(self, count, block_time, last_stack_syms): + self.contention_count += count + self.elapsed_blocked += block_time + self.thread_count += 1 + self.last_stack_syms = last_stack_syms + +def run_command_get_pid(command): + p = subprocess.Popen(command.split()) + return p.pid + +examples = """ +EXAMPLES: + +./lockstat + Trace calls to sys_futex and display contented locks every 5 seconds + for all processes running on the system +./lockstat -p <pid> + Trace only for the specified pid and display contended locks + every 5 seconds +./lockstat -p <pid> -t + Trace for a specified pid and print a message on each entry and exit to + sys_futex until interrupted or killed +./lockstat -p <pid> 10 + Trace the specified pid and show a message every 10 seconds +./lockstat -c <command> 1 30 + Run the specified command and display contended locks every 1 second + for a total of 30 times +""" + +description = """ +Trace kernel futex events. +These often occur because of lock contention, e.g. involving a pthread_mutex. +This script resemblers the following SystemTap example: +https://sourceware.org/systemtap/SystemTap_Beginners_Guide/futexcontentionsect.html +""" + +parser = argparse.ArgumentParser(description=description, + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=examples) +parser.add_argument("-p", "--pid", type=int, default=-1, + help="the PID to trace; if not specified, trace all") +parser.add_argument("-t", "--trace", action="store_true", + help="print trace messages for each futex enter/exit") +parser.add_argument("interval", nargs="?", default=5, type=int, + help="interval in seconds to print summary") +parser.add_argument("count", nargs="?", type=int, + help="number of times to print the report before exiting") +parser.add_argument("-c", "--command", + help="execute and trace the specified command") + +args = parser.parse_args() + +pid = args.pid +command = args.command +interval = args.interval +num_prints = args.count +trace_all = args.trace + +if command is not None: + print("Executing '%s' and tracing the resulting process." % command) + pid = run_command_get_pid(command) + +bpf_source = """ +#include <uapi/linux/futex.h> +#include <uapi/linux/ptrace.h> +#include <linux/sched.h> +#include <linux/time.h> + +struct comm_t { + char name[TASK_COMM_LEN]; +}; + +struct lock_key_t { + u64 uaddr; + u32 pid; + u32 tgid; +}; + +struct lock_info_t { + u64 elapsed_blocked; + u64 contention_count; + u64 sid; +}; + +BPF_HASH(pid_lock, u32, u64); +BPF_HASH(pid_blocktime, u32, u64); +BPF_HASH(tgid_comm, u32, struct comm_t); +BPF_HASH(lock_stats, struct lock_key_t, struct lock_info_t, 1000000); +BPF_STACK_TRACE(stack_traces, 16384); + +static inline int update_stats(u32 pid, u32 tgid, u64 uaddr, u64 block_time, u64 sid) { + struct lock_key_t key = {}; + struct lock_info_t zero = {}; + struct lock_info_t *info; + + key.pid = pid; + key.tgid = tgid; + key.uaddr = uaddr; + info = lock_stats.lookup_or_init(&key, &zero); + info->elapsed_blocked += block_time; + info->contention_count++; + info->sid = sid; + + if (0 == tgid_comm.lookup(&tgid)) { + struct comm_t comm; + bpf_get_current_comm(&comm.name, sizeof(comm.name)); + tgid_comm.update(&tgid, &comm); + } + return 0; +} + +// FIXME: Should attach to sys_enter_futex and sys_exit_futex tracepoints here, +// but that does not currently work +int sys_futex_enter(struct pt_regs *ctx, u32 *uaddr, int op, u32 val, + struct timespec *utime, u32 *uaddr2, u32 val3) { + int cmd = op & FUTEX_CMD_MASK; + if (cmd != FUTEX_WAIT) + return 0; + + u64 pid_tgid = bpf_get_current_pid_tgid(); + u32 pid = pid_tgid; + u32 tgid = pid_tgid >> 32; + + if (!(THREAD_FILTER)) + return 0; + + u64 timestamp = bpf_ktime_get_ns(); + u64 uaddr64 = (u64) uaddr; + pid_lock.update(&pid, &uaddr64); + pid_blocktime.update(&pid, ×tamp); + + if (SHOULD_PRINT) + bpf_trace_printk("enter sys_futex, pid = %u, uaddr = %x, " + "cmd = %u\\n", pid, uaddr64, cmd); + return 0; +} + +int sys_futex_exit(struct pt_regs *ctx) { + u64 pid_tgid = bpf_get_current_pid_tgid(); + u32 pid = pid_tgid; + u32 tgid = pid_tgid >> 32; + if (!(THREAD_FILTER)) + return 0; + + u64 *blocktime = pid_blocktime.lookup(&pid); + u64 *uaddr = pid_lock.lookup(&pid); + u64 timestamp = bpf_ktime_get_ns(); + u64 elapsed; + u64 sid; + + if (blocktime == 0 || uaddr == 0) + return 0; // not FUTEX_WAIT, or (less likely) missed futex_enter + + elapsed = timestamp - *blocktime; + + sid = stack_traces.get_stackid(ctx, BPF_F_USER_STACK); + update_stats(pid, tgid, *uaddr, elapsed, sid); + pid_lock.delete(&pid); + pid_blocktime.delete(&pid); + + if (SHOULD_PRINT) { + bpf_trace_printk("exit sys_futex, uaddr = %x, elapsed = %uns\\n", + uaddr == 0 ? 0 : *uaddr, elapsed); + } + return 0; +} + +""" + +bpf_source = bpf_source.replace("SHOULD_PRINT", "1" if trace_all else "0") + +thread_filter = '1' +if pid != -1: + print("Tracing pid %d, Ctrl+C to quit." % pid) + # 'tgid' in kernel space is what people thin of as 'pid' in userspace + thread_filter = "tgid == %d" % pid +else: + print("Tracing all processes, Ctrl+C to quit.") + +bpf_source = bpf_source.replace("THREAD_FILTER", thread_filter) + +bpf_program = BPF(text=bpf_source) +bpf_program.attach_kprobe(event="SyS_futex", fn_name="sys_futex_enter") +bpf_program.attach_kretprobe(event="SyS_futex", fn_name="sys_futex_exit") + +def get_syms(stack, pid): + global bpf_program + syms = [] + for addr in stack: + s = bpf_program.sym(addr, pid, show_offset=True) + syms.append(s) + return syms + +def print_syms(syms): + print("=========") + for f in syms: + print(f) + print("=========") + +def is_android_monitor_lock(syms): + for s in syms: + if 'art::Monitor::Lock' in s: + return True + return False + +def disp_stack(stack, pid): + for addr in stack: + s = bpf_program.sym(addr, pid, show_offset=True) + +def create_tgid_stats(): + global bpf_program + stats = bpf_program["lock_stats"] + res = {} + stack_traces = bpf_program['stack_traces'] + for key, val in stats.items(): + # Only display Android monitor locks + if val.sid >= 0: + ust = stack_traces.walk(val.sid) + syms = get_syms(ust, key.pid) + if not is_android_monitor_lock(syms): + continue + else: + continue + + if not key.tgid in res: + res[key.tgid] = {} + if not key.uaddr in res[key.tgid]: + res[key.tgid][key.uaddr] = Lock() + + lock = res[key.tgid][key.uaddr] + lock.update(val.contention_count, val.elapsed_blocked, syms) + return res + +def print_comm_stats(stats): + if stats == {}: + return + + comms = bpf_program["tgid_comm"] + print("\n%s:" % (datetime.now().strftime("%H:%M:%S"))) + for tgid, locks in stats.items(): + comm = comms[c_int(tgid)].name + print("\n %s (%d):" % (comm, tgid)) + sorted_locks = sorted(locks.items(), + key=lambda x: x[1].elapsed_blocked, + reverse=True) + for addr, stats in sorted_locks: + print(" %x: %dms (%d contentions involving %d threads, avg %dus)" % + (addr, stats.elapsed_blocked / 1000000, + stats.contention_count, stats.thread_count, + stats.elapsed_blocked / stats.contention_count / 1000)) + + # No use of displaying lock stacks since we're only + # filtering for Android monitor locks. + # + # print("Last stack for this lock:") + # print_syms(stats.last_stack_syms) + +count_so_far = 0 +while True: + if trace_all: + print(bpf_program.trace_fields()) + else: + try: + sleep(interval) + except KeyboardInterrupt: + exit() + print_comm_stats(create_tgid_stats()) + count_so_far += 1 + bpf_program['tgid_comm'].clear() + bpf_program['lock_stats'].clear() + bpf_program['pid_lock'].clear() + bpf_program['pid_blocktime'].clear() + + if num_prints is not None and count_so_far >= num_prints: + exit() |