aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Android Build Coastguard Worker <android-build-coastguard-worker@google.com> 2022-04-07 01:03:09 +0000
committer Android Build Coastguard Worker <android-build-coastguard-worker@google.com> 2022-04-07 01:03:09 +0000
commit a74802afac2338658d5c0c94c9e576ca1a9e8d84 (patch)
tree 5eae12be766e09fbf9c5dd986baf8a245ed5a1b4
parent d158a366526d390c13c90656ebef51f79a2de525 (diff)
parent c2146834c218bbde0bf4f8db93698900408e6163 (diff)
download bcc-android13-tests-release.tar.gz
Change-Id: I7ec60dddb605bad1750245cbb5792b50222f2291
-rw-r--r--.github/workflows/bcc-test.yml7
-rw-r--r--INSTALL.md2
-rw-r--r--README.md4
-rw-r--r--cmake/clang_libs.cmake3
-rw-r--r--debian/changelog14
-rw-r--r--docs/kernel-versions.md10
-rw-r--r--docs/reference_guide.md11
-rw-r--r--examples/networking/http_filter/http-parse-complete.c2
-rw-r--r--examples/networking/http_filter/http-parse-simple.c2
-rwxr-xr-xexamples/tracing/biolatpcts.py6
-rwxr-xr-xexamples/tracing/bitehist.py19
-rwxr-xr-xexamples/tracing/disksnoop.py7
-rw-r--r--libbpf-tools/.gitignore2
-rw-r--r--libbpf-tools/Android.bp140
-rw-r--r--libbpf-tools/Makefile7
-rw-r--r--libbpf-tools/android/argp.cpp2
-rw-r--r--libbpf-tools/bashreadline.c2
-rw-r--r--libbpf-tools/bindsnoop.bpf.c1
-rw-r--r--libbpf-tools/bindsnoop.h2
-rw-r--r--libbpf-tools/cachestat.c23
-rw-r--r--libbpf-tools/filetop.bpf.c3
-rw-r--r--libbpf-tools/filetop.c2
-rw-r--r--libbpf-tools/filetop.h1
-rw-r--r--libbpf-tools/fsdist.c2
-rw-r--r--libbpf-tools/fsslower.c2
-rw-r--r--libbpf-tools/klockstat.bpf.c49
-rw-r--r--libbpf-tools/klockstat.c32
-rw-r--r--libbpf-tools/oomkill.bpf.c30
-rw-r--r--libbpf-tools/oomkill.c165
-rw-r--r--libbpf-tools/oomkill.h15
-rw-r--r--libbpf-tools/opensnoop.bpf.c3
-rw-r--r--libbpf-tools/runqlen.c5
-rw-r--r--libbpf-tools/softirqs.c8
-rw-r--r--libbpf-tools/solisten.c2
-rw-r--r--libbpf-tools/tcprtt.c2
-rw-r--r--libbpf-tools/tcpsynbl.bpf.c66
-rw-r--r--libbpf-tools/tcpsynbl.c250
-rw-r--r--libbpf-tools/tcpsynbl.h11
-rw-r--r--libbpf-tools/trace_helpers.c48
-rw-r--r--libbpf-tools/trace_helpers.h2
-rw-r--r--libbpf-tools/vfsstat.c2
-rw-r--r--man/man8/biopattern.878
-rw-r--r--man/man8/biotop.87
-rw-r--r--man/man8/cachetop.88
-rw-r--r--man/man8/cpudist.811
-rw-r--r--man/man8/hardirqs.89
-rw-r--r--man/man8/softirqs.822
-rw-r--r--man/man8/sslsniff.824
-rw-r--r--man/man8/tcpcong.8136
-rw-r--r--man/man8/trace.85
-rw-r--r--src/cc/TEST_MAPPING7
-rw-r--r--src/cc/api/BPFTable.cc41
-rw-r--r--src/cc/api/BPFTable.h5
-rw-r--r--src/cc/bcc_btf.cc44
-rw-r--r--src/cc/bcc_btf.h1
-rw-r--r--src/cc/bcc_common.cc4
-rw-r--r--src/cc/bcc_common.h1
-rw-r--r--src/cc/bcc_debug.cc95
-rw-r--r--src/cc/bcc_debug.h13
-rw-r--r--src/cc/bpf_module.cc235
-rw-r--r--src/cc/bpf_module.h7
-rw-r--r--src/cc/bpf_module_rw_engine.cc7
-rw-r--r--src/cc/compat/linux/virtual_bpf.h155
-rw-r--r--src/cc/export/helpers.h33
-rw-r--r--src/cc/frontends/clang/b_frontend_action.cc38
-rw-r--r--src/cc/frontends/clang/b_frontend_action.h9
-rw-r--r--src/cc/frontends/clang/loader.cc104
-rw-r--r--src/cc/frontends/clang/loader.h56
-rw-r--r--src/cc/libbpf.c21
-rw-r--r--src/cc/perf_reader.c10
-rw-r--r--src/cc/perf_reader.h1
-rw-r--r--src/cc/usdt.h16
-rw-r--r--src/cc/usdt/usdt.cc24
-rw-r--r--src/cc/usdt/usdt_args.cc75
-rw-r--r--src/python/bcc/__init__.py15
-rw-r--r--src/python/bcc/libbcc.py4
-rw-r--r--tests/cc/test_bpf_table.cc4
-rw-r--r--tests/cc/test_pinned_table.cc64
-rwxr-xr-xtests/python/test_clang.py4
-rwxr-xr-xtests/python/test_tools_smoke.py3
-rwxr-xr-xtools/bashreadline.py4
-rwxr-xr-xtools/bindsnoop.py10
-rwxr-xr-xtools/biolatency.py23
-rwxr-xr-xtools/biolatpcts.py12
-rwxr-xr-xtools/biopattern.py140
-rw-r--r--tools/biopattern_example.txt45
-rwxr-xr-xtools/biosnoop.py19
-rwxr-xr-xtools/biotop.py46
-rwxr-xr-xtools/btrfsdist.py2
-rwxr-xr-xtools/btrfsslower.py4
-rwxr-xr-xtools/cachetop.py18
-rwxr-xr-xtools/compactsnoop.py3
-rwxr-xr-xtools/cpudist.py56
-rw-r--r--tools/cpudist_example.txt7
-rwxr-xr-xtools/dbslower.py4
-rwxr-xr-xtools/dcsnoop.py4
-rwxr-xr-xtools/drsnoop.py3
-rwxr-xr-xtools/execsnoop.py4
-rwxr-xr-xtools/ext4slower.py2
-rwxr-xr-xtools/filelife.py4
-rwxr-xr-xtools/filetop.py4
-rwxr-xr-xtools/funcslower.py72
-rwxr-xr-xtools/hardirqs.py34
-rwxr-xr-xtools/klockstat.py34
-rwxr-xr-xtools/mdflush.py4
-rwxr-xr-xtools/memleak.py18
-rwxr-xr-xtools/mountsnoop.py1
-rwxr-xr-xtools/mysqld_qslower.py4
-rwxr-xr-xtools/oomkill.py4
-rwxr-xr-xtools/softirqs.py65
-rw-r--r--tools/softirqs_example.txt24
-rwxr-xr-xtools/sslsniff.py262
-rw-r--r--tools/sslsniff_example.txt78
-rwxr-xr-xtools/swapin.py4
-rwxr-xr-xtools/syncsnoop.py2
-rwxr-xr-xtools/tcpaccept.py9
-rwxr-xr-xtools/tcpcong.py559
-rw-r--r--tools/tcpcong_example.txt491
-rwxr-xr-xtools/tcpconnect.py20
-rwxr-xr-xtools/tcpconnlat.py12
-rwxr-xr-xtools/tcpretrans.py6
-rwxr-xr-xtools/tcptop.py8
-rwxr-xr-xtools/threadsnoop.py4
-rwxr-xr-xtools/trace.py64
-rw-r--r--tools/trace_example.txt35
-rwxr-xr-xtools/vfsstat.py20
-rwxr-xr-xtools/xfsdist.py2
-rwxr-xr-xtools/zfsdist.py2
128 files changed, 4022 insertions, 592 deletions
diff --git a/.github/workflows/bcc-test.yml b/.github/workflows/bcc-test.yml
index 4ce33603..43d15823 100644
--- a/.github/workflows/bcc-test.yml
+++ b/.github/workflows/bcc-test.yml
@@ -15,8 +15,13 @@ jobs:
env:
- TYPE: Debug
PYTHON_TEST_LOGFILE: critical.log
+ RW_ENGINE_ENABLED: ON
+ - TYPE: Debug
+ PYTHON_TEST_LOGFILE: critical.log
+ RW_ENGINE_ENABLED: OFF
- TYPE: Release
PYTHON_TEST_LOGFILE: critical.log
+ RW_ENGINE_ENABLED: ON
steps:
- uses: actions/checkout@v2
- name: System info
@@ -43,7 +48,7 @@ jobs:
bcc-docker \
/bin/bash -c \
'mkdir -p /bcc/build && cd /bcc/build && \
- cmake -DCMAKE_BUILD_TYPE=${TYPE} .. && make -j9'"
+ cmake -DCMAKE_BUILD_TYPE=${TYPE} -DENABLE_LLVM_NATIVECODEGEN=${RW_ENGINE_ENABLED} .. && make -j9'"
- name: Run bcc's cc tests
env: ${{ matrix.env }}
# tests are wrapped with `script` as a hack to get a TTY as github actions doesn't provide this
diff --git a/INSTALL.md b/INSTALL.md
index 383406b0..f681ac62 100644
--- a/INSTALL.md
+++ b/INSTALL.md
@@ -392,7 +392,7 @@ make -j10
make install
```
-after install , you may add bcc directory to your $PATH, which you can add to ~/.bashrc
+After installing, you may add the bcc directories to your $PATH by adding the following to ~/.bashrc:
```
bcctools=/usr/share/bcc/tools
bccexamples=/usr/share/bcc/examples
diff --git a/README.md b/README.md
index 076d127c..f2067229 100644
--- a/README.md
+++ b/README.md
@@ -89,6 +89,7 @@ pair of .c and .py files, and some are directories of files.
- tools/[bindsnoop](tools/bindsnoop.py): Trace IPv4 and IPv6 bind() system calls (bind()). [Examples](tools/bindsnoop_example.txt).
- tools/[biolatency](tools/biolatency.py): Summarize block device I/O latency as a histogram. [Examples](tools/biolatency_example.txt).
- tools/[biotop](tools/biotop.py): Top for disks: Summarize block device I/O by process. [Examples](tools/biotop_example.txt).
+- tools/[biopattern](tools/biopattern.py): Identify random/sequential disk access patterns. [Examples](tools/biopattern_example.txt).
- tools/[biosnoop](tools/biosnoop.py): Trace block device I/O with PID and latency. [Examples](tools/biosnoop_example.txt).
- tools/[bitesize](tools/bitesize.py): Show per process I/O size histogram. [Examples](tools/bitesize_example.txt).
- tools/[bpflist](tools/bpflist.py): Display processes with active BPF programs and maps. [Examples](tools/bpflist_example.txt).
@@ -165,6 +166,7 @@ pair of .c and .py files, and some are directories of files.
- tools/[tcpsynbl](tools/tcpsynbl.py): Show TCP SYN backlog. [Examples](tools/tcpsynbl_example.txt).
- tools/[tcptop](tools/tcptop.py): Summarize TCP send/recv throughput by host. Top for TCP. [Examples](tools/tcptop_example.txt).
- tools/[tcptracer](tools/tcptracer.py): Trace TCP established connections (connect(), accept(), close()). [Examples](tools/tcptracer_example.txt).
+- tools/[tcpcong](tools/tcpcong.py): Trace TCP socket congestion control status duration. [Examples](tools/tcpcong_example.txt).
- tools/[threadsnoop](tools/threadsnoop.py): List new thread creation. [Examples](tools/threadsnoop_example.txt).
- tools/[tplist](tools/tplist.py): Display kernel tracepoints or USDT probes and their formats. [Examples](tools/tplist_example.txt).
- tools/[trace](tools/trace.py): Trace arbitrary functions, with filters. [Examples](tools/trace_example.txt).
@@ -250,8 +252,6 @@ and outer IP addresses traversing the interface, and the userspace component
turns those statistics into a graph showing the traffic distribution at
multiple granularities. See the code [here](examples/networking/tunnel_monitor).
-[![Screenshot](http://img.youtube.com/vi/yYy3Cwce02k/0.jpg)](https://youtu.be/yYy3Cwce02k)
-
## Contributing
Already pumped up to commit some code? Here are some resources to join the
diff --git a/cmake/clang_libs.cmake b/cmake/clang_libs.cmake
index 3f1523b7..f1b1261b 100644
--- a/cmake/clang_libs.cmake
+++ b/cmake/clang_libs.cmake
@@ -22,6 +22,9 @@ if (${LLVM_PACKAGE_VERSION} VERSION_EQUAL 6 OR ${LLVM_PACKAGE_VERSION} VERSION_G
list(APPEND llvm_raw_libs bpfasmparser)
list(APPEND llvm_raw_libs bpfdisassembler)
endif()
+if (${LLVM_PACKAGE_VERSION} VERSION_EQUAL 15 OR ${LLVM_PACKAGE_VERSION} VERSION_GREATER 15)
+ list(APPEND llvm_raw_libs windowsdriver)
+endif()
llvm_map_components_to_libnames(_llvm_libs ${llvm_raw_libs})
llvm_expand_dependencies(llvm_libs ${_llvm_libs})
endif()
diff --git a/debian/changelog b/debian/changelog
index 8a23841a..22028040 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,17 @@
+bcc (0.24.0-1) unstable; urgency=low
+
+ * Support for kernel up to 5.16
+ * bcc tools: update for trace.py, sslsniff.py, tcptop.py, hardirqs.py, etc.
+ * new libbpf tools: bashreadline
+ * allow specifying wakeup_events for perf buffer
+ * support BPF_MAP_TYPE_{INODE, TASK}_STORAGE maps
+ * remove all deprecated libbpf function usage
+ * remove P4/B language support
+ * major test infra change, using github actions now
+ * doc update, bug fixes and other tools improvement
+
+ -- Yonghong Song <ys114321@gmail.com> Fri, 14 Jan 2022 17:00:00 +0000
+
bcc (0.23.0-1) unstable; urgency=low
* Support for kernel up to 5.15
diff --git a/docs/kernel-versions.md b/docs/kernel-versions.md
index 2c642271..36ee30a4 100644
--- a/docs/kernel-versions.md
+++ b/docs/kernel-versions.md
@@ -78,6 +78,7 @@ BPF attached to LIRC devices | 4.18 | [`f4364dcfc86d`](https://git.kernel.org/c
Pass map values to map helpers | 4.18 | [`d71962f3e627`](https://github.com/torvalds/linux/commit/d71962f3e627b5941804036755c844fabfb65ff5)
BPF socket reuseport | 4.19 | [`2dbb9b9e6df6`](https://github.com/torvalds/linux/commit/2dbb9b9e6df67d444fbe425c7f6014858d337adf)
BPF flow dissector | 4.20 | [`d58e468b1112`](https://github.com/torvalds/linux/commit/d58e468b1112dcd1d5193c0a89ff9f98b5a3e8b9)
+BPF 1M insn limit | 5.2 | [`c04c0d2b968a`](https://github.com/torvalds/linux/commit/c04c0d2b968ac45d6ef020316808ef6c82325a82)
BPF cgroup sysctl | 5.2 | [`7b146cebe30c`](https://github.com/torvalds/linux/commit/7b146cebe30cb481b0f70d85779da938da818637)
BPF raw tracepoint writable | 5.2 | [`9df1c28bb752`](https://github.com/torvalds/linux/commit/9df1c28bb75217b244257152ab7d788bb2a386d0)
BPF trampoline | 5.5 | [`fec56f5890d9`](https://github.com/torvalds/linux/commit/fec56f5890d93fc2ed74166c397dc186b1c25951)
@@ -153,6 +154,7 @@ mmap() support for array maps | 5.5 | [`fc9702273e2e`](https://git.kernel.org/pu
`LOOKUP_BATCH` | 5.6 | [`cb4d03ab499d`](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=cb4d03ab499d4c040f4ab6fd4389d2b49f42b5a5)
`UPDATE_BATCH`, `DELETE_BATCH` | 5.6 | [`aa2e93b8e58e`](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=aa2e93b8e58e18442edfb2427446732415bc215e)
`LOOKUP_AND_DELETE_BATCH` | 5.6 | [`057996380a42`](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=057996380a42bb64ccc04383cfa9c0ace4ea11f0)
+`LOOKUP_AND_DELETE_ELEM` support for hash maps | 5.14 | [`3e87f192b405`](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=3e87f192b405960c0fe83e0925bd0dadf4f8cf43)
## XDP
@@ -212,6 +214,7 @@ Helper | Kernel version | License | Commit |
`BPF_FUNC_check_mtu()` | 5.12 | | [`34b2021cc616`](https://github.com/torvalds/linux/commit/34b2021cc61642d61c3cf943d9e71925b827941b)
`BPF_FUNC_clone_redirect()` | 4.2 | | [`3896d655f4d4`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=3896d655f4d491c67d669a15f275a39f713410f8)
`BPF_FUNC_copy_from_user()` | 5.10 | | [`07be4c4a3e7a`](https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git/commit?id=07be4c4a3e7a0db148e44b16c5190e753d1c8569)
+`BPF_FUNC_copy_from_user_task()` | 5.18 | GPL | [`376040e47334`](https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/commit?id=376040e47334c6dc6a939a32197acceb00fe4acf)
`BPF_FUNC_csum_diff()` | 4.6 | | [`7d672345ed29`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=7d672345ed295b1356a5d9f7111da1d1d7d65867)
`BPF_FUNC_csum_level()` | 5.7 | | [`7cdec54f9713`](https://github.com/torvalds/linux/commit/7cdec54f9713256bb170873a1fc5c75c9127c9d2)
`BPF_FUNC_csum_update()` | 4.9 | | [`36bbef52c7eb`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=36bbef52c7eb646ed6247055a2acd3851e317857)
@@ -234,6 +237,7 @@ Helper | Kernel version | License | Commit |
`BPF_FUNC_get_func_arg_cnt()` | 5.17 | | [`f92c1e183604`](https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/commit?id=f92c1e183604c20ce00eb889315fdaa8f2d9e509)
`BPF_FUNC_get_func_ip()` | 5.15 | | [`5d8b583d04ae`](https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/commit/?id=5d8b583d04aedb3bd5f6d227a334c210c7d735f9)
`BPF_FUNC_get_func_ret()` | 5.17 | | [`f92c1e183604`](https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/commit?id=f92c1e183604c20ce00eb889315fdaa8f2d9e509)
+`BPF_FUNC_get_retval()` | 5.18 | | [`b44123b4a3dc`](https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/commit?id=b44123b4a3dcad4664d3a0f72c011ffd4c9c4d93)
`BPF_FUNC_get_hash_recalc()` | 4.8 | | [`13c5c240f789`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=13c5c240f789bbd2bcacb14a23771491485ae61f)
`BPF_FUNC_get_listener_sock()` | 5.1 | | [`dbafd7ddd623`](https://kernel.googlesource.com/pub/scm/linux/kernel/git/davem/net-next/+/dbafd7ddd62369b2f3926ab847cbf8fc40e800b7)
`BPF_FUNC_get_local_storage()` | 4.19 | | [`cd3394317653`](https://github.com/torvalds/linux/commit/cd3394317653837e2eb5c5d0904a8996102af9fc)
@@ -249,6 +253,7 @@ Helper | Kernel version | License | Commit |
`BPF_FUNC_get_stackid()` | 4.6 | GPL | [`d5a3b1f69186`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=d5a3b1f691865be576c2bffa708549b8cdccda19)
`BPF_FUNC_get_task_stack()` | 5.9 | | [`fa28dcb82a38`](https://kernel.googlesource.com/pub/scm/linux/kernel/git/bpf/bpf-next/+/fa28dcb82a38f8e3993b0fae9106b1a80b59e4f0)
`BPF_FUNC_getsockopt()` | 4.15 | | [`cd86d1fd2102`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=cd86d1fd21025fdd6daf23d1288da405e7ad0ec6)
+`BPF_FUNC_ima_file_hash()` | 5.18 | | [`174b16946e39`](https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/commit?id=174b16946e39ebd369097e0f773536c91a8c1a4c)
`BPF_FUNC_ima_inode_hash()` | 5.11 | | [`27672f0d280a`](https://github.com/torvalds/linux/commit/27672f0d280a3f286a410a8db2004f46ace72a17)
`BPF_FUNC_inode_storage_delete()` | 5.10 | | [`8ea636848aca`](https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git/commit?id=8ea636848aca35b9f97c5b5dee30225cf2dd0fe6)
`BPF_FUNC_inode_storage_get()` | 5.10 | | [`8ea636848aca`](https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git/commit?id=8ea636848aca35b9f97c5b5dee30225cf2dd0fe6)
@@ -311,6 +316,7 @@ Helper | Kernel version | License | Commit |
`BPF_FUNC_seq_write()` | 5.7 | GPL | [`492e639f0c22`](https://kernel.googlesource.com/pub/scm/linux/kernel/git/bpf/bpf-next/+/492e639f0c222784e2e0f121966375f641c61b15)
`BPF_FUNC_set_hash()` | 4.13 | | [`ded092cd73c2`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=ded092cd73c2c56a394b936f86897f29b2e131c0)
`BPF_FUNC_set_hash_invalid()` | 4.9 | | [`7a4b28c6cc9f`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=7a4b28c6cc9ffac50f791b99cc7e46106436e5d8)
+`BPF_FUNC_set_retval()` | 5.18 | | [`b44123b4a3dc`](https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/commit?id=b44123b4a3dcad4664d3a0f72c011ffd4c9c4d93)
`BPF_FUNC_setsockopt()` | 4.13 | | [`8c4b4c7e9ff0`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=8c4b4c7e9ff0447995750d9329949fa082520269)
`BPF_FUNC_sk_ancestor_cgroup_id()` | 5.7 | | [`f307fa2cb4c9`](https://kernel.googlesource.com/pub/scm/linux/kernel/git/bpf/bpf-next/+/f307fa2cb4c935f7f1ff0aeb880c7b44fb9a642b)
`BPF_FUNC_sk_assign()` | 5.6 | | [`cf7fbe660f2d`](https://github.com/torvalds/linux/commit/cf7fbe660f2dbd738ab58aea8e9b0ca6ad232449)
@@ -340,6 +346,7 @@ Helper | Kernel version | License | Commit |
`BPF_FUNC_skb_load_bytes_relative()` | 4.18 | | [`4e1ec56cdc59`](https://git.kernel.org/cgit/linux/kernel/git/davem/net-next.git/commit/?id=4e1ec56cdc59746943b2acfab3c171b930187bbe)
`BPF_FUNC_skb_output()` | 5.5 | | [`a7658e1a4164`](https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/commit/?id=a7658e1a4164ce2b9eb4a11aadbba38586e93bd6)
`BPF_FUNC_skb_pull_data()` | 4.9 | | [`36bbef52c7eb`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=36bbef52c7eb646ed6247055a2acd3851e317857)
+`BPF_FUNC_skb_set_tstamp()` | 5.18 | | [`9bb984f28d5b`](https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/commit?id=9bb984f28d5bcb917d35d930fcfb89f90f9449fd)
`BPF_FUNC_skb_set_tunnel_key()` | 4.3 | | [`d3aa45ce6b94`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=d3aa45ce6b94c65b83971257317867db13e5f492)
`BPF_FUNC_skb_set_tunnel_opt()` | 4.6 | | [`14ca0751c96f`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=14ca0751c96f8d3d0f52e8ed3b3236f8b34d3460)
`BPF_FUNC_skb_store_bytes()` | 4.1 | | [`91bc4822c3d6`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=91bc4822c3d61b9bb7ef66d3b77948a4f9177954)
@@ -388,6 +395,9 @@ Helper | Kernel version | License | Commit |
`BPF_FUNC_xdp_adjust_head()` | 4.10 | | [`17bedab27231`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=17bedab2723145d17b14084430743549e6943d03)
`BPF_FUNC_xdp_adjust_meta()` | 4.15 | | [`de8f3a83b0a0`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=de8f3a83b0a0fddb2cf56e7a718127e9619ea3da)
`BPF_FUNC_xdp_adjust_tail()` | 4.18 | | [`b32cc5b9a346`](https://git.kernel.org/cgit/linux/kernel/git/davem/net-next.git/commit/?id=b32cc5b9a346319c171e3ad905e0cddda032b5eb)
+`BPF_FUNC_xdp_get_buff_len()` | 5.18 | | [`0165cc817075`](https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/commit?id=0165cc817075cf701e4289838f1d925ff1911b3e)
+`BPF_FUNC_xdp_load_bytes()` | 5.18 | | [`3f364222d032`](https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/commit?id=3f364222d032eea6b245780e845ad213dab28cdd)
+`BPF_FUNC_xdp_store_bytes()` | 5.18 | | [`3f364222d032`](https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/commit?id=3f364222d032eea6b245780e845ad213dab28cdd)
`BPF_FUNC_xdp_output()` | 5.6 | GPL | [`d831ee84bfc9`](https://github.com/torvalds/linux/commit/d831ee84bfc9173eecf30dbbc2553ae81b996c60)
`BPF_FUNC_override_return()` | 4.16 | GPL | [`9802d86585db`](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=9802d86585db91655c7d1929a4f6bbe0952ea88e)
`BPF_FUNC_sock_ops_cb_flags_set()` | 4.16 | | [`b13d88072172`](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=b13d880721729384757f235166068c315326f4a1)
diff --git a/docs/reference_guide.md b/docs/reference_guide.md
index 96e8a3f2..ff18ab93 100644
--- a/docs/reference_guide.md
+++ b/docs/reference_guide.md
@@ -202,6 +202,9 @@ Syntax: TRACEPOINT_PROBE(*category*, *event*)
This is a macro that instruments the tracepoint defined by *category*:*event*.
+The tracepoint name is `<category>:<event>`.
+The probe function name is `tracepoint__<category>__<event>`.
+
Arguments are available in an ```args``` struct, which are the tracepoint arguments. One way to list these is to cat the relevant format file under /sys/kernel/debug/tracing/events/*category*/*event*/format.
The ```args``` struct can be used in place of ``ctx`` in each functions requiring a context as an argument. This includes notably [perf_submit()](#3-perf_submit).
@@ -216,7 +219,11 @@ TRACEPOINT_PROBE(random, urandom_read) {
}
```
-This instruments the random:urandom_read tracepoint, and prints the tracepoint argument ```got_bits```.
+This instruments the tracepoint `random:urandom_read`, and prints the tracepoint argument ```got_bits```.
+When using the Python API, this probe is automatically attached to the right tracepoint target.
+For C++, this tracepoint probe can be attached by specifying the tracepoint target and function name explicitly:
+`BPF::attach_tracepoint("random:urandom_read", "tracepoint__random__urandom_read")`
+Note the name of the probe function defined above is `tracepoint__random__urandom_read`.
Examples in situ:
[code](https://github.com/iovisor/bcc/blob/a4159da8c4ea8a05a3c6e402451f530d6e5a8b41/examples/tracing/urandomread.py#L19) ([output](https://github.com/iovisor/bcc/commit/e422f5e50ecefb96579b6391a2ada7f6367b83c4#diff-41e5ecfae4a3b38de5f4e0887ed160e5R10)),
@@ -1825,7 +1832,7 @@ XDP_FLAGS_REPLACE = (1 << 4)
You can use flags like this ```BPF.attach_xdp(dev="device", fn=b.load_func("fn_name",BPF.XDP), flags=BPF.XDP_FLAGS_UPDATE_IF_NOEXIST)```
-The default value of flgas is 0. This means if there is no xdp program with `device`, the fn will run with that device. If there is an xdp program running with device, the old program will be replaced with new fn program.
+The default value of flags is 0. This means if there is no xdp program with `device`, the fn will run with that device. If there is an xdp program running with device, the old program will be replaced with new fn program.
Currently, bcc does not support XDP_FLAGS_REPLACE flag. The following are the descriptions of other flags.
diff --git a/examples/networking/http_filter/http-parse-complete.c b/examples/networking/http_filter/http-parse-complete.c
index 61cee0fb..ef102ba7 100644
--- a/examples/networking/http_filter/http-parse-complete.c
+++ b/examples/networking/http_filter/http-parse-complete.c
@@ -100,7 +100,7 @@ int http_filter(struct __sk_buff *skb) {
unsigned long p[7];
int i = 0;
for (i = 0; i < 7; i++) {
- p[i] = load_byte(skb , payload_offset + i);
+ p[i] = load_byte(skb, payload_offset + i);
}
//find a match with an HTTP message
diff --git a/examples/networking/http_filter/http-parse-simple.c b/examples/networking/http_filter/http-parse-simple.c
index 292cb7b4..9afbe1ec 100644
--- a/examples/networking/http_filter/http-parse-simple.c
+++ b/examples/networking/http_filter/http-parse-simple.c
@@ -71,7 +71,7 @@ int http_filter(struct __sk_buff *skb) {
unsigned long p[7];
int i = 0;
for (i = 0; i < 7; i++) {
- p[i] = load_byte(skb , payload_offset + i);
+ p[i] = load_byte(skb, payload_offset + i);
}
//find a match with an HTTP message
diff --git a/examples/tracing/biolatpcts.py b/examples/tracing/biolatpcts.py
index c9bb834e..68a59516 100755
--- a/examples/tracing/biolatpcts.py
+++ b/examples/tracing/biolatpcts.py
@@ -11,6 +11,7 @@ from time import sleep
bpf_source = """
#include <linux/blk_types.h>
+#include <linux/blk-mq.h>
#include <linux/blkdev.h>
#include <linux/time64.h>
@@ -45,7 +46,10 @@ void kprobe_blk_account_io_done(struct pt_regs *ctx, struct request *rq, u64 now
"""
bpf = BPF(text=bpf_source)
-bpf.attach_kprobe(event='blk_account_io_done', fn_name='kprobe_blk_account_io_done')
+if BPF.get_kprobe_functions(b'__blk_account_io_done'):
+ bpf.attach_kprobe(event="__blk_account_io_done", fn_name="kprobe_blk_account_io_done")
+else:
+ bpf.attach_kprobe(event="blk_account_io_done", fn_name="kprobe_blk_account_io_done")
cur_lat_100ms = bpf['lat_100ms']
cur_lat_1ms = bpf['lat_1ms']
diff --git a/examples/tracing/bitehist.py b/examples/tracing/bitehist.py
index 89ceb307..81e84594 100755
--- a/examples/tracing/bitehist.py
+++ b/examples/tracing/bitehist.py
@@ -20,27 +20,32 @@ from time import sleep
# load BPF program
b = BPF(text="""
#include <uapi/linux/ptrace.h>
-#include <linux/blkdev.h>
+#include <linux/blk-mq.h>
BPF_HISTOGRAM(dist);
BPF_HISTOGRAM(dist_linear);
-int kprobe__blk_account_io_done(struct pt_regs *ctx, struct request *req)
+int trace_req_done(struct pt_regs *ctx, struct request *req)
{
- dist.increment(bpf_log2l(req->__data_len / 1024));
- dist_linear.increment(req->__data_len / 1024);
- return 0;
+ dist.increment(bpf_log2l(req->__data_len / 1024));
+ dist_linear.increment(req->__data_len / 1024);
+ return 0;
}
""")
+if BPF.get_kprobe_functions(b'__blk_account_io_done'):
+ b.attach_kprobe(event="__blk_account_io_done", fn_name="trace_req_done")
+else:
+ b.attach_kprobe(event="blk_account_io_done", fn_name="trace_req_done")
+
# header
print("Tracing... Hit Ctrl-C to end.")
# trace until Ctrl-C
try:
- sleep(99999999)
+ sleep(99999999)
except KeyboardInterrupt:
- print()
+ print()
# output
print("log2 histogram")
diff --git a/examples/tracing/disksnoop.py b/examples/tracing/disksnoop.py
index a35e1abd..7b6891b7 100755
--- a/examples/tracing/disksnoop.py
+++ b/examples/tracing/disksnoop.py
@@ -19,7 +19,7 @@ REQ_WRITE = 1 # from include/linux/blk_types.h
# load BPF program
b = BPF(text="""
#include <uapi/linux/ptrace.h>
-#include <linux/blkdev.h>
+#include <linux/blk-mq.h>
BPF_HASH(start, struct request *);
@@ -46,7 +46,10 @@ void trace_completion(struct pt_regs *ctx, struct request *req) {
if BPF.get_kprobe_functions(b'blk_start_request'):
b.attach_kprobe(event="blk_start_request", fn_name="trace_start")
b.attach_kprobe(event="blk_mq_start_request", fn_name="trace_start")
-b.attach_kprobe(event="blk_account_io_done", fn_name="trace_completion")
+if BPF.get_kprobe_functions(b'__blk_account_io_done'):
+ b.attach_kprobe(event="__blk_account_io_done", fn_name="trace_completion")
+else:
+ b.attach_kprobe(event="blk_account_io_done", fn_name="trace_completion")
# header
print("%-18s %-2s %-7s %8s" % ("TIME(s)", "T", "BYTES", "LAT(ms)"))
diff --git a/libbpf-tools/.gitignore b/libbpf-tools/.gitignore
index 59370553..ce95db71 100644
--- a/libbpf-tools/.gitignore
+++ b/libbpf-tools/.gitignore
@@ -31,6 +31,7 @@
/mountsnoop
/numamove
/offcputime
+/oomkill
/opensnoop
/readahead
/runqlat
@@ -43,6 +44,7 @@
/tcpconnect
/tcpconnlat
/tcprtt
+/tcpsynbl
/vfsstat
/xfsdist
/xfsslower
diff --git a/libbpf-tools/Android.bp b/libbpf-tools/Android.bp
index e250f5dc..03165fdc 100644
--- a/libbpf-tools/Android.bp
+++ b/libbpf-tools/Android.bp
@@ -180,6 +180,26 @@ cc_binary {
}
cc_object {
+ name: "drsnoop.bpf.o",
+ srcs: ["drsnoop.bpf.c"],
+ defaults: ["bcc_bpf_defaults"],
+}
+
+genrule {
+ name: "drsnoop.skel.h",
+ srcs: [":drsnoop.bpf.o"],
+ out: ["drsnoop.skel.h"],
+ defaults: ["bpf_skeleton_hdr_defaults"],
+}
+
+cc_binary {
+ name: "drsnoop",
+ srcs: ["drsnoop.c"],
+ generated_headers: ["drsnoop.skel.h"],
+ defaults: ["bcc_binary_defaults"],
+}
+
+cc_object {
name: "filelife.bpf.o",
srcs: ["filelife.bpf.c"],
defaults: ["bcc_bpf_defaults"],
@@ -220,6 +240,46 @@ cc_binary {
}
cc_object {
+ name: "fsdist.bpf.o",
+ srcs: ["fsdist.bpf.c"],
+ defaults: ["bcc_bpf_defaults"],
+}
+
+genrule {
+ name: "fsdist.skel.h",
+ srcs: [":fsdist.bpf.o"],
+ out: ["fsdist.skel.h"],
+ defaults: ["bpf_skeleton_hdr_defaults"],
+}
+
+cc_binary {
+ name: "fsdist",
+ srcs: ["fsdist.c"],
+ generated_headers: ["fsdist.skel.h"],
+ defaults: ["bcc_binary_defaults"],
+}
+
+cc_object {
+ name: "fsslower.bpf.o",
+ srcs: ["fsslower.bpf.c"],
+ defaults: ["bcc_bpf_defaults"],
+}
+
+genrule {
+ name: "fsslower.skel.h",
+ srcs: [":fsslower.bpf.o"],
+ out: ["fsslower.skel.h"],
+ defaults: ["bpf_skeleton_hdr_defaults"],
+}
+
+cc_binary {
+ name: "fsslower",
+ srcs: ["fsslower.c"],
+ generated_headers: ["fsslower.skel.h"],
+ defaults: ["bcc_binary_defaults"],
+}
+
+cc_object {
name: "funclatency.bpf.o",
srcs: ["funclatency.bpf.c"],
defaults: ["bcc_bpf_defaults"],
@@ -329,6 +389,26 @@ cc_binary {
}
cc_object {
+ name: "oomkill.bpf.o",
+ srcs: ["oomkill.bpf.c"],
+ defaults: ["bcc_bpf_defaults"],
+}
+
+genrule {
+ name: "oomkill.skel.h",
+ srcs: [":oomkill.bpf.o"],
+ out: ["oomkill.skel.h"],
+ defaults: ["bpf_skeleton_hdr_defaults"],
+}
+
+cc_binary {
+ name: "oomkill",
+ srcs: ["oomkill.c"],
+ generated_headers: ["oomkill.skel.h"],
+ defaults: ["bcc_binary_defaults"],
+}
+
+cc_object {
name: "runqlat.bpf.o",
srcs: ["runqlat.bpf.c"],
defaults: ["bcc_bpf_defaults"],
@@ -409,6 +489,26 @@ cc_binary {
}
cc_object {
+ name: "solisten.bpf.o",
+ srcs: ["solisten.bpf.c"],
+ defaults: ["bcc_bpf_defaults"],
+}
+
+genrule {
+ name: "solisten.skel.h",
+ srcs: [":solisten.bpf.o"],
+ out: ["solisten.skel.h"],
+ defaults: ["bpf_skeleton_hdr_defaults"],
+}
+
+cc_binary {
+ name: "solisten",
+ srcs: ["solisten.c"],
+ generated_headers: ["solisten.skel.h"],
+ defaults: ["bcc_binary_defaults"],
+}
+
+cc_object {
name: "tcpconnect.bpf.o",
srcs: ["tcpconnect.bpf.c"],
defaults: ["bcc_bpf_defaults"],
@@ -430,3 +530,43 @@ cc_binary {
generated_headers: ["tcpconnect.skel.h"],
defaults: ["bcc_binary_defaults"],
}
+
+cc_object {
+ name: "tcprtt.bpf.o",
+ srcs: ["tcprtt.bpf.c"],
+ defaults: ["bcc_bpf_defaults"],
+}
+
+genrule {
+ name: "tcprtt.skel.h",
+ srcs: [":tcprtt.bpf.o"],
+ out: ["tcprtt.skel.h"],
+ defaults: ["bpf_skeleton_hdr_defaults"],
+}
+
+cc_binary {
+ name: "tcprtt",
+ srcs: ["tcprtt.c"],
+ generated_headers: ["tcprtt.skel.h"],
+ defaults: ["bcc_binary_defaults"],
+}
+
+cc_object {
+ name: "vfsstat.bpf.o",
+ srcs: ["vfsstat.bpf.c"],
+ defaults: ["bcc_bpf_defaults"],
+}
+
+genrule {
+ name: "vfsstat.skel.h",
+ srcs: [":vfsstat.bpf.o"],
+ out: ["vfsstat.skel.h"],
+ defaults: ["bpf_skeleton_hdr_defaults"],
+}
+
+cc_binary {
+ name: "vfsstat",
+ srcs: ["vfsstat.c"],
+ generated_headers: ["vfsstat.skel.h"],
+ defaults: ["bcc_binary_defaults"],
+}
diff --git a/libbpf-tools/Makefile b/libbpf-tools/Makefile
index 6bf1ed08..81a6faaa 100644
--- a/libbpf-tools/Makefile
+++ b/libbpf-tools/Makefile
@@ -7,6 +7,7 @@ LIBBPF_SRC := $(abspath ../src/cc/libbpf/src)
LIBBPF_OBJ := $(abspath $(OUTPUT)/libbpf.a)
INCLUDES := -I$(OUTPUT) -I../src/cc/libbpf/include/uapi
CFLAGS := -g -O2 -Wall
+BPFCFLAGS := -g -O2 -Wall
INSTALL ?= install
prefix ?= /usr/local
ARCH := $(shell uname -m | sed 's/x86_64/x86/' | sed 's/aarch64/arm64/' | sed 's/ppc64le/powerpc/' | sed 's/mips.*/mips/')
@@ -42,6 +43,7 @@ APPS = \
mountsnoop \
numamove \
offcputime \
+ oomkill \
opensnoop \
readahead \
runqlat \
@@ -54,6 +56,7 @@ APPS = \
tcpconnect \
tcpconnlat \
tcprtt \
+ tcpsynbl \
vfsstat \
#
@@ -106,7 +109,7 @@ $(OUTPUT)/%.skel.h: $(OUTPUT)/%.bpf.o | $(OUTPUT)
$(OUTPUT)/%.bpf.o: %.bpf.c $(LIBBPF_OBJ) $(wildcard %.h) $(ARCH)/vmlinux.h | $(OUTPUT)
$(call msg,BPF,$@)
- $(Q)$(CLANG) $(CFLAGS) -target bpf -D__TARGET_ARCH_$(ARCH) \
+ $(Q)$(CLANG) $(BPFCFLAGS) -target bpf -D__TARGET_ARCH_$(ARCH) \
-I$(ARCH)/ $(INCLUDES) -c $(filter %.c,$^) -o $@ && \
$(LLVM_STRIP) -g $@
@@ -114,7 +117,7 @@ $(OUTPUT)/%.bpf.o: %.bpf.c $(LIBBPF_OBJ) $(wildcard %.h) $(ARCH)/vmlinux.h | $(O
$(LIBBPF_OBJ): $(wildcard $(LIBBPF_SRC)/*.[ch]) | $(OUTPUT)/libbpf
$(call msg,LIB,$@)
$(Q)$(MAKE) -C $(LIBBPF_SRC) BUILD_STATIC_ONLY=1 \
- OBJDIR=$(dir $@)/libbpf DESTDIR=$(dir $@) \
+ OBJDIR=$(dir $@)libbpf DESTDIR=$(dir $@) \
INCLUDEDIR= LIBDIR= UAPIDIR= \
install
diff --git a/libbpf-tools/android/argp.cpp b/libbpf-tools/android/argp.cpp
index 5c39f79f..6c1b22fe 100644
--- a/libbpf-tools/android/argp.cpp
+++ b/libbpf-tools/android/argp.cpp
@@ -61,7 +61,7 @@ extern "C" error_t argp_parse(const struct argp *argp, int argc, char **argv, in
// Handle positional arguments
if (optind < argc) {
for (int idx = optind; idx < argc; idx++) {
- struct argp_state state = { .input = input, .argp = argp, .arg_num = idx };
+ struct argp_state state = { .input = input, .argp = argp, .arg_num = idx - optind };
const error_t ret = argp->parser(ARGP_KEY_ARG, argv[idx], &state);
if (ret) return ret;
}
diff --git a/libbpf-tools/bashreadline.c b/libbpf-tools/bashreadline.c
index 2fcb2e2c..0277f535 100644
--- a/libbpf-tools/bashreadline.c
+++ b/libbpf-tools/bashreadline.c
@@ -132,7 +132,7 @@ cleanup:
if (line)
free(line);
if (fp)
- fclose(fp);
+ pclose(fp);
return result;
}
diff --git a/libbpf-tools/bindsnoop.bpf.c b/libbpf-tools/bindsnoop.bpf.c
index bcbfc542..941826c3 100644
--- a/libbpf-tools/bindsnoop.bpf.c
+++ b/libbpf-tools/bindsnoop.bpf.c
@@ -6,7 +6,6 @@
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_endian.h>
#include "bindsnoop.h"
-#include "maps.bpf.h"
#define MAX_ENTRIES 10240
#define MAX_PORTS 1024
diff --git a/libbpf-tools/bindsnoop.h b/libbpf-tools/bindsnoop.h
index 1c881b03..fa7b19de 100644
--- a/libbpf-tools/bindsnoop.h
+++ b/libbpf-tools/bindsnoop.h
@@ -11,8 +11,8 @@ struct bind_event {
__u32 bound_dev_if;
int ret;
__u16 port;
+ __u16 proto;
__u8 opts;
- __u8 proto;
__u8 ver;
char task[TASK_COMM_LEN];
};
diff --git a/libbpf-tools/cachestat.c b/libbpf-tools/cachestat.c
index 05785251..5556cfda 100644
--- a/libbpf-tools/cachestat.c
+++ b/libbpf-tools/cachestat.c
@@ -142,12 +142,31 @@ int main(int argc, char **argv)
libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
libbpf_set_print(libbpf_print_fn);
- obj = cachestat_bpf__open_and_load();
+ obj = cachestat_bpf__open();
if (!obj) {
- fprintf(stderr, "failed to open and/or load BPF object\n");
+ fprintf(stderr, "failed to open BPF object\n");
return 1;
}
+ /**
+ * account_page_dirtied was renamed to folio_account_dirtied
+ * in kernel commit 203a31516616 ("mm/writeback: Add __folio_mark_dirty()")
+ */
+ if (fentry_can_attach("folio_account_dirtied", NULL)) {
+ err = bpf_program__set_attach_target(obj->progs.account_page_dirtied, 0,
+ "folio_account_dirtied");
+ if (err) {
+ fprintf(stderr, "failed to set attach target\n");
+ goto cleanup;
+ }
+ }
+
+ err = cachestat_bpf__load(obj);
+ if (err) {
+ fprintf(stderr, "failed to load BPF object\n");
+ goto cleanup;
+ }
+
if (!obj->bss) {
fprintf(stderr, "Memory-mapping BPF maps is supported starting from Linux 5.7, please upgrade.\n");
goto cleanup;
diff --git a/libbpf-tools/filetop.bpf.c b/libbpf-tools/filetop.bpf.c
index c02a2054..d8b97124 100644
--- a/libbpf-tools/filetop.bpf.c
+++ b/libbpf-tools/filetop.bpf.c
@@ -44,7 +44,8 @@ static int probe_entry(struct pt_regs *ctx, struct file *file, size_t count, enu
if (regular_file_only && !S_ISREG(mode))
return 0;
- key.dev = BPF_CORE_READ(file, f_inode, i_rdev);
+ key.dev = BPF_CORE_READ(file, f_inode, i_sb, s_dev);
+ key.rdev = BPF_CORE_READ(file, f_inode, i_rdev);
key.inode = BPF_CORE_READ(file, f_inode, i_ino);
key.pid = pid;
key.tid = tid;
diff --git a/libbpf-tools/filetop.c b/libbpf-tools/filetop.c
index 70240d85..4e4554e0 100644
--- a/libbpf-tools/filetop.c
+++ b/libbpf-tools/filetop.c
@@ -195,7 +195,7 @@ static int print_stat(struct filetop_bpf *obj)
time(&t);
tm = localtime(&t);
strftime(ts, sizeof(ts), "%H:%M:%S", tm);
- memset(buf, 0 , sizeof(buf));
+ memset(buf, 0, sizeof(buf));
n = fread(buf, 1, sizeof(buf), f);
if (n)
printf("%8s loadavg: %s\n", ts, buf);
diff --git a/libbpf-tools/filetop.h b/libbpf-tools/filetop.h
index 2974ebfd..7ddf3855 100644
--- a/libbpf-tools/filetop.h
+++ b/libbpf-tools/filetop.h
@@ -13,6 +13,7 @@ enum op {
struct file_id {
__u64 inode;
__u32 dev;
+ __u32 rdev;
__u32 pid;
__u32 tid;
};
diff --git a/libbpf-tools/fsdist.c b/libbpf-tools/fsdist.c
index f411d162..88d1a09d 100644
--- a/libbpf-tools/fsdist.c
+++ b/libbpf-tools/fsdist.c
@@ -233,7 +233,7 @@ static bool check_fentry()
for (i = 0; i < MAX_OP; i++) {
fn_name = fs_configs[fs_type].op_funcs[i];
module = fs_configs[fs_type].fs;
- if (fn_name && !fentry_exists(fn_name, module)) {
+ if (fn_name && !fentry_can_attach(fn_name, module)) {
support_fentry = false;
break;
}
diff --git a/libbpf-tools/fsslower.c b/libbpf-tools/fsslower.c
index e96c9efa..820a2019 100644
--- a/libbpf-tools/fsslower.c
+++ b/libbpf-tools/fsslower.c
@@ -201,7 +201,7 @@ static bool check_fentry()
for (i = 0; i < MAX_OP; i++) {
fn_name = fs_configs[fs_type].op_funcs[i];
module = fs_configs[fs_type].fs;
- if (fn_name && !fentry_exists(fn_name, module)) {
+ if (fn_name && !fentry_can_attach(fn_name, module)) {
support_fentry = false;
break;
}
diff --git a/libbpf-tools/klockstat.bpf.c b/libbpf-tools/klockstat.bpf.c
index eddf8b7e..2a5c8e72 100644
--- a/libbpf-tools/klockstat.bpf.c
+++ b/libbpf-tools/klockstat.bpf.c
@@ -107,6 +107,21 @@ static void lock_contended(void *ctx, struct mutex *lock)
bpf_map_update_elem(&lockholder_map, &tl, li, BPF_ANY);
}
+/*
+ * Forget a pending acquisition of *lock* by the current task.
+ *
+ * Builds the same (task_id, lock_ptr) key used for lockholder_map and
+ * deletes that entry. The targ_lock and tracing_task() filters are applied
+ * first, so untraced locks/tasks are left untouched.
+ */
+static void lock_aborted(struct mutex *lock)
+{
+ u64 task_id;
+ struct task_lock tl = {};
+
+ /* A zero targ_lock means "all locks"; otherwise only the target matches. */
+ if (targ_lock && targ_lock != lock)
+ return;
+ task_id = bpf_get_current_pid_tgid();
+ if (!tracing_task(task_id))
+ return;
+ tl.task_id = task_id;
+ tl.lock_ptr = (u64)lock;
+ bpf_map_delete_elem(&lockholder_map, &tl);
+}
+
static void lock_acquired(struct mutex *lock)
{
u64 task_id;
@@ -220,6 +235,40 @@ int BPF_PROG(mutex_trylock_exit, struct mutex *lock, long ret)
return 0;
}
+/* Entry of mutex_lock_interruptible(): record that the task started waiting. */
+SEC("fentry/mutex_lock_interruptible")
+int BPF_PROG(mutex_lock_interruptible, struct mutex *lock)
+{
+ lock_contended(ctx, lock);
+ return 0;
+}
+
+/*
+ * Exit of mutex_lock_interruptible(): a non-zero return value is treated as
+ * "lock not taken" and the pending entry is dropped; zero is an acquisition.
+ */
+SEC("fexit/mutex_lock_interruptible")
+int BPF_PROG(mutex_lock_interruptible_exit, struct mutex *lock, long ret)
+{
+ if (ret)
+ lock_aborted(lock);
+ else
+ lock_acquired(lock);
+ return 0;
+}
+
+/* Entry of mutex_lock_killable(): record that the task started waiting. */
+SEC("fentry/mutex_lock_killable")
+int BPF_PROG(mutex_lock_killable, struct mutex *lock)
+{
+ lock_contended(ctx, lock);
+ return 0;
+}
+
+/*
+ * Exit of mutex_lock_killable(): same handling as the interruptible variant,
+ * non-zero return aborts the pending entry, zero records the acquisition.
+ */
+SEC("fexit/mutex_lock_killable")
+int BPF_PROG(mutex_lock_killable_exit, struct mutex *lock, long ret)
+{
+ if (ret)
+ lock_aborted(lock);
+ else
+ lock_acquired(lock);
+ return 0;
+}
+
SEC("fentry/mutex_unlock")
int BPF_PROG(mutex_unlock, struct mutex *lock)
{
diff --git a/libbpf-tools/klockstat.c b/libbpf-tools/klockstat.c
index b1cac634..d3a6facf 100644
--- a/libbpf-tools/klockstat.c
+++ b/libbpf-tools/klockstat.c
@@ -54,6 +54,7 @@ static struct prog_env {
unsigned int iterations;
bool reset;
bool timestamp;
+ bool verbose;
} env = {
.nr_locks = 99999999,
.nr_stack_entries = 1,
@@ -70,7 +71,7 @@ static const char args_doc[] = "FUNCTION";
static const char program_doc[] =
"Trace mutex lock acquisition and hold times, in nsec\n"
"\n"
-"Usage: klockstat [-hRT] [-p PID] [-t TID] [-c FUNC] [-L LOCK] [-n NR_LOCKS]\n"
+"Usage: klockstat [-hRTv] [-p PID] [-t TID] [-c FUNC] [-L LOCK] [-n NR_LOCKS]\n"
" [-s NR_STACKS] [-S SORT] [-d DURATION] [-i INTERVAL]\n"
"\v"
"Examples:\n"
@@ -104,6 +105,7 @@ static const struct argp_option opts[] = {
{ "interval", 'i', "SECONDS", 0, "Print interval" },
{ "reset", 'R', NULL, 0, "Reset stats each interval" },
{ "timestamp", 'T', NULL, 0, "Print timestamp" },
+ { "verbose", 'v', NULL, 0, "Verbose debug output" },
{ NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help" },
{},
@@ -230,6 +232,9 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
case 'h':
argp_state_help(state, stderr, ARGP_HELP_STD_HELP);
break;
+ case 'v':
+ env->verbose = true;
+ break;
case ARGP_KEY_END:
if (env->duration) {
if (env->interval > env->duration)
@@ -324,7 +329,7 @@ static char *symname(struct ksyms *ksyms, uint64_t pc, char *buf, size_t n)
static void print_acq_header(void)
{
- printf("\n Caller Avg Spin Count Max Spin Total Spin\n");
+ printf("\n Caller Avg Wait Count Max Wait Total Wait\n");
}
static void print_acq_stat(struct ksyms *ksyms, struct stack_stat *ss,
@@ -471,6 +476,13 @@ static void sig_hand(int signr)
static struct sigaction sigact = {.sa_handler = sig_hand};
+/*
+ * libbpf log callback. Debug-level messages are dropped unless the
+ * verbose flag is set; everything else is written to stderr.
+ * Returns 0 for a dropped message, otherwise the vfprintf() result.
+ */
+static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
+{
+	bool suppressed = (level == LIBBPF_DEBUG) && !env.verbose;
+
+	return suppressed ? 0 : vfprintf(stderr, format, args);
+}
+
int main(int argc, char **argv)
{
static const struct argp argp = {
@@ -494,6 +506,7 @@ int main(int argc, char **argv)
sigaction(SIGINT, &sigact, 0);
libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+ libbpf_set_print(libbpf_print_fn);
ksyms = ksyms__load();
if (!ksyms) {
@@ -521,6 +534,21 @@ int main(int argc, char **argv)
obj->rodata->targ_pid = env.tid;
obj->rodata->targ_lock = lock_addr;
+ if (fentry_can_attach("mutex_lock_nested", NULL)) {
+ bpf_program__set_attach_target(obj->progs.mutex_lock, 0,
+ "mutex_lock_nested");
+ bpf_program__set_attach_target(obj->progs.mutex_lock_exit, 0,
+ "mutex_lock_nested");
+ bpf_program__set_attach_target(obj->progs.mutex_lock_interruptible, 0,
+ "mutex_lock_interruptible_nested");
+ bpf_program__set_attach_target(obj->progs.mutex_lock_interruptible_exit, 0,
+ "mutex_lock_interruptible_nested");
+ bpf_program__set_attach_target(obj->progs.mutex_lock_killable, 0,
+ "mutex_lock_killable_nested");
+ bpf_program__set_attach_target(obj->progs.mutex_lock_killable_exit, 0,
+ "mutex_lock_killable_nested");
+ }
+
err = klockstat_bpf__load(obj);
if (err) {
warn("failed to load BPF object\n");
diff --git a/libbpf-tools/oomkill.bpf.c b/libbpf-tools/oomkill.bpf.c
new file mode 100644
index 00000000..25866837
--- /dev/null
+++ b/libbpf-tools/oomkill.bpf.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2022 Jingxiang Zeng
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_tracing.h>
+
+#include "oomkill.h"
+
+/* Perf event array the kprobe below writes struct data_t records into. */
+struct {
+ __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+ __uint(key_size, sizeof(u32));
+ __uint(value_size, sizeof(u32));
+} events SEC(".maps");
+
+/*
+ * kprobe on oom_kill_process(): capture the triggering task, the task chosen
+ * in *oc* and oc->totalpages, then emit one struct data_t through the events
+ * perf event array. *message* is not used.
+ */
+SEC("kprobe/oom_kill_process")
+int BPF_KPROBE(oom_kill_process, struct oom_control *oc, const char *message)
+{
+ struct data_t data;
+
+ /* Upper 32 bits of pid_tgid: the id of the task running this probe. */
+ data.fpid = bpf_get_current_pid_tgid() >> 32;
+ data.tpid = BPF_CORE_READ(oc, chosen, tgid);
+ data.pages = BPF_CORE_READ(oc, totalpages);
+ bpf_get_current_comm(&data.fcomm, sizeof(data.fcomm));
+ /* Copy the chosen task's comm into the event. */
+ bpf_probe_read_kernel(&data.tcomm, sizeof(data.tcomm), BPF_CORE_READ(oc, chosen, comm));
+ bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &data, sizeof(data));
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/libbpf-tools/oomkill.c b/libbpf-tools/oomkill.c
new file mode 100644
index 00000000..92976b8c
--- /dev/null
+++ b/libbpf-tools/oomkill.c
@@ -0,0 +1,165 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+// Copyright (c) 2022 Jingxiang Zeng
+//
+// Based on oomkill(8) from BCC by Brendan Gregg.
+// 13-Jan-2022 Jingxiang Zeng Created this.
+#include <argp.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+#include "oomkill.skel.h"
+#include "oomkill.h"
+#include "trace_helpers.h"
+
+#define PERF_POLL_TIMEOUT_MS 100
+
+static volatile sig_atomic_t exiting = 0;
+
+static bool verbose = false;
+
+const char *argp_program_version = "oomkill 0.1";
+const char *argp_program_bug_address =
+ "https://github.com/iovisor/bcc/tree/master/libbpf-tools";
+const char argp_program_doc[] =
+"Trace OOM kills.\n"
+"\n"
+"USAGE: oomkill [-h]\n"
+"\n"
+"EXAMPLES:\n"
+" oomkill # trace OOM kills\n";
+
+static const struct argp_option opts[] = {
+ { "verbose", 'v', NULL, 0, "Verbose debug output" },
+ { NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help" },
+ {},
+};
+
+/*
+ * argp option callback for oomkill.
+ *
+ * -v turns on verbose libbpf output, -h prints the full help text. Any other
+ * key is reported back to argp as not handled (ARGP_ERR_UNKNOWN).
+ */
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+	if (key == 'v') {
+		verbose = true;
+		return 0;
+	}
+
+	if (key == 'h') {
+		argp_state_help(state, stderr, ARGP_HELP_STD_HELP);
+		return 0;
+	}
+
+	return ARGP_ERR_UNKNOWN;
+}
+
+/*
+ * Perf buffer sample callback: print one line for an OOM kill event.
+ *
+ * @ctx:     unused callback context
+ * @cpu:     CPU the sample was produced on (unused)
+ * @data:    points at a struct data_t filled in by the BPF program
+ * @data_sz: size of the sample (unused)
+ *
+ * The line carries a wall-clock timestamp, the triggering and killed tasks,
+ * the page count and, when /proc/loadavg can be read, the current load
+ * averages.
+ */
+static void handle_event(void *ctx, int cpu, void *data, __u32 data_sz)
+{
+	const struct data_t *e = data;
+	char loadavg[256] = "";
+	char ts[32] = "";
+	struct tm *tm;
+	size_t n = 0;
+	time_t t;
+	FILE *f;
+
+	f = fopen("/proc/loadavg", "r");
+	if (f) {
+		/* Keep one byte for the NUL so printing with %s cannot overrun. */
+		n = fread(loadavg, 1, sizeof(loadavg) - 1, f);
+		fclose(f);
+		loadavg[n] = '\0';
+		/* Cut at the first newline; the output line adds its own. */
+		n = strcspn(loadavg, "\n");
+		loadavg[n] = '\0';
+	}
+
+	time(&t);
+	tm = localtime(&t);
+	/* localtime() can fail; leave the timestamp empty in that case. */
+	if (tm)
+		strftime(ts, sizeof(ts), "%H:%M:%S", tm);
+
+	/* fpid/tpid are __u32 and pages is __u64: print them as unsigned. */
+	printf("%s Triggered by PID %u (\"%s\"), OOM kill of PID %u (\"%s\"), %llu pages",
+	       ts, e->fpid, e->fcomm, e->tpid, e->tcomm,
+	       (unsigned long long)e->pages);
+	if (n)
+		printf(", loadavg: %s", loadavg);
+	printf("\n");
+}
+
+/*
+ * Perf buffer lost-sample callback: report how many events were dropped
+ * on *cpu*. *ctx* is unused.
+ */
+static void handle_lost_events(void *ctx, int cpu, __u64 lost_cnt)
+{
+	/* __u64 is not "unsigned long long" on every ABI; cast to match %llu. */
+	printf("Lost %llu events on CPU #%d!\n", (unsigned long long)lost_cnt, cpu);
+}
+
+/* SIGINT handler: only sets the flag that the polling loop in main() checks. */
+static void sig_int(int signo)
+{
+ exiting = 1;
+}
+
+/*
+ * libbpf log callback: forward messages to stderr, except debug-level
+ * ones when verbose output was not requested (those return 0).
+ */
+static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
+{
+	if (verbose || level != LIBBPF_DEBUG)
+		return vfprintf(stderr, format, args);
+
+	return 0;
+}
+
+/*
+ * oomkill entry point.
+ *
+ * Parses the command line, opens/loads/attaches the BPF skeleton, creates a
+ * perf buffer on the events map and polls it until SIGINT sets `exiting`.
+ *
+ * Returns 0 after a clean shutdown and non-zero on any failure.
+ */
+int main(int argc, char **argv)
+{
+	static const struct argp argp = {
+		.options = opts,
+		.parser = parse_arg,
+		.doc = argp_program_doc,
+	};
+	struct perf_buffer *pb = NULL;
+	struct oomkill_bpf *obj;
+	int err;
+
+	err = argp_parse(&argp, argc, argv, 0, NULL, NULL);
+	if (err)
+		return err;
+
+	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+	libbpf_set_print(libbpf_print_fn);
+
+	obj = oomkill_bpf__open_and_load();
+	if (!obj) {
+		fprintf(stderr, "failed to load and open BPF object\n");
+		return 1;
+	}
+
+	err = oomkill_bpf__attach(obj);
+	if (err) {
+		fprintf(stderr, "failed to attach BPF programs\n");
+		goto cleanup;
+	}
+
+	pb = perf_buffer__new(bpf_map__fd(obj->maps.events), 64,
+			      handle_event, handle_lost_events, NULL, NULL);
+	if (!pb) {
+		err = -errno;
+		fprintf(stderr, "failed to open perf buffer: %d\n", err);
+		goto cleanup;
+	}
+
+	if (signal(SIGINT, sig_int) == SIG_ERR) {
+		/* err is still 0 here; report the real reason from errno. */
+		fprintf(stderr, "can't set signal handler: %s\n", strerror(errno));
+		err = 1;
+		goto cleanup;
+	}
+
+	printf("Tracing OOM kills... Ctrl-C to stop.\n");
+
+	while (!exiting) {
+		err = perf_buffer__poll(pb, PERF_POLL_TIMEOUT_MS);
+		/* An interrupted poll is not an error; the loop condition decides. */
+		if (err < 0 && err != -EINTR) {
+			fprintf(stderr, "error polling perf buffer: %d\n", err);
+			goto cleanup;
+		}
+		/* reset err to return 0 if exiting */
+		err = 0;
+	}
+
+cleanup:
+	perf_buffer__free(pb);
+	oomkill_bpf__destroy(obj);
+
+	return err != 0;
+}
diff --git a/libbpf-tools/oomkill.h b/libbpf-tools/oomkill.h
new file mode 100644
index 00000000..086099d5
--- /dev/null
+++ b/libbpf-tools/oomkill.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#ifndef __OOMKILL_H
+#define __OOMKILL_H
+
+#define TASK_COMM_LEN 16
+
+/* One OOM kill event, passed from the BPF program to user space. */
+struct data_t {
+ __u32 fpid; /* id of the task that triggered the OOM kill */
+ __u32 tpid; /* tgid of the task chosen to be killed */
+ __u64 pages; /* totalpages from the oom_control */
+ char fcomm[TASK_COMM_LEN]; /* comm of the triggering task */
+ char tcomm[TASK_COMM_LEN]; /* comm of the chosen task */
+};
+
+#endif /* __OOMKILL_H */
diff --git a/libbpf-tools/opensnoop.bpf.c b/libbpf-tools/opensnoop.bpf.c
index e378dcc2..e28131a1 100644
--- a/libbpf-tools/opensnoop.bpf.c
+++ b/libbpf-tools/opensnoop.bpf.c
@@ -5,9 +5,6 @@
#include <bpf/bpf_helpers.h>
#include "opensnoop.h"
-#define TASK_RUNNING 0
-
-const volatile __u64 min_us = 0;
const volatile pid_t targ_pid = 0;
const volatile pid_t targ_tgid = 0;
const volatile uid_t targ_uid = 0;
diff --git a/libbpf-tools/runqlen.c b/libbpf-tools/runqlen.c
index 9cbbc739..8c776936 100644
--- a/libbpf-tools/runqlen.c
+++ b/libbpf-tools/runqlen.c
@@ -29,7 +29,7 @@ struct env {
bool runqocc;
bool timestamp;
time_t interval;
- bool freq;
+ int freq;
int times;
bool verbose;
} env = {
@@ -171,12 +171,13 @@ static struct hist zero;
static void print_runq_occupancy(struct runqlen_bpf__bss *bss)
{
- __u64 samples, idle = 0, queued = 0;
struct hist hist;
int slot, i = 0;
float runqocc;
do {
+ __u64 samples, idle = 0, queued = 0;
+
hist = bss->hists[i];
bss->hists[i] = zero;
for (slot = 0; slot < MAX_SLOTS; slot++) {
diff --git a/libbpf-tools/softirqs.c b/libbpf-tools/softirqs.c
index 34cfdb77..833bc1a5 100644
--- a/libbpf-tools/softirqs.c
+++ b/libbpf-tools/softirqs.c
@@ -39,10 +39,10 @@ const char argp_program_doc[] =
"USAGE: softirqs [--help] [-T] [-N] [-d] [interval] [count]\n"
"\n"
"EXAMPLES:\n"
-" softirqss # sum soft irq event time\n"
-" softirqss -d # show soft irq event time as histograms\n"
-" softirqss 1 10 # print 1 second summaries, 10 times\n"
-" softirqss -NT 1 # 1s summaries, nanoseconds, and timestamps\n";
+" softirqs # sum soft irq event time\n"
+" softirqs -d # show soft irq event time as histograms\n"
+" softirqs 1 10 # print 1 second summaries, 10 times\n"
+" softirqs -NT 1 # 1s summaries, nanoseconds, and timestamps\n";
static const struct argp_option opts[] = {
{ "distributed", 'd', NULL, 0, "Show distributions as histograms" },
diff --git a/libbpf-tools/solisten.c b/libbpf-tools/solisten.c
index adaa668d..02f1ee54 100644
--- a/libbpf-tools/solisten.c
+++ b/libbpf-tools/solisten.c
@@ -156,7 +156,7 @@ int main(int argc, char **argv)
obj->rodata->target_pid = target_pid;
- if (fentry_exists("inet_listen", NULL)) {
+ if (fentry_can_attach("inet_listen", NULL)) {
bpf_program__set_autoload(obj->progs.inet_listen_entry, false);
bpf_program__set_autoload(obj->progs.inet_listen_exit, false);
} else {
diff --git a/libbpf-tools/tcprtt.c b/libbpf-tools/tcprtt.c
index bed6efa7..cfc0ed53 100644
--- a/libbpf-tools/tcprtt.c
+++ b/libbpf-tools/tcprtt.c
@@ -243,7 +243,7 @@ int main(int argc, char **argv)
obj->rodata->targ_daddr = env.raddr;
obj->rodata->targ_ms = env.milliseconds;
- if (fentry_exists("tcp_rcv_established", NULL))
+ if (fentry_can_attach("tcp_rcv_established", NULL))
bpf_program__set_autoload(obj->progs.tcp_rcv_kprobe, false);
else
bpf_program__set_autoload(obj->progs.tcp_rcv, false);
diff --git a/libbpf-tools/tcpsynbl.bpf.c b/libbpf-tools/tcpsynbl.bpf.c
new file mode 100644
index 00000000..c7d47faa
--- /dev/null
+++ b/libbpf-tools/tcpsynbl.bpf.c
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2021 Yaqi Chen
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_endian.h>
+#include "tcpsynbl.h"
+#include "bits.bpf.h"
+#include "maps.bpf.h"
+
+#define MAX_ENTRIES 65536
+
+/* Backlog histograms, one struct hist per sk_max_ack_backlog value (the key). */
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, u64);
+ __type(value, struct hist);
+} hists SEC(".maps");
+
+static struct hist zero;
+
+/*
+ * Shared body of all probes: account the current accept backlog of *sk*.
+ *
+ * The histogram is selected (or created from `zero`) by the socket's
+ * sk_max_ack_backlog; the slot is log2 of sk_ack_backlog, clamped to the
+ * last slot. Always returns 0.
+ */
+static int do_entry(struct sock *sk)
+{
+ u64 max_backlog, backlog, slot;
+ struct hist *histp;
+
+ max_backlog = BPF_CORE_READ(sk, sk_max_ack_backlog);
+ backlog = BPF_CORE_READ(sk, sk_ack_backlog);
+ histp = bpf_map_lookup_or_try_init(&hists, &max_backlog, &zero);
+ if (!histp)
+ return 0;
+
+ slot = log2l(backlog);
+ /* Keep the index inside slots[]. */
+ if (slot >= MAX_SLOTS)
+ slot = MAX_SLOTS - 1;
+ __sync_fetch_and_add(&histp->slots[slot], 1);
+ return 0;
+}
+
+
+/* kprobe variant for IPv4: tcp_v4_syn_recv_sock(). */
+SEC("kprobe/tcp_v4_syn_recv_sock")
+int BPF_KPROBE(tcp_v4_syn_recv_kprobe, struct sock *sk)
+{
+ return do_entry(sk);
+}
+
+/* kprobe variant for IPv6: tcp_v6_syn_recv_sock(). */
+SEC("kprobe/tcp_v6_syn_recv_sock")
+int BPF_KPROBE(tcp_v6_syn_recv_kprobe, struct sock *sk)
+{
+ return do_entry(sk);
+}
+
+/* fentry variant for IPv4; user space loads either this or the kprobe. */
+SEC("fentry/tcp_v4_syn_recv_sock")
+int BPF_PROG(tcp_v4_syn_recv, struct sock *sk)
+{
+ return do_entry(sk);
+}
+
+/* fentry variant for IPv6; user space loads either this or the kprobe. */
+SEC("fentry/tcp_v6_syn_recv_sock")
+int BPF_PROG(tcp_v6_syn_recv, struct sock *sk)
+{
+ return do_entry(sk);
+}
+
+char LICENSE[] SEC("license") = "Dual BSD/GPL";
diff --git a/libbpf-tools/tcpsynbl.c b/libbpf-tools/tcpsynbl.c
new file mode 100644
index 00000000..188a2af0
--- /dev/null
+++ b/libbpf-tools/tcpsynbl.c
@@ -0,0 +1,250 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+// Copyright (c) 2021 Yaqi Chen
+//
+// Based on tcpsynbl(8) from BCC by Brendan Gregg.
+// 19-Dec-2021 Yaqi Chen Created this.
+#include <argp.h>
+#include <stdio.h>
+#include <signal.h>
+#include <unistd.h>
+#include <time.h>
+#include <bpf/libbpf.h>
+#include <bpf/bpf.h>
+#include "tcpsynbl.h"
+#include "tcpsynbl.skel.h"
+#include "trace_helpers.h"
+
+/* Command-line settings; the large defaults effectively mean "until Ctrl-C". */
+static struct env {
+ bool ipv4; /* -4 */
+ bool ipv6; /* -6 */
+ time_t interval; /* first positional argument, passed to sleep() */
+ int times; /* second positional argument, number of summaries */
+ bool timestamp; /* -T */
+ bool verbose; /* -v */
+} env = {
+ .interval = 99999999,
+ .times = 99999999,
+};
+
+static volatile sig_atomic_t exiting = 0;
+
+const char *argp_program_version = "tcpsynbl 0.1";
+const char *argp_program_bug_address =
+ "https://github.com/iovisor/bcc/tree/master/libbpf-tools";
+const char argp_program_doc[] =
+"Summarize TCP SYN backlog as a histogram.\n"
+"\n"
+"USAGE: tcpsynbl [--help] [-T] [-4] [-6] [interval] [count]\n"
+"\n"
+"EXAMPLES:\n"
+" tcpsynbl # summarize TCP SYN backlog as a histogram\n"
+" tcpsynbl 1 10 # print 1 second summaries, 10 times\n"
+" tcpsynbl -T 1 # 1s summaries with timestamps\n"
+" tcpsynbl -4 # trace IPv4 family only\n"
+" tcpsynbl -6 # trace IPv6 family only\n";
+
+
+static const struct argp_option opts[] = {
+ { "timestamp", 'T', NULL, 0, "Include timestamp on output" },
+ { "ipv4", '4', NULL, 0, "Trace IPv4 family only" },
+ { "ipv6", '6', NULL, 0, "Trace IPv6 family only" },
+ { "verbose", 'v', NULL, 0, "Verbose debug output" },
+ { NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help" },
+ {},
+};
+
+
+/*
+ * argp callback for tcpsynbl.
+ *
+ * Handles -h, -v, -T, -4 and -6 plus up to two positional arguments:
+ * the print interval in seconds and the number of summaries. Both must be
+ * positive decimal integers; anything else prints a message and the usage.
+ *
+ * Returns 0 for handled keys and ARGP_ERR_UNKNOWN otherwise.
+ */
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+	static int pos_args;
+	char *end;
+	long val;
+	bool bad;
+
+	switch (key) {
+	case 'h':
+		argp_state_help(state, stderr, ARGP_HELP_STD_HELP);
+		break;
+	case 'v':
+		env.verbose = true;
+		break;
+	case 'T':
+		env.timestamp = true;
+		break;
+	case '4':
+		env.ipv4 = true;
+		break;
+	case '6':
+		env.ipv6 = true;
+		break;
+	case ARGP_KEY_ARG:
+		if (pos_args > 1) {
+			fprintf(stderr,
+				"unrecognized positional argument: %s\n", arg);
+			argp_usage(state);
+			break;
+		}
+
+		errno = 0;
+		val = strtol(arg, &end, 10);
+		/*
+		 * errno alone only catches overflow. Also reject empty input,
+		 * trailing garbage and non-positive values (an interval of 0
+		 * would turn the main loop into a busy loop).
+		 */
+		bad = errno || end == arg || *end != '\0' || val <= 0;
+
+		if (pos_args == 0) {
+			if (bad) {
+				fprintf(stderr, "invalid interval\n");
+				argp_usage(state);
+			} else {
+				env.interval = val;
+			}
+		} else {
+			/* env.times is an int: refuse values that do not fit. */
+			if (bad || val != (int)val) {
+				fprintf(stderr, "invalid times\n");
+				argp_usage(state);
+			} else {
+				env.times = val;
+			}
+		}
+		pos_args++;
+		break;
+	default:
+		return ARGP_ERR_UNKNOWN;
+	}
+	return 0;
+}
+
+/*
+ * libbpf log callback: print to stderr, but stay silent (return 0) for
+ * debug-level messages when -v was not given.
+ */
+static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
+{
+	const bool is_debug = level == LIBBPF_DEBUG;
+
+	if (!is_debug || env.verbose)
+		return vfprintf(stderr, format, args);
+	return 0;
+}
+
+/* SIGINT handler: sets the flag that main() checks after each summary. */
+static void sig_handler(int sig)
+{
+ exiting = true;
+}
+
+/* Turn autoload off for every program in the skeleton. */
+static void disable_all_progs(struct tcpsynbl_bpf *obj)
+{
+	struct bpf_program *all[] = {
+		obj->progs.tcp_v4_syn_recv_kprobe,
+		obj->progs.tcp_v6_syn_recv_kprobe,
+		obj->progs.tcp_v4_syn_recv,
+		obj->progs.tcp_v6_syn_recv,
+	};
+	size_t i;
+
+	for (i = 0; i < sizeof(all) / sizeof(all[0]); i++)
+		bpf_program__set_autoload(all[i], false);
+}
+
+/*
+ * Enable autoload for one address family (*version* is 4 or 6).
+ *
+ * The fentry program is chosen when fentry_can_attach() succeeds for the
+ * family's syn_recv_sock function, the kprobe program otherwise. Any other
+ * *version* value does nothing.
+ */
+static void set_autoload_prog(struct tcpsynbl_bpf *obj, int version)
+{
+	struct bpf_program *prog;
+
+	switch (version) {
+	case 4:
+		prog = fentry_can_attach("tcp_v4_syn_recv_sock", NULL)
+			? obj->progs.tcp_v4_syn_recv
+			: obj->progs.tcp_v4_syn_recv_kprobe;
+		break;
+	case 6:
+		prog = fentry_can_attach("tcp_v6_syn_recv_sock", NULL)
+			? obj->progs.tcp_v6_syn_recv
+			: obj->progs.tcp_v6_syn_recv_kprobe;
+		break;
+	default:
+		return;
+	}
+
+	bpf_program__set_autoload(prog, true);
+}
+
+/*
+ * Print every histogram stored in the map behind *fd*, then empty the map.
+ *
+ * Each entry is printed under a "backlog_max = <key>" heading. Returns 0 on
+ * success and -1 if a lookup or delete fails.
+ */
+static int print_log2_hists(int fd)
+{
+	__u64 lookup_key = -1, next_key;
+	struct hist hist;
+	int err;
+
+	/* First pass: walk the keys and print each histogram. */
+	while (!bpf_map_get_next_key(fd, &lookup_key, &next_key)) {
+		err = bpf_map_lookup_elem(fd, &next_key, &hist);
+		if (err < 0) {
+			fprintf(stderr, "failed to lookup hist: %d\n", err);
+			return -1;
+		}
+		/* The key is unsigned; print it as such. */
+		printf("backlog_max = %llu\n", (unsigned long long)next_key);
+		print_log2_hist(hist.slots, MAX_SLOTS, "backlog");
+		lookup_key = next_key;
+	}
+
+	/* Second pass: delete the entries so the next interval starts empty. */
+	lookup_key = -1;
+	while (!bpf_map_get_next_key(fd, &lookup_key, &next_key)) {
+		err = bpf_map_delete_elem(fd, &next_key);
+		if (err < 0) {
+			fprintf(stderr, "failed to cleanup hist : %d\n", err);
+			return -1;
+		}
+		lookup_key = next_key;
+	}
+
+	return 0;
+}
+
+/*
+ * tcpsynbl entry point.
+ *
+ * Parses options, opens the skeleton, enables the programs for the requested
+ * address families, loads and attaches them, then prints the histograms every
+ * env.interval seconds until SIGINT or until env.times summaries are done.
+ *
+ * Returns 0 on success and non-zero on failure.
+ */
+int main(int argc, char **argv)
+{
+	static const struct argp argp = {
+		.options = opts,
+		.parser = parse_arg,
+		.doc = argp_program_doc
+	};
+	struct tcpsynbl_bpf *obj;
+	char ts[32];
+	int err, map_fd;
+
+	err = argp_parse(&argp, argc, argv, 0, NULL, NULL);
+	if (err)
+		return err;
+
+	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+	libbpf_set_print(libbpf_print_fn);
+
+	obj = tcpsynbl_bpf__open();
+	if (!obj) {
+		fprintf(stderr, "failed to open BPF object\n");
+		return 1;
+	}
+
+	/* Everything off first, then only the requested families back on. */
+	disable_all_progs(obj);
+	if (env.ipv4 || !env.ipv6)
+		set_autoload_prog(obj, 4);
+	if (!env.ipv4)
+		set_autoload_prog(obj, 6);
+
+	err = tcpsynbl_bpf__load(obj);
+	if (err) {
+		fprintf(stderr, "failed to load BPF object: %d\n", err);
+		goto cleanup;
+	}
+
+	err = tcpsynbl_bpf__attach(obj);
+	if (err) {
+		fprintf(stderr, "failed to attach BPF programs\n");
+		goto cleanup;
+	}
+
+	map_fd = bpf_map__fd(obj->maps.hists);
+
+	signal(SIGINT, sig_handler);
+
+	printf("Tracing SYN backlog size. Ctrl-C to end.\n");
+
+	/* One summary per iteration; stop on error, SIGINT or count exhaustion. */
+	do {
+		sleep(env.interval);
+		printf("\n");
+
+		if (env.timestamp) {
+			time_t now;
+
+			time(&now);
+			strftime(ts, sizeof(ts), "%H:%M:%S", localtime(&now));
+			printf("%-8s\n", ts);
+		}
+
+		err = print_log2_hists(map_fd);
+		if (err)
+			break;
+	} while (!exiting && --env.times != 0);
+
+cleanup:
+	tcpsynbl_bpf__destroy(obj);
+	return err != 0;
+}
diff --git a/libbpf-tools/tcpsynbl.h b/libbpf-tools/tcpsynbl.h
new file mode 100644
index 00000000..6c22abb2
--- /dev/null
+++ b/libbpf-tools/tcpsynbl.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#ifndef __TCPSYNBL_H
+#define __TCPSYNBL_H
+
+#define MAX_SLOTS 32
+
+/* Log2 histogram: one counter per power-of-two slot. */
+struct hist {
+ __u32 slots[MAX_SLOTS];
+};
+
+#endif /* __TCPSYNBL_H */
diff --git a/libbpf-tools/trace_helpers.c b/libbpf-tools/trace_helpers.c
index 322b3c4f..9165be42 100644
--- a/libbpf-tools/trace_helpers.c
+++ b/libbpf-tools/trace_helpers.c
@@ -15,6 +15,7 @@
#include <fcntl.h>
#include <sys/resource.h>
#include <time.h>
+#include <bpf/bpf.h>
#include <bpf/btf.h>
#include <bpf/libbpf.h>
#include <limits.h>
@@ -990,14 +991,33 @@ bool is_kernel_module(const char *name)
return found;
}
-bool fentry_exists(const char *name, const char *mod)
+/*
+ * Check whether an fentry program can actually be attached to the kernel
+ * function whose BTF type id is *id*.
+ *
+ * A throw-away tracing program is loaded against that id and attached via
+ * bpf_raw_tracepoint_open(); both file descriptors are closed again before
+ * returning. Returns true only if load and attach both succeed.
+ */
+static bool fentry_try_attach(int id)
+{
+	/*
+	 * Minimal program "r0 = 0; exit". R0 is set explicitly because the
+	 * verifier does not accept an exit with an uninitialised return
+	 * register.
+	 */
+	struct bpf_insn insns[] = {
+		{ .code = BPF_ALU64 | BPF_MOV | BPF_K, .dst_reg = BPF_REG_0, .imm = 0 },
+		{ .code = BPF_JMP | BPF_EXIT },
+	};
+	LIBBPF_OPTS(bpf_prog_load_opts, opts,
+		.expected_attach_type = BPF_TRACE_FENTRY,
+		.attach_btf_id = id,
+	);
+	int prog_fd, attach_fd;
+
+	/* Pass a real license string; a NULL pointer makes the load fail. */
+	prog_fd = bpf_prog_load(BPF_PROG_TYPE_TRACING, "test", "GPL", insns,
+				sizeof(insns) / sizeof(insns[0]), &opts);
+	if (prog_fd < 0)
+		return false;
+
+	attach_fd = bpf_raw_tracepoint_open(NULL, prog_fd);
+	if (attach_fd >= 0)
+		close(attach_fd);
+
+	close(prog_fd);
+	return attach_fd >= 0;
+}
+
+bool fentry_can_attach(const char *name, const char *mod)
{
const char sysfs_vmlinux[] = "/sys/kernel/btf/vmlinux";
struct btf *base, *btf = NULL;
- const struct btf_type *type;
- const struct btf_enum *e;
char sysfs_mod[80];
- int id = -1, i, err;
+ int id = -1, err;
base = btf__parse(sysfs_vmlinux, NULL);
if (!base) {
@@ -1021,28 +1041,12 @@ bool fentry_exists(const char *name, const char *mod)
base = NULL;
}
- id = btf__find_by_name_kind(btf, "bpf_attach_type", BTF_KIND_ENUM);
- if (id < 0)
- goto err_out;
- type = btf__type_by_id(btf, id);
-
- /*
- * As kernel BTF is exposed starting from 5.4 kernel, but fentry/fexit
- * is actually supported starting from 5.5, so that's check this gap
- * first, then check if target func has btf type.
- */
- for (id = -1, i = 0, e = btf_enum(type); i < btf_vlen(type); i++, e++) {
- if (!strcmp(btf__name_by_offset(btf, e->name_off),
- "BPF_TRACE_FENTRY")) {
- id = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC);
- break;
- }
- }
+ id = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC);
err_out:
btf__free(btf);
btf__free(base);
- return id > 0;
+ return id > 0 && fentry_try_attach(id);
}
bool kprobe_exists(const char *name)
diff --git a/libbpf-tools/trace_helpers.h b/libbpf-tools/trace_helpers.h
index 98fd640f..d68d2468 100644
--- a/libbpf-tools/trace_helpers.h
+++ b/libbpf-tools/trace_helpers.h
@@ -77,7 +77,7 @@ bool is_kernel_module(const char *name);
* *mod* is a hint that indicates the *name* may reside in module BTF,
* if NULL, it means *name* belongs to vmlinux.
*/
-bool fentry_exists(const char *name, const char *mod);
+bool fentry_can_attach(const char *name, const char *mod);
/*
* The name of a kernel function to be attached to may be changed between
diff --git a/libbpf-tools/vfsstat.c b/libbpf-tools/vfsstat.c
index 5519c366..3cba0b01 100644
--- a/libbpf-tools/vfsstat.c
+++ b/libbpf-tools/vfsstat.c
@@ -160,7 +160,7 @@ int main(int argc, char **argv)
}
/* It fallbacks to kprobes when kernel does not support fentry. */
- if (vmlinux_btf_exists() && fentry_exists("vfs_read", NULL)) {
+ if (vmlinux_btf_exists() && fentry_can_attach("vfs_read", NULL)) {
bpf_program__set_autoload(skel->progs.kprobe_vfs_read, false);
bpf_program__set_autoload(skel->progs.kprobe_vfs_write, false);
bpf_program__set_autoload(skel->progs.kprobe_vfs_fsync, false);
diff --git a/man/man8/biopattern.8 b/man/man8/biopattern.8
new file mode 100644
index 00000000..451d667f
--- /dev/null
+++ b/man/man8/biopattern.8
@@ -0,0 +1,78 @@
+.TH biopattern 8 "2022-02-21" "USER COMMANDS"
+.SH NAME
+biopattern \- Identify random/sequential disk access patterns.
+.SH SYNOPSIS
+.B biopattern [\-h] [\-d DISK] [interval] [count]
+.SH DESCRIPTION
+This traces block device I/O (disk I/O) and prints the ratio of random to sequential I/O
+for each disk, or for the specified disk, either on Ctrl-C or after a given interval in seconds.
+
+This works by tracing kernel tracepoint block:block_rq_complete.
+
+Since this uses BPF, only the root user can use this tool.
+.SH REQUIREMENTS
+CONFIG_BPF and bcc.
+.SH OPTIONS
+.TP
+\-h
+Show help message and exit.
+.TP
+\-d
+Trace this disk only.
+.TP
+interval
+Print output every interval seconds, if any.
+.TP
+count
+Number of interval summaries.
+.SH EXAMPLES
+.TP
+Trace access patterns of all disks, and print a summary on Ctrl-C:
+#
+.B biopattern
+.TP
+Trace disk sdb only:
+#
+.B biopattern -d sdb
+.TP
+Print 1 second summaries, 10 times:
+#
+.B biopattern 1 10
+.SH FIELDS
+.TP
+TIME
+Time of the output, in HH:MM:SS format.
+.TP
+DISK
+Disk device name.
+.TP
+%RND
+Ratio of random I/O.
+.TP
+%SEQ
+Ratio of sequential I/O.
+.TP
+COUNT
+Number of I/O during the interval.
+.TP
+KBYTES
+Total Kbytes for these I/O, during the interval.
+.SH OVERHEAD
+Since block device I/O usually has a relatively low frequency (< 10,000/s),
+the overhead for this tool is expected to be low or negligible. For high IOPS
+storage systems, test and quantify before use.
+.SH SOURCE
+This is from bcc.
+.IP
+https://github.com/iovisor/bcc
+.PP
+Also look in the bcc distribution for a companion _examples.txt file containing
+example usage, output, and commentary for this tool.
+.SH OS
+Linux
+.SH STABILITY
+Unstable - in development.
+.SH AUTHOR
+Rocky Xing
+.SH SEE ALSO
+biosnoop(8), biolatency(8), iostat(1)
diff --git a/man/man8/biotop.8 b/man/man8/biotop.8
index ed25521f..47392bc7 100644
--- a/man/man8/biotop.8
+++ b/man/man8/biotop.8
@@ -2,7 +2,7 @@
.SH NAME
biotop \- Block device (disk) I/O by process top.
.SH SYNOPSIS
-.B biotop [\-h] [\-C] [\-r MAXROWS] [interval] [count]
+.B biotop [\-h] [\-C] [\-r MAXROWS] [\-p PID] [interval] [count]
.SH DESCRIPTION
This is top for disks.
@@ -30,6 +30,9 @@ Don't clear the screen.
\-r MAXROWS
Maximum number of rows to print. Default is 20.
.TP
+\-p PID
+Trace this PID only.
+.TP
interval
Interval between updates, seconds.
.TP
@@ -98,7 +101,7 @@ Linux
.SH STABILITY
Unstable - in development.
.SH AUTHOR
-Brendan Gregg
+Brendan Gregg, Rocky Xing
.SH INSPIRATION
top(1) by William LeFebvre
.SH SEE ALSO
diff --git a/man/man8/cachetop.8 b/man/man8/cachetop.8
index 5642fa1d..f6d1ea3a 100644
--- a/man/man8/cachetop.8
+++ b/man/man8/cachetop.8
@@ -2,7 +2,7 @@
.SH NAME
cachetop \- Statistics for linux page cache hit/miss ratios per processes. Uses Linux eBPF/bcc.
.SH SYNOPSIS
-.B cachetop
+.B cachetop [\-p PID]
[interval]
.SH DESCRIPTION
This traces four kernel functions and prints per-processes summaries every
@@ -15,6 +15,10 @@ need updating to match any changes to these functions. Edit the script to
customize which functions are traced.
Since this uses BPF, only the root user can use this tool.
+.SH OPTIONS
+.TP
+\-p PID
+Trace this PID only.
.SH KEYBINDINGS
The following keybindings can be used to control the output of \fBcachetop\fR.
.TP
@@ -86,6 +90,6 @@ Linux
.SH STABILITY
Unstable - in development.
.SH AUTHOR
-Emmanuel Bretelle
+Emmanuel Bretelle, Rocky Xing
.SH SEE ALSO
cachestat (8)
diff --git a/man/man8/cpudist.8 b/man/man8/cpudist.8
index b5179102..b59346ba 100644
--- a/man/man8/cpudist.8
+++ b/man/man8/cpudist.8
@@ -2,7 +2,7 @@
.SH NAME
cpudist \- On- and off-CPU task time as a histogram.
.SH SYNOPSIS
-.B cpudist [\-h] [-O] [\-T] [\-m] [\-P] [\-L] [\-p PID] [interval] [count]
+.B cpudist [\-h] [-O] [\-T] [\-m] [\-P] [\-L] [\-p PID] [\-I] [interval] [count]
.SH DESCRIPTION
This measures the time a task spends on the CPU before being descheduled, and
shows the times as a histogram. Tasks that spend a very short time on the CPU
@@ -15,6 +15,8 @@ is scheduled again. This can be helpful in identifying long blocking and I/O
operations, or alternatively very short descheduling times due to short-lived
locks or timers.
+By default CPU idle time is excluded by simply excluding PID 0.
+
This tool uses in-kernel eBPF maps for storing timestamps and the histogram,
for efficiency. Despite this, the overhead of this tool may become significant
for some workloads: see the OVERHEAD section.
@@ -45,6 +47,9 @@ Print a histogram for each TID (pid from the kernel's perspective).
\-p PID
Only show this PID (filtered in kernel for efficiency).
.TP
+\-I
+Include CPU idle time (by default it is excluded).
+.TP
interval
Output interval, in seconds.
.TP
@@ -71,6 +76,10 @@ Print 1 second summaries, using milliseconds as units for the histogram, and inc
Trace PID 185 only, 1 second summaries:
#
.B cpudist -p 185 1
+.TP
+Include CPU idle time:
+#
+.B cpudist -I
.SH FIELDS
.TP
usecs
diff --git a/man/man8/hardirqs.8 b/man/man8/hardirqs.8
index 12ae6be5..aa9afb84 100644
--- a/man/man8/hardirqs.8
+++ b/man/man8/hardirqs.8
@@ -33,6 +33,9 @@ Count events only.
.TP
\-d
Show IRQ time distribution as histograms.
+.TP
+\-c CPU
+Trace on this CPU only.
.SH EXAMPLES
.TP
Sum hard IRQ event time until Ctrl-C:
@@ -50,6 +53,10 @@ Print 1 second summaries, 10 times:
1 second summaries, printed in nanoseconds, with timestamps:
#
.B hardirqs \-NT 1
+.TP
+Sum hard IRQ event time on CPU 1 until Ctrl-C:
+#
+.B hardirqs \-c 1
.SH FIELDS
.TP
HARDIRQ
@@ -91,6 +98,6 @@ Linux
.SH STABILITY
Unstable - in development.
.SH AUTHOR
-Brendan Gregg, Hengqi Chen
+Brendan Gregg, Hengqi Chen, Rocky Xing
.SH SEE ALSO
softirqs(8)
diff --git a/man/man8/softirqs.8 b/man/man8/softirqs.8
index a9a14414..fa475f78 100644
--- a/man/man8/softirqs.8
+++ b/man/man8/softirqs.8
@@ -2,7 +2,7 @@
.SH NAME
softirqs \- Measure soft IRQ (soft interrupt) event time. Uses Linux eBPF/bcc.
.SH SYNOPSIS
-.B softirqs [\-h] [\-T] [\-N] [\-d] [interval] [count]
+.B softirqs [\-h] [\-T] [\-N] [\-C] [\-d] [\-c CPU] [interval] [count]
.SH DESCRIPTION
This summarizes the time spent servicing soft IRQs (soft interrupts), and can
show this time as either totals or histogram distributions. A system-wide
@@ -26,16 +26,26 @@ Print usage message.
Include timestamps on output.
.TP
\-N
-Output in nanoseconds
+Output in nanoseconds.
+.TP
+\-C
+Show the number of soft irq events.
.TP
\-d
-Show IRQ time distribution as histograms
+Show IRQ time distribution as histograms.
+.TP
+\-c CPU
+Trace on this CPU only.
.SH EXAMPLES
.TP
Sum soft IRQ event time until Ctrl-C:
#
.B softirqs
.TP
+Show the number of soft irq events:
+#
+.B softirqs \-C
+.TP
Show soft IRQ event time as histograms:
#
.B softirqs \-d
@@ -47,6 +57,10 @@ Print 1 second summaries, 10 times:
1 second summaries, printed in nanoseconds, with timestamps:
#
.B softirqs \-NT 1
+.TP
+Sum soft IRQ event time on CPU 1 until Ctrl-C:
+#
+.B softirqs \-c 1
.SH FIELDS
.TP
SOFTIRQ
@@ -88,6 +102,6 @@ Linux
.SH STABILITY
Unstable - in development.
.SH AUTHORS
-Brendan Gregg, Sasha Goldshtein
+Brendan Gregg, Sasha Goldshtein, Rocky Xing
.SH SEE ALSO
hardirqs(8)
diff --git a/man/man8/sslsniff.8 b/man/man8/sslsniff.8
index df81664b..4b80191a 100644
--- a/man/man8/sslsniff.8
+++ b/man/man8/sslsniff.8
@@ -3,7 +3,8 @@
sslsniff \- Print data passed to OpenSSL, GnuTLS or NSS. Uses Linux eBPF/bcc.
.SH SYNOPSIS
.B sslsniff [-h] [-p PID] [-u UID] [-x] [-c COMM] [-o] [-g] [-n] [-d]
-.B [--hexdump] [--max-buffer-size SIZE]
+.B [--hexdump] [--max-buffer-size SIZE] [-l] [--handshake]
+.B [--extra-lib EXTRA_LIB]
.SH DESCRIPTION
sslsniff prints data sent to write/send and read/recv functions of
OpenSSL, GnuTLS and NSS, allowing us to read plain text content before
@@ -46,6 +47,16 @@ Show data as hexdump instead of trying to decode it as UTF-8
\-\-max-buffer-size SIZE
Sets maximum buffer size of intercepted data. Longer values would be truncated.
Default value is 8 Kib, maximum possible value is a bit less than 32 Kib.
+.TP
+\-l, \-\-latency
+Show function latency in ms.
+.TP
+\--handshake
+Show handshake latency, enabled only if latency option is on.
+.TP
+\--extra-lib EXTRA_LIB
+Consists of the library type and library path, separated by a colon. Supported
+library types are: openssl, gnutls, nss. Can be specified multiple times.
.SH EXAMPLES
.TP
Print all calls to SSL write/send and read/recv system-wide:
@@ -55,6 +66,14 @@ Print all calls to SSL write/send and read/recv system-wide:
Print only OpenSSL calls issued by user with UID 1000
#
.B sslsniff -u 1000 --no-nss --no-gnutls
+.TP
+Print SSL handshake event and latency for all traced functions:
+#
+.B sslsniff -l --handshake
+.TP
+Print only calls to OpenSSL from /some/path/libssl.so
+.B sslsniff --no-openssl --no-gnutls --no-nss --extra-lib
+.B openssl:/some/path/libssl.so
.SH FIELDS
.TP
FUNC
@@ -77,6 +96,9 @@ UID of the process, displayed only if launched with -x.
.TP
TID
Thread ID, displayed only if launched with -x.
+.TP
+LAT(ms)
+Function latency in ms.
.SH SOURCE
This is from bcc.
.IP
diff --git a/man/man8/tcpcong.8 b/man/man8/tcpcong.8
new file mode 100644
index 00000000..877ed805
--- /dev/null
+++ b/man/man8/tcpcong.8
@@ -0,0 +1,136 @@
+.TH tcpcong 8 "2022-01-27" "USER COMMANDS"
+.SH NAME
+tcpcong \- Measure tcp congestion state duration. Uses Linux eBPF/bcc.
+.SH SYNOPSIS
+.B tcpcong [\-h] [\-T] [\-L] [\-R] [\-u] [\-d] [interval] [outputs]
+.SH DESCRIPTION
+This tool measures tcp sockets congestion control status duration, and
+prints a summary of tcp congestion state durations along with the number
+of total state changes.
+
+It uses dynamic tracing of kernel tcp congestion control status
+updating functions, and will need to be updated to match kernel changes.
+
+The traced functions are only called when there is a congestion state update,
+and therefore have low overhead. We also use a BPF map to store traced data
+to reduce overhead. See the OVERHEAD section for more details.
+Since this uses BPF, only the root user can use this tool.
+.SH REQUIREMENTS
+CONFIG_BPF and bcc.
+.SH OPTIONS
+.TP
+\-h
+Print usage message.
+.TP
+\-T
+Include a timestamp column.
+.TP
+\-L
+Specify local tcp port range.
+.TP
+\-R
+Specify remote tcp port range.
+.TP
+\-u
+Output in microseconds.
+.TP
+\-d
+Show congestion status duration distribution as histograms.
+.SH EXAMPLES
+.TP
+Show all tcp sockets congestion status duration until Ctrl-C:
+#
+.B tcpcong
+.TP
+Show all tcp sockets congestion status duration every 1 second and 10 times:
+#
+.B tcpcong 1 10
+.TP
+Show only local port 3000-3006 congestion status duration every 1 second:
+#
+.B tcpcong \-L 3000-3006 1
+.TP
+Show only remote port 5000-5005 congestion status duration every 1 second:
+#
+.B tcpcong \-R 5000-5005 1
+.TP
+Show 1 second summaries, printed in microseconds, with timestamps:
+#
+.B tcpcong \-uT 1
+.TP
+Show all tcp sockets congestion status duration as histograms:
+#
+.B tcpcong \-d
+.SH FIELDS
+.TP
+LAddrPort
+local ip address and tcp socket port.
+.TP
+RAddrPort
+remote ip address and tcp socket port.
+.TP
+Open_us
+Total duration in open status, in microseconds.
+.TP
+Dod_us
+Total duration in disorder status, in microseconds.
+.TP
+Rcov_us
+Total duration in recovery status, in microseconds.
+.TP
+Cwr_us
+Total duration in cwr status, in microseconds.
+.TP
+Los_us
+Total duration in loss status, in microseconds.
+.TP
+Open_ms
+Total duration in open status, in milliseconds.
+.TP
+Dod_ms
+Total duration in disorder status, in milliseconds.
+.TP
+Rcov_ms
+Total duration in recovery status, in milliseconds.
+.TP
+Cwr_ms
+Total duration in cwr status, in milliseconds.
+.TP
+Los_ms
+Total duration in loss status, in milliseconds.
+.TP
+Chgs
+Total number of status changes.
+.TP
+usecs
+Range of microseconds for this bucket.
+.TP
+msecs
+Range of milliseconds for this bucket.
+.TP
+count
+Number of congestion status in this time range.
+.TP
+distribution
+ASCII representation of the distribution (the count column).
+.SH OVERHEAD
+This traces the kernel tcp congestion status change functions.
+As the call rate per second of these functions per socket is low (<10000), the
+overhead is also expected to be negligible. If you have an application that
+will create thousands of tcp connections, then test and understand overhead
+before use.
+.SH SOURCE
+This is from bcc.
+.IP
+https://github.com/iovisor/bcc
+.PP
+Also look in the bcc distribution for a companion _examples.txt file containing
+example usage, output, and commentary for this tool.
+.SH OS
+Linux
+.SH STABILITY
+Unstable - in development.
+.SH AUTHOR
+jacky gan
+.SH SEE ALSO
+tcpretrans(8), tcpconnect(8), tcptop(8), tcpdrop(8)
diff --git a/man/man8/trace.8 b/man/man8/trace.8
index 7afd2527..acfff58f 100644
--- a/man/man8/trace.8
+++ b/man/man8/trace.8
@@ -3,7 +3,7 @@
trace \- Trace a function and print its arguments or return value, optionally evaluating a filter. Uses Linux eBPF/bcc.
.SH SYNOPSIS
.B trace [-h] [-b BUFFER_PAGES] [-p PID] [-L TID] [--uid UID] [-v] [-Z STRING_SIZE] [-S] [-s SYM_FILE_LIST]
- [-M MAX_EVENTS] [-t] [-u] [-T] [-C] [-K] [-U] [-a] [-I header]
+ [-M MAX_EVENTS] [-t] [-u] [-T] [-C] [-K] [-U] [-a] [-I header] [-A]
probe [probe ...]
.SH DESCRIPTION
trace probes functions you specify and displays trace messages if a particular
@@ -83,6 +83,9 @@ Additional header files to include in the BPF program. This is needed if your
filter or print expressions use types or data structures that are not available
in the standard headers. For example: 'linux/mm.h'
.TP
+\-A
+Print aggregated amount of each trace. This should be used together with -M/--max-events.
+.TP
probe [probe ...]
One or more probes that attach to functions, filter conditions, and print
information. See PROBE SYNTAX below.
diff --git a/src/cc/TEST_MAPPING b/src/cc/TEST_MAPPING
new file mode 100644
index 00000000..90892fe0
--- /dev/null
+++ b/src/cc/TEST_MAPPING
@@ -0,0 +1,7 @@
+{
+ "presubmit": [
+ {
+ "name": "libbpf_load_test"
+ }
+ ]
+}
diff --git a/src/cc/api/BPFTable.cc b/src/cc/api/BPFTable.cc
index 689992b6..23beae37 100644
--- a/src/cc/api/BPFTable.cc
+++ b/src/cc/api/BPFTable.cc
@@ -397,21 +397,21 @@ BPFPerfBuffer::BPFPerfBuffer(const TableDesc& desc)
"' is not a perf buffer");
}
-StatusTuple BPFPerfBuffer::open_on_cpu(perf_reader_raw_cb cb,
- perf_reader_lost_cb lost_cb, int cpu,
- void* cb_cookie, int page_cnt) {
- if (cpu_readers_.find(cpu) != cpu_readers_.end())
- return StatusTuple(-1, "Perf buffer already open on CPU %d", cpu);
+StatusTuple BPFPerfBuffer::open_on_cpu(perf_reader_raw_cb cb, perf_reader_lost_cb lost_cb,
+ void* cb_cookie, int page_cnt,
+ struct bcc_perf_buffer_opts& opts) {
+ if (cpu_readers_.find(opts.cpu) != cpu_readers_.end())
+ return StatusTuple(-1, "Perf buffer already open on CPU %d", opts.cpu);
auto reader = static_cast<perf_reader*>(
- bpf_open_perf_buffer(cb, lost_cb, cb_cookie, -1, cpu, page_cnt));
+ bpf_open_perf_buffer_opts(cb, lost_cb, cb_cookie, page_cnt, &opts));
if (reader == nullptr)
return StatusTuple(-1, "Unable to construct perf reader");
int reader_fd = perf_reader_fd(reader);
- if (!update(&cpu, &reader_fd)) {
+ if (!update(&opts.cpu, &reader_fd)) {
perf_reader_free(static_cast<void*>(reader));
- return StatusTuple(-1, "Unable to open perf buffer on CPU %d: %s", cpu,
+ return StatusTuple(-1, "Unable to open perf buffer on CPU %d: %s", opts.cpu,
std::strerror(errno));
}
@@ -424,13 +424,21 @@ StatusTuple BPFPerfBuffer::open_on_cpu(perf_reader_raw_cb cb,
std::strerror(errno));
}
- cpu_readers_[cpu] = reader;
+ cpu_readers_[opts.cpu] = reader;
return StatusTuple::OK();
}
StatusTuple BPFPerfBuffer::open_all_cpu(perf_reader_raw_cb cb,
perf_reader_lost_cb lost_cb,
void* cb_cookie, int page_cnt) {
+ return open_all_cpu(cb, lost_cb, cb_cookie, page_cnt, 1);
+}
+
+StatusTuple BPFPerfBuffer::open_all_cpu(perf_reader_raw_cb cb,
+ perf_reader_lost_cb lost_cb,
+ void* cb_cookie, int page_cnt,
+ int wakeup_events)
+{
if (cpu_readers_.size() != 0 || epfd_ != -1)
return StatusTuple(-1, "Previously opened perf buffer not cleaned");
@@ -439,7 +447,12 @@ StatusTuple BPFPerfBuffer::open_all_cpu(perf_reader_raw_cb cb,
epfd_ = epoll_create1(EPOLL_CLOEXEC);
for (int i : cpus) {
- auto res = open_on_cpu(cb, lost_cb, i, cb_cookie, page_cnt);
+ struct bcc_perf_buffer_opts opts = {
+ .pid = -1,
+ .cpu = i,
+ .wakeup_events = wakeup_events,
+ };
+ auto res = open_on_cpu(cb, lost_cb, cb_cookie, page_cnt, opts);
if (!res.ok()) {
TRY2(close_all_cpu());
return res;
@@ -500,6 +513,14 @@ int BPFPerfBuffer::poll(int timeout_ms) {
return cnt;
}
+int BPFPerfBuffer::consume() {
+ if (epfd_ < 0)
+ return -1;
+ for (auto it : cpu_readers_)
+ perf_reader_event_read(it.second);
+ return 0;
+}
+
BPFPerfBuffer::~BPFPerfBuffer() {
auto res = close_all_cpu();
if (!res.ok())
diff --git a/src/cc/api/BPFTable.h b/src/cc/api/BPFTable.h
index 4b902dcb..681b4a94 100644
--- a/src/cc/api/BPFTable.h
+++ b/src/cc/api/BPFTable.h
@@ -415,12 +415,15 @@ class BPFPerfBuffer : public BPFTableBase<int, int> {
StatusTuple open_all_cpu(perf_reader_raw_cb cb, perf_reader_lost_cb lost_cb,
void* cb_cookie, int page_cnt);
+ StatusTuple open_all_cpu(perf_reader_raw_cb cb, perf_reader_lost_cb lost_cb,
+ void* cb_cookie, int page_cnt, int wakeup_events);
StatusTuple close_all_cpu();
int poll(int timeout_ms);
+ int consume();
private:
StatusTuple open_on_cpu(perf_reader_raw_cb cb, perf_reader_lost_cb lost_cb,
- int cpu, void* cb_cookie, int page_cnt);
+ void* cb_cookie, int page_cnt, struct bcc_perf_buffer_opts& opts);
StatusTuple close_on_cpu(int cpu);
std::map<int, perf_reader*> cpu_readers_;
diff --git a/src/cc/bcc_btf.cc b/src/cc/bcc_btf.cc
index 7f551ae8..be248612 100644
--- a/src/cc/bcc_btf.cc
+++ b/src/cc/bcc_btf.cc
@@ -652,9 +652,47 @@ int BTF::get_btf_info(const char *fname,
int BTF::get_map_tids(std::string map_name,
unsigned expected_ksize, unsigned expected_vsize,
unsigned *key_tid, unsigned *value_tid) {
- return btf__get_map_kv_tids(btf_, map_name.c_str(),
- expected_ksize, expected_vsize,
- key_tid, value_tid);
+ auto struct_name = "____btf_map_" + map_name;
+ auto type_id = btf__find_by_name_kind(btf_, struct_name.c_str(), BTF_KIND_STRUCT);
+ if (type_id < 0) {
+ warning("struct %s not found in BTF\n", struct_name.c_str());
+ return -1;
+ }
+
+ auto struct_type = btf__type_by_id(btf_, type_id);
+ if (!struct_type || btf_vlen(struct_type) < 2) {
+ warning("struct %s is not a valid map struct\n", struct_name.c_str());
+ return -1;
+ }
+
+ auto members = btf_members(struct_type);
+ auto key = members[0];
+ auto key_name = btf__name_by_offset(btf_, key.name_off);
+ if (strcmp(key_name, "key")) {
+ warning("'key' should be the first member\n");
+ return -1;
+ }
+ auto key_size = btf__resolve_size(btf_, key.type);
+ if (key_size != expected_ksize) {
+ warning("expect key size to be %d, got %d\n", expected_ksize, key_size);
+ return -1;
+ }
+ *key_tid = key.type;
+
+ auto value = members[1];
+ auto value_name = btf__name_by_offset(btf_, value.name_off);
+ if (strcmp(value_name, "value")) {
+ warning("'value' should be the second member\n");
+ return -1;
+ }
+ auto value_size = btf__resolve_size(btf_, value.type);
+ if (value_size != expected_vsize) {
+ warning("expect value size to be %d, got %d\n", expected_vsize, value_size);
+ return -1;
+ }
+ *value_tid = value.type;
+
+ return 0;
}
} // namespace ebpf
diff --git a/src/cc/bcc_btf.h b/src/cc/bcc_btf.h
index b460eb35..96492b4b 100644
--- a/src/cc/bcc_btf.h
+++ b/src/cc/bcc_btf.h
@@ -26,6 +26,7 @@
#include "bpf_module.h"
struct btf;
+struct btf_type;
namespace btf_ext_vendored {
diff --git a/src/cc/bcc_common.cc b/src/cc/bcc_common.cc
index 5c349d70..c33e37af 100644
--- a/src/cc/bcc_common.cc
+++ b/src/cc/bcc_common.cc
@@ -37,6 +37,10 @@ void * bpf_module_create_c_from_string(const char *text, unsigned flags, const c
return mod;
}
+bool bpf_module_rw_engine_enabled() {
+ return ebpf::bpf_module_rw_engine_enabled();
+}
+
void bpf_module_destroy(void *program) {
auto mod = static_cast<ebpf::BPFModule *>(program);
if (!mod) return;
diff --git a/src/cc/bcc_common.h b/src/cc/bcc_common.h
index b5f77db9..ed68f543 100644
--- a/src/cc/bcc_common.h
+++ b/src/cc/bcc_common.h
@@ -30,6 +30,7 @@ void * bpf_module_create_c(const char *filename, unsigned flags, const char *cfl
void * bpf_module_create_c_from_string(const char *text, unsigned flags, const char *cflags[],
int ncflags, bool allow_rlimit,
const char *dev_name);
+bool bpf_module_rw_engine_enabled();
void bpf_module_destroy(void *program);
char * bpf_module_license(void *program);
unsigned bpf_module_kern_version(void *program);
diff --git a/src/cc/bcc_debug.cc b/src/cc/bcc_debug.cc
index 52b6571e..d7ed49fa 100644
--- a/src/cc/bcc_debug.cc
+++ b/src/cc/bcc_debug.cc
@@ -19,6 +19,9 @@
#include <tuple>
#include <vector>
+#if LLVM_MAJOR_VERSION >= 15
+#include <llvm/DebugInfo/DWARF/DWARFCompileUnit.h>
+#endif
#include <llvm/DebugInfo/DWARF/DWARFContext.h>
#include <llvm/DebugInfo/DWARF/DWARFDebugLine.h>
#include <llvm/IR/Module.h>
@@ -29,6 +32,9 @@
#include <llvm/MC/MCInstrInfo.h>
#include <llvm/MC/MCObjectFileInfo.h>
#include <llvm/MC/MCRegisterInfo.h>
+#if LLVM_MAJOR_VERSION >= 15
+#include <llvm/MC/MCSubtargetInfo.h>
+#endif
#if LLVM_MAJOR_VERSION >= 14
#include <llvm/MC/TargetRegistry.h>
#else
@@ -190,68 +196,67 @@ void SourceDebugger::dump() {
vector<string> LineCache = buildLineCache();
// Start to disassemble with source code annotation section by section
- for (auto section : sections_)
- if (!strncmp(fn_prefix_.c_str(), section.first.c_str(),
- fn_prefix_.size())) {
- MCDisassembler::DecodeStatus S;
- MCInst Inst;
- uint64_t Size;
- uint8_t *FuncStart = get<0>(section.second);
- uint64_t FuncSize = get<1>(section.second);
+ prog_func_info_.for_each_func([&](std::string func_name, FuncInfo &info) {
+ MCDisassembler::DecodeStatus S;
+ MCInst Inst;
+ uint64_t Size;
+ uint8_t *FuncStart = info.start_;
+ uint64_t FuncSize = info.size_;
#if LLVM_MAJOR_VERSION >= 9
- unsigned SectionID = get<2>(section.second);
+ auto section = sections_.find(info.section_);
+ if (section == sections_.end()) {
+ errs() << "Debug Error: no section entry for section " << info.section_
+ << '\n';
+ return;
+ }
+ unsigned SectionID = get<2>(section->second);
#endif
- ArrayRef<uint8_t> Data(FuncStart, FuncSize);
- uint32_t CurrentSrcLine = 0;
- string func_name = section.first.substr(fn_prefix_.size());
+ ArrayRef<uint8_t> Data(FuncStart, FuncSize);
+ uint32_t CurrentSrcLine = 0;
- errs() << "Disassembly of section " << section.first << ":\n"
- << func_name << ":\n";
+ errs() << "Disassembly of function " << func_name << "\n";
- string src_dbg_str;
- llvm::raw_string_ostream os(src_dbg_str);
- for (uint64_t Index = 0; Index < FuncSize; Index += Size) {
+ string src_dbg_str;
+ llvm::raw_string_ostream os(src_dbg_str);
+ for (uint64_t Index = 0; Index < FuncSize; Index += Size) {
#if LLVM_MAJOR_VERSION >= 10
- S = DisAsm->getInstruction(Inst, Size, Data.slice(Index), Index,
- nulls());
+ S = DisAsm->getInstruction(Inst, Size, Data.slice(Index), Index, nulls());
#else
- S = DisAsm->getInstruction(Inst, Size, Data.slice(Index), Index,
- nulls(), nulls());
+ S = DisAsm->getInstruction(Inst, Size, Data.slice(Index), Index, nulls(),
+ nulls());
#endif
- if (S != MCDisassembler::Success) {
- os << "Debug Error: disassembler failed: " << std::to_string(S)
- << '\n';
- break;
- } else {
- DILineInfo LineInfo;
+ if (S != MCDisassembler::Success) {
+ os << "Debug Error: disassembler failed: " << std::to_string(S) << '\n';
+ break;
+ } else {
+ DILineInfo LineInfo;
- LineTable->getFileLineInfoForAddress(
+ LineTable->getFileLineInfoForAddress(
#if LLVM_MAJOR_VERSION >= 9
- {(uint64_t)FuncStart + Index, SectionID},
+ {(uint64_t)FuncStart + Index, SectionID},
#else
- (uint64_t)FuncStart + Index,
+ (uint64_t)FuncStart + Index,
#endif
- CU->getCompilationDir(),
- DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath,
- LineInfo);
+ CU->getCompilationDir(),
+ DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, LineInfo);
- adjustInstSize(Size, Data[Index], Data[Index + 1]);
- dumpSrcLine(LineCache, LineInfo.FileName, LineInfo.Line,
- CurrentSrcLine, os);
- os << format("%4" PRIu64 ":", Index >> 3) << '\t';
- dumpBytes(Data.slice(Index, Size), os);
+ adjustInstSize(Size, Data[Index], Data[Index + 1]);
+ dumpSrcLine(LineCache, LineInfo.FileName, LineInfo.Line, CurrentSrcLine,
+ os);
+ os << format("%4" PRIu64 ":", Index >> 3) << '\t';
+ dumpBytes(Data.slice(Index, Size), os);
#if LLVM_MAJOR_VERSION >= 10
- IP->printInst(&Inst, 0, "", *STI, os);
+ IP->printInst(&Inst, 0, "", *STI, os);
#else
- IP->printInst(&Inst, os, "", *STI);
+ IP->printInst(&Inst, os, "", *STI);
#endif
- os << '\n';
- }
+ os << '\n';
}
- os.flush();
- errs() << src_dbg_str << '\n';
- src_dbg_fmap_[func_name] = src_dbg_str;
}
+ os.flush();
+ errs() << src_dbg_str << '\n';
+ src_dbg_fmap_[func_name] = src_dbg_str;
+ });
}
} // namespace ebpf
diff --git a/src/cc/bcc_debug.h b/src/cc/bcc_debug.h
index 1467ca80..f9bda118 100644
--- a/src/cc/bcc_debug.h
+++ b/src/cc/bcc_debug.h
@@ -15,19 +15,18 @@
*/
#include "bpf_module.h"
+#include "frontends/clang/loader.h"
namespace ebpf {
class SourceDebugger {
public:
- SourceDebugger(
- llvm::Module *mod,
- sec_map_def &sections,
- const std::string &fn_prefix, const std::string &mod_src,
- std::map<std::string, std::string> &src_dbg_fmap)
+ SourceDebugger(llvm::Module *mod, sec_map_def &sections,
+ ProgFuncInfo &prog_func_info, const std::string &mod_src,
+ std::map<std::string, std::string> &src_dbg_fmap)
: mod_(mod),
sections_(sections),
- fn_prefix_(fn_prefix),
+ prog_func_info_(prog_func_info),
mod_src_(mod_src),
src_dbg_fmap_(src_dbg_fmap) {}
// Only support dump for llvm 6.x and later.
@@ -56,7 +55,7 @@ class SourceDebugger {
private:
llvm::Module *mod_;
const sec_map_def &sections_;
- const std::string &fn_prefix_;
+ ProgFuncInfo &prog_func_info_;
const std::string &mod_src_;
std::map<std::string, std::string> &src_dbg_fmap_;
};
diff --git a/src/cc/bpf_module.cc b/src/cc/bpf_module.cc
index 36f9582a..b029962e 100644
--- a/src/cc/bpf_module.cc
+++ b/src/cc/bpf_module.cc
@@ -13,38 +13,48 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+#include "bpf_module.h"
+
#include <fcntl.h>
-#include <map>
-#include <string>
-#include <sys/stat.h>
-#include <unistd.h>
-#include <vector>
-#include <set>
#include <linux/bpf.h>
-#include <net/if.h>
-
+#include <llvm-c/Transforms/IPO.h>
#include <llvm/ExecutionEngine/MCJIT.h>
#include <llvm/ExecutionEngine/SectionMemoryManager.h>
#include <llvm/IR/IRPrintingPasses.h>
-#include <llvm/IR/LegacyPassManager.h>
#include <llvm/IR/LLVMContext.h>
+#include <llvm/IR/LegacyPassManager.h>
#include <llvm/IR/Module.h>
+
+#if LLVM_MAJOR_VERSION >= 15
+#include <llvm/Pass.h>
+#endif
+
#include <llvm/IR/Verifier.h>
+#include <llvm/Object/ObjectFile.h>
+#include <llvm/Object/ELFObjectFile.h>
+#include <llvm/Object/SymbolSize.h>
#include <llvm/Support/TargetSelect.h>
#include <llvm/Transforms/IPO.h>
#include <llvm/Transforms/IPO/PassManagerBuilder.h>
-#include <llvm-c/Transforms/IPO.h>
+#include <net/if.h>
+#include <sys/stat.h>
+#include <unistd.h>
-#include "common.h"
+#include <map>
+#include <set>
+#include <string>
+#include <iostream>
+#include <vector>
+
+#include "bcc_btf.h"
#include "bcc_debug.h"
#include "bcc_elf.h"
-#include "frontends/clang/loader.h"
-#include "frontends/clang/b_frontend_action.h"
-#include "bpf_module.h"
+#include "bcc_libbpf_inc.h"
+#include "common.h"
#include "exported_files.h"
+#include "frontends/clang/b_frontend_action.h"
+#include "frontends/clang/loader.h"
#include "libbpf.h"
-#include "bcc_btf.h"
-#include "bcc_libbpf_inc.h"
namespace ebpf {
@@ -58,15 +68,11 @@ using std::unique_ptr;
using std::vector;
using namespace llvm;
-const string BPFModule::FN_PREFIX = BPF_FN_PREFIX;
-
// Snooping class to remember the sections as the JIT creates them
class MyMemoryManager : public SectionMemoryManager {
public:
-
- explicit MyMemoryManager(sec_map_def *sections)
- : sections_(sections) {
- }
+ explicit MyMemoryManager(sec_map_def *sections, ProgFuncInfo *prog_func_info)
+ : sections_(sections), prog_func_info_(prog_func_info) {}
virtual ~MyMemoryManager() {}
uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
@@ -74,8 +80,6 @@ class MyMemoryManager : public SectionMemoryManager {
StringRef SectionName) override {
// The programs need to change from fake fd to real map fd, so not allocate ReadOnly regions.
uint8_t *Addr = SectionMemoryManager::allocateDataSection(Size, Alignment, SectionID, SectionName, false);
- //printf("allocateDataSection: %s Addr %p Size %ld Alignment %d SectionID %d\n",
- // SectionName.str().c_str(), (void *)Addr, Size, Alignment, SectionID);
(*sections_)[SectionName.str()] = make_tuple(Addr, Size, SectionID);
return Addr;
}
@@ -85,12 +89,38 @@ class MyMemoryManager : public SectionMemoryManager {
// The lines in .BTF.ext line_info, if corresponding to remapped files, will have empty source line.
// The line_info will be fixed in place, so not allocate ReadOnly regions.
uint8_t *Addr = SectionMemoryManager::allocateDataSection(Size, Alignment, SectionID, SectionName, false);
- //printf("allocateDataSection: %s Addr %p Size %ld Alignment %d SectionID %d\n",
- // SectionName.str().c_str(), (void *)Addr, Size, Alignment, SectionID);
(*sections_)[SectionName.str()] = make_tuple(Addr, Size, SectionID);
return Addr;
}
+
+ void notifyObjectLoaded(ExecutionEngine *EE,
+ const object::ObjectFile &o) override {
+ auto sizes = llvm::object::computeSymbolSizes(o);
+ for (auto ss : sizes) {
+ auto maybe_name = ss.first.getName();
+ if (!maybe_name)
+ continue;
+
+ std::string name = maybe_name->str();
+ auto info = prog_func_info_->get_func(name);
+ if (!info)
+ continue;
+
+ auto section = ss.first.getSection();
+ if (!section)
+ continue;
+
+ auto sec_name = section.get()->getName();
+ if (!sec_name)
+ continue;
+
+ info->section_ = sec_name->str();
+ info->size_ = ss.second;
+ }
+ }
+
sec_map_def *sections_;
+ ProgFuncInfo *prog_func_info_;
};
BPFModule::BPFModule(unsigned flags, TableStorage *ts, bool rw_engine_enabled,
@@ -120,7 +150,7 @@ BPFModule::BPFModule(unsigned flags, TableStorage *ts, bool rw_engine_enabled,
local_ts_ = createSharedTableStorage();
ts_ = &*local_ts_;
}
- func_src_ = ebpf::make_unique<FuncSource>();
+ prog_func_info_ = ebpf::make_unique<ProgFuncInfo>();
}
static StatusTuple unimplemented_sscanf(const char *, void *) {
@@ -139,14 +169,18 @@ BPFModule::~BPFModule() {
}
if (!rw_engine_enabled_) {
- for (auto section : sections_)
- delete[] get<0>(section.second);
+ prog_func_info_->for_each_func(
+ [&](std::string name, FuncInfo &info) {
+ if (!info.start_)
+ return;
+ delete[] info.start_;
+ });
}
engine_.reset();
cleanup_rw_engine();
ctx_.reset();
- func_src_.reset();
+ prog_func_info_.reset();
if (btf_)
delete btf_;
@@ -162,7 +196,8 @@ int BPFModule::free_bcc_memory() {
int BPFModule::load_cfile(const string &file, bool in_memory, const char *cflags[], int ncflags) {
ClangLoader clang_loader(&*ctx_, flags_);
if (clang_loader.parse(&mod_, *ts_, file, in_memory, cflags, ncflags, id_,
- *func_src_, mod_src_, maps_ns_, fake_fd_map_, perf_events_))
+ *prog_func_info_, mod_src_, maps_ns_, fake_fd_map_,
+ perf_events_))
return -1;
return 0;
}
@@ -175,8 +210,9 @@ int BPFModule::load_cfile(const string &file, bool in_memory, const char *cflags
int BPFModule::load_includes(const string &text) {
ClangLoader clang_loader(&*ctx_, flags_);
const char *cflags[] = {"-DB_WORKAROUND"};
- if (clang_loader.parse(&mod_, *ts_, text, true, cflags, 1, "", *func_src_,
- mod_src_, "", fake_fd_map_, perf_events_))
+ if (clang_loader.parse(&mod_, *ts_, text, true, cflags, 1, "",
+ *prog_func_info_, mod_src_, "", fake_fd_map_,
+ perf_events_))
return -1;
return 0;
}
@@ -426,26 +462,19 @@ int BPFModule::load_maps(sec_map_def &sections) {
}
// update instructions
- for (auto section : sections) {
- auto sec_name = section.first;
- if (strncmp(".bpf.fn.", sec_name.c_str(), 8) == 0) {
- uint8_t *addr = get<0>(section.second);
- uintptr_t size = get<1>(section.second);
- struct bpf_insn *insns = (struct bpf_insn *)addr;
- int i, num_insns;
-
- num_insns = size/sizeof(struct bpf_insn);
- for (i = 0; i < num_insns; i++) {
- if (insns[i].code == (BPF_LD | BPF_DW | BPF_IMM)) {
- // change map_fd is it is a ld_pseudo */
- if (insns[i].src_reg == BPF_PSEUDO_MAP_FD &&
- map_fds.find(insns[i].imm) != map_fds.end())
- insns[i].imm = map_fds[insns[i].imm];
- i++;
- }
+ prog_func_info_->for_each_func([&](std::string name, FuncInfo &info) {
+ struct bpf_insn *insns = (struct bpf_insn *)info.start_;
+ uint32_t i, num_insns = info.size_ / sizeof(struct bpf_insn);
+ for (i = 0; i < num_insns; i++) {
+ if (insns[i].code == (BPF_LD | BPF_DW | BPF_IMM)) {
+ // change map_fd if it is a ld_pseudo
+ if (insns[i].src_reg == BPF_PSEUDO_MAP_FD &&
+ map_fds.find(insns[i].imm) != map_fds.end())
+ insns[i].imm = map_fds[insns[i].imm];
+ i++;
}
}
- }
+ });
return 0;
}
@@ -474,7 +503,8 @@ int BPFModule::finalize() {
string err;
EngineBuilder builder(move(mod_));
builder.setErrorStr(&err);
- builder.setMCJITMemoryManager(ebpf::make_unique<MyMemoryManager>(sections_p));
+ builder.setMCJITMemoryManager(
+ ebpf::make_unique<MyMemoryManager>(sections_p, &*prog_func_info_));
builder.setMArch("bpf");
#if LLVM_MAJOR_VERSION <= 11
builder.setUseOrcMCJITReplacement(false);
@@ -485,20 +515,19 @@ int BPFModule::finalize() {
return -1;
}
-#if LLVM_MAJOR_VERSION >= 9
engine_->setProcessAllSections(true);
-#else
- if (flags_ & DEBUG_SOURCE)
- engine_->setProcessAllSections(true);
-#endif
if (int rc = run_pass_manager(*mod))
return rc;
engine_->finalizeObject();
+ prog_func_info_->for_each_func([&](std::string name, FuncInfo &info) {
+ info.start_ = (uint8_t *)engine_->getFunctionAddress(name);
+ });
+ finalize_prog_func_info();
if (flags_ & DEBUG_SOURCE) {
- SourceDebugger src_debugger(mod, *sections_p, FN_PREFIX, mod_src_,
+ SourceDebugger src_debugger(mod, *sections_p, *prog_func_info_, mod_src_,
src_dbg_fmap_);
src_debugger.dump();
}
@@ -521,51 +550,74 @@ int BPFModule::finalize() {
}
sections_[fname] = make_tuple(tmp_p, size, get<2>(section.second));
}
+
+ prog_func_info_->for_each_func([](std::string name, FuncInfo &info) {
+ uint8_t *tmp_p = new uint8_t[info.size_];
+ memcpy(tmp_p, info.start_, info.size_);
+ info.start_ = tmp_p;
+ });
engine_.reset();
ctx_.reset();
}
- // give functions an id
- for (auto section : sections_)
- if (!strncmp(FN_PREFIX.c_str(), section.first.c_str(), FN_PREFIX.size()))
- function_names_.push_back(section.first);
-
return 0;
}
-size_t BPFModule::num_functions() const {
- return function_names_.size();
+void BPFModule::finalize_prog_func_info() {
+ // prog_func_info_'s FuncInfo data is gradually populated (first in frontend
+ // action, then bpf_module). It's possible for a FuncInfo to have been
+ // created by FrontendAction but no corresponding start location found in
+ // bpf_module - filter out these functions
+ //
+ // The numeric function ids in the new prog_func_info_ are considered
+ // canonical
+ std::unique_ptr<ProgFuncInfo> finalized = ebpf::make_unique<ProgFuncInfo>();
+ prog_func_info_->for_each_func([&](std::string name, FuncInfo &info) {
+ if(info.start_) {
+ auto i = finalized->add_func(name);
+ if (i) { // should always be true
+ *i = info;
+ }
+ }
+ });
+ prog_func_info_.swap(finalized);
}
+size_t BPFModule::num_functions() const { return prog_func_info_->num_funcs(); }
+
const char * BPFModule::function_name(size_t id) const {
- if (id >= function_names_.size())
- return nullptr;
- return function_names_[id].c_str() + FN_PREFIX.size();
+ auto name = prog_func_info_->func_name(id);
+ if (name)
+ return name->c_str();
+ return nullptr;
}
uint8_t * BPFModule::function_start(size_t id) const {
- if (id >= function_names_.size())
- return nullptr;
- auto section = sections_.find(function_names_[id]);
- if (section == sections_.end())
- return nullptr;
- return get<0>(section->second);
+ auto fn = prog_func_info_->get_func(id);
+ if (fn)
+ return fn->start_;
+ return nullptr;
}
uint8_t * BPFModule::function_start(const string &name) const {
- auto section = sections_.find(FN_PREFIX + name);
- if (section == sections_.end())
- return nullptr;
-
- return get<0>(section->second);
+ auto fn = prog_func_info_->get_func(name);
+ if (fn)
+ return fn->start_;
+ return nullptr;
}
const char * BPFModule::function_source(const string &name) const {
- return func_src_->src(name);
+ auto fn = prog_func_info_->get_func(name);
+ if (fn)
+ return fn->src_.c_str();
+ return "";
}
const char * BPFModule::function_source_rewritten(const string &name) const {
- return func_src_->src_rewritten(name);
+ auto fn = prog_func_info_->get_func(name);
+ if (fn)
+ return fn->src_rewritten_.c_str();
+ return "";
}
int BPFModule::annotate_prog_tag(const string &name, int prog_fd,
@@ -637,20 +689,17 @@ int BPFModule::annotate_prog_tag(const string &name, int prog_fd,
}
size_t BPFModule::function_size(size_t id) const {
- if (id >= function_names_.size())
- return 0;
- auto section = sections_.find(function_names_[id]);
- if (section == sections_.end())
- return 0;
- return get<1>(section->second);
+ auto fn = prog_func_info_->get_func(id);
+ if (fn)
+ return fn->size_;
+ return 0;
}
size_t BPFModule::function_size(const string &name) const {
- auto section = sections_.find(FN_PREFIX + name);
- if (section == sections_.end())
- return 0;
-
- return get<1>(section->second);
+ auto fn = prog_func_info_->get_func(name);
+ if (fn)
+ return fn->size_;
+ return 0;
}
char * BPFModule::license() const {
@@ -903,7 +952,7 @@ int BPFModule::bcc_func_load(int prog_type, const char *name,
int btf_fd = btf_->get_fd();
char secname[256];
- ::snprintf(secname, sizeof(secname), ".bpf.fn.%s", name);
+ ::snprintf(secname, sizeof(secname), "%s%s", BPF_FN_PREFIX, name);
ret = btf_->get_btf_info(secname, &func_info, &func_info_cnt,
&finfo_rec_size, &line_info,
&line_info_cnt, &linfo_rec_size);
diff --git a/src/cc/bpf_module.h b/src/cc/bpf_module.h
index 87938c3f..fb368af2 100644
--- a/src/cc/bpf_module.h
+++ b/src/cc/bpf_module.h
@@ -59,14 +59,13 @@ class TableDesc;
class TableStorage;
class BLoader;
class ClangLoader;
-class FuncSource;
+class ProgFuncInfo;
class BTF;
bool bpf_module_rw_engine_enabled(void);
class BPFModule {
private:
- static const std::string FN_PREFIX;
int init_engine();
void initialize_rw_engine();
void cleanup_rw_engine();
@@ -74,6 +73,7 @@ class BPFModule {
int finalize();
int annotate();
void annotate_light();
+ void finalize_prog_func_info();
std::unique_ptr<llvm::ExecutionEngine> finalize_rw(std::unique_ptr<llvm::Module> mod);
std::string make_reader(llvm::Module *mod, llvm::Type *type);
std::string make_writer(llvm::Module *mod, llvm::Type *type);
@@ -162,11 +162,10 @@ class BPFModule {
std::unique_ptr<llvm::ExecutionEngine> engine_;
std::unique_ptr<llvm::ExecutionEngine> rw_engine_;
std::unique_ptr<llvm::Module> mod_;
- std::unique_ptr<FuncSource> func_src_;
+ std::unique_ptr<ProgFuncInfo> prog_func_info_;
sec_map_def sections_;
std::vector<TableDesc *> tables_;
std::map<std::string, size_t> table_names_;
- std::vector<std::string> function_names_;
std::map<llvm::Type *, std::string> readers_;
std::map<llvm::Type *, std::string> writers_;
std::string id_;
diff --git a/src/cc/bpf_module_rw_engine.cc b/src/cc/bpf_module_rw_engine.cc
index 533d8a13..f1649880 100644
--- a/src/cc/bpf_module_rw_engine.cc
+++ b/src/cc/bpf_module_rw_engine.cc
@@ -401,7 +401,12 @@ int BPFModule::annotate() {
GlobalValue *gvar = mod_->getNamedValue(table.name);
if (!gvar) continue;
if (PointerType *pt = dyn_cast<PointerType>(gvar->getType())) {
- if (StructType *st = dyn_cast<StructType>(pt->getElementType())) {
+#if LLVM_MAJOR_VERSION >= 15
+ StructType *st = dyn_cast<StructType>(pt->getPointerElementType());
+#else
+ StructType *st = dyn_cast<StructType>(pt->getElementType());
+#endif
+ if (st) {
if (st->getNumElements() < 2) continue;
Type *key_type = st->elements()[0];
Type *leaf_type = st->elements()[1];
diff --git a/src/cc/compat/linux/virtual_bpf.h b/src/cc/compat/linux/virtual_bpf.h
index 0f3a5473..f54dd255 100644
--- a/src/cc/compat/linux/virtual_bpf.h
+++ b/src/cc/compat/linux/virtual_bpf.h
@@ -331,6 +331,8 @@ union bpf_iter_link_info {
* *ctx_out*, *data_in* and *data_out* must be NULL.
* *repeat* must be zero.
*
+ * BPF_PROG_RUN is an alias for BPF_PROG_TEST_RUN.
+ *
* Return
* Returns zero on success. On error, -1 is returned and *errno*
* is set appropriately.
@@ -996,6 +998,7 @@ enum bpf_attach_type {
BPF_SK_REUSEPORT_SELECT,
BPF_SK_REUSEPORT_SELECT_OR_MIGRATE,
BPF_PERF_EVENT,
+ BPF_TRACE_KPROBE_MULTI,
__MAX_BPF_ATTACH_TYPE
};
@@ -1010,6 +1013,7 @@ enum bpf_link_type {
BPF_LINK_TYPE_NETNS = 5,
BPF_LINK_TYPE_XDP = 6,
BPF_LINK_TYPE_PERF_EVENT = 7,
+ BPF_LINK_TYPE_KPROBE_MULTI = 8,
MAX_BPF_LINK_TYPE,
};
@@ -1112,6 +1116,16 @@ enum bpf_link_type {
*/
#define BPF_F_SLEEPABLE (1U << 4)
+/* If BPF_F_XDP_HAS_FRAGS is used in BPF_PROG_LOAD command, the loaded program
+ * fully supports xdp frags.
+ */
+#define BPF_F_XDP_HAS_FRAGS (1U << 5)
+
+/* link_create.kprobe_multi.flags used in LINK_CREATE command for
+ * BPF_TRACE_KPROBE_MULTI attach type to create return probe.
+ */
+#define BPF_F_KPROBE_MULTI_RETURN (1U << 0)
+
/* When BPF ldimm64's insn[0].src_reg != 0 then this can have
* the following extensions:
*
@@ -1226,6 +1240,8 @@ enum {
/* If set, run the test on the cpu specified by bpf_attr.test.cpu */
#define BPF_F_TEST_RUN_ON_CPU (1U << 0)
+/* If set, XDP frames will be transmitted after processing */
+#define BPF_F_TEST_XDP_LIVE_FRAMES (1U << 1)
/* type for BPF_ENABLE_STATS */
enum bpf_stats_type {
@@ -1387,6 +1403,7 @@ union bpf_attr {
__aligned_u64 ctx_out;
__u32 flags;
__u32 cpu;
+ __u32 batch_size;
} test;
struct { /* anonymous struct used by BPF_*_GET_*_ID */
@@ -1466,6 +1483,13 @@ union bpf_attr {
*/
__u64 bpf_cookie;
} perf_event;
+ struct {
+ __u32 flags;
+ __u32 cnt;
+ __aligned_u64 syms;
+ __aligned_u64 addrs;
+ __aligned_u64 cookies;
+ } kprobe_multi;
};
} link_create;
@@ -1776,6 +1800,8 @@ union bpf_attr {
* 0 on success, or a negative error in case of failure.
*
* u64 bpf_get_current_pid_tgid(void)
+ * Description
+ * Get the current pid and tgid.
* Return
* A 64-bit integer containing the current tgid and pid, and
* created as such:
@@ -1783,6 +1809,8 @@ union bpf_attr {
* *current_task*\ **->pid**.
*
* u64 bpf_get_current_uid_gid(void)
+ * Description
+ * Get the current uid and gid.
* Return
* A 64-bit integer containing the current GID and UID, and
* created as such: *current_gid* **<< 32 \|** *current_uid*.
@@ -2257,6 +2285,8 @@ union bpf_attr {
* The 32-bit hash.
*
* u64 bpf_get_current_task(void)
+ * Description
+ * Get the current task.
* Return
* A pointer to the current task struct.
*
@@ -2287,8 +2317,8 @@ union bpf_attr {
* Return
* The return value depends on the result of the test, and can be:
*
- * * 0, if current task belongs to the cgroup2.
- * * 1, if current task does not belong to the cgroup2.
+ * * 1, if current task belongs to the cgroup2.
+ * * 0, if current task does not belong to the cgroup2.
* * A negative error code, if an error occurred.
*
* long bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags)
@@ -2370,6 +2400,8 @@ union bpf_attr {
* indicate that the hash is outdated and to trigger a
* recalculation the next time the kernel tries to access this
* hash or when the **bpf_get_hash_recalc**\ () helper is called.
+ * Return
+ * void.
*
* long bpf_get_numa_node_id(void)
* Description
@@ -2467,6 +2499,8 @@ union bpf_attr {
* A 8-byte long unique number or 0 if *sk* is NULL.
*
* u32 bpf_get_socket_uid(struct sk_buff *skb)
+ * Description
+ * Get the owner UID of the socket associated to *skb*.
* Return
* The owner UID of the socket associated to *skb*. If the socket
* is **NULL**, or if it is not a full socket (i.e. if it is a
@@ -3241,6 +3275,9 @@ union bpf_attr {
* The id is returned or 0 in case the id could not be retrieved.
*
* u64 bpf_get_current_cgroup_id(void)
+ * Description
+ * Get the current cgroup id based on the cgroup within which
+ * the current task is running.
* Return
* A 64-bit integer containing the current cgroup id based
* on the cgroup within which the current task is running.
@@ -5019,6 +5056,94 @@ union bpf_attr {
*
* Return
* The number of arguments of the traced function.
+ *
+ * int bpf_get_retval(void)
+ * Description
+ * Get the syscall's return value that will be returned to userspace.
+ *
+ * This helper is currently supported by cgroup programs only.
+ * Return
+ * The syscall's return value.
+ *
+ * int bpf_set_retval(int retval)
+ * Description
+ * Set the syscall's return value that will be returned to userspace.
+ *
+ * This helper is currently supported by cgroup programs only.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * u64 bpf_xdp_get_buff_len(struct xdp_buff *xdp_md)
+ * Description
+ * Get the total size of a given xdp buff (linear and paged area)
+ * Return
+ * The total size of a given xdp buffer.
+ *
+ * long bpf_xdp_load_bytes(struct xdp_buff *xdp_md, u32 offset, void *buf, u32 len)
+ * Description
+ * This helper is provided as an easy way to load data from a
+ * xdp buffer. It can be used to load *len* bytes from *offset* from
+ * the frame associated to *xdp_md*, into the buffer pointed by
+ * *buf*.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * long bpf_xdp_store_bytes(struct xdp_buff *xdp_md, u32 offset, void *buf, u32 len)
+ * Description
+ * Store *len* bytes from buffer *buf* into the frame
+ * associated to *xdp_md*, at *offset*.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * long bpf_copy_from_user_task(void *dst, u32 size, const void *user_ptr, struct task_struct *tsk, u64 flags)
+ * Description
+ * Read *size* bytes from user space address *user_ptr* in *tsk*'s
+ * address space, and store the data in *dst*. *flags* is not
+ * used yet and is provided for future extensibility. This helper
+ * can only be used by sleepable programs.
+ * Return
+ * 0 on success, or a negative error in case of failure. On error
+ * *dst* buffer is zeroed out.
+ *
+ * long bpf_skb_set_tstamp(struct sk_buff *skb, u64 tstamp, u32 tstamp_type)
+ * Description
+ * Change the __sk_buff->tstamp_type to *tstamp_type*
+ * and set *tstamp* to the __sk_buff->tstamp together.
+ *
+ * If there is no need to change the __sk_buff->tstamp_type,
+ * the tstamp value can be directly written to __sk_buff->tstamp
+ * instead.
+ *
+ * BPF_SKB_TSTAMP_DELIVERY_MONO is the only tstamp that
+ * will be kept during bpf_redirect_*(). A non zero
+ * *tstamp* must be used with the BPF_SKB_TSTAMP_DELIVERY_MONO
+ * *tstamp_type*.
+ *
+ * A BPF_SKB_TSTAMP_UNSPEC *tstamp_type* can only be used
+ * with a zero *tstamp*.
+ *
+ * Only IPv4 and IPv6 skb->protocol are supported.
+ *
+ * This function is most useful when it needs to set a
+ * mono delivery time to __sk_buff->tstamp and then
+ * bpf_redirect_*() to the egress of an iface. For example,
+ * changing the (rcv) timestamp in __sk_buff->tstamp at
+ * ingress to a mono delivery time and then bpf_redirect_*()
+ * to sch_fq@phy-dev.
+ * Return
+ * 0 on success.
+ * **-EINVAL** for invalid input
+ * **-EOPNOTSUPP** for unsupported protocol
+ *
+ * long bpf_ima_file_hash(struct file *file, void *dst, u32 size)
+ * Description
+ * Returns a calculated IMA hash of the *file*.
+ * If the hash is larger than *size*, then only *size*
+ * bytes will be copied to *dst*
+ * Return
+ * The **hash_algo** is returned on success,
+ * **-EOPNOTSUPP** if the hash calculation failed or **-EINVAL** if
+ * invalid arguments are passed.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -5207,6 +5332,14 @@ union bpf_attr {
FN(get_func_arg), \
FN(get_func_ret), \
FN(get_func_arg_cnt), \
+ FN(get_retval), \
+ FN(set_retval), \
+ FN(xdp_get_buff_len), \
+ FN(xdp_load_bytes), \
+ FN(xdp_store_bytes), \
+ FN(copy_from_user_task), \
+ FN(skb_set_tstamp), \
+ FN(ima_file_hash), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -5396,6 +5529,15 @@ union { \
__u64 :64; \
} __attribute__((aligned(8)))
+enum {
+ BPF_SKB_TSTAMP_UNSPEC,
+ BPF_SKB_TSTAMP_DELIVERY_MONO, /* tstamp has mono delivery time */
+ /* For any BPF_SKB_TSTAMP_* that the bpf prog cannot handle,
+ * the bpf prog should handle it like BPF_SKB_TSTAMP_UNSPEC
+ * and try to deduce it by ingress, egress or skb->sk->sk_clockid.
+ */
+};
+
/* user accessible mirror of in-kernel sk_buff.
* new fields can only be added to the end of this structure
*/
@@ -5436,7 +5578,8 @@ struct __sk_buff {
__u32 gso_segs;
__bpf_md_ptr(struct bpf_sock *, sk);
__u32 gso_size;
- __u32 :32; /* Padding, future use. */
+ __u8 tstamp_type;
+ __u32 :24; /* Padding, future use. */
__u64 hwtstamp;
};
@@ -5501,7 +5644,8 @@ struct bpf_sock {
__u32 src_ip4;
__u32 src_ip6[4];
__u32 src_port; /* host byte order */
- __u32 dst_port; /* network byte order */
+ __be16 dst_port; /* network byte order */
+ __u16 :16; /* zero padding */
__u32 dst_ip4;
__u32 dst_ip6[4];
__u32 state;
@@ -6379,7 +6523,8 @@ struct bpf_sk_lookup {
__u32 protocol; /* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */
__u32 remote_ip4; /* Network byte order */
__u32 remote_ip6[4]; /* Network byte order */
- __u32 remote_port; /* Network byte order */
+ __be16 remote_port; /* Network byte order */
+ __u16 :16; /* Zero padding */
__u32 local_ip4; /* Network byte order */
__u32 local_ip6[4]; /* Network byte order */
__u32 local_port; /* Host byte order */
diff --git a/src/cc/export/helpers.h b/src/cc/export/helpers.h
index c1253e29..7ede57a3 100644
--- a/src/cc/export/helpers.h
+++ b/src/cc/export/helpers.h
@@ -109,6 +109,12 @@ struct _name##_table_t { \
void (*increment) (_key_type, ...); \
void (*atomic_increment) (_key_type, ...); \
int (*get_stackid) (void *, u64); \
+ void * (*sk_storage_get) (void *, void *, int); \
+ int (*sk_storage_delete) (void *); \
+ void * (*inode_storage_get) (void *, void *, int); \
+ int (*inode_storage_delete) (void *); \
+ void * (*task_storage_get) (void *, void *, int); \
+ int (*task_storage_delete) (void *); \
u32 max_entries; \
int flags; \
}; \
@@ -164,8 +170,17 @@ struct _name##_table_t __##_name
#define BPF_TABLE(_table_type, _key_type, _leaf_type, _name, _max_entries) \
BPF_F_TABLE(_table_type, _key_type, _leaf_type, _name, _max_entries, 0)
-#define BPF_TABLE_PINNED(_table_type, _key_type, _leaf_type, _name, _max_entries, _pinned) \
-BPF_TABLE(_table_type ":" _pinned, _key_type, _leaf_type, _name, _max_entries)
+#define BPF_TABLE_PINNED7(_table_type, _key_type, _leaf_type, _name, _max_entries, _pinned, _flags) \
+ BPF_F_TABLE(_table_type ":" _pinned, _key_type, _leaf_type, _name, _max_entries, _flags)
+
+#define BPF_TABLE_PINNED6(_table_type, _key_type, _leaf_type, _name, _max_entries, _pinned) \
+ BPF_F_TABLE(_table_type ":" _pinned, _key_type, _leaf_type, _name, _max_entries, 0)
+
+#define BPF_TABLE_PINNEDX(_1, _2, _3, _4, _5, _6, _7, NAME, ...) NAME
+
+// Define a pinned table with optional flags argument
+#define BPF_TABLE_PINNED(...) \
+ BPF_TABLE_PINNEDX(__VA_ARGS__, BPF_TABLE_PINNED7, BPF_TABLE_PINNED6)(__VA_ARGS__)
// define a table same as above but allow it to be referenced by other modules
#define BPF_TABLE_PUBLIC(_table_type, _key_type, _leaf_type, _name, _max_entries) \
@@ -952,6 +967,20 @@ static long (*bpf_get_func_arg)(void *ctx, __u32 n, __u64 *value) =
(void *)BPF_FUNC_get_func_arg;
static long (*bpf_get_func_ret)(void *ctx, __u64 *value) = (void *)BPF_FUNC_get_func_ret;
static long (*bpf_get_func_arg_cnt)(void *ctx) = (void *)BPF_FUNC_get_func_arg_cnt;
+static int (*bpf_get_retval)(void) = (void *)BPF_FUNC_get_retval;
+static int (*bpf_set_retval)(int retval) = (void *)BPF_FUNC_set_retval;
+static __u64 (*bpf_xdp_get_buff_len)(struct xdp_md *xdp_md) = (void *)BPF_FUNC_xdp_get_buff_len;
+static long (*bpf_xdp_load_bytes)(struct xdp_md *xdp_md, __u32 offset, void *buf, __u32 len) =
+ (void *)BPF_FUNC_xdp_load_bytes;
+static long (*bpf_xdp_store_bytes)(struct xdp_md *xdp_md, __u32 offset, void *buf, __u32 len) =
+ (void *)BPF_FUNC_xdp_store_bytes;
+static long (*bpf_copy_from_user_task)(void *dst, __u32 size, const void *user_ptr,
+ struct task_struct *tsk, __u64 flags) =
+ (void *)BPF_FUNC_copy_from_user_task;
+static long (*bpf_skb_set_tstamp)(struct __sk_buff *skb, __u64 tstamp, __u32 tstamp_type) =
+ (void *)BPF_FUNC_skb_set_tstamp;
+static long (*bpf_ima_file_hash)(struct file *file, void *dst, __u32 size) =
+ (void *)BPF_FUNC_ima_file_hash;
/* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions
diff --git a/src/cc/frontends/clang/b_frontend_action.cc b/src/cc/frontends/clang/b_frontend_action.cc
index 7bfc4ed7..9b2853a9 100644
--- a/src/cc/frontends/clang/b_frontend_action.cc
+++ b/src/cc/frontends/clang/b_frontend_action.cc
@@ -811,10 +811,23 @@ bool BTypeVisitor::VisitFunctionDecl(FunctionDecl *D) {
if (fe_.is_rewritable_ext_func(D)) {
current_fn_ = string(D->getName());
string bd = rewriter_.getRewrittenText(expansionRange(D->getSourceRange()));
- fe_.func_src_.set_src(current_fn_, bd);
+ auto func_info = fe_.prog_func_info_.add_func(current_fn_);
+ if (!func_info) {
+ // We should only reach add_func above once per function seen, but the
+ // BPF_PROG-helper using macros in export/helpers.h (KFUNC_PROBE ..
+ // LSM_PROBE) break this logic. TODO: adjust export/helpers.h to not
+ // do so and bail out here, or find a better place to do add_func
+ func_info = fe_.prog_func_info_.get_func(current_fn_);
+ //error(GET_BEGINLOC(D), "redefinition of existing function");
+ //return false;
+ }
+ func_info->src_ = bd;
fe_.func_range_[current_fn_] = expansionRange(D->getSourceRange());
- string attr = string("__attribute__((section(\"") + BPF_FN_PREFIX + D->getName().str() + "\")))\n";
- rewriter_.InsertText(real_start_loc, attr);
+ if (!D->getAttr<SectionAttr>()) {
+ string attr = string("__attribute__((section(\"") + BPF_FN_PREFIX +
+ D->getName().str() + "\")))\n";
+ rewriter_.InsertText(real_start_loc, attr);
+ }
if (D->param_size() > MAX_CALLING_CONV_REGS + 1) {
error(GET_BEGINLOC(D->getParamDecl(MAX_CALLING_CONV_REGS + 1)),
"too many arguments, bcc only supports in-register parameters");
@@ -1689,13 +1702,12 @@ void BTypeConsumer::HandleTranslationUnit(ASTContext &Context) {
}
-BFrontendAction::BFrontendAction(llvm::raw_ostream &os, unsigned flags,
- TableStorage &ts, const std::string &id,
- const std::string &main_path,
- FuncSource &func_src, std::string &mod_src,
- const std::string &maps_ns,
- fake_fd_map_def &fake_fd_map,
- std::map<std::string, std::vector<std::string>> &perf_events)
+BFrontendAction::BFrontendAction(
+ llvm::raw_ostream &os, unsigned flags, TableStorage &ts,
+ const std::string &id, const std::string &main_path,
+ ProgFuncInfo &prog_func_info, std::string &mod_src,
+ const std::string &maps_ns, fake_fd_map_def &fake_fd_map,
+ std::map<std::string, std::vector<std::string>> &perf_events)
: os_(os),
flags_(flags),
ts_(ts),
@@ -1703,7 +1715,7 @@ BFrontendAction::BFrontendAction(llvm::raw_ostream &os, unsigned flags,
maps_ns_(maps_ns),
rewriter_(new Rewriter),
main_path_(main_path),
- func_src_(func_src),
+ prog_func_info_(prog_func_info),
mod_src_(mod_src),
next_fake_fd_(-1),
fake_fd_map_(fake_fd_map),
@@ -1781,7 +1793,9 @@ void BFrontendAction::EndSourceFileAction() {
for (auto func : func_range_) {
auto f = func.first;
string bd = rewriter_->getRewrittenText(func_range_[f]);
- func_src_.set_src_rewritten(f, bd);
+ auto fn = prog_func_info_.get_func(f);
+ if (fn)
+ fn->src_rewritten_ = bd;
}
rewriter_->getEditBuffer(rewriter_->getSourceMgr().getMainFileID()).write(os_);
os_.flush();
diff --git a/src/cc/frontends/clang/b_frontend_action.h b/src/cc/frontends/clang/b_frontend_action.h
index 530d322a..22564591 100644
--- a/src/cc/frontends/clang/b_frontend_action.h
+++ b/src/cc/frontends/clang/b_frontend_action.h
@@ -40,7 +40,7 @@ class StringRef;
namespace ebpf {
class BFrontendAction;
-class FuncSource;
+class ProgFuncInfo;
// Traces maps with external pointers as values.
class MapVisitor : public clang::RecursiveASTVisitor<MapVisitor> {
@@ -156,9 +156,8 @@ class BFrontendAction : public clang::ASTFrontendAction {
// should be written.
BFrontendAction(llvm::raw_ostream &os, unsigned flags, TableStorage &ts,
const std::string &id, const std::string &main_path,
- FuncSource &func_src, std::string &mod_src,
- const std::string &maps_ns,
- fake_fd_map_def &fake_fd_map,
+ ProgFuncInfo &prog_func_info, std::string &mod_src,
+ const std::string &maps_ns, fake_fd_map_def &fake_fd_map,
std::map<std::string, std::vector<std::string>> &perf_events);
// Called by clang when the AST has been completed, here the output stream
@@ -192,7 +191,7 @@ class BFrontendAction : public clang::ASTFrontendAction {
friend class BTypeVisitor;
std::map<std::string, clang::SourceRange> func_range_;
const std::string &main_path_;
- FuncSource &func_src_;
+ ProgFuncInfo &prog_func_info_;
std::string &mod_src_;
std::set<clang::Decl *> m_;
int next_fake_fd_;
diff --git a/src/cc/frontends/clang/loader.cc b/src/cc/frontends/clang/loader.cc
index 4f9914a2..d0f4d880 100644
--- a/src/cc/frontends/clang/loader.cc
+++ b/src/cc/frontends/clang/loader.cc
@@ -66,6 +66,44 @@ using std::vector;
namespace ebpf {
+optional<FuncInfo &> ProgFuncInfo::get_func(std::string name) {
+ auto it = funcs_.find(name);
+ if (it != funcs_.end())
+ return it->second;
+ return nullopt;
+}
+
+optional<FuncInfo &> ProgFuncInfo::get_func(size_t id) {
+ auto it = func_idx_.find(id);
+ if (it != func_idx_.end())
+ return get_func(it->second);
+ return nullopt;
+}
+
+optional<std::string &> ProgFuncInfo::func_name(size_t id) {
+ auto it = func_idx_.find(id);
+ if (it != func_idx_.end())
+ return it->second;
+ return nullopt;
+}
+
+void ProgFuncInfo::for_each_func(
+ std::function<void(std::string, FuncInfo &)> cb) {
+ for (auto it = funcs_.begin(); it != funcs_.end(); ++it) {
+ cb(it->first, it->second);
+ }
+}
+
+optional<FuncInfo &> ProgFuncInfo::add_func(std::string name) {
+ auto fn = get_func(name);
+ if (fn)
+ return nullopt;
+ size_t current = funcs_.size();
+ funcs_.emplace(name, 0);
+ func_idx_.emplace(current, name);
+ return get_func(name);
+}
+
ClangLoader::ClangLoader(llvm::LLVMContext *ctx, unsigned flags)
: ctx_(ctx), flags_(flags)
{
@@ -152,13 +190,12 @@ static int CreateFromArgs(clang::CompilerInvocation &invocation,
}
-int ClangLoader::parse(unique_ptr<llvm::Module> *mod, TableStorage &ts,
- const string &file, bool in_memory, const char *cflags[],
- int ncflags, const std::string &id, FuncSource &func_src,
- std::string &mod_src,
- const std::string &maps_ns,
- fake_fd_map_def &fake_fd_map,
- std::map<std::string, std::vector<std::string>> &perf_events) {
+int ClangLoader::parse(
+ unique_ptr<llvm::Module> *mod, TableStorage &ts, const string &file,
+ bool in_memory, const char *cflags[], int ncflags, const std::string &id,
+ ProgFuncInfo &prog_func_info, std::string &mod_src,
+ const std::string &maps_ns, fake_fd_map_def &fake_fd_map,
+ std::map<std::string, std::vector<std::string>> &perf_events) {
string main_path = "/virtual/main.c";
unique_ptr<llvm::MemoryBuffer> main_buf;
struct utsname un;
@@ -249,6 +286,7 @@ int ClangLoader::parse(unique_ptr<llvm::Module> *mod, TableStorage &ts,
return -1;
#if LLVM_MAJOR_VERSION >= 9
flags_cstr.push_back("-g");
+ flags_cstr.push_back("-gdwarf-4");
#else
if (flags_ & DEBUG_SOURCE)
flags_cstr.push_back("-g");
@@ -280,7 +318,8 @@ int ClangLoader::parse(unique_ptr<llvm::Module> *mod, TableStorage &ts,
#endif
if (do_compile(mod, ts, in_memory, flags_cstr, flags_cstr_rem, main_path,
- main_buf, id, func_src, mod_src, true, maps_ns, fake_fd_map, perf_events)) {
+ main_buf, id, prog_func_info, mod_src, true, maps_ns,
+ fake_fd_map, perf_events)) {
#if BCC_BACKUP_COMPILE != 1
return -1;
#else
@@ -288,11 +327,12 @@ int ClangLoader::parse(unique_ptr<llvm::Module> *mod, TableStorage &ts,
llvm::errs() << "WARNING: compilation failure, trying with system bpf.h\n";
ts.DeletePrefix(Path({id}));
- func_src.clear();
+ prog_func_info.clear();
mod_src.clear();
fake_fd_map.clear();
if (do_compile(mod, ts, in_memory, flags_cstr, flags_cstr_rem, main_path,
- main_buf, id, func_src, mod_src, false, maps_ns, fake_fd_map, perf_events))
+ main_buf, id, prog_func_info, mod_src, false, maps_ns,
+ fake_fd_map, perf_events))
return -1;
#endif
}
@@ -334,17 +374,14 @@ string get_clang_target(void) {
return string(ret);
}
-int ClangLoader::do_compile(unique_ptr<llvm::Module> *mod, TableStorage &ts,
- bool in_memory,
- const vector<const char *> &flags_cstr_in,
- const vector<const char *> &flags_cstr_rem,
- const std::string &main_path,
- const unique_ptr<llvm::MemoryBuffer> &main_buf,
- const std::string &id, FuncSource &func_src,
- std::string &mod_src, bool use_internal_bpfh,
- const std::string &maps_ns,
- fake_fd_map_def &fake_fd_map,
- std::map<std::string, std::vector<std::string>> &perf_events) {
+int ClangLoader::do_compile(
+ unique_ptr<llvm::Module> *mod, TableStorage &ts, bool in_memory,
+ const vector<const char *> &flags_cstr_in,
+ const vector<const char *> &flags_cstr_rem, const std::string &main_path,
+ const unique_ptr<llvm::MemoryBuffer> &main_buf, const std::string &id,
+ ProgFuncInfo &prog_func_info, std::string &mod_src, bool use_internal_bpfh,
+ const std::string &maps_ns, fake_fd_map_def &fake_fd_map,
+ std::map<std::string, std::vector<std::string>> &perf_events) {
using namespace clang;
vector<const char *> flags_cstr = flags_cstr_in;
@@ -444,7 +481,7 @@ int ClangLoader::do_compile(unique_ptr<llvm::Module> *mod, TableStorage &ts,
// capture the rewritten c file
string out_str1;
llvm::raw_string_ostream os1(out_str1);
- BFrontendAction bact(os1, flags_, ts, id, main_path, func_src, mod_src,
+ BFrontendAction bact(os1, flags_, ts, id, main_path, prog_func_info, mod_src,
maps_ns, fake_fd_map, perf_events);
if (!compiler1.ExecuteAction(bact))
return -1;
@@ -474,27 +511,4 @@ int ClangLoader::do_compile(unique_ptr<llvm::Module> *mod, TableStorage &ts,
return 0;
}
-
-const char * FuncSource::src(const std::string& name) {
- auto src = funcs_.find(name);
- if (src == funcs_.end())
- return "";
- return src->second.src_.data();
-}
-
-const char * FuncSource::src_rewritten(const std::string& name) {
- auto src = funcs_.find(name);
- if (src == funcs_.end())
- return "";
- return src->second.src_rewritten_.data();
-}
-
-void FuncSource::set_src(const std::string& name, const std::string& src) {
- funcs_[name].src_ = src;
-}
-
-void FuncSource::set_src_rewritten(const std::string& name, const std::string& src) {
- funcs_[name].src_rewritten_ = src;
-}
-
} // namespace ebpf
diff --git a/src/cc/frontends/clang/loader.h b/src/cc/frontends/clang/loader.h
index 05db08cb..aa6f9eea 100644
--- a/src/cc/frontends/clang/loader.h
+++ b/src/cc/frontends/clang/loader.h
@@ -16,13 +16,18 @@
#pragma once
+#include <clang/Frontend/CompilerInvocation.h>
+
+#include <functional>
#include <map>
#include <memory>
#include <string>
-#include <clang/Frontend/CompilerInvocation.h>
-
#include "table_storage.h"
+#include "vendor/optional.hpp"
+
+using std::experimental::nullopt;
+using std::experimental::optional;
namespace llvm {
class Module;
@@ -32,21 +37,33 @@ class MemoryBuffer;
namespace ebpf {
-class FuncSource {
- class SourceCode {
- public:
- SourceCode(const std::string& s1 = "", const std::string& s2 = ""): src_(s1), src_rewritten_(s2) {}
- std::string src_;
- std::string src_rewritten_;
- };
- std::map<std::string, SourceCode> funcs_;
+struct FuncInfo {
+ uint8_t *start_ = nullptr;
+ size_t size_ = 0;
+ std::string section_;
+ std::string src_;
+ std::string src_rewritten_;
+ // dummy constructor so emplace() works
+ FuncInfo(int i) {}
+};
+
+class ProgFuncInfo {
public:
- FuncSource() {}
- void clear() { funcs_.clear(); }
- const char * src(const std::string& name);
- const char * src_rewritten(const std::string& name);
- void set_src(const std::string& name, const std::string& src);
- void set_src_rewritten(const std::string& name, const std::string& src);
+ ProgFuncInfo() {}
+ void clear() {
+ funcs_.clear();
+ func_idx_.clear();
+ }
+ optional<FuncInfo &> get_func(std::string name);
+ optional<FuncInfo &> get_func(size_t id);
+ optional<std::string &> func_name(size_t id);
+ optional<FuncInfo &> add_func(std::string name);
+ size_t num_funcs() { return funcs_.size(); }
+ void for_each_func(std::function<void(std::string, FuncInfo &)> cb);
+
+ private:
+ std::map<std::string, FuncInfo> funcs_;
+ std::map<uint32_t, std::string> func_idx_;
};
class ClangLoader {
@@ -55,7 +72,7 @@ class ClangLoader {
~ClangLoader();
int parse(std::unique_ptr<llvm::Module> *mod, TableStorage &ts,
const std::string &file, bool in_memory, const char *cflags[],
- int ncflags, const std::string &id, FuncSource &func_src,
+ int ncflags, const std::string &id, ProgFuncInfo &prog_func_info,
std::string &mod_src, const std::string &maps_ns,
fake_fd_map_def &fake_fd_map,
std::map<std::string, std::vector<std::string>> &perf_events);
@@ -66,10 +83,9 @@ class ClangLoader {
const std::vector<const char *> &flags_cstr_rem,
const std::string &main_path,
const std::unique_ptr<llvm::MemoryBuffer> &main_buf,
- const std::string &id, FuncSource &func_src,
+ const std::string &id, ProgFuncInfo &prog_func_info,
std::string &mod_src, bool use_internal_bpfh,
- const std::string &maps_ns,
- fake_fd_map_def &fake_fd_map,
+ const std::string &maps_ns, fake_fd_map_def &fake_fd_map,
std::map<std::string, std::vector<std::string>> &perf_events);
void add_remapped_includes(clang::CompilerInvocation& invocation);
void add_main_input(clang::CompilerInvocation& invocation,
diff --git a/src/cc/libbpf.c b/src/cc/libbpf.c
index bbd61615..c93f82f6 100644
--- a/src/cc/libbpf.c
+++ b/src/cc/libbpf.c
@@ -289,7 +289,15 @@ static struct bpf_helper helpers[] = {
{"strncmp", "5.17"},
{"get_func_arg", "5.17"},
{"get_func_ret", "5.17"},
- {"get_func_arg_cnt", "5.17"},
+ {"get_func_arg_cnt", "5.17"},
+ {"get_retval", "5.18"},
+ {"set_retval", "5.18"},
+ {"xdp_get_buff_len", "5.18"},
+ {"xdp_load_bytes", "5.18"},
+ {"xdp_store_bytes", "5.18"},
+ {"copy_from_user_task", "5.18"},
+ {"skb_set_tstamp", "5.18"},
+ {"ima_file_hash", "5.18"},
};
static uint64_t ptr_to_u64(void *ptr)
@@ -1386,7 +1394,16 @@ int bpf_attach_raw_tracepoint(int progfd, const char *tp_name)
#ifndef MINIMAL_LIBBPF
bool bpf_has_kernel_btf(void)
{
- return libbpf_find_vmlinux_btf_id("bpf_prog_put", 0) > 0;
+ struct btf *btf;
+ int err;
+
+ btf = btf__parse_raw("/sys/kernel/btf/vmlinux");
+ err = libbpf_get_error(btf);
+ if (err)
+ return false;
+
+ btf__free(btf);
+ return true;
}
int kernel_struct_has_field(const char *struct_name, const char *field_name)
diff --git a/src/cc/perf_reader.c b/src/cc/perf_reader.c
index dedb11d2..f4c24fdd 100644
--- a/src/cc/perf_reader.c
+++ b/src/cc/perf_reader.c
@@ -93,7 +93,7 @@ int perf_reader_mmap(struct perf_reader *reader) {
return -1;
}
- reader->base = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE , MAP_SHARED, reader->fd, 0);
+ reader->base = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, reader->fd, 0);
if (reader->base == MAP_FAILED) {
perror("mmap");
return -1;
@@ -237,6 +237,14 @@ int perf_reader_poll(int num_readers, struct perf_reader **readers, int timeout)
return 0;
}
+int perf_reader_consume(int num_readers, struct perf_reader **readers) {
+ int i;
+ for (i = 0; i < num_readers; ++i) {
+ perf_reader_event_read(readers[i]);
+ }
+ return 0;
+}
+
void perf_reader_set_fd(struct perf_reader *reader, int fd) {
reader->fd = fd;
}
diff --git a/src/cc/perf_reader.h b/src/cc/perf_reader.h
index dbe9cfb8..278b8850 100644
--- a/src/cc/perf_reader.h
+++ b/src/cc/perf_reader.h
@@ -32,6 +32,7 @@ void perf_reader_free(void *ptr);
int perf_reader_mmap(struct perf_reader *reader);
void perf_reader_event_read(struct perf_reader *reader);
int perf_reader_poll(int num_readers, struct perf_reader **readers, int timeout);
+int perf_reader_consume(int num_readers, struct perf_reader **readers);
int perf_reader_fd(struct perf_reader *reader);
void perf_reader_set_fd(struct perf_reader *reader, int fd);
diff --git a/src/cc/usdt.h b/src/cc/usdt.h
index 5f125882..a3d9bfe5 100644
--- a/src/cc/usdt.h
+++ b/src/cc/usdt.h
@@ -166,6 +166,22 @@ private:
X64_REG_14,
X64_REG_15,
X64_REG_RIP,
+ X64_REG_XMM0,
+ X64_REG_XMM1,
+ X64_REG_XMM2,
+ X64_REG_XMM3,
+ X64_REG_XMM4,
+ X64_REG_XMM5,
+ X64_REG_XMM6,
+ X64_REG_XMM7,
+ X64_REG_XMM8,
+ X64_REG_XMM9,
+ X64_REG_XMM10,
+ X64_REG_XMM11,
+ X64_REG_XMM12,
+ X64_REG_XMM13,
+ X64_REG_XMM14,
+ X64_REG_XMM15,
};
struct RegInfo {
diff --git a/src/cc/usdt/usdt.cc b/src/cc/usdt/usdt.cc
index e3d0c441..84f80768 100644
--- a/src/cc/usdt/usdt.cc
+++ b/src/cc/usdt/usdt.cc
@@ -175,6 +175,11 @@ bool Probe::usdt_getarg(std::ostream &stream, const std::string& probe_func) {
if (arg_count == 0)
return true;
+ uint64_t page_size = sysconf(_SC_PAGESIZE);
+ std::unordered_set<int> page_offsets;
+ for (Location &location : locations_)
+ page_offsets.insert(location.address_ % page_size);
+
for (size_t arg_n = 0; arg_n < arg_count; ++arg_n) {
std::string ctype = largest_arg_type(arg_n);
std::string cptr = tfm::format("*((%s *)dest)", ctype);
@@ -193,15 +198,22 @@ bool Probe::usdt_getarg(std::ostream &stream, const std::string& probe_func) {
return false;
stream << "\n return 0;\n}\n";
} else {
- stream << " switch(PT_REGS_IP(ctx)) {\n";
+ if (page_offsets.size() == locations_.size())
+ tfm::format(stream, " switch (PT_REGS_IP(ctx) %% 0x%xULL) {\n", page_size);
+ else
+ stream << " switch (PT_REGS_IP(ctx)) {\n";
for (Location &location : locations_) {
- uint64_t global_address;
+ if (page_offsets.size() == locations_.size()) {
+ tfm::format(stream, " case 0x%xULL: ", location.address_ % page_size);
+ } else {
+ uint64_t global_address;
- if (!resolve_global_address(&global_address, location.bin_path_,
- location.address_))
- return false;
+ if (!resolve_global_address(&global_address, location.bin_path_,
+ location.address_))
+ return false;
- tfm::format(stream, " case 0x%xULL: ", global_address);
+ tfm::format(stream, " case 0x%xULL: ", global_address);
+ }
if (!location.arguments_[arg_n].assign_to_local(stream, cptr, location.bin_path_,
pid_))
return false;
diff --git a/src/cc/usdt/usdt_args.cc b/src/cc/usdt/usdt_args.cc
index c3384e16..88555c3e 100644
--- a/src/cc/usdt/usdt_args.cc
+++ b/src/cc/usdt/usdt_args.cc
@@ -69,7 +69,13 @@ bool Argument::assign_to_local(std::ostream &stream,
}
if (!deref_offset_) {
- tfm::format(stream, "%s = ctx->%s;", local_name, *base_register_name_);
+ if(base_register_name_->substr(0,3) == "xmm") {
+ // TODO: When we can read xmm registers from BPF, update this to read
+ // the actual value
+ tfm::format(stream, "%s = 0;", local_name);
+ } else {
+ tfm::format(stream, "%s = ctx->%s;", local_name, *base_register_name_);
+ }
// Put a compiler barrier to prevent optimization
// like llvm SimplifyCFG SinkThenElseCodeToEnd
// Volatile marking is not sufficient to prevent such optimization.
@@ -532,6 +538,23 @@ const std::unordered_map<std::string, ArgumentParser_x64::RegInfo>
{"r15w", {X64_REG_15, 2}}, {"r15b", {X64_REG_15, 1}},
{"rip", {X64_REG_RIP, 8}},
+
+ {"xmm0", {X64_REG_XMM0, 16}},
+ {"xmm1", {X64_REG_XMM1, 16}},
+ {"xmm2", {X64_REG_XMM2, 16}},
+ {"xmm3", {X64_REG_XMM3, 16}},
+ {"xmm4", {X64_REG_XMM4, 16}},
+ {"xmm5", {X64_REG_XMM5, 16}},
+ {"xmm6", {X64_REG_XMM6, 16}},
+ {"xmm7", {X64_REG_XMM7, 16}},
+ {"xmm8", {X64_REG_XMM8, 16}},
+ {"xmm9", {X64_REG_XMM9, 16}},
+ {"xmm10", {X64_REG_XMM10, 16}},
+ {"xmm11", {X64_REG_XMM11, 16}},
+ {"xmm12", {X64_REG_XMM12, 16}},
+ {"xmm13", {X64_REG_XMM13, 16}},
+ {"xmm14", {X64_REG_XMM14, 16}},
+ {"xmm15", {X64_REG_XMM15, 16}},
};
void ArgumentParser_x64::reg_to_name(std::string *norm, Register reg) {
@@ -590,6 +613,56 @@ void ArgumentParser_x64::reg_to_name(std::string *norm, Register reg) {
case X64_REG_RIP:
*norm = "ip";
break;
+
+ case X64_REG_XMM0:
+ *norm = "xmm0";
+ break;
+ case X64_REG_XMM1:
+ *norm = "xmm1";
+ break;
+ case X64_REG_XMM2:
+ *norm = "xmm2";
+ break;
+ case X64_REG_XMM3:
+ *norm = "xmm3";
+ break;
+ case X64_REG_XMM4:
+ *norm = "xmm4";
+ break;
+ case X64_REG_XMM5:
+ *norm = "xmm5";
+ break;
+ case X64_REG_XMM6:
+ *norm = "xmm6";
+ break;
+ case X64_REG_XMM7:
+ *norm = "xmm7";
+ break;
+ case X64_REG_XMM8:
+ *norm = "xmm8";
+ break;
+ case X64_REG_XMM9:
+ *norm = "xmm9";
+ break;
+ case X64_REG_XMM10:
+ *norm = "xmm10";
+ break;
+ case X64_REG_XMM11:
+ *norm = "xmm11";
+ break;
+ case X64_REG_XMM12:
+ *norm = "xmm12";
+ break;
+ case X64_REG_XMM13:
+ *norm = "xmm13";
+ break;
+ case X64_REG_XMM14:
+ *norm = "xmm14";
+ break;
+ case X64_REG_XMM15:
+ *norm = "xmm15";
+ break;
+
}
}
diff --git a/src/python/bcc/__init__.py b/src/python/bcc/__init__.py
index 2ff5cf02..1118698e 100644
--- a/src/python/bcc/__init__.py
+++ b/src/python/bcc/__init__.py
@@ -957,7 +957,8 @@ class BPF(object):
ct.cast(None, ct.POINTER(bcc_symbol_option)),
ct.byref(sym),
) < 0:
- raise Exception("could not determine address of symbol %s" % symname)
+ raise Exception("could not determine address of symbol %s in %s"
+ % (symname.decode(), module.decode()))
new_addr = sym.offset + sym_off
module_path = ct.cast(sym.module, ct.c_char_p).value
lib.bcc_procutils_free(sym.module)
@@ -1667,6 +1668,18 @@ class BPF(object):
readers[i] = v
lib.perf_reader_poll(len(readers), readers, timeout)
+ def perf_buffer_consume(self):
+ """perf_buffer_consume(self)
+
+ Consume all open perf buffers, regardless of whether or not
+ they currently contain events data. Necessary to catch 'remainder'
+ events when wakeup_events > 1 is set in open_perf_buffer
+ """
+ readers = (ct.c_void_p * len(self.perf_buffers))()
+ for i, v in enumerate(self.perf_buffers.values()):
+ readers[i] = v
+ lib.perf_reader_consume(len(readers), readers)
+
def kprobe_poll(self, timeout = -1):
"""kprobe_poll(self)
diff --git a/src/python/bcc/libbcc.py b/src/python/bcc/libbcc.py
index f9b83b3c..ca5584c6 100644
--- a/src/python/bcc/libbcc.py
+++ b/src/python/bcc/libbcc.py
@@ -26,6 +26,8 @@ lib.bpf_module_create_c.argtypes = [ct.c_char_p, ct.c_uint,
lib.bpf_module_create_c_from_string.restype = ct.c_void_p
lib.bpf_module_create_c_from_string.argtypes = [ct.c_char_p, ct.c_uint,
ct.POINTER(ct.c_char_p), ct.c_int, ct.c_bool, ct.c_char_p]
+lib.bpf_module_rw_engine_enabled.restype = ct.c_bool
+lib.bpf_module_rw_engine_enabled.argtypes = None
lib.bpf_module_destroy.restype = None
lib.bpf_module_destroy.argtypes = [ct.c_void_p]
lib.bpf_module_license.restype = ct.c_char_p
@@ -147,6 +149,8 @@ lib.bpf_open_perf_event.restype = ct.c_int
lib.bpf_open_perf_event.argtypes = [ct.c_uint, ct.c_ulonglong, ct.c_int, ct.c_int]
lib.perf_reader_poll.restype = ct.c_int
lib.perf_reader_poll.argtypes = [ct.c_int, ct.POINTER(ct.c_void_p), ct.c_int]
+lib.perf_reader_consume.restype = ct.c_int
+lib.perf_reader_consume.argtypes = [ct.c_int, ct.POINTER(ct.c_void_p)]
lib.perf_reader_free.restype = None
lib.perf_reader_free.argtypes = [ct.c_void_p]
lib.perf_reader_fd.restype = int
diff --git a/tests/cc/test_bpf_table.cc b/tests/cc/test_bpf_table.cc
index 2d5a5644..43bf28b0 100644
--- a/tests/cc/test_bpf_table.cc
+++ b/tests/cc/test_bpf_table.cc
@@ -21,7 +21,7 @@
#include "BPF.h"
#include "catch.hpp"
-TEST_CASE("test bpf table", "[bpf_table]") {
+TEST_CASE("test bpf table", ebpf::bpf_module_rw_engine_enabled() ? "[bpf_table]" : "[bpf_table][!mayfail]") {
const std::string BPF_PROGRAM = R"(
BPF_TABLE("hash", int, int, myhash, 128);
)";
@@ -92,7 +92,7 @@ TEST_CASE("test bpf table", "[bpf_table]") {
}
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 6, 0)
-TEST_CASE("test bpf percpu tables", "[bpf_percpu_table]") {
+TEST_CASE("test bpf percpu tables", ebpf::bpf_module_rw_engine_enabled() ? "[bpf_percpu_table]" : "[bpf_percpu_table][!mayfail]") {
const std::string BPF_PROGRAM = R"(
BPF_PERCPU_HASH(myhash, int, u64, 128);
)";
diff --git a/tests/cc/test_pinned_table.cc b/tests/cc/test_pinned_table.cc
index 265a8be7..e478b40e 100644
--- a/tests/cc/test_pinned_table.cc
+++ b/tests/cc/test_pinned_table.cc
@@ -47,7 +47,7 @@ TEST_CASE("test pinned table", "[pinned_table]") {
// test table access
{
const std::string BPF_PROGRAM = R"(
- BPF_TABLE_PINNED("hash", u64, u64, ids, 1024, "/sys/fs/bpf/test_pinned_table");
+ BPF_TABLE_PINNED("hash", u64, u64, ids, 0, "/sys/fs/bpf/test_pinned_table", BPF_F_NO_PREALLOC);
)";
ebpf::BPF bpf;
@@ -85,3 +85,65 @@ TEST_CASE("test pinned table", "[pinned_table]") {
}
}
#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 2, 0)
+// Pins a BPF_SK_STORAGE map to bpffs from one program, then builds a second
+// program that opens the same pin through BPF_TABLE_PINNED and loads a
+// cgroup_skb function calling sk_storage_get() on it.
+TEST_CASE("test pinned sk_storage table", "[pinned_sk_storage_table]") {
+ bool mounted = false;
+ // A non-zero status from the grep pipeline is taken to mean bpffs is not
+ // mounted yet; mount it and remember to undo that at the end.
+ if (system("mount | grep /sys/fs/bpf")) {
+ REQUIRE(system("mkdir -p /sys/fs/bpf") == 0);
+ REQUIRE(system("mount -o nosuid,nodev,noexec,mode=700 -t bpf bpf /sys/fs/bpf") == 0);
+ mounted = true;
+ }
+ // prepare test by pinning table to bpffs
+ {
+ const std::string BPF_PROGRAM = R"(
+ BPF_SK_STORAGE(sk_stg, __u64);
+ int test(struct __sk_buff *skb) { return 0; }
+ )";
+
+ ebpf::BPF bpf;
+ ebpf::StatusTuple res(0);
+ res = bpf.init(BPF_PROGRAM);
+ REQUIRE(res.ok());
+
+ // Pin the map's fd so it can be reopened by path in the next scope.
+ REQUIRE(bpf_obj_pin(bpf.get_sk_storage_table<unsigned long long>("sk_stg").get_fd(), "/sys/fs/bpf/test_pinned_table") == 0);
+ }
+
+ // exercise <pinned_map>.sk_storage_get().
+ {
+ const std::string BPF_PROGRAM = R"(
+ BPF_TABLE_PINNED("sk_storage", __u32, __u64, sk_stg, 0, "/sys/fs/bpf/test_pinned_table");
+ int test(struct __sk_buff *skb) {
+ struct bpf_sock *sk;
+ __u64 *val;
+
+ sk = skb->sk;
+ if (!sk)
+ return 0;
+ sk = bpf_sk_fullsock(sk);
+ if (!sk)
+ return 0;
+
+ val = sk_stg.sk_storage_get(sk, NULL, BPF_SK_STORAGE_GET_F_CREATE);
+ if (!val)
+ return 0;
+
+ return 1;
+ }
+ )";
+
+ ebpf::BPF bpf;
+ ebpf::StatusTuple res(0);
+ res = bpf.init(BPF_PROGRAM);
+ REQUIRE(res.ok());
+ // The test passes once the function loads; the program is never attached
+ // or run here.
+ int prog_fd;
+ res = bpf.load_func("test", BPF_PROG_TYPE_CGROUP_SKB, prog_fd);
+ REQUIRE(res.ok());
+ }
+
+ // Remove the pin, and unmount bpffs only if this test mounted it.
+ unlink("/sys/fs/bpf/test_pinned_table");
+ if (mounted) {
+ REQUIRE(umount("/sys/fs/bpf") == 0);
+ }
+}
+#endif
diff --git a/tests/python/test_clang.py b/tests/python/test_clang.py
index 7bf12cc3..519e5021 100755
--- a/tests/python/test_clang.py
+++ b/tests/python/test_clang.py
@@ -3,6 +3,7 @@
# Licensed under the Apache License, Version 2.0 (the "License")
from bcc import BPF
+from bcc.libbcc import lib
import ctypes as ct
from unittest import main, skipUnless, TestCase
from utils import kernel_version_ge
@@ -143,6 +144,7 @@ int do_completion(struct pt_regs *ctx, struct request *req) {
b = BPF(text=text, debug=0)
fns = b.load_funcs(BPF.KPROBE)
+ @skipUnless(lib.bpf_module_rw_engine_enabled(), "requires enabled rwengine")
def test_sscanf(self):
text = """
BPF_HASH(stats, int, struct { u64 a; u64 b; u64 c:36; u64 d:28; struct { u32 a; u32 b; } s; }, 10);
@@ -164,6 +166,7 @@ int foo(void *ctx) {
self.assertEqual(l.s.a, 5)
self.assertEqual(l.s.b, 6)
+ @skipUnless(lib.bpf_module_rw_engine_enabled(), "requires enabled rwengine")
def test_sscanf_array(self):
text = """
BPF_HASH(stats, int, struct { u32 a[3]; u32 b; }, 10);
@@ -180,6 +183,7 @@ BPF_HASH(stats, int, struct { u32 a[3]; u32 b; }, 10);
self.assertEqual(l.a[2], 3)
self.assertEqual(l.b, 4)
+ @skipUnless(lib.bpf_module_rw_engine_enabled(), "requires enabled rwengine")
def test_sscanf_string(self):
text = """
struct Symbol {
diff --git a/tests/python/test_tools_smoke.py b/tests/python/test_tools_smoke.py
index ebc17285..879bdb14 100755
--- a/tests/python/test_tools_smoke.py
+++ b/tests/python/test_tools_smoke.py
@@ -358,6 +358,9 @@ class SmokeTests(TestCase):
def test_tcptop(self):
self.run_with_duration("tcptop.py 1 1")
+ def test_tcpcong(self):
+ self.run_with_duration("tcpcong.py 1 1")
+
def test_tplist(self):
self.run_with_duration("tplist.py -p %d" % os.getpid())
diff --git a/tools/bashreadline.py b/tools/bashreadline.py
index 908a1451..3e189976 100755
--- a/tools/bashreadline.py
+++ b/tools/bashreadline.py
@@ -68,11 +68,11 @@ b = BPF(text=bpf_text)
b.attach_uretprobe(name=name, sym="readline", fn_name="printret")
# header
-print("%-9s %-6s %s" % ("TIME", "PID", "COMMAND"))
+print("%-9s %-7s %s" % ("TIME", "PID", "COMMAND"))
def print_event(cpu, data, size):
event = b["events"].event(data)
- print("%-9s %-6d %s" % (strftime("%H:%M:%S"), event.pid,
+ print("%-9s %-7d %s" % (strftime("%H:%M:%S"), event.pid,
event.str.decode('utf-8', 'replace')))
b["events"].open_perf_buffer(print_event)
diff --git a/tools/bindsnoop.py b/tools/bindsnoop.py
index ac3a8aa0..07503352 100755
--- a/tools/bindsnoop.py
+++ b/tools/bindsnoop.py
@@ -27,7 +27,7 @@
# 14-Feb-2020 Pavel Dubovitsky Created this.
from __future__ import print_function, absolute_import, unicode_literals
-from bcc import BPF, DEBUG_SOURCE
+from bcc import BPF
from bcc.containers import filter_by_containers
from bcc.utils import printb
import argparse
@@ -243,10 +243,14 @@ static int bindsnoop_return(struct pt_regs *ctx, short ipver)
opts.fields.reuseport = bitfield >> 4 & 0x01;
// workaround for reading the sk_protocol bitfield (from tcpaccept.py):
- u8 protocol;
+ u16 protocol;
int gso_max_segs_offset = offsetof(struct sock, sk_gso_max_segs);
int sk_lingertime_offset = offsetof(struct sock, sk_lingertime);
- if (sk_lingertime_offset - gso_max_segs_offset == 4)
+
+ // Since kernel v5.6 sk_protocol has its own u16 field
+ if (sk_lingertime_offset - gso_max_segs_offset == 2)
+ protocol = skp->sk_protocol;
+ else if (sk_lingertime_offset - gso_max_segs_offset == 4)
// 4.10+ with little endian
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
protocol = *(u8 *)((u64)&skp->sk_gso_max_segs - 3);
diff --git a/tools/biolatency.py b/tools/biolatency.py
index f4e2c9ea..63a2a572 100755
--- a/tools/biolatency.py
+++ b/tools/biolatency.py
@@ -4,7 +4,7 @@
# biolatency Summarize block device I/O latency as a histogram.
# For Linux, uses BCC, eBPF.
#
-# USAGE: biolatency [-h] [-T] [-Q] [-m] [-D] [-e] [interval] [count]
+# USAGE: biolatency [-h] [-T] [-Q] [-m] [-D] [-F] [-e] [-j] [interval] [count]
#
# Copyright (c) 2015 Brendan Gregg.
# Licensed under the Apache License, Version 2.0 (the "License")
@@ -64,7 +64,7 @@ if args.flags and args.disks:
# define BPF program
bpf_text = """
#include <uapi/linux/ptrace.h>
-#include <linux/blkdev.h>
+#include <linux/blk-mq.h>
typedef struct disk_key {
char disk[DISK_NAME_LEN];
@@ -128,12 +128,16 @@ storage_str = ""
store_str = ""
if args.disks:
storage_str += "BPF_HISTOGRAM(dist, disk_key_t);"
- store_str += """
+ disks_str = """
disk_key_t key = {.slot = bpf_log2l(delta)};
- void *__tmp = (void *)req->rq_disk->disk_name;
+ void *__tmp = (void *)req->__RQ_DISK__->disk_name;
bpf_probe_read(&key.disk, sizeof(key.disk), __tmp);
dist.atomic_increment(key);
"""
+ if BPF.kernel_struct_has_field(b'request', b'rq_disk'):
+ store_str += disks_str.replace('__RQ_DISK__', 'rq_disk')
+ else:
+ store_str += disks_str.replace('__RQ_DISK__', 'q->disk')
elif args.flags:
storage_str += "BPF_HISTOGRAM(dist, flag_key_t);"
store_str += """
@@ -184,6 +188,12 @@ else:
if not args.json:
print("Tracing block device I/O... Hit Ctrl-C to end.")
+def disk_print(s):
+ disk = s.decode('utf-8', 'replace')
+ if not disk:
+ disk = "<unknown>"
+ return disk
+
# see blk_fill_rwbs():
req_opf = {
0: "Read",
@@ -252,9 +262,8 @@ while (1):
if args.flags:
dist.print_json_hist(label, "flags", flags_print)
-
else:
- dist.print_json_hist(label)
+ dist.print_json_hist(label, "disk", disk_print)
else:
if args.timestamp:
@@ -263,7 +272,7 @@ while (1):
if args.flags:
dist.print_log2_hist(label, "flags", flags_print)
else:
- dist.print_log2_hist(label, "disk")
+ dist.print_log2_hist(label, "disk", disk_print)
if args.extension:
total = extension[0].total
counts = extension[0].count
diff --git a/tools/biolatpcts.py b/tools/biolatpcts.py
index a2f59592..ea8b1ce6 100755
--- a/tools/biolatpcts.py
+++ b/tools/biolatpcts.py
@@ -56,6 +56,7 @@ parser.add_argument('--verbose', '-v', action='count', default = 0)
bpf_source = """
#include <linux/blk_types.h>
#include <linux/blkdev.h>
+#include <linux/blk-mq.h>
#include <linux/time64.h>
BPF_PERCPU_ARRAY(rwdf_100ms, u64, 400);
@@ -71,9 +72,9 @@ void kprobe_blk_account_io_done(struct pt_regs *ctx, struct request *rq, u64 now
if (!rq->__START_TIME_FIELD__)
return;
- if (!rq->rq_disk ||
- rq->rq_disk->major != __MAJOR__ ||
- rq->rq_disk->first_minor != __MINOR__)
+ if (!rq->__RQ_DISK__ ||
+ rq->__RQ_DISK__->major != __MAJOR__ ||
+ rq->__RQ_DISK__->first_minor != __MINOR__)
return;
cmd_flags = rq->cmd_flags;
@@ -141,6 +142,11 @@ bpf_source = bpf_source.replace('__START_TIME_FIELD__', start_time_field)
bpf_source = bpf_source.replace('__MAJOR__', str(major))
bpf_source = bpf_source.replace('__MINOR__', str(minor))
+if BPF.kernel_struct_has_field(b'request', b'rq_disk'):
+ bpf_source = bpf_source.replace('__RQ_DISK__', 'rq_disk')
+else:
+ bpf_source = bpf_source.replace('__RQ_DISK__', 'q->disk')
+
bpf = BPF(text=bpf_source)
if BPF.get_kprobe_functions(b'__blk_account_io_done'):
bpf.attach_kprobe(event="__blk_account_io_done", fn_name="kprobe_blk_account_io_done")
diff --git a/tools/biopattern.py b/tools/biopattern.py
new file mode 100755
index 00000000..9bfc0776
--- /dev/null
+++ b/tools/biopattern.py
@@ -0,0 +1,140 @@
+#!/usr/bin/python
+# @lint-avoid-python-3-compatibility-imports
+#
+# biopattern - Identify random/sequential disk access patterns.
+# For Linux, uses BCC, eBPF.
+#
+# Copyright (c) 2022 Rocky Xing.
+# Licensed under the Apache License, Version 2.0 (the "License")
+#
+# 21-Feb-2022 Rocky Xing Created this.
+
+from __future__ import print_function
+from bcc import BPF
+from time import sleep, strftime
+import argparse
+import os
+
+# Usage examples shown in the --help output through the parser epilog.
+examples = """examples:
+ ./biopattern # show block device I/O pattern.
+ ./biopattern 1 10 # print 1 second summaries, 10 times
+ ./biopattern -d sdb # show sdb only
+"""
+parser = argparse.ArgumentParser(
+ description="Show block device I/O pattern.",
+ formatter_class=argparse.RawDescriptionHelpFormatter,
+ epilog=examples)
+parser.add_argument("-d", "--disk", type=str,
+ help="Trace this disk only")
+# interval and count are optional positionals declared without type=, so a
+# value given on the command line arrives as a string; hence the int()
+# conversions where they are used.
+parser.add_argument("interval", nargs="?", default=99999999,
+ help="Output interval in seconds")
+parser.add_argument("count", nargs="?", default=99999999,
+ help="Number of outputs")
+args = parser.parse_args()
+# Number of summaries still to print; the main loop decrements it.
+countdown = int(args.count)
+
+# BPF program text. On each block:block_rq_complete tracepoint it looks up the
+# per-device counter and, once last_sector is non-zero, counts the request as
+# sequential when its start sector equals last_sector (the sector just past
+# the previous request on that device) and as random otherwise, adding
+# nr_sector * 512 to bytes. While last_sector is still zero nothing is
+# counted; the request only records where it ended. DISK_FILTER is a
+# placeholder that is substituted before the program is handed to BPF().
+bpf_text="""
+struct counter {
+ u64 last_sector;
+ u64 bytes;
+ u32 sequential;
+ u32 random;
+};
+
+BPF_HASH(counters, u32, struct counter);
+
+TRACEPOINT_PROBE(block, block_rq_complete)
+{
+ struct counter *counterp;
+ struct counter zero = {};
+ u32 dev = args->dev;
+ u64 sector = args->sector;
+ u32 nr_sector = args->nr_sector;
+
+ DISK_FILTER
+
+ counterp = counters.lookup_or_try_init(&dev, &zero);
+ if (counterp == 0) {
+ return 0;
+ }
+
+ if (counterp->last_sector) {
+ if (counterp->last_sector == sector) {
+ __sync_fetch_and_add(&counterp->sequential, 1);
+ } else {
+ __sync_fetch_and_add(&counterp->random, 1);
+ }
+ __sync_fetch_and_add(&counterp->bytes, nr_sector * 512);
+ }
+ counterp->last_sector = sector + nr_sector;
+
+ return 0;
+}
+"""
+
+dev_minor_bits = 20
+
+def mkdev(major, minor):
+ return (major << dev_minor_bits) | minor
+
+
+partitions = {}
+
+with open("/proc/partitions", 'r') as f:
+ lines = f.readlines()
+ for line in lines[2:]:
+ words = line.strip().split()
+ major = int(words[0])
+ minor = int(words[1])
+ part_name = words[3]
+ partitions[mkdev(major, minor)] = part_name
+
+if args.disk is not None:
+ disk_path = os.path.join('/dev', args.disk)
+ if os.path.exists(disk_path) == False:
+ print("no such disk '%s'" % args.disk)
+ exit(1)
+
+ stat_info = os.stat(disk_path)
+ major = os.major(stat_info.st_rdev)
+ minor = os.minor(stat_info.st_rdev)
+ bpf_text = bpf_text.replace('DISK_FILTER',
+ 'if (dev != %s) { return 0; }' % mkdev(major, minor))
+else:
+ bpf_text = bpf_text.replace('DISK_FILTER', '')
+
+b = BPF(text=bpf_text)
+
+exiting = 0 if args.interval else 1
+counters = b.get_table("counters")
+
+print("%-9s %-7s %5s %5s %8s %10s" %
+ ("TIME", "DISK", "%RND", "%SEQ", "COUNT", "KBYTES"))
+
+while True:
+ try:
+ sleep(int(args.interval))
+ except KeyboardInterrupt:
+ exiting = 1
+
+ for k, v in counters.items():
+ total = v.random + v.sequential
+ if total == 0:
+ continue
+
+ part_name = partitions.get(k.value, "Unknown")
+
+ print("%-9s %-7s %5d %5d %8d %10d" % (
+ strftime("%H:%M:%S"),
+ part_name,
+ v.random * 100 / total,
+ v.sequential * 100 / total,
+ total,
+ v.bytes / 1024))
+
+ counters.clear()
+
+ countdown -= 1
+ if exiting or countdown == 0:
+ exit()
+
diff --git a/tools/biopattern_example.txt b/tools/biopattern_example.txt
new file mode 100644
index 00000000..ac3e5c6e
--- /dev/null
+++ b/tools/biopattern_example.txt
@@ -0,0 +1,45 @@
+Demonstrations of biopattern, the Linux eBPF/bcc version.
+
+
+biopattern identifies random/sequential disk access patterns. Example:
+
+# ./biopattern.py
+TIME DISK %RND %SEQ COUNT KBYTES
+22:03:51 vdb 0 99 788 3184
+22:03:51 Unknown 0 100 4 0
+22:03:51 vda 85 14 21 488
+[...]
+
+
+The -d option prints only the matching disk.
+
+# ./biopattern.py -d vdb 1 10
+TIME DISK %RND %SEQ COUNT KBYTES
+22:12:57 vdb 0 99 193 772
+22:12:58 vdb 0 100 1119 4476
+22:12:59 vdb 0 100 1126 4504
+22:13:00 vdb 0 100 1009 4036
+22:13:01 vdb 0 100 958 3832
+22:13:02 vdb 0 99 957 3856
+22:13:03 vdb 0 100 1130 4520
+22:13:04 vdb 0 100 1051 4204
+22:13:05 vdb 0 100 1158 4632
+[...]
+
+
+USAGE message:
+
+Show block device I/O pattern.
+
+positional arguments:
+ interval Output interval in seconds
+ count Number of outputs
+
+optional arguments:
+ -h, --help show this help message and exit
+ -d DISK, --disk DISK Trace this disk only
+
+examples:
+ ./biopattern # show block device I/O pattern.
+ ./biopattern 1 10 # print 1 second summaries, 10 times
+ ./biopattern -d sdb # show sdb only
diff --git a/tools/biosnoop.py b/tools/biosnoop.py
index 2b954ac9..5e7c6e6f 100755
--- a/tools/biosnoop.py
+++ b/tools/biosnoop.py
@@ -37,7 +37,7 @@ debug = 0
# define BPF program
bpf_text="""
#include <uapi/linux/ptrace.h>
-#include <linux/blkdev.h>
+#include <linux/blk-mq.h>
// for saving the timestamp and __data_len of each request
struct start_req_t {
@@ -125,7 +125,7 @@ int trace_req_completion(struct pt_regs *ctx, struct request *req)
data.pid = valp->pid;
data.sector = req->__sector;
bpf_probe_read_kernel(&data.name, sizeof(data.name), valp->name);
- struct gendisk *rq_disk = req->rq_disk;
+ struct gendisk *rq_disk = req->__RQ_DISK__;
bpf_probe_read_kernel(&data.disk_name, sizeof(data.disk_name),
rq_disk->disk_name);
}
@@ -156,6 +156,10 @@ if args.queue:
bpf_text = bpf_text.replace('##QUEUE##', '1')
else:
bpf_text = bpf_text.replace('##QUEUE##', '0')
+if BPF.kernel_struct_has_field(b'request', b'rq_disk'):
+ bpf_text = bpf_text.replace('__RQ_DISK__', 'rq_disk')
+else:
+ bpf_text = bpf_text.replace('__RQ_DISK__', 'q->disk')
if debug or args.ebpf:
print(bpf_text)
if args.ebpf:
@@ -176,7 +180,7 @@ else:
b.attach_kprobe(event="blk_account_io_done", fn_name="trace_req_completion")
# header
-print("%-11s %-14s %-6s %-7s %-1s %-10s %-7s" % ("TIME(s)", "COMM", "PID",
+print("%-11s %-14s %-7s %-9s %-1s %-10s %-7s" % ("TIME(s)", "COMM", "PID",
"DISK", "T", "SECTOR", "BYTES"), end="")
if args.queue:
print("%7s " % ("QUE(ms)"), end="")
@@ -202,10 +206,13 @@ def print_event(cpu, data, size):
delta = float(event.ts) - start_ts
- print("%-11.6f %-14.14s %-6s %-7s %-1s %-10s %-7s" % (
+ disk_name = event.disk_name.decode('utf-8', 'replace')
+ if not disk_name:
+ disk_name = '<unknown>'
+
+ print("%-11.6f %-14.14s %-7s %-9s %-1s %-10s %-7s" % (
delta / 1000000, event.name.decode('utf-8', 'replace'), event.pid,
- event.disk_name.decode('utf-8', 'replace'), rwflg, event.sector,
- event.len), end="")
+ disk_name, rwflg, event.sector, event.len), end="")
if args.queue:
print("%7.2f " % (float(event.qdelta) / 1000000), end="")
print("%7.2f" % (float(event.delta) / 1000000))
diff --git a/tools/biotop.py b/tools/biotop.py
index eac4dab9..3c9c071c 100755
--- a/tools/biotop.py
+++ b/tools/biotop.py
@@ -4,7 +4,7 @@
# biotop block device (disk) I/O by process.
# For Linux, uses BCC, eBPF.
#
-# USAGE: biotop.py [-h] [-C] [-r MAXROWS] [interval] [count]
+# USAGE: biotop.py [-h] [-C] [-r MAXROWS] [-p PID] [interval] [count]
#
# This uses in-kernel eBPF maps to cache process details (PID and comm) by I/O
# request, as well as a starting timestamp for calculating I/O latency.
@@ -13,6 +13,7 @@
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 06-Feb-2016 Brendan Gregg Created this.
+# 17-Mar-2022 Rocky Xing Added PID filter support.
from __future__ import print_function
from bcc import BPF
@@ -24,6 +25,7 @@ from subprocess import call
examples = """examples:
./biotop # block device I/O top, 1 second refresh
./biotop -C # don't clear the screen
+ ./biotop -p 181 # only trace PID 181
./biotop 5 # 5 second summaries
./biotop 5 10 # 5 second summaries, 10 times only
"""
@@ -35,6 +37,8 @@ parser.add_argument("-C", "--noclear", action="store_true",
help="don't clear the screen")
parser.add_argument("-r", "--maxrows", default=20,
help="maximum rows to print, default 20")
+parser.add_argument("-p", "--pid", type=int, metavar="PID",
+ help="trace this PID only")
parser.add_argument("interval", nargs="?", default=1,
help="output interval, in seconds")
parser.add_argument("count", nargs="?", default=99999999,
@@ -54,7 +58,7 @@ diskstats = "/proc/diskstats"
# load BPF program
bpf_text = """
#include <uapi/linux/ptrace.h>
-#include <linux/blkdev.h>
+#include <linux/blk-mq.h>
// for saving the timestamp and __data_len of each request
struct start_req_t {
@@ -92,9 +96,14 @@ BPF_HASH(counts, struct info_t, struct val_t);
int trace_pid_start(struct pt_regs *ctx, struct request *req)
{
struct who_t who = {};
+ u32 pid;
if (bpf_get_current_comm(&who.name, sizeof(who.name)) == 0) {
- who.pid = bpf_get_current_pid_tgid() >> 32;
+ pid = bpf_get_current_pid_tgid() >> 32;
+ if (FILTER_PID)
+ return 0;
+
+ who.pid = pid;
whobyreq.update(&req, &who);
}
@@ -124,13 +133,25 @@ int trace_req_completion(struct pt_regs *ctx, struct request *req)
}
struct who_t *whop;
+ u32 pid;
+
+ whop = whobyreq.lookup(&req);
+ pid = whop != 0 ? whop->pid : 0;
+ if (FILTER_PID) {
+ start.delete(&req);
+ if (whop != 0) {
+ whobyreq.delete(&req);
+ }
+ return 0;
+ }
+
struct val_t *valp, zero = {};
u64 delta_us = (bpf_ktime_get_ns() - startp->ts) / 1000;
// setup info_t key
struct info_t info = {};
- info.major = req->rq_disk->major;
- info.minor = req->rq_disk->first_minor;
+ info.major = req->__RQ_DISK__->major;
+ info.minor = req->__RQ_DISK__->first_minor;
/*
* The following deals with a kernel version change (in mainline 4.7, although
* it may be backported to earlier kernels) with how block request write flags
@@ -146,7 +167,6 @@ int trace_req_completion(struct pt_regs *ctx, struct request *req)
info.rwflag = !!((req->cmd_flags & REQ_OP_MASK) == REQ_OP_WRITE);
#endif
- whop = whobyreq.lookup(&req);
if (whop == 0) {
// missed pid who, save stats as pid 0
valp = counts.lookup_or_try_init(&info, &zero);
@@ -174,6 +194,16 @@ if args.ebpf:
print(bpf_text)
exit()
+if BPF.kernel_struct_has_field(b'request', b'rq_disk'):
+ bpf_text = bpf_text.replace('__RQ_DISK__', 'rq_disk')
+else:
+ bpf_text = bpf_text.replace('__RQ_DISK__', 'q->disk')
+
+if args.pid is not None:
+ bpf_text = bpf_text.replace('FILTER_PID', 'pid != %d' % args.pid)
+else:
+ bpf_text = bpf_text.replace('FILTER_PID', '0')
+
b = BPF(text=bpf_text)
if BPF.get_kprobe_functions(b'__blk_account_io_start'):
b.attach_kprobe(event="__blk_account_io_start", fn_name="trace_pid_start")
@@ -211,7 +241,7 @@ while 1:
print()
with open(loadavg) as stats:
print("%-8s loadavg: %s" % (strftime("%H:%M:%S"), stats.read()))
- print("%-6s %-16s %1s %-3s %-3s %-8s %5s %7s %6s" % ("PID", "COMM",
+ print("%-7s %-16s %1s %-3s %-3s %-8s %5s %7s %6s" % ("PID", "COMM",
"D", "MAJ", "MIN", "DISK", "I/O", "Kbytes", "AVGms"))
# by-PID output
@@ -229,7 +259,7 @@ while 1:
# print line
avg_ms = (float(v.us) / 1000) / v.io
- print("%-6d %-16s %1s %-3d %-3d %-8s %5s %7s %6.2f" % (k.pid,
+ print("%-7d %-16s %1s %-3d %-3d %-8s %5s %7s %6.2f" % (k.pid,
k.name.decode('utf-8', 'replace'), "W" if k.rwflag else "R",
k.major, k.minor, diskname, v.io, v.bytes / 1024, avg_ms))
diff --git a/tools/btrfsdist.py b/tools/btrfsdist.py
index 72ea304a..a9bf6e49 100755
--- a/tools/btrfsdist.py
+++ b/tools/btrfsdist.py
@@ -231,7 +231,7 @@ while (1):
if args.interval and (not args.notimestamp):
print(strftime("%H:%M:%S:"))
- dist.print_log2_hist(label, "operation")
+ dist.print_log2_hist(label, "operation", section_print_fn=bytes.decode)
dist.clear()
countdown -= 1
diff --git a/tools/btrfsslower.py b/tools/btrfsslower.py
index 9e46243a..6de02e07 100755
--- a/tools/btrfsslower.py
+++ b/tools/btrfsslower.py
@@ -310,7 +310,7 @@ def print_event(cpu, data, size):
type, event.size, event.offset, event.delta_us,
event.file.decode('utf-8', 'replace')))
return
- print("%-8s %-14.14s %-6s %1s %-7s %-8d %7.2f %s" % (strftime("%H:%M:%S"),
+ print("%-8s %-14.14s %-7d %1s %-7s %-8d %7.2f %s" % (strftime("%H:%M:%S"),
event.task.decode('utf-8', 'replace'), event.pid, type, event.size,
event.offset / 1024, float(event.delta_us) / 1000,
event.file.decode('utf-8', 'replace')))
@@ -336,7 +336,7 @@ else:
print("Tracing btrfs operations")
else:
print("Tracing btrfs operations slower than %d ms" % min_ms)
- print("%-8s %-14s %-6s %1s %-7s %-8s %7s %s" % ("TIME", "COMM", "PID", "T",
+ print("%-8s %-14s %-7s %1s %-7s %-8s %7s %s" % ("TIME", "COMM", "PID", "T",
"BYTES", "OFF_KB", "LAT(ms)", "FILENAME"))
# read events
diff --git a/tools/cachetop.py b/tools/cachetop.py
index 7c02455e..d02b72b8 100755
--- a/tools/cachetop.py
+++ b/tools/cachetop.py
@@ -11,6 +11,7 @@
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 13-Jul-2016 Emmanuel Bretelle first version
+# 17-Mar-2022 Rocky Xing Added PID filter support.
from __future__ import absolute_import
from __future__ import division
@@ -152,12 +153,15 @@ def handle_loop(stdscr, args):
BPF_HASH(counts, struct key_t);
int do_count(struct pt_regs *ctx) {
+ u32 pid = bpf_get_current_pid_tgid() >> 32;
+ if (FILTER_PID)
+ return 0;
+
struct key_t key = {};
- u64 pid = bpf_get_current_pid_tgid();
u32 uid = bpf_get_current_uid_gid();
key.ip = PT_REGS_IP(ctx);
- key.pid = pid >> 32;
+ key.pid = pid;
key.uid = uid;
bpf_get_current_comm(&(key.comm), 16);
@@ -166,6 +170,12 @@ def handle_loop(stdscr, args):
}
"""
+
+ if args.pid:
+ bpf_text = bpf_text.replace('FILTER_PID', 'pid != %d' % args.pid)
+ else:
+ bpf_text = bpf_text.replace('FILTER_PID', '0')
+
b = BPF(text=bpf_text)
b.attach_kprobe(event="add_to_page_cache_lru", fn_name="do_count")
b.attach_kprobe(event="mark_page_accessed", fn_name="do_count")
@@ -251,9 +261,11 @@ def handle_loop(stdscr, args):
def parse_arguments():
parser = argparse.ArgumentParser(
- description='show Linux page cache hit/miss statistics including read '
+ description='Show Linux page cache hit/miss statistics including read '
'and write hit % per processes in a UI like top.'
)
+ parser.add_argument("-p", "--pid", type=int, metavar="PID",
+ help="trace this PID only")
parser.add_argument(
'interval', type=int, default=5, nargs='?',
help='Interval between probes.'
diff --git a/tools/compactsnoop.py b/tools/compactsnoop.py
index 71ef95b0..9daaf485 100755
--- a/tools/compactsnoop.py
+++ b/tools/compactsnoop.py
@@ -18,6 +18,7 @@ from bcc import BPF
import argparse
import platform
from datetime import datetime, timedelta
+import sys
# arguments
examples = """examples:
@@ -390,6 +391,8 @@ def print_event(cpu, data, size):
print("\t%s" % sym)
print("")
+ sys.stdout.flush()
+
# loop with callback to print_event
b["events"].open_perf_buffer(print_event, page_cnt=64)
start_time = datetime.now()
diff --git a/tools/cpudist.py b/tools/cpudist.py
index a4303f85..3f58aa18 100755
--- a/tools/cpudist.py
+++ b/tools/cpudist.py
@@ -3,13 +3,17 @@
#
# cpudist Summarize on- and off-CPU time per task as a histogram.
#
-# USAGE: cpudist [-h] [-O] [-T] [-m] [-P] [-L] [-p PID] [interval] [count]
+# USAGE: cpudist [-h] [-O] [-T] [-m] [-P] [-L] [-p PID] [-I] [interval] [count]
#
# This measures the time a task spends on or off the CPU, and shows this time
# as a histogram, optionally per-process.
#
+# By default CPU idle time is excluded by simply excluding PID 0.
+#
# Copyright 2016 Sasha Goldshtein
# Licensed under the Apache License, Version 2.0 (the "License")
+#
+# 27-Mar-2022 Rocky Xing Changed to exclude CPU idle time by default.
from __future__ import print_function
from bcc import BPF
@@ -23,6 +27,7 @@ examples = """examples:
cpudist -mT 1 # 1s summaries, milliseconds, and timestamps
cpudist -P # show each PID separately
cpudist -p 185 # trace PID 185 only
+ cpudist -I # include CPU idle time
"""
parser = argparse.ArgumentParser(
description="Summarize on-CPU time per task as a histogram.",
@@ -40,6 +45,8 @@ parser.add_argument("-L", "--tids", action="store_true",
help="print a histogram per thread ID")
parser.add_argument("-p", "--pid",
help="trace this PID only")
+parser.add_argument("-I", "--include-idle", action="store_true",
+ help="include CPU idle time")
parser.add_argument("interval", nargs="?", default=99999999,
help="output interval, in seconds")
parser.add_argument("count", nargs="?", default=99999999,
@@ -58,29 +65,42 @@ if not args.offcpu:
bpf_text += "#define ONCPU\n"
bpf_text += """
+typedef struct entry_key {
+ u32 pid;
+ u32 cpu;
+} entry_key_t;
+
typedef struct pid_key {
u64 id;
u64 slot;
} pid_key_t;
-BPF_HASH(start, u32, u64, MAX_PID);
+BPF_HASH(start, entry_key_t, u64, MAX_PID);
STORAGE
-static inline void store_start(u32 tgid, u32 pid, u64 ts)
+static inline void store_start(u32 tgid, u32 pid, u32 cpu, u64 ts)
{
- if (FILTER)
+ if (PID_FILTER)
+ return;
+
+ if (IDLE_FILTER)
return;
- start.update(&pid, &ts);
+ entry_key_t entry_key = { .pid = pid, .cpu = cpu };
+ start.update(&entry_key, &ts);
}
-static inline void update_hist(u32 tgid, u32 pid, u64 ts)
+static inline void update_hist(u32 tgid, u32 pid, u32 cpu, u64 ts)
{
- if (FILTER)
+ if (PID_FILTER)
return;
- u64 *tsp = start.lookup(&pid);
+ if (IDLE_FILTER)
+ return;
+
+ entry_key_t entry_key = { .pid = pid, .cpu = cpu };
+ u64 *tsp = start.lookup(&entry_key);
if (tsp == 0)
return;
@@ -99,20 +119,21 @@ int sched_switch(struct pt_regs *ctx, struct task_struct *prev)
u64 ts = bpf_ktime_get_ns();
u64 pid_tgid = bpf_get_current_pid_tgid();
u32 tgid = pid_tgid >> 32, pid = pid_tgid;
+ u32 cpu = bpf_get_smp_processor_id();
u32 prev_pid = prev->pid;
u32 prev_tgid = prev->tgid;
#ifdef ONCPU
- update_hist(prev_tgid, prev_pid, ts);
+ update_hist(prev_tgid, prev_pid, cpu, ts);
#else
- store_start(prev_tgid, prev_pid, ts);
+ store_start(prev_tgid, prev_pid, cpu, ts);
#endif
BAIL:
#ifdef ONCPU
- store_start(tgid, pid, ts);
+ store_start(tgid, pid, cpu, ts);
#else
- update_hist(tgid, pid, ts);
+ update_hist(tgid, pid, cpu, ts);
#endif
return 0;
@@ -120,9 +141,16 @@ BAIL:
"""
if args.pid:
- bpf_text = bpf_text.replace('FILTER', 'tgid != %s' % args.pid)
+ bpf_text = bpf_text.replace('PID_FILTER', 'tgid != %s' % args.pid)
else:
- bpf_text = bpf_text.replace('FILTER', '0')
+ bpf_text = bpf_text.replace('PID_FILTER', '0')
+
+# set idle filter
+idle_filter = 'pid == 0'
+if args.include_idle:
+ idle_filter = '0'
+bpf_text = bpf_text.replace('IDLE_FILTER', idle_filter)
+
if args.milliseconds:
bpf_text = bpf_text.replace('FACTOR', 'delta /= 1000000;')
label = "msecs"
diff --git a/tools/cpudist_example.txt b/tools/cpudist_example.txt
index 7da43540..43be7a00 100644
--- a/tools/cpudist_example.txt
+++ b/tools/cpudist_example.txt
@@ -6,6 +6,8 @@ that can indicate oversubscription (too many tasks for too few processors),
overhead due to excessive context switching (e.g. a common shared lock for
multiple threads), uneven workload distribution, too-granular tasks, and more.
+By default CPU idle time is excluded by simply excluding PID 0.
+
Alternatively, the same options are available for summarizing task off-CPU
time, which helps understand how often threads are being descheduled and how
long they spend waiting for I/O, locks, timers, and other causes of suspension.
@@ -280,7 +282,7 @@ USAGE message:
# ./cpudist.py -h
-usage: cpudist.py [-h] [-O] [-T] [-m] [-P] [-L] [-p PID] [interval] [count]
+usage: cpudist.py [-h] [-O] [-T] [-m] [-P] [-L] [-p PID] [-I] [interval] [count]
Summarize on-CPU time per task as a histogram.
@@ -296,6 +298,7 @@ optional arguments:
-P, --pids print a histogram per process ID
-L, --tids print a histogram per thread ID
-p PID, --pid PID trace this PID only
+ -I, --include-idle include CPU idle time
examples:
cpudist # summarize on-CPU time as a histogram
@@ -304,3 +307,5 @@ examples:
cpudist -mT 1 # 1s summaries, milliseconds, and timestamps
cpudist -P # show each PID separately
cpudist -p 185 # trace PID 185 only
+ cpudist -I # include CPU idle time
+
diff --git a/tools/dbslower.py b/tools/dbslower.py
index 090d5218..1d459176 100755
--- a/tools/dbslower.py
+++ b/tools/dbslower.py
@@ -212,7 +212,7 @@ start = BPF.monotonic_time()
def print_event(cpu, data, size):
event = bpf["events"].event(data)
- print("%-14.6f %-6d %8.3f %s" % (
+ print("%-14.6f %-7d %8.3f %s" % (
float(event.timestamp - start) / 1000000000,
event.pid, float(event.duration) / 1000000, event.query))
@@ -223,7 +223,7 @@ else:
print("Tracing database queries for pids %s slower than %d ms..." %
(', '.join(map(str, args.pids)), args.threshold))
-print("%-14s %-6s %8s %s" % ("TIME(s)", "PID", "MS", "QUERY"))
+print("%-14s %-7s %8s %s" % ("TIME(s)", "PID", "MS", "QUERY"))
bpf["events"].open_perf_buffer(print_event, page_cnt=64)
while True:
diff --git a/tools/dcsnoop.py b/tools/dcsnoop.py
index 274eaa59..74a3914a 100755
--- a/tools/dcsnoop.py
+++ b/tools/dcsnoop.py
@@ -148,13 +148,13 @@ start_ts = time.time()
def print_event(cpu, data, size):
event = b["events"].event(data)
- print("%-11.6f %-6d %-16s %1s %s" % (
+ print("%-11.6f %-7d %-16s %1s %s" % (
time.time() - start_ts, event.pid,
event.comm.decode('utf-8', 'replace'), mode_s[event.type],
event.filename.decode('utf-8', 'replace')))
# header
-print("%-11s %-6s %-16s %1s %s" % ("TIME(s)", "PID", "COMM", "T", "FILE"))
+print("%-11s %-7s %-16s %1s %s" % ("TIME(s)", "PID", "COMM", "T", "FILE"))
b["events"].open_perf_buffer(print_event, page_cnt=64)
while 1:
diff --git a/tools/drsnoop.py b/tools/drsnoop.py
index e4ea9222..e0344d12 100755
--- a/tools/drsnoop.py
+++ b/tools/drsnoop.py
@@ -20,6 +20,7 @@ import argparse
from datetime import datetime, timedelta
import os
import math
+import sys
# symbols
kallsyms = "/proc/kallsyms"
@@ -224,6 +225,8 @@ def print_event(cpu, data, size):
else:
print("")
+ sys.stdout.flush()
+
# loop with callback to print_event
b["events"].open_perf_buffer(print_event, page_cnt=64)
diff --git a/tools/execsnoop.py b/tools/execsnoop.py
index 53052d39..ea8f40b8 100755
--- a/tools/execsnoop.py
+++ b/tools/execsnoop.py
@@ -236,7 +236,7 @@ if args.timestamp:
print("%-8s" % ("TIME(s)"), end="")
if args.print_uid:
print("%-6s" % ("UID"), end="")
-print("%-16s %-6s %-6s %3s %s" % ("PCOMM", "PID", "PPID", "RET", "ARGS"))
+print("%-16s %-7s %-7s %3s %s" % ("PCOMM", "PID", "PPID", "RET", "ARGS"))
class EventType(object):
EVENT_ARG = 0
@@ -290,7 +290,7 @@ def print_event(cpu, data, size):
ppid = event.ppid if event.ppid > 0 else get_ppid(event.pid)
ppid = b"%d" % ppid if ppid > 0 else b"?"
argv_text = b' '.join(argv[event.pid]).replace(b'\n', b'\\n')
- printb(b"%-16s %-6d %-6s %3d %s" % (event.comm, event.pid,
+ printb(b"%-16s %-7d %-7s %3d %s" % (event.comm, event.pid,
ppid, event.retval, argv_text))
try:
del(argv[event.pid])
diff --git a/tools/ext4slower.py b/tools/ext4slower.py
index 90663a58..5cd75abc 100755
--- a/tools/ext4slower.py
+++ b/tools/ext4slower.py
@@ -101,7 +101,7 @@ BPF_PERF_OUTPUT(events);
// own function, for reads. So we need to trace that and then filter on ext4,
// which I do by checking file->f_op.
// The new Linux version (since form 4.10) uses ext4_file_read_iter(), And if the 'CONFIG_FS_DAX'
-// is not set ,then ext4_file_read_iter() will call generic_file_read_iter(), else it will call
+// is not set, then ext4_file_read_iter() will call generic_file_read_iter(), else it will call
// ext4_dax_read_iter(), and trace generic_file_read_iter() will fail.
int trace_read_entry(struct pt_regs *ctx, struct kiocb *iocb)
{
diff --git a/tools/filelife.py b/tools/filelife.py
index 9b7562f4..e869607b 100755
--- a/tools/filelife.py
+++ b/tools/filelife.py
@@ -118,12 +118,12 @@ b.attach_kprobe(event="security_inode_create", fn_name="trace_create")
b.attach_kprobe(event="vfs_unlink", fn_name="trace_unlink")
# header
-print("%-8s %-6s %-16s %-7s %s" % ("TIME", "PID", "COMM", "AGE(s)", "FILE"))
+print("%-8s %-7s %-16s %-7s %s" % ("TIME", "PID", "COMM", "AGE(s)", "FILE"))
# process event
def print_event(cpu, data, size):
event = b["events"].event(data)
- print("%-8s %-6d %-16s %-7.2f %s" % (strftime("%H:%M:%S"), event.pid,
+ print("%-8s %-7d %-16s %-7.2f %s" % (strftime("%H:%M:%S"), event.pid,
event.comm.decode('utf-8', 'replace'), float(event.delta) / 1000,
event.fname.decode('utf-8', 'replace')))
diff --git a/tools/filetop.py b/tools/filetop.py
index 9a79a64f..aec11a86 100755
--- a/tools/filetop.py
+++ b/tools/filetop.py
@@ -67,6 +67,7 @@ bpf_text = """
struct info_t {
unsigned long inode;
dev_t dev;
+ dev_t rdev;
u32 pid;
u32 name_len;
char comm[TASK_COMM_LEN];
@@ -105,7 +106,8 @@ static int do_entry(struct pt_regs *ctx, struct file *file,
struct info_t info = {
.pid = pid,
.inode = file->f_inode->i_ino,
- .dev = file->f_inode->i_rdev,
+ .dev = file->f_inode->i_sb->s_dev,
+ .rdev = file->f_inode->i_rdev,
};
bpf_get_current_comm(&info.comm, sizeof(info.comm));
info.name_len = d_name.len;
diff --git a/tools/funcslower.py b/tools/funcslower.py
index ffa618d7..ddd786fc 100755
--- a/tools/funcslower.py
+++ b/tools/funcslower.py
@@ -88,6 +88,13 @@ struct entry_t {
u64 args[5];
#endif
#endif
+#ifdef USER_STACKS
+ int user_stack_id;
+#endif
+#ifdef KERNEL_STACKS
+ int kernel_stack_id;
+ u64 kernel_ip;
+#endif
};
struct data_t {
@@ -143,6 +150,40 @@ static int trace_entry(struct pt_regs *ctx, int id)
#endif
#endif
+#ifdef USER_STACKS
+ entry.user_stack_id = stacks.get_stackid(ctx, BPF_F_USER_STACK);
+#endif
+
+#ifdef KERNEL_STACKS
+ entry.kernel_stack_id = stacks.get_stackid(ctx, 0);
+
+ if (entry.kernel_stack_id >= 0) {
+ u64 ip = PT_REGS_IP(ctx);
+ u64 page_offset;
+
+ // if ip isn't sane, leave key ips as zero for later checking
+#if defined(CONFIG_X86_64) && defined(__PAGE_OFFSET_BASE)
+ // x64, 4.16, ..., 4.11, etc., but some earlier kernel didn't have it
+ page_offset = __PAGE_OFFSET_BASE;
+#elif defined(CONFIG_X86_64) && defined(__PAGE_OFFSET_BASE_L4)
+ // x64, 4.17, and later
+#if defined(CONFIG_DYNAMIC_MEMORY_LAYOUT) && defined(CONFIG_X86_5LEVEL)
+ page_offset = __PAGE_OFFSET_BASE_L5;
+#else
+ page_offset = __PAGE_OFFSET_BASE_L4;
+#endif
+#else
+ // earlier x86_64 kernels, e.g., 4.6, comes here
+ // arm64, s390, powerpc, x86_32
+ page_offset = PAGE_OFFSET;
+#endif
+
+ if (ip > page_offset) {
+ entry.kernel_ip = ip;
+ }
+ }
+#endif
+
entryinfo.update(&tgid_pid, &entry);
return 0;
@@ -172,37 +213,12 @@ int trace_return(struct pt_regs *ctx)
data.retval = PT_REGS_RC(ctx);
#ifdef USER_STACKS
- data.user_stack_id = stacks.get_stackid(ctx, BPF_F_USER_STACK);
+ data.user_stack_id = entryp->user_stack_id;
#endif
#ifdef KERNEL_STACKS
- data.kernel_stack_id = stacks.get_stackid(ctx, 0);
-
- if (data.kernel_stack_id >= 0) {
- u64 ip = PT_REGS_IP(ctx);
- u64 page_offset;
-
- // if ip isn't sane, leave key ips as zero for later checking
-#if defined(CONFIG_X86_64) && defined(__PAGE_OFFSET_BASE)
- // x64, 4.16, ..., 4.11, etc., but some earlier kernel didn't have it
- page_offset = __PAGE_OFFSET_BASE;
-#elif defined(CONFIG_X86_64) && defined(__PAGE_OFFSET_BASE_L4)
- // x64, 4.17, and later
-#if defined(CONFIG_DYNAMIC_MEMORY_LAYOUT) && defined(CONFIG_X86_5LEVEL)
- page_offset = __PAGE_OFFSET_BASE_L5;
-#else
- page_offset = __PAGE_OFFSET_BASE_L4;
-#endif
-#else
- // earlier x86_64 kernels, e.g., 4.6, comes here
- // arm64, s390, powerpc, x86_32
- page_offset = PAGE_OFFSET;
-#endif
-
- if (ip > page_offset) {
- data.kernel_ip = ip;
- }
- }
+ data.kernel_stack_id = entryp->kernel_stack_id;
+ data.kernel_ip = entryp->kernel_ip;
#endif
#ifdef GRAB_ARGS
diff --git a/tools/hardirqs.py b/tools/hardirqs.py
index 0eeddddc..3bcf6492 100755
--- a/tools/hardirqs.py
+++ b/tools/hardirqs.py
@@ -4,7 +4,7 @@
# hardirqs Summarize hard IRQ (interrupt) event time.
# For Linux, uses BCC, eBPF.
#
-# USAGE: hardirqs [-h] [-T] [-N] [-C] [-d] [interval] [outputs]
+# USAGE: hardirqs [-h] [-T] [-N] [-C] [-d] [-c CPU] [interval] [outputs]
#
# Thanks Amer Ather for help understanding irq behavior.
#
@@ -13,11 +13,13 @@
#
# 19-Oct-2015 Brendan Gregg Created this.
# 22-May-2021 Hengqi Chen Migrated to kernel tracepoints.
+# 07-Mar-2022 Rocky Xing Added CPU filter support.
from __future__ import print_function
from bcc import BPF
from time import sleep, strftime
import argparse
+import sys
# arguments
examples = """examples:
@@ -25,6 +27,7 @@ examples = """examples:
./hardirqs -d # show hard irq event time as histograms
./hardirqs 1 10 # print 1 second summaries, 10 times
./hardirqs -NT 1 # 1s summaries, nanoseconds, and timestamps
+ ./hardirqs -c 1 # sum hard irq event time on CPU 1 only
"""
parser = argparse.ArgumentParser(
description="Summarize hard irq event time as histograms",
@@ -38,6 +41,8 @@ parser.add_argument("-C", "--count", action="store_true",
help="show event counts instead of timing")
parser.add_argument("-d", "--dist", action="store_true",
help="show distributions as histograms")
+parser.add_argument("-c", "--cpu", type=int,
+ help="trace this CPU only")
parser.add_argument("interval", nargs="?", default=99999999,
help="output interval, in seconds")
parser.add_argument("outputs", nargs="?", default=99999999,
@@ -94,9 +99,12 @@ TRACEPOINT_PROBE(irq, irq_handler_entry)
{
struct entry_key key = {};
irq_name_t name = {};
+ u32 cpu = bpf_get_smp_processor_id();
+
+ FILTER_CPU
key.tid = bpf_get_current_pid_tgid();
- key.cpu_id = bpf_get_smp_processor_id();
+ key.cpu_id = cpu;
TP_DATA_LOC_READ_STR(&name.name, name, sizeof(name));
irqnames.update(&key, &name);
@@ -106,9 +114,12 @@ TRACEPOINT_PROBE(irq, irq_handler_entry)
TRACEPOINT_PROBE(irq, irq_handler_exit)
{
struct entry_key key = {};
+ u32 cpu = bpf_get_smp_processor_id();
+
+ FILTER_CPU
key.tid = bpf_get_current_pid_tgid();
- key.cpu_id = bpf_get_smp_processor_id();
+ key.cpu_id = cpu;
// check ret value of irq handler is not IRQ_NONE to make sure
// the current event belong to this irq handler
@@ -137,9 +148,12 @@ TRACEPOINT_PROBE(irq, irq_handler_entry)
u64 ts = bpf_ktime_get_ns();
irq_name_t name = {};
struct entry_key key = {};
+ u32 cpu = bpf_get_smp_processor_id();
+
+ FILTER_CPU
key.tid = bpf_get_current_pid_tgid();
- key.cpu_id = bpf_get_smp_processor_id();
+ key.cpu_id = cpu;
TP_DATA_LOC_READ_STR(&name.name, name, sizeof(name));
irqnames.update(&key, &name);
@@ -152,9 +166,10 @@ TRACEPOINT_PROBE(irq, irq_handler_exit)
u64 *tsp, delta;
irq_name_t *namep;
struct entry_key key = {};
+ u32 cpu = bpf_get_smp_processor_id();
key.tid = bpf_get_current_pid_tgid();
- key.cpu_id = bpf_get_smp_processor_id();
+ key.cpu_id = cpu;
// check ret value of irq handler is not IRQ_NONE to make sure
// the current event belong to this irq handler
@@ -195,6 +210,11 @@ else:
'irq_key_t key = {.slot = 0 /* ignore */};' +
'bpf_probe_read_kernel(&key.name, sizeof(key.name), name);' +
'dist.atomic_increment(key, delta);')
+if args.cpu is not None:
+ bpf_text = bpf_text.replace('FILTER_CPU',
+ 'if (cpu != %d) { return 0; }' % int(args.cpu))
+else:
+ bpf_text = bpf_text.replace('FILTER_CPU', '')
if debug or args.ebpf:
print(bpf_text)
if args.ebpf:
@@ -222,13 +242,15 @@ while (1):
print("%-8s\n" % strftime("%H:%M:%S"), end="")
if args.dist:
- dist.print_log2_hist(label, "hardirq")
+ dist.print_log2_hist(label, "hardirq", section_print_fn=bytes.decode)
else:
print("%-26s %11s" % ("HARDIRQ", "TOTAL_" + label))
for k, v in sorted(dist.items(), key=lambda dist: dist[1].value):
print("%-26s %11d" % (k.name.decode('utf-8', 'replace'), v.value / factor))
dist.clear()
+ sys.stdout.flush()
+
countdown -= 1
if exiting or countdown == 0:
exit()
diff --git a/tools/klockstat.py b/tools/klockstat.py
index d157b7be..b8cafd97 100755
--- a/tools/klockstat.py
+++ b/tools/klockstat.py
@@ -367,9 +367,30 @@ KFUNC_PROBE(mutex_lock, void *lock)
"""
+program_kfunc_nested = """
+KFUNC_PROBE(mutex_unlock, void *lock)
+{
+ return do_mutex_unlock_enter();
+}
+
+KRETFUNC_PROBE(mutex_lock_nested, void *lock, int ret)
+{
+ return do_mutex_lock_return();
+}
+
+KFUNC_PROBE(mutex_lock_nested, void *lock)
+{
+ return do_mutex_lock_enter(ctx, 3);
+}
+
+"""
+
is_support_kfunc = BPF.support_kfunc()
if is_support_kfunc:
- program += program_kfunc
+ if BPF.get_kprobe_functions(b"mutex_lock_nested"):
+ program += program_kfunc_nested
+ else:
+ program += program_kfunc
else:
program += program_kprobe
@@ -428,9 +449,14 @@ program = program.replace('STACK_STORAGE_SIZE', str(args.stack_storage_size))
b = BPF(text=program)
if not is_support_kfunc:
- b.attach_kprobe(event="mutex_unlock", fn_name="mutex_unlock_enter")
- b.attach_kretprobe(event="mutex_lock", fn_name="mutex_lock_return")
- b.attach_kprobe(event="mutex_lock", fn_name="mutex_lock_enter")
+ b.attach_kprobe(event="mutex_unlock", fn_name="mutex_unlock_enter")
+ # Depending on whether DEBUG_LOCK_ALLOC is set, the proper kprobe may be either mutex_lock or mutex_lock_nested
+ if BPF.get_kprobe_functions(b"mutex_lock_nested"):
+ b.attach_kretprobe(event="mutex_lock_nested", fn_name="mutex_lock_return")
+ b.attach_kprobe(event="mutex_lock_nested", fn_name="mutex_lock_enter")
+ else:
+ b.attach_kretprobe(event="mutex_lock", fn_name="mutex_lock_return")
+ b.attach_kprobe(event="mutex_lock", fn_name="mutex_lock_enter")
enabled = b.get_table("enabled");
diff --git a/tools/mdflush.py b/tools/mdflush.py
index 8a23520b..5dea0b4b 100755
--- a/tools/mdflush.py
+++ b/tools/mdflush.py
@@ -55,12 +55,12 @@ int kprobe__md_flush_request(struct pt_regs *ctx, void *mddev, struct bio *bio)
# header
print("Tracing md flush requests... Hit Ctrl-C to end.")
-print("%-8s %-6s %-16s %s" % ("TIME", "PID", "COMM", "DEVICE"))
+print("%-8s %-7s %-16s %s" % ("TIME", "PID", "COMM", "DEVICE"))
# process event
def print_event(cpu, data, size):
event = b["events"].event(data)
- print("%-8s %-6d %-16s %s" % (strftime("%H:%M:%S"), event.pid,
+ print("%-8s %-7d %-16s %s" % (strftime("%H:%M:%S"), event.pid,
event.comm.decode('utf-8', 'replace'),
event.disk.decode('utf-8', 'replace')))
diff --git a/tools/memleak.py b/tools/memleak.py
index 6cda1506..27a2e095 100755
--- a/tools/memleak.py
+++ b/tools/memleak.py
@@ -272,6 +272,19 @@ int realloc_exit(struct pt_regs *ctx) {
return gen_alloc_exit(ctx);
}
+int mmap_enter(struct pt_regs *ctx) {
+ size_t size = (size_t)PT_REGS_PARM2(ctx);
+ return gen_alloc_enter(ctx, size);
+}
+
+int mmap_exit(struct pt_regs *ctx) {
+ return gen_alloc_exit(ctx);
+}
+
+int munmap_enter(struct pt_regs *ctx, void *address) {
+ return gen_free_enter(ctx, address);
+}
+
int posix_memalign_enter(struct pt_regs *ctx, void **memptr, size_t alignment,
size_t size) {
u64 memptr64 = (u64)(size_t)memptr;
@@ -449,6 +462,7 @@ if not kernel_trace:
attach_probes("malloc")
attach_probes("calloc")
attach_probes("realloc")
+ attach_probes("mmap")
attach_probes("posix_memalign")
attach_probes("valloc", can_fail=True) # failed on Android, is deprecated in libc.so from bionic directory
attach_probes("memalign")
@@ -456,6 +470,8 @@ if not kernel_trace:
attach_probes("aligned_alloc", can_fail=True) # added in C11
bpf.attach_uprobe(name=obj, sym="free", fn_name="free_enter",
pid=pid)
+ bpf.attach_uprobe(name=obj, sym="munmap", fn_name="munmap_enter",
+ pid=pid)
else:
print("Attaching to kernel allocators, Ctrl+C to quit.")
@@ -494,7 +510,7 @@ def print_outstanding():
stack = list(stack_traces.walk(info.stack_id))
combined = []
for addr in stack:
- combined.append(bpf.sym(addr, pid,
+ combined.append(('0x'+format(addr, '016x')+'\t').encode('utf-8') + bpf.sym(addr, pid,
show_module=True, show_offset=True))
alloc_info[info.stack_id] = Allocation(combined,
info.size)
diff --git a/tools/mountsnoop.py b/tools/mountsnoop.py
index a6d7ecee..d186602d 100755
--- a/tools/mountsnoop.py
+++ b/tools/mountsnoop.py
@@ -420,6 +420,7 @@ def print_event(mounts, umounts, parent, cpu, data, size):
print('{:16} {:<7} {:<7} {:<11} {}'.format(
syscall['comm'].decode('utf-8', 'replace'), syscall['tgid'],
syscall['pid'], syscall['mnt_ns'], call))
+ sys.stdout.flush()
except KeyError:
# This might happen if we lost an event.
pass
diff --git a/tools/mysqld_qslower.py b/tools/mysqld_qslower.py
index 088cd632..e5e3b847 100755
--- a/tools/mysqld_qslower.py
+++ b/tools/mysqld_qslower.py
@@ -108,7 +108,7 @@ b = BPF(text=bpf_text, usdt_contexts=[u])
# header
print("Tracing MySQL server queries for PID %d slower than %s ms..." % (pid,
min_ms_text))
-print("%-14s %-6s %8s %s" % ("TIME(s)", "PID", "MS", "QUERY"))
+print("%-14s %-7s %8s %s" % ("TIME(s)", "PID", "MS", "QUERY"))
# process event
start = 0
@@ -117,7 +117,7 @@ def print_event(cpu, data, size):
event = b["events"].event(data)
if start == 0:
start = event.ts
- print("%-14.6f %-6d %8.3f %s" % (float(event.ts - start) / 1000000000,
+ print("%-14.6f %-7d %8.3f %s" % (float(event.ts - start) / 1000000000,
event.pid, float(event.delta) / 1000000, event.query))
# loop with callback to print_event
diff --git a/tools/oomkill.py b/tools/oomkill.py
index 3d6e927b..1bf441c4 100755
--- a/tools/oomkill.py
+++ b/tools/oomkill.py
@@ -37,12 +37,12 @@ BPF_PERF_OUTPUT(events);
void kprobe__oom_kill_process(struct pt_regs *ctx, struct oom_control *oc, const char *message)
{
- unsigned long totalpages;
struct task_struct *p = oc->chosen;
struct data_t data = {};
u32 pid = bpf_get_current_pid_tgid() >> 32;
+
data.fpid = pid;
- data.tpid = p->pid;
+ data.tpid = p->tgid;
data.pages = oc->totalpages;
bpf_get_current_comm(&data.fcomm, sizeof(data.fcomm));
bpf_probe_read_kernel(&data.tcomm, sizeof(data.tcomm), p->comm);
diff --git a/tools/softirqs.py b/tools/softirqs.py
index ba0dac36..0ed18c40 100755
--- a/tools/softirqs.py
+++ b/tools/softirqs.py
@@ -4,25 +4,30 @@
# softirqs Summarize soft IRQ (interrupt) event time.
# For Linux, uses BCC, eBPF.
#
-# USAGE: softirqs [-h] [-T] [-N] [-d] [interval] [count]
+# USAGE: softirqs [-h] [-T] [-N] [-C] [-d] [-c CPU] [interval] [count]
#
# Copyright (c) 2015 Brendan Gregg.
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 20-Oct-2015 Brendan Gregg Created this.
# 03-Apr-2017 Sasha Goldshtein Migrated to kernel tracepoints.
+# 07-Mar-2022 Rocky Xing Added CPU filter support.
+# 24-Mar-2022 Rocky Xing Added event counting support.
from __future__ import print_function
from bcc import BPF
from time import sleep, strftime
import argparse
+import sys
# arguments
examples = """examples:
./softirqs # sum soft irq event time
+ ./softirqs -C # show the number of soft irq events
./softirqs -d # show soft irq event time as histograms
./softirqs 1 10 # print 1 second summaries, 10 times
./softirqs -NT 1 # 1s summaries, nanoseconds, and timestamps
+ ./softirqs -c 1 # sum soft irq event time on CPU 1 only
"""
parser = argparse.ArgumentParser(
description="Summarize soft irq event time as histograms.",
@@ -32,8 +37,12 @@ parser.add_argument("-T", "--timestamp", action="store_true",
help="include timestamp on output")
parser.add_argument("-N", "--nanoseconds", action="store_true",
help="output in nanoseconds")
+parser.add_argument("-C", "--events", action="store_true",
+ help="show the number of soft irq events")
parser.add_argument("-d", "--dist", action="store_true",
help="show distributions as histograms")
+parser.add_argument("-c", "--cpu", type=int,
+ help="trace this CPU only")
parser.add_argument("interval", nargs="?", default=99999999,
help="output interval, in seconds")
parser.add_argument("count", nargs="?", default=99999999,
@@ -42,7 +51,13 @@ parser.add_argument("--ebpf", action="store_true",
help=argparse.SUPPRESS)
args = parser.parse_args()
countdown = int(args.count)
-if args.nanoseconds:
+if args.events and (args.dist or args.nanoseconds):
+ print("The --events option can't be used with time-based options")
+ exit()
+if args.events:
+ factor = 1
+ label = "count"
+elif args.nanoseconds:
factor = 1
label = "nsecs"
else:
@@ -70,16 +85,36 @@ typedef struct account_val {
} account_val_t;
BPF_HASH(start, entry_key_t, account_val_t);
-BPF_HASH(iptr, u32);
BPF_HISTOGRAM(dist, irq_key_t);
+"""
+bpf_text_count = """
+TRACEPOINT_PROBE(irq, softirq_entry)
+{
+ u32 cpu = bpf_get_smp_processor_id();
+
+ FILTER_CPU
+
+ irq_key_t key = { .slot = 0 /* ignore */ };
+ key.vec = args->vec;
+
+ dist.atomic_increment(key);
+
+ return 0;
+}
+"""
+
+bpf_text_time = """
TRACEPOINT_PROBE(irq, softirq_entry)
{
account_val_t val = {};
entry_key_t key = {};
+ u32 cpu = bpf_get_smp_processor_id();
+
+ FILTER_CPU
key.pid = bpf_get_current_pid_tgid();
- key.cpu = bpf_get_smp_processor_id();
+ key.cpu = cpu;
val.ts = bpf_ktime_get_ns();
val.vec = args->vec;
@@ -95,9 +130,12 @@ TRACEPOINT_PROBE(irq, softirq_exit)
account_val_t *valp;
irq_key_t key = {0};
entry_key_t entry_key = {};
+ u32 cpu = bpf_get_smp_processor_id();
+
+ FILTER_CPU
entry_key.pid = bpf_get_current_pid_tgid();
- entry_key.cpu = bpf_get_smp_processor_id();
+ entry_key.cpu = cpu;
// fetch timestamp and calculate delta
valp = start.lookup(&entry_key);
@@ -115,6 +153,11 @@ TRACEPOINT_PROBE(irq, softirq_exit)
}
"""
+if args.events:
+ bpf_text += bpf_text_count
+else:
+ bpf_text += bpf_text_time
+
# code substitutions
if args.dist:
bpf_text = bpf_text.replace('STORE',
@@ -124,6 +167,11 @@ else:
bpf_text = bpf_text.replace('STORE',
'key.vec = valp->vec; ' +
'dist.atomic_increment(key, delta);')
+if args.cpu is not None:
+ bpf_text = bpf_text.replace('FILTER_CPU',
+ 'if (cpu != %d) { return 0; }' % int(args.cpu))
+else:
+ bpf_text = bpf_text.replace('FILTER_CPU', '')
if debug or args.ebpf:
print(bpf_text)
if args.ebpf:
@@ -138,7 +186,10 @@ def vec_to_name(vec):
return ["hi", "timer", "net_tx", "net_rx", "block", "irq_poll",
"tasklet", "sched", "hrtimer", "rcu"][vec]
-print("Tracing soft irq event time... Hit Ctrl-C to end.")
+if args.events:
+ print("Tracing soft irq events... Hit Ctrl-C to end.")
+else:
+ print("Tracing soft irq event time... Hit Ctrl-C to end.")
# output
exiting = 0 if args.interval else 1
@@ -161,6 +212,8 @@ while (1):
print("%-16s %11d" % (vec_to_name(k.vec), v.value / factor))
dist.clear()
+ sys.stdout.flush()
+
countdown -= 1
if exiting or countdown == 0:
exit()
diff --git a/tools/softirqs_example.txt b/tools/softirqs_example.txt
index ef3174a2..a9141431 100644
--- a/tools/softirqs_example.txt
+++ b/tools/softirqs_example.txt
@@ -179,12 +179,27 @@ softirq = run_rebalance_domains
16384 -> 32767 : 24 |** |
+Sometimes you just want counts of events, and don't need the distribution
+of times. You can use the -C or --events option:
+
+# ./softirqs.py -C
+Tracing soft irq events... Hit Ctrl-C to end.
+^C
+SOFTIRQ TOTAL_count
+block 5
+tasklet 6
+net_rx 402
+sched 5251
+rcu 5748
+timer 9530
+
+
USAGE message:
# ./softirqs -h
-usage: softirqs [-h] [-T] [-N] [-d] [interval] [count]
+usage: softirqs [-h] [-T] [-N] [-C] [-d] [-c CPU] [interval] [count]
-Summarize soft irq event time as histograms
+Summarize soft irq event time as histograms.
positional arguments:
interval output interval, in seconds
@@ -194,10 +209,15 @@ optional arguments:
-h, --help show this help message and exit
-T, --timestamp include timestamp on output
-N, --nanoseconds output in nanoseconds
+ -C, --events show the number of soft irq events
-d, --dist show distributions as histograms
+ -c CPU, --cpu CPU trace this CPU only
examples:
./softirqs # sum soft irq event time
+ ./softirqs -C # show the number of soft irq events
./softirqs -d # show soft irq event time as histograms
./softirqs 1 10 # print 1 second summaries, 10 times
./softirqs -NT 1 # 1s summaries, nanoseconds, and timestamps
+ ./softirqs -c 1 # sum soft irq event time on CPU 1 only
+
diff --git a/tools/sslsniff.py b/tools/sslsniff.py
index 8bc61ce7..4621e9f6 100755
--- a/tools/sslsniff.py
+++ b/tools/sslsniff.py
@@ -5,7 +5,7 @@
# For Linux, uses BCC, eBPF.
#
# USAGE: sslsniff.py [-h] [-p PID] [-u UID] [-x] [-c COMM] [-o] [-g] [-n] [-d]
-# [--hexdump] [--max-buffer-size SIZE]
+# [--hexdump] [--max-buffer-size SIZE] [-l] [--handshake]
#
# Licensed under the Apache License, Version 2.0 (the "License")
#
@@ -19,6 +19,7 @@ from bcc import BPF
import argparse
import binascii
import textwrap
+import os.path
# arguments
examples = """examples:
@@ -31,7 +32,29 @@ examples = """examples:
./sslsniff --no-nss # don't show NSS calls
./sslsniff --hexdump # show data as hex instead of trying to decode it as UTF-8
./sslsniff -x # show process UID and TID
+ ./sslsniff -l # show function latency
+ ./sslsniff -l --handshake # show SSL handshake latency
+ ./sslsniff --extra-lib openssl:/path/libssl.so.1.1 # sniff extra library
"""
+
+
+def ssllib_type(input_str):
+ valid_types = frozenset(['openssl', 'gnutls', 'nss'])
+
+ try:
+ lib_type, lib_path = input_str.split(':', 1)
+ except ValueError:
+ raise argparse.ArgumentTypeError("Invalid SSL library param: %r" % input_str)
+
+ if lib_type not in valid_types:
+ raise argparse.ArgumentTypeError("Invalid SSL library type: %r" % lib_type)
+
+ if not os.path.isfile(lib_path):
+ raise argparse.ArgumentTypeError("Invalid library path: %r" % lib_path)
+
+ return lib_type, lib_path
+
+
parser = argparse.ArgumentParser(
description="Sniff SSL data",
formatter_class=argparse.RawDescriptionHelpFormatter,
@@ -57,6 +80,12 @@ parser.add_argument("--hexdump", action="store_true", dest="hexdump",
help="show data as hexdump instead of trying to decode it as UTF-8")
parser.add_argument('--max-buffer-size', type=int, default=8192,
help='Size of captured buffer')
+parser.add_argument("-l", "--latency", action="store_true",
+ help="show function latency")
+parser.add_argument("--handshake", action="store_true",
+ help="show SSL handshake latency, enabled only if latency option is on.")
+parser.add_argument("--extra-lib", type=ssllib_type, action='append',
+ help="Intercept calls from extra library (format: lib_type:lib_path)")
args = parser.parse_args()
@@ -68,11 +97,13 @@ prog = """
struct probe_SSL_data_t {
u64 timestamp_ns;
+ u64 delta_ns;
u32 pid;
u32 tid;
u32 uid;
u32 len;
int buf_filled;
+ int rw;
char comm[TASK_COMM_LEN];
u8 buf[MAX_BUF_SIZE];
};
@@ -80,106 +111,143 @@ struct probe_SSL_data_t {
#define BASE_EVENT_SIZE ((size_t)(&((struct probe_SSL_data_t*)0)->buf))
#define EVENT_SIZE(X) (BASE_EVENT_SIZE + ((size_t)(X)))
-
BPF_PERCPU_ARRAY(ssl_data, struct probe_SSL_data_t, 1);
-BPF_PERF_OUTPUT(perf_SSL_write);
+BPF_PERF_OUTPUT(perf_SSL_rw);
+
+BPF_HASH(start_ns, u32);
+BPF_HASH(bufs, u32, u64);
-int probe_SSL_write(struct pt_regs *ctx, void *ssl, void *buf, int num) {
+int probe_SSL_rw_enter(struct pt_regs *ctx, void *ssl, void *buf, int num) {
int ret;
u32 zero = 0;
u64 pid_tgid = bpf_get_current_pid_tgid();
u32 pid = pid_tgid >> 32;
u32 tid = pid_tgid;
u32 uid = bpf_get_current_uid_gid();
+ u64 ts = bpf_ktime_get_ns();
PID_FILTER
UID_FILTER
+
+ bufs.update(&tid, (u64*)&buf);
+ start_ns.update(&tid, &ts);
+ return 0;
+}
+
+static int SSL_exit(struct pt_regs *ctx, int rw) {
+ int ret;
+ u32 zero = 0;
+ u64 pid_tgid = bpf_get_current_pid_tgid();
+ u32 pid = pid_tgid >> 32;
+ u32 tid = (u32)pid_tgid;
+ u32 uid = bpf_get_current_uid_gid();
+ u64 ts = bpf_ktime_get_ns();
+
+ PID_FILTER
+ UID_FILTER
+
+ u64 *bufp = bufs.lookup(&tid);
+ if (bufp == 0)
+ return 0;
+
+ u64 *tsp = start_ns.lookup(&tid);
+ if (tsp == 0)
+ return 0;
+
+ int len = PT_REGS_RC(ctx);
+ if (len <= 0) // no data
+ return 0;
+
struct probe_SSL_data_t *data = ssl_data.lookup(&zero);
if (!data)
return 0;
- data->timestamp_ns = bpf_ktime_get_ns();
+ data->timestamp_ns = ts;
+ data->delta_ns = ts - *tsp;
data->pid = pid;
data->tid = tid;
data->uid = uid;
- data->len = num;
+ data->len = (u32)len;
data->buf_filled = 0;
+ data->rw = rw;
+ u32 buf_copy_size = min((size_t)MAX_BUF_SIZE, (size_t)len);
+
bpf_get_current_comm(&data->comm, sizeof(data->comm));
- u32 buf_copy_size = min((size_t)MAX_BUF_SIZE, (size_t)num);
- if (buf != 0)
- ret = bpf_probe_read_user(data->buf, buf_copy_size, buf);
+ if (bufp != 0)
+ ret = bpf_probe_read_user(&data->buf, buf_copy_size, (char *)*bufp);
+
+ bufs.delete(&tid);
+ start_ns.delete(&tid);
if (!ret)
data->buf_filled = 1;
else
buf_copy_size = 0;
- perf_SSL_write.perf_submit(ctx, data, EVENT_SIZE(buf_copy_size));
+ perf_SSL_rw.perf_submit(ctx, data, EVENT_SIZE(buf_copy_size));
return 0;
}
-BPF_PERF_OUTPUT(perf_SSL_read);
+int probe_SSL_read_exit(struct pt_regs *ctx) {
+ return (SSL_exit(ctx, 0));
+}
-BPF_HASH(bufs, u32, u64);
+int probe_SSL_write_exit(struct pt_regs *ctx) {
+ return (SSL_exit(ctx, 1));
+}
+
+BPF_PERF_OUTPUT(perf_SSL_do_handshake);
-int probe_SSL_read_enter(struct pt_regs *ctx, void *ssl, void *buf, int num) {
+int probe_SSL_do_handshake_enter(struct pt_regs *ctx, void *ssl) {
u64 pid_tgid = bpf_get_current_pid_tgid();
u32 pid = pid_tgid >> 32;
u32 tid = (u32)pid_tgid;
+ // keep uid declared: the UID_FILTER placeholder below presumably expands
+ // to a check on it (the other probes declare uid before UID_FILTER too)
u32 uid = bpf_get_current_uid_gid();
+ u64 ts = bpf_ktime_get_ns();
PID_FILTER
UID_FILTER
- bufs.update(&tid, (u64*)&buf);
+ start_ns.update(&tid, &ts);
return 0;
}
-int probe_SSL_read_exit(struct pt_regs *ctx, void *ssl, void *buf, int num) {
+int probe_SSL_do_handshake_exit(struct pt_regs *ctx) {
u32 zero = 0;
u64 pid_tgid = bpf_get_current_pid_tgid();
u32 pid = pid_tgid >> 32;
u32 tid = (u32)pid_tgid;
u32 uid = bpf_get_current_uid_gid();
+ u64 ts = bpf_ktime_get_ns();
int ret;
PID_FILTER
UID_FILTER
- u64 *bufp = bufs.lookup(&tid);
- if (bufp == 0)
+ u64 *tsp = start_ns.lookup(&tid);
+ if (tsp == 0)
return 0;
- int len = PT_REGS_RC(ctx);
- if (len <= 0) // read failed
+ ret = PT_REGS_RC(ctx);
+ if (ret <= 0) // handshake failed
return 0;
struct probe_SSL_data_t *data = ssl_data.lookup(&zero);
if (!data)
return 0;
- data->timestamp_ns = bpf_ktime_get_ns();
+ data->timestamp_ns = ts;
+ data->delta_ns = ts - *tsp;
data->pid = pid;
data->tid = tid;
data->uid = uid;
- data->len = (u32)len;
+ data->len = ret;
data->buf_filled = 0;
- u32 buf_copy_size = min((size_t)MAX_BUF_SIZE, (size_t)len);
-
+ data->rw = 2;
bpf_get_current_comm(&data->comm, sizeof(data->comm));
+ start_ns.delete(&tid);
- if (bufp != 0)
- ret = bpf_probe_read_user(&data->buf, buf_copy_size, (char *)*bufp);
-
- bufs.delete(&tid);
-
- if (!ret)
- data->buf_filled = 1;
- else
- buf_copy_size = 0;
-
- perf_SSL_read.perf_submit(ctx, data, EVENT_SIZE(buf_copy_size));
+ perf_SSL_do_handshake.perf_submit(ctx, data, EVENT_SIZE(0));
return 0;
}
"""
@@ -208,59 +276,92 @@ b = BPF(text=prog)
# need to stash the buffer address in a map on the function entry and read it
# on its exit (Mark Drayton)
#
-if args.openssl:
- b.attach_uprobe(name="ssl", sym="SSL_write", fn_name="probe_SSL_write",
- pid=args.pid or -1)
- b.attach_uprobe(name="ssl", sym="SSL_read", fn_name="probe_SSL_read_enter",
- pid=args.pid or -1)
- b.attach_uretprobe(name="ssl", sym="SSL_read",
+def attach_openssl(lib):
+ b.attach_uprobe(name=lib, sym="SSL_write",
+ fn_name="probe_SSL_rw_enter", pid=args.pid or -1)
+ b.attach_uretprobe(name=lib, sym="SSL_write",
+ fn_name="probe_SSL_write_exit", pid=args.pid or -1)
+ b.attach_uprobe(name=lib, sym="SSL_read",
+ fn_name="probe_SSL_rw_enter", pid=args.pid or -1)
+ b.attach_uretprobe(name=lib, sym="SSL_read",
fn_name="probe_SSL_read_exit", pid=args.pid or -1)
-
-if args.gnutls:
- b.attach_uprobe(name="gnutls", sym="gnutls_record_send",
- fn_name="probe_SSL_write", pid=args.pid or -1)
- b.attach_uprobe(name="gnutls", sym="gnutls_record_recv",
- fn_name="probe_SSL_read_enter", pid=args.pid or -1)
- b.attach_uretprobe(name="gnutls", sym="gnutls_record_recv",
+ if args.latency and args.handshake:
+ # attach to the library passed in, like the read/write probes above, so
+ # --extra-lib openssl:<path> traces handshakes in that library as well
+ b.attach_uprobe(name=lib, sym="SSL_do_handshake",
+ fn_name="probe_SSL_do_handshake_enter", pid=args.pid or -1)
+ b.attach_uretprobe(name=lib, sym="SSL_do_handshake",
+ fn_name="probe_SSL_do_handshake_exit", pid=args.pid or -1)
+
+def attach_gnutls(lib):
+ b.attach_uprobe(name=lib, sym="gnutls_record_send",
+ fn_name="probe_SSL_rw_enter", pid=args.pid or -1)
+ b.attach_uretprobe(name=lib, sym="gnutls_record_send",
+ fn_name="probe_SSL_write_exit", pid=args.pid or -1)
+ b.attach_uprobe(name=lib, sym="gnutls_record_recv",
+ fn_name="probe_SSL_rw_enter", pid=args.pid or -1)
+ b.attach_uretprobe(name=lib, sym="gnutls_record_recv",
fn_name="probe_SSL_read_exit", pid=args.pid or -1)
-if args.nss:
- b.attach_uprobe(name="nspr4", sym="PR_Write", fn_name="probe_SSL_write",
- pid=args.pid or -1)
- b.attach_uprobe(name="nspr4", sym="PR_Send", fn_name="probe_SSL_write",
- pid=args.pid or -1)
- b.attach_uprobe(name="nspr4", sym="PR_Read", fn_name="probe_SSL_read_enter",
- pid=args.pid or -1)
- b.attach_uretprobe(name="nspr4", sym="PR_Read",
+def attach_nss(lib):
+ b.attach_uprobe(name=lib, sym="PR_Write",
+ fn_name="probe_SSL_rw_enter", pid=args.pid or -1)
+ b.attach_uretprobe(name=lib, sym="PR_Write",
+ fn_name="probe_SSL_write_exit", pid=args.pid or -1)
+ b.attach_uprobe(name=lib, sym="PR_Send",
+ fn_name="probe_SSL_rw_enter", pid=args.pid or -1)
+ b.attach_uretprobe(name=lib, sym="PR_Send",
+ fn_name="probe_SSL_write_exit", pid=args.pid or -1)
+ b.attach_uprobe(name=lib, sym="PR_Read",
+ fn_name="probe_SSL_rw_enter", pid=args.pid or -1)
+ b.attach_uretprobe(name=lib, sym="PR_Read",
fn_name="probe_SSL_read_exit", pid=args.pid or -1)
- b.attach_uprobe(name="nspr4", sym="PR_Recv", fn_name="probe_SSL_read_enter",
- pid=args.pid or -1)
- b.attach_uretprobe(name="nspr4", sym="PR_Recv",
+ b.attach_uprobe(name=lib, sym="PR_Recv",
+ fn_name="probe_SSL_rw_enter", pid=args.pid or -1)
+ b.attach_uretprobe(name=lib, sym="PR_Recv",
fn_name="probe_SSL_read_exit", pid=args.pid or -1)
+
+LIB_TRACERS = {
+ "openssl": attach_openssl,
+ "gnutls": attach_gnutls,
+ "nss": attach_nss,
+}
+
+
+if args.openssl:
+ attach_openssl("ssl")
+if args.gnutls:
+ attach_gnutls("gnutls")
+if args.nss:
+ attach_nss("nspr4")
+
+
+if args.extra_lib:
+ for lib_type, lib_path in args.extra_lib:
+ LIB_TRACERS[lib_type](lib_path)
+
# define output data structure in Python
# header
-header = "%-12s %-18s %-16s %-7s %-6s" % ("FUNC", "TIME(s)", "COMM", "PID", "LEN")
+header = "%-12s %-18s %-16s %-7s %-7s" % ("FUNC", "TIME(s)", "COMM", "PID", "LEN")
if args.extra:
header += " %-7s %-7s" % ("UID", "TID")
+if args.latency:
+ header += " %-7s" % ("LAT(ms)")
+
print(header)
# process event
start = 0
+def print_event_rw(cpu, data, size):
+ print_event(cpu, data, size, "perf_SSL_rw")
-def print_event_write(cpu, data, size):
- print_event(cpu, data, size, "WRITE/SEND", "perf_SSL_write")
-
+def print_event_handshake(cpu, data, size):
+ print_event(cpu, data, size, "perf_SSL_do_handshake")
-def print_event_read(cpu, data, size):
- print_event(cpu, data, size, "READ/RECV", "perf_SSL_read")
-
-
-def print_event(cpu, data, size, rw, evt):
+def print_event(cpu, data, size, evt):
global start
event = b[evt].event(data)
if event.len <= args.max_buffer_size:
@@ -283,6 +384,8 @@ def print_event(cpu, data, size, rw, evt):
start = event.timestamp_ns
time_s = (float(event.timestamp_ns - start)) / 1000000000
+ lat_str = "%.3f" % (event.delta_ns / 1000000) if event.delta_ns else "N/A"
+
s_mark = "-" * 5 + " DATA " + "-" * 5
e_mark = "-" * 5 + " END DATA " + "-" * 5
@@ -297,6 +400,9 @@ def print_event(cpu, data, size, rw, evt):
if args.extra:
base_fmt += " %(uid)-7d %(tid)-7d"
+ if args.latency:
+ base_fmt += " %(lat)-7s"
+
fmt = ''.join([base_fmt, "\n%(begin)s\n%(data)s\n%(end)s\n\n"])
if args.hexdump:
unwrapped_data = binascii.hexlify(buf)
@@ -304,9 +410,16 @@ def print_event(cpu, data, size, rw, evt):
else:
data = buf.decode('utf-8', 'replace')
+ rw_event = {
+ 0: "READ/RECV",
+ 1: "WRITE/SEND",
+ 2: "HANDSHAKE"
+ }
+
fmt_data = {
- 'func': rw,
+ 'func': rw_event[event.rw],
'time': time_s,
+ 'lat': lat_str,
'comm': event.comm.decode('utf-8', 'replace'),
'pid': event.pid,
'tid': event.tid,
@@ -317,11 +430,14 @@ def print_event(cpu, data, size, rw, evt):
'data': data
}
- print(fmt % fmt_data)
-
+ # use base_fmt if no buf filled
+ if buf_size == 0:
+ print(base_fmt % fmt_data)
+ else:
+ print(fmt % fmt_data)
-b["perf_SSL_write"].open_perf_buffer(print_event_write)
-b["perf_SSL_read"].open_perf_buffer(print_event_read)
+b["perf_SSL_rw"].open_perf_buffer(print_event_rw)
+b["perf_SSL_do_handshake"].open_perf_buffer(print_event_handshake)
while 1:
try:
b.perf_buffer_poll()
diff --git a/tools/sslsniff_example.txt b/tools/sslsniff_example.txt
index fa36c40d..905f8a05 100644
--- a/tools/sslsniff_example.txt
+++ b/tools/sslsniff_example.txt
@@ -103,10 +103,75 @@ lot of characters that are not printable or even Unicode replacement
characters.
+Use -l or --latency option to show function latency, and show handshake latency
+by using both -l and --handshake. This is useful for SSL/TLS performance
+analysis. Tracing output of "echo | openssl s_client -connect example.com:443":
+
+# ./sslsniff.py -l --handshake
+FUNC TIME(s) COMM PID LEN LAT(ms)
+WRITE/SEND 0.000000000 openssl 10377 1 0.005
+----- DATA -----
+
+
+----- END DATA -----
+
+Trace a localhost server instead of example.com. It takes 0.7ms for the server
+handshake before the secure connection is ready for the initial SSL_read or
+SSL_write.
+
+# ./sslsniff.py -l --handshake
+FUNC TIME(s) COMM PID LEN LAT(ms)
+HANDSHAKE 0.000000000 nginx 7081 1 0.699
+WRITE/SEND 0.000132180 openssl 14800 1 0.010
+----- DATA -----
+
+
+----- END DATA -----
+
+READ/RECV 0.000136583 nginx 7081 1 0.004
+----- DATA -----
+
+
+----- END DATA -----
+
+Tracing output of "echo | gnutls-cli -p 443 example.com":
+
+# ./sslsniff.py -l --handshake
+FUNC TIME(s) COMM PID LEN LAT(ms)
+WRITE/SEND 0.000000000 gnutls-cli 43554 1 0.012
+----- DATA -----
+
+
+----- END DATA -----
+
+Tracing output of "echo | gnutls-cli -p 443 --insecure localhost":
+
+# ./sslsniff.py -l --handshake
+FUNC TIME(s) COMM PID LEN LAT(ms)
+HANDSHAKE 0.000000000 nginx 7081 1 0.710
+WRITE/SEND 0.000045126 gnutls-cli 43752 1 0.014
+----- DATA -----
+
+
+----- END DATA -----
+
+READ/RECV 0.000049464 nginx 7081 1 0.004
+----- DATA -----
+
+
+----- END DATA -----
+
+Tracing a few extra libraries (useful for docker containers and other isolated
+apps)
+
+# ./sslsniff.py --extra-lib openssl:/var/lib/docker/overlay2/l/S4EMHE/lib/libssl.so.1.1
+
+
+
USAGE message:
usage: sslsniff.py [-h] [-p PID] [-u UID] [-x] [-c COMM] [-o] [-g] [-n] [-d]
- [--hexdump] [--max-buffer-size MAX_BUFFER_SIZE]
+ [--hexdump] [--max-buffer-size MAX_BUFFER_SIZE] [-l]
+ [--handshake] [--extra-lib EXTRA_LIB]
Sniff SSL data
@@ -124,6 +189,14 @@ optional arguments:
UTF-8
--max-buffer-size MAX_BUFFER_SIZE
Size of captured buffer
+ -l, --latency show function latency
+ --handshake show SSL handshake latency, enabled only if latency
+ option is on.
+ --extra-lib EXTRA_LIB
+ Intercept calls from extra library
+ (format: lib_type:lib_path)
+
+
examples:
./sslsniff # sniff OpenSSL and GnuTLS functions
@@ -135,3 +208,6 @@ examples:
./sslsniff --no-nss # don't show NSS calls
./sslsniff --hexdump # show data as hex instead of trying to decode it as UTF-8
./sslsniff -x # show process UID and TID
+ ./sslsniff -l # show function latency
+ ./sslsniff -l --handshake # show SSL handshake latency
+ ./sslsniff --extra-lib openssl:/path/libssl.so.1.1 # sniff extra library
diff --git a/tools/swapin.py b/tools/swapin.py
index e94000af..67a10dbb 100755
--- a/tools/swapin.py
+++ b/tools/swapin.py
@@ -74,11 +74,11 @@ while 1:
if not args.notime:
print(strftime("%H:%M:%S"))
- print("%-16s %-6s %s" % ("COMM", "PID", "COUNT"))
+ print("%-16s %-7s %s" % ("COMM", "PID", "COUNT"))
counts = b.get_table("counts")
for k, v in sorted(counts.items(),
key=lambda counts: counts[1].value):
- print("%-16s %-6d %d" % (k.comm, k.pid, v.value))
+ print("%-16s %-7d %d" % (k.comm, k.pid, v.value))
counts.clear()
print()
diff --git a/tools/syncsnoop.py b/tools/syncsnoop.py
index e5fa78e3..e96cd3c4 100755
--- a/tools/syncsnoop.py
+++ b/tools/syncsnoop.py
@@ -15,6 +15,7 @@
from __future__ import print_function
from bcc import BPF
+import sys
# load BPF program
b = BPF(text="""
@@ -40,6 +41,7 @@ print("%-18s %s" % ("TIME(s)", "CALL"))
def print_event(cpu, data, size):
event = b["events"].event(data)
print("%-18.9f sync()" % (float(event.ts) / 1000000))
+ sys.stdout.flush()
# loop with callback to print_event
b["events"].open_perf_buffer(print_event)
diff --git a/tools/tcpaccept.py b/tools/tcpaccept.py
index d3e44143..b2ace4fa 100755
--- a/tools/tcpaccept.py
+++ b/tools/tcpaccept.py
@@ -116,7 +116,7 @@ int kretprobe__inet_csk_accept(struct pt_regs *ctx)
return 0;
// check this is TCP
- u8 protocol = 0;
+ u16 protocol = 0;
// workaround for reading the sk_protocol bitfield:
// Following comments add by Joe Yin:
@@ -132,7 +132,12 @@ int kretprobe__inet_csk_accept(struct pt_regs *ctx)
int gso_max_segs_offset = offsetof(struct sock, sk_gso_max_segs);
int sk_lingertime_offset = offsetof(struct sock, sk_lingertime);
- if (sk_lingertime_offset - gso_max_segs_offset == 4)
+
+ // Since kernel v5.6 sk_protocol is its own u16 field and gso_max_segs
+ // precedes sk_lingertime.
+ if (sk_lingertime_offset - gso_max_segs_offset == 2)
+ protocol = newsk->sk_protocol;
+ else if (sk_lingertime_offset - gso_max_segs_offset == 4)
// 4.10+ with little endian
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
protocol = *(u8 *)((u64)&newsk->sk_gso_max_segs - 3);
diff --git a/tools/tcpcong.py b/tools/tcpcong.py
new file mode 100755
index 00000000..671cd11f
--- /dev/null
+++ b/tools/tcpcong.py
@@ -0,0 +1,559 @@
+#!/usr/bin/python
+# @lint-avoid-python-3-compatibility-imports
+#
+# tcpcong Measure tcp congestion control status duration.
+# For Linux, uses BCC, eBPF.
+#
+# USAGE: tcpcong [-h] [-T] [-L] [-R] [-u] [-d] [interval] [outputs]
+#
+# Copyright (c) Ping Gan.
+#
+# 27-Jan-2022 Ping Gan Created this.
+
+from __future__ import print_function
+from bcc import BPF
+from time import sleep, strftime
+from struct import pack
+from socket import inet_ntop, AF_INET, AF_INET6
+from struct import pack
+import argparse
+
+examples = """examples:
+ ./tcpcong # show tcp congestion status duration
+ ./tcpcong 1 10 # show 1 second summaries, 10 times
+ ./tcpcong -L 3000-3006 1 # 1s summaries, local port 3000-3006
+ ./tcpcong -R 5000-5005 1 # 1s summaries, remote port 5000-5005
+ ./tcpcong -uT 1 # 1s summaries, microseconds, and timestamps
+ ./tcpcong -d # show the duration as histograms
+"""
+
+parser = argparse.ArgumentParser(
+ description="Summarize tcp socket congestion control status duration",
+ formatter_class=argparse.RawDescriptionHelpFormatter,
+ epilog=examples)
+parser.add_argument("-L", "--localport",
+ help="trace local ports only")
+parser.add_argument("-R", "--remoteport",
+ help="trace the dest ports only")
+parser.add_argument("-T", "--timestamp", action="store_true",
+ help="include timestamp on output")
+parser.add_argument("-d", "--dist", action="store_true",
+ help="show distributions as histograms")
+parser.add_argument("-u", "--microseconds", action="store_true",
+ help="output in microseconds")
+parser.add_argument("interval", nargs="?", default=99999999,
+ help="output interval, in seconds")
+parser.add_argument("outputs", nargs="?", default=99999999,
+ help="number of outputs")
+parser.add_argument("--ebpf", action="store_true",
+ help=argparse.SUPPRESS)
+args = parser.parse_args()
+countdown = int(args.outputs)
+debug = 0
+
+def _parse_port_range(spec, label):
+    """Turn a "PORT" or "START-END" option value into an ordered (start, end).
+
+    spec is the raw option string (None/empty when the option was not
+    given, in which case (-1, -1) is returned). label is "remote" or
+    "local" and is only used in the error message. Exits with status 1
+    when spec has more than one '-'; non-numeric parts raise ValueError
+    from int().
+    """
+    if not spec:
+        return -1, -1
+    parts = spec.split("-")
+    if len(parts) not in (1, 2):
+        print("unrecognized %s port range" % label)
+        exit(1)
+    first = int(parts[0])
+    # a single port is treated as the range PORT-PORT
+    last = int(parts[1]) if len(parts) == 2 else first
+    # a reversed range is accepted and put in ascending order
+    if first > last:
+        first, last = last, first
+    return first, last
+
+
+start_rport, end_rport = _parse_port_range(args.remoteport, "remote")
+start_lport, end_lport = _parse_port_range(args.localport, "local")
+
+# define BPF program
+bpf_text = """
+#include <uapi/linux/ptrace.h>
+#include <net/sock.h>
+#include <bcc/proto.h>
+#include <net/tcp.h>
+#include <net/inet_connection_sock.h>
+
+typedef struct ipv4_flow_key {
+ u32 saddr;
+ u32 daddr;
+ u16 lport;
+ u16 dport;
+} ipv4_flow_key_t;
+
+typedef struct ipv6_flow_key {
+ unsigned __int128 saddr;
+ unsigned __int128 daddr;
+ u16 lport;
+ u16 dport;
+} ipv6_flow_key_t;
+
+typedef struct process_key {
+ char comm[TASK_COMM_LEN];
+ u32 tid;
+} process_key_t;
+
+typedef struct ipv4_flow_val {
+ ipv4_flow_key_t ipv4_key;
+ u16 cong_state;
+} ipv4_flow_val_t;
+
+typedef struct ipv6_flow_val {
+ ipv6_flow_key_t ipv6_key;
+ u16 cong_state;
+} ipv6_flow_val_t;
+
+BPF_HASH(start_ipv4, process_key_t, ipv4_flow_val_t);
+BPF_HASH(start_ipv6, process_key_t, ipv6_flow_val_t);
+SOCK_STORE_DEF
+
+typedef struct data_val {
+ DEF_TEXT
+ u64 last_ts;
+ u16 last_cong_stat;
+} data_val_t;
+
+typedef struct cong {
+ u8 cong_stat:5,
+ ca_inited:1,
+ ca_setsockopt:1,
+ ca_dstlocked:1;
+} cong_status_t;
+
+BPF_HASH(ipv4_stat, ipv4_flow_key_t, data_val_t);
+BPF_HASH(ipv6_stat, ipv6_flow_key_t, data_val_t);
+
+HIST_TABLE
+
+static int entry_state_update_func(struct sock *sk)
+{
+ u16 dport = 0, lport = 0;
+ u32 tid = bpf_get_current_pid_tgid();
+ process_key_t key = {0};
+ bpf_get_current_comm(&key.comm, sizeof(key.comm));
+ key.tid = tid;
+
+ u64 family = sk->__sk_common.skc_family;
+ struct inet_connection_sock *icsk = inet_csk(sk);
+ cong_status_t cong_status;
+ bpf_probe_read_kernel(&cong_status, sizeof(cong_status),
+ (void *)((long)&icsk->icsk_retransmits) - 1);
+ if (family == AF_INET) {
+ ipv4_flow_val_t ipv4_val = {0};
+ ipv4_val.ipv4_key.saddr = sk->__sk_common.skc_rcv_saddr;
+ ipv4_val.ipv4_key.daddr = sk->__sk_common.skc_daddr;
+ ipv4_val.ipv4_key.lport = sk->__sk_common.skc_num;
+ dport = sk->__sk_common.skc_dport;
+ dport = ntohs(dport);
+ lport = ipv4_val.ipv4_key.lport;
+ FILTER_LPORT
+ FILTER_DPORT
+ ipv4_val.ipv4_key.dport = dport;
+ ipv4_val.cong_state = cong_status.cong_stat + 1;
+ start_ipv4.update(&key, &ipv4_val);
+ } else if (family == AF_INET6) {
+ ipv6_flow_val_t ipv6_val = {0};
+ bpf_probe_read_kernel(&ipv6_val.ipv6_key.saddr,
+ sizeof(ipv6_val.ipv6_key.saddr),
+ &sk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
+ bpf_probe_read_kernel(&ipv6_val.ipv6_key.daddr,
+ sizeof(ipv6_val.ipv6_key.daddr),
+ &sk->__sk_common.skc_v6_daddr.in6_u.u6_addr32);
+ ipv6_val.ipv6_key.lport = sk->__sk_common.skc_num;
+ dport = sk->__sk_common.skc_dport;
+ dport = ntohs(dport);
+ lport = ipv6_val.ipv6_key.lport;
+ FILTER_LPORT
+ FILTER_DPORT
+ ipv6_val.ipv6_key.dport = dport;
+ ipv6_val.cong_state = cong_status.cong_stat + 1;
+ start_ipv6.update(&key, &ipv6_val);
+ }
+ SOCK_STORE_ADD
+ return 0;
+}
+
+static int ret_state_update_func(struct sock *sk)
+{
+ u64 ts, ts1;
+ u16 family, last_cong_state;
+ u16 dport = 0, lport = 0;
+ u32 tid = bpf_get_current_pid_tgid();
+ process_key_t key = {0};
+ bpf_get_current_comm(&key.comm, sizeof(key.comm));
+ key.tid = tid;
+
+ struct inet_connection_sock *icsk = inet_csk(sk);
+ cong_status_t cong_status;
+ bpf_probe_read_kernel(&cong_status, sizeof(cong_status),
+ (void *)((long)&icsk->icsk_retransmits) - 1);
+ data_val_t *datap, data = {0};
+ STATE_KEY
+ bpf_probe_read_kernel(&family, sizeof(family),
+ &sk->__sk_common.skc_family);
+ if (family == AF_INET) {
+ ipv4_flow_val_t *val4 = start_ipv4.lookup(&key);
+ if (val4 == 0) {
+ SOCK_STORE_DEL
+ return 0; //missed
+ }
+ ipv4_flow_key_t keyv4 = {0};
+ bpf_probe_read_kernel(&keyv4, sizeof(ipv4_flow_key_t),
+ &(val4->ipv4_key));
+ dport = keyv4.dport;
+ lport = keyv4.lport;
+ FILTER_LPORT
+ FILTER_DPORT
+ datap = ipv4_stat.lookup(&keyv4);
+ if (datap == 0) {
+ data.last_ts = bpf_ktime_get_ns();
+ data.last_cong_stat = val4->cong_state;
+ ipv4_stat.update(&keyv4, &data);
+ } else {
+ last_cong_state = val4->cong_state;
+ if ((cong_status.cong_stat + 1) != last_cong_state) {
+ ts1 = bpf_ktime_get_ns();
+ ts = ts1 - datap->last_ts;
+ datap->last_ts = ts1;
+ datap->last_cong_stat = cong_status.cong_stat + 1;
+ ts /= 1000;
+ STORE
+ }
+ }
+ start_ipv4.delete(&key);
+ } else if (family == AF_INET6) {
+ ipv6_flow_val_t *val6 = start_ipv6.lookup(&key);
+ if (val6 == 0) {
+ SOCK_STORE_DEL
+ return 0; //missed
+ }
+ ipv6_flow_key_t keyv6 = {0};
+ bpf_probe_read_kernel(&keyv6, sizeof(ipv6_flow_key_t),
+ &(val6->ipv6_key));
+ dport = keyv6.dport;
+ lport = keyv6.lport;
+ FILTER_LPORT
+ FILTER_DPORT
+ datap = ipv6_stat.lookup(&keyv6);
+ if (datap == 0) {
+ data.last_ts = bpf_ktime_get_ns();
+ data.last_cong_stat = val6->cong_state;
+ ipv6_stat.update(&keyv6, &data);
+ } else {
+ last_cong_state = val6->cong_state;
+ if ((cong_status.cong_stat + 1) != last_cong_state) {
+ ts1 = bpf_ktime_get_ns();
+ ts = ts1 - datap->last_ts;
+ datap->last_ts = ts1;
+ datap->last_cong_stat = (cong_status.cong_stat + 1);
+ ts /= 1000;
+ STORE
+ }
+ }
+ start_ipv6.delete(&key);
+ }
+ SOCK_STORE_DEL
+ return 0;
+}
+"""
+
+kprobe_program = """
+int entry_func(struct pt_regs *ctx, struct sock *sk)
+{
+ return entry_state_update_func(sk);
+}
+
+int ret_func(struct pt_regs *ctx)
+{
+ u32 tid = bpf_get_current_pid_tgid();
+ process_key_t key = {0};
+ bpf_get_current_comm(&key.comm, sizeof(key.comm));
+ key.tid = tid;
+ struct sock **sockpp;
+ sockpp = sock_store.lookup(&key);
+ if (sockpp == 0) {
+ return 0; //miss the entry
+ }
+ struct sock *sk = *sockpp;
+ return ret_state_update_func(sk);
+}
+"""
+
+kfunc_program = """
+KFUNC_PROBE(tcp_fastretrans_alert, struct sock *sk)
+{
+ return entry_state_update_func(sk);
+}
+
+KRETFUNC_PROBE(tcp_fastretrans_alert, struct sock *sk)
+{
+ return ret_state_update_func(sk);
+}
+
+KFUNC_PROBE(tcp_enter_cwr, struct sock *sk)
+{
+ return entry_state_update_func(sk);
+}
+
+KRETFUNC_PROBE(tcp_enter_cwr, struct sock *sk)
+{
+ return ret_state_update_func(sk);
+}
+
+KFUNC_PROBE(tcp_enter_loss, struct sock *sk)
+{
+ return entry_state_update_func(sk);
+}
+
+KRETFUNC_PROBE(tcp_enter_loss, struct sock *sk)
+{
+ return ret_state_update_func(sk);
+}
+
+KFUNC_PROBE(tcp_enter_recovery, struct sock *sk)
+{
+ return entry_state_update_func(sk);
+}
+
+KRETFUNC_PROBE(tcp_enter_recovery, struct sock *sk)
+{
+ return ret_state_update_func(sk);
+}
+
+KFUNC_PROBE(tcp_process_tlp_ack, struct sock *sk)
+{
+ return entry_state_update_func(sk);
+}
+
+KRETFUNC_PROBE(tcp_process_tlp_ack, struct sock *sk)
+{
+ return ret_state_update_func(sk);
+}
+"""
+
+# code replace
+is_support_kfunc = BPF.support_kfunc()
+if is_support_kfunc:
+ bpf_text += kfunc_program
+ bpf_text = bpf_text.replace('SOCK_STORE_DEF', '')
+ bpf_text = bpf_text.replace('SOCK_STORE_ADD', '')
+ bpf_text = bpf_text.replace('SOCK_STORE_DEL', '')
+else:
+ bpf_text += kprobe_program
+ bpf_text = bpf_text.replace('SOCK_STORE_DEF',
+ 'BPF_HASH(sock_store, process_key_t, struct sock *);')
+ bpf_text = bpf_text.replace('SOCK_STORE_ADD',
+ 'sock_store.update(&key, &sk);')
+ bpf_text = bpf_text.replace('SOCK_STORE_DEL',
+ 'sock_store.delete(&key);')
+
+if args.localport:
+ bpf_text = bpf_text.replace('FILTER_LPORT',
+ 'if (lport < %d || lport > %d) { return 0; }'
+ % (start_lport, end_lport))
+else:
+ bpf_text = bpf_text.replace('FILTER_LPORT', '')
+
+if args.remoteport:
+ bpf_text = bpf_text.replace('FILTER_DPORT',
+ 'if (dport < %d || dport > %d) { return 0; }'
+ % (start_rport, end_rport))
+else:
+ bpf_text = bpf_text.replace('FILTER_DPORT', '')
+
+table_def_text = """
+ u64 open_dura;
+ u64 loss_dura;
+ u64 disorder_dura;
+ u64 recover_dura;
+ u64 cwr_dura;
+ u64 total_changes;
+"""
+
+store_text = """
+ datap->total_changes += 1;
+ if (last_cong_state == (TCP_CA_Open + 1)) {
+ datap->open_dura += ts;
+ } else if (last_cong_state == (TCP_CA_Disorder + 1)) {
+ datap->disorder_dura += ts;
+ } else if (last_cong_state == (TCP_CA_CWR + 1)) {
+ datap->cwr_dura += ts;
+ } else if (last_cong_state == (TCP_CA_Recovery + 1)) {
+ datap->recover_dura += ts;
+ } else if (last_cong_state == (TCP_CA_Loss + 1)) {
+ datap->loss_dura += ts;
+ }
+"""
+
+store_dist_text = """
+ if (last_cong_state == (TCP_CA_Open + 1)) {
+ key_s.state = TCP_CA_Open;
+ } else if (last_cong_state == (TCP_CA_Disorder + 1)) {
+ key_s.state = TCP_CA_Disorder;
+ } else if (last_cong_state == (TCP_CA_CWR + 1)) {
+ key_s.state = TCP_CA_CWR;
+ } else if (last_cong_state == (TCP_CA_Recovery + 1)) {
+ key_s.state = TCP_CA_Recovery;
+ } else if (last_cong_state == (TCP_CA_Loss + 1)) {
+ key_s.state = TCP_CA_Loss;
+ }
+ TIME_UNIT
+ key_s.slot = bpf_log2l(ts);
+ dist.atomic_increment(key_s);
+"""
+
+hist_table_text = """
+typedef struct congest_state_key {
+ u32 state;
+ u64 slot;
+}congest_state_key_t;
+
+BPF_HISTOGRAM(dist, congest_state_key_t);
+"""
+
+if args.dist:
+ bpf_text = bpf_text.replace('DEF_TEXT', '')
+ bpf_text = bpf_text.replace('STORE', store_dist_text)
+ bpf_text = bpf_text.replace('STATE_KEY',
+ 'congest_state_key_t key_s = {0};')
+ bpf_text = bpf_text.replace('HIST_TABLE', hist_table_text)
+ if args.microseconds:
+ bpf_text = bpf_text.replace('TIME_UNIT', '')
+ else:
+ bpf_text = bpf_text.replace('TIME_UNIT', 'ts /= 1000;')
+else:
+ bpf_text = bpf_text.replace('DEF_TEXT', table_def_text)
+ bpf_text = bpf_text.replace('STORE', store_text)
+ bpf_text = bpf_text.replace('STATE_KEY', '')
+ bpf_text = bpf_text.replace('HIST_TABLE', '')
+
+
+if debug or args.ebpf:
+ print(bpf_text)
+ if args.ebpf:
+ exit()
+
+# load BPF program
+b = BPF(text=bpf_text)
+
+if not is_support_kfunc:
+ # All the TCP congestion control status update functions
+ # are called by the five functions below.
+ b.attach_kprobe(event="tcp_fastretrans_alert", fn_name="entry_func")
+ b.attach_kretprobe(event="tcp_fastretrans_alert", fn_name="ret_func")
+ b.attach_kprobe(event="tcp_enter_cwr", fn_name="entry_func")
+ b.attach_kretprobe(event="tcp_enter_cwr", fn_name="ret_func")
+ b.attach_kprobe(event="tcp_process_tlp_ack", fn_name="entry_func")
+ b.attach_kretprobe(event="tcp_process_tlp_ack", fn_name="ret_func")
+ b.attach_kprobe(event="tcp_enter_loss", fn_name="entry_func")
+ b.attach_kretprobe(event="tcp_enter_loss", fn_name="ret_func")
+ b.attach_kprobe(event="tcp_enter_recovery", fn_name="entry_func")
+ b.attach_kretprobe(event="tcp_enter_recovery", fn_name="ret_func")
+
+print("Tracing tcp congestion control status duration... Hit Ctrl-C to end.")
+
+
+def cong_state_to_name(state):
+    """Return the display name for a congestion-state index.
+
+    Passed as section_print_fn to print_log2_hist to label each
+    histogram section.
+    """
+    # The order has to match the kernel's state numbering.
+    return ("open", "disorder", "cwr", "recovery", "loss")[state]
+
+# output
+exiting = 0 if args.interval else 1
+ipv6_stat = b.get_table("ipv6_stat")
+ipv4_stat = b.get_table("ipv4_stat")
+if args.dist:
+ dist = b.get_table("dist")
+label = "ms"
+if args.microseconds:
+ label = "us"
+while (1):
+ try:
+ sleep(int(args.interval))
+ except KeyboardInterrupt:
+ exiting = 1
+
+ print()
+ if args.timestamp:
+ print("%-8s\n" % strftime("%H:%M:%S"), end="")
+ if args.dist:
+ if args.microseconds:
+ dist.print_log2_hist("usecs", "tcp_congest_state",
+ section_print_fn=cong_state_to_name)
+ else:
+ dist.print_log2_hist("msecs", "tcp_congest_state",
+ section_print_fn=cong_state_to_name)
+ dist.clear()
+ else:
+ if ipv4_stat:
+ # the two address columns are space-separated, same as in the data rows
+ print("%-21s %-21s %-7s %-6s %-7s %-7s %-6s %-5s" % ("LAddrPort",
+ "RAddrPort", "Open_" + label, "Dod_" + label,
+ "Rcov_" + label, "Cwr_" + label, "Los_" + label, "Chgs"))
+ laddr = ""
+ raddr = ""
+ for k, v in sorted(ipv4_stat.items(), key=lambda ipv4_stat: ipv4_stat[0].lport):
+ laddr = inet_ntop(AF_INET, pack("I", k.saddr))
+ raddr = inet_ntop(AF_INET, pack("I", k.daddr))
+ open_dura = v.open_dura
+ disorder_dura = v.disorder_dura
+ recover_dura = v.recover_dura
+ cwr_dura = v.cwr_dura
+ loss_dura = v.loss_dura
+ if not args.microseconds:
+ open_dura /= 1000
+ disorder_dura /= 1000
+ recover_dura /= 1000
+ cwr_dura /= 1000
+ loss_dura /= 1000
+ if v.total_changes != 0:
+ print("%-21s %-21s %-7d %-6d %-7d %-7d %-6d %-5d" % (laddr +
+ "/" + str(k.lport), raddr + "/" + str(k.dport), open_dura,
+ disorder_dura, recover_dura, cwr_dura, loss_dura,
+ v.total_changes))
+ if ipv6_stat:
+ print("%-32s %-32s %-7s %-6s %-7s %-7s %-6s %-5s" % ("LAddrPort6",
+ "RAddrPort6", "Open_" + label, "Dod_" + label, "Rcov_" + label,
+ "Cwr_" + label, "Los_" + label, "Chgs"))
+ for k, v in sorted(ipv6_stat.items(), key=lambda ipv6_stat: ipv6_stat[0].lport):
+ laddr = inet_ntop(AF_INET6, bytes(k.saddr))
+ raddr = inet_ntop(AF_INET6, bytes(k.daddr))
+ open_dura = v.open_dura
+ disorder_dura = v.disorder_dura
+ recover_dura = v.recover_dura
+ cwr_dura = v.cwr_dura
+ loss_dura = v.loss_dura
+ if not args.microseconds:
+ open_dura /= 1000
+ disorder_dura /= 1000
+ recover_dura /= 1000
+ cwr_dura /= 1000
+ loss_dura /= 1000
+ if v.total_changes != 0:
+ # column widths follow the IPv6 header printed above (7/6/7/7/6/5)
+ print("%-32s %-32s %-7d %-6d %-7d %-7d %-6d %-5d" % (laddr +
+ "/" + str(k.lport), raddr + "/" + str(k.dport), open_dura,
+ disorder_dura, recover_dura, cwr_dura, loss_dura,
+ v.total_changes))
+ ipv4_stat.clear()
+ ipv6_stat.clear()
+ countdown -= 1
+ if exiting or countdown == 0:
+ exit()
diff --git a/tools/tcpcong_example.txt b/tools/tcpcong_example.txt
new file mode 100644
index 00000000..837c3b20
--- /dev/null
+++ b/tools/tcpcong_example.txt
@@ -0,0 +1,491 @@
+Demonstrations of tcpcong, the Linux eBPF/bcc version.
+
+This tool traces the Linux kernel's TCP congestion control status change
+functions, calculates the duration of each status and records it, and finally
+prints the results as tables or histograms, which can be used for evaluating
+the performance of a TCP congestion control algorithm.
+
+For example:
+
+./tcpcong
+Tracing tcp congestion control status duration... Hit Ctrl-C to end.
+^C
+LAddrPort RAddrPort Open_ms Dod_ms Rcov_ms Cwr_ms Los_ms Chgs
+192.168.219.3/34968 192.168.219.4/19230 884 12 102 507 0 2721
+192.168.219.3/34976 192.168.219.4/19230 869 12 133 490 0 2737
+192.168.219.3/34982 192.168.219.4/19230 807 0 0 699 0 3158
+192.168.219.3/34988 192.168.219.4/19230 892 16 88 508 0 2540
+192.168.219.3/38946 192.168.219.4/19229 894 13 97 500 0 2697
+192.168.219.3/38950 192.168.219.4/19229 840 10 73 579 1 1840
+192.168.219.3/38970 192.168.219.4/19229 862 17 91 534 0 2339
+192.168.219.3/38982 192.168.219.4/19229 812 13 92 587 0 2102
+192.168.219.3/39070 192.168.219.1/19225 855 7 61 580 0 2826
+192.168.219.3/39098 192.168.219.1/19225 880 8 47 568 0 2557
+192.168.219.3/39112 192.168.219.1/19225 674 2 10 819 0 2867
+192.168.219.3/39120 192.168.219.1/19225 757 1 11 736 0 2978
+192.168.219.3/41146 192.168.219.1/19227 736 1 10 758 0 2972
+192.168.219.3/41162 192.168.219.1/19227 662 2 10 830 0 2889
+192.168.219.3/41178 192.168.219.1/19227 646 2 11 846 0 2858
+192.168.219.3/41192 192.168.219.1/19227 812 9 67 615 0 2204
+192.168.219.3/43856 192.168.219.2/19225 745 1 5 754 0 3067
+192.168.219.3/43858 192.168.219.2/19225 827 4 36 636 0 2130
+192.168.219.3/43872 192.168.219.2/19225 739 0 2 764 0 3035
+192.168.219.3/43880 192.168.219.2/19225 747 0 3 756 0 3144
+192.168.219.3/47230 192.168.219.2/19227 830 4 38 632 0 2554
+192.168.219.3/47242 192.168.219.2/19227 782 3 32 687 0 2136
+192.168.219.3/47272 192.168.219.2/19227 611 1 3 889 0 2629
+192.168.219.3/47294 192.168.219.2/19227 832 3 38 630 0 2631
+192.168.219.3/49716 192.168.219.2/19226 846 4 44 610 0 2562
+192.168.219.3/49746 192.168.219.2/19226 765 0 4 736 0 2998
+192.168.219.3/49760 192.168.219.2/19226 812 2 47 644 0 2273
+192.168.219.3/49766 192.168.219.2/19226 724 0 2 779 0 3106
+192.168.219.3/54076 192.168.219.1/19226 690 1 9 804 0 2939
+192.168.219.3/54096 192.168.219.1/19226 715 2 10 778 0 2974
+192.168.219.3/54114 192.168.219.1/19226 878 6 61 558 0 2742
+192.168.219.3/54120 192.168.219.1/19226 738 0 9 757 0 2959
+192.168.219.3/60926 192.168.219.4/19228 711 11 80 702 0 1870
+192.168.219.3/60930 192.168.219.4/19228 785 0 0 720 0 3325
+192.168.219.3/60942 192.168.219.4/19228 762 0 1 743 0 3342
+192.168.219.3/60948 192.168.219.4/19228 877 11 102 514 0 2654
+
+The example shows the congestion status durations of all TCP sockets, in
+milliseconds. The Open_ms column is the duration of a TCP connection in the
+open status, in which cwnd can increase; the Dod_ms column is the duration of
+a connection in the disorder status, which has received disordered packets;
+the Rcov_ms column is the duration of a connection in the recovery status,
+which has received 3 duplicate ACKs; the Cwr_ms column is the duration of a
+connection that has received an explicit congestion notification and two ACKs
+to reduce cwnd; the Los_ms column is the duration of a connection in the loss
+status. The last column, Chgs, prints the total number of status changes of
+the socket.
+
+An interval can be provided, and also optionally a count. Eg, printing output
+every 1 second, and including timestamps (-T):
+./tcpcong -T 1 3
+Tracing tcp congestion control status duration... Hit Ctrl-C to end.
+
+07:37:55
+LAddrPort RAddrPort Open_ms Dod_ms Rcov_ms Cwr_ms Los_ms Chgs
+192.168.219.3/34968 192.168.219.4/19230 742 15 82 311 0 1678
+192.168.219.3/34976 192.168.219.4/19230 700 12 98 340 0 1965
+192.168.219.3/34982 192.168.219.4/19230 634 0 1 516 0 2471
+192.168.219.3/34988 192.168.219.4/19230 692 12 94 354 0 1941
+192.168.219.3/38946 192.168.219.4/19229 722 12 90 323 0 2006
+192.168.219.3/38950 192.168.219.4/19229 420 7 264 439 1 951
+192.168.219.3/38970 192.168.219.4/19229 724 14 90 323 0 1986
+192.168.219.3/38982 192.168.219.4/19229 686 13 87 365 0 1675
+192.168.219.3/39070 192.168.219.1/19225 653 5 46 446 0 1998
+192.168.219.3/39098 192.168.219.1/19225 667 4 38 440 0 2098
+192.168.219.3/39112 192.168.219.1/19225 606 0 1 543 0 2146
+192.168.219.3/39120 192.168.219.1/19225 492 0 205 453 0 1916
+192.168.219.3/41146 192.168.219.1/19227 583 0 3 564 0 2332
+192.168.219.3/41162 192.168.219.1/19227 536 0 1 613 0 2192
+192.168.219.3/41178 192.168.219.1/19227 499 0 2 649 0 2064
+192.168.219.3/41192 192.168.219.1/19227 622 6 34 488 0 1660
+192.168.219.3/43856 192.168.219.2/19225 555 0 1 593 0 2359
+192.168.219.3/43858 192.168.219.2/19225 618 3 28 502 0 1773
+192.168.219.3/43872 192.168.219.2/19225 558 0 0 592 0 2318
+192.168.219.3/43880 192.168.219.2/19225 580 0 1 569 0 2303
+192.168.219.3/47230 192.168.219.2/19227 646 1 18 485 0 1776
+192.168.219.3/47242 192.168.219.2/19227 634 0 20 495 0 1582
+192.168.219.3/47272 192.168.219.2/19227 463 0 1 687 0 1854
+192.168.219.3/47294 192.168.219.2/19227 636 2 27 486 0 1901
+192.168.219.3/49716 192.168.219.2/19226 646 2 28 475 0 1832
+192.168.219.3/49746 192.168.219.2/19226 583 0 0 567 0 2333
+192.168.219.3/49760 192.168.219.2/19226 628 2 26 495 0 1755
+192.168.219.3/49766 192.168.219.2/19226 558 0 0 592 0 2412
+192.168.219.3/54076 192.168.219.1/19226 581 0 2 567 0 2042
+192.168.219.3/54096 192.168.219.1/19226 554 0 2 594 0 2239
+192.168.219.3/54114 192.168.219.1/19226 685 4 33 427 0 1859
+192.168.219.3/54120 192.168.219.1/19226 611 0 3 537 0 2322
+192.168.219.3/60926 192.168.219.4/19228 681 20 101 347 0 1636
+192.168.219.3/60930 192.168.219.4/19228 616 0 1 532 0 2310
+192.168.219.3/60942 192.168.219.4/19228 607 0 1 543 0 2433
+192.168.219.3/60948 192.168.219.4/19228 597 11 76 293 0 1641
+
+07:37:57
+LAddrPort RAddrPort Open_ms Dod_ms Rcov_ms Cwr_ms Los_ms Chgs
+192.168.219.3/34968 192.168.219.4/19230 469 9 255 265 0 1305
+192.168.219.3/34976 192.168.219.4/19230 580 11 91 316 0 1916
+192.168.219.3/34982 192.168.219.4/19230 566 0 0 433 0 2092
+192.168.219.3/34988 192.168.219.4/19230 583 9 63 345 0 1871
+192.168.219.3/38946 192.168.219.4/19229 449 16 69 464 0 1425
+192.168.219.3/38950 192.168.219.4/19229 569 10 68 349 0 1848
+192.168.219.3/38970 192.168.219.4/19229 573 20 66 339 0 1839
+192.168.219.3/38982 192.168.219.4/19229 553 9 60 378 0 1483
+192.168.219.3/39070 192.168.219.1/19225 471 3 243 280 0 1279
+192.168.219.3/39098 192.168.219.1/19225 598 4 37 355 0 1717
+192.168.219.3/39112 192.168.219.1/19225 522 0 1 476 0 1816
+192.168.219.3/39120 192.168.219.1/19225 518 0 1 480 0 2031
+192.168.219.3/41146 192.168.219.1/19227 500 0 3 497 0 1996
+192.168.219.3/41162 192.168.219.1/19227 448 0 2 548 0 1849
+192.168.219.3/41178 192.168.219.1/19227 441 0 4 554 0 1693
+192.168.219.3/41192 192.168.219.1/19227 555 4 34 405 0 1341
+192.168.219.3/43856 192.168.219.2/19225 471 0 3 525 0 2118
+192.168.219.3/43858 192.168.219.2/19225 541 1 25 430 0 1446
+192.168.219.3/43872 192.168.219.2/19225 483 0 1 516 0 2044
+192.168.219.3/43880 192.168.219.2/19225 492 0 0 507 0 2073
+192.168.219.3/47230 192.168.219.2/19227 581 3 29 385 0 1453
+192.168.219.3/47242 192.168.219.2/19227 571 2 22 403 0 1292
+192.168.219.3/47272 192.168.219.2/19227 393 0 0 604 0 1516
+192.168.219.3/47294 192.168.219.2/19227 575 2 27 393 0 1660
+192.168.219.3/49716 192.168.219.2/19226 584 1 25 389 0 1582
+192.168.219.3/49746 192.168.219.2/19226 513 0 0 486 0 2017
+192.168.219.3/49760 192.168.219.2/19226 560 1 24 412 0 1370
+192.168.219.3/49766 192.168.219.2/19226 474 0 0 525 0 2121
+192.168.219.3/54076 192.168.219.1/19226 504 0 1 494 0 1724
+192.168.219.3/54096 192.168.219.1/19226 490 0 2 507 0 1906
+192.168.219.3/54114 192.168.219.1/19226 611 3 25 360 0 1560
+192.168.219.3/54120 192.168.219.1/19226 520 0 1 479 0 2010
+192.168.219.3/60926 192.168.219.4/19228 527 9 53 408 0 1473
+192.168.219.3/60930 192.168.219.4/19228 551 0 0 448 0 1951
+192.168.219.3/60942 192.168.219.4/19228 538 0 0 461 0 2038
+192.168.219.3/60948 192.168.219.4/19228 511 9 68 295 1 1701
+
+07:37:58
+LAddrPort RAddrPort Open_ms Dod_ms Rcov_ms Cwr_ms Los_ms Chgs
+192.168.219.3/34968 192.168.219.4/19230 293 1 226 211 0 755
+192.168.219.3/34976 192.168.219.4/19230 424 4 36 354 0 1489
+192.168.219.3/34982 192.168.219.4/19230 552 0 0 446 0 2249
+192.168.219.3/34988 192.168.219.4/19230 493 4 42 327 0 1715
+192.168.219.3/38946 192.168.219.4/19229 425 4 37 340 41 1478
+192.168.219.3/38950 192.168.219.4/19229 465 5 45 335 0 1586
+192.168.219.3/38970 192.168.219.4/19229 531 5 41 420 0 1863
+192.168.219.3/38982 192.168.219.4/19229 525 5 41 427 0 1625
+192.168.219.3/39070 192.168.219.1/19225 576 4 44 374 0 1787
+192.168.219.3/39098 192.168.219.1/19225 596 6 41 355 0 1782
+192.168.219.3/39112 192.168.219.1/19225 501 0 3 494 0 1887
+192.168.219.3/39120 192.168.219.1/19225 511 0 4 483 0 2070
+192.168.219.3/41146 192.168.219.1/19227 503 0 3 492 0 2068
+192.168.219.3/41162 192.168.219.1/19227 449 1 3 545 0 1962
+192.168.219.3/41178 192.168.219.1/19227 445 0 5 546 0 1907
+192.168.219.3/41192 192.168.219.1/19227 436 4 248 309 0 1208
+192.168.219.3/43856 192.168.219.2/19225 480 0 0 519 0 2108
+192.168.219.3/43858 192.168.219.2/19225 534 3 24 437 0 1644
+192.168.219.3/43872 192.168.219.2/19225 480 0 0 519 0 2068
+192.168.219.3/43880 192.168.219.2/19225 490 0 0 508 0 2083
+192.168.219.3/47230 192.168.219.2/19227 561 3 22 411 0 1556
+192.168.219.3/47242 192.168.219.2/19227 550 2 22 424 0 1485
+192.168.219.3/47272 192.168.219.2/19227 398 0 0 601 0 1537
+192.168.219.3/47294 192.168.219.2/19227 551 1 19 427 0 1712
+192.168.219.3/49716 192.168.219.2/19226 570 1 20 405 0 1712
+192.168.219.3/49746 192.168.219.2/19226 494 0 0 503 0 2052
+192.168.219.3/49760 192.168.219.2/19226 547 1 18 431 0 1673
+192.168.219.3/49766 192.168.219.2/19226 497 0 0 501 0 1983
+192.168.219.3/54076 192.168.219.1/19226 495 0 4 499 0 1849
+192.168.219.3/54096 192.168.219.1/19226 485 0 4 508 0 2037
+192.168.219.3/54114 192.168.219.1/19226 603 5 37 354 0 1671
+192.168.219.3/54120 192.168.219.1/19226 516 0 1 482 0 2047
+192.168.219.3/60926 192.168.219.4/19228 543 5 39 412 0 1708
+192.168.219.3/60930 192.168.219.4/19228 530 0 0 469 0 2096
+192.168.219.3/60942 192.168.219.4/19228 510 0 0 489 0 2234
+192.168.219.3/60948 192.168.219.4/19228 565 4 61 367 0 1956
+
+A local port range and a remote port range can be specified, and also
+optionally a count. E.g., printing output every 1 second, and including
+timestamps (-T), for local ports 30000-40000 and remote ports 19225-19227:
+./tcpcong -T -L 30000-40000 -R 19225-19227 1 3
+Tracing tcp congestion control status duration... Hit Ctrl-C to end.
+
+07:39:11
+LAddrPort RAddrPort Open_ms Dod_ms Rcov_ms Cwr_ms Los_ms Chgs
+192.168.219.3/39070 192.168.219.1/19225 668 4 32 455 0 1706
+192.168.219.3/39098 192.168.219.1/19225 692 4 38 424 0 2110
+192.168.219.3/39112 192.168.219.1/19225 564 0 2 593 0 2291
+192.168.219.3/39120 192.168.219.1/19225 599 0 4 555 0 2387
+
+07:39:12
+LAddrPort RAddrPort Open_ms Dod_ms Rcov_ms Cwr_ms Los_ms Chgs
+192.168.219.3/39070 192.168.219.1/19225 576 3 27 391 0 1525
+192.168.219.3/39098 192.168.219.1/19225 580 3 36 379 0 1893
+192.168.219.3/39112 192.168.219.1/19225 474 1 10 512 0 2009
+192.168.219.3/39120 192.168.219.1/19225 505 1 9 483 0 2022
+
+07:39:13
+LAddrPort RAddrPort Open_ms Dod_ms Rcov_ms Cwr_ms Los_ms Chgs
+192.168.219.3/39070 192.168.219.1/19225 546 6 27 418 0 1659
+192.168.219.3/39098 192.168.219.1/19225 564 4 40 390 0 1937
+192.168.219.3/39112 192.168.219.1/19225 479 0 3 514 0 2008
+192.168.219.3/39120 192.168.219.1/19225 515 0 4 479 0 1982
+
+The (-u) option can be specified to record the durations in microseconds.
+Eg printing output every 1 second, and including timestamps (-T) and
+microseconds (-u) for local ports 30000-40000 and remote ports 19225-19227:
+./tcpcong -T -u -L 30000-40000 -R 19225-19227 1 3
+Tracing tcp congestion control status duration... Hit Ctrl-C to end.
+
+07:39:44
+LAddrPort RAddrPort Open_us Dod_us Rcov_us Cwr_us Los_us Chgs
+192.168.219.3/39070 192.168.219.1/19225 600971 3232 38601 509796 0 1843
+192.168.219.3/39098 192.168.219.1/19225 667184 5585 26285 453575 0 1969
+192.168.219.3/39112 192.168.219.1/19225 580982 22 1502 569479 0 2210
+192.168.219.3/39120 192.168.219.1/19225 600280 201 955 550752 0 2327
+
+07:39:45
+LAddrPort RAddrPort Open_us Dod_us Rcov_us Cwr_us Los_us Chgs
+192.168.219.3/39070 192.168.219.1/19225 567189 2029 25966 404698 0 1612
+192.168.219.3/39098 192.168.219.1/19225 597201 2263 24073 376454 0 1578
+192.168.219.3/39112 192.168.219.1/19225 500792 846 9297 489264 0 1850
+192.168.219.3/39120 192.168.219.1/19225 518700 94 749 480171 0 1967
+
+07:39:46
+LAddrPort RAddrPort Open_us Dod_us Rcov_us Cwr_us Los_us Chgs
+192.168.219.3/39070 192.168.219.1/19225 587340 5324 37035 370066 0 1602
+192.168.219.3/39098 192.168.219.1/19225 532986 5630 31624 345336 0 1319
+192.168.219.3/39112 192.168.219.1/19225 481936 1129 6244 510235 0 1909
+192.168.219.3/39120 192.168.219.1/19225 507196 316 6200 485737 0 1957
+
+
+An IPv6 example with the (-u) option is shown below.
+Eg printing output every 1 second, and including timestamps (-T) and
+microseconds (-u) for local ports 30000-40000 and remote ports 19225-19227:
+./tcpcong.py -T -u -L 30000-40000 -R 19225-19227 1 3
+Tracing tcp congestion control status duration... Hit Ctrl-C to end.
+
+11:31:55
+LAddrPort6 RAddrPort6 Open_us Dod_us Rcov_us Cwr_us Los_us Chgs
+fe80::bace:f6ff:fe14:d21c/32810 fe80::bace:f6ff:fe43:fe96/19226 876328 0 0 137957 0 235
+fe80::bace:f6ff:fe14:d21c/32812 fe80::bace:f6ff:fe43:fe96/19226 757739 0 0 283114 0 590
+fe80::bace:f6ff:fe14:d21c/32814 fe80::bace:f6ff:fe43:fe96/19226 855426 0 0 136134 0 231
+fe80::bace:f6ff:fe14:d21c/32816 fe80::bace:f6ff:fe43:fe96/19226 695271 0 0 345443 0 606
+
+11:31:56
+LAddrPort6 RAddrPort6 Open_us Dod_us Rcov_us Cwr_us Los_us Chgs
+fe80::bace:f6ff:fe14:d21c/32810 fe80::bace:f6ff:fe43:fe96/19226 913925 0 0 81995 0 92
+fe80::bace:f6ff:fe14:d21c/32812 fe80::bace:f6ff:fe43:fe96/19226 785024 0 0 202819 0 777
+fe80::bace:f6ff:fe14:d21c/32814 fe80::bace:f6ff:fe43:fe96/19226 920963 0 0 80715 0 111
+fe80::bace:f6ff:fe14:d21c/32816 fe80::bace:f6ff:fe43:fe96/19226 765172 0 0 222897 0 734
+
+11:31:57
+LAddrPort6 RAddrPort6 Open_us Dod_us Rcov_us Cwr_us Los_us Chgs
+fe80::bace:f6ff:fe14:d21c/32810 fe80::bace:f6ff:fe43:fe96/19226 839563 0 0 98313 0 149
+fe80::bace:f6ff:fe14:d21c/32812 fe80::bace:f6ff:fe43:fe96/19226 534816 0 0 329683 0 495
+fe80::bace:f6ff:fe14:d21c/32814 fe80::bace:f6ff:fe43:fe96/19226 841706 103 2404 91273 0 132
+fe80::bace:f6ff:fe14:d21c/32816 fe80::bace:f6ff:fe43:fe96/19226 633320 0 0 286584 0 565
+
+
+The distribution of congestion status durations can be printed as a histogram
+with the -d option, and also optionally a count. E.g., printing output every
+1 second in microseconds, and including timestamps (-T):
+./tcpcong.py -d -u -T 1 2
+Tracing tcp congestion control status duration... Hit Ctrl-C to end.
+
+07:40:12
+
+tcp_congest_state = cwr
+ usecs : count distribution
+ 0 -> 1 : 0 | |
+ 2 -> 3 : 0 | |
+ 4 -> 7 : 11 | |
+ 8 -> 15 : 10 | |
+ 16 -> 31 : 25 | |
+ 32 -> 63 : 58 | |
+ 64 -> 127 : 117 | |
+ 128 -> 255 : 2924 |******* |
+ 256 -> 511 : 16249 |****************************************|
+ 512 -> 1023 : 15340 |************************************* |
+ 1024 -> 2047 : 786 |* |
+ 2048 -> 4095 : 24 | |
+ 4096 -> 8191 : 7 | |
+ 8192 -> 16383 : 0 | |
+ 16384 -> 32767 : 0 | |
+ 32768 -> 65535 : 1 | |
+ 65536 -> 131071 : 0 | |
+ 131072 -> 262143 : 1 | |
+
+tcp_congest_state = recovery
+ usecs : count distribution
+ 0 -> 1 : 0 | |
+ 2 -> 3 : 0 | |
+ 4 -> 7 : 1 | |
+ 8 -> 15 : 0 | |
+ 16 -> 31 : 2 | |
+ 32 -> 63 : 9 | |
+ 64 -> 127 : 28 | |
+ 128 -> 255 : 895 |****************************** |
+ 256 -> 511 : 1190 |****************************************|
+ 512 -> 1023 : 384 |************ |
+ 1024 -> 2047 : 66 |** |
+ 2048 -> 4095 : 2 | |
+ 4096 -> 8191 : 4 | |
+ 8192 -> 16383 : 2 | |
+ 16384 -> 32767 : 0 | |
+ 32768 -> 65535 : 0 | |
+ 65536 -> 131071 : 0 | |
+ 131072 -> 262143 : 2 | |
+
+tcp_congest_state = disorder
+ usecs : count distribution
+ 0 -> 1 : 0 | |
+ 2 -> 3 : 0 | |
+ 4 -> 7 : 21 |** |
+ 8 -> 15 : 59 |***** |
+ 16 -> 31 : 102 |********* |
+ 32 -> 63 : 256 |************************* |
+ 64 -> 127 : 409 |****************************************|
+ 128 -> 255 : 255 |************************ |
+ 256 -> 511 : 104 |********** |
+ 512 -> 1023 : 8 | |
+
+tcp_congest_state = open
+ usecs : count distribution
+ 0 -> 1 : 0 | |
+ 2 -> 3 : 11 | |
+ 4 -> 7 : 266 | |
+ 8 -> 15 : 319 | |
+ 16 -> 31 : 396 |* |
+ 32 -> 63 : 488 |* |
+ 64 -> 127 : 695 |** |
+ 128 -> 255 : 4395 |************* |
+ 256 -> 511 : 13329 |****************************************|
+ 512 -> 1023 : 12727 |************************************** |
+ 1024 -> 2047 : 3327 |********* |
+ 2048 -> 4095 : 601 |* |
+ 4096 -> 8191 : 45 | |
+ 8192 -> 16383 : 3 | |
+ 16384 -> 32767 : 1 | |
+ 32768 -> 65535 : 1 | |
+
+tcp_congest_state = loss
+ usecs : count distribution
+ 0 -> 1 : 0 | |
+ 2 -> 3 : 0 | |
+ 4 -> 7 : 0 | |
+ 8 -> 15 : 0 | |
+ 16 -> 31 : 0 | |
+ 32 -> 63 : 0 | |
+ 64 -> 127 : 0 | |
+ 128 -> 255 : 1 |****************************************|
+ 256 -> 511 : 1 |****************************************|
+ 512 -> 1023 : 0 | |
+ 1024 -> 2047 : 0 | |
+ 2048 -> 4095 : 0 | |
+ 4096 -> 8191 : 0 | |
+ 8192 -> 16383 : 0 | |
+ 16384 -> 32767 : 0 | |
+ 32768 -> 65535 : 1 |****************************************|
+
+07:40:14
+
+tcp_congest_state = cwr
+ usecs : count distribution
+ 0 -> 1 : 0 | |
+ 2 -> 3 : 7 | |
+ 4 -> 7 : 162 | |
+ 8 -> 15 : 591 |* |
+ 16 -> 31 : 462 | |
+ 32 -> 63 : 351 | |
+ 64 -> 127 : 441 | |
+ 128 -> 255 : 4073 |******** |
+ 256 -> 511 : 19188 |****************************************|
+ 512 -> 1023 : 16127 |********************************* |
+ 1024 -> 2047 : 725 |* |
+ 2048 -> 4095 : 23 | |
+ 4096 -> 8191 : 3 | |
+ 8192 -> 16383 : 2 | |
+ 16384 -> 32767 : 0 | |
+ 32768 -> 65535 : 4 | |
+ 65536 -> 131071 : 0 | |
+ 131072 -> 262143 : 2 | |
+
+tcp_congest_state = recovery
+ usecs : count distribution
+ 0 -> 1 : 0 | |
+ 2 -> 3 : 0 | |
+ 4 -> 7 : 3 | |
+ 8 -> 15 : 16 | |
+ 16 -> 31 : 22 | |
+ 32 -> 63 : 37 |* |
+ 64 -> 127 : 75 |** |
+ 128 -> 255 : 1082 |******************************* |
+ 256 -> 511 : 1364 |****************************************|
+ 512 -> 1023 : 369 |********** |
+ 1024 -> 2047 : 67 |* |
+ 2048 -> 4095 : 0 | |
+ 4096 -> 8191 : 2 | |
+ 8192 -> 16383 : 0 | |
+ 16384 -> 32767 : 0 | |
+ 32768 -> 65535 : 0 | |
+ 65536 -> 131071 : 0 | |
+ 131072 -> 262143 : 5 | |
+
+tcp_congest_state = disorder
+ usecs : count distribution
+ 0 -> 1 : 0 | |
+ 2 -> 3 : 4 | |
+ 4 -> 7 : 43 |**** |
+ 8 -> 15 : 107 |*********** |
+ 16 -> 31 : 145 |*************** |
+ 32 -> 63 : 312 |********************************* |
+ 64 -> 127 : 370 |****************************************|
+ 128 -> 255 : 256 |*************************** |
+ 256 -> 511 : 101 |********** |
+ 512 -> 1023 : 8 | |
+
+tcp_congest_state = open
+ usecs : count distribution
+ 0 -> 1 : 0 | |
+ 2 -> 3 : 21 | |
+ 4 -> 7 : 359 | |
+ 8 -> 15 : 516 |* |
+ 16 -> 31 : 484 |* |
+ 32 -> 63 : 522 |* |
+ 64 -> 127 : 818 |** |
+ 128 -> 255 : 5081 |************* |
+ 256 -> 511 : 14852 |****************************************|
+ 512 -> 1023 : 13753 |************************************* |
+ 1024 -> 2047 : 3224 |******** |
+ 2048 -> 4095 : 598 |* |
+ 4096 -> 8191 : 41 | |
+ 8192 -> 16383 : 0 | |
+ 16384 -> 32767 : 1 | |
+ 32768 -> 65535 : 0 | |
+ 65536 -> 131071 : 0 | |
+ 131072 -> 262143 : 1 | |
+
+tcp_congest_state = loss
+ usecs : count distribution
+ 0 -> 1 : 0 | |
+ 2 -> 3 : 0 | |
+ 4 -> 7 : 0 | |
+ 8 -> 15 : 0 | |
+ 16 -> 31 : 0 | |
+ 32 -> 63 : 0 | |
+ 64 -> 127 : 1 |****** |
+ 128 -> 255 : 0 | |
+ 256 -> 511 : 2 |************* |
+ 512 -> 1023 : 6 |****************************************|
+ 1024 -> 2047 : 0 | |
+ 2048 -> 4095 : 0 | |
+ 4096 -> 8191 : 0 | |
+ 8192 -> 16383 : 0 | |
+ 16384 -> 32767 : 0 | |
+ 32768 -> 65535 : 1 |****** |
+
+
+USAGE:
+./tcpcong -h
+usage: tcpcong [-h] [-L LOCALPORT] [-R REMOTEPORT] [-T] [-d] [-u]
+ [interval] [outputs]
+
+Summarize tcp socket congestion control status duration
+
+positional arguments:
+ interval output interval, in seconds
+ outputs number of outputs
+
+optional arguments:
+ -h, --help show this help message and exit
+ -L LOCALPORT, --localport LOCALPORT
+ trace local ports only
+ -R REMOTEPORT, --remoteport REMOTEPORT
+ trace the dest ports only
+ -T, --timestamp include timestamp on output
+ -d, --dist show distributions as histograms
+ -u, --microseconds output in microseconds
+
+examples:
+ ./tcpcong # show tcp congestion status duration
+ ./tcpcong 1 10 # show 1 second summaries, 10 times
+ ./tcpcong -L 3000-3006 1 # 1s summaries, local port 3000-3006
+ ./tcpcong -R 5000-5005 1 # 1s summaries, remote port 5000-5005
+ ./tcpcong -uT 1 # 1s summaries, microseconds, and timestamps
+ ./tcpcong -d # show the duration as histograms
diff --git a/tools/tcpconnect.py b/tools/tcpconnect.py
index 8b49c70a..531459e3 100755
--- a/tools/tcpconnect.py
+++ b/tools/tcpconnect.py
@@ -178,7 +178,7 @@ static int trace_connect_return(struct pt_regs *ctx, short ipver)
u16 dport = skp->__sk_common.skc_dport;
FILTER_PORT
-
+
FILTER_FAMILY
if (ipver == 4) {
@@ -295,7 +295,7 @@ int trace_udp_ret_recvmsg(struct pt_regs *ctx)
return 0;
struct msghdr *msghdr = (struct msghdr *)*msgpp;
- if (msghdr->msg_iter.type != ITER_IOVEC)
+ if (msghdr->msg_iter.TYPE_FIELD != ITER_IOVEC)
goto delete_and_return;
int copied = (int)PT_REGS_RC(ctx);
@@ -361,6 +361,10 @@ bpf_text = bpf_text.replace('FILTER_FAMILY', '')
bpf_text = bpf_text.replace('FILTER_UID', '')
if args.dns:
+ if BPF.kernel_struct_has_field(b'iov_iter', b'iter_type') == 1:
+ dns_bpf_text = dns_bpf_text.replace('TYPE_FIELD', 'iter_type')
+ else:
+ dns_bpf_text = dns_bpf_text.replace('TYPE_FIELD', 'type')
bpf_text += dns_bpf_text
if debug or args.ebpf:
@@ -380,12 +384,12 @@ def print_ipv4_event(cpu, data, size):
printb(b"%-6d" % event.uid, nl="")
dest_ip = inet_ntop(AF_INET, pack("I", event.daddr)).encode()
if args.lport:
- printb(b"%-6d %-12.12s %-2d %-16s %-6d %-16s %-6d %s" % (event.pid,
+ printb(b"%-7d %-12.12s %-2d %-16s %-6d %-16s %-6d %s" % (event.pid,
event.task, event.ip,
inet_ntop(AF_INET, pack("I", event.saddr)).encode(), event.lport,
dest_ip, event.dport, print_dns(dest_ip)))
else:
- printb(b"%-6d %-12.12s %-2d %-16s %-16s %-6d %s" % (event.pid,
+ printb(b"%-7d %-12.12s %-2d %-16s %-16s %-6d %s" % (event.pid,
event.task, event.ip,
inet_ntop(AF_INET, pack("I", event.saddr)).encode(),
dest_ip, event.dport, print_dns(dest_ip)))
@@ -401,12 +405,12 @@ def print_ipv6_event(cpu, data, size):
printb(b"%-6d" % event.uid, nl="")
dest_ip = inet_ntop(AF_INET6, event.daddr).encode()
if args.lport:
- printb(b"%-6d %-12.12s %-2d %-16s %-6d %-16s %-6d %s" % (event.pid,
+ printb(b"%-7d %-12.12s %-2d %-16s %-6d %-16s %-6d %s" % (event.pid,
event.task, event.ip,
inet_ntop(AF_INET6, event.saddr).encode(), event.lport,
dest_ip, event.dport, print_dns(dest_ip)))
else:
- printb(b"%-6d %-12.12s %-2d %-16s %-16s %-6d %s" % (event.pid,
+ printb(b"%-7d %-12.12s %-2d %-16s %-16s %-6d %s" % (event.pid,
event.task, event.ip,
inet_ntop(AF_INET6, event.saddr).encode(),
dest_ip, event.dport, print_dns(dest_ip)))
@@ -528,10 +532,10 @@ else:
if args.print_uid:
print("%-6s" % ("UID"), end="")
if args.lport:
- print("%-6s %-12s %-2s %-16s %-6s %-16s %-6s" % ("PID", "COMM", "IP", "SADDR",
+ print("%-7s %-12s %-2s %-16s %-6s %-16s %-6s" % ("PID", "COMM", "IP", "SADDR",
"LPORT", "DADDR", "DPORT"), end="")
else:
- print("%-6s %-12s %-2s %-16s %-16s %-6s" % ("PID", "COMM", "IP", "SADDR",
+ print("%-7s %-12s %-2s %-16s %-16s %-6s" % ("PID", "COMM", "IP", "SADDR",
"DADDR", "DPORT"), end="")
if args.dns:
print(" QUERY")
diff --git a/tools/tcpconnlat.py b/tools/tcpconnlat.py
index 093f2676..885b26d5 100755
--- a/tools/tcpconnlat.py
+++ b/tools/tcpconnlat.py
@@ -231,13 +231,13 @@ def print_ipv4_event(cpu, data, size):
start_ts = event.ts_us
print("%-9.3f" % ((float(event.ts_us) - start_ts) / 1000000), end="")
if args.lport:
- print("%-6d %-12.12s %-2d %-16s %-6d %-16s %-5d %.2f" % (event.pid,
+ print("%-7d %-12.12s %-2d %-16s %-6d %-16s %-5d %.2f" % (event.pid,
event.task.decode('utf-8', 'replace'), event.ip,
inet_ntop(AF_INET, pack("I", event.saddr)), event.lport,
inet_ntop(AF_INET, pack("I", event.daddr)), event.dport,
float(event.delta_us) / 1000))
else:
- print("%-6d %-12.12s %-2d %-16s %-16s %-5d %.2f" % (event.pid,
+ print("%-7d %-12.12s %-2d %-16s %-16s %-5d %.2f" % (event.pid,
event.task.decode('utf-8', 'replace'), event.ip,
inet_ntop(AF_INET, pack("I", event.saddr)),
inet_ntop(AF_INET, pack("I", event.daddr)), event.dport,
@@ -251,13 +251,13 @@ def print_ipv6_event(cpu, data, size):
start_ts = event.ts_us
print("%-9.3f" % ((float(event.ts_us) - start_ts) / 1000000), end="")
if args.lport:
- print("%-6d %-12.12s %-2d %-16s %-6d %-16s %-5d %.2f" % (event.pid,
+ print("%-7d %-12.12s %-2d %-16s %-6d %-16s %-5d %.2f" % (event.pid,
event.task.decode('utf-8', 'replace'), event.ip,
inet_ntop(AF_INET6, event.saddr), event.lport,
inet_ntop(AF_INET6, event.daddr),
event.dport, float(event.delta_us) / 1000))
else:
- print("%-6d %-12.12s %-2d %-16s %-16s %-5d %.2f" % (event.pid,
+ print("%-7d %-12.12s %-2d %-16s %-16s %-5d %.2f" % (event.pid,
event.task.decode('utf-8', 'replace'), event.ip,
inet_ntop(AF_INET6, event.saddr), inet_ntop(AF_INET6, event.daddr),
event.dport, float(event.delta_us) / 1000))
@@ -266,10 +266,10 @@ def print_ipv6_event(cpu, data, size):
if args.timestamp:
print("%-9s" % ("TIME(s)"), end="")
if args.lport:
- print("%-6s %-12s %-2s %-16s %-6s %-16s %-5s %s" % ("PID", "COMM",
+ print("%-7s %-12s %-2s %-16s %-6s %-16s %-5s %s" % ("PID", "COMM",
"IP", "SADDR", "LPORT", "DADDR", "DPORT", "LAT(ms)"))
else:
- print("%-6s %-12s %-2s %-16s %-16s %-5s %s" % ("PID", "COMM", "IP",
+ print("%-7s %-12s %-2s %-16s %-16s %-5s %s" % ("PID", "COMM", "IP",
"SADDR", "DADDR", "DPORT", "LAT(ms)"))
# read events
diff --git a/tools/tcpretrans.py b/tools/tcpretrans.py
index 79b481bb..79ff1cad 100755
--- a/tools/tcpretrans.py
+++ b/tools/tcpretrans.py
@@ -355,7 +355,7 @@ tcpstate[12] = 'NEW_SYN_RECV'
# process event
def print_ipv4_event(cpu, data, size):
event = b["ipv4_events"].event(data)
- print("%-8s %-6d %-2d %-20s %1s> %-20s" % (
+ print("%-8s %-7d %-2d %-20s %1s> %-20s" % (
strftime("%H:%M:%S"), event.pid, event.ip,
"%s:%d" % (inet_ntop(AF_INET, pack('I', event.saddr)), event.lport),
type[event.type],
@@ -368,7 +368,7 @@ def print_ipv4_event(cpu, data, size):
def print_ipv6_event(cpu, data, size):
event = b["ipv6_events"].event(data)
- print("%-8s %-6d %-2d %-20s %1s> %-20s" % (
+ print("%-8s %-7d %-2d %-20s %1s> %-20s" % (
strftime("%H:%M:%S"), event.pid, event.ip,
"%s:%d" % (inet_ntop(AF_INET6, event.saddr), event.lport),
type[event.type],
@@ -415,7 +415,7 @@ if args.count:
# read events
else:
# header
- print("%-8s %-6s %-2s %-20s %1s> %-20s" % ("TIME", "PID", "IP",
+ print("%-8s %-7s %-2s %-20s %1s> %-20s" % ("TIME", "PID", "IP",
"LADDR:LPORT", "T", "RADDR:RPORT"), end='')
if args.sequence:
print(" %-12s %-10s" % ("STATE", "SEQ"))
diff --git a/tools/tcptop.py b/tools/tcptop.py
index c8bde8f6..d369e133 100755
--- a/tools/tcptop.py
+++ b/tools/tcptop.py
@@ -281,14 +281,14 @@ while i != args.count and not exiting:
ipv4_recv_bytes.clear()
if ipv4_throughput:
- print("%-6s %-12s %-21s %-21s %6s %6s" % ("PID", "COMM",
+ print("%-7s %-12s %-21s %-21s %6s %6s" % ("PID", "COMM",
"LADDR", "RADDR", "RX_KB", "TX_KB"))
# output
for k, (send_bytes, recv_bytes) in sorted(ipv4_throughput.items(),
key=lambda kv: sum(kv[1]),
reverse=True):
- print("%-6d %-12.12s %-21s %-21s %6d %6d" % (k.pid,
+ print("%-7d %-12.12s %-21s %-21s %6d %6d" % (k.pid,
k.name,
k.laddr + ":" + str(k.lport),
k.daddr + ":" + str(k.dport),
@@ -308,14 +308,14 @@ while i != args.count and not exiting:
if ipv6_throughput:
# more than 80 chars, sadly.
- print("\n%-6s %-12s %-32s %-32s %6s %6s" % ("PID", "COMM",
+ print("\n%-7s %-12s %-32s %-32s %6s %6s" % ("PID", "COMM",
"LADDR6", "RADDR6", "RX_KB", "TX_KB"))
# output
for k, (send_bytes, recv_bytes) in sorted(ipv6_throughput.items(),
key=lambda kv: sum(kv[1]),
reverse=True):
- print("%-6d %-12.12s %-32s %-32s %6d %6d" % (k.pid,
+ print("%-7d %-12.12s %-32s %-32s %6d %6d" % (k.pid,
k.name,
k.laddr + ":" + str(k.lport),
k.daddr + ":" + str(k.dport),
diff --git a/tools/threadsnoop.py b/tools/threadsnoop.py
index 471b0c3c..8adca2eb 100755
--- a/tools/threadsnoop.py
+++ b/tools/threadsnoop.py
@@ -45,7 +45,7 @@ try:
except Exception:
b.attach_uprobe(name="c", sym="pthread_create", fn_name="do_entry")
-print("%-10s %-6s %-16s %s" % ("TIME(ms)", "PID", "COMM", "FUNC"))
+print("%-10s %-7s %-16s %s" % ("TIME(ms)", "PID", "COMM", "FUNC"))
start_ts = 0
@@ -58,7 +58,7 @@ def print_event(cpu, data, size):
func = b.sym(event.start, event.pid)
if (func == "[unknown]"):
func = hex(event.start)
- print("%-10d %-6d %-16s %s" % ((event.ts - start_ts) / 1000000,
+ print("%-10d %-7d %-16s %s" % ((event.ts - start_ts) / 1000000,
event.pid, event.comm, func))
b["events"].open_perf_buffer(print_event)
diff --git a/tools/trace.py b/tools/trace.py
index 0f6d90e8..b51cccff 100755
--- a/tools/trace.py
+++ b/tools/trace.py
@@ -5,6 +5,7 @@
#
# usage: trace [-h] [-p PID] [-L TID] [-v] [-Z STRING_SIZE] [-S] [-c cgroup_path]
# [-M MAX_EVENTS] [-s SYMBOLFILES] [-T] [-t] [-K] [-U] [-a] [-I header]
+# [-A]
# probe [probe ...]
#
# Licensed under the Apache License, Version 2.0 (the "License")
@@ -40,6 +41,8 @@ class Probe(object):
uid = -1
page_cnt = None
build_id_enabled = False
+ aggregate = False
+ symcount = {}
@classmethod
def configure(cls, args):
@@ -58,6 +61,10 @@ class Probe(object):
cls.page_cnt = args.buffer_pages
cls.bin_cmp = args.bin_cmp
cls.build_id_enabled = args.sym_file_list is not None
+ cls.aggregate = args.aggregate
+ if cls.aggregate and cls.max_events is None:
+ raise ValueError("-M/--max-events should be specified"
+ " with -A/--aggregate")
def __init__(self, probe, string_size, kernel_stack, user_stack,
cgroup_map_name, name, msg_filter):
@@ -584,18 +591,20 @@ BPF_PERF_OUTPUT(%s);
else: # self.probe_type == 't'
return self.tp_event
- def print_stack(self, bpf, stack_id, tgid):
+ def _stack_to_string(self, bpf, stack_id, tgid):
if stack_id < 0:
- print(" %d" % stack_id)
- return
+ return (" %d" % stack_id)
+ stackstr = ''
stack = list(bpf.get_table(self.stacks_name).walk(stack_id))
for addr in stack:
- print(" ", end="")
+ stackstr += ' '
if Probe.print_address:
- print("%16x " % addr, end="")
- print("%s" % (bpf.sym(addr, tgid,
- show_module=True, show_offset=True)))
+ stackstr += ("%16x " % addr)
+ symstr = bpf.sym(addr, tgid, show_module=True, show_offset=True)
+ stackstr += ('%s\n' % (symstr.decode('utf-8')))
+
+ return stackstr
def _format_message(self, bpf, tgid, values):
# Replace each %K with kernel sym and %U with user sym in tgid
@@ -610,6 +619,11 @@ BPF_PERF_OUTPUT(%s);
show_module=True, show_offset=True)
return self.python_format % tuple(values)
+ def print_aggregate_events(self):
+ for k, v in sorted(self.symcount.items(), key=lambda item: \
+ item[1], reverse=True):
+ print("%s-->COUNT %d\n\n" % (k, v), end="")
+
def print_event(self, bpf, cpu, data, size):
# Cast as the generated structure type and display
# according to the format string in the probe.
@@ -621,32 +635,43 @@ BPF_PERF_OUTPUT(%s);
msg = self._format_message(bpf, event.tgid, values)
if self.msg_filter and self.msg_filter not in msg:
return
+ eventstr = ''
if Probe.print_time:
time = strftime("%H:%M:%S") if Probe.use_localtime else \
Probe._time_off_str(event.timestamp_ns)
if Probe.print_unix_timestamp:
- print("%-17s " % time[:17], end="")
+ eventstr += ("%-17s " % time[:17])
else:
- print("%-8s " % time[:8], end="")
+ eventstr += ("%-8s " % time[:8])
if Probe.print_cpu:
- print("%-3s " % event.cpu, end="")
- print("%-7d %-7d %-15s %-16s %s" %
+ eventstr += ("%-3s " % event.cpu)
+ eventstr += ("%-7d %-7d %-15s %-16s %s\n" %
(event.tgid, event.pid,
event.comm.decode('utf-8', 'replace'),
self._display_function(), msg))
if self.kernel_stack:
- self.print_stack(bpf, event.kernel_stack_id, -1)
+ eventstr += self._stack_to_string(bpf, event.kernel_stack_id, -1)
if self.user_stack:
- self.print_stack(bpf, event.user_stack_id, event.tgid)
- if self.user_stack or self.kernel_stack:
+ eventstr += self._stack_to_string(bpf, event.user_stack_id, event.tgid)
+
+ if self.aggregate is False:
+ print(eventstr, end="")
+ if self.kernel_stack or self.user_stack:
print("")
+ else:
+ if eventstr in self.symcount:
+ self.symcount[eventstr] += 1
+ else:
+ self.symcount[eventstr] = 1
Probe.event_count += 1
if Probe.max_events is not None and \
Probe.event_count >= Probe.max_events:
- exit()
- sys.stdout.flush()
+ if self.aggregate:
+ self.print_aggregate_events()
+ sys.stdout.flush()
+ exit()
def attach(self, bpf, verbose):
if len(self.library) == 0:
@@ -700,7 +725,7 @@ trace do_sys_open
trace kfree_skb+0x12
Trace the kfree_skb kernel function after the instruction on the 0x12 offset
trace 'do_sys_open "%s", arg2@user'
- Trace the open syscall and print the filename. being opened @user is
+ Trace the open syscall and print the filename being opened @user is
added to arg2 in kprobes to ensure that char * should be copied from
the userspace stack to the bpf stack. If not specified, previous
behaviour is expected.
@@ -752,6 +777,9 @@ trace -I 'net/sock.h' \\
to 53 (DNS; 13568 in big endian order)
trace -I 'linux/fs_struct.h' 'mntns_install "users = %d", $task->fs->users'
Trace the number of users accessing the file system of the current task
+trace -s /lib/x86_64-linux-gnu/libc.so.6,/bin/ping 'p:c:inet_pton' -U
+ Trace inet_pton system call and use the specified libraries/executables for
+ symbol resolution.
"""
def __init__(self):
@@ -815,6 +843,8 @@ trace -I 'linux/fs_struct.h' 'mntns_install "users = %d", $task->fs->users'
"as either full path, "
"or relative to current working directory, "
"or relative to default kernel header search path")
+ parser.add_argument("-A", "--aggregate", action="store_true",
+ help="aggregate amount of each trace")
parser.add_argument("--ebpf", action="store_true",
help=argparse.SUPPRESS)
self.args = parser.parse_args()
diff --git a/tools/trace_example.txt b/tools/trace_example.txt
index 36010d61..ccefdaa7 100644
--- a/tools/trace_example.txt
+++ b/tools/trace_example.txt
@@ -237,6 +237,32 @@ Remember to use the -I argument include the appropriate header file. We didn't
need to do that here because `struct timespec` is used internally by the tool,
so it always includes this header file.
+To aggregate identical trace events and count them, specify -A together with
+-M EVENTS. A typical example:
+1. 'top' shows that system (sys) CPU utilization is high
+2. 'irqtop' shows that timer interrupts are more frequent than normal
+3. confirm how often the kernel programs timers with 'funccount -i 1 clockevents_program_event'
+4. trace where the timers are set with 'trace clockevents_program_event -K -A -M 1000'
+
+1294576 1294584 CPU 0/KVM clockevents_program_event
+ clockevents_program_event+0x1 [kernel]
+ hrtimer_start_range_ns+0x209 [kernel]
+ start_sw_timer+0x173 [kvm]
+ restart_apic_timer+0x6c [kvm]
+ kvm_set_msr_common+0x442 [kvm]
+ __kvm_set_msr+0xa2 [kvm]
+ kvm_emulate_wrmsr+0x36 [kvm]
+ vcpu_enter_guest+0x326 [kvm]
+ kvm_arch_vcpu_ioctl_run+0xcc [kvm]
+ kvm_vcpu_ioctl+0x22f [kvm]
+ do_vfs_ioctl+0xa1 [kernel]
+ ksys_ioctl+0x60 [kernel]
+ __x64_sys_ioctl+0x16 [kernel]
+ do_syscall_64+0x59 [kernel]
+ entry_SYSCALL_64_after_hwframe+0x44 [kernel]
+-->COUNT 271
+...
+So we can see that this stack accounts for 271 of the 1000 traced timer settings (~27%).
As a final example, let's trace open syscalls for a specific process. By
default, tracing is system-wide, but the -p switch overrides this:
@@ -384,6 +410,7 @@ optional arguments:
as either full path, or relative to current working
directory, or relative to default kernel header search
path
+ -A, --aggregate aggregate amount of each trace
EXAMPLES:
@@ -392,10 +419,11 @@ trace do_sys_open
trace kfree_skb+0x12
Trace the kfree_skb kernel function after the instruction on the 0x12 offset
trace 'do_sys_open "%s", arg2@user'
- Trace the open syscall and print the filename being opened. @user is
+ Trace the open syscall and print the filename being opened. @user is
added to arg2 in kprobes to ensure that char * should be copied from
the userspace stack to the bpf stack. If not specified, previous
behaviour is expected.
+
trace 'do_sys_open "%s", arg2@user' -n main
Trace the open syscall and only print event that process names containing "main"
trace 'do_sys_open "%s", arg2@user' --uid 1001
@@ -420,6 +448,8 @@ trace 't:block:block_rq_complete "sectors=%d", args->nr_sector'
Trace the block_rq_complete kernel tracepoint and print # of tx sectors
trace 'u:pthread:pthread_create (arg4 != 0)'
Trace the USDT probe pthread_create when its 4th argument is non-zero
+trace 'u:pthread:libpthread:pthread_create (arg4 != 0)'
+ Ditto, but the provider name "libpthread" is specified.
trace 'p::SyS_nanosleep(struct timespec *ts) "sleep for %lld ns", ts->tv_nsec'
Trace the nanosleep syscall and print the sleep duration in ns
trace -c /sys/fs/cgroup/system.slice/workload.service '__x64_sys_nanosleep' '__x64_sys_clone'
@@ -435,7 +465,7 @@ trace -I 'kernel/sched/sched.h' \
in kernel/sched/sched.h which is in kernel source tree and not in kernel-devel
package. So this command needs to run at the kernel source tree root directory
so that the added header file can be found by the compiler.
-trace -I 'net/sock.h' \\
+trace -I 'net/sock.h' \
'udpv6_sendmsg(struct sock *sk) (sk->sk_dport == 13568)'
Trace udpv6 sendmsg calls only if socket's destination port is equal
to 53 (DNS; 13568 in big endian order)
@@ -444,4 +474,3 @@ trace -I 'linux/fs_struct.h' 'mntns_install "users = %d", $task->fs->users'
trace -s /lib/x86_64-linux-gnu/libc.so.6,/bin/ping 'p:c:inet_pton' -U
Trace inet_pton system call and use the specified libraries/executables for
symbol resolution.
-"
diff --git a/tools/vfsstat.py b/tools/vfsstat.py
index a9c213d4..a862d333 100755
--- a/tools/vfsstat.py
+++ b/tools/vfsstat.py
@@ -65,11 +65,11 @@ void do_create(struct pt_regs *ctx) { stats_increment(S_CREATE); }
"""
bpf_text_kfunc = """
-KFUNC_PROBE(vfs_read) { stats_increment(S_READ); return 0; }
-KFUNC_PROBE(vfs_write) { stats_increment(S_WRITE); return 0; }
-KFUNC_PROBE(vfs_fsync) { stats_increment(S_FSYNC); return 0; }
-KFUNC_PROBE(vfs_open) { stats_increment(S_OPEN); return 0; }
-KFUNC_PROBE(vfs_create) { stats_increment(S_CREATE); return 0; }
+KFUNC_PROBE(vfs_read) { stats_increment(S_READ); return 0; }
+KFUNC_PROBE(vfs_write) { stats_increment(S_WRITE); return 0; }
+KFUNC_PROBE(vfs_fsync_range) { stats_increment(S_FSYNC); return 0; }
+KFUNC_PROBE(vfs_open) { stats_increment(S_OPEN); return 0; }
+KFUNC_PROBE(vfs_create) { stats_increment(S_CREATE); return 0; }
"""
is_support_kfunc = BPF.support_kfunc()
@@ -81,11 +81,11 @@ else:
b = BPF(text=bpf_text)
if not is_support_kfunc:
- b.attach_kprobe(event="vfs_read", fn_name="do_read")
- b.attach_kprobe(event="vfs_write", fn_name="do_write")
- b.attach_kprobe(event="vfs_fsync", fn_name="do_fsync")
- b.attach_kprobe(event="vfs_open", fn_name="do_open")
- b.attach_kprobe(event="vfs_create", fn_name="do_create")
+ b.attach_kprobe(event="vfs_read", fn_name="do_read")
+ b.attach_kprobe(event="vfs_write", fn_name="do_write")
+ b.attach_kprobe(event="vfs_fsync_range", fn_name="do_fsync")
+ b.attach_kprobe(event="vfs_open", fn_name="do_open")
+ b.attach_kprobe(event="vfs_create", fn_name="do_create")
# stat column labels and indexes
stat_types = {
diff --git a/tools/xfsdist.py b/tools/xfsdist.py
index 58f73afd..163c2207 100755
--- a/tools/xfsdist.py
+++ b/tools/xfsdist.py
@@ -169,7 +169,7 @@ while (1):
if args.interval and (not args.notimestamp):
print(strftime("%H:%M:%S:"))
- dist.print_log2_hist(label, "operation")
+ dist.print_log2_hist(label, "operation", section_print_fn=bytes.decode)
dist.clear()
countdown -= 1
diff --git a/tools/zfsdist.py b/tools/zfsdist.py
index a30671da..f9c229c7 100755
--- a/tools/zfsdist.py
+++ b/tools/zfsdist.py
@@ -183,7 +183,7 @@ while (1):
if args.interval and (not args.notimestamp):
print(strftime("%H:%M:%S:"))
- dist.print_log2_hist(label, "operation")
+ dist.print_log2_hist(label, "operation", section_print_fn=bytes.decode)
dist.clear()
countdown -= 1