about summary refs log tree commit diff
path: root/tracecmd/trace-vm.c
diff options
context:
space:
mode:
Diffstat (limited to 'tracecmd/trace-vm.c')
-rw-r--r-- tracecmd/trace-vm.c 388
1 files changed, 388 insertions, 0 deletions
diff --git a/tracecmd/trace-vm.c b/tracecmd/trace-vm.c
new file mode 100644
index 00000000..57dbef8d
--- /dev/null
+++ b/tracecmd/trace-vm.c
@@ -0,0 +1,388 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2008, 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ * Copyright (C) 2020, VMware, Tzvetomir Stoyanov <tz.stoyanov@gmail.com>
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <limits.h>
+#include <unistd.h>
+#include <errno.h>
+
+#include "trace-local.h"
+#include "trace-msg.h"
+
+static struct trace_guest *guests; /* array of known guests, grown by add_guest() */
+static size_t guests_len; /* number of entries currently stored in guests */
+
+/*
+ * Look up an already registered guest by its CID.
+ *
+ * Returns a pointer into the guests array, or NULL when no registered
+ * guest has @guest_cid (including when nothing was registered yet).
+ */
+static struct trace_guest *get_guest_by_cid(unsigned int guest_cid)
+{
+	struct trace_guest *guest;
+	struct trace_guest *end;
+
+	if (!guests)
+		return NULL;
+
+	end = guests + guests_len;
+	for (guest = guests; guest != end; guest++) {
+		if (guest->cid == guest_cid)
+			return guest;
+	}
+
+	return NULL;
+}
+
+/*
+ * Look up an already registered guest by its name.
+ *
+ * The comparison is an exact strcmp() match. Returns a pointer into the
+ * guests array, or NULL when no registered guest is called @name.
+ */
+static struct trace_guest *get_guest_by_name(const char *name)
+{
+	struct trace_guest *guest;
+	struct trace_guest *end;
+
+	if (!guests)
+		return NULL;
+
+	end = guests + guests_len;
+	for (guest = guests; guest != end; guest++) {
+		if (!strcmp(name, guest->name))
+			return guest;
+	}
+
+	return NULL;
+}
+
+/*
+ * Report whether a PID is recorded for every registered guest.
+ *
+ * Returns false as soon as a guest with a negative pid is seen and
+ * true otherwise, which includes the case of no registered guests.
+ */
+bool trace_have_guests_pid(void)
+{
+	size_t idx = 0;
+
+	while (idx < guests_len) {
+		if (guests[idx].pid < 0)
+			return false;
+		idx++;
+	}
+
+	return true;
+}
+
+/*
+ * Append a new guest entry to the guests array.
+ *
+ * The array is grown by one element, the new element is zeroed, @name
+ * is duplicated into it, @cid is stored and the pid is set to -1.
+ * die() is called when either allocation fails.
+ *
+ * Returns a pointer to the new entry inside the guests array.
+ */
+static struct trace_guest *add_guest(unsigned int cid, const char *name)
+{
+	struct trace_guest *guest;
+
+	guests = realloc(guests, (guests_len + 1) * sizeof(*guests));
+	if (!guests)
+		die("allocating new guest");
+
+	/* The fresh slot is the last element of the grown array */
+	guest = &guests[guests_len];
+	memset(guest, 0, sizeof(*guest));
+
+	guest->name = strdup(name);
+	if (!guest->name)
+		die("allocating guest name");
+
+	guest->cid = cid;
+	guest->pid = -1;
+	guests_len++;
+
+	return guest;
+}
+
+/*
+ * Create the "vsock_find_pid" tracing instance, enable the
+ * sched:sched_waking and kvm:kvm_exit events in it and turn tracing on.
+ *
+ * Returns the instance, or NULL if it could not be created. The results
+ * of enabling the events and of turning tracing on are not checked.
+ */
+static struct tracefs_instance *start_trace_connect(void)
+{
+	struct tracefs_instance *instance;
+
+	instance = tracefs_instance_create("vsock_find_pid");
+	if (instance) {
+		tracefs_event_enable(instance, "sched", "sched_waking");
+		tracefs_event_enable(instance, "kvm", "kvm_exit");
+		tracefs_trace_on(instance);
+	}
+
+	return instance;
+}
+
+struct pids { /* node of a singly linked list of PIDs */
+ struct pids *next; /* next node, NULL at the end of the list */
+ int pid; /* PID stored in this node */
+};
+
+struct trace_fields { /* state handed to callback() while iterating the trace */
+ struct tep_event *sched_waking; /* the sched:sched_waking event */
+ struct tep_event *kvm_exit; /* the kvm:kvm_exit event */
+ struct tep_format_field *common_pid; /* "common_pid" field, looked up on sched_waking */
+ struct tep_format_field *sched_next; /* "pid" field of sched_waking */
+ struct pids *pids; /* PIDs whose events are followed */
+ int found_pid; /* PID seen on a matching kvm_exit event, -1 until found */
+};
+
+/* Release every node of the @pids list; a NULL list is a no-op. */
+static void free_pids(struct pids *pids)
+{
+	struct pids *node;
+	struct pids *following;
+
+	for (node = pids; node; node = following) {
+		/* Fetch the link before the node is released */
+		following = node->next;
+		free(node);
+	}
+}
+
+/*
+ * Push @pid onto the front of the list headed by *@pids.
+ *
+ * The list is left untouched when the node cannot be allocated.
+ */
+static void add_pid(struct pids **pids, int pid)
+{
+	struct pids *node = malloc(sizeof(*node));
+
+	if (node) {
+		node->pid = pid;
+		node->next = *pids;
+		*pids = node;
+	}
+}
+
+/* Return true if @pid is stored in any node of the @pids list. */
+static bool match_pid(struct pids *pids, int pid)
+{
+	struct pids *node;
+
+	for (node = pids; node; node = node->next) {
+		if (node->pid == pid)
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * Per-event handler passed to tracefs_iterate_raw_events().
+ *
+ * @data is the struct trace_fields of the search. Events are only
+ * considered when they were not recorded in hard or soft interrupt
+ * context and their common_pid is in the list of followed PIDs:
+ *  - a kvm_exit event records that PID in found_pid and returns -1,
+ *    which is meant to stop the processing;
+ *  - a sched_waking event adds the woken PID to the followed list.
+ * Everything else is skipped with a return value of 0. @cpu is unused.
+ */
+static int callback(struct tep_event *event, struct tep_record *record, int cpu,
+		    void *data)
+{
+	struct trace_fields *fields = data;
+	struct tep_handle *tep = event->tep;
+	unsigned long long value;
+	int event_id;
+	int task;
+
+	if (tep_read_number_field(fields->common_pid, record->data, &value) < 0)
+		return 0;
+
+	/* Events recorded from interrupt context are not interesting */
+	if (tep_data_flags(tep, record) & (TRACE_FLAG_HARDIRQ | TRACE_FLAG_SOFTIRQ))
+		return 0;
+
+	/*
+	 * Only follow events issued by this task, or by a task that was
+	 * (directly or through a chain) woken up by this task.
+	 */
+	task = value;
+	if (!match_pid(fields->pids, task))
+		return 0;
+
+	event_id = tep_data_type(tep, record);
+
+	/* A followed task doing a kvm_exit is the one we are looking for */
+	if (event_id == fields->kvm_exit->id) {
+		fields->found_pid = task;
+		return -1;
+	}
+
+	if (event_id != fields->sched_waking->id)
+		return 0;
+
+	if (tep_read_number_field(fields->sched_next, record->data, &value) < 0)
+		return 0;
+
+	/* Remember the woken task so that its events are followed too */
+	add_pid(&fields->pids, (int)value);
+	return 0;
+}
+
+/*
+ * Read the thread group id of @pid from /proc/<pid>/status.
+ *
+ * Only the first line starting with "Tgid:" is examined.
+ *
+ * Returns the parsed Tgid, or -1 if the file cannot be opened, has no
+ * "Tgid:" line, or that line does not hold a non-negative decimal
+ * number that fits in an int.
+ */
+static int find_tgid(int pid)
+{
+	static const char key[] = "Tgid:";
+	const size_t key_len = sizeof(key) - 1;
+	FILE *fp;
+	char *path;
+	char *buf = NULL;
+	size_t l = 0;
+	int tgid = -1;
+
+	if (asprintf(&path, "/proc/%d/status", pid) < 0)
+		return -1;
+
+	fp = fopen(path, "r");
+	free(path);
+	if (!fp)
+		return -1;
+
+	while (getline(&buf, &l, fp) > 0) {
+		char *val;
+		char *end;
+		long num;
+
+		if (strncmp(buf, key, key_len) != 0)
+			continue;
+
+		/*
+		 * strtol() skips leading white space on its own, so no
+		 * manual isspace() loop (and no plain-char argument to
+		 * isspace()) is needed.
+		 */
+		val = buf + key_len;
+		errno = 0;
+		num = strtol(val, &end, 10);
+
+		/* Reject "no digits", overflow and values outside of int */
+		if (end != val && errno != ERANGE && num >= 0 && num <= INT_MAX)
+			tgid = num;
+		break;
+	}
+	free(buf);
+	fclose(fp);
+
+	return tgid;
+}
+
+/*
+ * Stop the tracing started by start_trace_connect() and search the
+ * recorded events for the task that did a kvm_exit after being woken
+ * (directly or through a chain of wake ups) by the current process.
+ *
+ * @open_instance is destroyed and freed in every case where it is not
+ * NULL.
+ *
+ * Returns the Tgid of the task that was found, or -1 if @open_instance
+ * is NULL, the events or fields could not be looked up, no matching
+ * kvm_exit was seen, or the Tgid could not be read.
+ */
+static int stop_trace_connect(struct tracefs_instance *open_instance)
+{
+	const char *systems[] = { "kvm", "sched", NULL};
+	/*
+	 * found_pid is read after the "out" label, so it must hold a
+	 * defined value even when a lookup below bails out early.
+	 */
+	struct trace_fields trace_fields = { .found_pid = -1 };
+	struct tep_handle *tep;
+	int tgid = -1;
+
+	if (!open_instance)
+		return -1;
+
+	/* The connection is finished, stop tracing, we have what we want */
+	tracefs_trace_off(open_instance);
+	tracefs_event_disable(open_instance, NULL, NULL);
+
+	tep = tracefs_local_events_system(NULL, systems);
+	if (!tep)
+		goto out;
+
+	trace_fields.sched_waking = tep_find_event_by_name(tep, "sched", "sched_waking");
+	if (!trace_fields.sched_waking)
+		goto out_free_tep;
+	trace_fields.kvm_exit = tep_find_event_by_name(tep, "kvm", "kvm_exit");
+	if (!trace_fields.kvm_exit)
+		goto out_free_tep;
+	trace_fields.common_pid = tep_find_common_field(trace_fields.sched_waking,
+							 "common_pid");
+	if (!trace_fields.common_pid)
+		goto out_free_tep;
+	trace_fields.sched_next = tep_find_any_field(trace_fields.sched_waking,
+						      "pid");
+	if (!trace_fields.sched_next)
+		goto out_free_tep;
+
+	/* Start following from this process, callback() extends the list */
+	add_pid(&trace_fields.pids, getpid());
+	tracefs_iterate_raw_events(tep, open_instance, NULL, 0, callback, &trace_fields);
+	free_pids(trace_fields.pids);
+
+out_free_tep:
+	/* Only the integer found_pid is used from here on */
+	tep_free(tep);
+out:
+	tracefs_instance_destroy(open_instance);
+	tracefs_instance_free(open_instance);
+
+	if (trace_fields.found_pid > 0)
+		tgid = find_tgid(trace_fields.found_pid);
+
+	return tgid;
+}
+
+/*
+ * Find the PID of the process that runs the guest with the CID stored
+ * in @guest and save it in @guest->pid.
+ *
+ * The sched_waking and kvm_exit events are traced while a connection
+ * to the CID is attempted. The port does not matter, so -1 is used to
+ * not connect to anything. The task that gets woken up because of that
+ * attempt and then does a kvm_exit is the task running the guest. Its
+ * task group id (Tgid), read from /proc/<guest-pid>/status, is the PID
+ * of the task running all the threads.
+ *
+ * @guest->pid is set to -1 when the PID could not be determined.
+ */
+static void find_pid_by_cid(struct trace_guest *guest)
+{
+	struct tracefs_instance *tracer = start_trace_connect();
+	int sock = trace_vsock_open(guest->cid, -1);
+
+	guest->pid = stop_trace_connect(tracer);
+
+	/* The connection is not expected to succeed, close it if it did */
+	if (sock >= 0)
+		close(sock);
+}
+
+/*
+ * Return the guest matching @name or @cid, registering it if needed.
+ *
+ * A guest registered under @name is preferred. Otherwise, for a non
+ * zero @cid, the guest registered with that CID is returned; if there
+ * is none and @name is given, a new guest is added with @cid and @name
+ * and its PID is looked up.
+ *
+ * Returns the guest, or NULL when nothing matched and no guest could be
+ * added (@cid is 0, or @name is NULL).
+ */
+struct trace_guest *trace_get_guest(unsigned int cid, const char *name)
+{
+	struct trace_guest *found;
+
+	if (name) {
+		found = get_guest_by_name(name);
+		if (found)
+			return found;
+	}
+
+	/* Without a CID there is nothing left to search for or to add */
+	if (cid == 0)
+		return NULL;
+
+	found = get_guest_by_cid(cid);
+	if (found || !name)
+		return found;
+
+	found = add_guest(cid, name);
+	if (found)
+		find_pid_by_cid(found);
+
+	return found;
+}
+
+#define VM_CID_CMD "virsh dumpxml"
+#define VM_CID_LINE "<cid auto="
+#define VM_CID_ID "address='"
+/*
+ * Register the guest called @name with the CID found in the output of
+ * "virsh dumpxml <name>", and look up the PID of that guest.
+ *
+ * The first output line that contains both VM_CID_LINE and VM_CID_ID
+ * followed by a valid decimal number ends the search. Lines where the
+ * number is missing or does not fit in an unsigned int are skipped.
+ * Nothing is registered when the command cannot be run or no such line
+ * is found.
+ *
+ * NOTE(review): @name is placed unquoted into a command line executed
+ * by popen(); callers must only pass trusted names.
+ */
+static void read_guest_cid(char *name)
+{
+	struct trace_guest *guest;
+	char *cmd = NULL;
+	char line[512];
+	FILE *f;
+
+	/* On failure the content of cmd is undefined, do not use it */
+	if (asprintf(&cmd, "%s %s", VM_CID_CMD, name) < 0)
+		return;
+	f = popen(cmd, "r");
+	free(cmd);
+	if (f == NULL)
+		return;
+
+	while (fgets(line, sizeof(line), f) != NULL) {
+		unsigned long cid_id;
+		char *cid;
+		char *end;
+
+		if (!strstr(line, VM_CID_LINE))
+			continue;
+		cid = strstr(line, VM_CID_ID);
+		if (!cid)
+			continue;
+		cid += strlen(VM_CID_ID);
+
+		/* errno is only meaningful if it was cleared beforehand */
+		errno = 0;
+		cid_id = strtoul(cid, &end, 10);
+		if (end == cid || errno == ERANGE || cid_id > UINT_MAX)
+			continue;
+
+		guest = add_guest(cid_id, name);
+		if (guest)
+			find_pid_by_cid(guest);
+		break;
+	}
+
+	/* close */
+	pclose(f);
+}
+
+#define VM_NAME_CMD "virsh list --name"
+/*
+ * Register every guest listed by "virsh list --name".
+ *
+ * Each non-empty output line is taken as a guest name and handed to
+ * read_guest_cid(). Nothing happens when the command cannot be run.
+ */
+void read_qemu_guests(void)
+{
+	char name[256];
+	FILE *f;
+
+	f = popen(VM_NAME_CMD, "r");
+	if (f == NULL)
+		return;
+
+	while (fgets(name, sizeof(name), f) != NULL) {
+		/*
+		 * Cut the line at the newline, if any. Unlike indexing
+		 * with strlen(name) - 1 this is safe for an empty string.
+		 */
+		name[strcspn(name, "\n")] = '\0';
+		if (name[0] == '\0')
+			continue;
+		read_guest_cid(name);
+	}
+
+	/* close */
+	pclose(f);
+}
+
+/*
+ * Return the PID recorded for virtual CPU @guest_vcpu of the guest
+ * with CID @guest_cid.
+ *
+ * Guests without a cpu_pid table, or whose cpu_max is not above
+ * @guest_vcpu, are skipped.
+ *
+ * Returns the cpu_pid entry of the first remaining guest that matches
+ * @guest_cid, or -1 if there is none.
+ */
+int get_guest_vcpu_pid(unsigned int guest_cid, unsigned int guest_vcpu)
+{
+	size_t i;
+
+	if (!guests)
+		return -1;
+
+	for (i = 0; i < guests_len; i++) {
+		/*
+		 * cpu_pid is indexed below, so the check that protects
+		 * the access is a NULL test, not a comparison with 0.
+		 */
+		if (!guests[i].cpu_pid || guest_vcpu >= guests[i].cpu_max)
+			continue;
+		if (guest_cid == guests[i].cid)
+			return guests[i].cpu_pid[guest_vcpu];
+	}
+	return -1;
+}