diff options
Diffstat (limited to 'tracecmd/trace-record.c')
-rw-r--r-- | tracecmd/trace-record.c | 7322 |
1 file changed, 7322 insertions, 0 deletions
diff --git a/tracecmd/trace-record.c b/tracecmd/trace-record.c new file mode 100644 index 00000000..27c4e7ba --- /dev/null +++ b/tracecmd/trace-record.c @@ -0,0 +1,7322 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2008, 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ +#include <dirent.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdarg.h> +#include <getopt.h> +#include <time.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <sys/wait.h> +#include <sys/socket.h> +#include <sys/syscall.h> +#include <sys/utsname.h> +#ifndef NO_PTRACE +#include <sys/ptrace.h> +#else +#ifdef WARN_NO_PTRACE +#warning ptrace not supported. -c feature will not work +#endif +#endif +#include <netdb.h> +#include <fcntl.h> +#include <unistd.h> +#include <ctype.h> +#include <sched.h> +#include <glob.h> +#include <errno.h> +#include <limits.h> +#include <libgen.h> +#include <poll.h> +#include <pwd.h> +#include <grp.h> + +#include "tracefs.h" +#include "version.h" +#include "trace-local.h" +#include "trace-msg.h" + +#define _STR(x) #x +#define STR(x) _STR(x) + +#define TRACE_CTRL "tracing_on" +#define TRACE "trace" +#define AVAILABLE "available_tracers" +#define CURRENT "current_tracer" +#define ITER_CTRL "trace_options" +#define MAX_LATENCY "tracing_max_latency" +#define STAMP "stamp" +#define FUNC_STACK_TRACE "func_stack_trace" +#define TSC_CLOCK "x86-tsc" + +#define dprint(fmt, ...) 
tracecmd_debug(fmt, ##__VA_ARGS__) + +enum trace_type { + TRACE_TYPE_RECORD = 1, + TRACE_TYPE_START = (1 << 1), + TRACE_TYPE_STREAM = (1 << 2), + TRACE_TYPE_EXTRACT = (1 << 3), + TRACE_TYPE_SET = (1 << 4), +}; + +static tracecmd_handle_init_func handle_init = NULL; + +static int rt_prio; + +static int keep; + +static int latency; +static int sleep_time = 1000; +static int recorder_threads; +static struct pid_record_data *pids; +static int buffers; + +/* Clear all function filters */ +static int clear_function_filters; + +static bool no_fifos; + +static char *host; + +static const char *gai_err; + +static bool quiet; + +static bool fork_process; + +/* Max size to let a per cpu file get */ +static int max_kb; + +static int do_ptrace; + +static int filter_task; +static bool no_filter = false; + +static int local_cpu_count; + +static int finished; + +/* setting of /proc/sys/kernel/ftrace_enabled */ +static int fset; + +static unsigned recorder_flags; + +/* Try a few times to get an accurate date */ +static int date2ts_tries = 50; + +static struct func_list *graph_funcs; + +static int func_stack; + +static int save_stdout = -1; + +static struct hook_list *hooks; + +struct event_list { + struct event_list *next; + const char *event; + char *trigger; + char *filter; + char *pid_filter; + char *filter_file; + char *trigger_file; + char *enable_file; + int neg; +}; + +struct tracecmd_event_list *listed_events; + +struct events { + struct events *sibling; + struct events *children; + struct events *next; + char *name; +}; + +/* Files to be reset when done recording */ +struct reset_file { + struct reset_file *next; + char *path; + char *reset; + int prio; +}; + +static struct reset_file *reset_files; + +/* Triggers need to be cleared in a special way */ +static struct reset_file *reset_triggers; + +struct buffer_instance top_instance; +struct buffer_instance *buffer_instances; +struct buffer_instance *first_instance; + +static struct tracecmd_recorder *recorder; + +static 
int ignore_event_not_found = 0; + +static inline int is_top_instance(struct buffer_instance *instance) +{ + return instance == &top_instance; +} + +static inline int no_top_instance(void) +{ + return first_instance != &top_instance; +} + +static void init_instance(struct buffer_instance *instance) +{ + instance->event_next = &instance->events; +} + +enum { + RESET_DEFAULT_PRIO = 0, + RESET_HIGH_PRIO = 100000, +}; + +enum trace_cmd { + CMD_extract, + CMD_start, + CMD_stream, + CMD_profile, + CMD_record, + CMD_record_agent, + CMD_set, +}; + +struct common_record_context { + enum trace_cmd curr_cmd; + struct buffer_instance *instance; + const char *output; + char *date2ts; + char *user; + const char *clock; + const char *compression; + struct tsc_nsec tsc2nsec; + int data_flags; + int tsync_loop_interval; + + int record_all; + int total_disable; + int disable; + int events; + int global; + int filtered; + int date; + int manual; + int topt; + int run_command; + int saved_cmdlines_size; + int file_version; +}; + +static void add_reset_file(const char *file, const char *val, int prio) +{ + struct reset_file *reset; + struct reset_file **last = &reset_files; + + /* Only reset if we are not keeping the state */ + if (keep) + return; + + reset = malloc(sizeof(*reset)); + if (!reset) + die("Failed to allocate reset"); + reset->path = strdup(file); + reset->reset = strdup(val); + reset->prio = prio; + if (!reset->path || !reset->reset) + die("Failed to allocate reset path or val"); + + while (*last && (*last)->prio > prio) + last = &(*last)->next; + + reset->next = *last; + *last = reset; +} + +static void add_reset_trigger(const char *file) +{ + struct reset_file *reset; + + /* Only reset if we are not keeping the state */ + if (keep) + return; + + reset = malloc(sizeof(*reset)); + if (!reset) + die("Failed to allocate reset"); + reset->path = strdup(file); + + reset->next = reset_triggers; + reset_triggers = reset; +} + +/* To save the contents of the file */ +static void 
reset_save_file(const char *file, int prio) +{ + char *content; + + content = get_file_content(file); + if (content) { + add_reset_file(file, content, prio); + free(content); + } +} + +/* + * @file: the file to check + * @nop: If the content of the file is this, use the reset value + * @reset: What to write if the file == @nop + */ +static void reset_save_file_cond(const char *file, int prio, + const char *nop, const char *reset) +{ + char *content; + char *cond; + + if (keep) + return; + + content = get_file_content(file); + + cond = strstrip(content); + + if (strcmp(cond, nop) == 0) + add_reset_file(file, reset, prio); + else + add_reset_file(file, content, prio); + + free(content); +} + +/** + * add_instance - add a buffer instance to the internal list + * @instance: The buffer instance to add + */ +void add_instance(struct buffer_instance *instance, int cpu_count) +{ + init_instance(instance); + instance->next = buffer_instances; + if (first_instance == buffer_instances) + first_instance = instance; + buffer_instances = instance; + instance->cpu_count = cpu_count; + buffers++; +} + +static void instance_reset_file_save(struct buffer_instance *instance, char *file, int prio) +{ + char *path; + + path = tracefs_instance_get_file(instance->tracefs, file); + if (path) + reset_save_file(path, prio); + tracefs_put_tracing_file(path); +} + +static void test_set_event_pid(struct buffer_instance *instance) +{ + static int have_set_event_pid; + static int have_event_fork; + static int have_func_fork; + + if (!have_set_event_pid && + tracefs_file_exists(top_instance.tracefs, "set_event_pid")) + have_set_event_pid = 1; + if (!have_event_fork && + tracefs_file_exists(top_instance.tracefs, "options/event-fork")) + have_event_fork = 1; + if (!have_func_fork && + tracefs_file_exists(top_instance.tracefs, "options/function-fork")) + have_func_fork = 1; + + if (!instance->have_set_event_pid && have_set_event_pid) { + instance->have_set_event_pid = 1; + 
instance_reset_file_save(instance, "set_event_pid", + RESET_DEFAULT_PRIO); + } + if (!instance->have_event_fork && have_event_fork) { + instance->have_event_fork = 1; + instance_reset_file_save(instance, "options/event-fork", + RESET_DEFAULT_PRIO); + } + if (!instance->have_func_fork && have_func_fork) { + instance->have_func_fork = 1; + instance_reset_file_save(instance, "options/function-fork", + RESET_DEFAULT_PRIO); + } +} + +/** + * allocate_instance - allocate a new buffer instance, + * it must exist in the ftrace system + * @name: The name of the instance (instance will point to this) + * + * Returns a newly allocated instance. In case of an error or if the + * instance does not exist in the ftrace system, NULL is returned. + */ +struct buffer_instance *allocate_instance(const char *name) +{ + struct buffer_instance *instance; + + instance = calloc(1, sizeof(*instance)); + if (!instance) + return NULL; + if (name) + instance->name = strdup(name); + if (tracefs_instance_exists(name)) { + instance->tracefs = tracefs_instance_create(name); + if (!instance->tracefs) + goto error; + } + + return instance; + +error: + if (instance) { + free(instance->name); + tracefs_instance_free(instance->tracefs); + free(instance); + } + return NULL; +} + +static int __add_all_instances(const char *tracing_dir) +{ + struct dirent *dent; + char *instances_dir; + struct stat st; + DIR *dir; + int ret; + + if (!tracing_dir) + return -1; + + instances_dir = append_file(tracing_dir, "instances"); + if (!instances_dir) + return -1; + + ret = stat(instances_dir, &st); + if (ret < 0 || !S_ISDIR(st.st_mode)) { + ret = -1; + goto out_free; + } + + dir = opendir(instances_dir); + if (!dir) { + ret = -1; + goto out_free; + } + + while ((dent = readdir(dir))) { + const char *name = strdup(dent->d_name); + char *instance_path; + struct buffer_instance *instance; + + if (strcmp(name, ".") == 0 || + strcmp(name, "..") == 0) + continue; + + instance_path = append_file(instances_dir, name); + ret 
= stat(instance_path, &st); + if (ret < 0 || !S_ISDIR(st.st_mode)) { + free(instance_path); + continue; + } + free(instance_path); + + instance = allocate_instance(name); + if (!instance) + die("Failed to create instance"); + add_instance(instance, local_cpu_count); + } + + closedir(dir); + ret = 0; + + out_free: + free(instances_dir); + return ret; +} + +/** + * add_all_instances - Add all pre-existing instances to the internal list + * @tracing_dir: The top-level tracing directory + * + * Returns whether the operation succeeded + */ +void add_all_instances(void) +{ + const char *tracing_dir = tracefs_tracing_dir(); + if (!tracing_dir) + die("can't get the tracing directory"); + + __add_all_instances(tracing_dir); +} + +/** + * tracecmd_stat_cpu - show the buffer stats of a particular CPU + * @s: the trace_seq to record the data in. + * @cpu: the CPU to stat + * + */ +void tracecmd_stat_cpu_instance(struct buffer_instance *instance, + struct trace_seq *s, int cpu) +{ + char buf[BUFSIZ]; + char *path; + char *file; + int fd; + int r; + + file = malloc(40); + if (!file) + return; + snprintf(file, 40, "per_cpu/cpu%d/stats", cpu); + + path = tracefs_instance_get_file(instance->tracefs, file); + free(file); + fd = open(path, O_RDONLY); + tracefs_put_tracing_file(path); + if (fd < 0) + return; + + while ((r = read(fd, buf, BUFSIZ)) > 0) + trace_seq_printf(s, "%.*s", r, buf); + + close(fd); +} + +/** + * tracecmd_stat_cpu - show the buffer stats of a particular CPU + * @s: the trace_seq to record the data in. 
+ * @cpu: the CPU to stat + * + */ +void tracecmd_stat_cpu(struct trace_seq *s, int cpu) +{ + tracecmd_stat_cpu_instance(&top_instance, s, cpu); +} + +static void add_event(struct buffer_instance *instance, struct event_list *event) +{ + *instance->event_next = event; + instance->event_next = &event->next; + event->next = NULL; +} + +static void reset_event_list(struct buffer_instance *instance) +{ + instance->events = NULL; + init_instance(instance); +} + +static char *get_temp_file(struct buffer_instance *instance, int cpu) +{ + const char *output_file = instance->output_file; + const char *name; + char *file = NULL; + int size; + + name = tracefs_instance_get_name(instance->tracefs); + if (name) { + size = snprintf(file, 0, "%s.%s.cpu%d", output_file, name, cpu); + file = malloc(size + 1); + if (!file) + die("Failed to allocate temp file for %s", name); + sprintf(file, "%s.%s.cpu%d", output_file, name, cpu); + } else { + size = snprintf(file, 0, "%s.cpu%d", output_file, cpu); + file = malloc(size + 1); + if (!file) + die("Failed to allocate temp file for %s", name); + sprintf(file, "%s.cpu%d", output_file, cpu); + } + + return file; +} + +char *trace_get_guest_file(const char *file, const char *guest) +{ + const char *p; + char *out = NULL; + int ret, base_len; + + p = strrchr(file, '.'); + if (p && p != file) + base_len = p - file; + else + base_len = strlen(file); + + ret = asprintf(&out, "%.*s-%s%s", base_len, file, + guest, file + base_len); + if (ret < 0) + return NULL; + return out; +} + +static void put_temp_file(char *file) +{ + free(file); +} + +static void delete_temp_file(struct buffer_instance *instance, int cpu) +{ + const char *output_file = instance->output_file; + const char *name; + char file[PATH_MAX]; + + name = tracefs_instance_get_name(instance->tracefs); + if (name) + snprintf(file, PATH_MAX, "%s.%s.cpu%d", output_file, name, cpu); + else + snprintf(file, PATH_MAX, "%s.cpu%d", output_file, cpu); + unlink(file); +} + +static int 
kill_thread_instance(int start, struct buffer_instance *instance) +{ + int n = start; + int i; + + for (i = 0; i < instance->cpu_count; i++) { + if (pids[n].pid > 0) { + kill(pids[n].pid, SIGKILL); + delete_temp_file(instance, i); + pids[n].pid = 0; + if (pids[n].brass[0] >= 0) + close(pids[n].brass[0]); + } + n++; + } + + return n; +} + +static void kill_threads(void) +{ + struct buffer_instance *instance; + int i = 0; + + if (!recorder_threads || !pids) + return; + + for_all_instances(instance) + i = kill_thread_instance(i, instance); +} + +void die(const char *fmt, ...) +{ + va_list ap; + int ret = errno; + + if (errno) + perror("trace-cmd"); + else + ret = -1; + + kill_threads(); + va_start(ap, fmt); + fprintf(stderr, " "); + vfprintf(stderr, fmt, ap); + va_end(ap); + + fprintf(stderr, "\n"); + exit(ret); +} + +static int delete_thread_instance(int start, struct buffer_instance *instance) +{ + int n = start; + int i; + + for (i = 0; i < instance->cpu_count; i++) { + if (pids) { + if (pids[n].pid) { + delete_temp_file(instance, i); + if (pids[n].pid < 0) + pids[n].pid = 0; + } + n++; + } else + /* Extract does not allocate pids */ + delete_temp_file(instance, i); + } + return n; +} + +static void delete_thread_data(void) +{ + struct buffer_instance *instance; + int i = 0; + + for_all_instances(instance) + i = delete_thread_instance(i, instance); + /* + * Top instance temp files are still created even if it + * isn't used. 
+ */ + if (no_top_instance()) { + for (i = 0; i < local_cpu_count; i++) + delete_temp_file(&top_instance, i); + } +} + +static void +add_tsc2nsec(struct tracecmd_output *handle, struct tsc_nsec *tsc2nsec) +{ + /* multiplier, shift, offset */ + struct iovec vector[3]; + + vector[0].iov_len = 4; + vector[0].iov_base = &tsc2nsec->mult; + vector[1].iov_len = 4; + vector[1].iov_base = &tsc2nsec->shift; + vector[2].iov_len = 8; + vector[2].iov_base = &tsc2nsec->offset; + + tracecmd_add_option_v(handle, TRACECMD_OPTION_TSC2NSEC, vector, 3); +} + +static void host_tsync_complete(struct common_record_context *ctx, + struct buffer_instance *instance) +{ + struct tracecmd_output *handle = NULL; + int fd = -1; + int ret; + + ret = tracecmd_tsync_with_guest_stop(instance->tsync); + if (!ret) { + fd = open(instance->output_file, O_RDWR); + if (fd < 0) + die("error opening %s", instance->output_file); + handle = tracecmd_get_output_handle_fd(fd); + if (!handle) + die("cannot create output handle"); + + if (ctx->tsc2nsec.mult) + add_tsc2nsec(handle, &ctx->tsc2nsec); + + tracecmd_write_guest_time_shift(handle, instance->tsync); + tracecmd_append_options(handle); + tracecmd_output_close(handle); + } + + tracecmd_tsync_free(instance->tsync); + instance->tsync = NULL; +} + +static void tell_guests_to_stop(struct common_record_context *ctx) +{ + struct buffer_instance *instance; + + /* Send close message to guests */ + for_all_instances(instance) { + if (is_guest(instance)) + tracecmd_msg_send_close_msg(instance->msg_handle); + } + + for_all_instances(instance) { + if (is_guest(instance)) + host_tsync_complete(ctx, instance); + } + + /* Wait for guests to acknowledge */ + for_all_instances(instance) { + if (is_guest(instance)) { + tracecmd_msg_wait_close_resp(instance->msg_handle); + tracecmd_msg_handle_close(instance->msg_handle); + } + } +} + +static void stop_threads(enum trace_type type) +{ + int ret; + int i; + + if (!recorder_threads) + return; + + /* Tell all threads to finish 
up */ + for (i = 0; i < recorder_threads; i++) { + if (pids[i].pid > 0) { + kill(pids[i].pid, SIGUSR1); + } + } + + /* Flush out the pipes */ + if (type & TRACE_TYPE_STREAM) { + do { + ret = trace_stream_read(pids, recorder_threads, NULL); + } while (ret > 0); + } +} + +static void wait_threads() +{ + int i; + + for (i = 0; i < recorder_threads; i++) { + if (pids[i].pid > 0) { + waitpid(pids[i].pid, NULL, 0); + pids[i].pid = -1; + } + } +} + +static int create_recorder(struct buffer_instance *instance, int cpu, + enum trace_type type, int *brass); + +static void flush_threads(void) +{ + struct buffer_instance *instance; + long ret; + int i; + + for_all_instances(instance) { + for (i = 0; i < instance->cpu_count; i++) { + /* Extract doesn't support sub buffers yet */ + ret = create_recorder(instance, i, TRACE_TYPE_EXTRACT, NULL); + if (ret < 0) + die("error reading ring buffer"); + } + } +} + +static int set_ftrace_enable(const char *path, int set) +{ + struct stat st; + int fd; + char *val = set ? "1" : "0"; + int ret; + + /* if ftace_enable does not exist, simply ignore it */ + fd = stat(path, &st); + if (fd < 0) + return -ENODEV; + + reset_save_file(path, RESET_DEFAULT_PRIO); + + ret = -1; + fd = open(path, O_WRONLY); + if (fd < 0) + goto out; + + /* Now set or clear the function option */ + ret = write(fd, val, 1); + close(fd); + + out: + return ret < 0 ? ret : 0; +} + +static int set_ftrace_proc(int set) +{ + const char *path = "/proc/sys/kernel/ftrace_enabled"; + int ret; + + ret = set_ftrace_enable(path, set); + if (ret == -1) + die ("Can't %s ftrace", set ? 
"enable" : "disable"); + return ret; +} + +static int set_ftrace(struct buffer_instance *instance, int set, int use_proc) +{ + char *path; + int ret; + + path = tracefs_instance_get_file(instance->tracefs, "options/function-trace"); + if (!path) + return -1; + ret = set_ftrace_enable(path, set); + tracefs_put_tracing_file(path); + + /* Always enable ftrace_enable proc file when set is true */ + if (ret < 0 || set || use_proc) + ret = set_ftrace_proc(set); + + return ret; +} + +static int write_file(const char *file, const char *str) +{ + int ret; + int fd; + + fd = open(file, O_WRONLY | O_TRUNC); + if (fd < 0) + die("opening to '%s'", file); + ret = write(fd, str, strlen(str)); + close(fd); + return ret; +} + +static void __clear_trace(struct buffer_instance *instance) +{ + FILE *fp; + char *path; + + if (is_guest(instance)) + return; + + /* reset the trace */ + path = tracefs_instance_get_file(instance->tracefs, "trace"); + fp = fopen(path, "w"); + if (!fp) + die("writing to '%s'", path); + tracefs_put_tracing_file(path); + fwrite("0", 1, 1, fp); + fclose(fp); +} + +static void clear_trace_instances(void) +{ + struct buffer_instance *instance; + + for_all_instances(instance) + __clear_trace(instance); +} + +static void reset_max_latency(struct buffer_instance *instance) +{ + tracefs_instance_file_write(instance->tracefs, + "tracing_max_latency", "0"); +} + +static int add_filter_pid(struct buffer_instance *instance, int pid, int exclude) +{ + struct filter_pids *p; + char buf[100]; + + for (p = instance->filter_pids; p; p = p->next) { + if (p->pid == pid) { + p->exclude = exclude; + return 0; + } + } + + p = malloc(sizeof(*p)); + if (!p) + die("Failed to allocate pid filter"); + p->next = instance->filter_pids; + p->exclude = exclude; + p->pid = pid; + instance->filter_pids = p; + instance->nr_filter_pids++; + + instance->len_filter_pids += sprintf(buf, "%d", pid); + + return 1; +} + +static void add_filter_pid_all(int pid, int exclude) +{ + struct buffer_instance 
*instance; + + for_all_instances(instance) + add_filter_pid(instance, pid, exclude); +} + +static void reset_save_ftrace_pid(struct buffer_instance *instance) +{ + static char *path; + + if (!tracefs_file_exists(instance->tracefs, "set_ftrace_pid")) + return; + + path = tracefs_instance_get_file(instance->tracefs, "set_ftrace_pid"); + if (!path) + return; + + reset_save_file_cond(path, RESET_DEFAULT_PRIO, "no pid", ""); + + tracefs_put_tracing_file(path); +} + +static void update_ftrace_pid(struct buffer_instance *instance, + const char *pid, int reset) +{ + int fd = -1; + char *path; + int ret; + + if (!tracefs_file_exists(instance->tracefs, "set_ftrace_pid")) + return; + + path = tracefs_instance_get_file(instance->tracefs, "set_ftrace_pid"); + if (!path) + return; + + fd = open(path, O_WRONLY | O_CLOEXEC | (reset ? O_TRUNC : 0)); + tracefs_put_tracing_file(path); + if (fd < 0) + return; + + ret = write(fd, pid, strlen(pid)); + + /* + * Older kernels required "-1" to disable pid + */ + if (ret < 0 && !strlen(pid)) + ret = write(fd, "-1", 2); + + if (ret < 0) + die("error writing to %s", path); + /* add whitespace in case another pid is written */ + write(fd, " ", 1); + close(fd); +} + +static void update_ftrace_pids(int reset) +{ + struct buffer_instance *instance; + struct filter_pids *pid; + static int first = 1; + char buf[100]; + int rst; + + for_all_instances(instance) { + if (first) + reset_save_ftrace_pid(instance); + rst = reset; + for (pid = instance->filter_pids; pid; pid = pid->next) { + if (pid->exclude) + continue; + snprintf(buf, 100, "%d ", pid->pid); + update_ftrace_pid(instance, buf, rst); + /* Only reset the first entry */ + rst = 0; + } + } + + if (first) + first = 0; +} + +static void update_event_filters(struct buffer_instance *instance); +static void update_pid_event_filters(struct buffer_instance *instance); + +static void append_filter_pid_range(char **filter, int *curr_len, + const char *field, + int start_pid, int end_pid, bool exclude) 
+{ + const char *op = "", *op1, *op2, *op3; + int len; + + if (*filter && **filter) + op = exclude ? "&&" : "||"; + + /* Handle thus case explicitly so that we get `pid==3` instead of + * `pid>=3&&pid<=3` for singleton ranges + */ + if (start_pid == end_pid) { +#define FMT "%s(%s%s%d)" + len = snprintf(NULL, 0, FMT, op, + field, exclude ? "!=" : "==", start_pid); + *filter = realloc(*filter, *curr_len + len + 1); + if (!*filter) + die("realloc"); + + len = snprintf(*filter + *curr_len, len + 1, FMT, op, + field, exclude ? "!=" : "==", start_pid); + *curr_len += len; + + return; +#undef FMT + } + + if (exclude) { + op1 = "<"; + op2 = "||"; + op3 = ">"; + } else { + op1 = ">="; + op2 = "&&"; + op3 = "<="; + } + +#define FMT "%s(%s%s%d%s%s%s%d)" + len = snprintf(NULL, 0, FMT, op, + field, op1, start_pid, op2, + field, op3, end_pid); + *filter = realloc(*filter, *curr_len + len + 1); + if (!*filter) + die("realloc"); + + len = snprintf(*filter + *curr_len, len + 1, FMT, op, + field, op1, start_pid, op2, + field, op3, end_pid); + *curr_len += len; +} + +/** + * make_pid_filter - create a filter string to all pids against @field + * @curr_filter: Append to a previous filter (may realloc). Can be NULL + * @field: The field to compare the pids against + * + * Creates a new string or appends to an existing one if @curr_filter + * is not NULL. The new string will contain a filter with all pids + * in pid_filter list with the format (@field == pid) || .. + * If @curr_filter is not NULL, it will add this string as: + * (@curr_filter) && ((@field == pid) || ...) 
+ */ +static char *make_pid_filter(struct buffer_instance *instance, + char *curr_filter, const char *field) +{ + int start_pid = -1, last_pid = -1; + int last_exclude = -1; + struct filter_pids *p; + char *filter = NULL; + int curr_len = 0; + + /* Use the new method if possible */ + if (instance->have_set_event_pid) + return NULL; + + if (!instance->filter_pids) + return curr_filter; + + for (p = instance->filter_pids; p; p = p->next) { + /* + * PIDs are inserted in `filter_pids` from the front and that's + * why we expect them in descending order here. + */ + if (p->pid == last_pid - 1 && p->exclude == last_exclude) { + last_pid = p->pid; + continue; + } + + if (start_pid != -1) + append_filter_pid_range(&filter, &curr_len, field, + last_pid, start_pid, + last_exclude); + + start_pid = last_pid = p->pid; + last_exclude = p->exclude; + + } + append_filter_pid_range(&filter, &curr_len, field, + last_pid, start_pid, last_exclude); + + if (curr_filter) { + char *save = filter; + asprintf(&filter, "(%s)&&(%s)", curr_filter, filter); + free(save); + } + + return filter; +} + +#define _STRINGIFY(x) #x +#define STRINGIFY(x) _STRINGIFY(x) + +static int get_pid_addr_maps(struct buffer_instance *instance, int pid) +{ + struct pid_addr_maps *maps = instance->pid_maps; + struct tracecmd_proc_addr_map *map; + unsigned long long begin, end; + struct pid_addr_maps *m; + char mapname[PATH_MAX+1]; + char fname[PATH_MAX+1]; + char buf[PATH_MAX+100]; + FILE *f; + int ret; + int res; + int i; + + sprintf(fname, "/proc/%d/exe", pid); + ret = readlink(fname, mapname, PATH_MAX); + if (ret >= PATH_MAX || ret < 0) + return -ENOENT; + mapname[ret] = 0; + + sprintf(fname, "/proc/%d/maps", pid); + f = fopen(fname, "r"); + if (!f) + return -ENOENT; + + while (maps) { + if (pid == maps->pid) + break; + maps = maps->next; + } + + ret = -ENOMEM; + if (!maps) { + maps = calloc(1, sizeof(*maps)); + if (!maps) + goto out_fail; + maps->pid = pid; + maps->next = instance->pid_maps; + 
instance->pid_maps = maps; + } else { + for (i = 0; i < maps->nr_lib_maps; i++) + free(maps->lib_maps[i].lib_name); + free(maps->lib_maps); + maps->lib_maps = NULL; + maps->nr_lib_maps = 0; + free(maps->proc_name); + } + + maps->proc_name = strdup(mapname); + if (!maps->proc_name) + goto out; + + while (fgets(buf, sizeof(buf), f)) { + mapname[0] = '\0'; + res = sscanf(buf, "%llx-%llx %*s %*x %*s %*d %"STRINGIFY(PATH_MAX)"s", + &begin, &end, mapname); + if (res == 3 && mapname[0] != '\0') { + map = realloc(maps->lib_maps, + (maps->nr_lib_maps + 1) * sizeof(*map)); + if (!map) + goto out_fail; + map[maps->nr_lib_maps].end = end; + map[maps->nr_lib_maps].start = begin; + map[maps->nr_lib_maps].lib_name = strdup(mapname); + if (!map[maps->nr_lib_maps].lib_name) + goto out_fail; + maps->lib_maps = map; + maps->nr_lib_maps++; + } + } +out: + fclose(f); + return 0; + +out_fail: + fclose(f); + if (maps) { + for (i = 0; i < maps->nr_lib_maps; i++) + free(maps->lib_maps[i].lib_name); + if (instance->pid_maps != maps) { + m = instance->pid_maps; + while (m) { + if (m->next == maps) { + m->next = maps->next; + break; + } + m = m->next; + } + } else + instance->pid_maps = maps->next; + free(maps->lib_maps); + maps->lib_maps = NULL; + maps->nr_lib_maps = 0; + free(maps->proc_name); + maps->proc_name = NULL; + free(maps); + } + return ret; +} + +static void get_filter_pid_maps(void) +{ + struct buffer_instance *instance; + struct filter_pids *p; + + for_all_instances(instance) { + if (!instance->get_procmap) + continue; + for (p = instance->filter_pids; p; p = p->next) { + if (p->exclude) + continue; + get_pid_addr_maps(instance, p->pid); + } + } +} + +static void update_task_filter(void) +{ + struct buffer_instance *instance; + int pid = getpid(); + + if (no_filter) + return; + + get_filter_pid_maps(); + + if (filter_task) + add_filter_pid_all(pid, 0); + + for_all_instances(instance) { + if (!instance->filter_pids) + continue; + if (instance->common_pid_filter) + 
free(instance->common_pid_filter); + instance->common_pid_filter = make_pid_filter(instance, NULL, + "common_pid"); + } + update_ftrace_pids(1); + for_all_instances(instance) + update_pid_event_filters(instance); +} + +static pid_t trace_waitpid(enum trace_type type, pid_t pid, int *status, int options) +{ + struct timeval tv = { 1, 0 }; + int ret; + + if (type & TRACE_TYPE_STREAM) + options |= WNOHANG; + + do { + ret = waitpid(pid, status, options); + if (ret != 0) + return ret; + + if (type & TRACE_TYPE_STREAM) + trace_stream_read(pids, recorder_threads, &tv); + } while (1); +} + +#ifndef __NR_pidfd_open +#define __NR_pidfd_open 434 +#endif + +static int pidfd_open(pid_t pid, unsigned int flags) { + return syscall(__NR_pidfd_open, pid, flags); +} + +static int trace_waitpidfd(id_t pidfd) { + struct pollfd pollfd; + + pollfd.fd = pidfd; + pollfd.events = POLLIN; + + while (!finished) { + int ret = poll(&pollfd, 1, -1); + /* If waitid was interrupted, keep waiting */ + if (ret < 0 && errno == EINTR) + continue; + else if (ret < 0) + return 1; + else + break; + } + + return 0; +} + +static int trace_wait_for_processes(struct buffer_instance *instance) { + int ret = 0; + int nr_fds = 0; + int i; + int *pidfds; + struct filter_pids *pid; + + pidfds = malloc(sizeof(int) * instance->nr_process_pids); + if (!pidfds) + return 1; + + for (pid = instance->process_pids; + pid && instance->nr_process_pids; + pid = pid->next) { + if (pid->exclude) { + instance->nr_process_pids--; + continue; + } + pidfds[nr_fds] = pidfd_open(pid->pid, 0); + + /* If the pid doesn't exist, the process has probably exited */ + if (pidfds[nr_fds] < 0 && errno == ESRCH) { + instance->nr_process_pids--; + continue; + } else if (pidfds[nr_fds] < 0) { + ret = 1; + goto out; + } + + nr_fds++; + instance->nr_process_pids--; + } + + for (i = 0; i < nr_fds; i++) { + if (trace_waitpidfd(pidfds[i])) { + ret = 1; + goto out; + } + } + +out: + for (i = 0; i < nr_fds; i++) + close(pidfds[i]); + free(pidfds); + 
return ret; +} + +static void add_event_pid(struct buffer_instance *instance, const char *buf) +{ + tracefs_instance_file_write(instance->tracefs, "set_event_pid", buf); +} + +#ifndef NO_PTRACE +/** + * append_pid_filter - add a new pid to an existing filter + * @curr_filter: the filter to append to. If NULL, then allocate one + * @field: The fild to compare the pid to + * @pid: The pid to add to. + */ +static char *append_pid_filter(char *curr_filter, const char *field, int pid) +{ + char *filter; + int len; + + len = snprintf(NULL, 0, "(%s==%d)||", field, pid); + + if (!curr_filter) { + /* No need for +1 as we don't use the "||" */ + filter = malloc(len); + if (!filter) + die("Failed to allocate pid filter"); + sprintf(filter, "(%s==%d)", field, pid); + } else { + int indx = strlen(curr_filter); + + len += indx; + filter = realloc(curr_filter, len + indx + 1); + if (!filter) + die("realloc"); + sprintf(filter + indx, "||(%s==%d)", field, pid); + } + + return filter; +} + +static void append_sched_event(struct event_list *event, const char *field, int pid) +{ + if (!event || !event->pid_filter) + return; + + event->pid_filter = append_pid_filter(event->pid_filter, field, pid); +} + +static void update_sched_events(struct buffer_instance *instance, int pid) +{ + /* + * Also make sure that the sched_switch to this pid + * and wakeups of this pid are also traced. + * Only need to do this if the events are active. 
+ */ + append_sched_event(instance->sched_switch_event, "next_pid", pid); + append_sched_event(instance->sched_wakeup_event, "pid", pid); + append_sched_event(instance->sched_wakeup_new_event, "pid", pid); +} + +static int open_instance_fd(struct buffer_instance *instance, + const char *file, int flags); + +static void add_new_filter_child_pid(int pid, int child) +{ + struct buffer_instance *instance; + struct filter_pids *fpid; + char buf[100]; + + for_all_instances(instance) { + if (!instance->ptrace_child || !instance->filter_pids) + continue; + for (fpid = instance->filter_pids; fpid; fpid = fpid->next) { + if (fpid->pid == pid) + break; + } + if (!fpid) + continue; + + add_filter_pid(instance, child, 0); + sprintf(buf, "%d", child); + update_ftrace_pid(instance, buf, 0); + + instance->common_pid_filter = append_pid_filter(instance->common_pid_filter, + "common_pid", pid); + if (instance->have_set_event_pid) { + add_event_pid(instance, buf); + } else { + update_sched_events(instance, pid); + update_event_filters(instance); + } + } + +} + +static void ptrace_attach(struct buffer_instance *instance, int pid) +{ + int ret; + + ret = ptrace(PTRACE_ATTACH, pid, NULL, 0); + if (ret < 0) { + warning("Unable to trace process %d children", pid); + do_ptrace = 0; + return; + } + if (instance) + add_filter_pid(instance, pid, 0); + else + add_filter_pid_all(pid, 0); +} + +static void enable_ptrace(void) +{ + if (!do_ptrace || !filter_task) + return; + + ptrace(PTRACE_TRACEME, 0, NULL, 0); +} + +static struct buffer_instance *get_intance_fpid(int pid) +{ + struct buffer_instance *instance; + struct filter_pids *fpid; + + for_all_instances(instance) { + for (fpid = instance->filter_pids; fpid; fpid = fpid->next) { + if (fpid->exclude) + continue; + if (fpid->pid == pid) + break; + } + if (fpid) + return instance; + } + + return NULL; +} + +static void ptrace_wait(enum trace_type type) +{ + struct buffer_instance *instance; + struct filter_pids *fpid; + unsigned long 
send_sig; + unsigned long child; + int nr_pids = 0; + siginfo_t sig; + int main_pids; + int cstatus; + int status; + int i = 0; + int *pids; + int event; + int pid; + int ret; + + + for_all_instances(instance) + nr_pids += instance->nr_filter_pids; + + pids = calloc(nr_pids, sizeof(int)); + if (!pids) { + warning("Unable to allocate array for %d PIDs", nr_pids); + return; + } + for_all_instances(instance) { + if (!instance->ptrace_child && !instance->get_procmap) + continue; + + for (fpid = instance->filter_pids; fpid && i < nr_pids; fpid = fpid->next) { + if (fpid->exclude) + continue; + pids[i++] = fpid->pid; + } + } + main_pids = i; + + do { + ret = trace_waitpid(type, -1, &status, WSTOPPED | __WALL); + if (ret < 0) + continue; + + pid = ret; + + if (WIFSTOPPED(status)) { + event = (status >> 16) & 0xff; + ptrace(PTRACE_GETSIGINFO, pid, NULL, &sig); + send_sig = sig.si_signo; + /* Don't send ptrace sigs to child */ + if (send_sig == SIGTRAP || send_sig == SIGSTOP) + send_sig = 0; + switch (event) { + case PTRACE_EVENT_FORK: + case PTRACE_EVENT_VFORK: + case PTRACE_EVENT_CLONE: + /* forked a child */ + ptrace(PTRACE_GETEVENTMSG, pid, NULL, &child); + ptrace(PTRACE_SETOPTIONS, child, NULL, + PTRACE_O_TRACEFORK | + PTRACE_O_TRACEVFORK | + PTRACE_O_TRACECLONE | + PTRACE_O_TRACEEXIT); + add_new_filter_child_pid(pid, child); + ptrace(PTRACE_CONT, child, NULL, 0); + break; + + case PTRACE_EVENT_EXIT: + instance = get_intance_fpid(pid); + if (instance && instance->get_procmap) + get_pid_addr_maps(instance, pid); + ptrace(PTRACE_GETEVENTMSG, pid, NULL, &cstatus); + ptrace(PTRACE_DETACH, pid, NULL, NULL); + break; + } + ptrace(PTRACE_SETOPTIONS, pid, NULL, + PTRACE_O_TRACEFORK | + PTRACE_O_TRACEVFORK | + PTRACE_O_TRACECLONE | + PTRACE_O_TRACEEXIT); + ptrace(PTRACE_CONT, pid, NULL, send_sig); + } + if (WIFEXITED(status) || + (WIFSTOPPED(status) && event == PTRACE_EVENT_EXIT)) { + for (i = 0; i < nr_pids; i++) { + if (pid == pids[i]) { + pids[i] = 0; + main_pids--; + if 
(!main_pids) + finished = 1; + } + } + } + } while (!finished && ret > 0); + + free(pids); +} +#else +static inline void ptrace_wait(enum trace_type type) { } +static inline void enable_ptrace(void) { } +static inline void ptrace_attach(struct buffer_instance *instance, int pid) { } + +#endif /* NO_PTRACE */ + +static void trace_or_sleep(enum trace_type type, bool pwait) +{ + struct timeval tv = { 1 , 0 }; + + if (pwait) + ptrace_wait(type); + else if (type & TRACE_TYPE_STREAM) + trace_stream_read(pids, recorder_threads, &tv); + else + sleep(10); +} + +static int change_user(const char *user) +{ + struct passwd *pwd; + + if (!user) + return 0; + + pwd = getpwnam(user); + if (!pwd) + return -1; + if (initgroups(user, pwd->pw_gid) < 0) + return -1; + if (setgid(pwd->pw_gid) < 0) + return -1; + if (setuid(pwd->pw_uid) < 0) + return -1; + + if (setenv("HOME", pwd->pw_dir, 1) < 0) + return -1; + if (setenv("USER", pwd->pw_name, 1) < 0) + return -1; + if (setenv("LOGNAME", pwd->pw_name, 1) < 0) + return -1; + + return 0; +} + +static void run_cmd(enum trace_type type, const char *user, int argc, char **argv) +{ + int status; + int pid; + + if ((pid = fork()) < 0) + die("failed to fork"); + if (!pid) { + /* child */ + update_task_filter(); + tracecmd_enable_tracing(); + if (!fork_process) + enable_ptrace(); + /* + * If we are using stderr for stdout, switch + * it back to the saved stdout for the code we run. 
+ */ + if (save_stdout >= 0) { + close(1); + dup2(save_stdout, 1); + close(save_stdout); + } + + if (change_user(user) < 0) + die("Failed to change user to %s", user); + + if (execvp(argv[0], argv)) { + fprintf(stderr, "\n********************\n"); + fprintf(stderr, " Unable to exec %s\n", argv[0]); + fprintf(stderr, "********************\n"); + die("Failed to exec %s", argv[0]); + } + } + if (fork_process) + exit(0); + if (do_ptrace) { + ptrace_attach(NULL, pid); + ptrace_wait(type); + } else + trace_waitpid(type, pid, &status, 0); + if (type & (TRACE_TYPE_START | TRACE_TYPE_SET)) + exit(0); +} + +static void +set_plugin_instance(struct buffer_instance *instance, const char *name) +{ + char *path; + char zero = '0'; + int ret; + int fd; + + if (is_guest(instance)) + return; + + path = tracefs_instance_get_file(instance->tracefs, "current_tracer"); + fd = open(path, O_WRONLY); + if (fd < 0) { + /* + * Legacy kernels do not have current_tracer file, and they + * always use nop. So, it doesn't need to try to change the + * plugin for those if name is "nop". + */ + if (!strncmp(name, "nop", 3)) { + tracefs_put_tracing_file(path); + return; + } + die("Opening '%s'", path); + } + ret = write(fd, name, strlen(name)); + close(fd); + + if (ret < 0) + die("writing to '%s'", path); + + tracefs_put_tracing_file(path); + + if (strncmp(name, "function", 8) != 0) + return; + + /* Make sure func_stack_trace option is disabled */ + /* First try instance file, then top level */ + path = tracefs_instance_get_file(instance->tracefs, "options/func_stack_trace"); + fd = open(path, O_WRONLY); + if (fd < 0) { + tracefs_put_tracing_file(path); + path = tracefs_get_tracing_file("options/func_stack_trace"); + fd = open(path, O_WRONLY); + if (fd < 0) { + tracefs_put_tracing_file(path); + return; + } + } + /* + * Always reset func_stack_trace to zero. Don't bother saving + * the original content. 
+ */ + add_reset_file(path, "0", RESET_HIGH_PRIO); + tracefs_put_tracing_file(path); + write(fd, &zero, 1); + close(fd); +} + +static void set_plugin(const char *name) +{ + struct buffer_instance *instance; + + for_all_instances(instance) + set_plugin_instance(instance, name); +} + +static void save_option(struct buffer_instance *instance, const char *option) +{ + struct opt_list *opt; + + opt = malloc(sizeof(*opt)); + if (!opt) + die("Failed to allocate option"); + opt->next = instance->options; + instance->options = opt; + opt->option = option; +} + +static int set_option(struct buffer_instance *instance, const char *option) +{ + FILE *fp; + char *path; + + path = tracefs_instance_get_file(instance->tracefs, "trace_options"); + fp = fopen(path, "w"); + if (!fp) + warning("writing to '%s'", path); + tracefs_put_tracing_file(path); + + if (!fp) + return -1; + + fwrite(option, 1, strlen(option), fp); + fclose(fp); + + return 0; +} + +static void disable_func_stack_trace_instance(struct buffer_instance *instance) +{ + struct stat st; + char *content; + char *path; + char *cond; + int size; + int ret; + + if (is_guest(instance)) + return; + + path = tracefs_instance_get_file(instance->tracefs, "current_tracer"); + ret = stat(path, &st); + tracefs_put_tracing_file(path); + if (ret < 0) + return; + + content = tracefs_instance_file_read(instance->tracefs, + "current_tracer", &size); + cond = strstrip(content); + if (memcmp(cond, "function", size - (cond - content)) !=0) + goto out; + + set_option(instance, "nofunc_stack_trace"); + out: + free(content); +} + +static void disable_func_stack_trace(void) +{ + struct buffer_instance *instance; + + for_all_instances(instance) + disable_func_stack_trace_instance(instance); +} + +static void add_reset_options(struct buffer_instance *instance) +{ + struct opt_list *opt; + const char *option; + char *content; + char *path; + char *ptr; + int len; + + if (keep) + return; + + path = tracefs_instance_get_file(instance->tracefs, 
"trace_options"); + content = get_file_content(path); + + for (opt = instance->options; opt; opt = opt->next) { + option = opt->option; + len = strlen(option); + ptr = content; + again: + ptr = strstr(ptr, option); + if (ptr) { + /* First make sure its the option we want */ + if (ptr[len] != '\n') { + ptr += len; + goto again; + } + if (ptr - content >= 2 && strncmp(ptr - 2, "no", 2) == 0) { + /* Make sure this isn't ohno-option */ + if (ptr > content + 2 && *(ptr - 3) != '\n') { + ptr += len; + goto again; + } + /* we enabled it */ + ptr[len] = 0; + add_reset_file(path, ptr-2, RESET_DEFAULT_PRIO); + ptr[len] = '\n'; + continue; + } + /* make sure this is our option */ + if (ptr > content && *(ptr - 1) != '\n') { + ptr += len; + goto again; + } + /* this option hasn't changed, ignore it */ + continue; + } + + /* ptr is NULL, not found, maybe option is a no */ + if (strncmp(option, "no", 2) != 0) + /* option is really not found? */ + continue; + + option += 2; + len = strlen(option); + ptr = content; + loop: + ptr = strstr(content, option); + if (!ptr) + /* Really not found? 
*/ + continue; + + /* make sure this is our option */ + if (ptr[len] != '\n') { + ptr += len; + goto loop; + } + + if (ptr > content && *(ptr - 1) != '\n') { + ptr += len; + goto loop; + } + + add_reset_file(path, option, RESET_DEFAULT_PRIO); + } + tracefs_put_tracing_file(path); + free(content); +} + +static void set_options(void) +{ + struct buffer_instance *instance; + struct opt_list *opt; + int ret; + + for_all_instances(instance) { + add_reset_options(instance); + while (instance->options) { + opt = instance->options; + instance->options = opt->next; + ret = set_option(instance, opt->option); + if (ret < 0) + die("Failed to set ftrace option %s", + opt->option); + free(opt); + } + } +} + +static void set_saved_cmdlines_size(struct common_record_context *ctx) +{ + int fd, len, ret = -1; + char *path, *str; + + if (!ctx->saved_cmdlines_size) + return; + + path = tracefs_get_tracing_file("saved_cmdlines_size"); + if (!path) + goto err; + + reset_save_file(path, RESET_DEFAULT_PRIO); + + fd = open(path, O_WRONLY); + tracefs_put_tracing_file(path); + if (fd < 0) + goto err; + + len = asprintf(&str, "%d", ctx->saved_cmdlines_size); + if (len < 0) + die("%s couldn't allocate memory", __func__); + + if (write(fd, str, len) > 0) + ret = 0; + + close(fd); + free(str); +err: + if (ret) + warning("Couldn't set saved_cmdlines_size"); +} + +static int trace_check_file_exists(struct buffer_instance *instance, char *file) +{ + struct stat st; + char *path; + int ret; + + path = tracefs_instance_get_file(instance->tracefs, file); + ret = stat(path, &st); + tracefs_put_tracing_file(path); + + return ret < 0 ? 
0 : 1; +} + +static int use_old_event_method(void) +{ + static int old_event_method; + static int processed; + + if (processed) + return old_event_method; + + /* Check if the kernel has the events/enable file */ + if (!trace_check_file_exists(&top_instance, "events/enable")) + old_event_method = 1; + + processed = 1; + + return old_event_method; +} + +static void old_update_events(const char *name, char update) +{ + char *path; + FILE *fp; + int ret; + + if (strcmp(name, "all") == 0) + name = "*:*"; + + /* need to use old way */ + path = tracefs_get_tracing_file("set_event"); + fp = fopen(path, "w"); + if (!fp) + die("opening '%s'", path); + tracefs_put_tracing_file(path); + + /* Disable the event with "!" */ + if (update == '0') + fwrite("!", 1, 1, fp); + + ret = fwrite(name, 1, strlen(name), fp); + if (ret < 0) + die("bad event '%s'", name); + + ret = fwrite("\n", 1, 1, fp); + if (ret < 0) + die("bad event '%s'", name); + + fclose(fp); + + return; +} + +static void +reset_events_instance(struct buffer_instance *instance) +{ + glob_t globbuf; + char *path; + char c; + int fd; + int i; + int ret; + + if (is_guest(instance)) + return; + + if (use_old_event_method()) { + /* old way only had top instance */ + if (!is_top_instance(instance)) + return; + old_update_events("all", '0'); + return; + } + + c = '0'; + path = tracefs_instance_get_file(instance->tracefs, "events/enable"); + fd = open(path, O_WRONLY); + if (fd < 0) + die("opening to '%s'", path); + ret = write(fd, &c, 1); + close(fd); + tracefs_put_tracing_file(path); + + path = tracefs_instance_get_file(instance->tracefs, "events/*/filter"); + globbuf.gl_offs = 0; + ret = glob(path, 0, NULL, &globbuf); + tracefs_put_tracing_file(path); + if (ret < 0) + return; + + for (i = 0; i < globbuf.gl_pathc; i++) { + path = globbuf.gl_pathv[i]; + fd = open(path, O_WRONLY); + if (fd < 0) + die("opening to '%s'", path); + ret = write(fd, &c, 1); + close(fd); + } + globfree(&globbuf); +} + +static void reset_events(void) +{ 
+ struct buffer_instance *instance; + + for_all_instances(instance) + reset_events_instance(instance); +} + +enum { + STATE_NEWLINE, + STATE_SKIP, + STATE_COPY, +}; + +static char *read_file(const char *file) +{ + char stbuf[BUFSIZ]; + char *buf = NULL; + int size = 0; + char *nbuf; + int fd; + int r; + + fd = open(file, O_RDONLY); + if (fd < 0) + return NULL; + + do { + r = read(fd, stbuf, BUFSIZ); + if (r <= 0) + continue; + nbuf = realloc(buf, size+r+1); + if (!nbuf) { + free(buf); + buf = NULL; + break; + } + buf = nbuf; + memcpy(buf+size, stbuf, r); + size += r; + } while (r > 0); + + close(fd); + if (r == 0 && size > 0) + buf[size] = '\0'; + + return buf; +} + +static void read_error_log(const char *log) +{ + char *buf, *line; + char *start = NULL; + char *p; + + buf = read_file(log); + if (!buf) + return; + + line = buf; + + /* Only the last lines have meaning */ + while ((p = strstr(line, "\n")) && p[1]) { + if (line[0] != ' ') + start = line; + line = p + 1; + } + + if (start) + printf("%s", start); + + free(buf); +} + +static void show_error(const char *file, const char *type) +{ + struct stat st; + char *path = strdup(file); + char *p; + int ret; + + if (!path) + die("Could not allocate memory"); + + p = strstr(path, "tracing"); + if (p) { + if (strncmp(p + sizeof("tracing"), "instances", sizeof("instances") - 1) == 0) { + p = strstr(p + sizeof("tracing") + sizeof("instances"), "/"); + if (!p) + goto read_file; + } else { + p += sizeof("tracing") - 1; + } + ret = asprintf(&p, "%.*s/error_log", (int)(p - path), path); + if (ret < 0) + die("Could not allocate memory"); + ret = stat(p, &st); + if (ret < 0) { + free(p); + goto read_file; + } + read_error_log(p); + goto out; + } + + read_file: + p = read_file(path); + if (p) + printf("%s", p); + + out: + printf("Failed %s of %s\n", type, file); + free(path); + return; +} + +static void write_filter(const char *file, const char *filter) +{ + if (write_file(file, filter) < 0) + show_error(file, "filter"); +} + 
+static void clear_filter(const char *file) +{ + write_filter(file, "0"); +} + +static void write_trigger(const char *file, const char *trigger) +{ + if (write_file(file, trigger) < 0) + show_error(file, "trigger"); +} + +static int clear_trigger(const char *file) +{ + char trigger[BUFSIZ]; + char *save = NULL; + char *line; + char *buf; + int len; + int ret; + + buf = read_file(file); + if (!buf) { + perror(file); + return 0; + } + + trigger[0] = '!'; + + for (line = strtok_r(buf, "\n", &save); line; line = strtok_r(NULL, "\n", &save)) { + if (line[0] == '#') + continue; + len = strlen(line); + if (len > BUFSIZ - 2) + len = BUFSIZ - 2; + strncpy(trigger + 1, line, len); + trigger[len + 1] = '\0'; + /* We don't want any filters or extra on the line */ + strtok(trigger, " "); + write_file(file, trigger); + } + + free(buf); + + /* + * Some triggers have an order in removing them. + * They will not be removed if done in the wrong order. + */ + buf = read_file(file); + if (!buf) + return 0; + + ret = 0; + for (line = strtok(buf, "\n"); line; line = strtok(NULL, "\n")) { + if (line[0] == '#') + continue; + ret = 1; + break; + } + free(buf); + return ret; +} + +static void clear_func_filter(const char *file) +{ + char filter[BUFSIZ]; + struct stat st; + char *line; + char *buf; + char *p; + int len; + int ret; + int fd; + + /* Function filters may not exist */ + ret = stat(file, &st); + if (ret < 0) + return; + + /* First zero out normal filters */ + fd = open(file, O_WRONLY | O_TRUNC); + if (fd < 0) + die("opening to '%s'", file); + close(fd); + + buf = read_file(file); + if (!buf) { + perror(file); + return; + } + + /* Now remove filters */ + filter[0] = '!'; + + /* + * To delete a filter, we need to write a '!filter' + * to the file for each filter. 
+ */ + for (line = strtok(buf, "\n"); line; line = strtok(NULL, "\n")) { + if (line[0] == '#') + continue; + len = strlen(line); + if (len > BUFSIZ - 2) + len = BUFSIZ - 2; + + strncpy(filter + 1, line, len); + filter[len + 1] = '\0'; + /* + * To remove "unlimited" filters, we must remove + * the ":unlimited" from what we write. + */ + if ((p = strstr(filter, ":unlimited"))) { + *p = '\0'; + len = p - filter; + } + /* + * The write to this file expects white space + * at the end :-p + */ + filter[len] = '\n'; + filter[len+1] = '\0'; + write_file(file, filter); + } +} + +static void update_reset_triggers(void) +{ + struct reset_file *reset; + + while (reset_triggers) { + reset = reset_triggers; + reset_triggers = reset->next; + + clear_trigger(reset->path); + free(reset->path); + free(reset); + } +} + +static void update_reset_files(void) +{ + struct reset_file *reset; + + while (reset_files) { + reset = reset_files; + reset_files = reset->next; + + if (!keep) + write_file(reset->path, reset->reset); + free(reset->path); + free(reset->reset); + free(reset); + } +} + +static void +update_event(struct event_list *event, const char *filter, + int filter_only, char update) +{ + const char *name = event->event; + FILE *fp; + char *path; + int ret; + + if (use_old_event_method()) { + if (filter_only) + return; + old_update_events(name, update); + return; + } + + if (filter && event->filter_file) { + add_reset_file(event->filter_file, "0", RESET_DEFAULT_PRIO); + write_filter(event->filter_file, filter); + } + + if (event->trigger_file) { + add_reset_trigger(event->trigger_file); + clear_trigger(event->trigger_file); + write_trigger(event->trigger_file, event->trigger); + /* Make sure we don't write this again */ + free(event->trigger_file); + free(event->trigger); + event->trigger_file = NULL; + event->trigger = NULL; + } + + if (filter_only || !event->enable_file) + return; + + path = event->enable_file; + + fp = fopen(path, "w"); + if (!fp) + die("writing to '%s'", 
path); + ret = fwrite(&update, 1, 1, fp); + fclose(fp); + if (ret < 0) + die("writing to '%s'", path); +} + +/* + * The debugfs file tracing_enabled needs to be deprecated. + * But just in case anyone fiddled with it. If it exists, + * make sure it is one. + * No error checking needed here. + */ +static void check_tracing_enabled(void) +{ + static int fd = -1; + char *path; + + if (fd < 0) { + path = tracefs_get_tracing_file("tracing_enabled"); + fd = open(path, O_WRONLY | O_CLOEXEC); + tracefs_put_tracing_file(path); + + if (fd < 0) + return; + } + write(fd, "1", 1); +} + +static int open_instance_fd(struct buffer_instance *instance, + const char *file, int flags) +{ + int fd; + char *path; + + path = tracefs_instance_get_file(instance->tracefs, file); + fd = open(path, flags); + if (fd < 0) { + /* instances may not be created yet */ + if (is_top_instance(instance)) + die("opening '%s'", path); + } + tracefs_put_tracing_file(path); + + return fd; +} + +static int open_tracing_on(struct buffer_instance *instance) +{ + int fd = instance->tracing_on_fd; + + /* OK, we keep zero for stdin */ + if (fd > 0) + return fd; + + fd = open_instance_fd(instance, "tracing_on", O_RDWR | O_CLOEXEC); + if (fd < 0) { + return fd; + } + instance->tracing_on_fd = fd; + + return fd; +} + +static void write_tracing_on(struct buffer_instance *instance, int on) +{ + int ret; + int fd; + + if (is_guest(instance)) + return; + + fd = open_tracing_on(instance); + if (fd < 0) + return; + + if (on) + ret = write(fd, "1", 1); + else + ret = write(fd, "0", 1); + + if (ret < 0) + die("writing 'tracing_on'"); +} + +static int read_tracing_on(struct buffer_instance *instance) +{ + int fd; + char buf[10]; + int ret; + + if (is_guest(instance)) + return -1; + + fd = open_tracing_on(instance); + if (fd < 0) + return fd; + + ret = read(fd, buf, 10); + if (ret <= 0) + die("Reading 'tracing_on'"); + buf[9] = 0; + ret = atoi(buf); + + return ret; +} + +static void reset_max_latency_instance(void) +{ + 
struct buffer_instance *instance; + + for_all_instances(instance) + reset_max_latency(instance); +} + +void tracecmd_enable_tracing(void) +{ + struct buffer_instance *instance; + + check_tracing_enabled(); + + for_all_instances(instance) + write_tracing_on(instance, 1); + + if (latency) + reset_max_latency_instance(); +} + +void tracecmd_disable_tracing(void) +{ + struct buffer_instance *instance; + + for_all_instances(instance) + write_tracing_on(instance, 0); +} + +void tracecmd_disable_all_tracing(int disable_tracer) +{ + struct buffer_instance *instance; + + tracecmd_disable_tracing(); + + if (disable_tracer) { + disable_func_stack_trace(); + set_plugin("nop"); + } + + reset_events(); + + /* Force close and reset of ftrace pid file */ + for_all_instances(instance) + update_ftrace_pid(instance, "", 1); + + clear_trace_instances(); +} + +static void +update_sched_event(struct buffer_instance *instance, + struct event_list *event, const char *field) +{ + if (!event) + return; + + event->pid_filter = make_pid_filter(instance, event->pid_filter, field); +} + +static void update_event_filters(struct buffer_instance *instance) +{ + struct event_list *event; + char *event_filter; + int free_it; + int len; + int common_len = 0; + + if (instance->common_pid_filter) + common_len = strlen(instance->common_pid_filter); + + for (event = instance->events; event; event = event->next) { + if (!event->neg) { + + free_it = 0; + if (event->filter) { + if (!instance->common_pid_filter) + /* + * event->pid_filter is only created if + * common_pid_filter is. No need to check that. + * Just use the current event->filter. 
+ */ + event_filter = event->filter; + else if (event->pid_filter) { + free_it = 1; + len = common_len + strlen(event->pid_filter) + + strlen(event->filter) + strlen("()&&(||)") + 1; + event_filter = malloc(len); + if (!event_filter) + die("Failed to allocate event_filter"); + sprintf(event_filter, "(%s)&&(%s||%s)", + event->filter, instance->common_pid_filter, + event->pid_filter); + } else { + free_it = 1; + len = common_len + strlen(event->filter) + + strlen("()&&()") + 1; + event_filter = malloc(len); + if (!event_filter) + die("Failed to allocate event_filter"); + sprintf(event_filter, "(%s)&&(%s)", + event->filter, instance->common_pid_filter); + } + } else { + /* event->pid_filter only exists when common_pid_filter does */ + if (!instance->common_pid_filter) + continue; + + if (event->pid_filter) { + free_it = 1; + len = common_len + strlen(event->pid_filter) + + strlen("||") + 1; + event_filter = malloc(len); + if (!event_filter) + die("Failed to allocate event_filter"); + sprintf(event_filter, "%s||%s", + instance->common_pid_filter, event->pid_filter); + } else + event_filter = instance->common_pid_filter; + } + + update_event(event, event_filter, 1, '1'); + if (free_it) + free(event_filter); + } + } +} + +static void update_pid_filters(struct buffer_instance *instance) +{ + struct filter_pids *p; + char *filter; + char *str; + int len; + int ret; + int fd; + + if (is_guest(instance)) + return; + + fd = open_instance_fd(instance, "set_event_pid", + O_WRONLY | O_CLOEXEC | O_TRUNC); + if (fd < 0) + die("Failed to access set_event_pid"); + + len = instance->len_filter_pids + instance->nr_filter_pids; + filter = malloc(len); + if (!filter) + die("Failed to allocate pid filter"); + + str = filter; + + for (p = instance->filter_pids; p; p = p->next) { + if (p->exclude) + continue; + len = sprintf(str, "%d ", p->pid); + str += len; + } + + if (filter == str) + goto out; + + len = str - filter; + str = filter; + do { + ret = write(fd, str, len); + if (ret < 0) + 
die("Failed to write to set_event_pid"); + str += ret; + len -= ret; + } while (ret >= 0 && len); + + out: + close(fd); +} + +static void update_pid_event_filters(struct buffer_instance *instance) +{ + if (instance->have_set_event_pid) + return update_pid_filters(instance); + /* + * Also make sure that the sched_switch to this pid + * and wakeups of this pid are also traced. + * Only need to do this if the events are active. + */ + update_sched_event(instance, instance->sched_switch_event, "next_pid"); + update_sched_event(instance, instance->sched_wakeup_event, "pid"); + update_sched_event(instance, instance->sched_wakeup_new_event, "pid"); + + update_event_filters(instance); +} + +#define MASK_STR_MAX 4096 /* Don't expect more than 32768 CPUS */ + +static char *alloc_mask_from_hex(struct buffer_instance *instance, const char *str) +{ + char *cpumask; + + if (strcmp(str, "-1") == 0) { + /* set all CPUs */ + int bytes = (instance->cpu_count + 7) / 8; + int last = instance->cpu_count % 8; + int i; + + cpumask = malloc(MASK_STR_MAX); + if (!cpumask) + die("can't allocate cpumask"); + + if (bytes > (MASK_STR_MAX-1)) { + warning("cpumask can't handle more than 32768 CPUS!"); + bytes = MASK_STR_MAX-1; + } + + sprintf(cpumask, "%x", (1 << last) - 1); + + for (i = 1; i < bytes; i++) + cpumask[i] = 'f'; + + cpumask[i+1] = 0; + } else { + cpumask = strdup(str); + if (!cpumask) + die("can't allocate cpumask"); + } + + return cpumask; +} + +static void set_mask(struct buffer_instance *instance) +{ + struct stat st; + char *path; + int fd; + int ret; + + if (is_guest(instance)) + return; + + if (!instance->cpumask) + return; + + path = tracefs_instance_get_file(instance->tracefs, "tracing_cpumask"); + if (!path) + die("could not allocate path"); + reset_save_file(path, RESET_DEFAULT_PRIO); + + ret = stat(path, &st); + if (ret < 0) { + warning("%s not found", path); + goto out; + } + + fd = open(path, O_WRONLY | O_TRUNC); + if (fd < 0) + die("could not open %s\n", path); + + 
write(fd, instance->cpumask, strlen(instance->cpumask)); + + close(fd); + out: + tracefs_put_tracing_file(path); + free(instance->cpumask); + instance->cpumask = NULL; +} + +static void enable_events(struct buffer_instance *instance) +{ + struct event_list *event; + + if (is_guest(instance)) + return; + + for (event = instance->events; event; event = event->next) { + if (!event->neg) + update_event(event, event->filter, 0, '1'); + } + + /* Now disable any events */ + for (event = instance->events; event; event = event->next) { + if (event->neg) + update_event(event, NULL, 0, '0'); + } +} + +void tracecmd_enable_events(void) +{ + enable_events(first_instance); +} + +static void set_clock(struct common_record_context *ctx, struct buffer_instance *instance) +{ + const char *clock; + char *path; + char *content; + char *str; + + if (is_guest(instance)) + return; + + if (instance->clock) + clock = instance->clock; + else + clock = ctx->clock; + + if (!clock) + return; + + /* The current clock is in brackets, reset it when we are done */ + content = tracefs_instance_file_read(instance->tracefs, + "trace_clock", NULL); + + /* check if first clock is set */ + if (*content == '[') + str = strtok(content+1, "]"); + else { + str = strtok(content, "["); + if (!str) + die("Can not find clock in trace_clock"); + str = strtok(NULL, "]"); + } + path = tracefs_instance_get_file(instance->tracefs, "trace_clock"); + add_reset_file(path, str, RESET_DEFAULT_PRIO); + + free(content); + tracefs_put_tracing_file(path); + + tracefs_instance_file_write(instance->tracefs, + "trace_clock", clock); +} + +static void set_max_graph_depth(struct buffer_instance *instance, char *max_graph_depth) +{ + char *path; + int ret; + + if (is_guest(instance)) + return; + + path = tracefs_instance_get_file(instance->tracefs, "max_graph_depth"); + reset_save_file(path, RESET_DEFAULT_PRIO); + tracefs_put_tracing_file(path); + ret = tracefs_instance_file_write(instance->tracefs, "max_graph_depth", + 
max_graph_depth); + if (ret < 0) + die("could not write to max_graph_depth"); +} + +static bool check_file_in_dir(char *dir, char *file) +{ + struct stat st; + char *path; + int ret; + + ret = asprintf(&path, "%s/%s", dir, file); + if (ret < 0) + die("Failed to allocate id file path for %s/%s", dir, file); + ret = stat(path, &st); + free(path); + if (ret < 0 || S_ISDIR(st.st_mode)) + return false; + return true; +} + +/** + * create_event - create and event descriptor + * @instance: instance to use + * @path: path to event attribute + * @old_event: event descriptor to use as base + * + * NOTE: the function purpose is to create a data structure to describe + * an ftrace event. During the process it becomes handy to change the + * string `path`. So, do not rely on the content of `path` after you + * invoke this function. + */ +static struct event_list * +create_event(struct buffer_instance *instance, char *path, struct event_list *old_event) +{ + struct event_list *event; + struct stat st; + char *path_dirname; + char *p; + int ret; + + event = malloc(sizeof(*event)); + if (!event) + die("Failed to allocate event"); + *event = *old_event; + add_event(instance, event); + + if (event->filter || filter_task || instance->filter_pids) { + event->filter_file = strdup(path); + if (!event->filter_file) + die("malloc filter file"); + } + + path_dirname = dirname(path); + + ret = asprintf(&p, "%s/enable", path_dirname); + if (ret < 0) + die("Failed to allocate enable path for %s", path); + ret = stat(p, &st); + if (ret >= 0) + event->enable_file = p; + else + free(p); + + if (old_event->trigger) { + if (check_file_in_dir(path_dirname, "trigger")) { + event->trigger = strdup(old_event->trigger); + ret = asprintf(&p, "%s/trigger", path_dirname); + if (ret < 0) + die("Failed to allocate trigger path for %s", path); + event->trigger_file = p; + } else { + /* Check if this is event or system. 
+ * Systems do not have trigger files by design + */ + if (check_file_in_dir(path_dirname, "id")) + die("trigger specified but not supported by this kernel"); + } + } + + return event; +} + +static void make_sched_event(struct buffer_instance *instance, + struct event_list **event, struct event_list *sched, + const char *sched_path) +{ + char *path_dirname; + char *tmp_file; + char *path; + int ret; + + /* Do nothing if the event already exists */ + if (*event) + return; + + /* we do not want to corrupt sched->filter_file when using dirname() */ + tmp_file = strdup(sched->filter_file); + if (!tmp_file) + die("Failed to allocate path for %s", sched_path); + path_dirname = dirname(tmp_file); + + ret = asprintf(&path, "%s/%s/filter", path_dirname, sched_path); + free(tmp_file); + if (ret < 0) + die("Failed to allocate path for %s", sched_path); + + *event = create_event(instance, path, sched); + free(path); +} + +static void test_event(struct event_list *event, const char *path, + const char *name, struct event_list **save, int len) +{ + path += len - strlen(name); + + if (strcmp(path, name) != 0) + return; + + *save = event; +} + +static void print_event(const char *fmt, ...) 
{
	/* vprintf-style status output; suppressed when show_status is off */
	va_list ap;

	if (!show_status)
		return;

	va_start(ap, fmt);
	vprintf(fmt, ap);
	va_end(ap);

	printf("\n");
}

/*
 * Glob the instance's events/<file>/filter paths and create an event_list
 * entry for every match.  The sched events are remembered separately
 * (sched_switch / sched_wakeup / sched_wakeup_new) so PID filters can be
 * attached to them later.
 *
 * Returns non-zero when NOTHING matched (the event list tail did not move),
 * zero when at least one event was added.
 */
static int expand_event_files(struct buffer_instance *instance,
			      const char *file, struct event_list *old_event)
{
	struct event_list **save_event_tail = instance->event_next;
	struct event_list *sched_event = NULL;
	struct event_list *event;
	glob_t globbuf;
	char *path;
	char *p;
	int ret;
	int i;

	ret = asprintf(&p, "events/%s/filter", file);
	if (ret < 0)
		die("Failed to allocate event filter path for %s", file);

	path = tracefs_instance_get_file(instance->tracefs, p);

	globbuf.gl_offs = 0;
	ret = glob(path, 0, NULL, &globbuf);
	tracefs_put_tracing_file(path);
	free(p);

	/*
	 * NOTE(review): glob() reports errors (including GLOB_NOMATCH) with
	 * positive codes, so ret < 0 never triggers here; a no-match simply
	 * leaves gl_pathc == 0 — confirm this is the intended behavior.
	 */
	if (ret < 0)
		die("No filters found");

	for (i = 0; i < globbuf.gl_pathc; i++) {
		int len;

		path = globbuf.gl_pathv[i];

		event = create_event(instance, path, old_event);
		print_event("%s\n", path);

		len = strlen(path);

		/* Remember the sched events for later PID filtering */
		test_event(event, path, "sched", &sched_event, len);
		test_event(event, path, "sched/sched_switch", &instance->sched_switch_event, len);
		test_event(event, path, "sched/sched_wakeup_new", &instance->sched_wakeup_new_event, len);
		test_event(event, path, "sched/sched_wakeup", &instance->sched_wakeup_event, len);
	}

	if (sched_event && sched_event->filter_file) {
		/* make sure all sched events exist */
		make_sched_event(instance, &instance->sched_switch_event,
				 sched_event, "sched_switch");
		make_sched_event(instance, &instance->sched_wakeup_event,
				 sched_event, "sched_wakeup");
		make_sched_event(instance, &instance->sched_wakeup_new_event,
				 sched_event, "sched_wakeup_new");

	}

	globfree(&globbuf);

	/* If the event list tail changed, that means events were added */
	return save_event_tail == instance->event_next;
}

/*
 * Helper for expand_event(): expand "system/event" (either side may be a
 * glob) via expand_event_files().  Returns its "nothing matched" result.
 */
static int expand_events_all(struct buffer_instance *instance,
			     char *system_name, char *event_name,
			     struct event_list *event)
{
	char *name;
	int ret;

	ret = asprintf(&name, "%s/%s", system_name, event_name);
	if (ret < 0)
		die("Failed to allocate system/event for %s/%s",
		     system_name, event_name);
	ret = expand_event_files(instance, name, event);
	free(name);

	return ret;
}

/*
 * Expand one user-supplied event selection ("all", "system:event",
 * "system:" or a bare name matched against both systems and events)
 * into concrete event_list entries on the instance.
 */
static void expand_event(struct buffer_instance *instance, struct event_list *event)
{
	const char *name = event->event;
	char *str;
	char *ptr;
	int ret;

	/*
	 * We allow the user to use "all" to enable all events.
	 * Expand event_selection to all systems.
	 */
	if (strcmp(name, "all") == 0) {
		expand_event_files(instance, "*", event);
		return;
	}

	str = strdup(name);
	if (!str)
		die("Failed to allocate %s string", name);

	ptr = strchr(str, ':');
	if (ptr) {
		*ptr = '\0';
		ptr++;

		if (strlen(ptr))
			ret = expand_events_all(instance, str, ptr, event);
		else
			ret = expand_events_all(instance, str, "*", event);

		if (!ignore_event_not_found && ret)
			die("No events enabled with %s", name);

		goto out;
	}

	/* No ':' so enable all matching systems and events */
	ret = expand_event_files(instance, str, event);
	ret &= expand_events_all(instance, "*", str, event);
	if (event->trigger)
		ret &= expand_events_all(instance, str, "*", event);

	if (!ignore_event_not_found && ret)
		die("No events enabled with %s", name);

out:
	free(str);
}

/*
 * Replace the instance's compressed (user-typed) event list with the
 * fully expanded per-file list, freeing the compressed entries.
 * Guest instances are handled on the agent side, so skip them.
 */
static void expand_event_instance(struct buffer_instance *instance)
{
	struct event_list *compressed_list = instance->events;
	struct event_list *event;

	if (is_guest(instance))
		return;

	reset_event_list(instance);

	while (compressed_list) {
		event = compressed_list;
		compressed_list = event->next;
		expand_event(instance, event);
		free(event->trigger);
		free(event);
	}
}

/* Expand the event lists of every instance (unless the legacy path is used) */
static void expand_event_list(void)
{
	struct buffer_instance *instance;

	if (use_old_event_method())
		return;

	for_all_instances(instance)
		expand_event_instance(instance);
}

/* Signal handler: stop the per-cpu recorder and flag the record loop */
static void finish(int sig)
{
	/* all done */
	if (recorder)
		tracecmd_stop_recording(recorder);
	finished = 1;
}

/*
 * Resolve host:port for the requested transport (TCP or UDP).
 * On failure, stores the gai_strerror() text in the global gai_err
 * and returns NULL; the caller owns the returned list (freeaddrinfo).
 */
static struct addrinfo *do_getaddrinfo(const char *host, unsigned int port,
				       enum port_type type)
{
	struct addrinfo *results;
	struct addrinfo hints;
	char buf[BUFSIZ];
	int s;

	snprintf(buf, BUFSIZ, "%u", port);

	memset(&hints, 0, sizeof(hints));
	hints.ai_family = AF_UNSPEC;
	hints.ai_socktype = type == USE_TCP ? SOCK_STREAM : SOCK_DGRAM;

	s = getaddrinfo(host, buf, &hints, &results);
	if (s != 0) {
		gai_err = gai_strerror(s);
		return NULL;
	}

	dprint("Attached port %s: %d to results: %p\n",
	       type == USE_TCP ? "TCP" : "UDP", port, results);

	return results;
}

/*
 * Try each resolved address in turn until a socket connects.
 * Returns the connected fd, or -1 if every address failed.
 */
static int connect_addr(struct addrinfo *results)
{
	struct addrinfo *rp;
	int sfd = -1;

	for (rp = results; rp != NULL; rp = rp->ai_next) {
		sfd = socket(rp->ai_family, rp->ai_socktype,
			     rp->ai_protocol);
		if (sfd == -1)
			continue;
		if (connect(sfd, rp->ai_addr, rp->ai_addrlen) != -1)
			break;
		close(sfd);
	}

	if (rp == NULL)
		return -1;

	dprint("connect results: %p with fd: %d\n", results, sfd);

	return sfd;
}

/*
 * Connect to host:port over the given transport; for USE_VSOCK the
 * "host" string is the numeric CID.  Dies on any failure.
 */
static int connect_port(const char *host, unsigned int port, enum port_type type)
{
	struct addrinfo *results;
	int sfd;

	if (type == USE_VSOCK)
		return trace_vsock_open(atoi(host), port);

	results = do_getaddrinfo(host, port, type);

	if (!results)
		die("connecting to %s server %s:%u",
		    type == USE_TCP ? "TCP" : "UDP", host, port);

	sfd = connect_addr(results);

	freeaddrinfo(results);

	if (sfd < 0)
		die("Can not connect to %s server %s:%u",
		    type == USE_TCP ? "TCP" : "UDP", host, port);

	return sfd;
}

/* accept() wrapper that retries on EINTR and dies on real errors */
static int do_accept(int sd)
{
	int cd;

	for (;;) {
		dprint("Wait on accept: %d\n", sd);
		cd = accept(sd, NULL, NULL);
		dprint("accepted: %d\n", cd);
		if (cd < 0) {
			if (errno == EINTR)
				continue;
			die("accept");
		}

		return cd;
	}

	return -1;
}

/* Find all the tasks associated with the guest pid */
static void find_tasks(struct trace_guest *guest)
{
	struct dirent *dent;
	char *path;
	DIR *dir;
	int ret;
	int tasks = 0;

	ret = asprintf(&path, "/proc/%d/task", guest->pid);
	if (ret < 0)
		return;

	dir = opendir(path);
	free(path);
	if (!dir)
		return;

	while ((dent = readdir(dir))) {
		int *pids;
		if (!(dent->d_type == DT_DIR && is_digits(dent->d_name)))
			continue;
		/* keep the array -1 terminated: one slot for the pid, one for -1 */
		pids = realloc(guest->task_pids, sizeof(int) * (tasks + 2));
		if (!pids)
			break;
		pids[tasks++] = strtol(dent->d_name, NULL, 0);
		pids[tasks] = -1;
		guest->task_pids = pids;
	}
	closedir(dir);
}

/*
 * Parse a guest descriptor of the form "name[@cid][:port]", a bare CID,
 * or an IP address.  Fills *cid and *port (-1 when absent).  For a known
 * guest returns its canonical name; for an internet address stores the
 * resolved addrinfo in *res.  NOTE: the input string is modified in place.
 */
static char *parse_guest_name(char *gname, int *cid, int *port,
			      struct addrinfo **res)
{
	struct trace_guest *guest = NULL;
	struct addrinfo *result;
	char *ip = NULL;
	char *p;

	*res = NULL;

	*port = -1;
	/* scan backward for the last ':' (the :port separator) */
	for (p = gname + strlen(gname); p > gname; p--) {
		if (*p == ':')
			break;
	}
	if (p > gname) {
		*p = '\0';
		*port = atoi(p + 1);
	}

	*cid = -1;
	p = strrchr(gname, '@');
	if (p) {
		*p = '\0';
		*cid = atoi(p + 1);
	} else if (is_digits(gname)) {
		*cid = atoi(gname);
	} else {
		/* Check if this is an IP address */
		if (strstr(gname, ":") || strstr(gname, "."))
			ip = gname;
	}

	if (!ip && *cid < 0)
		read_qemu_guests();

	if (!ip)
		guest = trace_get_guest(*cid, gname);
	if (guest) {
		*cid = guest->cid;
		/* Mapping not found, search for them */
		if (!guest->cpu_pid)
			find_tasks(guest);
		return guest->name;
	}

	/* Test to see if this is an internet address */
	result = do_getaddrinfo(gname, *port, USE_TCP);
	if (!result)
		return NULL;

	*res = result;

	return gname;
}

/* Give the current task SCHED_FIFO priority prio (warn, don't die, on failure) */
static void set_prio(int prio)
{
	struct sched_param sp;

	memset(&sp, 0, sizeof(sp));
	sp.sched_priority = prio;
	if (sched_setscheduler(0, SCHED_FIFO, &sp) < 0)
		warning("failed to set priority");
}

/*
 * Child-side setup for streaming: close the read end of the pipe and
 * record this instance's per-cpu buffer into the write end via splice.
 */
static struct tracecmd_recorder *
create_recorder_instance_pipe(struct buffer_instance *instance,
			      int cpu, int *brass)
{
	struct tracecmd_recorder *recorder;
	unsigned flags = recorder_flags | TRACECMD_RECORD_BLOCK_SPLICE;
	char *path;

	path = tracefs_instance_get_dir(instance->tracefs);

	if (!path)
		die("malloc");

	/* This is already the child */
	close(brass[0]);

	recorder = tracecmd_create_buffer_recorder_fd(brass[1], cpu, flags, path);

	tracefs_put_tracing_file(path);

	return recorder;
}

/*
 * Create the recorder for one cpu of an instance.  Guests are reached
 * through their FIFO, network port, or vsock; a non-NULL brass selects
 * the streaming pipe path; otherwise record into the temp file.
 */
static struct tracecmd_recorder *
create_recorder_instance(struct buffer_instance *instance, const char *file, int cpu,
			 int *brass)
{
	struct tracecmd_recorder *record;
	struct addrinfo *result;
	char *path;

	if (is_guest(instance)) {
		int fd;
		unsigned int flags;

		if (instance->use_fifos)
			fd = instance->fds[cpu];
		else if (is_network(instance)) {
			result = do_getaddrinfo(instance->name,
						instance->client_ports[cpu],
						instance->port_type);
			if (!result)
				die("Failed to connect to %s port %d\n",
				    instance->name,
				    instance->client_ports[cpu]);
			fd = connect_addr(result);
			freeaddrinfo(result);
		} else
			fd = trace_vsock_open(instance->cid, instance->client_ports[cpu]);
		if (fd < 0)
			die("Failed to connect to agent");

		flags = recorder_flags;
		if (instance->use_fifos)
			flags |= TRACECMD_RECORD_NOBRASS;
		else if (!trace_vsock_can_splice_read())
			flags |= TRACECMD_RECORD_NOSPLICE;
		return tracecmd_create_recorder_virt(file, cpu, flags, fd);
	}

	if (brass)
		return create_recorder_instance_pipe(instance, cpu, brass);

	/* Top-level (unnamed) instance records straight from the top buffer */
	if (!tracefs_instance_get_name(instance->tracefs))
		return tracecmd_create_recorder_maxkb(file, cpu, recorder_flags, max_kb);

	path = tracefs_instance_get_dir(instance->tracefs);

	record = tracecmd_create_buffer_recorder_maxkb(file, cpu, recorder_flags,
						       path, max_kb);
	tracefs_put_tracing_file(path);

	return record;
}

/*
 * If extract is set, then this is going to set up the recorder,
 * connections and exit as the tracing is serialized by a single thread.
 */
static int create_recorder(struct buffer_instance *instance, int cpu,
			   enum trace_type type, int *brass)
{
	long ret;
	char *file;
	pid_t pid;

	if (type != TRACE_TYPE_EXTRACT) {

		/* One forked child per cpu does the actual recording */
		pid = fork();
		if (pid < 0)
			die("fork");

		if (pid)
			return pid;

		signal(SIGINT, SIG_IGN);
		signal(SIGUSR1, finish);

		if (rt_prio)
			set_prio(rt_prio);

		/* do not kill tasks on error */
		instance->cpu_count = 0;
	}

	if ((instance->client_ports && !is_guest(instance)) || is_agent(instance)) {
		unsigned int flags = recorder_flags;
		char *path = NULL;
		int fd;

		if (is_agent(instance)) {
			if (instance->use_fifos)
				fd = instance->fds[cpu];
			else {
again:
				fd = do_accept(instance->fds[cpu]);
				/* only accept connections from the expected host */
				if (instance->host &&
				    !trace_net_cmp_connection_fd(fd, instance->host)) {
					dprint("Client does not match '%s' for cpu:%d\n",
					       instance->host, cpu);
					goto again;
				}
			}
		} else {
			fd = connect_port(host, instance->client_ports[cpu],
					  instance->port_type);
		}
		if (fd < 0)
			die("Failed connecting to client");
		if (tracefs_instance_get_name(instance->tracefs) && !is_agent(instance)) {
			path = tracefs_instance_get_dir(instance->tracefs);
		} else {
			const char *dir = tracefs_tracing_dir();

			if (dir)
				path = strdup(dir);
		}
		if (!path)
			die("can't get the tracing directory");

		recorder = tracecmd_create_buffer_recorder_fd(fd, cpu, flags, path);
		tracefs_put_tracing_file(path);
	} else {
		file = get_temp_file(instance, cpu);
		recorder = create_recorder_instance(instance, file, cpu, brass);
		put_temp_file(file);
	}

	if (!recorder)
		die ("can't create recorder");

	if (type == 
TRACE_TYPE_EXTRACT) { + ret = tracecmd_flush_recording(recorder); + tracecmd_free_recorder(recorder); + recorder = NULL; + return ret; + } + + while (!finished) { + if (tracecmd_start_recording(recorder, sleep_time) < 0) + break; + } + tracecmd_free_recorder(recorder); + recorder = NULL; + + exit(0); +} + +static void check_first_msg_from_server(struct tracecmd_msg_handle *msg_handle) +{ + char buf[BUFSIZ]; + + read(msg_handle->fd, buf, 8); + + /* Make sure the server is the tracecmd server */ + if (memcmp(buf, "tracecmd", 8) != 0) + die("server not tracecmd server"); +} + +static void communicate_with_listener_v1(struct tracecmd_msg_handle *msg_handle, + struct buffer_instance *instance) +{ + unsigned int *client_ports; + char buf[BUFSIZ]; + ssize_t n; + int cpu, i; + + check_first_msg_from_server(msg_handle); + + /* write the number of CPUs we have (in ASCII) */ + sprintf(buf, "%d", local_cpu_count); + + /* include \0 */ + write(msg_handle->fd, buf, strlen(buf)+1); + + /* write the pagesize (in ASCII) */ + sprintf(buf, "%d", page_size); + + /* include \0 */ + write(msg_handle->fd, buf, strlen(buf)+1); + + /* + * If we are using IPV4 and our page size is greater than + * or equal to 64K, we need to punt and use TCP. :-( + */ + + /* TODO, test for ipv4 */ + if (page_size >= UDP_MAX_PACKET) { + warning("page size too big for UDP using TCP in live read"); + instance->port_type = USE_TCP; + msg_handle->flags |= TRACECMD_MSG_FL_USE_TCP; + } + + if (instance->port_type == USE_TCP) { + /* Send one option */ + write(msg_handle->fd, "1", 2); + /* Size 4 */ + write(msg_handle->fd, "4", 2); + /* use TCP */ + write(msg_handle->fd, "TCP", 4); + } else + /* No options */ + write(msg_handle->fd, "0", 2); + + client_ports = malloc(local_cpu_count * sizeof(*client_ports)); + if (!client_ports) + die("Failed to allocate client ports for %d cpus", local_cpu_count); + + /* + * Now we will receive back a comma deliminated list + * of client ports to connect to. 
	 */
	for (cpu = 0; cpu < local_cpu_count; cpu++) {
		/* read one port number, a byte at a time, up to ',' or NUL */
		for (i = 0; i < BUFSIZ; i++) {
			n = read(msg_handle->fd, buf+i, 1);
			if (n != 1)
				die("Error, reading server ports");
			if (!buf[i] || buf[i] == ',')
				break;
		}
		if (i == BUFSIZ)
			die("read bad port number");
		buf[i] = 0;
		client_ports[cpu] = atoi(buf);
	}

	instance->client_ports = client_ports;
}

/* v3 protocol handshake: the message layer negotiates the ports for us */
static void communicate_with_listener_v3(struct tracecmd_msg_handle *msg_handle,
					 unsigned int **client_ports)
{
	if (tracecmd_msg_send_init_data(msg_handle, client_ports) < 0)
		die("Cannot communicate with server");
}

/*
 * Probe which protocol the listener speaks and record it in
 * msg_handle->version (V1_PROTOCOL or V3_PROTOCOL).
 */
static void check_protocol_version(struct tracecmd_msg_handle *msg_handle)
{
	char buf[BUFSIZ];
	int fd = msg_handle->fd;
	int n;

	check_first_msg_from_server(msg_handle);

	/*
	 * Write the protocol version, the magic number, and the dummy
	 * option(0) (in ASCII). The client understands whether the client
	 * uses the v3 protocol or not by checking a reply message from the
	 * server. If the message is "V3", the server uses v3 protocol. On the
	 * other hands, if the message is just number strings, the server
	 * returned port numbers. So, in that time, the client understands the
	 * server uses the v1 protocol. However, the old server tells the
	 * client port numbers after reading cpu_count, page_size, and option.
	 * So, we add the dummy number (the magic number and 0 option) to the
	 * first client message.
	 */
	write(fd, V3_CPU, sizeof(V3_CPU));

	buf[0] = 0;

	/* read a reply message */
	n = read(fd, buf, BUFSIZ);

	if (n < 0 || !buf[0]) {
		/* the server uses the v1 protocol, so we'll use it */
		msg_handle->version = V1_PROTOCOL;
		tracecmd_plog("Use the v1 protocol\n");
	} else {
		if (memcmp(buf, "V3", n) != 0)
			die("Cannot handle the protocol %s", buf);
		/* OK, let's use v3 protocol */
		write(fd, V3_MAGIC, sizeof(V3_MAGIC));

		n = read(fd, buf, BUFSIZ - 1);
		if (n != 2 || memcmp(buf, "OK", 2) != 0) {
			if (n < 0)
				n = 0;
			buf[n] = 0;
			die("Cannot handle the protocol %s", buf);
		}
	}
}

/*
 * Parse "CID:PORT" and open a vsock to it.  Note: overwrites the global
 * host with a strdup'd copy that strtok_r() splits in place.
 */
static int connect_vsock(char *vhost)
{
	char *cid;
	char *port;
	char *p;
	int sd;

	host = strdup(vhost);
	if (!host)
		die("alloctating server");

	cid = strtok_r(host, ":", &p);
	port = strtok_r(NULL, "", &p);

	if (!port)
		die("vsocket must have format of 'CID:PORT'");

	sd = trace_vsock_open(atoi(cid), atoi(port));

	return sd;
}

/*
 * Connect over TCP to "host:port", or to localhost when only a port was
 * given.  Also rewrites the global host (split in place by strtok_r).
 */
static int connect_ip(char *thost)
{
	struct addrinfo *result;
	int sfd;
	char *server;
	char *port;
	char *p;

	if (!strchr(host, ':')) {
		server = strdup("localhost");
		if (!server)
			die("alloctating server");
		port = thost;
		host = server;
	} else {
		host = strdup(thost);
		if (!host)
			die("alloctating server");
		server = strtok_r(host, ":", &p);
		port = strtok_r(NULL, ":", &p);
	}

	result = do_getaddrinfo(server, atoi(port), USE_TCP);
	if (!result)
		die("getaddrinfo: %s", gai_err);

	sfd = connect_addr(result);

	freeaddrinfo(result);

	if (sfd < 0)
		die("Can not connect to %s:%s", server, port);

	return sfd;
}

/*
 * Open the socket (vsock or IP) to the listener named by the global host.
 * NOTE(review): the v1-fallback path in the caller does free(host) and
 * jumps back here, after which connect_ip()/connect_vsock() read the
 * just-freed global host — verify this retry path against the listener.
 */
static struct tracecmd_msg_handle *setup_network(struct buffer_instance *instance)
{
	struct tracecmd_msg_handle *msg_handle = NULL;
	enum port_type type = instance->port_type;
	int sfd;

again:
	switch (type) {
	case USE_VSOCK:
		sfd = connect_vsock(host);
		break;
	default:
		sfd = connect_ip(host);
	}

	if (sfd < 0)
		return NULL;

	if 
(msg_handle) {
		/* retry after protocol fallback: reuse the handle, new fd */
		msg_handle->fd = sfd;
	} else {
		msg_handle = tracecmd_msg_handle_alloc(sfd, 0);
		if (!msg_handle)
			die("Failed to allocate message handle");

		msg_handle->cpu_count = local_cpu_count;
		msg_handle->version = V3_PROTOCOL;
	}

	switch (type) {
	case USE_TCP:
		msg_handle->flags |= TRACECMD_MSG_FL_USE_TCP;
		break;
	case USE_VSOCK:
		msg_handle->flags |= TRACECMD_MSG_FL_USE_VSOCK;
		break;
	default:
		break;
	}

	if (msg_handle->version == V3_PROTOCOL) {
		check_protocol_version(msg_handle);
		if (msg_handle->version == V1_PROTOCOL) {
			/* reconnect to the server for using the v1 protocol */
			close(sfd);
			free(host);
			goto again;
		}
		communicate_with_listener_v3(msg_handle, &instance->client_ports);
	}

	if (msg_handle->version == V1_PROTOCOL)
		communicate_with_listener_v1(msg_handle, instance);

	return msg_handle;
}

static void add_options(struct tracecmd_output *handle, struct common_record_context *ctx);

/*
 * Build a tracecmd_output that writes through the network message
 * handle, with the requested file version and compression.
 * Returns NULL (after closing the handle) on any setup failure.
 */
static struct tracecmd_output *create_net_output(struct common_record_context *ctx,
						 struct tracecmd_msg_handle *msg_handle)
{
	struct tracecmd_output *out;

	out = tracecmd_output_create(NULL);
	if (!out)
		return NULL;
	if (ctx->file_version && tracecmd_output_set_version(out, ctx->file_version))
		goto error;
	if (tracecmd_output_set_msg(out, msg_handle))
		goto error;

	if (ctx->compression) {
		if (tracecmd_output_set_compression(out, ctx->compression))
			goto error;
	} else if (ctx->file_version >= FILE_VERSION_COMPRESSION) {
		tracecmd_output_set_compression(out, "any");
	}

	if (tracecmd_output_write_headers(out, listed_events))
		goto error;

	return out;
error:
	tracecmd_output_close(out);
	return NULL;
}

/*
 * Establish the connection to the listener and send the initial
 * metadata (options, cmdlines, cpu/buffer info) appropriate to the
 * negotiated protocol version.  Returns the message handle, or NULL.
 */
static struct tracecmd_msg_handle *
setup_connection(struct buffer_instance *instance, struct common_record_context *ctx)
{
	struct tracecmd_msg_handle *msg_handle = NULL;
	struct tracecmd_output *network_handle = NULL;
	int ret;

	msg_handle = setup_network(instance);
	if (!msg_handle)
		die("Failed to make connection");

	/* Now create the handle through this socket */
	if (msg_handle->version == V3_PROTOCOL) {
		network_handle = create_net_output(ctx, msg_handle);
		if (!network_handle)
			goto error;
		tracecmd_set_quiet(network_handle, quiet);
		add_options(network_handle, ctx);
		ret = tracecmd_write_cmdlines(network_handle);
		if (ret)
			goto error;
		ret = tracecmd_write_cpus(network_handle, instance->cpu_count);
		if (ret)
			goto error;
		ret = tracecmd_write_buffer_info(network_handle);
		if (ret)
			goto error;
		ret = tracecmd_write_options(network_handle);
		if (ret)
			goto error;
		ret = tracecmd_msg_finish_sending_data(msg_handle);
		if (ret)
			goto error;
	} else {
		/* v1: write a plain trace file straight down the socket */
		network_handle = tracecmd_output_create_fd(msg_handle->fd);
		if (!network_handle)
			goto error;
		if (tracecmd_output_set_version(network_handle, ctx->file_version))
			goto error;

		if (ctx->compression) {
			if (tracecmd_output_set_compression(network_handle, ctx->compression))
				goto error;
		} else if (ctx->file_version >= FILE_VERSION_COMPRESSION) {
			tracecmd_output_set_compression(network_handle, "any");
		}

		if (tracecmd_output_write_headers(network_handle, listed_events))
			goto error;
		tracecmd_set_quiet(network_handle, quiet);
	}

	instance->network_handle = network_handle;

	/* OK, we are all set, let'r rip! 
*/ + return msg_handle; + +error: + if (msg_handle) + tracecmd_msg_handle_close(msg_handle); + if (network_handle) + tracecmd_output_close(network_handle); + return NULL; +} + +static void finish_network(struct tracecmd_msg_handle *msg_handle) +{ + if (msg_handle->version == V3_PROTOCOL) + tracecmd_msg_send_close_msg(msg_handle); + tracecmd_msg_handle_close(msg_handle); + free(host); +} + +static int open_guest_fifos(const char *guest, int **fds) +{ + char path[PATH_MAX]; + int i, fd, flags; + + for (i = 0; ; i++) { + snprintf(path, sizeof(path), GUEST_FIFO_FMT ".out", guest, i); + + /* O_NONBLOCK so we don't wait for writers */ + fd = open(path, O_RDONLY | O_NONBLOCK); + if (fd < 0) + break; + + /* Success, now clear O_NONBLOCK */ + flags = fcntl(fd, F_GETFL); + fcntl(fd, F_SETFL, flags & ~O_NONBLOCK); + + *fds = realloc(*fds, i + 1); + (*fds)[i] = fd; + } + + return i; +} + +struct trace_mapping { + struct tep_event *kvm_entry; + struct tep_format_field *vcpu_id; + struct tep_format_field *common_pid; + int *pids; + int *map; + int max_cpus; +}; + +static void start_mapping_vcpus(struct trace_guest *guest) +{ + char *pids = NULL; + char *t; + int len = 0; + int s; + int i; + + if (!guest->task_pids) + return; + + guest->instance = tracefs_instance_create("map_guest_pids"); + if (!guest->instance) + return; + + for (i = 0; guest->task_pids[i] >= 0; i++) { + s = snprintf(NULL, 0, "%d ", guest->task_pids[i]); + t = realloc(pids, len + s + 1); + if (!t) { + free(pids); + pids = NULL; + break; + } + pids = t; + sprintf(pids + len, "%d ", guest->task_pids[i]); + len += s; + } + if (pids) { + tracefs_instance_file_write(guest->instance, "set_event_pid", pids); + free(pids); + } + tracefs_instance_file_write(guest->instance, "events/kvm/kvm_entry/enable", "1"); +} + +static int map_vcpus(struct tep_event *event, struct tep_record *record, + int cpu, void *context) +{ + struct trace_mapping *tmap = context; + unsigned long long val; + int type; + int pid; + int ret; + int 
i;

	/* Do we have junk in the buffer? */
	type = tep_data_type(event->tep, record);
	if (type != tmap->kvm_entry->id)
		return 0;

	ret = tep_read_number_field(tmap->common_pid, record->data, &val);
	if (ret < 0)
		return 0;
	pid = (int)val;

	for (i = 0; tmap->pids[i] >= 0; i++) {
		if (pid == tmap->pids[i])
			break;
	}
	/* Is this thread one we care about ? */
	if (tmap->pids[i] < 0)
		return 0;

	ret = tep_read_number_field(tmap->vcpu_id, record->data, &val);
	if (ret < 0)
		return 0;

	cpu = (int)val;

	/* Sanity check, warn? */
	if (cpu >= tmap->max_cpus)
		return 0;

	/* Already have this one? Should we check if it is the same? */
	if (tmap->map[cpu] >= 0)
		return 0;

	tmap->map[cpu] = pid;

	/* Did we get them all */
	for (i = 0; i < tmap->max_cpus; i++) {
		if (tmap->map[i] < 0)
			break;
	}

	return i == tmap->max_cpus;
}

/*
 * Stop the kvm_entry mapping trace started by start_mapping_vcpus(),
 * read the recorded events to build the vcpu -> host pid map, store it
 * in the guest on success, and tear down the temporary instance.
 */
static void stop_mapping_vcpus(struct buffer_instance *instance,
			       struct trace_guest *guest)
{
	struct trace_mapping tmap = { };
	struct tep_handle *tep;
	const char *systems[] = { "kvm", NULL };
	int i;

	if (!guest->instance)
		return;

	tmap.pids = guest->task_pids;
	tmap.max_cpus = instance->cpu_count;

	tmap.map = malloc(sizeof(*tmap.map) * tmap.max_cpus);
	if (!tmap.map)
		return;

	for (i = 0; i < tmap.max_cpus; i++)
		tmap.map[i] = -1;

	tracefs_instance_file_write(guest->instance, "events/kvm/kvm_entry/enable", "0");

	tep = tracefs_local_events_system(NULL, systems);
	if (!tep)
		goto out;

	tmap.kvm_entry = tep_find_event_by_name(tep, "kvm", "kvm_entry");
	if (!tmap.kvm_entry)
		goto out_free;

	tmap.vcpu_id = tep_find_field(tmap.kvm_entry, "vcpu_id");
	if (!tmap.vcpu_id)
		goto out_free;

	tmap.common_pid = tep_find_any_field(tmap.kvm_entry, "common_pid");
	if (!tmap.common_pid)
		goto out_free;

	tracefs_iterate_raw_events(tep, guest->instance, NULL, 0, map_vcpus, &tmap);

	for (i = 0; i < tmap.max_cpus; i++) {
		if (tmap.map[i] < 0)
			break;
	}
	/* We found all the mapped CPUs */
	if (i == tmap.max_cpus) {
		/* hand the map to the guest; don't free it below */
		guest->cpu_pid = tmap.map;
		guest->cpu_max = tmap.max_cpus;
		tmap.map = NULL;
	}

 out_free:
	tep_free(tep);
 out:
	free(tmap.map);
	tracefs_instance_destroy(guest->instance);
	tracefs_instance_free(guest->instance);
}

/*
 * Start time synchronization with a guest/remote over the negotiated
 * protocol on tsync_port.  Returns 0 on success, -1 on failure.
 */
static int host_tsync(struct common_record_context *ctx,
		      struct buffer_instance *instance,
		      unsigned int tsync_port, char *proto)
{
	struct trace_guest *guest;
	int guest_pid = -1;
	int fd;

	if (!proto)
		return -1;

	if (is_network(instance)) {
		fd = connect_port(instance->name, tsync_port,
				  instance->port_type);
	} else {
		guest = trace_get_guest(instance->cid, NULL);
		if (guest == NULL)
			return -1;

		guest_pid = guest->pid;
		start_mapping_vcpus(guest);
		fd = trace_vsock_open(instance->cid, tsync_port);
	}

	instance->tsync = tracecmd_tsync_with_guest(top_instance.trace_id,
						    instance->tsync_loop_interval,
						    fd, guest_pid,
						    instance->cpu_count,
						    proto, ctx->clock);
	if (!is_network(instance))
		stop_mapping_vcpus(instance, guest);

	if (!instance->tsync)
		return -1;

	return 0;
}

/*
 * Connect to the trace-cmd agent inside a guest (or on a remote host),
 * negotiate FIFOs vs ports, time sync protocol and cpu count, and
 * attach the resulting message handle to the instance.
 */
static void connect_to_agent(struct common_record_context *ctx,
			     struct buffer_instance *instance)
{
	struct tracecmd_tsync_protos *protos = NULL;
	int sd, ret, nr_fifos, nr_cpus, page_size;
	struct tracecmd_msg_handle *msg_handle;
	enum tracecmd_time_sync_role role;
	char *tsync_protos_reply = NULL;
	unsigned int tsync_port = 0;
	unsigned int *ports;
	int i, *fds = NULL;
	bool use_fifos = false;

	if (!no_fifos) {
		nr_fifos = open_guest_fifos(instance->name, &fds);
		use_fifos = nr_fifos > 0;
	}

	if (ctx->instance->result) {
		role = TRACECMD_TIME_SYNC_ROLE_CLIENT;
		sd = connect_addr(ctx->instance->result);
		if (sd < 0)
			die("Failed to connect to host %s:%u",
			    instance->name, instance->port);
	} else {
		role = TRACECMD_TIME_SYNC_ROLE_HOST;
		sd = trace_vsock_open(instance->cid, instance->port);
		if (sd < 0)
			die("Failed to connect to 
vsocket @%u:%u",
			    instance->cid, instance->port);
	}

	msg_handle = tracecmd_msg_handle_alloc(sd, 0);
	if (!msg_handle)
		die("Failed to allocate message handle");

	if (!instance->clock)
		instance->clock = tracefs_get_clock(NULL);

	if (instance->tsync_loop_interval >= 0)
		tracecmd_tsync_proto_getall(&protos, instance->clock, role);

	ret = tracecmd_msg_send_trace_req(msg_handle, instance->argc,
					  instance->argv, use_fifos,
					  top_instance.trace_id, protos);
	if (ret < 0)
		die("Failed to send trace request");

	if (protos) {
		free(protos->names);
		free(protos);
	}
	ret = tracecmd_msg_recv_trace_resp(msg_handle, &nr_cpus, &page_size,
					   &ports, &use_fifos,
					   &instance->trace_id,
					   &tsync_protos_reply, &tsync_port);
	if (ret < 0)
		die("Failed to receive trace response %d", ret);
	/* a non-empty reply means the agent agreed on a tsync protocol */
	if (tsync_protos_reply && tsync_protos_reply[0]) {
		if (tsync_proto_is_supported(tsync_protos_reply)) {
			printf("Negotiated %s time sync protocol with guest %s\n",
			       tsync_protos_reply,
			       instance->name);
			instance->cpu_count = nr_cpus;
			host_tsync(ctx, instance, tsync_port, tsync_protos_reply);
		} else
			warning("Failed to negotiate timestamps synchronization with the guest");
	}
	free(tsync_protos_reply);

	if (use_fifos) {
		if (nr_cpus != nr_fifos) {
			warning("number of FIFOs (%d) for guest %s differs "
				"from number of virtual CPUs (%d)",
				nr_fifos, instance->name, nr_cpus);
			/* record only the cpus we actually have FIFOs for */
			nr_cpus = nr_cpus < nr_fifos ? nr_cpus : nr_fifos;
		}
		free(ports);
		instance->fds = fds;
	} else {
		/* ports won; the FIFO fds are not needed */
		for (i = 0; i < nr_fifos; i++)
			close(fds[i]);
		free(fds);
		instance->client_ports = ports;
	}

	instance->use_fifos = use_fifos;
	instance->cpu_count = nr_cpus;

	/* the msg_handle now points to the guest fd */
	instance->msg_handle = msg_handle;
}

/*
 * Prepare the per-guest output file and stream the guest's tracing
 * metadata into it through the agent connection.
 */
static void setup_guest(struct buffer_instance *instance)
{
	struct tracecmd_msg_handle *msg_handle = instance->msg_handle;
	const char *output_file = instance->output_file;
	char *file;
	int fd;

	/* Create a place to store the guest meta data */
	file = trace_get_guest_file(output_file, instance->name);
	if (!file)
		die("Failed to allocate memory");

	free(instance->output_file);
	instance->output_file = file;

	fd = open(file, O_CREAT|O_WRONLY|O_TRUNC, 0644);
	if (fd < 0)
		die("Failed to open %s", file);

	/* Start reading tracing metadata */
	if (tracecmd_msg_read_data(msg_handle, fd))
		die("Failed receiving metadata");
	close(fd);
}

/*
 * Agent side: create the network output over the existing message
 * handle and push all the initial metadata to the recording host.
 */
static void setup_agent(struct buffer_instance *instance,
			struct common_record_context *ctx)
{
	struct tracecmd_output *network_handle;

	network_handle = create_net_output(ctx, instance->msg_handle);
	add_options(network_handle, ctx);
	tracecmd_write_cmdlines(network_handle);
	tracecmd_write_cpus(network_handle, instance->cpu_count);
	tracecmd_write_buffer_info(network_handle);
	tracecmd_write_options(network_handle);
	tracecmd_write_meta_strings(network_handle);
	tracecmd_msg_finish_sending_data(instance->msg_handle);
	instance->network_handle = network_handle;
}

/*
 * Spawn one recorder child per cpu per instance, after establishing
 * any agent/guest/listener connections needed to know the cpu counts.
 */
void start_threads(enum trace_type type, struct common_record_context *ctx)
{
	struct buffer_instance *instance;
	int total_cpu_count = 0;
	int i = 0;
	int ret;

	for_all_instances(instance) {
		/* Start the connection now to find out how many CPUs we need */
		if (is_guest(instance))
			connect_to_agent(ctx, instance);
		total_cpu_count += instance->cpu_count;
	}

	/* make a thread for 
every CPU we have */ + pids = calloc(total_cpu_count * (buffers + 1), sizeof(*pids)); + if (!pids) + die("Failed to allocate pids for %d cpus", total_cpu_count); + + for_all_instances(instance) { + int *brass = NULL; + int x, pid; + + if (is_agent(instance)) { + setup_agent(instance, ctx); + } else if (is_guest(instance)) { + setup_guest(instance); + } else if (host) { + instance->msg_handle = setup_connection(instance, ctx); + if (!instance->msg_handle) + die("Failed to make connection"); + } + + for (x = 0; x < instance->cpu_count; x++) { + if (type & TRACE_TYPE_STREAM) { + brass = pids[i].brass; + ret = pipe(brass); + if (ret < 0) + die("pipe"); + pids[i].stream = trace_stream_init(instance, x, + brass[0], + instance->cpu_count, + hooks, handle_init, + ctx->global); + if (!pids[i].stream) + die("Creating stream for %d", i); + } else + pids[i].brass[0] = -1; + pids[i].cpu = x; + pids[i].instance = instance; + /* Make sure all output is flushed before forking */ + fflush(stdout); + pid = pids[i++].pid = create_recorder(instance, x, type, brass); + if (brass) + close(brass[1]); + if (pid > 0) + add_filter_pid(instance, pid, 1); + } + } + recorder_threads = i; +} + +static void touch_file(const char *file) +{ + int fd; + + fd = open(file, O_WRONLY | O_CREAT | O_TRUNC, 0644); + if (fd < 0) + die("could not create file %s\n", file); + close(fd); +} + +static void append_buffer(struct tracecmd_output *handle, + struct buffer_instance *instance, + char **temp_files) +{ + int cpu_count = instance->cpu_count; + int i; + + /* + * Since we can record remote and virtual machines in the same file + * as the host, the buffers may no longer have matching number of + * CPU data as the host. 
For backward compatibility for older + * trace-cmd versions, which will blindly read the number of CPUs + * for each buffer instance as there are for the host, if there are + * fewer CPUs on the remote machine than on the host, an "empty" + * CPU is needed for each CPU that the host has that the remote does + * not. If there are more CPUs on the remote, older executables will + * simply ignore them (which is OK, we only need to guarantee that + * old executables don't crash). + */ + if (instance->cpu_count < local_cpu_count) + cpu_count = local_cpu_count; + + for (i = 0; i < cpu_count; i++) { + temp_files[i] = get_temp_file(instance, i); + if (i >= instance->cpu_count) + touch_file(temp_files[i]); + } + + tracecmd_append_buffer_cpu_data(handle, tracefs_instance_get_name(instance->tracefs), + cpu_count, temp_files); + + for (i = 0; i < instance->cpu_count; i++) { + if (i >= instance->cpu_count) + delete_temp_file(instance, i); + put_temp_file(temp_files[i]); + } +} + +static void +add_guest_info(struct tracecmd_output *handle, struct buffer_instance *instance) +{ + struct trace_guest *guest; + const char *name; + char *buf, *p; + int size; + int pid; + int i; + + if (is_network(instance)) { + name = instance->name; + } else { + guest = trace_get_guest(instance->cid, NULL); + if (!guest) + return; + name = guest->name; + } + + size = strlen(name) + 1; + size += sizeof(long long); /* trace_id */ + size += sizeof(int); /* cpu count */ + size += instance->cpu_count * 2 * sizeof(int); /* cpu,pid pair */ + + buf = calloc(1, size); + if (!buf) + return; + p = buf; + strcpy(p, name); + p += strlen(name) + 1; + + memcpy(p, &instance->trace_id, sizeof(long long)); + p += sizeof(long long); + + memcpy(p, &instance->cpu_count, sizeof(int)); + p += sizeof(int); + for (i = 0; i < instance->cpu_count; i++) { + pid = -1; + if (!is_network(instance)) { + if (i < guest->cpu_max) + pid = guest->cpu_pid[i]; + } + memcpy(p, &i, sizeof(int)); + p += sizeof(int); + memcpy(p, &pid, 
sizeof(int));
		p += sizeof(int);
	}

	tracecmd_add_option(handle, TRACECMD_OPTION_GUEST, size, buf);
	free(buf);
}

/*
 * Write each recorded process's library mappings as a
 * TRACECMD_OPTION_PROCMAPS option (one option per process).
 */
static void
add_pid_maps(struct tracecmd_output *handle, struct buffer_instance *instance)
{
	struct pid_addr_maps *maps = instance->pid_maps;
	struct trace_seq s;
	int i;

	trace_seq_init(&s);
	while (maps) {
		if (!maps->nr_lib_maps) {
			maps = maps->next;
			continue;
		}
		trace_seq_reset(&s);
		trace_seq_printf(&s, "%x %x %s\n",
				 maps->pid, maps->nr_lib_maps, maps->proc_name);
		for (i = 0; i < maps->nr_lib_maps; i++)
			trace_seq_printf(&s, "%llx %llx %s\n",
					 maps->lib_maps[i].start,
					 maps->lib_maps[i].end,
					 maps->lib_maps[i].lib_name);
		trace_seq_terminate(&s);
		tracecmd_add_option(handle, TRACECMD_OPTION_PROCMAPS,
				    s.len + 1, s.buffer);
		maps = maps->next;
	}
	trace_seq_destroy(&s);
}

/* Record the instance's trace id in the output file */
static void
add_trace_id(struct tracecmd_output *handle, struct buffer_instance *instance)
{
	tracecmd_add_option(handle, TRACECMD_OPTION_TRACEID,
			    sizeof(long long), &instance->trace_id);
}

/* Record the buffer's name header and saved per-cpu statistics */
static void
add_buffer_stat(struct tracecmd_output *handle, struct buffer_instance *instance)
{
	struct trace_seq s;
	int i;

	trace_seq_init(&s);
	trace_seq_printf(&s, "\nBuffer: %s\n\n",
			 tracefs_instance_get_name(instance->tracefs));
	tracecmd_add_option(handle, TRACECMD_OPTION_CPUSTAT,
			    s.len+1, s.buffer);
	trace_seq_destroy(&s);

	for (i = 0; i < instance->cpu_count; i++)
		tracecmd_add_option(handle, TRACECMD_OPTION_CPUSTAT,
				    instance->s_save[i].len+1,
				    instance->s_save[i].buffer);
}

/* Record each registered event hook as a TRACECMD_OPTION_HOOK option */
static void add_option_hooks(struct tracecmd_output *handle)
{
	struct hook_list *hook;
	int len;

	for (hook = hooks; hook; hook = hook->next) {
		len = strlen(hook->hook);
		tracecmd_add_option(handle, TRACECMD_OPTION_HOOK,
				    len + 1, hook->hook);
	}
}

/* Record "sysname nodename release machine" from uname(2) in the file */
static void add_uname(struct tracecmd_output *handle)
{
	struct utsname buf;
	char *str;
	int len;
	int ret;

	ret = uname(&buf);
	/* if this fails for some reason, just ignore it */
	if (ret < 0)
		return;

	/* three separating spaces plus the terminating NUL = 4 */
	len = strlen(buf.sysname) + strlen(buf.nodename) +
	      strlen(buf.release) + strlen(buf.machine) + 4;
	str = malloc(len);
	if (!str)
		return;
	sprintf(str, "%s %s %s %s", buf.sysname, buf.nodename, buf.release, buf.machine);
	tracecmd_add_option(handle, TRACECMD_OPTION_UNAME, len, str);
	free(str);
}

/* Record the trace-cmd version string in the output file */
static void add_version(struct tracecmd_output *handle)
{
	char *str;
	int len;

	len = asprintf(&str, "%s %s", VERSION_STRING, VERSION_GIT);
	if (len < 0)
		return;

	tracecmd_add_option(handle, TRACECMD_OPTION_VERSION, len+1, str);
	free(str);
}

/* Print the saved per-cpu stats for an instance (unless --quiet) */
static void print_stat(struct buffer_instance *instance)
{
	int cpu;

	if (quiet)
		return;

	if (!is_top_instance(instance))
		printf("\nBuffer: %s\n\n",
			tracefs_instance_get_name(instance->tracefs));

	for (cpu = 0; cpu < instance->cpu_count; cpu++)
		trace_seq_do_printf(&instance->s_print[cpu]);
}

/*
 * Return the trace clock of the first non-guest instance: the selected
 * clock when 'selected' is set, otherwise the raw trace_clock file
 * contents.  Caller frees the returned string.
 */
static char *get_trace_clock(bool selected)
{
	struct buffer_instance *instance;

	for_all_instances(instance) {
		if (is_guest(instance))
			continue;
		break;
	}

	if (selected)
		return tracefs_get_clock(instance ? instance->tracefs : NULL);
	else
		return tracefs_instance_file_read(instance ? instance->tracefs : NULL,
						  "trace_clock", NULL);
}

enum {
	DATA_FL_NONE		= 0,
	DATA_FL_DATE		= 1,
	DATA_FL_OFFSET		= 2,
	DATA_FL_GUEST		= 4,
};

/*
 * Record the common metadata options (date/offset, trace clock, hooks,
 * uname, version, trace id) on the output handle.
 */
static void add_options(struct tracecmd_output *handle, struct common_record_context *ctx)
{
	int type = 0;
	char *clocks;

	if (ctx->date2ts) {
		if (ctx->data_flags & DATA_FL_DATE)
			type = TRACECMD_OPTION_DATE;
		else if (ctx->data_flags & DATA_FL_OFFSET)
			type = TRACECMD_OPTION_OFFSET;
	}

	if (type)
		tracecmd_add_option(handle, type, strlen(ctx->date2ts)+1, ctx->date2ts);

	clocks = get_trace_clock(false);
	tracecmd_add_option(handle, TRACECMD_OPTION_TRACECLOCK,
			    clocks ? 
strlen(clocks)+1 : 0, clocks); + add_option_hooks(handle); + add_uname(handle); + add_version(handle); + if (!no_top_instance()) + add_trace_id(handle, &top_instance); + free(clocks); +} + +static void write_guest_file(struct buffer_instance *instance) +{ + struct tracecmd_output *handle; + int cpu_count = instance->cpu_count; + char *file; + char **temp_files; + int i, fd; + + file = instance->output_file; + fd = open(file, O_RDWR); + if (fd < 0) + die("error opening %s", file); + + handle = tracecmd_get_output_handle_fd(fd); + if (!handle) + die("error writing to %s", file); + if (instance->flags & BUFFER_FL_TSC2NSEC) + tracecmd_set_out_clock(handle, TSCNSEC_CLOCK); + temp_files = malloc(sizeof(*temp_files) * cpu_count); + if (!temp_files) + die("failed to allocate temp_files for %d cpus", + cpu_count); + + for (i = 0; i < cpu_count; i++) { + temp_files[i] = get_temp_file(instance, i); + if (!temp_files[i]) + die("failed to allocate memory"); + } + + if (tracecmd_write_cpu_data(handle, cpu_count, temp_files, NULL) < 0) + die("failed to write CPU data"); + tracecmd_output_close(handle); + + for (i = 0; i < cpu_count; i++) + put_temp_file(temp_files[i]); + free(temp_files); +} + +static struct tracecmd_output *create_output(struct common_record_context *ctx) +{ + struct tracecmd_output *out; + + if (!ctx->output) + return NULL; + + out = tracecmd_output_create(ctx->output); + if (!out) + goto error; + if (ctx->file_version && tracecmd_output_set_version(out, ctx->file_version)) + goto error; + + if (ctx->compression) { + if (tracecmd_output_set_compression(out, ctx->compression)) + goto error; + } else if (ctx->file_version >= FILE_VERSION_COMPRESSION) { + tracecmd_output_set_compression(out, "any"); + } + + if (tracecmd_output_write_headers(out, listed_events)) + goto error; + + return out; +error: + if (out) + tracecmd_output_close(out); + unlink(ctx->output); + return NULL; +} + +static void record_data(struct common_record_context *ctx) +{ + struct 
tracecmd_output *handle; + struct buffer_instance *instance; + bool local = false; + int max_cpu_count = local_cpu_count; + char **temp_files; + int i; + + for_all_instances(instance) { + if (is_guest(instance)) + write_guest_file(instance); + else if (host && instance->msg_handle) + finish_network(instance->msg_handle); + else + local = true; + } + + if (!local) + return; + + if (latency) { + handle = tracecmd_create_file_latency(ctx->output, local_cpu_count, + ctx->file_version, ctx->compression); + tracecmd_set_quiet(handle, quiet); + } else { + if (!local_cpu_count) + return; + + /* Allocate enough temp files to handle each instance */ + for_all_instances(instance) { + if (instance->msg_handle) + continue; + if (instance->cpu_count > max_cpu_count) + max_cpu_count = instance->cpu_count; + } + + temp_files = malloc(sizeof(*temp_files) * max_cpu_count); + if (!temp_files) + die("Failed to allocate temp_files for %d cpus", + local_cpu_count); + + for (i = 0; i < max_cpu_count; i++) + temp_files[i] = get_temp_file(&top_instance, i); + + /* + * If top_instance was not used, we still need to create + * empty trace.dat files for it. + */ + if (no_top_instance() || top_instance.msg_handle) { + for (i = 0; i < local_cpu_count; i++) + touch_file(temp_files[i]); + } + + handle = create_output(ctx); + if (!handle) + die("Error creating output file"); + tracecmd_set_quiet(handle, quiet); + + add_options(handle, ctx); + + /* Only record the top instance under TRACECMD_OPTION_CPUSTAT*/ + if (!no_top_instance() && !top_instance.msg_handle) { + struct trace_seq *s = top_instance.s_save; + + for (i = 0; i < local_cpu_count; i++) + tracecmd_add_option(handle, TRACECMD_OPTION_CPUSTAT, + s[i].len+1, s[i].buffer); + } + + if (buffers) { + i = 0; + for_each_instance(instance) { + int cpus = instance->cpu_count != local_cpu_count ? 
+ instance->cpu_count : 0; + + if (instance->msg_handle) + continue; + tracecmd_add_buffer_info(handle, + tracefs_instance_get_name(instance->tracefs), + cpus); + add_buffer_stat(handle, instance); + } + } + + if (!no_top_instance() && !top_instance.msg_handle) + print_stat(&top_instance); + + for_all_instances(instance) { + add_pid_maps(handle, instance); + } + + for_all_instances(instance) { + if (is_guest(instance)) + add_guest_info(handle, instance); + } + + if (ctx->tsc2nsec.mult) { + add_tsc2nsec(handle, &ctx->tsc2nsec); + tracecmd_set_out_clock(handle, TSCNSEC_CLOCK); + } + if (tracecmd_write_cmdlines(handle)) + die("Writing cmdlines"); + + tracecmd_append_cpu_data(handle, local_cpu_count, temp_files); + + for (i = 0; i < max_cpu_count; i++) + put_temp_file(temp_files[i]); + + if (buffers) { + i = 0; + for_each_instance(instance) { + if (instance->msg_handle) + continue; + print_stat(instance); + append_buffer(handle, instance, temp_files); + } + } + + free(temp_files); + } + if (!handle) + die("could not write to file"); + tracecmd_output_close(handle); +} + +enum filter_type { + FUNC_FILTER, + FUNC_NOTRACE, +}; + +static int filter_command(struct tracefs_instance *instance, const char *cmd) +{ + return tracefs_instance_file_append(instance, "set_ftrace_filter", cmd); +} + +static int write_func_filter(enum filter_type type, struct buffer_instance *instance, + struct func_list **list) +{ + struct func_list *item, *cmds = NULL; + const char *file; + int ret = -1; + int (*filter_function)(struct tracefs_instance *instance, const char *filter, + const char *module, unsigned int flags); + + if (!*list) + return 0; + + switch (type) { + case FUNC_FILTER: + filter_function = tracefs_function_filter; + file = "set_ftrace_filter"; + break; + case FUNC_NOTRACE: + filter_function = tracefs_function_notrace; + file = "set_ftrace_notrace"; + break; + } + + ret = filter_function(instance->tracefs, NULL, NULL, + TRACEFS_FL_RESET | TRACEFS_FL_CONTINUE); + if (ret < 0) + 
return ret; + + while (*list) { + item = *list; + *list = item->next; + /* Do commands separately at the end */ + if (type == FUNC_FILTER && strstr(item->func, ":")) { + item->next = cmds; + cmds = item; + continue; + } + ret = filter_function(instance->tracefs, item->func, item->mod, + TRACEFS_FL_CONTINUE); + if (ret < 0) + goto failed; + free(item); + } + ret = filter_function(instance->tracefs, NULL, NULL, 0); + + /* Now add any commands */ + while (cmds) { + item = cmds; + cmds = item->next; + ret = filter_command(instance->tracefs, item->func); + if (ret < 0) + goto failed; + free(item); + } + return ret; + failed: + die("Failed to write %s to %s.\n" + "Perhaps this function is not available for tracing.\n" + "run 'trace-cmd list -f %s' to see if it is.", + item->func, file, item->func); + return ret; +} + +static int write_func_file(struct buffer_instance *instance, + const char *file, struct func_list **list) +{ + struct func_list *item; + const char *prefix = ":mod:"; + char *path; + int fd; + int ret = -1; + + if (!*list) + return 0; + + path = tracefs_instance_get_file(instance->tracefs, file); + + fd = open(path, O_WRONLY | O_TRUNC); + if (fd < 0) + goto free; + + while (*list) { + item = *list; + *list = item->next; + ret = write(fd, item->func, strlen(item->func)); + if (ret < 0) + goto failed; + if (item->mod) { + ret = write(fd, prefix, strlen(prefix)); + if (ret < 0) + goto failed; + ret = write(fd, item->mod, strlen(item->mod)); + if (ret < 0) + goto failed; + } + ret = write(fd, " ", 1); + if (ret < 0) + goto failed; + free(item); + } + close(fd); + ret = 0; + free: + tracefs_put_tracing_file(path); + return ret; + failed: + die("Failed to write %s to %s.\n" + "Perhaps this function is not available for tracing.\n" + "run 'trace-cmd list -f %s' to see if it is.", + item->func, file, item->func); + return ret; +} + +static int functions_filtered(struct buffer_instance *instance) +{ + char buf[1] = { '#' }; + char *path; + int fd; + + path = 
tracefs_instance_get_file(instance->tracefs, "set_ftrace_filter"); + fd = open(path, O_RDONLY); + tracefs_put_tracing_file(path); + if (fd < 0) { + if (is_top_instance(instance)) + warning("Can not set set_ftrace_filter"); + else + warning("Can not set set_ftrace_filter for %s", + tracefs_instance_get_name(instance->tracefs)); + return 0; + } + + /* + * If functions are not filtered, than the first character + * will be '#'. Make sure it is not an '#' and also not space. + */ + read(fd, buf, 1); + close(fd); + + if (buf[0] == '#' || isspace(buf[0])) + return 0; + return 1; +} + +static void set_funcs(struct buffer_instance *instance) +{ + int set_notrace = 0; + int ret; + + if (is_guest(instance)) + return; + + ret = write_func_filter(FUNC_FILTER, instance, &instance->filter_funcs); + if (ret < 0) + die("set_ftrace_filter does not exist. Can not filter functions"); + + /* graph tracing currently only works for top instance */ + if (is_top_instance(instance)) { + ret = write_func_file(instance, "set_graph_function", &graph_funcs); + if (ret < 0) + die("set_graph_function does not exist."); + if (instance->plugin && strcmp(instance->plugin, "function_graph") == 0) { + ret = write_func_file(instance, "set_graph_notrace", + &instance->notrace_funcs); + if (!ret) + set_notrace = 1; + } + if (!set_notrace) { + ret = write_func_filter(FUNC_NOTRACE, instance, + &instance->notrace_funcs); + if (ret < 0) + die("set_ftrace_notrace does not exist. 
Can not filter functions"); + } + } else + write_func_filter(FUNC_NOTRACE, instance, &instance->notrace_funcs); + + /* make sure we are filtering functions */ + if (func_stack && is_top_instance(instance)) { + if (!functions_filtered(instance)) + die("Function stack trace set, but functions not filtered"); + save_option(instance, FUNC_STACK_TRACE); + } + clear_function_filters = 1; +} + +static void add_func(struct func_list **list, const char *mod, const char *func) +{ + struct func_list *item; + + item = malloc(sizeof(*item)); + if (!item) + die("Failed to allocate function descriptor"); + item->func = func; + item->mod = mod; + item->next = *list; + *list = item; +} + +static int find_ts(struct tep_event *event, struct tep_record *record, + int cpu, void *context) +{ + unsigned long long *ts = (unsigned long long *)context; + struct tep_format_field *field; + + if (!ts) + return -1; + + field = tep_find_field(event, "buf"); + if (field && strcmp(STAMP"\n", record->data + field->offset) == 0) { + *ts = record->ts; + return 1; + } + + return 0; +} + +static unsigned long long find_time_stamp(struct tep_handle *tep, + struct tracefs_instance *instance) +{ + unsigned long long ts = 0; + + if (!tracefs_iterate_raw_events(tep, instance, NULL, 0, find_ts, &ts)) + return ts; + + return 0; +} + + +static char *read_top_file(char *file, int *psize) +{ + return tracefs_instance_file_read(top_instance.tracefs, file, psize); +} + +static struct tep_handle *get_ftrace_tep(void) +{ + const char *systems[] = {"ftrace", NULL}; + struct tep_handle *tep; + char *buf; + int size; + int ret; + + tep = tracefs_local_events_system(NULL, systems); + if (!tep) + return NULL; + tep_set_file_bigendian(tep, tracecmd_host_bigendian()); + buf = read_top_file("events/header_page", &size); + if (!buf) + goto error; + ret = tep_parse_header_page(tep, buf, size, sizeof(unsigned long)); + free(buf); + if (ret < 0) + goto error; + + return tep; + +error: + tep_free(tep); + return NULL; +} + +/* + 
* Try to write the date into the ftrace buffer and then + * read it back, mapping the timestamp to the date. + */ +static char *get_date_to_ts(void) +{ + struct tep_handle *tep; + unsigned long long min = -1ULL; + unsigned long long diff; + unsigned long long stamp; + unsigned long long min_stamp; + unsigned long long min_ts; + unsigned long long ts; + struct timespec start; + struct timespec end; + char *date2ts = NULL; + int tfd; + int i; + + /* Set up a tep to read the raw format */ + tep = get_ftrace_tep(); + if (!tep) { + warning("failed to alloc tep, --date ignored"); + return NULL; + } + tfd = tracefs_instance_file_open(NULL, "trace_marker", O_WRONLY); + if (tfd < 0) { + warning("Can not open 'trace_marker', --date ignored"); + goto out_pevent; + } + + for (i = 0; i < date2ts_tries; i++) { + tracecmd_disable_tracing(); + clear_trace_instances(); + tracecmd_enable_tracing(); + + clock_gettime(CLOCK_REALTIME, &start); + write(tfd, STAMP, 5); + clock_gettime(CLOCK_REALTIME, &end); + + tracecmd_disable_tracing(); + ts = find_time_stamp(tep, NULL); + if (!ts) + continue; + + diff = (unsigned long long)end.tv_sec * 1000000000LL; + diff += (unsigned long long)end.tv_nsec; + stamp = diff; + diff -= (unsigned long long)start.tv_sec * 1000000000LL; + diff -= (unsigned long long)start.tv_nsec; + + if (diff < min) { + min_ts = ts; + min_stamp = stamp - diff / 2; + min = diff; + } + } + + close(tfd); + + if (min == -1ULL) { + warning("Failed to make date offset, --date ignored"); + goto out_pevent; + } + + /* 16 hex chars + 0x + \0 */ + date2ts = malloc(19); + if (!date2ts) + goto out_pevent; + + /* + * The difference between the timestamp and the gtod is + * stored as an ASCII string in hex. 
+ */ + diff = min_stamp - min_ts; + snprintf(date2ts, 19, "0x%llx", diff/1000); + out_pevent: + tep_free(tep); + + return date2ts; +} + +static void set_buffer_size_instance(struct buffer_instance *instance) +{ + int buffer_size = instance->buffer_size; + char buf[BUFSIZ]; + char *path; + int ret; + int fd; + + if (is_guest(instance)) + return; + + if (!buffer_size) + return; + + if (buffer_size < 0) + die("buffer size must be positive"); + + snprintf(buf, BUFSIZ, "%d", buffer_size); + + path = tracefs_instance_get_file(instance->tracefs, "buffer_size_kb"); + fd = open(path, O_WRONLY); + if (fd < 0) { + warning("can't open %s", path); + goto out; + } + + ret = write(fd, buf, strlen(buf)); + if (ret < 0) + warning("Can't write to %s", path); + close(fd); + out: + tracefs_put_tracing_file(path); +} + +void set_buffer_size(void) +{ + struct buffer_instance *instance; + + for_all_instances(instance) + set_buffer_size_instance(instance); +} + +static int +process_event_trigger(char *path, struct event_iter *iter) +{ + const char *system = iter->system_dent->d_name; + const char *event = iter->event_dent->d_name; + struct stat st; + char *trigger = NULL; + char *file; + int ret; + + path = append_file(path, system); + file = append_file(path, event); + free(path); + + ret = stat(file, &st); + if (ret < 0 || !S_ISDIR(st.st_mode)) + goto out; + + trigger = append_file(file, "trigger"); + + ret = stat(trigger, &st); + if (ret < 0) + goto out; + + ret = clear_trigger(trigger); + out: + free(trigger); + free(file); + return ret; +} + +static void clear_instance_triggers(struct buffer_instance *instance) +{ + enum event_iter_type type; + struct event_iter *iter; + char *system; + char *path; + int retry = 0; + int ret; + + path = tracefs_instance_get_file(instance->tracefs, "events"); + if (!path) + die("malloc"); + + iter = trace_event_iter_alloc(path); + + system = NULL; + while ((type = trace_event_iter_next(iter, path, system))) { + + if (type == EVENT_ITER_SYSTEM) { + 
system = iter->system_dent->d_name; + continue; + } + + ret = process_event_trigger(path, iter); + if (ret > 0) + retry++; + } + + trace_event_iter_free(iter); + + if (retry) { + int i; + + /* Order matters for some triggers */ + for (i = 0; i < retry; i++) { + int tries = 0; + + iter = trace_event_iter_alloc(path); + system = NULL; + while ((type = trace_event_iter_next(iter, path, system))) { + + if (type == EVENT_ITER_SYSTEM) { + system = iter->system_dent->d_name; + continue; + } + + ret = process_event_trigger(path, iter); + if (ret > 0) + tries++; + } + trace_event_iter_free(iter); + if (!tries) + break; + } + } + + tracefs_put_tracing_file(path); +} + +static void +process_event_filter(char *path, struct event_iter *iter, enum event_process *processed) +{ + const char *system = iter->system_dent->d_name; + const char *event = iter->event_dent->d_name; + struct stat st; + char *filter = NULL; + char *file; + int ret; + + path = append_file(path, system); + file = append_file(path, event); + free(path); + + ret = stat(file, &st); + if (ret < 0 || !S_ISDIR(st.st_mode)) + goto out; + + filter = append_file(file, "filter"); + + ret = stat(filter, &st); + if (ret < 0) + goto out; + + clear_filter(filter); + out: + free(filter); + free(file); +} + +static void clear_instance_filters(struct buffer_instance *instance) +{ + struct event_iter *iter; + char *path; + char *system; + enum event_iter_type type; + enum event_process processed = PROCESSED_NONE; + + path = tracefs_instance_get_file(instance->tracefs, "events"); + if (!path) + die("malloc"); + + iter = trace_event_iter_alloc(path); + + processed = PROCESSED_NONE; + system = NULL; + while ((type = trace_event_iter_next(iter, path, system))) { + + if (type == EVENT_ITER_SYSTEM) { + system = iter->system_dent->d_name; + continue; + } + + process_event_filter(path, iter, &processed); + } + + trace_event_iter_free(iter); + + tracefs_put_tracing_file(path); +} + +static void clear_filters(void) +{ + struct 
buffer_instance *instance;

	for_all_instances(instance)
		clear_instance_filters(instance);
}

/* Restore the default trace clock ("local") in every instance. */
static void reset_clock(void)
{
	struct buffer_instance *instance;

	for_all_instances(instance)
		tracefs_instance_file_write(instance->tracefs,
					    "trace_clock", "local");
}

/*
 * Re-enable tracing on all CPUs: build a cpumask string with every CPU
 * bit set and write it to tracing_cpumask of each instance.  The mask is
 * written as comma-separated 32-bit hex words ("f" .. "ffffffff,ffffffff"),
 * with only the leading word possibly partial.
 */
static void reset_cpu_mask(void)
{
	struct buffer_instance *instance;
	int cpus = tracecmd_count_cpus();
	int fullwords = (cpus - 1) / 32;
	int bits = (cpus - 1) % 32 + 1;		/* bits in the leading word, 1..32 */
	int len = (fullwords + 1) * 9;		/* 8 hex digits + ',' per word */
	char buf[len + 1];

	buf[0] = '\0';

	/* 1ULL avoids overflow when bits == 32 */
	sprintf(buf, "%x", (unsigned int)((1ULL << bits) - 1));
	while (fullwords-- > 0)
		strcat(buf, ",ffffffff");

	for_all_instances(instance)
		tracefs_instance_file_write(instance->tracefs,
					    "tracing_cpumask", buf);
}

/* Clear any event PID filtering (empty string removes all PIDs). */
static void reset_event_pid(void)
{
	struct buffer_instance *instance;

	for_all_instances(instance)
		add_event_pid(instance, "");
}

/* Remove all event triggers from every instance. */
static void clear_triggers(void)
{
	struct buffer_instance *instance;

	for_all_instances(instance)
		clear_instance_triggers(instance);
}

/*
 * Clear the instance's error_log by writing a single space to it
 * (that is how the tracefs error_log file is reset).  Silently does
 * nothing on kernels without an error_log file.
 */
static void clear_instance_error_log(struct buffer_instance *instance)
{
	char *file;

	if (!tracefs_file_exists(instance->tracefs, "error_log"))
		return;

	file = tracefs_instance_get_file(instance->tracefs, "error_log");
	if (!file)
		return;
	write_file(file, " ");
	tracefs_put_tracing_file(file);
}

/* Clear the error_log of every instance. */
static void clear_error_log(void)
{
	struct buffer_instance *instance;

	for_all_instances(instance)
		clear_instance_error_log(instance);
}

static void clear_all_dynamic_events(void)
{
	/* Clear event probes first, as they may be attached to other dynamic event */
	tracefs_dynevent_destroy_all(TRACEFS_DYNEVENT_EPROBE, true);
	tracefs_dynevent_destroy_all(TRACEFS_DYNEVENT_ALL, true);
}

/* Reset all four function-filter files in every instance. */
static void clear_func_filters(void)
{
	struct buffer_instance *instance;
	char *path;
	int i;
	const char * const files[] = { "set_ftrace_filter",
				       "set_ftrace_notrace",
				       "set_graph_function",
				       "set_graph_notrace",
NULL }; + + for_all_instances(instance) { + for (i = 0; files[i]; i++) { + path = tracefs_instance_get_file(instance->tracefs, files[i]); + clear_func_filter(path); + tracefs_put_tracing_file(path); + } + } +} + +static void make_instances(void) +{ + struct buffer_instance *instance; + + for_each_instance(instance) { + if (is_guest(instance)) + continue; + if (instance->name && !instance->tracefs) { + instance->tracefs = tracefs_instance_create(instance->name); + /* Don't delete instances that already exist */ + if (instance->tracefs && !tracefs_instance_is_new(instance->tracefs)) + instance->flags |= BUFFER_FL_KEEP; + } + } +} + +void tracecmd_remove_instances(void) +{ + struct buffer_instance *instance; + + for_each_instance(instance) { + /* Only delete what we created */ + if (is_guest(instance) || (instance->flags & BUFFER_FL_KEEP)) + continue; + if (instance->tracing_on_fd > 0) { + close(instance->tracing_on_fd); + instance->tracing_on_fd = 0; + } + tracefs_instance_destroy(instance->tracefs); + } +} + +static void check_plugin(const char *plugin) +{ + char *buf; + char *str; + char *tok; + + /* + * nop is special. We may want to just trace + * trace_printks, that are in the kernel. 
 */
	if (strcmp(plugin, "nop") == 0)
		return;

	buf = read_top_file("available_tracers", NULL);
	if (!buf)
		die("No plugins available");

	/* available_tracers is a single space-separated line of tracer names */
	str = buf;
	while ((tok = strtok(str, " "))) {
		str = NULL;
		if (strcmp(tok, plugin) == 0)
			goto out;
	}
	die ("Plugin '%s' does not exist", plugin);
 out:
	if (!quiet)
		fprintf(stderr, " plugin '%s'\n", plugin);
	free(buf);
}

/*
 * --func-stack is only honored for the top instance and requires that
 * function filtering be set up, otherwise tracing every function with
 * stack traces would be far too heavy; die if misused.
 */
static void check_function_plugin(void)
{
	const char *plugin;

	/* We only care about the top_instance */
	if (no_top_instance())
		return;

	plugin = top_instance.plugin;
	if (!plugin)
		return;

	if (plugin && strncmp(plugin, "function", 8) == 0 &&
	    func_stack && !top_instance.filter_funcs)
		die("Must supply function filtering with --func-stack\n");
}

/* Non-zero if this instance has anything to record (guest, profile,
 * tracer plugin, events, or proc map collection). */
static int __check_doing_something(struct buffer_instance *instance)
{
	return is_guest(instance) || (instance->flags & BUFFER_FL_PROFILE) ||
		instance->plugin || instance->events || instance->get_procmap;
}

/* Die unless at least one instance was told to do something. */
static void check_doing_something(void)
{
	struct buffer_instance *instance;

	for_all_instances(instance) {
		if (__check_doing_something(instance))
			return;
	}

	die("no event or plugin was specified... aborting");
}

/*
 * Validate the instance's tracer plugin against the current trace type
 * and activate it (unless extracting).  Latency tracers set the global
 * "latency" flag and are incompatible with networking and streaming.
 */
static void
update_plugin_instance(struct buffer_instance *instance,
		       enum trace_type type)
{
	const char *plugin = instance->plugin;

	if (is_guest(instance))
		return;

	if (!plugin)
		return;

	check_plugin(plugin);

	/*
	 * Latency tracers just save the trace and kill
	 * the threads.
+ */ + if (strcmp(plugin, "irqsoff") == 0 || + strcmp(plugin, "preemptoff") == 0 || + strcmp(plugin, "preemptirqsoff") == 0 || + strcmp(plugin, "wakeup") == 0 || + strcmp(plugin, "wakeup_rt") == 0) { + latency = 1; + if (host) + die("Network tracing not available with latency tracer plugins"); + if (type & TRACE_TYPE_STREAM) + die("Streaming is not available with latency tracer plugins"); + } else if (type == TRACE_TYPE_RECORD) { + if (latency) + die("Can not record latency tracer and non latency trace together"); + } + + if (fset < 0 && (strcmp(plugin, "function") == 0 || + strcmp(plugin, "function_graph") == 0)) + die("function tracing not configured on this kernel"); + + if (type != TRACE_TYPE_EXTRACT) + set_plugin_instance(instance, plugin); +} + +static void update_plugins(enum trace_type type) +{ + struct buffer_instance *instance; + + for_all_instances(instance) + update_plugin_instance(instance, type); +} + +static void allocate_seq(void) +{ + struct buffer_instance *instance; + + for_all_instances(instance) { + instance->s_save = malloc(sizeof(struct trace_seq) * instance->cpu_count); + instance->s_print = malloc(sizeof(struct trace_seq) * instance->cpu_count); + if (!instance->s_save || !instance->s_print) + die("Failed to allocate instance info"); + } +} + +/* Find the overrun output, and add it to the print seq */ +static void add_overrun(int cpu, struct trace_seq *src, struct trace_seq *dst) +{ + const char overrun_str[] = "overrun: "; + const char commit_overrun_str[] = "commit overrun: "; + const char *p; + int overrun; + int commit_overrun; + + p = strstr(src->buffer, overrun_str); + if (!p) { + /* Warn? 
 */
		trace_seq_printf(dst, "CPU %d: no overrun found?\n", cpu);
		return;
	}

	overrun = atoi(p + strlen(overrun_str));

	/* "+ 9" == strlen(overrun_str): search after the first match */
	p = strstr(p + 9, commit_overrun_str);
	if (p)
		commit_overrun = atoi(p + strlen(commit_overrun_str));
	else
		commit_overrun = -1;

	if (!overrun && !commit_overrun)
		return;

	trace_seq_printf(dst, "CPU %d:", cpu);

	if (overrun)
		trace_seq_printf(dst, " %d events lost", overrun);

	if (commit_overrun)
		trace_seq_printf(dst, " %d events lost due to commit overrun",
				 commit_overrun);

	trace_seq_putc(dst, '\n');
}

/*
 * Snapshot per-CPU ring buffer statistics for every local instance:
 * the raw stats go into s_save[] (later written to the output file) and
 * a human-readable overrun summary into s_print[] (shown to the user).
 */
static void record_stats(void)
{
	struct buffer_instance *instance;
	struct trace_seq *s_save;
	struct trace_seq *s_print;
	int cpu;

	for_all_instances(instance) {
		if (is_guest(instance))
			continue;

		s_save = instance->s_save;
		s_print = instance->s_print;
		for (cpu = 0; cpu < instance->cpu_count; cpu++) {
			trace_seq_init(&s_save[cpu]);
			trace_seq_init(&s_print[cpu]);
			trace_seq_printf(&s_save[cpu], "CPU: %d\n", cpu);
			tracecmd_stat_cpu_instance(instance, &s_save[cpu], cpu);
			add_overrun(cpu, &s_save[cpu], &s_print[cpu]);
		}
	}
}

/* Print the collected stats of every instance to stdout. */
static void print_stats(void)
{
	struct buffer_instance *instance;

	for_all_instances(instance)
		print_stat(instance);
}

/* Release the trace_seq buffers allocated by record_stats(). */
static void destroy_stats(void)
{
	struct buffer_instance *instance;
	int cpu;

	for_all_instances(instance) {
		if (is_guest(instance))
			continue;

		for (cpu = 0; cpu < instance->cpu_count; cpu++) {
			trace_seq_destroy(&instance->s_save[cpu]);
			trace_seq_destroy(&instance->s_print[cpu]);
		}
	}
}

/* Push an event glob onto the global listed_events list. */
static void list_event(const char *event)
{
	struct tracecmd_event_list *list;

	list = malloc(sizeof(*list));
	if (!list)
		die("Failed to allocate list for event");
	list->next = listed_events;
	list->glob = event;
	listed_events = list;
}

#define ALL_EVENTS "*/*"

/* Replace the listed_events list with the single catch-all glob. */
static void record_all_events(void)
{
	struct tracecmd_event_list *list;

	while (listed_events) {
		list = listed_events;
		listed_events
= list->next; + free(list); + } + list = malloc(sizeof(*list)); + if (!list) + die("Failed to allocate list for all events"); + list->next = NULL; + list->glob = ALL_EVENTS; + listed_events = list; +} + +static int recording_all_events(void) +{ + return listed_events && strcmp(listed_events->glob, ALL_EVENTS) == 0; +} + +static void add_trigger(struct event_list *event, const char *trigger) +{ + int ret; + + if (event->trigger) { + event->trigger = realloc(event->trigger, + strlen(event->trigger) + strlen("\n") + + strlen(trigger) + 1); + strcat(event->trigger, "\n"); + strcat(event->trigger, trigger); + } else { + ret = asprintf(&event->trigger, "%s", trigger); + if (ret < 0) + die("Failed to allocate event trigger"); + } +} + +static int test_stacktrace_trigger(struct buffer_instance *instance) +{ + char *path; + int ret = 0; + int fd; + + path = tracefs_instance_get_file(instance->tracefs, + "events/sched/sched_switch/trigger"); + + clear_trigger(path); + + fd = open(path, O_WRONLY); + if (fd < 0) + goto out; + + ret = write(fd, "stacktrace", 10); + if (ret != 10) + ret = 0; + else + ret = 1; + close(fd); + out: + tracefs_put_tracing_file(path); + + return ret; +} + +static int +profile_add_event(struct buffer_instance *instance, const char *event_str, int stack) +{ + struct event_list *event; + char buf[BUFSIZ]; + char *p; + + strcpy(buf, "events/"); + strncpy(buf + 7, event_str, BUFSIZ - 7); + buf[BUFSIZ-1] = 0; + + if ((p = strstr(buf, ":"))) { + *p = '/'; + p++; + } + + if (!trace_check_file_exists(instance, buf)) + return -1; + + /* Only add event if it isn't already added */ + for (event = instance->events; event; event = event->next) { + if (p && strcmp(event->event, p) == 0) + break; + if (strcmp(event->event, event_str) == 0) + break; + } + + if (!event) { + event = malloc(sizeof(*event)); + if (!event) + die("Failed to allocate event"); + memset(event, 0, sizeof(*event)); + event->event = event_str; + add_event(instance, event); + } + + if 
(!recording_all_events()) + list_event(event_str); + + if (stack) { + if (!event->trigger || !strstr(event->trigger, "stacktrace")) + add_trigger(event, "stacktrace"); + } + + return 0; +} + +int tracecmd_add_event(const char *event_str, int stack) +{ + return profile_add_event(first_instance, event_str, stack); +} + +static void enable_profile(struct buffer_instance *instance) +{ + int stacktrace = 0; + int i; + char *trigger_events[] = { + "sched:sched_switch", + "sched:sched_wakeup", + NULL, + }; + char *events[] = { + "exceptions:page_fault_user", + "irq:irq_handler_entry", + "irq:irq_handler_exit", + "irq:softirq_entry", + "irq:softirq_exit", + "irq:softirq_raise", + "sched:sched_process_exec", + "raw_syscalls", + NULL, + }; + + if (!instance->plugin) { + if (trace_check_file_exists(instance, "max_graph_depth")) { + instance->plugin = "function_graph"; + set_max_graph_depth(instance, "1"); + } else + warning("Kernel does not support max_graph_depth\n" + " Skipping user/kernel profiling"); + } + + if (test_stacktrace_trigger(instance)) + stacktrace = 1; + else + /* + * The stacktrace trigger is not implemented with this + * kernel, then we need to default to the stack trace option. + * This is less efficient but still works. 
+ */ + save_option(instance, "stacktrace"); + + + for (i = 0; trigger_events[i]; i++) + profile_add_event(instance, trigger_events[i], stacktrace); + + for (i = 0; events[i]; i++) + profile_add_event(instance, events[i], 0); +} + +static struct event_list * +create_hook_event(struct buffer_instance *instance, + const char *system, const char *event) +{ + struct event_list *event_list; + char *event_name; + int len; + + if (!system) + system = "*"; + + len = strlen(event); + len += strlen(system) + 2; + + event_name = malloc(len); + if (!event_name) + die("Failed to allocate %s/%s", system, event); + sprintf(event_name, "%s:%s", system, event); + + event_list = malloc(sizeof(*event_list)); + if (!event_list) + die("Failed to allocate event list for %s", event_name); + memset(event_list, 0, sizeof(*event_list)); + event_list->event = event_name; + add_event(instance, event_list); + + list_event(event_name); + + return event_list; +} + +static void add_hook(struct buffer_instance *instance, const char *arg) +{ + struct event_list *event; + struct hook_list *hook; + + hook = tracecmd_create_event_hook(arg); + if (!hook) + die("Failed to create event hook %s", arg); + + hook->instance = instance; + hook->next = hooks; + hooks = hook; + + /* Make sure the event is enabled */ + event = create_hook_event(instance, hook->start_system, hook->start_event); + create_hook_event(instance, hook->end_system, hook->end_event); + + if (hook->stack) { + if (!event->trigger || !strstr(event->trigger, "stacktrace")) + add_trigger(event, "stacktrace"); + } +} + +void update_first_instance(struct buffer_instance *instance, int topt) +{ + if (topt || instance == &top_instance) + first_instance = &top_instance; + else + first_instance = buffer_instances; +} + +void init_top_instance(void) +{ + if (!top_instance.tracefs) + top_instance.tracefs = tracefs_instance_create(NULL); + top_instance.cpu_count = tracecmd_count_cpus(); + top_instance.flags = BUFFER_FL_KEEP; + top_instance.trace_id = 
tracecmd_generate_traceid(); + init_instance(&top_instance); +} + +enum { + OPT_compression = 237, + OPT_file_ver = 238, + OPT_verbose = 239, + OPT_tsc2nsec = 240, + OPT_fork = 241, + OPT_tsyncinterval = 242, + OPT_user = 243, + OPT_procmap = 244, + OPT_quiet = 245, + OPT_debug = 246, + OPT_no_filter = 247, + OPT_max_graph_depth = 248, + OPT_tsoffset = 249, + OPT_bycomm = 250, + OPT_stderr = 251, + OPT_profile = 252, + OPT_nosplice = 253, + OPT_funcstack = 254, + OPT_date = 255, + OPT_module = 256, + OPT_nofifos = 257, + OPT_cmdlines_size = 258, + OPT_poll = 259, + OPT_name = 260, +}; + +void trace_stop(int argc, char **argv) +{ + int topt = 0; + struct buffer_instance *instance = &top_instance; + + init_top_instance(); + + for (;;) { + int c; + + c = getopt(argc-1, argv+1, "hatB:"); + if (c == -1) + break; + + switch (c) { + case 'h': + usage(argv); + break; + case 'B': + instance = allocate_instance(optarg); + if (!instance) + die("Failed to create instance"); + add_instance(instance, local_cpu_count); + break; + case 'a': + add_all_instances(); + break; + case 't': + /* Force to use top instance */ + topt = 1; + instance = &top_instance; + break; + default: + usage(argv); + } + } + update_first_instance(instance, topt); + tracecmd_disable_tracing(); + exit(0); +} + +void trace_restart(int argc, char **argv) +{ + int topt = 0; + struct buffer_instance *instance = &top_instance; + + init_top_instance(); + + for (;;) { + int c; + + c = getopt(argc-1, argv+1, "hatB:"); + if (c == -1) + break; + switch (c) { + case 'h': + usage(argv); + break; + case 'B': + instance = allocate_instance(optarg); + if (!instance) + die("Failed to create instance"); + add_instance(instance, local_cpu_count); + break; + case 'a': + add_all_instances(); + break; + case 't': + /* Force to use top instance */ + topt = 1; + instance = &top_instance; + break; + default: + usage(argv); + } + + } + update_first_instance(instance, topt); + tracecmd_enable_tracing(); + exit(0); +} + +void 
trace_reset(int argc, char **argv) +{ + int c; + int topt = 0; + struct buffer_instance *instance = &top_instance; + + init_top_instance(); + + /* if last arg is -a, then -b and -d apply to all instances */ + int last_specified_all = 0; + struct buffer_instance *inst; /* iterator */ + + while ((c = getopt(argc-1, argv+1, "hab:B:td")) >= 0) { + + switch (c) { + case 'h': + usage(argv); + break; + case 'b': + { + int size = atoi(optarg); + /* Min buffer size is 1 */ + if (size <= 1) + size = 1; + if (last_specified_all) { + for_each_instance(inst) { + inst->buffer_size = size; + } + } else { + instance->buffer_size = size; + } + break; + } + case 'B': + last_specified_all = 0; + instance = allocate_instance(optarg); + if (!instance) + die("Failed to create instance"); + add_instance(instance, local_cpu_count); + /* -d will remove keep */ + instance->flags |= BUFFER_FL_KEEP; + break; + case 't': + /* Force to use top instance */ + last_specified_all = 0; + topt = 1; + instance = &top_instance; + break; + case 'a': + last_specified_all = 1; + add_all_instances(); + for_each_instance(inst) { + inst->flags |= BUFFER_FL_KEEP; + } + break; + case 'd': + if (last_specified_all) { + for_each_instance(inst) { + inst->flags &= ~BUFFER_FL_KEEP; + } + } else { + if (is_top_instance(instance)) + die("Can not delete top level buffer"); + instance->flags &= ~BUFFER_FL_KEEP; + } + break; + } + } + update_first_instance(instance, topt); + tracecmd_disable_all_tracing(1); + set_buffer_size(); + clear_filters(); + clear_triggers(); + clear_all_dynamic_events(); + clear_error_log(); + /* set clock to "local" */ + reset_clock(); + reset_event_pid(); + reset_max_latency_instance(); + reset_cpu_mask(); + tracecmd_remove_instances(); + clear_func_filters(); + /* restore tracing_on to 1 */ + tracecmd_enable_tracing(); + exit(0); +} + +static void init_common_record_context(struct common_record_context *ctx, + enum trace_cmd curr_cmd) +{ + memset(ctx, 0, sizeof(*ctx)); + ctx->instance = 
&top_instance; + ctx->curr_cmd = curr_cmd; + local_cpu_count = tracecmd_count_cpus(); + ctx->file_version = tracecmd_default_file_version(); + init_top_instance(); +} + +#define IS_EXTRACT(ctx) ((ctx)->curr_cmd == CMD_extract) +#define IS_START(ctx) ((ctx)->curr_cmd == CMD_start) +#define IS_CMDSET(ctx) ((ctx)->curr_cmd == CMD_set) +#define IS_STREAM(ctx) ((ctx)->curr_cmd == CMD_stream) +#define IS_PROFILE(ctx) ((ctx)->curr_cmd == CMD_profile) +#define IS_RECORD(ctx) ((ctx)->curr_cmd == CMD_record) +#define IS_RECORD_AGENT(ctx) ((ctx)->curr_cmd == CMD_record_agent) + +static void add_argv(struct buffer_instance *instance, char *arg, bool prepend) +{ + instance->argv = realloc(instance->argv, + (instance->argc + 1) * sizeof(char *)); + if (!instance->argv) + die("Can not allocate instance args"); + if (prepend) { + memmove(instance->argv + 1, instance->argv, + instance->argc * sizeof(*instance->argv)); + instance->argv[0] = arg; + } else { + instance->argv[instance->argc] = arg; + } + instance->argc++; +} + +static void add_arg(struct buffer_instance *instance, + int c, const char *opts, + struct option *long_options, char *optarg) +{ + char *ptr, *arg; + int i, ret; + + /* Short or long arg */ + if (!(c & 0x80)) { + ptr = strchr(opts, c); + if (!ptr) + return; /* Not found? */ + ret = asprintf(&arg, "-%c", c); + if (ret < 0) + die("Can not allocate argument"); + add_argv(instance, arg, false); + if (ptr[1] == ':') { + arg = strdup(optarg); + if (!arg) + die("Can not allocate arguments"); + add_argv(instance, arg, false); + } + return; + } + for (i = 0; long_options[i].name; i++) { + if (c != long_options[i].val) + continue; + ret = asprintf(&arg, "--%s", long_options[i].name); + if (ret < 0) + die("Can not allocate argument"); + add_argv(instance, arg, false); + if (long_options[i].has_arg) { + arg = strdup(optarg); + if (!arg) + die("Can not allocate arguments"); + add_argv(instance, arg, false); + } + return; + } + /* Not found? 
*/ +} + +static inline void cmd_check_die(struct common_record_context *ctx, + enum trace_cmd id, char *cmd, char *param) +{ + if (ctx->curr_cmd == id) + die("%s has no effect with the command %s\n" + "Did you mean 'record'?", param, cmd); +} + +static inline void remove_instances(struct buffer_instance *instances) +{ + struct buffer_instance *del; + + while (instances) { + del = instances; + instances = instances->next; + free(del->name); + tracefs_instance_destroy(del->tracefs); + tracefs_instance_free(del->tracefs); + free(del); + } +} + +static inline void +check_instance_die(struct buffer_instance *instance, char *param) +{ + if (instance->delete) + die("Instance %s is marked for deletion, invalid option %s", + tracefs_instance_get_name(instance->tracefs), param); +} + +static bool clock_is_supported(struct tracefs_instance *instance, const char *clock) +{ + char *all_clocks = NULL; + char *ret = NULL; + + all_clocks = tracefs_instance_file_read(instance, "trace_clock", NULL); + if (!all_clocks) + return false; + + ret = strstr(all_clocks, clock); + if (ret && (ret == all_clocks || ret[-1] == ' ' || ret[-1] == '[')) { + switch (ret[strlen(clock)]) { + case ' ': + case '\0': + case ']': + case '\n': + break; + default: + ret = NULL; + } + } else { + ret = NULL; + } + free(all_clocks); + + return ret != NULL; +} + +#ifdef PERF +static int get_tsc_nsec(int *shift, int *mult) +{ + static int cpu_shift, cpu_mult; + static int supported; + int cpus = tracecmd_count_cpus(); + struct trace_perf perf; + int i; + + if (supported) + goto out; + + supported = -1; + if (trace_perf_init(&perf, 1, 0, getpid())) + return -1; + if (trace_perf_open(&perf)) + return -1; + cpu_shift = perf.mmap->time_shift; + cpu_mult = perf.mmap->time_mult; + for (i = 1; i < cpus; i++) { + trace_perf_close(&perf); + if (trace_perf_init(&perf, 1, i, getpid())) + break; + if (trace_perf_open(&perf)) + break; + if (perf.mmap->time_shift != cpu_shift || + perf.mmap->time_mult != cpu_mult) { + 
warning("Found different TSC multiplier and shift for CPU %d: %d;%d instead of %d;%d", + i, perf.mmap->time_mult, perf.mmap->time_shift, cpu_mult, cpu_shift); + break; + } + } + trace_perf_close(&perf); + if (i < cpus) + return -1; + + if (cpu_shift || cpu_mult) + supported = 1; +out: + if (supported < 0) + return -1; + + if (shift) + *shift = cpu_shift; + if (mult) + *mult = cpu_mult; + + return 0; +} +#else +static int get_tsc_nsec(int *shift, int *mult) +{ + return -1; +} +#endif + +bool trace_tsc2nsec_is_supported(void) +{ + return get_tsc_nsec(NULL, NULL) == 0; +} + +static void parse_record_options(int argc, + char **argv, + enum trace_cmd curr_cmd, + struct common_record_context *ctx) +{ + const char *plugin = NULL; + const char *option; + struct event_list *event = NULL; + struct event_list *last_event = NULL; + struct addrinfo *result; + char *pids; + char *pid; + char *sav; + int name_counter = 0; + int negative = 0; + struct buffer_instance *instance, *del_list = NULL; + int do_children = 0; + int fpids_count = 0; + + init_common_record_context(ctx, curr_cmd); + + if (IS_CMDSET(ctx)) + keep = 1; + + for (;;) { + int option_index = 0; + int ret; + int c; + const char *opts; + static struct option long_options[] = { + {"date", no_argument, NULL, OPT_date}, + {"func-stack", no_argument, NULL, OPT_funcstack}, + {"nosplice", no_argument, NULL, OPT_nosplice}, + {"nofifos", no_argument, NULL, OPT_nofifos}, + {"profile", no_argument, NULL, OPT_profile}, + {"stderr", no_argument, NULL, OPT_stderr}, + {"by-comm", no_argument, NULL, OPT_bycomm}, + {"ts-offset", required_argument, NULL, OPT_tsoffset}, + {"max-graph-depth", required_argument, NULL, OPT_max_graph_depth}, + {"cmdlines-size", required_argument, NULL, OPT_cmdlines_size}, + {"no-filter", no_argument, NULL, OPT_no_filter}, + {"debug", no_argument, NULL, OPT_debug}, + {"quiet", no_argument, NULL, OPT_quiet}, + {"help", no_argument, NULL, '?'}, + {"proc-map", no_argument, NULL, OPT_procmap}, + {"user", 
required_argument, NULL, OPT_user}, + {"module", required_argument, NULL, OPT_module}, + {"tsync-interval", required_argument, NULL, OPT_tsyncinterval}, + {"fork", no_argument, NULL, OPT_fork}, + {"tsc2nsec", no_argument, NULL, OPT_tsc2nsec}, + {"poll", no_argument, NULL, OPT_poll}, + {"name", required_argument, NULL, OPT_name}, + {"verbose", optional_argument, NULL, OPT_verbose}, + {"compression", required_argument, NULL, OPT_compression}, + {"file-version", required_argument, NULL, OPT_file_ver}, + {NULL, 0, NULL, 0} + }; + + if (IS_EXTRACT(ctx)) + opts = "+haf:Fp:co:O:sr:g:l:n:P:N:tb:B:ksiT"; + else + opts = "+hae:f:FA:p:cC:dDGo:O:s:r:V:vg:l:n:P:N:tb:R:B:ksSiTm:M:H:q"; + c = getopt_long (argc-1, argv+1, opts, long_options, &option_index); + if (c == -1) + break; + + /* + * If the current instance is to record a guest, then save + * all the arguments for this instance. + */ + if (c != 'B' && c != 'A' && c != OPT_name && is_guest(ctx->instance)) { + add_arg(ctx->instance, c, opts, long_options, optarg); + if (c == 'C') + ctx->instance->flags |= BUFFER_FL_HAS_CLOCK; + continue; + } + + switch (c) { + case 'h': + usage(argv); + break; + case 'a': + cmd_check_die(ctx, CMD_set, *(argv+1), "-a"); + if (IS_EXTRACT(ctx)) { + add_all_instances(); + } else { + ctx->record_all = 1; + record_all_events(); + } + break; + case 'e': + check_instance_die(ctx->instance, "-e"); + ctx->events = 1; + event = malloc(sizeof(*event)); + if (!event) + die("Failed to allocate event %s", optarg); + memset(event, 0, sizeof(*event)); + event->event = optarg; + add_event(ctx->instance, event); + event->neg = negative; + event->filter = NULL; + last_event = event; + + if (!ctx->record_all) + list_event(optarg); + break; + case 'f': + if (!last_event) + die("filter must come after event"); + if (last_event->filter) { + last_event->filter = + realloc(last_event->filter, + strlen(last_event->filter) + + strlen("&&()") + + strlen(optarg) + 1); + strcat(last_event->filter, "&&("); + 
strcat(last_event->filter, optarg); + strcat(last_event->filter, ")"); + } else { + ret = asprintf(&last_event->filter, "(%s)", optarg); + if (ret < 0) + die("Failed to allocate filter %s", optarg); + } + break; + + case 'R': + if (!last_event) + die("trigger must come after event"); + add_trigger(event, optarg); + break; + + case OPT_name: + if (!ctx->instance) + die("No instance defined for name option\n"); + if (!is_guest(ctx->instance)) + die(" --name is only used for -A options\n"); + free(ctx->instance->name); + ctx->instance->name = strdup(optarg); + if (!ctx->instance->name) + die("Failed to allocate name"); + break; + + case 'A': { + char *name = NULL; + int cid = -1, port = -1; + + if (!IS_RECORD(ctx)) + die("-A is only allowed for record operations"); + + name = parse_guest_name(optarg, &cid, &port, &result); + if (cid == -1 && !result) + die("guest %s not found", optarg); + if (port == -1) + port = TRACE_AGENT_DEFAULT_PORT; + if (!name || !*name) { + ret = asprintf(&name, "unnamed-%d", name_counter++); + if (ret < 0) + name = NULL; + } else { + /* Needs to be allocate */ + name = strdup(name); + } + if (!name) + die("Failed to allocate guest name"); + + ctx->instance = allocate_instance(name); + if (!ctx->instance) + die("Failed to allocate instance"); + + if (result) { + ctx->instance->flags |= BUFFER_FL_NETWORK; + ctx->instance->port_type = USE_TCP; + } + + ctx->instance->flags |= BUFFER_FL_GUEST; + ctx->instance->result = result; + ctx->instance->cid = cid; + ctx->instance->port = port; + ctx->instance->name = name; + add_instance(ctx->instance, 0); + ctx->data_flags |= DATA_FL_GUEST; + break; + } + case 'F': + test_set_event_pid(ctx->instance); + filter_task = 1; + break; + case 'G': + cmd_check_die(ctx, CMD_set, *(argv+1), "-G"); + ctx->global = 1; + break; + case 'P': + check_instance_die(ctx->instance, "-P"); + test_set_event_pid(ctx->instance); + pids = strdup(optarg); + if (!pids) + die("strdup"); + pid = strtok_r(pids, ",", &sav); + while 
(pid) { + fpids_count += add_filter_pid(ctx->instance, + atoi(pid), 0); + pid = strtok_r(NULL, ",", &sav); + ctx->instance->nr_process_pids++; + } + ctx->instance->process_pids = ctx->instance->filter_pids; + free(pids); + break; + case 'c': + check_instance_die(ctx->instance, "-c"); + test_set_event_pid(ctx->instance); + do_children = 1; + if (!ctx->instance->have_event_fork) { +#ifdef NO_PTRACE + die("-c invalid: ptrace not supported"); +#endif + do_ptrace = 1; + ctx->instance->ptrace_child = 1; + + } else { + save_option(ctx->instance, "event-fork"); + } + if (ctx->instance->have_func_fork) + save_option(ctx->instance, "function-fork"); + break; + case 'C': + check_instance_die(ctx->instance, "-C"); + if (strcmp(optarg, TSCNSEC_CLOCK) == 0) { + ret = get_tsc_nsec(&ctx->tsc2nsec.shift, + &ctx->tsc2nsec.mult); + if (ret) + die("TSC to nanosecond is not supported"); + ctx->instance->flags |= BUFFER_FL_TSC2NSEC; + ctx->instance->clock = TSC_CLOCK; + } else { + ctx->instance->clock = optarg; + } + if (!clock_is_supported(NULL, ctx->instance->clock)) + die("Clock %s is not supported", ctx->instance->clock); + ctx->instance->clock = strdup(ctx->instance->clock); + if (!ctx->instance->clock) + die("Failed allocation"); + ctx->instance->flags |= BUFFER_FL_HAS_CLOCK; + if (!ctx->clock && !is_guest(ctx->instance)) + ctx->clock = ctx->instance->clock; + break; + case 'v': + negative = 1; + break; + case 'l': + add_func(&ctx->instance->filter_funcs, + ctx->instance->filter_mod, optarg); + ctx->filtered = 1; + break; + case 'n': + check_instance_die(ctx->instance, "-n"); + add_func(&ctx->instance->notrace_funcs, + ctx->instance->filter_mod, optarg); + ctx->filtered = 1; + break; + case 'g': + check_instance_die(ctx->instance, "-g"); + add_func(&graph_funcs, ctx->instance->filter_mod, optarg); + ctx->filtered = 1; + break; + case 'p': + check_instance_die(ctx->instance, "-p"); + if (ctx->instance->plugin) + die("only one plugin allowed"); + for (plugin = optarg; 
isspace(*plugin); plugin++) + ; + ctx->instance->plugin = plugin; + for (optarg += strlen(optarg) - 1; + optarg > plugin && isspace(*optarg); optarg--) + ; + optarg++; + optarg[0] = '\0'; + break; + case 'D': + ctx->total_disable = 1; + /* fall through */ + case 'd': + ctx->disable = 1; + break; + case 'o': + cmd_check_die(ctx, CMD_set, *(argv+1), "-o"); + if (IS_RECORD_AGENT(ctx)) + die("-o incompatible with agent recording"); + if (host) + die("-o incompatible with -N"); + if (IS_START(ctx)) + die("start does not take output\n" + "Did you mean 'record'?"); + if (IS_STREAM(ctx)) + die("stream does not take output\n" + "Did you mean 'record'?"); + if (ctx->output) + die("only one output file allowed"); + ctx->output = optarg; + + if (IS_PROFILE(ctx)) { + int fd; + + /* pipe the output to this file instead of stdout */ + save_stdout = dup(1); + close(1); + fd = open(optarg, O_WRONLY | O_CREAT | O_TRUNC, 0644); + if (fd < 0) + die("can't write to %s", optarg); + if (fd != 1) { + dup2(fd, 1); + close(fd); + } + } + break; + case 'O': + check_instance_die(ctx->instance, "-O"); + option = optarg; + save_option(ctx->instance, option); + break; + case 'T': + check_instance_die(ctx->instance, "-T"); + save_option(ctx->instance, "stacktrace"); + break; + case 'H': + cmd_check_die(ctx, CMD_set, *(argv+1), "-H"); + check_instance_die(ctx->instance, "-H"); + add_hook(ctx->instance, optarg); + ctx->events = 1; + break; + case 's': + cmd_check_die(ctx, CMD_set, *(argv+1), "-s"); + if (IS_EXTRACT(ctx)) { + if (optarg) + usage(argv); + recorder_flags |= TRACECMD_RECORD_SNAPSHOT; + break; + } + if (!optarg) + usage(argv); + sleep_time = atoi(optarg); + break; + case 'S': + cmd_check_die(ctx, CMD_set, *(argv+1), "-S"); + ctx->manual = 1; + /* User sets events for profiling */ + if (!event) + ctx->events = 0; + break; + case 'r': + cmd_check_die(ctx, CMD_set, *(argv+1), "-r"); + rt_prio = atoi(optarg); + break; + case 'N': + cmd_check_die(ctx, CMD_set, *(argv+1), "-N"); + if 
(!IS_RECORD(ctx)) + die("-N only available with record"); + if (IS_RECORD_AGENT(ctx)) + die("-N incompatible with agent recording"); + if (ctx->output) + die("-N incompatible with -o"); + host = optarg; + break; + case 'V': + cmd_check_die(ctx, CMD_set, *(argv+1), "-V"); + if (!IS_RECORD(ctx)) + die("-V only available with record"); + if (IS_RECORD_AGENT(ctx)) + die("-V incompatible with agent recording"); + if (ctx->output) + die("-V incompatible with -o"); + host = optarg; + ctx->instance->port_type = USE_VSOCK; + break; + case 'm': + if (max_kb) + die("-m can only be specified once"); + if (!IS_RECORD(ctx)) + die("only record take 'm' option"); + max_kb = atoi(optarg); + break; + case 'M': + check_instance_die(ctx->instance, "-M"); + ctx->instance->cpumask = alloc_mask_from_hex(ctx->instance, optarg); + break; + case 't': + cmd_check_die(ctx, CMD_set, *(argv+1), "-t"); + if (IS_EXTRACT(ctx)) + ctx->topt = 1; /* Extract top instance also */ + else + ctx->instance->port_type = USE_TCP; + break; + case 'b': + check_instance_die(ctx->instance, "-b"); + ctx->instance->buffer_size = atoi(optarg); + break; + case 'B': + ctx->instance = allocate_instance(optarg); + if (!ctx->instance) + die("Failed to create instance"); + ctx->instance->delete = negative; + negative = 0; + if (ctx->instance->delete) { + ctx->instance->next = del_list; + del_list = ctx->instance; + } else + add_instance(ctx->instance, local_cpu_count); + if (IS_PROFILE(ctx)) + ctx->instance->flags |= BUFFER_FL_PROFILE; + break; + case 'k': + cmd_check_die(ctx, CMD_set, *(argv+1), "-k"); + keep = 1; + break; + case 'i': + ignore_event_not_found = 1; + break; + case OPT_user: + ctx->user = strdup(optarg); + if (!ctx->user) + die("Failed to allocate user name"); + break; + case OPT_procmap: + cmd_check_die(ctx, CMD_start, *(argv+1), "--proc-map"); + cmd_check_die(ctx, CMD_set, *(argv+1), "--proc-map"); + check_instance_die(ctx->instance, "--proc-map"); + ctx->instance->get_procmap = 1; + break; + case 
OPT_date: + cmd_check_die(ctx, CMD_set, *(argv+1), "--date"); + ctx->date = 1; + if (ctx->data_flags & DATA_FL_OFFSET) + die("Can not use both --date and --ts-offset"); + ctx->data_flags |= DATA_FL_DATE; + break; + case OPT_funcstack: + func_stack = 1; + break; + case OPT_nosplice: + cmd_check_die(ctx, CMD_set, *(argv+1), "--nosplice"); + recorder_flags |= TRACECMD_RECORD_NOSPLICE; + break; + case OPT_nofifos: + cmd_check_die(ctx, CMD_set, *(argv+1), "--nofifos"); + no_fifos = true; + break; + case OPT_profile: + cmd_check_die(ctx, CMD_set, *(argv+1), "--profile"); + check_instance_die(ctx->instance, "--profile"); + handle_init = trace_init_profile; + ctx->instance->flags |= BUFFER_FL_PROFILE; + ctx->events = 1; + break; + case OPT_stderr: + /* if -o was used (for profile), ignore this */ + if (save_stdout >= 0) + break; + save_stdout = dup(1); + close(1); + dup2(2, 1); + break; + case OPT_bycomm: + cmd_check_die(ctx, CMD_set, *(argv+1), "--by-comm"); + trace_profile_set_merge_like_comms(); + break; + case OPT_tsoffset: + cmd_check_die(ctx, CMD_set, *(argv+1), "--ts-offset"); + ctx->date2ts = strdup(optarg); + if (ctx->data_flags & DATA_FL_DATE) + die("Can not use both --date and --ts-offset"); + ctx->data_flags |= DATA_FL_OFFSET; + break; + case OPT_max_graph_depth: + check_instance_die(ctx->instance, "--max-graph-depth"); + free(ctx->instance->max_graph_depth); + ctx->instance->max_graph_depth = strdup(optarg); + if (!ctx->instance->max_graph_depth) + die("Could not allocate option"); + break; + case OPT_cmdlines_size: + ctx->saved_cmdlines_size = atoi(optarg); + break; + case OPT_no_filter: + cmd_check_die(ctx, CMD_set, *(argv+1), "--no-filter"); + no_filter = true; + break; + case OPT_debug: + tracecmd_set_debug(true); + break; + case OPT_module: + check_instance_die(ctx->instance, "--module"); + if (ctx->instance->filter_mod) + add_func(&ctx->instance->filter_funcs, + ctx->instance->filter_mod, "*"); + ctx->instance->filter_mod = optarg; + ctx->filtered = 0; + 
break; + case OPT_tsyncinterval: + cmd_check_die(ctx, CMD_set, *(argv+1), "--tsync-interval"); + ctx->tsync_loop_interval = atoi(optarg); + break; + case OPT_fork: + if (!IS_START(ctx)) + die("--fork option used for 'start' command only"); + fork_process = true; + break; + case OPT_tsc2nsec: + ret = get_tsc_nsec(&ctx->tsc2nsec.shift, + &ctx->tsc2nsec.mult); + if (ret) + die("TSC to nanosecond is not supported"); + ctx->instance->flags |= BUFFER_FL_TSC2NSEC; + break; + case OPT_poll: + cmd_check_die(ctx, CMD_set, *(argv+1), "--poll"); + recorder_flags |= TRACECMD_RECORD_POLL; + break; + case OPT_compression: + cmd_check_die(ctx, CMD_start, *(argv+1), "--compression"); + cmd_check_die(ctx, CMD_set, *(argv+1), "--compression"); + cmd_check_die(ctx, CMD_extract, *(argv+1), "--compression"); + cmd_check_die(ctx, CMD_stream, *(argv+1), "--compression"); + cmd_check_die(ctx, CMD_profile, *(argv+1), "--compression"); + if (strcmp(optarg, "any") && strcmp(optarg, "none") && + !tracecmd_compress_is_supported(optarg, NULL)) + die("Compression algorithm %s is not supported", optarg); + ctx->compression = strdup(optarg); + break; + case OPT_file_ver: + if (ctx->curr_cmd != CMD_record && ctx->curr_cmd != CMD_record_agent) + die("--file_version has no effect with the command %s\n", + *(argv+1)); + ctx->file_version = atoi(optarg); + if (ctx->file_version < FILE_VERSION_MIN || + ctx->file_version > FILE_VERSION_MAX) + die("Unsupported file version %d, " + "supported versions are from %d to %d", + ctx->file_version, FILE_VERSION_MIN, FILE_VERSION_MAX); + break; + case OPT_quiet: + case 'q': + quiet = true; + break; + case OPT_verbose: + if (trace_set_verbose(optarg) < 0) + die("invalid verbose level %s", optarg); + break; + default: + usage(argv); + } + } + + remove_instances(del_list); + + /* If --date is specified, prepend it to all guest VM flags */ + if (ctx->date) { + struct buffer_instance *instance; + + for_all_instances(instance) { + if (is_guest(instance)) + 
add_argv(instance, "--date", true); + } + } + + if (!ctx->filtered && ctx->instance->filter_mod) + add_func(&ctx->instance->filter_funcs, + ctx->instance->filter_mod, "*"); + + if (do_children && !filter_task && !fpids_count) + die(" -c can only be used with -F (or -P with event-fork support)"); + + if ((argc - optind) >= 2) { + if (IS_EXTRACT(ctx)) + die("Command extract does not take any commands\n" + "Did you mean 'record'?"); + ctx->run_command = 1; + } + if (ctx->user && !ctx->run_command) + warning("--user %s is ignored, no command is specified", + ctx->user); + + if (top_instance.get_procmap) { + /* use ptrace to get procmap on the command exit */ + if (ctx->run_command) { + do_ptrace = 1; + } else if (!top_instance.nr_filter_pids) { + warning("--proc-map is ignored for top instance, " + "no command or filtered PIDs are specified."); + top_instance.get_procmap = 0; + } + } + + for_all_instances(instance) { + if (instance->get_procmap && !instance->nr_filter_pids) { + warning("--proc-map is ignored for instance %s, " + "no filtered PIDs are specified.", + tracefs_instance_get_name(instance->tracefs)); + instance->get_procmap = 0; + } + } +} + +static enum trace_type get_trace_cmd_type(enum trace_cmd cmd) +{ + const static struct { + enum trace_cmd cmd; + enum trace_type ttype; + } trace_type_per_command[] = { + {CMD_record, TRACE_TYPE_RECORD}, + {CMD_stream, TRACE_TYPE_STREAM}, + {CMD_extract, TRACE_TYPE_EXTRACT}, + {CMD_profile, TRACE_TYPE_STREAM}, + {CMD_start, TRACE_TYPE_START}, + {CMD_record_agent, TRACE_TYPE_RECORD}, + {CMD_set, TRACE_TYPE_SET} + }; + + for (int i = 0; i < ARRAY_SIZE(trace_type_per_command); i++) { + if (trace_type_per_command[i].cmd == cmd) + return trace_type_per_command[i].ttype; + } + + die("Trace type UNKNOWN for the given cmd_fun"); +} + +static void finalize_record_trace(struct common_record_context *ctx) +{ + struct buffer_instance *instance; + + if (keep) + return; + + update_reset_files(); + update_reset_triggers(); + if 
(clear_function_filters) + clear_func_filters(); + + set_plugin("nop"); + + tracecmd_remove_instances(); + + /* If tracing_on was enabled before we started, set it on now */ + for_all_instances(instance) { + if (instance->flags & BUFFER_FL_KEEP) + write_tracing_on(instance, + instance->tracing_on_init_val); + if (is_agent(instance)) { + tracecmd_msg_send_close_resp_msg(instance->msg_handle); + tracecmd_output_close(instance->network_handle); + } + } + + if (host) + tracecmd_output_close(ctx->instance->network_handle); +} + +static bool has_local_instances(void) +{ + struct buffer_instance *instance; + + for_all_instances(instance) { + if (is_guest(instance)) + continue; + if (host && instance->msg_handle) + continue; + return true; + } + return false; +} + +static void set_tsync_params(struct common_record_context *ctx) +{ + struct buffer_instance *instance; + int shift, mult; + bool force_tsc = false; + char *clock = NULL; + + if (!ctx->clock) { + /* + * If no clock is configured && + * KVM time sync protocol is available && + * there is information of each guest PID process && + * tsc-x86 clock is supported && + * TSC to nsec multiplier and shift are available: + * force using the x86-tsc clock for this host-guest tracing session + * and store TSC to nsec multiplier and shift. 
+ */ + if (tsync_proto_is_supported("kvm") && + trace_have_guests_pid() && + clock_is_supported(NULL, TSC_CLOCK) && + !get_tsc_nsec(&shift, &mult) && mult) { + clock = strdup(TSC_CLOCK); + if (!clock) + die("Cannot not allocate clock"); + ctx->tsc2nsec.mult = mult; + ctx->tsc2nsec.shift = shift; + force_tsc = true; + } else { /* Use the current clock of the first host instance */ + clock = get_trace_clock(true); + } + } else { + clock = strdup(ctx->clock); + if (!clock) + die("Cannot not allocate clock"); + } + + if (!clock && !ctx->tsync_loop_interval) + goto out; + for_all_instances(instance) { + if (clock && !(instance->flags & BUFFER_FL_HAS_CLOCK)) { + /* use the same clock in all tracing peers */ + if (is_guest(instance)) { + if (!instance->clock) { + instance->clock = strdup(clock); + if (!instance->clock) + die("Can not allocate instance clock"); + } + add_argv(instance, (char *)instance->clock, true); + add_argv(instance, "-C", true); + if (ctx->tsc2nsec.mult) + instance->flags |= BUFFER_FL_TSC2NSEC; + } else if (force_tsc && !instance->clock) { + instance->clock = strdup(clock); + if (!instance->clock) + die("Can not allocate instance clock"); + } + } + instance->tsync_loop_interval = ctx->tsync_loop_interval; + } +out: + free(clock); +} + +static void record_trace(int argc, char **argv, + struct common_record_context *ctx) +{ + enum trace_type type = get_trace_cmd_type(ctx->curr_cmd); + struct buffer_instance *instance; + struct filter_pids *pid; + + /* + * If top_instance doesn't have any plugins or events, then + * remove it from being processed. 
+ */ + if (!__check_doing_something(&top_instance) && !filter_task) + first_instance = buffer_instances; + else + ctx->topt = 1; + + update_first_instance(ctx->instance, ctx->topt); + if (!IS_CMDSET(ctx)) { + check_doing_something(); + check_function_plugin(); + } + + if (!ctx->output) + ctx->output = DEFAULT_INPUT_FILE; + + if (ctx->data_flags & DATA_FL_GUEST) + set_tsync_params(ctx); + + make_instances(); + + /* Save the state of tracing_on before starting */ + for_all_instances(instance) { + instance->output_file = strdup(ctx->output); + if (!instance->output_file) + die("Failed to allocate output file name for instance"); + if (!ctx->manual && instance->flags & BUFFER_FL_PROFILE) + enable_profile(instance); + + instance->tracing_on_init_val = read_tracing_on(instance); + /* Some instances may not be created yet */ + if (instance->tracing_on_init_val < 0) + instance->tracing_on_init_val = 1; + } + + if (ctx->events) + expand_event_list(); + + page_size = getpagesize(); + + if (!is_guest(ctx->instance)) + fset = set_ftrace(ctx->instance, !ctx->disable, ctx->total_disable); + if (!IS_CMDSET(ctx)) + tracecmd_disable_all_tracing(1); + + for_all_instances(instance) + set_clock(ctx, instance); + + + /* Record records the date first */ + if (ctx->date && + ((IS_RECORD(ctx) && has_local_instances()) || IS_RECORD_AGENT(ctx))) + ctx->date2ts = get_date_to_ts(); + + for_all_instances(instance) { + set_funcs(instance); + set_mask(instance); + } + + if (ctx->events) { + for_all_instances(instance) + enable_events(instance); + } + + set_saved_cmdlines_size(ctx); + set_buffer_size(); + update_plugins(type); + set_options(); + + for_all_instances(instance) { + if (instance->max_graph_depth) { + set_max_graph_depth(instance, instance->max_graph_depth); + free(instance->max_graph_depth); + instance->max_graph_depth = NULL; + } + } + + allocate_seq(); + + if (type & (TRACE_TYPE_RECORD | TRACE_TYPE_STREAM)) { + signal(SIGINT, finish); + if (!latency) + start_threads(type, ctx); + } 
+ + if (ctx->run_command) { + run_cmd(type, ctx->user, (argc - optind) - 1, &argv[optind + 1]); + } else if (ctx->instance && is_agent(ctx->instance)) { + update_task_filter(); + tracecmd_enable_tracing(); + tracecmd_msg_wait_close(ctx->instance->msg_handle); + } else { + bool pwait = false; + bool wait_indefinitely = false; + + update_task_filter(); + + if (!IS_CMDSET(ctx)) + tracecmd_enable_tracing(); + + if (type & (TRACE_TYPE_START | TRACE_TYPE_SET)) + exit(0); + + /* We don't ptrace ourself */ + if (do_ptrace) { + for_all_instances(instance) { + for (pid = instance->filter_pids; pid; pid = pid->next) { + if (!pid->exclude && instance->ptrace_child) { + ptrace_attach(instance, pid->pid); + pwait = true; + } + } + } + } + /* sleep till we are woken with Ctrl^C */ + printf("Hit Ctrl^C to stop recording\n"); + for_all_instances(instance) { + /* If an instance is not tracing individual processes + * or there is an error while waiting for a process to + * exit, fallback to waiting indefinitely. + */ + if (!instance->nr_process_pids || + trace_wait_for_processes(instance)) + wait_indefinitely = true; + } + while (!finished && wait_indefinitely) + trace_or_sleep(type, pwait); + } + + tell_guests_to_stop(ctx); + tracecmd_disable_tracing(); + if (!latency) + stop_threads(type); + + record_stats(); + + if (!latency) + wait_threads(); + + if (IS_RECORD(ctx)) { + record_data(ctx); + delete_thread_data(); + } else + print_stats(); + + if (!keep) + tracecmd_disable_all_tracing(0); + + destroy_stats(); + finalize_record_trace(ctx); +} + +/* + * This function contains common code for the following commands: + * record, start, stream, profile. 
+ */ +static void record_trace_command(int argc, char **argv, + struct common_record_context *ctx) +{ + tracecmd_tsync_init(); + record_trace(argc, argv, ctx); +} + +void trace_start(int argc, char **argv) +{ + struct common_record_context ctx; + + parse_record_options(argc, argv, CMD_start, &ctx); + record_trace_command(argc, argv, &ctx); + exit(0); +} + +void trace_set(int argc, char **argv) +{ + struct common_record_context ctx; + + parse_record_options(argc, argv, CMD_set, &ctx); + record_trace_command(argc, argv, &ctx); + exit(0); +} + +void trace_extract(int argc, char **argv) +{ + struct common_record_context ctx; + struct buffer_instance *instance; + enum trace_type type; + + parse_record_options(argc, argv, CMD_extract, &ctx); + + type = get_trace_cmd_type(ctx.curr_cmd); + + update_first_instance(ctx.instance, 1); + check_function_plugin(); + + if (!ctx.output) + ctx.output = DEFAULT_INPUT_FILE; + + /* Save the state of tracing_on before starting */ + for_all_instances(instance) { + instance->output_file = strdup(ctx.output); + if (!instance->output_file) + die("Failed to allocate output file name for instance"); + + if (!ctx.manual && instance->flags & BUFFER_FL_PROFILE) + enable_profile(ctx.instance); + + instance->tracing_on_init_val = read_tracing_on(instance); + /* Some instances may not be created yet */ + if (instance->tracing_on_init_val < 0) + instance->tracing_on_init_val = 1; + } + + /* Extracting data records all events in the system. 
*/ + if (!ctx.record_all) + record_all_events(); + + if (ctx.events) + expand_event_list(); + + page_size = getpagesize(); + update_plugins(type); + set_options(); + + for_all_instances(instance) { + if (instance->max_graph_depth) { + set_max_graph_depth(instance, instance->max_graph_depth); + free(instance->max_graph_depth); + instance->max_graph_depth = NULL; + } + } + + allocate_seq(); + flush_threads(); + record_stats(); + + if (!keep) + tracecmd_disable_all_tracing(0); + + /* extract records the date after extraction */ + if (ctx.date) { + /* + * We need to start tracing, don't let other traces + * screw with our trace_marker. + */ + tracecmd_disable_all_tracing(1); + ctx.date2ts = get_date_to_ts(); + } + + record_data(&ctx); + delete_thread_data(); + destroy_stats(); + finalize_record_trace(&ctx); + exit(0); +} + +void trace_stream(int argc, char **argv) +{ + struct common_record_context ctx; + + parse_record_options(argc, argv, CMD_stream, &ctx); + record_trace_command(argc, argv, &ctx); + exit(0); +} + +void trace_profile(int argc, char **argv) +{ + struct common_record_context ctx; + + parse_record_options(argc, argv, CMD_profile, &ctx); + + handle_init = trace_init_profile; + ctx.events = 1; + + /* + * If no instances were set, then enable profiling on the top instance. 
+ */ + if (!buffer_instances) + top_instance.flags |= BUFFER_FL_PROFILE; + + record_trace_command(argc, argv, &ctx); + do_trace_profile(); + exit(0); +} + +void trace_record(int argc, char **argv) +{ + struct common_record_context ctx; + + parse_record_options(argc, argv, CMD_record, &ctx); + record_trace_command(argc, argv, &ctx); + exit(0); +} + +int trace_record_agent(struct tracecmd_msg_handle *msg_handle, + int cpus, int *fds, + int argc, char **argv, + bool use_fifos, + unsigned long long trace_id, const char *host) +{ + struct common_record_context ctx; + char **argv_plus; + + /* Reset optind for getopt_long */ + optind = 1; + /* + * argc is the number of elements in argv, but we need to convert + * argc and argv into "trace-cmd", "record", argv. + * where argc needs to grow by two. + */ + argv_plus = calloc(argc + 2, sizeof(char *)); + if (!argv_plus) + die("Failed to allocate record arguments"); + + argv_plus[0] = "trace-cmd"; + argv_plus[1] = "record"; + memmove(argv_plus + 2, argv, argc * sizeof(char *)); + argc += 2; + + parse_record_options(argc, argv_plus, CMD_record_agent, &ctx); + if (ctx.run_command) + return -EINVAL; + + ctx.instance->fds = fds; + ctx.instance->use_fifos = use_fifos; + ctx.instance->flags |= BUFFER_FL_AGENT; + ctx.instance->msg_handle = msg_handle; + ctx.instance->host = host; + msg_handle->version = V3_PROTOCOL; + top_instance.trace_id = trace_id; + record_trace(argc, argv, &ctx); + + free(argv_plus); + return 0; +} |