author     Adrian Salido <salidoa@google.com>                               2016-10-14 19:01:36 +0000
committer  Android Partner Code Review <android-gerrit-partner@google.com>  2016-10-14 19:01:36 +0000
commit     179907a66a7c048c20a6688dcc9f0fa11b5852de (patch)
tree       11d75fd1c06eab6846b3aba4e3af63f4e5177604
parent     7b116218b91ca680a1a473cdd57734a53a1c87e6 (diff)
parent     159f284256e6a4c2d2555e8de81656ee51cb250e (diff)
download   tegra-179907a66a7c048c20a6688dcc9f0fa11b5852de.tar.gz
Merge changes Ibdc46d20,Icbcc37c2,I8c5aa13b,I10a0c2aa,Iebc150f6, ... into android-chromeos-dragon-3.18-nyc
* changes:
ecryptfs: don't allow mmap when the lower fs doesn't support it
Revert "ecryptfs: forbid opening files without mmap handler"
UPSTREAM: arm64: make sys_call_table const
ion: blacklist %p kptr_restrict
binder: blacklist %p kptr_restrict
BACKPORT: android: binder: fix binder mmap failures
BACKPORT: perf: Fix event->ctx locking
UPSTREAM: perf: Fix events installation during moving group
perf: protect group_leader from races that cause ctx double-free
UPSTREAM: staging/android/ion : fix a race condition in the ion driver
drm/tegra: Check args->num_syncpts limit
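Two of the changes above ("ion: blacklist %p kptr_restrict", "binder: blacklist %p kptr_restrict") swap %p for %pK in debug output so the printed pointers honour /proc/sys/kernel/kptr_restrict. A minimal, hypothetical module-style sketch of the difference (not part of this merge):

#include <linux/module.h>
#include <linux/printk.h>

static int data;	/* some kernel object whose address we log */

static int __init pk_demo_init(void)
{
	/* %p prints the raw kernel address: an info leak under kptr_restrict. */
	pr_info("raw pointer:        %p\n", &data);
	/*
	 * %pK consults kptr_restrict: with a value of 1 the address is only
	 * printed for readers with CAP_SYSLOG, with 2 it is always zeros.
	 */
	pr_info("restricted pointer: %pK\n", &data);
	return 0;
}

static void __exit pk_demo_exit(void)
{
}

module_init(pk_demo_init);
module_exit(pk_demo_exit);
MODULE_LICENSE("GPL");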
-rw-r--r--  arch/arm64/kernel/sys.c              2
-rw-r--r--  drivers/gpu/drm/tegra/drm.c          5
-rw-r--r--  drivers/staging/android/binder.c    47
-rw-r--r--  drivers/staging/android/ion/ion.c   69
-rw-r--r--  fs/ecryptfs/file.c                  15
-rw-r--r--  fs/ecryptfs/kthread.c               13
-rw-r--r--  include/linux/perf_event.h           6
-rw-r--r--  kernel/events/core.c               263

8 files changed, 322 insertions(+), 98 deletions(-)
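For background on the ion change ("UPSTREAM: staging/android/ion : fix a race condition in the ion driver") in the diff below: ION_IOC_FREE used to drop client->lock between looking a handle up and putting its references, leaving a window in which a racing thread could drop the last reference and destroy the handle. A rough user-space analogue of the fixed locking shape, purely illustrative with hypothetical names, not driver code:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

/* Stand-in for an ion handle: a refcount protected by client_lock,
 * the way ion handles are protected by client->lock in the driver. */
struct handle {
	int refs;
};

static pthread_mutex_t client_lock = PTHREAD_MUTEX_INITIALIZER;
static struct handle *table_entry;	/* stand-in for the client's idr */

/* Caller must hold client_lock, like ion_handle_get_by_id_nolock(). */
static struct handle *get_by_id_nolock(void)
{
	struct handle *h = table_entry;

	if (h)
		h->refs++;	/* the lookup takes its own reference */
	return h;
}

/* Caller must hold client_lock, like ion_handle_put_nolock(). */
static void put_nolock(struct handle *h)
{
	if (--h->refs == 0) {
		table_entry = NULL;
		free(h);	/* last reference gone: destroy */
	}
}

/* The fixed ION_IOC_FREE shape: the lookup and both puts happen under a
 * single hold of client_lock, so no racing thread can slip in between the
 * lookup and the final put and destroy the handle first. */
static void free_ioctl(void)
{
	struct handle *h;

	pthread_mutex_lock(&client_lock);
	h = get_by_id_nolock();		/* refs: 1 -> 2 */
	if (h) {
		put_nolock(h);		/* userspace ref, 2 -> 1 (cf. ion_free_nolock) */
		put_nolock(h);		/* lookup ref, 1 -> 0 (cf. ion_handle_put_nolock) */
	}
	pthread_mutex_unlock(&client_lock);
}

int main(void)
{
	table_entry = calloc(1, sizeof(*table_entry));
	table_entry->refs = 1;		/* the reference userspace owns */
	free_ioctl();
	printf("handle destroyed: %s\n", table_entry == NULL ? "yes" : "no");
	return 0;
}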
diff --git a/arch/arm64/kernel/sys.c b/arch/arm64/kernel/sys.c
index 3fa98ff14f0e..df20b7918854 100644
--- a/arch/arm64/kernel/sys.c
+++ b/arch/arm64/kernel/sys.c
@@ -50,7 +50,7 @@ asmlinkage long sys_mmap(unsigned long addr, unsigned long len,
  * The sys_call_table array must be 4K aligned to be accessible from
  * kernel/entry.S.
  */
-void *sys_call_table[__NR_syscalls] __aligned(4096) = {
+void * const sys_call_table[__NR_syscalls] __aligned(4096) = {
 	[0 ... __NR_syscalls - 1] = sys_ni_syscall,
 #include <asm/unistd.h>
 };
diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index 8ba928c96d7b..0991569cf7fb 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -468,9 +468,10 @@ int tegra_drm_submit(struct tegra_drm_context *context,
 	/*
 	 * Doesn't make sense to submit a job without synchronizing on at least
-	 * one syncpt.
+	 * one syncpt. Also check client num_syncpts limit.
 	 */
-	if (!args->num_syncpts)
+	if (!args->num_syncpts ||
+	    (args->num_syncpts > context->client->base.num_syncpts))
 		return -EINVAL;
 
 	job = host1x_job_alloc(context->channel, args->num_cmdbufs,
diff --git a/drivers/staging/android/binder.c b/drivers/staging/android/binder.c
index ed770e420824..c7415b4b4446 100644
--- a/drivers/staging/android/binder.c
+++ b/drivers/staging/android/binder.c
@@ -536,7 +536,7 @@ static void binder_insert_free_buffer(struct binder_proc *proc,
 	new_buffer_size = binder_buffer_size(proc, new_buffer);
 
 	binder_debug(BINDER_DEBUG_BUFFER_ALLOC,
-		     "%d: add free buffer, size %zd, at %p\n",
+		     "%d: add free buffer, size %zd, at %pK\n",
		      proc->pid, new_buffer_size, new_buffer);
 
 	while (*p) {
@@ -610,12 +610,11 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate,
 {
 	void *page_addr;
 	unsigned long user_page_addr;
-	struct vm_struct tmp_area;
 	struct page **page;
 	struct mm_struct *mm;
 
 	binder_debug(BINDER_DEBUG_BUFFER_ALLOC,
-		     "%d: %s pages %p-%p\n", proc->pid,
+		     "%d: %s pages %pK-%pK\n", proc->pid,
		      allocate ? "allocate" : "free", start, end);
 
 	if (end <= start)
@@ -649,7 +648,6 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate,
 		goto err_no_vma;
 	}
 
-	memset(&tmp_area, 0, sizeof(tmp_area));
 	for (page_addr = start; page_addr < end; page_addr += PAGE_SIZE) {
 		int ret;
 
@@ -658,15 +656,16 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate,
 		BUG_ON(*page);
 		*page = alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO);
 		if (*page == NULL) {
-			pr_err("%d: binder_alloc_buf failed for page at %p\n",
+			pr_err("%d: binder_alloc_buf failed for page at %pK\n",
			       proc->pid, page_addr);
 			goto err_alloc_page_failed;
 		}
-		tmp_area.addr = page_addr;
-		tmp_area.size = PAGE_SIZE + PAGE_SIZE /* guard page? */;
-		ret = map_vm_area(&tmp_area, PAGE_KERNEL, page);
-		if (ret) {
-			pr_err("%d: binder_alloc_buf failed to map page at %p in kernel\n",
+		ret = map_kernel_range_noflush((unsigned long)page_addr,
+					       PAGE_SIZE, PAGE_KERNEL, page);
+		flush_cache_vmap((unsigned long)page_addr,
+				 (unsigned long)page_addr + PAGE_SIZE);
+		if (ret != 1) {
+			pr_err("%d: binder_alloc_buf failed to map page at %pK in kernel\n",
			       proc->pid, page_addr);
 			goto err_map_kernel_failed;
 		}
@@ -776,7 +775,7 @@ static struct binder_buffer *binder_alloc_buf(struct binder_proc *proc,
 	}
 
 	binder_debug(BINDER_DEBUG_BUFFER_ALLOC,
-		     "%d: binder_alloc_buf size %zd got buffer %p size %zd\n",
+		     "%d: binder_alloc_buf size %zd got buffer %pK size %zd\n",
		      proc->pid, size, buffer, buffer_size);
 
 	has_page_addr =
@@ -806,7 +805,7 @@ static struct binder_buffer *binder_alloc_buf(struct binder_proc *proc,
 		binder_insert_free_buffer(proc, new_buffer);
 	}
 	binder_debug(BINDER_DEBUG_BUFFER_ALLOC,
-		     "%d: binder_alloc_buf size %zd got %p\n",
+		     "%d: binder_alloc_buf size %zd got %pK\n",
		      proc->pid, size, buffer);
 	buffer->data_size = data_size;
 	buffer->offsets_size = offsets_size;
@@ -846,7 +845,7 @@ static void binder_delete_free_buffer(struct binder_proc *proc,
 		if (buffer_end_page(prev) == buffer_end_page(buffer))
 			free_page_end = 0;
 		binder_debug(BINDER_DEBUG_BUFFER_ALLOC,
-			     "%d: merge free, buffer %p share page with %p\n",
+			     "%d: merge free, buffer %pK share page with %pK\n",
			      proc->pid, buffer, prev);
 	}
@@ -859,14 +858,14 @@
			    buffer_start_page(buffer))
 			free_page_start = 0;
 		binder_debug(BINDER_DEBUG_BUFFER_ALLOC,
-			     "%d: merge free, buffer %p share page with %p\n",
+			     "%d: merge free, buffer %pK share page with %pK\n",
			      proc->pid, buffer, prev);
 	}
 	}
 	list_del(&buffer->entry);
 	if (free_page_start || free_page_end) {
 		binder_debug(BINDER_DEBUG_BUFFER_ALLOC,
-			     "%d: merge free, buffer %p do not share page%s%s with %p or %p\n",
+			     "%d: merge free, buffer %pK do not share page%s%s with %pK or %pK\n",
			      proc->pid, buffer, free_page_start ? "" : " end",
			      free_page_end ? "" : " start", prev, next);
 		binder_update_page_range(proc, 0, free_page_start ?
@@ -887,7 +886,7 @@ static void binder_free_buf(struct binder_proc *proc,
		ALIGN(buffer->offsets_size, sizeof(void *));
 
 	binder_debug(BINDER_DEBUG_BUFFER_ALLOC,
-		     "%d: binder_free_buf %p size %zd buffer_size %zd\n",
+		     "%d: binder_free_buf %pK size %zd buffer_size %zd\n",
		      proc->pid, buffer, size, buffer_size);
 
 	BUG_ON(buffer->free);
@@ -1317,7 +1316,7 @@ static void binder_transaction_buffer_release(struct binder_proc *proc,
 	int debug_id = buffer->debug_id;
 
 	binder_debug(BINDER_DEBUG_TRANSACTION,
-		     "%d buffer release %d, size %zd-%zd, failed at %p\n",
+		     "%d buffer release %d, size %zd-%zd, failed at %pK\n",
		      proc->pid, buffer->debug_id,
		      buffer->data_size, buffer->offsets_size, failed_at);
@@ -2172,7 +2171,7 @@ static int binder_thread_write(struct binder_proc *proc,
 				}
 			}
 			binder_debug(BINDER_DEBUG_DEAD_BINDER,
-				     "%d:%d BC_DEAD_BINDER_DONE %016llx found %p\n",
+				     "%d:%d BC_DEAD_BINDER_DONE %016llx found %pK\n",
				      proc->pid, thread->pid, (u64)cookie, death);
 			if (death == NULL) {
@@ -2977,7 +2976,7 @@ static int binder_mmap(struct file *filp, struct vm_area_struct *vma)
 #ifdef CONFIG_CPU_CACHE_VIPT
 	if (cache_is_vipt_aliasing()) {
 		while (CACHE_COLOUR((vma->vm_start ^ (uint32_t)proc->buffer))) {
-			pr_info("binder_mmap: %d %lx-%lx maps %p bad alignment\n", proc->pid, vma->vm_start, vma->vm_end, proc->buffer);
+			pr_info("binder_mmap: %d %lx-%lx maps %pK bad alignment\n", proc->pid, vma->vm_start, vma->vm_end, proc->buffer);
 			vma->vm_start += PAGE_SIZE;
 		}
 	}
@@ -3013,7 +3012,7 @@ static int binder_mmap(struct file *filp, struct vm_area_struct *vma)
 	proc->vma = vma;
 	proc->vma_vm_mm = vma->vm_mm;
 
-	/*pr_info("binder_mmap: %d %lx-%lx maps %p\n",
+	/*pr_info("binder_mmap: %d %lx-%lx maps %pK\n",
		 proc->pid, vma->vm_start, vma->vm_end, proc->buffer);*/
 
 	return 0;
@@ -3239,7 +3238,7 @@ static void binder_deferred_release(struct binder_proc *proc)
 			page_addr = proc->buffer + i * PAGE_SIZE;
 			binder_debug(BINDER_DEBUG_BUFFER_ALLOC,
-				     "%s: %d: page %d at %p not freed\n",
+				     "%s: %d: page %d at %pK not freed\n",
				      __func__, proc->pid, i, page_addr);
 			unmap_kernel_range((unsigned long)page_addr, PAGE_SIZE);
 			__free_page(proc->pages[i]);
@@ -3324,7 +3323,7 @@ static void print_binder_transaction(struct seq_file *m, const char *prefix,
				    struct binder_transaction *t)
 {
 	seq_printf(m,
-		   "%s %d: %p from %d:%d to %d:%d code %x flags %x pri %ld r%d",
+		   "%s %d: %pK from %d:%d to %d:%d code %x flags %x pri %ld r%d",
		    prefix, t->debug_id, t,
		    t->from ? t->from->proc->pid : 0,
		    t->from ? t->from->pid : 0,
@@ -3338,7 +3337,7 @@ static void print_binder_transaction(struct seq_file *m, const char *prefix,
 	if (t->buffer->target_node)
 		seq_printf(m, " node %d",
			   t->buffer->target_node->debug_id);
-	seq_printf(m, " size %zd:%zd data %p\n",
+	seq_printf(m, " size %zd:%zd data %pK\n",
		   t->buffer->data_size, t->buffer->offsets_size,
		   t->buffer->data);
 }
@@ -3346,7 +3345,7 @@ static void print_binder_buffer(struct seq_file *m, const char *prefix,
			       struct binder_buffer *buffer)
 {
-	seq_printf(m, "%s %d: %p size %zd:%zd %s\n",
+	seq_printf(m, "%s %d: %pK size %zd:%zd %s\n",
		   prefix, buffer->debug_id, buffer->data,
		   buffer->data_size, buffer->offsets_size,
		   buffer->transaction ? "active" : "delivered");
diff --git a/drivers/staging/android/ion/ion.c b/drivers/staging/android/ion/ion.c
index a22521df6480..06dcd41c4a50 100644
--- a/drivers/staging/android/ion/ion.c
+++ b/drivers/staging/android/ion/ion.c
@@ -420,13 +420,22 @@ static void ion_handle_get(struct ion_handle *handle)
 	kref_get(&handle->ref);
 }
 
-static int ion_handle_put(struct ion_handle *handle)
+static int ion_handle_put_nolock(struct ion_handle *handle)
+{
+	int ret;
+
+	ret = kref_put(&handle->ref, ion_handle_destroy);
+
+	return ret;
+}
+
+int ion_handle_put(struct ion_handle *handle)
 {
 	struct ion_client *client = handle->client;
 	int ret;
 
 	mutex_lock(&client->lock);
-	ret = kref_put(&handle->ref, ion_handle_destroy);
+	ret = ion_handle_put_nolock(handle);
 	mutex_unlock(&client->lock);
 
 	return ret;
@@ -450,20 +459,30 @@ static struct ion_handle *ion_handle_lookup(struct ion_client *client,
 	return ERR_PTR(-EINVAL);
 }
 
-static struct ion_handle *ion_handle_get_by_id(struct ion_client *client,
+static struct ion_handle *ion_handle_get_by_id_nolock(struct ion_client *client,
						int id)
 {
 	struct ion_handle *handle;
 
-	mutex_lock(&client->lock);
 	handle = idr_find(&client->idr, id);
 	if (handle)
 		ion_handle_get(handle);
-	mutex_unlock(&client->lock);
 
 	return handle ? handle : ERR_PTR(-EINVAL);
 }
 
+struct ion_handle *ion_handle_get_by_id(struct ion_client *client,
+					int id)
+{
+	struct ion_handle *handle;
+
+	mutex_lock(&client->lock);
+	handle = ion_handle_get_by_id_nolock(client, id);
+	mutex_unlock(&client->lock);
+
+	return handle;
+}
+
 static bool ion_handle_validate(struct ion_client *client,
				 struct ion_handle *handle)
 {
@@ -571,22 +590,28 @@ struct ion_handle *ion_alloc(struct ion_client *client, size_t len,
 }
 EXPORT_SYMBOL(ion_alloc);
 
-void ion_free(struct ion_client *client, struct ion_handle *handle)
+static void ion_free_nolock(struct ion_client *client, struct ion_handle *handle)
 {
 	bool valid_handle;
 
 	BUG_ON(client != handle->client);
 
-	mutex_lock(&client->lock);
 	valid_handle = ion_handle_validate(client, handle);
 	if (!valid_handle) {
 		WARN(1, "%s: invalid handle passed to free.\n", __func__);
-		mutex_unlock(&client->lock);
 		return;
 	}
+	ion_handle_put_nolock(handle);
+}
+
+void ion_free(struct ion_client *client, struct ion_handle *handle)
+{
+	BUG_ON(client != handle->client);
+
+	mutex_lock(&client->lock);
+	ion_free_nolock(client, handle);
 	mutex_unlock(&client->lock);
-	ion_handle_put(handle);
 }
 EXPORT_SYMBOL(ion_free);
@@ -1167,7 +1192,7 @@ static void ion_unmap_dma_buf(struct dma_buf_attachment *attachment,
 		}
 	}
 
-	dev_warn(attachment->dev, "Not found a map(%p)\n",
+	dev_warn(attachment->dev, "Not found a map(%pK)\n",
		 to_dma_iommu_mapping(attachment->dev));
 
 	mutex_unlock(&buffer->lock);
@@ -1258,7 +1283,7 @@ static void ion_vm_open(struct vm_area_struct *vma)
 	mutex_lock(&buffer->lock);
 	list_add(&vma_list->list, &buffer->vmas);
 	mutex_unlock(&buffer->lock);
-	pr_debug("%s: adding %p\n", __func__, vma);
+	pr_debug("%s: adding %pK\n", __func__, vma);
 }
 
 static void ion_vm_close(struct vm_area_struct *vma)
@@ -1273,7 +1298,7 @@ static void ion_vm_close(struct vm_area_struct *vma)
 			continue;
 		list_del(&vma_list->list);
 		kfree(vma_list);
-		pr_debug("%s: deleting %p\n", __func__, vma);
+		pr_debug("%s: deleting %pK\n", __func__, vma);
 		break;
 	}
 	mutex_unlock(&buffer->lock);
@@ -1395,7 +1420,7 @@ static int ion_dma_buf_set_private(struct dma_buf *dmabuf, struct device *dev,
 	imp->delete = delete;
 out:
 	mutex_unlock(&buffer->lock);
-	dev_dbg(dev, "%s() dmabuf=%p err=%d i=%d priv=%p\n",
+	dev_dbg(dev, "%s() dmabuf=%pK err=%d i=%d priv=%pK\n",
		__func__, dmabuf, err, i, priv);
 	return err;
 }
@@ -1418,7 +1443,7 @@ static void *ion_dma_buf_get_private(struct dma_buf *dmabuf,
 		}
 	}
 	mutex_unlock(&buffer->lock);
-	dev_dbg(dev, "%s() dmabuf=%p i=%d priv=%p\n",
+	dev_dbg(dev, "%s() dmabuf=%pK i=%d priv=%pK\n",
		__func__, dmabuf, i, priv);
 	return priv;
 }
@@ -1445,13 +1470,13 @@ static void *ion_dma_buf_vmap(struct dma_buf *dmabuf)
 {
 	void *addr = ion_dma_buf_kmap(dmabuf, 0);
 
-	pr_info("%s() %p\n", __func__, addr);
+	pr_info("%s() %pK\n", __func__, addr);
 	return addr;
 }
 
 static void ion_dma_buf_vunmap(struct dma_buf *dmabuf, void *vaddr)
 {
-	pr_info("%s() %p\n", __func__, vaddr);
+	pr_info("%s() %pK\n", __func__, vaddr);
 	ion_dma_buf_kunmap(dmabuf, 0, vaddr);
 }
@@ -1786,11 +1811,15 @@ static long ion_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 	{
 		struct ion_handle *handle;
 
-		handle = ion_handle_get_by_id(client, data.handle.handle);
-		if (IS_ERR(handle))
+		mutex_lock(&client->lock);
+		handle = ion_handle_get_by_id_nolock(client, data.handle.handle);
+		if (IS_ERR(handle)) {
+			mutex_unlock(&client->lock);
 			return PTR_ERR(handle);
-		ion_free(client, handle);
-		ion_handle_put(handle);
+		}
+		ion_free_nolock(client, handle);
+		ion_handle_put_nolock(handle);
+		mutex_unlock(&client->lock);
 		break;
 	}
 	case ION_IOC_SHARE:
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index f5bce9096555..644dc16fa11a 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -177,6 +177,19 @@ out:
 	return rc;
 }
 
+static int ecryptfs_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	struct file *lower_file = ecryptfs_file_to_lower(file);
+	/*
+	 * Don't allow mmap on top of file systems that don't support it
+	 * natively.  If FILESYSTEM_MAX_STACK_DEPTH > 2 or ecryptfs
+	 * allows recursive mounting, this will need to be extended.
+	 */
+	if (!lower_file->f_op->mmap)
+		return -ENODEV;
+	return generic_file_mmap(file, vma);
+}
+
 /**
  * ecryptfs_open
  * @inode: inode speciying file to open
@@ -360,7 +373,7 @@ const struct file_operations ecryptfs_main_fops = {
 #ifdef CONFIG_COMPAT
 	.compat_ioctl = ecryptfs_compat_ioctl,
 #endif
-	.mmap = generic_file_mmap,
+	.mmap = ecryptfs_mmap,
 	.open = ecryptfs_open,
 	.flush = ecryptfs_flush,
 	.release = ecryptfs_release,
diff --git a/fs/ecryptfs/kthread.c b/fs/ecryptfs/kthread.c
index 9b661a4ccee7..f1ea610362c6 100644
--- a/fs/ecryptfs/kthread.c
+++ b/fs/ecryptfs/kthread.c
@@ -25,7 +25,6 @@
 #include <linux/slab.h>
 #include <linux/wait.h>
 #include <linux/mount.h>
-#include <linux/file.h>
 #include "ecryptfs_kernel.h"
 
 struct ecryptfs_open_req {
@@ -148,7 +147,7 @@ int ecryptfs_privileged_open(struct file **lower_file,
 	flags |= IS_RDONLY(lower_dentry->d_inode) ? O_RDONLY : O_RDWR;
 	(*lower_file) = dentry_open(&req.path, flags, cred);
 	if (!IS_ERR(*lower_file))
-		goto have_file;
+		goto out;
 	if ((flags & O_ACCMODE) == O_RDONLY) {
 		rc = PTR_ERR((*lower_file));
 		goto out;
 	}
@@ -166,16 +165,8 @@ int ecryptfs_privileged_open(struct file **lower_file,
 	mutex_unlock(&ecryptfs_kthread_ctl.mux);
 	wake_up(&ecryptfs_kthread_ctl.wait);
 	wait_for_completion(&req.done);
-	if (IS_ERR(*lower_file)) {
+	if (IS_ERR(*lower_file))
 		rc = PTR_ERR(*lower_file);
-		goto out;
-	}
-have_file:
-	if ((*lower_file)->f_op->mmap == NULL) {
-		fput(*lower_file);
-		*lower_file = NULL;
-		rc = -EMEDIUMTYPE;
-	}
 out:
 	return rc;
 }
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 948b07d0cd71..15970d1ba73f 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -337,6 +337,12 @@ struct perf_event {
 	int				nr_siblings;
 	int				group_flags;
 	struct perf_event		*group_leader;
+
+	/*
+	 * Protect the pmu, attributes and context of a group leader.
+	 * Note: does not protect the pointer to the group_leader.
+	 */
+	struct mutex			group_leader_mutex;
 	struct pmu			*pmu;
 
 	enum perf_event_active_state	state;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 6ac16003c284..2cc213668cf6 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -912,6 +912,77 @@ static void put_ctx(struct perf_event_context *ctx)
 }
 
 /*
+ * Because of perf_event::ctx migration in sys_perf_event_open::move_group and
+ * perf_pmu_migrate_context() we need some magic.
+ *
+ * Those places that change perf_event::ctx will hold both
+ * perf_event_ctx::mutex of the 'old' and 'new' ctx value.
+ *
+ * Lock ordering is by mutex address. There is one other site where
+ * perf_event_context::mutex nests and that is put_event(). But remember that
+ * that is a parent<->child context relation, and migration does not affect
+ * children, therefore these two orderings should not interact.
+ *
+ * The change in perf_event::ctx does not affect children (as claimed above)
+ * because the sys_perf_event_open() case will install a new event and break
+ * the ctx parent<->child relation, and perf_pmu_migrate_context() is only
+ * concerned with cpuctx and that doesn't have children.
+ *
+ * The places that change perf_event::ctx will issue:
+ *
+ *   perf_remove_from_context();
+ *   synchronize_rcu();
+ *   perf_install_in_context();
+ *
+ * to affect the change. The remove_from_context() + synchronize_rcu() should
+ * quiesce the event, after which we can install it in the new location. This
+ * means that only external vectors (perf_fops, prctl) can perturb the event
+ * while in transit. Therefore all such accessors should also acquire
+ * perf_event_context::mutex to serialize against this.
+ *
+ * However; because event->ctx can change while we're waiting to acquire
+ * ctx->mutex we must be careful and use the below perf_event_ctx_lock()
+ * function.
+ *
+ * Lock order:
+ *	task_struct::perf_event_mutex
+ *	  perf_event_context::mutex
+ *	    perf_event_context::lock
+ *	    perf_event::child_mutex;
+ *	    perf_event::mmap_mutex
+ *	    mmap_sem
+ */
+static struct perf_event_context *perf_event_ctx_lock(struct perf_event *event)
+{
+	struct perf_event_context *ctx;
+
+again:
+	rcu_read_lock();
+	ctx = ACCESS_ONCE(event->ctx);
+	if (!atomic_inc_not_zero(&ctx->refcount)) {
+		rcu_read_unlock();
+		goto again;
+	}
+	rcu_read_unlock();
+
+	mutex_lock(&ctx->mutex);
+	if (event->ctx != ctx) {
+		mutex_unlock(&ctx->mutex);
+		put_ctx(ctx);
+		goto again;
+	}
+
+	return ctx;
+}
+
+static void perf_event_ctx_unlock(struct perf_event *event,
+				  struct perf_event_context *ctx)
+{
+	mutex_unlock(&ctx->mutex);
+	put_ctx(ctx);
+}
+
+/*
  * This must be done under the ctx->lock, such as to serialize against
  * context_equiv(), therefore we cannot call put_ctx() since that might end up
  * calling scheduler related locks and ctx->lock nests inside those.
@@ -1659,7 +1730,7 @@ int __perf_event_disable(void *info)
  * is the current context on this CPU and preemption is disabled,
  * hence we can't get into perf_event_task_sched_out for this context.
  */
-void perf_event_disable(struct perf_event *event)
+static void _perf_event_disable(struct perf_event *event)
 {
 	struct perf_event_context *ctx = event->ctx;
 	struct task_struct *task = ctx->task;
@@ -1700,6 +1771,19 @@ retry:
 	}
 	raw_spin_unlock_irq(&ctx->lock);
 }
+
+/*
+ * Strictly speaking kernel users cannot create groups and therefore this
+ * interface does not need the perf_event_ctx_lock() magic.
+ */
+void perf_event_disable(struct perf_event *event)
+{
+	struct perf_event_context *ctx;
+
+	ctx = perf_event_ctx_lock(event);
+	_perf_event_disable(event);
+	perf_event_ctx_unlock(event, ctx);
+}
 EXPORT_SYMBOL_GPL(perf_event_disable);
 
 static void perf_set_shadow_time(struct perf_event *event,
@@ -2163,7 +2247,7 @@ unlock:
  * perf_event_for_each_child or perf_event_for_each as described
  * for perf_event_disable.
  */
-void perf_event_enable(struct perf_event *event)
+static void _perf_event_enable(struct perf_event *event)
 {
 	struct perf_event_context *ctx = event->ctx;
 	struct task_struct *task = ctx->task;
@@ -2219,9 +2303,21 @@ retry:
 out:
 	raw_spin_unlock_irq(&ctx->lock);
 }
+
+/*
+ * See perf_event_disable();
+ */
+void perf_event_enable(struct perf_event *event)
+{
+	struct perf_event_context *ctx;
+
+	ctx = perf_event_ctx_lock(event);
+	_perf_event_enable(event);
+	perf_event_ctx_unlock(event, ctx);
+}
 EXPORT_SYMBOL_GPL(perf_event_enable);
 
-int perf_event_refresh(struct perf_event *event, int refresh)
+static int _perf_event_refresh(struct perf_event *event, int refresh)
 {
 	/*
 	 * not supported on inherited events
@@ -2230,10 +2326,25 @@ int perf_event_refresh(struct perf_event *event, int refresh)
 		return -EINVAL;
 
 	atomic_add(refresh, &event->event_limit);
-	perf_event_enable(event);
+	_perf_event_enable(event);
 
 	return 0;
 }
+
+/*
+ * See perf_event_disable()
+ */
+int perf_event_refresh(struct perf_event *event, int refresh)
+{
+	struct perf_event_context *ctx;
+	int ret;
+
+	ctx = perf_event_ctx_lock(event);
+	ret = _perf_event_refresh(event, refresh);
+	perf_event_ctx_unlock(event, ctx);
+
+	return ret;
+}
 EXPORT_SYMBOL_GPL(perf_event_refresh);
 
 static void ctx_sched_out(struct perf_event_context *ctx,
@@ -3426,7 +3537,16 @@ static void perf_remove_from_owner(struct perf_event *event)
 	rcu_read_unlock();
 
 	if (owner) {
-		mutex_lock(&owner->perf_event_mutex);
+		/*
+		 * If we're here through perf_event_exit_task() we're already
+		 * holding ctx->mutex which would be an inversion wrt. the
+		 * normal lock order.
+		 *
+		 * However we can safely take this lock because its the child
+		 * ctx->mutex.
+		 */
+		mutex_lock_nested(&owner->perf_event_mutex, SINGLE_DEPTH_NESTING);
+
 		/*
 		 * We have to re-check the event->owner field, if it is cleared
 		 * we raced with perf_event_exit_task(), acquiring the mutex
@@ -3552,12 +3672,13 @@ static int perf_event_read_group(struct perf_event *event,
				   u64 read_format, char __user *buf)
 {
 	struct perf_event *leader = event->group_leader, *sub;
-	int n = 0, size = 0, ret = -EFAULT;
 	struct perf_event_context *ctx = leader->ctx;
-	u64 values[5];
+	int n = 0, size = 0, ret;
 	u64 count, enabled, running;
+	u64 values[5];
+
+	lockdep_assert_held(&ctx->mutex);
 
-	mutex_lock(&ctx->mutex);
 	count = perf_event_read_value(leader, &enabled, &running);
 
 	values[n++] = 1 + leader->nr_siblings;
@@ -3572,7 +3693,7 @@ static int perf_event_read_group(struct perf_event *event,
 	size = n * sizeof(u64);
 
 	if (copy_to_user(buf, values, size))
-		goto unlock;
+		return -EFAULT;
 
 	ret = size;
 
@@ -3586,14 +3707,11 @@ static int perf_event_read_group(struct perf_event *event,
 		size = n * sizeof(u64);
 
 		if (copy_to_user(buf + ret, values, size)) {
-			ret = -EFAULT;
-			goto unlock;
+			return -EFAULT;
 		}
 
 		ret += size;
 	}
-unlock:
-	mutex_unlock(&ctx->mutex);
 
 	return ret;
 }
@@ -3665,8 +3783,14 @@ static ssize_t
 perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 {
 	struct perf_event *event = file->private_data;
+	struct perf_event_context *ctx;
+	int ret;
 
-	return perf_read_hw(event, buf, count);
+	ctx = perf_event_ctx_lock(event);
+	ret = perf_read_hw(event, buf, count);
+	perf_event_ctx_unlock(event, ctx);
+
+	return ret;
 }
 
 static unsigned int perf_poll(struct file *file, poll_table *wait)
@@ -3692,7 +3816,7 @@ static unsigned int perf_poll(struct file *file, poll_table *wait)
 	return events;
 }
 
-static void perf_event_reset(struct perf_event *event)
+static void _perf_event_reset(struct perf_event *event)
 {
 	(void)perf_event_read(event);
 	local64_set(&event->count, 0);
@@ -3711,6 +3835,7 @@ static void perf_event_for_each_child(struct perf_event *event,
 	struct perf_event *child;
 
 	WARN_ON_ONCE(event->ctx->parent_ctx);
+
 	mutex_lock(&event->child_mutex);
 	func(event);
 	list_for_each_entry(child, &event->child_list, child_list)
@@ -3724,14 +3849,13 @@ static void perf_event_for_each(struct perf_event *event,
 	struct perf_event_context *ctx = event->ctx;
 	struct perf_event *sibling;
 
-	WARN_ON_ONCE(ctx->parent_ctx);
-	mutex_lock(&ctx->mutex);
+	lockdep_assert_held(&ctx->mutex);
+
 	event = event->group_leader;
 
 	perf_event_for_each_child(event, func);
 	list_for_each_entry(sibling, &event->sibling_list, group_entry)
 		perf_event_for_each_child(sibling, func);
-	mutex_unlock(&ctx->mutex);
 }
 
 static int perf_event_period(struct perf_event *event, u64 __user *arg)
@@ -3801,25 +3925,24 @@ static int perf_event_set_output(struct perf_event *event,
				  struct perf_event *output_event);
 static int perf_event_set_filter(struct perf_event *event, void __user *arg);
 
-static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned long arg)
 {
-	struct perf_event *event = file->private_data;
 	void (*func)(struct perf_event *);
 	u32 flags = arg;
 
 	switch (cmd) {
 	case PERF_EVENT_IOC_ENABLE:
-		func = perf_event_enable;
+		func = _perf_event_enable;
 		break;
 	case PERF_EVENT_IOC_DISABLE:
-		func = perf_event_disable;
+		func = _perf_event_disable;
 		break;
 	case PERF_EVENT_IOC_RESET:
-		func = perf_event_reset;
+		func = _perf_event_reset;
 		break;
 
 	case PERF_EVENT_IOC_REFRESH:
-		return perf_event_refresh(event, arg);
+		return _perf_event_refresh(event, arg);
 
 	case PERF_EVENT_IOC_PERIOD:
 		return perf_event_period(event, (u64 __user *)arg);
@@ -3866,6 +3989,19 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	return 0;
 }
 
+static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	struct perf_event *event = file->private_data;
+	struct perf_event_context *ctx;
+	long ret;
+
+	ctx = perf_event_ctx_lock(event);
+	ret = _perf_ioctl(event, cmd, arg);
+	perf_event_ctx_unlock(event, ctx);
+
+	return ret;
+}
+
 #ifdef CONFIG_COMPAT
 static long perf_compat_ioctl(struct file *file, unsigned int cmd,
			       unsigned long arg)
@@ -3888,11 +4024,15 @@ static long perf_compat_ioctl(struct file *file, unsigned int cmd,
 
 int perf_event_task_enable(void)
 {
+	struct perf_event_context *ctx;
 	struct perf_event *event;
 
 	mutex_lock(&current->perf_event_mutex);
-	list_for_each_entry(event, &current->perf_event_list, owner_entry)
-		perf_event_for_each_child(event, perf_event_enable);
+	list_for_each_entry(event, &current->perf_event_list, owner_entry) {
+		ctx = perf_event_ctx_lock(event);
+		perf_event_for_each_child(event, _perf_event_enable);
+		perf_event_ctx_unlock(event, ctx);
+	}
 	mutex_unlock(&current->perf_event_mutex);
 
 	return 0;
@@ -3900,11 +4040,15 @@ int perf_event_task_enable(void)
 
 int perf_event_task_disable(void)
 {
+	struct perf_event_context *ctx;
 	struct perf_event *event;
 
 	mutex_lock(&current->perf_event_mutex);
-	list_for_each_entry(event, &current->perf_event_list, owner_entry)
-		perf_event_for_each_child(event, perf_event_disable);
+	list_for_each_entry(event, &current->perf_event_list, owner_entry) {
+		ctx = perf_event_ctx_lock(event);
+		perf_event_for_each_child(event, _perf_event_disable);
+		perf_event_ctx_unlock(event, ctx);
+	}
 	mutex_unlock(&current->perf_event_mutex);
 
 	return 0;
@@ -6927,6 +7071,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 	if (!group_leader)
 		group_leader = event;
 
+	mutex_init(&event->group_leader_mutex);
 	mutex_init(&event->child_mutex);
 	INIT_LIST_HEAD(&event->child_list);
@@ -7203,6 +7348,15 @@ out:
 	return ret;
 }
 
+static void mutex_lock_double(struct mutex *a, struct mutex *b)
+{
+	if (b < a)
+		swap(a, b);
+
+	mutex_lock(a);
+	mutex_lock_nested(b, SINGLE_DEPTH_NESTING);
+}
+
 /**
  * sys_perf_event_open - open a performance event, associate it to a task/cpu
  *
@@ -7218,7 +7372,7 @@ SYSCALL_DEFINE5(perf_event_open,
 	struct perf_event *group_leader = NULL, *output_event = NULL;
 	struct perf_event *event, *sibling;
 	struct perf_event_attr attr;
-	struct perf_event_context *ctx;
+	struct perf_event_context *ctx, *uninitialized_var(gctx);
 	struct file *event_file = NULL;
 	struct fd group = {NULL, 0};
 	struct task_struct *task = NULL;
@@ -7279,6 +7433,16 @@ SYSCALL_DEFINE5(perf_event_open,
 			group_leader = NULL;
 	}
 
+	/*
+	 * Take the group_leader's group_leader_mutex before observing
+	 * anything in the group leader that leads to changes in ctx,
+	 * many of which may be changing on another thread.
+	 * In particular, we want to take this lock before deciding
+	 * whether we need to move_group.
+	 */
+	if (group_leader)
+		mutex_lock(&group_leader->group_leader_mutex);
+
 	if (pid != -1 && !(flags & PERF_FLAG_PID_CGROUP)) {
 		task = find_lively_task_by_vpid(pid);
 		if (IS_ERR(task)) {
@@ -7407,9 +7571,14 @@ SYSCALL_DEFINE5(perf_event_open,
 	}
 
 	if (move_group) {
-		struct perf_event_context *gctx = group_leader->ctx;
+		gctx = group_leader->ctx;
+
+		/*
+		 * See perf_event_ctx_lock() for comments on the details
+		 * of swizzling perf_event::ctx.
+		 */
+		mutex_lock_double(&gctx->mutex, &ctx->mutex);
 
-		mutex_lock(&gctx->mutex);
 		perf_remove_from_context(group_leader, false);
 
 		/*
@@ -7424,27 +7593,38 @@ SYSCALL_DEFINE5(perf_event_open,
 			perf_event__state_init(sibling);
 			put_ctx(gctx);
 		}
-		mutex_unlock(&gctx->mutex);
-		put_ctx(gctx);
+	} else {
+		mutex_lock(&ctx->mutex);
 	}
 
 	WARN_ON_ONCE(ctx->parent_ctx);
-	mutex_lock(&ctx->mutex);
 
 	if (move_group) {
+		/*
+		 * Wait for everybody to stop referencing the events through
+		 * the old lists, before installing it on new lists.
+		 */
 		synchronize_rcu();
-		perf_install_in_context(ctx, group_leader, event->cpu);
+
+		perf_install_in_context(ctx, group_leader, group_leader->cpu);
 		get_ctx(ctx);
 		list_for_each_entry(sibling, &group_leader->sibling_list,
				    group_entry) {
-			perf_install_in_context(ctx, sibling, event->cpu);
+			perf_install_in_context(ctx, sibling, sibling->cpu);
 			get_ctx(ctx);
 		}
 	}
 
 	perf_install_in_context(ctx, event, event->cpu);
 	perf_unpin_context(ctx);
+
+	if (move_group) {
+		mutex_unlock(&gctx->mutex);
+		put_ctx(gctx);
+	}
 	mutex_unlock(&ctx->mutex);
 
+	if (group_leader)
+		mutex_unlock(&group_leader->group_leader_mutex);
+
 	put_online_cpus();
@@ -7481,6 +7661,8 @@ err_task:
 	if (task)
 		put_task_struct(task);
 err_group_fd:
+	if (group_leader)
+		mutex_unlock(&group_leader->group_leader_mutex);
 	fdput(group);
 err_fd:
 	put_unused_fd(event_fd);
@@ -7551,7 +7733,11 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
 	src_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, src_cpu)->ctx;
 	dst_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, dst_cpu)->ctx;
 
-	mutex_lock(&src_ctx->mutex);
+	/*
+	 * See perf_event_ctx_lock() for comments on the details
+	 * of swizzling perf_event::ctx.
+	 */
+	mutex_lock_double(&src_ctx->mutex, &dst_ctx->mutex);
 	list_for_each_entry_safe(event, tmp, &src_ctx->event_list,
				 event_entry) {
 		perf_remove_from_context(event, false);
@@ -7559,11 +7745,9 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
 		put_ctx(src_ctx);
 		list_add(&event->migrate_entry, &events);
 	}
-	mutex_unlock(&src_ctx->mutex);
 
 	synchronize_rcu();
 
-	mutex_lock(&dst_ctx->mutex);
 	list_for_each_entry_safe(event, tmp, &events, migrate_entry) {
 		list_del(&event->migrate_entry);
 		if (event->state >= PERF_EVENT_STATE_OFF)
@@ -7573,6 +7757,7 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
 		get_ctx(dst_ctx);
 	}
 	mutex_unlock(&dst_ctx->mutex);
+	mutex_unlock(&src_ctx->mutex);
 }
 EXPORT_SYMBOL_GPL(perf_pmu_migrate_context);
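For reference, the move_group path that the group_leader_mutex and mutex_lock_double() changes above serialize is driven from user space through perf_event_open(2): attaching a hardware event to a group led by a software event forces the kernel to move the whole group from the software context to the hardware context. A small user-space sketch of that call pattern (Linux-only, minimal error handling; not part of this merge):

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* Thin wrapper: glibc provides no perf_event_open() symbol. */
static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
			    int cpu, int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
	struct perf_event_attr attr;
	int leader, member;

	/* Software event as group leader; group_fd == -1 creates the group. */
	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_SOFTWARE;
	attr.config = PERF_COUNT_SW_TASK_CLOCK;
	attr.disabled = 1;

	leader = (int)perf_event_open(&attr, 0, -1, -1, 0);
	if (leader < 0) {
		perror("perf_event_open(leader)");
		return 1;
	}

	/*
	 * Attaching a hardware event to a software-only group is exactly the
	 * move_group case in sys_perf_event_open(); the backported locking
	 * (group_leader_mutex, mutex_lock_double) serializes concurrent opens
	 * racing on the same leader during this migration.
	 */
	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_CPU_CYCLES;

	member = (int)perf_event_open(&attr, 0, -1, leader, 0);
	if (member < 0) {
		perror("perf_event_open(member)");
		close(leader);
		return 1;
	}

	printf("group created: leader fd %d, member fd %d\n", leader, member);
	close(member);
	close(leader);
	return 0;
}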