perf: Factorize callchain context handling
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 31d6afe9259412025f02e20bddcc37fa3bf4d62b..615d024894cf6c110452f8c2d425238acde529dc 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -675,7 +675,6 @@ group_sched_in(struct perf_event *group_event,
        struct perf_event *event, *partial_group = NULL;
        const struct pmu *pmu = group_event->pmu;
        bool txn = false;
-       int ret;
 
        if (group_event->state == PERF_EVENT_STATE_OFF)
                return 0;
@@ -703,14 +702,8 @@ group_sched_in(struct perf_event *group_event,
                }
        }
 
-       if (!txn)
-               return 0;
-
-       ret = pmu->commit_txn(pmu);
-       if (!ret) {
-               pmu->cancel_txn(pmu);
+       if (!txn || !pmu->commit_txn(pmu))
                return 0;
-       }
 
 group_error:
        /*
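
The transaction tail of group_sched_in() collapses into a single test: success is "no transaction open" or "commit_txn() returned 0" (the usual 0-on-success convention, which the new condition relies on). Note that the removed code cancelled the transaction precisely when the commit had succeeded; after this change a failed commit falls through to the group_error unwind, which is presumably where cancel_txn() now runs (outside this hunk). A minimal sketch of the fixed control flow, with the callbacks passed in as stand-ins for the struct pmu methods:

    /* Sketch only: commit() returns 0 on success, as struct pmu does. */
    static int group_sched_in_tail(int txn, int (*commit)(void),
                                   void (*cancel)(void))
    {
            if (!txn || !commit())
                    return 0;       /* nothing open, or commit succeeded */

            /* group_error: unwind the partially scheduled group, then: */
            cancel();               /* cancel only on a failed commit */
            return -1;
    }
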
@@ -1155,9 +1148,9 @@ static void __perf_event_sync_stat(struct perf_event *event,
         * In order to keep per-task stats reliable we need to flip the event
         * values when we flip the contexts.
         */
-       value = atomic64_read(&next_event->count);
-       value = atomic64_xchg(&event->count, value);
-       atomic64_set(&next_event->count, value);
+       value = local64_read(&next_event->count);
+       value = local64_xchg(&event->count, value);
+       local64_set(&next_event->count, value);
 
        swap(event->total_time_enabled, next_event->total_time_enabled);
        swap(event->total_time_running, next_event->total_time_running);
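
The atomic64_t to local64_t conversions here and throughout the patch rest on one invariant: event->count is only ever written from the event's own context (the owning CPU, including NMI on it), so the cheaper CPU-local atomics suffice. A C11 analogue of the value flip performed above, assuming only atomicity of each individual access is required:

    #include <stdatomic.h>
    #include <stdint.h>

    /* Swap two event counts, as __perf_event_sync_stat() does. */
    static void sync_counts(_Atomic int64_t *a, _Atomic int64_t *b)
    {
            int64_t value = atomic_load(b);         /* local64_read */
            value = atomic_exchange(a, value);      /* local64_xchg */
            atomic_store(b, value);                 /* local64_set  */
    }
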
@@ -1507,6 +1500,9 @@ do {                                      \
                divisor = nsec * frequency;
        }
 
+       if (!divisor)
+               return dividend;
+
        return div64_u64(dividend, divisor);
 }
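
perf_calculate_period() solves period = count * NSEC_PER_SEC / (nsec * frequency) for the requested sampling frequency, with the branch above picking a scaling that keeps the intermediates in 64 bits. The new guard covers the corner where the computed divisor is zero (for example a zero time delta), returning the unscaled dividend instead of dividing by zero in div64_u64(). The guarded arithmetic as a standalone sketch:

    #include <stdint.h>

    #define NSEC_PER_SEC 1000000000ULL

    static uint64_t calc_period(uint64_t count, uint64_t nsec,
                                uint64_t frequency)
    {
            uint64_t dividend = count * NSEC_PER_SEC;
            uint64_t divisor  = nsec * frequency;

            if (!divisor)                   /* e.g. nsec == 0: no division */
                    return dividend;

            return dividend / divisor;      /* div64_u64() in the kernel */
    }
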
 
@@ -1529,7 +1525,7 @@ static int perf_event_start(struct perf_event *event)
 static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
 {
        struct hw_perf_event *hwc = &event->hw;
-       u64 period, sample_period;
+       s64 period, sample_period;
        s64 delta;
 
        period = perf_calculate_period(event, nsec, count);
@@ -1544,10 +1540,10 @@ static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
 
        hwc->sample_period = sample_period;
 
-       if (atomic64_read(&hwc->period_left) > 8*sample_period) {
+       if (local64_read(&hwc->period_left) > 8*sample_period) {
                perf_disable();
                perf_event_stop(event);
-               atomic64_set(&hwc->period_left, 0);
+               local64_set(&hwc->period_left, 0);
                perf_event_start(event);
                perf_enable();
        }
@@ -1588,7 +1584,7 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
 
                perf_disable();
                event->pmu->read(event);
-               now = atomic64_read(&event->count);
+               now = local64_read(&event->count);
                delta = now - hwc->freq_count_stamp;
                hwc->freq_count_stamp = now;
 
@@ -1740,6 +1736,11 @@ static void __perf_event_read(void *info)
        event->pmu->read(event);
 }
 
+static inline u64 perf_event_count(struct perf_event *event)
+{
+       return local64_read(&event->count) + atomic64_read(&event->child_count);
+}
+
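
With event->count demoted to a local64_t, counts inherited from exited children can no longer be folded into it from another CPU; they accumulate in a separate atomic64_t child_count (see the sync_child_event() hunk at the end of this patch), and perf_event_count() reports the sum. In miniature, with C11 atomics standing in for local64_t/atomic64_t:

    #include <stdatomic.h>
    #include <stdint.h>

    struct event {
            _Atomic int64_t count;          /* written only by the owning CPU */
            _Atomic int64_t child_count;    /* folded in as children exit     */
    };

    static int64_t event_total(struct event *e)
    {
            return atomic_load(&e->count) + atomic_load(&e->child_count);
    }
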
 static u64 perf_event_read(struct perf_event *event)
 {
        /*
@@ -1759,7 +1760,7 @@ static u64 perf_event_read(struct perf_event *event)
                raw_spin_unlock_irqrestore(&ctx->lock, flags);
        }
 
-       return atomic64_read(&event->count);
+       return perf_event_count(event);
 }
 
 /*
@@ -1880,7 +1881,7 @@ static void free_event_rcu(struct rcu_head *head)
 }
 
 static void perf_pending_sync(struct perf_event *event);
-static void perf_mmap_data_put(struct perf_mmap_data *data);
+static void perf_buffer_put(struct perf_buffer *buffer);
 
 static void free_event(struct perf_event *event)
 {
@@ -1888,7 +1889,7 @@ static void free_event(struct perf_event *event)
 
        if (!event->parent) {
                atomic_dec(&nr_events);
-               if (event->attr.mmap)
+               if (event->attr.mmap || event->attr.mmap_data)
                        atomic_dec(&nr_mmap_events);
                if (event->attr.comm)
                        atomic_dec(&nr_comm_events);
@@ -1896,9 +1897,9 @@ static void free_event(struct perf_event *event)
                        atomic_dec(&nr_task_events);
        }
 
-       if (event->data) {
-               perf_mmap_data_put(event->data);
-               event->data = NULL;
+       if (event->buffer) {
+               perf_buffer_put(event->buffer);
+               event->buffer = NULL;
        }
 
        if (event->destroy)
@@ -2123,13 +2124,13 @@ perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 static unsigned int perf_poll(struct file *file, poll_table *wait)
 {
        struct perf_event *event = file->private_data;
-       struct perf_mmap_data *data;
+       struct perf_buffer *buffer;
        unsigned int events = POLL_HUP;
 
        rcu_read_lock();
-       data = rcu_dereference(event->data);
-       if (data)
-               events = atomic_xchg(&data->poll, 0);
+       buffer = rcu_dereference(event->buffer);
+       if (buffer)
+               events = atomic_xchg(&buffer->poll, 0);
        rcu_read_unlock();
 
        poll_wait(file, &event->waitq, wait);
@@ -2140,7 +2141,7 @@ static unsigned int perf_poll(struct file *file, poll_table *wait)
 static void perf_event_reset(struct perf_event *event)
 {
        (void)perf_event_read(event);
-       atomic64_set(&event->count, 0);
+       local64_set(&event->count, 0);
        perf_event_update_userpage(event);
 }
 
@@ -2339,14 +2340,14 @@ static int perf_event_index(struct perf_event *event)
 void perf_event_update_userpage(struct perf_event *event)
 {
        struct perf_event_mmap_page *userpg;
-       struct perf_mmap_data *data;
+       struct perf_buffer *buffer;
 
        rcu_read_lock();
-       data = rcu_dereference(event->data);
-       if (!data)
+       buffer = rcu_dereference(event->buffer);
+       if (!buffer)
                goto unlock;
 
-       userpg = data->user_page;
+       userpg = buffer->user_page;
 
        /*
         * Disable preemption so as to not let the corresponding user-space
@@ -2356,9 +2357,9 @@ void perf_event_update_userpage(struct perf_event *event)
        ++userpg->lock;
        barrier();
        userpg->index = perf_event_index(event);
-       userpg->offset = atomic64_read(&event->count);
+       userpg->offset = perf_event_count(event);
        if (event->state == PERF_EVENT_STATE_ACTIVE)
-               userpg->offset -= atomic64_read(&event->hw.prev_count);
+               userpg->offset -= local64_read(&event->hw.prev_count);
 
        userpg->time_enabled = event->total_time_enabled +
                        atomic64_read(&event->child_total_time_enabled);
@@ -2373,6 +2374,25 @@ unlock:
        rcu_read_unlock();
 }
 
+static unsigned long perf_data_size(struct perf_buffer *buffer);
+
+static void
+perf_buffer_init(struct perf_buffer *buffer, long watermark, int flags)
+{
+       long max_size = perf_data_size(buffer);
+
+       if (watermark)
+               buffer->watermark = min(max_size, watermark);
+
+       if (!buffer->watermark)
+               buffer->watermark = max_size / 2;
+
+       if (flags & PERF_BUFFER_WRITABLE)
+               buffer->writable = 1;
+
+       atomic_set(&buffer->refcount, 1);
+}
+
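
perf_buffer_init() centralizes setup that was previously split between allocation and perf_mmap_data_init(): a caller-supplied wakeup watermark is clamped to the buffer size, zero means "wake the consumer at half a buffer", and the PERF_BUFFER_WRITABLE flag replaces the direct event->data->writable poke done by perf_mmap() before. The defaulting rule in isolation:

    /* Effective wakeup watermark for a buffer of max_size bytes. */
    static long effective_watermark(long max_size, long requested)
    {
            long wm = 0;

            if (requested)
                    wm = requested < max_size ? requested : max_size;
            if (!wm)
                    wm = max_size / 2;      /* default: half the buffer */

            return wm;
    }
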
 #ifndef CONFIG_PERF_USE_VMALLOC
 
 /*
@@ -2380,15 +2400,15 @@ unlock:
  */
 
 static struct page *
-perf_mmap_to_page(struct perf_mmap_data *data, unsigned long pgoff)
+perf_mmap_to_page(struct perf_buffer *buffer, unsigned long pgoff)
 {
-       if (pgoff > data->nr_pages)
+       if (pgoff > buffer->nr_pages)
                return NULL;
 
        if (pgoff == 0)
-               return virt_to_page(data->user_page);
+               return virt_to_page(buffer->user_page);
 
-       return virt_to_page(data->data_pages[pgoff - 1]);
+       return virt_to_page(buffer->data_pages[pgoff - 1]);
 }
 
 static void *perf_mmap_alloc_page(int cpu)
@@ -2404,42 +2424,44 @@ static void *perf_mmap_alloc_page(int cpu)
        return page_address(page);
 }
 
-static struct perf_mmap_data *
-perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
+static struct perf_buffer *
+perf_buffer_alloc(int nr_pages, long watermark, int cpu, int flags)
 {
-       struct perf_mmap_data *data;
+       struct perf_buffer *buffer;
        unsigned long size;
        int i;
 
-       size = sizeof(struct perf_mmap_data);
+       size = sizeof(struct perf_buffer);
        size += nr_pages * sizeof(void *);
 
-       data = kzalloc(size, GFP_KERNEL);
-       if (!data)
+       buffer = kzalloc(size, GFP_KERNEL);
+       if (!buffer)
                goto fail;
 
-       data->user_page = perf_mmap_alloc_page(event->cpu);
-       if (!data->user_page)
+       buffer->user_page = perf_mmap_alloc_page(cpu);
+       if (!buffer->user_page)
                goto fail_user_page;
 
        for (i = 0; i < nr_pages; i++) {
-               data->data_pages[i] = perf_mmap_alloc_page(event->cpu);
-               if (!data->data_pages[i])
+               buffer->data_pages[i] = perf_mmap_alloc_page(cpu);
+               if (!buffer->data_pages[i])
                        goto fail_data_pages;
        }
 
-       data->nr_pages = nr_pages;
+       buffer->nr_pages = nr_pages;
+
+       perf_buffer_init(buffer, watermark, flags);
 
-       return data;
+       return buffer;
 
 fail_data_pages:
        for (i--; i >= 0; i--)
-               free_page((unsigned long)data->data_pages[i]);
+               free_page((unsigned long)buffer->data_pages[i]);
 
-       free_page((unsigned long)data->user_page);
+       free_page((unsigned long)buffer->user_page);
 
 fail_user_page:
-       kfree(data);
+       kfree(buffer);
 
 fail:
        return NULL;
@@ -2453,17 +2475,17 @@ static void perf_mmap_free_page(unsigned long addr)
        __free_page(page);
 }
 
-static void perf_mmap_data_free(struct perf_mmap_data *data)
+static void perf_buffer_free(struct perf_buffer *buffer)
 {
        int i;
 
-       perf_mmap_free_page((unsigned long)data->user_page);
-       for (i = 0; i < data->nr_pages; i++)
-               perf_mmap_free_page((unsigned long)data->data_pages[i]);
-       kfree(data);
+       perf_mmap_free_page((unsigned long)buffer->user_page);
+       for (i = 0; i < buffer->nr_pages; i++)
+               perf_mmap_free_page((unsigned long)buffer->data_pages[i]);
+       kfree(buffer);
 }
 
-static inline int page_order(struct perf_mmap_data *data)
+static inline int page_order(struct perf_buffer *buffer)
 {
        return 0;
 }
@@ -2476,18 +2498,18 @@ static inline int page_order(struct perf_mmap_data *data)
  * Required for architectures that have d-cache aliasing issues.
  */
 
-static inline int page_order(struct perf_mmap_data *data)
+static inline int page_order(struct perf_buffer *buffer)
 {
-       return data->page_order;
+       return buffer->page_order;
 }
 
 static struct page *
-perf_mmap_to_page(struct perf_mmap_data *data, unsigned long pgoff)
+perf_mmap_to_page(struct perf_buffer *buffer, unsigned long pgoff)
 {
-       if (pgoff > (1UL << page_order(data)))
+       if (pgoff > (1UL << page_order(buffer)))
                return NULL;
 
-       return vmalloc_to_page((void *)data->user_page + pgoff * PAGE_SIZE);
+       return vmalloc_to_page((void *)buffer->user_page + pgoff * PAGE_SIZE);
 }
 
 static void perf_mmap_unmark_page(void *addr)
@@ -2497,57 +2519,59 @@ static void perf_mmap_unmark_page(void *addr)
        page->mapping = NULL;
 }
 
-static void perf_mmap_data_free_work(struct work_struct *work)
+static void perf_buffer_free_work(struct work_struct *work)
 {
-       struct perf_mmap_data *data;
+       struct perf_buffer *buffer;
        void *base;
        int i, nr;
 
-       data = container_of(work, struct perf_mmap_data, work);
-       nr = 1 << page_order(data);
+       buffer = container_of(work, struct perf_buffer, work);
+       nr = 1 << page_order(buffer);
 
-       base = data->user_page;
+       base = buffer->user_page;
        for (i = 0; i < nr + 1; i++)
                perf_mmap_unmark_page(base + (i * PAGE_SIZE));
 
        vfree(base);
-       kfree(data);
+       kfree(buffer);
 }
 
-static void perf_mmap_data_free(struct perf_mmap_data *data)
+static void perf_buffer_free(struct perf_buffer *buffer)
 {
-       schedule_work(&data->work);
+       schedule_work(&buffer->work);
 }
 
-static struct perf_mmap_data *
-perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
+static struct perf_buffer *
+perf_buffer_alloc(int nr_pages, long watermark, int cpu, int flags)
 {
-       struct perf_mmap_data *data;
+       struct perf_buffer *buffer;
        unsigned long size;
        void *all_buf;
 
-       size = sizeof(struct perf_mmap_data);
+       size = sizeof(struct perf_buffer);
        size += sizeof(void *);
 
-       data = kzalloc(size, GFP_KERNEL);
-       if (!data)
+       buffer = kzalloc(size, GFP_KERNEL);
+       if (!buffer)
                goto fail;
 
-       INIT_WORK(&data->work, perf_mmap_data_free_work);
+       INIT_WORK(&buffer->work, perf_buffer_free_work);
 
        all_buf = vmalloc_user((nr_pages + 1) * PAGE_SIZE);
        if (!all_buf)
                goto fail_all_buf;
 
-       data->user_page = all_buf;
-       data->data_pages[0] = all_buf + PAGE_SIZE;
-       data->page_order = ilog2(nr_pages);
-       data->nr_pages = 1;
+       buffer->user_page = all_buf;
+       buffer->data_pages[0] = all_buf + PAGE_SIZE;
+       buffer->page_order = ilog2(nr_pages);
+       buffer->nr_pages = 1;
 
-       return data;
+       perf_buffer_init(buffer, watermark, flags);
+
+       return buffer;
 
 fail_all_buf:
-       kfree(data);
+       kfree(buffer);
 
 fail:
        return NULL;
@@ -2555,15 +2579,15 @@ fail:
 
 #endif
 
-static unsigned long perf_data_size(struct perf_mmap_data *data)
+static unsigned long perf_data_size(struct perf_buffer *buffer)
 {
-       return data->nr_pages << (PAGE_SHIFT + page_order(data));
+       return buffer->nr_pages << (PAGE_SHIFT + page_order(buffer));
 }
 
 static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
        struct perf_event *event = vma->vm_file->private_data;
-       struct perf_mmap_data *data;
+       struct perf_buffer *buffer;
        int ret = VM_FAULT_SIGBUS;
 
        if (vmf->flags & FAULT_FLAG_MKWRITE) {
@@ -2573,14 +2597,14 @@ static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
        }
 
        rcu_read_lock();
-       data = rcu_dereference(event->data);
-       if (!data)
+       buffer = rcu_dereference(event->buffer);
+       if (!buffer)
                goto unlock;
 
        if (vmf->pgoff && (vmf->flags & FAULT_FLAG_WRITE))
                goto unlock;
 
-       vmf->page = perf_mmap_to_page(data, vmf->pgoff);
+       vmf->page = perf_mmap_to_page(buffer, vmf->pgoff);
        if (!vmf->page)
                goto unlock;
 
@@ -2595,52 +2619,35 @@ unlock:
        return ret;
 }
 
-static void
-perf_mmap_data_init(struct perf_event *event, struct perf_mmap_data *data)
+static void perf_buffer_free_rcu(struct rcu_head *rcu_head)
 {
-       long max_size = perf_data_size(data);
-
-       if (event->attr.watermark) {
-               data->watermark = min_t(long, max_size,
-                                       event->attr.wakeup_watermark);
-       }
+       struct perf_buffer *buffer;
 
-       if (!data->watermark)
-               data->watermark = max_size / 2;
-
-       atomic_set(&data->refcount, 1);
-       rcu_assign_pointer(event->data, data);
+       buffer = container_of(rcu_head, struct perf_buffer, rcu_head);
+       perf_buffer_free(buffer);
 }
 
-static void perf_mmap_data_free_rcu(struct rcu_head *rcu_head)
+static struct perf_buffer *perf_buffer_get(struct perf_event *event)
 {
-       struct perf_mmap_data *data;
-
-       data = container_of(rcu_head, struct perf_mmap_data, rcu_head);
-       perf_mmap_data_free(data);
-}
-
-static struct perf_mmap_data *perf_mmap_data_get(struct perf_event *event)
-{
-       struct perf_mmap_data *data;
+       struct perf_buffer *buffer;
 
        rcu_read_lock();
-       data = rcu_dereference(event->data);
-       if (data) {
-               if (!atomic_inc_not_zero(&data->refcount))
-                       data = NULL;
+       buffer = rcu_dereference(event->buffer);
+       if (buffer) {
+               if (!atomic_inc_not_zero(&buffer->refcount))
+                       buffer = NULL;
        }
        rcu_read_unlock();
 
-       return data;
+       return buffer;
 }
 
-static void perf_mmap_data_put(struct perf_mmap_data *data)
+static void perf_buffer_put(struct perf_buffer *buffer)
 {
-       if (!atomic_dec_and_test(&data->refcount))
+       if (!atomic_dec_and_test(&buffer->refcount))
                return;
 
-       call_rcu(&data->rcu_head, perf_mmap_data_free_rcu);
+       call_rcu(&buffer->rcu_head, perf_buffer_free_rcu);
 }
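
perf_buffer_get()/perf_buffer_put() are the usual RCU-plus-refcount lookup: readers dereference event->buffer under rcu_read_lock() and take a reference only if the count has not already dropped to zero, and the final put defers the actual free through call_rcu() so concurrent readers can finish. The inc-not-zero step in a C11 analogue:

    #include <stdatomic.h>
    #include <stdbool.h>

    /* Take a reference only if the object is still live (refcount > 0). */
    static bool get_ref(_Atomic int *refcount)
    {
            int old = atomic_load(refcount);

            while (old != 0) {
                    if (atomic_compare_exchange_weak(refcount, &old, old + 1))
                            return true;    /* reference taken */
                    /* old was reloaded by the failed CAS; retry */
            }
            return false;                   /* already being torn down */
    }
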
 
 static void perf_mmap_open(struct vm_area_struct *vma)
@@ -2655,16 +2662,16 @@ static void perf_mmap_close(struct vm_area_struct *vma)
        struct perf_event *event = vma->vm_file->private_data;
 
        if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) {
-               unsigned long size = perf_data_size(event->data);
+               unsigned long size = perf_data_size(event->buffer);
                struct user_struct *user = event->mmap_user;
-               struct perf_mmap_data *data = event->data;
+               struct perf_buffer *buffer = event->buffer;
 
                atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm);
                vma->vm_mm->locked_vm -= event->mmap_locked;
-               rcu_assign_pointer(event->data, NULL);
+               rcu_assign_pointer(event->buffer, NULL);
                mutex_unlock(&event->mmap_mutex);
 
-               perf_mmap_data_put(data);
+               perf_buffer_put(buffer);
                free_uid(user);
        }
 }
@@ -2682,11 +2689,11 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
        unsigned long user_locked, user_lock_limit;
        struct user_struct *user = current_user();
        unsigned long locked, lock_limit;
-       struct perf_mmap_data *data;
+       struct perf_buffer *buffer;
        unsigned long vma_size;
        unsigned long nr_pages;
        long user_extra, extra;
-       int ret = 0;
+       int ret = 0, flags = 0;
 
        /*
         * Don't allow mmap() of inherited per-task counters. This would
@@ -2703,7 +2710,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
        nr_pages = (vma_size / PAGE_SIZE) - 1;
 
        /*
-        * If we have data pages ensure they're a power-of-two number, so we
+        * If we have buffer pages ensure they're a power-of-two number, so we
         * can do bitmasks instead of modulo.
         */
        if (nr_pages != 0 && !is_power_of_2(nr_pages))
@@ -2717,9 +2724,9 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 
        WARN_ON_ONCE(event->ctx->parent_ctx);
        mutex_lock(&event->mmap_mutex);
-       if (event->data) {
-               if (event->data->nr_pages == nr_pages)
-                       atomic_inc(&event->data->refcount);
+       if (event->buffer) {
+               if (event->buffer->nr_pages == nr_pages)
+                       atomic_inc(&event->buffer->refcount);
                else
                        ret = -EINVAL;
                goto unlock;
@@ -2749,17 +2756,18 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
                goto unlock;
        }
 
-       WARN_ON(event->data);
+       WARN_ON(event->buffer);
 
-       data = perf_mmap_data_alloc(event, nr_pages);
-       if (!data) {
+       if (vma->vm_flags & VM_WRITE)
+               flags |= PERF_BUFFER_WRITABLE;
+
+       buffer = perf_buffer_alloc(nr_pages, event->attr.wakeup_watermark,
+                                  event->cpu, flags);
+       if (!buffer) {
                ret = -ENOMEM;
                goto unlock;
        }
-
-       perf_mmap_data_init(event, data);
-       if (vma->vm_flags & VM_WRITE)
-               event->data->writable = 1;
+       rcu_assign_pointer(event->buffer, buffer);
 
        atomic_long_add(user_extra, &user->locked_vm);
        event->mmap_locked = extra;
@@ -2929,20 +2937,54 @@ void perf_event_do_pending(void)
        __perf_pending_run();
 }
 
+DEFINE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry);
+
 /*
  * Callchain support -- arch specific
  */
 
-__weak struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
+__weak struct perf_callchain_entry *perf_callchain_buffer(void)
 {
-       return NULL;
+       return &__get_cpu_var(perf_callchain_entry);
 }
 
-__weak
-void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip)
+__weak void perf_callchain_kernel(struct perf_callchain_entry *entry,
+                                 struct pt_regs *regs)
 {
 }
 
+__weak void perf_callchain_user(struct perf_callchain_entry *entry,
+                               struct pt_regs *regs)
+{
+}
+
+static struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
+{
+       struct perf_callchain_entry *entry;
+
+       entry = perf_callchain_buffer();
+       if (!entry)
+               return NULL;
+
+       entry->nr = 0;
+
+       if (!user_mode(regs)) {
+               perf_callchain_store(entry, PERF_CONTEXT_KERNEL);
+               perf_callchain_kernel(entry, regs);
+               if (current->mm)
+                       regs = task_pt_regs(current);
+               else
+                       regs = NULL;
+       }
+
+       if (regs) {
+               perf_callchain_store(entry, PERF_CONTEXT_USER);
+               perf_callchain_user(entry, regs);
+       }
+
+       return entry;
+}
+
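
This is the headline change of the patch: the perf_callchain() walk moves into generic code. Architectures now only provide perf_callchain_kernel() and perf_callchain_user() (weak no-ops by default), plus optionally their own entry buffer, while the generic layer owns the per-cpu scratch entry, resets entry->nr, and stores the PERF_CONTEXT_KERNEL / PERF_CONTEXT_USER markers that delimit the two halves of the chain. The store helper this code assumes lives in the header and, in this series, is roughly:

    static inline void
    perf_callchain_store(struct perf_callchain_entry *entry, u64 ip)
    {
            if (entry->nr < PERF_MAX_STACK_DEPTH)
                    entry->ip[entry->nr++] = ip;
    }
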
 
 /*
  * We assume there is only KVM supporting the callbacks.
@@ -2968,15 +3010,15 @@ EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks);
 /*
  * Output
  */
-static bool perf_output_space(struct perf_mmap_data *data, unsigned long tail,
+static bool perf_output_space(struct perf_buffer *buffer, unsigned long tail,
                              unsigned long offset, unsigned long head)
 {
        unsigned long mask;
 
-       if (!data->writable)
+       if (!buffer->writable)
                return true;
 
-       mask = perf_data_size(data) - 1;
+       mask = perf_data_size(buffer) - 1;
 
        offset = (offset - tail) & mask;
        head   = (head   - tail) & mask;
@@ -2989,7 +3031,7 @@ static bool perf_output_space(struct perf_mmap_data *data, unsigned long tail,
 
 static void perf_output_wakeup(struct perf_output_handle *handle)
 {
-       atomic_set(&handle->data->poll, POLL_IN);
+       atomic_set(&handle->buffer->poll, POLL_IN);
 
        if (handle->nmi) {
                handle->event->pending_wakeup = 1;
@@ -3009,45 +3051,45 @@ static void perf_output_wakeup(struct perf_output_handle *handle)
  */
 static void perf_output_get_handle(struct perf_output_handle *handle)
 {
-       struct perf_mmap_data *data = handle->data;
+       struct perf_buffer *buffer = handle->buffer;
 
        preempt_disable();
-       local_inc(&data->nest);
-       handle->wakeup = local_read(&data->wakeup);
+       local_inc(&buffer->nest);
+       handle->wakeup = local_read(&buffer->wakeup);
 }
 
 static void perf_output_put_handle(struct perf_output_handle *handle)
 {
-       struct perf_mmap_data *data = handle->data;
+       struct perf_buffer *buffer = handle->buffer;
        unsigned long head;
 
 again:
-       head = local_read(&data->head);
+       head = local_read(&buffer->head);
 
        /*
         * IRQ/NMI can happen here, which means we can miss a head update.
         */
 
-       if (!local_dec_and_test(&data->nest))
+       if (!local_dec_and_test(&buffer->nest))
                goto out;
 
        /*
         * Publish the known good head. Rely on the full barrier implied
-        * by atomic_dec_and_test() order the data->head read and this
+        * by atomic_dec_and_test() to order the buffer->head read and this
         * write.
         */
-       data->user_page->data_head = head;
+       buffer->user_page->data_head = head;
 
        /*
         * Now check if we missed an update, rely on the (compiler)
-        * barrier in atomic_dec_and_test() to re-read data->head.
+        * barrier in atomic_dec_and_test() to re-read buffer->head.
         */
-       if (unlikely(head != local_read(&data->head))) {
-               local_inc(&data->nest);
+       if (unlikely(head != local_read(&buffer->head))) {
+               local_inc(&buffer->nest);
                goto again;
        }
 
-       if (handle->wakeup != local_read(&data->wakeup))
+       if (handle->wakeup != local_read(&buffer->wakeup))
                perf_output_wakeup(handle);
 
  out:
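
Rename aside, this function is the writer-side publication protocol: only the outermost writer (the one whose decrement takes nest to zero) publishes data_head, and the re-read loop catches head updates made by writers that interrupted it in between. A worked interleaving showing why the loop is needed:

    /*
     * A:   head = local_read(&buffer->head);       // sees H0
     * IRQ: B reserves space, head becomes H1;
     *      B's dec leaves nest > 0, so B does not publish
     * A:   nest drops to 0, A publishes H0         // stale value!
     * A:   re-reads head, H1 != H0, loops,
     *      publishes H1                            // corrected
     */
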
@@ -3067,12 +3109,12 @@ __always_inline void perf_output_copy(struct perf_output_handle *handle,
                buf += size;
                handle->size -= size;
                if (!handle->size) {
-                       struct perf_mmap_data *data = handle->data;
+                       struct perf_buffer *buffer = handle->buffer;
 
                        handle->page++;
-                       handle->page &= data->nr_pages - 1;
-                       handle->addr = data->data_pages[handle->page];
-                       handle->size = PAGE_SIZE << page_order(data);
+                       handle->page &= buffer->nr_pages - 1;
+                       handle->addr = buffer->data_pages[handle->page];
+                       handle->size = PAGE_SIZE << page_order(buffer);
                }
        } while (len);
 }
@@ -3081,7 +3123,7 @@ int perf_output_begin(struct perf_output_handle *handle,
                      struct perf_event *event, unsigned int size,
                      int nmi, int sample)
 {
-       struct perf_mmap_data *data;
+       struct perf_buffer *buffer;
        unsigned long tail, offset, head;
        int have_lost;
        struct {
@@ -3097,19 +3139,19 @@ int perf_output_begin(struct perf_output_handle *handle,
        if (event->parent)
                event = event->parent;
 
-       data = rcu_dereference(event->data);
-       if (!data)
+       buffer = rcu_dereference(event->buffer);
+       if (!buffer)
                goto out;
 
-       handle->data    = data;
+       handle->buffer  = buffer;
        handle->event   = event;
        handle->nmi     = nmi;
        handle->sample  = sample;
 
-       if (!data->nr_pages)
+       if (!buffer->nr_pages)
                goto out;
 
-       have_lost = local_read(&data->lost);
+       have_lost = local_read(&buffer->lost);
        if (have_lost)
                size += sizeof(lost_event);
 
@@ -3121,30 +3163,30 @@ int perf_output_begin(struct perf_output_handle *handle,
                 * tail pointer. So that all reads will be completed before the
                 * write is issued.
                 */
-               tail = ACCESS_ONCE(data->user_page->data_tail);
+               tail = ACCESS_ONCE(buffer->user_page->data_tail);
                smp_rmb();
-               offset = head = local_read(&data->head);
+               offset = head = local_read(&buffer->head);
                head += size;
-               if (unlikely(!perf_output_space(data, tail, offset, head)))
+               if (unlikely(!perf_output_space(buffer, tail, offset, head)))
                        goto fail;
-       } while (local_cmpxchg(&data->head, offset, head) != offset);
+       } while (local_cmpxchg(&buffer->head, offset, head) != offset);
 
-       if (head - local_read(&data->wakeup) > data->watermark)
-               local_add(data->watermark, &data->wakeup);
+       if (head - local_read(&buffer->wakeup) > buffer->watermark)
+               local_add(buffer->watermark, &buffer->wakeup);
 
-       handle->page = offset >> (PAGE_SHIFT + page_order(data));
-       handle->page &= data->nr_pages - 1;
-       handle->size = offset & ((PAGE_SIZE << page_order(data)) - 1);
-       handle->addr = data->data_pages[handle->page];
+       handle->page = offset >> (PAGE_SHIFT + page_order(buffer));
+       handle->page &= buffer->nr_pages - 1;
+       handle->size = offset & ((PAGE_SIZE << page_order(buffer)) - 1);
+       handle->addr = buffer->data_pages[handle->page];
        handle->addr += handle->size;
-       handle->size = (PAGE_SIZE << page_order(data)) - handle->size;
+       handle->size = (PAGE_SIZE << page_order(buffer)) - handle->size;
 
        if (have_lost) {
                lost_event.header.type = PERF_RECORD_LOST;
                lost_event.header.misc = 0;
                lost_event.header.size = sizeof(lost_event);
                lost_event.id          = event->id;
-               lost_event.lost        = local_xchg(&data->lost, 0);
+               lost_event.lost        = local_xchg(&buffer->lost, 0);
 
                perf_output_put(handle, lost_event);
        }
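
Space in the ring buffer is reserved lock-free: the writer proposes head += size and retries the local_cmpxchg() if an interrupting writer moved head in the meantime, while perf_output_space() rejects reservations that would overwrite data past data_tail that the consumer has not read (only enforced for buffers the user maps writable). A C11 shared-counter analogue of the reserve step (the kernel variant is per-CPU, racing only against local interrupts):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdint.h>

    static bool reserve(_Atomic uint64_t *head, uint64_t tail,
                        uint64_t size, uint64_t bufsize)
    {
            uint64_t offset = atomic_load(head);

            do {
                    if (offset + size - tail > bufsize)
                            return false;   /* would overwrite unread data */
            } while (!atomic_compare_exchange_weak(head, &offset,
                                                   offset + size));

            return true;                    /* [offset, offset+size) is ours */
    }
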
@@ -3152,7 +3194,7 @@ int perf_output_begin(struct perf_output_handle *handle,
        return 0;
 
 fail:
-       local_inc(&data->lost);
+       local_inc(&buffer->lost);
        perf_output_put_handle(handle);
 out:
        rcu_read_unlock();
@@ -3163,15 +3205,15 @@ out:
 void perf_output_end(struct perf_output_handle *handle)
 {
        struct perf_event *event = handle->event;
-       struct perf_mmap_data *data = handle->data;
+       struct perf_buffer *buffer = handle->buffer;
 
        int wakeup_events = event->attr.wakeup_events;
 
        if (handle->sample && wakeup_events) {
-               int events = local_inc_return(&data->events);
+               int events = local_inc_return(&buffer->events);
                if (events >= wakeup_events) {
-                       local_sub(wakeup_events, &data->events);
-                       local_inc(&data->wakeup);
+                       local_sub(wakeup_events, &buffer->events);
+                       local_inc(&buffer->wakeup);
                }
        }
 
@@ -3208,7 +3250,7 @@ static void perf_output_read_one(struct perf_output_handle *handle,
        u64 values[4];
        int n = 0;
 
-       values[n++] = atomic64_read(&event->count);
+       values[n++] = perf_event_count(event);
        if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
                values[n++] = event->total_time_enabled +
                        atomic64_read(&event->child_total_time_enabled);
@@ -3245,7 +3287,7 @@ static void perf_output_read_group(struct perf_output_handle *handle,
        if (leader != event)
                leader->pmu->read(leader);
 
-       values[n++] = atomic64_read(&leader->count);
+       values[n++] = perf_event_count(leader);
        if (read_format & PERF_FORMAT_ID)
                values[n++] = primary_event_id(leader);
 
@@ -3257,7 +3299,7 @@ static void perf_output_read_group(struct perf_output_handle *handle,
                if (sub != event)
                        sub->pmu->read(sub);
 
-               values[n++] = atomic64_read(&sub->count);
+               values[n++] = perf_event_count(sub);
                if (read_format & PERF_FORMAT_ID)
                        values[n++] = primary_event_id(sub);
 
@@ -3488,7 +3530,7 @@ perf_event_read_event(struct perf_event *event,
 /*
  * task tracking -- fork/exit
  *
- * enabled by: attr.comm | attr.mmap | attr.task
+ * enabled by: attr.comm | attr.mmap | attr.mmap_data | attr.task
  */
 
 struct perf_task_event {
@@ -3538,7 +3580,8 @@ static int perf_event_task_match(struct perf_event *event)
        if (event->cpu != -1 && event->cpu != smp_processor_id())
                return 0;
 
-       if (event->attr.comm || event->attr.mmap || event->attr.task)
+       if (event->attr.comm || event->attr.mmap ||
+           event->attr.mmap_data || event->attr.task)
                return 1;
 
        return 0;
@@ -3763,7 +3806,8 @@ static void perf_event_mmap_output(struct perf_event *event,
 }
 
 static int perf_event_mmap_match(struct perf_event *event,
-                                  struct perf_mmap_event *mmap_event)
+                                  struct perf_mmap_event *mmap_event,
+                                  int executable)
 {
        if (event->state < PERF_EVENT_STATE_INACTIVE)
                return 0;
@@ -3771,19 +3815,21 @@ static int perf_event_mmap_match(struct perf_event *event,
        if (event->cpu != -1 && event->cpu != smp_processor_id())
                return 0;
 
-       if (event->attr.mmap)
+       if ((!executable && event->attr.mmap_data) ||
+           (executable && event->attr.mmap))
                return 1;
 
        return 0;
 }
 
 static void perf_event_mmap_ctx(struct perf_event_context *ctx,
-                                 struct perf_mmap_event *mmap_event)
+                                 struct perf_mmap_event *mmap_event,
+                                 int executable)
 {
        struct perf_event *event;
 
        list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
-               if (perf_event_mmap_match(event, mmap_event))
+               if (perf_event_mmap_match(event, mmap_event, executable))
                        perf_event_mmap_output(event, mmap_event);
        }
 }
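
The new attr.mmap_data bit lets an event subscribe to non-executable (data) mappings, while attr.mmap keeps its old meaning of executable mappings only; perf_event_mmap_event() below passes vma->vm_flags & VM_EXEC down as the selector. The resulting matching matrix:

    /*
     * Does an event receive a PERF_RECORD_MMAP for this vma?
     *
     *                        VM_EXEC vma     data vma
     *   attr.mmap      = 1       yes            no
     *   attr.mmap_data = 1       no             yes
     */
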
@@ -3827,6 +3873,14 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
                if (!vma->vm_mm) {
                        name = strncpy(tmp, "[vdso]", sizeof(tmp));
                        goto got_name;
+               } else if (vma->vm_start <= vma->vm_mm->start_brk &&
+                               vma->vm_end >= vma->vm_mm->brk) {
+                       name = strncpy(tmp, "[heap]", sizeof(tmp));
+                       goto got_name;
+               } else if (vma->vm_start <= vma->vm_mm->start_stack &&
+                               vma->vm_end >= vma->vm_mm->start_stack) {
+                       name = strncpy(tmp, "[stack]", sizeof(tmp));
+                       goto got_name;
                }
 
                name = strncpy(tmp, "//anon", sizeof(tmp));
@@ -3843,17 +3897,17 @@ got_name:
 
        rcu_read_lock();
        cpuctx = &get_cpu_var(perf_cpu_context);
-       perf_event_mmap_ctx(&cpuctx->ctx, mmap_event);
+       perf_event_mmap_ctx(&cpuctx->ctx, mmap_event, vma->vm_flags & VM_EXEC);
        ctx = rcu_dereference(current->perf_event_ctxp);
        if (ctx)
-               perf_event_mmap_ctx(ctx, mmap_event);
+               perf_event_mmap_ctx(ctx, mmap_event, vma->vm_flags & VM_EXEC);
        put_cpu_var(perf_cpu_context);
        rcu_read_unlock();
 
        kfree(buf);
 }
 
-void __perf_event_mmap(struct vm_area_struct *vma)
+void perf_event_mmap(struct vm_area_struct *vma)
 {
        struct perf_mmap_event mmap_event;
 
@@ -4015,14 +4069,14 @@ static u64 perf_swevent_set_period(struct perf_event *event)
        hwc->last_period = hwc->sample_period;
 
 again:
-       old = val = atomic64_read(&hwc->period_left);
+       old = val = local64_read(&hwc->period_left);
        if (val < 0)
                return 0;
 
        nr = div64_u64(period + val, period);
        offset = nr * period;
        val -= offset;
-       if (atomic64_cmpxchg(&hwc->period_left, old, val) != old)
+       if (local64_cmpxchg(&hwc->period_left, old, val) != old)
                goto again;
 
        return nr;
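
perf_swevent_set_period() retires as many whole sample periods as have elapsed and leaves the remainder in period_left (negative while counting up toward the next overflow); the local64_cmpxchg() loop retries if an NMI on the same CPU moved period_left underneath it. A worked example:

    /*
     * sample_period = 100, period_left = 30  (30 events past the
     * last overflow point):
     *
     *   nr     = (100 + 30) / 100 = 1       -> one period elapsed
     *   offset = 1 * 100           = 100
     *   val    = 30 - 100          = -70    -> 70 events to the next one
     */
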
@@ -4061,7 +4115,7 @@ static void perf_swevent_add(struct perf_event *event, u64 nr,
 {
        struct hw_perf_event *hwc = &event->hw;
 
-       atomic64_add(nr, &event->count);
+       local64_add(nr, &event->count);
 
        if (!regs)
                return;
@@ -4072,7 +4126,7 @@ static void perf_swevent_add(struct perf_event *event, u64 nr,
        if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq)
                return perf_swevent_overflow(event, 1, nmi, data, regs);
 
-       if (atomic64_add_negative(nr, &hwc->period_left))
+       if (local64_add_negative(nr, &hwc->period_left))
                return;
 
        perf_swevent_overflow(event, 0, nmi, data, regs);
@@ -4210,14 +4264,12 @@ int perf_swevent_get_recursion_context(void)
 }
 EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context);
 
-void perf_swevent_put_recursion_context(int rctx)
+inline void perf_swevent_put_recursion_context(int rctx)
 {
        struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
        barrier();
        cpuctx->recursion[rctx]--;
 }
-EXPORT_SYMBOL_GPL(perf_swevent_put_recursion_context);
-
 
 void __perf_sw_event(u32 event_id, u64 nr, int nmi,
                            struct pt_regs *regs, u64 addr)
@@ -4365,8 +4417,8 @@ static void cpu_clock_perf_event_update(struct perf_event *event)
        u64 now;
 
        now = cpu_clock(cpu);
-       prev = atomic64_xchg(&event->hw.prev_count, now);
-       atomic64_add(now - prev, &event->count);
+       prev = local64_xchg(&event->hw.prev_count, now);
+       local64_add(now - prev, &event->count);
 }
 
 static int cpu_clock_perf_event_enable(struct perf_event *event)
@@ -4374,7 +4426,7 @@ static int cpu_clock_perf_event_enable(struct perf_event *event)
        struct hw_perf_event *hwc = &event->hw;
        int cpu = raw_smp_processor_id();
 
-       atomic64_set(&hwc->prev_count, cpu_clock(cpu));
+       local64_set(&hwc->prev_count, cpu_clock(cpu));
        perf_swevent_start_hrtimer(event);
 
        return 0;
@@ -4406,9 +4458,9 @@ static void task_clock_perf_event_update(struct perf_event *event, u64 now)
        u64 prev;
        s64 delta;
 
-       prev = atomic64_xchg(&event->hw.prev_count, now);
+       prev = local64_xchg(&event->hw.prev_count, now);
        delta = now - prev;
-       atomic64_add(delta, &event->count);
+       local64_add(delta, &event->count);
 }
 
 static int task_clock_perf_event_enable(struct perf_event *event)
@@ -4418,7 +4470,7 @@ static int task_clock_perf_event_enable(struct perf_event *event)
 
        now = event->ctx->time;
 
-       atomic64_set(&hwc->prev_count, now);
+       local64_set(&hwc->prev_count, now);
 
        perf_swevent_start_hrtimer(event);
 
@@ -4598,7 +4650,7 @@ static int perf_tp_event_match(struct perf_event *event,
 }
 
 void perf_tp_event(u64 addr, u64 count, void *record, int entry_size,
-                  struct pt_regs *regs, struct hlist_head *head)
+                  struct pt_regs *regs, struct hlist_head *head, int rctx)
 {
        struct perf_sample_data data;
        struct perf_event *event;
@@ -4612,12 +4664,12 @@ void perf_tp_event(u64 addr, u64 count, void *record, int entry_size,
        perf_sample_data_init(&data, addr);
        data.raw = &raw;
 
-       rcu_read_lock();
        hlist_for_each_entry_rcu(event, node, head, hlist_entry) {
                if (perf_tp_event_match(event, &data, regs))
                        perf_swevent_add(event, count, 1, &data, regs);
        }
-       rcu_read_unlock();
+
+       perf_swevent_put_recursion_context(rctx);
 }
 EXPORT_SYMBOL_GPL(perf_tp_event);
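
perf_tp_event() now takes the swevent recursion context from its caller and releases it on the way out, and the explicit rcu_read_lock() pair is gone: tracepoint probes already fire inside an RCU read-side section (rcu_read_lock_sched() in the tracepoint core of this era), and holding the recursion context keeps preemption disabled across the hlist walk. The caller-side pattern, sketched (the actual helper names on the tracing side may differ):

    int rctx = perf_swevent_get_recursion_context();
    if (rctx < 0)
            return;

    /* ... build the raw record for this tracepoint ... */

    perf_tp_event(addr, count, record, entry_size, regs, head, rctx);
    /* no matching put: perf_tp_event() drops the recursion context */
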
 
@@ -4861,7 +4913,7 @@ perf_event_alloc(struct perf_event_attr *attr,
                hwc->sample_period = 1;
        hwc->last_period = hwc->sample_period;
 
-       atomic64_set(&hwc->period_left, hwc->sample_period);
+       local64_set(&hwc->period_left, hwc->sample_period);
 
        /*
         * we currently do not support PERF_FORMAT_GROUP on inherited events
@@ -4910,7 +4962,7 @@ done:
 
        if (!event->parent) {
                atomic_inc(&nr_events);
-               if (event->attr.mmap)
+               if (event->attr.mmap || event->attr.mmap_data)
                        atomic_inc(&nr_mmap_events);
                if (event->attr.comm)
                        atomic_inc(&nr_comm_events);
@@ -5004,7 +5056,7 @@ err_size:
 static int
 perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
 {
-       struct perf_mmap_data *data = NULL, *old_data = NULL;
+       struct perf_buffer *buffer = NULL, *old_buffer = NULL;
        int ret = -EINVAL;
 
        if (!output_event)
@@ -5034,19 +5086,19 @@ set:
 
        if (output_event) {
                /* get the buffer we want to redirect to */
-               data = perf_mmap_data_get(output_event);
-               if (!data)
+               buffer = perf_buffer_get(output_event);
+               if (!buffer)
                        goto unlock;
        }
 
-       old_data = event->data;
-       rcu_assign_pointer(event->data, data);
+       old_buffer = event->buffer;
+       rcu_assign_pointer(event->buffer, buffer);
        ret = 0;
 unlock:
        mutex_unlock(&event->mmap_mutex);
 
-       if (old_data)
-               perf_mmap_data_put(old_data);
+       if (old_buffer)
+               perf_buffer_put(old_buffer);
 out:
        return ret;
 }
@@ -5295,7 +5347,7 @@ inherit_event(struct perf_event *parent_event,
                hwc->sample_period = sample_period;
                hwc->last_period   = sample_period;
 
-               atomic64_set(&hwc->period_left, sample_period);
+               local64_set(&hwc->period_left, sample_period);
        }
 
        child_event->overflow_handler = parent_event->overflow_handler;
@@ -5356,12 +5408,12 @@ static void sync_child_event(struct perf_event *child_event,
        if (child_event->attr.inherit_stat)
                perf_event_read_event(child_event, child);
 
-       child_val = atomic64_read(&child_event->count);
+       child_val = perf_event_count(child_event);
 
        /*
         * Add back the child's count to the parent's count:
         */
-       atomic64_add(child_val, &parent_event->count);
+       atomic64_add(child_val, &parent_event->child_count);
        atomic64_add(child_event->total_time_enabled,
                     &parent_event->child_total_time_enabled);
        atomic64_add(child_event->total_time_running,