]> git.karo-electronics.de Git - mv-sheeva.git/blob - arch/powerpc/kernel/perf_callchain.c
perf: Generalize callchain_store()
[mv-sheeva.git] / arch / powerpc / kernel / perf_callchain.c
1 /*
2  * Performance counter callchain support - powerpc architecture code
3  *
4  * Copyright © 2009 Paul Mackerras, IBM Corporation.
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version
9  * 2 of the License, or (at your option) any later version.
10  */
11 #include <linux/kernel.h>
12 #include <linux/sched.h>
13 #include <linux/perf_event.h>
14 #include <linux/percpu.h>
15 #include <linux/uaccess.h>
16 #include <linux/mm.h>
17 #include <asm/ptrace.h>
18 #include <asm/pgtable.h>
19 #include <asm/sigcontext.h>
20 #include <asm/ucontext.h>
21 #include <asm/vdso.h>
22 #ifdef CONFIG_PPC64
23 #include "ppc32.h"
24 #endif
25
26
27 /*
28  * Is sp valid as the address of the next kernel stack frame after prev_sp?
29  * The next frame may be in a different stack area but should not go
30  * back down in the same stack area.
31  */
32 static int valid_next_sp(unsigned long sp, unsigned long prev_sp)
33 {
34         if (sp & 0xf)
35                 return 0;               /* must be 16-byte aligned */
36         if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD))
37                 return 0;
38         if (sp >= prev_sp + STACK_FRAME_OVERHEAD)
39                 return 1;
40         /*
41          * sp could decrease when we jump off an interrupt stack
42          * back to the regular process stack.
43          */
44         if ((sp & ~(THREAD_SIZE - 1)) != (prev_sp & ~(THREAD_SIZE - 1)))
45                 return 1;
46         return 0;
47 }
48
49 static void perf_callchain_kernel(struct pt_regs *regs,
50                                   struct perf_callchain_entry *entry)
51 {
52         unsigned long sp, next_sp;
53         unsigned long next_ip;
54         unsigned long lr;
55         long level = 0;
56         unsigned long *fp;
57
58         lr = regs->link;
59         sp = regs->gpr[1];
60         perf_callchain_store(entry, PERF_CONTEXT_KERNEL);
61         perf_callchain_store(entry, regs->nip);
62
63         if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD))
64                 return;
65
66         for (;;) {
67                 fp = (unsigned long *) sp;
68                 next_sp = fp[0];
69
70                 if (next_sp == sp + STACK_INT_FRAME_SIZE &&
71                     fp[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) {
72                         /*
73                          * This looks like an interrupt frame for an
74                          * interrupt that occurred in the kernel
75                          */
76                         regs = (struct pt_regs *)(sp + STACK_FRAME_OVERHEAD);
77                         next_ip = regs->nip;
78                         lr = regs->link;
79                         level = 0;
80                         perf_callchain_store(entry, PERF_CONTEXT_KERNEL);
81
82                 } else {
83                         if (level == 0)
84                                 next_ip = lr;
85                         else
86                                 next_ip = fp[STACK_FRAME_LR_SAVE];
87
88                         /*
89                          * We can't tell which of the first two addresses
90                          * we get are valid, but we can filter out the
91                          * obviously bogus ones here.  We replace them
92                          * with 0 rather than removing them entirely so
93                          * that userspace can tell which is which.
94                          */
95                         if ((level == 1 && next_ip == lr) ||
96                             (level <= 1 && !kernel_text_address(next_ip)))
97                                 next_ip = 0;
98
99                         ++level;
100                 }
101
102                 perf_callchain_store(entry, next_ip);
103                 if (!valid_next_sp(next_sp, sp))
104                         return;
105                 sp = next_sp;
106         }
107 }
108
109 #ifdef CONFIG_PPC64
110 /*
111  * On 64-bit we don't want to invoke hash_page on user addresses from
112  * interrupt context, so if the access faults, we read the page tables
113  * to find which page (if any) is mapped and access it directly.
114  */
115 static int read_user_stack_slow(void __user *ptr, void *ret, int nb)
116 {
117         pgd_t *pgdir;
118         pte_t *ptep, pte;
119         unsigned shift;
120         unsigned long addr = (unsigned long) ptr;
121         unsigned long offset;
122         unsigned long pfn;
123         void *kaddr;
124
125         pgdir = current->mm->pgd;
126         if (!pgdir)
127                 return -EFAULT;
128
129         ptep = find_linux_pte_or_hugepte(pgdir, addr, &shift);
130         if (!shift)
131                 shift = PAGE_SHIFT;
132
133         /* align address to page boundary */
134         offset = addr & ((1UL << shift) - 1);
135         addr -= offset;
136
137         if (ptep == NULL)
138                 return -EFAULT;
139         pte = *ptep;
140         if (!pte_present(pte) || !(pte_val(pte) & _PAGE_USER))
141                 return -EFAULT;
142         pfn = pte_pfn(pte);
143         if (!page_is_ram(pfn))
144                 return -EFAULT;
145
146         /* no highmem to worry about here */
147         kaddr = pfn_to_kaddr(pfn);
148         memcpy(ret, kaddr + offset, nb);
149         return 0;
150 }
151
152 static int read_user_stack_64(unsigned long __user *ptr, unsigned long *ret)
153 {
154         if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned long) ||
155             ((unsigned long)ptr & 7))
156                 return -EFAULT;
157
158         if (!__get_user_inatomic(*ret, ptr))
159                 return 0;
160
161         return read_user_stack_slow(ptr, ret, 8);
162 }
163
164 static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret)
165 {
166         if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned int) ||
167             ((unsigned long)ptr & 3))
168                 return -EFAULT;
169
170         if (!__get_user_inatomic(*ret, ptr))
171                 return 0;
172
173         return read_user_stack_slow(ptr, ret, 4);
174 }
175
176 static inline int valid_user_sp(unsigned long sp, int is_64)
177 {
178         if (!sp || (sp & 7) || sp > (is_64 ? TASK_SIZE : 0x100000000UL) - 32)
179                 return 0;
180         return 1;
181 }
182
183 /*
184  * 64-bit user processes use the same stack frame for RT and non-RT signals.
185  */
186 struct signal_frame_64 {
187         char            dummy[__SIGNAL_FRAMESIZE];
188         struct ucontext uc;
189         unsigned long   unused[2];
190         unsigned int    tramp[6];
191         struct siginfo  *pinfo;
192         void            *puc;
193         struct siginfo  info;
194         char            abigap[288];
195 };
196
197 static int is_sigreturn_64_address(unsigned long nip, unsigned long fp)
198 {
199         if (nip == fp + offsetof(struct signal_frame_64, tramp))
200                 return 1;
201         if (vdso64_rt_sigtramp && current->mm->context.vdso_base &&
202             nip == current->mm->context.vdso_base + vdso64_rt_sigtramp)
203                 return 1;
204         return 0;
205 }
206
207 /*
208  * Do some sanity checking on the signal frame pointed to by sp.
209  * We check the pinfo and puc pointers in the frame.
210  */
211 static int sane_signal_64_frame(unsigned long sp)
212 {
213         struct signal_frame_64 __user *sf;
214         unsigned long pinfo, puc;
215
216         sf = (struct signal_frame_64 __user *) sp;
217         if (read_user_stack_64((unsigned long __user *) &sf->pinfo, &pinfo) ||
218             read_user_stack_64((unsigned long __user *) &sf->puc, &puc))
219                 return 0;
220         return pinfo == (unsigned long) &sf->info &&
221                 puc == (unsigned long) &sf->uc;
222 }
223
224 static void perf_callchain_user_64(struct pt_regs *regs,
225                                    struct perf_callchain_entry *entry)
226 {
227         unsigned long sp, next_sp;
228         unsigned long next_ip;
229         unsigned long lr;
230         long level = 0;
231         struct signal_frame_64 __user *sigframe;
232         unsigned long __user *fp, *uregs;
233
234         next_ip = regs->nip;
235         lr = regs->link;
236         sp = regs->gpr[1];
237         perf_callchain_store(entry, PERF_CONTEXT_USER);
238         perf_callchain_store(entry, next_ip);
239
240         for (;;) {
241                 fp = (unsigned long __user *) sp;
242                 if (!valid_user_sp(sp, 1) || read_user_stack_64(fp, &next_sp))
243                         return;
244                 if (level > 0 && read_user_stack_64(&fp[2], &next_ip))
245                         return;
246
247                 /*
248                  * Note: the next_sp - sp >= signal frame size check
249                  * is true when next_sp < sp, which can happen when
250                  * transitioning from an alternate signal stack to the
251                  * normal stack.
252                  */
253                 if (next_sp - sp >= sizeof(struct signal_frame_64) &&
254                     (is_sigreturn_64_address(next_ip, sp) ||
255                      (level <= 1 && is_sigreturn_64_address(lr, sp))) &&
256                     sane_signal_64_frame(sp)) {
257                         /*
258                          * This looks like an signal frame
259                          */
260                         sigframe = (struct signal_frame_64 __user *) sp;
261                         uregs = sigframe->uc.uc_mcontext.gp_regs;
262                         if (read_user_stack_64(&uregs[PT_NIP], &next_ip) ||
263                             read_user_stack_64(&uregs[PT_LNK], &lr) ||
264                             read_user_stack_64(&uregs[PT_R1], &sp))
265                                 return;
266                         level = 0;
267                         perf_callchain_store(entry, PERF_CONTEXT_USER);
268                         perf_callchain_store(entry, next_ip);
269                         continue;
270                 }
271
272                 if (level == 0)
273                         next_ip = lr;
274                 perf_callchain_store(entry, next_ip);
275                 ++level;
276                 sp = next_sp;
277         }
278 }
279
280 static inline int current_is_64bit(void)
281 {
282         /*
283          * We can't use test_thread_flag() here because we may be on an
284          * interrupt stack, and the thread flags don't get copied over
285          * from the thread_info on the main stack to the interrupt stack.
286          */
287         return !test_ti_thread_flag(task_thread_info(current), TIF_32BIT);
288 }
289
290 #else  /* CONFIG_PPC64 */
291 /*
292  * On 32-bit we just access the address and let hash_page create a
293  * HPTE if necessary, so there is no need to fall back to reading
294  * the page tables.  Since this is called at interrupt level,
295  * do_page_fault() won't treat a DSI as a page fault.
296  */
297 static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret)
298 {
299         if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned int) ||
300             ((unsigned long)ptr & 3))
301                 return -EFAULT;
302
303         return __get_user_inatomic(*ret, ptr);
304 }
305
/* Without CONFIG_PPC64 there is no 64-bit user stack to walk: empty stub. */
static inline void perf_callchain_user_64(struct pt_regs *regs,
                                          struct perf_callchain_entry *entry)
{
        /* intentionally empty */
}
310
/* Without CONFIG_PPC64 no task is ever reported as 64-bit. */
static inline int current_is_64bit(void)
{
        const int is_64bit = 0;

        return is_64bit;
}
315
316 static inline int valid_user_sp(unsigned long sp, int is_64)
317 {
318         if (!sp || (sp & 7) || sp > TASK_SIZE - 32)
319                 return 0;
320         return 1;
321 }
322
/*
 * Without CONFIG_PPC64, map the "32"-suffixed names used by the 32-bit
 * signal frame layouts in this file onto the native definitions.
 */
#define compat_siginfo_t        struct siginfo
#define ucontext32              ucontext
#define mcontext32              mcontext
#define sigcontext32            sigcontext
#define __SIGNAL_FRAMESIZE32    __SIGNAL_FRAMESIZE
328
329 #endif /* CONFIG_PPC64 */
330
331 /*
332  * Layout for non-RT signal frames
333  */
334 struct signal_frame_32 {
335         char                    dummy[__SIGNAL_FRAMESIZE32];
336         struct sigcontext32     sctx;
337         struct mcontext32       mctx;
338         int                     abigap[56];
339 };
340
341 /*
342  * Layout for RT signal frames
343  */
344 struct rt_signal_frame_32 {
345         char                    dummy[__SIGNAL_FRAMESIZE32 + 16];
346         compat_siginfo_t        info;
347         struct ucontext32       uc;
348         int                     abigap[56];
349 };
350
351 static int is_sigreturn_32_address(unsigned int nip, unsigned int fp)
352 {
353         if (nip == fp + offsetof(struct signal_frame_32, mctx.mc_pad))
354                 return 1;
355         if (vdso32_sigtramp && current->mm->context.vdso_base &&
356             nip == current->mm->context.vdso_base + vdso32_sigtramp)
357                 return 1;
358         return 0;
359 }
360
361 static int is_rt_sigreturn_32_address(unsigned int nip, unsigned int fp)
362 {
363         if (nip == fp + offsetof(struct rt_signal_frame_32,
364                                  uc.uc_mcontext.mc_pad))
365                 return 1;
366         if (vdso32_rt_sigtramp && current->mm->context.vdso_base &&
367             nip == current->mm->context.vdso_base + vdso32_rt_sigtramp)
368                 return 1;
369         return 0;
370 }
371
372 static int sane_signal_32_frame(unsigned int sp)
373 {
374         struct signal_frame_32 __user *sf;
375         unsigned int regs;
376
377         sf = (struct signal_frame_32 __user *) (unsigned long) sp;
378         if (read_user_stack_32((unsigned int __user *) &sf->sctx.regs, &regs))
379                 return 0;
380         return regs == (unsigned long) &sf->mctx;
381 }
382
383 static int sane_rt_signal_32_frame(unsigned int sp)
384 {
385         struct rt_signal_frame_32 __user *sf;
386         unsigned int regs;
387
388         sf = (struct rt_signal_frame_32 __user *) (unsigned long) sp;
389         if (read_user_stack_32((unsigned int __user *) &sf->uc.uc_regs, &regs))
390                 return 0;
391         return regs == (unsigned long) &sf->uc.uc_mcontext;
392 }
393
394 static unsigned int __user *signal_frame_32_regs(unsigned int sp,
395                                 unsigned int next_sp, unsigned int next_ip)
396 {
397         struct mcontext32 __user *mctx = NULL;
398         struct signal_frame_32 __user *sf;
399         struct rt_signal_frame_32 __user *rt_sf;
400
401         /*
402          * Note: the next_sp - sp >= signal frame size check
403          * is true when next_sp < sp, for example, when
404          * transitioning from an alternate signal stack to the
405          * normal stack.
406          */
407         if (next_sp - sp >= sizeof(struct signal_frame_32) &&
408             is_sigreturn_32_address(next_ip, sp) &&
409             sane_signal_32_frame(sp)) {
410                 sf = (struct signal_frame_32 __user *) (unsigned long) sp;
411                 mctx = &sf->mctx;
412         }
413
414         if (!mctx && next_sp - sp >= sizeof(struct rt_signal_frame_32) &&
415             is_rt_sigreturn_32_address(next_ip, sp) &&
416             sane_rt_signal_32_frame(sp)) {
417                 rt_sf = (struct rt_signal_frame_32 __user *) (unsigned long) sp;
418                 mctx = &rt_sf->uc.uc_mcontext;
419         }
420
421         if (!mctx)
422                 return NULL;
423         return mctx->mc_gregs;
424 }
425
426 static void perf_callchain_user_32(struct pt_regs *regs,
427                                    struct perf_callchain_entry *entry)
428 {
429         unsigned int sp, next_sp;
430         unsigned int next_ip;
431         unsigned int lr;
432         long level = 0;
433         unsigned int __user *fp, *uregs;
434
435         next_ip = regs->nip;
436         lr = regs->link;
437         sp = regs->gpr[1];
438         perf_callchain_store(entry, PERF_CONTEXT_USER);
439         perf_callchain_store(entry, next_ip);
440
441         while (entry->nr < PERF_MAX_STACK_DEPTH) {
442                 fp = (unsigned int __user *) (unsigned long) sp;
443                 if (!valid_user_sp(sp, 0) || read_user_stack_32(fp, &next_sp))
444                         return;
445                 if (level > 0 && read_user_stack_32(&fp[1], &next_ip))
446                         return;
447
448                 uregs = signal_frame_32_regs(sp, next_sp, next_ip);
449                 if (!uregs && level <= 1)
450                         uregs = signal_frame_32_regs(sp, next_sp, lr);
451                 if (uregs) {
452                         /*
453                          * This looks like an signal frame, so restart
454                          * the stack trace with the values in it.
455                          */
456                         if (read_user_stack_32(&uregs[PT_NIP], &next_ip) ||
457                             read_user_stack_32(&uregs[PT_LNK], &lr) ||
458                             read_user_stack_32(&uregs[PT_R1], &sp))
459                                 return;
460                         level = 0;
461                         perf_callchain_store(entry, PERF_CONTEXT_USER);
462                         perf_callchain_store(entry, next_ip);
463                         continue;
464                 }
465
466                 if (level == 0)
467                         next_ip = lr;
468                 perf_callchain_store(entry, next_ip);
469                 ++level;
470                 sp = next_sp;
471         }
472 }
473
474 /*
475  * Since we can't get PMU interrupts inside a PMU interrupt handler,
476  * we don't need separate irq and nmi entries here.
477  */
478 static DEFINE_PER_CPU(struct perf_callchain_entry, cpu_perf_callchain);
479
480 struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
481 {
482         struct perf_callchain_entry *entry = &__get_cpu_var(cpu_perf_callchain);
483
484         entry->nr = 0;
485
486         if (!user_mode(regs)) {
487                 perf_callchain_kernel(regs, entry);
488                 if (current->mm)
489                         regs = task_pt_regs(current);
490                 else
491                         regs = NULL;
492         }
493
494         if (regs) {
495                 if (current_is_64bit())
496                         perf_callchain_user_64(regs, entry);
497                 else
498                         perf_callchain_user_32(regs, entry);
499         }
500
501         return entry;
502 }