Using 'pahole --packable' I found some structs whose members could be
reordered to eliminate alignment holes, in some cases making their sizes
cacheline multiples. The size changes, as reported by codiff:
[acme@doppio linux-2.6-tip]$ codiff perf.old ~/bin/perf
builtin-annotate.c:
struct perf_session | -8
struct perf_header | -8
2 structs changed
builtin-diff.c:
struct sample_data | -8
1 struct changed
diff__process_sample_event | -8
1 function changed, 8 bytes removed, diff: -8
builtin-sched.c:
struct sched_atom | -8
1 struct changed
builtin-timechart.c:
struct per_pid | -8
1 struct changed
cmd_timechart | -16
1 function changed, 16 bytes removed, diff: -16
builtin-probe.c:
struct perf_probe_point | -8
struct perf_probe_event | -8
2 structs changed
opt_add_probe_event | -3
1 function changed, 3 bytes removed, diff: -3
util/probe-finder.c:
struct probe_finder | -8
1 struct changed
find_kprobe_trace_events | -16
1 function changed, 16 bytes removed, diff: -16
/home/acme/bin/perf:
4 functions changed, 43 bytes removed, diff: -43
[acme@doppio linux-2.6-tip]$
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <new-submission>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
struct sched_atom {
enum sched_event_type type;
+ int specific_wait;
u64 timestamp;
u64 duration;
unsigned long nr;
- int specific_wait;
sem_t *wait_sem;
struct task_desc *wakee;
};
struct per_pidcomm *all;
struct per_pidcomm *current;
-
- int painted;
};
u64 addr;
u64 id;
u64 stream_id;
- u32 cpu;
u64 period;
- struct ip_callchain *callchain;
+ u32 cpu;
u32 raw_size;
void *raw_data;
+ struct ip_callchain *callchain;
};
#define BUILD_ID_SIZE 20
struct perf_header {
int frozen;
int attrs, size;
+ bool needs_swap;
struct perf_header_attr **attr;
s64 attr_offset;
u64 data_offset;
u64 data_size;
u64 event_offset;
u64 event_size;
- bool needs_swap;
DECLARE_BITMAP(adds_features, HEADER_FEAT_BITS);
};
char *file; /* File path */
char *function; /* Function name */
int line; /* Line number */
+ bool retprobe; /* Return probe flag */
char *lazy_line; /* Lazy matching pattern */
unsigned long offset; /* Offset from function entry */
- bool retprobe; /* Return probe flag */
};
/* Perf probe probing argument field chain */
struct probe_finder {
struct perf_probe_event *pev; /* Target probe event */
- int ntevs; /* number of trace events */
struct kprobe_trace_event *tevs; /* Result trace events */
+ int ntevs; /* number of trace events */
/* For function searching */
+ int lno; /* Line number */
Dwarf_Addr addr; /* Address */
const char *fname; /* Real file name */
- int lno; /* Line number */
Dwarf_Die cu_die; /* Current CU */
struct list_head lcache; /* Line cache for lazy match */