From 231ebb3b15380466ef91fda0cf33150acc3e03a6 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sun, 7 Apr 2013 21:49:33 -0400 Subject: [PATCH] tools/power: fspin -- a utility for power measurements fspin is a simple utility for generating a controlled workload for use when testing various Linux power management features. In particular, for testing the various flavors of power capping. When measuring the benefit of these features, it is important to simultaneously measure not only the power benefit, but the performance impact. See the man page for details. Signed-off-by: Len Brown --- tools/power/fspin/Makefile | 21 ++ tools/power/fspin/fspin.1 | 68 ++++++ tools/power/fspin/fspin.c | 443 +++++++++++++++++++++++++++++++++++++ 3 files changed, 532 insertions(+) create mode 100644 tools/power/fspin/Makefile create mode 100644 tools/power/fspin/fspin.1 create mode 100644 tools/power/fspin/fspin.c diff --git a/tools/power/fspin/Makefile b/tools/power/fspin/Makefile new file mode 100644 index 000000000000..527400782943 --- /dev/null +++ b/tools/power/fspin/Makefile @@ -0,0 +1,21 @@ +CC = $(CROSS_COMPILE)gcc +BUILD_OUTPUT := $(PWD) +PREFIX := /usr +DESTDIR := + +fspin : fspin.c +CFLAGS += -Wall + +%: %.c + @mkdir -p $(BUILD_OUTPUT) + $(CC) $(CFLAGS) $< -o $(BUILD_OUTPUT)/$@ -lpthread + +.PHONY : clean +clean : + @rm -f $(BUILD_OUTPUT)/fspin + +install : fspin + install -d $(DESTDIR)$(PREFIX)/bin + install $(BUILD_OUTPUT)/fspin $(DESTDIR)$(PREFIX)/bin/fspin + install -d $(DESTDIR)$(PREFIX)/share/man/man1 + install fspin.1 $(DESTDIR)$(PREFIX)/share/man/man1 diff --git a/tools/power/fspin/fspin.1 b/tools/power/fspin/fspin.1 new file mode 100644 index 000000000000..b57308e03b82 --- /dev/null +++ b/tools/power/fspin/fspin.1 @@ -0,0 +1,68 @@ +.\" This page Copyright (C) 2013 Len Brown +.\" Distributed under the GPL, Copyleft 1994. 
+.TH FSPIN 1
+.SH NAME
+fspin \- simple workload for power experiments
+.SH SYNOPSIS
+.ft B
+.B fspin
+.RB [ "\-v" ]
+.RB [ "\-i iterations" ]
+.RB [ "\-s sec_per_iteration" ]
+.RB [ "\-t threads" ]
+.RB [ "\-b bind_to_cpus" ]
+.RB [ "\-m memory (b|k|m|g)" ]
+.br
+.SH DESCRIPTION
+\fBfspin\fP
+heats up the hardware by running a
+floating-point spin loop per processor.
+Every
+.I sec_per_iteration
+seconds, fspin presents the sum of the work completed
+by all threads.
+.SS Options
+.PP
+\fB-v\fP increases verbosity.
+By default, fspin prints only the quantity of work completed.
+.PP
+\fB-s sec_per_iteration\fP
+Print the indicator of work completed every
+sec_per_iteration seconds. By default, 5 sec.
+.PP
+\fB-t threads\fP
+Create
+.I threads
+software threads. Default is number of
+logical processors available, or if '-b' option is used,
+one thread per bound processor.
+.PP
+\fB-b bind_to_cpus\fP
+Bind the threads to the indicated list of comma-separated CPU numbers.
+A range of CPUs can be specified by using '-'.
+.PP
+\fB-i iterations\fP
+Exit after
+.I iterations
+and print total of work completed.
+Default is to continue running forever, printing work per iteration/sec.
+.PP
+\fB-m memory\fP
+Allocate arrays of
+.I memory,
+a number followed by a modifier b|k|m|g, for bytes, kilobytes, megabytes,
+or gigabytes, respectively. Default is 512 bytes, which will spin in-cache.
+Increase this number to exercise larger caches and memory.
+
+.SH WHAT FSPIN IS NOT
+Fspin is just a simple tool,
+and has not been characterized as a
+.I performance benchmark.
+Fspin is not a
+.I power virus for cooling HW design,
+as there are better tools, specialized for that purpose.
+
+.PP
+.SH AUTHORS
+.nf
+Written by Len Brown
diff --git a/tools/power/fspin/fspin.c b/tools/power/fspin/fspin.c
new file mode 100644
index 000000000000..38288c1d61bb
--- /dev/null
+++ b/tools/power/fspin/fspin.c
@@ -0,0 +1,568 @@
+/*
+ * fspin.c - user utility to burn CPU cycles, thrash the cache and memory
+ *
+ * Copyright (c) 2013, Intel Corporation.
+ * Len Brown
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ */
+
+/*
+ * Creates one thread per logical processor (override with -t).
+ * Threads run on any processor (override with -b).
+ * Each thread allocates and initializes its own data.
+ * Then it processes the data using an infinite DAXPY loop:
+ *	Double precision Y[i] = A*X[i] + Y[i]
+ *
+ * The parent thread wakes up every reporting interval,
+ * (override 5 sec default with -s),
+ * sums up and prints aggregate performance.
+ *
+ * The actual computation is somewhat arbitrary, if not random.
+ * The performance number is intended only to be compared to itself
+ * on the same machine, to illustrate how various power limiting
+ * techniques impact performance.
+ */
+#define _GNU_SOURCE
+#include <ctype.h>
+#include <errno.h>
+#include <limits.h>
+#include <pthread.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <time.h>
+#include <unistd.h>
+
+#define BANNER "fspin v1.1, April 7, 2013 - Len Brown "
+
+#define handle_error_en(en, msg) \
+	do { errno = en; perror(msg); exit(EXIT_FAILURE); } while (0)
+
+#define handle_error(msg) \
+	do { perror(msg); exit(EXIT_FAILURE); } while (0)
+
+struct thread_info {		/* Used as argument to spin_loop() */
+	pthread_t thread_id;	/* ID returned by pthread_create() */
+	int thread_num;		/* Application-defined thread # */
+};
+
+/*
+ * Per-thread work counter.  The padding keeps the counters of
+ * neighbouring threads 256 bytes apart.
+ */
+struct padded {
+	double counter;		/* 8 bytes */
+	double pad[(32 - 1)];	/* round up to 256 byte line */
+} *thread_data;
+
+int num_threads;
+int thread_num_override;	/* -t, 0 means "not given" */
+int data_bytes = 512;		/* -m, size of each of the two arrays */
+int nrcpus = 64;
+int sec_per_interval = 5;	/* seconds */
+int iterations;			/* -i, 0 means run forever */
+int verbose;
+int do_binding;			/* number of CPUs named by -b, 0 if none */
+
+cpu_set_t *cpu_affinity_set;
+size_t cpu_affinity_setsize;
+
+/*
+ * Allocate and zero cpu_affinity_set, sized for nrcpus CPUs.
+ * An already allocated set is kept, so repeated -b options
+ * accumulate into one mask instead of leaking the previous one.
+ * Exits with status 3 if the allocation fails.
+ */
+void
+allocate_cpusets(void)
+{
+	if (cpu_affinity_set != NULL)
+		return;
+
+	cpu_affinity_set = CPU_ALLOC(nrcpus);
+	if (cpu_affinity_set == NULL) {
+		perror("CPU_ALLOC");
+		exit(3);
+	}
+	cpu_affinity_setsize = CPU_ALLOC_SIZE(nrcpus);
+	CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
+}
+
+/*
+ * Restrict the calling thread to the CPUs collected by -b.
+ * Does nothing when -b was not used.
+ */
+void
+bind_to_cpus(void)
+{
+	if (!do_binding)
+		return;
+
+	if (sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set) == -1) {
+		fprintf(stderr, "bind_to_cpus() failed\n");
+		perror("sched_setaffinity");
+		exit(-1);
+	}
+}
+
+/*
+ * Return the number of CPUs in this process's affinity mask,
+ * or -1 on error.  While sched_getaffinity() fails with EINVAL
+ * the mask size (nrcpus) is grown and the call retried, until it
+ * succeeds or the size limit is reached.
+ */
+int get_num_cpus(void)
+{
+	cpu_set_t *mask;
+	size_t size;
+	int num_cpus;
+
+	for (;;) {
+		int err;
+
+		mask = CPU_ALLOC(nrcpus);
+		if (mask == NULL) {
+			perror("CPU_ALLOC");
+			return -1;
+		}
+		size = CPU_ALLOC_SIZE(nrcpus);
+		CPU_ZERO_S(size, mask);
+		if (sched_getaffinity(0, size, mask) == 0)
+			break;
+
+		err = errno;	/* CPU_FREE() may clobber errno */
+		CPU_FREE(mask);
+		if (err != EINVAL || nrcpus >= (1024 << 8)) {
+			errno = err;
+			perror("sched_getaffinity");
+			return -1;
+		}
+		nrcpus = nrcpus << 2;
+	}
+
+	num_cpus = CPU_COUNT_S(size, mask);
+	CPU_FREE(mask);
+
+	return num_cpus;
+}
+
+/*
+ * Thread body: allocate two private arrays of data_bytes each, seed
+ * them, then run DAXPY over them forever, bumping this thread's
+ * counter once per element processed.  Never returns.
+ *
+ * The counter is updated without synchronization; monitor_threads()
+ * only samples it.
+ */
+static void *spin_loop(void *arg)
+{
+	struct thread_info *tinfo = arg;
+	double *x, *y;
+	int i;
+	int data_entries = data_bytes / sizeof(double);
+	unsigned long long bitmask = random();
+
+	if (data_entries < 1) {
+		fprintf(stderr, "data size %d is smaller than one double\n",
+			data_bytes);
+		exit(-1);
+	}
+
+	x = malloc(data_bytes);
+	y = malloc(data_bytes);
+
+	if (x == NULL || y == NULL) {
+		perror("malloc");
+		exit(-1);
+	}
+
+	/*
+	 * seed data array with random bits
+	 */
+	for (i = 0; i < data_entries; ++i) {
+		x[i] = 1.0 + i * bitmask;
+		y[i] = 1.0 + i * bitmask;
+	}
+
+	for (i = 0; ; ) {
+		double a = 3.1415926535 * i;
+
+		y[i] = a * x[i] + y[i];	/* DAXPY */
+
+		thread_data[tinfo->thread_num].counter++;
+
+		/* wrap before the next access so i stays inside the arrays */
+		if (++i >= data_entries)
+			i = 0;
+	}
+	/* not reached */
+}
+
+/* Print the command line synopsis to stderr and exit; does not return. */
+void usage(void)
+{
+	fprintf(stderr,
+		"Usage: fspin [-v][-s sec_per_iteration][-i iterations][-t num_threads][-b cpu_list][-m memory[b|k|m|g]]\n");
+	fprintf(stderr, "\twhere 'cpu_list' is comma and dash separated numbers with no spaces\n");
+	exit(EXIT_FAILURE);
+}
+
+/* Report where parsing of 'string' stopped, then exit via usage(). */
+void parse_error(char *string, char c)
+{
+	fprintf(stderr, "parse error on '%s' at '%c'\n", string, c);
+	usage();
+}
+
+/*
+ * Add 'cpu' to cpu_affinity_set.
+ * Exits if 'cpu' is out of range or was already added.
+ * Returns the running total of CPUs added so far.
+ *
+ * NOTE(review): nrcpus is the number of CPUs in this process's
+ * affinity mask, so a process started with a restricted mask cannot
+ * name CPUs numbered >= nrcpus - confirm whether that is intended.
+ */
+int add_cpu_to_bind_mask(int cpu)
+{
+	static int num_added;
+
+	/* valid cpu numbers are 0 .. nrcpus - 1 */
+	if (cpu < 0 || cpu >= nrcpus) {
+		fprintf(stderr, "invalid cpu %d\n", cpu);
+		exit(1);
+	}
+
+	if (CPU_ISSET_S(cpu, cpu_affinity_setsize, cpu_affinity_set)) {
+		fprintf(stderr, "can't bind to cpu %d more than once\n", cpu);
+		exit(1);
+	}
+
+	/* add cpu to set */
+	CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set);
+
+	if (verbose)
+		printf("%d, ", cpu);
+
+	num_added += 1;
+
+	return num_added;
+}
+
+/*
+ * Parse the -b argument: comma separated cpu numbers, where "a-b"
+ * names the inclusive range a..b (a < b).  No spaces are allowed.
+ * Each cpu is added to cpu_affinity_set; malformed input exits
+ * via parse_error().
+ * Returns the total number of CPUs added to the bind mask.
+ */
+int
+parse_bind_cpu_list(char *cpu_list)
+{
+	char *p = cpu_list;
+	int range_next = -1;	/* >= 0 while the end of "a-b" is pending */
+	int total_cpus_added = 0;
+
+	allocate_cpusets();
+
+	while (*p != '\0') {
+		char *end;
+		long val;
+		int num;
+
+		/* remaining list must start w/ valid cpu number */
+		if (!isdigit((unsigned char)*p))
+			parse_error(p, *p);
+
+		errno = 0;
+		val = strtol(p, &end, 10);
+		if (errno == ERANGE || val > INT_MAX)
+			parse_error(p, *p);
+		num = (int)val;
+
+		if (range_next >= 0) {
+			/* range must be low to high; "3-4" is a valid range */
+			if (num < range_next)
+				parse_error(p, *p);
+
+			for ( ; range_next < num; range_next++)
+				total_cpus_added = add_cpu_to_bind_mask(range_next);
+
+			range_next = -1;
+		}
+
+		total_cpus_added = add_cpu_to_bind_mask(num);
+
+		p = end;
+		switch (*p) {
+		case ',':
+			p++;
+			break;
+		case '-':
+			range_next = num + 1;
+			p++;
+			break;
+		default:
+			/* '\0' ends the loop, anything else fails isdigit() */
+			break;
+		}
+	}
+
+	/* "a-" with nothing after the dash */
+	if (range_next >= 0)
+		parse_error(cpu_list, '-');
+
+	return total_cpus_added;
+}
+
+/*
+ * Parse the -m argument: a positive decimal number optionally followed
+ * by a unit, b (bytes, the default), k, m or g.  Characters after the
+ * unit are ignored.
+ * Exits via usage() on a malformed number, an unknown unit, a size
+ * that does not fit in an int, or a size too small for one double.
+ * Returns the size in bytes.
+ */
+int parse_memory_param(char *p)
+{
+	long value, multiplier = 1;
+	char *end;
+
+	errno = 0;
+	value = strtol(p, &end, 10);
+	if (end == p || errno == ERANGE || value <= 0) {
+		fprintf(stderr, "failed to parse -m\n");
+		usage();
+	}
+
+	switch (*end) {
+	case '\0':
+	case 'b':
+	case 'B':
+		break;
+	case 'k':
+	case 'K':
+		multiplier = 1024;
+		break;
+	case 'm':
+	case 'M':
+		multiplier = 1024 * 1024;
+		break;
+	case 'g':
+	case 'G':
+		multiplier = 1024L * 1024 * 1024;
+		break;
+	default:
+		fprintf(stderr, "-m: bad memory units, use b, k, m, g\n");
+		usage();
+	}
+
+	/* data_bytes is an int, and each array needs at least one double */
+	if (value > INT_MAX / multiplier ||
+	    value * multiplier < (long)sizeof(double)) {
+		fprintf(stderr, "-m: size out of range\n");
+		usage();
+	}
+
+	return (int)(value * multiplier);
+}
+
+/*
+ * Convert the argument of option -'opt' to an int.
+ * Exits via usage() unless it is a plain decimal number in [min, INT_MAX].
+ */
+static int parse_int_option(const char *arg, char opt, int min)
+{
+	char *end;
+	long val;
+
+	errno = 0;
+	val = strtol(arg, &end, 10);
+	if (end == arg || *end != '\0' || errno == ERANGE ||
+	    val < min || val > INT_MAX) {
+		fprintf(stderr, "-%c: invalid value '%s'\n", opt, arg);
+		usage();
+	}
+	return (int)val;
+}
+
+/*
+ * Parse the command line into the option globals.
+ * Options are handled in the order given, so -v only affects the
+ * messages of options that follow it.  Exits on any invalid option.
+ */
+void parse_args(int argc, char *argv[])
+{
+	int opt;
+
+	nrcpus = get_num_cpus();
+	if (nrcpus < 1) {
+		fprintf(stderr, "unable to determine the number of cpus\n");
+		exit(EXIT_FAILURE);
+	}
+
+	while ((opt = getopt(argc, argv, "s:i:t:b:m:v")) != -1) {
+		switch (opt) {
+		case 's':
+			sec_per_interval = parse_int_option(optarg, 's', 1);
+			if (verbose)
+				printf("sec_per_interval %d\n", sec_per_interval);
+			break;
+		case 'i':
+			iterations = parse_int_option(optarg, 'i', 0);
+			if (verbose)
+				printf("iterations %d\n", iterations);
+			break;
+		case 't':
+			thread_num_override = parse_int_option(optarg, 't', 0);
+			if (verbose)
+				printf("Thread Count Override: %d\n", thread_num_override);
+			break;
+		case 'b':
+			do_binding = parse_bind_cpu_list(optarg);
+			if (verbose)
+				printf("Binding to %d CPUs.\n", do_binding);
+			break;
+		case 'm':
+			data_bytes = parse_memory_param(optarg);
+			if (verbose)
+				printf("Memory Override: %d\n", data_bytes);
+			break;
+		case 'v':
+			verbose++;
+			break;
+		default: /* '?' */
+			usage(); /* does not return */
+		}
+	}
+}
+
+unsigned long long lsum_old;
+
+struct thread_info *tinfo;
+pthread_attr_t attr;
+
+/*
+ * Start the worker threads.  The count is -t if given, else the
+ * number of CPUs named by -b, else nrcpus.  Exits on any failure.
+ */
+void create_threads(void)
+{
+	int s, tnum;
+
+	if (thread_num_override)
+		num_threads = thread_num_override;
+	else if (do_binding)
+		num_threads = do_binding;
+	else
+		num_threads = nrcpus;
+
+	thread_data = calloc(num_threads, sizeof(*thread_data));
+	if (thread_data == NULL)
+		handle_error("calloc");
+
+	/* Initialize thread creation attributes */
+	s = pthread_attr_init(&attr);
+	if (s != 0)
+		handle_error_en(s, "pthread_attr_init");
+
+	/* Allocate memory for pthread_create() arguments */
+	tinfo = calloc(num_threads, sizeof(*tinfo));
+	if (tinfo == NULL)
+		handle_error("calloc");
+
+	for (tnum = 0; tnum < num_threads; tnum++) {
+		tinfo[tnum].thread_num = tnum;
+
+		/* The pthread_create() call stores the thread ID into
+		 * corresponding element of tinfo[]
+		 */
+		s = pthread_create(&tinfo[tnum].thread_id, &attr,
+				   &spin_loop, &tinfo[tnum]);
+		if (s != 0)
+			handle_error_en(s, "pthread_create");
+	}
+
+	/* the attribute object is not needed once the threads exist */
+	pthread_attr_destroy(&attr);
+
+	printf("%d threads created\n", num_threads);
+}
+
+/*
+ * Every sec_per_interval seconds print the work completed by all
+ * threads during that interval, in millions of loop passes per second.
+ * Runs forever unless -i was given; then returns after 'iterations'
+ * intervals, having printed a per-thread and a total summary.
+ */
+void monitor_threads(void)
+{
+	struct timeval tv_old, tv_new, tv_delta;
+	int i, j;
+	double interval_float;
+	unsigned long long lsum;
+
+	gettimeofday(&tv_old, NULL);
+
+	for (i = 0; iterations ? i < iterations : 1 ; i++) {
+		struct timespec ts;
+
+		ts.tv_sec = sec_per_interval;
+		ts.tv_nsec = 0;
+
+		/* a signal may cut the sleep short; sleep the remainder */
+		while (nanosleep(&ts, &ts) != 0) {
+			if (errno != EINTR) {
+				perror("nanosleep");
+				exit(-1);
+			}
+		}
+
+		for (j = 0, lsum = 0; j < num_threads; ++j)
+			lsum += thread_data[j].counter;
+
+		gettimeofday(&tv_new, NULL);
+		timersub(&tv_new, &tv_old, &tv_delta);
+
+		interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0;
+		printf("%.2f\n", (lsum - lsum_old)/interval_float/1000000);
+		/* keep results flowing when stdout is a pipe or a file */
+		fflush(stdout);
+
+		tv_old = tv_new;
+		lsum_old = lsum;
+	}
+
+	/* summary */
+	for (j = 0, lsum = 0; j < num_threads; ++j) {
+		printf("%d %.2f\n", j, thread_data[j].counter/1000000.0);
+		lsum += thread_data[j].counter;
+	}
+	printf("Total %.2f\n", lsum/1000000.0);
+}
+
+/* Print the program name and version. */
+void print_banner(void)
+{
+	puts(BANNER);
+}
+
+int main(int argc, char *argv[])
+{
+	parse_args(argc, argv);
+
+	print_banner();
+
+	/* bind first so that the threads inherit the affinity mask */
+	bind_to_cpus();
+
+	create_threads();
+
+	monitor_threads();	/* returns only when -i was given */
+
+	return 0;
+}
-- 
2.39.5