From: Kent Overstreet Date: Fri, 26 Apr 2013 22:39:55 +0000 (-0700) Subject: bcache: Fix/revamp tracepoints X-Git-Url: https://git.karo-electronics.de/?a=commitdiff_plain;h=c37511b863f36c1cc6e18440717fd4cc0e881b8a;p=linux-beck.git bcache: Fix/revamp tracepoints The tracepoints were reworked to be more sensible, and fixed a null pointer deref in one of the tracepoints. Converted some of the pr_debug()s to tracepoints - this is partly a performance optimization; it used to be that with DEBUG or CONFIG_DYNAMIC_DEBUG pr_debug() was an empty macro; but at some point it was changed to an empty inline function. Some of the pr_debug() statements had rather expensive function calls as part of the arguments, so this code was getting run unnecessarily even on non debug kernels - in some fast paths, too. Signed-off-by: Kent Overstreet --- diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c index 38428f46ea74..b54b73b9b2b7 100644 --- a/drivers/md/bcache/alloc.c +++ b/drivers/md/bcache/alloc.c @@ -65,6 +65,7 @@ #include #include +#include #define MAX_IN_FLIGHT_DISCARDS 8U @@ -351,10 +352,7 @@ static void invalidate_buckets(struct cache *ca) break; } - pr_debug("free %zu/%zu free_inc %zu/%zu unused %zu/%zu", - fifo_used(&ca->free), ca->free.size, - fifo_used(&ca->free_inc), ca->free_inc.size, - fifo_used(&ca->unused), ca->unused.size); + trace_bcache_alloc_invalidate(ca); } #define allocator_wait(ca, cond) \ @@ -473,9 +471,7 @@ again: return r; } - pr_debug("alloc failure: blocked %i free %zu free_inc %zu unused %zu", - atomic_read(&ca->set->prio_blocked), fifo_used(&ca->free), - fifo_used(&ca->free_inc), fifo_used(&ca->unused)); + trace_bcache_alloc_fail(ca); if (cl) { closure_wait(&ca->set->bucket_wait, cl); diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index ad4957b52f10..59c15e09e4dd 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -178,7 +178,6 @@ #define pr_fmt(fmt) "bcache: %s() " fmt "\n", __func__ #include -#include #include #include #include @@ -901,8 +900,6 @@ static inline unsigned local_clock_us(void) return local_clock() >> 10; } -#define MAX_BSETS 4U - #define BTREE_PRIO USHRT_MAX #define INITIAL_PRIO 32768 @@ -1107,23 +1104,6 @@ static inline void __bkey_put(struct cache_set *c, struct bkey *k) atomic_dec_bug(&PTR_BUCKET(c, k, i)->pin); } -/* Blktrace macros */ - -#define blktrace_msg(c, fmt, ...) \ -do { \ - struct request_queue *q = bdev_get_queue(c->bdev); \ - if (q) \ - blk_add_trace_msg(q, fmt, ##__VA_ARGS__); \ -} while (0) - -#define blktrace_msg_all(s, fmt, ...) \ -do { \ - struct cache *_c; \ - unsigned i; \ - for_each_cache(_c, (s), i) \ - blktrace_msg(_c, fmt, ##__VA_ARGS__); \ -} while (0) - static inline void cached_dev_put(struct cached_dev *dc) { if (atomic_dec_and_test(&dc->count)) diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h index 57a9cff41546..ae115a253d73 100644 --- a/drivers/md/bcache/bset.h +++ b/drivers/md/bcache/bset.h @@ -1,6 +1,8 @@ #ifndef _BCACHE_BSET_H #define _BCACHE_BSET_H +#include + /* * BKEYS: * @@ -142,6 +144,8 @@ /* Btree key comparison/iteration */ +#define MAX_BSETS 4U + struct btree_iter { size_t size, used; struct btree_iter_set { diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index aaec186f7ba6..218d486259a3 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -223,8 +223,9 @@ void bch_btree_node_read(struct btree *b) struct closure cl; struct bio *bio; + trace_bcache_btree_read(b); + closure_init_stack(&cl); - pr_debug("%s", pbtree(b)); bio = bch_bbio_alloc(b->c); bio->bi_rw = REQ_META|READ_SYNC; @@ -234,7 +235,6 @@ void bch_btree_node_read(struct btree *b) bch_bio_map(bio, b->sets[0].data); - trace_bcache_btree_read(bio); bch_submit_bbio(bio, b->c, &b->key, 0); closure_sync(&cl); @@ -343,7 +343,6 @@ static void do_btree_node_write(struct btree *b) memcpy(page_address(bv->bv_page), base + j * PAGE_SIZE, PAGE_SIZE); - trace_bcache_btree_write(b->bio); bch_submit_bbio(b->bio, b->c, &k.key, 0); continue_at(cl, btree_node_write_done, NULL); @@ -351,7 +350,6 @@ static void do_btree_node_write(struct btree *b) b->bio->bi_vcnt = 0; bch_bio_map(b->bio, i); - trace_bcache_btree_write(b->bio); bch_submit_bbio(b->bio, b->c, &k.key, 0); closure_sync(cl); @@ -363,10 +361,13 @@ void bch_btree_node_write(struct btree *b, struct closure *parent) { struct bset *i = b->sets[b->nsets].data; + trace_bcache_btree_write(b); + BUG_ON(current->bio_list); BUG_ON(b->written >= btree_blocks(b)); BUG_ON(b->written && !i->keys); BUG_ON(b->sets->data->seq != i->seq); + bch_check_key_order(b, i); cancel_delayed_work(&b->work); @@ -376,12 +377,8 @@ void bch_btree_node_write(struct btree *b, struct closure *parent) clear_bit(BTREE_NODE_dirty, &b->flags); change_bit(BTREE_NODE_write_idx, &b->flags); - bch_check_key_order(b, i); - do_btree_node_write(b); - pr_debug("%s block %i keys %i", pbtree(b), b->written, i->keys); - b->written += set_blocks(i, b->c); atomic_long_add(set_blocks(i, b->c) * b->c->sb.block_size, &PTR_CACHE(b->c, &b->key, 0)->btree_sectors_written); @@ -752,6 +749,8 @@ static struct btree *mca_cannibalize(struct cache_set *c, struct bkey *k, int ret = -ENOMEM; struct btree *i; + trace_bcache_btree_cache_cannibalize(c); + if (!cl) return ERR_PTR(-ENOMEM); @@ -770,7 +769,6 @@ static struct btree *mca_cannibalize(struct cache_set *c, struct bkey *k, return ERR_PTR(-EAGAIN); } - /* XXX: tracepoint */ c->try_harder = cl; c->try_harder_start = local_clock(); retry: @@ -956,13 +954,14 @@ static void btree_node_free(struct btree *b, struct btree_op *op) { unsigned i; + trace_bcache_btree_node_free(b); + /* * The BUG_ON() in btree_node_get() implies that we must have a write * lock on parent to free or even invalidate a node */ BUG_ON(op->lock <= b->level); BUG_ON(b == b->c->root); - pr_debug("bucket %s", pbtree(b)); if (btree_node_dirty(b)) btree_complete_write(b, btree_current_write(b)); @@ -1012,12 +1011,16 @@ retry: bch_bset_init_next(b); mutex_unlock(&c->bucket_lock); + + trace_bcache_btree_node_alloc(b); return b; err_free: bch_bucket_free(c, &k.key); __bkey_put(c, &k.key); err: mutex_unlock(&c->bucket_lock); + + trace_bcache_btree_node_alloc_fail(b); return b; } @@ -1254,7 +1257,7 @@ static void btree_gc_coalesce(struct btree *b, struct btree_op *op, btree_node_free(r->b, op); up_write(&r->b->lock); - pr_debug("coalesced %u nodes", nodes); + trace_bcache_btree_gc_coalesce(nodes); gc->nodes--; nodes--; @@ -1479,8 +1482,7 @@ static void bch_btree_gc(struct closure *cl) struct btree_op op; uint64_t start_time = local_clock(); - trace_bcache_gc_start(c->sb.set_uuid); - blktrace_msg_all(c, "Starting gc"); + trace_bcache_gc_start(c); memset(&stats, 0, sizeof(struct gc_stat)); closure_init_stack(&writes); @@ -1496,9 +1498,7 @@ static void bch_btree_gc(struct closure *cl) closure_sync(&writes); if (ret) { - blktrace_msg_all(c, "Stopped gc"); pr_warn("gc failed!"); - continue_at(cl, bch_btree_gc, bch_gc_wq); } @@ -1519,8 +1519,7 @@ static void bch_btree_gc(struct closure *cl) stats.in_use = (c->nbuckets - available) * 100 / c->nbuckets; memcpy(&c->gc_stats, &stats, sizeof(struct gc_stat)); - blktrace_msg_all(c, "Finished gc"); - trace_bcache_gc_end(c->sb.set_uuid); + trace_bcache_gc_end(c); continue_at(cl, bch_moving_gc, bch_gc_wq); } @@ -1901,12 +1900,11 @@ static int btree_split(struct btree *b, struct btree_op *op) split = set_blocks(n1->sets[0].data, n1->c) > (btree_blocks(b) * 4) / 5; - pr_debug("%ssplitting at %s keys %i", split ? "" : "not ", - pbtree(b), n1->sets[0].data->keys); - if (split) { unsigned keys = 0; + trace_bcache_btree_node_split(b, n1->sets[0].data->keys); + n2 = bch_btree_node_alloc(b->c, b->level, &op->cl); if (IS_ERR(n2)) goto err_free1; @@ -1941,8 +1939,11 @@ static int btree_split(struct btree *b, struct btree_op *op) bch_keylist_add(&op->keys, &n2->key); bch_btree_node_write(n2, &op->cl); rw_unlock(true, n2); - } else + } else { + trace_bcache_btree_node_compact(b, n1->sets[0].data->keys); + bch_btree_insert_keys(n1, op); + } bch_keylist_add(&op->keys, &n1->key); bch_btree_node_write(n1, &op->cl); @@ -2117,6 +2118,8 @@ void bch_btree_set_root(struct btree *b) { unsigned i; + trace_bcache_btree_set_root(b); + BUG_ON(!b->written); for (i = 0; i < KEY_PTRS(&b->key); i++) @@ -2130,7 +2133,6 @@ void bch_btree_set_root(struct btree *b) __bkey_put(b->c, &b->key); bch_journal_meta(b->c, NULL); - pr_debug("%s for %pf", pbtree(b), __builtin_return_address(0)); } /* Cache lookup */ @@ -2216,7 +2218,6 @@ static int submit_partial_cache_hit(struct btree *b, struct btree_op *op, n->bi_end_io = bch_cache_read_endio; n->bi_private = &s->cl; - trace_bcache_cache_hit(n); __bch_submit_bbio(n, b->c); } diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c index d285cd49104c..0f6d69658b61 100644 --- a/drivers/md/bcache/io.c +++ b/drivers/md/bcache/io.c @@ -9,6 +9,8 @@ #include "bset.h" #include "debug.h" +#include + static void bch_bi_idx_hack_endio(struct bio *bio, int error) { struct bio *p = bio->bi_private; diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 970d819d4350..5ca22149b749 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -9,6 +9,8 @@ #include "debug.h" #include "request.h" +#include + /* * Journal replay/recovery: * @@ -300,7 +302,8 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list, for (k = i->j.start; k < end(&i->j); k = bkey_next(k)) { - pr_debug("%s", pkey(k)); + trace_bcache_journal_replay_key(k); + bkey_copy(op->keys.top, k); bch_keylist_push(&op->keys); @@ -712,7 +715,8 @@ void bch_journal(struct closure *cl) spin_lock(&c->journal.lock); if (journal_full(&c->journal)) { - /* XXX: tracepoint */ + trace_bcache_journal_full(c); + closure_wait(&c->journal.wait, cl); journal_reclaim(c); @@ -728,13 +732,15 @@ void bch_journal(struct closure *cl) if (b * c->sb.block_size > PAGE_SECTORS << JSET_BITS || b > c->journal.blocks_free) { - /* XXX: If we were inserting so many keys that they won't fit in + trace_bcache_journal_entry_full(c); + + /* + * XXX: If we were inserting so many keys that they won't fit in * an _empty_ journal write, we'll deadlock. For now, handle * this in bch_keylist_realloc() - but something to think about. */ BUG_ON(!w->data->keys); - /* XXX: tracepoint */ BUG_ON(!closure_wait(&w->wait, cl)); closure_flush(&c->journal.io); diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c index 8589512c972e..04f6b97ffda6 100644 --- a/drivers/md/bcache/movinggc.c +++ b/drivers/md/bcache/movinggc.c @@ -9,6 +9,8 @@ #include "debug.h" #include "request.h" +#include + struct moving_io { struct keybuf_key *w; struct search s; @@ -49,9 +51,8 @@ static void write_moving_finish(struct closure *cl) while (bv-- != bio->bi_io_vec) __free_page(bv->bv_page); - pr_debug("%s %s", io->s.op.insert_collision - ? "collision moving" : "moved", - pkey(&io->w->key)); + if (io->s.op.insert_collision) + trace_bcache_gc_copy_collision(&io->w->key); bch_keybuf_del(&io->s.op.c->moving_gc_keys, io->w); @@ -94,8 +95,6 @@ static void write_moving(struct closure *cl) struct moving_io *io = container_of(s, struct moving_io, s); if (!s->error) { - trace_bcache_write_moving(&io->bio.bio); - moving_init(io); io->bio.bio.bi_sector = KEY_START(&io->w->key); @@ -122,7 +121,6 @@ static void read_moving_submit(struct closure *cl) struct moving_io *io = container_of(s, struct moving_io, s); struct bio *bio = &io->bio.bio; - trace_bcache_read_moving(bio); bch_submit_bbio(bio, s->op.c, &io->w->key, 0); continue_at(cl, write_moving, bch_gc_wq); @@ -162,7 +160,7 @@ static void read_moving(struct closure *cl) if (bch_bio_alloc_pages(bio, GFP_KERNEL)) goto err; - pr_debug("%s", pkey(&w->key)); + trace_bcache_gc_copy(&w->key); closure_call(&io->s.cl, read_moving_submit, NULL, &c->gc.cl); diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index e5ff12e52d5b..695469958c1e 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -530,10 +530,9 @@ static void bch_insert_data_loop(struct closure *cl) if (KEY_CSUM(k)) bio_csum(n, k); - pr_debug("%s", pkey(k)); + trace_bcache_cache_insert(k); bch_keylist_push(&op->keys); - trace_bcache_cache_insert(n, n->bi_sector, n->bi_bdev); n->bi_rw |= REQ_WRITE; bch_submit_bbio(n, op->c, k, 0); } while (n != bio); @@ -784,11 +783,8 @@ static void request_read_error(struct closure *cl) int i; if (s->recoverable) { - /* The cache read failed, but we can retry from the backing - * device. - */ - pr_debug("recovering at sector %llu", - (uint64_t) s->orig_bio->bi_sector); + /* Retry from the backing device: */ + trace_bcache_read_retry(s->orig_bio); s->error = 0; bv = s->bio.bio.bi_io_vec; @@ -806,7 +802,6 @@ static void request_read_error(struct closure *cl) /* XXX: invalidate cache */ - trace_bcache_read_retry(&s->bio.bio); closure_bio_submit(&s->bio.bio, &s->cl, s->d); } @@ -899,6 +894,7 @@ static void request_read_done_bh(struct closure *cl) struct cached_dev *dc = container_of(s->d, struct cached_dev, disk); bch_mark_cache_accounting(s, !s->cache_miss, s->op.skip); + trace_bcache_read(s->orig_bio, !s->cache_miss, s->op.skip); if (s->error) continue_at_nobarrier(cl, request_read_error, bcache_wq); @@ -969,7 +965,6 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s, s->cache_miss = miss; bio_get(s->op.cache_bio); - trace_bcache_cache_miss(s->orig_bio); closure_bio_submit(s->op.cache_bio, &s->cl, s->d); return ret; @@ -1040,15 +1035,15 @@ static void request_write(struct cached_dev *dc, struct search *s) if (should_writeback(dc, s->orig_bio)) s->writeback = true; + trace_bcache_write(s->orig_bio, s->writeback, s->op.skip); + if (!s->writeback) { s->op.cache_bio = bio_clone_bioset(bio, GFP_NOIO, dc->disk.bio_split); - trace_bcache_writethrough(s->orig_bio); closure_bio_submit(bio, cl, s->d); } else { s->op.cache_bio = bio; - trace_bcache_writeback(s->orig_bio); bch_writeback_add(dc, bio_sectors(bio)); } out: @@ -1058,7 +1053,6 @@ skip: s->op.skip = true; s->op.cache_bio = s->orig_bio; bio_get(s->op.cache_bio); - trace_bcache_write_skip(s->orig_bio); if ((bio->bi_rw & REQ_DISCARD) && !blk_queue_discard(bdev_get_queue(dc->bdev))) @@ -1088,9 +1082,10 @@ static void request_nodata(struct cached_dev *dc, struct search *s) /* Cached devices - read & write stuff */ -int bch_get_congested(struct cache_set *c) +unsigned bch_get_congested(struct cache_set *c) { int i; + long rand; if (!c->congested_read_threshold_us && !c->congested_write_threshold_us) @@ -1106,7 +1101,13 @@ int bch_get_congested(struct cache_set *c) i += CONGESTED_MAX; - return i <= 0 ? 1 : fract_exp_two(i, 6); + if (i > 0) + i = fract_exp_two(i, 6); + + rand = get_random_int(); + i -= bitmap_weight(&rand, BITS_PER_LONG); + + return i > 0 ? i : 1; } static void add_sequential(struct task_struct *t) @@ -1126,10 +1127,8 @@ static void check_should_skip(struct cached_dev *dc, struct search *s) { struct cache_set *c = s->op.c; struct bio *bio = &s->bio.bio; - - long rand; - int cutoff = bch_get_congested(c); unsigned mode = cache_mode(dc, bio); + unsigned sectors, congested = bch_get_congested(c); if (atomic_read(&dc->disk.detaching) || c->gc_stats.in_use > CUTOFF_CACHE_ADD || @@ -1147,17 +1146,14 @@ static void check_should_skip(struct cached_dev *dc, struct search *s) goto skip; } - if (!cutoff) { - cutoff = dc->sequential_cutoff >> 9; + if (!congested && !dc->sequential_cutoff) + goto rescale; - if (!cutoff) - goto rescale; - - if (mode == CACHE_MODE_WRITEBACK && - (bio->bi_rw & REQ_WRITE) && - (bio->bi_rw & REQ_SYNC)) - goto rescale; - } + if (!congested && + mode == CACHE_MODE_WRITEBACK && + (bio->bi_rw & REQ_WRITE) && + (bio->bi_rw & REQ_SYNC)) + goto rescale; if (dc->sequential_merge) { struct io *i; @@ -1192,12 +1188,19 @@ found: add_sequential(s->task); } - rand = get_random_int(); - cutoff -= bitmap_weight(&rand, BITS_PER_LONG); + sectors = max(s->task->sequential_io, + s->task->sequential_io_avg) >> 9; - if (cutoff <= (int) (max(s->task->sequential_io, - s->task->sequential_io_avg) >> 9)) + if (dc->sequential_cutoff && + sectors >= dc->sequential_cutoff >> 9) { + trace_bcache_bypass_sequential(s->orig_bio); goto skip; + } + + if (congested && sectors >= congested) { + trace_bcache_bypass_congested(s->orig_bio); + goto skip; + } rescale: bch_rescale_priorities(c, bio_sectors(bio)); diff --git a/drivers/md/bcache/request.h b/drivers/md/bcache/request.h index 254d9ab5707c..57dc4784f4f4 100644 --- a/drivers/md/bcache/request.h +++ b/drivers/md/bcache/request.h @@ -30,7 +30,7 @@ struct search { }; void bch_cache_read_endio(struct bio *, int); -int bch_get_congested(struct cache_set *); +unsigned bch_get_congested(struct cache_set *); void bch_insert_data(struct closure *cl); void bch_btree_insert_async(struct closure *); void bch_cache_read_endio(struct bio *, int); diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index e53f89988b08..47bc13745068 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -11,6 +11,7 @@ #include "debug.h" #include "request.h" +#include #include #include #include @@ -543,7 +544,6 @@ void bch_prio_write(struct cache *ca) pr_debug("free %zu, free_inc %zu, unused %zu", fifo_used(&ca->free), fifo_used(&ca->free_inc), fifo_used(&ca->unused)); - blktrace_msg(ca, "Starting priorities: " buckets_free(ca)); for (i = prio_buckets(ca) - 1; i >= 0; --i) { long bucket; diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index 29228b8a6ffe..f5c2d8695230 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -10,6 +10,7 @@ #include "btree.h" #include "request.h" +#include #include static const char * const cache_replacement_policies[] = { diff --git a/drivers/md/bcache/trace.c b/drivers/md/bcache/trace.c index 983f9bb411bc..7f4f38aa16ae 100644 --- a/drivers/md/bcache/trace.c +++ b/drivers/md/bcache/trace.c @@ -2,6 +2,7 @@ #include "btree.h" #include "request.h" +#include #include #define CREATE_TRACE_POINTS @@ -9,18 +10,42 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_request_start); EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_request_end); -EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_passthrough); -EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_cache_hit); -EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_cache_miss); + +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_bypass_sequential); +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_bypass_congested); + +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_read); +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_write); EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_read_retry); -EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_writethrough); -EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_writeback); -EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_write_skip); + +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_cache_insert); + +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_journal_replay_key); +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_journal_write); +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_journal_full); +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_journal_entry_full); + +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_cache_cannibalize); + EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_read); EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_write); -EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_write_dirty); -EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_read_dirty); -EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_journal_write); -EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_cache_insert); + +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_alloc); +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_alloc_fail); +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_free); + +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_gc_coalesce); EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_gc_start); EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_gc_end); +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_gc_copy); +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_gc_copy_collision); + +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_split); +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_compact); +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_set_root); + +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_alloc_invalidate); +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_alloc_fail); + +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_writeback); +EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_writeback_collision); diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h index 577393e38c3a..e02780545f12 100644 --- a/drivers/md/bcache/util.h +++ b/drivers/md/bcache/util.h @@ -15,8 +15,6 @@ struct closure; -#include - #ifdef CONFIG_BCACHE_EDEBUG #define atomic_dec_bug(v) BUG_ON(atomic_dec_return(v) < 0) diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index 2714ed3991d1..82f6d4577be2 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -10,6 +10,8 @@ #include "btree.h" #include "debug.h" +#include + static struct workqueue_struct *dirty_wq; static void read_dirty(struct closure *); @@ -236,10 +238,12 @@ static void write_dirty_finish(struct closure *cl) for (i = 0; i < KEY_PTRS(&w->key); i++) atomic_inc(&PTR_BUCKET(dc->disk.c, &w->key, i)->pin); - pr_debug("clearing %s", pkey(&w->key)); bch_btree_insert(&op, dc->disk.c); closure_sync(&op.cl); + if (op.insert_collision) + trace_bcache_writeback_collision(&w->key); + atomic_long_inc(op.insert_collision ? &dc->disk.c->writeback_keys_failed : &dc->disk.c->writeback_keys_done); @@ -275,7 +279,6 @@ static void write_dirty(struct closure *cl) io->bio.bi_bdev = io->dc->bdev; io->bio.bi_end_io = dirty_endio; - trace_bcache_write_dirty(&io->bio); closure_bio_submit(&io->bio, cl, &io->dc->disk); continue_at(cl, write_dirty_finish, dirty_wq); @@ -296,7 +299,6 @@ static void read_dirty_submit(struct closure *cl) { struct dirty_io *io = container_of(cl, struct dirty_io, cl); - trace_bcache_read_dirty(&io->bio); closure_bio_submit(&io->bio, cl, &io->dc->disk); continue_at(cl, write_dirty, dirty_wq); @@ -352,7 +354,7 @@ static void read_dirty(struct closure *cl) if (bch_bio_alloc_pages(&io->bio, GFP_KERNEL)) goto err_free; - pr_debug("%s", pkey(&w->key)); + trace_bcache_writeback(&w->key); closure_call(&io->cl, read_dirty_submit, NULL, &dc->disk.cl); diff --git a/include/trace/events/bcache.h b/include/trace/events/bcache.h index 3cc5a0b278c3..c9952b36fcea 100644 --- a/include/trace/events/bcache.h +++ b/include/trace/events/bcache.h @@ -9,9 +9,7 @@ struct search; DECLARE_EVENT_CLASS(bcache_request, - TP_PROTO(struct search *s, struct bio *bio), - TP_ARGS(s, bio), TP_STRUCT__entry( @@ -22,7 +20,6 @@ DECLARE_EVENT_CLASS(bcache_request, __field(dev_t, orig_sector ) __field(unsigned int, nr_sector ) __array(char, rwbs, 6 ) - __array(char, comm, TASK_COMM_LEN ) ), TP_fast_assign( @@ -33,36 +30,66 @@ DECLARE_EVENT_CLASS(bcache_request, __entry->orig_sector = bio->bi_sector - 16; __entry->nr_sector = bio->bi_size >> 9; blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); - memcpy(__entry->comm, current->comm, TASK_COMM_LEN); ), - TP_printk("%d,%d %s %llu + %u [%s] (from %d,%d @ %llu)", + TP_printk("%d,%d %s %llu + %u (from %d,%d @ %llu)", MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->rwbs, - (unsigned long long)__entry->sector, - __entry->nr_sector, __entry->comm, - __entry->orig_major, __entry->orig_minor, + __entry->rwbs, (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->orig_major, __entry->orig_minor, (unsigned long long)__entry->orig_sector) ); -DEFINE_EVENT(bcache_request, bcache_request_start, +DECLARE_EVENT_CLASS(bkey, + TP_PROTO(struct bkey *k), + TP_ARGS(k), - TP_PROTO(struct search *s, struct bio *bio), + TP_STRUCT__entry( + __field(u32, size ) + __field(u32, inode ) + __field(u64, offset ) + __field(bool, dirty ) + ), - TP_ARGS(s, bio) + TP_fast_assign( + __entry->inode = KEY_INODE(k); + __entry->offset = KEY_OFFSET(k); + __entry->size = KEY_SIZE(k); + __entry->dirty = KEY_DIRTY(k); + ), + + TP_printk("%u:%llu len %u dirty %u", __entry->inode, + __entry->offset, __entry->size, __entry->dirty) ); -DEFINE_EVENT(bcache_request, bcache_request_end, +DECLARE_EVENT_CLASS(btree_node, + TP_PROTO(struct btree *b), + TP_ARGS(b), + + TP_STRUCT__entry( + __field(size_t, bucket ) + ), + + TP_fast_assign( + __entry->bucket = PTR_BUCKET_NR(b->c, &b->key, 0); + ), + TP_printk("bucket %zu", __entry->bucket) +); + +/* request.c */ + +DEFINE_EVENT(bcache_request, bcache_request_start, TP_PROTO(struct search *s, struct bio *bio), + TP_ARGS(s, bio) +); +DEFINE_EVENT(bcache_request, bcache_request_end, + TP_PROTO(struct search *s, struct bio *bio), TP_ARGS(s, bio) ); DECLARE_EVENT_CLASS(bcache_bio, - TP_PROTO(struct bio *bio), - TP_ARGS(bio), TP_STRUCT__entry( @@ -70,7 +97,6 @@ DECLARE_EVENT_CLASS(bcache_bio, __field(sector_t, sector ) __field(unsigned int, nr_sector ) __array(char, rwbs, 6 ) - __array(char, comm, TASK_COMM_LEN ) ), TP_fast_assign( @@ -78,191 +104,295 @@ DECLARE_EVENT_CLASS(bcache_bio, __entry->sector = bio->bi_sector; __entry->nr_sector = bio->bi_size >> 9; blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); - memcpy(__entry->comm, current->comm, TASK_COMM_LEN); ), - TP_printk("%d,%d %s %llu + %u [%s]", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->rwbs, - (unsigned long long)__entry->sector, - __entry->nr_sector, __entry->comm) + TP_printk("%d,%d %s %llu + %u", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, + (unsigned long long)__entry->sector, __entry->nr_sector) ); - -DEFINE_EVENT(bcache_bio, bcache_passthrough, - +DEFINE_EVENT(bcache_bio, bcache_bypass_sequential, TP_PROTO(struct bio *bio), - TP_ARGS(bio) ); -DEFINE_EVENT(bcache_bio, bcache_cache_hit, - +DEFINE_EVENT(bcache_bio, bcache_bypass_congested, TP_PROTO(struct bio *bio), - TP_ARGS(bio) ); -DEFINE_EVENT(bcache_bio, bcache_cache_miss, +TRACE_EVENT(bcache_read, + TP_PROTO(struct bio *bio, bool hit, bool bypass), + TP_ARGS(bio, hit, bypass), - TP_PROTO(struct bio *bio), + TP_STRUCT__entry( + __field(dev_t, dev ) + __field(sector_t, sector ) + __field(unsigned int, nr_sector ) + __array(char, rwbs, 6 ) + __field(bool, cache_hit ) + __field(bool, bypass ) + ), - TP_ARGS(bio) + TP_fast_assign( + __entry->dev = bio->bi_bdev->bd_dev; + __entry->sector = bio->bi_sector; + __entry->nr_sector = bio->bi_size >> 9; + blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); + __entry->cache_hit = hit; + __entry->bypass = bypass; + ), + + TP_printk("%d,%d %s %llu + %u hit %u bypass %u", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->rwbs, (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->cache_hit, __entry->bypass) ); -DEFINE_EVENT(bcache_bio, bcache_read_retry, +TRACE_EVENT(bcache_write, + TP_PROTO(struct bio *bio, bool writeback, bool bypass), + TP_ARGS(bio, writeback, bypass), - TP_PROTO(struct bio *bio), + TP_STRUCT__entry( + __field(dev_t, dev ) + __field(sector_t, sector ) + __field(unsigned int, nr_sector ) + __array(char, rwbs, 6 ) + __field(bool, writeback ) + __field(bool, bypass ) + ), - TP_ARGS(bio) -); + TP_fast_assign( + __entry->dev = bio->bi_bdev->bd_dev; + __entry->sector = bio->bi_sector; + __entry->nr_sector = bio->bi_size >> 9; + blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); + __entry->writeback = writeback; + __entry->bypass = bypass; + ), -DEFINE_EVENT(bcache_bio, bcache_writethrough, + TP_printk("%d,%d %s %llu + %u hit %u bypass %u", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->rwbs, (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->writeback, __entry->bypass) +); +DEFINE_EVENT(bcache_bio, bcache_read_retry, TP_PROTO(struct bio *bio), - TP_ARGS(bio) ); -DEFINE_EVENT(bcache_bio, bcache_writeback, +DEFINE_EVENT(bkey, bcache_cache_insert, + TP_PROTO(struct bkey *k), + TP_ARGS(k) +); - TP_PROTO(struct bio *bio), +/* Journal */ - TP_ARGS(bio) -); +DECLARE_EVENT_CLASS(cache_set, + TP_PROTO(struct cache_set *c), + TP_ARGS(c), -DEFINE_EVENT(bcache_bio, bcache_write_skip, + TP_STRUCT__entry( + __array(char, uuid, 16 ) + ), - TP_PROTO(struct bio *bio), + TP_fast_assign( + memcpy(__entry->uuid, c->sb.set_uuid, 16); + ), - TP_ARGS(bio) + TP_printk("%pU", __entry->uuid) ); -DEFINE_EVENT(bcache_bio, bcache_btree_read, - - TP_PROTO(struct bio *bio), +DEFINE_EVENT(bkey, bcache_journal_replay_key, + TP_PROTO(struct bkey *k), + TP_ARGS(k) +); - TP_ARGS(bio) +DEFINE_EVENT(cache_set, bcache_journal_full, + TP_PROTO(struct cache_set *c), + TP_ARGS(c) ); -DEFINE_EVENT(bcache_bio, bcache_btree_write, +DEFINE_EVENT(cache_set, bcache_journal_entry_full, + TP_PROTO(struct cache_set *c), + TP_ARGS(c) +); +DEFINE_EVENT(bcache_bio, bcache_journal_write, TP_PROTO(struct bio *bio), - TP_ARGS(bio) ); -DEFINE_EVENT(bcache_bio, bcache_write_dirty, +/* Btree */ - TP_PROTO(struct bio *bio), +DEFINE_EVENT(cache_set, bcache_btree_cache_cannibalize, + TP_PROTO(struct cache_set *c), + TP_ARGS(c) +); - TP_ARGS(bio) +DEFINE_EVENT(btree_node, bcache_btree_read, + TP_PROTO(struct btree *b), + TP_ARGS(b) ); -DEFINE_EVENT(bcache_bio, bcache_read_dirty, +TRACE_EVENT(bcache_btree_write, + TP_PROTO(struct btree *b), + TP_ARGS(b), - TP_PROTO(struct bio *bio), + TP_STRUCT__entry( + __field(size_t, bucket ) + __field(unsigned, block ) + __field(unsigned, keys ) + ), - TP_ARGS(bio) + TP_fast_assign( + __entry->bucket = PTR_BUCKET_NR(b->c, &b->key, 0); + __entry->block = b->written; + __entry->keys = b->sets[b->nsets].data->keys; + ), + + TP_printk("bucket %zu", __entry->bucket) ); -DEFINE_EVENT(bcache_bio, bcache_write_moving, +DEFINE_EVENT(btree_node, bcache_btree_node_alloc, + TP_PROTO(struct btree *b), + TP_ARGS(b) +); - TP_PROTO(struct bio *bio), +DEFINE_EVENT(btree_node, bcache_btree_node_alloc_fail, + TP_PROTO(struct btree *b), + TP_ARGS(b) +); - TP_ARGS(bio) +DEFINE_EVENT(btree_node, bcache_btree_node_free, + TP_PROTO(struct btree *b), + TP_ARGS(b) ); -DEFINE_EVENT(bcache_bio, bcache_read_moving, +TRACE_EVENT(bcache_btree_gc_coalesce, + TP_PROTO(unsigned nodes), + TP_ARGS(nodes), - TP_PROTO(struct bio *bio), + TP_STRUCT__entry( + __field(unsigned, nodes ) + ), - TP_ARGS(bio) -); + TP_fast_assign( + __entry->nodes = nodes; + ), -DEFINE_EVENT(bcache_bio, bcache_journal_write, + TP_printk("coalesced %u nodes", __entry->nodes) +); - TP_PROTO(struct bio *bio), +DEFINE_EVENT(cache_set, bcache_gc_start, + TP_PROTO(struct cache_set *c), + TP_ARGS(c) +); - TP_ARGS(bio) +DEFINE_EVENT(cache_set, bcache_gc_end, + TP_PROTO(struct cache_set *c), + TP_ARGS(c) ); -DECLARE_EVENT_CLASS(bcache_cache_bio, +DEFINE_EVENT(bkey, bcache_gc_copy, + TP_PROTO(struct bkey *k), + TP_ARGS(k) +); - TP_PROTO(struct bio *bio, - sector_t orig_sector, - struct block_device* orig_bdev), +DEFINE_EVENT(bkey, bcache_gc_copy_collision, + TP_PROTO(struct bkey *k), + TP_ARGS(k) +); - TP_ARGS(bio, orig_sector, orig_bdev), +DECLARE_EVENT_CLASS(btree_split, + TP_PROTO(struct btree *b, unsigned keys), + TP_ARGS(b, keys), TP_STRUCT__entry( - __field(dev_t, dev ) - __field(dev_t, orig_dev ) - __field(sector_t, sector ) - __field(sector_t, orig_sector ) - __field(unsigned int, nr_sector ) - __array(char, rwbs, 6 ) - __array(char, comm, TASK_COMM_LEN ) + __field(size_t, bucket ) + __field(unsigned, keys ) ), TP_fast_assign( - __entry->dev = bio->bi_bdev->bd_dev; - __entry->orig_dev = orig_bdev->bd_dev; - __entry->sector = bio->bi_sector; - __entry->orig_sector = orig_sector; - __entry->nr_sector = bio->bi_size >> 9; - blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); - memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + __entry->bucket = PTR_BUCKET_NR(b->c, &b->key, 0); + __entry->keys = keys; ), - TP_printk("%d,%d %s %llu + %u [%s] (from %d,%d %llu)", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->rwbs, - (unsigned long long)__entry->sector, - __entry->nr_sector, __entry->comm, - MAJOR(__entry->orig_dev), MINOR(__entry->orig_dev), - (unsigned long long)__entry->orig_sector) + TP_printk("bucket %zu keys %u", __entry->bucket, __entry->keys) ); -DEFINE_EVENT(bcache_cache_bio, bcache_cache_insert, - - TP_PROTO(struct bio *bio, - sector_t orig_sector, - struct block_device *orig_bdev), +DEFINE_EVENT(btree_split, bcache_btree_node_split, + TP_PROTO(struct btree *b, unsigned keys), + TP_ARGS(b, keys) +); - TP_ARGS(bio, orig_sector, orig_bdev) +DEFINE_EVENT(btree_split, bcache_btree_node_compact, + TP_PROTO(struct btree *b, unsigned keys), + TP_ARGS(b, keys) ); -DECLARE_EVENT_CLASS(bcache_gc, +DEFINE_EVENT(btree_node, bcache_btree_set_root, + TP_PROTO(struct btree *b), + TP_ARGS(b) +); - TP_PROTO(uint8_t *uuid), +/* Allocator */ - TP_ARGS(uuid), +TRACE_EVENT(bcache_alloc_invalidate, + TP_PROTO(struct cache *ca), + TP_ARGS(ca), TP_STRUCT__entry( - __field(uint8_t *, uuid) + __field(unsigned, free ) + __field(unsigned, free_inc ) + __field(unsigned, free_inc_size ) + __field(unsigned, unused ) ), TP_fast_assign( - __entry->uuid = uuid; + __entry->free = fifo_used(&ca->free); + __entry->free_inc = fifo_used(&ca->free_inc); + __entry->free_inc_size = ca->free_inc.size; + __entry->unused = fifo_used(&ca->unused); ), - TP_printk("%pU", __entry->uuid) + TP_printk("free %u free_inc %u/%u unused %u", __entry->free, + __entry->free_inc, __entry->free_inc_size, __entry->unused) ); +TRACE_EVENT(bcache_alloc_fail, + TP_PROTO(struct cache *ca), + TP_ARGS(ca), -DEFINE_EVENT(bcache_gc, bcache_gc_start, + TP_STRUCT__entry( + __field(unsigned, free ) + __field(unsigned, free_inc ) + __field(unsigned, unused ) + __field(unsigned, blocked ) + ), - TP_PROTO(uint8_t *uuid), + TP_fast_assign( + __entry->free = fifo_used(&ca->free); + __entry->free_inc = fifo_used(&ca->free_inc); + __entry->unused = fifo_used(&ca->unused); + __entry->blocked = atomic_read(&ca->set->prio_blocked); + ), - TP_ARGS(uuid) + TP_printk("free %u free_inc %u unused %u blocked %u", __entry->free, + __entry->free_inc, __entry->unused, __entry->blocked) ); -DEFINE_EVENT(bcache_gc, bcache_gc_end, +/* Background writeback */ - TP_PROTO(uint8_t *uuid), +DEFINE_EVENT(bkey, bcache_writeback, + TP_PROTO(struct bkey *k), + TP_ARGS(k) +); - TP_ARGS(uuid) +DEFINE_EVENT(bkey, bcache_writeback_collision, + TP_PROTO(struct bkey *k), + TP_ARGS(k) ); #endif /* _TRACE_BCACHE_H */