lib/rhashtable.c - rhashtable: Per bucket locks & deferred expansion/shrinking
1 /*
2  * Resizable, Scalable, Concurrent Hash Table
3  *
4  * Copyright (c) 2014 Thomas Graf <tgraf@suug.ch>
5  * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
6  *
7  * Based on the following paper:
8  * https://www.usenix.org/legacy/event/atc11/tech/final_files/Triplett.pdf
9  *
10  * Code partially derived from nft_hash
11  *
12  * This program is free software; you can redistribute it and/or modify
13  * it under the terms of the GNU General Public License version 2 as
14  * published by the Free Software Foundation.
15  */
16
17 #include <linux/kernel.h>
18 #include <linux/init.h>
19 #include <linux/log2.h>
20 #include <linux/slab.h>
21 #include <linux/vmalloc.h>
22 #include <linux/mm.h>
23 #include <linux/jhash.h>
24 #include <linux/random.h>
25 #include <linux/rhashtable.h>
26
27 #define HASH_DEFAULT_SIZE       64UL
28 #define HASH_MIN_SIZE           4UL
29 #define BUCKET_LOCKS_PER_CPU   128UL
30
31 enum {
32         RHT_LOCK_NORMAL,
33         RHT_LOCK_NESTED,
34         RHT_LOCK_NESTED2,
35 };
36
37 /* The bucket lock is selected based on the hash and protects mutations
38  * on a group of hash buckets.
39  *
40  * IMPORTANT: When holding the bucket lock of both the old and new table
41  * during expansion and shrinking, the old bucket lock must always be
42  * acquired first.
43  */
44 static spinlock_t *bucket_lock(const struct bucket_table *tbl, u32 hash)
45 {
46         return &tbl->locks[hash & tbl->locks_mask];
47 }
48
49 #define ASSERT_RHT_MUTEX(HT) BUG_ON(!lockdep_rht_mutex_is_held(HT))
50 #define ASSERT_BUCKET_LOCK(TBL, HASH) \
51         BUG_ON(!lockdep_rht_bucket_is_held(TBL, HASH))
52
53 #ifdef CONFIG_PROVE_LOCKING
54 int lockdep_rht_mutex_is_held(struct rhashtable *ht)
55 {
56         return (debug_locks) ? lockdep_is_held(&ht->mutex) : 1;
57 }
58 EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held);
59
60 int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash)
61 {
62         spinlock_t *lock = bucket_lock(tbl, hash);
63
64         return (debug_locks) ? lockdep_is_held(lock) : 1;
65 }
66 EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held);
67 #endif
68
69 static void *rht_obj(const struct rhashtable *ht, const struct rhash_head *he)
70 {
71         return (void *) he - ht->p.head_offset;
72 }
73
74 static u32 rht_bucket_index(const struct bucket_table *tbl, u32 hash)
75 {
76         return hash & (tbl->size - 1);
77 }
78
79 static u32 obj_raw_hashfn(const struct rhashtable *ht, const void *ptr)
80 {
81         u32 hash;
82
83         if (unlikely(!ht->p.key_len))
84                 hash = ht->p.obj_hashfn(ptr, ht->p.hash_rnd);
85         else
86                 hash = ht->p.hashfn(ptr + ht->p.key_offset, ht->p.key_len,
87                                     ht->p.hash_rnd);
88
89         return hash;
90 }
91
92 static u32 key_hashfn(struct rhashtable *ht, const void *key, u32 len)
93 {
94         struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht);
95         u32 hash;
96
97         hash = ht->p.hashfn(key, len, ht->p.hash_rnd);
98
99         return rht_bucket_index(tbl, hash);
100 }
101
102 static u32 head_hashfn(const struct rhashtable *ht,
103                        const struct bucket_table *tbl,
104                        const struct rhash_head *he)
105 {
106         return rht_bucket_index(tbl, obj_raw_hashfn(ht, rht_obj(ht, he)));
107 }
108
109 static struct rhash_head __rcu **bucket_tail(struct bucket_table *tbl, u32 n)
110 {
111         struct rhash_head __rcu **pprev;
112
113         for (pprev = &tbl->buckets[n];
114              rht_dereference_bucket(*pprev, tbl, n);
115              pprev = &rht_dereference_bucket(*pprev, tbl, n)->next)
116                 ;
117
118         return pprev;
119 }
120
121 static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl)
122 {
123         unsigned int i, size;
124 #if defined(CONFIG_PROVE_LOCKING)
125         unsigned int nr_pcpus = 2;
126 #else
127         unsigned int nr_pcpus = num_possible_cpus();
128 #endif
129
130         nr_pcpus = min_t(unsigned int, nr_pcpus, 32UL);
131         size = roundup_pow_of_two(nr_pcpus * ht->p.locks_mul);
132
133         /* Never allocate more than one lock per bucket */
134         size = min_t(unsigned int, size, tbl->size);
135
136         if (sizeof(spinlock_t) != 0) {
137 #ifdef CONFIG_NUMA
138                 if (size * sizeof(spinlock_t) > PAGE_SIZE)
139                         tbl->locks = vmalloc(size * sizeof(spinlock_t));
140                 else
141 #endif
142                 tbl->locks = kmalloc_array(size, sizeof(spinlock_t),
143                                            GFP_KERNEL);
144                 if (!tbl->locks)
145                         return -ENOMEM;
146                 for (i = 0; i < size; i++)
147                         spin_lock_init(&tbl->locks[i]);
148         }
149         tbl->locks_mask = size - 1;
150
151         return 0;
152 }
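/* Sizing example (illustrative, not tied to a specific machine): with the
 * default locks_mul of BUCKET_LOCKS_PER_CPU (128) and 8 possible CPUs,
 * size = roundup_pow_of_two(8 * 128) = 1024 lock slots. A table of 256
 * buckets is then capped to 256 locks (one per bucket), while a 4096
 * bucket table keeps 1024 locks, i.e. each lock protects a group of
 * 4 buckets.
 */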
153
154 static void bucket_table_free(const struct bucket_table *tbl)
155 {
156         if (tbl)
157                 kvfree(tbl->locks);
158
159         kvfree(tbl);
160 }
161
162 static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
163                                                size_t nbuckets)
164 {
165         struct bucket_table *tbl;
166         size_t size;
167
168         size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]);
169         tbl = kzalloc(size, GFP_KERNEL | __GFP_NOWARN);
170         if (tbl == NULL)
171                 tbl = vzalloc(size);
172
173         if (tbl == NULL)
174                 return NULL;
175
176         tbl->size = nbuckets;
177
178         if (alloc_bucket_locks(ht, tbl) < 0) {
179                 bucket_table_free(tbl);
180                 return NULL;
181         }
182
183         return tbl;
184 }
185
186 /**
187  * rht_grow_above_75 - returns true if nelems > 0.75 * table-size
188  * @ht:         hash table
189  * @new_size:   new table size
190  */
191 bool rht_grow_above_75(const struct rhashtable *ht, size_t new_size)
192 {
193         /* Expand table when exceeding 75% load */
194         return atomic_read(&ht->nelems) > (new_size / 4 * 3);
195 }
196 EXPORT_SYMBOL_GPL(rht_grow_above_75);
197
198 /**
199  * rht_shrink_below_30 - returns true if nelems < 0.3 * table-size
200  * @ht:         hash table
201  * @new_size:   new table size
202  */
203 bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size)
204 {
205         /* Shrink table beneath 30% load */
206         return atomic_read(&ht->nelems) < (new_size * 3 / 10);
207 }
208 EXPORT_SYMBOL_GPL(rht_shrink_below_30);
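/* Watermark example (illustrative): for a table of new_size = 64 buckets,
 * rht_grow_above_75() returns true once nelems exceeds 64 / 4 * 3 = 48
 * entries, and rht_shrink_below_30() returns true once nelems drops below
 * 64 * 3 / 10 = 19 entries.
 */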
209
210 static void hashtable_chain_unzip(const struct rhashtable *ht,
211                                   const struct bucket_table *new_tbl,
212                                   struct bucket_table *old_tbl,
213                                   size_t old_hash)
214 {
215         struct rhash_head *he, *p, *next;
216         spinlock_t *new_bucket_lock, *new_bucket_lock2 = NULL;
217         unsigned int new_hash, new_hash2;
218
219         ASSERT_BUCKET_LOCK(old_tbl, old_hash);
220
221         /* Old bucket empty, no work needed. */
222         p = rht_dereference_bucket(old_tbl->buckets[old_hash], old_tbl,
223                                    old_hash);
224         if (!p)
225                 return;
226
227         new_hash = new_hash2 = head_hashfn(ht, new_tbl, p);
228         new_bucket_lock = bucket_lock(new_tbl, new_hash);
229
230         /* Advance p along the old bucket chain while the following
231          * nodes still hash to the same bucket in the new table, so
232          * that p ends up on the last node of that leading run.
233          */
234         rht_for_each_continue(he, p->next, old_tbl, old_hash) {
235                 new_hash2 = head_hashfn(ht, new_tbl, he);
236                 if (new_hash != new_hash2)
237                         break;
238                 p = he;
239         }
240         rcu_assign_pointer(old_tbl->buckets[old_hash], p->next);
241
242         spin_lock_bh_nested(new_bucket_lock, RHT_LOCK_NESTED);
243
244         /* If we have encountered an entry that maps to a different bucket in
245          * the new table, lock down that bucket as well, since we might
246          * cut off the end of its chain.
247          */
248         new_bucket_lock2 = bucket_lock(new_tbl, new_hash2);
249         if (new_bucket_lock != new_bucket_lock2)
250                 spin_lock_bh_nested(new_bucket_lock2, RHT_LOCK_NESTED2);
251
252         /* Find the subsequent node which does hash to the same
253          * bucket as node P, or NULL if no such node exists.
254          */
255         next = NULL;
256         if (he) {
257                 rht_for_each_continue(he, he->next, old_tbl, old_hash) {
258                         if (head_hashfn(ht, new_tbl, he) == new_hash) {
259                                 next = he;
260                                 break;
261                         }
262                 }
263         }
264
265         /* Set p's next pointer to that subsequent node pointer,
266          * bypassing the nodes which do not hash to p's bucket
267          */
268         rcu_assign_pointer(p->next, next);
269
270         if (new_bucket_lock != new_bucket_lock2)
271                 spin_unlock_bh(new_bucket_lock2);
272         spin_unlock_bh(new_bucket_lock);
273 }
274
275 static void link_old_to_new(struct bucket_table *new_tbl,
276                             unsigned int new_hash, struct rhash_head *entry)
277 {
278         spinlock_t *new_bucket_lock;
279
280         new_bucket_lock = bucket_lock(new_tbl, new_hash);
281
282         spin_lock_bh_nested(new_bucket_lock, RHT_LOCK_NESTED);
283         rcu_assign_pointer(*bucket_tail(new_tbl, new_hash), entry);
284         spin_unlock_bh(new_bucket_lock);
285 }
286
287 /**
288  * rhashtable_expand - Expand hash table while allowing concurrent lookups
289  * @ht:         the hash table to expand
290  *
291  * A secondary bucket array is allocated and the hash entries are migrated
292  * while keeping them on both lists until the end of the RCU grace period.
293  *
294  * This function may only be called in a context where it is safe to call
295  * synchronize_rcu(), e.g. not within a rcu_read_lock() section.
296  *
297  * The caller must ensure that no concurrent resizing occurs by holding
298  * ht->mutex.
299  *
300  * It is valid to have concurrent insertions and deletions protected by per
301  * bucket locks or concurrent RCU protected lookups and traversals.
302  */
303 int rhashtable_expand(struct rhashtable *ht)
304 {
305         struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht);
306         struct rhash_head *he;
307         spinlock_t *old_bucket_lock;
308         unsigned int new_hash, old_hash;
309         bool complete = false;
310
311         ASSERT_RHT_MUTEX(ht);
312
313         if (ht->p.max_shift && ht->shift >= ht->p.max_shift)
314                 return 0;
315
316         new_tbl = bucket_table_alloc(ht, old_tbl->size * 2);
317         if (new_tbl == NULL)
318                 return -ENOMEM;
319
320         ht->shift++;
321
322         /* Make insertions go into the new, empty table right away. Deletions
323          * and lookups will be attempted in both tables until we synchronize.
324          * The synchronize_rcu() guarantees that the new table has been
325          * picked up everywhere, so no new additions go into the old table while we relink.
326          */
327         rcu_assign_pointer(ht->future_tbl, new_tbl);
328         synchronize_rcu();
329
330         /* For each new bucket, search the corresponding old bucket for the
331          * first entry that hashes to the new bucket, and link the end of the
332          * newly formed bucket chain (containing entries added to future
333          * table) to that entry. Since all the entries which will end up in
334          * the new bucket appear in the same old bucket, this constructs an
335          * entirely valid new hash table, but with multiple buckets
336          * "zipped" together into a single imprecise chain.
337          */
338         for (new_hash = 0; new_hash < new_tbl->size; new_hash++) {
339                 old_hash = rht_bucket_index(old_tbl, new_hash);
340                 old_bucket_lock = bucket_lock(old_tbl, old_hash);
341
342                 spin_lock_bh(old_bucket_lock);
343                 rht_for_each(he, old_tbl, old_hash) {
344                         if (head_hashfn(ht, new_tbl, he) == new_hash) {
345                                 link_old_to_new(new_tbl, new_hash, he);
346                                 break;
347                         }
348                 }
349                 spin_unlock_bh(old_bucket_lock);
350         }
351
352         /* Publish the new table pointer. Lookups may now traverse
353          * the new table, but they will not benefit from any
354          * additional efficiency until later steps unzip the buckets.
355          */
356         rcu_assign_pointer(ht->tbl, new_tbl);
357
358         /* Unzip interleaved hash chains */
359         while (!complete && !ht->being_destroyed) {
360                 /* Wait for readers. All new readers will see the new
361                  * table, and thus no references to the old table will
362                  * remain.
363                  */
364                 synchronize_rcu();
365
366                 /* For each bucket in the old table (each of which
367                  * contains items from multiple buckets of the new
368                  * table): ...
369                  */
370                 complete = true;
371                 for (old_hash = 0; old_hash < old_tbl->size; old_hash++) {
372                         old_bucket_lock = bucket_lock(old_tbl, old_hash);
373                         spin_lock_bh(old_bucket_lock);
374
375                         hashtable_chain_unzip(ht, new_tbl, old_tbl, old_hash);
376                         if (old_tbl->buckets[old_hash] != NULL)
377                                 complete = false;
378
379                         spin_unlock_bh(old_bucket_lock);
380                 }
381         }
382
383         bucket_table_free(old_tbl);
384         return 0;
385 }
386 EXPORT_SYMBOL_GPL(rhashtable_expand);
387
388 /**
389  * rhashtable_shrink - Shrink hash table while allowing concurrent lookups
390  * @ht:         the hash table to shrink
391  *
392  * This function may only be called in a context where it is safe to call
393  * synchronize_rcu(), e.g. not within a rcu_read_lock() section.
394  *
395  * The caller must ensure that no concurrent resizing occurs by holding
396  * ht->mutex.
397  *
401  * It is valid to have concurrent insertions and deletions protected by per
402  * bucket locks or concurrent RCU protected lookups and traversals.
403  */
404 int rhashtable_shrink(struct rhashtable *ht)
405 {
406         struct bucket_table *new_tbl, *tbl = rht_dereference(ht->tbl, ht);
407         spinlock_t *new_bucket_lock, *old_bucket_lock1, *old_bucket_lock2;
408         unsigned int new_hash;
409
410         ASSERT_RHT_MUTEX(ht);
411
412         if (ht->shift <= ht->p.min_shift)
413                 return 0;
414
415         new_tbl = bucket_table_alloc(ht, tbl->size / 2);
416         if (new_tbl == NULL)
417                 return -ENOMEM;
418
419         rcu_assign_pointer(ht->future_tbl, new_tbl);
420         synchronize_rcu();
421
422         /* Link the first entry in the old bucket to the end of the
423          * bucket in the new table. As entries are concurrently being
424          * added to the new table, lock down the new bucket. As we
425          * always divide the size in half when shrinking, each bucket
426          * in the new table maps to exactly two buckets in the old
427          * table.
428          *
429          * As removals can occur concurrently on the old table, we need
430          * to lock down both matching buckets in the old table.
431          */
432         for (new_hash = 0; new_hash < new_tbl->size; new_hash++) {
433                 old_bucket_lock1 = bucket_lock(tbl, new_hash);
434                 old_bucket_lock2 = bucket_lock(tbl, new_hash + new_tbl->size);
435                 new_bucket_lock = bucket_lock(new_tbl, new_hash);
436
437                 spin_lock_bh(old_bucket_lock1);
438                 spin_lock_bh_nested(old_bucket_lock2, RHT_LOCK_NESTED);
439                 spin_lock_bh_nested(new_bucket_lock, RHT_LOCK_NESTED2);
440
441                 rcu_assign_pointer(*bucket_tail(new_tbl, new_hash),
442                                    tbl->buckets[new_hash]);
443                 rcu_assign_pointer(*bucket_tail(new_tbl, new_hash),
444                                    tbl->buckets[new_hash + new_tbl->size]);
445
446                 spin_unlock_bh(new_bucket_lock);
447                 spin_unlock_bh(old_bucket_lock2);
448                 spin_unlock_bh(old_bucket_lock1);
449         }
450
451         /* Publish the new, valid hash table */
452         rcu_assign_pointer(ht->tbl, new_tbl);
453         ht->shift--;
454
455         /* Wait for readers. No new readers will have references to the
456          * old hash table.
457          */
458         synchronize_rcu();
459
460         bucket_table_free(tbl);
461
462         return 0;
463 }
464 EXPORT_SYMBOL_GPL(rhashtable_shrink);
465
466 static void rht_deferred_worker(struct work_struct *work)
467 {
468         struct rhashtable *ht;
469         struct bucket_table *tbl;
470
471         ht = container_of(work, struct rhashtable, run_work.work);
472         mutex_lock(&ht->mutex);
473         tbl = rht_dereference(ht->tbl, ht);
474
475         if (ht->p.grow_decision && ht->p.grow_decision(ht, tbl->size))
476                 rhashtable_expand(ht);
477         else if (ht->p.shrink_decision && ht->p.shrink_decision(ht, tbl->size))
478                 rhashtable_shrink(ht);
479
480         mutex_unlock(&ht->mutex);
481 }
482
483 /**
484  * rhashtable_insert - insert object into hash table
485  * @ht:         hash table
486  * @obj:        pointer to hash head inside object
487  *
488  * Will take a per bucket spinlock to protect against concurrent mutations
489  * on the same bucket. Multiple insertions may occur in parallel unless
490  * they map to the same bucket lock.
491  *
492  * It is safe to call this function from atomic context.
493  *
494  * Will trigger an automatic deferred table resizing if the size grows
495  * beyond the watermark indicated by grow_decision() which can be passed
496  * to rhashtable_init().
497  */
498 void rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj)
499 {
500         struct bucket_table *tbl;
501         spinlock_t *lock;
502         unsigned hash;
503
504         rcu_read_lock();
505
506         tbl = rht_dereference_rcu(ht->future_tbl, ht);
507         hash = head_hashfn(ht, tbl, obj);
508         lock = bucket_lock(tbl, hash);
509
510         spin_lock_bh(lock);
511         RCU_INIT_POINTER(obj->next, tbl->buckets[hash]);
512         rcu_assign_pointer(tbl->buckets[hash], obj);
513         spin_unlock_bh(lock);
514
515         atomic_inc(&ht->nelems);
516
517         /* Only grow the table if no resizing is currently in progress. */
518         if (ht->tbl != ht->future_tbl &&
519             ht->p.grow_decision && ht->p.grow_decision(ht, tbl->size))
520                 schedule_delayed_work(&ht->run_work, 0);
521
522         rcu_read_unlock();
523 }
524 EXPORT_SYMBOL_GPL(rhashtable_insert);
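/* Usage sketch for rhashtable_insert() (illustrative; reuses the test_obj
 * layout from the self test below, GFP_ATOMIC only to show that insertion
 * itself is safe in atomic context):
 *
 *	struct test_obj *obj = kzalloc(sizeof(*obj), GFP_ATOMIC);
 *
 *	if (obj) {
 *		obj->value = 42;
 *		rhashtable_insert(ht, &obj->node);
 *	}
 */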
525
526 /**
527  * rhashtable_remove - remove object from hash table
528  * @ht:         hash table
529  * @obj:        pointer to hash head inside object
530  *
531  * Since the hash chain is singly linked, the removal operation needs to
532  * walk the bucket chain to find the entry. Removal is thus considerably
533  * slower if the hash table is not correctly sized.
534  *
535  * Will automatically shrink the table via rhashtable_shrink() if the
536  * shrink_decision function specified at rhashtable_init() returns true.
537  *
538  * Concurrent mutations are serialized by the per bucket locks, and it is
539  * valid to have concurrent lookups if they are RCU protected.
540  */
541 bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *obj)
542 {
543         struct bucket_table *tbl;
544         struct rhash_head __rcu **pprev;
545         struct rhash_head *he;
546         spinlock_t *lock;
547         unsigned int hash;
548
549         rcu_read_lock();
550         tbl = rht_dereference_rcu(ht->tbl, ht);
551         hash = head_hashfn(ht, tbl, obj);
552
553         lock = bucket_lock(tbl, hash);
554         spin_lock_bh(lock);
555
556 restart:
557         pprev = &tbl->buckets[hash];
558         rht_for_each(he, tbl, hash) {
559                 if (he != obj) {
560                         pprev = &he->next;
561                         continue;
562                 }
563
564                 rcu_assign_pointer(*pprev, obj->next);
565                 atomic_dec(&ht->nelems);
566
567                 spin_unlock_bh(lock);
568
569                 if (ht->tbl != ht->future_tbl &&
570                     ht->p.shrink_decision &&
571                     ht->p.shrink_decision(ht, tbl->size))
572                         schedule_delayed_work(&ht->run_work, 0);
573
574                 rcu_read_unlock();
575
576                 return true;
577         }
578
579         if (tbl != rht_dereference_rcu(ht->tbl, ht)) {
580                 spin_unlock_bh(lock);
581
582                 tbl = rht_dereference_rcu(ht->tbl, ht);
583                 hash = head_hashfn(ht, tbl, obj);
584
585                 lock = bucket_lock(tbl, hash);
586                 spin_lock_bh(lock);
587                 goto restart;
588         }
589
590         spin_unlock_bh(lock);
591         rcu_read_unlock();
592
593         return false;
594 }
595 EXPORT_SYMBOL_GPL(rhashtable_remove);
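/* Usage sketch for rhashtable_remove() (illustrative; assumes the object
 * embeds a struct rcu_head named "rcu" so it can be freed only after a
 * grace period, since concurrent RCU readers may still hold a reference):
 *
 *	if (rhashtable_remove(ht, &obj->node))
 *		kfree_rcu(obj, rcu);
 */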
596
597 /**
598  * rhashtable_lookup - lookup key in hash table
599  * @ht:         hash table
600  * @key:        pointer to key
601  *
602  * Computes the hash value for the key and traverses the bucket chain looking
603  * for an entry with an identical key. The first matching entry is returned.
604  *
605  * This lookup function may only be used for fixed-key hash tables (key_len
606  * parameter set). It will BUG() if used inappropriately.
607  *
608  * Lookups may occur in parallel with hashtable mutations and resizing.
609  */
610 void *rhashtable_lookup(struct rhashtable *ht, const void *key)
611 {
612         const struct bucket_table *tbl, *old_tbl;
613         struct rhash_head *he;
614         u32 hash;
615
616         BUG_ON(!ht->p.key_len);
617
618         rcu_read_lock();
619         old_tbl = rht_dereference_rcu(ht->tbl, ht);
620         tbl = rht_dereference_rcu(ht->future_tbl, ht);
621         hash = key_hashfn(ht, key, ht->p.key_len);
622 restart:
623         rht_for_each_rcu(he, tbl, rht_bucket_index(tbl, hash)) {
624                 if (memcmp(rht_obj(ht, he) + ht->p.key_offset, key,
625                            ht->p.key_len))
626                         continue;
627                 rcu_read_unlock();
628                 return rht_obj(ht, he);
629         }
630
631         if (unlikely(tbl != old_tbl)) {
632                 tbl = old_tbl;
633                 goto restart;
634         }
635
636         rcu_read_unlock();
637         return NULL;
638 }
639 EXPORT_SYMBOL_GPL(rhashtable_lookup);
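/* Usage sketch for rhashtable_lookup() (illustrative; reuses the test_obj
 * layout from the self test below). The returned object is only guaranteed
 * to stay around while rcu_read_lock() is held, as a concurrent removal may
 * free it after the grace period:
 *
 *	u32 key = 42;
 *	struct test_obj *obj;
 *
 *	rcu_read_lock();
 *	obj = rhashtable_lookup(ht, &key);
 *	if (obj)
 *		pr_info("found value %d\n", obj->value);
 *	rcu_read_unlock();
 */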
640
641 /**
642  * rhashtable_lookup_compare - search hash table with compare function
643  * @ht:         hash table
644  * @key:        the pointer to the key
645  * @compare:    compare function, must return true on match
646  * @arg:        argument passed on to compare function
647  *
648  * Hashes the provided key, traverses the resulting bucket chain and calls
649  * the specified compare function for each entry.
650  *
651  * Lookups may occur in parallel with hashtable mutations and resizing.
652  *
653  * Returns the first entry on which the compare function returned true.
654  */
655 void *rhashtable_lookup_compare(struct rhashtable *ht, const void *key,
656                                 bool (*compare)(void *, void *), void *arg)
657 {
658         const struct bucket_table *tbl, *old_tbl;
659         struct rhash_head *he;
660         u32 hash;
661
662         rcu_read_lock();
663
664         old_tbl = rht_dereference_rcu(ht->tbl, ht);
665         tbl = rht_dereference_rcu(ht->future_tbl, ht);
666         hash = key_hashfn(ht, key, ht->p.key_len);
667 restart:
668         rht_for_each_rcu(he, tbl, rht_bucket_index(tbl, hash)) {
669                 if (!compare(rht_obj(ht, he), arg))
670                         continue;
671                 rcu_read_unlock();
672                 return rht_obj(ht, he);
673         }
674
675         if (unlikely(tbl != old_tbl)) {
676                 tbl = old_tbl;
677                 goto restart;
678         }
679         rcu_read_unlock();
680
681         return NULL;
682 }
683 EXPORT_SYMBOL_GPL(rhashtable_lookup_compare);
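/* Example compare callback for rhashtable_lookup_compare() (illustrative;
 * test_obj_cmp is a made-up helper that matches on the "value" member of
 * the test_obj layout used by the self test below; the key is passed both
 * for hashing and as the compare argument):
 *
 *	static bool test_obj_cmp(void *ptr, void *arg)
 *	{
 *		const struct test_obj *obj = ptr;
 *		const u32 *key = arg;
 *
 *		return obj->value == *key;
 *	}
 *
 *	u32 key = 42;
 *	struct test_obj *obj;
 *
 *	rcu_read_lock();
 *	obj = rhashtable_lookup_compare(ht, &key, test_obj_cmp, &key);
 *	if (obj)
 *		pr_info("found value %d\n", obj->value);
 *	rcu_read_unlock();
 */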
684
685 static size_t rounded_hashtable_size(struct rhashtable_params *params)
686 {
687         return max(roundup_pow_of_two(params->nelem_hint * 4 / 3),
688                    1UL << params->min_shift);
689 }
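/* Sizing example (illustrative): a nelem_hint of 100 yields
 * roundup_pow_of_two(100 * 4 / 3) = roundup_pow_of_two(133) = 256 buckets,
 * keeping the hinted element count below the 75% grow watermark of the
 * initial table.
 */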
690
691 /**
692  * rhashtable_init - initialize a new hash table
693  * @ht:         hash table to be initialized
694  * @params:     configuration parameters
695  *
696  * Initializes a new hash table based on the provided configuration
697  * parameters. A table can be configured either with a variable or
698  * fixed length key:
699  *
700  * Configuration Example 1: Fixed length keys
701  * struct test_obj {
702  *      int                     key;
703  *      void *                  my_member;
704  *      struct rhash_head       node;
705  * };
706  *
707  * struct rhashtable_params params = {
708  *      .head_offset = offsetof(struct test_obj, node),
709  *      .key_offset = offsetof(struct test_obj, key),
710  *      .key_len = sizeof(int),
711  *      .hashfn = jhash,
712  * };
713  *
714  * Configuration Example 2: Variable length keys
715  * struct test_obj {
716  *      [...]
717  *      struct rhash_head       node;
718  * };
719  *
720  * u32 my_hash_fn(const void *data, u32 seed)
721  * {
722  *      struct test_obj *obj = data;
723  *
724  *      return [... hash ...];
725  * }
726  *
727  * struct rhashtable_params params = {
728  *      .head_offset = offsetof(struct test_obj, node),
729  *      .hashfn = jhash,
730  *      .obj_hashfn = my_hash_fn,
731  * };
732  */
733 int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params)
734 {
735         struct bucket_table *tbl;
736         size_t size;
737
738         size = HASH_DEFAULT_SIZE;
739
740         if ((params->key_len && !params->hashfn) ||
741             (!params->key_len && !params->obj_hashfn))
742                 return -EINVAL;
743
744         params->min_shift = max_t(size_t, params->min_shift,
745                                   ilog2(HASH_MIN_SIZE));
746
747         if (params->nelem_hint)
748                 size = rounded_hashtable_size(params);
749
750         memset(ht, 0, sizeof(*ht));
751         mutex_init(&ht->mutex);
752         memcpy(&ht->p, params, sizeof(*params));
753
754         if (params->locks_mul)
755                 ht->p.locks_mul = roundup_pow_of_two(params->locks_mul);
756         else
757                 ht->p.locks_mul = BUCKET_LOCKS_PER_CPU;
758
759         tbl = bucket_table_alloc(ht, size);
760         if (tbl == NULL)
761                 return -ENOMEM;
762
763         ht->shift = ilog2(tbl->size);
764         RCU_INIT_POINTER(ht->tbl, tbl);
765         RCU_INIT_POINTER(ht->future_tbl, tbl);
766
767         if (!ht->p.hash_rnd)
768                 get_random_bytes(&ht->p.hash_rnd, sizeof(ht->p.hash_rnd));
769
770         if (ht->p.grow_decision || ht->p.shrink_decision)
771                 INIT_DEFERRABLE_WORK(&ht->run_work, rht_deferred_worker);
772
773         return 0;
774 }
775 EXPORT_SYMBOL_GPL(rhashtable_init);
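/* Initialization sketch with deferred automatic resizing enabled
 * (illustrative; builds on Configuration Example 1 above):
 *
 *	struct rhashtable ht;
 *	struct rhashtable_params params = {
 *		.head_offset = offsetof(struct test_obj, node),
 *		.key_offset = offsetof(struct test_obj, key),
 *		.key_len = sizeof(int),
 *		.hashfn = jhash,
 *		.grow_decision = rht_grow_above_75,
 *		.shrink_decision = rht_shrink_below_30,
 *	};
 *	int err;
 *
 *	err = rhashtable_init(&ht, &params);
 *	if (err)
 *		return err;
 */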
776
777 /**
778  * rhashtable_destroy - destroy hash table
779  * @ht:         the hash table to destroy
780  *
781  * Frees the bucket array. This function is not RCU safe; the caller has to
782  * make sure that no resizing can happen by unpublishing the hash table and
783  * waiting for a quiescent grace period before releasing the bucket array.
784  */
785 void rhashtable_destroy(struct rhashtable *ht)
786 {
787         ht->being_destroyed = true;
788
789         mutex_lock(&ht->mutex);
790
791         cancel_delayed_work(&ht->run_work);
792         bucket_table_free(rht_dereference(ht->tbl, ht));
793
794         mutex_unlock(&ht->mutex);
795 }
796 EXPORT_SYMBOL_GPL(rhashtable_destroy);
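/* Teardown sketch (illustrative; assumes all other users of the table have
 * already been stopped and reuses the test_obj layout from the self test
 * below). Objects still linked into the table must be freed by the caller
 * before the bucket array is released:
 *
 *	struct bucket_table *tbl;
 *	struct rhash_head *pos, *next;
 *	struct test_obj *obj;
 *	unsigned int i;
 *
 *	synchronize_rcu();
 *
 *	mutex_lock(&ht.mutex);
 *	tbl = rht_dereference(ht.tbl, &ht);
 *	for (i = 0; i < tbl->size; i++)
 *		rht_for_each_entry_safe(obj, pos, next, tbl, i, node)
 *			kfree(obj);
 *	mutex_unlock(&ht.mutex);
 *
 *	rhashtable_destroy(&ht);
 */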
797
798 /**************************************************************************
799  * Self Test
800  **************************************************************************/
801
802 #ifdef CONFIG_TEST_RHASHTABLE
803
804 #define TEST_HT_SIZE    8
805 #define TEST_ENTRIES    2048
806 #define TEST_PTR        ((void *) 0xdeadbeef)
807 #define TEST_NEXPANDS   4
808
809 struct test_obj {
810         void                    *ptr;
811         int                     value;
812         struct rhash_head       node;
813 };
814
815 static int __init test_rht_lookup(struct rhashtable *ht)
816 {
817         unsigned int i;
818
819         for (i = 0; i < TEST_ENTRIES * 2; i++) {
820                 struct test_obj *obj;
821                 bool expected = !(i % 2);
822                 u32 key = i;
823
824                 obj = rhashtable_lookup(ht, &key);
825
826                 if (expected && !obj) {
827                         pr_warn("Test failed: Could not find key %u\n", key);
828                         return -ENOENT;
829                 } else if (!expected && obj) {
830                         pr_warn("Test failed: Unexpected entry found for key %u\n",
831                                 key);
832                         return -EEXIST;
833                 } else if (expected && obj) {
834                         if (obj->ptr != TEST_PTR || obj->value != i) {
835                                 pr_warn("Test failed: Lookup value mismatch %p!=%p, %u!=%u\n",
836                                         obj->ptr, TEST_PTR, obj->value, i);
837                                 return -EINVAL;
838                         }
839                 }
840         }
841
842         return 0;
843 }
844
845 static void test_bucket_stats(struct rhashtable *ht, bool quiet)
846 {
847         unsigned int cnt, rcu_cnt, i, total = 0;
848         struct rhash_head *pos;
849         struct test_obj *obj;
850         struct bucket_table *tbl;
851
852         tbl = rht_dereference_rcu(ht->tbl, ht);
853         for (i = 0; i < tbl->size; i++) {
854                 rcu_cnt = cnt = 0;
855
856                 if (!quiet)
857                         pr_info(" [%#4x/%zu]", i, tbl->size);
858
859                 rht_for_each_entry_rcu(obj, pos, tbl, i, node) {
860                         cnt++;
861                         total++;
862                         if (!quiet)
863                                 pr_cont(" [%p],", obj);
864                 }
865
866                 rht_for_each_entry_rcu(obj, pos, tbl, i, node)
867                         rcu_cnt++;
868
869                 if (rcu_cnt != cnt)
870                         pr_warn("Test failed: Chain count mismatch %d != %d\n",
871                                 cnt, rcu_cnt);
872
873                 if (!quiet)
874                         pr_cont("\n  [%#x] first element: %p, chain length: %u\n",
875                                 i, tbl->buckets[i], cnt);
876         }
877
878         pr_info("  Traversal complete: counted=%u, nelems=%u, entries=%d\n",
879                 total, atomic_read(&ht->nelems), TEST_ENTRIES);
880
881         if (total != atomic_read(&ht->nelems) || total != TEST_ENTRIES)
882                 pr_warn("Test failed: Total count mismatch ^^^\n");
883 }
884
885 static int __init test_rhashtable(struct rhashtable *ht)
886 {
887         struct bucket_table *tbl;
888         struct test_obj *obj;
889         struct rhash_head *pos, *next;
890         int err;
891         unsigned int i;
892
893         /*
894          * Insertion Test:
895          * Insert TEST_ENTRIES entries into the table; all keys are even numbers
896          */
897         pr_info("  Adding %d keys\n", TEST_ENTRIES);
898         for (i = 0; i < TEST_ENTRIES; i++) {
899                 struct test_obj *obj;
900
901                 obj = kzalloc(sizeof(*obj), GFP_KERNEL);
902                 if (!obj) {
903                         err = -ENOMEM;
904                         goto error;
905                 }
906
907                 obj->ptr = TEST_PTR;
908                 obj->value = i * 2;
909
910                 rhashtable_insert(ht, &obj->node);
911         }
912
913         rcu_read_lock();
914         test_bucket_stats(ht, true);
915         test_rht_lookup(ht);
916         rcu_read_unlock();
917
918         for (i = 0; i < TEST_NEXPANDS; i++) {
919                 pr_info("  Table expansion iteration %u...\n", i);
920                 mutex_lock(&ht->mutex);
921                 rhashtable_expand(ht);
922                 mutex_unlock(&ht->mutex);
923
924                 rcu_read_lock();
925                 pr_info("  Verifying lookups...\n");
926                 test_rht_lookup(ht);
927                 rcu_read_unlock();
928         }
929
930         for (i = 0; i < TEST_NEXPANDS; i++) {
931                 pr_info("  Table shrinkage iteration %u...\n", i);
932                 mutex_lock(&ht->mutex);
933                 rhashtable_shrink(ht);
934                 mutex_unlock(&ht->mutex);
935
936                 rcu_read_lock();
937                 pr_info("  Verifying lookups...\n");
938                 test_rht_lookup(ht);
939                 rcu_read_unlock();
940         }
941
942         rcu_read_lock();
943         test_bucket_stats(ht, true);
944         rcu_read_unlock();
945
946         pr_info("  Deleting %d keys\n", TEST_ENTRIES);
947         for (i = 0; i < TEST_ENTRIES; i++) {
948                 u32 key = i * 2;
949
950                 obj = rhashtable_lookup(ht, &key);
951                 BUG_ON(!obj);
952
953                 rhashtable_remove(ht, &obj->node);
954                 kfree(obj);
955         }
956
957         return 0;
958
959 error:
960         tbl = rht_dereference_rcu(ht->tbl, ht);
961         for (i = 0; i < tbl->size; i++)
962                 rht_for_each_entry_safe(obj, pos, next, tbl, i, node)
963                         kfree(obj);
964
965         return err;
966 }
967
968 static int __init test_rht_init(void)
969 {
970         struct rhashtable ht;
971         struct rhashtable_params params = {
972                 .nelem_hint = TEST_HT_SIZE,
973                 .head_offset = offsetof(struct test_obj, node),
974                 .key_offset = offsetof(struct test_obj, value),
975                 .key_len = sizeof(int),
976                 .hashfn = jhash,
977                 .grow_decision = rht_grow_above_75,
978                 .shrink_decision = rht_shrink_below_30,
979         };
980         int err;
981
982         pr_info("Running resizable hashtable tests...\n");
983
984         err = rhashtable_init(&ht, &params);
985         if (err < 0) {
986                 pr_warn("Test failed: Unable to initialize hashtable: %d\n",
987                         err);
988                 return err;
989         }
990
991         err = test_rhashtable(&ht);
992
993         rhashtable_destroy(&ht);
994
995         return err;
996 }
997
998 subsys_initcall(test_rht_init);
999
1000 #endif /* CONFIG_TEST_RHASHTABLE */