/*
 * GPL HEADER START
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see
 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 * GPL HEADER END
 */
/*
 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
 * Use is subject to license terms.
 *
 * Copyright (c) 2011, 2012, Intel Corporation.
 */
/*
 * This file is part of Lustre, http://www.lustre.org/
 * Lustre is a trademark of Sun Microsystems, Inc.
 *
 * lustre/ptlrpc/sec_bulk.c
 *
 * Author: Eric Mei <ericm@clusterfs.com>
 */

#define DEBUG_SUBSYSTEM S_SEC

#include <linux/libcfs/libcfs.h>
#include <linux/crypto.h>

#include <obd.h>
#include <obd_cksum.h>
#include <obd_class.h>
#include <obd_support.h>
#include <lustre_net.h>
#include <lustre_import.h>
#include <lustre_dlm.h>
#include <lustre_sec.h>

#include "ptlrpc_internal.h"

/****************************************
 * bulk encryption page pools           *
 ****************************************/

#define PTRS_PER_PAGE   (PAGE_CACHE_SIZE / sizeof(void *))
#define PAGES_PER_POOL  (PTRS_PER_PAGE)
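
/*
 * each pool is a single page holding an array of page pointers; e.g. on
 * a system with 4 KiB pages and 8-byte pointers, one pool tracks
 * 4096 / 8 = 512 pages (2 MiB of bulk buffer).  the figures here are
 * illustrative only and follow PAGE_CACHE_SIZE and the pointer width.
 */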

#define IDLE_IDX_MAX        (100)
#define IDLE_IDX_WEIGHT     (3)

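/* pools are considered fully idle after this many seconds without access */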
#define CACHE_QUIESCENT_PERIOD  (20)

static struct ptlrpc_enc_page_pool {
        /*
         * constants
         */
        unsigned long    epp_max_pages;   /* maximum pages can hold, const */
        unsigned int     epp_max_pools;   /* number of pools, const */

        /*
         * wait queue in case of not enough free pages.
         */
        wait_queue_head_t epp_waitq;      /* waiting threads */
        unsigned int     epp_waitqlen;    /* wait queue length */
        unsigned long    epp_pages_short; /* # of pages wanted by queued users */
        unsigned int     epp_growing:1;   /* during adding pages */

        /*
         * indicates how idle the pools are, from 0 to IDLE_IDX_MAX.
         * this is updated each time pages are taken from the pools, not
         * based on wall-clock time, so even if the system has been idle
         * for a while the idle_idx may still be low if there was no
         * activity in the pools.
         */
        unsigned long    epp_idle_idx;

        /* last shrink time due to mem tight */
        long             epp_last_shrink;
        long             epp_last_access;

        /*
         * in-pool pages bookkeeping
         */
        spinlock_t       epp_lock;        /* protect following fields */
        unsigned long    epp_total_pages; /* total pages in pools */
        unsigned long    epp_free_pages;  /* current pages available */

        /*
         * statistics
         */
        unsigned long    epp_st_max_pages;  /* # of pages ever reached */
        unsigned int     epp_st_grows;      /* # of grows */
        unsigned int     epp_st_grow_fails; /* # of add pages failures */
        unsigned int     epp_st_shrinks;    /* # of shrinks */
        unsigned long    epp_st_access;     /* # of accesses */
        unsigned long    epp_st_missings;   /* # of cache misses */
        unsigned long    epp_st_lowfree;    /* lowest free pages reached */
        unsigned int     epp_st_max_wqlen;  /* highest waitqueue length */
        cfs_time_t       epp_st_max_wait;   /* in jiffies */
        /*
         * pointers to pools
         */
        struct page    ***epp_pools;
} page_pools;
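
/*
 * free pages are addressed by a single flat index: index i lives in pool
 * p_idx = i / PAGES_PER_POOL at slot g_idx = i % PAGES_PER_POOL.  for
 * example, with 512 pointers per pool, free page 1000 is
 * epp_pools[1][488].  (worked numbers assume the 4 KiB-page / 8-byte
 * pointer layout noted above.)
 */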

/*
 * memory shrinker
 */
const int pools_shrinker_seeks = DEFAULT_SEEKS;
static struct shrinker *pools_shrinker = NULL;

/*
 * /proc/fs/lustre/sptlrpc/encrypt_page_pools
 */
int sptlrpc_proc_read_enc_pool(char *page, char **start, off_t off, int count,
                               int *eof, void *data)
{
        int rc;

        spin_lock(&page_pools.epp_lock);

        rc = snprintf(page, count,
                      "physical pages:          %lu\n"
                      "pages per pool:          %lu\n"
                      "max pages:               %lu\n"
                      "max pools:               %u\n"
                      "total pages:             %lu\n"
                      "total free:              %lu\n"
                      "idle index:              %lu/100\n"
                      "last shrink:             %lds\n"
                      "last access:             %lds\n"
                      "max pages reached:       %lu\n"
                      "grows:                   %u\n"
                      "grows failure:           %u\n"
                      "shrinks:                 %u\n"
                      "cache access:            %lu\n"
                      "cache missing:           %lu\n"
                      "low free mark:           %lu\n"
                      "max waitqueue depth:     %u\n"
                      "max wait time:           "CFS_TIME_T"/%u\n",
                      num_physpages,
                      PAGES_PER_POOL,
                      page_pools.epp_max_pages,
                      page_pools.epp_max_pools,
                      page_pools.epp_total_pages,
                      page_pools.epp_free_pages,
                      page_pools.epp_idle_idx,
                      cfs_time_current_sec() - page_pools.epp_last_shrink,
                      cfs_time_current_sec() - page_pools.epp_last_access,
                      page_pools.epp_st_max_pages,
                      page_pools.epp_st_grows,
                      page_pools.epp_st_grow_fails,
                      page_pools.epp_st_shrinks,
                      page_pools.epp_st_access,
                      page_pools.epp_st_missings,
                      page_pools.epp_st_lowfree,
                      page_pools.epp_st_max_wqlen,
                      page_pools.epp_st_max_wait, HZ);

        spin_unlock(&page_pools.epp_lock);
        return rc;
}

static void enc_pools_release_free_pages(long npages)
{
        int p_idx, g_idx;
        int p_idx_max1, p_idx_max2;

        LASSERT(npages > 0);
        LASSERT(npages <= page_pools.epp_free_pages);
        LASSERT(page_pools.epp_free_pages <= page_pools.epp_total_pages);

        /* max pool index before the release */
        p_idx_max2 = (page_pools.epp_total_pages - 1) / PAGES_PER_POOL;

        page_pools.epp_free_pages -= npages;
        page_pools.epp_total_pages -= npages;

        /* max pool index after the release */
        p_idx_max1 = page_pools.epp_total_pages == 0 ? -1 :
                     ((page_pools.epp_total_pages - 1) / PAGES_PER_POOL);

        p_idx = page_pools.epp_free_pages / PAGES_PER_POOL;
        g_idx = page_pools.epp_free_pages % PAGES_PER_POOL;
        LASSERT(page_pools.epp_pools[p_idx]);

        while (npages--) {
                LASSERT(page_pools.epp_pools[p_idx]);
                LASSERT(page_pools.epp_pools[p_idx][g_idx] != NULL);

                __free_page(page_pools.epp_pools[p_idx][g_idx]);
                page_pools.epp_pools[p_idx][g_idx] = NULL;

                if (++g_idx == PAGES_PER_POOL) {
                        p_idx++;
                        g_idx = 0;
                }
        }

        /* free unused pools */
        while (p_idx_max1 < p_idx_max2) {
                LASSERT(page_pools.epp_pools[p_idx_max2]);
                OBD_FREE(page_pools.epp_pools[p_idx_max2], PAGE_CACHE_SIZE);
                page_pools.epp_pools[p_idx_max2] = NULL;
                p_idx_max2--;
        }
}

/*
 * may be called frequently just to query (@nr_to_scan == 0);
 * we try to keep at least PTLRPC_MAX_BRW_PAGES pages in the pool.
 */
static int enc_pools_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask))
{
        if (unlikely(shrink_param(sc, nr_to_scan) != 0)) {
                spin_lock(&page_pools.epp_lock);
                shrink_param(sc, nr_to_scan) = min_t(unsigned long,
                                                   shrink_param(sc, nr_to_scan),
                                                   page_pools.epp_free_pages -
                                                   PTLRPC_MAX_BRW_PAGES);
                if (shrink_param(sc, nr_to_scan) > 0) {
                        enc_pools_release_free_pages(shrink_param(sc,
                                                                  nr_to_scan));
                        CDEBUG(D_SEC, "released %ld pages, %ld left\n",
                               (long)shrink_param(sc, nr_to_scan),
                               page_pools.epp_free_pages);

                        page_pools.epp_st_shrinks++;
                        page_pools.epp_last_shrink = cfs_time_current_sec();
                }
                spin_unlock(&page_pools.epp_lock);
        }

        /*
         * if there has been no pool access for a long time, consider the
         * pools fully idle.  a small race here is fine.
         */
        if (unlikely(cfs_time_current_sec() - page_pools.epp_last_access >
                     CACHE_QUIESCENT_PERIOD)) {
                spin_lock(&page_pools.epp_lock);
                page_pools.epp_idle_idx = IDLE_IDX_MAX;
                spin_unlock(&page_pools.epp_lock);
        }

        LASSERT(page_pools.epp_idle_idx <= IDLE_IDX_MAX);
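        /*
         * report how much is shrinkable: the free pages above the
         * PTLRPC_MAX_BRW_PAGES reserve, scaled down by the current idle
         * index (at IDLE_IDX_MAX nothing is offered to the shrinker).
         */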
        return max((int)page_pools.epp_free_pages - PTLRPC_MAX_BRW_PAGES, 0) *
                (IDLE_IDX_MAX - page_pools.epp_idle_idx) / IDLE_IDX_MAX;
}

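/*
 * round up: e.g. with 512 pages per pool, 513 pages need two pools.
 */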
static inline
int npages_to_npools(unsigned long npages)
{
        return (int)((npages + PAGES_PER_POOL - 1) / PAGES_PER_POOL);
}

/*
 * return how many pages were cleaned up.
 */
static unsigned long enc_pools_cleanup(struct page ***pools, int npools)
{
        unsigned long cleaned = 0;
        int i, j;

        for (i = 0; i < npools; i++) {
                if (pools[i]) {
                        for (j = 0; j < PAGES_PER_POOL; j++) {
                                if (pools[i][j]) {
                                        __free_page(pools[i][j]);
                                        cleaned++;
                                }
                        }
                        OBD_FREE(pools[i], PAGE_CACHE_SIZE);
                        pools[i] = NULL;
                }
        }

        return cleaned;
}

/*
 * merge the @npools pools pointed to by @pools, containing @npages new
 * pages, into the current pools.
 *
 * we could avoid most of the memory copying with some tricks, but we
 * choose the simplest way to avoid complexity; this is not called
 * frequently.
 */
static void enc_pools_insert(struct page ***pools, int npools, int npages)
{
        int freeslot;
        int op_idx, np_idx, og_idx, ng_idx;
        int cur_npools, end_npools;

        LASSERT(npages > 0);
        LASSERT(page_pools.epp_total_pages+npages <= page_pools.epp_max_pages);
        LASSERT(npages_to_npools(npages) == npools);
        LASSERT(page_pools.epp_growing);

        spin_lock(&page_pools.epp_lock);

        /*
         * (1) fill all the free slots of current pools.
         */
        /* free slots are those left by rented pages, plus the extra ones
         * with index >= total_pages, located at the tail of the last pool. */
        freeslot = page_pools.epp_total_pages % PAGES_PER_POOL;
        if (freeslot != 0)
                freeslot = PAGES_PER_POOL - freeslot;
        freeslot += page_pools.epp_total_pages - page_pools.epp_free_pages;
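        /*
         * e.g. with 512-slot pools, 520 total pages and 500 free: the last
         * pool has 512 - (520 % 512) = 504 tail slots, plus 520 - 500 = 20
         * slots vacated by rented pages, giving 524 free slots in all.
         */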

        op_idx = page_pools.epp_free_pages / PAGES_PER_POOL;
        og_idx = page_pools.epp_free_pages % PAGES_PER_POOL;
        np_idx = npools - 1;
        ng_idx = (npages - 1) % PAGES_PER_POOL;

        while (freeslot) {
                LASSERT(page_pools.epp_pools[op_idx][og_idx] == NULL);
                LASSERT(pools[np_idx][ng_idx] != NULL);

                page_pools.epp_pools[op_idx][og_idx] = pools[np_idx][ng_idx];
                pools[np_idx][ng_idx] = NULL;

                freeslot--;

                if (++og_idx == PAGES_PER_POOL) {
                        op_idx++;
                        og_idx = 0;
                }
                if (--ng_idx < 0) {
                        if (np_idx == 0)
                                break;
                        np_idx--;
                        ng_idx = PAGES_PER_POOL - 1;
                }
        }

        /*
         * (2) add pools if needed.
         */
        cur_npools = (page_pools.epp_total_pages + PAGES_PER_POOL - 1) /
                     PAGES_PER_POOL;
        end_npools = (page_pools.epp_total_pages + npages +
                      PAGES_PER_POOL - 1) / PAGES_PER_POOL;
        LASSERT(end_npools <= page_pools.epp_max_pools);

        np_idx = 0;
        while (cur_npools < end_npools) {
                LASSERT(page_pools.epp_pools[cur_npools] == NULL);
                LASSERT(np_idx < npools);
                LASSERT(pools[np_idx] != NULL);

                page_pools.epp_pools[cur_npools++] = pools[np_idx];
                pools[np_idx++] = NULL;
        }

        page_pools.epp_total_pages += npages;
        page_pools.epp_free_pages += npages;
        page_pools.epp_st_lowfree = page_pools.epp_free_pages;

        if (page_pools.epp_total_pages > page_pools.epp_st_max_pages)
                page_pools.epp_st_max_pages = page_pools.epp_total_pages;

        CDEBUG(D_SEC, "add %d pages to total %lu\n", npages,
               page_pools.epp_total_pages);

        spin_unlock(&page_pools.epp_lock);
}

static int enc_pools_add_pages(int npages)
{
        static DEFINE_MUTEX(add_pages_mutex);
        struct page ***pools;
        int npools, alloced = 0;
        int i, j, rc = -ENOMEM;

        if (npages < PTLRPC_MAX_BRW_PAGES)
                npages = PTLRPC_MAX_BRW_PAGES;

        mutex_lock(&add_pages_mutex);

        if (npages + page_pools.epp_total_pages > page_pools.epp_max_pages)
                npages = page_pools.epp_max_pages - page_pools.epp_total_pages;
        LASSERT(npages > 0);

        page_pools.epp_st_grows++;

        npools = npages_to_npools(npages);
        OBD_ALLOC(pools, npools * sizeof(*pools));
        if (pools == NULL)
                goto out;

        for (i = 0; i < npools; i++) {
                OBD_ALLOC(pools[i], PAGE_CACHE_SIZE);
                if (pools[i] == NULL)
                        goto out_pools;

                for (j = 0; j < PAGES_PER_POOL && alloced < npages; j++) {
                        pools[i][j] = alloc_page(__GFP_IO | __GFP_HIGHMEM);
                        if (pools[i][j] == NULL)
                                goto out_pools;

                        alloced++;
                }
        }
        LASSERT(alloced == npages);

        enc_pools_insert(pools, npools, npages);
        CDEBUG(D_SEC, "added %d pages into pools\n", npages);
        rc = 0;

out_pools:
        enc_pools_cleanup(pools, npools);
        OBD_FREE(pools, npools * sizeof(*pools));
out:
        if (rc) {
                page_pools.epp_st_grow_fails++;
                CERROR("Failed to allocate %d enc pages\n", npages);
        }

        mutex_unlock(&add_pages_mutex);
        return rc;
}

static inline void enc_pools_wakeup(void)
{
        LASSERT(spin_is_locked(&page_pools.epp_lock));
        LASSERT(page_pools.epp_waitqlen >= 0);

        if (unlikely(page_pools.epp_waitqlen)) {
                LASSERT(waitqueue_active(&page_pools.epp_waitq));
                wake_up_all(&page_pools.epp_waitq);
        }
}

static int enc_pools_should_grow(int page_needed, long now)
{
        /* don't grow if someone else is growing the pools right now,
         * or the pools have already reached full capacity
         */
        if (page_pools.epp_growing ||
            page_pools.epp_total_pages == page_pools.epp_max_pages)
                return 0;

        /* if the total number of pages is not enough, we need to grow */
        if (page_pools.epp_total_pages < page_needed)
                return 1;

        /*
         * we wanted to return 0 here if a shrink happened just a moment
         * ago, but that may cause a deadlock if both a client and an OST
         * live on a single node.
         */
#if 0
        if (now - page_pools.epp_last_shrink < 2)
                return 0;
#endif

        /*
         * perhaps we should also consider other factors here, such as
         * wait queue length, idle index, etc.?
         */

        /* grow the pools in any other case */
        return 1;
}

/*
 * we allocate the requested pages atomically: the caller either gets the
 * whole set or waits until enough pages are available.
 */
int sptlrpc_enc_pool_get_pages(struct ptlrpc_bulk_desc *desc)
{
        wait_queue_t waitlink;
        unsigned long this_idle = -1;
        cfs_time_t tick = 0;
        long now;
        int p_idx, g_idx;
        int i;

        LASSERT(desc->bd_iov_count > 0);
        LASSERT(desc->bd_iov_count <= page_pools.epp_max_pages);

        /* resent bulk, enc iov might have been allocated previously */
        if (desc->bd_enc_iov != NULL)
                return 0;

        OBD_ALLOC(desc->bd_enc_iov,
                  desc->bd_iov_count * sizeof(*desc->bd_enc_iov));
        if (desc->bd_enc_iov == NULL)
                return -ENOMEM;

        spin_lock(&page_pools.epp_lock);

        page_pools.epp_st_access++;
again:
        if (unlikely(page_pools.epp_free_pages < desc->bd_iov_count)) {
                if (tick == 0)
                        tick = cfs_time_current();

                now = cfs_time_current_sec();

                page_pools.epp_st_missings++;
                page_pools.epp_pages_short += desc->bd_iov_count;

                if (enc_pools_should_grow(desc->bd_iov_count, now)) {
                        page_pools.epp_growing = 1;

                        spin_unlock(&page_pools.epp_lock);
                        enc_pools_add_pages(page_pools.epp_pages_short / 2);
                        spin_lock(&page_pools.epp_lock);

                        page_pools.epp_growing = 0;

                        enc_pools_wakeup();
                } else {
                        if (++page_pools.epp_waitqlen >
                            page_pools.epp_st_max_wqlen)
                                page_pools.epp_st_max_wqlen =
                                                page_pools.epp_waitqlen;

                        set_current_state(TASK_UNINTERRUPTIBLE);
                        init_waitqueue_entry_current(&waitlink);
                        add_wait_queue(&page_pools.epp_waitq, &waitlink);

                        spin_unlock(&page_pools.epp_lock);
                        waitq_wait(&waitlink, TASK_UNINTERRUPTIBLE);
                        remove_wait_queue(&page_pools.epp_waitq, &waitlink);
                        LASSERT(page_pools.epp_waitqlen > 0);
                        spin_lock(&page_pools.epp_lock);
                        page_pools.epp_waitqlen--;
                }

                LASSERT(page_pools.epp_pages_short >= desc->bd_iov_count);
                page_pools.epp_pages_short -= desc->bd_iov_count;

                this_idle = 0;
                goto again;
        }

        /* record max wait time */
        if (unlikely(tick != 0)) {
                tick = cfs_time_current() - tick;
                if (tick > page_pools.epp_st_max_wait)
                        page_pools.epp_st_max_wait = tick;
        }

        /* proceed with the rest of the allocation */
        page_pools.epp_free_pages -= desc->bd_iov_count;

        p_idx = page_pools.epp_free_pages / PAGES_PER_POOL;
        g_idx = page_pools.epp_free_pages % PAGES_PER_POOL;

        for (i = 0; i < desc->bd_iov_count; i++) {
                LASSERT(page_pools.epp_pools[p_idx][g_idx] != NULL);
                desc->bd_enc_iov[i].kiov_page =
                                        page_pools.epp_pools[p_idx][g_idx];
                page_pools.epp_pools[p_idx][g_idx] = NULL;

                if (++g_idx == PAGES_PER_POOL) {
                        p_idx++;
                        g_idx = 0;
                }
        }

        if (page_pools.epp_free_pages < page_pools.epp_st_lowfree)
                page_pools.epp_st_lowfree = page_pools.epp_free_pages;

        /*
         * new idle index = (old * weight + new) / (weight + 1)
         */
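        /*
         * (i.e. an exponentially weighted moving average: with
         * IDLE_IDX_WEIGHT == 3 each new sample contributes 1/4 and the
         * previous index keeps a 3/4 share, so the index adapts over a
         * handful of accesses.)
         */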
        if (this_idle == -1) {
                this_idle = page_pools.epp_free_pages * IDLE_IDX_MAX /
                            page_pools.epp_total_pages;
        }
        page_pools.epp_idle_idx = (page_pools.epp_idle_idx * IDLE_IDX_WEIGHT +
                                   this_idle) /
                                  (IDLE_IDX_WEIGHT + 1);

        page_pools.epp_last_access = cfs_time_current_sec();

        spin_unlock(&page_pools.epp_lock);
        return 0;
}
EXPORT_SYMBOL(sptlrpc_enc_pool_get_pages);
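
/*
 * typical caller pattern (an illustrative sketch only, not taken from
 * this file): a bulk descriptor with bd_iov_count pages set up borrows
 * encryption pages before the transfer and returns them afterwards:
 *
 *      rc = sptlrpc_enc_pool_get_pages(desc);
 *      if (rc)
 *              return rc;
 *      ... encrypt into desc->bd_enc_iov[i].kiov_page, do the bulk I/O ...
 *      sptlrpc_enc_pool_put_pages(desc);
 */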

void sptlrpc_enc_pool_put_pages(struct ptlrpc_bulk_desc *desc)
{
        int p_idx, g_idx;
        int i;

        if (desc->bd_enc_iov == NULL)
                return;

        LASSERT(desc->bd_iov_count > 0);

        spin_lock(&page_pools.epp_lock);

        p_idx = page_pools.epp_free_pages / PAGES_PER_POOL;
        g_idx = page_pools.epp_free_pages % PAGES_PER_POOL;

        LASSERT(page_pools.epp_free_pages + desc->bd_iov_count <=
                page_pools.epp_total_pages);
        LASSERT(page_pools.epp_pools[p_idx]);

        for (i = 0; i < desc->bd_iov_count; i++) {
                LASSERT(desc->bd_enc_iov[i].kiov_page != NULL);
                LASSERT(g_idx != 0 || page_pools.epp_pools[p_idx]);
                LASSERT(page_pools.epp_pools[p_idx][g_idx] == NULL);

                page_pools.epp_pools[p_idx][g_idx] =
                                        desc->bd_enc_iov[i].kiov_page;

                if (++g_idx == PAGES_PER_POOL) {
                        p_idx++;
                        g_idx = 0;
                }
        }

        page_pools.epp_free_pages += desc->bd_iov_count;

        enc_pools_wakeup();

        spin_unlock(&page_pools.epp_lock);

        OBD_FREE(desc->bd_enc_iov,
                 desc->bd_iov_count * sizeof(*desc->bd_enc_iov));
        desc->bd_enc_iov = NULL;
}
EXPORT_SYMBOL(sptlrpc_enc_pool_put_pages);

/*
 * we don't do much for add_user/del_user anymore, except adding some
 * initial pages in add_user() if the pools are currently empty; the
 * rest is handled by the pools' self-adaptation.
 */
int sptlrpc_enc_pool_add_user(void)
{
        int need_grow = 0;

        spin_lock(&page_pools.epp_lock);
        if (page_pools.epp_growing == 0 && page_pools.epp_total_pages == 0) {
                page_pools.epp_growing = 1;
                need_grow = 1;
        }
        spin_unlock(&page_pools.epp_lock);

        if (need_grow) {
                enc_pools_add_pages(PTLRPC_MAX_BRW_PAGES +
                                    PTLRPC_MAX_BRW_PAGES);

                spin_lock(&page_pools.epp_lock);
                page_pools.epp_growing = 0;
                enc_pools_wakeup();
                spin_unlock(&page_pools.epp_lock);
        }
        return 0;
}
EXPORT_SYMBOL(sptlrpc_enc_pool_add_user);

int sptlrpc_enc_pool_del_user(void)
{
        return 0;
}
EXPORT_SYMBOL(sptlrpc_enc_pool_del_user);

static inline void enc_pools_alloc(void)
{
        LASSERT(page_pools.epp_max_pools);
        OBD_ALLOC_LARGE(page_pools.epp_pools,
                        page_pools.epp_max_pools *
                        sizeof(*page_pools.epp_pools));
}

static inline void enc_pools_free(void)
{
        LASSERT(page_pools.epp_max_pools);
        LASSERT(page_pools.epp_pools);

        OBD_FREE_LARGE(page_pools.epp_pools,
                       page_pools.epp_max_pools *
                       sizeof(*page_pools.epp_pools));
}

int sptlrpc_enc_pool_init(void)
{
        /*
         * maximum capacity is 1/8 of total physical memory.
         * (is 1/8 a good number?)
         */
        page_pools.epp_max_pages = num_physpages / 8;
        page_pools.epp_max_pools = npages_to_npools(page_pools.epp_max_pages);

        init_waitqueue_head(&page_pools.epp_waitq);
        page_pools.epp_waitqlen = 0;
        page_pools.epp_pages_short = 0;

        page_pools.epp_growing = 0;

        page_pools.epp_idle_idx = 0;
        page_pools.epp_last_shrink = cfs_time_current_sec();
        page_pools.epp_last_access = cfs_time_current_sec();

        spin_lock_init(&page_pools.epp_lock);
        page_pools.epp_total_pages = 0;
        page_pools.epp_free_pages = 0;

        page_pools.epp_st_max_pages = 0;
        page_pools.epp_st_grows = 0;
        page_pools.epp_st_grow_fails = 0;
        page_pools.epp_st_shrinks = 0;
        page_pools.epp_st_access = 0;
        page_pools.epp_st_missings = 0;
        page_pools.epp_st_lowfree = 0;
        page_pools.epp_st_max_wqlen = 0;
        page_pools.epp_st_max_wait = 0;

        enc_pools_alloc();
        if (page_pools.epp_pools == NULL)
                return -ENOMEM;

        pools_shrinker = set_shrinker(pools_shrinker_seeks, enc_pools_shrink);
        if (pools_shrinker == NULL) {
                enc_pools_free();
                return -ENOMEM;
        }

        return 0;
}

void sptlrpc_enc_pool_fini(void)
{
        unsigned long cleaned, npools;

        LASSERT(pools_shrinker);
        LASSERT(page_pools.epp_pools);
        LASSERT(page_pools.epp_total_pages == page_pools.epp_free_pages);

        remove_shrinker(pools_shrinker);

        npools = npages_to_npools(page_pools.epp_total_pages);
        cleaned = enc_pools_cleanup(page_pools.epp_pools, npools);
        LASSERT(cleaned == page_pools.epp_total_pages);

        enc_pools_free();

        if (page_pools.epp_st_access > 0) {
                CDEBUG(D_SEC,
                       "max pages %lu, grows %u, grow fails %u, shrinks %u, "
                       "access %lu, missing %lu, max qlen %u, max wait "
                       CFS_TIME_T"/%d\n",
                       page_pools.epp_st_max_pages, page_pools.epp_st_grows,
                       page_pools.epp_st_grow_fails,
                       page_pools.epp_st_shrinks, page_pools.epp_st_access,
                       page_pools.epp_st_missings, page_pools.epp_st_max_wqlen,
                       page_pools.epp_st_max_wait, HZ);
        }
}

static int cfs_hash_alg_id[] = {
        [BULK_HASH_ALG_NULL]    = CFS_HASH_ALG_NULL,
        [BULK_HASH_ALG_ADLER32] = CFS_HASH_ALG_ADLER32,
        [BULK_HASH_ALG_CRC32]   = CFS_HASH_ALG_CRC32,
        [BULK_HASH_ALG_MD5]     = CFS_HASH_ALG_MD5,
        [BULK_HASH_ALG_SHA1]    = CFS_HASH_ALG_SHA1,
        [BULK_HASH_ALG_SHA256]  = CFS_HASH_ALG_SHA256,
        [BULK_HASH_ALG_SHA384]  = CFS_HASH_ALG_SHA384,
        [BULK_HASH_ALG_SHA512]  = CFS_HASH_ALG_SHA512,
};

const char *sptlrpc_get_hash_name(__u8 hash_alg)
{
        return cfs_crypto_hash_name(cfs_hash_alg_id[hash_alg]);
}
EXPORT_SYMBOL(sptlrpc_get_hash_name);

__u8 sptlrpc_get_hash_alg(const char *algname)
{
        return cfs_crypto_hash_alg(algname);
}
EXPORT_SYMBOL(sptlrpc_get_hash_alg);

int bulk_sec_desc_unpack(struct lustre_msg *msg, int offset, int swabbed)
{
        struct ptlrpc_bulk_sec_desc *bsd;
        int size = msg->lm_buflens[offset];

        bsd = lustre_msg_buf(msg, offset, sizeof(*bsd));
        if (bsd == NULL) {
                CERROR("Invalid bulk sec desc: size %d\n", size);
                return -EINVAL;
        }

        if (swabbed)
                __swab32s(&bsd->bsd_nob);

        if (unlikely(bsd->bsd_version != 0)) {
                CERROR("Unexpected version %u\n", bsd->bsd_version);
                return -EPROTO;
        }

        if (unlikely(bsd->bsd_type >= SPTLRPC_BULK_MAX)) {
                CERROR("Invalid type %u\n", bsd->bsd_type);
                return -EPROTO;
        }

        /* FIXME more sanity checks here */

        if (unlikely(bsd->bsd_svc != SPTLRPC_BULK_SVC_NULL &&
                     bsd->bsd_svc != SPTLRPC_BULK_SVC_INTG &&
                     bsd->bsd_svc != SPTLRPC_BULK_SVC_PRIV)) {
                CERROR("Invalid svc %u\n", bsd->bsd_svc);
                return -EPROTO;
        }

        return 0;
}
EXPORT_SYMBOL(bulk_sec_desc_unpack);

int sptlrpc_get_bulk_checksum(struct ptlrpc_bulk_desc *desc, __u8 alg,
                              void *buf, int buflen)
{
        struct cfs_crypto_hash_desc *hdesc;
        int hashsize;
        char hashbuf[64];
        unsigned int bufsize;
        int i, err;

        LASSERT(alg > BULK_HASH_ALG_NULL && alg < BULK_HASH_ALG_MAX);
        LASSERT(buflen >= 4);

        hdesc = cfs_crypto_hash_init(cfs_hash_alg_id[alg], NULL, 0);
        if (IS_ERR(hdesc)) {
                CERROR("Unable to initialize checksum hash %s\n",
                       cfs_crypto_hash_name(cfs_hash_alg_id[alg]));
                return PTR_ERR(hdesc);
        }

        hashsize = cfs_crypto_hash_digestsize(cfs_hash_alg_id[alg]);

        for (i = 0; i < desc->bd_iov_count; i++) {
                cfs_crypto_hash_update_page(hdesc, desc->bd_iov[i].kiov_page,
                                  desc->bd_iov[i].kiov_offset & ~CFS_PAGE_MASK,
                                  desc->bd_iov[i].kiov_len);
        }
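        /*
         * if the digest is larger than the caller's buffer, finalize into
         * a local buffer first and hand back a truncated checksum.
         */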
        if (hashsize > buflen) {
                bufsize = sizeof(hashbuf);
                err = cfs_crypto_hash_final(hdesc, (unsigned char *)hashbuf,
                                            &bufsize);
                memcpy(buf, hashbuf, buflen);
        } else {
                bufsize = buflen;
                err = cfs_crypto_hash_final(hdesc, (unsigned char *)buf,
                                            &bufsize);
        }

        if (err)
                cfs_crypto_hash_final(hdesc, NULL, NULL);
        return err;
}
EXPORT_SYMBOL(sptlrpc_get_bulk_checksum);