]> git.karo-electronics.de Git - karo-tx-linux.git/blob - drivers/staging/lustre/lustre/libcfs/workitem.c
staging/lustre/libcfs: Cleanup: parenthesis alignment adjustments
[karo-tx-linux.git] / drivers / staging / lustre / lustre / libcfs / workitem.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19  *
20  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21  * CA 95054 USA or visit www.sun.com if you need additional information or
22  * have any questions.
23  *
24  * GPL HEADER END
25  */
26 /*
27  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
28  * Use is subject to license terms.
29  *
30  * Copyright (c) 2011, 2012, Intel Corporation.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * libcfs/libcfs/workitem.c
37  *
38  * Author: Isaac Huang <isaac@clusterfs.com>
39  *       Liang Zhen  <zhen.liang@sun.com>
40  */
41
42 #define DEBUG_SUBSYSTEM S_LNET
43
44 #include "../../include/linux/libcfs/libcfs.h"
45
46 #define CFS_WS_NAME_LEN  16
47
struct cfs_wi_sched {
        struct list_head                ws_list;        /* chain on cfs_wi_data::wi_scheds */
        /** serialises access to ws_runq/ws_rerunq and workitem state bits */
        spinlock_t              ws_lock;
        /** where scheduler threads sleep waiting for work */
        wait_queue_head_t               ws_waitq;
        /** queue of workitems ready to run concurrently */
        struct list_head                ws_runq;
        /** rescheduled running-workitems: a workitem can be rescheduled
         * while running in wi_action(), but we don't want to execute it
         * again unless it returns from wi_action(), so we put it on
         * ws_rerunq while rescheduling, and move it to runq after it
         * returns from wi_action() */
        struct list_head                ws_rerunq;
        /** CPT-table for this scheduler (NULL if not CPT-affine) */
        struct cfs_cpt_table    *ws_cptab;
        /** CPT id for affinity (CFS_CPT_ANY if unbound) */
        int                     ws_cpt;
        /** number of workitems currently queued on ws_runq/ws_rerunq */
        int                     ws_nscheduled;
        /** started scheduler threads, protected by cfs_wi_data::wi_glock */
        unsigned int            ws_nthreads:30;
        /** shutting down, protected by cfs_wi_data::wi_glock */
        unsigned int            ws_stopping:1;
        /** serialize starting thread, protected by cfs_wi_data::wi_glock */
        unsigned int            ws_starting:1;
        /** scheduler name, also used as kthread name prefix */
        char                    ws_name[CFS_WS_NAME_LEN];
};
77
/* module-global state: registry of all schedulers plus lifecycle flags */
static struct cfs_workitem_data {
        /** serialises wi_scheds and per-sched thread counters */
        spinlock_t              wi_glock;
        /** list of all schedulers (cfs_wi_sched::ws_list) */
        struct list_head                wi_scheds;
        /** WI module is initialized (set by cfs_wi_startup) */
        int                     wi_init;
        /** shutting down the whole WI module (set by cfs_wi_shutdown) */
        int                     wi_stopping;
} cfs_wi_data;
88
89 static inline int
90 cfs_wi_sched_cansleep(struct cfs_wi_sched *sched)
91 {
92         spin_lock(&sched->ws_lock);
93         if (sched->ws_stopping) {
94                 spin_unlock(&sched->ws_lock);
95                 return 0;
96         }
97
98         if (!list_empty(&sched->ws_runq)) {
99                 spin_unlock(&sched->ws_lock);
100                 return 0;
101         }
102         spin_unlock(&sched->ws_lock);
103         return 1;
104 }
105
106 /* XXX:
107  * 0. it only works when called from wi->wi_action.
108  * 1. when it returns no one shall try to schedule the workitem.
109  */
110 void
111 cfs_wi_exit(struct cfs_wi_sched *sched, cfs_workitem_t *wi)
112 {
113         LASSERT(!in_interrupt()); /* because we use plain spinlock */
114         LASSERT(!sched->ws_stopping);
115
116         spin_lock(&sched->ws_lock);
117
118         LASSERT(wi->wi_running);
119         if (wi->wi_scheduled) { /* cancel pending schedules */
120                 LASSERT(!list_empty(&wi->wi_list));
121                 list_del_init(&wi->wi_list);
122
123                 LASSERT(sched->ws_nscheduled > 0);
124                 sched->ws_nscheduled--;
125         }
126
127         LASSERT(list_empty(&wi->wi_list));
128
129         wi->wi_scheduled = 1; /* LBUG future schedule attempts */
130         spin_unlock(&sched->ws_lock);
131
132         return;
133 }
134 EXPORT_SYMBOL(cfs_wi_exit);
135
136 /**
137  * cancel schedule request of workitem \a wi
138  */
139 int
140 cfs_wi_deschedule(struct cfs_wi_sched *sched, cfs_workitem_t *wi)
141 {
142         int     rc;
143
144         LASSERT(!in_interrupt()); /* because we use plain spinlock */
145         LASSERT(!sched->ws_stopping);
146
147         /*
148          * return 0 if it's running already, otherwise return 1, which
149          * means the workitem will not be scheduled and will not have
150          * any race with wi_action.
151          */
152         spin_lock(&sched->ws_lock);
153
154         rc = !(wi->wi_running);
155
156         if (wi->wi_scheduled) { /* cancel pending schedules */
157                 LASSERT(!list_empty(&wi->wi_list));
158                 list_del_init(&wi->wi_list);
159
160                 LASSERT(sched->ws_nscheduled > 0);
161                 sched->ws_nscheduled--;
162
163                 wi->wi_scheduled = 0;
164         }
165
166         LASSERT (list_empty(&wi->wi_list));
167
168         spin_unlock(&sched->ws_lock);
169         return rc;
170 }
171 EXPORT_SYMBOL(cfs_wi_deschedule);
172
173 /*
174  * Workitem scheduled with (serial == 1) is strictly serialised not only with
175  * itself, but also with others scheduled this way.
176  *
177  * Now there's only one static serialised queue, but in the future more might
178  * be added, and even dynamic creation of serialised queues might be supported.
179  */
180 void
181 cfs_wi_schedule(struct cfs_wi_sched *sched, cfs_workitem_t *wi)
182 {
183         LASSERT(!in_interrupt()); /* because we use plain spinlock */
184         LASSERT(!sched->ws_stopping);
185
186         spin_lock(&sched->ws_lock);
187
188         if (!wi->wi_scheduled) {
189                 LASSERT (list_empty(&wi->wi_list));
190
191                 wi->wi_scheduled = 1;
192                 sched->ws_nscheduled++;
193                 if (!wi->wi_running) {
194                         list_add_tail(&wi->wi_list, &sched->ws_runq);
195                         wake_up(&sched->ws_waitq);
196                 } else {
197                         list_add(&wi->wi_list, &sched->ws_rerunq);
198                 }
199         }
200
201         LASSERT (!list_empty(&wi->wi_list));
202         spin_unlock(&sched->ws_lock);
203         return;
204 }
205 EXPORT_SYMBOL(cfs_wi_schedule);
206
207 static int
208 cfs_wi_scheduler (void *arg)
209 {
210         struct cfs_wi_sched     *sched = (struct cfs_wi_sched *)arg;
211
212         cfs_block_allsigs();
213
214         /* CPT affinity scheduler? */
215         if (sched->ws_cptab)
216                 if (cfs_cpt_bind(sched->ws_cptab, sched->ws_cpt) != 0)
217                         CWARN("Failed to bind %s on CPT %d\n",
218                               sched->ws_name, sched->ws_cpt);
219
220         spin_lock(&cfs_wi_data.wi_glock);
221
222         LASSERT(sched->ws_starting == 1);
223         sched->ws_starting--;
224         sched->ws_nthreads++;
225
226         spin_unlock(&cfs_wi_data.wi_glock);
227
228         spin_lock(&sched->ws_lock);
229
230         while (!sched->ws_stopping) {
231                 int          nloops = 0;
232                 int          rc;
233                 cfs_workitem_t *wi;
234
235                 while (!list_empty(&sched->ws_runq) &&
236                        nloops < CFS_WI_RESCHED) {
237                         wi = list_entry(sched->ws_runq.next, cfs_workitem_t,
238                                         wi_list);
239                         LASSERT(wi->wi_scheduled && !wi->wi_running);
240
241                         list_del_init(&wi->wi_list);
242
243                         LASSERT(sched->ws_nscheduled > 0);
244                         sched->ws_nscheduled--;
245
246                         wi->wi_running   = 1;
247                         wi->wi_scheduled = 0;
248
249                         spin_unlock(&sched->ws_lock);
250                         nloops++;
251
252                         rc = (*wi->wi_action) (wi);
253
254                         spin_lock(&sched->ws_lock);
255                         if (rc != 0) /* WI should be dead, even be freed! */
256                                 continue;
257
258                         wi->wi_running = 0;
259                         if (list_empty(&wi->wi_list))
260                                 continue;
261
262                         LASSERT(wi->wi_scheduled);
263                         /* wi is rescheduled, should be on rerunq now, we
264                          * move it to runq so it can run action now */
265                         list_move_tail(&wi->wi_list, &sched->ws_runq);
266                 }
267
268                 if (!list_empty(&sched->ws_runq)) {
269                         spin_unlock(&sched->ws_lock);
270                         /* don't sleep because some workitems still
271                          * expect me to come back soon */
272                         cond_resched();
273                         spin_lock(&sched->ws_lock);
274                         continue;
275                 }
276
277                 spin_unlock(&sched->ws_lock);
278                 rc = wait_event_interruptible_exclusive(sched->ws_waitq,
279                                                 !cfs_wi_sched_cansleep(sched));
280                 spin_lock(&sched->ws_lock);
281         }
282
283         spin_unlock(&sched->ws_lock);
284
285         spin_lock(&cfs_wi_data.wi_glock);
286         sched->ws_nthreads--;
287         spin_unlock(&cfs_wi_data.wi_glock);
288
289         return 0;
290 }
291
/**
 * Stop all threads of \a sched, unlink it from the global list and free it.
 * Safe to call concurrently: only the first caller proceeds past the
 * ws_stopping check; later callers return immediately.
 */
void
cfs_wi_sched_destroy(struct cfs_wi_sched *sched)
{
        int     i;

        LASSERT(cfs_wi_data.wi_init);
        LASSERT(!cfs_wi_data.wi_stopping);

        spin_lock(&cfs_wi_data.wi_glock);
        if (sched->ws_stopping) {
                CDEBUG(D_INFO, "%s is in progress of stopping\n",
                       sched->ws_name);
                spin_unlock(&cfs_wi_data.wi_glock);
                return;
        }

        LASSERT(!list_empty(&sched->ws_list));
        sched->ws_stopping = 1;

        spin_unlock(&cfs_wi_data.wi_glock);

        /* i starts at 2 so the power-of-2 test below throttles the
         * "waiting" message (first warning after a few iterations,
         * then exponentially less often) */
        i = 2;
        wake_up_all(&sched->ws_waitq);

        /* poll every 1/20 second until all scheduler threads have
         * decremented ws_nthreads on their way out */
        spin_lock(&cfs_wi_data.wi_glock);
        while (sched->ws_nthreads > 0) {
                CDEBUG(is_power_of_2(++i) ? D_WARNING : D_NET,
                       "waiting for %d threads of WI sched[%s] to terminate\n",
                       sched->ws_nthreads, sched->ws_name);

                spin_unlock(&cfs_wi_data.wi_glock);
                set_current_state(TASK_UNINTERRUPTIBLE);
                schedule_timeout(cfs_time_seconds(1) / 20);
                spin_lock(&cfs_wi_data.wi_glock);
        }

        list_del(&sched->ws_list);

        spin_unlock(&cfs_wi_data.wi_glock);
        LASSERT(sched->ws_nscheduled == 0);

        LIBCFS_FREE(sched, sizeof(*sched));
}
EXPORT_SYMBOL(cfs_wi_sched_destroy);
336
337 int
338 cfs_wi_sched_create(char *name, struct cfs_cpt_table *cptab,
339                     int cpt, int nthrs, struct cfs_wi_sched **sched_pp)
340 {
341         struct cfs_wi_sched     *sched;
342         int                     rc;
343
344         LASSERT(cfs_wi_data.wi_init);
345         LASSERT(!cfs_wi_data.wi_stopping);
346         LASSERT(!cptab || cpt == CFS_CPT_ANY ||
347                 (cpt >= 0 && cpt < cfs_cpt_number(cptab)));
348
349         LIBCFS_ALLOC(sched, sizeof(*sched));
350         if (!sched)
351                 return -ENOMEM;
352
353         strlcpy(sched->ws_name, name, CFS_WS_NAME_LEN);
354
355         sched->ws_cptab = cptab;
356         sched->ws_cpt = cpt;
357
358         spin_lock_init(&sched->ws_lock);
359         init_waitqueue_head(&sched->ws_waitq);
360         INIT_LIST_HEAD(&sched->ws_runq);
361         INIT_LIST_HEAD(&sched->ws_rerunq);
362         INIT_LIST_HEAD(&sched->ws_list);
363
364         rc = 0;
365         while (nthrs > 0)  {
366                 char    name[16];
367                 struct task_struct *task;
368
369                 spin_lock(&cfs_wi_data.wi_glock);
370                 while (sched->ws_starting > 0) {
371                         spin_unlock(&cfs_wi_data.wi_glock);
372                         schedule();
373                         spin_lock(&cfs_wi_data.wi_glock);
374                 }
375
376                 sched->ws_starting++;
377                 spin_unlock(&cfs_wi_data.wi_glock);
378
379                 if (sched->ws_cptab && sched->ws_cpt >= 0) {
380                         snprintf(name, sizeof(name), "%s_%02d_%02u",
381                                  sched->ws_name, sched->ws_cpt,
382                                  sched->ws_nthreads);
383                 } else {
384                         snprintf(name, sizeof(name), "%s_%02u",
385                                  sched->ws_name, sched->ws_nthreads);
386                 }
387
388                 task = kthread_run(cfs_wi_scheduler, sched, "%s", name);
389                 if (!IS_ERR(task)) {
390                         nthrs--;
391                         continue;
392                 }
393                 rc = PTR_ERR(task);
394
395                 CERROR("Failed to create thread for WI scheduler %s: %d\n",
396                        name, rc);
397
398                 spin_lock(&cfs_wi_data.wi_glock);
399
400                 /* make up for cfs_wi_sched_destroy */
401                 list_add(&sched->ws_list, &cfs_wi_data.wi_scheds);
402                 sched->ws_starting--;
403
404                 spin_unlock(&cfs_wi_data.wi_glock);
405
406                 cfs_wi_sched_destroy(sched);
407                 return rc;
408         }
409         spin_lock(&cfs_wi_data.wi_glock);
410         list_add(&sched->ws_list, &cfs_wi_data.wi_scheds);
411         spin_unlock(&cfs_wi_data.wi_glock);
412
413         *sched_pp = sched;
414         return 0;
415 }
416 EXPORT_SYMBOL(cfs_wi_sched_create);
417
418 int
419 cfs_wi_startup(void)
420 {
421         memset(&cfs_wi_data, 0, sizeof(cfs_wi_data));
422
423         spin_lock_init(&cfs_wi_data.wi_glock);
424         INIT_LIST_HEAD(&cfs_wi_data.wi_scheds);
425         cfs_wi_data.wi_init = 1;
426
427         return 0;
428 }
429
/**
 * Shut the workitem module down: stop every scheduler, wait for all of
 * their threads to exit, then free the schedulers.
 */
void
cfs_wi_shutdown(void)
{
        struct cfs_wi_sched     *sched;

        spin_lock(&cfs_wi_data.wi_glock);
        cfs_wi_data.wi_stopping = 1;
        spin_unlock(&cfs_wi_data.wi_glock);

        /* nobody should contend on this list */
        list_for_each_entry(sched, &cfs_wi_data.wi_scheds, ws_list) {
                sched->ws_stopping = 1;
                wake_up_all(&sched->ws_waitq);
        }

        /* poll (1/20 second) until every thread of every scheduler has
         * decremented ws_nthreads on its way out */
        list_for_each_entry(sched, &cfs_wi_data.wi_scheds, ws_list) {
                spin_lock(&cfs_wi_data.wi_glock);

                while (sched->ws_nthreads != 0) {
                        spin_unlock(&cfs_wi_data.wi_glock);
                        set_current_state(TASK_UNINTERRUPTIBLE);
                        schedule_timeout(cfs_time_seconds(1) / 20);
                        spin_lock(&cfs_wi_data.wi_glock);
                }
                spin_unlock(&cfs_wi_data.wi_glock);
        }
        /* all threads are gone; pop and free each scheduler */
        while (!list_empty(&cfs_wi_data.wi_scheds)) {
                sched = list_entry(cfs_wi_data.wi_scheds.next,
                                   struct cfs_wi_sched, ws_list);
                list_del(&sched->ws_list);
                LIBCFS_FREE(sched, sizeof(*sched));
        }

        cfs_wi_data.wi_stopping = 0;
        cfs_wi_data.wi_init = 0;
}