4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2012, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * libcfs/libcfs/workitem.c
38 * Author: Isaac Huang <isaac@clusterfs.com>
39 * Liang Zhen <zhen.liang@sun.com>
42 #define DEBUG_SUBSYSTEM S_LNET
44 #include <linux/libcfs/libcfs.h>
/* Size in bytes of the ws_name buffer in struct cfs_wi_sched below. */
46 #define CFS_WS_NAME_LEN 16
/*
 * State of one workitem scheduler: the queue of runnable workitems
 * (ws_runq), the queue of workitems rescheduled while their action is
 * still running (ws_rerunq), the wait queue the scheduler threads sleep
 * on (ws_waitq), optional CPT affinity and thread bookkeeping.
 *
 * NOTE(review): this listing does not show every member.  Other code in
 * this file uses sched->ws_lock, sched->ws_cpt and sched->ws_nscheduled,
 * whose declarations are not among the lines below - restore them from
 * the repository copy before building.
 */
48 typedef struct cfs_wi_sched {
49 struct list_head ws_list; /* chain on global list */
50 /** serialised workitems */
52 /** where schedulers sleep */
53 wait_queue_head_t ws_waitq;
54 /** concurrent workitems */
55 struct list_head ws_runq;
56 /** rescheduled running-workitems, a workitem can be rescheduled
57 * while running in wi_action(), but we don't to execute it again
58 * unless it returns from wi_action(), so we put it on ws_rerunq
59 * while rescheduling, and move it to runq after it returns
61 struct list_head ws_rerunq;
62 /** CPT-table for this scheduler */
63 struct cfs_cpt_table *ws_cptab;
64 /** CPT id for affinity */
66 /** number of scheduled workitems */
/*
 * The three bit-fields below are declared back to back (30 + 1 + 1 bits).
 * ws_starting is a single bit: cfs_wi_sched_create() waits for it to
 * return to 0 before incrementing it again, so only one thread start is
 * in flight at a time.
 */
68 /** started scheduler thread, protected by cfs_wi_data::wi_glock */
69 unsigned int ws_nthreads:30;
70 /** shutting down, protected by cfs_wi_data::wi_glock */
71 unsigned int ws_stopping:1;
72 /** serialize starting thread, protected by cfs_wi_data::wi_glock */
73 unsigned int ws_starting:1;
/* scheduler name; also used as the prefix of its thread names */
75 char ws_name[CFS_WS_NAME_LEN];
/*
 * Module-wide workitem state.  The rest of this file accesses a single
 * instance named cfs_wi_data and uses its wi_glock, wi_scheds, wi_init
 * and wi_stopping members.
 *
 * NOTE(review): only the wi_scheds declaration is visible in this
 * listing; the other member declarations and the closing of the struct
 * are not shown - confirm against the repository copy.
 */
78 struct cfs_workitem_data {
81 /** list of all schedulers */
82 struct list_head wi_scheds;
83 /** WI module is initialized */
85 /** shutting down the whole WI module */
/* Acquire the per-scheduler spinlock sched->ws_lock. */
90 cfs_wi_sched_lock(cfs_wi_sched_t *sched)
92 spin_lock(&sched->ws_lock);
/* Release the per-scheduler spinlock taken by cfs_wi_sched_lock(). */
96 cfs_wi_sched_unlock(cfs_wi_sched_t *sched)
98 spin_unlock(&sched->ws_lock);
/*
 * Sleep predicate for a scheduler thread.  Under the scheduler lock it
 * checks whether the scheduler is stopping and whether ws_runq holds
 * work; the lock is released on every path.
 *
 * NOTE(review): the return statements are not present in this listing.
 * cfs_wi_scheduler() waits on "!cfs_wi_sched_cansleep(sched)", so this
 * presumably returns 0 when stopping or when ws_runq is non-empty and
 * non-zero otherwise - confirm against the repository copy.
 */
102 cfs_wi_sched_cansleep(cfs_wi_sched_t *sched)
104 cfs_wi_sched_lock(sched);
/* shutting down: the thread must not go (back) to sleep */
105 if (sched->ws_stopping) {
106 cfs_wi_sched_unlock(sched);
/* runnable workitems are queued */
110 if (!list_empty(&sched->ws_runq)) {
111 cfs_wi_sched_unlock(sched);
114 cfs_wi_sched_unlock(sched);
/*
 * Retire workitem \a wi on scheduler \a sched.
 *
 * Asserts that the caller is not in interrupt context, that the
 * scheduler is not stopping and that \a wi is currently running.  Any
 * pending schedule request is cancelled (the workitem is unlinked and
 * ws_nscheduled is decremented).  wi_scheduled is then left set to 1
 * while the workitem is on no list, so a later cfs_wi_schedule() skips
 * its queueing branch and trips its final LASSERT(!list_empty(...)).
 */
120 * 0. it only works when called from wi->wi_action.
121 * 1. when it returns no one shall try to schedule the workitem.
124 cfs_wi_exit(struct cfs_wi_sched *sched, cfs_workitem_t *wi)
126 LASSERT(!in_interrupt()); /* because we use plain spinlock */
127 LASSERT(!sched->ws_stopping);
129 cfs_wi_sched_lock(sched);
131 LASSERT(wi->wi_running);
132 if (wi->wi_scheduled) { /* cancel pending schedules */
133 LASSERT(!list_empty(&wi->wi_list));
134 list_del_init(&wi->wi_list);
136 LASSERT(sched->ws_nscheduled > 0);
137 sched->ws_nscheduled--;
/* whether or not a request was pending, wi must be off every queue now */
140 LASSERT(list_empty(&wi->wi_list));
142 wi->wi_scheduled = 1; /* LBUG future schedule attempts */
143 cfs_wi_sched_unlock(sched);
147 EXPORT_SYMBOL(cfs_wi_exit);
/*
 * Cancel a pending schedule request of \a wi on scheduler \a sched.
 *
 * Under the scheduler lock, rc is computed as !wi->wi_running; if the
 * workitem is marked scheduled it is unlinked from its queue,
 * ws_nscheduled is decremented and wi_scheduled is cleared.  On exit the
 * workitem is asserted to be on no list.
 *
 * Must not be called from interrupt context or on a stopping scheduler
 * (both are asserted).
 *
 * NOTE(review): the declaration of rc and the return statement are not
 * present in this listing; the function presumably returns rc.
 */
150 * cancel schedule request of workitem \a wi
153 cfs_wi_deschedule(struct cfs_wi_sched *sched, cfs_workitem_t *wi)
157 LASSERT(!in_interrupt()); /* because we use plain spinlock */
158 LASSERT(!sched->ws_stopping);
161 * return 0 if it's running already, otherwise return 1, which
162 * means the workitem will not be scheduled and will not have
163 * any race with wi_action.
165 cfs_wi_sched_lock(sched);
/* sampled under the lock so it is consistent with the queue state below */
167 rc = !(wi->wi_running);
169 if (wi->wi_scheduled) { /* cancel pending schedules */
170 LASSERT(!list_empty(&wi->wi_list));
171 list_del_init(&wi->wi_list);
173 LASSERT(sched->ws_nscheduled > 0);
174 sched->ws_nscheduled--;
176 wi->wi_scheduled = 0;
179 LASSERT (list_empty(&wi->wi_list));
181 cfs_wi_sched_unlock(sched);
184 EXPORT_SYMBOL(cfs_wi_deschedule);
/*
 * Queue workitem \a wi on scheduler \a sched.
 *
 * If \a wi is not already marked scheduled, it is marked, ws_nscheduled
 * is incremented, and the workitem is linked either
 *  - at the tail of ws_runq, followed by a wake_up() on ws_waitq, when
 *    its action is not running, or
 *  - on ws_rerunq when its action is running; cfs_wi_scheduler() moves
 *    it to ws_runq after the action returns.
 * If it is already marked scheduled nothing is changed.  In both cases
 * the workitem is asserted to be on a list before the lock is dropped.
 *
 * Must not be called from interrupt context or on a stopping scheduler
 * (both are asserted).
 *
 * NOTE(review): the older comment below mentions "serial == 1", but this
 * function takes no such argument - the text looks stale; confirm and
 * update it in the repository copy.
 */
187 * Workitem scheduled with (serial == 1) is strictly serialised not only with
188 * itself, but also with others scheduled this way.
190 * Now there's only one static serialised queue, but in the future more might
191 * be added, and even dynamic creation of serialised queues might be supported.
194 cfs_wi_schedule(struct cfs_wi_sched *sched, cfs_workitem_t *wi)
196 LASSERT(!in_interrupt()); /* because we use plain spinlock */
197 LASSERT(!sched->ws_stopping);
199 cfs_wi_sched_lock(sched);
201 if (!wi->wi_scheduled) {
202 LASSERT (list_empty(&wi->wi_list));
204 wi->wi_scheduled = 1;
205 sched->ws_nscheduled++;
206 if (!wi->wi_running) {
207 list_add_tail(&wi->wi_list, &sched->ws_runq);
208 wake_up(&sched->ws_waitq);
/* action in progress: park the request until the action returns */
210 list_add(&wi->wi_list, &sched->ws_rerunq);
214 LASSERT (!list_empty(&wi->wi_list));
215 cfs_wi_sched_unlock(sched);
218 EXPORT_SYMBOL(cfs_wi_schedule);
/*
 * Body of a scheduler thread; it is the function handed to kthread_run()
 * by cfs_wi_sched_create().  \a arg is the struct cfs_wi_sched to serve.
 *
 * Start-up: binds to sched->ws_cpt when a CPT table is set, then, under
 * cfs_wi_data.wi_glock, clears ws_starting and increments ws_nthreads.
 *
 * Main loop (runs until ws_stopping is set): takes workitems from the
 * head of ws_runq, clears wi_scheduled, and calls the workitem's
 * wi_action with the scheduler lock dropped.  A workitem that was
 * rescheduled while its action ran (it is then linked on ws_rerunq) is
 * moved to the tail of ws_runq.  The inner loop is bounded by
 * CFS_WI_RESCHED; if work remains afterwards the lock is released
 * briefly before continuing, otherwise the thread waits on ws_waitq
 * until cfs_wi_sched_cansleep() reports false.
 *
 * Exit: decrements ws_nthreads under cfs_wi_data.wi_glock, which is what
 * cfs_wi_sched_destroy() and cfs_wi_shutdown() poll for.
 *
 * NOTE(review): this listing does not show the declarations of nloops,
 * rc and wi, the statements that maintain wi->wi_running and nloops, the
 * bodies of the two "if" tests that follow the action call, or the
 * return statement - confirm against the repository copy.
 */
222 cfs_wi_scheduler (void *arg)
224 struct cfs_wi_sched *sched = (cfs_wi_sched_t *)arg;
228 /* CPT affinity scheduler? */
229 if (sched->ws_cptab != NULL)
230 cfs_cpt_bind(sched->ws_cptab, sched->ws_cpt);
232 spin_lock(&cfs_wi_data.wi_glock);
/* tell cfs_wi_sched_create() this thread is up so it may start the next */
234 LASSERT(sched->ws_starting == 1);
235 sched->ws_starting--;
236 sched->ws_nthreads++;
238 spin_unlock(&cfs_wi_data.wi_glock);
240 cfs_wi_sched_lock(sched);
242 while (!sched->ws_stopping) {
247 while (!list_empty(&sched->ws_runq) &&
248 nloops < CFS_WI_RESCHED) {
249 wi = list_entry(sched->ws_runq.next,
250 cfs_workitem_t, wi_list);
251 LASSERT(wi->wi_scheduled && !wi->wi_running);
253 list_del_init(&wi->wi_list);
255 LASSERT(sched->ws_nscheduled > 0);
256 sched->ws_nscheduled--;
259 wi->wi_scheduled = 0;
/* run the action without the scheduler lock so it may reschedule itself */
262 cfs_wi_sched_unlock(sched);
265 rc = (*wi->wi_action) (wi);
267 cfs_wi_sched_lock(sched);
268 if (rc != 0) /* WI should be dead, even be freed! */
/* an empty wi_list means the workitem was not rescheduled meanwhile */
272 if (list_empty(&wi->wi_list))
275 LASSERT(wi->wi_scheduled);
276 /* wi is rescheduled, should be on rerunq now, we
277 * move it to runq so it can run action now */
278 list_move_tail(&wi->wi_list, &sched->ws_runq);
281 if (!list_empty(&sched->ws_runq)) {
282 cfs_wi_sched_unlock(sched);
283 /* don't sleep because some workitems still
284 * expect me to come back soon */
286 cfs_wi_sched_lock(sched);
/* nothing runnable: sleep until woken with work queued or a stop request */
290 cfs_wi_sched_unlock(sched);
291 cfs_wait_event_interruptible_exclusive(sched->ws_waitq,
292 !cfs_wi_sched_cansleep(sched), rc);
293 cfs_wi_sched_lock(sched);
296 cfs_wi_sched_unlock(sched);
298 spin_lock(&cfs_wi_data.wi_glock);
299 sched->ws_nthreads--;
300 spin_unlock(&cfs_wi_data.wi_glock);
/*
 * Stop scheduler \a sched, wait for its threads to exit, and free it.
 *
 * Asserts that the workitem module is initialised and not shutting down.
 * Under cfs_wi_data.wi_glock: if ws_stopping is already set, a D_INFO
 * message is logged and the lock is released; otherwise \a sched is
 * asserted to be on the global list and ws_stopping is set.  All
 * sleepers on ws_waitq are then woken and ws_nthreads is polled, with
 * wi_glock dropped around each pause of cfs_time_seconds(1) / 20, until
 * it reaches 0.  Finally \a sched is unlinked from the global list,
 * asserted to have no scheduled workitems, and freed.
 *
 * NOTE(review): the declaration and initial value of i, the second
 * argument of the D_INFO message, and the statement following the early
 * unlock (presumably a return) are not present in this listing.
 */
307 cfs_wi_sched_destroy(struct cfs_wi_sched *sched)
311 LASSERT(cfs_wi_data.wi_init);
312 LASSERT(!cfs_wi_data.wi_stopping);
314 spin_lock(&cfs_wi_data.wi_glock);
315 if (sched->ws_stopping) {
316 CDEBUG(D_INFO, "%s is in progress of stopping\n",
318 spin_unlock(&cfs_wi_data.wi_glock);
322 LASSERT(!list_empty(&sched->ws_list));
323 sched->ws_stopping = 1;
325 spin_unlock(&cfs_wi_data.wi_glock);
/* threads re-check ws_stopping when woken and leave their main loop */
328 wake_up_all(&sched->ws_waitq);
330 spin_lock(&cfs_wi_data.wi_glock);
331 while (sched->ws_nthreads > 0) {
/* the log level depends on IS_PO2(++i), presumably to thin out warnings */
332 CDEBUG(IS_PO2(++i) ? D_WARNING : D_NET,
333 "waiting for %d threads of WI sched[%s] to terminate\n",
334 sched->ws_nthreads, sched->ws_name);
336 spin_unlock(&cfs_wi_data.wi_glock);
337 cfs_pause(cfs_time_seconds(1) / 20);
338 spin_lock(&cfs_wi_data.wi_glock);
341 list_del(&sched->ws_list);
343 spin_unlock(&cfs_wi_data.wi_glock);
344 LASSERT(sched->ws_nscheduled == 0);
346 LIBCFS_FREE(sched, sizeof(*sched));
348 EXPORT_SYMBOL(cfs_wi_sched_destroy);
351 cfs_wi_sched_create(char *name, struct cfs_cpt_table *cptab,
352 int cpt, int nthrs, struct cfs_wi_sched **sched_pp)
354 struct cfs_wi_sched *sched;
357 LASSERT(cfs_wi_data.wi_init);
358 LASSERT(!cfs_wi_data.wi_stopping);
359 LASSERT(cptab == NULL || cpt == CFS_CPT_ANY ||
360 (cpt >= 0 && cpt < cfs_cpt_number(cptab)));
362 LIBCFS_ALLOC(sched, sizeof(*sched));
366 strncpy(sched->ws_name, name, CFS_WS_NAME_LEN);
367 sched->ws_cptab = cptab;
370 spin_lock_init(&sched->ws_lock);
371 init_waitqueue_head(&sched->ws_waitq);
372 INIT_LIST_HEAD(&sched->ws_runq);
373 INIT_LIST_HEAD(&sched->ws_rerunq);
374 INIT_LIST_HEAD(&sched->ws_list);
379 struct task_struct *task;
381 spin_lock(&cfs_wi_data.wi_glock);
382 while (sched->ws_starting > 0) {
383 spin_unlock(&cfs_wi_data.wi_glock);
385 spin_lock(&cfs_wi_data.wi_glock);
388 sched->ws_starting++;
389 spin_unlock(&cfs_wi_data.wi_glock);
391 if (sched->ws_cptab != NULL && sched->ws_cpt >= 0) {
392 snprintf(name, sizeof(name), "%s_%02d_%02d",
393 sched->ws_name, sched->ws_cpt,
396 snprintf(name, sizeof(name), "%s_%02d",
397 sched->ws_name, sched->ws_nthreads);
400 task = kthread_run(cfs_wi_scheduler, sched, name);
407 CERROR("Failed to create thread for WI scheduler %s: %d\n",
410 spin_lock(&cfs_wi_data.wi_glock);
412 /* make up for cfs_wi_sched_destroy */
413 list_add(&sched->ws_list, &cfs_wi_data.wi_scheds);
414 sched->ws_starting--;
416 spin_unlock(&cfs_wi_data.wi_glock);
418 cfs_wi_sched_destroy(sched);
421 spin_lock(&cfs_wi_data.wi_glock);
422 list_add(&sched->ws_list, &cfs_wi_data.wi_scheds);
423 spin_unlock(&cfs_wi_data.wi_glock);
428 EXPORT_SYMBOL(cfs_wi_sched_create);
/*
 * Initialise the module-wide workitem state: zero cfs_wi_data, set up its
 * global lock and (empty) scheduler list, then mark the module as
 * initialised.  cfs_wi_sched_create() and cfs_wi_sched_destroy() assert
 * wi_init before doing anything else.
 *
 * NOTE(review): the signature line of the enclosing function is not
 * present in this listing; it is presumably the module start-up routine -
 * confirm its name and return value against the repository copy.
 */
433 memset(&cfs_wi_data, 0, sizeof(cfs_wi_data));
435 spin_lock_init(&cfs_wi_data.wi_glock);
436 INIT_LIST_HEAD(&cfs_wi_data.wi_scheds);
437 cfs_wi_data.wi_init = 1;
/*
 * Shut down the whole workitem module.
 *
 * 1. Sets cfs_wi_data.wi_stopping under wi_glock.
 * 2. Marks every scheduler on the global list as stopping and wakes all
 *    of its sleeping threads.
 * 3. For each scheduler, polls ws_nthreads until it reaches 0, dropping
 *    wi_glock around each pause of cfs_time_seconds(1) / 20.
 * 4. Unlinks and frees every scheduler left on the global list.
 * 5. Clears wi_stopping and wi_init.
 *
 * NOTE(review): ws_stopping is written in step 2 without holding
 * wi_glock, although the member comment in struct cfs_wi_sched says it
 * is protected by that lock; the existing comment indicates no
 * contention is expected at this point - confirm.
 */
443 cfs_wi_shutdown (void)
445 struct cfs_wi_sched *sched;
447 spin_lock(&cfs_wi_data.wi_glock);
448 cfs_wi_data.wi_stopping = 1;
449 spin_unlock(&cfs_wi_data.wi_glock);
451 /* nobody should contend on this list */
452 list_for_each_entry(sched, &cfs_wi_data.wi_scheds, ws_list) {
453 sched->ws_stopping = 1;
454 wake_up_all(&sched->ws_waitq);
/* second pass: wait for every scheduler's threads to exit */
457 list_for_each_entry(sched, &cfs_wi_data.wi_scheds, ws_list) {
458 spin_lock(&cfs_wi_data.wi_glock);
460 while (sched->ws_nthreads != 0) {
461 spin_unlock(&cfs_wi_data.wi_glock);
462 cfs_pause(cfs_time_seconds(1) / 20);
463 spin_lock(&cfs_wi_data.wi_glock);
465 spin_unlock(&cfs_wi_data.wi_glock);
/* no scheduler thread is left, so the schedulers can be freed */
467 while (!list_empty(&cfs_wi_data.wi_scheds)) {
468 sched = list_entry(cfs_wi_data.wi_scheds.next,
469 struct cfs_wi_sched, ws_list);
470 list_del(&sched->ws_list);
471 LIBCFS_FREE(sched, sizeof(*sched));
474 cfs_wi_data.wi_stopping = 0;
475 cfs_wi_data.wi_init = 0;