/*
 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
 * Copyright (c) 2008 Dave Chinner
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include "xfs_types.h"
#include "xfs_trans.h"
#include "xfs_mount.h"
#include "xfs_trans_priv.h"
#include "xfs_error.h"

struct workqueue_struct	*xfs_ail_wq;	/* AIL workqueue */

STATIC void xfs_ail_splice(struct xfs_ail *, struct list_head *, xfs_lsn_t);
STATIC void xfs_ail_delete(struct xfs_ail *, xfs_log_item_t *);
STATIC xfs_log_item_t * xfs_ail_min(struct xfs_ail *);
STATIC xfs_log_item_t * xfs_ail_next(struct xfs_ail *, xfs_log_item_t *);

#ifdef DEBUG
STATIC void xfs_ail_check(struct xfs_ail *, xfs_log_item_t *);
#else
#define	xfs_ail_check(a,l)
#endif /* DEBUG */

/*
 * This is called by the log manager code to determine the LSN of the tail of
 * the log.  This is exactly the LSN of the first item in the AIL.  If the
 * AIL is empty, this function returns 0.
 *
 * We need the AIL lock in order to get a coherent read of the lsn of that
 * first item.
 */
xfs_lsn_t
xfs_trans_ail_tail(
	struct xfs_ail	*ailp)
{
	xfs_lsn_t	lsn = 0;
	xfs_log_item_t	*lip;

	spin_lock(&ailp->xa_lock);
	lip = xfs_ail_min(ailp);
	if (lip)
		lsn = lip->li_lsn;
	spin_unlock(&ailp->xa_lock);

	return lsn;
}

/*
 * AIL traversal cursor initialisation.
 *
 * The cursor keeps track of where our current traversal is up to by tracking
 * the next item in the list for us.  However, for this to be safe, removing
 * an object from the AIL needs to invalidate any cursor that points to it.
 * Hence the traversal cursor needs to be linked to the struct xfs_ail so
 * that deletion can search all the active cursors for invalidation.
 *
 * We don't link the push cursor because it is embedded in the struct xfs_ail
 * and hence easily findable.
 */
STATIC void
xfs_trans_ail_cursor_init(
	struct xfs_ail		*ailp,
	struct xfs_ail_cursor	*cur)
{
	cur->item = NULL;
	if (cur == &ailp->xa_cursors)
		return;

	cur->next = ailp->xa_cursors.next;
	ailp->xa_cursors.next = cur;
}

/*
 * Set the cursor to the next item, because when we look up the cursor the
 * current item may have been freed.
 */
STATIC void
xfs_trans_ail_cursor_set(
	struct xfs_ail		*ailp,
	struct xfs_ail_cursor	*cur,
	struct xfs_log_item	*lip)
{
	if (lip)
		cur->item = xfs_ail_next(ailp, lip);
}

/*
 * Get the next item in the traversal and advance the cursor.  If the cursor
 * was invalidated (indicated by a lip of 1), restart the traversal.
 */
struct xfs_log_item *
xfs_trans_ail_cursor_next(
	struct xfs_ail		*ailp,
	struct xfs_ail_cursor	*cur)
{
	struct xfs_log_item	*lip = cur->item;

	if ((__psint_t)lip & 1)
		lip = xfs_ail_min(ailp);
	xfs_trans_ail_cursor_set(ailp, cur, lip);
	return lip;
}

/*
 * Now that the traversal is complete, we need to remove the cursor from the
 * list of traversing cursors.  Avoid removing the embedded push cursor, but
 * use the fact that it is always present to make the list deletion simple.
 */
void
xfs_trans_ail_cursor_done(
	struct xfs_ail		*ailp,
	struct xfs_ail_cursor	*done)
{
	struct xfs_ail_cursor	*prev = NULL;
	struct xfs_ail_cursor	*cur;

	done->item = NULL;
	if (done == &ailp->xa_cursors)
		return;

	prev = &ailp->xa_cursors;
	for (cur = prev->next; cur; prev = cur, cur = prev->next) {
		if (cur == done) {
			prev->next = cur->next;
			break;
		}
	}
	ASSERT(cur);
}

/*
 * Invalidate any cursor that is pointing to this item.  This is called when
 * an item is removed from the AIL.  Any cursor pointing to this object is
 * now invalid and the traversal needs to be terminated so it doesn't
 * reference a freed object.  We set the cursor item to a value of 1 so we
 * can distinguish between an invalidation and the end of the list when
 * getting the next item from the cursor.
 */
STATIC void
xfs_trans_ail_cursor_clear(
	struct xfs_ail		*ailp,
	struct xfs_log_item	*lip)
{
	struct xfs_ail_cursor	*cur;

	/* need to search all cursors */
	for (cur = &ailp->xa_cursors; cur; cur = cur->next) {
		if (cur->item == lip)
			cur->item = (struct xfs_log_item *)
					((__psint_t)cur->item | 1);
	}
}

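/*
 * Editorial note: the tagging above relies on log items always being at
 * least word aligned, so bit 0 of a valid pointer is never set.  A minimal
 * sketch of the decode step, using a hypothetical helper name:
 */
static inline int
xfs_ail_cursor_invalidated(		/* hypothetical, for illustration */
	struct xfs_log_item	*item)
{
	return ((__psint_t)item & 1) != 0;
}
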
/*
 * Initialise the cursor and return the first item in the AIL with an LSN at
 * or beyond @lsn (or the first item in the AIL if @lsn is zero).  Returns
 * NULL, with the cursor cleared, if there is no such item.
 */
xfs_log_item_t *
xfs_trans_ail_cursor_first(
	struct xfs_ail		*ailp,
	struct xfs_ail_cursor	*cur,
	xfs_lsn_t		lsn)
{
	xfs_log_item_t		*lip;

	xfs_trans_ail_cursor_init(ailp, cur);
	lip = xfs_ail_min(ailp);
	if (lsn == 0)
		goto out;

	list_for_each_entry(lip, &ailp->xa_ail, li_ail) {
		if (XFS_LSN_CMP(lip->li_lsn, lsn) >= 0)
			goto out;
	}
	lip = NULL;
out:
	xfs_trans_ail_cursor_set(ailp, cur, lip);
	return lip;
}

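/*
 * Editorial sketch (hypothetical helper, not called anywhere): the cursor
 * API above is intended to be used along these lines.  The walk holds the
 * AIL lock; if the item under the cursor is deleted, the cursor is
 * invalidated and xfs_trans_ail_cursor_next() restarts from the AIL head.
 */
STATIC void
xfs_ail_example_walk(
	struct xfs_ail		*ailp,
	xfs_lsn_t		start_lsn)
{
	struct xfs_ail_cursor	cur;
	struct xfs_log_item	*lip;

	spin_lock(&ailp->xa_lock);
	for (lip = xfs_trans_ail_cursor_first(ailp, &cur, start_lsn);
	     lip != NULL;
	     lip = xfs_trans_ail_cursor_next(ailp, &cur)) {
		/* ... examine lip while holding ailp->xa_lock ... */
	}
	xfs_trans_ail_cursor_done(ailp, &cur);
	spin_unlock(&ailp->xa_lock);
}
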
/*
 * xfs_ail_worker does the work of pushing on the AIL.  It will requeue
 * itself to run at a later time if there is more work to do to complete the
 * push.
 */
STATIC void
xfs_ail_worker(
	struct work_struct	*work)
{
	struct xfs_ail	*ailp = container_of(to_delayed_work(work),
					struct xfs_ail, xa_work);
	long		tout;
	xfs_lsn_t	target = ailp->xa_target;
	xfs_lsn_t	lsn;
	xfs_log_item_t	*lip;
	int		flush_log, count, stuck;
	xfs_mount_t	*mp = ailp->xa_mount;
	struct xfs_ail_cursor	*cur = &ailp->xa_cursors;
	int		push_xfsbufd = 0;

	spin_lock(&ailp->xa_lock);
	xfs_trans_ail_cursor_init(ailp, cur);
	lip = xfs_trans_ail_cursor_first(ailp, cur, ailp->xa_last_pushed_lsn);
	if (!lip || XFS_FORCED_SHUTDOWN(mp)) {
		/*
		 * AIL is empty or our push has reached the end.
		 */
		xfs_trans_ail_cursor_done(ailp, cur);
		spin_unlock(&ailp->xa_lock);
		ailp->xa_last_pushed_lsn = 0;
		return;
	}

	XFS_STATS_INC(xs_push_ail);

	/*
	 * While the item we are looking at is below the given threshold, try
	 * to flush it out.  We'd like not to stop until we've at least tried
	 * to push on everything in the AIL with an LSN less than the given
	 * threshold.
	 *
	 * However, we will stop after a certain number of pushes and wait
	 * for a reduced timeout to fire before pushing further.  This
	 * prevents us from spinning when we can't do anything or there is
	 * lots of contention on the AIL lists.
	 */
	lsn = lip->li_lsn;
	flush_log = stuck = count = 0;
	while ((XFS_LSN_CMP(lip->li_lsn, target) < 0)) {
		int	lock_result;
		/*
		 * If we can lock the item without sleeping, unlock the AIL
		 * lock and flush the item.  Then re-grab the AIL lock so we
		 * can look for the next item on the AIL.  List changes are
		 * handled by the AIL lookup functions internally.
		 *
		 * If we can't lock the item, either its holder will flush
		 * it or it is already being flushed or it is being relogged.
		 * In any of these cases it is being taken care of and we
		 * can just skip to the next item in the list.
		 */
		lock_result = IOP_TRYLOCK(lip);
		spin_unlock(&ailp->xa_lock);
		switch (lock_result) {
		case XFS_ITEM_SUCCESS:
			XFS_STATS_INC(xs_push_ail_success);
			IOP_PUSH(lip);
			ailp->xa_last_pushed_lsn = lsn;
			break;

		case XFS_ITEM_PUSHBUF:
			XFS_STATS_INC(xs_push_ail_pushbuf);
			IOP_PUSHBUF(lip);
			ailp->xa_last_pushed_lsn = lsn;
			push_xfsbufd = 1;
			break;

		case XFS_ITEM_PINNED:
			XFS_STATS_INC(xs_push_ail_pinned);
			stuck++;
			flush_log = 1;
			break;

		case XFS_ITEM_LOCKED:
			XFS_STATS_INC(xs_push_ail_locked);
			ailp->xa_last_pushed_lsn = lsn;
			stuck++;
			break;

		default:
			ASSERT(0);
			break;
		}

		spin_lock(&ailp->xa_lock);
		/* should we bother continuing? */
		if (XFS_FORCED_SHUTDOWN(mp))
			break;
		ASSERT(mp->m_log);

		count++;

		/*
		 * Are there too many items we can't do anything with?
		 *
		 * If we are skipping too many items because we can't flush
		 * them or they are already being flushed, we back off and
		 * give them time to complete whatever operation is being
		 * done.  i.e. remove pressure from the AIL while we can't
		 * make progress so traversals don't slow down further
		 * inserts and removals to/from the AIL.
		 *
		 * The value of 100 is an arbitrary magic number based on
		 * observation.
		 */
		if (stuck > 100)
			break;

		lip = xfs_trans_ail_cursor_next(ailp, cur);
		if (lip == NULL)
			break;
		lsn = lip->li_lsn;
	}
	xfs_trans_ail_cursor_done(ailp, cur);
	spin_unlock(&ailp->xa_lock);

	if (flush_log) {
		/*
		 * If something we need to push out was pinned, then push
		 * out the log so it will become unpinned and move forward
		 * in the AIL.
		 */
		XFS_STATS_INC(xs_push_ail_flush);
		xfs_log_force(mp, 0);
	}

	if (push_xfsbufd) {
		/* we've got delayed write buffers to flush */
		wake_up_process(mp->m_ddev_targp->bt_task);
	}

	/* assume we have more work to do in a short while */
	tout = 10;
	if (!count) {
		/* We're past our target or empty, so idle */
		ailp->xa_last_pushed_lsn = 0;

		/*
		 * Check for an updated push target before clearing the
		 * XFS_AIL_PUSHING_BIT.  If the target changed, we've got
		 * more work to do.  Wait a bit longer before starting that
		 * work.
		 */
		smp_rmb();
		if (ailp->xa_target == target) {
			clear_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags);
			return;
		}
		tout = 50;
	} else if (XFS_LSN_CMP(lsn, target) >= 0) {
		/*
		 * We reached the target so wait a bit longer for I/O to
		 * complete and remove pushed items from the AIL before we
		 * start the next scan from the start of the AIL.
		 */
		tout = 50;
		ailp->xa_last_pushed_lsn = 0;
	} else if ((stuck * 100) / count > 90) {
		/*
		 * Either there is a lot of contention on the AIL or we are
		 * stuck due to operations in progress.  "Stuck" in this case
		 * is defined as >90% of the items we tried to push were
		 * stuck.
		 *
		 * Backoff a bit more to allow some I/O to complete before
		 * continuing from where we were.
		 */
		tout = 20;
	}

	/* There is more to do, requeue us.  */
	queue_delayed_work(xfs_syncd_wq, &ailp->xa_work,
					msecs_to_jiffies(tout));
}

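/*
 * Editorial summary of the requeue behaviour above: with more work pending
 * the worker reschedules itself in 10ms; if it reached the target, or the
 * target moved while it was idle, it backs off to 50ms; if more than 90% of
 * the items it visited were stuck it backs off to 20ms; and if the AIL went
 * idle with an unchanged target it clears XFS_AIL_PUSHING_BIT and does not
 * requeue at all.
 */
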
/*
 * This routine is called to move the tail of the AIL forward.  It does this
 * by trying to flush items in the AIL whose lsns are below the given
 * threshold_lsn.
 *
 * The push is run asynchronously in a workqueue, which means the caller
 * needs to handle waiting on the async flush for space to become available.
 * We don't want to interrupt any push that is in progress, hence we only
 * queue work if we set the pushing bit appropriately.
 *
 * We do this unlocked - we only need to know whether there is anything in
 * the AIL at the time we are called.  We don't need to access the contents
 * of any of the objects, so the lock is not needed.
 */
void
xfs_ail_push(
	struct xfs_ail	*ailp,
	xfs_lsn_t	threshold_lsn)
{
	xfs_log_item_t	*lip;

	lip = xfs_ail_min(ailp);
	if (!lip || XFS_FORCED_SHUTDOWN(ailp->xa_mount) ||
	    XFS_LSN_CMP(threshold_lsn, ailp->xa_target) <= 0)
		return;

	/*
	 * Ensure that the new target is noticed in push code before it
	 * clears the XFS_AIL_PUSHING_BIT.
	 */
	smp_wmb();
	ailp->xa_target = threshold_lsn;
	if (!test_and_set_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags))
		queue_delayed_work(xfs_syncd_wq, &ailp->xa_work, 0);
}

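/*
 * Editorial sketch (hypothetical helper): a caller that wants a particular
 * log item to become the log tail would push everything up to that item's
 * LSN.  The real producers of threshold LSNs for xfs_ail_push() live in the
 * log grant code.
 */
STATIC void
xfs_ail_push_to_item(
	struct xfs_ail		*ailp,
	struct xfs_log_item	*lip)
{
	xfs_ail_push(ailp, lip->li_lsn);
}
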
/*
 * This is to be called when an item is unlocked that may have been in the
 * AIL.  It will wake up the first member of the AIL wait list if this
 * item's unlocking might allow it to progress.  If the item is in the AIL,
 * then we need to get the AIL lock while doing our checking so we don't
 * race with someone going to sleep waiting for this event in
 * xfs_trans_push_ail().
 */
void
xfs_trans_unlocked_item(
	struct xfs_ail	*ailp,
	xfs_log_item_t	*lip)
{
	xfs_log_item_t	*min_lip;

	/*
	 * If we're forcibly shutting down, we may have unlocked log items
	 * arbitrarily.  The last thing we want to do is to move the tail of
	 * the log over some potentially valid data.
	 */
	if (!(lip->li_flags & XFS_LI_IN_AIL) ||
	    XFS_FORCED_SHUTDOWN(ailp->xa_mount)) {
		return;
	}

	/*
	 * This is the one case where we can call into xfs_ail_min() without
	 * holding the AIL lock because we only care about the case where we
	 * are at the tail of the AIL.  If the object isn't at the tail, it
	 * doesn't matter what result we get back.  This is slightly racy:
	 * since we were just unlocked, we could go to sleep between the call
	 * to xfs_ail_min and the call to xfs_log_move_tail, have someone
	 * else lock us, commit us to disk, move us out of the tail of the
	 * AIL, and then wake up.  However, the call to xfs_log_move_tail()
	 * doesn't do anything if there's not enough free space to wake
	 * people up, so we're safe calling it.
	 */
	min_lip = xfs_ail_min(ailp);

	if (min_lip == lip)
		xfs_log_move_tail(ailp->xa_mount, 1);
}	/* xfs_trans_unlocked_item */

/*
 * xfs_trans_ail_update - bulk AIL insertion operation.
 *
 * @xfs_trans_ail_update takes an array of log items that all need to be
 * positioned at the same LSN in the AIL.  If an item is not in the AIL, it
 * will be added.  Otherwise, it will be repositioned by removing it and
 * re-adding it to the AIL.  If we move the first item in the AIL, update
 * the log tail to match the new minimum LSN in the AIL.
 *
 * This function executes under a single hold of the AIL lock for all the
 * items in the array rather than cycling the lock per item.  Because the
 * items may have moved by the time we run with the lock held, we need to
 * check each log item LSN to confirm it still needs to be moved forward in
 * the AIL.
 *
 * To optimise the insert operation, we delete all the items from the AIL in
 * the first pass, moving them into a temporary list, then splice the
 * temporary list into the correct position in the AIL.  This avoids needing
 * to do an insert operation on every item.
 *
 * This function must be called with the AIL lock held.  The lock is dropped
 * before returning.
 */
void
xfs_trans_ail_update_bulk(
	struct xfs_ail		*ailp,
	struct xfs_log_item	**log_items,
	int			nr_items,
	xfs_lsn_t		lsn) __releases(ailp->xa_lock)
{
	xfs_log_item_t		*mlip;
	xfs_lsn_t		tail_lsn;
	int			mlip_changed = 0;
	int			i;
	LIST_HEAD(tmp);

	mlip = xfs_ail_min(ailp);

	for (i = 0; i < nr_items; i++) {
		struct xfs_log_item *lip = log_items[i];
		if (lip->li_flags & XFS_LI_IN_AIL) {
			/* check if we really need to move the item */
			if (XFS_LSN_CMP(lsn, lip->li_lsn) <= 0)
				continue;

			xfs_ail_delete(ailp, lip);
			if (mlip == lip)
				mlip_changed = 1;
		} else {
			lip->li_flags |= XFS_LI_IN_AIL;
		}
		lip->li_lsn = lsn;
		list_add(&lip->li_ail, &tmp);
	}

	xfs_ail_splice(ailp, &tmp, lsn);

	if (!mlip_changed) {
		spin_unlock(&ailp->xa_lock);
		return;
	}

	/*
	 * It is not safe to access mlip after the AIL lock is dropped, so we
	 * must get a copy of li_lsn before we do so.  This is especially
	 * important on 32-bit platforms where accessing and updating 64-bit
	 * values like li_lsn is not atomic.
	 */
	mlip = xfs_ail_min(ailp);
	tail_lsn = mlip->li_lsn;
	spin_unlock(&ailp->xa_lock);
	xfs_log_move_tail(ailp->xa_mount, tail_lsn);
}

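/*
 * Editorial note: single-item callers are expected to wrap the bulk
 * operation rather than duplicate it.  A minimal sketch of such a wrapper
 * (the in-tree equivalent is xfs_trans_ail_update() in xfs_trans_priv.h):
 */
static inline void
xfs_trans_ail_update_one(		/* hypothetical, for illustration */
	struct xfs_ail		*ailp,
	struct xfs_log_item	*lip,
	xfs_lsn_t		lsn) __releases(ailp->xa_lock)
{
	xfs_trans_ail_update_bulk(ailp, &lip, 1, lsn);
}
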
/*
 * xfs_trans_ail_delete_bulk - remove multiple log items from the AIL
 *
 * @xfs_trans_ail_delete_bulk takes an array of log items that all need to
 * be removed from the AIL.  The caller is already holding the AIL lock, and
 * has done all the checks necessary to ensure the items passed in via
 * @log_items are ready for deletion.  This includes checking that the items
 * are in the AIL.
 *
 * For each log item to be removed, unlink it from the AIL, clear the IN_AIL
 * flag from the item and reset the item's lsn to 0.  If we remove the first
 * item in the AIL, update the log tail to match the new minimum LSN in the
 * AIL.
 *
 * This function will not drop the AIL lock until all items are removed from
 * the AIL to minimise the amount of lock traffic on the AIL.  This does not
 * greatly increase the AIL hold time, but does significantly reduce the
 * amount of traffic on the lock, especially during IO completion.
 *
 * This function must be called with the AIL lock held.  The lock is dropped
 * before returning.
 */
void
xfs_trans_ail_delete_bulk(
	struct xfs_ail		*ailp,
	struct xfs_log_item	**log_items,
	int			nr_items) __releases(ailp->xa_lock)
{
	xfs_log_item_t		*mlip;
	xfs_lsn_t		tail_lsn;
	int			mlip_changed = 0;
	int			i;

	mlip = xfs_ail_min(ailp);

	for (i = 0; i < nr_items; i++) {
		struct xfs_log_item *lip = log_items[i];
		if (!(lip->li_flags & XFS_LI_IN_AIL)) {
			struct xfs_mount	*mp = ailp->xa_mount;

			spin_unlock(&ailp->xa_lock);
			if (!XFS_FORCED_SHUTDOWN(mp)) {
				xfs_alert_tag(mp, XFS_PTAG_AILDELETE,
		"%s: attempting to delete a log item that is not in the AIL",
						__func__);
				xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
			}
			return;
		}

		xfs_ail_delete(ailp, lip);
		lip->li_flags &= ~XFS_LI_IN_AIL;
		lip->li_lsn = 0;
		if (mlip == lip)
			mlip_changed = 1;
	}

	if (!mlip_changed) {
		spin_unlock(&ailp->xa_lock);
		return;
	}

	/*
	 * It is not safe to access mlip after the AIL lock is dropped, so we
	 * must get a copy of li_lsn before we do so.  This is especially
	 * important on 32-bit platforms where accessing and updating 64-bit
	 * values like li_lsn is not atomic.  It is possible we've emptied
	 * the AIL here, so if that is the case, pass an LSN of 0 to the tail
	 * move.
	 */
	mlip = xfs_ail_min(ailp);
	tail_lsn = mlip ? mlip->li_lsn : 0;
	spin_unlock(&ailp->xa_lock);
	xfs_log_move_tail(ailp->xa_mount, tail_lsn);
}

/*
 * The active item list (AIL) is a doubly linked list of log items sorted by
 * ascending lsn.  The base of the list is a forw/back pointer pair embedded
 * in the xfs mount structure.  The base is initialized with both pointers
 * pointing to the base.  This case always needs to be distinguished, because
 * the base has no lsn to look at.  We almost always insert at the end of the
 * list, so on inserts we search from the end of the list to find where the
 * new item belongs.
 */
/*
 * Initialize the AIL: the doubly linked list starts out pointing only to
 * itself, and the lock and push work are set up.
 */
int
xfs_trans_ail_init(
	xfs_mount_t	*mp)
{
	struct xfs_ail	*ailp;

	ailp = kmem_zalloc(sizeof(struct xfs_ail), KM_MAYFAIL);
	if (!ailp)
		return ENOMEM;

	ailp->xa_mount = mp;
	INIT_LIST_HEAD(&ailp->xa_ail);
	spin_lock_init(&ailp->xa_lock);
	INIT_DELAYED_WORK(&ailp->xa_work, xfs_ail_worker);
	mp->m_ail = ailp;
	return 0;
}

void
xfs_trans_ail_destroy(
	xfs_mount_t	*mp)
{
	struct xfs_ail	*ailp = mp->m_ail;

	cancel_delayed_work_sync(&ailp->xa_work);
	kmem_free(ailp);
}

/*
 * Splice the log item list into the AIL at the given LSN.
 */
STATIC void
xfs_ail_splice(
	struct xfs_ail	*ailp,
	struct list_head *list,
	xfs_lsn_t	lsn)
{
	xfs_log_item_t	*next_lip;

	/*
	 * If the AIL is empty, just splice the new list in.
	 */
	if (list_empty(&ailp->xa_ail)) {
		list_splice(list, &ailp->xa_ail);
		return;
	}

	list_for_each_entry_reverse(next_lip, &ailp->xa_ail, li_ail) {
		if (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0)
			break;
	}

	ASSERT((&next_lip->li_ail == &ailp->xa_ail) ||
	       (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0));

	list_splice_init(list, &next_lip->li_ail);
}

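/*
 * Editorial example: with an AIL holding items at LSNs {10, 20, 30} and
 * @list holding new items at LSN 20, the reverse search above stops at the
 * existing LSN-20 item (the first entry, scanning backwards, whose LSN
 * compares <= 20), so the new items are spliced in after it, giving
 * {10, 20, 20', 30}.  Items with equal LSNs therefore keep their arrival
 * order.
 */
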
/*
 * Delete the given item from the AIL.
 */
STATIC void
xfs_ail_delete(
	struct xfs_ail	*ailp,
	xfs_log_item_t	*lip)
{
	xfs_ail_check(ailp, lip);
	list_del(&lip->li_ail);
	xfs_trans_ail_cursor_clear(ailp, lip);
}

/*
 * Return a pointer to the first item in the AIL.  If the AIL is empty, then
 * return NULL.
 */
STATIC xfs_log_item_t *
xfs_ail_min(
	struct xfs_ail	*ailp)
{
	if (list_empty(&ailp->xa_ail))
		return NULL;

	return list_first_entry(&ailp->xa_ail, xfs_log_item_t, li_ail);
}

/*
 * Return a pointer to the item which follows the given item in the AIL.  If
 * the given item is the last item in the list, then return NULL.
 */
STATIC xfs_log_item_t *
xfs_ail_next(
	struct xfs_ail	*ailp,
	xfs_log_item_t	*lip)
{
	if (lip->li_ail.next == &ailp->xa_ail)
		return NULL;

	return list_first_entry(&lip->li_ail, xfs_log_item_t, li_ail);
}

#ifdef DEBUG
/*
 * Check that the list is sorted as it should be.
 */
STATIC void
xfs_ail_check(
	struct xfs_ail	*ailp,
	xfs_log_item_t	*lip)
{
	xfs_log_item_t	*prev_lip;

	if (list_empty(&ailp->xa_ail))
		return;

	/*
	 * Check that the next and previous entries are valid.
	 */
	ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0);
	prev_lip = list_entry(lip->li_ail.prev, xfs_log_item_t, li_ail);
	if (&prev_lip->li_ail != &ailp->xa_ail)
		ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0);

	prev_lip = list_entry(lip->li_ail.next, xfs_log_item_t, li_ail);
	if (&prev_lip->li_ail != &ailp->xa_ail)
		ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) >= 0);

#ifdef XFS_TRANS_DEBUG
	/*
	 * Walk the list checking lsn ordering, and that every entry has the
	 * XFS_LI_IN_AIL flag set.  This is really expensive, so only do it
	 * when specifically debugging the transaction subsystem.
	 */
	prev_lip = list_entry(&ailp->xa_ail, xfs_log_item_t, li_ail);
	list_for_each_entry(lip, &ailp->xa_ail, li_ail) {
		if (&prev_lip->li_ail != &ailp->xa_ail)
			ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0);
		ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0);
		prev_lip = lip;
	}
#endif /* XFS_TRANS_DEBUG */
}
#endif /* DEBUG */