Merge branch 'master'
[pandora-kernel.git] / fs / gfs2 / recovery.c
1 /*
2  * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
3  * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
4  *
5  * This copyrighted material is made available to anyone wishing to use,
6  * modify, copy, or redistribute it subject to the terms and conditions
7  * of the GNU General Public License v.2.
8  */
9
10 #include <linux/sched.h>
11 #include <linux/slab.h>
12 #include <linux/spinlock.h>
13 #include <linux/completion.h>
14 #include <linux/buffer_head.h>
15 #include <linux/gfs2_ondisk.h>
16 #include <linux/crc32.h>
17 #include <asm/semaphore.h>
18
19 #include "gfs2.h"
20 #include "lm_interface.h"
21 #include "incore.h"
22 #include "bmap.h"
23 #include "glock.h"
24 #include "glops.h"
25 #include "lm.h"
26 #include "lops.h"
27 #include "meta_io.h"
28 #include "recovery.h"
29 #include "super.h"
30 #include "util.h"
31 #include "dir.h"
32
33 int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk,
34                            struct buffer_head **bh)
35 {
36         struct gfs2_inode *ip = jd->jd_inode->u.generic_ip;
37         struct gfs2_glock *gl = ip->i_gl;
38         int new = 0;
39         uint64_t dblock;
40         uint32_t extlen;
41         int error;
42
43         error = gfs2_extent_map(ip->i_vnode, blk, &new, &dblock, &extlen);
44         if (error)
45                 return error;
46         if (!dblock) {
47                 gfs2_consist_inode(ip);
48                 return -EIO;
49         }
50
51         gfs2_meta_ra(gl, dblock, extlen);
52         error = gfs2_meta_read(gl, dblock, DIO_START | DIO_WAIT, bh);
53
54         return error;
55 }
56
57 int gfs2_revoke_add(struct gfs2_sbd *sdp, uint64_t blkno, unsigned int where)
58 {
59         struct list_head *head = &sdp->sd_revoke_list;
60         struct gfs2_revoke_replay *rr;
61         int found = 0;
62
63         list_for_each_entry(rr, head, rr_list) {
64                 if (rr->rr_blkno == blkno) {
65                         found = 1;
66                         break;
67                 }
68         }
69
70         if (found) {
71                 rr->rr_where = where;
72                 return 0;
73         }
74
75         rr = kmalloc(sizeof(struct gfs2_revoke_replay), GFP_KERNEL);
76         if (!rr)
77                 return -ENOMEM;
78
79         rr->rr_blkno = blkno;
80         rr->rr_where = where;
81         list_add(&rr->rr_list, head);
82
83         return 1;
84 }
85
86 int gfs2_revoke_check(struct gfs2_sbd *sdp, uint64_t blkno, unsigned int where)
87 {
88         struct gfs2_revoke_replay *rr;
89         int wrap, a, b, revoke;
90         int found = 0;
91
92         list_for_each_entry(rr, &sdp->sd_revoke_list, rr_list) {
93                 if (rr->rr_blkno == blkno) {
94                         found = 1;
95                         break;
96                 }
97         }
98
99         if (!found)
100                 return 0;
101
102         wrap = (rr->rr_where < sdp->sd_replay_tail);
103         a = (sdp->sd_replay_tail < where);
104         b = (where < rr->rr_where);
105         revoke = (wrap) ? (a || b) : (a && b);
106
107         return revoke;
108 }
109
110 void gfs2_revoke_clean(struct gfs2_sbd *sdp)
111 {
112         struct list_head *head = &sdp->sd_revoke_list;
113         struct gfs2_revoke_replay *rr;
114
115         while (!list_empty(head)) {
116                 rr = list_entry(head->next, struct gfs2_revoke_replay, rr_list);
117                 list_del(&rr->rr_list);
118                 kfree(rr);
119         }
120 }
121
122 /**
123  * get_log_header - read the log header for a given segment
124  * @jd: the journal
125  * @blk: the block to look at
126  * @lh: the log header to return
127  *
128  * Read the log header for a given segement in a given journal.  Do a few
129  * sanity checks on it.
130  *
131  * Returns: 0 on success,
132  *          1 if the header was invalid or incomplete,
133  *          errno on error
134  */
135
136 static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk,
137                           struct gfs2_log_header *head)
138 {
139         struct buffer_head *bh;
140         struct gfs2_log_header lh;
141         uint32_t hash;
142         int error;
143
144         error = gfs2_replay_read_block(jd, blk, &bh);
145         if (error)
146                 return error;
147
148         memcpy(&lh, bh->b_data, sizeof(struct gfs2_log_header));
149         lh.lh_hash = 0;
150         hash = gfs2_disk_hash((char *)&lh, sizeof(struct gfs2_log_header));
151         gfs2_log_header_in(&lh, bh->b_data);
152
153         brelse(bh);
154
155         if (lh.lh_header.mh_magic != GFS2_MAGIC ||
156             lh.lh_header.mh_type != GFS2_METATYPE_LH ||
157             lh.lh_blkno != blk ||
158             lh.lh_hash != hash)
159                 return 1;
160
161         *head = lh;
162
163         return 0;
164 }
165
166 /**
167  * find_good_lh - find a good log header
168  * @jd: the journal
169  * @blk: the segment to start searching from
170  * @lh: the log header to fill in
171  * @forward: if true search forward in the log, else search backward
172  *
173  * Call get_log_header() to get a log header for a segment, but if the
174  * segment is bad, either scan forward or backward until we find a good one.
175  *
176  * Returns: errno
177  */
178
179 static int find_good_lh(struct gfs2_jdesc *jd, unsigned int *blk,
180                         struct gfs2_log_header *head)
181 {
182         unsigned int orig_blk = *blk;
183         int error;
184
185         for (;;) {
186                 error = get_log_header(jd, *blk, head);
187                 if (error <= 0)
188                         return error;
189
190                 if (++*blk == jd->jd_blocks)
191                         *blk = 0;
192
193                 if (*blk == orig_blk) {
194                         gfs2_consist_inode(jd->jd_inode->u.generic_ip);
195                         return -EIO;
196                 }
197         }
198 }
199
200 /**
201  * jhead_scan - make sure we've found the head of the log
202  * @jd: the journal
203  * @head: this is filled in with the log descriptor of the head
204  *
205  * At this point, seg and lh should be either the head of the log or just
206  * before.  Scan forward until we find the head.
207  *
208  * Returns: errno
209  */
210
211 static int jhead_scan(struct gfs2_jdesc *jd, struct gfs2_log_header *head)
212 {
213         unsigned int blk = head->lh_blkno;
214         struct gfs2_log_header lh;
215         int error;
216
217         for (;;) {
218                 if (++blk == jd->jd_blocks)
219                         blk = 0;
220
221                 error = get_log_header(jd, blk, &lh);
222                 if (error < 0)
223                         return error;
224                 if (error == 1)
225                         continue;
226
227                 if (lh.lh_sequence == head->lh_sequence) {
228                         gfs2_consist_inode(jd->jd_inode->u.generic_ip);
229                         return -EIO;
230                 }
231                 if (lh.lh_sequence < head->lh_sequence)
232                         break;
233
234                 *head = lh;
235         }
236
237         return 0;
238 }
239
240 /**
241  * gfs2_find_jhead - find the head of a log
242  * @jd: the journal
243  * @head: the log descriptor for the head of the log is returned here
244  *
245  * Do a binary search of a journal and find the valid log entry with the
246  * highest sequence number.  (i.e. the log head)
247  *
248  * Returns: errno
249  */
250
251 int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header *head)
252 {
253         struct gfs2_log_header lh_1, lh_m;
254         uint32_t blk_1, blk_2, blk_m;
255         int error;
256
257         blk_1 = 0;
258         blk_2 = jd->jd_blocks - 1;
259
260         for (;;) {
261                 blk_m = (blk_1 + blk_2) / 2;
262
263                 error = find_good_lh(jd, &blk_1, &lh_1);
264                 if (error)
265                         return error;
266
267                 error = find_good_lh(jd, &blk_m, &lh_m);
268                 if (error)
269                         return error;
270
271                 if (blk_1 == blk_m || blk_m == blk_2)
272                         break;
273
274                 if (lh_1.lh_sequence <= lh_m.lh_sequence)
275                         blk_1 = blk_m;
276                 else
277                         blk_2 = blk_m;
278         }
279
280         error = jhead_scan(jd, &lh_1);
281         if (error)
282                 return error;
283
284         *head = lh_1;
285
286         return error;
287 }
288
289 /**
290  * foreach_descriptor - go through the active part of the log
291  * @jd: the journal
292  * @start: the first log header in the active region
293  * @end: the last log header (don't process the contents of this entry))
294  *
295  * Call a given function once for every log descriptor in the active
296  * portion of the log.
297  *
298  * Returns: errno
299  */
300
301 static int foreach_descriptor(struct gfs2_jdesc *jd, unsigned int start,
302                               unsigned int end, int pass)
303 {
304         struct gfs2_inode *ip = jd->jd_inode->u.generic_ip;
305         struct gfs2_sbd *sdp = ip->i_sbd;
306         struct buffer_head *bh;
307         struct gfs2_log_descriptor *ld;
308         int error = 0;
309         u32 length;
310         __be64 *ptr;
311         unsigned int offset = sizeof(struct gfs2_log_descriptor);
312         offset += (sizeof(__be64)-1);
313         offset &= ~(sizeof(__be64)-1);
314
315         while (start != end) {
316                 error = gfs2_replay_read_block(jd, start, &bh);
317                 if (error)
318                         return error;
319                 if (gfs2_meta_check(sdp, bh)) {
320                         brelse(bh);
321                         return -EIO;
322                 }
323                 ld = (struct gfs2_log_descriptor *)bh->b_data;
324                 length = be32_to_cpu(ld->ld_length);
325
326                 if (be32_to_cpu(ld->ld_header.mh_type) == GFS2_METATYPE_LH) {
327                         struct gfs2_log_header lh;
328                         error = get_log_header(jd, start, &lh);
329                         if (!error) {
330                                 gfs2_replay_incr_blk(sdp, &start);
331                                 continue;
332                         }
333                         if (error == 1) {
334                                 gfs2_consist_inode(jd->jd_inode->u.generic_ip);
335                                 error = -EIO;
336                         }
337                         brelse(bh);
338                         return error;
339                 } else if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_LD)) {
340                         brelse(bh);
341                         return -EIO;
342                 }
343                 ptr = (__be64 *)(bh->b_data + offset);
344                 error = lops_scan_elements(jd, start, ld, ptr, pass);
345                 if (error) {
346                         brelse(bh);
347                         return error;
348                 }
349
350                 while (length--)
351                         gfs2_replay_incr_blk(sdp, &start);
352
353                 brelse(bh);
354         }
355
356         return 0;
357 }
358
359 /**
360  * clean_journal - mark a dirty journal as being clean
361  * @sdp: the filesystem
362  * @jd: the journal
363  * @gl: the journal's glock
364  * @head: the head journal to start from
365  *
366  * Returns: errno
367  */
368
369 static int clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header *head)
370 {
371         struct gfs2_inode *ip = jd->jd_inode->u.generic_ip;
372         struct gfs2_sbd *sdp = ip->i_sbd;
373         unsigned int lblock;
374         int new = 0;
375         uint64_t dblock;
376         struct gfs2_log_header *lh;
377         uint32_t hash;
378         struct buffer_head *bh;
379         int error;
380         int boundary;
381
382         lblock = head->lh_blkno;
383         gfs2_replay_incr_blk(sdp, &lblock);
384         error = gfs2_block_map(ip->i_vnode, lblock, &new, &dblock, &boundary);
385         if (error)
386                 return error;
387         if (!dblock) {
388                 gfs2_consist_inode(ip);
389                 return -EIO;
390         }
391
392         bh = sb_getblk(sdp->sd_vfs, dblock);
393         lock_buffer(bh);
394         memset(bh->b_data, 0, bh->b_size);
395         set_buffer_uptodate(bh);
396         clear_buffer_dirty(bh);
397         unlock_buffer(bh);
398
399         lh = (struct gfs2_log_header *)bh->b_data;
400         memset(lh, 0, sizeof(struct gfs2_log_header));
401         lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
402         lh->lh_header.mh_type = cpu_to_be16(GFS2_METATYPE_LH);
403         lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH);
404         lh->lh_sequence = cpu_to_be64(head->lh_sequence + 1);
405         lh->lh_flags = cpu_to_be32(GFS2_LOG_HEAD_UNMOUNT);
406         lh->lh_blkno = cpu_to_be32(lblock);
407         hash = gfs2_disk_hash((const char *)lh, sizeof(struct gfs2_log_header));
408         lh->lh_hash = cpu_to_be32(hash);
409
410         set_buffer_dirty(bh);
411         if (sync_dirty_buffer(bh))
412                 gfs2_io_error_bh(sdp, bh);
413         brelse(bh);
414
415         return error;
416 }
417
418 /**
419  * gfs2_recover_journal - recovery a given journal
420  * @jd: the struct gfs2_jdesc describing the journal
421  *
422  * Acquire the journal's lock, check to see if the journal is clean, and
423  * do recovery if necessary.
424  *
425  * Returns: errno
426  */
427
428 int gfs2_recover_journal(struct gfs2_jdesc *jd)
429 {
430         struct gfs2_inode *ip = jd->jd_inode->u.generic_ip;
431         struct gfs2_sbd *sdp = ip->i_sbd;
432         struct gfs2_log_header head;
433         struct gfs2_holder j_gh, ji_gh, t_gh;
434         unsigned long t;
435         int ro = 0;
436         unsigned int pass;
437         int error;
438
439         if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) {
440                 fs_info(sdp, "jid=%u: Trying to acquire journal lock...\n",
441                         jd->jd_jid);
442
443                 /* Aquire the journal lock so we can do recovery */
444
445                 error = gfs2_glock_nq_num(sdp, jd->jd_jid, &gfs2_journal_glops,
446                                           LM_ST_EXCLUSIVE,
447                                           LM_FLAG_NOEXP | LM_FLAG_TRY | GL_NOCACHE,
448                                           &j_gh);
449                 switch (error) {
450                 case 0:
451                         break;
452         
453                 case GLR_TRYFAILED:
454                         fs_info(sdp, "jid=%u: Busy\n", jd->jd_jid);
455                         error = 0;
456         
457                 default:
458                         goto fail;
459                 };
460
461                 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED,
462                                            LM_FLAG_NOEXP, &ji_gh);
463                 if (error)
464                         goto fail_gunlock_j;
465         } else {
466                 fs_info(sdp, "jid=%u, already locked for use\n", jd->jd_jid);
467         }
468
469         fs_info(sdp, "jid=%u: Looking at journal...\n", jd->jd_jid);
470
471         error = gfs2_jdesc_check(jd);
472         if (error)
473                 goto fail_gunlock_ji;
474
475         error = gfs2_find_jhead(jd, &head);
476         if (error)
477                 goto fail_gunlock_ji;
478
479         if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
480                 fs_info(sdp, "jid=%u: Acquiring the transaction lock...\n",
481                         jd->jd_jid);
482
483                 t = jiffies;
484
485                 /* Acquire a shared hold on the transaction lock */
486
487                 error = gfs2_glock_nq_init(sdp->sd_trans_gl,
488                                            LM_ST_SHARED,
489                                            LM_FLAG_NOEXP | LM_FLAG_PRIORITY |
490                                            GL_NOCANCEL | GL_NOCACHE,
491                                            &t_gh);
492                 if (error)
493                         goto fail_gunlock_ji;
494
495                 if (test_bit(SDF_JOURNAL_CHECKED, &sdp->sd_flags)) {
496                         if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
497                                 ro = 1;
498                 } else {
499                         if (sdp->sd_vfs->s_flags & MS_RDONLY)
500                                 ro = 1;
501                 }
502
503                 if (ro) {
504                         fs_warn(sdp, "jid=%u: Can't replay: read-only FS\n",
505                                 jd->jd_jid);
506                         error = -EROFS;
507                         goto fail_gunlock_tr;
508                 }
509
510                 fs_info(sdp, "jid=%u: Replaying journal...\n", jd->jd_jid);
511
512                 for (pass = 0; pass < 2; pass++) {
513                         lops_before_scan(jd, &head, pass);
514                         error = foreach_descriptor(jd, head.lh_tail,
515                                                    head.lh_blkno, pass);
516                         lops_after_scan(jd, error, pass);
517                         if (error)
518                                 goto fail_gunlock_tr;
519                 }
520
521                 error = clean_journal(jd, &head);
522                 if (error)
523                         goto fail_gunlock_tr;
524
525                 gfs2_glock_dq_uninit(&t_gh);
526                 t = DIV_ROUND_UP(jiffies - t, HZ);
527                 fs_info(sdp, "jid=%u: Journal replayed in %lus\n",
528                         jd->jd_jid, t);
529         }
530
531         if (jd->jd_jid != sdp->sd_lockstruct.ls_jid)
532                 gfs2_glock_dq_uninit(&ji_gh);
533
534         gfs2_lm_recovery_done(sdp, jd->jd_jid, LM_RD_SUCCESS);
535
536         if (jd->jd_jid != sdp->sd_lockstruct.ls_jid)
537                 gfs2_glock_dq_uninit(&j_gh);
538
539         fs_info(sdp, "jid=%u: Done\n", jd->jd_jid);
540         return 0;
541
542 fail_gunlock_tr:
543         gfs2_glock_dq_uninit(&t_gh);
544 fail_gunlock_ji:
545         if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) {
546                 gfs2_glock_dq_uninit(&ji_gh);
547 fail_gunlock_j:
548                 gfs2_glock_dq_uninit(&j_gh);
549         }
550
551         fs_info(sdp, "jid=%u: %s\n", jd->jd_jid, (error) ? "Failed" : "Done");
552
553 fail:
554         gfs2_lm_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP);
555         return error;
556 }
557
558 /**
559  * gfs2_check_journals - Recover any dirty journals
560  * @sdp: the filesystem
561  *
562  */
563
564 void gfs2_check_journals(struct gfs2_sbd *sdp)
565 {
566         struct gfs2_jdesc *jd;
567
568         for (;;) {
569                 jd = gfs2_jdesc_find_dirty(sdp);
570                 if (!jd)
571                         break;
572
573                 if (jd != sdp->sd_jdesc)
574                         gfs2_recover_journal(jd);
575         }
576 }
577