Merge git://git.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-2.6-nmw
[pandora-kernel.git] / fs / gfs2 / super.c
1 /*
2  * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
3  * Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
4  *
5  * This copyrighted material is made available to anyone wishing to use,
6  * modify, copy, or redistribute it subject to the terms and conditions
7  * of the GNU General Public License version 2.
8  */
9
10 #include <linux/sched.h>
11 #include <linux/slab.h>
12 #include <linux/spinlock.h>
13 #include <linux/completion.h>
14 #include <linux/buffer_head.h>
15 #include <linux/crc32.h>
16 #include <linux/gfs2_ondisk.h>
17 #include <linux/bio.h>
18 #include <linux/lm_interface.h>
19
20 #include "gfs2.h"
21 #include "incore.h"
22 #include "bmap.h"
23 #include "dir.h"
24 #include "glock.h"
25 #include "glops.h"
26 #include "inode.h"
27 #include "log.h"
28 #include "meta_io.h"
29 #include "quota.h"
30 #include "recovery.h"
31 #include "rgrp.h"
32 #include "super.h"
33 #include "trans.h"
34 #include "util.h"
35
36 /**
37  * gfs2_jindex_hold - Grab a lock on the jindex
38  * @sdp: The GFS2 superblock
39  * @ji_gh: the holder for the jindex glock
40  *
41  * This is very similar to the gfs2_rindex_hold() function, except that
42  * in general we hold the jindex lock for longer periods of time and
43  * we grab it far less frequently (in general) then the rgrp lock.
44  *
45  * Returns: errno
46  */
47
48 int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh)
49 {
50         struct gfs2_inode *dip = GFS2_I(sdp->sd_jindex);
51         struct qstr name;
52         char buf[20];
53         struct gfs2_jdesc *jd;
54         int error;
55
56         name.name = buf;
57
58         mutex_lock(&sdp->sd_jindex_mutex);
59
60         for (;;) {
61                 error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, ji_gh);
62                 if (error)
63                         break;
64
65                 name.len = sprintf(buf, "journal%u", sdp->sd_journals);
66                 name.hash = gfs2_disk_hash(name.name, name.len);
67
68                 error = gfs2_dir_check(sdp->sd_jindex, &name, NULL);
69                 if (error == -ENOENT) {
70                         error = 0;
71                         break;
72                 }
73
74                 gfs2_glock_dq_uninit(ji_gh);
75
76                 if (error)
77                         break;
78
79                 error = -ENOMEM;
80                 jd = kzalloc(sizeof(struct gfs2_jdesc), GFP_KERNEL);
81                 if (!jd)
82                         break;
83
84                 INIT_LIST_HEAD(&jd->extent_list);
85                 jd->jd_inode = gfs2_lookupi(sdp->sd_jindex, &name, 1);
86                 if (!jd->jd_inode || IS_ERR(jd->jd_inode)) {
87                         if (!jd->jd_inode)
88                                 error = -ENOENT;
89                         else
90                                 error = PTR_ERR(jd->jd_inode);
91                         kfree(jd);
92                         break;
93                 }
94
95                 spin_lock(&sdp->sd_jindex_spin);
96                 jd->jd_jid = sdp->sd_journals++;
97                 list_add_tail(&jd->jd_list, &sdp->sd_jindex_list);
98                 spin_unlock(&sdp->sd_jindex_spin);
99         }
100
101         mutex_unlock(&sdp->sd_jindex_mutex);
102
103         return error;
104 }
105
106 /**
107  * gfs2_jindex_free - Clear all the journal index information
108  * @sdp: The GFS2 superblock
109  *
110  */
111
112 void gfs2_jindex_free(struct gfs2_sbd *sdp)
113 {
114         struct list_head list, *head;
115         struct gfs2_jdesc *jd;
116         struct gfs2_journal_extent *jext;
117
118         spin_lock(&sdp->sd_jindex_spin);
119         list_add(&list, &sdp->sd_jindex_list);
120         list_del_init(&sdp->sd_jindex_list);
121         sdp->sd_journals = 0;
122         spin_unlock(&sdp->sd_jindex_spin);
123
124         while (!list_empty(&list)) {
125                 jd = list_entry(list.next, struct gfs2_jdesc, jd_list);
126                 head = &jd->extent_list;
127                 while (!list_empty(head)) {
128                         jext = list_entry(head->next,
129                                           struct gfs2_journal_extent,
130                                           extent_list);
131                         list_del(&jext->extent_list);
132                         kfree(jext);
133                 }
134                 list_del(&jd->jd_list);
135                 iput(jd->jd_inode);
136                 kfree(jd);
137         }
138 }
139
140 static struct gfs2_jdesc *jdesc_find_i(struct list_head *head, unsigned int jid)
141 {
142         struct gfs2_jdesc *jd;
143         int found = 0;
144
145         list_for_each_entry(jd, head, jd_list) {
146                 if (jd->jd_jid == jid) {
147                         found = 1;
148                         break;
149                 }
150         }
151
152         if (!found)
153                 jd = NULL;
154
155         return jd;
156 }
157
158 struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid)
159 {
160         struct gfs2_jdesc *jd;
161
162         spin_lock(&sdp->sd_jindex_spin);
163         jd = jdesc_find_i(&sdp->sd_jindex_list, jid);
164         spin_unlock(&sdp->sd_jindex_spin);
165
166         return jd;
167 }
168
169 void gfs2_jdesc_make_dirty(struct gfs2_sbd *sdp, unsigned int jid)
170 {
171         struct gfs2_jdesc *jd;
172
173         spin_lock(&sdp->sd_jindex_spin);
174         jd = jdesc_find_i(&sdp->sd_jindex_list, jid);
175         if (jd)
176                 jd->jd_dirty = 1;
177         spin_unlock(&sdp->sd_jindex_spin);
178 }
179
180 struct gfs2_jdesc *gfs2_jdesc_find_dirty(struct gfs2_sbd *sdp)
181 {
182         struct gfs2_jdesc *jd;
183         int found = 0;
184
185         spin_lock(&sdp->sd_jindex_spin);
186
187         list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
188                 if (jd->jd_dirty) {
189                         jd->jd_dirty = 0;
190                         found = 1;
191                         break;
192                 }
193         }
194         spin_unlock(&sdp->sd_jindex_spin);
195
196         if (!found)
197                 jd = NULL;
198
199         return jd;
200 }
201
202 int gfs2_jdesc_check(struct gfs2_jdesc *jd)
203 {
204         struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
205         struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
206         int ar;
207         int error;
208
209         if (ip->i_di.di_size < (8 << 20) || ip->i_di.di_size > (1 << 30) ||
210             (ip->i_di.di_size & (sdp->sd_sb.sb_bsize - 1))) {
211                 gfs2_consist_inode(ip);
212                 return -EIO;
213         }
214         jd->jd_blocks = ip->i_di.di_size >> sdp->sd_sb.sb_bsize_shift;
215
216         error = gfs2_write_alloc_required(ip, 0, ip->i_di.di_size, &ar);
217         if (!error && ar) {
218                 gfs2_consist_inode(ip);
219                 error = -EIO;
220         }
221
222         return error;
223 }
224
225 /**
226  * gfs2_make_fs_rw - Turn a Read-Only FS into a Read-Write one
227  * @sdp: the filesystem
228  *
229  * Returns: errno
230  */
231
232 int gfs2_make_fs_rw(struct gfs2_sbd *sdp)
233 {
234         struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode);
235         struct gfs2_glock *j_gl = ip->i_gl;
236         struct gfs2_holder t_gh;
237         struct gfs2_log_header_host head;
238         int error;
239
240         error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, 0, &t_gh);
241         if (error)
242                 return error;
243
244         j_gl->gl_ops->go_inval(j_gl, DIO_METADATA);
245
246         error = gfs2_find_jhead(sdp->sd_jdesc, &head);
247         if (error)
248                 goto fail;
249
250         if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
251                 gfs2_consist(sdp);
252                 error = -EIO;
253                 goto fail;
254         }
255
256         /*  Initialize some head of the log stuff  */
257         sdp->sd_log_sequence = head.lh_sequence + 1;
258         gfs2_log_pointers_init(sdp, head.lh_blkno);
259
260         error = gfs2_quota_init(sdp);
261         if (error)
262                 goto fail;
263
264         set_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
265
266         gfs2_glock_dq_uninit(&t_gh);
267
268         return 0;
269
270 fail:
271         t_gh.gh_flags |= GL_NOCACHE;
272         gfs2_glock_dq_uninit(&t_gh);
273
274         return error;
275 }
276
277 static void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf)
278 {
279         const struct gfs2_statfs_change *str = buf;
280
281         sc->sc_total = be64_to_cpu(str->sc_total);
282         sc->sc_free = be64_to_cpu(str->sc_free);
283         sc->sc_dinodes = be64_to_cpu(str->sc_dinodes);
284 }
285
286 static void gfs2_statfs_change_out(const struct gfs2_statfs_change_host *sc, void *buf)
287 {
288         struct gfs2_statfs_change *str = buf;
289
290         str->sc_total = cpu_to_be64(sc->sc_total);
291         str->sc_free = cpu_to_be64(sc->sc_free);
292         str->sc_dinodes = cpu_to_be64(sc->sc_dinodes);
293 }
294
295 int gfs2_statfs_init(struct gfs2_sbd *sdp)
296 {
297         struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
298         struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
299         struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
300         struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
301         struct buffer_head *m_bh, *l_bh;
302         struct gfs2_holder gh;
303         int error;
304
305         error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, GL_NOCACHE,
306                                    &gh);
307         if (error)
308                 return error;
309
310         error = gfs2_meta_inode_buffer(m_ip, &m_bh);
311         if (error)
312                 goto out;
313
314         if (sdp->sd_args.ar_spectator) {
315                 spin_lock(&sdp->sd_statfs_spin);
316                 gfs2_statfs_change_in(m_sc, m_bh->b_data +
317                                       sizeof(struct gfs2_dinode));
318                 spin_unlock(&sdp->sd_statfs_spin);
319         } else {
320                 error = gfs2_meta_inode_buffer(l_ip, &l_bh);
321                 if (error)
322                         goto out_m_bh;
323
324                 spin_lock(&sdp->sd_statfs_spin);
325                 gfs2_statfs_change_in(m_sc, m_bh->b_data +
326                                       sizeof(struct gfs2_dinode));
327                 gfs2_statfs_change_in(l_sc, l_bh->b_data +
328                                       sizeof(struct gfs2_dinode));
329                 spin_unlock(&sdp->sd_statfs_spin);
330
331                 brelse(l_bh);
332         }
333
334 out_m_bh:
335         brelse(m_bh);
336 out:
337         gfs2_glock_dq_uninit(&gh);
338         return 0;
339 }
340
341 void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free,
342                         s64 dinodes)
343 {
344         struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
345         struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
346         struct buffer_head *l_bh;
347         int error;
348
349         error = gfs2_meta_inode_buffer(l_ip, &l_bh);
350         if (error)
351                 return;
352
353         gfs2_trans_add_bh(l_ip->i_gl, l_bh, 1);
354
355         spin_lock(&sdp->sd_statfs_spin);
356         l_sc->sc_total += total;
357         l_sc->sc_free += free;
358         l_sc->sc_dinodes += dinodes;
359         gfs2_statfs_change_out(l_sc, l_bh->b_data + sizeof(struct gfs2_dinode));
360         spin_unlock(&sdp->sd_statfs_spin);
361
362         brelse(l_bh);
363 }
364
365 int gfs2_statfs_sync(struct gfs2_sbd *sdp)
366 {
367         struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
368         struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
369         struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
370         struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
371         struct gfs2_holder gh;
372         struct buffer_head *m_bh, *l_bh;
373         int error;
374
375         error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, GL_NOCACHE,
376                                    &gh);
377         if (error)
378                 return error;
379
380         error = gfs2_meta_inode_buffer(m_ip, &m_bh);
381         if (error)
382                 goto out;
383
384         spin_lock(&sdp->sd_statfs_spin);
385         gfs2_statfs_change_in(m_sc, m_bh->b_data +
386                               sizeof(struct gfs2_dinode));
387         if (!l_sc->sc_total && !l_sc->sc_free && !l_sc->sc_dinodes) {
388                 spin_unlock(&sdp->sd_statfs_spin);
389                 goto out_bh;
390         }
391         spin_unlock(&sdp->sd_statfs_spin);
392
393         error = gfs2_meta_inode_buffer(l_ip, &l_bh);
394         if (error)
395                 goto out_bh;
396
397         error = gfs2_trans_begin(sdp, 2 * RES_DINODE, 0);
398         if (error)
399                 goto out_bh2;
400
401         gfs2_trans_add_bh(l_ip->i_gl, l_bh, 1);
402
403         spin_lock(&sdp->sd_statfs_spin);
404         m_sc->sc_total += l_sc->sc_total;
405         m_sc->sc_free += l_sc->sc_free;
406         m_sc->sc_dinodes += l_sc->sc_dinodes;
407         memset(l_sc, 0, sizeof(struct gfs2_statfs_change));
408         memset(l_bh->b_data + sizeof(struct gfs2_dinode),
409                0, sizeof(struct gfs2_statfs_change));
410         spin_unlock(&sdp->sd_statfs_spin);
411
412         gfs2_trans_add_bh(m_ip->i_gl, m_bh, 1);
413         gfs2_statfs_change_out(m_sc, m_bh->b_data + sizeof(struct gfs2_dinode));
414
415         gfs2_trans_end(sdp);
416
417 out_bh2:
418         brelse(l_bh);
419 out_bh:
420         brelse(m_bh);
421 out:
422         gfs2_glock_dq_uninit(&gh);
423         return error;
424 }
425
426 /**
427  * gfs2_statfs_i - Do a statfs
428  * @sdp: the filesystem
429  * @sg: the sg structure
430  *
431  * Returns: errno
432  */
433
434 int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc)
435 {
436         struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
437         struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
438
439         spin_lock(&sdp->sd_statfs_spin);
440
441         *sc = *m_sc;
442         sc->sc_total += l_sc->sc_total;
443         sc->sc_free += l_sc->sc_free;
444         sc->sc_dinodes += l_sc->sc_dinodes;
445
446         spin_unlock(&sdp->sd_statfs_spin);
447
448         if (sc->sc_free < 0)
449                 sc->sc_free = 0;
450         if (sc->sc_free > sc->sc_total)
451                 sc->sc_free = sc->sc_total;
452         if (sc->sc_dinodes < 0)
453                 sc->sc_dinodes = 0;
454
455         return 0;
456 }
457
458 /**
459  * statfs_fill - fill in the sg for a given RG
460  * @rgd: the RG
461  * @sc: the sc structure
462  *
463  * Returns: 0 on success, -ESTALE if the LVB is invalid
464  */
465
466 static int statfs_slow_fill(struct gfs2_rgrpd *rgd,
467                             struct gfs2_statfs_change_host *sc)
468 {
469         gfs2_rgrp_verify(rgd);
470         sc->sc_total += rgd->rd_data;
471         sc->sc_free += rgd->rd_rg.rg_free;
472         sc->sc_dinodes += rgd->rd_rg.rg_dinodes;
473         return 0;
474 }
475
476 /**
477  * gfs2_statfs_slow - Stat a filesystem using asynchronous locking
478  * @sdp: the filesystem
479  * @sc: the sc info that will be returned
480  *
481  * Any error (other than a signal) will cause this routine to fall back
482  * to the synchronous version.
483  *
484  * FIXME: This really shouldn't busy wait like this.
485  *
486  * Returns: errno
487  */
488
489 int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc)
490 {
491         struct gfs2_holder ri_gh;
492         struct gfs2_rgrpd *rgd_next;
493         struct gfs2_holder *gha, *gh;
494         unsigned int slots = 64;
495         unsigned int x;
496         int done;
497         int error = 0, err;
498
499         memset(sc, 0, sizeof(struct gfs2_statfs_change_host));
500         gha = kcalloc(slots, sizeof(struct gfs2_holder), GFP_KERNEL);
501         if (!gha)
502                 return -ENOMEM;
503
504         error = gfs2_rindex_hold(sdp, &ri_gh);
505         if (error)
506                 goto out;
507
508         rgd_next = gfs2_rgrpd_get_first(sdp);
509
510         for (;;) {
511                 done = 1;
512
513                 for (x = 0; x < slots; x++) {
514                         gh = gha + x;
515
516                         if (gh->gh_gl && gfs2_glock_poll(gh)) {
517                                 err = gfs2_glock_wait(gh);
518                                 if (err) {
519                                         gfs2_holder_uninit(gh);
520                                         error = err;
521                                 } else {
522                                         if (!error)
523                                                 error = statfs_slow_fill(
524                                                         gh->gh_gl->gl_object, sc);
525                                         gfs2_glock_dq_uninit(gh);
526                                 }
527                         }
528
529                         if (gh->gh_gl)
530                                 done = 0;
531                         else if (rgd_next && !error) {
532                                 error = gfs2_glock_nq_init(rgd_next->rd_gl,
533                                                            LM_ST_SHARED,
534                                                            GL_ASYNC,
535                                                            gh);
536                                 rgd_next = gfs2_rgrpd_get_next(rgd_next);
537                                 done = 0;
538                         }
539
540                         if (signal_pending(current))
541                                 error = -ERESTARTSYS;
542                 }
543
544                 if (done)
545                         break;
546
547                 yield();
548         }
549
550         gfs2_glock_dq_uninit(&ri_gh);
551
552 out:
553         kfree(gha);
554         return error;
555 }
556
557 struct lfcc {
558         struct list_head list;
559         struct gfs2_holder gh;
560 };
561
562 /**
563  * gfs2_lock_fs_check_clean - Stop all writes to the FS and check that all
564  *                            journals are clean
565  * @sdp: the file system
566  * @state: the state to put the transaction lock into
567  * @t_gh: the hold on the transaction lock
568  *
569  * Returns: errno
570  */
571
572 static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp,
573                                     struct gfs2_holder *t_gh)
574 {
575         struct gfs2_inode *ip;
576         struct gfs2_holder ji_gh;
577         struct gfs2_jdesc *jd;
578         struct lfcc *lfcc;
579         LIST_HEAD(list);
580         struct gfs2_log_header_host lh;
581         int error;
582
583         error = gfs2_jindex_hold(sdp, &ji_gh);
584         if (error)
585                 return error;
586
587         list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
588                 lfcc = kmalloc(sizeof(struct lfcc), GFP_KERNEL);
589                 if (!lfcc) {
590                         error = -ENOMEM;
591                         goto out;
592                 }
593                 ip = GFS2_I(jd->jd_inode);
594                 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &lfcc->gh);
595                 if (error) {
596                         kfree(lfcc);
597                         goto out;
598                 }
599                 list_add(&lfcc->list, &list);
600         }
601
602         error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_DEFERRED,
603                                    GL_NOCACHE, t_gh);
604
605         list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
606                 error = gfs2_jdesc_check(jd);
607                 if (error)
608                         break;
609                 error = gfs2_find_jhead(jd, &lh);
610                 if (error)
611                         break;
612                 if (!(lh.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
613                         error = -EBUSY;
614                         break;
615                 }
616         }
617
618         if (error)
619                 gfs2_glock_dq_uninit(t_gh);
620
621 out:
622         while (!list_empty(&list)) {
623                 lfcc = list_entry(list.next, struct lfcc, list);
624                 list_del(&lfcc->list);
625                 gfs2_glock_dq_uninit(&lfcc->gh);
626                 kfree(lfcc);
627         }
628         gfs2_glock_dq_uninit(&ji_gh);
629         return error;
630 }
631
632 /**
633  * gfs2_freeze_fs - freezes the file system
634  * @sdp: the file system
635  *
636  * This function flushes data and meta data for all machines by
637  * aquiring the transaction log exclusively.  All journals are
638  * ensured to be in a clean state as well.
639  *
640  * Returns: errno
641  */
642
643 int gfs2_freeze_fs(struct gfs2_sbd *sdp)
644 {
645         int error = 0;
646
647         mutex_lock(&sdp->sd_freeze_lock);
648
649         if (!sdp->sd_freeze_count++) {
650                 error = gfs2_lock_fs_check_clean(sdp, &sdp->sd_freeze_gh);
651                 if (error)
652                         sdp->sd_freeze_count--;
653         }
654
655         mutex_unlock(&sdp->sd_freeze_lock);
656
657         return error;
658 }
659
660 /**
661  * gfs2_unfreeze_fs - unfreezes the file system
662  * @sdp: the file system
663  *
664  * This function allows the file system to proceed by unlocking
665  * the exclusively held transaction lock.  Other GFS2 nodes are
666  * now free to acquire the lock shared and go on with their lives.
667  *
668  */
669
670 void gfs2_unfreeze_fs(struct gfs2_sbd *sdp)
671 {
672         mutex_lock(&sdp->sd_freeze_lock);
673
674         if (sdp->sd_freeze_count && !--sdp->sd_freeze_count)
675                 gfs2_glock_dq_uninit(&sdp->sd_freeze_gh);
676
677         mutex_unlock(&sdp->sd_freeze_lock);
678 }
679