fs/ext4/mmp.c

   1 #include <linux/fs.h>
   2 #include <linux/random.h>
   3 #include <linux/buffer_head.h>
   4 #include <linux/utsname.h>
   5 #include <linux/kthread.h>
   6
   7 #include "ext4.h"
   8
   9 /*
  10  * Write the MMP block using WRITE_SYNC to try to get the block on-disk
  11  * faster.
  12  */
  13 static int write_mmp_block(struct buffer_head *bh)
  14 {
  15         mark_buffer_dirty(bh);
  16         lock_buffer(bh);
  17         bh->b_end_io = end_buffer_write_sync;
  18         get_bh(bh);
  19         submit_bh(WRITE_SYNC, bh);
  20         wait_on_buffer(bh);
  21         if (unlikely(!buffer_uptodate(bh)))
  22                 return 1;
  23
  24         return 0;
  25 }
  26
  27 /*
  28  * Read the MMP block. It _must_ be read from disk and hence we clear the
  29  * uptodate flag on the buffer.
  30  */
  31 static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
  32                           ext4_fsblk_t mmp_block)
  33 {
  34         struct mmp_struct *mmp;
  35
  36         if (*bh)
  37                 clear_buffer_uptodate(*bh);
  38
  39         /* This would be sb_bread(sb, mmp_block), except we need to be sure
  40          * that the MD RAID device cache has been bypassed, and that the read
  41          * is not blocked in the elevator. */
  42         if (!*bh)
  43                 *bh = sb_getblk(sb, mmp_block);
  44         if (!*bh)
  45                 return -ENOMEM;
  46         if (*bh) {
  47                 get_bh(*bh);
  48                 lock_buffer(*bh);
  49                 (*bh)->b_end_io = end_buffer_read_sync;
  50                 submit_bh(READ_SYNC, *bh);
  51                 wait_on_buffer(*bh);
  52                 if (!buffer_uptodate(*bh)) {
  53                         brelse(*bh);
  54                         *bh = NULL;
  55                 }
  56         }
  57         if (!*bh) {
  58                 ext4_warning(sb, "Error while reading MMP block %llu",
  59                              mmp_block);
  60                 return -EIO;
  61         }
  62
  63         mmp = (struct mmp_struct *)((*bh)->b_data);
  64         if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC)
  65                 return -EINVAL;
  66
  67         return 0;
  68 }
  69
  70 /*
  71  * Dump as much information as possible to help the admin.
  72  */
  73 void __dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp,
  74                     const char *function, unsigned int line, const char *msg)
  75 {
  76         __ext4_warning(sb, function, line, msg);
  77         __ext4_warning(sb, function, line,
  78                        "MMP failure info: last update time: %llu, last update "
  79                        "node: %s, last update device: %s\n",
  80                        (long long unsigned int) le64_to_cpu(mmp->mmp_time),
  81                        mmp->mmp_nodename, mmp->mmp_bdevname);
  82 }
  83
  84 /*
  85  * kmmpd will update the MMP sequence every s_mmp_update_interval seconds
  86  */
  87 static int kmmpd(void *data)
  88 {
  89         struct super_block *sb = ((struct mmpd_data *) data)->sb;
  90         struct buffer_head *bh = ((struct mmpd_data *) data)->bh;
  91         struct ext4_super_block *es = EXT4_SB(sb)->s_es;
  92         struct mmp_struct *mmp;
  93         ext4_fsblk_t mmp_block;
  94         u32 seq = 0;
  95         unsigned long failed_writes = 0;
  96         int mmp_update_interval = le16_to_cpu(es->s_mmp_update_interval);
  97         unsigned mmp_check_interval;
  98         unsigned long last_update_time;
  99         unsigned long diff;
 100         int retval;
 101
 102         mmp_block = le64_to_cpu(es->s_mmp_block);
 103         mmp = (struct mmp_struct *)(bh->b_data);
 104         mmp->mmp_time = cpu_to_le64(get_seconds());
 105         /*
 106          * Start with the higher mmp_check_interval and reduce it if
 107          * the MMP block is being updated on time.
 108          */
 109         mmp_check_interval = max(EXT4_MMP_CHECK_MULT * mmp_update_interval,
 110                                  EXT4_MMP_MIN_CHECK_INTERVAL);
 111         mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
 112         bdevname(bh->b_bdev, mmp->mmp_bdevname);
 113
 114         memcpy(mmp->mmp_nodename, init_utsname()->nodename,
 115                sizeof(mmp->mmp_nodename));
 116
 117         while (!kthread_should_stop()) {
 118                 if (++seq > EXT4_MMP_SEQ_MAX)
 119                         seq = 1;
 120
 121                 mmp->mmp_seq = cpu_to_le32(seq);
 122                 mmp->mmp_time = cpu_to_le64(get_seconds());
 123                 last_update_time = jiffies;
 124
 125                 retval = write_mmp_block(bh);
 126                 /*
 127                  * Don't spew too many error messages. Print one every
 128                  * (s_mmp_update_interval * 60) seconds.
 129                  */
 130                 if (retval) {
 131                         if ((failed_writes % 60) == 0)
 132                                 ext4_error(sb, "Error writing to MMP block");
 133                         failed_writes++;
 134                 }
 135
 136                 if (!(le32_to_cpu(es->s_feature_incompat) &
 137                     EXT4_FEATURE_INCOMPAT_MMP)) {
 138                         ext4_warning(sb, "kmmpd being stopped since MMP feature"
 139                                      " has been disabled.");
 140                         EXT4_SB(sb)->s_mmp_tsk = NULL;
 141                         goto failed;
 142                 }
 143
 144                 if (sb->s_flags & MS_RDONLY) {
 145                         ext4_warning(sb, "kmmpd being stopped since filesystem "
 146                                      "has been remounted as readonly.");
 147                         EXT4_SB(sb)->s_mmp_tsk = NULL;
 148                         goto failed;
 149                 }
 150
 151                 diff = jiffies - last_update_time;
 152                 if (diff < mmp_update_interval * HZ)
 153                         schedule_timeout_interruptible(mmp_update_interval *
 154                                                        HZ - diff);
 155
 156                 /*
 157                  * We need to make sure that more than mmp_check_interval
 158                  * seconds have not passed since writing. If that has happened
 159                  * we need to check if the MMP block is as we left it.
 160                  */
 161                 diff = jiffies - last_update_time;
 162                 if (diff > mmp_check_interval * HZ) {
 163                         struct buffer_head *bh_check = NULL;
 164                         struct mmp_struct *mmp_check;
 165
 166                         retval = read_mmp_block(sb, &bh_check, mmp_block);
 167                         if (retval) {
 168                                 ext4_error(sb, "error reading MMP data: %d",
 169                                            retval);
 170
 171                                 EXT4_SB(sb)->s_mmp_tsk = NULL;
 172                                 goto failed;
 173                         }
 174
 175                         mmp_check = (struct mmp_struct *)(bh_check->b_data);
 176                         if (mmp->mmp_seq != mmp_check->mmp_seq ||
 177                             memcmp(mmp->mmp_nodename, mmp_check->mmp_nodename,
 178                                    sizeof(mmp->mmp_nodename))) {
 179                                 dump_mmp_msg(sb, mmp_check,
 180                                              "Error while updating MMP info. "
 181                                              "The filesystem seems to have been"
 182                                              " multiply mounted.");
 183                                 ext4_error(sb, "abort");
 184                                 goto failed;
 185                         }
 186                         put_bh(bh_check);
 187                 }
 188
 189                  /*
 190                  * Adjust the mmp_check_interval depending on how much time
 191                  * it took for the MMP block to be written.
 192                  */
 193                 mmp_check_interval = max(min(EXT4_MMP_CHECK_MULT * diff / HZ,
 194                                              EXT4_MMP_MAX_CHECK_INTERVAL),
 195                                          EXT4_MMP_MIN_CHECK_INTERVAL);
 196                 mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
 197         }
 198
 199         /*
 200          * Unmount seems to be clean.
 201          */
 202         mmp->mmp_seq = cpu_to_le32(EXT4_MMP_SEQ_CLEAN);
 203         mmp->mmp_time = cpu_to_le64(get_seconds());
 204
 205         retval = write_mmp_block(bh);
 206
 207 failed:
 208         kfree(data);
 209         brelse(bh);
 210         return retval;
 211 }
 212
 213 /*
 214  * Get a random new sequence number but make sure it is not greater than
 215  * EXT4_MMP_SEQ_MAX.
 216  */
 217 static unsigned int mmp_new_seq(void)
 218 {
 219         u32 new_seq;
 220
 221         do {
 222                 get_random_bytes(&new_seq, sizeof(u32));
 223         } while (new_seq > EXT4_MMP_SEQ_MAX);
 224
 225         return new_seq;
 226 }
 227
 228 /*
 229  * Protect the filesystem from being mounted more than once.
 230  */
 231 int ext4_multi_mount_protect(struct super_block *sb,
 232                                     ext4_fsblk_t mmp_block)
 233 {
 234         struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 235         struct buffer_head *bh = NULL;
 236         struct mmp_struct *mmp = NULL;
 237         struct mmpd_data *mmpd_data;
 238         u32 seq;
 239         unsigned int mmp_check_interval = le16_to_cpu(es->s_mmp_update_interval);
 240         unsigned int wait_time = 0;
 241         int retval;
 242
 243         if (mmp_block < le32_to_cpu(es->s_first_data_block) ||
 244             mmp_block >= ext4_blocks_count(es)) {
 245                 ext4_warning(sb, "Invalid MMP block in superblock");
 246                 goto failed;
 247         }
 248
 249         retval = read_mmp_block(sb, &bh, mmp_block);
 250         if (retval)
 251                 goto failed;
 252
 253         mmp = (struct mmp_struct *)(bh->b_data);
 254
 255         if (mmp_check_interval < EXT4_MMP_MIN_CHECK_INTERVAL)
 256                 mmp_check_interval = EXT4_MMP_MIN_CHECK_INTERVAL;
 257
 258         /*
 259          * If check_interval in MMP block is larger, use that instead of
 260          * update_interval from the superblock.
 261          */
 262         if (mmp->mmp_check_interval > mmp_check_interval)
 263                 mmp_check_interval = mmp->mmp_check_interval;
 264
 265         seq = le32_to_cpu(mmp->mmp_seq);
 266         if (seq == EXT4_MMP_SEQ_CLEAN)
 267                 goto skip;
 268
 269         if (seq == EXT4_MMP_SEQ_FSCK) {
 270                 dump_mmp_msg(sb, mmp, "fsck is running on the filesystem");
 271                 goto failed;
 272         }
 273
 274         wait_time = min(mmp_check_interval * 2 + 1,
 275                         mmp_check_interval + 60);
 276
 277         /* Print MMP interval if more than 20 secs. */
 278         if (wait_time > EXT4_MMP_MIN_CHECK_INTERVAL * 4)
 279                 ext4_warning(sb, "MMP interval %u higher than expected, please"
 280                              " wait.\n", wait_time * 2);
 281
 282         if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
 283                 ext4_warning(sb, "MMP startup interrupted, failing mount\n");
 284                 goto failed;
 285         }
 286
 287         retval = read_mmp_block(sb, &bh, mmp_block);
 288         if (retval)
 289                 goto failed;
 290         mmp = (struct mmp_struct *)(bh->b_data);
 291         if (seq != le32_to_cpu(mmp->mmp_seq)) {
 292                 dump_mmp_msg(sb, mmp,
 293                              "Device is already active on another node.");
 294                 goto failed;
 295         }
 296
 297 skip:
 298         /*
 299          * write a new random sequence number.
 300          */
 301         seq = mmp_new_seq();
 302         mmp->mmp_seq = cpu_to_le32(seq);
 303
 304         retval = write_mmp_block(bh);
 305         if (retval)
 306                 goto failed;
 307
 308         /*
 309          * wait for MMP interval and check mmp_seq.
 310          */
 311         if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
 312                 ext4_warning(sb, "MMP startup interrupted, failing mount\n");
 313                 goto failed;
 314         }
 315
 316         retval = read_mmp_block(sb, &bh, mmp_block);
 317         if (retval)
 318                 goto failed;
 319         mmp = (struct mmp_struct *)(bh->b_data);
 320         if (seq != le32_to_cpu(mmp->mmp_seq)) {
 321                 dump_mmp_msg(sb, mmp,
 322                              "Device is already active on another node.");
 323                 goto failed;
 324         }
 325
 326         mmpd_data = kmalloc(sizeof(struct mmpd_data), GFP_KERNEL);
 327         if (!mmpd_data) {
 328                 ext4_warning(sb, "not enough memory for mmpd_data");
 329                 goto failed;
 330         }
 331         mmpd_data->sb = sb;
 332         mmpd_data->bh = bh;
 333
 334         /*
 335          * Start a kernel thread to update the MMP block periodically.
 336          */
 337         EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, mmpd_data, "kmmpd-%s",
 338                                              bdevname(bh->b_bdev,
 339                                                       mmp->mmp_bdevname));
 340         if (IS_ERR(EXT4_SB(sb)->s_mmp_tsk)) {
 341                 EXT4_SB(sb)->s_mmp_tsk = NULL;
 342                 kfree(mmpd_data);
 343                 ext4_warning(sb, "Unable to create kmmpd thread for %s.",
 344                              sb->s_id);
 345                 goto failed;
 346         }
 347
 348         return 0;
 349
 350 failed:
 351         brelse(bh);
 352         return 1;
 353 }
 354
 355