]> git.karo-electronics.de Git - mv-sheeva.git/blobdiff - fs/btrfs/super.c
Merge tag 'v2.6.38' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
[mv-sheeva.git] / fs / btrfs / super.c
index 883c6fa1367eb866002d7f67d3ae2ce6497f4367..d39a9895d93288a6315e838cfe05dc1e1a65e7f4 100644 (file)
 
 static const struct super_operations btrfs_super_ops;
 
+static const char *btrfs_decode_error(struct btrfs_fs_info *fs_info, int errno,
+                                     char nbuf[16])
+{
+       char *errstr = NULL;    /* stays NULL if no case below sets it */
+
+       switch (errno) {        /* errno holds a negated error code */
+       case -EIO:
+               errstr = "IO failure";
+               break;
+       case -ENOMEM:
+               errstr = "Out of memory";
+               break;
+       case -EROFS:
+               errstr = "Readonly filesystem";
+               break;
+       default:                /* no fixed text: format the number into nbuf */
+               if (nbuf) {
+                       if (snprintf(nbuf, 16, "error %d", -errno) >= 0)
+                               errstr = nbuf;
+               }
+               break;
+       }
+
+       return errstr;
+}
+
+static void __save_error_info(struct btrfs_fs_info *fs_info)
+{
+       /*
+        * Today only the in-RAM state is updated; long term it should also
+        * go to disk.  NOTE(review): plain assignment drops other bits.
+        */
+       fs_info->fs_state = BTRFS_SUPER_FLAG_ERROR;
+}
+
+/* NOTE:
+ *     The write_super work is deferred to umount in order to avoid a
+ *     deadlock, because umount holds all the locks.
+ */
+static void save_error_info(struct btrfs_fs_info *fs_info)
+{
+       __save_error_info(fs_info);     /* in-RAM flag only, no disk write */
+}
+
+/* Force the filesystem read-only once fs_state carries the error flag. */
+static void btrfs_handle_error(struct btrfs_fs_info *fs_info)
+{
+       struct super_block *sb = fs_info->sb;
+
+       if (sb->s_flags & MS_RDONLY)    /* already read-only: nothing to do */
+               return;
+
+       if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
+               sb->s_flags |= MS_RDONLY;
+               printk(KERN_INFO "btrfs is forced readonly\n");
+       }
+}
+
+/*
+ * __btrfs_std_error decodes the error, logs it with the reporting function
+ * and line, then invokes the appropriate error response (forced read-only).
+ */
+void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
+                    unsigned int line, int errno)
+{
+       struct super_block *sb = fs_info->sb;
+       char nbuf[16];
+       const char *errstr;
+
+       /*
+        * Special case: if the error is EROFS, and we're already
+        * under MS_RDONLY, then there is nothing to log or record.
+        */
+       if (errno == -EROFS && (sb->s_flags & MS_RDONLY))
+               return;
+
+       errstr = btrfs_decode_error(fs_info, errno, nbuf);
+       printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: %s\n",
+               sb->s_id, function, line, errstr);
+       save_error_info(fs_info);
+
+       btrfs_handle_error(fs_info);
+}
+
 static void btrfs_put_super(struct super_block *sb)
 {
        struct btrfs_root *root = btrfs_sb(sb);
@@ -69,9 +153,10 @@ enum {
        Opt_degraded, Opt_subvol, Opt_subvolid, Opt_device, Opt_nodatasum,
        Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd,
        Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress,
-       Opt_compress_force, Opt_notreelog, Opt_ratio, Opt_flushoncommit,
-       Opt_discard, Opt_space_cache, Opt_clear_cache, Opt_err,
-       Opt_user_subvol_rm_allowed,
+       Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
+       Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
+       Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed,
+       Opt_enospc_debug, Opt_err,
 };
 
 static match_table_t tokens = {
@@ -86,7 +171,9 @@ static match_table_t tokens = {
        {Opt_alloc_start, "alloc_start=%s"},
        {Opt_thread_pool, "thread_pool=%d"},
        {Opt_compress, "compress"},
+       {Opt_compress_type, "compress=%s"},
        {Opt_compress_force, "compress-force"},
+       {Opt_compress_force_type, "compress-force=%s"},
        {Opt_ssd, "ssd"},
        {Opt_ssd_spread, "ssd_spread"},
        {Opt_nossd, "nossd"},
@@ -98,6 +185,7 @@ static match_table_t tokens = {
        {Opt_space_cache, "space_cache"},
        {Opt_clear_cache, "clear_cache"},
        {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
+       {Opt_enospc_debug, "enospc_debug"},
        {Opt_err, NULL},
 };
 
@@ -112,6 +200,8 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
        char *p, *num, *orig;
        int intarg;
        int ret = 0;
+       char *compress_type;
+       bool compress_force = false;
 
        if (!options)
                return 0;
@@ -154,14 +244,32 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
                        btrfs_set_opt(info->mount_opt, NODATACOW);
                        btrfs_set_opt(info->mount_opt, NODATASUM);
                        break;
-               case Opt_compress:
-                       printk(KERN_INFO "btrfs: use compression\n");
-                       btrfs_set_opt(info->mount_opt, COMPRESS);
-                       break;
                case Opt_compress_force:
-                       printk(KERN_INFO "btrfs: forcing compression\n");
-                       btrfs_set_opt(info->mount_opt, FORCE_COMPRESS);
+               case Opt_compress_force_type:
+                       compress_force = true;
+               case Opt_compress:
+               case Opt_compress_type:
+                       if (token == Opt_compress ||
+                           token == Opt_compress_force ||
+                           strcmp(args[0].from, "zlib") == 0) {
+                               compress_type = "zlib";
+                               info->compress_type = BTRFS_COMPRESS_ZLIB;
+                       } else if (strcmp(args[0].from, "lzo") == 0) {
+                               compress_type = "lzo";
+                               info->compress_type = BTRFS_COMPRESS_LZO;
+                       } else {
+                               ret = -EINVAL;
+                               goto out;
+                       }
+
                        btrfs_set_opt(info->mount_opt, COMPRESS);
+                       if (compress_force) {
+                               btrfs_set_opt(info->mount_opt, FORCE_COMPRESS);
+                               pr_info("btrfs: force %s compression\n",
+                                       compress_type);
+                       } else
+                               pr_info("btrfs: use %s compression\n",
+                                       compress_type);
                        break;
                case Opt_ssd:
                        printk(KERN_INFO "btrfs: use ssd allocation scheme\n");
@@ -252,6 +360,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
                case Opt_user_subvol_rm_allowed:
                        btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED);
                        break;
+               case Opt_enospc_debug:
+                       btrfs_set_opt(info->mount_opt, ENOSPC_DEBUG);
+                       break;
                case Opt_err:
                        printk(KERN_INFO "btrfs: unrecognized mount option "
                               "'%s'\n", p);
@@ -277,7 +388,7 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
                struct btrfs_fs_devices **fs_devices)
 {
        substring_t args[MAX_OPT_ARGS];
-       char *opts, *p;
+       char *opts, *orig, *p;
        int error = 0;
        int intarg;
 
@@ -291,6 +402,7 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
        opts = kstrdup(options, GFP_KERNEL);
        if (!opts)
                return -ENOMEM;
+       orig = opts;
 
        while ((p = strsep(&opts, ",")) != NULL) {
                int token;
@@ -326,7 +438,7 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
        }
 
  out_free_opts:
-       kfree(opts);
+       kfree(orig);
  out:
        /*
         * If no subvolume name is specified we use the default one.  Allocate
@@ -460,6 +572,7 @@ static int btrfs_fill_super(struct super_block *sb,
        sb->s_maxbytes = MAX_LFS_FILESIZE;
        sb->s_magic = BTRFS_SUPER_MAGIC;
        sb->s_op = &btrfs_super_ops;
+       sb->s_d_op = &btrfs_dentry_operations;
        sb->s_export_op = &btrfs_export_ops;
        sb->s_xattr = btrfs_xattr_handlers;
        sb->s_time_gran = 1;
@@ -516,6 +629,8 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
        btrfs_wait_ordered_extents(root, 0, 0);
 
        trans = btrfs_start_transaction(root, 0);
+       if (IS_ERR(trans))
+               return PTR_ERR(trans);
        ret = btrfs_commit_transaction(trans, root);
        return ret;
 }
@@ -654,6 +769,8 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
                }
 
                btrfs_close_devices(fs_devices);
+               kfree(fs_info);
+               kfree(tree_root);
        } else {
                char b[BDEVNAME_SIZE];
 
@@ -752,6 +869,127 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
        return 0;
 }
 
+/*
+ * Helper: calculate the free device space that can be used to store file
+ * data.  Returns 0 and sets *free_bytes, or an error code on failure.
+ */
+static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes)
+{
+       struct btrfs_fs_info *fs_info = root->fs_info;
+       struct btrfs_device_info *devices_info;
+       struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
+       struct btrfs_device *device;
+       u64 skip_space;
+       u64 type;
+       u64 avail_space;
+       u64 used_space;
+       u64 min_stripe_size;
+       int min_stripes = 1;    /* kept when no RAID0/1/10 bit is set */
+       int i = 0, nr_devices;
+       int ret;
+
+       nr_devices = fs_info->fs_devices->rw_devices;
+       BUG_ON(!nr_devices);
+
+       devices_info = kmalloc(sizeof(*devices_info) * nr_devices,
+                              GFP_NOFS);
+       if (!devices_info)
+               return -ENOMEM;
+
+       /* calc min stripe number for data space allocation */
+       type = btrfs_get_alloc_profile(root, 1);
+       if (type & BTRFS_BLOCK_GROUP_RAID0)
+               min_stripes = 2;
+       else if (type & BTRFS_BLOCK_GROUP_RAID1)
+               min_stripes = 2;
+       else if (type & BTRFS_BLOCK_GROUP_RAID10)
+               min_stripes = 4;
+
+       if (type & BTRFS_BLOCK_GROUP_DUP)
+               min_stripe_size = 2 * BTRFS_STRIPE_LEN;
+       else
+               min_stripe_size = BTRFS_STRIPE_LEN;
+
+       list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
+               if (!device->in_fs_metadata)
+                       continue;
+
+               avail_space = device->total_bytes - device->bytes_used;
+
+               /* align with stripe_len */
+               do_div(avail_space, BTRFS_STRIPE_LEN);
+               avail_space *= BTRFS_STRIPE_LEN;
+
+               /*
+                * In order to avoid overwriting the superblock on the drive,
+                * btrfs starts at an offset of at least 1MB when doing chunk
+                * allocation.
+                */
+               skip_space = 1024 * 1024;
+
+               /* user can set the offset in fs_info->alloc_start. */
+               if (fs_info->alloc_start + BTRFS_STRIPE_LEN <=
+                   device->total_bytes)
+                       skip_space = max(fs_info->alloc_start, skip_space);
+
+               /*
+                * btrfs can not use the free space in [0, skip_space - 1],
+                * we must subtract it from the total. In order to implement
+                * it, we account the used space in this range first.
+                */
+               ret = btrfs_account_dev_extents_size(device, 0, skip_space - 1,
+                                                    &used_space);
+               if (ret) {
+                       kfree(devices_info);
+                       return ret;
+               }
+
+               /* calc the free space in [0, skip_space - 1] */
+               skip_space -= used_space;
+
+               /*
+                * that free space can not be used for allocation, so
+                * subtract it from the total.
+                */
+               if (avail_space && avail_space >= skip_space)
+                       avail_space -= skip_space;
+               else
+                       avail_space = 0;
+
+               if (avail_space < min_stripe_size)
+                       continue;
+
+               devices_info[i].dev = device;
+               devices_info[i].max_avail = avail_space;
+
+               i++;
+       }
+
+       nr_devices = i;         /* number of devices actually collected */
+
+       btrfs_descending_sort_devices(devices_info, nr_devices);
+
+       i = nr_devices - 1;     /* start from the last collected entry */
+       avail_space = 0;
+       while (nr_devices >= min_stripes) {
+               if (devices_info[i].max_avail >= min_stripe_size) {
+                       int j;
+                       u64 alloc_size;
+
+                       avail_space += devices_info[i].max_avail * min_stripes;
+                       alloc_size = devices_info[i].max_avail;
+                       for (j = i + 1 - min_stripes; j <= i; j++)
+                               devices_info[j].max_avail -= alloc_size;
+               }
+               i--;
+               nr_devices--;
+       }
+
+       kfree(devices_info);
+       *free_bytes = avail_space;
+       return 0;
+}
+
 static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
        struct btrfs_root *root = btrfs_sb(dentry->d_sb);
@@ -759,17 +997,21 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
        struct list_head *head = &root->fs_info->space_info;
        struct btrfs_space_info *found;
        u64 total_used = 0;
-       u64 total_used_data = 0;
+       u64 total_free_data = 0;
        int bits = dentry->d_sb->s_blocksize_bits;
        __be32 *fsid = (__be32 *)root->fs_info->fsid;
+       int ret;
 
+       /* holding chunk_mutex to avoid allocating new chunks */
+       mutex_lock(&root->fs_info->chunk_mutex);
        rcu_read_lock();
        list_for_each_entry_rcu(found, head, list) {
-               if (found->flags & (BTRFS_BLOCK_GROUP_METADATA |
-                                   BTRFS_BLOCK_GROUP_SYSTEM))
-                       total_used_data += found->disk_total;
-               else
-                       total_used_data += found->disk_used;
+               if (found->flags & BTRFS_BLOCK_GROUP_DATA) {
+                       total_free_data += found->disk_total - found->disk_used;
+                       total_free_data -=
+                               btrfs_account_ro_block_groups_free_space(found);
+               }
+
                total_used += found->disk_used;
        }
        rcu_read_unlock();
@@ -777,9 +1019,17 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
        buf->f_namelen = BTRFS_NAME_LEN;
        buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits;
        buf->f_bfree = buf->f_blocks - (total_used >> bits);
-       buf->f_bavail = buf->f_blocks - (total_used_data >> bits);
        buf->f_bsize = dentry->d_sb->s_blocksize;
        buf->f_type = BTRFS_SUPER_MAGIC;
+       buf->f_bavail = total_free_data;
+       ret = btrfs_calc_avail_data_space(root, &total_free_data);
+       if (ret) {
+               mutex_unlock(&root->fs_info->chunk_mutex);
+               return ret;
+       }
+       buf->f_bavail += total_free_data;
+       buf->f_bavail = buf->f_bavail >> bits;
+       mutex_unlock(&root->fs_info->chunk_mutex);
 
        /* We treat it as constant endianness (it doesn't matter _which_)
           because we want the fsid to come out the same whether mounted
@@ -896,10 +1146,14 @@ static int __init init_btrfs_fs(void)
        if (err)
                return err;
 
-       err = btrfs_init_cachep();
+       err = btrfs_init_compress();
        if (err)
                goto free_sysfs;
 
+       err = btrfs_init_cachep();
+       if (err)
+               goto free_compress;
+
        err = extent_io_init();
        if (err)
                goto free_cachep;
@@ -927,6 +1181,8 @@ free_extent_io:
        extent_io_exit();
 free_cachep:
        btrfs_destroy_cachep();
+free_compress:
+       btrfs_exit_compress();
 free_sysfs:
        btrfs_exit_sysfs();
        return err;
@@ -941,7 +1197,7 @@ static void __exit exit_btrfs_fs(void)
        unregister_filesystem(&btrfs_fs_type);
        btrfs_exit_sysfs();
        btrfs_cleanup_fs_uuids();
-       btrfs_zlib_exit();
+       btrfs_exit_compress();
 }
 
 module_init(init_btrfs_fs)