return NULL;
}
+/*
+ * Return the offset of the super block on bdev, in 512-byte sectors.
+ * The device length is read from the backing inode (i_size, in bytes),
+ * converted to sectors and passed through MD_NEW_SIZE_SECTORS().
+ */
static inline sector_t calc_dev_sboffset(struct block_device *bdev)
{
- sector_t size = bdev->bd_inode->i_size >> BLOCK_SIZE_BITS;
- return MD_NEW_SIZE_BLOCKS(size);
+ sector_t num_sectors = bdev->bd_inode->i_size / 512;
+ return MD_NEW_SIZE_SECTORS(num_sectors);
}
-static sector_t calc_dev_size(mdk_rdev_t *rdev, unsigned chunk_size)
+static sector_t calc_num_sectors(mdk_rdev_t *rdev, unsigned chunk_size)
{
- sector_t size;
-
- size = rdev->sb_offset;
+ sector_t num_sectors = rdev->sb_offset * 2; /* callers store sb_offset as sectors/2, so x2 gives sectors */
if (chunk_size)
- size &= ~((sector_t)chunk_size/1024 - 1);
- return size;
+ num_sectors &= ~((sector_t)chunk_size/512 - 1); /* mask down to a chunk multiple; presumably chunk_size is a power-of-two byte count */
+ return num_sectors;
}
static int alloc_disk_sb(mdk_rdev_t * rdev)
rdev->sb_page = alloc_page(GFP_KERNEL);
if (!rdev->sb_page) {
printk(KERN_ALERT "md: out of memory.\n");
- return -EINVAL;
+ return -ENOMEM;
}
return 0;
static int uuid_equal(mdp_super_t *sb1, mdp_super_t *sb2)
{
- if ( (sb1->set_uuid0 == sb2->set_uuid0) &&
- (sb1->set_uuid1 == sb2->set_uuid1) &&
- (sb1->set_uuid2 == sb2->set_uuid2) &&
- (sb1->set_uuid3 == sb2->set_uuid3))
-
- return 1;
-
- return 0;
+ return sb1->set_uuid0 == sb2->set_uuid0 && /* nonzero only if all four set_uuid words match */
+ sb1->set_uuid1 == sb2->set_uuid1 &&
+ sb1->set_uuid2 == sb2->set_uuid2 &&
+ sb1->set_uuid3 == sb2->set_uuid3;
}
-
static int sb_equal(mdp_super_t *sb1, mdp_super_t *sb2)
{
int ret;
if (!tmp1 || !tmp2) {
ret = 0;
- printk(KERN_INFO "md.c: sb1 is not equal to sb2!\n");
+ printk(KERN_INFO "md.c sb_equal(): failed to allocate memory!\n");
goto abort;
}
tmp1->nr_disks = 0;
tmp2->nr_disks = 0;
- if (memcmp(tmp1, tmp2, MD_SB_GENERIC_CONSTANT_WORDS * 4))
- ret = 0;
- else
- ret = 1;
-
+ ret = (memcmp(tmp1, tmp2, MD_SB_GENERIC_CONSTANT_WORDS * 4) == 0);
abort:
kfree(tmp1);
kfree(tmp2);
*/
struct super_type {
- char *name;
- struct module *owner;
- int (*load_super)(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version);
- int (*validate_super)(mddev_t *mddev, mdk_rdev_t *rdev);
- void (*sync_super)(mddev_t *mddev, mdk_rdev_t *rdev);
+ char *name;
+ struct module *owner;
+ int (*load_super)(mdk_rdev_t *rdev, mdk_rdev_t *refdev,
+ int minor_version);
+ int (*validate_super)(mddev_t *mddev, mdk_rdev_t *rdev);
+ void (*sync_super)(mddev_t *mddev, mdk_rdev_t *rdev);
+ unsigned long long (*rdev_size_change)(mdk_rdev_t *rdev,
+ unsigned long long size); /* size in kB, 0 = largest that fits; returns new size in kB, or 0 if refused */
};
/*
*
* It also happens to be a multiple of 4Kb.
*/
- sb_offset = calc_dev_sboffset(rdev->bdev);
+ sb_offset = calc_dev_sboffset(rdev->bdev) / 2;
rdev->sb_offset = sb_offset;
ret = read_disk_sb(rdev, MD_SB_BYTES);
else
ret = 0;
}
- rdev->size = calc_dev_size(rdev, sb->chunk_size);
+ rdev->size = calc_num_sectors(rdev, sb->chunk_size) / 2;
if (rdev->size < sb->size && sb->level > 1)
/* "this cannot possibly happen" ... */
sb->sb_csum = calc_sb_csum(sb);
}
+/*
+ * rdev_size_change for 0.90.0
+ *
+ * "size" is the requested component size in kB; zero asks for the largest
+ * size that fits below the super block.  The super block offset is
+ * recomputed from the current device size and the super block is rewritten
+ * there.  Returns the resulting size in kB, or 0 if the request is refused.
+ */
+static unsigned long long
+super_90_rdev_size_change(mdk_rdev_t *rdev, unsigned long long size)
+{
+	mddev_t *mddev = rdev->mddev;
+
+	/* component must fit device */
+	if (size && size < mddev->size)
+		return 0;
+	/* can't move bitmap */
+	if (mddev->bitmap_offset)
+		return 0;
+
+	size *= 2; /* convert to sectors */
+	rdev->sb_offset = calc_dev_sboffset(rdev->bdev) / 2;
+	if (size == 0 || size > rdev->sb_offset * 2)
+		size = rdev->sb_offset * 2;
+
+	md_super_write(mddev, rdev, rdev->sb_offset << 1, rdev->sb_size,
+		       rdev->sb_page);
+	md_super_wait(mddev);
+
+	return size / 2; /* kB for sysfs */
+}
+
+
/*
* version 1 superblock
*/
sb->sb_csum = calc_sb_1_csum(sb);
}
+/*
+ * rdev_size_change for version 1 superblocks
+ *
+ * "size" is the requested component size in kB; zero asks for the largest
+ * size that fits.  Returns the resulting size in kB, or 0 if the request
+ * is refused (too small for the array, or a bitmap that cannot be moved).
+ * On success the in-memory super block is updated and written out.
+ */
+static unsigned long long
+super_1_rdev_size_change(mdk_rdev_t *rdev, unsigned long long size)
+{
+	struct mdp_superblock_1 *sb;
+	unsigned long long max_size;
+
+	if (size && size < rdev->mddev->size)
+		return 0; /* component must fit device */
+	size *= 2; /* convert to sectors */
+	if (rdev->sb_offset < rdev->data_offset/2) {
+		/* minor versions 1 and 2; superblock before data */
+		max_size = (rdev->bdev->bd_inode->i_size >> 9);
+		max_size -= rdev->data_offset;
+		if (!size || size > max_size)
+			size = max_size;
+	} else if (rdev->mddev->bitmap_offset) {
+		/* minor version 0 with bitmap we can't move */
+		return 0;
+	} else {
+		/* minor version 0; superblock after data */
+		sector_t sb_offset;
+		sb_offset = (rdev->bdev->bd_inode->i_size >> 9) - 8*2;
+		sb_offset &= ~(sector_t)(4*2 - 1);
+		max_size = rdev->size*2 + sb_offset - rdev->sb_offset*2;
+		if (!size || size > max_size)
+			size = max_size;
+		rdev->sb_offset = sb_offset/2;
+	}
+	sb = (struct mdp_superblock_1 *) page_address(rdev->sb_page);
+	sb->data_size = cpu_to_le64(size);
+	/*
+	 * super_offset is an on-disk field and must be stored with the same
+	 * byte-order conversion as data_size; a raw CPU-endian store would
+	 * write a byte-swapped offset on big-endian hosts.
+	 */
+	sb->super_offset = cpu_to_le64(rdev->sb_offset*2);
+	sb->sb_csum = calc_sb_1_csum(sb);
+	md_super_write(rdev->mddev, rdev, rdev->sb_offset << 1, rdev->sb_size,
+		       rdev->sb_page);
+	md_super_wait(rdev->mddev);
+	return size/2; /* kB for sysfs */
+}
static struct super_type super_types[] = {
[0] = {
.name = "0.90.0",
.owner = THIS_MODULE,
- .load_super = super_90_load,
- .validate_super = super_90_validate,
- .sync_super = super_90_sync,
+ .load_super = super_90_load,
+ .validate_super = super_90_validate,
+ .sync_super = super_90_sync,
+ .rdev_size_change = super_90_rdev_size_change, /* resize a component; returns kB or 0 */
},
[1] = {
.name = "md-1",
.owner = THIS_MODULE,
- .load_super = super_1_load,
- .validate_super = super_1_validate,
- .sync_super = super_1_sync,
+ .load_super = super_1_load,
+ .validate_super = super_1_validate,
+ .sync_super = super_1_sync,
+ .rdev_size_change = super_1_rdev_size_change, /* resize a component; returns kB or 0 */
},
};
}
-/* words written to sysfs files may, or my not, be \n terminated.
+/* words written to sysfs files may, or may not, be \n terminated.
* We want to accept with case. For this we use cmd_match.
*/
static int cmd_match(const char *cmd, const char *str)
err = 0;
}
+ if (!err)
+ sysfs_notify(&rdev->kobj, NULL, "state");
return err ? err : len;
}
static struct rdev_sysfs_entry rdev_state =
if (err) {
rdev->raid_disk = -1;
return err;
- }
+ } else
+ sysfs_notify(&rdev->kobj, NULL, "state");
sprintf(nm, "rd%d", rdev->raid_disk);
if (sysfs_create_link(&rdev->mddev->kobj, &rdev->kobj, nm))
printk(KERN_WARNING
clear_bit(Faulty, &rdev->flags);
clear_bit(WriteMostly, &rdev->flags);
set_bit(In_sync, &rdev->flags);
+ sysfs_notify(&rdev->kobj, NULL, "state");
}
return len;
}
if (e==buf || (*e && *e != '\n'))
return -EINVAL;
- if (my_mddev->pers && rdev->raid_disk >= 0)
- return -EBUSY;
+ if (my_mddev->pers && rdev->raid_disk >= 0) {
+ if (rdev->mddev->persistent) {
+ size = super_types[rdev->mddev->major_version].
+ rdev_size_change(rdev, size);
+ if (!size)
+ return -EBUSY;
+ } else if (!size) {
+ size = (rdev->bdev->bd_inode->i_size >> 10);
+ size -= rdev->data_offset/2;
+ }
+ if (size < rdev->mddev->size)
+ return -EINVAL; /* component must fit device */
+ }
+
rdev->size = size;
if (size > oldsize && rdev->mddev->external) {
/* need to check that all other rdevs with the same ->bdev
* When written, doesn't tear down array, but just stops it
* suspended (not supported yet)
* All IO requests will block. The array can be reconfigured.
- * Writing this, if accepted, will block until array is quiessent
+ * Writing this, if accepted, will block until array is quiescent
* readonly
* no resync can happen. no superblocks get written.
* write requests fail
return sprintf(page, "%s\n", array_states[st]);
}
-static int do_md_stop(mddev_t * mddev, int ro);
+static int do_md_stop(mddev_t * mddev, int ro, int is_open);
static int do_md_run(mddev_t * mddev);
static int restart_array(mddev_t *mddev);
/* stopping an active array */
if (atomic_read(&mddev->active) > 1)
return -EBUSY;
- err = do_md_stop(mddev, 0);
+ err = do_md_stop(mddev, 0, 0);
break;
case inactive:
/* stopping an active array */
if (mddev->pers) {
if (atomic_read(&mddev->active) > 1)
return -EBUSY;
- err = do_md_stop(mddev, 2);
+ err = do_md_stop(mddev, 2, 0);
} else
err = 0; /* already inactive */
break;
break; /* not supported yet */
case readonly:
if (mddev->pers)
- err = do_md_stop(mddev, 1);
+ err = do_md_stop(mddev, 1, 0);
else {
mddev->ro = 1;
set_disk_ro(mddev->gendisk, 1);
case read_auto:
if (mddev->pers) {
if (mddev->ro != 1)
- err = do_md_stop(mddev, 1);
+ err = do_md_stop(mddev, 1, 0);
else
err = restart_array(mddev);
if (err == 0) {
return sprintf(page, "%llu\n", (unsigned long long)mddev->size);
}
-static int update_size(mddev_t *mddev, unsigned long size);
+static int update_size(mddev_t *mddev, sector_t num_sectors);
static ssize_t
size_store(mddev_t *mddev, const char *buf, size_t len)
return -EINVAL;
if (mddev->pers) {
- err = update_size(mddev, size);
+ err = update_size(mddev, size * 2);
md_update_sb(mddev, 1);
} else {
if (mddev->size == 0 ||
err = mddev->pers->start_reshape(mddev);
if (err)
return err;
+ sysfs_notify(&mddev->kobj, NULL, "degraded");
} else {
if (cmd_match(page, "check"))
set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
sync_speed_show(mddev_t *mddev, char *page)
{
unsigned long resync, dt, db;
- resync = (mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active));
- dt = ((jiffies - mddev->resync_mark) / HZ);
+ resync = mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active);
+ dt = (jiffies - mddev->resync_mark) / HZ;
if (!dt) dt++;
- db = resync - (mddev->resync_mark_cnt);
- return sprintf(page, "%ld\n", db/dt/2); /* K/sec */
+ db = resync - mddev->resync_mark_cnt;
+ return sprintf(page, "%lu\n", db/dt/2); /* K/sec */
}
static struct md_sysfs_entry md_sync_speed = __ATTR_RO(sync_speed);
return -EINVAL;
}
}
+ sysfs_notify(&rdev->kobj, NULL, "state");
}
md_probe(mddev->unit, NULL, NULL);
mddev->ro = 2; /* read-only, but switch on first write */
err = mddev->pers->run(mddev);
- if (!err && mddev->pers->sync_request) {
+ if (err)
+ printk(KERN_ERR "md: pers->run() failed ...\n");
+ else if (mddev->pers->sync_request) {
err = bitmap_create(mddev);
if (err) {
printk(KERN_ERR "%s: failed to create bitmap (%d)\n",
}
}
if (err) {
- printk(KERN_ERR "md: pers->run() failed ...\n");
module_put(mddev->pers->owner);
mddev->pers = NULL;
bitmap_destroy(mddev);
md_new_event(mddev);
sysfs_notify(&mddev->kobj, NULL, "array_state");
sysfs_notify(&mddev->kobj, NULL, "sync_action");
+ sysfs_notify(&mddev->kobj, NULL, "degraded");
kobject_uevent(&mddev->gendisk->dev.kobj, KOBJ_CHANGE);
return 0;
}
static int restart_array(mddev_t *mddev)
{
struct gendisk *disk = mddev->gendisk;
- int err;
- /*
- * Complain if it has no devices
- */
- err = -ENXIO;
+ /* Complain if it has no devices */
if (list_empty(&mddev->disks))
- goto out;
-
- if (mddev->pers) {
- err = -EBUSY;
- if (!mddev->ro)
- goto out;
-
- mddev->safemode = 0;
- mddev->ro = 0;
- set_disk_ro(disk, 0);
-
- printk(KERN_INFO "md: %s switched to read-write mode.\n",
- mdname(mddev));
- /*
- * Kick recovery or resync if necessary
- */
- set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
- md_wakeup_thread(mddev->thread);
- md_wakeup_thread(mddev->sync_thread);
- err = 0;
- sysfs_notify(&mddev->kobj, NULL, "array_state");
-
- } else
- err = -EINVAL;
-
-out:
- return err;
+ return -ENXIO;
+ if (!mddev->pers)
+ return -EINVAL; /* no personality attached, nothing to restart */
+ if (!mddev->ro)
+ return -EBUSY; /* ro flag already clear: array is already read-write */
+ mddev->safemode = 0;
+ mddev->ro = 0;
+ set_disk_ro(disk, 0);
+ printk(KERN_INFO "md: %s switched to read-write mode.\n",
+ mdname(mddev));
+ /* Kick recovery or resync if necessary */
+ set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+ md_wakeup_thread(mddev->thread);
+ md_wakeup_thread(mddev->sync_thread);
+ sysfs_notify(&mddev->kobj, NULL, "array_state"); /* tell sysfs pollers the array state changed */
+ return 0;
}
/* similar to deny_write_access, but accounts for our holding a reference
* 1 - switch to readonly
* 2 - stop but do not disassemble array
*/
-static int do_md_stop(mddev_t * mddev, int mode)
+static int do_md_stop(mddev_t * mddev, int mode, int is_open)
{
int err = 0;
struct gendisk *disk = mddev->gendisk;
+ if (atomic_read(&mddev->active) > 1 + is_open) {
+ printk("md: %s still in use.\n",mdname(mddev));
+ return -EBUSY;
+ }
+
if (mddev->pers) {
- if (atomic_read(&mddev->active)>2) {
- printk("md: %s still in use.\n",mdname(mddev));
- return -EBUSY;
- }
if (mddev->sync_thread) {
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
err = do_md_run (mddev);
if (err) {
printk(KERN_WARNING "md: do_md_run() returned %d\n", err);
- do_md_stop (mddev, 0);
+ do_md_stop (mddev, 0, 0);
}
}
char *ptr, *buf = NULL;
int err = -ENOMEM;
- md_allow_write(mddev);
+ if (md_allow_write(mddev))
+ file = kmalloc(sizeof(*file), GFP_NOIO);
+ else
+ file = kmalloc(sizeof(*file), GFP_KERNEL);
- file = kmalloc(sizeof(*file), GFP_KERNEL);
if (!file)
goto out;
static int get_disk_info(mddev_t * mddev, void __user * arg)
{
mdu_disk_info_t info;
- unsigned int nr;
mdk_rdev_t *rdev;
if (copy_from_user(&info, arg, sizeof(info)))
return -EFAULT;
- nr = info.number;
-
- rdev = find_rdev_nr(mddev, nr);
+ rdev = find_rdev_nr(mddev, info.number);
if (rdev) {
info.major = MAJOR(rdev->bdev->bd_dev);
info.minor = MINOR(rdev->bdev->bd_dev);
}
if (err)
export_rdev(rdev);
+ else
+ sysfs_notify(&rdev->kobj, NULL, "state");
md_update_sb(mddev, 1);
if (mddev->degraded)
printk(KERN_INFO "md: nonpersistent superblock ...\n");
rdev->sb_offset = rdev->bdev->bd_inode->i_size >> BLOCK_SIZE_BITS;
} else
- rdev->sb_offset = calc_dev_sboffset(rdev->bdev);
- rdev->size = calc_dev_size(rdev, mddev->chunk_size);
+ rdev->sb_offset = calc_dev_sboffset(rdev->bdev) / 2;
+ rdev->size = calc_num_sectors(rdev, mddev->chunk_size) / 2;
err = bind_rdev_to_array(rdev, mddev);
if (err) {
{
char b[BDEVNAME_SIZE];
int err;
- unsigned int size;
mdk_rdev_t *rdev;
if (!mddev->pers)
}
if (mddev->persistent)
- rdev->sb_offset = calc_dev_sboffset(rdev->bdev);
+ rdev->sb_offset = calc_dev_sboffset(rdev->bdev) / 2;
else
rdev->sb_offset =
rdev->bdev->bd_inode->i_size >> BLOCK_SIZE_BITS;
- size = calc_dev_size(rdev, mddev->chunk_size);
- rdev->size = size;
+ rdev->size = calc_num_sectors(rdev, mddev->chunk_size) / 2;
if (test_bit(Faulty, &rdev->flags)) {
printk(KERN_WARNING
return 0;
}
-static int update_size(mddev_t *mddev, unsigned long size)
+static int update_size(mddev_t *mddev, sector_t num_sectors)
{
mdk_rdev_t * rdev;
int rv;
struct list_head *tmp;
- int fit = (size == 0);
+ int fit = (num_sectors == 0);
if (mddev->pers->resize == NULL)
return -EINVAL;
- /* The "size" is the amount of each device that is used.
- * This can only make sense for arrays with redundancy.
- * linear and raid0 always use whatever space is available
- * We can only consider changing the size if no resync
- * or reconstruction is happening, and if the new size
- * is acceptable. It must fit before the sb_offset or,
- * if that is <data_offset, it must fit before the
- * size of each device.
- * If size is zero, we find the largest size that fits.
+ /* "num_sectors" is the number of sectors of each device that is
+ * used. This only makes sense for arrays with redundancy; linear
+ * and raid0 always use whatever space is available. We can only
+ * consider changing this number if no resync or reconstruction is
+ * happening, and if the new size is acceptable. It must fit before
+ * the sb_offset or, if that is < data_offset, it must fit before the
+ * end of each device. If num_sectors is zero, we find the largest
+ * size that fits.
*/
if (mddev->sync_thread)
return -EBUSY;
sector_t avail;
avail = rdev->size * 2;
- if (fit && (size == 0 || size > avail/2))
- size = avail/2;
- if (avail < ((sector_t)size << 1))
+ if (fit && (num_sectors == 0 || num_sectors > avail))
+ num_sectors = avail;
+ if (avail < num_sectors)
return -ENOSPC;
}
- rv = mddev->pers->resize(mddev, (sector_t)size *2);
+ rv = mddev->pers->resize(mddev, num_sectors);
if (!rv) {
struct block_device *bdev;
return mddev->pers->reconfig(mddev, info->layout, -1);
}
if (info->size >= 0 && mddev->size != info->size)
- rv = update_size(mddev, info->size);
+ rv = update_size(mddev, (sector_t)info->size * 2);
if (mddev->raid_disks != info->raid_disks)
rv = update_raid_disks(mddev, info->raid_disks);
return 0;
}
+/*
+ * We have a problem here : there is no easy way to give a CHS
+ * virtual geometry. We currently pretend that we have a 2 heads
+ * 4 sectors (with a BIG number of cylinders...). This drives
+ * dosfs just mad... ;-)
+ */
static int md_getgeo(struct block_device *bdev, struct hd_geometry *geo)
{
mddev_t *mddev = bdev->bd_disk->private_data;
goto done_unlock;
case STOP_ARRAY:
- err = do_md_stop (mddev, 0);
+ err = do_md_stop (mddev, 0, 1);
goto done_unlock;
case STOP_ARRAY_RO:
- err = do_md_stop (mddev, 1);
+ err = do_md_stop (mddev, 1, 1);
goto done_unlock;
- /*
- * We have a problem here : there is no easy way to give a CHS
- * virtual geometry. We currently pretend that we have a 2 heads
- * 4 sectors (with a BIG number of cylinders...). This drives
- * dosfs just mad... ;-)
- */
}
/*
* here and hit the 'default' below, so only disallow
* 'md' ioctls, and switch to rw mode if started auto-readonly.
*/
- if (_IOC_TYPE(cmd) == MD_MAJOR &&
- mddev->ro && mddev->pers) {
+ if (_IOC_TYPE(cmd) == MD_MAJOR && mddev->ro && mddev->pers) {
if (mddev->ro == 2) {
mddev->ro = 0;
sysfs_notify(&mddev->kobj, NULL, "array_state");
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
md_wakeup_thread(mddev->thread);
-
} else {
err = -EROFS;
goto abort_unlock;
mddev->pers->error_handler(mddev,rdev);
if (mddev->degraded)
set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
+ set_bit(StateChanged, &rdev->flags);
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
md_wakeup_thread(mddev->thread);
* may proceed without blocking. It is important to call this before
* attempting a GFP_KERNEL allocation while holding the mddev lock.
* Must be called with mddev_lock held.
+ *
+ * In the ->external case MD_CHANGE_CLEAN can not be cleared until mddev->lock
+ * is dropped, so return -EAGAIN after notifying userspace.
*/
-void md_allow_write(mddev_t *mddev)
+int md_allow_write(mddev_t *mddev)
{
if (!mddev->pers)
- return;
+ return 0;
if (mddev->ro)
- return;
+ return 0;
if (!mddev->pers->sync_request)
- return;
+ return 0;
spin_lock_irq(&mddev->write_lock);
if (mddev->in_sync) {
mddev->safemode = 1;
spin_unlock_irq(&mddev->write_lock);
md_update_sb(mddev, 0);
-
sysfs_notify(&mddev->kobj, NULL, "array_state");
- /* wait for the dirty state to be recorded in the metadata */
- wait_event(mddev->sb_wait,
- !test_bit(MD_CHANGE_CLEAN, &mddev->flags) &&
- !test_bit(MD_CHANGE_PENDING, &mddev->flags));
} else
spin_unlock_irq(&mddev->write_lock);
+
+ if (test_bit(MD_CHANGE_CLEAN, &mddev->flags))
+ return -EAGAIN;
+ else
+ return 0;
}
EXPORT_SYMBOL_GPL(md_allow_write);
if (mddev->flags)
md_update_sb(mddev, 0);
+ rdev_for_each(rdev, rtmp, mddev)
+ if (test_and_clear_bit(StateChanged, &rdev->flags))
+ sysfs_notify(&rdev->kobj, NULL, "state");
+
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
!test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
/* success...*/
/* activate any spares */
- mddev->pers->spare_active(mddev);
+ if (mddev->pers->spare_active(mddev))
+ sysfs_notify(&mddev->kobj, NULL,
+ "degraded");
}
md_update_sb(mddev, 1);
for_each_mddev(mddev, tmp)
if (mddev_trylock(mddev)) {
- do_md_stop (mddev, 1);
+ do_md_stop (mddev, 1, 0);
mddev_unlock(mddev);
}
/*