return mddev;
}
+/*
+ * Work handler for mddev->del_work (queued from mddev_put()).
+ * Recovers the owning mddev from the work item, deletes its kobject and
+ * then drops one reference on that kobject.
+ */
+static void mddev_delayed_delete(struct work_struct *ws)
+{
+	struct kobject *kobj = &container_of(ws, mddev_t, del_work)->kobj;
+
+	kobject_del(kobj);
+	kobject_put(kobj);
+}
+
/*
 * mddev_put - drop one reference on @mddev.
 *
 * Returns straight away unless atomic_dec_and_lock() reports that this
 * decrement is the one that needs all_mddevs_lock; in that case the lock
 * is held until the spin_unlock() at the bottom.
 *
 * With the '+' lines applied: an array with no raid_disks, an empty disks
 * list and hold_active clear is unlinked from all_mddevs.  If it has a
 * gendisk the remaining teardown is handed to mddev_delayed_delete() via
 * schedule_work(); otherwise the mddev is freed here with kfree().
 * The '-' lines are the old inline cleanup (queue, sysfs_state,
 * kobject_put) that this patch removes from here.
 */
static void mddev_put(mddev_t *mddev)
{
if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
return;
- if (!mddev->raid_disks && list_empty(&mddev->disks)) {
+ if (!mddev->raid_disks && list_empty(&mddev->disks) &&
+ !mddev->hold_active) {
list_del(&mddev->all_mddevs);
- spin_unlock(&all_mddevs_lock);
- if (mddev->queue)
- blk_cleanup_queue(mddev->queue);
- mddev->queue = NULL;
- if (mddev->sysfs_state)
- sysfs_put(mddev->sysfs_state);
- mddev->sysfs_state = NULL;
- kobject_put(&mddev->kobj);
- } else
- spin_unlock(&all_mddevs_lock);
+ if (mddev->gendisk) {
+ /* we did a probe so need to clean up.
+ * Call schedule_work inside the spinlock
+ * so that flush_scheduled_work() after
+ * mddev_find will succeed in waiting for the
+ * work to be done.
+ */
+ INIT_WORK(&mddev->del_work, mddev_delayed_delete);
+ schedule_work(&mddev->del_work);
+ } else
+ kfree(mddev);
+ }
+ spin_unlock(&all_mddevs_lock);
}
static mddev_t * mddev_find(dev_t unit)
retry:
spin_lock(&all_mddevs_lock);
- list_for_each_entry(mddev, &all_mddevs, all_mddevs)
- if (mddev->unit == unit) {
- mddev_get(mddev);
+
+ if (unit) {
+ list_for_each_entry(mddev, &all_mddevs, all_mddevs)
+ if (mddev->unit == unit) {
+ mddev_get(mddev);
+ spin_unlock(&all_mddevs_lock);
+ kfree(new);
+ return mddev;
+ }
+
+ if (new) {
+ list_add(&new->all_mddevs, &all_mddevs);
spin_unlock(&all_mddevs_lock);
- kfree(new);
- return mddev;
+ new->hold_active = UNTIL_IOCTL;
+ return new;
}
-
- if (new) {
+ } else if (new) {
+ /* find an unused unit number */
+ static int next_minor = 512;
+ int start = next_minor;
+ int is_free = 0;
+ int dev = 0;
+ while (!is_free) {
+ dev = MKDEV(MD_MAJOR, next_minor);
+ next_minor++;
+ if (next_minor > MINORMASK)
+ next_minor = 0;
+ if (next_minor == start) {
+ /* Oh dear, all in use. */
+ spin_unlock(&all_mddevs_lock);
+ kfree(new);
+ return NULL;
+ }
+
+ is_free = 1;
+ list_for_each_entry(mddev, &all_mddevs, all_mddevs)
+ if (mddev->unit == dev) {
+ is_free = 0;
+ break;
+ }
+ }
+ new->unit = dev;
+ new->md_minor = MINOR(dev);
+ new->hold_active = UNTIL_STOP;
list_add(&new->all_mddevs, &all_mddevs);
spin_unlock(&all_mddevs_lock);
return new;
if (find_rdev_nr(mddev, rdev->desc_nr))
return -EBUSY;
}
+ if (mddev->max_disks && rdev->desc_nr >= mddev->max_disks) {
+ printk(KERN_WARNING "md: %s: array is limited to %d devices\n",
+ mdname(mddev), mddev->max_disks);
+ return -EBUSY;
+ }
bdevname(rdev->bdev,b);
while ( (s=strchr(b, '/')) != NULL)
*s = '!';
list_add_rcu(&rdev->same_set, &mddev->disks);
bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk);
+
+ /* May as well allow recovery to be retried once */
+ mddev->recovery_disabled = 0;
return 0;
fail:
i = 0;
rdev_for_each(rdev, tmp, mddev) {
+ if (rdev->desc_nr >= mddev->max_disks ||
+ i > mddev->max_disks) {
+ printk(KERN_WARNING
+ "md: %s: %s: only %d devices permitted\n",
+ mdname(mddev), bdevname(rdev->bdev, b),
+ mddev->max_disks);
+ kick_rdev_from_array(rdev);
+ continue;
+ }
if (rdev != freshest)
if (super_types[mddev->major_version].
validate_super(mddev, rdev)) {
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
rv = mddev_lock(mddev);
+ if (mddev->hold_active == UNTIL_IOCTL)
+ mddev->hold_active = 0;
if (!rv) {
rv = entry->store(mddev, page, length);
mddev_unlock(mddev);
/*
 * md_free - final teardown of the mddev_t that embeds kobject @ko.
 *
 * NOTE(review): presumably installed as the kobject release callback; the
 * registration is not visible in this hunk - confirm.
 *
 * With the '+' lines applied it drops the sysfs_state reference, deletes
 * and puts the gendisk, and cleans up the request queue (each step only
 * when the corresponding pointer is set) before freeing the mddev itself.
 */
static void md_free(struct kobject *ko)
{
mddev_t *mddev = container_of(ko, mddev_t, kobj);
+
+ if (mddev->sysfs_state)
+ sysfs_put(mddev->sysfs_state);
+
+ if (mddev->gendisk) {
+ del_gendisk(mddev->gendisk);
+ put_disk(mddev->gendisk);
+ }
+ if (mddev->queue)
+ blk_cleanup_queue(mddev->queue);
+
kfree(mddev);
}
int mdp_major = 0;
-static struct kobject *md_probe(dev_t dev, int *part, void *data)
+/*
+ * md_alloc - look up (or create) the mddev for @dev and set up its
+ * request queue and gendisk.
+ * @dev:  device number handed to mddev_find().
+ * @name: optional disk name; when non-NULL it must not match the
+ *        disk_name of any mddev that already has a gendisk.
+ *
+ * Returns 0 on success, -ENODEV when mddev_find() returns NULL,
+ * -EEXIST when the mddev already has a gendisk or @name is already in
+ * use, and -ENOMEM when an allocation fails.
+ */
+static int md_alloc(dev_t dev, char *name)
{
static DEFINE_MUTEX(disks_mutex);
mddev_t *mddev = mddev_find(dev);
struct gendisk *disk;
- int partitioned = (MAJOR(dev) != MD_MAJOR);
- int shift = partitioned ? MdpMinorShift : 0;
- int unit = MINOR(dev) >> shift;
+ int partitioned;
+ int shift;
+ int unit;
int error;
if (!mddev)
- return NULL;
+ return -ENODEV;
+
+ partitioned = (MAJOR(mddev->unit) != MD_MAJOR);
+ shift = partitioned ? MdpMinorShift : 0;
+ unit = MINOR(mddev->unit) >> shift;
+
+ /* wait for any previous instance of this device
+ * to be completely removed (mddev_delayed_delete).
+ */
+ flush_scheduled_work();
mutex_lock(&disks_mutex);
if (mddev->gendisk) {
mutex_unlock(&disks_mutex);
mddev_put(mddev);
- return NULL;
+ return -EEXIST;
+ }
+
+ if (name) {
+ /* Need to ensure that 'name' is not a duplicate.
+ */
+ mddev_t *mddev2;
+ spin_lock(&all_mddevs_lock);
+
+ list_for_each_entry(mddev2, &all_mddevs, all_mddevs)
+ if (mddev2->gendisk &&
+ strcmp(mddev2->gendisk->disk_name, name) == 0) {
+ spin_unlock(&all_mddevs_lock);
+ /* Like the other error exits: release
+ * disks_mutex and drop the reference taken
+ * by mddev_find(), otherwise the next
+ * md_alloc() blocks forever on the mutex.
+ * mddev_put() takes all_mddevs_lock itself,
+ * so it must come after the spin_unlock().
+ */
+ mutex_unlock(&disks_mutex);
+ mddev_put(mddev);
+ return -EEXIST;
+ }
+ spin_unlock(&all_mddevs_lock);
}
mddev->queue = blk_alloc_queue(GFP_KERNEL);
if (!mddev->queue) {
mutex_unlock(&disks_mutex);
mddev_put(mddev);
- return NULL;
+ return -ENOMEM;
}
/* Can be unlocked because the queue is new: no concurrency */
queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, mddev->queue);
blk_cleanup_queue(mddev->queue);
mddev->queue = NULL;
mddev_put(mddev);
- return NULL;
+ return -ENOMEM;
}
- disk->major = MAJOR(dev);
+ disk->major = MAJOR(mddev->unit);
disk->first_minor = unit << shift;
- if (partitioned)
+ if (name)
+ strcpy(disk->disk_name, name);
+ else if (partitioned)
sprintf(disk->disk_name, "md_d%d", unit);
else
sprintf(disk->disk_name, "md%d", unit);
disk->private_data = mddev;
disk->queue = mddev->queue;
/* Allow extended partitions. This makes the
- * 'mdp' device redundant, but we can really
+ * 'mdp' device redundant, but we can't really
* remove it now.
*/
disk->flags |= GENHD_FL_EXT_DEVT;
kobject_uevent(&mddev->kobj, KOBJ_ADD);
mddev->sysfs_state = sysfs_get_dirent(mddev->kobj.sd, "array_state");
}
+ mddev_put(mddev);
+ return 0;
+}
+
/*
 * md_probe - probe entry point, now a thin wrapper around md_alloc().
 *
 * Calls md_alloc() for @dev with no explicit name and discards its result;
 * @part and @data are not used.  Always returns NULL.
 * NOTE(review): presumably NULL tells the block-layer caller to repeat its
 * lookup now that the disk may exist - confirm against the probe caller.
 */
+static struct kobject *md_probe(dev_t dev, int *part, void *data)
+{
+ md_alloc(dev, NULL);
return NULL;
}
+/*
+ * Set-handler for the "new_array" module parameter.
+ *
+ * @val is the requested array name, expected to look like "md_*"; only
+ * the "md_" prefix is verified here.  Trailing newlines are ignored.
+ * The name is handed to md_alloc() with device number 0, and md_alloc()'s
+ * result is returned.  @kp is unused.
+ *
+ * Returns -E2BIG when the trimmed name does not fit in DISK_NAME_LEN
+ * (including the terminator) and -EINVAL when the prefix is wrong.
+ */
+static int add_named_array(const char *val, struct kernel_param *kp)
+{
+	char name[DISK_NAME_LEN];
+	int n = strlen(val);
+
+	/* Trim trailing newlines. */
+	for (; n && val[n - 1] == '\n'; n--)
+		;
+
+	if (n >= DISK_NAME_LEN)
+		return -E2BIG;
+
+	/* Copy exactly the n trimmed characters plus the terminator. */
+	strlcpy(name, val, n + 1);
+
+	if (strncmp(name, "md_", 3))
+		return -EINVAL;
+
+	return md_alloc(0, name);
+}
+
static void md_safemode_timeout(unsigned long data)
{
mddev_t *mddev = (mddev_t *) data;
mddev->barriers_work = 0;
mddev->safemode = 0;
kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
+ if (mddev->hold_active == UNTIL_STOP)
+ mddev->hold_active = 0;
} else if (mddev->pers)
printk(KERN_INFO "md: %s switched to read-only mode.\n",
* noticed in interrupt contexts ...
*/
- if (rdev->desc_nr == mddev->max_disks) {
- printk(KERN_WARNING "%s: can not hot-add to full array!\n",
- mdname(mddev));
- err = -EBUSY;
- goto abort_unbind_export;
- }
-
rdev->raid_disk = -1;
md_update_sb(mddev, 1);
md_new_event(mddev);
return 0;
-abort_unbind_export:
- unbind_rdev_from_array(rdev);
-
abort_export:
export_rdev(rdev);
return err;
done_unlock:
abort_unlock:
+ if (mddev->hold_active == UNTIL_IOCTL &&
+ err != -EINVAL)
+ mddev->hold_active = 0;
mddev_unlock(mddev);
return err;
* Succeed if we can lock the mddev, which confirms that
* it isn't being stopped right now.
*/
- mddev_t *mddev = bdev->bd_disk->private_data;
+ mddev_t *mddev = mddev_find(bdev->bd_dev);
int err;
+ if (mddev->gendisk != bdev->bd_disk) {
+ /* we are racing with mddev_put which is discarding this
+ * bd_disk.
+ */
+ mddev_put(mddev);
+ /* Wait until bdev->bd_disk is definitely gone */
+ flush_scheduled_work();
+ /* Then retry the open from the top */
+ return -ERESTARTSYS;
+ }
+ BUG_ON(mddev != bdev->bd_disk->private_data);
+
if ((err = mutex_lock_interruptible_nested(&mddev->reconfig_mutex, 1)))
goto out;
err = 0;
- mddev_get(mddev);
atomic_inc(&mddev->openers);
mddev_unlock(mddev);
}
}
- if (mddev->degraded && ! mddev->ro) {
+ if (mddev->degraded && ! mddev->ro && !mddev->recovery_disabled) {
list_for_each_entry(rdev, &mddev->disks, same_set) {
if (rdev->raid_disk >= 0 &&
!test_bit(In_sync, &rdev->flags) &&
unregister_sysctl_table(raid_table_header);
remove_proc_entry("mdstat", NULL);
for_each_mddev(mddev, tmp) {
- struct gendisk *disk = mddev->gendisk;
- if (!disk)
- continue;
export_array(mddev);
- del_gendisk(disk);
- put_disk(disk);
- mddev->gendisk = NULL;
- mddev_put(mddev);
+ mddev->hold_active = 0;
}
}
module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR);
module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR);
/* new_array: permission is S_IWUSR only; writes are handled by
 * add_named_array() and the get-handler slot is NULL.
 */
+module_param_call(new_array, add_named_array, NULL, NULL, S_IWUSR);
EXPORT_SYMBOL(register_md_personality);
EXPORT_SYMBOL(unregister_md_personality);