git.karo-electronics.de Git - karo-tx-linux.git/blobdiff - drivers/block/drbd/drbd_nl.c
drbd: Fixed processing of disk-barrier, disk-flushes and disk-drain
[karo-tx-linux.git] / drivers / block / drbd / drbd_nl.c
index e7933e04e7b8e1a7e2be20b9452a369207fb960e..ce9f4ca55ce20f391951022d9402c3efa1ff60f6 100644 (file)
@@ -273,9 +273,6 @@ fail:
 
 static int drbd_adm_finish(struct genl_info *info, int retcode)
 {
-       struct nlattr *nla;
-       const char *resource_name = NULL;
-
        if (adm_ctx.tconn) {
                kref_put(&adm_ctx.tconn->kref, &conn_destroy);
                adm_ctx.tconn = NULL;
@@ -285,15 +282,6 @@ static int drbd_adm_finish(struct genl_info *info, int retcode)
                return -ENOMEM;
 
        adm_ctx.reply_dh->ret_code = retcode;
-
-       nla = info->attrs[DRBD_NLA_CFG_CONTEXT];
-       if (nla) {
-               int maxtype = ARRAY_SIZE(drbd_cfg_context_nl_policy) - 1;
-               nla = drbd_nla_find_nested(maxtype, nla, __nla_type(T_ctx_resource_name));
-               if (nla && !IS_ERR(nla))
-                       resource_name = nla_data(nla);
-       }
-
        drbd_adm_send_reply(adm_ctx.reply_skb, info);
        return 0;
 }
@@ -666,7 +654,7 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
                /* if this was forced, we should consider sync */
                if (forced)
                        drbd_send_uuids(mdev);
-               drbd_send_state(mdev);
+               drbd_send_current_state(mdev);
        }
 
        drbd_md_sync(mdev);
@@ -1074,10 +1062,13 @@ void drbd_reconsider_max_bio_size(struct drbd_conf *mdev)
           BIOs for a single peer_request */
        if (mdev->state.conn >= C_CONNECTED) {
                if (mdev->tconn->agreed_pro_version < 94)
-                       peer = mdev->peer_max_bio_size;
+                       peer = min_t(int, mdev->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
+                       /* Correct old drbd (up to 8.3.7) if it believes it can do more than 32KiB */
                else if (mdev->tconn->agreed_pro_version == 94)
                        peer = DRBD_MAX_SIZE_H80_PACKET;
-               else /* drbd 8.3.8 onwards */
+               else if (mdev->tconn->agreed_pro_version < 100)
+                       peer = DRBD_MAX_BIO_SIZE_P95;  /* drbd 8.3.8 onwards, before 8.4.0 */
+               else
                        peer = DRBD_MAX_BIO_SIZE;
        }
 
@@ -1104,7 +1095,8 @@ static void conn_reconfig_done(struct drbd_tconn *tconn)
 {
        bool stop_threads;
        spin_lock_irq(&tconn->req_lock);
-       stop_threads = conn_all_vols_unconf(tconn);
+       stop_threads = conn_all_vols_unconf(tconn) &&
+               tconn->cstate == C_STANDALONE;
        spin_unlock_irq(&tconn->req_lock);
        if (stop_threads) {
                /* asender is implicitly stopped by receiver
@@ -1237,6 +1229,9 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
        }
 
        mutex_unlock(&mdev->tconn->conf_update);
+
+       drbd_bump_write_ordering(mdev->tconn, WO_bdev_flush);
+
        drbd_md_sync(mdev);
 
        if (mdev->state.conn >= C_CONNECTED)
@@ -1245,6 +1240,7 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
        synchronize_rcu();
        kfree(old_disk_conf);
        kfree(old_plan);
+       mod_timer(&mdev->request_timer, jiffies + HZ);
        goto success;
 
 fail_unlock:
@@ -1275,7 +1271,6 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
        union drbd_state ns, os;
        enum drbd_state_rv rv;
        struct net_conf *nc;
-       int cp_discovered = 0;
 
        retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
        if (!adm_ctx.reply_skb)
@@ -1485,11 +1480,6 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
                goto force_diskless_dec;
        }
 
-       if (!drbd_al_read_log(mdev, nbc)) {
-               retcode = ERR_IO_MD_DISK;
-               goto force_diskless_dec;
-       }
-
        /* Reset the "barriers don't work" bits here, then force meta data to
         * be written, to ensure we determine if barriers are supported. */
        if (new_disk_conf->md_flushes)
@@ -1510,8 +1500,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
        new_disk_conf = NULL;
        new_plan = NULL;
 
-       mdev->write_ordering = WO_bdev_flush;
-       drbd_bump_write_ordering(mdev, WO_bdev_flush);
+       drbd_bump_write_ordering(mdev->tconn, WO_bdev_flush);
 
        if (drbd_md_test_flag(mdev->ldev, MDF_CRASHED_PRIMARY))
                set_bit(CRASHED_PRIMARY, &mdev->flags);
@@ -1519,10 +1508,8 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
                clear_bit(CRASHED_PRIMARY, &mdev->flags);
 
        if (drbd_md_test_flag(mdev->ldev, MDF_PRIMARY_IND) &&
-           !(mdev->state.role == R_PRIMARY && mdev->tconn->susp_nod)) {
+           !(mdev->state.role == R_PRIMARY && mdev->tconn->susp_nod))
                set_bit(CRASHED_PRIMARY, &mdev->flags);
-               cp_discovered = 1;
-       }
 
        mdev->send_cnt = 0;
        mdev->recv_cnt = 0;
@@ -1574,15 +1561,6 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
                }
        }
 
-       if (cp_discovered) {
-               drbd_al_apply_to_bm(mdev);
-               if (drbd_bitmap_io(mdev, &drbd_bm_write,
-                       "crashed primary apply AL", BM_LOCKED_MASK)) {
-                       retcode = ERR_IO_MD_DISK;
-                       goto force_diskless_dec;
-               }
-       }
-
        if (_drbd_bm_total_weight(mdev) == drbd_bm_bits(mdev))
                drbd_suspend_al(mdev); /* IO is still suspended here... */
 
@@ -1637,6 +1615,8 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
        if (rv < SS_SUCCESS)
                goto force_diskless_dec;
 
+       mod_timer(&mdev->request_timer, jiffies + HZ);
+
        if (mdev->state.role == R_PRIMARY)
                mdev->ldev->md.uuid[UI_CURRENT] |=  (u64)1;
        else
@@ -1654,7 +1634,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
  force_diskless_dec:
        put_ldev(mdev);
  force_diskless:
-       drbd_force_state(mdev, NS(disk, D_FAILED));
+       drbd_force_state(mdev, NS(disk, D_DISKLESS));
        drbd_md_sync(mdev);
  fail:
        conn_reconfig_done(mdev->tconn);
@@ -1676,12 +1656,21 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
        return 0;
 }
 
-static int adm_detach(struct drbd_conf *mdev)
+static int adm_detach(struct drbd_conf *mdev, int force)
 {
        enum drbd_state_rv retcode;
        int ret;
+
+       if (force) {
+               drbd_force_state(mdev, NS(disk, D_FAILED));
+               retcode = SS_SUCCESS;
+               goto out;
+       }
+
        drbd_suspend_io(mdev); /* so no-one is stuck in drbd_al_begin_io */
+       drbd_md_get_buffer(mdev); /* make sure there is no in-flight meta-data IO */
        retcode = drbd_request_state(mdev, NS(disk, D_FAILED));
+       drbd_md_put_buffer(mdev);
        /* D_FAILED will transition to DISKLESS. */
        ret = wait_event_interruptible(mdev->misc_wait,
                        mdev->state.disk != D_FAILED);
@@ -1690,6 +1679,7 @@ static int adm_detach(struct drbd_conf *mdev)
                retcode = SS_NOTHING_TO_DO;
        if (ret)
                retcode = ERR_INTR;
+out:
        return retcode;
 }
 
@@ -1701,6 +1691,8 @@ static int adm_detach(struct drbd_conf *mdev)
 int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info)
 {
        enum drbd_ret_code retcode;
+       struct detach_parms parms = { };
+       int err;
 
        retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
        if (!adm_ctx.reply_skb)
@@ -1708,7 +1700,16 @@ int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info)
        if (retcode != NO_ERROR)
                goto out;
 
-       retcode = adm_detach(adm_ctx.mdev);
+       if (info->attrs[DRBD_NLA_DETACH_PARMS]) {
+               err = detach_parms_from_attrs(&parms, info);
+               if (err) {
+                       retcode = ERR_MANDATORY_TAG;
+                       drbd_msg_put_info(from_attrs_err_to_txt(err));
+                       goto out;
+               }
+       }
+
+       retcode = adm_detach(adm_ctx.mdev, parms.force_detach);
 out:
        drbd_adm_finish(info, retcode);
        return 0;
@@ -1827,8 +1828,6 @@ struct crypto {
        struct crypto_hash *csums_tfm;
        struct crypto_hash *cram_hmac_tfm;
        struct crypto_hash *integrity_tfm;
-       void *int_dig_in;
-       void *int_dig_vv;
 };
 
 static int
@@ -1851,7 +1850,6 @@ alloc_crypto(struct crypto *crypto, struct net_conf *new_conf)
 {
        char hmac_name[CRYPTO_MAX_ALG_NAME];
        enum drbd_ret_code rv;
-       int hash_size;
 
        rv = alloc_hash(&crypto->csums_tfm, new_conf->csums_alg,
                       ERR_CSUMS_ALG);
@@ -1872,23 +1870,12 @@ alloc_crypto(struct crypto *crypto, struct net_conf *new_conf)
                rv = alloc_hash(&crypto->cram_hmac_tfm, hmac_name,
                               ERR_AUTH_ALG);
        }
-       if (crypto->integrity_tfm) {
-               hash_size = crypto_hash_digestsize(crypto->integrity_tfm);
-               crypto->int_dig_in = kmalloc(hash_size, GFP_KERNEL);
-               if (!crypto->int_dig_in)
-                       return ERR_NOMEM;
-               crypto->int_dig_vv = kmalloc(hash_size, GFP_KERNEL);
-               if (!crypto->int_dig_vv)
-                       return ERR_NOMEM;
-       }
 
        return rv;
 }
 
 static void free_crypto(struct crypto *crypto)
 {
-       kfree(crypto->int_dig_in);
-       kfree(crypto->int_dig_vv);
        crypto_free_hash(crypto->cram_hmac_tfm);
        crypto_free_hash(crypto->integrity_tfm);
        crypto_free_hash(crypto->csums_tfm);
@@ -1977,10 +1964,6 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info)
                crypto.verify_tfm = NULL;
        }
 
-       kfree(tconn->int_dig_in);
-       tconn->int_dig_in = crypto.int_dig_in;
-       kfree(tconn->int_dig_vv);
-       tconn->int_dig_vv = crypto.int_dig_vv;
        crypto_free_hash(tconn->integrity_tfm);
        tconn->integrity_tfm = crypto.integrity_tfm;
        if (tconn->cstate >= C_WF_REPORT_PARAMS && tconn->agreed_pro_version >= 100)
@@ -2059,7 +2042,7 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
                goto fail;
        }
 
-       /* allocation not in the IO path, cqueue thread context */
+       /* allocation not in the IO path, drbdsetup / netlink process context */
        new_conf = kzalloc(sizeof(*new_conf), GFP_KERNEL);
        if (!new_conf) {
                retcode = ERR_NOMEM;
@@ -2069,7 +2052,7 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
        set_net_conf_defaults(new_conf);
 
        err = net_conf_from_attrs(new_conf, info);
-       if (err) {
+       if (err && err != -ENOMSG) {
                retcode = ERR_MANDATORY_TAG;
                drbd_msg_put_info(from_attrs_err_to_txt(err));
                goto fail;
@@ -2097,8 +2080,6 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
        rcu_assign_pointer(tconn->net_conf, new_conf);
 
        conn_free_crypto(tconn);
-       tconn->int_dig_in = crypto.int_dig_in;
-       tconn->int_dig_vv = crypto.int_dig_vv;
        tconn->cram_hmac_tfm = crypto.cram_hmac_tfm;
        tconn->integrity_tfm = crypto.integrity_tfm;
        tconn->csums_tfm = crypto.csums_tfm;
@@ -2285,7 +2266,7 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
 
        if (rs.no_resync && mdev->tconn->agreed_pro_version < 93) {
                retcode = ERR_NEED_APV_93;
-               goto fail;
+               goto fail_ldev;
        }
 
        rcu_read_lock();
@@ -2295,7 +2276,7 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
                new_disk_conf = kmalloc(sizeof(struct disk_conf), GFP_KERNEL);
                if (!new_disk_conf) {
                        retcode = ERR_NOMEM;
-                       goto fail;
+                       goto fail_ldev;
                }
        }
 
@@ -2333,17 +2314,15 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
  fail:
        drbd_adm_finish(info, retcode);
        return 0;
-}
 
-void drbd_set_res_opts_defaults(struct res_opts *r)
-{
-       return set_res_opts_defaults(r);
+ fail_ldev:
+       put_ldev(mdev);
+       goto fail;
 }
 
 int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info)
 {
        enum drbd_ret_code retcode;
-       cpumask_var_t new_cpu_mask;
        struct drbd_tconn *tconn;
        struct res_opts res_opts;
        int err;
@@ -2355,12 +2334,6 @@ int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info)
                goto fail;
        tconn = adm_ctx.tconn;
 
-       if (!zalloc_cpumask_var(&new_cpu_mask, GFP_KERNEL)) {
-               retcode = ERR_NOMEM;
-               drbd_msg_put_info("unable to allocate cpumask");
-               goto fail;
-       }
-
        res_opts = tconn->res_opts;
        if (should_set_defaults(info))
                set_res_opts_defaults(&res_opts);
@@ -2372,31 +2345,14 @@ int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info)
                goto fail;
        }
 
-       /* silently ignore cpu mask on UP kernel */
-       if (nr_cpu_ids > 1 && res_opts.cpu_mask[0] != 0) {
-               err = __bitmap_parse(res_opts.cpu_mask, 32, 0,
-                               cpumask_bits(new_cpu_mask), nr_cpu_ids);
-               if (err) {
-                       conn_warn(tconn, "__bitmap_parse() failed with %d\n", err);
-                       retcode = ERR_CPU_MASK_PARSE;
-                       goto fail;
-               }
-       }
-
-
-       tconn->res_opts = res_opts;
-
-       if (!cpumask_equal(tconn->cpu_mask, new_cpu_mask)) {
-               cpumask_copy(tconn->cpu_mask, new_cpu_mask);
-               drbd_calc_cpu_mask(tconn);
-               tconn->receiver.reset_cpu_mask = 1;
-               tconn->asender.reset_cpu_mask = 1;
-               tconn->worker.reset_cpu_mask = 1;
+       err = set_resource_options(tconn, &res_opts);
+       if (err) {
+               retcode = ERR_INVALID_REQUEST;
+               if (err == -ENOMEM)
+                       retcode = ERR_NOMEM;
        }
 
 fail:
-       free_cpumask_var(new_cpu_mask);
-
        drbd_adm_finish(info, retcode);
        return 0;
 }
@@ -2440,6 +2396,23 @@ out:
        return 0;
 }
 
+static int drbd_adm_simple_request_state(struct sk_buff *skb, struct genl_info *info,
+               union drbd_state mask, union drbd_state val)
+{
+       enum drbd_ret_code retcode;
+
+       retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+       if (!adm_ctx.reply_skb)
+               return retcode;
+       if (retcode != NO_ERROR)
+               goto out;
+
+       retcode = drbd_request_state(adm_ctx.mdev, mask, val);
+out:
+       drbd_adm_finish(info, retcode);
+       return 0;
+}
+
 static int drbd_bmio_set_susp_al(struct drbd_conf *mdev)
 {
        int rv;
@@ -2449,10 +2422,10 @@ static int drbd_bmio_set_susp_al(struct drbd_conf *mdev)
        return rv;
 }
 
-static int drbd_adm_simple_request_state(struct sk_buff *skb, struct genl_info *info,
-               union drbd_state mask, union drbd_state val)
+int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info)
 {
-       enum drbd_ret_code retcode;
+       int retcode; /* drbd_ret_code, drbd_state_rv */
+       struct drbd_conf *mdev;
 
        retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
        if (!adm_ctx.reply_skb)
@@ -2460,17 +2433,29 @@ static int drbd_adm_simple_request_state(struct sk_buff *skb, struct genl_info *
        if (retcode != NO_ERROR)
                goto out;
 
-       retcode = drbd_request_state(adm_ctx.mdev, mask, val);
+       mdev = adm_ctx.mdev;
+
+       retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S), CS_ORDERED);
+       if (retcode < SS_SUCCESS) {
+               if (retcode == SS_NEED_CONNECTION && mdev->state.role == R_PRIMARY) {
+                       /* The peer will get a resync upon connect anyway.
+                        * Just make that into a full resync. */
+                       retcode = drbd_request_state(mdev, NS(pdsk, D_INCONSISTENT));
+                       if (retcode >= SS_SUCCESS) {
+                               if (drbd_bitmap_io(mdev, &drbd_bmio_set_susp_al,
+                                                  "set_n_write from invalidate_peer",
+                                                  BM_LOCKED_SET_ALLOWED))
+                                       retcode = ERR_IO_MD_DISK;
+                       }
+               } else
+                       retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S));
+       }
+
 out:
        drbd_adm_finish(info, retcode);
        return 0;
 }
 
-int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info)
-{
-       return drbd_adm_simple_request_state(skb, info, NS(conn, C_STARTING_SYNC_S));
-}
-
 int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info)
 {
        enum drbd_ret_code retcode;
@@ -2763,14 +2748,18 @@ next_tconn:
                        goto out;
 
                if (!mdev) {
-                       /* this is a tconn without a single volume */
+                       /* This is a tconn without a single volume.
+                        * Surprisingly enough, it may have a network
+                        * configuration. */
+                       struct net_conf *nc;
                        dh->minor = -1U;
                        dh->ret_code = NO_ERROR;
                        if (nla_put_drbd_cfg_context(skb, tconn, VOLUME_UNSPECIFIED))
-                               genlmsg_cancel(skb, dh);
-                       else
-                               genlmsg_end(skb, dh);
-                       goto out;
+                               goto cancel;
+                       nc = rcu_dereference(tconn->net_conf);
+                       if (nc && net_conf_to_skb(skb, nc, 1) != 0)
+                               goto cancel;
+                       goto done;
                }
 
                D_ASSERT(mdev->vnr == volume);
@@ -2780,9 +2769,11 @@ next_tconn:
                dh->ret_code = NO_ERROR;
 
                if (nla_put_status_info(skb, mdev, NULL)) {
+cancel:
                        genlmsg_cancel(skb, dh);
                        goto out;
                }
+done:
                genlmsg_end(skb, dh);
         }
 
@@ -3012,6 +3003,8 @@ drbd_check_resource_name(const char *name)
 int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info)
 {
        enum drbd_ret_code retcode;
+       struct res_opts res_opts;
+       int err;
 
        retcode = drbd_adm_prepare(skb, info, 0);
        if (!adm_ctx.reply_skb)
@@ -3019,6 +3012,14 @@ int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info)
        if (retcode != NO_ERROR)
                goto out;
 
+       set_res_opts_defaults(&res_opts);
+       err = res_opts_from_attrs(&res_opts, info);
+       if (err && err != -ENOMSG) {
+               retcode = ERR_MANDATORY_TAG;
+               drbd_msg_put_info(from_attrs_err_to_txt(err));
+               goto out;
+       }
+
        retcode = drbd_check_resource_name(adm_ctx.resource_name);
        if (retcode != NO_ERROR)
                goto out;
@@ -3032,7 +3033,7 @@ int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info)
                goto out;
        }
 
-       if (!conn_create(adm_ctx.resource_name))
+       if (!conn_create(adm_ctx.resource_name, &res_opts))
                retcode = ERR_NOMEM;
 out:
        drbd_adm_finish(info, retcode);
@@ -3050,8 +3051,7 @@ int drbd_adm_add_minor(struct sk_buff *skb, struct genl_info *info)
        if (retcode != NO_ERROR)
                goto out;
 
-       /* FIXME drop minor_count parameter, limit to MINORMASK */
-       if (dh->minor >= minor_count) {
+       if (dh->minor > MINORMASK) {
                drbd_msg_put_info("requested minor out of range");
                retcode = ERR_INVALID_REQUEST;
                goto out;
@@ -3084,6 +3084,8 @@ static enum drbd_ret_code adm_delete_minor(struct drbd_conf *mdev)
             * we may want to delete a minor from a live replication group.
             */
            mdev->state.role == R_SECONDARY) {
+               _drbd_request_state(mdev, NS(conn, C_WF_REPORT_PARAMS),
+                                   CS_VERBOSE + CS_WAIT_COMPLETE);
                idr_remove(&mdev->tconn->volumes, mdev->vnr);
                idr_remove(&minors, mdev_to_minor(mdev));
                del_gendisk(mdev->vdisk);
@@ -3144,7 +3146,7 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info)
 
        /* detach */
        idr_for_each_entry(&adm_ctx.tconn->volumes, mdev, i) {
-               retcode = adm_detach(mdev);
+               retcode = adm_detach(mdev, 0);
                if (retcode < SS_SUCCESS) {
                        drbd_msg_put_info("failed to detach");
                        goto out;