git.karo-electronics.de Git - karo-tx-linux.git/blobdiff - net/ceph/crush/mapper.c
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux...
[karo-tx-linux.git] / net / ceph / crush / mapper.c
index 71192b1f8501e8467e4df43f5831b24ee895eda7..b703790b4e44788e109bb91ab61f272015000654 100644 (file)
@@ -282,7 +282,7 @@ static int is_out(const struct crush_map *map,
 }
 
 /**
- * crush_choose - choose numrep distinct items of given type
+ * crush_choose_firstn - choose numrep distinct items of given type
  * @map: the crush_map
  * @bucket: the bucket we are choose an item from
  * @x: crush input value
@@ -290,18 +290,24 @@ static int is_out(const struct crush_map *map,
  * @type: the type of item to choose
  * @out: pointer to output vector
  * @outpos: our position in that vector
- * @firstn: true if choosing "first n" items, false if choosing "indep"
- * @recurse_to_leaf: true if we want one device under each item of given type
- * @descend_once: true if we should only try one descent before giving up
+ * @tries: number of attempts to make
+ * @recurse_tries: number of attempts to have recursive chooseleaf make
+ * @local_tries: localized retries
+ * @local_fallback_tries: localized fallback retries
+ * @recurse_to_leaf: true if we want one device under each item of given type (chooseleaf instead of choose)
  * @out2: second output vector for leaf items (if @recurse_to_leaf)
  */
-static int crush_choose(const struct crush_map *map,
-                       struct crush_bucket *bucket,
-                       const __u32 *weight, int weight_max,
-                       int x, int numrep, int type,
-                       int *out, int outpos,
-                       int firstn, int recurse_to_leaf,
-                       int descend_once, int *out2)
+static int crush_choose_firstn(const struct crush_map *map,
+                              struct crush_bucket *bucket,
+                              const __u32 *weight, int weight_max,
+                              int x, int numrep, int type,
+                              int *out, int outpos,
+                              unsigned int tries,
+                              unsigned int recurse_tries,
+                              unsigned int local_tries,
+                              unsigned int local_fallback_tries,
+                              int recurse_to_leaf,
+                              int *out2)
 {
        int rep;
        unsigned int ftotal, flocal;
@@ -330,35 +336,17 @@ static int crush_choose(const struct crush_map *map,
                                collide = 0;
                                retry_bucket = 0;
                                r = rep;
-                               if (in->alg == CRUSH_BUCKET_UNIFORM) {
-                                       /* be careful */
-                                       if (firstn || (__u32)numrep >= in->size)
-                                               /* r' = r + f_total */
-                                               r += ftotal;
-                                       else if (in->size % numrep == 0)
-                                               /* r'=r+(n+1)*f_local */
-                                               r += (numrep+1) *
-                                                       (flocal+ftotal);
-                                       else
-                                               /* r' = r + n*f_local */
-                                               r += numrep * (flocal+ftotal);
-                               } else {
-                                       if (firstn)
-                                               /* r' = r + f_total */
-                                               r += ftotal;
-                                       else
-                                               /* r' = r + n*f_local */
-                                               r += numrep * (flocal+ftotal);
-                               }
+                               /* r' = r + f_total */
+                               r += ftotal;
 
                                /* bucket choose */
                                if (in->size == 0) {
                                        reject = 1;
                                        goto reject;
                                }
-                               if (map->choose_local_fallback_tries > 0 &&
+                               if (local_fallback_tries > 0 &&
                                    flocal >= (in->size>>1) &&
-                                   flocal > map->choose_local_fallback_tries)
+                                   flocal > local_fallback_tries)
                                        item = bucket_perm_choose(in, x, r);
                                else
                                        item = crush_bucket_choose(in, x, r);
@@ -399,13 +387,15 @@ static int crush_choose(const struct crush_map *map,
                                reject = 0;
                                if (!collide && recurse_to_leaf) {
                                        if (item < 0) {
-                                               if (crush_choose(map,
+                                               if (crush_choose_firstn(map,
                                                         map->buckets[-1-item],
                                                         weight, weight_max,
                                                         x, outpos+1, 0,
                                                         out2, outpos,
-                                                        firstn, 0,
-                                                        map->chooseleaf_descend_once,
+                                                        recurse_tries, 0,
+                                                        local_tries,
+                                                        local_fallback_tries,
+                                                        0,
                                                         NULL) <= outpos)
                                                        /* didn't get leaf */
                                                        reject = 1;
@@ -430,17 +420,14 @@ reject:
                                        ftotal++;
                                        flocal++;
 
-                                       if (reject && descend_once)
-                                               /* let outer call try again */
-                                               skip_rep = 1;
-                                       else if (collide && flocal <= map->choose_local_tries)
+                                       if (collide && flocal <= local_tries)
                                                /* retry locally a few times */
                                                retry_bucket = 1;
-                                       else if (map->choose_local_fallback_tries > 0 &&
-                                                flocal <= in->size + map->choose_local_fallback_tries)
+                                       else if (local_fallback_tries > 0 &&
+                                                flocal <= in->size + local_fallback_tries)
                                                /* exhaustive bucket search */
                                                retry_bucket = 1;
-                                       else if (ftotal <= map->choose_total_tries)
+                                       else if (ftotal <= tries)
                                                /* then retry descent */
                                                retry_descent = 1;
                                        else
@@ -469,6 +456,160 @@ reject:
 }
 
 
+/**
+ * crush_choose_indep - alternative breadth-first positionally stable mapping
+ * Fills out[outpos..outpos+left-1] (and out2, if given); slots never placed end up CRUSH_ITEM_NONE.
+ */
+static void crush_choose_indep(const struct crush_map *map,
+                              struct crush_bucket *bucket,  /* bucket every descent starts from */
+                              const __u32 *weight, int weight_max,  /* handed to is_out() for type-0 items */
+                              int x, int left, int numrep, int type,  /* left: slots to fill; type: item type wanted */
+                              int *out, int outpos,  /* results are written from out[outpos] on */
+                              unsigned int tries,  /* upper bound on passes over the slots */
+                              unsigned int recurse_tries,  /* passed as "tries" to the nested leaf call */
+                              int recurse_to_leaf,  /* nonzero: also record a leaf in out2 */
+                              int *out2,  /* leaf results; dereferenced unchecked when recurse_to_leaf is set */
+                              int parent_r)  /* added to the slot index when forming r */
+{
+       struct crush_bucket *in = bucket;
+       int endpos = outpos + left;  /* one past the last slot this call owns */
+       int rep;
+       unsigned int ftotal;  /* number of completed passes so far */
+       int r;
+       int i;
+       int item = 0;
+       int itemtype;
+       int collide;
+
+       dprintk("CHOOSE%s INDEP bucket %d x %d outpos %d numrep %d\n", recurse_to_leaf ? "_LEAF" : "",
+               bucket->id, x, outpos, numrep);
+
+       /* mark every slot owned by this call as not yet decided */
+       for (rep = outpos; rep < endpos; rep++) {
+               out[rep] = CRUSH_ITEM_UNDEF;
+               if (out2)
+                       out2[rep] = CRUSH_ITEM_UNDEF;
+       }
+
+       for (ftotal = 0; left > 0 && ftotal < tries; ftotal++) {  /* one pass per try while slots remain */
+               for (rep = outpos; rep < endpos; rep++) {
+                       if (out[rep] != CRUSH_ITEM_UNDEF)
+                               continue;  /* slot was decided in an earlier pass */
+
+                       in = bucket;  /* restart the descent at the top bucket */
+
+                       /* choose through intervening buckets */
+                       for (;;) {
+                               /* note: we base the choice on the position
+                                * even in the nested call.  that means that
+                                * if the first layer chooses the same bucket
+                                * in a different position, we will tend to
+                                * choose a different item in that bucket.
+                                * this will involve more devices in data
+                                * movement and tend to distribute the load.
+                                */
+                               r = rep + parent_r;
+
+                               /* offset r by a multiple of the pass count ftotal */
+                               if (in->alg == CRUSH_BUCKET_UNIFORM &&
+                                   in->size % numrep == 0)
+                                       /* r'=r+(n+1)*f_total */
+                                       r += (numrep+1) * ftotal;
+                               else
+                                       /* r' = r + n*f_total */
+                                       r += numrep * ftotal;
+
+                               /* an empty bucket offers nothing; the slot stays undefined for a later pass */
+                               if (in->size == 0) {
+                                       dprintk("   empty bucket\n");
+                                       break;
+                               }
+
+                               item = crush_bucket_choose(in, x, r);
+                               if (item >= map->max_devices) {
+                                       dprintk("   bad item %d\n", item);
+                                       out[rep] = CRUSH_ITEM_NONE;
+                                       if (out2)
+                                               out2[rep] = CRUSH_ITEM_NONE;
+                                       left--;  /* slot is final: NONE is not retried */
+                                       break;
+                               }
+
+                               /* negative items are buckets carrying a type; all others count as type 0 */
+                               if (item < 0)
+                                       itemtype = map->buckets[-1-item]->type;
+                               else
+                                       itemtype = 0;
+                               dprintk("  item %d type %d\n", item, itemtype);
+
+                               /* wrong type: descend into the chosen bucket, or give up on the slot */
+                               if (itemtype != type) {
+                                       if (item >= 0 ||
+                                           (-1-item) >= map->max_buckets) {
+                                               dprintk("   bad item type %d\n", type);
+                                               out[rep] = CRUSH_ITEM_NONE;
+                                               if (out2)
+                                                       out2[rep] =
+                                                               CRUSH_ITEM_NONE;
+                                               left--;  /* slot is final: NONE is not retried */
+                                               break;
+                                       }
+                                       in = map->buckets[-1-item];
+                                       continue;
+                               }
+
+                               /* reject an item already present in one of this call's slots */
+                               collide = 0;
+                               for (i = outpos; i < endpos; i++) {
+                                       if (out[i] == item) {
+                                               collide = 1;
+                                               break;
+                                       }
+                               }
+                               if (collide)
+                                       break;  /* slot stays undefined for a later pass */
+
+                               if (recurse_to_leaf) {
+                                       if (item < 0) {
+                                               crush_choose_indep(map,
+                                                  map->buckets[-1-item],
+                                                  weight, weight_max,
+                                                  x, 1, numrep, 0,
+                                                  out2, rep,
+                                                  recurse_tries, 0,
+                                                  0, NULL, r);
+                                               if (out2[rep] == CRUSH_ITEM_NONE) {
+                                                       /* placed nothing; no leaf */
+                                                       break;
+                                               }
+                                       } else {
+                                               /* we already have a leaf! */
+                                               out2[rep] = item;
+                                       }
+                               }
+
+                               /* skip a type-0 item that is_out() rejects */
+                               if (itemtype == 0 &&
+                                   is_out(map, weight, weight_max, item, x))
+                                       break;
+
+                               /* accept the item for this slot */
+                               out[rep] = item;
+                               left--;
+                               break;
+                       }
+               }
+       }
+       for (rep = outpos; rep < endpos; rep++) {  /* anything still undefined becomes NONE */
+               if (out[rep] == CRUSH_ITEM_UNDEF) {
+                       out[rep] = CRUSH_ITEM_NONE;
+               }
+               if (out2 && out2[rep] == CRUSH_ITEM_UNDEF) {
+                       out2[rep] = CRUSH_ITEM_NONE;
+               }
+       }
+}
+
 /**
  * crush_do_rule - calculate a mapping with the given input and rule
  * @map: the crush_map
@@ -478,15 +619,17 @@ reject:
  * @result_max: maximum result size
  * @weight: weight vector (for map leaves)
  * @weight_max: size of weight vector
+ * @scratch: scratch vector for private use; must hold at least 3 * result_max ints
  */
 int crush_do_rule(const struct crush_map *map,
                  int ruleno, int x, int *result, int result_max,
-                 const __u32 *weight, int weight_max)
+                 const __u32 *weight, int weight_max,
+                 int *scratch)
 {
        int result_len;
-       int a[CRUSH_MAX_SET];
-       int b[CRUSH_MAX_SET];
-       int c[CRUSH_MAX_SET];
+       int *a = scratch;
+       int *b = scratch + result_max;
+       int *c = scratch + result_max*2;
        int recurse_to_leaf;
        int *w;
        int wsize = 0;
@@ -497,7 +640,10 @@ int crush_do_rule(const struct crush_map *map,
        __u32 step;
        int i, j;
        int numrep;
-       const int descend_once = 0;
+       int choose_tries = map->choose_total_tries;
+       int choose_local_tries = map->choose_local_tries;
+       int choose_local_fallback_tries = map->choose_local_fallback_tries;
+       int choose_leaf_tries = 0;
 
        if ((__u32)ruleno >= map->max_rules) {
                dprintk(" bad ruleno %d\n", ruleno);
@@ -519,20 +665,40 @@ int crush_do_rule(const struct crush_map *map,
                        wsize = 1;
                        break;
 
-               case CRUSH_RULE_CHOOSE_LEAF_FIRSTN:
+               case CRUSH_RULE_SET_CHOOSE_TRIES:
+                       if (curstep->arg1 > 0)
+                               choose_tries = curstep->arg1;
+                       break;
+
+               case CRUSH_RULE_SET_CHOOSELEAF_TRIES:
+                       if (curstep->arg1 > 0)
+                               choose_leaf_tries = curstep->arg1;
+                       break;
+
+               case CRUSH_RULE_SET_CHOOSE_LOCAL_TRIES:
+                       if (curstep->arg1 > 0)
+                               choose_local_tries = curstep->arg1;
+                       break;
+
+               case CRUSH_RULE_SET_CHOOSE_LOCAL_FALLBACK_TRIES:
+                       if (curstep->arg1 > 0)
+                               choose_local_fallback_tries = curstep->arg1;
+                       break;
+
+               case CRUSH_RULE_CHOOSELEAF_FIRSTN:
                case CRUSH_RULE_CHOOSE_FIRSTN:
                        firstn = 1;
                        /* fall through */
-               case CRUSH_RULE_CHOOSE_LEAF_INDEP:
+               case CRUSH_RULE_CHOOSELEAF_INDEP:
                case CRUSH_RULE_CHOOSE_INDEP:
                        if (wsize == 0)
                                break;
 
                        recurse_to_leaf =
                                curstep->op ==
-                                CRUSH_RULE_CHOOSE_LEAF_FIRSTN ||
+                                CRUSH_RULE_CHOOSELEAF_FIRSTN ||
                                curstep->op ==
-                               CRUSH_RULE_CHOOSE_LEAF_INDEP;
+                               CRUSH_RULE_CHOOSELEAF_INDEP;
 
                        /* reset output */
                        osize = 0;
@@ -550,22 +716,51 @@ int crush_do_rule(const struct crush_map *map,
                                                continue;
                                }
                                j = 0;
-                               osize += crush_choose(map,
-                                                     map->buckets[-1-w[i]],
-                                                     weight, weight_max,
-                                                     x, numrep,
-                                                     curstep->arg2,
-                                                     o+osize, j,
-                                                     firstn,
-                                                     recurse_to_leaf,
-                                                     descend_once, c+osize);
+                               if (firstn) {
+                                       int recurse_tries;
+                                       if (choose_leaf_tries)
+                                               recurse_tries =
+                                                       choose_leaf_tries;
+                                       else if (map->chooseleaf_descend_once)
+                                               recurse_tries = 1;
+                                       else
+                                               recurse_tries = choose_tries;
+                                       osize += crush_choose_firstn(
+                                               map,
+                                               map->buckets[-1-w[i]],
+                                               weight, weight_max,
+                                               x, numrep,
+                                               curstep->arg2,
+                                               o+osize, j,
+                                               choose_tries,
+                                               recurse_tries,
+                                               choose_local_tries,
+                                               choose_local_fallback_tries,
+                                               recurse_to_leaf,
+                                               c+osize);
+                               } else {
+                                       crush_choose_indep(
+                                               map,
+                                               map->buckets[-1-w[i]],
+                                               weight, weight_max,
+                                               x, numrep, numrep,
+                                               curstep->arg2,
+                                               o+osize, j,
+                                               choose_tries,
+                                               choose_leaf_tries ?
+                                                  choose_leaf_tries : 1,
+                                               recurse_to_leaf,
+                                               c+osize,
+                                               0);
+                                       osize += numrep;
+                               }
                        }
 
                        if (recurse_to_leaf)
                                /* copy final _leaf_ values to output set */
                                memcpy(o, c, osize*sizeof(*o));
 
-                       /* swap t and w arrays */
+                       /* swap o and w arrays */
                        tmp = o;
                        o = w;
                        w = tmp;