git.karo-electronics.de Git - karo-tx-linux.git/commitdiff
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph...
author Linus Torvalds <torvalds@linux-foundation.org>
Mon, 7 Apr 2014 18:09:13 +0000 (11:09 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 7 Apr 2014 18:09:13 +0000 (11:09 -0700)
Pull Ceph updates from Sage Weil:
 "The biggest chunk is a series of patches from Ilya that add support
  for new Ceph osd and crush map features, including some new tunables,
  primary affinity, and the new encoding that is needed for erasure
  coding support.  This brings things into parity with the server side
  and the looming firefly release.  There is also support for allocation
  hints in RBD that help limit fragmentation on the server side.

  There is also a series of patches from Zheng fixing NFS reexport,
  directory fragmentation support, flock vs fcntl behavior, and some
  issues with clustered MDS.

  Finally, there are some miscellaneous fixes from Yunchuan Wen for
  fscache, Fabian Frederick for ACLs, and from me for fsync(dirfd)
  behavior"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client: (79 commits)
  ceph: skip invalid dentry during dcache readdir
  libceph: dump pool {read,write}_tier to debugfs
  libceph: output primary affinity values on osdmap updates
  ceph: flush cap release queue when trimming session caps
  ceph: don't grabs open file reference for aborted request
  ceph: drop extra open file reference in ceph_atomic_open()
  ceph: preallocate buffer for readdir reply
  libceph: enable PRIMARY_AFFINITY feature bit
  libceph: redo ceph_calc_pg_primary() in terms of ceph_calc_pg_acting()
  libceph: add support for osd primary affinity
  libceph: add support for primary_temp mappings
  libceph: return primary from ceph_calc_pg_acting()
  libceph: switch ceph_calc_pg_acting() to new helpers
  libceph: introduce apply_temps() helper
  libceph: introduce pg_to_raw_osds() and raw_to_up_osds() helpers
  libceph: ceph_can_shift_osds(pool) and pool type defines
  libceph: ceph_osd_{exists,is_up,is_down}(osd) definitions
  libceph: enable OSDMAP_ENC feature bit
  libceph: primary_affinity decode bits
  libceph: primary_affinity infrastructure
  ...

1  2 
net/ceph/osd_client.c

diff --combined net/ceph/osd_client.c
index 82750f9158655225ad7dab9e903932d38f97b8a5,b4157dc2219989ed2ef94a1f36f1f24fb8c787af..b0dfce77656a0ba9c43d6d6616be1d28fb4dc137
@@@ -436,6 -436,7 +436,7 @@@ static bool osd_req_opcode_valid(u16 op
        case CEPH_OSD_OP_OMAPCLEAR:
        case CEPH_OSD_OP_OMAPRMKEYS:
        case CEPH_OSD_OP_OMAP_CMP:
+       case CEPH_OSD_OP_SETALLOCHINT:
        case CEPH_OSD_OP_CLONERANGE:
        case CEPH_OSD_OP_ASSERT_SRC_VERSION:
        case CEPH_OSD_OP_SRC_CMPXATTR:
@@@ -591,6 -592,26 +592,26 @@@ void osd_req_op_watch_init(struct ceph_
  }
  EXPORT_SYMBOL(osd_req_op_watch_init);
  
+ void osd_req_op_alloc_hint_init(struct ceph_osd_request *osd_req,
+                               unsigned int which,
+                               u64 expected_object_size,
+                               u64 expected_write_size)
+ {
+       struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which,
+                                                     CEPH_OSD_OP_SETALLOCHINT);
+       op->alloc_hint.expected_object_size = expected_object_size;
+       op->alloc_hint.expected_write_size = expected_write_size;
+       /*
+        * CEPH_OSD_OP_SETALLOCHINT op is advisory and therefore deemed
+        * not worth a feature bit.  Set FAILOK per-op flag to make
+        * sure older osds don't trip over an unsupported opcode.
+        */
+       op->flags |= CEPH_OSD_OP_FLAG_FAILOK;
+ }
+ EXPORT_SYMBOL(osd_req_op_alloc_hint_init);
  static void ceph_osdc_msg_data_add(struct ceph_msg *msg,
                                struct ceph_osd_data *osd_data)
  {
@@@ -681,6 -702,12 +702,12 @@@ static u64 osd_req_encode_op(struct cep
                dst->watch.ver = cpu_to_le64(src->watch.ver);
                dst->watch.flag = src->watch.flag;
                break;
+       case CEPH_OSD_OP_SETALLOCHINT:
+               dst->alloc_hint.expected_object_size =
+                   cpu_to_le64(src->alloc_hint.expected_object_size);
+               dst->alloc_hint.expected_write_size =
+                   cpu_to_le64(src->alloc_hint.expected_write_size);
+               break;
        default:
                pr_err("unsupported osd opcode %s\n",
                        ceph_osd_op_name(src->op));
  
                return 0;
        }
        dst->op = cpu_to_le16(src->op);
+       dst->flags = cpu_to_le32(src->flags);
        dst->payload_len = cpu_to_le32(src->payload_len);
  
        return request_data_len;
@@@ -1304,7 -1333,7 +1333,7 @@@ static int __map_request(struct ceph_os
  {
        struct ceph_pg pgid;
        int acting[CEPH_PG_MAX_SIZE];
-       int o = -1, num = 0;
+       int num, o;
        int err;
        bool was_paused;
  
        }
        req->r_pgid = pgid;
  
-       err = ceph_calc_pg_acting(osdc->osdmap, pgid, acting);
-       if (err > 0) {
-               o = acting[0];
-               num = err;
-       }
+       num = ceph_calc_pg_acting(osdc->osdmap, pgid, acting, &o);
+       if (num < 0)
+               num = 0;
  
        was_paused = req->r_paused;
        req->r_paused = __req_should_be_paused(osdc, req);
@@@ -2033,7 -2060,7 +2060,7 @@@ void ceph_osdc_handle_map(struct ceph_o
                        int skipped_map = 0;
  
                        dout("taking full map %u len %d\n", epoch, maplen);
-                       newmap = osdmap_decode(&p, p+maplen);
+                       newmap = ceph_osdmap_decode(&p, p+maplen);
                        if (IS_ERR(newmap)) {
                                err = PTR_ERR(newmap);
                                goto bad;
@@@ -2082,6 -2109,7 +2109,6 @@@ bad
        pr_err("osdc handle_map corrupt msg\n");
        ceph_msg_dump(msg);
        up_write(&osdc->map_sem);
 -      return;
  }
  
  /*
@@@ -2280,6 -2308,7 +2307,6 @@@ done_err
  
  bad:
        pr_err("osdc handle_watch_notify corrupt msg\n");
 -      return;
  }
  
  /*