drm/i915: Extract i915_gem_obj_prepare_shmem_write()

author Chris Wilson <chris@chris-wilson.co.uk>
Thu, 18 Aug 2016 16:16:47 +0000 (17:16 +0100)

committer Chris Wilson <chris@chris-wilson.co.uk>
Thu, 18 Aug 2016 21:36:44 +0000 (22:36 +0100)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c

index 1db829c8b912207169655adba271089a6e240f8b..e586e15e172f0a1c584c1e5f7ff717ae37fdcaa9 100644 (file)
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -973,7 +973,7 @@ static u32 *copy_batch(struct drm_i915_gem_object *dest_obj,
                        u32 batch_start_offset,
                        u32 batch_len)
  {
-       int needs_clflush = 0;
+       unsigned int needs_clflush;
         void *src_base, *src;
         void *dst = NULL;
         int ret;
@@ -1020,7 +1020,7 @@ static u32 *copy_batch(struct drm_i915_gem_object *dest_obj,
  unmap_src:
         vunmap(src_base);
  unpin_src:
-       i915_gem_object_unpin_pages(src_obj);
+       i915_gem_obj_finish_shmem_access(src_obj);
  
         return ret ? ERR_PTR(ret) : dst;
  }
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h

index 5b778ceba82e50865cbb126486ef655c2ec99267..91861a08787c5464b2fa7a6c983b3afc305b6cf7 100644 (file)
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3098,9 +3098,6 @@ int i915_gem_object_put_pages(struct drm_i915_gem_object *obj);
  void i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv);
  void i915_gem_release_mmap(struct drm_i915_gem_object *obj);
  
-int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
-                                   int *needs_clflush);
-
  int __must_check i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
  
  static inline int __sg_page_count(struct scatterlist *sg)
@@ -3201,6 +3198,20 @@ static inline void i915_gem_object_unpin_map(struct drm_i915_gem_object *obj)
         i915_gem_object_unpin_pages(obj);
  }
  
+int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
+                                   unsigned int *needs_clflush);
+int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
+                                    unsigned int *needs_clflush);
+#define CLFLUSH_BEFORE 0x1
+#define CLFLUSH_AFTER 0x2
+#define CLFLUSH_FLAGS (CLFLUSH_BEFORE | CLFLUSH_AFTER)
+
+static inline void
+i915_gem_obj_finish_shmem_access(struct drm_i915_gem_object *obj)
+{
+       i915_gem_object_unpin_pages(obj);
+}
+
  int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
  int i915_gem_object_sync(struct drm_i915_gem_object *obj,
                          struct drm_i915_gem_request *to);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c

index a609522221ed272d5ba0b4389b3a77444b4c8580..f27c340bb8eeea2385286121e8ab2b69a6ddc3f3 100644 (file)
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -609,35 +609,95 @@ __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
   * flush the object from the CPU cache.
   */
  int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
-                                   int *needs_clflush)
+                                   unsigned int *needs_clflush)
  {
         int ret;
  
         *needs_clflush = 0;
  
-       if (WARN_ON(!i915_gem_object_has_struct_page(obj)))
-               return -EINVAL;
+       if (!i915_gem_object_has_struct_page(obj))
+               return -ENODEV;
  
         ret = i915_gem_object_wait_rendering(obj, true);
         if (ret)
                 return ret;
  
-       if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
-               /* If we're not in the cpu read domain, set ourself into the gtt
-                * read domain and manually flush cachelines (if required). This
-                * optimizes for the case when the gpu will dirty the data
-                * anyway again before the next pread happens. */
+       /* If we're not in the cpu read domain, set ourself into the gtt
+        * read domain and manually flush cachelines (if required). This
+        * optimizes for the case when the gpu will dirty the data
+        * anyway again before the next pread happens.
+        */
+       if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
                 *needs_clflush = !cpu_cache_is_coherent(obj->base.dev,
                                                         obj->cache_level);
+
+       ret = i915_gem_object_get_pages(obj);
+       if (ret)
+               return ret;
+
+       i915_gem_object_pin_pages(obj);
+
+       if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
+               ret = i915_gem_object_set_to_cpu_domain(obj, false);
+               if (ret) {
+                       i915_gem_object_unpin_pages(obj);
+                       return ret;
+               }
+               *needs_clflush = 0;
         }
  
+       return 0;
+}
+
+int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
+                                    unsigned int *needs_clflush)
+{
+       int ret;
+
+       *needs_clflush = 0;
+       if (!i915_gem_object_has_struct_page(obj))
+               return -ENODEV;
+
+       ret = i915_gem_object_wait_rendering(obj, false);
+       if (ret)
+               return ret;
+
+       /* If we're not in the cpu write domain, set ourself into the
+        * gtt write domain and manually flush cachelines (as required).
+        * This optimizes for the case when the gpu will use the data
+        * right away and we therefore have to clflush anyway.
+        */
+       if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
+               *needs_clflush |= cpu_write_needs_clflush(obj) << 1;
+
+       /* Same trick applies to invalidate partially written cachelines read
+        * before writing.
+        */
+       if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
+               *needs_clflush |= !cpu_cache_is_coherent(obj->base.dev,
+                                                        obj->cache_level);
+
         ret = i915_gem_object_get_pages(obj);
         if (ret)
                 return ret;
  
         i915_gem_object_pin_pages(obj);
  
-       return ret;
+       if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
+               ret = i915_gem_object_set_to_cpu_domain(obj, true);
+               if (ret) {
+                       i915_gem_object_unpin_pages(obj);
+                       return ret;
+               }
+               *needs_clflush = 0;
+       }
+
+       if ((*needs_clflush & CLFLUSH_AFTER) == 0)
+               obj->cache_dirty = true;
+
+       intel_fb_obj_invalidate(obj, ORIGIN_CPU);
+       obj->dirty = 1;
+       return 0;
  }
  
  /* Per-page copy function for the shmem pread fastpath.
@@ -872,19 +932,14 @@ i915_gem_shmem_pread(struct drm_device *dev,
         int needs_clflush = 0;
         struct sg_page_iter sg_iter;
  
-       if (!i915_gem_object_has_struct_page(obj))
-               return -ENODEV;
-
-       user_data = u64_to_user_ptr(args->data_ptr);
-       remain = args->size;
-
-       obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
-
         ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
         if (ret)
                 return ret;
  
+       obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
+       user_data = u64_to_user_ptr(args->data_ptr);
         offset = args->offset;
+       remain = args->size;
  
         for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
                          offset >> PAGE_SHIFT) {
@@ -940,7 +995,7 @@ next_page:
         }
  
  out:
-       i915_gem_object_unpin_pages(obj);
+       i915_gem_obj_finish_shmem_access(obj);
  
         return ret;
  }
@@ -1248,42 +1303,17 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
         int shmem_page_offset, page_length, ret = 0;
         int obj_do_bit17_swizzling, page_do_bit17_swizzling;
         int hit_slowpath = 0;
-       int needs_clflush_after = 0;
-       int needs_clflush_before = 0;
+       unsigned int needs_clflush;
         struct sg_page_iter sg_iter;
  
-       user_data = u64_to_user_ptr(args->data_ptr);
-       remain = args->size;
-
-       obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
-
-       ret = i915_gem_object_wait_rendering(obj, false);
+       ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
         if (ret)
                 return ret;
  
-       if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
-               /* If we're not in the cpu write domain, set ourself into the gtt
-                * write domain and manually flush cachelines (if required). This
-                * optimizes for the case when the gpu will use the data
-                * right away and we therefore have to clflush anyway. */
-               needs_clflush_after = cpu_write_needs_clflush(obj);
-       }
-       /* Same trick applies to invalidate partially written cachelines read
-        * before writing. */
-       if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
-               needs_clflush_before =
-                       !cpu_cache_is_coherent(dev, obj->cache_level);
-
-       ret = i915_gem_object_get_pages(obj);
-       if (ret)
-               return ret;
-
-       intel_fb_obj_invalidate(obj, ORIGIN_CPU);
-
-       i915_gem_object_pin_pages(obj);
-
+       obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
+       user_data = u64_to_user_ptr(args->data_ptr);
         offset = args->offset;
-       obj->dirty = 1;
+       remain = args->size;
  
         for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
                          offset >> PAGE_SHIFT) {
@@ -1307,7 +1337,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
                 /* If we don't overwrite a cacheline completely we need to be
                  * careful to have up-to-date data by first clflushing. Don't
                  * overcomplicate things and flush the entire patch. */
-               partial_cacheline_write = needs_clflush_before &&
+               partial_cacheline_write = needs_clflush & CLFLUSH_BEFORE &&
                         ((shmem_page_offset | page_length)
                                 & (boot_cpu_data.x86_clflush_size - 1));
  
@@ -1317,7 +1347,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
                 ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
                                         user_data, page_do_bit17_swizzling,
                                         partial_cacheline_write,
-                                       needs_clflush_after);
+                                       needs_clflush & CLFLUSH_AFTER);
                 if (ret == 0)
                         goto next_page;
  
@@ -1326,7 +1356,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
                 ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
                                         user_data, page_do_bit17_swizzling,
                                         partial_cacheline_write,
-                                       needs_clflush_after);
+                                       needs_clflush & CLFLUSH_AFTER);
  
                 mutex_lock(&dev->struct_mutex);
  
@@ -1340,7 +1370,7 @@ next_page:
         }
  
  out:
-       i915_gem_object_unpin_pages(obj);
+       i915_gem_obj_finish_shmem_access(obj);
  
         if (hit_slowpath) {
                 /*
@@ -1348,17 +1378,15 @@ out:
                  * cachelines in-line while writing and the object moved
                  * out of the cpu write domain while we've dropped the lock.
                  */
-               if (!needs_clflush_after &&
+               if (!(needs_clflush & CLFLUSH_AFTER) &&
                     obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
                         if (i915_gem_clflush_object(obj, obj->pin_display))
-                               needs_clflush_after = true;
+                               needs_clflush |= CLFLUSH_AFTER;
                 }
         }
  
-       if (needs_clflush_after)
+       if (needs_clflush & CLFLUSH_AFTER)
                 i915_gem_chipset_flush(to_i915(dev));
-       else
-               obj->cache_dirty = true;
  
         intel_fb_obj_flush(obj, false, ORIGIN_CPU);
         return ret;
@@ -1437,10 +1465,8 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
         if (ret == -EFAULT || ret == -ENOSPC) {
                 if (obj->phys_handle)
                         ret = i915_gem_phys_pwrite(obj, args, file);
-               else if (i915_gem_object_has_struct_page(obj))
-                       ret = i915_gem_shmem_pwrite(dev, obj, args, file);
                 else
-                       ret = -ENODEV;
+                       ret = i915_gem_shmem_pwrite(dev, obj, args, file);
         }
  
         i915_gem_object_put(obj);
author	Chris Wilson <chris@chris-wilson.co.uk>
	Thu, 18 Aug 2016 16:16:47 +0000 (17:16 +0100)
committer	Chris Wilson <chris@chris-wilson.co.uk>
	Thu, 18 Aug 2016 21:36:44 +0000 (22:36 +0100)
drivers/gpu/drm/i915/i915_cmd_parser.c (patch | blob | history)
drivers/gpu/drm/i915/i915_drv.h (patch | blob | history)
drivers/gpu/drm/i915/i915_gem.c (patch | blob | history)