Merge branch 'drm-intel-fixes' into drm-intel-next
author Keith Packard <keithp@keithp.com>
Tue, 21 Jun 2011 19:02:57 +0000 (12:02 -0700)
committer Keith Packard <keithp@keithp.com>
Tue, 21 Jun 2011 19:02:57 +0000 (12:02 -0700)
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/i915_gem_gtt.c
drivers/gpu/drm/i915/intel_display.c
drivers/gpu/drm/i915/intel_overlay.c
drivers/gpu/drm/i915/intel_ringbuffer.c

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index f63ee162f1245dcffc5844c6c401df7ce9b885c8..8a9fd917786068f4efeb0e678976a9d80fbe1833 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1190,7 +1190,7 @@ void i915_gem_clflush_object(struct drm_i915_gem_object *obj);
 int __must_check i915_gem_object_set_domain(struct drm_i915_gem_object *obj,
                                            uint32_t read_domains,
                                            uint32_t write_domain);
-int __must_check i915_gem_object_flush_gpu(struct drm_i915_gem_object *obj);
+int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj);
 int __must_check i915_gem_init_ringbuffer(struct drm_device *dev);
 void i915_gem_cleanup_ringbuffer(struct drm_device *dev);
 void i915_gem_do_init(struct drm_device *dev,
@@ -1209,7 +1209,8 @@ int __must_check
 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj,
                                  bool write);
 int __must_check
-i915_gem_object_set_to_display_plane(struct drm_i915_gem_object *obj,
+i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
+                                    u32 alignment,
                                     struct intel_ring_buffer *pipelined);
 int i915_gem_attach_phys_object(struct drm_device *dev,
                                struct drm_i915_gem_object *obj,
@@ -1223,9 +1224,14 @@ void i915_gem_release(struct drm_device *dev, struct drm_file *file);
 uint32_t
 i915_gem_get_unfenced_gtt_alignment(struct drm_i915_gem_object *obj);
 
+int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
+                                   enum i915_cache_level cache_level);
+
 /* i915_gem_gtt.c */
 void i915_gem_restore_gtt_mappings(struct drm_device *dev);
 int __must_check i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj);
+void i915_gem_gtt_rebind_object(struct drm_i915_gem_object *obj,
+                               enum i915_cache_level cache_level);
 void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj);
 
 /* i915_gem_evict.c */
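
The i915_drv.h changes above rename i915_gem_object_flush_gpu() to
i915_gem_object_finish_gpu(), fold pinning (and an explicit alignment)
into the new i915_gem_object_pin_to_display_plane(), and add
i915_gem_object_set_cache_level() together with
i915_gem_gtt_rebind_object(). A minimal caller-side sketch of how the
new entry points fit together is below; it is an illustration only, not
code from this commit, and assumes the usual i915 context (definitions
from i915_drv.h, dev->struct_mutex held) that real callers such as
intel_pin_and_fence_fb_obj() provide.

/* Illustrative sketch only -- not part of this commit. */
static int example_prepare_scanout(struct drm_i915_gem_object *obj,
				   u32 alignment,
				   struct intel_ring_buffer *pipelined)
{
	int ret;

	/* One call now replaces the old i915_gem_object_pin() +
	 * i915_gem_object_set_to_display_plane() pair: it flushes GPU
	 * writes, drops the object to uncached (I915_CACHE_NONE) and
	 * pins it map-and-fenceable.
	 */
	ret = i915_gem_object_pin_to_display_plane(obj, alignment, pipelined);
	if (ret)
		return ret;

	/* ... point the plane, cursor or overlay registers at obj ... */
	return 0;
}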
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index c6389de531612c11d78439207ebd051ba804e3a1..b29e0f2b780af5c62ea804761b2160db71f86c9f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2151,6 +2151,30 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj)
        return 0;
 }
 
+static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
+{
+       u32 old_write_domain, old_read_domains;
+
+       if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
+               return;
+
+       /* Act as a barrier for all accesses through the GTT */
+       mb();
+
+       /* Force a pagefault for domain tracking on next user access */
+       i915_gem_release_mmap(obj);
+
+       old_read_domains = obj->base.read_domains;
+       old_write_domain = obj->base.write_domain;
+
+       obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT;
+       obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;
+
+       trace_i915_gem_object_change_domain(obj,
+                                           old_read_domains,
+                                           old_write_domain);
+}
+
 /**
  * Unbinds an object from the GTT aperture.
  */
@@ -2167,23 +2191,28 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj)
                return -EINVAL;
        }
 
-       /* blow away mappings if mapped through GTT */
-       i915_gem_release_mmap(obj);
-
-       /* Move the object to the CPU domain to ensure that
-        * any possible CPU writes while it's not in the GTT
-        * are flushed when we go to remap it. This will
-        * also ensure that all pending GPU writes are finished
-        * before we unbind.
-        */
-       ret = i915_gem_object_set_to_cpu_domain(obj, 1);
+       ret = i915_gem_object_finish_gpu(obj);
        if (ret == -ERESTARTSYS)
                return ret;
        /* Continue on if we fail due to EIO, the GPU is hung so we
         * should be safe and we need to cleanup or else we might
         * cause memory corruption through use-after-free.
         */
+
+       i915_gem_object_finish_gtt(obj);
+
+       /* Move the object to the CPU domain to ensure that
+        * any possible CPU writes while it's not in the GTT
+        * are flushed when we go to remap it.
+        */
+       if (ret == 0)
+               ret = i915_gem_object_set_to_cpu_domain(obj, 1);
+       if (ret == -ERESTARTSYS)
+               return ret;
        if (ret) {
+               /* In the event of a disaster, abandon all caches and
+                * hope for the best.
+                */
                i915_gem_clflush_object(obj);
                obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
        }
@@ -3005,51 +3034,139 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
        return 0;
 }
 
+int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
+                                   enum i915_cache_level cache_level)
+{
+       int ret;
+
+       if (obj->cache_level == cache_level)
+               return 0;
+
+       if (obj->pin_count) {
+               DRM_DEBUG("can not change the cache level of pinned objects\n");
+               return -EBUSY;
+       }
+
+       if (obj->gtt_space) {
+               ret = i915_gem_object_finish_gpu(obj);
+               if (ret)
+                       return ret;
+
+               i915_gem_object_finish_gtt(obj);
+
+               /* Before SandyBridge, you could not use tiling or fence
+                * registers with snooped memory, so relinquish any fences
+                * currently pointing to our region in the aperture.
+                */
+               if (INTEL_INFO(obj->base.dev)->gen < 6) {
+                       ret = i915_gem_object_put_fence(obj);
+                       if (ret)
+                               return ret;
+               }
+
+               i915_gem_gtt_rebind_object(obj, cache_level);
+       }
+
+       if (cache_level == I915_CACHE_NONE) {
+               u32 old_read_domains, old_write_domain;
+
+               /* If we're coming from LLC cached, then we haven't
+                * actually been tracking whether the data is in the
+                * CPU cache or not, since we only allow one bit set
+                * in obj->write_domain and have been skipping the clflushes.
+                * Just set it to the CPU cache for now.
+                */
+               WARN_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU);
+               WARN_ON(obj->base.read_domains & ~I915_GEM_DOMAIN_CPU);
+
+               old_read_domains = obj->base.read_domains;
+               old_write_domain = obj->base.write_domain;
+
+               obj->base.read_domains = I915_GEM_DOMAIN_CPU;
+               obj->base.write_domain = I915_GEM_DOMAIN_CPU;
+
+               trace_i915_gem_object_change_domain(obj,
+                                                   old_read_domains,
+                                                   old_write_domain);
+       }
+
+       obj->cache_level = cache_level;
+       return 0;
+}
+
 /*
- * Prepare buffer for display plane. Use uninterruptible for possible flush
- * wait, as in modesetting process we're not supposed to be interrupted.
+ * Prepare buffer for display plane (scanout, cursors, etc).
+ * Can be called from an uninterruptible phase (modesetting) and allows
+ * any flushes to be pipelined (for pageflips).
+ *
+ * For the display plane, we want to be in the GTT but out of any write
+ * domains. So in many ways this looks like set_to_gtt_domain() apart from the
+ * ability to pipeline the waits, pinning and any additional subtleties
+ * that may differentiate the display plane from ordinary buffers.
  */
 int
-i915_gem_object_set_to_display_plane(struct drm_i915_gem_object *obj,
+i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
+                                    u32 alignment,
                                     struct intel_ring_buffer *pipelined)
 {
-       uint32_t old_read_domains;
+       u32 old_read_domains, old_write_domain;
        int ret;
 
-       /* Not valid to be called on unbound objects. */
-       if (obj->gtt_space == NULL)
-               return -EINVAL;
-
        ret = i915_gem_object_flush_gpu_write_domain(obj);
        if (ret)
                return ret;
 
-
-       /* Currently, we are always called from an non-interruptible context. */
        if (pipelined != obj->ring) {
                ret = i915_gem_object_wait_rendering(obj);
                if (ret)
                        return ret;
        }
 
+       /* The display engine is not coherent with the LLC cache on gen6.  As
+        * a result, we make sure that the pinning that is about to occur is
+        * done with uncached PTEs. This is the lowest common denominator for all
+        * chipsets.
+        *
+        * However for gen6+, we could do better by using the GFDT bit instead
+        * of uncaching, which would allow us to flush all the LLC-cached data
+        * with that bit in the PTE to main memory with just one PIPE_CONTROL.
+        */
+       ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE);
+       if (ret)
+               return ret;
+
+       /* As the user may map the buffer once pinned in the display plane
+        * (e.g. libkms for the bootup splash), we have to ensure that we
+        * always use map_and_fenceable for all scanout buffers.
+        */
+       ret = i915_gem_object_pin(obj, alignment, true);
+       if (ret)
+               return ret;
+
        i915_gem_object_flush_cpu_write_domain(obj);
 
+       old_write_domain = obj->base.write_domain;
        old_read_domains = obj->base.read_domains;
+
+       /* It should now be out of any other write domains, and we can update
+        * the domain values for our changes.
+        */
+       BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
        obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
 
        trace_i915_gem_object_change_domain(obj,
                                            old_read_domains,
-                                           obj->base.write_domain);
+                                           old_write_domain);
 
        return 0;
 }
 
 int
-i915_gem_object_flush_gpu(struct drm_i915_gem_object *obj)
+i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj)
 {
        int ret;
 
-       if (!obj->active)
+       if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0)
                return 0;
 
        if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) {
@@ -3058,6 +3175,9 @@ i915_gem_object_flush_gpu(struct drm_i915_gem_object *obj)
                        return ret;
        }
 
+       /* Ensure that we invalidate the GPU's caches and TLBs. */
+       obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
+
        return i915_gem_object_wait_rendering(obj);
 }
 
@@ -3580,7 +3700,23 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
        obj->base.write_domain = I915_GEM_DOMAIN_CPU;
        obj->base.read_domains = I915_GEM_DOMAIN_CPU;
 
-       obj->cache_level = I915_CACHE_NONE;
+       if (IS_GEN6(dev)) {
+               /* On Gen6, we can have the GPU use the LLC (the CPU
+                * cache) for about a 10% performance improvement
+                * compared to uncached.  Graphics requests other than
+                * display scanout are coherent with the CPU in
+                * accessing this cache.  This means in this mode we
+                * don't need to clflush on the CPU side, and on the
+                * GPU side we only need to flush internal caches to
+                * get data visible to the CPU.
+                *
+                * However, we maintain the display planes as UC, and so
+                * need to rebind when first used as such.
+                */
+               obj->cache_level = I915_CACHE_LLC;
+       } else
+               obj->cache_level = I915_CACHE_NONE;
+
        obj->base.driver_private = NULL;
        obj->fence_reg = I915_FENCE_REG_NONE;
        INIT_LIST_HEAD(&obj->mm_list);
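
Note that i915_gem_object_finish_gpu() above now keys its early-out on
the GPU read domains rather than obj->active, so an object that is
still on the active list but only visible to the CPU or GTT is not
flushed needlessly. For reference (not part of this diff), the domain
bits being masked are the long-standing values from
include/drm/i915_drm.h and i915_drv.h:

/* Reference only -- definitions as they stand in this tree. */
#define I915_GEM_DOMAIN_CPU		0x00000001	/* CPU cache */
#define I915_GEM_DOMAIN_RENDER		0x00000002	/* render cache */
#define I915_GEM_DOMAIN_SAMPLER		0x00000004
#define I915_GEM_DOMAIN_COMMAND		0x00000008
#define I915_GEM_DOMAIN_INSTRUCTION	0x00000010
#define I915_GEM_DOMAIN_VERTEX		0x00000020
#define I915_GEM_DOMAIN_GTT		0x00000040	/* aperture */

/* Everything except direct CPU and GTT access counts as a GPU domain. */
#define I915_GEM_GPU_DOMAINS	(~(I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT))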
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index e46b645773cfcb7ca65352996d49761a8cf7872f..7a709cd8d543e61e09233400537cf21074c17dd3 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -59,24 +59,8 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev)
                              (dev_priv->mm.gtt_end - dev_priv->mm.gtt_start) / PAGE_SIZE);
 
        list_for_each_entry(obj, &dev_priv->mm.gtt_list, gtt_list) {
-               unsigned int agp_type =
-                       cache_level_to_agp_type(dev, obj->cache_level);
-
                i915_gem_clflush_object(obj);
-
-               if (dev_priv->mm.gtt->needs_dmar) {
-                       BUG_ON(!obj->sg_list);
-
-                       intel_gtt_insert_sg_entries(obj->sg_list,
-                                                   obj->num_sg,
-                                                   obj->gtt_space->start >> PAGE_SHIFT,
-                                                   agp_type);
-               } else
-                       intel_gtt_insert_pages(obj->gtt_space->start
-                                                  >> PAGE_SHIFT,
-                                              obj->base.size >> PAGE_SHIFT,
-                                              obj->pages,
-                                              agp_type);
+               i915_gem_gtt_rebind_object(obj, obj->cache_level);
        }
 
        intel_gtt_chipset_flush();
@@ -110,6 +94,27 @@ int i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj)
        return 0;
 }
 
+void i915_gem_gtt_rebind_object(struct drm_i915_gem_object *obj,
+                               enum i915_cache_level cache_level)
+{
+       struct drm_device *dev = obj->base.dev;
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       unsigned int agp_type = cache_level_to_agp_type(dev, cache_level);
+
+       if (dev_priv->mm.gtt->needs_dmar) {
+               BUG_ON(!obj->sg_list);
+
+               intel_gtt_insert_sg_entries(obj->sg_list,
+                                           obj->num_sg,
+                                           obj->gtt_space->start >> PAGE_SHIFT,
+                                           agp_type);
+       } else
+               intel_gtt_insert_pages(obj->gtt_space->start >> PAGE_SHIFT,
+                                      obj->base.size >> PAGE_SHIFT,
+                                      obj->pages,
+                                      agp_type);
+}
+
 void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj)
 {
        intel_gtt_clear_range(obj->gtt_space->start >> PAGE_SHIFT,
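
i915_gem_gtt_rebind_object() converts the requested cache level into a
PTE type via cache_level_to_agp_type(), which this diff calls but does
not show. For context, a sketch of that helper as it exists in this
tree (reconstructed from the same series -- verify against
i915_gem_gtt.c before relying on the details):

static unsigned int cache_level_to_agp_type(struct drm_device *dev,
					    enum i915_cache_level cache_level)
{
	switch (cache_level) {
	case I915_CACHE_LLC_MLC:
		if (INTEL_INFO(dev)->gen >= 6)
			return AGP_USER_CACHED_MEMORY_LLC_MLC;

		/* Older chipsets do not have this extra level of CPU
		 * cacheing, so fall through and request the PTEs
		 * simply as cached.
		 */
	case I915_CACHE_LLC:
		return AGP_USER_CACHED_MEMORY;
	default:
	case I915_CACHE_NONE:
		return AGP_USER_MEMORY;
	}
}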
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index aa43e7be6053bdfd6dc6fd7274d17ccd544b799f..86a3ec1469ba4318e92d60b587a0418af32bf4d6 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -1812,14 +1812,10 @@ intel_pin_and_fence_fb_obj(struct drm_device *dev,
        }
 
        dev_priv->mm.interruptible = false;
-       ret = i915_gem_object_pin(obj, alignment, true);
+       ret = i915_gem_object_pin_to_display_plane(obj, alignment, pipelined);
        if (ret)
                goto err_interruptible;
 
-       ret = i915_gem_object_set_to_display_plane(obj, pipelined);
-       if (ret)
-               goto err_unpin;
-
        /* Install a fence for tiled scan-out. Pre-i965 always needs a
         * fence, whereas 965+ only requires a fence if using
         * framebuffer compression.  For simplicity, we always install
@@ -1971,7 +1967,7 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y,
                 * This should only fail upon a hung GPU, in which case we
                 * can safely continue.
                 */
-               ret = i915_gem_object_flush_gpu(obj);
+               ret = i915_gem_object_finish_gpu(obj);
                (void) ret;
        }
 
@@ -5434,21 +5430,15 @@ static int intel_crtc_cursor_set(struct drm_crtc *crtc,
                        goto fail_locked;
                }
 
-               ret = i915_gem_object_pin(obj, PAGE_SIZE, true);
-               if (ret) {
-                       DRM_ERROR("failed to pin cursor bo\n");
-                       goto fail_locked;
-               }
-
-               ret = i915_gem_object_set_to_gtt_domain(obj, 0);
+               ret = i915_gem_object_pin_to_display_plane(obj, 0, NULL);
                if (ret) {
                        DRM_ERROR("failed to move cursor bo into the GTT\n");
-                       goto fail_unpin;
+                       goto fail_locked;
                }
 
                ret = i915_gem_object_put_fence(obj);
                if (ret) {
-                       DRM_ERROR("failed to move cursor bo into the GTT\n");
+                       DRM_ERROR("failed to release fence for cursor\n");
                        goto fail_unpin;
                }
 
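The cursor path now passes an alignment of 0 to
i915_gem_object_pin_to_display_plane(). That works because of the
fallback in i915_gem_object_bind_to_gtt() (context from this tree, not
part of the diff): a zero alignment is replaced by the object's natural
alignment, so the old explicit PAGE_SIZE is redundant.

	/* From i915_gem_object_bind_to_gtt(), for context. */
	if (alignment == 0)
		alignment = map_and_fenceable ? fence_alignment :
						unfenced_alignment;
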
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index a670c006982e5056e418f81aaec1d439a589a7ae..fcf6fcb0b482d29f613bbd9deee9c173b1590d7d 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -773,14 +773,10 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
        if (ret != 0)
                return ret;
 
-       ret = i915_gem_object_pin(new_bo, PAGE_SIZE, true);
+       ret = i915_gem_object_pin_to_display_plane(new_bo, 0, NULL);
        if (ret != 0)
                return ret;
 
-       ret = i915_gem_object_set_to_gtt_domain(new_bo, 0);
-       if (ret != 0)
-               goto out_unpin;
-
        ret = i915_gem_object_put_fence(new_bo);
        if (ret)
                goto out_unpin;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 95c4b1429935d562a6ee54f464f0bd52ba3ae6ba..e9615685a39cd771239e2a0d087b7128327d9575 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -236,7 +236,8 @@ init_pipe_control(struct intel_ring_buffer *ring)
                ret = -ENOMEM;
                goto err;
        }
-       obj->cache_level = I915_CACHE_LLC;
+
+       i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
 
        ret = i915_gem_object_pin(obj, 4096, true);
        if (ret)
@@ -776,7 +777,8 @@ static int init_status_page(struct intel_ring_buffer *ring)
                ret = -ENOMEM;
                goto err;
        }
-       obj->cache_level = I915_CACHE_LLC;
+
+       i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
 
        ret = i915_gem_object_pin(obj, 4096, true);
        if (ret != 0) {
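
Routing through i915_gem_object_set_cache_level() here, instead of
assigning obj->cache_level directly, keeps the GTT PTEs and domain
tracking consistent should the object ever be bound first. For a
freshly allocated object, as in init_pipe_control() and
init_status_page(), neither failure path (a pinned object, or a flush
error while bound) can trigger, which is why the return value may be
ignored. A sketch of the pattern:

/* Illustration only -- mirrors the two call sites above. */
obj = i915_gem_alloc_object(dev, 4096);
if (obj == NULL) {
	ret = -ENOMEM;
	goto err;
}

/* Unbound and unpinned, so this cannot fail; it simply records the
 * desired PTE caching level for the subsequent pin and bind.
 */
i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);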