drivers/gpu/drm/i915/i915_gem.c

   1 /*
   2  * Copyright © 2008 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  *
  23  * Authors:
  24  *    Eric Anholt <eric@anholt.net>
  25  *
  26  */
  27
  28 #include "drmP.h"
  29 #include "drm.h"
  30 #include "i915_drm.h"
  31 #include "i915_drv.h"
  32 #include "i915_trace.h"
  33 #include "intel_drv.h"
  34 #include <linux/shmem_fs.h>
  35 #include <linux/slab.h>
  36 #include <linux/swap.h>
  37 #include <linux/pci.h>
  38 #include <linux/dma-buf.h>
  39
  40 static __must_check int i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj);
  41 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
  42 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
  43 static __must_check int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
  44                                                     unsigned alignment,
  45                                                     bool map_and_fenceable);
  46 static int i915_gem_phys_pwrite(struct drm_device *dev,
  47                                 struct drm_i915_gem_object *obj,
  48                                 struct drm_i915_gem_pwrite *args,
  49                                 struct drm_file *file);
  50
  51 static void i915_gem_write_fence(struct drm_device *dev, int reg,
  52                                  struct drm_i915_gem_object *obj);
  53 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
  54                                          struct drm_i915_fence_reg *fence,
  55                                          bool enable);
  56
  57 static int i915_gem_inactive_shrink(struct shrinker *shrinker,
  58                                     struct shrink_control *sc);
  59 static void i915_gem_object_truncate(struct drm_i915_gem_object *obj);
  60
  61 static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
  62 {
  63         if (obj->tiling_mode)
  64                 i915_gem_release_mmap(obj);
  65
  66         /* As we do not have an associated fence register, we will force
  67          * a tiling change if we ever need to acquire one.
  68          */
  69         obj->fence_dirty = false;
  70         obj->fence_reg = I915_FENCE_REG_NONE;
  71 }
  72
  73 /* some bookkeeping */
  74 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
  75                                   size_t size)
  76 {
  77         dev_priv->mm.object_count++;
  78         dev_priv->mm.object_memory += size;
  79 }
  80
  81 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
  82                                      size_t size)
  83 {
  84         dev_priv->mm.object_count--;
  85         dev_priv->mm.object_memory -= size;
  86 }
  87
  88 static int
  89 i915_gem_wait_for_error(struct drm_device *dev)
  90 {
  91         struct drm_i915_private *dev_priv = dev->dev_private;
  92         struct completion *x = &dev_priv->error_completion;
  93         unsigned long flags;
  94         int ret;
  95
  96         if (!atomic_read(&dev_priv->mm.wedged))
  97                 return 0;
  98
  99         ret = wait_for_completion_interruptible(x);
 100         if (ret)
 101                 return ret;
 102
 103         if (atomic_read(&dev_priv->mm.wedged)) {
 104                 /* GPU is hung, bump the completion count to account for
 105                  * the token we just consumed so that we never hit zero and
 106                  * end up waiting upon a subsequent completion event that
 107                  * will never happen.
 108                  */
 109                 spin_lock_irqsave(&x->wait.lock, flags);
 110                 x->done++;
 111                 spin_unlock_irqrestore(&x->wait.lock, flags);
 112         }
 113         return 0;
 114 }
 115
 116 int i915_mutex_lock_interruptible(struct drm_device *dev)
 117 {
 118         int ret;
 119
 120         ret = i915_gem_wait_for_error(dev);
 121         if (ret)
 122                 return ret;
 123
 124         ret = mutex_lock_interruptible(&dev->struct_mutex);
 125         if (ret)
 126                 return ret;
 127
 128         WARN_ON(i915_verify_lists(dev));
 129         return 0;
 130 }
 131
 132 static inline bool
 133 i915_gem_object_is_inactive(struct drm_i915_gem_object *obj)
 134 {
 135         return !obj->active;
 136 }
 137
 138 int
 139 i915_gem_init_ioctl(struct drm_device *dev, void *data,
 140                     struct drm_file *file)
 141 {
 142         struct drm_i915_gem_init *args = data;
 143
 144         if (drm_core_check_feature(dev, DRIVER_MODESET))
 145                 return -ENODEV;
 146
 147         if (args->gtt_start >= args->gtt_end ||
 148             (args->gtt_end | args->gtt_start) & (PAGE_SIZE - 1))
 149                 return -EINVAL;
 150
 151         /* GEM with user mode setting was never supported on ilk and later. */
 152         if (INTEL_INFO(dev)->gen >= 5)
 153                 return -ENODEV;
 154
 155         mutex_lock(&dev->struct_mutex);
 156         i915_gem_init_global_gtt(dev, args->gtt_start,
 157                                  args->gtt_end, args->gtt_end);
 158         mutex_unlock(&dev->struct_mutex);
 159
 160         return 0;
 161 }
 162
 163 int
 164 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 165                             struct drm_file *file)
 166 {
 167         struct drm_i915_private *dev_priv = dev->dev_private;
 168         struct drm_i915_gem_get_aperture *args = data;
 169         struct drm_i915_gem_object *obj;
 170         size_t pinned;
 171
 172         pinned = 0;
 173         mutex_lock(&dev->struct_mutex);
 174         list_for_each_entry(obj, &dev_priv->mm.gtt_list, gtt_list)
 175                 if (obj->pin_count)
 176                         pinned += obj->gtt_space->size;
 177         mutex_unlock(&dev->struct_mutex);
 178
 179         args->aper_size = dev_priv->mm.gtt_total;
 180         args->aper_available_size = args->aper_size - pinned;
 181
 182         return 0;
 183 }
 184
 185 static int
 186 i915_gem_create(struct drm_file *file,
 187                 struct drm_device *dev,
 188                 uint64_t size,
 189                 uint32_t *handle_p)
 190 {
 191         struct drm_i915_gem_object *obj;
 192         int ret;
 193         u32 handle;
 194
 195         size = roundup(size, PAGE_SIZE);
 196         if (size == 0)
 197                 return -EINVAL;
 198
 199         /* Allocate the new object */
 200         obj = i915_gem_alloc_object(dev, size);
 201         if (obj == NULL)
 202                 return -ENOMEM;
 203
 204         ret = drm_gem_handle_create(file, &obj->base, &handle);
 205         if (ret) {
 206                 drm_gem_object_release(&obj->base);
 207                 i915_gem_info_remove_obj(dev->dev_private, obj->base.size);
 208                 kfree(obj);
 209                 return ret;
 210         }
 211
 212         /* drop reference from allocate - handle holds it now */
 213         drm_gem_object_unreference(&obj->base);
 214         trace_i915_gem_object_create(obj);
 215
 216         *handle_p = handle;
 217         return 0;
 218 }
 219
 220 int
 221 i915_gem_dumb_create(struct drm_file *file,
 222                      struct drm_device *dev,
 223                      struct drm_mode_create_dumb *args)
 224 {
 225         /* have to work out size/pitch and return them */
 226         args->pitch = ALIGN(args->width * ((args->bpp + 7) / 8), 64);
 227         args->size = args->pitch * args->height;
 228         return i915_gem_create(file, dev,
 229                                args->size, &args->handle);
 230 }
 231
 232 int i915_gem_dumb_destroy(struct drm_file *file,
 233                           struct drm_device *dev,
 234                           uint32_t handle)
 235 {
 236         return drm_gem_handle_delete(file, handle);
 237 }
 238
 239 /**
 240  * Creates a new mm object and returns a handle to it.
 241  */
 242 int
 243 i915_gem_create_ioctl(struct drm_device *dev, void *data,
 244                       struct drm_file *file)
 245 {
 246         struct drm_i915_gem_create *args = data;
 247
 248         return i915_gem_create(file, dev,
 249                                args->size, &args->handle);
 250 }
 251
 252 static int i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
 253 {
 254         drm_i915_private_t *dev_priv = obj->base.dev->dev_private;
 255
 256         return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
 257                 obj->tiling_mode != I915_TILING_NONE;
 258 }
 259
 260 static inline int
 261 __copy_to_user_swizzled(char __user *cpu_vaddr,
 262                         const char *gpu_vaddr, int gpu_offset,
 263                         int length)
 264 {
 265         int ret, cpu_offset = 0;
 266
 267         while (length > 0) {
 268                 int cacheline_end = ALIGN(gpu_offset + 1, 64);
 269                 int this_length = min(cacheline_end - gpu_offset, length);
 270                 int swizzled_gpu_offset = gpu_offset ^ 64;
 271
 272                 ret = __copy_to_user(cpu_vaddr + cpu_offset,
 273                                      gpu_vaddr + swizzled_gpu_offset,
 274                                      this_length);
 275                 if (ret)
 276                         return ret + length;
 277
 278                 cpu_offset += this_length;
 279                 gpu_offset += this_length;
 280                 length -= this_length;
 281         }
 282
 283         return 0;
 284 }
 285
 286 static inline int
 287 __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
 288                           const char __user *cpu_vaddr,
 289                           int length)
 290 {
 291         int ret, cpu_offset = 0;
 292
 293         while (length > 0) {
 294                 int cacheline_end = ALIGN(gpu_offset + 1, 64);
 295                 int this_length = min(cacheline_end - gpu_offset, length);
 296                 int swizzled_gpu_offset = gpu_offset ^ 64;
 297
 298                 ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
 299                                        cpu_vaddr + cpu_offset,
 300                                        this_length);
 301                 if (ret)
 302                         return ret + length;
 303
 304                 cpu_offset += this_length;
 305                 gpu_offset += this_length;
 306                 length -= this_length;
 307         }
 308
 309         return 0;
 310 }
 311
 312 /* Per-page copy function for the shmem pread fastpath.
 313  * Flushes invalid cachelines before reading the target if
 314  * needs_clflush is set. */
 315 static int
 316 shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length,
 317                  char __user *user_data,
 318                  bool page_do_bit17_swizzling, bool needs_clflush)
 319 {
 320         char *vaddr;
 321         int ret;
 322
 323         if (unlikely(page_do_bit17_swizzling))
 324                 return -EINVAL;
 325
 326         vaddr = kmap_atomic(page);
 327         if (needs_clflush)
 328                 drm_clflush_virt_range(vaddr + shmem_page_offset,
 329                                        page_length);
 330         ret = __copy_to_user_inatomic(user_data,
 331                                       vaddr + shmem_page_offset,
 332                                       page_length);
 333         kunmap_atomic(vaddr);
 334
 335         return ret;
 336 }
 337
 338 static void
 339 shmem_clflush_swizzled_range(char *addr, unsigned long length,
 340                              bool swizzled)
 341 {
 342         if (unlikely(swizzled)) {
 343                 unsigned long start = (unsigned long) addr;
 344                 unsigned long end = (unsigned long) addr + length;
 345
 346                 /* For swizzling simply ensure that we always flush both
 347                  * channels. Lame, but simple and it works. Swizzled
 348                  * pwrite/pread is far from a hotpath - current userspace
 349                  * doesn't use it at all. */
 350                 start = round_down(start, 128);
 351                 end = round_up(end, 128);
 352
 353                 drm_clflush_virt_range((void *)start, end - start);
 354         } else {
 355                 drm_clflush_virt_range(addr, length);
 356         }
 357
 358 }
 359
 360 /* Only difference to the fast-path function is that this can handle bit17
 361  * and uses non-atomic copy and kmap functions. */
 362 static int
 363 shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
 364                  char __user *user_data,
 365                  bool page_do_bit17_swizzling, bool needs_clflush)
 366 {
 367         char *vaddr;
 368         int ret;
 369
 370         vaddr = kmap(page);
 371         if (needs_clflush)
 372                 shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
 373                                              page_length,
 374                                              page_do_bit17_swizzling);
 375
 376         if (page_do_bit17_swizzling)
 377                 ret = __copy_to_user_swizzled(user_data,
 378                                               vaddr, shmem_page_offset,
 379                                               page_length);
 380         else
 381                 ret = __copy_to_user(user_data,
 382                                      vaddr + shmem_page_offset,
 383                                      page_length);
 384         kunmap(page);
 385
 386         return ret;
 387 }
 388
 389 static int
 390 i915_gem_shmem_pread(struct drm_device *dev,
 391                      struct drm_i915_gem_object *obj,
 392                      struct drm_i915_gem_pread *args,
 393                      struct drm_file *file)
 394 {
 395         struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
 396         char __user *user_data;
 397         ssize_t remain;
 398         loff_t offset;
 399         int shmem_page_offset, page_length, ret = 0;
 400         int obj_do_bit17_swizzling, page_do_bit17_swizzling;
 401         int hit_slowpath = 0;
 402         int prefaulted = 0;
 403         int needs_clflush = 0;
 404         int release_page;
 405
 406         user_data = (char __user *) (uintptr_t) args->data_ptr;
 407         remain = args->size;
 408
 409         obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
 410
 411         if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
 412                 /* If we're not in the cpu read domain, set ourself into the gtt
 413                  * read domain and manually flush cachelines (if required). This
 414                  * optimizes for the case when the gpu will dirty the data
 415                  * anyway again before the next pread happens. */
 416                 if (obj->cache_level == I915_CACHE_NONE)
 417                         needs_clflush = 1;
 418                 ret = i915_gem_object_set_to_gtt_domain(obj, false);
 419                 if (ret)
 420                         return ret;
 421         }
 422
 423         offset = args->offset;
 424
 425         while (remain > 0) {
 426                 struct page *page;
 427
 428                 /* Operation in this page
 429                  *
 430                  * shmem_page_offset = offset within page in shmem file
 431                  * page_length = bytes to copy for this page
 432                  */
 433                 shmem_page_offset = offset_in_page(offset);
 434                 page_length = remain;
 435                 if ((shmem_page_offset + page_length) > PAGE_SIZE)
 436                         page_length = PAGE_SIZE - shmem_page_offset;
 437
 438                 if (obj->pages) {
 439                         page = obj->pages[offset >> PAGE_SHIFT];
 440                         release_page = 0;
 441                 } else {
 442                         page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT);
 443                         if (IS_ERR(page)) {
 444                                 ret = PTR_ERR(page);
 445                                 goto out;
 446                         }
 447                         release_page = 1;
 448                 }
 449
 450                 page_do_bit17_swizzling = obj_do_bit17_swizzling &&
 451                         (page_to_phys(page) & (1 << 17)) != 0;
 452
 453                 ret = shmem_pread_fast(page, shmem_page_offset, page_length,
 454                                        user_data, page_do_bit17_swizzling,
 455                                        needs_clflush);
 456                 if (ret == 0)
 457                         goto next_page;
 458
 459                 hit_slowpath = 1;
 460                 page_cache_get(page);
 461                 mutex_unlock(&dev->struct_mutex);
 462
 463                 if (!prefaulted) {
 464                         ret = fault_in_multipages_writeable(user_data, remain);
 465                         /* Userspace is tricking us, but we've already clobbered
 466                          * its pages with the prefault and promised to write the
 467                          * data up to the first fault. Hence ignore any errors
 468                          * and just continue. */
 469                         (void)ret;
 470                         prefaulted = 1;
 471                 }
 472
 473                 ret = shmem_pread_slow(page, shmem_page_offset, page_length,
 474                                        user_data, page_do_bit17_swizzling,
 475                                        needs_clflush);
 476
 477                 mutex_lock(&dev->struct_mutex);
 478                 page_cache_release(page);
 479 next_page:
 480                 mark_page_accessed(page);
 481                 if (release_page)
 482                         page_cache_release(page);
 483
 484                 if (ret) {
 485                         ret = -EFAULT;
 486                         goto out;
 487                 }
 488
 489                 remain -= page_length;
 490                 user_data += page_length;
 491                 offset += page_length;
 492         }
 493
 494 out:
 495         if (hit_slowpath) {
 496                 /* Fixup: Kill any reinstated backing storage pages */
 497                 if (obj->madv == __I915_MADV_PURGED)
 498                         i915_gem_object_truncate(obj);
 499         }
 500
 501         return ret;
 502 }
 503
 504 /**
 505  * Reads data from the object referenced by handle.
 506  *
 507  * On error, the contents of *data are undefined.
 508  */
 509 int
 510 i915_gem_pread_ioctl(struct drm_device *dev, void *data,
 511                      struct drm_file *file)
 512 {
 513         struct drm_i915_gem_pread *args = data;
 514         struct drm_i915_gem_object *obj;
 515         int ret = 0;
 516
 517         if (args->size == 0)
 518                 return 0;
 519
 520         if (!access_ok(VERIFY_WRITE,
 521                        (char __user *)(uintptr_t)args->data_ptr,
 522                        args->size))
 523                 return -EFAULT;
 524
 525         ret = i915_mutex_lock_interruptible(dev);
 526         if (ret)
 527                 return ret;
 528
 529         obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
 530         if (&obj->base == NULL) {
 531                 ret = -ENOENT;
 532                 goto unlock;
 533         }
 534
 535         /* Bounds check source.  */
 536         if (args->offset > obj->base.size ||
 537             args->size > obj->base.size - args->offset) {
 538                 ret = -EINVAL;
 539                 goto out;
 540         }
 541
 542         /* prime objects have no backing filp to GEM pread/pwrite
 543          * pages from.
 544          */
 545         if (!obj->base.filp) {
 546                 ret = -EINVAL;
 547                 goto out;
 548         }
 549
 550         trace_i915_gem_object_pread(obj, args->offset, args->size);
 551
 552         ret = i915_gem_shmem_pread(dev, obj, args, file);
 553
 554 out:
 555         drm_gem_object_unreference(&obj->base);
 556 unlock:
 557         mutex_unlock(&dev->struct_mutex);
 558         return ret;
 559 }
 560
 561 /* This is the fast write path which cannot handle
 562  * page faults in the source data
 563  */
 564
 565 static inline int
 566 fast_user_write(struct io_mapping *mapping,
 567                 loff_t page_base, int page_offset,
 568                 char __user *user_data,
 569                 int length)
 570 {
 571         void __iomem *vaddr_atomic;
 572         void *vaddr;
 573         unsigned long unwritten;
 574
 575         vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
 576         /* We can use the cpu mem copy function because this is X86. */
 577         vaddr = (void __force*)vaddr_atomic + page_offset;
 578         unwritten = __copy_from_user_inatomic_nocache(vaddr,
 579                                                       user_data, length);
 580         io_mapping_unmap_atomic(vaddr_atomic);
 581         return unwritten;
 582 }
 583
 584 /**
 585  * This is the fast pwrite path, where we copy the data directly from the
 586  * user into the GTT, uncached.
 587  */
 588 static int
 589 i915_gem_gtt_pwrite_fast(struct drm_device *dev,
 590                          struct drm_i915_gem_object *obj,
 591                          struct drm_i915_gem_pwrite *args,
 592                          struct drm_file *file)
 593 {
 594         drm_i915_private_t *dev_priv = dev->dev_private;
 595         ssize_t remain;
 596         loff_t offset, page_base;
 597         char __user *user_data;
 598         int page_offset, page_length, ret;
 599
 600         ret = i915_gem_object_pin(obj, 0, true);
 601         if (ret)
 602                 goto out;
 603
 604         ret = i915_gem_object_set_to_gtt_domain(obj, true);
 605         if (ret)
 606                 goto out_unpin;
 607
 608         ret = i915_gem_object_put_fence(obj);
 609         if (ret)
 610                 goto out_unpin;
 611
 612         user_data = (char __user *) (uintptr_t) args->data_ptr;
 613         remain = args->size;
 614
 615         offset = obj->gtt_offset + args->offset;
 616
 617         while (remain > 0) {
 618                 /* Operation in this page
 619                  *
 620                  * page_base = page offset within aperture
 621                  * page_offset = offset within page
 622                  * page_length = bytes to copy for this page
 623                  */
 624                 page_base = offset & PAGE_MASK;
 625                 page_offset = offset_in_page(offset);
 626                 page_length = remain;
 627                 if ((page_offset + remain) > PAGE_SIZE)
 628                         page_length = PAGE_SIZE - page_offset;
 629
 630                 /* If we get a fault while copying data, then (presumably) our
 631                  * source page isn't available.  Return the error and we'll
 632                  * retry in the slow path.
 633                  */
 634                 if (fast_user_write(dev_priv->mm.gtt_mapping, page_base,
 635                                     page_offset, user_data, page_length)) {
 636                         ret = -EFAULT;
 637                         goto out_unpin;
 638                 }
 639
 640                 remain -= page_length;
 641                 user_data += page_length;
 642                 offset += page_length;
 643         }
 644
 645 out_unpin:
 646         i915_gem_object_unpin(obj);
 647 out:
 648         return ret;
 649 }
 650
 651 /* Per-page copy function for the shmem pwrite fastpath.
 652  * Flushes invalid cachelines before writing to the target if
 653  * needs_clflush_before is set and flushes out any written cachelines after
 654  * writing if needs_clflush is set. */
 655 static int
 656 shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
 657                   char __user *user_data,
 658                   bool page_do_bit17_swizzling,
 659                   bool needs_clflush_before,
 660                   bool needs_clflush_after)
 661 {
 662         char *vaddr;
 663         int ret;
 664
 665         if (unlikely(page_do_bit17_swizzling))
 666                 return -EINVAL;
 667
 668         vaddr = kmap_atomic(page);
 669         if (needs_clflush_before)
 670                 drm_clflush_virt_range(vaddr + shmem_page_offset,
 671                                        page_length);
 672         ret = __copy_from_user_inatomic_nocache(vaddr + shmem_page_offset,
 673                                                 user_data,
 674                                                 page_length);
 675         if (needs_clflush_after)
 676                 drm_clflush_virt_range(vaddr + shmem_page_offset,
 677                                        page_length);
 678         kunmap_atomic(vaddr);
 679
 680         return ret;
 681 }
 682
 683 /* Only difference to the fast-path function is that this can handle bit17
 684  * and uses non-atomic copy and kmap functions. */
 685 static int
 686 shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
 687                   char __user *user_data,
 688                   bool page_do_bit17_swizzling,
 689                   bool needs_clflush_before,
 690                   bool needs_clflush_after)
 691 {
 692         char *vaddr;
 693         int ret;
 694
 695         vaddr = kmap(page);
 696         if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
 697                 shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
 698                                              page_length,
 699                                              page_do_bit17_swizzling);
 700         if (page_do_bit17_swizzling)
 701                 ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
 702                                                 user_data,
 703                                                 page_length);
 704         else
 705                 ret = __copy_from_user(vaddr + shmem_page_offset,
 706                                        user_data,
 707                                        page_length);
 708         if (needs_clflush_after)
 709                 shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
 710                                              page_length,
 711                                              page_do_bit17_swizzling);
 712         kunmap(page);
 713
 714         return ret;
 715 }
 716
 717 static int
 718 i915_gem_shmem_pwrite(struct drm_device *dev,
 719                       struct drm_i915_gem_object *obj,
 720                       struct drm_i915_gem_pwrite *args,
 721                       struct drm_file *file)
 722 {
 723         struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
 724         ssize_t remain;
 725         loff_t offset;
 726         char __user *user_data;
 727         int shmem_page_offset, page_length, ret = 0;
 728         int obj_do_bit17_swizzling, page_do_bit17_swizzling;
 729         int hit_slowpath = 0;
 730         int needs_clflush_after = 0;
 731         int needs_clflush_before = 0;
 732         int release_page;
 733
 734         user_data = (char __user *) (uintptr_t) args->data_ptr;
 735         remain = args->size;
 736
 737         obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
 738
 739         if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
 740                 /* If we're not in the cpu write domain, set ourself into the gtt
 741                  * write domain and manually flush cachelines (if required). This
 742                  * optimizes for the case when the gpu will use the data
 743                  * right away and we therefore have to clflush anyway. */
 744                 if (obj->cache_level == I915_CACHE_NONE)
 745                         needs_clflush_after = 1;
 746                 ret = i915_gem_object_set_to_gtt_domain(obj, true);
 747                 if (ret)
 748                         return ret;
 749         }
 750         /* Same trick applies for invalidate partially written cachelines before
 751          * writing.  */
 752         if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)
 753             && obj->cache_level == I915_CACHE_NONE)
 754                 needs_clflush_before = 1;
 755
 756         offset = args->offset;
 757         obj->dirty = 1;
 758
 759         while (remain > 0) {
 760                 struct page *page;
 761                 int partial_cacheline_write;
 762
 763                 /* Operation in this page
 764                  *
 765                  * shmem_page_offset = offset within page in shmem file
 766                  * page_length = bytes to copy for this page
 767                  */
 768                 shmem_page_offset = offset_in_page(offset);
 769
 770                 page_length = remain;
 771                 if ((shmem_page_offset + page_length) > PAGE_SIZE)
 772                         page_length = PAGE_SIZE - shmem_page_offset;
 773
 774                 /* If we don't overwrite a cacheline completely we need to be
 775                  * careful to have up-to-date data by first clflushing. Don't
 776                  * overcomplicate things and flush the entire patch. */
 777                 partial_cacheline_write = needs_clflush_before &&
 778                         ((shmem_page_offset | page_length)
 779                                 & (boot_cpu_data.x86_clflush_size - 1));
 780
 781                 if (obj->pages) {
 782                         page = obj->pages[offset >> PAGE_SHIFT];
 783                         release_page = 0;
 784                 } else {
 785                         page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT);
 786                         if (IS_ERR(page)) {
 787                                 ret = PTR_ERR(page);
 788                                 goto out;
 789                         }
 790                         release_page = 1;
 791                 }
 792
 793                 page_do_bit17_swizzling = obj_do_bit17_swizzling &&
 794                         (page_to_phys(page) & (1 << 17)) != 0;
 795
 796                 ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
 797                                         user_data, page_do_bit17_swizzling,
 798                                         partial_cacheline_write,
 799                                         needs_clflush_after);
 800                 if (ret == 0)
 801                         goto next_page;
 802
 803                 hit_slowpath = 1;
 804                 page_cache_get(page);
 805                 mutex_unlock(&dev->struct_mutex);
 806
 807                 ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
 808                                         user_data, page_do_bit17_swizzling,
 809                                         partial_cacheline_write,
 810                                         needs_clflush_after);
 811
 812                 mutex_lock(&dev->struct_mutex);
 813                 page_cache_release(page);
 814 next_page:
 815                 set_page_dirty(page);
 816                 mark_page_accessed(page);
 817                 if (release_page)
 818                         page_cache_release(page);
 819
 820                 if (ret) {
 821                         ret = -EFAULT;
 822                         goto out;
 823                 }
 824
 825                 remain -= page_length;
 826                 user_data += page_length;
 827                 offset += page_length;
 828         }
 829
 830 out:
 831         if (hit_slowpath) {
 832                 /* Fixup: Kill any reinstated backing storage pages */
 833                 if (obj->madv == __I915_MADV_PURGED)
 834                         i915_gem_object_truncate(obj);
 835                 /* and flush dirty cachelines in case the object isn't in the cpu write
 836                  * domain anymore. */
 837                 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
 838                         i915_gem_clflush_object(obj);
 839                         intel_gtt_chipset_flush();
 840                 }
 841         }
 842
 843         if (needs_clflush_after)
 844                 intel_gtt_chipset_flush();
 845
 846         return ret;
 847 }
 848
 849 /**
 850  * Writes data to the object referenced by handle.
 851  *
 852  * On error, the contents of the buffer that were to be modified are undefined.
 853  */
 854 int
 855 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 856                       struct drm_file *file)
 857 {
 858         struct drm_i915_gem_pwrite *args = data;
 859         struct drm_i915_gem_object *obj;
 860         int ret;
 861
 862         if (args->size == 0)
 863                 return 0;
 864
 865         if (!access_ok(VERIFY_READ,
 866                        (char __user *)(uintptr_t)args->data_ptr,
 867                        args->size))
 868                 return -EFAULT;
 869
 870         ret = fault_in_multipages_readable((char __user *)(uintptr_t)args->data_ptr,
 871                                            args->size);
 872         if (ret)
 873                 return -EFAULT;
 874
 875         ret = i915_mutex_lock_interruptible(dev);
 876         if (ret)
 877                 return ret;
 878
 879         obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
 880         if (&obj->base == NULL) {
 881                 ret = -ENOENT;
 882                 goto unlock;
 883         }
 884
 885         /* Bounds check destination. */
 886         if (args->offset > obj->base.size ||
 887             args->size > obj->base.size - args->offset) {
 888                 ret = -EINVAL;
 889                 goto out;
 890         }
 891
 892         /* prime objects have no backing filp to GEM pread/pwrite
 893          * pages from.
 894          */
 895         if (!obj->base.filp) {
 896                 ret = -EINVAL;
 897                 goto out;
 898         }
 899
 900         trace_i915_gem_object_pwrite(obj, args->offset, args->size);
 901
 902         ret = -EFAULT;
 903         /* We can only do the GTT pwrite on untiled buffers, as otherwise
 904          * it would end up going through the fenced access, and we'll get
 905          * different detiling behavior between reading and writing.
 906          * pread/pwrite currently are reading and writing from the CPU
 907          * perspective, requiring manual detiling by the client.
 908          */
 909         if (obj->phys_obj) {
 910                 ret = i915_gem_phys_pwrite(dev, obj, args, file);
 911                 goto out;
 912         }
 913
 914         if (obj->gtt_space &&
 915             obj->cache_level == I915_CACHE_NONE &&
 916             obj->tiling_mode == I915_TILING_NONE &&
 917             obj->map_and_fenceable &&
 918             obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
 919                 ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
 920                 /* Note that the gtt paths might fail with non-page-backed user
 921                  * pointers (e.g. gtt mappings when moving data between
 922                  * textures). Fallback to the shmem path in that case. */
 923         }
 924
 925         if (ret == -EFAULT)
 926                 ret = i915_gem_shmem_pwrite(dev, obj, args, file);
 927
 928 out:
 929         drm_gem_object_unreference(&obj->base);
 930 unlock:
 931         mutex_unlock(&dev->struct_mutex);
 932         return ret;
 933 }
 934
 935 /**
 936  * Called when user space prepares to use an object with the CPU, either
 937  * through the mmap ioctl's mapping or a GTT mapping.
 938  */
 939 int
 940 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 941                           struct drm_file *file)
 942 {
 943         struct drm_i915_gem_set_domain *args = data;
 944         struct drm_i915_gem_object *obj;
 945         uint32_t read_domains = args->read_domains;
 946         uint32_t write_domain = args->write_domain;
 947         int ret;
 948
 949         /* Only handle setting domains to types used by the CPU. */
 950         if (write_domain & I915_GEM_GPU_DOMAINS)
 951                 return -EINVAL;
 952
 953         if (read_domains & I915_GEM_GPU_DOMAINS)
 954                 return -EINVAL;
 955
 956         /* Having something in the write domain implies it's in the read
 957          * domain, and only that read domain.  Enforce that in the request.
 958          */
 959         if (write_domain != 0 && read_domains != write_domain)
 960                 return -EINVAL;
 961
 962         ret = i915_mutex_lock_interruptible(dev);
 963         if (ret)
 964                 return ret;
 965
 966         obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
 967         if (&obj->base == NULL) {
 968                 ret = -ENOENT;
 969                 goto unlock;
 970         }
 971
 972         if (read_domains & I915_GEM_DOMAIN_GTT) {
 973                 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
 974
 975                 /* Silently promote "you're not bound, there was nothing to do"
 976                  * to success, since the client was just asking us to
 977                  * make sure everything was done.
 978                  */
 979                 if (ret == -EINVAL)
 980                         ret = 0;
 981         } else {
 982                 ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
 983         }
 984
 985         drm_gem_object_unreference(&obj->base);
 986 unlock:
 987         mutex_unlock(&dev->struct_mutex);
 988         return ret;
 989 }
 990
 991 /**
 992  * Called when user space has done writes to this buffer
 993  */
 994 int
 995 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
 996                          struct drm_file *file)
 997 {
 998         struct drm_i915_gem_sw_finish *args = data;
 999         struct drm_i915_gem_object *obj;
1000         int ret = 0;
1001
1002         ret = i915_mutex_lock_interruptible(dev);
1003         if (ret)
1004                 return ret;
1005
1006         obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
1007         if (&obj->base == NULL) {
1008                 ret = -ENOENT;
1009                 goto unlock;
1010         }
1011
1012         /* Pinned buffers may be scanout, so flush the cache */
1013         if (obj->pin_count)
1014                 i915_gem_object_flush_cpu_write_domain(obj);
1015
1016         drm_gem_object_unreference(&obj->base);
1017 unlock:
1018         mutex_unlock(&dev->struct_mutex);
1019         return ret;
1020 }
1021
1022 /**
1023  * Maps the contents of an object, returning the address it is mapped
1024  * into.
1025  *
1026  * While the mapping holds a reference on the contents of the object, it doesn't
1027  * imply a ref on the object itself.
1028  */
1029 int
1030 i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1031                     struct drm_file *file)
1032 {
1033         struct drm_i915_gem_mmap *args = data;
1034         struct drm_gem_object *obj;
1035         unsigned long addr;
1036
1037         obj = drm_gem_object_lookup(dev, file, args->handle);
1038         if (obj == NULL)
1039                 return -ENOENT;
1040
1041         /* prime objects have no backing filp to GEM mmap
1042          * pages from.
1043          */
1044         if (!obj->filp) {
1045                 drm_gem_object_unreference_unlocked(obj);
1046                 return -EINVAL;
1047         }
1048
1049         addr = vm_mmap(obj->filp, 0, args->size,
1050                        PROT_READ | PROT_WRITE, MAP_SHARED,
1051                        args->offset);
1052         drm_gem_object_unreference_unlocked(obj);
1053         if (IS_ERR((void *)addr))
1054                 return addr;
1055
1056         args->addr_ptr = (uint64_t) addr;
1057
1058         return 0;
1059 }
1060
1061 /**
1062  * i915_gem_fault - fault a page into the GTT
1063  * vma: VMA in question
1064  * vmf: fault info
1065  *
1066  * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped
1067  * from userspace.  The fault handler takes care of binding the object to
1068  * the GTT (if needed), allocating and programming a fence register (again,
1069  * only if needed based on whether the old reg is still valid or the object
1070  * is tiled) and inserting a new PTE into the faulting process.
1071  *
1072  * Note that the faulting process may involve evicting existing objects
1073  * from the GTT and/or fence registers to make room.  So performance may
1074  * suffer if the GTT working set is large or there are few fence registers
1075  * left.
1076  */
1077 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1078 {
1079         struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data);
1080         struct drm_device *dev = obj->base.dev;
1081         drm_i915_private_t *dev_priv = dev->dev_private;
1082         pgoff_t page_offset;
1083         unsigned long pfn;
1084         int ret = 0;
1085         bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
1086
1087         /* We don't use vmf->pgoff since that has the fake offset */
1088         page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
1089                 PAGE_SHIFT;
1090
1091         ret = i915_mutex_lock_interruptible(dev);
1092         if (ret)
1093                 goto out;
1094
1095         trace_i915_gem_object_fault(obj, page_offset, true, write);
1096
1097         /* Now bind it into the GTT if needed */
1098         if (!obj->map_and_fenceable) {
1099                 ret = i915_gem_object_unbind(obj);
1100                 if (ret)
1101                         goto unlock;
1102         }
1103         if (!obj->gtt_space) {
1104                 ret = i915_gem_object_bind_to_gtt(obj, 0, true);
1105                 if (ret)
1106                         goto unlock;
1107
1108                 ret = i915_gem_object_set_to_gtt_domain(obj, write);
1109                 if (ret)
1110                         goto unlock;
1111         }
1112
1113         if (!obj->has_global_gtt_mapping)
1114                 i915_gem_gtt_bind_object(obj, obj->cache_level);
1115
1116         ret = i915_gem_object_get_fence(obj);
1117         if (ret)
1118                 goto unlock;
1119
1120         if (i915_gem_object_is_inactive(obj))
1121                 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
1122
1123         obj->fault_mappable = true;
1124
1125         pfn = ((dev->agp->base + obj->gtt_offset) >> PAGE_SHIFT) +
1126                 page_offset;
1127
1128         /* Finally, remap it using the new GTT offset */
1129         ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
1130 unlock:
1131         mutex_unlock(&dev->struct_mutex);
1132 out:
1133         switch (ret) {
1134         case -EIO:
1135         case -EAGAIN:
1136                 /* Give the error handler a chance to run and move the
1137                  * objects off the GPU active list. Next time we service the
1138                  * fault, we should be able to transition the page into the
1139                  * GTT without touching the GPU (and so avoid further
1140                  * EIO/EGAIN). If the GPU is wedged, then there is no issue
1141                  * with coherency, just lost writes.
1142                  */
1143                 set_need_resched();
1144         case 0:
1145         case -ERESTARTSYS:
1146         case -EINTR:
1147                 return VM_FAULT_NOPAGE;
1148         case -ENOMEM:
1149                 return VM_FAULT_OOM;
1150         default:
1151                 return VM_FAULT_SIGBUS;
1152         }
1153 }
1154
1155 /**
1156  * i915_gem_release_mmap - remove physical page mappings
1157  * @obj: obj in question
1158  *
1159  * Preserve the reservation of the mmapping with the DRM core code, but
1160  * relinquish ownership of the pages back to the system.
1161  *
1162  * It is vital that we remove the page mapping if we have mapped a tiled
1163  * object through the GTT and then lose the fence register due to
1164  * resource pressure. Similarly if the object has been moved out of the
1165  * aperture, than pages mapped into userspace must be revoked. Removing the
1166  * mapping will then trigger a page fault on the next user access, allowing
1167  * fixup by i915_gem_fault().
1168  */
1169 void
1170 i915_gem_release_mmap(struct drm_i915_gem_object *obj)
1171 {
1172         if (!obj->fault_mappable)
1173                 return;
1174
1175         if (obj->base.dev->dev_mapping)
1176                 unmap_mapping_range(obj->base.dev->dev_mapping,
1177                                     (loff_t)obj->base.map_list.hash.key<<PAGE_SHIFT,
1178                                     obj->base.size, 1);
1179
1180         obj->fault_mappable = false;
1181 }
1182
1183 static uint32_t
1184 i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
1185 {
1186         uint32_t gtt_size;
1187
1188         if (INTEL_INFO(dev)->gen >= 4 ||
1189             tiling_mode == I915_TILING_NONE)
1190                 return size;
1191
1192         /* Previous chips need a power-of-two fence region when tiling */
1193         if (INTEL_INFO(dev)->gen == 3)
1194                 gtt_size = 1024*1024;
1195         else
1196                 gtt_size = 512*1024;
1197
1198         while (gtt_size < size)
1199                 gtt_size <<= 1;
1200
1201         return gtt_size;
1202 }
1203
1204 /**
1205  * i915_gem_get_gtt_alignment - return required GTT alignment for an object
1206  * @obj: object to check
1207  *
1208  * Return the required GTT alignment for an object, taking into account
1209  * potential fence register mapping.
1210  */
1211 static uint32_t
1212 i915_gem_get_gtt_alignment(struct drm_device *dev,
1213                            uint32_t size,
1214                            int tiling_mode)
1215 {
1216         /*
1217          * Minimum alignment is 4k (GTT page size), but might be greater
1218          * if a fence register is needed for the object.
1219          */
1220         if (INTEL_INFO(dev)->gen >= 4 ||
1221             tiling_mode == I915_TILING_NONE)
1222                 return 4096;
1223
1224         /*
1225          * Previous chips need to be aligned to the size of the smallest
1226          * fence register that can contain the object.
1227          */
1228         return i915_gem_get_gtt_size(dev, size, tiling_mode);
1229 }
1230
1231 /**
1232  * i915_gem_get_unfenced_gtt_alignment - return required GTT alignment for an
1233  *                                       unfenced object
1234  * @dev: the device
1235  * @size: size of the object
1236  * @tiling_mode: tiling mode of the object
1237  *
1238  * Return the required GTT alignment for an object, only taking into account
1239  * unfenced tiled surface requirements.
1240  */
1241 uint32_t
1242 i915_gem_get_unfenced_gtt_alignment(struct drm_device *dev,
1243                                     uint32_t size,
1244                                     int tiling_mode)
1245 {
1246         /*
1247          * Minimum alignment is 4k (GTT page size) for sane hw.
1248          */
1249         if (INTEL_INFO(dev)->gen >= 4 || IS_G33(dev) ||
1250             tiling_mode == I915_TILING_NONE)
1251                 return 4096;
1252
1253         /* Previous hardware however needs to be aligned to a power-of-two
1254          * tile height. The simplest method for determining this is to reuse
1255          * the power-of-tile object size.
1256          */
1257         return i915_gem_get_gtt_size(dev, size, tiling_mode);
1258 }
1259
1260 int
1261 i915_gem_mmap_gtt(struct drm_file *file,
1262                   struct drm_device *dev,
1263                   uint32_t handle,
1264                   uint64_t *offset)
1265 {
1266         struct drm_i915_private *dev_priv = dev->dev_private;
1267         struct drm_i915_gem_object *obj;
1268         int ret;
1269
1270         ret = i915_mutex_lock_interruptible(dev);
1271         if (ret)
1272                 return ret;
1273
1274         obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle));
1275         if (&obj->base == NULL) {
1276                 ret = -ENOENT;
1277                 goto unlock;
1278         }
1279
1280         if (obj->base.size > dev_priv->mm.gtt_mappable_end) {
1281                 ret = -E2BIG;
1282                 goto out;
1283         }
1284
1285         if (obj->madv != I915_MADV_WILLNEED) {
1286                 DRM_ERROR("Attempting to mmap a purgeable buffer\n");
1287                 ret = -EINVAL;
1288                 goto out;
1289         }
1290
1291         if (!obj->base.map_list.map) {
1292                 ret = drm_gem_create_mmap_offset(&obj->base);
1293                 if (ret)
1294                         goto out;
1295         }
1296
1297         *offset = (u64)obj->base.map_list.hash.key << PAGE_SHIFT;
1298
1299 out:
1300         drm_gem_object_unreference(&obj->base);
1301 unlock:
1302         mutex_unlock(&dev->struct_mutex);
1303         return ret;
1304 }
1305
1306 /**
1307  * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
1308  * @dev: DRM device
1309  * @data: GTT mapping ioctl data
1310  * @file: GEM object info
1311  *
1312  * Simply returns the fake offset to userspace so it can mmap it.
1313  * The mmap call will end up in drm_gem_mmap(), which will set things
1314  * up so we can get faults in the handler above.
1315  *
1316  * The fault handler will take care of binding the object into the GTT
1317  * (since it may have been evicted to make room for something), allocating
1318  * a fence register, and mapping the appropriate aperture address into
1319  * userspace.
1320  */
1321 int
1322 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
1323                         struct drm_file *file)
1324 {
1325         struct drm_i915_gem_mmap_gtt *args = data;
1326
1327         return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
1328 }
1329
1330 int
1331 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj,
1332                               gfp_t gfpmask)
1333 {
1334         int page_count, i;
1335         struct address_space *mapping;
1336         struct inode *inode;
1337         struct page *page;
1338
1339         if (obj->pages || obj->sg_table)
1340                 return 0;
1341
1342         /* Get the list of pages out of our struct file.  They'll be pinned
1343          * at this point until we release them.
1344          */
1345         page_count = obj->base.size / PAGE_SIZE;
1346         BUG_ON(obj->pages != NULL);
1347         obj->pages = drm_malloc_ab(page_count, sizeof(struct page *));
1348         if (obj->pages == NULL)
1349                 return -ENOMEM;
1350
1351         inode = obj->base.filp->f_path.dentry->d_inode;
1352         mapping = inode->i_mapping;
1353         gfpmask |= mapping_gfp_mask(mapping);
1354
1355         for (i = 0; i < page_count; i++) {
1356                 page = shmem_read_mapping_page_gfp(mapping, i, gfpmask);
1357                 if (IS_ERR(page))
1358                         goto err_pages;
1359
1360                 obj->pages[i] = page;
1361         }
1362
1363         if (i915_gem_object_needs_bit17_swizzle(obj))
1364                 i915_gem_object_do_bit_17_swizzle(obj);
1365
1366         return 0;
1367
1368 err_pages:
1369         while (i--)
1370                 page_cache_release(obj->pages[i]);
1371
1372         drm_free_large(obj->pages);
1373         obj->pages = NULL;
1374         return PTR_ERR(page);
1375 }
1376
1377 static void
1378 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
1379 {
1380         int page_count = obj->base.size / PAGE_SIZE;
1381         int i;
1382
1383         if (!obj->pages)
1384                 return;
1385
1386         BUG_ON(obj->madv == __I915_MADV_PURGED);
1387
1388         if (i915_gem_object_needs_bit17_swizzle(obj))
1389                 i915_gem_object_save_bit_17_swizzle(obj);
1390
1391         if (obj->madv == I915_MADV_DONTNEED)
1392                 obj->dirty = 0;
1393
1394         for (i = 0; i < page_count; i++) {
1395                 if (obj->dirty)
1396                         set_page_dirty(obj->pages[i]);
1397
1398                 if (obj->madv == I915_MADV_WILLNEED)
1399                         mark_page_accessed(obj->pages[i]);
1400
1401                 page_cache_release(obj->pages[i]);
1402         }
1403         obj->dirty = 0;
1404
1405         drm_free_large(obj->pages);
1406         obj->pages = NULL;
1407 }
1408
1409 void
1410 i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
1411                                struct intel_ring_buffer *ring,
1412                                u32 seqno)
1413 {
1414         struct drm_device *dev = obj->base.dev;
1415         struct drm_i915_private *dev_priv = dev->dev_private;
1416
1417         BUG_ON(ring == NULL);
1418         obj->ring = ring;
1419
1420         /* Add a reference if we're newly entering the active list. */
1421         if (!obj->active) {
1422                 drm_gem_object_reference(&obj->base);
1423                 obj->active = 1;
1424         }
1425
1426         /* Move from whatever list we were on to the tail of execution. */
1427         list_move_tail(&obj->mm_list, &dev_priv->mm.active_list);
1428         list_move_tail(&obj->ring_list, &ring->active_list);
1429
1430         obj->last_rendering_seqno = seqno;
1431
1432         if (obj->fenced_gpu_access) {
1433                 obj->last_fenced_seqno = seqno;
1434
1435                 /* Bump MRU to take account of the delayed flush */
1436                 if (obj->fence_reg != I915_FENCE_REG_NONE) {
1437                         struct drm_i915_fence_reg *reg;
1438
1439                         reg = &dev_priv->fence_regs[obj->fence_reg];
1440                         list_move_tail(&reg->lru_list,
1441                                        &dev_priv->mm.fence_list);
1442                 }
1443         }
1444 }
1445
1446 static void
1447 i915_gem_object_move_off_active(struct drm_i915_gem_object *obj)
1448 {
1449         list_del_init(&obj->ring_list);
1450         obj->last_rendering_seqno = 0;
1451         obj->last_fenced_seqno = 0;
1452 }
1453
1454 static void
1455 i915_gem_object_move_to_flushing(struct drm_i915_gem_object *obj)
1456 {
1457         struct drm_device *dev = obj->base.dev;
1458         drm_i915_private_t *dev_priv = dev->dev_private;
1459
1460         BUG_ON(!obj->active);
1461         list_move_tail(&obj->mm_list, &dev_priv->mm.flushing_list);
1462
1463         i915_gem_object_move_off_active(obj);
1464 }
1465
1466 static void
1467 i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
1468 {
1469         struct drm_device *dev = obj->base.dev;
1470         struct drm_i915_private *dev_priv = dev->dev_private;
1471
1472         list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
1473
1474         BUG_ON(!list_empty(&obj->gpu_write_list));
1475         BUG_ON(!obj->active);
1476         obj->ring = NULL;
1477
1478         i915_gem_object_move_off_active(obj);
1479         obj->fenced_gpu_access = false;
1480
1481         obj->active = 0;
1482         obj->pending_gpu_write = false;
1483         drm_gem_object_unreference(&obj->base);
1484
1485         WARN_ON(i915_verify_lists(dev));
1486 }
1487
1488 /* Immediately discard the backing storage */
1489 static void
1490 i915_gem_object_truncate(struct drm_i915_gem_object *obj)
1491 {
1492         struct inode *inode;
1493
1494         /* Our goal here is to return as much of the memory as
1495          * is possible back to the system as we are called from OOM.
1496          * To do this we must instruct the shmfs to drop all of its
1497          * backing pages, *now*.
1498          */
1499         inode = obj->base.filp->f_path.dentry->d_inode;
1500         shmem_truncate_range(inode, 0, (loff_t)-1);
1501
1502         if (obj->base.map_list.map)
1503                 drm_gem_free_mmap_offset(&obj->base);
1504
1505         obj->madv = __I915_MADV_PURGED;
1506 }
1507
1508 static inline int
1509 i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj)
1510 {
1511         return obj->madv == I915_MADV_DONTNEED;
1512 }
1513
1514 static void
1515 i915_gem_process_flushing_list(struct intel_ring_buffer *ring,
1516                                uint32_t flush_domains)
1517 {
1518         struct drm_i915_gem_object *obj, *next;
1519
1520         list_for_each_entry_safe(obj, next,
1521                                  &ring->gpu_write_list,
1522                                  gpu_write_list) {
1523                 if (obj->base.write_domain & flush_domains) {
1524                         uint32_t old_write_domain = obj->base.write_domain;
1525
1526                         obj->base.write_domain = 0;
1527                         list_del_init(&obj->gpu_write_list);
1528                         i915_gem_object_move_to_active(obj, ring,
1529                                                        i915_gem_next_request_seqno(ring));
1530
1531                         trace_i915_gem_object_change_domain(obj,
1532                                                             obj->base.read_domains,
1533                                                             old_write_domain);
1534                 }
1535         }
1536 }
1537
1538 static u32
1539 i915_gem_get_seqno(struct drm_device *dev)
1540 {
1541         drm_i915_private_t *dev_priv = dev->dev_private;
1542         u32 seqno = dev_priv->next_seqno;
1543
1544         /* reserve 0 for non-seqno */
1545         if (++dev_priv->next_seqno == 0)
1546                 dev_priv->next_seqno = 1;
1547
1548         return seqno;
1549 }
1550
1551 u32
1552 i915_gem_next_request_seqno(struct intel_ring_buffer *ring)
1553 {
1554         if (ring->outstanding_lazy_request == 0)
1555                 ring->outstanding_lazy_request = i915_gem_get_seqno(ring->dev);
1556
1557         return ring->outstanding_lazy_request;
1558 }
1559
1560 int
1561 i915_add_request(struct intel_ring_buffer *ring,
1562                  struct drm_file *file,
1563                  struct drm_i915_gem_request *request)
1564 {
1565         drm_i915_private_t *dev_priv = ring->dev->dev_private;
1566         uint32_t seqno;
1567         u32 request_ring_position;
1568         int was_empty;
1569         int ret;
1570
1571         BUG_ON(request == NULL);
1572         seqno = i915_gem_next_request_seqno(ring);
1573
1574         /* Record the position of the start of the request so that
1575          * should we detect the updated seqno part-way through the
1576          * GPU processing the request, we never over-estimate the
1577          * position of the head.
1578          */
1579         request_ring_position = intel_ring_get_tail(ring);
1580
1581         ret = ring->add_request(ring, &seqno);
1582         if (ret)
1583             return ret;
1584
1585         trace_i915_gem_request_add(ring, seqno);
1586
1587         request->seqno = seqno;
1588         request->ring = ring;
1589         request->tail = request_ring_position;
1590         request->emitted_jiffies = jiffies;
1591         was_empty = list_empty(&ring->request_list);
1592         list_add_tail(&request->list, &ring->request_list);
1593
1594         if (file) {
1595                 struct drm_i915_file_private *file_priv = file->driver_priv;
1596
1597                 spin_lock(&file_priv->mm.lock);
1598                 request->file_priv = file_priv;
1599                 list_add_tail(&request->client_list,
1600                               &file_priv->mm.request_list);
1601                 spin_unlock(&file_priv->mm.lock);
1602         }
1603
1604         ring->outstanding_lazy_request = 0;
1605
1606         if (!dev_priv->mm.suspended) {
1607                 if (i915_enable_hangcheck) {
1608                         mod_timer(&dev_priv->hangcheck_timer,
1609                                   jiffies +
1610                                   msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD));
1611                 }
1612                 if (was_empty)
1613                         queue_delayed_work(dev_priv->wq,
1614                                            &dev_priv->mm.retire_work, HZ);
1615         }
1616         return 0;
1617 }
1618
1619 static inline void
1620 i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
1621 {
1622         struct drm_i915_file_private *file_priv = request->file_priv;
1623
1624         if (!file_priv)
1625                 return;
1626
1627         spin_lock(&file_priv->mm.lock);
1628         if (request->file_priv) {
1629                 list_del(&request->client_list);
1630                 request->file_priv = NULL;
1631         }
1632         spin_unlock(&file_priv->mm.lock);
1633 }
1634
1635 static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv,
1636                                       struct intel_ring_buffer *ring)
1637 {
1638         while (!list_empty(&ring->request_list)) {
1639                 struct drm_i915_gem_request *request;
1640
1641                 request = list_first_entry(&ring->request_list,
1642                                            struct drm_i915_gem_request,
1643                                            list);
1644
1645                 list_del(&request->list);
1646                 i915_gem_request_remove_from_client(request);
1647                 kfree(request);
1648         }
1649
1650         while (!list_empty(&ring->active_list)) {
1651                 struct drm_i915_gem_object *obj;
1652
1653                 obj = list_first_entry(&ring->active_list,
1654                                        struct drm_i915_gem_object,
1655                                        ring_list);
1656
1657                 obj->base.write_domain = 0;
1658                 list_del_init(&obj->gpu_write_list);
1659                 i915_gem_object_move_to_inactive(obj);
1660         }
1661 }
1662
1663 static void i915_gem_reset_fences(struct drm_device *dev)
1664 {
1665         struct drm_i915_private *dev_priv = dev->dev_private;
1666         int i;
1667
1668         for (i = 0; i < dev_priv->num_fence_regs; i++) {
1669                 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
1670
1671                 i915_gem_write_fence(dev, i, NULL);
1672
1673                 if (reg->obj)
1674                         i915_gem_object_fence_lost(reg->obj);
1675
1676                 reg->pin_count = 0;
1677                 reg->obj = NULL;
1678                 INIT_LIST_HEAD(&reg->lru_list);
1679         }
1680
1681         INIT_LIST_HEAD(&dev_priv->mm.fence_list);
1682 }
1683
1684 void i915_gem_reset(struct drm_device *dev)
1685 {
1686         struct drm_i915_private *dev_priv = dev->dev_private;
1687         struct drm_i915_gem_object *obj;
1688         struct intel_ring_buffer *ring;
1689         int i;
1690
1691         for_each_ring(ring, dev_priv, i)
1692                 i915_gem_reset_ring_lists(dev_priv, ring);
1693
1694         /* Remove anything from the flushing lists. The GPU cache is likely
1695          * to be lost on reset along with the data, so simply move the
1696          * lost bo to the inactive list.
1697          */
1698         while (!list_empty(&dev_priv->mm.flushing_list)) {
1699                 obj = list_first_entry(&dev_priv->mm.flushing_list,
1700                                       struct drm_i915_gem_object,
1701                                       mm_list);
1702
1703                 obj->base.write_domain = 0;
1704                 list_del_init(&obj->gpu_write_list);
1705                 i915_gem_object_move_to_inactive(obj);
1706         }
1707
1708         /* Move everything out of the GPU domains to ensure we do any
1709          * necessary invalidation upon reuse.
1710          */
1711         list_for_each_entry(obj,
1712                             &dev_priv->mm.inactive_list,
1713                             mm_list)
1714         {
1715                 obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
1716         }
1717
1718         /* The fence registers are invalidated so clear them out */
1719         i915_gem_reset_fences(dev);
1720 }
1721
1722 /**
1723  * This function clears the request list as sequence numbers are passed.
1724  */
1725 void
1726 i915_gem_retire_requests_ring(struct intel_ring_buffer *ring)
1727 {
1728         uint32_t seqno;
1729         int i;
1730
1731         if (list_empty(&ring->request_list))
1732                 return;
1733
1734         WARN_ON(i915_verify_lists(ring->dev));
1735
1736         seqno = ring->get_seqno(ring);
1737
1738         for (i = 0; i < ARRAY_SIZE(ring->sync_seqno); i++)
1739                 if (seqno >= ring->sync_seqno[i])
1740                         ring->sync_seqno[i] = 0;
1741
1742         while (!list_empty(&ring->request_list)) {
1743                 struct drm_i915_gem_request *request;
1744
1745                 request = list_first_entry(&ring->request_list,
1746                                            struct drm_i915_gem_request,
1747                                            list);
1748
1749                 if (!i915_seqno_passed(seqno, request->seqno))
1750                         break;
1751
1752                 trace_i915_gem_request_retire(ring, request->seqno);
1753                 /* We know the GPU must have read the request to have
1754                  * sent us the seqno + interrupt, so use the position
1755                  * of tail of the request to update the last known position
1756                  * of the GPU head.
1757                  */
1758                 ring->last_retired_head = request->tail;
1759
1760                 list_del(&request->list);
1761                 i915_gem_request_remove_from_client(request);
1762                 kfree(request);
1763         }
1764
1765         /* Move any buffers on the active list that are no longer referenced
1766          * by the ringbuffer to the flushing/inactive lists as appropriate.
1767          */
1768         while (!list_empty(&ring->active_list)) {
1769                 struct drm_i915_gem_object *obj;
1770
1771                 obj = list_first_entry(&ring->active_list,
1772                                       struct drm_i915_gem_object,
1773                                       ring_list);
1774
1775                 if (!i915_seqno_passed(seqno, obj->last_rendering_seqno))
1776                         break;
1777
1778                 if (obj->base.write_domain != 0)
1779                         i915_gem_object_move_to_flushing(obj);
1780                 else
1781                         i915_gem_object_move_to_inactive(obj);
1782         }
1783
1784         if (unlikely(ring->trace_irq_seqno &&
1785                      i915_seqno_passed(seqno, ring->trace_irq_seqno))) {
1786                 ring->irq_put(ring);
1787                 ring->trace_irq_seqno = 0;
1788         }
1789
1790         WARN_ON(i915_verify_lists(ring->dev));
1791 }
1792
1793 void
1794 i915_gem_retire_requests(struct drm_device *dev)
1795 {
1796         drm_i915_private_t *dev_priv = dev->dev_private;
1797         struct intel_ring_buffer *ring;
1798         int i;
1799
1800         for_each_ring(ring, dev_priv, i)
1801                 i915_gem_retire_requests_ring(ring);
1802 }
1803
1804 static void
1805 i915_gem_retire_work_handler(struct work_struct *work)
1806 {
1807         drm_i915_private_t *dev_priv;
1808         struct drm_device *dev;
1809         struct intel_ring_buffer *ring;
1810         bool idle;
1811         int i;
1812
1813         dev_priv = container_of(work, drm_i915_private_t,
1814                                 mm.retire_work.work);
1815         dev = dev_priv->dev;
1816
1817         /* Come back later if the device is busy... */
1818         if (!mutex_trylock(&dev->struct_mutex)) {
1819                 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
1820                 return;
1821         }
1822
1823         i915_gem_retire_requests(dev);
1824
1825         /* Send a periodic flush down the ring so we don't hold onto GEM
1826          * objects indefinitely.
1827          */
1828         idle = true;
1829         for_each_ring(ring, dev_priv, i) {
1830                 if (!list_empty(&ring->gpu_write_list)) {
1831                         struct drm_i915_gem_request *request;
1832                         int ret;
1833
1834                         ret = i915_gem_flush_ring(ring,
1835                                                   0, I915_GEM_GPU_DOMAINS);
1836                         request = kzalloc(sizeof(*request), GFP_KERNEL);
1837                         if (ret || request == NULL ||
1838                             i915_add_request(ring, NULL, request))
1839                             kfree(request);
1840                 }
1841
1842                 idle &= list_empty(&ring->request_list);
1843         }
1844
1845         if (!dev_priv->mm.suspended && !idle)
1846                 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
1847
1848         mutex_unlock(&dev->struct_mutex);
1849 }
1850
1851 static int
1852 i915_gem_check_wedge(struct drm_i915_private *dev_priv)
1853 {
1854         BUG_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex));
1855
1856         if (atomic_read(&dev_priv->mm.wedged)) {
1857                 struct completion *x = &dev_priv->error_completion;
1858                 bool recovery_complete;
1859                 unsigned long flags;
1860
1861                 /* Give the error handler a chance to run. */
1862                 spin_lock_irqsave(&x->wait.lock, flags);
1863                 recovery_complete = x->done > 0;
1864                 spin_unlock_irqrestore(&x->wait.lock, flags);
1865
1866                 return recovery_complete ? -EIO : -EAGAIN;
1867         }
1868
1869         return 0;
1870 }
1871
1872 /*
1873  * Compare seqno against outstanding lazy request. Emit a request if they are
1874  * equal.
1875  */
1876 static int
1877 i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno)
1878 {
1879         int ret = 0;
1880
1881         BUG_ON(!mutex_is_locked(&ring->dev->struct_mutex));
1882
1883         if (seqno == ring->outstanding_lazy_request) {
1884                 struct drm_i915_gem_request *request;
1885
1886                 request = kzalloc(sizeof(*request), GFP_KERNEL);
1887                 if (request == NULL)
1888                         return -ENOMEM;
1889
1890                 ret = i915_add_request(ring, NULL, request);
1891                 if (ret) {
1892                         kfree(request);
1893                         return ret;
1894                 }
1895
1896                 BUG_ON(seqno != request->seqno);
1897         }
1898
1899         return ret;
1900 }
1901
1902 static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
1903                         bool interruptible)
1904 {
1905         drm_i915_private_t *dev_priv = ring->dev->dev_private;
1906         int ret = 0;
1907
1908         if (i915_seqno_passed(ring->get_seqno(ring), seqno))
1909                 return 0;
1910
1911         trace_i915_gem_request_wait_begin(ring, seqno);
1912         if (WARN_ON(!ring->irq_get(ring)))
1913                 return -ENODEV;
1914
1915 #define EXIT_COND \
1916         (i915_seqno_passed(ring->get_seqno(ring), seqno) || \
1917         atomic_read(&dev_priv->mm.wedged))
1918
1919         if (interruptible)
1920                 ret = wait_event_interruptible(ring->irq_queue,
1921                                                EXIT_COND);
1922         else
1923                 wait_event(ring->irq_queue, EXIT_COND);
1924
1925         ring->irq_put(ring);
1926         trace_i915_gem_request_wait_end(ring, seqno);
1927 #undef EXIT_COND
1928
1929         return ret;
1930 }
1931
1932 /**
1933  * Waits for a sequence number to be signaled, and cleans up the
1934  * request and object lists appropriately for that event.
1935  */
1936 int
1937 i915_wait_request(struct intel_ring_buffer *ring,
1938                   uint32_t seqno)
1939 {
1940         drm_i915_private_t *dev_priv = ring->dev->dev_private;
1941         int ret = 0;
1942
1943         BUG_ON(seqno == 0);
1944
1945         ret = i915_gem_check_wedge(dev_priv);
1946         if (ret)
1947                 return ret;
1948
1949         ret = i915_gem_check_olr(ring, seqno);
1950         if (ret)
1951                 return ret;
1952
1953         ret = __wait_seqno(ring, seqno, dev_priv->mm.interruptible);
1954         if (atomic_read(&dev_priv->mm.wedged))
1955                 ret = -EAGAIN;
1956
1957         return ret;
1958 }
1959
1960 /**
1961  * Ensures that all rendering to the object has completed and the object is
1962  * safe to unbind from the GTT or access from the CPU.
1963  */
1964 int
1965 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj)
1966 {
1967         int ret;
1968
1969         /* This function only exists to support waiting for existing rendering,
1970          * not for emitting required flushes.
1971          */
1972         BUG_ON((obj->base.write_domain & I915_GEM_GPU_DOMAINS) != 0);
1973
1974         /* If there is rendering queued on the buffer being evicted, wait for
1975          * it.
1976          */
1977         if (obj->active) {
1978                 ret = i915_wait_request(obj->ring, obj->last_rendering_seqno);
1979                 if (ret)
1980                         return ret;
1981                 i915_gem_retire_requests_ring(obj->ring);
1982         }
1983
1984         return 0;
1985 }
1986
1987 /**
1988  * i915_gem_object_sync - sync an object to a ring.
1989  *
1990  * @obj: object which may be in use on another ring.
1991  * @to: ring we wish to use the object on. May be NULL.
1992  *
1993  * This code is meant to abstract object synchronization with the GPU.
1994  * Calling with NULL implies synchronizing the object with the CPU
1995  * rather than a particular GPU ring.
1996  *
1997  * Returns 0 if successful, else propagates up the lower layer error.
1998  */
1999 int
2000 i915_gem_object_sync(struct drm_i915_gem_object *obj,
2001                      struct intel_ring_buffer *to)
2002 {
2003         struct intel_ring_buffer *from = obj->ring;
2004         u32 seqno;
2005         int ret, idx;
2006
2007         if (from == NULL || to == from)
2008                 return 0;
2009
2010         if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev))
2011                 return i915_gem_object_wait_rendering(obj);
2012
2013         idx = intel_ring_sync_index(from, to);
2014
2015         seqno = obj->last_rendering_seqno;
2016         if (seqno <= from->sync_seqno[idx])
2017                 return 0;
2018
2019         ret = i915_gem_check_olr(obj->ring, seqno);
2020         if (ret)
2021                 return ret;
2022
2023         ret = to->sync_to(to, from, seqno);
2024         if (!ret)
2025                 from->sync_seqno[idx] = seqno;
2026
2027         return ret;
2028 }
2029
2030 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
2031 {
2032         u32 old_write_domain, old_read_domains;
2033
2034         /* Act a barrier for all accesses through the GTT */
2035         mb();
2036
2037         /* Force a pagefault for domain tracking on next user access */
2038         i915_gem_release_mmap(obj);
2039
2040         if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
2041                 return;
2042
2043         old_read_domains = obj->base.read_domains;
2044         old_write_domain = obj->base.write_domain;
2045
2046         obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT;
2047         obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;
2048
2049         trace_i915_gem_object_change_domain(obj,
2050                                             old_read_domains,
2051                                             old_write_domain);
2052 }
2053
2054 /**
2055  * Unbinds an object from the GTT aperture.
2056  */
2057 int
2058 i915_gem_object_unbind(struct drm_i915_gem_object *obj)
2059 {
2060         drm_i915_private_t *dev_priv = obj->base.dev->dev_private;
2061         int ret = 0;
2062
2063         if (obj->gtt_space == NULL)
2064                 return 0;
2065
2066         if (obj->pin_count != 0) {
2067                 DRM_ERROR("Attempting to unbind pinned buffer\n");
2068                 return -EINVAL;
2069         }
2070
2071         ret = i915_gem_object_finish_gpu(obj);
2072         if (ret)
2073                 return ret;
2074         /* Continue on if we fail due to EIO, the GPU is hung so we
2075          * should be safe and we need to cleanup or else we might
2076          * cause memory corruption through use-after-free.
2077          */
2078
2079         i915_gem_object_finish_gtt(obj);
2080
2081         /* Move the object to the CPU domain to ensure that
2082          * any possible CPU writes while it's not in the GTT
2083          * are flushed when we go to remap it.
2084          */
2085         if (ret == 0)
2086                 ret = i915_gem_object_set_to_cpu_domain(obj, 1);
2087         if (ret == -ERESTARTSYS)
2088                 return ret;
2089         if (ret) {
2090                 /* In the event of a disaster, abandon all caches and
2091                  * hope for the best.
2092                  */
2093                 i915_gem_clflush_object(obj);
2094                 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
2095         }
2096
2097         /* release the fence reg _after_ flushing */
2098         ret = i915_gem_object_put_fence(obj);
2099         if (ret)
2100                 return ret;
2101
2102         trace_i915_gem_object_unbind(obj);
2103
2104         if (obj->has_global_gtt_mapping)
2105                 i915_gem_gtt_unbind_object(obj);
2106         if (obj->has_aliasing_ppgtt_mapping) {
2107                 i915_ppgtt_unbind_object(dev_priv->mm.aliasing_ppgtt, obj);
2108                 obj->has_aliasing_ppgtt_mapping = 0;
2109         }
2110         i915_gem_gtt_finish_object(obj);
2111
2112         i915_gem_object_put_pages_gtt(obj);
2113
2114         list_del_init(&obj->gtt_list);
2115         list_del_init(&obj->mm_list);
2116         /* Avoid an unnecessary call to unbind on rebind. */
2117         obj->map_and_fenceable = true;
2118
2119         drm_mm_put_block(obj->gtt_space);
2120         obj->gtt_space = NULL;
2121         obj->gtt_offset = 0;
2122
2123         if (i915_gem_object_is_purgeable(obj))
2124                 i915_gem_object_truncate(obj);
2125
2126         return ret;
2127 }
2128
2129 int
2130 i915_gem_flush_ring(struct intel_ring_buffer *ring,
2131                     uint32_t invalidate_domains,
2132                     uint32_t flush_domains)
2133 {
2134         int ret;
2135
2136         if (((invalidate_domains | flush_domains) & I915_GEM_GPU_DOMAINS) == 0)
2137                 return 0;
2138
2139         trace_i915_gem_ring_flush(ring, invalidate_domains, flush_domains);
2140
2141         ret = ring->flush(ring, invalidate_domains, flush_domains);
2142         if (ret)
2143                 return ret;
2144
2145         if (flush_domains & I915_GEM_GPU_DOMAINS)
2146                 i915_gem_process_flushing_list(ring, flush_domains);
2147
2148         return 0;
2149 }
2150
2151 static int i915_ring_idle(struct intel_ring_buffer *ring)
2152 {
2153         int ret;
2154
2155         if (list_empty(&ring->gpu_write_list) && list_empty(&ring->active_list))
2156                 return 0;
2157
2158         if (!list_empty(&ring->gpu_write_list)) {
2159                 ret = i915_gem_flush_ring(ring,
2160                                     I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
2161                 if (ret)
2162                         return ret;
2163         }
2164
2165         return i915_wait_request(ring, i915_gem_next_request_seqno(ring));
2166 }
2167
2168 int i915_gpu_idle(struct drm_device *dev)
2169 {
2170         drm_i915_private_t *dev_priv = dev->dev_private;
2171         struct intel_ring_buffer *ring;
2172         int ret, i;
2173
2174         /* Flush everything onto the inactive list. */
2175         for_each_ring(ring, dev_priv, i) {
2176                 ret = i915_ring_idle(ring);
2177                 if (ret)
2178                         return ret;
2179
2180                 /* Is the device fubar? */
2181                 if (WARN_ON(!list_empty(&ring->gpu_write_list)))
2182                         return -EBUSY;
2183         }
2184
2185         return 0;
2186 }
2187
2188 static void sandybridge_write_fence_reg(struct drm_device *dev, int reg,
2189                                         struct drm_i915_gem_object *obj)
2190 {
2191         drm_i915_private_t *dev_priv = dev->dev_private;
2192         uint64_t val;
2193
2194         if (obj) {
2195                 u32 size = obj->gtt_space->size;
2196
2197                 val = (uint64_t)((obj->gtt_offset + size - 4096) &
2198                                  0xfffff000) << 32;
2199                 val |= obj->gtt_offset & 0xfffff000;
2200                 val |= (uint64_t)((obj->stride / 128) - 1) <<
2201                         SANDYBRIDGE_FENCE_PITCH_SHIFT;
2202
2203                 if (obj->tiling_mode == I915_TILING_Y)
2204                         val |= 1 << I965_FENCE_TILING_Y_SHIFT;
2205                 val |= I965_FENCE_REG_VALID;
2206         } else
2207                 val = 0;
2208
2209         I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + reg * 8, val);
2210         POSTING_READ(FENCE_REG_SANDYBRIDGE_0 + reg * 8);
2211 }
2212
2213 static void i965_write_fence_reg(struct drm_device *dev, int reg,
2214                                  struct drm_i915_gem_object *obj)
2215 {
2216         drm_i915_private_t *dev_priv = dev->dev_private;
2217         uint64_t val;
2218
2219         if (obj) {
2220                 u32 size = obj->gtt_space->size;
2221
2222                 val = (uint64_t)((obj->gtt_offset + size - 4096) &
2223                                  0xfffff000) << 32;
2224                 val |= obj->gtt_offset & 0xfffff000;
2225                 val |= ((obj->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT;
2226                 if (obj->tiling_mode == I915_TILING_Y)
2227                         val |= 1 << I965_FENCE_TILING_Y_SHIFT;
2228                 val |= I965_FENCE_REG_VALID;
2229         } else
2230                 val = 0;
2231
2232         I915_WRITE64(FENCE_REG_965_0 + reg * 8, val);
2233         POSTING_READ(FENCE_REG_965_0 + reg * 8);
2234 }
2235
2236 static void i915_write_fence_reg(struct drm_device *dev, int reg,
2237                                  struct drm_i915_gem_object *obj)
2238 {
2239         drm_i915_private_t *dev_priv = dev->dev_private;
2240         u32 val;
2241
2242         if (obj) {
2243                 u32 size = obj->gtt_space->size;
2244                 int pitch_val;
2245                 int tile_width;
2246
2247                 WARN((obj->gtt_offset & ~I915_FENCE_START_MASK) ||
2248                      (size & -size) != size ||
2249                      (obj->gtt_offset & (size - 1)),
2250                      "object 0x%08x [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n",
2251                      obj->gtt_offset, obj->map_and_fenceable, size);
2252
2253                 if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
2254                         tile_width = 128;
2255                 else
2256                         tile_width = 512;
2257
2258                 /* Note: pitch better be a power of two tile widths */
2259                 pitch_val = obj->stride / tile_width;
2260                 pitch_val = ffs(pitch_val) - 1;
2261
2262                 val = obj->gtt_offset;
2263                 if (obj->tiling_mode == I915_TILING_Y)
2264                         val |= 1 << I830_FENCE_TILING_Y_SHIFT;
2265                 val |= I915_FENCE_SIZE_BITS(size);
2266                 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
2267                 val |= I830_FENCE_REG_VALID;
2268         } else
2269                 val = 0;
2270
2271         if (reg < 8)
2272                 reg = FENCE_REG_830_0 + reg * 4;
2273         else
2274                 reg = FENCE_REG_945_8 + (reg - 8) * 4;
2275
2276         I915_WRITE(reg, val);
2277         POSTING_READ(reg);
2278 }
2279
2280 static void i830_write_fence_reg(struct drm_device *dev, int reg,
2281                                 struct drm_i915_gem_object *obj)
2282 {
2283         drm_i915_private_t *dev_priv = dev->dev_private;
2284         uint32_t val;
2285
2286         if (obj) {
2287                 u32 size = obj->gtt_space->size;
2288                 uint32_t pitch_val;
2289
2290                 WARN((obj->gtt_offset & ~I830_FENCE_START_MASK) ||
2291                      (size & -size) != size ||
2292                      (obj->gtt_offset & (size - 1)),
2293                      "object 0x%08x not 512K or pot-size 0x%08x aligned\n",
2294                      obj->gtt_offset, size);
2295
2296                 pitch_val = obj->stride / 128;
2297                 pitch_val = ffs(pitch_val) - 1;
2298
2299                 val = obj->gtt_offset;
2300                 if (obj->tiling_mode == I915_TILING_Y)
2301                         val |= 1 << I830_FENCE_TILING_Y_SHIFT;
2302                 val |= I830_FENCE_SIZE_BITS(size);
2303                 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
2304                 val |= I830_FENCE_REG_VALID;
2305         } else
2306                 val = 0;
2307
2308         I915_WRITE(FENCE_REG_830_0 + reg * 4, val);
2309         POSTING_READ(FENCE_REG_830_0 + reg * 4);
2310 }
2311
2312 static void i915_gem_write_fence(struct drm_device *dev, int reg,
2313                                  struct drm_i915_gem_object *obj)
2314 {
2315         switch (INTEL_INFO(dev)->gen) {
2316         case 7:
2317         case 6: sandybridge_write_fence_reg(dev, reg, obj); break;
2318         case 5:
2319         case 4: i965_write_fence_reg(dev, reg, obj); break;
2320         case 3: i915_write_fence_reg(dev, reg, obj); break;
2321         case 2: i830_write_fence_reg(dev, reg, obj); break;
2322         default: break;
2323         }
2324 }
2325
2326 static inline int fence_number(struct drm_i915_private *dev_priv,
2327                                struct drm_i915_fence_reg *fence)
2328 {
2329         return fence - dev_priv->fence_regs;
2330 }
2331
2332 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
2333                                          struct drm_i915_fence_reg *fence,
2334                                          bool enable)
2335 {
2336         struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
2337         int reg = fence_number(dev_priv, fence);
2338
2339         i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL);
2340
2341         if (enable) {
2342                 obj->fence_reg = reg;
2343                 fence->obj = obj;
2344                 list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list);
2345         } else {
2346                 obj->fence_reg = I915_FENCE_REG_NONE;
2347                 fence->obj = NULL;
2348                 list_del_init(&fence->lru_list);
2349         }
2350 }
2351
2352 static int
2353 i915_gem_object_flush_fence(struct drm_i915_gem_object *obj)
2354 {
2355         int ret;
2356
2357         if (obj->fenced_gpu_access) {
2358                 if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) {
2359                         ret = i915_gem_flush_ring(obj->ring,
2360                                                   0, obj->base.write_domain);
2361                         if (ret)
2362                                 return ret;
2363                 }
2364
2365                 obj->fenced_gpu_access = false;
2366         }
2367
2368         if (obj->last_fenced_seqno) {
2369                 ret = i915_wait_request(obj->ring, obj->last_fenced_seqno);
2370                 if (ret)
2371                         return ret;
2372
2373                 obj->last_fenced_seqno = 0;
2374         }
2375
2376         /* Ensure that all CPU reads are completed before installing a fence
2377          * and all writes before removing the fence.
2378          */
2379         if (obj->base.read_domains & I915_GEM_DOMAIN_GTT)
2380                 mb();
2381
2382         return 0;
2383 }
2384
2385 int
2386 i915_gem_object_put_fence(struct drm_i915_gem_object *obj)
2387 {
2388         struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
2389         int ret;
2390
2391         ret = i915_gem_object_flush_fence(obj);
2392         if (ret)
2393                 return ret;
2394
2395         if (obj->fence_reg == I915_FENCE_REG_NONE)
2396                 return 0;
2397
2398         i915_gem_object_update_fence(obj,
2399                                      &dev_priv->fence_regs[obj->fence_reg],
2400                                      false);
2401         i915_gem_object_fence_lost(obj);
2402
2403         return 0;
2404 }
2405
2406 static struct drm_i915_fence_reg *
2407 i915_find_fence_reg(struct drm_device *dev)
2408 {
2409         struct drm_i915_private *dev_priv = dev->dev_private;
2410         struct drm_i915_fence_reg *reg, *avail;
2411         int i;
2412
2413         /* First try to find a free reg */
2414         avail = NULL;
2415         for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
2416                 reg = &dev_priv->fence_regs[i];
2417                 if (!reg->obj)
2418                         return reg;
2419
2420                 if (!reg->pin_count)
2421                         avail = reg;
2422         }
2423
2424         if (avail == NULL)
2425                 return NULL;
2426
2427         /* None available, try to steal one or wait for a user to finish */
2428         list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) {
2429                 if (reg->pin_count)
2430                         continue;
2431
2432                 return reg;
2433         }
2434
2435         return NULL;
2436 }
2437
2438 /**
2439  * i915_gem_object_get_fence - set up fencing for an object
2440  * @obj: object to map through a fence reg
2441  *
2442  * When mapping objects through the GTT, userspace wants to be able to write
2443  * to them without having to worry about swizzling if the object is tiled.
2444  * This function walks the fence regs looking for a free one for @obj,
2445  * stealing one if it can't find any.
2446  *
2447  * It then sets up the reg based on the object's properties: address, pitch
2448  * and tiling format.
2449  *
2450  * For an untiled surface, this removes any existing fence.
2451  */
2452 int
2453 i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
2454 {
2455         struct drm_device *dev = obj->base.dev;
2456         struct drm_i915_private *dev_priv = dev->dev_private;
2457         bool enable = obj->tiling_mode != I915_TILING_NONE;
2458         struct drm_i915_fence_reg *reg;
2459         int ret;
2460
2461         /* Have we updated the tiling parameters upon the object and so
2462          * will need to serialise the write to the associated fence register?
2463          */
2464         if (obj->fence_dirty) {
2465                 ret = i915_gem_object_flush_fence(obj);
2466                 if (ret)
2467                         return ret;
2468         }
2469
2470         /* Just update our place in the LRU if our fence is getting reused. */
2471         if (obj->fence_reg != I915_FENCE_REG_NONE) {
2472                 reg = &dev_priv->fence_regs[obj->fence_reg];
2473                 if (!obj->fence_dirty) {
2474                         list_move_tail(&reg->lru_list,
2475                                        &dev_priv->mm.fence_list);
2476                         return 0;
2477                 }
2478         } else if (enable) {
2479                 reg = i915_find_fence_reg(dev);
2480                 if (reg == NULL)
2481                         return -EDEADLK;
2482
2483                 if (reg->obj) {
2484                         struct drm_i915_gem_object *old = reg->obj;
2485
2486                         ret = i915_gem_object_flush_fence(old);
2487                         if (ret)
2488                                 return ret;
2489
2490                         i915_gem_object_fence_lost(old);
2491                 }
2492         } else
2493                 return 0;
2494
2495         i915_gem_object_update_fence(obj, reg, enable);
2496         obj->fence_dirty = false;
2497
2498         return 0;
2499 }
2500
2501 /**
2502  * Finds free space in the GTT aperture and binds the object there.
2503  */
2504 static int
2505 i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
2506                             unsigned alignment,
2507                             bool map_and_fenceable)
2508 {
2509         struct drm_device *dev = obj->base.dev;
2510         drm_i915_private_t *dev_priv = dev->dev_private;
2511         struct drm_mm_node *free_space;
2512         gfp_t gfpmask = __GFP_NORETRY | __GFP_NOWARN;
2513         u32 size, fence_size, fence_alignment, unfenced_alignment;
2514         bool mappable, fenceable;
2515         int ret;
2516
2517         if (obj->madv != I915_MADV_WILLNEED) {
2518                 DRM_ERROR("Attempting to bind a purgeable object\n");
2519                 return -EINVAL;
2520         }
2521
2522         fence_size = i915_gem_get_gtt_size(dev,
2523                                            obj->base.size,
2524                                            obj->tiling_mode);
2525         fence_alignment = i915_gem_get_gtt_alignment(dev,
2526                                                      obj->base.size,
2527                                                      obj->tiling_mode);
2528         unfenced_alignment =
2529                 i915_gem_get_unfenced_gtt_alignment(dev,
2530                                                     obj->base.size,
2531                                                     obj->tiling_mode);
2532
2533         if (alignment == 0)
2534                 alignment = map_and_fenceable ? fence_alignment :
2535                                                 unfenced_alignment;
2536         if (map_and_fenceable && alignment & (fence_alignment - 1)) {
2537                 DRM_ERROR("Invalid object alignment requested %u\n", alignment);
2538                 return -EINVAL;
2539         }
2540
2541         size = map_and_fenceable ? fence_size : obj->base.size;
2542
2543         /* If the object is bigger than the entire aperture, reject it early
2544          * before evicting everything in a vain attempt to find space.
2545          */
2546         if (obj->base.size >
2547             (map_and_fenceable ? dev_priv->mm.gtt_mappable_end : dev_priv->mm.gtt_total)) {
2548                 DRM_ERROR("Attempting to bind an object larger than the aperture\n");
2549                 return -E2BIG;
2550         }
2551
2552  search_free:
2553         if (map_and_fenceable)
2554                 free_space =
2555                         drm_mm_search_free_in_range(&dev_priv->mm.gtt_space,
2556                                                     size, alignment, 0,
2557                                                     dev_priv->mm.gtt_mappable_end,
2558                                                     0);
2559         else
2560                 free_space = drm_mm_search_free(&dev_priv->mm.gtt_space,
2561                                                 size, alignment, 0);
2562
2563         if (free_space != NULL) {
2564                 if (map_and_fenceable)
2565                         obj->gtt_space =
2566                                 drm_mm_get_block_range_generic(free_space,
2567                                                                size, alignment, 0,
2568                                                                dev_priv->mm.gtt_mappable_end,
2569                                                                0);
2570                 else
2571                         obj->gtt_space =
2572                                 drm_mm_get_block(free_space, size, alignment);
2573         }
2574         if (obj->gtt_space == NULL) {
2575                 /* If the gtt is empty and we're still having trouble
2576                  * fitting our object in, we're out of memory.
2577                  */
2578                 ret = i915_gem_evict_something(dev, size, alignment,
2579                                                map_and_fenceable);
2580                 if (ret)
2581                         return ret;
2582
2583                 goto search_free;
2584         }
2585
2586         ret = i915_gem_object_get_pages_gtt(obj, gfpmask);
2587         if (ret) {
2588                 drm_mm_put_block(obj->gtt_space);
2589                 obj->gtt_space = NULL;
2590
2591                 if (ret == -ENOMEM) {
2592                         /* first try to reclaim some memory by clearing the GTT */
2593                         ret = i915_gem_evict_everything(dev, false);
2594                         if (ret) {
2595                                 /* now try to shrink everyone else */
2596                                 if (gfpmask) {
2597                                         gfpmask = 0;
2598                                         goto search_free;
2599                                 }
2600
2601                                 return -ENOMEM;
2602                         }
2603
2604                         goto search_free;
2605                 }
2606
2607                 return ret;
2608         }
2609
2610         ret = i915_gem_gtt_prepare_object(obj);
2611         if (ret) {
2612                 i915_gem_object_put_pages_gtt(obj);
2613                 drm_mm_put_block(obj->gtt_space);
2614                 obj->gtt_space = NULL;
2615
2616                 if (i915_gem_evict_everything(dev, false))
2617                         return ret;
2618
2619                 goto search_free;
2620         }
2621
2622         if (!dev_priv->mm.aliasing_ppgtt)
2623                 i915_gem_gtt_bind_object(obj, obj->cache_level);
2624
2625         list_add_tail(&obj->gtt_list, &dev_priv->mm.gtt_list);
2626         list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
2627
2628         /* Assert that the object is not currently in any GPU domain. As it
2629          * wasn't in the GTT, there shouldn't be any way it could have been in
2630          * a GPU cache
2631          */
2632         BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
2633         BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
2634
2635         obj->gtt_offset = obj->gtt_space->start;
2636
2637         fenceable =
2638                 obj->gtt_space->size == fence_size &&
2639                 (obj->gtt_space->start & (fence_alignment - 1)) == 0;
2640
2641         mappable =
2642                 obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end;
2643
2644         obj->map_and_fenceable = mappable && fenceable;
2645
2646         trace_i915_gem_object_bind(obj, map_and_fenceable);
2647         return 0;
2648 }
2649
2650 void
2651 i915_gem_clflush_object(struct drm_i915_gem_object *obj)
2652 {
2653         /* If we don't have a page list set up, then we're not pinned
2654          * to GPU, and we can ignore the cache flush because it'll happen
2655          * again at bind time.
2656          */
2657         if (obj->pages == NULL)
2658                 return;
2659
2660         /* If the GPU is snooping the contents of the CPU cache,
2661          * we do not need to manually clear the CPU cache lines.  However,
2662          * the caches are only snooped when the render cache is
2663          * flushed/invalidated.  As we always have to emit invalidations
2664          * and flushes when moving into and out of the RENDER domain, correct
2665          * snooping behaviour occurs naturally as the result of our domain
2666          * tracking.
2667          */
2668         if (obj->cache_level != I915_CACHE_NONE)
2669                 return;
2670
2671         trace_i915_gem_object_clflush(obj);
2672
2673         drm_clflush_pages(obj->pages, obj->base.size / PAGE_SIZE);
2674 }
2675
2676 /** Flushes any GPU write domain for the object if it's dirty. */
2677 static int
2678 i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj)
2679 {
2680         if ((obj->base.write_domain & I915_GEM_GPU_DOMAINS) == 0)
2681                 return 0;
2682
2683         /* Queue the GPU write cache flushing we need. */
2684         return i915_gem_flush_ring(obj->ring, 0, obj->base.write_domain);
2685 }
2686
2687 /** Flushes the GTT write domain for the object if it's dirty. */
2688 static void
2689 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
2690 {
2691         uint32_t old_write_domain;
2692
2693         if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
2694                 return;
2695
2696         /* No actual flushing is required for the GTT write domain.  Writes
2697          * to it immediately go to main memory as far as we know, so there's
2698          * no chipset flush.  It also doesn't land in render cache.
2699          *
2700          * However, we do have to enforce the order so that all writes through
2701          * the GTT land before any writes to the device, such as updates to
2702          * the GATT itself.
2703          */
2704         wmb();
2705
2706         old_write_domain = obj->base.write_domain;
2707         obj->base.write_domain = 0;
2708
2709         trace_i915_gem_object_change_domain(obj,
2710                                             obj->base.read_domains,
2711                                             old_write_domain);
2712 }
2713
2714 /** Flushes the CPU write domain for the object if it's dirty. */
2715 static void
2716 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
2717 {
2718         uint32_t old_write_domain;
2719
2720         if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
2721                 return;
2722
2723         i915_gem_clflush_object(obj);
2724         intel_gtt_chipset_flush();
2725         old_write_domain = obj->base.write_domain;
2726         obj->base.write_domain = 0;
2727
2728         trace_i915_gem_object_change_domain(obj,
2729                                             obj->base.read_domains,
2730                                             old_write_domain);
2731 }
2732
2733 /**
2734  * Moves a single object to the GTT read, and possibly write domain.
2735  *
2736  * This function returns when the move is complete, including waiting on
2737  * flushes to occur.
2738  */
2739 int
2740 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
2741 {
2742         drm_i915_private_t *dev_priv = obj->base.dev->dev_private;
2743         uint32_t old_write_domain, old_read_domains;
2744         int ret;
2745
2746         /* Not valid to be called on unbound objects. */
2747         if (obj->gtt_space == NULL)
2748                 return -EINVAL;
2749
2750         if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
2751                 return 0;
2752
2753         ret = i915_gem_object_flush_gpu_write_domain(obj);
2754         if (ret)
2755                 return ret;
2756
2757         if (obj->pending_gpu_write || write) {
2758                 ret = i915_gem_object_wait_rendering(obj);
2759                 if (ret)
2760                         return ret;
2761         }
2762
2763         i915_gem_object_flush_cpu_write_domain(obj);
2764
2765         old_write_domain = obj->base.write_domain;
2766         old_read_domains = obj->base.read_domains;
2767
2768         /* It should now be out of any other write domains, and we can update
2769          * the domain values for our changes.
2770          */
2771         BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
2772         obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
2773         if (write) {
2774                 obj->base.read_domains = I915_GEM_DOMAIN_GTT;
2775                 obj->base.write_domain = I915_GEM_DOMAIN_GTT;
2776                 obj->dirty = 1;
2777         }
2778
2779         trace_i915_gem_object_change_domain(obj,
2780                                             old_read_domains,
2781                                             old_write_domain);
2782
2783         /* And bump the LRU for this access */
2784         if (i915_gem_object_is_inactive(obj))
2785                 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
2786
2787         return 0;
2788 }
2789
2790 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
2791                                     enum i915_cache_level cache_level)
2792 {
2793         struct drm_device *dev = obj->base.dev;
2794         drm_i915_private_t *dev_priv = dev->dev_private;
2795         int ret;
2796
2797         if (obj->cache_level == cache_level)
2798                 return 0;
2799
2800         if (obj->pin_count) {
2801                 DRM_DEBUG("can not change the cache level of pinned objects\n");
2802                 return -EBUSY;
2803         }
2804
2805         if (obj->gtt_space) {
2806                 ret = i915_gem_object_finish_gpu(obj);
2807                 if (ret)
2808                         return ret;
2809
2810                 i915_gem_object_finish_gtt(obj);
2811
2812                 /* Before SandyBridge, you could not use tiling or fence
2813                  * registers with snooped memory, so relinquish any fences
2814                  * currently pointing to our region in the aperture.
2815                  */
2816                 if (INTEL_INFO(obj->base.dev)->gen < 6) {
2817                         ret = i915_gem_object_put_fence(obj);
2818                         if (ret)
2819                                 return ret;
2820                 }
2821
2822                 if (obj->has_global_gtt_mapping)
2823                         i915_gem_gtt_bind_object(obj, cache_level);
2824                 if (obj->has_aliasing_ppgtt_mapping)
2825                         i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
2826                                                obj, cache_level);
2827         }
2828
2829         if (cache_level == I915_CACHE_NONE) {
2830                 u32 old_read_domains, old_write_domain;
2831
2832                 /* If we're coming from LLC cached, then we haven't
2833                  * actually been tracking whether the data is in the
2834                  * CPU cache or not, since we only allow one bit set
2835                  * in obj->write_domain and have been skipping the clflushes.
2836                  * Just set it to the CPU cache for now.
2837                  */
2838                 WARN_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU);
2839                 WARN_ON(obj->base.read_domains & ~I915_GEM_DOMAIN_CPU);
2840
2841                 old_read_domains = obj->base.read_domains;
2842                 old_write_domain = obj->base.write_domain;
2843
2844                 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
2845                 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
2846
2847                 trace_i915_gem_object_change_domain(obj,
2848                                                     old_read_domains,
2849                                                     old_write_domain);
2850         }
2851
2852         obj->cache_level = cache_level;
2853         return 0;
2854 }
2855
2856 /*
2857  * Prepare buffer for display plane (scanout, cursors, etc).
2858  * Can be called from an uninterruptible phase (modesetting) and allows
2859  * any flushes to be pipelined (for pageflips).
2860  */
2861 int
2862 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
2863                                      u32 alignment,
2864                                      struct intel_ring_buffer *pipelined)
2865 {
2866         u32 old_read_domains, old_write_domain;
2867         int ret;
2868
2869         ret = i915_gem_object_flush_gpu_write_domain(obj);
2870         if (ret)
2871                 return ret;
2872
2873         if (pipelined != obj->ring) {
2874                 ret = i915_gem_object_sync(obj, pipelined);
2875                 if (ret)
2876                         return ret;
2877         }
2878
2879         /* The display engine is not coherent with the LLC cache on gen6.  As
2880          * a result, we make sure that the pinning that is about to occur is
2881          * done with uncached PTEs. This is lowest common denominator for all
2882          * chipsets.
2883          *
2884          * However for gen6+, we could do better by using the GFDT bit instead
2885          * of uncaching, which would allow us to flush all the LLC-cached data
2886          * with that bit in the PTE to main memory with just one PIPE_CONTROL.
2887          */
2888         ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE);
2889         if (ret)
2890                 return ret;
2891
2892         /* As the user may map the buffer once pinned in the display plane
2893          * (e.g. libkms for the bootup splash), we have to ensure that we
2894          * always use map_and_fenceable for all scanout buffers.
2895          */
2896         ret = i915_gem_object_pin(obj, alignment, true);
2897         if (ret)
2898                 return ret;
2899
2900         i915_gem_object_flush_cpu_write_domain(obj);
2901
2902         old_write_domain = obj->base.write_domain;
2903         old_read_domains = obj->base.read_domains;
2904
2905         /* It should now be out of any other write domains, and we can update
2906          * the domain values for our changes.
2907          */
2908         BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
2909         obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
2910
2911         trace_i915_gem_object_change_domain(obj,
2912                                             old_read_domains,
2913                                             old_write_domain);
2914
2915         return 0;
2916 }
2917
2918 int
2919 i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj)
2920 {
2921         int ret;
2922
2923         if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0)
2924                 return 0;
2925
2926         if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) {
2927                 ret = i915_gem_flush_ring(obj->ring, 0, obj->base.write_domain);
2928                 if (ret)
2929                         return ret;
2930         }
2931
2932         ret = i915_gem_object_wait_rendering(obj);
2933         if (ret)
2934                 return ret;
2935
2936         /* Ensure that we invalidate the GPU's caches and TLBs. */
2937         obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
2938         return 0;
2939 }
2940
2941 /**
2942  * Moves a single object to the CPU read, and possibly write domain.
2943  *
2944  * This function returns when the move is complete, including waiting on
2945  * flushes to occur.
2946  */
2947 int
2948 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
2949 {
2950         uint32_t old_write_domain, old_read_domains;
2951         int ret;
2952
2953         if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
2954                 return 0;
2955
2956         ret = i915_gem_object_flush_gpu_write_domain(obj);
2957         if (ret)
2958                 return ret;
2959
2960         if (write || obj->pending_gpu_write) {
2961                 ret = i915_gem_object_wait_rendering(obj);
2962                 if (ret)
2963                         return ret;
2964         }
2965
2966         i915_gem_object_flush_gtt_write_domain(obj);
2967
2968         old_write_domain = obj->base.write_domain;
2969         old_read_domains = obj->base.read_domains;
2970
2971         /* Flush the CPU cache if it's still invalid. */
2972         if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
2973                 i915_gem_clflush_object(obj);
2974
2975                 obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
2976         }
2977
2978         /* It should now be out of any other write domains, and we can update
2979          * the domain values for our changes.
2980          */
2981         BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
2982
2983         /* If we're writing through the CPU, then the GPU read domains will
2984          * need to be invalidated at next use.
2985          */
2986         if (write) {
2987                 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
2988                 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
2989         }
2990
2991         trace_i915_gem_object_change_domain(obj,
2992                                             old_read_domains,
2993                                             old_write_domain);
2994
2995         return 0;
2996 }
2997
2998 /* Throttle our rendering by waiting until the ring has completed our requests
2999  * emitted over 20 msec ago.
3000  *
3001  * Note that if we were to use the current jiffies each time around the loop,
3002  * we wouldn't escape the function with any frames outstanding if the time to
3003  * render a frame was over 20ms.
3004  *
3005  * This should get us reasonable parallelism between CPU and GPU but also
3006  * relatively low latency when blocking on a particular request to finish.
3007  */
3008 static int
3009 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
3010 {
3011         struct drm_i915_private *dev_priv = dev->dev_private;
3012         struct drm_i915_file_private *file_priv = file->driver_priv;
3013         unsigned long recent_enough = jiffies - msecs_to_jiffies(20);
3014         struct drm_i915_gem_request *request;
3015         struct intel_ring_buffer *ring = NULL;
3016         u32 seqno = 0;
3017         int ret;
3018
3019         if (atomic_read(&dev_priv->mm.wedged))
3020                 return -EIO;
3021
3022         spin_lock(&file_priv->mm.lock);
3023         list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
3024                 if (time_after_eq(request->emitted_jiffies, recent_enough))
3025                         break;
3026
3027                 ring = request->ring;
3028                 seqno = request->seqno;
3029         }
3030         spin_unlock(&file_priv->mm.lock);
3031
3032         if (seqno == 0)
3033                 return 0;
3034
3035         ret = __wait_seqno(ring, seqno, true);
3036         if (ret == 0)
3037                 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);
3038
3039         return ret;
3040 }
3041
3042 int
3043 i915_gem_object_pin(struct drm_i915_gem_object *obj,
3044                     uint32_t alignment,
3045                     bool map_and_fenceable)
3046 {
3047         int ret;
3048
3049         BUG_ON(obj->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT);
3050
3051         if (obj->gtt_space != NULL) {
3052                 if ((alignment && obj->gtt_offset & (alignment - 1)) ||
3053                     (map_and_fenceable && !obj->map_and_fenceable)) {
3054                         WARN(obj->pin_count,
3055                              "bo is already pinned with incorrect alignment:"
3056                              " offset=%x, req.alignment=%x, req.map_and_fenceable=%d,"
3057                              " obj->map_and_fenceable=%d\n",
3058                              obj->gtt_offset, alignment,
3059                              map_and_fenceable,
3060                              obj->map_and_fenceable);
3061                         ret = i915_gem_object_unbind(obj);
3062                         if (ret)
3063                                 return ret;
3064                 }
3065         }
3066
3067         if (obj->gtt_space == NULL) {
3068                 ret = i915_gem_object_bind_to_gtt(obj, alignment,
3069                                                   map_and_fenceable);
3070                 if (ret)
3071                         return ret;
3072         }
3073
3074         if (!obj->has_global_gtt_mapping && map_and_fenceable)
3075                 i915_gem_gtt_bind_object(obj, obj->cache_level);
3076
3077         obj->pin_count++;
3078         obj->pin_mappable |= map_and_fenceable;
3079
3080         return 0;
3081 }
3082
3083 void
3084 i915_gem_object_unpin(struct drm_i915_gem_object *obj)
3085 {
3086         BUG_ON(obj->pin_count == 0);
3087         BUG_ON(obj->gtt_space == NULL);
3088
3089         if (--obj->pin_count == 0)
3090                 obj->pin_mappable = false;
3091 }
3092
3093 int
3094 i915_gem_pin_ioctl(struct drm_device *dev, void *data,
3095                    struct drm_file *file)
3096 {
3097         struct drm_i915_gem_pin *args = data;
3098         struct drm_i915_gem_object *obj;
3099         int ret;
3100
3101         ret = i915_mutex_lock_interruptible(dev);
3102         if (ret)
3103                 return ret;
3104
3105         obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
3106         if (&obj->base == NULL) {
3107                 ret = -ENOENT;
3108                 goto unlock;
3109         }
3110
3111         if (obj->madv != I915_MADV_WILLNEED) {
3112                 DRM_ERROR("Attempting to pin a purgeable buffer\n");
3113                 ret = -EINVAL;
3114                 goto out;
3115         }
3116
3117         if (obj->pin_filp != NULL && obj->pin_filp != file) {
3118                 DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n",
3119                           args->handle);
3120                 ret = -EINVAL;
3121                 goto out;
3122         }
3123
3124         obj->user_pin_count++;
3125         obj->pin_filp = file;
3126         if (obj->user_pin_count == 1) {
3127                 ret = i915_gem_object_pin(obj, args->alignment, true);
3128                 if (ret)
3129                         goto out;
3130         }
3131
3132         /* XXX - flush the CPU caches for pinned objects
3133          * as the X server doesn't manage domains yet
3134          */
3135         i915_gem_object_flush_cpu_write_domain(obj);
3136         args->offset = obj->gtt_offset;
3137 out:
3138         drm_gem_object_unreference(&obj->base);
3139 unlock:
3140         mutex_unlock(&dev->struct_mutex);
3141         return ret;
3142 }
3143
3144 int
3145 i915_gem_unpin_ioctl(struct drm_device *dev, void *data,
3146                      struct drm_file *file)
3147 {
3148         struct drm_i915_gem_pin *args = data;
3149         struct drm_i915_gem_object *obj;
3150         int ret;
3151
3152         ret = i915_mutex_lock_interruptible(dev);
3153         if (ret)
3154                 return ret;
3155
3156         obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
3157         if (&obj->base == NULL) {
3158                 ret = -ENOENT;
3159                 goto unlock;
3160         }
3161
3162         if (obj->pin_filp != file) {
3163                 DRM_ERROR("Not pinned by caller in i915_gem_pin_ioctl(): %d\n",
3164                           args->handle);
3165                 ret = -EINVAL;
3166                 goto out;
3167         }
3168         obj->user_pin_count--;
3169         if (obj->user_pin_count == 0) {
3170                 obj->pin_filp = NULL;
3171                 i915_gem_object_unpin(obj);
3172         }
3173
3174 out:
3175         drm_gem_object_unreference(&obj->base);
3176 unlock:
3177         mutex_unlock(&dev->struct_mutex);
3178         return ret;
3179 }
3180
3181 int
3182 i915_gem_busy_ioctl(struct drm_device *dev, void *data,
3183                     struct drm_file *file)
3184 {
3185         struct drm_i915_gem_busy *args = data;
3186         struct drm_i915_gem_object *obj;
3187         int ret;
3188
3189         ret = i915_mutex_lock_interruptible(dev);
3190         if (ret)
3191                 return ret;
3192
3193         obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
3194         if (&obj->base == NULL) {
3195                 ret = -ENOENT;
3196                 goto unlock;
3197         }
3198
3199         /* Count all active objects as busy, even if they are currently not used
3200          * by the gpu. Users of this interface expect objects to eventually
3201          * become non-busy without any further actions, therefore emit any
3202          * necessary flushes here.
3203          */
3204         args->busy = obj->active;
3205         if (args->busy) {
3206                 /* Unconditionally flush objects, even when the gpu still uses this
3207                  * object. Userspace calling this function indicates that it wants to
3208                  * use this buffer rather sooner than later, so issuing the required
3209                  * flush earlier is beneficial.
3210                  */
3211                 if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) {
3212                         ret = i915_gem_flush_ring(obj->ring,
3213                                                   0, obj->base.write_domain);
3214                 } else {
3215                         ret = i915_gem_check_olr(obj->ring,
3216                                                  obj->last_rendering_seqno);
3217                 }
3218
3219                 /* Update the active list for the hardware's current position.
3220                  * Otherwise this only updates on a delayed timer or when irqs
3221                  * are actually unmasked, and our working set ends up being
3222                  * larger than required.
3223                  */
3224                 i915_gem_retire_requests_ring(obj->ring);
3225
3226                 args->busy = obj->active;
3227         }
3228
3229         drm_gem_object_unreference(&obj->base);
3230 unlock:
3231         mutex_unlock(&dev->struct_mutex);
3232         return ret;
3233 }
3234
/*
 * Throttle ioctl: thin wrapper that throttles the calling file via
 * i915_gem_ring_throttle().  The ioctl payload (@data) is not used.
 */
int
i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file_priv)
{
	int ret;

	ret = i915_gem_ring_throttle(dev, file_priv);

	return ret;
}
3241
3242 int
3243 i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
3244                        struct drm_file *file_priv)
3245 {
3246         struct drm_i915_gem_madvise *args = data;
3247         struct drm_i915_gem_object *obj;
3248         int ret;
3249
3250         switch (args->madv) {
3251         case I915_MADV_DONTNEED:
3252         case I915_MADV_WILLNEED:
3253             break;
3254         default:
3255             return -EINVAL;
3256         }
3257
3258         ret = i915_mutex_lock_interruptible(dev);
3259         if (ret)
3260                 return ret;
3261
3262         obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle));
3263         if (&obj->base == NULL) {
3264                 ret = -ENOENT;
3265                 goto unlock;
3266         }
3267
3268         if (obj->pin_count) {
3269                 ret = -EINVAL;
3270                 goto out;
3271         }
3272
3273         if (obj->madv != __I915_MADV_PURGED)
3274                 obj->madv = args->madv;
3275
3276         /* if the object is no longer bound, discard its backing storage */
3277         if (i915_gem_object_is_purgeable(obj) &&
3278             obj->gtt_space == NULL)
3279                 i915_gem_object_truncate(obj);
3280
3281         args->retained = obj->madv != __I915_MADV_PURGED;
3282
3283 out:
3284         drm_gem_object_unreference(&obj->base);
3285 unlock:
3286         mutex_unlock(&dev->struct_mutex);
3287         return ret;
3288 }
3289
3290 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
3291                                                   size_t size)
3292 {
3293         struct drm_i915_private *dev_priv = dev->dev_private;
3294         struct drm_i915_gem_object *obj;
3295         struct address_space *mapping;
3296
3297         obj = kzalloc(sizeof(*obj), GFP_KERNEL);
3298         if (obj == NULL)
3299                 return NULL;
3300
3301         if (drm_gem_object_init(dev, &obj->base, size) != 0) {
3302                 kfree(obj);
3303                 return NULL;
3304         }
3305
3306         mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
3307         mapping_set_gfp_mask(mapping, GFP_HIGHUSER | __GFP_RECLAIMABLE);
3308
3309         i915_gem_info_add_obj(dev_priv, size);
3310
3311         obj->base.write_domain = I915_GEM_DOMAIN_CPU;
3312         obj->base.read_domains = I915_GEM_DOMAIN_CPU;
3313
3314         if (HAS_LLC(dev)) {
3315                 /* On some devices, we can have the GPU use the LLC (the CPU
3316                  * cache) for about a 10% performance improvement
3317                  * compared to uncached.  Graphics requests other than
3318                  * display scanout are coherent with the CPU in
3319                  * accessing this cache.  This means in this mode we
3320                  * don't need to clflush on the CPU side, and on the
3321                  * GPU side we only need to flush internal caches to
3322                  * get data visible to the CPU.
3323                  *
3324                  * However, we maintain the display planes as UC, and so
3325                  * need to rebind when first used as such.
3326                  */
3327                 obj->cache_level = I915_CACHE_LLC;
3328         } else
3329                 obj->cache_level = I915_CACHE_NONE;
3330
3331         obj->base.driver_private = NULL;
3332         obj->fence_reg = I915_FENCE_REG_NONE;
3333         INIT_LIST_HEAD(&obj->mm_list);
3334         INIT_LIST_HEAD(&obj->gtt_list);
3335         INIT_LIST_HEAD(&obj->ring_list);
3336         INIT_LIST_HEAD(&obj->exec_list);
3337         INIT_LIST_HEAD(&obj->gpu_write_list);
3338         obj->madv = I915_MADV_WILLNEED;
3339         /* Avoid an unnecessary call to unbind on the first bind. */
3340         obj->map_and_fenceable = true;
3341
3342         return obj;
3343 }
3344
/*
 * GEM init-object hook.  Reaching it is treated as a fatal driver bug, so
 * it BUG()s unconditionally.
 */
int i915_gem_init_object(struct drm_gem_object *obj)
{
	BUG();

	/* Only here to satisfy the int return type. */
	return 0;
}
3351
3352 void i915_gem_free_object(struct drm_gem_object *gem_obj)
3353 {
3354         struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
3355         struct drm_device *dev = obj->base.dev;
3356         drm_i915_private_t *dev_priv = dev->dev_private;
3357
3358         trace_i915_gem_object_destroy(obj);
3359
3360         if (gem_obj->import_attach)
3361                 drm_prime_gem_destroy(gem_obj, obj->sg_table);
3362
3363         if (obj->phys_obj)
3364                 i915_gem_detach_phys_object(dev, obj);
3365
3366         obj->pin_count = 0;
3367         if (WARN_ON(i915_gem_object_unbind(obj) == -ERESTARTSYS)) {
3368                 bool was_interruptible;
3369
3370                 was_interruptible = dev_priv->mm.interruptible;
3371                 dev_priv->mm.interruptible = false;
3372
3373                 WARN_ON(i915_gem_object_unbind(obj));
3374
3375                 dev_priv->mm.interruptible = was_interruptible;
3376         }
3377
3378         if (obj->base.map_list.map)
3379                 drm_gem_free_mmap_offset(&obj->base);
3380
3381         drm_gem_object_release(&obj->base);
3382         i915_gem_info_remove_obj(dev_priv, obj->base.size);
3383
3384         kfree(obj->bit_17);
3385         kfree(obj);
3386 }
3387
3388 int
3389 i915_gem_idle(struct drm_device *dev)
3390 {
3391         drm_i915_private_t *dev_priv = dev->dev_private;
3392         int ret;
3393
3394         mutex_lock(&dev->struct_mutex);
3395
3396         if (dev_priv->mm.suspended) {
3397                 mutex_unlock(&dev->struct_mutex);
3398                 return 0;
3399         }
3400
3401         ret = i915_gpu_idle(dev);
3402         if (ret) {
3403                 mutex_unlock(&dev->struct_mutex);
3404                 return ret;
3405         }
3406         i915_gem_retire_requests(dev);
3407
3408         /* Under UMS, be paranoid and evict. */
3409         if (!drm_core_check_feature(dev, DRIVER_MODESET))
3410                 i915_gem_evict_everything(dev, false);
3411
3412         i915_gem_reset_fences(dev);
3413
3414         /* Hack!  Don't let anybody do execbuf while we don't control the chip.
3415          * We need to replace this with a semaphore, or something.
3416          * And not confound mm.suspended!
3417          */
3418         dev_priv->mm.suspended = 1;
3419         del_timer_sync(&dev_priv->hangcheck_timer);
3420
3421         i915_kernel_lost_context(dev);
3422         i915_gem_cleanup_ringbuffer(dev);
3423
3424         mutex_unlock(&dev->struct_mutex);
3425
3426         /* Cancel the retire work handler, which should be idle now. */
3427         cancel_delayed_work_sync(&dev_priv->mm.retire_work);
3428
3429         return 0;
3430 }
3431
3432 void i915_gem_init_swizzling(struct drm_device *dev)
3433 {
3434         drm_i915_private_t *dev_priv = dev->dev_private;
3435
3436         if (INTEL_INFO(dev)->gen < 5 ||
3437             dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
3438                 return;
3439
3440         I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
3441                                  DISP_TILE_SURFACE_SWIZZLING);
3442
3443         if (IS_GEN5(dev))
3444                 return;
3445
3446         I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
3447         if (IS_GEN6(dev))
3448                 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
3449         else
3450                 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
3451 }
3452
3453 void i915_gem_init_ppgtt(struct drm_device *dev)
3454 {
3455         drm_i915_private_t *dev_priv = dev->dev_private;
3456         uint32_t pd_offset;
3457         struct intel_ring_buffer *ring;
3458         struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
3459         uint32_t __iomem *pd_addr;
3460         uint32_t pd_entry;
3461         int i;
3462
3463         if (!dev_priv->mm.aliasing_ppgtt)
3464                 return;
3465
3466
3467         pd_addr = dev_priv->mm.gtt->gtt + ppgtt->pd_offset/sizeof(uint32_t);
3468         for (i = 0; i < ppgtt->num_pd_entries; i++) {
3469                 dma_addr_t pt_addr;
3470
3471                 if (dev_priv->mm.gtt->needs_dmar)
3472                         pt_addr = ppgtt->pt_dma_addr[i];
3473                 else
3474                         pt_addr = page_to_phys(ppgtt->pt_pages[i]);
3475
3476                 pd_entry = GEN6_PDE_ADDR_ENCODE(pt_addr);
3477                 pd_entry |= GEN6_PDE_VALID;
3478
3479                 writel(pd_entry, pd_addr + i);
3480         }
3481         readl(pd_addr);
3482
3483         pd_offset = ppgtt->pd_offset;
3484         pd_offset /= 64; /* in cachelines, */
3485         pd_offset <<= 16;
3486
3487         if (INTEL_INFO(dev)->gen == 6) {
3488                 uint32_t ecochk, gab_ctl, ecobits;
3489
3490                 ecobits = I915_READ(GAC_ECO_BITS);
3491                 I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);
3492
3493                 gab_ctl = I915_READ(GAB_CTL);
3494                 I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);
3495
3496                 ecochk = I915_READ(GAM_ECOCHK);
3497                 I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT |
3498                                        ECOCHK_PPGTT_CACHE64B);
3499                 I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
3500         } else if (INTEL_INFO(dev)->gen >= 7) {
3501                 I915_WRITE(GAM_ECOCHK, ECOCHK_PPGTT_CACHE64B);
3502                 /* GFX_MODE is per-ring on gen7+ */
3503         }
3504
3505         for_each_ring(ring, dev_priv, i) {
3506                 if (INTEL_INFO(dev)->gen >= 7)
3507                         I915_WRITE(RING_MODE_GEN7(ring),
3508                                    _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
3509
3510                 I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
3511                 I915_WRITE(RING_PP_DIR_BASE(ring), pd_offset);
3512         }
3513 }
3514
3515 int
3516 i915_gem_init_hw(struct drm_device *dev)
3517 {
3518         drm_i915_private_t *dev_priv = dev->dev_private;
3519         int ret;
3520
3521         i915_gem_init_swizzling(dev);
3522
3523         ret = intel_init_render_ring_buffer(dev);
3524         if (ret)
3525                 return ret;
3526
3527         if (HAS_BSD(dev)) {
3528                 ret = intel_init_bsd_ring_buffer(dev);
3529                 if (ret)
3530                         goto cleanup_render_ring;
3531         }
3532
3533         if (HAS_BLT(dev)) {
3534                 ret = intel_init_blt_ring_buffer(dev);
3535                 if (ret)
3536                         goto cleanup_bsd_ring;
3537         }
3538
3539         dev_priv->next_seqno = 1;
3540
3541         i915_gem_init_ppgtt(dev);
3542
3543         return 0;
3544
3545 cleanup_bsd_ring:
3546         intel_cleanup_ring_buffer(&dev_priv->ring[VCS]);
3547 cleanup_render_ring:
3548         intel_cleanup_ring_buffer(&dev_priv->ring[RCS]);
3549         return ret;
3550 }
3551
3552 static bool
3553 intel_enable_ppgtt(struct drm_device *dev)
3554 {
3555         if (i915_enable_ppgtt >= 0)
3556                 return i915_enable_ppgtt;
3557
3558 #ifdef CONFIG_INTEL_IOMMU
3559         /* Disable ppgtt on SNB if VT-d is on. */
3560         if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped)
3561                 return false;
3562 #endif
3563
3564         return true;
3565 }
3566
3567 int i915_gem_init(struct drm_device *dev)
3568 {
3569         struct drm_i915_private *dev_priv = dev->dev_private;
3570         unsigned long gtt_size, mappable_size;
3571         int ret;
3572
3573         gtt_size = dev_priv->mm.gtt->gtt_total_entries << PAGE_SHIFT;
3574         mappable_size = dev_priv->mm.gtt->gtt_mappable_entries << PAGE_SHIFT;
3575
3576         mutex_lock(&dev->struct_mutex);
3577         if (intel_enable_ppgtt(dev) && HAS_ALIASING_PPGTT(dev)) {
3578                 /* PPGTT pdes are stolen from global gtt ptes, so shrink the
3579                  * aperture accordingly when using aliasing ppgtt. */
3580                 gtt_size -= I915_PPGTT_PD_ENTRIES*PAGE_SIZE;
3581
3582                 i915_gem_init_global_gtt(dev, 0, mappable_size, gtt_size);
3583
3584                 ret = i915_gem_init_aliasing_ppgtt(dev);
3585                 if (ret) {
3586                         mutex_unlock(&dev->struct_mutex);
3587                         return ret;
3588                 }
3589         } else {
3590                 /* Let GEM Manage all of the aperture.
3591                  *
3592                  * However, leave one page at the end still bound to the scratch
3593                  * page.  There are a number of places where the hardware
3594                  * apparently prefetches past the end of the object, and we've
3595                  * seen multiple hangs with the GPU head pointer stuck in a
3596                  * batchbuffer bound at the last page of the aperture.  One page
3597                  * should be enough to keep any prefetching inside of the
3598                  * aperture.
3599                  */
3600                 i915_gem_init_global_gtt(dev, 0, mappable_size,
3601                                          gtt_size);
3602         }
3603
3604         ret = i915_gem_init_hw(dev);
3605         mutex_unlock(&dev->struct_mutex);
3606         if (ret) {
3607                 i915_gem_cleanup_aliasing_ppgtt(dev);
3608                 return ret;
3609         }
3610
3611         /* Allow hardware batchbuffers unless told otherwise, but not for KMS. */
3612         if (!drm_core_check_feature(dev, DRIVER_MODESET))
3613                 dev_priv->dri1.allow_batchbuffer = 1;
3614         return 0;
3615 }
3616
3617 void
3618 i915_gem_cleanup_ringbuffer(struct drm_device *dev)
3619 {
3620         drm_i915_private_t *dev_priv = dev->dev_private;
3621         struct intel_ring_buffer *ring;
3622         int i;
3623
3624         for_each_ring(ring, dev_priv, i)
3625                 intel_cleanup_ring_buffer(ring);
3626 }
3627
3628 int
3629 i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
3630                        struct drm_file *file_priv)
3631 {
3632         drm_i915_private_t *dev_priv = dev->dev_private;
3633         int ret;
3634
3635         if (drm_core_check_feature(dev, DRIVER_MODESET))
3636                 return 0;
3637
3638         if (atomic_read(&dev_priv->mm.wedged)) {
3639                 DRM_ERROR("Reenabling wedged hardware, good luck\n");
3640                 atomic_set(&dev_priv->mm.wedged, 0);
3641         }
3642
3643         mutex_lock(&dev->struct_mutex);
3644         dev_priv->mm.suspended = 0;
3645
3646         ret = i915_gem_init_hw(dev);
3647         if (ret != 0) {
3648                 mutex_unlock(&dev->struct_mutex);
3649                 return ret;
3650         }
3651
3652         BUG_ON(!list_empty(&dev_priv->mm.active_list));
3653         BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
3654         BUG_ON(!list_empty(&dev_priv->mm.inactive_list));
3655         mutex_unlock(&dev->struct_mutex);
3656
3657         ret = drm_irq_install(dev);
3658         if (ret)
3659                 goto cleanup_ringbuffer;
3660
3661         return 0;
3662
3663 cleanup_ringbuffer:
3664         mutex_lock(&dev->struct_mutex);
3665         i915_gem_cleanup_ringbuffer(dev);
3666         dev_priv->mm.suspended = 1;
3667         mutex_unlock(&dev->struct_mutex);
3668
3669         return ret;
3670 }
3671
3672 int
3673 i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
3674                        struct drm_file *file_priv)
3675 {
3676         if (drm_core_check_feature(dev, DRIVER_MODESET))
3677                 return 0;
3678
3679         drm_irq_uninstall(dev);
3680         return i915_gem_idle(dev);
3681 }
3682
3683 void
3684 i915_gem_lastclose(struct drm_device *dev)
3685 {
3686         int ret;
3687
3688         if (drm_core_check_feature(dev, DRIVER_MODESET))
3689                 return;
3690
3691         ret = i915_gem_idle(dev);
3692         if (ret)
3693                 DRM_ERROR("failed to idle hardware: %d\n", ret);
3694 }
3695
3696 static void
3697 init_ring_lists(struct intel_ring_buffer *ring)
3698 {
3699         INIT_LIST_HEAD(&ring->active_list);
3700         INIT_LIST_HEAD(&ring->request_list);
3701         INIT_LIST_HEAD(&ring->gpu_write_list);
3702 }
3703
3704 void
3705 i915_gem_load(struct drm_device *dev)
3706 {
3707         int i;
3708         drm_i915_private_t *dev_priv = dev->dev_private;
3709
3710         INIT_LIST_HEAD(&dev_priv->mm.active_list);
3711         INIT_LIST_HEAD(&dev_priv->mm.flushing_list);
3712         INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
3713         INIT_LIST_HEAD(&dev_priv->mm.fence_list);
3714         INIT_LIST_HEAD(&dev_priv->mm.gtt_list);
3715         for (i = 0; i < I915_NUM_RINGS; i++)
3716                 init_ring_lists(&dev_priv->ring[i]);
3717         for (i = 0; i < I915_MAX_NUM_FENCES; i++)
3718                 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
3719         INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
3720                           i915_gem_retire_work_handler);
3721         init_completion(&dev_priv->error_completion);
3722
3723         /* On GEN3 we really need to make sure the ARB C3 LP bit is set */
3724         if (IS_GEN3(dev)) {
3725                 I915_WRITE(MI_ARB_STATE,
3726                            _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE));
3727         }
3728
3729         dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;
3730
3731         /* Old X drivers will take 0-2 for front, back, depth buffers */
3732         if (!drm_core_check_feature(dev, DRIVER_MODESET))
3733                 dev_priv->fence_reg_start = 3;
3734
3735         if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
3736                 dev_priv->num_fence_regs = 16;
3737         else
3738                 dev_priv->num_fence_regs = 8;
3739
3740         /* Initialize fence registers to zero */
3741         i915_gem_reset_fences(dev);
3742
3743         i915_gem_detect_bit_6_swizzle(dev);
3744         init_waitqueue_head(&dev_priv->pending_flip_queue);
3745
3746         dev_priv->mm.interruptible = true;
3747
3748         dev_priv->mm.inactive_shrinker.shrink = i915_gem_inactive_shrink;
3749         dev_priv->mm.inactive_shrinker.seeks = DEFAULT_SEEKS;
3750         register_shrinker(&dev_priv->mm.inactive_shrinker);
3751 }
3752
3753 /*
3754  * Create a physically contiguous memory object for this object
3755  * e.g. for cursor + overlay regs
3756  */
3757 static int i915_gem_init_phys_object(struct drm_device *dev,
3758                                      int id, int size, int align)
3759 {
3760         drm_i915_private_t *dev_priv = dev->dev_private;
3761         struct drm_i915_gem_phys_object *phys_obj;
3762         int ret;
3763
3764         if (dev_priv->mm.phys_objs[id - 1] || !size)
3765                 return 0;
3766
3767         phys_obj = kzalloc(sizeof(struct drm_i915_gem_phys_object), GFP_KERNEL);
3768         if (!phys_obj)
3769                 return -ENOMEM;
3770
3771         phys_obj->id = id;
3772
3773         phys_obj->handle = drm_pci_alloc(dev, size, align);
3774         if (!phys_obj->handle) {
3775                 ret = -ENOMEM;
3776                 goto kfree_obj;
3777         }
3778 #ifdef CONFIG_X86
3779         set_memory_wc((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
3780 #endif
3781
3782         dev_priv->mm.phys_objs[id - 1] = phys_obj;
3783
3784         return 0;
3785 kfree_obj:
3786         kfree(phys_obj);
3787         return ret;
3788 }
3789
3790 static void i915_gem_free_phys_object(struct drm_device *dev, int id)
3791 {
3792         drm_i915_private_t *dev_priv = dev->dev_private;
3793         struct drm_i915_gem_phys_object *phys_obj;
3794
3795         if (!dev_priv->mm.phys_objs[id - 1])
3796                 return;
3797
3798         phys_obj = dev_priv->mm.phys_objs[id - 1];
3799         if (phys_obj->cur_obj) {
3800                 i915_gem_detach_phys_object(dev, phys_obj->cur_obj);
3801         }
3802
3803 #ifdef CONFIG_X86
3804         set_memory_wb((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
3805 #endif
3806         drm_pci_free(dev, phys_obj->handle);
3807         kfree(phys_obj);
3808         dev_priv->mm.phys_objs[id - 1] = NULL;
3809 }
3810
3811 void i915_gem_free_all_phys_object(struct drm_device *dev)
3812 {
3813         int i;
3814
3815         for (i = I915_GEM_PHYS_CURSOR_0; i <= I915_MAX_PHYS_OBJECT; i++)
3816                 i915_gem_free_phys_object(dev, i);
3817 }
3818
3819 void i915_gem_detach_phys_object(struct drm_device *dev,
3820                                  struct drm_i915_gem_object *obj)
3821 {
3822         struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
3823         char *vaddr;
3824         int i;
3825         int page_count;
3826
3827         if (!obj->phys_obj)
3828                 return;
3829         vaddr = obj->phys_obj->handle->vaddr;
3830
3831         page_count = obj->base.size / PAGE_SIZE;
3832         for (i = 0; i < page_count; i++) {
3833                 struct page *page = shmem_read_mapping_page(mapping, i);
3834                 if (!IS_ERR(page)) {
3835                         char *dst = kmap_atomic(page);
3836                         memcpy(dst, vaddr + i*PAGE_SIZE, PAGE_SIZE);
3837                         kunmap_atomic(dst);
3838
3839                         drm_clflush_pages(&page, 1);
3840
3841                         set_page_dirty(page);
3842                         mark_page_accessed(page);
3843                         page_cache_release(page);
3844                 }
3845         }
3846         intel_gtt_chipset_flush();
3847
3848         obj->phys_obj->cur_obj = NULL;
3849         obj->phys_obj = NULL;
3850 }
3851
3852 int
3853 i915_gem_attach_phys_object(struct drm_device *dev,
3854                             struct drm_i915_gem_object *obj,
3855                             int id,
3856                             int align)
3857 {
3858         struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
3859         drm_i915_private_t *dev_priv = dev->dev_private;
3860         int ret = 0;
3861         int page_count;
3862         int i;
3863
3864         if (id > I915_MAX_PHYS_OBJECT)
3865                 return -EINVAL;
3866
3867         if (obj->phys_obj) {
3868                 if (obj->phys_obj->id == id)
3869                         return 0;
3870                 i915_gem_detach_phys_object(dev, obj);
3871         }
3872
3873         /* create a new object */
3874         if (!dev_priv->mm.phys_objs[id - 1]) {
3875                 ret = i915_gem_init_phys_object(dev, id,
3876                                                 obj->base.size, align);
3877                 if (ret) {
3878                         DRM_ERROR("failed to init phys object %d size: %zu\n",
3879                                   id, obj->base.size);
3880                         return ret;
3881                 }
3882         }
3883
3884         /* bind to the object */
3885         obj->phys_obj = dev_priv->mm.phys_objs[id - 1];
3886         obj->phys_obj->cur_obj = obj;
3887
3888         page_count = obj->base.size / PAGE_SIZE;
3889
3890         for (i = 0; i < page_count; i++) {
3891                 struct page *page;
3892                 char *dst, *src;
3893
3894                 page = shmem_read_mapping_page(mapping, i);
3895                 if (IS_ERR(page))
3896                         return PTR_ERR(page);
3897
3898                 src = kmap_atomic(page);
3899                 dst = obj->phys_obj->handle->vaddr + (i * PAGE_SIZE);
3900                 memcpy(dst, src, PAGE_SIZE);
3901                 kunmap_atomic(src);
3902
3903                 mark_page_accessed(page);
3904                 page_cache_release(page);
3905         }
3906
3907         return 0;
3908 }
3909
3910 static int
3911 i915_gem_phys_pwrite(struct drm_device *dev,
3912                      struct drm_i915_gem_object *obj,
3913                      struct drm_i915_gem_pwrite *args,
3914                      struct drm_file *file_priv)
3915 {
3916         void *vaddr = obj->phys_obj->handle->vaddr + args->offset;
3917         char __user *user_data = (char __user *) (uintptr_t) args->data_ptr;
3918
3919         if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
3920                 unsigned long unwritten;
3921
3922                 /* The physical object once assigned is fixed for the lifetime
3923                  * of the obj, so we can safely drop the lock and continue
3924                  * to access vaddr.
3925                  */
3926                 mutex_unlock(&dev->struct_mutex);
3927                 unwritten = copy_from_user(vaddr, user_data, args->size);
3928                 mutex_lock(&dev->struct_mutex);
3929                 if (unwritten)
3930                         return -EFAULT;
3931         }
3932
3933         intel_gtt_chipset_flush();
3934         return 0;
3935 }
3936
3937 void i915_gem_release(struct drm_device *dev, struct drm_file *file)
3938 {
3939         struct drm_i915_file_private *file_priv = file->driver_priv;
3940
3941         /* Clean up our request list when the client is going away, so that
3942          * later retire_requests won't dereference our soon-to-be-gone
3943          * file_priv.
3944          */
3945         spin_lock(&file_priv->mm.lock);
3946         while (!list_empty(&file_priv->mm.request_list)) {
3947                 struct drm_i915_gem_request *request;
3948
3949                 request = list_first_entry(&file_priv->mm.request_list,
3950                                            struct drm_i915_gem_request,
3951                                            client_list);
3952                 list_del(&request->client_list);
3953                 request->file_priv = NULL;
3954         }
3955         spin_unlock(&file_priv->mm.lock);
3956 }
3957
3958 static int
3959 i915_gpu_is_active(struct drm_device *dev)
3960 {
3961         drm_i915_private_t *dev_priv = dev->dev_private;
3962         int lists_empty;
3963
3964         lists_empty = list_empty(&dev_priv->mm.flushing_list) &&
3965                       list_empty(&dev_priv->mm.active_list);
3966
3967         return !lists_empty;
3968 }
3969
3970 static int
3971 i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc)
3972 {
3973         struct drm_i915_private *dev_priv =
3974                 container_of(shrinker,
3975                              struct drm_i915_private,
3976                              mm.inactive_shrinker);
3977         struct drm_device *dev = dev_priv->dev;
3978         struct drm_i915_gem_object *obj, *next;
3979         int nr_to_scan = sc->nr_to_scan;
3980         int cnt;
3981
3982         if (!mutex_trylock(&dev->struct_mutex))
3983                 return 0;
3984
3985         /* "fast-path" to count number of available objects */
3986         if (nr_to_scan == 0) {
3987                 cnt = 0;
3988                 list_for_each_entry(obj,
3989                                     &dev_priv->mm.inactive_list,
3990                                     mm_list)
3991                         cnt++;
3992                 mutex_unlock(&dev->struct_mutex);
3993                 return cnt / 100 * sysctl_vfs_cache_pressure;
3994         }
3995
3996 rescan:
3997         /* first scan for clean buffers */
3998         i915_gem_retire_requests(dev);
3999
4000         list_for_each_entry_safe(obj, next,
4001                                  &dev_priv->mm.inactive_list,
4002                                  mm_list) {
4003                 if (i915_gem_object_is_purgeable(obj)) {
4004                         if (i915_gem_object_unbind(obj) == 0 &&
4005                             --nr_to_scan == 0)
4006                                 break;
4007                 }
4008         }
4009
4010         /* second pass, evict/count anything still on the inactive list */
4011         cnt = 0;
4012         list_for_each_entry_safe(obj, next,
4013                                  &dev_priv->mm.inactive_list,
4014                                  mm_list) {
4015                 if (nr_to_scan &&
4016                     i915_gem_object_unbind(obj) == 0)
4017                         nr_to_scan--;
4018                 else
4019                         cnt++;
4020         }
4021
4022         if (nr_to_scan && i915_gpu_is_active(dev)) {
4023                 /*
4024                  * We are desperate for pages, so as a last resort, wait
4025                  * for the GPU to finish and discard whatever we can.
4026                  * This has a dramatic impact to reduce the number of
4027                  * OOM-killer events whilst running the GPU aggressively.
4028                  */
4029                 if (i915_gpu_idle(dev) == 0)
4030                         goto rescan;
4031         }
4032         mutex_unlock(&dev->struct_mutex);
4033         return cnt / 100 * sysctl_vfs_cache_pressure;
4034 }