1 /*
2  * Copyright © 2008 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *
26  */
27
28 #include "drmP.h"
29 #include "drm.h"
30 #include "i915_drm.h"
31 #include "i915_drv.h"
32 #include "i915_trace.h"
33 #include "intel_drv.h"
34 #include <linux/slab.h>
35 #include <linux/swap.h>
36 #include <linux/pci.h>
37 #include <linux/intel-gtt.h>
38
39 static uint32_t i915_gem_get_gtt_alignment(struct drm_gem_object *obj);
40
41 static int i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj,
42                                                   bool pipelined);
43 static void i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj);
44 static void i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj);
45 static int i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj,
46                                              int write);
47 static int i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj,
48                                                      uint64_t offset,
49                                                      uint64_t size);
50 static void i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj);
51 static int i915_gem_object_wait_rendering(struct drm_gem_object *obj,
52                                           bool interruptible);
53 static int i915_gem_object_bind_to_gtt(struct drm_gem_object *obj,
54                                        unsigned alignment, bool mappable);
55 static void i915_gem_clear_fence_reg(struct drm_gem_object *obj);
56 static int i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
57                                 struct drm_i915_gem_pwrite *args,
58                                 struct drm_file *file_priv);
59 static void i915_gem_free_object_tail(struct drm_gem_object *obj);
60
61 static int
62 i915_gem_object_get_pages(struct drm_gem_object *obj,
63                           gfp_t gfpmask);
64
65 static void
66 i915_gem_object_put_pages(struct drm_gem_object *obj);
67
68 static LIST_HEAD(shrink_list);
69 static DEFINE_SPINLOCK(shrink_list_lock);
70
71 /* some bookkeeping */
72 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
73                                   size_t size)
74 {
75         dev_priv->mm.object_count++;
76         dev_priv->mm.object_memory += size;
77 }
78
79 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
80                                      size_t size)
81 {
82         dev_priv->mm.object_count--;
83         dev_priv->mm.object_memory -= size;
84 }
85
86 static void i915_gem_info_add_gtt(struct drm_i915_private *dev_priv,
87                                   struct drm_gem_object *obj)
88 {
89         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
90         dev_priv->mm.gtt_count++;
91         dev_priv->mm.gtt_memory += obj->size;
92         if (obj_priv->gtt_offset < dev_priv->mm.gtt_mappable_end) {
93                 dev_priv->mm.mappable_gtt_used +=
94                         min_t(size_t, obj->size,
95                               dev_priv->mm.gtt_mappable_end
96                                         - obj_priv->gtt_offset);
97         }
98 }
99
100 static void i915_gem_info_remove_gtt(struct drm_i915_private *dev_priv,
101                                      struct drm_gem_object *obj)
102 {
103         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
104         dev_priv->mm.gtt_count--;
105         dev_priv->mm.gtt_memory -= obj->size;
106         if (obj_priv->gtt_offset < dev_priv->mm.gtt_mappable_end) {
107                 dev_priv->mm.mappable_gtt_used -=
108                         min_t(size_t, obj->size,
109                               dev_priv->mm.gtt_mappable_end
110                                         - obj_priv->gtt_offset);
111         }
112 }
113
114 /**
115  * Update the mappable working set counters. Call _only_ when there is a change
116  * in one of (pin|fault)_mappable and update *_mappable _before_ calling.
117  * @mappable: new state of the changed mappable flag (either pin_ or fault_).
118  */
119 static void
120 i915_gem_info_update_mappable(struct drm_i915_private *dev_priv,
121                               struct drm_gem_object *obj,
122                               bool mappable)
123 {
124         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
125
126         if (mappable) {
127                 if (obj_priv->pin_mappable && obj_priv->fault_mappable)
128                         /* Combined state was already mappable. */
129                         return;
130                 dev_priv->mm.gtt_mappable_count++;
131                 dev_priv->mm.gtt_mappable_memory += obj->size;
132         } else {
133                 if (obj_priv->pin_mappable || obj_priv->fault_mappable)
134                         /* Combined state still mappable. */
135                         return;
136                 dev_priv->mm.gtt_mappable_count--;
137                 dev_priv->mm.gtt_mappable_memory -= obj->size;
138         }
139 }
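
/*
 * Illustrative call pattern for the helper above (a sketch drawn from its
 * callers later in this file, not extra driver logic): the pin_/fault_
 * mappable flag is flipped first, then the accounting helper is told the
 * new state, e.g.
 *
 *        obj_priv->fault_mappable = true;
 *        i915_gem_info_update_mappable(dev_priv, obj, true);
 */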
140
141 static void i915_gem_info_add_pin(struct drm_i915_private *dev_priv,
142                                   struct drm_gem_object *obj,
143                                   bool mappable)
144 {
145         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
146         dev_priv->mm.pin_count++;
147         dev_priv->mm.pin_memory += obj->size;
148         if (mappable) {
149                 obj_priv->pin_mappable = true;
150                 i915_gem_info_update_mappable(dev_priv, obj, true);
151         }
152 }
153
154 static void i915_gem_info_remove_pin(struct drm_i915_private *dev_priv,
155                                      struct drm_gem_object *obj)
156 {
157         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
158         dev_priv->mm.pin_count--;
159         dev_priv->mm.pin_memory -= obj->size;
160         if (obj_priv->pin_mappable) {
161                 obj_priv->pin_mappable = false;
162                 i915_gem_info_update_mappable(dev_priv, obj, false);
163         }
164 }
165
166 int
167 i915_gem_check_is_wedged(struct drm_device *dev)
168 {
169         struct drm_i915_private *dev_priv = dev->dev_private;
170         struct completion *x = &dev_priv->error_completion;
171         unsigned long flags;
172         int ret;
173
174         if (!atomic_read(&dev_priv->mm.wedged))
175                 return 0;
176
177         ret = wait_for_completion_interruptible(x);
178         if (ret)
179                 return ret;
180
181         /* Success, we reset the GPU! */
182         if (!atomic_read(&dev_priv->mm.wedged))
183                 return 0;
184
185         /* GPU is hung, bump the completion count to account for
186          * the token we just consumed so that we never hit zero and
187          * end up waiting upon a subsequent completion event that
188          * will never happen.
189          */
190         spin_lock_irqsave(&x->wait.lock, flags);
191         x->done++;
192         spin_unlock_irqrestore(&x->wait.lock, flags);
193         return -EIO;
194 }
195
196 static int i915_mutex_lock_interruptible(struct drm_device *dev)
197 {
198         struct drm_i915_private *dev_priv = dev->dev_private;
199         int ret;
200
201         ret = i915_gem_check_is_wedged(dev);
202         if (ret)
203                 return ret;
204
205         ret = mutex_lock_interruptible(&dev->struct_mutex);
206         if (ret)
207                 return ret;
208
209         if (atomic_read(&dev_priv->mm.wedged)) {
210                 mutex_unlock(&dev->struct_mutex);
211                 return -EAGAIN;
212         }
213
214         WARN_ON(i915_verify_lists(dev));
215         return 0;
216 }
217
218 static inline bool
219 i915_gem_object_is_inactive(struct drm_i915_gem_object *obj_priv)
220 {
221         return obj_priv->gtt_space &&
222                 !obj_priv->active &&
223                 obj_priv->pin_count == 0;
224 }
225
226 int i915_gem_do_init(struct drm_device *dev,
227                      unsigned long start,
228                      unsigned long mappable_end,
229                      unsigned long end)
230 {
231         drm_i915_private_t *dev_priv = dev->dev_private;
232
233         if (start >= end ||
234             (start & (PAGE_SIZE - 1)) != 0 ||
235             (end & (PAGE_SIZE - 1)) != 0) {
236                 return -EINVAL;
237         }
238
239         drm_mm_init(&dev_priv->mm.gtt_space, start,
240                     end - start);
241
242         dev_priv->mm.gtt_total = end - start;
243         dev_priv->mm.mappable_gtt_total = min(end, mappable_end) - start;
244         dev_priv->mm.gtt_mappable_end = mappable_end;
245
246         return 0;
247 }
248
249 int
250 i915_gem_init_ioctl(struct drm_device *dev, void *data,
251                     struct drm_file *file_priv)
252 {
253         struct drm_i915_gem_init *args = data;
254         int ret;
255
256         mutex_lock(&dev->struct_mutex);
257         ret = i915_gem_do_init(dev, args->gtt_start, args->gtt_end, args->gtt_end);
258         mutex_unlock(&dev->struct_mutex);
259
260         return ret;
261 }
262
263 int
264 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
265                             struct drm_file *file_priv)
266 {
267         struct drm_i915_private *dev_priv = dev->dev_private;
268         struct drm_i915_gem_get_aperture *args = data;
269
270         if (!(dev->driver->driver_features & DRIVER_GEM))
271                 return -ENODEV;
272
273         mutex_lock(&dev->struct_mutex);
274         args->aper_size = dev_priv->mm.gtt_total;
275         args->aper_available_size = args->aper_size - dev_priv->mm.pin_memory;
276         mutex_unlock(&dev->struct_mutex);
277
278         return 0;
279 }
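
/*
 * A minimal userspace sketch of this query (field names per the i915 uAPI
 * headers; libdrm normally wraps this, error handling omitted):
 *
 *        struct drm_i915_gem_get_aperture aper = { 0 };
 *        ioctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aper);
 *        // aper.aper_size is the total GTT; aper.aper_available_size is
 *        // the total minus the currently pinned memory.
 */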
280
281
282 /**
283  * Creates a new mm object and returns a handle to it.
284  */
285 int
286 i915_gem_create_ioctl(struct drm_device *dev, void *data,
287                       struct drm_file *file_priv)
288 {
289         struct drm_i915_gem_create *args = data;
290         struct drm_gem_object *obj;
291         int ret;
292         u32 handle;
293
294         args->size = roundup(args->size, PAGE_SIZE);
295
296         /* Allocate the new object */
297         obj = i915_gem_alloc_object(dev, args->size);
298         if (obj == NULL)
299                 return -ENOMEM;
300
301         ret = drm_gem_handle_create(file_priv, obj, &handle);
302         if (ret) {
303                 drm_gem_object_release(obj);
304                 i915_gem_info_remove_obj(dev->dev_private, obj->size);
305                 kfree(obj);
306                 return ret;
307         }
308
309         /* drop reference from allocate - handle holds it now */
310         drm_gem_object_unreference(obj);
311         trace_i915_gem_object_create(obj);
312
313         args->handle = handle;
314         return 0;
315 }
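
/*
 * Userspace reaches the ioctl above through DRM_IOCTL_I915_GEM_CREATE; a
 * minimal sketch of the call (error handling omitted):
 *
 *        struct drm_i915_gem_create create = { .size = 4096 };
 *        ioctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create);
 *        // create.handle now names the new object; the kernel rounds the
 *        // requested size up to a multiple of PAGE_SIZE.
 */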
316
317 static bool
318 i915_gem_object_cpu_accessible(struct drm_i915_gem_object *obj)
319 {
320         struct drm_device *dev = obj->base.dev;
321         drm_i915_private_t *dev_priv = dev->dev_private;
322
323         return obj->gtt_space == NULL ||
324                 obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end;
325 }
326
327 static inline int
328 fast_shmem_read(struct page **pages,
329                 loff_t page_base, int page_offset,
330                 char __user *data,
331                 int length)
332 {
333         char *vaddr;
334         int ret;
335
336         vaddr = kmap_atomic(pages[page_base >> PAGE_SHIFT]);
337         ret = __copy_to_user_inatomic(data, vaddr + page_offset, length);
338         kunmap_atomic(vaddr);
339
340         return ret;
341 }
342
343 static int i915_gem_object_needs_bit17_swizzle(struct drm_gem_object *obj)
344 {
345         drm_i915_private_t *dev_priv = obj->dev->dev_private;
346         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
347
348         return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
349                 obj_priv->tiling_mode != I915_TILING_NONE;
350 }
351
352 static inline void
353 slow_shmem_copy(struct page *dst_page,
354                 int dst_offset,
355                 struct page *src_page,
356                 int src_offset,
357                 int length)
358 {
359         char *dst_vaddr, *src_vaddr;
360
361         dst_vaddr = kmap(dst_page);
362         src_vaddr = kmap(src_page);
363
364         memcpy(dst_vaddr + dst_offset, src_vaddr + src_offset, length);
365
366         kunmap(src_page);
367         kunmap(dst_page);
368 }
369
370 static inline void
371 slow_shmem_bit17_copy(struct page *gpu_page,
372                       int gpu_offset,
373                       struct page *cpu_page,
374                       int cpu_offset,
375                       int length,
376                       int is_read)
377 {
378         char *gpu_vaddr, *cpu_vaddr;
379
380         /* Use the unswizzled path if this page isn't affected. */
381         if ((page_to_phys(gpu_page) & (1 << 17)) == 0) {
382                 if (is_read)
383                         return slow_shmem_copy(cpu_page, cpu_offset,
384                                                gpu_page, gpu_offset, length);
385                 else
386                         return slow_shmem_copy(gpu_page, gpu_offset,
387                                                cpu_page, cpu_offset, length);
388         }
389
390         gpu_vaddr = kmap(gpu_page);
391         cpu_vaddr = kmap(cpu_page);
392
393         /* Copy the data, XORing A6 with A17 (1). The user already knows he's
394          * XORing with the other bits (A9 for Y, A9 and A10 for X)
395          */
396         while (length > 0) {
397                 int cacheline_end = ALIGN(gpu_offset + 1, 64);
398                 int this_length = min(cacheline_end - gpu_offset, length);
399                 int swizzled_gpu_offset = gpu_offset ^ 64;
400
401                 if (is_read) {
402                         memcpy(cpu_vaddr + cpu_offset,
403                                gpu_vaddr + swizzled_gpu_offset,
404                                this_length);
405                 } else {
406                         memcpy(gpu_vaddr + swizzled_gpu_offset,
407                                cpu_vaddr + cpu_offset,
408                                this_length);
409                 }
410                 cpu_offset += this_length;
411                 gpu_offset += this_length;
412                 length -= this_length;
413         }
414
415         kunmap(cpu_page);
416         kunmap(gpu_page);
417 }
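
/*
 * Worked example of the swizzle above: on a page whose physical address has
 * bit 17 set, gpu_offset ^ 64 swaps the two 64-byte cachelines within each
 * 128-byte block, so a read of object bytes 0..63 actually comes from bytes
 * 64..127 of the page, and vice versa (a description of the copy loop above,
 * not of any hardware register).
 */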
418
419 /**
420  * This is the fast shmem pread path, which attempts to copy_to_user directly
421  * from the backing pages of the object into the user's address space.  On a
422  * fault, it fails so we can fall back to i915_gem_shmem_pread_slow().
423  */
424 static int
425 i915_gem_shmem_pread_fast(struct drm_device *dev, struct drm_gem_object *obj,
426                           struct drm_i915_gem_pread *args,
427                           struct drm_file *file_priv)
428 {
429         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
430         ssize_t remain;
431         loff_t offset, page_base;
432         char __user *user_data;
433         int page_offset, page_length;
434
435         user_data = (char __user *) (uintptr_t) args->data_ptr;
436         remain = args->size;
437
438         obj_priv = to_intel_bo(obj);
439         offset = args->offset;
440
441         while (remain > 0) {
442                 /* Operation in this page
443                  *
444                  * page_base = byte offset of the page within the object
445                  * page_offset = offset within page
446                  * page_length = bytes to copy for this page
447                  */
448                 page_base = (offset & ~(PAGE_SIZE-1));
449                 page_offset = offset & (PAGE_SIZE-1);
450                 page_length = remain;
451                 if ((page_offset + remain) > PAGE_SIZE)
452                         page_length = PAGE_SIZE - page_offset;
453
454                 if (fast_shmem_read(obj_priv->pages,
455                                     page_base, page_offset,
456                                     user_data, page_length))
457                         return -EFAULT;
458
459                 remain -= page_length;
460                 user_data += page_length;
461                 offset += page_length;
462         }
463
464         return 0;
465 }
466
467 static int
468 i915_gem_object_get_pages_or_evict(struct drm_gem_object *obj)
469 {
470         int ret;
471
472         ret = i915_gem_object_get_pages(obj, __GFP_NORETRY | __GFP_NOWARN);
473
474         /* If we've insufficient memory to map in the pages, attempt
475          * to make some space by throwing out some old buffers.
476          */
477         if (ret == -ENOMEM) {
478                 struct drm_device *dev = obj->dev;
479
480                 ret = i915_gem_evict_something(dev, obj->size,
481                                                i915_gem_get_gtt_alignment(obj),
482                                                false);
483                 if (ret)
484                         return ret;
485
486                 ret = i915_gem_object_get_pages(obj, 0);
487         }
488
489         return ret;
490 }
491
492 /**
493  * This is the fallback shmem pread path, which pins the user pages with
494  * get_user_pages() before taking the struct_mutex, so that we can copy out
495  * of the object's backing pages while holding the struct mutex without
496  * taking page faults.
497  */
498 static int
499 i915_gem_shmem_pread_slow(struct drm_device *dev, struct drm_gem_object *obj,
500                           struct drm_i915_gem_pread *args,
501                           struct drm_file *file_priv)
502 {
503         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
504         struct mm_struct *mm = current->mm;
505         struct page **user_pages;
506         ssize_t remain;
507         loff_t offset, pinned_pages, i;
508         loff_t first_data_page, last_data_page, num_pages;
509         int shmem_page_index, shmem_page_offset;
510         int data_page_index,  data_page_offset;
511         int page_length;
512         int ret;
513         uint64_t data_ptr = args->data_ptr;
514         int do_bit17_swizzling;
515
516         remain = args->size;
517
518         /* Pin the user pages containing the data.  We can't fault while
519          * holding the struct mutex, yet we want to hold it while
520          * dereferencing the user data.
521          */
522         first_data_page = data_ptr / PAGE_SIZE;
523         last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
524         num_pages = last_data_page - first_data_page + 1;
525
526         user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
527         if (user_pages == NULL)
528                 return -ENOMEM;
529
530         mutex_unlock(&dev->struct_mutex);
531         down_read(&mm->mmap_sem);
532         pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
533                                       num_pages, 1, 0, user_pages, NULL);
534         up_read(&mm->mmap_sem);
535         mutex_lock(&dev->struct_mutex);
536         if (pinned_pages < num_pages) {
537                 ret = -EFAULT;
538                 goto out;
539         }
540
541         ret = i915_gem_object_set_cpu_read_domain_range(obj,
542                                                         args->offset,
543                                                         args->size);
544         if (ret)
545                 goto out;
546
547         do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
548
549         obj_priv = to_intel_bo(obj);
550         offset = args->offset;
551
552         while (remain > 0) {
553                 /* Operation in this page
554                  *
555                  * shmem_page_index = page number within shmem file
556                  * shmem_page_offset = offset within page in shmem file
557                  * data_page_index = page number in get_user_pages return
558                  * data_page_offset = offset within data_page_index page.
559                  * page_length = bytes to copy for this page
560                  */
561                 shmem_page_index = offset / PAGE_SIZE;
562                 shmem_page_offset = offset & ~PAGE_MASK;
563                 data_page_index = data_ptr / PAGE_SIZE - first_data_page;
564                 data_page_offset = data_ptr & ~PAGE_MASK;
565
566                 page_length = remain;
567                 if ((shmem_page_offset + page_length) > PAGE_SIZE)
568                         page_length = PAGE_SIZE - shmem_page_offset;
569                 if ((data_page_offset + page_length) > PAGE_SIZE)
570                         page_length = PAGE_SIZE - data_page_offset;
571
572                 if (do_bit17_swizzling) {
573                         slow_shmem_bit17_copy(obj_priv->pages[shmem_page_index],
574                                               shmem_page_offset,
575                                               user_pages[data_page_index],
576                                               data_page_offset,
577                                               page_length,
578                                               1);
579                 } else {
580                         slow_shmem_copy(user_pages[data_page_index],
581                                         data_page_offset,
582                                         obj_priv->pages[shmem_page_index],
583                                         shmem_page_offset,
584                                         page_length);
585                 }
586
587                 remain -= page_length;
588                 data_ptr += page_length;
589                 offset += page_length;
590         }
591
592 out:
593         for (i = 0; i < pinned_pages; i++) {
594                 SetPageDirty(user_pages[i]);
595                 page_cache_release(user_pages[i]);
596         }
597         drm_free_large(user_pages);
598
599         return ret;
600 }
601
602 /**
603  * Reads data from the object referenced by handle.
604  *
605  * On error, the contents of *data are undefined.
606  */
607 int
608 i915_gem_pread_ioctl(struct drm_device *dev, void *data,
609                      struct drm_file *file_priv)
610 {
611         struct drm_i915_gem_pread *args = data;
612         struct drm_gem_object *obj;
613         struct drm_i915_gem_object *obj_priv;
614         int ret = 0;
615
616         ret = i915_mutex_lock_interruptible(dev);
617         if (ret)
618                 return ret;
619
620         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
621         if (obj == NULL) {
622                 ret = -ENOENT;
623                 goto unlock;
624         }
625         obj_priv = to_intel_bo(obj);
626
627         /* Bounds check source.  */
628         if (args->offset > obj->size || args->size > obj->size - args->offset) {
629                 ret = -EINVAL;
630                 goto out;
631         }
632
633         if (args->size == 0)
634                 goto out;
635
636         if (!access_ok(VERIFY_WRITE,
637                        (char __user *)(uintptr_t)args->data_ptr,
638                        args->size)) {
639                 ret = -EFAULT;
640                 goto out;
641         }
642
643         ret = fault_in_pages_writeable((char __user *)(uintptr_t)args->data_ptr,
644                                        args->size);
645         if (ret) {
646                 ret = -EFAULT;
647                 goto out;
648         }
649
650         ret = i915_gem_object_get_pages_or_evict(obj);
651         if (ret)
652                 goto out;
653
654         ret = i915_gem_object_set_cpu_read_domain_range(obj,
655                                                         args->offset,
656                                                         args->size);
657         if (ret)
658                 goto out_put;
659
660         ret = -EFAULT;
661         if (!i915_gem_object_needs_bit17_swizzle(obj))
662                 ret = i915_gem_shmem_pread_fast(dev, obj, args, file_priv);
663         if (ret == -EFAULT)
664                 ret = i915_gem_shmem_pread_slow(dev, obj, args, file_priv);
665
666 out_put:
667         i915_gem_object_put_pages(obj);
668 out:
669         drm_gem_object_unreference(obj);
670 unlock:
671         mutex_unlock(&dev->struct_mutex);
672         return ret;
673 }
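
/*
 * A minimal userspace sketch of a pread call (assuming `handle` was obtained
 * from DRM_IOCTL_I915_GEM_CREATE above and `buf` holds at least 4096 bytes):
 *
 *        struct drm_i915_gem_pread pread = {
 *                .handle   = handle,
 *                .offset   = 0,
 *                .size     = 4096,
 *                .data_ptr = (uint64_t)(uintptr_t)buf,
 *        };
 *        ioctl(fd, DRM_IOCTL_I915_GEM_PREAD, &pread);
 */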
674
675 /* This is the fast write path which cannot handle
676  * page faults in the source data
677  */
678
679 static inline int
680 fast_user_write(struct io_mapping *mapping,
681                 loff_t page_base, int page_offset,
682                 char __user *user_data,
683                 int length)
684 {
685         char *vaddr_atomic;
686         unsigned long unwritten;
687
688         vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
689         unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + page_offset,
690                                                       user_data, length);
691         io_mapping_unmap_atomic(vaddr_atomic);
692         return unwritten;
693 }
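
/*
 * Note on the helper above: the io mapping is an atomic one, so page faults
 * are disabled for the duration of the copy; if the source user page is not
 * resident, the copy returns a non-zero "unwritten" count instead of
 * sleeping, and the caller falls back to the sleeping slow path below.
 */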
694
695 /* Here's the write path which can sleep for
696  * page faults
697  */
698
699 static inline void
700 slow_kernel_write(struct io_mapping *mapping,
701                   loff_t gtt_base, int gtt_offset,
702                   struct page *user_page, int user_offset,
703                   int length)
704 {
705         char __iomem *dst_vaddr;
706         char *src_vaddr;
707
708         dst_vaddr = io_mapping_map_wc(mapping, gtt_base);
709         src_vaddr = kmap(user_page);
710
711         memcpy_toio(dst_vaddr + gtt_offset,
712                     src_vaddr + user_offset,
713                     length);
714
715         kunmap(user_page);
716         io_mapping_unmap(dst_vaddr);
717 }
718
719 static inline int
720 fast_shmem_write(struct page **pages,
721                  loff_t page_base, int page_offset,
722                  char __user *data,
723                  int length)
724 {
725         char *vaddr;
726         int ret;
727
728         vaddr = kmap_atomic(pages[page_base >> PAGE_SHIFT]);
729         ret = __copy_from_user_inatomic(vaddr + page_offset, data, length);
730         kunmap_atomic(vaddr);
731
732         return ret;
733 }
734
735 /**
736  * This is the fast pwrite path, where we copy the data directly from the
737  * user into the GTT, uncached.
738  */
739 static int
740 i915_gem_gtt_pwrite_fast(struct drm_device *dev, struct drm_gem_object *obj,
741                          struct drm_i915_gem_pwrite *args,
742                          struct drm_file *file_priv)
743 {
744         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
745         drm_i915_private_t *dev_priv = dev->dev_private;
746         ssize_t remain;
747         loff_t offset, page_base;
748         char __user *user_data;
749         int page_offset, page_length;
750
751         user_data = (char __user *) (uintptr_t) args->data_ptr;
752         remain = args->size;
753
754         obj_priv = to_intel_bo(obj);
755         offset = obj_priv->gtt_offset + args->offset;
756
757         while (remain > 0) {
758                 /* Operation in this page
759                  *
760                  * page_base = page offset within aperture
761                  * page_offset = offset within page
762                  * page_length = bytes to copy for this page
763                  */
764                 page_base = (offset & ~(PAGE_SIZE-1));
765                 page_offset = offset & (PAGE_SIZE-1);
766                 page_length = remain;
767                 if ((page_offset + remain) > PAGE_SIZE)
768                         page_length = PAGE_SIZE - page_offset;
769
770                 /* If we get a fault while copying data, then (presumably) our
771                  * source page isn't available.  Return the error and we'll
772                  * retry in the slow path.
773                  */
774                 if (fast_user_write(dev_priv->mm.gtt_mapping, page_base,
775                                     page_offset, user_data, page_length))
776
777                         return -EFAULT;
778
779                 remain -= page_length;
780                 user_data += page_length;
781                 offset += page_length;
782         }
783
784         return 0;
785 }
786
787 /**
788  * This is the fallback GTT pwrite path, which uses get_user_pages to pin
789  * the memory and maps it using kmap for copying.
790  *
791  * This code resulted in x11perf -rgb10text consuming about 10% more CPU
792  * than using i915_gem_gtt_pwrite_fast on a G45 (32-bit).
793  */
794 static int
795 i915_gem_gtt_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
796                          struct drm_i915_gem_pwrite *args,
797                          struct drm_file *file_priv)
798 {
799         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
800         drm_i915_private_t *dev_priv = dev->dev_private;
801         ssize_t remain;
802         loff_t gtt_page_base, offset;
803         loff_t first_data_page, last_data_page, num_pages;
804         loff_t pinned_pages, i;
805         struct page **user_pages;
806         struct mm_struct *mm = current->mm;
807         int gtt_page_offset, data_page_offset, data_page_index, page_length;
808         int ret;
809         uint64_t data_ptr = args->data_ptr;
810
811         remain = args->size;
812
813         /* Pin the user pages containing the data.  We can't fault while
814          * holding the struct mutex, and all of the pwrite implementations
815          * want to hold it while dereferencing the user data.
816          */
817         first_data_page = data_ptr / PAGE_SIZE;
818         last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
819         num_pages = last_data_page - first_data_page + 1;
820
821         user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
822         if (user_pages == NULL)
823                 return -ENOMEM;
824
825         mutex_unlock(&dev->struct_mutex);
826         down_read(&mm->mmap_sem);
827         pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
828                                       num_pages, 0, 0, user_pages, NULL);
829         up_read(&mm->mmap_sem);
830         mutex_lock(&dev->struct_mutex);
831         if (pinned_pages < num_pages) {
832                 ret = -EFAULT;
833                 goto out_unpin_pages;
834         }
835
836         ret = i915_gem_object_set_to_gtt_domain(obj, 1);
837         if (ret)
838                 goto out_unpin_pages;
839
840         obj_priv = to_intel_bo(obj);
841         offset = obj_priv->gtt_offset + args->offset;
842
843         while (remain > 0) {
844                 /* Operation in this page
845                  *
846                  * gtt_page_base = page offset within aperture
847                  * gtt_page_offset = offset within page in aperture
848                  * data_page_index = page number in get_user_pages return
849                  * data_page_offset = offset within data_page_index page.
850                  * page_length = bytes to copy for this page
851                  */
852                 gtt_page_base = offset & PAGE_MASK;
853                 gtt_page_offset = offset & ~PAGE_MASK;
854                 data_page_index = data_ptr / PAGE_SIZE - first_data_page;
855                 data_page_offset = data_ptr & ~PAGE_MASK;
856
857                 page_length = remain;
858                 if ((gtt_page_offset + page_length) > PAGE_SIZE)
859                         page_length = PAGE_SIZE - gtt_page_offset;
860                 if ((data_page_offset + page_length) > PAGE_SIZE)
861                         page_length = PAGE_SIZE - data_page_offset;
862
863                 slow_kernel_write(dev_priv->mm.gtt_mapping,
864                                   gtt_page_base, gtt_page_offset,
865                                   user_pages[data_page_index],
866                                   data_page_offset,
867                                   page_length);
868
869                 remain -= page_length;
870                 offset += page_length;
871                 data_ptr += page_length;
872         }
873
874 out_unpin_pages:
875         for (i = 0; i < pinned_pages; i++)
876                 page_cache_release(user_pages[i]);
877         drm_free_large(user_pages);
878
879         return ret;
880 }
881
882 /**
883  * This is the fast shmem pwrite path, which attempts to directly
884  * copy_from_user into the kmapped pages backing the object.
885  */
886 static int
887 i915_gem_shmem_pwrite_fast(struct drm_device *dev, struct drm_gem_object *obj,
888                            struct drm_i915_gem_pwrite *args,
889                            struct drm_file *file_priv)
890 {
891         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
892         ssize_t remain;
893         loff_t offset, page_base;
894         char __user *user_data;
895         int page_offset, page_length;
896
897         user_data = (char __user *) (uintptr_t) args->data_ptr;
898         remain = args->size;
899
900         obj_priv = to_intel_bo(obj);
901         offset = args->offset;
902         obj_priv->dirty = 1;
903
904         while (remain > 0) {
905                 /* Operation in this page
906                  *
907                  * page_base = byte offset of the page within the object
908                  * page_offset = offset within page
909                  * page_length = bytes to copy for this page
910                  */
911                 page_base = (offset & ~(PAGE_SIZE-1));
912                 page_offset = offset & (PAGE_SIZE-1);
913                 page_length = remain;
914                 if ((page_offset + remain) > PAGE_SIZE)
915                         page_length = PAGE_SIZE - page_offset;
916
917                 if (fast_shmem_write(obj_priv->pages,
918                                        page_base, page_offset,
919                                        user_data, page_length))
920                         return -EFAULT;
921
922                 remain -= page_length;
923                 user_data += page_length;
924                 offset += page_length;
925         }
926
927         return 0;
928 }
929
930 /**
931  * This is the fallback shmem pwrite path, which uses get_user_pages to pin
932  * the memory and maps it using kmap for copying.
933  *
934  * This avoids taking mmap_sem for faulting on the user's address while the
935  * struct_mutex is held.
936  */
937 static int
938 i915_gem_shmem_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
939                            struct drm_i915_gem_pwrite *args,
940                            struct drm_file *file_priv)
941 {
942         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
943         struct mm_struct *mm = current->mm;
944         struct page **user_pages;
945         ssize_t remain;
946         loff_t offset, pinned_pages, i;
947         loff_t first_data_page, last_data_page, num_pages;
948         int shmem_page_index, shmem_page_offset;
949         int data_page_index,  data_page_offset;
950         int page_length;
951         int ret;
952         uint64_t data_ptr = args->data_ptr;
953         int do_bit17_swizzling;
954
955         remain = args->size;
956
957         /* Pin the user pages containing the data.  We can't fault while
958          * holding the struct mutex, and all of the pwrite implementations
959          * want to hold it while dereferencing the user data.
960          */
961         first_data_page = data_ptr / PAGE_SIZE;
962         last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
963         num_pages = last_data_page - first_data_page + 1;
964
965         user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
966         if (user_pages == NULL)
967                 return -ENOMEM;
968
969         mutex_unlock(&dev->struct_mutex);
970         down_read(&mm->mmap_sem);
971         pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
972                                       num_pages, 0, 0, user_pages, NULL);
973         up_read(&mm->mmap_sem);
974         mutex_lock(&dev->struct_mutex);
975         if (pinned_pages < num_pages) {
976                 ret = -EFAULT;
977                 goto out;
978         }
979
980         ret = i915_gem_object_set_to_cpu_domain(obj, 1);
981         if (ret)
982                 goto out;
983
984         do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
985
986         obj_priv = to_intel_bo(obj);
987         offset = args->offset;
988         obj_priv->dirty = 1;
989
990         while (remain > 0) {
991                 /* Operation in this page
992                  *
993                  * shmem_page_index = page number within shmem file
994                  * shmem_page_offset = offset within page in shmem file
995                  * data_page_index = page number in get_user_pages return
996                  * data_page_offset = offset within data_page_index page.
997                  * page_length = bytes to copy for this page
998                  */
999                 shmem_page_index = offset / PAGE_SIZE;
1000                 shmem_page_offset = offset & ~PAGE_MASK;
1001                 data_page_index = data_ptr / PAGE_SIZE - first_data_page;
1002                 data_page_offset = data_ptr & ~PAGE_MASK;
1003
1004                 page_length = remain;
1005                 if ((shmem_page_offset + page_length) > PAGE_SIZE)
1006                         page_length = PAGE_SIZE - shmem_page_offset;
1007                 if ((data_page_offset + page_length) > PAGE_SIZE)
1008                         page_length = PAGE_SIZE - data_page_offset;
1009
1010                 if (do_bit17_swizzling) {
1011                         slow_shmem_bit17_copy(obj_priv->pages[shmem_page_index],
1012                                               shmem_page_offset,
1013                                               user_pages[data_page_index],
1014                                               data_page_offset,
1015                                               page_length,
1016                                               0);
1017                 } else {
1018                         slow_shmem_copy(obj_priv->pages[shmem_page_index],
1019                                         shmem_page_offset,
1020                                         user_pages[data_page_index],
1021                                         data_page_offset,
1022                                         page_length);
1023                 }
1024
1025                 remain -= page_length;
1026                 data_ptr += page_length;
1027                 offset += page_length;
1028         }
1029
1030 out:
1031         for (i = 0; i < pinned_pages; i++)
1032                 page_cache_release(user_pages[i]);
1033         drm_free_large(user_pages);
1034
1035         return ret;
1036 }
1037
1038 /**
1039  * Writes data to the object referenced by handle.
1040  *
1041  * On error, the contents of the buffer that were to be modified are undefined.
1042  */
1043 int
1044 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
1045                       struct drm_file *file)
1046 {
1047         struct drm_i915_gem_pwrite *args = data;
1048         struct drm_gem_object *obj;
1049         struct drm_i915_gem_object *obj_priv;
1050         int ret = 0;
1051
1052         ret = i915_mutex_lock_interruptible(dev);
1053         if (ret)
1054                 return ret;
1055
1056         obj = drm_gem_object_lookup(dev, file, args->handle);
1057         if (obj == NULL) {
1058                 ret = -ENOENT;
1059                 goto unlock;
1060         }
1061         obj_priv = to_intel_bo(obj);
1062
1063
1064         /* Bounds check destination. */
1065         if (args->offset > obj->size || args->size > obj->size - args->offset) {
1066                 ret = -EINVAL;
1067                 goto out;
1068         }
1069
1070         if (args->size == 0)
1071                 goto out;
1072
1073         if (!access_ok(VERIFY_READ,
1074                        (char __user *)(uintptr_t)args->data_ptr,
1075                        args->size)) {
1076                 ret = -EFAULT;
1077                 goto out;
1078         }
1079
1080         ret = fault_in_pages_readable((char __user *)(uintptr_t)args->data_ptr,
1081                                       args->size);
1082         if (ret) {
1083                 ret = -EFAULT;
1084                 goto out;
1085         }
1086
1087         /* We can only do the GTT pwrite on untiled buffers, as otherwise
1088          * it would end up going through the fenced access, and we'll get
1089          * different detiling behavior between reading and writing.
1090          * pread/pwrite currently are reading and writing from the CPU
1091          * perspective, requiring manual detiling by the client.
1092          */
1093         if (obj_priv->phys_obj)
1094                 ret = i915_gem_phys_pwrite(dev, obj, args, file);
1095         else if (obj_priv->tiling_mode == I915_TILING_NONE &&
1096                  obj_priv->gtt_space &&
1097                  obj->write_domain != I915_GEM_DOMAIN_CPU) {
1098                 ret = i915_gem_object_pin(obj, 0, true);
1099                 if (ret)
1100                         goto out;
1101
1102                 ret = i915_gem_object_set_to_gtt_domain(obj, 1);
1103                 if (ret)
1104                         goto out_unpin;
1105
1106                 ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
1107                 if (ret == -EFAULT)
1108                         ret = i915_gem_gtt_pwrite_slow(dev, obj, args, file);
1109
1110 out_unpin:
1111                 i915_gem_object_unpin(obj);
1112         } else {
1113                 ret = i915_gem_object_get_pages_or_evict(obj);
1114                 if (ret)
1115                         goto out;
1116
1117                 ret = i915_gem_object_set_to_cpu_domain(obj, 1);
1118                 if (ret)
1119                         goto out_put;
1120
1121                 ret = -EFAULT;
1122                 if (!i915_gem_object_needs_bit17_swizzle(obj))
1123                         ret = i915_gem_shmem_pwrite_fast(dev, obj, args, file);
1124                 if (ret == -EFAULT)
1125                         ret = i915_gem_shmem_pwrite_slow(dev, obj, args, file);
1126
1127 out_put:
1128                 i915_gem_object_put_pages(obj);
1129         }
1130
1131 out:
1132         drm_gem_object_unreference(obj);
1133 unlock:
1134         mutex_unlock(&dev->struct_mutex);
1135         return ret;
1136 }
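
/*
 * A minimal userspace sketch of a pwrite call, mirroring the pread example
 * earlier (assuming `handle` and `buf` as before):
 *
 *        struct drm_i915_gem_pwrite pwrite = {
 *                .handle   = handle,
 *                .offset   = 0,
 *                .size     = 4096,
 *                .data_ptr = (uint64_t)(uintptr_t)buf,
 *        };
 *        ioctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite);
 *
 * Whether the kernel then takes the phys, GTT or shmem path is decided by
 * the object state checks at the top of the function above.
 */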
1137
1138 /**
1139  * Called when user space prepares to use an object with the CPU, either
1140  * through the mmap ioctl's mapping or a GTT mapping.
1141  */
1142 int
1143 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
1144                           struct drm_file *file_priv)
1145 {
1146         struct drm_i915_private *dev_priv = dev->dev_private;
1147         struct drm_i915_gem_set_domain *args = data;
1148         struct drm_gem_object *obj;
1149         struct drm_i915_gem_object *obj_priv;
1150         uint32_t read_domains = args->read_domains;
1151         uint32_t write_domain = args->write_domain;
1152         int ret;
1153
1154         if (!(dev->driver->driver_features & DRIVER_GEM))
1155                 return -ENODEV;
1156
1157         /* Only handle setting domains to types used by the CPU. */
1158         if (write_domain & I915_GEM_GPU_DOMAINS)
1159                 return -EINVAL;
1160
1161         if (read_domains & I915_GEM_GPU_DOMAINS)
1162                 return -EINVAL;
1163
1164         /* Having something in the write domain implies it's in the read
1165          * domain, and only that read domain.  Enforce that in the request.
1166          */
1167         if (write_domain != 0 && read_domains != write_domain)
1168                 return -EINVAL;
1169
1170         ret = i915_mutex_lock_interruptible(dev);
1171         if (ret)
1172                 return ret;
1173
1174         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
1175         if (obj == NULL) {
1176                 ret = -ENOENT;
1177                 goto unlock;
1178         }
1179         obj_priv = to_intel_bo(obj);
1180
1181         intel_mark_busy(dev, obj);
1182
1183         if (read_domains & I915_GEM_DOMAIN_GTT) {
1184                 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
1185
1186                 /* Update the LRU on the fence for the CPU access that's
1187                  * about to occur.
1188                  */
1189                 if (obj_priv->fence_reg != I915_FENCE_REG_NONE) {
1190                         struct drm_i915_fence_reg *reg =
1191                                 &dev_priv->fence_regs[obj_priv->fence_reg];
1192                         list_move_tail(&reg->lru_list,
1193                                        &dev_priv->mm.fence_list);
1194                 }
1195
1196                 /* Silently promote "you're not bound, there was nothing to do"
1197                  * to success, since the client was just asking us to
1198                  * make sure everything was done.
1199                  */
1200                 if (ret == -EINVAL)
1201                         ret = 0;
1202         } else {
1203                 ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
1204         }
1205
1206         /* Maintain LRU order of "inactive" objects */
1207         if (ret == 0 && i915_gem_object_is_inactive(obj_priv))
1208                 list_move_tail(&obj_priv->mm_list, &dev_priv->mm.inactive_list);
1209
1210         drm_gem_object_unreference(obj);
1211 unlock:
1212         mutex_unlock(&dev->struct_mutex);
1213         return ret;
1214 }
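
/*
 * A minimal userspace sketch of the set-domain call (a CPU read/write
 * request; GPU domains are rejected by the checks above):
 *
 *        struct drm_i915_gem_set_domain sd = {
 *                .handle       = handle,
 *                .read_domains = I915_GEM_DOMAIN_CPU,
 *                .write_domain = I915_GEM_DOMAIN_CPU,
 *        };
 *        ioctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &sd);
 */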
1215
1216 /**
1217  * Called when user space has done writes to this buffer
1218  */
1219 int
1220 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
1221                       struct drm_file *file_priv)
1222 {
1223         struct drm_i915_gem_sw_finish *args = data;
1224         struct drm_gem_object *obj;
1225         int ret = 0;
1226
1227         if (!(dev->driver->driver_features & DRIVER_GEM))
1228                 return -ENODEV;
1229
1230         ret = i915_mutex_lock_interruptible(dev);
1231         if (ret)
1232                 return ret;
1233
1234         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
1235         if (obj == NULL) {
1236                 ret = -ENOENT;
1237                 goto unlock;
1238         }
1239
1240         /* Pinned buffers may be scanout, so flush the cache */
1241         if (to_intel_bo(obj)->pin_count)
1242                 i915_gem_object_flush_cpu_write_domain(obj);
1243
1244         drm_gem_object_unreference(obj);
1245 unlock:
1246         mutex_unlock(&dev->struct_mutex);
1247         return ret;
1248 }
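
/*
 * Userspace signals the end of its CPU writes with a matching sw-finish
 * call, e.g. (handle as in the earlier sketches):
 *
 *        struct drm_i915_gem_sw_finish fin = { .handle = handle };
 *        ioctl(fd, DRM_IOCTL_I915_GEM_SW_FINISH, &fin);
 */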
1249
1250 /**
1251  * Maps the contents of an object, returning the address it is mapped
1252  * into.
1253  *
1254  * While the mapping holds a reference on the contents of the object, it doesn't
1255  * imply a ref on the object itself.
1256  */
1257 int
1258 i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1259                    struct drm_file *file_priv)
1260 {
1261         struct drm_i915_gem_mmap *args = data;
1262         struct drm_gem_object *obj;
1263         loff_t offset;
1264         unsigned long addr;
1265
1266         if (!(dev->driver->driver_features & DRIVER_GEM))
1267                 return -ENODEV;
1268
1269         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
1270         if (obj == NULL)
1271                 return -ENOENT;
1272
1273         offset = args->offset;
1274
1275         down_write(&current->mm->mmap_sem);
1276         addr = do_mmap(obj->filp, 0, args->size,
1277                        PROT_READ | PROT_WRITE, MAP_SHARED,
1278                        args->offset);
1279         up_write(&current->mm->mmap_sem);
1280         drm_gem_object_unreference_unlocked(obj);
1281         if (IS_ERR((void *)addr))
1282                 return addr;
1283
1284         args->addr_ptr = (uint64_t) addr;
1285
1286         return 0;
1287 }
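
/*
 * A minimal sketch of the CPU mmap path implemented above (the returned
 * pointer addresses the object's shmem backing store, not the GTT):
 *
 *        struct drm_i915_gem_mmap map = {
 *                .handle = handle,
 *                .offset = 0,
 *                .size   = 4096,
 *        };
 *        ioctl(fd, DRM_IOCTL_I915_GEM_MMAP, &map);
 *        void *cpu_ptr = (void *)(uintptr_t)map.addr_ptr;
 */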
1288
1289 /**
1290  * i915_gem_fault - fault a page into the GTT
1291  * @vma: VMA in question
1292  * @vmf: fault info
1293  *
1294  * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
1295  * from userspace.  The fault handler takes care of binding the object to
1296  * the GTT (if needed), allocating and programming a fence register (again,
1297  * only if needed based on whether the old reg is still valid or the object
1298  * is tiled) and inserting a new PTE into the faulting process.
1299  *
1300  * Note that the faulting process may involve evicting existing objects
1301  * from the GTT and/or fence registers to make room.  So performance may
1302  * suffer if the GTT working set is large or there are few fence registers
1303  * left.
1304  */
1305 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1306 {
1307         struct drm_gem_object *obj = vma->vm_private_data;
1308         struct drm_device *dev = obj->dev;
1309         drm_i915_private_t *dev_priv = dev->dev_private;
1310         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1311         pgoff_t page_offset;
1312         unsigned long pfn;
1313         int ret = 0;
1314         bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
1315
1316         /* We don't use vmf->pgoff since that has the fake offset */
1317         page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
1318                 PAGE_SHIFT;
1319
1320         /* Now bind it into the GTT if needed */
1321         mutex_lock(&dev->struct_mutex);
1322         BUG_ON(obj_priv->pin_count && !obj_priv->pin_mappable);
1323         if (!i915_gem_object_cpu_accessible(obj_priv))
1324                 i915_gem_object_unbind(obj);
1325
1326         if (!obj_priv->gtt_space) {
1327                 ret = i915_gem_object_bind_to_gtt(obj, 0, true);
1328                 if (ret)
1329                         goto unlock;
1330
1331                 ret = i915_gem_object_set_to_gtt_domain(obj, write);
1332                 if (ret)
1333                         goto unlock;
1334         }
1335
1336         if (!obj_priv->fault_mappable) {
1337                 obj_priv->fault_mappable = true;
1338                 i915_gem_info_update_mappable(dev_priv, obj, true);
1339         }
1340
1341         /* Need a new fence register? */
1342         if (obj_priv->tiling_mode != I915_TILING_NONE) {
1343                 ret = i915_gem_object_get_fence_reg(obj, true);
1344                 if (ret)
1345                         goto unlock;
1346         }
1347
1348         if (i915_gem_object_is_inactive(obj_priv))
1349                 list_move_tail(&obj_priv->mm_list, &dev_priv->mm.inactive_list);
1350
1351         pfn = ((dev->agp->base + obj_priv->gtt_offset) >> PAGE_SHIFT) +
1352                 page_offset;
1353
1354         /* Finally, remap it using the new GTT offset */
1355         ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
1356 unlock:
1357         mutex_unlock(&dev->struct_mutex);
1358
1359         switch (ret) {
1360         case 0:
1361         case -ERESTARTSYS:
1362                 return VM_FAULT_NOPAGE;
1363         case -ENOMEM:
1364         case -EAGAIN:
1365                 return VM_FAULT_OOM;
1366         default:
1367                 return VM_FAULT_SIGBUS;
1368         }
1369 }
1370
1371 /**
1372  * i915_gem_create_mmap_offset - create a fake mmap offset for an object
1373  * @obj: obj in question
1374  *
1375  * GEM memory mapping works by handing back to userspace a fake mmap offset
1376  * it can use in a subsequent mmap(2) call.  The DRM core code then looks
1377  * up the object based on the offset and sets up the various memory mapping
1378  * structures.
1379  *
1380  * This routine allocates and attaches a fake offset for @obj.
1381  */
1382 static int
1383 i915_gem_create_mmap_offset(struct drm_gem_object *obj)
1384 {
1385         struct drm_device *dev = obj->dev;
1386         struct drm_gem_mm *mm = dev->mm_private;
1387         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1388         struct drm_map_list *list;
1389         struct drm_local_map *map;
1390         int ret = 0;
1391
1392         /* Set the object up for mmap'ing */
1393         list = &obj->map_list;
1394         list->map = kzalloc(sizeof(struct drm_map_list), GFP_KERNEL);
1395         if (!list->map)
1396                 return -ENOMEM;
1397
1398         map = list->map;
1399         map->type = _DRM_GEM;
1400         map->size = obj->size;
1401         map->handle = obj;
1402
1403         /* Get a DRM GEM mmap offset allocated... */
1404         list->file_offset_node = drm_mm_search_free(&mm->offset_manager,
1405                                                     obj->size / PAGE_SIZE, 0, 0);
1406         if (!list->file_offset_node) {
1407                 DRM_ERROR("failed to allocate offset for bo %d\n", obj->name);
1408                 ret = -ENOSPC;
1409                 goto out_free_list;
1410         }
1411
1412         list->file_offset_node = drm_mm_get_block(list->file_offset_node,
1413                                                   obj->size / PAGE_SIZE, 0);
1414         if (!list->file_offset_node) {
1415                 ret = -ENOMEM;
1416                 goto out_free_list;
1417         }
1418
1419         list->hash.key = list->file_offset_node->start;
1420         ret = drm_ht_insert_item(&mm->offset_hash, &list->hash);
1421         if (ret) {
1422                 DRM_ERROR("failed to add to map hash\n");
1423                 goto out_free_mm;
1424         }
1425
1426         /* By now we should be all set; any drm_mmap request on the offset
1427          * below will get to our mmap & fault handler */
1428         obj_priv->mmap_offset = ((uint64_t) list->hash.key) << PAGE_SHIFT;
1429
1430         return 0;
1431
1432 out_free_mm:
1433         drm_mm_put_block(list->file_offset_node);
1434 out_free_list:
1435         kfree(list->map);
1436
1437         return ret;
1438 }
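
/*
 * Sketch of how userspace consumes the fake offset created above: it asks
 * for the offset with DRM_IOCTL_I915_GEM_MMAP_GTT and then passes it to a
 * regular mmap(2) on the DRM fd; the resulting VMA is backed by
 * i915_gem_fault(), not by ordinary pages:
 *
 *        struct drm_i915_gem_mmap_gtt gtt_map = { .handle = handle };
 *        ioctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &gtt_map);
 *        void *gtt_ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
 *                             MAP_SHARED, fd, gtt_map.offset);
 */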
1439
1440 /**
1441  * i915_gem_release_mmap - remove physical page mappings
1442  * @obj: obj in question
1443  *
1444  * Preserve the reservation of the mmapping with the DRM core code, but
1445  * relinquish ownership of the pages back to the system.
1446  *
1447  * It is vital that we remove the page mapping if we have mapped a tiled
1448  * object through the GTT and then lose the fence register due to
1449  * resource pressure. Similarly if the object has been moved out of the
1450  * aperture, then pages mapped into userspace must be revoked. Removing the
1451  * mapping will then trigger a page fault on the next user access, allowing
1452  * fixup by i915_gem_fault().
1453  */
1454 void
1455 i915_gem_release_mmap(struct drm_gem_object *obj)
1456 {
1457         struct drm_device *dev = obj->dev;
1458         struct drm_i915_private *dev_priv = dev->dev_private;
1459         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1460
1461         if (dev->dev_mapping)
1462                 unmap_mapping_range(dev->dev_mapping,
1463                                     obj_priv->mmap_offset, obj->size, 1);
1464
1465         if (obj_priv->fault_mappable) {
1466                 obj_priv->fault_mappable = false;
1467                 i915_gem_info_update_mappable(dev_priv, obj, false);
1468         }
1469 }
1470
1471 static void
1472 i915_gem_free_mmap_offset(struct drm_gem_object *obj)
1473 {
1474         struct drm_device *dev = obj->dev;
1475         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1476         struct drm_gem_mm *mm = dev->mm_private;
1477         struct drm_map_list *list;
1478
1479         list = &obj->map_list;
1480         drm_ht_remove_item(&mm->offset_hash, &list->hash);
1481
1482         if (list->file_offset_node) {
1483                 drm_mm_put_block(list->file_offset_node);
1484                 list->file_offset_node = NULL;
1485         }
1486
1487         if (list->map) {
1488                 kfree(list->map);
1489                 list->map = NULL;
1490         }
1491
1492         obj_priv->mmap_offset = 0;
1493 }
1494
1495 /**
1496  * i915_gem_get_gtt_alignment - return required GTT alignment for an object
1497  * @obj: object to check
1498  *
1499  * Return the required GTT alignment for an object, taking into account
1500  * potential fence register mapping if needed.
1501  */
1502 static uint32_t
1503 i915_gem_get_gtt_alignment(struct drm_gem_object *obj)
1504 {
1505         struct drm_device *dev = obj->dev;
1506         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1507         int start, i;
1508
1509         /*
1510          * Minimum alignment is 4k (GTT page size), but might be greater
1511          * if a fence register is needed for the object.
1512          */
1513         if (INTEL_INFO(dev)->gen >= 4 || obj_priv->tiling_mode == I915_TILING_NONE)
1514                 return 4096;
1515
1516         /*
1517          * Older chips require the object to be aligned to the size of the
1518          * smallest fence region that can contain it.
1519          */
1520         if (INTEL_INFO(dev)->gen == 3)
1521                 start = 1024*1024;
1522         else
1523                 start = 512*1024;
1524
1525         for (i = start; i < obj->size; i <<= 1)
1526                 ;
1527
1528         return i;
1529 }
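
/*
 * The loop above is a hand-rolled "round up to a power of two, but no
 * smaller than start"; with linux/log2.h it could equivalently be written
 * as (sketch only, not a change to the function above):
 *
 *         return max_t(uint32_t, start, roundup_pow_of_two(obj->size));
 *
 * Worked examples: on gen3 (start == 1MiB) a 700KiB tiled object needs 1MiB
 * alignment and a 3MiB one needs 4MiB; on gen2 (start == 512KiB) a 300KiB
 * tiled object needs 512KiB alignment.
 */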
1530
1531 /**
1532  * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
1533  * @dev: DRM device
1534  * @data: GTT mapping ioctl data
1535  * @file_priv: GEM object info
1536  *
1537  * Simply returns the fake offset to userspace so it can mmap it.
1538  * The mmap call will end up in drm_gem_mmap(), which will set things
1539  * up so we can get faults in the handler above.
1540  *
1541  * The fault handler will take care of binding the object into the GTT
1542  * (since it may have been evicted to make room for something), allocating
1543  * a fence register, and mapping the appropriate aperture address into
1544  * userspace.
1545  */
1546 int
1547 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
1548                         struct drm_file *file_priv)
1549 {
1550         struct drm_i915_gem_mmap_gtt *args = data;
1551         struct drm_gem_object *obj;
1552         struct drm_i915_gem_object *obj_priv;
1553         int ret;
1554
1555         if (!(dev->driver->driver_features & DRIVER_GEM))
1556                 return -ENODEV;
1557
1558         ret = i915_mutex_lock_interruptible(dev);
1559         if (ret)
1560                 return ret;
1561
1562         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
1563         if (obj == NULL) {
1564                 ret = -ENOENT;
1565                 goto unlock;
1566         }
1567         obj_priv = to_intel_bo(obj);
1568
1569         if (obj_priv->madv != I915_MADV_WILLNEED) {
1570                 DRM_ERROR("Attempting to mmap a purgeable buffer\n");
1571                 ret = -EINVAL;
1572                 goto out;
1573         }
1574
1575         if (!obj_priv->mmap_offset) {
1576                 ret = i915_gem_create_mmap_offset(obj);
1577                 if (ret)
1578                         goto out;
1579         }
1580
1581         args->offset = obj_priv->mmap_offset;
1582
1583         /*
1584          * Pull it into the GTT so that we have a page list (makes the
1585          * initial fault faster and any subsequent flushing possible).
1586          */
1587         if (!obj_priv->agp_mem) {
1588                 ret = i915_gem_object_bind_to_gtt(obj, 0, true);
1589                 if (ret)
1590                         goto out;
1591         }
1592
1593 out:
1594         drm_gem_object_unreference(obj);
1595 unlock:
1596         mutex_unlock(&dev->struct_mutex);
1597         return ret;
1598 }
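
/*
 * For reference, the userspace side of this ioctl looks roughly like the
 * sketch below (error handling omitted; "fd", "handle" and "size" are
 * hypothetical variables for an open DRM node and an existing GEM object).
 * The ioctl number and struct come from i915_drm.h.
 *
 *         struct drm_i915_gem_mmap_gtt arg = { .handle = handle };
 *         ioctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &arg);
 *         void *ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
 *                          MAP_SHARED, fd, arg.offset);
 *
 * The first access through ptr then faults into drm_gem_mmap() and
 * i915_gem_fault(), which bind the object and insert the aperture pages.
 */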
1599
1600 static void
1601 i915_gem_object_put_pages(struct drm_gem_object *obj)
1602 {
1603         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1604         int page_count = obj->size / PAGE_SIZE;
1605         int i;
1606
1607         BUG_ON(obj_priv->pages_refcount == 0);
1608         BUG_ON(obj_priv->madv == __I915_MADV_PURGED);
1609
1610         if (--obj_priv->pages_refcount != 0)
1611                 return;
1612
1613         if (obj_priv->tiling_mode != I915_TILING_NONE)
1614                 i915_gem_object_save_bit_17_swizzle(obj);
1615
1616         if (obj_priv->madv == I915_MADV_DONTNEED)
1617                 obj_priv->dirty = 0;
1618
1619         for (i = 0; i < page_count; i++) {
1620                 if (obj_priv->dirty)
1621                         set_page_dirty(obj_priv->pages[i]);
1622
1623                 if (obj_priv->madv == I915_MADV_WILLNEED)
1624                         mark_page_accessed(obj_priv->pages[i]);
1625
1626                 page_cache_release(obj_priv->pages[i]);
1627         }
1628         obj_priv->dirty = 0;
1629
1630         drm_free_large(obj_priv->pages);
1631         obj_priv->pages = NULL;
1632 }
1633
1634 static uint32_t
1635 i915_gem_next_request_seqno(struct drm_device *dev,
1636                             struct intel_ring_buffer *ring)
1637 {
1638         drm_i915_private_t *dev_priv = dev->dev_private;
1639
1640         ring->outstanding_lazy_request = true;
1641         return dev_priv->next_seqno;
1642 }
1643
1644 static void
1645 i915_gem_object_move_to_active(struct drm_gem_object *obj,
1646                                struct intel_ring_buffer *ring)
1647 {
1648         struct drm_device *dev = obj->dev;
1649         struct drm_i915_private *dev_priv = dev->dev_private;
1650         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1651         uint32_t seqno = i915_gem_next_request_seqno(dev, ring);
1652
1653         BUG_ON(ring == NULL);
1654         obj_priv->ring = ring;
1655
1656         /* Add a reference if we're newly entering the active list. */
1657         if (!obj_priv->active) {
1658                 drm_gem_object_reference(obj);
1659                 obj_priv->active = 1;
1660         }
1661
1662         /* Move from whatever list we were on to the tail of execution. */
1663         list_move_tail(&obj_priv->mm_list, &dev_priv->mm.active_list);
1664         list_move_tail(&obj_priv->ring_list, &ring->active_list);
1665         obj_priv->last_rendering_seqno = seqno;
1666 }
1667
1668 static void
1669 i915_gem_object_move_to_flushing(struct drm_gem_object *obj)
1670 {
1671         struct drm_device *dev = obj->dev;
1672         drm_i915_private_t *dev_priv = dev->dev_private;
1673         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1674
1675         BUG_ON(!obj_priv->active);
1676         list_move_tail(&obj_priv->mm_list, &dev_priv->mm.flushing_list);
1677         list_del_init(&obj_priv->ring_list);
1678         obj_priv->last_rendering_seqno = 0;
1679 }
1680
1681 /* Immediately discard the backing storage */
1682 static void
1683 i915_gem_object_truncate(struct drm_gem_object *obj)
1684 {
1685         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1686         struct inode *inode;
1687
1688         /* Our goal here is to return as much of the memory as
1689          * possible back to the system, as we may be called from the OOM
1690          * path. To do this we must instruct the shmfs to drop all of its
1691          * backing pages, *now*. Here we mirror the actions taken by
1692          * shmem_delete_inode() to release the backing store.
1693          */
1694         inode = obj->filp->f_path.dentry->d_inode;
1695         truncate_inode_pages(inode->i_mapping, 0);
1696         if (inode->i_op->truncate_range)
1697                 inode->i_op->truncate_range(inode, 0, (loff_t)-1);
1698
1699         obj_priv->madv = __I915_MADV_PURGED;
1700 }
1701
1702 static inline int
1703 i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj_priv)
1704 {
1705         return obj_priv->madv == I915_MADV_DONTNEED;
1706 }
1707
1708 static void
1709 i915_gem_object_move_to_inactive(struct drm_gem_object *obj)
1710 {
1711         struct drm_device *dev = obj->dev;
1712         drm_i915_private_t *dev_priv = dev->dev_private;
1713         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1714
1715         if (obj_priv->pin_count != 0)
1716                 list_move_tail(&obj_priv->mm_list, &dev_priv->mm.pinned_list);
1717         else
1718                 list_move_tail(&obj_priv->mm_list, &dev_priv->mm.inactive_list);
1719         list_del_init(&obj_priv->ring_list);
1720
1721         BUG_ON(!list_empty(&obj_priv->gpu_write_list));
1722
1723         obj_priv->last_rendering_seqno = 0;
1724         obj_priv->ring = NULL;
1725         if (obj_priv->active) {
1726                 obj_priv->active = 0;
1727                 drm_gem_object_unreference(obj);
1728         }
1729         WARN_ON(i915_verify_lists(dev));
1730 }
1731
1732 static void
1733 i915_gem_process_flushing_list(struct drm_device *dev,
1734                                uint32_t flush_domains,
1735                                struct intel_ring_buffer *ring)
1736 {
1737         drm_i915_private_t *dev_priv = dev->dev_private;
1738         struct drm_i915_gem_object *obj_priv, *next;
1739
1740         list_for_each_entry_safe(obj_priv, next,
1741                                  &ring->gpu_write_list,
1742                                  gpu_write_list) {
1743                 struct drm_gem_object *obj = &obj_priv->base;
1744
1745                 if (obj->write_domain & flush_domains) {
1746                         uint32_t old_write_domain = obj->write_domain;
1747
1748                         obj->write_domain = 0;
1749                         list_del_init(&obj_priv->gpu_write_list);
1750                         i915_gem_object_move_to_active(obj, ring);
1751
1752                         /* update the fence lru list */
1753                         if (obj_priv->fence_reg != I915_FENCE_REG_NONE) {
1754                                 struct drm_i915_fence_reg *reg =
1755                                         &dev_priv->fence_regs[obj_priv->fence_reg];
1756                                 list_move_tail(&reg->lru_list,
1757                                                 &dev_priv->mm.fence_list);
1758                         }
1759
1760                         trace_i915_gem_object_change_domain(obj,
1761                                                             obj->read_domains,
1762                                                             old_write_domain);
1763                 }
1764         }
1765 }
1766
1767 int
1768 i915_add_request(struct drm_device *dev,
1769                  struct drm_file *file,
1770                  struct drm_i915_gem_request *request,
1771                  struct intel_ring_buffer *ring)
1772 {
1773         drm_i915_private_t *dev_priv = dev->dev_private;
1774         struct drm_i915_file_private *file_priv = NULL;
1775         uint32_t seqno;
1776         int was_empty;
1777         int ret;
1778
1779         BUG_ON(request == NULL);
1780
1781         if (file != NULL)
1782                 file_priv = file->driver_priv;
1783
1784         ret = ring->add_request(ring, &seqno);
1785         if (ret)
1786                 return ret;
1787
1788         ring->outstanding_lazy_request = false;
1789
1790         request->seqno = seqno;
1791         request->ring = ring;
1792         request->emitted_jiffies = jiffies;
1793         was_empty = list_empty(&ring->request_list);
1794         list_add_tail(&request->list, &ring->request_list);
1795
1796         if (file_priv) {
1797                 spin_lock(&file_priv->mm.lock);
1798                 request->file_priv = file_priv;
1799                 list_add_tail(&request->client_list,
1800                               &file_priv->mm.request_list);
1801                 spin_unlock(&file_priv->mm.lock);
1802         }
1803
1804         if (!dev_priv->mm.suspended) {
1805                 mod_timer(&dev_priv->hangcheck_timer,
1806                           jiffies + msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD));
1807                 if (was_empty)
1808                         queue_delayed_work(dev_priv->wq,
1809                                            &dev_priv->mm.retire_work, HZ);
1810         }
1811         return 0;
1812 }
1813
1814 /**
1815  * Command execution barrier
1816  *
1817  * Ensures that all commands in the ring are finished
1818  * before signalling the CPU
1819  */
1820 static void
1821 i915_retire_commands(struct drm_device *dev, struct intel_ring_buffer *ring)
1822 {
1823         uint32_t flush_domains = 0;
1824
1825         /* The sampler always gets flushed on i965 (sigh) */
1826         if (INTEL_INFO(dev)->gen >= 4)
1827                 flush_domains |= I915_GEM_DOMAIN_SAMPLER;
1828
1829         ring->flush(ring, I915_GEM_DOMAIN_COMMAND, flush_domains);
1830 }
1831
1832 static inline void
1833 i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
1834 {
1835         struct drm_i915_file_private *file_priv = request->file_priv;
1836
1837         if (!file_priv)
1838                 return;
1839
1840         spin_lock(&file_priv->mm.lock);
1841         list_del(&request->client_list);
1842         request->file_priv = NULL;
1843         spin_unlock(&file_priv->mm.lock);
1844 }
1845
1846 static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv,
1847                                       struct intel_ring_buffer *ring)
1848 {
1849         while (!list_empty(&ring->request_list)) {
1850                 struct drm_i915_gem_request *request;
1851
1852                 request = list_first_entry(&ring->request_list,
1853                                            struct drm_i915_gem_request,
1854                                            list);
1855
1856                 list_del(&request->list);
1857                 i915_gem_request_remove_from_client(request);
1858                 kfree(request);
1859         }
1860
1861         while (!list_empty(&ring->active_list)) {
1862                 struct drm_i915_gem_object *obj_priv;
1863
1864                 obj_priv = list_first_entry(&ring->active_list,
1865                                             struct drm_i915_gem_object,
1866                                             ring_list);
1867
1868                 obj_priv->base.write_domain = 0;
1869                 list_del_init(&obj_priv->gpu_write_list);
1870                 i915_gem_object_move_to_inactive(&obj_priv->base);
1871         }
1872 }
1873
1874 void i915_gem_reset(struct drm_device *dev)
1875 {
1876         struct drm_i915_private *dev_priv = dev->dev_private;
1877         struct drm_i915_gem_object *obj_priv;
1878         int i;
1879
1880         i915_gem_reset_ring_lists(dev_priv, &dev_priv->render_ring);
1881         i915_gem_reset_ring_lists(dev_priv, &dev_priv->bsd_ring);
1882         i915_gem_reset_ring_lists(dev_priv, &dev_priv->blt_ring);
1883
1884         /* Remove anything from the flushing lists. The GPU cache is likely
1885          * to be lost on reset along with the data, so simply move the
1886          * lost bo to the inactive list.
1887          */
1888         while (!list_empty(&dev_priv->mm.flushing_list)) {
1889                 obj_priv = list_first_entry(&dev_priv->mm.flushing_list,
1890                                             struct drm_i915_gem_object,
1891                                             mm_list);
1892
1893                 obj_priv->base.write_domain = 0;
1894                 list_del_init(&obj_priv->gpu_write_list);
1895                 i915_gem_object_move_to_inactive(&obj_priv->base);
1896         }
1897
1898         /* Move everything out of the GPU domains to ensure we do any
1899          * necessary invalidation upon reuse.
1900          */
1901         list_for_each_entry(obj_priv,
1902                             &dev_priv->mm.inactive_list,
1903                             mm_list)
1904         {
1905                 obj_priv->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
1906         }
1907
1908         /* The fence registers are invalidated so clear them out */
1909         for (i = 0; i < 16; i++) {
1910                 struct drm_i915_fence_reg *reg;
1911
1912                 reg = &dev_priv->fence_regs[i];
1913                 if (!reg->obj)
1914                         continue;
1915
1916                 i915_gem_clear_fence_reg(reg->obj);
1917         }
1918 }
1919
1920 /**
1921  * This function clears the request list as sequence numbers are passed.
1922  */
1923 static void
1924 i915_gem_retire_requests_ring(struct drm_device *dev,
1925                               struct intel_ring_buffer *ring)
1926 {
1927         drm_i915_private_t *dev_priv = dev->dev_private;
1928         uint32_t seqno;
1929
1930         if (!ring->status_page.page_addr ||
1931             list_empty(&ring->request_list))
1932                 return;
1933
1934         WARN_ON(i915_verify_lists(dev));
1935
1936         seqno = ring->get_seqno(ring);
1937         while (!list_empty(&ring->request_list)) {
1938                 struct drm_i915_gem_request *request;
1939
1940                 request = list_first_entry(&ring->request_list,
1941                                            struct drm_i915_gem_request,
1942                                            list);
1943
1944                 if (!i915_seqno_passed(seqno, request->seqno))
1945                         break;
1946
1947                 trace_i915_gem_request_retire(dev, request->seqno);
1948
1949                 list_del(&request->list);
1950                 i915_gem_request_remove_from_client(request);
1951                 kfree(request);
1952         }
1953
1954         /* Move any buffers on the active list that are no longer referenced
1955          * by the ringbuffer to the flushing/inactive lists as appropriate.
1956          */
1957         while (!list_empty(&ring->active_list)) {
1958                 struct drm_gem_object *obj;
1959                 struct drm_i915_gem_object *obj_priv;
1960
1961                 obj_priv = list_first_entry(&ring->active_list,
1962                                             struct drm_i915_gem_object,
1963                                             ring_list);
1964
1965                 if (!i915_seqno_passed(seqno, obj_priv->last_rendering_seqno))
1966                         break;
1967
1968                 obj = &obj_priv->base;
1969                 if (obj->write_domain != 0)
1970                         i915_gem_object_move_to_flushing(obj);
1971                 else
1972                         i915_gem_object_move_to_inactive(obj);
1973         }
1974
1975         if (unlikely (dev_priv->trace_irq_seqno &&
1976                       i915_seqno_passed(dev_priv->trace_irq_seqno, seqno))) {
1977                 ring->user_irq_put(ring);
1978                 dev_priv->trace_irq_seqno = 0;
1979         }
1980
1981         WARN_ON(i915_verify_lists(dev));
1982 }
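
/*
 * Note on the seqno comparisons above: i915_seqno_passed() (i915_drv.h) is,
 * in this era, essentially a signed-difference test along the lines of
 *
 *         static inline bool i915_seqno_passed(uint32_t seq1, uint32_t seq2)
 *         {
 *                 return (int32_t)(seq1 - seq2) >= 0;
 *         }
 *
 * so it remains correct across 32-bit wraparound: with seq1 == 0x00000002
 * and seq2 == 0xfffffffe the difference is 4, so seq1 is treated as having
 * passed seq2 even though it is numerically smaller.
 */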
1983
1984 void
1985 i915_gem_retire_requests(struct drm_device *dev)
1986 {
1987         drm_i915_private_t *dev_priv = dev->dev_private;
1988
1989         if (!list_empty(&dev_priv->mm.deferred_free_list)) {
1990                 struct drm_i915_gem_object *obj_priv, *tmp;
1991
1992                 /* We must be careful that during unbind() we do not
1993                  * accidentally infinitely recurse into retire requests.
1994                  * Currently:
1995                  *   retire -> free -> unbind -> wait -> retire_ring
1996                  */
1997                 list_for_each_entry_safe(obj_priv, tmp,
1998                                          &dev_priv->mm.deferred_free_list,
1999                                          mm_list)
2000                         i915_gem_free_object_tail(&obj_priv->base);
2001         }
2002
2003         i915_gem_retire_requests_ring(dev, &dev_priv->render_ring);
2004         i915_gem_retire_requests_ring(dev, &dev_priv->bsd_ring);
2005         i915_gem_retire_requests_ring(dev, &dev_priv->blt_ring);
2006 }
2007
2008 static void
2009 i915_gem_retire_work_handler(struct work_struct *work)
2010 {
2011         drm_i915_private_t *dev_priv;
2012         struct drm_device *dev;
2013
2014         dev_priv = container_of(work, drm_i915_private_t,
2015                                 mm.retire_work.work);
2016         dev = dev_priv->dev;
2017
2018         /* Come back later if the device is busy... */
2019         if (!mutex_trylock(&dev->struct_mutex)) {
2020                 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
2021                 return;
2022         }
2023
2024         i915_gem_retire_requests(dev);
2025
2026         if (!dev_priv->mm.suspended &&
2027                 (!list_empty(&dev_priv->render_ring.request_list) ||
2028                  !list_empty(&dev_priv->bsd_ring.request_list) ||
2029                  !list_empty(&dev_priv->blt_ring.request_list)))
2030                 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
2031         mutex_unlock(&dev->struct_mutex);
2032 }
2033
2034 int
2035 i915_do_wait_request(struct drm_device *dev, uint32_t seqno,
2036                      bool interruptible, struct intel_ring_buffer *ring)
2037 {
2038         drm_i915_private_t *dev_priv = dev->dev_private;
2039         u32 ier;
2040         int ret = 0;
2041
2042         BUG_ON(seqno == 0);
2043
2044         if (atomic_read(&dev_priv->mm.wedged))
2045                 return -EAGAIN;
2046
2047         if (ring->outstanding_lazy_request) {
2048                 struct drm_i915_gem_request *request;
2049
2050                 request = kzalloc(sizeof(*request), GFP_KERNEL);
2051                 if (request == NULL)
2052                         return -ENOMEM;
2053
2054                 ret = i915_add_request(dev, NULL, request, ring);
2055                 if (ret) {
2056                         kfree(request);
2057                         return ret;
2058                 }
2059
2060                 seqno = request->seqno;
2061         }
2062         BUG_ON(seqno == dev_priv->next_seqno);
2063
2064         if (!i915_seqno_passed(ring->get_seqno(ring), seqno)) {
2065                 if (HAS_PCH_SPLIT(dev))
2066                         ier = I915_READ(DEIER) | I915_READ(GTIER);
2067                 else
2068                         ier = I915_READ(IER);
2069                 if (!ier) {
2070                         DRM_ERROR("something (likely vbetool) disabled "
2071                                   "interrupts, re-enabling\n");
2072                         i915_driver_irq_preinstall(dev);
2073                         i915_driver_irq_postinstall(dev);
2074                 }
2075
2076                 trace_i915_gem_request_wait_begin(dev, seqno);
2077
2078                 ring->waiting_seqno = seqno;
2079                 ring->user_irq_get(ring);
2080                 if (interruptible)
2081                         ret = wait_event_interruptible(ring->irq_queue,
2082                                 i915_seqno_passed(ring->get_seqno(ring), seqno)
2083                                 || atomic_read(&dev_priv->mm.wedged));
2084                 else
2085                         wait_event(ring->irq_queue,
2086                                 i915_seqno_passed(ring->get_seqno(ring), seqno)
2087                                 || atomic_read(&dev_priv->mm.wedged));
2088
2089                 ring->user_irq_put(ring);
2090                 ring->waiting_seqno = 0;
2091
2092                 trace_i915_gem_request_wait_end(dev, seqno);
2093         }
2094         if (atomic_read(&dev_priv->mm.wedged))
2095                 ret = -EAGAIN;
2096
2097         if (ret && ret != -ERESTARTSYS)
2098                 DRM_ERROR("%s returns %d (awaiting %d at %d, next %d)\n",
2099                           __func__, ret, seqno, ring->get_seqno(ring),
2100                           dev_priv->next_seqno);
2101
2102         /* Directly dispatch request retiring.  While we have the work queue
2103          * to handle this, the waiter on a request often wants an associated
2104          * buffer to have made it to the inactive list, and we would need
2105          * a separate wait queue to handle that.
2106          */
2107         if (ret == 0)
2108                 i915_gem_retire_requests_ring(dev, ring);
2109
2110         return ret;
2111 }
2112
2113 /**
2114  * Waits for a sequence number to be signaled, and cleans up the
2115  * request and object lists appropriately for that event.
2116  */
2117 static int
2118 i915_wait_request(struct drm_device *dev, uint32_t seqno,
2119                   struct intel_ring_buffer *ring)
2120 {
2121         return i915_do_wait_request(dev, seqno, 1, ring);
2122 }
2123
2124 static void
2125 i915_gem_flush_ring(struct drm_device *dev,
2126                     struct drm_file *file_priv,
2127                     struct intel_ring_buffer *ring,
2128                     uint32_t invalidate_domains,
2129                     uint32_t flush_domains)
2130 {
2131         ring->flush(ring, invalidate_domains, flush_domains);
2132         i915_gem_process_flushing_list(dev, flush_domains, ring);
2133 }
2134
2135 static void
2136 i915_gem_flush(struct drm_device *dev,
2137                struct drm_file *file_priv,
2138                uint32_t invalidate_domains,
2139                uint32_t flush_domains,
2140                uint32_t flush_rings)
2141 {
2142         drm_i915_private_t *dev_priv = dev->dev_private;
2143
2144         if (flush_domains & I915_GEM_DOMAIN_CPU)
2145                 drm_agp_chipset_flush(dev);
2146
2147         if ((flush_domains | invalidate_domains) & I915_GEM_GPU_DOMAINS) {
2148                 if (flush_rings & RING_RENDER)
2149                         i915_gem_flush_ring(dev, file_priv,
2150                                             &dev_priv->render_ring,
2151                                             invalidate_domains, flush_domains);
2152                 if (flush_rings & RING_BSD)
2153                         i915_gem_flush_ring(dev, file_priv,
2154                                             &dev_priv->bsd_ring,
2155                                             invalidate_domains, flush_domains);
2156                 if (flush_rings & RING_BLT)
2157                         i915_gem_flush_ring(dev, file_priv,
2158                                             &dev_priv->blt_ring,
2159                                             invalidate_domains, flush_domains);
2160         }
2161 }
2162
2163 /**
2164  * Ensures that all rendering to the object has completed and the object is
2165  * safe to unbind from the GTT or access from the CPU.
2166  */
2167 static int
2168 i915_gem_object_wait_rendering(struct drm_gem_object *obj,
2169                                bool interruptible)
2170 {
2171         struct drm_device *dev = obj->dev;
2172         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2173         int ret;
2174
2175         /* This function only exists to support waiting for existing rendering,
2176          * not for emitting required flushes.
2177          */
2178         BUG_ON((obj->write_domain & I915_GEM_GPU_DOMAINS) != 0);
2179
2180         /* If there is rendering queued on the buffer being evicted, wait for
2181          * it.
2182          */
2183         if (obj_priv->active) {
2184                 ret = i915_do_wait_request(dev,
2185                                            obj_priv->last_rendering_seqno,
2186                                            interruptible,
2187                                            obj_priv->ring);
2188                 if (ret)
2189                         return ret;
2190         }
2191
2192         return 0;
2193 }
2194
2195 /**
2196  * Unbinds an object from the GTT aperture.
2197  */
2198 int
2199 i915_gem_object_unbind(struct drm_gem_object *obj)
2200 {
2201         struct drm_device *dev = obj->dev;
2202         struct drm_i915_private *dev_priv = dev->dev_private;
2203         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2204         int ret = 0;
2205
2206         if (obj_priv->gtt_space == NULL)
2207                 return 0;
2208
2209         if (obj_priv->pin_count != 0) {
2210                 DRM_ERROR("Attempting to unbind pinned buffer\n");
2211                 return -EINVAL;
2212         }
2213
2214         /* blow away mappings if mapped through GTT */
2215         i915_gem_release_mmap(obj);
2216
2217         /* Move the object to the CPU domain to ensure that
2218          * any possible CPU writes while it's not in the GTT
2219          * are flushed when we go to remap it. This will
2220          * also ensure that all pending GPU writes are finished
2221          * before we unbind.
2222          */
2223         ret = i915_gem_object_set_to_cpu_domain(obj, 1);
2224         if (ret == -ERESTARTSYS)
2225                 return ret;
2226         /* Continue on if we fail due to EIO: the GPU is hung, so we
2227          * should be safe, and we need to clean up or else we might
2228          * cause memory corruption through use-after-free.
2229          */
2230         if (ret) {
2231                 i915_gem_clflush_object(obj);
2232                 obj->read_domains = obj->write_domain = I915_GEM_DOMAIN_CPU;
2233         }
2234
2235         /* release the fence reg _after_ flushing */
2236         if (obj_priv->fence_reg != I915_FENCE_REG_NONE)
2237                 i915_gem_clear_fence_reg(obj);
2238
2239         drm_unbind_agp(obj_priv->agp_mem);
2240         drm_free_agp(obj_priv->agp_mem, obj->size / PAGE_SIZE);
2241
2242         i915_gem_object_put_pages(obj);
2243         BUG_ON(obj_priv->pages_refcount);
2244
2245         i915_gem_info_remove_gtt(dev_priv, obj);
2246         list_del_init(&obj_priv->mm_list);
2247
2248         drm_mm_put_block(obj_priv->gtt_space);
2249         obj_priv->gtt_space = NULL;
2250         obj_priv->gtt_offset = 0;
2251
2252         if (i915_gem_object_is_purgeable(obj_priv))
2253                 i915_gem_object_truncate(obj);
2254
2255         trace_i915_gem_object_unbind(obj);
2256
2257         return ret;
2258 }
2259
2260 static int i915_ring_idle(struct drm_device *dev,
2261                           struct intel_ring_buffer *ring)
2262 {
2263         if (list_empty(&ring->gpu_write_list))
2264                 return 0;
2265
2266         i915_gem_flush_ring(dev, NULL, ring,
2267                             I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
2268         return i915_wait_request(dev,
2269                                  i915_gem_next_request_seqno(dev, ring),
2270                                  ring);
2271 }
2272
2273 int
2274 i915_gpu_idle(struct drm_device *dev)
2275 {
2276         drm_i915_private_t *dev_priv = dev->dev_private;
2277         bool lists_empty;
2278         int ret;
2279
2280         lists_empty = (list_empty(&dev_priv->mm.flushing_list) &&
2281                        list_empty(&dev_priv->render_ring.active_list) &&
2282                        list_empty(&dev_priv->bsd_ring.active_list) &&
2283                        list_empty(&dev_priv->blt_ring.active_list));
2284         if (lists_empty)
2285                 return 0;
2286
2287         /* Flush everything onto the inactive list. */
2288         ret = i915_ring_idle(dev, &dev_priv->render_ring);
2289         if (ret)
2290                 return ret;
2291
2292         ret = i915_ring_idle(dev, &dev_priv->bsd_ring);
2293         if (ret)
2294                 return ret;
2295
2296         ret = i915_ring_idle(dev, &dev_priv->blt_ring);
2297         if (ret)
2298                 return ret;
2299
2300         return 0;
2301 }
2302
2303 static int
2304 i915_gem_object_get_pages(struct drm_gem_object *obj,
2305                           gfp_t gfpmask)
2306 {
2307         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2308         int page_count, i;
2309         struct address_space *mapping;
2310         struct inode *inode;
2311         struct page *page;
2312
2313         BUG_ON(obj_priv->pages_refcount
2314                         == DRM_I915_GEM_OBJECT_MAX_PAGES_REFCOUNT);
2315
2316         if (obj_priv->pages_refcount++ != 0)
2317                 return 0;
2318
2319         /* Get the list of pages out of our struct file.  They'll be pinned
2320          * at this point until we release them.
2321          */
2322         page_count = obj->size / PAGE_SIZE;
2323         BUG_ON(obj_priv->pages != NULL);
2324         obj_priv->pages = drm_calloc_large(page_count, sizeof(struct page *));
2325         if (obj_priv->pages == NULL) {
2326                 obj_priv->pages_refcount--;
2327                 return -ENOMEM;
2328         }
2329
2330         inode = obj->filp->f_path.dentry->d_inode;
2331         mapping = inode->i_mapping;
2332         for (i = 0; i < page_count; i++) {
2333                 page = read_cache_page_gfp(mapping, i,
2334                                            GFP_HIGHUSER |
2335                                            __GFP_COLD |
2336                                            __GFP_RECLAIMABLE |
2337                                            gfpmask);
2338                 if (IS_ERR(page))
2339                         goto err_pages;
2340
2341                 obj_priv->pages[i] = page;
2342         }
2343
2344         if (obj_priv->tiling_mode != I915_TILING_NONE)
2345                 i915_gem_object_do_bit_17_swizzle(obj);
2346
2347         return 0;
2348
2349 err_pages:
2350         while (i--)
2351                 page_cache_release(obj_priv->pages[i]);
2352
2353         drm_free_large(obj_priv->pages);
2354         obj_priv->pages = NULL;
2355         obj_priv->pages_refcount--;
2356         return PTR_ERR(page);
2357 }
2358
2359 static void sandybridge_write_fence_reg(struct drm_i915_fence_reg *reg)
2360 {
2361         struct drm_gem_object *obj = reg->obj;
2362         struct drm_device *dev = obj->dev;
2363         drm_i915_private_t *dev_priv = dev->dev_private;
2364         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2365         int regnum = obj_priv->fence_reg;
2366         uint64_t val;
2367
2368         val = (uint64_t)((obj_priv->gtt_offset + obj->size - 4096) &
2369                     0xfffff000) << 32;
2370         val |= obj_priv->gtt_offset & 0xfffff000;
2371         val |= (uint64_t)((obj_priv->stride / 128) - 1) <<
2372                 SANDYBRIDGE_FENCE_PITCH_SHIFT;
2373
2374         if (obj_priv->tiling_mode == I915_TILING_Y)
2375                 val |= 1 << I965_FENCE_TILING_Y_SHIFT;
2376         val |= I965_FENCE_REG_VALID;
2377
2378         I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + (regnum * 8), val);
2379 }
2380
2381 static void i965_write_fence_reg(struct drm_i915_fence_reg *reg)
2382 {
2383         struct drm_gem_object *obj = reg->obj;
2384         struct drm_device *dev = obj->dev;
2385         drm_i915_private_t *dev_priv = dev->dev_private;
2386         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2387         int regnum = obj_priv->fence_reg;
2388         uint64_t val;
2389
2390         val = (uint64_t)((obj_priv->gtt_offset + obj->size - 4096) &
2391                     0xfffff000) << 32;
2392         val |= obj_priv->gtt_offset & 0xfffff000;
2393         val |= ((obj_priv->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT;
2394         if (obj_priv->tiling_mode == I915_TILING_Y)
2395                 val |= 1 << I965_FENCE_TILING_Y_SHIFT;
2396         val |= I965_FENCE_REG_VALID;
2397
2398         I915_WRITE64(FENCE_REG_965_0 + (regnum * 8), val);
2399 }
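
/*
 * Worked example of the i965 packing above (illustrative numbers): an
 * X-tiled object of size 1MiB at gtt_offset 0x00100000 with a 4096-byte
 * stride yields
 *
 *         end = (0x00100000 + 0x00100000 - 4096) & 0xfffff000 = 0x001ff000
 *         val = 0x001ff000ULL << 32                 (fence end address)
 *             | 0x00100000                          (fence start address)
 *             | ((4096 / 128) - 1) << I965_FENCE_PITCH_SHIFT
 *             | I965_FENCE_REG_VALID
 *
 * i.e. start and end bound the fenced range, and the pitch is stored in
 * units of 128 bytes, minus one.
 */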
2400
2401 static void i915_write_fence_reg(struct drm_i915_fence_reg *reg)
2402 {
2403         struct drm_gem_object *obj = reg->obj;
2404         struct drm_device *dev = obj->dev;
2405         drm_i915_private_t *dev_priv = dev->dev_private;
2406         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2407         int regnum = obj_priv->fence_reg;
2408         int tile_width;
2409         uint32_t fence_reg, val;
2410         uint32_t pitch_val;
2411
2412         if ((obj_priv->gtt_offset & ~I915_FENCE_START_MASK) ||
2413             (obj_priv->gtt_offset & (obj->size - 1))) {
2414                 WARN(1, "%s: object 0x%08x not 1M or size (0x%zx) aligned\n",
2415                      __func__, obj_priv->gtt_offset, obj->size);
2416                 return;
2417         }
2418
2419         if (obj_priv->tiling_mode == I915_TILING_Y &&
2420             HAS_128_BYTE_Y_TILING(dev))
2421                 tile_width = 128;
2422         else
2423                 tile_width = 512;
2424
2425         /* Note: the pitch must be a power-of-two number of tile widths */
2426         pitch_val = obj_priv->stride / tile_width;
2427         pitch_val = ffs(pitch_val) - 1;
2428
2429         if (obj_priv->tiling_mode == I915_TILING_Y &&
2430             HAS_128_BYTE_Y_TILING(dev))
2431                 WARN_ON(pitch_val > I830_FENCE_MAX_PITCH_VAL);
2432         else
2433                 WARN_ON(pitch_val > I915_FENCE_MAX_PITCH_VAL);
2434
2435         val = obj_priv->gtt_offset;
2436         if (obj_priv->tiling_mode == I915_TILING_Y)
2437                 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
2438         val |= I915_FENCE_SIZE_BITS(obj->size);
2439         val |= pitch_val << I830_FENCE_PITCH_SHIFT;
2440         val |= I830_FENCE_REG_VALID;
2441
2442         if (regnum < 8)
2443                 fence_reg = FENCE_REG_830_0 + (regnum * 4);
2444         else
2445                 fence_reg = FENCE_REG_945_8 + ((regnum - 8) * 4);
2446         I915_WRITE(fence_reg, val);
2447 }
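
/*
 * Example of the pre-965 pitch encoding above: an X-tiled surface
 * (tile_width == 512) with a 2048-byte stride spans 2048 / 512 == 4 tiles
 * per row, so pitch_val = ffs(4) - 1 == 2; the register effectively stores
 * log2(stride / tile_width).
 */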
2448
2449 static void i830_write_fence_reg(struct drm_i915_fence_reg *reg)
2450 {
2451         struct drm_gem_object *obj = reg->obj;
2452         struct drm_device *dev = obj->dev;
2453         drm_i915_private_t *dev_priv = dev->dev_private;
2454         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2455         int regnum = obj_priv->fence_reg;
2456         uint32_t val;
2457         uint32_t pitch_val;
2458         uint32_t fence_size_bits;
2459
2460         if ((obj_priv->gtt_offset & ~I830_FENCE_START_MASK) ||
2461             (obj_priv->gtt_offset & (obj->size - 1))) {
2462                 WARN(1, "%s: object 0x%08x not 512K or size aligned\n",
2463                      __func__, obj_priv->gtt_offset);
2464                 return;
2465         }
2466
2467         pitch_val = obj_priv->stride / 128;
2468         pitch_val = ffs(pitch_val) - 1;
2469         WARN_ON(pitch_val > I830_FENCE_MAX_PITCH_VAL);
2470
2471         val = obj_priv->gtt_offset;
2472         if (obj_priv->tiling_mode == I915_TILING_Y)
2473                 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
2474         fence_size_bits = I830_FENCE_SIZE_BITS(obj->size);
2475         WARN_ON(fence_size_bits & ~0x00000f00);
2476         val |= fence_size_bits;
2477         val |= pitch_val << I830_FENCE_PITCH_SHIFT;
2478         val |= I830_FENCE_REG_VALID;
2479
2480         I915_WRITE(FENCE_REG_830_0 + (regnum * 4), val);
2481 }
2482
2483 static int i915_find_fence_reg(struct drm_device *dev,
2484                                bool interruptible)
2485 {
2486         struct drm_i915_fence_reg *reg = NULL;
2487         struct drm_i915_gem_object *obj_priv = NULL;
2488         struct drm_i915_private *dev_priv = dev->dev_private;
2489         struct drm_gem_object *obj = NULL;
2490         int i, avail, ret;
2491
2492         /* First try to find a free reg */
2493         avail = 0;
2494         for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
2495                 reg = &dev_priv->fence_regs[i];
2496                 if (!reg->obj)
2497                         return i;
2498
2499                 obj_priv = to_intel_bo(reg->obj);
2500                 if (!obj_priv->pin_count)
2501                         avail++;
2502         }
2503
2504         if (avail == 0)
2505                 return -ENOSPC;
2506
2507         /* None available, try to steal one or wait for a user to finish */
2508         i = I915_FENCE_REG_NONE;
2509         list_for_each_entry(reg, &dev_priv->mm.fence_list,
2510                             lru_list) {
2511                 obj = reg->obj;
2512                 obj_priv = to_intel_bo(obj);
2513
2514                 if (obj_priv->pin_count)
2515                         continue;
2516
2517                 /* found one! */
2518                 i = obj_priv->fence_reg;
2519                 break;
2520         }
2521
2522         BUG_ON(i == I915_FENCE_REG_NONE);
2523
2524         /* We only have a reference on obj from the active list. put_fence_reg
2525          * might drop that one, causing a use-after-free of obj. So hold a
2526          * private reference to obj like the other callers of put_fence_reg
2527          * (the set_tiling ioctl) do. */
2528         drm_gem_object_reference(obj);
2529         ret = i915_gem_object_put_fence_reg(obj, interruptible);
2530         drm_gem_object_unreference(obj);
2531         if (ret != 0)
2532                 return ret;
2533
2534         return i;
2535 }
2536
2537 /**
2538  * i915_gem_object_get_fence_reg - set up a fence reg for an object
2539  * @obj: object to map through a fence reg
2540  *
2541  * When mapping objects through the GTT, userspace wants to be able to write
2542  * to them without having to worry about swizzling if the object is tiled.
2543  *
2544  * This function walks the fence regs looking for a free one for @obj,
2545  * stealing one if it can't find any.
2546  *
2547  * It then sets up the reg based on the object's properties: address, pitch
2548  * and tiling format.
2549  */
2550 int
2551 i915_gem_object_get_fence_reg(struct drm_gem_object *obj,
2552                               bool interruptible)
2553 {
2554         struct drm_device *dev = obj->dev;
2555         struct drm_i915_private *dev_priv = dev->dev_private;
2556         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2557         struct drm_i915_fence_reg *reg = NULL;
2558         int ret;
2559
2560         /* Just update our place in the LRU if our fence is getting used. */
2561         if (obj_priv->fence_reg != I915_FENCE_REG_NONE) {
2562                 reg = &dev_priv->fence_regs[obj_priv->fence_reg];
2563                 list_move_tail(&reg->lru_list, &dev_priv->mm.fence_list);
2564                 return 0;
2565         }
2566
2567         switch (obj_priv->tiling_mode) {
2568         case I915_TILING_NONE:
2569                 WARN(1, "allocating a fence for non-tiled object?\n");
2570                 break;
2571         case I915_TILING_X:
2572                 if (!obj_priv->stride)
2573                         return -EINVAL;
2574                 WARN((obj_priv->stride & (512 - 1)),
2575                      "object 0x%08x is X tiled but has non-512B pitch\n",
2576                      obj_priv->gtt_offset);
2577                 break;
2578         case I915_TILING_Y:
2579                 if (!obj_priv->stride)
2580                         return -EINVAL;
2581                 WARN((obj_priv->stride & (128 - 1)),
2582                      "object 0x%08x is Y tiled but has non-128B pitch\n",
2583                      obj_priv->gtt_offset);
2584                 break;
2585         }
2586
2587         ret = i915_find_fence_reg(dev, interruptible);
2588         if (ret < 0)
2589                 return ret;
2590
2591         obj_priv->fence_reg = ret;
2592         reg = &dev_priv->fence_regs[obj_priv->fence_reg];
2593         list_add_tail(&reg->lru_list, &dev_priv->mm.fence_list);
2594
2595         reg->obj = obj;
2596
2597         switch (INTEL_INFO(dev)->gen) {
2598         case 6:
2599                 sandybridge_write_fence_reg(reg);
2600                 break;
2601         case 5:
2602         case 4:
2603                 i965_write_fence_reg(reg);
2604                 break;
2605         case 3:
2606                 i915_write_fence_reg(reg);
2607                 break;
2608         case 2:
2609                 i830_write_fence_reg(reg);
2610                 break;
2611         }
2612
2613         trace_i915_gem_object_get_fence(obj, obj_priv->fence_reg,
2614                         obj_priv->tiling_mode);
2615
2616         return 0;
2617 }
2618
2619 /**
2620  * i915_gem_clear_fence_reg - clear out fence register info
2621  * @obj: object to clear
2622  *
2623  * Zeroes out the fence register itself and clears out the associated
2624  * data structures in dev_priv and obj_priv.
2625  */
2626 static void
2627 i915_gem_clear_fence_reg(struct drm_gem_object *obj)
2628 {
2629         struct drm_device *dev = obj->dev;
2630         drm_i915_private_t *dev_priv = dev->dev_private;
2631         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2632         struct drm_i915_fence_reg *reg =
2633                 &dev_priv->fence_regs[obj_priv->fence_reg];
2634         uint32_t fence_reg;
2635
2636         switch (INTEL_INFO(dev)->gen) {
2637         case 6:
2638                 I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 +
2639                              (obj_priv->fence_reg * 8), 0);
2640                 break;
2641         case 5:
2642         case 4:
2643                 I915_WRITE64(FENCE_REG_965_0 + (obj_priv->fence_reg * 8), 0);
2644                 break;
2645         case 3:
2646         case 2:
2647                 /* only gen3 has the second bank of fence registers */
2648                 if (INTEL_INFO(dev)->gen == 3 && obj_priv->fence_reg >= 8)
2649                         fence_reg = FENCE_REG_945_8 + (obj_priv->fence_reg - 8) * 4;
2650                 else
2651                         fence_reg = FENCE_REG_830_0 + obj_priv->fence_reg * 4;
2652                 I915_WRITE(fence_reg, 0);
2653                 break;
2654         }
2655
2656         reg->obj = NULL;
2657         obj_priv->fence_reg = I915_FENCE_REG_NONE;
2658         list_del_init(&reg->lru_list);
2659 }
2660
2661 /**
2662  * i915_gem_object_put_fence_reg - waits on outstanding fenced access
2663  * to the buffer to finish, and then resets the fence register.
2664  * @obj: tiled object holding a fence register.
2665  * @interruptible: whether the wait upon the fence is interruptible
2666  *
2667  * Zeroes out the fence register itself and clears out the associated
2668  * data structures in dev_priv and obj_priv.
2669  */
2670 int
2671 i915_gem_object_put_fence_reg(struct drm_gem_object *obj,
2672                               bool interruptible)
2673 {
2674         struct drm_device *dev = obj->dev;
2675         struct drm_i915_private *dev_priv = dev->dev_private;
2676         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2677         struct drm_i915_fence_reg *reg;
2678
2679         if (obj_priv->fence_reg == I915_FENCE_REG_NONE)
2680                 return 0;
2681
2682         /* If we've changed tiling, GTT-mappings of the object
2683          * need to re-fault to ensure that the correct fence register
2684          * setup is in place.
2685          */
2686         i915_gem_release_mmap(obj);
2687
2688         /* On the i915, GPU access to tiled buffers is via a fence,
2689          * therefore we must wait for any outstanding access to complete
2690          * before clearing the fence.
2691          */
2692         reg = &dev_priv->fence_regs[obj_priv->fence_reg];
2693         if (reg->gpu) {
2694                 int ret;
2695
2696                 ret = i915_gem_object_flush_gpu_write_domain(obj, true);
2697                 if (ret)
2698                         return ret;
2699
2700                 ret = i915_gem_object_wait_rendering(obj, interruptible);
2701                 if (ret)
2702                         return ret;
2703
2704                 reg->gpu = false;
2705         }
2706
2707         i915_gem_object_flush_gtt_write_domain(obj);
2708         i915_gem_clear_fence_reg(obj);
2709
2710         return 0;
2711 }
2712
2713 /**
2714  * Finds free space in the GTT aperture and binds the object there.
2715  */
2716 static int
2717 i915_gem_object_bind_to_gtt(struct drm_gem_object *obj,
2718                             unsigned alignment,
2719                             bool mappable)
2720 {
2721         struct drm_device *dev = obj->dev;
2722         drm_i915_private_t *dev_priv = dev->dev_private;
2723         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2724         struct drm_mm_node *free_space;
2725         gfp_t gfpmask = __GFP_NORETRY | __GFP_NOWARN;
2726         int ret;
2727
2728         if (obj_priv->madv != I915_MADV_WILLNEED) {
2729                 DRM_ERROR("Attempting to bind a purgeable object\n");
2730                 return -EINVAL;
2731         }
2732
2733         if (alignment == 0)
2734                 alignment = i915_gem_get_gtt_alignment(obj);
2735         if (alignment & (i915_gem_get_gtt_alignment(obj) - 1)) {
2736                 DRM_ERROR("Invalid object alignment requested %u\n", alignment);
2737                 return -EINVAL;
2738         }
2739
2740         /* If the object is bigger than the entire aperture, reject it early
2741          * before evicting everything in a vain attempt to find space.
2742          */
2743         if (obj->size >
2744             (mappable ? dev_priv->mm.gtt_mappable_end : dev_priv->mm.gtt_total)) {
2745                 DRM_ERROR("Attempting to bind an object larger than the aperture\n");
2746                 return -E2BIG;
2747         }
2748
2749  search_free:
2750         if (mappable)
2751                 free_space =
2752                         drm_mm_search_free_in_range(&dev_priv->mm.gtt_space,
2753                                                     obj->size, alignment, 0,
2754                                                     dev_priv->mm.gtt_mappable_end,
2755                                                     0);
2756         else
2757                 free_space = drm_mm_search_free(&dev_priv->mm.gtt_space,
2758                                                 obj->size, alignment, 0);
2759
2760         if (free_space != NULL) {
2761                 if (mappable)
2762                         obj_priv->gtt_space =
2763                                 drm_mm_get_block_range_generic(free_space,
2764                                                                obj->size,
2765                                                                alignment, 0,
2766                                                                dev_priv->mm.gtt_mappable_end,
2767                                                                0);
2768                 else
2769                         obj_priv->gtt_space =
2770                                 drm_mm_get_block(free_space, obj->size,
2771                                                  alignment);
2772         }
2773         if (obj_priv->gtt_space == NULL) {
2774                 /* If the gtt is empty and we're still having trouble
2775                  * fitting our object in, we're out of memory.
2776                  */
2777                 ret = i915_gem_evict_something(dev, obj->size, alignment,
2778                                                mappable);
2779                 if (ret)
2780                         return ret;
2781
2782                 goto search_free;
2783         }
2784
2785         ret = i915_gem_object_get_pages(obj, gfpmask);
2786         if (ret) {
2787                 drm_mm_put_block(obj_priv->gtt_space);
2788                 obj_priv->gtt_space = NULL;
2789
2790                 if (ret == -ENOMEM) {
2791                         /* first try to clear up some space from the GTT */
2792                         ret = i915_gem_evict_something(dev, obj->size,
2793                                                        alignment, mappable);
2794                         if (ret) {
2795                                 /* now try to shrink everyone else */
2796                                 if (gfpmask) {
2797                                         gfpmask = 0;
2798                                         goto search_free;
2799                                 }
2800
2801                                 return ret;
2802                         }
2803
2804                         goto search_free;
2805                 }
2806
2807                 return ret;
2808         }
2809
2810         /* Create an AGP memory structure pointing at our pages, and bind it
2811          * into the GTT.
2812          */
2813         obj_priv->agp_mem = drm_agp_bind_pages(dev,
2814                                                obj_priv->pages,
2815                                                obj->size >> PAGE_SHIFT,
2816                                                obj_priv->gtt_space->start,
2817                                                obj_priv->agp_type);
2818         if (obj_priv->agp_mem == NULL) {
2819                 i915_gem_object_put_pages(obj);
2820                 drm_mm_put_block(obj_priv->gtt_space);
2821                 obj_priv->gtt_space = NULL;
2822
2823                 ret = i915_gem_evict_something(dev, obj->size, alignment,
2824                                                mappable);
2825                 if (ret)
2826                         return ret;
2827
2828                 goto search_free;
2829         }
2830
2831         obj_priv->gtt_offset = obj_priv->gtt_space->start;
2832
2833         /* keep track of the bound object by adding it to the inactive list */
2834         list_add_tail(&obj_priv->mm_list, &dev_priv->mm.inactive_list);
2835         i915_gem_info_add_gtt(dev_priv, obj);
2836
2837         /* Assert that the object is not currently in any GPU domain. As it
2838          * wasn't in the GTT, there shouldn't be any way it could have been in
2839          * a GPU cache
2840          */
2841         BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
2842         BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
2843
2844         trace_i915_gem_object_bind(obj, obj_priv->gtt_offset, mappable);
2845
2846         return 0;
2847 }
2848
2849 void
2850 i915_gem_clflush_object(struct drm_gem_object *obj)
2851 {
2852         struct drm_i915_gem_object      *obj_priv = to_intel_bo(obj);
2853
2854         /* If we don't have a page list set up, then we're not pinned
2855          * to the GPU, and we can ignore the cache flush because it'll happen
2856          * again at bind time.
2857          */
2858         if (obj_priv->pages == NULL)
2859                 return;
2860
2861         trace_i915_gem_object_clflush(obj);
2862
2863         drm_clflush_pages(obj_priv->pages, obj->size / PAGE_SIZE);
2864 }
2865
2866 /** Flushes any GPU write domain for the object if it's dirty. */
2867 static int
2868 i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj,
2869                                        bool pipelined)
2870 {
2871         struct drm_device *dev = obj->dev;
2872         uint32_t old_write_domain;
2873
2874         if ((obj->write_domain & I915_GEM_GPU_DOMAINS) == 0)
2875                 return 0;
2876
2877         /* Queue the GPU write cache flushing we need. */
2878         old_write_domain = obj->write_domain;
2879         i915_gem_flush_ring(dev, NULL,
2880                             to_intel_bo(obj)->ring,
2881                             0, obj->write_domain);
2882         BUG_ON(obj->write_domain);
2883
2884         trace_i915_gem_object_change_domain(obj,
2885                                             obj->read_domains,
2886                                             old_write_domain);
2887
2888         if (pipelined)
2889                 return 0;
2890
2891         return i915_gem_object_wait_rendering(obj, true);
2892 }
2893
2894 /** Flushes the GTT write domain for the object if it's dirty. */
2895 static void
2896 i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj)
2897 {
2898         uint32_t old_write_domain;
2899
2900         if (obj->write_domain != I915_GEM_DOMAIN_GTT)
2901                 return;
2902
2903         /* No actual flushing is required for the GTT write domain.   Writes
2904          * to it immediately go to main memory as far as we know, so there's
2905          * no chipset flush.  It also doesn't land in render cache.
2906          */
2907         old_write_domain = obj->write_domain;
2908         obj->write_domain = 0;
2909
2910         trace_i915_gem_object_change_domain(obj,
2911                                             obj->read_domains,
2912                                             old_write_domain);
2913 }
2914
2915 /** Flushes the CPU write domain for the object if it's dirty. */
2916 static void
2917 i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj)
2918 {
2919         struct drm_device *dev = obj->dev;
2920         uint32_t old_write_domain;
2921
2922         if (obj->write_domain != I915_GEM_DOMAIN_CPU)
2923                 return;
2924
2925         i915_gem_clflush_object(obj);
2926         drm_agp_chipset_flush(dev);
2927         old_write_domain = obj->write_domain;
2928         obj->write_domain = 0;
2929
2930         trace_i915_gem_object_change_domain(obj,
2931                                             obj->read_domains,
2932                                             old_write_domain);
2933 }
2934
2935 /**
2936  * Moves a single object to the GTT read, and possibly write domain.
2937  *
2938  * This function returns when the move is complete, including waiting on
2939  * flushes to occur.
2940  */
2941 int
2942 i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write)
2943 {
2944         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2945         uint32_t old_write_domain, old_read_domains;
2946         int ret;
2947
2948         /* Not valid to be called on unbound objects. */
2949         if (obj_priv->gtt_space == NULL)
2950                 return -EINVAL;
2951
2952         ret = i915_gem_object_flush_gpu_write_domain(obj, false);
2953         if (ret != 0)
2954                 return ret;
2955
2956         i915_gem_object_flush_cpu_write_domain(obj);
2957
2958         if (write) {
2959                 ret = i915_gem_object_wait_rendering(obj, true);
2960                 if (ret)
2961                         return ret;
2962         }
2963
2964         old_write_domain = obj->write_domain;
2965         old_read_domains = obj->read_domains;
2966
2967         /* It should now be out of any other write domains, and we can update
2968          * the domain values for our changes.
2969          */
2970         BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
2971         obj->read_domains |= I915_GEM_DOMAIN_GTT;
2972         if (write) {
2973                 obj->read_domains = I915_GEM_DOMAIN_GTT;
2974                 obj->write_domain = I915_GEM_DOMAIN_GTT;
2975                 obj_priv->dirty = 1;
2976         }
2977
2978         trace_i915_gem_object_change_domain(obj,
2979                                             old_read_domains,
2980                                             old_write_domain);
2981
2982         return 0;
2983 }
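
     /*
      * Illustrative usage sketch (not part of the driver): a typical caller
      * pins the object into the GTT and then moves it to the GTT domain before
      * writing through the aperture.  Error handling is elided and
      * do_gtt_write() is a hypothetical stand-in for the actual copy:
      *
      *      ret = i915_gem_object_pin(obj, 0, true);
      *      if (ret == 0) {
      *              ret = i915_gem_object_set_to_gtt_domain(obj, 1);
      *              if (ret == 0)
      *                      do_gtt_write(obj);
      *              i915_gem_object_unpin(obj);
      *      }
      */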
2984
2985 /*
2986  * Prepare a buffer for use as a display plane. Use an uninterruptible wait
2987  * for any required flush, since the modesetting path must not be interrupted.
2988  */
2989 int
2990 i915_gem_object_set_to_display_plane(struct drm_gem_object *obj,
2991                                      bool pipelined)
2992 {
2993         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2994         uint32_t old_read_domains;
2995         int ret;
2996
2997         /* Not valid to be called on unbound objects. */
2998         if (obj_priv->gtt_space == NULL)
2999                 return -EINVAL;
3000
3001         ret = i915_gem_object_flush_gpu_write_domain(obj, true);
3002         if (ret)
3003                 return ret;
3004
3005         /* Currently, we are always called from a non-interruptible context. */
3006         if (!pipelined) {
3007                 ret = i915_gem_object_wait_rendering(obj, false);
3008                 if (ret)
3009                         return ret;
3010         }
3011
3012         i915_gem_object_flush_cpu_write_domain(obj);
3013
3014         old_read_domains = obj->read_domains;
3015         obj->read_domains |= I915_GEM_DOMAIN_GTT;
3016
3017         trace_i915_gem_object_change_domain(obj,
3018                                             old_read_domains,
3019                                             obj->write_domain);
3020
3021         return 0;
3022 }
3023
3024 /**
3025  * Moves a single object to the CPU read, and possibly write domain.
3026  *
3027  * This function returns when the move is complete, including waiting on
3028  * flushes to occur.
3029  */
3030 static int
3031 i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write)
3032 {
3033         uint32_t old_write_domain, old_read_domains;
3034         int ret;
3035
3036         ret = i915_gem_object_flush_gpu_write_domain(obj, false);
3037         if (ret != 0)
3038                 return ret;
3039
3040         i915_gem_object_flush_gtt_write_domain(obj);
3041
3042         /* If we have a partially-valid cache of the object in the CPU,
3043          * finish invalidating it and free the per-page flags.
3044          */
3045         i915_gem_object_set_to_full_cpu_read_domain(obj);
3046
3047         if (write) {
3048                 ret = i915_gem_object_wait_rendering(obj, true);
3049                 if (ret)
3050                         return ret;
3051         }
3052
3053         old_write_domain = obj->write_domain;
3054         old_read_domains = obj->read_domains;
3055
3056         /* Flush the CPU cache if it's still invalid. */
3057         if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
3058                 i915_gem_clflush_object(obj);
3059
3060                 obj->read_domains |= I915_GEM_DOMAIN_CPU;
3061         }
3062
3063         /* It should now be out of any other write domains, and we can update
3064          * the domain values for our changes.
3065          */
3066         BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
3067
3068         /* If we're writing through the CPU, then the GPU read domains will
3069          * need to be invalidated at next use.
3070          */
3071         if (write) {
3072                 obj->read_domains = I915_GEM_DOMAIN_CPU;
3073                 obj->write_domain = I915_GEM_DOMAIN_CPU;
3074         }
3075
3076         trace_i915_gem_object_change_domain(obj,
3077                                             old_read_domains,
3078                                             old_write_domain);
3079
3080         return 0;
3081 }
3082
3083 /*
3084  * Set the next domain for the specified object. This
3085  * may not actually perform the necessary flushing/invalidating though,
3086  * as that may want to be batched with other set_domain operations.
3087  *
3088  * This is (we hope) the only really tricky part of gem. The goal
3089  * is fairly simple -- track which caches hold bits of the object
3090  * and make sure they remain coherent. A few concrete examples may
3091  * help to explain how it works. For shorthand, we use the notation
3092  * (read_domains, write_domain), e.g. (CPU, CPU), to indicate
3093  * a pair of read and write domain masks.
3094  *
3095  * Case 1: the batch buffer
3096  *
3097  *      1. Allocated
3098  *      2. Written by CPU
3099  *      3. Mapped to GTT
3100  *      4. Read by GPU
3101  *      5. Unmapped from GTT
3102  *      6. Freed
3103  *
3104  *      Let's take these a step at a time
3105  *
3106  *      1. Allocated
3107  *              Pages allocated from the kernel may still have
3108  *              cache contents, so we set them to (CPU, CPU) always.
3109  *      2. Written by CPU (using pwrite)
3110  *              The pwrite function calls set_domain (CPU, CPU) and
3111  *              this function does nothing (as nothing changes)
3112  *      3. Mapped to GTT
3113  *              This function asserts that the object is not
3114  *              currently in any GPU-based read or write domains
3115  *      4. Read by GPU
3116  *              i915_gem_execbuffer calls set_domain (COMMAND, 0).
3117  *              As write_domain is zero, this function adds in the
3118  *              current read domains (CPU+COMMAND, 0).
3119  *              flush_domains is set to CPU.
3120  *              invalidate_domains is set to COMMAND
3121  *              clflush is run to get data out of the CPU caches
3122  *              then i915_dev_set_domain calls i915_gem_flush to
3123  *              emit an MI_FLUSH and drm_agp_chipset_flush
3124  *      5. Unmapped from GTT
3125  *              i915_gem_object_unbind calls set_domain (CPU, CPU)
3126  *              flush_domains and invalidate_domains end up both zero
3127  *              so no flushing/invalidating happens
3128  *      6. Freed
3129  *              yay, done
3130  *
3131  * Case 2: The shared render buffer
3132  *
3133  *      1. Allocated
3134  *      2. Mapped to GTT
3135  *      3. Read/written by GPU
3136  *      4. set_domain to (CPU,CPU)
3137  *      5. Read/written by CPU
3138  *      6. Read/written by GPU
3139  *
3140  *      1. Allocated
3141  *              Same as last example, (CPU, CPU)
3142  *      2. Mapped to GTT
3143  *              Nothing changes (assertions find that it is not in the GPU)
3144  *      3. Read/written by GPU
3145  *              execbuffer calls set_domain (RENDER, RENDER)
3146  *              flush_domains gets CPU
3147  *              invalidate_domains gets GPU
3148  *              clflush (obj)
3149  *              MI_FLUSH and drm_agp_chipset_flush
3150  *      4. set_domain (CPU, CPU)
3151  *              flush_domains gets GPU
3152  *              invalidate_domains gets CPU
3153  *              wait_rendering (obj) to make sure all drawing is complete.
3154  *              This will include an MI_FLUSH to get the data from GPU
3155  *              to memory
3156  *              clflush (obj) to invalidate the CPU cache
3157  *              Another MI_FLUSH in i915_gem_flush (eliminate this somehow?)
3158  *      5. Read/written by CPU
3159  *              cache lines are loaded and dirtied
3160  *      6. Read/written by GPU
3161  *              Same as last GPU access
3162  *
3163  * Case 3: The constant buffer
3164  *
3165  *      1. Allocated
3166  *      2. Written by CPU
3167  *      3. Read by GPU
3168  *      4. Updated (written) by CPU again
3169  *      5. Read by GPU
3170  *
3171  *      1. Allocated
3172  *              (CPU, CPU)
3173  *      2. Written by CPU
3174  *              (CPU, CPU)
3175  *      3. Read by GPU
3176  *              (CPU+RENDER, 0)
3177  *              flush_domains = CPU
3178  *              invalidate_domains = RENDER
3179  *              clflush (obj)
3180  *              MI_FLUSH
3181  *              drm_agp_chipset_flush
3182  *      4. Updated (written) by CPU again
3183  *              (CPU, CPU)
3184  *              flush_domains = 0 (no previous write domain)
3185  *              invalidate_domains = 0 (no new read domains)
3186  *      5. Read by GPU
3187  *              (CPU+RENDER, 0)
3188  *              flush_domains = CPU
3189  *              invalidate_domains = RENDER
3190  *              clflush (obj)
3191  *              MI_FLUSH
3192  *              drm_agp_chipset_flush
3193  */
3194 static void
3195 i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj,
3196                                   struct intel_ring_buffer *ring)
3197 {
3198         struct drm_device               *dev = obj->dev;
3199         struct drm_i915_private         *dev_priv = dev->dev_private;
3200         struct drm_i915_gem_object      *obj_priv = to_intel_bo(obj);
3201         uint32_t                        invalidate_domains = 0;
3202         uint32_t                        flush_domains = 0;
3203
3204         /*
3205          * If the object isn't moving to a new write domain,
3206          * let the object stay in multiple read domains
3207          */
3208         if (obj->pending_write_domain == 0)
3209                 obj->pending_read_domains |= obj->read_domains;
3210
3211         /*
3212          * Flush the current write domain if
3213          * the new read domains don't match. Invalidate
3214          * any read domains which differ from the old
3215          * write domain
3216          */
3217         if (obj->write_domain &&
3218             obj->write_domain != obj->pending_read_domains) {
3219                 flush_domains |= obj->write_domain;
3220                 invalidate_domains |=
3221                         obj->pending_read_domains & ~obj->write_domain;
3222         }
3223         /*
3224          * Invalidate any read caches which may have
3225          * stale data. That is, any new read domains.
3226          */
3227         invalidate_domains |= obj->pending_read_domains & ~obj->read_domains;
3228         if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU)
3229                 i915_gem_clflush_object(obj);
3230
3231         /* The actual obj->write_domain will be updated with
3232          * pending_write_domain after we emit the accumulated flush for all
3233          * of our domain changes in execbuffers (which clears objects'
3234          * write_domains).  So if we have a current write domain that we
3235          * aren't changing, set pending_write_domain to that.
3236          */
3237         if (flush_domains == 0 && obj->pending_write_domain == 0)
3238                 obj->pending_write_domain = obj->write_domain;
3239
3240         dev->invalidate_domains |= invalidate_domains;
3241         dev->flush_domains |= flush_domains;
3242         if (flush_domains & I915_GEM_GPU_DOMAINS)
3243                 dev_priv->mm.flush_rings |= obj_priv->ring->id;
3244         if (invalidate_domains & I915_GEM_GPU_DOMAINS)
3245                 dev_priv->mm.flush_rings |= ring->id;
3246 }
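
     /*
      * Worked example for Case 1, step 4 above (batch read by the GPU): the
      * object starts in (CPU, CPU) and execbuffer requests (COMMAND, 0), i.e.
      * pending_read_domains = COMMAND, pending_write_domain = 0.  Assuming no
      * other objects contribute domains, the code above then computes:
      *
      *      pending_write_domain == 0, so
      *              pending_read_domains |= read_domains  -> CPU | COMMAND
      *      write_domain (CPU) != pending_read_domains, so
      *              flush_domains       = CPU
      *              invalidate_domains |= (CPU | COMMAND) & ~CPU = COMMAND
      *
      * CPU appears in the union, so the object is clflushed here, and the
      * accumulated dev->flush_domains / dev->invalidate_domains cause the
      * caller to emit the MI_FLUSH and chipset flush described above.
      */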
3247
3248 /**
3249  * Moves the object from a partial CPU read domain to a full one.
3250  *
3251  * Note that this only resolves i915_gem_object_set_cpu_read_domain_range(),
3252  * and doesn't handle transitioning from !(read_domains & I915_GEM_DOMAIN_CPU).
3253  */
3254 static void
3255 i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj)
3256 {
3257         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
3258
3259         if (!obj_priv->page_cpu_valid)
3260                 return;
3261
3262         /* If we're partially in the CPU read domain, finish moving it in.
3263          */
3264         if (obj->read_domains & I915_GEM_DOMAIN_CPU) {
3265                 int i;
3266
3267                 for (i = 0; i <= (obj->size - 1) / PAGE_SIZE; i++) {
3268                         if (obj_priv->page_cpu_valid[i])
3269                                 continue;
3270                         drm_clflush_pages(obj_priv->pages + i, 1);
3271                 }
3272         }
3273
3274         /* Free the page_cpu_valid mappings which are now stale, whether
3275          * or not we've got I915_GEM_DOMAIN_CPU.
3276          */
3277         kfree(obj_priv->page_cpu_valid);
3278         obj_priv->page_cpu_valid = NULL;
3279 }
3280
3281 /**
3282  * Set the CPU read domain on a range of the object.
3283  *
3284  * The object ends up with I915_GEM_DOMAIN_CPU in its read flags although it's
3285  * not entirely valid.  The page_cpu_valid member of the object records which
3286  * pages have already been flushed, and is respected by
3287  * i915_gem_object_set_to_cpu_domain() if it is later called to get a valid
3288  * mapping of the whole object.
3289  *
3290  * This function returns when the move is complete, including waiting on
3291  * flushes to occur.
3292  */
3293 static int
3294 i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj,
3295                                           uint64_t offset, uint64_t size)
3296 {
3297         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
3298         uint32_t old_read_domains;
3299         int i, ret;
3300
3301         if (offset == 0 && size == obj->size)
3302                 return i915_gem_object_set_to_cpu_domain(obj, 0);
3303
3304         ret = i915_gem_object_flush_gpu_write_domain(obj, false);
3305         if (ret != 0)
3306                 return ret;
3307         i915_gem_object_flush_gtt_write_domain(obj);
3308
3309         /* If we're already fully in the CPU read domain, we're done. */
3310         if (obj_priv->page_cpu_valid == NULL &&
3311             (obj->read_domains & I915_GEM_DOMAIN_CPU) != 0)
3312                 return 0;
3313
3314         /* Otherwise, create/clear the per-page CPU read domain flag if we're
3315          * newly adding I915_GEM_DOMAIN_CPU
3316          */
3317         if (obj_priv->page_cpu_valid == NULL) {
3318                 obj_priv->page_cpu_valid = kzalloc(obj->size / PAGE_SIZE,
3319                                                    GFP_KERNEL);
3320                 if (obj_priv->page_cpu_valid == NULL)
3321                         return -ENOMEM;
3322         } else if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0)
3323                 memset(obj_priv->page_cpu_valid, 0, obj->size / PAGE_SIZE);
3324
3325         /* Flush the cache on any pages that are still invalid from the CPU's
3326          * perspective.
3327          */
3328         for (i = offset / PAGE_SIZE; i <= (offset + size - 1) / PAGE_SIZE;
3329              i++) {
3330                 if (obj_priv->page_cpu_valid[i])
3331                         continue;
3332
3333                 drm_clflush_pages(obj_priv->pages + i, 1);
3334
3335                 obj_priv->page_cpu_valid[i] = 1;
3336         }
3337
3338         /* It should now be out of any other write domains, and we can update
3339          * the domain values for our changes.
3340          */
3341         BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
3342
3343         old_read_domains = obj->read_domains;
3344         obj->read_domains |= I915_GEM_DOMAIN_CPU;
3345
3346         trace_i915_gem_object_change_domain(obj,
3347                                             old_read_domains,
3348                                             obj->write_domain);
3349
3350         return 0;
3351 }
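
     /*
      * Illustrative sketch (not part of the driver): a ranged read, such as
      * the pread path, only needs the bytes it is about to copy, so it can
      * request a partial CPU read domain instead of flushing the whole
      * object.  The args_offset/args_size names and copy_to_user_helper()
      * are hypothetical:
      *
      *      ret = i915_gem_object_set_cpu_read_domain_range(obj,
      *                                                      args_offset,
      *                                                      args_size);
      *      if (ret == 0)
      *              copy_to_user_helper(obj, args_offset, args_size);
      *
      * Only the pages covering [args_offset, args_offset + args_size) are
      * clflushed, and page_cpu_valid remembers them so a repeated read of the
      * same range skips the flush.
      */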
3352
3353 /**
3354  * Pin an object to the GTT and evaluate the relocations landing in it.
3355  */
3356 static int
3357 i915_gem_execbuffer_relocate(struct drm_i915_gem_object *obj,
3358                              struct drm_file *file_priv,
3359                              struct drm_i915_gem_exec_object2 *entry)
3360 {
3361         struct drm_device *dev = obj->base.dev;
3362         drm_i915_private_t *dev_priv = dev->dev_private;
3363         struct drm_i915_gem_relocation_entry __user *user_relocs;
3364         struct drm_gem_object *target_obj = NULL;
3365         uint32_t target_handle = 0;
3366         int i, ret = 0;
3367
3368         user_relocs = (void __user *)(uintptr_t)entry->relocs_ptr;
3369         for (i = 0; i < entry->relocation_count; i++) {
3370                 struct drm_i915_gem_relocation_entry reloc;
3371                 uint32_t target_offset;
3372
3373                 if (__copy_from_user_inatomic(&reloc,
3374                                               user_relocs+i,
3375                                               sizeof(reloc))) {
3376                         ret = -EFAULT;
3377                         break;
3378                 }
3379
3380                 if (reloc.target_handle != target_handle) {
3381                         drm_gem_object_unreference(target_obj);
3382
3383                         target_obj = drm_gem_object_lookup(dev, file_priv,
3384                                                            reloc.target_handle);
3385                         if (target_obj == NULL) {
3386                                 ret = -ENOENT;
3387                                 break;
3388                         }
3389
3390                         target_handle = reloc.target_handle;
3391                 }
3392                 target_offset = to_intel_bo(target_obj)->gtt_offset;
3393
3394 #if WATCH_RELOC
3395                 DRM_INFO("%s: obj %p offset %08x target %d "
3396                          "read %08x write %08x gtt %08x "
3397                          "presumed %08x delta %08x\n",
3398                          __func__,
3399                          obj,
3400                          (int) reloc.offset,
3401                          (int) reloc.target_handle,
3402                          (int) reloc.read_domains,
3403                          (int) reloc.write_domain,
3404                          (int) target_offset,
3405                          (int) reloc.presumed_offset,
3406                          reloc.delta);
3407 #endif
3408
3409                 /* The target buffer should have appeared before us in the
3410                  * exec_object list, so it should have a GTT space bound by now.
3411                  */
3412                 if (target_offset == 0) {
3413                         DRM_ERROR("No GTT space found for object %d\n",
3414                                   reloc.target_handle);
3415                         ret = -EINVAL;
3416                         break;
3417                 }
3418
3419                 /* Validate that the target is in a valid r/w GPU domain */
3420                 if (reloc.write_domain & (reloc.write_domain - 1)) {
3421                         DRM_ERROR("reloc with multiple write domains: "
3422                                   "obj %p target %d offset %d "
3423                                   "read %08x write %08x",
3424                                   obj, reloc.target_handle,
3425                                   (int) reloc.offset,
3426                                   reloc.read_domains,
3427                                   reloc.write_domain);
3428                         ret = -EINVAL;
3429                         break;
3430                 }
3431                 if (reloc.write_domain & I915_GEM_DOMAIN_CPU ||
3432                     reloc.read_domains & I915_GEM_DOMAIN_CPU) {
3433                         DRM_ERROR("reloc with read/write CPU domains: "
3434                                   "obj %p target %d offset %d "
3435                                   "read %08x write %08x",
3436                                   obj, reloc.target_handle,
3437                                   (int) reloc.offset,
3438                                   reloc.read_domains,
3439                                   reloc.write_domain);
3440                         ret = -EINVAL;
3441                         break;
3442                 }
3443                 if (reloc.write_domain && target_obj->pending_write_domain &&
3444                     reloc.write_domain != target_obj->pending_write_domain) {
3445                         DRM_ERROR("Write domain conflict: "
3446                                   "obj %p target %d offset %d "
3447                                   "new %08x old %08x\n",
3448                                   obj, reloc.target_handle,
3449                                   (int) reloc.offset,
3450                                   reloc.write_domain,
3451                                   target_obj->pending_write_domain);
3452                         ret = -EINVAL;
3453                         break;
3454                 }
3455
3456                 target_obj->pending_read_domains |= reloc.read_domains;
3457                 target_obj->pending_write_domain |= reloc.write_domain;
3458
3459                 /* If the relocation already has the right value in it, no
3460                  * more work needs to be done.
3461                  */
3462                 if (target_offset == reloc.presumed_offset)
3463                         continue;
3464
3465                 /* Check that the relocation address is valid... */
3466                 if (reloc.offset > obj->base.size - 4) {
3467                         DRM_ERROR("Relocation beyond object bounds: "
3468                                   "obj %p target %d offset %d size %d.\n",
3469                                   obj, reloc.target_handle,
3470                                   (int) reloc.offset, (int) obj->base.size);
3471                         ret = -EINVAL;
3472                         break;
3473                 }
3474                 if (reloc.offset & 3) {
3475                         DRM_ERROR("Relocation not 4-byte aligned: "
3476                                   "obj %p target %d offset %d.\n",
3477                                   obj, reloc.target_handle,
3478                                   (int) reloc.offset);
3479                         ret = -EINVAL;
3480                         break;
3481                 }
3482
3483                 /* and points to somewhere within the target object. */
3484                 if (reloc.delta >= target_obj->size) {
3485                         DRM_ERROR("Relocation beyond target object bounds: "
3486                                   "obj %p target %d delta %d size %d.\n",
3487                                   obj, reloc.target_handle,
3488                                   (int) reloc.delta, (int) target_obj->size);
3489                         ret = -EINVAL;
3490                         break;
3491                 }
3492
3493                 reloc.delta += target_offset;
3494                 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) {
3495                         uint32_t page_offset = reloc.offset & ~PAGE_MASK;
3496                         char *vaddr;
3497
3498                         vaddr = kmap_atomic(obj->pages[reloc.offset >> PAGE_SHIFT]);
3499                         *(uint32_t *)(vaddr + page_offset) = reloc.delta;
3500                         kunmap_atomic(vaddr);
3501                 } else {
3502                         uint32_t __iomem *reloc_entry;
3503                         void __iomem *reloc_page;
3504
3505                         ret = i915_gem_object_set_to_gtt_domain(&obj->base, 1);
3506                         if (ret)
3507                                 break;
3508
3509                         /* Map the page containing the relocation we're going to perform.  */
3510                         reloc.offset += obj->gtt_offset;
3511                         reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping,
3512                                                               reloc.offset & PAGE_MASK);
3513                         reloc_entry = (uint32_t __iomem *)
3514                                 (reloc_page + (reloc.offset & ~PAGE_MASK));
3515                         iowrite32(reloc.delta, reloc_entry);
3516                         io_mapping_unmap_atomic(reloc_page);
3517                 }
3518
3519                 /* and update the user's relocation entry */
3520                 reloc.presumed_offset = target_offset;
3521                 if (__copy_to_user_inatomic(&user_relocs[i].presumed_offset,
3522                                               &reloc.presumed_offset,
3523                                               sizeof(reloc.presumed_offset))) {
3524                         ret = -EFAULT;
3525                         break;
3526                 }
3527         }
3528
3529         drm_gem_object_unreference(target_obj);
3530         return ret;
3531 }
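
     /*
      * Illustrative sketch (not part of the driver): the userspace side of one
      * relocation processed by the loop above.  The batch references a vertex
      * buffer whose final GTT address is unknown when the batch is built, so
      * the userspace driver records where to patch the batch ("offset"), what
      * the patched value points at ("target_handle" plus "delta") and its best
      * guess ("presumed_offset").  The vbo_handle, dword_idx and
      * last_known_offset names are hypothetical:
      *
      *      struct drm_i915_gem_relocation_entry reloc = {
      *              .target_handle   = vbo_handle,
      *              .delta           = 0,
      *              .offset          = 4 * dword_idx,
      *              .presumed_offset = last_known_offset,
      *              .read_domains    = I915_GEM_DOMAIN_VERTEX,
      *              .write_domain    = 0,
      *      };
      *
      * If presumed_offset still matches the target's real GTT offset, the loop
      * above skips the rewrite entirely and only the pending domains are
      * accumulated.
      */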
3532
3533 static int
3534 i915_gem_execbuffer_pin(struct drm_device *dev,
3535                         struct drm_file *file,
3536                         struct drm_gem_object **object_list,
3537                         struct drm_i915_gem_exec_object2 *exec_list,
3538                         int count)
3539 {
3540         struct drm_i915_private *dev_priv = dev->dev_private;
3541         int ret, i, retry;
3542
3543         /* attempt to pin all of the buffers into the GTT */
3544         for (retry = 0; retry < 2; retry++) {
3545                 ret = 0;
3546                 for (i = 0; i < count; i++) {
3547                         struct drm_i915_gem_exec_object2 *entry = &exec_list[i];
3548                         struct drm_i915_gem_object *obj = to_intel_bo(object_list[i]);
3549                         bool need_fence =
3550                                 entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
3551                                 obj->tiling_mode != I915_TILING_NONE;
3552
3553                         /* g33/pnv can't fence buffers in the unmappable part */
3554                         bool need_mappable =
3555                                 entry->relocation_count ? true : need_fence;
3556
3557                         /* Check fence reg constraints and rebind if necessary */
3558                         if (need_fence &&
3559                             !i915_gem_object_fence_offset_ok(&obj->base,
3560                                                              obj->tiling_mode)) {
3561                                 ret = i915_gem_object_unbind(&obj->base);
3562                                 if (ret)
3563                                         break;
3564                         }
3565
3566                         ret = i915_gem_object_pin(&obj->base,
3567                                                   entry->alignment,
3568                                                   need_mappable);
3569                         if (ret)
3570                                 break;
3571
3572                         /*
3573                          * Pre-965 chips need a fence register set up in order
3574                          * to properly handle blits to/from tiled surfaces.
3575                          */
3576                         if (need_fence) {
3577                                 ret = i915_gem_object_get_fence_reg(&obj->base, true);
3578                                 if (ret) {
3579                                         i915_gem_object_unpin(&obj->base);
3580                                         break;
3581                                 }
3582
3583                                 dev_priv->fence_regs[obj->fence_reg].gpu = true;
3584                         }
3585
3586                         entry->offset = obj->gtt_offset;
3587                 }
3588
3589                 while (i--)
3590                         i915_gem_object_unpin(object_list[i]);
3591
3592                 if (ret == 0)
3593                         break;
3594
3595                 if (ret != -ENOSPC || retry)
3596                         return ret;
3597
3598                 ret = i915_gem_evict_everything(dev);
3599                 if (ret)
3600                         return ret;
3601         }
3602
3603         return 0;
3604 }
3605
3606 /* Throttle our rendering by waiting until the ring has completed our requests
3607  * emitted over 20 msec ago.
3608  *
3609  * Note that if we were to use the current jiffies each time around the loop,
3610  * we wouldn't escape the function with any frames outstanding if the time to
3611  * render a frame was over 20ms.
3612  *
3613  * This should get us reasonable parallelism between CPU and GPU but also
3614  * relatively low latency when blocking on a particular request to finish.
3615  */
3616 static int
3617 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
3618 {
3619         struct drm_i915_private *dev_priv = dev->dev_private;
3620         struct drm_i915_file_private *file_priv = file->driver_priv;
3621         unsigned long recent_enough = jiffies - msecs_to_jiffies(20);
3622         struct drm_i915_gem_request *request;
3623         struct intel_ring_buffer *ring = NULL;
3624         u32 seqno = 0;
3625         int ret;
3626
3627         spin_lock(&file_priv->mm.lock);
3628         list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
3629                 if (time_after_eq(request->emitted_jiffies, recent_enough))
3630                         break;
3631
3632                 ring = request->ring;
3633                 seqno = request->seqno;
3634         }
3635         spin_unlock(&file_priv->mm.lock);
3636
3637         if (seqno == 0)
3638                 return 0;
3639
3640         ret = 0;
3641         if (!i915_seqno_passed(ring->get_seqno(ring), seqno)) {
3642                 /* And wait for the seqno passing without holding any locks and
3643                  * causing extra latency for others. This is safe as the irq
3644                  * generation is designed to be run atomically and so is
3645                  * lockless.
3646                  */
3647                 ring->user_irq_get(ring);
3648                 ret = wait_event_interruptible(ring->irq_queue,
3649                                                i915_seqno_passed(ring->get_seqno(ring), seqno)
3650                                                || atomic_read(&dev_priv->mm.wedged));
3651                 ring->user_irq_put(ring);
3652
3653                 if (ret == 0 && atomic_read(&dev_priv->mm.wedged))
3654                         ret = -EIO;
3655         }
3656
3657         if (ret == 0)
3658                 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);
3659
3660         return ret;
3661 }
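
     /*
      * Illustrative numbers for the throttle above, assuming HZ = 1000 so one
      * jiffy is one millisecond: with jiffies = 10000, recent_enough becomes
      * 10000 - msecs_to_jiffies(20) = 9980.  A request with emitted_jiffies =
      * 9970 fails time_after_eq(9970, 9980), so its ring/seqno are recorded
      * and will be waited upon, while a request with emitted_jiffies = 9990
      * satisfies the test and stops the scan.  Only work emitted more than
      * ~20 ms ago is therefore ever waited for, leaving the most recent
      * requests outstanding for CPU/GPU parallelism.
      */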
3662
3663 static int
3664 i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec,
3665                           uint64_t exec_offset)
3666 {
3667         uint32_t exec_start, exec_len;
3668
3669         exec_start = (uint32_t) exec_offset + exec->batch_start_offset;
3670         exec_len = (uint32_t) exec->batch_len;
3671
3672         if ((exec_start | exec_len) & 0x7)
3673                 return -EINVAL;
3674
3675         if (!exec_start)
3676                 return -EINVAL;
3677
3678         return 0;
3679 }
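
     /*
      * Example of the check above: with the batch object bound at GTT offset
      * 0x100000 and batch_start_offset = 0x40, exec_start = 0x100040 and, for
      * an 8-byte-aligned batch_len, both values pass the "& 0x7" test, so the
      * batch is accepted.  A batch_start_offset of 0x44 (or a zero exec_start)
      * would instead cause the execbuffer to be rejected with -EINVAL.
      */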
3680
3681 static int
3682 validate_exec_list(struct drm_i915_gem_exec_object2 *exec,
3683                    int count)
3684 {
3685         int i;
3686
3687         for (i = 0; i < count; i++) {
3688                 char __user *ptr = (char __user *)(uintptr_t)exec[i].relocs_ptr;
3689                 size_t length = exec[i].relocation_count * sizeof(struct drm_i915_gem_relocation_entry);
3690
3691                 if (!access_ok(VERIFY_READ, ptr, length))
3692                         return -EFAULT;
3693
3694                 /* we may also need to update the presumed offsets */
3695                 if (!access_ok(VERIFY_WRITE, ptr, length))
3696                         return -EFAULT;
3697
3698                 if (fault_in_pages_readable(ptr, length))
3699                         return -EFAULT;
3700         }
3701
3702         return 0;
3703 }
3704
3705 static int
3706 i915_gem_do_execbuffer(struct drm_device *dev, void *data,
3707                        struct drm_file *file,
3708                        struct drm_i915_gem_execbuffer2 *args,
3709                        struct drm_i915_gem_exec_object2 *exec_list)
3710 {
3711         drm_i915_private_t *dev_priv = dev->dev_private;
3712         struct drm_gem_object **object_list = NULL;
3713         struct drm_gem_object *batch_obj;
3714         struct drm_clip_rect *cliprects = NULL;
3715         struct drm_i915_gem_request *request = NULL;
3716         int ret, i, flips;
3717         uint64_t exec_offset;
3718
3719         struct intel_ring_buffer *ring = NULL;
3720
3721         ret = i915_gem_check_is_wedged(dev);
3722         if (ret)
3723                 return ret;
3724
3725         ret = validate_exec_list(exec_list, args->buffer_count);
3726         if (ret)
3727                 return ret;
3728
3729 #if WATCH_EXEC
3730         DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
3731                   (int) args->buffers_ptr, args->buffer_count, args->batch_len);
3732 #endif
3733         switch (args->flags & I915_EXEC_RING_MASK) {
3734         case I915_EXEC_DEFAULT:
3735         case I915_EXEC_RENDER:
3736                 ring = &dev_priv->render_ring;
3737                 break;
3738         case I915_EXEC_BSD:
3739                 if (!HAS_BSD(dev)) {
3740                         DRM_ERROR("execbuf with invalid ring (BSD)\n");
3741                         return -EINVAL;
3742                 }
3743                 ring = &dev_priv->bsd_ring;
3744                 break;
3745         case I915_EXEC_BLT:
3746                 if (!HAS_BLT(dev)) {
3747                         DRM_ERROR("execbuf with invalid ring (BLT)\n");
3748                         return -EINVAL;
3749                 }
3750                 ring = &dev_priv->blt_ring;
3751                 break;
3752         default:
3753                 DRM_ERROR("execbuf with unknown ring: %d\n",
3754                           (int)(args->flags & I915_EXEC_RING_MASK));
3755                 return -EINVAL;
3756         }
3757
3758         if (args->buffer_count < 1) {
3759                 DRM_ERROR("execbuf with %d buffers\n", args->buffer_count);
3760                 return -EINVAL;
3761         }
3762         object_list = drm_malloc_ab(sizeof(*object_list), args->buffer_count);
3763         if (object_list == NULL) {
3764                 DRM_ERROR("Failed to allocate object list for %d buffers\n",
3765                           args->buffer_count);
3766                 ret = -ENOMEM;
3767                 goto pre_mutex_err;
3768         }
3769
3770         if (args->num_cliprects != 0) {
3771                 cliprects = kcalloc(args->num_cliprects, sizeof(*cliprects),
3772                                     GFP_KERNEL);
3773                 if (cliprects == NULL) {
3774                         ret = -ENOMEM;
3775                         goto pre_mutex_err;
3776                 }
3777
3778                 ret = copy_from_user(cliprects,
3779                                      (struct drm_clip_rect __user *)
3780                                      (uintptr_t) args->cliprects_ptr,
3781                                      sizeof(*cliprects) * args->num_cliprects);
3782                 if (ret != 0) {
3783                         DRM_ERROR("copy %d cliprects failed: %d\n",
3784                                   args->num_cliprects, ret);
3785                         ret = -EFAULT;
3786                         goto pre_mutex_err;
3787                 }
3788         }
3789
3790         request = kzalloc(sizeof(*request), GFP_KERNEL);
3791         if (request == NULL) {
3792                 ret = -ENOMEM;
3793                 goto pre_mutex_err;
3794         }
3795
3796         ret = i915_mutex_lock_interruptible(dev);
3797         if (ret)
3798                 goto pre_mutex_err;
3799
3800         if (dev_priv->mm.suspended) {
3801                 mutex_unlock(&dev->struct_mutex);
3802                 ret = -EBUSY;
3803                 goto pre_mutex_err;
3804         }
3805
3806         /* Look up object handles */
3807         for (i = 0; i < args->buffer_count; i++) {
3808                 struct drm_i915_gem_object *obj_priv;
3809
3810                 object_list[i] = drm_gem_object_lookup(dev, file,
3811                                                        exec_list[i].handle);
3812                 if (object_list[i] == NULL) {
3813                         DRM_ERROR("Invalid object handle %d at index %d\n",
3814                                    exec_list[i].handle, i);
3815                         /* prevent error path from reading uninitialized data */
3816                         args->buffer_count = i + 1;
3817                         ret = -ENOENT;
3818                         goto err;
3819                 }
3820
3821                 obj_priv = to_intel_bo(object_list[i]);
3822                 if (obj_priv->in_execbuffer) {
3823                         DRM_ERROR("Object %p appears more than once in object list\n",
3824                                    object_list[i]);
3825                         /* prevent error path from reading uninitialized data */
3826                         args->buffer_count = i + 1;
3827                         ret = -EINVAL;
3828                         goto err;
3829                 }
3830                 obj_priv->in_execbuffer = true;
3831         }
3832
3833         /* Move the objects en-masse into the GTT, evicting if necessary. */
3834         ret = i915_gem_execbuffer_pin(dev, file,
3835                                       object_list, exec_list,
3836                                       args->buffer_count);
3837         if (ret)
3838                 goto err;
3839
3840         /* The objects are in their final locations, apply the relocations. */
3841         for (i = 0; i < args->buffer_count; i++) {
3842                 struct drm_i915_gem_object *obj = to_intel_bo(object_list[i]);
3843                 obj->base.pending_read_domains = 0;
3844                 obj->base.pending_write_domain = 0;
3845                 ret = i915_gem_execbuffer_relocate(obj, file, &exec_list[i]);
3846                 if (ret)
3847                         goto err;
3848         }
3849
3850         /* Set the pending read domains for the batch buffer to COMMAND */
3851         batch_obj = object_list[args->buffer_count-1];
3852         if (batch_obj->pending_write_domain) {
3853                 DRM_ERROR("Attempting to use self-modifying batch buffer\n");
3854                 ret = -EINVAL;
3855                 goto err;
3856         }
3857         batch_obj->pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
3858
3859         /* Sanity check the batch buffer */
3860         exec_offset = to_intel_bo(batch_obj)->gtt_offset;
3861         ret = i915_gem_check_execbuffer(args, exec_offset);
3862         if (ret != 0) {
3863                 DRM_ERROR("execbuf with invalid offset/length\n");
3864                 goto err;
3865         }
3866
3867         /* Zero the global flush/invalidate flags. These
3868          * will be modified as new domains are computed
3869          * for each object
3870          */
3871         dev->invalidate_domains = 0;
3872         dev->flush_domains = 0;
3873         dev_priv->mm.flush_rings = 0;
3874         for (i = 0; i < args->buffer_count; i++)
3875                 i915_gem_object_set_to_gpu_domain(object_list[i], ring);
3876
3877         if (dev->invalidate_domains | dev->flush_domains) {
3878 #if WATCH_EXEC
3879                 DRM_INFO("%s: invalidate_domains %08x flush_domains %08x\n",
3880                           __func__,
3881                          dev->invalidate_domains,
3882                          dev->flush_domains);
3883 #endif
3884                 i915_gem_flush(dev, file,
3885                                dev->invalidate_domains,
3886                                dev->flush_domains,
3887                                dev_priv->mm.flush_rings);
3888         }
3889
3890 #if WATCH_COHERENCY
3891         for (i = 0; i < args->buffer_count; i++) {
3892                 i915_gem_object_check_coherency(object_list[i],
3893                                                 exec_list[i].handle);
3894         }
3895 #endif
3896
3897 #if WATCH_EXEC
3898         i915_gem_dump_object(batch_obj,
3899                               args->batch_len,
3900                               __func__,
3901                               ~0);
3902 #endif
3903
3904         /* Check for any pending flips. As we only maintain a flip queue depth
3905          * of 1, we can simply insert a WAIT for the next display flip prior
3906          * to executing the batch and avoid stalling the CPU.
3907          */
3908         flips = 0;
3909         for (i = 0; i < args->buffer_count; i++) {
3910                 if (object_list[i]->write_domain)
3911                         flips |= atomic_read(&to_intel_bo(object_list[i])->pending_flip);
3912         }
3913         if (flips) {
3914                 int plane, flip_mask;
3915
3916                 for (plane = 0; flips >> plane; plane++) {
3917                         if (((flips >> plane) & 1) == 0)
3918                                 continue;
3919
3920                         if (plane)
3921                                 flip_mask = MI_WAIT_FOR_PLANE_B_FLIP;
3922                         else
3923                                 flip_mask = MI_WAIT_FOR_PLANE_A_FLIP;
3924
3925                         ret = intel_ring_begin(ring, 2);
3926                         if (ret)
3927                                 goto err;
3928
3929                         intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask);
3930                         intel_ring_emit(ring, MI_NOOP);
3931                         intel_ring_advance(ring);
3932                 }
3933         }
3934
3935         /* Exec the batchbuffer */
3936         ret = ring->dispatch_execbuffer(ring, args, cliprects, exec_offset);
3937         if (ret) {
3938                 DRM_ERROR("dispatch failed %d\n", ret);
3939                 goto err;
3940         }
3941
3942         for (i = 0; i < args->buffer_count; i++) {
3943                 struct drm_gem_object *obj = object_list[i];
3944
3945                 obj->read_domains = obj->pending_read_domains;
3946                 obj->write_domain = obj->pending_write_domain;
3947
3948                 i915_gem_object_move_to_active(obj, ring);
3949                 if (obj->write_domain) {
3950                         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
3951                         obj_priv->dirty = 1;
3952                         list_move_tail(&obj_priv->gpu_write_list,
3953                                        &ring->gpu_write_list);
3954                         intel_mark_busy(dev, obj);
3955                 }
3956
3957                 trace_i915_gem_object_change_domain(obj,
3958                                                     obj->read_domains,
3959                                                     obj->write_domain);
3960         }
3961
3962         /*
3963          * Ensure that the commands in the batch buffer are
3964          * finished before the interrupt fires
3965          */
3966         i915_retire_commands(dev, ring);
3967
3968         if (i915_add_request(dev, file, request, ring))
3969                 ring->outstanding_lazy_request = true;
3970         else
3971                 request = NULL;
3972
3973 err:
3974         for (i = 0; i < args->buffer_count; i++) {
3975                 if (object_list[i] == NULL)
3976                         break;
3977
3978                 to_intel_bo(object_list[i])->in_execbuffer = false;
3979                 drm_gem_object_unreference(object_list[i]);
3980         }
3981
3982         mutex_unlock(&dev->struct_mutex);
3983
3984 pre_mutex_err:
3985         drm_free_large(object_list);
3986         kfree(cliprects);
3987         kfree(request);
3988
3989         return ret;
3990 }
3991
3992 /*
3993  * Legacy execbuffer just creates an exec2 list from the original exec object
3994  * list array and passes it to the real function.
3995  */
3996 int
3997 i915_gem_execbuffer(struct drm_device *dev, void *data,
3998                     struct drm_file *file_priv)
3999 {
4000         struct drm_i915_gem_execbuffer *args = data;
4001         struct drm_i915_gem_execbuffer2 exec2;
4002         struct drm_i915_gem_exec_object *exec_list = NULL;
4003         struct drm_i915_gem_exec_object2 *exec2_list = NULL;
4004         int ret, i;
4005
4006 #if WATCH_EXEC
4007         DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
4008                   (int) args->buffers_ptr, args->buffer_count, args->batch_len);
4009 #endif
4010
4011         if (args->buffer_count < 1) {
4012                 DRM_ERROR("execbuf with %d buffers\n", args->buffer_count);
4013                 return -EINVAL;
4014         }
4015
4016         /* Copy in the exec list from userland */
4017         exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count);
4018         exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
4019         if (exec_list == NULL || exec2_list == NULL) {
4020                 DRM_ERROR("Failed to allocate exec list for %d buffers\n",
4021                           args->buffer_count);
4022                 drm_free_large(exec_list);
4023                 drm_free_large(exec2_list);
4024                 return -ENOMEM;
4025         }
4026         ret = copy_from_user(exec_list,
4027                              (struct drm_i915_relocation_entry __user *)
4028                              (uintptr_t) args->buffers_ptr,
4029                              sizeof(*exec_list) * args->buffer_count);
4030         if (ret != 0) {
4031                 DRM_ERROR("copy %d exec entries failed %d\n",
4032                           args->buffer_count, ret);
4033                 drm_free_large(exec_list);
4034                 drm_free_large(exec2_list);
4035                 return -EFAULT;
4036         }
4037
4038         for (i = 0; i < args->buffer_count; i++) {
4039                 exec2_list[i].handle = exec_list[i].handle;
4040                 exec2_list[i].relocation_count = exec_list[i].relocation_count;
4041                 exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
4042                 exec2_list[i].alignment = exec_list[i].alignment;
4043                 exec2_list[i].offset = exec_list[i].offset;
4044                 if (INTEL_INFO(dev)->gen < 4)
4045                         exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
4046                 else
4047                         exec2_list[i].flags = 0;
4048         }
4049
4050         exec2.buffers_ptr = args->buffers_ptr;
4051         exec2.buffer_count = args->buffer_count;
4052         exec2.batch_start_offset = args->batch_start_offset;
4053         exec2.batch_len = args->batch_len;
4054         exec2.DR1 = args->DR1;
4055         exec2.DR4 = args->DR4;
4056         exec2.num_cliprects = args->num_cliprects;
4057         exec2.cliprects_ptr = args->cliprects_ptr;
4058         exec2.flags = I915_EXEC_RENDER;
4059
4060         ret = i915_gem_do_execbuffer(dev, data, file_priv, &exec2, exec2_list);
4061         if (!ret) {
4062                 /* Copy the new buffer offsets back to the user's exec list. */
4063                 for (i = 0; i < args->buffer_count; i++)
4064                         exec_list[i].offset = exec2_list[i].offset;
4065                 /* ... and back out to userspace */
4066                 ret = copy_to_user((struct drm_i915_relocation_entry __user *)
4067                                    (uintptr_t) args->buffers_ptr,
4068                                    exec_list,
4069                                    sizeof(*exec_list) * args->buffer_count);
4070                 if (ret) {
4071                         ret = -EFAULT;
4072                         DRM_ERROR("failed to copy %d exec entries "
4073                                   "back to user (%d)\n",
4074                                   args->buffer_count, ret);
4075                 }
4076         }
4077
4078         drm_free_large(exec_list);
4079         drm_free_large(exec2_list);
4080         return ret;
4081 }
4082
4083 int
4084 i915_gem_execbuffer2(struct drm_device *dev, void *data,
4085                      struct drm_file *file_priv)
4086 {
4087         struct drm_i915_gem_execbuffer2 *args = data;
4088         struct drm_i915_gem_exec_object2 *exec2_list = NULL;
4089         int ret;
4090
4091 #if WATCH_EXEC
4092         DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
4093                   (int) args->buffers_ptr, args->buffer_count, args->batch_len);
4094 #endif
4095
4096         if (args->buffer_count < 1) {
4097                 DRM_ERROR("execbuf2 with %d buffers\n", args->buffer_count);
4098                 return -EINVAL;
4099         }
4100
4101         exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
4102         if (exec2_list == NULL) {
4103                 DRM_ERROR("Failed to allocate exec list for %d buffers\n",
4104                           args->buffer_count);
4105                 return -ENOMEM;
4106         }
4107         ret = copy_from_user(exec2_list,
4108                              (struct drm_i915_relocation_entry __user *)
4109                              (uintptr_t) args->buffers_ptr,
4110                              sizeof(*exec2_list) * args->buffer_count);
4111         if (ret != 0) {
4112                 DRM_ERROR("copy %d exec entries failed %d\n",
4113                           args->buffer_count, ret);
4114                 drm_free_large(exec2_list);
4115                 return -EFAULT;
4116         }
4117
4118         ret = i915_gem_do_execbuffer(dev, data, file_priv, args, exec2_list);
4119         if (!ret) {
4120                 /* Copy the new buffer offsets back to the user's exec list. */
4121                 ret = copy_to_user((struct drm_i915_relocation_entry __user *)
4122                                    (uintptr_t) args->buffers_ptr,
4123                                    exec2_list,
4124                                    sizeof(*exec2_list) * args->buffer_count);
4125                 if (ret) {
4126                         ret = -EFAULT;
4127                         DRM_ERROR("failed to copy %d exec entries "
4128                                   "back to user (%d)\n",
4129                                   args->buffer_count, ret);
4130                 }
4131         }
4132
4133         drm_free_large(exec2_list);
4134         return ret;
4135 }
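
     /*
      * Illustrative userspace sketch (not part of the driver): the minimal
      * call sequence that reaches i915_gem_do_execbuffer() through this ioctl.
      * The fd, object handles and relocation array are assumed to exist
      * already; error handling is elided and the variable names are made up:
      *
      *      struct drm_i915_gem_exec_object2 objs[2] = {
      *              { .handle = vbo_handle },
      *              { .handle = batch_handle,
      *                .relocation_count = 1,
      *                .relocs_ptr = (uintptr_t)&reloc },
      *      };
      *      struct drm_i915_gem_execbuffer2 execbuf = {
      *              .buffers_ptr  = (uintptr_t)objs,
      *              .buffer_count = 2,
      *              .batch_len    = batch_bytes,
      *              .flags        = I915_EXEC_RENDER,
      *      };
      *      drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
      *
      * The batch object goes last in the array, and on success the kernel
      * copies each object's final GTT offset back into objs[i].offset, which
      * userspace caches as the next presumed_offset.
      */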
4136
4137 int
4138 i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment,
4139                     bool mappable)
4140 {
4141         struct drm_device *dev = obj->dev;
4142         struct drm_i915_private *dev_priv = dev->dev_private;
4143         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
4144         int ret;
4145
4146         BUG_ON(obj_priv->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT);
4147         WARN_ON(i915_verify_lists(dev));
4148
4149         if (obj_priv->gtt_space != NULL) {
4150                 if (alignment == 0)
4151                         alignment = i915_gem_get_gtt_alignment(obj);
4152                 if (obj_priv->gtt_offset & (alignment - 1) ||
4153                     (mappable && !i915_gem_object_cpu_accessible(obj_priv))) {
4154                         WARN(obj_priv->pin_count,
4155                              "bo is already pinned with incorrect alignment:"
4156                              " offset=%x, req.alignment=%x\n",
4157                              obj_priv->gtt_offset, alignment);
4158                         ret = i915_gem_object_unbind(obj);
4159                         if (ret)
4160                                 return ret;
4161                 }
4162         }
4163
4164         if (obj_priv->gtt_space == NULL) {
4165                 ret = i915_gem_object_bind_to_gtt(obj, alignment, mappable);
4166                 if (ret)
4167                         return ret;
4168         }
4169
4170         obj_priv->pin_count++;
4171
4172         /* On the first pin, account for the pin and, if the object is not
4173          * active, move it from the inactive list to the pinned list.
4174          */
4175         if (obj_priv->pin_count == 1) {
4176                 i915_gem_info_add_pin(dev_priv, obj, mappable);
4177                 if (!obj_priv->active)
4178                         list_move_tail(&obj_priv->mm_list,
4179                                        &dev_priv->mm.pinned_list);
4180         }
4181         BUG_ON(!obj_priv->pin_mappable && mappable);
4182
4183         WARN_ON(i915_verify_lists(dev));
4184         return 0;
4185 }
4186
4187 void
4188 i915_gem_object_unpin(struct drm_gem_object *obj)
4189 {
4190         struct drm_device *dev = obj->dev;
4191         drm_i915_private_t *dev_priv = dev->dev_private;
4192         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
4193
4194         WARN_ON(i915_verify_lists(dev));
4195         obj_priv->pin_count--;
4196         BUG_ON(obj_priv->pin_count < 0);
4197         BUG_ON(obj_priv->gtt_space == NULL);
4198
4199         /* If the object is no longer pinned, and is
4200          * neither active nor being flushed, then stick it on
4201          * the inactive list
4202          */
4203         if (obj_priv->pin_count == 0) {
4204                 if (!obj_priv->active)
4205                         list_move_tail(&obj_priv->mm_list,
4206                                        &dev_priv->mm.inactive_list);
4207                 i915_gem_info_remove_pin(dev_priv, obj);
4208         }
4209         WARN_ON(i915_verify_lists(dev));
4210 }
4211
4212 int
4213 i915_gem_pin_ioctl(struct drm_device *dev, void *data,
4214                    struct drm_file *file_priv)
4215 {
4216         struct drm_i915_gem_pin *args = data;
4217         struct drm_gem_object *obj;
4218         struct drm_i915_gem_object *obj_priv;
4219         int ret;
4220
4221         ret = i915_mutex_lock_interruptible(dev);
4222         if (ret)
4223                 return ret;
4224
4225         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
4226         if (obj == NULL) {
4227                 ret = -ENOENT;
4228                 goto unlock;
4229         }
4230         obj_priv = to_intel_bo(obj);
4231
4232         if (obj_priv->madv != I915_MADV_WILLNEED) {
4233                 DRM_ERROR("Attempting to pin a purgeable buffer\n");
4234                 ret = -EINVAL;
4235                 goto out;
4236         }
4237
4238         if (obj_priv->pin_filp != NULL && obj_priv->pin_filp != file_priv) {
4239                 DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n",
4240                           args->handle);
4241                 ret = -EINVAL;
4242                 goto out;
4243         }
4244
4245         if (obj_priv->user_pin_count == 0) {
4246                 ret = i915_gem_object_pin(obj, args->alignment, true);
4247                 if (ret)
4248                         goto out;
4249         }
4250         obj_priv->user_pin_count++;
4251         obj_priv->pin_filp = file_priv;
4252
4253         /* XXX - flush the CPU caches for pinned objects
4254          * as the X server doesn't manage domains yet
4255          */
4256         i915_gem_object_flush_cpu_write_domain(obj);
4257         args->offset = obj_priv->gtt_offset;
4258 out:
4259         drm_gem_object_unreference(obj);
4260 unlock:
4261         mutex_unlock(&dev->struct_mutex);
4262         return ret;
4263 }
4264
4265 int
4266 i915_gem_unpin_ioctl(struct drm_device *dev, void *data,
4267                      struct drm_file *file_priv)
4268 {
4269         struct drm_i915_gem_pin *args = data;
4270         struct drm_gem_object *obj;
4271         struct drm_i915_gem_object *obj_priv;
4272         int ret;
4273
4274         ret = i915_mutex_lock_interruptible(dev);
4275         if (ret)
4276                 return ret;
4277
4278         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
4279         if (obj == NULL) {
4280                 ret = -ENOENT;
4281                 goto unlock;
4282         }
4283         obj_priv = to_intel_bo(obj);
4284
4285         if (obj_priv->pin_filp != file_priv) {
4286                 DRM_ERROR("Not pinned by caller in i915_gem_unpin_ioctl(): %d\n",
4287                           args->handle);
4288                 ret = -EINVAL;
4289                 goto out;
4290         }
4291         obj_priv->user_pin_count--;
4292         if (obj_priv->user_pin_count == 0) {
4293                 obj_priv->pin_filp = NULL;
4294                 i915_gem_object_unpin(obj);
4295         }
4296
4297 out:
4298         drm_gem_object_unreference(obj);
4299 unlock:
4300         mutex_unlock(&dev->struct_mutex);
4301         return ret;
4302 }
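/*
 * A minimal userspace sketch of the pin/unpin contract above, assuming
 * libdrm's drmIoctl() and the uapi structures from i915_drm.h; "fd" and
 * "handle" are assumed to belong to a sufficiently privileged DRM client.
 * This is only an illustration, not a recommended interface to build on.
 *
 *	struct drm_i915_gem_pin pin = { .handle = handle, .alignment = 4096 };
 *	struct drm_i915_gem_unpin unpin = { .handle = handle };
 *
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_PIN, &pin) == 0) {
 *		use_gtt_offset(pin.offset);	// hypothetical helper
 *		drmIoctl(fd, DRM_IOCTL_I915_GEM_UNPIN, &unpin);
 *	}
 */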
4303
4304 int
4305 i915_gem_busy_ioctl(struct drm_device *dev, void *data,
4306                     struct drm_file *file_priv)
4307 {
4308         struct drm_i915_gem_busy *args = data;
4309         struct drm_gem_object *obj;
4310         struct drm_i915_gem_object *obj_priv;
4311         int ret;
4312
4313         ret = i915_mutex_lock_interruptible(dev);
4314         if (ret)
4315                 return ret;
4316
4317         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
4318         if (obj == NULL) {
4319                 ret = -ENOENT;
4320                 goto unlock;
4321         }
4322         obj_priv = to_intel_bo(obj);
4323
4324         /* Count all active objects as busy, even if they are currently not used
4325          * by the gpu. Users of this interface expect objects to eventually
4326          * become non-busy without any further actions, therefore emit any
4327          * necessary flushes here.
4328          */
4329         args->busy = obj_priv->active;
4330         if (args->busy) {
4331                 /* Unconditionally flush objects, even when the gpu still uses this
4332                  * object. Userspace calling this function indicates that it wants to
4333                  * use this buffer sooner rather than later, so issuing the required
4334                  * flush early is beneficial.
4335                  */
4336                 if (obj->write_domain & I915_GEM_GPU_DOMAINS)
4337                         i915_gem_flush_ring(dev, file_priv,
4338                                             obj_priv->ring,
4339                                             0, obj->write_domain);
4340
4341                 /* Update the active list for the hardware's current position.
4342                  * Otherwise this only updates on a delayed timer or when irqs
4343                  * are actually unmasked, and our working set ends up being
4344                  * larger than required.
4345                  */
4346                 i915_gem_retire_requests_ring(dev, obj_priv->ring);
4347
4348                 args->busy = obj_priv->active;
4349         }
4350
4351         drm_gem_object_unreference(obj);
4352 unlock:
4353         mutex_unlock(&dev->struct_mutex);
4354         return ret;
4355 }
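/*
 * A minimal userspace sketch of the busy ioctl above, assuming libdrm's
 * drmIoctl() and the uapi structures from i915_drm.h; "fd" and "handle" are
 * assumed to belong to the caller.  Real users normally sleep in set-domain
 * or throttle rather than spinning like this.
 *
 *	struct drm_i915_gem_busy busy = { .handle = handle };
 *
 *	do {
 *		if (drmIoctl(fd, DRM_IOCTL_I915_GEM_BUSY, &busy))
 *			break;
 *	} while (busy.busy);
 */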
4356
4357 int
4358 i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
4359                         struct drm_file *file_priv)
4360 {
4361         return i915_gem_ring_throttle(dev, file_priv);
4362 }
4363
4364 int
4365 i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
4366                        struct drm_file *file_priv)
4367 {
4368         struct drm_i915_gem_madvise *args = data;
4369         struct drm_gem_object *obj;
4370         struct drm_i915_gem_object *obj_priv;
4371         int ret;
4372
4373         switch (args->madv) {
4374         case I915_MADV_DONTNEED:
4375         case I915_MADV_WILLNEED:
4376                 break;
4377         default:
4378                 return -EINVAL;
4379         }
4380
4381         ret = i915_mutex_lock_interruptible(dev);
4382         if (ret)
4383                 return ret;
4384
4385         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
4386         if (obj == NULL) {
4387                 ret = -ENOENT;
4388                 goto unlock;
4389         }
4390         obj_priv = to_intel_bo(obj);
4391
4392         if (obj_priv->pin_count) {
4393                 ret = -EINVAL;
4394                 goto out;
4395         }
4396
4397         if (obj_priv->madv != __I915_MADV_PURGED)
4398                 obj_priv->madv = args->madv;
4399
4400         /* if the object is no longer bound, discard its backing storage */
4401         if (i915_gem_object_is_purgeable(obj_priv) &&
4402             obj_priv->gtt_space == NULL)
4403                 i915_gem_object_truncate(obj);
4404
4405         args->retained = obj_priv->madv != __I915_MADV_PURGED;
4406
4407 out:
4408         drm_gem_object_unreference(obj);
4409 unlock:
4410         mutex_unlock(&dev->struct_mutex);
4411         return ret;
4412 }
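/*
 * A minimal userspace sketch of the madvise contract above, assuming libdrm's
 * drmIoctl() and the uapi structures from i915_drm.h; "fd" and "handle" are
 * assumed to belong to the caller.  An idle buffer is marked DONTNEED so the
 * shrinker may reap its pages; before reuse it is marked WILLNEED again and
 * "retained" reports whether the old contents survived.
 *
 *	struct drm_i915_gem_madvise madv = {
 *		.handle = handle,
 *		.madv = I915_MADV_DONTNEED,
 *	};
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);
 *
 *	...
 *
 *	madv.madv = I915_MADV_WILLNEED;
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);
 *	if (!madv.retained)
 *		reupload_contents(handle);	// hypothetical helper
 */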
4413
4414 struct drm_gem_object * i915_gem_alloc_object(struct drm_device *dev,
4415                                               size_t size)
4416 {
4417         struct drm_i915_private *dev_priv = dev->dev_private;
4418         struct drm_i915_gem_object *obj;
4419
4420         obj = kzalloc(sizeof(*obj), GFP_KERNEL);
4421         if (obj == NULL)
4422                 return NULL;
4423
4424         if (drm_gem_object_init(dev, &obj->base, size) != 0) {
4425                 kfree(obj);
4426                 return NULL;
4427         }
4428
4429         i915_gem_info_add_obj(dev_priv, size);
4430
4431         obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4432         obj->base.read_domains = I915_GEM_DOMAIN_CPU;
4433
4434         obj->agp_type = AGP_USER_MEMORY;
4435         obj->base.driver_private = NULL;
4436         obj->fence_reg = I915_FENCE_REG_NONE;
4437         INIT_LIST_HEAD(&obj->mm_list);
4438         INIT_LIST_HEAD(&obj->ring_list);
4439         INIT_LIST_HEAD(&obj->gpu_write_list);
4440         obj->madv = I915_MADV_WILLNEED;
4441
4442         return &obj->base;
4443 }
4444
4445 int i915_gem_init_object(struct drm_gem_object *obj)
4446 {
4447         BUG();
4448
4449         return 0;
4450 }
4451
4452 static void i915_gem_free_object_tail(struct drm_gem_object *obj)
4453 {
4454         struct drm_device *dev = obj->dev;
4455         drm_i915_private_t *dev_priv = dev->dev_private;
4456         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
4457         int ret;
4458
4459         ret = i915_gem_object_unbind(obj);
4460         if (ret == -ERESTARTSYS) {
4461                 list_move(&obj_priv->mm_list,
4462                           &dev_priv->mm.deferred_free_list);
4463                 return;
4464         }
4465
4466         if (obj_priv->mmap_offset)
4467                 i915_gem_free_mmap_offset(obj);
4468
4469         drm_gem_object_release(obj);
4470         i915_gem_info_remove_obj(dev_priv, obj->size);
4471
4472         kfree(obj_priv->page_cpu_valid);
4473         kfree(obj_priv->bit_17);
4474         kfree(obj_priv);
4475 }
4476
4477 void i915_gem_free_object(struct drm_gem_object *obj)
4478 {
4479         struct drm_device *dev = obj->dev;
4480         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
4481
4482         trace_i915_gem_object_destroy(obj);
4483
4484         while (obj_priv->pin_count > 0)
4485                 i915_gem_object_unpin(obj);
4486
4487         if (obj_priv->phys_obj)
4488                 i915_gem_detach_phys_object(dev, obj);
4489
4490         i915_gem_free_object_tail(obj);
4491 }
4492
4493 int
4494 i915_gem_idle(struct drm_device *dev)
4495 {
4496         drm_i915_private_t *dev_priv = dev->dev_private;
4497         int ret;
4498
4499         mutex_lock(&dev->struct_mutex);
4500
4501         if (dev_priv->mm.suspended) {
4502                 mutex_unlock(&dev->struct_mutex);
4503                 return 0;
4504         }
4505
4506         ret = i915_gpu_idle(dev);
4507         if (ret) {
4508                 mutex_unlock(&dev->struct_mutex);
4509                 return ret;
4510         }
4511
4512         /* Under UMS, be paranoid and evict. */
4513         if (!drm_core_check_feature(dev, DRIVER_MODESET)) {
4514                 ret = i915_gem_evict_inactive(dev);
4515                 if (ret) {
4516                         mutex_unlock(&dev->struct_mutex);
4517                         return ret;
4518                 }
4519         }
4520
4521         /* Hack!  Don't let anybody do execbuf while we don't control the chip.
4522          * We need to replace this with a semaphore, or something.
4523          * And not confound mm.suspended!
4524          */
4525         dev_priv->mm.suspended = 1;
4526         del_timer_sync(&dev_priv->hangcheck_timer);
4527
4528         i915_kernel_lost_context(dev);
4529         i915_gem_cleanup_ringbuffer(dev);
4530
4531         mutex_unlock(&dev->struct_mutex);
4532
4533         /* Cancel the retire work handler, which should be idle now. */
4534         cancel_delayed_work_sync(&dev_priv->mm.retire_work);
4535
4536         return 0;
4537 }
4538
4539 /*
4540  * 965+ supports PIPE_CONTROL commands, which provide finer-grained control
4541  * over cache flushing.
4542  */
4543 static int
4544 i915_gem_init_pipe_control(struct drm_device *dev)
4545 {
4546         drm_i915_private_t *dev_priv = dev->dev_private;
4547         struct drm_gem_object *obj;
4548         struct drm_i915_gem_object *obj_priv;
4549         int ret;
4550
4551         obj = i915_gem_alloc_object(dev, 4096);
4552         if (obj == NULL) {
4553                 DRM_ERROR("Failed to allocate seqno page\n");
4554                 ret = -ENOMEM;
4555                 goto err;
4556         }
4557         obj_priv = to_intel_bo(obj);
4558         obj_priv->agp_type = AGP_USER_CACHED_MEMORY;
4559
4560         ret = i915_gem_object_pin(obj, 4096, true);
4561         if (ret)
4562                 goto err_unref;
4563         dev_priv->seqno_gfx_addr = obj_priv->gtt_offset;
4564         dev_priv->seqno_page = kmap(obj_priv->pages[0]);
4565         if (dev_priv->seqno_page == NULL) {
4566                 ret = -ENOMEM;
4567                 goto err_unpin;
4568         }
4569         dev_priv->seqno_obj = obj;
4570         memset(dev_priv->seqno_page, 0, PAGE_SIZE);
4571
4572         return 0;
4573
4574 err_unpin:
4575         i915_gem_object_unpin(obj);
4576 err_unref:
4577         drm_gem_object_unreference(obj);
4578 err:
4579         return ret;
4580 }
4581
4582
4583 static void
4584 i915_gem_cleanup_pipe_control(struct drm_device *dev)
4585 {
4586         drm_i915_private_t *dev_priv = dev->dev_private;
4587         struct drm_gem_object *obj;
4588         struct drm_i915_gem_object *obj_priv;
4589
4590         obj = dev_priv->seqno_obj;
4591         obj_priv = to_intel_bo(obj);
4592         kunmap(obj_priv->pages[0]);
4593         i915_gem_object_unpin(obj);
4594         drm_gem_object_unreference(obj);
4595         dev_priv->seqno_obj = NULL;
4596
4597         dev_priv->seqno_page = NULL;
4598 }
4599
4600 int
4601 i915_gem_init_ringbuffer(struct drm_device *dev)
4602 {
4603         drm_i915_private_t *dev_priv = dev->dev_private;
4604         int ret;
4605
4606         if (HAS_PIPE_CONTROL(dev)) {
4607                 ret = i915_gem_init_pipe_control(dev);
4608                 if (ret)
4609                         return ret;
4610         }
4611
4612         ret = intel_init_render_ring_buffer(dev);
4613         if (ret)
4614                 goto cleanup_pipe_control;
4615
4616         if (HAS_BSD(dev)) {
4617                 ret = intel_init_bsd_ring_buffer(dev);
4618                 if (ret)
4619                         goto cleanup_render_ring;
4620         }
4621
4622         if (HAS_BLT(dev)) {
4623                 ret = intel_init_blt_ring_buffer(dev);
4624                 if (ret)
4625                         goto cleanup_bsd_ring;
4626         }
4627
4628         dev_priv->next_seqno = 1;
4629
4630         return 0;
4631
4632 cleanup_bsd_ring:
4633         intel_cleanup_ring_buffer(&dev_priv->bsd_ring);
4634 cleanup_render_ring:
4635         intel_cleanup_ring_buffer(&dev_priv->render_ring);
4636 cleanup_pipe_control:
4637         if (HAS_PIPE_CONTROL(dev))
4638                 i915_gem_cleanup_pipe_control(dev);
4639         return ret;
4640 }
4641
4642 void
4643 i915_gem_cleanup_ringbuffer(struct drm_device *dev)
4644 {
4645         drm_i915_private_t *dev_priv = dev->dev_private;
4646
4647         intel_cleanup_ring_buffer(&dev_priv->render_ring);
4648         intel_cleanup_ring_buffer(&dev_priv->bsd_ring);
4649         intel_cleanup_ring_buffer(&dev_priv->blt_ring);
4650         if (HAS_PIPE_CONTROL(dev))
4651                 i915_gem_cleanup_pipe_control(dev);
4652 }
4653
4654 int
4655 i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
4656                        struct drm_file *file_priv)
4657 {
4658         drm_i915_private_t *dev_priv = dev->dev_private;
4659         int ret;
4660
4661         if (drm_core_check_feature(dev, DRIVER_MODESET))
4662                 return 0;
4663
4664         if (atomic_read(&dev_priv->mm.wedged)) {
4665                 DRM_ERROR("Reenabling wedged hardware, good luck\n");
4666                 atomic_set(&dev_priv->mm.wedged, 0);
4667         }
4668
4669         mutex_lock(&dev->struct_mutex);
4670         dev_priv->mm.suspended = 0;
4671
4672         ret = i915_gem_init_ringbuffer(dev);
4673         if (ret != 0) {
4674                 mutex_unlock(&dev->struct_mutex);
4675                 return ret;
4676         }
4677
4678         BUG_ON(!list_empty(&dev_priv->mm.active_list));
4679         BUG_ON(!list_empty(&dev_priv->render_ring.active_list));
4680         BUG_ON(!list_empty(&dev_priv->bsd_ring.active_list));
4681         BUG_ON(!list_empty(&dev_priv->blt_ring.active_list));
4682         BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
4683         BUG_ON(!list_empty(&dev_priv->mm.inactive_list));
4684         BUG_ON(!list_empty(&dev_priv->render_ring.request_list));
4685         BUG_ON(!list_empty(&dev_priv->bsd_ring.request_list));
4686         BUG_ON(!list_empty(&dev_priv->blt_ring.request_list));
4687         mutex_unlock(&dev->struct_mutex);
4688
4689         ret = drm_irq_install(dev);
4690         if (ret)
4691                 goto cleanup_ringbuffer;
4692
4693         return 0;
4694
4695 cleanup_ringbuffer:
4696         mutex_lock(&dev->struct_mutex);
4697         i915_gem_cleanup_ringbuffer(dev);
4698         dev_priv->mm.suspended = 1;
4699         mutex_unlock(&dev->struct_mutex);
4700
4701         return ret;
4702 }
4703
4704 int
4705 i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
4706                        struct drm_file *file_priv)
4707 {
4708         if (drm_core_check_feature(dev, DRIVER_MODESET))
4709                 return 0;
4710
4711         drm_irq_uninstall(dev);
4712         return i915_gem_idle(dev);
4713 }
4714
4715 void
4716 i915_gem_lastclose(struct drm_device *dev)
4717 {
4718         int ret;
4719
4720         if (drm_core_check_feature(dev, DRIVER_MODESET))
4721                 return;
4722
4723         ret = i915_gem_idle(dev);
4724         if (ret)
4725                 DRM_ERROR("failed to idle hardware: %d\n", ret);
4726 }
4727
4728 static void
4729 init_ring_lists(struct intel_ring_buffer *ring)
4730 {
4731         INIT_LIST_HEAD(&ring->active_list);
4732         INIT_LIST_HEAD(&ring->request_list);
4733         INIT_LIST_HEAD(&ring->gpu_write_list);
4734 }
4735
4736 void
4737 i915_gem_load(struct drm_device *dev)
4738 {
4739         int i;
4740         drm_i915_private_t *dev_priv = dev->dev_private;
4741
4742         INIT_LIST_HEAD(&dev_priv->mm.active_list);
4743         INIT_LIST_HEAD(&dev_priv->mm.flushing_list);
4744         INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
4745         INIT_LIST_HEAD(&dev_priv->mm.pinned_list);
4746         INIT_LIST_HEAD(&dev_priv->mm.fence_list);
4747         INIT_LIST_HEAD(&dev_priv->mm.deferred_free_list);
4748         init_ring_lists(&dev_priv->render_ring);
4749         init_ring_lists(&dev_priv->bsd_ring);
4750         init_ring_lists(&dev_priv->blt_ring);
4751         for (i = 0; i < 16; i++)
4752                 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
4753         INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
4754                           i915_gem_retire_work_handler);
4755         init_completion(&dev_priv->error_completion);
4756         spin_lock(&shrink_list_lock);
4757         list_add(&dev_priv->mm.shrink_list, &shrink_list);
4758         spin_unlock(&shrink_list_lock);
4759
4760         /* On GEN3 we really need to make sure the ARB C3 LP bit is set */
4761         if (IS_GEN3(dev)) {
4762                 u32 tmp = I915_READ(MI_ARB_STATE);
4763                 if (!(tmp & MI_ARB_C3_LP_WRITE_ENABLE)) {
4764                         /* MI_ARB_STATE is masked-write: set the bit plus its mask bit */
4765                         tmp = MI_ARB_C3_LP_WRITE_ENABLE | (MI_ARB_C3_LP_WRITE_ENABLE << MI_ARB_MASK_SHIFT);
4766                         I915_WRITE(MI_ARB_STATE, tmp);
4767                 }
4768         }
4769
4770         /* Old X drivers will take 0-2 for front, back, depth buffers */
4771         if (!drm_core_check_feature(dev, DRIVER_MODESET))
4772                 dev_priv->fence_reg_start = 3;
4773
4774         if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
4775                 dev_priv->num_fence_regs = 16;
4776         else
4777                 dev_priv->num_fence_regs = 8;
4778
4779         /* Initialize fence registers to zero */
4780         switch (INTEL_INFO(dev)->gen) {
4781         case 6:
4782                 for (i = 0; i < 16; i++)
4783                         I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + (i * 8), 0);
4784                 break;
4785         case 5:
4786         case 4:
4787                 for (i = 0; i < 16; i++)
4788                         I915_WRITE64(FENCE_REG_965_0 + (i * 8), 0);
4789                 break;
4790         case 3:
4791                 if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
4792                         for (i = 0; i < 8; i++)
4793                                 I915_WRITE(FENCE_REG_945_8 + (i * 4), 0);
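                /* fall through: gen3 also clears the 8 gen2-style registers below */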
4794         case 2:
4795                 for (i = 0; i < 8; i++)
4796                         I915_WRITE(FENCE_REG_830_0 + (i * 4), 0);
4797                 break;
4798         }
4799         i915_gem_detect_bit_6_swizzle(dev);
4800         init_waitqueue_head(&dev_priv->pending_flip_queue);
4801 }
4802
4803 /*
4804  * Create a physically contiguous memory object for this object
4805  * e.g. for cursor + overlay regs
4806  */
4807 static int i915_gem_init_phys_object(struct drm_device *dev,
4808                                      int id, int size, int align)
4809 {
4810         drm_i915_private_t *dev_priv = dev->dev_private;
4811         struct drm_i915_gem_phys_object *phys_obj;
4812         int ret;
4813
4814         if (dev_priv->mm.phys_objs[id - 1] || !size)
4815                 return 0;
4816
4817         phys_obj = kzalloc(sizeof(struct drm_i915_gem_phys_object), GFP_KERNEL);
4818         if (!phys_obj)
4819                 return -ENOMEM;
4820
4821         phys_obj->id = id;
4822
4823         phys_obj->handle = drm_pci_alloc(dev, size, align);
4824         if (!phys_obj->handle) {
4825                 ret = -ENOMEM;
4826                 goto kfree_obj;
4827         }
4828 #ifdef CONFIG_X86
4829         set_memory_wc((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
4830 #endif
4831
4832         dev_priv->mm.phys_objs[id - 1] = phys_obj;
4833
4834         return 0;
4835 kfree_obj:
4836         kfree(phys_obj);
4837         return ret;
4838 }
4839
4840 static void i915_gem_free_phys_object(struct drm_device *dev, int id)
4841 {
4842         drm_i915_private_t *dev_priv = dev->dev_private;
4843         struct drm_i915_gem_phys_object *phys_obj;
4844
4845         if (!dev_priv->mm.phys_objs[id - 1])
4846                 return;
4847
4848         phys_obj = dev_priv->mm.phys_objs[id - 1];
4849         if (phys_obj->cur_obj) {
4850                 i915_gem_detach_phys_object(dev, phys_obj->cur_obj);
4851         }
4852
4853 #ifdef CONFIG_X86
4854         set_memory_wb((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
4855 #endif
4856         drm_pci_free(dev, phys_obj->handle);
4857         kfree(phys_obj);
4858         dev_priv->mm.phys_objs[id - 1] = NULL;
4859 }
4860
4861 void i915_gem_free_all_phys_object(struct drm_device *dev)
4862 {
4863         int i;
4864
4865         for (i = I915_GEM_PHYS_CURSOR_0; i <= I915_MAX_PHYS_OBJECT; i++)
4866                 i915_gem_free_phys_object(dev, i);
4867 }
4868
4869 void i915_gem_detach_phys_object(struct drm_device *dev,
4870                                  struct drm_gem_object *obj)
4871 {
4872         struct drm_i915_gem_object *obj_priv;
4873         int i;
4874         int ret;
4875         int page_count;
4876
4877         obj_priv = to_intel_bo(obj);
4878         if (!obj_priv->phys_obj)
4879                 return;
4880
4881         ret = i915_gem_object_get_pages(obj, 0);
4882         if (ret)
4883                 goto out;
4884
4885         page_count = obj->size / PAGE_SIZE;
4886
4887         for (i = 0; i < page_count; i++) {
4888                 char *dst = kmap_atomic(obj_priv->pages[i]);
4889                 char *src = obj_priv->phys_obj->handle->vaddr + (i * PAGE_SIZE);
4890
4891                 memcpy(dst, src, PAGE_SIZE);
4892                 kunmap_atomic(dst);
4893         }
4894         drm_clflush_pages(obj_priv->pages, page_count);
4895         drm_agp_chipset_flush(dev);
4896
4897         i915_gem_object_put_pages(obj);
4898 out:
4899         obj_priv->phys_obj->cur_obj = NULL;
4900         obj_priv->phys_obj = NULL;
4901 }
4902
4903 int
4904 i915_gem_attach_phys_object(struct drm_device *dev,
4905                             struct drm_gem_object *obj,
4906                             int id,
4907                             int align)
4908 {
4909         drm_i915_private_t *dev_priv = dev->dev_private;
4910         struct drm_i915_gem_object *obj_priv;
4911         int ret = 0;
4912         int page_count;
4913         int i;
4914
4915         if (id > I915_MAX_PHYS_OBJECT)
4916                 return -EINVAL;
4917
4918         obj_priv = to_intel_bo(obj);
4919
4920         if (obj_priv->phys_obj) {
4921                 if (obj_priv->phys_obj->id == id)
4922                         return 0;
4923                 i915_gem_detach_phys_object(dev, obj);
4924         }
4925
4926         /* create a new object */
4927         if (!dev_priv->mm.phys_objs[id - 1]) {
4928                 ret = i915_gem_init_phys_object(dev, id,
4929                                                 obj->size, align);
4930                 if (ret) {
4931                         DRM_ERROR("failed to init phys object %d size: %zu\n", id, obj->size);
4932                         goto out;
4933                 }
4934         }
4935
4936         /* bind to the object */
4937         obj_priv->phys_obj = dev_priv->mm.phys_objs[id - 1];
4938         obj_priv->phys_obj->cur_obj = obj;
4939
4940         ret = i915_gem_object_get_pages(obj, 0);
4941         if (ret) {
4942                 DRM_ERROR("failed to get page list\n");
4943                 goto out;
4944         }
4945
4946         page_count = obj->size / PAGE_SIZE;
4947
4948         for (i = 0; i < page_count; i++) {
4949                 char *src = kmap_atomic(obj_priv->pages[i]);
4950                 char *dst = obj_priv->phys_obj->handle->vaddr + (i * PAGE_SIZE);
4951
4952                 memcpy(dst, src, PAGE_SIZE);
4953                 kunmap_atomic(src);
4954         }
4955
4956         i915_gem_object_put_pages(obj);
4957
4958         return 0;
4959 out:
4960         return ret;
4961 }
4962
4963 static int
4964 i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
4965                      struct drm_i915_gem_pwrite *args,
4966                      struct drm_file *file_priv)
4967 {
4968         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
4969         void *obj_addr;
4970         int ret;
4971         char __user *user_data;
4972
4973         user_data = (char __user *) (uintptr_t) args->data_ptr;
4974         obj_addr = obj_priv->phys_obj->handle->vaddr + args->offset;
4975
4976         DRM_DEBUG_DRIVER("obj_addr %p, %lld\n", obj_addr, args->size);
4977         ret = copy_from_user(obj_addr, user_data, args->size);
4978         if (ret)
4979                 return -EFAULT;
4980
4981         drm_agp_chipset_flush(dev);
4982         return 0;
4983 }
4984
4985 void i915_gem_release(struct drm_device *dev, struct drm_file *file)
4986 {
4987         struct drm_i915_file_private *file_priv = file->driver_priv;
4988
4989         /* Clean up our request list when the client is going away, so that
4990          * later retire_requests won't dereference our soon-to-be-gone
4991          * file_priv.
4992          */
4993         spin_lock(&file_priv->mm.lock);
4994         while (!list_empty(&file_priv->mm.request_list)) {
4995                 struct drm_i915_gem_request *request;
4996
4997                 request = list_first_entry(&file_priv->mm.request_list,
4998                                            struct drm_i915_gem_request,
4999                                            client_list);
5000                 list_del(&request->client_list);
5001                 request->file_priv = NULL;
5002         }
5003         spin_unlock(&file_priv->mm.lock);
5004 }
5005
5006 static int
5007 i915_gpu_is_active(struct drm_device *dev)
5008 {
5009         drm_i915_private_t *dev_priv = dev->dev_private;
5010         int lists_empty;
5011
5012         lists_empty = list_empty(&dev_priv->mm.flushing_list) &&
5013                       list_empty(&dev_priv->render_ring.active_list) &&
5014                       list_empty(&dev_priv->bsd_ring.active_list) &&
5015                       list_empty(&dev_priv->blt_ring.active_list);
5016
5017         return !lists_empty;
5018 }
5019
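/*
 * Memory shrinker callback (old ->shrink() API): called with nr_to_scan == 0
 * it only reports a pressure-scaled count of the inactive objects that could
 * be reaped; otherwise it unbinds purgeable and then any remaining inactive
 * objects, and returns -1 when it could not take the locks without risking
 * deadlock.
 */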
5020 static int
5021 i915_gem_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
5022 {
5023         drm_i915_private_t *dev_priv, *next_dev;
5024         struct drm_i915_gem_object *obj_priv, *next_obj;
5025         int cnt = 0;
5026         int would_deadlock = 1;
5027
5028         /* "fast-path" to count number of available objects */
5029         if (nr_to_scan == 0) {
5030                 spin_lock(&shrink_list_lock);
5031                 list_for_each_entry(dev_priv, &shrink_list, mm.shrink_list) {
5032                         struct drm_device *dev = dev_priv->dev;
5033
5034                         if (mutex_trylock(&dev->struct_mutex)) {
5035                                 list_for_each_entry(obj_priv,
5036                                                     &dev_priv->mm.inactive_list,
5037                                                     mm_list)
5038                                         cnt++;
5039                                 mutex_unlock(&dev->struct_mutex);
5040                         }
5041                 }
5042                 spin_unlock(&shrink_list_lock);
5043
5044                 return (cnt / 100) * sysctl_vfs_cache_pressure;
5045         }
5046
5047         spin_lock(&shrink_list_lock);
5048
5049 rescan:
5050         /* first scan for clean buffers */
5051         list_for_each_entry_safe(dev_priv, next_dev,
5052                                  &shrink_list, mm.shrink_list) {
5053                 struct drm_device *dev = dev_priv->dev;
5054
5055                 if (!mutex_trylock(&dev->struct_mutex))
5056                         continue;
5057
5058                 spin_unlock(&shrink_list_lock);
5059                 i915_gem_retire_requests(dev);
5060
5061                 list_for_each_entry_safe(obj_priv, next_obj,
5062                                          &dev_priv->mm.inactive_list,
5063                                          mm_list) {
5064                         if (i915_gem_object_is_purgeable(obj_priv)) {
5065                                 i915_gem_object_unbind(&obj_priv->base);
5066                                 if (--nr_to_scan <= 0)
5067                                         break;
5068                         }
5069                 }
5070
5071                 spin_lock(&shrink_list_lock);
5072                 mutex_unlock(&dev->struct_mutex);
5073
5074                 would_deadlock = 0;
5075
5076                 if (nr_to_scan <= 0)
5077                         break;
5078         }
5079
5080         /* second pass, evict/count anything still on the inactive list */
5081         list_for_each_entry_safe(dev_priv, next_dev,
5082                                  &shrink_list, mm.shrink_list) {
5083                 struct drm_device *dev = dev_priv->dev;
5084
5085                 if (!mutex_trylock(&dev->struct_mutex))
5086                         continue;
5087
5088                 spin_unlock(&shrink_list_lock);
5089
5090                 list_for_each_entry_safe(obj_priv, next_obj,
5091                                          &dev_priv->mm.inactive_list,
5092                                          mm_list) {
5093                         if (nr_to_scan > 0) {
5094                                 i915_gem_object_unbind(&obj_priv->base);
5095                                 nr_to_scan--;
5096                         } else
5097                                 cnt++;
5098                 }
5099
5100                 spin_lock(&shrink_list_lock);
5101                 mutex_unlock(&dev->struct_mutex);
5102
5103                 would_deadlock = 0;
5104         }
5105
5106         if (nr_to_scan) {
5107                 int active = 0;
5108
5109                 /*
5110                  * We are desperate for pages, so as a last resort, wait
5111                  * for the GPU to finish and discard whatever we can.
5112                  * This dramatically reduces the number of OOM-killer
5113                  * events whilst running the GPU aggressively.
5114                  */
5115                 list_for_each_entry(dev_priv, &shrink_list, mm.shrink_list) {
5116                         struct drm_device *dev = dev_priv->dev;
5117
5118                         if (!mutex_trylock(&dev->struct_mutex))
5119                                 continue;
5120
5121                         spin_unlock(&shrink_list_lock);
5122
5123                         if (i915_gpu_is_active(dev)) {
5124                                 i915_gpu_idle(dev);
5125                                 active++;
5126                         }
5127
5128                         spin_lock(&shrink_list_lock);
5129                         mutex_unlock(&dev->struct_mutex);
5130                 }
5131
5132                 if (active)
5133                         goto rescan;
5134         }
5135
5136         spin_unlock(&shrink_list_lock);
5137
5138         if (would_deadlock)
5139                 return -1;
5140         else if (cnt > 0)
5141                 return (cnt / 100) * sysctl_vfs_cache_pressure;
5142         else
5143                 return 0;
5144 }
5145
5146 static struct shrinker shrinker = {
5147         .shrink = i915_gem_shrink,
5148         .seeks = DEFAULT_SEEKS,
5149 };
5150
5151 __init void
5152 i915_gem_shrinker_init(void)
5153 {
5154         register_shrinker(&shrinker);
5155 }
5156
5157 __exit void
5158 i915_gem_shrinker_exit(void)
5159 {
5160         unregister_shrinker(&shrinker);
5161 }