/*
 * Copyright © 2008-2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

#include <drm/drmP.h>
#include <drm/drm_vma_manager.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_vgpu.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include "intel_frontbuffer.h"
#include "intel_mocs.h"
#include <linux/reservation.h>
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/pci.h>
#include <linux/dma-buf.h>

static void i915_gem_flush_free_objects(struct drm_i915_private *i915);
static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);

static bool cpu_cache_is_coherent(struct drm_device *dev,
                                  enum i915_cache_level level)
{
        return HAS_LLC(dev) || level != I915_CACHE_NONE;
}

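/* CPU writes need a clflush when the object is not coherently cached by the
 * CPU (no LLC and not snooped) or when it may be scanned out; writes already
 * in the CPU domain are flushed by the normal domain transition instead.
 */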
static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
        if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
                return false;

        if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
                return true;

        return obj->pin_display;
}

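/* Reserve a scratch node in the mappable (CPU-visible) part of the global GTT,
 * used for page-by-page access when the object cannot be pinned there whole.
 */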
static int
insert_mappable_node(struct i915_ggtt *ggtt,
                     struct drm_mm_node *node, u32 size)
{
        memset(node, 0, sizeof(*node));
        return drm_mm_insert_node_in_range_generic(&ggtt->base.mm, node,
                                                   size, 0, -1,
                                                   0, ggtt->mappable_end,
                                                   DRM_MM_SEARCH_DEFAULT,
                                                   DRM_MM_CREATE_DEFAULT);
}

static void
remove_mappable_node(struct drm_mm_node *node)
{
        drm_mm_remove_node(node);
}

/* some bookkeeping */
static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
                                  u64 size)
{
        spin_lock(&dev_priv->mm.object_stat_lock);
        dev_priv->mm.object_count++;
        dev_priv->mm.object_memory += size;
        spin_unlock(&dev_priv->mm.object_stat_lock);
}

static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
                                     u64 size)
{
        spin_lock(&dev_priv->mm.object_stat_lock);
        dev_priv->mm.object_count--;
        dev_priv->mm.object_memory -= size;
        spin_unlock(&dev_priv->mm.object_stat_lock);
}

static int
i915_gem_wait_for_error(struct i915_gpu_error *error)
{
        int ret;

        might_sleep();

        if (!i915_reset_in_progress(error))
                return 0;

        /*
         * Only wait 10 seconds for the gpu reset to complete to avoid hanging
         * userspace. If it takes that long something really bad is going on and
         * we should simply try to bail out and fail as gracefully as possible.
         */
        ret = wait_event_interruptible_timeout(error->reset_queue,
                                               !i915_reset_in_progress(error),
                                               I915_RESET_TIMEOUT);
        if (ret == 0) {
                DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
                return -EIO;
        } else if (ret < 0) {
                return ret;
        } else {
                return 0;
        }
}

int i915_mutex_lock_interruptible(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = to_i915(dev);
        int ret;

        ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
        if (ret)
                return ret;

        ret = mutex_lock_interruptible(&dev->struct_mutex);
        if (ret)
                return ret;

        return 0;
}

int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
                            struct drm_file *file)
{
        struct drm_i915_private *dev_priv = to_i915(dev);
        struct i915_ggtt *ggtt = &dev_priv->ggtt;
        struct drm_i915_gem_get_aperture *args = data;
        struct i915_vma *vma;
        size_t pinned;

        pinned = 0;
        mutex_lock(&dev->struct_mutex);
        list_for_each_entry(vma, &ggtt->base.active_list, vm_link)
                if (i915_vma_is_pinned(vma))
                        pinned += vma->node.size;
        list_for_each_entry(vma, &ggtt->base.inactive_list, vm_link)
                if (i915_vma_is_pinned(vma))
                        pinned += vma->node.size;
        mutex_unlock(&dev->struct_mutex);

        args->aper_size = ggtt->base.total;
        args->aper_available_size = args->aper_size - pinned;

        return 0;
}

static struct sg_table *
i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
{
        struct address_space *mapping = obj->base.filp->f_mapping;
        char *vaddr = obj->phys_handle->vaddr;
        struct sg_table *st;
        struct scatterlist *sg;
        int i;

        if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
                return ERR_PTR(-EINVAL);

        for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
                struct page *page;
                char *src;

                page = shmem_read_mapping_page(mapping, i);
                if (IS_ERR(page))
                        return ERR_CAST(page);

                src = kmap_atomic(page);
                memcpy(vaddr, src, PAGE_SIZE);
                drm_clflush_virt_range(vaddr, PAGE_SIZE);
                kunmap_atomic(src);

                put_page(page);
                vaddr += PAGE_SIZE;
        }

        i915_gem_chipset_flush(to_i915(obj->base.dev));

        st = kmalloc(sizeof(*st), GFP_KERNEL);
        if (st == NULL)
                return ERR_PTR(-ENOMEM);

        if (sg_alloc_table(st, 1, GFP_KERNEL)) {
                kfree(st);
                return ERR_PTR(-ENOMEM);
        }

        sg = st->sgl;
        sg->offset = 0;
        sg->length = obj->base.size;

        sg_dma_address(sg) = obj->phys_handle->busaddr;
        sg_dma_len(sg) = obj->base.size;

        return st;
}

static void
__i915_gem_object_release_shmem(struct drm_i915_gem_object *obj)
{
        GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED);

        if (obj->mm.madv == I915_MADV_DONTNEED)
                obj->mm.dirty = false;

        if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
                i915_gem_clflush_object(obj, false);

        obj->base.read_domains = I915_GEM_DOMAIN_CPU;
        obj->base.write_domain = I915_GEM_DOMAIN_CPU;
}

static void
i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj,
                               struct sg_table *pages)
{
        __i915_gem_object_release_shmem(obj);

        if (obj->mm.dirty) {
                struct address_space *mapping = obj->base.filp->f_mapping;
                char *vaddr = obj->phys_handle->vaddr;
                int i;

                for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
                        struct page *page;
                        char *dst;

                        page = shmem_read_mapping_page(mapping, i);
                        if (IS_ERR(page))
                                continue;

                        dst = kmap_atomic(page);
                        drm_clflush_virt_range(vaddr, PAGE_SIZE);
                        memcpy(dst, vaddr, PAGE_SIZE);
                        kunmap_atomic(dst);

                        set_page_dirty(page);
                        if (obj->mm.madv == I915_MADV_WILLNEED)
                                mark_page_accessed(page);
                        put_page(page);
                        vaddr += PAGE_SIZE;
                }
                obj->mm.dirty = false;
        }

        sg_free_table(pages);
        kfree(pages);
}

static void
i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
{
        drm_pci_free(obj->base.dev, obj->phys_handle);
        i915_gem_object_unpin_pages(obj);
}

static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
        .get_pages = i915_gem_object_get_pages_phys,
        .put_pages = i915_gem_object_put_pages_phys,
        .release = i915_gem_object_release_phys,
};

int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
{
        struct i915_vma *vma;
        LIST_HEAD(still_in_list);
        int ret;

        lockdep_assert_held(&obj->base.dev->struct_mutex);

        /* Closed vma are removed from the obj->vma_list - but they may
         * still have an active binding on the object. To remove those we
         * must wait for all rendering to complete to the object (as unbinding
         * must anyway), and retire the requests.
         */
        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE |
                                   I915_WAIT_LOCKED |
                                   I915_WAIT_ALL,
                                   MAX_SCHEDULE_TIMEOUT,
                                   NULL);
        if (ret)
                return ret;

        i915_gem_retire_requests(to_i915(obj->base.dev));

        while ((vma = list_first_entry_or_null(&obj->vma_list,
                                               struct i915_vma,
                                               obj_link))) {
                list_move_tail(&vma->obj_link, &still_in_list);
                ret = i915_vma_unbind(vma);
                if (ret)
                        break;
        }
        list_splice(&still_in_list, &obj->vma_list);

        return ret;
}

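/* Wait on a single dma-fence. Requests native to i915 get the waitboost and
 * retirement handling below; foreign fences are passed straight to
 * dma_fence_wait_timeout(). Returns the remaining timeout in jiffies.
 */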
static long
i915_gem_object_wait_fence(struct dma_fence *fence,
                           unsigned int flags,
                           long timeout,
                           struct intel_rps_client *rps)
{
        struct drm_i915_gem_request *rq;

        BUILD_BUG_ON(I915_WAIT_INTERRUPTIBLE != 0x1);

        if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
                return timeout;

        if (!dma_fence_is_i915(fence))
                return dma_fence_wait_timeout(fence,
                                              flags & I915_WAIT_INTERRUPTIBLE,
                                              timeout);

        rq = to_request(fence);
        if (i915_gem_request_completed(rq))
                goto out;

        /* This client is about to stall waiting for the GPU. In many cases
         * this is undesirable and limits the throughput of the system, as
         * many clients cannot continue processing user input/output whilst
         * blocked. RPS autotuning may take tens of milliseconds to respond
         * to the GPU load and thus incurs additional latency for the client.
         * We can circumvent that by promoting the GPU frequency to maximum
         * before we wait. This makes the GPU throttle up much more quickly
         * (good for benchmarks and user experience, e.g. window animations),
         * but at a cost of spending more power processing the workload
         * (bad for battery). Not all clients even want their results
         * immediately and for them we should just let the GPU select its own
         * frequency to maximise efficiency. To prevent a single client from
         * forcing the clocks too high for the whole system, we only allow
         * each client to waitboost once in a busy period.
         */
        if (rps) {
                if (INTEL_GEN(rq->i915) >= 6)
                        gen6_rps_boost(rq->i915, rps, rq->emitted_jiffies);
                else
                        rps = NULL;
        }

        timeout = i915_wait_request(rq, flags, timeout);

out:
        if (flags & I915_WAIT_LOCKED && i915_gem_request_completed(rq))
                i915_gem_request_retire_upto(rq);

        if (rps && rq->global_seqno == intel_engine_last_submit(rq->engine)) {
                /* The GPU is now idle and this client has stalled.
                 * Since no other client has submitted a request in the
                 * meantime, assume that this client is the only one
                 * supplying work to the GPU but is unable to keep that
                 * work supplied because it is waiting. Since the GPU is
                 * then never kept fully busy, RPS autoclocking will
                 * keep the clocks relatively low, causing further delays.
                 * Compensate by giving the synchronous client credit for
                 * a waitboost next time.
                 */
                spin_lock(&rq->i915->rps.client_lock);
                list_del_init(&rps->link);
                spin_unlock(&rq->i915->rps.client_lock);
        }

        return timeout;
}

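/* Wait on the fences tracked by a reservation object: every shared (read)
 * fence when I915_WAIT_ALL is requested, followed by the exclusive fence.
 */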
static long
i915_gem_object_wait_reservation(struct reservation_object *resv,
                                 unsigned int flags,
                                 long timeout,
                                 struct intel_rps_client *rps)
{
        struct dma_fence *excl;

        if (flags & I915_WAIT_ALL) {
                struct dma_fence **shared;
                unsigned int count, i;
                int ret;

                ret = reservation_object_get_fences_rcu(resv,
                                                        &excl, &count, &shared);
                if (ret)
                        return ret;

                for (i = 0; i < count; i++) {
                        timeout = i915_gem_object_wait_fence(shared[i],
                                                             flags, timeout,
                                                             rps);
                        if (timeout <= 0)
                                break;

                        dma_fence_put(shared[i]);
                }

                for (; i < count; i++)
                        dma_fence_put(shared[i]);
                kfree(shared);
        } else {
                excl = reservation_object_get_excl_rcu(resv);
        }

        if (excl && timeout > 0)
                timeout = i915_gem_object_wait_fence(excl, flags, timeout, rps);

        dma_fence_put(excl);

        return timeout;
}

/**
 * Waits for rendering to the object to be completed
 * @obj: i915 gem object
 * @flags: how to wait (under a lock, for all rendering or just for writes etc)
 * @timeout: how long to wait
 * @rps: client (user process) to charge for any waitboosting
 */
int
i915_gem_object_wait(struct drm_i915_gem_object *obj,
                     unsigned int flags,
                     long timeout,
                     struct intel_rps_client *rps)
{
        might_sleep();
#if IS_ENABLED(CONFIG_LOCKDEP)
        GEM_BUG_ON(debug_locks &&
                   !!lockdep_is_held(&obj->base.dev->struct_mutex) !=
                   !!(flags & I915_WAIT_LOCKED));
#endif
        GEM_BUG_ON(timeout < 0);

        timeout = i915_gem_object_wait_reservation(obj->resv,
                                                   flags, timeout,
                                                   rps);
        return timeout < 0 ? timeout : 0;
}

static struct intel_rps_client *to_rps_client(struct drm_file *file)
{
        struct drm_i915_file_private *fpriv = file->driver_priv;

        return &fpriv->rps;
}

int
i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
                            int align)
{
        drm_dma_handle_t *phys;
        int ret;

        if (obj->phys_handle) {
                if ((unsigned long)obj->phys_handle->vaddr & (align -1))
                        return -EBUSY;

                return 0;
        }

        if (obj->mm.madv != I915_MADV_WILLNEED)
                return -EFAULT;

        if (obj->base.filp == NULL)
                return -EINVAL;

        ret = i915_gem_object_unbind(obj);
        if (ret)
                return ret;

        __i915_gem_object_put_pages(obj, I915_MM_NORMAL);
        if (obj->mm.pages)
                return -EBUSY;

        /* create a new object */
        phys = drm_pci_alloc(obj->base.dev, obj->base.size, align);
        if (!phys)
                return -ENOMEM;

        obj->phys_handle = phys;
        obj->ops = &i915_gem_phys_ops;

        return i915_gem_object_pin_pages(obj);
}

static int
i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
                     struct drm_i915_gem_pwrite *args,
                     struct drm_file *file)
{
        struct drm_device *dev = obj->base.dev;
        void *vaddr = obj->phys_handle->vaddr + args->offset;
        char __user *user_data = u64_to_user_ptr(args->data_ptr);
        int ret;

        /* We manually control the domain here and pretend that it
         * remains coherent i.e. in the GTT domain, like shmem_pwrite.
         */
        lockdep_assert_held(&obj->base.dev->struct_mutex);
        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE |
                                   I915_WAIT_LOCKED |
                                   I915_WAIT_ALL,
                                   MAX_SCHEDULE_TIMEOUT,
                                   to_rps_client(file));
        if (ret)
                return ret;

        intel_fb_obj_invalidate(obj, ORIGIN_CPU);
        if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
                unsigned long unwritten;

                /* The physical object once assigned is fixed for the lifetime
                 * of the obj, so we can safely drop the lock and continue
                 * to access vaddr.
                 */
                mutex_unlock(&dev->struct_mutex);
                unwritten = copy_from_user(vaddr, user_data, args->size);
                mutex_lock(&dev->struct_mutex);
                if (unwritten) {
                        ret = -EFAULT;
                        goto out;
                }
        }

        drm_clflush_virt_range(vaddr, args->size);
        i915_gem_chipset_flush(to_i915(dev));

out:
        intel_fb_obj_flush(obj, false, ORIGIN_CPU);
        return ret;
}

void *i915_gem_object_alloc(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = to_i915(dev);
        return kmem_cache_zalloc(dev_priv->objects, GFP_KERNEL);
}

void i915_gem_object_free(struct drm_i915_gem_object *obj)
{
        struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
        kmem_cache_free(dev_priv->objects, obj);
}

static int
i915_gem_create(struct drm_file *file,
                struct drm_device *dev,
                uint64_t size,
                uint32_t *handle_p)
{
        struct drm_i915_gem_object *obj;
        int ret;
        u32 handle;

        size = roundup(size, PAGE_SIZE);
        if (size == 0)
                return -EINVAL;

        /* Allocate the new object */
        obj = i915_gem_object_create(dev, size);
        if (IS_ERR(obj))
                return PTR_ERR(obj);

        ret = drm_gem_handle_create(file, &obj->base, &handle);
        /* drop reference from allocate - handle holds it now */
        i915_gem_object_put(obj);
        if (ret)
                return ret;

        *handle_p = handle;
        return 0;
}

int
i915_gem_dumb_create(struct drm_file *file,
                     struct drm_device *dev,
                     struct drm_mode_create_dumb *args)
{
        /* have to work out size/pitch and return them */
        args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
        args->size = args->pitch * args->height;
        return i915_gem_create(file, dev,
                               args->size, &args->handle);
}

/**
 * Creates a new mm object and returns a handle to it.
 * @dev: drm device pointer
 * @data: ioctl data blob
 * @file: drm file pointer
 */
int
i915_gem_create_ioctl(struct drm_device *dev, void *data,
                      struct drm_file *file)
{
        struct drm_i915_gem_create *args = data;

        i915_gem_flush_free_objects(to_i915(dev));

        return i915_gem_create(file, dev,
                               args->size, &args->handle);
}

static inline int
__copy_to_user_swizzled(char __user *cpu_vaddr,
                        const char *gpu_vaddr, int gpu_offset,
                        int length)
{
        int ret, cpu_offset = 0;

        while (length > 0) {
                int cacheline_end = ALIGN(gpu_offset + 1, 64);
                int this_length = min(cacheline_end - gpu_offset, length);
                int swizzled_gpu_offset = gpu_offset ^ 64;

                ret = __copy_to_user(cpu_vaddr + cpu_offset,
                                     gpu_vaddr + swizzled_gpu_offset,
                                     this_length);
                if (ret)
                        return ret + length;

                cpu_offset += this_length;
                gpu_offset += this_length;
                length -= this_length;
        }

        return 0;
}

static inline int
__copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
                          const char __user *cpu_vaddr,
                          int length)
{
        int ret, cpu_offset = 0;

        while (length > 0) {
                int cacheline_end = ALIGN(gpu_offset + 1, 64);
                int this_length = min(cacheline_end - gpu_offset, length);
                int swizzled_gpu_offset = gpu_offset ^ 64;

                ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
                                       cpu_vaddr + cpu_offset,
                                       this_length);
                if (ret)
                        return ret + length;

                cpu_offset += this_length;
                gpu_offset += this_length;
                length -= this_length;
        }

        return 0;
}

/*
 * Pins the specified object's pages and synchronizes the object with
 * GPU accesses. Sets needs_clflush to non-zero if the caller should
 * flush the object from the CPU cache.
 */
int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
                                    unsigned int *needs_clflush)
{
        int ret;

        lockdep_assert_held(&obj->base.dev->struct_mutex);

        *needs_clflush = 0;
        if (!i915_gem_object_has_struct_page(obj))
                return -ENODEV;

        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE |
                                   I915_WAIT_LOCKED,
                                   MAX_SCHEDULE_TIMEOUT,
                                   NULL);
        if (ret)
                return ret;

        ret = i915_gem_object_pin_pages(obj);
        if (ret)
                return ret;

        i915_gem_object_flush_gtt_write_domain(obj);

        /* If we're not in the cpu read domain, set ourself into the gtt
         * read domain and manually flush cachelines (if required). This
         * optimizes for the case when the gpu will dirty the data
         * anyway again before the next pread happens.
         */
        if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
                *needs_clflush = !cpu_cache_is_coherent(obj->base.dev,
                                                        obj->cache_level);

        if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
                ret = i915_gem_object_set_to_cpu_domain(obj, false);
                if (ret)
                        goto err_unpin;

                *needs_clflush = 0;
        }

        /* return with the pages pinned */
        return 0;

err_unpin:
        i915_gem_object_unpin_pages(obj);
        return ret;
}

int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
                                     unsigned int *needs_clflush)
{
        int ret;

        lockdep_assert_held(&obj->base.dev->struct_mutex);

        *needs_clflush = 0;
        if (!i915_gem_object_has_struct_page(obj))
                return -ENODEV;

        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE |
                                   I915_WAIT_LOCKED |
                                   I915_WAIT_ALL,
                                   MAX_SCHEDULE_TIMEOUT,
                                   NULL);
        if (ret)
                return ret;

        ret = i915_gem_object_pin_pages(obj);
        if (ret)
                return ret;

        i915_gem_object_flush_gtt_write_domain(obj);

        /* If we're not in the cpu write domain, set ourself into the
         * gtt write domain and manually flush cachelines (as required).
         * This optimizes for the case when the gpu will use the data
         * right away and we therefore have to clflush anyway.
         */
        if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
                *needs_clflush |= cpu_write_needs_clflush(obj) << 1;

        /* Same trick applies to invalidate partially written cachelines read
         * before writing.
         */
        if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
                *needs_clflush |= !cpu_cache_is_coherent(obj->base.dev,
                                                         obj->cache_level);

        if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
                ret = i915_gem_object_set_to_cpu_domain(obj, true);
                if (ret)
                        goto err_unpin;

                *needs_clflush = 0;
        }

        if ((*needs_clflush & CLFLUSH_AFTER) == 0)
                obj->cache_dirty = true;

        intel_fb_obj_invalidate(obj, ORIGIN_CPU);
        obj->mm.dirty = true;
        /* return with the pages pinned */
        return 0;

err_unpin:
        i915_gem_object_unpin_pages(obj);
        return ret;
}

static void
shmem_clflush_swizzled_range(char *addr, unsigned long length,
                             bool swizzled)
{
        if (unlikely(swizzled)) {
                unsigned long start = (unsigned long) addr;
                unsigned long end = (unsigned long) addr + length;

                /* For swizzling simply ensure that we always flush both
                 * channels. Lame, but simple and it works. Swizzled
                 * pwrite/pread is far from a hotpath - current userspace
                 * doesn't use it at all. */
                start = round_down(start, 128);
                end = round_up(end, 128);

                drm_clflush_virt_range((void *)start, end - start);
        } else {
                drm_clflush_virt_range(addr, length);
        }

}

/* Only difference to the fast-path function is that this can handle bit17
 * and uses non-atomic copy and kmap functions. */
static int
shmem_pread_slow(struct page *page, int offset, int length,
                 char __user *user_data,
                 bool page_do_bit17_swizzling, bool needs_clflush)
{
        char *vaddr;
        int ret;

        vaddr = kmap(page);
        if (needs_clflush)
                shmem_clflush_swizzled_range(vaddr + offset, length,
                                             page_do_bit17_swizzling);

        if (page_do_bit17_swizzling)
                ret = __copy_to_user_swizzled(user_data, vaddr, offset, length);
        else
                ret = __copy_to_user(user_data, vaddr + offset, length);
        kunmap(page);

        return ret ? - EFAULT : 0;
}

static int
shmem_pread(struct page *page, int offset, int length, char __user *user_data,
            bool page_do_bit17_swizzling, bool needs_clflush)
{
        int ret;

        ret = -ENODEV;
        if (!page_do_bit17_swizzling) {
                char *vaddr = kmap_atomic(page);

                if (needs_clflush)
                        drm_clflush_virt_range(vaddr + offset, length);
                ret = __copy_to_user_inatomic(user_data, vaddr + offset, length);
                kunmap_atomic(vaddr);
        }
        if (ret == 0)
                return 0;

        return shmem_pread_slow(page, offset, length, user_data,
                                page_do_bit17_swizzling, needs_clflush);
}

static int
i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
                     struct drm_i915_gem_pread *args)
{
        char __user *user_data;
        u64 remain;
        unsigned int obj_do_bit17_swizzling;
        unsigned int needs_clflush;
        unsigned int idx, offset;
        int ret;

        obj_do_bit17_swizzling = 0;
        if (i915_gem_object_needs_bit17_swizzle(obj))
                obj_do_bit17_swizzling = BIT(17);

        ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex);
        if (ret)
                return ret;

        ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
        mutex_unlock(&obj->base.dev->struct_mutex);
        if (ret)
                return ret;

        remain = args->size;
        user_data = u64_to_user_ptr(args->data_ptr);
        offset = offset_in_page(args->offset);
        for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
                struct page *page = i915_gem_object_get_page(obj, idx);
                int length;

                length = remain;
                if (offset + length > PAGE_SIZE)
                        length = PAGE_SIZE - offset;

                ret = shmem_pread(page, offset, length, user_data,
                                  page_to_phys(page) & obj_do_bit17_swizzling,
                                  needs_clflush);
                if (ret)
                        break;

                remain -= length;
                user_data += length;
                offset = 0;
        }

        i915_gem_obj_finish_shmem_access(obj);
        return ret;
}

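/* Copy from a WC-mapped GGTT page into user memory. The atomic fast path is
 * tried first; on a fault we fall back to a sleeping mapping and copy_to_user.
 * Returns true if any bytes could not be copied.
 */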
static inline bool
gtt_user_read(struct io_mapping *mapping,
              loff_t base, int offset,
              char __user *user_data, int length)
{
        void *vaddr;
        unsigned long unwritten;

        /* We can use the cpu mem copy function because this is X86. */
        vaddr = (void __force *)io_mapping_map_atomic_wc(mapping, base);
        unwritten = __copy_to_user_inatomic(user_data, vaddr + offset, length);
        io_mapping_unmap_atomic(vaddr);
        if (unwritten) {
                vaddr = (void __force *)
                        io_mapping_map_wc(mapping, base, PAGE_SIZE);
                unwritten = copy_to_user(user_data, vaddr + offset, length);
                io_mapping_unmap(vaddr);
        }
        return unwritten;
}

static int
i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
                   const struct drm_i915_gem_pread *args)
{
        struct drm_i915_private *i915 = to_i915(obj->base.dev);
        struct i915_ggtt *ggtt = &i915->ggtt;
        struct drm_mm_node node;
        struct i915_vma *vma;
        void __user *user_data;
        u64 remain, offset;
        int ret;

        ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
        if (ret)
                return ret;

        intel_runtime_pm_get(i915);
        vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
                                       PIN_MAPPABLE | PIN_NONBLOCK);
        if (!IS_ERR(vma)) {
                node.start = i915_ggtt_offset(vma);
                node.allocated = false;
                ret = i915_vma_put_fence(vma);
                if (ret) {
                        i915_vma_unpin(vma);
                        vma = ERR_PTR(ret);
                }
        }
        if (IS_ERR(vma)) {
                ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
                if (ret)
                        goto out_unlock;
                GEM_BUG_ON(!node.allocated);
        }

        ret = i915_gem_object_set_to_gtt_domain(obj, false);
        if (ret)
                goto out_unpin;

        mutex_unlock(&i915->drm.struct_mutex);

        user_data = u64_to_user_ptr(args->data_ptr);
        remain = args->size;
        offset = args->offset;

        while (remain > 0) {
                /* Operation in this page
                 *
                 * page_base = page offset within aperture
                 * page_offset = offset within page
                 * page_length = bytes to copy for this page
                 */
                u32 page_base = node.start;
                unsigned page_offset = offset_in_page(offset);
                unsigned page_length = PAGE_SIZE - page_offset;
                page_length = remain < page_length ? remain : page_length;
                if (node.allocated) {
                        wmb();
                        ggtt->base.insert_page(&ggtt->base,
                                               i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
                                               node.start, I915_CACHE_NONE, 0);
                        wmb();
                } else {
                        page_base += offset & PAGE_MASK;
                }

                if (gtt_user_read(&ggtt->mappable, page_base, page_offset,
                                  user_data, page_length)) {
                        ret = -EFAULT;
                        break;
                }

                remain -= page_length;
                user_data += page_length;
                offset += page_length;
        }

        mutex_lock(&i915->drm.struct_mutex);
out_unpin:
        if (node.allocated) {
                wmb();
                ggtt->base.clear_range(&ggtt->base,
                                       node.start, node.size);
                remove_mappable_node(&node);
        } else {
                i915_vma_unpin(vma);
        }
out_unlock:
        intel_runtime_pm_put(i915);
        mutex_unlock(&i915->drm.struct_mutex);

        return ret;
}

/**
 * Reads data from the object referenced by handle.
 * @dev: drm device pointer
 * @data: ioctl data blob
 * @file: drm file pointer
 *
 * On error, the contents of *data are undefined.
 */
int
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
                     struct drm_file *file)
{
        struct drm_i915_gem_pread *args = data;
        struct drm_i915_gem_object *obj;
        int ret;

        if (args->size == 0)
                return 0;

        if (!access_ok(VERIFY_WRITE,
                       u64_to_user_ptr(args->data_ptr),
                       args->size))
                return -EFAULT;

        obj = i915_gem_object_lookup(file, args->handle);
        if (!obj)
                return -ENOENT;

        /* Bounds check source.  */
        if (args->offset > obj->base.size ||
            args->size > obj->base.size - args->offset) {
                ret = -EINVAL;
                goto out;
        }

        trace_i915_gem_object_pread(obj, args->offset, args->size);

        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE,
                                   MAX_SCHEDULE_TIMEOUT,
                                   to_rps_client(file));
        if (ret)
                goto out;

        ret = i915_gem_object_pin_pages(obj);
        if (ret)
                goto out;

        ret = i915_gem_shmem_pread(obj, args);
        if (ret == -EFAULT || ret == -ENODEV)
                ret = i915_gem_gtt_pread(obj, args);

        i915_gem_object_unpin_pages(obj);
out:
        i915_gem_object_put(obj);
        return ret;
}

/* This is the fast write path which cannot handle
 * page faults in the source data
 */

static inline bool
ggtt_write(struct io_mapping *mapping,
           loff_t base, int offset,
           char __user *user_data, int length)
{
        void *vaddr;
        unsigned long unwritten;

        /* We can use the cpu mem copy function because this is X86. */
        vaddr = (void __force *)io_mapping_map_atomic_wc(mapping, base);
        unwritten = __copy_from_user_inatomic_nocache(vaddr + offset,
                                                      user_data, length);
        io_mapping_unmap_atomic(vaddr);
        if (unwritten) {
                vaddr = (void __force *)
                        io_mapping_map_wc(mapping, base, PAGE_SIZE);
                unwritten = copy_from_user(vaddr + offset, user_data, length);
                io_mapping_unmap(vaddr);
        }

        return unwritten;
}

/**
 * This is the fast pwrite path, where we copy the data directly from the
 * user into the GTT, uncached.
 * @obj: i915 GEM object
 * @args: pwrite arguments structure
 */
static int
i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
                         const struct drm_i915_gem_pwrite *args)
{
        struct drm_i915_private *i915 = to_i915(obj->base.dev);
        struct i915_ggtt *ggtt = &i915->ggtt;
        struct drm_mm_node node;
        struct i915_vma *vma;
        u64 remain, offset;
        void __user *user_data;
        int ret;

        ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
        if (ret)
                return ret;

        intel_runtime_pm_get(i915);
        vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
                                       PIN_MAPPABLE | PIN_NONBLOCK);
        if (!IS_ERR(vma)) {
                node.start = i915_ggtt_offset(vma);
                node.allocated = false;
                ret = i915_vma_put_fence(vma);
                if (ret) {
                        i915_vma_unpin(vma);
                        vma = ERR_PTR(ret);
                }
        }
        if (IS_ERR(vma)) {
                ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
                if (ret)
                        goto out_unlock;
                GEM_BUG_ON(!node.allocated);
        }

        ret = i915_gem_object_set_to_gtt_domain(obj, true);
        if (ret)
                goto out_unpin;

        mutex_unlock(&i915->drm.struct_mutex);

        intel_fb_obj_invalidate(obj, ORIGIN_CPU);

        user_data = u64_to_user_ptr(args->data_ptr);
        offset = args->offset;
        remain = args->size;
        while (remain) {
                /* Operation in this page
                 *
                 * page_base = page offset within aperture
                 * page_offset = offset within page
                 * page_length = bytes to copy for this page
                 */
                u32 page_base = node.start;
                unsigned int page_offset = offset_in_page(offset);
                unsigned int page_length = PAGE_SIZE - page_offset;
                page_length = remain < page_length ? remain : page_length;
                if (node.allocated) {
                        wmb(); /* flush the write before we modify the GGTT */
                        ggtt->base.insert_page(&ggtt->base,
                                               i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
                                               node.start, I915_CACHE_NONE, 0);
                        wmb(); /* flush modifications to the GGTT (insert_page) */
                } else {
                        page_base += offset & PAGE_MASK;
                }
                /* If we get a fault while copying data, then (presumably) our
                 * source page isn't available.  Return the error and we'll
                 * retry in the slow path.
                 * If the object is non-shmem backed, we retry with the
                 * path that handles page faults.
                 */
                if (ggtt_write(&ggtt->mappable, page_base, page_offset,
                               user_data, page_length)) {
                        ret = -EFAULT;
                        break;
                }

                remain -= page_length;
                user_data += page_length;
                offset += page_length;
        }
        intel_fb_obj_flush(obj, false, ORIGIN_CPU);

        mutex_lock(&i915->drm.struct_mutex);
out_unpin:
        if (node.allocated) {
                wmb();
                ggtt->base.clear_range(&ggtt->base,
                                       node.start, node.size);
                remove_mappable_node(&node);
        } else {
                i915_vma_unpin(vma);
        }
out_unlock:
        intel_runtime_pm_put(i915);
        mutex_unlock(&i915->drm.struct_mutex);
        return ret;
}

static int
shmem_pwrite_slow(struct page *page, int offset, int length,
                  char __user *user_data,
                  bool page_do_bit17_swizzling,
                  bool needs_clflush_before,
                  bool needs_clflush_after)
{
        char *vaddr;
        int ret;

        vaddr = kmap(page);
        if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
                shmem_clflush_swizzled_range(vaddr + offset, length,
                                             page_do_bit17_swizzling);
        if (page_do_bit17_swizzling)
                ret = __copy_from_user_swizzled(vaddr, offset, user_data,
                                                length);
        else
                ret = __copy_from_user(vaddr + offset, user_data, length);
        if (needs_clflush_after)
                shmem_clflush_swizzled_range(vaddr + offset, length,
                                             page_do_bit17_swizzling);
        kunmap(page);

        return ret ? -EFAULT : 0;
}

/* Per-page copy function for the shmem pwrite fastpath.
 * Flushes invalid cachelines before writing to the target if
 * needs_clflush_before is set and flushes out any written cachelines after
 * writing if needs_clflush is set.
 */
static int
shmem_pwrite(struct page *page, int offset, int len, char __user *user_data,
             bool page_do_bit17_swizzling,
             bool needs_clflush_before,
             bool needs_clflush_after)
{
        int ret;

        ret = -ENODEV;
        if (!page_do_bit17_swizzling) {
                char *vaddr = kmap_atomic(page);

                if (needs_clflush_before)
                        drm_clflush_virt_range(vaddr + offset, len);
                ret = __copy_from_user_inatomic(vaddr + offset, user_data, len);
                if (needs_clflush_after)
                        drm_clflush_virt_range(vaddr + offset, len);

                kunmap_atomic(vaddr);
        }
        if (ret == 0)
                return ret;

        return shmem_pwrite_slow(page, offset, len, user_data,
                                 page_do_bit17_swizzling,
                                 needs_clflush_before,
                                 needs_clflush_after);
}

static int
i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
                      const struct drm_i915_gem_pwrite *args)
{
        struct drm_i915_private *i915 = to_i915(obj->base.dev);
        void __user *user_data;
        u64 remain;
        unsigned int obj_do_bit17_swizzling;
        unsigned int partial_cacheline_write;
        unsigned int needs_clflush;
        unsigned int offset, idx;
        int ret;

        ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
        if (ret)
                return ret;

        ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
        mutex_unlock(&i915->drm.struct_mutex);
        if (ret)
                return ret;

        obj_do_bit17_swizzling = 0;
        if (i915_gem_object_needs_bit17_swizzle(obj))
                obj_do_bit17_swizzling = BIT(17);

        /* If we don't overwrite a cacheline completely we need to be
         * careful to have up-to-date data by first clflushing. Don't
         * overcomplicate things and flush the entire write.
         */
        partial_cacheline_write = 0;
        if (needs_clflush & CLFLUSH_BEFORE)
                partial_cacheline_write = boot_cpu_data.x86_clflush_size - 1;

        user_data = u64_to_user_ptr(args->data_ptr);
        remain = args->size;
        offset = offset_in_page(args->offset);
        for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
                struct page *page = i915_gem_object_get_page(obj, idx);
                int length;

                length = remain;
                if (offset + length > PAGE_SIZE)
                        length = PAGE_SIZE - offset;

                ret = shmem_pwrite(page, offset, length, user_data,
                                   page_to_phys(page) & obj_do_bit17_swizzling,
                                   (offset | length) & partial_cacheline_write,
                                   needs_clflush & CLFLUSH_AFTER);
                if (ret)
                        break;

                remain -= length;
                user_data += length;
                offset = 0;
        }

        intel_fb_obj_flush(obj, false, ORIGIN_CPU);
        i915_gem_obj_finish_shmem_access(obj);
        return ret;
}

/**
 * Writes data to the object referenced by handle.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 *
 * On error, the contents of the buffer that were to be modified are undefined.
 */
int
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
                      struct drm_file *file)
{
        struct drm_i915_gem_pwrite *args = data;
        struct drm_i915_gem_object *obj;
        int ret;

        if (args->size == 0)
                return 0;

        if (!access_ok(VERIFY_READ,
                       u64_to_user_ptr(args->data_ptr),
                       args->size))
                return -EFAULT;

        obj = i915_gem_object_lookup(file, args->handle);
        if (!obj)
                return -ENOENT;

        /* Bounds check destination. */
        if (args->offset > obj->base.size ||
            args->size > obj->base.size - args->offset) {
                ret = -EINVAL;
                goto err;
        }

        trace_i915_gem_object_pwrite(obj, args->offset, args->size);

        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE |
                                   I915_WAIT_ALL,
                                   MAX_SCHEDULE_TIMEOUT,
                                   to_rps_client(file));
        if (ret)
                goto err;

        ret = i915_gem_object_pin_pages(obj);
        if (ret)
                goto err;

        ret = -EFAULT;
        /* We can only do the GTT pwrite on untiled buffers, as otherwise
         * it would end up going through the fenced access, and we'll get
         * different detiling behavior between reading and writing.
         * pread/pwrite currently are reading and writing from the CPU
         * perspective, requiring manual detiling by the client.
         */
        if (!i915_gem_object_has_struct_page(obj) ||
            cpu_write_needs_clflush(obj))
                /* Note that the gtt paths might fail with non-page-backed user
                 * pointers (e.g. gtt mappings when moving data between
                 * textures). Fallback to the shmem path in that case.
                 */
                ret = i915_gem_gtt_pwrite_fast(obj, args);

        if (ret == -EFAULT || ret == -ENOSPC) {
                if (obj->phys_handle)
                        ret = i915_gem_phys_pwrite(obj, args, file);
                else
                        ret = i915_gem_shmem_pwrite(obj, args);
        }

        i915_gem_object_unpin_pages(obj);
err:
        i915_gem_object_put(obj);
        return ret;
}

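/* Frontbuffer-tracking origin for a write: writes landing in the GTT domain
 * may come through a GGTT mmap, anything else is treated as a plain CPU write.
 */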
static inline enum fb_op_origin
write_origin(struct drm_i915_gem_object *obj, unsigned domain)
{
        return (domain == I915_GEM_DOMAIN_GTT ?
                obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
}

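/* Treat this CPU access as a use of the object: move its idle GGTT VMAs to
 * the tail of the inactive list and the object itself to the tail of the
 * global bound/unbound list, so the eviction and shrinker LRUs stay in order.
 */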
1428 static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
1429 {
1430         struct drm_i915_private *i915;
1431         struct list_head *list;
1432         struct i915_vma *vma;
1433
1434         list_for_each_entry(vma, &obj->vma_list, obj_link) {
1435                 if (!i915_vma_is_ggtt(vma))
1436                         continue;
1437
1438                 if (i915_vma_is_active(vma))
1439                         continue;
1440
1441                 if (!drm_mm_node_allocated(&vma->node))
1442                         continue;
1443
1444                 list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
1445         }
1446
1447         i915 = to_i915(obj->base.dev);
1448         list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list;
1449         list_move_tail(&obj->global_link, list);
1450 }
1451
1452 /**
1453  * Called when user space prepares to use an object with the CPU, either
1454  * through the mmap ioctl's mapping or a GTT mapping.
1455  * @dev: drm device
1456  * @data: ioctl data blob
1457  * @file: drm file
1458  */
1459 int
1460 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
1461                           struct drm_file *file)
1462 {
1463         struct drm_i915_gem_set_domain *args = data;
1464         struct drm_i915_gem_object *obj;
1465         uint32_t read_domains = args->read_domains;
1466         uint32_t write_domain = args->write_domain;
1467         int err;
1468
1469         /* Only handle setting domains to types used by the CPU. */
1470         if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
1471                 return -EINVAL;
1472
1473         /* Having something in the write domain implies it's in the read
1474          * domain, and only that read domain.  Enforce that in the request.
1475          */
1476         if (write_domain != 0 && read_domains != write_domain)
1477                 return -EINVAL;
1478
1479         obj = i915_gem_object_lookup(file, args->handle);
1480         if (!obj)
1481                 return -ENOENT;
1482
1483         /* Try to flush the object off the GPU without holding the lock.
1484          * We will repeat the flush holding the lock in the normal manner
1485          * to catch cases where we are gazumped.
1486          */
1487         err = i915_gem_object_wait(obj,
1488                                    I915_WAIT_INTERRUPTIBLE |
1489                                    (write_domain ? I915_WAIT_ALL : 0),
1490                                    MAX_SCHEDULE_TIMEOUT,
1491                                    to_rps_client(file));
1492         if (err)
1493                 goto out;
1494
1495         /* Flush and acquire obj->pages so that we are coherent through
1496          * direct access in memory with previous cached writes through
1497          * shmemfs and that our cache domain tracking remains valid.
1498          * For example, if the obj->filp was moved to swap without us
1499          * being notified and releasing the pages, we would mistakenly
1500          * continue to assume that the obj remained out of the CPU cached
1501          * domain.
1502          */
1503         err = i915_gem_object_pin_pages(obj);
1504         if (err)
1505                 goto out;
1506
1507         err = i915_mutex_lock_interruptible(dev);
1508         if (err)
1509                 goto out_unpin;
1510
1511         if (read_domains & I915_GEM_DOMAIN_GTT)
1512                 err = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
1513         else
1514                 err = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
1515
1516         /* And bump the LRU for this access */
1517         i915_gem_object_bump_inactive_ggtt(obj);
1518
1519         mutex_unlock(&dev->struct_mutex);
1520
1521         if (write_domain != 0)
1522                 intel_fb_obj_invalidate(obj, write_origin(obj, write_domain));
1523
1524 out_unpin:
1525         i915_gem_object_unpin_pages(obj);
1526 out:
1527         i915_gem_object_put(obj);
1528         return err;
1529 }
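/* For illustration, a hypothetical userspace snippet driving this ioctl to
 * move a buffer into the GTT domain for writing might look like the
 * following (assumes libdrm's drmIoctl() and an open DRM fd; error
 * handling omitted):
 *
 *	struct drm_i915_gem_set_domain sd = {
 *		.handle = handle,
 *		.read_domains = I915_GEM_DOMAIN_GTT,
 *		.write_domain = I915_GEM_DOMAIN_GTT,
 *	};
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &sd);
 *
 * Note that the write domain, when set, must match read_domains exactly,
 * as enforced above.
 */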
1530
1531 /**
1532  * i915_gem_sw_finish_ioctl - Called when user space has done writes to this buffer
1533  * @dev: drm device
1534  * @data: ioctl data blob
1535  * @file: drm file
1536  */
1537 int
1538 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
1539                          struct drm_file *file)
1540 {
1541         struct drm_i915_gem_sw_finish *args = data;
1542         struct drm_i915_gem_object *obj;
1543         int err = 0;
1544
1545         obj = i915_gem_object_lookup(file, args->handle);
1546         if (!obj)
1547                 return -ENOENT;
1548
1549         /* Pinned buffers may be scanout, so flush the cache */
1550         if (READ_ONCE(obj->pin_display)) {
1551                 err = i915_mutex_lock_interruptible(dev);
1552                 if (!err) {
1553                         i915_gem_object_flush_cpu_write_domain(obj);
1554                         mutex_unlock(&dev->struct_mutex);
1555                 }
1556         }
1557
1558         i915_gem_object_put(obj);
1559         return err;
1560 }
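/* A minimal, hypothetical userspace use after CPU writes to a buffer that
 * may be pinned for scanout (assumes libdrm's drmIoctl() and an open DRM
 * fd):
 *
 *	struct drm_i915_gem_sw_finish sf = { .handle = handle };
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_SW_FINISH, &sf);
 */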
1561
1562 /**
1563  * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address
1564  *                       it is mapped to.
1565  * @dev: drm device
1566  * @data: ioctl data blob
1567  * @file: drm file
1568  *
1569  * While the mapping holds a reference on the contents of the object, it doesn't
1570  * imply a ref on the object itself.
1571  *
1572  * IMPORTANT:
1573  *
1574  * DRM driver writers who look at this function as an example for how to do GEM
1575  * mmap support, please don't implement mmap support like this. The modern way
1576  * to implement DRM mmap support is with an mmap offset ioctl (like
1577  * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
1578  * That way debug tooling like valgrind will understand what's going on; hiding
1579  * the mmap call in a driver-private ioctl will break that. The i915 driver only
1580  * does CPU mmaps this way because we didn't know better.
1581  */
1582 int
1583 i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1584                     struct drm_file *file)
1585 {
1586         struct drm_i915_gem_mmap *args = data;
1587         struct drm_i915_gem_object *obj;
1588         unsigned long addr;
1589
1590         if (args->flags & ~(I915_MMAP_WC))
1591                 return -EINVAL;
1592
1593         if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
1594                 return -ENODEV;
1595
1596         obj = i915_gem_object_lookup(file, args->handle);
1597         if (!obj)
1598                 return -ENOENT;
1599
1600         /* prime objects have no backing filp to GEM mmap
1601          * pages from.
1602          */
1603         if (!obj->base.filp) {
1604                 i915_gem_object_put(obj);
1605                 return -EINVAL;
1606         }
1607
1608         addr = vm_mmap(obj->base.filp, 0, args->size,
1609                        PROT_READ | PROT_WRITE, MAP_SHARED,
1610                        args->offset);
1611         if (args->flags & I915_MMAP_WC) {
1612                 struct mm_struct *mm = current->mm;
1613                 struct vm_area_struct *vma;
1614
1615                 if (down_write_killable(&mm->mmap_sem)) {
1616                         i915_gem_object_put(obj);
1617                         return -EINTR;
1618                 }
1619                 vma = find_vma(mm, addr);
1620                 if (vma)
1621                         vma->vm_page_prot =
1622                                 pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
1623                 else
1624                         addr = -ENOMEM;
1625                 up_write(&mm->mmap_sem);
1626
1627                 /* This may race, but that's ok, it only gets set */
1628                 WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU);
1629         }
1630         i915_gem_object_put(obj);
1631         if (IS_ERR((void *)addr))
1632                 return addr;
1633
1634         args->addr_ptr = (uint64_t) addr;
1635
1636         return 0;
1637 }
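/* For illustration, a hypothetical caller requesting a write-combining CPU
 * mapping through this legacy ioctl (assumes libdrm's drmIoctl() and an
 * open DRM fd; error handling omitted):
 *
 *	struct drm_i915_gem_mmap mm = {
 *		.handle = handle,
 *		.size = size,
 *		.flags = I915_MMAP_WC,
 *	};
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP, &mm);
 *	void *ptr = (void *)(uintptr_t)mm.addr_ptr;
 *
 * As the comment above stresses, new drivers should expose an mmap offset
 * instead and let userspace mmap() the DRM fd directly.
 */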
1638
1639 static unsigned int tile_row_pages(struct drm_i915_gem_object *obj)
1640 {
1641         u64 size;
1642
1643         size = i915_gem_object_get_stride(obj);
1644         size *= i915_gem_object_get_tiling(obj) == I915_TILING_Y ? 32 : 8;
1645
1646         return size >> PAGE_SHIFT;
1647 }
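/* Worked example: with a 4096 byte stride, one X-tiled tile row spans
 * 4096 * 8 = 32768 bytes, i.e. 8 pages, while a Y-tiled row spans
 * 4096 * 32 bytes, i.e. 32 pages. The partial-view code in the fault
 * handler below rounds its chunk size up to this figure so that a fenced
 * mapping always covers whole tile rows.
 */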
1648
1649 /**
1650  * i915_gem_mmap_gtt_version - report the current feature set for GTT mmaps
1651  *
1652  * A history of the GTT mmap interface:
1653  *
1654  * 0 - Everything had to fit into the GTT. Both parties of a memcpy had to
1655  *     be aligned and suitable for fencing, and still fit into the available
1656  *     mappable space left by the pinned display objects. A classic problem
1657  *     we called the page-fault-of-doom where we would ping-pong between
1658  *     two objects that could not fit inside the GTT and so the memcpy
1659  *     would page one object in at the expense of the other between every
1660  *     single byte.
1661  *
1662  * 1 - Objects can be any size, and have any compatible fencing (X, Y, or none
1663  *     as set via i915_gem_set_tiling() [DRM_I915_GEM_SET_TILING]). If the
1664  *     object is too large for the available space (or simply too large
1665  *     for the mappable aperture!), a view is created instead and faulted
1666  *     into userspace. (This view is aligned and sized appropriately for
1667  *     fenced access.)
1668  *
1669  * Restrictions:
1670  *
1671  *  * snoopable objects cannot be accessed via the GTT. Doing so can cause
1672  *    machine hangs on some architectures, corruption on others. An attempt to
1673  *    service a GTT page fault from a snoopable object will generate a SIGBUS.
1674  *
1675  *  * the object must be able to fit into RAM (physical memory, though not
1676  *    limited to the mappable aperture).
1677  *
1678  *
1679  * Caveats:
1680  *
1681  *  * a new GTT page fault will synchronize rendering from the GPU and flush
1682  *    all data to system memory. Subsequent access will not be synchronized.
1683  *
1684  *  * all mappings are revoked on runtime device suspend.
1685  *
1686  *  * there are only 8, 16 or 32 fence registers to share between all users
1687  *    (older machines require a fence register for display and blitter access
1688  *    as well). Contention of the fence registers will cause the previous users
1689  *    to be unmapped and any new access will generate new page faults.
1690  *
1691  *  * running out of memory while servicing a fault may generate a SIGBUS,
1692  *    rather than the expected SIGSEGV.
1693  */
1694 int i915_gem_mmap_gtt_version(void)
1695 {
1696         return 1;
1697 }
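/* Userspace can discover this value at runtime; a hypothetical probe using
 * libdrm's drmIoctl() might look like:
 *
 *	int version = 0;
 *	struct drm_i915_getparam gp = {
 *		.param = I915_PARAM_MMAP_GTT_VERSION,
 *		.value = &version,
 *	};
 *	drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp);
 *
 * A version of 1 or more means that objects too large for the mappable
 * aperture can still be mapped via partial views, as described above.
 */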
1698
1699 /**
1700  * i915_gem_fault - fault a page into the GTT
1701  * @area: CPU VMA in question
1702  * @vmf: fault info
1703  *
1704  * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
1705  * from userspace.  The fault handler takes care of binding the object to
1706  * the GTT (if needed), allocating and programming a fence register (again,
1707  * only if needed based on whether the old reg is still valid or the object
1708  * is tiled) and inserting a new PTE into the faulting process.
1709  *
1710  * Note that the faulting process may involve evicting existing objects
1711  * from the GTT and/or fence registers to make room.  So performance may
1712  * suffer if the GTT working set is large or there are few fence registers
1713  * left.
1714  *
1715  * The current feature set supported by i915_gem_fault() and thus GTT mmaps
1716  * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version).
1717  */
1718 int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
1719 {
1720 #define MIN_CHUNK_PAGES ((1 << 20) >> PAGE_SHIFT) /* 1 MiB */
1721         struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data);
1722         struct drm_device *dev = obj->base.dev;
1723         struct drm_i915_private *dev_priv = to_i915(dev);
1724         struct i915_ggtt *ggtt = &dev_priv->ggtt;
1725         bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
1726         struct i915_vma *vma;
1727         pgoff_t page_offset;
1728         unsigned int flags;
1729         int ret;
1730
1731         /* We don't use vmf->pgoff since that has the fake offset */
1732         page_offset = ((unsigned long)vmf->virtual_address - area->vm_start) >>
1733                 PAGE_SHIFT;
1734
1735         trace_i915_gem_object_fault(obj, page_offset, true, write);
1736
1737         /* Try to flush the object off the GPU first without holding the lock.
1738          * Upon acquiring the lock, we will perform our sanity checks and then
1739          * repeat the flush holding the lock in the normal manner to catch cases
1740          * where we are gazumped.
1741          */
1742         ret = i915_gem_object_wait(obj,
1743                                    I915_WAIT_INTERRUPTIBLE,
1744                                    MAX_SCHEDULE_TIMEOUT,
1745                                    NULL);
1746         if (ret)
1747                 goto err;
1748
1749         ret = i915_gem_object_pin_pages(obj);
1750         if (ret)
1751                 goto err;
1752
1753         intel_runtime_pm_get(dev_priv);
1754
1755         ret = i915_mutex_lock_interruptible(dev);
1756         if (ret)
1757                 goto err_rpm;
1758
1759         /* Access to snoopable pages through the GTT is incoherent. */
1760         if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) {
1761                 ret = -EFAULT;
1762                 goto err_unlock;
1763         }
1764
1765         /* If the object is smaller than a couple of partial vmas, it is
1766          * not worth only creating a single partial vma - we may as well
1767          * clear enough space for the full object.
1768          */
1769         flags = PIN_MAPPABLE;
1770         if (obj->base.size > 2 * MIN_CHUNK_PAGES << PAGE_SHIFT)
1771                 flags |= PIN_NONBLOCK | PIN_NONFAULT;
1772
1773         /* Now pin it into the GTT as needed */
1774         vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, flags);
1775         if (IS_ERR(vma)) {
1776                 struct i915_ggtt_view view;
1777                 unsigned int chunk_size;
1778
1779                 /* Use a partial view if it is bigger than available space */
1780                 chunk_size = MIN_CHUNK_PAGES;
1781                 if (i915_gem_object_is_tiled(obj))
1782                         chunk_size = roundup(chunk_size, tile_row_pages(obj));
1783
1784                 memset(&view, 0, sizeof(view));
1785                 view.type = I915_GGTT_VIEW_PARTIAL;
1786                 view.params.partial.offset = rounddown(page_offset, chunk_size);
1787                 view.params.partial.size =
1788                         min_t(unsigned int, chunk_size,
1789                               vma_pages(area) - view.params.partial.offset);
1790
1791                 /* If the partial covers the entire object, just create a
1792                  * normal VMA.
1793                  */
1794                 if (chunk_size >= obj->base.size >> PAGE_SHIFT)
1795                         view.type = I915_GGTT_VIEW_NORMAL;
1796
1797                 /* Userspace is now writing through an untracked VMA, abandon
1798                  * all hope that the hardware is able to track future writes.
1799                  */
1800                 obj->frontbuffer_ggtt_origin = ORIGIN_CPU;
1801
1802                 vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
1803         }
1804         if (IS_ERR(vma)) {
1805                 ret = PTR_ERR(vma);
1806                 goto err_unlock;
1807         }
1808
1809         ret = i915_gem_object_set_to_gtt_domain(obj, write);
1810         if (ret)
1811                 goto err_unpin;
1812
1813         ret = i915_vma_get_fence(vma);
1814         if (ret)
1815                 goto err_unpin;
1816
1817         /* Mark as being mmapped into userspace for later revocation */
1818         assert_rpm_wakelock_held(dev_priv);
1819         if (list_empty(&obj->userfault_link))
1820                 list_add(&obj->userfault_link, &dev_priv->mm.userfault_list);
1821
1822         /* Finally, remap it using the new GTT offset */
1823         ret = remap_io_mapping(area,
1824                                area->vm_start + (vma->ggtt_view.params.partial.offset << PAGE_SHIFT),
1825                                (ggtt->mappable_base + vma->node.start) >> PAGE_SHIFT,
1826                                min_t(u64, vma->size, area->vm_end - area->vm_start),
1827                                &ggtt->mappable);
1828
1829 err_unpin:
1830         __i915_vma_unpin(vma);
1831 err_unlock:
1832         mutex_unlock(&dev->struct_mutex);
1833 err_rpm:
1834         intel_runtime_pm_put(dev_priv);
1835         i915_gem_object_unpin_pages(obj);
1836 err:
1837         switch (ret) {
1838         case -EIO:
1839                 /*
1840                  * We eat errors when the gpu is terminally wedged to avoid
1841                  * userspace unduly crashing (gl has no provisions for mmaps to
1842                  * fail). But any other -EIO isn't ours (e.g. swap in failure)
1843                  * and so needs to be reported.
1844                  */
1845                 if (!i915_terminally_wedged(&dev_priv->gpu_error)) {
1846                         ret = VM_FAULT_SIGBUS;
1847                         break;
1848                 } /* else: fall through to -EAGAIN */
1849         case -EAGAIN:
1850                 /*
1851                  * EAGAIN means the gpu is hung and we'll wait for the error
1852                  * handler to reset everything when re-faulting in
1853                  * i915_mutex_lock_interruptible.
1854                  */
1855         case 0:
1856         case -ERESTARTSYS:
1857         case -EINTR:
1858         case -EBUSY:
1859                 /*
1860                  * EBUSY is ok: this just means that another thread
1861                  * already did the job.
1862                  */
1863                 ret = VM_FAULT_NOPAGE;
1864                 break;
1865         case -ENOMEM:
1866                 ret = VM_FAULT_OOM;
1867                 break;
1868         case -ENOSPC:
1869         case -EFAULT:
1870                 ret = VM_FAULT_SIGBUS;
1871                 break;
1872         default:
1873                 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
1874                 ret = VM_FAULT_SIGBUS;
1875                 break;
1876         }
1877         return ret;
1878 }
1879
1880 /**
1881  * i915_gem_release_mmap - remove physical page mappings
1882  * @obj: obj in question
1883  *
1884  * Preserve the reservation of the mmapping with the DRM core code, but
1885  * relinquish ownership of the pages back to the system.
1886  *
1887  * It is vital that we remove the page mapping if we have mapped a tiled
1888  * object through the GTT and then lose the fence register due to
1889  * resource pressure. Similarly if the object has been moved out of the
1890  * aperture, then pages mapped into userspace must be revoked. Removing the
1891  * mapping will then trigger a page fault on the next user access, allowing
1892  * fixup by i915_gem_fault().
1893  */
1894 void
1895 i915_gem_release_mmap(struct drm_i915_gem_object *obj)
1896 {
1897         struct drm_i915_private *i915 = to_i915(obj->base.dev);
1898
1899         /* Serialisation between user GTT access and our code depends upon
1900          * revoking the CPU's PTE whilst the mutex is held. The next user
1901          * pagefault then has to wait until we release the mutex.
1902          *
1903          * Note that RPM complicates this somewhat by adding an additional
1904          * requirement that operations to the GGTT be made holding the RPM
1905          * wakeref.
1906          */
1907         lockdep_assert_held(&i915->drm.struct_mutex);
1908         intel_runtime_pm_get(i915);
1909
1910         if (list_empty(&obj->userfault_link))
1911                 goto out;
1912
1913         list_del_init(&obj->userfault_link);
1914         drm_vma_node_unmap(&obj->base.vma_node,
1915                            obj->base.dev->anon_inode->i_mapping);
1916
1917         /* Ensure that the CPU's PTEs are revoked and there are no outstanding
1918          * memory transactions from userspace before we return. The TLB
1919          * flushing implied by changing the PTEs above *should* be
1920          * sufficient; an extra barrier here just provides us with a bit
1921          * of paranoid documentation about our requirement to serialise
1922          * memory writes before touching registers / GSM.
1923          */
1924         wmb();
1925
1926 out:
1927         intel_runtime_pm_put(i915);
1928 }
1929
1930 void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
1931 {
1932         struct drm_i915_gem_object *obj, *on;
1933         int i;
1934
1935         /*
1936          * Only called during RPM suspend. All users of the userfault_list
1937          * must be holding an RPM wakeref to ensure that we cannot
1938          * run concurrently with them (they rely on struct_mutex for
1939          * protection amongst themselves).
1940          */
1941
1942         list_for_each_entry_safe(obj, on,
1943                                  &dev_priv->mm.userfault_list, userfault_link) {
1944                 list_del_init(&obj->userfault_link);
1945                 drm_vma_node_unmap(&obj->base.vma_node,
1946                                    obj->base.dev->anon_inode->i_mapping);
1947         }
1948
1949         /* The fences will be lost when the device powers down. If any were
1950          * in use by hardware (i.e. they are pinned), we should not be powering
1951          * down! All other fences will be reacquired by the user upon waking.
1952          */
1953         for (i = 0; i < dev_priv->num_fence_regs; i++) {
1954                 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
1955
1956                 if (WARN_ON(reg->pin_count))
1957                         continue;
1958
1959                 if (!reg->vma)
1960                         continue;
1961
1962                 GEM_BUG_ON(!list_empty(&reg->vma->obj->userfault_link));
1963                 reg->dirty = true;
1964         }
1965 }
1966
1967 /**
1968  * i915_gem_get_ggtt_size - return required global GTT size for an object
1969  * @dev_priv: i915 device
1970  * @size: object size
1971  * @tiling_mode: tiling mode
1972  *
1973  * Return the required global GTT size for an object, taking into account
1974  * potential fence register mapping.
1975  */
1976 u64 i915_gem_get_ggtt_size(struct drm_i915_private *dev_priv,
1977                            u64 size, int tiling_mode)
1978 {
1979         u64 ggtt_size;
1980
1981         GEM_BUG_ON(size == 0);
1982
1983         if (INTEL_GEN(dev_priv) >= 4 ||
1984             tiling_mode == I915_TILING_NONE)
1985                 return size;
1986
1987         /* Previous chips need a power-of-two fence region when tiling */
1988         if (IS_GEN3(dev_priv))
1989                 ggtt_size = 1024*1024;
1990         else
1991                 ggtt_size = 512*1024;
1992
1993         while (ggtt_size < size)
1994                 ggtt_size <<= 1;
1995
1996         return ggtt_size;
1997 }
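/* Worked example: a 600KiB X-tiled object on gen3 needs a 1MiB GTT node
 * (the minimum power-of-two fence region), while the same object on gen2
 * starts at 512KiB and doubles once to reach 1MiB. On gen4+ or for untiled
 * objects the object size is used unchanged.
 */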
1998
1999 /**
2000  * i915_gem_get_ggtt_alignment - return required global GTT alignment
2001  * @dev_priv: i915 device
2002  * @size: object size
2003  * @tiling_mode: tiling mode
2004  * @fenced: is fenced alignment required or not
2005  *
2006  * Return the required global GTT alignment for an object, taking into account
2007  * potential fence register mapping.
2008  */
2009 u64 i915_gem_get_ggtt_alignment(struct drm_i915_private *dev_priv, u64 size,
2010                                 int tiling_mode, bool fenced)
2011 {
2012         GEM_BUG_ON(size == 0);
2013
2014         /*
2015          * Minimum alignment is 4k (GTT page size), but might be greater
2016          * if a fence register is needed for the object.
2017          */
2018         if (INTEL_GEN(dev_priv) >= 4 || (!fenced && IS_G33(dev_priv)) ||
2019             tiling_mode == I915_TILING_NONE)
2020                 return 4096;
2021
2022         /*
2023          * Previous chips need to be aligned to the size of the smallest
2024          * fence register that can contain the object.
2025          */
2026         return i915_gem_get_ggtt_size(dev_priv, size, tiling_mode);
2027 }
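/* Worked example: an untiled object, or any object on gen4+, only needs
 * the 4096 byte GTT page alignment. A 600KiB X-tiled object on gen3 must
 * instead be aligned to its 1MiB fence region (see i915_gem_get_ggtt_size()
 * above), except on G33 when fenced access is not required, in which case
 * 4096 bytes again suffices.
 */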
2028
2029 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
2030 {
2031         struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
2032         int err;
2033
2034         err = drm_gem_create_mmap_offset(&obj->base);
2035         if (!err)
2036                 return 0;
2037
2038         /* We can idle the GPU locklessly to flush stale objects, but in order
2039          * to claim that space for ourselves, we need to take the big
2040          * struct_mutex to free the requests+objects and allocate our slot.
2041          */
2042         err = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE);
2043         if (err)
2044                 return err;
2045
2046         err = i915_mutex_lock_interruptible(&dev_priv->drm);
2047         if (!err) {
2048                 i915_gem_retire_requests(dev_priv);
2049                 err = drm_gem_create_mmap_offset(&obj->base);
2050                 mutex_unlock(&dev_priv->drm.struct_mutex);
2051         }
2052
2053         return err;
2054 }
2055
2056 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
2057 {
2058         drm_gem_free_mmap_offset(&obj->base);
2059 }
2060
2061 int
2062 i915_gem_mmap_gtt(struct drm_file *file,
2063                   struct drm_device *dev,
2064                   uint32_t handle,
2065                   uint64_t *offset)
2066 {
2067         struct drm_i915_gem_object *obj;
2068         int ret;
2069
2070         obj = i915_gem_object_lookup(file, handle);
2071         if (!obj)
2072                 return -ENOENT;
2073
2074         ret = i915_gem_object_create_mmap_offset(obj);
2075         if (ret == 0)
2076                 *offset = drm_vma_node_offset_addr(&obj->base.vma_node);
2077
2078         i915_gem_object_put(obj);
2079         return ret;
2080 }
2081
2082 /**
2083  * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
2084  * @dev: DRM device
2085  * @data: GTT mapping ioctl data
2086  * @file: GEM object info
2087  *
2088  * Simply returns the fake offset to userspace so it can mmap it.
2089  * The mmap call will end up in drm_gem_mmap(), which will set things
2090  * up so we can get faults in the handler above.
2091  *
2092  * The fault handler will take care of binding the object into the GTT
2093  * (since it may have been evicted to make room for something), allocating
2094  * a fence register, and mapping the appropriate aperture address into
2095  * userspace.
2096  */
2097 int
2098 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
2099                         struct drm_file *file)
2100 {
2101         struct drm_i915_gem_mmap_gtt *args = data;
2102
2103         return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
2104 }
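/* For illustration, the full GTT mmap flow from userspace is a two-step
 * dance (hypothetical snippet, assumes libdrm's drmIoctl(); error handling
 * omitted):
 *
 *	struct drm_i915_gem_mmap_gtt mg = { .handle = handle };
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &mg);
 *	ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
 *		   fd, mg.offset);
 *
 * The mmap() of the DRM fd at the fake offset routes through drm_gem_mmap(),
 * which installs i915_gem_fault() above as the fault handler.
 */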
2105
2106 /* Immediately discard the backing storage */
2107 static void
2108 i915_gem_object_truncate(struct drm_i915_gem_object *obj)
2109 {
2110         i915_gem_object_free_mmap_offset(obj);
2111
2112         if (obj->base.filp == NULL)
2113                 return;
2114
2115         /* Our goal here is to return as much of the memory as
2116          * possible back to the system, as we are called from OOM.
2117          * To do this we must instruct the shmfs to drop all of its
2118          * backing pages, *now*.
2119          */
2120         shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
2121         obj->mm.madv = __I915_MADV_PURGED;
2122 }
2123
2124 /* Try to discard unwanted pages */
2125 void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj)
2126 {
2127         struct address_space *mapping;
2128
2129         lockdep_assert_held(&obj->mm.lock);
2130         GEM_BUG_ON(obj->mm.pages);
2131
2132         switch (obj->mm.madv) {
2133         case I915_MADV_DONTNEED:
2134                 i915_gem_object_truncate(obj); /* fall through */
2135         case __I915_MADV_PURGED:
2136                 return;
2137         }
2138
2139         if (obj->base.filp == NULL)
2140                 return;
2141
2142         mapping = obj->base.filp->f_mapping;
2143         invalidate_mapping_pages(mapping, 0, (loff_t)-1);
2144 }
2145
2146 static void
2147 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj,
2148                               struct sg_table *pages)
2149 {
2150         struct sgt_iter sgt_iter;
2151         struct page *page;
2152
2153         __i915_gem_object_release_shmem(obj);
2154
2155         i915_gem_gtt_finish_pages(obj, pages);
2156
2157         if (i915_gem_object_needs_bit17_swizzle(obj))
2158                 i915_gem_object_save_bit_17_swizzle(obj, pages);
2159
2160         for_each_sgt_page(page, sgt_iter, pages) {
2161                 if (obj->mm.dirty)
2162                         set_page_dirty(page);
2163
2164                 if (obj->mm.madv == I915_MADV_WILLNEED)
2165                         mark_page_accessed(page);
2166
2167                 put_page(page);
2168         }
2169         obj->mm.dirty = false;
2170
2171         sg_free_table(pages);
2172         kfree(pages);
2173 }
2174
2175 static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
2176 {
2177         struct radix_tree_iter iter;
2178         void **slot;
2179
2180         radix_tree_for_each_slot(slot, &obj->mm.get_page.radix, &iter, 0)
2181                 radix_tree_delete(&obj->mm.get_page.radix, iter.index);
2182 }
2183
2184 void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
2185                                  enum i915_mm_subclass subclass)
2186 {
2187         struct sg_table *pages;
2188
2189         if (i915_gem_object_has_pinned_pages(obj))
2190                 return;
2191
2192         GEM_BUG_ON(obj->bind_count);
2193         if (!READ_ONCE(obj->mm.pages))
2194                 return;
2195
2196         /* May be called by shrinker from within get_pages() (on another bo) */
2197         mutex_lock_nested(&obj->mm.lock, subclass);
2198         if (unlikely(atomic_read(&obj->mm.pages_pin_count)))
2199                 goto unlock;
2200
2201         /* ->put_pages might need to allocate memory for the bit17 swizzle
2202          * array, hence protect them from being reaped by removing them from gtt
2203          * lists early. */
2204         pages = fetch_and_zero(&obj->mm.pages);
2205         GEM_BUG_ON(!pages);
2206
2207         if (obj->mm.mapping) {
2208                 void *ptr;
2209
2210                 ptr = ptr_mask_bits(obj->mm.mapping);
2211                 if (is_vmalloc_addr(ptr))
2212                         vunmap(ptr);
2213                 else
2214                         kunmap(kmap_to_page(ptr));
2215
2216                 obj->mm.mapping = NULL;
2217         }
2218
2219         __i915_gem_object_reset_page_iter(obj);
2220
2221         obj->ops->put_pages(obj, pages);
2222 unlock:
2223         mutex_unlock(&obj->mm.lock);
2224 }
2225
2226 static unsigned int swiotlb_max_size(void)
2227 {
2228 #if IS_ENABLED(CONFIG_SWIOTLB)
2229         return rounddown(swiotlb_nr_tbl() << IO_TLB_SHIFT, PAGE_SIZE);
2230 #else
2231         return 0;
2232 #endif
2233 }
2234
2235 static struct sg_table *
2236 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
2237 {
2238         struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
2239         int page_count, i;
2240         struct address_space *mapping;
2241         struct sg_table *st;
2242         struct scatterlist *sg;
2243         struct sgt_iter sgt_iter;
2244         struct page *page;
2245         unsigned long last_pfn = 0;     /* suppress gcc warning */
2246         unsigned int max_segment;
2247         int ret;
2248         gfp_t gfp;
2249
2250         /* Assert that the object is not currently in any GPU domain. As it
2251          * wasn't in the GTT, there shouldn't be any way it could have been in
2252          * a GPU cache
2253          */
2254         GEM_BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
2255         GEM_BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
2256
2257         max_segment = swiotlb_max_size();
2258         if (!max_segment)
2259                 max_segment = rounddown(UINT_MAX, PAGE_SIZE);
2260
2261         st = kmalloc(sizeof(*st), GFP_KERNEL);
2262         if (st == NULL)
2263                 return ERR_PTR(-ENOMEM);
2264
2265         page_count = obj->base.size / PAGE_SIZE;
2266         if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
2267                 kfree(st);
2268                 return ERR_PTR(-ENOMEM);
2269         }
2270
2271         /* Get the list of pages out of our struct file.  They'll be pinned
2272          * at this point until we release them.
2273          *
2274          * Fail silently without starting the shrinker
2275          */
2276         mapping = obj->base.filp->f_mapping;
2277         gfp = mapping_gfp_constraint(mapping, ~(__GFP_IO | __GFP_RECLAIM));
2278         gfp |= __GFP_NORETRY | __GFP_NOWARN;
2279         sg = st->sgl;
2280         st->nents = 0;
2281         for (i = 0; i < page_count; i++) {
2282                 page = shmem_read_mapping_page_gfp(mapping, i, gfp);
2283                 if (IS_ERR(page)) {
2284                         i915_gem_shrink(dev_priv,
2285                                         page_count,
2286                                         I915_SHRINK_BOUND |
2287                                         I915_SHRINK_UNBOUND |
2288                                         I915_SHRINK_PURGEABLE);
2289                         page = shmem_read_mapping_page_gfp(mapping, i, gfp);
2290                 }
2291                 if (IS_ERR(page)) {
2292                         /* We've tried hard to allocate the memory by reaping
2293                          * our own buffer, now let the real VM do its job and
2294                          * go down in flames if truly OOM.
2295                          */
2296                         page = shmem_read_mapping_page(mapping, i);
2297                         if (IS_ERR(page)) {
2298                                 ret = PTR_ERR(page);
2299                                 goto err_pages;
2300                         }
2301                 }
2302                 if (!i ||
2303                     sg->length >= max_segment ||
2304                     page_to_pfn(page) != last_pfn + 1) {
2305                         if (i)
2306                                 sg = sg_next(sg);
2307                         st->nents++;
2308                         sg_set_page(sg, page, PAGE_SIZE, 0);
2309                 } else {
2310                         sg->length += PAGE_SIZE;
2311                 }
2312                 last_pfn = page_to_pfn(page);
2313
2314                 /* Check that the i965g/gm workaround works. */
2315                 WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL));
2316         }
2317         if (sg) /* loop terminated early; short sg table */
2318                 sg_mark_end(sg);
2319
2320         ret = i915_gem_gtt_prepare_pages(obj, st);
2321         if (ret)
2322                 goto err_pages;
2323
2324         if (i915_gem_object_needs_bit17_swizzle(obj))
2325                 i915_gem_object_do_bit_17_swizzle(obj, st);
2326
2327         return st;
2328
2329 err_pages:
2330         sg_mark_end(sg);
2331         for_each_sgt_page(page, sgt_iter, st)
2332                 put_page(page);
2333         sg_free_table(st);
2334         kfree(st);
2335
2336         /* shmemfs first checks if there is enough memory to allocate the page
2337          * and reports ENOSPC should there be insufficient, along with the usual
2338          * ENOMEM for a genuine allocation failure.
2339          *
2340          * We use ENOSPC in our driver to mean that we have run out of aperture
2341          * space and so want to translate the error from shmemfs back to our
2342          * usual understanding of ENOMEM.
2343          */
2344         if (ret == -ENOSPC)
2345                 ret = -ENOMEM;
2346
2347         return ERR_PTR(ret);
2348 }
2349
2350 void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
2351                                  struct sg_table *pages)
2352 {
2353         lockdep_assert_held(&obj->mm.lock);
2354
2355         obj->mm.get_page.sg_pos = pages->sgl;
2356         obj->mm.get_page.sg_idx = 0;
2357
2358         obj->mm.pages = pages;
2359
2360         if (i915_gem_object_is_tiled(obj) &&
2361             to_i915(obj->base.dev)->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
2362                 GEM_BUG_ON(obj->mm.quirked);
2363                 __i915_gem_object_pin_pages(obj);
2364                 obj->mm.quirked = true;
2365         }
2366 }
2367
2368 static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
2369 {
2370         struct sg_table *pages;
2371
2372         GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));
2373
2374         if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) {
2375                 DRM_DEBUG("Attempting to obtain a purgeable object\n");
2376                 return -EFAULT;
2377         }
2378
2379         pages = obj->ops->get_pages(obj);
2380         if (unlikely(IS_ERR(pages)))
2381                 return PTR_ERR(pages);
2382
2383         __i915_gem_object_set_pages(obj, pages);
2384         return 0;
2385 }
2386
2387 /* Ensure that the associated pages are gathered from the backing storage
2388  * and pinned into our object. i915_gem_object_pin_pages() may be called
2389  * multiple times before they are released by a single call to
2390  * i915_gem_object_unpin_pages() - once the pages are no longer referenced
2391  * either as a result of memory pressure (reaping pages under the shrinker)
2392  * or as the object is itself released.
2393  */
2394 int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
2395 {
2396         int err;
2397
2398         err = mutex_lock_interruptible(&obj->mm.lock);
2399         if (err)
2400                 return err;
2401
2402         if (unlikely(!obj->mm.pages)) {
2403                 err = ____i915_gem_object_get_pages(obj);
2404                 if (err)
2405                         goto unlock;
2406
2407                 smp_mb__before_atomic();
2408         }
2409         atomic_inc(&obj->mm.pages_pin_count);
2410
2411 unlock:
2412         mutex_unlock(&obj->mm.lock);
2413         return err;
2414 }
2415
2416 /* The 'mapping' part of i915_gem_object_pin_map() below */
2417 static void *i915_gem_object_map(const struct drm_i915_gem_object *obj,
2418                                  enum i915_map_type type)
2419 {
2420         unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
2421         struct sg_table *sgt = obj->mm.pages;
2422         struct sgt_iter sgt_iter;
2423         struct page *page;
2424         struct page *stack_pages[32];
2425         struct page **pages = stack_pages;
2426         unsigned long i = 0;
2427         pgprot_t pgprot;
2428         void *addr;
2429
2430         /* A single page can always be kmapped */
2431         if (n_pages == 1 && type == I915_MAP_WB)
2432                 return kmap(sg_page(sgt->sgl));
2433
2434         if (n_pages > ARRAY_SIZE(stack_pages)) {
2435                 /* Too big for stack -- allocate temporary array instead */
2436                 pages = drm_malloc_gfp(n_pages, sizeof(*pages), GFP_TEMPORARY);
2437                 if (!pages)
2438                         return NULL;
2439         }
2440
2441         for_each_sgt_page(page, sgt_iter, sgt)
2442                 pages[i++] = page;
2443
2444         /* Check that we have the expected number of pages */
2445         GEM_BUG_ON(i != n_pages);
2446
2447         switch (type) {
2448         case I915_MAP_WB:
2449                 pgprot = PAGE_KERNEL;
2450                 break;
2451         case I915_MAP_WC:
2452                 pgprot = pgprot_writecombine(PAGE_KERNEL_IO);
2453                 break;
2454         }
2455         addr = vmap(pages, n_pages, 0, pgprot);
2456
2457         if (pages != stack_pages)
2458                 drm_free_large(pages);
2459
2460         return addr;
2461 }
2462
2463 /* get, pin, and map the pages of the object into kernel space */
2464 void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
2465                               enum i915_map_type type)
2466 {
2467         enum i915_map_type has_type;
2468         bool pinned;
2469         void *ptr;
2470         int ret;
2471
2472         GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));
2473
2474         ret = mutex_lock_interruptible(&obj->mm.lock);
2475         if (ret)
2476                 return ERR_PTR(ret);
2477
2478         pinned = true;
2479         if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) {
2480                 if (unlikely(!obj->mm.pages)) {
2481                         ret = ____i915_gem_object_get_pages(obj);
2482                         if (ret)
2483                                 goto err_unlock;
2484
2485                         smp_mb__before_atomic();
2486                 }
2487                 atomic_inc(&obj->mm.pages_pin_count);
2488                 pinned = false;
2489         }
2490         GEM_BUG_ON(!obj->mm.pages);
2491
2492         ptr = ptr_unpack_bits(obj->mm.mapping, has_type);
2493         if (ptr && has_type != type) {
2494                 if (pinned) {
2495                         ret = -EBUSY;
2496                         goto err_unpin;
2497                 }
2498
2499                 if (is_vmalloc_addr(ptr))
2500                         vunmap(ptr);
2501                 else
2502                         kunmap(kmap_to_page(ptr));
2503
2504                 ptr = obj->mm.mapping = NULL;
2505         }
2506
2507         if (!ptr) {
2508                 ptr = i915_gem_object_map(obj, type);
2509                 if (!ptr) {
2510                         ret = -ENOMEM;
2511                         goto err_unpin;
2512                 }
2513
2514                 obj->mm.mapping = ptr_pack_bits(ptr, type);
2515         }
2516
2517 out_unlock:
2518         mutex_unlock(&obj->mm.lock);
2519         return ptr;
2520
2521 err_unpin:
2522         atomic_dec(&obj->mm.pages_pin_count);
2523 err_unlock:
2524         ptr = ERR_PTR(ret);
2525         goto out_unlock;
2526 }
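/* A typical in-kernel usage sketch: callers pin and map in one step and
 * must balance it with i915_gem_object_unpin_map() once done, e.g.
 *
 *	cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
 *	if (IS_ERR(cmd))
 *		return PTR_ERR(cmd);
 *	... write batch contents through cmd ...
 *	i915_gem_object_unpin_map(obj);
 *
 * Repeated calls with the same mapping type return the cached mapping;
 * requesting a different type only succeeds if no other user holds a pin.
 */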
2527
2528 static bool i915_context_is_banned(const struct i915_gem_context *ctx)
2529 {
2530         unsigned long elapsed;
2531
2532         if (ctx->hang_stats.banned)
2533                 return true;
2534
2535         elapsed = get_seconds() - ctx->hang_stats.guilty_ts;
2536         if (ctx->hang_stats.ban_period_seconds &&
2537             elapsed <= ctx->hang_stats.ban_period_seconds) {
2538                 DRM_DEBUG("context hanging too fast, banning!\n");
2539                 return true;
2540         }
2541
2542         return false;
2543 }
2544
2545 static void i915_set_reset_status(struct i915_gem_context *ctx,
2546                                   const bool guilty)
2547 {
2548         struct i915_ctx_hang_stats *hs = &ctx->hang_stats;
2549
2550         if (guilty) {
2551                 hs->banned = i915_context_is_banned(ctx);
2552                 hs->batch_active++;
2553                 hs->guilty_ts = get_seconds();
2554         } else {
2555                 hs->batch_pending++;
2556         }
2557 }
2558
2559 struct drm_i915_gem_request *
2560 i915_gem_find_active_request(struct intel_engine_cs *engine)
2561 {
2562         struct drm_i915_gem_request *request;
2563
2564         /* We are called by the error capture and reset at a random
2565          * point in time. In particular, note that neither is crucially
2566          * ordered with an interrupt. After a hang, the GPU is dead and we
2567          * assume that no more writes can happen (we waited long enough for
2568          * all writes that were in transaction to be flushed) - adding an
2569          * extra delay for a recent interrupt is pointless. Hence, we do
2570          * not need an engine->irq_seqno_barrier() before the seqno reads.
2571          */
2572         list_for_each_entry(request, &engine->timeline->requests, link) {
2573                 if (__i915_gem_request_completed(request))
2574                         continue;
2575
2576                 return request;
2577         }
2578
2579         return NULL;
2580 }
2581
2582 static void reset_request(struct drm_i915_gem_request *request)
2583 {
2584         void *vaddr = request->ring->vaddr;
2585         u32 head;
2586
2587         /* As this request likely depends on state from the lost
2588          * context, clear out all the user operations leaving the
2589          * breadcrumb at the end (so we get the fence notifications).
2590          */
2591         head = request->head;
2592         if (request->postfix < head) {
2593                 memset(vaddr + head, 0, request->ring->size - head);
2594                 head = 0;
2595         }
2596         memset(vaddr + head, 0, request->postfix - head);
2597 }
2598
2599 static void i915_gem_reset_engine(struct intel_engine_cs *engine)
2600 {
2601         struct drm_i915_gem_request *request;
2602         struct i915_gem_context *incomplete_ctx;
2603         struct intel_timeline *timeline;
2604         bool ring_hung;
2605
2606         if (engine->irq_seqno_barrier)
2607                 engine->irq_seqno_barrier(engine);
2608
2609         request = i915_gem_find_active_request(engine);
2610         if (!request)
2611                 return;
2612
2613         ring_hung = engine->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG;
2614         if (engine->hangcheck.seqno != intel_engine_get_seqno(engine))
2615                 ring_hung = false;
2616
2617         i915_set_reset_status(request->ctx, ring_hung);
2618         if (!ring_hung)
2619                 return;
2620
2621         DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n",
2622                          engine->name, request->global_seqno);
2623
2624         /* Setup the CS to resume from the breadcrumb of the hung request */
2625         engine->reset_hw(engine, request);
2626
2627         /* Users of the default context do not rely on logical state
2628          * preserved between batches. They have to emit full state on
2629          * every batch and so it is safe to execute queued requests following
2630          * the hang.
2631          *
2632          * Other contexts preserve state, now corrupt. We want to skip all
2633          * queued requests that reference the corrupt context.
2634          */
2635         incomplete_ctx = request->ctx;
2636         if (i915_gem_context_is_default(incomplete_ctx))
2637                 return;
2638
2639         list_for_each_entry_continue(request, &engine->timeline->requests, link)
2640                 if (request->ctx == incomplete_ctx)
2641                         reset_request(request);
2642
2643         timeline = i915_gem_context_lookup_timeline(incomplete_ctx, engine);
2644         list_for_each_entry(request, &timeline->requests, link)
2645                 reset_request(request);
2646 }
2647
2648 void i915_gem_reset(struct drm_i915_private *dev_priv)
2649 {
2650         struct intel_engine_cs *engine;
2651         enum intel_engine_id id;
2652
2653         lockdep_assert_held(&dev_priv->drm.struct_mutex);
2654
2655         i915_gem_retire_requests(dev_priv);
2656
2657         for_each_engine(engine, dev_priv, id)
2658                 i915_gem_reset_engine(engine);
2659
2660         i915_gem_restore_fences(&dev_priv->drm);
2661
2662         if (dev_priv->gt.awake) {
2663                 intel_sanitize_gt_powersave(dev_priv);
2664                 intel_enable_gt_powersave(dev_priv);
2665                 if (INTEL_GEN(dev_priv) >= 6)
2666                         gen6_rps_busy(dev_priv);
2667         }
2668 }
2669
2670 static void nop_submit_request(struct drm_i915_gem_request *request)
2671 {
2672 }
2673
2674 static void i915_gem_cleanup_engine(struct intel_engine_cs *engine)
2675 {
2676         engine->submit_request = nop_submit_request;
2677
2678         /* Mark all pending requests as complete so that any concurrent
2679          * (lockless) lookup doesn't try and wait upon the request as we
2680          * reset it.
2681          */
2682         intel_engine_init_global_seqno(engine,
2683                                        intel_engine_last_submit(engine));
2684
2685         /*
2686          * Clear the execlists queue up before freeing the requests, as those
2687          * are the ones that keep the context and ringbuffer backing objects
2688          * pinned in place.
2689          */
2690
2691         if (i915.enable_execlists) {
2692                 spin_lock(&engine->execlist_lock);
2693                 INIT_LIST_HEAD(&engine->execlist_queue);
2694                 i915_gem_request_put(engine->execlist_port[0].request);
2695                 i915_gem_request_put(engine->execlist_port[1].request);
2696                 memset(engine->execlist_port, 0, sizeof(engine->execlist_port));
2697                 spin_unlock(&engine->execlist_lock);
2698         }
2699 }
2700
2701 void i915_gem_set_wedged(struct drm_i915_private *dev_priv)
2702 {
2703         struct intel_engine_cs *engine;
2704         enum intel_engine_id id;
2705
2706         lockdep_assert_held(&dev_priv->drm.struct_mutex);
2707         set_bit(I915_WEDGED, &dev_priv->gpu_error.flags);
2708
2709         i915_gem_context_lost(dev_priv);
2710         for_each_engine(engine, dev_priv, id)
2711                 i915_gem_cleanup_engine(engine);
2712         mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0);
2713
2714         i915_gem_retire_requests(dev_priv);
2715 }
2716
2717 static void
2718 i915_gem_retire_work_handler(struct work_struct *work)
2719 {
2720         struct drm_i915_private *dev_priv =
2721                 container_of(work, typeof(*dev_priv), gt.retire_work.work);
2722         struct drm_device *dev = &dev_priv->drm;
2723
2724         /* Come back later if the device is busy... */
2725         if (mutex_trylock(&dev->struct_mutex)) {
2726                 i915_gem_retire_requests(dev_priv);
2727                 mutex_unlock(&dev->struct_mutex);
2728         }
2729
2730         /* Keep the retire handler running until we are finally idle.
2731          * We do not need to do this test under locking as in the worst-case
2732          * we queue the retire worker once too often.
2733          */
2734         if (READ_ONCE(dev_priv->gt.awake)) {
2735                 i915_queue_hangcheck(dev_priv);
2736                 queue_delayed_work(dev_priv->wq,
2737                                    &dev_priv->gt.retire_work,
2738                                    round_jiffies_up_relative(HZ));
2739         }
2740 }
2741
2742 static void
2743 i915_gem_idle_work_handler(struct work_struct *work)
2744 {
2745         struct drm_i915_private *dev_priv =
2746                 container_of(work, typeof(*dev_priv), gt.idle_work.work);
2747         struct drm_device *dev = &dev_priv->drm;
2748         struct intel_engine_cs *engine;
2749         enum intel_engine_id id;
2750         bool rearm_hangcheck;
2751
2752         if (!READ_ONCE(dev_priv->gt.awake))
2753                 return;
2754
2755         /*
2756          * Wait for the last execlists context to complete, but bail out in case a
2757          * new request is submitted.
2758          */
2759         wait_for(READ_ONCE(dev_priv->gt.active_requests) ||
2760                  intel_execlists_idle(dev_priv), 10);
2761
2762         if (READ_ONCE(dev_priv->gt.active_requests))
2763                 return;
2764
2765         rearm_hangcheck =
2766                 cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
2767
2768         if (!mutex_trylock(&dev->struct_mutex)) {
2769                 /* Currently busy, come back later */
2770                 mod_delayed_work(dev_priv->wq,
2771                                  &dev_priv->gt.idle_work,
2772                                  msecs_to_jiffies(50));
2773                 goto out_rearm;
2774         }
2775
2776         /*
2777          * New request retired after this work handler started, extend active
2778          * period until next instance of the work.
2779          */
2780         if (work_pending(work))
2781                 goto out_unlock;
2782
2783         if (dev_priv->gt.active_requests)
2784                 goto out_unlock;
2785
2786         if (wait_for(intel_execlists_idle(dev_priv), 10))
2787                 DRM_ERROR("Timeout waiting for engines to idle\n");
2788
2789         for_each_engine(engine, dev_priv, id)
2790                 i915_gem_batch_pool_fini(&engine->batch_pool);
2791
2792         GEM_BUG_ON(!dev_priv->gt.awake);
2793         dev_priv->gt.awake = false;
2794         rearm_hangcheck = false;
2795
2796         if (INTEL_GEN(dev_priv) >= 6)
2797                 gen6_rps_idle(dev_priv);
2798         intel_runtime_pm_put(dev_priv);
2799 out_unlock:
2800         mutex_unlock(&dev->struct_mutex);
2801
2802 out_rearm:
2803         if (rearm_hangcheck) {
2804                 GEM_BUG_ON(!dev_priv->gt.awake);
2805                 i915_queue_hangcheck(dev_priv);
2806         }
2807 }
2808
2809 void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
2810 {
2811         struct drm_i915_gem_object *obj = to_intel_bo(gem);
2812         struct drm_i915_file_private *fpriv = file->driver_priv;
2813         struct i915_vma *vma, *vn;
2814
2815         mutex_lock(&obj->base.dev->struct_mutex);
2816         list_for_each_entry_safe(vma, vn, &obj->vma_list, obj_link)
2817                 if (vma->vm->file == fpriv)
2818                         i915_vma_close(vma);
2819
2820         if (i915_gem_object_is_active(obj) &&
2821             !i915_gem_object_has_active_reference(obj)) {
2822                 i915_gem_object_set_active_reference(obj);
2823                 i915_gem_object_get(obj);
2824         }
2825         mutex_unlock(&obj->base.dev->struct_mutex);
2826 }
2827
2828 static unsigned long to_wait_timeout(s64 timeout_ns)
2829 {
2830         if (timeout_ns < 0)
2831                 return MAX_SCHEDULE_TIMEOUT;
2832
2833         if (timeout_ns == 0)
2834                 return 0;
2835
2836         return nsecs_to_jiffies_timeout(timeout_ns);
2837 }
2838
2839 /**
2840  * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
2841  * @dev: drm device pointer
2842  * @data: ioctl data blob
2843  * @file: drm file pointer
2844  *
2845  * Returns 0 if successful, else an error is returned with the remaining time in
2846  * the timeout parameter.
2847  *  -ETIME: object is still busy after timeout
2848  *  -ERESTARTSYS: signal interrupted the wait
2849  *  -ENOENT: object doesn't exist
2850  * Also possible, but rare:
2851  *  -EAGAIN: GPU wedged
2852  *  -ENOMEM: damn
2853  *  -ENODEV: Internal IRQ fail
2854  *  -E?: The add request failed
2855  *
2856  * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
2857  * non-zero timeout parameter the wait ioctl will wait for the given number of
2858  * nanoseconds on an object becoming unbusy. Since the wait itself does so
2859  * without holding struct_mutex the object may become re-busied before this
2860  * function completes. A similar but shorter race condition exists in the busy
2861  * ioctl.
2862  */
2863 int
2864 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
2865 {
2866         struct drm_i915_gem_wait *args = data;
2867         struct drm_i915_gem_object *obj;
2868         ktime_t start;
2869         long ret;
2870
2871         if (args->flags != 0)
2872                 return -EINVAL;
2873
2874         obj = i915_gem_object_lookup(file, args->bo_handle);
2875         if (!obj)
2876                 return -ENOENT;
2877
2878         start = ktime_get();
2879
2880         ret = i915_gem_object_wait(obj,
2881                                    I915_WAIT_INTERRUPTIBLE | I915_WAIT_ALL,
2882                                    to_wait_timeout(args->timeout_ns),
2883                                    to_rps_client(file));
2884
2885         if (args->timeout_ns > 0) {
2886                 args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start));
2887                 if (args->timeout_ns < 0)
2888                         args->timeout_ns = 0;
2889         }
2890
2891         i915_gem_object_put(obj);
2892         return ret;
2893 }
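/* For illustration, a hypothetical userspace wait with a 100ms budget
 * (assumes libdrm's drmIoctl(); a negative timeout waits indefinitely):
 *
 *	struct drm_i915_gem_wait wait = {
 *		.bo_handle = handle,
 *		.timeout_ns = 100 * 1000 * 1000,
 *	};
 *	ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
 *
 * On -ETIME the object is still busy and wait.timeout_ns holds the
 * (clamped, never negative) time remaining, as updated above.
 */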
2894
2895 static void __i915_vma_iounmap(struct i915_vma *vma)
2896 {
2897         GEM_BUG_ON(i915_vma_is_pinned(vma));
2898
2899         if (vma->iomap == NULL)
2900                 return;
2901
2902         io_mapping_unmap(vma->iomap);
2903         vma->iomap = NULL;
2904 }
2905
2906 int i915_vma_unbind(struct i915_vma *vma)
2907 {
2908         struct drm_i915_gem_object *obj = vma->obj;
2909         unsigned long active;
2910         int ret;
2911
2912         lockdep_assert_held(&obj->base.dev->struct_mutex);
2913
2914         /* First wait upon any activity as retiring the request may
2915          * have side-effects such as unpinning or even unbinding this vma.
2916          */
2917         active = i915_vma_get_active(vma);
2918         if (active) {
2919                 int idx;
2920
2921                 /* When a closed VMA is retired, it is unbound - eek.
2922                  * In order to prevent it from being recursively closed,
2923                  * take a pin on the vma so that the second unbind is
2924                  * aborted.
2925                  *
2926                  * Even more scary is that the retire callback may free
2927                  * the object (last active vma). To prevent the explosion
2928                  * we defer the actual object free to a worker that can
2929                  * only proceed once it acquires the struct_mutex (which
2930                  * we currently hold, therefore it cannot free this object
2931                  * before we are finished).
2932                  */
2933                 __i915_vma_pin(vma);
2934
2935                 for_each_active(active, idx) {
2936                         ret = i915_gem_active_retire(&vma->last_read[idx],
2937                                                    &vma->vm->dev->struct_mutex);
2938                         if (ret)
2939                                 break;
2940                 }
2941
2942                 __i915_vma_unpin(vma);
2943                 if (ret)
2944                         return ret;
2945
2946                 GEM_BUG_ON(i915_vma_is_active(vma));
2947         }
2948
2949         if (i915_vma_is_pinned(vma))
2950                 return -EBUSY;
2951
2952         if (!drm_mm_node_allocated(&vma->node))
2953                 goto destroy;
2954
2955         GEM_BUG_ON(obj->bind_count == 0);
2956         GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
2957
2958         if (i915_vma_is_map_and_fenceable(vma)) {
2959                 /* release the fence reg _after_ flushing */
2960                 ret = i915_vma_put_fence(vma);
2961                 if (ret)
2962                         return ret;
2963
2964                 /* Force a pagefault for domain tracking on next user access */
2965                 i915_gem_release_mmap(obj);
2966
2967                 __i915_vma_iounmap(vma);
2968                 vma->flags &= ~I915_VMA_CAN_FENCE;
2969         }
2970
2971         if (likely(!vma->vm->closed)) {
2972                 trace_i915_vma_unbind(vma);
2973                 vma->vm->unbind_vma(vma);
2974         }
2975         vma->flags &= ~(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND);
2976
2977         drm_mm_remove_node(&vma->node);
2978         list_move_tail(&vma->vm_link, &vma->vm->unbound_list);
2979
2980         if (vma->pages != obj->mm.pages) {
2981                 GEM_BUG_ON(!vma->pages);
2982                 sg_free_table(vma->pages);
2983                 kfree(vma->pages);
2984         }
2985         vma->pages = NULL;
2986
2987         /* Since the unbound list is global, only move to that list if
2988          * no more VMAs exist. */
2989         if (--obj->bind_count == 0)
2990                 list_move_tail(&obj->global_link,
2991                                &to_i915(obj->base.dev)->mm.unbound_list);
2992
2993         /* And finally, now that the object is completely decoupled from this vma,
2994          * we can drop its hold on the backing storage and allow it to be
2995          * reaped by the shrinker.
2996          */
2997         i915_gem_object_unpin_pages(obj);
2998
2999 destroy:
3000         if (unlikely(i915_vma_is_closed(vma)))
3001                 i915_vma_destroy(vma);
3002
3003         return 0;
3004 }
3005
3006 static int wait_for_timeline(struct i915_gem_timeline *tl, unsigned int flags)
3007 {
3008         int ret, i;
3009
3010         for (i = 0; i < ARRAY_SIZE(tl->engine); i++) {
3011                 ret = i915_gem_active_wait(&tl->engine[i].last_request, flags);
3012                 if (ret)
3013                         return ret;
3014         }
3015
3016         return 0;
3017 }
3018
3019 int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags)
3020 {
3021         struct i915_gem_timeline *tl;
3022         int ret;
3023
3024         list_for_each_entry(tl, &i915->gt.timelines, link) {
3025                 ret = wait_for_timeline(tl, flags);
3026                 if (ret)
3027                         return ret;
3028         }
3029
3030         return 0;
3031 }
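
/*
 * Illustrative sketch (not built): a typical way a caller could quiesce
 * the GPU with the helper above while holding struct_mutex, mirroring
 * the flag usage elsewhere in this file. The function name is
 * hypothetical.
 */
#if 0
static int example_quiesce_gpu(struct drm_i915_private *i915)
{
        int ret;

        mutex_lock(&i915->drm.struct_mutex);
        ret = i915_gem_wait_for_idle(i915,
                                     I915_WAIT_INTERRUPTIBLE |
                                     I915_WAIT_LOCKED);
        mutex_unlock(&i915->drm.struct_mutex);

        return ret;
}
#endif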
3032
3033 static bool i915_gem_valid_gtt_space(struct i915_vma *vma,
3034                                      unsigned long cache_level)
3035 {
3036         struct drm_mm_node *gtt_space = &vma->node;
3037         struct drm_mm_node *other;
3038
3039         /*
3040          * On some machines we have to be careful when putting differing types
3041          * of snoopable memory together to avoid the prefetcher crossing memory
3042          * domains and dying. During vm initialisation, we decide whether or not
3043          * these constraints apply and set the drm_mm.color_adjust
3044          * appropriately.
3045          */
3046         if (vma->vm->mm.color_adjust == NULL)
3047                 return true;
3048
3049         if (!drm_mm_node_allocated(gtt_space))
3050                 return true;
3051
3052         if (list_empty(&gtt_space->node_list))
3053                 return true;
3054
3055         other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list);
3056         if (other->allocated && !other->hole_follows && other->color != cache_level)
3057                 return false;
3058
3059         other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list);
3060         if (other->allocated && !gtt_space->hole_follows && other->color != cache_level)
3061                 return false;
3062
3063         return true;
3064 }
3065
3066 /**
3067  * i915_vma_insert - finds a slot for the vma in its address space
3068  * @vma: the vma
3069  * @size: requested size in bytes (can be larger than the VMA)
3070  * @alignment: required alignment
3071  * @flags: mask of PIN_* flags to use
3072  *
3073  * First we try to allocate some free space that meets the requirements for
3074  * the VMA. Failing that, if the flags permit, we evict an old VMA,
3075  * preferably the oldest idle entry, to make room for the new VMA.
3076  *
3077  * Returns:
3078  * 0 on success, negative error code otherwise.
3079  */
3080 static int
3081 i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
3082 {
3083         struct drm_i915_private *dev_priv = to_i915(vma->vm->dev);
3084         struct drm_i915_gem_object *obj = vma->obj;
3085         u64 start, end;
3086         int ret;
3087
3088         GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND));
3089         GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
3090
3091         size = max(size, vma->size);
3092         if (flags & PIN_MAPPABLE)
3093                 size = i915_gem_get_ggtt_size(dev_priv, size,
3094                                               i915_gem_object_get_tiling(obj));
3095
3096         alignment = max(max(alignment, vma->display_alignment),
3097                         i915_gem_get_ggtt_alignment(dev_priv, size,
3098                                                     i915_gem_object_get_tiling(obj),
3099                                                     flags & PIN_MAPPABLE));
3100
3101         start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
3102
3103         end = vma->vm->total;
3104         if (flags & PIN_MAPPABLE)
3105                 end = min_t(u64, end, dev_priv->ggtt.mappable_end);
3106         if (flags & PIN_ZONE_4G)
3107                 end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE);
3108
3109         /* If binding the object/GGTT view requires more space than the entire
3110          * aperture has, reject it early before evicting everything in a vain
3111          * attempt to find space.
3112          */
3113         if (size > end) {
3114                 DRM_DEBUG("Attempting to bind an object larger than the aperture: request=%llu [object=%zd] > %s aperture=%llu\n",
3115                           size, obj->base.size,
3116                           flags & PIN_MAPPABLE ? "mappable" : "total",
3117                           end);
3118                 return -E2BIG;
3119         }
3120
3121         ret = i915_gem_object_pin_pages(obj);
3122         if (ret)
3123                 return ret;
3124
3125         if (flags & PIN_OFFSET_FIXED) {
3126                 u64 offset = flags & PIN_OFFSET_MASK;
3127                 if (offset & (alignment - 1) || offset > end - size) {
3128                         ret = -EINVAL;
3129                         goto err_unpin;
3130                 }
3131
3132                 vma->node.start = offset;
3133                 vma->node.size = size;
3134                 vma->node.color = obj->cache_level;
3135                 ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node);
3136                 if (ret) {
3137                         ret = i915_gem_evict_for_vma(vma);
3138                         if (ret == 0)
3139                                 ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node);
3140                         if (ret)
3141                                 goto err_unpin;
3142                 }
3143         } else {
3144                 u32 search_flag, alloc_flag;
3145
3146                 if (flags & PIN_HIGH) {
3147                         search_flag = DRM_MM_SEARCH_BELOW;
3148                         alloc_flag = DRM_MM_CREATE_TOP;
3149                 } else {
3150                         search_flag = DRM_MM_SEARCH_DEFAULT;
3151                         alloc_flag = DRM_MM_CREATE_DEFAULT;
3152                 }
3153
3154                 /* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks,
3155                  * so we know that we always have a minimum alignment of 4096.
3156                  * The drm_mm range manager is optimised to return results
3157                  * with zero alignment, so where possible use the optimal
3158                  * path.
3159                  */
3160                 if (alignment <= 4096)
3161                         alignment = 0;
3162
3163 search_free:
3164                 ret = drm_mm_insert_node_in_range_generic(&vma->vm->mm,
3165                                                           &vma->node,
3166                                                           size, alignment,
3167                                                           obj->cache_level,
3168                                                           start, end,
3169                                                           search_flag,
3170                                                           alloc_flag);
3171                 if (ret) {
3172                         ret = i915_gem_evict_something(vma->vm, size, alignment,
3173                                                        obj->cache_level,
3174                                                        start, end,
3175                                                        flags);
3176                         if (ret == 0)
3177                                 goto search_free;
3178
3179                         goto err_unpin;
3180                 }
3181
3182                 GEM_BUG_ON(vma->node.start < start);
3183                 GEM_BUG_ON(vma->node.start + vma->node.size > end);
3184         }
3185         GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level));
3186
3187         list_move_tail(&obj->global_link, &dev_priv->mm.bound_list);
3188         list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
3189         obj->bind_count++;
3190         GEM_BUG_ON(atomic_read(&obj->mm.pages_pin_count) < obj->bind_count);
3191
3192         return 0;
3193
3194 err_unpin:
3195         i915_gem_object_unpin_pages(obj);
3196         return ret;
3197 }
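
/*
 * Illustrative sketch (not built): how a caller could encode placement
 * constraints into the single flags word that i915_vma_insert() decodes
 * above, with the offset or bias carried in the PIN_OFFSET_MASK bits.
 * The function name and the example offsets are arbitrary.
 */
#if 0
static u64 example_pin_flags(bool mappable)
{
        if (mappable)
                /* anywhere in the CPU-visible aperture, above a 1MiB bias */
                return PIN_GLOBAL | PIN_MAPPABLE |
                       PIN_OFFSET_BIAS | (0x100000 & PIN_OFFSET_MASK);

        /* or at exactly 64KiB from the start of the address space */
        return PIN_GLOBAL | PIN_OFFSET_FIXED | (0x10000 & PIN_OFFSET_MASK);
}
#endif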
3198
3199 bool
3200 i915_gem_clflush_object(struct drm_i915_gem_object *obj,
3201                         bool force)
3202 {
3203         /* If we don't have a page list set up, then we're not pinned
3204          * to the GPU, and we can ignore the cache flush because it'll happen
3205          * again at bind time.
3206          */
3207         if (!obj->mm.pages)
3208                 return false;
3209
3210         /*
3211          * Stolen memory is always coherent with the GPU as it is explicitly
3212          * marked as wc by the system, or the system is cache-coherent.
3213          */
3214         if (obj->stolen || obj->phys_handle)
3215                 return false;
3216
3217         /* If the GPU is snooping the contents of the CPU cache,
3218          * we do not need to manually clear the CPU cache lines.  However,
3219          * the caches are only snooped when the render cache is
3220          * flushed/invalidated.  As we always have to emit invalidations
3221          * and flushes when moving into and out of the RENDER domain, correct
3222          * snooping behaviour occurs naturally as the result of our domain
3223          * tracking.
3224          */
3225         if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) {
3226                 obj->cache_dirty = true;
3227                 return false;
3228         }
3229
3230         trace_i915_gem_object_clflush(obj);
3231         drm_clflush_sg(obj->mm.pages);
3232         obj->cache_dirty = false;
3233
3234         return true;
3235 }
3236
3237 /** Flushes the GTT write domain for the object if it's dirty. */
3238 static void
3239 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
3240 {
3241         struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
3242
3243         if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
3244                 return;
3245
3246         /* No actual flushing is required for the GTT write domain.  Writes
3247          * to it "immediately" go to main memory as far as we know, so there's
3248          * no chipset flush.  It also doesn't land in render cache.
3249          *
3250          * However, we do have to enforce the order so that all writes through
3251          * the GTT land before any writes to the device, such as updates to
3252          * the GATT itself.
3253          *
3254          * We also have to wait a bit for the writes to land from the GTT.
3255          * An uncached read (i.e. mmio) seems to be ideal for the round-trip
3256          * timing. This issue has only been observed when switching quickly
3257          * between GTT writes and CPU reads from inside the kernel on recent hw,
3258          * and it appears to only affect discrete GTT blocks (i.e. on LLC
3259          * system agents we cannot reproduce this behaviour).
3260          */
3261         wmb();
3262         if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv))
3263                 POSTING_READ(RING_ACTHD(dev_priv->engine[RCS]->mmio_base));
3264
3265         intel_fb_obj_flush(obj, false, write_origin(obj, I915_GEM_DOMAIN_GTT));
3266
3267         obj->base.write_domain = 0;
3268         trace_i915_gem_object_change_domain(obj,
3269                                             obj->base.read_domains,
3270                                             I915_GEM_DOMAIN_GTT);
3271 }
3272
3273 /** Flushes the CPU write domain for the object if it's dirty. */
3274 static void
3275 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
3276 {
3277         if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
3278                 return;
3279
3280         if (i915_gem_clflush_object(obj, obj->pin_display))
3281                 i915_gem_chipset_flush(to_i915(obj->base.dev));
3282
3283         intel_fb_obj_flush(obj, false, ORIGIN_CPU);
3284
3285         obj->base.write_domain = 0;
3286         trace_i915_gem_object_change_domain(obj,
3287                                             obj->base.read_domains,
3288                                             I915_GEM_DOMAIN_CPU);
3289 }
3290
3291 /**
3292  * Moves a single object to the GTT read, and possibly write domain.
3293  * @obj: object to act on
3294  * @write: ask for write access or read only
3295  *
3296  * This function returns when the move is complete, including waiting on
3297  * flushes to occur.
3298  */
3299 int
3300 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
3301 {
3302         uint32_t old_write_domain, old_read_domains;
3303         int ret;
3304
3305         lockdep_assert_held(&obj->base.dev->struct_mutex);
3306
3307         ret = i915_gem_object_wait(obj,
3308                                    I915_WAIT_INTERRUPTIBLE |
3309                                    I915_WAIT_LOCKED |
3310                                    (write ? I915_WAIT_ALL : 0),
3311                                    MAX_SCHEDULE_TIMEOUT,
3312                                    NULL);
3313         if (ret)
3314                 return ret;
3315
3316         if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
3317                 return 0;
3318
3319         /* Flush and acquire obj->pages so that we are coherent through
3320          * direct access in memory with previous cached writes through
3321          * shmemfs and that our cache domain tracking remains valid.
3322          * For example, if the obj->filp was moved to swap without us
3323          * being notified and releasing the pages, we would mistakenly
3324          * continue to assume that the obj remained out of the CPU cached
3325          * domain.
3326          */
3327         ret = i915_gem_object_pin_pages(obj);
3328         if (ret)
3329                 return ret;
3330
3331         i915_gem_object_flush_cpu_write_domain(obj);
3332
3333         /* Serialise direct access to this object with the barriers for
3334          * coherent writes from the GPU, by effectively invalidating the
3335          * GTT domain upon first access.
3336          */
3337         if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
3338                 mb();
3339
3340         old_write_domain = obj->base.write_domain;
3341         old_read_domains = obj->base.read_domains;
3342
3343         /* It should now be out of any other write domains, and we can update
3344          * the domain values for our changes.
3345          */
3346         GEM_BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
3347         obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
3348         if (write) {
3349                 obj->base.read_domains = I915_GEM_DOMAIN_GTT;
3350                 obj->base.write_domain = I915_GEM_DOMAIN_GTT;
3351                 obj->mm.dirty = true;
3352         }
3353
3354         trace_i915_gem_object_change_domain(obj,
3355                                             old_read_domains,
3356                                             old_write_domain);
3357
3358         i915_gem_object_unpin_pages(obj);
3359         return 0;
3360 }
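
/*
 * Illustrative sketch (not built): the usual calling pattern for the
 * domain helper above: take struct_mutex, move the object into the GTT
 * write domain, then perform the GTT access. The function name is
 * hypothetical.
 */
#if 0
static int example_prepare_for_gtt_write(struct drm_i915_gem_object *obj)
{
        struct drm_device *dev = obj->base.dev;
        int ret;

        ret = i915_mutex_lock_interruptible(dev);
        if (ret)
                return ret;

        ret = i915_gem_object_set_to_gtt_domain(obj, true);
        mutex_unlock(&dev->struct_mutex);

        return ret;
}
#endif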
3361
3362 /**
3363  * Changes the cache-level of an object across all VMA.
3364  * @obj: object to act on
3365  * @cache_level: new cache level to set for the object
3366  *
3367  * After this function returns, the object will be in the new cache-level
3368  * across all GTT and the contents of the backing storage will be coherent,
3369  * with respect to the new cache-level. In order to keep the backing storage
3370  * coherent for all users, we only allow a single cache level to be set
3371  * globally on the object and prevent it from being changed whilst the
3372  * hardware is reading from the object. That is, if the object is currently
3373  * on the scanout, it will be set to uncached (or equivalent display
3374  * cache coherency) and all non-MOCS GPU access will also be uncached so
3375  * that all direct access to the scanout remains coherent.
3376  */
3377 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
3378                                     enum i915_cache_level cache_level)
3379 {
3380         struct i915_vma *vma;
3381         int ret = 0;
3382
3383         lockdep_assert_held(&obj->base.dev->struct_mutex);
3384
3385         if (obj->cache_level == cache_level)
3386                 goto out;
3387
3388         /* Inspect the list of currently bound VMA and unbind any that would
3389          * be invalid given the new cache-level. This is principally to
3390          * catch the issue of the CS prefetch crossing page boundaries and
3391          * reading an invalid PTE on older architectures.
3392          */
3393 restart:
3394         list_for_each_entry(vma, &obj->vma_list, obj_link) {
3395                 if (!drm_mm_node_allocated(&vma->node))
3396                         continue;
3397
3398                 if (i915_vma_is_pinned(vma)) {
3399                         DRM_DEBUG("can not change the cache level of pinned objects\n");
3400                         return -EBUSY;
3401                 }
3402
3403                 if (i915_gem_valid_gtt_space(vma, cache_level))
3404                         continue;
3405
3406                 ret = i915_vma_unbind(vma);
3407                 if (ret)
3408                         return ret;
3409
3410                 /* As unbinding may affect other elements in the
3411                  * obj->vma_list (due to side-effects from retiring
3412                  * an active vma), play safe and restart the iterator.
3413                  */
3414                 goto restart;
3415         }
3416
3417         /* We can reuse the existing drm_mm nodes but need to change the
3418          * cache-level on the PTE. We could simply unbind them all and
3419          * rebind with the correct cache-level on next use. However since
3420          * we already have a valid slot, dma mapping, pages etc, we may as
3421          * we already have a valid slot, dma mapping, pages etc, we may as well
3422          * state and so involves less work.
3423          */
3424         if (obj->bind_count) {
3425                 /* Before we change the PTE, the GPU must not be accessing it.
3426                  * If we wait upon the object, we know that all the bound
3427                  * VMA are no longer active.
3428                  */
3429                 ret = i915_gem_object_wait(obj,
3430                                            I915_WAIT_INTERRUPTIBLE |
3431                                            I915_WAIT_LOCKED |
3432                                            I915_WAIT_ALL,
3433                                            MAX_SCHEDULE_TIMEOUT,
3434                                            NULL);
3435                 if (ret)
3436                         return ret;
3437
3438                 if (!HAS_LLC(obj->base.dev) && cache_level != I915_CACHE_NONE) {
3439                         /* Access to snoopable pages through the GTT is
3440                          * incoherent and on some machines causes a hard
3441                          * lockup. Relinquish the CPU mmapping to force
3442                          * userspace to refault in the pages and we can
3443                          * then double check if the GTT mapping is still
3444                          * valid for that pointer access.
3445                          */
3446                         i915_gem_release_mmap(obj);
3447
3448                         /* As we no longer need a fence for GTT access,
3449                          * we can relinquish it now (and so prevent having
3450                          * to steal a fence from someone else on the next
3451                          * fence request). Note GPU activity would have
3452                          * dropped the fence as all snoopable access is
3453                          * supposed to be linear.
3454                          */
3455                         list_for_each_entry(vma, &obj->vma_list, obj_link) {
3456                                 ret = i915_vma_put_fence(vma);
3457                                 if (ret)
3458                                         return ret;
3459                         }
3460                 } else {
3461                         /* We either have incoherent backing store and
3462                          * so no GTT access or the architecture is fully
3463                          * coherent. In such cases, existing GTT mmaps
3464                          * ignore the cache bit in the PTE and we can
3465                          * rewrite it without confusing the GPU or having
3466                          * to force userspace to fault back in its mmaps.
3467                          */
3468                 }
3469
3470                 list_for_each_entry(vma, &obj->vma_list, obj_link) {
3471                         if (!drm_mm_node_allocated(&vma->node))
3472                                 continue;
3473
3474                         ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
3475                         if (ret)
3476                                 return ret;
3477                 }
3478         }
3479
3480         list_for_each_entry(vma, &obj->vma_list, obj_link)
3481                 vma->node.color = cache_level;
3482         obj->cache_level = cache_level;
3483
3484 out:
3485         /* Flush the dirty CPU caches to the backing storage so that the
3486          * object is now coherent at its new cache level (with respect
3487          * to the access domain).
3488          */
3489         if (obj->cache_dirty && cpu_write_needs_clflush(obj)) {
3490                 if (i915_gem_clflush_object(obj, true))
3491                         i915_gem_chipset_flush(to_i915(obj->base.dev));
3492         }
3493
3494         return 0;
3495 }
3496
3497 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
3498                                struct drm_file *file)
3499 {
3500         struct drm_i915_gem_caching *args = data;
3501         struct drm_i915_gem_object *obj;
3502         int err = 0;
3503
3504         rcu_read_lock();
3505         obj = i915_gem_object_lookup_rcu(file, args->handle);
3506         if (!obj) {
3507                 err = -ENOENT;
3508                 goto out;
3509         }
3510
3511         switch (obj->cache_level) {
3512         case I915_CACHE_LLC:
3513         case I915_CACHE_L3_LLC:
3514                 args->caching = I915_CACHING_CACHED;
3515                 break;
3516
3517         case I915_CACHE_WT:
3518                 args->caching = I915_CACHING_DISPLAY;
3519                 break;
3520
3521         default:
3522                 args->caching = I915_CACHING_NONE;
3523                 break;
3524         }
3525 out:
3526         rcu_read_unlock();
3527         return err;
3528 }
3529
3530 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
3531                                struct drm_file *file)
3532 {
3533         struct drm_i915_private *i915 = to_i915(dev);
3534         struct drm_i915_gem_caching *args = data;
3535         struct drm_i915_gem_object *obj;
3536         enum i915_cache_level level;
3537         int ret;
3538
3539         switch (args->caching) {
3540         case I915_CACHING_NONE:
3541                 level = I915_CACHE_NONE;
3542                 break;
3543         case I915_CACHING_CACHED:
3544                 /*
3545                  * Due to a HW issue on BXT A stepping, GPU stores via a
3546                  * snooped mapping may leave stale data in a corresponding CPU
3547                  * cacheline, whereas normally such cachelines would get
3548                  * invalidated.
3549                  */
3550                 if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
3551                         return -ENODEV;
3552
3553                 level = I915_CACHE_LLC;
3554                 break;
3555         case I915_CACHING_DISPLAY:
3556                 level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
3557                 break;
3558         default:
3559                 return -EINVAL;
3560         }
3561
3562         ret = i915_mutex_lock_interruptible(dev);
3563         if (ret)
3564                 return ret;
3565
3566         obj = i915_gem_object_lookup(file, args->handle);
3567         if (!obj) {
3568                 ret = -ENOENT;
3569                 goto unlock;
3570         }
3571
3572         ret = i915_gem_object_set_cache_level(obj, level);
3573         i915_gem_object_put(obj);
3574 unlock:
3575         mutex_unlock(&dev->struct_mutex);
3576         return ret;
3577 }
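
/*
 * Illustrative userspace sketch (not part of the driver, hence #if 0):
 * requesting a caching mode for a buffer via the ioctl above. Assumes
 * the libdrm headers and an already-open DRM fd; the wrapper name is
 * hypothetical.
 */
#if 0
#include <errno.h>
#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include <i915_drm.h>

static int set_bo_caching(int fd, uint32_t handle, uint32_t caching)
{
        struct drm_i915_gem_caching arg;

        memset(&arg, 0, sizeof(arg));
        arg.handle = handle;
        arg.caching = caching; /* e.g. I915_CACHING_CACHED */

        if (drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg))
                return -errno; /* ENODEV if snooping is unsupported */

        return 0;
}
#endif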
3578
3579 /*
3580  * Prepare buffer for display plane (scanout, cursors, etc).
3581  * Can be called from an uninterruptible phase (modesetting) and allows
3582  * any flushes to be pipelined (for pageflips).
3583  */
3584 struct i915_vma *
3585 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
3586                                      u32 alignment,
3587                                      const struct i915_ggtt_view *view)
3588 {
3589         struct i915_vma *vma;
3590         u32 old_read_domains, old_write_domain;
3591         int ret;
3592
3593         lockdep_assert_held(&obj->base.dev->struct_mutex);
3594
3595         /* Mark the pin_display early so that we account for the
3596          * display coherency whilst setting up the cache domains.
3597          */
3598         obj->pin_display++;
3599
3600         /* The display engine is not coherent with the LLC cache on gen6.  As
3601          * a result, we make sure that the pinning that is about to occur is
3602          * done with uncached PTEs. This is the lowest common denominator for all
3603          * chipsets.
3604          *
3605          * However for gen6+, we could do better by using the GFDT bit instead
3606          * of uncaching, which would allow us to flush all the LLC-cached data
3607          * with that bit in the PTE to main memory with just one PIPE_CONTROL.
3608          */
3609         ret = i915_gem_object_set_cache_level(obj,
3610                                               HAS_WT(to_i915(obj->base.dev)) ?
3611                                               I915_CACHE_WT : I915_CACHE_NONE);
3612         if (ret) {
3613                 vma = ERR_PTR(ret);
3614                 goto err_unpin_display;
3615         }
3616
3617         /* As the user may map the buffer once pinned in the display plane
3618          * (e.g. libkms for the bootup splash), we have to ensure that we
3619          * always use map_and_fenceable for all scanout buffers. However,
3620          * it may simply be too big to fit into mappable, in which case
3621          * put it anyway and hope that userspace can cope (but always first
3622          * try to preserve the existing ABI).
3623          */
3624         vma = ERR_PTR(-ENOSPC);
3625         if (view->type == I915_GGTT_VIEW_NORMAL)
3626                 vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
3627                                                PIN_MAPPABLE | PIN_NONBLOCK);
3628         if (IS_ERR(vma)) {
3629                 struct drm_i915_private *i915 = to_i915(obj->base.dev);
3630                 unsigned int flags;
3631
3632                 /* Valleyview is definitely limited to scanning out the first
3633                  * 512MiB. Lets presume this behaviour was inherited from the
3634                  * 512MiB. Let's presume this behaviour was inherited from the
3635                  * limited. Testing suggests that it is a little more
3636                  * complicated than this. For example, Cherryview appears quite
3637                  * happy to scanout from anywhere within its global aperture.
3638                  */
3639                 flags = 0;
3640                 if (HAS_GMCH_DISPLAY(i915))
3641                         flags = PIN_MAPPABLE;
3642                 vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags);
3643         }
3644         if (IS_ERR(vma))
3645                 goto err_unpin_display;
3646
3647         vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
3648
3649         i915_gem_object_flush_cpu_write_domain(obj);
3650
3651         old_write_domain = obj->base.write_domain;
3652         old_read_domains = obj->base.read_domains;
3653
3654         /* It should now be out of any other write domains, and we can update
3655          * the domain values for our changes.
3656          */
3657         obj->base.write_domain = 0;
3658         obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
3659
3660         trace_i915_gem_object_change_domain(obj,
3661                                             old_read_domains,
3662                                             old_write_domain);
3663
3664         return vma;
3665
3666 err_unpin_display:
3667         obj->pin_display--;
3668         return vma;
3669 }
3670
3671 void
3672 i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
3673 {
3674         lockdep_assert_held(&vma->vm->dev->struct_mutex);
3675
3676         if (WARN_ON(vma->obj->pin_display == 0))
3677                 return;
3678
3679         if (--vma->obj->pin_display == 0)
3680                 vma->display_alignment = 0;
3681
3682         /* Bump the LRU to try and avoid premature eviction whilst flipping */
3683         if (!i915_vma_is_active(vma))
3684                 list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
3685
3686         i915_vma_unpin(vma);
3687 }
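
/*
 * Illustrative sketch (not built): the pin/unpin pairing for scanout,
 * mirroring how display code drives the two helpers above. The function
 * name and error handling are hypothetical.
 */
#if 0
static struct i915_vma *
example_pin_scanout(struct drm_i915_gem_object *obj,
                    const struct i915_ggtt_view *view)
{
        struct i915_vma *vma;

        lockdep_assert_held(&obj->base.dev->struct_mutex);

        vma = i915_gem_object_pin_to_display_plane(obj, 0, view);
        if (IS_ERR(vma))
                return vma;

        /* ... scanout from vma, and once the plane is disabled: */
        /* i915_gem_object_unpin_from_display_plane(vma); */

        return vma;
}
#endif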
3688
3689 /**
3690  * Moves a single object to the CPU read, and possibly write domain.
3691  * @obj: object to act on
3692  * @write: requesting write or read-only access
3693  *
3694  * This function returns when the move is complete, including waiting on
3695  * flushes to occur.
3696  */
3697 int
3698 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
3699 {
3700         uint32_t old_write_domain, old_read_domains;
3701         int ret;
3702
3703         lockdep_assert_held(&obj->base.dev->struct_mutex);
3704
3705         ret = i915_gem_object_wait(obj,
3706                                    I915_WAIT_INTERRUPTIBLE |
3707                                    I915_WAIT_LOCKED |
3708                                    (write ? I915_WAIT_ALL : 0),
3709                                    MAX_SCHEDULE_TIMEOUT,
3710                                    NULL);
3711         if (ret)
3712                 return ret;
3713
3714         if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
3715                 return 0;
3716
3717         i915_gem_object_flush_gtt_write_domain(obj);
3718
3719         old_write_domain = obj->base.write_domain;
3720         old_read_domains = obj->base.read_domains;
3721
3722         /* Flush the CPU cache if it's still invalid. */
3723         if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
3724                 i915_gem_clflush_object(obj, false);
3725
3726                 obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
3727         }
3728
3729         /* It should now be out of any other write domains, and we can update
3730          * the domain values for our changes.
3731          */
3732         GEM_BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
3733
3734         /* If we're writing through the CPU, then the GPU read domains will
3735          * need to be invalidated at next use.
3736          */
3737         if (write) {
3738                 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
3739                 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
3740         }
3741
3742         trace_i915_gem_object_change_domain(obj,
3743                                             old_read_domains,
3744                                             old_write_domain);
3745
3746         return 0;
3747 }
3748
3749 /* Throttle our rendering by waiting until the ring has completed our requests
3750  * emitted over 20 msec ago.
3751  *
3752  * Note that if we were to use the current jiffies each time around the loop,
3753  * we wouldn't escape the function with any frames outstanding if the time to
3754  * render a frame was over 20ms.
3755  *
3756  * This should get us reasonable parallelism between CPU and GPU but also
3757  * relatively low latency when blocking on a particular request to finish.
3758  */
3759 static int
3760 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
3761 {
3762         struct drm_i915_private *dev_priv = to_i915(dev);
3763         struct drm_i915_file_private *file_priv = file->driver_priv;
3764         unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
3765         struct drm_i915_gem_request *request, *target = NULL;
3766         long ret;
3767
3768         /* ABI: return -EIO if already wedged */
3769         if (i915_terminally_wedged(&dev_priv->gpu_error))
3770                 return -EIO;
3771
3772         spin_lock(&file_priv->mm.lock);
3773         list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
3774                 if (time_after_eq(request->emitted_jiffies, recent_enough))
3775                         break;
3776
3777                 /*
3778                  * Note that the request might not have been submitted yet,
3779                  * in which case emitted_jiffies will be zero.
3780                  */
3781                 if (!request->emitted_jiffies)
3782                         continue;
3783
3784                 target = request;
3785         }
3786         if (target)
3787                 i915_gem_request_get(target);
3788         spin_unlock(&file_priv->mm.lock);
3789
3790         if (target == NULL)
3791                 return 0;
3792
3793         ret = i915_wait_request(target,
3794                                 I915_WAIT_INTERRUPTIBLE,
3795                                 MAX_SCHEDULE_TIMEOUT);
3796         i915_gem_request_put(target);
3797
3798         return ret < 0 ? ret : 0;
3799 }
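
/*
 * Illustrative userspace sketch (not part of the driver, hence #if 0): a
 * client that wants to self-throttle issues the throttle ioctl, which
 * blocks in the helper above until the requests it emitted more than
 * 20 msec ago have completed. Assumes libdrm; the wrapper name is
 * hypothetical.
 */
#if 0
#include <errno.h>
#include <xf86drm.h>
#include <i915_drm.h>

static int throttle_me(int fd)
{
        if (drmIoctl(fd, DRM_IOCTL_I915_GEM_THROTTLE, NULL))
                return -errno; /* EIO once the GPU is wedged */

        return 0;
}
#endif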
3800
3801 static bool
3802 i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
3803 {
3804         if (!drm_mm_node_allocated(&vma->node))
3805                 return false;
3806
3807         if (vma->node.size < size)
3808                 return true;
3809
3810         if (alignment && vma->node.start & (alignment - 1))
3811                 return true;
3812
3813         if (flags & PIN_MAPPABLE && !i915_vma_is_map_and_fenceable(vma))
3814                 return true;
3815
3816         if (flags & PIN_OFFSET_BIAS &&
3817             vma->node.start < (flags & PIN_OFFSET_MASK))
3818                 return true;
3819
3820         if (flags & PIN_OFFSET_FIXED &&
3821             vma->node.start != (flags & PIN_OFFSET_MASK))
3822                 return true;
3823
3824         return false;
3825 }
3826
3827 void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
3828 {
3829         struct drm_i915_gem_object *obj = vma->obj;
3830         struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
3831         bool mappable, fenceable;
3832         u32 fence_size, fence_alignment;
3833
3834         fence_size = i915_gem_get_ggtt_size(dev_priv,
3835                                             vma->size,
3836                                             i915_gem_object_get_tiling(obj));
3837         fence_alignment = i915_gem_get_ggtt_alignment(dev_priv,
3838                                                       vma->size,
3839                                                       i915_gem_object_get_tiling(obj),
3840                                                       true);
3841
3842         fenceable = (vma->node.size == fence_size &&
3843                      (vma->node.start & (fence_alignment - 1)) == 0);
3844
3845         mappable = (vma->node.start + fence_size <=
3846                     dev_priv->ggtt.mappable_end);
3847
3848         /*
3849          * Explicitly disable for rotated VMA since the display does not
3850          * need the fence and the VMA is not accessible to other users.
3851          */
3852         if (mappable && fenceable &&
3853             vma->ggtt_view.type != I915_GGTT_VIEW_ROTATED)
3854                 vma->flags |= I915_VMA_CAN_FENCE;
3855         else
3856                 vma->flags &= ~I915_VMA_CAN_FENCE;
3857 }
3858
3859 int __i915_vma_do_pin(struct i915_vma *vma,
3860                       u64 size, u64 alignment, u64 flags)
3861 {
3862         unsigned int bound = vma->flags;
3863         int ret;
3864
3865         lockdep_assert_held(&vma->vm->dev->struct_mutex);
3866         GEM_BUG_ON((flags & (PIN_GLOBAL | PIN_USER)) == 0);
3867         GEM_BUG_ON((flags & PIN_GLOBAL) && !i915_vma_is_ggtt(vma));
3868
3869         if (WARN_ON(bound & I915_VMA_PIN_OVERFLOW)) {
3870                 ret = -EBUSY;
3871                 goto err;
3872         }
3873
3874         if ((bound & I915_VMA_BIND_MASK) == 0) {
3875                 ret = i915_vma_insert(vma, size, alignment, flags);
3876                 if (ret)
3877                         goto err;
3878         }
3879
3880         ret = i915_vma_bind(vma, vma->obj->cache_level, flags);
3881         if (ret)
3882                 goto err;
3883
3884         if ((bound ^ vma->flags) & I915_VMA_GLOBAL_BIND)
3885                 __i915_vma_set_map_and_fenceable(vma);
3886
3887         GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags));
3888         return 0;
3889
3890 err:
3891         __i915_vma_unpin(vma);
3892         return ret;
3893 }
3894
3895 struct i915_vma *
3896 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
3897                          const struct i915_ggtt_view *view,
3898                          u64 size,
3899                          u64 alignment,
3900                          u64 flags)
3901 {
3902         struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
3903         struct i915_address_space *vm = &dev_priv->ggtt.base;
3904         struct i915_vma *vma;
3905         int ret;
3906
3907         lockdep_assert_held(&obj->base.dev->struct_mutex);
3908
3909         vma = i915_gem_obj_lookup_or_create_vma(obj, vm, view);
3910         if (IS_ERR(vma))
3911                 return vma;
3912
3913         if (i915_vma_misplaced(vma, size, alignment, flags)) {
3914                 if (flags & PIN_NONBLOCK &&
3915                     (i915_vma_is_pinned(vma) || i915_vma_is_active(vma)))
3916                         return ERR_PTR(-ENOSPC);
3917
3918                 if (flags & PIN_MAPPABLE) {
3919                         u32 fence_size;
3920
3921                         fence_size = i915_gem_get_ggtt_size(dev_priv, vma->size,
3922                                                             i915_gem_object_get_tiling(obj));
3923                         /* If the required space is larger than the available
3924                          * aperture, we will not be able to find a slot for the
3925                          * object and unbinding the object now will be in
3926                          * vain. Worse, doing so may cause us to ping-pong
3927                          * the object in and out of the Global GTT and
3928                          * waste a lot of cycles under the mutex.
3929                          */
3930                         if (fence_size > dev_priv->ggtt.mappable_end)
3931                                 return ERR_PTR(-E2BIG);
3932
3933                         /* If NONBLOCK is set the caller is optimistically
3934                          * trying to cache the full object within the mappable
3935                          * aperture, and *must* have a fallback in place for
3936                          * situations where we cannot bind the object. We
3937                          * can be a little more lax here and use the fallback
3938                          * more often to avoid costly migrations of ourselves
3939                          * and other objects within the aperture.
3940                          *
3941                          * Half-the-aperture is used as a simple heuristic.
3942                          * More interesting would be to search for a free
3943                          * block prior to making the commitment to unbind.
3944                          * That caters for the self-harm case, and with a
3945                          * little more heuristics (e.g. NOFAULT, NOEVICT)
3946                          * we could try to minimise harm to others.
3947                          */
3948                         if (flags & PIN_NONBLOCK &&
3949                             fence_size > dev_priv->ggtt.mappable_end / 2)
3950                                 return ERR_PTR(-ENOSPC);
3951                 }
3952
3953                 WARN(i915_vma_is_pinned(vma),
3954                      "bo is already pinned in ggtt with incorrect alignment:"
3955                      " offset=%08x, req.alignment=%llx,"
3956                      " req.map_and_fenceable=%d, vma->map_and_fenceable=%d\n",
3957                      i915_ggtt_offset(vma), alignment,
3958                      !!(flags & PIN_MAPPABLE),
3959                      i915_vma_is_map_and_fenceable(vma));
3960                 ret = i915_vma_unbind(vma);
3961                 if (ret)
3962                         return ERR_PTR(ret);
3963         }
3964
3965         ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
3966         if (ret)
3967                 return ERR_PTR(ret);
3968
3969         return vma;
3970 }
3971
3972 static __always_inline unsigned int __busy_read_flag(unsigned int id)
3973 {
3974         /* Note that we could alias engines in the execbuf API, but
3975          * that would be very unwise as it prevents userspace from exercising
3976          * fine control over engine selection. Ahem.
3977          *
3978          * This should be something like EXEC_MAX_ENGINE instead of
3979          * I915_NUM_ENGINES.
3980          */
3981         BUILD_BUG_ON(I915_NUM_ENGINES > 16);
3982         return 0x10000 << id;
3983 }
3984
3985 static __always_inline unsigned int __busy_write_id(unsigned int id)
3986 {
3987         /* The uABI guarantees an active writer is also amongst the read
3988          * engines. This would be true if we accessed the activity tracking
3989          * under the lock, but as we perform the lookup of the object and
3990          * its activity locklessly we can not guarantee that the last_write
3991          * being active implies that we have set the same engine flag from
3992          * last_read - hence we always set both read and write busy for
3993          * last_write.
3994          */
3995         return id | __busy_read_flag(id);
3996 }
3997
3998 static __always_inline unsigned int
3999 __busy_set_if_active(const struct dma_fence *fence,
4000                      unsigned int (*flag)(unsigned int id))
4001 {
4002         struct drm_i915_gem_request *rq;
4003
4004         /* We have to check the current hw status of the fence as the uABI
4005          * guarantees forward progress. We could rely on the idle worker
4006          * to eventually flush us, but to minimise latency just ask the
4007          * hardware.
4008          *
4009          * Note we only report on the status of native fences.
4010          */
4011         if (!dma_fence_is_i915(fence))
4012                 return 0;
4013
4014         /* opencode to_request() in order to avoid const warnings */
4015         rq = container_of(fence, struct drm_i915_gem_request, fence);
4016         if (i915_gem_request_completed(rq))
4017                 return 0;
4018
4019         return flag(rq->engine->exec_id);
4020 }
4021
4022 static __always_inline unsigned int
4023 busy_check_reader(const struct dma_fence *fence)
4024 {
4025         return __busy_set_if_active(fence, __busy_read_flag);
4026 }
4027
4028 static __always_inline unsigned int
4029 busy_check_writer(const struct dma_fence *fence)
4030 {
4031         if (!fence)
4032                 return 0;
4033
4034         return __busy_set_if_active(fence, __busy_write_id);
4035 }
4036
4037 int
4038 i915_gem_busy_ioctl(struct drm_device *dev, void *data,
4039                     struct drm_file *file)
4040 {
4041         struct drm_i915_gem_busy *args = data;
4042         struct drm_i915_gem_object *obj;
4043         struct reservation_object_list *list;
4044         unsigned int seq;
4045         int err;
4046
4047         err = -ENOENT;
4048         rcu_read_lock();
4049         obj = i915_gem_object_lookup_rcu(file, args->handle);
4050         if (!obj)
4051                 goto out;
4052
4053         /* A discrepancy here is that we do not report the status of
4054          * non-i915 fences, i.e. even though we may report the object as idle,
4055          * a call to set-domain may still stall waiting for foreign rendering.
4056          * This also means that wait-ioctl may report an object as busy,
4057          * where busy-ioctl considers it idle.
4058          *
4059          * We trade the ability to warn of foreign fences to report on which
4060          * i915 engines are active for the object.
4061          *
4062          * Alternatively, we can trade that extra information on read/write
4063          * activity with
4064          *      args->busy =
4065          *              !reservation_object_test_signaled_rcu(obj->resv, true);
4066          * to report the overall busyness. This is what the wait-ioctl does.
4067          *
4068          */
4069 retry:
4070         seq = raw_read_seqcount(&obj->resv->seq);
4071
4072         /* Translate the exclusive fence to the READ *and* WRITE engine */
4073         args->busy = busy_check_writer(rcu_dereference(obj->resv->fence_excl));
4074
4075         /* Translate shared fences to READ set of engines */
4076         list = rcu_dereference(obj->resv->fence);
4077         if (list) {
4078                 unsigned int shared_count = list->shared_count, i;
4079
4080                 for (i = 0; i < shared_count; ++i) {
4081                         struct dma_fence *fence =
4082                                 rcu_dereference(list->shared[i]);
4083
4084                         args->busy |= busy_check_reader(fence);
4085                 }
4086         }
4087
4088         if (args->busy && read_seqcount_retry(&obj->resv->seq, seq))
4089                 goto retry;
4090
4091         err = 0;
4092 out:
4093         rcu_read_unlock();
4094         return err;
4095 }
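
/*
 * Illustrative userspace sketch (not part of the driver, hence #if 0):
 * decoding the busy-ioctl result produced above. The low 16 bits carry
 * the exec_id of the engine that last wrote to the object (0 when idle),
 * the high 16 bits one busy-bit per reading engine. Assumes libdrm; the
 * wrapper name is hypothetical.
 */
#if 0
#include <errno.h>
#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include <i915_drm.h>

static int query_bo_busy(int fd, uint32_t handle)
{
        struct drm_i915_gem_busy arg;
        uint32_t write_engine, read_engines;

        memset(&arg, 0, sizeof(arg));
        arg.handle = handle;

        if (drmIoctl(fd, DRM_IOCTL_I915_GEM_BUSY, &arg))
                return -errno;

        write_engine = arg.busy & 0xffff; /* exec_id of the writer, if any */
        read_engines = arg.busy >> 16;    /* one bit per reading engine */

        return write_engine || read_engines; /* non-zero means still busy */
}
#endif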
4096
4097 int
4098 i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
4099                         struct drm_file *file_priv)
4100 {
4101         return i915_gem_ring_throttle(dev, file_priv);
4102 }
4103
4104 int
4105 i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
4106                        struct drm_file *file_priv)
4107 {
4108         struct drm_i915_private *dev_priv = to_i915(dev);
4109         struct drm_i915_gem_madvise *args = data;
4110         struct drm_i915_gem_object *obj;
4111         int err;
4112
4113         switch (args->madv) {
4114         case I915_MADV_DONTNEED:
4115         case I915_MADV_WILLNEED:
4116             break;
4117         default:
4118             return -EINVAL;
4119         }
4120
4121         obj = i915_gem_object_lookup(file_priv, args->handle);
4122         if (!obj)
4123                 return -ENOENT;
4124
4125         err = mutex_lock_interruptible(&obj->mm.lock);
4126         if (err)
4127                 goto out;
4128
4129         if (obj->mm.pages &&
4130             i915_gem_object_is_tiled(obj) &&
4131             dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
4132                 if (obj->mm.madv == I915_MADV_WILLNEED) {
4133                         GEM_BUG_ON(!obj->mm.quirked);
4134                         __i915_gem_object_unpin_pages(obj);
4135                         obj->mm.quirked = false;
4136                 }
4137                 if (args->madv == I915_MADV_WILLNEED) {
4138                         GEM_BUG_ON(obj->mm.quirked);
4139                         __i915_gem_object_pin_pages(obj);
4140                         obj->mm.quirked = true;
4141                 }
4142         }
4143
4144         if (obj->mm.madv != __I915_MADV_PURGED)
4145                 obj->mm.madv = args->madv;
4146
4147         /* if the object is no longer attached, discard its backing storage */
4148         if (obj->mm.madv == I915_MADV_DONTNEED && !obj->mm.pages)
4149                 i915_gem_object_truncate(obj);
4150
4151         args->retained = obj->mm.madv != __I915_MADV_PURGED;
4152         mutex_unlock(&obj->mm.lock);
4153
4154 out:
4155         i915_gem_object_put(obj);
4156         return err;
4157 }
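
/*
 * Illustrative userspace sketch (not part of the driver, hence #if 0):
 * marking a cached buffer as purgeable via the ioctl above, and checking
 * "retained" before trusting its contents again later. Assumes libdrm;
 * the wrapper name is hypothetical.
 */
#if 0
#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include <i915_drm.h>

static int mark_bo_purgeable(int fd, uint32_t handle, bool *retained)
{
        struct drm_i915_gem_madvise arg;

        memset(&arg, 0, sizeof(arg));
        arg.handle = handle;
        arg.madv = I915_MADV_DONTNEED;

        if (drmIoctl(fd, DRM_IOCTL_I915_GEM_MADVISE, &arg))
                return -errno;

        *retained = arg.retained; /* zero once the pages have been purged */
        return 0;
}
#endif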
4158
4159 void i915_gem_object_init(struct drm_i915_gem_object *obj,
4160                           const struct drm_i915_gem_object_ops *ops)
4161 {
4162         mutex_init(&obj->mm.lock);
4163
4164         INIT_LIST_HEAD(&obj->global_link);
4165         INIT_LIST_HEAD(&obj->userfault_link);
4166         INIT_LIST_HEAD(&obj->obj_exec_link);
4167         INIT_LIST_HEAD(&obj->vma_list);
4168         INIT_LIST_HEAD(&obj->batch_pool_link);
4169
4170         obj->ops = ops;
4171
4172         reservation_object_init(&obj->__builtin_resv);
4173         obj->resv = &obj->__builtin_resv;
4174
4175         obj->frontbuffer_ggtt_origin = ORIGIN_GTT;
4176
4177         obj->mm.madv = I915_MADV_WILLNEED;
4178         INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN);
4179         mutex_init(&obj->mm.get_page.lock);
4180
4181         i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size);
4182 }
4183
4184 static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
4185         .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
4186                  I915_GEM_OBJECT_IS_SHRINKABLE,
4187         .get_pages = i915_gem_object_get_pages_gtt,
4188         .put_pages = i915_gem_object_put_pages_gtt,
4189 };
4190
4191 /* Note we don't consider signbits :| */
4192 #define overflows_type(x, T) \
4193         (sizeof(x) > sizeof(T) && (x) >> (sizeof(T) * BITS_PER_BYTE))
4194
4195 struct drm_i915_gem_object *
4196 i915_gem_object_create(struct drm_device *dev, u64 size)
4197 {
4198         struct drm_i915_private *dev_priv = to_i915(dev);
4199         struct drm_i915_gem_object *obj;
4200         struct address_space *mapping;
4201         gfp_t mask;
4202         int ret;
4203
4204         /* There is a prevalence of the assumption that we fit the object's
4205          * page count inside a 32bit _signed_ variable. Let's document this and
4206          * catch if we ever need to fix it. In the meantime, if you do spot
4207          * such a local variable, please consider fixing!
4208          */
4209         if (WARN_ON(size >> PAGE_SHIFT > INT_MAX))
4210                 return ERR_PTR(-E2BIG);
4211
4212         if (overflows_type(size, obj->base.size))
4213                 return ERR_PTR(-E2BIG);
4214
4215         obj = i915_gem_object_alloc(dev);
4216         if (obj == NULL)
4217                 return ERR_PTR(-ENOMEM);
4218
4219         ret = drm_gem_object_init(dev, &obj->base, size);
4220         if (ret)
4221                 goto fail;
4222
4223         mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
4224         if (IS_CRESTLINE(dev_priv) || IS_BROADWATER(dev_priv)) {
4225                 /* 965gm cannot relocate objects above 4GiB. */
4226                 mask &= ~__GFP_HIGHMEM;
4227                 mask |= __GFP_DMA32;
4228         }
4229
4230         mapping = obj->base.filp->f_mapping;
4231         mapping_set_gfp_mask(mapping, mask);
4232
4233         i915_gem_object_init(obj, &i915_gem_object_ops);
4234
4235         obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4236         obj->base.read_domains = I915_GEM_DOMAIN_CPU;
4237
4238         if (HAS_LLC(dev)) {
4239                 /* On some devices, we can have the GPU use the LLC (the CPU
4240                  * cache) for about a 10% performance improvement
4241                  * compared to uncached.  Graphics requests other than
4242                  * display scanout are coherent with the CPU in
4243                  * accessing this cache.  This means in this mode we
4244                  * don't need to clflush on the CPU side, and on the
4245                  * GPU side we only need to flush internal caches to
4246                  * get data visible to the CPU.
4247                  *
4248                  * However, we maintain the display planes as UC, and so
4249                  * need to rebind when first used as such.
4250                  */
4251                 obj->cache_level = I915_CACHE_LLC;
4252         } else
4253                 obj->cache_level = I915_CACHE_NONE;
4254
4255         trace_i915_gem_object_create(obj);
4256
4257         return obj;
4258
4259 fail:
4260         i915_gem_object_free(obj);
4261         return ERR_PTR(ret);
4262 }
4263
4264 static bool discard_backing_storage(struct drm_i915_gem_object *obj)
4265 {
4266         /* If we are the last user of the backing storage (be it shmemfs
4267          * pages or stolen etc), we know that the pages are going to be
4268          * immediately released. In this case, we can then skip copying
4269          * back the contents from the GPU.
4270          */
4271
4272         if (obj->mm.madv != I915_MADV_WILLNEED)
4273                 return false;
4274
4275         if (obj->base.filp == NULL)
4276                 return true;
4277
4278         /* At first glance, this looks racy, but then again so would be
4279          * userspace racing mmap against close. However, the first external
4280          * reference to the filp can only be obtained through the
4281          * i915_gem_mmap_ioctl() which safeguards us against the user
4282          * acquiring such a reference whilst we are in the middle of
4283          * freeing the object.
4284          */
4285         return atomic_long_read(&obj->base.filp->f_count) == 1;
4286 }
4287
4288 static void __i915_gem_free_objects(struct drm_i915_private *i915,
4289                                     struct llist_node *freed)
4290 {
4291         struct drm_i915_gem_object *obj, *on;
4292
4293         mutex_lock(&i915->drm.struct_mutex);
4294         intel_runtime_pm_get(i915);
4295         llist_for_each_entry(obj, freed, freed) {
4296                 struct i915_vma *vma, *vn;
4297
4298                 trace_i915_gem_object_destroy(obj);
4299
4300                 GEM_BUG_ON(i915_gem_object_is_active(obj));
4301                 list_for_each_entry_safe(vma, vn,
4302                                          &obj->vma_list, obj_link) {
4303                         GEM_BUG_ON(!i915_vma_is_ggtt(vma));
4304                         GEM_BUG_ON(i915_vma_is_active(vma));
4305                         vma->flags &= ~I915_VMA_PIN_MASK;
4306                         i915_vma_close(vma);
4307                 }
4308                 GEM_BUG_ON(!list_empty(&obj->vma_list));
4309                 GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma_tree));
4310
4311                 list_del(&obj->global_link);
4312         }
4313         intel_runtime_pm_put(i915);
4314         mutex_unlock(&i915->drm.struct_mutex);
4315
4316         llist_for_each_entry_safe(obj, on, freed, freed) {
4317                 GEM_BUG_ON(obj->bind_count);
4318                 GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits));
4319
4320                 if (obj->ops->release)
4321                         obj->ops->release(obj);
4322
4323                 if (WARN_ON(i915_gem_object_has_pinned_pages(obj)))
4324                         atomic_set(&obj->mm.pages_pin_count, 0);
4325                 __i915_gem_object_put_pages(obj, I915_MM_NORMAL);
4326                 GEM_BUG_ON(obj->mm.pages);
4327
4328                 if (obj->base.import_attach)
4329                         drm_prime_gem_destroy(&obj->base, NULL);
4330
4331                 reservation_object_fini(&obj->__builtin_resv);
4332                 drm_gem_object_release(&obj->base);
4333                 i915_gem_info_remove_obj(i915, obj->base.size);
4334
4335                 kfree(obj->bit_17);
4336                 i915_gem_object_free(obj);
4337         }
4338 }
4339
4340 static void i915_gem_flush_free_objects(struct drm_i915_private *i915)
4341 {
4342         struct llist_node *freed;
4343
4344         freed = llist_del_all(&i915->mm.free_list);
4345         if (unlikely(freed))
4346                 __i915_gem_free_objects(i915, freed);
4347 }
4348
4349 static void __i915_gem_free_work(struct work_struct *work)
4350 {
4351         struct drm_i915_private *i915 =
4352                 container_of(work, struct drm_i915_private, mm.free_work);
4353         struct llist_node *freed;
4354
4355         /* All file-owned VMA should have been released by this point through
4356          * i915_gem_close_object(), or earlier by i915_gem_context_close().
4357          * However, the object may also be bound into the global GTT (e.g.
4358          * older GPUs without per-process support, or for direct access through
4359          * the GTT either for the user or for scanout). Those VMA still need to
4360          * be unbound now.
4361          */
4362
4363         while ((freed = llist_del_all(&i915->mm.free_list)))
4364                 __i915_gem_free_objects(i915, freed);
4365 }
4366
4367 static void __i915_gem_free_object_rcu(struct rcu_head *head)
4368 {
4369         struct drm_i915_gem_object *obj =
4370                 container_of(head, typeof(*obj), rcu);
4371         struct drm_i915_private *i915 = to_i915(obj->base.dev);
4372
4373         /* We can't simply use call_rcu() from i915_gem_free_object()
4374          * as we need to block whilst unbinding, and the RCU callback
4375          * may be invoked from softirq context. So we take a
4376          * detour through a worker.
4377          */
4378         if (llist_add(&obj->freed, &i915->mm.free_list))
4379                 schedule_work(&i915->mm.free_work);
4380 }
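
/*
 * EXAMPLE (editor's sketch, not part of the original file): the deferral
 * pattern used above, with generic names.  An RCU callback may run in
 * softirq context, so work that must sleep (taking struct_mutex, unbinding
 * VMA) is pushed onto a lockless list and handed to a workqueue instead of
 * being done in the callback itself.  Assumes <linux/llist.h> and
 * <linux/workqueue.h>; none of these identifiers exist in i915.
 */
struct example_deferred_free {
	struct llist_head list;
	struct work_struct work;
};

static void example_free_worker(struct work_struct *work)
{
	struct example_deferred_free *d =
		container_of(work, typeof(*d), work);
	struct llist_node *freed;

	/* Batched drain; sleeping is allowed here, unlike in the callback. */
	freed = llist_del_all(&d->list);
	/* ... walk 'freed' and release each entry ... */
	(void)freed;
}

static void example_defer_free(struct example_deferred_free *d,
			       struct llist_node *node)
{
	/* llist_add() returns true only if the list was previously empty,
	 * so the worker is scheduled once per batch, mirroring the code
	 * above.
	 */
	if (llist_add(node, &d->list))
		schedule_work(&d->work);
}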
4381
4382 void i915_gem_free_object(struct drm_gem_object *gem_obj)
4383 {
4384         struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
4385
4386         if (obj->mm.quirked)
4387                 __i915_gem_object_unpin_pages(obj);
4388
4389         if (discard_backing_storage(obj))
4390                 obj->mm.madv = I915_MADV_DONTNEED;
4391
4392         /* Before we free the object, make sure any pure RCU-only
4393          * read-side critical sections are complete, e.g.
4394          * i915_gem_busy_ioctl(). For the corresponding synchronized
4395          * lookup see i915_gem_object_lookup_rcu().
4396          */
4397         call_rcu(&obj->rcu, __i915_gem_free_object_rcu);
4398 }
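
/*
 * EXAMPLE (editor's sketch, not part of the original file): the kind of
 * lockless lookup that the call_rcu() deferral above makes safe.  Because
 * the object memory is not released until a grace period has elapsed, a
 * pointer found under rcu_read_lock() stays dereferencable, but it may
 * belong to an object already heading for destruction, so the refcount
 * must be revalidated.  The radix tree and function name here are
 * hypothetical; the real lookup is i915_gem_object_lookup_rcu().
 */
static struct drm_i915_gem_object *
example_object_lookup_rcu(struct radix_tree_root *objects, unsigned long handle)
{
	struct drm_i915_gem_object *obj;

	rcu_read_lock();
	obj = radix_tree_lookup(objects, handle);
	if (obj && !kref_get_unless_zero(&obj->base.refcount))
		obj = NULL; /* raced with free: treat as not found */
	rcu_read_unlock();

	return obj;
}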
4399
4400 void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj)
4401 {
4402         lockdep_assert_held(&obj->base.dev->struct_mutex);
4403
4404         GEM_BUG_ON(i915_gem_object_has_active_reference(obj));
4405         if (i915_gem_object_is_active(obj))
4406                 i915_gem_object_set_active_reference(obj);
4407         else
4408                 i915_gem_object_put(obj);
4409 }
4410
4411 static void assert_kernel_context_is_current(struct drm_i915_private *dev_priv)
4412 {
4413         struct intel_engine_cs *engine;
4414         enum intel_engine_id id;
4415
4416         for_each_engine(engine, dev_priv, id)
4417                 GEM_BUG_ON(engine->last_context != dev_priv->kernel_context);
4418 }
4419
4420 int i915_gem_suspend(struct drm_device *dev)
4421 {
4422         struct drm_i915_private *dev_priv = to_i915(dev);
4423         int ret;
4424
4425         intel_suspend_gt_powersave(dev_priv);
4426
4427         mutex_lock(&dev->struct_mutex);
4428
4429         /* We have to flush all the executing contexts to main memory so
4430          * that they can be saved in the hibernation image. To ensure the last
4431          * context image is coherent, we have to switch away from it. That
4432          * leaves the dev_priv->kernel_context still active when
4433          * we actually suspend, and its image in memory may not match the GPU
4434          * state. Fortunately, the kernel_context is disposable and we do
4435          * not rely on its state.
4436          */
4437         ret = i915_gem_switch_to_kernel_context(dev_priv);
4438         if (ret)
4439                 goto err;
4440
4441         ret = i915_gem_wait_for_idle(dev_priv,
4442                                      I915_WAIT_INTERRUPTIBLE |
4443                                      I915_WAIT_LOCKED);
4444         if (ret)
4445                 goto err;
4446
4447         i915_gem_retire_requests(dev_priv);
4448         GEM_BUG_ON(dev_priv->gt.active_requests);
4449
4450         assert_kernel_context_is_current(dev_priv);
4451         i915_gem_context_lost(dev_priv);
4452         mutex_unlock(&dev->struct_mutex);
4453
4454         cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
4455         cancel_delayed_work_sync(&dev_priv->gt.retire_work);
4456         flush_delayed_work(&dev_priv->gt.idle_work);
4457         flush_work(&dev_priv->mm.free_work);
4458
4459         /* Assert that we successfully flushed all the work and
4460          * reset the GPU back to its idle, low power state.
4461          */
4462         WARN_ON(dev_priv->gt.awake);
4463
4464         /*
4465          * Neither the BIOS, ourselves nor any other kernel
4466          * expects the system to be in execlists mode on startup,
4467          * so we need to reset the GPU back to legacy mode. And the only
4468          * known way to disable logical contexts is through a GPU reset.
4469          *
4470          * So in order to leave the system in a known default configuration,
4471          * always reset the GPU upon unload and suspend. Afterwards we then
4472          * clean up the GEM state tracking, flushing off the requests and
4473          * leaving the system in a known idle state.
4474          *
4475          * Note that it is of the utmost importance that the GPU is idle and
4476          * all stray writes are flushed *before* we dismantle the backing
4477          * storage for the pinned objects.
4478          *
4479          * However, since we are uncertain that resetting the GPU on older
4480          * machines is a good idea, we don't - just in case it leaves the
4481          * machine in an unusable condition.
4482          */
4483         if (HAS_HW_CONTEXTS(dev)) {
4484                 int reset = intel_gpu_reset(dev_priv, ALL_ENGINES);
4485                 WARN_ON(reset && reset != -ENODEV);
4486         }
4487
4488         return 0;
4489
4490 err:
4491         mutex_unlock(&dev->struct_mutex);
4492         return ret;
4493 }
4494
4495 void i915_gem_resume(struct drm_device *dev)
4496 {
4497         struct drm_i915_private *dev_priv = to_i915(dev);
4498
4499         mutex_lock(&dev->struct_mutex);
4500         i915_gem_restore_gtt_mappings(dev);
4501
4502         /* As we didn't flush the kernel context before suspend, we cannot
4503          * guarantee that the context image is complete. So let's just reset
4504          * it and start again.
4505          */
4506         dev_priv->gt.resume(dev_priv);
4507
4508         mutex_unlock(&dev->struct_mutex);
4509 }
4510
4511 void i915_gem_init_swizzling(struct drm_device *dev)
4512 {
4513         struct drm_i915_private *dev_priv = to_i915(dev);
4514
4515         if (INTEL_INFO(dev)->gen < 5 ||
4516             dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
4517                 return;
4518
4519         I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
4520                                  DISP_TILE_SURFACE_SWIZZLING);
4521
4522         if (IS_GEN5(dev_priv))
4523                 return;
4524
4525         I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
4526         if (IS_GEN6(dev_priv))
4527                 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
4528         else if (IS_GEN7(dev_priv))
4529                 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
4530         else if (IS_GEN8(dev_priv))
4531                 I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW));
4532         else
4533                 BUG();
4534 }
4535
4536 static void init_unused_ring(struct drm_i915_private *dev_priv, u32 base)
4537 {
4538         I915_WRITE(RING_CTL(base), 0);
4539         I915_WRITE(RING_HEAD(base), 0);
4540         I915_WRITE(RING_TAIL(base), 0);
4541         I915_WRITE(RING_START(base), 0);
4542 }
4543
4544 static void init_unused_rings(struct drm_i915_private *dev_priv)
4545 {
4546         if (IS_I830(dev_priv)) {
4547                 init_unused_ring(dev_priv, PRB1_BASE);
4548                 init_unused_ring(dev_priv, SRB0_BASE);
4549                 init_unused_ring(dev_priv, SRB1_BASE);
4550                 init_unused_ring(dev_priv, SRB2_BASE);
4551                 init_unused_ring(dev_priv, SRB3_BASE);
4552         } else if (IS_GEN2(dev_priv)) {
4553                 init_unused_ring(dev_priv, SRB0_BASE);
4554                 init_unused_ring(dev_priv, SRB1_BASE);
4555         } else if (IS_GEN3(dev_priv)) {
4556                 init_unused_ring(dev_priv, PRB1_BASE);
4557                 init_unused_ring(dev_priv, PRB2_BASE);
4558         }
4559 }
4560
4561 int
4562 i915_gem_init_hw(struct drm_device *dev)
4563 {
4564         struct drm_i915_private *dev_priv = to_i915(dev);
4565         struct intel_engine_cs *engine;
4566         enum intel_engine_id id;
4567         int ret;
4568
4569         dev_priv->gt.last_init_time = ktime_get();
4570
4571         /* Double layer security blanket, see i915_gem_init() */
4572         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4573
4574         if (HAS_EDRAM(dev) && INTEL_GEN(dev_priv) < 9)
4575                 I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf));
4576
4577         if (IS_HASWELL(dev_priv))
4578                 I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev_priv) ?
4579                            LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);
4580
4581         if (HAS_PCH_NOP(dev_priv)) {
4582                 if (IS_IVYBRIDGE(dev_priv)) {
4583                         u32 temp = I915_READ(GEN7_MSG_CTL);
4584                         temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK);
4585                         I915_WRITE(GEN7_MSG_CTL, temp);
4586                 } else if (INTEL_INFO(dev)->gen >= 7) {
4587                         u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT);
4588                         temp &= ~RESET_PCH_HANDSHAKE_ENABLE;
4589                         I915_WRITE(HSW_NDE_RSTWRN_OPT, temp);
4590                 }
4591         }
4592
4593         i915_gem_init_swizzling(dev);
4594
4595         /*
4596          * At least 830 can leave some of the unused rings
4597          * "active" (i.e. head != tail) after resume, which
4598          * will prevent C3 entry. Make sure all unused rings
4599          * are totally idle.
4600          */
4601         init_unused_rings(dev_priv);
4602
4603         BUG_ON(!dev_priv->kernel_context);
4604
4605         ret = i915_ppgtt_init_hw(dev);
4606         if (ret) {
4607                 DRM_ERROR("PPGTT enable HW failed %d\n", ret);
4608                 goto out;
4609         }
4610
4611         /* Need to do basic initialisation of all rings first: */
4612         for_each_engine(engine, dev_priv, id) {
4613                 ret = engine->init_hw(engine);
4614                 if (ret)
4615                         goto out;
4616         }
4617
4618         intel_mocs_init_l3cc_table(dev);
4619
4620         /* We can't enable contexts until all firmware is loaded */
4621         ret = intel_guc_setup(dev);
4622         if (ret)
4623                 goto out;
4624
4625 out:
4626         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
4627         return ret;
4628 }
4629
4630 bool intel_sanitize_semaphores(struct drm_i915_private *dev_priv, int value)
4631 {
4632         if (INTEL_INFO(dev_priv)->gen < 6)
4633                 return false;
4634
4635         /* TODO: make semaphores and Execlists play nicely together */
4636         if (i915.enable_execlists)
4637                 return false;
4638
4639         if (value >= 0)
4640                 return value;
4641
4642 #ifdef CONFIG_INTEL_IOMMU
4643         /* Enable semaphores on SNB when IO remapping is off */
4644         if (INTEL_INFO(dev_priv)->gen == 6 && intel_iommu_gfx_mapped)
4645                 return false;
4646 #endif
4647
4648         return true;
4649 }
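
/*
 * EXAMPLE (editor's sketch, not part of the original file): the expected
 * way to consume intel_sanitize_semaphores() is to clamp the module
 * parameter once during driver load, before any engine is initialised.
 * The exact call site below is assumed, not quoted from the driver:
 *
 *	i915.semaphores = intel_sanitize_semaphores(dev_priv, i915.semaphores);
 */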
4650
4651 int i915_gem_init(struct drm_device *dev)
4652 {
4653         struct drm_i915_private *dev_priv = to_i915(dev);
4654         int ret;
4655
4656         mutex_lock(&dev->struct_mutex);
4657
4658         if (!i915.enable_execlists) {
4659                 dev_priv->gt.resume = intel_legacy_submission_resume;
4660                 dev_priv->gt.cleanup_engine = intel_engine_cleanup;
4661         } else {
4662                 dev_priv->gt.resume = intel_lr_context_resume;
4663                 dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup;
4664         }
4665
4666         /* This is just a security blanket to placate dragons.
4667          * On some systems, we very sporadically observe that the first TLBs
4668          * used by the CS may be stale, despite us poking the TLB reset. If
4669          * we hold the forcewake during initialisation these problems
4670          * just magically go away.
4671          */
4672         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4673
4674         i915_gem_init_userptr(dev_priv);
4675
4676         ret = i915_gem_init_ggtt(dev_priv);
4677         if (ret)
4678                 goto out_unlock;
4679
4680         ret = i915_gem_context_init(dev);
4681         if (ret)
4682                 goto out_unlock;
4683
4684         ret = intel_engines_init(dev);
4685         if (ret)
4686                 goto out_unlock;
4687
4688         ret = i915_gem_init_hw(dev);
4689         if (ret == -EIO) {
4690                 /* Allow engine initialisation to fail by marking the GPU as
4691                  * wedged. But we only want to do this where the GPU is angry;
4692                  * for any other failure, such as an allocation failure, bail.
4693                  */
4694                 DRM_ERROR("Failed to initialize GPU, declaring it wedged\n");
4695                 i915_gem_set_wedged(dev_priv);
4696                 ret = 0;
4697         }
4698
4699 out_unlock:
4700         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
4701         mutex_unlock(&dev->struct_mutex);
4702
4703         return ret;
4704 }
4705
4706 void
4707 i915_gem_cleanup_engines(struct drm_device *dev)
4708 {
4709         struct drm_i915_private *dev_priv = to_i915(dev);
4710         struct intel_engine_cs *engine;
4711         enum intel_engine_id id;
4712
4713         for_each_engine(engine, dev_priv, id)
4714                 dev_priv->gt.cleanup_engine(engine);
4715 }
4716
4717 void
4718 i915_gem_load_init_fences(struct drm_i915_private *dev_priv)
4719 {
4720         struct drm_device *dev = &dev_priv->drm;
4721         int i;
4722
4723         if (INTEL_INFO(dev_priv)->gen >= 7 && !IS_VALLEYVIEW(dev_priv) &&
4724             !IS_CHERRYVIEW(dev_priv))
4725                 dev_priv->num_fence_regs = 32;
4726         else if (INTEL_INFO(dev_priv)->gen >= 4 || IS_I945G(dev_priv) ||
4727                  IS_I945GM(dev_priv) || IS_G33(dev_priv))
4728                 dev_priv->num_fence_regs = 16;
4729         else
4730                 dev_priv->num_fence_regs = 8;
4731
4732         if (intel_vgpu_active(dev_priv))
4733                 dev_priv->num_fence_regs =
4734                                 I915_READ(vgtif_reg(avail_rs.fence_num));
4735
4736         /* Initialize fence registers to zero */
4737         for (i = 0; i < dev_priv->num_fence_regs; i++) {
4738                 struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i];
4739
4740                 fence->i915 = dev_priv;
4741                 fence->id = i;
4742                 list_add_tail(&fence->link, &dev_priv->mm.fence_list);
4743         }
4744         i915_gem_restore_fences(dev);
4745
4746         i915_gem_detect_bit_6_swizzle(dev);
4747 }
4748
4749 int
4750 i915_gem_load_init(struct drm_device *dev)
4751 {
4752         struct drm_i915_private *dev_priv = to_i915(dev);
4753         int err = -ENOMEM;
4754
4755         dev_priv->objects = KMEM_CACHE(drm_i915_gem_object, SLAB_HWCACHE_ALIGN);
4756         if (!dev_priv->objects)
4757                 goto err_out;
4758
4759         dev_priv->vmas = KMEM_CACHE(i915_vma, SLAB_HWCACHE_ALIGN);
4760         if (!dev_priv->vmas)
4761                 goto err_objects;
4762
4763         dev_priv->requests = KMEM_CACHE(drm_i915_gem_request,
4764                                         SLAB_HWCACHE_ALIGN |
4765                                         SLAB_RECLAIM_ACCOUNT |
4766                                         SLAB_DESTROY_BY_RCU);
4767         if (!dev_priv->requests)
4768                 goto err_vmas;
4769
4770         mutex_lock(&dev_priv->drm.struct_mutex);
4771         INIT_LIST_HEAD(&dev_priv->gt.timelines);
4772         err = i915_gem_timeline_init(dev_priv,
4773                                      &dev_priv->gt.global_timeline,
4774                                      "[execution]");
4775         mutex_unlock(&dev_priv->drm.struct_mutex);
4776         if (err)
4777                 goto err_requests;
4778
4779         INIT_LIST_HEAD(&dev_priv->context_list);
4780         INIT_WORK(&dev_priv->mm.free_work, __i915_gem_free_work);
4781         init_llist_head(&dev_priv->mm.free_list);
4782         INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
4783         INIT_LIST_HEAD(&dev_priv->mm.bound_list);
4784         INIT_LIST_HEAD(&dev_priv->mm.fence_list);
4785         INIT_LIST_HEAD(&dev_priv->mm.userfault_list);
4786         INIT_DELAYED_WORK(&dev_priv->gt.retire_work,
4787                           i915_gem_retire_work_handler);
4788         INIT_DELAYED_WORK(&dev_priv->gt.idle_work,
4789                           i915_gem_idle_work_handler);
4790         init_waitqueue_head(&dev_priv->gpu_error.wait_queue);
4791         init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
4792
4793         dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;
4794
4795         init_waitqueue_head(&dev_priv->pending_flip_queue);
4796
4797         dev_priv->mm.interruptible = true;
4798
4799         atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0);
4800
4801         spin_lock_init(&dev_priv->fb_tracking.lock);
4802
4803         return 0;
4804
4805 err_requests:
4806         kmem_cache_destroy(dev_priv->requests);
4807 err_vmas:
4808         kmem_cache_destroy(dev_priv->vmas);
4809 err_objects:
4810         kmem_cache_destroy(dev_priv->objects);
4811 err_out:
4812         return err;
4813 }
4814
4815 void i915_gem_load_cleanup(struct drm_device *dev)
4816 {
4817         struct drm_i915_private *dev_priv = to_i915(dev);
4818
4819         WARN_ON(!llist_empty(&dev_priv->mm.free_list));
4820
4821         kmem_cache_destroy(dev_priv->requests);
4822         kmem_cache_destroy(dev_priv->vmas);
4823         kmem_cache_destroy(dev_priv->objects);
4824
4825         /* And ensure that our DESTROY_BY_RCU slabs are truly destroyed */
4826         rcu_barrier();
4827 }
4828
4829 int i915_gem_freeze(struct drm_i915_private *dev_priv)
4830 {
4831         intel_runtime_pm_get(dev_priv);
4832
4833         mutex_lock(&dev_priv->drm.struct_mutex);
4834         i915_gem_shrink_all(dev_priv);
4835         mutex_unlock(&dev_priv->drm.struct_mutex);
4836
4837         intel_runtime_pm_put(dev_priv);
4838
4839         return 0;
4840 }
4841
4842 int i915_gem_freeze_late(struct drm_i915_private *dev_priv)
4843 {
4844         struct drm_i915_gem_object *obj;
4845         struct list_head *phases[] = {
4846                 &dev_priv->mm.unbound_list,
4847                 &dev_priv->mm.bound_list,
4848                 NULL
4849         }, **p;
4850
4851         /* Called just before we write the hibernation image.
4852          *
4853          * We need to update the domain tracking to reflect that the CPU
4854          * will be accessing all the pages to create and restore from the
4855          * hibernation, and so upon restoration those pages will be in the
4856          * CPU domain.
4857          *
4858          * To make sure the hibernation image contains the latest state,
4859          * we update that state just before writing out the image.
4860          *
4861          * To try and reduce the hibernation image, we manually shrink
4862          * the objects as well.
4863          */
4864
4865         mutex_lock(&dev_priv->drm.struct_mutex);
4866         i915_gem_shrink(dev_priv, -1UL, I915_SHRINK_UNBOUND);
4867
4868         for (p = phases; *p; p++) {
4869                 list_for_each_entry(obj, *p, global_link) {
4870                         obj->base.read_domains = I915_GEM_DOMAIN_CPU;
4871                         obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4872                 }
4873         }
4874         mutex_unlock(&dev_priv->drm.struct_mutex);
4875
4876         return 0;
4877 }
4878
4879 void i915_gem_release(struct drm_device *dev, struct drm_file *file)
4880 {
4881         struct drm_i915_file_private *file_priv = file->driver_priv;
4882         struct drm_i915_gem_request *request;
4883
4884         /* Clean up our request list when the client is going away, so that
4885          * later retire_requests won't dereference our soon-to-be-gone
4886          * file_priv.
4887          */
4888         spin_lock(&file_priv->mm.lock);
4889         list_for_each_entry(request, &file_priv->mm.request_list, client_list)
4890                 request->file_priv = NULL;
4891         spin_unlock(&file_priv->mm.lock);
4892
4893         if (!list_empty(&file_priv->rps.link)) {
4894                 spin_lock(&to_i915(dev)->rps.client_lock);
4895                 list_del(&file_priv->rps.link);
4896                 spin_unlock(&to_i915(dev)->rps.client_lock);
4897         }
4898 }
4899
4900 int i915_gem_open(struct drm_device *dev, struct drm_file *file)
4901 {
4902         struct drm_i915_file_private *file_priv;
4903         int ret;
4904
4905         DRM_DEBUG_DRIVER("\n");
4906
4907         file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
4908         if (!file_priv)
4909                 return -ENOMEM;
4910
4911         file->driver_priv = file_priv;
4912         file_priv->dev_priv = to_i915(dev);
4913         file_priv->file = file;
4914         INIT_LIST_HEAD(&file_priv->rps.link);
4915
4916         spin_lock_init(&file_priv->mm.lock);
4917         INIT_LIST_HEAD(&file_priv->mm.request_list);
4918
4919         file_priv->bsd_engine = -1;
4920
4921         ret = i915_gem_context_open(dev, file);
4922         if (ret)
4923                 kfree(file_priv);
4924
4925         return ret;
4926 }
4927
4928 /**
4929  * i915_gem_track_fb - update frontbuffer tracking
4930  * @old: current GEM buffer for the frontbuffer slots
4931  * @new: new GEM buffer for the frontbuffer slots
4932  * @frontbuffer_bits: bitmask of frontbuffer slots
4933  *
4934  * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them
4935  * from @old and setting them in @new. Both @old and @new can be NULL.
4936  */
4937 void i915_gem_track_fb(struct drm_i915_gem_object *old,
4938                        struct drm_i915_gem_object *new,
4939                        unsigned frontbuffer_bits)
4940 {
4941         /* Control of individual bits within the mask is guarded by
4942          * the owning plane->mutex, i.e. we can never see concurrent
4943          * manipulation of individual bits. But since the bitfield as a whole
4944          * is updated using RMW, we need to use atomics in order to update
4945          * the bits.
4946          */
4947         BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES >
4948                      sizeof(atomic_t) * BITS_PER_BYTE);
4949
4950         if (old) {
4951                 WARN_ON(!(atomic_read(&old->frontbuffer_bits) & frontbuffer_bits));
4952                 atomic_andnot(frontbuffer_bits, &old->frontbuffer_bits);
4953         }
4954
4955         if (new) {
4956                 WARN_ON(atomic_read(&new->frontbuffer_bits) & frontbuffer_bits);
4957                 atomic_or(frontbuffer_bits, &new->frontbuffer_bits);
4958         }
4959 }
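
/*
 * EXAMPLE (editor's sketch, not part of the original file): a hypothetical
 * caller handing the primary-plane frontbuffer bits from the outgoing
 * framebuffer's object to the incoming one during a flip.  Either object
 * may be NULL (plane being enabled or disabled).  INTEL_FRONTBUFFER_PRIMARY()
 * is assumed to be the per-pipe bit macro from i915_drv.h.
 */
static void example_flip_frontbuffer(struct drm_i915_gem_object *old_obj,
				     struct drm_i915_gem_object *new_obj,
				     enum pipe pipe)
{
	i915_gem_track_fb(old_obj, new_obj, INTEL_FRONTBUFFER_PRIMARY(pipe));
}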
4960
4961 /* Allocate a new GEM object and fill it with the supplied data */
4962 struct drm_i915_gem_object *
4963 i915_gem_object_create_from_data(struct drm_device *dev,
4964                                  const void *data, size_t size)
4965 {
4966         struct drm_i915_gem_object *obj;
4967         struct sg_table *sg;
4968         size_t bytes;
4969         int ret;
4970
4971         obj = i915_gem_object_create(dev, round_up(size, PAGE_SIZE));
4972         if (IS_ERR(obj))
4973                 return obj;
4974
4975         ret = i915_gem_object_set_to_cpu_domain(obj, true);
4976         if (ret)
4977                 goto fail;
4978
4979         ret = i915_gem_object_pin_pages(obj);
4980         if (ret)
4981                 goto fail;
4982
4983         sg = obj->mm.pages;
4984         bytes = sg_copy_from_buffer(sg->sgl, sg->nents, (void *)data, size);
4985         obj->mm.dirty = true; /* Backing store is now out of date */
4986         i915_gem_object_unpin_pages(obj);
4987
4988         if (WARN_ON(bytes != size)) {
4989                 DRM_ERROR("Incomplete copy, wrote %zu of %zu\n", bytes, size);
4990                 ret = -EFAULT;
4991                 goto fail;
4992         }
4993
4994         return obj;
4995
4996 fail:
4997         i915_gem_object_put(obj);
4998         return ERR_PTR(ret);
4999 }
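
/*
 * EXAMPLE (editor's sketch, not part of the original file): a typical
 * caller copies a blob (e.g. firmware) into a fresh GEM object and checks
 * the ERR_PTR-style return.  The helper name and blob parameters are
 * hypothetical.
 */
static struct drm_i915_gem_object *
example_upload_blob(struct drm_device *dev, const void *blob, size_t len)
{
	struct drm_i915_gem_object *obj;

	obj = i915_gem_object_create_from_data(dev, blob, len);
	if (IS_ERR(obj)) {
		DRM_DEBUG_DRIVER("blob upload failed: %ld\n", PTR_ERR(obj));
		return obj;
	}

	/* The caller now owns one reference; drop it with
	 * i915_gem_object_put() when the object is no longer needed.
	 */
	return obj;
}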
5000
5001 struct scatterlist *
5002 i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
5003                        unsigned int n,
5004                        unsigned int *offset)
5005 {
5006         struct i915_gem_object_page_iter *iter = &obj->mm.get_page;
5007         struct scatterlist *sg;
5008         unsigned int idx, count;
5009
5010         might_sleep();
5011         GEM_BUG_ON(n >= obj->base.size >> PAGE_SHIFT);
5012         GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
5013
5014         /* As we iterate forward through the sg, we record each entry in a
5015          * radixtree for quick repeated (backwards) lookups. If we have seen
5016          * this index previously, we will have an entry for it.
5017          *
5018          * Initial lookup is O(N), but this is amortized to O(1) for
5019          * sequential page access (where each new request is consecutive
5020          * to the previous one). Repeated lookups are O(lg(obj->base.size)),
5021          * i.e. O(1) with a large constant!
5022          */
5023         if (n < READ_ONCE(iter->sg_idx))
5024                 goto lookup;
5025
5026         mutex_lock(&iter->lock);
5027
5028         /* We prefer to reuse the last sg so that repeated lookups of this
5029          * (or the subsequent) sg are fast - comparing against the last
5030          * sg is faster than going through the radixtree.
5031          */
5032
5033         sg = iter->sg_pos;
5034         idx = iter->sg_idx;
5035         count = __sg_page_count(sg);
5036
5037         while (idx + count <= n) {
5038                 unsigned long exception, i;
5039                 int ret;
5040
5041                 /* If we cannot allocate and insert this entry, or the
5042                  * individual pages from this range, cancel updating the
5043                  * sg_idx so that on this lookup we are forced to linearly
5044                  * scan onwards, but on future lookups we will try the
5045                  * insertion again (in which case we need to be careful of
5046                  * the error return reporting that we have already inserted
5047                  * this index).
5048                  */
5049                 ret = radix_tree_insert(&iter->radix, idx, sg);
5050                 if (ret && ret != -EEXIST)
5051                         goto scan;
5052
5053                 exception =
5054                         RADIX_TREE_EXCEPTIONAL_ENTRY |
5055                         idx << RADIX_TREE_EXCEPTIONAL_SHIFT;
5056                 for (i = 1; i < count; i++) {
5057                         ret = radix_tree_insert(&iter->radix, idx + i,
5058                                                 (void *)exception);
5059                         if (ret && ret != -EEXIST)
5060                                 goto scan;
5061                 }
5062
5063                 idx += count;
5064                 sg = ____sg_next(sg);
5065                 count = __sg_page_count(sg);
5066         }
5067
5068 scan:
5069         iter->sg_pos = sg;
5070         iter->sg_idx = idx;
5071
5072         mutex_unlock(&iter->lock);
5073
5074         if (unlikely(n < idx)) /* insertion completed by another thread */
5075                 goto lookup;
5076
5077         /* In case we failed to insert the entry into the radixtree, we need
5078          * to look beyond the current sg.
5079          */
5080         while (idx + count <= n) {
5081                 idx += count;
5082                 sg = ____sg_next(sg);
5083                 count = __sg_page_count(sg);
5084         }
5085
5086         *offset = n - idx;
5087         return sg;
5088
5089 lookup:
5090         rcu_read_lock();
5091
5092         sg = radix_tree_lookup(&iter->radix, n);
5093         GEM_BUG_ON(!sg);
5094
5095         /* If this index is in the middle of a multi-page sg entry,
5096          * the radixtree will contain an exceptional entry that points
5097          * to the start of that range. We will return the pointer to
5098          * the base page and the offset of this page within the
5099          * sg entry's range.
5100          */
5101         *offset = 0;
5102         if (unlikely(radix_tree_exception(sg))) {
5103                 unsigned long base =
5104                         (unsigned long)sg >> RADIX_TREE_EXCEPTIONAL_SHIFT;
5105
5106                 sg = radix_tree_lookup(&iter->radix, base);
5107                 GEM_BUG_ON(!sg);
5108
5109                 *offset = n - base;
5110         }
5111
5112         rcu_read_unlock();
5113
5114         return sg;
5115 }
5116
5117 struct page *
5118 i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned int n)
5119 {
5120         struct scatterlist *sg;
5121         unsigned int offset;
5122
5123         GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));
5124
5125         sg = i915_gem_object_get_sg(obj, n, &offset);
5126         return nth_page(sg_page(sg), offset);
5127 }
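
/*
 * EXAMPLE (editor's sketch, not part of the original file): walking an
 * object's pages in ascending order so that the cached iterator in
 * i915_gem_object_get_sg() keeps lookups amortized O(1).  The pages must be
 * pinned for the duration of the walk; the helper name is hypothetical, but
 * every call it makes exists above.
 */
static int example_walk_pages(struct drm_i915_gem_object *obj)
{
	unsigned int n, count = obj->base.size >> PAGE_SHIFT;
	int ret;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	for (n = 0; n < count; n++) {
		struct page *page = i915_gem_object_get_page(obj, n);
		void *vaddr = kmap(page);

		/* ... inspect or fill this page through vaddr ... */
		(void)vaddr;

		kunmap(page);
	}

	i915_gem_object_unpin_pages(obj);
	return 0;
}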
5128
5129 /* Like i915_gem_object_get_page(), but mark the returned page dirty */
5130 struct page *
5131 i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
5132                                unsigned int n)
5133 {
5134         struct page *page;
5135
5136         page = i915_gem_object_get_page(obj, n);
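        /* Editor's note (assumed rationale): if the object is already
         * tracked as dirty, all of its pages are marked dirty when the
         * pages are released, so only the not-yet-dirty case needs the
         * explicit set_page_dirty() below.
         */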
5137         if (!obj->mm.dirty)
5138                 set_page_dirty(page);
5139
5140         return page;
5141 }
5142
5143 dma_addr_t
5144 i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
5145                                 unsigned long n)
5146 {
5147         struct scatterlist *sg;
5148         unsigned int offset;
5149
5150         sg = i915_gem_object_get_sg(obj, n, &offset);
5151         return sg_dma_address(sg) + (offset << PAGE_SHIFT);
5152 }