drivers/gpu/drm/i915/i915_gem_execbuffer.c
1 /*
2  * Copyright © 2008,2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *    Chris Wilson <chris@chris-wilson.co.uk>
26  *
27  */
28
29 #include <drm/drmP.h>
30 #include <drm/i915_drm.h>
31 #include "i915_drv.h"
32 #include "i915_trace.h"
33 #include "intel_drv.h"
34 #include <linux/dma_remapping.h>
35
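/* Book-keeping for the objects referenced by a single execbuffer call.
 * "objects" collects every looked-up buffer in submission order.  "and"
 * selects the lookup scheme: a negative value (minus the buffer count)
 * means the flat handle LUT is in use, a non-negative value is the mask
 * for the hash buckets.  The union provides storage for whichever of the
 * two schemes was chosen in eb_create().
 */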
36 struct eb_objects {
37         struct list_head objects;
38         int and;
39         union {
40                 struct drm_i915_gem_object *lut[0];
41                 struct hlist_head buckets[0];
42         };
43 };
44
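/* Allocate the lookup structure for this execbuffer.  When userspace set
 * I915_EXEC_HANDLE_LUT we opportunistically try a flat LUT indexed by
 * buffer position; if the flag is absent or that allocation fails, fall
 * back to a hash table sized to roughly twice the buffer count (capped
 * at half a page of hlist_heads), recording the bucket mask in eb->and.
 */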
45 static struct eb_objects *
46 eb_create(struct drm_i915_gem_execbuffer2 *args)
47 {
48         struct eb_objects *eb = NULL;
49
50         if (args->flags & I915_EXEC_HANDLE_LUT) {
51                 int size = args->buffer_count;
52                 size *= sizeof(struct drm_i915_gem_object *);
53                 size += sizeof(struct eb_objects);
54                 eb = kmalloc(size, GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
55         }
56
57         if (eb == NULL) {
58                 int size = args->buffer_count;
59                 int count = PAGE_SIZE / sizeof(struct hlist_head) / 2;
60                 BUILD_BUG_ON_NOT_POWER_OF_2(PAGE_SIZE / sizeof(struct hlist_head));
61                 while (count > 2*size)
62                         count >>= 1;
63                 eb = kzalloc(count*sizeof(struct hlist_head) +
64                              sizeof(struct eb_objects),
65                              GFP_TEMPORARY);
66                 if (eb == NULL)
67                         return eb;
68
69                 eb->and = count - 1;
70         } else
71                 eb->and = -args->buffer_count;
72
73         INIT_LIST_HEAD(&eb->objects);
74         return eb;
75 }
76
77 static void
78 eb_reset(struct eb_objects *eb)
79 {
80         if (eb->and >= 0)
81                 memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
82 }
83
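/* Resolve every exec-object handle to its GEM object under the file's
 * table_lock, take a reference and append it to eb->objects in
 * submission order.  Unknown handles return -ENOENT and duplicate
 * entries -EINVAL.  Each object is then made findable again through the
 * LUT or hash table chosen in eb_create().
 */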
84 static int
85 eb_lookup_objects(struct eb_objects *eb,
86                   struct drm_i915_gem_exec_object2 *exec,
87                   const struct drm_i915_gem_execbuffer2 *args,
88                   struct drm_file *file)
89 {
90         int i;
91
92         spin_lock(&file->table_lock);
93         for (i = 0; i < args->buffer_count; i++) {
94                 struct drm_i915_gem_object *obj;
95
96                 obj = to_intel_bo(idr_find(&file->object_idr, exec[i].handle));
97                 if (obj == NULL) {
98                         spin_unlock(&file->table_lock);
99                         DRM_DEBUG("Invalid object handle %d at index %d\n",
100                                    exec[i].handle, i);
101                         return -ENOENT;
102                 }
103
104                 if (!list_empty(&obj->exec_list)) {
105                         spin_unlock(&file->table_lock);
106                         DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n",
107                                    obj, exec[i].handle, i);
108                         return -EINVAL;
109                 }
110
111                 drm_gem_object_reference(&obj->base);
112                 list_add_tail(&obj->exec_list, &eb->objects);
113
114                 obj->exec_entry = &exec[i];
115                 if (eb->and < 0) {
116                         eb->lut[i] = obj;
117                 } else {
118                         uint32_t handle = args->flags & I915_EXEC_HANDLE_LUT ? i : exec[i].handle;
119                         obj->exec_handle = handle;
120                         hlist_add_head(&obj->exec_node,
121                                        &eb->buckets[handle & eb->and]);
122                 }
123         }
124         spin_unlock(&file->table_lock);
125
126         return 0;
127 }
128
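/* Translate a relocation target handle (or LUT index) back into the
 * object captured by eb_lookup_objects(), returning NULL if the handle
 * was not part of this execbuffer.
 */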
129 static struct drm_i915_gem_object *
130 eb_get_object(struct eb_objects *eb, unsigned long handle)
131 {
132         if (eb->and < 0) {
133                 if (handle >= -eb->and)
134                         return NULL;
135                 return eb->lut[handle];
136         } else {
137                 struct hlist_head *head;
138                 struct hlist_node *node;
139
140                 head = &eb->buckets[handle & eb->and];
141                 hlist_for_each(node, head) {
142                         struct drm_i915_gem_object *obj;
143
144                         obj = hlist_entry(node, struct drm_i915_gem_object, exec_node);
145                         if (obj->exec_handle == handle)
146                                 return obj;
147                 }
148                 return NULL;
149         }
150 }
151
152 static void
153 eb_destroy(struct eb_objects *eb)
154 {
155         while (!list_empty(&eb->objects)) {
156                 struct drm_i915_gem_object *obj;
157
158                 obj = list_first_entry(&eb->objects,
159                                        struct drm_i915_gem_object,
160                                        exec_list);
161                 list_del_init(&obj->exec_list);
162                 drm_gem_object_unreference(&obj->base);
163         }
164         kfree(eb);
165 }
166
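/* Write relocations through the CPU rather than the GTT whenever the
 * object is already in the CPU write domain, cannot be placed in the
 * mappable and fenceable aperture, or is cached (cache_level other than
 * I915_CACHE_NONE), in which case a GTT write would be slower or
 * incoherent.
 */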
167 static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
168 {
169         return (obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
170                 !obj->map_and_fenceable ||
171                 obj->cache_level != I915_CACHE_NONE);
172 }
173
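/* Apply a single relocation: look up the target object, apply the gen6
 * global GTT workaround if needed, validate the requested domains and
 * the relocation offset, and, unless the presumed offset already
 * matches, write target_offset + delta into the batch either via a
 * kmap of the backing page or through an atomic GTT mapping.  Returns
 * -EFAULT for an active object while pagefaults are disabled so that
 * the caller can drop into the slow path.
 */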
174 static int
175 i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
176                                    struct eb_objects *eb,
177                                    struct drm_i915_gem_relocation_entry *reloc,
178                                    struct i915_address_space *vm)
179 {
180         struct drm_device *dev = obj->base.dev;
181         struct drm_gem_object *target_obj;
182         struct drm_i915_gem_object *target_i915_obj;
183         uint32_t target_offset;
184         int ret = -EINVAL;
185
186         /* we already hold a reference to all valid objects */
187         target_obj = &eb_get_object(eb, reloc->target_handle)->base;
188         if (unlikely(target_obj == NULL))
189                 return -ENOENT;
190
191         target_i915_obj = to_intel_bo(target_obj);
192         target_offset = i915_gem_obj_ggtt_offset(target_i915_obj);
193
194         /* Sandybridge PPGTT errata: We need a global gtt mapping for MI and
195          * pipe_control writes because the gpu doesn't properly redirect them
196          * through the ppgtt for non-secure batchbuffers. */
197         if (unlikely(IS_GEN6(dev) &&
198             reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
199             !target_i915_obj->has_global_gtt_mapping)) {
200                 i915_gem_gtt_bind_object(target_i915_obj,
201                                          target_i915_obj->cache_level);
202         }
203
204         /* Validate that the target is in a valid r/w GPU domain */
205         if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
206                 DRM_DEBUG("reloc with multiple write domains: "
207                           "obj %p target %d offset %d "
208                           "read %08x write %08x",
209                           obj, reloc->target_handle,
210                           (int) reloc->offset,
211                           reloc->read_domains,
212                           reloc->write_domain);
213                 return ret;
214         }
215         if (unlikely((reloc->write_domain | reloc->read_domains)
216                      & ~I915_GEM_GPU_DOMAINS)) {
217                 DRM_DEBUG("reloc with read/write non-GPU domains: "
218                           "obj %p target %d offset %d "
219                           "read %08x write %08x",
220                           obj, reloc->target_handle,
221                           (int) reloc->offset,
222                           reloc->read_domains,
223                           reloc->write_domain);
224                 return ret;
225         }
226
227         target_obj->pending_read_domains |= reloc->read_domains;
228         target_obj->pending_write_domain |= reloc->write_domain;
229
230         /* If the relocation already has the right value in it, no
231          * more work needs to be done.
232          */
233         if (target_offset == reloc->presumed_offset)
234                 return 0;
235
236         /* Check that the relocation address is valid... */
237         if (unlikely(reloc->offset > obj->base.size - 4)) {
238                 DRM_DEBUG("Relocation beyond object bounds: "
239                           "obj %p target %d offset %d size %d.\n",
240                           obj, reloc->target_handle,
241                           (int) reloc->offset,
242                           (int) obj->base.size);
243                 return ret;
244         }
245         if (unlikely(reloc->offset & 3)) {
246                 DRM_DEBUG("Relocation not 4-byte aligned: "
247                           "obj %p target %d offset %d.\n",
248                           obj, reloc->target_handle,
249                           (int) reloc->offset);
250                 return ret;
251         }
252
253         /* We can't wait for rendering with pagefaults disabled */
254         if (obj->active && in_atomic())
255                 return -EFAULT;
256
257         reloc->delta += target_offset;
258         if (use_cpu_reloc(obj)) {
259                 uint32_t page_offset = offset_in_page(reloc->offset);
260                 char *vaddr;
261
262                 ret = i915_gem_object_set_to_cpu_domain(obj, 1);
263                 if (ret)
264                         return ret;
265
266                 vaddr = kmap_atomic(i915_gem_object_get_page(obj,
267                                                              reloc->offset >> PAGE_SHIFT));
268                 *(uint32_t *)(vaddr + page_offset) = reloc->delta;
269                 kunmap_atomic(vaddr);
270         } else {
271                 struct drm_i915_private *dev_priv = dev->dev_private;
272                 uint32_t __iomem *reloc_entry;
273                 void __iomem *reloc_page;
274
275                 ret = i915_gem_object_set_to_gtt_domain(obj, true);
276                 if (ret)
277                         return ret;
278
279                 ret = i915_gem_object_put_fence(obj);
280                 if (ret)
281                         return ret;
282
283                 /* Map the page containing the relocation we're going to perform.  */
284                 reloc->offset += i915_gem_obj_ggtt_offset(obj);
285                 reloc_page = io_mapping_map_atomic_wc(dev_priv->gtt.mappable,
286                                                       reloc->offset & PAGE_MASK);
287                 reloc_entry = (uint32_t __iomem *)
288                         (reloc_page + offset_in_page(reloc->offset));
289                 iowrite32(reloc->delta, reloc_entry);
290                 io_mapping_unmap_atomic(reloc_page);
291         }
292
293         /* and update the user's relocation entry */
294         reloc->presumed_offset = target_offset;
295
296         return 0;
297 }
298
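/* Fast-path relocation for one object: user relocations are copied in
 * chunks onto a 512-byte stack buffer with the inatomic accessors
 * (pagefaults are disabled by the caller), applied one at a time, and
 * any presumed_offset that changed is written straight back to the
 * user's array.
 */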
299 static int
300 i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj,
301                                     struct eb_objects *eb,
302                                     struct i915_address_space *vm)
303 {
304 #define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
305         struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)];
306         struct drm_i915_gem_relocation_entry __user *user_relocs;
307         struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
308         int remain, ret;
309
310         user_relocs = to_user_ptr(entry->relocs_ptr);
311
312         remain = entry->relocation_count;
313         while (remain) {
314                 struct drm_i915_gem_relocation_entry *r = stack_reloc;
315                 int count = remain;
316                 if (count > ARRAY_SIZE(stack_reloc))
317                         count = ARRAY_SIZE(stack_reloc);
318                 remain -= count;
319
320                 if (__copy_from_user_inatomic(r, user_relocs, count*sizeof(r[0])))
321                         return -EFAULT;
322
323                 do {
324                         u64 offset = r->presumed_offset;
325
326                         ret = i915_gem_execbuffer_relocate_entry(obj, eb, r,
327                                                                  vm);
328                         if (ret)
329                                 return ret;
330
331                         if (r->presumed_offset != offset &&
332                             __copy_to_user_inatomic(&user_relocs->presumed_offset,
333                                                     &r->presumed_offset,
334                                                     sizeof(r->presumed_offset))) {
335                                 return -EFAULT;
336                         }
337
338                         user_relocs++;
339                         r++;
340                 } while (--count);
341         }
342
343         return 0;
344 #undef N_RELOC
345 }
346
347 static int
348 i915_gem_execbuffer_relocate_object_slow(struct drm_i915_gem_object *obj,
349                                          struct eb_objects *eb,
350                                          struct drm_i915_gem_relocation_entry *relocs,
351                                          struct i915_address_space *vm)
352 {
353         const struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
354         int i, ret;
355
356         for (i = 0; i < entry->relocation_count; i++) {
357                 ret = i915_gem_execbuffer_relocate_entry(obj, eb, &relocs[i],
358                                                          vm);
359                 if (ret)
360                         return ret;
361         }
362
363         return 0;
364 }
365
366 static int
367 i915_gem_execbuffer_relocate(struct eb_objects *eb,
368                              struct i915_address_space *vm)
369 {
370         struct drm_i915_gem_object *obj;
371         int ret = 0;
372
373         /* This is the fast path and we cannot handle a pagefault whilst
374          * holding the struct mutex lest the user pass in the relocations
375          * contained within an mmapped bo. In such a case, the page
376          * fault handler would call i915_gem_fault() and we would try to
377          * acquire the struct mutex again. Obviously this is bad and so
378          * lockdep complains vehemently.
379          */
380         pagefault_disable();
381         list_for_each_entry(obj, &eb->objects, exec_list) {
382                 ret = i915_gem_execbuffer_relocate_object(obj, eb, vm);
383                 if (ret)
384                         break;
385         }
386         pagefault_enable();
387
388         return ret;
389 }
390
391 #define  __EXEC_OBJECT_HAS_PIN (1<<31)
392 #define  __EXEC_OBJECT_HAS_FENCE (1<<30)
393
394 static int
395 need_reloc_mappable(struct drm_i915_gem_object *obj)
396 {
397         struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
398         return entry->relocation_count && !use_cpu_reloc(obj);
399 }
400
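/* Pin a single object into the address space for this execbuffer,
 * honouring its alignment, fence and mappability requirements, and
 * record the pin (and any pinned fence) in entry->flags so that
 * i915_gem_execbuffer_unreserve_object() can undo it.  Also ensures the
 * aliasing ppgtt and global GTT bindings exist where required and flags
 * the buffer for relocation if it did not land at the offset userspace
 * presumed.
 */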
401 static int
402 i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj,
403                                    struct intel_ring_buffer *ring,
404                                    struct i915_address_space *vm,
405                                    bool *need_reloc)
406 {
407         struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
408         struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
409         bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
410         bool need_fence, need_mappable;
411         int ret;
412
413         need_fence =
414                 has_fenced_gpu_access &&
415                 entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
416                 obj->tiling_mode != I915_TILING_NONE;
417         need_mappable = need_fence || need_reloc_mappable(obj);
418
419         ret = i915_gem_object_pin(obj, vm, entry->alignment, need_mappable,
420                                   false);
421         if (ret)
422                 return ret;
423
424         entry->flags |= __EXEC_OBJECT_HAS_PIN;
425
426         if (has_fenced_gpu_access) {
427                 if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
428                         ret = i915_gem_object_get_fence(obj);
429                         if (ret)
430                                 return ret;
431
432                         if (i915_gem_object_pin_fence(obj))
433                                 entry->flags |= __EXEC_OBJECT_HAS_FENCE;
434
435                         obj->pending_fenced_gpu_access = true;
436                 }
437         }
438
439         /* Ensure ppgtt mapping exists if needed */
440         if (dev_priv->mm.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) {
441                 i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
442                                        obj, obj->cache_level);
443
444                 obj->has_aliasing_ppgtt_mapping = 1;
445         }
446
447         if (entry->offset != i915_gem_obj_offset(obj, vm)) {
448                 entry->offset = i915_gem_obj_offset(obj, vm);
449                 *need_reloc = true;
450         }
451
452         if (entry->flags & EXEC_OBJECT_WRITE) {
453                 obj->base.pending_read_domains = I915_GEM_DOMAIN_RENDER;
454                 obj->base.pending_write_domain = I915_GEM_DOMAIN_RENDER;
455         }
456
457         if (entry->flags & EXEC_OBJECT_NEEDS_GTT &&
458             !obj->has_global_gtt_mapping)
459                 i915_gem_gtt_bind_object(obj, obj->cache_level);
460
461         return 0;
462 }
463
464 static void
465 i915_gem_execbuffer_unreserve_object(struct drm_i915_gem_object *obj)
466 {
467         struct drm_i915_gem_exec_object2 *entry;
468
469         if (!i915_gem_obj_bound_any(obj))
470                 return;
471
472         entry = obj->exec_entry;
473
474         if (entry->flags & __EXEC_OBJECT_HAS_FENCE)
475                 i915_gem_object_unpin_fence(obj);
476
477         if (entry->flags & __EXEC_OBJECT_HAS_PIN)
478                 i915_gem_object_unpin(obj);
479
480         entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN);
481 }
482
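/* Reserve address space for every object in the execbuffer.  Buffers
 * that need a fence or mappable space are sorted to the front, then the
 * list is processed in passes: ill-fitting bindings are unbound,
 * already-bound objects are pinned, fresh objects are bound, and all
 * pins are dropped again at the end of the attempt.  On -ENOSPC we
 * evict everything once and retry before giving up.
 */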
483 static int
484 i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
485                             struct list_head *objects,
486                             struct i915_address_space *vm,
487                             bool *need_relocs)
488 {
489         struct drm_i915_gem_object *obj;
490         struct list_head ordered_objects;
491         bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
492         int retry;
493
494         INIT_LIST_HEAD(&ordered_objects);
495         while (!list_empty(objects)) {
496                 struct drm_i915_gem_exec_object2 *entry;
497                 bool need_fence, need_mappable;
498
499                 obj = list_first_entry(objects,
500                                        struct drm_i915_gem_object,
501                                        exec_list);
502                 entry = obj->exec_entry;
503
504                 need_fence =
505                         has_fenced_gpu_access &&
506                         entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
507                         obj->tiling_mode != I915_TILING_NONE;
508                 need_mappable = need_fence || need_reloc_mappable(obj);
509
510                 if (need_mappable)
511                         list_move(&obj->exec_list, &ordered_objects);
512                 else
513                         list_move_tail(&obj->exec_list, &ordered_objects);
514
515                 obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
516                 obj->base.pending_write_domain = 0;
517                 obj->pending_fenced_gpu_access = false;
518         }
519         list_splice(&ordered_objects, objects);
520
521         /* Attempt to pin all of the buffers into the GTT.
522          * This is done in 3 phases:
523          *
524          * 1a. Unbind all objects that do not match the GTT constraints for
525          *     the execbuffer (fenceable, mappable, alignment etc).
526          * 1b. Increment pin count for already bound objects.
527          * 2.  Bind new objects.
528          * 3.  Decrement pin count.
529          *
530          * This avoids unnecessary unbinding of later objects in order to make
531          * room for the earlier objects *unless* we need to defragment.
532          */
533         retry = 0;
534         do {
535                 int ret = 0;
536
537                 /* Unbind any ill-fitting objects or pin. */
538                 list_for_each_entry(obj, objects, exec_list) {
539                         struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
540                         bool need_fence, need_mappable;
541                         u32 obj_offset;
542
543                         if (!i915_gem_obj_bound(obj, vm))
544                                 continue;
545
546                         obj_offset = i915_gem_obj_offset(obj, vm);
547                         need_fence =
548                                 has_fenced_gpu_access &&
549                                 entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
550                                 obj->tiling_mode != I915_TILING_NONE;
551                         need_mappable = need_fence || need_reloc_mappable(obj);
552
553                         WARN_ON((need_mappable || need_fence) &&
554                                 !i915_is_ggtt(vm));
555
556                         if ((entry->alignment &&
557                              obj_offset & (entry->alignment - 1)) ||
558                             (need_mappable && !obj->map_and_fenceable))
559                                 ret = i915_vma_unbind(i915_gem_obj_to_vma(obj, vm));
560                         else
561                                 ret = i915_gem_execbuffer_reserve_object(obj, ring, vm, need_relocs);
562                         if (ret)
563                                 goto err;
564                 }
565
566                 /* Bind fresh objects */
567                 list_for_each_entry(obj, objects, exec_list) {
568                         if (i915_gem_obj_bound(obj, vm))
569                                 continue;
570
571                         ret = i915_gem_execbuffer_reserve_object(obj, ring, vm, need_relocs);
572                         if (ret)
573                                 goto err;
574                 }
575
576 err:            /* Decrement pin count for bound objects */
577                 list_for_each_entry(obj, objects, exec_list)
578                         i915_gem_execbuffer_unreserve_object(obj);
579
580                 if (ret != -ENOSPC || retry++)
581                         return ret;
582
583                 ret = i915_gem_evict_everything(ring->dev);
584                 if (ret)
585                         return ret;
586         } while (1);
587 }
588
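/* Slow-path relocation, entered when the fast path faulted on the user
 * relocation lists.  Drops all object references and struct_mutex,
 * copies every relocation entry into kernel memory (invalidating the
 * user's presumed offsets along the way), then retakes the lock,
 * re-looks-up and re-reserves the objects and applies the relocations
 * from the kernel copy.
 */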
589 static int
590 i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
591                                   struct drm_i915_gem_execbuffer2 *args,
592                                   struct drm_file *file,
593                                   struct intel_ring_buffer *ring,
594                                   struct eb_objects *eb,
595                                   struct drm_i915_gem_exec_object2 *exec,
596                                   struct i915_address_space *vm)
597 {
598         struct drm_i915_gem_relocation_entry *reloc;
599         struct drm_i915_gem_object *obj;
600         bool need_relocs;
601         int *reloc_offset;
602         int i, total, ret;
603         int count = args->buffer_count;
604
605         /* We may process another execbuffer during the unlock... */
606         while (!list_empty(&eb->objects)) {
607                 obj = list_first_entry(&eb->objects,
608                                        struct drm_i915_gem_object,
609                                        exec_list);
610                 list_del_init(&obj->exec_list);
611                 drm_gem_object_unreference(&obj->base);
612         }
613
614         mutex_unlock(&dev->struct_mutex);
615
616         total = 0;
617         for (i = 0; i < count; i++)
618                 total += exec[i].relocation_count;
619
620         reloc_offset = drm_malloc_ab(count, sizeof(*reloc_offset));
621         reloc = drm_malloc_ab(total, sizeof(*reloc));
622         if (reloc == NULL || reloc_offset == NULL) {
623                 drm_free_large(reloc);
624                 drm_free_large(reloc_offset);
625                 mutex_lock(&dev->struct_mutex);
626                 return -ENOMEM;
627         }
628
629         total = 0;
630         for (i = 0; i < count; i++) {
631                 struct drm_i915_gem_relocation_entry __user *user_relocs;
632                 u64 invalid_offset = (u64)-1;
633                 int j;
634
635                 user_relocs = to_user_ptr(exec[i].relocs_ptr);
636
637                 if (copy_from_user(reloc+total, user_relocs,
638                                    exec[i].relocation_count * sizeof(*reloc))) {
639                         ret = -EFAULT;
640                         mutex_lock(&dev->struct_mutex);
641                         goto err;
642                 }
643
644                 /* As we do not update the known relocation offsets after
645                  * relocating (due to the complexities in lock handling),
646                  * we need to mark them as invalid now so that we force the
647                  * relocation processing next time. Just in case the target
648                  * object is evicted and then rebound into its old
649                  * presumed_offset before the next execbuffer - if that
650                  * happened we would make the mistake of assuming that the
651                  * relocations were valid.
652                  */
653                 for (j = 0; j < exec[i].relocation_count; j++) {
654                         if (copy_to_user(&user_relocs[j].presumed_offset,
655                                          &invalid_offset,
656                                          sizeof(invalid_offset))) {
657                                 ret = -EFAULT;
658                                 mutex_lock(&dev->struct_mutex);
659                                 goto err;
660                         }
661                 }
662
663                 reloc_offset[i] = total;
664                 total += exec[i].relocation_count;
665         }
666
667         ret = i915_mutex_lock_interruptible(dev);
668         if (ret) {
669                 mutex_lock(&dev->struct_mutex);
670                 goto err;
671         }
672
673         /* reacquire the objects */
674         eb_reset(eb);
675         ret = eb_lookup_objects(eb, exec, args, file);
676         if (ret)
677                 goto err;
678
679         need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
680         ret = i915_gem_execbuffer_reserve(ring, &eb->objects, vm, &need_relocs);
681         if (ret)
682                 goto err;
683
684         list_for_each_entry(obj, &eb->objects, exec_list) {
685                 int offset = obj->exec_entry - exec;
686                 ret = i915_gem_execbuffer_relocate_object_slow(obj, eb,
687                                                                reloc + reloc_offset[offset],
688                                                                vm);
689                 if (ret)
690                         goto err;
691         }
692
693         /* Leave the user relocations as they are; this is the painfully slow path,
694          * and we want to avoid the complication of dropping the lock whilst
695          * having buffers reserved in the aperture and so causing spurious
696          * ENOSPC for random operations.
697          */
698
699 err:
700         drm_free_large(reloc);
701         drm_free_large(reloc_offset);
702         return ret;
703 }
704
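/* Prepare the objects for execution on the ring: synchronise each one
 * with outstanding rendering on other rings, clflush anything still
 * dirty in the CPU domain, flush the chipset and GTT write buffers as
 * needed and finally invalidate the GPU caches so the batch sees
 * coherent data.
 */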
705 static int
706 i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
707                                 struct list_head *objects)
708 {
709         struct drm_i915_gem_object *obj;
710         uint32_t flush_domains = 0;
711         int ret;
712
713         list_for_each_entry(obj, objects, exec_list) {
714                 ret = i915_gem_object_sync(obj, ring);
715                 if (ret)
716                         return ret;
717
718                 if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
719                         i915_gem_clflush_object(obj);
720
721                 flush_domains |= obj->base.write_domain;
722         }
723
724         if (flush_domains & I915_GEM_DOMAIN_CPU)
725                 i915_gem_chipset_flush(ring->dev);
726
727         if (flush_domains & I915_GEM_DOMAIN_GTT)
728                 wmb();
729
730         /* Unconditionally invalidate gpu caches and ensure that we do flush
731          * any residual writes from the previous batch.
732          */
733         return intel_ring_invalidate_all_caches(ring);
734 }
735
736 static bool
737 i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
738 {
739         if (exec->flags & __I915_EXEC_UNKNOWN_FLAGS)
740                 return false;
741
742         return ((exec->batch_start_offset | exec->batch_len) & 0x7) == 0;
743 }
744
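/* Sanity-check the user-supplied exec list before any locks are taken:
 * reject unknown per-object flags, guard against the total relocation
 * count overflowing the single array allocated by the slow path, and
 * verify (and, unless disabled, prefault) the relocation buffers for
 * write access.
 */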
745 static int
746 validate_exec_list(struct drm_i915_gem_exec_object2 *exec,
747                    int count)
748 {
749         int i;
750         int relocs_total = 0;
751         int relocs_max = INT_MAX / sizeof(struct drm_i915_gem_relocation_entry);
752
753         for (i = 0; i < count; i++) {
754                 char __user *ptr = to_user_ptr(exec[i].relocs_ptr);
755                 int length; /* limited by fault_in_pages_readable() */
756
757                 if (exec[i].flags & __EXEC_OBJECT_UNKNOWN_FLAGS)
758                         return -EINVAL;
759
760                 /* First check for malicious input causing overflow in
761                  * the worst case where we need to allocate the entire
762                  * relocation tree as a single array.
763                  */
764                 if (exec[i].relocation_count > relocs_max - relocs_total)
765                         return -EINVAL;
766                 relocs_total += exec[i].relocation_count;
767
768                 length = exec[i].relocation_count *
769                         sizeof(struct drm_i915_gem_relocation_entry);
770                 /*
771                  * We must check that the entire relocation array is safe
772                  * to read, but since we may need to update the presumed
773                  * offsets during execution, check for full write access.
774                  */
775                 if (!access_ok(VERIFY_WRITE, ptr, length))
776                         return -EFAULT;
777
778                 if (likely(!i915_prefault_disable)) {
779                         if (fault_in_multipages_readable(ptr, length))
780                                 return -EFAULT;
781                 }
782         }
783
784         return 0;
785 }
786
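/* After a successful dispatch, commit the pending read/write domains to
 * each object, move it to the VM's active list and onto the ring, and
 * record the write seqno for dirtied buffers (bumping frame-buffer
 * activity for pinned scanout candidates).
 */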
787 static void
788 i915_gem_execbuffer_move_to_active(struct list_head *objects,
789                                    struct i915_address_space *vm,
790                                    struct intel_ring_buffer *ring)
791 {
792         struct drm_i915_gem_object *obj;
793
794         list_for_each_entry(obj, objects, exec_list) {
795                 u32 old_read = obj->base.read_domains;
796                 u32 old_write = obj->base.write_domain;
797
798                 obj->base.write_domain = obj->base.pending_write_domain;
799                 if (obj->base.write_domain == 0)
800                         obj->base.pending_read_domains |= obj->base.read_domains;
801                 obj->base.read_domains = obj->base.pending_read_domains;
802                 obj->fenced_gpu_access = obj->pending_fenced_gpu_access;
803
804                 /* FIXME: This lookup gets fixed later <-- danvet */
805                 list_move_tail(&i915_gem_obj_to_vma(obj, vm)->mm_list, &vm->active_list);
806                 i915_gem_object_move_to_active(obj, ring);
807                 if (obj->base.write_domain) {
808                         obj->dirty = 1;
809                         obj->last_write_seqno = intel_ring_get_seqno(ring);
810                         if (obj->pin_count) /* check for potential scanout */
811                                 intel_mark_fb_busy(obj, ring);
812                 }
813
814                 trace_i915_gem_object_change_domain(obj, old_read, old_write);
815         }
816 }
817
818 static void
819 i915_gem_execbuffer_retire_commands(struct drm_device *dev,
820                                     struct drm_file *file,
821                                     struct intel_ring_buffer *ring,
822                                     struct drm_i915_gem_object *obj)
823 {
824         /* Unconditionally force add_request to emit a full flush. */
825         ring->gpu_caches_dirty = true;
826
827         /* Add a breadcrumb for the completion of the batch buffer */
828         (void)__i915_add_request(ring, file, obj, NULL);
829 }
830
831 static int
832 i915_reset_gen7_sol_offsets(struct drm_device *dev,
833                             struct intel_ring_buffer *ring)
834 {
835         drm_i915_private_t *dev_priv = dev->dev_private;
836         int ret, i;
837
838         if (!IS_GEN7(dev) || ring != &dev_priv->ring[RCS])
839                 return 0;
840
841         ret = intel_ring_begin(ring, 4 * 3);
842         if (ret)
843                 return ret;
844
845         for (i = 0; i < 4; i++) {
846                 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
847                 intel_ring_emit(ring, GEN7_SO_WRITE_OFFSET(i));
848                 intel_ring_emit(ring, 0);
849         }
850
851         intel_ring_advance(ring);
852
853         return 0;
854 }
855
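/* Common implementation behind the execbuffer and execbuffer2 ioctls.
 * Validates the arguments, picks the target ring and constants mode,
 * copies in any cliprects, reserves and relocates the objects, flushes
 * them to the GPU, switches hardware context and dispatches the batch
 * (once per cliprect on pre-gen5 render), before retiring the request.
 */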
856 static int
857 i915_gem_do_execbuffer(struct drm_device *dev, void *data,
858                        struct drm_file *file,
859                        struct drm_i915_gem_execbuffer2 *args,
860                        struct drm_i915_gem_exec_object2 *exec,
861                        struct i915_address_space *vm)
862 {
863         drm_i915_private_t *dev_priv = dev->dev_private;
864         struct eb_objects *eb;
865         struct drm_i915_gem_object *batch_obj;
866         struct drm_clip_rect *cliprects = NULL;
867         struct intel_ring_buffer *ring;
868         u32 ctx_id = i915_execbuffer2_get_context_id(*args);
869         u32 exec_start, exec_len;
870         u32 mask, flags;
871         int ret, mode, i;
872         bool need_relocs;
873
874         if (!i915_gem_check_execbuffer(args))
875                 return -EINVAL;
876
877         ret = validate_exec_list(exec, args->buffer_count);
878         if (ret)
879                 return ret;
880
881         flags = 0;
882         if (args->flags & I915_EXEC_SECURE) {
883                 if (!file->is_master || !capable(CAP_SYS_ADMIN))
884                         return -EPERM;
885
886                 flags |= I915_DISPATCH_SECURE;
887         }
888         if (args->flags & I915_EXEC_IS_PINNED)
889                 flags |= I915_DISPATCH_PINNED;
890
891         switch (args->flags & I915_EXEC_RING_MASK) {
892         case I915_EXEC_DEFAULT:
893         case I915_EXEC_RENDER:
894                 ring = &dev_priv->ring[RCS];
895                 break;
896         case I915_EXEC_BSD:
897                 ring = &dev_priv->ring[VCS];
898                 if (ctx_id != DEFAULT_CONTEXT_ID) {
899                         DRM_DEBUG("Ring %s doesn't support contexts\n",
900                                   ring->name);
901                         return -EPERM;
902                 }
903                 break;
904         case I915_EXEC_BLT:
905                 ring = &dev_priv->ring[BCS];
906                 if (ctx_id != DEFAULT_CONTEXT_ID) {
907                         DRM_DEBUG("Ring %s doesn't support contexts\n",
908                                   ring->name);
909                         return -EPERM;
910                 }
911                 break;
912         case I915_EXEC_VEBOX:
913                 ring = &dev_priv->ring[VECS];
914                 if (ctx_id != DEFAULT_CONTEXT_ID) {
915                         DRM_DEBUG("Ring %s doesn't support contexts\n",
916                                   ring->name);
917                         return -EPERM;
918                 }
919                 break;
920
921         default:
922                 DRM_DEBUG("execbuf with unknown ring: %d\n",
923                           (int)(args->flags & I915_EXEC_RING_MASK));
924                 return -EINVAL;
925         }
926         if (!intel_ring_initialized(ring)) {
927                 DRM_DEBUG("execbuf with invalid ring: %d\n",
928                           (int)(args->flags & I915_EXEC_RING_MASK));
929                 return -EINVAL;
930         }
931
932         mode = args->flags & I915_EXEC_CONSTANTS_MASK;
933         mask = I915_EXEC_CONSTANTS_MASK;
934         switch (mode) {
935         case I915_EXEC_CONSTANTS_REL_GENERAL:
936         case I915_EXEC_CONSTANTS_ABSOLUTE:
937         case I915_EXEC_CONSTANTS_REL_SURFACE:
938                 if (ring == &dev_priv->ring[RCS] &&
939                     mode != dev_priv->relative_constants_mode) {
940                         if (INTEL_INFO(dev)->gen < 4)
941                                 return -EINVAL;
942
943                         if (INTEL_INFO(dev)->gen > 5 &&
944                             mode == I915_EXEC_CONSTANTS_REL_SURFACE)
945                                 return -EINVAL;
946
947                         /* The HW changed the meaning of this bit on gen6 */
948                         if (INTEL_INFO(dev)->gen >= 6)
949                                 mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
950                 }
951                 break;
952         default:
953                 DRM_DEBUG("execbuf with unknown constants: %d\n", mode);
954                 return -EINVAL;
955         }
956
957         if (args->buffer_count < 1) {
958                 DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
959                 return -EINVAL;
960         }
961
962         if (args->num_cliprects != 0) {
963                 if (ring != &dev_priv->ring[RCS]) {
964                         DRM_DEBUG("clip rectangles are only valid with the render ring\n");
965                         return -EINVAL;
966                 }
967
968                 if (INTEL_INFO(dev)->gen >= 5) {
969                         DRM_DEBUG("clip rectangles are only valid on pre-gen5\n");
970                         return -EINVAL;
971                 }
972
973                 if (args->num_cliprects > UINT_MAX / sizeof(*cliprects)) {
974                         DRM_DEBUG("execbuf with %u cliprects\n",
975                                   args->num_cliprects);
976                         return -EINVAL;
977                 }
978
979                 cliprects = kmalloc(args->num_cliprects * sizeof(*cliprects),
980                                     GFP_KERNEL);
981                 if (cliprects == NULL) {
982                         ret = -ENOMEM;
983                         goto pre_mutex_err;
984                 }
985
986                 if (copy_from_user(cliprects,
987                                    to_user_ptr(args->cliprects_ptr),
988                                    sizeof(*cliprects)*args->num_cliprects)) {
989                         ret = -EFAULT;
990                         goto pre_mutex_err;
991                 }
992         }
993
994         ret = i915_mutex_lock_interruptible(dev);
995         if (ret)
996                 goto pre_mutex_err;
997
998         if (dev_priv->ums.mm_suspended) {
999                 mutex_unlock(&dev->struct_mutex);
1000                 ret = -EBUSY;
1001                 goto pre_mutex_err;
1002         }
1003
1004         eb = eb_create(args);
1005         if (eb == NULL) {
1006                 mutex_unlock(&dev->struct_mutex);
1007                 ret = -ENOMEM;
1008                 goto pre_mutex_err;
1009         }
1010
1011         /* Look up object handles */
1012         ret = eb_lookup_objects(eb, exec, args, file);
1013         if (ret)
1014                 goto err;
1015
1016         /* take note of the batch buffer before we might reorder the lists */
1017         batch_obj = list_entry(eb->objects.prev,
1018                                struct drm_i915_gem_object,
1019                                exec_list);
1020
1021         /* Move the objects en-masse into the GTT, evicting if necessary. */
1022         need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
1023         ret = i915_gem_execbuffer_reserve(ring, &eb->objects, vm, &need_relocs);
1024         if (ret)
1025                 goto err;
1026
1027         /* The objects are in their final locations, apply the relocations. */
1028         if (need_relocs)
1029                 ret = i915_gem_execbuffer_relocate(eb, vm);
1030         if (ret) {
1031                 if (ret == -EFAULT) {
1032                         ret = i915_gem_execbuffer_relocate_slow(dev, args, file, ring,
1033                                                                 eb, exec, vm);
1034                         BUG_ON(!mutex_is_locked(&dev->struct_mutex));
1035                 }
1036                 if (ret)
1037                         goto err;
1038         }
1039
1040         /* Set the pending read domains for the batch buffer to COMMAND */
1041         if (batch_obj->base.pending_write_domain) {
1042                 DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
1043                 ret = -EINVAL;
1044                 goto err;
1045         }
1046         batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
1047
1048         /* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
1049          * batch" bit. Hence we need to pin secure batches into the global gtt.
1050          * hsw should have this fixed, but let's be paranoid and do it
1051          * unconditionally for now. */
1052         if (flags & I915_DISPATCH_SECURE && !batch_obj->has_global_gtt_mapping)
1053                 i915_gem_gtt_bind_object(batch_obj, batch_obj->cache_level);
1054
1055         ret = i915_gem_execbuffer_move_to_gpu(ring, &eb->objects);
1056         if (ret)
1057                 goto err;
1058
1059         ret = i915_switch_context(ring, file, ctx_id);
1060         if (ret)
1061                 goto err;
1062
1063         if (ring == &dev_priv->ring[RCS] &&
1064             mode != dev_priv->relative_constants_mode) {
1065                 ret = intel_ring_begin(ring, 4);
1066                 if (ret)
1067                         goto err;
1068
1069                 intel_ring_emit(ring, MI_NOOP);
1070                 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
1071                 intel_ring_emit(ring, INSTPM);
1072                 intel_ring_emit(ring, mask << 16 | mode);
1073                 intel_ring_advance(ring);
1074
1075                 dev_priv->relative_constants_mode = mode;
1076         }
1077
1078         if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
1079                 ret = i915_reset_gen7_sol_offsets(dev, ring);
1080                 if (ret)
1081                         goto err;
1082         }
1083
1084         exec_start = i915_gem_obj_offset(batch_obj, vm) +
1085                 args->batch_start_offset;
1086         exec_len = args->batch_len;
1087         if (cliprects) {
1088                 for (i = 0; i < args->num_cliprects; i++) {
1089                         ret = i915_emit_box(dev, &cliprects[i],
1090                                             args->DR1, args->DR4);
1091                         if (ret)
1092                                 goto err;
1093
1094                         ret = ring->dispatch_execbuffer(ring,
1095                                                         exec_start, exec_len,
1096                                                         flags);
1097                         if (ret)
1098                                 goto err;
1099                 }
1100         } else {
1101                 ret = ring->dispatch_execbuffer(ring,
1102                                                 exec_start, exec_len,
1103                                                 flags);
1104                 if (ret)
1105                         goto err;
1106         }
1107
1108         trace_i915_gem_ring_dispatch(ring, intel_ring_get_seqno(ring), flags);
1109
1110         i915_gem_execbuffer_move_to_active(&eb->objects, vm, ring);
1111         i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj);
1112
1113 err:
1114         eb_destroy(eb);
1115
1116         mutex_unlock(&dev->struct_mutex);
1117
1118 pre_mutex_err:
1119         kfree(cliprects);
1120         return ret;
1121 }
1122
1123 /*
1124  * Legacy execbuffer just creates an exec2 list from the original exec object
1125  * list array and passes it to the real function.
1126  */
1127 int
1128 i915_gem_execbuffer(struct drm_device *dev, void *data,
1129                     struct drm_file *file)
1130 {
1131         struct drm_i915_private *dev_priv = dev->dev_private;
1132         struct drm_i915_gem_execbuffer *args = data;
1133         struct drm_i915_gem_execbuffer2 exec2;
1134         struct drm_i915_gem_exec_object *exec_list = NULL;
1135         struct drm_i915_gem_exec_object2 *exec2_list = NULL;
1136         int ret, i;
1137
1138         if (args->buffer_count < 1) {
1139                 DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
1140                 return -EINVAL;
1141         }
1142
1143         /* Copy in the exec list from userland */
1144         exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count);
1145         exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
1146         if (exec_list == NULL || exec2_list == NULL) {
1147                 DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
1148                           args->buffer_count);
1149                 drm_free_large(exec_list);
1150                 drm_free_large(exec2_list);
1151                 return -ENOMEM;
1152         }
1153         ret = copy_from_user(exec_list,
1154                              to_user_ptr(args->buffers_ptr),
1155                              sizeof(*exec_list) * args->buffer_count);
1156         if (ret != 0) {
1157                 DRM_DEBUG("copy %d exec entries failed %d\n",
1158                           args->buffer_count, ret);
1159                 drm_free_large(exec_list);
1160                 drm_free_large(exec2_list);
1161                 return -EFAULT;
1162         }
1163
1164         for (i = 0; i < args->buffer_count; i++) {
1165                 exec2_list[i].handle = exec_list[i].handle;
1166                 exec2_list[i].relocation_count = exec_list[i].relocation_count;
1167                 exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
1168                 exec2_list[i].alignment = exec_list[i].alignment;
1169                 exec2_list[i].offset = exec_list[i].offset;
1170                 if (INTEL_INFO(dev)->gen < 4)
1171                         exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
1172                 else
1173                         exec2_list[i].flags = 0;
1174         }
1175
1176         exec2.buffers_ptr = args->buffers_ptr;
1177         exec2.buffer_count = args->buffer_count;
1178         exec2.batch_start_offset = args->batch_start_offset;
1179         exec2.batch_len = args->batch_len;
1180         exec2.DR1 = args->DR1;
1181         exec2.DR4 = args->DR4;
1182         exec2.num_cliprects = args->num_cliprects;
1183         exec2.cliprects_ptr = args->cliprects_ptr;
1184         exec2.flags = I915_EXEC_RENDER;
1185         i915_execbuffer2_set_context_id(exec2, 0);
1186
1187         ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list,
1188                                      &dev_priv->gtt.base);
1189         if (!ret) {
1190                 /* Copy the new buffer offsets back to the user's exec list. */
1191                 for (i = 0; i < args->buffer_count; i++)
1192                         exec_list[i].offset = exec2_list[i].offset;
1193                 /* ... and back out to userspace */
1194                 ret = copy_to_user(to_user_ptr(args->buffers_ptr),
1195                                    exec_list,
1196                                    sizeof(*exec_list) * args->buffer_count);
1197                 if (ret) {
1198                         ret = -EFAULT;
1199                         DRM_DEBUG("failed to copy %d exec entries "
1200                                   "back to user (%d)\n",
1201                                   args->buffer_count, ret);
1202                 }
1203         }
1204
1205         drm_free_large(exec_list);
1206         drm_free_large(exec2_list);
1207         return ret;
1208 }
1209
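/* execbuffer2 ioctl entry point: copy the exec object list from
 * userspace (trying kmalloc before falling back to drm_malloc_ab for
 * large lists), run the common i915_gem_do_execbuffer() against the
 * global GTT, and copy the updated offsets back to userspace on
 * success.
 */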
1210 int
1211 i915_gem_execbuffer2(struct drm_device *dev, void *data,
1212                      struct drm_file *file)
1213 {
1214         struct drm_i915_private *dev_priv = dev->dev_private;
1215         struct drm_i915_gem_execbuffer2 *args = data;
1216         struct drm_i915_gem_exec_object2 *exec2_list = NULL;
1217         int ret;
1218
1219         if (args->buffer_count < 1 ||
1220             args->buffer_count > UINT_MAX / sizeof(*exec2_list)) {
1221                 DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count);
1222                 return -EINVAL;
1223         }
1224
1225         exec2_list = kmalloc(sizeof(*exec2_list)*args->buffer_count,
1226                              GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
1227         if (exec2_list == NULL)
1228                 exec2_list = drm_malloc_ab(sizeof(*exec2_list),
1229                                            args->buffer_count);
1230         if (exec2_list == NULL) {
1231                 DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
1232                           args->buffer_count);
1233                 return -ENOMEM;
1234         }
1235         ret = copy_from_user(exec2_list,
1236                              to_user_ptr(args->buffers_ptr),
1237                              sizeof(*exec2_list) * args->buffer_count);
1238         if (ret != 0) {
1239                 DRM_DEBUG("copy %d exec entries failed %d\n",
1240                           args->buffer_count, ret);
1241                 drm_free_large(exec2_list);
1242                 return -EFAULT;
1243         }
1244
1245         ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list,
1246                                      &dev_priv->gtt.base);
1247         if (!ret) {
1248                 /* Copy the new buffer offsets back to the user's exec list. */
1249                 ret = copy_to_user(to_user_ptr(args->buffers_ptr),
1250                                    exec2_list,
1251                                    sizeof(*exec2_list) * args->buffer_count);
1252                 if (ret) {
1253                         ret = -EFAULT;
1254                         DRM_DEBUG("failed to copy %d exec entries "
1255                                   "back to user (%d)\n",
1256                                   args->buffer_count, ret);
1257                 }
1258         }
1259
1260         drm_free_large(exec2_list);
1261         return ret;
1262 }