1 /*
2  * Copyright © 2008 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *
26  */
27
28 #include "drmP.h"
29 #include "drm.h"
30 #include "i915_drm.h"
31 #include "i915_drv.h"
32 #include "i915_trace.h"
33 #include "intel_drv.h"
34 #include <linux/slab.h>
35 #include <linux/swap.h>
36 #include <linux/pci.h>
37 #include <linux/intel-gtt.h>
38
39 static uint32_t i915_gem_get_gtt_alignment(struct drm_gem_object *obj);
40
41 static int i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj,
42                                                   bool pipelined);
43 static void i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj);
44 static void i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj);
45 static int i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj,
46                                              int write);
47 static int i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj,
48                                                      uint64_t offset,
49                                                      uint64_t size);
50 static void i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj);
51 static int i915_gem_object_wait_rendering(struct drm_gem_object *obj,
52                                           bool interruptible);
53 static int i915_gem_object_bind_to_gtt(struct drm_gem_object *obj,
54                                        unsigned alignment, bool mappable);
55 static void i915_gem_clear_fence_reg(struct drm_gem_object *obj);
56 static int i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
57                                 struct drm_i915_gem_pwrite *args,
58                                 struct drm_file *file_priv);
59 static void i915_gem_free_object_tail(struct drm_gem_object *obj);
60
61 static int
62 i915_gem_object_get_pages(struct drm_gem_object *obj,
63                           gfp_t gfpmask);
64
65 static void
66 i915_gem_object_put_pages(struct drm_gem_object *obj);
67
68 static LIST_HEAD(shrink_list);
69 static DEFINE_SPINLOCK(shrink_list_lock);
70
71 /* some bookkeeping */
72 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
73                                   size_t size)
74 {
75         dev_priv->mm.object_count++;
76         dev_priv->mm.object_memory += size;
77 }
78
79 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
80                                      size_t size)
81 {
82         dev_priv->mm.object_count--;
83         dev_priv->mm.object_memory -= size;
84 }
85
86 static void i915_gem_info_add_gtt(struct drm_i915_private *dev_priv,
87                                   struct drm_gem_object *obj)
88 {
89         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
90         dev_priv->mm.gtt_count++;
91         dev_priv->mm.gtt_memory += obj->size;
92         if (obj_priv->gtt_offset < dev_priv->mm.gtt_mappable_end) {
93                 dev_priv->mm.mappable_gtt_used +=
94                         min_t(size_t, obj->size,
95                               dev_priv->mm.gtt_mappable_end
96                                         - obj_priv->gtt_offset);
97         }
98 }
99
100 static void i915_gem_info_remove_gtt(struct drm_i915_private *dev_priv,
101                                      struct drm_gem_object *obj)
102 {
103         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
104         dev_priv->mm.gtt_count--;
105         dev_priv->mm.gtt_memory -= obj->size;
106         if (obj_priv->gtt_offset < dev_priv->mm.gtt_mappable_end) {
107                 dev_priv->mm.mappable_gtt_used -=
108                         min_t(size_t, obj->size,
109                               dev_priv->mm.gtt_mappable_end
110                                         - obj_priv->gtt_offset);
111         }
112 }
113
114 /**
115  * Update the mappable working set counters. Call _only_ when there is a change
116  * in one of (pin|fault)_mappable and update *_mappable _before_ calling.
117  * @mappable: new state of the changed mappable flag (either pin_ or fault_).
118  */
119 static void
120 i915_gem_info_update_mappable(struct drm_i915_private *dev_priv,
121                               struct drm_gem_object *obj,
122                               bool mappable)
123 {
124         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
125
126         if (mappable) {
127                 if (obj_priv->pin_mappable && obj_priv->fault_mappable)
128                         /* Combined state was already mappable. */
129                         return;
130                 dev_priv->mm.gtt_mappable_count++;
131                 dev_priv->mm.gtt_mappable_memory += obj->size;
132         } else {
133                 if (obj_priv->pin_mappable || obj_priv->fault_mappable)
134                         /* Combined state still mappable. */
135                         return;
136                 dev_priv->mm.gtt_mappable_count--;
137                 dev_priv->mm.gtt_mappable_memory -= obj->size;
138         }
139 }
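
/*
 * Illustrative call pattern for the helper above (a sketch drawn from its
 * callers later in this file, not extra driver logic): the pin_/fault_
 * mappable flag is flipped first, then the accounting helper is told the
 * new state, e.g.
 *
 *        obj_priv->fault_mappable = true;
 *        i915_gem_info_update_mappable(dev_priv, obj, true);
 */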
140
141 static void i915_gem_info_add_pin(struct drm_i915_private *dev_priv,
142                                   struct drm_gem_object *obj,
143                                   bool mappable)
144 {
145         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
146         dev_priv->mm.pin_count++;
147         dev_priv->mm.pin_memory += obj->size;
148         if (mappable) {
149                 obj_priv->pin_mappable = true;
150                 i915_gem_info_update_mappable(dev_priv, obj, true);
151         }
152 }
153
154 static void i915_gem_info_remove_pin(struct drm_i915_private *dev_priv,
155                                      struct drm_gem_object *obj)
156 {
157         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
158         dev_priv->mm.pin_count--;
159         dev_priv->mm.pin_memory -= obj->size;
160         if (obj_priv->pin_mappable) {
161                 obj_priv->pin_mappable = false;
162                 i915_gem_info_update_mappable(dev_priv, obj, false);
163         }
164 }
165
166 int
167 i915_gem_check_is_wedged(struct drm_device *dev)
168 {
169         struct drm_i915_private *dev_priv = dev->dev_private;
170         struct completion *x = &dev_priv->error_completion;
171         unsigned long flags;
172         int ret;
173
174         if (!atomic_read(&dev_priv->mm.wedged))
175                 return 0;
176
177         ret = wait_for_completion_interruptible(x);
178         if (ret)
179                 return ret;
180
181         /* Success, we reset the GPU! */
182         if (!atomic_read(&dev_priv->mm.wedged))
183                 return 0;
184
185         /* GPU is hung, bump the completion count to account for
186          * the token we just consumed so that we never hit zero and
187          * end up waiting upon a subsequent completion event that
188          * will never happen.
189          */
190         spin_lock_irqsave(&x->wait.lock, flags);
191         x->done++;
192         spin_unlock_irqrestore(&x->wait.lock, flags);
193         return -EIO;
194 }
195
196 static int i915_mutex_lock_interruptible(struct drm_device *dev)
197 {
198         struct drm_i915_private *dev_priv = dev->dev_private;
199         int ret;
200
201         ret = i915_gem_check_is_wedged(dev);
202         if (ret)
203                 return ret;
204
205         ret = mutex_lock_interruptible(&dev->struct_mutex);
206         if (ret)
207                 return ret;
208
209         if (atomic_read(&dev_priv->mm.wedged)) {
210                 mutex_unlock(&dev->struct_mutex);
211                 return -EAGAIN;
212         }
213
214         WARN_ON(i915_verify_lists(dev));
215         return 0;
216 }
217
218 static inline bool
219 i915_gem_object_is_inactive(struct drm_i915_gem_object *obj_priv)
220 {
221         return obj_priv->gtt_space &&
222                 !obj_priv->active &&
223                 obj_priv->pin_count == 0;
224 }
225
226 int i915_gem_do_init(struct drm_device *dev,
227                      unsigned long start,
228                      unsigned long mappable_end,
229                      unsigned long end)
230 {
231         drm_i915_private_t *dev_priv = dev->dev_private;
232
233         if (start >= end ||
234             (start & (PAGE_SIZE - 1)) != 0 ||
235             (end & (PAGE_SIZE - 1)) != 0) {
236                 return -EINVAL;
237         }
238
239         drm_mm_init(&dev_priv->mm.gtt_space, start,
240                     end - start);
241
242         dev_priv->mm.gtt_total = end - start;
243         dev_priv->mm.mappable_gtt_total = min(end, mappable_end) - start;
244         dev_priv->mm.gtt_mappable_end = mappable_end;
245
246         return 0;
247 }
248
249 int
250 i915_gem_init_ioctl(struct drm_device *dev, void *data,
251                     struct drm_file *file_priv)
252 {
253         struct drm_i915_gem_init *args = data;
254         int ret;
255
256         mutex_lock(&dev->struct_mutex);
257         ret = i915_gem_do_init(dev, args->gtt_start, args->gtt_end, args->gtt_end);
258         mutex_unlock(&dev->struct_mutex);
259
260         return ret;
261 }
262
263 int
264 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
265                             struct drm_file *file_priv)
266 {
267         struct drm_i915_private *dev_priv = dev->dev_private;
268         struct drm_i915_gem_get_aperture *args = data;
269
270         if (!(dev->driver->driver_features & DRIVER_GEM))
271                 return -ENODEV;
272
273         mutex_lock(&dev->struct_mutex);
274         args->aper_size = dev_priv->mm.gtt_total;
275         args->aper_available_size = args->aper_size - dev_priv->mm.pin_memory;
276         mutex_unlock(&dev->struct_mutex);
277
278         return 0;
279 }
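
/*
 * A minimal userspace sketch of this query (field names per the i915 uAPI
 * headers; libdrm normally wraps this, error handling omitted):
 *
 *        struct drm_i915_gem_get_aperture aper = { 0 };
 *        ioctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aper);
 *        // aper.aper_size is the total GTT; aper.aper_available_size is
 *        // the total minus the currently pinned memory.
 */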
280
281
282 /**
283  * Creates a new mm object and returns a handle to it.
284  */
285 int
286 i915_gem_create_ioctl(struct drm_device *dev, void *data,
287                       struct drm_file *file_priv)
288 {
289         struct drm_i915_gem_create *args = data;
290         struct drm_gem_object *obj;
291         int ret;
292         u32 handle;
293
294         args->size = roundup(args->size, PAGE_SIZE);
295
296         /* Allocate the new object */
297         obj = i915_gem_alloc_object(dev, args->size);
298         if (obj == NULL)
299                 return -ENOMEM;
300
301         ret = drm_gem_handle_create(file_priv, obj, &handle);
302         if (ret) {
303                 drm_gem_object_release(obj);
304                 i915_gem_info_remove_obj(dev->dev_private, obj->size);
305                 kfree(obj);
306                 return ret;
307         }
308
309         /* drop reference from allocate - handle holds it now */
310         drm_gem_object_unreference(obj);
311         trace_i915_gem_object_create(obj);
312
313         args->handle = handle;
314         return 0;
315 }
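
/*
 * Userspace reaches the ioctl above through DRM_IOCTL_I915_GEM_CREATE; a
 * minimal sketch of the call (error handling omitted):
 *
 *        struct drm_i915_gem_create create = { .size = 4096 };
 *        ioctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create);
 *        // create.handle now names the new object; the kernel rounds the
 *        // requested size up to a multiple of PAGE_SIZE.
 */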
316
317 static bool
318 i915_gem_object_cpu_accessible(struct drm_i915_gem_object *obj)
319 {
320         struct drm_device *dev = obj->base.dev;
321         drm_i915_private_t *dev_priv = dev->dev_private;
322
323         return obj->gtt_space == NULL ||
324                 obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end;
325 }
326
327 static inline int
328 fast_shmem_read(struct page **pages,
329                 loff_t page_base, int page_offset,
330                 char __user *data,
331                 int length)
332 {
333         char *vaddr;
334         int ret;
335
336         vaddr = kmap_atomic(pages[page_base >> PAGE_SHIFT]);
337         ret = __copy_to_user_inatomic(data, vaddr + page_offset, length);
338         kunmap_atomic(vaddr);
339
340         return ret;
341 }
342
343 static int i915_gem_object_needs_bit17_swizzle(struct drm_gem_object *obj)
344 {
345         drm_i915_private_t *dev_priv = obj->dev->dev_private;
346         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
347
348         return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
349                 obj_priv->tiling_mode != I915_TILING_NONE;
350 }
351
352 static inline void
353 slow_shmem_copy(struct page *dst_page,
354                 int dst_offset,
355                 struct page *src_page,
356                 int src_offset,
357                 int length)
358 {
359         char *dst_vaddr, *src_vaddr;
360
361         dst_vaddr = kmap(dst_page);
362         src_vaddr = kmap(src_page);
363
364         memcpy(dst_vaddr + dst_offset, src_vaddr + src_offset, length);
365
366         kunmap(src_page);
367         kunmap(dst_page);
368 }
369
370 static inline void
371 slow_shmem_bit17_copy(struct page *gpu_page,
372                       int gpu_offset,
373                       struct page *cpu_page,
374                       int cpu_offset,
375                       int length,
376                       int is_read)
377 {
378         char *gpu_vaddr, *cpu_vaddr;
379
380         /* Use the unswizzled path if this page isn't affected. */
381         if ((page_to_phys(gpu_page) & (1 << 17)) == 0) {
382                 if (is_read)
383                         return slow_shmem_copy(cpu_page, cpu_offset,
384                                                gpu_page, gpu_offset, length);
385                 else
386                         return slow_shmem_copy(gpu_page, gpu_offset,
387                                                cpu_page, cpu_offset, length);
388         }
389
390         gpu_vaddr = kmap(gpu_page);
391         cpu_vaddr = kmap(cpu_page);
392
393         /* Copy the data, XORing A6 with A17 (1). The user already knows he's
394          * XORing with the other bits (A9 for Y, A9 and A10 for X)
395          */
396         while (length > 0) {
397                 int cacheline_end = ALIGN(gpu_offset + 1, 64);
398                 int this_length = min(cacheline_end - gpu_offset, length);
399                 int swizzled_gpu_offset = gpu_offset ^ 64;
400
401                 if (is_read) {
402                         memcpy(cpu_vaddr + cpu_offset,
403                                gpu_vaddr + swizzled_gpu_offset,
404                                this_length);
405                 } else {
406                         memcpy(gpu_vaddr + swizzled_gpu_offset,
407                                cpu_vaddr + cpu_offset,
408                                this_length);
409                 }
410                 cpu_offset += this_length;
411                 gpu_offset += this_length;
412                 length -= this_length;
413         }
414
415         kunmap(cpu_page);
416         kunmap(gpu_page);
417 }
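
/*
 * Worked example of the swizzle above: on a page whose physical address has
 * bit 17 set, gpu_offset ^ 64 swaps the two 64-byte cachelines within each
 * 128-byte block, so a read of object bytes 0..63 actually comes from bytes
 * 64..127 of the page, and vice versa (a description of the copy loop above,
 * not of any hardware register).
 */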
418
419 /**
420  * This is the fast shmem pread path, which attempts to copy_to_user directly
421  * from the backing pages of the object into the user's address space.  On a
422  * fault, it fails so we can fall back to i915_gem_shmem_pread_slow().
423  */
424 static int
425 i915_gem_shmem_pread_fast(struct drm_device *dev, struct drm_gem_object *obj,
426                           struct drm_i915_gem_pread *args,
427                           struct drm_file *file_priv)
428 {
429         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
430         ssize_t remain;
431         loff_t offset, page_base;
432         char __user *user_data;
433         int page_offset, page_length;
434
435         user_data = (char __user *) (uintptr_t) args->data_ptr;
436         remain = args->size;
437
438         obj_priv = to_intel_bo(obj);
439         offset = args->offset;
440
441         while (remain > 0) {
442                 /* Operation in this page
443                  *
444                  * page_base = byte offset of the page within the object
445                  * page_offset = offset within page
446                  * page_length = bytes to copy for this page
447                  */
448                 page_base = (offset & ~(PAGE_SIZE-1));
449                 page_offset = offset & (PAGE_SIZE-1);
450                 page_length = remain;
451                 if ((page_offset + remain) > PAGE_SIZE)
452                         page_length = PAGE_SIZE - page_offset;
453
454                 if (fast_shmem_read(obj_priv->pages,
455                                     page_base, page_offset,
456                                     user_data, page_length))
457                         return -EFAULT;
458
459                 remain -= page_length;
460                 user_data += page_length;
461                 offset += page_length;
462         }
463
464         return 0;
465 }
466
467 static int
468 i915_gem_object_get_pages_or_evict(struct drm_gem_object *obj)
469 {
470         int ret;
471
472         ret = i915_gem_object_get_pages(obj, __GFP_NORETRY | __GFP_NOWARN);
473
474         /* If we've insufficient memory to map in the pages, attempt
475          * to make some space by throwing out some old buffers.
476          */
477         if (ret == -ENOMEM) {
478                 struct drm_device *dev = obj->dev;
479
480                 ret = i915_gem_evict_something(dev, obj->size,
481                                                i915_gem_get_gtt_alignment(obj),
482                                                false);
483                 if (ret)
484                         return ret;
485
486                 ret = i915_gem_object_get_pages(obj, 0);
487         }
488
489         return ret;
490 }
491
492 /**
493  * This is the fallback shmem pread path, which pins the user pages with
494  * get_user_pages() before taking the struct_mutex, so that we can copy out
495  * of the object's backing pages while holding the struct mutex without
496  * taking page faults.
497  */
498 static int
499 i915_gem_shmem_pread_slow(struct drm_device *dev, struct drm_gem_object *obj,
500                           struct drm_i915_gem_pread *args,
501                           struct drm_file *file_priv)
502 {
503         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
504         struct mm_struct *mm = current->mm;
505         struct page **user_pages;
506         ssize_t remain;
507         loff_t offset, pinned_pages, i;
508         loff_t first_data_page, last_data_page, num_pages;
509         int shmem_page_index, shmem_page_offset;
510         int data_page_index,  data_page_offset;
511         int page_length;
512         int ret;
513         uint64_t data_ptr = args->data_ptr;
514         int do_bit17_swizzling;
515
516         remain = args->size;
517
518         /* Pin the user pages containing the data.  We can't fault while
519          * holding the struct mutex, yet we want to hold it while
520          * dereferencing the user data.
521          */
522         first_data_page = data_ptr / PAGE_SIZE;
523         last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
524         num_pages = last_data_page - first_data_page + 1;
525
526         user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
527         if (user_pages == NULL)
528                 return -ENOMEM;
529
530         mutex_unlock(&dev->struct_mutex);
531         down_read(&mm->mmap_sem);
532         pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
533                                       num_pages, 1, 0, user_pages, NULL);
534         up_read(&mm->mmap_sem);
535         mutex_lock(&dev->struct_mutex);
536         if (pinned_pages < num_pages) {
537                 ret = -EFAULT;
538                 goto out;
539         }
540
541         ret = i915_gem_object_set_cpu_read_domain_range(obj,
542                                                         args->offset,
543                                                         args->size);
544         if (ret)
545                 goto out;
546
547         do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
548
549         obj_priv = to_intel_bo(obj);
550         offset = args->offset;
551
552         while (remain > 0) {
553                 /* Operation in this page
554                  *
555                  * shmem_page_index = page number within shmem file
556                  * shmem_page_offset = offset within page in shmem file
557                  * data_page_index = page number in get_user_pages return
558                  * data_page_offset = offset within data_page_index page.
559                  * page_length = bytes to copy for this page
560                  */
561                 shmem_page_index = offset / PAGE_SIZE;
562                 shmem_page_offset = offset & ~PAGE_MASK;
563                 data_page_index = data_ptr / PAGE_SIZE - first_data_page;
564                 data_page_offset = data_ptr & ~PAGE_MASK;
565
566                 page_length = remain;
567                 if ((shmem_page_offset + page_length) > PAGE_SIZE)
568                         page_length = PAGE_SIZE - shmem_page_offset;
569                 if ((data_page_offset + page_length) > PAGE_SIZE)
570                         page_length = PAGE_SIZE - data_page_offset;
571
572                 if (do_bit17_swizzling) {
573                         slow_shmem_bit17_copy(obj_priv->pages[shmem_page_index],
574                                               shmem_page_offset,
575                                               user_pages[data_page_index],
576                                               data_page_offset,
577                                               page_length,
578                                               1);
579                 } else {
580                         slow_shmem_copy(user_pages[data_page_index],
581                                         data_page_offset,
582                                         obj_priv->pages[shmem_page_index],
583                                         shmem_page_offset,
584                                         page_length);
585                 }
586
587                 remain -= page_length;
588                 data_ptr += page_length;
589                 offset += page_length;
590         }
591
592 out:
593         for (i = 0; i < pinned_pages; i++) {
594                 SetPageDirty(user_pages[i]);
595                 page_cache_release(user_pages[i]);
596         }
597         drm_free_large(user_pages);
598
599         return ret;
600 }
601
602 /**
603  * Reads data from the object referenced by handle.
604  *
605  * On error, the contents of *data are undefined.
606  */
607 int
608 i915_gem_pread_ioctl(struct drm_device *dev, void *data,
609                      struct drm_file *file_priv)
610 {
611         struct drm_i915_gem_pread *args = data;
612         struct drm_gem_object *obj;
613         struct drm_i915_gem_object *obj_priv;
614         int ret = 0;
615
616         ret = i915_mutex_lock_interruptible(dev);
617         if (ret)
618                 return ret;
619
620         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
621         if (obj == NULL) {
622                 ret = -ENOENT;
623                 goto unlock;
624         }
625         obj_priv = to_intel_bo(obj);
626
627         /* Bounds check source.  */
628         if (args->offset > obj->size || args->size > obj->size - args->offset) {
629                 ret = -EINVAL;
630                 goto out;
631         }
632
633         if (args->size == 0)
634                 goto out;
635
636         if (!access_ok(VERIFY_WRITE,
637                        (char __user *)(uintptr_t)args->data_ptr,
638                        args->size)) {
639                 ret = -EFAULT;
640                 goto out;
641         }
642
643         ret = fault_in_pages_writeable((char __user *)(uintptr_t)args->data_ptr,
644                                        args->size);
645         if (ret) {
646                 ret = -EFAULT;
647                 goto out;
648         }
649
650         ret = i915_gem_object_get_pages_or_evict(obj);
651         if (ret)
652                 goto out;
653
654         ret = i915_gem_object_set_cpu_read_domain_range(obj,
655                                                         args->offset,
656                                                         args->size);
657         if (ret)
658                 goto out_put;
659
660         ret = -EFAULT;
661         if (!i915_gem_object_needs_bit17_swizzle(obj))
662                 ret = i915_gem_shmem_pread_fast(dev, obj, args, file_priv);
663         if (ret == -EFAULT)
664                 ret = i915_gem_shmem_pread_slow(dev, obj, args, file_priv);
665
666 out_put:
667         i915_gem_object_put_pages(obj);
668 out:
669         drm_gem_object_unreference(obj);
670 unlock:
671         mutex_unlock(&dev->struct_mutex);
672         return ret;
673 }
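
/*
 * A minimal userspace sketch of a pread call (assuming `handle` was obtained
 * from DRM_IOCTL_I915_GEM_CREATE above and `buf` holds at least 4096 bytes):
 *
 *        struct drm_i915_gem_pread pread = {
 *                .handle   = handle,
 *                .offset   = 0,
 *                .size     = 4096,
 *                .data_ptr = (uint64_t)(uintptr_t)buf,
 *        };
 *        ioctl(fd, DRM_IOCTL_I915_GEM_PREAD, &pread);
 */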
674
675 /* This is the fast write path which cannot handle
676  * page faults in the source data
677  */
678
679 static inline int
680 fast_user_write(struct io_mapping *mapping,
681                 loff_t page_base, int page_offset,
682                 char __user *user_data,
683                 int length)
684 {
685         char *vaddr_atomic;
686         unsigned long unwritten;
687
688         vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
689         unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + page_offset,
690                                                       user_data, length);
691         io_mapping_unmap_atomic(vaddr_atomic);
692         return unwritten;
693 }
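
/*
 * Note on the helper above: the io mapping is an atomic one, so page faults
 * are disabled for the duration of the copy; if the source user page is not
 * resident, the copy returns a non-zero "unwritten" count instead of
 * sleeping, and the caller falls back to the sleeping slow path below.
 */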
694
695 /* Here's the write path which can sleep for
696  * page faults
697  */
698
699 static inline void
700 slow_kernel_write(struct io_mapping *mapping,
701                   loff_t gtt_base, int gtt_offset,
702                   struct page *user_page, int user_offset,
703                   int length)
704 {
705         char __iomem *dst_vaddr;
706         char *src_vaddr;
707
708         dst_vaddr = io_mapping_map_wc(mapping, gtt_base);
709         src_vaddr = kmap(user_page);
710
711         memcpy_toio(dst_vaddr + gtt_offset,
712                     src_vaddr + user_offset,
713                     length);
714
715         kunmap(user_page);
716         io_mapping_unmap(dst_vaddr);
717 }
718
719 static inline int
720 fast_shmem_write(struct page **pages,
721                  loff_t page_base, int page_offset,
722                  char __user *data,
723                  int length)
724 {
725         char *vaddr;
726         int ret;
727
728         vaddr = kmap_atomic(pages[page_base >> PAGE_SHIFT]);
729         ret = __copy_from_user_inatomic(vaddr + page_offset, data, length);
730         kunmap_atomic(vaddr);
731
732         return ret;
733 }
734
735 /**
736  * This is the fast pwrite path, where we copy the data directly from the
737  * user into the GTT, uncached.
738  */
739 static int
740 i915_gem_gtt_pwrite_fast(struct drm_device *dev, struct drm_gem_object *obj,
741                          struct drm_i915_gem_pwrite *args,
742                          struct drm_file *file_priv)
743 {
744         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
745         drm_i915_private_t *dev_priv = dev->dev_private;
746         ssize_t remain;
747         loff_t offset, page_base;
748         char __user *user_data;
749         int page_offset, page_length;
750
751         user_data = (char __user *) (uintptr_t) args->data_ptr;
752         remain = args->size;
753
754         obj_priv = to_intel_bo(obj);
755         offset = obj_priv->gtt_offset + args->offset;
756
757         while (remain > 0) {
758                 /* Operation in this page
759                  *
760                  * page_base = page offset within aperture
761                  * page_offset = offset within page
762                  * page_length = bytes to copy for this page
763                  */
764                 page_base = (offset & ~(PAGE_SIZE-1));
765                 page_offset = offset & (PAGE_SIZE-1);
766                 page_length = remain;
767                 if ((page_offset + remain) > PAGE_SIZE)
768                         page_length = PAGE_SIZE - page_offset;
769
770                 /* If we get a fault while copying data, then (presumably) our
771                  * source page isn't available.  Return the error and we'll
772                  * retry in the slow path.
773                  */
774                 if (fast_user_write(dev_priv->mm.gtt_mapping, page_base,
775                                     page_offset, user_data, page_length))
776
777                         return -EFAULT;
778
779                 remain -= page_length;
780                 user_data += page_length;
781                 offset += page_length;
782         }
783
784         return 0;
785 }
786
787 /**
788  * This is the fallback GTT pwrite path, which uses get_user_pages to pin
789  * the memory and maps it using kmap for copying.
790  *
791  * This code resulted in x11perf -rgb10text consuming about 10% more CPU
792  * than using i915_gem_gtt_pwrite_fast on a G45 (32-bit).
793  */
794 static int
795 i915_gem_gtt_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
796                          struct drm_i915_gem_pwrite *args,
797                          struct drm_file *file_priv)
798 {
799         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
800         drm_i915_private_t *dev_priv = dev->dev_private;
801         ssize_t remain;
802         loff_t gtt_page_base, offset;
803         loff_t first_data_page, last_data_page, num_pages;
804         loff_t pinned_pages, i;
805         struct page **user_pages;
806         struct mm_struct *mm = current->mm;
807         int gtt_page_offset, data_page_offset, data_page_index, page_length;
808         int ret;
809         uint64_t data_ptr = args->data_ptr;
810
811         remain = args->size;
812
813         /* Pin the user pages containing the data.  We can't fault while
814          * holding the struct mutex, and all of the pwrite implementations
815          * want to hold it while dereferencing the user data.
816          */
817         first_data_page = data_ptr / PAGE_SIZE;
818         last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
819         num_pages = last_data_page - first_data_page + 1;
820
821         user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
822         if (user_pages == NULL)
823                 return -ENOMEM;
824
825         mutex_unlock(&dev->struct_mutex);
826         down_read(&mm->mmap_sem);
827         pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
828                                       num_pages, 0, 0, user_pages, NULL);
829         up_read(&mm->mmap_sem);
830         mutex_lock(&dev->struct_mutex);
831         if (pinned_pages < num_pages) {
832                 ret = -EFAULT;
833                 goto out_unpin_pages;
834         }
835
836         ret = i915_gem_object_set_to_gtt_domain(obj, 1);
837         if (ret)
838                 goto out_unpin_pages;
839
840         obj_priv = to_intel_bo(obj);
841         offset = obj_priv->gtt_offset + args->offset;
842
843         while (remain > 0) {
844                 /* Operation in this page
845                  *
846                  * gtt_page_base = page offset within aperture
847                  * gtt_page_offset = offset within page in aperture
848                  * data_page_index = page number in get_user_pages return
849                  * data_page_offset = offset within data_page_index page.
850                  * page_length = bytes to copy for this page
851                  */
852                 gtt_page_base = offset & PAGE_MASK;
853                 gtt_page_offset = offset & ~PAGE_MASK;
854                 data_page_index = data_ptr / PAGE_SIZE - first_data_page;
855                 data_page_offset = data_ptr & ~PAGE_MASK;
856
857                 page_length = remain;
858                 if ((gtt_page_offset + page_length) > PAGE_SIZE)
859                         page_length = PAGE_SIZE - gtt_page_offset;
860                 if ((data_page_offset + page_length) > PAGE_SIZE)
861                         page_length = PAGE_SIZE - data_page_offset;
862
863                 slow_kernel_write(dev_priv->mm.gtt_mapping,
864                                   gtt_page_base, gtt_page_offset,
865                                   user_pages[data_page_index],
866                                   data_page_offset,
867                                   page_length);
868
869                 remain -= page_length;
870                 offset += page_length;
871                 data_ptr += page_length;
872         }
873
874 out_unpin_pages:
875         for (i = 0; i < pinned_pages; i++)
876                 page_cache_release(user_pages[i]);
877         drm_free_large(user_pages);
878
879         return ret;
880 }
881
882 /**
883  * This is the fast shmem pwrite path, which attempts to directly
884  * copy_from_user into the kmapped pages backing the object.
885  */
886 static int
887 i915_gem_shmem_pwrite_fast(struct drm_device *dev, struct drm_gem_object *obj,
888                            struct drm_i915_gem_pwrite *args,
889                            struct drm_file *file_priv)
890 {
891         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
892         ssize_t remain;
893         loff_t offset, page_base;
894         char __user *user_data;
895         int page_offset, page_length;
896
897         user_data = (char __user *) (uintptr_t) args->data_ptr;
898         remain = args->size;
899
900         obj_priv = to_intel_bo(obj);
901         offset = args->offset;
902         obj_priv->dirty = 1;
903
904         while (remain > 0) {
905                 /* Operation in this page
906                  *
907                  * page_base = byte offset of the page within the object
908                  * page_offset = offset within page
909                  * page_length = bytes to copy for this page
910                  */
911                 page_base = (offset & ~(PAGE_SIZE-1));
912                 page_offset = offset & (PAGE_SIZE-1);
913                 page_length = remain;
914                 if ((page_offset + remain) > PAGE_SIZE)
915                         page_length = PAGE_SIZE - page_offset;
916
917                 if (fast_shmem_write(obj_priv->pages,
918                                        page_base, page_offset,
919                                        user_data, page_length))
920                         return -EFAULT;
921
922                 remain -= page_length;
923                 user_data += page_length;
924                 offset += page_length;
925         }
926
927         return 0;
928 }
929
930 /**
931  * This is the fallback shmem pwrite path, which uses get_user_pages to pin
932  * the memory and maps it using kmap for copying.
933  *
934  * This avoids taking mmap_sem for faulting on the user's address while the
935  * struct_mutex is held.
936  */
937 static int
938 i915_gem_shmem_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
939                            struct drm_i915_gem_pwrite *args,
940                            struct drm_file *file_priv)
941 {
942         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
943         struct mm_struct *mm = current->mm;
944         struct page **user_pages;
945         ssize_t remain;
946         loff_t offset, pinned_pages, i;
947         loff_t first_data_page, last_data_page, num_pages;
948         int shmem_page_index, shmem_page_offset;
949         int data_page_index,  data_page_offset;
950         int page_length;
951         int ret;
952         uint64_t data_ptr = args->data_ptr;
953         int do_bit17_swizzling;
954
955         remain = args->size;
956
957         /* Pin the user pages containing the data.  We can't fault while
958          * holding the struct mutex, and all of the pwrite implementations
959          * want to hold it while dereferencing the user data.
960          */
961         first_data_page = data_ptr / PAGE_SIZE;
962         last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
963         num_pages = last_data_page - first_data_page + 1;
964
965         user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
966         if (user_pages == NULL)
967                 return -ENOMEM;
968
969         mutex_unlock(&dev->struct_mutex);
970         down_read(&mm->mmap_sem);
971         pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
972                                       num_pages, 0, 0, user_pages, NULL);
973         up_read(&mm->mmap_sem);
974         mutex_lock(&dev->struct_mutex);
975         if (pinned_pages < num_pages) {
976                 ret = -EFAULT;
977                 goto out;
978         }
979
980         ret = i915_gem_object_set_to_cpu_domain(obj, 1);
981         if (ret)
982                 goto out;
983
984         do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
985
986         obj_priv = to_intel_bo(obj);
987         offset = args->offset;
988         obj_priv->dirty = 1;
989
990         while (remain > 0) {
991                 /* Operation in this page
992                  *
993                  * shmem_page_index = page number within shmem file
994                  * shmem_page_offset = offset within page in shmem file
995                  * data_page_index = page number in get_user_pages return
996                  * data_page_offset = offset within data_page_index page.
997                  * page_length = bytes to copy for this page
998                  */
999                 shmem_page_index = offset / PAGE_SIZE;
1000                 shmem_page_offset = offset & ~PAGE_MASK;
1001                 data_page_index = data_ptr / PAGE_SIZE - first_data_page;
1002                 data_page_offset = data_ptr & ~PAGE_MASK;
1003
1004                 page_length = remain;
1005                 if ((shmem_page_offset + page_length) > PAGE_SIZE)
1006                         page_length = PAGE_SIZE - shmem_page_offset;
1007                 if ((data_page_offset + page_length) > PAGE_SIZE)
1008                         page_length = PAGE_SIZE - data_page_offset;
1009
1010                 if (do_bit17_swizzling) {
1011                         slow_shmem_bit17_copy(obj_priv->pages[shmem_page_index],
1012                                               shmem_page_offset,
1013                                               user_pages[data_page_index],
1014                                               data_page_offset,
1015                                               page_length,
1016                                               0);
1017                 } else {
1018                         slow_shmem_copy(obj_priv->pages[shmem_page_index],
1019                                         shmem_page_offset,
1020                                         user_pages[data_page_index],
1021                                         data_page_offset,
1022                                         page_length);
1023                 }
1024
1025                 remain -= page_length;
1026                 data_ptr += page_length;
1027                 offset += page_length;
1028         }
1029
1030 out:
1031         for (i = 0; i < pinned_pages; i++)
1032                 page_cache_release(user_pages[i]);
1033         drm_free_large(user_pages);
1034
1035         return ret;
1036 }
1037
1038 /**
1039  * Writes data to the object referenced by handle.
1040  *
1041  * On error, the contents of the buffer that were to be modified are undefined.
1042  */
1043 int
1044 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
1045                       struct drm_file *file)
1046 {
1047         struct drm_i915_gem_pwrite *args = data;
1048         struct drm_gem_object *obj;
1049         struct drm_i915_gem_object *obj_priv;
1050         int ret = 0;
1051
1052         ret = i915_mutex_lock_interruptible(dev);
1053         if (ret)
1054                 return ret;
1055
1056         obj = drm_gem_object_lookup(dev, file, args->handle);
1057         if (obj == NULL) {
1058                 ret = -ENOENT;
1059                 goto unlock;
1060         }
1061         obj_priv = to_intel_bo(obj);
1062
1063
1064         /* Bounds check destination. */
1065         if (args->offset > obj->size || args->size > obj->size - args->offset) {
1066                 ret = -EINVAL;
1067                 goto out;
1068         }
1069
1070         if (args->size == 0)
1071                 goto out;
1072
1073         if (!access_ok(VERIFY_READ,
1074                        (char __user *)(uintptr_t)args->data_ptr,
1075                        args->size)) {
1076                 ret = -EFAULT;
1077                 goto out;
1078         }
1079
1080         ret = fault_in_pages_readable((char __user *)(uintptr_t)args->data_ptr,
1081                                       args->size);
1082         if (ret) {
1083                 ret = -EFAULT;
1084                 goto out;
1085         }
1086
1087         /* We can only do the GTT pwrite on untiled buffers, as otherwise
1088          * it would end up going through the fenced access, and we'll get
1089          * different detiling behavior between reading and writing.
1090          * pread/pwrite currently are reading and writing from the CPU
1091          * perspective, requiring manual detiling by the client.
1092          */
1093         if (obj_priv->phys_obj)
1094                 ret = i915_gem_phys_pwrite(dev, obj, args, file);
1095         else if (obj_priv->tiling_mode == I915_TILING_NONE &&
1096                  obj_priv->gtt_space &&
1097                  obj->write_domain != I915_GEM_DOMAIN_CPU) {
1098                 ret = i915_gem_object_pin(obj, 0, true);
1099                 if (ret)
1100                         goto out;
1101
1102                 ret = i915_gem_object_set_to_gtt_domain(obj, 1);
1103                 if (ret)
1104                         goto out_unpin;
1105
1106                 ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
1107                 if (ret == -EFAULT)
1108                         ret = i915_gem_gtt_pwrite_slow(dev, obj, args, file);
1109
1110 out_unpin:
1111                 i915_gem_object_unpin(obj);
1112         } else {
1113                 ret = i915_gem_object_get_pages_or_evict(obj);
1114                 if (ret)
1115                         goto out;
1116
1117                 ret = i915_gem_object_set_to_cpu_domain(obj, 1);
1118                 if (ret)
1119                         goto out_put;
1120
1121                 ret = -EFAULT;
1122                 if (!i915_gem_object_needs_bit17_swizzle(obj))
1123                         ret = i915_gem_shmem_pwrite_fast(dev, obj, args, file);
1124                 if (ret == -EFAULT)
1125                         ret = i915_gem_shmem_pwrite_slow(dev, obj, args, file);
1126
1127 out_put:
1128                 i915_gem_object_put_pages(obj);
1129         }
1130
1131 out:
1132         drm_gem_object_unreference(obj);
1133 unlock:
1134         mutex_unlock(&dev->struct_mutex);
1135         return ret;
1136 }
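
/*
 * A minimal userspace sketch of a pwrite call, mirroring the pread example
 * earlier (assuming `handle` and `buf` as before):
 *
 *        struct drm_i915_gem_pwrite pwrite = {
 *                .handle   = handle,
 *                .offset   = 0,
 *                .size     = 4096,
 *                .data_ptr = (uint64_t)(uintptr_t)buf,
 *        };
 *        ioctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite);
 *
 * Whether the kernel then takes the phys, GTT or shmem path is decided by
 * the object state checks at the top of the function above.
 */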
1137
1138 /**
1139  * Called when user space prepares to use an object with the CPU, either
1140  * through the mmap ioctl's mapping or a GTT mapping.
1141  */
1142 int
1143 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
1144                           struct drm_file *file_priv)
1145 {
1146         struct drm_i915_private *dev_priv = dev->dev_private;
1147         struct drm_i915_gem_set_domain *args = data;
1148         struct drm_gem_object *obj;
1149         struct drm_i915_gem_object *obj_priv;
1150         uint32_t read_domains = args->read_domains;
1151         uint32_t write_domain = args->write_domain;
1152         int ret;
1153
1154         if (!(dev->driver->driver_features & DRIVER_GEM))
1155                 return -ENODEV;
1156
1157         /* Only handle setting domains to types used by the CPU. */
1158         if (write_domain & I915_GEM_GPU_DOMAINS)
1159                 return -EINVAL;
1160
1161         if (read_domains & I915_GEM_GPU_DOMAINS)
1162                 return -EINVAL;
1163
1164         /* Having something in the write domain implies it's in the read
1165          * domain, and only that read domain.  Enforce that in the request.
1166          */
1167         if (write_domain != 0 && read_domains != write_domain)
1168                 return -EINVAL;
1169
1170         ret = i915_mutex_lock_interruptible(dev);
1171         if (ret)
1172                 return ret;
1173
1174         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
1175         if (obj == NULL) {
1176                 ret = -ENOENT;
1177                 goto unlock;
1178         }
1179         obj_priv = to_intel_bo(obj);
1180
1181         intel_mark_busy(dev, obj);
1182
1183         if (read_domains & I915_GEM_DOMAIN_GTT) {
1184                 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
1185
1186                 /* Update the LRU on the fence for the CPU access that's
1187                  * about to occur.
1188                  */
1189                 if (obj_priv->fence_reg != I915_FENCE_REG_NONE) {
1190                         struct drm_i915_fence_reg *reg =
1191                                 &dev_priv->fence_regs[obj_priv->fence_reg];
1192                         list_move_tail(&reg->lru_list,
1193                                        &dev_priv->mm.fence_list);
1194                 }
1195
1196                 /* Silently promote "you're not bound, there was nothing to do"
1197                  * to success, since the client was just asking us to
1198                  * make sure everything was done.
1199                  */
1200                 if (ret == -EINVAL)
1201                         ret = 0;
1202         } else {
1203                 ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
1204         }
1205
1206         /* Maintain LRU order of "inactive" objects */
1207         if (ret == 0 && i915_gem_object_is_inactive(obj_priv))
1208                 list_move_tail(&obj_priv->mm_list, &dev_priv->mm.inactive_list);
1209
1210         drm_gem_object_unreference(obj);
1211 unlock:
1212         mutex_unlock(&dev->struct_mutex);
1213         return ret;
1214 }
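
/*
 * A minimal userspace sketch of the set-domain call (a CPU read/write
 * request; GPU domains are rejected by the checks above):
 *
 *        struct drm_i915_gem_set_domain sd = {
 *                .handle       = handle,
 *                .read_domains = I915_GEM_DOMAIN_CPU,
 *                .write_domain = I915_GEM_DOMAIN_CPU,
 *        };
 *        ioctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &sd);
 */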
1215
1216 /**
1217  * Called when user space has done writes to this buffer
1218  */
1219 int
1220 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
1221                       struct drm_file *file_priv)
1222 {
1223         struct drm_i915_gem_sw_finish *args = data;
1224         struct drm_gem_object *obj;
1225         int ret = 0;
1226
1227         if (!(dev->driver->driver_features & DRIVER_GEM))
1228                 return -ENODEV;
1229
1230         ret = i915_mutex_lock_interruptible(dev);
1231         if (ret)
1232                 return ret;
1233
1234         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
1235         if (obj == NULL) {
1236                 ret = -ENOENT;
1237                 goto unlock;
1238         }
1239
1240         /* Pinned buffers may be scanout, so flush the cache */
1241         if (to_intel_bo(obj)->pin_count)
1242                 i915_gem_object_flush_cpu_write_domain(obj);
1243
1244         drm_gem_object_unreference(obj);
1245 unlock:
1246         mutex_unlock(&dev->struct_mutex);
1247         return ret;
1248 }
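
/*
 * Userspace signals the end of its CPU writes with a matching sw-finish
 * call, e.g. (handle as in the earlier sketches):
 *
 *        struct drm_i915_gem_sw_finish fin = { .handle = handle };
 *        ioctl(fd, DRM_IOCTL_I915_GEM_SW_FINISH, &fin);
 */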
1249
1250 /**
1251  * Maps the contents of an object, returning the address it is mapped
1252  * into.
1253  *
1254  * While the mapping holds a reference on the contents of the object, it doesn't
1255  * imply a ref on the object itself.
1256  */
1257 int
1258 i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1259                    struct drm_file *file_priv)
1260 {
1261         struct drm_i915_gem_mmap *args = data;
1262         struct drm_gem_object *obj;
1263         loff_t offset;
1264         unsigned long addr;
1265
1266         if (!(dev->driver->driver_features & DRIVER_GEM))
1267                 return -ENODEV;
1268
1269         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
1270         if (obj == NULL)
1271                 return -ENOENT;
1272
1273         offset = args->offset;
1274
1275         down_write(&current->mm->mmap_sem);
1276         addr = do_mmap(obj->filp, 0, args->size,
1277                        PROT_READ | PROT_WRITE, MAP_SHARED,
1278                        args->offset);
1279         up_write(&current->mm->mmap_sem);
1280         drm_gem_object_unreference_unlocked(obj);
1281         if (IS_ERR((void *)addr))
1282                 return addr;
1283
1284         args->addr_ptr = (uint64_t) addr;
1285
1286         return 0;
1287 }
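
/*
 * A minimal sketch of the CPU mmap path implemented above (the returned
 * pointer addresses the object's shmem backing store, not the GTT):
 *
 *        struct drm_i915_gem_mmap map = {
 *                .handle = handle,
 *                .offset = 0,
 *                .size   = 4096,
 *        };
 *        ioctl(fd, DRM_IOCTL_I915_GEM_MMAP, &map);
 *        void *cpu_ptr = (void *)(uintptr_t)map.addr_ptr;
 */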
1288
1289 /**
1290  * i915_gem_fault - fault a page into the GTT
1291  * @vma: VMA in question
1292  * @vmf: fault info
1293  *
1294  * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
1295  * from userspace.  The fault handler takes care of binding the object to
1296  * the GTT (if needed), allocating and programming a fence register (again,
1297  * only if needed based on whether the old reg is still valid or the object
1298  * is tiled) and inserting a new PTE into the faulting process.
1299  *
1300  * Note that the faulting process may involve evicting existing objects
1301  * from the GTT and/or fence registers to make room.  So performance may
1302  * suffer if the GTT working set is large or there are few fence registers
1303  * left.
1304  */
1305 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1306 {
1307         struct drm_gem_object *obj = vma->vm_private_data;
1308         struct drm_device *dev = obj->dev;
1309         drm_i915_private_t *dev_priv = dev->dev_private;
1310         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1311         pgoff_t page_offset;
1312         unsigned long pfn;
1313         int ret = 0;
1314         bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
1315
1316         /* We don't use vmf->pgoff since that has the fake offset */
1317         page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
1318                 PAGE_SHIFT;
1319
1320         /* Now bind it into the GTT if needed */
1321         mutex_lock(&dev->struct_mutex);
1322         BUG_ON(obj_priv->pin_count && !obj_priv->pin_mappable);
1323         if (!i915_gem_object_cpu_accessible(obj_priv))
1324                 i915_gem_object_unbind(obj);
1325
1326         if (!obj_priv->gtt_space) {
1327                 ret = i915_gem_object_bind_to_gtt(obj, 0, true);
1328                 if (ret)
1329                         goto unlock;
1330
1331                 ret = i915_gem_object_set_to_gtt_domain(obj, write);
1332                 if (ret)
1333                         goto unlock;
1334         }
1335
1336         if (!obj_priv->fault_mappable) {
1337                 obj_priv->fault_mappable = true;
1338                 i915_gem_info_update_mappable(dev_priv, obj, true);
1339         }
1340
1341         /* Need a new fence register? */
1342         if (obj_priv->tiling_mode != I915_TILING_NONE) {
1343                 ret = i915_gem_object_get_fence_reg(obj, true);
1344                 if (ret)
1345                         goto unlock;
1346         }
1347
1348         if (i915_gem_object_is_inactive(obj_priv))
1349                 list_move_tail(&obj_priv->mm_list, &dev_priv->mm.inactive_list);
1350
1351         pfn = ((dev->agp->base + obj_priv->gtt_offset) >> PAGE_SHIFT) +
1352                 page_offset;
1353
1354         /* Finally, remap it using the new GTT offset */
1355         ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
1356 unlock:
1357         mutex_unlock(&dev->struct_mutex);
1358
1359         switch (ret) {
1360         case 0:
1361         case -ERESTARTSYS:
1362                 return VM_FAULT_NOPAGE;
1363         case -ENOMEM:
1364         case -EAGAIN:
1365                 return VM_FAULT_OOM;
1366         default:
1367                 return VM_FAULT_SIGBUS;
1368         }
1369 }
1370
1371 /**
1372  * i915_gem_create_mmap_offset - create a fake mmap offset for an object
1373  * @obj: obj in question
1374  *
1375  * GEM memory mapping works by handing back to userspace a fake mmap offset
1376  * it can use in a subsequent mmap(2) call.  The DRM core code then looks
1377  * up the object based on the offset and sets up the various memory mapping
1378  * structures.
1379  *
1380  * This routine allocates and attaches a fake offset for @obj.
1381  */
1382 static int
1383 i915_gem_create_mmap_offset(struct drm_gem_object *obj)
1384 {
1385         struct drm_device *dev = obj->dev;
1386         struct drm_gem_mm *mm = dev->mm_private;
1387         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1388         struct drm_map_list *list;
1389         struct drm_local_map *map;
1390         int ret = 0;
1391
1392         /* Set the object up for mmap'ing */
1393         list = &obj->map_list;
1394         list->map = kzalloc(sizeof(struct drm_map_list), GFP_KERNEL);
1395         if (!list->map)
1396                 return -ENOMEM;
1397
1398         map = list->map;
1399         map->type = _DRM_GEM;
1400         map->size = obj->size;
1401         map->handle = obj;
1402
1403         /* Get a DRM GEM mmap offset allocated... */
1404         list->file_offset_node = drm_mm_search_free(&mm->offset_manager,
1405                                                     obj->size / PAGE_SIZE, 0, 0);
1406         if (!list->file_offset_node) {
1407                 DRM_ERROR("failed to allocate offset for bo %d\n", obj->name);
1408                 ret = -ENOSPC;
1409                 goto out_free_list;
1410         }
1411
1412         list->file_offset_node = drm_mm_get_block(list->file_offset_node,
1413                                                   obj->size / PAGE_SIZE, 0);
1414         if (!list->file_offset_node) {
1415                 ret = -ENOMEM;
1416                 goto out_free_list;
1417         }
1418
1419         list->hash.key = list->file_offset_node->start;
1420         ret = drm_ht_insert_item(&mm->offset_hash, &list->hash);
1421         if (ret) {
1422                 DRM_ERROR("failed to add to map hash\n");
1423                 goto out_free_mm;
1424         }
1425
1426         /* By now we should be all set; any drm_mmap request on the offset
1427          * below will get to our mmap & fault handler */
1428         obj_priv->mmap_offset = ((uint64_t) list->hash.key) << PAGE_SHIFT;
1429
1430         return 0;
1431
1432 out_free_mm:
1433         drm_mm_put_block(list->file_offset_node);
1434 out_free_list:
1435         kfree(list->map);
1436
1437         return ret;
1438 }
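
/*
 * Sketch of how userspace consumes the fake offset created above: it asks
 * for the offset with DRM_IOCTL_I915_GEM_MMAP_GTT and then passes it to a
 * regular mmap(2) on the DRM fd; the resulting VMA is backed by
 * i915_gem_fault(), not by ordinary pages:
 *
 *        struct drm_i915_gem_mmap_gtt gtt_map = { .handle = handle };
 *        ioctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &gtt_map);
 *        void *gtt_ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
 *                             MAP_SHARED, fd, gtt_map.offset);
 */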
1439
1440 /**
1441  * i915_gem_release_mmap - remove physical page mappings
1442  * @obj: obj in question
1443  *
1444  * Preserve the reservation of the mmapping with the DRM core code, but
1445  * relinquish ownership of the pages back to the system.
1446  *
1447  * It is vital that we remove the page mapping if we have mapped a tiled
1448  * object through the GTT and then lose the fence register due to
1449  * resource pressure. Similarly if the object has been moved out of the
1450  * aperture, then pages mapped into userspace must be revoked. Removing the
1451  * mapping will then trigger a page fault on the next user access, allowing
1452  * fixup by i915_gem_fault().
1453  */
1454 void
1455 i915_gem_release_mmap(struct drm_gem_object *obj)
1456 {
1457         struct drm_device *dev = obj->dev;
1458         struct drm_i915_private *dev_priv = dev->dev_private;
1459         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1460
1461         if (dev->dev_mapping)
1462                 unmap_mapping_range(dev->dev_mapping,
1463                                     obj_priv->mmap_offset, obj->size, 1);
1464
1465         if (obj_priv->fault_mappable) {
1466                 obj_priv->fault_mappable = false;
1467                 i915_gem_info_update_mappable(dev_priv, obj, false);
1468         }
1469 }
1470
1471 static void
1472 i915_gem_free_mmap_offset(struct drm_gem_object *obj)
1473 {
1474         struct drm_device *dev = obj->dev;
1475         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1476         struct drm_gem_mm *mm = dev->mm_private;
1477         struct drm_map_list *list;
1478
1479         list = &obj->map_list;
1480         drm_ht_remove_item(&mm->offset_hash, &list->hash);
1481
1482         if (list->file_offset_node) {
1483                 drm_mm_put_block(list->file_offset_node);
1484                 list->file_offset_node = NULL;
1485         }
1486
1487         if (list->map) {
1488                 kfree(list->map);
1489                 list->map = NULL;
1490         }
1491
1492         obj_priv->mmap_offset = 0;
1493 }
1494
1495 /**
1496  * i915_gem_get_gtt_alignment - return required GTT alignment for an object
1497  * @obj: object to check
1498  *
1499  * Return the required GTT alignment for an object, taking into account
1500  * potential fence register mapping if needed.
1501  */
1502 static uint32_t
1503 i915_gem_get_gtt_alignment(struct drm_gem_object *obj)
1504 {
1505         struct drm_device *dev = obj->dev;
1506         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1507         int start, i;
1508
1509         /*
1510          * Minimum alignment is 4k (GTT page size), but might be greater
1511          * if a fence register is needed for the object.
1512          */
1513         if (INTEL_INFO(dev)->gen >= 4 || obj_priv->tiling_mode == I915_TILING_NONE)
1514                 return 4096;
1515
1516         /*
1517          * Older chips require the object to be aligned to the size of the
1518          * smallest fence region that can contain it.
1519          */
1520         if (INTEL_INFO(dev)->gen == 3)
1521                 start = 1024*1024;
1522         else
1523                 start = 512*1024;
1524
1525         for (i = start; i < obj->size; i <<= 1)
1526                 ;
1527
1528         return i;
1529 }
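
/*
 * The loop above is a hand-rolled "round up to a power of two, but no
 * smaller than start"; with linux/log2.h it could equivalently be written
 * as (sketch only, not a change to the function above):
 *
 *         return max_t(uint32_t, start, roundup_pow_of_two(obj->size));
 *
 * Worked examples: on gen3 (start == 1MiB) a 700KiB tiled object needs 1MiB
 * alignment and a 3MiB one needs 4MiB; on gen2 (start == 512KiB) a 300KiB
 * tiled object needs 512KiB alignment.
 */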
1530
1531 /**
1532  * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
1533  * @dev: DRM device
1534  * @data: GTT mapping ioctl data
1535  * @file_priv: GEM object info
1536  *
1537  * Simply returns the fake offset to userspace so it can mmap it.
1538  * The mmap call will end up in drm_gem_mmap(), which will set things
1539  * up so we can get faults in the handler above.
1540  *
1541  * The fault handler will take care of binding the object into the GTT
1542  * (since it may have been evicted to make room for something), allocating
1543  * a fence register, and mapping the appropriate aperture address into
1544  * userspace.
1545  */
1546 int
1547 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
1548                         struct drm_file *file_priv)
1549 {
1550         struct drm_i915_gem_mmap_gtt *args = data;
1551         struct drm_gem_object *obj;
1552         struct drm_i915_gem_object *obj_priv;
1553         int ret;
1554
1555         if (!(dev->driver->driver_features & DRIVER_GEM))
1556                 return -ENODEV;
1557
1558         ret = i915_mutex_lock_interruptible(dev);
1559         if (ret)
1560                 return ret;
1561
1562         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
1563         if (obj == NULL) {
1564                 ret = -ENOENT;
1565                 goto unlock;
1566         }
1567         obj_priv = to_intel_bo(obj);
1568
1569         if (obj_priv->madv != I915_MADV_WILLNEED) {
1570                 DRM_ERROR("Attempting to mmap a purgeable buffer\n");
1571                 ret = -EINVAL;
1572                 goto out;
1573         }
1574
1575         if (!obj_priv->mmap_offset) {
1576                 ret = i915_gem_create_mmap_offset(obj);
1577                 if (ret)
1578                         goto out;
1579         }
1580
1581         args->offset = obj_priv->mmap_offset;
1582
1583         /*
1584          * Pull it into the GTT so that we have a page list (makes the
1585          * initial fault faster and any subsequent flushing possible).
1586          */
1587         if (!obj_priv->agp_mem) {
1588                 ret = i915_gem_object_bind_to_gtt(obj, 0, true);
1589                 if (ret)
1590                         goto out;
1591         }
1592
1593 out:
1594         drm_gem_object_unreference(obj);
1595 unlock:
1596         mutex_unlock(&dev->struct_mutex);
1597         return ret;
1598 }
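
/*
 * For reference, the userspace side of this ioctl looks roughly like the
 * sketch below (error handling omitted; "fd", "handle" and "size" are
 * hypothetical variables for an open DRM node and an existing GEM object).
 * The ioctl number and struct come from i915_drm.h.
 *
 *         struct drm_i915_gem_mmap_gtt arg = { .handle = handle };
 *         ioctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &arg);
 *         void *ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
 *                          MAP_SHARED, fd, arg.offset);
 *
 * The first access through ptr then faults into drm_gem_mmap() and
 * i915_gem_fault(), which bind the object and insert the aperture pages.
 */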
1599
1600 static void
1601 i915_gem_object_put_pages(struct drm_gem_object *obj)
1602 {
1603         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1604         int page_count = obj->size / PAGE_SIZE;
1605         int i;
1606
1607         BUG_ON(obj_priv->pages_refcount == 0);
1608         BUG_ON(obj_priv->madv == __I915_MADV_PURGED);
1609
1610         if (--obj_priv->pages_refcount != 0)
1611                 return;
1612
1613         if (obj_priv->tiling_mode != I915_TILING_NONE)
1614                 i915_gem_object_save_bit_17_swizzle(obj);
1615
1616         if (obj_priv->madv == I915_MADV_DONTNEED)
1617                 obj_priv->dirty = 0;
1618
1619         for (i = 0; i < page_count; i++) {
1620                 if (obj_priv->dirty)
1621                         set_page_dirty(obj_priv->pages[i]);
1622
1623                 if (obj_priv->madv == I915_MADV_WILLNEED)
1624                         mark_page_accessed(obj_priv->pages[i]);
1625
1626                 page_cache_release(obj_priv->pages[i]);
1627         }
1628         obj_priv->dirty = 0;
1629
1630         drm_free_large(obj_priv->pages);
1631         obj_priv->pages = NULL;
1632 }
1633
1634 static uint32_t
1635 i915_gem_next_request_seqno(struct drm_device *dev,
1636                             struct intel_ring_buffer *ring)
1637 {
1638         drm_i915_private_t *dev_priv = dev->dev_private;
1639
1640         ring->outstanding_lazy_request = true;
1641         return dev_priv->next_seqno;
1642 }
1643
1644 static void
1645 i915_gem_object_move_to_active(struct drm_gem_object *obj,
1646                                struct intel_ring_buffer *ring)
1647 {
1648         struct drm_device *dev = obj->dev;
1649         struct drm_i915_private *dev_priv = dev->dev_private;
1650         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1651         uint32_t seqno = i915_gem_next_request_seqno(dev, ring);
1652
1653         BUG_ON(ring == NULL);
1654         obj_priv->ring = ring;
1655
1656         /* Add a reference if we're newly entering the active list. */
1657         if (!obj_priv->active) {
1658                 drm_gem_object_reference(obj);
1659                 obj_priv->active = 1;
1660         }
1661
1662         /* Move from whatever list we were on to the tail of execution. */
1663         list_move_tail(&obj_priv->mm_list, &dev_priv->mm.active_list);
1664         list_move_tail(&obj_priv->ring_list, &ring->active_list);
1665         obj_priv->last_rendering_seqno = seqno;
1666 }
1667
1668 static void
1669 i915_gem_object_move_to_flushing(struct drm_gem_object *obj)
1670 {
1671         struct drm_device *dev = obj->dev;
1672         drm_i915_private_t *dev_priv = dev->dev_private;
1673         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1674
1675         BUG_ON(!obj_priv->active);
1676         list_move_tail(&obj_priv->mm_list, &dev_priv->mm.flushing_list);
1677         list_del_init(&obj_priv->ring_list);
1678         obj_priv->last_rendering_seqno = 0;
1679 }
1680
1681 /* Immediately discard the backing storage */
1682 static void
1683 i915_gem_object_truncate(struct drm_gem_object *obj)
1684 {
1685         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1686         struct inode *inode;
1687
1688         /* Our goal here is to return as much of the memory as
1689          * possible back to the system, as we may be called from the OOM
1690          * path. To do this we must instruct the shmfs to drop all of its
1691          * backing pages, *now*. Here we mirror the actions taken by
1692          * shmem_delete_inode() to release the backing store.
1693          */
1694         inode = obj->filp->f_path.dentry->d_inode;
1695         truncate_inode_pages(inode->i_mapping, 0);
1696         if (inode->i_op->truncate_range)
1697                 inode->i_op->truncate_range(inode, 0, (loff_t)-1);
1698
1699         obj_priv->madv = __I915_MADV_PURGED;
1700 }
1701
1702 static inline int
1703 i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj_priv)
1704 {
1705         return obj_priv->madv == I915_MADV_DONTNEED;
1706 }
1707
1708 static void
1709 i915_gem_object_move_to_inactive(struct drm_gem_object *obj)
1710 {
1711         struct drm_device *dev = obj->dev;
1712         drm_i915_private_t *dev_priv = dev->dev_private;
1713         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1714
1715         if (obj_priv->pin_count != 0)
1716                 list_move_tail(&obj_priv->mm_list, &dev_priv->mm.pinned_list);
1717         else
1718                 list_move_tail(&obj_priv->mm_list, &dev_priv->mm.inactive_list);
1719         list_del_init(&obj_priv->ring_list);
1720
1721         BUG_ON(!list_empty(&obj_priv->gpu_write_list));
1722
1723         obj_priv->last_rendering_seqno = 0;
1724         obj_priv->ring = NULL;
1725         if (obj_priv->active) {
1726                 obj_priv->active = 0;
1727                 drm_gem_object_unreference(obj);
1728         }
1729         WARN_ON(i915_verify_lists(dev));
1730 }
1731
1732 static void
1733 i915_gem_process_flushing_list(struct drm_device *dev,
1734                                uint32_t flush_domains,
1735                                struct intel_ring_buffer *ring)
1736 {
1737         drm_i915_private_t *dev_priv = dev->dev_private;
1738         struct drm_i915_gem_object *obj_priv, *next;
1739
1740         list_for_each_entry_safe(obj_priv, next,
1741                                  &ring->gpu_write_list,
1742                                  gpu_write_list) {
1743                 struct drm_gem_object *obj = &obj_priv->base;
1744
1745                 if (obj->write_domain & flush_domains) {
1746                         uint32_t old_write_domain = obj->write_domain;
1747
1748                         obj->write_domain = 0;
1749                         list_del_init(&obj_priv->gpu_write_list);
1750                         i915_gem_object_move_to_active(obj, ring);
1751
1752                         /* update the fence lru list */
1753                         if (obj_priv->fence_reg != I915_FENCE_REG_NONE) {
1754                                 struct drm_i915_fence_reg *reg =
1755                                         &dev_priv->fence_regs[obj_priv->fence_reg];
1756                                 list_move_tail(&reg->lru_list,
1757                                                 &dev_priv->mm.fence_list);
1758                         }
1759
1760                         trace_i915_gem_object_change_domain(obj,
1761                                                             obj->read_domains,
1762                                                             old_write_domain);
1763                 }
1764         }
1765 }
1766
1767 int
1768 i915_add_request(struct drm_device *dev,
1769                  struct drm_file *file,
1770                  struct drm_i915_gem_request *request,
1771                  struct intel_ring_buffer *ring)
1772 {
1773         drm_i915_private_t *dev_priv = dev->dev_private;
1774         struct drm_i915_file_private *file_priv = NULL;
1775         uint32_t seqno;
1776         int was_empty;
1777         int ret;
1778
1779         BUG_ON(request == NULL);
1780
1781         if (file != NULL)
1782                 file_priv = file->driver_priv;
1783
1784         ret = ring->add_request(ring, &seqno);
1785         if (ret)
1786                 return ret;
1787
1788         ring->outstanding_lazy_request = false;
1789
1790         request->seqno = seqno;
1791         request->ring = ring;
1792         request->emitted_jiffies = jiffies;
1793         was_empty = list_empty(&ring->request_list);
1794         list_add_tail(&request->list, &ring->request_list);
1795
1796         if (file_priv) {
1797                 spin_lock(&file_priv->mm.lock);
1798                 request->file_priv = file_priv;
1799                 list_add_tail(&request->client_list,
1800                               &file_priv->mm.request_list);
1801                 spin_unlock(&file_priv->mm.lock);
1802         }
1803
1804         if (!dev_priv->mm.suspended) {
1805                 mod_timer(&dev_priv->hangcheck_timer,
1806                           jiffies + msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD));
1807                 if (was_empty)
1808                         queue_delayed_work(dev_priv->wq,
1809                                            &dev_priv->mm.retire_work, HZ);
1810         }
1811         return 0;
1812 }
1813
1814 /**
1815  * Command execution barrier
1816  *
1817  * Ensures that all commands in the ring are finished
1818  * before signalling the CPU
1819  */
1820 static void
1821 i915_retire_commands(struct drm_device *dev, struct intel_ring_buffer *ring)
1822 {
1823         uint32_t flush_domains = 0;
1824
1825         /* The sampler always gets flushed on i965 (sigh) */
1826         if (INTEL_INFO(dev)->gen >= 4)
1827                 flush_domains |= I915_GEM_DOMAIN_SAMPLER;
1828
1829         ring->flush(ring, I915_GEM_DOMAIN_COMMAND, flush_domains);
1830 }
1831
1832 static inline void
1833 i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
1834 {
1835         struct drm_i915_file_private *file_priv = request->file_priv;
1836
1837         if (!file_priv)
1838                 return;
1839
1840         spin_lock(&file_priv->mm.lock);
1841         list_del(&request->client_list);
1842         request->file_priv = NULL;
1843         spin_unlock(&file_priv->mm.lock);
1844 }
1845
1846 static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv,
1847                                       struct intel_ring_buffer *ring)
1848 {
1849         while (!list_empty(&ring->request_list)) {
1850                 struct drm_i915_gem_request *request;
1851
1852                 request = list_first_entry(&ring->request_list,
1853                                            struct drm_i915_gem_request,
1854                                            list);
1855
1856                 list_del(&request->list);
1857                 i915_gem_request_remove_from_client(request);
1858                 kfree(request);
1859         }
1860
1861         while (!list_empty(&ring->active_list)) {
1862                 struct drm_i915_gem_object *obj_priv;
1863
1864                 obj_priv = list_first_entry(&ring->active_list,
1865                                             struct drm_i915_gem_object,
1866                                             ring_list);
1867
1868                 obj_priv->base.write_domain = 0;
1869                 list_del_init(&obj_priv->gpu_write_list);
1870                 i915_gem_object_move_to_inactive(&obj_priv->base);
1871         }
1872 }
1873
1874 void i915_gem_reset(struct drm_device *dev)
1875 {
1876         struct drm_i915_private *dev_priv = dev->dev_private;
1877         struct drm_i915_gem_object *obj_priv;
1878         int i;
1879
1880         i915_gem_reset_ring_lists(dev_priv, &dev_priv->render_ring);
1881         i915_gem_reset_ring_lists(dev_priv, &dev_priv->bsd_ring);
1882         i915_gem_reset_ring_lists(dev_priv, &dev_priv->blt_ring);
1883
1884         /* Remove anything from the flushing lists. The GPU cache is likely
1885          * to be lost on reset along with the data, so simply move the
1886          * lost bo to the inactive list.
1887          */
1888         while (!list_empty(&dev_priv->mm.flushing_list)) {
1889                 obj_priv = list_first_entry(&dev_priv->mm.flushing_list,
1890                                             struct drm_i915_gem_object,
1891                                             mm_list);
1892
1893                 obj_priv->base.write_domain = 0;
1894                 list_del_init(&obj_priv->gpu_write_list);
1895                 i915_gem_object_move_to_inactive(&obj_priv->base);
1896         }
1897
1898         /* Move everything out of the GPU domains to ensure we do any
1899          * necessary invalidation upon reuse.
1900          */
1901         list_for_each_entry(obj_priv,
1902                             &dev_priv->mm.inactive_list,
1903                             mm_list)
1904         {
1905                 obj_priv->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
1906         }
1907
1908         /* The fence registers are invalidated so clear them out */
1909         for (i = 0; i < 16; i++) {
1910                 struct drm_i915_fence_reg *reg;
1911
1912                 reg = &dev_priv->fence_regs[i];
1913                 if (!reg->obj)
1914                         continue;
1915
1916                 i915_gem_clear_fence_reg(reg->obj);
1917         }
1918 }
1919
1920 /**
1921  * This function clears the request list as sequence numbers are passed.
1922  */
1923 static void
1924 i915_gem_retire_requests_ring(struct drm_device *dev,
1925                               struct intel_ring_buffer *ring)
1926 {
1927         drm_i915_private_t *dev_priv = dev->dev_private;
1928         uint32_t seqno;
1929
1930         if (!ring->status_page.page_addr ||
1931             list_empty(&ring->request_list))
1932                 return;
1933
1934         WARN_ON(i915_verify_lists(dev));
1935
1936         seqno = ring->get_seqno(ring);
1937         while (!list_empty(&ring->request_list)) {
1938                 struct drm_i915_gem_request *request;
1939
1940                 request = list_first_entry(&ring->request_list,
1941                                            struct drm_i915_gem_request,
1942                                            list);
1943
1944                 if (!i915_seqno_passed(seqno, request->seqno))
1945                         break;
1946
1947                 trace_i915_gem_request_retire(dev, request->seqno);
1948
1949                 list_del(&request->list);
1950                 i915_gem_request_remove_from_client(request);
1951                 kfree(request);
1952         }
1953
1954         /* Move any buffers on the active list that are no longer referenced
1955          * by the ringbuffer to the flushing/inactive lists as appropriate.
1956          */
1957         while (!list_empty(&ring->active_list)) {
1958                 struct drm_gem_object *obj;
1959                 struct drm_i915_gem_object *obj_priv;
1960
1961                 obj_priv = list_first_entry(&ring->active_list,
1962                                             struct drm_i915_gem_object,
1963                                             ring_list);
1964
1965                 if (!i915_seqno_passed(seqno, obj_priv->last_rendering_seqno))
1966                         break;
1967
1968                 obj = &obj_priv->base;
1969                 if (obj->write_domain != 0)
1970                         i915_gem_object_move_to_flushing(obj);
1971                 else
1972                         i915_gem_object_move_to_inactive(obj);
1973         }
1974
1975         if (unlikely (dev_priv->trace_irq_seqno &&
1976                       i915_seqno_passed(dev_priv->trace_irq_seqno, seqno))) {
1977                 ring->user_irq_put(ring);
1978                 dev_priv->trace_irq_seqno = 0;
1979         }
1980
1981         WARN_ON(i915_verify_lists(dev));
1982 }
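
/*
 * Note on the seqno comparisons above: i915_seqno_passed() (i915_drv.h) is,
 * in this era, essentially a signed-difference test along the lines of
 *
 *         static inline bool i915_seqno_passed(uint32_t seq1, uint32_t seq2)
 *         {
 *                 return (int32_t)(seq1 - seq2) >= 0;
 *         }
 *
 * so it remains correct across 32-bit wraparound: with seq1 == 0x00000002
 * and seq2 == 0xfffffffe the difference is 4, so seq1 is treated as having
 * passed seq2 even though it is numerically smaller.
 */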
1983
1984 void
1985 i915_gem_retire_requests(struct drm_device *dev)
1986 {
1987         drm_i915_private_t *dev_priv = dev->dev_private;
1988
1989         if (!list_empty(&dev_priv->mm.deferred_free_list)) {
1990                 struct drm_i915_gem_object *obj_priv, *tmp;
1991
1992                 /* We must be careful that during unbind() we do not
1993                  * accidentally infinitely recurse into retire requests.
1994                  * Currently:
1995                  *   retire -> free -> unbind -> wait -> retire_ring
1996                  */
1997                 list_for_each_entry_safe(obj_priv, tmp,
1998                                          &dev_priv->mm.deferred_free_list,
1999                                          mm_list)
2000                         i915_gem_free_object_tail(&obj_priv->base);
2001         }
2002
2003         i915_gem_retire_requests_ring(dev, &dev_priv->render_ring);
2004         i915_gem_retire_requests_ring(dev, &dev_priv->bsd_ring);
2005         i915_gem_retire_requests_ring(dev, &dev_priv->blt_ring);
2006 }
2007
2008 static void
2009 i915_gem_retire_work_handler(struct work_struct *work)
2010 {
2011         drm_i915_private_t *dev_priv;
2012         struct drm_device *dev;
2013
2014         dev_priv = container_of(work, drm_i915_private_t,
2015                                 mm.retire_work.work);
2016         dev = dev_priv->dev;
2017
2018         /* Come back later if the device is busy... */
2019         if (!mutex_trylock(&dev->struct_mutex)) {
2020                 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
2021                 return;
2022         }
2023
2024         i915_gem_retire_requests(dev);
2025
2026         if (!dev_priv->mm.suspended &&
2027                 (!list_empty(&dev_priv->render_ring.request_list) ||
2028                  !list_empty(&dev_priv->bsd_ring.request_list) ||
2029                  !list_empty(&dev_priv->blt_ring.request_list)))
2030                 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
2031         mutex_unlock(&dev->struct_mutex);
2032 }
2033
2034 int
2035 i915_do_wait_request(struct drm_device *dev, uint32_t seqno,
2036                      bool interruptible, struct intel_ring_buffer *ring)
2037 {
2038         drm_i915_private_t *dev_priv = dev->dev_private;
2039         u32 ier;
2040         int ret = 0;
2041
2042         BUG_ON(seqno == 0);
2043
2044         if (atomic_read(&dev_priv->mm.wedged))
2045                 return -EAGAIN;
2046
2047         if (ring->outstanding_lazy_request) {
2048                 struct drm_i915_gem_request *request;
2049
2050                 request = kzalloc(sizeof(*request), GFP_KERNEL);
2051                 if (request == NULL)
2052                         return -ENOMEM;
2053
2054                 ret = i915_add_request(dev, NULL, request, ring);
2055                 if (ret) {
2056                         kfree(request);
2057                         return ret;
2058                 }
2059
2060                 seqno = request->seqno;
2061         }
2062         BUG_ON(seqno == dev_priv->next_seqno);
2063
2064         if (!i915_seqno_passed(ring->get_seqno(ring), seqno)) {
2065                 if (HAS_PCH_SPLIT(dev))
2066                         ier = I915_READ(DEIER) | I915_READ(GTIER);
2067                 else
2068                         ier = I915_READ(IER);
2069                 if (!ier) {
2070                         DRM_ERROR("something (likely vbetool) disabled "
2071                                   "interrupts, re-enabling\n");
2072                         i915_driver_irq_preinstall(dev);
2073                         i915_driver_irq_postinstall(dev);
2074                 }
2075
2076                 trace_i915_gem_request_wait_begin(dev, seqno);
2077
2078                 ring->waiting_seqno = seqno;
2079                 ring->user_irq_get(ring);
2080                 if (interruptible)
2081                         ret = wait_event_interruptible(ring->irq_queue,
2082                                 i915_seqno_passed(ring->get_seqno(ring), seqno)
2083                                 || atomic_read(&dev_priv->mm.wedged));
2084                 else
2085                         wait_event(ring->irq_queue,
2086                                 i915_seqno_passed(ring->get_seqno(ring), seqno)
2087                                 || atomic_read(&dev_priv->mm.wedged));
2088
2089                 ring->user_irq_put(ring);
2090                 ring->waiting_seqno = 0;
2091
2092                 trace_i915_gem_request_wait_end(dev, seqno);
2093         }
2094         if (atomic_read(&dev_priv->mm.wedged))
2095                 ret = -EAGAIN;
2096
2097         if (ret && ret != -ERESTARTSYS)
2098                 DRM_ERROR("%s returns %d (awaiting %d at %d, next %d)\n",
2099                           __func__, ret, seqno, ring->get_seqno(ring),
2100                           dev_priv->next_seqno);
2101
2102         /* Directly dispatch request retiring.  While we have the work queue
2103          * to handle this, the waiter on a request often wants an associated
2104          * buffer to have made it to the inactive list, and we would need
2105          * a separate wait queue to handle that.
2106          */
2107         if (ret == 0)
2108                 i915_gem_retire_requests_ring(dev, ring);
2109
2110         return ret;
2111 }
2112
2113 /**
2114  * Waits for a sequence number to be signaled, and cleans up the
2115  * request and object lists appropriately for that event.
2116  */
2117 static int
2118 i915_wait_request(struct drm_device *dev, uint32_t seqno,
2119                   struct intel_ring_buffer *ring)
2120 {
2121         return i915_do_wait_request(dev, seqno, 1, ring);
2122 }
2123
2124 static void
2125 i915_gem_flush_ring(struct drm_device *dev,
2126                     struct drm_file *file_priv,
2127                     struct intel_ring_buffer *ring,
2128                     uint32_t invalidate_domains,
2129                     uint32_t flush_domains)
2130 {
2131         ring->flush(ring, invalidate_domains, flush_domains);
2132         i915_gem_process_flushing_list(dev, flush_domains, ring);
2133 }
2134
2135 static void
2136 i915_gem_flush(struct drm_device *dev,
2137                struct drm_file *file_priv,
2138                uint32_t invalidate_domains,
2139                uint32_t flush_domains,
2140                uint32_t flush_rings)
2141 {
2142         drm_i915_private_t *dev_priv = dev->dev_private;
2143
2144         if (flush_domains & I915_GEM_DOMAIN_CPU)
2145                 drm_agp_chipset_flush(dev);
2146
2147         if ((flush_domains | invalidate_domains) & I915_GEM_GPU_DOMAINS) {
2148                 if (flush_rings & RING_RENDER)
2149                         i915_gem_flush_ring(dev, file_priv,
2150                                             &dev_priv->render_ring,
2151                                             invalidate_domains, flush_domains);
2152                 if (flush_rings & RING_BSD)
2153                         i915_gem_flush_ring(dev, file_priv,
2154                                             &dev_priv->bsd_ring,
2155                                             invalidate_domains, flush_domains);
2156                 if (flush_rings & RING_BLT)
2157                         i915_gem_flush_ring(dev, file_priv,
2158                                             &dev_priv->blt_ring,
2159                                             invalidate_domains, flush_domains);
2160         }
2161 }
2162
2163 /**
2164  * Ensures that all rendering to the object has completed and the object is
2165  * safe to unbind from the GTT or access from the CPU.
2166  */
2167 static int
2168 i915_gem_object_wait_rendering(struct drm_gem_object *obj,
2169                                bool interruptible)
2170 {
2171         struct drm_device *dev = obj->dev;
2172         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2173         int ret;
2174
2175         /* This function only exists to support waiting for existing rendering,
2176          * not for emitting required flushes.
2177          */
2178         BUG_ON((obj->write_domain & I915_GEM_GPU_DOMAINS) != 0);
2179
2180         /* If there is rendering queued on the buffer being evicted, wait for
2181          * it.
2182          */
2183         if (obj_priv->active) {
2184                 ret = i915_do_wait_request(dev,
2185                                            obj_priv->last_rendering_seqno,
2186                                            interruptible,
2187                                            obj_priv->ring);
2188                 if (ret)
2189                         return ret;
2190         }
2191
2192         return 0;
2193 }
2194
2195 /**
2196  * Unbinds an object from the GTT aperture.
2197  */
2198 int
2199 i915_gem_object_unbind(struct drm_gem_object *obj)
2200 {
2201         struct drm_device *dev = obj->dev;
2202         struct drm_i915_private *dev_priv = dev->dev_private;
2203         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2204         int ret = 0;
2205
2206         if (obj_priv->gtt_space == NULL)
2207                 return 0;
2208
2209         if (obj_priv->pin_count != 0) {
2210                 DRM_ERROR("Attempting to unbind pinned buffer\n");
2211                 return -EINVAL;
2212         }
2213
2214         /* blow away mappings if mapped through GTT */
2215         i915_gem_release_mmap(obj);
2216
2217         /* Move the object to the CPU domain to ensure that
2218          * any possible CPU writes while it's not in the GTT
2219          * are flushed when we go to remap it. This will
2220          * also ensure that all pending GPU writes are finished
2221          * before we unbind.
2222          */
2223         ret = i915_gem_object_set_to_cpu_domain(obj, 1);
2224         if (ret == -ERESTARTSYS)
2225                 return ret;
2226         /* Continue on if we fail due to EIO: the GPU is hung, so we
2227          * should be safe, and we need to clean up or else we might
2228          * cause memory corruption through use-after-free.
2229          */
2230         if (ret) {
2231                 i915_gem_clflush_object(obj);
2232                 obj->read_domains = obj->write_domain = I915_GEM_DOMAIN_CPU;
2233         }
2234
2235         /* release the fence reg _after_ flushing */
2236         if (obj_priv->fence_reg != I915_FENCE_REG_NONE)
2237                 i915_gem_clear_fence_reg(obj);
2238
2239         drm_unbind_agp(obj_priv->agp_mem);
2240         drm_free_agp(obj_priv->agp_mem, obj->size / PAGE_SIZE);
2241
2242         i915_gem_object_put_pages(obj);
2243         BUG_ON(obj_priv->pages_refcount);
2244
2245         i915_gem_info_remove_gtt(dev_priv, obj);
2246         list_del_init(&obj_priv->mm_list);
2247
2248         drm_mm_put_block(obj_priv->gtt_space);
2249         obj_priv->gtt_space = NULL;
2250         obj_priv->gtt_offset = 0;
2251
2252         if (i915_gem_object_is_purgeable(obj_priv))
2253                 i915_gem_object_truncate(obj);
2254
2255         trace_i915_gem_object_unbind(obj);
2256
2257         return ret;
2258 }
2259
2260 static int i915_ring_idle(struct drm_device *dev,
2261                           struct intel_ring_buffer *ring)
2262 {
2263         if (list_empty(&ring->gpu_write_list))
2264                 return 0;
2265
2266         i915_gem_flush_ring(dev, NULL, ring,
2267                             I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
2268         return i915_wait_request(dev,
2269                                  i915_gem_next_request_seqno(dev, ring),
2270                                  ring);
2271 }
2272
2273 int
2274 i915_gpu_idle(struct drm_device *dev)
2275 {
2276         drm_i915_private_t *dev_priv = dev->dev_private;
2277         bool lists_empty;
2278         int ret;
2279
2280         lists_empty = (list_empty(&dev_priv->mm.flushing_list) &&
2281                        list_empty(&dev_priv->render_ring.active_list) &&
2282                        list_empty(&dev_priv->bsd_ring.active_list) &&
2283                        list_empty(&dev_priv->blt_ring.active_list));
2284         if (lists_empty)
2285                 return 0;
2286
2287         /* Flush everything onto the inactive list. */
2288         ret = i915_ring_idle(dev, &dev_priv->render_ring);
2289         if (ret)
2290                 return ret;
2291
2292         ret = i915_ring_idle(dev, &dev_priv->bsd_ring);
2293         if (ret)
2294                 return ret;
2295
2296         ret = i915_ring_idle(dev, &dev_priv->blt_ring);
2297         if (ret)
2298                 return ret;
2299
2300         return 0;
2301 }
2302
2303 static int
2304 i915_gem_object_get_pages(struct drm_gem_object *obj,
2305                           gfp_t gfpmask)
2306 {
2307         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2308         int page_count, i;
2309         struct address_space *mapping;
2310         struct inode *inode;
2311         struct page *page;
2312
2313         BUG_ON(obj_priv->pages_refcount
2314                         == DRM_I915_GEM_OBJECT_MAX_PAGES_REFCOUNT);
2315
2316         if (obj_priv->pages_refcount++ != 0)
2317                 return 0;
2318
2319         /* Get the list of pages out of our struct file.  They'll be pinned
2320          * at this point until we release them.
2321          */
2322         page_count = obj->size / PAGE_SIZE;
2323         BUG_ON(obj_priv->pages != NULL);
2324         obj_priv->pages = drm_calloc_large(page_count, sizeof(struct page *));
2325         if (obj_priv->pages == NULL) {
2326                 obj_priv->pages_refcount--;
2327                 return -ENOMEM;
2328         }
2329
2330         inode = obj->filp->f_path.dentry->d_inode;
2331         mapping = inode->i_mapping;
2332         for (i = 0; i < page_count; i++) {
2333                 page = read_cache_page_gfp(mapping, i,
2334                                            GFP_HIGHUSER |
2335                                            __GFP_COLD |
2336                                            __GFP_RECLAIMABLE |
2337                                            gfpmask);
2338                 if (IS_ERR(page))
2339                         goto err_pages;
2340
2341                 obj_priv->pages[i] = page;
2342         }
2343
2344         if (obj_priv->tiling_mode != I915_TILING_NONE)
2345                 i915_gem_object_do_bit_17_swizzle(obj);
2346
2347         return 0;
2348
2349 err_pages:
2350         while (i--)
2351                 page_cache_release(obj_priv->pages[i]);
2352
2353         drm_free_large(obj_priv->pages);
2354         obj_priv->pages = NULL;
2355         obj_priv->pages_refcount--;
2356         return PTR_ERR(page);
2357 }
2358
2359 static void sandybridge_write_fence_reg(struct drm_i915_fence_reg *reg)
2360 {
2361         struct drm_gem_object *obj = reg->obj;
2362         struct drm_device *dev = obj->dev;
2363         drm_i915_private_t *dev_priv = dev->dev_private;
2364         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2365         int regnum = obj_priv->fence_reg;
2366         uint64_t val;
2367
2368         val = (uint64_t)((obj_priv->gtt_offset + obj->size - 4096) &
2369                     0xfffff000) << 32;
2370         val |= obj_priv->gtt_offset & 0xfffff000;
2371         val |= (uint64_t)((obj_priv->stride / 128) - 1) <<
2372                 SANDYBRIDGE_FENCE_PITCH_SHIFT;
2373
2374         if (obj_priv->tiling_mode == I915_TILING_Y)
2375                 val |= 1 << I965_FENCE_TILING_Y_SHIFT;
2376         val |= I965_FENCE_REG_VALID;
2377
2378         I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + (regnum * 8), val);
2379 }
2380
2381 static void i965_write_fence_reg(struct drm_i915_fence_reg *reg)
2382 {
2383         struct drm_gem_object *obj = reg->obj;
2384         struct drm_device *dev = obj->dev;
2385         drm_i915_private_t *dev_priv = dev->dev_private;
2386         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2387         int regnum = obj_priv->fence_reg;
2388         uint64_t val;
2389
2390         val = (uint64_t)((obj_priv->gtt_offset + obj->size - 4096) &
2391                     0xfffff000) << 32;
2392         val |= obj_priv->gtt_offset & 0xfffff000;
2393         val |= ((obj_priv->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT;
2394         if (obj_priv->tiling_mode == I915_TILING_Y)
2395                 val |= 1 << I965_FENCE_TILING_Y_SHIFT;
2396         val |= I965_FENCE_REG_VALID;
2397
2398         I915_WRITE64(FENCE_REG_965_0 + (regnum * 8), val);
2399 }
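
/*
 * Worked example of the i965 packing above (illustrative numbers): an
 * X-tiled object of size 1MiB at gtt_offset 0x00100000 with a 4096-byte
 * stride yields
 *
 *         end = (0x00100000 + 0x00100000 - 4096) & 0xfffff000 = 0x001ff000
 *         val = 0x001ff000ULL << 32                 (fence end address)
 *             | 0x00100000                          (fence start address)
 *             | ((4096 / 128) - 1) << I965_FENCE_PITCH_SHIFT
 *             | I965_FENCE_REG_VALID
 *
 * i.e. start and end bound the fenced range, and the pitch is stored in
 * units of 128 bytes, minus one.
 */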
2400
2401 static void i915_write_fence_reg(struct drm_i915_fence_reg *reg)
2402 {
2403         struct drm_gem_object *obj = reg->obj;
2404         struct drm_device *dev = obj->dev;
2405         drm_i915_private_t *dev_priv = dev->dev_private;
2406         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2407         int regnum = obj_priv->fence_reg;
2408         int tile_width;
2409         uint32_t fence_reg, val;
2410         uint32_t pitch_val;
2411
2412         if ((obj_priv->gtt_offset & ~I915_FENCE_START_MASK) ||
2413             (obj_priv->gtt_offset & (obj->size - 1))) {
2414                 WARN(1, "%s: object 0x%08x not 1M or size (0x%zx) aligned\n",
2415                      __func__, obj_priv->gtt_offset, obj->size);
2416                 return;
2417         }
2418
2419         if (obj_priv->tiling_mode == I915_TILING_Y &&
2420             HAS_128_BYTE_Y_TILING(dev))
2421                 tile_width = 128;
2422         else
2423                 tile_width = 512;
2424
2425         /* Note: the pitch must be a power-of-two number of tile widths */
2426         pitch_val = obj_priv->stride / tile_width;
2427         pitch_val = ffs(pitch_val) - 1;
2428
2429         if (obj_priv->tiling_mode == I915_TILING_Y &&
2430             HAS_128_BYTE_Y_TILING(dev))
2431                 WARN_ON(pitch_val > I830_FENCE_MAX_PITCH_VAL);
2432         else
2433                 WARN_ON(pitch_val > I915_FENCE_MAX_PITCH_VAL);
2434
2435         val = obj_priv->gtt_offset;
2436         if (obj_priv->tiling_mode == I915_TILING_Y)
2437                 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
2438         val |= I915_FENCE_SIZE_BITS(obj->size);
2439         val |= pitch_val << I830_FENCE_PITCH_SHIFT;
2440         val |= I830_FENCE_REG_VALID;
2441
2442         if (regnum < 8)
2443                 fence_reg = FENCE_REG_830_0 + (regnum * 4);
2444         else
2445                 fence_reg = FENCE_REG_945_8 + ((regnum - 8) * 4);
2446         I915_WRITE(fence_reg, val);
2447 }
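
/*
 * Example of the pre-965 pitch encoding above: an X-tiled surface
 * (tile_width == 512) with a 2048-byte stride spans 2048 / 512 == 4 tiles
 * per row, so pitch_val = ffs(4) - 1 == 2; the register effectively stores
 * log2(stride / tile_width).
 */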
2448
2449 static void i830_write_fence_reg(struct drm_i915_fence_reg *reg)
2450 {
2451         struct drm_gem_object *obj = reg->obj;
2452         struct drm_device *dev = obj->dev;
2453         drm_i915_private_t *dev_priv = dev->dev_private;
2454         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2455         int regnum = obj_priv->fence_reg;
2456         uint32_t val;
2457         uint32_t pitch_val;
2458         uint32_t fence_size_bits;
2459
2460         if ((obj_priv->gtt_offset & ~I830_FENCE_START_MASK) ||
2461             (obj_priv->gtt_offset & (obj->size - 1))) {
2462                 WARN(1, "%s: object 0x%08x not 512K or size aligned\n",
2463                      __func__, obj_priv->gtt_offset);
2464                 return;
2465         }
2466
2467         pitch_val = obj_priv->stride / 128;
2468         pitch_val = ffs(pitch_val) - 1;
2469         WARN_ON(pitch_val > I830_FENCE_MAX_PITCH_VAL);
2470
2471         val = obj_priv->gtt_offset;
2472         if (obj_priv->tiling_mode == I915_TILING_Y)
2473                 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
2474         fence_size_bits = I830_FENCE_SIZE_BITS(obj->size);
2475         WARN_ON(fence_size_bits & ~0x00000f00);
2476         val |= fence_size_bits;
2477         val |= pitch_val << I830_FENCE_PITCH_SHIFT;
2478         val |= I830_FENCE_REG_VALID;
2479
2480         I915_WRITE(FENCE_REG_830_0 + (regnum * 4), val);
2481 }
2482
2483 static int i915_find_fence_reg(struct drm_device *dev,
2484                                bool interruptible)
2485 {
2486         struct drm_i915_fence_reg *reg = NULL;
2487         struct drm_i915_gem_object *obj_priv = NULL;
2488         struct drm_i915_private *dev_priv = dev->dev_private;
2489         struct drm_gem_object *obj = NULL;
2490         int i, avail, ret;
2491
2492         /* First try to find a free reg */
2493         avail = 0;
2494         for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
2495                 reg = &dev_priv->fence_regs[i];
2496                 if (!reg->obj)
2497                         return i;
2498
2499                 obj_priv = to_intel_bo(reg->obj);
2500                 if (!obj_priv->pin_count)
2501                         avail++;
2502         }
2503
2504         if (avail == 0)
2505                 return -ENOSPC;
2506
2507         /* None available, try to steal one or wait for a user to finish */
2508         i = I915_FENCE_REG_NONE;
2509         list_for_each_entry(reg, &dev_priv->mm.fence_list,
2510                             lru_list) {
2511                 obj = reg->obj;
2512                 obj_priv = to_intel_bo(obj);
2513
2514                 if (obj_priv->pin_count)
2515                         continue;
2516
2517                 /* found one! */
2518                 i = obj_priv->fence_reg;
2519                 break;
2520         }
2521
2522         BUG_ON(i == I915_FENCE_REG_NONE);
2523
2524         /* We only have a reference on obj from the active list. put_fence_reg
2525          * might drop that one, causing a use-after-free of obj. So hold a
2526          * private reference to obj like the other callers of put_fence_reg
2527          * (the set_tiling ioctl) do. */
2528         drm_gem_object_reference(obj);
2529         ret = i915_gem_object_put_fence_reg(obj, interruptible);
2530         drm_gem_object_unreference(obj);
2531         if (ret != 0)
2532                 return ret;
2533
2534         return i;
2535 }
2536
2537 /**
2538  * i915_gem_object_get_fence_reg - set up a fence reg for an object
2539  * @obj: object to map through a fence reg
2540  *
2541  * When mapping objects through the GTT, userspace wants to be able to write
2542  * to them without having to worry about swizzling if the object is tiled.
2543  *
2544  * This function walks the fence regs looking for a free one for @obj,
2545  * stealing one if it can't find any.
2546  *
2547  * It then sets up the reg based on the object's properties: address, pitch
2548  * and tiling format.
2549  */
2550 int
2551 i915_gem_object_get_fence_reg(struct drm_gem_object *obj,
2552                               bool interruptible)
2553 {
2554         struct drm_device *dev = obj->dev;
2555         struct drm_i915_private *dev_priv = dev->dev_private;
2556         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2557         struct drm_i915_fence_reg *reg = NULL;
2558         int ret;
2559
2560         /* Just update our place in the LRU if our fence is getting used. */
2561         if (obj_priv->fence_reg != I915_FENCE_REG_NONE) {
2562                 reg = &dev_priv->fence_regs[obj_priv->fence_reg];
2563                 list_move_tail(&reg->lru_list, &dev_priv->mm.fence_list);
2564                 return 0;
2565         }
2566
2567         switch (obj_priv->tiling_mode) {
2568         case I915_TILING_NONE:
2569                 WARN(1, "allocating a fence for non-tiled object?\n");
2570                 break;
2571         case I915_TILING_X:
2572                 if (!obj_priv->stride)
2573                         return -EINVAL;
2574                 WARN((obj_priv->stride & (512 - 1)),
2575                      "object 0x%08x is X tiled but has non-512B pitch\n",
2576                      obj_priv->gtt_offset);
2577                 break;
2578         case I915_TILING_Y:
2579                 if (!obj_priv->stride)
2580                         return -EINVAL;
2581                 WARN((obj_priv->stride & (128 - 1)),
2582                      "object 0x%08x is Y tiled but has non-128B pitch\n",
2583                      obj_priv->gtt_offset);
2584                 break;
2585         }
2586
2587         ret = i915_find_fence_reg(dev, interruptible);
2588         if (ret < 0)
2589                 return ret;
2590
2591         obj_priv->fence_reg = ret;
2592         reg = &dev_priv->fence_regs[obj_priv->fence_reg];
2593         list_add_tail(&reg->lru_list, &dev_priv->mm.fence_list);
2594
2595         reg->obj = obj;
2596
2597         switch (INTEL_INFO(dev)->gen) {
2598         case 6:
2599                 sandybridge_write_fence_reg(reg);
2600                 break;
2601         case 5:
2602         case 4:
2603                 i965_write_fence_reg(reg);
2604                 break;
2605         case 3:
2606                 i915_write_fence_reg(reg);
2607                 break;
2608         case 2:
2609                 i830_write_fence_reg(reg);
2610                 break;
2611         }
2612
2613         trace_i915_gem_object_get_fence(obj, obj_priv->fence_reg,
2614                         obj_priv->tiling_mode);
2615
2616         return 0;
2617 }
2618
2619 /**
2620  * i915_gem_clear_fence_reg - clear out fence register info
2621  * @obj: object to clear
2622  *
2623  * Zeroes out the fence register itself and clears out the associated
2624  * data structures in dev_priv and obj_priv.
2625  */
2626 static void
2627 i915_gem_clear_fence_reg(struct drm_gem_object *obj)
2628 {
2629         struct drm_device *dev = obj->dev;
2630         drm_i915_private_t *dev_priv = dev->dev_private;
2631         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2632         struct drm_i915_fence_reg *reg =
2633                 &dev_priv->fence_regs[obj_priv->fence_reg];
2634         uint32_t fence_reg;
2635
2636         switch (INTEL_INFO(dev)->gen) {
2637         case 6:
2638                 I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 +
2639                              (obj_priv->fence_reg * 8), 0);
2640                 break;
2641         case 5:
2642         case 4:
2643                 I915_WRITE64(FENCE_REG_965_0 + (obj_priv->fence_reg * 8), 0);
2644                 break;
2645         case 3:
2646         case 2:
2647                 /* only gen3 has the second bank of fence registers */
2648                 if (INTEL_INFO(dev)->gen == 3 && obj_priv->fence_reg >= 8)
2649                         fence_reg = FENCE_REG_945_8 + (obj_priv->fence_reg - 8) * 4;
2650                 else
2651                         fence_reg = FENCE_REG_830_0 + obj_priv->fence_reg * 4;
2652                 I915_WRITE(fence_reg, 0);
2653                 break;
2654         }
2655
2656         reg->obj = NULL;
2657         obj_priv->fence_reg = I915_FENCE_REG_NONE;
2658         list_del_init(&reg->lru_list);
2659 }
2660
2661 /**
2662  * i915_gem_object_put_fence_reg - waits on outstanding fenced access
2663  * to the buffer to finish, and then resets the fence register.
2664  * @obj: tiled object holding a fence register.
2665  * @interruptible: whether the wait upon the fence is interruptible
2666  *
2667  * Zeroes out the fence register itself and clears out the associated
2668  * data structures in dev_priv and obj_priv.
2669  */
2670 int
2671 i915_gem_object_put_fence_reg(struct drm_gem_object *obj,
2672                               bool interruptible)
2673 {
2674         struct drm_device *dev = obj->dev;
2675         struct drm_i915_private *dev_priv = dev->dev_private;
2676         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2677         struct drm_i915_fence_reg *reg;
2678
2679         if (obj_priv->fence_reg == I915_FENCE_REG_NONE)
2680                 return 0;
2681
2682         /* If we've changed tiling, GTT-mappings of the object
2683          * need to re-fault to ensure that the correct fence register
2684          * setup is in place.
2685          */
2686         i915_gem_release_mmap(obj);
2687
2688         /* On the i915, GPU access to tiled buffers is via a fence,
2689          * therefore we must wait for any outstanding access to complete
2690          * before clearing the fence.
2691          */
2692         reg = &dev_priv->fence_regs[obj_priv->fence_reg];
2693         if (reg->gpu) {
2694                 int ret;
2695
2696                 ret = i915_gem_object_flush_gpu_write_domain(obj, true);
2697                 if (ret)
2698                         return ret;
2699
2700                 ret = i915_gem_object_wait_rendering(obj, interruptible);
2701                 if (ret)
2702                         return ret;
2703
2704                 reg->gpu = false;
2705         }
2706
2707         i915_gem_object_flush_gtt_write_domain(obj);
2708         i915_gem_clear_fence_reg(obj);
2709
2710         return 0;
2711 }
2712
2713 /**
2714  * Finds free space in the GTT aperture and binds the object there.
2715  */
2716 static int
2717 i915_gem_object_bind_to_gtt(struct drm_gem_object *obj,
2718                             unsigned alignment,
2719                             bool mappable)
2720 {
2721         struct drm_device *dev = obj->dev;
2722         drm_i915_private_t *dev_priv = dev->dev_private;
2723         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2724         struct drm_mm_node *free_space;
2725         gfp_t gfpmask = __GFP_NORETRY | __GFP_NOWARN;
2726         int ret;
2727
2728         if (obj_priv->madv != I915_MADV_WILLNEED) {
2729                 DRM_ERROR("Attempting to bind a purgeable object\n");
2730                 return -EINVAL;
2731         }
2732
2733         if (alignment == 0)
2734                 alignment = i915_gem_get_gtt_alignment(obj);
2735         if (alignment & (i915_gem_get_gtt_alignment(obj) - 1)) {
2736                 DRM_ERROR("Invalid object alignment requested %u\n", alignment);
2737                 return -EINVAL;
2738         }
2739
2740         /* If the object is bigger than the entire aperture, reject it early
2741          * before evicting everything in a vain attempt to find space.
2742          */
2743         if (obj->size >
2744             (mappable ? dev_priv->mm.gtt_mappable_end : dev_priv->mm.gtt_total)) {
2745                 DRM_ERROR("Attempting to bind an object larger than the aperture\n");
2746                 return -E2BIG;
2747         }
2748
2749  search_free:
2750         if (mappable)
2751                 free_space =
2752                         drm_mm_search_free_in_range(&dev_priv->mm.gtt_space,
2753                                                     obj->size, alignment, 0,
2754                                                     dev_priv->mm.gtt_mappable_end,
2755                                                     0);
2756         else
2757                 free_space = drm_mm_search_free(&dev_priv->mm.gtt_space,
2758                                                 obj->size, alignment, 0);
2759
2760         if (free_space != NULL) {
2761                 if (mappable)
2762                         obj_priv->gtt_space =
2763                                 drm_mm_get_block_range_generic(free_space,
2764                                                                obj->size,
2765                                                                alignment, 0,
2766                                                                dev_priv->mm.gtt_mappable_end,
2767                                                                0);
2768                 else
2769                         obj_priv->gtt_space =
2770                                 drm_mm_get_block(free_space, obj->size,
2771                                                  alignment);
2772         }
2773         if (obj_priv->gtt_space == NULL) {
2774                 /* If the gtt is empty and we're still having trouble
2775                  * fitting our object in, we're out of memory.
2776                  */
2777                 ret = i915_gem_evict_something(dev, obj->size, alignment,
2778                                                mappable);
2779                 if (ret)
2780                         return ret;
2781
2782                 goto search_free;
2783         }
2784
2785         ret = i915_gem_object_get_pages(obj, gfpmask);
2786         if (ret) {
2787                 drm_mm_put_block(obj_priv->gtt_space);
2788                 obj_priv->gtt_space = NULL;
2789
2790                 if (ret == -ENOMEM) {
2791                         /* first try to clear up some space from the GTT */
2792                         ret = i915_gem_evict_something(dev, obj->size,
2793                                                        alignment, mappable);
2794                         if (ret) {
2795                                 /* now try to shrink everyone else */
2796                                 if (gfpmask) {
2797                                         gfpmask = 0;
2798                                         goto search_free;
2799                                 }
2800
2801                                 return ret;
2802                         }
2803
2804                         goto search_free;
2805                 }
2806
2807                 return ret;
2808         }
2809
2810         /* Create an AGP memory structure pointing at our pages, and bind it
2811          * into the GTT.
2812          */
2813         obj_priv->agp_mem = drm_agp_bind_pages(dev,
2814                                                obj_priv->pages,
2815                                                obj->size >> PAGE_SHIFT,
2816                                                obj_priv->gtt_space->start,
2817                                                obj_priv->agp_type);
2818         if (obj_priv->agp_mem == NULL) {
2819                 i915_gem_object_put_pages(obj);
2820                 drm_mm_put_block(obj_priv->gtt_space);
2821                 obj_priv->gtt_space = NULL;
2822
2823                 ret = i915_gem_evict_something(dev, obj->size, alignment,
2824                                                mappable);
2825                 if (ret)
2826                         return ret;
2827
2828                 goto search_free;
2829         }
2830
2831         obj_priv->gtt_offset = obj_priv->gtt_space->start;
2832
2833         /* keep track of the bound object by adding it to the inactive list */
2834         list_add_tail(&obj_priv->mm_list, &dev_priv->mm.inactive_list);
2835         i915_gem_info_add_gtt(dev_priv, obj);
2836
2837         /* Assert that the object is not currently in any GPU domain. As it
2838          * wasn't in the GTT, there shouldn't be any way it could have been in
2839          * a GPU cache
2840          */
2841         BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
2842         BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
2843
2844         trace_i915_gem_object_bind(obj, obj_priv->gtt_offset, mappable);
2845
2846         return 0;
2847 }
2848
2849 void
2850 i915_gem_clflush_object(struct drm_gem_object *obj)
2851 {
2852         struct drm_i915_gem_object      *obj_priv = to_intel_bo(obj);
2853
2854         /* If we don't have a page list set up, then we're not pinned
2855          * to the GPU, and we can ignore the cache flush because it'll happen
2856          * again at bind time.
2857          */
2858         if (obj_priv->pages == NULL)
2859                 return;
2860
2861         trace_i915_gem_object_clflush(obj);
2862
2863         drm_clflush_pages(obj_priv->pages, obj->size / PAGE_SIZE);
2864 }
2865
2866 /** Flushes any GPU write domain for the object if it's dirty. */
2867 static int
2868 i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj,
2869                                        bool pipelined)
2870 {
2871         struct drm_device *dev = obj->dev;
2872         uint32_t old_write_domain;
2873
2874         if ((obj->write_domain & I915_GEM_GPU_DOMAINS) == 0)
2875                 return 0;
2876
2877         /* Queue the GPU write cache flushing we need. */
2878         old_write_domain = obj->write_domain;
2879         i915_gem_flush_ring(dev, NULL,
2880                             to_intel_bo(obj)->ring,
2881                             0, obj->write_domain);
2882         BUG_ON(obj->write_domain);
2883
2884         trace_i915_gem_object_change_domain(obj,
2885                                             obj->read_domains,
2886                                             old_write_domain);
2887
2888         if (pipelined)
2889                 return 0;
2890
2891         return i915_gem_object_wait_rendering(obj, true);
2892 }
2893
2894 /** Flushes the GTT write domain for the object if it's dirty. */
2895 static void
2896 i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj)
2897 {
2898         uint32_t old_write_domain;
2899
2900         if (obj->write_domain != I915_GEM_DOMAIN_GTT)
2901                 return;
2902
2903         /* No actual flushing is required for the GTT write domain.   Writes
2904          * to it immediately go to main memory as far as we know, so there's
2905          * no chipset flush.  It also doesn't land in render cache.
2906          */
2907         old_write_domain = obj->write_domain;
2908         obj->write_domain = 0;
2909
2910         trace_i915_gem_object_change_domain(obj,
2911                                             obj->read_domains,
2912                                             old_write_domain);
2913 }
2914
2915 /** Flushes the CPU write domain for the object if it's dirty. */
2916 static void
2917 i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj)
2918 {
2919         struct drm_device *dev = obj->dev;
2920         uint32_t old_write_domain;
2921
2922         if (obj->write_domain != I915_GEM_DOMAIN_CPU)
2923                 return;
2924
2925         i915_gem_clflush_object(obj);
2926         drm_agp_chipset_flush(dev);
2927         old_write_domain = obj->write_domain;
2928         obj->write_domain = 0;
2929
2930         trace_i915_gem_object_change_domain(obj,
2931                                             obj->read_domains,
2932                                             old_write_domain);
2933 }
2934
2935 /**
2936  * Moves a single object to the GTT read, and possibly write domain.
2937  *
2938  * This function returns when the move is complete, including waiting on
2939  * flushes to occur.
2940  */
2941 int
2942 i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write)
2943 {
2944         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2945         uint32_t old_write_domain, old_read_domains;
2946         int ret;
2947
2948         /* Not valid to be called on unbound objects. */
2949         if (obj_priv->gtt_space == NULL)
2950                 return -EINVAL;
2951
2952         ret = i915_gem_object_flush_gpu_write_domain(obj, false);
2953         if (ret != 0)
2954                 return ret;
2955
2956         i915_gem_object_flush_cpu_write_domain(obj);
2957
2958         if (write) {
2959                 ret = i915_gem_object_wait_rendering(obj, true);
2960                 if (ret)
2961                         return ret;
2962         }
2963
2964         old_write_domain = obj->write_domain;
2965         old_read_domains = obj->read_domains;
2966
2967         /* It should now be out of any other write domains, and we can update
2968          * the domain values for our changes.
2969          */
2970         BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
2971         obj->read_domains |= I915_GEM_DOMAIN_GTT;
2972         if (write) {
2973                 obj->read_domains = I915_GEM_DOMAIN_GTT;
2974                 obj->write_domain = I915_GEM_DOMAIN_GTT;
2975                 obj_priv->dirty = 1;
2976         }
2977
2978         trace_i915_gem_object_change_domain(obj,
2979                                             old_read_domains,
2980                                             old_write_domain);
2981
2982         return 0;
2983 }
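
     /*
      * Illustrative usage sketch (not part of the driver): a typical caller
      * pins the object into the GTT and then moves it to the GTT domain before
      * writing through the aperture.  Error handling is elided and
      * do_gtt_write() is a hypothetical stand-in for the actual copy:
      *
      *      ret = i915_gem_object_pin(obj, 0, true);
      *      if (ret == 0) {
      *              ret = i915_gem_object_set_to_gtt_domain(obj, 1);
      *              if (ret == 0)
      *                      do_gtt_write(obj);
      *              i915_gem_object_unpin(obj);
      *      }
      */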
2984
2985 /*
2986  * Prepare a buffer for use as a display plane. Use an uninterruptible wait
2987  * for any required flush, since the modesetting path must not be interrupted.
2988  */
2989 int
2990 i915_gem_object_set_to_display_plane(struct drm_gem_object *obj,
2991                                      bool pipelined)
2992 {
2993         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2994         uint32_t old_read_domains;
2995         int ret;
2996
2997         /* Not valid to be called on unbound objects. */
2998         if (obj_priv->gtt_space == NULL)
2999                 return -EINVAL;
3000
3001         ret = i915_gem_object_flush_gpu_write_domain(obj, true);
3002         if (ret)
3003                 return ret;
3004
3005         /* Currently, we are always called from a non-interruptible context. */
3006         if (!pipelined) {
3007                 ret = i915_gem_object_wait_rendering(obj, false);
3008                 if (ret)
3009                         return ret;
3010         }
3011
3012         i915_gem_object_flush_cpu_write_domain(obj);
3013
3014         old_read_domains = obj->read_domains;
3015         obj->read_domains |= I915_GEM_DOMAIN_GTT;
3016
3017         trace_i915_gem_object_change_domain(obj,
3018                                             old_read_domains,
3019                                             obj->write_domain);
3020
3021         return 0;
3022 }
3023
3024 /**
3025  * Moves a single object to the CPU read, and possibly write domain.
3026  *
3027  * This function returns when the move is complete, including waiting on
3028  * flushes to occur.
3029  */
3030 static int
3031 i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write)
3032 {
3033         uint32_t old_write_domain, old_read_domains;
3034         int ret;
3035
3036         ret = i915_gem_object_flush_gpu_write_domain(obj, false);
3037         if (ret != 0)
3038                 return ret;
3039
3040         i915_gem_object_flush_gtt_write_domain(obj);
3041
3042         /* If we have a partially-valid cache of the object in the CPU,
3043          * finish invalidating it and free the per-page flags.
3044          */
3045         i915_gem_object_set_to_full_cpu_read_domain(obj);
3046
3047         if (write) {
3048                 ret = i915_gem_object_wait_rendering(obj, true);
3049                 if (ret)
3050                         return ret;
3051         }
3052
3053         old_write_domain = obj->write_domain;
3054         old_read_domains = obj->read_domains;
3055
3056         /* Flush the CPU cache if it's still invalid. */
3057         if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
3058                 i915_gem_clflush_object(obj);
3059
3060                 obj->read_domains |= I915_GEM_DOMAIN_CPU;
3061         }
3062
3063         /* It should now be out of any other write domains, and we can update
3064          * the domain values for our changes.
3065          */
3066         BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
3067
3068         /* If we're writing through the CPU, then the GPU read domains will
3069          * need to be invalidated at next use.
3070          */
3071         if (write) {
3072                 obj->read_domains = I915_GEM_DOMAIN_CPU;
3073                 obj->write_domain = I915_GEM_DOMAIN_CPU;
3074         }
3075
3076         trace_i915_gem_object_change_domain(obj,
3077                                             old_read_domains,
3078                                             old_write_domain);
3079
3080         return 0;
3081 }
3082
3083 /*
3084  * Set the next domain for the specified object. This
3085  * may not actually perform the necessary flushing/invalidating though,
3086  * as that may want to be batched with other set_domain operations.
3087  *
3088  * This is (we hope) the only really tricky part of gem. The goal
3089  * is fairly simple -- track which caches hold bits of the object
3090  * and make sure they remain coherent. A few concrete examples may
3091  * help to explain how it works. For shorthand, we use the notation
3092  * (read_domains, write_domain), e.g. (CPU, CPU), to indicate
3093  * a pair of read and write domain masks.
3094  *
3095  * Case 1: the batch buffer
3096  *
3097  *      1. Allocated
3098  *      2. Written by CPU
3099  *      3. Mapped to GTT
3100  *      4. Read by GPU
3101  *      5. Unmapped from GTT
3102  *      6. Freed
3103  *
3104  *      Let's take these a step at a time
3105  *
3106  *      1. Allocated
3107  *              Pages allocated from the kernel may still have
3108  *              cache contents, so we set them to (CPU, CPU) always.
3109  *      2. Written by CPU (using pwrite)
3110  *              The pwrite function calls set_domain (CPU, CPU) and
3111  *              this function does nothing (as nothing changes)
3112  *      3. Mapped to GTT
3113  *              This function asserts that the object is not
3114  *              currently in any GPU-based read or write domains
3115  *      4. Read by GPU
3116  *              i915_gem_execbuffer calls set_domain (COMMAND, 0).
3117  *              As write_domain is zero, this function adds in the
3118  *              current read domains (CPU+COMMAND, 0).
3119  *              flush_domains is set to CPU.
3120  *              invalidate_domains is set to COMMAND
3121  *              clflush is run to get data out of the CPU caches
3122  *              then i915_dev_set_domain calls i915_gem_flush to
3123  *              emit an MI_FLUSH and drm_agp_chipset_flush
3124  *      5. Unmapped from GTT
3125  *              i915_gem_object_unbind calls set_domain (CPU, CPU)
3126  *              flush_domains and invalidate_domains end up both zero
3127  *              so no flushing/invalidating happens
3128  *      6. Freed
3129  *              yay, done
3130  *
3131  * Case 2: The shared render buffer
3132  *
3133  *      1. Allocated
3134  *      2. Mapped to GTT
3135  *      3. Read/written by GPU
3136  *      4. set_domain to (CPU,CPU)
3137  *      5. Read/written by CPU
3138  *      6. Read/written by GPU
3139  *
3140  *      1. Allocated
3141  *              Same as last example, (CPU, CPU)
3142  *      2. Mapped to GTT
3143  *              Nothing changes (assertions find that it is not in the GPU)
3144  *      3. Read/written by GPU
3145  *              execbuffer calls set_domain (RENDER, RENDER)
3146  *              flush_domains gets CPU
3147  *              invalidate_domains gets GPU
3148  *              clflush (obj)
3149  *              MI_FLUSH and drm_agp_chipset_flush
3150  *      4. set_domain (CPU, CPU)
3151  *              flush_domains gets GPU
3152  *              invalidate_domains gets CPU
3153  *              wait_rendering (obj) to make sure all drawing is complete.
3154  *              This will include an MI_FLUSH to get the data from GPU
3155  *              to memory
3156  *              clflush (obj) to invalidate the CPU cache
3157  *              Another MI_FLUSH in i915_gem_flush (eliminate this somehow?)
3158  *      5. Read/written by CPU
3159  *              cache lines are loaded and dirtied
3160  *      6. Read/written by GPU
3161  *              Same as last GPU access
3162  *
3163  * Case 3: The constant buffer
3164  *
3165  *      1. Allocated
3166  *      2. Written by CPU
3167  *      3. Read by GPU
3168  *      4. Updated (written) by CPU again
3169  *      5. Read by GPU
3170  *
3171  *      1. Allocated
3172  *              (CPU, CPU)
3173  *      2. Written by CPU
3174  *              (CPU, CPU)
3175  *      3. Read by GPU
3176  *              (CPU+RENDER, 0)
3177  *              flush_domains = CPU
3178  *              invalidate_domains = RENDER
3179  *              clflush (obj)
3180  *              MI_FLUSH
3181  *              drm_agp_chipset_flush
3182  *      4. Updated (written) by CPU again
3183  *              (CPU, CPU)
3184  *              flush_domains = 0 (no previous write domain)
3185  *              invalidate_domains = 0 (no new read domains)
3186  *      5. Read by GPU
3187  *              (CPU+RENDER, 0)
3188  *              flush_domains = CPU
3189  *              invalidate_domains = RENDER
3190  *              clflush (obj)
3191  *              MI_FLUSH
3192  *              drm_agp_chipset_flush
3193  */
3194 static void
3195 i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj,
3196                                   struct intel_ring_buffer *ring)
3197 {
3198         struct drm_device               *dev = obj->dev;
3199         struct drm_i915_private         *dev_priv = dev->dev_private;
3200         struct drm_i915_gem_object      *obj_priv = to_intel_bo(obj);
3201         uint32_t                        invalidate_domains = 0;
3202         uint32_t                        flush_domains = 0;
3203
3204         /*
3205          * If the object isn't moving to a new write domain,
3206          * let the object stay in multiple read domains
3207          */
3208         if (obj->pending_write_domain == 0)
3209                 obj->pending_read_domains |= obj->read_domains;
3210
3211         /*
3212          * Flush the current write domain if
3213          * the new read domains don't match. Invalidate
3214          * any read domains which differ from the old
3215          * write domain
3216          */
3217         if (obj->write_domain &&
3218             obj->write_domain != obj->pending_read_domains) {
3219                 flush_domains |= obj->write_domain;
3220                 invalidate_domains |=
3221                         obj->pending_read_domains & ~obj->write_domain;
3222         }
3223         /*
3224          * Invalidate any read caches which may have
3225          * stale data. That is, any new read domains.
3226          */
3227         invalidate_domains |= obj->pending_read_domains & ~obj->read_domains;
3228         if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU)
3229                 i915_gem_clflush_object(obj);
3230
3231         /* The actual obj->write_domain will be updated with
3232          * pending_write_domain after we emit the accumulated flush for all
3233          * of our domain changes in execbuffers (which clears objects'
3234          * write_domains).  So if we have a current write domain that we
3235          * aren't changing, set pending_write_domain to that.
3236          */
3237         if (flush_domains == 0 && obj->pending_write_domain == 0)
3238                 obj->pending_write_domain = obj->write_domain;
3239
3240         dev->invalidate_domains |= invalidate_domains;
3241         dev->flush_domains |= flush_domains;
3242         if (flush_domains & I915_GEM_GPU_DOMAINS)
3243                 dev_priv->mm.flush_rings |= obj_priv->ring->id;
3244         if (invalidate_domains & I915_GEM_GPU_DOMAINS)
3245                 dev_priv->mm.flush_rings |= ring->id;
3246 }
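
     /*
      * Worked example for Case 1, step 4 above (batch read by the GPU): the
      * object starts in (CPU, CPU) and execbuffer requests (COMMAND, 0), i.e.
      * pending_read_domains = COMMAND, pending_write_domain = 0.  Assuming no
      * other objects contribute domains, the code above then computes:
      *
      *      pending_write_domain == 0, so
      *              pending_read_domains |= read_domains  -> CPU | COMMAND
      *      write_domain (CPU) != pending_read_domains, so
      *              flush_domains       = CPU
      *              invalidate_domains |= (CPU | COMMAND) & ~CPU = COMMAND
      *
      * CPU appears in the union, so the object is clflushed here, and the
      * accumulated dev->flush_domains / dev->invalidate_domains cause the
      * caller to emit the MI_FLUSH and chipset flush described above.
      */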
3247
3248 /**
3249  * Moves the object from a partial CPU read domain to a full one.
3250  *
3251  * Note that this only resolves i915_gem_object_set_cpu_read_domain_range(),
3252  * and doesn't handle transitioning from !(read_domains & I915_GEM_DOMAIN_CPU).
3253  */
3254 static void
3255 i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj)
3256 {
3257         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
3258
3259         if (!obj_priv->page_cpu_valid)
3260                 return;
3261
3262         /* If we're partially in the CPU read domain, finish moving it in.
3263          */
3264         if (obj->read_domains & I915_GEM_DOMAIN_CPU) {
3265                 int i;
3266
3267                 for (i = 0; i <= (obj->size - 1) / PAGE_SIZE; i++) {
3268                         if (obj_priv->page_cpu_valid[i])
3269                                 continue;
3270                         drm_clflush_pages(obj_priv->pages + i, 1);
3271                 }
3272         }
3273
3274         /* Free the page_cpu_valid mappings which are now stale, whether
3275          * or not we've got I915_GEM_DOMAIN_CPU.
3276          */
3277         kfree(obj_priv->page_cpu_valid);
3278         obj_priv->page_cpu_valid = NULL;
3279 }
3280
3281 /**
3282  * Set the CPU read domain on a range of the object.
3283  *
3284  * The object ends up with I915_GEM_DOMAIN_CPU in its read flags although it's
3285  * not entirely valid.  The page_cpu_valid member of the object records which
3286  * pages have already been flushed, and is respected by
3287  * i915_gem_object_set_to_cpu_domain() if it is later called to get a valid
3288  * mapping of the whole object.
3289  *
3290  * This function returns when the move is complete, including waiting on
3291  * flushes to occur.
3292  */
3293 static int
3294 i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj,
3295                                           uint64_t offset, uint64_t size)
3296 {
3297         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
3298         uint32_t old_read_domains;
3299         int i, ret;
3300
3301         if (offset == 0 && size == obj->size)
3302                 return i915_gem_object_set_to_cpu_domain(obj, 0);
3303
3304         ret = i915_gem_object_flush_gpu_write_domain(obj, false);
3305         if (ret != 0)
3306                 return ret;
3307         i915_gem_object_flush_gtt_write_domain(obj);
3308
3309         /* If we're already fully in the CPU read domain, we're done. */
3310         if (obj_priv->page_cpu_valid == NULL &&
3311             (obj->read_domains & I915_GEM_DOMAIN_CPU) != 0)
3312                 return 0;
3313
3314         /* Otherwise, create/clear the per-page CPU read domain flag if we're
3315          * newly adding I915_GEM_DOMAIN_CPU
3316          */
3317         if (obj_priv->page_cpu_valid == NULL) {
3318                 obj_priv->page_cpu_valid = kzalloc(obj->size / PAGE_SIZE,
3319                                                    GFP_KERNEL);
3320                 if (obj_priv->page_cpu_valid == NULL)
3321                         return -ENOMEM;
3322         } else if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0)
3323                 memset(obj_priv->page_cpu_valid, 0, obj->size / PAGE_SIZE);
3324
3325         /* Flush the cache on any pages that are still invalid from the CPU's
3326          * perspective.
3327          */
3328         for (i = offset / PAGE_SIZE; i <= (offset + size - 1) / PAGE_SIZE;
3329              i++) {
3330                 if (obj_priv->page_cpu_valid[i])
3331                         continue;
3332
3333                 drm_clflush_pages(obj_priv->pages + i, 1);
3334
3335                 obj_priv->page_cpu_valid[i] = 1;
3336         }
3337
3338         /* It should now be out of any other write domains, and we can update
3339          * the domain values for our changes.
3340          */
3341         BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
3342
3343         old_read_domains = obj->read_domains;
3344         obj->read_domains |= I915_GEM_DOMAIN_CPU;
3345
3346         trace_i915_gem_object_change_domain(obj,
3347                                             old_read_domains,
3348                                             obj->write_domain);
3349
3350         return 0;
3351 }
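
     /*
      * Illustrative sketch (not part of the driver): a ranged read, such as
      * the pread path, only needs the bytes it is about to copy, so it can
      * request a partial CPU read domain instead of flushing the whole
      * object.  The args_offset/args_size names and copy_to_user_helper()
      * are hypothetical:
      *
      *      ret = i915_gem_object_set_cpu_read_domain_range(obj,
      *                                                      args_offset,
      *                                                      args_size);
      *      if (ret == 0)
      *              copy_to_user_helper(obj, args_offset, args_size);
      *
      * Only the pages covering [args_offset, args_offset + args_size) are
      * clflushed, and page_cpu_valid remembers them so a repeated read of the
      * same range skips the flush.
      */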
3352
3353 /**
3354  * Pin an object to the GTT and evaluate the relocations landing in it.
3355  */
3356 static int
3357 i915_gem_execbuffer_relocate(struct drm_i915_gem_object *obj,
3358                              struct drm_file *file_priv,
3359                              struct drm_i915_gem_exec_object2 *entry)
3360 {
3361         struct drm_device *dev = obj->base.dev;
3362         drm_i915_private_t *dev_priv = dev->dev_private;
3363         struct drm_i915_gem_relocation_entry __user *user_relocs;
3364         struct drm_gem_object *target_obj = NULL;
3365         uint32_t target_handle = 0;
3366         int i, ret = 0;
3367
3368         user_relocs = (void __user *)(uintptr_t)entry->relocs_ptr;
3369         for (i = 0; i < entry->relocation_count; i++) {
3370                 struct drm_i915_gem_relocation_entry reloc;
3371                 uint32_t target_offset;
3372
3373                 if (__copy_from_user_inatomic(&reloc,
3374                                               user_relocs+i,
3375                                               sizeof(reloc))) {
3376                         ret = -EFAULT;
3377                         break;
3378                 }
3379
3380                 if (reloc.target_handle != target_handle) {
3381                         drm_gem_object_unreference(target_obj);
3382
3383                         target_obj = drm_gem_object_lookup(dev, file_priv,
3384                                                            reloc.target_handle);
3385                         if (target_obj == NULL) {
3386                                 ret = -ENOENT;
3387                                 break;
3388                         }
3389
3390                         target_handle = reloc.target_handle;
3391                 }
3392                 target_offset = to_intel_bo(target_obj)->gtt_offset;
3393
3394 #if WATCH_RELOC
3395                 DRM_INFO("%s: obj %p offset %08x target %d "
3396                          "read %08x write %08x gtt %08x "
3397                          "presumed %08x delta %08x\n",
3398                          __func__,
3399                          obj,
3400                          (int) reloc.offset,
3401                          (int) reloc.target_handle,
3402                          (int) reloc.read_domains,
3403                          (int) reloc.write_domain,
3404                          (int) target_offset,
3405                          (int) reloc.presumed_offset,
3406                          reloc.delta);
3407 #endif
3408
3409                 /* The target buffer should have appeared before us in the
3410                  * exec_object list, so it should have a GTT space bound by now.
3411                  */
3412                 if (target_offset == 0) {
3413                         DRM_ERROR("No GTT space found for object %d\n",
3414                                   reloc.target_handle);
3415                         ret = -EINVAL;
3416                         break;
3417                 }
3418
3419                 /* Validate that the target is in a valid r/w GPU domain */
3420                 if (reloc.write_domain & (reloc.write_domain - 1)) {
3421                         DRM_ERROR("reloc with multiple write domains: "
3422                                   "obj %p target %d offset %d "
3423                                   "read %08x write %08x",
3424                                   obj, reloc.target_handle,
3425                                   (int) reloc.offset,
3426                                   reloc.read_domains,
3427                                   reloc.write_domain);
3428                         ret = -EINVAL;
3429                         break;
3430                 }
3431                 if (reloc.write_domain & I915_GEM_DOMAIN_CPU ||
3432                     reloc.read_domains & I915_GEM_DOMAIN_CPU) {
3433                         DRM_ERROR("reloc with read/write CPU domains: "
3434                                   "obj %p target %d offset %d "
3435                                   "read %08x write %08x",
3436                                   obj, reloc.target_handle,
3437                                   (int) reloc.offset,
3438                                   reloc.read_domains,
3439                                   reloc.write_domain);
3440                         ret = -EINVAL;
3441                         break;
3442                 }
3443                 if (reloc.write_domain && target_obj->pending_write_domain &&
3444                     reloc.write_domain != target_obj->pending_write_domain) {
3445                         DRM_ERROR("Write domain conflict: "
3446                                   "obj %p target %d offset %d "
3447                                   "new %08x old %08x\n",
3448                                   obj, reloc.target_handle,
3449                                   (int) reloc.offset,
3450                                   reloc.write_domain,
3451                                   target_obj->pending_write_domain);
3452                         ret = -EINVAL;
3453                         break;
3454                 }
3455
3456                 target_obj->pending_read_domains |= reloc.read_domains;
3457                 target_obj->pending_write_domain |= reloc.write_domain;
3458
3459                 /* If the relocation already has the right value in it, no
3460                  * more work needs to be done.
3461                  */
3462                 if (target_offset == reloc.presumed_offset)
3463                         continue;
3464
3465                 /* Check that the relocation address is valid... */
3466                 if (reloc.offset > obj->base.size - 4) {
3467                         DRM_ERROR("Relocation beyond object bounds: "
3468                                   "obj %p target %d offset %d size %d.\n",
3469                                   obj, reloc.target_handle,
3470                                   (int) reloc.offset, (int) obj->base.size);
3471                         ret = -EINVAL;
3472                         break;
3473                 }
3474                 if (reloc.offset & 3) {
3475                         DRM_ERROR("Relocation not 4-byte aligned: "
3476                                   "obj %p target %d offset %d.\n",
3477                                   obj, reloc.target_handle,
3478                                   (int) reloc.offset);
3479                         ret = -EINVAL;
3480                         break;
3481                 }
3482
3483                 /* and points to somewhere within the target object. */
3484                 if (reloc.delta >= target_obj->size) {
3485                         DRM_ERROR("Relocation beyond target object bounds: "
3486                                   "obj %p target %d delta %d size %d.\n",
3487                                   obj, reloc.target_handle,
3488                                   (int) reloc.delta, (int) target_obj->size);
3489                         ret = -EINVAL;
3490                         break;
3491                 }
3492
3493                 reloc.delta += target_offset;
3494                 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) {
3495                         uint32_t page_offset = reloc.offset & ~PAGE_MASK;
3496                         char *vaddr;
3497
3498                         vaddr = kmap_atomic(obj->pages[reloc.offset >> PAGE_SHIFT]);
3499                         *(uint32_t *)(vaddr + page_offset) = reloc.delta;
3500                         kunmap_atomic(vaddr);
3501                 } else {
3502                         uint32_t __iomem *reloc_entry;
3503                         void __iomem *reloc_page;
3504
3505                         ret = i915_gem_object_set_to_gtt_domain(&obj->base, 1);
3506                         if (ret)
3507                                 break;
3508
3509                         /* Map the page containing the relocation we're going to perform.  */
3510                         reloc.offset += obj->gtt_offset;
3511                         reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping,
3512                                                               reloc.offset & PAGE_MASK);
3513                         reloc_entry = (uint32_t __iomem *)
3514                                 (reloc_page + (reloc.offset & ~PAGE_MASK));
3515                         iowrite32(reloc.delta, reloc_entry);
3516                         io_mapping_unmap_atomic(reloc_page);
3517                 }
3518
3519                 /* and update the user's relocation entry */
3520                 reloc.presumed_offset = target_offset;
3521                 if (__copy_to_user_inatomic(&user_relocs[i].presumed_offset,
3522                                               &reloc.presumed_offset,
3523                                               sizeof(reloc.presumed_offset))) {
3524                         ret = -EFAULT;
3525                         break;
3526                 }
3527         }
3528
3529         drm_gem_object_unreference(target_obj);
3530         return ret;
3531 }
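
     /*
      * Illustrative sketch (not part of the driver): the userspace side of one
      * relocation processed by the loop above.  The batch references a vertex
      * buffer whose final GTT address is unknown when the batch is built, so
      * the userspace driver records where to patch the batch ("offset"), what
      * the patched value points at ("target_handle" plus "delta") and its best
      * guess ("presumed_offset").  The vbo_handle, dword_idx and
      * last_known_offset names are hypothetical:
      *
      *      struct drm_i915_gem_relocation_entry reloc = {
      *              .target_handle   = vbo_handle,
      *              .delta           = 0,
      *              .offset          = 4 * dword_idx,
      *              .presumed_offset = last_known_offset,
      *              .read_domains    = I915_GEM_DOMAIN_VERTEX,
      *              .write_domain    = 0,
      *      };
      *
      * If presumed_offset still matches the target's real GTT offset, the loop
      * above skips the rewrite entirely and only the pending domains are
      * accumulated.
      */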
3532
3533 static int
3534 i915_gem_execbuffer_pin(struct drm_device *dev,
3535                         struct drm_file *file,
3536                         struct drm_gem_object **object_list,
3537                         struct drm_i915_gem_exec_object2 *exec_list,
3538                         int count)
3539 {
3540         struct drm_i915_private *dev_priv = dev->dev_private;
3541         int ret, i, retry;
3542
3543         /* attempt to pin all of the buffers into the GTT */
3544         for (retry = 0; retry < 2; retry++) {
3545                 ret = 0;
3546                 for (i = 0; i < count; i++) {
3547                         struct drm_i915_gem_exec_object2 *entry = &exec_list[i];
3548                         struct drm_i915_gem_object *obj = to_intel_bo(object_list[i]);
3549                         bool need_fence =
3550                                 entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
3551                                 obj->tiling_mode != I915_TILING_NONE;
3552
3553                         /* g33/pnv can't fence buffers in the unmappable part */
3554                         bool need_mappable =
3555                                 entry->relocation_count ? true : need_fence;
3556
3557                         /* Check fence reg constraints and rebind if necessary */
3558                         if (need_fence &&
3559                             !i915_gem_object_fence_offset_ok(&obj->base,
3560                                                              obj->tiling_mode)) {
3561                                 ret = i915_gem_object_unbind(&obj->base);
3562                                 if (ret)
3563                                         break;
3564                         }
3565
3566                         ret = i915_gem_object_pin(&obj->base,
3567                                                   entry->alignment,
3568                                                   need_mappable);
3569                         if (ret)
3570                                 break;
3571
3572                         /*
3573                          * Pre-965 chips need a fence register set up in order
3574                          * to properly handle blits to/from tiled surfaces.
3575                          */
3576                         if (need_fence) {
3577                                 ret = i915_gem_object_get_fence_reg(&obj->base, true);
3578                                 if (ret) {
3579                                         i915_gem_object_unpin(&obj->base);
3580                                         break;
3581                                 }
3582
3583                                 dev_priv->fence_regs[obj->fence_reg].gpu = true;
3584                         }
3585
3586                         entry->offset = obj->gtt_offset;
3587                 }
3588
3589                 while (i--)
3590                         i915_gem_object_unpin(object_list[i]);
3591
3592                 if (ret == 0)
3593                         break;
3594
3595                 if (ret != -ENOSPC || retry)
3596                         return ret;
3597
3598                 ret = i915_gem_evict_everything(dev);
3599                 if (ret)
3600                         return ret;
3601         }
3602
3603         return 0;
3604 }
3605
3606 /* Throttle our rendering by waiting until the ring has completed our requests
3607  * emitted over 20 msec ago.
3608  *
3609  * Note that if we were to use the current jiffies each time around the loop,
3610  * we wouldn't escape the function with any frames outstanding if the time to
3611  * render a frame was over 20ms.
3612  *
3613  * This should get us reasonable parallelism between CPU and GPU but also
3614  * relatively low latency when blocking on a particular request to finish.
3615  */
3616 static int
3617 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
3618 {
3619         struct drm_i915_private *dev_priv = dev->dev_private;
3620         struct drm_i915_file_private *file_priv = file->driver_priv;
3621         unsigned long recent_enough = jiffies - msecs_to_jiffies(20);
3622         struct drm_i915_gem_request *request;
3623         struct intel_ring_buffer *ring = NULL;
3624         u32 seqno = 0;
3625         int ret;
3626
3627         spin_lock(&file_priv->mm.lock);
3628         list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
3629                 if (time_after_eq(request->emitted_jiffies, recent_enough))
3630                         break;
3631
3632                 ring = request->ring;
3633                 seqno = request->seqno;
3634         }
3635         spin_unlock(&file_priv->mm.lock);
3636
3637         if (seqno == 0)
3638                 return 0;
3639
3640         ret = 0;
3641         if (!i915_seqno_passed(ring->get_seqno(ring), seqno)) {
3642                 /* And wait for the seqno passing without holding any locks and
3643                  * causing extra latency for others. This is safe as the irq
3644                  * generation is designed to be run atomically and so is
3645                  * lockless.
3646                  */
3647                 ring->user_irq_get(ring);
3648                 ret = wait_event_interruptible(ring->irq_queue,
3649                                                i915_seqno_passed(ring->get_seqno(ring), seqno)
3650                                                || atomic_read(&dev_priv->mm.wedged));
3651                 ring->user_irq_put(ring);
3652
3653                 if (ret == 0 && atomic_read(&dev_priv->mm.wedged))
3654                         ret = -EIO;
3655         }
3656
3657         if (ret == 0)
3658                 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);
3659
3660         return ret;
3661 }
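
     /*
      * Illustrative numbers for the throttle above, assuming HZ = 1000 so one
      * jiffy is one millisecond: with jiffies = 10000, recent_enough becomes
      * 10000 - msecs_to_jiffies(20) = 9980.  A request with emitted_jiffies =
      * 9970 fails time_after_eq(9970, 9980), so its ring/seqno are recorded
      * and will be waited upon, while a request with emitted_jiffies = 9990
      * satisfies the test and stops the scan.  Only work emitted more than
      * ~20 ms ago is therefore ever waited for, leaving the most recent
      * requests outstanding for CPU/GPU parallelism.
      */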
3662
3663 static int
3664 i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec,
3665                           uint64_t exec_offset)
3666 {
3667         uint32_t exec_start, exec_len;
3668
3669         exec_start = (uint32_t) exec_offset + exec->batch_start_offset;
3670         exec_len = (uint32_t) exec->batch_len;
3671
3672         if ((exec_start | exec_len) & 0x7)
3673                 return -EINVAL;
3674
3675         if (!exec_start)
3676                 return -EINVAL;
3677
3678         return 0;
3679 }
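
     /*
      * Example of the check above: with the batch object bound at GTT offset
      * 0x100000 and batch_start_offset = 0x40, exec_start = 0x100040 and, for
      * an 8-byte-aligned batch_len, both values pass the "& 0x7" test, so the
      * batch is accepted.  A batch_start_offset of 0x44 (or a zero exec_start)
      * would instead cause the execbuffer to be rejected with -EINVAL.
      */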
3680
3681 static int
3682 validate_exec_list(struct drm_i915_gem_exec_object2 *exec,
3683                    int count)
3684 {
3685         int i;
3686
3687         for (i = 0; i < count; i++) {
3688                 char __user *ptr = (char __user *)(uintptr_t)exec[i].relocs_ptr;
3689                 size_t length = exec[i].relocation_count * sizeof(struct drm_i915_gem_relocation_entry);
3690
3691                 if (!access_ok(VERIFY_READ, ptr, length))
3692                         return -EFAULT;
3693
3694                 /* we may also need to update the presumed offsets */
3695                 if (!access_ok(VERIFY_WRITE, ptr, length))
3696                         return -EFAULT;
3697
3698                 if (fault_in_pages_readable(ptr, length))
3699                         return -EFAULT;
3700         }
3701
3702         return 0;
3703 }
3704
3705 static int
3706 i915_gem_do_execbuffer(struct drm_device *dev, void *data,
3707                        struct drm_file *file,
3708                        struct drm_i915_gem_execbuffer2 *args,
3709                        struct drm_i915_gem_exec_object2 *exec_list)
3710 {
3711         drm_i915_private_t *dev_priv = dev->dev_private;
3712         struct drm_gem_object **object_list = NULL;
3713         struct drm_gem_object *batch_obj;
3714         struct drm_clip_rect *cliprects = NULL;
3715         struct drm_i915_gem_request *request = NULL;
3716         int ret, i, flips;
3717         uint64_t exec_offset;
3718
3719         struct intel_ring_buffer *ring = NULL;
3720
3721         ret = i915_gem_check_is_wedged(dev);
3722         if (ret)
3723                 return ret;
3724
3725         ret = validate_exec_list(exec_list, args->buffer_count);
3726         if (ret)
3727                 return ret;
3728
3729 #if WATCH_EXEC
3730         DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
3731                   (int) args->buffers_ptr, args->buffer_count, args->batch_len);
3732 #endif
3733         switch (args->flags & I915_EXEC_RING_MASK) {
3734         case I915_EXEC_DEFAULT:
3735         case I915_EXEC_RENDER:
3736                 ring = &dev_priv->render_ring;
3737                 break;
3738         case I915_EXEC_BSD:
3739                 if (!HAS_BSD(dev)) {
3740                         DRM_ERROR("execbuf with invalid ring (BSD)\n");
3741                         return -EINVAL;
3742                 }
3743                 ring = &dev_priv->bsd_ring;
3744                 break;
3745         case I915_EXEC_BLT:
3746                 if (!HAS_BLT(dev)) {
3747                         DRM_ERROR("execbuf with invalid ring (BLT)\n");
3748                         return -EINVAL;
3749                 }
3750                 ring = &dev_priv->blt_ring;
3751                 break;
3752         default:
3753                 DRM_ERROR("execbuf with unknown ring: %d\n",
3754                           (int)(args->flags & I915_EXEC_RING_MASK));
3755                 return -EINVAL;
3756         }
3757
3758         if (args->buffer_count < 1) {
3759                 DRM_ERROR("execbuf with %d buffers\n", args->buffer_count);
3760                 return -EINVAL;
3761         }
3762         object_list = drm_malloc_ab(sizeof(*object_list), args->buffer_count);
3763         if (object_list == NULL) {
3764                 DRM_ERROR("Failed to allocate object list for %d buffers\n",
3765                           args->buffer_count);
3766                 ret = -ENOMEM;
3767                 goto pre_mutex_err;
3768         }
3769
3770         if (args->num_cliprects != 0) {
3771                 cliprects = kcalloc(args->num_cliprects, sizeof(*cliprects),
3772                                     GFP_KERNEL);
3773                 if (cliprects == NULL) {
3774                         ret = -ENOMEM;
3775                         goto pre_mutex_err;
3776                 }
3777
3778                 ret = copy_from_user(cliprects,
3779                                      (struct drm_clip_rect __user *)
3780                                      (uintptr_t) args->cliprects_ptr,
3781                                      sizeof(*cliprects) * args->num_cliprects);
3782                 if (ret != 0) {
3783                         DRM_ERROR("copy %d cliprects failed: %d\n",
3784                                   args->num_cliprects, ret);
3785                         ret = -EFAULT;
3786                         goto pre_mutex_err;
3787                 }
3788         }
3789
3790         request = kzalloc(sizeof(*request), GFP_KERNEL);
3791         if (request == NULL) {
3792                 ret = -ENOMEM;
3793                 goto pre_mutex_err;
3794         }
3795
3796         ret = i915_mutex_lock_interruptible(dev);
3797         if (ret)
3798                 goto pre_mutex_err;
3799
3800         if (dev_priv->mm.suspended) {
3801                 mutex_unlock(&dev->struct_mutex);
3802                 ret = -EBUSY;
3803                 goto pre_mutex_err;
3804         }
3805
3806         /* Look up object handles */
3807         for (i = 0; i < args->buffer_count; i++) {
3808                 struct drm_i915_gem_object *obj_priv;
3809
3810                 object_list[i] = drm_gem_object_lookup(dev, file,
3811                                                        exec_list[i].handle);
3812                 if (object_list[i] == NULL) {
3813                         DRM_ERROR("Invalid object handle %d at index %d\n",
3814                                    exec_list[i].handle, i);
3815                         /* prevent error path from reading uninitialized data */
3816                         args->buffer_count = i + 1;
3817                         ret = -ENOENT;
3818                         goto err;
3819                 }
3820
3821                 obj_priv = to_intel_bo(object_list[i]);
3822                 if (obj_priv->in_execbuffer) {
3823                         DRM_ERROR("Object %p appears more than once in object list\n",
3824                                    object_list[i]);
3825                         /* prevent error path from reading uninitialized data */
3826                         args->buffer_count = i + 1;
3827                         ret = -EINVAL;
3828                         goto err;
3829                 }
3830                 obj_priv->in_execbuffer = true;
3831         }
3832
3833         /* Move the objects en-masse into the GTT, evicting if necessary. */
3834         ret = i915_gem_execbuffer_pin(dev, file,
3835                                       object_list, exec_list,
3836                                       args->buffer_count);
3837         if (ret)
3838                 goto err;
3839
3840         /* The objects are in their final locations, apply the relocations. */
3841         for (i = 0; i < args->buffer_count; i++) {
3842                 struct drm_i915_gem_object *obj = to_intel_bo(object_list[i]);
3843                 obj->base.pending_read_domains = 0;
3844                 obj->base.pending_write_domain = 0;
3845                 ret = i915_gem_execbuffer_relocate(obj, file, &exec_list[i]);
3846                 if (ret)
3847                         goto err;
3848         }
3849
3850         /* Set the pending read domains for the batch buffer to COMMAND */
3851         batch_obj = object_list[args->buffer_count-1];
3852         if (batch_obj->pending_write_domain) {
3853                 DRM_ERROR("Attempting to use self-modifying batch buffer\n");
3854                 ret = -EINVAL;
3855                 goto err;
3856         }
3857         batch_obj->pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
3858
3859         /* Sanity check the batch buffer */
3860         exec_offset = to_intel_bo(batch_obj)->gtt_offset;
3861         ret = i915_gem_check_execbuffer(args, exec_offset);
3862         if (ret != 0) {
3863                 DRM_ERROR("execbuf with invalid offset/length\n");
3864                 goto err;
3865         }
3866
3867         /* Zero the global flush/invalidate flags. These
3868          * will be modified as new domains are computed
3869          * for each object
3870          */
3871         dev->invalidate_domains = 0;
3872         dev->flush_domains = 0;
3873         dev_priv->mm.flush_rings = 0;
3874         for (i = 0; i < args->buffer_count; i++)
3875                 i915_gem_object_set_to_gpu_domain(object_list[i], ring);
3876
3877         if (dev->invalidate_domains | dev->flush_domains) {
3878 #if WATCH_EXEC
3879                 DRM_INFO("%s: invalidate_domains %08x flush_domains %08x\n",
3880                           __func__,
3881                          dev->invalidate_domains,
3882                          dev->flush_domains);
3883 #endif
3884                 i915_gem_flush(dev, file,
3885                                dev->invalidate_domains,
3886                                dev->flush_domains,
3887                                dev_priv->mm.flush_rings);
3888         }
3889
3890 #if WATCH_COHERENCY
3891         for (i = 0; i < args->buffer_count; i++) {
3892                 i915_gem_object_check_coherency(object_list[i],
3893                                                 exec_list[i].handle);
3894         }
3895 #endif
3896
3897 #if WATCH_EXEC
3898         i915_gem_dump_object(batch_obj,
3899                               args->batch_len,
3900                               __func__,
3901                               ~0);
3902 #endif
3903
3904         /* Check for any pending flips. As we only maintain a flip queue depth
3905          * of 1, we can simply insert a WAIT for the next display flip prior
3906          * to executing the batch and avoid stalling the CPU.
3907          */
3908         flips = 0;
3909         for (i = 0; i < args->buffer_count; i++) {
3910                 if (object_list[i]->write_domain)
3911                         flips |= atomic_read(&to_intel_bo(object_list[i])->pending_flip);
3912         }
3913         if (flips) {
3914                 int plane, flip_mask;
3915
3916                 for (plane = 0; flips >> plane; plane++) {
3917                         if (((flips >> plane) & 1) == 0)
3918                                 continue;
3919
3920                         if (plane)
3921                                 flip_mask = MI_WAIT_FOR_PLANE_B_FLIP;
3922                         else
3923                                 flip_mask = MI_WAIT_FOR_PLANE_A_FLIP;
3924
3925                         ret = intel_ring_begin(ring, 2);
3926                         if (ret)
3927                                 goto err;
3928
3929                         intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask);
3930                         intel_ring_emit(ring, MI_NOOP);
3931                         intel_ring_advance(ring);
3932                 }
3933         }
3934
3935         /* Exec the batchbuffer */
3936         ret = ring->dispatch_execbuffer(ring, args, cliprects, exec_offset);
3937         if (ret) {
3938                 DRM_ERROR("dispatch failed %d\n", ret);
3939                 goto err;
3940         }
3941
3942         for (i = 0; i < args->buffer_count; i++) {
3943                 struct drm_gem_object *obj = object_list[i];
3944
3945                 obj->read_domains = obj->pending_read_domains;
3946                 obj->write_domain = obj->pending_write_domain;
3947
3948                 i915_gem_object_move_to_active(obj, ring);
3949                 if (obj->write_domain) {
3950                         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
3951                         obj_priv->dirty = 1;
3952                         list_move_tail(&obj_priv->gpu_write_list,
3953                                        &ring->gpu_write_list);
3954                         intel_mark_busy(dev, obj);
3955                 }
3956
3957                 trace_i915_gem_object_change_domain(obj,
3958                                                     obj->read_domains,
3959                                                     obj->write_domain);
3960         }
3961
3962         /*
3963          * Ensure that the commands in the batch buffer are
3964          * finished before the interrupt fires
3965          */
3966         i915_retire_commands(dev, ring);
3967
3968         if (i915_add_request(dev, file, request, ring))
3969                 ring->outstanding_lazy_request = true;
3970         else
3971                 request = NULL;
3972
3973 err:
3974         for (i = 0; i < args->buffer_count; i++) {
3975                 if (object_list[i] == NULL)
3976                         break;
3977
3978                 to_intel_bo(object_list[i])->in_execbuffer = false;
3979                 drm_gem_object_unreference(object_list[i]);
3980         }
3981
3982         mutex_unlock(&dev->struct_mutex);
3983
3984 pre_mutex_err:
3985         drm_free_large(object_list);
3986         kfree(cliprects);
3987         kfree(request);
3988
3989         return ret;
3990 }
3991
3992 /*
3993  * Legacy execbuffer just creates an exec2 list from the original exec object
3994  * list array and passes it to the real function.
3995  */
3996 int
3997 i915_gem_execbuffer(struct drm_device *dev, void *data,
3998                     struct drm_file *file_priv)
3999 {
4000         struct drm_i915_gem_execbuffer *args = data;
4001         struct drm_i915_gem_execbuffer2 exec2;
4002         struct drm_i915_gem_exec_object *exec_list = NULL;
4003         struct drm_i915_gem_exec_object2 *exec2_list = NULL;
4004         int ret, i;
4005
4006 #if WATCH_EXEC
4007         DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
4008                   (int) args->buffers_ptr, args->buffer_count, args->batch_len);
4009 #endif
4010
4011         if (args->buffer_count < 1) {
4012                 DRM_ERROR("execbuf with %d buffers\n", args->buffer_count);
4013                 return -EINVAL;
4014         }
4015
4016         /* Copy in the exec list from userland */
4017         exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count);
4018         exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
4019         if (exec_list == NULL || exec2_list == NULL) {
4020                 DRM_ERROR("Failed to allocate exec list for %d buffers\n",
4021                           args->buffer_count);
4022                 drm_free_large(exec_list);
4023                 drm_free_large(exec2_list);
4024                 return -ENOMEM;
4025         }
4026         ret = copy_from_user(exec_list,
4027                              (struct drm_i915_relocation_entry __user *)
4028                              (uintptr_t) args->buffers_ptr,
4029                              sizeof(*exec_list) * args->buffer_count);
4030         if (ret != 0) {
4031                 DRM_ERROR("copy %d exec entries failed %d\n",
4032                           args->buffer_count, ret);
4033                 drm_free_large(exec_list);
4034                 drm_free_large(exec2_list);
4035                 return -EFAULT;
4036         }
4037
4038         for (i = 0; i < args->buffer_count; i++) {
4039                 exec2_list[i].handle = exec_list[i].handle;
4040                 exec2_list[i].relocation_count = exec_list[i].relocation_count;
4041                 exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
4042                 exec2_list[i].alignment = exec_list[i].alignment;
4043                 exec2_list[i].offset = exec_list[i].offset;
4044                 if (INTEL_INFO(dev)->gen < 4)
4045                         exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
4046                 else
4047                         exec2_list[i].flags = 0;
4048         }
4049
4050         exec2.buffers_ptr = args->buffers_ptr;
4051         exec2.buffer_count = args->buffer_count;
4052         exec2.batch_start_offset = args->batch_start_offset;
4053         exec2.batch_len = args->batch_len;
4054         exec2.DR1 = args->DR1;
4055         exec2.DR4 = args->DR4;
4056         exec2.num_cliprects = args->num_cliprects;
4057         exec2.cliprects_ptr = args->cliprects_ptr;
4058         exec2.flags = I915_EXEC_RENDER;
4059
4060         ret = i915_gem_do_execbuffer(dev, data, file_priv, &exec2, exec2_list);
4061         if (!ret) {
4062                 /* Copy the new buffer offsets back to the user's exec list. */
4063                 for (i = 0; i < args->buffer_count; i++)
4064                         exec_list[i].offset = exec2_list[i].offset;
4065                 /* ... and back out to userspace */
4066                 ret = copy_to_user((struct drm_i915_relocation_entry __user *)
4067                                    (uintptr_t) args->buffers_ptr,
4068                                    exec_list,
4069                                    sizeof(*exec_list) * args->buffer_count);
4070                 if (ret) {
4071                         ret = -EFAULT;
4072                         DRM_ERROR("failed to copy %d exec entries "
4073                                   "back to user (%d)\n",
4074                                   args->buffer_count, ret);
4075                 }
4076         }
4077
4078         drm_free_large(exec_list);
4079         drm_free_large(exec2_list);
4080         return ret;
4081 }
4082
4083 int
4084 i915_gem_execbuffer2(struct drm_device *dev, void *data,
4085                      struct drm_file *file_priv)
4086 {
4087         struct drm_i915_gem_execbuffer2 *args = data;
4088         struct drm_i915_gem_exec_object2 *exec2_list = NULL;
4089         int ret;
4090
4091 #if WATCH_EXEC
4092         DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
4093                   (int) args->buffers_ptr, args->buffer_count, args->batch_len);
4094 #endif
4095
4096         if (args->buffer_count < 1) {
4097                 DRM_ERROR("execbuf2 with %d buffers\n", args->buffer_count);
4098                 return -EINVAL;
4099         }
4100
4101         exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
4102         if (exec2_list == NULL) {
4103                 DRM_ERROR("Failed to allocate exec list for %d buffers\n",
4104                           args->buffer_count);
4105                 return -ENOMEM;
4106         }
4107         ret = copy_from_user(exec2_list,
4108                              (struct drm_i915_relocation_entry __user *)
4109                              (uintptr_t) args->buffers_ptr,
4110                              sizeof(*exec2_list) * args->buffer_count);
4111         if (ret != 0) {
4112                 DRM_ERROR("copy %d exec entries failed %d\n",
4113                           args->buffer_count, ret);
4114                 drm_free_large(exec2_list);
4115                 return -EFAULT;
4116         }
4117
4118         ret = i915_gem_do_execbuffer(dev, data, file_priv, args, exec2_list);
4119         if (!ret) {
4120                 /* Copy the new buffer offsets back to the user's exec list. */
4121                 ret = copy_to_user((struct drm_i915_relocation_entry __user *)
4122                                    (uintptr_t) args->buffers_ptr,
4123                                    exec2_list,
4124                                    sizeof(*exec2_list) * args->buffer_count);
4125                 if (ret) {
4126                         ret = -EFAULT;
4127                         DRM_ERROR("failed to copy %d exec entries "
4128                                   "back to user (%d)\n",
4129                                   args->buffer_count, ret);
4130                 }
4131         }
4132
4133         drm_free_large(exec2_list);
4134         return ret;
4135 }
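
     /*
      * Illustrative userspace sketch (not part of the driver): the minimal
      * call sequence that reaches i915_gem_do_execbuffer() through this ioctl.
      * The fd, object handles and relocation array are assumed to exist
      * already; error handling is elided and the variable names are made up:
      *
      *      struct drm_i915_gem_exec_object2 objs[2] = {
      *              { .handle = vbo_handle },
      *              { .handle = batch_handle,
      *                .relocation_count = 1,
      *                .relocs_ptr = (uintptr_t)&reloc },
      *      };
      *      struct drm_i915_gem_execbuffer2 execbuf = {
      *              .buffers_ptr  = (uintptr_t)objs,
      *              .buffer_count = 2,
      *              .batch_len    = batch_bytes,
      *              .flags        = I915_EXEC_RENDER,
      *      };
      *      drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
      *
      * The batch object goes last in the array, and on success the kernel
      * copies each object's final GTT offset back into objs[i].offset, which
      * userspace caches as the next presumed_offset.
      */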
4136
4137 int
4138 i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment,
4139                     bool mappable)
4140 {
4141         struct drm_device *dev = obj->dev;
4142         struct drm_i915_private *dev_priv = dev->dev_private;
4143         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
4144         int ret;
4145
4146         BUG_ON(obj_priv->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT);
4147         WARN_ON(i915_verify_lists(dev));
4148
4149         if (obj_priv->gtt_space != NULL) {
4150                 if (alignment == 0)
4151                         alignment = i915_gem_get_gtt_alignment(obj);
4152                 if (obj_priv->gtt_offset & (alignment - 1) ||
4153                     (mappable && !i915_gem_object_cpu_accessible(obj_priv))) {
4154                         WARN(obj_priv->pin_count,
4155                              "bo is already pinned with incorrect alignment:"
4156                              " offset=%x, req.alignment=%x\n",
4157                              obj_priv->gtt_offset, alignment);
4158                         ret = i915_gem_object_unbind(obj);
4159                         if (ret)
4160                                 return ret;
4161                 }
4162         }
4163
4164         if (obj_priv->gtt_space == NULL) {
4165                 ret = i915_gem_object_bind_to_gtt(obj, alignment, mappable);
4166                 if (ret)
4167                         return ret;
4168         }
4169
4170         obj_priv->pin_count++;
4171
4172         /* On the first pin, account for the pin and, if the object is not
4173          * active, move it from the inactive list to the pinned list.
4174          */
4175         if (obj_priv->pin_count == 1) {
4176                 i915_gem_info_add_pin(dev_priv, obj, mappable);
4177                 if (!obj_priv->active)
4178                         list_move_tail(&obj_priv->mm_list,
4179                                        &dev_priv->mm.pinned_list);
4180         }
4181         BUG_ON(!obj_priv->pin_mappable && mappable);
4182
4183         WARN_ON(i915_verify_lists(dev));
4184         return 0;
4185 }
4186
4187 void
4188 i915_gem_object_unpin(struct drm_gem_object *obj)
4189 {
4190         struct drm_device *dev = obj->dev;
4191         drm_i915_private_t *dev_priv = dev->dev_private;
4192         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
4193
4194         WARN_ON(i915_verify_lists(dev));
4195         obj_priv->pin_count--;
4196         BUG_ON(obj_priv->pin_count < 0);
4197         BUG_ON(obj_priv->gtt_space == NULL);
4198
4199         /* If the object is no longer pinned, and is
4200          * neither active nor being flushed, then stick it on
4201          * the inactive list
4202          */
4203         if (obj_priv->pin_count == 0) {
4204                 if (!obj_priv->active)
4205                         list_move_tail(&obj_priv->mm_list,
4206                                        &dev_priv->mm.inactive_list);
4207                 i915_gem_info_remove_pin(dev_priv, obj);
4208         }
4209         WARN_ON(i915_verify_lists(dev));
4210 }
4211
4212 int
4213 i915_gem_pin_ioctl(struct drm_device *dev, void *data,
4214                    struct drm_file *file_priv)
4215 {
4216         struct drm_i915_gem_pin *args = data;
4217         struct drm_gem_object *obj;
4218         struct drm_i915_gem_object *obj_priv;
4219         int ret;
4220
4221         ret = i915_mutex_lock_interruptible(dev);
4222         if (ret)
4223                 return ret;
4224
4225         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
4226         if (obj == NULL) {
4227                 ret = -ENOENT;
4228                 goto unlock;
4229         }
4230         obj_priv = to_intel_bo(obj);
4231
4232         if (obj_priv->madv != I915_MADV_WILLNEED) {
4233                 DRM_ERROR("Attempting to pin a purgeable buffer\n");
4234                 ret = -EINVAL;
4235                 goto out;
4236         }
4237
4238         if (obj_priv->pin_filp != NULL && obj_priv->pin_filp != file_priv) {
4239                 DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n",
4240                           args->handle);
4241                 ret = -EINVAL;
4242                 goto out;
4243         }
4244
4245         if (obj_priv->user_pin_count == 0) {
4246                 ret = i915_gem_object_pin(obj, args->alignment, true);
4247                 if (ret)
4248                         goto out;
4249         }
4250         obj_priv->user_pin_count++;
4251         obj_priv->pin_filp = file_priv;
4252
4253         /* XXX - flush the CPU caches for pinned objects
4254          * as the X server doesn't manage domains yet
4255          */
4256         i915_gem_object_flush_cpu_write_domain(obj);
4257         args->offset = obj_priv->gtt_offset;
4258 out:
4259         drm_gem_object_unreference(obj);
4260 unlock:
4261         mutex_unlock(&dev->struct_mutex);
4262         return ret;
4263 }
4264
4265 int
4266 i915_gem_unpin_ioctl(struct drm_device *dev, void *data,
4267                      struct drm_file *file_priv)
4268 {
4269         struct drm_i915_gem_pin *args = data;
4270         struct drm_gem_object *obj;
4271         struct drm_i915_gem_object *obj_priv;
4272         int ret;
4273
4274         ret = i915_mutex_lock_interruptible(dev);
4275         if (ret)
4276                 return ret;
4277
4278         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
4279         if (obj == NULL) {
4280                 ret = -ENOENT;
4281                 goto unlock;
4282         }
4283         obj_priv = to_intel_bo(obj);
4284
4285         if (obj_priv->pin_filp != file_priv) {
4286                 DRM_ERROR("Not pinned by caller in i915_gem_unpin_ioctl(): %d\n",
4287                           args->handle);
4288                 ret = -EINVAL;
4289                 goto out;
4290         }
4291         obj_priv->user_pin_count--;
4292         if (obj_priv->user_pin_count == 0) {
4293                 obj_priv->pin_filp = NULL;
4294                 i915_gem_object_unpin(obj);
4295         }
4296
4297 out:
4298         drm_gem_object_unreference(obj);
4299 unlock:
4300         mutex_unlock(&dev->struct_mutex);
4301         return ret;
4302 }
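/*
 * A minimal userspace sketch of the pin/unpin contract above, assuming
 * libdrm's drmIoctl() and the uapi structures from i915_drm.h; "fd" and
 * "handle" are assumed to belong to a sufficiently privileged DRM client.
 * This is only an illustration, not a recommended interface to build on.
 *
 *	struct drm_i915_gem_pin pin = { .handle = handle, .alignment = 4096 };
 *	struct drm_i915_gem_unpin unpin = { .handle = handle };
 *
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_PIN, &pin) == 0) {
 *		use_gtt_offset(pin.offset);	// hypothetical helper
 *		drmIoctl(fd, DRM_IOCTL_I915_GEM_UNPIN, &unpin);
 *	}
 */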
4303
4304 int
4305 i915_gem_busy_ioctl(struct drm_device *dev, void *data,
4306                     struct drm_file *file_priv)
4307 {
4308         struct drm_i915_gem_busy *args = data;
4309         struct drm_gem_object *obj;
4310         struct drm_i915_gem_object *obj_priv;
4311         int ret;
4312
4313         ret = i915_mutex_lock_interruptible(dev);
4314         if (ret)
4315                 return ret;
4316
4317         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
4318         if (obj == NULL) {
4319                 ret = -ENOENT;
4320                 goto unlock;
4321         }
4322         obj_priv = to_intel_bo(obj);
4323
4324         /* Count all active objects as busy, even if they are currently not used
4325          * by the gpu. Users of this interface expect objects to eventually
4326          * become non-busy without any further actions, therefore emit any
4327          * necessary flushes here.
4328          */
4329         args->busy = obj_priv->active;
4330         if (args->busy) {
4331                 /* Unconditionally flush objects, even when the gpu still uses this
4332                  * object. Userspace calling this function indicates that it wants to
4333                  * use this buffer sooner rather than later, so issuing the required
4334                  * flush early is beneficial.
4335                  */
4336                 if (obj->write_domain & I915_GEM_GPU_DOMAINS)
4337                         i915_gem_flush_ring(dev, file_priv,
4338                                             obj_priv->ring,
4339                                             0, obj->write_domain);
4340
4341                 /* Update the active list for the hardware's current position.
4342                  * Otherwise this only updates on a delayed timer or when irqs
4343                  * are actually unmasked, and our working set ends up being
4344                  * larger than required.
4345                  */
4346                 i915_gem_retire_requests_ring(dev, obj_priv->ring);
4347
4348                 args->busy = obj_priv->active;
4349         }
4350
4351         drm_gem_object_unreference(obj);
4352 unlock:
4353         mutex_unlock(&dev->struct_mutex);
4354         return ret;
4355 }
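/*
 * A minimal userspace sketch of the busy ioctl above, assuming libdrm's
 * drmIoctl() and the uapi structures from i915_drm.h; "fd" and "handle" are
 * assumed to belong to the caller.  Real users normally sleep in set-domain
 * or throttle rather than spinning like this.
 *
 *	struct drm_i915_gem_busy busy = { .handle = handle };
 *
 *	do {
 *		if (drmIoctl(fd, DRM_IOCTL_I915_GEM_BUSY, &busy))
 *			break;
 *	} while (busy.busy);
 */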
4356
4357 int
4358 i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
4359                         struct drm_file *file_priv)
4360 {
4361         return i915_gem_ring_throttle(dev, file_priv);
4362 }
4363
4364 int
4365 i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
4366                        struct drm_file *file_priv)
4367 {
4368         struct drm_i915_gem_madvise *args = data;
4369         struct drm_gem_object *obj;
4370         struct drm_i915_gem_object *obj_priv;
4371         int ret;
4372
4373         switch (args->madv) {
4374         case I915_MADV_DONTNEED:
4375         case I915_MADV_WILLNEED:
4376                 break;
4377         default:
4378                 return -EINVAL;
4379         }
4380
4381         ret = i915_mutex_lock_interruptible(dev);
4382         if (ret)
4383                 return ret;
4384
4385         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
4386         if (obj == NULL) {
4387                 ret = -ENOENT;
4388                 goto unlock;
4389         }
4390         obj_priv = to_intel_bo(obj);
4391
4392         if (obj_priv->pin_count) {
4393                 ret = -EINVAL;
4394                 goto out;
4395         }
4396
4397         if (obj_priv->madv != __I915_MADV_PURGED)
4398                 obj_priv->madv = args->madv;
4399
4400         /* if the object is no longer bound, discard its backing storage */
4401         if (i915_gem_object_is_purgeable(obj_priv) &&
4402             obj_priv->gtt_space == NULL)
4403                 i915_gem_object_truncate(obj);
4404
4405         args->retained = obj_priv->madv != __I915_MADV_PURGED;
4406
4407 out:
4408         drm_gem_object_unreference(obj);
4409 unlock:
4410         mutex_unlock(&dev->struct_mutex);
4411         return ret;
4412 }
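/*
 * A minimal userspace sketch of the madvise contract above, assuming libdrm's
 * drmIoctl() and the uapi structures from i915_drm.h; "fd" and "handle" are
 * assumed to belong to the caller.  An idle buffer is marked DONTNEED so the
 * shrinker may reap its pages; before reuse it is marked WILLNEED again and
 * "retained" reports whether the old contents survived.
 *
 *	struct drm_i915_gem_madvise madv = {
 *		.handle = handle,
 *		.madv = I915_MADV_DONTNEED,
 *	};
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);
 *
 *	...
 *
 *	madv.madv = I915_MADV_WILLNEED;
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);
 *	if (!madv.retained)
 *		reupload_contents(handle);	// hypothetical helper
 */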
4413
4414 struct drm_gem_object * i915_gem_alloc_object(struct drm_device *dev,
4415                                               size_t size)
4416 {
4417         struct drm_i915_private *dev_priv = dev->dev_private;
4418         struct drm_i915_gem_object *obj;
4419
4420         obj = kzalloc(sizeof(*obj), GFP_KERNEL);
4421         if (obj == NULL)
4422                 return NULL;
4423
4424         if (drm_gem_object_init(dev, &obj->base, size) != 0) {
4425                 kfree(obj);
4426                 return NULL;
4427         }
4428
4429         i915_gem_info_add_obj(dev_priv, size);
4430
4431         obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4432         obj->base.read_domains = I915_GEM_DOMAIN_CPU;
4433
4434         obj->agp_type = AGP_USER_MEMORY;
4435         obj->base.driver_private = NULL;
4436         obj->fence_reg = I915_FENCE_REG_NONE;
4437         INIT_LIST_HEAD(&obj->mm_list);
4438         INIT_LIST_HEAD(&obj->ring_list);
4439         INIT_LIST_HEAD(&obj->gpu_write_list);
4440         obj->madv = I915_MADV_WILLNEED;
4441
4442         return &obj->base;
4443 }
4444
4445 int i915_gem_init_object(struct drm_gem_object *obj)
4446 {
4447         BUG();
4448
4449         return 0;
4450 }
4451
4452 static void i915_gem_free_object_tail(struct drm_gem_object *obj)
4453 {
4454         struct drm_device *dev = obj->dev;
4455         drm_i915_private_t *dev_priv = dev->dev_private;
4456         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
4457         int ret;
4458
4459         ret = i915_gem_object_unbind(obj);
4460         if (ret == -ERESTARTSYS) {
4461                 list_move(&obj_priv->mm_list,
4462                           &dev_priv->mm.deferred_free_list);
4463                 return;
4464         }
4465
4466         if (obj_priv->mmap_offset)
4467                 i915_gem_free_mmap_offset(obj);
4468
4469         drm_gem_object_release(obj);
4470         i915_gem_info_remove_obj(dev_priv, obj->size);
4471
4472         kfree(obj_priv->page_cpu_valid);
4473         kfree(obj_priv->bit_17);
4474         kfree(obj_priv);
4475 }
4476
4477 void i915_gem_free_object(struct drm_gem_object *obj)
4478 {
4479         struct drm_device *dev = obj->dev;
4480         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
4481
4482         trace_i915_gem_object_destroy(obj);
4483
4484         while (obj_priv->pin_count > 0)
4485                 i915_gem_object_unpin(obj);
4486
4487         if (obj_priv->phys_obj)
4488                 i915_gem_detach_phys_object(dev, obj);
4489
4490         i915_gem_free_object_tail(obj);
4491 }
4492
4493 int
4494 i915_gem_idle(struct drm_device *dev)
4495 {
4496         drm_i915_private_t *dev_priv = dev->dev_private;
4497         int ret;
4498
4499         mutex_lock(&dev->struct_mutex);
4500
4501         if (dev_priv->mm.suspended) {
4502                 mutex_unlock(&dev->struct_mutex);
4503                 return 0;
4504         }
4505
4506         ret = i915_gpu_idle(dev);
4507         if (ret) {
4508                 mutex_unlock(&dev->struct_mutex);
4509                 return ret;
4510         }
4511
4512         /* Under UMS, be paranoid and evict. */
4513         if (!drm_core_check_feature(dev, DRIVER_MODESET)) {
4514                 ret = i915_gem_evict_inactive(dev);
4515                 if (ret) {
4516                         mutex_unlock(&dev->struct_mutex);
4517                         return ret;
4518                 }
4519         }
4520
4521         /* Hack!  Don't let anybody do execbuf while we don't control the chip.
4522          * We need to replace this with a semaphore, or something.
4523          * And not confound mm.suspended!
4524          */
4525         dev_priv->mm.suspended = 1;
4526         del_timer_sync(&dev_priv->hangcheck_timer);
4527
4528         i915_kernel_lost_context(dev);
4529         i915_gem_cleanup_ringbuffer(dev);
4530
4531         mutex_unlock(&dev->struct_mutex);
4532
4533         /* Cancel the retire work handler, which should be idle now. */
4534         cancel_delayed_work_sync(&dev_priv->mm.retire_work);
4535
4536         return 0;
4537 }
4538
4539 /*
4540  * 965+ supports PIPE_CONTROL commands, which provide finer-grained control
4541  * over cache flushing.
4542  */
4543 static int
4544 i915_gem_init_pipe_control(struct drm_device *dev)
4545 {
4546         drm_i915_private_t *dev_priv = dev->dev_private;
4547         struct drm_gem_object *obj;
4548         struct drm_i915_gem_object *obj_priv;
4549         int ret;
4550
4551         obj = i915_gem_alloc_object(dev, 4096);
4552         if (obj == NULL) {
4553                 DRM_ERROR("Failed to allocate seqno page\n");
4554                 ret = -ENOMEM;
4555                 goto err;
4556         }
4557         obj_priv = to_intel_bo(obj);
4558         obj_priv->agp_type = AGP_USER_CACHED_MEMORY;
4559
4560         ret = i915_gem_object_pin(obj, 4096, true);
4561         if (ret)
4562                 goto err_unref;
4563         dev_priv->seqno_gfx_addr = obj_priv->gtt_offset;
4564         dev_priv->seqno_page = kmap(obj_priv->pages[0]);
4565         if (dev_priv->seqno_page == NULL) {
4566                 ret = -ENOMEM;
4567                 goto err_unpin;
4568         }
4569         dev_priv->seqno_obj = obj;
4570         memset(dev_priv->seqno_page, 0, PAGE_SIZE);
4571
4572         return 0;
4573
4574 err_unpin:
4575         i915_gem_object_unpin(obj);
4576 err_unref:
4577         drm_gem_object_unreference(obj);
4578 err:
4579         return ret;
4580 }
4581
4582
4583 static void
4584 i915_gem_cleanup_pipe_control(struct drm_device *dev)
4585 {
4586         drm_i915_private_t *dev_priv = dev->dev_private;
4587         struct drm_gem_object *obj;
4588         struct drm_i915_gem_object *obj_priv;
4589
4590         obj = dev_priv->seqno_obj;
4591         obj_priv = to_intel_bo(obj);
4592         kunmap(obj_priv->pages[0]);
4593         i915_gem_object_unpin(obj);
4594         drm_gem_object_unreference(obj);
4595         dev_priv->seqno_obj = NULL;
4596
4597         dev_priv->seqno_page = NULL;
4598 }
4599
4600 int
4601 i915_gem_init_ringbuffer(struct drm_device *dev)
4602 {
4603         drm_i915_private_t *dev_priv = dev->dev_private;
4604         int ret;
4605
4606         if (HAS_PIPE_CONTROL(dev)) {
4607                 ret = i915_gem_init_pipe_control(dev);
4608                 if (ret)
4609                         return ret;
4610         }
4611
4612         ret = intel_init_render_ring_buffer(dev);
4613         if (ret)
4614                 goto cleanup_pipe_control;
4615
4616         if (HAS_BSD(dev)) {
4617                 ret = intel_init_bsd_ring_buffer(dev);
4618                 if (ret)
4619                         goto cleanup_render_ring;
4620         }
4621
4622         if (HAS_BLT(dev)) {
4623                 ret = intel_init_blt_ring_buffer(dev);
4624                 if (ret)
4625                         goto cleanup_bsd_ring;
4626         }
4627
4628         dev_priv->next_seqno = 1;
4629
4630         return 0;
4631
4632 cleanup_bsd_ring:
4633         intel_cleanup_ring_buffer(&dev_priv->bsd_ring);
4634 cleanup_render_ring:
4635         intel_cleanup_ring_buffer(&dev_priv->render_ring);
4636 cleanup_pipe_control:
4637         if (HAS_PIPE_CONTROL(dev))
4638                 i915_gem_cleanup_pipe_control(dev);
4639         return ret;
4640 }
4641
4642 void
4643 i915_gem_cleanup_ringbuffer(struct drm_device *dev)
4644 {
4645         drm_i915_private_t *dev_priv = dev->dev_private;
4646
4647         intel_cleanup_ring_buffer(&dev_priv->render_ring);
4648         intel_cleanup_ring_buffer(&dev_priv->bsd_ring);
4649         intel_cleanup_ring_buffer(&dev_priv->blt_ring);
4650         if (HAS_PIPE_CONTROL(dev))
4651                 i915_gem_cleanup_pipe_control(dev);
4652 }
4653
4654 int
4655 i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
4656                        struct drm_file *file_priv)
4657 {
4658         drm_i915_private_t *dev_priv = dev->dev_private;
4659         int ret;
4660
4661         if (drm_core_check_feature(dev, DRIVER_MODESET))
4662                 return 0;
4663
4664         if (atomic_read(&dev_priv->mm.wedged)) {
4665                 DRM_ERROR("Reenabling wedged hardware, good luck\n");
4666                 atomic_set(&dev_priv->mm.wedged, 0);
4667         }
4668
4669         mutex_lock(&dev->struct_mutex);
4670         dev_priv->mm.suspended = 0;
4671
4672         ret = i915_gem_init_ringbuffer(dev);
4673         if (ret != 0) {
4674                 mutex_unlock(&dev->struct_mutex);
4675                 return ret;
4676         }
4677
4678         BUG_ON(!list_empty(&dev_priv->mm.active_list));
4679         BUG_ON(!list_empty(&dev_priv->render_ring.active_list));
4680         BUG_ON(!list_empty(&dev_priv->bsd_ring.active_list));
4681         BUG_ON(!list_empty(&dev_priv->blt_ring.active_list));
4682         BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
4683         BUG_ON(!list_empty(&dev_priv->mm.inactive_list));
4684         BUG_ON(!list_empty(&dev_priv->render_ring.request_list));
4685         BUG_ON(!list_empty(&dev_priv->bsd_ring.request_list));
4686         BUG_ON(!list_empty(&dev_priv->blt_ring.request_list));
4687         mutex_unlock(&dev->struct_mutex);
4688
4689         ret = drm_irq_install(dev);
4690         if (ret)
4691                 goto cleanup_ringbuffer;
4692
4693         return 0;
4694
4695 cleanup_ringbuffer:
4696         mutex_lock(&dev->struct_mutex);
4697         i915_gem_cleanup_ringbuffer(dev);
4698         dev_priv->mm.suspended = 1;
4699         mutex_unlock(&dev->struct_mutex);
4700
4701         return ret;
4702 }
4703
4704 int
4705 i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
4706                        struct drm_file *file_priv)
4707 {
4708         if (drm_core_check_feature(dev, DRIVER_MODESET))
4709                 return 0;
4710
4711         drm_irq_uninstall(dev);
4712         return i915_gem_idle(dev);
4713 }
4714
4715 void
4716 i915_gem_lastclose(struct drm_device *dev)
4717 {
4718         int ret;
4719
4720         if (drm_core_check_feature(dev, DRIVER_MODESET))
4721                 return;
4722
4723         ret = i915_gem_idle(dev);
4724         if (ret)
4725                 DRM_ERROR("failed to idle hardware: %d\n", ret);
4726 }
4727
4728 static void
4729 init_ring_lists(struct intel_ring_buffer *ring)
4730 {
4731         INIT_LIST_HEAD(&ring->active_list);
4732         INIT_LIST_HEAD(&ring->request_list);
4733         INIT_LIST_HEAD(&ring->gpu_write_list);
4734 }
4735
4736 void
4737 i915_gem_load(struct drm_device *dev)
4738 {
4739         int i;
4740         drm_i915_private_t *dev_priv = dev->dev_private;
4741
4742         INIT_LIST_HEAD(&dev_priv->mm.active_list);
4743         INIT_LIST_HEAD(&dev_priv->mm.flushing_list);
4744         INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
4745         INIT_LIST_HEAD(&dev_priv->mm.pinned_list);
4746         INIT_LIST_HEAD(&dev_priv->mm.fence_list);
4747         INIT_LIST_HEAD(&dev_priv->mm.deferred_free_list);
4748         init_ring_lists(&dev_priv->render_ring);
4749         init_ring_lists(&dev_priv->bsd_ring);
4750         init_ring_lists(&dev_priv->blt_ring);
4751         for (i = 0; i < 16; i++)
4752                 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
4753         INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
4754                           i915_gem_retire_work_handler);
4755         init_completion(&dev_priv->error_completion);
4756         spin_lock(&shrink_list_lock);
4757         list_add(&dev_priv->mm.shrink_list, &shrink_list);
4758         spin_unlock(&shrink_list_lock);
4759
4760         /* On GEN3 we really need to make sure the ARB C3 LP bit is set */
4761         if (IS_GEN3(dev)) {
4762                 u32 tmp = I915_READ(MI_ARB_STATE);
4763                 if (!(tmp & MI_ARB_C3_LP_WRITE_ENABLE)) {
4764                         /* MI_ARB_STATE is masked-write: set the bit plus its mask bit */
4765                         tmp = MI_ARB_C3_LP_WRITE_ENABLE | (MI_ARB_C3_LP_WRITE_ENABLE << MI_ARB_MASK_SHIFT);
4766                         I915_WRITE(MI_ARB_STATE, tmp);
4767                 }
4768         }
4769
4770         /* Old X drivers will take 0-2 for front, back, depth buffers */
4771         if (!drm_core_check_feature(dev, DRIVER_MODESET))
4772                 dev_priv->fence_reg_start = 3;
4773
4774         if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
4775                 dev_priv->num_fence_regs = 16;
4776         else
4777                 dev_priv->num_fence_regs = 8;
4778
4779         /* Initialize fence registers to zero */
4780         switch (INTEL_INFO(dev)->gen) {
4781         case 6:
4782                 for (i = 0; i < 16; i++)
4783                         I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + (i * 8), 0);
4784                 break;
4785         case 5:
4786         case 4:
4787                 for (i = 0; i < 16; i++)
4788                         I915_WRITE64(FENCE_REG_965_0 + (i * 8), 0);
4789                 break;
4790         case 3:
4791                 if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
4792                         for (i = 0; i < 8; i++)
4793                                 I915_WRITE(FENCE_REG_945_8 + (i * 4), 0);
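                /* fall through: gen3 also clears the 8 gen2-style registers below */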
4794         case 2:
4795                 for (i = 0; i < 8; i++)
4796                         I915_WRITE(FENCE_REG_830_0 + (i * 4), 0);
4797                 break;
4798         }
4799         i915_gem_detect_bit_6_swizzle(dev);
4800         init_waitqueue_head(&dev_priv->pending_flip_queue);
4801 }
4802
4803 /*
4804  * Create a physically contiguous memory object for this object
4805  * e.g. for cursor + overlay regs
4806  */
4807 static int i915_gem_init_phys_object(struct drm_device *dev,
4808                                      int id, int size, int align)
4809 {
4810         drm_i915_private_t *dev_priv = dev->dev_private;
4811         struct drm_i915_gem_phys_object *phys_obj;
4812         int ret;
4813
4814         if (dev_priv->mm.phys_objs[id - 1] || !size)
4815                 return 0;
4816
4817         phys_obj = kzalloc(sizeof(struct drm_i915_gem_phys_object), GFP_KERNEL);
4818         if (!phys_obj)
4819                 return -ENOMEM;
4820
4821         phys_obj->id = id;
4822
4823         phys_obj->handle = drm_pci_alloc(dev, size, align);
4824         if (!phys_obj->handle) {
4825                 ret = -ENOMEM;
4826                 goto kfree_obj;
4827         }
4828 #ifdef CONFIG_X86
4829         set_memory_wc((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
4830 #endif
4831
4832         dev_priv->mm.phys_objs[id - 1] = phys_obj;
4833
4834         return 0;
4835 kfree_obj:
4836         kfree(phys_obj);
4837         return ret;
4838 }
4839
4840 static void i915_gem_free_phys_object(struct drm_device *dev, int id)
4841 {
4842         drm_i915_private_t *dev_priv = dev->dev_private;
4843         struct drm_i915_gem_phys_object *phys_obj;
4844
4845         if (!dev_priv->mm.phys_objs[id - 1])
4846                 return;
4847
4848         phys_obj = dev_priv->mm.phys_objs[id - 1];
4849         if (phys_obj->cur_obj) {
4850                 i915_gem_detach_phys_object(dev, phys_obj->cur_obj);
4851         }
4852
4853 #ifdef CONFIG_X86
4854         set_memory_wb((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
4855 #endif
4856         drm_pci_free(dev, phys_obj->handle);
4857         kfree(phys_obj);
4858         dev_priv->mm.phys_objs[id - 1] = NULL;
4859 }
4860
4861 void i915_gem_free_all_phys_object(struct drm_device *dev)
4862 {
4863         int i;
4864
4865         for (i = I915_GEM_PHYS_CURSOR_0; i <= I915_MAX_PHYS_OBJECT; i++)
4866                 i915_gem_free_phys_object(dev, i);
4867 }
4868
4869 void i915_gem_detach_phys_object(struct drm_device *dev,
4870                                  struct drm_gem_object *obj)
4871 {
4872         struct drm_i915_gem_object *obj_priv;
4873         int i;
4874         int ret;
4875         int page_count;
4876
4877         obj_priv = to_intel_bo(obj);
4878         if (!obj_priv->phys_obj)
4879                 return;
4880
4881         ret = i915_gem_object_get_pages(obj, 0);
4882         if (ret)
4883                 goto out;
4884
4885         page_count = obj->size / PAGE_SIZE;
4886
4887         for (i = 0; i < page_count; i++) {
4888                 char *dst = kmap_atomic(obj_priv->pages[i]);
4889                 char *src = obj_priv->phys_obj->handle->vaddr + (i * PAGE_SIZE);
4890
4891                 memcpy(dst, src, PAGE_SIZE);
4892                 kunmap_atomic(dst);
4893         }
4894         drm_clflush_pages(obj_priv->pages, page_count);
4895         drm_agp_chipset_flush(dev);
4896
4897         i915_gem_object_put_pages(obj);
4898 out:
4899         obj_priv->phys_obj->cur_obj = NULL;
4900         obj_priv->phys_obj = NULL;
4901 }
4902
4903 int
4904 i915_gem_attach_phys_object(struct drm_device *dev,
4905                             struct drm_gem_object *obj,
4906                             int id,
4907                             int align)
4908 {
4909         drm_i915_private_t *dev_priv = dev->dev_private;
4910         struct drm_i915_gem_object *obj_priv;
4911         int ret = 0;
4912         int page_count;
4913         int i;
4914
4915         if (id > I915_MAX_PHYS_OBJECT)
4916                 return -EINVAL;
4917
4918         obj_priv = to_intel_bo(obj);
4919
4920         if (obj_priv->phys_obj) {
4921                 if (obj_priv->phys_obj->id == id)
4922                         return 0;
4923                 i915_gem_detach_phys_object(dev, obj);
4924         }
4925
4926         /* create a new object */
4927         if (!dev_priv->mm.phys_objs[id - 1]) {
4928                 ret = i915_gem_init_phys_object(dev, id,
4929                                                 obj->size, align);
4930                 if (ret) {
4931                         DRM_ERROR("failed to init phys object %d size: %zu\n", id, obj->size);
4932                         goto out;
4933                 }
4934         }
4935
4936         /* bind to the object */
4937         obj_priv->phys_obj = dev_priv->mm.phys_objs[id - 1];
4938         obj_priv->phys_obj->cur_obj = obj;
4939
4940         ret = i915_gem_object_get_pages(obj, 0);
4941         if (ret) {
4942                 DRM_ERROR("failed to get page list\n");
4943                 goto out;
4944         }
4945
4946         page_count = obj->size / PAGE_SIZE;
4947
4948         for (i = 0; i < page_count; i++) {
4949                 char *src = kmap_atomic(obj_priv->pages[i]);
4950                 char *dst = obj_priv->phys_obj->handle->vaddr + (i * PAGE_SIZE);
4951
4952                 memcpy(dst, src, PAGE_SIZE);
4953                 kunmap_atomic(src);
4954         }
4955
4956         i915_gem_object_put_pages(obj);
4957
4958         return 0;
4959 out:
4960         return ret;
4961 }
4962
4963 static int
4964 i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
4965                      struct drm_i915_gem_pwrite *args,
4966                      struct drm_file *file_priv)
4967 {
4968         struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
4969         void *obj_addr;
4970         int ret;
4971         char __user *user_data;
4972
4973         user_data = (char __user *) (uintptr_t) args->data_ptr;
4974         obj_addr = obj_priv->phys_obj->handle->vaddr + args->offset;
4975
4976         DRM_DEBUG_DRIVER("obj_addr %p, %lld\n", obj_addr, args->size);
4977         ret = copy_from_user(obj_addr, user_data, args->size);
4978         if (ret)
4979                 return -EFAULT;
4980
4981         drm_agp_chipset_flush(dev);
4982         return 0;
4983 }
4984
4985 void i915_gem_release(struct drm_device *dev, struct drm_file *file)
4986 {
4987         struct drm_i915_file_private *file_priv = file->driver_priv;
4988
4989         /* Clean up our request list when the client is going away, so that
4990          * later retire_requests won't dereference our soon-to-be-gone
4991          * file_priv.
4992          */
4993         spin_lock(&file_priv->mm.lock);
4994         while (!list_empty(&file_priv->mm.request_list)) {
4995                 struct drm_i915_gem_request *request;
4996
4997                 request = list_first_entry(&file_priv->mm.request_list,
4998                                            struct drm_i915_gem_request,
4999                                            client_list);
5000                 list_del(&request->client_list);
5001                 request->file_priv = NULL;
5002         }
5003         spin_unlock(&file_priv->mm.lock);
5004 }
5005
5006 static int
5007 i915_gpu_is_active(struct drm_device *dev)
5008 {
5009         drm_i915_private_t *dev_priv = dev->dev_private;
5010         int lists_empty;
5011
5012         lists_empty = list_empty(&dev_priv->mm.flushing_list) &&
5013                       list_empty(&dev_priv->render_ring.active_list) &&
5014                       list_empty(&dev_priv->bsd_ring.active_list) &&
5015                       list_empty(&dev_priv->blt_ring.active_list);
5016
5017         return !lists_empty;
5018 }
5019
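/*
 * Memory shrinker callback (old ->shrink() API): called with nr_to_scan == 0
 * it only reports a pressure-scaled count of the inactive objects that could
 * be reaped; otherwise it unbinds purgeable and then any remaining inactive
 * objects, and returns -1 when it could not take the locks without risking
 * deadlock.
 */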
5020 static int
5021 i915_gem_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
5022 {
5023         drm_i915_private_t *dev_priv, *next_dev;
5024         struct drm_i915_gem_object *obj_priv, *next_obj;
5025         int cnt = 0;
5026         int would_deadlock = 1;
5027
5028         /* "fast-path" to count number of available objects */
5029         if (nr_to_scan == 0) {
5030                 spin_lock(&shrink_list_lock);
5031                 list_for_each_entry(dev_priv, &shrink_list, mm.shrink_list) {
5032                         struct drm_device *dev = dev_priv->dev;
5033
5034                         if (mutex_trylock(&dev->struct_mutex)) {
5035                                 list_for_each_entry(obj_priv,
5036                                                     &dev_priv->mm.inactive_list,
5037                                                     mm_list)
5038                                         cnt++;
5039                                 mutex_unlock(&dev->struct_mutex);
5040                         }
5041                 }
5042                 spin_unlock(&shrink_list_lock);
5043
5044                 return (cnt / 100) * sysctl_vfs_cache_pressure;
5045         }
5046
5047         spin_lock(&shrink_list_lock);
5048
5049 rescan:
5050         /* first scan for clean buffers */
5051         list_for_each_entry_safe(dev_priv, next_dev,
5052                                  &shrink_list, mm.shrink_list) {
5053                 struct drm_device *dev = dev_priv->dev;
5054
5055                 if (!mutex_trylock(&dev->struct_mutex))
5056                         continue;
5057
5058                 spin_unlock(&shrink_list_lock);
5059                 i915_gem_retire_requests(dev);
5060
5061                 list_for_each_entry_safe(obj_priv, next_obj,
5062                                          &dev_priv->mm.inactive_list,
5063                                          mm_list) {
5064                         if (i915_gem_object_is_purgeable(obj_priv)) {
5065                                 i915_gem_object_unbind(&obj_priv->base);
5066                                 if (--nr_to_scan <= 0)
5067                                         break;
5068                         }
5069                 }
5070
5071                 spin_lock(&shrink_list_lock);
5072                 mutex_unlock(&dev->struct_mutex);
5073
5074                 would_deadlock = 0;
5075
5076                 if (nr_to_scan <= 0)
5077                         break;
5078         }
5079
5080         /* second pass, evict/count anything still on the inactive list */
5081         list_for_each_entry_safe(dev_priv, next_dev,
5082                                  &shrink_list, mm.shrink_list) {
5083                 struct drm_device *dev = dev_priv->dev;
5084
5085                 if (!mutex_trylock(&dev->struct_mutex))
5086                         continue;
5087
5088                 spin_unlock(&shrink_list_lock);
5089
5090                 list_for_each_entry_safe(obj_priv, next_obj,
5091                                          &dev_priv->mm.inactive_list,
5092                                          mm_list) {
5093                         if (nr_to_scan > 0) {
5094                                 i915_gem_object_unbind(&obj_priv->base);
5095                                 nr_to_scan--;
5096                         } else
5097                                 cnt++;
5098                 }
5099
5100                 spin_lock(&shrink_list_lock);
5101                 mutex_unlock(&dev->struct_mutex);
5102
5103                 would_deadlock = 0;
5104         }
5105
5106         if (nr_to_scan) {
5107                 int active = 0;
5108
5109                 /*
5110                  * We are desperate for pages, so as a last resort, wait
5111                  * for the GPU to finish and discard whatever we can.
5112                  * This dramatically reduces the number of OOM-killer
5113                  * events whilst running the GPU aggressively.
5114                  */
5115                 list_for_each_entry(dev_priv, &shrink_list, mm.shrink_list) {
5116                         struct drm_device *dev = dev_priv->dev;
5117
5118                         if (!mutex_trylock(&dev->struct_mutex))
5119                                 continue;
5120
5121                         spin_unlock(&shrink_list_lock);
5122
5123                         if (i915_gpu_is_active(dev)) {
5124                                 i915_gpu_idle(dev);
5125                                 active++;
5126                         }
5127
5128                         spin_lock(&shrink_list_lock);
5129                         mutex_unlock(&dev->struct_mutex);
5130                 }
5131
5132                 if (active)
5133                         goto rescan;
5134         }
5135
5136         spin_unlock(&shrink_list_lock);
5137
5138         if (would_deadlock)
5139                 return -1;
5140         else if (cnt > 0)
5141                 return (cnt / 100) * sysctl_vfs_cache_pressure;
5142         else
5143                 return 0;
5144 }
5145
5146 static struct shrinker shrinker = {
5147         .shrink = i915_gem_shrink,
5148         .seeks = DEFAULT_SEEKS,
5149 };
5150
5151 __init void
5152 i915_gem_shrinker_init(void)
5153 {
5154         register_shrinker(&shrinker);
5155 }
5156
5157 __exit void
5158 i915_gem_shrinker_exit(void)
5159 {
5160         unregister_shrinker(&shrinker);
5161 }