/*
 * Copyright (C) 2007 Ben Skeggs.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "drmP.h"
#include "drm.h"

#include "nouveau_drv.h"
#include "nouveau_ramht.h"
#include "nouveau_dma.h"

#define USE_REFCNT(dev) (nouveau_private(dev)->chipset >= 0x10)
#define USE_SEMA(dev)   (nouveau_private(dev)->chipset >= 0x17)

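/*
 * A fence is a per-channel sequence number: emitting it writes the next
 * sequence value to the channel via a software method, and the fence is
 * considered signalled once the channel's acknowledged sequence has caught
 * up with it.  Pending fences live on chan->fence.pending.  A semaphore is
 * a 4-byte word in a shared VRAM buffer, used to order work between two
 * different channels.
 */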
struct nouveau_fence {
        struct nouveau_channel *channel;
        struct kref refcount;
        struct list_head entry;
        uint32_t sequence;
        bool signalled;
        void (*work)(void *priv, bool signalled);
        void *priv;
};

struct nouveau_semaphore {
        struct kref ref;
        struct drm_device *dev;
        struct drm_mm_node *mem;
};

static inline struct nouveau_fence *
nouveau_fence(void *sync_obj)
{
        return (struct nouveau_fence *)sync_obj;
}

static void
nouveau_fence_del(struct kref *ref)
{
        struct nouveau_fence *fence =
                container_of(ref, struct nouveau_fence, refcount);

        nouveau_channel_ref(NULL, &fence->channel);
        kfree(fence);
}

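/*
 * Scan the channel's pending list and mark every fence whose sequence has
 * been reached as signalled, running any attached work callback and
 * dropping the pending-list reference.  The current sequence comes from the
 * channel's refcount register on chipsets that have one (USE_REFCNT),
 * otherwise from the value recorded at IRQ time.
 */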
void
nouveau_fence_update(struct nouveau_channel *chan)
{
        struct drm_device *dev = chan->dev;
        struct nouveau_fence *tmp, *fence;
        uint32_t sequence;

        spin_lock(&chan->fence.lock);

        /* Fetch the last sequence if the channel is still up and running */
        if (likely(!list_empty(&chan->fence.pending))) {
                if (USE_REFCNT(dev))
                        sequence = nvchan_rd32(chan, 0x48);
                else
                        sequence = atomic_read(&chan->fence.last_sequence_irq);

                if (chan->fence.sequence_ack == sequence)
                        goto out;
                chan->fence.sequence_ack = sequence;
        }

        list_for_each_entry_safe(fence, tmp, &chan->fence.pending, entry) {
                sequence = fence->sequence;
                fence->signalled = true;
                list_del(&fence->entry);

                if (unlikely(fence->work))
                        fence->work(fence->priv, true);

                kref_put(&fence->refcount, nouveau_fence_del);

                if (sequence == chan->fence.sequence_ack)
                        break;
        }
out:
        spin_unlock(&chan->fence.lock);
}

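/*
 * Allocate a fence on the given channel; if 'emit' is set, emit it to the
 * ring immediately.  On emission failure the fence reference is dropped and
 * the error returned.
 */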
int
nouveau_fence_new(struct nouveau_channel *chan, struct nouveau_fence **pfence,
                  bool emit)
{
        struct nouveau_fence *fence;
        int ret = 0;

        fence = kzalloc(sizeof(*fence), GFP_KERNEL);
        if (!fence)
                return -ENOMEM;
        kref_init(&fence->refcount);
        nouveau_channel_ref(chan, &fence->channel);
        if (emit)
                ret = nouveau_fence_emit(fence);
        if (ret)
                nouveau_fence_unref(&fence);
        *pfence = fence;
        return ret;
}

struct nouveau_channel *
nouveau_fence_channel(struct nouveau_fence *fence)
{
        return fence ? nouveau_channel_get_unlocked(fence->channel) : NULL;
}

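/*
 * Assign the next per-channel sequence number to the fence, add it to the
 * pending list and write the sequence to the ring with the appropriate
 * software method (0x0050 on chipsets with a refcount register, 0x0150
 * otherwise).
 */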
int
nouveau_fence_emit(struct nouveau_fence *fence)
{
        struct nouveau_channel *chan = fence->channel;
        struct drm_device *dev = chan->dev;
        int ret;

        ret = RING_SPACE(chan, 2);
        if (ret)
                return ret;

        if (unlikely(chan->fence.sequence == chan->fence.sequence_ack - 1)) {
                nouveau_fence_update(chan);

                BUG_ON(chan->fence.sequence ==
                       chan->fence.sequence_ack - 1);
        }

        fence->sequence = ++chan->fence.sequence;

        kref_get(&fence->refcount);
        spin_lock(&chan->fence.lock);
        list_add_tail(&fence->entry, &chan->fence.pending);
        spin_unlock(&chan->fence.lock);

        BEGIN_RING(chan, NvSubSw, USE_REFCNT(dev) ? 0x0050 : 0x0150, 1);
        OUT_RING(chan, fence->sequence);
        FIRE_RING(chan);

        return 0;
}

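/*
 * Attach a work callback to a fence.  If the fence has already signalled
 * the callback runs immediately; otherwise it is run later by
 * nouveau_fence_update() (signalled == true) or by channel teardown
 * (signalled == false).
 */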
void
nouveau_fence_work(struct nouveau_fence *fence,
                   void (*work)(void *priv, bool signalled),
                   void *priv)
{
        spin_lock(&fence->channel->fence.lock);
        if (fence->signalled) {
                work(priv, true);
        } else {
                fence->work = work;
                fence->priv = priv;
        }
        spin_unlock(&fence->channel->fence.lock);
}

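/*
 * The __nouveau_fence_* functions below are used as the driver's TTM
 * sync-object hooks (ref/unref/signalled/wait/flush), with a fence as the
 * sync object.
 */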
void
__nouveau_fence_unref(void **sync_obj)
{
        struct nouveau_fence *fence = nouveau_fence(*sync_obj);

        if (fence)
                kref_put(&fence->refcount, nouveau_fence_del);
        *sync_obj = NULL;
}

void *
__nouveau_fence_ref(void *sync_obj)
{
        struct nouveau_fence *fence = nouveau_fence(sync_obj);

        kref_get(&fence->refcount);
        return sync_obj;
}

bool
__nouveau_fence_signalled(void *sync_obj, void *sync_arg)
{
        struct nouveau_fence *fence = nouveau_fence(sync_obj);
        struct nouveau_channel *chan = fence->channel;

        if (fence->signalled)
                return true;

        nouveau_fence_update(chan);
        return fence->signalled;
}

int
__nouveau_fence_wait(void *sync_obj, void *sync_arg, bool lazy, bool intr)
{
        unsigned long timeout = jiffies + (3 * DRM_HZ);
        unsigned long sleep_time = jiffies + 1;
        int ret = 0;

        while (1) {
                if (__nouveau_fence_signalled(sync_obj, sync_arg))
                        break;

                if (time_after_eq(jiffies, timeout)) {
                        ret = -EBUSY;
                        break;
                }

                __set_current_state(intr ? TASK_INTERRUPTIBLE
                                         : TASK_UNINTERRUPTIBLE);
                if (lazy && time_after_eq(jiffies, sleep_time))
                        schedule_timeout(1);

                if (intr && signal_pending(current)) {
                        ret = -ERESTARTSYS;
                        break;
                }
        }

        __set_current_state(TASK_RUNNING);
        return ret;
}

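/*
 * Illustrative fence lifecycle for a caller, assuming the usual
 * nouveau_fence_wait()/nouveau_fence_unref() wrappers around the
 * __nouveau_fence_* functions above (sketch only, not a new API):
 *
 *      struct nouveau_fence *fence = NULL;
 *      int ret = nouveau_fence_new(chan, &fence, true);
 *      if (ret == 0) {
 *              ret = nouveau_fence_wait(fence, true, false);
 *              nouveau_fence_unref(&fence);
 *      }
 *
 * The code below implements cross-channel synchronisation: a semaphore is a
 * 4-byte word allocated from the shared VRAM buffer set up in
 * nouveau_fence_init(); one channel ACQUIREs it (stalls until it is
 * written) while the other RELEASEs it.
 */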
static struct nouveau_semaphore *
alloc_semaphore(struct drm_device *dev)
{
        struct drm_nouveau_private *dev_priv = dev->dev_private;
        struct nouveau_semaphore *sema;
        int ret;

        if (!USE_SEMA(dev))
                return NULL;

        sema = kmalloc(sizeof(*sema), GFP_KERNEL);
        if (!sema)
                goto fail;

        ret = drm_mm_pre_get(&dev_priv->fence.heap);
        if (ret)
                goto fail;

        spin_lock(&dev_priv->fence.lock);
        sema->mem = drm_mm_search_free(&dev_priv->fence.heap, 4, 0, 0);
        if (sema->mem)
                sema->mem = drm_mm_get_block_atomic(sema->mem, 4, 0);
        spin_unlock(&dev_priv->fence.lock);
        if (!sema->mem)
                goto fail;

        kref_init(&sema->ref);
        sema->dev = dev;
        nouveau_bo_wr32(dev_priv->fence.bo, sema->mem->start / 4, 0);
        return sema;
fail:
        kfree(sema);
        return NULL;
}

static void
free_semaphore(struct kref *ref)
{
        struct nouveau_semaphore *sema =
                container_of(ref, struct nouveau_semaphore, ref);
        struct drm_nouveau_private *dev_priv = sema->dev->dev_private;

        spin_lock(&dev_priv->fence.lock);
        drm_mm_put_block(sema->mem);
        spin_unlock(&dev_priv->fence.lock);

        kfree(sema);
}

static void
semaphore_work(void *priv, bool signalled)
{
        struct nouveau_semaphore *sema = priv;
        struct drm_nouveau_private *dev_priv = sema->dev->dev_private;

        if (unlikely(!signalled))
                nouveau_bo_wr32(dev_priv->fence.bo, sema->mem->start / 4, 1);

        kref_put(&sema->ref, free_semaphore);
}

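/*
 * Emit an ACQUIRE or RELEASE of the semaphore on a channel.  A fence with a
 * semaphore_work callback is emitted afterwards, so the semaphore is only
 * dropped (or force-released if the channel dies first) once the command
 * has actually executed.
 */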
static int
emit_semaphore(struct nouveau_channel *chan, int method,
               struct nouveau_semaphore *sema)
{
        struct drm_nouveau_private *dev_priv = sema->dev->dev_private;
        struct nouveau_fence *fence;
        bool smart = (dev_priv->card_type >= NV_50);
        int ret;

        ret = RING_SPACE(chan, smart ? 8 : 4);
        if (ret)
                return ret;

        if (smart) {
                BEGIN_RING(chan, NvSubSw, NV_SW_DMA_SEMAPHORE, 1);
                OUT_RING(chan, NvSema);
        }
        BEGIN_RING(chan, NvSubSw, NV_SW_SEMAPHORE_OFFSET, 1);
        OUT_RING(chan, sema->mem->start);

        if (smart && method == NV_SW_SEMAPHORE_ACQUIRE) {
                /*
                 * NV50 tries to be too smart and context-switch
                 * between semaphores instead of doing a "first come,
                 * first served" strategy like previous cards do.
                 *
                 * That's bad because the ACQUIRE latency can get as
                 * large as the PFIFO context time slice in the
                 * typical DRI2 case where you have several
                 * outstanding semaphores at the same moment.
                 *
                 * If we're going to ACQUIRE, force the card to
                 * context switch before, just in case the matching
                 * RELEASE is already scheduled to be executed in
                 * another channel.
                 */
                BEGIN_RING(chan, NvSubSw, NV_SW_YIELD, 1);
                OUT_RING(chan, 0);
        }

        BEGIN_RING(chan, NvSubSw, method, 1);
        OUT_RING(chan, 1);

        if (smart && method == NV_SW_SEMAPHORE_RELEASE) {
                /*
                 * Force the card to context switch, there may be
                 * another channel waiting for the semaphore we just
                 * released.
                 */
                BEGIN_RING(chan, NvSubSw, NV_SW_YIELD, 1);
                OUT_RING(chan, 0);
        }

        /* Delay semaphore destruction until its work is done */
        ret = nouveau_fence_new(chan, &fence, true);
        if (ret)
                return ret;

        kref_get(&sema->ref);
        nouveau_fence_work(fence, semaphore_work, sema);
        nouveau_fence_unref(&fence);

        return 0;
}

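/*
 * Make 'wchan' wait for 'fence', which may belong to a different channel.
 * Where possible this is done on the GPU with a semaphore ACQUIRE on wchan
 * and a matching RELEASE on the fence's channel; otherwise (no semaphore
 * support, or the other channel's mutex is contended) fall back to waiting
 * on the fence from the CPU.
 */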
int
nouveau_fence_sync(struct nouveau_fence *fence,
                   struct nouveau_channel *wchan)
{
        struct nouveau_channel *chan = nouveau_fence_channel(fence);
        struct drm_device *dev = wchan->dev;
        struct nouveau_semaphore *sema;
        int ret = 0;

        if (likely(!chan || chan == wchan ||
                   nouveau_fence_signalled(fence)))
                goto out;

        sema = alloc_semaphore(dev);
        if (!sema) {
                /* Early card or broken userspace, fall back to
                 * software sync. */
                ret = nouveau_fence_wait(fence, true, false);
                goto out;
        }

        /* try to take chan's mutex, if we can't take it right away
         * we have to fallback to software sync to prevent locking
         * order issues
         */
        if (!mutex_trylock(&chan->mutex)) {
                ret = nouveau_fence_wait(fence, true, false);
                goto out_unref;
        }

        /* Make wchan wait until it gets signalled */
        ret = emit_semaphore(wchan, NV_SW_SEMAPHORE_ACQUIRE, sema);
        if (ret)
                goto out_unlock;

        /* Signal the semaphore from chan */
        ret = emit_semaphore(chan, NV_SW_SEMAPHORE_RELEASE, sema);
out_unlock:
        mutex_unlock(&chan->mutex);
out_unref:
        kref_put(&sema->ref, free_semaphore);
out:
        if (chan)
                nouveau_channel_put_unlocked(&chan);
        return ret;
}

int
__nouveau_fence_flush(void *sync_obj, void *sync_arg)
{
        return 0;
}

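/*
 * Per-channel setup: bind an NV_SW object on the software subchannel for
 * the fence/semaphore methods and, where semaphores are supported, create
 * a DMA object (NvSema) covering the shared semaphore buffer.
 */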
int
nouveau_fence_channel_init(struct nouveau_channel *chan)
{
        struct drm_device *dev = chan->dev;
        struct drm_nouveau_private *dev_priv = dev->dev_private;
        struct nouveau_gpuobj *obj = NULL;
        int ret;

        /* Create an NV_SW object for various sync purposes */
        ret = nouveau_gpuobj_gr_new(chan, NvSw, NV_SW);
        if (ret)
                return ret;

        ret = RING_SPACE(chan, 2);
        if (ret)
                return ret;
        BEGIN_RING(chan, NvSubSw, 0, 1);
        OUT_RING(chan, NvSw);

        /* Create a DMA object for the shared cross-channel sync area. */
        if (USE_SEMA(dev)) {
                struct ttm_mem_reg *mem = &dev_priv->fence.bo->bo.mem;

                ret = nouveau_gpuobj_dma_new(chan, NV_CLASS_DMA_IN_MEMORY,
                                             mem->start << PAGE_SHIFT,
                                             mem->size, NV_MEM_ACCESS_RW,
                                             NV_MEM_TARGET_VRAM, &obj);
                if (ret)
                        return ret;

                ret = nouveau_ramht_insert(chan, NvSema, obj);
                nouveau_gpuobj_ref(NULL, &obj);
                if (ret)
                        return ret;

                ret = RING_SPACE(chan, 2);
                if (ret)
                        return ret;
                BEGIN_RING(chan, NvSubSw, NV_SW_DMA_SEMAPHORE, 1);
                OUT_RING(chan, NvSema);
        }

        FIRE_RING(chan);

        INIT_LIST_HEAD(&chan->fence.pending);
        spin_lock_init(&chan->fence.lock);
        atomic_set(&chan->fence.last_sequence_irq, 0);

        return 0;
}

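/*
 * Channel teardown: flush out every still-pending fence, running attached
 * work callbacks with signalled == false so waiters are not left hanging.
 */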
void
nouveau_fence_channel_fini(struct nouveau_channel *chan)
{
        struct nouveau_fence *tmp, *fence;

        spin_lock(&chan->fence.lock);

        list_for_each_entry_safe(fence, tmp, &chan->fence.pending, entry) {
                fence->signalled = true;
                list_del(&fence->entry);

                if (unlikely(fence->work))
                        fence->work(fence->priv, false);

                kref_put(&fence->refcount, nouveau_fence_del);
        }

        spin_unlock(&chan->fence.lock);
}

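/*
 * Device-wide setup and teardown of the shared semaphore area: a pinned,
 * mapped VRAM buffer object carved into 4-byte semaphores by the drm_mm
 * allocator used in alloc_semaphore()/free_semaphore() above.
 */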
int
nouveau_fence_init(struct drm_device *dev)
{
        struct drm_nouveau_private *dev_priv = dev->dev_private;
        int ret;

        /* Create a shared VRAM heap for cross-channel sync. */
        if (USE_SEMA(dev)) {
                ret = nouveau_bo_new(dev, NULL, 4096, 0, TTM_PL_FLAG_VRAM,
                                     0, 0, false, true, &dev_priv->fence.bo);
                if (ret)
                        return ret;

                ret = nouveau_bo_pin(dev_priv->fence.bo, TTM_PL_FLAG_VRAM);
                if (ret)
                        goto fail;
                ret = nouveau_bo_map(dev_priv->fence.bo);
                if (ret)
                        goto fail;

                ret = drm_mm_init(&dev_priv->fence.heap, 0,
                                  dev_priv->fence.bo->bo.mem.size);
                if (ret)
                        goto fail;

                spin_lock_init(&dev_priv->fence.lock);
        }

        return 0;
fail:
        nouveau_bo_unmap(dev_priv->fence.bo);
        nouveau_bo_ref(NULL, &dev_priv->fence.bo);
        return ret;
}

void
nouveau_fence_fini(struct drm_device *dev)
{
        struct drm_nouveau_private *dev_priv = dev->dev_private;

        if (USE_SEMA(dev)) {
                drm_mm_takedown(&dev_priv->fence.heap);
                nouveau_bo_unmap(dev_priv->fence.bo);
                nouveau_bo_unpin(dev_priv->fence.bo);
                nouveau_bo_ref(NULL, &dev_priv->fence.bo);
        }
}