+static struct nouveau_semaphore *
+alloc_semaphore(struct drm_device *dev)
+{
+ struct drm_nouveau_private *dev_priv = dev->dev_private;
+ struct nouveau_semaphore *sema;
+
+ if (!USE_SEMA(dev))
+ return NULL;
+
+ sema = kmalloc(sizeof(*sema), GFP_KERNEL);
+ if (!sema)
+ goto fail;
+
+ spin_lock(&dev_priv->fence.lock);
+ sema->mem = drm_mm_search_free(&dev_priv->fence.heap, 4, 0, 0);
+ if (sema->mem)
+ sema->mem = drm_mm_get_block(sema->mem, 4, 0);
+ spin_unlock(&dev_priv->fence.lock);
+
+ if (!sema->mem)
+ goto fail;
+
+ kref_init(&sema->ref);
+ sema->dev = dev;
+ nouveau_bo_wr32(dev_priv->fence.bo, sema->mem->start / 4, 0);
+
+ return sema;
+fail:
+ kfree(sema);
+ return NULL;
+}
+
+static void
+free_semaphore(struct kref *ref)
+{
+ struct nouveau_semaphore *sema =
+ container_of(ref, struct nouveau_semaphore, ref);
+ struct drm_nouveau_private *dev_priv = sema->dev->dev_private;
+
+ spin_lock(&dev_priv->fence.lock);
+ drm_mm_put_block(sema->mem);
+ spin_unlock(&dev_priv->fence.lock);
+
+ kfree(sema);
+}
+
+static void
+semaphore_work(void *priv, bool signalled)
+{
+ struct nouveau_semaphore *sema = priv;
+ struct drm_nouveau_private *dev_priv = sema->dev->dev_private;
+
+ if (unlikely(!signalled))
+ nouveau_bo_wr32(dev_priv->fence.bo, sema->mem->start / 4, 1);
+
+ kref_put(&sema->ref, free_semaphore);
+}
+
+static int
+emit_semaphore(struct nouveau_channel *chan, int method,
+ struct nouveau_semaphore *sema)
+{
+ struct drm_nouveau_private *dev_priv = sema->dev->dev_private;
+ struct nouveau_fence *fence;
+ bool smart = (dev_priv->card_type >= NV_50);
+ int ret;
+
+ ret = RING_SPACE(chan, smart ? 8 : 4);
+ if (ret)
+ return ret;
+
+ if (smart) {
+ BEGIN_RING(chan, NvSubSw, NV_SW_DMA_SEMAPHORE, 1);
+ OUT_RING(chan, NvSema);
+ }
+ BEGIN_RING(chan, NvSubSw, NV_SW_SEMAPHORE_OFFSET, 1);
+ OUT_RING(chan, sema->mem->start);
+
+ if (smart && method == NV_SW_SEMAPHORE_ACQUIRE) {
+ /*
+ * NV50 tries to be too smart and context-switch
+ * between semaphores instead of doing a "first come,
+ * first served" strategy like previous cards
+ * do.
+ *
+ * That's bad because the ACQUIRE latency can get as
+ * large as the PFIFO context time slice in the
+ * typical DRI2 case where you have several
+ * outstanding semaphores at the same moment.
+ *
+ * If we're going to ACQUIRE, force the card to
+ * context switch before, just in case the matching
+ * RELEASE is already scheduled to be executed in
+ * another channel.
+ */
+ BEGIN_RING(chan, NvSubSw, NV_SW_YIELD, 1);
+ OUT_RING(chan, 0);
+ }
+
+ BEGIN_RING(chan, NvSubSw, method, 1);
+ OUT_RING(chan, 1);
+
+ if (smart && method == NV_SW_SEMAPHORE_RELEASE) {
+ /*
+ * Force the card to context switch, there may be
+ * another channel waiting for the semaphore we just
+ * released.
+ */
+ BEGIN_RING(chan, NvSubSw, NV_SW_YIELD, 1);
+ OUT_RING(chan, 0);
+ }
+
+ /* Delay semaphore destruction until its work is done */
+ ret = nouveau_fence_new(chan, &fence, true);
+ if (ret)
+ return ret;
+
+ kref_get(&sema->ref);
+ nouveau_fence_work(fence, semaphore_work, sema);
+ nouveau_fence_unref((void *)&fence);
+
+ return 0;
+}
+
+int
+nouveau_fence_sync(struct nouveau_fence *fence,
+ struct nouveau_channel *wchan)
+{
+ struct nouveau_channel *chan = nouveau_fence_channel(fence);
+ struct drm_device *dev = wchan->dev;
+ struct nouveau_semaphore *sema;
+ int ret;
+
+ if (likely(!fence || chan == wchan ||
+ nouveau_fence_signalled(fence, NULL)))
+ return 0;
+
+ sema = alloc_semaphore(dev);
+ if (!sema) {
+ /* Early card or broken userspace, fall back to
+ * software sync. */
+ return nouveau_fence_wait(fence, NULL, false, false);
+ }
+
+ /* Make wchan wait until it gets signalled */
+ ret = emit_semaphore(wchan, NV_SW_SEMAPHORE_ACQUIRE, sema);
+ if (ret)
+ goto out;
+
+ /* Signal the semaphore from chan */
+ ret = emit_semaphore(chan, NV_SW_SEMAPHORE_RELEASE, sema);
+out:
+ kref_put(&sema->ref, free_semaphore);
+ return ret;
+}
+