drivers/gpu/drm/amd/scheduler/gpu_scheduler.c

   1 /*
   2  * Copyright 2015 Advanced Micro Devices, Inc.
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice shall be included in
  12  * all copies or substantial portions of the Software.
  13  *
  14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20  * OTHER DEALINGS IN THE SOFTWARE.
  21  *
  22  *
  23  */
  24 #include <linux/kthread.h>
  25 #include <linux/wait.h>
  26 #include <linux/sched.h>
  27 #include <uapi/linux/sched/types.h>
  28 #include <drm/drmP.h>
  29 #include "gpu_scheduler.h"
  30
  31 #define CREATE_TRACE_POINTS
  32 #include "gpu_sched_trace.h"
  33
/*
 * Forward declarations: each of these helpers is called by code that
 * appears earlier in this file than the helper's definition.
 */
static bool amd_sched_entity_is_ready(struct amd_sched_entity *entity);
static void amd_sched_wakeup(struct amd_gpu_scheduler *sched);
static void amd_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb);
  37
  38 /* Initialize a given run queue struct */
  39 static void amd_sched_rq_init(struct amd_sched_rq *rq)
  40 {
  41         spin_lock_init(&rq->lock);
  42         INIT_LIST_HEAD(&rq->entities);
  43         rq->current_entity = NULL;
  44 }
  45
  46 static void amd_sched_rq_add_entity(struct amd_sched_rq *rq,
  47                                     struct amd_sched_entity *entity)
  48 {
  49         if (!list_empty(&entity->list))
  50                 return;
  51         spin_lock(&rq->lock);
  52         list_add_tail(&entity->list, &rq->entities);
  53         spin_unlock(&rq->lock);
  54 }
  55
  56 static void amd_sched_rq_remove_entity(struct amd_sched_rq *rq,
  57                                        struct amd_sched_entity *entity)
  58 {
  59         if (list_empty(&entity->list))
  60                 return;
  61         spin_lock(&rq->lock);
  62         list_del_init(&entity->list);
  63         if (rq->current_entity == entity)
  64                 rq->current_entity = NULL;
  65         spin_unlock(&rq->lock);
  66 }
  67
  68 /**
  69  * Select an entity which could provide a job to run
  70  *
  71  * @rq          The run queue to check.
  72  *
  73  * Try to find a ready entity, returns NULL if none found.
  74  */
  75 static struct amd_sched_entity *
  76 amd_sched_rq_select_entity(struct amd_sched_rq *rq)
  77 {
  78         struct amd_sched_entity *entity;
  79
  80         spin_lock(&rq->lock);
  81
  82         entity = rq->current_entity;
  83         if (entity) {
  84                 list_for_each_entry_continue(entity, &rq->entities, list) {
  85                         if (amd_sched_entity_is_ready(entity)) {
  86                                 rq->current_entity = entity;
  87                                 spin_unlock(&rq->lock);
  88                                 return entity;
  89                         }
  90                 }
  91         }
  92
  93         list_for_each_entry(entity, &rq->entities, list) {
  94
  95                 if (amd_sched_entity_is_ready(entity)) {
  96                         rq->current_entity = entity;
  97                         spin_unlock(&rq->lock);
  98                         return entity;
  99                 }
 100
 101                 if (entity == rq->current_entity)
 102                         break;
 103         }
 104
 105         spin_unlock(&rq->lock);
 106
 107         return NULL;
 108 }
 109
 110 /**
 111  * Init a context entity used by scheduler when submit to HW ring.
 112  *
 113  * @sched       The pointer to the scheduler
 114  * @entity      The pointer to a valid amd_sched_entity
 115  * @rq          The run queue this entity belongs
 116  * @kernel      If this is an entity for the kernel
 117  * @jobs        The max number of jobs in the job queue
 118  *
 119  * return 0 if succeed. negative error code on failure
 120 */
 121 int amd_sched_entity_init(struct amd_gpu_scheduler *sched,
 122                           struct amd_sched_entity *entity,
 123                           struct amd_sched_rq *rq,
 124                           uint32_t jobs)
 125 {
 126         int r;
 127
 128         if (!(sched && entity && rq))
 129                 return -EINVAL;
 130
 131         memset(entity, 0, sizeof(struct amd_sched_entity));
 132         INIT_LIST_HEAD(&entity->list);
 133         entity->rq = rq;
 134         entity->sched = sched;
 135
 136         spin_lock_init(&entity->queue_lock);
 137         r = kfifo_alloc(&entity->job_queue, jobs * sizeof(void *), GFP_KERNEL);
 138         if (r)
 139                 return r;
 140
 141         atomic_set(&entity->fence_seq, 0);
 142         entity->fence_context = dma_fence_context_alloc(2);
 143
 144         return 0;
 145 }
 146
 147 /**
 148  * Query if entity is initialized
 149  *
 150  * @sched       Pointer to scheduler instance
 151  * @entity      The pointer to a valid scheduler entity
 152  *
 153  * return true if entity is initialized, false otherwise
 154 */
 155 static bool amd_sched_entity_is_initialized(struct amd_gpu_scheduler *sched,
 156                                             struct amd_sched_entity *entity)
 157 {
 158         return entity->sched == sched &&
 159                 entity->rq != NULL;
 160 }
 161
 162 /**
 163  * Check if entity is idle
 164  *
 165  * @entity      The pointer to a valid scheduler entity
 166  *
 167  * Return true if entity don't has any unscheduled jobs.
 168  */
 169 static bool amd_sched_entity_is_idle(struct amd_sched_entity *entity)
 170 {
 171         rmb();
 172         if (kfifo_is_empty(&entity->job_queue))
 173                 return true;
 174
 175         return false;
 176 }
 177
 178 /**
 179  * Check if entity is ready
 180  *
 181  * @entity      The pointer to a valid scheduler entity
 182  *
 183  * Return true if entity could provide a job.
 184  */
 185 static bool amd_sched_entity_is_ready(struct amd_sched_entity *entity)
 186 {
 187         if (kfifo_is_empty(&entity->job_queue))
 188                 return false;
 189
 190         if (ACCESS_ONCE(entity->dependency))
 191                 return false;
 192
 193         return true;
 194 }
 195
 196 /**
 197  * Destroy a context entity
 198  *
 199  * @sched       Pointer to scheduler instance
 200  * @entity      The pointer to a valid scheduler entity
 201  *
 202  * Cleanup and free the allocated resources.
 203  */
 204 void amd_sched_entity_fini(struct amd_gpu_scheduler *sched,
 205                            struct amd_sched_entity *entity)
 206 {
 207         struct amd_sched_rq *rq = entity->rq;
 208
 209         if (!amd_sched_entity_is_initialized(sched, entity))
 210                 return;
 211
 212         /**
 213          * The client will not queue more IBs during this fini, consume existing
 214          * queued IBs
 215         */
 216         wait_event(sched->job_scheduled, amd_sched_entity_is_idle(entity));
 217
 218         amd_sched_rq_remove_entity(rq, entity);
 219         kfifo_free(&entity->job_queue);
 220 }
 221
 222 static void amd_sched_entity_wakeup(struct dma_fence *f, struct dma_fence_cb *cb)
 223 {
 224         struct amd_sched_entity *entity =
 225                 container_of(cb, struct amd_sched_entity, cb);
 226         entity->dependency = NULL;
 227         dma_fence_put(f);
 228         amd_sched_wakeup(entity->sched);
 229 }
 230
 231 static void amd_sched_entity_clear_dep(struct dma_fence *f, struct dma_fence_cb *cb)
 232 {
 233         struct amd_sched_entity *entity =
 234                 container_of(cb, struct amd_sched_entity, cb);
 235         entity->dependency = NULL;
 236         dma_fence_put(f);
 237 }
 238
/**
 * Install a callback on the entity's current dependency fence
 *
 * @entity      The entity whose ->dependency fence is to be waited for
 *
 * Returns true when a callback was added, i.e. the entity has to wait.
 * Returns false when the dependency can be ignored; in that case the
 * reference held through entity->dependency has been dropped. The pointer
 * itself is not cleared here; the caller's loop overwrites it.
 */
static bool amd_sched_entity_add_dependency_cb(struct amd_sched_entity *entity)
{
        struct amd_gpu_scheduler *sched = entity->sched;
        struct dma_fence * fence = entity->dependency;
        struct amd_sched_fence *s_fence;

        if (fence->context == entity->fence_context) {
                /* We can ignore fences from ourself */
                dma_fence_put(entity->dependency);
                return false;
        }

        /* NOTE(review): presumably NULL for non-scheduler fences - confirm */
        s_fence = to_amd_sched_fence(fence);
        if (s_fence && s_fence->sched == sched) {

                /*
                 * Fence is from the same scheduler, only need to wait for
                 * it to be scheduled
                 */
                /* Swap the held reference over to the "scheduled" fence. */
                fence = dma_fence_get(&s_fence->scheduled);
                dma_fence_put(entity->dependency);
                entity->dependency = fence;
                /* No scheduler wakeup is requested from this callback. */
                if (!dma_fence_add_callback(fence, &entity->cb,
                                            amd_sched_entity_clear_dep))
                        return true;

                /* Ignore it when it is already scheduled */
                dma_fence_put(fence);
                return false;
        }

        /* Any other fence: wake the scheduler once it signals. */
        if (!dma_fence_add_callback(entity->dependency, &entity->cb,
                                    amd_sched_entity_wakeup))
                return true;

        /* Callback could not be added: drop the reference, do not wait. */
        dma_fence_put(entity->dependency);
        return false;
}
 277
 278 static struct amd_sched_job *
 279 amd_sched_entity_pop_job(struct amd_sched_entity *entity)
 280 {
 281         struct amd_gpu_scheduler *sched = entity->sched;
 282         struct amd_sched_job *sched_job;
 283
 284         if (!kfifo_out_peek(&entity->job_queue, &sched_job, sizeof(sched_job)))
 285                 return NULL;
 286
 287         while ((entity->dependency = sched->ops->dependency(sched_job)))
 288                 if (amd_sched_entity_add_dependency_cb(entity))
 289                         return NULL;
 290
 291         return sched_job;
 292 }
 293
 294 /**
 295  * Helper to submit a job to the job queue
 296  *
 297  * @sched_job           The pointer to job required to submit
 298  *
 299  * Returns true if we could submit the job.
 300  */
 301 static bool amd_sched_entity_in(struct amd_sched_job *sched_job)
 302 {
 303         struct amd_gpu_scheduler *sched = sched_job->sched;
 304         struct amd_sched_entity *entity = sched_job->s_entity;
 305         bool added, first = false;
 306
 307         spin_lock(&entity->queue_lock);
 308         added = kfifo_in(&entity->job_queue, &sched_job,
 309                         sizeof(sched_job)) == sizeof(sched_job);
 310
 311         if (added && kfifo_len(&entity->job_queue) == sizeof(sched_job))
 312                 first = true;
 313
 314         spin_unlock(&entity->queue_lock);
 315
 316         /* first job wakes up scheduler */
 317         if (first) {
 318                 /* Add the entity to the run queue */
 319                 amd_sched_rq_add_entity(entity->rq, entity);
 320                 amd_sched_wakeup(sched);
 321         }
 322         return added;
 323 }
 324
/*
 * Work item queued by amd_sched_job_finish_cb() once the job's finished
 * fence has signaled. It takes the job off ring_mirror_list, moves the
 * timeout handling on to the next job in the list and finally hands the
 * job to the backend's free_job() callback.
 */
static void amd_sched_job_finish(struct work_struct *work)
{
        struct amd_sched_job *s_job = container_of(work, struct amd_sched_job,
                                                   finish_work);
        struct amd_gpu_scheduler *sched = s_job->sched;

        /* remove job from ring_mirror_list */
        spin_lock(&sched->job_list_lock);
        list_del_init(&s_job->node);
        /* Timeout handling is only active for a finite timeout. */
        if (sched->timeout != MAX_SCHEDULE_TIMEOUT) {
                struct amd_sched_job *next;

                /*
                 * The _sync cancel waits for a running timeout handler, so
                 * the spinlock is released around it.
                 */
                spin_unlock(&sched->job_list_lock);
                cancel_delayed_work_sync(&s_job->work_tdr);
                spin_lock(&sched->job_list_lock);

                /* queue TDR for next job */
                next = list_first_entry_or_null(&sched->ring_mirror_list,
                                                struct amd_sched_job, node);

                if (next)
                        schedule_delayed_work(&next->work_tdr, sched->timeout);
        }
        spin_unlock(&sched->job_list_lock);
        sched->ops->free_job(s_job);
}
 354
 355 static void amd_sched_job_finish_cb(struct dma_fence *f,
 356                                     struct dma_fence_cb *cb)
 357 {
 358         struct amd_sched_job *job = container_of(cb, struct amd_sched_job,
 359                                                  finish_cb);
 360         schedule_work(&job->finish_work);
 361 }
 362
 363 static void amd_sched_job_begin(struct amd_sched_job *s_job)
 364 {
 365         struct amd_gpu_scheduler *sched = s_job->sched;
 366
 367         spin_lock(&sched->job_list_lock);
 368         list_add_tail(&s_job->node, &sched->ring_mirror_list);
 369         if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
 370             list_first_entry_or_null(&sched->ring_mirror_list,
 371                                      struct amd_sched_job, node) == s_job)
 372                 schedule_delayed_work(&s_job->work_tdr, sched->timeout);
 373         spin_unlock(&sched->job_list_lock);
 374 }
 375
 376 static void amd_sched_job_timedout(struct work_struct *work)
 377 {
 378         struct amd_sched_job *job = container_of(work, struct amd_sched_job,
 379                                                  work_tdr.work);
 380
 381         job->sched->ops->timedout_job(job);
 382 }
 383
 384 void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched)
 385 {
 386         struct amd_sched_job *s_job;
 387
 388         spin_lock(&sched->job_list_lock);
 389         list_for_each_entry_reverse(s_job, &sched->ring_mirror_list, node) {
 390                 if (dma_fence_remove_callback(s_job->s_fence->parent, &s_job->s_fence->cb)) {
 391                         dma_fence_put(s_job->s_fence->parent);
 392                         s_job->s_fence->parent = NULL;
 393                 }
 394         }
 395         atomic_set(&sched->hw_rq_count, 0);
 396         spin_unlock(&sched->job_list_lock);
 397 }
 398
/**
 * Resubmit every job on the mirror list to the hardware
 *
 * @sched       The pointer to the scheduler
 *
 * Re-arms the timeout work for the first job on ring_mirror_list (finite
 * timeout only), then calls the backend's run_job() for each job on the
 * list and hooks amd_sched_process_job() up to the returned fence.
 */
void amd_sched_job_recovery(struct amd_gpu_scheduler *sched)
{
        struct amd_sched_job *s_job, *tmp;
        int r;

        spin_lock(&sched->job_list_lock);
        s_job = list_first_entry_or_null(&sched->ring_mirror_list,
                                         struct amd_sched_job, node);
        if (s_job && sched->timeout != MAX_SCHEDULE_TIMEOUT)
                schedule_delayed_work(&s_job->work_tdr, sched->timeout);

        list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
                struct amd_sched_fence *s_fence = s_job->s_fence;
                struct dma_fence *fence;

                /* run_job() is called without job_list_lock held. */
                spin_unlock(&sched->job_list_lock);
                fence = sched->ops->run_job(s_job);
                atomic_inc(&sched->hw_rq_count);
                if (fence) {
                        /* s_fence->parent keeps its own reference. */
                        s_fence->parent = dma_fence_get(fence);
                        r = dma_fence_add_callback(fence, &s_fence->cb,
                                                   amd_sched_process_job);
                        /*
                         * -ENOENT: the fence has already signaled, so the
                         * callback is invoked by hand instead.
                         */
                        if (r == -ENOENT)
                                amd_sched_process_job(fence, &s_fence->cb);
                        else if (r)
                                DRM_ERROR("fence add callback failed (%d)\n",
                                          r);
                        /* Drop the reference returned by run_job(). */
                        dma_fence_put(fence);
                } else {
                        /* No hardware fence: complete the job right away. */
                        DRM_ERROR("Failed to run job!\n");
                        amd_sched_process_job(NULL, &s_fence->cb);
                }
                spin_lock(&sched->job_list_lock);
        }
        spin_unlock(&sched->job_list_lock);
}
 435
 436 /**
 437  * Submit a job to the job queue
 438  *
 439  * @sched_job           The pointer to job required to submit
 440  *
 441  * Returns 0 for success, negative error code otherwise.
 442  */
 443 void amd_sched_entity_push_job(struct amd_sched_job *sched_job)
 444 {
 445         struct amd_sched_entity *entity = sched_job->s_entity;
 446
 447         trace_amd_sched_job(sched_job);
 448         dma_fence_add_callback(&sched_job->s_fence->finished, &sched_job->finish_cb,
 449                                amd_sched_job_finish_cb);
 450         wait_event(entity->sched->job_scheduled,
 451                    amd_sched_entity_in(sched_job));
 452 }
 453
 454 /* init a sched_job with basic field */
 455 int amd_sched_job_init(struct amd_sched_job *job,
 456                        struct amd_gpu_scheduler *sched,
 457                        struct amd_sched_entity *entity,
 458                        void *owner)
 459 {
 460         job->sched = sched;
 461         job->s_entity = entity;
 462         job->s_fence = amd_sched_fence_create(entity, owner);
 463         if (!job->s_fence)
 464                 return -ENOMEM;
 465
 466         INIT_WORK(&job->finish_work, amd_sched_job_finish);
 467         INIT_LIST_HEAD(&job->node);
 468         INIT_DELAYED_WORK(&job->work_tdr, amd_sched_job_timedout);
 469
 470         return 0;
 471 }
 472
 473 /**
 474  * Return ture if we can push more jobs to the hw.
 475  */
 476 static bool amd_sched_ready(struct amd_gpu_scheduler *sched)
 477 {
 478         return atomic_read(&sched->hw_rq_count) <
 479                 sched->hw_submission_limit;
 480 }
 481
 482 /**
 483  * Wake up the scheduler when it is ready
 484  */
 485 static void amd_sched_wakeup(struct amd_gpu_scheduler *sched)
 486 {
 487         if (amd_sched_ready(sched))
 488                 wake_up_interruptible(&sched->wake_up_worker);
 489 }
 490
 491 /**
 492  * Select next entity to process
 493 */
 494 static struct amd_sched_entity *
 495 amd_sched_select_entity(struct amd_gpu_scheduler *sched)
 496 {
 497         struct amd_sched_entity *entity;
 498         int i;
 499
 500         if (!amd_sched_ready(sched))
 501                 return NULL;
 502
 503         /* Kernel run queue has higher priority than normal run queue*/
 504         for (i = 0; i < AMD_SCHED_MAX_PRIORITY; i++) {
 505                 entity = amd_sched_rq_select_entity(&sched->sched_rq[i]);
 506                 if (entity)
 507                         break;
 508         }
 509
 510         return entity;
 511 }
 512
 513 static void amd_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb)
 514 {
 515         struct amd_sched_fence *s_fence =
 516                 container_of(cb, struct amd_sched_fence, cb);
 517         struct amd_gpu_scheduler *sched = s_fence->sched;
 518
 519         atomic_dec(&sched->hw_rq_count);
 520         amd_sched_fence_finished(s_fence);
 521
 522         trace_amd_sched_process_job(s_fence);
 523         dma_fence_put(&s_fence->finished);
 524         wake_up_interruptible(&sched->wake_up_worker);
 525 }
 526
 527 static bool amd_sched_blocked(struct amd_gpu_scheduler *sched)
 528 {
 529         if (kthread_should_park()) {
 530                 kthread_parkme();
 531                 return true;
 532         }
 533
 534         return false;
 535 }
 536
/*
 * Body of the scheduler kernel thread.
 *
 * @param is the struct amd_gpu_scheduler this thread serves. The loop
 * picks a ready entity, hands its next job to the backend's run_job()
 * and then removes the job from the entity's queue. It runs until the
 * thread is asked to stop and always returns 0.
 */
static int amd_sched_main(void *param)
{
        struct sched_param sparam = {.sched_priority = 1};
        struct amd_gpu_scheduler *sched = (struct amd_gpu_scheduler *)param;
        int r, count;

        /* Run the scheduler thread as SCHED_FIFO with priority 1. */
        sched_setscheduler(current, SCHED_FIFO, &sparam);

        while (!kthread_should_stop()) {
                struct amd_sched_entity *entity = NULL;
                struct amd_sched_fence *s_fence;
                struct amd_sched_job *sched_job;
                struct dma_fence *fence;

                /*
                 * Sleep until a ready entity was selected or the thread
                 * should stop. amd_sched_blocked() parks the thread as a
                 * side effect when parking was requested.
                 */
                wait_event_interruptible(sched->wake_up_worker,
                                         (!amd_sched_blocked(sched) &&
                                          (entity = amd_sched_select_entity(sched))) ||
                                         kthread_should_stop());

                if (!entity)
                        continue;

                /* NULL here means the job still waits for a dependency. */
                sched_job = amd_sched_entity_pop_job(entity);
                if (!sched_job)
                        continue;

                s_fence = sched_job->s_fence;

                /* Account for the job before handing it to the hardware. */
                atomic_inc(&sched->hw_rq_count);
                amd_sched_job_begin(sched_job);

                fence = sched->ops->run_job(sched_job);
                amd_sched_fence_scheduled(s_fence);
                if (fence) {
                        /* s_fence->parent keeps its own reference. */
                        s_fence->parent = dma_fence_get(fence);
                        r = dma_fence_add_callback(fence, &s_fence->cb,
                                                   amd_sched_process_job);
                        /*
                         * -ENOENT: the fence has already signaled, so the
                         * callback is invoked by hand instead.
                         */
                        if (r == -ENOENT)
                                amd_sched_process_job(fence, &s_fence->cb);
                        else if (r)
                                DRM_ERROR("fence add callback failed (%d)\n",
                                          r);
                        /* Drop the reference returned by run_job(). */
                        dma_fence_put(fence);
                } else {
                        /* No hardware fence: complete the job right away. */
                        DRM_ERROR("Failed to run job!\n");
                        amd_sched_process_job(NULL, &s_fence->cb);
                }

                /*
                 * The job was only peeked at so far; take it out of the
                 * queue now and wake everybody waiting on job_scheduled.
                 */
                count = kfifo_out(&entity->job_queue, &sched_job,
                                sizeof(sched_job));
                WARN_ON(count != sizeof(sched_job));
                wake_up(&sched->job_scheduled);
        }
        return 0;
}
 592
 593 /**
 594  * Init a gpu scheduler instance
 595  *
 596  * @sched               The pointer to the scheduler
 597  * @ops                 The backend operations for this scheduler.
 598  * @hw_submissions      Number of hw submissions to do.
 599  * @name                Name used for debugging
 600  *
 601  * Return 0 on success, otherwise error code.
 602 */
 603 int amd_sched_init(struct amd_gpu_scheduler *sched,
 604                    const struct amd_sched_backend_ops *ops,
 605                    unsigned hw_submission, long timeout, const char *name)
 606 {
 607         int i;
 608         sched->ops = ops;
 609         sched->hw_submission_limit = hw_submission;
 610         sched->name = name;
 611         sched->timeout = timeout;
 612         for (i = 0; i < AMD_SCHED_MAX_PRIORITY; i++)
 613                 amd_sched_rq_init(&sched->sched_rq[i]);
 614
 615         init_waitqueue_head(&sched->wake_up_worker);
 616         init_waitqueue_head(&sched->job_scheduled);
 617         INIT_LIST_HEAD(&sched->ring_mirror_list);
 618         spin_lock_init(&sched->job_list_lock);
 619         atomic_set(&sched->hw_rq_count, 0);
 620
 621         /* Each scheduler will run on a seperate kernel thread */
 622         sched->thread = kthread_run(amd_sched_main, sched, sched->name);
 623         if (IS_ERR(sched->thread)) {
 624                 DRM_ERROR("Failed to create scheduler for %s.\n", name);
 625                 return PTR_ERR(sched->thread);
 626         }
 627
 628         return 0;
 629 }
 630
 631 /**
 632  * Destroy a gpu scheduler
 633  *
 634  * @sched       The pointer to the scheduler
 635  */
 636 void amd_sched_fini(struct amd_gpu_scheduler *sched)
 637 {
 638         if (sched->thread)
 639                 kthread_stop(sched->thread);
 640 }