GNU Linux-libre 4.19.286-gnu1
[releases.git] / drivers/gpu/drm/scheduler/gpu_scheduler.c
1 /*
2  * Copyright 2015 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23
24 /**
25  * DOC: Overview
26  *
27  * The GPU scheduler provides entities which allow userspace to push jobs
28  * into software queues which are then scheduled on a hardware run queue.
29  * The software queues have a priority among them. The scheduler picks the
30  * next ready entity from a run queue in a round-robin fashion and provides
31  * dependency handling between jobs. The driver is expected to provide the
32  * scheduler with callbacks for backend operations, such as submitting a job
33  * to the hardware run queue or returning the dependencies of a job.
34  *
35  * The organisation of the scheduler is the following:
36  *
37  * 1. Each hw run queue has one scheduler
38  * 2. Each scheduler has multiple run queues with different priorities
39  *    (e.g., HIGH_HW, HIGH_SW, KERNEL, NORMAL)
40  * 3. Each scheduler run queue has a queue of entities to schedule
41  * 4. Entities themselves maintain a queue of jobs that will be scheduled on
42  *    the hardware.
43  *
44  * The jobs in an entity are always scheduled in the order in which they were pushed.
45  */
46
47 #include <linux/kthread.h>
48 #include <linux/wait.h>
49 #include <linux/sched.h>
50 #include <uapi/linux/sched/types.h>
51 #include <drm/drmP.h>
52 #include <drm/gpu_scheduler.h>
53 #include <drm/spsc_queue.h>
54
55 #define CREATE_TRACE_POINTS
56 #include "gpu_scheduler_trace.h"
57
58 #define to_drm_sched_job(sched_job)             \
59                 container_of((sched_job), struct drm_sched_job, queue_node)
60
61 static bool drm_sched_entity_is_ready(struct drm_sched_entity *entity);
62 static void drm_sched_wakeup(struct drm_gpu_scheduler *sched);
63 static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb);
64
65 /**
66  * drm_sched_rq_init - initialize a given run queue struct
67  * @sched: scheduler instance to associate with this run queue
68  * @rq: scheduler run queue
69  *
70  * Initializes a scheduler runqueue.
71  */
72 static void drm_sched_rq_init(struct drm_gpu_scheduler *sched,
73                               struct drm_sched_rq *rq)
74 {
75         spin_lock_init(&rq->lock);
76         INIT_LIST_HEAD(&rq->entities);
77         rq->current_entity = NULL;
78         rq->sched = sched;
79 }
80
81 /**
82  * drm_sched_rq_add_entity - add an entity
83  *
84  * @rq: scheduler run queue
85  * @entity: scheduler entity
86  *
87  * Adds a scheduler entity to the run queue.
88  */
89 static void drm_sched_rq_add_entity(struct drm_sched_rq *rq,
90                                     struct drm_sched_entity *entity)
91 {
92         if (!list_empty(&entity->list))
93                 return;
94         spin_lock(&rq->lock);
95         list_add_tail(&entity->list, &rq->entities);
96         spin_unlock(&rq->lock);
97 }
98
99 /**
100  * drm_sched_rq_remove_entity - remove an entity
101  *
102  * @rq: scheduler run queue
103  * @entity: scheduler entity
104  *
105  * Removes a scheduler entity from the run queue.
106  */
107 static void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
108                                        struct drm_sched_entity *entity)
109 {
110         if (list_empty(&entity->list))
111                 return;
112         spin_lock(&rq->lock);
113         list_del_init(&entity->list);
114         if (rq->current_entity == entity)
115                 rq->current_entity = NULL;
116         spin_unlock(&rq->lock);
117 }
118
119 /**
120  * drm_sched_rq_select_entity - Select an entity which could provide a job to run
121  *
122  * @rq: scheduler run queue to check.
123  *
124  * Try to find a ready entity, returns NULL if none found.
125  */
126 static struct drm_sched_entity *
127 drm_sched_rq_select_entity(struct drm_sched_rq *rq)
128 {
129         struct drm_sched_entity *entity;
130
131         spin_lock(&rq->lock);
132
133         entity = rq->current_entity;
134         if (entity) {
135                 list_for_each_entry_continue(entity, &rq->entities, list) {
136                         if (drm_sched_entity_is_ready(entity)) {
137                                 rq->current_entity = entity;
138                                 spin_unlock(&rq->lock);
139                                 return entity;
140                         }
141                 }
142         }
143
144         list_for_each_entry(entity, &rq->entities, list) {
145
146                 if (drm_sched_entity_is_ready(entity)) {
147                         rq->current_entity = entity;
148                         spin_unlock(&rq->lock);
149                         return entity;
150                 }
151
152                 if (entity == rq->current_entity)
153                         break;
154         }
155
156         spin_unlock(&rq->lock);
157
158         return NULL;
159 }
160
161 /**
162  * drm_sched_entity_init - Init a context entity used by the scheduler
163  * when submitting jobs to the HW ring.
164  *
165  * @entity: scheduler entity to init
166  * @rq_list: the list of run queues on which jobs from this
167  *           entity can be submitted
168  * @num_rq_list: number of run queues in rq_list
169  * @guilty: atomic_t set to 1 when a job on this queue
170  *          is found to be guilty of causing a timeout
171  *
172  * Note: the rq_list should have at least one element to schedule
173  *       the entity
174  *
175  * Returns 0 on success or a negative error code on failure.
176  */
177 int drm_sched_entity_init(struct drm_sched_entity *entity,
178                           struct drm_sched_rq **rq_list,
179                           unsigned int num_rq_list,
180                           atomic_t *guilty)
181 {
182         if (!(entity && rq_list && num_rq_list > 0 && rq_list[0]))
183                 return -EINVAL;
184
185         memset(entity, 0, sizeof(struct drm_sched_entity));
186         INIT_LIST_HEAD(&entity->list);
187         entity->rq = rq_list[0];
188         entity->guilty = guilty;
189         entity->last_scheduled = NULL;
190
191         spin_lock_init(&entity->rq_lock);
192         spsc_queue_init(&entity->job_queue);
193
194         atomic_set(&entity->fence_seq, 0);
195         entity->fence_context = dma_fence_context_alloc(2);
196
197         return 0;
198 }
199 EXPORT_SYMBOL(drm_sched_entity_init);
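
/*
 * Illustrative sketch (not part of this file's build): one way a hypothetical
 * driver could create an entity that submits to the NORMAL run queue of a
 * single scheduler. The foo_* name is an assumption for illustration only.
 */
#if 0
static int foo_context_entity_init(struct drm_gpu_scheduler *sched,
				   struct drm_sched_entity *entity,
				   atomic_t *guilty)
{
	/* Jobs from this entity may only run on sched's NORMAL run queue. */
	struct drm_sched_rq *rq = &sched->sched_rq[DRM_SCHED_PRIORITY_NORMAL];

	return drm_sched_entity_init(entity, &rq, 1, guilty);
}
#endif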
200
201 /**
202  * drm_sched_entity_is_idle - Check if entity is idle
203  *
204  * @entity: scheduler entity
205  *
206  * Returns true if the entity does not have any unscheduled jobs.
207  */
208 static bool drm_sched_entity_is_idle(struct drm_sched_entity *entity)
209 {
210         rmb(); /* for list_empty to work without lock */
211
212         if (list_empty(&entity->list) ||
213             spsc_queue_peek(&entity->job_queue) == NULL)
214                 return true;
215
216         return false;
217 }
218
219 /**
220  * drm_sched_entity_is_ready - Check if entity is ready
221  *
222  * @entity: scheduler entity
223  *
224  * Returns true if the entity could provide a job.
225  */
226 static bool drm_sched_entity_is_ready(struct drm_sched_entity *entity)
227 {
228         if (spsc_queue_peek(&entity->job_queue) == NULL)
229                 return false;
230
231         if (READ_ONCE(entity->dependency))
232                 return false;
233
234         return true;
235 }
236
237 static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f,
238                                     struct dma_fence_cb *cb)
239 {
240         struct drm_sched_job *job = container_of(cb, struct drm_sched_job,
241                                                  finish_cb);
242         drm_sched_fence_finished(job->s_fence);
243         WARN_ON(job->s_fence->parent);
244         dma_fence_put(&job->s_fence->finished);
245         job->sched->ops->free_job(job);
246 }
247
248
249 /**
250  * drm_sched_entity_flush - Flush a context entity
251  *
252  * @entity: scheduler entity
253  * @timeout: time in jiffies to wait for the job queue to become empty
254  *
255  * First part of the split drm_sched_entity_fini(): it waits for the entity to
256  * become idle and removes it from the runqueue when the process was killed.
257  *
258  * Returns the remaining time in jiffies left from the input timeout
259  */
260 long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout)
261 {
262         struct drm_gpu_scheduler *sched;
263         struct task_struct *last_user;
264         long ret = timeout;
265
266         sched = entity->rq->sched;
267         /*
268          * The client will not queue more IBs during this fini; consume the
269          * existing queued IBs or discard them on SIGKILL.
270          */
271         if (current->flags & PF_EXITING) {
272                 if (timeout)
273                         ret = wait_event_timeout(
274                                         sched->job_scheduled,
275                                         drm_sched_entity_is_idle(entity),
276                                         timeout);
277         } else
278                 wait_event_killable(sched->job_scheduled, drm_sched_entity_is_idle(entity));
279
280
281         /* For a killed process, disable any further IB enqueue right now */
282         last_user = cmpxchg(&entity->last_user, current->group_leader, NULL);
283         if ((!last_user || last_user == current->group_leader) &&
284             (current->flags & PF_EXITING) && (current->exit_code == SIGKILL))
285                 drm_sched_rq_remove_entity(entity->rq, entity);
286
287         return ret;
288 }
289 EXPORT_SYMBOL(drm_sched_entity_flush);
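
/*
 * Illustrative sketch (not part of this file's build): the two-step teardown a
 * hypothetical driver could perform, flushing while the submitting process
 * exits and finishing the entity once nothing can reference it any more. The
 * foo_* name is an assumption; drm_sched_entity_destroy() below combines both
 * steps when no such split is needed.
 */
#if 0
static void foo_context_teardown(struct drm_sched_entity *entity)
{
	/* Step 1: wait (bounded) for already queued jobs to be scheduled. */
	drm_sched_entity_flush(entity, MAX_WAIT_SCHED_ENTITY_Q_EMPTY);

	/* Step 2: signal/free anything still queued and drop the entity. */
	drm_sched_entity_fini(entity);
}
#endif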
290
291 /**
292  * drm_sched_entity_fini - Destroy a context entity
293  *
294  * @entity: scheduler entity
295  *
296  * This should be called after drm_sched_entity_flush(). It goes over the
297  * entity and signals all jobs with an error code if the process was killed.
298  *
299  */
300 void drm_sched_entity_fini(struct drm_sched_entity *entity)
301 {
302         struct drm_gpu_scheduler *sched;
303
304         sched = entity->rq->sched;
305         drm_sched_rq_remove_entity(entity->rq, entity);
306
307         /* Consumption of existing IBs wasn't completed. Forcefully
308          * remove them here.
309          */
310         if (spsc_queue_peek(&entity->job_queue)) {
311                 struct drm_sched_job *job;
312                 int r;
313
314                 /* Park the scheduler thread for a moment to make sure it is not
315                  * processing our entity.
316                  */
317                 kthread_park(sched->thread);
318                 kthread_unpark(sched->thread);
319                 if (entity->dependency) {
320                         dma_fence_remove_callback(entity->dependency,
321                                                   &entity->cb);
322                         dma_fence_put(entity->dependency);
323                         entity->dependency = NULL;
324                 }
325
326                 while ((job = to_drm_sched_job(spsc_queue_pop(&entity->job_queue)))) {
327                         struct drm_sched_fence *s_fence = job->s_fence;
328                         drm_sched_fence_scheduled(s_fence);
329                         dma_fence_set_error(&s_fence->finished, -ESRCH);
330
331                         /*
332                          * When the pipe is hung by an older entity, a new entity might
333                          * not even have had a chance to submit its first job to the HW,
334                          * so entity->last_scheduled will still be NULL.
335                          */
336                         if (!entity->last_scheduled) {
337                                 drm_sched_entity_kill_jobs_cb(NULL, &job->finish_cb);
338                         } else {
339                                 r = dma_fence_add_callback(entity->last_scheduled, &job->finish_cb,
340                                                                 drm_sched_entity_kill_jobs_cb);
341                                 if (r == -ENOENT)
342                                         drm_sched_entity_kill_jobs_cb(NULL, &job->finish_cb);
343                                 else if (r)
344                                         DRM_ERROR("fence add callback failed (%d)\n", r);
345                         }
346                 }
347         }
348
349         dma_fence_put(entity->last_scheduled);
350         entity->last_scheduled = NULL;
351 }
352 EXPORT_SYMBOL(drm_sched_entity_fini);
353
354 /**
355  * drm_sched_entity_destroy - Destroy a context entity
356  *
357  * @entity: scheduler entity
358  *
359  * Calls drm_sched_entity_flush() and drm_sched_entity_fini()
360  */
361 void drm_sched_entity_destroy(struct drm_sched_entity *entity)
362 {
363         drm_sched_entity_flush(entity, MAX_WAIT_SCHED_ENTITY_Q_EMPTY);
364         drm_sched_entity_fini(entity);
365 }
366 EXPORT_SYMBOL(drm_sched_entity_destroy);
367
368 static void drm_sched_entity_wakeup(struct dma_fence *f, struct dma_fence_cb *cb)
369 {
370         struct drm_sched_entity *entity =
371                 container_of(cb, struct drm_sched_entity, cb);
372         entity->dependency = NULL;
373         dma_fence_put(f);
374         drm_sched_wakeup(entity->rq->sched);
375 }
376
377 static void drm_sched_entity_clear_dep(struct dma_fence *f, struct dma_fence_cb *cb)
378 {
379         struct drm_sched_entity *entity =
380                 container_of(cb, struct drm_sched_entity, cb);
381         entity->dependency = NULL;
382         dma_fence_put(f);
383 }
384
385 /**
386  * drm_sched_entity_set_rq - Sets the run queue for an entity
387  *
388  * @entity: scheduler entity
389  * @rq: scheduler run queue
390  *
391  * Sets the run queue for an entity and removes the entity from the previous
392  * run queue in which it was present.
393  */
394 void drm_sched_entity_set_rq(struct drm_sched_entity *entity,
395                              struct drm_sched_rq *rq)
396 {
397         if (entity->rq == rq)
398                 return;
399
400         BUG_ON(!rq);
401
402         spin_lock(&entity->rq_lock);
403         drm_sched_rq_remove_entity(entity->rq, entity);
404         entity->rq = rq;
405         drm_sched_rq_add_entity(rq, entity);
406         spin_unlock(&entity->rq_lock);
407 }
408 EXPORT_SYMBOL(drm_sched_entity_set_rq);
409
410 /**
411  * drm_sched_dependency_optimized - check whether the dependency can be optimized
412  *
413  * @fence: the dependency fence
414  * @entity: the entity which depends on the above fence
415  *
416  * Returns true if the dependency can be optimized and false otherwise
417  */
418 bool drm_sched_dependency_optimized(struct dma_fence *fence,
419                                     struct drm_sched_entity *entity)
420 {
421         struct drm_gpu_scheduler *sched = entity->rq->sched;
422         struct drm_sched_fence *s_fence;
423
424         if (!fence || dma_fence_is_signaled(fence))
425                 return false;
426         if (fence->context == entity->fence_context)
427                 return true;
428         s_fence = to_drm_sched_fence(fence);
429         if (s_fence && s_fence->sched == sched)
430                 return true;
431
432         return false;
433 }
434 EXPORT_SYMBOL(drm_sched_dependency_optimized);
435
436 static bool drm_sched_entity_add_dependency_cb(struct drm_sched_entity *entity)
437 {
438         struct drm_gpu_scheduler *sched = entity->rq->sched;
439         struct dma_fence *fence = entity->dependency;
440         struct drm_sched_fence *s_fence;
441
442         if (fence->context == entity->fence_context ||
443             fence->context == entity->fence_context + 1) {
444                 /*
445                  * Fence is a scheduled/finished fence from a job
446                  * which belongs to the same entity, we can ignore
447                  * fences from ourselves
448                  */
449                 dma_fence_put(entity->dependency);
450                 return false;
451         }
452
453         s_fence = to_drm_sched_fence(fence);
454         if (s_fence && s_fence->sched == sched) {
455
456                 /*
457                  * Fence is from the same scheduler, only need to wait for
458                  * it to be scheduled
459                  */
460                 fence = dma_fence_get(&s_fence->scheduled);
461                 dma_fence_put(entity->dependency);
462                 entity->dependency = fence;
463                 if (!dma_fence_add_callback(fence, &entity->cb,
464                                             drm_sched_entity_clear_dep))
465                         return true;
466
467                 /* Ignore it when it is already scheduled */
468                 dma_fence_put(fence);
469                 return false;
470         }
471
472         if (!dma_fence_add_callback(entity->dependency, &entity->cb,
473                                     drm_sched_entity_wakeup))
474                 return true;
475
476         dma_fence_put(entity->dependency);
477         return false;
478 }
479
480 static struct drm_sched_job *
481 drm_sched_entity_pop_job(struct drm_sched_entity *entity)
482 {
483         struct drm_gpu_scheduler *sched = entity->rq->sched;
484         struct drm_sched_job *sched_job = to_drm_sched_job(
485                                                 spsc_queue_peek(&entity->job_queue));
486
487         if (!sched_job)
488                 return NULL;
489
490         while ((entity->dependency = sched->ops->dependency(sched_job, entity)))
491                 if (drm_sched_entity_add_dependency_cb(entity))
492                         return NULL;
493
494         /* skip jobs from an entity that was marked guilty */
495         if (entity->guilty && atomic_read(entity->guilty))
496                 dma_fence_set_error(&sched_job->s_fence->finished, -ECANCELED);
497
498         dma_fence_put(entity->last_scheduled);
499         entity->last_scheduled = dma_fence_get(&sched_job->s_fence->finished);
500
501         spsc_queue_pop(&entity->job_queue);
502         return sched_job;
503 }
504
505 /**
506  * drm_sched_entity_push_job - Submit a job to the entity's job queue
507  *
508  * @sched_job: job to submit
509  * @entity: scheduler entity
510  *
511  * Note: To guarantee that the order of insertion into the queue matches the
512  * order of the job fence sequence numbers, this function should be called
513  * with drm_sched_job_init() under a common lock.
516  */
517 void drm_sched_entity_push_job(struct drm_sched_job *sched_job,
518                                struct drm_sched_entity *entity)
519 {
520         struct drm_gpu_scheduler *sched = sched_job->sched;
521         bool first = false;
522
523         trace_drm_sched_job(sched_job, entity);
524
525         WRITE_ONCE(entity->last_user, current->group_leader);
526         first = spsc_queue_push(&entity->job_queue, &sched_job->queue_node);
527
528         /* first job wakes up scheduler */
529         if (first) {
530                 /* Add the entity to the run queue */
531                 spin_lock(&entity->rq_lock);
532                 if (!entity->rq) {
533                         DRM_ERROR("Trying to push to a killed entity\n");
534                         spin_unlock(&entity->rq_lock);
535                         return;
536                 }
537                 drm_sched_rq_add_entity(entity->rq, entity);
538                 spin_unlock(&entity->rq_lock);
539                 drm_sched_wakeup(sched);
540         }
541 }
542 EXPORT_SYMBOL(drm_sched_entity_push_job);
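
/*
 * Illustrative sketch (not part of this file's build): the submission path
 * implied by the locking note above. A hypothetical driver initializes the
 * job and pushes it while holding one common lock so that queue order and
 * fence sequence numbers cannot be reordered against each other. The foo_*
 * names and the submit_lock are assumptions for illustration only.
 */
#if 0
struct foo_context {
	struct mutex submit_lock;
	struct drm_sched_entity entity;
};

static int foo_submit_job(struct foo_context *ctx, struct drm_sched_job *job)
{
	int r;

	mutex_lock(&ctx->submit_lock);
	r = drm_sched_job_init(job, &ctx->entity, ctx);
	if (!r)
		drm_sched_entity_push_job(job, &ctx->entity);
	mutex_unlock(&ctx->submit_lock);

	return r;
}
#endif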
543
544 /* drm_sched_job_finish() runs from finish_work after the hw fence signaled.
545  */
546 static void drm_sched_job_finish(struct work_struct *work)
547 {
548         struct drm_sched_job *s_job = container_of(work, struct drm_sched_job,
549                                                    finish_work);
550         struct drm_gpu_scheduler *sched = s_job->sched;
551
552         /*
553          * Canceling the timeout without removing our job from the ring mirror
554          * list is safe, as we will only end up in this worker if our job's
555          * finished fence has been signaled. So even if another worker
556          * manages to find this job as the next job in the list, the fence
557          * signaled check below will prevent the timeout from being restarted.
558          */
559         cancel_delayed_work_sync(&s_job->work_tdr);
560
561         spin_lock(&sched->job_list_lock);
562         /* queue TDR for next job */
563         if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
564             !list_is_last(&s_job->node, &sched->ring_mirror_list)) {
565                 struct drm_sched_job *next = list_next_entry(s_job, node);
566
567                 if (!dma_fence_is_signaled(&next->s_fence->finished))
568                         schedule_delayed_work(&next->work_tdr, sched->timeout);
569         }
570         /* remove job from ring_mirror_list */
571         list_del(&s_job->node);
572         spin_unlock(&sched->job_list_lock);
573
574         dma_fence_put(&s_job->s_fence->finished);
575         sched->ops->free_job(s_job);
576 }
577
578 static void drm_sched_job_finish_cb(struct dma_fence *f,
579                                     struct dma_fence_cb *cb)
580 {
581         struct drm_sched_job *job = container_of(cb, struct drm_sched_job,
582                                                  finish_cb);
583         schedule_work(&job->finish_work);
584 }
585
586 static void drm_sched_job_begin(struct drm_sched_job *s_job)
587 {
588         struct drm_gpu_scheduler *sched = s_job->sched;
589
590         dma_fence_add_callback(&s_job->s_fence->finished, &s_job->finish_cb,
591                                drm_sched_job_finish_cb);
592
593         spin_lock(&sched->job_list_lock);
594         list_add_tail(&s_job->node, &sched->ring_mirror_list);
595         if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
596             list_first_entry_or_null(&sched->ring_mirror_list,
597                                      struct drm_sched_job, node) == s_job)
598                 schedule_delayed_work(&s_job->work_tdr, sched->timeout);
599         spin_unlock(&sched->job_list_lock);
600 }
601
602 static void drm_sched_job_timedout(struct work_struct *work)
603 {
604         struct drm_sched_job *job = container_of(work, struct drm_sched_job,
605                                                  work_tdr.work);
606
607         job->sched->ops->timedout_job(job);
608 }
609
610 /**
611  * drm_sched_hw_job_reset - detach the hw fences of in-flight jobs before a reset
612  *
613  * @sched: scheduler instance
614  * @bad: bad scheduler job to blame, or NULL
615  *
616  */
617 void drm_sched_hw_job_reset(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad)
618 {
619         struct drm_sched_job *s_job;
620         struct drm_sched_entity *entity, *tmp;
621         int i;
622
623         spin_lock(&sched->job_list_lock);
624         list_for_each_entry_reverse(s_job, &sched->ring_mirror_list, node) {
625                 if (s_job->s_fence->parent &&
626                     dma_fence_remove_callback(s_job->s_fence->parent,
627                                               &s_job->s_fence->cb)) {
628                         dma_fence_put(s_job->s_fence->parent);
629                         s_job->s_fence->parent = NULL;
630                         atomic_dec(&sched->hw_rq_count);
631                 }
632         }
633         spin_unlock(&sched->job_list_lock);
634
635         if (bad && bad->s_priority != DRM_SCHED_PRIORITY_KERNEL) {
636                 atomic_inc(&bad->karma);
637                 /* don't increase @bad's karma if it's from the KERNEL RQ,
638                  * because a GPU hang can sometimes corrupt kernel jobs (like VM
639                  * updating jobs), but kernel jobs are always considered good.
640                  */
641                 for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_KERNEL; i++) {
642                         struct drm_sched_rq *rq = &sched->sched_rq[i];
643
644                         spin_lock(&rq->lock);
645                         list_for_each_entry_safe(entity, tmp, &rq->entities, list) {
646                                 if (bad->s_fence->scheduled.context == entity->fence_context) {
647                                         if (atomic_read(&bad->karma) > bad->sched->hang_limit)
648                                                 if (entity->guilty)
649                                                         atomic_set(entity->guilty, 1);
650                                         break;
651                                 }
652                         }
653                         spin_unlock(&rq->lock);
654                         if (&entity->list != &rq->entities)
655                                 break;
656                 }
657         }
658 }
659 EXPORT_SYMBOL(drm_sched_hw_job_reset);
660
661 /**
662  * drm_sched_job_recovery - recover jobs after a reset
663  *
664  * @sched: scheduler instance
665  *
666  */
667 void drm_sched_job_recovery(struct drm_gpu_scheduler *sched)
668 {
669         struct drm_sched_job *s_job, *tmp;
670         bool found_guilty = false;
671         int r;
672
673         spin_lock(&sched->job_list_lock);
674         s_job = list_first_entry_or_null(&sched->ring_mirror_list,
675                                          struct drm_sched_job, node);
676         if (s_job && sched->timeout != MAX_SCHEDULE_TIMEOUT)
677                 schedule_delayed_work(&s_job->work_tdr, sched->timeout);
678
679         list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
680                 struct drm_sched_fence *s_fence = s_job->s_fence;
681                 struct dma_fence *fence;
682                 uint64_t guilty_context;
683
684                 if (!found_guilty && atomic_read(&s_job->karma) > sched->hang_limit) {
685                         found_guilty = true;
686                         guilty_context = s_job->s_fence->scheduled.context;
687                 }
688
689                 if (found_guilty && s_job->s_fence->scheduled.context == guilty_context)
690                         dma_fence_set_error(&s_fence->finished, -ECANCELED);
691
692                 spin_unlock(&sched->job_list_lock);
693                 fence = sched->ops->run_job(s_job);
694                 atomic_inc(&sched->hw_rq_count);
695
696                 if (fence) {
697                         s_fence->parent = dma_fence_get(fence);
698                         r = dma_fence_add_callback(fence, &s_fence->cb,
699                                                    drm_sched_process_job);
700                         if (r == -ENOENT)
701                                 drm_sched_process_job(fence, &s_fence->cb);
702                         else if (r)
703                                 DRM_ERROR("fence add callback failed (%d)\n",
704                                           r);
705                         dma_fence_put(fence);
706                 } else {
707                         drm_sched_process_job(NULL, &s_fence->cb);
708                 }
709                 spin_lock(&sched->job_list_lock);
710         }
711         spin_unlock(&sched->job_list_lock);
712 }
713 EXPORT_SYMBOL(drm_sched_job_recovery);
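
/*
 * Illustrative sketch (not part of this file's build): a bare-bones recovery
 * sequence a hypothetical driver could run after a job timeout. The
 * foo_reset_hardware() step stands in for the driver-specific reset; real
 * drivers wrap additional bookkeeping around these calls.
 */
#if 0
static void foo_reset_hardware(void);	/* assumed driver-specific helper */

static void foo_gpu_recover(struct drm_gpu_scheduler *sched,
			    struct drm_sched_job *bad)
{
	/* Stop the scheduler thread so no new jobs reach the hardware. */
	kthread_park(sched->thread);

	/* Detach the hw fences of in-flight jobs and blame @bad. */
	drm_sched_hw_job_reset(sched, bad);

	/* Bring the hardware back to a working state. */
	foo_reset_hardware();

	/* Re-run the jobs on the mirror list and restart the thread. */
	drm_sched_job_recovery(sched);
	kthread_unpark(sched->thread);
}
#endif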
714
715 /**
716  * drm_sched_job_init - init a scheduler job
717  *
718  * @job: scheduler job to init
719  * @entity: scheduler entity to use
720  * @owner: job owner for debugging
721  *
722  * Refer to drm_sched_entity_push_job() documentation
723  * for locking considerations.
724  *
725  * Returns 0 for success, negative error code otherwise.
726  */
727 int drm_sched_job_init(struct drm_sched_job *job,
728                        struct drm_sched_entity *entity,
729                        void *owner)
730 {
731         struct drm_gpu_scheduler *sched = entity->rq->sched;
732
733         job->sched = sched;
734         job->entity = entity;
735         job->s_priority = entity->rq - sched->sched_rq;
736         job->s_fence = drm_sched_fence_create(entity, owner);
737         if (!job->s_fence)
738                 return -ENOMEM;
739         job->id = atomic64_inc_return(&sched->job_id_count);
740
741         INIT_WORK(&job->finish_work, drm_sched_job_finish);
742         INIT_LIST_HEAD(&job->node);
743         INIT_DELAYED_WORK(&job->work_tdr, drm_sched_job_timedout);
744
745         return 0;
746 }
747 EXPORT_SYMBOL(drm_sched_job_init);
748
749 /**
750  * drm_sched_ready - is the scheduler ready
751  *
752  * @sched: scheduler instance
753  *
754  * Return true if we can push more jobs to the hw, otherwise false.
755  */
756 static bool drm_sched_ready(struct drm_gpu_scheduler *sched)
757 {
758         return atomic_read(&sched->hw_rq_count) <
759                 sched->hw_submission_limit;
760 }
761
762 /**
763  * drm_sched_wakeup - Wake up the scheduler when it is ready
764  *
765  * @sched: scheduler instance
766  *
767  */
768 static void drm_sched_wakeup(struct drm_gpu_scheduler *sched)
769 {
770         if (drm_sched_ready(sched))
771                 wake_up_interruptible(&sched->wake_up_worker);
772 }
773
774 /**
775  * drm_sched_select_entity - Select next entity to process
776  *
777  * @sched: scheduler instance
778  *
779  * Returns the entity to process or NULL if none are found.
780  */
781 static struct drm_sched_entity *
782 drm_sched_select_entity(struct drm_gpu_scheduler *sched)
783 {
784         struct drm_sched_entity *entity;
785         int i;
786
787         if (!drm_sched_ready(sched))
788                 return NULL;
789
790         /* Kernel run queue has higher priority than normal run queue */
791         for (i = DRM_SCHED_PRIORITY_MAX - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
792                 entity = drm_sched_rq_select_entity(&sched->sched_rq[i]);
793                 if (entity)
794                         break;
795         }
796
797         return entity;
798 }
799
800 /**
801  * drm_sched_process_job - process a job
802  *
803  * @f: fence
804  * @cb: fence callbacks
805  *
806  * Called after the job has finished execution.
807  */
808 static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb)
809 {
810         struct drm_sched_fence *s_fence =
811                 container_of(cb, struct drm_sched_fence, cb);
812         struct drm_gpu_scheduler *sched = s_fence->sched;
813
814         dma_fence_get(&s_fence->finished);
815         atomic_dec(&sched->hw_rq_count);
816         drm_sched_fence_finished(s_fence);
817
818         trace_drm_sched_process_job(s_fence);
819         dma_fence_put(&s_fence->finished);
820         wake_up_interruptible(&sched->wake_up_worker);
821 }
822
823 /**
824  * drm_sched_blocked - check if the scheduler is blocked
825  *
826  * @sched: scheduler instance
827  *
828  * Returns true if blocked, otherwise false.
829  */
830 static bool drm_sched_blocked(struct drm_gpu_scheduler *sched)
831 {
832         if (kthread_should_park()) {
833                 kthread_parkme();
834                 return true;
835         }
836
837         return false;
838 }
839
840 /**
841  * drm_sched_main - main scheduler thread
842  *
843  * @param: scheduler instance
844  *
845  * Returns 0.
846  */
847 static int drm_sched_main(void *param)
848 {
849         struct sched_param sparam = {.sched_priority = 1};
850         struct drm_gpu_scheduler *sched = (struct drm_gpu_scheduler *)param;
851         int r;
852
853         sched_setscheduler(current, SCHED_FIFO, &sparam);
854
855         while (!kthread_should_stop()) {
856                 struct drm_sched_entity *entity = NULL;
857                 struct drm_sched_fence *s_fence;
858                 struct drm_sched_job *sched_job;
859                 struct dma_fence *fence;
860
861                 wait_event_interruptible(sched->wake_up_worker,
862                                          (!drm_sched_blocked(sched) &&
863                                           (entity = drm_sched_select_entity(sched))) ||
864                                          kthread_should_stop());
865
866                 if (!entity)
867                         continue;
868
869                 sched_job = drm_sched_entity_pop_job(entity);
870                 if (!sched_job)
871                         continue;
872
873                 s_fence = sched_job->s_fence;
874
875                 atomic_inc(&sched->hw_rq_count);
876                 drm_sched_job_begin(sched_job);
877
878                 fence = sched->ops->run_job(sched_job);
879                 drm_sched_fence_scheduled(s_fence);
880
881                 if (fence) {
882                         s_fence->parent = dma_fence_get(fence);
883                         r = dma_fence_add_callback(fence, &s_fence->cb,
884                                                    drm_sched_process_job);
885                         if (r == -ENOENT)
886                                 drm_sched_process_job(fence, &s_fence->cb);
887                         else if (r)
888                                 DRM_ERROR("fence add callback failed (%d)\n",
889                                           r);
890                         dma_fence_put(fence);
891                 } else {
892                         drm_sched_process_job(NULL, &s_fence->cb);
893                 }
894
895                 wake_up(&sched->job_scheduled);
896         }
897         return 0;
898 }
899
900 /**
901  * drm_sched_init - Init a gpu scheduler instance
902  *
903  * @sched: scheduler instance
904  * @ops: backend operations for this scheduler
905  * @hw_submission: number of hw submissions that can be in flight
906  * @hang_limit: number of times to allow a job to hang before dropping it
907  * @timeout: timeout value in jiffies for the scheduler
908  * @name: name used for debugging
909  *
910  * Return 0 on success, otherwise error code.
911  */
912 int drm_sched_init(struct drm_gpu_scheduler *sched,
913                    const struct drm_sched_backend_ops *ops,
914                    unsigned hw_submission,
915                    unsigned hang_limit,
916                    long timeout,
917                    const char *name)
918 {
919         int i;
920         sched->ops = ops;
921         sched->hw_submission_limit = hw_submission;
922         sched->name = name;
923         sched->timeout = timeout;
924         sched->hang_limit = hang_limit;
925         for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_MAX; i++)
926                 drm_sched_rq_init(sched, &sched->sched_rq[i]);
927
928         init_waitqueue_head(&sched->wake_up_worker);
929         init_waitqueue_head(&sched->job_scheduled);
930         INIT_LIST_HEAD(&sched->ring_mirror_list);
931         spin_lock_init(&sched->job_list_lock);
932         atomic_set(&sched->hw_rq_count, 0);
933         atomic64_set(&sched->job_id_count, 0);
934
935         /* Each scheduler will run on a separate kernel thread */
936         sched->thread = kthread_run(drm_sched_main, sched, sched->name);
937         if (IS_ERR(sched->thread)) {
938                 DRM_ERROR("Failed to create scheduler for %s.\n", name);
939                 return PTR_ERR(sched->thread);
940         }
941
942         return 0;
943 }
944 EXPORT_SYMBOL(drm_sched_init);
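
/*
 * Illustrative sketch (not part of this file's build): the backend a
 * hypothetical driver might register with drm_sched_init(). The foo_*
 * callbacks and the limits passed below are assumptions for illustration.
 */
#if 0
static const struct drm_sched_backend_ops foo_sched_ops = {
	.dependency	= foo_job_dependency,	/* next fence to wait on, or NULL */
	.run_job	= foo_job_run,		/* push the job to the hw ring */
	.timedout_job	= foo_job_timedout,	/* handle a hw hang */
	.free_job	= foo_job_free,		/* release the driver's job struct */
};

static int foo_ring_sched_init(struct drm_gpu_scheduler *sched)
{
	/* Two jobs in flight, hang_limit 0, 10 second timeout. */
	return drm_sched_init(sched, &foo_sched_ops, 2, 0,
			      msecs_to_jiffies(10000), "foo_ring");
}
#endif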
945
946 /**
947  * drm_sched_fini - Destroy a gpu scheduler
948  *
949  * @sched: scheduler instance
950  *
951  * Tears down and cleans up the scheduler.
952  */
953 void drm_sched_fini(struct drm_gpu_scheduler *sched)
954 {
955         if (sched->thread)
956                 kthread_stop(sched->thread);
957 }
958 EXPORT_SYMBOL(drm_sched_fini);
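
/*
 * Illustrative sketch (not part of this file's build): teardown order for a
 * hypothetical driver. Entities are destroyed first, since they reference the
 * scheduler through entity->rq, and only then is the scheduler itself torn
 * down. The foo_* name is an assumption for illustration only.
 */
#if 0
static void foo_device_fini(struct drm_sched_entity *entity,
			    struct drm_gpu_scheduler *sched)
{
	/* Flush and finish every entity that uses this scheduler... */
	drm_sched_entity_destroy(entity);

	/* ...then stop the scheduler thread itself. */
	drm_sched_fini(sched);
}
#endif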