drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c

   1 /*
   2  * Copyright 2014 Advanced Micro Devices, Inc.
   3  * All Rights Reserved.
   4  *
   5  * Permission is hereby granted, free of charge, to any person obtaining a
   6  * copy of this software and associated documentation files (the
   7  * "Software"), to deal in the Software without restriction, including
   8  * without limitation the rights to use, copy, modify, merge, publish,
   9  * distribute, sub license, and/or sell copies of the Software, and to
  10  * permit persons to whom the Software is furnished to do so, subject to
  11  * the following conditions:
  12  *
  13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  15  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  16  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
  17  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  18  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  19  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  20  *
  21  * The above copyright notice and this permission notice (including the
  22  * next paragraph) shall be included in all copies or substantial portions
  23  * of the Software.
  24  *
  25  */
  26 /*
  27  * Authors:
  28  *    Christian König <christian.koenig@amd.com>
  29  */
  30
  31 /**
  32  * DOC: MMU Notifier
  33  *
   34  * For coherent userptr handling the driver registers an MMU notifier, which
   35  * informs it about updates to the page tables of a process.
  36  *
  37  * When somebody tries to invalidate the page tables we block the update until
  38  * all operations on the pages in question are completed, then those pages are
  39  * marked as accessed and also dirty if it wasn't a read only access.
  40  *
   41  * New command submissions using the userptrs in question are delayed until all
   42  * page table invalidations are completed and we once more see a coherent process
   43  * address space.
  44  */
  45
  46 #include <linux/firmware.h>
  47 #include <linux/module.h>
  48 #include <linux/mmu_notifier.h>
  49 #include <linux/interval_tree.h>
  50 #include <drm/drmP.h>
  51 #include <drm/drm.h>
  52
  53 #include "amdgpu.h"
  54 #include "amdgpu_amdkfd.h"
  55
  56 /**
  57  * struct amdgpu_mn
  58  *
  59  * @adev: amdgpu device pointer
  60  * @mm: process address space
  61  * @mn: MMU notifier structure
  62  * @type: type of MMU notifier
  63  * @work: destruction work item
  64  * @node: hash table node to find structure by adev and mn
  65  * @lock: rw semaphore protecting the notifier nodes
  66  * @objects: interval tree containing amdgpu_mn_nodes
  67  * @read_lock: mutex for recursive locking of @lock
  68  * @recursion: depth of recursion
  69  *
  70  * Data for each amdgpu device and process address space.
  71  */
  72 struct amdgpu_mn {
  73         /* constant after initialisation */
  74         struct amdgpu_device    *adev;
  75         struct mm_struct        *mm;
  76         struct mmu_notifier     mn;
  77         enum amdgpu_mn_type     type;
  78
  79         /* only used on destruction */
  80         struct work_struct      work;
  81
  82         /* protected by adev->mn_lock */
  83         struct hlist_node       node;
  84
  85         /* objects protected by lock */
  86         struct rw_semaphore     lock;
  87         struct rb_root_cached   objects;
  88         struct mutex            read_lock;
  89         atomic_t                recursion;
  90 };
  91
  92 /**
  93  * struct amdgpu_mn_node
  94  *
  95  * @it: interval node defining start-last of the affected address range
  96  * @bos: list of all BOs in the affected address range
  97  *
  98  * Manages all BOs which are affected of a certain range of address space.
  99  */
 100 struct amdgpu_mn_node {
 101         struct interval_tree_node       it;
 102         struct list_head                bos;
 103 };
 104
 105 /**
 106  * amdgpu_mn_destroy - destroy the MMU notifier
 107  *
 108  * @work: previously sheduled work item
 109  *
 110  * Lazy destroys the notifier from a work item
 111  */
 112 static void amdgpu_mn_destroy(struct work_struct *work)
 113 {
 114         struct amdgpu_mn *amn = container_of(work, struct amdgpu_mn, work);
 115         struct amdgpu_device *adev = amn->adev;
 116         struct amdgpu_mn_node *node, *next_node;
 117         struct amdgpu_bo *bo, *next_bo;
 118
 119         mutex_lock(&adev->mn_lock);
 120         down_write(&amn->lock);
 121         hash_del(&amn->node);
 122         rbtree_postorder_for_each_entry_safe(node, next_node,
 123                                              &amn->objects.rb_root, it.rb) {
 124                 list_for_each_entry_safe(bo, next_bo, &node->bos, mn_list) {
 125                         bo->mn = NULL;
 126                         list_del_init(&bo->mn_list);
 127                 }
 128                 kfree(node);
 129         }
 130         up_write(&amn->lock);
 131         mutex_unlock(&adev->mn_lock);
 132         mmu_notifier_unregister_no_release(&amn->mn, amn->mm);
 133         kfree(amn);
 134 }
 135
 136 /**
 137  * amdgpu_mn_release - callback to notify about mm destruction
 138  *
 139  * @mn: our notifier
 140  * @mm: the mm this callback is about
 141  *
 142  * Shedule a work item to lazy destroy our notifier.
 143  */
 144 static void amdgpu_mn_release(struct mmu_notifier *mn,
 145                               struct mm_struct *mm)
 146 {
 147         struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
 148
 149         INIT_WORK(&amn->work, amdgpu_mn_destroy);
 150         schedule_work(&amn->work);
 151 }
 152
 153
 154 /**
 155  * amdgpu_mn_lock - take the write side lock for this notifier
 156  *
 157  * @mn: our notifier
 158  */
 159 void amdgpu_mn_lock(struct amdgpu_mn *mn)
 160 {
 161         if (mn)
 162                 down_write(&mn->lock);
 163 }
 164
 165 /**
 166  * amdgpu_mn_unlock - drop the write side lock for this notifier
 167  *
 168  * @mn: our notifier
 169  */
 170 void amdgpu_mn_unlock(struct amdgpu_mn *mn)
 171 {
 172         if (mn)
 173                 up_write(&mn->lock);
 174 }
 175
 176 /**
 177  * amdgpu_mn_read_lock - take the read side lock for this notifier
 178  *
 179  * @amn: our notifier
 180  */
 181 static int amdgpu_mn_read_lock(struct amdgpu_mn *amn, bool blockable)
 182 {
 183         if (blockable)
 184                 mutex_lock(&amn->read_lock);
 185         else if (!mutex_trylock(&amn->read_lock))
 186                 return -EAGAIN;
 187
 188         if (atomic_inc_return(&amn->recursion) == 1)
 189                 down_read_non_owner(&amn->lock);
 190         mutex_unlock(&amn->read_lock);
 191
 192         return 0;
 193 }
 194
 195 /**
 196  * amdgpu_mn_read_unlock - drop the read side lock for this notifier
 197  *
 198  * @amn: our notifier
 199  */
 200 static void amdgpu_mn_read_unlock(struct amdgpu_mn *amn)
 201 {
 202         if (atomic_dec_return(&amn->recursion) == 0)
 203                 up_read_non_owner(&amn->lock);
 204 }
 205
 206 /**
 207  * amdgpu_mn_invalidate_node - unmap all BOs of a node
 208  *
 209  * @node: the node with the BOs to unmap
 210  * @start: start of address range affected
 211  * @end: end of address range affected
 212  *
 213  * Block for operations on BOs to finish and mark pages as accessed and
 214  * potentially dirty.
 215  */
 216 static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
 217                                       unsigned long start,
 218                                       unsigned long end)
 219 {
 220         struct amdgpu_bo *bo;
 221         long r;
 222
 223         list_for_each_entry(bo, &node->bos, mn_list) {
 224
 225                 if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, start, end))
 226                         continue;
 227
 228                 r = reservation_object_wait_timeout_rcu(bo->tbo.resv,
 229                         true, false, MAX_SCHEDULE_TIMEOUT);
 230                 if (r <= 0)
 231                         DRM_ERROR("(%ld) failed to wait for user bo\n", r);
 232
 233                 amdgpu_ttm_tt_mark_user_pages(bo->tbo.ttm);
 234         }
 235 }
 236
 237 /**
 238  * amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change
 239  *
 240  * @mn: our notifier
 241  * @mm: the mm this callback is about
 242  * @start: start of updated range
 243  * @end: end of updated range
 244  *
 245  * Block for operations on BOs to finish and mark pages as accessed and
 246  * potentially dirty.
 247  */
 248 static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
 249                                                  struct mm_struct *mm,
 250                                                  unsigned long start,
 251                                                  unsigned long end,
 252                                                  bool blockable)
 253 {
 254         struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
 255         struct interval_tree_node *it;
 256
 257         /* notification is exclusive, but interval is inclusive */
 258         end -= 1;
 259
 260         /* TODO we should be able to split locking for interval tree and
 261          * amdgpu_mn_invalidate_node
 262          */
 263         if (amdgpu_mn_read_lock(amn, blockable))
 264                 return -EAGAIN;
 265
 266         it = interval_tree_iter_first(&amn->objects, start, end);
 267         while (it) {
 268                 struct amdgpu_mn_node *node;
 269
 270                 if (!blockable) {
 271                         amdgpu_mn_read_unlock(amn);
 272                         return -EAGAIN;
 273                 }
 274
 275                 node = container_of(it, struct amdgpu_mn_node, it);
 276                 it = interval_tree_iter_next(it, start, end);
 277
 278                 amdgpu_mn_invalidate_node(node, start, end);
 279         }
 280
 281         return 0;
 282 }
 283
 284 /**
 285  * amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm change
 286  *
 287  * @mn: our notifier
 288  * @mm: the mm this callback is about
 289  * @start: start of updated range
 290  * @end: end of updated range
 291  *
 292  * We temporarily evict all BOs between start and end. This
 293  * necessitates evicting all user-mode queues of the process. The BOs
 294  * are restorted in amdgpu_mn_invalidate_range_end_hsa.
 295  */
 296 static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
 297                                                  struct mm_struct *mm,
 298                                                  unsigned long start,
 299                                                  unsigned long end,
 300                                                  bool blockable)
 301 {
 302         struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
 303         struct interval_tree_node *it;
 304
 305         /* notification is exclusive, but interval is inclusive */
 306         end -= 1;
 307
 308         if (amdgpu_mn_read_lock(amn, blockable))
 309                 return -EAGAIN;
 310
 311         it = interval_tree_iter_first(&amn->objects, start, end);
 312         while (it) {
 313                 struct amdgpu_mn_node *node;
 314                 struct amdgpu_bo *bo;
 315
 316                 if (!blockable) {
 317                         amdgpu_mn_read_unlock(amn);
 318                         return -EAGAIN;
 319                 }
 320
 321                 node = container_of(it, struct amdgpu_mn_node, it);
 322                 it = interval_tree_iter_next(it, start, end);
 323
 324                 list_for_each_entry(bo, &node->bos, mn_list) {
 325                         struct kgd_mem *mem = bo->kfd_bo;
 326
 327                         if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
 328                                                          start, end))
 329                                 amdgpu_amdkfd_evict_userptr(mem, mm);
 330                 }
 331         }
 332
 333         return 0;
 334 }
 335
 336 /**
 337  * amdgpu_mn_invalidate_range_end - callback to notify about mm change
 338  *
 339  * @mn: our notifier
 340  * @mm: the mm this callback is about
 341  * @start: start of updated range
 342  * @end: end of updated range
 343  *
 344  * Release the lock again to allow new command submissions.
 345  */
 346 static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn,
 347                                            struct mm_struct *mm,
 348                                            unsigned long start,
 349                                            unsigned long end)
 350 {
 351         struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
 352
 353         amdgpu_mn_read_unlock(amn);
 354 }
 355
 356 static const struct mmu_notifier_ops amdgpu_mn_ops[] = {
 357         [AMDGPU_MN_TYPE_GFX] = {
 358                 .release = amdgpu_mn_release,
 359                 .invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx,
 360                 .invalidate_range_end = amdgpu_mn_invalidate_range_end,
 361         },
 362         [AMDGPU_MN_TYPE_HSA] = {
 363                 .release = amdgpu_mn_release,
 364                 .invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa,
 365                 .invalidate_range_end = amdgpu_mn_invalidate_range_end,
 366         },
 367 };
 368
 369 /* Low bits of any reasonable mm pointer will be unused due to struct
 370  * alignment. Use these bits to make a unique key from the mm pointer
 371  * and notifier type.
 372  */
 373 #define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
 374
 375 /**
 376  * amdgpu_mn_get - create notifier context
 377  *
 378  * @adev: amdgpu device pointer
 379  * @type: type of MMU notifier context
 380  *
 381  * Creates a notifier context for current->mm.
 382  */
 383 struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
 384                                 enum amdgpu_mn_type type)
 385 {
 386         struct mm_struct *mm = current->mm;
 387         struct amdgpu_mn *amn;
 388         unsigned long key = AMDGPU_MN_KEY(mm, type);
 389         int r;
 390
 391         mutex_lock(&adev->mn_lock);
 392         if (down_write_killable(&mm->mmap_sem)) {
 393                 mutex_unlock(&adev->mn_lock);
 394                 return ERR_PTR(-EINTR);
 395         }
 396
 397         hash_for_each_possible(adev->mn_hash, amn, node, key)
 398                 if (AMDGPU_MN_KEY(amn->mm, amn->type) == key)
 399                         goto release_locks;
 400
 401         amn = kzalloc(sizeof(*amn), GFP_KERNEL);
 402         if (!amn) {
 403                 amn = ERR_PTR(-ENOMEM);
 404                 goto release_locks;
 405         }
 406
 407         amn->adev = adev;
 408         amn->mm = mm;
 409         init_rwsem(&amn->lock);
 410         amn->type = type;
 411         amn->mn.ops = &amdgpu_mn_ops[type];
 412         amn->objects = RB_ROOT_CACHED;
 413         mutex_init(&amn->read_lock);
 414         atomic_set(&amn->recursion, 0);
 415
 416         r = __mmu_notifier_register(&amn->mn, mm);
 417         if (r)
 418                 goto free_amn;
 419
 420         hash_add(adev->mn_hash, &amn->node, AMDGPU_MN_KEY(mm, type));
 421
 422 release_locks:
 423         up_write(&mm->mmap_sem);
 424         mutex_unlock(&adev->mn_lock);
 425
 426         return amn;
 427
 428 free_amn:
 429         up_write(&mm->mmap_sem);
 430         mutex_unlock(&adev->mn_lock);
 431         kfree(amn);
 432
 433         return ERR_PTR(r);
 434 }
 435
 436 /**
 437  * amdgpu_mn_register - register a BO for notifier updates
 438  *
 439  * @bo: amdgpu buffer object
 440  * @addr: userptr addr we should monitor
 441  *
 442  * Registers an MMU notifier for the given BO at the specified address.
 443  * Returns 0 on success, -ERRNO if anything goes wrong.
 444  */
 445 int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
 446 {
 447         unsigned long end = addr + amdgpu_bo_size(bo) - 1;
 448         struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
 449         enum amdgpu_mn_type type =
 450                 bo->kfd_bo ? AMDGPU_MN_TYPE_HSA : AMDGPU_MN_TYPE_GFX;
 451         struct amdgpu_mn *amn;
 452         struct amdgpu_mn_node *node = NULL, *new_node;
 453         struct list_head bos;
 454         struct interval_tree_node *it;
 455
 456         amn = amdgpu_mn_get(adev, type);
 457         if (IS_ERR(amn))
 458                 return PTR_ERR(amn);
 459
 460         new_node = kmalloc(sizeof(*new_node), GFP_KERNEL);
 461         if (!new_node)
 462                 return -ENOMEM;
 463
 464         INIT_LIST_HEAD(&bos);
 465
 466         down_write(&amn->lock);
 467
 468         while ((it = interval_tree_iter_first(&amn->objects, addr, end))) {
 469                 kfree(node);
 470                 node = container_of(it, struct amdgpu_mn_node, it);
 471                 interval_tree_remove(&node->it, &amn->objects);
 472                 addr = min(it->start, addr);
 473                 end = max(it->last, end);
 474                 list_splice(&node->bos, &bos);
 475         }
 476
 477         if (!node)
 478                 node = new_node;
 479         else
 480                 kfree(new_node);
 481
 482         bo->mn = amn;
 483
 484         node->it.start = addr;
 485         node->it.last = end;
 486         INIT_LIST_HEAD(&node->bos);
 487         list_splice(&bos, &node->bos);
 488         list_add(&bo->mn_list, &node->bos);
 489
 490         interval_tree_insert(&node->it, &amn->objects);
 491
 492         up_write(&amn->lock);
 493
 494         return 0;
 495 }
 496
 497 /**
 498  * amdgpu_mn_unregister - unregister a BO for notifier updates
 499  *
 500  * @bo: amdgpu buffer object
 501  *
 502  * Remove any registration of MMU notifier updates from the buffer object.
 503  */
 504 void amdgpu_mn_unregister(struct amdgpu_bo *bo)
 505 {
 506         struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
 507         struct amdgpu_mn *amn;
 508         struct list_head *head;
 509
 510         mutex_lock(&adev->mn_lock);
 511
 512         amn = bo->mn;
 513         if (amn == NULL) {
 514                 mutex_unlock(&adev->mn_lock);
 515                 return;
 516         }
 517
 518         down_write(&amn->lock);
 519
 520         /* save the next list entry for later */
 521         head = bo->mn_list.next;
 522
 523         bo->mn = NULL;
 524         list_del_init(&bo->mn_list);
 525
 526         if (list_empty(head)) {
 527                 struct amdgpu_mn_node *node;
 528
 529                 node = container_of(head, struct amdgpu_mn_node, bos);
 530                 interval_tree_remove(&node->it, &amn->objects);
 531                 kfree(node);
 532         }
 533
 534         up_write(&amn->lock);
 535         mutex_unlock(&adev->mn_lock);
 536 }
 537