Linux Kernel 3.7.1
radeon_gart.c
1 /*
2  * Copyright 2008 Advanced Micro Devices, Inc.
3  * Copyright 2008 Red Hat Inc.
4  * Copyright 2009 Jerome Glisse.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22  * OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors: Dave Airlie
25  * Alex Deucher
26  * Jerome Glisse
27  */
28 #include <drm/drmP.h>
29 #include <drm/radeon_drm.h>
30 #include "radeon.h"
31 #include "radeon_reg.h"
32 
33 /*
34  * GART
35  * The GART (Graphics Aperture Remapping Table) is an aperture
36  * in the GPU's address space. System pages can be mapped into
37  * the aperture and look like contiguous pages from the GPU's
38  * perspective. A page table maps the pages in the aperture
39  * to the actual backing pages in system memory.
40  *
41  * Radeon GPUs support both an internal GART, as described above,
42  * and AGP. AGP works similarly, but the GART table is configured
43  * and maintained by the northbridge rather than the driver.
44  * Radeon hw has a separate AGP aperture that is programmed to
45  * point to the AGP aperture provided by the northbridge and the
46  * requests are passed through to the northbridge aperture.
47  * Both AGP and the internal GART can be used at the same time; however,
48  * that is not currently supported by the driver.
49  *
50  * This file handles the common internal GART management.
51  */
52 
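/*
 * Illustrative sketch (not part of the radeon code): the index arithmetic
 * that radeon_gart_bind() and radeon_gart_unbind() below rely on. A byte
 * offset into the aperture selects a GPU page, and several GPU pages may
 * share one (larger) CPU page. The helper name is hypothetical.
 */
static inline void radeon_gart_offset_to_indices(unsigned offset,
						 unsigned *gpu_page_idx,
						 unsigned *cpu_page_idx)
{
	/* which RADEON_GPU_PAGE_SIZE sized slot in the aperture */
	*gpu_page_idx = offset / RADEON_GPU_PAGE_SIZE;
	/* which CPU page backs that slot (PAGE_SIZE >= RADEON_GPU_PAGE_SIZE) */
	*cpu_page_idx = *gpu_page_idx / (PAGE_SIZE / RADEON_GPU_PAGE_SIZE);
}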
53 /*
54  * Common GART table functions.
55  */
66 int radeon_gart_table_ram_alloc(struct radeon_device *rdev)
67 {
68  void *ptr;
69 
70  ptr = pci_alloc_consistent(rdev->pdev, rdev->gart.table_size,
71  &rdev->gart.table_addr);
72  if (ptr == NULL) {
73  return -ENOMEM;
74  }
75 #ifdef CONFIG_X86
76  if (rdev->family == CHIP_RS400 || rdev->family == CHIP_RS480 ||
77  rdev->family == CHIP_RS690 || rdev->family == CHIP_RS740) {
78  set_memory_uc((unsigned long)ptr,
79  rdev->gart.table_size >> PAGE_SHIFT);
80  }
81 #endif
82  rdev->gart.ptr = ptr;
83  memset((void *)rdev->gart.ptr, 0, rdev->gart.table_size);
84  return 0;
85 }
86 
96 void radeon_gart_table_ram_free(struct radeon_device *rdev)
97 {
98  if (rdev->gart.ptr == NULL) {
99  return;
100  }
101 #ifdef CONFIG_X86
102  if (rdev->family == CHIP_RS400 || rdev->family == CHIP_RS480 ||
103  rdev->family == CHIP_RS690 || rdev->family == CHIP_RS740) {
104  set_memory_wb((unsigned long)rdev->gart.ptr,
105  rdev->gart.table_size >> PAGE_SHIFT);
106  }
107 #endif
108  pci_free_consistent(rdev->pdev, rdev->gart.table_size,
109  (void *)rdev->gart.ptr,
110  rdev->gart.table_addr);
111  rdev->gart.ptr = NULL;
112  rdev->gart.table_addr = 0;
113 }
114 
125 int radeon_gart_table_vram_alloc(struct radeon_device *rdev)
126 {
127  int r;
128 
129  if (rdev->gart.robj == NULL) {
130  r = radeon_bo_create(rdev, rdev->gart.table_size,
131  PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM,
132  NULL, &rdev->gart.robj);
133  if (r) {
134  return r;
135  }
136  }
137  return 0;
138 }
139 
150 int radeon_gart_table_vram_pin(struct radeon_device *rdev)
151 {
152  uint64_t gpu_addr;
153  int r;
154 
155  r = radeon_bo_reserve(rdev->gart.robj, false);
156  if (unlikely(r != 0))
157  return r;
158  r = radeon_bo_pin(rdev->gart.robj,
159  RADEON_GEM_DOMAIN_VRAM, &gpu_addr);
160  if (r) {
161  radeon_bo_unreserve(rdev->gart.robj);
162  return r;
163  }
164  r = radeon_bo_kmap(rdev->gart.robj, &rdev->gart.ptr);
165  if (r)
166  radeon_bo_unpin(rdev->gart.robj);
167  radeon_bo_unreserve(rdev->gart.robj);
168  rdev->gart.table_addr = gpu_addr;
169  return r;
170 }
171 
180 void radeon_gart_table_vram_unpin(struct radeon_device *rdev)
181 {
182  int r;
183 
184  if (rdev->gart.robj == NULL) {
185  return;
186  }
187  r = radeon_bo_reserve(rdev->gart.robj, false);
188  if (likely(r == 0)) {
189  radeon_bo_kunmap(rdev->gart.robj);
190  radeon_bo_unpin(rdev->gart.robj);
191  radeon_bo_unreserve(rdev->gart.robj);
192  rdev->gart.ptr = NULL;
193  }
194 }
195 
205 void radeon_gart_table_vram_free(struct radeon_device *rdev)
206 {
207  if (rdev->gart.robj == NULL) {
208  return;
209  }
210  radeon_gart_table_vram_unpin(rdev);
211  radeon_bo_unref(&rdev->gart.robj);
212 }
213 
214 /*
215  * Common gart functions.
216  */
227 void radeon_gart_unbind(struct radeon_device *rdev, unsigned offset,
228  int pages)
229 {
230  unsigned t;
231  unsigned p;
232  int i, j;
233  u64 page_base;
234 
235  if (!rdev->gart.ready) {
236  WARN(1, "trying to unbind memory from uninitialized GART !\n");
237  return;
238  }
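	/* t indexes GART entries (GPU pages); p indexes the per-CPU-page
	 * bookkeeping arrays gart.pages[] and gart.pages_addr[] */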
239  t = offset / RADEON_GPU_PAGE_SIZE;
240  p = t / (PAGE_SIZE / RADEON_GPU_PAGE_SIZE);
241  for (i = 0; i < pages; i++, p++) {
242  if (rdev->gart.pages[p]) {
243  rdev->gart.pages[p] = NULL;
244  rdev->gart.pages_addr[p] = rdev->dummy_page.addr;
245  page_base = rdev->gart.pages_addr[p];
246  for (j = 0; j < (PAGE_SIZE / RADEON_GPU_PAGE_SIZE); j++, t++) {
247  if (rdev->gart.ptr) {
248  radeon_gart_set_page(rdev, t, page_base);
249  }
250  page_base += RADEON_GPU_PAGE_SIZE;
251  }
252  }
253  }
254  mb();
255  radeon_gart_tlb_flush(rdev);
256 }
257 
271 int radeon_gart_bind(struct radeon_device *rdev, unsigned offset,
272  int pages, struct page **pagelist, dma_addr_t *dma_addr)
273 {
274  unsigned t;
275  unsigned p;
276  uint64_t page_base;
277  int i, j;
278 
279  if (!rdev->gart.ready) {
280  WARN(1, "trying to bind memory to uninitialized GART !\n");
281  return -EINVAL;
282  }
283  t = offset / RADEON_GPU_PAGE_SIZE;
284  p = t / (PAGE_SIZE / RADEON_GPU_PAGE_SIZE);
285 
286  for (i = 0; i < pages; i++, p++) {
287  rdev->gart.pages_addr[p] = dma_addr[i];
288  rdev->gart.pages[p] = pagelist[i];
289  if (rdev->gart.ptr) {
290  page_base = rdev->gart.pages_addr[p];
291  for (j = 0; j < (PAGE_SIZE / RADEON_GPU_PAGE_SIZE); j++, t++) {
292  radeon_gart_set_page(rdev, t, page_base);
293  page_base += RADEON_GPU_PAGE_SIZE;
294  }
295  }
296  }
297  mb();
298  radeon_gart_tlb_flush(rdev);
299  return 0;
300 }
301 
310 void radeon_gart_restore(struct radeon_device *rdev)
311 {
312  int i, j, t;
313  u64 page_base;
314 
315  if (!rdev->gart.ptr) {
316  return;
317  }
318  for (i = 0, t = 0; i < rdev->gart.num_cpu_pages; i++) {
319  page_base = rdev->gart.pages_addr[i];
320  for (j = 0; j < (PAGE_SIZE / RADEON_GPU_PAGE_SIZE); j++, t++) {
321  radeon_gart_set_page(rdev, t, page_base);
322  page_base += RADEON_GPU_PAGE_SIZE;
323  }
324  }
325  mb();
326  radeon_gart_tlb_flush(rdev);
327 }
328 
337 int radeon_gart_init(struct radeon_device *rdev)
338 {
339  int r, i;
340 
341  if (rdev->gart.pages) {
342  return 0;
343  }
344  /* We need PAGE_SIZE >= RADEON_GPU_PAGE_SIZE */
345  if (PAGE_SIZE < RADEON_GPU_PAGE_SIZE) {
346  DRM_ERROR("Page size is smaller than GPU page size!\n");
347  return -EINVAL;
348  }
349  r = radeon_dummy_page_init(rdev);
350  if (r)
351  return r;
352  /* Compute table size */
353  rdev->gart.num_cpu_pages = rdev->mc.gtt_size / PAGE_SIZE;
354  rdev->gart.num_gpu_pages = rdev->mc.gtt_size / RADEON_GPU_PAGE_SIZE;
355  DRM_INFO("GART: num cpu pages %u, num gpu pages %u\n",
356  rdev->gart.num_cpu_pages, rdev->gart.num_gpu_pages);
357  /* Allocate pages table */
358  rdev->gart.pages = vzalloc(sizeof(void *) * rdev->gart.num_cpu_pages);
359  if (rdev->gart.pages == NULL) {
360  radeon_gart_fini(rdev);
361  return -ENOMEM;
362  }
363  rdev->gart.pages_addr = vzalloc(sizeof(dma_addr_t) *
364  rdev->gart.num_cpu_pages);
365  if (rdev->gart.pages_addr == NULL) {
366  radeon_gart_fini(rdev);
367  return -ENOMEM;
368  }
369  /* set GART entry to point to the dummy page by default */
370  for (i = 0; i < rdev->gart.num_cpu_pages; i++) {
371  rdev->gart.pages_addr[i] = rdev->dummy_page.addr;
372  }
373  return 0;
374 }
375 
383 void radeon_gart_fini(struct radeon_device *rdev)
384 {
385  if (rdev->gart.pages && rdev->gart.pages_addr && rdev->gart.ready) {
386  /* unbind pages */
387  radeon_gart_unbind(rdev, 0, rdev->gart.num_cpu_pages);
388  }
389  rdev->gart.ready = false;
390  vfree(rdev->gart.pages);
391  vfree(rdev->gart.pages_addr);
392  rdev->gart.pages = NULL;
393  rdev->gart.pages_addr = NULL;
394 
395  radeon_dummy_page_fini(rdev);
396 }
397 
398 /*
399  * GPUVM
400  * GPUVM is similar to the legacy GART on older asics; however,
401  * rather than there being a single global GART table
402  * for the entire GPU, there are multiple VM page tables active
403  * at any given time. The VM page tables can contain a mix of
404  * vram pages and system memory pages, and the system memory pages
405  * can be mapped as snooped (cached system pages) or unsnooped
406  * (uncached system pages).
407  * Each VM has an ID associated with it and there is a page table
408  * associated with each VMID. When executing a command buffer,
409  * the kernel tells the ring what VMID to use for that command
410  * buffer. VMIDs are allocated dynamically as commands are submitted.
411  * The userspace drivers maintain their own address space and the kernel
412  * sets up their page tables accordingly when they submit their
413  * command buffers and a VMID is assigned.
414  * Cayman/Trinity support up to 8 active VMs at any given time;
415  * SI supports 16.
416  */
417 
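/*
 * Illustrative sketch (not part of the radeon code): how a GPU virtual
 * address is split by the two-level tables maintained by
 * radeon_vm_update_pdes() and radeon_vm_update_ptes() below. The helper
 * name is hypothetical.
 */
static inline void radeon_vm_addr_split(uint64_t gpu_addr,
					uint64_t *pd_idx, uint64_t *pt_idx)
{
	uint64_t pfn = gpu_addr / RADEON_GPU_PAGE_SIZE;

	/* the upper pfn bits select the page directory entry (one per page table) */
	*pd_idx = pfn >> RADEON_VM_BLOCK_SIZE;
	/* the lower pfn bits select the entry inside that page table */
	*pt_idx = pfn & (RADEON_VM_PTE_COUNT - 1);
}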
418 /*
419  * vm helpers
420  *
421  * TODO bind a default page at vm initialization for default address
422  */
423 
431 static unsigned radeon_vm_num_pdes(struct radeon_device *rdev)
432 {
433  return rdev->vm_manager.max_pfn >> RADEON_VM_BLOCK_SIZE;
434 }
435 
443 static unsigned radeon_vm_directory_size(struct radeon_device *rdev)
444 {
445  return RADEON_GPU_PAGE_ALIGN(radeon_vm_num_pdes(rdev) * 8);
446 }
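/*
 * Worked example, assuming RADEON_VM_BLOCK_SIZE is 9: each page directory
 * entry is 8 bytes, so a 1 GiB address space (max_pfn = 262144) needs
 * 262144 >> 9 = 512 PDEs, i.e. 4 KiB rounded up to one GPU page.
 */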
447 
456 int radeon_vm_manager_init(struct radeon_device *rdev)
457 {
458  struct radeon_vm *vm;
459  struct radeon_bo_va *bo_va;
460  int r;
461  unsigned size;
462 
463  if (!rdev->vm_manager.enabled) {
464  /* allocate enough for 2 full VM pts */
465  size = radeon_vm_directory_size(rdev);
466  size += rdev->vm_manager.max_pfn * 8;
467  size *= 2;
468  r = radeon_sa_bo_manager_init(rdev, &rdev->vm_manager.sa_manager,
469  RADEON_GPU_PAGE_ALIGN(size),
470  RADEON_GEM_DOMAIN_VRAM);
471  if (r) {
472  dev_err(rdev->dev, "failed to allocate vm bo (%dKB)\n",
473  (rdev->vm_manager.max_pfn * 8) >> 10);
474  return r;
475  }
476 
477  r = radeon_asic_vm_init(rdev);
478  if (r)
479  return r;
480 
481  rdev->vm_manager.enabled = true;
482 
483  r = radeon_sa_bo_manager_start(rdev, &rdev->vm_manager.sa_manager);
484  if (r)
485  return r;
486  }
487 
488  /* restore page table */
489  list_for_each_entry(vm, &rdev->vm_manager.lru_vm, list) {
490  if (vm->page_directory == NULL)
491  continue;
492 
493  list_for_each_entry(bo_va, &vm->va, vm_list) {
494  bo_va->valid = false;
495  }
496  }
497  return 0;
498 }
499 
510 static void radeon_vm_free_pt(struct radeon_device *rdev,
511  struct radeon_vm *vm)
512 {
513  struct radeon_bo_va *bo_va;
514  int i;
515 
516  if (!vm->page_directory)
517  return;
518 
519  list_del_init(&vm->list);
520  radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence);
521 
522  list_for_each_entry(bo_va, &vm->va, vm_list) {
523  bo_va->valid = false;
524  }
525 
526  if (vm->page_tables == NULL)
527  return;
528 
529  for (i = 0; i < radeon_vm_num_pdes(rdev); i++)
530  radeon_sa_bo_free(rdev, &vm->page_tables[i], vm->fence);
531 
532  kfree(vm->page_tables);
533 }
534 
542 void radeon_vm_manager_fini(struct radeon_device *rdev)
543 {
544  struct radeon_vm *vm, *tmp;
545  int i;
546 
547  if (!rdev->vm_manager.enabled)
548  return;
549 
550  mutex_lock(&rdev->vm_manager.lock);
551  /* free all allocated page tables */
552  list_for_each_entry_safe(vm, tmp, &rdev->vm_manager.lru_vm, list) {
553  mutex_lock(&vm->mutex);
554  radeon_vm_free_pt(rdev, vm);
555  mutex_unlock(&vm->mutex);
556  }
557  for (i = 0; i < RADEON_NUM_VM; ++i) {
558  radeon_fence_unref(&rdev->vm_manager.active[i]);
559  }
560  radeon_asic_vm_fini(rdev);
561  mutex_unlock(&rdev->vm_manager.lock);
562 
563  radeon_sa_bo_manager_suspend(rdev, &rdev->vm_manager.sa_manager);
564  radeon_sa_bo_manager_fini(rdev, &rdev->vm_manager.sa_manager);
565  rdev->vm_manager.enabled = false;
566 }
567 
579 static int radeon_vm_evict(struct radeon_device *rdev, struct radeon_vm *vm)
580 {
581  struct radeon_vm *vm_evict;
582 
583  if (list_empty(&rdev->vm_manager.lru_vm))
584  return -ENOMEM;
585 
586  vm_evict = list_first_entry(&rdev->vm_manager.lru_vm,
587  struct radeon_vm, list);
588  if (vm_evict == vm)
589  return -ENOMEM;
590 
591  mutex_lock(&vm_evict->mutex);
592  radeon_vm_free_pt(rdev, vm_evict);
593  mutex_unlock(&vm_evict->mutex);
594  return 0;
595 }
596 
608 int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm)
609 {
610  unsigned pd_size, pts_size;
611  u64 *pd_addr;
612  int r;
613 
614  if (vm == NULL) {
615  return -EINVAL;
616  }
617 
618  if (vm->page_directory != NULL) {
619  return 0;
620  }
621 
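	/* if the suballocator is out of space, evict the page tables of the
	 * least recently used VM (head of the LRU list) and retry below */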
622 retry:
623  pd_size = RADEON_GPU_PAGE_ALIGN(radeon_vm_directory_size(rdev));
624  r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager,
625  &vm->page_directory, pd_size,
626  RADEON_GPU_PAGE_SIZE, false);
627  if (r == -ENOMEM) {
628  r = radeon_vm_evict(rdev, vm);
629  if (r)
630  return r;
631  goto retry;
632 
633  } else if (r) {
634  return r;
635  }
636 
637  vm->pd_gpu_addr = radeon_sa_bo_gpu_addr(vm->page_directory);
638 
639  /* Initially clear the page directory */
640  pd_addr = radeon_sa_bo_cpu_addr(vm->page_directory);
641  memset(pd_addr, 0, pd_size);
642 
643  pts_size = radeon_vm_num_pdes(rdev) * sizeof(struct radeon_sa_bo *);
644  vm->page_tables = kzalloc(pts_size, GFP_KERNEL);
645 
646  if (vm->page_tables == NULL) {
647  DRM_ERROR("Cannot allocate memory for page table array\n");
648  radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence);
649  return -ENOMEM;
650  }
651 
652  return 0;
653 }
654 
665 void radeon_vm_add_to_lru(struct radeon_device *rdev, struct radeon_vm *vm)
666 {
667  list_del_init(&vm->list);
668  list_add_tail(&vm->list, &rdev->vm_manager.lru_vm);
669 }
670 
683 struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
684  struct radeon_vm *vm, int ring)
685 {
686  struct radeon_fence *best[RADEON_NUM_RINGS] = {};
687  unsigned choices[2] = {};
688  unsigned i;
689 
690  /* check if the id is still valid */
691  if (vm->fence && vm->fence == rdev->vm_manager.active[vm->id])
692  return NULL;
693 
694  /* we definitely need to flush */
695  radeon_fence_unref(&vm->last_flush);
696 
697  /* skip over VMID 0, since it is the system VM */
698  for (i = 1; i < rdev->vm_manager.nvm; ++i) {
699  struct radeon_fence *fence = rdev->vm_manager.active[i];
700 
701  if (fence == NULL) {
702  /* found a free one */
703  vm->id = i;
704  return NULL;
705  }
706 
707  if (radeon_fence_is_earlier(fence, best[fence->ring])) {
708  best[fence->ring] = fence;
709  choices[fence->ring == ring ? 0 : 1] = i;
710  }
711  }
712 
713  for (i = 0; i < 2; ++i) {
714  if (choices[i]) {
715  vm->id = choices[i];
716  return rdev->vm_manager.active[choices[i]];
717  }
718  }
719 
720  /* should never happen */
721  BUG();
722  return NULL;
723 }
724 
737 void radeon_vm_fence(struct radeon_device *rdev,
738  struct radeon_vm *vm,
739  struct radeon_fence *fence)
740 {
741  radeon_fence_unref(&rdev->vm_manager.active[vm->id]);
742  rdev->vm_manager.active[vm->id] = radeon_fence_ref(fence);
743 
744  radeon_fence_unref(&vm->fence);
745  vm->fence = radeon_fence_ref(fence);
746 }
747 
760 struct radeon_bo_va *radeon_vm_bo_find(struct radeon_vm *vm,
761  struct radeon_bo *bo)
762 {
763  struct radeon_bo_va *bo_va;
764 
765  list_for_each_entry(bo_va, &bo->va, bo_list) {
766  if (bo_va->vm == vm) {
767  return bo_va;
768  }
769  }
770  return NULL;
771 }
772 
786 struct radeon_bo_va *radeon_vm_bo_add(struct radeon_device *rdev,
787  struct radeon_vm *vm,
788  struct radeon_bo *bo)
789 {
790  struct radeon_bo_va *bo_va;
791 
792  bo_va = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL);
793  if (bo_va == NULL) {
794  return NULL;
795  }
796  bo_va->vm = vm;
797  bo_va->bo = bo;
798  bo_va->soffset = 0;
799  bo_va->eoffset = 0;
800  bo_va->flags = 0;
801  bo_va->valid = false;
802  bo_va->ref_count = 1;
803  INIT_LIST_HEAD(&bo_va->bo_list);
804  INIT_LIST_HEAD(&bo_va->vm_list);
805 
806  mutex_lock(&vm->mutex);
807  list_add(&bo_va->vm_list, &vm->va);
808  list_add_tail(&bo_va->bo_list, &bo->va);
809  mutex_unlock(&vm->mutex);
810 
811  return bo_va;
812 }
813 
828 int radeon_vm_bo_set_addr(struct radeon_device *rdev,
829  struct radeon_bo_va *bo_va,
830  uint64_t soffset,
831  uint32_t flags)
832 {
833  uint64_t size = radeon_bo_size(bo_va->bo);
834  uint64_t eoffset, last_offset = 0;
835  struct radeon_vm *vm = bo_va->vm;
836  struct radeon_bo_va *tmp;
837  struct list_head *head;
838  unsigned last_pfn;
839 
840  if (soffset) {
841  /* make sure object fit at this offset */
842  eoffset = soffset + size;
843  if (soffset >= eoffset) {
844  return -EINVAL;
845  }
846 
847  last_pfn = eoffset / RADEON_GPU_PAGE_SIZE;
848  if (last_pfn > rdev->vm_manager.max_pfn) {
849  dev_err(rdev->dev, "va above limit (0x%08X > 0x%08X)\n",
850  last_pfn, rdev->vm_manager.max_pfn);
851  return -EINVAL;
852  }
853 
854  } else {
855  eoffset = last_pfn = 0;
856  }
857 
858  mutex_lock(&vm->mutex);
859  head = &vm->va;
860  last_offset = 0;
861  list_for_each_entry(tmp, &vm->va, vm_list) {
862  if (bo_va == tmp) {
863  /* skip over currently modified bo */
864  continue;
865  }
866 
867  if (soffset >= last_offset && eoffset <= tmp->soffset) {
868  /* bo can be added before this one */
869  break;
870  }
871  if (eoffset > tmp->soffset && soffset < tmp->eoffset) {
872  /* bo and tmp overlap, invalid offset */
873  dev_err(rdev->dev, "bo %p va 0x%08X conflict with (bo %p 0x%08X 0x%08X)\n",
874  bo_va->bo, (unsigned)bo_va->soffset, tmp->bo,
875  (unsigned)tmp->soffset, (unsigned)tmp->eoffset);
876  mutex_unlock(&vm->mutex);
877  return -EINVAL;
878  }
879  last_offset = tmp->eoffset;
880  head = &tmp->vm_list;
881  }
882 
883  bo_va->soffset = soffset;
884  bo_va->eoffset = eoffset;
885  bo_va->flags = flags;
886  bo_va->valid = false;
887  list_move(&bo_va->vm_list, head);
888 
889  mutex_unlock(&vm->mutex);
890  return 0;
891 }
892 
903 uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr)
904 {
905  uint64_t result;
906 
907  /* page table offset */
908  result = rdev->gart.pages_addr[addr >> PAGE_SHIFT];
909 
910  /* in case cpu page size != gpu page size */
911  result |= addr & (~PAGE_MASK);
912 
913  return result;
914 }
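/*
 * Example: the value returned above is the DMA address of the backing CPU
 * page with the sub-page offset OR'ed back in. With 4 KiB GPU pages and
 * 64 KiB CPU pages, addr 0x21000 resolves to pages_addr[2] | 0x1000.
 */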
915 
930 static int radeon_vm_update_pdes(struct radeon_device *rdev,
931  struct radeon_vm *vm,
932  uint64_t start, uint64_t end)
933 {
934  static const uint32_t incr = RADEON_VM_PTE_COUNT * 8;
935 
936  uint64_t last_pde = ~0, last_pt = ~0;
937  unsigned count = 0;
938  uint64_t pt_idx;
939  int r;
940 
941  start = (start / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE;
942  end = (end / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE;
943 
944  /* walk over the address space and update the page directory */
945  for (pt_idx = start; pt_idx <= end; ++pt_idx) {
946  uint64_t pde, pt;
947 
948  if (vm->page_tables[pt_idx])
949  continue;
950 
951 retry:
952  r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager,
953  &vm->page_tables[pt_idx],
954  RADEON_VM_PTE_COUNT * 8,
955  RADEON_GPU_PAGE_SIZE, false);
956 
957  if (r == -ENOMEM) {
958  r = radeon_vm_evict(rdev, vm);
959  if (r)
960  return r;
961  goto retry;
962  } else if (r) {
963  return r;
964  }
965 
966  pde = vm->pd_gpu_addr + pt_idx * 8;
967 
968  pt = radeon_sa_bo_gpu_addr(vm->page_tables[pt_idx]);
969 
970  if (((last_pde + 8 * count) != pde) ||
971  ((last_pt + incr * count) != pt)) {
972 
973  if (count) {
974  radeon_asic_vm_set_page(rdev, last_pde,
975  last_pt, count, incr,
976  RADEON_VM_PAGE_VALID);
977  }
978 
979  count = 1;
980  last_pde = pde;
981  last_pt = pt;
982  } else {
983  ++count;
984  }
985  }
986 
987  if (count) {
988  radeon_asic_vm_set_page(rdev, last_pde, last_pt, count,
989  incr, RADEON_VM_PAGE_VALID);
990 
991  }
992 
993  return 0;
994 }
995 
1010 static void radeon_vm_update_ptes(struct radeon_device *rdev,
1011  struct radeon_vm *vm,
1012  uint64_t start, uint64_t end,
1013  uint64_t dst, uint32_t flags)
1014 {
1015  static const uint64_t mask = RADEON_VM_PTE_COUNT - 1;
1016 
1017  uint64_t last_pte = ~0, last_dst = ~0;
1018  unsigned count = 0;
1019  uint64_t addr;
1020 
1021  start = start / RADEON_GPU_PAGE_SIZE;
1022  end = end / RADEON_GPU_PAGE_SIZE;
1023 
1024  /* walk over the address space and update the page tables */
1025  for (addr = start; addr < end; ) {
1026  uint64_t pt_idx = addr >> RADEON_VM_BLOCK_SIZE;
1027  unsigned nptes;
1028  uint64_t pte;
1029 
1030  if ((addr & ~mask) == (end & ~mask))
1031  nptes = end - addr;
1032  else
1033  nptes = RADEON_VM_PTE_COUNT - (addr & mask);
1034 
1035  pte = radeon_sa_bo_gpu_addr(vm->page_tables[pt_idx]);
1036  pte += (addr & mask) * 8;
1037 
1038  if ((last_pte + 8 * count) != pte) {
1039 
1040  if (count) {
1041  radeon_asic_vm_set_page(rdev, last_pte,
1042  last_dst, count,
1043  RADEON_GPU_PAGE_SIZE,
1044  flags);
1045  }
1046 
1047  count = nptes;
1048  last_pte = pte;
1049  last_dst = dst;
1050  } else {
1051  count += nptes;
1052  }
1053 
1054  addr += nptes;
1055  dst += nptes * RADEON_GPU_PAGE_SIZE;
1056  }
1057 
1058  if (count) {
1059  radeon_asic_vm_set_page(rdev, last_pte, last_dst, count,
1060  RADEON_GPU_PAGE_SIZE, flags);
1061  }
1062 }
1063 
1077 int radeon_vm_bo_update_pte(struct radeon_device *rdev,
1078  struct radeon_vm *vm,
1079  struct radeon_bo *bo,
1080  struct ttm_mem_reg *mem)
1081 {
1082  unsigned ridx = rdev->asic->vm.pt_ring_index;
1083  struct radeon_ring *ring = &rdev->ring[ridx];
1084  struct radeon_semaphore *sem = NULL;
1085  struct radeon_bo_va *bo_va;
1086  unsigned nptes, npdes, ndw;
1087  uint64_t addr;
1088  int r;
1089 
1090  /* nothing to do if vm isn't bound */
1091  if (vm->page_directory == NULL)
1092  return 0;
1093 
1094  bo_va = radeon_vm_bo_find(vm, bo);
1095  if (bo_va == NULL) {
1096  dev_err(rdev->dev, "bo %p not in vm %p\n", bo, vm);
1097  return -EINVAL;
1098  }
1099 
1100  if (!bo_va->soffset) {
1101  dev_err(rdev->dev, "bo %p doesn't have a mapping in vm %p\n",
1102  bo, vm);
1103  return -EINVAL;
1104  }
1105 
1106  if ((bo_va->valid && mem) || (!bo_va->valid && mem == NULL))
1107  return 0;
1108 
1109  bo_va->flags &= ~RADEON_VM_PAGE_VALID;
1110  bo_va->flags &= ~RADEON_VM_PAGE_SYSTEM;
1111  if (mem) {
1112  addr = mem->start << PAGE_SHIFT;
1113  if (mem->mem_type != TTM_PL_SYSTEM) {
1114  bo_va->flags |= RADEON_VM_PAGE_VALID;
1115  bo_va->valid = true;
1116  }
1117  if (mem->mem_type == TTM_PL_TT) {
1118  bo_va->flags |= RADEON_VM_PAGE_SYSTEM;
1119  } else {
1120  addr += rdev->vm_manager.vram_base_offset;
1121  }
1122  } else {
1123  addr = 0;
1124  bo_va->valid = false;
1125  }
1126 
1127  if (vm->fence && radeon_fence_signaled(vm->fence)) {
1128  radeon_fence_unref(&vm->fence);
1129  }
1130 
1131  if (vm->fence && vm->fence->ring != ridx) {
1132  r = radeon_semaphore_create(rdev, &sem);
1133  if (r) {
1134  return r;
1135  }
1136  }
1137 
1138  nptes = radeon_bo_ngpu_pages(bo);
1139 
1140  /* assume two extra pdes in case the mapping overlaps the borders */
1141  npdes = (nptes >> RADEON_VM_BLOCK_SIZE) + 2;
1142 
1143  /* estimate number of dw needed */
1144  /* semaphore, fence and padding */
1145  ndw = 32;
1146 
1147  if (RADEON_VM_BLOCK_SIZE > 11)
1148  /* reserve space for one header for every 2k dwords */
1149  ndw += (nptes >> 11) * 4;
1150  else
1151  /* reserve space for one header for
1152  every (1 << BLOCK_SIZE) entries */
1153  ndw += (nptes >> RADEON_VM_BLOCK_SIZE) * 4;
1154 
1155  /* reserve space for pte addresses */
1156  ndw += nptes * 2;
1157 
1158  /* reserve space for one header for every 2k dwords */
1159  ndw += (npdes >> 11) * 4;
1160 
1161  /* reserve space for pde addresses */
1162  ndw += npdes * 2;
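	/* ndw is a worst-case bound: each PTE/PDE value is 64 bits (2 dw) on
	 * the ring, plus packet headers and the fixed overhead reserved above */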
1163 
1164  r = radeon_ring_lock(rdev, ring, ndw);
1165  if (r) {
1166  return r;
1167  }
1168 
1169  if (sem && radeon_fence_need_sync(vm->fence, ridx)) {
1170  radeon_semaphore_sync_rings(rdev, sem, vm->fence->ring, ridx);
1171  radeon_fence_note_sync(vm->fence, ridx);
1172  }
1173 
1174  r = radeon_vm_update_pdes(rdev, vm, bo_va->soffset, bo_va->eoffset);
1175  if (r) {
1176  radeon_ring_unlock_undo(rdev, ring);
1177  return r;
1178  }
1179 
1180  radeon_vm_update_ptes(rdev, vm, bo_va->soffset, bo_va->eoffset,
1181  addr, bo_va->flags);
1182 
1183  radeon_fence_unref(&vm->fence);
1184  r = radeon_fence_emit(rdev, &vm->fence, ridx);
1185  if (r) {
1186  radeon_ring_unlock_undo(rdev, ring);
1187  return r;
1188  }
1189  radeon_ring_unlock_commit(rdev, ring);
1190  radeon_semaphore_free(rdev, &sem, vm->fence);
1191  radeon_fence_unref(&vm->last_flush);
1192 
1193  return 0;
1194 }
1195 
1209 int radeon_vm_bo_rmv(struct radeon_device *rdev,
1210  struct radeon_bo_va *bo_va)
1211 {
1212  int r;
1213 
1214  mutex_lock(&rdev->vm_manager.lock);
1215  mutex_lock(&bo_va->vm->mutex);
1216  r = radeon_vm_bo_update_pte(rdev, bo_va->vm, bo_va->bo, NULL);
1217  mutex_unlock(&rdev->vm_manager.lock);
1218  list_del(&bo_va->vm_list);
1219  mutex_unlock(&bo_va->vm->mutex);
1220  list_del(&bo_va->bo_list);
1221 
1222  kfree(bo_va);
1223  return r;
1224 }
1225 
1235 void radeon_vm_bo_invalidate(struct radeon_device *rdev,
1236  struct radeon_bo *bo)
1237 {
1238  struct radeon_bo_va *bo_va;
1239 
1240  BUG_ON(!atomic_read(&bo->tbo.reserved));
1241  list_for_each_entry(bo_va, &bo->va, bo_list) {
1242  bo_va->valid = false;
1243  }
1244 }
1245 
1254 void radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
1255 {
1256  vm->id = 0;
1257  vm->fence = NULL;
1258  mutex_init(&vm->mutex);
1259  INIT_LIST_HEAD(&vm->list);
1260  INIT_LIST_HEAD(&vm->va);
1261 }
1262 
1272 void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
1273 {
1274  struct radeon_bo_va *bo_va, *tmp;
1275  int r;
1276 
1277  mutex_lock(&rdev->vm_manager.lock);
1278  mutex_lock(&vm->mutex);
1279  radeon_vm_free_pt(rdev, vm);
1280  mutex_unlock(&rdev->vm_manager.lock);
1281 
1282  if (!list_empty(&vm->va)) {
1283  dev_err(rdev->dev, "still active bo inside vm\n");
1284  }
1285  list_for_each_entry_safe(bo_va, tmp, &vm->va, vm_list) {
1286  list_del_init(&bo_va->vm_list);
1287  r = radeon_bo_reserve(bo_va->bo, false);
1288  if (!r) {
1289  list_del_init(&bo_va->bo_list);
1290  radeon_bo_unreserve(bo_va->bo);
1291  kfree(bo_va);
1292  }
1293  }
1294  radeon_fence_unref(&vm->fence);
1295  radeon_fence_unref(&vm->last_flush);
1296  mutex_unlock(&vm->mutex);
1297 }