Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
r100.c
Go to the documentation of this file.
1 /*
2  * Copyright 2008 Advanced Micro Devices, Inc.
3  * Copyright 2008 Red Hat Inc.
4  * Copyright 2009 Jerome Glisse.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22  * OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors: Dave Airlie
25  * Alex Deucher
26  * Jerome Glisse
27  */
28 #include <linux/seq_file.h>
29 #include <linux/slab.h>
30 #include <drm/drmP.h>
31 #include <drm/radeon_drm.h>
32 #include "radeon_reg.h"
33 #include "radeon.h"
34 #include "radeon_asic.h"
35 #include "r100d.h"
36 #include "rs100d.h"
37 #include "rv200d.h"
38 #include "rv250d.h"
39 #include "atom.h"
40 
41 #include <linux/firmware.h>
42 #include <linux/platform_device.h>
43 #include <linux/module.h>
44 
45 #include "r100_reg_safe.h"
46 #include "rn50_reg_safe.h"
47 
48 /* Firmware Names */
/* Paths of the CP microcode images; r100_cp_init_microcode() picks one of
 * them according to rdev->family and passes it to request_firmware(). */
49 #define FIRMWARE_R100 "radeon/R100_cp.bin"
50 #define FIRMWARE_R200 "radeon/R200_cp.bin"
51 #define FIRMWARE_R300 "radeon/R300_cp.bin"
52 #define FIRMWARE_R420 "radeon/R420_cp.bin"
53 #define FIRMWARE_RS690 "radeon/RS690_cp.bin"
54 #define FIRMWARE_RS600 "radeon/RS600_cp.bin"
55 #define FIRMWARE_R520 "radeon/R520_cp.bin"
56 
64 
65 #include "r100_track.h"
66 
67 /* This file gathers functions specific to:
68  * r100,rv100,rs100,rv200,rs200,r200,rv250,rs300,rv280
69  * and others in some cases.
70  */
71 
/*
 * Body of a per-CRTC wait helper (the declarator line is absent from this
 * extract).  Returns immediately when crtc is not below rdev->num_crtc;
 * otherwise runs two polling loops for the selected CRTC, each bounded by
 * rdev->usec_timeout iterations with a udelay(1) per iteration.
 *
 * NOTE(review): the embedded listing numbers skip 88, 90, 95, 101, 103 and
 * 108, so the register tests that guard each block and each "break" are not
 * visible here; the braces below do not balance as shown.
 */
81 {
82  int i;
83 
84  if (crtc >= rdev->num_crtc)
85  return;
86 
87  if (crtc == 0) {
89  for (i = 0; i < rdev->usec_timeout; i++) {
91  break;
92  udelay(1);
93  }
94  for (i = 0; i < rdev->usec_timeout; i++) {
96  break;
97  udelay(1);
98  }
99  }
100  } else {
102  for (i = 0; i < rdev->usec_timeout; i++) {
104  break;
105  udelay(1);
106  }
107  for (i = 0; i < rdev->usec_timeout; i++) {
109  break;
110  udelay(1);
111  }
112  }
113  }
114 }
115 
/*
 * Body of the pre-page-flip hook (declarator line absent from this extract):
 * takes a reference on the page-flip interrupt of @crtc via
 * radeon_irq_kms_pflip_irq_get().
 */
126 {
127  /* enable the pflip int */
128  radeon_irq_kms_pflip_irq_get(rdev, crtc);
129 }
130 
/*
 * Body of the post-page-flip hook (declarator line absent from this extract):
 * drops the page-flip interrupt reference of @crtc via
 * radeon_irq_kms_pflip_irq_put().
 */
141 {
142  /* disable the pflip int */
143  radeon_irq_kms_pflip_irq_put(rdev, crtc);
144 }
145 
/*
 * Program a new scanout base for CRTC @crtc_id.
 *
 * The low 32 bits of @crtc_base are written to RADEON_CRTC_OFFSET (plus the
 * CRTC's register offset) with RADEON_CRTC_OFFSET__OFFSET_LOCK set, the
 * function then polls for at most rdev->usec_timeout microseconds, and the
 * register is written a second time.
 *
 * NOTE(review): listing lines 171, 178 and 182 are missing from this extract:
 * the poll condition, the statement that prepares tmp for the unlocking
 * write, and the return statement are therefore not visible.
 */
159 u32 r100_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base)
160 {
161  struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id];
162  u32 tmp = ((u32)crtc_base) | RADEON_CRTC_OFFSET__OFFSET_LOCK;
163  int i;
164 
165  /* Lock the graphics update lock */
166  /* update the scanout addresses */
167  WREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset, tmp);
168 
169  /* Wait for update_pending to go high. */
170  for (i = 0; i < rdev->usec_timeout; i++) {
172  break;
173  udelay(1);
174  }
175  DRM_DEBUG("Update pending now high. Unlocking vupdate_lock.\n");
176 
177  /* Unlock the lock, so double-buffering can take place inside vblank */
179  WREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset, tmp);
180 
181  /* Return current update_pending status: */
183 }
184 
/*
 * Body of the dynamic power-management state selector (declarator line absent
 * from this extract).  Depending on rdev->pm.dynpm_planned_action it chooses
 * rdev->pm.requested_power_state_index and clears dynpm_can_upclock /
 * dynpm_can_downclock when the chosen state leaves no room in that direction.
 * The clock mode index is always 0.  DYNPM_ACTION_NONE and unknown actions
 * log an error and return without touching the requested clock mode.
 *
 * NOTE(review): listing lines 201, 205, 229, 233 and 255 are missing; they
 * presumably hold the other "case" labels and the flag tested at 227-228, so
 * the switch below is not syntactically complete as shown.
 */
195 {
196  int i;
197  rdev->pm.dynpm_can_upclock = true;
198  rdev->pm.dynpm_can_downclock = true;
199 
200  switch (rdev->pm.dynpm_planned_action) {
202  rdev->pm.requested_power_state_index = 0;
203  rdev->pm.dynpm_can_downclock = false;
204  break;
206  if (rdev->pm.current_power_state_index == 0) {
207  rdev->pm.requested_power_state_index = rdev->pm.current_power_state_index;
208  rdev->pm.dynpm_can_downclock = false;
209  } else {
210  if (rdev->pm.active_crtc_count > 1) {
/* several CRTCs active: take the first state, scanning upwards, that is not
 * restricted to a single display; never go above the current state */
211  for (i = 0; i < rdev->pm.num_power_states; i++) {
212  if (rdev->pm.power_state[i].flags & RADEON_PM_STATE_SINGLE_DISPLAY_ONLY)
213  continue;
214  else if (i >= rdev->pm.current_power_state_index) {
215  rdev->pm.requested_power_state_index = rdev->pm.current_power_state_index;
216  break;
217  } else {
218  rdev->pm.requested_power_state_index = i;
219  break;
220  }
221  }
222  } else
223  rdev->pm.requested_power_state_index =
224  rdev->pm.current_power_state_index - 1;
225  }
226  /* don't use the power state if crtcs are active and no display flag is set */
227  if ((rdev->pm.active_crtc_count > 0) &&
228  (rdev->pm.power_state[rdev->pm.requested_power_state_index].clock_info[0].flags &
230  rdev->pm.requested_power_state_index++;
231  }
232  break;
234  if (rdev->pm.current_power_state_index == (rdev->pm.num_power_states - 1)) {
235  rdev->pm.requested_power_state_index = rdev->pm.current_power_state_index;
236  rdev->pm.dynpm_can_upclock = false;
237  } else {
238  if (rdev->pm.active_crtc_count > 1) {
/* several CRTCs active: take the first state, scanning downwards, that is not
 * restricted to a single display; never go below the current state */
239  for (i = (rdev->pm.num_power_states - 1); i >= 0; i--) {
240  if (rdev->pm.power_state[i].flags & RADEON_PM_STATE_SINGLE_DISPLAY_ONLY)
241  continue;
242  else if (i <= rdev->pm.current_power_state_index) {
243  rdev->pm.requested_power_state_index = rdev->pm.current_power_state_index;
244  break;
245  } else {
246  rdev->pm.requested_power_state_index = i;
247  break;
248  }
249  }
250  } else
251  rdev->pm.requested_power_state_index =
252  rdev->pm.current_power_state_index + 1;
253  }
254  break;
256  rdev->pm.requested_power_state_index = rdev->pm.default_power_state_index;
257  rdev->pm.dynpm_can_upclock = false;
258  break;
259  case DYNPM_ACTION_NONE:
260  default:
261  DRM_ERROR("Requested mode for not defined action\n");
262  return;
263  }
264  /* only one clock mode per power state */
265  rdev->pm.requested_clock_mode_index = 0;
266 
267  DRM_DEBUG_DRIVER("Requested: e: %d m: %d p: %d\n",
268  rdev->pm.power_state[rdev->pm.requested_power_state_index].
269  clock_info[rdev->pm.requested_clock_mode_index].sclk,
270  rdev->pm.power_state[rdev->pm.requested_power_state_index].
271  clock_info[rdev->pm.requested_clock_mode_index].mclk,
272  rdev->pm.power_state[rdev->pm.requested_power_state_index].
273  pcie_lanes);
274 }
275 
/*
 * Body of the power-profile table initialiser (declarator line absent from
 * this extract).  Fills rdev->pm.profiles[] for every PM_PROFILE_*_IDX used
 * below:
 *  - DEFAULT: default power state whether DPMS is on or off;
 *  - LOW_SH and MID_SH: power state 0 in both cases;
 *  - HIGH_SH, LOW_MH, MID_MH, HIGH_MH: power state 0 with DPMS off, the
 *    default power state with DPMS on.
 * The clock-mode index is 0 in every entry.
 */
286 {
287  /* default */
288  rdev->pm.profiles[PM_PROFILE_DEFAULT_IDX].dpms_off_ps_idx = rdev->pm.default_power_state_index;
289  rdev->pm.profiles[PM_PROFILE_DEFAULT_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index;
290  rdev->pm.profiles[PM_PROFILE_DEFAULT_IDX].dpms_off_cm_idx = 0;
291  rdev->pm.profiles[PM_PROFILE_DEFAULT_IDX].dpms_on_cm_idx = 0;
292  /* low sh */
293  rdev->pm.profiles[PM_PROFILE_LOW_SH_IDX].dpms_off_ps_idx = 0;
294  rdev->pm.profiles[PM_PROFILE_LOW_SH_IDX].dpms_on_ps_idx = 0;
295  rdev->pm.profiles[PM_PROFILE_LOW_SH_IDX].dpms_off_cm_idx = 0;
296  rdev->pm.profiles[PM_PROFILE_LOW_SH_IDX].dpms_on_cm_idx = 0;
297  /* mid sh */
298  rdev->pm.profiles[PM_PROFILE_MID_SH_IDX].dpms_off_ps_idx = 0;
299  rdev->pm.profiles[PM_PROFILE_MID_SH_IDX].dpms_on_ps_idx = 0;
300  rdev->pm.profiles[PM_PROFILE_MID_SH_IDX].dpms_off_cm_idx = 0;
301  rdev->pm.profiles[PM_PROFILE_MID_SH_IDX].dpms_on_cm_idx = 0;
302  /* high sh */
303  rdev->pm.profiles[PM_PROFILE_HIGH_SH_IDX].dpms_off_ps_idx = 0;
304  rdev->pm.profiles[PM_PROFILE_HIGH_SH_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index;
305  rdev->pm.profiles[PM_PROFILE_HIGH_SH_IDX].dpms_off_cm_idx = 0;
306  rdev->pm.profiles[PM_PROFILE_HIGH_SH_IDX].dpms_on_cm_idx = 0;
307  /* low mh */
308  rdev->pm.profiles[PM_PROFILE_LOW_MH_IDX].dpms_off_ps_idx = 0;
309  rdev->pm.profiles[PM_PROFILE_LOW_MH_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index;
310  rdev->pm.profiles[PM_PROFILE_LOW_MH_IDX].dpms_off_cm_idx = 0;
311  rdev->pm.profiles[PM_PROFILE_LOW_MH_IDX].dpms_on_cm_idx = 0;
312  /* mid mh */
313  rdev->pm.profiles[PM_PROFILE_MID_MH_IDX].dpms_off_ps_idx = 0;
314  rdev->pm.profiles[PM_PROFILE_MID_MH_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index;
315  rdev->pm.profiles[PM_PROFILE_MID_MH_IDX].dpms_off_cm_idx = 0;
316  rdev->pm.profiles[PM_PROFILE_MID_MH_IDX].dpms_on_cm_idx = 0;
317  /* high mh */
318  rdev->pm.profiles[PM_PROFILE_HIGH_MH_IDX].dpms_off_ps_idx = 0;
319  rdev->pm.profiles[PM_PROFILE_HIGH_MH_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index;
320  rdev->pm.profiles[PM_PROFILE_HIGH_MH_IDX].dpms_off_cm_idx = 0;
321  rdev->pm.profiles[PM_PROFILE_HIGH_MH_IDX].dpms_on_cm_idx = 0;
322 }
323 
/*
 * Body of the "misc" power-state programming step (declarator line absent
 * from this extract).  For the requested power state it:
 *  - toggles the voltage GPIO (register/mask from voltage->gpio, polarity from
 *    voltage->active_high) and waits voltage->delay microseconds if non-zero;
 *  - recomputes SCLK_CNTL, SCLK_CNTL2 and SCLK_MORE_CNTL and writes them back
 *    through WREG32_PLL();
 *  - on discrete PCIE parts with a set_pcie_lanes hook, handles a lane count
 *    that differs from the current power state's.
 *
 * NOTE(review): listing lines 340, 366, 368, 372, 374, 379, 402 and 417 are
 * missing, so the conditions selecting most branches below (and the call
 * whose argument is at 418) are not visible in this extract.
 */
333 {
334  int requested_index = rdev->pm.requested_power_state_index;
335  struct radeon_power_state *ps = &rdev->pm.power_state[requested_index];
336  struct radeon_voltage *voltage = &ps->clock_info[0].voltage;
337  u32 tmp, sclk_cntl, sclk_cntl2, sclk_more_cntl;
338 
339  if ((voltage->type == VOLTAGE_GPIO) && (voltage->gpio.valid)) {
341  tmp = RREG32(voltage->gpio.reg);
342  if (voltage->active_high)
343  tmp |= voltage->gpio.mask;
344  else
345  tmp &= ~(voltage->gpio.mask);
346  WREG32(voltage->gpio.reg, tmp);
347  if (voltage->delay)
348  udelay(voltage->delay);
349  } else {
/* mirror image of the branch above: the GPIO bit is driven the other way */
350  tmp = RREG32(voltage->gpio.reg);
351  if (voltage->active_high)
352  tmp &= ~voltage->gpio.mask;
353  else
354  tmp |= voltage->gpio.mask;
355  WREG32(voltage->gpio.reg, tmp);
356  if (voltage->delay)
357  udelay(voltage->delay);
358  }
359  }
360 
361  sclk_cntl = RREG32_PLL(SCLK_CNTL);
362  sclk_cntl2 = RREG32_PLL(SCLK_CNTL2);
363  sclk_cntl2 &= ~REDUCED_SPEED_SCLK_SEL(3);
364  sclk_more_cntl = RREG32_PLL(SCLK_MORE_CNTL);
365  sclk_more_cntl &= ~VOLTAGE_DELAY_SEL(3);
367  sclk_more_cntl |= REDUCED_SPEED_SCLK_EN;
369  sclk_cntl2 |= REDUCED_SPEED_SCLK_MODE;
370  else
371  sclk_cntl2 &= ~REDUCED_SPEED_SCLK_MODE;
373  sclk_cntl2 |= REDUCED_SPEED_SCLK_SEL(0);
375  sclk_cntl2 |= REDUCED_SPEED_SCLK_SEL(2);
376  } else
377  sclk_more_cntl &= ~REDUCED_SPEED_SCLK_EN;
378 
380  sclk_more_cntl |= IO_CG_VOLTAGE_DROP;
381  if (voltage->delay) {
382  sclk_more_cntl |= VOLTAGE_DROP_SYNC;
/* only delays of 33, 66, 99 and 132 select a VOLTAGE_DELAY_SEL value; any
 * other non-zero delay leaves the field cleared (see 365) */
383  switch (voltage->delay) {
384  case 33:
385  sclk_more_cntl |= VOLTAGE_DELAY_SEL(0);
386  break;
387  case 66:
388  sclk_more_cntl |= VOLTAGE_DELAY_SEL(1);
389  break;
390  case 99:
391  sclk_more_cntl |= VOLTAGE_DELAY_SEL(2);
392  break;
393  case 132:
394  sclk_more_cntl |= VOLTAGE_DELAY_SEL(3);
395  break;
396  }
397  } else
398  sclk_more_cntl &= ~VOLTAGE_DROP_SYNC;
399  } else
400  sclk_more_cntl &= ~IO_CG_VOLTAGE_DROP;
401 
403  sclk_cntl &= ~FORCE_HDP;
404  else
405  sclk_cntl |= FORCE_HDP;
406 
407  WREG32_PLL(SCLK_CNTL, sclk_cntl);
408  WREG32_PLL(SCLK_CNTL2, sclk_cntl2);
409  WREG32_PLL(SCLK_MORE_CNTL, sclk_more_cntl);
410 
411  /* set pcie lanes */
412  if ((rdev->flags & RADEON_IS_PCIE) &&
413  !(rdev->flags & RADEON_IS_IGP) &&
414  rdev->asic->pm.set_pcie_lanes &&
415  (ps->pcie_lanes !=
416  rdev->pm.power_state[rdev->pm.current_power_state_index].pcie_lanes)) {
418  ps->pcie_lanes);
419  DRM_DEBUG_DRIVER("Setting: p: %d\n", ps->pcie_lanes);
420  }
421 }
422 
/*
 * Body of the power-management "prepare" step (declarator line absent from
 * this extract).  Walks every CRTC on ddev->mode_config.crtc_list and, for
 * each enabled one, takes a different branch for crtc_id != 0 and
 * crtc_id == 0.
 *
 * NOTE(review): listing lines 442-444 and 446-448 are missing, so the
 * statements executed in the two branches (which use "tmp") are not visible.
 */
431 {
432  struct drm_device *ddev = rdev->ddev;
433  struct drm_crtc *crtc;
434  struct radeon_crtc *radeon_crtc;
435  u32 tmp;
436 
437  /* disable any active CRTCs */
438  list_for_each_entry(crtc, &ddev->mode_config.crtc_list, head) {
439  radeon_crtc = to_radeon_crtc(crtc);
440  if (radeon_crtc->enabled) {
441  if (radeon_crtc->crtc_id) {
445  } else {
449  }
450  }
451  }
452 }
453 
/*
 * Body of the power-management "finish" step (declarator line absent from
 * this extract).  Same traversal as the prepare step: every enabled CRTC on
 * ddev->mode_config.crtc_list is handled in a branch chosen by crtc_id.
 *
 * NOTE(review): listing lines 473-475 and 477-479 are missing, so the
 * statements executed in the two branches (which use "tmp") are not visible.
 */
462 {
463  struct drm_device *ddev = rdev->ddev;
464  struct drm_crtc *crtc;
465  struct radeon_crtc *radeon_crtc;
466  u32 tmp;
467 
468  /* enable any active CRTCs */
469  list_for_each_entry(crtc, &ddev->mode_config.crtc_list, head) {
470  radeon_crtc = to_radeon_crtc(crtc);
471  if (radeon_crtc->enabled) {
472  if (radeon_crtc->crtc_id) {
476  } else {
480  }
481  }
482  }
483 }
484 
/*
 * Body of a boolean status query (declarator line absent from this extract):
 * returns false when the tested condition holds and true otherwise.
 *
 * NOTE(review): listing line 495, which holds the "if" condition, is missing.
 */
494 {
496  return false;
497  else
498  return true;
499 }
500 
501 /* hpd for digital panel detect/disconnect */
/*
 * Body of the hot-plug-detect sense helper (declarator line absent from this
 * extract).  Returns whether the pin selected by @hpd reports a connection;
 * only RADEON_HPD_1 and RADEON_HPD_2 are handled, any other id yields false.
 *
 * NOTE(review): listing lines 517 and 521, which hold the tests that set
 * "connected", are missing.
 */
512 {
513  bool connected = false;
514 
515  switch (hpd) {
516  case RADEON_HPD_1:
518  connected = true;
519  break;
520  case RADEON_HPD_2:
522  connected = true;
523  break;
524  default:
525  break;
526  }
527  return connected;
528 }
529 
/*
 * Tail of the hot-plug polarity setter (the first declarator line is absent
 * from this extract).  Reads the current connection state through
 * r100_hpd_sense() and adjusts the detect-interrupt polarity of the selected
 * pin; for RADEON_HPD_1 a connected pin has RADEON_FP_DETECT_INT_POL cleared
 * in the value read from RADEON_FP_GEN_CNTL.
 *
 * NOTE(review): listing lines 550, 551, 554, 556, 558 and 559 are missing,
 * so the "else" bodies, the register write-backs and the whole RADEON_HPD_2
 * handling are not visible.
 */
539  enum radeon_hpd_id hpd)
540 {
541  u32 tmp;
542  bool connected = r100_hpd_sense(rdev, hpd);
543 
544  switch (hpd) {
545  case RADEON_HPD_1:
546  tmp = RREG32(RADEON_FP_GEN_CNTL);
547  if (connected)
548  tmp &= ~RADEON_FP_DETECT_INT_POL;
549  else
552  break;
553  case RADEON_HPD_2:
555  if (connected)
557  else
560  break;
561  default:
562  break;
563  }
564 }
565 
/*
 * Body of the hot-plug-detect initialiser (declarator line absent from this
 * extract).  For every connector on dev->mode_config.connector_list it sets
 * the pin's polarity with radeon_hpd_set_polarity() and collects a bit mask
 * (bit number == hpd id), which is finally passed to
 * radeon_irq_kms_enable_hpd().
 *
 * NOTE(review): listing line 581, which must declare/assign
 * "radeon_connector", is missing.
 */
575 {
576  struct drm_device *dev = rdev->ddev;
577  struct drm_connector *connector;
578  unsigned enable = 0;
579 
580  list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
582  enable |= 1 << radeon_connector->hpd.hpd;
583  radeon_hpd_set_polarity(rdev, radeon_connector->hpd.hpd);
584  }
585  radeon_irq_kms_enable_hpd(rdev, enable);
586 }
587 
/*
 * Body of the hot-plug-detect teardown (declarator line absent from this
 * extract).  Collects one bit per connector (bit number == hpd id) and hands
 * the mask to radeon_irq_kms_disable_hpd().
 *
 * NOTE(review): listing line 603, which must declare/assign
 * "radeon_connector", is missing.
 */
597 {
598  struct drm_device *dev = rdev->ddev;
599  struct drm_connector *connector;
600  unsigned disable = 0;
601 
602  list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
604  disable |= 1 << radeon_connector->hpd.hpd;
605  }
606  radeon_irq_kms_disable_hpd(rdev, disable);
607 }
608 
609 /*
610  * PCI GART
611  */
/*
 * Body of the PCI GART TLB flush hook (declarator line absent from this
 * extract; r100_pci_gart_init() installs r100_pci_gart_tlb_flush as
 * gart.tlb_flush).  The body is intentionally empty: nothing is flushed.
 */
613 {
614  /* TODO: can we do something here? */
615  /* It seems the hw only caches one entry, so we should discard this
616  * entry; otherwise, if the first GPU GART read hits this entry, it
617  * could end up at the wrong address. */
618 }
619 
/*
 * Body of the PCI GART initialiser (declarator line absent from this
 * extract).  Warns and returns 0 if rdev->gart.ptr is already set.  Otherwise
 * it initialises the common GART structure, sizes the table at 4 bytes per
 * GPU page, installs the r100 tlb_flush/set_page hooks and allocates the
 * table in system RAM.
 *
 * Returns 0 on success or the error from radeon_gart_init() /
 * radeon_gart_table_ram_alloc().
 */
621 {
622  int r;
623 
624  if (rdev->gart.ptr) {
625  WARN(1, "R100 PCI GART already initialized\n");
626  return 0;
627  }
628  /* Initialize common gart structure */
629  r = radeon_gart_init(rdev);
630  if (r)
631  return r;
632  rdev->gart.table_size = rdev->gart.num_gpu_pages * 4;
633  rdev->asic->gart.tlb_flush = &r100_pci_gart_tlb_flush;
634  rdev->asic->gart.set_page = &r100_pci_gart_set_page;
635  return radeon_gart_table_ram_alloc(rdev);
636 }
637 
/*
 * Body of the PCI GART enable routine (declarator line absent from this
 * extract).  Restores the GART entries, programs the translated address
 * window (RADEON_AIC_LO_ADDR/HI_ADDR from rdev->mc.gtt_start/gtt_end) and the
 * page-table base (RADEON_AIC_PT_BASE), updates RADEON_AIC_CNTL twice, logs
 * the GART size and marks rdev->gart.ready.  Always returns 0.
 *
 * NOTE(review): listing lines 644, 651 and 653 are missing; they presumably
 * compute the two values of "tmp" written to RADEON_AIC_CNTL and one further
 * statement before the DRM_INFO.
 */
639 {
640  uint32_t tmp;
641 
642  radeon_gart_restore(rdev);
643  /* discard memory request outside of configured range */
645  WREG32(RADEON_AIC_CNTL, tmp);
646  /* set address range for PCI address translate */
647  WREG32(RADEON_AIC_LO_ADDR, rdev->mc.gtt_start);
648  WREG32(RADEON_AIC_HI_ADDR, rdev->mc.gtt_end);
649  /* set PCI GART page-table base address */
650  WREG32(RADEON_AIC_PT_BASE, rdev->gart.table_addr);
652  WREG32(RADEON_AIC_CNTL, tmp);
654  DRM_INFO("PCI GART of %uM enabled (table at 0x%016llX).\n",
655  (unsigned)(rdev->mc.gtt_size >> 20),
656  (unsigned long long)rdev->gart.table_addr);
657  rdev->gart.ready = true;
658  return 0;
659 }
660 
/*
 * Body of the PCI GART disable routine (declarator line absent from this
 * extract; r100_pci_gart_disable() is called from the GART teardown body).
 *
 * NOTE(review): listing lines 666-669 are missing, so none of the register
 * accesses that use "tmp" are visible here.
 */
662 {
663  uint32_t tmp;
664 
665  /* discard memory request outside of configured range */
670 }
671 
673 {
674  u32 *gtt = rdev->gart.ptr;
675 
676  if (i < 0 || i > rdev->gart.num_gpu_pages) {
677  return -EINVAL;
678  }
679  gtt[i] = cpu_to_le32(lower_32_bits(addr));
680  return 0;
681 }
682 
/*
 * Body of the PCI GART teardown (declarator line absent from this extract):
 * releases the common GART state with radeon_gart_fini(), then disables the
 * hardware GART with r100_pci_gart_disable().
 *
 * NOTE(review): listing line 687 (one more statement before the closing
 * brace) is missing.
 */
684 {
685  radeon_gart_fini(rdev);
686  r100_pci_gart_disable(rdev);
688 }
689 
/*
 * Body of the interrupt-enable programming routine (declarator line absent
 * from this extract).  Builds an enable mask in "tmp" from the software state
 * in rdev->irq: the GFX ring interrupt, vblank/page-flip on CRTC 0 and 1,
 * and the two hot-plug pins.
 *
 * Returns -EINVAL (after a WARN) when no interrupt handler is installed,
 * 0 otherwise.
 *
 * NOTE(review): listing lines 696, 704, 708 and 716 are missing: the bits
 * OR-ed in for the two CRTCs and the final register write of "tmp" are not
 * visible.
 */
691 {
692  uint32_t tmp = 0;
693 
694  if (!rdev->irq.installed) {
695  WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
697  return -EINVAL;
698  }
699  if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
700  tmp |= RADEON_SW_INT_ENABLE;
701  }
702  if (rdev->irq.crtc_vblank_int[0] ||
703  atomic_read(&rdev->irq.pflip[0])) {
705  }
706  if (rdev->irq.crtc_vblank_int[1] ||
707  atomic_read(&rdev->irq.pflip[1])) {
709  }
710  if (rdev->irq.hpd[0]) {
711  tmp |= RADEON_FP_DETECT_MASK;
712  }
713  if (rdev->irq.hpd[1]) {
714  tmp |= RADEON_FP2_DETECT_MASK;
715  }
717  return 0;
718 }
719 
/*
 * Body of the interrupt-disable routine (declarator line absent from this
 * extract).  The only visible action is a 1 ms delay between the missing
 * statements.
 *
 * NOTE(review): listing lines 724, 727 and 728 are missing, so the register
 * accesses around the delay (which use "tmp") are not visible.
 */
721 {
722  u32 tmp;
723 
725  /* Wait and acknowledge irq */
726  mdelay(1);
729 }
730 
/*
 * Read and acknowledge pending interrupts.
 *
 * Returns the pending status restricted to irq_mask, i.e. 0 when none of the
 * sources handled by this driver is pending.
 *
 * NOTE(review): listing lines 733, 735, 736 and 739 are missing: the
 * declaration/read of "irqs", the remaining terms of irq_mask and the
 * acknowledging write inside "if (irqs)" are not visible.
 */
731 static uint32_t r100_irq_ack(struct radeon_device *rdev)
732 {
734  uint32_t irq_mask = RADEON_SW_INT_TEST |
737 
738  if (irqs) {
740  }
741  return irqs & irq_mask;
742 }
743 
/*
 * Body of the interrupt handler (declarator line absent from this extract).
 *
 * Returns IRQ_NONE when r100_irq_ack() reports nothing pending or when the
 * device is shutting down.  Otherwise it dispatches every pending source
 * (software interrupt, vblank / page flip on CRTC 0 and 1, hot-plug pins 1
 * and 2), re-reads the status until it is zero, schedules the hot-plug work
 * if a pin fired, re-arms MSI when enabled and returns IRQ_HANDLED.
 *
 * NOTE(review): listing lines 759 and 801 are missing: the action taken on
 * RADEON_SW_INT_TEST and the default MSI re-arm statement are not visible.
 */
745 {
746  uint32_t status, msi_rearm;
747  bool queue_hotplug = false;
748 
749  status = r100_irq_ack(rdev);
750  if (!status) {
751  return IRQ_NONE;
752  }
753  if (rdev->shutdown) {
754  return IRQ_NONE;
755  }
756  while (status) {
757  /* SW interrupt */
758  if (status & RADEON_SW_INT_TEST) {
760  }
761  /* Vertical blank interrupts */
762  if (status & RADEON_CRTC_VBLANK_STAT) {
763  if (rdev->irq.crtc_vblank_int[0]) {
764  drm_handle_vblank(rdev->ddev, 0);
765  rdev->pm.vblank_sync = true;
766  wake_up(&rdev->irq.vblank_queue);
767  }
768  if (atomic_read(&rdev->irq.pflip[0]))
769  radeon_crtc_handle_flip(rdev, 0);
770  }
771  if (status & RADEON_CRTC2_VBLANK_STAT) {
772  if (rdev->irq.crtc_vblank_int[1]) {
773  drm_handle_vblank(rdev->ddev, 1);
774  rdev->pm.vblank_sync = true;
775  wake_up(&rdev->irq.vblank_queue);
776  }
777  if (atomic_read(&rdev->irq.pflip[1]))
778  radeon_crtc_handle_flip(rdev, 1);
779  }
780  if (status & RADEON_FP_DETECT_STAT) {
781  queue_hotplug = true;
782  DRM_DEBUG("HPD1\n");
783  }
784  if (status & RADEON_FP2_DETECT_STAT) {
785  queue_hotplug = true;
786  DRM_DEBUG("HPD2\n");
787  }
/* pick up anything that became pending while this pass was running */
788  status = r100_irq_ack(rdev);
789  }
790  if (queue_hotplug)
791  schedule_work(&rdev->hotplug_work);
792  if (rdev->msi_enabled) {
793  switch (rdev->family) {
794  case CHIP_RS400:
795  case CHIP_RS480:
/* RS400/RS480: pulse RS400_MSI_REARM in RADEON_AIC_CNTL (clear, then set) */
796  msi_rearm = RREG32(RADEON_AIC_CNTL) & ~RS400_MSI_REARM;
797  WREG32(RADEON_AIC_CNTL, msi_rearm);
798  WREG32(RADEON_AIC_CNTL, msi_rearm | RS400_MSI_REARM);
799  break;
800  default:
802  break;
803  }
804  }
805  return IRQ_HANDLED;
806 }
807 
/*
 * Body of a per-CRTC query (declarator line absent from this extract) that
 * takes one branch for crtc == 0 and another for every other value.
 *
 * NOTE(review): listing lines 811 and 813, which hold the statement of each
 * branch, are missing.
 */
809 {
810  if (crtc == 0)
812  else
814 }
815 
816 /* Whoever calls radeon_fence_emit should call ring_lock and ask
817  * for enough space (today the callers are ib schedule and buffer move) */
/*
 * Tail of the fence emission routine (the first declarator line is absent
 * from this extract).  Writes to the ring of @fence: a cache flush / idle
 * wait sequence, two writes involving rdev->config.r100.hdp_cntl, then a
 * PACKET0 that stores fence->seq into the fence driver's scratch register.
 *
 * NOTE(review): listing lines 825-828, 830-832, 834-835 and 840-841 are
 * missing, so most ring writes -- including the one that fires the
 * interrupt -- are not visible here.
 */
819  struct radeon_fence *fence)
820 {
821  struct radeon_ring *ring = &rdev->ring[fence->ring];
822 
823  /* We have to make sure that caches are flushed before
824  * CPU might read something from VRAM. */
829  /* Wait until IDLE & CLEAN */
833  radeon_ring_write(ring, rdev->config.r100.hdp_cntl |
836  radeon_ring_write(ring, rdev->config.r100.hdp_cntl);
837  /* Emit fence sequence & fire IRQ */
838  radeon_ring_write(ring, PACKET0(rdev->fence_drv[fence->ring].scratch_reg, 0));
839  radeon_ring_write(ring, fence->seq);
842 }
843 
/*
 * Tail of the semaphore emission hook (the first declarator line is absent
 * from this extract).  It must never be called: the body is an
 * unconditional BUG().
 */
845  struct radeon_ring *ring,
846  struct radeon_semaphore *semaphore,
847  bool emit_wait)
848 {
849  /* Unused on older asics, since we don't have semaphores or multiple rings */
850  BUG();
851 }
852 
/*
 * Copy @num_gpu_pages GPU pages from @src_offset to @dst_offset by emitting
 * blit packets on a ring, at most 8191 pages per packet.
 *
 * Ring space for 64 + 10 * num_loops dwords is reserved up front; the ring
 * is committed and unlocked before returning.
 *
 * Returns 0 on success, -EINVAL when the ring cannot be locked.
 *
 * NOTE(review): listing lines 859, 891, 893-899, 901-903, 913-915, 917-919
 * and 921 are missing: the "ring" declaration, most of the packet header
 * words, the trailing flush and the fence emission inside "if (fence)" are
 * not visible.
 * NOTE(review): the lock-failure path returns -EINVAL instead of the code in
 * "r" that it has just logged -- confirm this is intended.
 * NOTE(review): the two dwords written at 909/910 use num_gpu_pages after it
 * has been reduced by cur_pages at 887, i.e. the number of pages still to be
 * copied -- confirm against the packet layout.
 */
853 int r100_copy_blit(struct radeon_device *rdev,
854  uint64_t src_offset,
855  uint64_t dst_offset,
856  unsigned num_gpu_pages,
857  struct radeon_fence **fence)
858 {
860  uint32_t cur_pages;
861  uint32_t stride_bytes = RADEON_GPU_PAGE_SIZE;
862  uint32_t pitch;
863  uint32_t stride_pixels;
864  unsigned ndw;
865  int num_loops;
866  int r = 0;
867 
868  /* radeon limited to 16k stride */
869  stride_bytes &= 0x3fff;
870  /* radeon pitch is /64 */
871  pitch = stride_bytes / 64;
872  stride_pixels = stride_bytes / 4;
873  num_loops = DIV_ROUND_UP(num_gpu_pages, 8191);
874 
875  /* Ask for enough room for blit + flush + fence */
876  ndw = 64 + (10 * num_loops);
877  r = radeon_ring_lock(rdev, ring, ndw);
878  if (r) {
879  DRM_ERROR("radeon: moving bo (%d) asking for %u dw.\n", r, ndw);
880  return -EINVAL;
881  }
882  while (num_gpu_pages > 0) {
883  cur_pages = num_gpu_pages;
884  if (cur_pages > 8191) {
885  cur_pages = 8191;
886  }
887  num_gpu_pages -= cur_pages;
888 
889  /* pages are in Y direction - height
890  page width in X direction - width */
892  radeon_ring_write(ring,
900  RADEON_ROP3_S |
904  radeon_ring_write(ring, (pitch << 22) | (src_offset >> 10));
905  radeon_ring_write(ring, (pitch << 22) | (dst_offset >> 10));
906  radeon_ring_write(ring, (0x1fff) | (0x1fff << 16));
907  radeon_ring_write(ring, 0);
908  radeon_ring_write(ring, (0x1fff) | (0x1fff << 16));
909  radeon_ring_write(ring, num_gpu_pages);
910  radeon_ring_write(ring, num_gpu_pages);
911  radeon_ring_write(ring, cur_pages | (stride_pixels << 16));
912  }
916  radeon_ring_write(ring,
920  if (fence) {
922  }
923  radeon_ring_unlock_commit(rdev, ring);
924  return r;
925 }
926 
/*
 * Poll, once per microsecond and for at most rdev->usec_timeout iterations,
 * until the CP_CMDSTRM_BUSY field of the sampled status is clear.
 *
 * Returns 0 as soon as the field is clear, -1 on timeout.
 *
 * NOTE(review): listing line 933, which loads "tmp" (presumably from the
 * 0x0E40 status register named by the G_000E40_ accessor), is missing.
 */
927 static int r100_cp_wait_for_idle(struct radeon_device *rdev)
928 {
929  unsigned i;
930  u32 tmp;
931 
932  for (i = 0; i < rdev->usec_timeout; i++) {
934  if (!G_000E40_CP_CMDSTRM_BUSY(tmp)) {
935  return 0;
936  }
937  udelay(1);
938  }
939  return -1;
940 }
941 
/*
 * Emit the initial packet on @ring: reserves two dwords, writes them and
 * commits.  If the ring cannot be locked the function returns silently
 * without writing anything.
 *
 * NOTE(review): listing lines 950 and 952-955 are missing, so the packet
 * header and the value written are not visible.
 */
942 void r100_ring_start(struct radeon_device *rdev, struct radeon_ring *ring)
943 {
944  int r;
945 
946  r = radeon_ring_lock(rdev, ring, 2);
947  if (r) {
948  return;
949  }
951  radeon_ring_write(ring,
956  radeon_ring_unlock_commit(rdev, ring);
957 }
958 
959 
960 /* Load the microcode for the CP */
961 static int r100_cp_init_microcode(struct radeon_device *rdev)
962 {
963  struct platform_device *pdev;
964  const char *fw_name = NULL;
965  int err;
966 
967  DRM_DEBUG_KMS("\n");
968 
969  pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
970  err = IS_ERR(pdev);
971  if (err) {
972  printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
973  return -EINVAL;
974  }
975  if ((rdev->family == CHIP_R100) || (rdev->family == CHIP_RV100) ||
976  (rdev->family == CHIP_RV200) || (rdev->family == CHIP_RS100) ||
977  (rdev->family == CHIP_RS200)) {
978  DRM_INFO("Loading R100 Microcode\n");
979  fw_name = FIRMWARE_R100;
980  } else if ((rdev->family == CHIP_R200) ||
981  (rdev->family == CHIP_RV250) ||
982  (rdev->family == CHIP_RV280) ||
983  (rdev->family == CHIP_RS300)) {
984  DRM_INFO("Loading R200 Microcode\n");
985  fw_name = FIRMWARE_R200;
986  } else if ((rdev->family == CHIP_R300) ||
987  (rdev->family == CHIP_R350) ||
988  (rdev->family == CHIP_RV350) ||
989  (rdev->family == CHIP_RV380) ||
990  (rdev->family == CHIP_RS400) ||
991  (rdev->family == CHIP_RS480)) {
992  DRM_INFO("Loading R300 Microcode\n");
993  fw_name = FIRMWARE_R300;
994  } else if ((rdev->family == CHIP_R420) ||
995  (rdev->family == CHIP_R423) ||
996  (rdev->family == CHIP_RV410)) {
997  DRM_INFO("Loading R400 Microcode\n");
998  fw_name = FIRMWARE_R420;
999  } else if ((rdev->family == CHIP_RS690) ||
1000  (rdev->family == CHIP_RS740)) {
1001  DRM_INFO("Loading RS690/RS740 Microcode\n");
1002  fw_name = FIRMWARE_RS690;
1003  } else if (rdev->family == CHIP_RS600) {
1004  DRM_INFO("Loading RS600 Microcode\n");
1005  fw_name = FIRMWARE_RS600;
1006  } else if ((rdev->family == CHIP_RV515) ||
1007  (rdev->family == CHIP_R520) ||
1008  (rdev->family == CHIP_RV530) ||
1009  (rdev->family == CHIP_R580) ||
1010  (rdev->family == CHIP_RV560) ||
1011  (rdev->family == CHIP_RV570)) {
1012  DRM_INFO("Loading R500 Microcode\n");
1013  fw_name = FIRMWARE_R520;
1014  }
1015 
1016  err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
1018  if (err) {
1019  printk(KERN_ERR "radeon_cp: Failed to load firmware \"%s\"\n",
1020  fw_name);
1021  } else if (rdev->me_fw->size % 8) {
1023  "radeon_cp: Bogus length %zu in firmware \"%s\"\n",
1024  rdev->me_fw->size, fw_name);
1025  err = -EINVAL;
1026  release_firmware(rdev->me_fw);
1027  rdev->me_fw = NULL;
1028  }
1029  return err;
1030 }
1031 
/*
 * Upload the microcode held in rdev->me_fw to the CP.
 *
 * Waits for the GUI to go idle first (only a warning is printed if that
 * fails), then walks the image -- treated as big-endian 32-bit words -- two
 * words per iteration.  Does nothing more when rdev->me_fw is NULL.
 *
 * NOTE(review): listing lines 1045, 1047 and 1049 are missing, so the
 * register writes that consume fw_data[i] and fw_data[i + 1] are not
 * visible.
 */
1032 static void r100_cp_load_microcode(struct radeon_device *rdev)
1033 {
1034  const __be32 *fw_data;
1035  int i, size;
1036 
1037  if (r100_gui_wait_for_idle(rdev)) {
1038  printk(KERN_WARNING "Failed to wait GUI idle while "
1039  "programming pipes. Bad things might happen.\n");
1040  }
1041 
1042  if (rdev->me_fw) {
/* size is counted in 32-bit words */
1043  size = rdev->me_fw->size / 4;
1044  fw_data = (const __be32 *)&rdev->me_fw->data[0];
1046  for (i = 0; i < size; i += 2) {
1048  be32_to_cpup(&fw_data[i]));
1050  be32_to_cpup(&fw_data[i + 1]));
1051  }
1052  }
1053 }
1054 
/*
 * Bring up the command processor (CP) and its GFX ring.
 *
 * Steps visible here: register the debugfs file (failure is only logged),
 * load the microcode image if rdev->me_fw is not set yet, round @ring_size,
 * upload the microcode, initialise the ring, program the ring-buffer and
 * cache-partition registers, enable PCI bus mastering, mark the ring ready,
 * switch TTM to the real VRAM size and, when not resuming, reserve a scratch
 * register for saving the read pointer.
 *
 * Returns 0 on success, or the error from r100_cp_init_microcode(),
 * radeon_ring_init() or the check logged as "cp isn't working".  Failing to
 * get the scratch register is logged but not treated as an error.
 *
 * NOTE(review): listing lines 1084, 1121, 1125, 1127-1128, 1133, 1135, 1138,
 * 1141, 1148, 1151, 1153 and 1157-1159 are missing: several register writes
 * and the call whose result is tested at 1160 are not visible.
 */
1055 int r100_cp_init(struct radeon_device *rdev, unsigned ring_size)
1056 {
1057  struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
1058  unsigned rb_bufsz;
1059  unsigned rb_blksz;
1060  unsigned max_fetch;
1061  unsigned pre_write_timer;
1062  unsigned pre_write_limit;
1063  unsigned indirect2_start;
1064  unsigned indirect1_start;
1065  uint32_t tmp;
1066  int r;
1067 
1068  if (r100_debugfs_cp_init(rdev)) {
1069  DRM_ERROR("Failed to register debugfs file for CP !\n");
1070  }
1071  if (!rdev->me_fw) {
1072  r = r100_cp_init_microcode(rdev);
1073  if (r) {
1074  DRM_ERROR("Failed to load firmware!\n");
1075  return r;
1076  }
1077  }
1078 
1079  /* Align ring size */
1080  rb_bufsz = drm_order(ring_size / 8);
1081  ring_size = (1 << (rb_bufsz + 1)) * 4;
1082  r100_cp_load_microcode(rdev);
1083  r = radeon_ring_init(rdev, ring, ring_size, RADEON_WB_CP_RPTR_OFFSET,
1085  0, 0x7fffff, RADEON_CP_PACKET2);
1086  if (r) {
1087  return r;
1088  }
1089  /* Each time the cp read 1024 bytes (16 dword/quadword) update
1090  * the rptr copy in system ram */
1091  rb_blksz = 9;
1092  /* cp will read 128bytes at a time (4 dwords) */
1093  max_fetch = 1;
1094  ring->align_mask = 16 - 1;
1095  /* Write to CP_RB_WPTR will be delayed for pre_write_timer clocks */
1096  pre_write_timer = 64;
1097  /* Force CP_RB_WPTR write if written more than one time before the
1098  * delay expire
1099  */
1100  pre_write_limit = 0;
1101  /* Setup the cp cache like this (cache size is 96 dwords) :
1102  * RING 0 to 15
1103  * INDIRECT1 16 to 79
1104  * INDIRECT2 80 to 95
1105  * So ring cache size is 16dwords (> (2 * max_fetch = 2 * 4dwords))
1106  * indirect1 cache size is 64dwords (> (2 * max_fetch = 2 * 4dwords))
1107  * indirect2 cache size is 16dwords (> (2 * max_fetch = 2 * 4dwords))
1108  * Idea being that most of the gpu cmd will be through indirect1 buffer
1109  * so it gets the bigger cache.
1110  */
1111  indirect2_start = 80;
1112  indirect1_start = 16;
1113  /* cp setup */
1114  WREG32(0x718, pre_write_timer | (pre_write_limit << 28));
1115  tmp = (REG_SET(RADEON_RB_BUFSZ, rb_bufsz) |
1116  REG_SET(RADEON_RB_BLKSZ, rb_blksz) |
1117  REG_SET(RADEON_MAX_FETCH, max_fetch));
1118 #ifdef __BIG_ENDIAN
1119  tmp |= RADEON_BUF_SWAP_32BIT;
1120 #endif
1122 
1123  /* Set ring address */
1124  DRM_INFO("radeon: ring at 0x%016lX\n", (unsigned long)ring->gpu_addr);
1126  /* Force read & write ptr to 0 */
1129  ring->wptr = 0;
1130  WREG32(RADEON_CP_RB_WPTR, ring->wptr);
1131 
1132  /* set the wb address whether it's enabled or not */
1134  S_00070C_RB_RPTR_ADDR((rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) >> 2));
1136 
1137  if (rdev->wb.enabled)
1139  else {
1140  tmp |= RADEON_RB_NO_UPDATE;
1142  }
1143 
1144  WREG32(RADEON_CP_RB_CNTL, tmp);
1145  udelay(10);
1146  ring->rptr = RREG32(RADEON_CP_RB_RPTR);
1147  /* Set cp mode to bus mastering & enable cp*/
1149  REG_SET(RADEON_INDIRECT2_START, indirect2_start) |
1150  REG_SET(RADEON_INDIRECT1_START, indirect1_start));
1152  WREG32(RADEON_CP_CSQ_MODE, 0x00004D4D);
1154 
1155  /* at this point everything should be setup correctly to enable master */
1156  pci_set_master(rdev->pdev);
1157 
1160  if (r) {
1161  DRM_ERROR("radeon: cp isn't working (%d).\n", r);
1162  return r;
1163  }
1164  ring->ready = true;
1165  radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
1166 
1167  if (!ring->rptr_save_reg /* not resuming from suspend */
1168  && radeon_ring_supports_scratch_reg(rdev, ring)) {
1169  r = radeon_scratch_get(rdev, &ring->rptr_save_reg);
1170  if (r) {
1171  DRM_ERROR("failed to get scratch reg for rptr save (%d).\n", r);
1172  ring->rptr_save_reg = 0;
1173  }
1174  }
1175  return 0;
1176 }
1177 
/*
 * Shut the command processor down: wait for the CP to go idle (a timeout is
 * only logged), disable it with r100_cp_disable() and release the scratch
 * register used for saving the GFX ring's read pointer.
 *
 * NOTE(review): listing line 1186 (one statement between the scratch release
 * and the final log message) is missing.
 */
1178 void r100_cp_fini(struct radeon_device *rdev)
1179 {
1180  if (r100_cp_wait_for_idle(rdev)) {
1181  DRM_ERROR("Wait for CP idle timeout, shutting down CP.\n");
1182  }
1183  /* Disable ring */
1184  r100_cp_disable(rdev);
1185  radeon_scratch_free(rdev, rdev->ring[RADEON_RING_TYPE_GFX_INDEX].rptr_save_reg);
1187  DRM_INFO("radeon: cp finalized\n");
1188 }
1189 
/*
 * Disable the command processor: limit TTM to the CPU-visible part of VRAM,
 * mark the GFX ring as not ready, then wait for the GUI to go idle (a
 * failure there is only reported with a warning).
 *
 * NOTE(review): listing lines 1195-1197 are missing, so the register writes
 * that actually stop the CP are not visible.
 */
1190 void r100_cp_disable(struct radeon_device *rdev)
1191 {
1192  /* Disable ring */
1193  radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
1194  rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
1198  if (r100_gui_wait_for_idle(rdev)) {
1199  printk(KERN_WARNING "Failed to wait GUI idle while "
1200  "programming pipes. Bad things might happen.\n");
1201  }
1202 }
1203 
1204 /*
1205  * CS functions
1206  */
/*
 * Tail of the pitch/offset relocation helper (the first declarator line,
 * which introduces the parser "p", is absent from this extract).
 *
 * Fetches the next relocation and patches IB dword @idx: the low 22 bits of
 * the user value are offset by the buffer's GPU address in units of 1 KiB
 * (gpu_offset >> 10).  Unless the submission asked to keep its own tiling
 * flags, the macro/micro tile bits are derived from the buffer object.
 *
 * Returns 0 on success, the error from r100_cs_packet_next_reloc() when no
 * relocation is available, or -EINVAL when a micro-tiled buffer is used with
 * @reg == RADEON_SRC_PITCH_OFFSET.
 */
1208  struct radeon_cs_packet *pkt,
1209  unsigned idx,
1210  unsigned reg)
1211 {
1212  int r;
1213  u32 tile_flags = 0;
1214  u32 tmp;
1215  struct radeon_cs_reloc *reloc;
1216  u32 value;
1217 
1218  r = r100_cs_packet_next_reloc(p, &reloc);
1219  if (r) {
1220  DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1221  idx, reg);
1222  r100_cs_dump_packet(p, pkt);
1223  return r;
1224  }
1225 
1226  value = radeon_get_ib_value(p, idx);
1227  tmp = value & 0x003fffff;
1228  tmp += (((u32)reloc->lobj.gpu_offset) >> 10);
1229 
1230  if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1231  if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
1232  tile_flags |= RADEON_DST_TILE_MACRO;
1233  if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) {
1234  if (reg == RADEON_SRC_PITCH_OFFSET) {
1235  DRM_ERROR("Cannot src blit from microtiled surface\n");
1236  r100_cs_dump_packet(p, pkt);
1237  return -EINVAL;
1238  }
1239  tile_flags |= RADEON_DST_TILE_MICRO;
1240  }
1241 
1242  tmp |= tile_flags;
/* keep bits 22-29 of the user value; bits 30-31 come only from tile_flags */
1243  p->ib.ptr[idx] = (value & 0x3fc00000) | tmp;
1244  } else
/* keep all of bits 22-31 exactly as submitted */
1245  p->ib.ptr[idx] = (value & 0xffc00000) | tmp;
1246  return 0;
1247 }
1248 
1250  struct radeon_cs_packet *pkt,
1251  int idx)
1252 {
1253  unsigned c, i;
1254  struct radeon_cs_reloc *reloc;
1255  struct r100_cs_track *track;
1256  int r = 0;
1257  volatile uint32_t *ib;
1258  u32 idx_value;
1259 
1260  ib = p->ib.ptr;
1261  track = (struct r100_cs_track *)p->track;
1262  c = radeon_get_ib_value(p, idx++) & 0x1F;
1263  if (c > 16) {
1264  DRM_ERROR("Only 16 vertex buffers are allowed %d\n",
1265  pkt->opcode);
1266  r100_cs_dump_packet(p, pkt);
1267  return -EINVAL;
1268  }
1269  track->num_arrays = c;
1270  for (i = 0; i < (c - 1); i+=2, idx+=3) {
1271  r = r100_cs_packet_next_reloc(p, &reloc);
1272  if (r) {
1273  DRM_ERROR("No reloc for packet3 %d\n",
1274  pkt->opcode);
1275  r100_cs_dump_packet(p, pkt);
1276  return r;
1277  }
1278  idx_value = radeon_get_ib_value(p, idx);
1279  ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset);
1280 
1281  track->arrays[i + 0].esize = idx_value >> 8;
1282  track->arrays[i + 0].robj = reloc->robj;
1283  track->arrays[i + 0].esize &= 0x7F;
1284  r = r100_cs_packet_next_reloc(p, &reloc);
1285  if (r) {
1286  DRM_ERROR("No reloc for packet3 %d\n",
1287  pkt->opcode);
1288  r100_cs_dump_packet(p, pkt);
1289  return r;
1290  }
1291  ib[idx+2] = radeon_get_ib_value(p, idx + 2) + ((u32)reloc->lobj.gpu_offset);
1292  track->arrays[i + 1].robj = reloc->robj;
1293  track->arrays[i + 1].esize = idx_value >> 24;
1294  track->arrays[i + 1].esize &= 0x7F;
1295  }
1296  if (c & 1) {
1297  r = r100_cs_packet_next_reloc(p, &reloc);
1298  if (r) {
1299  DRM_ERROR("No reloc for packet3 %d\n",
1300  pkt->opcode);
1301  r100_cs_dump_packet(p, pkt);
1302  return r;
1303  }
1304  idx_value = radeon_get_ib_value(p, idx);
1305  ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset);
1306  track->arrays[i + 0].robj = reloc->robj;
1307  track->arrays[i + 0].esize = idx_value >> 8;
1308  track->arrays[i + 0].esize &= 0x7F;
1309  }
1310  return r;
1311 }
1312 
1314  struct radeon_cs_packet *pkt,
1315  const unsigned *auth, unsigned n,
1317 {
/*
 * NOTE(review): the line carrying this function's name and first parameter,
 * and the line declaring the `check` callback, are absent from this copy;
 * the call sites further down (r100_cs_parse_packet0(p, &pkt, bitmap, size,
 * &..._check)) suggest the shape. Confirm against the upstream file.
 *
 * Walks the register writes carried by one type-0 packet. For each register
 * whose bit is set in the `auth` bitmap, `check(p, pkt, idx, reg)` is called
 * and its first non-zero result is returned. Returns -EINVAL when the range
 * test below fails and 0 otherwise.
 */
1318  unsigned reg;
1319  unsigned i, j, m;
1320  unsigned idx;
1321  int r;
1322 
/* idx is the IB index of the first data dword, right after the header. */
1323  idx = pkt->idx + 1;
1324  reg = pkt->reg;
1325  /* Check that register fall into register range
1326  * determined by the number of entry (n) in the
1327  * safe register bitmap.
1328  */
/*
 * NOTE(review): both tests use `> n`, so a word index equal to n is accepted
 * and auth[n] is read in the loop below. If n is the element count of auth[],
 * that is one past the end - confirm whether `>=` was intended.
 */
1329  if (pkt->one_reg_wr) {
1330  if ((reg >> 7) > n) {
1331  return -EINVAL;
1332  }
1333  } else {
1334  if (((reg + (pkt->count << 2)) >> 7) > n) {
1335  return -EINVAL;
1336  }
1337  }
/* count + 1 data dwords follow the header, hence `<=`. */
1338  for (i = 0; i <= pkt->count; i++, idx++) {
/* j: 32-bit bitmap word for this register; m: bit for this dword register. */
1339  j = (reg >> 7);
1340  m = 1 << ((reg >> 2) & 31);
1341  if (auth[j] & m) {
1342  r = check(p, pkt, idx, reg);
1343  if (r) {
1344  return r;
1345  }
1346  }
/*
 * In one-register mode `reg` never advances; the loop stops early as soon as
 * the register's bit is clear. Otherwise step to the next dword register.
 */
1347  if (pkt->one_reg_wr) {
1348  if (!(auth[j] & m)) {
1349  break;
1350  }
1351  } else {
1352  reg += 4;
1353  }
1354  }
1355  return 0;
1356 }
1357 
1359  struct radeon_cs_packet *pkt)
1360 {
/*
 * NOTE(review): the line carrying this function's name and first parameter is
 * absent from this copy; callers in this file invoke it as
 * r100_cs_dump_packet(p, pkt).
 *
 * Logs, via DRM_INFO, the IB dwords of one packet: the loop runs for
 * pkt->count + 2 iterations starting at pkt->idx.
 */
1361  volatile uint32_t *ib;
1362  unsigned i;
1363  unsigned idx;
1364 
1365  ib = p->ib.ptr;
1366  idx = pkt->idx;
1367  for (i = 0; i <= (pkt->count + 1); i++, idx++) {
1368  DRM_INFO("ib[%d]=0x%08X\n", idx, ib[idx]);
1369  }
1370 }
1371 
1381  struct radeon_cs_packet *pkt,
1382  unsigned idx)
1383 {
/*
 * NOTE(review): the line carrying this function's name and first parameter is
 * absent from this copy; callers in this file invoke it as
 * r100_cs_packet_parse(p, &pkt, idx).
 *
 * Decodes the packet header found at IB index `idx` into *pkt (idx, type,
 * count, plus reg/one_reg_wr for type 0 or opcode for type 3).
 * Returns 0 on success, -EINVAL when idx is outside the IB chunk, the packet
 * type is unknown, or the packet would extend past the end of the chunk.
 */
1384  struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
1385  uint32_t header;
1386 
1387  if (idx >= ib_chunk->length_dw) {
1388  DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
1389  idx, ib_chunk->length_dw);
1390  return -EINVAL;
1391  }
1392  header = radeon_get_ib_value(p, idx);
1393  pkt->idx = idx;
1394  pkt->type = CP_PACKET_GET_TYPE(header);
1395  pkt->count = CP_PACKET_GET_COUNT(header);
1396  switch (pkt->type) {
1397  case PACKET_TYPE0:
1398  pkt->reg = CP_PACKET0_GET_REG(header);
1399  pkt->one_reg_wr = CP_PACKET0_GET_ONE_REG_WR(header);
1400  break;
1401  case PACKET_TYPE3:
1402  pkt->opcode = CP_PACKET3_GET_OPCODE(header);
1403  break;
1404  case PACKET_TYPE2:
/* count of -1 makes the callers' "count + 2" advance equal one dword. */
1405  pkt->count = -1;
1406  break;
1407  default:
1408  DRM_ERROR("Unknown packet type %d at %d !\n", pkt->type, idx);
1409  return -EINVAL;
1410  }
/* idx + count + 1 is the index of the packet's last dword. */
1411  if ((pkt->count + 1 + pkt->idx) >= ib_chunk->length_dw) {
1412  DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n",
1413  pkt->idx, pkt->type, pkt->count, ib_chunk->length_dw);
1414  return -EINVAL;
1415  }
1416  return 0;
1417 }
1418 
1434 {
/*
 * NOTE(review): the signature line(s) of this function are absent from this
 * copy, as are the two `case` labels inside `switch (reg)` below; the name is
 * presumably r100_cs_packet_parse_vline - confirm against the upstream file.
 *
 * Validates a "wait for vline" sequence starting at p->idx: a WAIT_UNTIL
 * packet with count 0 whose data dword equals RADEON_WAIT_CRTC_VLINE,
 * followed by one more packet. The CRTC id is read from the IB, looked up as
 * a DRM mode object, and the IB is patched: the wait is replaced by PACKET2
 * fillers when the CRTC is disabled, or redirected to the second CRTC's
 * register when the resolved crtc_id is 1.
 * Returns 0 on success or a negative error code.
 */
1435  struct drm_mode_object *obj;
1436  struct drm_crtc *crtc;
1437  struct radeon_crtc *radeon_crtc;
1438  struct radeon_cs_packet p3reloc, waitreloc;
1439  int crtc_id;
1440  int r;
1441  uint32_t header, h_idx, reg;
1442  volatile uint32_t *ib;
1443 
1444  ib = p->ib.ptr;
1445 
1446  /* parse the wait until */
1447  r = r100_cs_packet_parse(p, &waitreloc, p->idx);
1448  if (r)
1449  return r;
1450 
/*
 * NOTE(review): waitreloc.type is not tested here, and r100_cs_packet_parse
 * only fills .reg for PACKET_TYPE0, so .reg may be read uninitialised for
 * other packet types - confirm.
 */
1451  /* check its a wait until and only 1 count */
1452  if (waitreloc.reg != RADEON_WAIT_UNTIL ||
1453  waitreloc.count != 0) {
1454  DRM_ERROR("vline wait had illegal wait until segment\n");
1455  return -EINVAL;
1456  }
1457 
1458  if (radeon_get_ib_value(p, waitreloc.idx + 1) != RADEON_WAIT_CRTC_VLINE) {
1459  DRM_ERROR("vline wait had illegal wait until\n");
1460  return -EINVAL;
1461  }
1462 
1463  /* jump over the NOP */
1464  r = r100_cs_packet_parse(p, &p3reloc, p->idx + waitreloc.count + 2);
1465  if (r)
1466  return r;
1467 
/*
 * h_idx is two dwords before the wait packet; its dword is decoded below as a
 * type-0 header. p->idx is then advanced past both packets parsed above.
 */
1468  h_idx = p->idx - 2;
1469  p->idx += waitreloc.count + 2;
1470  p->idx += p3reloc.count + 2;
1471 
1472  header = radeon_get_ib_value(p, h_idx);
1473  crtc_id = radeon_get_ib_value(p, h_idx + 5);
1474  reg = CP_PACKET0_GET_REG(header);
1475  obj = drm_mode_object_find(p->rdev->ddev, crtc_id, DRM_MODE_OBJECT_CRTC);
1476  if (!obj) {
1477  DRM_ERROR("cannot find crtc %d\n", crtc_id);
1478  return -EINVAL;
1479  }
1480  crtc = obj_to_crtc(obj);
1481  radeon_crtc = to_radeon_crtc(crtc);
/* From here on crtc_id is the driver's own index, not the userspace object id. */
1482  crtc_id = radeon_crtc->crtc_id;
1483 
1484  if (!crtc->enabled) {
1485  /* if the CRTC isn't enabled - we need to nop out the wait until */
1486  ib[h_idx + 2] = PACKET2(0);
1487  ib[h_idx + 3] = PACKET2(0);
1488  } else if (crtc_id == 1) {
1489  switch (reg) {
/* NOTE(review): a `case` label is missing from this copy before each of the two arms below. */
1491  header &= ~R300_CP_PACKET0_REG_MASK;
1492  header |= AVIVO_D2MODE_VLINE_START_END >> 2;
1493  break;
1495  header &= ~R300_CP_PACKET0_REG_MASK;
1496  header |= RADEON_CRTC2_GUI_TRIG_VLINE >> 2;
1497  break;
1498  default:
1499  DRM_ERROR("unknown crtc reloc\n");
1500  return -EINVAL;
1501  }
1502  ib[h_idx] = header;
1503  ib[h_idx + 3] |= RADEON_ENG_DISPLAY_SELECT_CRTC1;
1504  }
1505 
1506  return 0;
1507 }
1508 
1521  struct radeon_cs_reloc **cs_reloc)
1522 {
/*
 * NOTE(review): the line carrying this function's name and first parameter is
 * absent from this copy; callers in this file invoke it as
 * r100_cs_packet_next_reloc(p, &reloc).
 *
 * Consumes the packet at p->idx, which must be a type-3 NOP, and uses the
 * dword following its header as a dword offset into the relocation chunk.
 * On success *cs_reloc points at the matching entry of p->relocs_ptr and 0 is
 * returned. On failure a negative error code is returned and *cs_reloc is
 * NULL, except when there is no relocation chunk at all, in which case
 * *cs_reloc is left untouched.
 */
1523  struct radeon_cs_chunk *relocs_chunk;
1524  struct radeon_cs_packet p3reloc;
1525  unsigned idx;
1526  int r;
1527 
1528  if (p->chunk_relocs_idx == -1) {
1529  DRM_ERROR("No relocation chunk !\n");
1530  return -EINVAL;
1531  }
1532  *cs_reloc = NULL;
1533  relocs_chunk = &p->chunks[p->chunk_relocs_idx];
1534  r = r100_cs_packet_parse(p, &p3reloc, p->idx);
1535  if (r) {
1536  return r;
1537  }
/* The parser position is advanced before the packet is validated. */
1538  p->idx += p3reloc.count + 2;
1539  if (p3reloc.type != PACKET_TYPE3 || p3reloc.opcode != PACKET3_NOP) {
1540  DRM_ERROR("No packet3 for relocation for packet at %d.\n",
1541  p3reloc.idx);
1542  r100_cs_dump_packet(p, &p3reloc);
1543  return -EINVAL;
1544  }
1545  idx = radeon_get_ib_value(p, p3reloc.idx + 1);
1546  if (idx >= relocs_chunk->length_dw) {
1547  DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
1548  idx, relocs_chunk->length_dw);
1549  r100_cs_dump_packet(p, &p3reloc);
1550  return -EINVAL;
1551  }
1552  /* FIXME: we assume reloc size is 4 dwords */
1553  *cs_reloc = p->relocs_ptr[(idx / 4)];
1554  return 0;
1555 }
1556 
1557 static int r100_get_vtx_size(uint32_t vtx_fmt)
1558 {
1559  int vtx_size;
1560  vtx_size = 2;
1561  /* ordered according to bits in spec */
1562  if (vtx_fmt & RADEON_SE_VTX_FMT_W0)
1563  vtx_size++;
1564  if (vtx_fmt & RADEON_SE_VTX_FMT_FPCOLOR)
1565  vtx_size += 3;
1566  if (vtx_fmt & RADEON_SE_VTX_FMT_FPALPHA)
1567  vtx_size++;
1568  if (vtx_fmt & RADEON_SE_VTX_FMT_PKCOLOR)
1569  vtx_size++;
1570  if (vtx_fmt & RADEON_SE_VTX_FMT_FPSPEC)
1571  vtx_size += 3;
1572  if (vtx_fmt & RADEON_SE_VTX_FMT_FPFOG)
1573  vtx_size++;
1574  if (vtx_fmt & RADEON_SE_VTX_FMT_PKSPEC)
1575  vtx_size++;
1576  if (vtx_fmt & RADEON_SE_VTX_FMT_ST0)
1577  vtx_size += 2;
1578  if (vtx_fmt & RADEON_SE_VTX_FMT_ST1)
1579  vtx_size += 2;
1580  if (vtx_fmt & RADEON_SE_VTX_FMT_Q1)
1581  vtx_size++;
1582  if (vtx_fmt & RADEON_SE_VTX_FMT_ST2)
1583  vtx_size += 2;
1584  if (vtx_fmt & RADEON_SE_VTX_FMT_Q2)
1585  vtx_size++;
1586  if (vtx_fmt & RADEON_SE_VTX_FMT_ST3)
1587  vtx_size += 2;
1588  if (vtx_fmt & RADEON_SE_VTX_FMT_Q3)
1589  vtx_size++;
1590  if (vtx_fmt & RADEON_SE_VTX_FMT_Q0)
1591  vtx_size++;
1592  /* blend weight */
1593  if (vtx_fmt & (0x7 << 15))
1594  vtx_size += (vtx_fmt >> 15) & 0x7;
1595  if (vtx_fmt & RADEON_SE_VTX_FMT_N0)
1596  vtx_size += 3;
1597  if (vtx_fmt & RADEON_SE_VTX_FMT_XY1)
1598  vtx_size += 2;
1599  if (vtx_fmt & RADEON_SE_VTX_FMT_Z1)
1600  vtx_size++;
1601  if (vtx_fmt & RADEON_SE_VTX_FMT_W1)
1602  vtx_size++;
1603  if (vtx_fmt & RADEON_SE_VTX_FMT_N1)
1604  vtx_size++;
1605  if (vtx_fmt & RADEON_SE_VTX_FMT_Z)
1606  vtx_size++;
1607  return vtx_size;
1608 }
1609 
/*
 * r100_packet0_check - validate and patch one register write of a type-0 packet
 * @p: command stream parser
 * @pkt: packet the write belongs to (used for dumps on error)
 * @idx: IB index of the value being written
 * @reg: register being written
 *
 * Depending on @reg, this either fetches the next relocation and adds the
 * buffer's GPU offset to the IB value, or records the written value in the
 * tracker (p->track) and marks the affected state dirty.
 * Returns 0 on success or a negative error code. Registers with no arm in
 * the switch are rejected with -EINVAL.
 *
 * NOTE(review): many `case` labels (and the statement that assigns `r` in the
 * first arm) are missing from this copy; wherever an arm below starts without
 * a label, consult the upstream file for the registers it applies to.
 */
1610 static int r100_packet0_check(struct radeon_cs_parser *p,
1611  struct radeon_cs_packet *pkt,
1612  unsigned idx, unsigned reg)
1613 {
1614  struct radeon_cs_reloc *reloc;
1615  struct r100_cs_track *track;
1616  volatile uint32_t *ib;
1617  uint32_t tmp;
1618  int r;
1619  int i, face;
1620  u32 tile_flags = 0;
1621  u32 idx_value;
1622 
1623  ib = p->ib.ptr;
1624  track = (struct r100_cs_track *)p->track;
1625 
1626  idx_value = radeon_get_ib_value(p, idx);
1627 
1628  switch (reg) {
1631  if (r) {
1632  DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1633  idx, reg);
1634  r100_cs_dump_packet(p, pkt);
1635  return r;
1636  }
1637  break;
1638  /* FIXME: only allow PACKET3 blit? easier to check for out of
1639  * range access */
1642  r = r100_reloc_pitch_offset(p, pkt, idx, reg);
1643  if (r)
1644  return r;
1645  break;
/* Depth buffer: remember the object and offset, then relocate the IB value. */
1647  r = r100_cs_packet_next_reloc(p, &reloc);
1648  if (r) {
1649  DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1650  idx, reg);
1651  r100_cs_dump_packet(p, pkt);
1652  return r;
1653  }
1654  track->zb.robj = reloc->robj;
1655  track->zb.offset = idx_value;
1656  track->zb_dirty = true;
1657  ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
1658  break;
/* Colour buffer 0: same handling as the depth buffer above. */
1660  r = r100_cs_packet_next_reloc(p, &reloc);
1661  if (r) {
1662  DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1663  idx, reg);
1664  r100_cs_dump_packet(p, pkt);
1665  return r;
1666  }
1667  track->cb[0].robj = reloc->robj;
1668  track->cb[0].offset = idx_value;
1669  track->cb_dirty = true;
1670  ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
1671  break;
1672  case RADEON_PP_TXOFFSET_0:
1673  case RADEON_PP_TXOFFSET_1:
1674  case RADEON_PP_TXOFFSET_2:
/* Texture unit index: the three TXOFFSET registers are treated as 24 bytes apart. */
1675  i = (reg - RADEON_PP_TXOFFSET_0) / 24;
1676  r = r100_cs_packet_next_reloc(p, &reloc);
1677  if (r) {
1678  DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1679  idx, reg);
1680  r100_cs_dump_packet(p, pkt);
1681  return r;
1682  }
/*
 * Unless the submitter asked to keep its own tiling flags, bits 2..4 of the
 * value are replaced by flags derived from the buffer object's tiling state.
 */
1683  if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1684  if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
1685  tile_flags |= RADEON_TXO_MACRO_TILE;
1686  if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO)
1687  tile_flags |= RADEON_TXO_MICRO_TILE_X2;
1688 
1689  tmp = idx_value & ~(0x7 << 2);
1690  tmp |= tile_flags;
1691  ib[idx] = tmp + ((u32)reloc->lobj.gpu_offset);
1692  } else
1693  ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
1694  track->textures[i].robj = reloc->robj;
1695  track->tex_dirty = true;
1696  break;
/* Cube face offsets of texture unit 0; i is the face slot (registers 4 bytes apart). */
1702  i = (reg - RADEON_PP_CUBIC_OFFSET_T0_0) / 4;
1703  r = r100_cs_packet_next_reloc(p, &reloc);
1704  if (r) {
1705  DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1706  idx, reg);
1707  r100_cs_dump_packet(p, pkt);
1708  return r;
1709  }
1710  track->textures[0].cube_info[i].offset = idx_value;
1711  ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
1712  track->textures[0].cube_info[i].robj = reloc->robj;
1713  track->tex_dirty = true;
1714  break;
/* Cube face offsets of texture unit 1. */
1720  i = (reg - RADEON_PP_CUBIC_OFFSET_T1_0) / 4;
1721  r = r100_cs_packet_next_reloc(p, &reloc);
1722  if (r) {
1723  DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1724  idx, reg);
1725  r100_cs_dump_packet(p, pkt);
1726  return r;
1727  }
1728  track->textures[1].cube_info[i].offset = idx_value;
1729  ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
1730  track->textures[1].cube_info[i].robj = reloc->robj;
1731  track->tex_dirty = true;
1732  break;
/* Cube face offsets of texture unit 2. */
1738  i = (reg - RADEON_PP_CUBIC_OFFSET_T2_0) / 4;
1739  r = r100_cs_packet_next_reloc(p, &reloc);
1740  if (r) {
1741  DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1742  idx, reg);
1743  r100_cs_dump_packet(p, pkt);
1744  return r;
1745  }
1746  track->textures[2].cube_info[i].offset = idx_value;
1747  ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
1748  track->textures[2].cube_info[i].robj = reloc->robj;
1749  track->tex_dirty = true;
1750  break;
/* maxy is taken from bits 16..26 of the value; it feeds the buffer size checks. */
1752  track->maxy = ((idx_value >> 16) & 0x7FF);
1753  track->cb_dirty = true;
1754  track->zb_dirty = true;
1755  break;
/*
 * Colour pitch: a relocation is consumed only for its tiling flags; no GPU
 * offset is added to the value here.
 */
1757  r = r100_cs_packet_next_reloc(p, &reloc);
1758  if (r) {
1759  DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1760  idx, reg);
1761  r100_cs_dump_packet(p, pkt);
1762  return r;
1763  }
1764  if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1765  if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
1766  tile_flags |= RADEON_COLOR_TILE_ENABLE;
1767  if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO)
1768  tile_flags |= RADEON_COLOR_MICROTILE_ENABLE;
1769 
1770  tmp = idx_value & ~(0x7 << 16);
1771  tmp |= tile_flags;
1772  ib[idx] = tmp;
1773  } else
1774  ib[idx] = idx_value;
1775 
1776  track->cb[0].pitch = idx_value & RADEON_COLORPITCH_MASK;
1777  track->cb_dirty = true;
1778  break;
1780  track->zb.pitch = idx_value & RADEON_DEPTHPITCH_MASK;
1781  track->zb_dirty = true;
1782  break;
1783  case RADEON_RB3D_CNTL:
/* Map the colour format field to a per-pixel size; unknown formats are rejected. */
1784  switch ((idx_value >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f) {
1785  case 7:
1786  case 8:
1787  case 9:
1788  case 11:
1789  case 12:
1790  track->cb[0].cpp = 1;
1791  break;
1792  case 3:
1793  case 4:
1794  case 15:
1795  track->cb[0].cpp = 2;
1796  break;
1797  case 6:
1798  track->cb[0].cpp = 4;
1799  break;
1800  default:
1801  DRM_ERROR("Invalid color buffer format (%d) !\n",
1802  ((idx_value >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f));
1803  return -EINVAL;
1804  }
1805  track->z_enabled = !!(idx_value & RADEON_Z_ENABLE);
1806  track->cb_dirty = true;
1807  track->zb_dirty = true;
1808  break;
/* Depth format: unlike the colour case, unknown codes leave zb.cpp unchanged. */
1810  switch (idx_value & 0xf) {
1811  case 0:
1812  track->zb.cpp = 2;
1813  break;
1814  case 2:
1815  case 3:
1816  case 4:
1817  case 5:
1818  case 9:
1819  case 11:
1820  track->zb.cpp = 4;
1821  break;
1822  default:
1823  break;
1824  }
1825  track->zb_dirty = true;
1826  break;
1828  r = r100_cs_packet_next_reloc(p, &reloc);
1829  if (r) {
1830  DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1831  idx, reg);
1832  r100_cs_dump_packet(p, pkt);
1833  return r;
1834  }
1835  ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
1836  break;
1837  case RADEON_PP_CNTL:
1838  {
/* Bit (4 + i) of the value enables texture unit i. */
1839  uint32_t temp = idx_value >> 4;
1840  for (i = 0; i < track->num_texture; i++)
1841  track->textures[i].enabled = !!(temp & (1 << i));
1842  track->tex_dirty = true;
1843  }
1844  break;
1845  case RADEON_SE_VF_CNTL:
1846  track->vap_vf_cntl = idx_value;
1847  break;
1848  case RADEON_SE_VTX_FMT:
1849  track->vtx_size = r100_get_vtx_size(idx_value);
1850  break;
1851  case RADEON_PP_TEX_SIZE_0:
1852  case RADEON_PP_TEX_SIZE_1:
1853  case RADEON_PP_TEX_SIZE_2:
1854  i = (reg - RADEON_PP_TEX_SIZE_0) / 8;
/* Both size fields are stored minus one in the register value. */
1855  track->textures[i].width = (idx_value & RADEON_TEX_USIZE_MASK) + 1;
1856  track->textures[i].height = ((idx_value & RADEON_TEX_VSIZE_MASK) >> RADEON_TEX_VSIZE_SHIFT) + 1;
1857  track->tex_dirty = true;
1858  break;
1859  case RADEON_PP_TEX_PITCH_0:
1860  case RADEON_PP_TEX_PITCH_1:
1861  case RADEON_PP_TEX_PITCH_2:
1862  i = (reg - RADEON_PP_TEX_PITCH_0) / 8;
1863  track->textures[i].pitch = idx_value + 32;
1864  track->tex_dirty = true;
1865  break;
1866  case RADEON_PP_TXFILTER_0:
1867  case RADEON_PP_TXFILTER_1:
1868  case RADEON_PP_TXFILTER_2:
1869  i = (reg - RADEON_PP_TXFILTER_0) / 24;
/* NOTE(review): the continuation line of the next statement is missing from this copy. */
1870  track->textures[i].num_levels = ((idx_value & RADEON_MAX_MIP_LEVEL_MASK)
/* Field values 2 and 6 (bits 23..25 for width, 27..29 for height) turn off power-of-two round-up. */
1872  tmp = (idx_value >> 23) & 0x7;
1873  if (tmp == 2 || tmp == 6)
1874  track->textures[i].roundup_w = false;
1875  tmp = (idx_value >> 27) & 0x7;
1876  if (tmp == 2 || tmp == 6)
1877  track->textures[i].roundup_h = false;
1878  track->tex_dirty = true;
1879  break;
1880  case RADEON_PP_TXFORMAT_0:
1881  case RADEON_PP_TXFORMAT_1:
1882  case RADEON_PP_TXFORMAT_2:
1883  i = (reg - RADEON_PP_TXFORMAT_0) / 24;
/* Non-power-of-two textures are sized from the pitch; others carry log2 sizes here. */
1884  if (idx_value & RADEON_TXFORMAT_NON_POWER2) {
1885  track->textures[i].use_pitch = 1;
1886  } else {
1887  track->textures[i].use_pitch = 0;
1888  track->textures[i].width = 1 << ((idx_value >> RADEON_TXFORMAT_WIDTH_SHIFT) & RADEON_TXFORMAT_WIDTH_MASK);
1889  track->textures[i].height = 1 << ((idx_value >> RADEON_TXFORMAT_HEIGHT_SHIFT) & RADEON_TXFORMAT_HEIGHT_MASK);
1890  }
1891  if (idx_value & RADEON_TXFORMAT_CUBIC_MAP_ENABLE)
1892  track->textures[i].tex_coord_type = 2;
/* NOTE(review): several format `case` labels are missing from this copy in the switch below. */
1893  switch ((idx_value & RADEON_TXFORMAT_FORMAT_MASK)) {
1894  case RADEON_TXFORMAT_I8:
1896  case RADEON_TXFORMAT_Y8:
1897  track->textures[i].cpp = 1;
1898  track->textures[i].compress_format = R100_TRACK_COMP_NONE;
1899  break;
1900  case RADEON_TXFORMAT_AI88:
1909  track->textures[i].cpp = 2;
1910  track->textures[i].compress_format = R100_TRACK_COMP_NONE;
1911  break;
1916  track->textures[i].cpp = 4;
1917  track->textures[i].compress_format = R100_TRACK_COMP_NONE;
1918  break;
1919  case RADEON_TXFORMAT_DXT1:
1920  track->textures[i].cpp = 1;
1921  track->textures[i].compress_format = R100_TRACK_COMP_DXT1;
1922  break;
1923  case RADEON_TXFORMAT_DXT23:
1924  case RADEON_TXFORMAT_DXT45:
1925  track->textures[i].cpp = 1;
1926  track->textures[i].compress_format = R100_TRACK_COMP_DXT35;
1927  break;
1928  }
/* The fifth cube slot takes its size from bits 16..19 and 20..23 of this register. */
1929  track->textures[i].cube_info[4].width = 1 << ((idx_value >> 16) & 0xf);
1930  track->textures[i].cube_info[4].height = 1 << ((idx_value >> 20) & 0xf);
1931  track->tex_dirty = true;
1932  break;
/* Cube face sizes: one byte per face, low nibble log2 width, high nibble log2 height. */
1936  tmp = idx_value;
1937  i = (reg - RADEON_PP_CUBIC_FACES_0) / 4;
1938  for (face = 0; face < 4; face++) {
1939  track->textures[i].cube_info[face].width = 1 << ((tmp >> (face * 8)) & 0xf);
1940  track->textures[i].cube_info[face].height = 1 << ((tmp >> ((face * 8) + 4)) & 0xf);
1941  }
1942  track->tex_dirty = true;
1943  break;
1944  default:
1945  printk(KERN_ERR "Forbidden register 0x%04X in cs at %d\n",
1946  reg, idx);
1947  return -EINVAL;
1948  }
1949  return 0;
1950 }
1951 
1953  struct radeon_cs_packet *pkt,
1954  struct radeon_bo *robj)
1955 {
/*
 * NOTE(review): the line carrying this function's name and first parameter is
 * absent from this copy; the caller in this file invokes it as
 * r100_cs_track_check_pkt3_indx_buffer(p, pkt, reloc->robj).
 *
 * Reads the third data dword of the packet (IB index pkt->idx + 3) and
 * rejects the packet with -EINVAL when that value plus one exceeds the size
 * of @robj. Returns 0 otherwise.
 */
1956  unsigned idx;
1957  u32 value;
1958  idx = pkt->idx + 1;
1959  value = radeon_get_ib_value(p, idx + 2);
/*
 * NOTE(review): `value + 1` is computed on a u32, so 0xFFFFFFFF wraps to 0
 * and passes this test - confirm whether that input must be rejected.
 */
1960  if ((value + 1) > radeon_bo_size(robj)) {
1961  DRM_ERROR("[drm] Buffer too small for PACKET3 INDX_BUFFER "
1962  "(need %u have %lu) !\n",
1963  value + 1,
1964  radeon_bo_size(robj));
1965  return -EINVAL;
1966  }
1967  return 0;
1968 }
1969 
/*
 * r100_packet3_check - validate and patch one type-3 packet
 * @p: command stream parser
 * @pkt: decoded packet header
 *
 * Dispatches on pkt->opcode. Packets that reference a buffer consume a
 * relocation and have the buffer's GPU offset added to the relevant IB dword.
 * Draw packets copy their draw control dword into the tracker and run
 * r100_cs_track_check() on the accumulated state.
 * Returns 0 on success or a negative error code. Opcodes with no arm in the
 * switch are rejected with -EINVAL.
 *
 * NOTE(review): several `case` labels are missing from this copy; arms that
 * start without a label below must be matched against the upstream file.
 * The "triggers drawing ..." comments sit after the `break` of the arm they
 * appear to describe.
 */
1970 static int r100_packet3_check(struct radeon_cs_parser *p,
1971  struct radeon_cs_packet *pkt)
1972 {
1973  struct radeon_cs_reloc *reloc;
1974  struct r100_cs_track *track;
1975  unsigned idx;
1976  volatile uint32_t *ib;
1977  int r;
1978 
1979  ib = p->ib.ptr;
/* idx is the IB index of the first data dword after the header. */
1980  idx = pkt->idx + 1;
1981  track = (struct r100_cs_track *)p->track;
1982  switch (pkt->opcode) {
1984  r = r100_packet3_load_vbpntr(p, pkt, idx);
1985  if (r)
1986  return r;
1987  break;
1988  case PACKET3_INDX_BUFFER:
1989  r = r100_cs_packet_next_reloc(p, &reloc);
1990  if (r) {
1991  DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode);
1992  r100_cs_dump_packet(p, pkt);
1993  return r;
1994  }
1995  ib[idx+1] = radeon_get_ib_value(p, idx+1) + ((u32)reloc->lobj.gpu_offset);
1996  r = r100_cs_track_check_pkt3_indx_buffer(p, pkt, reloc->robj);
1997  if (r) {
1998  return r;
1999  }
2000  break;
2001  case 0x23:
2002  /* 3D_RNDR_GEN_INDX_PRIM on r100/r200 */
2003  r = r100_cs_packet_next_reloc(p, &reloc);
2004  if (r) {
2005  DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode);
2006  r100_cs_dump_packet(p, pkt);
2007  return r;
2008  }
2009  ib[idx] = radeon_get_ib_value(p, idx) + ((u32)reloc->lobj.gpu_offset);
/* The packet describes a single vertex array whose element size is the vertex size. */
2010  track->num_arrays = 1;
2011  track->vtx_size = r100_get_vtx_size(radeon_get_ib_value(p, idx + 2));
2012 
2013  track->arrays[0].robj = reloc->robj;
2014  track->arrays[0].esize = track->vtx_size;
2015 
2016  track->max_indx = radeon_get_ib_value(p, idx+1);
2017 
2018  track->vap_vf_cntl = radeon_get_ib_value(p, idx+3);
2019  track->immd_dwords = pkt->count - 1;
2020  r = r100_cs_track_check(p->rdev, track);
2021  if (r)
2022  return r;
2023  break;
2024  case PACKET3_3D_DRAW_IMMD:
/* Bits 4..5 of the draw control dword are the primitive walk mode. */
2025  if (((radeon_get_ib_value(p, idx + 1) >> 4) & 0x3) != 3) {
2026  DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n");
2027  return -EINVAL;
2028  }
2029  track->vtx_size = r100_get_vtx_size(radeon_get_ib_value(p, idx + 0));
2030  track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1);
2031  track->immd_dwords = pkt->count - 1;
2032  r = r100_cs_track_check(p->rdev, track);
2033  if (r)
2034  return r;
2035  break;
2036  /* triggers drawing using in-packet vertex data */
2038  if (((radeon_get_ib_value(p, idx) >> 4) & 0x3) != 3) {
2039  DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n");
2040  return -EINVAL;
2041  }
2042  track->vap_vf_cntl = radeon_get_ib_value(p, idx);
2043  track->immd_dwords = pkt->count;
2044  r = r100_cs_track_check(p->rdev, track);
2045  if (r)
2046  return r;
2047  break;
2048  /* triggers drawing using in-packet vertex data */
2050  track->vap_vf_cntl = radeon_get_ib_value(p, idx);
2051  r = r100_cs_track_check(p->rdev, track);
2052  if (r)
2053  return r;
2054  break;
2055  /* triggers drawing of vertex buffers setup elsewhere */
2057  track->vap_vf_cntl = radeon_get_ib_value(p, idx);
2058  r = r100_cs_track_check(p->rdev, track);
2059  if (r)
2060  return r;
2061  break;
2062  /* triggers drawing using indices to vertex buffer */
2063  case PACKET3_3D_DRAW_VBUF:
2064  track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1);
2065  r = r100_cs_track_check(p->rdev, track);
2066  if (r)
2067  return r;
2068  break;
2069  /* triggers drawing of vertex buffers setup elsewhere */
2070  case PACKET3_3D_DRAW_INDX:
2071  track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1);
2072  r = r100_cs_track_check(p->rdev, track);
2073  if (r)
2074  return r;
2075  break;
2076  /* triggers drawing using indices to vertex buffer */
2077  case PACKET3_3D_CLEAR_HIZ:
/* Only the file handle recorded in rdev->hyperz_filp may submit this packet. */
2079  if (p->rdev->hyperz_filp != p->filp)
2080  return -EINVAL;
2081  break;
2082  case PACKET3_NOP:
2083  break;
2084  default:
2085  DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode);
2086  return -EINVAL;
2087  }
2088  return 0;
2089 }
2090 
2092 {
/*
 * NOTE(review): the signature line of this function is absent from this copy
 * (presumably r100_cs_parse taking the parser `p`), as is the last argument
 * line of the first r100_cs_parse_packet0() call below - confirm upstream.
 *
 * Allocates and resets a tracker, stores it in p->track, then walks the IB
 * chunk packet by packet, dispatching each one by type until p->idx reaches
 * the end of the chunk. Returns 0 on success, -ENOMEM if the tracker cannot
 * be allocated, or the first error reported while parsing.
 */
2093  struct radeon_cs_packet pkt;
2094  struct r100_cs_track *track;
2095  int r;
2096 
2097  track = kzalloc(sizeof(*track), GFP_KERNEL);
2098  if (!track)
2099  return -ENOMEM;
2100  r100_cs_track_clear(p->rdev, track);
/*
 * The tracker is not freed on the error returns below; presumably whoever
 * tears down the parser releases p->track - verify against the caller.
 */
2101  p->track = track;
2102  do {
2103  r = r100_cs_packet_parse(p, &pkt, p->idx);
2104  if (r) {
2105  return r;
2106  }
/* Step past the header plus count + 1 data dwords before checking the packet. */
2107  p->idx += pkt.count + 2;
2108  switch (pkt.type) {
2109  case PACKET_TYPE0:
2110  if (p->rdev->family >= CHIP_R200)
2111  r = r100_cs_parse_packet0(p, &pkt,
2112  p->rdev->config.r100.reg_safe_bm,
2113  p->rdev->config.r100.reg_safe_bm_size,
2115  else
2116  r = r100_cs_parse_packet0(p, &pkt,
2117  p->rdev->config.r100.reg_safe_bm,
2118  p->rdev->config.r100.reg_safe_bm_size,
2119  &r100_packet0_check);
2120  break;
2121  case PACKET_TYPE2:
2122  break;
2123  case PACKET_TYPE3:
2124  r = r100_packet3_check(p, &pkt);
2125  break;
2126  default:
2127  DRM_ERROR("Unknown packet type %d !\n",
2128  pkt.type);
2129  return -EINVAL;
2130  }
2131  if (r) {
2132  return r;
2133  }
2134  } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
2135  return 0;
2136 }
2137 
/*
 * r100_cs_track_texture_print - dump the tracked state of one texture unit
 * @t: tracked texture state to print
 *
 * Emits one DRM_ERROR line per field. It is called from the texture and cube
 * size checks in this file right before they fail.
 */
2138 static void r100_cs_track_texture_print(struct r100_cs_track_texture *t)
2139 {
2140  DRM_ERROR("pitch %d\n", t->pitch);
2141  DRM_ERROR("use_pitch %d\n", t->use_pitch);
2142  DRM_ERROR("width %d\n", t->width);
2143  DRM_ERROR("width_11 %d\n", t->width_11);
2144  DRM_ERROR("height %d\n", t->height);
2145  DRM_ERROR("height_11 %d\n", t->height_11);
2146  DRM_ERROR("num levels %d\n", t->num_levels);
2147  DRM_ERROR("depth %d\n", t->txdepth);
/* The line labelled "bpp" prints the cpp field. */
2148  DRM_ERROR("bpp %d\n", t->cpp);
2149  DRM_ERROR("coordinate type %d\n", t->tex_coord_type);
2150  DRM_ERROR("width round to power of 2 %d\n", t->roundup_w);
2151  DRM_ERROR("height round to power of 2 %d\n", t->roundup_h);
2152  DRM_ERROR("compress format %d\n", t->compress_format);
2153 }
2154 
2155 static int r100_track_compress_size(int compress_format, int w, int h)
2156 {
2157  int block_width, block_height, block_bytes;
2158  int wblocks, hblocks;
2159  int min_wblocks;
2160  int sz;
2161 
2162  block_width = 4;
2163  block_height = 4;
2164 
2165  switch (compress_format) {
2166  case R100_TRACK_COMP_DXT1:
2167  block_bytes = 8;
2168  min_wblocks = 4;
2169  break;
2170  default:
2171  case R100_TRACK_COMP_DXT35:
2172  block_bytes = 16;
2173  min_wblocks = 2;
2174  break;
2175  }
2176 
2177  hblocks = (h + block_height - 1) / block_height;
2178  wblocks = (w + block_width - 1) / block_width;
2179  if (wblocks < min_wblocks)
2180  wblocks = min_wblocks;
2181  sz = wblocks * hblocks * block_bytes;
2182  return sz;
2183 }
2184 
2185 static int r100_cs_track_cube(struct radeon_device *rdev,
2186  struct r100_cs_track *track, unsigned idx)
2187 {
2188  unsigned face, w, h;
2189  struct radeon_bo *cube_robj;
2190  unsigned long size;
2191  unsigned compress_format = track->textures[idx].compress_format;
2192 
2193  for (face = 0; face < 5; face++) {
2194  cube_robj = track->textures[idx].cube_info[face].robj;
2195  w = track->textures[idx].cube_info[face].width;
2196  h = track->textures[idx].cube_info[face].height;
2197 
2198  if (compress_format) {
2199  size = r100_track_compress_size(compress_format, w, h);
2200  } else
2201  size = w * h;
2202  size *= track->textures[idx].cpp;
2203 
2204  size += track->textures[idx].cube_info[face].offset;
2205 
2206  if (size > radeon_bo_size(cube_robj)) {
2207  DRM_ERROR("Cube texture offset greater than object size %lu %lu\n",
2208  size, radeon_bo_size(cube_robj));
2209  r100_cs_track_texture_print(&track->textures[idx]);
2210  return -1;
2211  }
2212  }
2213  return 0;
2214 }
2215 
2216 static int r100_cs_track_texture_check(struct radeon_device *rdev,
2217  struct r100_cs_track *track)
2218 {
2219  struct radeon_bo *robj;
2220  unsigned long size;
2221  unsigned u, i, w, h, d;
2222  int ret;
2223 
2224  for (u = 0; u < track->num_texture; u++) {
2225  if (!track->textures[u].enabled)
2226  continue;
2227  if (track->textures[u].lookup_disable)
2228  continue;
2229  robj = track->textures[u].robj;
2230  if (robj == NULL) {
2231  DRM_ERROR("No texture bound to unit %u\n", u);
2232  return -EINVAL;
2233  }
2234  size = 0;
2235  for (i = 0; i <= track->textures[u].num_levels; i++) {
2236  if (track->textures[u].use_pitch) {
2237  if (rdev->family < CHIP_R300)
2238  w = (track->textures[u].pitch / track->textures[u].cpp) / (1 << i);
2239  else
2240  w = track->textures[u].pitch / (1 << i);
2241  } else {
2242  w = track->textures[u].width;
2243  if (rdev->family >= CHIP_RV515)
2244  w |= track->textures[u].width_11;
2245  w = w / (1 << i);
2246  if (track->textures[u].roundup_w)
2247  w = roundup_pow_of_two(w);
2248  }
2249  h = track->textures[u].height;
2250  if (rdev->family >= CHIP_RV515)
2251  h |= track->textures[u].height_11;
2252  h = h / (1 << i);
2253  if (track->textures[u].roundup_h)
2254  h = roundup_pow_of_two(h);
2255  if (track->textures[u].tex_coord_type == 1) {
2256  d = (1 << track->textures[u].txdepth) / (1 << i);
2257  if (!d)
2258  d = 1;
2259  } else {
2260  d = 1;
2261  }
2262  if (track->textures[u].compress_format) {
2263 
2264  size += r100_track_compress_size(track->textures[u].compress_format, w, h) * d;
2265  /* compressed textures are block based */
2266  } else
2267  size += w * h * d;
2268  }
2269  size *= track->textures[u].cpp;
2270 
2271  switch (track->textures[u].tex_coord_type) {
2272  case 0:
2273  case 1:
2274  break;
2275  case 2:
2276  if (track->separate_cube) {
2277  ret = r100_cs_track_cube(rdev, track, u);
2278  if (ret)
2279  return ret;
2280  } else
2281  size *= 6;
2282  break;
2283  default:
2284  DRM_ERROR("Invalid texture coordinate type %u for unit "
2285  "%u\n", track->textures[u].tex_coord_type, u);
2286  return -EINVAL;
2287  }
2288  if (size > radeon_bo_size(robj)) {
2289  DRM_ERROR("Texture of unit %u needs %lu bytes but is "
2290  "%lu\n", u, size, radeon_bo_size(robj));
2291  r100_cs_track_texture_print(&track->textures[u]);
2292  return -EINVAL;
2293  }
2294  }
2295  return 0;
2296 }
2297 
2298 int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track)
2299 {
2300  unsigned i;
2301  unsigned long size;
2302  unsigned prim_walk;
2303  unsigned nverts;
2304  unsigned num_cb = track->cb_dirty ? track->num_cb : 0;
2305 
2306  if (num_cb && !track->zb_cb_clear && !track->color_channel_mask &&
2307  !track->blend_read_enable)
2308  num_cb = 0;
2309 
2310  for (i = 0; i < num_cb; i++) {
2311  if (track->cb[i].robj == NULL) {
2312  DRM_ERROR("[drm] No buffer for color buffer %d !\n", i);
2313  return -EINVAL;
2314  }
2315  size = track->cb[i].pitch * track->cb[i].cpp * track->maxy;
2316  size += track->cb[i].offset;
2317  if (size > radeon_bo_size(track->cb[i].robj)) {
2318  DRM_ERROR("[drm] Buffer too small for color buffer %d "
2319  "(need %lu have %lu) !\n", i, size,
2320  radeon_bo_size(track->cb[i].robj));
2321  DRM_ERROR("[drm] color buffer %d (%u %u %u %u)\n",
2322  i, track->cb[i].pitch, track->cb[i].cpp,
2323  track->cb[i].offset, track->maxy);
2324  return -EINVAL;
2325  }
2326  }
2327  track->cb_dirty = false;
2328 
2329  if (track->zb_dirty && track->z_enabled) {
2330  if (track->zb.robj == NULL) {
2331  DRM_ERROR("[drm] No buffer for z buffer !\n");
2332  return -EINVAL;
2333  }
2334  size = track->zb.pitch * track->zb.cpp * track->maxy;
2335  size += track->zb.offset;
2336  if (size > radeon_bo_size(track->zb.robj)) {
2337  DRM_ERROR("[drm] Buffer too small for z buffer "
2338  "(need %lu have %lu) !\n", size,
2339  radeon_bo_size(track->zb.robj));
2340  DRM_ERROR("[drm] zbuffer (%u %u %u %u)\n",
2341  track->zb.pitch, track->zb.cpp,
2342  track->zb.offset, track->maxy);
2343  return -EINVAL;
2344  }
2345  }
2346  track->zb_dirty = false;
2347 
2348  if (track->aa_dirty && track->aaresolve) {
2349  if (track->aa.robj == NULL) {
2350  DRM_ERROR("[drm] No buffer for AA resolve buffer %d !\n", i);
2351  return -EINVAL;
2352  }
2353  /* I believe the format comes from colorbuffer0. */
2354  size = track->aa.pitch * track->cb[0].cpp * track->maxy;
2355  size += track->aa.offset;
2356  if (size > radeon_bo_size(track->aa.robj)) {
2357  DRM_ERROR("[drm] Buffer too small for AA resolve buffer %d "
2358  "(need %lu have %lu) !\n", i, size,
2359  radeon_bo_size(track->aa.robj));
2360  DRM_ERROR("[drm] AA resolve buffer %d (%u %u %u %u)\n",
2361  i, track->aa.pitch, track->cb[0].cpp,
2362  track->aa.offset, track->maxy);
2363  return -EINVAL;
2364  }
2365  }
2366  track->aa_dirty = false;
2367 
2368  prim_walk = (track->vap_vf_cntl >> 4) & 0x3;
2369  if (track->vap_vf_cntl & (1 << 14)) {
2370  nverts = track->vap_alt_nverts;
2371  } else {
2372  nverts = (track->vap_vf_cntl >> 16) & 0xFFFF;
2373  }
2374  switch (prim_walk) {
2375  case 1:
2376  for (i = 0; i < track->num_arrays; i++) {
2377  size = track->arrays[i].esize * track->max_indx * 4;
2378  if (track->arrays[i].robj == NULL) {
2379  DRM_ERROR("(PW %u) Vertex array %u no buffer "
2380  "bound\n", prim_walk, i);
2381  return -EINVAL;
2382  }
2383  if (size > radeon_bo_size(track->arrays[i].robj)) {
2384  dev_err(rdev->dev, "(PW %u) Vertex array %u "
2385  "need %lu dwords have %lu dwords\n",
2386  prim_walk, i, size >> 2,
2387  radeon_bo_size(track->arrays[i].robj)
2388  >> 2);
2389  DRM_ERROR("Max indices %u\n", track->max_indx);
2390  return -EINVAL;
2391  }
2392  }
2393  break;
2394  case 2:
2395  for (i = 0; i < track->num_arrays; i++) {
2396  size = track->arrays[i].esize * (nverts - 1) * 4;
2397  if (track->arrays[i].robj == NULL) {
2398  DRM_ERROR("(PW %u) Vertex array %u no buffer "
2399  "bound\n", prim_walk, i);
2400  return -EINVAL;
2401  }
2402  if (size > radeon_bo_size(track->arrays[i].robj)) {
2403  dev_err(rdev->dev, "(PW %u) Vertex array %u "
2404  "need %lu dwords have %lu dwords\n",
2405  prim_walk, i, size >> 2,
2406  radeon_bo_size(track->arrays[i].robj)
2407  >> 2);
2408  return -EINVAL;
2409  }
2410  }
2411  break;
2412  case 3:
2413  size = track->vtx_size * nverts;
2414  if (size != track->immd_dwords) {
2415  DRM_ERROR("IMMD draw %u dwors but needs %lu dwords\n",
2416  track->immd_dwords, size);
2417  DRM_ERROR("VAP_VF_CNTL.NUM_VERTICES %u, VTX_SIZE %u\n",
2418  nverts, track->vtx_size);
2419  return -EINVAL;
2420  }
2421  break;
2422  default:
2423  DRM_ERROR("[drm] Invalid primitive walk %d for VAP_VF_CNTL\n",
2424  prim_walk);
2425  return -EINVAL;
2426  }
2427 
2428  if (track->tex_dirty) {
2429  track->tex_dirty = false;
2430  return r100_cs_track_texture_check(rdev, track);
2431  }
2432  return 0;
2433 }
2434 
2435 void r100_cs_track_clear(struct radeon_device *rdev, struct r100_cs_track *track)
2436 {
2437  unsigned i, face;
2438 
2439  track->cb_dirty = true;
2440  track->zb_dirty = true;
2441  track->tex_dirty = true;
2442  track->aa_dirty = true;
2443 
2444  if (rdev->family < CHIP_R300) {
2445  track->num_cb = 1;
2446  if (rdev->family <= CHIP_RS200)
2447  track->num_texture = 3;
2448  else
2449  track->num_texture = 6;
2450  track->maxy = 2048;
2451  track->separate_cube = 1;
2452  } else {
2453  track->num_cb = 4;
2454  track->num_texture = 16;
2455  track->maxy = 4096;
2456  track->separate_cube = 0;
2457  track->aaresolve = false;
2458  track->aa.robj = NULL;
2459  }
2460 
2461  for (i = 0; i < track->num_cb; i++) {
2462  track->cb[i].robj = NULL;
2463  track->cb[i].pitch = 8192;
2464  track->cb[i].cpp = 16;
2465  track->cb[i].offset = 0;
2466  }
2467  track->z_enabled = true;
2468  track->zb.robj = NULL;
2469  track->zb.pitch = 8192;
2470  track->zb.cpp = 4;
2471  track->zb.offset = 0;
2472  track->vtx_size = 0x7F;
2473  track->immd_dwords = 0xFFFFFFFFUL;
2474  track->num_arrays = 11;
2475  track->max_indx = 0x00FFFFFFUL;
2476  for (i = 0; i < track->num_arrays; i++) {
2477  track->arrays[i].robj = NULL;
2478  track->arrays[i].esize = 0x7F;
2479  }
2480  for (i = 0; i < track->num_texture; i++) {
2481  track->textures[i].compress_format = R100_TRACK_COMP_NONE;
2482  track->textures[i].pitch = 16536;
2483  track->textures[i].width = 16536;
2484  track->textures[i].height = 16536;
2485  track->textures[i].width_11 = 1 << 11;
2486  track->textures[i].height_11 = 1 << 11;
2487  track->textures[i].num_levels = 12;
2488  if (rdev->family <= CHIP_RS200) {
2489  track->textures[i].tex_coord_type = 0;
2490  track->textures[i].txdepth = 0;
2491  } else {
2492  track->textures[i].txdepth = 16;
2493  track->textures[i].tex_coord_type = 1;
2494  }
2495  track->textures[i].cpp = 64;
2496  track->textures[i].robj = NULL;
2497  /* CS IB emission code makes sure texture unit are disabled */
2498  track->textures[i].enabled = false;
2499  track->textures[i].lookup_disable = false;
2500  track->textures[i].roundup_w = true;
2501  track->textures[i].roundup_h = true;
2502  if (track->separate_cube)
2503  for (face = 0; face < 5; face++) {
2504  track->textures[i].cube_info[face].robj = NULL;
2505  track->textures[i].cube_info[face].width = 16536;
2506  track->textures[i].cube_info[face].height = 16536;
2507  track->textures[i].cube_info[face].offset = 0;
2508  }
2509  }
2510 }
2511 
2512 /*
2513  * Global GPU functions
2514  */
/*
 * r100_errata - reset rdev->pll_errata and test the chip family.
 *
 * Clears rdev->pll_errata, then checks rdev->family against two groups of
 * chips: RV200/RS200 and RV100/RS100/RS200.
 *
 * NOTE(review): both if-bodies are empty in this listing and the embedded
 * numbering skips 2520 and 2526, so the statements that presumably set the
 * errata bits were lost in extraction - restore them from a pristine copy
 * of the file before relying on this function.
 */
2515 static void r100_errata(struct radeon_device *rdev)
2516 {
2517  rdev->pll_errata = 0;
2518 
2519  if (rdev->family == CHIP_RV200 || rdev->family == CHIP_RS200) {
2521  }
2522 
2523  if (rdev->family == CHIP_RV100 ||
2524  rdev->family == CHIP_RS100 ||
2525  rdev->family == CHIP_RS200) {
2527  }
2528 }
2529 
/*
 * r100_rbbm_fifo_wait_for_entry - poll until a sampled value reaches n.
 *
 * Loops at most rdev->usec_timeout times with DRM_UDELAY(1) between
 * iterations. Returns 0 as soon as tmp >= n, or -1 once the loop is
 * exhausted.
 *
 * NOTE(review): the statement that loads tmp is absent from this listing
 * (embedded numbering skips 2536), so as shown tmp is compared without
 * ever being assigned. Restore the missing read from a pristine copy.
 */
2530 static int r100_rbbm_fifo_wait_for_entry(struct radeon_device *rdev, unsigned n)
2531 {
2532  unsigned i;
2533  uint32_t tmp;
2534 
2535  for (i = 0; i < rdev->usec_timeout; i++) {
2537  if (tmp >= n) {
2538  return 0;
2539  }
2540  DRM_UDELAY(1);
2541  }
2542  return -1;
2543 }
2544 
2546 {
2547  unsigned i;
2548  uint32_t tmp;
2549 
2550  if (r100_rbbm_fifo_wait_for_entry(rdev, 64)) {
2551  printk(KERN_WARNING "radeon: wait for empty RBBM fifo failed !"
2552  " Bad things might happen.\n");
2553  }
2554  for (i = 0; i < rdev->usec_timeout; i++) {
2555  tmp = RREG32(RADEON_RBBM_STATUS);
2556  if (!(tmp & RADEON_RBBM_ACTIVE)) {
2557  return 0;
2558  }
2559  DRM_UDELAY(1);
2560  }
2561  return -1;
2562 }
2563 
2565 {
2566  unsigned i;
2567  uint32_t tmp;
2568 
2569  for (i = 0; i < rdev->usec_timeout; i++) {
2570  /* read MC_STATUS */
2571  tmp = RREG32(RADEON_MC_STATUS);
2572  if (tmp & RADEON_MC_IDLE) {
2573  return 0;
2574  }
2575  DRM_UDELAY(1);
2576  }
2577  return -1;
2578 }
2579 
2581 {
2582  u32 rbbm_status;
2583 
2584  rbbm_status = RREG32(R_000E40_RBBM_STATUS);
2585  if (!G_000E40_GUI_ACTIVE(rbbm_status)) {
2587  return false;
2588  }
2589  /* force CP activities */
2590  radeon_ring_force_activity(rdev, ring);
2591  return radeon_ring_test_lockup(rdev, ring);
2592 }
2593 
2594 /* required on r1xx, r2xx, r300, r(v)350, r420/r481, rs400/rs480 */
/*
 * r100_enable_bm - write an updated value back to RADEON_BUS_CNTL.
 *
 * NOTE(review): the statement that computes tmp is absent from this listing
 * (embedded numbering skips 2599), so as shown tmp is written to the
 * register without being assigned. Restore the missing line from a pristine
 * copy of the file.
 */
2595 void r100_enable_bm(struct radeon_device *rdev)
2596 {
2597  uint32_t tmp;
2598  /* Enable bus mastering */
2600  WREG32(RADEON_BUS_CNTL, tmp);
2601 }
2602 
2603 void r100_bm_disable(struct radeon_device *rdev)
2604 {
2605  u32 tmp;
2606 
2607  /* disable bus mastering */
2608  tmp = RREG32(R_000030_BUS_CNTL);
2609  WREG32(R_000030_BUS_CNTL, (tmp & 0xFFFFFFFF) | 0x00000044);
2610  mdelay(1);
2611  WREG32(R_000030_BUS_CNTL, (tmp & 0xFFFFFFFF) | 0x00000042);
2612  mdelay(1);
2613  WREG32(R_000030_BUS_CNTL, (tmp & 0xFFFFFFFF) | 0x00000040);
2614  tmp = RREG32(RADEON_BUS_CNTL);
2615  mdelay(1);
2616  pci_clear_master(rdev->pdev);
2617  mdelay(1);
2618 }
2619 
2621 {
2622  struct r100_mc_save save;
2623  u32 status, tmp;
2624  int ret = 0;
2625 
2626  status = RREG32(R_000E40_RBBM_STATUS);
2627  if (!G_000E40_GUI_ACTIVE(status)) {
2628  return 0;
2629  }
2630  r100_mc_stop(rdev, &save);
2631  status = RREG32(R_000E40_RBBM_STATUS);
2632  dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status);
2633  /* stop CP */
2635  tmp = RREG32(RADEON_CP_RB_CNTL);
2639  WREG32(RADEON_CP_RB_CNTL, tmp);
2640  /* save PCI state */
2641  pci_save_state(rdev->pdev);
2642  /* disable bus mastering */
2643  r100_bm_disable(rdev);
2649  mdelay(500);
2651  mdelay(1);
2652  status = RREG32(R_000E40_RBBM_STATUS);
2653  dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status);
2654  /* reset CP */
2657  mdelay(500);
2659  mdelay(1);
2660  status = RREG32(R_000E40_RBBM_STATUS);
2661  dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status);
2662  /* restore PCI & busmastering */
2663  pci_restore_state(rdev->pdev);
2664  r100_enable_bm(rdev);
2665  /* Check if GPU is idle */
2666  if (G_000E40_SE_BUSY(status) || G_000E40_RE_BUSY(status) ||
2667  G_000E40_TAM_BUSY(status) || G_000E40_PB_BUSY(status)) {
2668  dev_err(rdev->dev, "failed to reset GPU\n");
2669  ret = -1;
2670  } else
2671  dev_info(rdev->dev, "GPU reset succeed\n");
2672  r100_mc_resume(rdev, &save);
2673  return ret;
2674 }
2675 
2677 {
2678  struct drm_device *dev = rdev->ddev;
2679  bool force_dac2 = false;
2680  u32 tmp;
2681 
2682  /* set these so they don't interfere with anything */
2690 
2691  /* always set up dac2 on rn50 and some rv100 as lots
2692  * of servers seem to wire it up to a VGA port but
2693  * don't report it in the bios connector
2694  * table.
2695  */
2696  switch (dev->pdev->device) {
2697  /* RN50 */
2698  case 0x515e:
2699  case 0x5969:
2700  force_dac2 = true;
2701  break;
2702  /* RV100*/
2703  case 0x5159:
2704  case 0x515a:
2705  /* DELL triple head servers */
2706  if ((dev->pdev->subsystem_vendor == 0x1028 /* DELL */) &&
2707  ((dev->pdev->subsystem_device == 0x016c) ||
2708  (dev->pdev->subsystem_device == 0x016d) ||
2709  (dev->pdev->subsystem_device == 0x016e) ||
2710  (dev->pdev->subsystem_device == 0x016f) ||
2711  (dev->pdev->subsystem_device == 0x0170) ||
2712  (dev->pdev->subsystem_device == 0x017d) ||
2713  (dev->pdev->subsystem_device == 0x017e) ||
2714  (dev->pdev->subsystem_device == 0x0183) ||
2715  (dev->pdev->subsystem_device == 0x018a) ||
2716  (dev->pdev->subsystem_device == 0x019a)))
2717  force_dac2 = true;
2718  break;
2719  }
2720 
2721  if (force_dac2) {
2722  u32 disp_hw_debug = RREG32(RADEON_DISP_HW_DEBUG);
2723  u32 tv_dac_cntl = RREG32(RADEON_TV_DAC_CNTL);
2724  u32 dac2_cntl = RREG32(RADEON_DAC_CNTL2);
2725 
2726  /* For CRT on DAC2, don't turn it on if BIOS didn't
2727  enable it, even it's detected.
2728  */
2729 
2730  /* force it to crtc0 */
2731  dac2_cntl &= ~RADEON_DAC2_DAC_CLK_SEL;
2732  dac2_cntl |= RADEON_DAC2_DAC2_CLK_SEL;
2733  disp_hw_debug |= RADEON_CRT2_DISP1_SEL;
2734 
2735  /* set up the TV DAC */
2736  tv_dac_cntl &= ~(RADEON_TV_DAC_PEDESTAL |
2743  tv_dac_cntl |= (RADEON_TV_DAC_NBLANK |
2746  (0x58 << 16));
2747 
2748  WREG32(RADEON_TV_DAC_CNTL, tv_dac_cntl);
2749  WREG32(RADEON_DISP_HW_DEBUG, disp_hw_debug);
2750  WREG32(RADEON_DAC_CNTL2, dac2_cntl);
2751  }
2752 
2753  /* switch PM block to ACPI mode */
2755  tmp &= ~RADEON_PM_MODE_SEL;
2757 
2758 }
2759 
2760 /*
2761  * VRAM info
2762  */
/*
 * r100_vram_get_type - fill in rdev->mc.vram_is_ddr and rdev->mc.vram_width.
 *
 * Width selection as visible here:
 *  - RV100/RS100/RS200: 32 bits when RV100_HALF_MODE is set in
 *    RADEON_MEM_CNTL, otherwise 64; on RADEON_SINGLE_CRTC parts the width
 *    is then divided by 4 and the memory is flagged as DDR.
 *  - other families up to RV280: 128 bits when any bit of
 *    RADEON_MEM_NUM_CHANNELS_MASK is set in RADEON_MEM_CNTL, otherwise 64.
 *  - everything else: 128 bits.
 */
2763 static void r100_vram_get_type(struct radeon_device *rdev)
2764 {
2765  uint32_t tmp;
2766 
2767  rdev->mc.vram_is_ddr = false;
2768  if (rdev->flags & RADEON_IS_IGP)
2769  rdev->mc.vram_is_ddr = true;
 /*
  * NOTE(review): the embedded numbering skips 2770. The assignment below
  * was presumably guarded by a condition on that lost line; as shown it
  * runs unconditionally. Restore the line from a pristine copy.
  */
2771  rdev->mc.vram_is_ddr = true;
2772  if ((rdev->family == CHIP_RV100) ||
2773  (rdev->family == CHIP_RS100) ||
2774  (rdev->family == CHIP_RS200)) {
2775  tmp = RREG32(RADEON_MEM_CNTL);
2776  if (tmp & RV100_HALF_MODE) {
2777  rdev->mc.vram_width = 32;
2778  } else {
2779  rdev->mc.vram_width = 64;
2780  }
2781  if (rdev->flags & RADEON_SINGLE_CRTC) {
2782  rdev->mc.vram_width /= 4;
2783  rdev->mc.vram_is_ddr = true;
2784  }
2785  } else if (rdev->family <= CHIP_RV280) {
2786  tmp = RREG32(RADEON_MEM_CNTL);
2787  if (tmp & RADEON_MEM_NUM_CHANNELS_MASK) {
2788  rdev->mc.vram_width = 128;
2789  } else {
2790  rdev->mc.vram_width = 64;
2791  }
2792  } else {
2793  /* newer IGPs */
2794  rdev->mc.vram_width = 128;
2795  }
2796 }
2797 
/*
 * r100_get_accessible_vram - size of VRAM reachable through the aperture.
 *
 * Reads RADEON_CONFIG_APER_SIZE and returns either that value or twice
 * that value:
 *  - RV280 and RV350 or newer: twice the aperture size.
 *  - otherwise, when bit 7 of PCI config byte 0xe is set (multifunction
 *    device): one aperture size.
 *  - otherwise: decided by a test that is not visible in this listing.
 *
 * NOTE(review): the embedded numbering skips 2810-2811 and 2831; the
 * statements on those lines (presumably the HDP_APER_CNTL write and read
 * that the comments refer to) were lost in extraction.
 */
2798 static u32 r100_get_accessible_vram(struct radeon_device *rdev)
2799 {
2800  u32 aper_size;
2801  u8 byte;
2802 
2803  aper_size = RREG32(RADEON_CONFIG_APER_SIZE);
2804 
2805  /* Set HDP_APER_CNTL only on cards that are known not to be broken,
2806  * that is has the 2nd generation multifunction PCI interface
2807  */
2808  if (rdev->family == CHIP_RV280 ||
2809  rdev->family >= CHIP_RV350) {
2812  DRM_INFO("Generation 2 PCI interface, using max accessible memory\n");
2813  return aper_size * 2;
2814  }
2815 
2816  /* Older cards have all sorts of funny issues to deal with. First
2817  * check if it's a multifunction card by reading the PCI config
2818  * header type... Limit those to one aperture size
2819  */
2820  pci_read_config_byte(rdev->pdev, 0xe, &byte);
2821  if (byte & 0x80) {
2822  DRM_INFO("Generation 1 PCI interface in multifunction mode\n");
2823  DRM_INFO("Limiting VRAM to one aperture\n");
2824  return aper_size;
2825  }
2826 
2827  /* Single function older card. We read HDP_APER_CNTL to see how the BIOS
2828  * have set it up. We don't write this as it's broken on some ASICs but
2829  * we expect the BIOS to have done the right thing (might be too optimistic...)
2830  */
 /* NOTE(review): the condition guarding the next return (line 2831) is missing. */
2832  return aper_size * 2;
2833  return aper_size;
2834 }
2835 
2837 {
2838  u64 config_aper_size;
2839 
2840  /* work out accessible VRAM */
2841  rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
2842  rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
2843  rdev->mc.visible_vram_size = r100_get_accessible_vram(rdev);
2844  /* FIXME we don't use the second aperture yet when we could use it */
2845  if (rdev->mc.visible_vram_size > rdev->mc.aper_size)
2846  rdev->mc.visible_vram_size = rdev->mc.aper_size;
2847  config_aper_size = RREG32(RADEON_CONFIG_APER_SIZE);
2848  if (rdev->flags & RADEON_IS_IGP) {
2849  uint32_t tom;
2850  /* read NB_TOM to get the amount of ram stolen for the GPU */
2851  tom = RREG32(RADEON_NB_TOM);
2852  rdev->mc.real_vram_size = (((tom >> 16) - (tom & 0xffff) + 1) << 16);
2853  WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size);
2854  rdev->mc.mc_vram_size = rdev->mc.real_vram_size;
2855  } else {
2856  rdev->mc.real_vram_size = RREG32(RADEON_CONFIG_MEMSIZE);
2857  /* Some production boards of m6 will report 0
2858  * if it's 8 MB
2859  */
2860  if (rdev->mc.real_vram_size == 0) {
2861  rdev->mc.real_vram_size = 8192 * 1024;
2862  WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size);
2863  }
2864  /* Fix for RN50, M6, M7 with 8/16/32(??) MBs of VRAM -
2865  * Novell bug 204882 + along with lots of ubuntu ones
2866  */
2867  if (rdev->mc.aper_size > config_aper_size)
2868  config_aper_size = rdev->mc.aper_size;
2869 
2870  if (config_aper_size > rdev->mc.real_vram_size)
2871  rdev->mc.mc_vram_size = config_aper_size;
2872  else
2873  rdev->mc.mc_vram_size = rdev->mc.real_vram_size;
2874  }
2875 }
2876 
2877 void r100_vga_set_state(struct radeon_device *rdev, bool state)
2878 {
2879  uint32_t temp;
2880 
2881  temp = RREG32(RADEON_CONFIG_CNTL);
2882  if (state == false) {
2883  temp &= ~RADEON_CFG_VGA_RAM_EN;
2884  temp |= RADEON_CFG_VGA_IO_DIS;
2885  } else {
2886  temp &= ~RADEON_CFG_VGA_IO_DIS;
2887  }
2888  WREG32(RADEON_CONFIG_CNTL, temp);
2889 }
2890 
/*
 * r100_mc_init - set up the memory controller description in rdev->mc.
 *
 * Determines VRAM type and sizes, then places VRAM at rdev->mc.aper_base,
 * or on IGP parts at the address formed from the low 16 bits of
 * RADEON_NB_TOM shifted left by 16. The GTT location is only computed here
 * for non-AGP devices.
 *
 * NOTE(review): the embedded numbering skips 2904, so the final statement
 * of this function is missing from the listing.
 */
2891 static void r100_mc_init(struct radeon_device *rdev)
2892 {
2893  u64 base;
2894 
2895  r100_vram_get_type(rdev);
2896  r100_vram_init_sizes(rdev);
2897  base = rdev->mc.aper_base;
2898  if (rdev->flags & RADEON_IS_IGP)
2899  base = (RREG32(RADEON_NB_TOM) & 0xffff) << 16;
2900  radeon_vram_location(rdev, &rdev->mc, base);
2901  rdev->mc.gtt_base_align = 0;
2902  if (!(rdev->flags & RADEON_IS_AGP))
2903  radeon_gtt_location(rdev, &rdev->mc);
2905 }
2906 
2907 
2908 /*
2909  * Indirect registers accessor
2910  */
2912 {
2913  if (rdev->pll_errata & CHIP_ERRATA_PLL_DUMMYREADS) {
2916  }
2917 }
2918 
/*
 * r100_pll_errata_after_data - apply PLL errata workarounds after an access.
 *
 * Two independent workarounds, selected by bits in rdev->pll_errata:
 *  - CHIP_ERRATA_PLL_DELAY: wait 5 ms.
 *  - CHIP_ERRATA_R300_CG: a register save/modify/restore sequence.
 *
 * NOTE(review): the embedded numbering skips 2936 and 2938-2940, so in this
 * listing `save` is never loaded and the computed `tmp` is never written
 * anywhere. Restore the missing register accesses from a pristine copy.
 */
2919 static void r100_pll_errata_after_data(struct radeon_device *rdev)
2920 {
2921  /* This workarounds is necessary on RV100, RS100 and RS200 chips
2922  * or the chip could hang on a subsequent access
2923  */
2924  if (rdev->pll_errata & CHIP_ERRATA_PLL_DELAY) {
2925  mdelay(5);
2926  }
2927 
2928  /* This function is required to workaround a hardware bug in some (all?)
2929  * revisions of the R300. This workaround should be called after every
2930  * CLOCK_CNTL_INDEX register access. If not, register reads afterward
2931  * may not be correct.
2932  */
2933  if (rdev->pll_errata & CHIP_ERRATA_R300_CG) {
2934  uint32_t save, tmp;
2935 
2937  tmp = save & ~(0x3f | RADEON_PLL_WR_EN);
2941  }
2942 }
2943 
2945 {
2946  uint32_t data;
2947 
2948  WREG8(RADEON_CLOCK_CNTL_INDEX, reg & 0x3f);
2951  r100_pll_errata_after_data(rdev);
2952  return data;
2953 }
2954 
2956 {
2957  WREG8(RADEON_CLOCK_CNTL_INDEX, ((reg & 0x3f) | RADEON_PLL_WR_EN));
2960  r100_pll_errata_after_data(rdev);
2961 }
2962 
/*
 * r100_set_safe_registers - choose the safe-register bitmap for this chip.
 *
 * Stores a bitmap pointer and its element count in rdev->config.r100:
 * rn50_reg_safe_bm for RN50, r100_reg_safe_bm for families below R200.
 *
 * NOTE(review): the else branch (R200 and newer) is empty in this listing;
 * the embedded numbering skips 2972, so its statement was lost.
 */
2963 static void r100_set_safe_registers(struct radeon_device *rdev)
2964 {
2965  if (ASIC_IS_RN50(rdev)) {
2966  rdev->config.r100.reg_safe_bm = rn50_reg_safe_bm;
2967  rdev->config.r100.reg_safe_bm_size = ARRAY_SIZE(rn50_reg_safe_bm);
2968  } else if (rdev->family < CHIP_R200) {
2969  rdev->config.r100.reg_safe_bm = r100_reg_safe_bm;
2970  rdev->config.r100.reg_safe_bm_size = ARRAY_SIZE(r100_reg_safe_bm);
2971  } else {
2973  }
2974 }
2975 
2976 /*
2977  * Debugfs info
2978  */
2979 #if defined(CONFIG_DEBUG_FS)
/*
 * r100_debugfs_rbbm_info - debugfs dump of RBBM status and command FIFO.
 *
 * Prints RBBM_STATUS, the register at 0xE7C (labelled RBBM_CMDFIFO_STAT)
 * and CP_STAT, then walks 64 FIFO slots: each slot index, OR-ed with 0x100,
 * is written to RADEON_RBBM_CMDFIFO_ADDR and RADEON_RBBM_CMDFIFO_DATA is
 * read back. Always returns 0.
 *
 * NOTE(review): the embedded numbering skips 2994-2995, so `value` is
 * printed without being assigned in this listing. Restore the missing
 * statements from a pristine copy.
 */
2980 static int r100_debugfs_rbbm_info(struct seq_file *m, void *data)
2981 {
2982  struct drm_info_node *node = (struct drm_info_node *) m->private;
2983  struct drm_device *dev = node->minor->dev;
2984  struct radeon_device *rdev = dev->dev_private;
2985  uint32_t reg, value;
2986  unsigned i;
2987 
2988  seq_printf(m, "RBBM_STATUS 0x%08x\n", RREG32(RADEON_RBBM_STATUS));
2989  seq_printf(m, "RBBM_CMDFIFO_STAT 0x%08x\n", RREG32(0xE7C));
2990  seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
2991  for (i = 0; i < 64; i++) {
2992  WREG32(RADEON_RBBM_CMDFIFO_ADDR, i | 0x100);
2993  reg = (RREG32(RADEON_RBBM_CMDFIFO_DATA) - 1) >> 2;
2996  seq_printf(m, "[0x%03X] 0x%04X=0x%08X\n", i, reg, value);
2997  }
2998  return 0;
2999 }
3000 
3001 static int r100_debugfs_cp_ring_info(struct seq_file *m, void *data)
3002 {
3003  struct drm_info_node *node = (struct drm_info_node *) m->private;
3004  struct drm_device *dev = node->minor->dev;
3005  struct radeon_device *rdev = dev->dev_private;
3006  struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3007  uint32_t rdp, wdp;
3008  unsigned count, i, j;
3009 
3010  radeon_ring_free_size(rdev, ring);
3011  rdp = RREG32(RADEON_CP_RB_RPTR);
3012  wdp = RREG32(RADEON_CP_RB_WPTR);
3013  count = (rdp + ring->ring_size - wdp) & ring->ptr_mask;
3014  seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
3015  seq_printf(m, "CP_RB_WPTR 0x%08x\n", wdp);
3016  seq_printf(m, "CP_RB_RPTR 0x%08x\n", rdp);
3017  seq_printf(m, "%u free dwords in ring\n", ring->ring_free_dw);
3018  seq_printf(m, "%u dwords in ring\n", count);
3019  for (j = 0; j <= count; j++) {
3020  i = (rdp + j) & ring->ptr_mask;
3021  seq_printf(m, "r[%04d]=0x%08x\n", i, ring->ring[i]);
3022  }
3023  return 0;
3024 }
3025 
3026 
3027 static int r100_debugfs_cp_csq_fifo(struct seq_file *m, void *data)
3028 {
3029  struct drm_info_node *node = (struct drm_info_node *) m->private;
3030  struct drm_device *dev = node->minor->dev;
3031  struct radeon_device *rdev = dev->dev_private;
3032  uint32_t csq_stat, csq2_stat, tmp;
3033  unsigned r_rptr, r_wptr, ib1_rptr, ib1_wptr, ib2_rptr, ib2_wptr;
3034  unsigned i;
3035 
3036  seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
3037  seq_printf(m, "CP_CSQ_MODE 0x%08x\n", RREG32(RADEON_CP_CSQ_MODE));
3038  csq_stat = RREG32(RADEON_CP_CSQ_STAT);
3039  csq2_stat = RREG32(RADEON_CP_CSQ2_STAT);
3040  r_rptr = (csq_stat >> 0) & 0x3ff;
3041  r_wptr = (csq_stat >> 10) & 0x3ff;
3042  ib1_rptr = (csq_stat >> 20) & 0x3ff;
3043  ib1_wptr = (csq2_stat >> 0) & 0x3ff;
3044  ib2_rptr = (csq2_stat >> 10) & 0x3ff;
3045  ib2_wptr = (csq2_stat >> 20) & 0x3ff;
3046  seq_printf(m, "CP_CSQ_STAT 0x%08x\n", csq_stat);
3047  seq_printf(m, "CP_CSQ2_STAT 0x%08x\n", csq2_stat);
3048  seq_printf(m, "Ring rptr %u\n", r_rptr);
3049  seq_printf(m, "Ring wptr %u\n", r_wptr);
3050  seq_printf(m, "Indirect1 rptr %u\n", ib1_rptr);
3051  seq_printf(m, "Indirect1 wptr %u\n", ib1_wptr);
3052  seq_printf(m, "Indirect2 rptr %u\n", ib2_rptr);
3053  seq_printf(m, "Indirect2 wptr %u\n", ib2_wptr);
3054  /* FIXME: 0, 128, 640 depends on fifo setup see cp_init_kms
3055  * 128 = indirect1_start * 8 & 640 = indirect2_start * 8 */
3056  seq_printf(m, "Ring fifo:\n");
3057  for (i = 0; i < 256; i++) {
3058  WREG32(RADEON_CP_CSQ_ADDR, i << 2);
3059  tmp = RREG32(RADEON_CP_CSQ_DATA);
3060  seq_printf(m, "rfifo[%04d]=0x%08X\n", i, tmp);
3061  }
3062  seq_printf(m, "Indirect1 fifo:\n");
3063  for (i = 256; i <= 512; i++) {
3064  WREG32(RADEON_CP_CSQ_ADDR, i << 2);
3065  tmp = RREG32(RADEON_CP_CSQ_DATA);
3066  seq_printf(m, "ib1fifo[%04d]=0x%08X\n", i, tmp);
3067  }
3068  seq_printf(m, "Indirect2 fifo:\n");
3069  for (i = 640; i < ib1_wptr; i++) {
3070  WREG32(RADEON_CP_CSQ_ADDR, i << 2);
3071  tmp = RREG32(RADEON_CP_CSQ_DATA);
3072  seq_printf(m, "ib2fifo[%04d]=0x%08X\n", i, tmp);
3073  }
3074  return 0;
3075 }
3076 
/*
 * r100_debugfs_mc_info - debugfs dump of memory-controller related registers.
 *
 * Prints one "NAME 0xVALUE" line per register. Always returns 0.
 *
 * NOTE(review): the embedded numbering skips 3084, 3086, 3090 and 3094, so
 * the register reads feeding the CONFIG_MEMSIZE, MC_FB_LOCATION,
 * MC_AGP_LOCATION and HOST_PATH_CNTL lines are missing from this listing.
 * As shown, the first print uses an unassigned tmp and the others repeat
 * the previously read value. Restore the reads from a pristine copy.
 */
3077 static int r100_debugfs_mc_info(struct seq_file *m, void *data)
3078 {
3079  struct drm_info_node *node = (struct drm_info_node *) m->private;
3080  struct drm_device *dev = node->minor->dev;
3081  struct radeon_device *rdev = dev->dev_private;
3082  uint32_t tmp;
3083 
3085  seq_printf(m, "CONFIG_MEMSIZE 0x%08x\n", tmp);
3087  seq_printf(m, "MC_FB_LOCATION 0x%08x\n", tmp);
3088  tmp = RREG32(RADEON_BUS_CNTL);
3089  seq_printf(m, "BUS_CNTL 0x%08x\n", tmp);
3091  seq_printf(m, "MC_AGP_LOCATION 0x%08x\n", tmp);
3092  tmp = RREG32(RADEON_AGP_BASE);
3093  seq_printf(m, "AGP_BASE 0x%08x\n", tmp);
3095  seq_printf(m, "HOST_PATH_CNTL 0x%08x\n", tmp);
 /* The AIC_CTRL and AIC_TLB_ADDR registers are addressed by raw offset. */
3096  tmp = RREG32(0x01D0);
3097  seq_printf(m, "AIC_CTRL 0x%08x\n", tmp);
3098  tmp = RREG32(RADEON_AIC_LO_ADDR);
3099  seq_printf(m, "AIC_LO_ADDR 0x%08x\n", tmp);
3100  tmp = RREG32(RADEON_AIC_HI_ADDR);
3101  seq_printf(m, "AIC_HI_ADDR 0x%08x\n", tmp);
3102  tmp = RREG32(0x01E4);
3103  seq_printf(m, "AIC_TLB_ADDR 0x%08x\n", tmp);
3104  return 0;
3105 }
3106 
/*
 * Debugfs file tables. Each entry pairs a file name with the show function
 * defined above. The tables are handed to radeon_debugfs_add_files()
 * together with an explicit entry count, so that count must be kept in
 * step when entries are added or removed.
 */

/* RBBM table: one entry. */
3107 static struct drm_info_list r100_debugfs_rbbm_list[] = {
3108  {"r100_rbbm_info", r100_debugfs_rbbm_info, 0, NULL},
3109 };
3110 
/* CP table: two entries (ring contents and CSQ FIFO). */
3111 static struct drm_info_list r100_debugfs_cp_list[] = {
3112  {"r100_cp_ring_info", r100_debugfs_cp_ring_info, 0, NULL},
3113  {"r100_cp_csq_fifo", r100_debugfs_cp_csq_fifo, 0, NULL},
3114 };
3115 
/* MC table: one entry. */
3116 static struct drm_info_list r100_debugfs_mc_info_list[] = {
3117  {"r100_mc_info", r100_debugfs_mc_info, 0, NULL},
3118 };
3119 #endif
3120 
3122 {
3123 #if defined(CONFIG_DEBUG_FS)
3124  return radeon_debugfs_add_files(rdev, r100_debugfs_rbbm_list, 1);
3125 #else
3126  return 0;
3127 #endif
3128 }
3129 
3131 {
3132 #if defined(CONFIG_DEBUG_FS)
3133  return radeon_debugfs_add_files(rdev, r100_debugfs_cp_list, 2);
3134 #else
3135  return 0;
3136 #endif
3137 }
3138 
3140 {
3141 #if defined(CONFIG_DEBUG_FS)
3142  return radeon_debugfs_add_files(rdev, r100_debugfs_mc_info_list, 1);
3143 #else
3144  return 0;
3145 #endif
3146 }
3147 
/*
 * r100_set_surface_reg - program one surface register group.
 * @rdev:         device
 * @reg:          surface index; registers are addressed at base + reg * 16
 * @tiling_flags: RADEON_TILING_* flags selecting tiling and swapping
 * @pitch:        surface pitch, divided down before being merged into flags
 * @offset:       written to the LOWER_BOUND register
 * @obj_size:     UPPER_BOUND is written as offset + obj_size - 1
 *
 * Builds the INFO word from family-specific tiling bits plus the scaled
 * pitch (pitch / 16 below R300, pitch / 8 otherwise), then writes the
 * INFO, LOWER_BOUND and UPPER_BOUND registers. Always returns 0.
 *
 * NOTE(review): the embedded numbering skips 3157, 3160, 3174 and 3176, so
 * the tail of the first tiling condition and the statements under three of
 * the if-lines below are missing from this listing. Restore them from a
 * pristine copy of the file.
 */
3148 int r100_set_surface_reg(struct radeon_device *rdev, int reg,
3149  uint32_t tiling_flags, uint32_t pitch,
3150  uint32_t offset, uint32_t obj_size)
3151 {
3152  int surf_index = reg * 16;
3153  int flags = 0;
3154 
3155  if (rdev->family <= CHIP_RS200) {
3156  if ((tiling_flags & (RADEON_TILING_MACRO|RADEON_TILING_MICRO))
3158  flags |= RADEON_SURF_TILE_COLOR_BOTH;
3159  if (tiling_flags & RADEON_TILING_MACRO)
3161  } else if (rdev->family <= CHIP_RV280) {
3162  if (tiling_flags & (RADEON_TILING_MACRO))
3163  flags |= R200_SURF_TILE_COLOR_MACRO;
3164  if (tiling_flags & RADEON_TILING_MICRO)
3165  flags |= R200_SURF_TILE_COLOR_MICRO;
3166  } else {
3167  if (tiling_flags & RADEON_TILING_MACRO)
3168  flags |= R300_SURF_TILE_MACRO;
3169  if (tiling_flags & RADEON_TILING_MICRO)
3170  flags |= R300_SURF_TILE_MICRO;
3171  }
3172 
3173  if (tiling_flags & RADEON_TILING_SWAP_16BIT)
3175  if (tiling_flags & RADEON_TILING_SWAP_32BIT)
3177 
3178  /* when we aren't tiling the pitch seems to needs to be furtherdivided down. - tested on power5 + rn50 server */
3179  if (tiling_flags & (RADEON_TILING_SWAP_16BIT | RADEON_TILING_SWAP_32BIT)) {
3180  if (!(tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO)))
3181  if (ASIC_IS_RN50(rdev))
3182  pitch /= 16;
3183  }
3184 
3185  /* r100/r200 divide by 16 */
3186  if (rdev->family < CHIP_R300)
3187  flags |= pitch / 16;
3188  else
3189  flags |= pitch / 8;
3190 
3191 
3192  DRM_DEBUG_KMS("writing surface %d %d %x %x\n", reg, flags, offset, offset+obj_size-1);
3193  WREG32(RADEON_SURFACE0_INFO + surf_index, flags);
3194  WREG32(RADEON_SURFACE0_LOWER_BOUND + surf_index, offset);
3195  WREG32(RADEON_SURFACE0_UPPER_BOUND + surf_index, offset + obj_size - 1);
3196  return 0;
3197 }
3198 
3199 void r100_clear_surface_reg(struct radeon_device *rdev, int reg)
3200 {
3201  int surf_index = reg * 16;
3202  WREG32(RADEON_SURFACE0_INFO + surf_index, 0);
3203 }
3204 
3206 {
3207  fixed20_12 trcd_ff, trp_ff, tras_ff, trbs_ff, tcas_ff;
3208  fixed20_12 sclk_ff, mclk_ff, sclk_eff_ff, sclk_delay_ff;
3209  fixed20_12 peak_disp_bw, mem_bw, pix_clk, pix_clk2, temp_ff, crit_point_ff;
3210  uint32_t temp, data, mem_trcd, mem_trp, mem_tras;
3211  fixed20_12 memtcas_ff[8] = {
3212  dfixed_init(1),
3213  dfixed_init(2),
3214  dfixed_init(3),
3215  dfixed_init(0),
3216  dfixed_init_half(1),
3217  dfixed_init_half(2),
3218  dfixed_init(0),
3219  };
3220  fixed20_12 memtcas_rs480_ff[8] = {
3221  dfixed_init(0),
3222  dfixed_init(1),
3223  dfixed_init(2),
3224  dfixed_init(3),
3225  dfixed_init(0),
3226  dfixed_init_half(1),
3227  dfixed_init_half(2),
3228  dfixed_init_half(3),
3229  };
3230  fixed20_12 memtcas2_ff[8] = {
3231  dfixed_init(0),
3232  dfixed_init(1),
3233  dfixed_init(2),
3234  dfixed_init(3),
3235  dfixed_init(4),
3236  dfixed_init(5),
3237  dfixed_init(6),
3238  dfixed_init(7),
3239  };
3240  fixed20_12 memtrbs[8] = {
3241  dfixed_init(1),
3242  dfixed_init_half(1),
3243  dfixed_init(2),
3244  dfixed_init_half(2),
3245  dfixed_init(3),
3246  dfixed_init_half(3),
3247  dfixed_init(4),
3248  dfixed_init_half(4)
3249  };
3250  fixed20_12 memtrbs_r4xx[8] = {
3251  dfixed_init(4),
3252  dfixed_init(5),
3253  dfixed_init(6),
3254  dfixed_init(7),
3255  dfixed_init(8),
3256  dfixed_init(9),
3257  dfixed_init(10),
3258  dfixed_init(11)
3259  };
3260  fixed20_12 min_mem_eff;
3261  fixed20_12 mc_latency_sclk, mc_latency_mclk, k1;
3262  fixed20_12 cur_latency_mclk, cur_latency_sclk;
3263  fixed20_12 disp_latency, disp_latency_overhead, disp_drain_rate,
3264  disp_drain_rate2, read_return_rate;
3265  fixed20_12 time_disp1_drop_priority;
3266  int c;
3267  int cur_size = 16; /* in octawords */
3268  int critical_point = 0, critical_point2;
3269 /* uint32_t read_return_rate, time_disp1_drop_priority; */
3270  int stop_req, max_stop_req;
3271  struct drm_display_mode *mode1 = NULL;
3272  struct drm_display_mode *mode2 = NULL;
3273  uint32_t pixel_bytes1 = 0;
3274  uint32_t pixel_bytes2 = 0;
3275 
3277 
3278  if (rdev->mode_info.crtcs[0]->base.enabled) {
3279  mode1 = &rdev->mode_info.crtcs[0]->base.mode;
3280  pixel_bytes1 = rdev->mode_info.crtcs[0]->base.fb->bits_per_pixel / 8;
3281  }
3282  if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
3283  if (rdev->mode_info.crtcs[1]->base.enabled) {
3284  mode2 = &rdev->mode_info.crtcs[1]->base.mode;
3285  pixel_bytes2 = rdev->mode_info.crtcs[1]->base.fb->bits_per_pixel / 8;
3286  }
3287  }
3288 
3289  min_mem_eff.full = dfixed_const_8(0);
3290  /* get modes */
3291  if ((rdev->disp_priority == 2) && ASIC_IS_R300(rdev)) {
3292  uint32_t mc_init_misc_lat_timer = RREG32(R300_MC_INIT_MISC_LAT_TIMER);
3293  mc_init_misc_lat_timer &= ~(R300_MC_DISP1R_INIT_LAT_MASK << R300_MC_DISP1R_INIT_LAT_SHIFT);
3294  mc_init_misc_lat_timer &= ~(R300_MC_DISP0R_INIT_LAT_MASK << R300_MC_DISP0R_INIT_LAT_SHIFT);
3295  /* check crtc enables */
3296  if (mode2)
3297  mc_init_misc_lat_timer |= (1 << R300_MC_DISP1R_INIT_LAT_SHIFT);
3298  if (mode1)
3299  mc_init_misc_lat_timer |= (1 << R300_MC_DISP0R_INIT_LAT_SHIFT);
3300  WREG32(R300_MC_INIT_MISC_LAT_TIMER, mc_init_misc_lat_timer);
3301  }
3302 
3303  /*
3304  * determine is there is enough bw for current mode
3305  */
3306  sclk_ff = rdev->pm.sclk;
3307  mclk_ff = rdev->pm.mclk;
3308 
3309  temp = (rdev->mc.vram_width / 8) * (rdev->mc.vram_is_ddr ? 2 : 1);
3310  temp_ff.full = dfixed_const(temp);
3311  mem_bw.full = dfixed_mul(mclk_ff, temp_ff);
3312 
3313  pix_clk.full = 0;
3314  pix_clk2.full = 0;
3315  peak_disp_bw.full = 0;
3316  if (mode1) {
3317  temp_ff.full = dfixed_const(1000);
3318  pix_clk.full = dfixed_const(mode1->clock); /* convert to fixed point */
3319  pix_clk.full = dfixed_div(pix_clk, temp_ff);
3320  temp_ff.full = dfixed_const(pixel_bytes1);
3321  peak_disp_bw.full += dfixed_mul(pix_clk, temp_ff);
3322  }
3323  if (mode2) {
3324  temp_ff.full = dfixed_const(1000);
3325  pix_clk2.full = dfixed_const(mode2->clock); /* convert to fixed point */
3326  pix_clk2.full = dfixed_div(pix_clk2, temp_ff);
3327  temp_ff.full = dfixed_const(pixel_bytes2);
3328  peak_disp_bw.full += dfixed_mul(pix_clk2, temp_ff);
3329  }
3330 
3331  mem_bw.full = dfixed_mul(mem_bw, min_mem_eff);
3332  if (peak_disp_bw.full >= mem_bw.full) {
3333  DRM_ERROR("You may not have enough display bandwidth for current mode\n"
3334  "If you have flickering problem, try to lower resolution, refresh rate, or color depth\n");
3335  }
3336 
3337  /* Get values from the EXT_MEM_CNTL register...converting its contents. */
3339  if ((rdev->family == CHIP_RV100) || (rdev->flags & RADEON_IS_IGP)) { /* RV100, M6, IGPs */
3340  mem_trcd = ((temp >> 2) & 0x3) + 1;
3341  mem_trp = ((temp & 0x3)) + 1;
3342  mem_tras = ((temp & 0x70) >> 4) + 1;
3343  } else if (rdev->family == CHIP_R300 ||
3344  rdev->family == CHIP_R350) { /* r300, r350 */
3345  mem_trcd = (temp & 0x7) + 1;
3346  mem_trp = ((temp >> 8) & 0x7) + 1;
3347  mem_tras = ((temp >> 11) & 0xf) + 4;
3348  } else if (rdev->family == CHIP_RV350 ||
3349  rdev->family <= CHIP_RV380) {
3350  /* rv3x0 */
3351  mem_trcd = (temp & 0x7) + 3;
3352  mem_trp = ((temp >> 8) & 0x7) + 3;
3353  mem_tras = ((temp >> 11) & 0xf) + 6;
3354  } else if (rdev->family == CHIP_R420 ||
3355  rdev->family == CHIP_R423 ||
3356  rdev->family == CHIP_RV410) {
3357  /* r4xx */
3358  mem_trcd = (temp & 0xf) + 3;
3359  if (mem_trcd > 15)
3360  mem_trcd = 15;
3361  mem_trp = ((temp >> 8) & 0xf) + 3;
3362  if (mem_trp > 15)
3363  mem_trp = 15;
3364  mem_tras = ((temp >> 12) & 0x1f) + 6;
3365  if (mem_tras > 31)
3366  mem_tras = 31;
3367  } else { /* RV200, R200 */
3368  mem_trcd = (temp & 0x7) + 1;
3369  mem_trp = ((temp >> 8) & 0x7) + 1;
3370  mem_tras = ((temp >> 12) & 0xf) + 4;
3371  }
3372  /* convert to FF */
3373  trcd_ff.full = dfixed_const(mem_trcd);
3374  trp_ff.full = dfixed_const(mem_trp);
3375  tras_ff.full = dfixed_const(mem_tras);
3376 
3377  /* Get values from the MEM_SDRAM_MODE_REG register...converting its */
3379  data = (temp & (7 << 20)) >> 20;
3380  if ((rdev->family == CHIP_RV100) || rdev->flags & RADEON_IS_IGP) {
3381  if (rdev->family == CHIP_RS480) /* don't think rs400 */
3382  tcas_ff = memtcas_rs480_ff[data];
3383  else
3384  tcas_ff = memtcas_ff[data];
3385  } else
3386  tcas_ff = memtcas2_ff[data];
3387 
3388  if (rdev->family == CHIP_RS400 ||
3389  rdev->family == CHIP_RS480) {
3390  /* extra cas latency stored in bits 23-25 0-4 clocks */
3391  data = (temp >> 23) & 0x7;
3392  if (data < 5)
3393  tcas_ff.full += dfixed_const(data);
3394  }
3395 
3396  if (ASIC_IS_R300(rdev) && !(rdev->flags & RADEON_IS_IGP)) {
3397  /* on the R300, Tcas is included in Trbs.
3398  */
3399  temp = RREG32(RADEON_MEM_CNTL);
3400  data = (R300_MEM_NUM_CHANNELS_MASK & temp);
3401  if (data == 1) {
3402  if (R300_MEM_USE_CD_CH_ONLY & temp) {
3403  temp = RREG32(R300_MC_IND_INDEX);
3404  temp &= ~R300_MC_IND_ADDR_MASK;
3406  WREG32(R300_MC_IND_INDEX, temp);
3407  temp = RREG32(R300_MC_IND_DATA);
3409  } else {
3410  temp = RREG32(R300_MC_READ_CNTL_AB);
3412  }
3413  } else {
3414  temp = RREG32(R300_MC_READ_CNTL_AB);
3416  }
3417  if (rdev->family == CHIP_RV410 ||
3418  rdev->family == CHIP_R420 ||
3419  rdev->family == CHIP_R423)
3420  trbs_ff = memtrbs_r4xx[data];
3421  else
3422  trbs_ff = memtrbs[data];
3423  tcas_ff.full += trbs_ff.full;
3424  }
3425 
3426  sclk_eff_ff.full = sclk_ff.full;
3427 
3428  if (rdev->flags & RADEON_IS_AGP) {
3429  fixed20_12 agpmode_ff;
3430  agpmode_ff.full = dfixed_const(radeon_agpmode);
3431  temp_ff.full = dfixed_const_666(16);
3432  sclk_eff_ff.full -= dfixed_mul(agpmode_ff, temp_ff);
3433  }
3434  /* TODO PCIE lanes may affect this - agpmode == 16?? */
3435 
3436  if (ASIC_IS_R300(rdev)) {
3437  sclk_delay_ff.full = dfixed_const(250);
3438  } else {
3439  if ((rdev->family == CHIP_RV100) ||
3440  rdev->flags & RADEON_IS_IGP) {
3441  if (rdev->mc.vram_is_ddr)
3442  sclk_delay_ff.full = dfixed_const(41);
3443  else
3444  sclk_delay_ff.full = dfixed_const(33);
3445  } else {
3446  if (rdev->mc.vram_width == 128)
3447  sclk_delay_ff.full = dfixed_const(57);
3448  else
3449  sclk_delay_ff.full = dfixed_const(41);
3450  }
3451  }
3452 
3453  mc_latency_sclk.full = dfixed_div(sclk_delay_ff, sclk_eff_ff);
3454 
3455  if (rdev->mc.vram_is_ddr) {
3456  if (rdev->mc.vram_width == 32) {
3457  k1.full = dfixed_const(40);
3458  c = 3;
3459  } else {
3460  k1.full = dfixed_const(20);
3461  c = 1;
3462  }
3463  } else {
3464  k1.full = dfixed_const(40);
3465  c = 3;
3466  }
3467 
3468  temp_ff.full = dfixed_const(2);
3469  mc_latency_mclk.full = dfixed_mul(trcd_ff, temp_ff);
3470  temp_ff.full = dfixed_const(c);
3471  mc_latency_mclk.full += dfixed_mul(tcas_ff, temp_ff);
3472  temp_ff.full = dfixed_const(4);
3473  mc_latency_mclk.full += dfixed_mul(tras_ff, temp_ff);
3474  mc_latency_mclk.full += dfixed_mul(trp_ff, temp_ff);
3475  mc_latency_mclk.full += k1.full;
3476 
3477  mc_latency_mclk.full = dfixed_div(mc_latency_mclk, mclk_ff);
3478  mc_latency_mclk.full += dfixed_div(temp_ff, sclk_eff_ff);
3479 
3480  /*
3481  HW cursor time assuming worst case of full size colour cursor.
3482  */
3483  temp_ff.full = dfixed_const((2 * (cur_size - (rdev->mc.vram_is_ddr + 1))));
3484  temp_ff.full += trcd_ff.full;
3485  if (temp_ff.full < tras_ff.full)
3486  temp_ff.full = tras_ff.full;
3487  cur_latency_mclk.full = dfixed_div(temp_ff, mclk_ff);
3488 
3489  temp_ff.full = dfixed_const(cur_size);
3490  cur_latency_sclk.full = dfixed_div(temp_ff, sclk_eff_ff);
3491  /*
3492  Find the total latency for the display data.
3493  */
3494  disp_latency_overhead.full = dfixed_const(8);
3495  disp_latency_overhead.full = dfixed_div(disp_latency_overhead, sclk_ff);
3496  mc_latency_mclk.full += disp_latency_overhead.full + cur_latency_mclk.full;
3497  mc_latency_sclk.full += disp_latency_overhead.full + cur_latency_sclk.full;
3498 
3499  if (mc_latency_mclk.full > mc_latency_sclk.full)
3500  disp_latency.full = mc_latency_mclk.full;
3501  else
3502  disp_latency.full = mc_latency_sclk.full;
3503 
3504  /* setup Max GRPH_STOP_REQ default value */
3505  if (ASIC_IS_RV100(rdev))
3506  max_stop_req = 0x5c;
3507  else
3508  max_stop_req = 0x7c;
3509 
3510  if (mode1) {
3511  /* CRTC1
3512  Set GRPH_BUFFER_CNTL register using h/w defined optimal values.
3513  GRPH_STOP_REQ <= MIN[ 0x7C, (CRTC_H_DISP + 1) * (bit depth) / 0x10 ]
3514  */
3515  stop_req = mode1->hdisplay * pixel_bytes1 / 16;
3516 
3517  if (stop_req > max_stop_req)
3518  stop_req = max_stop_req;
3519 
3520  /*
3521  Find the drain rate of the display buffer.
3522  */
3523  temp_ff.full = dfixed_const((16/pixel_bytes1));
3524  disp_drain_rate.full = dfixed_div(pix_clk, temp_ff);
3525 
3526  /*
3527  Find the critical point of the display buffer.
3528  */
3529  crit_point_ff.full = dfixed_mul(disp_drain_rate, disp_latency);
3530  crit_point_ff.full += dfixed_const_half(0);
3531 
3532  critical_point = dfixed_trunc(crit_point_ff);
3533 
3534  if (rdev->disp_priority == 2) {
3535  critical_point = 0;
3536  }
3537 
3538  /*
3539  The critical point should never be above max_stop_req-4. Setting
3540  GRPH_CRITICAL_CNTL = 0 will thus force high priority all the time.
3541  */
3542  if (max_stop_req - critical_point < 4)
3543  critical_point = 0;
3544 
3545  if (critical_point == 0 && mode2 && rdev->family == CHIP_R300) {
3546  /* some R300 cards have problem with this set to 0, when CRTC2 is enabled.*/
3547  critical_point = 0x10;
3548  }
3549 
3551  temp &= ~(RADEON_GRPH_STOP_REQ_MASK);
3552  temp |= (stop_req << RADEON_GRPH_STOP_REQ_SHIFT);
3553  temp &= ~(RADEON_GRPH_START_REQ_MASK);
3554  if ((rdev->family == CHIP_R350) &&
3555  (stop_req > 0x15)) {
3556  stop_req -= 0x10;
3557  }
3558  temp |= (stop_req << RADEON_GRPH_START_REQ_SHIFT);
3559  temp |= RADEON_GRPH_BUFFER_SIZE;
3560  temp &= ~(RADEON_GRPH_CRITICAL_CNTL |
3563  /*
3564  Write the result into the register.
3565  */
3567  (critical_point << RADEON_GRPH_CRITICAL_POINT_SHIFT)));
3568 
3569 #if 0
3570  if ((rdev->family == CHIP_RS400) ||
3571  (rdev->family == CHIP_RS480)) {
3572  /* attempt to program RS400 disp regs correctly ??? */
3573  temp = RREG32(RS400_DISP1_REG_CNTL);
3576  WREG32(RS400_DISP1_REQ_CNTL1, (temp |
3577  (critical_point << RS400_DISP1_START_REQ_LEVEL_SHIFT) |
3578  (critical_point << RS400_DISP1_STOP_REQ_LEVEL_SHIFT)));
3579  temp = RREG32(RS400_DMIF_MEM_CNTL1);
3582  WREG32(RS400_DMIF_MEM_CNTL1, (temp |
3583  (critical_point << RS400_DISP1_CRITICAL_POINT_START_SHIFT) |
3584  (critical_point << RS400_DISP1_CRITICAL_POINT_STOP_SHIFT)));
3585  }
3586 #endif
3587 
3588  DRM_DEBUG_KMS("GRPH_BUFFER_CNTL from to %x\n",
3589  /* (unsigned int)info->SavedReg->grph_buffer_cntl, */
3590  (unsigned int)RREG32(RADEON_GRPH_BUFFER_CNTL));
3591  }
3592 
3593  if (mode2) {
3594  u32 grph2_cntl;
3595  stop_req = mode2->hdisplay * pixel_bytes2 / 16;
3596 
3597  if (stop_req > max_stop_req)
3598  stop_req = max_stop_req;
3599 
3600  /*
3601  Find the drain rate of the display buffer.
3602  */
3603  temp_ff.full = dfixed_const((16/pixel_bytes2));
3604  disp_drain_rate2.full = dfixed_div(pix_clk2, temp_ff);
3605 
3606  grph2_cntl = RREG32(RADEON_GRPH2_BUFFER_CNTL);
3607  grph2_cntl &= ~(RADEON_GRPH_STOP_REQ_MASK);
3608  grph2_cntl |= (stop_req << RADEON_GRPH_STOP_REQ_SHIFT);
3609  grph2_cntl &= ~(RADEON_GRPH_START_REQ_MASK);
3610  if ((rdev->family == CHIP_R350) &&
3611  (stop_req > 0x15)) {
3612  stop_req -= 0x10;
3613  }
3614  grph2_cntl |= (stop_req << RADEON_GRPH_START_REQ_SHIFT);
3615  grph2_cntl |= RADEON_GRPH_BUFFER_SIZE;
3616  grph2_cntl &= ~(RADEON_GRPH_CRITICAL_CNTL |
3619 
3620  if ((rdev->family == CHIP_RS100) ||
3621  (rdev->family == CHIP_RS200))
3622  critical_point2 = 0;
3623  else {
3624  temp = (rdev->mc.vram_width * rdev->mc.vram_is_ddr + 1)/128;
3625  temp_ff.full = dfixed_const(temp);
3626  temp_ff.full = dfixed_mul(mclk_ff, temp_ff);
3627  if (sclk_ff.full < temp_ff.full)
3628  temp_ff.full = sclk_ff.full;
3629 
3630  read_return_rate.full = temp_ff.full;
3631 
3632  if (mode1) {
3633  temp_ff.full = read_return_rate.full - disp_drain_rate.full;
3634  time_disp1_drop_priority.full = dfixed_div(crit_point_ff, temp_ff);
3635  } else {
3636  time_disp1_drop_priority.full = 0;
3637  }
3638  crit_point_ff.full = disp_latency.full + time_disp1_drop_priority.full + disp_latency.full;
3639  crit_point_ff.full = dfixed_mul(crit_point_ff, disp_drain_rate2);
3640  crit_point_ff.full += dfixed_const_half(0);
3641 
3642  critical_point2 = dfixed_trunc(crit_point_ff);
3643 
3644  if (rdev->disp_priority == 2) {
3645  critical_point2 = 0;
3646  }
3647 
3648  if (max_stop_req - critical_point2 < 4)
3649  critical_point2 = 0;
3650 
3651  }
3652 
3653  if (critical_point2 == 0 && rdev->family == CHIP_R300) {
3654  /* some R300 cards have problem with this set to 0 */
3655  critical_point2 = 0x10;
3656  }
3657 
3659  (critical_point2 << RADEON_GRPH_CRITICAL_POINT_SHIFT)));
3660 
3661  if ((rdev->family == CHIP_RS400) ||
3662  (rdev->family == CHIP_RS480)) {
3663 #if 0
3664  /* attempt to program RS400 disp2 regs correctly ??? */
3665  temp = RREG32(RS400_DISP2_REQ_CNTL1);
3668  WREG32(RS400_DISP2_REQ_CNTL1, (temp |
3669  (critical_point2 << RS400_DISP1_START_REQ_LEVEL_SHIFT) |
3670  (critical_point2 << RS400_DISP1_STOP_REQ_LEVEL_SHIFT)));
3671  temp = RREG32(RS400_DISP2_REQ_CNTL2);
3674  WREG32(RS400_DISP2_REQ_CNTL2, (temp |
3675  (critical_point2 << RS400_DISP2_CRITICAL_POINT_START_SHIFT) |
3676  (critical_point2 << RS400_DISP2_CRITICAL_POINT_STOP_SHIFT)));
3677 #endif
3678  WREG32(RS400_DISP2_REQ_CNTL1, 0x105DC1CC);
3679  WREG32(RS400_DISP2_REQ_CNTL2, 0x2749D000);
3680  WREG32(RS400_DMIF_MEM_CNTL1, 0x29CA71DC);
3681  WREG32(RS400_DISP1_REQ_CNTL1, 0x28FBC3AC);
3682  }
3683 
3684  DRM_DEBUG_KMS("GRPH2_BUFFER_CNTL from to %x\n",
3685  (unsigned int)RREG32(RADEON_GRPH2_BUFFER_CNTL));
3686  }
3687 }
3688 
3689 int r100_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3690 {
3691  uint32_t scratch;
3692  uint32_t tmp = 0;
3693  unsigned i;
3694  int r;
3695 
3696  r = radeon_scratch_get(rdev, &scratch);
3697  if (r) {
3698  DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3699  return r;
3700  }
3701  WREG32(scratch, 0xCAFEDEAD);
3702  r = radeon_ring_lock(rdev, ring, 2);
3703  if (r) {
3704  DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3705  radeon_scratch_free(rdev, scratch);
3706  return r;
3707  }
3708  radeon_ring_write(ring, PACKET0(scratch, 0));
3709  radeon_ring_write(ring, 0xDEADBEEF);
3710  radeon_ring_unlock_commit(rdev, ring);
3711  for (i = 0; i < rdev->usec_timeout; i++) {
3712  tmp = RREG32(scratch);
3713  if (tmp == 0xDEADBEEF) {
3714  break;
3715  }
3716  DRM_UDELAY(1);
3717  }
3718  if (i < rdev->usec_timeout) {
3719  DRM_INFO("ring test succeeded in %d usecs\n", i);
3720  } else {
3721  DRM_ERROR("radeon: ring test failed (scratch(0x%04X)=0x%08X)\n",
3722  scratch, tmp);
3723  r = -EINVAL;
3724  }
3725  radeon_scratch_free(rdev, scratch);
3726  return r;
3727 }
3728 
/**
 * r100_ring_ib_execute - queue the ring words that reference an indirect buffer
 * @rdev: radeon device; the ring used is rdev->ring[RADEON_RING_TYPE_GFX_INDEX]
 * @ib: indirect buffer whose gpu_addr and length_dw are written to the ring
 *
 * NOTE(review): the embedded listing numbers jump from 3738 to 3740, so one
 * source line is absent from this copy right before the gpu_addr write -
 * presumably the packet header for the two words that follow.  Restore it
 * from the upstream file before relying on this text.
 */
3729 void r100_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3730 {
3731  struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3732 
/* Only when a save register is configured: store wptr + 2 + 3 in it.
 * NOTE(review): 2 looks like the size of this packet and 3 the size of the
 * IB packet below - confirm. */
3733  if (ring->rptr_save_reg) {
3734  u32 next_rptr = ring->wptr + 2 + 3;
3735  radeon_ring_write(ring, PACKET0(ring->rptr_save_reg, 0));
3736  radeon_ring_write(ring, next_rptr);
3737  }
3738 
/* NOTE(review): listing line 3739 is missing here. */
3740  radeon_ring_write(ring, ib->gpu_addr);
3741  radeon_ring_write(ring, ib->length_dw);
3742 }
3743 
3744 int r100_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3745 {
3746  struct radeon_ib ib;
3747  uint32_t scratch;
3748  uint32_t tmp = 0;
3749  unsigned i;
3750  int r;
3751 
3752  r = radeon_scratch_get(rdev, &scratch);
3753  if (r) {
3754  DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3755  return r;
3756  }
3757  WREG32(scratch, 0xCAFEDEAD);
3758  r = radeon_ib_get(rdev, RADEON_RING_TYPE_GFX_INDEX, &ib, NULL, 256);
3759  if (r) {
3760  DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3761  goto free_scratch;
3762  }
3763  ib.ptr[0] = PACKET0(scratch, 0);
3764  ib.ptr[1] = 0xDEADBEEF;
3765  ib.ptr[2] = PACKET2(0);
3766  ib.ptr[3] = PACKET2(0);
3767  ib.ptr[4] = PACKET2(0);
3768  ib.ptr[5] = PACKET2(0);
3769  ib.ptr[6] = PACKET2(0);
3770  ib.ptr[7] = PACKET2(0);
3771  ib.length_dw = 8;
3772  r = radeon_ib_schedule(rdev, &ib, NULL);
3773  if (r) {
3774  DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3775  goto free_ib;
3776  }
3777  r = radeon_fence_wait(ib.fence, false);
3778  if (r) {
3779  DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3780  goto free_ib;
3781  }
3782  for (i = 0; i < rdev->usec_timeout; i++) {
3783  tmp = RREG32(scratch);
3784  if (tmp == 0xDEADBEEF) {
3785  break;
3786  }
3787  DRM_UDELAY(1);
3788  }
3789  if (i < rdev->usec_timeout) {
3790  DRM_INFO("ib test succeeded in %u usecs\n", i);
3791  } else {
3792  DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3793  scratch, tmp);
3794  r = -EINVAL;
3795  }
3796 free_ib:
3797  radeon_ib_free(rdev, &ib);
3798 free_scratch:
3799  radeon_scratch_free(rdev, scratch);
3800  return r;
3801 }
3802 
/**
 * r100_mc_stop - quiesce display/CP state ahead of memory-controller changes
 * @rdev: radeon device
 * @save: receives register values read here (GENMO_WT is the only save
 *        visible in this copy)
 *
 * NOTE(review): most of this function's statements are absent from this
 * listing (embedded numbers 3809, 3813-3815, 3817-3818, 3822, 3824-3832,
 * 3834 and 3836-3841 are missing).  Only the surviving lines are described;
 * restore the body from the upstream file before relying on this text.
 */
3803 void r100_mc_stop(struct radeon_device *rdev, struct r100_mc_save *save)
3804 {
3805  /* Shutdown CP we shouldn't need to do that but better be safe than
3806  * sorry
3807  */
3808  rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3810 
3811  /* Save few CRTC registers */
3812  save->GENMO_WT = RREG8(R_0003C2_GENMO_WT);
/* Second-CRTC handling is skipped on RADEON_SINGLE_CRTC parts; the guarded
 * statements themselves are missing from this copy. */
3816  if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
3819  }
3820 
3821  /* Disable VGA aperture access */
3823  /* Disable cursor, overlay, crtc */
3833  if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
/* NOTE(review): orphaned tail of a register write whose head is missing. */
3835  S_000360_CUR2_LOCK(1));
3842  }
3843 }
3844 
/**
 * r100_mc_resume - re-point the CRTCs at VRAM and restore saved registers
 * @rdev: radeon device
 * @save: state captured earlier; not referenced by the lines that survive
 *        in this copy
 *
 * Writes rdev->mc.vram_start to the display base address register, and to
 * the CRTC2 one when the device is not flagged RADEON_SINGLE_CRTC.
 *
 * NOTE(review): the register-restore statements (embedded numbers
 * 3853-3855 and 3857) are missing from this listing; restore them from the
 * upstream file before relying on this text.
 */
3845 void r100_mc_resume(struct radeon_device *rdev, struct r100_mc_save *save)
3846 {
3847  /* Update base address for crtc */
3848  WREG32(R_00023C_DISPLAY_BASE_ADDR, rdev->mc.vram_start);
3849  if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
3850  WREG32(R_00033C_CRTC2_DISPLAY_BASE_ADDR, rdev->mc.vram_start);
3851  }
3852  /* Restore CRTC registers */
3856  if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
3858  }
3859 }
3860 
/*
 * NOTE(review): the signature line (embedded number 3861) and the statement
 * at 3866 are missing from this listing.  Judging by the "Disable VGA" step
 * in r100_init() this is presumably the VGA-render-disable helper - confirm
 * against the upstream file.  What survives only reads GENMO_WT into tmp;
 * the write-back that would use tmp is not visible here.
 */
3862 {
3863  u32 tmp;
3864 
3865  tmp = RREG8(R_0003C2_GENMO_WT);
3867 }
3868 
3869 static void r100_debugfs(struct radeon_device *rdev)
3870 {
3871  int r;
3872 
3873  r = r100_debugfs_mc_info_init(rdev);
3874  if (r)
3875  dev_warn(rdev->dev, "Failed to create r100_mc debugfs file.\n");
3876 }
3877 
/**
 * r100_mc_program - program the memory-controller apertures
 * @rdev: radeon device
 *
 * Stops the MC clients with r100_mc_stop(), programs the AGP-related
 * registers (from rdev->mc.gtt_start/gtt_end/agp_base on AGP devices, fixed
 * values otherwise), warns if r100_mc_wait_for_idle() reports a timeout,
 * programs the framebuffer range from rdev->mc.vram_start/vram_end and
 * finally calls r100_mc_resume().
 *
 * NOTE(review): the opening halves of several register writes (embedded
 * numbers 3885, 3890, 3894, 3896 and 3902) are missing from this listing,
 * leaving dangling argument lines and body-less `if`s below; restore them
 * from the upstream file before relying on this text.
 */
3878 static void r100_mc_program(struct radeon_device *rdev)
3879 {
3880  struct r100_mc_save save;
3881 
3882  /* Stops all mc clients */
3883  r100_mc_stop(rdev, &save);
3884  if (rdev->flags & RADEON_IS_AGP) {
/* NOTE(review): head of this write (3885) is missing. */
3886  S_00014C_MC_AGP_START(rdev->mc.gtt_start >> 16) |
3887  S_00014C_MC_AGP_TOP(rdev->mc.gtt_end >> 16));
3888  WREG32(R_000170_AGP_BASE, lower_32_bits(rdev->mc.agp_base));
/* Families after RV200 additionally get bits 32..39 of agp_base. */
3889  if (rdev->family > CHIP_RV200)
3891  upper_32_bits(rdev->mc.agp_base) & 0xff);
3892  } else {
3893  WREG32(R_00014C_MC_AGP_LOCATION, 0x0FFFFFFF);
3895  if (rdev->family > CHIP_RV200)
3897  }
3898  /* Wait for mc idle */
3899  if (r100_mc_wait_for_idle(rdev))
3900  dev_warn(rdev->dev, "Wait for MC idle timeout.\n");
3901  /* Program MC, should be a 32bits limited address space */
3903  S_000148_MC_FB_START(rdev->mc.vram_start >> 16) |
3904  S_000148_MC_FB_TOP(rdev->mc.vram_end >> 16));
3905  r100_mc_resume(rdev, &save);
3906 }
3907 
/**
 * r100_clock_startup - clock setup run at startup and around resume/reset
 * @rdev: radeon device
 *
 * Acts on the radeon_dynclks module setting when it is neither -1 nor 0,
 * then forces the CP and VIP blocks on in a clock-control value held in tmp;
 * RV250 and RV280 get an extra adjustment.
 *
 * NOTE(review): the statements that read tmp, write it back and those
 * guarded by the two `if`s (embedded numbers 3913, 3915, 3918 and 3919) are
 * missing from this listing; restore them from the upstream file before
 * relying on this text.
 */
3908 static void r100_clock_startup(struct radeon_device *rdev)
3909 {
3910  u32 tmp;
3911 
3912  if (radeon_dynclks != -1 && radeon_dynclks)
3914  /* We need to force on some of the block */
3916  tmp |= S_00000D_FORCE_CP(1) | S_00000D_FORCE_VIP(1);
3917  if ((rdev->family == CHIP_RV250) || (rdev->family == CHIP_RV280))
3920 }
3921 
/**
 * r100_startup - bring the hardware up for accelerated use
 * @rdev: radeon device
 *
 * Programs the common registers, the memory controller and the clocks,
 * enables bus mastering and (on PCI devices) the PCI GART, then sets up the
 * write-back buffer, fences, interrupts, a 1 MiB CP ring and the IB pool,
 * in that order.
 *
 * Returns 0 on success or the error code of the first step that fails.
 * Steps that already succeeded are not undone here; r100_init() performs
 * the teardown when this function fails.
 *
 * NOTE(review): embedded listing number 3946 is missing - it held the call
 * whose result the "CP fences" check below tests.
 */
3922 static int r100_startup(struct radeon_device *rdev)
3923 {
3924  int r;
3925 
3926  /* set common regs */
3927  r100_set_common_regs(rdev);
3928  /* program mc */
3929  r100_mc_program(rdev);
3930  /* Resume clock */
3931  r100_clock_startup(rdev);
3932  /* Initialize GART (initialize after TTM so we can allocate
3933  * memory through TTM but finalize after TTM) */
3934  r100_enable_bm(rdev);
3935  if (rdev->flags & RADEON_IS_PCI) {
3936  r = r100_pci_gart_enable(rdev);
3937  if (r)
3938  return r;
3939  }
3940 
3941  /* allocate wb buffer */
3942  r = radeon_wb_init(rdev);
3943  if (r)
3944  return r;
3945 
/* NOTE(review): the assignment to r tested here (line 3946) is missing. */
3947  if (r) {
3948  dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
3949  return r;
3950  }
3951 
3952  /* Enable IRQ */
3953  r100_irq_set(rdev);
/* Cache the current HOST_PATH_CNTL value in the per-ASIC config. */
3954  rdev->config.r100.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL);
3955  /* 1M ring buffer */
3956  r = r100_cp_init(rdev, 1024 * 1024);
3957  if (r) {
3958  dev_err(rdev->dev, "failed initializing CP (%d).\n", r);
3959  return r;
3960  }
3961 
3962  r = radeon_ib_pool_init(rdev);
3963  if (r) {
3964  dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
3965  return r;
3966  }
3967 
3968  return 0;
3969 }
3970 
/**
 * r100_resume - re-initialise the hardware after a suspend
 * @rdev: radeon device
 *
 * Disables the PCI GART (PCI devices only), restarts the clocks, resets the
 * GPU (a failed reset is only warned about), restarts the clocks again,
 * re-initialises the surface registers and runs r100_startup().
 *
 * Returns the result of r100_startup(); rdev->accel_working is true on
 * success and false on failure.
 *
 * NOTE(review): embedded listing numbers 3983-3984 (the arguments of the
 * reset warning) and 3987 (the statement under the "post" comment) are
 * missing from this copy; restore them from the upstream file.
 */
3971 int r100_resume(struct radeon_device *rdev)
3972 {
3973  int r;
3974 
3975  /* Make sure the GART is not working */
3976  if (rdev->flags & RADEON_IS_PCI)
3977  r100_pci_gart_disable(rdev);
3978  /* Resume clock before doing reset */
3979  r100_clock_startup(rdev);
3980  /* Reset gpu before posting otherwise ATOM will enter infinite loop */
3981  if (radeon_asic_reset(rdev)) {
3982  dev_warn(rdev->dev, "GPU reset failed ! (0xE40=0x%08X, 0x7C0=0x%08X)\n",
3985  }
3986  /* post */
3988  /* Resume clock after posting */
3989  r100_clock_startup(rdev);
3990  /* Initialize surface registers */
3991  radeon_surface_init(rdev);
3992 
/* Set optimistically before startup; cleared again if startup fails. */
3993  rdev->accel_working = true;
3994  r = r100_startup(rdev);
3995  if (r) {
3996  rdev->accel_working = false;
3997  }
3998  return r;
3999 }
4000 
4001 int r100_suspend(struct radeon_device *rdev)
4002 {
4003  r100_cp_disable(rdev);
4004  radeon_wb_disable(rdev);
4005  r100_irq_disable(rdev);
4006  if (rdev->flags & RADEON_IS_PCI)
4007  r100_pci_gart_disable(rdev);
4008  return 0;
4009 }
4010 
/**
 * r100_fini - tear down everything the driver set up for this device
 * @rdev: radeon device
 *
 * Finalises the CP, write-back buffer, IB pool, GEM, PCI GART (PCI devices
 * only), AGP, interrupts, buffer-object manager and the ATOM BIOS state,
 * then frees the BIOS image and clears rdev->bios.
 *
 * NOTE(review): embedded listing number 4021 is missing between the
 * interrupt and buffer-object teardown; restore it from the upstream file.
 */
4011 void r100_fini(struct radeon_device *rdev)
4012 {
4013  r100_cp_fini(rdev);
4014  radeon_wb_fini(rdev);
4015  radeon_ib_pool_fini(rdev);
4016  radeon_gem_fini(rdev);
4017  if (rdev->flags & RADEON_IS_PCI)
4018  r100_pci_gart_fini(rdev);
4019  radeon_agp_fini(rdev);
4020  radeon_irq_kms_fini(rdev);
/* NOTE(review): listing line 4021 is missing here. */
4022  radeon_bo_fini(rdev);
4023  radeon_atombios_fini(rdev);
4024  kfree(rdev->bios);
/* Clear the pointer so the freed image cannot be reused. */
4025  rdev->bios = NULL;
4026 }
4027 
4028 /*
4029  * Due to how kexec works, it can leave the hw fully initialised when it
4030  * boots the new kernel. However doing our init sequence with the CP and
4031  * WB stuff setup causes GPU hangs on the RN50 at least. So at startup
4032  * do some quick sanity checks and restore sane values to avoid this
4033  * problem.
4034  */
/*
 * NOTE(review): the signature line (embedded number 4035) is missing from
 * this listing; r100_init() calls this as r100_restore_sanity(rdev).  The
 * statements inside the three `if (tmp)` bodies (4041, 4045 and 4049) are
 * missing as well, so only the reads of CP_CSQ_CNTL, CP_RB_CNTL and
 * SCRATCH_UMSK are visible - presumably each non-zero register is written
 * back to a sane value; confirm against the upstream file.
 */
4036 {
4037  u32 tmp;
4038 
4039  tmp = RREG32(RADEON_CP_CSQ_CNTL);
4040  if (tmp) {
4042  }
4043  tmp = RREG32(RADEON_CP_RB_CNTL);
4044  if (tmp) {
4046  }
4047  tmp = RREG32(RADEON_SCRATCH_UMSK);
4048  if (tmp) {
4050  }
4051 }
4052 
/**
 * r100_init - one-time driver initialisation for this ASIC group
 * @rdev: radeon device
 *
 * Registers debugfs files, initialises scratch and surface registers,
 * sanitises leftover hardware state, loads and checks the BIOS, resets and
 * posts the card, reads clock information, sets up AGP (falling back via
 * radeon_agp_disable() on failure), VRAM, fences, interrupts, the buffer
 * manager and the PCI GART, and finally calls r100_startup().
 *
 * Returns -EINVAL when no BIOS is found on an AVIVO part, when the BIOS is
 * an ATOM BIOS, or when the card is not posted; otherwise the error of the
 * first failing helper among radeon_combios_init(),
 * radeon_fence_driver_init(), radeon_irq_kms_init(), radeon_bo_init() and
 * r100_pci_gart_init().  A failing r100_startup() is NOT reported: the
 * acceleration pieces are torn down, rdev->accel_working is cleared and 0
 * is returned.
 *
 * NOTE(review): embedded listing numbers 4060 (the statement under
 * "Disable VGA") and 4085-4086 (the arguments of the reset warning) are
 * missing from this copy; restore them from the upstream file.
 */
4053 int r100_init(struct radeon_device *rdev)
4054 {
4055  int r;
4056 
4057  /* Register debugfs file specific to this group of asics */
4058  r100_debugfs(rdev);
4059  /* Disable VGA */
4061  /* Initialize scratch registers */
4062  radeon_scratch_init(rdev);
4063  /* Initialize surface registers */
4064  radeon_surface_init(rdev);
4065  /* sanity check some register to avoid hangs like after kexec */
4066  r100_restore_sanity(rdev);
4067  /* TODO: disable VGA need to use VGA request */
4068  /* BIOS*/
/* A missing BIOS is fatal only on AVIVO parts; otherwise init continues. */
4069  if (!radeon_get_bios(rdev)) {
4070  if (ASIC_IS_AVIVO(rdev))
4071  return -EINVAL;
4072  }
4073  if (rdev->is_atom_bios) {
/* NOTE(review): the message names RS400/RS480 although this is the r100
 * init path - looks copied from another ASIC file; confirm. */
4074  dev_err(rdev->dev, "Expecting combios for RS400/RS480 GPU\n");
4075  return -EINVAL;
4076  } else {
4077  r = radeon_combios_init(rdev);
4078  if (r)
4079  return r;
4080  }
4081  /* Reset gpu before posting otherwise ATOM will enter infinite loop */
4082  if (radeon_asic_reset(rdev)) {
4083  dev_warn(rdev->dev,
4084  "GPU reset failed ! (0xE40=0x%08X, 0x7C0=0x%08X)\n",
4087  }
4088  /* check if cards are posted or not */
4089  if (radeon_boot_test_post_card(rdev) == false)
4090  return -EINVAL;
4091  /* Set asic errata */
4092  r100_errata(rdev);
4093  /* Initialize clocks */
4094  radeon_get_clock_info(rdev->ddev);
4095  /* initialize AGP */
4096  if (rdev->flags & RADEON_IS_AGP) {
4097  r = radeon_agp_init(rdev);
4098  if (r) {
4099  radeon_agp_disable(rdev);
4100  }
4101  }
4102  /* initialize VRAM */
4103  r100_mc_init(rdev);
4104  /* Fence driver */
4105  r = radeon_fence_driver_init(rdev);
4106  if (r)
4107  return r;
4108  r = radeon_irq_kms_init(rdev);
4109  if (r)
4110  return r;
4111  /* Memory manager */
4112  r = radeon_bo_init(rdev);
4113  if (r)
4114  return r;
4115  if (rdev->flags & RADEON_IS_PCI) {
4116  r = r100_pci_gart_init(rdev);
4117  if (r)
4118  return r;
4119  }
4120  r100_set_safe_registers(rdev);
4121 
4122  rdev->accel_working = true;
4123  r = r100_startup(rdev);
4124  if (r) {
4125  /* Something went wrong with the accel init, stop accel */
4126  dev_err(rdev->dev, "Disabling GPU acceleration\n");
4127  r100_cp_fini(rdev);
4128  radeon_wb_fini(rdev);
4129  radeon_ib_pool_fini(rdev);
4130  radeon_irq_kms_fini(rdev);
4131  if (rdev->flags & RADEON_IS_PCI)
4132  r100_pci_gart_fini(rdev);
4133  rdev->accel_working = false;
4134  }
/* Deliberately 0 even after a startup failure: init succeeds without accel. */
4135  return 0;
4136 }
4137 
/*
 * MMIO register read.
 *
 * NOTE(review): the signature line (embedded number 4138) is missing from
 * this listing; by analogy with r100_io_rreg() below this is presumably
 * `uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg)` -
 * confirm against the upstream file.
 *
 * Offsets below rdev->rmmio_size are read directly from the mapping.
 * Anything beyond it is reached indirectly: the offset is written to
 * RADEON_MM_INDEX and the value is read back from RADEON_MM_DATA.
 */
4139 {
4140  if (reg < rdev->rmmio_size)
4141  return readl(((void __iomem *)rdev->rmmio) + reg);
4142  else {
4143  writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
4144  return readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
4145  }
4146 }
4147 
/*
 * MMIO register write.
 *
 * NOTE(review): the signature line (embedded number 4148) is missing from
 * this listing; by analogy with r100_io_wreg() below this is presumably
 * `void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)`
 * - confirm against the upstream file.
 *
 * Offsets below rdev->rmmio_size are written directly through the mapping.
 * Anything beyond it is reached indirectly: the offset is written to
 * RADEON_MM_INDEX and the value to RADEON_MM_DATA.
 */
4149 {
4150  if (reg < rdev->rmmio_size)
4151  writel(v, ((void __iomem *)rdev->rmmio) + reg);
4152  else {
4153  writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
4154  writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
4155  }
4156 }
4157 
4158 u32 r100_io_rreg(struct radeon_device *rdev, u32 reg)
4159 {
4160  if (reg < rdev->rio_mem_size)
4161  return ioread32(rdev->rio_mem + reg);
4162  else {
4163  iowrite32(reg, rdev->rio_mem + RADEON_MM_INDEX);
4164  return ioread32(rdev->rio_mem + RADEON_MM_DATA);
4165  }
4166 }
4167 
4168 void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v)
4169 {
4170  if (reg < rdev->rio_mem_size)
4171  iowrite32(v, rdev->rio_mem + reg);
4172  else {
4173  iowrite32(reg, rdev->rio_mem + RADEON_MM_INDEX);
4174  iowrite32(v, rdev->rio_mem + RADEON_MM_DATA);
4175  }
4176 }