Linux Kernel 3.7.1
si.c
1 /*
2  * Copyright 2011 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/platform_device.h>
26 #include <linux/slab.h>
27 #include <linux/module.h>
28 #include <drm/drmP.h>
29 #include "radeon.h"
30 #include "radeon_asic.h"
31 #include <drm/radeon_drm.h>
32 #include "sid.h"
33 #include "atom.h"
34 #include "si_blit_shaders.h"
35 
36 #define SI_PFP_UCODE_SIZE 2144
37 #define SI_PM4_UCODE_SIZE 2144
38 #define SI_CE_UCODE_SIZE 2144
39 #define SI_RLC_UCODE_SIZE 2048
40 #define SI_MC_UCODE_SIZE 7769
41 
42 MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
43 MODULE_FIRMWARE("radeon/TAHITI_me.bin");
44 MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
45 MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
46 MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
47 MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
48 MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
49 MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
50 MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
51 MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
52 MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
53 MODULE_FIRMWARE("radeon/VERDE_me.bin");
54 MODULE_FIRMWARE("radeon/VERDE_ce.bin");
55 MODULE_FIRMWARE("radeon/VERDE_mc.bin");
56 MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
57 
58 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
59 extern void r600_ih_ring_fini(struct radeon_device *rdev);
61 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
62 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
64 
65 /* get temperature in millidegrees */
66 int si_get_temp(struct radeon_device *rdev)
67 {
68  u32 temp;
69  int actual_temp = 0;
70 
71  temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
72  CTF_TEMP_SHIFT;
73 
74  if (temp & 0x200)
75  actual_temp = 255;
76  else
77  actual_temp = temp & 0x1ff;
78 
79  actual_temp = (actual_temp * 1000);
80 
81  return actual_temp;
82 }
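/* Worked example (illustrative values): a raw CTF reading of 0x05a (90)
 * yields 90 * 1000 = 90000 millidegrees; any reading with bit 9 set is
 * treated as saturated and reported as 255000.
 */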
83 
84 #define TAHITI_IO_MC_REGS_SIZE 36
85 
86 static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
87  {0x0000006f, 0x03044000},
88  {0x00000070, 0x0480c018},
89  {0x00000071, 0x00000040},
90  {0x00000072, 0x01000000},
91  {0x00000074, 0x000000ff},
92  {0x00000075, 0x00143400},
93  {0x00000076, 0x08ec0800},
94  {0x00000077, 0x040000cc},
95  {0x00000079, 0x00000000},
96  {0x0000007a, 0x21000409},
97  {0x0000007c, 0x00000000},
98  {0x0000007d, 0xe8000000},
99  {0x0000007e, 0x044408a8},
100  {0x0000007f, 0x00000003},
101  {0x00000080, 0x00000000},
102  {0x00000081, 0x01000000},
103  {0x00000082, 0x02000000},
104  {0x00000083, 0x00000000},
105  {0x00000084, 0xe3f3e4f4},
106  {0x00000085, 0x00052024},
107  {0x00000087, 0x00000000},
108  {0x00000088, 0x66036603},
109  {0x00000089, 0x01000000},
110  {0x0000008b, 0x1c0a0000},
111  {0x0000008c, 0xff010000},
112  {0x0000008e, 0xffffefff},
113  {0x0000008f, 0xfff3efff},
114  {0x00000090, 0xfff3efbf},
115  {0x00000094, 0x00101101},
116  {0x00000095, 0x00000fff},
117  {0x00000096, 0x00116fff},
118  {0x00000097, 0x60010000},
119  {0x00000098, 0x10010000},
120  {0x00000099, 0x00006000},
121  {0x0000009a, 0x00001000},
122  {0x0000009f, 0x00a77400}
123 };
124 
125 static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
126  {0x0000006f, 0x03044000},
127  {0x00000070, 0x0480c018},
128  {0x00000071, 0x00000040},
129  {0x00000072, 0x01000000},
130  {0x00000074, 0x000000ff},
131  {0x00000075, 0x00143400},
132  {0x00000076, 0x08ec0800},
133  {0x00000077, 0x040000cc},
134  {0x00000079, 0x00000000},
135  {0x0000007a, 0x21000409},
136  {0x0000007c, 0x00000000},
137  {0x0000007d, 0xe8000000},
138  {0x0000007e, 0x044408a8},
139  {0x0000007f, 0x00000003},
140  {0x00000080, 0x00000000},
141  {0x00000081, 0x01000000},
142  {0x00000082, 0x02000000},
143  {0x00000083, 0x00000000},
144  {0x00000084, 0xe3f3e4f4},
145  {0x00000085, 0x00052024},
146  {0x00000087, 0x00000000},
147  {0x00000088, 0x66036603},
148  {0x00000089, 0x01000000},
149  {0x0000008b, 0x1c0a0000},
150  {0x0000008c, 0xff010000},
151  {0x0000008e, 0xffffefff},
152  {0x0000008f, 0xfff3efff},
153  {0x00000090, 0xfff3efbf},
154  {0x00000094, 0x00101101},
155  {0x00000095, 0x00000fff},
156  {0x00000096, 0x00116fff},
157  {0x00000097, 0x60010000},
158  {0x00000098, 0x10010000},
159  {0x00000099, 0x00006000},
160  {0x0000009a, 0x00001000},
161  {0x0000009f, 0x00a47400}
162 };
163 
164 static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
165  {0x0000006f, 0x03044000},
166  {0x00000070, 0x0480c018},
167  {0x00000071, 0x00000040},
168  {0x00000072, 0x01000000},
169  {0x00000074, 0x000000ff},
170  {0x00000075, 0x00143400},
171  {0x00000076, 0x08ec0800},
172  {0x00000077, 0x040000cc},
173  {0x00000079, 0x00000000},
174  {0x0000007a, 0x21000409},
175  {0x0000007c, 0x00000000},
176  {0x0000007d, 0xe8000000},
177  {0x0000007e, 0x044408a8},
178  {0x0000007f, 0x00000003},
179  {0x00000080, 0x00000000},
180  {0x00000081, 0x01000000},
181  {0x00000082, 0x02000000},
182  {0x00000083, 0x00000000},
183  {0x00000084, 0xe3f3e4f4},
184  {0x00000085, 0x00052024},
185  {0x00000087, 0x00000000},
186  {0x00000088, 0x66036603},
187  {0x00000089, 0x01000000},
188  {0x0000008b, 0x1c0a0000},
189  {0x0000008c, 0xff010000},
190  {0x0000008e, 0xffffefff},
191  {0x0000008f, 0xfff3efff},
192  {0x00000090, 0xfff3efbf},
193  {0x00000094, 0x00101101},
194  {0x00000095, 0x00000fff},
195  {0x00000096, 0x00116fff},
196  {0x00000097, 0x60010000},
197  {0x00000098, 0x10010000},
198  {0x00000099, 0x00006000},
199  {0x0000009a, 0x00001000},
200  {0x0000009f, 0x00a37400}
201 };
202 
203 /* ucode loading */
204 static int si_mc_load_microcode(struct radeon_device *rdev)
205 {
206  const __be32 *fw_data;
207  u32 running, blackout = 0;
208  u32 *io_mc_regs;
209  int i, ucode_size, regs_size;
210 
211  if (!rdev->mc_fw)
212  return -EINVAL;
213 
214  switch (rdev->family) {
215  case CHIP_TAHITI:
216  io_mc_regs = (u32 *)&tahiti_io_mc_regs;
217  ucode_size = SI_MC_UCODE_SIZE;
218  regs_size = TAHITI_IO_MC_REGS_SIZE;
219  break;
220  case CHIP_PITCAIRN:
221  io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
222  ucode_size = SI_MC_UCODE_SIZE;
223  regs_size = TAHITI_IO_MC_REGS_SIZE;
224  break;
225  case CHIP_VERDE:
226  default:
227  io_mc_regs = (u32 *)&verde_io_mc_regs;
228  ucode_size = SI_MC_UCODE_SIZE;
229  regs_size = TAHITI_IO_MC_REGS_SIZE;
230  break;
231  }
232 
233  running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
234 
235  if (running == 0) {
236  if (running) {
237  blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
238  WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
239  }
240 
241  /* reset the engine and set to writable */
242  WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
243  WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
244 
245  /* load mc io regs */
246  for (i = 0; i < regs_size; i++) {
247  WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
248  WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
249  }
250  /* load the MC ucode */
251  fw_data = (const __be32 *)rdev->mc_fw->data;
252  for (i = 0; i < ucode_size; i++)
253  WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
254 
255  /* put the engine back into the active state */
256  WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
257  WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
258  WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
259 
260  /* wait for training to complete */
261  for (i = 0; i < rdev->usec_timeout; i++) {
262  if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
263  break;
264  udelay(1);
265  }
266  for (i = 0; i < rdev->usec_timeout; i++) {
267  if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
268  break;
269  udelay(1);
270  }
271 
272  if (running)
273  WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
274  }
275 
276  return 0;
277 }
278 
279 static int si_init_microcode(struct radeon_device *rdev)
280 {
281  struct platform_device *pdev;
282  const char *chip_name;
283  const char *rlc_chip_name;
284  size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
285  char fw_name[30];
286  int err;
287 
288  DRM_DEBUG("\n");
289 
290  pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
291  err = IS_ERR(pdev);
292  if (err) {
293  printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
294  return -EINVAL;
295  }
296 
297  switch (rdev->family) {
298  case CHIP_TAHITI:
299  chip_name = "TAHITI";
300  rlc_chip_name = "TAHITI";
301  pfp_req_size = SI_PFP_UCODE_SIZE * 4;
302  me_req_size = SI_PM4_UCODE_SIZE * 4;
303  ce_req_size = SI_CE_UCODE_SIZE * 4;
304  rlc_req_size = SI_RLC_UCODE_SIZE * 4;
305  mc_req_size = SI_MC_UCODE_SIZE * 4;
306  break;
307  case CHIP_PITCAIRN:
308  chip_name = "PITCAIRN";
309  rlc_chip_name = "PITCAIRN";
310  pfp_req_size = SI_PFP_UCODE_SIZE * 4;
311  me_req_size = SI_PM4_UCODE_SIZE * 4;
312  ce_req_size = SI_CE_UCODE_SIZE * 4;
313  rlc_req_size = SI_RLC_UCODE_SIZE * 4;
314  mc_req_size = SI_MC_UCODE_SIZE * 4;
315  break;
316  case CHIP_VERDE:
317  chip_name = "VERDE";
318  rlc_chip_name = "VERDE";
319  pfp_req_size = SI_PFP_UCODE_SIZE * 4;
320  me_req_size = SI_PM4_UCODE_SIZE * 4;
321  ce_req_size = SI_CE_UCODE_SIZE * 4;
322  rlc_req_size = SI_RLC_UCODE_SIZE * 4;
323  mc_req_size = SI_MC_UCODE_SIZE * 4;
324  break;
325  default: BUG();
326  }
327 
328  DRM_INFO("Loading %s Microcode\n", chip_name);
329 
330  snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
331  err = request_firmware(&rdev->pfp_fw, fw_name, &pdev->dev);
332  if (err)
333  goto out;
334  if (rdev->pfp_fw->size != pfp_req_size) {
336  "si_cp: Bogus length %zu in firmware \"%s\"\n",
337  rdev->pfp_fw->size, fw_name);
338  err = -EINVAL;
339  goto out;
340  }
341 
342  snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
343  err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
344  if (err)
345  goto out;
346  if (rdev->me_fw->size != me_req_size) {
348  "si_cp: Bogus length %zu in firmware \"%s\"\n",
349  rdev->me_fw->size, fw_name);
350  err = -EINVAL;
351  }
352 
353  snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
354  err = request_firmware(&rdev->ce_fw, fw_name, &pdev->dev);
355  if (err)
356  goto out;
357  if (rdev->ce_fw->size != ce_req_size) {
359  "si_cp: Bogus length %zu in firmware \"%s\"\n",
360  rdev->ce_fw->size, fw_name);
361  err = -EINVAL;
362  }
363 
364  snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", rlc_chip_name);
365  err = request_firmware(&rdev->rlc_fw, fw_name, &pdev->dev);
366  if (err)
367  goto out;
368  if (rdev->rlc_fw->size != rlc_req_size) {
370  "si_rlc: Bogus length %zu in firmware \"%s\"\n",
371  rdev->rlc_fw->size, fw_name);
372  err = -EINVAL;
373  }
374 
375  snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
376  err = request_firmware(&rdev->mc_fw, fw_name, &pdev->dev);
377  if (err)
378  goto out;
379  if (rdev->mc_fw->size != mc_req_size) {
381  "si_mc: Bogus length %zu in firmware \"%s\"\n",
382  rdev->mc_fw->size, fw_name);
383  err = -EINVAL;
384  }
385 
386 out:
387  platform_device_unregister(pdev);
388 
389  if (err) {
390  if (err != -EINVAL)
391  printk(KERN_ERR
392  "si_cp: Failed to load firmware \"%s\"\n",
393  fw_name);
394  release_firmware(rdev->pfp_fw);
395  rdev->pfp_fw = NULL;
396  release_firmware(rdev->me_fw);
397  rdev->me_fw = NULL;
398  release_firmware(rdev->ce_fw);
399  rdev->ce_fw = NULL;
400  release_firmware(rdev->rlc_fw);
401  rdev->rlc_fw = NULL;
402  release_firmware(rdev->mc_fw);
403  rdev->mc_fw = NULL;
404  }
405  return err;
406 }
407 
408 /* watermark setup */
409 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
410  struct radeon_crtc *radeon_crtc,
411  struct drm_display_mode *mode,
412  struct drm_display_mode *other_mode)
413 {
414  u32 tmp;
415  /*
416  * Line Buffer Setup
417  * There are 3 line buffers, each one shared by 2 display controllers.
418  * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
419  * the display controllers. The partitioning is done via one of four
420  * preset allocations specified in bits 21:20:
421  * 0 - half lb
422  * 2 - whole lb, other crtc must be disabled
423  */
424  /* this can get tricky if we have two large displays on a paired group
425  * of crtcs. Ideally for multiple large displays we'd assign them to
426  * non-linked crtcs for maximum line buffer allocation.
427  */
428  if (radeon_crtc->base.enabled && mode) {
429  if (other_mode)
430  tmp = 0; /* 1/2 */
431  else
432  tmp = 2; /* whole */
433  } else
434  tmp = 0;
435 
436  WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
437  DC_LB_MEMORY_CONFIG(tmp));
438 
439  if (radeon_crtc->base.enabled && mode) {
440  switch (tmp) {
441  case 0:
442  default:
443  return 4096 * 2;
444  case 2:
445  return 8192 * 2;
446  }
447  }
448 
449  /* controller not enabled, so no lb used */
450  return 0;
451 }
452 
453 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
454 {
455  u32 tmp = RREG32(MC_SHARED_CHMAP);
456 
457  switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
458  case 0:
459  default:
460  return 1;
461  case 1:
462  return 2;
463  case 2:
464  return 4;
465  case 3:
466  return 8;
467  case 4:
468  return 3;
469  case 5:
470  return 6;
471  case 6:
472  return 10;
473  case 7:
474  return 12;
475  case 8:
476  return 16;
477  }
478 }
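/* Illustrative decode: NOOFCHAN is a lookup, not a linear count; e.g. a
 * field value of 7 maps to 12 channels, which on Tahiti-class boards
 * presumably corresponds to a 384-bit interface built from 32-bit channels.
 */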
479 
479 
480 struct dce6_wm_params {
481  u32 dram_channels; /* number of dram channels */
482  u32 yclk; /* bandwidth per dram data pin in kHz */
483  u32 sclk; /* engine clock in kHz */
484  u32 disp_clk; /* display clock in kHz */
485  u32 src_width; /* viewport width */
486  u32 active_time; /* active display time in ns */
487  u32 blank_time; /* blank time in ns */
488  bool interlaced; /* mode is interlaced */
489  fixed20_12 vsc; /* vertical scale ratio */
490  u32 num_heads; /* number of active crtcs */
491  u32 bytes_per_pixel; /* bytes per pixel display + overlay */
492  u32 lb_size; /* line buffer allocated to pipe */
493  u32 vtaps; /* vertical scaler taps */
494 };
495 
496 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
497 {
498  /* Calculate raw DRAM Bandwidth */
499  fixed20_12 dram_efficiency; /* 0.7 */
500  fixed20_12 yclk, dram_channels, bandwidth;
501  fixed20_12 a;
502 
503  a.full = dfixed_const(1000);
504  yclk.full = dfixed_const(wm->yclk);
505  yclk.full = dfixed_div(yclk, a);
506  dram_channels.full = dfixed_const(wm->dram_channels * 4);
507  a.full = dfixed_const(10);
508  dram_efficiency.full = dfixed_const(7);
509  dram_efficiency.full = dfixed_div(dram_efficiency, a);
510  bandwidth.full = dfixed_mul(dram_channels, yclk);
511  bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
512 
513  return dfixed_trunc(bandwidth);
514 }
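/* Rough worked example with illustrative numbers: yclk = 1250000 kHz and
 * 12 channels gives 1250 * (12 * 4) * 0.7 = 42000 in the units used here,
 * i.e. roughly 42 GB/s of raw DRAM bandwidth at 70% efficiency.
 */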
515 
516 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
517 {
518  /* Calculate DRAM Bandwidth and the part allocated to display. */
519  fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
520  fixed20_12 yclk, dram_channels, bandwidth;
521  fixed20_12 a;
522 
523  a.full = dfixed_const(1000);
524  yclk.full = dfixed_const(wm->yclk);
525  yclk.full = dfixed_div(yclk, a);
526  dram_channels.full = dfixed_const(wm->dram_channels * 4);
527  a.full = dfixed_const(10);
528  disp_dram_allocation.full = dfixed_const(3); /* XXX worst-case value 0.3 */
529  disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
530  bandwidth.full = dfixed_mul(dram_channels, yclk);
531  bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
532 
533  return dfixed_trunc(bandwidth);
534 }
535 
536 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
537 {
538  /* Calculate the display Data return Bandwidth */
539  fixed20_12 return_efficiency; /* 0.8 */
540  fixed20_12 sclk, bandwidth;
541  fixed20_12 a;
542 
543  a.full = dfixed_const(1000);
544  sclk.full = dfixed_const(wm->sclk);
545  sclk.full = dfixed_div(sclk, a);
546  a.full = dfixed_const(10);
547  return_efficiency.full = dfixed_const(8);
548  return_efficiency.full = dfixed_div(return_efficiency, a);
549  a.full = dfixed_const(32);
550  bandwidth.full = dfixed_mul(a, sclk);
551  bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
552 
553  return dfixed_trunc(bandwidth);
554 }
555 
556 static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
557 {
558  return 32;
559 }
560 
561 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
562 {
563  /* Calculate the DMIF Request Bandwidth */
564  fixed20_12 disp_clk_request_efficiency; /* 0.8 */
565  fixed20_12 disp_clk, sclk, bandwidth;
566  fixed20_12 a, b1, b2;
567  u32 min_bandwidth;
568 
569  a.full = dfixed_const(1000);
570  disp_clk.full = dfixed_const(wm->disp_clk);
571  disp_clk.full = dfixed_div(disp_clk, a);
572  a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
573  b1.full = dfixed_mul(a, disp_clk);
574 
575  a.full = dfixed_const(1000);
576  sclk.full = dfixed_const(wm->sclk);
577  sclk.full = dfixed_div(sclk, a);
578  a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
579  b2.full = dfixed_mul(a, sclk);
580 
581  a.full = dfixed_const(10);
582  disp_clk_request_efficiency.full = dfixed_const(8);
583  disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
584 
585  min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
586 
587  a.full = dfixed_const(min_bandwidth);
588  bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
589 
590  return dfixed_trunc(bandwidth);
591 }
592 
593 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
594 {
595  /* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
596  u32 dram_bandwidth = dce6_dram_bandwidth(wm);
597  u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
598  u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
599 
600  return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
601 }
602 
603 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
604 {
605  /* Calculate the display mode Average Bandwidth
606  * DisplayMode should contain the source and destination dimensions,
607  * timing, etc.
608  */
609  fixed20_12 bpp;
610  fixed20_12 line_time;
611  fixed20_12 src_width;
612  fixed20_12 bandwidth;
613  fixed20_12 a;
614 
615  a.full = dfixed_const(1000);
616  line_time.full = dfixed_const(wm->active_time + wm->blank_time);
617  line_time.full = dfixed_div(line_time, a);
618  bpp.full = dfixed_const(wm->bytes_per_pixel);
619  src_width.full = dfixed_const(wm->src_width);
620  bandwidth.full = dfixed_mul(src_width, bpp);
621  bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
622  bandwidth.full = dfixed_div(bandwidth, line_time);
623 
624  return dfixed_trunc(bandwidth);
625 }
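/* Illustrative example: a 1920-wide source at 4 bytes/pixel with vsc = 1 and
 * a total line time of 14800 ns gives 1920 * 4 / 14.8 = ~519, i.e. about
 * 519 MB/s of average fetch bandwidth for that head.
 */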
626 
627 static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
628 {
629  /* First calculate the latency in ns */
630  u32 mc_latency = 2000; /* 2000 ns. */
631  u32 available_bandwidth = dce6_available_bandwidth(wm);
632  u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
633  u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
634  u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
635  u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
636  (wm->num_heads * cursor_line_pair_return_time);
637  u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
638  u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
639  u32 tmp, dmif_size = 12288;
640  fixed20_12 a, b, c;
641 
642  if (wm->num_heads == 0)
643  return 0;
644 
645  a.full = dfixed_const(2);
646  b.full = dfixed_const(1);
647  if ((wm->vsc.full > a.full) ||
648  ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
649  (wm->vtaps >= 5) ||
650  ((wm->vsc.full >= a.full) && wm->interlaced))
651  max_src_lines_per_dst_line = 4;
652  else
653  max_src_lines_per_dst_line = 2;
654 
655  a.full = dfixed_const(available_bandwidth);
656  b.full = dfixed_const(wm->num_heads);
657  a.full = dfixed_div(a, b);
658 
659  b.full = dfixed_const(mc_latency + 512);
660  c.full = dfixed_const(wm->disp_clk);
661  b.full = dfixed_div(b, c);
662 
663  c.full = dfixed_const(dmif_size);
664  b.full = dfixed_div(c, b);
665 
666  tmp = min(dfixed_trunc(a), dfixed_trunc(b));
667 
668  b.full = dfixed_const(1000);
669  c.full = dfixed_const(wm->disp_clk);
670  b.full = dfixed_div(c, b);
671  c.full = dfixed_const(wm->bytes_per_pixel);
672  b.full = dfixed_mul(b, c);
673 
674  lb_fill_bw = min(tmp, dfixed_trunc(b));
675 
676  a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
677  b.full = dfixed_const(1000);
678  c.full = dfixed_const(lb_fill_bw);
679  b.full = dfixed_div(c, b);
680  a.full = dfixed_div(a, b);
681  line_fill_time = dfixed_trunc(a);
682 
683  if (line_fill_time < wm->active_time)
684  return latency;
685  else
686  return latency + (line_fill_time - wm->active_time);
687 
688 }
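/* The value returned above is the worst-case latency, in ns, that the line
 * buffer must hide: MC latency plus the time other heads spend returning
 * chunk and cursor data plus the DC pipe latency, extended when a source
 * line cannot be refilled within the active display time.
 */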
689 
690 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
691 {
692  if (dce6_average_bandwidth(wm) <=
693  (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
694  return true;
695  else
696  return false;
697 };
698 
699 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
700 {
701  if (dce6_average_bandwidth(wm) <=
702  (dce6_available_bandwidth(wm) / wm->num_heads))
703  return true;
704  else
705  return false;
706 };
707 
708 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
709 {
710  u32 lb_partitions = wm->lb_size / wm->src_width;
711  u32 line_time = wm->active_time + wm->blank_time;
712  u32 latency_tolerant_lines;
713  u32 latency_hiding;
714  fixed20_12 a;
715 
716  a.full = dfixed_const(1);
717  if (wm->vsc.full > a.full)
718  latency_tolerant_lines = 1;
719  else {
720  if (lb_partitions <= (wm->vtaps + 1))
721  latency_tolerant_lines = 1;
722  else
723  latency_tolerant_lines = 2;
724  }
725 
726  latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
727 
728  if (dce6_latency_watermark(wm) <= latency_hiding)
729  return true;
730  else
731  return false;
732 }
733 
734 static void dce6_program_watermarks(struct radeon_device *rdev,
735  struct radeon_crtc *radeon_crtc,
736  u32 lb_size, u32 num_heads)
737 {
738  struct drm_display_mode *mode = &radeon_crtc->base.mode;
739  struct dce6_wm_params wm;
740  u32 pixel_period;
741  u32 line_time = 0;
742  u32 latency_watermark_a = 0, latency_watermark_b = 0;
743  u32 priority_a_mark = 0, priority_b_mark = 0;
744  u32 priority_a_cnt = PRIORITY_OFF;
745  u32 priority_b_cnt = PRIORITY_OFF;
746  u32 tmp, arb_control3;
747  fixed20_12 a, b, c;
748 
749  if (radeon_crtc->base.enabled && num_heads && mode) {
750  pixel_period = 1000000 / (u32)mode->clock;
751  line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
752  priority_a_cnt = 0;
753  priority_b_cnt = 0;
754 
755  wm.yclk = rdev->pm.current_mclk * 10;
756  wm.sclk = rdev->pm.current_sclk * 10;
757  wm.disp_clk = mode->clock;
758  wm.src_width = mode->crtc_hdisplay;
759  wm.active_time = mode->crtc_hdisplay * pixel_period;
760  wm.blank_time = line_time - wm.active_time;
761  wm.interlaced = false;
762  if (mode->flags & DRM_MODE_FLAG_INTERLACE)
763  wm.interlaced = true;
764  wm.vsc = radeon_crtc->vsc;
765  wm.vtaps = 1;
766  if (radeon_crtc->rmx_type != RMX_OFF)
767  wm.vtaps = 2;
768  wm.bytes_per_pixel = 4; /* XXX: get this from fb config */
769  wm.lb_size = lb_size;
770  if (rdev->family == CHIP_ARUBA)
771  wm.dram_channels = evergreen_get_number_of_dram_channels(rdev);
772  else
773  wm.dram_channels = si_get_number_of_dram_channels(rdev);
774  wm.num_heads = num_heads;
775 
776  /* set for high clocks */
777  latency_watermark_a = min(dce6_latency_watermark(&wm), (u32)65535);
778  /* set for low clocks */
779  /* wm.yclk = low clk; wm.sclk = low clk */
780  latency_watermark_b = min(dce6_latency_watermark(&wm), (u32)65535);
781 
782  /* possibly force display priority to high */
783  /* should really do this at mode validation time... */
784  if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm) ||
785  !dce6_average_bandwidth_vs_available_bandwidth(&wm) ||
786  !dce6_check_latency_hiding(&wm) ||
787  (rdev->disp_priority == 2)) {
788  DRM_DEBUG_KMS("force priority to high\n");
789  priority_a_cnt |= PRIORITY_ALWAYS_ON;
790  priority_b_cnt |= PRIORITY_ALWAYS_ON;
791  }
792 
793  a.full = dfixed_const(1000);
794  b.full = dfixed_const(mode->clock);
795  b.full = dfixed_div(b, a);
796  c.full = dfixed_const(latency_watermark_a);
797  c.full = dfixed_mul(c, b);
798  c.full = dfixed_mul(c, radeon_crtc->hsc);
799  c.full = dfixed_div(c, a);
800  a.full = dfixed_const(16);
801  c.full = dfixed_div(c, a);
802  priority_a_mark = dfixed_trunc(c);
803  priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;
804 
805  a.full = dfixed_const(1000);
806  b.full = dfixed_const(mode->clock);
807  b.full = dfixed_div(b, a);
808  c.full = dfixed_const(latency_watermark_b);
809  c.full = dfixed_mul(c, b);
810  c.full = dfixed_mul(c, radeon_crtc->hsc);
811  c.full = dfixed_div(c, a);
812  a.full = dfixed_const(16);
813  c.full = dfixed_div(c, a);
814  priority_b_mark = dfixed_trunc(c);
815  priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
816  }
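/* The priority mark computed above is essentially the latency watermark
 * converted from nanoseconds into pixels (latency * pixel clock), scaled by
 * the horizontal scale ratio and divided by 16 before being OR'd into the
 * PRIORITY_A/B_CNT values written below.
 */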
817 
818  /* select wm A */
819  arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
820  tmp = arb_control3;
821  tmp &= ~LATENCY_WATERMARK_MASK(3);
822  tmp |= LATENCY_WATERMARK_MASK(1);
823  WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
824  WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
825  (LATENCY_LOW_WATERMARK(latency_watermark_a) |
826  LATENCY_HIGH_WATERMARK(line_time)));
827  /* select wm B */
828  tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
829  tmp &= ~LATENCY_WATERMARK_MASK(3);
830  tmp |= LATENCY_WATERMARK_MASK(2);
831  WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
832  WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
833  (LATENCY_LOW_WATERMARK(latency_watermark_b) |
834  LATENCY_HIGH_WATERMARK(line_time)));
835  /* restore original selection */
836  WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);
837 
838  /* write the priority marks */
839  WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
840  WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);
841 
842 }
843 
844 void dce6_bandwidth_update(struct radeon_device *rdev)
845 {
846  struct drm_display_mode *mode0 = NULL;
847  struct drm_display_mode *mode1 = NULL;
848  u32 num_heads = 0, lb_size;
849  int i;
850 
851  radeon_update_display_priority(rdev);
852 
853  for (i = 0; i < rdev->num_crtc; i++) {
854  if (rdev->mode_info.crtcs[i]->base.enabled)
855  num_heads++;
856  }
857  for (i = 0; i < rdev->num_crtc; i += 2) {
858  mode0 = &rdev->mode_info.crtcs[i]->base.mode;
859  mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
860  lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
861  dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
862  lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
863  dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
864  }
865 }
866 
867 /*
868  * Core functions
869  */
870 static void si_tiling_mode_table_init(struct radeon_device *rdev)
871 {
872  const u32 num_tile_mode_states = 32;
873  u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
874 
875  switch (rdev->config.si.mem_row_size_in_kb) {
876  case 1:
877  split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
878  break;
879  case 2:
880  default:
881  split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
882  break;
883  case 4:
884  split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
885  break;
886  }
887 
888  if ((rdev->family == CHIP_TAHITI) ||
889  (rdev->family == CHIP_PITCAIRN)) {
890  for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
891  switch (reg_offset) {
892  case 0: /* non-AA compressed depth or any compressed stencil */
893  gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
901  break;
902  case 1: /* 2xAA/4xAA compressed depth only */
903  gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
911  break;
912  case 2: /* 8xAA compressed depth only */
913  gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
921  break;
922  case 3: /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
923  gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
931  break;
932  case 4: /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
933  gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
941  break;
942  case 5: /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
943  gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
946  TILE_SPLIT(split_equal_to_row_size) |
951  break;
952  case 6: /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
953  gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
956  TILE_SPLIT(split_equal_to_row_size) |
961  break;
962  case 7: /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
963  gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
966  TILE_SPLIT(split_equal_to_row_size) |
971  break;
972  case 8: /* 1D and 1D Array Surfaces */
973  gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
981  break;
982  case 9: /* Displayable maps. */
983  gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
991  break;
992  case 10: /* Display 8bpp. */
993  gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1001  break;
1002  case 11: /* Display 16bpp. */
1003  gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1011  break;
1012  case 12: /* Display 32bpp. */
1013  gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1021  break;
1022  case 13: /* Thin. */
1023  gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1031  break;
1032  case 14: /* Thin 8 bpp. */
1033  gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1041  break;
1042  case 15: /* Thin 16 bpp. */
1043  gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1051  break;
1052  case 16: /* Thin 32 bpp. */
1053  gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1061  break;
1062  case 17: /* Thin 64 bpp. */
1063  gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1066  TILE_SPLIT(split_equal_to_row_size) |
1071  break;
1072  case 21: /* 8 bpp PRT. */
1073  gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1081  break;
1082  case 22: /* 16 bpp PRT */
1083  gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1091  break;
1092  case 23: /* 32 bpp PRT */
1093  gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1101  break;
1102  case 24: /* 64 bpp PRT */
1103  gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1111  break;
1112  case 25: /* 128 bpp PRT */
1113  gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1121  break;
1122  default:
1123  gb_tile_moden = 0;
1124  break;
1125  }
1126  WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1127  }
1128  } else if (rdev->family == CHIP_VERDE) {
1129  for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1130  switch (reg_offset) {
1131  case 0: /* non-AA compressed depth or any compressed stencil */
1132  gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1140  break;
1141  case 1: /* 2xAA/4xAA compressed depth only */
1142  gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1150  break;
1151  case 2: /* 8xAA compressed depth only */
1152  gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1160  break;
1161  case 3: /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
1162  gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1170  break;
1171  case 4: /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
1172  gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1180  break;
1181  case 5: /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
1182  gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1185  TILE_SPLIT(split_equal_to_row_size) |
1190  break;
1191  case 6: /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
1192  gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1195  TILE_SPLIT(split_equal_to_row_size) |
1200  break;
1201  case 7: /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
1202  gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1205  TILE_SPLIT(split_equal_to_row_size) |
1210  break;
1211  case 8: /* 1D and 1D Array Surfaces */
1212  gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1220  break;
1221  case 9: /* Displayable maps. */
1222  gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1230  break;
1231  case 10: /* Display 8bpp. */
1232  gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1240  break;
1241  case 11: /* Display 16bpp. */
1242  gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1250  break;
1251  case 12: /* Display 32bpp. */
1252  gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1260  break;
1261  case 13: /* Thin. */
1262  gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1270  break;
1271  case 14: /* Thin 8 bpp. */
1272  gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1280  break;
1281  case 15: /* Thin 16 bpp. */
1282  gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1290  break;
1291  case 16: /* Thin 32 bpp. */
1292  gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1300  break;
1301  case 17: /* Thin 64 bpp. */
1302  gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1305  TILE_SPLIT(split_equal_to_row_size) |
1310  break;
1311  case 21: /* 8 bpp PRT. */
1312  gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1320  break;
1321  case 22: /* 16 bpp PRT */
1322  gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1330  break;
1331  case 23: /* 32 bpp PRT */
1332  gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1340  break;
1341  case 24: /* 64 bpp PRT */
1342  gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1350  break;
1351  case 25: /* 128 bpp PRT */
1352  gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1360  break;
1361  default:
1362  gb_tile_moden = 0;
1363  break;
1364  }
1365  WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1366  }
1367  } else
1368  DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
1369 }
1370 
1371 static void si_select_se_sh(struct radeon_device *rdev,
1372  u32 se_num, u32 sh_num)
1373 {
1374  u32 data = INSTANCE_BROADCAST_WRITES;
1375 
1376  if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
1377  data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
1378  else if (se_num == 0xffffffff)
1379  data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
1380  else if (sh_num == 0xffffffff)
1381  data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
1382  else
1383  data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
1384  WREG32(GRBM_GFX_INDEX, data);
1385 }
1386 
1387 static u32 si_create_bitmask(u32 bit_width)
1388 {
1389  u32 i, mask = 0;
1390 
1391  for (i = 0; i < bit_width; i++) {
1392  mask <<= 1;
1393  mask |= 1;
1394  }
1395  return mask;
1396 }
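/* Equivalent to (1 << bit_width) - 1 for widths below 32; for example,
 * si_create_bitmask(5) returns 0x1f.
 */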
1397 
1398 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
1399 {
1400  u32 data, mask;
1401 
1402  data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
1403  if (data & 1)
1404  data &= INACTIVE_CUS_MASK;
1405  else
1406  data = 0;
1407  data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
1408 
1409  data >>= INACTIVE_CUS_SHIFT;
1410 
1411  mask = si_create_bitmask(cu_per_sh);
1412 
1413  return ~data & mask;
1414 }
1415 
1416 static void si_setup_spi(struct radeon_device *rdev,
1417  u32 se_num, u32 sh_per_se,
1418  u32 cu_per_sh)
1419 {
1420  int i, j, k;
1421  u32 data, mask, active_cu;
1422 
1423  for (i = 0; i < se_num; i++) {
1424  for (j = 0; j < sh_per_se; j++) {
1425  si_select_se_sh(rdev, i, j);
1426  data = RREG32(SPI_STATIC_THREAD_MGMT_3);
1427  active_cu = si_get_cu_enabled(rdev, cu_per_sh);
1428 
1429  mask = 1;
1430  for (k = 0; k < 16; k++) {
1431  mask <<= k;
1432  if (active_cu & mask) {
1433  data &= ~mask;
1434  WREG32(SPI_STATIC_THREAD_MGMT_3, data);
1435  break;
1436  }
1437  }
1438  }
1439  }
1440  si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1441 }
1442 
1443 static u32 si_get_rb_disabled(struct radeon_device *rdev,
1444  u32 max_rb_num, u32 se_num,
1445  u32 sh_per_se)
1446 {
1447  u32 data, mask;
1448 
1449  data = RREG32(CC_RB_BACKEND_DISABLE);
1450  if (data & 1)
1451  data &= BACKEND_DISABLE_MASK;
1452  else
1453  data = 0;
1455 
1456  data >>= BACKEND_DISABLE_SHIFT;
1457 
1458  mask = si_create_bitmask(max_rb_num / se_num / sh_per_se);
1459 
1460  return data & mask;
1461 }
1462 
1463 static void si_setup_rb(struct radeon_device *rdev,
1464  u32 se_num, u32 sh_per_se,
1465  u32 max_rb_num)
1466 {
1467  int i, j;
1468  u32 data, mask;
1469  u32 disabled_rbs = 0;
1470  u32 enabled_rbs = 0;
1471 
1472  for (i = 0; i < se_num; i++) {
1473  for (j = 0; j < sh_per_se; j++) {
1474  si_select_se_sh(rdev, i, j);
1475  data = si_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
1476  disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
1477  }
1478  }
1479  si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1480 
1481  mask = 1;
1482  for (i = 0; i < max_rb_num; i++) {
1483  if (!(disabled_rbs & mask))
1484  enabled_rbs |= mask;
1485  mask <<= 1;
1486  }
1487 
1488  for (i = 0; i < se_num; i++) {
1489  si_select_se_sh(rdev, i, 0xffffffff);
1490  data = 0;
1491  for (j = 0; j < sh_per_se; j++) {
1492  switch (enabled_rbs & 3) {
1493  case 1:
1494  data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
1495  break;
1496  case 2:
1497  data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
1498  break;
1499  case 3:
1500  default:
1501  data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
1502  break;
1503  }
1504  enabled_rbs >>= 2;
1505  }
1506  WREG32(PA_SC_RASTER_CONFIG, data);
1507  }
1508  si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1509 }
1510 
1511 static void si_gpu_init(struct radeon_device *rdev)
1512 {
1513  u32 gb_addr_config = 0;
1514  u32 mc_shared_chmap, mc_arb_ramcfg;
1515  u32 sx_debug_1;
1516  u32 hdp_host_path_cntl;
1517  u32 tmp;
1518  int i, j;
1519 
1520  switch (rdev->family) {
1521  case CHIP_TAHITI:
1522  rdev->config.si.max_shader_engines = 2;
1523  rdev->config.si.max_tile_pipes = 12;
1524  rdev->config.si.max_cu_per_sh = 8;
1525  rdev->config.si.max_sh_per_se = 2;
1526  rdev->config.si.max_backends_per_se = 4;
1527  rdev->config.si.max_texture_channel_caches = 12;
1528  rdev->config.si.max_gprs = 256;
1529  rdev->config.si.max_gs_threads = 32;
1530  rdev->config.si.max_hw_contexts = 8;
1531 
1532  rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
1533  rdev->config.si.sc_prim_fifo_size_backend = 0x100;
1534  rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
1535  rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
1536  gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
1537  break;
1538  case CHIP_PITCAIRN:
1539  rdev->config.si.max_shader_engines = 2;
1540  rdev->config.si.max_tile_pipes = 8;
1541  rdev->config.si.max_cu_per_sh = 5;
1542  rdev->config.si.max_sh_per_se = 2;
1543  rdev->config.si.max_backends_per_se = 4;
1544  rdev->config.si.max_texture_channel_caches = 8;
1545  rdev->config.si.max_gprs = 256;
1546  rdev->config.si.max_gs_threads = 32;
1547  rdev->config.si.max_hw_contexts = 8;
1548 
1549  rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
1550  rdev->config.si.sc_prim_fifo_size_backend = 0x100;
1551  rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
1552  rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
1553  gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
1554  break;
1555  case CHIP_VERDE:
1556  default:
1557  rdev->config.si.max_shader_engines = 1;
1558  rdev->config.si.max_tile_pipes = 4;
1559  rdev->config.si.max_cu_per_sh = 2;
1560  rdev->config.si.max_sh_per_se = 2;
1561  rdev->config.si.max_backends_per_se = 4;
1562  rdev->config.si.max_texture_channel_caches = 4;
1563  rdev->config.si.max_gprs = 256;
1564  rdev->config.si.max_gs_threads = 32;
1565  rdev->config.si.max_hw_contexts = 8;
1566 
1567  rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
1568  rdev->config.si.sc_prim_fifo_size_backend = 0x40;
1569  rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
1570  rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
1571  gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
1572  break;
1573  }
1574 
1575  /* Initialize HDP */
1576  for (i = 0, j = 0; i < 32; i++, j += 0x18) {
1577  WREG32((0x2c14 + j), 0x00000000);
1578  WREG32((0x2c18 + j), 0x00000000);
1579  WREG32((0x2c1c + j), 0x00000000);
1580  WREG32((0x2c20 + j), 0x00000000);
1581  WREG32((0x2c24 + j), 0x00000000);
1582  }
1583 
1585 
1587 
1589 
1590  mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
1591  mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
1592 
1593  rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
1594  rdev->config.si.mem_max_burst_length_bytes = 256;
1595  tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
1596  rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1597  if (rdev->config.si.mem_row_size_in_kb > 4)
1598  rdev->config.si.mem_row_size_in_kb = 4;
1599  /* XXX use MC settings? */
1600  rdev->config.si.shader_engine_tile_size = 32;
1601  rdev->config.si.num_gpus = 1;
1602  rdev->config.si.multi_gpu_tile_size = 64;
1603 
1604  /* fix up row size */
1605  gb_addr_config &= ~ROW_SIZE_MASK;
1606  switch (rdev->config.si.mem_row_size_in_kb) {
1607  case 1:
1608  default:
1609  gb_addr_config |= ROW_SIZE(0);
1610  break;
1611  case 2:
1612  gb_addr_config |= ROW_SIZE(1);
1613  break;
1614  case 4:
1615  gb_addr_config |= ROW_SIZE(2);
1616  break;
1617  }
1618 
1619  /* setup tiling info dword. gb_addr_config is not adequate since it does
1620  * not have bank info, so create a custom tiling dword.
1621  * bits 3:0 num_pipes
1622  * bits 7:4 num_banks
1623  * bits 11:8 group_size
1624  * bits 15:12 row_size
1625  */
1626  rdev->config.si.tile_config = 0;
1627  switch (rdev->config.si.num_tile_pipes) {
1628  case 1:
1629  rdev->config.si.tile_config |= (0 << 0);
1630  break;
1631  case 2:
1632  rdev->config.si.tile_config |= (1 << 0);
1633  break;
1634  case 4:
1635  rdev->config.si.tile_config |= (2 << 0);
1636  break;
1637  case 8:
1638  default:
1639  /* XXX what about 12? */
1640  rdev->config.si.tile_config |= (3 << 0);
1641  break;
1642  }
1643  switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
1644  case 0: /* four banks */
1645  rdev->config.si.tile_config |= 0 << 4;
1646  break;
1647  case 1: /* eight banks */
1648  rdev->config.si.tile_config |= 1 << 4;
1649  break;
1650  case 2: /* sixteen banks */
1651  default:
1652  rdev->config.si.tile_config |= 2 << 4;
1653  break;
1654  }
1655  rdev->config.si.tile_config |=
1656  ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
1657  rdev->config.si.tile_config |=
1658  ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
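/* Decoding the dword built above (per the encoding in the comment): e.g.
 * num_pipes = 1 << (tile_config & 0xf) and num_banks = 4 << ((tile_config >> 4) & 0xf),
 * with the group size and row size fields recoverable from bits 11:8 and 15:12.
 */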
1659 
1660  WREG32(GB_ADDR_CONFIG, gb_addr_config);
1661  WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
1662  WREG32(HDP_ADDR_CONFIG, gb_addr_config);
1663 
1664  si_tiling_mode_table_init(rdev);
1665 
1666  si_setup_rb(rdev, rdev->config.si.max_shader_engines,
1667  rdev->config.si.max_sh_per_se,
1668  rdev->config.si.max_backends_per_se);
1669 
1670  si_setup_spi(rdev, rdev->config.si.max_shader_engines,
1671  rdev->config.si.max_sh_per_se,
1672  rdev->config.si.max_cu_per_sh);
1673 
1674 
1675  /* set HW defaults for 3D engine */
1677  ROQ_IB2_START(0x2b)));
1679 
1680  sx_debug_1 = RREG32(SX_DEBUG_1);
1681  WREG32(SX_DEBUG_1, sx_debug_1);
1682 
1684 
1685  WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
1686  SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
1687  SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
1688  SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));
1689 
1691 
1692  WREG32(CP_PERFMON_CNTL, 0);
1693 
1694  WREG32(SQ_CONFIG, 0);
1695 
1697  FORCE_EOV_MAX_REZ_CNT(255)));
1698 
1701 
1704 
1713 
1714  tmp = RREG32(HDP_MISC_CNTL);
1716  WREG32(HDP_MISC_CNTL, tmp);
1717 
1718  hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
1719  WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
1720 
1722 
1723  udelay(50);
1724 }
1725 
1726 /*
1727  * GPU scratch registers helpers function.
1728  */
1729 static void si_scratch_init(struct radeon_device *rdev)
1730 {
1731  int i;
1732 
1733  rdev->scratch.num_reg = 7;
1734  rdev->scratch.reg_base = SCRATCH_REG0;
1735  for (i = 0; i < rdev->scratch.num_reg; i++) {
1736  rdev->scratch.free[i] = true;
1737  rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
1738  }
1739 }
1740 
1741 void si_fence_ring_emit(struct radeon_device *rdev,
1742  struct radeon_fence *fence)
1743 {
1744  struct radeon_ring *ring = &rdev->ring[fence->ring];
1745  u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
1746 
1747  /* flush read cache over gart */
1750  radeon_ring_write(ring, 0);
1756  radeon_ring_write(ring, 0xFFFFFFFF);
1757  radeon_ring_write(ring, 0);
1758  radeon_ring_write(ring, 10); /* poll interval */
1759  /* EVENT_WRITE_EOP - flush caches, send int */
1762  radeon_ring_write(ring, addr & 0xffffffff);
1763  radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
1764  radeon_ring_write(ring, fence->seq);
1765  radeon_ring_write(ring, 0);
1766 }
1767 
1768 /*
1769  * IB stuff
1770  */
1771 void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
1772 {
1773  struct radeon_ring *ring = &rdev->ring[ib->ring];
1774  u32 header;
1775 
1776  if (ib->is_const_ib) {
1777  /* set switch buffer packet before const IB */
1779  radeon_ring_write(ring, 0);
1780 
1782  } else {
1783  u32 next_rptr;
1784  if (ring->rptr_save_reg) {
1785  next_rptr = ring->wptr + 3 + 4 + 8;
1787  radeon_ring_write(ring, ((ring->rptr_save_reg -
1789  radeon_ring_write(ring, next_rptr);
1790  } else if (rdev->wb.enabled) {
1791  next_rptr = ring->wptr + 5 + 4 + 8;
1793  radeon_ring_write(ring, (1 << 8));
1794  radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
1795  radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
1796  radeon_ring_write(ring, next_rptr);
1797  }
1798 
1799  header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
1800  }
1801 
1802  radeon_ring_write(ring, header);
1803  radeon_ring_write(ring,
1804 #ifdef __BIG_ENDIAN
1805  (2 << 0) |
1806 #endif
1807  (ib->gpu_addr & 0xFFFFFFFC));
1808  radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
1809  radeon_ring_write(ring, ib->length_dw |
1810  (ib->vm ? (ib->vm->id << 24) : 0));
1811 
1812  if (!ib->is_const_ib) {
1813  /* flush read cache over gart for this vmid */
1816  radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
1822  radeon_ring_write(ring, 0xFFFFFFFF);
1823  radeon_ring_write(ring, 0);
1824  radeon_ring_write(ring, 10); /* poll interval */
1825  }
1826 }
1827 
1828 /*
1829  * CP.
1830  */
1831 static void si_cp_enable(struct radeon_device *rdev, bool enable)
1832 {
1833  if (enable)
1834  WREG32(CP_ME_CNTL, 0);
1835  else {
1836  radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
1837  WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
1838  WREG32(SCRATCH_UMSK, 0);
1839  }
1840  udelay(50);
1841 }
1842 
1843 static int si_cp_load_microcode(struct radeon_device *rdev)
1844 {
1845  const __be32 *fw_data;
1846  int i;
1847 
1848  if (!rdev->me_fw || !rdev->pfp_fw)
1849  return -EINVAL;
1850 
1851  si_cp_enable(rdev, false);
1852 
1853  /* PFP */
1854  fw_data = (const __be32 *)rdev->pfp_fw->data;
1855  WREG32(CP_PFP_UCODE_ADDR, 0);
1856  for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
1857  WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
1858  WREG32(CP_PFP_UCODE_ADDR, 0);
1859 
1860  /* CE */
1861  fw_data = (const __be32 *)rdev->ce_fw->data;
1862  WREG32(CP_CE_UCODE_ADDR, 0);
1863  for (i = 0; i < SI_CE_UCODE_SIZE; i++)
1864  WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
1865  WREG32(CP_CE_UCODE_ADDR, 0);
1866 
1867  /* ME */
1868  fw_data = (const __be32 *)rdev->me_fw->data;
1869  WREG32(CP_ME_RAM_WADDR, 0);
1870  for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
1871  WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
1872  WREG32(CP_ME_RAM_WADDR, 0);
1873 
1874  WREG32(CP_PFP_UCODE_ADDR, 0);
1875  WREG32(CP_CE_UCODE_ADDR, 0);
1876  WREG32(CP_ME_RAM_WADDR, 0);
1877  WREG32(CP_ME_RAM_RADDR, 0);
1878  return 0;
1879 }
1880 
1881 static int si_cp_start(struct radeon_device *rdev)
1882 {
1883  struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
1884  int r, i;
1885 
1886  r = radeon_ring_lock(rdev, ring, 7 + 4);
1887  if (r) {
1888  DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
1889  return r;
1890  }
1891  /* init the CP */
1893  radeon_ring_write(ring, 0x1);
1894  radeon_ring_write(ring, 0x0);
1895  radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
1897  radeon_ring_write(ring, 0);
1898  radeon_ring_write(ring, 0);
1899 
1900  /* init the CE partitions */
1903  radeon_ring_write(ring, 0xc000);
1904  radeon_ring_write(ring, 0xe000);
1905  radeon_ring_unlock_commit(rdev, ring);
1906 
1907  si_cp_enable(rdev, true);
1908 
1909  r = radeon_ring_lock(rdev, ring, si_default_size + 10);
1910  if (r) {
1911  DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
1912  return r;
1913  }
1914 
1915  /* setup clear context state */
1918 
1919  for (i = 0; i < si_default_size; i++)
1921 
1924 
1925  /* set clear context state */
1927  radeon_ring_write(ring, 0);
1928 
1930  radeon_ring_write(ring, 0x00000316);
1931  radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
1932  radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
1933 
1934  radeon_ring_unlock_commit(rdev, ring);
1935 
1937  ring = &rdev->ring[i];
1938  r = radeon_ring_lock(rdev, ring, 2);
1939 
1940  /* clear the compute context state */
1942  radeon_ring_write(ring, 0);
1943 
1944  radeon_ring_unlock_commit(rdev, ring);
1945  }
1946 
1947  return 0;
1948 }
1949 
1950 static void si_cp_fini(struct radeon_device *rdev)
1951 {
1952  struct radeon_ring *ring;
1953  si_cp_enable(rdev, false);
1954 
1955  ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
1956  radeon_ring_fini(rdev, ring);
1957  radeon_scratch_free(rdev, ring->rptr_save_reg);
1958 
1959  ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
1960  radeon_ring_fini(rdev, ring);
1961  radeon_scratch_free(rdev, ring->rptr_save_reg);
1962 
1963  ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
1964  radeon_ring_fini(rdev, ring);
1965  radeon_scratch_free(rdev, ring->rptr_save_reg);
1966 }
1967 
1968 static int si_cp_resume(struct radeon_device *rdev)
1969 {
1970  struct radeon_ring *ring;
1971  u32 tmp;
1972  u32 rb_bufsz;
1973  int r;
1974 
1975  /* Reset cp; if cp is reset, then PA, SH, VGT also need to be reset */
1977  SOFT_RESET_PA |
1978  SOFT_RESET_VGT |
1979  SOFT_RESET_SPI |
1980  SOFT_RESET_SX));
1982  mdelay(15);
1983  WREG32(GRBM_SOFT_RESET, 0);
1985 
1986  WREG32(CP_SEM_WAIT_TIMER, 0x0);
1988 
1989  /* Set the write pointer delay */
1991 
1992  WREG32(CP_DEBUG, 0);
1993  WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
1994 
1995  /* ring 0 - compute and gfx */
1996  /* Set ring buffer size */
1997  ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
1998  rb_bufsz = drm_order(ring->ring_size / 8);
1999  tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
2000 #ifdef __BIG_ENDIAN
2001  tmp |= BUF_SWAP_32BIT;
2002 #endif
2003  WREG32(CP_RB0_CNTL, tmp);
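/* Example (assuming a 1 MiB gfx ring): ring_size / 8 = 131072, so
 * rb_bufsz = drm_order(131072) = 17, and with 4 KiB GPU pages the
 * block-size field in bits 15:8 becomes drm_order(512) = 9.
 */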
2004 
2005  /* Initialize the ring buffer's read and write pointers */
2007  ring->wptr = 0;
2008  WREG32(CP_RB0_WPTR, ring->wptr);
2009 
2010  /* set the wb address whether it's enabled or not */
2011  WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
2013 
2014  if (rdev->wb.enabled)
2015  WREG32(SCRATCH_UMSK, 0xff);
2016  else {
2017  tmp |= RB_NO_UPDATE;
2018  WREG32(SCRATCH_UMSK, 0);
2019  }
2020 
2021  mdelay(1);
2022  WREG32(CP_RB0_CNTL, tmp);
2023 
2024  WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);
2025 
2026  ring->rptr = RREG32(CP_RB0_RPTR);
2027 
2028  /* ring1 - compute only */
2029  /* Set ring buffer size */
2030  ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
2031  rb_bufsz = drm_order(ring->ring_size / 8);
2032  tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
2033 #ifdef __BIG_ENDIAN
2034  tmp |= BUF_SWAP_32BIT;
2035 #endif
2036  WREG32(CP_RB1_CNTL, tmp);
2037 
2038  /* Initialize the ring buffer's read and write pointers */
2040  ring->wptr = 0;
2041  WREG32(CP_RB1_WPTR, ring->wptr);
2042 
2043  /* set the wb address whether it's enabled or not */
2044  WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
2046 
2047  mdelay(1);
2048  WREG32(CP_RB1_CNTL, tmp);
2049 
2050  WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);
2051 
2052  ring->rptr = RREG32(CP_RB1_RPTR);
2053 
2054  /* ring2 - compute only */
2055  /* Set ring buffer size */
2056  ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
2057  rb_bufsz = drm_order(ring->ring_size / 8);
2058  tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
2059 #ifdef __BIG_ENDIAN
2060  tmp |= BUF_SWAP_32BIT;
2061 #endif
2062  WREG32(CP_RB2_CNTL, tmp);
2063 
2064  /* Initialize the ring buffer's read and write pointers */
2066  ring->wptr = 0;
2067  WREG32(CP_RB2_WPTR, ring->wptr);
2068 
2069  /* set the wb address whether it's enabled or not */
2070  WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
2072 
2073  mdelay(1);
2074  WREG32(CP_RB2_CNTL, tmp);
2075 
2076  WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);
2077 
2078  ring->rptr = RREG32(CP_RB2_RPTR);
2079 
2080  /* start the rings */
2081  si_cp_start(rdev);
2082  rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
2083  rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
2084  rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
2086  if (r) {
2087  rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2088  rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
2089  rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
2090  return r;
2091  }
2093  if (r) {
2094  rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
2095  }
2097  if (r) {
2098  rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
2099  }
2100 
2101  return 0;
2102 }
2103 
2104 bool si_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
2105 {
2106  u32 srbm_status;
2107  u32 grbm_status, grbm_status2;
2108  u32 grbm_status_se0, grbm_status_se1;
2109 
2110  srbm_status = RREG32(SRBM_STATUS);
2111  grbm_status = RREG32(GRBM_STATUS);
2112  grbm_status2 = RREG32(GRBM_STATUS2);
2113  grbm_status_se0 = RREG32(GRBM_STATUS_SE0);
2114  grbm_status_se1 = RREG32(GRBM_STATUS_SE1);
2115  if (!(grbm_status & GUI_ACTIVE)) {
2117  return false;
2118  }
2119  /* force CP activities */
2120  radeon_ring_force_activity(rdev, ring);
2121  return radeon_ring_test_lockup(rdev, ring);
2122 }
2123 
2124 static int si_gpu_soft_reset(struct radeon_device *rdev)
2125 {
2126  struct evergreen_mc_save save;
2127  u32 grbm_reset = 0;
2128 
2129  if (!(RREG32(GRBM_STATUS) & GUI_ACTIVE))
2130  return 0;
2131 
2132  dev_info(rdev->dev, "GPU softreset \n");
2133  dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
2134  RREG32(GRBM_STATUS));
2135  dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
2136  RREG32(GRBM_STATUS2));
2137  dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
2139  dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
2141  dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
2142  RREG32(SRBM_STATUS));
2143  evergreen_mc_stop(rdev, &save);
2144  if (radeon_mc_wait_for_idle(rdev)) {
2145  dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
2146  }
2147  /* Disable CP parsing/prefetching */
2149 
2150  /* reset all the gfx blocks */
2151  grbm_reset = (SOFT_RESET_CP |
2152  SOFT_RESET_CB |
2153  SOFT_RESET_DB |
2154  SOFT_RESET_GDS |
2155  SOFT_RESET_PA |
2156  SOFT_RESET_SC |
2157  SOFT_RESET_BCI |
2158  SOFT_RESET_SPI |
2159  SOFT_RESET_SX |
2160  SOFT_RESET_TC |
2161  SOFT_RESET_TA |
2162  SOFT_RESET_VGT |
2163  SOFT_RESET_IA);
2164 
2165  dev_info(rdev->dev, " GRBM_SOFT_RESET=0x%08X\n", grbm_reset);
2166  WREG32(GRBM_SOFT_RESET, grbm_reset);
2168  udelay(50);
2169  WREG32(GRBM_SOFT_RESET, 0);
2171  /* Wait a little for things to settle down */
2172  udelay(50);
2173  dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
2174  RREG32(GRBM_STATUS));
2175  dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
2176  RREG32(GRBM_STATUS2));
2177  dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
2179  dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
2181  dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
2182  RREG32(SRBM_STATUS));
2183  evergreen_mc_resume(rdev, &save);
2184  return 0;
2185 }
2186 
2187 int si_asic_reset(struct radeon_device *rdev)
2188 {
2189  return si_gpu_soft_reset(rdev);
2190 }
2191 
2192 /* MC */
2193 static void si_mc_program(struct radeon_device *rdev)
2194 {
2195  struct evergreen_mc_save save;
2196  u32 tmp;
2197  int i, j;
2198 
2199  /* Initialize HDP */
2200  for (i = 0, j = 0; i < 32; i++, j += 0x18) {
2201  WREG32((0x2c14 + j), 0x00000000);
2202  WREG32((0x2c18 + j), 0x00000000);
2203  WREG32((0x2c1c + j), 0x00000000);
2204  WREG32((0x2c20 + j), 0x00000000);
2205  WREG32((0x2c24 + j), 0x00000000);
2206  }
2207  WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
2208 
2209  evergreen_mc_stop(rdev, &save);
2210  if (radeon_mc_wait_for_idle(rdev)) {
2211  dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
2212  }
2213  /* Lockout access through VGA aperture */
2215  /* Update configuration */
2217  rdev->mc.vram_start >> 12);
2219  rdev->mc.vram_end >> 12);
2221  rdev->vram_scratch.gpu_addr >> 12);
2222  tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
2223  tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
2224  WREG32(MC_VM_FB_LOCATION, tmp);
2225  /* XXX double check these! */
2226  WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
2227  WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
2228  WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
2229  WREG32(MC_VM_AGP_BASE, 0);
2230  WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
2231  WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
2232  if (radeon_mc_wait_for_idle(rdev)) {
2233  dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
2234  }
2235  evergreen_mc_resume(rdev, &save);
2236  /* we need to own VRAM, so turn off the VGA renderer here
2237  * to stop it overwriting our objects */
2239 }
2240 
2241 /* SI MC address space is 40 bits */
2242 static void si_vram_location(struct radeon_device *rdev,
2243  struct radeon_mc *mc, u64 base)
2244 {
2245  mc->vram_start = base;
2246  if (mc->mc_vram_size > (0xFFFFFFFFFFULL - base + 1)) {
2247  dev_warn(rdev->dev, "limiting VRAM to PCI aperture size\n");
2248  mc->real_vram_size = mc->aper_size;
2249  mc->mc_vram_size = mc->aper_size;
2250  }
2251  mc->vram_end = mc->vram_start + mc->mc_vram_size - 1;
2252  dev_info(rdev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n",
2253  mc->mc_vram_size >> 20, mc->vram_start,
2254  mc->vram_end, mc->real_vram_size >> 20);
2255 }
2256 
2257 static void si_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc)
2258 {
2259  u64 size_af, size_bf;
2260 
2261  size_af = ((0xFFFFFFFFFFULL - mc->vram_end) + mc->gtt_base_align) & ~mc->gtt_base_align;
2262  size_bf = mc->vram_start & ~mc->gtt_base_align;
2263  if (size_bf > size_af) {
2264  if (mc->gtt_size > size_bf) {
2265  dev_warn(rdev->dev, "limiting GTT\n");
2266  mc->gtt_size = size_bf;
2267  }
2268  mc->gtt_start = (mc->vram_start & ~mc->gtt_base_align) - mc->gtt_size;
2269  } else {
2270  if (mc->gtt_size > size_af) {
2271  dev_warn(rdev->dev, "limiting GTT\n");
2272  mc->gtt_size = size_af;
2273  }
2274  mc->gtt_start = (mc->vram_end + 1 + mc->gtt_base_align) & ~mc->gtt_base_align;
2275  }
2276  mc->gtt_end = mc->gtt_start + mc->gtt_size - 1;
2277  dev_info(rdev->dev, "GTT: %lluM 0x%016llX - 0x%016llX\n",
2278  mc->gtt_size >> 20, mc->gtt_start, mc->gtt_end);
2279 }
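/*
 * si_gtt_location() puts the GTT in whichever gap of the 40-bit MC address
 * space (below or above VRAM) is larger, aligned to gtt_base_align and
 * clamped to the size of that gap.  For example, with VRAM mapped at
 * 0x0 - 0x7FFFFFFF and a 1GB GTT, size_bf is 0 and size_af is huge, so the
 * GTT lands immediately after VRAM at 0x80000000.
 */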
2280 
2281 static void si_vram_gtt_location(struct radeon_device *rdev,
2282  struct radeon_mc *mc)
2283 {
2284  if (mc->mc_vram_size > 0xFFC0000000ULL) {
2285  /* leave room for at least 1024M GTT */
2286  dev_warn(rdev->dev, "limiting VRAM\n");
2287  mc->real_vram_size = 0xFFC0000000ULL;
2288  mc->mc_vram_size = 0xFFC0000000ULL;
2289  }
2290  si_vram_location(rdev, &rdev->mc, 0);
2291  rdev->mc.gtt_base_align = 0;
2292  si_gtt_location(rdev, mc);
2293 }
2294 
2295 static int si_mc_init(struct radeon_device *rdev)
2296 {
2297  u32 tmp;
2298  int chansize, numchan;
2299 
2300  /* Get VRAM information */
2301  rdev->mc.vram_is_ddr = true;
2302  tmp = RREG32(MC_ARB_RAMCFG);
2303  if (tmp & CHANSIZE_OVERRIDE) {
2304  chansize = 16;
2305  } else if (tmp & CHANSIZE_MASK) {
2306  chansize = 64;
2307  } else {
2308  chansize = 32;
2309  }
2310  tmp = RREG32(MC_SHARED_CHMAP);
2311  switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
2312  case 0:
2313  default:
2314  numchan = 1;
2315  break;
2316  case 1:
2317  numchan = 2;
2318  break;
2319  case 2:
2320  numchan = 4;
2321  break;
2322  case 3:
2323  numchan = 8;
2324  break;
2325  case 4:
2326  numchan = 3;
2327  break;
2328  case 5:
2329  numchan = 6;
2330  break;
2331  case 6:
2332  numchan = 10;
2333  break;
2334  case 7:
2335  numchan = 12;
2336  break;
2337  case 8:
2338  numchan = 16;
2339  break;
2340  }
2341  rdev->mc.vram_width = numchan * chansize;
2342  /* Could aper size report 0? */
2343  rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
2344  rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
2345  /* size in MB on si */
2346  rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
2347  rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
2348  rdev->mc.visible_vram_size = rdev->mc.aper_size;
2349  si_vram_gtt_location(rdev, &rdev->mc);
2351 
2352  return 0;
2353 }
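/*
 * The memory bus width computed above is simply numchan * chansize, e.g. a
 * board reporting 8 channels of 32 bits ends up with a 256-bit bus.  The
 * aperture base/size come from PCI BAR 0, while mc_vram_size is taken from
 * CONFIG_MEMSIZE, which reports VRAM in megabytes, hence the conversion to
 * bytes.
 */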
2354 
2355 /*
2356  * GART
2357  */
2358 void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
2359 {
2360  /* flush hdp cache */
2362 
2363  /* bits 0-15 are the VM contexts 0-15 */
2365 }
2366 
2367 static int si_pcie_gart_enable(struct radeon_device *rdev)
2368 {
2369  int r, i;
2370 
2371  if (rdev->gart.robj == NULL) {
2372  dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
2373  return -EINVAL;
2374  }
2375  r = radeon_gart_table_vram_pin(rdev);
2376  if (r)
2377  return r;
2378  radeon_gart_restore(rdev);
2379  /* Setup TLB control */
2381  (0xA << 7) |
2382  ENABLE_L1_TLB |
2386  /* Setup L2 cache */
2395  /* setup context0 */
2396  WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
2397  WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
2398  WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
2400  (u32)(rdev->dummy_page.addr >> 12));
2404 
2405  WREG32(0x15D4, 0);
2406  WREG32(0x15D8, 0);
2407  WREG32(0x15DC, 0);
2408 
2409  /* empty context1-15 */
2410  /* set vm size, must be a multiple of 4 */
2413  /* Assign the pt base to something valid for now; the pts used for
2414  * the VMs are determined by the application and set up and assigned
2415  * on the fly in the vm part of radeon_gart.c
2416  */
2417  for (i = 1; i < 16; i++) {
2418  if (i < 8)
2420  rdev->gart.table_addr >> 12);
2421  else
2422  WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
2423  rdev->gart.table_addr >> 12);
2424  }
2425 
2426  /* enable context1-15 */
2428  (u32)(rdev->dummy_page.addr >> 12));
2432 
2433  si_pcie_gart_tlb_flush(rdev);
2434  DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
2435  (unsigned)(rdev->mc.gtt_size >> 20),
2436  (unsigned long long)rdev->gart.table_addr);
2437  rdev->gart.ready = true;
2438  return 0;
2439 }
2440 
2441 static void si_pcie_gart_disable(struct radeon_device *rdev)
2442 {
2443  /* Disable all tables */
2446  /* Setup TLB control */
2449  /* Setup L2 cache */
2454  WREG32(VM_L2_CNTL2, 0);
2458 }
2459 
2460 static void si_pcie_gart_fini(struct radeon_device *rdev)
2461 {
2462  si_pcie_gart_disable(rdev);
2464  radeon_gart_fini(rdev);
2465 }
2466 
2467 /* vm parser */
2468 static bool si_vm_reg_valid(u32 reg)
2469 {
2470  /* context regs are fine */
2471  if (reg >= 0x28000)
2472  return true;
2473 
2474  /* check config regs */
2475  switch (reg) {
2476  case GRBM_GFX_INDEX:
2477  case CP_STRMOUT_CNTL:
2480  case VGT_ESGS_RING_SIZE:
2481  case VGT_GSVS_RING_SIZE:
2482  case VGT_GS_VERTEX_REUSE:
2483  case VGT_PRIMITIVE_TYPE:
2484  case VGT_INDEX_TYPE:
2485  case VGT_NUM_INDICES:
2486  case VGT_NUM_INSTANCES:
2487  case VGT_TF_RING_SIZE:
2488  case VGT_HS_OFFCHIP_PARAM:
2489  case VGT_TF_MEMORY_BASE:
2490  case PA_CL_ENHANCE:
2493  case PA_SC_ENHANCE:
2494  case SQC_CACHES:
2498  case SPI_PS_MAX_WAVE_ID:
2499  case SPI_CONFIG_CNTL:
2500  case SPI_CONFIG_CNTL_1:
2501  case TA_CNTL_AUX:
2502  return true;
2503  default:
2504  DRM_ERROR("Invalid register 0x%x in CS\n", reg);
2505  return false;
2506  }
2507 }
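/*
 * si_vm_reg_valid() is the register whitelist used when checking command
 * streams submitted through VM IBs: anything in or above the context
 * register range (>= 0x28000) is always allowed, while config-space
 * registers are only accepted if they appear in the explicit list above.
 */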
2508 
2509 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
2510  u32 *ib, struct radeon_cs_packet *pkt)
2511 {
2512  switch (pkt->opcode) {
2513  case PACKET3_NOP:
2514  case PACKET3_SET_BASE:
2522  case PACKET3_CE_WRITE:
2523  break;
2524  default:
2525  DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
2526  return -EINVAL;
2527  }
2528  return 0;
2529 }
2530 
2531 static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
2532  u32 *ib, struct radeon_cs_packet *pkt)
2533 {
2534  u32 idx = pkt->idx + 1;
2535  u32 idx_value = ib[idx];
2536  u32 start_reg, end_reg, reg, i;
2537 
2538  switch (pkt->opcode) {
2539  case PACKET3_NOP:
2540  case PACKET3_SET_BASE:
2541  case PACKET3_CLEAR_STATE:
2545  case PACKET3_ALLOC_GDS:
2546  case PACKET3_WRITE_GDS_RAM:
2547  case PACKET3_ATOMIC_GDS:
2548  case PACKET3_ATOMIC:
2551  case PACKET3_COND_EXEC:
2552  case PACKET3_PRED_EXEC:
2553  case PACKET3_DRAW_INDIRECT:
2555  case PACKET3_INDEX_BASE:
2556  case PACKET3_DRAW_INDEX_2:
2558  case PACKET3_INDEX_TYPE:
2562  case PACKET3_NUM_INSTANCES:
2568  case PACKET3_MPEG_INDEX:
2569  case PACKET3_WAIT_REG_MEM:
2570  case PACKET3_MEM_WRITE:
2571  case PACKET3_PFP_SYNC_ME:
2572  case PACKET3_SURFACE_SYNC:
2573  case PACKET3_EVENT_WRITE:
2578  case PACKET3_SET_SH_REG:
2583  case PACKET3_ME_WRITE:
2584  break;
2585  case PACKET3_COPY_DATA:
2586  if ((idx_value & 0xf00) == 0) {
2587  reg = ib[idx + 3] * 4;
2588  if (!si_vm_reg_valid(reg))
2589  return -EINVAL;
2590  }
2591  break;
2592  case PACKET3_WRITE_DATA:
2593  if ((idx_value & 0xf00) == 0) {
2594  start_reg = ib[idx + 1] * 4;
2595  if (idx_value & 0x10000) {
2596  if (!si_vm_reg_valid(start_reg))
2597  return -EINVAL;
2598  } else {
2599  for (i = 0; i < (pkt->count - 2); i++) {
2600  reg = start_reg + (4 * i);
2601  if (!si_vm_reg_valid(reg))
2602  return -EINVAL;
2603  }
2604  }
2605  }
2606  break;
2607  case PACKET3_COND_WRITE:
2608  if (idx_value & 0x100) {
2609  reg = ib[idx + 5] * 4;
2610  if (!si_vm_reg_valid(reg))
2611  return -EINVAL;
2612  }
2613  break;
2614  case PACKET3_COPY_DW:
2615  if (idx_value & 0x2) {
2616  reg = ib[idx + 3] * 4;
2617  if (!si_vm_reg_valid(reg))
2618  return -EINVAL;
2619  }
2620  break;
2622  start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
2623  end_reg = 4 * pkt->count + start_reg - 4;
2624  if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
2625  (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
2626  (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
2627  DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
2628  return -EINVAL;
2629  }
2630  for (i = 0; i < pkt->count; i++) {
2631  reg = start_reg + (4 * i);
2632  if (!si_vm_reg_valid(reg))
2633  return -EINVAL;
2634  }
2635  break;
2636  default:
2637  DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
2638  return -EINVAL;
2639  }
2640  return 0;
2641 }
2642 
2643 static int si_vm_packet3_compute_check(struct radeon_device *rdev,
2644  u32 *ib, struct radeon_cs_packet *pkt)
2645 {
2646  u32 idx = pkt->idx + 1;
2647  u32 idx_value = ib[idx];
2648  u32 start_reg, reg, i;
2649 
2650  switch (pkt->opcode) {
2651  case PACKET3_NOP:
2652  case PACKET3_SET_BASE:
2653  case PACKET3_CLEAR_STATE:
2656  case PACKET3_ALLOC_GDS:
2657  case PACKET3_WRITE_GDS_RAM:
2658  case PACKET3_ATOMIC_GDS:
2659  case PACKET3_ATOMIC:
2662  case PACKET3_COND_EXEC:
2663  case PACKET3_PRED_EXEC:
2666  case PACKET3_WAIT_REG_MEM:
2667  case PACKET3_MEM_WRITE:
2668  case PACKET3_PFP_SYNC_ME:
2669  case PACKET3_SURFACE_SYNC:
2670  case PACKET3_EVENT_WRITE:
2675  case PACKET3_SET_SH_REG:
2680  case PACKET3_ME_WRITE:
2681  break;
2682  case PACKET3_COPY_DATA:
2683  if ((idx_value & 0xf00) == 0) {
2684  reg = ib[idx + 3] * 4;
2685  if (!si_vm_reg_valid(reg))
2686  return -EINVAL;
2687  }
2688  break;
2689  case PACKET3_WRITE_DATA:
2690  if ((idx_value & 0xf00) == 0) {
2691  start_reg = ib[idx + 1] * 4;
2692  if (idx_value & 0x10000) {
2693  if (!si_vm_reg_valid(start_reg))
2694  return -EINVAL;
2695  } else {
2696  for (i = 0; i < (pkt->count - 2); i++) {
2697  reg = start_reg + (4 * i);
2698  if (!si_vm_reg_valid(reg))
2699  return -EINVAL;
2700  }
2701  }
2702  }
2703  break;
2704  case PACKET3_COND_WRITE:
2705  if (idx_value & 0x100) {
2706  reg = ib[idx + 5] * 4;
2707  if (!si_vm_reg_valid(reg))
2708  return -EINVAL;
2709  }
2710  break;
2711  case PACKET3_COPY_DW:
2712  if (idx_value & 0x2) {
2713  reg = ib[idx + 3] * 4;
2714  if (!si_vm_reg_valid(reg))
2715  return -EINVAL;
2716  }
2717  break;
2718  default:
2719  DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
2720  return -EINVAL;
2721  }
2722  return 0;
2723 }
2724 
2725 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
2726 {
2727  int ret = 0;
2728  u32 idx = 0;
2729  struct radeon_cs_packet pkt;
2730 
2731  do {
2732  pkt.idx = idx;
2733  pkt.type = CP_PACKET_GET_TYPE(ib->ptr[idx]);
2734  pkt.count = CP_PACKET_GET_COUNT(ib->ptr[idx]);
2735  pkt.one_reg_wr = 0;
2736  switch (pkt.type) {
2737  case PACKET_TYPE0:
2738  dev_err(rdev->dev, "Packet0 not allowed!\n");
2739  ret = -EINVAL;
2740  break;
2741  case PACKET_TYPE2:
2742  idx += 1;
2743  break;
2744  case PACKET_TYPE3:
2745  pkt.opcode = CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
2746  if (ib->is_const_ib)
2747  ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
2748  else {
2749  switch (ib->ring) {
2751  ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
2752  break;
2755  ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
2756  break;
2757  default:
2758  dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
2759  ret = -EINVAL;
2760  break;
2761  }
2762  }
2763  idx += pkt.count + 2;
2764  break;
2765  default:
2766  dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
2767  ret = -EINVAL;
2768  break;
2769  }
2770  if (ret)
2771  break;
2772  } while (idx < ib->length_dw);
2773 
2774  return ret;
2775 }
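/*
 * si_ib_parse() walks the IB one PM4 packet at a time.  Type-0 packets are
 * rejected outright, type-2 packets are one-dword padding, and type-3
 * packets are dispatched to the CE, GFX or compute checker depending on
 * whether this is a const IB and on which ring it targets.  Since a type-3
 * header encodes the payload length, idx advances by pkt.count + 2 dwords
 * per packet.
 */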
2776 
2777 /*
2778  * vm
2779  */
2780 int si_vm_init(struct radeon_device *rdev)
2781 {
2782  /* number of VMs */
2783  rdev->vm_manager.nvm = 16;
2784  /* base offset of vram pages */
2785  rdev->vm_manager.vram_base_offset = 0;
2786 
2787  return 0;
2788 }
2789 
2790 void si_vm_fini(struct radeon_device *rdev)
2791 {
2792 }
2793 
2806 void si_vm_set_page(struct radeon_device *rdev, uint64_t pe,
2807  uint64_t addr, unsigned count,
2808  uint32_t incr, uint32_t flags)
2809 {
2810  struct radeon_ring *ring = &rdev->ring[rdev->asic->vm.pt_ring_index];
2811  uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
2812 
2813  while (count) {
2814  unsigned ndw = 2 + count * 2;
2815  if (ndw > 0x3FFE)
2816  ndw = 0x3FFE;
2817 
2820  WRITE_DATA_DST_SEL(1)));
2821  radeon_ring_write(ring, pe);
2822  radeon_ring_write(ring, upper_32_bits(pe));
2823  for (; ndw > 2; ndw -= 2, --count, pe += 8) {
2824  uint64_t value;
2825  if (flags & RADEON_VM_PAGE_SYSTEM) {
2826  value = radeon_vm_map_gart(rdev, addr);
2827  value &= 0xFFFFFFFFFFFFF000ULL;
2828  } else if (flags & RADEON_VM_PAGE_VALID)
2829  value = addr;
2830  else
2831  value = 0;
2832  addr += incr;
2833  value |= r600_flags;
2834  radeon_ring_write(ring, value);
2835  radeon_ring_write(ring, upper_32_bits(value));
2836  }
2837  }
2838 }
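/*
 * si_vm_set_page() fills page-table entries by emitting WRITE_DATA packets.
 * ndw = 2 + count * 2 covers the two-dword destination address (pe and its
 * upper 32 bits) plus two dwords per 64-bit PTE, capped at 0x3FFE to stay
 * within what a single packet can carry.  System pages are translated
 * through the GART with radeon_vm_map_gart() and masked to a 4KB-aligned
 * address before the R600 page flags are OR'ed in.
 */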
2839 
2840 void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
2841 {
2842  struct radeon_ring *ring = &rdev->ring[ridx];
2843 
2844  if (vm == NULL)
2845  return;
2846 
2847  /* write new base address */
2850  WRITE_DATA_DST_SEL(0)));
2851 
2852  if (vm->id < 8) {
2853  radeon_ring_write(ring,
2854  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
2855  } else {
2856  radeon_ring_write(ring,
2857  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
2858  }
2859  radeon_ring_write(ring, 0);
2860  radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
2861 
2862  /* flush hdp cache */
2865  WRITE_DATA_DST_SEL(0)));
2867  radeon_ring_write(ring, 0);
2868  radeon_ring_write(ring, 0x1);
2869 
2870  /* bits 0-15 are the VM contexts 0-15 */
2873  WRITE_DATA_DST_SEL(0)));
2875  radeon_ring_write(ring, 0);
2876  radeon_ring_write(ring, 1 << vm->id);
2877 
2878  /* sync PFP to ME, otherwise we might get invalid PFP reads */
2880  radeon_ring_write(ring, 0x0);
2881 }
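/*
 * si_vm_flush() rewrites the page-directory base for the given VM (contexts
 * 0-7 and 8-15 live in two separate register banks), flushes the HDP cache,
 * and then requests a TLB invalidate for just that VM id (the "1 << vm->id"
 * write above).  It ends with a PFP/ME synchronization so the prefetch
 * parser does not race ahead using stale translations.
 */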
2882 
2883 /*
2884  * RLC
2885  */
2886 void si_rlc_fini(struct radeon_device *rdev)
2887 {
2888  int r;
2889 
2890  /* save restore block */
2891  if (rdev->rlc.save_restore_obj) {
2892  r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
2893  if (unlikely(r != 0))
2894  dev_warn(rdev->dev, "(%d) reserve RLC sr bo failed\n", r);
2895  radeon_bo_unpin(rdev->rlc.save_restore_obj);
2896  radeon_bo_unreserve(rdev->rlc.save_restore_obj);
2897 
2898  radeon_bo_unref(&rdev->rlc.save_restore_obj);
2899  rdev->rlc.save_restore_obj = NULL;
2900  }
2901 
2902  /* clear state block */
2903  if (rdev->rlc.clear_state_obj) {
2904  r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
2905  if (unlikely(r != 0))
2906  dev_warn(rdev->dev, "(%d) reserve RLC c bo failed\n", r);
2907  radeon_bo_unpin(rdev->rlc.clear_state_obj);
2908  radeon_bo_unreserve(rdev->rlc.clear_state_obj);
2909 
2910  radeon_bo_unref(&rdev->rlc.clear_state_obj);
2911  rdev->rlc.clear_state_obj = NULL;
2912  }
2913 }
2914 
2915 int si_rlc_init(struct radeon_device *rdev)
2916 {
2917  int r;
2918 
2919  /* save restore block */
2920  if (rdev->rlc.save_restore_obj == NULL) {
2923  &rdev->rlc.save_restore_obj);
2924  if (r) {
2925  dev_warn(rdev->dev, "(%d) create RLC sr bo failed\n", r);
2926  return r;
2927  }
2928  }
2929 
2930  r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
2931  if (unlikely(r != 0)) {
2932  si_rlc_fini(rdev);
2933  return r;
2934  }
2935  r = radeon_bo_pin(rdev->rlc.save_restore_obj, RADEON_GEM_DOMAIN_VRAM,
2936  &rdev->rlc.save_restore_gpu_addr);
2937  radeon_bo_unreserve(rdev->rlc.save_restore_obj);
2938  if (r) {
2939  dev_warn(rdev->dev, "(%d) pin RLC sr bo failed\n", r);
2940  si_rlc_fini(rdev);
2941  return r;
2942  }
2943 
2944  /* clear state block */
2945  if (rdev->rlc.clear_state_obj == NULL) {
2948  &rdev->rlc.clear_state_obj);
2949  if (r) {
2950  dev_warn(rdev->dev, "(%d) create RLC c bo failed\n", r);
2951  si_rlc_fini(rdev);
2952  return r;
2953  }
2954  }
2955  r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
2956  if (unlikely(r != 0)) {
2957  si_rlc_fini(rdev);
2958  return r;
2959  }
2960  r = radeon_bo_pin(rdev->rlc.clear_state_obj, RADEON_GEM_DOMAIN_VRAM,
2961  &rdev->rlc.clear_state_gpu_addr);
2962  radeon_bo_unreserve(rdev->rlc.clear_state_obj);
2963  if (r) {
2964  dev_warn(rdev->dev, "(%d) pin RLC c bo failed\n", r);
2965  si_rlc_fini(rdev);
2966  return r;
2967  }
2968 
2969  return 0;
2970 }
2971 
2972 static void si_rlc_stop(struct radeon_device *rdev)
2973 {
2974  WREG32(RLC_CNTL, 0);
2975 }
2976 
2977 static void si_rlc_start(struct radeon_device *rdev)
2978 {
2979  WREG32(RLC_CNTL, RLC_ENABLE);
2980 }
2981 
2982 static int si_rlc_resume(struct radeon_device *rdev)
2983 {
2984  u32 i;
2985  const __be32 *fw_data;
2986 
2987  if (!rdev->rlc_fw)
2988  return -EINVAL;
2989 
2990  si_rlc_stop(rdev);
2991 
2992  WREG32(RLC_RL_BASE, 0);
2993  WREG32(RLC_RL_SIZE, 0);
2994  WREG32(RLC_LB_CNTL, 0);
2995  WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
2997 
2998  WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
2999  WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
3000 
3001  WREG32(RLC_MC_CNTL, 0);
3002  WREG32(RLC_UCODE_CNTL, 0);
3003 
3004  fw_data = (const __be32 *)rdev->rlc_fw->data;
3005  for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
3006  WREG32(RLC_UCODE_ADDR, i);
3007  WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
3008  }
3009  WREG32(RLC_UCODE_ADDR, 0);
3010 
3011  si_rlc_start(rdev);
3012 
3013  return 0;
3014 }
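/*
 * The RLC microcode upload above is a simple indexed write loop:
 * RLC_UCODE_ADDR selects the word index and RLC_UCODE_DATA latches the
 * big-endian firmware word (hence be32_to_cpup()).  Writing the address
 * back to 0 at the end resets the pointer before the RLC is started again.
 */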
3015 
3016 static void si_enable_interrupts(struct radeon_device *rdev)
3017 {
3018  u32 ih_cntl = RREG32(IH_CNTL);
3019  u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
3020 
3021  ih_cntl |= ENABLE_INTR;
3022  ih_rb_cntl |= IH_RB_ENABLE;
3023  WREG32(IH_CNTL, ih_cntl);
3024  WREG32(IH_RB_CNTL, ih_rb_cntl);
3025  rdev->ih.enabled = true;
3026 }
3027 
3028 static void si_disable_interrupts(struct radeon_device *rdev)
3029 {
3030  u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
3031  u32 ih_cntl = RREG32(IH_CNTL);
3032 
3033  ih_rb_cntl &= ~IH_RB_ENABLE;
3034  ih_cntl &= ~ENABLE_INTR;
3035  WREG32(IH_RB_CNTL, ih_rb_cntl);
3036  WREG32(IH_CNTL, ih_cntl);
3037  /* set rptr, wptr to 0 */
3038  WREG32(IH_RB_RPTR, 0);
3039  WREG32(IH_RB_WPTR, 0);
3040  rdev->ih.enabled = false;
3041  rdev->ih.rptr = 0;
3042 }
3043 
3044 static void si_disable_interrupt_state(struct radeon_device *rdev)
3045 {
3046  u32 tmp;
3047 
3051  WREG32(GRBM_INT_CNTL, 0);
3054  if (rdev->num_crtc >= 4) {
3057  }
3058  if (rdev->num_crtc >= 6) {
3061  }
3062 
3065  if (rdev->num_crtc >= 4) {
3068  }
3069  if (rdev->num_crtc >= 6) {
3072  }
3073 
3075 
3088 
3089 }
3090 
3091 static int si_irq_init(struct radeon_device *rdev)
3092 {
3093  int ret = 0;
3094  int rb_bufsz;
3095  u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
3096 
3097  /* allocate ring */
3098  ret = r600_ih_ring_alloc(rdev);
3099  if (ret)
3100  return ret;
3101 
3102  /* disable irqs */
3103  si_disable_interrupts(rdev);
3104 
3105  /* init rlc */
3106  ret = si_rlc_resume(rdev);
3107  if (ret) {
3108  r600_ih_ring_fini(rdev);
3109  return ret;
3110  }
3111 
3112  /* setup interrupt control */
3113  /* set dummy read address to ring address */
3114  WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
3115  interrupt_cntl = RREG32(INTERRUPT_CNTL);
3116  /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
3117  * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
3118  */
3119  interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
3120  /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
3121  interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
3122  WREG32(INTERRUPT_CNTL, interrupt_cntl);
3123 
3124  WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
3125  rb_bufsz = drm_order(rdev->ih.ring_size / 4);
3126 
3127  ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
3129  (rb_bufsz << 1));
3130 
3131  if (rdev->wb.enabled)
3132  ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
3133 
3134  /* set the writeback address whether it's enabled or not */
3135  WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
3137 
3138  WREG32(IH_RB_CNTL, ih_rb_cntl);
3139 
3140  /* set rptr, wptr to 0 */
3141  WREG32(IH_RB_RPTR, 0);
3142  WREG32(IH_RB_WPTR, 0);
3143 
3144  /* Default settings for IH_CNTL (disabled at first) */
3145  ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
3146  /* RPTR_REARM only works if msi's are enabled */
3147  if (rdev->msi_enabled)
3148  ih_cntl |= RPTR_REARM;
3149  WREG32(IH_CNTL, ih_cntl);
3150 
3151  /* force the active interrupt state to all disabled */
3152  si_disable_interrupt_state(rdev);
3153 
3154  pci_set_master(rdev->pdev);
3155 
3156  /* enable irqs */
3157  si_enable_interrupts(rdev);
3158 
3159  return ret;
3160 }
3161 
3162 int si_irq_set(struct radeon_device *rdev)
3163 {
3165  u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
3166  u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
3167  u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
3168  u32 grbm_int_cntl = 0;
3169  u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0;
3170 
3171  if (!rdev->irq.installed) {
3172  WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
3173  return -EINVAL;
3174  }
3175  /* don't enable anything if the ih is disabled */
3176  if (!rdev->ih.enabled) {
3177  si_disable_interrupts(rdev);
3178  /* force the active interrupt state to all disabled */
3179  si_disable_interrupt_state(rdev);
3180  return 0;
3181  }
3182 
3189 
3190  /* enable CP interrupts on all rings */
3191  if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
3192  DRM_DEBUG("si_irq_set: sw int gfx\n");
3193  cp_int_cntl |= TIME_STAMP_INT_ENABLE;
3194  }
3195  if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
3196  DRM_DEBUG("si_irq_set: sw int cp1\n");
3197  cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
3198  }
3199  if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
3200  DRM_DEBUG("si_irq_set: sw int cp2\n");
3201  cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
3202  }
3203  if (rdev->irq.crtc_vblank_int[0] ||
3204  atomic_read(&rdev->irq.pflip[0])) {
3205  DRM_DEBUG("si_irq_set: vblank 0\n");
3206  crtc1 |= VBLANK_INT_MASK;
3207  }
3208  if (rdev->irq.crtc_vblank_int[1] ||
3209  atomic_read(&rdev->irq.pflip[1])) {
3210  DRM_DEBUG("si_irq_set: vblank 1\n");
3211  crtc2 |= VBLANK_INT_MASK;
3212  }
3213  if (rdev->irq.crtc_vblank_int[2] ||
3214  atomic_read(&rdev->irq.pflip[2])) {
3215  DRM_DEBUG("si_irq_set: vblank 2\n");
3216  crtc3 |= VBLANK_INT_MASK;
3217  }
3218  if (rdev->irq.crtc_vblank_int[3] ||
3219  atomic_read(&rdev->irq.pflip[3])) {
3220  DRM_DEBUG("si_irq_set: vblank 3\n");
3221  crtc4 |= VBLANK_INT_MASK;
3222  }
3223  if (rdev->irq.crtc_vblank_int[4] ||
3224  atomic_read(&rdev->irq.pflip[4])) {
3225  DRM_DEBUG("si_irq_set: vblank 4\n");
3226  crtc5 |= VBLANK_INT_MASK;
3227  }
3228  if (rdev->irq.crtc_vblank_int[5] ||
3229  atomic_read(&rdev->irq.pflip[5])) {
3230  DRM_DEBUG("si_irq_set: vblank 5\n");
3231  crtc6 |= VBLANK_INT_MASK;
3232  }
3233  if (rdev->irq.hpd[0]) {
3234  DRM_DEBUG("si_irq_set: hpd 1\n");
3235  hpd1 |= DC_HPDx_INT_EN;
3236  }
3237  if (rdev->irq.hpd[1]) {
3238  DRM_DEBUG("si_irq_set: hpd 2\n");
3239  hpd2 |= DC_HPDx_INT_EN;
3240  }
3241  if (rdev->irq.hpd[2]) {
3242  DRM_DEBUG("si_irq_set: hpd 3\n");
3243  hpd3 |= DC_HPDx_INT_EN;
3244  }
3245  if (rdev->irq.hpd[3]) {
3246  DRM_DEBUG("si_irq_set: hpd 4\n");
3247  hpd4 |= DC_HPDx_INT_EN;
3248  }
3249  if (rdev->irq.hpd[4]) {
3250  DRM_DEBUG("si_irq_set: hpd 5\n");
3251  hpd5 |= DC_HPDx_INT_EN;
3252  }
3253  if (rdev->irq.hpd[5]) {
3254  DRM_DEBUG("si_irq_set: hpd 6\n");
3255  hpd6 |= DC_HPDx_INT_EN;
3256  }
3257 
3258  WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
3259  WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
3260  WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);
3261 
3262  WREG32(GRBM_INT_CNTL, grbm_int_cntl);
3263 
3266  if (rdev->num_crtc >= 4) {
3269  }
3270  if (rdev->num_crtc >= 6) {
3273  }
3274 
3277  if (rdev->num_crtc >= 4) {
3280  }
3281  if (rdev->num_crtc >= 6) {
3284  }
3285 
3286  WREG32(DC_HPD1_INT_CONTROL, hpd1);
3287  WREG32(DC_HPD2_INT_CONTROL, hpd2);
3288  WREG32(DC_HPD3_INT_CONTROL, hpd3);
3289  WREG32(DC_HPD4_INT_CONTROL, hpd4);
3290  WREG32(DC_HPD5_INT_CONTROL, hpd5);
3291  WREG32(DC_HPD6_INT_CONTROL, hpd6);
3292 
3293  return 0;
3294 }
3295 
3296 static inline void si_irq_ack(struct radeon_device *rdev)
3297 {
3298  u32 tmp;
3299 
3300  rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
3301  rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
3302  rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
3303  rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
3304  rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
3305  rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
3306  rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
3307  rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
3308  if (rdev->num_crtc >= 4) {
3309  rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
3310  rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
3311  }
3312  if (rdev->num_crtc >= 6) {
3313  rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
3314  rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
3315  }
3316 
3317  if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
3319  if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
3321  if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
3323  if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
3325  if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
3327  if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
3329 
3330  if (rdev->num_crtc >= 4) {
3331  if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
3333  if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
3335  if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
3337  if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
3339  if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
3341  if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
3343  }
3344 
3345  if (rdev->num_crtc >= 6) {
3346  if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
3348  if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
3350  if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
3352  if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
3354  if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
3356  if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
3358  }
3359 
3360  if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
3361  tmp = RREG32(DC_HPD1_INT_CONTROL);
3362  tmp |= DC_HPDx_INT_ACK;
3364  }
3365  if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
3366  tmp = RREG32(DC_HPD2_INT_CONTROL);
3367  tmp |= DC_HPDx_INT_ACK;
3369  }
3370  if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
3371  tmp = RREG32(DC_HPD3_INT_CONTROL);
3372  tmp |= DC_HPDx_INT_ACK;
3374  }
3375  if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
3376  tmp = RREG32(DC_HPD4_INT_CONTROL);
3377  tmp |= DC_HPDx_INT_ACK;
3379  }
3380  if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
3381  tmp = RREG32(DC_HPD5_INT_CONTROL);
3382  tmp |= DC_HPDx_INT_ACK;
3384  }
3385  if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
3386  tmp = RREG32(DC_HPD6_INT_CONTROL);
3387  tmp |= DC_HPDx_INT_ACK;
3389  }
3390 }
3391 
3392 static void si_irq_disable(struct radeon_device *rdev)
3393 {
3394  si_disable_interrupts(rdev);
3395  /* Wait and acknowledge irq */
3396  mdelay(1);
3397  si_irq_ack(rdev);
3398  si_disable_interrupt_state(rdev);
3399 }
3400 
3401 static void si_irq_suspend(struct radeon_device *rdev)
3402 {
3403  si_irq_disable(rdev);
3404  si_rlc_stop(rdev);
3405 }
3406 
3407 static void si_irq_fini(struct radeon_device *rdev)
3408 {
3409  si_irq_suspend(rdev);
3410  r600_ih_ring_fini(rdev);
3411 }
3412 
3413 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
3414 {
3415  u32 wptr, tmp;
3416 
3417  if (rdev->wb.enabled)
3418  wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
3419  else
3420  wptr = RREG32(IH_RB_WPTR);
3421 
3422  if (wptr & RB_OVERFLOW) {
3423  /* When a ring buffer overflow happens, start parsing interrupts
3424  * from the last vector that was not overwritten (wptr + 16).
3425  * Hopefully this allows us to catch up.
3426  */
3427  dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
3428  wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
3429  rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
3430  tmp = RREG32(IH_RB_CNTL);
3431  tmp |= IH_WPTR_OVERFLOW_CLEAR;
3432  WREG32(IH_RB_CNTL, tmp);
3433  }
3434  return (wptr & rdev->ih.ptr_mask);
3435 }
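/*
 * si_get_ih_wptr() prefers the write-back copy of the IH write pointer when
 * write-back is enabled and falls back to an MMIO read otherwise.  On
 * overflow the read pointer is moved to wptr + 16, i.e. one 16-byte IV entry
 * past the write pointer and therefore the oldest entry that has not been
 * overwritten, and the overflow flag is cleared via IH_WPTR_OVERFLOW_CLEAR.
 */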
3436 
3437 /* SI IV Ring
3438  * Each IV ring entry is 128 bits:
3439  * [7:0] - interrupt source id
3440  * [31:8] - reserved
3441  * [59:32] - interrupt source data
3442  * [63:60] - reserved
3443  * [71:64] - RINGID
3444  * [79:72] - VMID
3445  * [127:80] - reserved
3446  */
3448 {
3449  u32 wptr;
3450  u32 rptr;
3451  u32 src_id, src_data, ring_id;
3452  u32 ring_index;
3453  bool queue_hotplug = false;
3454 
3455  if (!rdev->ih.enabled || rdev->shutdown)
3456  return IRQ_NONE;
3457 
3458  wptr = si_get_ih_wptr(rdev);
3459 
3460 restart_ih:
3461  /* is somebody else already processing irqs? */
3462  if (atomic_xchg(&rdev->ih.lock, 1))
3463  return IRQ_NONE;
3464 
3465  rptr = rdev->ih.rptr;
3466  DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
3467 
3468  /* Order reading of wptr vs. reading of IH ring data */
3469  rmb();
3470 
3471  /* display interrupts */
3472  si_irq_ack(rdev);
3473 
3474  while (rptr != wptr) {
3475  /* wptr/rptr are in bytes! */
3476  ring_index = rptr / 4;
3477  src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
3478  src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
3479  ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
3480 
3481  switch (src_id) {
3482  case 1: /* D1 vblank/vline */
3483  switch (src_data) {
3484  case 0: /* D1 vblank */
3485  if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) {
3486  if (rdev->irq.crtc_vblank_int[0]) {
3487  drm_handle_vblank(rdev->ddev, 0);
3488  rdev->pm.vblank_sync = true;
3489  wake_up(&rdev->irq.vblank_queue);
3490  }
3491  if (atomic_read(&rdev->irq.pflip[0]))
3492  radeon_crtc_handle_flip(rdev, 0);
3493  rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
3494  DRM_DEBUG("IH: D1 vblank\n");
3495  }
3496  break;
3497  case 1: /* D1 vline */
3498  if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) {
3499  rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
3500  DRM_DEBUG("IH: D1 vline\n");
3501  }
3502  break;
3503  default:
3504  DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3505  break;
3506  }
3507  break;
3508  case 2: /* D2 vblank/vline */
3509  switch (src_data) {
3510  case 0: /* D2 vblank */
3511  if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
3512  if (rdev->irq.crtc_vblank_int[1]) {
3513  drm_handle_vblank(rdev->ddev, 1);
3514  rdev->pm.vblank_sync = true;
3515  wake_up(&rdev->irq.vblank_queue);
3516  }
3517  if (atomic_read(&rdev->irq.pflip[1]))
3518  radeon_crtc_handle_flip(rdev, 1);
3519  rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
3520  DRM_DEBUG("IH: D2 vblank\n");
3521  }
3522  break;
3523  case 1: /* D2 vline */
3524  if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
3525  rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
3526  DRM_DEBUG("IH: D2 vline\n");
3527  }
3528  break;
3529  default:
3530  DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3531  break;
3532  }
3533  break;
3534  case 3: /* D3 vblank/vline */
3535  switch (src_data) {
3536  case 0: /* D3 vblank */
3537  if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
3538  if (rdev->irq.crtc_vblank_int[2]) {
3539  drm_handle_vblank(rdev->ddev, 2);
3540  rdev->pm.vblank_sync = true;
3541  wake_up(&rdev->irq.vblank_queue);
3542  }
3543  if (atomic_read(&rdev->irq.pflip[2]))
3544  radeon_crtc_handle_flip(rdev, 2);
3545  rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
3546  DRM_DEBUG("IH: D3 vblank\n");
3547  }
3548  break;
3549  case 1: /* D3 vline */
3550  if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
3551  rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
3552  DRM_DEBUG("IH: D3 vline\n");
3553  }
3554  break;
3555  default:
3556  DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3557  break;
3558  }
3559  break;
3560  case 4: /* D4 vblank/vline */
3561  switch (src_data) {
3562  case 0: /* D4 vblank */
3563  if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
3564  if (rdev->irq.crtc_vblank_int[3]) {
3565  drm_handle_vblank(rdev->ddev, 3);
3566  rdev->pm.vblank_sync = true;
3567  wake_up(&rdev->irq.vblank_queue);
3568  }
3569  if (atomic_read(&rdev->irq.pflip[3]))
3570  radeon_crtc_handle_flip(rdev, 3);
3571  rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
3572  DRM_DEBUG("IH: D4 vblank\n");
3573  }
3574  break;
3575  case 1: /* D4 vline */
3576  if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
3577  rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
3578  DRM_DEBUG("IH: D4 vline\n");
3579  }
3580  break;
3581  default:
3582  DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3583  break;
3584  }
3585  break;
3586  case 5: /* D5 vblank/vline */
3587  switch (src_data) {
3588  case 0: /* D5 vblank */
3589  if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
3590  if (rdev->irq.crtc_vblank_int[4]) {
3591  drm_handle_vblank(rdev->ddev, 4);
3592  rdev->pm.vblank_sync = true;
3593  wake_up(&rdev->irq.vblank_queue);
3594  }
3595  if (atomic_read(&rdev->irq.pflip[4]))
3596  radeon_crtc_handle_flip(rdev, 4);
3597  rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
3598  DRM_DEBUG("IH: D5 vblank\n");
3599  }
3600  break;
3601  case 1: /* D5 vline */
3602  if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
3603  rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
3604  DRM_DEBUG("IH: D5 vline\n");
3605  }
3606  break;
3607  default:
3608  DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3609  break;
3610  }
3611  break;
3612  case 6: /* D6 vblank/vline */
3613  switch (src_data) {
3614  case 0: /* D6 vblank */
3615  if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
3616  if (rdev->irq.crtc_vblank_int[5]) {
3617  drm_handle_vblank(rdev->ddev, 5);
3618  rdev->pm.vblank_sync = true;
3619  wake_up(&rdev->irq.vblank_queue);
3620  }
3621  if (atomic_read(&rdev->irq.pflip[5]))
3622  radeon_crtc_handle_flip(rdev, 5);
3623  rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
3624  DRM_DEBUG("IH: D6 vblank\n");
3625  }
3626  break;
3627  case 1: /* D6 vline */
3628  if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
3629  rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
3630  DRM_DEBUG("IH: D6 vline\n");
3631  }
3632  break;
3633  default:
3634  DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3635  break;
3636  }
3637  break;
3638  case 42: /* HPD hotplug */
3639  switch (src_data) {
3640  case 0:
3641  if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
3642  rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
3643  queue_hotplug = true;
3644  DRM_DEBUG("IH: HPD1\n");
3645  }
3646  break;
3647  case 1:
3648  if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
3649  rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
3650  queue_hotplug = true;
3651  DRM_DEBUG("IH: HPD2\n");
3652  }
3653  break;
3654  case 2:
3655  if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
3656  rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
3657  queue_hotplug = true;
3658  DRM_DEBUG("IH: HPD3\n");
3659  }
3660  break;
3661  case 3:
3662  if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
3663  rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
3664  queue_hotplug = true;
3665  DRM_DEBUG("IH: HPD4\n");
3666  }
3667  break;
3668  case 4:
3669  if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
3670  rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
3671  queue_hotplug = true;
3672  DRM_DEBUG("IH: HPD5\n");
3673  }
3674  break;
3675  case 5:
3676  if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
3677  rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
3678  queue_hotplug = true;
3679  DRM_DEBUG("IH: HPD6\n");
3680  }
3681  break;
3682  default:
3683  DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3684  break;
3685  }
3686  break;
3687  case 176: /* RINGID0 CP_INT */
3689  break;
3690  case 177: /* RINGID1 CP_INT */
3692  break;
3693  case 178: /* RINGID2 CP_INT */
3695  break;
3696  case 181: /* CP EOP event */
3697  DRM_DEBUG("IH: CP EOP\n");
3698  switch (ring_id) {
3699  case 0:
3701  break;
3702  case 1:
3704  break;
3705  case 2:
3707  break;
3708  }
3709  break;
3710  case 233: /* GUI IDLE */
3711  DRM_DEBUG("IH: GUI idle\n");
3712  break;
3713  default:
3714  DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3715  break;
3716  }
3717 
3718  /* wptr/rptr are in bytes! */
3719  rptr += 16;
3720  rptr &= rdev->ih.ptr_mask;
3721  }
3722  if (queue_hotplug)
3723  schedule_work(&rdev->hotplug_work);
3724  rdev->ih.rptr = rptr;
3725  WREG32(IH_RB_RPTR, rdev->ih.rptr);
3726  atomic_set(&rdev->ih.lock, 0);
3727 
3728  /* make sure wptr hasn't changed while processing */
3729  wptr = si_get_ih_wptr(rdev);
3730  if (wptr != rptr)
3731  goto restart_ih;
3732 
3733  return IRQ_HANDLED;
3734 }
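/*
 * si_irq_process() drains the IH ring one entry at a time.  Each 16-byte IV
 * entry is decoded into src_id (dword 0), src_data (dword 1) and ring_id
 * (dword 2) per the layout comment above, then dispatched to the
 * vblank/vline, hotplug or CP handling paths.  The rptr/wptr bookkeeping is
 * in bytes, so the pointer advances by 16 per entry and wraps with
 * ih.ptr_mask; a final wptr re-read catches entries that arrived while the
 * ring was being processed.
 */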
3735 
3736 /*
3737  * startup/shutdown callbacks
3738  */
3739 static int si_startup(struct radeon_device *rdev)
3740 {
3741  struct radeon_ring *ring;
3742  int r;
3743 
3744  if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
3745  !rdev->rlc_fw || !rdev->mc_fw) {
3746  r = si_init_microcode(rdev);
3747  if (r) {
3748  DRM_ERROR("Failed to load firmware!\n");
3749  return r;
3750  }
3751  }
3752 
3753  r = si_mc_load_microcode(rdev);
3754  if (r) {
3755  DRM_ERROR("Failed to load MC firmware!\n");
3756  return r;
3757  }
3758 
3759  r = r600_vram_scratch_init(rdev);
3760  if (r)
3761  return r;
3762 
3763  si_mc_program(rdev);
3764  r = si_pcie_gart_enable(rdev);
3765  if (r)
3766  return r;
3767  si_gpu_init(rdev);
3768 
3769 #if 0
3770  r = evergreen_blit_init(rdev);
3771  if (r) {
3772  r600_blit_fini(rdev);
3773  rdev->asic->copy = NULL;
3774  dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r);
3775  }
3776 #endif
3777  /* allocate rlc buffers */
3778  r = si_rlc_init(rdev);
3779  if (r) {
3780  DRM_ERROR("Failed to init rlc BOs!\n");
3781  return r;
3782  }
3783 
3784  /* allocate wb buffer */
3785  r = radeon_wb_init(rdev);
3786  if (r)
3787  return r;
3788 
3789  r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
3790  if (r) {
3791  dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
3792  return r;
3793  }
3794 
3795  r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
3796  if (r) {
3797  dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
3798  return r;
3799  }
3800 
3801  r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
3802  if (r) {
3803  dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
3804  return r;
3805  }
3806 
3807  /* Enable IRQ */
3808  r = si_irq_init(rdev);
3809  if (r) {
3810  DRM_ERROR("radeon: IH init failed (%d).\n", r);
3811  radeon_irq_kms_fini(rdev);
3812  return r;
3813  }
3814  si_irq_set(rdev);
3815 
3816  ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3817  r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
3818  CP_RB0_RPTR, CP_RB0_WPTR,
3819  0, 0xfffff, RADEON_CP_PACKET2);
3820  if (r)
3821  return r;
3822 
3823  ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3824  r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
3825  CP_RB1_RPTR, CP_RB1_WPTR,
3826  0, 0xfffff, RADEON_CP_PACKET2);
3827  if (r)
3828  return r;
3829 
3830  ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3831  r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
3832  CP_RB2_RPTR, CP_RB2_WPTR,
3833  0, 0xfffff, RADEON_CP_PACKET2);
3834  if (r)
3835  return r;
3836 
3837  r = si_cp_load_microcode(rdev);
3838  if (r)
3839  return r;
3840  r = si_cp_resume(rdev);
3841  if (r)
3842  return r;
3843 
3844  r = radeon_ib_pool_init(rdev);
3845  if (r) {
3846  dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
3847  return r;
3848  }
3849 
3850  r = radeon_vm_manager_init(rdev);
3851  if (r) {
3852  dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
3853  return r;
3854  }
3855 
3856  return 0;
3857 }
3858 
3859 int si_resume(struct radeon_device *rdev)
3860 {
3861  int r;
3862 
3863  /* Do not reset the GPU before posting; on rv770 hardware, unlike r500
3864  * hardware, posting will perform the tasks necessary to bring the GPU
3865  * back into good shape.
3866  */
3867  /* post card */
3868  atom_asic_init(rdev->mode_info.atom_context);
3869 
3870  rdev->accel_working = true;
3871  r = si_startup(rdev);
3872  if (r) {
3873  DRM_ERROR("si startup failed on resume\n");
3874  rdev->accel_working = false;
3875  return r;
3876  }
3877 
3878  return r;
3879 
3880 }
3881 
3882 int si_suspend(struct radeon_device *rdev)
3883 {
3884  si_cp_enable(rdev, false);
3885  rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3886  rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3887  rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3888  si_irq_suspend(rdev);
3889  radeon_wb_disable(rdev);
3890  si_pcie_gart_disable(rdev);
3891  return 0;
3892 }
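/*
 * Suspend tears the ASIC down roughly in the reverse order of si_startup():
 * the CP is halted and its rings marked not ready, interrupts and the RLC
 * are stopped, write-back is disabled and finally the PCIE GART tables are
 * switched off.
 */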
3893 
3894 /* The plan is to move initialization into this function and use
3895  * helper functions so that radeon_device_init does pretty much
3896  * nothing more than call ASIC-specific functions. This should
3897  * also allow us to remove a bunch of callback functions,
3898  * like vram_info.
3899  */
3900 int si_init(struct radeon_device *rdev)
3901 {
3902  struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3903  int r;
3904 
3905  /* Read BIOS */
3906  if (!radeon_get_bios(rdev)) {
3907  if (ASIC_IS_AVIVO(rdev))
3908  return -EINVAL;
3909  }
3910  /* Must be an ATOMBIOS */
3911  if (!rdev->is_atom_bios) {
3912  dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
3913  return -EINVAL;
3914  }
3915  r = radeon_atombios_init(rdev);
3916  if (r)
3917  return r;
3918 
3919  /* Post card if necessary */
3920  if (!radeon_card_posted(rdev)) {
3921  if (!rdev->bios) {
3922  dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
3923  return -EINVAL;
3924  }
3925  DRM_INFO("GPU not posted. posting now...\n");
3926  atom_asic_init(rdev->mode_info.atom_context);
3927  }
3928  /* Initialize scratch registers */
3929  si_scratch_init(rdev);
3930  /* Initialize surface registers */
3931  radeon_surface_init(rdev);
3932  /* Initialize clocks */
3933  radeon_get_clock_info(rdev->ddev);
3934 
3935  /* Fence driver */
3936  r = radeon_fence_driver_init(rdev);
3937  if (r)
3938  return r;
3939 
3940  /* initialize memory controller */
3941  r = si_mc_init(rdev);
3942  if (r)
3943  return r;
3944  /* Memory manager */
3945  r = radeon_bo_init(rdev);
3946  if (r)
3947  return r;
3948 
3949  r = radeon_irq_kms_init(rdev);
3950  if (r)
3951  return r;
3952 
3953  ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3954  ring->ring_obj = NULL;
3955  r600_ring_init(rdev, ring, 1024 * 1024);
3956 
3957  ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3958  ring->ring_obj = NULL;
3959  r600_ring_init(rdev, ring, 1024 * 1024);
3960 
3961  ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3962  ring->ring_obj = NULL;
3963  r600_ring_init(rdev, ring, 1024 * 1024);
3964 
3965  rdev->ih.ring_obj = NULL;
3966  r600_ih_ring_init(rdev, 64 * 1024);
3967 
3968  r = r600_pcie_gart_init(rdev);
3969  if (r)
3970  return r;
3971 
3972  rdev->accel_working = true;
3973  r = si_startup(rdev);
3974  if (r) {
3975  dev_err(rdev->dev, "disabling GPU acceleration\n");
3976  si_cp_fini(rdev);
3977  si_irq_fini(rdev);
3978  si_rlc_fini(rdev);
3979  radeon_wb_fini(rdev);
3980  radeon_ib_pool_fini(rdev);
3981  radeon_vm_manager_fini(rdev);
3982  radeon_irq_kms_fini(rdev);
3983  si_pcie_gart_fini(rdev);
3984  rdev->accel_working = false;
3985  }
3986 
3987  /* Don't start up if the MC ucode is missing.
3988  * The default clocks and voltages before the MC ucode
3989  * is loaded are not sufficient for advanced operations.
3990  */
3991  if (!rdev->mc_fw) {
3992  DRM_ERROR("radeon: MC ucode required for NI+.\n");
3993  return -EINVAL;
3994  }
3995 
3996  return 0;
3997 }
3998 
3999 void si_fini(struct radeon_device *rdev)
4000 {
4001 #if 0
4002  r600_blit_fini(rdev);
4003 #endif
4004  si_cp_fini(rdev);
4005  si_irq_fini(rdev);
4006  si_rlc_fini(rdev);
4007  radeon_wb_fini(rdev);
4008  radeon_vm_manager_fini(rdev);
4009  radeon_ib_pool_fini(rdev);
4010  radeon_irq_kms_fini(rdev);
4011  si_pcie_gart_fini(rdev);
4012  r600_vram_scratch_fini(rdev);
4013  radeon_gem_fini(rdev);
4015  radeon_bo_fini(rdev);
4016  radeon_atombios_fini(rdev);
4017  kfree(rdev->bios);
4018  rdev->bios = NULL;
4019 }
4020 
4029 uint64_t si_get_gpu_clock(struct radeon_device *rdev)
4030 {
4031  uint64_t clock;
4032 
4033  mutex_lock(&rdev->gpu_clock_mutex);
4037  mutex_unlock(&rdev->gpu_clock_mutex);
4038  return clock;
4039 }