evergreen_blit_kms.c
/*
 * Copyright 2010 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *     Alex Deucher <[email protected]>
 */

#include <drm/drmP.h>
#include <drm/radeon_drm.h>
#include "radeon.h"

#include "evergreend.h"
#include "evergreen_blit_shaders.h"
#include "cayman_blit_shaders.h"
#include "radeon_blit_common.h"

/* emits 17 */
static void
set_render_target(struct radeon_device *rdev, int format,
		  int w, int h, u64 gpu_addr)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	u32 cb_color_info;
	int pitch, slice;

	h = ALIGN(h, 8);
	if (h < 8)
		h = 8;

	cb_color_info = CB_FORMAT(format) |
		CB_SOURCE_FORMAT(CB_SF_EXPORT_NORM) |
		CB_ARRAY_MODE(ARRAY_1D_TILED_THIN1);
	pitch = (w / 8) - 1;
	slice = ((w * h) / 64) - 1;

	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 15));
	radeon_ring_write(ring, (CB_COLOR0_BASE - PACKET3_SET_CONTEXT_REG_START) >> 2);
	radeon_ring_write(ring, gpu_addr >> 8);
	radeon_ring_write(ring, pitch);
	radeon_ring_write(ring, slice);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, cb_color_info);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, (w - 1) | ((h - 1) << 16));
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);
}
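/*
 * Note the register units above: CB_COLOR0_PITCH is programmed in units
 * of 8 pixels and CB_COLOR0_SLICE in units of 64 pixels, hence the
 * (w / 8) - 1 and ((w * h) / 64) - 1 encodings.
 */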

/* emits 5dw */
static void
cp_set_surface_sync(struct radeon_device *rdev,
		    u32 sync_type, u32 size,
		    u64 mc_addr)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	u32 cp_coher_size;

	if (size == 0xffffffff)
		cp_coher_size = 0xffffffff;
	else
		cp_coher_size = ((size + 255) >> 8);

	if (rdev->family >= CHIP_CAYMAN) {
		/* CP_COHER_CNTL2 has to be set manually when submitting a surface_sync
		 * to the RB directly. For IBs, the CP programs this as part of the
		 * surface_sync packet.
		 */
		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		radeon_ring_write(ring, (0x85e8 - PACKET3_SET_CONFIG_REG_START) >> 2);
		radeon_ring_write(ring, 0); /* CP_COHER_CNTL2 */
	}
	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	radeon_ring_write(ring, sync_type);
	radeon_ring_write(ring, cp_coher_size);
	radeon_ring_write(ring, mc_addr >> 8);
	radeon_ring_write(ring, 10); /* poll interval */
}
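/*
 * (size + 255) >> 8 rounds the byte count up to 256-byte units, which is
 * the granularity CP_COHER_SIZE works in; 0xffffffff is passed straight
 * through to request a full-range sync.
 */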

/* emits 11dw + 1 surface sync = 16dw */
static void
set_shaders(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	u64 gpu_addr;

	/* VS */
	gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.vs_offset;
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 3));
	radeon_ring_write(ring, (SQ_PGM_START_VS - PACKET3_SET_CONTEXT_REG_START) >> 2);
	radeon_ring_write(ring, gpu_addr >> 8);
	radeon_ring_write(ring, 2);
	radeon_ring_write(ring, 0);

	/* PS */
	gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.ps_offset;
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 4));
	radeon_ring_write(ring, (SQ_PGM_START_PS - PACKET3_SET_CONTEXT_REG_START) >> 2);
	radeon_ring_write(ring, gpu_addr >> 8);
	radeon_ring_write(ring, 1);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 2);

	gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.vs_offset;
	cp_set_surface_sync(rdev, PACKET3_SH_ACTION_ENA, 512, gpu_addr);
}
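/*
 * The SQ_PGM_START_* registers take 256-byte-aligned addresses (hence
 * gpu_addr >> 8); the trailing SH_ACTION sync flushes 512 bytes starting
 * at the VS so the freshly written blit shaders are picked up.
 */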

/* emits 10 + 1 sync (5) = 15 */
static void
set_vtx_resource(struct radeon_device *rdev, u64 gpu_addr)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	u32 sq_vtx_constant_word2, sq_vtx_constant_word3;

	/* high addr, stride */
	sq_vtx_constant_word2 = SQ_VTXC_BASE_ADDR_HI(upper_32_bits(gpu_addr) & 0xff) |
		SQ_VTXC_STRIDE(16);
#ifdef __BIG_ENDIAN
	sq_vtx_constant_word2 |= SQ_VTXC_ENDIAN_SWAP(SQ_ENDIAN_8IN32);
#endif
	/* xyzw swizzles */
	sq_vtx_constant_word3 = SQ_VTCX_SEL_X(SQ_SEL_X) |
		SQ_VTCX_SEL_Y(SQ_SEL_Y) |
		SQ_VTCX_SEL_Z(SQ_SEL_Z) |
		SQ_VTCX_SEL_W(SQ_SEL_W);

	radeon_ring_write(ring, PACKET3(PACKET3_SET_RESOURCE, 8));
	radeon_ring_write(ring, 0x580);
	radeon_ring_write(ring, gpu_addr & 0xffffffff);
	radeon_ring_write(ring, 48 - 1); /* size */
	radeon_ring_write(ring, sq_vtx_constant_word2);
	radeon_ring_write(ring, sq_vtx_constant_word3);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, S__SQ_CONSTANT_TYPE(SQ_TEX_VTX_VALID_BUFFER));

	if ((rdev->family == CHIP_CEDAR) ||
	    (rdev->family == CHIP_PALM) ||
	    (rdev->family == CHIP_SUMO) ||
	    (rdev->family == CHIP_SUMO2) ||
	    (rdev->family == CHIP_CAICOS))
		cp_set_surface_sync(rdev,
				    PACKET3_TC_ACTION_ENA, 48, gpu_addr);
	else
		cp_set_surface_sync(rdev,
				    PACKET3_VC_ACTION_ENA, 48, gpu_addr);

}
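/*
 * The vertex buffer is 48 bytes: three vertices with a 16-byte stride,
 * exactly what draw_auto()'s rectlist consumes.  Cedar/Palm/Sumo/Sumo2/
 * Caicos have no vertex cache and fetch vertex data through the texture
 * cache, so they need a TC flush here instead of a VC flush.
 */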

/* emits 10 */
static void
set_tex_resource(struct radeon_device *rdev,
		 int format, int w, int h, int pitch,
		 u64 gpu_addr, u32 size)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	u32 sq_tex_resource_word0, sq_tex_resource_word1;
	u32 sq_tex_resource_word4, sq_tex_resource_word7;

	if (h < 1)
		h = 1;

	sq_tex_resource_word0 = TEX_DIM(SQ_TEX_DIM_2D);
	sq_tex_resource_word0 |= ((((pitch >> 3) - 1) << 6) |
				  ((w - 1) << 18));
	sq_tex_resource_word1 = ((h - 1) << 0) |
				TEX_ARRAY_MODE(ARRAY_1D_TILED_THIN1);
	/* xyzw swizzles */
	sq_tex_resource_word4 = TEX_DST_SEL_X(SQ_SEL_X) |
				TEX_DST_SEL_Y(SQ_SEL_Y) |
				TEX_DST_SEL_Z(SQ_SEL_Z) |
				TEX_DST_SEL_W(SQ_SEL_W);

	sq_tex_resource_word7 = format |
		S__SQ_CONSTANT_TYPE(SQ_TEX_VTX_VALID_TEXTURE);

	cp_set_surface_sync(rdev,
			    PACKET3_TC_ACTION_ENA, size, gpu_addr);

	radeon_ring_write(ring, PACKET3(PACKET3_SET_RESOURCE, 8));
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, sq_tex_resource_word0);
	radeon_ring_write(ring, sq_tex_resource_word1);
	radeon_ring_write(ring, gpu_addr >> 8);
	radeon_ring_write(ring, gpu_addr >> 8);
	radeon_ring_write(ring, sq_tex_resource_word4);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, sq_tex_resource_word7);
}
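/*
 * Words 2 and 3 of the texture resource (base and mip-base address) both
 * point at the same 256-byte-aligned surface since the blit source has
 * no mipmaps; the identity X/Y/Z/W swizzle in word 4 passes texels
 * through unmodified.
 */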

/* emits 12 */
static void
set_scissors(struct radeon_device *rdev, int x1, int y1,
	     int x2, int y2)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	/* workaround some hw bugs */
	if (x2 == 0)
		x1 = 1;
	if (y2 == 0)
		y1 = 1;
	if (rdev->family >= CHIP_CAYMAN) {
		if ((x2 == 1) && (y2 == 1))
			x2 = 2;
	}

	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	radeon_ring_write(ring, (PA_SC_SCREEN_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_START) >> 2);
	radeon_ring_write(ring, (x1 << 0) | (y1 << 16));
	radeon_ring_write(ring, (x2 << 0) | (y2 << 16));

	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	radeon_ring_write(ring, (PA_SC_GENERIC_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_START) >> 2);
	radeon_ring_write(ring, (x1 << 0) | (y1 << 16) | (1 << 31));
	radeon_ring_write(ring, (x2 << 0) | (y2 << 16));

	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	radeon_ring_write(ring, (PA_SC_WINDOW_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_START) >> 2);
	radeon_ring_write(ring, (x1 << 0) | (y1 << 16) | (1 << 31));
	radeon_ring_write(ring, (x2 << 0) | (y2 << 16));
}
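/*
 * All three scissor rectangles (screen, generic, window) are set to the
 * same extent; the (1 << 31) bit in the generic and window TL registers
 * is WINDOW_OFFSET_DISABLE, so the coordinates are taken as absolute.
 */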

/* emits 10 */
static void
draw_auto(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
	radeon_ring_write(ring, (VGT_PRIMITIVE_TYPE - PACKET3_SET_CONFIG_REG_START) >> 2);
	radeon_ring_write(ring, DI_PT_RECTLIST);

	radeon_ring_write(ring, PACKET3(PACKET3_INDEX_TYPE, 0));
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 2) |
#endif
			  DI_INDEX_SIZE_16_BIT);

	radeon_ring_write(ring, PACKET3(PACKET3_NUM_INSTANCES, 0));
	radeon_ring_write(ring, 1);

	radeon_ring_write(ring, PACKET3(PACKET3_DRAW_INDEX_AUTO, 1));
	radeon_ring_write(ring, 3);
	radeon_ring_write(ring, DI_SRC_SEL_AUTO_INDEX);

}
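/*
 * DRAW_INDEX_AUTO with three auto-generated indices and a RECTLIST
 * primitive draws a single rectangle covering the blit area, pulling its
 * corner vertices from the buffer set up in set_vtx_resource().
 */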

/* emits 39 */
static void
set_default_state(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	u32 sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2, sq_gpr_resource_mgmt_3;
	u32 sq_thread_resource_mgmt, sq_thread_resource_mgmt_2;
	u32 sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2, sq_stack_resource_mgmt_3;
	int num_ps_gprs, num_vs_gprs, num_temp_gprs;
	int num_gs_gprs, num_es_gprs, num_hs_gprs, num_ls_gprs;
	int num_ps_threads, num_vs_threads, num_gs_threads, num_es_threads;
	int num_hs_threads, num_ls_threads;
	int num_ps_stack_entries, num_vs_stack_entries, num_gs_stack_entries, num_es_stack_entries;
	int num_hs_stack_entries, num_ls_stack_entries;
	u64 gpu_addr;
	int dwords;

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	if (rdev->family < CHIP_CAYMAN) {
		switch (rdev->family) {
		case CHIP_CEDAR:
		default:
			num_ps_gprs = 93;
			num_vs_gprs = 46;
			num_temp_gprs = 4;
			num_gs_gprs = 31;
			num_es_gprs = 31;
			num_hs_gprs = 23;
			num_ls_gprs = 23;
			num_ps_threads = 96;
			num_vs_threads = 16;
			num_gs_threads = 16;
			num_es_threads = 16;
			num_hs_threads = 16;
			num_ls_threads = 16;
			num_ps_stack_entries = 42;
			num_vs_stack_entries = 42;
			num_gs_stack_entries = 42;
			num_es_stack_entries = 42;
			num_hs_stack_entries = 42;
			num_ls_stack_entries = 42;
			break;
		case CHIP_REDWOOD:
			num_ps_gprs = 93;
			num_vs_gprs = 46;
			num_temp_gprs = 4;
			num_gs_gprs = 31;
			num_es_gprs = 31;
			num_hs_gprs = 23;
			num_ls_gprs = 23;
			num_ps_threads = 128;
			num_vs_threads = 20;
			num_gs_threads = 20;
			num_es_threads = 20;
			num_hs_threads = 20;
			num_ls_threads = 20;
			num_ps_stack_entries = 42;
			num_vs_stack_entries = 42;
			num_gs_stack_entries = 42;
			num_es_stack_entries = 42;
			num_hs_stack_entries = 42;
			num_ls_stack_entries = 42;
			break;
		case CHIP_JUNIPER:
			num_ps_gprs = 93;
			num_vs_gprs = 46;
			num_temp_gprs = 4;
			num_gs_gprs = 31;
			num_es_gprs = 31;
			num_hs_gprs = 23;
			num_ls_gprs = 23;
			num_ps_threads = 128;
			num_vs_threads = 20;
			num_gs_threads = 20;
			num_es_threads = 20;
			num_hs_threads = 20;
			num_ls_threads = 20;
			num_ps_stack_entries = 85;
			num_vs_stack_entries = 85;
			num_gs_stack_entries = 85;
			num_es_stack_entries = 85;
			num_hs_stack_entries = 85;
			num_ls_stack_entries = 85;
			break;
		case CHIP_CYPRESS:
		case CHIP_HEMLOCK:
			num_ps_gprs = 93;
			num_vs_gprs = 46;
			num_temp_gprs = 4;
			num_gs_gprs = 31;
			num_es_gprs = 31;
			num_hs_gprs = 23;
			num_ls_gprs = 23;
			num_ps_threads = 128;
			num_vs_threads = 20;
			num_gs_threads = 20;
			num_es_threads = 20;
			num_hs_threads = 20;
			num_ls_threads = 20;
			num_ps_stack_entries = 85;
			num_vs_stack_entries = 85;
			num_gs_stack_entries = 85;
			num_es_stack_entries = 85;
			num_hs_stack_entries = 85;
			num_ls_stack_entries = 85;
			break;
		case CHIP_PALM:
			num_ps_gprs = 93;
			num_vs_gprs = 46;
			num_temp_gprs = 4;
			num_gs_gprs = 31;
			num_es_gprs = 31;
			num_hs_gprs = 23;
			num_ls_gprs = 23;
			num_ps_threads = 96;
			num_vs_threads = 16;
			num_gs_threads = 16;
			num_es_threads = 16;
			num_hs_threads = 16;
			num_ls_threads = 16;
			num_ps_stack_entries = 42;
			num_vs_stack_entries = 42;
			num_gs_stack_entries = 42;
			num_es_stack_entries = 42;
			num_hs_stack_entries = 42;
			num_ls_stack_entries = 42;
			break;
		case CHIP_SUMO:
			num_ps_gprs = 93;
			num_vs_gprs = 46;
			num_temp_gprs = 4;
			num_gs_gprs = 31;
			num_es_gprs = 31;
			num_hs_gprs = 23;
			num_ls_gprs = 23;
			num_ps_threads = 96;
			num_vs_threads = 25;
			num_gs_threads = 25;
			num_es_threads = 25;
			num_hs_threads = 25;
			num_ls_threads = 25;
			num_ps_stack_entries = 42;
			num_vs_stack_entries = 42;
			num_gs_stack_entries = 42;
			num_es_stack_entries = 42;
			num_hs_stack_entries = 42;
			num_ls_stack_entries = 42;
			break;
		case CHIP_SUMO2:
			num_ps_gprs = 93;
			num_vs_gprs = 46;
			num_temp_gprs = 4;
			num_gs_gprs = 31;
			num_es_gprs = 31;
			num_hs_gprs = 23;
			num_ls_gprs = 23;
			num_ps_threads = 96;
			num_vs_threads = 25;
			num_gs_threads = 25;
			num_es_threads = 25;
			num_hs_threads = 25;
			num_ls_threads = 25;
			num_ps_stack_entries = 85;
			num_vs_stack_entries = 85;
			num_gs_stack_entries = 85;
			num_es_stack_entries = 85;
			num_hs_stack_entries = 85;
			num_ls_stack_entries = 85;
			break;
		case CHIP_BARTS:
			num_ps_gprs = 93;
			num_vs_gprs = 46;
			num_temp_gprs = 4;
			num_gs_gprs = 31;
			num_es_gprs = 31;
			num_hs_gprs = 23;
			num_ls_gprs = 23;
			num_ps_threads = 128;
			num_vs_threads = 20;
			num_gs_threads = 20;
			num_es_threads = 20;
			num_hs_threads = 20;
			num_ls_threads = 20;
			num_ps_stack_entries = 85;
			num_vs_stack_entries = 85;
			num_gs_stack_entries = 85;
			num_es_stack_entries = 85;
			num_hs_stack_entries = 85;
			num_ls_stack_entries = 85;
			break;
		case CHIP_TURKS:
			num_ps_gprs = 93;
			num_vs_gprs = 46;
			num_temp_gprs = 4;
			num_gs_gprs = 31;
			num_es_gprs = 31;
			num_hs_gprs = 23;
			num_ls_gprs = 23;
			num_ps_threads = 128;
			num_vs_threads = 20;
			num_gs_threads = 20;
			num_es_threads = 20;
			num_hs_threads = 20;
			num_ls_threads = 20;
			num_ps_stack_entries = 42;
			num_vs_stack_entries = 42;
			num_gs_stack_entries = 42;
			num_es_stack_entries = 42;
			num_hs_stack_entries = 42;
			num_ls_stack_entries = 42;
			break;
		case CHIP_CAICOS:
			num_ps_gprs = 93;
			num_vs_gprs = 46;
			num_temp_gprs = 4;
			num_gs_gprs = 31;
			num_es_gprs = 31;
			num_hs_gprs = 23;
			num_ls_gprs = 23;
			num_ps_threads = 128;
			num_vs_threads = 10;
			num_gs_threads = 10;
			num_es_threads = 10;
			num_hs_threads = 10;
			num_ls_threads = 10;
			num_ps_stack_entries = 42;
			num_vs_stack_entries = 42;
			num_gs_stack_entries = 42;
			num_es_stack_entries = 42;
			num_hs_stack_entries = 42;
			num_ls_stack_entries = 42;
			break;
		}
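
		/* Cedar/Palm/Sumo/Sumo2/Caicos have no vertex cache, so
		 * VC_ENABLE is left clear for them below; everything else
		 * turns the vertex cache on. */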
		if ((rdev->family == CHIP_CEDAR) ||
		    (rdev->family == CHIP_PALM) ||
		    (rdev->family == CHIP_SUMO) ||
		    (rdev->family == CHIP_SUMO2) ||
		    (rdev->family == CHIP_CAICOS))
			sq_config = 0;
		else
			sq_config = VC_ENABLE;

		sq_config |= (EXPORT_SRC_C |
			      CS_PRIO(0) |
			      LS_PRIO(0) |
			      HS_PRIO(0) |
			      PS_PRIO(0) |
			      VS_PRIO(1) |
			      GS_PRIO(2) |
			      ES_PRIO(3));

		sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(num_ps_gprs) |
					  NUM_VS_GPRS(num_vs_gprs) |
					  NUM_CLAUSE_TEMP_GPRS(num_temp_gprs));
		sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(num_gs_gprs) |
					  NUM_ES_GPRS(num_es_gprs));
		sq_gpr_resource_mgmt_3 = (NUM_HS_GPRS(num_hs_gprs) |
					  NUM_LS_GPRS(num_ls_gprs));
		sq_thread_resource_mgmt = (NUM_PS_THREADS(num_ps_threads) |
					   NUM_VS_THREADS(num_vs_threads) |
					   NUM_GS_THREADS(num_gs_threads) |
					   NUM_ES_THREADS(num_es_threads));
		sq_thread_resource_mgmt_2 = (NUM_HS_THREADS(num_hs_threads) |
					     NUM_LS_THREADS(num_ls_threads));
		sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(num_ps_stack_entries) |
					    NUM_VS_STACK_ENTRIES(num_vs_stack_entries));
		sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(num_gs_stack_entries) |
					    NUM_ES_STACK_ENTRIES(num_es_stack_entries));
		sq_stack_resource_mgmt_3 = (NUM_HS_STACK_ENTRIES(num_hs_stack_entries) |
					    NUM_LS_STACK_ENTRIES(num_ls_stack_entries));

		/* disable dyn gprs */
		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		radeon_ring_write(ring, (SQ_DYN_GPR_CNTL_PS_FLUSH_REQ - PACKET3_SET_CONFIG_REG_START) >> 2);
		radeon_ring_write(ring, 0);

		/* setup LDS */
		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		radeon_ring_write(ring, (SQ_LDS_RESOURCE_MGMT - PACKET3_SET_CONFIG_REG_START) >> 2);
		radeon_ring_write(ring, 0x10001000);

		/* SQ config */
		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 11));
		radeon_ring_write(ring, (SQ_CONFIG - PACKET3_SET_CONFIG_REG_START) >> 2);
		radeon_ring_write(ring, sq_config);
		radeon_ring_write(ring, sq_gpr_resource_mgmt_1);
		radeon_ring_write(ring, sq_gpr_resource_mgmt_2);
		radeon_ring_write(ring, sq_gpr_resource_mgmt_3);
		radeon_ring_write(ring, 0);
		radeon_ring_write(ring, 0);
		radeon_ring_write(ring, sq_thread_resource_mgmt);
		radeon_ring_write(ring, sq_thread_resource_mgmt_2);
		radeon_ring_write(ring, sq_stack_resource_mgmt_1);
		radeon_ring_write(ring, sq_stack_resource_mgmt_2);
		radeon_ring_write(ring, sq_stack_resource_mgmt_3);
	}

	/* CONTEXT_CONTROL */
	radeon_ring_write(ring, 0xc0012800);
	radeon_ring_write(ring, 0x80000000);
	radeon_ring_write(ring, 0x80000000);

	/* SQ_VTX_BASE_VTX_LOC */
	radeon_ring_write(ring, 0xc0026f00);
	radeon_ring_write(ring, 0x00000000);
	radeon_ring_write(ring, 0x00000000);
	radeon_ring_write(ring, 0x00000000);

	/* SET_SAMPLER */
	radeon_ring_write(ring, 0xc0036e00);
	radeon_ring_write(ring, 0x00000000);
	radeon_ring_write(ring, 0x00000012);
	radeon_ring_write(ring, 0x00000000);
	radeon_ring_write(ring, 0x00000000);

	/* set to DX10/11 mode */
	radeon_ring_write(ring, PACKET3(PACKET3_MODE_CONTROL, 0));
	radeon_ring_write(ring, 1);

	/* emit an IB pointing at default state */
	dwords = ALIGN(rdev->r600_blit.state_len, 0x10);
	gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.state_offset;
	radeon_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	radeon_ring_write(ring, gpu_addr & 0xFFFFFFFC);
	radeon_ring_write(ring, upper_32_bits(gpu_addr) & 0xFF);
	radeon_ring_write(ring, dwords);

}
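/*
 * The INDIRECT_BUFFER length is rounded up to a multiple of 16 dwords;
 * evergreen_blit_init() pads the default state with PACKET2 NOPs to
 * match, so the CP never fetches uninitialized dwords.
 */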

int evergreen_blit_init(struct radeon_device *rdev)
{
	u32 obj_size;
	int i, r, dwords;
	void *ptr;
	u32 packet2s[16];
	int num_packet2s = 0;

	rdev->r600_blit.primitives.set_render_target = set_render_target;
	rdev->r600_blit.primitives.cp_set_surface_sync = cp_set_surface_sync;
	rdev->r600_blit.primitives.set_shaders = set_shaders;
	rdev->r600_blit.primitives.set_vtx_resource = set_vtx_resource;
	rdev->r600_blit.primitives.set_tex_resource = set_tex_resource;
	rdev->r600_blit.primitives.set_scissors = set_scissors;
	rdev->r600_blit.primitives.draw_auto = draw_auto;
	rdev->r600_blit.primitives.set_default_state = set_default_state;

	rdev->r600_blit.ring_size_common = 8; /* sync semaphore */
	rdev->r600_blit.ring_size_common += 55; /* shaders + def state */
	rdev->r600_blit.ring_size_common += 16; /* fence emit for VB IB */
	rdev->r600_blit.ring_size_common += 5; /* done copy */
	rdev->r600_blit.ring_size_common += 16; /* fence emit for done copy */

	rdev->r600_blit.ring_size_per_loop = 74;
	if (rdev->family >= CHIP_CAYMAN)
		rdev->r600_blit.ring_size_per_loop += 9; /* additional DWs for surface sync */

	rdev->r600_blit.max_dim = 16384;

	rdev->r600_blit.state_offset = 0;

	if (rdev->family < CHIP_CAYMAN)
		rdev->r600_blit.state_len = evergreen_default_size;
	else
		rdev->r600_blit.state_len = cayman_default_size;

	dwords = rdev->r600_blit.state_len;
	while (dwords & 0xf) {
		packet2s[num_packet2s++] = cpu_to_le32(PACKET2(0));
		dwords++;
	}

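	/*
	 * PACKET2(0) is a type-2 NOP; the loop above pads the default state
	 * out to the next 16-dword boundary so it matches the
	 * ALIGN(state_len, 0x10) length that set_default_state() programs
	 * into the INDIRECT_BUFFER packet.
	 */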
648 
649  obj_size = dwords * 4;
650  obj_size = ALIGN(obj_size, 256);
651 
652  rdev->r600_blit.vs_offset = obj_size;
653  if (rdev->family < CHIP_CAYMAN)
654  obj_size += evergreen_vs_size * 4;
655  else
656  obj_size += cayman_vs_size * 4;
657  obj_size = ALIGN(obj_size, 256);
658 
659  rdev->r600_blit.ps_offset = obj_size;
660  if (rdev->family < CHIP_CAYMAN)
661  obj_size += evergreen_ps_size * 4;
662  else
663  obj_size += cayman_ps_size * 4;
664  obj_size = ALIGN(obj_size, 256);
665 
666  /* pin copy shader into vram if not already initialized */
667  if (!rdev->r600_blit.shader_obj) {
668  r = radeon_bo_create(rdev, obj_size, PAGE_SIZE, true,
670  NULL, &rdev->r600_blit.shader_obj);
671  if (r) {
672  DRM_ERROR("evergreen failed to allocate shader\n");
673  return r;
674  }
675 
676  r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
677  if (unlikely(r != 0))
678  return r;
679  r = radeon_bo_pin(rdev->r600_blit.shader_obj, RADEON_GEM_DOMAIN_VRAM,
680  &rdev->r600_blit.shader_gpu_addr);
681  radeon_bo_unreserve(rdev->r600_blit.shader_obj);
682  if (r) {
683  dev_err(rdev->dev, "(%d) pin blit object failed\n", r);
684  return r;
685  }
686  }
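
	/*
	 * The state/vs/ps offsets are each aligned to 256 bytes because
	 * the SQ program-start registers only take 256-byte-aligned
	 * addresses (set_shaders() shifts the address right by 8).  If
	 * the BO already exists (e.g. across a resume), allocation and
	 * pinning are skipped and only the contents are rewritten below.
	 */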

	DRM_DEBUG("evergreen blit allocated bo %08x vs %08x ps %08x\n",
		  obj_size,
		  rdev->r600_blit.vs_offset, rdev->r600_blit.ps_offset);

	r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
	if (unlikely(r != 0))
		return r;
	r = radeon_bo_kmap(rdev->r600_blit.shader_obj, &ptr);
	if (r) {
		DRM_ERROR("failed to map blit object %d\n", r);
		return r;
	}

	if (rdev->family < CHIP_CAYMAN) {
		memcpy_toio(ptr + rdev->r600_blit.state_offset,
			    evergreen_default_state, rdev->r600_blit.state_len * 4);

		if (num_packet2s)
			memcpy_toio(ptr + rdev->r600_blit.state_offset + (rdev->r600_blit.state_len * 4),
				    packet2s, num_packet2s * 4);
		for (i = 0; i < evergreen_vs_size; i++)
			*(u32 *)((unsigned long)ptr + rdev->r600_blit.vs_offset + i * 4) = cpu_to_le32(evergreen_vs[i]);
		for (i = 0; i < evergreen_ps_size; i++)
			*(u32 *)((unsigned long)ptr + rdev->r600_blit.ps_offset + i * 4) = cpu_to_le32(evergreen_ps[i]);
	} else {
		memcpy_toio(ptr + rdev->r600_blit.state_offset,
			    cayman_default_state, rdev->r600_blit.state_len * 4);

		if (num_packet2s)
			memcpy_toio(ptr + rdev->r600_blit.state_offset + (rdev->r600_blit.state_len * 4),
				    packet2s, num_packet2s * 4);
		for (i = 0; i < cayman_vs_size; i++)
			*(u32 *)((unsigned long)ptr + rdev->r600_blit.vs_offset + i * 4) = cpu_to_le32(cayman_vs[i]);
		for (i = 0; i < cayman_ps_size; i++)
			*(u32 *)((unsigned long)ptr + rdev->r600_blit.ps_offset + i * 4) = cpu_to_le32(cayman_ps[i]);
	}
	radeon_bo_kunmap(rdev->r600_blit.shader_obj);
	radeon_bo_unreserve(rdev->r600_blit.shader_obj);

	radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
	return 0;
}