Linux Kernel  3.7.1
r600_blit.c
1 /*
2  * Copyright 2009 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18  * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
19  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  *
23  * Authors:
24  * Alex Deucher <[email protected]>
25  */
26 #include <drm/drmP.h>
27 #include <drm/radeon_drm.h>
28 #include "radeon_drv.h"
29 
30 #include "r600_blit_shaders.h"
31 
32 #define DI_PT_RECTLIST 0x11
33 #define DI_INDEX_SIZE_16_BIT 0x0
34 #define DI_SRC_SEL_AUTO_INDEX 0x2
35 
36 #define FMT_8 0x1
37 #define FMT_5_6_5 0x8
38 #define FMT_8_8_8_8 0x1a
39 #define COLOR_8 0x1
40 #define COLOR_5_6_5 0x8
41 #define COLOR_8_8_8_8 0x1a
42 
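/*
 * set_render_target() - point CB_COLOR0 at the blit destination.
 *
 * Programs color buffer 0 with the destination address, size and format
 * (one of the COLOR_* values above).  The pitch and slice fields are
 * written in the hardware's units of 8 and 64 pixels respectively, hence
 * the divisions below, and the height is rounded up to a multiple of 8
 * scanlines.
 */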
43 static void
44 set_render_target(drm_radeon_private_t *dev_priv, int format, int w, int h, u64 gpu_addr)
45 {
46  u32 cb_color_info;
47  int pitch, slice;
48  RING_LOCALS;
49  DRM_DEBUG("\n");
50 
51  h = ALIGN(h, 8);
52  if (h < 8)
53  h = 8;
54 
55  cb_color_info = ((format << 2) | (1 << 27));
56  pitch = (w / 8) - 1;
57  slice = ((w * h) / 64) - 1;
58 
59  if (((dev_priv->flags & RADEON_FAMILY_MASK) > CHIP_R600) &&
60  ((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_RV770)) {
61  BEGIN_RING(21 + 2);
62  OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
63  OUT_RING((R600_CB_COLOR0_BASE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
64  OUT_RING(gpu_addr >> 8);
65  OUT_RING(CP_PACKET3(R600_IT_SURFACE_BASE_UPDATE, 0));
66  OUT_RING(2 << 0);
67  } else {
68  BEGIN_RING(21);
69  OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
70  OUT_RING((R600_CB_COLOR0_BASE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
71  OUT_RING(gpu_addr >> 8);
72  }
73 
74  OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
75  OUT_RING((R600_CB_COLOR0_SIZE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
76  OUT_RING((pitch << 0) | (slice << 10));
77 
78  OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
79  OUT_RING((R600_CB_COLOR0_VIEW - R600_SET_CONTEXT_REG_OFFSET) >> 2);
80  OUT_RING(0);
81 
82  OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
83  OUT_RING((R600_CB_COLOR0_INFO - R600_SET_CONTEXT_REG_OFFSET) >> 2);
84  OUT_RING(cb_color_info);
85 
86  OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
87  OUT_RING((R600_CB_COLOR0_TILE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
88  OUT_RING(0);
89 
90  OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
91  OUT_RING((R600_CB_COLOR0_FRAG - R600_SET_CONTEXT_REG_OFFSET) >> 2);
92  OUT_RING(0);
93 
94  OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
95  OUT_RING((R600_CB_COLOR0_MASK - R600_SET_CONTEXT_REG_OFFSET) >> 2);
96  OUT_RING(0);
97 
98  ADVANCE_RING();
99 }
100 
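/*
 * cp_set_surface_sync() - emit a SURFACE_SYNC request.
 *
 * Flushes/invalidates the caches selected by @sync_type (R600_TC_ACTION_ENA,
 * R600_CB_ACTION_ENA, ...) over @size bytes starting at @mc_addr.  The
 * size is passed to the hardware in 256-byte units, hence the round-up
 * and shift; a @size of 0xffffffff means "everything".
 */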
101 static void
102 cp_set_surface_sync(drm_radeon_private_t *dev_priv,
103  u32 sync_type, u32 size, u64 mc_addr)
104 {
105  u32 cp_coher_size;
106  RING_LOCALS;
107  DRM_DEBUG("\n");
108 
109  if (size == 0xffffffff)
110  cp_coher_size = 0xffffffff;
111  else
112  cp_coher_size = ((size + 255) >> 8);
113 
114  BEGIN_RING(5);
115  OUT_RING(CP_PACKET3(R600_IT_SURFACE_SYNC, 3));
116  OUT_RING(sync_type);
117  OUT_RING(cp_coher_size);
118  OUT_RING((mc_addr >> 8));
119  OUT_RING(10); /* poll interval */
120  ADVANCE_RING();
121 }
122 
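/*
 * set_shaders() - upload and bind the blit vertex and pixel shaders.
 *
 * Copies the canned r6xx programs from r600_blit_shaders.h (byte-swapped
 * to little endian) into the first 512 bytes of the current vertex
 * buffer, VS at offset 0 and PS at offset 256, points the SQ_PGM_*
 * registers at those GPU addresses and flushes the shader cache so the
 * new code is picked up.
 */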
123 static void
124 set_shaders(struct drm_device *dev)
125 {
126  drm_radeon_private_t *dev_priv = dev->dev_private;
127  u64 gpu_addr;
128  int i;
129  u32 *vs, *ps;
130  uint32_t sq_pgm_resources;
131  RING_LOCALS;
132  DRM_DEBUG("\n");
133 
134  /* load shaders */
135  vs = (u32 *) ((char *)dev->agp_buffer_map->handle + dev_priv->blit_vb->offset);
136  ps = (u32 *) ((char *)dev->agp_buffer_map->handle + dev_priv->blit_vb->offset + 256);
137 
138  for (i = 0; i < r6xx_vs_size; i++)
139  vs[i] = cpu_to_le32(r6xx_vs[i]);
140  for (i = 0; i < r6xx_ps_size; i++)
141  ps[i] = cpu_to_le32(r6xx_ps[i]);
142 
143  dev_priv->blit_vb->used = 512;
144 
145  gpu_addr = dev_priv->gart_buffers_offset + dev_priv->blit_vb->offset;
146 
147  /* setup shader regs */
148  sq_pgm_resources = (1 << 0);
149 
150  BEGIN_RING(9 + 12);
151  /* VS */
152  OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
153  OUT_RING((R600_SQ_PGM_START_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
154  OUT_RING(gpu_addr >> 8);
155 
156  OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
157  OUT_RING((R600_SQ_PGM_RESOURCES_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
158  OUT_RING(sq_pgm_resources);
159 
160  OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
161  OUT_RING((R600_SQ_PGM_CF_OFFSET_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
162  OUT_RING(0);
163 
164  /* PS */
165  OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
166  OUT_RING((R600_SQ_PGM_START_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
167  OUT_RING((gpu_addr + 256) >> 8);
168 
169  OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
170  OUT_RING((R600_SQ_PGM_RESOURCES_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
171  OUT_RING(sq_pgm_resources | (1 << 28));
172 
173  OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
174  OUT_RING((R600_SQ_PGM_EXPORTS_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
175  OUT_RING(2);
176 
177  OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
178  OUT_RING((R600_SQ_PGM_CF_OFFSET_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
179  OUT_RING(0);
180  ADVANCE_RING();
181 
182  cp_set_surface_sync(dev_priv,
183  R600_SH_ACTION_ENA, 512, gpu_addr);
184 }
185 
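/*
 * set_vtx_resource() - bind the 48-byte vertex buffer for the vertex fetcher.
 *
 * @gpu_addr holds three vertices of four floats each (dst x/y, src x/y),
 * written by the blit routines below.  The buffer is then synced through
 * the TC on RV610/RV620/RS780/RS880/RV710 and through the VC on the
 * other parts.
 */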
186 static void
187 set_vtx_resource(drm_radeon_private_t *dev_priv, u64 gpu_addr)
188 {
189  uint32_t sq_vtx_constant_word2;
190  RING_LOCALS;
191  DRM_DEBUG("\n");
192 
193  sq_vtx_constant_word2 = (((gpu_addr >> 32) & 0xff) | (16 << 8));
194 #ifdef __BIG_ENDIAN
195  sq_vtx_constant_word2 |= (2 << 30);
196 #endif
197 
198  BEGIN_RING(9);
199  OUT_RING(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
200  OUT_RING(0x460);
201  OUT_RING(gpu_addr & 0xffffffff);
202  OUT_RING(48 - 1);
203  OUT_RING(sq_vtx_constant_word2);
204  OUT_RING(1 << 0);
205  OUT_RING(0);
206  OUT_RING(0);
207  OUT_RING(R600_SQ_TEX_VTX_VALID_BUFFER << 30);
208  ADVANCE_RING();
209 
210  if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
211  ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
212  ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||
213  ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880) ||
214  ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710))
215  cp_set_surface_sync(dev_priv,
216  R600_TC_ACTION_ENA, 48, gpu_addr);
217  else
218  cp_set_surface_sync(dev_priv,
219  R600_VC_ACTION_ENA, 48, gpu_addr);
220 }
221 
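/*
 * set_tex_resource() - describe the blit source as a 2D texture.
 *
 * Builds the SQ texture resource words for a surface of the given format,
 * width, height and pitch (the pitch field is in 8-texel units) and binds
 * it for the pixel shader to sample from.
 */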
222 static void
223 set_tex_resource(drm_radeon_private_t *dev_priv,
224  int format, int w, int h, int pitch, u64 gpu_addr)
225 {
226  uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4;
227  RING_LOCALS;
228  DRM_DEBUG("\n");
229 
230  if (h < 1)
231  h = 1;
232 
233  sq_tex_resource_word0 = (1 << 0);
234  sq_tex_resource_word0 |= ((((pitch >> 3) - 1) << 8) |
235  ((w - 1) << 19));
236 
237  sq_tex_resource_word1 = (format << 26);
238  sq_tex_resource_word1 |= ((h - 1) << 0);
239 
240  sq_tex_resource_word4 = ((1 << 14) |
241  (0 << 16) |
242  (1 << 19) |
243  (2 << 22) |
244  (3 << 25));
245 
246  BEGIN_RING(9);
247  OUT_RING(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
248  OUT_RING(0);
249  OUT_RING(sq_tex_resource_word0);
250  OUT_RING(sq_tex_resource_word1);
251  OUT_RING(gpu_addr >> 8);
252  OUT_RING(gpu_addr >> 8);
253  OUT_RING(sq_tex_resource_word4);
254  OUT_RING(0);
255  OUT_RING(R600_SQ_TEX_VTX_VALID_TEXTURE << 30);
256  ADVANCE_RING();
257 
258 }
259 
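/*
 * set_scissors() - clamp rendering to the destination rectangle.
 *
 * Writes the same (x1,y1)-(x2,y2) box to the screen, generic and window
 * scissor registers; bit 31 of the TL word disables the window offset
 * for the latter two.
 */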
260 static void
261 set_scissors(drm_radeon_private_t *dev_priv, int x1, int y1, int x2, int y2)
262 {
263  RING_LOCALS;
264  DRM_DEBUG("\n");
265 
266  BEGIN_RING(12);
267  OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2));
268  OUT_RING((R600_PA_SC_SCREEN_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2);
269  OUT_RING((x1 << 0) | (y1 << 16));
270  OUT_RING((x2 << 0) | (y2 << 16));
271 
272  OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2));
273  OUT_RING((R600_PA_SC_GENERIC_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2);
274  OUT_RING((x1 << 0) | (y1 << 16) | (1 << 31));
275  OUT_RING((x2 << 0) | (y2 << 16));
276 
277  OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2));
278  OUT_RING((R600_PA_SC_WINDOW_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2);
279  OUT_RING((x1 << 0) | (y1 << 16) | (1 << 31));
280  OUT_RING((x2 << 0) | (y2 << 16));
281  ADVANCE_RING();
282 }
283 
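/*
 * draw_auto() - kick off the blit draw.
 *
 * Selects the RECTLIST primitive type, 16-bit auto-generated indices and
 * a single instance, then issues DRAW_INDEX_AUTO for the three vertices
 * set up above and commits the ring.
 */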
284 static void
285 draw_auto(drm_radeon_private_t *dev_priv)
286 {
287  RING_LOCALS;
288  DRM_DEBUG("\n");
289 
290  BEGIN_RING(10);
291  OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
292  OUT_RING((R600_VGT_PRIMITIVE_TYPE - R600_SET_CONFIG_REG_OFFSET) >> 2);
293  OUT_RING(DI_PT_RECTLIST);
294 
295  OUT_RING(CP_PACKET3(R600_IT_INDEX_TYPE, 0));
296 #ifdef __BIG_ENDIAN
297  OUT_RING((2 << 2) | DI_INDEX_SIZE_16_BIT);
298 #else
299  OUT_RING(DI_INDEX_SIZE_16_BIT);
300 #endif
301 
302  OUT_RING(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
303  OUT_RING(1);
304 
305  OUT_RING(CP_PACKET3(R600_IT_DRAW_INDEX_AUTO, 1));
306  OUT_RING(3);
307  OUT_RING(DI_SRC_SEL_AUTO_INDEX);
308 
309  ADVANCE_RING();
310  COMMIT_RING();
311 }
312 
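/*
 * set_default_state() - emit the baseline 3D state for the blit.
 *
 * Streams the canned r6xx/r7xx default register state from
 * r600_blit_shaders.h and then programs SQ_CONFIG plus the GPR, thread
 * and stack resource split using the per-ASIC numbers chosen in the
 * switch below.
 */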
313 static void
314 set_default_state(drm_radeon_private_t *dev_priv)
315 {
316  int i;
317  u32 sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2;
318  u32 sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2;
319  int num_ps_gprs, num_vs_gprs, num_temp_gprs, num_gs_gprs, num_es_gprs;
320  int num_ps_threads, num_vs_threads, num_gs_threads, num_es_threads;
321  int num_ps_stack_entries, num_vs_stack_entries, num_gs_stack_entries, num_es_stack_entries;
322  RING_LOCALS;
323 
324  switch ((dev_priv->flags & RADEON_FAMILY_MASK)) {
325  case CHIP_R600:
326  num_ps_gprs = 192;
327  num_vs_gprs = 56;
328  num_temp_gprs = 4;
329  num_gs_gprs = 0;
330  num_es_gprs = 0;
331  num_ps_threads = 136;
332  num_vs_threads = 48;
333  num_gs_threads = 4;
334  num_es_threads = 4;
335  num_ps_stack_entries = 128;
336  num_vs_stack_entries = 128;
337  num_gs_stack_entries = 0;
338  num_es_stack_entries = 0;
339  break;
340  case CHIP_RV630:
341  case CHIP_RV635:
342  num_ps_gprs = 84;
343  num_vs_gprs = 36;
344  num_temp_gprs = 4;
345  num_gs_gprs = 0;
346  num_es_gprs = 0;
347  num_ps_threads = 144;
348  num_vs_threads = 40;
349  num_gs_threads = 4;
350  num_es_threads = 4;
351  num_ps_stack_entries = 40;
352  num_vs_stack_entries = 40;
353  num_gs_stack_entries = 32;
354  num_es_stack_entries = 16;
355  break;
356  case CHIP_RV610:
357  case CHIP_RV620:
358  case CHIP_RS780:
359  case CHIP_RS880:
360  default:
361  num_ps_gprs = 84;
362  num_vs_gprs = 36;
363  num_temp_gprs = 4;
364  num_gs_gprs = 0;
365  num_es_gprs = 0;
366  num_ps_threads = 136;
367  num_vs_threads = 48;
368  num_gs_threads = 4;
369  num_es_threads = 4;
370  num_ps_stack_entries = 40;
371  num_vs_stack_entries = 40;
372  num_gs_stack_entries = 32;
373  num_es_stack_entries = 16;
374  break;
375  case CHIP_RV670:
376  num_ps_gprs = 144;
377  num_vs_gprs = 40;
378  num_temp_gprs = 4;
379  num_gs_gprs = 0;
380  num_es_gprs = 0;
381  num_ps_threads = 136;
382  num_vs_threads = 48;
383  num_gs_threads = 4;
384  num_es_threads = 4;
385  num_ps_stack_entries = 40;
386  num_vs_stack_entries = 40;
387  num_gs_stack_entries = 32;
388  num_es_stack_entries = 16;
389  break;
390  case CHIP_RV770:
391  num_ps_gprs = 192;
392  num_vs_gprs = 56;
393  num_temp_gprs = 4;
394  num_gs_gprs = 0;
395  num_es_gprs = 0;
396  num_ps_threads = 188;
397  num_vs_threads = 60;
398  num_gs_threads = 0;
399  num_es_threads = 0;
400  num_ps_stack_entries = 256;
401  num_vs_stack_entries = 256;
402  num_gs_stack_entries = 0;
403  num_es_stack_entries = 0;
404  break;
405  case CHIP_RV730:
406  case CHIP_RV740:
407  num_ps_gprs = 84;
408  num_vs_gprs = 36;
409  num_temp_gprs = 4;
410  num_gs_gprs = 0;
411  num_es_gprs = 0;
412  num_ps_threads = 188;
413  num_vs_threads = 60;
414  num_gs_threads = 0;
415  num_es_threads = 0;
416  num_ps_stack_entries = 128;
417  num_vs_stack_entries = 128;
418  num_gs_stack_entries = 0;
419  num_es_stack_entries = 0;
420  break;
421  case CHIP_RV710:
422  num_ps_gprs = 192;
423  num_vs_gprs = 56;
424  num_temp_gprs = 4;
425  num_gs_gprs = 0;
426  num_es_gprs = 0;
427  num_ps_threads = 144;
428  num_vs_threads = 48;
429  num_gs_threads = 0;
430  num_es_threads = 0;
431  num_ps_stack_entries = 128;
432  num_vs_stack_entries = 128;
433  num_gs_stack_entries = 0;
434  num_es_stack_entries = 0;
435  break;
436  }
437 
438  if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
439  ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
440  ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||
441  ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880) ||
442  ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710))
443  sq_config = 0;
444  else
445  sq_config = R600_VC_ENABLE;
446 
447  sq_config |= (R600_DX9_CONSTS |
448  R600_ALU_INST_PREFER_VECTOR |
449  R600_PS_PRIO(0) |
450  R600_VS_PRIO(1) |
451  R600_GS_PRIO(2) |
452  R600_ES_PRIO(3));
453 
454  sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(num_ps_gprs) |
455  R600_NUM_VS_GPRS(num_vs_gprs) |
456  R600_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs));
457  sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(num_gs_gprs) |
458  R600_NUM_ES_GPRS(num_es_gprs));
459  sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(num_ps_threads) |
460  R600_NUM_VS_THREADS(num_vs_threads) |
461  R600_NUM_GS_THREADS(num_gs_threads) |
462  R600_NUM_ES_THREADS(num_es_threads));
463  sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(num_ps_stack_entries) |
464  R600_NUM_VS_STACK_ENTRIES(num_vs_stack_entries));
465  sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(num_gs_stack_entries) |
466  R600_NUM_ES_STACK_ENTRIES(num_es_stack_entries));
467 
468  if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770) {
469  BEGIN_RING(r7xx_default_size + 10);
470  for (i = 0; i < r7xx_default_size; i++)
471  OUT_RING(r7xx_default_state[i]);
472  } else {
473  BEGIN_RING(r6xx_default_size + 10);
474  for (i = 0; i < r6xx_default_size; i++)
475  OUT_RING(r6xx_default_state[i]);
476  }
477  OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
478  OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT);
479  /* SQ config */
480  OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 6));
481  OUT_RING((R600_SQ_CONFIG - R600_SET_CONFIG_REG_OFFSET) >> 2);
482  OUT_RING(sq_config);
483  OUT_RING(sq_gpr_resource_mgmt_1);
484  OUT_RING(sq_gpr_resource_mgmt_2);
485  OUT_RING(sq_thread_resource_mgmt);
486  OUT_RING(sq_stack_resource_mgmt_1);
487  OUT_RING(sq_stack_resource_mgmt_2);
488  ADVANCE_RING();
489 }
490 
491 /* 23 bits of float fractional data */
492 #define I2F_FRAC_BITS 23
493 #define I2F_MASK ((1 << I2F_FRAC_BITS) - 1)
494 
495 /*
496  * Converts unsigned integer into 32-bit IEEE floating point representation.
497  * Will be exact from 0 to 2^24. Above that, we round towards zero
498  * as the fractional bits will not fit in a float. (It would be better to
499  * round towards even as the fpu does, but that is slower.)
500  */
501 static uint32_t int2float(uint32_t x)
502 {
503  uint32_t msb, exponent, fraction;
504 
505  /* Zero is special */
506  if (!x) return 0;
507 
508  /* Get location of the most significant bit */
509  msb = __fls(x);
510 
511  /*
512  * Use a rotate instead of a shift because that works both leftwards
513  * and rightwards due to the mod(32) behaviour. This means we don't
514  * need to check to see if we are above 2^24 or not.
515  */
516  fraction = ror32(x, (msb - I2F_FRAC_BITS) & 0x1f) & I2F_MASK;
517  exponent = (127 + msb) << I2F_FRAC_BITS;
518 
519  return fraction + exponent;
520 }
521 
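Worked example of the conversion above: int2float(10) finds msb = 3, so fraction = ror32(10, (3 - 23) & 0x1f) & I2F_MASK = 0x200000 and exponent = (127 + 3) << 23 = 0x41000000, giving 0x41200000, the IEEE-754 encoding of 10.0f. The following userspace sketch mirrors the routine for experimentation; the demo_* helpers and the GCC builtin used in place of the kernel's __fls()/ror32() are illustrative stand-ins, not part of this driver.

#include <assert.h>
#include <stdint.h>
#include <string.h>

static uint32_t demo_ror32(uint32_t x, unsigned int r)
{
	/* rotate right; (32 - r) & 31 avoids an undefined shift when r == 0 */
	return (x >> r) | (x << ((32 - r) & 31));
}

static uint32_t demo_int2float(uint32_t x)
{
	uint32_t msb, exponent, fraction;

	if (!x)
		return 0;
	msb = 31 - __builtin_clz(x);		/* equivalent of __fls(x) */
	fraction = demo_ror32(x, (msb - 23) & 0x1f) & ((1u << 23) - 1);
	exponent = (127 + msb) << 23;
	return fraction + exponent;
}

int main(void)
{
	uint32_t bits = demo_int2float(10);	/* msb = 3 -> 0x41200000 */
	float f;

	memcpy(&f, &bits, sizeof(f));
	assert(bits == 0x41200000 && f == 10.0f);
	return 0;
}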
522 static int r600_nomm_get_vb(struct drm_device *dev)
523 {
524  drm_radeon_private_t *dev_priv = dev->dev_private;
525  dev_priv->blit_vb = radeon_freelist_get(dev);
526  if (!dev_priv->blit_vb) {
527  DRM_ERROR("Unable to allocate vertex buffer for blit\n");
528  return -EAGAIN;
529  }
530  return 0;
531 }
532 
533 static void r600_nomm_put_vb(struct drm_device *dev)
534 {
535  drm_radeon_private_t *dev_priv = dev->dev_private;
536 
537  dev_priv->blit_vb->used = 0;
538  radeon_cp_discard_buffer(dev, dev_priv->blit_vb->file_priv->master, dev_priv->blit_vb);
539 }
540 
541 static void *r600_nomm_get_vb_ptr(struct drm_device *dev)
542 {
543  drm_radeon_private_t *dev_priv = dev->dev_private;
544  return (((char *)dev->agp_buffer_map->handle +
545  dev_priv->blit_vb->offset + dev_priv->blit_vb->used));
546 }
547 
548 int
549 r600_prepare_blit_copy(struct drm_device *dev, struct drm_file *file_priv)
550 {
551  drm_radeon_private_t *dev_priv = dev->dev_private;
552  int ret;
553  DRM_DEBUG("\n");
554 
555  ret = r600_nomm_get_vb(dev);
556  if (ret)
557  return ret;
558 
559  dev_priv->blit_vb->file_priv = file_priv;
560 
561  set_default_state(dev_priv);
562  set_shaders(dev);
563 
564  return 0;
565 }
566 
567 
568 void
569 r600_done_blit_copy(struct drm_device *dev)
570 {
571  drm_radeon_private_t *dev_priv = dev->dev_private;
572  RING_LOCALS;
573  DRM_DEBUG("\n");
574 
575  BEGIN_RING(5);
576  OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
577  OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT);
578  /* wait for 3D idle clean */
579  OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
580  OUT_RING((R600_WAIT_UNTIL - R600_SET_CONFIG_REG_OFFSET) >> 2);
581  OUT_RING(RADEON_WAIT_3D_IDLE | RADEON_WAIT_3D_IDLECLEAN);
582 
583  ADVANCE_RING();
584  COMMIT_RING();
585 
586  r600_nomm_put_vb(dev);
587 }
588 
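The exported entry points r600_prepare_blit_copy(), r600_blit_copy()/r600_blit_swap() and r600_done_blit_copy() are intended to be used as a bracket around one or more copies. A minimal sketch of that calling pattern follows; the wrapper function name and its parameters are hypothetical, and the real callers live elsewhere in the driver.

static int example_gart_copy(struct drm_device *dev, struct drm_file *file_priv,
			     uint64_t src_gpu_addr, uint64_t dst_gpu_addr,
			     int size_bytes)
{
	int ret;

	/* grab a vertex buffer, emit default state and the blit shaders */
	ret = r600_prepare_blit_copy(dev, file_priv);
	if (ret)
		return ret;

	/* queue the copy itself; may be called more than once before "done" */
	r600_blit_copy(dev, src_gpu_addr, dst_gpu_addr, size_bytes);

	/* flush caches, wait for 3D idle and release the vertex buffer */
	r600_done_blit_copy(dev);
	return 0;
}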
589 void
590 r600_blit_copy(struct drm_device *dev,
591  uint64_t src_gpu_addr, uint64_t dst_gpu_addr,
592  int size_bytes)
593 {
594  drm_radeon_private_t *dev_priv = dev->dev_private;
595  int max_bytes;
596  u64 vb_addr;
597  u32 *vb;
598 
599  vb = r600_nomm_get_vb_ptr(dev);
600 
601  if ((size_bytes & 3) || (src_gpu_addr & 3) || (dst_gpu_addr & 3)) {
602  max_bytes = 8192;
603 
604  while (size_bytes) {
605  int cur_size = size_bytes;
606  int src_x = src_gpu_addr & 255;
607  int dst_x = dst_gpu_addr & 255;
608  int h = 1;
609  src_gpu_addr = src_gpu_addr & ~255;
610  dst_gpu_addr = dst_gpu_addr & ~255;
611 
612  if (!src_x && !dst_x) {
613  h = (cur_size / max_bytes);
614  if (h > 8192)
615  h = 8192;
616  if (h == 0)
617  h = 1;
618  else
619  cur_size = max_bytes;
620  } else {
621  if (cur_size > max_bytes)
622  cur_size = max_bytes;
623  if (cur_size > (max_bytes - dst_x))
624  cur_size = (max_bytes - dst_x);
625  if (cur_size > (max_bytes - src_x))
626  cur_size = (max_bytes - src_x);
627  }
628 
629  if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) {
630 
631  r600_nomm_put_vb(dev);
632  r600_nomm_get_vb(dev);
633  if (!dev_priv->blit_vb)
634  return;
635  set_shaders(dev);
636  vb = r600_nomm_get_vb_ptr(dev);
637  }
638 
639  vb[0] = int2float(dst_x);
640  vb[1] = 0;
641  vb[2] = int2float(src_x);
642  vb[3] = 0;
643 
644  vb[4] = int2float(dst_x);
645  vb[5] = int2float(h);
646  vb[6] = int2float(src_x);
647  vb[7] = int2float(h);
648 
649  vb[8] = int2float(dst_x + cur_size);
650  vb[9] = int2float(h);
651  vb[10] = int2float(src_x + cur_size);
652  vb[11] = int2float(h);
653 
654  /* src */
655  set_tex_resource(dev_priv, FMT_8,
656  src_x + cur_size, h, src_x + cur_size,
657  src_gpu_addr);
658 
659  cp_set_surface_sync(dev_priv,
660  R600_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
661 
662  /* dst */
663  set_render_target(dev_priv, COLOR_8,
664  dst_x + cur_size, h,
665  dst_gpu_addr);
666 
667  /* scissors */
668  set_scissors(dev_priv, dst_x, 0, dst_x + cur_size, h);
669 
670  /* Vertex buffer setup */
671  vb_addr = dev_priv->gart_buffers_offset +
672  dev_priv->blit_vb->offset +
673  dev_priv->blit_vb->used;
674  set_vtx_resource(dev_priv, vb_addr);
675 
676  /* draw */
677  draw_auto(dev_priv);
678 
679  cp_set_surface_sync(dev_priv,
680  R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA,
681  cur_size * h, dst_gpu_addr);
682 
683  vb += 12;
684  dev_priv->blit_vb->used += 12 * 4;
685 
686  src_gpu_addr += cur_size * h;
687  dst_gpu_addr += cur_size * h;
688  size_bytes -= cur_size * h;
689  }
690  } else {
691  max_bytes = 8192 * 4;
692 
693  while (size_bytes) {
694  int cur_size = size_bytes;
695  int src_x = (src_gpu_addr & 255);
696  int dst_x = (dst_gpu_addr & 255);
697  int h = 1;
698  src_gpu_addr = src_gpu_addr & ~255;
699  dst_gpu_addr = dst_gpu_addr & ~255;
700 
701  if (!src_x && !dst_x) {
702  h = (cur_size / max_bytes);
703  if (h > 8192)
704  h = 8192;
705  if (h == 0)
706  h = 1;
707  else
708  cur_size = max_bytes;
709  } else {
710  if (cur_size > max_bytes)
711  cur_size = max_bytes;
712  if (cur_size > (max_bytes - dst_x))
713  cur_size = (max_bytes - dst_x);
714  if (cur_size > (max_bytes - src_x))
715  cur_size = (max_bytes - src_x);
716  }
717 
718  if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) {
719  r600_nomm_put_vb(dev);
720  r600_nomm_get_vb(dev);
721  if (!dev_priv->blit_vb)
722  return;
723 
724  set_shaders(dev);
725  vb = r600_nomm_get_vb_ptr(dev);
726  }
727 
728  vb[0] = int2float(dst_x / 4);
729  vb[1] = 0;
730  vb[2] = int2float(src_x / 4);
731  vb[3] = 0;
732 
733  vb[4] = int2float(dst_x / 4);
734  vb[5] = int2float(h);
735  vb[6] = int2float(src_x / 4);
736  vb[7] = int2float(h);
737 
738  vb[8] = int2float((dst_x + cur_size) / 4);
739  vb[9] = int2float(h);
740  vb[10] = int2float((src_x + cur_size) / 4);
741  vb[11] = int2float(h);
742 
743  /* src */
744  set_tex_resource(dev_priv, FMT_8_8_8_8,
745  (src_x + cur_size) / 4,
746  h, (src_x + cur_size) / 4,
747  src_gpu_addr);
748 
749  cp_set_surface_sync(dev_priv,
750  R600_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
751 
752  /* dst */
753  set_render_target(dev_priv, COLOR_8_8_8_8,
754  (dst_x + cur_size) / 4, h,
755  dst_gpu_addr);
756 
757  /* scissors */
758  set_scissors(dev_priv, (dst_x / 4), 0, (dst_x + cur_size / 4), h);
759 
760  /* Vertex buffer setup */
761  vb_addr = dev_priv->gart_buffers_offset +
762  dev_priv->blit_vb->offset +
763  dev_priv->blit_vb->used;
764  set_vtx_resource(dev_priv, vb_addr);
765 
766  /* draw */
767  draw_auto(dev_priv);
768 
769  cp_set_surface_sync(dev_priv,
770  R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA,
771  cur_size * h, dst_gpu_addr);
772 
773  vb += 12;
774  dev_priv->blit_vb->used += 12 * 4;
775 
776  src_gpu_addr += cur_size * h;
777  dst_gpu_addr += cur_size * h;
778  size_bytes -= cur_size * h;
779  }
780  }
781 }
782 
783 void
784 r600_blit_swap(struct drm_device *dev,
785  uint64_t src_gpu_addr, uint64_t dst_gpu_addr,
786  int sx, int sy, int dx, int dy,
787  int w, int h, int src_pitch, int dst_pitch, int cpp)
788 {
789  drm_radeon_private_t *dev_priv = dev->dev_private;
790  int cb_format, tex_format;
791  int sx2, sy2, dx2, dy2;
792  u64 vb_addr;
793  u32 *vb;
794 
795  if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) {
796 
797  r600_nomm_put_vb(dev);
798  r600_nomm_get_vb(dev);
799  if (!dev_priv->blit_vb)
800  return;
801 
802  set_shaders(dev);
803  }
804  vb = r600_nomm_get_vb_ptr(dev);
805 
806  sx2 = sx + w;
807  sy2 = sy + h;
808  dx2 = dx + w;
809  dy2 = dy + h;
810 
811  vb[0] = int2float(dx);
812  vb[1] = int2float(dy);
813  vb[2] = int2float(sx);
814  vb[3] = int2float(sy);
815 
816  vb[4] = int2float(dx);
817  vb[5] = int2float(dy2);
818  vb[6] = int2float(sx);
819  vb[7] = int2float(sy2);
820 
821  vb[8] = int2float(dx2);
822  vb[9] = int2float(dy2);
823  vb[10] = int2float(sx2);
824  vb[11] = int2float(sy2);
825 
826  switch(cpp) {
827  case 4:
828  cb_format = COLOR_8_8_8_8;
829  tex_format = FMT_8_8_8_8;
830  break;
831  case 2:
832  cb_format = COLOR_5_6_5;
833  tex_format = FMT_5_6_5;
834  break;
835  default:
836  cb_format = COLOR_8;
837  tex_format = FMT_8;
838  break;
839  }
840 
841  /* src */
842  set_tex_resource(dev_priv, tex_format,
843  src_pitch / cpp,
844  sy2, src_pitch / cpp,
845  src_gpu_addr);
846 
847  cp_set_surface_sync(dev_priv,
848  R600_TC_ACTION_ENA, src_pitch * sy2, src_gpu_addr);
849 
850  /* dst */
851  set_render_target(dev_priv, cb_format,
852  dst_pitch / cpp, dy2,
853  dst_gpu_addr);
854 
855  /* scissors */
856  set_scissors(dev_priv, dx, dy, dx2, dy2);
857 
858  /* Vertex buffer setup */
859  vb_addr = dev_priv->gart_buffers_offset +
860  dev_priv->blit_vb->offset +
861  dev_priv->blit_vb->used;
862  set_vtx_resource(dev_priv, vb_addr);
863 
864  /* draw */
865  draw_auto(dev_priv);
866 
867  cp_set_surface_sync(dev_priv,
868  R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA,
869  dst_pitch * dy2, dst_gpu_addr);
870 
871  dev_priv->blit_vb->used += 12 * 4;
872 }