Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
r300_cmdbuf.c
Go to the documentation of this file.
1 /* r300_cmdbuf.c -- Command buffer emission for R300 -*- linux-c -*-
2  *
3  * Copyright (C) The Weather Channel, Inc. 2002.
4  * Copyright (C) 2004 Nicolai Haehnle.
5  * All Rights Reserved.
6  *
7  * The Weather Channel (TM) funded Tungsten Graphics to develop the
8  * initial release of the Radeon 8500 driver under the XFree86 license.
9  * This notice must be preserved.
10  *
11  * Permission is hereby granted, free of charge, to any person obtaining a
12  * copy of this software and associated documentation files (the "Software"),
13  * to deal in the Software without restriction, including without limitation
14  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15  * and/or sell copies of the Software, and to permit persons to whom the
16  * Software is furnished to do so, subject to the following conditions:
17  *
18  * The above copyright notice and this permission notice (including the next
19  * paragraph) shall be included in all copies or substantial portions of the
20  * Software.
21  *
22  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
25  * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
26  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
27  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
28  * DEALINGS IN THE SOFTWARE.
29  *
30  * Authors:
31  * Nicolai Haehnle <[email protected]>
32  */
33 
34 #include <drm/drmP.h>
35 #include <drm/drm_buffer.h>
36 #include <drm/radeon_drm.h>
37 #include "radeon_drv.h"
38 #include "r300_reg.h"
39 
40 #include <asm/unaligned.h>
41 
/* Hardware processes up to this many cliprects per emission pass. */
#define R300_SIMULTANEOUS_CLIPRECTS		4

/* Values for R300_RE_CLIPRECT_CNTL depending on the number of cliprects
 * (indexed by nr - 1: entry i enables rendering inside the union of the
 * first i+1 cliprect slots).
 */
static const int r300_cliprect_cntl[4] = {
	0xAAAA,
	0xEEEE,
	0xFEFE,
	0xFFFE
};
52 
/*
 * Emit the current batch of cliprects (starting at box index n) into the
 * ring, or set up the engine so no fragments are produced when there are
 * none.  Returns 0 on success, -EFAULT if copying a cliprect from
 * userspace faults.
 *
 * NOTE(review): this extract is incomplete -- the rest of the parameter
 * list, RING_LOCALS, the clamp of nr, the DRM_COPY_FROM_USER call, the
 * coordinate mask constants and several OUT_RING header dwords are
 * missing from the scraped text; compare with the full r300_cmdbuf.c.
 */
static int r300_emit_cliprects(drm_radeon_private_t *dev_priv,
{
	struct drm_clip_rect box;
	int nr;	/* number of cliprects emitted in this pass */
	int i;

	nr = cmdbuf->nbox - n;

	DRM_DEBUG("%i cliprects\n", nr);

	if (nr) {
		BEGIN_RING(6 + nr * 2);

		for (i = 0; i < nr; ++i) {
			/* NOTE(review): truncated DRM_COPY_FROM_USER(...) call */
			    (&box, &cmdbuf->boxes[n + i], sizeof(box))) {
				DRM_ERROR("copy cliprect faulted\n");
				return -EFAULT;
			}

			box.x2--;	/* Hardware expects inclusive bottom-right corner */
			box.y2--;

			/* RV515 and newer take unbiased coordinates; older
			 * parts need R300_CLIPRECT_OFFSET added first.
			 * NOTE(review): the mask operands after each '&' are
			 * missing from this extract. */
			if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV515) {
				box.x1 = (box.x1) &
				box.y1 = (box.y1) &
				box.x2 = (box.x2) &
				box.y2 = (box.y2) &
			} else {
				box.x1 = (box.x1 + R300_CLIPRECT_OFFSET) &
				box.y1 = (box.y1 + R300_CLIPRECT_OFFSET) &
				box.x2 = (box.x2 + R300_CLIPRECT_OFFSET) &
				box.y2 = (box.y2 + R300_CLIPRECT_OFFSET) &
			}

			OUT_RING((box.x1 << R300_CLIPRECT_X_SHIFT) |
				 (box.y1 << R300_CLIPRECT_Y_SHIFT));
			OUT_RING((box.x2 << R300_CLIPRECT_X_SHIFT) |
				 (box.y2 << R300_CLIPRECT_Y_SHIFT));

		}

		/* Enable exactly the cliprect slots just programmed. */
		OUT_RING_REG(R300_RE_CLIPRECT_CNTL, r300_cliprect_cntl[nr - 1]);

		/* TODO/SECURITY: Force scissors to a safe value, otherwise the
		 * client might be able to trample over memory.
		 * The impact should be very limited, but I'd rather be safe than
		 * sorry.
		 */
		OUT_RING(0);
		ADVANCE_RING();
	} else {
		/* Why we allow zero cliprect rendering:
		 * There are some commands in a command buffer that must be submitted
		 * even when there are no cliprects, e.g. DMA buffer discard
		 * or state setting (though state setting could be avoided by
		 * simulating a loss of context).
		 *
		 * Now since the cmdbuf interface is so chaotic right now (and is
		 * bound to remain that way for a bit until things settle down),
		 * it is basically impossible to filter out the commands that are
		 * necessary and those that aren't.
		 *
		 * So I choose the safe way and don't do any filtering at all;
		 * instead, I simply set up the engine so that all rendering
		 * can't produce any fragments.
		 */
		BEGIN_RING(2);
		ADVANCE_RING();
	}

	/* flush cache and wait idle clean after cliprect change */
	BEGIN_RING(2);
	ADVANCE_RING();
	BEGIN_RING(2);
	ADVANCE_RING();
	/* set flush flag */
	dev_priv->track_flush |= RADEON_FLUSH_EMITED;

	return 0;
}
158 
/* One flag byte per 32-bit register in the 64K aperture (MARK_SAFE /
 * MARK_CHECK_OFFSET); filled in by the init routine below and consulted
 * by r300_check_range() / r300_emit_carefully_checked_packet0(). */
static u8 r300_reg_flags[0x10000 >> 2];
160 
/*
 * Build the per-register safety table used when validating user command
 * streams: registers userspace may write freely get MARK_SAFE, registers
 * whose values are GPU addresses get MARK_CHECK_OFFSET so each write is
 * range-checked.
 *
 * NOTE(review): the function name/signature line (upstream:
 * r300_init_reg_flags(struct drm_device *dev)) and the majority of the
 * ADD_RANGE entries are missing from this extract.
 */
{
	int i;
	drm_radeon_private_t *dev_priv = dev->dev_private;

	/* Start from "nothing is safe" and whitelist ranges below. */
	memset(r300_reg_flags, 0, 0x10000 >> 2);
	/* Mark every dword register in [reg, reg + count*4) with 'mark'. */
#define ADD_RANGE_MARK(reg, count,mark) \
		for(i=((reg)>>2);i<((reg)>>2)+(count);i++)\
			r300_reg_flags[i]|=(mark);

#define MARK_SAFE		1
#define MARK_CHECK_OFFSET	2

#define ADD_RANGE(reg, count)	ADD_RANGE_MARK(reg, count, MARK_SAFE)

	/* these match cmducs() command in r300_driver/r300/r300_cmdbuf.c */
	ADD_RANGE(0x2134, 2);
	ADD_RANGE(0x21DC, 1);
	ADD_RANGE(0x4200, 4);
	ADD_RANGE(0x4214, 1);
	ADD_RANGE(0x4230, 3);
	ADD_RANGE(0x4260, 3);
	ADD_RANGE(0x42C0, 2);

	/* NOTE(review): the body of this RV530-only branch is missing from
	 * this extract, leaving a dangling if -- do not compile as-is. */
	if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV530)

	ADD_RANGE(0x43E8, 1);

	ADD_RANGE(0x46A4, 5);

	ADD_RANGE(0x4BD8, 1);
	ADD_RANGE(0x4E00, 1);
	ADD_RANGE(0x4E50, 9);
	ADD_RANGE(0x4E88, 1);
	ADD_RANGE(0x4EA0, 2);
	ADD_RANGE_MARK(R300_ZB_DEPTHOFFSET, 1, MARK_CHECK_OFFSET);	/* check offset */
	ADD_RANGE(R300_ZB_ZPASS_DATA, 2);	/* ZB_ZPASS_DATA, ZB_ZPASS_ADDR */

	/* Texture offset is dangerous and needs more checking */

	/* Sporadic registers used as primitives are emitted */

	/* NOTE(review): most entries of this family-specific branch are
	 * missing from this extract. */
	if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV515) {
		ADD_RANGE(R500_RS_IP_0, 16);
	} else {

	}
}
280 
281 static __inline__ int r300_check_range(unsigned reg, int count)
282 {
283  int i;
284  if (reg & ~0xffff)
285  return -1;
286  for (i = (reg >> 2); i < (reg >> 2) + count; i++)
287  if (r300_reg_flags[i] != MARK_SAFE)
288  return 1;
289  return 0;
290 }
291 
/*
 * Slow path for type-0 packets that touch at least one register not
 * flagged MARK_SAFE: every register in the span is re-classified, and
 * MARK_CHECK_OFFSET values must pass radeon_check_offset() before the
 * packet is copied into the ring.
 * Returns 0 on success, -EINVAL when a register or offset fails.
 *
 * NOTE(review): the parameter type lines are garbled in this extract;
 * upstream takes (dev_priv, cmdbuf, header) like r300_emit_packet0.
 */
static __inline__ int r300_emit_carefully_checked_packet0(drm_radeon_private_t *
							  dev_priv,
							  * cmdbuf,
							  header)
{
	int reg;
	int sz;
	int i;
	u32 *value;
	RING_LOCALS;

	sz = header.packet0.count;
	reg = (header.packet0.reghi << 8) | header.packet0.reglo;

	/* Cap the payload of a single checked packet0. */
	if ((sz > 64) || (sz < 0)) {
		DRM_ERROR("Cannot emit more than 64 values at a time (reg=%04x sz=%d)\n",
			 reg, sz);
		return -EINVAL;
	}

	for (i = 0; i < sz; i++) {
		switch (r300_reg_flags[(reg >> 2) + i]) {
		case MARK_SAFE:
			break;
		case MARK_CHECK_OFFSET:
			/* The value written here is a GPU address and must
			 * fall inside a GPU-reachable aperture. */
			value = drm_buffer_pointer_to_dword(cmdbuf->buffer, i);
			if (!radeon_check_offset(dev_priv, *value)) {
				DRM_ERROR("Offset failed range check (reg=%04x sz=%d)\n",
					 reg, sz);
				return -EINVAL;
			}
			break;
		default:
			/* Unwhitelisted register: reject the whole packet. */
			DRM_ERROR("Register %04x failed check as flag=%02x\n",
				reg + i * 4, r300_reg_flags[(reg >> 2) + i]);
			return -EINVAL;
		}
	}

	BEGIN_RING(1 + sz);
	OUT_RING(CP_PACKET0(reg, sz - 1));
	OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz);
	ADVANCE_RING();

	return 0;
}
340 
347 static __inline__ int r300_emit_packet0(drm_radeon_private_t *dev_priv,
348  drm_radeon_kcmd_buffer_t *cmdbuf,
349  drm_r300_cmd_header_t header)
350 {
351  int reg;
352  int sz;
353  RING_LOCALS;
354 
355  sz = header.packet0.count;
356  reg = (header.packet0.reghi << 8) | header.packet0.reglo;
357 
358  if (!sz)
359  return 0;
360 
361  if (sz * 4 > drm_buffer_unprocessed(cmdbuf->buffer))
362  return -EINVAL;
363 
364  if (reg + sz * 4 >= 0x10000) {
365  DRM_ERROR("No such registers in hardware reg=%04x sz=%d\n", reg,
366  sz);
367  return -EINVAL;
368  }
369 
370  if (r300_check_range(reg, sz)) {
371  /* go and check everything */
372  return r300_emit_carefully_checked_packet0(dev_priv, cmdbuf,
373  header);
374  }
375  /* the rest of the data is safe to emit, whatever the values the user passed */
376 
377  BEGIN_RING(1 + sz);
378  OUT_RING(CP_PACKET0(reg, sz - 1));
379  OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz);
380  ADVANCE_RING();
381 
382  return 0;
383 }
384 
/*
 * Upload 'count' four-dword vertex-processor (VAP/PVS) vectors to the
 * address encoded in the header.  The VAP block is flushed before and
 * after the upload because it is sensitive to being reprogrammed while
 * busy (see the comments inline).
 * Returns 0 on success, -EINVAL if the buffer holds fewer dwords than
 * the header advertises.
 *
 * NOTE(review): several OUT_RING register writes (the flush/purge
 * headers and the PVS upload-address programming) are missing from this
 * extract.
 */
static __inline__ int r300_emit_vpu(drm_radeon_private_t *dev_priv,
				    drm_radeon_kcmd_buffer_t *cmdbuf,
				    drm_r300_cmd_header_t header)
{
	int sz;
	int addr;
	RING_LOCALS;

	sz = header.vpu.count;
	/* Upload target address, split across two header bytes. */
	addr = (header.vpu.adrhi << 8) | header.vpu.adrlo;

	if (!sz)
		return 0;
	/* Each vector is 4 dwords = 16 bytes of payload. */
	if (sz * 16 > drm_buffer_unprocessed(cmdbuf->buffer))
		return -EINVAL;

	/* VAP is very sensitive so we purge cache before we program it
	 * and we also flush its state before & after */
	BEGIN_RING(6);
	OUT_RING(0);
	ADVANCE_RING();
	/* set flush flag */
	dev_priv->track_flush |= RADEON_FLUSH_EMITED;

	BEGIN_RING(3 + sz * 4);
	OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz * 4);
	ADVANCE_RING();

	BEGIN_RING(2);
	OUT_RING(0);
	ADVANCE_RING();

	return 0;
}
432 
/*
 * Emit the userspace-supplied clear packet: 8 dwords of clear state
 * streamed behind a draw header, followed by a flush so the clear lands
 * before subsequent rendering.
 * Returns 0 on success, -EINVAL if fewer than 8 dwords remain.
 *
 * NOTE(review): the OUT_RING packet-header dwords and the flush register
 * writes are missing from this extract.
 */
static __inline__ int r300_emit_clear(drm_radeon_private_t *dev_priv,
				      drm_radeon_kcmd_buffer_t *cmdbuf)
{
	RING_LOCALS;

	/* The clear payload is fixed at 8 dwords. */
	if (8 * 4 > drm_buffer_unprocessed(cmdbuf->buffer))
		return -EINVAL;

	BEGIN_RING(10);
	OUT_RING_DRM_BUFFER(cmdbuf->buffer, 8);
	ADVANCE_RING();

	BEGIN_RING(4);
	ADVANCE_RING();
	/* set flush flag */
	dev_priv->track_flush |= RADEON_FLUSH_EMITED;

	return 0;
}
463 
464 static __inline__ int r300_emit_3d_load_vbpntr(drm_radeon_private_t *dev_priv,
465  drm_radeon_kcmd_buffer_t *cmdbuf,
466  u32 header)
467 {
468  int count, i, k;
469 #define MAX_ARRAY_PACKET 64
470  u32 *data;
471  u32 narrays;
472  RING_LOCALS;
473 
474  count = (header & RADEON_CP_PACKET_COUNT_MASK) >> 16;
475 
476  if ((count + 1) > MAX_ARRAY_PACKET) {
477  DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n",
478  count);
479  return -EINVAL;
480  }
481  /* carefully check packet contents */
482 
483  /* We have already read the header so advance the buffer. */
484  drm_buffer_advance(cmdbuf->buffer, 4);
485 
486  narrays = *(u32 *)drm_buffer_pointer_to_dword(cmdbuf->buffer, 0);
487  k = 0;
488  i = 1;
489  while ((k < narrays) && (i < (count + 1))) {
490  i++; /* skip attribute field */
491  data = drm_buffer_pointer_to_dword(cmdbuf->buffer, i);
492  if (!radeon_check_offset(dev_priv, *data)) {
493  DRM_ERROR
494  ("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n",
495  k, i);
496  return -EINVAL;
497  }
498  k++;
499  i++;
500  if (k == narrays)
501  break;
502  /* have one more to process, they come in pairs */
503  data = drm_buffer_pointer_to_dword(cmdbuf->buffer, i);
504  if (!radeon_check_offset(dev_priv, *data)) {
505  DRM_ERROR
506  ("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n",
507  k, i);
508  return -EINVAL;
509  }
510  k++;
511  i++;
512  }
513  /* do the counts match what we expect ? */
514  if ((k != narrays) || (i != (count + 1))) {
515  DRM_ERROR
516  ("Malformed 3D_LOAD_VBPNTR packet (k=%d i=%d narrays=%d count+1=%d).\n",
517  k, i, narrays, count + 1);
518  return -EINVAL;
519  }
520 
521  /* all clear, output packet */
522 
523  BEGIN_RING(count + 2);
524  OUT_RING(header);
525  OUT_RING_DRM_BUFFER(cmdbuf->buffer, count + 1);
526  ADVANCE_RING();
527 
528  return 0;
529 }
530 
/*
 * Validate and emit a CNTL_BITBLT_MULTI packet: when the GMC flags say a
 * blit source/destination offset is embedded in the packet, each offset
 * (stored as dword >> 10) must pass radeon_check_offset() before the
 * packet is streamed into the ring.
 * Returns 0 on success, -EINVAL on a failed offset check.
 *
 * NOTE(review): this extract is structurally broken -- the condition
 * line testing *cmd1 for the first offset and part of the second
 * condition are missing, leaving unbalanced braces and cmd1 used outside
 * its visible declaration scope.  Compare with the full source.
 */
static __inline__ int r300_emit_bitblt_multi(drm_radeon_private_t *dev_priv,
					     drm_radeon_kcmd_buffer_t *cmdbuf)
{
	u32 *cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 0);
	int count, ret;
	RING_LOCALS;


	count = (*cmd & RADEON_CP_PACKET_COUNT_MASK) >> 16;

	if (*cmd & 0x8000) {
		u32 offset;
		u32 *cmd1 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 1);

		u32 *cmd2 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 2);
		/* Offsets are encoded shifted right by 10 bits. */
		offset = *cmd2 << 10;
		ret = !radeon_check_offset(dev_priv, offset);
		if (ret) {
			DRM_ERROR("Invalid bitblt first offset is %08X\n", offset);
			return -EINVAL;
		}
	}

	/* NOTE(review): truncated condition -- second half missing. */
	if ((*cmd1 & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
		u32 *cmd3 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 3);
		offset = *cmd3 << 10;
		ret = !radeon_check_offset(dev_priv, offset);
		if (ret) {
			DRM_ERROR("Invalid bitblt second offset is %08X\n", offset);
			return -EINVAL;
		}

	}
	}

	BEGIN_RING(count+2);
	OUT_RING_DRM_BUFFER(cmdbuf->buffer, count + 2);
	ADVANCE_RING();

	return 0;
}
575 
/*
 * Validate and emit a 3D_DRAW_INDX_2 packet and, when the indices are
 * not inlined (count == 0), the mandatory INDX_BUFFER packet that must
 * immediately follow it in the stream.
 *
 * The packet's payload size must match the index count advertised in the
 * VAP_VF_CNTL dword (halved, rounded up, for 16-bit indices since two
 * pack per dword).  For the indirect case the follow-up packet's
 * destination register, buffer offset and size are all validated before
 * emission.
 *
 * Returns 0 on success, -EINVAL on any mismatch or failed offset check.
 */
static __inline__ int r300_emit_draw_indx_2(drm_radeon_private_t *dev_priv,
					    drm_radeon_kcmd_buffer_t *cmdbuf)
{
	u32 *cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 0);
	u32 *cmd1 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 1);
	int count;
	int expected_count;
	RING_LOCALS;

	count = (*cmd & RADEON_CP_PACKET_COUNT_MASK) >> 16;

	expected_count = *cmd1 >> 16;
	if (!(*cmd1 & R300_VAP_VF_CNTL__INDEX_SIZE_32bit))
		/* 16-bit indices: two per dword, round up. */
		expected_count = (expected_count+1)/2;

	if (count && count != expected_count) {
		DRM_ERROR("3D_DRAW_INDX_2: packet size %i, expected %i\n",
			count, expected_count);
		return -EINVAL;
	}

	/* Emit the draw packet itself (header + count+1 payload dwords). */
	BEGIN_RING(count+2);
	OUT_RING_DRM_BUFFER(cmdbuf->buffer, count + 2);
	ADVANCE_RING();

	if (!count) {
		drm_r300_cmd_header_t stack_header, *header;
		u32 *cmd1, *cmd2, *cmd3;	/* note: cmd1 shadows the outer cmd1 */

		if (drm_buffer_unprocessed(cmdbuf->buffer)
		    < 4*4 + sizeof(stack_header)) {
			DRM_ERROR("3D_DRAW_INDX_2: expect subsequent INDX_BUFFER, but stream is too short.\n");
			return -EINVAL;
		}

		header = drm_buffer_read_object(cmdbuf->buffer,
				sizeof(stack_header), &stack_header);

		cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 0);
		cmd1 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 1);
		cmd2 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 2);
		cmd3 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 3);

		/* Must be a raw packet3 carrying CP_INDX_BUFFER. */
		if (header->header.cmd_type != R300_CMD_PACKET3 ||
		    header->packet3.packet != R300_CMD_PACKET3_RAW ||
		    *cmd != CP_PACKET3(RADEON_CP_INDX_BUFFER, 2)) {
			DRM_ERROR("3D_DRAW_INDX_2: expect subsequent INDX_BUFFER.\n");
			return -EINVAL;
		}

		/* Destination must be the index-buffer register. */
		if ((*cmd1 & 0x8000ffff) != 0x80000810) {
			DRM_ERROR("Invalid indx_buffer reg address %08X\n",
				  *cmd1);
			return -EINVAL;
		}
		/* Index buffer must live in a GPU-reachable aperture. */
		if (!radeon_check_offset(dev_priv, *cmd2)) {
			DRM_ERROR("Invalid indx_buffer offset is %08X\n",
				  *cmd2);
			return -EINVAL;
		}
		/* Its size must match the draw's index count. */
		if (*cmd3 != expected_count) {
			DRM_ERROR("INDX_BUFFER: buffer size %i, expected %i\n",
				  *cmd3, expected_count);
			return -EINVAL;
		}

		BEGIN_RING(4);
		OUT_RING_DRM_BUFFER(cmdbuf->buffer, 4);
		ADVANCE_RING();
	}

	return 0;
}
649 
/*
 * Dispatch a raw CP type-3 packet from the command buffer.  Packet types
 * that embed GPU addresses get dedicated validation helpers; draw
 * packets clear the flush-tracking flags; anything unrecognised is
 * rejected.
 * Returns 0 on success, -EINVAL on a truncated or unknown packet.
 *
 * NOTE(review): several switch case labels are missing from this
 * extract (the arms between RADEON_3D_LOAD_VBPNTR and RADEON_CP_NOP are
 * truncated, leaving orphaned statements).  Compare with the full
 * source before editing the switch.
 */
static __inline__ int r300_emit_raw_packet3(drm_radeon_private_t *dev_priv,
					    drm_radeon_kcmd_buffer_t *cmdbuf)
{
	u32 *header;
	int count;
	RING_LOCALS;

	if (4 > drm_buffer_unprocessed(cmdbuf->buffer))
		return -EINVAL;

	/* Fixme !! This simply emits a packet without much checking.
	   We need to be smarter. */

	/* obtain first word - actual packet3 header */
	header = drm_buffer_pointer_to_dword(cmdbuf->buffer, 0);

	/* Is it packet 3 ? */
	if ((*header >> 30) != 0x3) {
		DRM_ERROR("Not a packet3 header (0x%08x)\n", *header);
		return -EINVAL;
	}

	count = (*header >> 16) & 0x3fff;

	/* Check again now that we know how much data to expect */
	if ((count + 2) * 4 > drm_buffer_unprocessed(cmdbuf->buffer)) {
		DRM_ERROR
		    ("Expected packet3 of length %d but have only %d bytes left\n",
		     (count + 2) * 4, drm_buffer_unprocessed(cmdbuf->buffer));
		return -EINVAL;
	}

	/* Is it a packet type we know about ? */
	switch (*header & 0xff00) {
	case RADEON_3D_LOAD_VBPNTR:	/* load vertex array pointers */
		return r300_emit_3d_load_vbpntr(dev_priv, cmdbuf, *header);

	/* NOTE(review): missing case label (bitblt) in this extract */
		return r300_emit_bitblt_multi(dev_priv, cmdbuf);

	/* NOTE(review): missing case label (INDX_BUFFER) in this extract */
		DRM_ERROR("packet3 INDX_BUFFER without preceding 3D_DRAW_INDX_2 is illegal.\n");
		return -EINVAL;
	/* triggers drawing using in-packet vertex data */
	/* triggers drawing of vertex buffers setup elsewhere */
		dev_priv->track_flush &= ~(RADEON_FLUSH_EMITED |
		break;
	/* triggers drawing using indices to vertex buffer */
	/* whenever we send vertex we clear flush & purge */
		dev_priv->track_flush &= ~(RADEON_FLUSH_EMITED |
		return r300_emit_draw_indx_2(dev_priv, cmdbuf);
	case RADEON_CP_NOP:
		/* these packets are safe */
		break;
	default:
		DRM_ERROR("Unknown packet3 header (0x%08x)\n", *header);
		return -EINVAL;
	}

	BEGIN_RING(count + 2);
	OUT_RING_DRM_BUFFER(cmdbuf->buffer, count + 2);
	ADVANCE_RING();

	return 0;
}
721 
/*
 * Emit a typed packet3 command once per cliprect batch: when there are
 * more cliprects than the hardware handles at once, the same packet is
 * replayed for each batch by rewinding the buffer iterator to its
 * original position between passes.
 * Returns 0 on success or the error from the per-packet emitter.
 *
 * NOTE(review): the switch case labels (R300_CMD_PACKET3_CLEAR /
 * R300_CMD_PACKET3_RAW) and the loop-advance statement incrementing n
 * are missing from this extract.
 */
static __inline__ int r300_emit_packet3(drm_radeon_private_t *dev_priv,
					drm_radeon_kcmd_buffer_t *cmdbuf,
					drm_r300_cmd_header_t header)
{
	int n;
	int ret;
	/* Remember where the packet starts so it can be replayed. */
	int orig_iter = cmdbuf->buffer->iterator;

	/* This is a do-while-loop so that we run the interior at least once,
	 * even if cmdbuf->nbox is 0. Compare r300_emit_cliprects for rationale.
	 */
	n = 0;
	do {
		if (cmdbuf->nbox > R300_SIMULTANEOUS_CLIPRECTS) {
			ret = r300_emit_cliprects(dev_priv, cmdbuf, n);
			if (ret)
				return ret;

			/* Rewind so the packet is re-read for this batch. */
			cmdbuf->buffer->iterator = orig_iter;
		}

		switch (header.packet3.packet) {
		/* NOTE(review): missing case label in this extract */
			DRM_DEBUG("R300_CMD_PACKET3_CLEAR\n");
			ret = r300_emit_clear(dev_priv, cmdbuf);
			if (ret) {
				DRM_ERROR("r300_emit_clear failed\n");
				return ret;
			}
			break;

		/* NOTE(review): missing case label in this extract */
			DRM_DEBUG("R300_CMD_PACKET3_RAW\n");
			ret = r300_emit_raw_packet3(dev_priv, cmdbuf);
			if (ret) {
				DRM_ERROR("r300_emit_raw_packet3 failed\n");
				return ret;
			}
			break;

		default:
			DRM_ERROR("bad packet3 type %i at byte %d\n",
				  header.packet3.packet,
				  cmdbuf->buffer->iterator - (int)sizeof(header));
			return -EINVAL;
		}

	/* NOTE(review): the statement advancing n is missing here */
	} while (n < cmdbuf->nbox);

	return 0;
}
778 
779 /* Some of the R300 chips seem to be extremely touchy about the two registers
780  * that are configured in r300_pacify.
781  * Among the worst offenders seems to be the R300 ND (0x4E44): When userspace
782  * sends a command buffer that contains only state setting commands and a
783  * vertex program/parameter upload sequence, this will eventually lead to a
784  * lockup, unless the sequence is bracketed by calls to r300_pacify.
785  * So we should take great care to *always* call r300_pacify before
786  * *anything* 3D related, and again afterwards. This is what the
787  * call bracket in r300_do_cp_cmdbuf is for.
788  */
789 
/*
 * Flush -- and, unless a purge was already emitted since the last draw,
 * also free -- the Z, 3D and 2D caches and quiesce the engine.  Must
 * bracket any 3D work submitted by userspace; see the comment block
 * above for why.
 *
 * NOTE(review): the OUT_RING register-header dwords are missing from
 * this extract (only the payload writes remain), as is the final
 * statement setting the flush/purge flags.
 */
static void r300_pacify(drm_radeon_private_t *dev_priv)
{
	uint32_t cache_z, cache_3d, cache_2d;
	RING_LOCALS;

	cache_z = R300_ZC_FLUSH;
	cache_2d = R300_RB2D_DC_FLUSH;
	cache_3d = R300_RB3D_DC_FLUSH;
	if (!(dev_priv->track_flush & RADEON_PURGE_EMITED)) {
		/* we can purge, primitive where draw since last purge */
		cache_z |= R300_ZC_FREE;
		cache_2d |= R300_RB2D_DC_FREE;
		cache_3d |= R300_RB3D_DC_FREE;
	}

	/* flush & purge zbuffer */
	BEGIN_RING(2);
	OUT_RING(cache_z);
	ADVANCE_RING();
	/* flush & purge 3d */
	BEGIN_RING(2);
	OUT_RING(cache_3d);
	ADVANCE_RING();
	/* flush & purge texture */
	BEGIN_RING(2);
	OUT_RING(0);
	ADVANCE_RING();
	/* FIXME: is this one really needed ? */
	BEGIN_RING(2);
	OUT_RING(0);
	ADVANCE_RING();
	BEGIN_RING(2);
	ADVANCE_RING();
	/* flush & purge 2d through E2 as RB2D will trigger lockup */
	BEGIN_RING(4);
	OUT_RING(cache_2d);
	ADVANCE_RING();
	/* set flush & purge flags */
}
843 
849 static void r300_discard_buffer(struct drm_device *dev, struct drm_master *master, struct drm_buf *buf)
850 {
851  drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
852  struct drm_radeon_master_private *master_priv = master->driver_priv;
853 
854  buf_priv->age = ++master_priv->sarea_priv->last_dispatch;
855  buf->pending = 1;
856  buf->used = 0;
857 }
858 
/*
 * Emit a WAIT_UNTIL packet translating the R300 wait flags from the
 * command header into RADEON_WAIT_* bits.  A zero flag field or an
 * unrecognised combination emits nothing.
 *
 * NOTE(review): the bodies of the R300_NEW_WAIT_* cases (and their
 * remaining case labels) plus the CP_PACKET0 header dword are missing
 * from this extract.
 */
static void r300_cmd_wait(drm_radeon_private_t * dev_priv,
			  drm_r300_cmd_header_t header)
{
	u32 wait_until;
	RING_LOCALS;

	if (!header.wait.flags)
		return;

	wait_until = 0;

	switch(header.wait.flags) {
	case R300_WAIT_2D:
		wait_until = RADEON_WAIT_2D_IDLE;
		break;
	case R300_WAIT_3D:
		wait_until = RADEON_WAIT_3D_IDLE;
		break;
	case R300_NEW_WAIT_2D_3D:
		/* NOTE(review): assignment missing from this extract */
		break;
		break;
		break;
		break;
	default:
		return;
	}

	BEGIN_RING(2);
	OUT_RING(wait_until);
	ADVANCE_RING();
}
899 
900 static int r300_scratch(drm_radeon_private_t *dev_priv,
901  drm_radeon_kcmd_buffer_t *cmdbuf,
902  drm_r300_cmd_header_t header)
903 {
904  u32 *ref_age_base;
905  u32 i, *buf_idx, h_pending;
906  u64 *ptr_addr;
907  u64 stack_ptr_addr;
908  RING_LOCALS;
909 
910  if (drm_buffer_unprocessed(cmdbuf->buffer) <
911  (sizeof(u64) + header.scratch.n_bufs * sizeof(*buf_idx))) {
912  return -EINVAL;
913  }
914 
915  if (header.scratch.reg >= 5) {
916  return -EINVAL;
917  }
918 
919  dev_priv->scratch_ages[header.scratch.reg]++;
920 
921  ptr_addr = drm_buffer_read_object(cmdbuf->buffer,
922  sizeof(stack_ptr_addr), &stack_ptr_addr);
923  ref_age_base = (u32 *)(unsigned long)get_unaligned(ptr_addr);
924 
925  for (i=0; i < header.scratch.n_bufs; i++) {
926  buf_idx = drm_buffer_pointer_to_dword(cmdbuf->buffer, 0);
927  *buf_idx *= 2; /* 8 bytes per buf */
928 
929  if (DRM_COPY_TO_USER(ref_age_base + *buf_idx,
930  &dev_priv->scratch_ages[header.scratch.reg],
931  sizeof(u32)))
932  return -EINVAL;
933 
934  if (DRM_COPY_FROM_USER(&h_pending,
935  ref_age_base + *buf_idx + 1,
936  sizeof(u32)))
937  return -EINVAL;
938 
939  if (h_pending == 0)
940  return -EINVAL;
941 
942  h_pending--;
943 
944  if (DRM_COPY_TO_USER(ref_age_base + *buf_idx + 1,
945  &h_pending,
946  sizeof(u32)))
947  return -EINVAL;
948 
949  drm_buffer_advance(cmdbuf->buffer, sizeof(*buf_idx));
950  }
951 
952  BEGIN_RING(2);
953  OUT_RING( CP_PACKET0( RADEON_SCRATCH_REG0 + header.scratch.reg * 4, 0 ) );
954  OUT_RING( dev_priv->scratch_ages[header.scratch.reg] );
955  ADVANCE_RING();
956 
957  return 0;
958 }
959 
/*
 * Upload R500 fragment-program vectors.  The header encodes a 9-bit base
 * address (bit 1 of the flags supplies bit 8) plus type and clamp flags
 * that are folded into bits 16/17 of the programmed index; the payload
 * stride is 4 or 6 dwords per vector depending on the constant-type
 * flag.
 * Returns 0 on success, -EINVAL if the payload is short.
 *
 * NOTE(review): the two OUT_RING writes programming the vector index
 * register before the payload are missing from this extract.
 */
static inline int r300_emit_r500fp(drm_radeon_private_t *dev_priv,
				   drm_radeon_kcmd_buffer_t *cmdbuf,
				   drm_r300_cmd_header_t header)
{
	int sz;
	int addr;
	int type;
	int isclamp;
	int stride;
	RING_LOCALS;

	sz = header.r500fp.count;
	/* address is 9 bits 0 - 8, bit 1 of flags is part of address */
	addr = ((header.r500fp.adrhi_flags & 1) << 8) | header.r500fp.adrlo;

	type = !!(header.r500fp.adrhi_flags & R500FP_CONSTANT_TYPE);
	isclamp = !!(header.r500fp.adrhi_flags & R500FP_CONSTANT_CLAMP);

	addr |= (type << 16);
	addr |= (isclamp << 17);

	/* Constant-type vectors are 4 dwords; otherwise 6. */
	stride = type ? 4 : 6;

	DRM_DEBUG("r500fp %d %d type: %d\n", sz, addr, type);
	if (!sz)
		return 0;
	if (sz * stride * 4 > drm_buffer_unprocessed(cmdbuf->buffer))
		return -EINVAL;

	BEGIN_RING(3 + sz * stride);
	OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz * stride);

	ADVANCE_RING();

	return 0;
}
1003 
1004 
/*
 * Top-level command-buffer parser: pacify the engine, emit cliprects,
 * then loop over the typed command headers (packet0 / vpu / packet3 /
 * end3d / delay / discard / wait / scratch / r500fp) until the buffer is
 * drained.  Always re-pacifies on exit and emits the vertex-buffer age
 * outside the pacifier bracket (see the comment near the end).
 *
 * NOTE(review): the lines carrying the return type, function name and
 * first parameter are missing from this extract (this is
 * r300_do_cp_cmdbuf per the comment above r300_pacify); the OUT_RING in
 * the CP_DELAY loop and the scratch-register write in the age-emission
 * path are missing too.
 */
				struct drm_file *file_priv,
				drm_radeon_kcmd_buffer_t *cmdbuf)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
	struct drm_device_dma *dma = dev->dma;
	struct drm_buf *buf = NULL;
	int emit_dispatch_age = 0;
	int ret = 0;

	DRM_DEBUG("\n");

	/* pacify */
	r300_pacify(dev_priv);

	/* Small batches are emitted once up front; larger ones are handled
	 * per-packet inside r300_emit_packet3. */
	if (cmdbuf->nbox <= R300_SIMULTANEOUS_CLIPRECTS) {
		ret = r300_emit_cliprects(dev_priv, cmdbuf, 0);
		if (ret)
			goto cleanup;
	}

	while (drm_buffer_unprocessed(cmdbuf->buffer)
			>= sizeof(drm_r300_cmd_header_t)) {
		int idx;
		drm_r300_cmd_header_t *header, stack_header;

		header = drm_buffer_read_object(cmdbuf->buffer,
				sizeof(stack_header), &stack_header);

		switch (header->header.cmd_type) {
		case R300_CMD_PACKET0:
			DRM_DEBUG("R300_CMD_PACKET0\n");
			ret = r300_emit_packet0(dev_priv, cmdbuf, *header);
			if (ret) {
				DRM_ERROR("r300_emit_packet0 failed\n");
				goto cleanup;
			}
			break;

		case R300_CMD_VPU:
			DRM_DEBUG("R300_CMD_VPU\n");
			ret = r300_emit_vpu(dev_priv, cmdbuf, *header);
			if (ret) {
				DRM_ERROR("r300_emit_vpu failed\n");
				goto cleanup;
			}
			break;

		case R300_CMD_PACKET3:
			DRM_DEBUG("R300_CMD_PACKET3\n");
			ret = r300_emit_packet3(dev_priv, cmdbuf, *header);
			if (ret) {
				DRM_ERROR("r300_emit_packet3 failed\n");
				goto cleanup;
			}
			break;

		case R300_CMD_END3D:
			DRM_DEBUG("R300_CMD_END3D\n");
			/* TODO:
			   Ideally userspace driver should not need to issue this call,
			   i.e. the drm driver should issue it automatically and prevent
			   lockups.

			   In practice, we do not understand why this call is needed and what
			   it does (except for some vague guesses that it has to do with cache
			   coherence) and so the user space driver does it.

			   Once we are sure which uses prevent lockups the code could be moved
			   into the kernel and the userspace driver will not
			   need to use this command.

			   Note that issuing this command does not hurt anything
			   except, possibly, performance */
			r300_pacify(dev_priv);
			break;

		case R300_CMD_CP_DELAY:
			/* simple enough, we can do it here */
			DRM_DEBUG("R300_CMD_CP_DELAY\n");
			{
				int i;
				RING_LOCALS;

				/* NOTE(review): the OUT_RING inside this loop
				 * is missing from this extract. */
				BEGIN_RING(header->delay.count);
				for (i = 0; i < header->delay.count; i++)
				ADVANCE_RING();
			}
			break;

		case R300_CMD_DMA_DISCARD:
			DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
			idx = header->dma.buf_idx;
			if (idx < 0 || idx >= dma->buf_count) {
				DRM_ERROR("buffer index %d (of %d max)\n",
					  idx, dma->buf_count - 1);
				ret = -EINVAL;
				goto cleanup;
			}

			/* A buffer may only be discarded by its owner and
			 * only once. */
			buf = dma->buflist[idx];
			if (buf->file_priv != file_priv || buf->pending) {
				DRM_ERROR("bad buffer %p %p %d\n",
					  buf->file_priv, file_priv,
					  buf->pending);
				ret = -EINVAL;
				goto cleanup;
			}

			emit_dispatch_age = 1;
			r300_discard_buffer(dev, file_priv->master, buf);
			break;

		case R300_CMD_WAIT:
			DRM_DEBUG("R300_CMD_WAIT\n");
			r300_cmd_wait(dev_priv, *header);
			break;

		case R300_CMD_SCRATCH:
			DRM_DEBUG("R300_CMD_SCRATCH\n");
			ret = r300_scratch(dev_priv, cmdbuf, *header);
			if (ret) {
				DRM_ERROR("r300_scratch failed\n");
				goto cleanup;
			}
			break;

		case R300_CMD_R500FP:
			/* R500-only command; reject it on older families. */
			if ((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_RV515) {
				DRM_ERROR("Calling r500 command on r300 card\n");
				ret = -EINVAL;
				goto cleanup;
			}
			DRM_DEBUG("R300_CMD_R500FP\n");
			ret = r300_emit_r500fp(dev_priv, cmdbuf, *header);
			if (ret) {
				DRM_ERROR("r300_emit_r500fp failed\n");
				goto cleanup;
			}
			break;
		default:
			DRM_ERROR("bad cmd_type %i at byte %d\n",
				  header->header.cmd_type,
				  cmdbuf->buffer->iterator - (int)sizeof(*header));
			ret = -EINVAL;
			goto cleanup;
		}
	}

	DRM_DEBUG("END\n");

      cleanup:
	r300_pacify(dev_priv);

	/* We emit the vertex buffer age here, outside the pacifier "brackets"
	 * for two reasons:
	 *  (1) This may coalesce multiple age emissions into a single one and
	 *  (2) more importantly, some chips lock up hard when scratch registers
	 *      are written inside the pacifier bracket.
	 */
	if (emit_dispatch_age) {
		RING_LOCALS;

		/* Emit the vertex buffer age */
		BEGIN_RING(2);
		ADVANCE_RING();
	}

	COMMIT_RING();

	return ret;
}