Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
i915_gem_tiling.c
Go to the documentation of this file.
1 /*
2  * Copyright © 2008 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  * Eric Anholt <[email protected]>
25  *
26  */
27 
28 #include <linux/string.h>
29 #include <linux/bitops.h>
30 #include <drm/drmP.h>
31 #include <drm/i915_drm.h>
32 #include "i915_drv.h"
33 
87 void
89 {
90  drm_i915_private_t *dev_priv = dev->dev_private;
93 
94  if (IS_VALLEYVIEW(dev)) {
95  swizzle_x = I915_BIT_6_SWIZZLE_NONE;
96  swizzle_y = I915_BIT_6_SWIZZLE_NONE;
97  } else if (INTEL_INFO(dev)->gen >= 6) {
98  uint32_t dimm_c0, dimm_c1;
99  dimm_c0 = I915_READ(MAD_DIMM_C0);
100  dimm_c1 = I915_READ(MAD_DIMM_C1);
103  /* Enable swizzling when the channels are populated with
104  * identically sized dimms. We don't need to check the 3rd
105  * channel because no cpu with gpu attached ships in that
106  * configuration. Also, swizzling only makes sense for 2
107  * channels anyway. */
108  if (dimm_c0 == dimm_c1) {
109  swizzle_x = I915_BIT_6_SWIZZLE_9_10;
110  swizzle_y = I915_BIT_6_SWIZZLE_9;
111  } else {
112  swizzle_x = I915_BIT_6_SWIZZLE_NONE;
113  swizzle_y = I915_BIT_6_SWIZZLE_NONE;
114  }
115  } else if (IS_GEN5(dev)) {
116  /* On Ironlake whatever DRAM config, GPU always do
117  * same swizzling setup.
118  */
119  swizzle_x = I915_BIT_6_SWIZZLE_9_10;
120  swizzle_y = I915_BIT_6_SWIZZLE_9;
121  } else if (IS_GEN2(dev)) {
122  /* As far as we know, the 865 doesn't have these bit 6
123  * swizzling issues.
124  */
125  swizzle_x = I915_BIT_6_SWIZZLE_NONE;
126  swizzle_y = I915_BIT_6_SWIZZLE_NONE;
127  } else if (IS_MOBILE(dev) || (IS_GEN3(dev) && !IS_G33(dev))) {
128  uint32_t dcc;
129 
130  /* On 9xx chipsets, channel interleave by the CPU is
131  * determined by DCC. For single-channel, neither the CPU
132  * nor the GPU do swizzling. For dual channel interleaved,
133  * the GPU's interleave is bit 9 and 10 for X tiled, and bit
134  * 9 for Y tiled. The CPU's interleave is independent, and
135  * can be based on either bit 11 (haven't seen this yet) or
136  * bit 17 (common).
137  */
138  dcc = I915_READ(DCC);
139  switch (dcc & DCC_ADDRESSING_MODE_MASK) {
142  swizzle_x = I915_BIT_6_SWIZZLE_NONE;
143  swizzle_y = I915_BIT_6_SWIZZLE_NONE;
144  break;
146  if (dcc & DCC_CHANNEL_XOR_DISABLE) {
147  /* This is the base swizzling by the GPU for
148  * tiled buffers.
149  */
150  swizzle_x = I915_BIT_6_SWIZZLE_9_10;
151  swizzle_y = I915_BIT_6_SWIZZLE_9;
152  } else if ((dcc & DCC_CHANNEL_XOR_BIT_17) == 0) {
153  /* Bit 11 swizzling by the CPU in addition. */
154  swizzle_x = I915_BIT_6_SWIZZLE_9_10_11;
155  swizzle_y = I915_BIT_6_SWIZZLE_9_11;
156  } else {
157  /* Bit 17 swizzling by the CPU in addition. */
158  swizzle_x = I915_BIT_6_SWIZZLE_9_10_17;
159  swizzle_y = I915_BIT_6_SWIZZLE_9_17;
160  }
161  break;
162  }
163  if (dcc == 0xffffffff) {
164  DRM_ERROR("Couldn't read from MCHBAR. "
165  "Disabling tiling.\n");
166  swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
167  swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
168  }
169  } else {
170  /* The 965, G33, and newer, have a very flexible memory
171  * configuration. It will enable dual-channel mode
172  * (interleaving) on as much memory as it can, and the GPU
173  * will additionally sometimes enable different bit 6
174  * swizzling for tiled objects from the CPU.
175  *
176  * Here's what I found on the G965:
177  * slot fill memory size swizzling
178  * 0A 0B 1A 1B 1-ch 2-ch
179  * 512 0 0 0 512 0 O
180  * 512 0 512 0 16 1008 X
181  * 512 0 0 512 16 1008 X
182  * 0 512 0 512 16 1008 X
183  * 1024 1024 1024 0 2048 1024 O
184  *
185  * We could probably detect this based on either the DRB
186  * matching, which was the case for the swizzling required in
187  * the table above, or from the 1-ch value being less than
188  * the minimum size of a rank.
189  */
191  swizzle_x = I915_BIT_6_SWIZZLE_NONE;
192  swizzle_y = I915_BIT_6_SWIZZLE_NONE;
193  } else {
194  swizzle_x = I915_BIT_6_SWIZZLE_9_10;
195  swizzle_y = I915_BIT_6_SWIZZLE_9;
196  }
197  }
198 
199  dev_priv->mm.bit_6_swizzle_x = swizzle_x;
200  dev_priv->mm.bit_6_swizzle_y = swizzle_y;
201 }
202 
203 /* Check pitch constriants for all chips & tiling formats */
204 static bool
205 i915_tiling_ok(struct drm_device *dev, int stride, int size, int tiling_mode)
206 {
207  int tile_width;
208 
209  /* Linear is always fine */
210  if (tiling_mode == I915_TILING_NONE)
211  return true;
212 
213  if (IS_GEN2(dev) ||
214  (tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev)))
215  tile_width = 128;
216  else
217  tile_width = 512;
218 
219  /* check maximum stride & object size */
220  if (INTEL_INFO(dev)->gen >= 4) {
221  /* i965 stores the end address of the gtt mapping in the fence
222  * reg, so dont bother to check the size */
223  if (stride / 128 > I965_FENCE_MAX_PITCH_VAL)
224  return false;
225  } else {
226  if (stride > 8192)
227  return false;
228 
229  if (IS_GEN3(dev)) {
230  if (size > I830_FENCE_MAX_SIZE_VAL << 20)
231  return false;
232  } else {
233  if (size > I830_FENCE_MAX_SIZE_VAL << 19)
234  return false;
235  }
236  }
237 
238  /* 965+ just needs multiples of tile width */
239  if (INTEL_INFO(dev)->gen >= 4) {
240  if (stride & (tile_width - 1))
241  return false;
242  return true;
243  }
244 
245  /* Pre-965 needs power of two tile widths */
246  if (stride < tile_width)
247  return false;
248 
249  if (stride & (stride - 1))
250  return false;
251 
252  return true;
253 }
254 
255 /* Is the current GTT allocation valid for the change in tiling? */
256 static bool
257 i915_gem_object_fence_ok(struct drm_i915_gem_object *obj, int tiling_mode)
258 {
259  u32 size;
260 
261  if (tiling_mode == I915_TILING_NONE)
262  return true;
263 
264  if (INTEL_INFO(obj->base.dev)->gen >= 4)
265  return true;
266 
267  if (INTEL_INFO(obj->base.dev)->gen == 3) {
268  if (obj->gtt_offset & ~I915_FENCE_START_MASK)
269  return false;
270  } else {
271  if (obj->gtt_offset & ~I830_FENCE_START_MASK)
272  return false;
273  }
274 
275  /*
276  * Previous chips need to be aligned to the size of the smallest
277  * fence register that can contain the object.
278  */
279  if (INTEL_INFO(obj->base.dev)->gen == 3)
280  size = 1024*1024;
281  else
282  size = 512*1024;
283 
284  while (size < obj->base.size)
285  size <<= 1;
286 
287  if (obj->gtt_space->size != size)
288  return false;
289 
290  if (obj->gtt_offset & (size - 1))
291  return false;
292 
293  return true;
294 }
295 
300 int
302  struct drm_file *file)
303 {
305  drm_i915_private_t *dev_priv = dev->dev_private;
306  struct drm_i915_gem_object *obj;
307  int ret = 0;
308 
309  obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
310  if (&obj->base == NULL)
311  return -ENOENT;
312 
313  if (!i915_tiling_ok(dev,
314  args->stride, obj->base.size, args->tiling_mode)) {
315  drm_gem_object_unreference_unlocked(&obj->base);
316  return -EINVAL;
317  }
318 
319  if (obj->pin_count) {
320  drm_gem_object_unreference_unlocked(&obj->base);
321  return -EBUSY;
322  }
323 
324  if (args->tiling_mode == I915_TILING_NONE) {
326  args->stride = 0;
327  } else {
328  if (args->tiling_mode == I915_TILING_X)
329  args->swizzle_mode = dev_priv->mm.bit_6_swizzle_x;
330  else
331  args->swizzle_mode = dev_priv->mm.bit_6_swizzle_y;
332 
333  /* Hide bit 17 swizzling from the user. This prevents old Mesa
334  * from aborting the application on sw fallbacks to bit 17,
335  * and we use the pread/pwrite bit17 paths to swizzle for it.
336  * If there was a user that was relying on the swizzle
337  * information for drm_intel_bo_map()ed reads/writes this would
338  * break it, but we don't have any of those.
339  */
344 
345  /* If we can't handle the swizzling, make it untiled. */
349  args->stride = 0;
350  }
351  }
352 
353  mutex_lock(&dev->struct_mutex);
354  if (args->tiling_mode != obj->tiling_mode ||
355  args->stride != obj->stride) {
356  /* We need to rebind the object if its current allocation
357  * no longer meets the alignment restrictions for its new
358  * tiling mode. Otherwise we can just leave it alone, but
359  * need to ensure that any fence register is updated before
360  * the next fenced (either through the GTT or by the BLT unit
361  * on older GPUs) access.
362  *
363  * After updating the tiling parameters, we then flag whether
364  * we need to update an associated fence register. Note this
365  * has to also include the unfenced register the GPU uses
366  * whilst executing a fenced command for an untiled object.
367  */
368 
369  obj->map_and_fenceable =
370  obj->gtt_space == NULL ||
371  (obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end &&
372  i915_gem_object_fence_ok(obj, args->tiling_mode));
373 
374  /* Rebind if we need a change of alignment */
375  if (!obj->map_and_fenceable) {
376  u32 unfenced_alignment =
378  obj->base.size,
379  args->tiling_mode);
380  if (obj->gtt_offset & (unfenced_alignment - 1))
381  ret = i915_gem_object_unbind(obj);
382  }
383 
384  if (ret == 0) {
385  obj->fence_dirty =
386  obj->fenced_gpu_access ||
388 
389  obj->tiling_mode = args->tiling_mode;
390  obj->stride = args->stride;
391 
392  /* Force the fence to be reacquired for GTT access */
394  }
395  }
396  /* we have to maintain this existing ABI... */
397  args->stride = obj->stride;
398  args->tiling_mode = obj->tiling_mode;
399  drm_gem_object_unreference(&obj->base);
400  mutex_unlock(&dev->struct_mutex);
401 
402  return ret;
403 }
404 
408 int
410  struct drm_file *file)
411 {
413  drm_i915_private_t *dev_priv = dev->dev_private;
414  struct drm_i915_gem_object *obj;
415 
416  obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
417  if (&obj->base == NULL)
418  return -ENOENT;
419 
420  mutex_lock(&dev->struct_mutex);
421 
422  args->tiling_mode = obj->tiling_mode;
423  switch (obj->tiling_mode) {
424  case I915_TILING_X:
425  args->swizzle_mode = dev_priv->mm.bit_6_swizzle_x;
426  break;
427  case I915_TILING_Y:
428  args->swizzle_mode = dev_priv->mm.bit_6_swizzle_y;
429  break;
430  case I915_TILING_NONE:
432  break;
433  default:
434  DRM_ERROR("unknown tiling mode\n");
435  }
436 
437  /* Hide bit 17 from the user -- see comment in i915_gem_set_tiling */
442 
443  drm_gem_object_unreference(&obj->base);
444  mutex_unlock(&dev->struct_mutex);
445 
446  return 0;
447 }
448 
454 static void
455 i915_gem_swizzle_page(struct page *page)
456 {
457  char temp[64];
458  char *vaddr;
459  int i;
460 
461  vaddr = kmap(page);
462 
463  for (i = 0; i < PAGE_SIZE; i += 128) {
464  memcpy(temp, &vaddr[i], 64);
465  memcpy(&vaddr[i], &vaddr[i + 64], 64);
466  memcpy(&vaddr[i + 64], temp, 64);
467  }
468 
469  kunmap(page);
470 }
471 
472 void
474 {
475  struct scatterlist *sg;
476  int page_count = obj->base.size >> PAGE_SHIFT;
477  int i;
478 
479  if (obj->bit_17 == NULL)
480  return;
481 
482  for_each_sg(obj->pages->sgl, sg, page_count, i) {
483  struct page *page = sg_page(sg);
484  char new_bit_17 = page_to_phys(page) >> 17;
485  if ((new_bit_17 & 0x1) !=
486  (test_bit(i, obj->bit_17) != 0)) {
487  i915_gem_swizzle_page(page);
488  set_page_dirty(page);
489  }
490  }
491 }
492 
493 void
495 {
496  struct scatterlist *sg;
497  int page_count = obj->base.size >> PAGE_SHIFT;
498  int i;
499 
500  if (obj->bit_17 == NULL) {
501  obj->bit_17 = kmalloc(BITS_TO_LONGS(page_count) *
502  sizeof(long), GFP_KERNEL);
503  if (obj->bit_17 == NULL) {
504  DRM_ERROR("Failed to allocate memory for bit 17 "
505  "record\n");
506  return;
507  }
508  }
509 
510  for_each_sg(obj->pages->sgl, sg, page_count, i) {
511  struct page *page = sg_page(sg);
512  if (page_to_phys(page) & (1 << 17))
513  __set_bit(i, obj->bit_17);
514  else
515  __clear_bit(i, obj->bit_17);
516  }
517 }