diff --git a/libdrm/intel/Makefile.am b/libdrm/intel/Makefile.am
index 5e3dee0..c7526f6 100644
--- a/libdrm/intel/Makefile.am
+++ b/libdrm/intel/Makefile.am
@@ -39,6 +39,7 @@ libdrm_intel_la_SOURCES = \
 	intel_bufmgr_priv.h \
 	intel_bufmgr_fake.c \
 	intel_bufmgr_gem.c \
+	intel_chipset.h \
 	mm.c \
 	mm.h
diff --git a/libdrm/intel/intel_bufmgr.c b/libdrm/intel/intel_bufmgr.c
index 188eac2..25a6828 100644
--- a/libdrm/intel/intel_bufmgr.c
+++ b/libdrm/intel/intel_bufmgr.c
@@ -51,6 +51,13 @@ drm_intel_bo_alloc(drm_intel_bufmgr *bufmgr, const char *name,
     return bufmgr->bo_alloc(bufmgr, name, size, alignment);
 }
 
+drm_intel_bo *
+drm_intel_bo_alloc_for_render(drm_intel_bufmgr *bufmgr, const char *name,
+                              unsigned long size, unsigned int alignment)
+{
+    return bufmgr->bo_alloc_for_render(bufmgr, name, size, alignment);
+}
+
 void
 drm_intel_bo_reference(drm_intel_bo *bo)
 {
diff --git a/libdrm/intel/intel_bufmgr.h b/libdrm/intel/intel_bufmgr.h
index e8c2e06..111d2af 100644
--- a/libdrm/intel/intel_bufmgr.h
+++ b/libdrm/intel/intel_bufmgr.h
@@ -75,6 +75,10 @@ struct _drm_intel_bo {
 
 drm_intel_bo *drm_intel_bo_alloc(drm_intel_bufmgr *bufmgr, const char *name,
                                  unsigned long size, unsigned int alignment);
+drm_intel_bo *drm_intel_bo_alloc_for_render(drm_intel_bufmgr *bufmgr,
+                                            const char *name,
+                                            unsigned long size,
+                                            unsigned int alignment);
 void drm_intel_bo_reference(drm_intel_bo *bo);
 void drm_intel_bo_unreference(drm_intel_bo *bo);
 int drm_intel_bo_map(drm_intel_bo *bo, int write_enable);
diff --git a/libdrm/intel/intel_bufmgr_fake.c b/libdrm/intel/intel_bufmgr_fake.c
index 6c21625..e7cec35 100644
--- a/libdrm/intel/intel_bufmgr_fake.c
+++ b/libdrm/intel/intel_bufmgr_fake.c
@@ -1503,6 +1503,7 @@ drm_intel_bufmgr_fake_init(int fd,
 
     /* Hook in methods */
     bufmgr_fake->bufmgr.bo_alloc = drm_intel_fake_bo_alloc;
+    bufmgr_fake->bufmgr.bo_alloc_for_render = drm_intel_fake_bo_alloc;
     bufmgr_fake->bufmgr.bo_reference = drm_intel_fake_bo_reference;
     bufmgr_fake->bufmgr.bo_unreference = drm_intel_fake_bo_unreference;
     bufmgr_fake->bufmgr.bo_map = drm_intel_fake_bo_map;
diff --git a/libdrm/intel/intel_bufmgr_gem.c b/libdrm/intel/intel_bufmgr_gem.c
index 7b821de..6ddecf4 100644
--- a/libdrm/intel/intel_bufmgr_gem.c
+++ b/libdrm/intel/intel_bufmgr_gem.c
@@ -52,8 +52,10 @@
 #include
 
 #include "errno.h"
+#include "libdrm_lists.h"
 #include "intel_bufmgr.h"
 #include "intel_bufmgr_priv.h"
+#include "intel_chipset.h"
 #include "string.h"
 
 #include "i915_drm.h"
@@ -66,7 +68,8 @@ typedef struct _drm_intel_bo_gem drm_intel_bo_gem;
 
 struct drm_intel_gem_bo_bucket {
-    drm_intel_bo_gem *head, **tail;
+    drmMMListHead head;
+
     /**
      * Limit on the number of entries in this bucket.
      *
@@ -99,6 +102,8 @@ typedef struct _drm_intel_bufmgr_gem {
     struct drm_intel_gem_bo_bucket cache_bucket[DRM_INTEL_GEM_BO_BUCKETS];
 
     uint64_t gtt_size;
+    int available_fences;
+    int pci_device;
 } drm_intel_bufmgr_gem;
 
 struct _drm_intel_bo_gem {
@@ -142,8 +147,8 @@ struct _drm_intel_bo_gem {
     /** Mapped address for the buffer, saved across map/unmap cycles */
     void *virtual;
 
-    /** free list */
-    drm_intel_bo_gem *next;
+    /** BO cache list */
+    drmMMListHead head;
 
     /**
      * Boolean of whether this BO and its children have been included in
@@ -165,6 +170,11 @@ struct _drm_intel_bo_gem {
      * the common case.
      */
     int reloc_tree_size;
+    /**
+     * Number of potential fence registers required by this buffer and its
+     * relocations.
+     */
+    int reloc_tree_fences;
 };
 
 static void drm_intel_gem_bo_reference_locked(drm_intel_bo *bo);
@@ -315,8 +325,9 @@ drm_intel_setup_reloc_list(drm_intel_bo *bo)
 }
 
 static drm_intel_bo *
-drm_intel_gem_bo_alloc(drm_intel_bufmgr *bufmgr, const char *name,
-                       unsigned long size, unsigned int alignment)
+drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr, const char *name,
+                                unsigned long size, unsigned int alignment,
+                                int for_render)
 {
     drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
     drm_intel_bo_gem *bo_gem;
@@ -345,18 +356,35 @@ drm_intel_gem_bo_alloc(drm_intel_bufmgr *bufmgr, const char *name,
     /* Get a buffer out of the cache if available */
     if (bucket != NULL && bucket->num_entries > 0) {
         struct drm_i915_gem_busy busy;
-
-        bo_gem = bucket->head;
-        busy.handle = bo_gem->gem_handle;
-
-        ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
-        alloc_from_cache = (ret == 0 && busy.busy == 0);
-        if (alloc_from_cache) {
-            bucket->head = bo_gem->next;
-            if (bo_gem->next == NULL)
-                bucket->tail = &bucket->head;
+        if (for_render) {
+            /* Allocate new render-target BOs from the tail (MRU)
+             * of the list, as it will likely be hot in the GPU cache
+             * and in the aperture for us.
+             */
+            bo_gem = DRMLISTENTRY(drm_intel_bo_gem, bucket->head.prev, head);
+            DRMLISTDEL(&bo_gem->head);
             bucket->num_entries--;
+            alloc_from_cache = 1;
+        } else {
+            /* For non-render-target BOs (where we're probably going to map it
+             * first thing in order to fill it with data), check if the
+             * last BO in the cache is unbusy, and only reuse in that case.
+             * Otherwise, allocating a new buffer is probably faster than
+             * waiting for the GPU to finish.
+             */
+            bo_gem = DRMLISTENTRY(drm_intel_bo_gem, bucket->head.next, head);
+
+            memset(&busy, 0, sizeof(busy));
+            busy.handle = bo_gem->gem_handle;
+
+            ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
+            alloc_from_cache = (ret == 0 && busy.busy == 0);
+
+            if (alloc_from_cache) {
+                DRMLISTDEL(&bo_gem->head);
+                bucket->num_entries--;
+            }
         }
     }
     pthread_mutex_unlock(&bufmgr_gem->lock);
 
@@ -386,6 +414,7 @@ drm_intel_gem_bo_alloc(drm_intel_bufmgr *bufmgr, const char *name,
     bo_gem->refcount = 1;
     bo_gem->validate_index = -1;
     bo_gem->reloc_tree_size = bo_gem->bo.size;
+    bo_gem->reloc_tree_fences = 0;
     bo_gem->used_as_reloc_target = 0;
     bo_gem->tiling_mode = I915_TILING_NONE;
     bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
@@ -396,6 +425,20 @@ drm_intel_gem_bo_alloc(drm_intel_bufmgr *bufmgr, const char *name,
     return &bo_gem->bo;
 }
 
+static drm_intel_bo *
+drm_intel_gem_bo_alloc_for_render(drm_intel_bufmgr *bufmgr, const char *name,
+                                  unsigned long size, unsigned int alignment)
+{
+    return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, alignment, 1);
+}
+
+static drm_intel_bo *
+drm_intel_gem_bo_alloc(drm_intel_bufmgr *bufmgr, const char *name,
+                       unsigned long size, unsigned int alignment)
+{
+    return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, alignment, 0);
+}
+
 /**
  * Returns a drm_intel_bo wrapping the given buffer object handle.
 *
@@ -435,6 +478,7 @@ drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr, const char *name,
     bo_gem->gem_handle = open_arg.handle;
     bo_gem->global_name = handle;
 
+    memset(&get_tiling, 0, sizeof(get_tiling));
     get_tiling.handle = bo_gem->gem_handle;
     ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling);
     if (ret != 0) {
@@ -443,6 +487,10 @@ drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr, const char *name,
     }
     bo_gem->tiling_mode = get_tiling.tiling_mode;
     bo_gem->swizzle_mode = get_tiling.swizzle_mode;
+    if (bo_gem->tiling_mode == I915_TILING_NONE)
+        bo_gem->reloc_tree_fences = 0;
+    else
+        bo_gem->reloc_tree_fences = 1;
 
     DBG("bo_create_from_handle: %d (%s)\n", handle, bo_gem->name);
 
@@ -480,6 +528,7 @@ drm_intel_gem_bo_free(drm_intel_bo *bo)
         munmap (bo_gem->virtual, bo_gem->bo.size);
 
     /* Close this object */
+    memset(&close, 0, sizeof(close));
     close.handle = bo_gem->gem_handle;
     ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close);
     if (ret != 0) {
@@ -529,9 +578,7 @@ drm_intel_gem_bo_unreference_locked(drm_intel_bo *bo)
         bo_gem->reloc_target_bo = NULL;
         bo_gem->reloc_count = 0;
 
-        bo_gem->next = NULL;
-        *bucket->tail = bo_gem;
-        bucket->tail = &bo_gem->next;
+        DRMLISTADDTAIL(&bo_gem->head, &bucket->head);
         bucket->num_entries++;
     } else {
         drm_intel_gem_bo_free(bo);
@@ -811,10 +858,9 @@ drm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr)
         struct drm_intel_gem_bo_bucket *bucket = &bufmgr_gem->cache_bucket[i];
         drm_intel_bo_gem *bo_gem;
 
-        while ((bo_gem = bucket->head) != NULL) {
-            bucket->head = bo_gem->next;
-            if (bo_gem->next == NULL)
-                bucket->tail = &bucket->head;
+        while (!DRMLISTEMPTY(&bucket->head)) {
+            bo_gem = DRMLISTENTRY(drm_intel_bo_gem, bucket->head.next, head);
+            DRMLISTDEL(&bo_gem->head);
             bucket->num_entries--;
 
             drm_intel_gem_bo_free(&bo_gem->bo);
@@ -860,6 +906,7 @@ drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
      */
     assert(!bo_gem->used_as_reloc_target);
     bo_gem->reloc_tree_size += target_bo_gem->reloc_tree_size;
+    bo_gem->reloc_tree_fences += target_bo_gem->reloc_tree_fences;
 
     /* Flag the target to disallow further relocations in it. */
     target_bo_gem->used_as_reloc_target = 1;
@@ -996,6 +1043,7 @@ drm_intel_gem_bo_pin(drm_intel_bo *bo, uint32_t alignment)
     struct drm_i915_gem_pin pin;
     int ret;
 
+    memset(&pin, 0, sizeof(pin));
     pin.handle = bo_gem->gem_handle;
     pin.alignment = alignment;
 
@@ -1018,6 +1066,7 @@ drm_intel_gem_bo_unpin(drm_intel_bo *bo)
     struct drm_i915_gem_unpin unpin;
     int ret;
 
+    memset(&unpin, 0, sizeof(unpin));
     unpin.handle = bo_gem->gem_handle;
 
     ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_UNPIN, &unpin);
@@ -1039,6 +1088,11 @@ drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t *tiling_mode,
     if (bo_gem->global_name == 0 && *tiling_mode == bo_gem->tiling_mode)
         return 0;
 
+    /* If we're going from non-tiling to tiling, bump fence count */
+    if (bo_gem->tiling_mode == I915_TILING_NONE)
+        bo_gem->reloc_tree_fences++;
+
+    memset(&set_tiling, 0, sizeof(set_tiling));
     set_tiling.handle = bo_gem->gem_handle;
     set_tiling.tiling_mode = *tiling_mode;
     set_tiling.stride = stride;
@@ -1051,6 +1105,10 @@ drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t *tiling_mode,
     bo_gem->tiling_mode = set_tiling.tiling_mode;
     bo_gem->swizzle_mode = set_tiling.swizzle_mode;
 
+    /* If we're going from tiling to non-tiling, drop fence count */
+    if (bo_gem->tiling_mode == I915_TILING_NONE)
+        bo_gem->reloc_tree_fences--;
+
     *tiling_mode = bo_gem->tiling_mode;
     return 0;
 }
@@ -1075,6 +1133,7 @@ drm_intel_gem_bo_flink(drm_intel_bo *bo, uint32_t *name)
     int ret;
 
     if (!bo_gem->global_name) {
+        memset(&flink, 0, sizeof(flink));
         flink.handle = bo_gem->gem_handle;
 
         ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_GEM_FLINK, &flink);
@@ -1129,6 +1188,31 @@ drm_intel_gem_bo_get_aperture_space(drm_intel_bo *bo)
 }
 
 /**
+ * Count the number of buffers in this list that need a fence reg
+ *
+ * If the count is greater than the number of available regs, we'll have
+ * to ask the caller to resubmit a batch with fewer tiled buffers.
+ *
+ * This function over-counts if the same buffer is used multiple times.
+ */
+static unsigned int
+drm_intel_gem_total_fences(drm_intel_bo **bo_array, int count)
+{
+    int i;
+    unsigned int total = 0;
+
+    for (i = 0; i < count; i++) {
+        drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo_array[i];
+
+        if (bo_gem == NULL)
+            continue;
+
+        total += bo_gem->reloc_tree_fences;
+    }
+    return total;
+}
+
+/**
  * Clear the flag set by drm_intel_gem_bo_get_aperture_space() so we're ready
  * for the next drm_intel_bufmgr_check_aperture_space() call.
 */
@@ -1206,9 +1290,17 @@ drm_intel_gem_check_aperture_space(drm_intel_bo **bo_array, int count)
     drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo_array[0]->bufmgr;
     unsigned int total = 0;
     unsigned int threshold = bufmgr_gem->gtt_size * 3 / 4;
+    int total_fences;
+
+    /* Check for fence reg constraints if necessary */
+    if (bufmgr_gem->available_fences) {
+        total_fences = drm_intel_gem_total_fences(bo_array, count);
+        if (total_fences > bufmgr_gem->available_fences)
+            return -1;
+    }
 
     total = drm_intel_gem_estimate_batch_space(bo_array, count);
-
+
     if (total > threshold)
         total = drm_intel_gem_compute_batch_space(bo_array, count);
 
@@ -1234,6 +1326,7 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size)
 {
     drm_intel_bufmgr_gem *bufmgr_gem;
     struct drm_i915_gem_get_aperture aperture;
+    drm_i915_getparam_t gp;
     int ret, i;
 
     bufmgr_gem = calloc(1, sizeof(*bufmgr_gem));
@@ -1257,6 +1350,25 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size)
                 (int)bufmgr_gem->gtt_size / 1024);
     }
 
+    gp.param = I915_PARAM_CHIPSET_ID;
+    gp.value = &bufmgr_gem->pci_device;
+    ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
+    if (ret) {
+        fprintf(stderr, "get chip id failed: %d\n", ret);
+        fprintf(stderr, "param: %d, val: %d\n", gp.param, *gp.value);
+    }
+
+    if (!IS_I965G(bufmgr_gem)) {
+        gp.param = I915_PARAM_NUM_FENCES_AVAIL;
+        gp.value = &bufmgr_gem->available_fences;
+        ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
+        if (ret) {
+            fprintf(stderr, "get fences failed: %d\n", ret);
+            fprintf(stderr, "param: %d, val: %d\n", gp.param, *gp.value);
+            bufmgr_gem->available_fences = 0;
+        }
+    }
+
     /* Let's go with one relocation per every 2 dwords (but round down a bit
      * since a power of two will mean an extra page allocation for the reloc
      * buffer).
@@ -1266,6 +1378,7 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size)
     bufmgr_gem->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2;
 
     bufmgr_gem->bufmgr.bo_alloc = drm_intel_gem_bo_alloc;
+    bufmgr_gem->bufmgr.bo_alloc_for_render = drm_intel_gem_bo_alloc_for_render;
     bufmgr_gem->bufmgr.bo_reference = drm_intel_gem_bo_reference;
     bufmgr_gem->bufmgr.bo_unreference = drm_intel_gem_bo_unreference;
     bufmgr_gem->bufmgr.bo_map = drm_intel_gem_bo_map;
@@ -1285,7 +1398,7 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size)
     bufmgr_gem->bufmgr.check_aperture_space = drm_intel_gem_check_aperture_space;
     /* Initialize the linked lists for BO reuse cache. */
     for (i = 0; i < DRM_INTEL_GEM_BO_BUCKETS; i++)
-        bufmgr_gem->cache_bucket[i].tail = &bufmgr_gem->cache_bucket[i].head;
+        DRMINITLISTHEAD(&bufmgr_gem->cache_bucket[i].head);
 
     return &bufmgr_gem->bufmgr;
 }
diff --git a/libdrm/intel/intel_bufmgr_priv.h b/libdrm/intel/intel_bufmgr_priv.h
index 76d31e4..82d87b4 100644
--- a/libdrm/intel/intel_bufmgr_priv.h
+++ b/libdrm/intel/intel_bufmgr_priv.h
@@ -51,6 +51,16 @@ struct _drm_intel_bufmgr {
     drm_intel_bo *(*bo_alloc)(drm_intel_bufmgr *bufmgr, const char *name,
                               unsigned long size, unsigned int alignment);
 
+    /**
+     * Allocate a buffer object, hinting that it will be used as a render target.
+     *
+     * This is otherwise the same as bo_alloc.
+     */
+    drm_intel_bo *(*bo_alloc_for_render)(drm_intel_bufmgr *bufmgr,
+                                         const char *name,
+                                         unsigned long size,
+                                         unsigned int alignment);
+
     /** Takes a reference on a buffer object */
     void (*bo_reference)(drm_intel_bo *bo);
 
diff --git a/libdrm/intel/intel_chipset.h b/libdrm/intel/intel_chipset.h
new file mode 100644
index 0000000..0b3af02
--- /dev/null
+++ b/libdrm/intel/intel_chipset.h
@@ -0,0 +1,71 @@
+/*
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef _INTEL_CHIPSET_H
+#define _INTEL_CHIPSET_H
+
+#define IS_I830(dev) ((dev)->pci_device == 0x3577)
+#define IS_845G(dev) ((dev)->pci_device == 0x2562)
+#define IS_I85X(dev) ((dev)->pci_device == 0x3582)
+#define IS_I855(dev) ((dev)->pci_device == 0x3582)
+#define IS_I865G(dev) ((dev)->pci_device == 0x2572)
+
+#define IS_I915G(dev) ((dev)->pci_device == 0x2582 || (dev)->pci_device == 0x258a)
+#define IS_I915GM(dev) ((dev)->pci_device == 0x2592)
+#define IS_I945G(dev) ((dev)->pci_device == 0x2772)
+#define IS_I945GM(dev) ((dev)->pci_device == 0x27A2 ||\
+                        (dev)->pci_device == 0x27AE)
+#define IS_I965G(dev) ((dev)->pci_device == 0x2972 || \
+                       (dev)->pci_device == 0x2982 || \
+                       (dev)->pci_device == 0x2992 || \
+                       (dev)->pci_device == 0x29A2 || \
+                       (dev)->pci_device == 0x2A02 || \
+                       (dev)->pci_device == 0x2A12 || \
+                       (dev)->pci_device == 0x2A42 || \
+                       (dev)->pci_device == 0x2E02 || \
+                       (dev)->pci_device == 0x2E12 || \
+                       (dev)->pci_device == 0x2E22)
+
+#define IS_I965GM(dev) ((dev)->pci_device == 0x2A02)
+
+#define IS_GM45(dev) ((dev)->pci_device == 0x2A42)
+
+#define IS_G4X(dev) ((dev)->pci_device == 0x2E02 || \
+                     (dev)->pci_device == 0x2E12 || \
+                     (dev)->pci_device == 0x2E22)
+
+#define IS_G33(dev) ((dev)->pci_device == 0x29C2 || \
+                     (dev)->pci_device == 0x29B2 || \
+                     (dev)->pci_device == 0x29D2)
+
+#define IS_I9XX(dev) (IS_I915G(dev) || IS_I915GM(dev) || IS_I945G(dev) || \
+                      IS_I945GM(dev) || IS_I965G(dev) || IS_G33(dev))
+
+#define IS_MOBILE(dev) (IS_I830(dev) || IS_I85X(dev) || IS_I915GM(dev) || \
+                        IS_I945GM(dev) || IS_I965GM(dev) || IS_GM45(dev))
+
+#endif /* _INTEL_CHIPSET_H */
diff --git a/libdrm/libdrm_lists.h b/libdrm/libdrm_lists.h
index 8e23991..6410f57 100644
--- a/libdrm/libdrm_lists.h
+++ b/libdrm/libdrm_lists.h
@@ -29,6 +29,8 @@
  * list handling. No list looping yet.
  */
 
+#include <stddef.h>
+
 typedef struct _drmMMListHead
 {
     struct _drmMMListHead *prev;
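A minimal sketch of how a client might exercise the entry points this patch adds, assuming an already-open DRM file descriptor; the function name render_one, the buffer size, alignment, stride, and the include paths are illustrative and not taken from the patch itself:

#include <stdint.h>
#include <stdio.h>
#include "i915_drm.h"      /* for I915_TILING_X; installed path may differ */
#include "intel_bufmgr.h"

/* Hypothetical caller: fd is an already-open DRM device file descriptor. */
static int render_one(int fd)
{
    drm_intel_bufmgr *bufmgr;
    drm_intel_bo *target;
    drm_intel_bo *check[1];
    uint32_t tiling = I915_TILING_X;

    bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
    if (bufmgr == NULL)
        return -1;

    /* Render targets come from the MRU end of the bucket list, so they are
     * more likely to still be bound and hot in the GPU cache.
     */
    target = drm_intel_bo_alloc_for_render(bufmgr, "render target",
                                           512 * 1024, 4096);
    if (target == NULL) {
        drm_intel_bufmgr_destroy(bufmgr);
        return -1;
    }

    /* Tiling the buffer makes it need a fence register on pre-965 parts,
     * which now counts against the kernel's I915_PARAM_NUM_FENCES_AVAIL.
     */
    drm_intel_bo_set_tiling(target, &tiling, 4096);

    check[0] = target;
    if (drm_intel_bufmgr_check_aperture_space(check, 1) != 0) {
        /* Too many fenced buffers or too much aperture: the caller should
         * flush what it has queued and retry with a smaller batch.
         */
        fprintf(stderr, "batch would not fit, flush first\n");
    }

    drm_intel_bo_unreference(target);
    drm_intel_bufmgr_destroy(bufmgr);
    return 0;
}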