From f7e21206a98f8afa2f427d240f8af85e77665af7 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@fedoraproject.org>
Date: Mon, 23 Feb 2009 04:59:49 +0000
Subject: [PATCH] - radeon: merge radeon-rewrite branch, drop old r300 bufmgr

---
 mesa.spec            |     9 +-
 r300-bufmgr.patch    | 10618 -------------
 radeon-rewrite.patch | 34385 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 34391 insertions(+), 10621 deletions(-)
 delete mode 100644 r300-bufmgr.patch
 create mode 100644 radeon-rewrite.patch

diff --git a/mesa.spec b/mesa.spec
index 67e6e06..4de4f32 100644
--- a/mesa.spec
+++ b/mesa.spec
@@ -20,7 +20,7 @@
 Summary: Mesa graphics libraries
 Name: mesa
 Version: 7.3
-Release: 3%{?dist}
+Release: 4%{?dist}
 License: MIT
 Group: System Environment/Libraries
 URL: http://www.mesa3d.org
@@ -40,7 +40,7 @@ Patch0: mesa-7.1-osmesa-version.patch
 Patch2: mesa-7.1-nukeglthread-debug.patch
 Patch3: mesa-no-mach64.patch
 
-Patch5: r300-bufmgr.patch
+Patch5: radeon-rewrite.patch
 
 Patch7: mesa-7.1-link-shared.patch
 Patch9: intel-revert-vbl.patch
@@ -170,7 +170,7 @@ This package provides some demo applications for testing Mesa.
 %patch0 -p1 -b .osmesa
 %patch2 -p1 -b .intel-glthread
 %patch3 -p0 -b .no-mach64
-#%patch5 -p1 -b .r300-bufmgr
+%patch5 -p1 -b .radeon-rewrite
 %patch7 -p1 -b .dricore
 %patch9 -p1 -b .intel-vbl
 %patch12 -p1 -b .intel-nowarn
@@ -420,6 +420,9 @@ rm -rf $RPM_BUILD_ROOT
 %{_libdir}/mesa-demos-data
 
 %changelog
+* Mon Feb 23 2009 Dave Airlie <airlied@redhat.com> 7.3-4
+- radeon: merge radeon-rewrite branch, drop old r300 bufmgr
+
 * Sat Feb 21 2009 Adam Jackson <ajax@redhat.com> 7.3-3
 - Merge review cleanups (#226136)
 
diff --git a/r300-bufmgr.patch b/r300-bufmgr.patch
deleted file mode 100644
index fb0245f..0000000
--- a/r300-bufmgr.patch
+++ /dev/null
@@ -1,10618 +0,0 @@
-diff --git a/src/mesa/drivers/dri/r200/Makefile b/src/mesa/drivers/dri/r200/Makefile
-index e9144ac..7064f42 100644
---- a/src/mesa/drivers/dri/r200/Makefile
-+++ b/src/mesa/drivers/dri/r200/Makefile
-@@ -48,7 +48,9 @@ SYMLINKS = \
- COMMON_SYMLINKS = \
- 	radeon_chipset.h \
- 	radeon_screen.c \
--	radeon_screen.h
-+	radeon_screen.h \
-+	radeon_bo_legacy.h \
-+	radeon_buffer.h
- 
- ##### TARGETS #####
- 
-diff --git a/src/mesa/drivers/dri/r300/Makefile b/src/mesa/drivers/dri/r300/Makefile
-index 6ca9342..cbb09e6 100644
---- a/src/mesa/drivers/dri/r300/Makefile
-+++ b/src/mesa/drivers/dri/r300/Makefile
-@@ -21,13 +21,14 @@ COMMON_SOURCES = \
- 	../common/dri_util.c
- 
- DRIVER_SOURCES = \
-+		 radeon_bo_legacy.c \
-+		 radeon_cs_legacy.c \
- 		 radeon_screen.c \
- 		 radeon_context.c \
- 		 radeon_ioctl.c \
- 		 radeon_lock.c \
- 		 radeon_span.c \
- 		 radeon_state.c \
--		 r300_mem.c \
- 		 r300_context.c \
- 		 r300_ioctl.c \
- 		 r300_cmdbuf.c \
-@@ -36,6 +37,7 @@ DRIVER_SOURCES = \
- 		 r300_texmem.c \
- 		 r300_tex.c \
- 		 r300_texstate.c \
-+		 r300_mipmap_tree.c \
- 		 radeon_program.c \
- 		 radeon_program_alu.c \
- 		 radeon_program_pair.c \
-@@ -54,7 +56,9 @@ DRIVER_SOURCES = \
- C_SOURCES = $(COMMON_SOURCES) $(DRIVER_SOURCES)
- 
- DRIVER_DEFINES = -DCOMPILE_R300 -DR200_MERGED=0 \
--	-DRADEON_COMMON=1 -DRADEON_COMMON_FOR_R300
-+	-DRADEON_COMMON=1 -DRADEON_COMMON_FOR_R300 \
-+#	-DRADEON_BO_TRACK \
-+	-Wall
- 
- SYMLINKS = \
- 	server/radeon_dri.c \
-@@ -68,7 +72,14 @@ COMMON_SYMLINKS = \
- 	radeon_chipset.h \
- 	radeon_screen.c \
- 	radeon_screen.h \
--	radeon_span.h
-+	radeon_span.h \
-+	radeon_buffer.h \
-+	radeon_bo_legacy.c \
-+	radeon_cs_legacy.c \
-+	radeon_bo_legacy.h \
-+	radeon_cs_legacy.h
-+
-+DRI_LIB_DEPS += -ldrm_radeon
- 
- ##### TARGETS #####
- 
-diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c
-index c9e1dfe..4eac518 100644
---- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c
-+++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c
-@@ -44,6 +44,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "drm.h"
- #include "radeon_drm.h"
- 
-+#include "radeon_buffer.h"
- #include "radeon_ioctl.h"
- #include "r300_context.h"
- #include "r300_ioctl.h"
-@@ -51,62 +52,41 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "r300_reg.h"
- #include "r300_cmdbuf.h"
- #include "r300_emit.h"
-+#include "r300_mipmap_tree.h"
- #include "r300_state.h"
-+#include "radeon_cs_legacy.h"
-+#include "radeon_cs_gem.h"
-+#include "radeon_reg.h"
-+
-+#define R300_VAP_PVS_UPLOAD_ADDRESS 0x2200
-+#   define RADEON_ONE_REG_WR        (1 << 15)
- 
- // Set this to 1 for extremely verbose debugging of command buffers
- #define DEBUG_CMDBUF		0
- 
-+/** # of dwords reserved for additional instructions that may need to be written
-+ * during flushing.
-+ */
-+#define SPACE_FOR_FLUSHING	4
-+
- /**
-  * Send the current command buffer via ioctl to the hardware.
-  */
- int r300FlushCmdBufLocked(r300ContextPtr r300, const char *caller)
- {
--	int ret;
--	int i;
--	drm_radeon_cmd_buffer_t cmd;
--	int start;
--
--	if (r300->radeon.lost_context) {
--		start = 0;
--		r300->radeon.lost_context = GL_FALSE;
--	} else
--		start = r300->cmdbuf.count_reemit;
--
--	if (RADEON_DEBUG & DEBUG_IOCTL) {
--		fprintf(stderr, "%s from %s - %i cliprects\n",
--			__FUNCTION__, caller, r300->radeon.numClipRects);
--
--		if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_VERBOSE)
--			for (i = start; i < r300->cmdbuf.count_used; ++i)
--				fprintf(stderr, "%d: %08x\n", i,
--					r300->cmdbuf.cmd_buf[i]);
--	}
--
--	cmd.buf = (char *)(r300->cmdbuf.cmd_buf + start);
--	cmd.bufsz = (r300->cmdbuf.count_used - start) * 4;
--
--	if (r300->radeon.state.scissor.enabled) {
--		cmd.nbox = r300->radeon.state.scissor.numClipRects;
--		cmd.boxes =
--		    (drm_clip_rect_t *) r300->radeon.state.scissor.pClipRects;
--	} else {
--		cmd.nbox = r300->radeon.numClipRects;
--		cmd.boxes = (drm_clip_rect_t *) r300->radeon.pClipRects;
--	}
--
--	ret = drmCommandWrite(r300->radeon.dri.fd,
--			      DRM_RADEON_CMDBUF, &cmd, sizeof(cmd));
-+	int ret = 0;
- 
--	if (RADEON_DEBUG & DEBUG_SYNC) {
--		fprintf(stderr, "Syncing in %s (from %s)\n\n",
--			__FUNCTION__, caller);
--		radeonWaitForIdleLocked(&r300->radeon);
-+	if (r300->cmdbuf.flushing) {
-+		fprintf(stderr, "Recursive call into r300FlushCmdBufLocked!\n");
-+		exit(-1);
- 	}
--
--	r300->dma.nr_released_bufs = 0;
--	r300->cmdbuf.count_used = 0;
--	r300->cmdbuf.count_reemit = 0;
--
-+	r300->cmdbuf.flushing = 1;
-+    if (r300->cmdbuf.cs->cdw) {
-+        ret = radeon_cs_emit(r300->cmdbuf.cs);
-+        r300->hw.all_dirty = 1;
-+    }
-+    radeon_cs_erase(r300->cmdbuf.cs);
-+	r300->cmdbuf.flushing = 0;
- 	return ret;
- }
- 
-@@ -115,9 +95,7 @@ int r300FlushCmdBuf(r300ContextPtr r300, const char *caller)
- 	int ret;
- 
- 	LOCK_HARDWARE(&r300->radeon);
--
- 	ret = r300FlushCmdBufLocked(r300, caller);
--
- 	UNLOCK_HARDWARE(&r300->radeon);
- 
- 	if (ret) {
-@@ -128,13 +106,42 @@ int r300FlushCmdBuf(r300ContextPtr r300, const char *caller)
- 	return ret;
- }
- 
--static void r300PrintStateAtom(r300ContextPtr r300, struct r300_state_atom *state)
-+/**
-+ * Make sure that enough space is available in the command buffer
-+ * by flushing if necessary.
-+ *
-+ * \param dwords The number of dwords we need to be free on the command buffer
-+ */
-+void r300EnsureCmdBufSpace(r300ContextPtr r300, int dwords, const char *caller)
-+{
-+	if ((r300->cmdbuf.cs->cdw + dwords + 128) > r300->cmdbuf.size ||
-+        radeon_cs_need_flush(r300->cmdbuf.cs)) {
-+		r300FlushCmdBuf(r300, caller);
-+    }
-+}
-+
-+void r300BeginBatch(r300ContextPtr r300, int n,
-+		    int dostate,
-+                    const char *file,
-+                    const char *function,
-+                    int line)
-+{
-+	r300EnsureCmdBufSpace(r300, n, function);
-+	if (!r300->cmdbuf.cs->cdw && dostate) {
-+		if (RADEON_DEBUG & DEBUG_IOCTL)
-+			fprintf(stderr, "Reemit state after flush (from %s)\n", function);
-+		r300EmitState(r300);
-+	}
-+    radeon_cs_begin(r300->cmdbuf.cs, n, file, function, line);
-+}
-+
-+static void r300PrintStateAtom(r300ContextPtr r300,
-+                               struct r300_state_atom *state)
- {
- 	int i;
- 	int dwords = (*state->check) (r300, state);
- 
--	fprintf(stderr, "  emit %s %d/%d\n", state->name, dwords,
--		state->cmd_size);
-+	fprintf(stderr, "  emit %s %d/%d\n", state->name, dwords, state->cmd_size);
- 
- 	if (RADEON_DEBUG & DEBUG_VERBOSE) {
- 		for (i = 0; i < dwords; i++) {
-@@ -152,33 +159,18 @@ static void r300PrintStateAtom(r300ContextPtr r300, struct r300_state_atom *stat
-  */
- static INLINE void r300EmitAtoms(r300ContextPtr r300, GLboolean dirty)
- {
-+	BATCH_LOCALS(r300);
- 	struct r300_state_atom *atom;
--	uint32_t *dest;
- 	int dwords;
- 
--	dest = r300->cmdbuf.cmd_buf + r300->cmdbuf.count_used;
--
--	/* Emit WAIT */
--	*dest = cmdwait(R300_WAIT_3D | R300_WAIT_3D_CLEAN);
--	dest++;
--	r300->cmdbuf.count_used++;
--
--	/* Emit cache flush */
--	*dest = cmdpacket0(R300_TX_INVALTAGS, 1);
--	dest++;
--	r300->cmdbuf.count_used++;
--
--	*dest = R300_TX_FLUSH;
--	dest++;
--	r300->cmdbuf.count_used++;
--
--	/* Emit END3D */
--	*dest = cmdpacify();
--	dest++;
--	r300->cmdbuf.count_used++;
-+    cp_wait(r300, R300_WAIT_3D | R300_WAIT_3D_CLEAN);
-+	BEGIN_BATCH_NO_AUTOSTATE(2);
-+	OUT_BATCH(cmdpacket0(r300->radeon.radeonScreen, R300_TX_INVALTAGS, 1));
-+	OUT_BATCH(R300_TX_FLUSH);
-+	END_BATCH();
-+    end_3d(r300);
- 
- 	/* Emit actual atoms */
--
- 	foreach(atom, &r300->hw.atomlist) {
- 		if ((atom->dirty || r300->hw.all_dirty) == dirty) {
- 			dwords = (*atom->check) (r300, atom);
-@@ -186,9 +178,13 @@ static INLINE void r300EmitAtoms(r300ContextPtr r300, GLboolean dirty)
- 				if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_STATE) {
- 					r300PrintStateAtom(r300, atom);
- 				}
--				memcpy(dest, atom->cmd, dwords * 4);
--				dest += dwords;
--				r300->cmdbuf.count_used += dwords;
-+				if (atom->emit) {
-+					(*atom->emit)(r300, atom);
-+				} else {
-+					BEGIN_BATCH_NO_AUTOSTATE(dwords);
-+					OUT_BATCH_TABLE(atom->cmd, dwords);
-+					END_BATCH();
-+				}
- 				atom->dirty = GL_FALSE;
- 			} else {
- 				if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_STATE) {
-@@ -198,6 +194,8 @@ static INLINE void r300EmitAtoms(r300ContextPtr r300, GLboolean dirty)
- 			}
- 		}
- 	}
-+
-+	COMMIT_BATCH();
- }
- 
- /**
-@@ -211,39 +209,208 @@ void r300EmitState(r300ContextPtr r300)
- 	if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_PRIMS))
- 		fprintf(stderr, "%s\n", __FUNCTION__);
- 
--	if (r300->cmdbuf.count_used && !r300->hw.is_dirty
--	    && !r300->hw.all_dirty)
-+	if (r300->cmdbuf.cs->cdw && !r300->hw.is_dirty && !r300->hw.all_dirty)
- 		return;
- 
- 	/* To avoid going across the entire set of states multiple times, just check
--	 * for enough space for the case of emitting all state, and inline the
--	 * r300AllocCmdBuf code here without all the checks.
-+	 * for enough space for the case of emitting all state.
- 	 */
- 	r300EnsureCmdBufSpace(r300, r300->hw.max_state_size, __FUNCTION__);
- 
--	if (!r300->cmdbuf.count_used) {
-+	if (!r300->cmdbuf.cs->cdw) {
- 		if (RADEON_DEBUG & DEBUG_STATE)
- 			fprintf(stderr, "Begin reemit state\n");
- 
- 		r300EmitAtoms(r300, GL_FALSE);
--		r300->cmdbuf.count_reemit = r300->cmdbuf.count_used;
- 	}
- 
- 	if (RADEON_DEBUG & DEBUG_STATE)
- 		fprintf(stderr, "Begin dirty state\n");
- 
- 	r300EmitAtoms(r300, GL_TRUE);
--
--	assert(r300->cmdbuf.count_used < r300->cmdbuf.size);
--
- 	r300->hw.is_dirty = GL_FALSE;
- 	r300->hw.all_dirty = GL_FALSE;
- }
- 
--#define packet0_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->packet0.count)
-+static unsigned packet0_count(r300ContextPtr r300, uint32_t *pkt)
-+{
-+    if (r300->radeon.radeonScreen->kernel_mm) {
-+        return ((((*pkt) >> 16) & 0x3FFF) + 1);
-+    } else {
-+        drm_r300_cmd_header_t *t = (drm_r300_cmd_header_t*)pkt;
-+        return t->packet0.count;
-+    }
-+    return 0;
-+}
-+
- #define vpu_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->vpu.count)
- #define r500fp_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->r500fp.count)
- 
-+void emit_vpu(r300ContextPtr r300, struct r300_state_atom * atom)
-+{
-+	BATCH_LOCALS(r300);
-+	drm_r300_cmd_header_t cmd;
-+    uint32_t addr, ndw, i;
-+
-+    if (!r300->radeon.radeonScreen->kernel_mm) {
-+        uint32_t dwords;
-+    	dwords = (*atom->check) (r300, atom);
-+        BEGIN_BATCH_NO_AUTOSTATE(dwords);
-+        OUT_BATCH_TABLE(atom->cmd, dwords);
-+        END_BATCH();
-+        return;
-+    }
-+
-+    cmd.u = atom->cmd[0];
-+    addr = (cmd.vpu.adrhi << 8) | cmd.vpu.adrlo;
-+	ndw = cmd.vpu.count * 4;
-+    if (ndw) {
-+        /* flush processing vertices */
-+        OUT_BATCH(CP_PACKET0(R300_SC_SCREENDOOR, 0));
-+        OUT_BATCH(0x0);
-+        OUT_BATCH(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
-+        OUT_BATCH((1 << 15) | (1 << 28));
-+        OUT_BATCH(CP_PACKET0(R300_SC_SCREENDOOR, 0));
-+        OUT_BATCH(0x00FFFFFF);
-+        OUT_BATCH(CP_PACKET0(R300_VAP_PVS_STATE_FLUSH_REG, 0));
-+        OUT_BATCH(1);
-+        /* write vpu */
-+        OUT_BATCH(CP_PACKET0(R300_VAP_PVS_UPLOAD_ADDRESS, 0));
-+        OUT_BATCH(addr);
-+        OUT_BATCH(CP_PACKET0(R300_VAP_PVS_UPLOAD_DATA, ndw-1) | RADEON_ONE_REG_WR);
-+        for (i = 0; i < ndw; i++) {
-+            OUT_BATCH(atom->cmd[i+1]);
-+        }
-+    }
-+}
-+
-+void emit_r500fp(r300ContextPtr r300, struct r300_state_atom * atom)
-+{
-+	BATCH_LOCALS(r300);
-+	drm_r300_cmd_header_t cmd;
-+	uint32_t addr, ndw, i, sz;
-+	int type, clamp, stride;
-+
-+	if (!r300->radeon.radeonScreen->kernel_mm) {
-+		uint32_t dwords;
-+		dwords = (*atom->check) (r300, atom);
-+		BEGIN_BATCH_NO_AUTOSTATE(dwords);
-+		OUT_BATCH_TABLE(atom->cmd, dwords);
-+		END_BATCH();
-+		return;
-+	}
-+
-+	cmd.u = atom->cmd[0];
-+	sz = cmd.r500fp.count;
-+	addr = ((cmd.r500fp.adrhi_flags & 1) << 8) | cmd.r500fp.adrlo;
-+	type = !!(cmd.r500fp.adrhi_flags & R500FP_CONSTANT_TYPE);
-+	clamp = !!(cmd.r500fp.adrhi_flags & R500FP_CONSTANT_CLAMP);
-+
-+	addr |= (type << 16);
-+	addr |= (clamp << 17);
-+
-+	stride = type ? 4 : 6;
-+
-+	ndw = sz * stride;
-+	if (ndw) {
-+
-+		OUT_BATCH(CP_PACKET0(R500_GA_US_VECTOR_INDEX, 0));
-+		OUT_BATCH(addr);
-+		OUT_BATCH(CP_PACKET0(R500_GA_US_VECTOR_DATA, ndw-1) | RADEON_ONE_REG_WR);
-+		for (i = 0; i < ndw; i++) {
-+			OUT_BATCH(atom->cmd[i+1]);
-+		}
-+	}
-+}
-+
-+static void emit_tex_offsets(r300ContextPtr r300, struct r300_state_atom * atom)
-+{
-+	BATCH_LOCALS(r300);
-+	int numtmus = packet0_count(r300, r300->hw.tex.offset.cmd);
-+
-+	if (numtmus) {
-+		int i;
-+
-+		for(i = 0; i < numtmus; ++i) {
-+		    BEGIN_BATCH(2);
-+    		OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1);
-+			r300TexObj *t = r300->hw.textures[i];
-+			if (t && !t->image_override) {
-+				OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0,
-+                                RADEON_GEM_DOMAIN_VRAM, 0, 0);
-+			} else if (!t) {
-+				OUT_BATCH(r300->radeon.radeonScreen->texOffset[0]);
-+			} else {
-+				if (t->bo) {
-+					OUT_BATCH_RELOC(t->tile_bits, t->bo, 0,
-+							RADEON_GEM_DOMAIN_VRAM, 0, 0);
-+				} else {
-+					OUT_BATCH(t->override_offset);
-+				}
-+			}
-+			END_BATCH();
-+		}
-+	}
-+}
-+
-+static void emit_cb_offset(r300ContextPtr r300, struct r300_state_atom * atom)
-+{
-+	BATCH_LOCALS(r300);
-+	struct radeon_renderbuffer *rrb;
-+	uint32_t cbpitch;
-+	GLframebuffer *fb = r300->radeon.dri.drawable->driverPrivate;
-+
-+	rrb = r300->radeon.state.color.rrb;
-+	if (r300->radeon.radeonScreen->driScreen->dri2.enabled) {
-+		rrb = fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
-+	}
-+	if (!rrb || !rrb->bo) {
-+		fprintf(stderr, "no rrb\n");
-+		return;
-+	}
-+
-+	cbpitch = (rrb->pitch / rrb->cpp);
-+	if (rrb->cpp == 4)
-+		cbpitch |= R300_COLOR_FORMAT_ARGB8888;
-+	else
-+		cbpitch |= R300_COLOR_FORMAT_RGB565;
-+
-+	if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE)
-+		cbpitch |= R300_COLOR_TILE_ENABLE;
-+
-+	BEGIN_BATCH(4);
-+	OUT_BATCH_REGSEQ(R300_RB3D_COLOROFFSET0, 1);
-+	OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
-+	OUT_BATCH_REGSEQ(R300_RB3D_COLORPITCH0, 1);
-+	OUT_BATCH(cbpitch);
-+	END_BATCH();
-+}
-+
-+static void emit_zb_offset(r300ContextPtr r300, struct r300_state_atom * atom)
-+{
-+	BATCH_LOCALS(r300);
-+	struct radeon_renderbuffer *rrb;
-+	uint32_t zbpitch;
-+
-+	rrb = r300->radeon.state.depth_buffer;
-+	if (!rrb)
-+		return;
-+
-+	zbpitch = (rrb->pitch / rrb->cpp);
-+	if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
-+		zbpitch |= R300_DEPTHMACROTILE_ENABLE;
-+	}
-+	if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE){
-+		zbpitch |= R300_DEPTHMICROTILE_TILED;
-+	}
-+	
-+	BEGIN_BATCH(4);
-+	OUT_BATCH_REGSEQ(R300_ZB_DEPTHOFFSET, 1);
-+	OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
-+	OUT_BATCH_REGVAL(R300_ZB_DEPTHPITCH, zbpitch);
-+	END_BATCH();
-+}
-+
- static int check_always(r300ContextPtr r300, struct r300_state_atom *atom)
- {
- 	return atom->cmd_size;
-@@ -252,27 +419,33 @@ static int check_always(r300ContextPtr r300, struct r300_state_atom *atom)
- static int check_variable(r300ContextPtr r300, struct r300_state_atom *atom)
- {
- 	int cnt;
--	cnt = packet0_count(atom->cmd);
-+    if (atom->cmd[0] == CP_PACKET2) {
-+        return 0;
-+    }
-+	cnt = packet0_count(r300, atom->cmd);
- 	return cnt ? cnt + 1 : 0;
- }
- 
--static int check_vpu(r300ContextPtr r300, struct r300_state_atom *atom)
-+int check_vpu(r300ContextPtr r300, struct r300_state_atom *atom)
- {
- 	int cnt;
-+
- 	cnt = vpu_count(atom->cmd);
- 	return cnt ? (cnt * 4) + 1 : 0;
- }
- 
--static int check_r500fp(r300ContextPtr r300, struct r300_state_atom *atom)
-+int check_r500fp(r300ContextPtr r300, struct r300_state_atom *atom)
- {
- 	int cnt;
-+
- 	cnt = r500fp_count(atom->cmd);
- 	return cnt ? (cnt * 6) + 1 : 0;
- }
- 
--static int check_r500fp_const(r300ContextPtr r300, struct r300_state_atom *atom)
-+int check_r500fp_const(r300ContextPtr r300, struct r300_state_atom *atom)
- {
- 	int cnt;
-+
- 	cnt = r500fp_count(atom->cmd);
- 	return cnt ? (cnt * 4) + 1 : 0;
- }
-@@ -318,92 +491,92 @@ void r300InitCmdBuf(r300ContextPtr r300)
- 
- 	/* Initialize state atoms */
- 	ALLOC_STATE(vpt, always, R300_VPT_CMDSIZE, 0);
--	r300->hw.vpt.cmd[R300_VPT_CMD_0] = cmdpacket0(R300_SE_VPORT_XSCALE, 6);
-+	r300->hw.vpt.cmd[R300_VPT_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_SE_VPORT_XSCALE, 6);
- 	ALLOC_STATE(vap_cntl, always, R300_VAP_CNTL_SIZE, 0);
--	r300->hw.vap_cntl.cmd[R300_VAP_CNTL_FLUSH] = cmdpacket0(R300_VAP_PVS_STATE_FLUSH_REG, 1);
-+	r300->hw.vap_cntl.cmd[R300_VAP_CNTL_FLUSH] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PVS_STATE_FLUSH_REG, 1);
- 	r300->hw.vap_cntl.cmd[R300_VAP_CNTL_FLUSH_1] = 0;
--	r300->hw.vap_cntl.cmd[R300_VAP_CNTL_CMD] = cmdpacket0(R300_VAP_CNTL, 1);
-+	r300->hw.vap_cntl.cmd[R300_VAP_CNTL_CMD] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_CNTL, 1);
- 	if (is_r500) {
- 	    ALLOC_STATE(vap_index_offset, always, 2, 0);
--	    r300->hw.vap_index_offset.cmd[0] = cmdpacket0(R500_VAP_INDEX_OFFSET, 1);
-+	    r300->hw.vap_index_offset.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R500_VAP_INDEX_OFFSET, 1);
- 	    r300->hw.vap_index_offset.cmd[1] = 0;
- 	}
- 	ALLOC_STATE(vte, always, 3, 0);
--	r300->hw.vte.cmd[0] = cmdpacket0(R300_SE_VTE_CNTL, 2);
-+	r300->hw.vte.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SE_VTE_CNTL, 2);
- 	ALLOC_STATE(vap_vf_max_vtx_indx, always, 3, 0);
--	r300->hw.vap_vf_max_vtx_indx.cmd[0] = cmdpacket0(R300_VAP_VF_MAX_VTX_INDX, 2);
-+	r300->hw.vap_vf_max_vtx_indx.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_VF_MAX_VTX_INDX, 2);
- 	ALLOC_STATE(vap_cntl_status, always, 2, 0);
--	r300->hw.vap_cntl_status.cmd[0] = cmdpacket0(R300_VAP_CNTL_STATUS, 1);
-+	r300->hw.vap_cntl_status.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_CNTL_STATUS, 1);
- 	ALLOC_STATE(vir[0], variable, R300_VIR_CMDSIZE, 0);
- 	r300->hw.vir[0].cmd[R300_VIR_CMD_0] =
--	    cmdpacket0(R300_VAP_PROG_STREAM_CNTL_0, 1);
-+	    cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PROG_STREAM_CNTL_0, 1);
- 	ALLOC_STATE(vir[1], variable, R300_VIR_CMDSIZE, 1);
- 	r300->hw.vir[1].cmd[R300_VIR_CMD_0] =
--	    cmdpacket0(R300_VAP_PROG_STREAM_CNTL_EXT_0, 1);
-+	    cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PROG_STREAM_CNTL_EXT_0, 1);
- 	ALLOC_STATE(vic, always, R300_VIC_CMDSIZE, 0);
--	r300->hw.vic.cmd[R300_VIC_CMD_0] = cmdpacket0(R300_VAP_VTX_STATE_CNTL, 2);
-+	r300->hw.vic.cmd[R300_VIC_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_VTX_STATE_CNTL, 2);
- 	ALLOC_STATE(vap_psc_sgn_norm_cntl, always, 2, 0);
--	r300->hw.vap_psc_sgn_norm_cntl.cmd[0] = cmdpacket0(R300_VAP_PSC_SGN_NORM_CNTL, SGN_NORM_ZERO_CLAMP_MINUS_ONE);
-+	r300->hw.vap_psc_sgn_norm_cntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PSC_SGN_NORM_CNTL, SGN_NORM_ZERO_CLAMP_MINUS_ONE);
- 
- 	if (has_tcl) {
- 		ALLOC_STATE(vap_clip_cntl, always, 2, 0);
--		r300->hw.vap_clip_cntl.cmd[0] = cmdpacket0(R300_VAP_CLIP_CNTL, 1);
-+		r300->hw.vap_clip_cntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_CLIP_CNTL, 1);
- 		ALLOC_STATE(vap_clip, always, 5, 0);
--		r300->hw.vap_clip.cmd[0] = cmdpacket0(R300_VAP_GB_VERT_CLIP_ADJ, 4);
-+		r300->hw.vap_clip.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_GB_VERT_CLIP_ADJ, 4);
- 		ALLOC_STATE(vap_pvs_vtx_timeout_reg, always, 2, 0);
--		r300->hw.vap_pvs_vtx_timeout_reg.cmd[0] = cmdpacket0(VAP_PVS_VTX_TIMEOUT_REG, 1);
-+		r300->hw.vap_pvs_vtx_timeout_reg.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, VAP_PVS_VTX_TIMEOUT_REG, 1);
- 	}
- 
- 	ALLOC_STATE(vof, always, R300_VOF_CMDSIZE, 0);
- 	r300->hw.vof.cmd[R300_VOF_CMD_0] =
--	    cmdpacket0(R300_VAP_OUTPUT_VTX_FMT_0, 2);
-+	    cmdpacket0(r300->radeon.radeonScreen, R300_VAP_OUTPUT_VTX_FMT_0, 2);
- 
- 	if (has_tcl) {
- 		ALLOC_STATE(pvs, always, R300_PVS_CMDSIZE, 0);
- 		r300->hw.pvs.cmd[R300_PVS_CMD_0] =
--		    cmdpacket0(R300_VAP_PVS_CODE_CNTL_0, 3);
-+		    cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PVS_CODE_CNTL_0, 3);
- 	}
- 
- 	ALLOC_STATE(gb_enable, always, 2, 0);
--	r300->hw.gb_enable.cmd[0] = cmdpacket0(R300_GB_ENABLE, 1);
-+	r300->hw.gb_enable.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GB_ENABLE, 1);
- 	ALLOC_STATE(gb_misc, always, R300_GB_MISC_CMDSIZE, 0);
--	r300->hw.gb_misc.cmd[0] = cmdpacket0(R300_GB_MSPOS0, 5);
-+	r300->hw.gb_misc.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GB_MSPOS0, 5);
- 	ALLOC_STATE(txe, always, R300_TXE_CMDSIZE, 0);
--	r300->hw.txe.cmd[R300_TXE_CMD_0] = cmdpacket0(R300_TX_ENABLE, 1);
-+	r300->hw.txe.cmd[R300_TXE_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_ENABLE, 1);
- 	ALLOC_STATE(ga_point_s0, always, 5, 0);
--	r300->hw.ga_point_s0.cmd[0] = cmdpacket0(R300_GA_POINT_S0, 4);
-+	r300->hw.ga_point_s0.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_POINT_S0, 4);
- 	ALLOC_STATE(ga_triangle_stipple, always, 2, 0);
--	r300->hw.ga_triangle_stipple.cmd[0] = cmdpacket0(R300_GA_TRIANGLE_STIPPLE, 1);
-+	r300->hw.ga_triangle_stipple.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_TRIANGLE_STIPPLE, 1);
- 	ALLOC_STATE(ps, always, R300_PS_CMDSIZE, 0);
--	r300->hw.ps.cmd[0] = cmdpacket0(R300_GA_POINT_SIZE, 1);
-+	r300->hw.ps.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_POINT_SIZE, 1);
- 	ALLOC_STATE(ga_point_minmax, always, 4, 0);
--	r300->hw.ga_point_minmax.cmd[0] = cmdpacket0(R300_GA_POINT_MINMAX, 3);
-+	r300->hw.ga_point_minmax.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_POINT_MINMAX, 3);
- 	ALLOC_STATE(lcntl, always, 2, 0);
--	r300->hw.lcntl.cmd[0] = cmdpacket0(R300_GA_LINE_CNTL, 1);
-+	r300->hw.lcntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_LINE_CNTL, 1);
- 	ALLOC_STATE(ga_line_stipple, always, 4, 0);
--	r300->hw.ga_line_stipple.cmd[0] = cmdpacket0(R300_GA_LINE_STIPPLE_VALUE, 3);
-+	r300->hw.ga_line_stipple.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_LINE_STIPPLE_VALUE, 3);
- 	ALLOC_STATE(shade, always, 5, 0);
--	r300->hw.shade.cmd[0] = cmdpacket0(R300_GA_ENHANCE, 4);
-+	r300->hw.shade.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_ENHANCE, 4);
- 	ALLOC_STATE(polygon_mode, always, 4, 0);
--	r300->hw.polygon_mode.cmd[0] = cmdpacket0(R300_GA_POLY_MODE, 3);
-+	r300->hw.polygon_mode.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_POLY_MODE, 3);
- 	ALLOC_STATE(fogp, always, 3, 0);
--	r300->hw.fogp.cmd[0] = cmdpacket0(R300_GA_FOG_SCALE, 2);
-+	r300->hw.fogp.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_FOG_SCALE, 2);
- 	ALLOC_STATE(zbias_cntl, always, 2, 0);
--	r300->hw.zbias_cntl.cmd[0] = cmdpacket0(R300_SU_TEX_WRAP, 1);
-+	r300->hw.zbias_cntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SU_TEX_WRAP, 1);
- 	ALLOC_STATE(zbs, always, R300_ZBS_CMDSIZE, 0);
- 	r300->hw.zbs.cmd[R300_ZBS_CMD_0] =
--	    cmdpacket0(R300_SU_POLY_OFFSET_FRONT_SCALE, 4);
-+	    cmdpacket0(r300->radeon.radeonScreen, R300_SU_POLY_OFFSET_FRONT_SCALE, 4);
- 	ALLOC_STATE(occlusion_cntl, always, 2, 0);
--	r300->hw.occlusion_cntl.cmd[0] = cmdpacket0(R300_SU_POLY_OFFSET_ENABLE, 1);
-+	r300->hw.occlusion_cntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SU_POLY_OFFSET_ENABLE, 1);
- 	ALLOC_STATE(cul, always, R300_CUL_CMDSIZE, 0);
--	r300->hw.cul.cmd[R300_CUL_CMD_0] = cmdpacket0(R300_SU_CULL_MODE, 1);
-+	r300->hw.cul.cmd[R300_CUL_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_SU_CULL_MODE, 1);
- 	ALLOC_STATE(su_depth_scale, always, 3, 0);
--	r300->hw.su_depth_scale.cmd[0] = cmdpacket0(R300_SU_DEPTH_SCALE, 2);
-+	r300->hw.su_depth_scale.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SU_DEPTH_SCALE, 2);
- 	ALLOC_STATE(rc, always, R300_RC_CMDSIZE, 0);
--	r300->hw.rc.cmd[R300_RC_CMD_0] = cmdpacket0(R300_RS_COUNT, 2);
-+	r300->hw.rc.cmd[R300_RC_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_COUNT, 2);
- 	if (is_r500) {
- 		ALLOC_STATE(ri, always, R500_RI_CMDSIZE, 0);
--		r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(R500_RS_IP_0, 16);
-+		r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R500_RS_IP_0, 16);
- 		for (i = 0; i < 8; i++) {
- 			r300->hw.ri.cmd[R300_RI_CMD_0 + i +1] =
- 			  (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) |
-@@ -412,133 +585,146 @@ void r300InitCmdBuf(r300ContextPtr r300)
-                           (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT);
- 		}
- 		ALLOC_STATE(rr, variable, R300_RR_CMDSIZE, 0);
--		r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R500_RS_INST_0, 1);
-+		r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R500_RS_INST_0, 1);
- 	} else {
- 		ALLOC_STATE(ri, always, R300_RI_CMDSIZE, 0);
--		r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(R300_RS_IP_0, 8);
-+		r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_IP_0, 8);
- 		ALLOC_STATE(rr, variable, R300_RR_CMDSIZE, 0);
--		r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R300_RS_INST_0, 1);
-+		r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_INST_0, 1);
- 	}
- 	ALLOC_STATE(sc_hyperz, always, 3, 0);
--	r300->hw.sc_hyperz.cmd[0] = cmdpacket0(R300_SC_HYPERZ, 2);
-+	r300->hw.sc_hyperz.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SC_HYPERZ, 2);
- 	ALLOC_STATE(sc_screendoor, always, 2, 0);
--	r300->hw.sc_screendoor.cmd[0] = cmdpacket0(R300_SC_SCREENDOOR, 1);
-+	r300->hw.sc_screendoor.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SC_SCREENDOOR, 1);
- 	ALLOC_STATE(us_out_fmt, always, 6, 0);
--	r300->hw.us_out_fmt.cmd[0] = cmdpacket0(R300_US_OUT_FMT, 5);
-+	r300->hw.us_out_fmt.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_OUT_FMT, 5);
- 
- 	if (is_r500) {
- 		ALLOC_STATE(fp, always, R500_FP_CMDSIZE, 0);
--		r300->hw.fp.cmd[R500_FP_CMD_0] = cmdpacket0(R500_US_CONFIG, 2);
-+		r300->hw.fp.cmd[R500_FP_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R500_US_CONFIG, 2);
- 		r300->hw.fp.cmd[R500_FP_CNTL] = R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO;
--		r300->hw.fp.cmd[R500_FP_CMD_1] = cmdpacket0(R500_US_CODE_ADDR, 3);
--		r300->hw.fp.cmd[R500_FP_CMD_2] = cmdpacket0(R500_US_FC_CTRL, 1);
-+		r300->hw.fp.cmd[R500_FP_CMD_1] = cmdpacket0(r300->radeon.radeonScreen, R500_US_CODE_ADDR, 3);
-+		r300->hw.fp.cmd[R500_FP_CMD_2] = cmdpacket0(r300->radeon.radeonScreen, R500_US_FC_CTRL, 1);
- 		r300->hw.fp.cmd[R500_FP_FC_CNTL] = 0; /* FIXME when we add flow control */
- 
- 		ALLOC_STATE(r500fp, r500fp, R500_FPI_CMDSIZE, 0);
--		r300->hw.r500fp.cmd[R300_FPI_CMD_0] = cmdr500fp(0, 0, 0, 0);
-+		r300->hw.r500fp.cmd[R300_FPI_CMD_0] =
-+			cmdr500fp(r300->radeon.radeonScreen, 0, 0, 0, 0);
-+		r300->hw.r500fp.emit = emit_r500fp;
- 		ALLOC_STATE(r500fp_const, r500fp_const, R500_FPP_CMDSIZE, 0);
--		r300->hw.r500fp_const.cmd[R300_FPI_CMD_0] = cmdr500fp(0, 0, 1, 0);
-+		r300->hw.r500fp_const.cmd[R300_FPI_CMD_0] =
-+			cmdr500fp(r300->radeon.radeonScreen, 0, 0, 1, 0);
-+		r300->hw.r500fp_const.emit = emit_r500fp;
- 	} else {
- 		ALLOC_STATE(fp, always, R300_FP_CMDSIZE, 0);
--		r300->hw.fp.cmd[R300_FP_CMD_0] = cmdpacket0(R300_US_CONFIG, 3);
--		r300->hw.fp.cmd[R300_FP_CMD_1] = cmdpacket0(R300_US_CODE_ADDR_0, 4);
-+		r300->hw.fp.cmd[R300_FP_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_CONFIG, 3);
-+		r300->hw.fp.cmd[R300_FP_CMD_1] = cmdpacket0(r300->radeon.radeonScreen, R300_US_CODE_ADDR_0, 4);
-+
- 		ALLOC_STATE(fpt, variable, R300_FPT_CMDSIZE, 0);
--		r300->hw.fpt.cmd[R300_FPT_CMD_0] = cmdpacket0(R300_US_TEX_INST_0, 0);
-+		r300->hw.fpt.cmd[R300_FPT_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_TEX_INST_0, 0);
- 
- 		ALLOC_STATE(fpi[0], variable, R300_FPI_CMDSIZE, 0);
--		r300->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_RGB_INST_0, 1);
-+		r300->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_ALU_RGB_INST_0, 1);
- 		ALLOC_STATE(fpi[1], variable, R300_FPI_CMDSIZE, 1);
--		r300->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_RGB_ADDR_0, 1);
-+		r300->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_ALU_RGB_ADDR_0, 1);
- 		ALLOC_STATE(fpi[2], variable, R300_FPI_CMDSIZE, 2);
--		r300->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_ALPHA_INST_0, 1);
-+		r300->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_ALU_ALPHA_INST_0, 1);
- 		ALLOC_STATE(fpi[3], variable, R300_FPI_CMDSIZE, 3);
--		r300->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_ALPHA_ADDR_0, 1);
-+		r300->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_ALU_ALPHA_ADDR_0, 1);
- 		ALLOC_STATE(fpp, variable, R300_FPP_CMDSIZE, 0);
--		r300->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(R300_PFS_PARAM_0_X, 0);
-+		r300->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_PFS_PARAM_0_X, 0);
- 	}
- 	ALLOC_STATE(fogs, always, R300_FOGS_CMDSIZE, 0);
--	r300->hw.fogs.cmd[R300_FOGS_CMD_0] = cmdpacket0(R300_FG_FOG_BLEND, 1);
-+	r300->hw.fogs.cmd[R300_FOGS_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_FG_FOG_BLEND, 1);
- 	ALLOC_STATE(fogc, always, R300_FOGC_CMDSIZE, 0);
--	r300->hw.fogc.cmd[R300_FOGC_CMD_0] = cmdpacket0(R300_FG_FOG_COLOR_R, 3);
-+	r300->hw.fogc.cmd[R300_FOGC_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_FG_FOG_COLOR_R, 3);
- 	ALLOC_STATE(at, always, R300_AT_CMDSIZE, 0);
--	r300->hw.at.cmd[R300_AT_CMD_0] = cmdpacket0(R300_FG_ALPHA_FUNC, 2);
-+	r300->hw.at.cmd[R300_AT_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_FG_ALPHA_FUNC, 2);
- 	ALLOC_STATE(fg_depth_src, always, 2, 0);
--	r300->hw.fg_depth_src.cmd[0] = cmdpacket0(R300_FG_DEPTH_SRC, 1);
-+	r300->hw.fg_depth_src.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_FG_DEPTH_SRC, 1);
- 	ALLOC_STATE(rb3d_cctl, always, 2, 0);
--	r300->hw.rb3d_cctl.cmd[0] = cmdpacket0(R300_RB3D_CCTL, 1);
-+	r300->hw.rb3d_cctl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_CCTL, 1);
- 	ALLOC_STATE(bld, always, R300_BLD_CMDSIZE, 0);
--	r300->hw.bld.cmd[R300_BLD_CMD_0] = cmdpacket0(R300_RB3D_CBLEND, 2);
-+	r300->hw.bld.cmd[R300_BLD_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_CBLEND, 2);
- 	ALLOC_STATE(cmk, always, R300_CMK_CMDSIZE, 0);
--	r300->hw.cmk.cmd[R300_CMK_CMD_0] = cmdpacket0(RB3D_COLOR_CHANNEL_MASK, 1);
-+	r300->hw.cmk.cmd[R300_CMK_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, RB3D_COLOR_CHANNEL_MASK, 1);
- 	if (is_r500) {
- 		ALLOC_STATE(blend_color, always, 3, 0);
--		r300->hw.blend_color.cmd[0] = cmdpacket0(R500_RB3D_CONSTANT_COLOR_AR, 2);
-+		r300->hw.blend_color.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R500_RB3D_CONSTANT_COLOR_AR, 2);
- 	} else {
- 		ALLOC_STATE(blend_color, always, 2, 0);
--		r300->hw.blend_color.cmd[0] = cmdpacket0(R300_RB3D_BLEND_COLOR, 1);
-+		r300->hw.blend_color.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_BLEND_COLOR, 1);
- 	}
- 	ALLOC_STATE(rop, always, 2, 0);
--	r300->hw.rop.cmd[0] = cmdpacket0(R300_RB3D_ROPCNTL, 1);
-+	r300->hw.rop.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_ROPCNTL, 1);
- 	ALLOC_STATE(cb, always, R300_CB_CMDSIZE, 0);
--	r300->hw.cb.cmd[R300_CB_CMD_0] = cmdpacket0(R300_RB3D_COLOROFFSET0, 1);
--	r300->hw.cb.cmd[R300_CB_CMD_1] = cmdpacket0(R300_RB3D_COLORPITCH0, 1);
-+	r300->hw.cb.emit = &emit_cb_offset;
- 	ALLOC_STATE(rb3d_dither_ctl, always, 10, 0);
--	r300->hw.rb3d_dither_ctl.cmd[0] = cmdpacket0(R300_RB3D_DITHER_CTL, 9);
-+	r300->hw.rb3d_dither_ctl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_DITHER_CTL, 9);
- 	ALLOC_STATE(rb3d_aaresolve_ctl, always, 2, 0);
--	r300->hw.rb3d_aaresolve_ctl.cmd[0] = cmdpacket0(R300_RB3D_AARESOLVE_CTL, 1);
-+	r300->hw.rb3d_aaresolve_ctl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_AARESOLVE_CTL, 1);
- 	ALLOC_STATE(rb3d_discard_src_pixel_lte_threshold, always, 3, 0);
--	r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[0] = cmdpacket0(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 2);
-+	r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 2);
- 	ALLOC_STATE(zs, always, R300_ZS_CMDSIZE, 0);
- 	r300->hw.zs.cmd[R300_ZS_CMD_0] =
--	    cmdpacket0(R300_ZB_CNTL, 3);
-+	    cmdpacket0(r300->radeon.radeonScreen, R300_ZB_CNTL, 3);
- 	ALLOC_STATE(zstencil_format, always, 5, 0);
- 	r300->hw.zstencil_format.cmd[0] =
--	    cmdpacket0(R300_ZB_FORMAT, 4);
-+	    cmdpacket0(r300->radeon.radeonScreen, R300_ZB_FORMAT, 4);
- 	ALLOC_STATE(zb, always, R300_ZB_CMDSIZE, 0);
--	r300->hw.zb.cmd[R300_ZB_CMD_0] = cmdpacket0(R300_ZB_DEPTHOFFSET, 2);
-+	r300->hw.zb.emit = emit_zb_offset;
- 	ALLOC_STATE(zb_depthclearvalue, always, 2, 0);
--	r300->hw.zb_depthclearvalue.cmd[0] = cmdpacket0(R300_ZB_DEPTHCLEARVALUE, 1);
-+	r300->hw.zb_depthclearvalue.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_DEPTHCLEARVALUE, 1);
- 	ALLOC_STATE(unk4F30, always, 3, 0);
--	r300->hw.unk4F30.cmd[0] = cmdpacket0(0x4F30, 2);
-+	r300->hw.unk4F30.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, 0x4F30, 2);
- 	ALLOC_STATE(zb_hiz_offset, always, 2, 0);
--	r300->hw.zb_hiz_offset.cmd[0] = cmdpacket0(R300_ZB_HIZ_OFFSET, 1);
-+	r300->hw.zb_hiz_offset.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_HIZ_OFFSET, 1);
- 	ALLOC_STATE(zb_hiz_pitch, always, 2, 0);
--	r300->hw.zb_hiz_pitch.cmd[0] = cmdpacket0(R300_ZB_HIZ_PITCH, 1);
-+	r300->hw.zb_hiz_pitch.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_HIZ_PITCH, 1);
- 
- 	/* VPU only on TCL */
- 	if (has_tcl) {
-    	        int i;
- 		ALLOC_STATE(vpi, vpu, R300_VPI_CMDSIZE, 0);
--		r300->hw.vpi.cmd[R300_VPI_CMD_0] =
--		    cmdvpu(R300_PVS_CODE_START, 0);
-+		r300->hw.vpi.cmd[0] =
-+		    cmdvpu(r300->radeon.radeonScreen, R300_PVS_CODE_START, 0);
-+		r300->hw.vpi.emit = emit_vpu;
- 
- 		if (is_r500) {
- 		    ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0);
--		    r300->hw.vpp.cmd[R300_VPP_CMD_0] =
--			cmdvpu(R500_PVS_CONST_START, 0);
-+		    r300->hw.vpp.cmd[0] =
-+			cmdvpu(r300->radeon.radeonScreen, R500_PVS_CONST_START, 0);
-+		    r300->hw.vpp.emit = emit_vpu;
- 
- 		    ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0);
--		    r300->hw.vps.cmd[R300_VPS_CMD_0] =
--			cmdvpu(R500_POINT_VPORT_SCALE_OFFSET, 1);
-+		    r300->hw.vps.cmd[0] =
-+			cmdvpu(r300->radeon.radeonScreen, R500_POINT_VPORT_SCALE_OFFSET, 1);
-+		    r300->hw.vps.emit = emit_vpu;
- 
- 			for (i = 0; i < 6; i++) {
--				ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0);
--				r300->hw.vpucp[i].cmd[R300_VPUCP_CMD_0] =
--					cmdvpu(R500_PVS_UCP_START + i, 1);
-+			  ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0);
-+			  r300->hw.vpucp[i].cmd[0] =
-+				  cmdvpu(r300->radeon.radeonScreen,
-+                           R500_PVS_UCP_START + i, 1);
-+				r300->hw.vpucp[i].emit = emit_vpu;
- 			}
- 		} else {
- 		    ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0);
--		    r300->hw.vpp.cmd[R300_VPP_CMD_0] =
--			cmdvpu(R300_PVS_CONST_START, 0);
-+		    r300->hw.vpp.cmd[0] =
-+			cmdvpu(r300->radeon.radeonScreen, R300_PVS_CONST_START, 0);
-+		    r300->hw.vpp.emit = emit_vpu;
- 
- 		    ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0);
--		    r300->hw.vps.cmd[R300_VPS_CMD_0] =
--			cmdvpu(R300_POINT_VPORT_SCALE_OFFSET, 1);
-+		    r300->hw.vps.cmd[0] =
-+			cmdvpu(r300->radeon.radeonScreen, R300_POINT_VPORT_SCALE_OFFSET, 1);
-+		    r300->hw.vps.emit = emit_vpu;
- 
- 			for (i = 0; i < 6; i++) {
- 				ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0);
--				r300->hw.vpucp[i].cmd[R300_VPUCP_CMD_0] =
--					cmdvpu(R300_PVS_UCP_START + i, 1);
-+				r300->hw.vpucp[i].cmd[0] =
-+					cmdvpu(r300->radeon.radeonScreen,
-+					       R300_PVS_UCP_START + i, 1);
-+				r300->hw.vpucp[i].emit = emit_vpu;
- 			}
- 		}
- 	}
-@@ -546,33 +732,34 @@ void r300InitCmdBuf(r300ContextPtr r300)
- 	/* Textures */
- 	ALLOC_STATE(tex.filter, variable, mtu + 1, 0);
- 	r300->hw.tex.filter.cmd[R300_TEX_CMD_0] =
--	    cmdpacket0(R300_TX_FILTER0_0, 0);
-+	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER0_0, 0);
- 
- 	ALLOC_STATE(tex.filter_1, variable, mtu + 1, 0);
- 	r300->hw.tex.filter_1.cmd[R300_TEX_CMD_0] =
--	    cmdpacket0(R300_TX_FILTER1_0, 0);
-+	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER1_0, 0);
- 
- 	ALLOC_STATE(tex.size, variable, mtu + 1, 0);
--	r300->hw.tex.size.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_SIZE_0, 0);
-+	r300->hw.tex.size.cmd[R300_TEX_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_SIZE_0, 0);
- 
- 	ALLOC_STATE(tex.format, variable, mtu + 1, 0);
- 	r300->hw.tex.format.cmd[R300_TEX_CMD_0] =
--	    cmdpacket0(R300_TX_FORMAT_0, 0);
-+	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_FORMAT_0, 0);
- 
- 	ALLOC_STATE(tex.pitch, variable, mtu + 1, 0);
--	r300->hw.tex.pitch.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_FORMAT2_0, 0);
-+	r300->hw.tex.pitch.cmd[R300_TEX_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_FORMAT2_0, 0);
- 
--	ALLOC_STATE(tex.offset, variable, mtu + 1, 0);
-+	ALLOC_STATE(tex.offset, variable, 1, 0);
- 	r300->hw.tex.offset.cmd[R300_TEX_CMD_0] =
--	    cmdpacket0(R300_TX_OFFSET_0, 0);
-+	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_OFFSET_0, 0);
-+	r300->hw.tex.offset.emit = &emit_tex_offsets;
- 
- 	ALLOC_STATE(tex.chroma_key, variable, mtu + 1, 0);
- 	r300->hw.tex.chroma_key.cmd[R300_TEX_CMD_0] =
--	    cmdpacket0(R300_TX_CHROMA_KEY_0, 0);
-+	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_CHROMA_KEY_0, 0);
- 
- 	ALLOC_STATE(tex.border_color, variable, mtu + 1, 0);
- 	r300->hw.tex.border_color.cmd[R300_TEX_CMD_0] =
--	    cmdpacket0(R300_TX_BORDER_COLOR_0, 0);
-+	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_BORDER_COLOR_0, 0);
- 
- 	r300->hw.is_dirty = GL_TRUE;
- 	r300->hw.all_dirty = GL_TRUE;
-@@ -587,6 +774,7 @@ void r300InitCmdBuf(r300ContextPtr r300)
- 	if (size > 64 * 256)
- 		size = 64 * 256;
- 
-+    size = 64 * 1024 / 4;
- 	if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA)) {
- 		fprintf(stderr, "sizeof(drm_r300_cmd_header_t)=%zd\n",
- 			sizeof(drm_r300_cmd_header_t));
-@@ -597,10 +785,19 @@ void r300InitCmdBuf(r300ContextPtr r300)
- 			size * 4, r300->hw.max_state_size * 4);
- 	}
- 
-+    if (r300->radeon.radeonScreen->kernel_mm) {
-+        int fd = r300->radeon.radeonScreen->driScreen->fd;
-+        r300->cmdbuf.csm = radeon_cs_manager_gem_ctor(fd);
-+    } else {
-+        r300->cmdbuf.csm = radeon_cs_manager_legacy_ctor(&r300->radeon);
-+    }
-+    if (r300->cmdbuf.csm == NULL) {
-+        /* FIXME: fatal error */
-+        return;
-+    }
-+    r300->cmdbuf.cs = radeon_cs_create(r300->cmdbuf.csm, size);
-+    assert(r300->cmdbuf.cs != NULL);
- 	r300->cmdbuf.size = size;
--	r300->cmdbuf.cmd_buf = (uint32_t *) CALLOC(size * 4);
--	r300->cmdbuf.count_used = 0;
--	r300->cmdbuf.count_reemit = 0;
- }
- 
- /**
-@@ -610,66 +807,13 @@ void r300DestroyCmdBuf(r300ContextPtr r300)
- {
- 	struct r300_state_atom *atom;
- 
--	FREE(r300->cmdbuf.cmd_buf);
--
-+    radeon_cs_destroy(r300->cmdbuf.cs);
- 	foreach(atom, &r300->hw.atomlist) {
- 		FREE(atom->cmd);
- 	}
--}
--
--void r300EmitBlit(r300ContextPtr rmesa,
--		  GLuint color_fmt,
--		  GLuint src_pitch,
--		  GLuint src_offset,
--		  GLuint dst_pitch,
--		  GLuint dst_offset,
--		  GLint srcx, GLint srcy,
--		  GLint dstx, GLint dsty, GLuint w, GLuint h)
--{
--	drm_r300_cmd_header_t *cmd;
--
--	if (RADEON_DEBUG & DEBUG_IOCTL)
--		fprintf(stderr,
--			"%s src %x/%x %d,%d dst: %x/%x %d,%d sz: %dx%d\n",
--			__FUNCTION__, src_pitch, src_offset, srcx, srcy,
--			dst_pitch, dst_offset, dstx, dsty, w, h);
--
--	assert((src_pitch & 63) == 0);
--	assert((dst_pitch & 63) == 0);
--	assert((src_offset & 1023) == 0);
--	assert((dst_offset & 1023) == 0);
--	assert(w < (1 << 16));
--	assert(h < (1 << 16));
--
--	cmd = (drm_r300_cmd_header_t *) r300AllocCmdBuf(rmesa, 8, __FUNCTION__);
--
--	cmd[0].header.cmd_type = R300_CMD_PACKET3;
--	cmd[0].header.pad0 = R300_CMD_PACKET3_RAW;
--	cmd[1].u = R300_CP_CMD_BITBLT_MULTI | (5 << 16);
--	cmd[2].u = (RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
--		    RADEON_GMC_DST_PITCH_OFFSET_CNTL |
--		    RADEON_GMC_BRUSH_NONE |
--		    (color_fmt << 8) |
--		    RADEON_GMC_SRC_DATATYPE_COLOR |
--		    RADEON_ROP3_S |
--		    RADEON_DP_SRC_SOURCE_MEMORY |
--		    RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
--
--	cmd[3].u = ((src_pitch / 64) << 22) | (src_offset >> 10);
--	cmd[4].u = ((dst_pitch / 64) << 22) | (dst_offset >> 10);
--	cmd[5].u = (srcx << 16) | srcy;
--	cmd[6].u = (dstx << 16) | dsty;	/* dst */
--	cmd[7].u = (w << 16) | h;
--}
--
--void r300EmitWait(r300ContextPtr rmesa, GLuint flags)
--{
--	drm_r300_cmd_header_t *cmd;
--
--	assert(!(flags & ~(R300_WAIT_2D | R300_WAIT_3D)));
--
--	cmd = (drm_r300_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, __FUNCTION__);
--	cmd[0].u = 0;
--	cmd[0].wait.cmd_type = R300_CMD_WAIT;
--	cmd[0].wait.flags = flags;
-+    if (r300->radeon.radeonScreen->driScreen->dri2.enabled || r300->radeon.radeonScreen->kernel_mm) {
-+        radeon_cs_manager_gem_dtor(r300->cmdbuf.csm);
-+    } else {
-+        radeon_cs_manager_legacy_dtor(r300->cmdbuf.csm);
-+    }
- }
-diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.h b/src/mesa/drivers/dri/r300/r300_cmdbuf.h
-index a8eaa58..bb7e0bf 100644
---- a/src/mesa/drivers/dri/r300/r300_cmdbuf.h
-+++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.h
-@@ -37,6 +37,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #define __R300_CMDBUF_H__
- 
- #include "r300_context.h"
-+#include "radeon_cs.h"
- 
- extern int r300FlushCmdBufLocked(r300ContextPtr r300, const char *caller);
- extern int r300FlushCmdBuf(r300ContextPtr r300, const char *caller);
-@@ -45,72 +46,92 @@ extern void r300EmitState(r300ContextPtr r300);
- 
- extern void r300InitCmdBuf(r300ContextPtr r300);
- extern void r300DestroyCmdBuf(r300ContextPtr r300);
-+extern void r300EnsureCmdBufSpace(r300ContextPtr r300, int dwords, const char *caller);
-+
-+void r300BeginBatch(r300ContextPtr r300,
-+		    int n,
-+		    int dostate,
-+                    const char *file,
-+                    const char *function,
-+                    int line);
- 
- /**
-- * Make sure that enough space is available in the command buffer
-- * by flushing if necessary.
-- *
-- * \param dwords The number of dwords we need to be free on the command buffer
-+ * Every function writing to the command buffer needs to declare this
-+ * to get the necessary local variables.
-  */
--static INLINE void r300EnsureCmdBufSpace(r300ContextPtr r300,
--					     int dwords, const char *caller)
--{
--	assert(dwords < r300->cmdbuf.size);
-+#define BATCH_LOCALS(r300) \
-+	const r300ContextPtr b_l_r300 = r300
- 
--	if (r300->cmdbuf.count_used + dwords > r300->cmdbuf.size)
--		r300FlushCmdBuf(r300, caller);
--}
-+/**
-+ * Prepare writing n dwords to the command buffer,
-+ * including producing any necessary state emits on buffer wraparound.
-+ */
-+#define BEGIN_BATCH(n) r300BeginBatch(b_l_r300, n, 1, __FILE__, __FUNCTION__, __LINE__)
- 
- /**
-- * Allocate the given number of dwords in the command buffer and return
-- * a pointer to the allocated area.
-- * When necessary, these functions cause a flush. r300AllocCmdBuf() also
-- * causes state reemission after a flush. This is necessary to ensure
-- * correct hardware state after an unlock.
-+ * Same as BEGIN_BATCH, but do not cause automatic state emits.
-  */
--static INLINE uint32_t *r300RawAllocCmdBuf(r300ContextPtr r300,
--					       int dwords, const char *caller)
--{
--	uint32_t *ptr;
--
--	r300EnsureCmdBufSpace(r300, dwords, caller);
--
--	ptr = &r300->cmdbuf.cmd_buf[r300->cmdbuf.count_used];
--	r300->cmdbuf.count_used += dwords;
--	return ptr;
--}
--
--static INLINE uint32_t *r300AllocCmdBuf(r300ContextPtr r300,
--					    int dwords, const char *caller)
--{
--	uint32_t *ptr;
--
--	r300EnsureCmdBufSpace(r300, dwords, caller);
--
--	if (!r300->cmdbuf.count_used) {
--		if (RADEON_DEBUG & DEBUG_IOCTL)
--			fprintf(stderr,
--				"Reemit state after flush (from %s)\n", caller);
--		r300EmitState(r300);
--	}
--
--	ptr = &r300->cmdbuf.cmd_buf[r300->cmdbuf.count_used];
--	r300->cmdbuf.count_used += dwords;
--	return ptr;
--}
--
--extern void r300EmitBlit(r300ContextPtr rmesa,
--			 GLuint color_fmt,
--			 GLuint src_pitch,
--			 GLuint src_offset,
--			 GLuint dst_pitch,
--			 GLuint dst_offset,
--			 GLint srcx, GLint srcy,
--			 GLint dstx, GLint dsty, GLuint w, GLuint h);
--
--extern void r300EmitWait(r300ContextPtr rmesa, GLuint flags);
--extern void r300EmitLOAD_VBPNTR(r300ContextPtr rmesa, int start);
--extern void r300EmitVertexShader(r300ContextPtr rmesa);
--extern void r300EmitPixelShader(r300ContextPtr rmesa);
-+#define BEGIN_BATCH_NO_AUTOSTATE(n) r300BeginBatch(b_l_r300, n, 0, __FILE__, __FUNCTION__, __LINE__)
-+
-+/**
-+ * Write one dword to the command buffer.
-+ */
-+#define OUT_BATCH(data) \
-+	do { \
-+        radeon_cs_write_dword(b_l_r300->cmdbuf.cs, data);\
-+	} while(0)
-+
-+/**
-+ * Write a relocated dword to the command buffer.
-+ */
-+#define OUT_BATCH_RELOC(data, bo, offset, rd, wd, flags) \
-+	do { \
-+        if (offset) {\
-+            fprintf(stderr, "(%s:%s:%d) offset : %d\n",\
-+            __FILE__, __FUNCTION__, __LINE__, offset);\
-+        }\
-+        radeon_cs_write_dword(b_l_r300->cmdbuf.cs, offset);\
-+        radeon_cs_write_reloc(b_l_r300->cmdbuf.cs, \
-+                              bo, \
-+                              rd, \
-+                              wd, \
-+                              flags);\
-+	} while(0)
-+
-+/**
-+ * Write n dwords from ptr to the command buffer.
-+ */
-+#define OUT_BATCH_TABLE(ptr,n) \
-+	do { \
-+		int _i; \
-+        for (_i=0; _i < n; _i++) {\
-+            radeon_cs_write_dword(b_l_r300->cmdbuf.cs, ptr[_i]);\
-+        }\
-+	} while(0)
-+
-+/**
-+ * Finish writing dwords to the command buffer.
-+ * The number of (direct or indirect) OUT_BATCH calls between the previous
-+ * BEGIN_BATCH and END_BATCH must match the number specified at BEGIN_BATCH time.
-+ */
-+#define END_BATCH() \
-+	do { \
-+        radeon_cs_end(b_l_r300->cmdbuf.cs, __FILE__, __FUNCTION__, __LINE__);\
-+	} while(0)
-+
-+/**
-+ * After the last END_BATCH() of rendering, this indicates that flushing
-+ * the command buffer now is okay.
-+ */
-+#define COMMIT_BATCH() \
-+	do { \
-+	} while(0)
-+
-+void emit_vpu(r300ContextPtr r300, struct r300_state_atom * atom);
-+int check_vpu(r300ContextPtr r300, struct r300_state_atom *atom);
-+
-+void emit_r500fp(r300ContextPtr r300, struct r300_state_atom * atom);
-+int check_r500fp(r300ContextPtr r300, struct r300_state_atom *atom);
-+int check_r500fp_const(r300ContextPtr r300, struct r300_state_atom *atom);
- 
- #endif				/* __R300_CMDBUF_H__ */
-diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c
-index 3743627..5fce841 100644
---- a/src/mesa/drivers/dri/r300/r300_context.c
-+++ b/src/mesa/drivers/dri/r300/r300_context.c
-@@ -59,15 +59,13 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "radeon_span.h"
- #include "r300_context.h"
- #include "r300_cmdbuf.h"
-+#include "r300_mipmap_tree.h"
- #include "r300_state.h"
- #include "r300_ioctl.h"
- #include "r300_tex.h"
- #include "r300_emit.h"
- #include "r300_swtcl.h"
- 
--#ifdef USER_BUFFERS
--#include "r300_mem.h"
--#endif
- 
- #include "vblank.h"
- #include "utils.h"
-@@ -178,6 +176,17 @@ static const struct tnl_pipeline_stage *r300_pipeline[] = {
- 	0,
- };
- 
-+static void r300RunPipeline(GLcontext * ctx)
-+{
-+    _mesa_lock_context_textures(ctx);
-+
-+    if (ctx->NewState)
-+        _mesa_update_state_locked(ctx);
-+    
-+    _tnl_run_pipeline(ctx);
-+    _mesa_unlock_context_textures(ctx);
-+}
-+
- /* Create the device specific rendering context.
-  */
- GLboolean r300CreateContext(const __GLcontextModes * glVisual,
-@@ -189,7 +198,7 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
- 	struct dd_function_table functions;
- 	r300ContextPtr r300;
- 	GLcontext *ctx;
--	int tcl_mode, i;
-+	int tcl_mode;
- 
- 	assert(glVisual);
- 	assert(driContextPriv);
-@@ -221,10 +230,6 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
- 	r300InitTextureFuncs(&functions);
- 	r300InitShaderFuncs(&functions);
- 
--#ifdef USER_BUFFERS
--	r300_mem_init(r300);
--#endif
--
- 	if (!radeonInitContext(&r300->radeon, &functions,
- 			       glVisual, driContextPriv,
- 			       sharedContextPrivate)) {
-@@ -233,33 +238,6 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
- 	}
- 
- 	/* Init r300 context data */
--	r300->dma.buf0_address =
--	    r300->radeon.radeonScreen->buffers->list[0].address;
--
--	(void)memset(r300->texture_heaps, 0, sizeof(r300->texture_heaps));
--	make_empty_list(&r300->swapped);
--
--	r300->nr_heaps = 1 /* screen->numTexHeaps */ ;
--	assert(r300->nr_heaps < RADEON_NR_TEX_HEAPS);
--	for (i = 0; i < r300->nr_heaps; i++) {
--		/* *INDENT-OFF* */
--		r300->texture_heaps[i] = driCreateTextureHeap(i, r300,
--							       screen->
--							       texSize[i], 12,
--							       RADEON_NR_TEX_REGIONS,
--							       (drmTextureRegionPtr)
--							       r300->radeon.sarea->
--							       tex_list[i],
--							       &r300->radeon.sarea->
--							       tex_age[i],
--							       &r300->swapped,
--							       sizeof
--							       (r300TexObj),
--							       (destroy_texture_object_t
--								*)
--							       r300DestroyTexObj);
--		/* *INDENT-ON* */
--	}
- 	r300->texture_depth = driQueryOptioni(&r300->radeon.optionCache,
- 					      "texture_depth");
- 	if (r300->texture_depth == DRI_CONF_TEXTURE_DEPTH_FB)
-@@ -298,13 +276,11 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
- 	ctx->Const.MaxLineWidth = R300_LINESIZE_MAX;
- 	ctx->Const.MaxLineWidthAA = R300_LINESIZE_MAX;
- 
--#ifdef USER_BUFFERS
- 	/* Needs further modifications */
- #if 0
- 	ctx->Const.MaxArrayLockSize =
- 	    ( /*512 */ RADEON_BUFFER_SIZE * 16 * 1024) / (4 * 4);
- #endif
--#endif
- 
- 	/* Initialize the software rasterizer and helper modules.
- 	 */
-@@ -383,7 +359,7 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
- 	if (!(screen->chip_flags & RADEON_CHIPSET_TCL))
- 	        r300InitSwtcl(ctx);
- 
--	TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
-+	TNL_CONTEXT(ctx)->Driver.RunPipeline = r300RunPipeline;
- 
- 	tcl_mode = driQueryOptioni(&r300->radeon.optionCache, "tcl_mode");
- 	if (driQueryOptionb(&r300->radeon.optionCache, "no_rast")) {
-@@ -406,72 +382,6 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
- 	return GL_TRUE;
- }
- 
--static void r300FreeGartAllocations(r300ContextPtr r300)
--{
--	int i, ret, tries = 0, done_age, in_use = 0;
--	drm_radeon_mem_free_t memfree;
--
--	memfree.region = RADEON_MEM_REGION_GART;
--
--#ifdef USER_BUFFERS
--	for (i = r300->rmm->u_last; i > 0; i--) {
--		if (r300->rmm->u_list[i].ptr == NULL) {
--			continue;
--		}
--
--		/* check whether this buffer is still in use */
--		if (r300->rmm->u_list[i].pending) {
--			in_use++;
--		}
--	}
--	/* Cannot flush/lock if no context exists. */
--	if (in_use)
--		r300FlushCmdBuf(r300, __FUNCTION__);
--
--	done_age = radeonGetAge((radeonContextPtr) r300);
--
--	for (i = r300->rmm->u_last; i > 0; i--) {
--		if (r300->rmm->u_list[i].ptr == NULL) {
--			continue;
--		}
--
--		/* check whether this buffer is still in use */
--		if (!r300->rmm->u_list[i].pending) {
--			continue;
--		}
--
--		assert(r300->rmm->u_list[i].h_pending == 0);
--
--		tries = 0;
--		while (r300->rmm->u_list[i].age > done_age && tries++ < 1000) {
--			usleep(10);
--			done_age = radeonGetAge((radeonContextPtr) r300);
--		}
--		if (tries >= 1000) {
--			WARN_ONCE("Failed to idle region!");
--		}
--
--		memfree.region_offset = (char *)r300->rmm->u_list[i].ptr -
--		    (char *)r300->radeon.radeonScreen->gartTextures.map;
--
--		ret = drmCommandWrite(r300->radeon.radeonScreen->driScreen->fd,
--				      DRM_RADEON_FREE, &memfree,
--				      sizeof(memfree));
--		if (ret) {
--			fprintf(stderr, "Failed to free at %p\nret = %s\n",
--				r300->rmm->u_list[i].ptr, strerror(-ret));
--		} else {
--			if (i == r300->rmm->u_last)
--				r300->rmm->u_last--;
--
--			r300->rmm->u_list[i].pending = 0;
--			r300->rmm->u_list[i].ptr = NULL;
--		}
--	}
--	r300->rmm->u_head = i;
--#endif				/* USER_BUFFERS */
--}
--
- /* Destroy the device specific context.
-  */
- void r300DestroyContext(__DRIcontextPrivate * driContextPriv)
-@@ -495,23 +405,12 @@ void r300DestroyContext(__DRIcontextPrivate * driContextPriv)
- 	assert(r300);		/* should never be null */
- 
- 	if (r300) {
--		GLboolean release_texture_heaps;
--
--		release_texture_heaps =
--		    (r300->radeon.glCtx->Shared->RefCount == 1);
- 		_swsetup_DestroyContext(r300->radeon.glCtx);
- 		_tnl_DestroyContext(r300->radeon.glCtx);
- 		_vbo_DestroyContext(r300->radeon.glCtx);
- 		_swrast_DestroyContext(r300->radeon.glCtx);
- 
--		if (r300->dma.current.buf) {
--			r300ReleaseDmaRegion(r300, &r300->dma.current,
--					     __FUNCTION__);
--#ifndef USER_BUFFERS
--			r300FlushCmdBuf(r300, __FUNCTION__);
--#endif
--		}
--		r300FreeGartAllocations(r300);
-+		r300FlushCmdBuf(r300, __FUNCTION__);
- 		r300DestroyCmdBuf(r300);
- 
- 		if (radeon->state.scissor.pClipRects) {
-@@ -519,28 +418,11 @@ void r300DestroyContext(__DRIcontextPrivate * driContextPriv)
- 			radeon->state.scissor.pClipRects = NULL;
- 		}
- 
--		if (release_texture_heaps) {
--			/* This share group is about to go away, free our private
--			 * texture object data.
--			 */
--			int i;
--
--			for (i = 0; i < r300->nr_heaps; i++) {
--				driDestroyTextureHeap(r300->texture_heaps[i]);
--				r300->texture_heaps[i] = NULL;
--			}
--
--			assert(is_empty_list(&r300->swapped));
--		}
--
- 		radeonCleanupContext(&r300->radeon);
- 
--#ifdef USER_BUFFERS
- 		/* the memory manager might be accessed when Mesa frees the shared
- 		 * state, so don't destroy it earlier
- 		 */
--		r300_mem_destroy(r300);
--#endif
- 
- 		/* free the option cache */
- 		driDestroyOptionCache(&r300->radeon.optionCache);
-diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h
-index c15e9fa..30229ed 100644
---- a/src/mesa/drivers/dri/r300/r300_context.h
-+++ b/src/mesa/drivers/dri/r300/r300_context.h
-@@ -42,13 +42,12 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "radeon_drm.h"
- #include "dri_util.h"
- #include "texmem.h"
-+#include "radeon_bo.h"
- 
- #include "main/macros.h"
- #include "main/mtypes.h"
- #include "main/colormac.h"
- 
--#define USER_BUFFERS
--
- struct r300_context;
- typedef struct r300_context r300ContextRec;
- typedef struct r300_context *r300ContextPtr;
-@@ -122,68 +121,51 @@ static INLINE uint32_t r300PackFloat24(float f)
- 
- /************ DMA BUFFERS **************/
- 
--/* Need refcounting on dma buffers:
-- */
--struct r300_dma_buffer {
--	int refcount;		/**< the number of retained regions in buf */
--	drmBufPtr buf;
--	int id;
--};
--#undef GET_START
--#ifdef USER_BUFFERS
--#define GET_START(rvb) (r300GartOffsetFromVirtual(rmesa, (rvb)->address+(rvb)->start))
--#else
--#define GET_START(rvb) (rmesa->radeon.radeonScreen->gart_buffer_offset +		\
--			(rvb)->address - rmesa->dma.buf0_address +	\
--			(rvb)->start)
--#endif
--/* A retained region, eg vertices for indexed vertices.
-- */
--struct r300_dma_region {
--	struct r300_dma_buffer *buf;
--	char *address;		/* == buf->address */
--	int start, end, ptr;	/* offsets from start of buf */
--
--	int aos_offset;		/* address in GART memory */
--	int aos_stride;		/* distance between elements, in dwords */
--	int aos_size;		/* number of components (1-4) */
--};
- 
--struct r300_dma {
--	/* Active dma region.  Allocations for vertices and retained
--	 * regions come from here.  Also used for emitting random vertices,
--	 * these may be flushed by calling flush_current();
--	 */
--	struct r300_dma_region current;
-+/* Texture related */
-+typedef struct r300_tex_obj r300TexObj, *r300TexObjPtr;
-+typedef struct _r300_texture_image r300_texture_image;
- 
--	void (*flush) (r300ContextPtr);
- 
--	char *buf0_address;	/* start of buf[0], for index calcs */
-+struct _r300_texture_image {
-+	struct gl_texture_image base;
- 
--	/* Number of "in-flight" DMA buffers, i.e. the number of buffers
--	 * for which a DISCARD command is currently queued in the command buffer.
-+	/**
-+	 * If mt != 0, the image is stored in hardware format in the
-+	 * given mipmap tree. In this case, base.Data may point into the
-+	 * mapping of the buffer object that contains the mipmap tree.
-+	 *
-+	 * If mt == 0, the image is stored in normal memory pointed to
-+	 * by base.Data.
- 	 */
--	GLuint nr_released_bufs;
-+	struct _r300_mipmap_tree *mt;
-+    struct radeon_bo *bo;
-+
-+	int mtlevel; /** if mt != 0, this is the image's level in the mipmap tree */
-+	int mtface; /** if mt != 0, this is the image's face in the mipmap tree */
- };
- 
--       /* Texture related */
-+static INLINE r300_texture_image *get_r300_texture_image(struct gl_texture_image *image)
-+{
-+	return (r300_texture_image*)image;
-+}
- 
--typedef struct r300_tex_obj r300TexObj, *r300TexObjPtr;
- 
- /* Texture object in locally shared texture space.
-  */
- struct r300_tex_obj {
--	driTextureObject base;
-+	struct gl_texture_object base;
-+	struct _r300_mipmap_tree *mt;
- 
--	GLuint bufAddr;		/* Offset to start of locally
--				   shared texture block */
--
--	drm_radeon_tex_image_t image[6][RADEON_MAX_TEXTURE_LEVELS];
--	/* Six, for the cube faces */
-+	/**
-+	 * This is true if we've verified that the mipmap tree above is complete
-+	 * and so on.
-+	 */
-+	GLboolean validated;
- 
- 	GLboolean image_override;	/* Image overridden by GLX_EXT_tfp */
-+	GLuint override_offset;
- 
--	GLuint pitch;		/* this isn't sent to hardware just used in calculations */
- 	/* hardware register values */
- 	/* Note that R200 has 8 registers per texture and R300 only 7 */
- 	GLuint filter;
-@@ -191,30 +173,17 @@ struct r300_tex_obj {
- 	GLuint pitch_reg;
- 	GLuint size;		/* npot only */
- 	GLuint format;
--	GLuint offset;		/* Image location in the card's address space.
--				   All cube faces follow. */
--	GLuint unknown4;
--	GLuint unknown5;
--	/* end hardware registers */
--
--	/* registers computed by r200 code - keep them here to
--	   compare against what is actually written.
--
--	   to be removed later.. */
- 	GLuint pp_border_color;
--	GLuint pp_cubic_faces;	/* cube face 1,2,3,4 log2 sizes */
--	GLuint format_x;
--
--	GLboolean border_fallback;
-+	/* end hardware registers */
- 
- 	GLuint tile_bits;	/* hw texture tile bits used on this texture */
-+    struct radeon_bo *bo;
- };
- 
--struct r300_texture_env_state {
--	r300TexObjPtr texobj;
--	GLenum format;
--	GLenum envMode;
--};
-+static INLINE r300TexObj* r300_tex_obj(struct gl_texture_object *texObj)
-+{
-+	return (r300TexObj*)texObj;
-+}
- 
- /* The blit width for texture uploads
-  */
-@@ -222,7 +191,6 @@ struct r300_texture_env_state {
- #define R300_MAX_TEXTURE_UNITS 8
- 
- struct r300_texture_state {
--	struct r300_texture_env_state unit[R300_MAX_TEXTURE_UNITS];
- 	int tc_count;		/* number of incoming texture coordinates from VAP */
- };
- 
-@@ -242,6 +210,7 @@ struct r300_state_atom {
- 	GLboolean dirty;
- 
- 	int (*check) (r300ContextPtr, struct r300_state_atom * atom);
-+	void (*emit) (r300ContextPtr, struct r300_state_atom * atom);
- };
- 
- #define R300_VPT_CMD_0		0
-@@ -549,6 +518,8 @@ struct r300_hw_state {
- 		struct r300_state_atom border_color;
- 	} tex;
- 	struct r300_state_atom txe;	/* tex enable (4104) */
-+
-+	r300TexObj *textures[R300_MAX_TEXTURE_UNITS];
- };
- 
- /**
-@@ -559,10 +530,10 @@ struct r300_hw_state {
-  * otherwise.
-  */
- struct r300_cmdbuf {
--	int size;		/* DWORDs allocated for buffer */
--	uint32_t *cmd_buf;
--	int count_used;		/* DWORDs filled so far */
--	int count_reemit;	/* size of re-emission batch */
-+    struct radeon_cs_manager    *csm;
-+    struct radeon_cs            *cs;
-+	int size; /** # of dwords total */
-+	unsigned int flushing:1; /** whether we're currently in FlushCmdBufLocked */
- };
- 
- /**
-@@ -811,18 +782,25 @@ struct r500_fragment_program {
- #define REG_COLOR0	1
- #define REG_TEX0	2
- 
-+struct r300_aos {
-+	struct radeon_bo *bo; /** Buffer object where vertex data is stored */
-+	int offset; /** Offset into buffer object, in bytes */
-+	int components; /** Number of components per vertex */
-+	int stride; /** Stride in dwords (may be 0 for repeating) */
-+	int count; /** Number of vertices */
-+};
-+
- struct r300_state {
- 	struct r300_depthbuffer_state depth;
- 	struct r300_texture_state texture;
- 	int sw_tcl_inputs[VERT_ATTRIB_MAX];
- 	struct r300_vertex_shader_state vertex_shader;
--	struct r300_dma_region aos[R300_MAX_AOS_ARRAYS];
-+	struct r300_aos aos[R300_MAX_AOS_ARRAYS];
- 	int aos_count;
- 
--	GLuint *Elts;
--	struct r300_dma_region elt_dma;
-+	struct radeon_bo *elt_dma_bo; /** Buffer object that contains element indices */
-+	int elt_dma_offset; /** Offset into this buffer object, in bytes */
- 
--	struct r300_dma_region swtcl_dma;
- 	DECLARE_RENDERINPUTS(render_inputs_bitset);	/* actual render inputs that R300 was configured for.
- 							   They are the same as tnl->render_inputs for fixed pipeline */
- 
-@@ -881,12 +859,8 @@ struct r300_swtcl_info {
-     */
-    GLuint specoffset;
- 
--   /**
--    * Should Mesa project vertex data or will the hardware do it?
--    */
--   GLboolean needproj;
--
--   struct r300_dma_region indexed_verts;
-+   struct radeon_bo *bo;
-+   void (*flush) (r300ContextPtr);
- };
- 
- 
-@@ -904,26 +878,11 @@ struct r300_context {
- 
- 	/* Vertex buffers
- 	 */
--	struct r300_dma dma;
--	GLboolean save_on_next_unlock;
- 	GLuint NewGLState;
- 
--	/* Texture object bookkeeping
--	 */
--	unsigned nr_heaps;
--	driTexHeap *texture_heaps[RADEON_NR_TEX_HEAPS];
--	driTextureObject swapped;
- 	int texture_depth;
- 	float initialMaxAnisotropy;
- 
--	/* Clientdata textures;
--	 */
--	GLuint prefer_gart_client_texturing;
--
--#ifdef USER_BUFFERS
--	struct r300_memory_manager *rmm;
--#endif
--
- 	GLvector4f dummy_attrib[_TNL_ATTRIB_MAX];
- 	GLvector4f *temp_attrib[_TNL_ATTRIB_MAX];
- 
-diff --git a/src/mesa/drivers/dri/r300/r300_emit.c b/src/mesa/drivers/dri/r300/r300_emit.c
-index 80bd338..a984f55 100644
---- a/src/mesa/drivers/dri/r300/r300_emit.c
-+++ b/src/mesa/drivers/dri/r300/r300_emit.c
-@@ -51,9 +51,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "r300_emit.h"
- #include "r300_ioctl.h"
- 
--#ifdef USER_BUFFERS
--#include "r300_mem.h"
--#endif
- 
- #if SWIZZLE_X != R300_INPUT_ROUTE_SELECT_X || \
-     SWIZZLE_Y != R300_INPUT_ROUTE_SELECT_Y || \
-@@ -86,11 +83,9 @@ do {						\
- } while (0)
- #endif
- 
--static void r300EmitVec4(GLcontext * ctx, struct r300_dma_region *rvb,
--			 GLvoid * data, int stride, int count)
-+static void r300EmitVec4(uint32_t *out, GLvoid * data, int stride, int count)
- {
- 	int i;
--	int *out = (int *)(rvb->address + rvb->start);
- 
- 	if (RADEON_DEBUG & DEBUG_VERTS)
- 		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
-@@ -106,11 +101,9 @@ static void r300EmitVec4(GLcontext * ctx, struct r300_dma_region *rvb,
- 		}
- }
- 
--static void r300EmitVec8(GLcontext * ctx, struct r300_dma_region *rvb,
--			 GLvoid * data, int stride, int count)
-+static void r300EmitVec8(uint32_t *out, GLvoid * data, int stride, int count)
- {
- 	int i;
--	int *out = (int *)(rvb->address + rvb->start);
- 
- 	if (RADEON_DEBUG & DEBUG_VERTS)
- 		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
-@@ -127,18 +120,17 @@ static void r300EmitVec8(GLcontext * ctx, struct r300_dma_region *rvb,
- 		}
- }
- 
--static void r300EmitVec12(GLcontext * ctx, struct r300_dma_region *rvb,
--			  GLvoid * data, int stride, int count)
-+static void r300EmitVec12(uint32_t *out, GLvoid * data, int stride, int count)
- {
- 	int i;
--	int *out = (int *)(rvb->address + rvb->start);
- 
- 	if (RADEON_DEBUG & DEBUG_VERTS)
- 		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
- 			__FUNCTION__, count, stride, (void *)out, (void *)data);
- 
--	if (stride == 12)
-+	if (stride == 12) {
- 		COPY_DWORDS(out, data, count * 3);
-+    }
- 	else
- 		for (i = 0; i < count; i++) {
- 			out[0] = *(int *)data;
-@@ -149,11 +141,9 @@ static void r300EmitVec12(GLcontext * ctx, struct r300_dma_region *rvb,
- 		}
- }
- 
--static void r300EmitVec16(GLcontext * ctx, struct r300_dma_region *rvb,
--			  GLvoid * data, int stride, int count)
-+static void r300EmitVec16(uint32_t *out, GLvoid * data, int stride, int count)
- {
- 	int i;
--	int *out = (int *)(rvb->address + rvb->start);
- 
- 	if (RADEON_DEBUG & DEBUG_VERTS)
- 		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
-@@ -172,39 +162,40 @@ static void r300EmitVec16(GLcontext * ctx, struct r300_dma_region *rvb,
- 		}
- }
- 
--static void r300EmitVec(GLcontext * ctx, struct r300_dma_region *rvb,
-+static void r300EmitVec(GLcontext * ctx, struct r300_aos *aos,
- 			GLvoid * data, int size, int stride, int count)
- {
- 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
-+	uint32_t *out;
-+    uint32_t bo_size;
- 
-+    memset(aos, 0, sizeof(struct r300_aos));
- 	if (stride == 0) {
--		r300AllocDmaRegion(rmesa, rvb, size * 4, 4);
-+        bo_size = size * 4;
- 		count = 1;
--		rvb->aos_offset = GET_START(rvb);
--		rvb->aos_stride = 0;
-+		aos->stride = 0;
- 	} else {
--		r300AllocDmaRegion(rmesa, rvb, size * count * 4, 4);
--		rvb->aos_offset = GET_START(rvb);
--		rvb->aos_stride = size;
-+        bo_size = size * count * 4;
-+		aos->stride = size;
- 	}
--
-+	aos->bo = radeon_bo_open(rmesa->radeon.radeonScreen->bom,
-+                             0, bo_size, 32, RADEON_GEM_DOMAIN_GTT, 0);
-+    aos->offset = 0;
-+	aos->components = size;
-+	aos->count = count;
-+
-+	radeon_bo_map(aos->bo, 1);
-+	out = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
- 	switch (size) {
--	case 1:
--		r300EmitVec4(ctx, rvb, data, stride, count);
--		break;
--	case 2:
--		r300EmitVec8(ctx, rvb, data, stride, count);
--		break;
--	case 3:
--		r300EmitVec12(ctx, rvb, data, stride, count);
--		break;
--	case 4:
--		r300EmitVec16(ctx, rvb, data, stride, count);
--		break;
-+	case 1: r300EmitVec4(out, data, stride, count); break;
-+	case 2: r300EmitVec8(out, data, stride, count); break;
-+	case 3: r300EmitVec12(out, data, stride, count); break;
-+	case 4: r300EmitVec16(out, data, stride, count); break;
- 	default:
- 		assert(0);
- 		break;
- 	}
-+	radeon_bo_unmap(aos->bo);
- }
- 
- #define DW_SIZE(x) ((inputs[tab[(x)]] << R300_DST_VEC_LOC_SHIFT) |	\
-@@ -314,10 +305,6 @@ GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint OutputsWritten)
- 		    R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT |
- 		    R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT;
- 
--#if 0
--	if (OutputsWritten & (1 << VERT_RESULT_FOGC)) ;
--#endif
--
- 	if (OutputsWritten & (1 << VERT_RESULT_PSIZ))
- 		ret |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT;
- 
-@@ -371,7 +358,6 @@ int r300EmitArrays(GLcontext * ctx)
- 
- 		assert(RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_POS));
- 		assert(RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_NORMAL) == 0);
--		//assert(RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_COLOR0));
- 
- 		if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_POS)) {
- 			InputsRead |= 1 << VERT_ATTRIB_POS;
-@@ -433,7 +419,7 @@ int r300EmitArrays(GLcontext * ctx)
- 	}
- 
- 	for (i = 0; i < nr; i++) {
--		int ci, fix, found = 0;
-+		int ci;
- 
- 		swizzle[i][0] = SWIZZLE_ZERO;
- 		swizzle[i][1] = SWIZZLE_ZERO;
-@@ -443,52 +429,25 @@ int r300EmitArrays(GLcontext * ctx)
- 		for (ci = 0; ci < vb->AttribPtr[tab[i]]->size; ci++) {
- 			swizzle[i][ci] = ci;
- 		}
--
--		if (r300IsGartMemory(rmesa, vb->AttribPtr[tab[i]]->data, 4)) {
--			if (vb->AttribPtr[tab[i]]->stride % 4) {
--				return R300_FALLBACK_TCL;
--			}
--			rmesa->state.aos[i].address = (void *)(vb->AttribPtr[tab[i]]->data);
--			rmesa->state.aos[i].start = 0;
--			rmesa->state.aos[i].aos_offset = r300GartOffsetFromVirtual(rmesa, vb->AttribPtr[tab[i]]->data);
--			rmesa->state.aos[i].aos_stride = vb->AttribPtr[tab[i]]->stride / 4;
--			rmesa->state.aos[i].aos_size = vb->AttribPtr[tab[i]]->size;
--		} else {
--			r300EmitVec(ctx, &rmesa->state.aos[i],
--				    vb->AttribPtr[tab[i]]->data,
--				    vb->AttribPtr[tab[i]]->size,
--				    vb->AttribPtr[tab[i]]->stride, count);
--		}
--
--		rmesa->state.aos[i].aos_size = vb->AttribPtr[tab[i]]->size;
--
--		for (fix = 0; fix <= 4 - vb->AttribPtr[tab[i]]->size; fix++) {
--			if ((rmesa->state.aos[i].aos_offset - _mesa_sizeof_type(GL_FLOAT) * fix) % 4) {
--				continue;
--			}
--			found = 1;
--			break;
--		}
--
--		if (found) {
--			if (fix > 0) {
--				WARN_ONCE("Feeling lucky?\n");
--			}
--			rmesa->state.aos[i].aos_offset -= _mesa_sizeof_type(GL_FLOAT) * fix;
--			for (ci = 0; ci < vb->AttribPtr[tab[i]]->size; ci++) {
--				swizzle[i][ci] += fix;
--			}
--		} else {
--			WARN_ONCE
--			    ("Cannot handle offset %x with stride %d, comp %d\n",
--			     rmesa->state.aos[i].aos_offset,
--			     rmesa->state.aos[i].aos_stride,
--			     vb->AttribPtr[tab[i]]->size);
--			return R300_FALLBACK_TCL;
--		}
-+		r300EmitVec(ctx, &rmesa->state.aos[i],
-+				vb->AttribPtr[tab[i]]->data,
-+				vb->AttribPtr[tab[i]]->size,
-+				vb->AttribPtr[tab[i]]->stride, count);
- 	}
- 
- 	/* Setup INPUT_ROUTE. */
-+    if (rmesa->radeon.radeonScreen->kernel_mm) {
-+      R300_STATECHANGE(rmesa, vir[0]);
-+      rmesa->hw.vir[0].cmd[0] &= 0xC000FFFF;
-+      rmesa->hw.vir[1].cmd[0] &= 0xC000FFFF;
-+	rmesa->hw.vir[0].cmd[0] |=
-+        (r300VAPInputRoute0(&rmesa->hw.vir[0].cmd[R300_VIR_CNTL_0],
-+                            vb->AttribPtr, inputs, tab, nr) & 0x3FFF) << 16;
-+	R300_STATECHANGE(rmesa, vir[1]);
-+	rmesa->hw.vir[1].cmd[0] |=
-+	    (r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle,
-+	                        nr) & 0x3FFF) << 16;
-+    } else {
- 	R300_STATECHANGE(rmesa, vir[0]);
- 	((drm_r300_cmd_header_t *) rmesa->hw.vir[0].cmd)->packet0.count =
- 	    r300VAPInputRoute0(&rmesa->hw.vir[0].cmd[R300_VIR_CNTL_0],
-@@ -497,6 +456,7 @@ int r300EmitArrays(GLcontext * ctx)
- 	((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count =
- 	    r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle,
- 			       nr);
-+    }
- 
- 	/* Setup INPUT_CNTL. */
- 	R300_STATECHANGE(rmesa, vic);
-@@ -515,45 +475,33 @@ int r300EmitArrays(GLcontext * ctx)
- 	return R300_FALLBACK_NONE;
- }
- 
--#ifdef USER_BUFFERS
--void r300UseArrays(GLcontext * ctx)
--{
--	r300ContextPtr rmesa = R300_CONTEXT(ctx);
--	int i;
--
--	if (rmesa->state.elt_dma.buf)
--		r300_mem_use(rmesa, rmesa->state.elt_dma.buf->id);
--
--	for (i = 0; i < rmesa->state.aos_count; i++) {
--		if (rmesa->state.aos[i].buf)
--			r300_mem_use(rmesa, rmesa->state.aos[i].buf->id);
--	}
--}
--#endif
--
- void r300ReleaseArrays(GLcontext * ctx)
- {
- 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
- 	int i;
- 
--	r300ReleaseDmaRegion(rmesa, &rmesa->state.elt_dma, __FUNCTION__);
-+	if (rmesa->state.elt_dma_bo) {
-+		radeon_bo_unref(rmesa->state.elt_dma_bo);
-+		rmesa->state.elt_dma_bo = 0;
-+	}
- 	for (i = 0; i < rmesa->state.aos_count; i++) {
--		r300ReleaseDmaRegion(rmesa, &rmesa->state.aos[i], __FUNCTION__);
-+		if (rmesa->state.aos[i].bo) {
-+			rmesa->state.aos[i].bo = radeon_bo_unref(rmesa->state.aos[i].bo);
-+		}
- 	}
- }
- 
- void r300EmitCacheFlush(r300ContextPtr rmesa)
- {
--	int cmd_reserved = 0;
--	int cmd_written = 0;
--
--	drm_radeon_cmd_header_t *cmd = NULL;
--
--	reg_start(R300_RB3D_DSTCACHE_CTLSTAT, 0);
--	e32(R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS |
--	    R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D);
--
--	reg_start(R300_ZB_ZCACHE_CTLSTAT, 0);
--	e32(R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE |
--	    R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE);
-+	BATCH_LOCALS(rmesa);
-+
-+	BEGIN_BATCH(4);
-+	OUT_BATCH_REGVAL(R300_RB3D_DSTCACHE_CTLSTAT,
-+		R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS |
-+		R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D);
-+	OUT_BATCH_REGVAL(R300_ZB_ZCACHE_CTLSTAT,
-+		R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE |
-+		R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE);
-+	END_BATCH();
-+	COMMIT_BATCH();
- }
-diff --git a/src/mesa/drivers/dri/r300/r300_emit.h b/src/mesa/drivers/dri/r300/r300_emit.h
-index 89d7383..db43cc3 100644
---- a/src/mesa/drivers/dri/r300/r300_emit.h
-+++ b/src/mesa/drivers/dri/r300/r300_emit.h
-@@ -46,23 +46,31 @@
- 
- /* TODO: move these defines (and the ones from DRM) into r300_reg.h and sync up
-  * with DRM */
-+#define CP_PACKET2  (2 << 30)
- #define CP_PACKET0(reg, n)	(RADEON_CP_PACKET0 | ((n)<<16) | ((reg)>>2))
- #define CP_PACKET3( pkt, n )						\
- 	(RADEON_CP_PACKET3 | (pkt) | ((n) << 16))
- 
--static INLINE uint32_t cmdpacket0(int reg, int count)
-+static INLINE uint32_t cmdpacket0(struct radeon_screen *rscrn,
-+                                  int reg, int count)
- {
--	drm_r300_cmd_header_t cmd;
--
--	cmd.packet0.cmd_type = R300_CMD_PACKET0;
--	cmd.packet0.count = count;
--	cmd.packet0.reghi = ((unsigned int)reg & 0xFF00) >> 8;
--	cmd.packet0.reglo = ((unsigned int)reg & 0x00FF);
--
--	return cmd.u;
-+    if (!rscrn->kernel_mm) {
-+	    drm_r300_cmd_header_t cmd;
-+
-+    	cmd.packet0.cmd_type = R300_CMD_PACKET0;
-+	    cmd.packet0.count = count;
-+    	cmd.packet0.reghi = ((unsigned int)reg & 0xFF00) >> 8;
-+	    cmd.packet0.reglo = ((unsigned int)reg & 0x00FF);
-+
-+    	return cmd.u;
-+    }
-+    if (count) {
-+        return CP_PACKET0(reg, count - 1);
-+    }
-+    return CP_PACKET2;
- }
- 
--static INLINE uint32_t cmdvpu(int addr, int count)
-+static INLINE uint32_t cmdvpu(struct radeon_screen *rscrn, int addr, int count)
- {
- 	drm_r300_cmd_header_t cmd;
- 
-@@ -74,7 +82,8 @@ static INLINE uint32_t cmdvpu(int addr, int count)
- 	return cmd.u;
- }
- 
--static INLINE uint32_t cmdr500fp(int addr, int count, int type, int clamp)
-+static INLINE uint32_t cmdr500fp(struct radeon_screen *rscrn,
-+                                 int addr, int count, int type, int clamp)
- {
- 	drm_r300_cmd_header_t cmd;
- 
-@@ -88,7 +97,7 @@ static INLINE uint32_t cmdr500fp(int addr, int count, int type, int clamp)
- 	return cmd.u;
- }
- 
--static INLINE uint32_t cmdpacket3(int packet)
-+static INLINE uint32_t cmdpacket3(struct radeon_screen *rscrn, int packet)
- {
- 	drm_r300_cmd_header_t cmd;
- 
-@@ -98,7 +107,8 @@ static INLINE uint32_t cmdpacket3(int packet)
- 	return cmd.u;
- }
- 
--static INLINE uint32_t cmdcpdelay(unsigned short count)
-+static INLINE uint32_t cmdcpdelay(struct radeon_screen *rscrn,  
-+                                  unsigned short count)
- {
- 	drm_r300_cmd_header_t cmd;
- 
-@@ -108,7 +118,8 @@ static INLINE uint32_t cmdcpdelay(unsigned short count)
- 	return cmd.u;
- }
- 
--static INLINE uint32_t cmdwait(unsigned char flags)
-+static INLINE uint32_t cmdwait(struct radeon_screen *rscrn,
-+                               unsigned char flags)
- {
- 	drm_r300_cmd_header_t cmd;
- 
-@@ -118,7 +129,7 @@ static INLINE uint32_t cmdwait(unsigned char flags)
- 	return cmd.u;
- }
- 
--static INLINE uint32_t cmdpacify(void)
-+static INLINE uint32_t cmdpacify(struct radeon_screen *rscrn)
- {
- 	drm_r300_cmd_header_t cmd;
- 
-@@ -127,130 +138,100 @@ static INLINE uint32_t cmdpacify(void)
- 	return cmd.u;
- }
- 
--/**
-- * Prepare to write a register value to register at address reg.
-- * If num_extra > 0 then the following extra values are written
-- * to registers with address +4, +8 and so on..
-- */
--#define reg_start(reg, num_extra)					\
--	do {								\
--		int _n;							\
--		_n=(num_extra);						\
--		cmd = (drm_radeon_cmd_header_t*)			\
--			r300AllocCmdBuf(rmesa,				\
--					(_n+2),				\
--					__FUNCTION__);			\
--		cmd_reserved=_n+2;					\
--		cmd_written=1;						\
--		cmd[0].i=cmdpacket0((reg), _n+1);			\
--	} while (0);
-+
-+/** Single register write to command buffer; requires 2 dwords. */
-+#define OUT_BATCH_REGVAL(reg, val) \
-+	OUT_BATCH(cmdpacket0(b_l_r300->radeon.radeonScreen, (reg), 1)); \
-+	OUT_BATCH((val))
-+
-+/** Continuous register range write to command buffer; requires 1 dword,
-+ * expects count dwords afterwards for register contents. */
-+#define OUT_BATCH_REGSEQ(reg, count) \
-+	OUT_BATCH(cmdpacket0(b_l_r300->radeon.radeonScreen, (reg), (count)));
-+
-+/** Write a 32 bit float to the ring; requires 1 dword. */
-+#define OUT_BATCH_FLOAT32(f) \
-+	OUT_BATCH(r300PackFloat32((f)));
- 
- /**
-- * Emit GLuint freestyle
-+ * Write the header of a packet3 to the command buffer.
-+ * Outputs 2 dwords and expects (num_extra+1) additional dwords afterwards.
-  */
--#define e32(dword)							\
--	do {								\
--		if(cmd_written<cmd_reserved) {				\
--			cmd[cmd_written].i=(dword);			\
--			cmd_written++;					\
--		} else {						\
--			fprintf(stderr,					\
--				"e32 but no previous packet "		\
--				"declaration.\n"			\
--				"Aborting! in %s::%s at line %d, "	\
--				"cmd_written=%d cmd_reserved=%d\n",	\
--				__FILE__, __FUNCTION__, __LINE__,	\
--				cmd_written, cmd_reserved);		\
--			_mesa_exit(-1);					\
--		}							\
-+#define OUT_BATCH_PACKET3(packet, num_extra) do {\
-+    if (!b_l_r300->radeon.radeonScreen->kernel_mm) { \
-+    	OUT_BATCH(cmdpacket3(b_l_r300->radeon.radeonScreen,\
-+                  R300_CMD_PACKET3_RAW)); \
-+    }\
-+	OUT_BATCH(CP_PACKET3((packet), (num_extra))); \
- 	} while(0)
- 
--#define	efloat(f) e32(r300PackFloat32(f))
--
--#define vsf_start_fragment(dest, length)				\
--	do {								\
--		int _n;							\
--		_n = (length);						\
--		cmd = (drm_radeon_cmd_header_t*)			\
--			r300AllocCmdBuf(rmesa,				\
--					(_n+1),				\
--					__FUNCTION__);			\
--		cmd_reserved = _n+2;					\
--		cmd_written =1;						\
--		cmd[0].i = cmdvpu((dest), _n/4);			\
--	} while (0);
--
--#define r500fp_start_fragment(dest, length)				\
--	do {								\
--		int _n;							\
--		_n = (length);						\
--		cmd = (drm_radeon_cmd_header_t*)			\
--			r300AllocCmdBuf(rmesa,				\
--					(_n+1),				\
--					__FUNCTION__);			\
--		cmd_reserved = _n+1;					\
--		cmd_written =1;						\
--		cmd[0].i = cmdr500fp((dest), _n/6, 0, 0);		\
--	} while (0);
--
--#define start_packet3(packet, count)					\
--	{								\
--		int _n;							\
--		GLuint _p;						\
--		_n = (count);						\
--		_p = (packet);						\
--		cmd = (drm_radeon_cmd_header_t*)			\
--			r300AllocCmdBuf(rmesa,				\
--					(_n+3),				\
--					__FUNCTION__);			\
--		cmd_reserved = _n+3;					\
--		cmd_written = 2;					\
--		if(_n > 0x3fff) {					\
--			fprintf(stderr,"Too big packet3 %08x: cannot "	\
--				"store %d dwords\n",			\
--				_p, _n);				\
--			_mesa_exit(-1);					\
--		}							\
--		cmd[0].i = cmdpacket3(R300_CMD_PACKET3_RAW);		\
--		cmd[1].i = _p | ((_n & 0x3fff)<<16);			\
--	}
--
- /**
-  * Must be sent to switch to 2d commands
-  */
- void static INLINE end_3d(r300ContextPtr rmesa)
- {
--	drm_radeon_cmd_header_t *cmd = NULL;
-+	BATCH_LOCALS(rmesa);
- 
--	cmd =
--	    (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, __FUNCTION__);
--	cmd[0].header.cmd_type = R300_CMD_END3D;
-+    if (!rmesa->radeon.radeonScreen->kernel_mm) {
-+    	BEGIN_BATCH(1);
-+	    OUT_BATCH(cmdpacify(rmesa->radeon.radeonScreen));
-+    	END_BATCH();
-+    }
- }
- 
- void static INLINE cp_delay(r300ContextPtr rmesa, unsigned short count)
- {
--	drm_radeon_cmd_header_t *cmd = NULL;
-+	BATCH_LOCALS(rmesa);
- 
--	cmd =
--	    (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, __FUNCTION__);
--	cmd[0].i = cmdcpdelay(count);
-+    if (!rmesa->radeon.radeonScreen->kernel_mm) {
-+    	BEGIN_BATCH(1);
-+	    OUT_BATCH(cmdcpdelay(rmesa->radeon.radeonScreen, count));
-+    	END_BATCH();
-+    }
- }
- 
- void static INLINE cp_wait(r300ContextPtr rmesa, unsigned char flags)
- {
--	drm_radeon_cmd_header_t *cmd = NULL;
--
--	cmd =
--	    (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, __FUNCTION__);
--	cmd[0].i = cmdwait(flags);
-+	BATCH_LOCALS(rmesa);
-+    uint32_t wait_until;
-+
-+    if (!rmesa->radeon.radeonScreen->kernel_mm) {
-+    	BEGIN_BATCH_NO_AUTOSTATE(1);
-+    	OUT_BATCH(cmdwait(rmesa->radeon.radeonScreen, flags));
-+	    END_BATCH();
-+    } else {
-+        switch(flags) {
-+        case R300_WAIT_2D:
-+            wait_until = (1 << 14);
-+            break;
-+        case R300_WAIT_3D:
-+            wait_until = (1 << 15);
-+            break;
-+        case R300_NEW_WAIT_2D_3D:
-+            wait_until = (1 << 14) | (1 << 15);
-+            break;
-+        case R300_NEW_WAIT_2D_2D_CLEAN:
-+            wait_until = (1 << 14) | (1 << 16) | (1 << 18);
-+            break;
-+        case R300_NEW_WAIT_3D_3D_CLEAN:
-+            wait_until = (1 << 15) | (1 << 17) | (1 << 18);
-+            break;
-+        case R300_NEW_WAIT_2D_2D_CLEAN_3D_3D_CLEAN:
-+            wait_until  = (1 << 14) | (1 << 16) | (1 << 18);
-+            wait_until |= (1 << 15) | (1 << 17) | (1 << 18);
-+            break;
-+        default:
-+            return;
-+        }
-+    	BEGIN_BATCH_NO_AUTOSTATE(2);
-+        OUT_BATCH(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
-+        OUT_BATCH(wait_until);
-+	    END_BATCH();
-+    }
- }
- 
- extern int r300EmitArrays(GLcontext * ctx);
- 
--#ifdef USER_BUFFERS
--void r300UseArrays(GLcontext * ctx);
--#endif
--
- extern void r300ReleaseArrays(GLcontext * ctx);
- extern int r300PrimitiveType(r300ContextPtr rmesa, int prim);
- extern int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim);
-diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c
-index ee85e22..6d7b191 100644
---- a/src/mesa/drivers/dri/r300/r300_ioctl.c
-+++ b/src/mesa/drivers/dri/r300/r300_ioctl.c
-@@ -46,6 +46,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "main/context.h"
- #include "swrast/swrast.h"
- 
-+#include "radeon_buffer.h"
- #include "r300_context.h"
- #include "radeon_ioctl.h"
- #include "r300_ioctl.h"
-@@ -55,71 +56,83 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "radeon_reg.h"
- #include "r300_emit.h"
- #include "r300_fragprog.h"
-+#include "r300_context.h"
- 
- #include "vblank.h"
- 
-+#define R200_3D_DRAW_IMMD_2      0xC0003500
-+
- #define CLEARBUFFER_COLOR	0x1
- #define CLEARBUFFER_DEPTH	0x2
- #define CLEARBUFFER_STENCIL	0x4
- 
--static void r300ClearBuffer(r300ContextPtr r300, int flags, int buffer)
-+static void r300ClearBuffer(r300ContextPtr r300, int flags,
-+			    struct radeon_renderbuffer *rrb,
-+                struct radeon_renderbuffer *rrbd)
- {
-+	BATCH_LOCALS(r300);
- 	GLcontext *ctx = r300->radeon.glCtx;
- 	__DRIdrawablePrivate *dPriv = r300->radeon.dri.drawable;
--	GLuint cboffset, cbpitch;
--	drm_r300_cmd_header_t *cmd2;
--	int cmd_reserved = 0;
--	int cmd_written = 0;
--	drm_radeon_cmd_header_t *cmd = NULL;
-+	GLuint cbpitch = 0;
- 	r300ContextPtr rmesa = r300;
- 
- 	if (RADEON_DEBUG & DEBUG_IOCTL)
--		fprintf(stderr, "%s: %s buffer (%i,%i %ix%i)\n",
--			__FUNCTION__, buffer ? "back" : "front",
--			dPriv->x, dPriv->y, dPriv->w, dPriv->h);
--
--	if (buffer) {
--		cboffset = r300->radeon.radeonScreen->backOffset;
--		cbpitch = r300->radeon.radeonScreen->backPitch;
--	} else {
--		cboffset = r300->radeon.radeonScreen->frontOffset;
--		cbpitch = r300->radeon.radeonScreen->frontPitch;
-+		fprintf(stderr, "%s: buffer %p (%i,%i %ix%i)\n",
-+			__FUNCTION__, rrb, dPriv->x, dPriv->y,
-+			dPriv->w, dPriv->h);
-+
-+	if (rrb) {
-+		cbpitch = (rrb->pitch / rrb->cpp);
-+		if (rrb->cpp == 4)
-+			cbpitch |= R300_COLOR_FORMAT_ARGB8888;
-+		else
-+			cbpitch |= R300_COLOR_FORMAT_RGB565;
-+
-+		if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE){
-+			cbpitch |= R300_COLOR_TILE_ENABLE;
-+        }
- 	}
- 
--	cboffset += r300->radeon.radeonScreen->fbLocation;
--
-+	/* TODO in bufmgr */
- 	cp_wait(r300, R300_WAIT_3D | R300_WAIT_3D_CLEAN);
- 	end_3d(rmesa);
- 
--	R300_STATECHANGE(r300, cb);
--	reg_start(R300_RB3D_COLOROFFSET0, 0);
--	e32(cboffset);
--
--	if (r300->radeon.radeonScreen->cpp == 4)
--		cbpitch |= R300_COLOR_FORMAT_ARGB8888;
--	else
--		cbpitch |= R300_COLOR_FORMAT_RGB565;
--
--	if (r300->radeon.sarea->tiling_enabled)
--		cbpitch |= R300_COLOR_TILE_ENABLE;
--
--	reg_start(R300_RB3D_COLORPITCH0, 0);
--	e32(cbpitch);
--
--	R300_STATECHANGE(r300, cmk);
--	reg_start(RB3D_COLOR_CHANNEL_MASK, 0);
--
- 	if (flags & CLEARBUFFER_COLOR) {
--		e32((ctx->Color.ColorMask[BCOMP] ? RB3D_COLOR_CHANNEL_MASK_BLUE_MASK0 : 0) |
--		    (ctx->Color.ColorMask[GCOMP] ? RB3D_COLOR_CHANNEL_MASK_GREEN_MASK0 : 0) |
--		    (ctx->Color.ColorMask[RCOMP] ? RB3D_COLOR_CHANNEL_MASK_RED_MASK0 : 0) |
--		    (ctx->Color.ColorMask[ACOMP] ? RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK0 : 0));
-+		assert(rrb != 0);
-+		BEGIN_BATCH_NO_AUTOSTATE(4);
-+		OUT_BATCH_REGSEQ(R300_RB3D_COLOROFFSET0, 1);
-+		OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
-+		OUT_BATCH_REGVAL(R300_RB3D_COLORPITCH0, cbpitch);
-+		END_BATCH();
-+	}
-+#if 1
-+	if (flags & (CLEARBUFFER_DEPTH | CLEARBUFFER_STENCIL)) {
-+		assert(rrbd != 0);
-+		cbpitch = (rrbd->pitch / rrbd->cpp);
-+		if (rrbd->bo->flags & RADEON_BO_FLAGS_MACRO_TILE){
-+			cbpitch |= R300_DEPTHMACROTILE_ENABLE;
-+        }
-+		if (rrbd->bo->flags & RADEON_BO_FLAGS_MICRO_TILE){
-+            cbpitch |= R300_DEPTHMICROTILE_TILED;
-+        }
-+		BEGIN_BATCH_NO_AUTOSTATE(4);
-+		OUT_BATCH_REGSEQ(R300_ZB_DEPTHOFFSET, 1);
-+		OUT_BATCH_RELOC(0, rrbd->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
-+		OUT_BATCH_REGVAL(R300_ZB_DEPTHPITCH, cbpitch);
-+		END_BATCH();
-+	}
-+#endif
-+	BEGIN_BATCH_NO_AUTOSTATE(6);
-+	OUT_BATCH_REGSEQ(RB3D_COLOR_CHANNEL_MASK, 1);
-+	if (flags & CLEARBUFFER_COLOR) {
-+		OUT_BATCH((ctx->Color.ColorMask[BCOMP] ? RB3D_COLOR_CHANNEL_MASK_BLUE_MASK0 : 0) |
-+			  (ctx->Color.ColorMask[GCOMP] ? RB3D_COLOR_CHANNEL_MASK_GREEN_MASK0 : 0) |
-+			  (ctx->Color.ColorMask[RCOMP] ? RB3D_COLOR_CHANNEL_MASK_RED_MASK0 : 0) |
-+			  (ctx->Color.ColorMask[ACOMP] ? RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK0 : 0));
- 	} else {
--		e32(0x0);
-+		OUT_BATCH(0);
- 	}
- 
--	R300_STATECHANGE(r300, zs);
--	reg_start(R300_ZB_CNTL, 2);
- 
- 	{
- 		uint32_t t1, t2;
-@@ -146,37 +159,55 @@ static void r300ClearBuffer(r300ContextPtr r300, int flags, int buffer)
- 			     R300_S_FRONT_ZFAIL_OP_SHIFT);
- 		}
- 
--		e32(t1);
--		e32(t2);
--		e32(((ctx->Stencil.WriteMask[0] & R300_STENCILREF_MASK) << R300_STENCILWRITEMASK_SHIFT) |
--		    (ctx->Stencil.Clear & R300_STENCILREF_MASK));
-+        OUT_BATCH_REGSEQ(R300_ZB_CNTL, 3);
-+		OUT_BATCH(t1);
-+		OUT_BATCH(t2);
-+		OUT_BATCH(((ctx->Stencil.WriteMask[0] & R300_STENCILREF_MASK) <<
-+                   R300_STENCILWRITEMASK_SHIFT) |
-+			  (ctx->Stencil.Clear & R300_STENCILREF_MASK));
-+        END_BATCH();
- 	}
- 
--	cmd2 = (drm_r300_cmd_header_t *) r300AllocCmdBuf(r300, 9, __FUNCTION__);
--	cmd2[0].packet3.cmd_type = R300_CMD_PACKET3;
--	cmd2[0].packet3.packet = R300_CMD_PACKET3_CLEAR;
--	cmd2[1].u = r300PackFloat32(dPriv->w / 2.0);
--	cmd2[2].u = r300PackFloat32(dPriv->h / 2.0);
--	cmd2[3].u = r300PackFloat32(ctx->Depth.Clear);
--	cmd2[4].u = r300PackFloat32(1.0);
--	cmd2[5].u = r300PackFloat32(ctx->Color.ClearColor[0]);
--	cmd2[6].u = r300PackFloat32(ctx->Color.ClearColor[1]);
--	cmd2[7].u = r300PackFloat32(ctx->Color.ClearColor[2]);
--	cmd2[8].u = r300PackFloat32(ctx->Color.ClearColor[3]);
-+    if (!rmesa->radeon.radeonScreen->kernel_mm) {
-+    	BEGIN_BATCH_NO_AUTOSTATE(9);
-+    	OUT_BATCH(cmdpacket3(r300->radeon.radeonScreen, R300_CMD_PACKET3_CLEAR));
-+    	OUT_BATCH_FLOAT32(dPriv->w / 2.0);
-+	    OUT_BATCH_FLOAT32(dPriv->h / 2.0);
-+    	OUT_BATCH_FLOAT32(ctx->Depth.Clear);
-+	    OUT_BATCH_FLOAT32(1.0);
-+    	OUT_BATCH_FLOAT32(ctx->Color.ClearColor[0]);
-+	    OUT_BATCH_FLOAT32(ctx->Color.ClearColor[1]);
-+    	OUT_BATCH_FLOAT32(ctx->Color.ClearColor[2]);
-+	    OUT_BATCH_FLOAT32(ctx->Color.ClearColor[3]);
-+    	END_BATCH();
-+    } else {
-+        OUT_BATCH(CP_PACKET3(R200_3D_DRAW_IMMD_2, 8));
-+        OUT_BATCH(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING |
-+                  (1 << R300_PRIM_NUM_VERTICES_SHIFT));
-+    	OUT_BATCH_FLOAT32(dPriv->w / 2.0);
-+	    OUT_BATCH_FLOAT32(dPriv->h / 2.0);
-+    	OUT_BATCH_FLOAT32(ctx->Depth.Clear);
-+	    OUT_BATCH_FLOAT32(1.0);
-+    	OUT_BATCH_FLOAT32(ctx->Color.ClearColor[0]);
-+	    OUT_BATCH_FLOAT32(ctx->Color.ClearColor[1]);
-+    	OUT_BATCH_FLOAT32(ctx->Color.ClearColor[2]);
-+	    OUT_BATCH_FLOAT32(ctx->Color.ClearColor[3]);
-+    }
- 
- 	r300EmitCacheFlush(rmesa);
- 	cp_wait(rmesa, R300_WAIT_3D | R300_WAIT_3D_CLEAN);
-+
-+	R300_STATECHANGE(r300, cb);
-+	R300_STATECHANGE(r300, cmk);
-+	R300_STATECHANGE(r300, zs);
- }
- 
- static void r300EmitClearState(GLcontext * ctx)
- {
- 	r300ContextPtr r300 = R300_CONTEXT(ctx);
--	r300ContextPtr rmesa = r300;
-+	BATCH_LOCALS(r300);
- 	__DRIdrawablePrivate *dPriv = r300->radeon.dri.drawable;
- 	int i;
--	int cmd_reserved = 0;
--	int cmd_written = 0;
--	drm_radeon_cmd_header_t *cmd = NULL;
- 	int has_tcl = 1;
- 	int is_r500 = 0;
- 	GLuint vap_cntl;
-@@ -184,35 +215,37 @@ static void r300EmitClearState(GLcontext * ctx)
- 	if (!(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL))
- 		has_tcl = 0;
- 
--        if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
--                is_r500 = 1;
--
-+	if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
-+		is_r500 = 1;
- 
--	/* FIXME: the values written to R300_VAP_INPUT_ROUTE_0_0 and
--	 * R300_VAP_INPUT_ROUTE_0_1 are in fact known, however, the values are
--	 * quite complex; see the functions in r300_emit.c.
-+	/* State atom dirty tracking is a little subtle here.
-+	 *
-+	 * On the one hand, we need to make sure base state is emitted
-+	 * here if we start with an empty batch buffer, otherwise clear
-+	 * works incorrectly with multiple processes. Therefore, the first
-+	 * BEGIN_BATCH cannot be a BEGIN_BATCH_NO_AUTOSTATE.
- 	 *
--	 * I believe it would be a good idea to extend the functions in
--	 * r300_emit.c so that they can be used to setup the default values for
--	 * these registers, as well as the actual values used for rendering.
-+	 * On the other hand, implicit state emission clears the state atom
-+	 * dirty bits, so we have to call R300_STATECHANGE later than the
-+	 * first BEGIN_BATCH.
-+	 *
-+	 * The final trickiness is that, because we change state, we need
-+	 * to ensure that any stored swtcl primitives are flushed properly
-+	 * before we start changing state. See the R300_NEWPRIM in r300Clear
-+	 * for this.
- 	 */
--	R300_STATECHANGE(r300, vir[0]);
--	reg_start(R300_VAP_PROG_STREAM_CNTL_0, 0);
-+	BEGIN_BATCH(31);
-+	OUT_BATCH_REGSEQ(R300_VAP_PROG_STREAM_CNTL_0, 1);
- 	if (!has_tcl)
--	    e32(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) |
-+		OUT_BATCH(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) |
- 		 ((R300_LAST_VEC | (2 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT)));
- 	else
--	    e32(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) |
-+		OUT_BATCH(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) |
- 		 ((R300_LAST_VEC | (1 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT)));
- 
--	/* disable fog */
--	R300_STATECHANGE(r300, fogs);
--	reg_start(R300_FG_FOG_BLEND, 0);
--	e32(0x0);
--
--	R300_STATECHANGE(r300, vir[1]);
--	reg_start(R300_VAP_PROG_STREAM_CNTL_EXT_0, 0);
--	e32(((((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) |
-+	OUT_BATCH_REGVAL(R300_FG_FOG_BLEND, 0);
-+	OUT_BATCH_REGVAL(R300_VAP_PROG_STREAM_CNTL_EXT_0,
-+	   ((((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) |
- 	       (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) |
- 	       (R300_SWIZZLE_SELECT_Z << R300_SWIZZLE_SELECT_Z_SHIFT) |
- 	       (R300_SWIZZLE_SELECT_W << R300_SWIZZLE_SELECT_W_SHIFT) |
-@@ -226,238 +259,276 @@ static void r300EmitClearState(GLcontext * ctx)
- 	      << R300_SWIZZLE1_SHIFT)));
- 
- 	/* R300_VAP_INPUT_CNTL_0, R300_VAP_INPUT_CNTL_1 */
--	R300_STATECHANGE(r300, vic);
--	reg_start(R300_VAP_VTX_STATE_CNTL, 1);
--	e32((R300_SEL_USER_COLOR_0 << R300_COLOR_0_ASSEMBLY_SHIFT));
--	e32(R300_INPUT_CNTL_POS | R300_INPUT_CNTL_COLOR | R300_INPUT_CNTL_TC0);
-+	OUT_BATCH_REGSEQ(R300_VAP_VTX_STATE_CNTL, 2);
-+	OUT_BATCH((R300_SEL_USER_COLOR_0 << R300_COLOR_0_ASSEMBLY_SHIFT));
-+	OUT_BATCH(R300_INPUT_CNTL_POS | R300_INPUT_CNTL_COLOR | R300_INPUT_CNTL_TC0);
- 
--	R300_STATECHANGE(r300, vte);
- 	/* comes from fglrx startup of clear */
--	reg_start(R300_SE_VTE_CNTL, 1);
--	e32(R300_VTX_W0_FMT | R300_VPORT_X_SCALE_ENA |
--	    R300_VPORT_X_OFFSET_ENA | R300_VPORT_Y_SCALE_ENA |
--	    R300_VPORT_Y_OFFSET_ENA | R300_VPORT_Z_SCALE_ENA |
--	    R300_VPORT_Z_OFFSET_ENA);
--	e32(0x8);
-+	OUT_BATCH_REGSEQ(R300_SE_VTE_CNTL, 2);
-+	OUT_BATCH(R300_VTX_W0_FMT | R300_VPORT_X_SCALE_ENA |
-+		  R300_VPORT_X_OFFSET_ENA | R300_VPORT_Y_SCALE_ENA |
-+		  R300_VPORT_Y_OFFSET_ENA | R300_VPORT_Z_SCALE_ENA |
-+		  R300_VPORT_Z_OFFSET_ENA);
-+	OUT_BATCH(0x8);
- 
--	reg_start(R300_VAP_PSC_SGN_NORM_CNTL, 0);
--	e32(0xaaaaaaaa);
-+	OUT_BATCH_REGVAL(R300_VAP_PSC_SGN_NORM_CNTL, 0xaaaaaaaa);
- 
--	R300_STATECHANGE(r300, vof);
--	reg_start(R300_VAP_OUTPUT_VTX_FMT_0, 1);
--	e32(R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT |
--	    R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT);
--	e32(0x0);		/* no textures */
-+	OUT_BATCH_REGSEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2);
-+	OUT_BATCH(R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT |
-+		  R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT);
-+	OUT_BATCH(0); /* no textures */
- 
--	R300_STATECHANGE(r300, txe);
--	reg_start(R300_TX_ENABLE, 0);
--	e32(0x0);
-+	OUT_BATCH_REGVAL(R300_TX_ENABLE, 0);
- 
--	R300_STATECHANGE(r300, vpt);
--	reg_start(R300_SE_VPORT_XSCALE, 5);
--	efloat(1.0);
--	efloat(dPriv->x);
--	efloat(1.0);
--	efloat(dPriv->y);
--	efloat(1.0);
--	efloat(0.0);
-+	OUT_BATCH_REGSEQ(R300_SE_VPORT_XSCALE, 6);
-+	OUT_BATCH_FLOAT32(1.0);
-+	OUT_BATCH_FLOAT32(dPriv->x);
-+	OUT_BATCH_FLOAT32(1.0);
-+	OUT_BATCH_FLOAT32(dPriv->y);
-+	OUT_BATCH_FLOAT32(1.0);
-+	OUT_BATCH_FLOAT32(0.0);
- 
--	R300_STATECHANGE(r300, at);
--	reg_start(R300_FG_ALPHA_FUNC, 0);
--	e32(0x0);
-+	OUT_BATCH_REGVAL(R300_FG_ALPHA_FUNC, 0);
- 
-+	OUT_BATCH_REGSEQ(R300_RB3D_CBLEND, 2);
-+	OUT_BATCH(0x0);
-+	OUT_BATCH(0x0);
-+	END_BATCH();
-+
-+	R300_STATECHANGE(r300, vir[0]);
-+	R300_STATECHANGE(r300, fogs);
-+	R300_STATECHANGE(r300, vir[1]);
-+	R300_STATECHANGE(r300, vic);
-+	R300_STATECHANGE(r300, vte);
-+	R300_STATECHANGE(r300, vof);
-+	R300_STATECHANGE(r300, txe);
-+	R300_STATECHANGE(r300, vpt);
-+	R300_STATECHANGE(r300, at);
- 	R300_STATECHANGE(r300, bld);
--	reg_start(R300_RB3D_CBLEND, 1);
--	e32(0x0);
--	e32(0x0);
-+	R300_STATECHANGE(r300, ps);
- 
- 	if (has_tcl) {
--	    R300_STATECHANGE(r300, vap_clip_cntl);
--	    reg_start(R300_VAP_CLIP_CNTL, 0);
--	    e32(R300_PS_UCP_MODE_CLIP_AS_TRIFAN | R300_CLIP_DISABLE);
-+		R300_STATECHANGE(r300, vap_clip_cntl);
-+
-+		BEGIN_BATCH_NO_AUTOSTATE(2);
-+		OUT_BATCH_REGVAL(R300_VAP_CLIP_CNTL, R300_PS_UCP_MODE_CLIP_AS_TRIFAN | R300_CLIP_DISABLE);
-+		END_BATCH();
-         }
- 
--	R300_STATECHANGE(r300, ps);
--	reg_start(R300_GA_POINT_SIZE, 0);
--	e32(((dPriv->w * 6) << R300_POINTSIZE_X_SHIFT) |
--	    ((dPriv->h * 6) << R300_POINTSIZE_Y_SHIFT));
-+	BEGIN_BATCH_NO_AUTOSTATE(2);
-+	OUT_BATCH_REGVAL(R300_GA_POINT_SIZE,
-+		((dPriv->w * 6) << R300_POINTSIZE_X_SHIFT) |
-+		((dPriv->h * 6) << R300_POINTSIZE_Y_SHIFT));
-+	END_BATCH();
- 
- 	if (!is_r500) {
- 		R300_STATECHANGE(r300, ri);
--		reg_start(R300_RS_IP_0, 7);
--		for (i = 0; i < 8; ++i) {
--			e32(R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3));
--		}
--
- 		R300_STATECHANGE(r300, rc);
--		/* The second constant is needed to get glxgears display anything .. */
--		reg_start(R300_RS_COUNT, 1);
--		e32((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN);
--		e32(0x0);
--
- 		R300_STATECHANGE(r300, rr);
--		reg_start(R300_RS_INST_0, 0);
--		e32(R300_RS_INST_COL_CN_WRITE);
-+
-+		BEGIN_BATCH(14);
-+		OUT_BATCH_REGSEQ(R300_RS_IP_0, 8);
-+		for (i = 0; i < 8; ++i)
-+			OUT_BATCH(R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3));
-+
-+		OUT_BATCH_REGSEQ(R300_RS_COUNT, 2);
-+		OUT_BATCH((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN);
-+		OUT_BATCH(0x0);
-+
-+		OUT_BATCH_REGVAL(R300_RS_INST_0, R300_RS_INST_COL_CN_WRITE);
-+		END_BATCH();
- 	} else {
- 		R300_STATECHANGE(r300, ri);
--		reg_start(R500_RS_IP_0, 7);
-+		R300_STATECHANGE(r300, rc);
-+		R300_STATECHANGE(r300, rr);
-+
-+		BEGIN_BATCH(14);
-+		OUT_BATCH_REGSEQ(R500_RS_IP_0, 8);
- 		for (i = 0; i < 8; ++i) {
--			e32((R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) |
--			    (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) |
--			    (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) |
--			    (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT));
-+			OUT_BATCH((R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) |
-+				  (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) |
-+				  (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) |
-+				  (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT));
- 		}
- 
--		R300_STATECHANGE(r300, rc);
--		/* The second constant is needed to get glxgears display anything .. */
--		reg_start(R300_RS_COUNT, 1);
--		e32((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN);
--		e32(0x0);
--
--		R300_STATECHANGE(r300, rr);
--		reg_start(R500_RS_INST_0, 0);
--		e32(R500_RS_INST_COL_CN_WRITE);
-+		OUT_BATCH_REGSEQ(R300_RS_COUNT, 2);
-+		OUT_BATCH((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN);
-+		OUT_BATCH(0x0);
- 
-+		OUT_BATCH_REGVAL(R500_RS_INST_0, R500_RS_INST_COL_CN_WRITE);
-+		END_BATCH();
- 	}
- 
- 	if (!is_r500) {
- 		R300_STATECHANGE(r300, fp);
--		reg_start(R300_US_CONFIG, 2);
--		e32(0x0);
--		e32(0x0);
--		e32(0x0);
--		reg_start(R300_US_CODE_ADDR_0, 3);
--		e32(0x0);
--		e32(0x0);
--		e32(0x0);
--		e32(R300_RGBA_OUT);
--
- 		R300_STATECHANGE(r300, fpi[0]);
- 		R300_STATECHANGE(r300, fpi[1]);
- 		R300_STATECHANGE(r300, fpi[2]);
- 		R300_STATECHANGE(r300, fpi[3]);
- 
--		reg_start(R300_US_ALU_RGB_INST_0, 0);
--		e32(FP_INSTRC(MAD, FP_ARGC(SRC0C_XYZ), FP_ARGC(ONE), FP_ARGC(ZERO)));
--
--		reg_start(R300_US_ALU_RGB_ADDR_0, 0);
--		e32(FP_SELC(0, NO, XYZ, FP_TMP(0), 0, 0));
--
--		reg_start(R300_US_ALU_ALPHA_INST_0, 0);
--		e32(FP_INSTRA(MAD, FP_ARGA(SRC0A), FP_ARGA(ONE), FP_ARGA(ZERO)));
--
--		reg_start(R300_US_ALU_ALPHA_ADDR_0, 0);
--		e32(FP_SELA(0, NO, W, FP_TMP(0), 0, 0));
-+		BEGIN_BATCH(17);
-+		OUT_BATCH_REGSEQ(R300_US_CONFIG, 3);
-+		OUT_BATCH(0x0);
-+		OUT_BATCH(0x0);
-+		OUT_BATCH(0x0);
-+		OUT_BATCH_REGSEQ(R300_US_CODE_ADDR_0, 4);
-+		OUT_BATCH(0x0);
-+		OUT_BATCH(0x0);
-+		OUT_BATCH(0x0);
-+		OUT_BATCH(R300_RGBA_OUT);
-+
-+		OUT_BATCH_REGVAL(R300_US_ALU_RGB_INST_0,
-+			FP_INSTRC(MAD, FP_ARGC(SRC0C_XYZ), FP_ARGC(ONE), FP_ARGC(ZERO)));
-+		OUT_BATCH_REGVAL(R300_US_ALU_RGB_ADDR_0,
-+			FP_SELC(0, NO, XYZ, FP_TMP(0), 0, 0));
-+		OUT_BATCH_REGVAL(R300_US_ALU_ALPHA_INST_0,
-+			FP_INSTRA(MAD, FP_ARGA(SRC0A), FP_ARGA(ONE), FP_ARGA(ZERO)));
-+		OUT_BATCH_REGVAL(R300_US_ALU_ALPHA_ADDR_0,
-+			FP_SELA(0, NO, W, FP_TMP(0), 0, 0));
-+		END_BATCH();
- 	} else {
-- 		R300_STATECHANGE(r300, fp);
-- 		reg_start(R500_US_CONFIG, 1);
-- 		e32(R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO);
-- 		e32(0x0);
-- 		reg_start(R500_US_CODE_ADDR, 2);
-- 		e32(R500_US_CODE_START_ADDR(0) | R500_US_CODE_END_ADDR(1));
-- 		e32(R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(1));
-- 		e32(R500_US_CODE_OFFSET_ADDR(0));
-+		struct r300_state_atom r500fp;
-+		uint32_t _cmd[10];
- 
-+		R300_STATECHANGE(r300, fp);
- 		R300_STATECHANGE(r300, r500fp);
--		r500fp_start_fragment(0, 6);
--
--		e32(R500_INST_TYPE_OUT |
--		    R500_INST_TEX_SEM_WAIT |
--		    R500_INST_LAST |
--		    R500_INST_RGB_OMASK_R |
--		    R500_INST_RGB_OMASK_G |
--		    R500_INST_RGB_OMASK_B |
--		    R500_INST_ALPHA_OMASK |
--		    R500_INST_RGB_CLAMP |
--		    R500_INST_ALPHA_CLAMP);
--
--		e32(R500_RGB_ADDR0(0) |
--		    R500_RGB_ADDR1(0) |
--		    R500_RGB_ADDR1_CONST |
--		    R500_RGB_ADDR2(0) |
--		    R500_RGB_ADDR2_CONST);
--
--		e32(R500_ALPHA_ADDR0(0) |
--		    R500_ALPHA_ADDR1(0) |
--		    R500_ALPHA_ADDR1_CONST |
--		    R500_ALPHA_ADDR2(0) |
--		    R500_ALPHA_ADDR2_CONST);
--
--		e32(R500_ALU_RGB_SEL_A_SRC0 |
--		    R500_ALU_RGB_R_SWIZ_A_R |
--		    R500_ALU_RGB_G_SWIZ_A_G |
--		    R500_ALU_RGB_B_SWIZ_A_B |
--		    R500_ALU_RGB_SEL_B_SRC0 |
--		    R500_ALU_RGB_R_SWIZ_B_R |
--		    R500_ALU_RGB_B_SWIZ_B_G |
--		    R500_ALU_RGB_G_SWIZ_B_B);
--
--		e32(R500_ALPHA_OP_CMP |
--		    R500_ALPHA_SWIZ_A_A |
--		    R500_ALPHA_SWIZ_B_A);
--
--		e32(R500_ALU_RGBA_OP_CMP |
--		    R500_ALU_RGBA_R_SWIZ_0 |
--		    R500_ALU_RGBA_G_SWIZ_0 |
--		    R500_ALU_RGBA_B_SWIZ_0 |
--		    R500_ALU_RGBA_A_SWIZ_0);
-+
-+		BEGIN_BATCH(14);
-+		OUT_BATCH_REGSEQ(R500_US_CONFIG, 2);
-+		OUT_BATCH(R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO);
-+		OUT_BATCH(0x0);
-+		OUT_BATCH_REGSEQ(R500_US_CODE_ADDR, 3);
-+		OUT_BATCH(R500_US_CODE_START_ADDR(0) | R500_US_CODE_END_ADDR(1));
-+		OUT_BATCH(R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(1));
-+		OUT_BATCH(R500_US_CODE_OFFSET_ADDR(0));
-+		END_BATCH();
-+
-+		r500fp.check = check_r500fp;
-+		r500fp.cmd = _cmd;
-+		r500fp.cmd[0] = cmdr500fp(r300->radeon.radeonScreen, 0, 1, 0, 0);
-+		r500fp.cmd[1] = R500_INST_TYPE_OUT |
-+			R500_INST_TEX_SEM_WAIT |
-+			R500_INST_LAST |
-+			R500_INST_RGB_OMASK_R |
-+			R500_INST_RGB_OMASK_G |
-+			R500_INST_RGB_OMASK_B |
-+			R500_INST_ALPHA_OMASK |
-+			R500_INST_RGB_CLAMP |
-+			R500_INST_ALPHA_CLAMP;
-+		r500fp.cmd[2] = R500_RGB_ADDR0(0) |
-+			R500_RGB_ADDR1(0) |
-+			R500_RGB_ADDR1_CONST |
-+			R500_RGB_ADDR2(0) |
-+			R500_RGB_ADDR2_CONST;
-+		r500fp.cmd[3] = R500_ALPHA_ADDR0(0) |
-+			R500_ALPHA_ADDR1(0) |
-+			R500_ALPHA_ADDR1_CONST |
-+			R500_ALPHA_ADDR2(0) |
-+			R500_ALPHA_ADDR2_CONST;
-+		r500fp.cmd[4] = R500_ALU_RGB_SEL_A_SRC0 |
-+			R500_ALU_RGB_R_SWIZ_A_R |
-+			R500_ALU_RGB_G_SWIZ_A_G |
-+			R500_ALU_RGB_B_SWIZ_A_B |
-+			R500_ALU_RGB_SEL_B_SRC0 |
-+			R500_ALU_RGB_R_SWIZ_B_R |
-+			R500_ALU_RGB_B_SWIZ_B_G |
-+			R500_ALU_RGB_G_SWIZ_B_B;
-+		r500fp.cmd[5] = R500_ALPHA_OP_CMP |
-+			R500_ALPHA_SWIZ_A_A |
-+			R500_ALPHA_SWIZ_B_A;
-+		r500fp.cmd[6] = R500_ALU_RGBA_OP_CMP |
-+			R500_ALU_RGBA_R_SWIZ_0 |
-+			R500_ALU_RGBA_G_SWIZ_0 |
-+			R500_ALU_RGBA_B_SWIZ_0 |
-+			R500_ALU_RGBA_A_SWIZ_0;
-+		
-+		r500fp.cmd[7] = 0;
-+		emit_r500fp(r300, &r500fp);
- 	}
- 
--	reg_start(R300_VAP_PVS_STATE_FLUSH_REG, 0);
--	e32(0x00000000);
-+	BEGIN_BATCH(2);
-+	OUT_BATCH_REGVAL(R300_VAP_PVS_STATE_FLUSH_REG, 0);
-+	END_BATCH();
-+
- 	if (has_tcl) {
--	    vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) |
-+		vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) |
- 			(5 << R300_PVS_NUM_CNTLRS_SHIFT) |
- 			(12 << R300_VF_MAX_VTX_NUM_SHIFT));
--	    if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
--		vap_cntl |= R500_TCL_STATE_OPTIMIZATION;
--	} else
--	    vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) |
-+		if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
-+			vap_cntl |= R500_TCL_STATE_OPTIMIZATION;
-+	} else {
-+		vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) |
- 			(5 << R300_PVS_NUM_CNTLRS_SHIFT) |
- 			(5 << R300_VF_MAX_VTX_NUM_SHIFT));
-+	}
- 
- 	if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV515)
--	    vap_cntl |= (2 << R300_PVS_NUM_FPUS_SHIFT);
-+		vap_cntl |= (2 << R300_PVS_NUM_FPUS_SHIFT);
- 	else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV530) ||
- 		 (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV560) ||
- 		 (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV570))
--	    vap_cntl |= (5 << R300_PVS_NUM_FPUS_SHIFT);
-+		vap_cntl |= (5 << R300_PVS_NUM_FPUS_SHIFT);
- 	else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV410) ||
- 		 (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R420))
--	    vap_cntl |= (6 << R300_PVS_NUM_FPUS_SHIFT);
-+		vap_cntl |= (6 << R300_PVS_NUM_FPUS_SHIFT);
- 	else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R520) ||
- 		 (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R580))
--	    vap_cntl |= (8 << R300_PVS_NUM_FPUS_SHIFT);
-+		vap_cntl |= (8 << R300_PVS_NUM_FPUS_SHIFT);
- 	else
--	    vap_cntl |= (4 << R300_PVS_NUM_FPUS_SHIFT);
-+		vap_cntl |= (4 << R300_PVS_NUM_FPUS_SHIFT);
-+
-+	R300_STATECHANGE(r300, vap_cntl);
- 
--	R300_STATECHANGE(rmesa, vap_cntl);
--	reg_start(R300_VAP_CNTL, 0);
--	e32(vap_cntl);
-+	BEGIN_BATCH(2);
-+	OUT_BATCH_REGVAL(R300_VAP_CNTL, vap_cntl);
-+	END_BATCH();
- 
- 	if (has_tcl) {
-+        struct r300_state_atom vpu;
-+        uint32_t _cmd[10];
- 		R300_STATECHANGE(r300, pvs);
--		reg_start(R300_VAP_PVS_CODE_CNTL_0, 2);
--
--		e32((0 << R300_PVS_FIRST_INST_SHIFT) |
--		    (0 << R300_PVS_XYZW_VALID_INST_SHIFT) |
--		    (1 << R300_PVS_LAST_INST_SHIFT));
--		e32((0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) |
--		    (0 << R300_PVS_MAX_CONST_ADDR_SHIFT));
--		e32(1 << R300_PVS_LAST_VTX_SRC_INST_SHIFT);
--
- 		R300_STATECHANGE(r300, vpi);
--		vsf_start_fragment(0x0, 8);
- 
--		e32(PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, 0, 0xf, PVS_DST_REG_OUT));
--		e32(PVS_SRC_OPERAND(0, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, VSF_FLAG_NONE));
--		e32(PVS_SRC_OPERAND(0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_REG_INPUT, VSF_FLAG_NONE));
--		e32(0x0);
--
--		e32(PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, 1, 0xf, PVS_DST_REG_OUT));
--		e32(PVS_SRC_OPERAND(1, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, VSF_FLAG_NONE));
--		e32(PVS_SRC_OPERAND(1, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_REG_INPUT, VSF_FLAG_NONE));
--		e32(0x0);
-+		BEGIN_BATCH(4);
-+		OUT_BATCH_REGSEQ(R300_VAP_PVS_CODE_CNTL_0, 3);
-+		OUT_BATCH((0 << R300_PVS_FIRST_INST_SHIFT) |
-+			  (0 << R300_PVS_XYZW_VALID_INST_SHIFT) |
-+			  (1 << R300_PVS_LAST_INST_SHIFT));
-+		OUT_BATCH((0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) |
-+			  (0 << R300_PVS_MAX_CONST_ADDR_SHIFT));
-+		OUT_BATCH(1 << R300_PVS_LAST_VTX_SRC_INST_SHIFT);
-+		END_BATCH();
-+
-+		vpu.check = check_vpu;
-+		vpu.cmd = _cmd;
-+		vpu.cmd[0] = cmdvpu(r300->radeon.radeonScreen, 0, 2);
-+
-+		vpu.cmd[1] = PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE,
-+                                         0, 0xf, PVS_DST_REG_OUT);
-+		vpu.cmd[2] = PVS_SRC_OPERAND(0, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y,
-+                                      PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W,
-+                                      PVS_SRC_REG_INPUT, VSF_FLAG_NONE);
-+		vpu.cmd[3] = PVS_SRC_OPERAND(0, PVS_SRC_SELECT_FORCE_0,
-+                                      PVS_SRC_SELECT_FORCE_0,
-+                                      PVS_SRC_SELECT_FORCE_0,
-+                                      PVS_SRC_SELECT_FORCE_0,
-+                                      PVS_SRC_REG_INPUT, VSF_FLAG_NONE);
-+		vpu.cmd[4] = 0x0;
-+
-+		vpu.cmd[5] = PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, 1, 0xf,
-+                                         PVS_DST_REG_OUT);
-+		vpu.cmd[6] = PVS_SRC_OPERAND(1, PVS_SRC_SELECT_X,
-+                                      PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z,
-+                                      PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT,
-+
-+                                      VSF_FLAG_NONE);
-+		vpu.cmd[7] = PVS_SRC_OPERAND(1, PVS_SRC_SELECT_FORCE_0,
-+                                      PVS_SRC_SELECT_FORCE_0,
-+                                      PVS_SRC_SELECT_FORCE_0,
-+                                      PVS_SRC_SELECT_FORCE_0,
-+                                      PVS_SRC_REG_INPUT, VSF_FLAG_NONE);
-+		vpu.cmd[8] = 0x0;
-+		emit_vpu(r300, &vpu);
- 	}
- }
- 
-@@ -468,9 +539,11 @@ static void r300Clear(GLcontext * ctx, GLbitfield mask)
- {
- 	r300ContextPtr r300 = R300_CONTEXT(ctx);
- 	__DRIdrawablePrivate *dPriv = r300->radeon.dri.drawable;
-+	GLframebuffer *fb = dPriv->driverPrivate;
-+	struct radeon_renderbuffer *rrb;
-+	struct radeon_renderbuffer *rrbd;
- 	int flags = 0;
- 	int bits = 0;
--	int swapped;
- 
- 	if (RADEON_DEBUG & DEBUG_IOCTL)
- 		fprintf(stderr, "r300Clear\n");
-@@ -482,6 +555,12 @@ static void r300Clear(GLcontext * ctx, GLbitfield mask)
- 			return;
- 	}
- 
-+	/* Flush swtcl vertices if necessary, because we will change hardware
-+	 * state during clear. See also the state-related comment in
-+	 * r300EmitClearState.
-+	 */
-+	R300_NEWPRIM(r300);
-+
- 	if (mask & BUFFER_BIT_FRONT_LEFT) {
- 		flags |= BUFFER_BIT_FRONT_LEFT;
- 		mask &= ~BUFFER_BIT_FRONT_LEFT;
-@@ -509,26 +588,28 @@ static void r300Clear(GLcontext * ctx, GLbitfield mask)
- 		_swrast_Clear(ctx, mask);
- 	}
- 
--	swapped = r300->radeon.sarea->pfCurrentPage == 1;
--
- 	/* Make sure it fits there. */
- 	r300EnsureCmdBufSpace(r300, 421 * 3, __FUNCTION__);
- 	if (flags || bits)
- 		r300EmitClearState(ctx);
-+    rrbd = (void *)fb->Attachment[BUFFER_DEPTH].Renderbuffer;
- 
- 	if (flags & BUFFER_BIT_FRONT_LEFT) {
--		r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, swapped);
-+		rrb = (void *)fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
-+		r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, rrb, rrbd);
- 		bits = 0;
- 	}
- 
- 	if (flags & BUFFER_BIT_BACK_LEFT) {
--		r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, swapped ^ 1);
-+		rrb = (void *)fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
-+		r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, rrb, rrbd);
- 		bits = 0;
- 	}
- 
- 	if (bits)
--		r300ClearBuffer(r300, bits, 0);
-+		r300ClearBuffer(r300, bits, NULL, rrbd);
- 
-+	COMMIT_BATCH();
- }
- 
- void r300Flush(GLcontext * ctx)
-@@ -538,302 +619,13 @@ void r300Flush(GLcontext * ctx)
- 	if (RADEON_DEBUG & DEBUG_IOCTL)
- 		fprintf(stderr, "%s\n", __FUNCTION__);
- 
--	if (rmesa->dma.flush)
--		rmesa->dma.flush( rmesa );
--
--	if (rmesa->cmdbuf.count_used > rmesa->cmdbuf.count_reemit)
--		r300FlushCmdBuf(rmesa, __FUNCTION__);
--}
--
--#ifdef USER_BUFFERS
--#include "r300_mem.h"
--
--void r300RefillCurrentDmaRegion(r300ContextPtr rmesa, int size)
--{
--	struct r300_dma_buffer *dmabuf;
--	size = MAX2(size, RADEON_BUFFER_SIZE * 16);
--
--	if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA))
--		fprintf(stderr, "%s\n", __FUNCTION__);
--
--	if (rmesa->dma.flush) {
--		rmesa->dma.flush(rmesa);
--	}
--
--	if (rmesa->dma.current.buf) {
--#ifdef USER_BUFFERS
--		r300_mem_use(rmesa, rmesa->dma.current.buf->id);
--#endif
--		r300ReleaseDmaRegion(rmesa, &rmesa->dma.current, __FUNCTION__);
--	}
--	if (rmesa->dma.nr_released_bufs > 4)
--		r300FlushCmdBuf(rmesa, __FUNCTION__);
--
--	dmabuf = CALLOC_STRUCT(r300_dma_buffer);
--	dmabuf->buf = (void *)1;	/* hack */
--	dmabuf->refcount = 1;
--
--	dmabuf->id = r300_mem_alloc(rmesa, 4, size);
--	if (dmabuf->id == 0) {
--		LOCK_HARDWARE(&rmesa->radeon);	/* no need to validate */
--
--		r300FlushCmdBufLocked(rmesa, __FUNCTION__);
--		radeonWaitForIdleLocked(&rmesa->radeon);
--
--		dmabuf->id = r300_mem_alloc(rmesa, 4, size);
--
--		UNLOCK_HARDWARE(&rmesa->radeon);
--
--		if (dmabuf->id == 0) {
--			fprintf(stderr,
--				"Error: Could not get dma buffer... exiting\n");
--			_mesa_exit(-1);
--		}
--	}
--
--	rmesa->dma.current.buf = dmabuf;
--	rmesa->dma.current.address = r300_mem_ptr(rmesa, dmabuf->id);
--	rmesa->dma.current.end = size;
--	rmesa->dma.current.start = 0;
--	rmesa->dma.current.ptr = 0;
--}
--
--void r300ReleaseDmaRegion(r300ContextPtr rmesa,
--			  struct r300_dma_region *region, const char *caller)
--{
--	if (RADEON_DEBUG & DEBUG_IOCTL)
--		fprintf(stderr, "%s from %s\n", __FUNCTION__, caller);
--
--	if (!region->buf)
--		return;
--
--	if (rmesa->dma.flush)
--		rmesa->dma.flush(rmesa);
--
--	if (--region->buf->refcount == 0) {
--		r300_mem_free(rmesa, region->buf->id);
--		FREE(region->buf);
--		rmesa->dma.nr_released_bufs++;
--	}
--
--	region->buf = 0;
--	region->start = 0;
--}
--
--/* Allocates a region from rmesa->dma.current.  If there isn't enough
-- * space in current, grab a new buffer (and discard what was left of current)
-- */
--void r300AllocDmaRegion(r300ContextPtr rmesa,
--			struct r300_dma_region *region,
--			int bytes, int alignment)
--{
--	if (RADEON_DEBUG & DEBUG_IOCTL)
--		fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);
--
--	if (rmesa->dma.flush)
--		rmesa->dma.flush(rmesa);
--
--	if (region->buf)
--		r300ReleaseDmaRegion(rmesa, region, __FUNCTION__);
--
--	alignment--;
--	rmesa->dma.current.start = rmesa->dma.current.ptr =
--	    (rmesa->dma.current.ptr + alignment) & ~alignment;
--
--	if (rmesa->dma.current.ptr + bytes > rmesa->dma.current.end)
--		r300RefillCurrentDmaRegion(rmesa, (bytes + 0x7) & ~0x7);
--
--	region->start = rmesa->dma.current.start;
--	region->ptr = rmesa->dma.current.start;
--	region->end = rmesa->dma.current.start + bytes;
--	region->address = rmesa->dma.current.address;
--	region->buf = rmesa->dma.current.buf;
--	region->buf->refcount++;
--
--	rmesa->dma.current.ptr += bytes;	/* bug - if alignment > 7 */
--	rmesa->dma.current.start =
--	    rmesa->dma.current.ptr = (rmesa->dma.current.ptr + 0x7) & ~0x7;
--
--	assert(rmesa->dma.current.ptr <= rmesa->dma.current.end);
--}
--
--#else
--static void r300RefillCurrentDmaRegion(r300ContextPtr rmesa)
--{
--	struct r300_dma_buffer *dmabuf;
--	int fd = rmesa->radeon.dri.fd;
--	int index = 0;
--	int size = 0;
--	drmDMAReq dma;
--	int ret;
--
--	if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA))
--		fprintf(stderr, "%s\n", __FUNCTION__);
--
--	if (rmesa->dma.flush) {
--		rmesa->dma.flush(rmesa);
--	}
-+    if (rmesa->swtcl.flush) {
-+        rmesa->swtcl.flush(rmesa);
-+    }
- 
--	if (rmesa->dma.current.buf)
--		r300ReleaseDmaRegion(rmesa, &rmesa->dma.current, __FUNCTION__);
--
--	if (rmesa->dma.nr_released_bufs > 4)
-+	if (rmesa->cmdbuf.cs->cdw) {
- 		r300FlushCmdBuf(rmesa, __FUNCTION__);
--
--	dma.context = rmesa->radeon.dri.hwContext;
--	dma.send_count = 0;
--	dma.send_list = NULL;
--	dma.send_sizes = NULL;
--	dma.flags = 0;
--	dma.request_count = 1;
--	dma.request_size = RADEON_BUFFER_SIZE;
--	dma.request_list = &index;
--	dma.request_sizes = &size;
--	dma.granted_count = 0;
--
--	LOCK_HARDWARE(&rmesa->radeon);	/* no need to validate */
--
--	ret = drmDMA(fd, &dma);
--
--	if (ret != 0) {
--		/* Try to release some buffers and wait until we can't get any more */
--		if (rmesa->dma.nr_released_bufs) {
--			r300FlushCmdBufLocked(rmesa, __FUNCTION__);
--		}
--
--		if (RADEON_DEBUG & DEBUG_DMA)
--			fprintf(stderr, "Waiting for buffers\n");
--
--		radeonWaitForIdleLocked(&rmesa->radeon);
--		ret = drmDMA(fd, &dma);
--
--		if (ret != 0) {
--			UNLOCK_HARDWARE(&rmesa->radeon);
--			fprintf(stderr,
--				"Error: Could not get dma buffer... exiting\n");
--			_mesa_exit(-1);
--		}
--	}
--
--	UNLOCK_HARDWARE(&rmesa->radeon);
--
--	if (RADEON_DEBUG & DEBUG_DMA)
--		fprintf(stderr, "Allocated buffer %d\n", index);
--
--	dmabuf = CALLOC_STRUCT(r300_dma_buffer);
--	dmabuf->buf = &rmesa->radeon.radeonScreen->buffers->list[index];
--	dmabuf->refcount = 1;
--
--	rmesa->dma.current.buf = dmabuf;
--	rmesa->dma.current.address = dmabuf->buf->address;
--	rmesa->dma.current.end = dmabuf->buf->total;
--	rmesa->dma.current.start = 0;
--	rmesa->dma.current.ptr = 0;
--}
--
--void r300ReleaseDmaRegion(r300ContextPtr rmesa,
--			  struct r300_dma_region *region, const char *caller)
--{
--	if (RADEON_DEBUG & DEBUG_IOCTL)
--		fprintf(stderr, "%s from %s\n", __FUNCTION__, caller);
--
--	if (!region->buf)
--		return;
--
--	if (rmesa->dma.flush)
--		rmesa->dma.flush(rmesa);
--
--	if (--region->buf->refcount == 0) {
--		drm_radeon_cmd_header_t *cmd;
--
--		if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA))
--			fprintf(stderr, "%s -- DISCARD BUF %d\n",
--				__FUNCTION__, region->buf->buf->idx);
--		cmd =
--		    (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa,
--								sizeof
--								(*cmd) / 4,
--								__FUNCTION__);
--		cmd->dma.cmd_type = R300_CMD_DMA_DISCARD;
--		cmd->dma.buf_idx = region->buf->buf->idx;
--
--		FREE(region->buf);
--		rmesa->dma.nr_released_bufs++;
- 	}
--
--	region->buf = 0;
--	region->start = 0;
--}
--
--/* Allocates a region from rmesa->dma.current.  If there isn't enough
-- * space in current, grab a new buffer (and discard what was left of current)
-- */
--void r300AllocDmaRegion(r300ContextPtr rmesa,
--			struct r300_dma_region *region,
--			int bytes, int alignment)
--{
--	if (RADEON_DEBUG & DEBUG_IOCTL)
--		fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);
--
--	if (rmesa->dma.flush)
--		rmesa->dma.flush(rmesa);
--
--	if (region->buf)
--		r300ReleaseDmaRegion(rmesa, region, __FUNCTION__);
--
--	alignment--;
--	rmesa->dma.current.start = rmesa->dma.current.ptr =
--	    (rmesa->dma.current.ptr + alignment) & ~alignment;
--
--	if (rmesa->dma.current.ptr + bytes > rmesa->dma.current.end)
--		r300RefillCurrentDmaRegion(rmesa);
--
--	region->start = rmesa->dma.current.start;
--	region->ptr = rmesa->dma.current.start;
--	region->end = rmesa->dma.current.start + bytes;
--	region->address = rmesa->dma.current.address;
--	region->buf = rmesa->dma.current.buf;
--	region->buf->refcount++;
--
--	rmesa->dma.current.ptr += bytes;	/* bug - if alignment > 7 */
--	rmesa->dma.current.start =
--	    rmesa->dma.current.ptr = (rmesa->dma.current.ptr + 0x7) & ~0x7;
--
--	assert(rmesa->dma.current.ptr <= rmesa->dma.current.end);
--}
--
--#endif
--
--GLboolean r300IsGartMemory(r300ContextPtr rmesa, const GLvoid * pointer,
--			   GLint size)
--{
--	int offset =
--	    (char *)pointer -
--	    (char *)rmesa->radeon.radeonScreen->gartTextures.map;
--	int valid = (size >= 0 && offset >= 0
--		     && offset + size <
--		     rmesa->radeon.radeonScreen->gartTextures.size);
--
--	if (RADEON_DEBUG & DEBUG_IOCTL)
--		fprintf(stderr, "r300IsGartMemory( %p ) : %d\n", pointer,
--			valid);
--
--	return valid;
--}
--
--GLuint r300GartOffsetFromVirtual(r300ContextPtr rmesa, const GLvoid * pointer)
--{
--	int offset =
--	    (char *)pointer -
--	    (char *)rmesa->radeon.radeonScreen->gartTextures.map;
--
--	//fprintf(stderr, "offset=%08x\n", offset);
--
--	if (offset < 0
--	    || offset > rmesa->radeon.radeonScreen->gartTextures.size)
--		return ~0;
--	else
--		return rmesa->radeon.radeonScreen->gart_texture_offset + offset;
- }
- 
- void r300InitIoctlFuncs(struct dd_function_table *functions)
-diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.h b/src/mesa/drivers/dri/r300/r300_ioctl.h
-index e1143fb..5f00264 100644
---- a/src/mesa/drivers/dri/r300/r300_ioctl.h
-+++ b/src/mesa/drivers/dri/r300/r300_ioctl.h
-@@ -39,20 +39,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "r300_context.h"
- #include "radeon_drm.h"
- 
--extern GLboolean r300IsGartMemory(r300ContextPtr rmesa,
--				  const GLvoid * pointer, GLint size);
--
--extern GLuint r300GartOffsetFromVirtual(r300ContextPtr rmesa,
--					const GLvoid * pointer);
--
- extern void r300Flush(GLcontext * ctx);
- 
--extern void r300ReleaseDmaRegion(r300ContextPtr rmesa,
--				 struct r300_dma_region *region,
--				 const char *caller);
- extern void r300AllocDmaRegion(r300ContextPtr rmesa,
--			       struct r300_dma_region *region, int bytes,
--			       int alignment);
-+			       struct radeon_bo **pbo, int *poffset,
-+			       int bytes, int alignment);
- 
- extern void r300InitIoctlFuncs(struct dd_function_table *functions);
- 
-diff --git a/src/mesa/drivers/dri/r300/r300_mem.c b/src/mesa/drivers/dri/r300/r300_mem.c
-deleted file mode 100644
-index f8f9d4f..0000000
---- a/src/mesa/drivers/dri/r300/r300_mem.c
-+++ /dev/null
-@@ -1,385 +0,0 @@
--/*
-- * Copyright (C) 2005 Aapo Tahkola.
-- *
-- * All Rights Reserved.
-- *
-- * Permission is hereby granted, free of charge, to any person obtaining
-- * a copy of this software and associated documentation files (the
-- * "Software"), to deal in the Software without restriction, including
-- * without limitation the rights to use, copy, modify, merge, publish,
-- * distribute, sublicense, and/or sell copies of the Software, and to
-- * permit persons to whom the Software is furnished to do so, subject to
-- * the following conditions:
-- *
-- * The above copyright notice and this permission notice (including the
-- * next paragraph) shall be included in all copies or substantial
-- * portions of the Software.
-- *
-- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-- * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
-- * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
-- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-- *
-- */
--
--/**
-- * \file
-- *
-- * \author Aapo Tahkola <aet@rasterburn.org>
-- */
--
--#include <unistd.h>
--
--#include "r300_context.h"
--#include "r300_cmdbuf.h"
--#include "r300_ioctl.h"
--#include "r300_mem.h"
--#include "radeon_ioctl.h"
--
--#ifdef USER_BUFFERS
--
--static void resize_u_list(r300ContextPtr rmesa)
--{
--	void *temp;
--	int nsize;
--
--	temp = rmesa->rmm->u_list;
--	nsize = rmesa->rmm->u_size * 2;
--
--	rmesa->rmm->u_list = _mesa_malloc(nsize * sizeof(*rmesa->rmm->u_list));
--	_mesa_memset(rmesa->rmm->u_list, 0,
--		     nsize * sizeof(*rmesa->rmm->u_list));
--
--	if (temp) {
--		r300FlushCmdBuf(rmesa, __FUNCTION__);
--
--		_mesa_memcpy(rmesa->rmm->u_list, temp,
--			     rmesa->rmm->u_size * sizeof(*rmesa->rmm->u_list));
--		_mesa_free(temp);
--	}
--
--	rmesa->rmm->u_size = nsize;
--}
--
--void r300_mem_init(r300ContextPtr rmesa)
--{
--	rmesa->rmm = malloc(sizeof(struct r300_memory_manager));
--	memset(rmesa->rmm, 0, sizeof(struct r300_memory_manager));
--
--	rmesa->rmm->u_size = 128;
--	resize_u_list(rmesa);
--}
--
--void r300_mem_destroy(r300ContextPtr rmesa)
--{
--	_mesa_free(rmesa->rmm->u_list);
--	rmesa->rmm->u_list = NULL;
--
--	_mesa_free(rmesa->rmm);
--	rmesa->rmm = NULL;
--}
--
--void *r300_mem_ptr(r300ContextPtr rmesa, int id)
--{
--	assert(id <= rmesa->rmm->u_last);
--	return rmesa->rmm->u_list[id].ptr;
--}
--
--int r300_mem_find(r300ContextPtr rmesa, void *ptr)
--{
--	int i;
--
--	for (i = 1; i < rmesa->rmm->u_size + 1; i++)
--		if (rmesa->rmm->u_list[i].ptr &&
--		    ptr >= rmesa->rmm->u_list[i].ptr &&
--		    ptr <
--		    rmesa->rmm->u_list[i].ptr + rmesa->rmm->u_list[i].size)
--			break;
--
--	if (i < rmesa->rmm->u_size + 1)
--		return i;
--
--	fprintf(stderr, "%p failed\n", ptr);
--	return 0;
--}
--
--//#define MM_DEBUG
--int r300_mem_alloc(r300ContextPtr rmesa, int alignment, int size)
--{
--	drm_radeon_mem_alloc_t alloc;
--	int offset = 0, ret;
--	int i, free = -1;
--	int done_age;
--	drm_radeon_mem_free_t memfree;
--	int tries = 0;
--	static int bytes_wasted = 0, allocated = 0;
--
--	if (size < 4096)
--		bytes_wasted += 4096 - size;
--
--	allocated += size;
--
--#if 0
--	static int t = 0;
--	if (t != time(NULL)) {
--		t = time(NULL);
--		fprintf(stderr, "slots used %d, wasted %d kb, allocated %d\n",
--			rmesa->rmm->u_last, bytes_wasted / 1024,
--			allocated / 1024);
--	}
--#endif
--
--	memfree.region = RADEON_MEM_REGION_GART;
--
--      again:
--
--	done_age = radeonGetAge((radeonContextPtr) rmesa);
--
--	if (rmesa->rmm->u_last + 1 >= rmesa->rmm->u_size)
--		resize_u_list(rmesa);
--
--	for (i = rmesa->rmm->u_last + 1; i > 0; i--) {
--		if (rmesa->rmm->u_list[i].ptr == NULL) {
--			free = i;
--			continue;
--		}
--
--		if (rmesa->rmm->u_list[i].h_pending == 0 &&
--		    rmesa->rmm->u_list[i].pending
--		    && rmesa->rmm->u_list[i].age <= done_age) {
--			memfree.region_offset =
--			    (char *)rmesa->rmm->u_list[i].ptr -
--			    (char *)rmesa->radeon.radeonScreen->gartTextures.
--			    map;
--
--			ret =
--			    drmCommandWrite(rmesa->radeon.radeonScreen->
--					    driScreen->fd, DRM_RADEON_FREE,
--					    &memfree, sizeof(memfree));
--
--			if (ret) {
--				fprintf(stderr, "Failed to free at %p\n",
--					rmesa->rmm->u_list[i].ptr);
--				fprintf(stderr, "ret = %s\n", strerror(-ret));
--				exit(1);
--			} else {
--#ifdef MM_DEBUG
--				fprintf(stderr, "really freed %d at age %x\n",
--					i,
--					radeonGetAge((radeonContextPtr) rmesa));
--#endif
--				if (i == rmesa->rmm->u_last)
--					rmesa->rmm->u_last--;
--
--				if (rmesa->rmm->u_list[i].size < 4096)
--					bytes_wasted -=
--					    4096 - rmesa->rmm->u_list[i].size;
--
--				allocated -= rmesa->rmm->u_list[i].size;
--				rmesa->rmm->u_list[i].pending = 0;
--				rmesa->rmm->u_list[i].ptr = NULL;
--				free = i;
--			}
--		}
--	}
--	rmesa->rmm->u_head = i;
--
--	if (free == -1) {
--		WARN_ONCE("Ran out of slots!\n");
--		//usleep(100);
--		r300FlushCmdBuf(rmesa, __FUNCTION__);
--		tries++;
--		if (tries > 100) {
--			WARN_ONCE("Ran out of slots!\n");
--			exit(1);
--		}
--		goto again;
--	}
--
--	alloc.region = RADEON_MEM_REGION_GART;
--	alloc.alignment = alignment;
--	alloc.size = size;
--	alloc.region_offset = &offset;
--
--	ret =
--	    drmCommandWriteRead(rmesa->radeon.dri.fd, DRM_RADEON_ALLOC, &alloc,
--				sizeof(alloc));
--	if (ret) {
--#if 0
--		WARN_ONCE("Ran out of mem!\n");
--		r300FlushCmdBuf(rmesa, __FUNCTION__);
--		//usleep(100);
--		tries2++;
--		tries = 0;
--		if (tries2 > 100) {
--			WARN_ONCE("Ran out of GART memory!\n");
--			exit(1);
--		}
--		goto again;
--#else
--		WARN_ONCE
--		    ("Ran out of GART memory (for %d)!\nPlease consider adjusting GARTSize option.\n",
--		     size);
--		return 0;
--#endif
--	}
--
--	i = free;
--
--	if (i > rmesa->rmm->u_last)
--		rmesa->rmm->u_last = i;
--
--	rmesa->rmm->u_list[i].ptr =
--	    ((GLubyte *) rmesa->radeon.radeonScreen->gartTextures.map) + offset;
--	rmesa->rmm->u_list[i].size = size;
--	rmesa->rmm->u_list[i].age = 0;
--	//fprintf(stderr, "alloc %p at id %d\n", rmesa->rmm->u_list[i].ptr, i);
--
--#ifdef MM_DEBUG
--	fprintf(stderr, "allocated %d at age %x\n", i,
--		radeonGetAge((radeonContextPtr) rmesa));
--#endif
--
--	return i;
--}
--
--void r300_mem_use(r300ContextPtr rmesa, int id)
--{
--	uint64_t ull;
--#ifdef MM_DEBUG
--	fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id,
--		radeonGetAge((radeonContextPtr) rmesa));
--#endif
--	drm_r300_cmd_header_t *cmd;
--
--	assert(id <= rmesa->rmm->u_last);
--
--	if (id == 0)
--		return;
--
--	cmd =
--	    (drm_r300_cmd_header_t *) r300AllocCmdBuf(rmesa,
--						      2 + sizeof(ull) / 4,
--						      __FUNCTION__);
--	cmd[0].scratch.cmd_type = R300_CMD_SCRATCH;
--	cmd[0].scratch.reg = R300_MEM_SCRATCH;
--	cmd[0].scratch.n_bufs = 1;
--	cmd[0].scratch.flags = 0;
--	cmd++;
--
--	ull = (uint64_t) (intptr_t) & rmesa->rmm->u_list[id].age;
--	_mesa_memcpy(cmd, &ull, sizeof(ull));
--	cmd += sizeof(ull) / 4;
--
--	cmd[0].u = /*id */ 0;
--
--	LOCK_HARDWARE(&rmesa->radeon);	/* Protect from DRM. */
--	rmesa->rmm->u_list[id].h_pending++;
--	UNLOCK_HARDWARE(&rmesa->radeon);
--}
--
--unsigned long r300_mem_offset(r300ContextPtr rmesa, int id)
--{
--	unsigned long offset;
--
--	assert(id <= rmesa->rmm->u_last);
--
--	offset = (char *)rmesa->rmm->u_list[id].ptr -
--	    (char *)rmesa->radeon.radeonScreen->gartTextures.map;
--	offset += rmesa->radeon.radeonScreen->gart_texture_offset;
--
--	return offset;
--}
--
--void *r300_mem_map(r300ContextPtr rmesa, int id, int access)
--{
--#ifdef MM_DEBUG
--	fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id,
--		radeonGetAge((radeonContextPtr) rmesa));
--#endif
--	void *ptr;
--	int tries = 0;
--
--	assert(id <= rmesa->rmm->u_last);
--
--	if (access == R300_MEM_R) {
--
--		if (rmesa->rmm->u_list[id].mapped == 1)
--			WARN_ONCE("buffer %d already mapped\n", id);
--
--		rmesa->rmm->u_list[id].mapped = 1;
--		ptr = r300_mem_ptr(rmesa, id);
--
--		return ptr;
--	}
--
--	if (rmesa->rmm->u_list[id].h_pending)
--		r300FlushCmdBuf(rmesa, __FUNCTION__);
--
--	if (rmesa->rmm->u_list[id].h_pending) {
--		return NULL;
--	}
--
--	while (rmesa->rmm->u_list[id].age >
--	       radeonGetAge((radeonContextPtr) rmesa) && tries++ < 1000)
--		usleep(10);
--
--	if (tries >= 1000) {
--		fprintf(stderr, "Idling failed (%x vs %x)\n",
--			rmesa->rmm->u_list[id].age,
--			radeonGetAge((radeonContextPtr) rmesa));
--		return NULL;
--	}
--
--	if (rmesa->rmm->u_list[id].mapped == 1)
--		WARN_ONCE("buffer %d already mapped\n", id);
--
--	rmesa->rmm->u_list[id].mapped = 1;
--	ptr = r300_mem_ptr(rmesa, id);
--
--	return ptr;
--}
--
--void r300_mem_unmap(r300ContextPtr rmesa, int id)
--{
--#ifdef MM_DEBUG
--	fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id,
--		radeonGetAge((radeonContextPtr) rmesa));
--#endif
--
--	assert(id <= rmesa->rmm->u_last);
--
--	if (rmesa->rmm->u_list[id].mapped == 0)
--		WARN_ONCE("buffer %d not mapped\n", id);
--
--	rmesa->rmm->u_list[id].mapped = 0;
--}
--
--void r300_mem_free(r300ContextPtr rmesa, int id)
--{
--#ifdef MM_DEBUG
--	fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id,
--		radeonGetAge((radeonContextPtr) rmesa));
--#endif
--
--	assert(id <= rmesa->rmm->u_last);
--
--	if (id == 0)
--		return;
--
--	if (rmesa->rmm->u_list[id].ptr == NULL) {
--		WARN_ONCE("Not allocated!\n");
--		return;
--	}
--
--	if (rmesa->rmm->u_list[id].pending) {
--		WARN_ONCE("%p already pended!\n", rmesa->rmm->u_list[id].ptr);
--		return;
--	}
--
--	rmesa->rmm->u_list[id].pending = 1;
--}
--#endif
-diff --git a/src/mesa/drivers/dri/r300/r300_mem.h b/src/mesa/drivers/dri/r300/r300_mem.h
-deleted file mode 100644
-index 625a7f6..0000000
---- a/src/mesa/drivers/dri/r300/r300_mem.h
-+++ /dev/null
-@@ -1,37 +0,0 @@
--#ifndef __R300_MEM_H__
--#define __R300_MEM_H__
--
--//#define R300_MEM_PDL 0
--#define R300_MEM_UL 1
--
--#define R300_MEM_R 1
--#define R300_MEM_W 2
--#define R300_MEM_RW (R300_MEM_R | R300_MEM_W)
--
--#define R300_MEM_SCRATCH 2
--
--struct r300_memory_manager {
--	struct {
--		void *ptr;
--		uint32_t size;
--		uint32_t age;
--		uint32_t h_pending;
--		int pending;
--		int mapped;
--	} *u_list;
--	int u_head, u_size, u_last;
--
--};
--
--extern void r300_mem_init(r300ContextPtr rmesa);
--extern void r300_mem_destroy(r300ContextPtr rmesa);
--extern void *r300_mem_ptr(r300ContextPtr rmesa, int id);
--extern int r300_mem_find(r300ContextPtr rmesa, void *ptr);
--extern int r300_mem_alloc(r300ContextPtr rmesa, int alignment, int size);
--extern void r300_mem_use(r300ContextPtr rmesa, int id);
--extern unsigned long r300_mem_offset(r300ContextPtr rmesa, int id);
--extern void *r300_mem_map(r300ContextPtr rmesa, int id, int access);
--extern void r300_mem_unmap(r300ContextPtr rmesa, int id);
--extern void r300_mem_free(r300ContextPtr rmesa, int id);
--
--#endif
-diff --git a/src/mesa/drivers/dri/r300/r300_mipmap_tree.c b/src/mesa/drivers/dri/r300/r300_mipmap_tree.c
-new file mode 100644
-index 0000000..097f9cd
---- /dev/null
-+++ b/src/mesa/drivers/dri/r300/r300_mipmap_tree.c
-@@ -0,0 +1,319 @@
-+/*
-+ * Copyright (C) 2008 Nicolai Haehnle.
-+ *
-+ * All Rights Reserved.
-+ *
-+ * Permission is hereby granted, free of charge, to any person obtaining
-+ * a copy of this software and associated documentation files (the
-+ * "Software"), to deal in the Software without restriction, including
-+ * without limitation the rights to use, copy, modify, merge, publish,
-+ * distribute, sublicense, and/or sell copies of the Software, and to
-+ * permit persons to whom the Software is furnished to do so, subject to
-+ * the following conditions:
-+ *
-+ * The above copyright notice and this permission notice (including the
-+ * next paragraph) shall be included in all copies or substantial
-+ * portions of the Software.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
-+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
-+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-+ *
-+ */
-+
-+#include "r300_mipmap_tree.h"
-+
-+#include <errno.h>
-+#include <unistd.h>
-+
-+#include "main/simple_list.h"
-+#include "main/texcompress.h"
-+#include "main/texformat.h"
-+
-+#include "radeon_buffer.h"
-+
-+static GLuint r300_compressed_texture_size(GLcontext *ctx,
-+		GLsizei width, GLsizei height, GLsizei depth,
-+		GLuint mesaFormat)
-+{
-+	GLuint size = _mesa_compressed_texture_size(ctx, width, height, depth, mesaFormat);
-+
-+	if (mesaFormat == MESA_FORMAT_RGB_DXT1 ||
-+	    mesaFormat == MESA_FORMAT_RGBA_DXT1) {
-+		if (width + 3 < 8)	/* width one block */
-+			size = size * 4;
-+		else if (width + 3 < 16)
-+			size = size * 2;
-+	} else {
-+		/* DXT3/5, 16 bytes per block */
-+		WARN_ONCE("DXT 3/5 suffers from multitexturing problems!\n");
-+		if (width + 3 < 8)
-+			size = size * 2;
-+	}
-+
-+	return size;
-+}
-+
-+/**
-+ * Compute sizes and fill in offset and blit information for the given
-+ * image (determined by \p face and \p level).
-+ *
-+ * \param curOffset points to the offset at which the image is to be stored
-+ * and is updated by this function according to the size of the image.
-+ */
-+static void compute_tex_image_offset(r300_mipmap_tree *mt,
-+	GLuint face, GLuint level, GLuint* curOffset)
-+{
-+	r300_mipmap_level *lvl = &mt->levels[level];
-+
-+	/* Find image size in bytes */
-+	if (mt->compressed) {
-+		/* TODO: Is this correct? Need test cases for compressed textures! */
-+		GLuint align;
-+
-+		if (mt->target == GL_TEXTURE_RECTANGLE_NV)
-+			align = 64 / mt->bpp;
-+		else
-+			align = 32 / mt->bpp;
-+		lvl->rowstride = (lvl->width + align - 1) & ~(align - 1);
-+		lvl->size = r300_compressed_texture_size(mt->r300->radeon.glCtx,
-+			lvl->width, lvl->height, lvl->depth, mt->compressed);
-+	} else if (mt->target == GL_TEXTURE_RECTANGLE_NV) {
-+		lvl->rowstride = (lvl->width * mt->bpp + 63) & ~63;
-+		lvl->size = lvl->rowstride * lvl->height;
-+	} else if (mt->tilebits & R300_TXO_MICRO_TILE) {
-+		/* tile pattern is 16 bytes x2. mipmaps stay 32 byte aligned,
-+		 * though the actual offset may be different (if texture is less than
-+		 * 32 bytes width) to the untiled case */
-+		lvl->rowstride = (lvl->width * mt->bpp * 2 + 31) & ~31;
-+		lvl->size = lvl->rowstride * ((lvl->height + 1) / 2) * lvl->depth;
-+	} else {
-+		lvl->rowstride = (lvl->width * mt->bpp + 31) & ~31;
-+		lvl->size = lvl->rowstride * lvl->height * lvl->depth;
-+	}
-+	assert(lvl->size > 0);
-+
-+	/* All images are aligned to a 32-byte offset */
-+	*curOffset = (*curOffset + 0x1f) & ~0x1f;
-+	lvl->faces[face].offset = *curOffset;
-+	*curOffset += lvl->size;
-+}
-+
-+static GLuint minify(GLuint size, GLuint levels)
-+{
-+	size = size >> levels;
-+	if (size < 1)
-+		size = 1;
-+	return size;
-+}
-+
-+static void calculate_miptree_layout(r300_mipmap_tree *mt)
-+{
-+	GLuint curOffset;
-+	GLuint numLevels;
-+	GLuint i;
-+
-+	numLevels = mt->lastLevel - mt->firstLevel + 1;
-+	assert(numLevels <= RADEON_MAX_TEXTURE_LEVELS);
-+
-+	curOffset = 0;
-+	for(i = 0; i < numLevels; i++) {
-+		GLuint face;
-+
-+		mt->levels[i].width = minify(mt->width0, i);
-+		mt->levels[i].height = minify(mt->height0, i);
-+		mt->levels[i].depth = minify(mt->depth0, i);
-+
-+		for(face = 0; face < mt->faces; face++)
-+			compute_tex_image_offset(mt, face, i, &curOffset);
-+	}
-+
-+	/* Note the required size in memory */
-+	mt->totalsize = (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK;
-+}
-+
-+
-+/**
-+ * Create a new mipmap tree, calculate its layout and allocate memory.
-+ */
-+r300_mipmap_tree* r300_miptree_create(r300ContextPtr rmesa, r300TexObj *t,
-+		GLenum target, GLuint firstLevel, GLuint lastLevel,
-+		GLuint width0, GLuint height0, GLuint depth0,
-+		GLuint bpp, GLuint tilebits, GLuint compressed)
-+{
-+	r300_mipmap_tree *mt = CALLOC_STRUCT(_r300_mipmap_tree);
-+
-+	mt->r300 = rmesa;
-+	mt->refcount = 1;
-+	mt->t = t;
-+	mt->target = target;
-+	mt->faces = (target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
-+	mt->firstLevel = firstLevel;
-+	mt->lastLevel = lastLevel;
-+	mt->width0 = width0;
-+	mt->height0 = height0;
-+	mt->depth0 = depth0;
-+	mt->bpp = bpp;
-+	mt->tilebits = tilebits;
-+	mt->compressed = compressed;
-+
-+	calculate_miptree_layout(mt);
-+
-+	mt->bo = radeon_bo_open(rmesa->radeon.radeonScreen->bom,
-+                            0, mt->totalsize, 1024,
-+                            RADEON_GEM_DOMAIN_VRAM,
-+                            0);
-+
-+	return mt;
-+}
-+
-+void r300_miptree_reference(r300_mipmap_tree *mt)
-+{
-+	mt->refcount++;
-+	assert(mt->refcount > 0);
-+}
-+
-+void r300_miptree_unreference(r300_mipmap_tree *mt)
-+{
-+	if (!mt)
-+		return;
-+
-+	assert(mt->refcount > 0);
-+	mt->refcount--;
-+	if (!mt->refcount) {
-+		radeon_bo_unref(mt->bo);
-+		free(mt);
-+	}
-+}
-+
-+
-+static void calculate_first_last_level(struct gl_texture_object *tObj,
-+				       GLuint *pfirstLevel, GLuint *plastLevel)
-+{
-+	const struct gl_texture_image * const baseImage =
-+		tObj->Image[0][tObj->BaseLevel];
-+
-+	/* These must be signed values.  MinLod and MaxLod can be negative numbers,
-+	* and having firstLevel and lastLevel as signed prevents the need for
-+	* extra sign checks.
-+	*/
-+	int   firstLevel;
-+	int   lastLevel;
-+
-+	/* Yes, this looks overly complicated, but it's all needed.
-+	*/
-+	switch (tObj->Target) {
-+	case GL_TEXTURE_1D:
-+	case GL_TEXTURE_2D:
-+	case GL_TEXTURE_3D:
-+	case GL_TEXTURE_CUBE_MAP:
-+		if (tObj->MinFilter == GL_NEAREST || tObj->MinFilter == GL_LINEAR) {
-+			/* GL_NEAREST and GL_LINEAR only care about GL_TEXTURE_BASE_LEVEL.
-+			*/
-+			firstLevel = lastLevel = tObj->BaseLevel;
-+		} else {
-+			firstLevel = tObj->BaseLevel + (GLint)(tObj->MinLod + 0.5);
-+			firstLevel = MAX2(firstLevel, tObj->BaseLevel);
-+			firstLevel = MIN2(firstLevel, tObj->BaseLevel + baseImage->MaxLog2);
-+			lastLevel = tObj->BaseLevel + (GLint)(tObj->MaxLod + 0.5);
-+			lastLevel = MAX2(lastLevel, tObj->BaseLevel);
-+			lastLevel = MIN2(lastLevel, tObj->BaseLevel + baseImage->MaxLog2);
-+			lastLevel = MIN2(lastLevel, tObj->MaxLevel);
-+			lastLevel = MAX2(firstLevel, lastLevel); /* need at least one level */
-+		}
-+		break;
-+	case GL_TEXTURE_RECTANGLE_NV:
-+	case GL_TEXTURE_4D_SGIS:
-+		firstLevel = lastLevel = 0;
-+		break;
-+	default:
-+		return;
-+	}
-+
-+	/* save these values */
-+	*pfirstLevel = firstLevel;
-+	*plastLevel = lastLevel;
-+}
-+
-+
-+/**
-+ * Checks whether the given miptree can hold the given texture image at the
-+ * given face and level.
-+ */
-+GLboolean r300_miptree_matches_image(r300_mipmap_tree *mt,
-+		struct gl_texture_image *texImage, GLuint face, GLuint level)
-+{
-+	r300_mipmap_level *lvl;
-+
-+	if (face >= mt->faces || level < mt->firstLevel || level > mt->lastLevel)
-+		return GL_FALSE;
-+
-+	if (texImage->TexFormat->TexelBytes != mt->bpp)
-+		return GL_FALSE;
-+
-+	lvl = &mt->levels[level - mt->firstLevel];
-+	if (lvl->width != texImage->Width ||
-+	    lvl->height != texImage->Height ||
-+	    lvl->depth != texImage->Depth)
-+		return GL_FALSE;
-+
-+	return GL_TRUE;
-+}
-+
-+
-+/**
-+ * Checks whether the given miptree has the right format to store the given texture object.
-+ */
-+GLboolean r300_miptree_matches_texture(r300_mipmap_tree *mt, struct gl_texture_object *texObj)
-+{
-+	struct gl_texture_image *firstImage;
-+	GLuint compressed;
-+	GLuint numfaces = 1;
-+	GLuint firstLevel, lastLevel;
-+
-+	calculate_first_last_level(texObj, &firstLevel, &lastLevel);
-+	if (texObj->Target == GL_TEXTURE_CUBE_MAP)
-+		numfaces = 6;
-+
-+	firstImage = texObj->Image[0][firstLevel];
-+	compressed = firstImage->IsCompressed ? firstImage->TexFormat->MesaFormat : 0;
-+
-+	return (mt->firstLevel == firstLevel &&
-+	        mt->lastLevel == lastLevel &&
-+	        mt->width0 == firstImage->Width &&
-+	        mt->height0 == firstImage->Height &&
-+	        mt->depth0 == firstImage->Depth &&
-+	        mt->bpp == firstImage->TexFormat->TexelBytes &&
-+	        mt->compressed == compressed);
-+}
-+
-+
-+/**
-+ * Try to allocate a mipmap tree for the given texture that will fit the
-+ * given image in the given position.
-+ */
-+void r300_try_alloc_miptree(r300ContextPtr rmesa, r300TexObj *t,
-+		struct gl_texture_image *texImage, GLuint face, GLuint level)
-+{
-+	GLuint compressed = texImage->IsCompressed ? texImage->TexFormat->MesaFormat : 0;
-+	GLuint numfaces = 1;
-+	GLuint firstLevel, lastLevel;
-+
-+	assert(!t->mt);
-+
-+	calculate_first_last_level(&t->base, &firstLevel, &lastLevel);
-+	if (t->base.Target == GL_TEXTURE_CUBE_MAP)
-+		numfaces = 6;
-+
-+	if (level != firstLevel || face >= numfaces)
-+		return;
-+
-+	t->mt = r300_miptree_create(rmesa, t, t->base.Target,
-+		firstLevel, lastLevel,
-+		texImage->Width, texImage->Height, texImage->Depth,
-+		texImage->TexFormat->TexelBytes, t->tile_bits, compressed);
-+}
-diff --git a/src/mesa/drivers/dri/r300/r300_mipmap_tree.h b/src/mesa/drivers/dri/r300/r300_mipmap_tree.h
-new file mode 100644
-index 0000000..aeb52dc
---- /dev/null
-+++ b/src/mesa/drivers/dri/r300/r300_mipmap_tree.h
-@@ -0,0 +1,97 @@
-+/*
-+ * Copyright (C) 2008 Nicolai Haehnle.
-+ *
-+ * All Rights Reserved.
-+ *
-+ * Permission is hereby granted, free of charge, to any person obtaining
-+ * a copy of this software and associated documentation files (the
-+ * "Software"), to deal in the Software without restriction, including
-+ * without limitation the rights to use, copy, modify, merge, publish,
-+ * distribute, sublicense, and/or sell copies of the Software, and to
-+ * permit persons to whom the Software is furnished to do so, subject to
-+ * the following conditions:
-+ *
-+ * The above copyright notice and this permission notice (including the
-+ * next paragraph) shall be included in all copies or substantial
-+ * portions of the Software.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
-+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
-+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-+ *
-+ */
-+
-+#ifndef __R300_MIPMAP_TREE_H_
-+#define __R300_MIPMAP_TREE_H_
-+
-+#include "r300_context.h"
-+
-+typedef struct _r300_mipmap_tree r300_mipmap_tree;
-+typedef struct _r300_mipmap_level r300_mipmap_level;
-+typedef struct _r300_mipmap_image r300_mipmap_image;
-+
-+struct _r300_mipmap_image {
-+	GLuint offset; /** Offset of this image from the start of mipmap tree buffer, in bytes */
-+};
-+
-+struct _r300_mipmap_level {
-+	GLuint width;
-+	GLuint height;
-+	GLuint depth;
-+	GLuint size; /** Size of each image, in bytes */
-+	GLuint rowstride; /** in bytes */
-+	r300_mipmap_image faces[6];
-+};
-+
-+
-+/**
-+ * A mipmap tree contains texture images in the layout that the hardware
-+ * expects.
-+ *
-+ * The meta-data of mipmap trees is immutable, i.e. you cannot change the
-+ * layout on-the-fly; however, the texture contents (i.e. texels) can be
-+ * changed.
-+ */
-+struct _r300_mipmap_tree {
-+	r300ContextPtr r300;
-+	r300TexObj *t;
-+	struct radeon_bo *bo;
-+	GLuint refcount;
-+
-+	GLuint totalsize; /** total size of the miptree, in bytes */
-+
-+	GLenum target; /** GL_TEXTURE_xxx */
-+	GLuint faces; /** # of faces: 6 for cubemaps, 1 otherwise */
-+	GLuint firstLevel; /** First mip level stored in this mipmap tree */
-+	GLuint lastLevel; /** Last mip level stored in this mipmap tree */
-+
-+	GLuint width0; /** Width of firstLevel image */
-+	GLuint height0; /** Height of firstLevel image */
-+	GLuint depth0; /** Depth of firstLevel image */
-+
-+	GLuint bpp; /** Bytes per texel */
-+	GLuint tilebits; /** R300_TXO_xxx_TILE */
-+	GLuint compressed; /** MESA_FORMAT_xxx indicating a compressed format, or 0 if uncompressed */
-+
-+	r300_mipmap_level levels[RADEON_MAX_TEXTURE_LEVELS];
-+};
-+
-+r300_mipmap_tree* r300_miptree_create(r300ContextPtr rmesa, r300TexObj *t,
-+		GLenum target, GLuint firstLevel, GLuint lastLevel,
-+		GLuint width0, GLuint height0, GLuint depth0,
-+		GLuint bpp, GLuint tilebits, GLuint compressed);
-+void r300_miptree_reference(r300_mipmap_tree *mt);
-+void r300_miptree_unreference(r300_mipmap_tree *mt);
-+
-+GLboolean r300_miptree_matches_image(r300_mipmap_tree *mt,
-+		struct gl_texture_image *texImage, GLuint face, GLuint level);
-+GLboolean r300_miptree_matches_texture(r300_mipmap_tree *mt, struct gl_texture_object *texObj);
-+void r300_try_alloc_miptree(r300ContextPtr rmesa, r300TexObj *t,
-+		struct gl_texture_image *texImage, GLuint face, GLuint level);
-+
-+
-+#endif /* __R300_MIPMAP_TREE_H_ */
-diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h
-index 778db96..8b3fe43 100644
---- a/src/mesa/drivers/dri/r300/r300_reg.h
-+++ b/src/mesa/drivers/dri/r300/r300_reg.h
-@@ -1525,6 +1525,13 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
- #	define R500_SEL_FILTER4_TC3		 (3 << 18)
- 
- #define R300_TX_OFFSET_0                    0x4540
-+#define R300_TX_OFFSET_1                    0x4544
-+#define R300_TX_OFFSET_2                    0x4548
-+#define R300_TX_OFFSET_3                    0x454C
-+#define R300_TX_OFFSET_4                    0x4550
-+#define R300_TX_OFFSET_5                    0x4554
-+#define R300_TX_OFFSET_6                    0x4558
-+#define R300_TX_OFFSET_7                    0x455C
- 	/* BEGIN: Guess from R200 */
- #       define R300_TXO_ENDIAN_NO_SWAP           (0 << 0)
- #       define R300_TXO_ENDIAN_BYTE_SWAP         (1 << 0)
-diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c
-index 292f87a..ccc00b3 100644
---- a/src/mesa/drivers/dri/r300/r300_render.c
-+++ b/src/mesa/drivers/dri/r300/r300_render.c
-@@ -175,89 +175,163 @@ int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim)
- static void r300EmitElts(GLcontext * ctx, void *elts, unsigned long n_elts)
- {
- 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
--	struct r300_dma_region *rvb = &rmesa->state.elt_dma;
- 	void *out;
- 
--	if (r300IsGartMemory(rmesa, elts, n_elts * 4)) {
--		rvb->address = rmesa->radeon.radeonScreen->gartTextures.map;
--		rvb->start = ((char *)elts) - rvb->address;
--		rvb->aos_offset =
--		    rmesa->radeon.radeonScreen->gart_texture_offset +
--		    rvb->start;
--		return;
--	} else if (r300IsGartMemory(rmesa, elts, 1)) {
--		WARN_ONCE("Pointer not within GART memory!\n");
--		_mesa_exit(-1);
--	}
--
--	r300AllocDmaRegion(rmesa, rvb, n_elts * 4, 4);
--	rvb->aos_offset = GET_START(rvb);
--
--	out = rvb->address + rvb->start;
-+	rmesa->state.elt_dma_bo = radeon_bo_open(rmesa->radeon.radeonScreen->bom,
-+                                             0, n_elts * 4, 4,
-+                                             RADEON_GEM_DOMAIN_GTT, 0);
-+    rmesa->state.elt_dma_offset = 0;
-+    radeon_bo_map(rmesa->state.elt_dma_bo, 1);
-+	out = rmesa->state.elt_dma_bo->ptr + rmesa->state.elt_dma_offset;
- 	memcpy(out, elts, n_elts * 4);
-+    radeon_bo_unmap(rmesa->state.elt_dma_bo);
- }
- 
--static void r300FireEB(r300ContextPtr rmesa, unsigned long addr,
--		       int vertex_count, int type)
-+static void r300FireEB(r300ContextPtr rmesa, int vertex_count, int type)
- {
--	int cmd_reserved = 0;
--	int cmd_written = 0;
--	drm_radeon_cmd_header_t *cmd = NULL;
--
--	start_packet3(CP_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0), 0);
--	e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count << 16) | type | R300_VAP_VF_CNTL__INDEX_SIZE_32bit);
--
--	start_packet3(CP_PACKET3(R300_PACKET3_INDX_BUFFER, 2), 2);
--	e32(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2);
--	e32(addr);
--	e32(vertex_count);
-+	BATCH_LOCALS(rmesa);
-+
-+    if (vertex_count > 0) {
-+    	BEGIN_BATCH(8);
-+    	OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0);
-+    	OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES |
-+                  ((vertex_count + 0) << 16) |
-+                  type |
-+                  R300_VAP_VF_CNTL__INDEX_SIZE_32bit);
-+
-+    if (!rmesa->radeon.radeonScreen->kernel_mm) {
-+    	OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
-+	    OUT_BATCH(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2);
-+    	OUT_BATCH_RELOC(rmesa->state.elt_dma_offset,
-+                        rmesa->state.elt_dma_bo,
-+                        rmesa->state.elt_dma_offset,
-+                        RADEON_GEM_DOMAIN_GTT, 0, 0);
-+        OUT_BATCH(vertex_count);
-+    } else {
-+    	OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
-+	    OUT_BATCH(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2);
-+        OUT_BATCH(rmesa->state.elt_dma_offset);
-+        OUT_BATCH(vertex_count);
-+        radeon_cs_write_reloc(rmesa->cmdbuf.cs,
-+                              rmesa->state.elt_dma_bo,
-+                              RADEON_GEM_DOMAIN_GTT, 0, 0);
-+    }
-+    	END_BATCH();
-+    }
- }
- 
- static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset)
- {
-+	BATCH_LOCALS(rmesa);
-+    uint32_t voffset;
- 	int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2;
- 	int i;
--	int cmd_reserved = 0;
--	int cmd_written = 0;
--	drm_radeon_cmd_header_t *cmd = NULL;
- 
- 	if (RADEON_DEBUG & DEBUG_VERTS)
- 		fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__, nr,
- 			offset);
- 
--	start_packet3(CP_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1), sz - 1);
--	e32(nr);
-+	BEGIN_BATCH(sz+2);
-+	OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1);
-+	OUT_BATCH(nr);
- 
-+    
-+    if (!rmesa->radeon.radeonScreen->kernel_mm) {
- 	for (i = 0; i + 1 < nr; i += 2) {
--		e32((rmesa->state.aos[i].aos_size << 0) |
--		    (rmesa->state.aos[i].aos_stride << 8) |
--		    (rmesa->state.aos[i + 1].aos_size << 16) |
--		    (rmesa->state.aos[i + 1].aos_stride << 24));
-+		OUT_BATCH((rmesa->state.aos[i].components << 0) |
-+			  (rmesa->state.aos[i].stride << 8) |
-+			  (rmesa->state.aos[i + 1].components << 16) |
-+			  (rmesa->state.aos[i + 1].stride << 24));
-+
-+        voffset =  rmesa->state.aos[i + 0].offset +
-+                   offset * 4 * rmesa->state.aos[i + 0].stride;
-+		OUT_BATCH_RELOC(voffset,
-+                        rmesa->state.aos[i].bo,
-+                        voffset,
-+                        RADEON_GEM_DOMAIN_GTT,
-+                        0, 0);
-+        voffset =  rmesa->state.aos[i + 1].offset +
-+                   offset * 4 * rmesa->state.aos[i + 1].stride;
-+		OUT_BATCH_RELOC(voffset,
-+                        rmesa->state.aos[i+1].bo,
-+                        voffset,
-+                        RADEON_GEM_DOMAIN_GTT,
-+                        0, 0);
-+	}
- 
--		e32(rmesa->state.aos[i].aos_offset + offset * 4 * rmesa->state.aos[i].aos_stride);
--		e32(rmesa->state.aos[i + 1].aos_offset + offset * 4 * rmesa->state.aos[i + 1].aos_stride);
-+	if (nr & 1) {
-+		OUT_BATCH((rmesa->state.aos[nr - 1].components << 0) |
-+			  (rmesa->state.aos[nr - 1].stride << 8));
-+        voffset =  rmesa->state.aos[nr - 1].offset +
-+                   offset * 4 * rmesa->state.aos[nr - 1].stride;
-+		OUT_BATCH_RELOC(voffset,
-+                        rmesa->state.aos[nr - 1].bo,
-+			            voffset,
-+                        RADEON_GEM_DOMAIN_GTT,
-+                        0, 0);
-+	}
-+    } else {
-+	for (i = 0; i + 1 < nr; i += 2) {
-+		OUT_BATCH((rmesa->state.aos[i].components << 0) |
-+			  (rmesa->state.aos[i].stride << 8) |
-+			  (rmesa->state.aos[i + 1].components << 16) |
-+			  (rmesa->state.aos[i + 1].stride << 24));
-+
-+        voffset =  rmesa->state.aos[i + 0].offset +
-+                   offset * 4 * rmesa->state.aos[i + 0].stride;
-+		OUT_BATCH(voffset);
-+        voffset =  rmesa->state.aos[i + 1].offset +
-+                   offset * 4 * rmesa->state.aos[i + 1].stride;
-+		OUT_BATCH(voffset);
- 	}
- 
- 	if (nr & 1) {
--		e32((rmesa->state.aos[nr - 1].aos_size << 0) |
--		    (rmesa->state.aos[nr - 1].aos_stride << 8));
--		e32(rmesa->state.aos[nr - 1].aos_offset + offset * 4 * rmesa->state.aos[nr - 1].aos_stride);
-+		OUT_BATCH((rmesa->state.aos[nr - 1].components << 0) |
-+			  (rmesa->state.aos[nr - 1].stride << 8));
-+        voffset =  rmesa->state.aos[nr - 1].offset +
-+                   offset * 4 * rmesa->state.aos[nr - 1].stride;
-+		OUT_BATCH(voffset);
-+	}
-+	for (i = 0; i + 1 < nr; i += 2) {
-+        voffset =  rmesa->state.aos[i + 0].offset +
-+                   offset * 4 * rmesa->state.aos[i + 0].stride;
-+        radeon_cs_write_reloc(rmesa->cmdbuf.cs,
-+                              rmesa->state.aos[i+0].bo,
-+                              RADEON_GEM_DOMAIN_GTT,
-+                              0, 0);
-+        voffset =  rmesa->state.aos[i + 1].offset +
-+                   offset * 4 * rmesa->state.aos[i + 1].stride;
-+        radeon_cs_write_reloc(rmesa->cmdbuf.cs,
-+                              rmesa->state.aos[i+1].bo,
-+                              RADEON_GEM_DOMAIN_GTT,
-+                              0, 0);
-+	}
-+	if (nr & 1) {
-+        voffset =  rmesa->state.aos[nr - 1].offset +
-+                   offset * 4 * rmesa->state.aos[nr - 1].stride;
-+        radeon_cs_write_reloc(rmesa->cmdbuf.cs,
-+                              rmesa->state.aos[nr-1].bo,
-+                              RADEON_GEM_DOMAIN_GTT,
-+                              0, 0);
- 	}
-+    }
-+	END_BATCH();
- }
- 
- static void r300FireAOS(r300ContextPtr rmesa, int vertex_count, int type)
- {
--	int cmd_reserved = 0;
--	int cmd_written = 0;
--	drm_radeon_cmd_header_t *cmd = NULL;
-+	BATCH_LOCALS(rmesa);
- 
--	start_packet3(CP_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0), 0);
--	e32(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) | type);
-+	BEGIN_BATCH(3);
-+	OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
-+	OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) | type);
-+	END_BATCH();
- }
- 
- static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx,
- 				   int start, int end, int prim)
- {
-+	BATCH_LOCALS(rmesa);
- 	int type, num_verts;
- 	TNLcontext *tnl = TNL_CONTEXT(ctx);
- 	struct vertex_buffer *vb = &tnl->vb;
-@@ -268,6 +342,12 @@ static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx,
- 	if (type < 0 || num_verts <= 0)
- 		return;
- 
-+	/* Make space for at least 64 dwords.
-+	 * This is supposed to ensure that we can get all rendering
-+	 * commands into a single command buffer.
-+	 */
-+	r300EnsureCmdBufSpace(rmesa, 64, __FUNCTION__);
-+
- 	if (vb->Elts) {
- 		if (num_verts > 65535) {
- 			/* not implemented yet */
-@@ -287,11 +367,12 @@ static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx,
- 		 */
- 		r300EmitElts(ctx, vb->Elts, num_verts);
- 		r300EmitAOS(rmesa, rmesa->state.aos_count, start);
--		r300FireEB(rmesa, rmesa->state.elt_dma.aos_offset, num_verts, type);
-+		r300FireEB(rmesa, num_verts, type);
- 	} else {
- 		r300EmitAOS(rmesa, rmesa->state.aos_count, start);
- 		r300FireAOS(rmesa, num_verts, type);
- 	}
-+	COMMIT_BATCH();
- }
- 
- static GLboolean r300RunRender(GLcontext * ctx,
-@@ -302,7 +383,6 @@ static GLboolean r300RunRender(GLcontext * ctx,
- 	TNLcontext *tnl = TNL_CONTEXT(ctx);
- 	struct vertex_buffer *vb = &tnl->vb;
- 
--
- 	if (RADEON_DEBUG & DEBUG_PRIMS)
- 		fprintf(stderr, "%s\n", __FUNCTION__);
- 
-@@ -324,10 +404,6 @@ static GLboolean r300RunRender(GLcontext * ctx,
- 
- 	r300EmitCacheFlush(rmesa);
- 
--#ifdef USER_BUFFERS
--	r300UseArrays(ctx);
--#endif
--
- 	r300ReleaseArrays(ctx);
- 
- 	return GL_FALSE;
-diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c
-index 6a5c363..c79e69a 100644
---- a/src/mesa/drivers/dri/r300/r300_state.c
-+++ b/src/mesa/drivers/dri/r300/r300_state.c
-@@ -55,6 +55,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- 
- #include "radeon_ioctl.h"
- #include "radeon_state.h"
-+#include "radeon_buffer.h"
- #include "r300_context.h"
- #include "r300_ioctl.h"
- #include "r300_state.h"
-@@ -1100,10 +1101,19 @@ static void r300UpdateWindow(GLcontext * ctx)
- static void r300Viewport(GLcontext * ctx, GLint x, GLint y,
- 			 GLsizei width, GLsizei height)
- {
-+	r300ContextPtr rmesa = R300_CONTEXT(ctx);
-+    __DRIcontext *driContext = rmesa->radeon.dri.context;
- 	/* Don't pipeline viewport changes, conflict with window offset
- 	 * setting below.  Could apply deltas to rescue pipelined viewport
- 	 * values, or keep the originals hanging around.
- 	 */
-+    if (rmesa->radeon.radeonScreen->driScreen->dri2.enabled) {
-+        radeon_update_renderbuffers(driContext, driContext->driDrawablePriv);
-+        if (driContext->driDrawablePriv != driContext->driReadablePriv) {
-+            radeon_update_renderbuffers(driContext,
-+                                        driContext->driReadablePriv);
-+        }
-+    }
- 	r300UpdateWindow(ctx);
- }
- 
-@@ -1144,55 +1154,25 @@ void r300UpdateViewportOffset(GLcontext * ctx)
- void r300UpdateDrawBuffer(GLcontext * ctx)
- {
- 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
--	r300ContextPtr r300 = rmesa;
- 	struct gl_framebuffer *fb = ctx->DrawBuffer;
--	driRenderbuffer *drb;
-+	struct radeon_renderbuffer *rrb;
- 
- 	if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) {
- 		/* draw to front */
--		drb =
--		    (driRenderbuffer *) fb->Attachment[BUFFER_FRONT_LEFT].
--		    Renderbuffer;
-+		rrb =
-+		    (void *) fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
- 	} else if (fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) {
- 		/* draw to back */
--		drb =
--		    (driRenderbuffer *) fb->Attachment[BUFFER_BACK_LEFT].
--		    Renderbuffer;
-+		rrb = (void *) fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
- 	} else {
- 		/* drawing to multiple buffers, or none */
- 		return;
- 	}
- 
--	assert(drb);
--	assert(drb->flippedPitch);
-+	assert(rrb);
-+	assert(rrb->pitch);
- 
- 	R300_STATECHANGE(rmesa, cb);
--
--	r300->hw.cb.cmd[R300_CB_OFFSET] = drb->flippedOffset +	//r300->radeon.state.color.drawOffset +
--	    r300->radeon.radeonScreen->fbLocation;
--	r300->hw.cb.cmd[R300_CB_PITCH] = drb->flippedPitch;	//r300->radeon.state.color.drawPitch;
--
--	if (r300->radeon.radeonScreen->cpp == 4)
--		r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_ARGB8888;
--	else
--		r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_RGB565;
--
--	if (r300->radeon.sarea->tiling_enabled)
--		r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_TILE_ENABLE;
--#if 0
--	R200_STATECHANGE(rmesa, ctx);
--
--	/* Note: we used the (possibly) page-flipped values */
--	rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET]
--	    = ((drb->flippedOffset + rmesa->r200Screen->fbLocation)
--	       & R200_COLOROFFSET_MASK);
--	rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = drb->flippedPitch;
--
--	if (rmesa->sarea->tiling_enabled) {
--		rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |=
--		    R200_COLOR_TILE_ENABLE;
--	}
--#endif
- }
- 
- static void
-@@ -1412,7 +1392,8 @@ static void r300SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings)
- 	}
- 
- 	r300->hw.fpt.cmd[R300_FPT_CMD_0] =
--		cmdpacket0(R300_US_TEX_INST_0, code->tex.length);
-+		cmdpacket0(r300->radeon.radeonScreen,
-+                   R300_US_TEX_INST_0, code->tex.length);
- }
- 
- static void r500SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings)
-@@ -1497,14 +1478,9 @@ static void r300SetupTextures(GLcontext * ctx)
- 	/* We cannot let disabled tmu offsets pass DRM */
- 	for (i = 0; i < mtu; i++) {
- 		if (ctx->Texture.Unit[i]._ReallyEnabled) {
--
--#if 0				/* Enables old behaviour */
--			hw_tmu = i;
--#endif
- 			tmu_mappings[i] = hw_tmu;
- 
--			t = r300->state.texture.unit[i].texobj;
--			/* XXX questionable fix for bug 9170: */
-+			t = r300_tex_obj(ctx->Texture.Unit[i]._Current);
- 			if (!t)
- 				continue;
- 
-@@ -1530,21 +1506,20 @@ static void r300SetupTextures(GLcontext * ctx)
- 			 */
- 			r300->hw.tex.filter_1.cmd[R300_TEX_VALUE_0 + hw_tmu] =
- 				t->filter_1 |
--				translate_lod_bias(ctx->Texture.Unit[i].LodBias + t->base.tObj->LodBias);
-+				translate_lod_bias(ctx->Texture.Unit[i].LodBias + t->base.LodBias);
- 			r300->hw.tex.size.cmd[R300_TEX_VALUE_0 + hw_tmu] =
- 			    t->size;
- 			r300->hw.tex.format.cmd[R300_TEX_VALUE_0 +
- 						hw_tmu] = t->format;
- 			r300->hw.tex.pitch.cmd[R300_TEX_VALUE_0 + hw_tmu] =
- 			    t->pitch_reg;
--			r300->hw.tex.offset.cmd[R300_TEX_VALUE_0 +
--						hw_tmu] = t->offset;
-+			r300->hw.textures[hw_tmu] = t;
- 
--			if (t->offset & R300_TXO_MACRO_TILE) {
-+			if (t->tile_bits & R300_TXO_MACRO_TILE) {
- 				WARN_ONCE("macro tiling enabled!\n");
- 			}
- 
--			if (t->offset & R300_TXO_MICRO_TILE) {
-+			if (t->tile_bits & R300_TXO_MICRO_TILE) {
- 				WARN_ONCE("micro tiling enabled!\n");
- 			}
- 
-@@ -1561,21 +1536,21 @@ static void r300SetupTextures(GLcontext * ctx)
- 	}
- 
- 	r300->hw.tex.filter.cmd[R300_TEX_CMD_0] =
--	    cmdpacket0(R300_TX_FILTER0_0, last_hw_tmu + 1);
-+	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER0_0, last_hw_tmu + 1);
- 	r300->hw.tex.filter_1.cmd[R300_TEX_CMD_0] =
--	    cmdpacket0(R300_TX_FILTER1_0, last_hw_tmu + 1);
-+	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER1_0, last_hw_tmu + 1);
- 	r300->hw.tex.size.cmd[R300_TEX_CMD_0] =
--	    cmdpacket0(R300_TX_SIZE_0, last_hw_tmu + 1);
-+	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_SIZE_0, last_hw_tmu + 1);
- 	r300->hw.tex.format.cmd[R300_TEX_CMD_0] =
--	    cmdpacket0(R300_TX_FORMAT_0, last_hw_tmu + 1);
-+	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_FORMAT_0, last_hw_tmu + 1);
- 	r300->hw.tex.pitch.cmd[R300_TEX_CMD_0] =
--	    cmdpacket0(R300_TX_FORMAT2_0, last_hw_tmu + 1);
-+	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_FORMAT2_0, last_hw_tmu + 1);
- 	r300->hw.tex.offset.cmd[R300_TEX_CMD_0] =
--	    cmdpacket0(R300_TX_OFFSET_0, last_hw_tmu + 1);
-+	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_OFFSET_0, last_hw_tmu + 1);
- 	r300->hw.tex.chroma_key.cmd[R300_TEX_CMD_0] =
--	    cmdpacket0(R300_TX_CHROMA_KEY_0, last_hw_tmu + 1);
-+	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_CHROMA_KEY_0, last_hw_tmu + 1);
- 	r300->hw.tex.border_color.cmd[R300_TEX_CMD_0] =
--	    cmdpacket0(R300_TX_BORDER_COLOR_0, last_hw_tmu + 1);
-+	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_BORDER_COLOR_0, last_hw_tmu + 1);
- 
- 	if (!fp)		/* should only happenen once, just after context is created */
- 		return;
-@@ -1587,7 +1562,7 @@ static void r300SetupTextures(GLcontext * ctx)
- 			r300->hw.txe.cmd[R300_TXE_ENABLE] |= 1;
- 			r300->hw.tex.filter.cmd[R300_TEX_VALUE_0] = 0;
- 			r300->hw.tex.filter.cmd[R300_TEX_CMD_0] =
--				cmdpacket0(R300_TX_FILTER0_0, 1);
-+				cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER0_0, 1);
- 		}
- 		r300SetupFragmentShaderTextures(ctx, tmu_mappings);
- 	} else
-@@ -1749,7 +1724,7 @@ static void r300SetupRSUnit(GLcontext * ctx)
- 	  | R300_HIRES_EN;
- 
- 	assert(high_rr >= 0);
--	r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R300_RS_INST_0, high_rr + 1);
-+	r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_INST_0, high_rr + 1);
- 	r300->hw.rc.cmd[2] = high_rr;
- 
- 	if (InputsRead)
-@@ -1909,7 +1884,7 @@ static void r500SetupRSUnit(GLcontext * ctx)
- 	  | R300_HIRES_EN;
- 
- 	assert(high_rr >= 0);
--	r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R500_RS_INST_0, high_rr + 1);
-+	r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R500_RS_INST_0, high_rr + 1);
- 	r300->hw.rc.cmd[2] = 0xC0 | high_rr;
- 
- 	if (InputsRead)
-@@ -2107,6 +2082,7 @@ static void r300SetupRealVertexProgram(r300ContextPtr rmesa)
- 	  (inst_count << R300_PVS_LAST_VTX_SRC_INST_SHIFT);
- }
- 
-+
- static void r300SetupVertexProgram(r300ContextPtr rmesa)
- {
- 	GLcontext *ctx = rmesa->radeon.glCtx;
-@@ -2193,6 +2169,7 @@ static void r300Enable(GLcontext * ctx, GLenum cap, GLboolean state)
- static void r300ResetHwState(r300ContextPtr r300)
- {
- 	GLcontext *ctx = r300->radeon.glCtx;
-+	struct radeon_renderbuffer *rrb;
- 	int has_tcl = 1;
- 
- 	if (!(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL))
-@@ -2223,8 +2200,6 @@ static void r300ResetHwState(r300ContextPtr r300)
- 
- 	r300UpdateCulling(ctx);
- 
--	r300UpdateTextureState(ctx);
--
- 	r300SetBlendState(ctx);
- 	r300SetLogicOpState(ctx);
- 
-@@ -2371,20 +2346,6 @@ static void r300ResetHwState(r300ContextPtr r300)
- 
- 	r300BlendColor(ctx, ctx->Color.BlendColor);
- 
--	/* Again, r300ClearBuffer uses this */
--	r300->hw.cb.cmd[R300_CB_OFFSET] =
--	    r300->radeon.state.color.drawOffset +
--	    r300->radeon.radeonScreen->fbLocation;
--	r300->hw.cb.cmd[R300_CB_PITCH] = r300->radeon.state.color.drawPitch;
--
--	if (r300->radeon.radeonScreen->cpp == 4)
--		r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_ARGB8888;
--	else
--		r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_RGB565;
--
--	if (r300->radeon.sarea->tiling_enabled)
--		r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_TILE_ENABLE;
--
- 	r300->hw.rb3d_dither_ctl.cmd[1] = 0;
- 	r300->hw.rb3d_dither_ctl.cmd[2] = 0;
- 	r300->hw.rb3d_dither_ctl.cmd[3] = 0;
-@@ -2400,12 +2361,8 @@ static void r300ResetHwState(r300ContextPtr r300)
- 	r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[1] = 0x00000000;
- 	r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[2] = 0xffffffff;
- 
--	r300->hw.zb.cmd[R300_ZB_OFFSET] =
--	    r300->radeon.radeonScreen->depthOffset +
--	    r300->radeon.radeonScreen->fbLocation;
--	r300->hw.zb.cmd[R300_ZB_PITCH] = r300->radeon.radeonScreen->depthPitch;
--
--	if (r300->radeon.sarea->tiling_enabled) {
-+	rrb = r300->radeon.state.depth_buffer;
-+    if (rrb && rrb->bo && (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE)) {
- 		/* XXX: Turn off when clearing buffers ? */
- 		r300->hw.zb.cmd[R300_ZB_PITCH] |= R300_DEPTHMACROTILE_ENABLE;
- 
-@@ -2539,10 +2496,10 @@ static void r300SetupPixelShader(r300ContextPtr rmesa)
- 	R300_STATECHANGE(rmesa, fpi[1]);
- 	R300_STATECHANGE(rmesa, fpi[2]);
- 	R300_STATECHANGE(rmesa, fpi[3]);
--	rmesa->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_RGB_INST_0, code->alu.length);
--	rmesa->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_RGB_ADDR_0, code->alu.length);
--	rmesa->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_ALPHA_INST_0, code->alu.length);
--	rmesa->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_ALPHA_ADDR_0, code->alu.length);
-+	rmesa->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_US_ALU_RGB_INST_0, code->alu.length);
-+	rmesa->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_US_ALU_RGB_ADDR_0, code->alu.length);
-+	rmesa->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_US_ALU_ALPHA_INST_0, code->alu.length);
-+	rmesa->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_US_ALU_ALPHA_ADDR_0, code->alu.length);
- 	for (i = 0; i < code->alu.length; i++) {
- 		rmesa->hw.fpi[0].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].inst0;
- 		rmesa->hw.fpi[1].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].inst1;
-@@ -2573,7 +2530,7 @@ static void r300SetupPixelShader(r300ContextPtr rmesa)
- 	}
- 
- 	R300_STATECHANGE(rmesa, fpp);
--	rmesa->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(R300_PFS_PARAM_0_X, code->const_nr * 4);
-+	rmesa->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_PFS_PARAM_0_X, code->const_nr * 4);
- 	for (i = 0; i < code->const_nr; i++) {
- 		const GLfloat *constant = get_fragmentprogram_constant(ctx,
- 			&fp->mesa_program.Base, code->constant[i]);
-@@ -2675,7 +2632,7 @@ void r300UpdateShaderStates(r300ContextPtr rmesa)
- 	GLcontext *ctx;
- 	ctx = rmesa->radeon.glCtx;
- 
--	r300UpdateTextureState(ctx);
-+	r300ValidateTextures(ctx);
- 	r300SetEarlyZState(ctx);
- 
- 	GLuint fgdepthsrc = R300_FG_DEPTH_SRC_SCAN;
-diff --git a/src/mesa/drivers/dri/r300/r300_state.h b/src/mesa/drivers/dri/r300/r300_state.h
-index 0589ab7..4d0a25f 100644
---- a/src/mesa/drivers/dri/r300/r300_state.h
-+++ b/src/mesa/drivers/dri/r300/r300_state.h
-@@ -39,8 +39,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- 
- #define R300_NEWPRIM( rmesa )			\
-   do {						\
--    if ( rmesa->dma.flush )			\
--      rmesa->dma.flush( rmesa );		\
-   } while (0)
- 
- #define R300_STATECHANGE(r300, atom) \
-@@ -57,13 +55,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-    TODO: This has not been implemented yet
-  */
- #define R300_FIREVERTICES( r300 )			\
--do {							\
--    \
--   if ( (r300)->cmdbuf.count_used || (r300)->dma.flush ) {	\
--      r300Flush( (r300)->radeon.glCtx );		\
--   }							\
--    \
--} while (0)
-+    do {							\
-+        r300Flush( (r300)->radeon.glCtx );		\
-+    } while (0)
- 
- // r300_state.c
- extern int future_hw_tcl_on;
-diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.c b/src/mesa/drivers/dri/r300/r300_swtcl.c
-index b6e7ce1..fbfa8f4 100644
---- a/src/mesa/drivers/dri/r300/r300_swtcl.c
-+++ b/src/mesa/drivers/dri/r300/r300_swtcl.c
-@@ -56,12 +56,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "r300_state.h"
- #include "r300_ioctl.h"
- #include "r300_emit.h"
--#include "r300_mem.h"
- 
- static void flush_last_swtcl_prim( r300ContextPtr rmesa  );
- 
- 
--void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, GLuint offset);
-+void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, struct radeon_bo *bo, GLuint offset);
- void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vertex_nr);
- #define EMIT_ATTR( ATTR, STYLE )					\
- do {									\
-@@ -86,7 +85,6 @@ static void r300SetVertexFormat( GLcontext *ctx )
- 	DECLARE_RENDERINPUTS(index_bitset);
- 	GLuint InputsRead = 0, OutputsWritten = 0;
- 	int vap_fmt_0 = 0;
--	int vap_vte_cntl = 0;
- 	int offset = 0;
- 	int vte = 0;
- 	GLint inputs[VERT_ATTRIB_MAX];
-@@ -175,7 +173,7 @@ static void r300SetVertexFormat( GLcontext *ctx )
- 			inputs[i] = -1;
- 		}
- 	}
--	
-+
- 	/* Fixed, apply to vir0 only */
- 	if (InputsRead & (1 << VERT_ATTRIB_POS))
- 		inputs[VERT_ATTRIB_POS] = 0;
-@@ -186,16 +184,16 @@ static void r300SetVertexFormat( GLcontext *ctx )
- 	for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX7; i++)
- 		if (InputsRead & (1 << i))
- 			inputs[i] = 6 + (i - VERT_ATTRIB_TEX0);
--	
-+
- 	for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) {
- 		if (InputsRead & (1 << i)) {
- 			tab[nr++] = i;
- 		}
- 	}
--	
-+
- 	for (i = 0; i < nr; i++) {
- 		int ci;
--		
-+
- 		swizzle[i][0] = SWIZZLE_ZERO;
- 		swizzle[i][1] = SWIZZLE_ZERO;
- 		swizzle[i][2] = SWIZZLE_ZERO;
-@@ -215,21 +213,21 @@ static void r300SetVertexFormat( GLcontext *ctx )
- 	((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count =
- 		r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle,
- 				   nr);
--   
-+
- 	R300_STATECHANGE(rmesa, vic);
- 	rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead);
- 	rmesa->hw.vic.cmd[R300_VIC_CNTL_1] = r300VAPInputCntl1(ctx, InputsRead);
--   
-+
- 	R300_STATECHANGE(rmesa, vof);
- 	rmesa->hw.vof.cmd[R300_VOF_CNTL_0] = r300VAPOutputCntl0(ctx, OutputsWritten);
- 	rmesa->hw.vof.cmd[R300_VOF_CNTL_1] = vap_fmt_1;
--   
-+
- 	rmesa->swtcl.vertex_size =
- 		_tnl_install_attrs( ctx,
--				    rmesa->swtcl.vertex_attrs, 
-+				    rmesa->swtcl.vertex_attrs,
- 				    rmesa->swtcl.vertex_attr_count,
- 				    NULL, 0 );
--	
-+
- 	rmesa->swtcl.vertex_size /= 4;
- 
- 	RENDERINPUTS_COPY( rmesa->tnl_index_bitset, index_bitset );
-@@ -247,37 +245,22 @@ static void flush_last_swtcl_prim( r300ContextPtr rmesa  )
- {
- 	if (RADEON_DEBUG & DEBUG_IOCTL)
- 		fprintf(stderr, "%s\n", __FUNCTION__);
--	
--	rmesa->dma.flush = NULL;
--
--	if (rmesa->dma.current.buf) {
--		struct r300_dma_region *current = &rmesa->dma.current;
--		GLuint current_offset = GET_START(current);
--
--		assert (current->start + 
--			rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
--			current->ptr);
--
--		if (rmesa->dma.current.start != rmesa->dma.current.ptr) {
--
--			r300EnsureCmdBufSpace( rmesa, rmesa->hw.max_state_size + (12*sizeof(int)), __FUNCTION__);
--			
--			r300EmitState(rmesa);
--			
--			r300EmitVertexAOS( rmesa,
--					   rmesa->swtcl.vertex_size,
--					   current_offset);
--			
--			r300EmitVbufPrim( rmesa,
--					  rmesa->swtcl.hw_primitive,
--					  rmesa->swtcl.numverts);
--			
--			r300EmitCacheFlush(rmesa);
--		}
--		
--		rmesa->swtcl.numverts = 0;
--		current->start = current->ptr;
--	}
-+    rmesa->swtcl.flush = NULL;
-+    radeon_bo_unmap(rmesa->swtcl.bo);
-+    r300EnsureCmdBufSpace(rmesa,
-+                          rmesa->hw.max_state_size + (12*sizeof(int)),
-+                          __FUNCTION__);
-+    r300EmitState(rmesa);
-+    r300EmitVertexAOS(rmesa,
-+                      rmesa->swtcl.vertex_size,
-+                      rmesa->swtcl.bo,
-+                      0);
-+    r300EmitVbufPrim(rmesa,
-+                     rmesa->swtcl.hw_primitive,
-+                     rmesa->swtcl.numverts);
-+    r300EmitCacheFlush(rmesa);
-+    COMMIT_BATCH();
-+    rmesa->swtcl.numverts = 0;
- }
- 
- /* Alloc space in the current dma region.
-@@ -287,26 +270,14 @@ r300AllocDmaLowVerts( r300ContextPtr rmesa, int nverts, int vsize )
- {
- 	GLuint bytes = vsize * nverts;
- 
--	if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end ) 
--		r300RefillCurrentDmaRegion( rmesa, bytes);
--
--	if (!rmesa->dma.flush) {
--		rmesa->radeon.glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
--		rmesa->dma.flush = flush_last_swtcl_prim;
--	}
--
--	ASSERT( vsize == rmesa->swtcl.vertex_size * 4 );
--	ASSERT( rmesa->dma.flush == flush_last_swtcl_prim );
--	ASSERT( rmesa->dma.current.start + 
--		rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
--		rmesa->dma.current.ptr );
--
--	{
--		GLubyte *head = (GLubyte *) (rmesa->dma.current.address + rmesa->dma.current.ptr);
--		rmesa->dma.current.ptr += bytes;
--		rmesa->swtcl.numverts += nverts;
--		return head;
--	}
-+	rmesa->swtcl.bo = radeon_bo_open(rmesa->radeon.radeonScreen->bom,
-+                                     0, bytes, 4, RADEON_GEM_DOMAIN_GTT, 0);
-+    radeon_bo_map(rmesa->swtcl.bo, 1);
-+    if (rmesa->swtcl.flush == NULL) {
-+        rmesa->radeon.glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
-+        rmesa->swtcl.flush = flush_last_swtcl_prim;
-+    }
-+    return rmesa->swtcl.bo->ptr;
- }
- 
- static GLuint reduced_prim[] = {
-@@ -352,7 +323,7 @@ static void r300RenderPrimitive( GLcontext *ctx, GLenum prim );
-    r300ContextPtr rmesa = R300_CONTEXT(ctx);		\
-    const char *r300verts = (char *)rmesa->swtcl.verts;
- #define VERT(x) (r300Vertex *)(r300verts + ((x) * vertsize * sizeof(int)))
--#define VERTEX r300Vertex 
-+#define VERTEX r300Vertex
- #define DO_DEBUG_VERTS (1 && (RADEON_DEBUG & DEBUG_VERTS))
- #define PRINT_VERTEX(x)
- #undef TAG
-@@ -572,18 +543,16 @@ static void r300RenderStart(GLcontext *ctx)
-         r300ContextPtr rmesa = R300_CONTEXT( ctx );
- 	//	fprintf(stderr, "%s\n", __FUNCTION__);
- 
--	r300ChooseRenderState(ctx);	
-+	r300ChooseRenderState(ctx);
- 	r300SetVertexFormat(ctx);
- 
- 	r300UpdateShaders(rmesa);
- 	r300UpdateShaderStates(rmesa);
- 
- 	r300EmitCacheFlush(rmesa);
--	
--	if (rmesa->dma.flush != 0 && 
--	    rmesa->dma.flush != flush_last_swtcl_prim)
--		rmesa->dma.flush( rmesa );
--
-+    if (rmesa->swtcl.flush != NULL) {
-+        rmesa->swtcl.flush(rmesa);
-+    }
- }
- 
- static void r300RenderFinish(GLcontext *ctx)
-@@ -593,7 +562,7 @@ static void r300RenderFinish(GLcontext *ctx)
- static void r300RasterPrimitive( GLcontext *ctx, GLuint hwprim )
- {
- 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
--	
-+
- 	if (rmesa->swtcl.hw_primitive != hwprim) {
- 	        R300_NEWPRIM( rmesa );
- 		rmesa->swtcl.hw_primitive = hwprim;
-@@ -611,7 +580,7 @@ static void r300RenderPrimitive(GLcontext *ctx, GLenum prim)
- 
- 	r300RasterPrimitive( ctx, reduced_prim[prim] );
- 	//	fprintf(stderr, "%s\n", __FUNCTION__);
--	
-+
- }
- 
- static void r300ResetLineStipple(GLcontext *ctx)
-@@ -625,12 +594,12 @@ void r300InitSwtcl(GLcontext *ctx)
- 	TNLcontext *tnl = TNL_CONTEXT(ctx);
- 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
- 	static int firsttime = 1;
--	
-+
- 	if (firsttime) {
- 		init_rast_tab();
- 		firsttime = 0;
- 	}
--	
-+
- 	tnl->Driver.Render.Start = r300RenderStart;
- 	tnl->Driver.Render.Finish = r300RenderFinish;
- 	tnl->Driver.Render.PrimitiveNotify = r300RenderPrimitive;
-@@ -638,15 +607,15 @@ void r300InitSwtcl(GLcontext *ctx)
- 	tnl->Driver.Render.BuildVertices = _tnl_build_vertices;
- 	tnl->Driver.Render.CopyPV = _tnl_copy_pv;
- 	tnl->Driver.Render.Interp = _tnl_interp;
--	
-+
- 	/* FIXME: what are these numbers? */
--	_tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12, 
-+	_tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12,
- 			    48 * sizeof(GLfloat) );
--	
-+
- 	rmesa->swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf;
- 	rmesa->swtcl.RenderIndex = ~0;
- 	rmesa->swtcl.render_primitive = GL_TRIANGLES;
--	rmesa->swtcl.hw_primitive = 0;	
-+	rmesa->swtcl.hw_primitive = 0;
- 
- 	_tnl_invalidate_vertex_state( ctx, ~0 );
- 	_tnl_invalidate_vertices( ctx, ~0 );
-@@ -655,9 +624,9 @@ void r300InitSwtcl(GLcontext *ctx)
- 	_tnl_need_projected_coords( ctx, GL_FALSE );
- 	r300ChooseRenderState(ctx);
- 
--	_mesa_validate_all_lighting_tables( ctx ); 
-+	_mesa_validate_all_lighting_tables( ctx );
- 
--	tnl->Driver.NotifyMaterialChange = 
-+	tnl->Driver.NotifyMaterialChange =
- 	  _mesa_validate_all_lighting_tables;
- }
- 
-@@ -665,33 +634,32 @@ void r300DestroySwtcl(GLcontext *ctx)
- {
- }
- 
--void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, GLuint offset)
-+void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, struct radeon_bo *bo, GLuint offset)
- {
--	int cmd_reserved = 0;
--	int cmd_written = 0;
-+	BATCH_LOCALS(rmesa);
- 
--	drm_radeon_cmd_header_t *cmd = NULL;
- 	if (RADEON_DEBUG & DEBUG_VERTS)
--	  fprintf(stderr, "%s:  vertex_size %d, offset 0x%x \n",
--		  __FUNCTION__, vertex_size, offset);
--
--	start_packet3(CP_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, 2), 2);
--	e32(1);
--	e32(vertex_size | (vertex_size << 8));
--	e32(offset);
-+		fprintf(stderr, "%s:  vertex_size %d, offset 0x%x \n",
-+			__FUNCTION__, vertex_size, offset);
-+
-+	BEGIN_BATCH(5);
-+	OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, 2);
-+	OUT_BATCH(1);
-+	OUT_BATCH(vertex_size | (vertex_size << 8));
-+	OUT_BATCH_RELOC(offset, bo, offset, RADEON_GEM_DOMAIN_GTT, 0, 0);
-+	END_BATCH();
- }
- 
- void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vertex_nr)
- {
--
--	int cmd_reserved = 0;
--	int cmd_written = 0;
-+	BATCH_LOCALS(rmesa);
- 	int type, num_verts;
--	drm_radeon_cmd_header_t *cmd = NULL;
- 
- 	type = r300PrimitiveType(rmesa, primitive);
- 	num_verts = r300NumVerts(rmesa, vertex_nr, primitive);
--	
--	start_packet3(CP_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0), 0);
--	e32(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (num_verts << 16) | type);
-+
-+	BEGIN_BATCH(3);
-+	OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
-+	OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (num_verts << 16) | type);
-+	END_BATCH();
- }
-diff --git a/src/mesa/drivers/dri/r300/r300_tex.c b/src/mesa/drivers/dri/r300/r300_tex.c
-index 8ab382c..9ceac70 100644
---- a/src/mesa/drivers/dri/r300/r300_tex.c
-+++ b/src/mesa/drivers/dri/r300/r300_tex.c
-@@ -38,6 +38,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "main/context.h"
- #include "main/enums.h"
- #include "main/image.h"
-+#include "main/mipmap.h"
- #include "main/simple_list.h"
- #include "main/texformat.h"
- #include "main/texstore.h"
-@@ -49,6 +50,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "r300_context.h"
- #include "r300_state.h"
- #include "r300_ioctl.h"
-+#include "r300_mipmap_tree.h"
- #include "r300_tex.h"
- 
- #include "xmlpool.h"
-@@ -79,7 +81,7 @@ static unsigned int translate_wrap_mode(GLenum wrapmode)
-  */
- static void r300UpdateTexWrap(r300TexObjPtr t)
- {
--	struct gl_texture_object *tObj = t->base.tObj;
-+	struct gl_texture_object *tObj = &t->base;
- 
- 	t->filter &=
- 	    ~(R300_TX_WRAP_S_MASK | R300_TX_WRAP_T_MASK | R300_TX_WRAP_R_MASK);
-@@ -119,6 +121,9 @@ static GLuint aniso_filter(GLfloat anisotropy)
-  */
- static void r300SetTexFilter(r300TexObjPtr t, GLenum minf, GLenum magf, GLfloat anisotropy)
- {
-+	/* Force revalidation to account for switches from/to mipmapping. */
-+	t->validated = GL_FALSE;
-+
- 	t->filter &= ~(R300_TX_MIN_FILTER_MASK | R300_TX_MIN_FILTER_MIP_MASK | R300_TX_MAG_FILTER_MASK | R300_TX_MAX_ANISO_MASK);
- 	t->filter_1 &= ~R300_EDGE_ANISO_EDGE_ONLY;
- 
-@@ -176,39 +181,6 @@ static void r300SetTexBorderColor(r300TexObjPtr t, GLubyte c[4])
- 	t->pp_border_color = PACK_COLOR_8888(c[3], c[0], c[1], c[2]);
- }
- 
--/**
-- * Allocate space for and load the mesa images into the texture memory block.
-- * This will happen before drawing with a new texture, or drawing with a
-- * texture after it was swapped out or teximaged again.
-- */
--
--static r300TexObjPtr r300AllocTexObj(struct gl_texture_object *texObj)
--{
--	r300TexObjPtr t;
--
--	t = CALLOC_STRUCT(r300_tex_obj);
--	texObj->DriverData = t;
--	if (t != NULL) {
--		if (RADEON_DEBUG & DEBUG_TEXTURE) {
--			fprintf(stderr, "%s( %p, %p )\n", __FUNCTION__,
--				(void *)texObj, (void *)t);
--		}
--
--		/* Initialize non-image-dependent parts of the state:
--		 */
--		t->base.tObj = texObj;
--		t->border_fallback = GL_FALSE;
--
--		make_empty_list(&t->base);
--
--		r300UpdateTexWrap(t);
--		r300SetTexFilter(t, texObj->MinFilter, texObj->MagFilter, texObj->MaxAnisotropy);
--		r300SetTexBorderColor(t, texObj->_BorderChan);
--	}
--
--	return t;
--}
--
- /* try to find a format which will only need a memcopy */
- static const struct gl_texture_format *r300Choose8888TexFormat(GLenum srcFormat,
- 							       GLenum srcType)
-@@ -434,277 +406,208 @@ static const struct gl_texture_format *r300ChooseTextureFormat(GLcontext * ctx,
- 	return NULL;		/* never get here */
- }
- 
--static GLboolean
--r300ValidateClientStorage(GLcontext * ctx, GLenum target,
--			  GLint internalFormat,
--			  GLint srcWidth, GLint srcHeight,
--			  GLenum format, GLenum type, const void *pixels,
--			  const struct gl_pixelstore_attrib *packing,
--			  struct gl_texture_object *texObj,
--			  struct gl_texture_image *texImage)
-+
-+/**
-+ * Allocate an empty texture image object.
-+ */
-+static struct gl_texture_image *r300NewTextureImage(GLcontext *ctx)
- {
--	r300ContextPtr rmesa = R300_CONTEXT(ctx);
-+	return CALLOC(sizeof(r300_texture_image));
-+}
- 
--	if (RADEON_DEBUG & DEBUG_TEXTURE)
--		fprintf(stderr, "intformat %s format %s type %s\n",
--			_mesa_lookup_enum_by_nr(internalFormat),
--			_mesa_lookup_enum_by_nr(format),
--			_mesa_lookup_enum_by_nr(type));
-+/**
-+ * Free memory associated with this texture image.
-+ */
-+static void r300FreeTexImageData(GLcontext *ctx, struct gl_texture_image *timage)
-+{
-+	r300_texture_image* image = get_r300_texture_image(timage);
- 
--	if (!ctx->Unpack.ClientStorage)
--		return 0;
-+	if (image->mt) {
-+		r300_miptree_unreference(image->mt);
-+		image->mt = 0;
-+		assert(!image->base.Data);
-+	} else {
-+		_mesa_free_texture_image_data(ctx, timage);
-+	}
-+    if (image->bo) {
-+        radeon_bo_unref(image->bo);
-+        image->bo = NULL;
-+    }
-+}
- 
--	if (ctx->_ImageTransferState ||
--	    texImage->IsCompressed || texObj->GenerateMipmap)
--		return 0;
- 
--	/* This list is incomplete, may be different on ppc???
--	 */
--	switch (internalFormat) {
--	case GL_RGBA:
--		if (format == GL_BGRA && type == GL_UNSIGNED_INT_8_8_8_8_REV) {
--			texImage->TexFormat = _dri_texformat_argb8888;
--		} else
--			return 0;
--		break;
-+/* Set Data pointer and additional data for mapped texture image */
-+static void teximage_set_map_data(r300_texture_image *image)
-+{
-+	r300_mipmap_level *lvl = &image->mt->levels[image->mtlevel];
-+	image->base.Data = image->mt->bo->ptr + lvl->faces[image->mtface].offset;
-+	image->base.RowStride = lvl->rowstride / image->mt->bpp;
-+}
- 
--	case GL_RGB:
--		if (format == GL_RGB && type == GL_UNSIGNED_SHORT_5_6_5) {
--			texImage->TexFormat = _dri_texformat_rgb565;
--		} else
--			return 0;
--		break;
- 
--	case GL_YCBCR_MESA:
--		if (format == GL_YCBCR_MESA &&
--		    type == GL_UNSIGNED_SHORT_8_8_REV_APPLE) {
--			texImage->TexFormat = &_mesa_texformat_ycbcr_rev;
--		} else if (format == GL_YCBCR_MESA &&
--			   (type == GL_UNSIGNED_SHORT_8_8_APPLE ||
--			    type == GL_UNSIGNED_BYTE)) {
--			texImage->TexFormat = &_mesa_texformat_ycbcr;
--		} else
--			return 0;
--		break;
-+/**
-+ * Map a single texture image for glTexImage and friends.
-+ */
-+static void r300_teximage_map(r300_texture_image *image, GLboolean write_enable)
-+{
-+	if (image->mt) {
-+		assert(!image->base.Data);
- 
--	default:
--		return 0;
-+		radeon_bo_map(image->mt->bo, write_enable);
-+		teximage_set_map_data(image);
- 	}
-+}
- 
--	/* Could deal with these packing issues, but currently don't:
--	 */
--	if (packing->SkipPixels ||
--	    packing->SkipRows || packing->SwapBytes || packing->LsbFirst) {
--		return 0;
--	}
- 
--	GLint srcRowStride = _mesa_image_row_stride(packing, srcWidth,
--						    format, type);
-+static void r300_teximage_unmap(r300_texture_image *image)
-+{
-+	if (image->mt) {
-+		assert(image->base.Data);
- 
--	if (RADEON_DEBUG & DEBUG_TEXTURE)
--		fprintf(stderr, "%s: srcRowStride %d/%x\n",
--			__FUNCTION__, srcRowStride, srcRowStride);
-+		image->base.Data = 0;
-+		radeon_bo_unmap(image->mt->bo);
-+	}
-+}
- 
--	/* Could check this later in upload, pitch restrictions could be
--	 * relaxed, but would need to store the image pitch somewhere,
--	 * as packing details might change before image is uploaded:
--	 */
--	if (!r300IsGartMemory(rmesa, pixels, srcHeight * srcRowStride)
--	    || (srcRowStride & 63))
--		return 0;
-+/**
-+ * Map a validated texture for reading during software rendering.
-+ */
-+static void r300MapTexture(GLcontext *ctx, struct gl_texture_object *texObj)
-+{
-+	r300TexObj* t = r300_tex_obj(texObj);
-+	int face, level;
- 
--	/* Have validated that _mesa_transfer_teximage would be a straight
--	 * memcpy at this point.  NOTE: future calls to TexSubImage will
--	 * overwrite the client data.  This is explicitly mentioned in the
--	 * extension spec.
--	 */
--	texImage->Data = (void *)pixels;
--	texImage->IsClientData = GL_TRUE;
--	texImage->RowStride = srcRowStride / texImage->TexFormat->TexelBytes;
-+	assert(texObj->_Complete);
-+	assert(t->mt);
- 
--	return 1;
-+	radeon_bo_map(t->mt->bo, GL_FALSE);
-+	for(face = 0; face < t->mt->faces; ++face) {
-+		for(level = t->mt->firstLevel; level <= t->mt->lastLevel; ++level)
-+			teximage_set_map_data(get_r300_texture_image(texObj->Image[face][level]));
-+	}
- }
- 
--static void r300TexImage1D(GLcontext * ctx, GLenum target, GLint level,
--			   GLint internalFormat,
--			   GLint width, GLint border,
--			   GLenum format, GLenum type, const GLvoid * pixels,
--			   const struct gl_pixelstore_attrib *packing,
--			   struct gl_texture_object *texObj,
--			   struct gl_texture_image *texImage)
-+static void r300UnmapTexture(GLcontext *ctx, struct gl_texture_object *texObj)
- {
--	driTextureObject *t = (driTextureObject *) texObj->DriverData;
-+	r300TexObj* t = r300_tex_obj(texObj);
-+	int face, level;
- 
--	if (t) {
--		driSwapOutTextureObject(t);
--	} else {
--		t = (driTextureObject *) r300AllocTexObj(texObj);
--		if (!t) {
--			_mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage1D");
--			return;
--		}
--	}
-+	assert(texObj->_Complete);
-+	assert(t->mt);
- 
--	/* Note, this will call ChooseTextureFormat */
--	_mesa_store_teximage1d(ctx, target, level, internalFormat,
--			       width, border, format, type, pixels,
--			       &ctx->Unpack, texObj, texImage);
--
--	t->dirty_images[0] |= (1 << level);
-+	for(face = 0; face < t->mt->faces; ++face) {
-+		for(level = t->mt->firstLevel; level <= t->mt->lastLevel; ++level)
-+			texObj->Image[face][level]->Data = 0;
-+	}
-+	radeon_bo_unmap(t->mt->bo);
- }
- 
--static void r300TexSubImage1D(GLcontext * ctx, GLenum target, GLint level,
--			      GLint xoffset,
--			      GLsizei width,
--			      GLenum format, GLenum type,
--			      const GLvoid * pixels,
--			      const struct gl_pixelstore_attrib *packing,
--			      struct gl_texture_object *texObj,
--			      struct gl_texture_image *texImage)
-+/**
-+ * All glTexImage calls go through this function.
-+ */
-+static void r300_teximage(
-+	GLcontext *ctx, int dims,
-+	GLint face, GLint level,
-+	GLint internalFormat,
-+	GLint width, GLint height, GLint depth,
-+	GLsizei imageSize,
-+	GLenum format, GLenum type, const GLvoid * pixels,
-+	const struct gl_pixelstore_attrib *packing,
-+	struct gl_texture_object *texObj,
-+	struct gl_texture_image *texImage,
-+	int compressed)
- {
--	driTextureObject *t = (driTextureObject *) texObj->DriverData;
-+	r300ContextPtr rmesa = R300_CONTEXT(ctx);
-+	r300TexObj* t = r300_tex_obj(texObj);
-+	r300_texture_image* image = get_r300_texture_image(texImage);
- 
--	assert(t);		/* this _should_ be true */
--	if (t) {
--		driSwapOutTextureObject(t);
--	} else {
--		t = (driTextureObject *) r300AllocTexObj(texObj);
--		if (!t) {
--			_mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage1D");
--			return;
--		}
--	}
-+	R300_FIREVERTICES(rmesa);
- 
--	_mesa_store_texsubimage1d(ctx, target, level, xoffset, width,
--				  format, type, pixels, packing, texObj,
--				  texImage);
-+	t->validated = GL_FALSE;
- 
--	t->dirty_images[0] |= (1 << level);
--}
-+	/* Choose and fill in the texture format for this image */
-+	texImage->TexFormat = r300ChooseTextureFormat(ctx, internalFormat, format, type);
-+	_mesa_set_fetch_functions(texImage, dims);
- 
--static void r300TexImage2D(GLcontext * ctx, GLenum target, GLint level,
--			   GLint internalFormat,
--			   GLint width, GLint height, GLint border,
--			   GLenum format, GLenum type, const GLvoid * pixels,
--			   const struct gl_pixelstore_attrib *packing,
--			   struct gl_texture_object *texObj,
--			   struct gl_texture_image *texImage)
--{
--	driTextureObject *t = (driTextureObject *) texObj->DriverData;
--	GLuint face;
--
--	/* which cube face or ordinary 2D image */
--	switch (target) {
--	case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
--	case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
--	case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
--	case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
--	case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
--	case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
--		face =
--		    (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
--		ASSERT(face < 6);
--		break;
--	default:
--		face = 0;
-+	if (texImage->TexFormat->TexelBytes == 0) {
-+		texImage->IsCompressed = GL_TRUE;
-+		texImage->CompressedSize =
-+			ctx->Driver.CompressedTextureSize(ctx, texImage->Width,
-+					   texImage->Height, texImage->Depth,
-+					   texImage->TexFormat->MesaFormat);
-+	} else {
-+		texImage->IsCompressed = GL_FALSE;
-+		texImage->CompressedSize = 0;
- 	}
- 
--	if (t != NULL) {
--		driSwapOutTextureObject(t);
-+	/* Allocate memory for image */
-+	r300FreeTexImageData(ctx, texImage); /* Mesa core only clears texImage->Data but not image->mt */
-+
-+	if (!t->mt)
-+		r300_try_alloc_miptree(rmesa, t, texImage, face, level);
-+	if (t->mt && r300_miptree_matches_image(t->mt, texImage, face, level)) {
-+		image->mt = t->mt;
-+		image->mtlevel = level - t->mt->firstLevel;
-+		image->mtface = face;
-+		r300_miptree_reference(t->mt);
- 	} else {
--		t = (driTextureObject *) r300AllocTexObj(texObj);
--		if (!t) {
--			_mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage2D");
--			return;
-+		int size;
-+		if (texImage->IsCompressed) {
-+			size = texImage->CompressedSize;
-+		} else {
-+			size = texImage->Width * texImage->Height * texImage->Depth * texImage->TexFormat->TexelBytes;
- 		}
-+		texImage->Data = _mesa_alloc_texmemory(size);
- 	}
- 
--	texImage->IsClientData = GL_FALSE;
--
--	if (r300ValidateClientStorage(ctx, target,
--				      internalFormat,
--				      width, height,
--				      format, type, pixels,
--				      packing, texObj, texImage)) {
--		if (RADEON_DEBUG & DEBUG_TEXTURE)
--			fprintf(stderr, "%s: Using client storage\n",
--				__FUNCTION__);
-+	/* Upload texture image; note that the spec allows pixels to be NULL */
-+	if (compressed) {
-+		pixels = _mesa_validate_pbo_compressed_teximage(
-+			ctx, imageSize, pixels, packing, "glCompressedTexImage");
- 	} else {
--		if (RADEON_DEBUG & DEBUG_TEXTURE)
--			fprintf(stderr, "%s: Using normal storage\n",
--				__FUNCTION__);
--
--		/* Normal path: copy (to cached memory) and eventually upload
--		 * via another copy to GART memory and then a blit...  Could
--		 * eliminate one copy by going straight to (permanent) GART.
--		 *
--		 * Note, this will call r300ChooseTextureFormat.
--		 */
--		_mesa_store_teximage2d(ctx, target, level, internalFormat,
--				       width, height, border, format, type,
--				       pixels, &ctx->Unpack, texObj, texImage);
--
--		t->dirty_images[face] |= (1 << level);
-+		pixels = _mesa_validate_pbo_teximage(
-+			ctx, dims, width, height, depth,
-+			format, type, pixels, packing, "glTexImage");
- 	}
--}
- 
--static void r300TexSubImage2D(GLcontext * ctx, GLenum target, GLint level,
--			      GLint xoffset, GLint yoffset,
--			      GLsizei width, GLsizei height,
--			      GLenum format, GLenum type,
--			      const GLvoid * pixels,
--			      const struct gl_pixelstore_attrib *packing,
--			      struct gl_texture_object *texObj,
--			      struct gl_texture_image *texImage)
--{
--	driTextureObject *t = (driTextureObject *) texObj->DriverData;
--	GLuint face;
-+	if (pixels) {
-+		r300_teximage_map(image, GL_TRUE);
- 
--	/* which cube face or ordinary 2D image */
--	switch (target) {
--	case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
--	case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
--	case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
--	case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
--	case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
--	case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
--		face =
--		    (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
--		ASSERT(face < 6);
--		break;
--	default:
--		face = 0;
--	}
--
--	assert(t);		/* this _should_ be true */
--	if (t) {
--		driSwapOutTextureObject(t);
--	} else {
--		t = (driTextureObject *) r300AllocTexObj(texObj);
--		if (!t) {
--			_mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage2D");
--			return;
-+		if (compressed) {
-+			memcpy(texImage->Data, pixels, imageSize);
-+		} else {
-+			GLuint dstRowStride;
-+			if (image->mt) {
-+				r300_mipmap_level *lvl = &image->mt->levels[image->mtlevel];
-+				dstRowStride = lvl->rowstride;
-+			} else {
-+				dstRowStride = texImage->Width * texImage->TexFormat->TexelBytes;
-+			}
-+			if (!texImage->TexFormat->StoreImage(ctx, dims,
-+						texImage->_BaseFormat,
-+						texImage->TexFormat,
-+						texImage->Data, 0, 0, 0, /* dstX/Y/Zoffset */
-+						dstRowStride,
-+						texImage->ImageOffsets,
-+						width, height, depth,
-+						format, type, pixels, packing))
-+				_mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage");
- 		}
-+
-+		r300_teximage_unmap(image);
- 	}
- 
--	_mesa_store_texsubimage2d(ctx, target, level, xoffset, yoffset, width,
--				  height, format, type, pixels, packing, texObj,
--				  texImage);
-+	_mesa_unmap_teximage_pbo(ctx, packing);
- 
--	t->dirty_images[face] |= (1 << level);
-+	/* SGIS_generate_mipmap */
-+	if (level == texObj->BaseLevel && texObj->GenerateMipmap) {
-+		ctx->Driver.GenerateMipmap(ctx, texObj->Target, texObj);
-+	}
- }
- 
--static void r300CompressedTexImage2D(GLcontext * ctx, GLenum target,
--				     GLint level, GLint internalFormat,
--				     GLint width, GLint height, GLint border,
--				     GLsizei imageSize, const GLvoid * data,
--				     struct gl_texture_object *texObj,
--				     struct gl_texture_image *texImage)
--{
--	driTextureObject *t = (driTextureObject *) texObj->DriverData;
--	GLuint face;
- 
--	/* which cube face or ordinary 2D image */
-+static GLuint face_for_target(GLenum target)
-+{
- 	switch (target) {
- 	case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
- 	case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
-@@ -712,103 +615,50 @@ static void r300CompressedTexImage2D(GLcontext * ctx, GLenum target,
- 	case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
- 	case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
- 	case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
--		face =
--		    (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
--		ASSERT(face < 6);
--		break;
-+		return (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
- 	default:
--		face = 0;
--	}
--
--	if (t != NULL) {
--		driSwapOutTextureObject(t);
--	} else {
--		t = (driTextureObject *) r300AllocTexObj(texObj);
--		if (!t) {
--			_mesa_error(ctx, GL_OUT_OF_MEMORY,
--				    "glCompressedTexImage2D");
--			return;
--		}
-+		return 0;
- 	}
-+}
- 
--	texImage->IsClientData = GL_FALSE;
- 
--	/* can't call this, different parameters. Would never evaluate to true anyway currently */
--#if 0
--	if (r300ValidateClientStorage(ctx, target,
--				      internalFormat,
--				      width, height,
--				      format, type, pixels,
--				      packing, texObj, texImage)) {
--		if (RADEON_DEBUG & DEBUG_TEXTURE)
--			fprintf(stderr, "%s: Using client storage\n",
--				__FUNCTION__);
--	} else
--#endif
--	{
--		if (RADEON_DEBUG & DEBUG_TEXTURE)
--			fprintf(stderr, "%s: Using normal storage\n",
--				__FUNCTION__);
--
--		/* Normal path: copy (to cached memory) and eventually upload
--		 * via another copy to GART memory and then a blit...  Could
--		 * eliminate one copy by going straight to (permanent) GART.
--		 *
--		 * Note, this will call r300ChooseTextureFormat.
--		 */
--		_mesa_store_compressed_teximage2d(ctx, target, level,
--						  internalFormat, width, height,
--						  border, imageSize, data,
--						  texObj, texImage);
--
--		t->dirty_images[face] |= (1 << level);
--	}
-+static void r300TexImage1D(GLcontext * ctx, GLenum target, GLint level,
-+			   GLint internalFormat,
-+			   GLint width, GLint border,
-+			   GLenum format, GLenum type, const GLvoid * pixels,
-+			   const struct gl_pixelstore_attrib *packing,
-+			   struct gl_texture_object *texObj,
-+			   struct gl_texture_image *texImage)
-+{
-+	r300_teximage(ctx, 1, 0, level, internalFormat, width, 1, 1,
-+		0, format, type, pixels, packing, texObj, texImage, 0);
- }
- 
--static void r300CompressedTexSubImage2D(GLcontext * ctx, GLenum target,
--					GLint level, GLint xoffset,
--					GLint yoffset, GLsizei width,
--					GLsizei height, GLenum format,
--					GLsizei imageSize, const GLvoid * data,
--					struct gl_texture_object *texObj,
--					struct gl_texture_image *texImage)
-+static void r300TexImage2D(GLcontext * ctx, GLenum target, GLint level,
-+			   GLint internalFormat,
-+			   GLint width, GLint height, GLint border,
-+			   GLenum format, GLenum type, const GLvoid * pixels,
-+			   const struct gl_pixelstore_attrib *packing,
-+			   struct gl_texture_object *texObj,
-+			   struct gl_texture_image *texImage)
- {
--	driTextureObject *t = (driTextureObject *) texObj->DriverData;
--	GLuint face;
-+	GLuint face = face_for_target(target);
- 
--	/* which cube face or ordinary 2D image */
--	switch (target) {
--	case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
--	case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
--	case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
--	case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
--	case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
--	case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
--		face =
--		    (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
--		ASSERT(face < 6);
--		break;
--	default:
--		face = 0;
--	}
--
--	assert(t);		/* this _should_ be true */
--	if (t) {
--		driSwapOutTextureObject(t);
--	} else {
--		t = (driTextureObject *) r300AllocTexObj(texObj);
--		if (!t) {
--			_mesa_error(ctx, GL_OUT_OF_MEMORY,
--				    "glCompressedTexSubImage3D");
--			return;
--		}
--	}
-+	r300_teximage(ctx, 2, face, level, internalFormat, width, height, 1,
-+		0, format, type, pixels, packing, texObj, texImage, 0);
-+}
- 
--	_mesa_store_compressed_texsubimage2d(ctx, target, level, xoffset,
--					     yoffset, width, height, format,
--					     imageSize, data, texObj, texImage);
-+static void r300CompressedTexImage2D(GLcontext * ctx, GLenum target,
-+				     GLint level, GLint internalFormat,
-+				     GLint width, GLint height, GLint border,
-+				     GLsizei imageSize, const GLvoid * data,
-+				     struct gl_texture_object *texObj,
-+				     struct gl_texture_image *texImage)
-+{
-+	GLuint face = face_for_target(target);
- 
--	t->dirty_images[face] |= (1 << level);
-+	r300_teximage(ctx, 2, face, level, internalFormat, width, height, 1,
-+		imageSize, 0, 0, data, 0, texObj, texImage, 1);
- }
- 
- static void r300TexImage3D(GLcontext * ctx, GLenum target, GLint level,
-@@ -820,51 +670,100 @@ static void r300TexImage3D(GLcontext * ctx, GLenum target, GLint level,
- 			   struct gl_texture_object *texObj,
- 			   struct gl_texture_image *texImage)
- {
--	driTextureObject *t = (driTextureObject *) texObj->DriverData;
-+	r300_teximage(ctx, 3, 0, level, internalFormat, width, height, depth,
-+		0, format, type, pixels, packing, texObj, texImage, 0);
-+}
- 
--	if (t) {
--		driSwapOutTextureObject(t);
--	} else {
--		t = (driTextureObject *) r300AllocTexObj(texObj);
--		if (!t) {
--			_mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage3D");
--			return;
-+/**
-+ * Update a subregion of the given texture image.
-+ */
-+static void r300_texsubimage(GLcontext* ctx, int dims, int level,
-+		GLint xoffset, GLint yoffset, GLint zoffset,
-+		GLsizei width, GLsizei height, GLsizei depth,
-+		GLenum format, GLenum type,
-+		const GLvoid * pixels,
-+		const struct gl_pixelstore_attrib *packing,
-+		struct gl_texture_object *texObj,
-+		struct gl_texture_image *texImage,
-+		int compressed)
-+{
-+	r300ContextPtr rmesa = R300_CONTEXT(ctx);
-+	r300_texture_image* image = get_r300_texture_image(texImage);
-+
-+	R300_FIREVERTICES(rmesa);
-+
-+	pixels = _mesa_validate_pbo_teximage(ctx, dims,
-+		width, height, depth, format, type, pixels, packing, "glTexSubImage1D");
-+
-+	if (pixels) {
-+		GLint dstRowStride;
-+		r300_teximage_map(image, GL_TRUE);
-+
-+		if (image->mt) {
-+			r300_mipmap_level *lvl = &image->mt->levels[image->mtlevel];
-+			dstRowStride = lvl->rowstride;
-+		} else {
-+			dstRowStride = texImage->Width * texImage->TexFormat->TexelBytes;
- 		}
--	}
- 
--	texImage->IsClientData = GL_FALSE;
-+		if (!texImage->TexFormat->StoreImage(ctx, dims, texImage->_BaseFormat,
-+				texImage->TexFormat, texImage->Data,
-+				xoffset, yoffset, zoffset,
-+				dstRowStride,
-+				texImage->ImageOffsets,
-+				width, height, depth,
-+				format, type, pixels, packing))
-+			_mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage");
- 
--#if 0
--	if (r300ValidateClientStorage(ctx, target,
--				      internalFormat,
--				      width, height,
--				      format, type, pixels,
--				      packing, texObj, texImage)) {
--		if (RADEON_DEBUG & DEBUG_TEXTURE)
--			fprintf(stderr, "%s: Using client storage\n",
--				__FUNCTION__);
--	} else
--#endif
--	{
--		if (RADEON_DEBUG & DEBUG_TEXTURE)
--			fprintf(stderr, "%s: Using normal storage\n",
--				__FUNCTION__);
--
--		/* Normal path: copy (to cached memory) and eventually upload
--		 * via another copy to GART memory and then a blit...  Could
--		 * eliminate one copy by going straight to (permanent) GART.
--		 *
--		 * Note, this will call r300ChooseTextureFormat.
--		 */
--		_mesa_store_teximage3d(ctx, target, level, internalFormat,
--				       width, height, depth, border,
--				       format, type, pixels,
--				       &ctx->Unpack, texObj, texImage);
-+		r300_teximage_unmap(image);
-+	}
- 
--		t->dirty_images[0] |= (1 << level);
-+	_mesa_unmap_teximage_pbo(ctx, packing);
-+
-+	/* GL_SGIS_generate_mipmap */
-+	if (level == texObj->BaseLevel && texObj->GenerateMipmap) {
-+		ctx->Driver.GenerateMipmap(ctx, texObj->Target, texObj);
- 	}
- }
- 
-+static void r300TexSubImage1D(GLcontext * ctx, GLenum target, GLint level,
-+			      GLint xoffset,
-+			      GLsizei width,
-+			      GLenum format, GLenum type,
-+			      const GLvoid * pixels,
-+			      const struct gl_pixelstore_attrib *packing,
-+			      struct gl_texture_object *texObj,
-+			      struct gl_texture_image *texImage)
-+{
-+	r300_texsubimage(ctx, 1, level, xoffset, 0, 0, width, 1, 1,
-+		format, type, pixels, packing, texObj, texImage, 0);
-+}
-+
-+static void r300TexSubImage2D(GLcontext * ctx, GLenum target, GLint level,
-+			      GLint xoffset, GLint yoffset,
-+			      GLsizei width, GLsizei height,
-+			      GLenum format, GLenum type,
-+			      const GLvoid * pixels,
-+			      const struct gl_pixelstore_attrib *packing,
-+			      struct gl_texture_object *texObj,
-+			      struct gl_texture_image *texImage)
-+{
-+	r300_texsubimage(ctx, 2, level, xoffset, yoffset, 0, width, height, 1,
-+		format, type, pixels, packing, texObj, texImage, 0);
-+}
-+
-+static void r300CompressedTexSubImage2D(GLcontext * ctx, GLenum target,
-+					GLint level, GLint xoffset,
-+					GLint yoffset, GLsizei width,
-+					GLsizei height, GLenum format,
-+					GLsizei imageSize, const GLvoid * data,
-+					struct gl_texture_object *texObj,
-+					struct gl_texture_image *texImage)
-+{
-+	r300_texsubimage(ctx, 2, level, xoffset, yoffset, 0, width, height, 1,
-+		format, 0, data, 0, texObj, texImage, 1);
-+}
-+
- static void
- r300TexSubImage3D(GLcontext * ctx, GLenum target, GLint level,
- 		  GLint xoffset, GLint yoffset, GLint zoffset,
-@@ -875,30 +774,29 @@ r300TexSubImage3D(GLcontext * ctx, GLenum target, GLint level,
- 		  struct gl_texture_object *texObj,
- 		  struct gl_texture_image *texImage)
- {
--	driTextureObject *t = (driTextureObject *) texObj->DriverData;
--
--/*     fprintf(stderr, "%s\n", __FUNCTION__); */
-+	r300_texsubimage(ctx, 3, level, xoffset, yoffset, zoffset, width, height, depth,
-+		format, type, pixels, packing, texObj, texImage, 0);
-+}
- 
--	assert(t);		/* this _should_ be true */
--	if (t) {
--		driSwapOutTextureObject(t);
--	} else {
--		t = (driTextureObject *) r300AllocTexObj(texObj);
--		if (!t) {
--			_mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage3D");
--			return;
--		}
--		texObj->DriverData = t;
--	}
- 
--	_mesa_store_texsubimage3d(ctx, target, level, xoffset, yoffset, zoffset,
--				  width, height, depth,
--				  format, type, pixels, packing, texObj,
--				  texImage);
-+/**
-+ * Wraps Mesa's implementation to ensure that the base level image is mapped.
-+ *
-+ * This relies on internal details of _mesa_generate_mipmap, in particular
-+ * the fact that the memory for recreated texture images is always freed.
-+ */
-+static void r300_generate_mipmap(GLcontext* ctx, GLenum target, struct gl_texture_object *texObj)
-+{
-+	GLuint face = face_for_target(target);
-+	r300_texture_image *baseimage = get_r300_texture_image(texObj->Image[face][texObj->BaseLevel]);
- 
--	t->dirty_images[0] |= (1 << level);
-+	r300_teximage_map(baseimage, GL_FALSE);
-+	_mesa_generate_mipmap(ctx, target, texObj);
-+	r300_teximage_unmap(baseimage);
- }
- 
-+
-+
- /**
-  * Changes variables and flags for a state update, which will happen at the
-  * next UpdateTextureState
-@@ -908,7 +806,7 @@ static void r300TexParameter(GLcontext * ctx, GLenum target,
- 			     struct gl_texture_object *texObj,
- 			     GLenum pname, const GLfloat * params)
- {
--	r300TexObjPtr t = (r300TexObjPtr) texObj->DriverData;
-+	r300TexObj* t = r300_tex_obj(texObj);
- 
- 	if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) {
- 		fprintf(stderr, "%s( %s )\n", __FUNCTION__,
-@@ -941,7 +839,11 @@ static void r300TexParameter(GLcontext * ctx, GLenum target,
- 		 * we just have to rely on loading the right subset of mipmap levels
- 		 * to simulate a clamped LOD.
- 		 */
--		driSwapOutTextureObject((driTextureObject *) t);
-+		if (t->mt) {
-+			r300_miptree_unreference(t->mt);
-+			t->mt = 0;
-+			t->validated = GL_FALSE;
-+		}
- 		break;
- 
- 	case GL_DEPTH_TEXTURE_MODE:
-@@ -964,27 +866,10 @@ static void r300TexParameter(GLcontext * ctx, GLenum target,
- 	}
- }
- 
--static void r300BindTexture(GLcontext * ctx, GLenum target,
--			    struct gl_texture_object *texObj)
--{
--	if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) {
--		fprintf(stderr, "%s( %p ) unit=%d\n", __FUNCTION__,
--			(void *)texObj, ctx->Texture.CurrentUnit);
--	}
--
--	if ((target == GL_TEXTURE_1D)
--	    || (target == GL_TEXTURE_2D)
--	    || (target == GL_TEXTURE_3D)
--	    || (target == GL_TEXTURE_CUBE_MAP)
--	    || (target == GL_TEXTURE_RECTANGLE_NV)) {
--		assert(texObj->DriverData != NULL);
--	}
--}
--
- static void r300DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj)
- {
- 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
--	driTextureObject *t = (driTextureObject *) texObj->DriverData;
-+	r300TexObj* t = r300_tex_obj(texObj);
- 
- 	if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) {
- 		fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__,
-@@ -992,14 +877,19 @@ static void r300DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj)
- 			_mesa_lookup_enum_by_nr(texObj->Target));
- 	}
- 
--	if (t != NULL) {
--		if (rmesa) {
--			R300_FIREVERTICES(rmesa);
--		}
-+	if (rmesa) {
-+		int i;
-+		R300_FIREVERTICES(rmesa);
-+
-+		for(i = 0; i < R300_MAX_TEXTURE_UNITS; ++i)
-+			if (rmesa->hw.textures[i] == t)
-+				rmesa->hw.textures[i] = 0;
-+	}
- 
--		driDestroyTextureObject(t);
-+	if (t->mt) {
-+		r300_miptree_unreference(t->mt);
-+		t->mt = 0;
- 	}
--	/* Free mipmap images and the texture object itself */
- 	_mesa_delete_texture_object(ctx, texObj);
- }
- 
-@@ -1008,8 +898,6 @@ static void r300DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj)
-  * Called via ctx->Driver.NewTextureObject.
-  * Note: this function will be called during context creation to
-  * allocate the default texture objects.
-- * Note: we could use containment here to 'derive' the driver-specific
-- * texture object from the core mesa gl_texture_object.  Not done at this time.
-  * Fixup MaxAnisotropy according to user preference.
-  */
- static struct gl_texture_object *r300NewTextureObject(GLcontext * ctx,
-@@ -1017,14 +905,23 @@ static struct gl_texture_object *r300NewTextureObject(GLcontext * ctx,
- 						      GLenum target)
- {
- 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
--	struct gl_texture_object *obj;
--	obj = _mesa_new_texture_object(ctx, name, target);
--	if (!obj)
--		return NULL;
--	obj->MaxAnisotropy = rmesa->initialMaxAnisotropy;
-+	r300TexObj* t = CALLOC_STRUCT(r300_tex_obj);
- 
--	r300AllocTexObj(obj);
--	return obj;
-+
-+	if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) {
-+		fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__,
-+			t, _mesa_lookup_enum_by_nr(target));
-+	}
-+
-+	_mesa_initialize_texture_object(&t->base, name, target);
-+	t->base.MaxAnisotropy = rmesa->initialMaxAnisotropy;
-+
-+	/* Initialize hardware state */
-+	r300UpdateTexWrap(t);
-+	r300SetTexFilter(t, t->base.MinFilter, t->base.MagFilter, t->base.MaxAnisotropy);
-+	r300SetTexBorderColor(t, t->base._BorderChan);
-+
-+	return &t->base;
- }
- 
- void r300InitTextureFuncs(struct dd_function_table *functions)
-@@ -1032,6 +929,11 @@ void r300InitTextureFuncs(struct dd_function_table *functions)
- 	/* Note: we only plug in the functions we implement in the driver
- 	 * since _mesa_init_driver_functions() was already called.
- 	 */
-+	functions->NewTextureImage = r300NewTextureImage;
-+	functions->FreeTexImageData = r300FreeTexImageData;
-+	functions->MapTexture = r300MapTexture;
-+	functions->UnmapTexture = r300UnmapTexture;
-+
- 	functions->ChooseTextureFormat = r300ChooseTextureFormat;
- 	functions->TexImage1D = r300TexImage1D;
- 	functions->TexImage2D = r300TexImage2D;
-@@ -1040,7 +942,6 @@ void r300InitTextureFuncs(struct dd_function_table *functions)
- 	functions->TexSubImage2D = r300TexSubImage2D;
- 	functions->TexSubImage3D = r300TexSubImage3D;
- 	functions->NewTextureObject = r300NewTextureObject;
--	functions->BindTexture = r300BindTexture;
- 	functions->DeleteTexture = r300DeleteTexture;
- 	functions->IsTextureResident = driIsTextureResident;
- 
-@@ -1049,5 +950,7 @@ void r300InitTextureFuncs(struct dd_function_table *functions)
- 	functions->CompressedTexImage2D = r300CompressedTexImage2D;
- 	functions->CompressedTexSubImage2D = r300CompressedTexSubImage2D;
- 
-+	functions->GenerateMipmap = r300_generate_mipmap;
-+
- 	driInitTextureFormats();
- }
-diff --git a/src/mesa/drivers/dri/r300/r300_tex.h b/src/mesa/drivers/dri/r300/r300_tex.h
-index b86d45b..358b927 100644
---- a/src/mesa/drivers/dri/r300/r300_tex.h
-+++ b/src/mesa/drivers/dri/r300/r300_tex.h
-@@ -41,12 +41,7 @@ extern void r300SetTexOffset(__DRIcontext *pDRICtx, GLint texname,
- 			     unsigned long long offset, GLint depth,
- 			     GLuint pitch);
- 
--extern void r300UpdateTextureState(GLcontext * ctx);
--
--extern int r300UploadTexImages(r300ContextPtr rmesa, r300TexObjPtr t,
--			       GLuint face);
--
--extern void r300DestroyTexObj(r300ContextPtr rmesa, r300TexObjPtr t);
-+extern void r300ValidateTextures(GLcontext * ctx);
- 
- extern void r300InitTextureFuncs(struct dd_function_table *functions);
- 
-diff --git a/src/mesa/drivers/dri/r300/r300_texmem.c b/src/mesa/drivers/dri/r300/r300_texmem.c
-index b03eefa..53eeca1 100644
---- a/src/mesa/drivers/dri/r300/r300_texmem.c
-+++ b/src/mesa/drivers/dri/r300/r300_texmem.c
-@@ -48,520 +48,11 @@ SOFTWARE.
- #include "r300_context.h"
- #include "r300_state.h"
- #include "r300_cmdbuf.h"
-+#include "r300_emit.h"
-+#include "r300_mipmap_tree.h"
- #include "radeon_ioctl.h"
- #include "r300_tex.h"
- #include "r300_ioctl.h"
- #include <unistd.h>		/* for usleep() */
- 
--#ifdef USER_BUFFERS
--#include "r300_mem.h"
--#endif
- 
--/**
-- * Destroy any device-dependent state associated with the texture.  This may
-- * include NULLing out hardware state that points to the texture.
-- */
--void r300DestroyTexObj(r300ContextPtr rmesa, r300TexObjPtr t)
--{
--	int i;
--
--	if (RADEON_DEBUG & DEBUG_TEXTURE) {
--		fprintf(stderr, "%s( %p, %p )\n", __FUNCTION__,
--			(void *)t, (void *)t->base.tObj);
--	}
--
--	for (i = 0; i < rmesa->radeon.glCtx->Const.MaxTextureUnits; i++) {
--		if (rmesa->state.texture.unit[i].texobj == t) {
--			rmesa->state.texture.unit[i].texobj = NULL;
--		}
--	}
--}
--
--/* ------------------------------------------------------------
-- * Texture image conversions
-- */
--
--static void r300UploadGARTClientSubImage(r300ContextPtr rmesa,
--					 r300TexObjPtr t,
--					 struct gl_texture_image *texImage,
--					 GLint hwlevel,
--					 GLint x, GLint y,
--					 GLint width, GLint height)
--{
--	const struct gl_texture_format *texFormat = texImage->TexFormat;
--	GLuint srcPitch, dstPitch;
--	int blit_format;
--	int srcOffset;
--
--	/*
--	 * XXX it appears that we always upload the full image, not a subimage.
--	 * I.e. x==0, y==0, width=texWidth, height=texWidth.  If this is ever
--	 * changed, the src pitch will have to change.
--	 */
--	switch (texFormat->TexelBytes) {
--	case 1:
--		blit_format = R300_CP_COLOR_FORMAT_CI8;
--		srcPitch = t->image[0][0].width * texFormat->TexelBytes;
--		dstPitch = t->image[0][0].width * texFormat->TexelBytes;
--		break;
--	case 2:
--		blit_format = R300_CP_COLOR_FORMAT_RGB565;
--		srcPitch = t->image[0][0].width * texFormat->TexelBytes;
--		dstPitch = t->image[0][0].width * texFormat->TexelBytes;
--		break;
--	case 4:
--		blit_format = R300_CP_COLOR_FORMAT_ARGB8888;
--		srcPitch = t->image[0][0].width * texFormat->TexelBytes;
--		dstPitch = t->image[0][0].width * texFormat->TexelBytes;
--		break;
--	case 8:
--	case 16:
--		blit_format = R300_CP_COLOR_FORMAT_CI8;
--		srcPitch = t->image[0][0].width * texFormat->TexelBytes;
--		dstPitch = t->image[0][0].width * texFormat->TexelBytes;
--		break;
--	default:
--		return;
--	}
--
--	t->image[0][hwlevel].data = texImage->Data;
--	srcOffset = r300GartOffsetFromVirtual(rmesa, texImage->Data);
--
--	assert(srcOffset != ~0);
--
--	/* Don't currently need to cope with small pitches?
--	 */
--	width = texImage->Width;
--	height = texImage->Height;
--
--	if (texFormat->TexelBytes > 4) {
--		width *= texFormat->TexelBytes;
--	}
--
--	r300EmitWait(rmesa, R300_WAIT_3D);
--
--	r300EmitBlit(rmesa, blit_format,
--		     srcPitch,
--		     srcOffset,
--		     dstPitch,
--		     t->bufAddr,
--		     x,
--		     y,
--		     t->image[0][hwlevel].x + x,
--		     t->image[0][hwlevel].y + y, width, height);
--
--	r300EmitWait(rmesa, R300_WAIT_2D);
--}
--
--static void r300UploadRectSubImage(r300ContextPtr rmesa,
--				   r300TexObjPtr t,
--				   struct gl_texture_image *texImage,
--				   GLint x, GLint y, GLint width, GLint height)
--{
--	const struct gl_texture_format *texFormat = texImage->TexFormat;
--	int blit_format, dstPitch, done;
--
--	switch (texFormat->TexelBytes) {
--	case 1:
--		blit_format = R300_CP_COLOR_FORMAT_CI8;
--		break;
--	case 2:
--		blit_format = R300_CP_COLOR_FORMAT_RGB565;
--		break;
--	case 4:
--		blit_format = R300_CP_COLOR_FORMAT_ARGB8888;
--		break;
--	case 8:
--	case 16:
--		blit_format = R300_CP_COLOR_FORMAT_CI8;
--		break;
--	default:
--		return;
--	}
--
--	t->image[0][0].data = texImage->Data;
--
--	/* Currently don't need to cope with small pitches.
--	 */
--	width = texImage->Width;
--	height = texImage->Height;
--	dstPitch = t->pitch;
--
--	if (texFormat->TexelBytes > 4) {
--		width *= texFormat->TexelBytes;
--	}
--
--	if (rmesa->prefer_gart_client_texturing && texImage->IsClientData) {
--		/* In this case, could also use GART texturing.  This is
--		 * currently disabled, but has been tested & works.
--		 */
--		t->offset = r300GartOffsetFromVirtual(rmesa, texImage->Data);
--		t->pitch = texImage->RowStride * texFormat->TexelBytes - 32;
--
--		if (RADEON_DEBUG & DEBUG_TEXTURE)
--			fprintf(stderr,
--				"Using GART texturing for rectangular client texture\n");
--
--		/* Release FB memory allocated for this image:
--		 */
--		/* FIXME This may not be correct as driSwapOutTextureObject sets
--		 * FIXME dirty_images.  It may be fine, though.
--		 */
--		if (t->base.memBlock) {
--			driSwapOutTextureObject((driTextureObject *) t);
--		}
--	} else if (texImage->IsClientData) {
--		/* Data already in GART memory, with usable pitch.
--		 */
--		GLuint srcPitch;
--		srcPitch = texImage->RowStride * texFormat->TexelBytes;
--		r300EmitBlit(rmesa,
--			     blit_format,
--			     srcPitch,
--			     r300GartOffsetFromVirtual(rmesa, texImage->Data),
--			     dstPitch, t->bufAddr, 0, 0, 0, 0, width, height);
--	} else {
--		/* Data not in GART memory, or bad pitch.
--		 */
--		for (done = 0; done < height;) {
--			struct r300_dma_region region;
--			int lines =
--			    MIN2(height - done, RADEON_BUFFER_SIZE / dstPitch);
--			int src_pitch;
--			char *tex;
--
--			src_pitch = texImage->RowStride * texFormat->TexelBytes;
--
--			tex = (char *)texImage->Data + done * src_pitch;
--
--			memset(&region, 0, sizeof(region));
--			r300AllocDmaRegion(rmesa, &region, lines * dstPitch,
--					   1024);
--
--			/* Copy texdata to dma:
--			 */
--			if (RADEON_DEBUG & DEBUG_TEXTURE)
--				fprintf(stderr,
--					"%s: src_pitch %d dst_pitch %d\n",
--					__FUNCTION__, src_pitch, dstPitch);
--
--			if (src_pitch == dstPitch) {
--				memcpy(region.address + region.start, tex,
--				       lines * src_pitch);
--			} else {
--				char *buf = region.address + region.start;
--				int i;
--				for (i = 0; i < lines; i++) {
--					memcpy(buf, tex, src_pitch);
--					buf += dstPitch;
--					tex += src_pitch;
--				}
--			}
--
--			r300EmitWait(rmesa, R300_WAIT_3D);
--
--			/* Blit to framebuffer
--			 */
--			r300EmitBlit(rmesa,
--				     blit_format,
--				     dstPitch, GET_START(&region),
--				     dstPitch | (t->tile_bits >> 16),
--				     t->bufAddr, 0, 0, 0, done, width, lines);
--
--			r300EmitWait(rmesa, R300_WAIT_2D);
--#ifdef USER_BUFFERS
--			r300_mem_use(rmesa, region.buf->id);
--#endif
--
--			r300ReleaseDmaRegion(rmesa, &region, __FUNCTION__);
--			done += lines;
--		}
--	}
--}
--
--/**
-- * Upload the texture image associated with texture \a t at the specified
-- * level at the address relative to \a start.
-- */
--static void r300UploadSubImage(r300ContextPtr rmesa, r300TexObjPtr t,
--			       GLint hwlevel,
--			       GLint x, GLint y, GLint width, GLint height,
--			       GLuint face)
--{
--	struct gl_texture_image *texImage = NULL;
--	GLuint offset;
--	GLint imageWidth, imageHeight;
--	GLint ret;
--	drm_radeon_texture_t tex;
--	drm_radeon_tex_image_t tmp;
--	const int level = hwlevel + t->base.firstLevel;
--
--	if (RADEON_DEBUG & DEBUG_TEXTURE) {
--		fprintf(stderr,
--			"%s( %p, %p ) level/width/height/face = %d/%d/%d/%u\n",
--			__FUNCTION__, (void *)t, (void *)t->base.tObj, level,
--			width, height, face);
--	}
--
--	ASSERT(face < 6);
--
--	/* Ensure we have a valid texture to upload */
--	if ((hwlevel < 0) || (hwlevel >= RADEON_MAX_TEXTURE_LEVELS)) {
--		_mesa_problem(NULL, "bad texture level in %s", __FUNCTION__);
--		return;
--	}
--
--	texImage = t->base.tObj->Image[face][level];
--
--	if (!texImage) {
--		if (RADEON_DEBUG & DEBUG_TEXTURE)
--			fprintf(stderr, "%s: texImage %d is NULL!\n",
--				__FUNCTION__, level);
--		return;
--	}
--	if (!texImage->Data) {
--		if (RADEON_DEBUG & DEBUG_TEXTURE)
--			fprintf(stderr, "%s: image data is NULL!\n",
--				__FUNCTION__);
--		return;
--	}
--
--	if (t->base.tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
--		assert(level == 0);
--		assert(hwlevel == 0);
--		if (RADEON_DEBUG & DEBUG_TEXTURE)
--			fprintf(stderr, "%s: image data is rectangular\n",
--				__FUNCTION__);
--		r300UploadRectSubImage(rmesa, t, texImage, x, y, width, height);
--		return;
--	} else if (texImage->IsClientData) {
--		if (RADEON_DEBUG & DEBUG_TEXTURE)
--			fprintf(stderr,
--				"%s: image data is in GART client storage\n",
--				__FUNCTION__);
--		r300UploadGARTClientSubImage(rmesa, t, texImage, hwlevel, x, y,
--					     width, height);
--		return;
--	} else if (RADEON_DEBUG & DEBUG_TEXTURE)
--		fprintf(stderr, "%s: image data is in normal memory\n",
--			__FUNCTION__);
--
--	imageWidth = texImage->Width;
--	imageHeight = texImage->Height;
--
--	offset = t->bufAddr;
--
--	if (RADEON_DEBUG & (DEBUG_TEXTURE | DEBUG_IOCTL)) {
--		GLint imageX = 0;
--		GLint imageY = 0;
--		GLint blitX = t->image[face][hwlevel].x;
--		GLint blitY = t->image[face][hwlevel].y;
--		GLint blitWidth = t->image[face][hwlevel].width;
--		GLint blitHeight = t->image[face][hwlevel].height;
--		fprintf(stderr, "   upload image: %d,%d at %d,%d\n",
--			imageWidth, imageHeight, imageX, imageY);
--		fprintf(stderr, "   upload  blit: %d,%d at %d,%d\n",
--			blitWidth, blitHeight, blitX, blitY);
--		fprintf(stderr, "       blit ofs: 0x%07x level: %d/%d\n",
--			(GLuint) offset, hwlevel, level);
--	}
--
--	t->image[face][hwlevel].data = texImage->Data;
--
--	/* Init the DRM_RADEON_TEXTURE command / drm_radeon_texture_t struct.
--	 * NOTE: we're always use a 1KB-wide blit and I8 texture format.
--	 * We used to use 1, 2 and 4-byte texels and used to use the texture
--	 * width to dictate the blit width - but that won't work for compressed
--	 * textures. (Brian)
--	 * NOTE: can't do that with texture tiling. (sroland)
--	 */
--	tex.offset = offset;
--	tex.image = &tmp;
--	/* copy (x,y,width,height,data) */
--	memcpy(&tmp, &t->image[face][hwlevel], sizeof(tmp));
--
--	if (texImage->TexFormat->TexelBytes > 4) {
--		const int log2TexelBytes =
--		    (3 + (texImage->TexFormat->TexelBytes >> 4));
--		tex.format = RADEON_TXFORMAT_I8;	/* any 1-byte texel format */
--		tex.pitch =
--		    MAX2((texImage->Width * texImage->TexFormat->TexelBytes) /
--			 64, 1);
--		tex.height = imageHeight;
--		tex.width = imageWidth << log2TexelBytes;
--		tex.offset += (tmp.x << log2TexelBytes) & ~1023;
--		tmp.x = tmp.x % (1024 >> log2TexelBytes);
--		tmp.width = tmp.width << log2TexelBytes;
--	} else if (texImage->TexFormat->TexelBytes) {
--		/* use multi-byte upload scheme */
--		tex.height = imageHeight;
--		tex.width = imageWidth;
--		switch (texImage->TexFormat->TexelBytes) {
--		case 1:
--			tex.format = RADEON_TXFORMAT_I8;
--			break;
--		case 2:
--			tex.format = RADEON_TXFORMAT_AI88;
--			break;
--		case 4:
--			tex.format = RADEON_TXFORMAT_ARGB8888;
--			break;
--		}
--		tex.pitch =
--		    MAX2((texImage->Width * texImage->TexFormat->TexelBytes) /
--			 64, 1);
--		tex.offset += tmp.x & ~1023;
--		tmp.x = tmp.x % 1024;
--
--		if (t->tile_bits & R300_TXO_MICRO_TILE) {
--			/* need something like "tiled coordinates" ? */
--			tmp.y = tmp.x / (tex.pitch * 128) * 2;
--			tmp.x =
--			    tmp.x % (tex.pitch * 128) / 2 /
--			    texImage->TexFormat->TexelBytes;
--			tex.pitch |= RADEON_DST_TILE_MICRO >> 22;
--		} else {
--			tmp.x = tmp.x >> (texImage->TexFormat->TexelBytes >> 1);
--		}
--#if 1
--		if ((t->tile_bits & R300_TXO_MACRO_TILE) &&
--		    (texImage->Width * texImage->TexFormat->TexelBytes >= 256)
--		    && ((!(t->tile_bits & R300_TXO_MICRO_TILE)
--			 && (texImage->Height >= 8))
--			|| (texImage->Height >= 16))) {
--			/* weird: R200 disables macro tiling if mip width is smaller than 256 bytes,
--			   OR if height is smaller than 8 automatically, but if micro tiling is active
--			   the limit is height 16 instead ? */
--			tex.pitch |= RADEON_DST_TILE_MACRO >> 22;
--		}
--#endif
--	} else {
--		/* In case of for instance 8x8 texture (2x2 dxt blocks),
--		   padding after the first two blocks is needed (only
--		   with dxt1 since 2 dxt3/dxt5 blocks already use 32 Byte). */
--		/* set tex.height to 1/4 since 1 "macropixel" (dxt-block)
--		   has 4 real pixels. Needed so the kernel module reads
--		   the right amount of data. */
--		tex.format = RADEON_TXFORMAT_I8;	/* any 1-byte texel format */
--		tex.pitch = (R300_BLIT_WIDTH_BYTES / 64);
--		tex.height = (imageHeight + 3) / 4;
--		tex.width = (imageWidth + 3) / 4;
--		if ((t->format & R300_TX_FORMAT_DXT1) == R300_TX_FORMAT_DXT1) {
--			tex.width *= 8;
--		} else {
--			tex.width *= 16;
--		}
--	}
--
--	LOCK_HARDWARE(&rmesa->radeon);
--	do {
--		ret =
--		    drmCommandWriteRead(rmesa->radeon.dri.fd,
--					DRM_RADEON_TEXTURE, &tex,
--					sizeof(drm_radeon_texture_t));
--		if (ret) {
--			if (RADEON_DEBUG & DEBUG_IOCTL)
--				fprintf(stderr,
--					"DRM_RADEON_TEXTURE:  again!\n");
--			usleep(1);
--		}
--	} while (ret == -EAGAIN);
--
--	UNLOCK_HARDWARE(&rmesa->radeon);
--
--	if (ret) {
--		fprintf(stderr, "DRM_RADEON_TEXTURE: return = %d\n", ret);
--		fprintf(stderr, "   offset=0x%08x\n", offset);
--		fprintf(stderr, "   image width=%d height=%d\n",
--			imageWidth, imageHeight);
--		fprintf(stderr, "    blit width=%d height=%d data=%p\n",
--			t->image[face][hwlevel].width,
--			t->image[face][hwlevel].height,
--			t->image[face][hwlevel].data);
--		_mesa_exit(-1);
--	}
--}
--
--/**
-- * Upload the texture images associated with texture \a t.  This might
-- * require the allocation of texture memory.
-- *
-- * \param rmesa Context pointer
-- * \param t Texture to be uploaded
-- * \param face Cube map face to be uploaded.  Zero for non-cube maps.
-- */
--
--int r300UploadTexImages(r300ContextPtr rmesa, r300TexObjPtr t, GLuint face)
--{
--	const int numLevels = t->base.lastLevel - t->base.firstLevel + 1;
--
--	if (t->image_override)
--		return 0;
--
--	if (RADEON_DEBUG & (DEBUG_TEXTURE | DEBUG_IOCTL)) {
--		fprintf(stderr, "%s( %p, %p ) sz=%d lvls=%d-%d\n", __FUNCTION__,
--			(void *)rmesa->radeon.glCtx, (void *)t->base.tObj,
--			t->base.totalSize, t->base.firstLevel,
--			t->base.lastLevel);
--	}
--
--	if (t->base.totalSize == 0)
--		return 0;
--
--	if (RADEON_DEBUG & DEBUG_SYNC) {
--		fprintf(stderr, "%s: Syncing\n", __FUNCTION__);
--		radeonFinish(rmesa->radeon.glCtx);
--	}
--
--	LOCK_HARDWARE(&rmesa->radeon);
--
--	if (t->base.memBlock == NULL) {
--		int heap;
--
--		heap = driAllocateTexture(rmesa->texture_heaps, rmesa->nr_heaps,
--					  (driTextureObject *) t);
--		if (heap == -1) {
--			UNLOCK_HARDWARE(&rmesa->radeon);
--			return -1;
--		}
--
--		/* Set the base offset of the texture image */
--		t->bufAddr = rmesa->radeon.radeonScreen->texOffset[heap]
--		    + t->base.memBlock->ofs;
--		t->offset = t->bufAddr;
--
--		if (!(t->base.tObj->Image[0][0]->IsClientData)) {
--			/* hope it's safe to add that here... */
--			t->offset |= t->tile_bits;
--		}
--	}
--
--	/* Let the world know we've used this memory recently.
--	 */
--	driUpdateTextureLRU((driTextureObject *) t);
--	UNLOCK_HARDWARE(&rmesa->radeon);
--
--	/* Upload any images that are new */
--	if (t->base.dirty_images[face]) {
--		int i;
--		for (i = 0; i < numLevels; i++) {
--			if ((t->base.
--			     dirty_images[face] & (1 <<
--						   (i + t->base.firstLevel))) !=
--			    0) {
--				r300UploadSubImage(rmesa, t, i, 0, 0,
--						   t->image[face][i].width,
--						   t->image[face][i].height,
--						   face);
--			}
--		}
--		t->base.dirty_images[face] = 0;
--	}
--
--	if (RADEON_DEBUG & DEBUG_SYNC) {
--		fprintf(stderr, "%s: Syncing\n", __FUNCTION__);
--		radeonFinish(rmesa->radeon.glCtx);
--	}
--
--	return 0;
--}
-diff --git a/src/mesa/drivers/dri/r300/r300_texstate.c b/src/mesa/drivers/dri/r300/r300_texstate.c
-index e2329f0..4fea822 100644
---- a/src/mesa/drivers/dri/r300/r300_texstate.c
-+++ b/src/mesa/drivers/dri/r300/r300_texstate.c
-@@ -48,8 +48,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "r300_state.h"
- #include "r300_ioctl.h"
- #include "radeon_ioctl.h"
-+#include "r300_mipmap_tree.h"
- #include "r300_tex.h"
- #include "r300_reg.h"
-+#include "radeon_buffer.h"
- 
- #define VALID_FORMAT(f) ( ((f) <= MESA_FORMAT_RGBA_DXT5			\
- 			   || ((f) >= MESA_FORMAT_RGBA_FLOAT32 &&	\
-@@ -148,8 +150,7 @@ void r300SetDepthTexMode(struct gl_texture_object *tObj)
- 	if (!tObj)
- 		return;
- 
--	t = (r300TexObjPtr) tObj->DriverData;
--
-+	t = r300_tex_obj(tObj);
- 
- 	switch (tObj->Image[0][tObj->BaseLevel]->TexFormat->MesaFormat) {
- 	case MESA_FORMAT_Z16:
-@@ -190,399 +191,228 @@ void r300SetDepthTexMode(struct gl_texture_object *tObj)
- 
- 
- /**
-- * Compute sizes and fill in offset and blit information for the given
-- * image (determined by \p face and \p level).
-- *
-- * \param curOffset points to the offset at which the image is to be stored
-- * and is updated by this function according to the size of the image.
-- */
--static void compute_tex_image_offset(
--	struct gl_texture_object *tObj,
--	GLuint face,
--	GLint level,
--	GLint* curOffset)
--{
--	r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
--	const struct gl_texture_image* texImage;
--	GLuint blitWidth = R300_BLIT_WIDTH_BYTES;
--	GLuint texelBytes;
--	GLuint size;
--
--	texImage = tObj->Image[0][level + t->base.firstLevel];
--	if (!texImage)
--		return;
--
--	texelBytes = texImage->TexFormat->TexelBytes;
--
--	/* find image size in bytes */
--	if (texImage->IsCompressed) {
--		if ((t->format & R300_TX_FORMAT_DXT1) ==
--			R300_TX_FORMAT_DXT1) {
--			// fprintf(stderr,"DXT 1 %d %08X\n", texImage->Width, t->format);
--			if ((texImage->Width + 3) < 8)	/* width one block */
--				size = texImage->CompressedSize * 4;
--			else if ((texImage->Width + 3) < 16)
--				size = texImage->CompressedSize * 2;
--			else
--				size = texImage->CompressedSize;
--		} else {
--			/* DXT3/5, 16 bytes per block */
--			WARN_ONCE
--				("DXT 3/5 suffers from multitexturing problems!\n");
--			// fprintf(stderr,"DXT 3/5 %d\n", texImage->Width);
--			if ((texImage->Width + 3) < 8)
--				size = texImage->CompressedSize * 2;
--			else
--				size = texImage->CompressedSize;
--		}
--	} else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
--		size =
--			((texImage->Width * texelBytes +
--			63) & ~63) * texImage->Height;
--		blitWidth = 64 / texelBytes;
--	} else if (t->tile_bits & R300_TXO_MICRO_TILE) {
--		/* tile pattern is 16 bytes x2. mipmaps stay 32 byte aligned,
--			though the actual offset may be different (if texture is less than
--			32 bytes width) to the untiled case */
--		int w = (texImage->Width * texelBytes * 2 + 31) & ~31;
--		size =
--			(w * ((texImage->Height + 1) / 2)) *
--			texImage->Depth;
--		blitWidth = MAX2(texImage->Width, 64 / texelBytes);
--	} else {
--		int w = (texImage->Width * texelBytes + 31) & ~31;
--		size = w * texImage->Height * texImage->Depth;
--		blitWidth = MAX2(texImage->Width, 64 / texelBytes);
--	}
--	assert(size > 0);
--
--	if (RADEON_DEBUG & DEBUG_TEXTURE)
--		fprintf(stderr, "w=%d h=%d d=%d tb=%d intFormat=%d\n",
--			texImage->Width, texImage->Height,
--			texImage->Depth,
--			texImage->TexFormat->TexelBytes,
--			texImage->InternalFormat);
--
--	/* All images are aligned to a 32-byte offset */
--	*curOffset = (*curOffset + 0x1f) & ~0x1f;
--
--	if (texelBytes) {
--		/* fix x and y coords up later together with offset */
--		t->image[face][level].x = *curOffset;
--		t->image[face][level].y = 0;
--		t->image[face][level].width =
--			MIN2(size / texelBytes, blitWidth);
--		t->image[face][level].height =
--			(size / texelBytes) / t->image[face][level].width;
--	} else {
--		t->image[face][level].x = *curOffset % R300_BLIT_WIDTH_BYTES;
--		t->image[face][level].y = *curOffset / R300_BLIT_WIDTH_BYTES;
--		t->image[face][level].width =
--			MIN2(size, R300_BLIT_WIDTH_BYTES);
--		t->image[face][level].height = size / t->image[face][level].width;
--	}
--
--	if (RADEON_DEBUG & DEBUG_TEXTURE)
--		fprintf(stderr,
--			"level %d, face %d: %dx%d x=%d y=%d w=%d h=%d size=%d at %d\n",
--			level, face, texImage->Width, texImage->Height,
--			t->image[face][level].x, t->image[face][level].y,
--			t->image[face][level].width, t->image[face][level].height,
--			size, *curOffset);
--
--	*curOffset += size;
--}
--
--
--
--/**
-- * This function computes the number of bytes of storage needed for
-- * the given texture object (all mipmap levels, all cube faces).
-- * The \c image[face][level].x/y/width/height parameters for upload/blitting
-- * are computed here.  \c filter, \c format, etc. will be set here
-- * too.
-+ * Compute the cached hardware register values for the given texture object.
-  *
-  * \param rmesa Context pointer
-- * \param tObj GL texture object whose images are to be posted to
-- *                 hardware state.
-+ * \param t the r300 texture object
-  */
--static void r300SetTexImages(r300ContextPtr rmesa,
--			     struct gl_texture_object *tObj)
-+static void setup_hardware_state(r300ContextPtr rmesa, r300TexObj *t)
- {
--	r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
--	const struct gl_texture_image *baseImage =
--	    tObj->Image[0][tObj->BaseLevel];
--	GLint curOffset;
--	GLint i, texelBytes;
--	GLint numLevels;
--	GLint log2Width, log2Height, log2Depth;
--
--	/* Set the hardware texture format
--	 */
-+	const struct gl_texture_image *firstImage =
-+	    t->base.Image[0][t->mt->firstLevel];
-+
- 	if (!t->image_override
--	    && VALID_FORMAT(baseImage->TexFormat->MesaFormat)) {
--		if (baseImage->TexFormat->BaseFormat == GL_DEPTH_COMPONENT) {
--			r300SetDepthTexMode(tObj);
-+	    && VALID_FORMAT(firstImage->TexFormat->MesaFormat)) {
-+		if (firstImage->TexFormat->BaseFormat == GL_DEPTH_COMPONENT) {
-+			r300SetDepthTexMode(&t->base);
- 		} else {
--			t->format = tx_table[baseImage->TexFormat->MesaFormat].format;
-+			t->format = tx_table[firstImage->TexFormat->MesaFormat].format;
- 		}
- 
--		t->filter |= tx_table[baseImage->TexFormat->MesaFormat].filter;
-+		t->filter |= tx_table[firstImage->TexFormat->MesaFormat].filter;
- 	} else if (!t->image_override) {
- 		_mesa_problem(NULL, "unexpected texture format in %s",
- 			      __FUNCTION__);
- 		return;
- 	}
- 
--	texelBytes = baseImage->TexFormat->TexelBytes;
--
--	/* Compute which mipmap levels we really want to send to the hardware.
--	 */
--	driCalculateTextureFirstLastLevel((driTextureObject *) t);
--	log2Width = tObj->Image[0][t->base.firstLevel]->WidthLog2;
--	log2Height = tObj->Image[0][t->base.firstLevel]->HeightLog2;
--	log2Depth = tObj->Image[0][t->base.firstLevel]->DepthLog2;
--
--	numLevels = t->base.lastLevel - t->base.firstLevel + 1;
--
--	assert(numLevels <= RADEON_MAX_TEXTURE_LEVELS);
--
--	/* Calculate mipmap offsets and dimensions for blitting (uploading)
--	 * The idea is that we lay out the mipmap levels within a block of
--	 * memory organized as a rectangle of width BLIT_WIDTH_BYTES.
--	 */
- 	t->tile_bits = 0;
- 
--	/* figure out if this texture is suitable for tiling. */
--#if 0				/* Disabled for now */
--	if (texelBytes) {
--		if ((tObj->Target != GL_TEXTURE_RECTANGLE_NV) &&
--		    /* texrect might be able to use micro tiling too in theory? */
--		    (baseImage->Height > 1)) {
--
--			/* allow 32 (bytes) x 1 mip (which will use two times the space
--			   the non-tiled version would use) max if base texture is large enough */
--			if ((numLevels == 1) ||
--			    (((baseImage->Width * texelBytes /
--			       baseImage->Height) <= 32)
--			     && (baseImage->Width * texelBytes > 64))
--			    ||
--			    ((baseImage->Width * texelBytes /
--			      baseImage->Height) <= 16)) {
--				t->tile_bits |= R300_TXO_MICRO_TILE;
--			}
--		}
--
--		if (tObj->Target != GL_TEXTURE_RECTANGLE_NV) {
--			/* we can set macro tiling even for small textures, they will be untiled anyway */
--			t->tile_bits |= R300_TXO_MACRO_TILE;
--		}
--	}
--#endif
--
--	curOffset = 0;
--
--	if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
--		ASSERT(log2Width == log2Height);
-+	if (t->base.Target == GL_TEXTURE_CUBE_MAP)
- 		t->format |= R300_TX_FORMAT_CUBIC_MAP;
-+	if (t->base.Target == GL_TEXTURE_3D)
-+		t->format |= R300_TX_FORMAT_3D;
- 
--		for(i = 0; i < numLevels; i++) {
--			GLuint face;
--			for(face = 0; face < 6; face++)
--				compute_tex_image_offset(tObj, face, i, &curOffset);
--		}
--	} else {
--		if (tObj->Target == GL_TEXTURE_3D)
--                	t->format |= R300_TX_FORMAT_3D;
-+	t->size = (((firstImage->Width - 1) << R300_TX_WIDTHMASK_SHIFT)
-+		| ((firstImage->Height - 1) << R300_TX_HEIGHTMASK_SHIFT))
-+		| ((t->mt->lastLevel - t->mt->firstLevel) << R300_TX_MAX_MIP_LEVEL_SHIFT);
- 
--		for (i = 0; i < numLevels; i++)
--			compute_tex_image_offset(tObj, 0, i, &curOffset);
--	}
--
--	/* Align the total size of texture memory block.
--	 */
--	t->base.totalSize =
--	    (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK;
--
--	t->size =
--	    (((tObj->Image[0][t->base.firstLevel]->Width -
--	       1) << R300_TX_WIDTHMASK_SHIFT)
--	     | ((tObj->Image[0][t->base.firstLevel]->Height - 1) <<
--		R300_TX_HEIGHTMASK_SHIFT)
--	     | ((tObj->Image[0][t->base.firstLevel]->DepthLog2) <<
--		R300_TX_DEPTHMASK_SHIFT))
--	    | ((numLevels - 1) << R300_TX_MAX_MIP_LEVEL_SHIFT);
--
--	t->pitch = 0;
--
--	/* Only need to round to nearest 32 for textures, but the blitter
--	 * requires 64-byte aligned pitches, and we may/may not need the
--	 * blitter.   NPOT only!
--	 */
--	if (baseImage->IsCompressed) {
--		t->pitch |=
--		    (tObj->Image[0][t->base.firstLevel]->Width + 63) & ~(63);
--	} else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
--		unsigned int align = (64 / texelBytes) - 1;
--		t->pitch |= ((tObj->Image[0][t->base.firstLevel]->Width *
--			     texelBytes) + 63) & ~(63);
-+	if (t->base.Target == GL_TEXTURE_RECTANGLE_NV) {
-+		unsigned int align = (64 / t->mt->bpp) - 1;
- 		t->size |= R300_TX_SIZE_TXPITCH_EN;
- 		if (!t->image_override)
--			t->pitch_reg =
--			    (((tObj->Image[0][t->base.firstLevel]->Width) +
--			      align) & ~align) - 1;
--	} else {
--		t->pitch |=
--		    ((tObj->Image[0][t->base.firstLevel]->Width *
--		      texelBytes) + 63) & ~(63);
-+			t->pitch_reg = ((firstImage->Width + align) & ~align) - 1;
- 	}
- 
- 	if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
--	    if (tObj->Image[0][t->base.firstLevel]->Width > 2048)
-+	    if (firstImage->Width > 2048)
- 		t->pitch_reg |= R500_TXWIDTH_BIT11;
--	    if (tObj->Image[0][t->base.firstLevel]->Height > 2048)
-+	    if (firstImage->Height > 2048)
- 		t->pitch_reg |= R500_TXHEIGHT_BIT11;
- 	}
- }
- 
--/* ================================================================
-- * Texture unit state management
-- */
- 
--static GLboolean r300EnableTexture2D(GLcontext * ctx, int unit)
-+static void copy_rows(void* dst, GLuint dststride, const void* src, GLuint srcstride,
-+	GLuint numrows, GLuint rowsize)
- {
--	r300ContextPtr rmesa = R300_CONTEXT(ctx);
--	struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
--	struct gl_texture_object *tObj = texUnit->_Current;
--	r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
--
--	ASSERT(tObj->Target == GL_TEXTURE_2D || tObj->Target == GL_TEXTURE_1D);
--
--	if (t->base.dirty_images[0]) {
--		R300_FIREVERTICES(rmesa);
-+	assert(rowsize <= dststride);
-+	assert(rowsize <= srcstride);
- 
--		r300SetTexImages(rmesa, tObj);
--		r300UploadTexImages(rmesa, (r300TexObjPtr) tObj->DriverData, 0);
--		if (!t->base.memBlock && !t->image_override)
--			return GL_FALSE;
-+	if (rowsize == srcstride && rowsize == dststride) {
-+		memcpy(dst, src, numrows*rowsize);
-+	} else {
-+		GLuint i;
-+		for(i = 0; i < numrows; ++i) {
-+			memcpy(dst, src, rowsize);
-+			dst += dststride;
-+			src += srcstride;
-+		}
- 	}
--
--	return GL_TRUE;
- }
- 
--static GLboolean r300EnableTexture3D(GLcontext * ctx, int unit)
-+
-+/**
-+ * Ensure that the given image is stored in the given miptree from now on.
-+ */
-+static void migrate_image_to_miptree(r300_mipmap_tree *mt, r300_texture_image *image, int face, int level)
- {
--	r300ContextPtr rmesa = R300_CONTEXT(ctx);
--	struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
--	struct gl_texture_object *tObj = texUnit->_Current;
--	r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
-+	r300_mipmap_level *dstlvl = &mt->levels[level - mt->firstLevel];
-+	unsigned char *dest;
- 
--	ASSERT(tObj->Target == GL_TEXTURE_3D);
-+	assert(image->mt != mt);
-+	assert(dstlvl->width == image->base.Width);
-+	assert(dstlvl->height == image->base.Height);
-+	assert(dstlvl->depth == image->base.Depth);
- 
--	/* r300 does not support mipmaps for 3D textures. */
--	if ((tObj->MinFilter != GL_NEAREST) && (tObj->MinFilter != GL_LINEAR)) {
--		return GL_FALSE;
--	}
-+	radeon_bo_map(mt->bo, GL_TRUE);
-+	dest = mt->bo->ptr + dstlvl->faces[face].offset;
- 
--	if (t->base.dirty_images[0]) {
--		R300_FIREVERTICES(rmesa);
--		r300SetTexImages(rmesa, tObj);
--		r300UploadTexImages(rmesa, (r300TexObjPtr) tObj->DriverData, 0);
--		if (!t->base.memBlock)
--			return GL_FALSE;
--	}
-+	if (image->mt) {
-+		/* Format etc. should match, so we really just need a memcpy().
-+		 * In fact, that memcpy() could be done by the hardware in many
-+		 * cases, provided that we have a proper memory manager.
-+		 */
-+		r300_mipmap_level *srclvl = &image->mt->levels[image->mtlevel];
- 
--	return GL_TRUE;
--}
-+		assert(srclvl->size == dstlvl->size);
-+		assert(srclvl->rowstride == dstlvl->rowstride);
- 
--static GLboolean r300EnableTextureCube(GLcontext * ctx, int unit)
--{
--	r300ContextPtr rmesa = R300_CONTEXT(ctx);
--	struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
--	struct gl_texture_object *tObj = texUnit->_Current;
--	r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
--	GLuint face;
--
--	ASSERT(tObj->Target == GL_TEXTURE_CUBE_MAP);
--
--	if (t->base.dirty_images[0] || t->base.dirty_images[1] ||
--	    t->base.dirty_images[2] || t->base.dirty_images[3] ||
--	    t->base.dirty_images[4] || t->base.dirty_images[5]) {
--		/* flush */
--		R300_FIREVERTICES(rmesa);
--		/* layout memory space, once for all faces */
--		r300SetTexImages(rmesa, tObj);
--	}
-+		radeon_bo_map(image->mt->bo, GL_FALSE);
-+		memcpy(dest,
-+			image->mt->bo->ptr + srclvl->faces[face].offset,
-+			dstlvl->size);
-+		radeon_bo_unmap(image->mt->bo);
- 
--	/* upload (per face) */
--	for (face = 0; face < 6; face++) {
--		if (t->base.dirty_images[face]) {
--			r300UploadTexImages(rmesa,
--					    (r300TexObjPtr) tObj->DriverData,
--					    face);
--		}
--	}
-+		r300_miptree_unreference(image->mt);
-+	} else {
-+		uint srcrowstride = image->base.Width * image->base.TexFormat->TexelBytes;
- 
--	if (!t->base.memBlock) {
--		/* texmem alloc failed, use s/w fallback */
--		return GL_FALSE;
-+		if (mt->tilebits)
-+			WARN_ONCE("%s: tiling not supported yet", __FUNCTION__);
-+
-+		copy_rows(dest, dstlvl->rowstride, image->base.Data, srcrowstride,
-+			image->base.Height * image->base.Depth, srcrowstride);
-+
-+		_mesa_free_texmemory(image->base.Data);
-+		image->base.Data = 0;
- 	}
- 
--	return GL_TRUE;
-+	radeon_bo_unmap(mt->bo);
-+
-+	image->mt = mt;
-+	image->mtface = face;
-+	image->mtlevel = level;
-+	r300_miptree_reference(image->mt);
- }
- 
--static GLboolean r300EnableTextureRect(GLcontext * ctx, int unit)
-+
-+/**
-+ * Ensure the given texture is ready for rendering.
-+ *
-+ * Mostly this means populating the texture object's mipmap tree.
-+ */
-+static GLboolean r300_validate_texture(GLcontext * ctx, struct gl_texture_object *texObj)
- {
- 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
--	struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
--	struct gl_texture_object *tObj = texUnit->_Current;
--	r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
-+	r300TexObj *t = r300_tex_obj(texObj);
-+	r300_texture_image *baseimage = get_r300_texture_image(texObj->Image[0][texObj->BaseLevel]);
-+	int face, level;
-+
-+	if (t->validated || t->image_override)
-+		return GL_TRUE;
-+
-+	if (RADEON_DEBUG & DEBUG_TEXTURE)
-+		fprintf(stderr, "%s: Validating texture %p now\n", __FUNCTION__, texObj);
- 
--	ASSERT(tObj->Target == GL_TEXTURE_RECTANGLE_NV);
-+	if (baseimage->base.Border > 0)
-+		return GL_FALSE;
- 
--	if (t->base.dirty_images[0]) {
--		R300_FIREVERTICES(rmesa);
-+	/* Ensure a matching miptree exists.
-+	 *
-+	 * Differing mipmap trees can result when the app uses TexImage to
-+	 * change texture dimensions.
-+	 *
-+	 * Prefer to use base image's miptree if it
-+	 * exists, since that most likely contains more valid data (remember
-+	 * that the base level is usually significantly larger than the rest
-+	 * of the miptree, so cubemaps are the only possible exception).
-+	 */
-+	if (baseimage->mt &&
-+	    baseimage->mt != t->mt &&
-+	    r300_miptree_matches_texture(baseimage->mt, &t->base)) {
-+		r300_miptree_unreference(t->mt);
-+		t->mt = baseimage->mt;
-+		r300_miptree_reference(t->mt);
-+	} else if (t->mt && !r300_miptree_matches_texture(t->mt, &t->base)) {
-+		r300_miptree_unreference(t->mt);
-+		t->mt = 0;
-+	}
- 
--		r300SetTexImages(rmesa, tObj);
--		r300UploadTexImages(rmesa, (r300TexObjPtr) tObj->DriverData, 0);
--		if (!t->base.memBlock && !t->image_override &&
--		    !rmesa->prefer_gart_client_texturing)
-+	if (!t->mt) {
-+		if (RADEON_DEBUG & DEBUG_TEXTURE)
-+			fprintf(stderr, " Allocate new miptree\n");
-+		r300_try_alloc_miptree(rmesa, t, &baseimage->base, 0, texObj->BaseLevel);
-+		if (!t->mt) {
-+			_mesa_problem(ctx, "r300_validate_texture failed to alloc miptree");
- 			return GL_FALSE;
-+		}
- 	}
- 
-+	/* Ensure all images are stored in the single main miptree */
-+	for(face = 0; face < t->mt->faces; ++face) {
-+		for(level = t->mt->firstLevel; level <= t->mt->lastLevel; ++level) {
-+			r300_texture_image *image = get_r300_texture_image(texObj->Image[face][level]);
-+			if (RADEON_DEBUG & DEBUG_TEXTURE)
-+				fprintf(stderr, " face %i, level %i... ", face, level);
-+			if (t->mt == image->mt) {
-+				if (RADEON_DEBUG & DEBUG_TEXTURE)
-+					fprintf(stderr, "OK\n");
-+				continue;
-+			}
-+
-+			if (RADEON_DEBUG & DEBUG_TEXTURE)
-+				fprintf(stderr, "migrating\n");
-+			migrate_image_to_miptree(t->mt, image, face, level);
-+		}
-+	}
-+
-+	/* Configure the hardware registers (more precisely, the cached version
-+	 * of the hardware registers). */
-+	setup_hardware_state(rmesa, t);
-+
-+	t->validated = GL_TRUE;
- 	return GL_TRUE;
- }
- 
--static GLboolean r300UpdateTexture(GLcontext * ctx, int unit)
-+
-+/**
-+ * Ensure all enabled and complete textures are uploaded.
-+ */
-+void r300ValidateTextures(GLcontext * ctx)
- {
--	r300ContextPtr rmesa = R300_CONTEXT(ctx);
--	struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
--	struct gl_texture_object *tObj = texUnit->_Current;
--	r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
-+	int i;
- 
--	/* Fallback if there's a texture border */
--	if (tObj->Image[0][tObj->BaseLevel]->Border > 0)
--		return GL_FALSE;
-+	for (i = 0; i < ctx->Const.MaxTextureImageUnits; ++i) {
-+		if (!ctx->Texture.Unit[i]._ReallyEnabled)
-+			continue;
- 
--	/* Update state if this is a different texture object to last
--	 * time.
--	 */
--	if (rmesa->state.texture.unit[unit].texobj != t) {
--		if (rmesa->state.texture.unit[unit].texobj != NULL) {
--			/* The old texture is no longer bound to this texture unit.
--			 * Mark it as such.
--			 */
--
--			rmesa->state.texture.unit[unit].texobj->base.bound &=
--			    ~(1 << unit);
-+		if (!r300_validate_texture(ctx, ctx->Texture.Unit[i]._Current)) {
-+			_mesa_warning(ctx,
-+				      "failed to validate texture for unit %d.\n",
-+				      i);
- 		}
--
--		rmesa->state.texture.unit[unit].texobj = t;
--		t->base.bound |= (1 << unit);
--		driUpdateTextureLRU((driTextureObject *) t);	/* XXX: should be locked! */
- 	}
--
--	return !t->border_fallback;
- }
- 
- void r300SetTexOffset(__DRIcontext * pDRICtx, GLint texname,
-@@ -591,20 +421,18 @@ void r300SetTexOffset(__DRIcontext * pDRICtx, GLint texname,
- 	r300ContextPtr rmesa = pDRICtx->driverPrivate;
- 	struct gl_texture_object *tObj =
- 	    _mesa_lookup_texture(rmesa->radeon.glCtx, texname);
--	r300TexObjPtr t;
-+	r300TexObjPtr t = r300_tex_obj(tObj);
- 	uint32_t pitch_val;
- 
- 	if (!tObj)
- 		return;
- 
--	t = (r300TexObjPtr) tObj->DriverData;
--
- 	t->image_override = GL_TRUE;
- 
- 	if (!offset)
- 		return;
--
--	t->offset = offset;
-+    t->bo = NULL;
-+	t->override_offset = offset;
- 	t->pitch_reg &= (1 << 13) -1;
- 	pitch_val = pitch;
- 
-@@ -631,38 +459,96 @@ void r300SetTexOffset(__DRIcontext * pDRICtx, GLint texname,
- 	t->pitch_reg |= pitch_val;
- }
- 
--static GLboolean r300UpdateTextureUnit(GLcontext * ctx, int unit)
-+void r300SetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv)
- {
--	struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
--
--	if (texUnit->_ReallyEnabled & (TEXTURE_RECT_BIT)) {
--		return (r300EnableTextureRect(ctx, unit) &&
--			r300UpdateTexture(ctx, unit));
--	} else if (texUnit->_ReallyEnabled & (TEXTURE_1D_BIT | TEXTURE_2D_BIT)) {
--		return (r300EnableTexture2D(ctx, unit) &&
--			r300UpdateTexture(ctx, unit));
--	} else if (texUnit->_ReallyEnabled & (TEXTURE_3D_BIT)) {
--		return (r300EnableTexture3D(ctx, unit) &&
--			r300UpdateTexture(ctx, unit));
--	} else if (texUnit->_ReallyEnabled & (TEXTURE_CUBE_BIT)) {
--		return (r300EnableTextureCube(ctx, unit) &&
--			r300UpdateTexture(ctx, unit));
--	} else if (texUnit->_ReallyEnabled) {
--		return GL_FALSE;
--	} else {
--		return GL_TRUE;
--	}
--}
--
--void r300UpdateTextureState(GLcontext * ctx)
--{
--	int i;
-+    struct gl_texture_unit *texUnit;
-+    struct gl_texture_object *texObj;
-+    struct gl_texture_image *texImage;
-+	struct radeon_renderbuffer *rb;
-+	r300_texture_image *rImage;
-+	radeonContextPtr radeon;
-+	r300ContextPtr rmesa;
-+	GLframebuffer *fb;
-+	r300TexObjPtr t;
-+	uint32_t pitch_val;
- 
--	for (i = 0; i < 8; i++) {
--		if (!r300UpdateTextureUnit(ctx, i)) {
--			_mesa_warning(ctx,
--				      "failed to update texture state for unit %d.\n",
--				      i);
--		}
-+    target = GL_TEXTURE_RECTANGLE_ARB;
-+	radeon = pDRICtx->driverPrivate;
-+	rmesa = pDRICtx->driverPrivate;
-+	fb = dPriv->driverPrivate;
-+    texUnit = &radeon->glCtx->Texture.Unit[radeon->glCtx->Texture.CurrentUnit];
-+    texObj = _mesa_select_tex_object(radeon->glCtx, texUnit, target);
-+    texImage = _mesa_get_tex_image(radeon->glCtx, texObj, target, 0);
-+	rImage = get_r300_texture_image(texImage);
-+	t = r300_tex_obj(texObj);
-+    if (t == NULL) {
-+        return;
-+    }
-+
-+    radeon_update_renderbuffers(pDRICtx, dPriv);
-+    /* back & depth buffer are useless free them right away */
-+    rb = (void*)fb->Attachment[BUFFER_DEPTH].Renderbuffer;
-+    if (rb && rb->bo) {
-+        radeon_bo_unref(rb->bo);
-+        rb->bo = NULL;
-+    }
-+    rb = (void*)fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
-+    if (rb && rb->bo) {
-+        radeon_bo_unref(rb->bo);
-+        rb->bo = NULL;
-+    }
-+    rb = (void*)fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
-+    if (rb->bo == NULL) {
-+        /* Failed to BO for the buffer */
-+        return;
-+    }
-+
-+    _mesa_lock_texture(radeon->glCtx, texObj);
-+    if (t->bo) {
-+        t->bo = NULL;
-+    }
-+    if (t->mt) {
-+        t->mt = NULL;
-+    }
-+    if (rImage->mt) {
-+        r300_miptree_unreference(rImage->mt);
-+        rImage->mt = NULL;
-+    }
-+    _mesa_init_teximage_fields(radeon->glCtx, target, texImage,
-+                               rb->width, rb->height, rb->cpp, 0, rb->cpp);
-+	texImage->TexFormat = &_mesa_texformat_rgba8888_rev;
-+    rImage->bo = rb->bo;
-+
-+    t->bo = rb->bo;
-+    t->tile_bits = 0;
-+	t->image_override = GL_TRUE;
-+	t->override_offset = 0;
-+	t->pitch_reg &= (1 << 13) -1;
-+	pitch_val = rb->pitch;
-+	switch (rb->cpp) {
-+	case 4:
-+		t->format = R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8);
-+		t->filter |= tx_table[2].filter;
-+		pitch_val /= 4;
-+		break;
-+	case 3:
-+	default:
-+		t->format = R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8);
-+		t->filter |= tx_table[4].filter;
-+		pitch_val /= 4;
-+		break;
-+	case 2:
-+		t->format = R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5);
-+		t->filter |= tx_table[5].filter;
-+		pitch_val /= 2;
-+		break;
- 	}
-+	pitch_val--;
-+	t->size = ((rb->width - 1) << R300_TX_WIDTHMASK_SHIFT) |
-+              ((rb->height - 1) << R300_TX_HEIGHTMASK_SHIFT);
-+    t->size |= R300_TX_SIZE_TXPITCH_EN;
-+	t->pitch_reg |= pitch_val;
-+	t->validated = GL_TRUE;
-+    _mesa_unlock_texture(radeon->glCtx, texObj);
-+    return;
- }
-diff --git a/src/mesa/drivers/dri/r300/radeon_context.c b/src/mesa/drivers/dri/r300/radeon_context.c
-index 5267fe9..2bb679b 100644
---- a/src/mesa/drivers/dri/r300/radeon_context.c
-+++ b/src/mesa/drivers/dri/r300/radeon_context.c
-@@ -46,6 +46,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "drivers/common/driverfuncs.h"
- #include "swrast/swrast.h"
- 
-+#include "radeon_buffer.h"
- #include "radeon_screen.h"
- #include "radeon_ioctl.h"
- #include "radeon_macros.h"
-@@ -57,6 +58,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "utils.h"
- #include "vblank.h"
- #include "xmlpool.h"		/* for symbolic values of enum-type options */
-+#include "drirenderbuffer.h"
- 
- #define DRIVER_DATE "20060815"
- 
-@@ -189,6 +191,43 @@ GLboolean radeonInitContext(radeonContextPtr radeon,
-  */
- void radeonCleanupContext(radeonContextPtr radeon)
- {
-+    FILE *track;
-+	struct radeon_renderbuffer *rb;
-+	GLframebuffer *fb;
-+    
-+    fb = (void*)radeon->dri.drawable->driverPrivate;
-+    rb = (void *)fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
-+    if (rb && rb->bo) {
-+        radeon_bo_unref(rb->bo);
-+        rb->bo = NULL;
-+    }
-+    rb = (void *)fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
-+    if (rb && rb->bo) {
-+        radeon_bo_unref(rb->bo);
-+        rb->bo = NULL;
-+    }
-+    rb = (void *)fb->Attachment[BUFFER_DEPTH].Renderbuffer;
-+    if (rb && rb->bo) {
-+        radeon_bo_unref(rb->bo);
-+        rb->bo = NULL;
-+    }
-+    fb = (void*)radeon->dri.readable->driverPrivate;
-+    rb = (void *)fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
-+    if (rb && rb->bo) {
-+        radeon_bo_unref(rb->bo);
-+        rb->bo = NULL;
-+    }
-+    rb = (void *)fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
-+    if (rb && rb->bo) {
-+        radeon_bo_unref(rb->bo);
-+        rb->bo = NULL;
-+    }
-+    rb = (void *)fb->Attachment[BUFFER_DEPTH].Renderbuffer;
-+    if (rb && rb->bo) {
-+        radeon_bo_unref(rb->bo);
-+        rb->bo = NULL;
-+    }
-+
- 	/* _mesa_destroy_context() might result in calls to functions that
- 	 * depend on the DriverCtx, so don't set it to NULL before.
- 	 *
-@@ -202,6 +241,11 @@ void radeonCleanupContext(radeonContextPtr radeon)
- 		FREE(radeon->state.scissor.pClipRects);
- 		radeon->state.scissor.pClipRects = 0;
- 	}
-+    track = fopen("/tmp/tracklog", "w");
-+    if (track) {
-+        radeon_tracker_print(&radeon->radeonScreen->bom->tracker, track);
-+        fclose(track);
-+    }
- }
- 
- 
-@@ -218,7 +262,7 @@ void radeonSwapBuffers(__DRIdrawablePrivate * dPriv)
- 		ctx = radeon->glCtx;
- 
- 		if (ctx->Visual.doubleBufferMode) {
--			_mesa_notifySwapBuffers(ctx);	/* flush pending rendering comands */
-+			_mesa_notifySwapBuffers(ctx);/* flush pending rendering comands */
- 			if (radeon->doPageFlip) {
- 				radeonPageFlip(dPriv);
- 			} else {
-@@ -258,6 +302,232 @@ void radeonCopySubBuffer(__DRIdrawablePrivate * dPriv,
-     }
- }
- 
-+static void
-+radeon_make_kernel_renderbuffer_current(radeonContextPtr radeon,
-+					GLframebuffer *draw)
-+{
-+	/* if radeon->fake */
-+	struct radeon_renderbuffer *rb;
-+
-+	if ((rb = (void *)draw->Attachment[BUFFER_FRONT_LEFT].Renderbuffer)) {
-+
-+		if (!rb->bo) {
-+			rb->bo = radeon_bo_open(radeon->radeonScreen->bom,
-+						radeon->radeonScreen->frontOffset,
-+						0,
-+						0,
-+						RADEON_GEM_DOMAIN_VRAM,
-+						0);
-+		}
-+		rb->cpp = radeon->radeonScreen->cpp;
-+		rb->pitch = radeon->radeonScreen->frontPitch * rb->cpp;
-+	}
-+	if ((rb = (void *)draw->Attachment[BUFFER_BACK_LEFT].Renderbuffer)) {
-+		if (!rb->bo) {
-+			rb->bo = radeon_bo_open(radeon->radeonScreen->bom,
-+						radeon->radeonScreen->backOffset,
-+						0,
-+						0,
-+						RADEON_GEM_DOMAIN_VRAM,
-+						0);
-+		}
-+		rb->cpp = radeon->radeonScreen->cpp;
-+		rb->pitch = radeon->radeonScreen->backPitch * rb->cpp;
-+	}
-+	if ((rb = (void *)draw->Attachment[BUFFER_DEPTH].Renderbuffer)) {
-+		if (!rb->bo) {
-+			rb->bo = radeon_bo_open(radeon->radeonScreen->bom,
-+						radeon->radeonScreen->depthOffset,
-+						0,
-+						0,
-+						RADEON_GEM_DOMAIN_VRAM,
-+						0);
-+		}
-+		rb->cpp = radeon->radeonScreen->cpp;
-+		rb->pitch = radeon->radeonScreen->depthPitch * rb->cpp;
-+	}
-+}
-+
-+static void
-+radeon_make_renderbuffer_current(radeonContextPtr radeon,
-+					GLframebuffer *draw)
-+{
-+	int size = 4096*4096*4;
-+	/* if radeon->fake */
-+	struct radeon_renderbuffer *rb;
-+
-+	if (radeon->radeonScreen->kernel_mm) {
-+		radeon_make_kernel_renderbuffer_current(radeon, draw);
-+		return;
-+	}
-+			
-+
-+	if ((rb = (void *)draw->Attachment[BUFFER_FRONT_LEFT].Renderbuffer)) {
-+		if (!rb->bo) {
-+			rb->bo = radeon_bo_open(radeon->radeonScreen->bom,
-+						radeon->radeonScreen->frontOffset +
-+						radeon->radeonScreen->fbLocation,
-+						size,
-+						4096,
-+						RADEON_GEM_DOMAIN_VRAM,
-+						0);
-+		}
-+		rb->cpp = radeon->radeonScreen->cpp;
-+		rb->pitch = radeon->radeonScreen->frontPitch * rb->cpp;
-+	}
-+	if ((rb = (void *)draw->Attachment[BUFFER_BACK_LEFT].Renderbuffer)) {
-+		if (!rb->bo) {
-+			rb->bo = radeon_bo_open(radeon->radeonScreen->bom,
-+						radeon->radeonScreen->backOffset +
-+						radeon->radeonScreen->fbLocation,
-+						size,
-+						4096,
-+						RADEON_GEM_DOMAIN_VRAM,
-+						0);
-+		}
-+		rb->cpp = radeon->radeonScreen->cpp;
-+		rb->pitch = radeon->radeonScreen->backPitch * rb->cpp;
-+	}
-+	if ((rb = (void *)draw->Attachment[BUFFER_DEPTH].Renderbuffer)) {
-+		if (!rb->bo) {
-+			rb->bo = radeon_bo_open(radeon->radeonScreen->bom,
-+						radeon->radeonScreen->depthOffset +
-+						radeon->radeonScreen->fbLocation,
-+						size,
-+						4096,
-+						RADEON_GEM_DOMAIN_VRAM,
-+						0);
-+		}
-+		rb->cpp = radeon->radeonScreen->cpp;
-+		rb->pitch = radeon->radeonScreen->depthPitch * rb->cpp;
-+	}
-+}
-+
-+
-+void
-+radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
-+{
-+    unsigned int attachments[10];
-+    __DRIbuffer *buffers;
-+    __DRIscreen *screen;
-+	struct radeon_renderbuffer *rb;
-+    int i, count;
-+	GLframebuffer *draw;
-+	radeonContextPtr radeon;
-+
-+	draw = drawable->driverPrivate;
-+    screen = context->driScreenPriv;
-+	radeon = (radeonContextPtr) context->driverPrivate;
-+    i = 0;
-+	if ((rb = (void *)draw->Attachment[BUFFER_FRONT_LEFT].Renderbuffer)) {
-+        attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
-+    }
-+	if ((rb = (void *)draw->Attachment[BUFFER_BACK_LEFT].Renderbuffer)) {
-+        attachments[i++] = __DRI_BUFFER_BACK_LEFT;
-+    }
-+	if ((rb = (void *)draw->Attachment[BUFFER_DEPTH].Renderbuffer)) {
-+        attachments[i++] = __DRI_BUFFER_DEPTH;
-+    }
-+
-+    buffers = (*screen->dri2.loader->getBuffers)(drawable,
-+                                                 &drawable->w,
-+                                                 &drawable->h,
-+                                                 attachments, i,
-+                                                 &count,
-+                                                 drawable->loaderPrivate);
-+    if (buffers == NULL)
-+        return;
-+
-+    /* set one cliprect to cover the whole drawable */
-+    drawable->x = 0;
-+    drawable->y = 0;
-+    drawable->backX = 0;
-+    drawable->backY = 0;
-+    drawable->numClipRects = 1;
-+    drawable->pClipRects[0].x1 = 0;
-+    drawable->pClipRects[0].y1 = 0;
-+    drawable->pClipRects[0].x2 = drawable->w;
-+    drawable->pClipRects[0].y2 = drawable->h;
-+    drawable->numBackClipRects = 1;
-+    drawable->pBackClipRects[0].x1 = 0;
-+    drawable->pBackClipRects[0].y1 = 0;
-+    drawable->pBackClipRects[0].x2 = drawable->w;
-+    drawable->pBackClipRects[0].y2 = drawable->h;
-+    for (i = 0; i < count; i++) {
-+        switch (buffers[i].attachment) {
-+        case __DRI_BUFFER_FRONT_LEFT:
-+            rb = (void *)draw->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
-+            if (rb->bo) {
-+                radeon_bo_unref(rb->bo);
-+                rb->bo = NULL;
-+            }
-+            rb->cpp = buffers[i].cpp;
-+            rb->pitch = buffers[i].pitch;
-+            rb->width = drawable->w;
-+            rb->height = drawable->h;
-+            rb->has_surface = 0;
-+            rb->bo = radeon_bo_open(radeon->radeonScreen->bom,
-+                                    buffers[i].name,
-+                                    0,
-+                                    0,
-+                                    RADEON_GEM_DOMAIN_VRAM,
-+                                    buffers[i].flags);
-+            if (rb->bo == NULL) {
-+                fprintf(stderr, "failled to attach front %d\n",
-+                        buffers[i].name);
-+            }
-+            break;
-+        case __DRI_BUFFER_BACK_LEFT:
-+            rb = (void *)draw->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
-+            if (rb->bo) {
-+                radeon_bo_unref(rb->bo);
-+                rb->bo = NULL;
-+            }
-+            rb->cpp = buffers[i].cpp;
-+            rb->pitch = buffers[i].pitch;
-+            rb->width = drawable->w;
-+            rb->height = drawable->h;
-+            rb->has_surface = 0;
-+            rb->bo = radeon_bo_open(radeon->radeonScreen->bom,
-+                                    buffers[i].name,
-+                                    0,
-+                                    0,
-+                                    RADEON_GEM_DOMAIN_VRAM,
-+                                    buffers[i].flags);
-+            break;
-+        case __DRI_BUFFER_DEPTH:
-+            rb = (void *)draw->Attachment[BUFFER_DEPTH].Renderbuffer;
-+            if (rb->bo) {
-+                radeon_bo_unref(rb->bo);
-+                rb->bo = NULL;
-+            }
-+            rb->cpp = buffers[i].cpp;
-+            rb->pitch = buffers[i].pitch;
-+            rb->width = drawable->w;
-+            rb->height = drawable->h;
-+            rb->has_surface = 0;
-+            rb->bo = radeon_bo_open(radeon->radeonScreen->bom,
-+                                    buffers[i].name,
-+                                    0,
-+                                    0,
-+                                    RADEON_GEM_DOMAIN_VRAM,
-+                                    buffers[i].flags);
-+            break;
-+        case __DRI_BUFFER_STENCIL:
-+            break;
-+        case __DRI_BUFFER_ACCUM:
-+        default:
-+            fprintf(stderr,
-+                    "unhandled buffer attach event, attacment type %d\n",
-+                    buffers[i].attachment);
-+            return;
-+        }
-+    }
-+	radeon = (radeonContextPtr) context->driverPrivate;
-+	driUpdateFramebufferSize(radeon->glCtx, drawable);
-+}
-+
-+
- /* Force the context `c' to be the current context and associate with it
-  * buffer `b'.
-  */
-@@ -265,51 +535,71 @@ GLboolean radeonMakeCurrent(__DRIcontextPrivate * driContextPriv,
- 			    __DRIdrawablePrivate * driDrawPriv,
- 			    __DRIdrawablePrivate * driReadPriv)
- {
--	if (driContextPriv) {
--		radeonContextPtr radeon =
--			(radeonContextPtr) driContextPriv->driverPrivate;
-+	radeonContextPtr radeon;
-+	GLframebuffer *dfb, *rfb;
- 
-+	if (!driContextPriv) {
- 		if (RADEON_DEBUG & DEBUG_DRI)
--			fprintf(stderr, "%s ctx %p\n", __FUNCTION__,
--				radeon->glCtx);
--
--		if (radeon->dri.drawable != driDrawPriv) {
--			if (driDrawPriv->swap_interval == (unsigned)-1) {
--				driDrawPriv->vblFlags =
--					(radeon->radeonScreen->irq != 0)
--					? driGetDefaultVBlankFlags(&radeon->
--								   optionCache)
--					: VBLANK_FLAG_NO_IRQ;
-+			fprintf(stderr, "%s ctx is null\n", __FUNCTION__);
-+		_mesa_make_current(NULL, NULL, NULL);
-+		return GL_TRUE;
-+	}
-+	radeon = (radeonContextPtr) driContextPriv->driverPrivate;
-+	dfb = driDrawPriv->driverPrivate;
-+	rfb = driReadPriv->driverPrivate;
-+
-+	if (driContextPriv->driScreenPriv->dri2.enabled) {    
-+		radeon_update_renderbuffers(driContextPriv, driDrawPriv);
-+		if (driDrawPriv != driReadPriv)
-+			radeon_update_renderbuffers(driContextPriv, driReadPriv);
-+		radeon->state.color.rrb =
-+			(void *)dfb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
-+		radeon->state.depth_buffer =
-+			(void *)dfb->Attachment[BUFFER_DEPTH].Renderbuffer;
-+	}
- 
--				driDrawableInitVBlank(driDrawPriv);
--			}
--		}
- 
--		radeon->dri.readable = driReadPriv;
-+	if (RADEON_DEBUG & DEBUG_DRI)
-+		fprintf(stderr, "%s ctx %p\n", __FUNCTION__, radeon->glCtx);
- 
--		if (radeon->dri.drawable != driDrawPriv ||
--		    radeon->lastStamp != driDrawPriv->lastStamp) {
--			radeon->dri.drawable = driDrawPriv;
-+	driUpdateFramebufferSize(radeon->glCtx, driDrawPriv);
-+	if (driReadPriv != driDrawPriv)
-+		driUpdateFramebufferSize(radeon->glCtx, driReadPriv);
-+
-+	if (!driContextPriv->driScreenPriv->dri2.enabled) {
-+		radeon_make_renderbuffer_current(radeon, dfb);
-+	}
-+	
-+	_mesa_make_current(radeon->glCtx, dfb, rfb);
-+
-+	if (radeon->dri.drawable != driDrawPriv) {
-+		if (driDrawPriv->swap_interval == (unsigned)-1) {
-+			driDrawPriv->vblFlags =
-+				(radeon->radeonScreen->irq != 0)
-+				? driGetDefaultVBlankFlags(&radeon->
-+							   optionCache)
-+					: VBLANK_FLAG_NO_IRQ;
- 
--			radeonSetCliprects(radeon);
--			r300UpdateViewportOffset(radeon->glCtx);
-+			driDrawableInitVBlank(driDrawPriv);
- 		}
-+	}
- 
--		_mesa_make_current(radeon->glCtx,
--				    (GLframebuffer *) driDrawPriv->
--				    driverPrivate,
--				    (GLframebuffer *) driReadPriv->
--				    driverPrivate);
-+	radeon->dri.readable = driReadPriv;
- 
--		_mesa_update_state(radeon->glCtx);		
-+	if (radeon->dri.drawable != driDrawPriv ||
-+	    radeon->lastStamp != driDrawPriv->lastStamp) {
-+		radeon->dri.drawable = driDrawPriv;
- 
--		radeonUpdatePageFlipping(radeon);
--	} else {
--		if (RADEON_DEBUG & DEBUG_DRI)
--			fprintf(stderr, "%s ctx is null\n", __FUNCTION__);
--		_mesa_make_current(0, 0, 0);
-+		radeonSetCliprects(radeon);
-+		r300UpdateViewportOffset(radeon->glCtx);
- 	}
- 
-+	_mesa_update_state(radeon->glCtx);
-+
-+    if (!driContextPriv->driScreenPriv->dri2.enabled) {    
-+	    radeonUpdatePageFlipping(radeon);
-+    }
-+
- 	if (RADEON_DEBUG & DEBUG_DRI)
- 		fprintf(stderr, "End %s\n", __FUNCTION__);
- 	return GL_TRUE;
-diff --git a/src/mesa/drivers/dri/r300/radeon_context.h b/src/mesa/drivers/dri/r300/radeon_context.h
-index 47cbc22..d5bbf29 100644
---- a/src/mesa/drivers/dri/r300/radeon_context.h
-+++ b/src/mesa/drivers/dri/r300/radeon_context.h
-@@ -132,12 +132,13 @@ struct radeon_scissor_state {
- 
- struct radeon_colorbuffer_state {
- 	GLuint clear;
--	GLint drawOffset, drawPitch;
-+	struct radeon_renderbuffer *rrb;
- };
- 
- struct radeon_state {
- 	struct radeon_colorbuffer_state color;
- 	struct radeon_scissor_state scissor;
-+	struct radeon_renderbuffer *depth_buffer;
- };
- 
- /**
-@@ -202,6 +203,7 @@ extern GLboolean radeonMakeCurrent(__DRIcontextPrivate * driContextPriv,
- 				   __DRIdrawablePrivate * driDrawPriv,
- 				   __DRIdrawablePrivate * driReadPriv);
- extern GLboolean radeonUnbindContext(__DRIcontextPrivate * driContextPriv);
-+void radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable);
- 
- /* ================================================================
-  * Debugging:
-diff --git a/src/mesa/drivers/dri/r300/radeon_ioctl.c b/src/mesa/drivers/dri/r300/radeon_ioctl.c
-index 36502eb..c8d6bf9 100644
---- a/src/mesa/drivers/dri/r300/radeon_ioctl.c
-+++ b/src/mesa/drivers/dri/r300/radeon_ioctl.c
-@@ -42,6 +42,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "swrast/swrast.h"
- #include "r300_context.h"
- #include "radeon_ioctl.h"
-+#include "radeon_buffer.h"
- #include "r300_ioctl.h"
- #include "r300_state.h"
- #include "radeon_reg.h"
-@@ -171,7 +172,7 @@ void radeonCopyBuffer(__DRIdrawablePrivate * dPriv,
- 	assert(dPriv->driContextPriv->driverPrivate);
- 
- 	radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
--
-+	
- 	if (RADEON_DEBUG & DEBUG_IOCTL) {
- 		fprintf(stderr, "\n%s( %p )\n\n", __FUNCTION__,
- 			(void *)radeon->glCtx);
-@@ -261,6 +262,8 @@ void radeonPageFlip(__DRIdrawablePrivate * dPriv)
- 	GLint ret;
- 	GLboolean missed_target;
- 	__DRIscreenPrivate *psp = dPriv->driScreenPriv;
-+	GLframebuffer *fb = dPriv->driverPrivate;
-+	struct radeon_renderbuffer *rrb;
- 
- 	assert(dPriv);
- 	assert(dPriv->driContextPriv);
-@@ -268,6 +271,8 @@ void radeonPageFlip(__DRIdrawablePrivate * dPriv)
- 
- 	radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
- 
-+	rrb = (void *)fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
-+
- 	if (RADEON_DEBUG & DEBUG_IOCTL) {
- 		fprintf(stderr, "%s: pfCurrentPage: %d\n", __FUNCTION__,
- 			radeon->sarea->pfCurrentPage);
-@@ -315,32 +320,10 @@ void radeonPageFlip(__DRIdrawablePrivate * dPriv)
- 	radeon->swap_count++;
- 	(void)(*psp->systemTime->getUST) (&radeon->swap_ust);
- 
--        driFlipRenderbuffers(radeon->glCtx->WinSysDrawBuffer, 
-+        driFlipRenderbuffers(radeon->glCtx->WinSysDrawBuffer,
-                              radeon->sarea->pfCurrentPage);
- 
--	if (radeon->sarea->pfCurrentPage == 1) {
--		radeon->state.color.drawOffset = radeon->radeonScreen->frontOffset;
--		radeon->state.color.drawPitch = radeon->radeonScreen->frontPitch;
--	} else {
--		radeon->state.color.drawOffset = radeon->radeonScreen->backOffset;
--		radeon->state.color.drawPitch = radeon->radeonScreen->backPitch;
--	}
--
--	if (IS_R300_CLASS(radeon->radeonScreen)) {
--		r300ContextPtr r300 = (r300ContextPtr)radeon;
--		R300_STATECHANGE(r300, cb);
--		r300->hw.cb.cmd[R300_CB_OFFSET] = r300->radeon.state.color.drawOffset + 
--						r300->radeon.radeonScreen->fbLocation;
--		r300->hw.cb.cmd[R300_CB_PITCH] = r300->radeon.state.color.drawPitch;
--		
--		if (r300->radeon.radeonScreen->cpp == 4)
--			r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_ARGB8888;
--		else
--			r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_RGB565;
--	
--		if (r300->radeon.sarea->tiling_enabled)
--			r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_TILE_ENABLE;
--	}
-+	radeon->state.color.rrb = rrb;
- }
- 
- void radeonWaitForIdleLocked(radeonContextPtr radeon)
-@@ -391,6 +374,7 @@ void radeonFinish(GLcontext * ctx)
- 		radeonEmitIrqLocked(radeon);
- 		UNLOCK_HARDWARE(radeon);
- 		radeonWaitIrq(radeon);
--	} else
-+	} else {
- 		radeonWaitForIdle(radeon);
-+	}
- }
-diff --git a/src/mesa/drivers/dri/r300/radeon_lock.c b/src/mesa/drivers/dri/r300/radeon_lock.c
-index 4f47afd..a1b2163 100644
---- a/src/mesa/drivers/dri/r300/radeon_lock.c
-+++ b/src/mesa/drivers/dri/r300/radeon_lock.c
-@@ -59,11 +59,11 @@ int prevLockLine = 0;
- void radeonUpdatePageFlipping(radeonContextPtr rmesa)
- {
- 	int use_back;
-+	__DRIdrawablePrivate *const drawable = rmesa->dri.drawable;
-+	GLframebuffer *fb = drawable->driverPrivate;
- 
- 	rmesa->doPageFlip = rmesa->sarea->pfState;
- 	if (rmesa->glCtx->WinSysDrawBuffer) {
--		driFlipRenderbuffers(rmesa->glCtx->WinSysDrawBuffer,
--				     rmesa->sarea->pfCurrentPage);
- 		r300UpdateDrawBuffer(rmesa->glCtx);
- 	}
- 
-@@ -72,16 +72,12 @@ void radeonUpdatePageFlipping(radeonContextPtr rmesa)
- 	     BUFFER_BACK_LEFT) : 1;
- 	use_back ^= (rmesa->sarea->pfCurrentPage == 1);
- 
--	if (use_back) {
--		rmesa->state.color.drawOffset =
--		    rmesa->radeonScreen->backOffset;
--		rmesa->state.color.drawPitch = rmesa->radeonScreen->backPitch;
--	} else {
--		rmesa->state.color.drawOffset =
--		    rmesa->radeonScreen->frontOffset;
--		rmesa->state.color.drawPitch =
--		    rmesa->radeonScreen->frontPitch;
--	}
-+	if (use_back)
-+		rmesa->state.color.rrb = (void *)fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
-+	else
-+		rmesa->state.color.rrb = (void *)fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
-+
-+	rmesa->state.depth_buffer = (void *)fb->Attachment[BUFFER_DEPTH].Renderbuffer;
- }
- 
- /* Update the hardware state.  This is called if another context has
-@@ -98,7 +94,6 @@ void radeonGetLock(radeonContextPtr rmesa, GLuint flags)
- 	__DRIdrawablePrivate *const readable = rmesa->dri.readable;
- 	__DRIscreenPrivate *sPriv = rmesa->dri.screen;
- 	drm_radeon_sarea_t *sarea = rmesa->sarea;
--	r300ContextPtr r300 = (r300ContextPtr) rmesa;
- 
- 	assert(drawable != NULL);
- 
-@@ -125,12 +120,9 @@ void radeonGetLock(radeonContextPtr rmesa, GLuint flags)
- 	}
- 
- 	if (sarea->ctx_owner != rmesa->dri.hwContext) {
--		int i;
--
- 		sarea->ctx_owner = rmesa->dri.hwContext;
--		for (i = 0; i < r300->nr_heaps; i++) {
--			DRI_AGE_TEXTURES(r300->texture_heaps[i]);
--		}
-+		if (!rmesa->radeonScreen->kernel_mm)
-+		    radeon_bo_legacy_texture_age(rmesa->radeonScreen->bom);
- 	}
- 
- 	rmesa->lost_context = GL_TRUE;
-diff --git a/src/mesa/drivers/dri/r300/radeon_lock.h b/src/mesa/drivers/dri/r300/radeon_lock.h
-index a344837..eaef49c 100644
---- a/src/mesa/drivers/dri/r300/radeon_lock.h
-+++ b/src/mesa/drivers/dri/r300/radeon_lock.h
-@@ -97,19 +97,23 @@ extern int prevLockLine;
- 	do {								\
- 		char __ret = 0;						\
- 		DEBUG_CHECK_LOCK();					\
-+        if (!(rmesa)->radeonScreen->driScreen->dri2.enabled) { \
- 		DRM_CAS((rmesa)->dri.hwLock, (rmesa)->dri.hwContext,	\
- 			(DRM_LOCK_HELD | (rmesa)->dri.hwContext), __ret); \
- 		if (__ret)						\
- 			radeonGetLock((rmesa), 0);			\
-+        }\
- 		DEBUG_LOCK();						\
- 	} while (0)
- 
- #define UNLOCK_HARDWARE( rmesa )					\
- 	do {								\
-+        if (!(rmesa)->radeonScreen->driScreen->dri2.enabled) { \
- 		DRM_UNLOCK((rmesa)->dri.fd,				\
- 			(rmesa)->dri.hwLock,				\
- 			(rmesa)->dri.hwContext);			\
- 		DEBUG_RESET();						\
-+        }\
- 	} while (0)
- 
- #endif				/* __RADEON_LOCK_H__ */
-diff --git a/src/mesa/drivers/dri/r300/radeon_span.c b/src/mesa/drivers/dri/r300/radeon_span.c
-index 16f9fb9..30dde80 100644
---- a/src/mesa/drivers/dri/r300/radeon_span.c
-+++ b/src/mesa/drivers/dri/r300/radeon_span.c
-@@ -48,7 +48,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "r300_ioctl.h"
- #include "radeon_span.h"
- 
--#include "drirenderbuffer.h"
-+#include "radeon_buffer.h"
- 
- #define DBG 0
- 
-@@ -58,21 +58,18 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-  * information.
-  */
- #define LOCAL_VARS						\
--   driRenderbuffer *drb = (driRenderbuffer *) rb;		\
--   const __DRIdrawablePrivate *dPriv = drb->dPriv;		\
-+   struct radeon_renderbuffer *rrb = (void *) rb;		\
-+   const __DRIdrawablePrivate *dPriv = rrb->dPriv;		\
-    const GLuint bottom = dPriv->h - 1;				\
--   GLubyte *buf = (GLubyte *) drb->flippedData			\
--      + (dPriv->y * drb->flippedPitch + dPriv->x) * drb->cpp;	\
--   GLuint p;							\
--   (void) p;
-+   GLuint p;						\
-+   (void)p;
- 
- #define LOCAL_DEPTH_VARS				\
--   driRenderbuffer *drb = (driRenderbuffer *) rb;	\
--   const __DRIdrawablePrivate *dPriv = drb->dPriv;	\
-+   struct radeon_renderbuffer *rrb = (void *) rb;	\
-+   const __DRIdrawablePrivate *dPriv = rrb->dPriv;	\
-    const GLuint bottom = dPriv->h - 1;			\
-    GLuint xo = dPriv->x;				\
--   GLuint yo = dPriv->y;				\
--   GLubyte *buf = (GLubyte *) drb->Base.Data;
-+   GLuint yo = dPriv->y;
- 
- #define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS
- 
-@@ -82,6 +79,133 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- 
- #define HW_UNLOCK()
- 
-+static GLubyte *radeon_ptr32(const struct radeon_renderbuffer * rrb,
-+                             GLint x, GLint y)
-+{
-+    GLubyte *ptr = rrb->bo->ptr;
-+    uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
-+    GLint offset;
-+    GLint nmacroblkpl;
-+    GLint nmicroblkpl;
-+
-+    if (rrb->has_surface || !(rrb->bo->flags & mask)) {
-+        offset = x * rrb->cpp + y * rrb->pitch;
-+    } else {
-+        offset = 0;
-+        if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
-+            if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
-+                nmacroblkpl = rrb->pitch >> 5;
-+                offset += ((y >> 4) * nmacroblkpl) << 11;
-+                offset += ((y & 15) >> 1) << 8;
-+                offset += (y & 1) << 4;
-+                offset += (x >> 5) << 11;
-+                offset += ((x & 31) >> 2) << 5;
-+                offset += (x & 3) << 2;
-+            } else {
-+                nmacroblkpl = rrb->pitch >> 6;
-+                offset += ((y >> 3) * nmacroblkpl) << 11;
-+                offset += (y & 7) << 8;
-+                offset += (x >> 6) << 11;
-+                offset += ((x & 63) >> 3) << 5;
-+                offset += (x & 7) << 2;
-+            }
-+        } else {
-+            nmicroblkpl = ((rrb->pitch + 31) & ~31) >> 5;
-+            offset += (y * nmicroblkpl) << 5;
-+            offset += (x >> 3) << 5;
-+            offset += (x & 7) << 2;
-+        }
-+    }
-+    return &ptr[offset];
-+}
-+
-+static GLubyte *radeon_ptr16(const struct radeon_renderbuffer * rrb,
-+                             GLint x, GLint y)
-+{
-+    GLubyte *ptr = rrb->bo->ptr;
-+    uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
-+    GLint offset;
-+    GLint nmacroblkpl;
-+    GLint nmicroblkpl;
-+
-+    if (rrb->has_surface || !(rrb->bo->flags & mask)) {
-+        offset = x * rrb->cpp + y * rrb->pitch;
-+    } else {
-+        offset = 0;
-+        if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
-+            if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
-+                nmacroblkpl = rrb->pitch >> 6;
-+                offset += ((y >> 4) * nmacroblkpl) << 11;
-+                offset += ((y & 15) >> 1) << 8;
-+                offset += (y & 1) << 4;
-+                offset += (x >> 6) << 11;
-+                offset += ((x & 63) >> 3) << 5;
-+                offset += (x & 7) << 1;
-+            } else {
-+                nmacroblkpl = rrb->pitch >> 7;
-+                offset += ((y >> 3) * nmacroblkpl) << 11;
-+                offset += (y & 7) << 8;
-+                offset += (x >> 7) << 11;
-+                offset += ((x & 127) >> 4) << 5;
-+                offset += (x & 15) << 2;
-+            }
-+        } else {
-+            nmicroblkpl = ((rrb->pitch + 31) & ~31) >> 5;
-+            offset += (y * nmicroblkpl) << 5;
-+            offset += (x >> 4) << 5;
-+            offset += (x & 15) << 2;
-+        }
-+    }
-+    return &ptr[offset];
-+}
-+
-+static GLubyte *radeon_ptr(const struct radeon_renderbuffer * rrb,
-+                           GLint x, GLint y)
-+{
-+    GLubyte *ptr = rrb->bo->ptr;
-+    uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
-+    GLint offset;
-+    GLint microblkxs;
-+    GLint macroblkxs;
-+    GLint nmacroblkpl;
-+    GLint nmicroblkpl;
-+
-+    if (rrb->has_surface || !(rrb->bo->flags & mask)) {
-+        offset = x * rrb->cpp + y * rrb->pitch;
-+    } else {
-+        offset = 0;
-+        if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
-+            if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
-+                microblkxs = 16 / rrb->cpp;
-+                macroblkxs = 128 / rrb->cpp;
-+                nmacroblkpl = rrb->pitch / macroblkxs;
-+                offset += ((y >> 4) * nmacroblkpl) << 11;
-+                offset += ((y & 15) >> 1) << 8;
-+                offset += (y & 1) << 4;
-+                offset += (x / macroblkxs) << 11;
-+                offset += ((x & (macroblkxs - 1)) / microblkxs) << 5;
-+                offset += (x & (microblkxs - 1)) * rrb->cpp;
-+            } else {
-+                microblkxs = 32 / rrb->cpp;
-+                macroblkxs = 256 / rrb->cpp;
-+                nmacroblkpl = rrb->pitch / macroblkxs;
-+                offset += ((y >> 3) * nmacroblkpl) << 11;
-+                offset += (y & 7) << 8;
-+                offset += (x / macroblkxs) << 11;
-+                offset += ((x & (macroblkxs - 1)) / microblkxs) << 5;
-+                offset += (x & (microblkxs - 1)) * rrb->cpp;
-+            }
-+        } else {
-+            microblkxs = 32 / rrb->cpp;
-+            nmicroblkpl = ((rrb->pitch + 31) & ~31) >> 5;
-+            offset += (y * nmicroblkpl) << 5;
-+            offset += (x / microblkxs) << 5;
-+            offset += (x & (microblkxs - 1)) * rrb->cpp;
-+        }
-+    }
-+    return &ptr[offset];
-+}
-+
- /* ================================================================
-  * Color buffer
-  */
-@@ -93,7 +217,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- 
- #define TAG(x)    radeon##x##_RGB565
- #define TAG2(x,y) radeon##x##_RGB565##y
--#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 2)
-+#define GET_PTR(X,Y) radeon_ptr16(rrb, (X), (Y))
- #include "spantmp2.h"
- 
- /* 32 bit, ARGB8888 color spanline and pixel functions
-@@ -103,7 +227,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- 
- #define TAG(x)    radeon##x##_ARGB8888
- #define TAG2(x,y) radeon##x##_ARGB8888##y
--#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 4)
-+#define GET_PTR(X,Y) radeon_ptr32(rrb, (X), (Y))
- #include "spantmp2.h"
- 
- /* ================================================================
-@@ -120,65 +244,15 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-  * too...
-  */
- 
--static GLuint radeon_mba_z32(const driRenderbuffer * drb, GLint x, GLint y)
--{
--	GLuint pitch = drb->pitch;
--	if (drb->depthHasSurface) {
--		return 4 * (x + y * pitch);
--	} else {
--		GLuint ba, address = 0;	/* a[0..1] = 0           */
--
--#ifdef COMPILE_R300
--		ba = (y / 8) * (pitch / 8) + (x / 8);
--#else
--		ba = (y / 16) * (pitch / 16) + (x / 16);
--#endif
--
--		address |= (x & 0x7) << 2;	/* a[2..4] = x[0..2]     */
--		address |= (y & 0x3) << 5;	/* a[5..6] = y[0..1]     */
--		address |= (((x & 0x10) >> 2) ^ (y & 0x4)) << 5;	/* a[7]    = x[4] ^ y[2] */
--		address |= (ba & 0x3) << 8;	/* a[8..9] = ba[0..1]    */
--
--		address |= (y & 0x8) << 7;	/* a[10]   = y[3]        */
--		address |= (((x & 0x8) << 1) ^ (y & 0x10)) << 7;	/* a[11]   = x[3] ^ y[4] */
--		address |= (ba & ~0x3) << 10;	/* a[12..] = ba[2..]     */
--
--		return address;
--	}
--}
--
--static INLINE GLuint
--radeon_mba_z16(const driRenderbuffer * drb, GLint x, GLint y)
--{
--	GLuint pitch = drb->pitch;
--	if (drb->depthHasSurface) {
--		return 2 * (x + y * pitch);
--	} else {
--		GLuint ba, address = 0;	/* a[0]    = 0           */
--
--		ba = (y / 16) * (pitch / 32) + (x / 32);
--
--		address |= (x & 0x7) << 1;	/* a[1..3] = x[0..2]     */
--		address |= (y & 0x7) << 4;	/* a[4..6] = y[0..2]     */
--		address |= (x & 0x8) << 4;	/* a[7]    = x[3]        */
--		address |= (ba & 0x3) << 8;	/* a[8..9] = ba[0..1]    */
--		address |= (y & 0x8) << 7;	/* a[10]   = y[3]        */
--		address |= ((x & 0x10) ^ (y & 0x10)) << 7;	/* a[11]   = x[4] ^ y[4] */
--		address |= (ba & ~0x3) << 10;	/* a[12..] = ba[2..]     */
--
--		return address;
--	}
--}
--
- /* 16-bit depth buffer functions
-  */
- #define VALUE_TYPE GLushort
- 
- #define WRITE_DEPTH( _x, _y, d )					\
--   *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo )) = d;
-+   *(GLushort *)radeon_ptr(rrb, _x + xo, _y + yo) = d
- 
- #define READ_DEPTH( d, _x, _y )						\
--   d = *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo ));
-+   d = *(GLushort *)radeon_ptr(rrb, _x + xo, _y + yo)
- 
- #define TAG(x) radeon##x##_z16
- #include "depthtmp.h"
-@@ -193,35 +267,36 @@ radeon_mba_z16(const driRenderbuffer * drb, GLint x, GLint y)
- #ifdef COMPILE_R300
- #define WRITE_DEPTH( _x, _y, d )					\
- do {									\
--   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
--   GLuint tmp = *(GLuint *)(buf + offset);				\
-+   GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x + xo, _y + yo );		\
-+   GLuint tmp = *_ptr;				\
-    tmp &= 0x000000ff;							\
-    tmp |= ((d << 8) & 0xffffff00);					\
--   *(GLuint *)(buf + offset) = tmp;					\
-+   *_ptr = tmp;					\
- } while (0)
- #else
- #define WRITE_DEPTH( _x, _y, d )					\
- do {									\
--   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
--   GLuint tmp = *(GLuint *)(buf + offset);				\
-+   GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x + xo, _y + yo );		\
-+   GLuint tmp = *_ptr;				\
-    tmp &= 0xff000000;							\
-    tmp |= ((d) & 0x00ffffff);						\
--   *(GLuint *)(buf + offset) = tmp;					\
-+   *_ptr = tmp;					\
- } while (0)
- #endif
- 
- #ifdef COMPILE_R300
- #define READ_DEPTH( d, _x, _y )						\
-   do { \
--    d = (*(GLuint *)(buf + radeon_mba_z32( drb, _x + xo,		\
--					 _y + yo )) & 0xffffff00) >> 8; \
-+    d = (*(GLuint*)(radeon_ptr32(rrb, _x + xo, _y + yo)) & 0xffffff00) >> 8; \
-   }while(0)
- #else
- #define READ_DEPTH( d, _x, _y )						\
--   d = *(GLuint *)(buf + radeon_mba_z32( drb, _x + xo,			\
--					 _y + yo )) & 0x00ffffff;
-+   d = *(GLuint*)(radeon_ptr32(rrb, _x + xo,	_y + yo )) & 0x00ffffff;
- #endif
--
-+/*
-+    fprintf(stderr, "dval(%d, %d, %d, %d)=0x%08X\n", _x, xo, _y, yo, d);\
-+   d = *(GLuint*)(radeon_ptr(rrb, _x + xo,	_y + yo )) & 0x00ffffff;
-+*/
- #define TAG(x) radeon##x##_z24_s8
- #include "depthtmp.h"
- 
-@@ -234,35 +309,35 @@ do {									\
- #ifdef COMPILE_R300
- #define WRITE_STENCIL( _x, _y, d )					\
- do {									\
--   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
--   GLuint tmp = *(GLuint *)(buf + offset);				\
-+   GLuint *_ptr = (GLuint*)radeon_ptr32(rrb, _x + xo, _y + yo);		\
-+   GLuint tmp = *_ptr;				\
-    tmp &= 0xffffff00;							\
-    tmp |= (d) & 0xff;							\
--   *(GLuint *)(buf + offset) = tmp;					\
-+   *_ptr = tmp;					\
- } while (0)
- #else
- #define WRITE_STENCIL( _x, _y, d )					\
- do {									\
--   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
--   GLuint tmp = *(GLuint *)(buf + offset);				\
-+   GLuint *_ptr = (GLuint*)radeon_ptr32(rrb, _x + xo, _y + yo);		\
-+   GLuint tmp = *_ptr;				\
-    tmp &= 0x00ffffff;							\
-    tmp |= (((d) & 0xff) << 24);						\
--   *(GLuint *)(buf + offset) = tmp;					\
-+   *_ptr = tmp;					\
- } while (0)
- #endif
- 
- #ifdef COMPILE_R300
- #define READ_STENCIL( d, _x, _y )					\
- do {									\
--   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
--   GLuint tmp = *(GLuint *)(buf + offset);				\
-+   GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x + xo, _y + yo );		\
-+   GLuint tmp = *_ptr;				\
-    d = tmp & 0x000000ff;						\
- } while (0)
- #else
- #define READ_STENCIL( d, _x, _y )					\
- do {									\
--   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
--   GLuint tmp = *(GLuint *)(buf + offset);				\
-+   GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x + xo, _y + yo );		\
-+   GLuint tmp = *_ptr;				\
-    d = (tmp & 0xff000000) >> 24;					\
- } while (0)
- #endif
-@@ -270,6 +345,29 @@ do {									\
- #define TAG(x) radeon##x##_z24_s8
- #include "stenciltmp.h"
- 
-+static void map_buffer(struct gl_renderbuffer *rb, GLboolean write)
-+{
-+	struct radeon_renderbuffer *rrb = (void*)rb;
-+    int r;
-+
-+	if (rrb->bo) {
-+        r = radeon_bo_map(rrb->bo, write);
-+        if (r) {
-+            fprintf(stderr, "(%s) error(%d) mapping buffer.\n",
-+                    __FUNCTION__, r);
-+        }
-+    }
-+}
-+
-+static void unmap_buffer(struct gl_renderbuffer *rb)
-+{
-+	struct radeon_renderbuffer *rrb = (void*)rb;
-+
-+	if (rrb->bo) {
-+        radeon_bo_unmap(rrb->bo);
-+    }
-+}
-+
- /* Move locking out to get reasonable span performance (10x better
-  * than doing this in HW_LOCK above).  WaitForIdle() is the main
-  * culprit.
-@@ -278,45 +376,63 @@ do {									\
- static void radeonSpanRenderStart(GLcontext * ctx)
- {
- 	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+	int i;
- #ifdef COMPILE_R300
- 	r300ContextPtr r300 = (r300ContextPtr) rmesa;
- 	R300_FIREVERTICES(r300);
- #else
- 	RADEON_FIREVERTICES(rmesa);
- #endif
--	LOCK_HARDWARE(rmesa);
--	radeonWaitForIdleLocked(rmesa);
- 
--	/* Read the first pixel in the frame buffer.  This should
--	 * be a noop, right?  In fact without this conform fails as reading
--	 * from the framebuffer sometimes produces old results -- the
--	 * on-card read cache gets mixed up and doesn't notice that the
--	 * framebuffer has been updated.
--	 *
--	 * Note that we should probably be reading some otherwise unused
--	 * region of VRAM, otherwise we might get incorrect results when
--	 * reading pixels from the top left of the screen.
--	 *
--	 * I found this problem on an R420 with glean's texCube test.
--	 * Note that the R200 span code also *writes* the first pixel in the
--	 * framebuffer, but I've found this to be unnecessary.
--	 *  -- Nicolai Hähnle, June 2008
--	 */
--	{
--		int p;
--		driRenderbuffer *drb =
--			(driRenderbuffer *) ctx->WinSysDrawBuffer->_ColorDrawBuffers[0];
--		volatile int *buf =
--			(volatile int *)(rmesa->dri.screen->pFB + drb->offset);
--		p = *buf;
-+	for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
-+		if (ctx->Texture.Unit[i]._ReallyEnabled)
-+			ctx->Driver.MapTexture(ctx, ctx->Texture.Unit[i]._Current);
- 	}
-+
-+	/* color draw buffers */
-+	for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
-+		map_buffer(ctx->DrawBuffer->_ColorDrawBuffers[i], GL_TRUE);
-+    }
-+
-+	map_buffer(ctx->ReadBuffer->_ColorReadBuffer, GL_FALSE);
-+
-+	if (ctx->DrawBuffer->_DepthBuffer) {
-+		map_buffer(ctx->DrawBuffer->_DepthBuffer->Wrapped, GL_TRUE);
-+    }
-+	if (ctx->DrawBuffer->_StencilBuffer)
-+		map_buffer(ctx->DrawBuffer->_StencilBuffer->Wrapped, GL_TRUE);
-+
-+	/* The locking and wait for idle should really only be needed in classic mode.
-+	 * In a future memory manager based implementation, this should become
-+	 * unnecessary due to the fact that mapping our buffers, textures, etc.
-+	 * should implicitly wait for any previous rendering commands that must
-+	 * be waited on. */
-+	LOCK_HARDWARE(rmesa);
-+	radeonWaitForIdleLocked(rmesa);
- }
- 
- static void radeonSpanRenderFinish(GLcontext * ctx)
- {
- 	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-+	int i;
- 	_swrast_flush(ctx);
- 	UNLOCK_HARDWARE(rmesa);
-+
-+	for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
-+		if (ctx->Texture.Unit[i]._ReallyEnabled)
-+			ctx->Driver.UnmapTexture(ctx, ctx->Texture.Unit[i]._Current);
-+	}
-+
-+	/* color draw buffers */
-+	for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++)
-+		unmap_buffer(ctx->DrawBuffer->_ColorDrawBuffers[i]);
-+
-+	unmap_buffer(ctx->ReadBuffer->_ColorReadBuffer);
-+
-+	if (ctx->DrawBuffer->_DepthBuffer)
-+		unmap_buffer(ctx->DrawBuffer->_DepthBuffer->Wrapped);
-+	if (ctx->DrawBuffer->_StencilBuffer)
-+		unmap_buffer(ctx->DrawBuffer->_StencilBuffer->Wrapped);
- }
- 
- void radeonInitSpanFuncs(GLcontext * ctx)
-@@ -330,20 +446,17 @@ void radeonInitSpanFuncs(GLcontext * ctx)
- /**
-  * Plug in the Get/Put routines for the given driRenderbuffer.
-  */
--void radeonSetSpanFunctions(driRenderbuffer * drb, const GLvisual * vis)
-+void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb)
- {
--	if (drb->Base.InternalFormat == GL_RGBA) {
--		if (vis->redBits == 5 && vis->greenBits == 6
--		    && vis->blueBits == 5) {
--			radeonInitPointers_RGB565(&drb->Base);
--		} else {
--			radeonInitPointers_ARGB8888(&drb->Base);
--		}
--	} else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT16) {
--		radeonInitDepthPointers_z16(&drb->Base);
--	} else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT24) {
--		radeonInitDepthPointers_z24_s8(&drb->Base);
--	} else if (drb->Base.InternalFormat == GL_STENCIL_INDEX8_EXT) {
--		radeonInitStencilPointers_z24_s8(&drb->Base);
-+	if (rrb->base.InternalFormat == GL_RGB5) {
-+		radeonInitPointers_RGB565(&rrb->base);
-+	} else if (rrb->base.InternalFormat == GL_RGBA8) {
-+		radeonInitPointers_ARGB8888(&rrb->base);
-+	} else if (rrb->base.InternalFormat == GL_DEPTH_COMPONENT16) {
-+		radeonInitDepthPointers_z16(&rrb->base);
-+	} else if (rrb->base.InternalFormat == GL_DEPTH_COMPONENT24) {
-+		radeonInitDepthPointers_z24_s8(&rrb->base);
-+	} else if (rrb->base.InternalFormat == GL_STENCIL_INDEX8_EXT) {
-+		radeonInitStencilPointers_z24_s8(&rrb->base);
- 	}
- }
-diff --git a/src/mesa/drivers/dri/r300/radeon_state.c b/src/mesa/drivers/dri/r300/radeon_state.c
-index c401da6..14d489b 100644
---- a/src/mesa/drivers/dri/r300/radeon_state.c
-+++ b/src/mesa/drivers/dri/r300/radeon_state.c
-@@ -153,6 +153,7 @@ void radeonSetCliprects(radeonContextPtr radeon)
- 	GLframebuffer *const draw_fb = (GLframebuffer*)drawable->driverPrivate;
- 	GLframebuffer *const read_fb = (GLframebuffer*)readable->driverPrivate;
- 
-+    if (!radeon->radeonScreen->driScreen->dri2.enabled) {
- 	if (draw_fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) {
- 		/* Can't ignore 2d windows if we are page flipping. */
- 		if (drawable->numBackClipRects == 0 || radeon->doPageFlip ||
-@@ -168,6 +169,7 @@ void radeonSetCliprects(radeonContextPtr radeon)
- 		radeon->numClipRects = drawable->numClipRects;
- 		radeon->pClipRects = drawable->pClipRects;
- 	}
-+    }
- 
- 	if ((draw_fb->Width != drawable->w) ||
- 	    (draw_fb->Height != drawable->h)) {
-@@ -223,14 +225,6 @@ void radeonEnable(GLcontext* ctx, GLenum cap, GLboolean state)
- void radeonInitState(radeonContextPtr radeon)
- {
- 	radeon->Fallback = 0;
--
--	if (radeon->glCtx->Visual.doubleBufferMode && radeon->sarea->pfCurrentPage == 0) {
--		radeon->state.color.drawOffset = radeon->radeonScreen->backOffset;
--		radeon->state.color.drawPitch = radeon->radeonScreen->backPitch;
--	} else {
--		radeon->state.color.drawOffset = radeon->radeonScreen->frontOffset;
--		radeon->state.color.drawPitch = radeon->radeonScreen->frontPitch;
--	}
- }
- 
- 
-diff --git a/src/mesa/drivers/dri/radeon/radeon_bo_legacy.c b/src/mesa/drivers/dri/radeon/radeon_bo_legacy.c
-new file mode 100644
-index 0000000..3aa1d86
---- /dev/null
-+++ b/src/mesa/drivers/dri/radeon/radeon_bo_legacy.c
-@@ -0,0 +1,724 @@
-+/* 
-+ * Copyright © 2008 Nicolai Haehnle
-+ * Copyright © 2008 Dave Airlie
-+ * Copyright © 2008 Jérôme Glisse
-+ * All Rights Reserved.
-+ * 
-+ * Permission is hereby granted, free of charge, to any person obtaining a
-+ * copy of this software and associated documentation files (the
-+ * "Software"), to deal in the Software without restriction, including
-+ * without limitation the rights to use, copy, modify, merge, publish,
-+ * distribute, sub license, and/or sell copies of the Software, and to
-+ * permit persons to whom the Software is furnished to do so, subject to
-+ * the following conditions:
-+ * 
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
-+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
-+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
-+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
-+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
-+ *
-+ * The above copyright notice and this permission notice (including the
-+ * next paragraph) shall be included in all copies or substantial portions
-+ * of the Software.
-+ */
-+/*
-+ * Authors:
-+ *      Aapo Tahkola <aet@rasterburn.org>
-+ *      Nicolai Haehnle <prefect_@gmx.net>
-+ *      Dave Airlie
-+ *      Jérôme Glisse <glisse@freedesktop.org>
-+ */
-+#include <stdio.h>
-+#include <stdint.h>
-+#include <stdlib.h>
-+#include <string.h>
-+#include <errno.h>
-+#include <unistd.h>
-+#include <sys/mman.h>
-+#include <sys/ioctl.h>
-+#include "xf86drm.h"
-+#include "drm.h"
-+#include "radeon_drm.h"
-+#include "radeon_bo.h"
-+#include "radeon_bo_legacy.h"
-+#include "radeon_ioctl.h"
-+#include "texmem.h"
-+
-+struct bo_legacy {
-+    struct radeon_bo    base;
-+    driTextureObject    tobj_base;
-+    int                 map_count;
-+    uint32_t            pending;
-+    int                 is_pending;
-+    int                 validated;
-+    int                 static_bo;
-+    int                 got_dri_texture_obj;
-+    int                 dirty;
-+    uint32_t            offset;
-+    driTextureObject    dri_texture_obj;
-+    void                *ptr;
-+    struct bo_legacy    *next, *prev;
-+    struct bo_legacy    *pnext, *pprev;
-+};
-+
-+struct bo_manager_legacy {
-+    struct radeon_bo_manager    base;
-+    unsigned                    nhandle;
-+    unsigned                    nfree_handles;
-+    unsigned                    cfree_handles;
-+    uint32_t                    current_age;
-+    struct bo_legacy            bos;
-+    struct bo_legacy            pending_bos;
-+    uint32_t                    fb_location;
-+    uint32_t                    texture_offset;
-+    unsigned                    dma_alloc_size;
-+    unsigned                    cpendings;
-+    driTextureObject            texture_swapped;
-+    driTexHeap                  *texture_heap;
-+    struct radeon_screen        *screen;
-+    unsigned                    *free_handles;
-+};
-+
-+static void bo_legacy_tobj_destroy(void *data, driTextureObject *t)
-+{
-+    struct bo_legacy *bo_legacy;
-+
-+    bo_legacy = (struct bo_legacy*)((char*)t)-sizeof(struct radeon_bo);
-+    bo_legacy->got_dri_texture_obj = 0;
-+    bo_legacy->validated = 0;
-+}
-+
-+static int legacy_new_handle(struct bo_manager_legacy *bom, uint32_t *handle)
-+{
-+    uint32_t tmp;
-+
-+    *handle = 0;
-+    if (bom->nhandle == 0xFFFFFFFF) {
-+        return -EINVAL;
-+    }
-+    if (bom->cfree_handles > 0) {
-+        tmp = bom->free_handles[--bom->cfree_handles];
-+        while (!bom->free_handles[bom->cfree_handles - 1]) {
-+            bom->cfree_handles--;
-+            if (bom->cfree_handles <= 0) {
-+                bom->cfree_handles = 0;
-+            }
-+        }
-+    } else {
-+        bom->cfree_handles = 0;
-+        tmp = bom->nhandle++;
-+    }
-+    assert(tmp);
-+    *handle = tmp;
-+    return 0;
-+}
-+
-+static int legacy_free_handle(struct bo_manager_legacy *bom, uint32_t handle)
-+{
-+    uint32_t *handles;
-+
-+    if (!handle) {
-+        return 0;
-+    }
-+    if (handle == (bom->nhandle - 1)) {
-+        int i;
-+
-+        bom->nhandle--;
-+        for (i = bom->cfree_handles - 1; i >= 0; i--) {
-+            if (bom->free_handles[i] == (bom->nhandle - 1)) {
-+                bom->nhandle--;
-+                bom->free_handles[i] = 0;
-+            }
-+        }
-+        while (!bom->free_handles[bom->cfree_handles - 1]) {
-+            bom->cfree_handles--;
-+            if (bom->cfree_handles <= 0) {
-+                bom->cfree_handles = 0;
-+            }
-+        }
-+        return 0;
-+    }
-+    if (bom->cfree_handles < bom->nfree_handles) {
-+        bom->free_handles[bom->cfree_handles++] = handle;
-+        return 0;
-+    }
-+    bom->nfree_handles += 0x100;
-+    handles = (uint32_t*)realloc(bom->free_handles, bom->nfree_handles * 4);
-+    if (handles == NULL) {
-+        bom->nfree_handles -= 0x100;
-+        return -ENOMEM;
-+    }
-+    bom->free_handles = handles;
-+    bom->free_handles[bom->cfree_handles++] = handle;
-+    return 0;
-+}
-+
-+static void legacy_get_current_age(struct bo_manager_legacy *boml)
-+{
-+    drm_radeon_getparam_t gp;
-+    int r;
-+
-+    gp.param = RADEON_PARAM_LAST_CLEAR;
-+    gp.value = (int *)&boml->current_age;
-+    r = drmCommandWriteRead(boml->base.fd, DRM_RADEON_GETPARAM,
-+                            &gp, sizeof(gp));
-+    if (r) {
-+        fprintf(stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__, r);
-+        exit(1);
-+    }
-+}
-+
-+static int legacy_is_pending(struct radeon_bo *bo)
-+{
-+    struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom;
-+    struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
-+
-+    if (bo_legacy->is_pending <= 0) {
-+        bo_legacy->is_pending = 0;
-+        return 0;
-+    }
-+    if (boml->current_age >= bo_legacy->pending) {
-+        if (boml->pending_bos.pprev == bo_legacy) {
-+            boml->pending_bos.pprev = bo_legacy->pprev;
-+        }
-+        bo_legacy->pprev->pnext = bo_legacy->pnext;
-+        if (bo_legacy->pnext) {
-+            bo_legacy->pnext->pprev = bo_legacy->pprev;
-+        }
-+        while (bo_legacy->is_pending--) {
-+            radeon_bo_unref(bo);
-+        }
-+        bo_legacy->is_pending = 0;
-+        boml->cpendings--;
-+        return 0;
-+    }
-+    return 1;
-+}
-+
-+static int legacy_wait_pending(struct radeon_bo *bo)
-+{
-+    struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom;
-+    struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
-+
-+    if (!bo_legacy->is_pending) {
-+        return 0;
-+    }
-+    /* FIXME: lockup and userspace busy looping that's all the folks */
-+    legacy_get_current_age(boml);
-+    while (legacy_is_pending(bo)) {
-+        usleep(10);
-+        legacy_get_current_age(boml);
-+    }
-+    return 0;
-+}
-+
-+static void legacy_track_pending(struct bo_manager_legacy *boml)
-+{
-+    struct bo_legacy *bo_legacy;
-+    struct bo_legacy *next;
-+
-+    legacy_get_current_age(boml);
-+    bo_legacy = boml->pending_bos.pnext;
-+    while (bo_legacy) {
-+        next = bo_legacy->pnext;
-+        if (legacy_is_pending(&(bo_legacy->base))) {
-+        }
-+        bo_legacy = next;
-+    } 
-+}
-+
-+static struct bo_legacy *bo_allocate(struct bo_manager_legacy *boml,
-+                                     uint32_t size,
-+                                     uint32_t alignment,
-+                                     uint32_t domains,
-+                                     uint32_t flags)
-+{
-+    struct bo_legacy *bo_legacy;
-+
-+    bo_legacy = (struct bo_legacy*)calloc(1, sizeof(struct bo_legacy));
-+    if (bo_legacy == NULL) {
-+        return NULL;
-+    }
-+    bo_legacy->base.bom = (struct radeon_bo_manager*)boml;
-+    bo_legacy->base.handle = 0;
-+    bo_legacy->base.size = size;
-+    bo_legacy->base.alignment = alignment;
-+    bo_legacy->base.domains = domains;
-+    bo_legacy->base.flags = flags;
-+    bo_legacy->base.ptr = NULL;
-+    bo_legacy->map_count = 0;
-+    bo_legacy->next = NULL;
-+    bo_legacy->prev = NULL;
-+    bo_legacy->got_dri_texture_obj = 0;
-+    bo_legacy->pnext = NULL;
-+    bo_legacy->pprev = NULL;
-+    bo_legacy->next = boml->bos.next;
-+    bo_legacy->prev = &boml->bos;
-+    boml->bos.next = bo_legacy;
-+    if (bo_legacy->next) {
-+        bo_legacy->next->prev = bo_legacy;
-+    }
-+    return bo_legacy;
-+}
-+
-+static int bo_dma_alloc(struct radeon_bo *bo)
-+{
-+    struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom;
-+    struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
-+    drm_radeon_mem_alloc_t alloc;
-+    unsigned size;
-+    int base_offset;
-+    int r;
-+
-+    /* align size on 4Kb */
-+    size = (((4 * 1024) - 1) + bo->size) & ~((4 * 1024) - 1);
-+    alloc.region = RADEON_MEM_REGION_GART;
-+    alloc.alignment = bo_legacy->base.alignment;
-+    alloc.size = size;
-+    alloc.region_offset = &base_offset;
-+    r = drmCommandWriteRead(bo->bom->fd,
-+                            DRM_RADEON_ALLOC,
-+                            &alloc,
-+                            sizeof(alloc));
-+    if (r) {
-+        /* ptr is set to NULL if dma allocation failed */
-+        bo_legacy->ptr = NULL;
-+        exit(0);
-+        return r;
-+    }
-+    bo_legacy->ptr = boml->screen->gartTextures.map + base_offset;
-+    bo_legacy->offset = boml->screen->gart_texture_offset + base_offset;
-+    bo->size = size;
-+    boml->dma_alloc_size += size;
-+    return 0;
-+}
-+
-+static int bo_dma_free(struct radeon_bo *bo)
-+{
-+    struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom;
-+    struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
-+    drm_radeon_mem_free_t memfree;
-+    int r;
-+
-+    if (bo_legacy->ptr == NULL) {
-+        /* ptr is set to NULL if dma allocation failed */
-+        return 0;
-+    }
-+    legacy_get_current_age(boml);
-+    memfree.region = RADEON_MEM_REGION_GART;
-+    memfree.region_offset  = bo_legacy->offset;
-+    memfree.region_offset -= boml->screen->gart_texture_offset;
-+    r = drmCommandWrite(boml->base.fd,
-+                        DRM_RADEON_FREE,
-+                        &memfree,
-+                        sizeof(memfree));
-+    if (r) {
-+        fprintf(stderr, "Failed to free bo[%p] at %08x\n",
-+                &bo_legacy->base, memfree.region_offset);
-+        fprintf(stderr, "ret = %s\n", strerror(-r));
-+        return r;
-+    }
-+    boml->dma_alloc_size -= bo_legacy->base.size;
-+    return 0;
-+}
-+
-+static void bo_free(struct bo_legacy *bo_legacy)
-+{
-+    struct bo_manager_legacy *boml;
-+
-+    if (bo_legacy == NULL) {
-+        return;
-+    }
-+    boml = (struct bo_manager_legacy *)bo_legacy->base.bom;
-+    bo_legacy->prev->next = bo_legacy->next;
-+    if (bo_legacy->next) {
-+        bo_legacy->next->prev = bo_legacy->prev;
-+    }
-+    if (!bo_legacy->static_bo) {
-+        legacy_free_handle(boml, bo_legacy->base.handle);
-+        if (bo_legacy->base.domains & RADEON_GEM_DOMAIN_GTT) {
-+            /* dma buffers */
-+            bo_dma_free(&bo_legacy->base);
-+        } else {
-+            /* free backing store */
-+            free(bo_legacy->ptr);
-+        }
-+    }
-+    memset(bo_legacy, 0 , sizeof(struct bo_legacy));
-+    free(bo_legacy);
-+}
-+
-+static struct radeon_bo *bo_open(struct radeon_bo_manager *bom,
-+                                 uint32_t handle,
-+                                 uint32_t size,
-+                                 uint32_t alignment,
-+                                 uint32_t domains,
-+                                 uint32_t flags)
-+{
-+    struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bom;
-+    struct bo_legacy *bo_legacy;
-+    int r;
-+
-+    if (handle) {
-+        bo_legacy = boml->bos.next;
-+        while (bo_legacy) {
-+            if (bo_legacy->base.handle == handle) {
-+                radeon_bo_ref(&(bo_legacy->base));
-+                return (struct radeon_bo*)bo_legacy;
-+            }
-+            bo_legacy = bo_legacy->next;
-+        }
-+        return NULL;
-+    }
-+
-+    bo_legacy = bo_allocate(boml, size, alignment, domains, flags);
-+    bo_legacy->static_bo = 0;
-+    r = legacy_new_handle(boml, &bo_legacy->base.handle);
-+    if (r) {
-+        bo_free(bo_legacy);
-+        return NULL;
-+    }
-+    if (bo_legacy->base.domains & RADEON_GEM_DOMAIN_GTT) {
-+        legacy_track_pending(boml);
-+        /* dma buffers */
-+        r = bo_dma_alloc(&(bo_legacy->base));
-+        if (r) {
-+            fprintf(stderr, "Ran out of GART memory (for %d)!\n", size);
-+            fprintf(stderr, "Please consider adjusting GARTSize option.\n");
-+            bo_free(bo_legacy);
-+            exit(-1);
-+            return NULL;
-+        }
-+    } else {
-+        bo_legacy->ptr = malloc(bo_legacy->base.size);
-+        if (bo_legacy->ptr == NULL) {
-+            bo_free(bo_legacy);
-+            return NULL;
-+        }
-+    }
-+    radeon_bo_ref(&(bo_legacy->base));
-+    return (struct radeon_bo*)bo_legacy;
-+}
-+
-+static void bo_ref(struct radeon_bo *bo)
-+{
-+}
-+
-+static struct radeon_bo *bo_unref(struct radeon_bo *bo)
-+{
-+    struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
-+
-+    if (bo->cref <= 0) {
-+        bo_legacy->prev->next = bo_legacy->next;
-+        if (bo_legacy->next) {
-+            bo_legacy->next->prev = bo_legacy->prev;
-+        }
-+        if (!bo_legacy->is_pending) {
-+            bo_free(bo_legacy);
-+        }
-+        return NULL;
-+    }
-+    return bo;
-+}
-+
-+static int bo_map(struct radeon_bo *bo, int write)
-+{
-+    struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom;
-+    struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
-+    
-+    legacy_wait_pending(bo);
-+    bo_legacy->validated = 0;
-+    bo_legacy->dirty = 1;
-+    bo_legacy->map_count++;
-+    bo->ptr = bo_legacy->ptr;
-+    /* Read the first pixel in the frame buffer.  This should
-+     * be a noop, right?  In fact without this conform fails as reading
-+     * from the framebuffer sometimes produces old results -- the
-+     * on-card read cache gets mixed up and doesn't notice that the
-+     * framebuffer has been updated.
-+     *
-+     * Note that we should probably be reading some otherwise unused
-+     * region of VRAM, otherwise we might get incorrect results when
-+     * reading pixels from the top left of the screen.
-+     *
-+     * I found this problem on an R420 with glean's texCube test.
-+     * Note that the R200 span code also *writes* the first pixel in the
-+     * framebuffer, but I've found this to be unnecessary.
-+     *  -- Nicolai Hähnle, June 2008
-+     */
-+    {
-+        int p;
-+        volatile int *buf = (int*)boml->screen->driScreen->pFB;
-+        p = *buf;
-+    }
-+
-+    return 0;
-+}
-+
-+static int bo_unmap(struct radeon_bo *bo)
-+{
-+    struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
-+
-+    if (--bo_legacy->map_count > 0) {
-+        return 0;
-+    }
-+    bo->ptr = NULL;
-+    return 0;
-+}
-+
-+static struct radeon_bo_funcs bo_legacy_funcs = {
-+    bo_open,
-+    bo_ref,
-+    bo_unref,
-+    bo_map,
-+    bo_unmap
-+};
-+
-+static int bo_vram_validate(struct radeon_bo *bo,
-+                            uint32_t *soffset,
-+                            uint32_t *eoffset)
-+{
-+    struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom;
-+    struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
-+    int r;
-+    
-+    if (!bo_legacy->got_dri_texture_obj) {
-+        make_empty_list(&bo_legacy->dri_texture_obj);
-+        bo_legacy->dri_texture_obj.totalSize = bo->size;
-+        r = driAllocateTexture(&boml->texture_heap, 1,
-+                               &bo_legacy->dri_texture_obj);
-+        if (r) {
-+            uint8_t *segfault=NULL;
-+            fprintf(stderr, "Ouch! vram_validate failed %d\n", r);
-+            *segfault=1;
-+            return -1;
-+        }
-+        bo_legacy->offset = boml->texture_offset +
-+                            bo_legacy->dri_texture_obj.memBlock->ofs;
-+        bo_legacy->got_dri_texture_obj = 1;
-+        bo_legacy->dirty = 1;
-+    }
-+    if (bo_legacy->dirty) {
-+        /* Copy to VRAM using a blit.
-+         * All memory is 4K aligned. We're using 1024 pixels wide blits.
-+         */
-+        drm_radeon_texture_t tex;
-+        drm_radeon_tex_image_t tmp;
-+        int ret;
-+
-+        tex.offset = bo_legacy->offset;
-+        tex.image = &tmp;
-+        assert(!(tex.offset & 1023));
-+
-+        tmp.x = 0;
-+        tmp.y = 0;
-+        if (bo->size < 4096) {
-+            tmp.width = (bo->size + 3) / 4;
-+            tmp.height = 1;
-+        } else {
-+            tmp.width = 1024;
-+            tmp.height = (bo->size + 4095) / 4096;
-+        }
-+        tmp.data = bo_legacy->ptr;
-+        tex.format = RADEON_TXFORMAT_ARGB8888;
-+        tex.width = tmp.width;
-+        tex.height = tmp.height;
-+        tex.pitch = MAX2(tmp.width / 16, 1);
-+        do {
-+            ret = drmCommandWriteRead(bo->bom->fd,
-+                                      DRM_RADEON_TEXTURE,
-+                                      &tex,
-+                                      sizeof(drm_radeon_texture_t));
-+            if (ret) {
-+                if (RADEON_DEBUG & DEBUG_IOCTL)
-+                    fprintf(stderr, "DRM_RADEON_TEXTURE:  again!\n");
-+                usleep(1);
-+            }
-+        } while (ret == -EAGAIN);
-+        bo_legacy->dirty = 0;
-+    }
-+    return 0;
-+}
-+
-+int radeon_bo_legacy_validate(struct radeon_bo *bo,
-+                              uint32_t *soffset,
-+                              uint32_t *eoffset)
-+{
-+    struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
-+    int r;
-+
-+    if (bo_legacy->map_count) {
-+        fprintf(stderr, "bo(%p, %d) is mapped (%d) can't valide it.\n",
-+                bo, bo->size, bo_legacy->map_count);
-+        return -EINVAL;
-+    }
-+    if (bo_legacy->static_bo || bo_legacy->validated) {
-+        *soffset = bo_legacy->offset;
-+        *eoffset = bo_legacy->offset + bo->size;
-+        return 0;
-+    }
-+    if (!(bo->domains & RADEON_GEM_DOMAIN_GTT)) {
-+        r = bo_vram_validate(bo, soffset, eoffset);
-+        if (r) {
-+            return r;
-+        }
-+    }
-+    *soffset = bo_legacy->offset;
-+    *eoffset = bo_legacy->offset + bo->size;
-+    bo_legacy->validated = 1;
-+    return 0;
-+}
-+
-+void radeon_bo_legacy_pending(struct radeon_bo *bo, uint32_t pending)
-+{
-+    struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom;
-+    struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
-+
-+    bo_legacy->pending = pending;
-+    bo_legacy->is_pending += 1;
-+    /* add to pending list */
-+    radeon_bo_ref(bo);
-+    if (bo_legacy->is_pending > 1) {
-+        return;    
-+    }
-+    bo_legacy->pprev = boml->pending_bos.pprev;
-+    bo_legacy->pnext = NULL;
-+    bo_legacy->pprev->pnext = bo_legacy;
-+    boml->pending_bos.pprev = bo_legacy;
-+    boml->cpendings++;
-+}
-+
-+void radeon_bo_manager_legacy_dtor(struct radeon_bo_manager *bom)
-+{
-+    struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bom;
-+    struct bo_legacy *bo_legacy;
-+
-+    if (bom == NULL) {
-+        return;
-+    }
-+    bo_legacy = boml->bos.next;
-+    while (bo_legacy) {
-+        struct bo_legacy *next;
-+
-+        next = bo_legacy->next;
-+        bo_free(bo_legacy);
-+        bo_legacy = next;
-+    }
-+    free(boml->free_handles);
-+    free(boml);
-+}
-+
-+struct radeon_bo_manager *radeon_bo_manager_legacy_ctor(struct radeon_screen *scrn)
-+{
-+    struct bo_manager_legacy *bom;
-+    struct bo_legacy *bo;
-+    unsigned size;
-+
-+    bom = (struct bo_manager_legacy*)
-+          calloc(1, sizeof(struct bo_manager_legacy));
-+    if (bom == NULL) {
-+        return NULL;
-+    }
-+
-+    bom->texture_heap = driCreateTextureHeap(0,
-+                                             bom,
-+                                             scrn->texSize[0],
-+                                             12,
-+                                             RADEON_NR_TEX_REGIONS,
-+                                             (drmTextureRegionPtr)scrn->sarea->tex_list[0],
-+                                             &scrn->sarea->tex_age[0],
-+                                             &bom->texture_swapped,
-+                                             sizeof(struct bo_legacy),
-+                                             &bo_legacy_tobj_destroy);
-+    bom->texture_offset = scrn->texOffset[0];
-+
-+    bom->base.funcs = &bo_legacy_funcs;
-+    bom->base.fd = scrn->driScreen->fd;
-+    bom->bos.next = NULL;
-+    bom->bos.prev = NULL;
-+    bom->pending_bos.pprev = &bom->pending_bos;
-+    bom->pending_bos.pnext = NULL;
-+    bom->screen = scrn;
-+    bom->fb_location = scrn->fbLocation;
-+    bom->nhandle = 1;
-+    bom->cfree_handles = 0;
-+    bom->nfree_handles = 0x400;
-+    bom->free_handles = (uint32_t*)malloc(bom->nfree_handles * 4);
-+    if (bom->free_handles == NULL) {
-+        radeon_bo_manager_legacy_dtor((struct radeon_bo_manager*)bom);
-+        return NULL;
-+    }
-+
-+    /* biggest framebuffer size */
-+    size = 4096*4096*4; 
-+    /* allocate front */
-+    bo = bo_allocate(bom, size, 0, RADEON_GEM_DOMAIN_VRAM, 0);
-+    if (bo == NULL) {
-+        radeon_bo_manager_legacy_dtor((struct radeon_bo_manager*)bom);
-+        return NULL;
-+    }
-+    if (scrn->sarea->tiling_enabled) {
-+        bo->base.flags = RADEON_BO_FLAGS_MACRO_TILE;
-+    }
-+    bo->static_bo = 1;
-+    bo->offset = bom->screen->frontOffset + bom->fb_location;
-+    bo->base.handle = bo->offset;
-+    bo->ptr = scrn->driScreen->pFB + bom->screen->frontOffset;
-+    if (bo->base.handle > bom->nhandle) {
-+        bom->nhandle = bo->base.handle + 1;
-+    }
-+    /* allocate back */
-+    bo = bo_allocate(bom, size, 0, RADEON_GEM_DOMAIN_VRAM, 0);
-+    if (bo == NULL) {
-+        radeon_bo_manager_legacy_dtor((struct radeon_bo_manager*)bom);
-+        return NULL;
-+    }
-+    if (scrn->sarea->tiling_enabled) {
-+        bo->base.flags = RADEON_BO_FLAGS_MACRO_TILE;
-+    }
-+    bo->static_bo = 1;
-+    bo->offset = bom->screen->backOffset + bom->fb_location;
-+    bo->base.handle = bo->offset;
-+    bo->ptr = scrn->driScreen->pFB + bom->screen->backOffset;
-+    if (bo->base.handle > bom->nhandle) {
-+        bom->nhandle = bo->base.handle + 1;
-+    }
-+    /* allocate depth */
-+    bo = bo_allocate(bom, size, 0, RADEON_GEM_DOMAIN_VRAM, 0);
-+    if (bo == NULL) {
-+        radeon_bo_manager_legacy_dtor((struct radeon_bo_manager*)bom);
-+        return NULL;
-+    }
-+    bo->base.flags = 0;
-+    if (scrn->sarea->tiling_enabled) {
-+        bo->base.flags |= RADEON_BO_FLAGS_MACRO_TILE;
-+        bo->base.flags |= RADEON_BO_FLAGS_MICRO_TILE;
-+    }
-+    bo->static_bo = 1;
-+    bo->offset = bom->screen->depthOffset + bom->fb_location;
-+    bo->base.handle = bo->offset;
-+    bo->ptr = scrn->driScreen->pFB + bom->screen->depthOffset;
-+    if (bo->base.handle > bom->nhandle) {
-+        bom->nhandle = bo->base.handle + 1;
-+    }
-+    return (struct radeon_bo_manager*)bom;
-+}
-+
-+void radeon_bo_legacy_texture_age(struct radeon_bo_manager *bom)
-+{
-+    struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bom;
-+    DRI_AGE_TEXTURES(boml->texture_heap);
-+}
-+
-+unsigned radeon_bo_legacy_relocs_size(struct radeon_bo *bo)
-+{
-+    struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
-+
-+    if (bo_legacy->static_bo || (bo->domains & RADEON_GEM_DOMAIN_GTT)) {
-+        return 0;
-+    }
-+    return bo->size;
-+}
-diff --git a/src/mesa/drivers/dri/radeon/radeon_bo_legacy.h b/src/mesa/drivers/dri/radeon/radeon_bo_legacy.h
-new file mode 100644
-index 0000000..208171e
---- /dev/null
-+++ b/src/mesa/drivers/dri/radeon/radeon_bo_legacy.h
-@@ -0,0 +1,47 @@
-+/* 
-+ * Copyright © 2008 Nicolai Haehnle
-+ * Copyright © 2008 Jérôme Glisse
-+ * All Rights Reserved.
-+ * 
-+ * Permission is hereby granted, free of charge, to any person obtaining a
-+ * copy of this software and associated documentation files (the
-+ * "Software"), to deal in the Software without restriction, including
-+ * without limitation the rights to use, copy, modify, merge, publish,
-+ * distribute, sub license, and/or sell copies of the Software, and to
-+ * permit persons to whom the Software is furnished to do so, subject to
-+ * the following conditions:
-+ * 
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
-+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
-+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
-+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
-+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
-+ *
-+ * The above copyright notice and this permission notice (including the
-+ * next paragraph) shall be included in all copies or substantial portions
-+ * of the Software.
-+ */
-+/*
-+ * Authors:
-+ *      Aapo Tahkola <aet@rasterburn.org>
-+ *      Nicolai Haehnle <prefect_@gmx.net>
-+ *      Jérôme Glisse <glisse@freedesktop.org>
-+ */
-+#ifndef RADEON_BO_LEGACY_H
-+#define RADEON_BO_LEGACY_H
-+
-+#include "radeon_bo.h"
-+#include "radeon_screen.h"
-+
-+void radeon_bo_legacy_pending(struct radeon_bo *bo, uint32_t pending);
-+int radeon_bo_legacy_validate(struct radeon_bo *bo,
-+                              uint32_t *soffset,
-+                              uint32_t *eoffset);
-+struct radeon_bo_manager *radeon_bo_manager_legacy_ctor(struct radeon_screen *scrn);
-+void radeon_bo_manager_legacy_dtor(struct radeon_bo_manager *bom);
-+void radeon_bo_legacy_texture_age(struct radeon_bo_manager *bom);
-+unsigned radeon_bo_legacy_relocs_size(struct radeon_bo *bo);
-+
-+#endif
-diff --git a/src/mesa/drivers/dri/radeon/radeon_buffer.h b/src/mesa/drivers/dri/radeon/radeon_buffer.h
-new file mode 100644
-index 0000000..62cdfad
---- /dev/null
-+++ b/src/mesa/drivers/dri/radeon/radeon_buffer.h
-@@ -0,0 +1,49 @@
-+/*
-+ * Copyright 2008 Red Hat, Inc.
-+ *
-+ * Permission is hereby granted, free of charge, to any person obtaining a
-+ * copy of this software and associated documentation files (the "Software")
-+ * to deal in the software without restriction, including without limitation
-+ * on the rights to use, copy, modify, merge, publish, distribute, sub
-+ * license, and/or sell copies of the Software, and to permit persons to whom
-+ * them Software is furnished to do so, subject to the following conditions:
-+ *
-+ * The above copyright notice and this permission notice (including the next
-+ * paragraph) shall be included in all copies or substantial portions of the
-+ * Software.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTIBILITY,
-+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
-+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER
-+ * IN AN ACTION OF CONTRACT, TORT, OR OTHERWISE, ARISING FROM, OUT OF OR IN
-+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-+ *
-+ * Authors:
-+ *	Adam Jackson <ajax@redhat.com>
-+ */
-+
-+#ifndef RADEON_BUFFER_H
-+#define RADEON_BUFFER_H
-+
-+#include "radeon_bo.h"
-+#include "dri_util.h"
-+
-+struct radeon_renderbuffer
-+{
-+    struct gl_renderbuffer base;
-+    struct radeon_bo *bo;
-+    unsigned int cpp;
-+    /* unsigned int offset; */
-+    unsigned int pitch;
-+    unsigned int width;
-+    unsigned int height;
-+
-+    /* boo Xorg 6.8.2 compat */
-+    int has_surface;
-+
-+
-+    __DRIdrawablePrivate *dPriv;
-+};
-+
-+#endif
-diff --git a/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c b/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c
-new file mode 100644
-index 0000000..11b9f89
---- /dev/null
-+++ b/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c
-@@ -0,0 +1,404 @@
-+/* 
-+ * Copyright © 2008 Nicolai Haehnle
-+ * Copyright © 2008 Jérôme Glisse
-+ * All Rights Reserved.
-+ * 
-+ * Permission is hereby granted, free of charge, to any person obtaining a
-+ * copy of this software and associated documentation files (the
-+ * "Software"), to deal in the Software without restriction, including
-+ * without limitation the rights to use, copy, modify, merge, publish,
-+ * distribute, sub license, and/or sell copies of the Software, and to
-+ * permit persons to whom the Software is furnished to do so, subject to
-+ * the following conditions:
-+ * 
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
-+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
-+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
-+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
-+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
-+ *
-+ * The above copyright notice and this permission notice (including the
-+ * next paragraph) shall be included in all copies or substantial portions
-+ * of the Software.
-+ */
-+/*
-+ * Authors:
-+ *      Aapo Tahkola <aet@rasterburn.org>
-+ *      Nicolai Haehnle <prefect_@gmx.net>
-+ *      Jérôme Glisse <glisse@freedesktop.org>
-+ */
-+#include <errno.h>
-+#include "r300_reg.h"
-+#include "r300_emit.h"
-+#include "r300_cmdbuf.h"
-+#include "radeon_cs.h"
-+#include "radeon_cs_legacy.h"
-+#include "radeon_bo_legacy.h"
-+#include "radeon_context.h"
-+
-+struct cs_manager_legacy {
-+    struct radeon_cs_manager    base;
-+    struct radeon_context       *ctx;
-+    /* hack for scratch stuff */
-+    uint32_t                    pending_age;
-+    uint32_t                    pending_count;
-+};
-+
-+struct cs_reloc_legacy {
-+    struct radeon_cs_reloc  base;
-+    uint32_t                cindices;
-+    uint32_t                *indices;
-+};
-+
-+
-+static struct radeon_cs *cs_create(struct radeon_cs_manager *csm,
-+                                   uint32_t ndw)
-+{
-+    struct radeon_cs *cs;
-+
-+    cs = (struct radeon_cs*)calloc(1, sizeof(struct radeon_cs));
-+    if (cs == NULL) {
-+        return NULL;
-+    }
-+    cs->csm = csm;
-+    cs->ndw = (ndw + 0x3FF) & (~0x3FF);
-+    cs->packets = (uint32_t*)malloc(4*cs->ndw);
-+    if (cs->packets == NULL) {
-+        free(cs);
-+        return NULL;
-+    }
-+    cs->relocs_total_size = 0;
-+    return cs;
-+}
-+
-+static int cs_write_dword(struct radeon_cs *cs, uint32_t dword)
-+{
-+    if (cs->cdw >= cs->ndw) {
-+        uint32_t tmp, *ptr;
-+        tmp = (cs->cdw + 1 + 0x3FF) & (~0x3FF);
-+        ptr = (uint32_t*)realloc(cs->packets, 4 * tmp);
-+        if (ptr == NULL) {
-+            return -ENOMEM;
-+        }
-+        cs->packets = ptr;
-+        cs->ndw = tmp;
-+    }
-+    cs->packets[cs->cdw++] = dword;
-+    if (cs->section) {
-+        cs->section_cdw++;
-+    }
-+    return 0;
-+}
-+
-+static int cs_write_reloc(struct radeon_cs *cs,
-+                          struct radeon_bo *bo,
-+                          uint32_t read_domain,
-+                          uint32_t write_domain,
-+                          uint32_t flags)
-+{
-+    struct cs_reloc_legacy *relocs;
-+    int i;
-+
-+    relocs = (struct cs_reloc_legacy *)cs->relocs;
-+    /* check domains */
-+    if ((read_domain && write_domain) || (!read_domain && !write_domain)) {
-+        /* in one CS a bo can only be in read or write domain but not
-+         * in read & write domain at the same sime
-+         */
-+        return -EINVAL;
-+    }
-+    if (read_domain == RADEON_GEM_DOMAIN_CPU) {
-+        return -EINVAL;
-+    }
-+    if (write_domain == RADEON_GEM_DOMAIN_CPU) {
-+        return -EINVAL;
-+    }
-+    /* check if bo is already referenced */
-+    for(i = 0; i < cs->crelocs; i++) {
-+        uint32_t *indices;
-+
-+        if (relocs[i].base.bo->handle == bo->handle) {
-+            /* Check domains must be in read or write. As we check already
-+             * checked that in argument one of the read or write domain was
-+             * set we only need to check that if previous reloc as the read
-+             * domain set then the read_domain should also be set for this
-+             * new relocation.
-+             */
-+            if (relocs[i].base.read_domain && !read_domain) {
-+                return -EINVAL;
-+            }
-+            if (relocs[i].base.write_domain && !write_domain) {
-+                return -EINVAL;
-+            }
-+            relocs[i].base.read_domain |= read_domain;
-+            relocs[i].base.write_domain |= write_domain;
-+            /* save indice */
-+            relocs[i].cindices += 1;
-+            indices = (uint32_t*)realloc(relocs[i].indices,
-+                                         relocs[i].cindices * 4);
-+            if (indices == NULL) {
-+                relocs[i].cindices -= 1;
-+                return -ENOMEM;
-+            }
-+            relocs[i].indices = indices;
-+            relocs[i].indices[relocs[i].cindices - 1] = cs->cdw - 1;
-+            return 0;
-+        }
-+    }
-+    /* add bo to reloc */
-+    relocs = (struct cs_reloc_legacy*)
-+             realloc(cs->relocs,
-+                     sizeof(struct cs_reloc_legacy) * (cs->crelocs + 1));
-+    if (relocs == NULL) {
-+        return -ENOMEM;
-+    }
-+    cs->relocs = relocs;
-+    relocs[cs->crelocs].base.bo = bo;
-+    relocs[cs->crelocs].base.read_domain = read_domain;
-+    relocs[cs->crelocs].base.write_domain = write_domain;
-+    relocs[cs->crelocs].base.flags = flags;
-+    relocs[cs->crelocs].indices = (uint32_t*)malloc(4);
-+    if (relocs[cs->crelocs].indices == NULL) {
-+        return -ENOMEM;
-+    }
-+    relocs[cs->crelocs].indices[0] = cs->cdw - 1;
-+    relocs[cs->crelocs].cindices = 1;
-+    cs->relocs_total_size += radeon_bo_legacy_relocs_size(bo);
-+    cs->crelocs++;
-+    radeon_bo_ref(bo);
-+    return 0;
-+}
-+
-+static int cs_begin(struct radeon_cs *cs,
-+                    uint32_t ndw,
-+                    const char *file,
-+                    const char *func,
-+                    int line)
-+{
-+    if (cs->section) {
-+        fprintf(stderr, "CS already in a section(%s,%s,%d)\n",
-+                cs->section_file, cs->section_func, cs->section_line);
-+        fprintf(stderr, "CS can't start section(%s,%s,%d)\n",
-+                file, func, line);
-+        return -EPIPE;
-+    }
-+    cs->section = 1;
-+    cs->section_ndw = ndw;
-+    cs->section_cdw = 0;
-+    cs->section_file = file;
-+    cs->section_func = func;
-+    cs->section_line = line;
-+    return 0;
-+}
-+
-+static int cs_end(struct radeon_cs *cs,
-+                  const char *file,
-+                  const char *func,
-+                  int line)
-+
-+{
-+    if (!cs->section) {
-+        fprintf(stderr, "CS no section to end at (%s,%s,%d)\n",
-+                file, func, line);
-+        return -EPIPE;
-+    }
-+    cs->section = 0;
-+    if (cs->section_ndw != cs->section_cdw) {
-+        fprintf(stderr, "CS section size missmatch start at (%s,%s,%d)\n",
-+                cs->section_file, cs->section_func, cs->section_line);
-+        fprintf(stderr, "CS section end at (%s,%s,%d)\n",
-+                file, func, line);
-+        return -EPIPE;
-+    }
-+    return 0;
-+}
-+
-+static int cs_process_relocs(struct radeon_cs *cs)
-+{
-+    struct cs_manager_legacy *csm = (struct cs_manager_legacy*)cs->csm;
-+    struct cs_reloc_legacy *relocs;
-+    int i, j, r;
-+
-+    if (!IS_R300_CLASS(csm->ctx->radeonScreen)) {
-+        /* FIXME: r300 only right now */
-+        return -EINVAL;
-+    }
-+    csm = (struct cs_manager_legacy*)cs->csm;
-+    relocs = (struct cs_reloc_legacy *)cs->relocs;
-+    for (i = 0; i < cs->crelocs; i++) {
-+        for (j = 0; j < relocs[i].cindices; j++) {
-+            uint32_t soffset, eoffset;
-+
-+            r = radeon_bo_legacy_validate(relocs[i].base.bo,
-+                                           &soffset, &eoffset);
-+            if (r) {
-+                fprintf(stderr, "validated %p [0x%08X, 0x%08X]\n",
-+                        relocs[i].base.bo, soffset, eoffset);
-+                return r;
-+            }
-+            cs->packets[relocs[i].indices[j]] += soffset;
-+            if (cs->packets[relocs[i].indices[j]] >= eoffset) {
-+                radeon_bo_debug(relocs[i].base.bo, 12);
-+                fprintf(stderr, "validated %p [0x%08X, 0x%08X]\n",
-+                        relocs[i].base.bo, soffset, eoffset);
-+                fprintf(stderr, "above end: %p 0x%08X 0x%08X\n",
-+                        relocs[i].base.bo,
-+                        cs->packets[relocs[i].indices[j]],
-+                        eoffset);
-+                exit(0);
-+                return -EINVAL;
-+            }
-+        }
-+    }
-+    return 0;
-+}
-+
-+static int cs_set_age(struct radeon_cs *cs)
-+{
-+    struct cs_manager_legacy *csm = (struct cs_manager_legacy*)cs->csm;
-+    struct cs_reloc_legacy *relocs;
-+    int i;
-+
-+    relocs = (struct cs_reloc_legacy *)cs->relocs;
-+    for (i = 0; i < cs->crelocs; i++) {
-+        radeon_bo_legacy_pending(relocs[i].base.bo, csm->pending_age);
-+        radeon_bo_unref(relocs[i].base.bo);
-+    }
-+    return 0;
-+}
-+
-+static int cs_emit(struct radeon_cs *cs)
-+{
-+    struct cs_manager_legacy *csm = (struct cs_manager_legacy*)cs->csm;
-+    drm_radeon_cmd_buffer_t cmd;
-+    drm_r300_cmd_header_t age;
-+    uint64_t ull;
-+    int r;
-+
-+    /* please flush pipe do all pending work */
-+    cs_write_dword(cs, cmdpacket0(csm->ctx->radeonScreen,
-+                                  R300_SC_SCREENDOOR, 1));
-+    cs_write_dword(cs, 0x0);
-+    cs_write_dword(cs, cmdpacket0(csm->ctx->radeonScreen,
-+                                  R300_SC_SCREENDOOR, 1));
-+    cs_write_dword(cs, 0x00FFFFFF);
-+    cs_write_dword(cs, cmdpacket0(csm->ctx->radeonScreen,
-+                                  R300_SC_HYPERZ, 1));
-+    cs_write_dword(cs, 0x0);
-+    cs_write_dword(cs, cmdpacket0(csm->ctx->radeonScreen,
-+                                  R300_US_CONFIG, 1));
-+    cs_write_dword(cs, 0x0);
-+    cs_write_dword(cs, cmdpacket0(csm->ctx->radeonScreen,
-+                                  R300_ZB_CNTL, 1));
-+    cs_write_dword(cs, 0x0);
-+    cs_write_dword(cs, cmdwait(csm->ctx->radeonScreen, R300_WAIT_3D));
-+    cs_write_dword(cs, cmdpacket0(csm->ctx->radeonScreen,
-+                                  R300_RB3D_DSTCACHE_CTLSTAT, 1));
-+    cs_write_dword(cs, R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D);
-+    cs_write_dword(cs, cmdpacket0(csm->ctx->radeonScreen,
-+                                  R300_ZB_ZCACHE_CTLSTAT, 1));
-+    cs_write_dword(cs, R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE);
-+    cs_write_dword(cs, cmdwait(csm->ctx->radeonScreen,
-+                               R300_WAIT_3D | R300_WAIT_3D_CLEAN));
-+
-+    /* append buffer age */
-+    age.scratch.cmd_type = R300_CMD_SCRATCH;
-+    /* Scratch register 2 corresponds to what radeonGetAge polls */
-+    csm->pending_age = 0;
-+    csm->pending_count = 1;
-+    ull = (uint64_t) (intptr_t) &csm->pending_age;
-+    age.scratch.reg = 2;
-+    age.scratch.n_bufs = 1;
-+    age.scratch.flags = 0;
-+    radeon_cs_write_dword(cs, age.u);
-+    radeon_cs_write_dword(cs, ull & 0xffffffff);
-+    radeon_cs_write_dword(cs, ull >> 32);
-+    radeon_cs_write_dword(cs, 0);
-+
-+    r = cs_process_relocs(cs);
-+    if (r) {
-+        return 0;
-+    }
-+
-+    cmd.buf = (char *)cs->packets;
-+    cmd.bufsz = cs->cdw * 4;
-+    if (csm->ctx->state.scissor.enabled) {
-+        cmd.nbox = csm->ctx->state.scissor.numClipRects;
-+        cmd.boxes = (drm_clip_rect_t *) csm->ctx->state.scissor.pClipRects;
-+    } else {
-+        cmd.nbox = csm->ctx->numClipRects;
-+        cmd.boxes = (drm_clip_rect_t *) csm->ctx->pClipRects;
-+    }
-+
-+    r = drmCommandWrite(cs->csm->fd, DRM_RADEON_CMDBUF, &cmd, sizeof(cmd));
-+    if (r) {
-+        return r;
-+    }
-+    cs_set_age(cs);
-+    return 0;
-+}
-+
-+static int cs_destroy(struct radeon_cs *cs)
-+{
-+    free(cs->relocs);
-+    free(cs->packets);
-+    free(cs);
-+    return 0;
-+}
-+
-+static int cs_erase(struct radeon_cs *cs)
-+{
-+    free(cs->relocs);
-+    cs->relocs_total_size = 0;
-+    cs->relocs = NULL;
-+    cs->crelocs = 0;
-+    cs->cdw = 0;
-+    cs->section = 0;
-+    return 0;
-+}
-+
-+static int cs_need_flush(struct radeon_cs *cs)
-+{
-+    /* FIXME: we should get the texture heap size */
-+    return (cs->relocs_total_size > (7*1024*1024));
-+}
-+
-+static void cs_print(struct radeon_cs *cs, FILE *file)
-+{
-+}
-+
-+static struct radeon_cs_funcs  radeon_cs_legacy_funcs = {
-+    cs_create,
-+    cs_write_dword,
-+    cs_write_reloc,
-+    cs_begin,
-+    cs_end,
-+    cs_emit,
-+    cs_destroy,
-+    cs_erase,
-+    cs_need_flush,
-+    cs_print
-+};
-+
-+struct radeon_cs_manager *radeon_cs_manager_legacy_ctor(struct radeon_context *ctx)
-+{
-+    struct cs_manager_legacy *csm;
-+
-+    csm = (struct cs_manager_legacy*)
-+          calloc(1, sizeof(struct cs_manager_legacy));
-+    if (csm == NULL) {
-+        return NULL;
-+    }
-+    csm->base.funcs = &radeon_cs_legacy_funcs;
-+    csm->base.fd = ctx->dri.fd;
-+    csm->ctx = ctx;
-+    csm->pending_age = 1;
-+    return (struct radeon_cs_manager*)csm;
-+}
-+
-+void radeon_cs_manager_legacy_dtor(struct radeon_cs_manager *csm)
-+{
-+    free(csm);
-+}
-diff --git a/src/mesa/drivers/dri/radeon/radeon_cs_legacy.h b/src/mesa/drivers/dri/radeon/radeon_cs_legacy.h
-new file mode 100644
-index 0000000..71a4dad
---- /dev/null
-+++ b/src/mesa/drivers/dri/radeon/radeon_cs_legacy.h
-@@ -0,0 +1,41 @@
-+/* 
-+ * Copyright © 2008 Nicolai Haehnle
-+ * Copyright © 2008 Jérôme Glisse
-+ * All Rights Reserved.
-+ * 
-+ * Permission is hereby granted, free of charge, to any person obtaining a
-+ * copy of this software and associated documentation files (the
-+ * "Software"), to deal in the Software without restriction, including
-+ * without limitation the rights to use, copy, modify, merge, publish,
-+ * distribute, sub license, and/or sell copies of the Software, and to
-+ * permit persons to whom the Software is furnished to do so, subject to
-+ * the following conditions:
-+ * 
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
-+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
-+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
-+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
-+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
-+ *
-+ * The above copyright notice and this permission notice (including the
-+ * next paragraph) shall be included in all copies or substantial portions
-+ * of the Software.
-+ */
-+/*
-+ * Authors:
-+ *      Aapo Tahkola <aet@rasterburn.org>
-+ *      Nicolai Haehnle <prefect_@gmx.net>
-+ *      Jérôme Glisse <glisse@freedesktop.org>
-+ */
-+#ifndef RADEON_CS_LEGACY_H
-+#define RADEON_CS_LEGACY_H
-+
-+#include "radeon_cs.h"
-+#include "radeon_context.h"
-+
-+struct radeon_cs_manager *radeon_cs_manager_legacy_ctor(struct radeon_context *ctx);
-+void radeon_cs_manager_legacy_dtor(struct radeon_cs_manager *csm);
-+
-+#endif
-diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c
-index 5f32dd5..d579509 100644
---- a/src/mesa/drivers/dri/radeon/radeon_screen.c
-+++ b/src/mesa/drivers/dri/radeon/radeon_screen.c
-@@ -35,6 +35,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-  * \author  Gareth Hughes <gareth@valinux.com>
-  */
- 
-+#include <errno.h>
- #include "main/glheader.h"
- #include "main/imports.h"
- #include "main/mtypes.h"
-@@ -45,6 +46,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "radeon_chipset.h"
- #include "radeon_macros.h"
- #include "radeon_screen.h"
-+#include "radeon_buffer.h"
- #if !RADEON_COMMON
- #include "radeon_context.h"
- #include "radeon_span.h"
-@@ -70,6 +72,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- /* Radeon configuration
-  */
- #include "xmlpool.h"
-+#include "radeon_bo_legacy.h"
-+#include "radeon_bo_gem.h"
- 
- #if !RADEON_COMMON	/* R100 */
- PUBLIC const char __driConfigOptions[] =
-@@ -346,6 +350,14 @@ static const __DRItexOffsetExtension r300texOffsetExtension = {
-     { __DRI_TEX_OFFSET, __DRI_TEX_OFFSET_VERSION },
-    r300SetTexOffset,
- };
-+
-+void r300SetTexBuffer(__DRIcontext *pDRICtx,
-+                      GLint target,
-+                      __DRIdrawable *dPriv);
-+static const __DRItexBufferExtension r300TexBufferExtension = {
-+    { __DRI_TEX_BUFFER, __DRI_TEX_BUFFER_VERSION },
-+   r300SetTexBuffer,
-+};
- #endif
- 
- /* Create the device specific screen private data struct.
-@@ -355,7 +367,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
- {
-    radeonScreenPtr screen;
-    RADEONDRIPtr dri_priv = (RADEONDRIPtr)sPriv->pDevPriv;
--   unsigned char *RADEONMMIO;
-+   unsigned char *RADEONMMIO = NULL;
-    int i;
-    int ret;
-    uint32_t temp;
-@@ -387,6 +399,21 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
-    screen->card_type = (dri_priv->IsPCI ? RADEON_CARD_PCI : RADEON_CARD_AGP);
-    {
-       int ret;
-+
-+#ifdef RADEON_PARAM_KERNEL_MM
-+     ret = radeonGetParam( sPriv->fd, RADEON_PARAM_KERNEL_MM,
-+                            &screen->kernel_mm);
-+
-+      if (ret && ret != -EINVAL) {
-+         FREE( screen );
-+         fprintf(stderr, "drm_radeon_getparam_t (RADEON_OFFSET): %d\n", ret);
-+         return NULL;
-+      }
-+
-+      if (ret == -EINVAL)
-+          screen->kernel_mm = 0;
-+#endif
-+
-       ret = radeonGetParam( sPriv->fd, RADEON_PARAM_GART_BUFFER_OFFSET,
- 			    &screen->gart_buffer_offset);
- 
-@@ -420,58 +447,60 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
-       screen->drmSupportsVertexProgram = (sPriv->drm_version.minor >= 25);
-    }
- 
--   screen->mmio.handle = dri_priv->registerHandle;
--   screen->mmio.size   = dri_priv->registerSize;
--   if ( drmMap( sPriv->fd,
--		screen->mmio.handle,
--		screen->mmio.size,
--		&screen->mmio.map ) ) {
--      FREE( screen );
--      __driUtilMessage("%s: drmMap failed\n", __FUNCTION__ );
--      return NULL;
--   }
--
--   RADEONMMIO = screen->mmio.map;
--
--   screen->status.handle = dri_priv->statusHandle;
--   screen->status.size   = dri_priv->statusSize;
--   if ( drmMap( sPriv->fd,
--		screen->status.handle,
--		screen->status.size,
--		&screen->status.map ) ) {
--      drmUnmap( screen->mmio.map, screen->mmio.size );
--      FREE( screen );
--      __driUtilMessage("%s: drmMap (2) failed\n", __FUNCTION__ );
--      return NULL;
--   }
--   screen->scratch = (__volatile__ uint32_t *)
--      ((GLubyte *)screen->status.map + RADEON_SCRATCH_REG_OFFSET);
--
--   screen->buffers = drmMapBufs( sPriv->fd );
--   if ( !screen->buffers ) {
--      drmUnmap( screen->status.map, screen->status.size );
--      drmUnmap( screen->mmio.map, screen->mmio.size );
--      FREE( screen );
--      __driUtilMessage("%s: drmMapBufs failed\n", __FUNCTION__ );
--      return NULL;
--   }
--
--   if ( dri_priv->gartTexHandle && dri_priv->gartTexMapSize ) {
--      screen->gartTextures.handle = dri_priv->gartTexHandle;
--      screen->gartTextures.size   = dri_priv->gartTexMapSize;
--      if ( drmMap( sPriv->fd,
--		   screen->gartTextures.handle,
--		   screen->gartTextures.size,
--		   (drmAddressPtr)&screen->gartTextures.map ) ) {
-+   if (!screen->kernel_mm) {
-+     screen->mmio.handle = dri_priv->registerHandle;
-+     screen->mmio.size   = dri_priv->registerSize;
-+     if ( drmMap( sPriv->fd,
-+		  screen->mmio.handle,
-+		  screen->mmio.size,
-+		  &screen->mmio.map ) ) {
-+       FREE( screen );
-+       __driUtilMessage("%s: drmMap failed\n", __FUNCTION__ );
-+       return NULL;
-+     }
-+
-+     RADEONMMIO = screen->mmio.map;
-+
-+     screen->status.handle = dri_priv->statusHandle;
-+     screen->status.size   = dri_priv->statusSize;
-+     if ( drmMap( sPriv->fd,
-+		  screen->status.handle,
-+		  screen->status.size,
-+		  &screen->status.map ) ) {
-+       drmUnmap( screen->mmio.map, screen->mmio.size );
-+       FREE( screen );
-+       __driUtilMessage("%s: drmMap (2) failed\n", __FUNCTION__ );
-+       return NULL;
-+     }
-+     screen->scratch = (__volatile__ uint32_t *)
-+       ((GLubyte *)screen->status.map + RADEON_SCRATCH_REG_OFFSET);
-+
-+     screen->buffers = drmMapBufs( sPriv->fd );
-+     if ( !screen->buffers ) {
-+       drmUnmap( screen->status.map, screen->status.size );
-+       drmUnmap( screen->mmio.map, screen->mmio.size );
-+       FREE( screen );
-+       __driUtilMessage("%s: drmMapBufs failed\n", __FUNCTION__ );
-+       return NULL;
-+     }
-+     
-+     if ( dri_priv->gartTexHandle && dri_priv->gartTexMapSize ) {
-+       screen->gartTextures.handle = dri_priv->gartTexHandle;
-+       screen->gartTextures.size   = dri_priv->gartTexMapSize;
-+       if ( drmMap( sPriv->fd,
-+		    screen->gartTextures.handle,
-+		    screen->gartTextures.size,
-+		    (drmAddressPtr)&screen->gartTextures.map ) ) {
- 	 drmUnmapBufs( screen->buffers );
- 	 drmUnmap( screen->status.map, screen->status.size );
- 	 drmUnmap( screen->mmio.map, screen->mmio.size );
- 	 FREE( screen );
- 	 __driUtilMessage("%s: drmMap failed for GART texture area\n", __FUNCTION__);
- 	 return NULL;
--      }
--
--      screen->gart_texture_offset = dri_priv->gartTexOffset + screen->gart_base;
-+       }
-+       
-+       screen->gart_texture_offset = dri_priv->gartTexOffset + screen->gart_base;
-+     }
-    }
- 
-    screen->chip_flags = 0;
-@@ -838,7 +867,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
-    ret = radeonGetParam( sPriv->fd, RADEON_PARAM_FB_LOCATION,
-                          &temp);
-    if (ret) {
--       if (screen->chip_family < CHIP_FAMILY_RS690)
-+       if (screen->chip_family < CHIP_FAMILY_RS690 && !screen->kernel_mm)
- 	   screen->fbLocation      = ( INREG( RADEON_MC_FB_LOCATION ) & 0xffff) << 16;
-        else {
-            FREE( screen );
-@@ -960,6 +989,85 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
- 
-    screen->driScreen = sPriv;
-    screen->sarea_priv_offset = dri_priv->sarea_priv_offset;
-+   screen->sarea = (drm_radeon_sarea_t *) ((GLubyte *) sPriv->pSAREA +
-+					       screen->sarea_priv_offset);
-+
-+   if (screen->kernel_mm)
-+     screen->bom = radeon_bo_manager_gem_ctor(sPriv->fd);
-+   else
-+     screen->bom = radeon_bo_manager_legacy_ctor(screen);
-+   if (screen->bom == NULL) {
-+     free(screen);
-+     return NULL;
-+   }
-+   return screen;
-+}
-+
-+static radeonScreenPtr
-+radeonCreateScreen2(__DRIscreenPrivate *sPriv)
-+{
-+   radeonScreenPtr screen;
-+   int i;
-+
-+   /* Allocate the private area */
-+   screen = (radeonScreenPtr) CALLOC( sizeof(*screen) );
-+   if ( !screen ) {
-+      __driUtilMessage("%s: Could not allocate memory for screen structure",
-+		       __FUNCTION__);
-+      fprintf(stderr, "leaving here\n");
-+      return NULL;
-+   }
-+
-+#if DO_DEBUG && RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
-+	RADEON_DEBUG = driParseDebugString(getenv("RADEON_DEBUG"), debug_control);
-+#endif
-+
-+   /* parse information in __driConfigOptions */
-+   driParseOptionInfo (&screen->optionCache,
-+		       __driConfigOptions, __driNConfigOptions);
-+
-+   screen->kernel_mm = 1;
-+   screen->chip_flags = 0;
-+   /* FIXME: do either an ioctl (bad) or a sysfs file for driver to
-+    * information about which chipset is their */
-+   screen->chip_family = CHIP_FAMILY_RV350;
-+   screen->chip_flags = RADEON_CHIPSET_TCL | RADEON_CLASS_R300;
-+
-+   i = 0;
-+   screen->extensions[i++] = &driCopySubBufferExtension.base;
-+   screen->extensions[i++] = &driFrameTrackingExtension.base;
-+   screen->extensions[i++] = &driReadDrawableExtension;
-+
-+   if ( screen->irq != 0 ) {
-+       screen->extensions[i++] = &driSwapControlExtension.base;
-+       screen->extensions[i++] = &driMediaStreamCounterExtension.base;
-+   }
-+
-+#if !RADEON_COMMON
-+   screen->extensions[i++] = &radeonTexOffsetExtension.base;
-+#endif
-+
-+#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200)
-+   if (IS_R200_CLASS(screen))
-+       screen->extensions[i++] = &r200AllocateExtension.base;
-+
-+   screen->extensions[i++] = &r200texOffsetExtension.base;
-+#endif
-+
-+#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
-+   screen->extensions[i++] = &r300texOffsetExtension.base;
-+   screen->extensions[i++] = &r300TexBufferExtension.base;
-+#endif
-+
-+   screen->extensions[i++] = NULL;
-+   sPriv->extensions = screen->extensions;
-+
-+   screen->driScreen = sPriv;
-+   screen->bom = radeon_bo_manager_gem_ctor(sPriv->fd);
-+   if (screen->bom == NULL) {
-+       free(screen);
-+       return NULL;
-+   }
-    return screen;
- }
- 
-@@ -968,23 +1076,30 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
- static void
- radeonDestroyScreen( __DRIscreenPrivate *sPriv )
- {
--   radeonScreenPtr screen = (radeonScreenPtr)sPriv->private;
--
--   if (!screen)
--      return;
--
--   if ( screen->gartTextures.map ) {
--      drmUnmap( screen->gartTextures.map, screen->gartTextures.size );
--   }
--   drmUnmapBufs( screen->buffers );
--   drmUnmap( screen->status.map, screen->status.size );
--   drmUnmap( screen->mmio.map, screen->mmio.size );
-+    radeonScreenPtr screen = (radeonScreenPtr)sPriv->private;
-+
-+    if (!screen)
-+        return;
-+
-+    if (screen->kernel_mm) {
-+        radeon_tracker_print(&screen->bom->tracker, stderr);
-+        radeon_bo_manager_gem_dtor(screen->bom);
-+    } else {
-+        radeon_bo_manager_legacy_dtor(screen->bom);
-+
-+        if ( screen->gartTextures.map ) {
-+            drmUnmap( screen->gartTextures.map, screen->gartTextures.size );
-+        }
-+        drmUnmapBufs( screen->buffers );
-+        drmUnmap( screen->status.map, screen->status.size );
-+        drmUnmap( screen->mmio.map, screen->mmio.size );
-+    }
- 
--   /* free all option information */
--   driDestroyOptionInfo (&screen->optionCache);
-+    /* free all option information */
-+    driDestroyOptionInfo (&screen->optionCache);
- 
--   FREE( screen );
--   sPriv->private = NULL;
-+    FREE( screen );
-+    sPriv->private = NULL;
- }
- 
- 
-@@ -993,15 +1108,176 @@ radeonDestroyScreen( __DRIscreenPrivate *sPriv )
- static GLboolean
- radeonInitDriver( __DRIscreenPrivate *sPriv )
- {
--   sPriv->private = (void *) radeonCreateScreen( sPriv );
--   if ( !sPriv->private ) {
--      radeonDestroyScreen( sPriv );
--      return GL_FALSE;
--   }
-+    if (sPriv->dri2.enabled) {
-+        sPriv->private = (void *) radeonCreateScreen2( sPriv );
-+    } else {
-+        sPriv->private = (void *) radeonCreateScreen( sPriv );
-+    }
-+    if ( !sPriv->private ) {
-+        radeonDestroyScreen( sPriv );
-+        return GL_FALSE;
-+    }
-+
-+    return GL_TRUE;
-+}
-+
-+#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
-+static GLboolean
-+radeon_alloc_window_storage(GLcontext *ctx, struct gl_renderbuffer *rb,
-+			    GLenum intFormat, GLuint w, GLuint h)
-+{
-+    rb->Width = w;
-+    rb->Height = h;
-+    rb->_ActualFormat = intFormat;
-+
-+    return GL_TRUE;
-+}
-+
-+
-+static struct radeon_renderbuffer *
-+radeon_create_renderbuffer(GLenum format, __DRIdrawablePrivate *driDrawPriv)
-+{
-+    struct radeon_renderbuffer *ret;
-+
-+    ret = CALLOC_STRUCT(radeon_renderbuffer);
-+    if (!ret)
-+	return NULL;
-+
-+    _mesa_init_renderbuffer(&ret->base, 0);
-+
-+    /* XXX format junk */
-+    switch (format) {
-+	case GL_RGB5:
-+	    ret->base._ActualFormat = GL_RGB5;
-+	    ret->base._BaseFormat = GL_RGBA;
-+	    ret->base.RedBits = 5;
-+	    ret->base.GreenBits = 6;
-+	    ret->base.BlueBits = 5;
-+	    ret->base.DataType = GL_UNSIGNED_BYTE;
-+	    break;
-+	case GL_RGBA8:
-+	    ret->base._ActualFormat = GL_RGBA8;
-+	    ret->base._BaseFormat = GL_RGBA;
-+	    ret->base.RedBits = 8;
-+	    ret->base.GreenBits = 8;
-+	    ret->base.BlueBits = 8;
-+	    ret->base.AlphaBits = 8;
-+	    ret->base.DataType = GL_UNSIGNED_BYTE;
-+	    break;
-+	case GL_STENCIL_INDEX8_EXT:
-+	    ret->base._ActualFormat = GL_STENCIL_INDEX8_EXT;
-+	    ret->base._BaseFormat = GL_STENCIL_INDEX;
-+	    ret->base.StencilBits = 8;
-+	    ret->base.DataType = GL_UNSIGNED_BYTE;
-+	    break;
-+	case GL_DEPTH_COMPONENT16:
-+	    ret->base._ActualFormat = GL_DEPTH_COMPONENT16;
-+	    ret->base._BaseFormat = GL_DEPTH_COMPONENT;
-+	    ret->base.DepthBits = 16;
-+	    ret->base.DataType = GL_UNSIGNED_SHORT;
-+	    break;
-+	case GL_DEPTH_COMPONENT24:
-+	    ret->base._ActualFormat = GL_DEPTH24_STENCIL8_EXT;
-+	    ret->base._BaseFormat = GL_DEPTH_COMPONENT;
-+	    ret->base.DepthBits = 24;
-+	    ret->base.DataType = GL_UNSIGNED_INT;
-+	    break;
-+	case GL_DEPTH24_STENCIL8_EXT:
-+	    ret->base._ActualFormat = GL_DEPTH24_STENCIL8_EXT;
-+	    ret->base._BaseFormat = GL_DEPTH_STENCIL_EXT;
-+	    ret->base.DepthBits = 24;
-+	    ret->base.StencilBits = 8;
-+	    ret->base.DataType = GL_UNSIGNED_INT_24_8_EXT;
-+	    break;
-+	default:
-+	    fprintf(stderr, "%s: Unknown format 0x%04x\n", __FUNCTION__, format);
-+	    _mesa_delete_renderbuffer(&ret->base);
-+	    return NULL;
-+    }
- 
--   return GL_TRUE;
-+    ret->dPriv = driDrawPriv;
-+    ret->base.InternalFormat = format;
-+
-+    ret->base.AllocStorage = radeon_alloc_window_storage;
-+
-+    radeonSetSpanFunctions(ret);
-+
-+    ret->bo = NULL;
-+    return ret;
- }
- 
-+/**
-+ * Create the Mesa framebuffer and renderbuffers for a given window/drawable.
-+ *
-+ * \todo This function (and its interface) will need to be updated to support
-+ * pbuffers.
-+ */
-+static GLboolean
-+radeonCreateBuffer( __DRIscreenPrivate *driScrnPriv,
-+                    __DRIdrawablePrivate *driDrawPriv,
-+                    const __GLcontextModes *mesaVis,
-+                    GLboolean isPixmap )
-+{
-+   radeonScreenPtr screen = (radeonScreenPtr) driScrnPriv->private;
-+
-+    const GLboolean swDepth = GL_FALSE;
-+    const GLboolean swAlpha = GL_FALSE;
-+    const GLboolean swAccum = mesaVis->accumRedBits > 0;
-+    const GLboolean swStencil = mesaVis->stencilBits > 0 &&
-+	mesaVis->depthBits != 24;
-+    GLenum rgbFormat = (mesaVis->redBits == 5 ? GL_RGB5 : GL_RGBA8);
-+    GLenum depthFormat = GL_NONE;
-+    struct gl_framebuffer *fb = _mesa_create_framebuffer(mesaVis);
-+
-+    if (mesaVis->depthBits == 16)
-+	depthFormat = GL_DEPTH_COMPONENT16;
-+    else if (mesaVis->depthBits == 24)
-+	depthFormat = GL_DEPTH_COMPONENT24;
-+
-+    /* front color renderbuffer */
-+    {
-+	struct radeon_renderbuffer *front =
-+	    radeon_create_renderbuffer(rgbFormat, driDrawPriv);
-+	_mesa_add_renderbuffer(fb, BUFFER_FRONT_LEFT, &front->base);
-+	front->has_surface = 1;
-+    }
-+
-+    /* back color renderbuffer */
-+    if (mesaVis->doubleBufferMode) {
-+	struct radeon_renderbuffer *back =
-+	    radeon_create_renderbuffer(rgbFormat, driDrawPriv);
-+	_mesa_add_renderbuffer(fb, BUFFER_BACK_LEFT, &back->base);
-+	back->has_surface = 1;
-+    }
-+
-+    /* depth renderbuffer */
-+    if (depthFormat != GL_NONE) {
-+	struct radeon_renderbuffer *depth =
-+	    radeon_create_renderbuffer(depthFormat, driDrawPriv);
-+	_mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depth->base);
-+	depth->has_surface = screen->depthHasSurface;
-+    }
-+
-+    /* stencil renderbuffer */
-+    if (mesaVis->stencilBits > 0 && !swStencil) {
-+	struct radeon_renderbuffer *stencil =
-+	    radeon_create_renderbuffer(GL_STENCIL_INDEX8_EXT, driDrawPriv);
-+	_mesa_add_renderbuffer(fb, BUFFER_STENCIL, &stencil->base);
-+	stencil->has_surface = screen->depthHasSurface;
-+    }
-+
-+    _mesa_add_soft_renderbuffers(fb,
-+	    GL_FALSE, /* color */
-+	    swDepth,
-+	    swStencil,
-+	    swAccum,
-+	    swAlpha,
-+	    GL_FALSE /* aux */);
-+    driDrawPriv->driverPrivate = (void *) fb;
-+
-+    return (driDrawPriv->driverPrivate != NULL);
-+}
-+#else
- 
- /**
-  * Create the Mesa framebuffer and renderbuffers for a given window/drawable.
-@@ -1062,7 +1338,7 @@ radeonCreateBuffer( __DRIscreenPrivate *driScrnPriv,
-                                  driDrawPriv);
-          radeonSetSpanFunctions(depthRb, mesaVis);
-          _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depthRb->Base);
--	 depthRb->depthHasSurface = screen->depthHasSurface;
-+    	// depthRb->has_surface = screen->depthHasSurface;
-       }
-       else if (mesaVis->depthBits == 24) {
-          driRenderbuffer *depthRb
-@@ -1073,7 +1349,7 @@ radeonCreateBuffer( __DRIscreenPrivate *driScrnPriv,
-                                  driDrawPriv);
-          radeonSetSpanFunctions(depthRb, mesaVis);
-          _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depthRb->Base);
--	 depthRb->depthHasSurface = screen->depthHasSurface;
-+ 	 //    depthRb->has_surface = screen->depthHasSurface;
-       }
- 
-       /* stencil renderbuffer */
-@@ -1086,7 +1362,7 @@ radeonCreateBuffer( __DRIscreenPrivate *driScrnPriv,
-                                  driDrawPriv);
-          radeonSetSpanFunctions(stencilRb, mesaVis);
-          _mesa_add_renderbuffer(fb, BUFFER_STENCIL, &stencilRb->Base);
--	 stencilRb->depthHasSurface = screen->depthHasSurface;
-+         //stencilRb->has_surface = screen->depthHasSurface;
-       }
- 
-       _mesa_add_soft_renderbuffers(fb,
-@@ -1101,11 +1377,30 @@ radeonCreateBuffer( __DRIscreenPrivate *driScrnPriv,
-       return (driDrawPriv->driverPrivate != NULL);
-    }
- }
--
-+#endif
- 
- static void
- radeonDestroyBuffer(__DRIdrawablePrivate *driDrawPriv)
- {
-+	struct radeon_renderbuffer *rb;
-+	GLframebuffer *fb;
-+    
-+    fb = (void*)driDrawPriv->driverPrivate;
-+    rb = (void *)fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
-+    if (rb && rb->bo) {
-+        radeon_bo_unref(rb->bo);
-+        rb->bo = NULL;
-+    }
-+    rb = (void *)fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
-+    if (rb && rb->bo) {
-+        radeon_bo_unref(rb->bo);
-+        rb->bo = NULL;
-+    }
-+    rb = (void *)fb->Attachment[BUFFER_DEPTH].Renderbuffer;
-+    if (rb && rb->bo) {
-+        radeon_bo_unref(rb->bo);
-+        rb->bo = NULL;
-+    }
-    _mesa_unreference_framebuffer((GLframebuffer **)(&(driDrawPriv->driverPrivate)));
- }
- 
-@@ -1197,13 +1492,48 @@ radeonInitScreen(__DRIscreenPrivate *psp)
-    if (!radeonInitDriver(psp))
-        return NULL;
- 
-+   /* for now fill in all modes */
-    return radeonFillInModes( psp,
- 			     dri_priv->bpp,
- 			     (dri_priv->bpp == 16) ? 16 : 24,
--			     (dri_priv->bpp == 16) ? 0  : 8,
--			     (dri_priv->backOffset != dri_priv->depthOffset) );
-+			     (dri_priv->bpp == 16) ? 0  : 8, 1);
- }
- 
-+/**
-+ * This is the driver specific part of the createNewScreen entry point.
-+ * Called when using DRI2.
-+ *
-+ * \return the __GLcontextModes supported by this driver
-+ */
-+static const
-+__DRIconfig **radeonInitScreen2(__DRIscreenPrivate *psp)
-+{
-+   /* Calling driInitExtensions here, with a NULL context pointer,
-+    * does not actually enable the extensions.  It just makes sure
-+    * that all the dispatch offsets for all the extensions that
-+    * *might* be enables are known.  This is needed because the
-+    * dispatch offsets need to be known when _mesa_context_create
-+    * is called, but we can't enable the extensions until we have a
-+    * context pointer.
-+    *
-+    * Hello chicken.  Hello egg.  How are you two today?
-+    */
-+   driInitExtensions( NULL, card_extensions, GL_FALSE );
-+#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200)
-+   driInitExtensions( NULL, blend_extensions, GL_FALSE );
-+   driInitSingleExtension( NULL, ARB_vp_extension );
-+   driInitSingleExtension( NULL, NV_vp_extension );
-+   driInitSingleExtension( NULL, ATI_fs_extension );
-+   driInitExtensions( NULL, point_extensions, GL_FALSE );
-+#endif
-+
-+   if (!radeonInitDriver(psp)) {
-+       return NULL;
-+    }
-+
-+   /* for now fill in all modes */
-+   return radeonFillInModes( psp, 24, 24, 8, 1);
-+}
- 
- /**
-  * Get information about previous buffer swaps.
-@@ -1252,6 +1582,8 @@ const struct __DriverAPIRec driDriverAPI = {
-    .WaitForSBC      = NULL,
-    .SwapBuffersMSC  = NULL,
-    .CopySubBuffer   = radeonCopySubBuffer,
-+    /* DRI2 */
-+   .InitScreen2     = radeonInitScreen2,
- };
- #else
- const struct __DriverAPIRec driDriverAPI = {
-@@ -1272,3 +1604,4 @@ const struct __DriverAPIRec driDriverAPI = {
-    .CopySubBuffer   = r200CopySubBuffer,
- };
- #endif
-+
-diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.h b/src/mesa/drivers/dri/radeon/radeon_screen.h
-index b84c70b..3287e12 100644
---- a/src/mesa/drivers/dri/radeon/radeon_screen.h
-+++ b/src/mesa/drivers/dri/radeon/radeon_screen.h
-@@ -46,6 +46,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #include "radeon_reg.h"
- #include "drm_sarea.h"
- #include "xmlconfig.h"
-+#include "radeon_bo.h"
- 
- 
- typedef struct {
-@@ -54,7 +55,7 @@ typedef struct {
-    drmAddress map;			/* Mapping of the DRM region */
- } radeonRegionRec, *radeonRegionPtr;
- 
--typedef struct {
-+typedef struct radeon_screen {
-    int chip_family;
-    int chip_flags;
-    int cpp;
-@@ -103,9 +104,12 @@ typedef struct {
-    /* Configuration cache with default values for all contexts */
-    driOptionCache optionCache;
- 
--   const __DRIextension *extensions[8];
-+   const __DRIextension *extensions[16];
- 
-    int num_gb_pipes;
-+   int kernel_mm;
-+   drm_radeon_sarea_t *sarea;	/* Private SAREA data */
-+   struct radeon_bo_manager *bom;
- } radeonScreenRec, *radeonScreenPtr;
- 
- #define IS_R100_CLASS(screen) \
-diff --git a/src/mesa/drivers/dri/radeon/radeon_span.h b/src/mesa/drivers/dri/radeon/radeon_span.h
-index 9abe086..1650a9b 100644
---- a/src/mesa/drivers/dri/radeon/radeon_span.h
-+++ b/src/mesa/drivers/dri/radeon/radeon_span.h
-@@ -44,7 +44,13 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- 
- #include "drirenderbuffer.h"
- 
-+#include "radeon_buffer.h"
-+
- extern void radeonInitSpanFuncs(GLcontext * ctx);
--extern void radeonSetSpanFunctions(driRenderbuffer * rb, const GLvisual * vis);
- 
-+#if COMPILE_R300
-+extern void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb);
-+#else
-+extern void radeonSetSpanFunctions(driRenderbuffer * rb, const GLvisual * vis);
-+#endif
- #endif
diff --git a/radeon-rewrite.patch b/radeon-rewrite.patch
new file mode 100644
index 0000000..9a86cc4
--- /dev/null
+++ b/radeon-rewrite.patch
@@ -0,0 +1,34385 @@
+diff --git a/configs/autoconf.in b/configs/autoconf.in
+index b352974..d786029 100644
+--- a/configs/autoconf.in
++++ b/configs/autoconf.in
+@@ -20,6 +20,8 @@ CXXFLAGS = @CPPFLAGS@ @CXXFLAGS@ \
+ 	$(OPT_FLAGS) $(PIC_FLAGS) $(ARCH_FLAGS) $(DEFINES)
+ LDFLAGS = @LDFLAGS@
+ EXTRA_LIB_PATH = @EXTRA_LIB_PATH@
++RADEON_CFLAGS = @RADEON_CFLAGS@
++RADEON_LDFLAGS = @RADEON_LDFLAGS@
+ 
+ # Assembler
+ MESA_ASM_SOURCES = @MESA_ASM_SOURCES@
+diff --git a/configure.ac b/configure.ac
+index a9a8d5a..ea2992d 100644
+--- a/configure.ac
++++ b/configure.ac
+@@ -573,6 +575,13 @@ dri)
+     GL_PC_REQ_PRIV="libdrm >= $LIBDRM_REQUIRED dri2proto >= $DRI2PROTO_REQUIRED"
+     DRI_PC_REQ_PRIV="libdrm >= $LIBDRM_REQUIRED"
+ 
++    PKG_CHECK_MODULES([LIBDRM_RADEON], [libdrm_radeon], HAVE_LIBDRM_RADEON=yes, HAVE_LIBDRM_RADEON=no)
++
++    if test "$HAVE_LIBDRM_RADEON" = yes; then
++	RADEON_CFLAGS="-DHAVE_LIBDRM_RADEON=1 $LIBDRM_RADEON_CFLAGS"
++	RADEON_LDFLAGS=$LIBDRM_RADEON_LIBS
++    fi
++
+     # find the DRI deps for libGL
+     if test "$x11_pkgconfig" = yes; then
+         # add xcb modules if necessary
+@@ -578,6 +585,8 @@ AC_SUBST([GL_PC_REQ_PRIV])
+ AC_SUBST([GL_PC_LIB_PRIV])
+ AC_SUBST([GL_PC_CFLAGS])
+ AC_SUBST([DRI_PC_REQ_PRIV])
++AC_SUBST([RADEON_CFLAGS])
++AC_SUBST([RADEON_LDFLAGS])
+ 
+ dnl
+ dnl More X11 setup
+diff --git a/src/mesa/drivers/dri/r200/Makefile b/src/mesa/drivers/dri/r200/Makefile
+index e9144ac..e593ed9 100644
+--- a/src/mesa/drivers/dri/r200/Makefile
++++ b/src/mesa/drivers/dri/r200/Makefile
+@@ -3,6 +3,8 @@
+ TOP = ../../../../..
+ include $(TOP)/configs/current
+ 
++CFLAGS += $(RADEON_CFLAGS)
++
+ LIBNAME = r200_dri.so
+ 
+ MINIGLX_SOURCES = server/radeon_dri.c 
+@@ -11,25 +13,35 @@ ifeq ($(USING_EGL), 1)
+ EGL_SOURCES = server/radeon_egl.c
+ endif
+ 
++RADEON_COMMON_SOURCES = \
++	radeon_texture.c \
++	radeon_common_context.c \
++	radeon_common.c \
++	radeon_dma.c \
++	radeon_lock.c \
++	radeon_bo_legacy.c \
++	radeon_cs_legacy.c \
++	radeon_mipmap_tree.c \
++	radeon_span.c
++
++
+ DRIVER_SOURCES = r200_context.c \
+ 		 r200_ioctl.c \
+-		 r200_lock.c \
+ 		 r200_state.c \
+ 		 r200_state_init.c \
+ 		 r200_cmdbuf.c \
+ 		 r200_pixel.c \
+ 		 r200_tex.c \
+-		 r200_texmem.c \
+ 		 r200_texstate.c \
+ 		 r200_tcl.c \
+ 		 r200_swtcl.c \
+-		 r200_span.c \
+ 		 r200_maos.c \
+ 		 r200_sanity.c \
+ 		 r200_fragshader.c \
+ 		 r200_vertprog.c \
+ 		 radeon_screen.c \
+-		 $(EGL_SOURCES)
++		 $(EGL_SOURCES) \
++		 $(RADEON_COMMON_SOURCES)
+ 
+ C_SOURCES = $(COMMON_SOURCES) $(DRIVER_SOURCES)
+ 
+@@ -48,7 +60,29 @@ SYMLINKS = \
+ COMMON_SYMLINKS = \
+ 	radeon_chipset.h \
+ 	radeon_screen.c \
+-	radeon_screen.h
++	radeon_screen.h \
++	radeon_bo_legacy.c \
++	radeon_cs_legacy.c \
++	radeon_bo_legacy.h \
++	radeon_cs_legacy.h \
++	radeon_bocs_wrapper.h \
++	radeon_span.h \
++	radeon_span.c \
++	radeon_lock.c \
++	radeon_lock.h \
++	radeon_common.c \
++	radeon_common_context.c \
++	radeon_common_context.h \
++	radeon_common.h \
++	radeon_cmdbuf.h \
++	radeon_mipmap_tree.c \
++	radeon_mipmap_tree.h \
++	radeon_texture.c \
++	radeon_texture.h \
++	radeon_dma.c \
++	radeon_dma.h
++
++DRI_LIB_DEPS += $(RADEON_LDFLAGS)
+ 
+ ##### TARGETS #####
+ 
+diff --git a/src/mesa/drivers/dri/r200/r200_cmdbuf.c b/src/mesa/drivers/dri/r200/r200_cmdbuf.c
+index e163377..ae31bcb 100644
+--- a/src/mesa/drivers/dri/r200/r200_cmdbuf.c
++++ b/src/mesa/drivers/dri/r200/r200_cmdbuf.c
+@@ -38,6 +38,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "swrast/swrast.h"
+ #include "main/simple_list.h"
+ 
++#include "radeon_common.h"
+ #include "r200_context.h"
+ #include "r200_state.h"
+ #include "r200_ioctl.h"
+@@ -45,18 +46,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "r200_sanity.h"
+ #include "radeon_reg.h"
+ 
+-static void print_state_atom( struct r200_state_atom *state )
+-{
+-   int i;
+-
+-   fprintf(stderr, "emit %s/%d\n", state->name, state->cmd_size);
+-
+-   if (0 & R200_DEBUG & DEBUG_VERBOSE) 
+-      for (i = 0 ; i < state->cmd_size ; i++) 
+-	 fprintf(stderr, "\t%s[%d]: %x\n", state->name, i, state->cmd[i]);
+-
+-}
+-
+ /* The state atoms will be emitted in the order they appear in the atom list,
+  * so this step is important.
+  */
+@@ -64,141 +53,56 @@ void r200SetUpAtomList( r200ContextPtr rmesa )
+ {
+    int i, mtu;
+ 
+-   mtu = rmesa->glCtx->Const.MaxTextureUnits;
+-
+-   make_empty_list(&rmesa->hw.atomlist);
+-   rmesa->hw.atomlist.name = "atom-list";
+-
+-   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.ctx );
+-   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.set );
+-   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.lin );
+-   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.msk );
+-   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vpt );
+-   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vtx );
+-   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vap );
+-   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vte );
+-   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.msc );
+-   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.cst );
+-   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.zbs );
+-   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tcl );
+-   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.msl );
+-   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tcg );
+-   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.grd );
+-   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.fog );
+-   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tam );
+-   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tf );
+-   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.atf );
++   mtu = rmesa->radeon.glCtx->Const.MaxTextureUnits;
++
++   make_empty_list(&rmesa->radeon.hw.atomlist);
++   rmesa->radeon.hw.atomlist.name = "atom-list";
++
++   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.ctx );
++   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.set );
++   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.lin );
++   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.msk );
++   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vpt );
++   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vtx );
++   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vap );
++   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vte );
++   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.msc );
++   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.cst );
++   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.zbs );
++   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.tcl );
++   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.msl );
++   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.tcg );
++   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.grd );
++   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.fog );
++   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.tam );
++   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.tf );
++   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.atf );
+    for (i = 0; i < mtu; ++i)
+-       insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tex[i] );
++       insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.tex[i] );
+    for (i = 0; i < mtu; ++i)
+-       insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.cube[i] );
++       insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.cube[i] );
+    for (i = 0; i < 6; ++i)
+-       insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.pix[i] );
+-   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.afs[0] );
+-   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.afs[1] );
++       insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.pix[i] );
++   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.afs[0] );
++   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.afs[1] );
+    for (i = 0; i < 8; ++i)
+-       insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.lit[i] );
++       insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.lit[i] );
+    for (i = 0; i < 3 + mtu; ++i)
+-       insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.mat[i] );
+-   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.eye );
+-   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.glt );
++       insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.mat[i] );
++   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.eye );
++   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.glt );
+    for (i = 0; i < 2; ++i)
+-      insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.mtl[i] );
++      insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.mtl[i] );
+    for (i = 0; i < 6; ++i)
+-       insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.ucp[i] );
+-   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.spr );
+-   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.ptp );
+-   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.prf );
+-   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.pvs );
+-   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vpp[0] );
+-   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vpp[1] );
+-   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vpi[0] );
+-   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vpi[1] );
+-}
+-
+-static void r200SaveHwState( r200ContextPtr rmesa )
+-{
+-   struct r200_state_atom *atom;
+-   char * dest = rmesa->backup_store.cmd_buf;
+-
+-   if (R200_DEBUG & DEBUG_STATE)
+-      fprintf(stderr, "%s\n", __FUNCTION__);
+-
+-   rmesa->backup_store.cmd_used = 0;
+-
+-   foreach( atom, &rmesa->hw.atomlist ) {
+-      if ( atom->check( rmesa->glCtx, atom->idx ) ) {
+-	 int size = atom->cmd_size * 4;
+-	 memcpy( dest, atom->cmd, size);
+-	 dest += size;
+-	 rmesa->backup_store.cmd_used += size;
+-	 if (R200_DEBUG & DEBUG_STATE)
+-	    print_state_atom( atom );
+-      }
+-   }
+-
+-   assert( rmesa->backup_store.cmd_used <= R200_CMD_BUF_SZ );
+-   if (R200_DEBUG & DEBUG_STATE)
+-      fprintf(stderr, "Returning to r200EmitState\n");
+-}
+-
+-void r200EmitState( r200ContextPtr rmesa )
+-{
+-   char *dest;
+-   int mtu;
+-   struct r200_state_atom *atom;
+-
+-   if (R200_DEBUG & (DEBUG_STATE|DEBUG_PRIMS))
+-      fprintf(stderr, "%s\n", __FUNCTION__);
+-
+-   if (rmesa->save_on_next_emit) {
+-      r200SaveHwState(rmesa);
+-      rmesa->save_on_next_emit = GL_FALSE;
+-   }
+-
+-   if (!rmesa->hw.is_dirty && !rmesa->hw.all_dirty)
+-      return;
+-
+-   mtu = rmesa->glCtx->Const.MaxTextureUnits;
+-
+-   /* To avoid going across the entire set of states multiple times, just check
+-    * for enough space for the case of emitting all state, and inline the
+-    * r200AllocCmdBuf code here without all the checks.
+-    */
+-   r200EnsureCmdBufSpace( rmesa, rmesa->hw.max_state_size );
+-
+-   /* we need to calculate dest after EnsureCmdBufSpace
+-      as we may flush the buffer - airlied */
+-   dest = rmesa->store.cmd_buf + rmesa->store.cmd_used;
+-   if (R200_DEBUG & DEBUG_STATE) {
+-      foreach( atom, &rmesa->hw.atomlist ) {
+-	 if ( atom->dirty || rmesa->hw.all_dirty ) {
+-	    if ( atom->check( rmesa->glCtx, atom->idx ) )
+-	       print_state_atom( atom );
+-	    else
+-	       fprintf(stderr, "skip state %s\n", atom->name);
+-	 }
+-      }
+-   }
+-
+-   foreach( atom, &rmesa->hw.atomlist ) {
+-      if ( rmesa->hw.all_dirty )
+-	 atom->dirty = GL_TRUE;
+-      if ( atom->dirty ) {
+-	 if ( atom->check( rmesa->glCtx, atom->idx ) ) {
+-	    int size = atom->cmd_size * 4;
+-	    memcpy( dest, atom->cmd, size);
+-	    dest += size;
+-	    rmesa->store.cmd_used += size;
+-	    atom->dirty = GL_FALSE;
+-	 }
+-      }
+-   }
+-
+-   assert( rmesa->store.cmd_used <= R200_CMD_BUF_SZ );
+-
+-   rmesa->hw.is_dirty = GL_FALSE;
+-   rmesa->hw.all_dirty = GL_FALSE;
++       insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.ucp[i] );
++   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.spr );
++   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.ptp );
++   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.prf );
++   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.pvs );
++   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vpp[0] );
++   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vpp[1] );
++   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vpi[0] );
++   insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vpi[1] );
+ }
+ 
+ /* Fire a section of the retained (indexed_verts) buffer as a regular
+@@ -209,50 +113,81 @@ void r200EmitVbufPrim( r200ContextPtr rmesa,
+                        GLuint vertex_nr )
+ {
+    drm_radeon_cmd_header_t *cmd;
++   BATCH_LOCALS(&rmesa->radeon);
+ 
+    assert(!(primitive & R200_VF_PRIM_WALK_IND));
+    
+-   r200EmitState( rmesa );
++   radeonEmitState(&rmesa->radeon);
+    
+    if (R200_DEBUG & (DEBUG_IOCTL|DEBUG_PRIMS))
+       fprintf(stderr, "%s cmd_used/4: %d prim %x nr %d\n", __FUNCTION__,
+ 	      rmesa->store.cmd_used/4, primitive, vertex_nr);
+-   
+-   cmd = (drm_radeon_cmd_header_t *)r200AllocCmdBuf( rmesa, VBUF_BUFSZ,
+-						  __FUNCTION__ );
+-   cmd[0].i = 0;
+-   cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
+-   cmd[1].i = R200_CP_CMD_3D_DRAW_VBUF_2;
+-   cmd[2].i = (primitive | 
+-	       R200_VF_PRIM_WALK_LIST |
+-	       R200_VF_COLOR_ORDER_RGBA |
+-	       (vertex_nr << R200_VF_VERTEX_NUMBER_SHIFT));
++ 
++   BEGIN_BATCH(3);
++   OUT_BATCH_PACKET3_CLIP(R200_CP_CMD_3D_DRAW_VBUF_2, 0);
++   OUT_BATCH(primitive | R200_VF_PRIM_WALK_LIST | R200_VF_COLOR_ORDER_RGBA |
++	     (vertex_nr << R200_VF_VERTEX_NUMBER_SHIFT));
++   END_BATCH();
+ }
+ 
++static void r200FireEB(r200ContextPtr rmesa, int vertex_count, int type)
++{
++	BATCH_LOCALS(&rmesa->radeon);
++
++	if (vertex_count > 0) {
++		BEGIN_BATCH(8+2);
++		OUT_BATCH_PACKET3(R200_CP_CMD_3D_DRAW_INDX_2, 0);
++		OUT_BATCH(R200_VF_PRIM_WALK_IND |
++			  ((vertex_count + 0) << 16) |
++			  type);
++		
++		if (!rmesa->radeon.radeonScreen->kernel_mm) {
++			OUT_BATCH_PACKET3(R200_CP_CMD_INDX_BUFFER, 2);
++			OUT_BATCH((0x80 << 24) | (0 << 16) | 0x810);
++			OUT_BATCH_RELOC(rmesa->tcl.elt_dma_offset,
++					rmesa->tcl.elt_dma_bo,
++					rmesa->tcl.elt_dma_offset,
++					RADEON_GEM_DOMAIN_GTT, 0, 0);
++			OUT_BATCH(vertex_count/2);
++		} else {
++			OUT_BATCH_PACKET3(R200_CP_CMD_INDX_BUFFER, 2);
++			OUT_BATCH((0x80 << 24) | (0 << 16) | 0x810);
++			OUT_BATCH(rmesa->tcl.elt_dma_offset);
++			OUT_BATCH(vertex_count/2);
++			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
++					      rmesa->tcl.elt_dma_bo,
++					      RADEON_GEM_DOMAIN_GTT, 0, 0);
++		}
++		END_BATCH();
++	}
++}
+ 
+-void r200FlushElts( r200ContextPtr rmesa )
++void r200FlushElts(GLcontext *ctx)
+ {
+-   int *cmd = (int *)(rmesa->store.cmd_buf + rmesa->store.elts_start);
++  r200ContextPtr rmesa = R200_CONTEXT(ctx);
+    int dwords;
+-   int nr = (rmesa->store.cmd_used - (rmesa->store.elts_start + 12)) / 2;
++   int nr, elt_used = rmesa->tcl.elt_used;
+ 
+    if (R200_DEBUG & (DEBUG_IOCTL|DEBUG_PRIMS))
+-      fprintf(stderr, "%s\n", __FUNCTION__);
++     fprintf(stderr, "%s %x %d\n", __FUNCTION__, rmesa->tcl.hw_primitive, elt_used);
++
++   assert( rmesa->radeon.dma.flush == r200FlushElts );
++   rmesa->radeon.dma.flush = NULL;
++
++   elt_used = (elt_used + 2) & ~2;
+ 
+-   assert( rmesa->dma.flush == r200FlushElts );
+-   rmesa->dma.flush = NULL;
++   nr = elt_used / 2;
+ 
+-   /* Cope with odd number of elts:
+-    */
+-   rmesa->store.cmd_used = (rmesa->store.cmd_used + 2) & ~2;
+-   dwords = (rmesa->store.cmd_used - rmesa->store.elts_start) / 4;
++   radeon_bo_unmap(rmesa->tcl.elt_dma_bo);
+ 
+-   cmd[1] |= (dwords - 3) << 16;
+-   cmd[2] |= nr << R200_VF_VERTEX_NUMBER_SHIFT;
++   r200FireEB(rmesa, nr, rmesa->tcl.hw_primitive);
++
++   radeon_bo_unref(rmesa->tcl.elt_dma_bo);
++   rmesa->tcl.elt_dma_bo = NULL;
+ 
+    if (R200_DEBUG & DEBUG_SYNC) {
+       fprintf(stderr, "%s: Syncing\n", __FUNCTION__);
+-      r200Finish( rmesa->glCtx );
++      radeonFinish( rmesa->radeon.glCtx );
+    }
+ }
+ 
+@@ -261,7 +196,6 @@ GLushort *r200AllocEltsOpenEnded( r200ContextPtr rmesa,
+ 				    GLuint primitive,
+ 				    GLuint min_nr )
+ {
+-   drm_radeon_cmd_header_t *cmd;
+    GLushort *retval;
+ 
+    if (R200_DEBUG & DEBUG_IOCTL)
+@@ -269,30 +203,25 @@ GLushort *r200AllocEltsOpenEnded( r200ContextPtr rmesa,
+ 
+    assert((primitive & R200_VF_PRIM_WALK_IND));
+    
+-   r200EmitState( rmesa );
+-   
+-   cmd = (drm_radeon_cmd_header_t *)r200AllocCmdBuf( rmesa, ELTS_BUFSZ(min_nr),
+-						__FUNCTION__ );
+-   cmd[0].i = 0;
+-   cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
+-   cmd[1].i = R200_CP_CMD_3D_DRAW_INDX_2;
+-   cmd[2].i = (primitive | 
+-	       R200_VF_PRIM_WALK_IND |
+-	       R200_VF_COLOR_ORDER_RGBA);
++   radeonEmitState(&rmesa->radeon);
+ 
++   rmesa->tcl.elt_dma_bo = radeon_bo_open(rmesa->radeon.radeonScreen->bom,
++					  0, R200_ELT_BUF_SZ, 4,
++					  RADEON_GEM_DOMAIN_GTT, 0);
++   rmesa->tcl.elt_dma_offset = 0;
++   rmesa->tcl.elt_used = min_nr * 2;
++
++   radeon_bo_map(rmesa->tcl.elt_dma_bo, 1);
++   retval = rmesa->tcl.elt_dma_bo->ptr + rmesa->tcl.elt_dma_offset;
+    
+-   retval = (GLushort *)(cmd+3);
+ 
+    if (R200_DEBUG & DEBUG_PRIMS)
+-      fprintf(stderr, "%s: header 0x%x prim %x \n",
+-	      __FUNCTION__,
+-	      cmd[1].i, primitive);
+-
+-   assert(!rmesa->dma.flush);
+-   rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
+-   rmesa->dma.flush = r200FlushElts;
++      fprintf(stderr, "%s: header prim %x \n",
++	      __FUNCTION__, primitive);
+ 
+-   rmesa->store.elts_start = ((char *)cmd) - rmesa->store.cmd_buf;
++   assert(!rmesa->radeon.dma.flush);
++   rmesa->radeon.glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
++   rmesa->radeon.dma.flush = r200FlushElts;
+ 
+    return retval;
+ }
+@@ -300,129 +229,130 @@ GLushort *r200AllocEltsOpenEnded( r200ContextPtr rmesa,
+ 
+ 
+ void r200EmitVertexAOS( r200ContextPtr rmesa,
+-			  GLuint vertex_size,
+-			  GLuint offset )
++			GLuint vertex_size,
++ 			struct radeon_bo *bo,
++			GLuint offset )
+ {
+-   drm_radeon_cmd_header_t *cmd;
++   BATCH_LOCALS(&rmesa->radeon);
+ 
+    if (R200_DEBUG & (DEBUG_PRIMS|DEBUG_IOCTL))
+       fprintf(stderr, "%s:  vertex_size 0x%x offset 0x%x \n",
+ 	      __FUNCTION__, vertex_size, offset);
+ 
+-   cmd = (drm_radeon_cmd_header_t *)r200AllocCmdBuf( rmesa, VERT_AOS_BUFSZ,
+-						  __FUNCTION__ );
+ 
+-   cmd[0].header.cmd_type = RADEON_CMD_PACKET3;
+-   cmd[1].i = R200_CP_CMD_3D_LOAD_VBPNTR | (2 << 16);
+-   cmd[2].i = 1;
+-   cmd[3].i = vertex_size | (vertex_size << 8);
+-   cmd[4].i = offset;
++   BEGIN_BATCH(5);
++   OUT_BATCH_PACKET3(R200_CP_CMD_3D_LOAD_VBPNTR, 2);
++   OUT_BATCH(1);
++   OUT_BATCH(vertex_size | (vertex_size << 8));
++   OUT_BATCH_RELOC(offset, bo, offset, RADEON_GEM_DOMAIN_GTT, 0, 0);
++   END_BATCH();
+ }
+-		       
+ 
+-void r200EmitAOS( r200ContextPtr rmesa,
+-		    struct r200_dma_region **component,
+-		    GLuint nr,
+-		    GLuint offset )
++void r200EmitAOS(r200ContextPtr rmesa, GLuint nr, GLuint offset)
+ {
+-   drm_radeon_cmd_header_t *cmd;
+-   int sz = AOS_BUFSZ(nr);
++   BATCH_LOCALS(&rmesa->radeon);
++   uint32_t voffset;
++   int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2;
+    int i;
+-   int *tmp;
+-
+-   if (R200_DEBUG & DEBUG_IOCTL)
+-      fprintf(stderr, "%s nr arrays: %d\n", __FUNCTION__, nr);
+-
+-   cmd = (drm_radeon_cmd_header_t *)r200AllocCmdBuf( rmesa, sz, __FUNCTION__ );
+-   cmd[0].i = 0;
+-   cmd[0].header.cmd_type = RADEON_CMD_PACKET3;
+-   cmd[1].i = R200_CP_CMD_3D_LOAD_VBPNTR | (((sz / sizeof(int)) - 3) << 16);
+-   cmd[2].i = nr;
+-   tmp = &cmd[0].i;
+-   cmd += 3;
+-
+-   for (i = 0 ; i < nr ; i++) {
+-      if (i & 1) {
+-	 cmd[0].i |= ((component[i]->aos_stride << 24) | 
+-		      (component[i]->aos_size << 16));
+-	 cmd[2].i = (component[i]->aos_start + 
+-		     offset * component[i]->aos_stride * 4);
+-	 cmd += 3;
++   
++   if (RADEON_DEBUG & DEBUG_VERTS)
++      fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__, nr,
++	      offset);
++
++   BEGIN_BATCH(sz+2+ (nr*2));
++   OUT_BATCH_PACKET3(R200_CP_CMD_3D_LOAD_VBPNTR, sz - 1);
++   OUT_BATCH(nr);
++
++    
++   if (!rmesa->radeon.radeonScreen->kernel_mm) {
++      for (i = 0; i + 1 < nr; i += 2) {
++	 OUT_BATCH((rmesa->tcl.aos[i].components << 0) |
++		   (rmesa->tcl.aos[i].stride << 8) |
++		   (rmesa->tcl.aos[i + 1].components << 16) |
++		   (rmesa->tcl.aos[i + 1].stride << 24));
++			
++	 voffset =  rmesa->tcl.aos[i + 0].offset +
++	    offset * 4 * rmesa->tcl.aos[i + 0].stride;
++	 OUT_BATCH_RELOC(voffset,
++			 rmesa->tcl.aos[i].bo,
++			 voffset,
++			 RADEON_GEM_DOMAIN_GTT,
++			 0, 0);
++	 voffset =  rmesa->tcl.aos[i + 1].offset +
++	    offset * 4 * rmesa->tcl.aos[i + 1].stride;
++	 OUT_BATCH_RELOC(voffset,
++			 rmesa->tcl.aos[i+1].bo,
++			 voffset,
++			 RADEON_GEM_DOMAIN_GTT,
++			 0, 0);
+       }
+-      else {
+-	 cmd[0].i = ((component[i]->aos_stride << 8) | 
+-		     (component[i]->aos_size << 0));
+-	 cmd[1].i = (component[i]->aos_start + 
+-		     offset * component[i]->aos_stride * 4);
++      
++      if (nr & 1) {
++	 OUT_BATCH((rmesa->tcl.aos[nr - 1].components << 0) |
++		   (rmesa->tcl.aos[nr - 1].stride << 8));
++	 voffset =  rmesa->tcl.aos[nr - 1].offset +
++	    offset * 4 * rmesa->tcl.aos[nr - 1].stride;
++	 OUT_BATCH_RELOC(voffset,
++			 rmesa->tcl.aos[nr - 1].bo,
++			 voffset,
++			 RADEON_GEM_DOMAIN_GTT,
++			 0, 0);
++      }
++   } else {
++      /* kernel_mm path: the packet body carries the raw vertex offsets
++       * and radeon_cs_write_reloc() is then called once per array.
++       */
++      for (i = 0; i + 1 < nr; i += 2) {
++	 OUT_BATCH((rmesa->tcl.aos[i].components << 0) |
++		   (rmesa->tcl.aos[i].stride << 8) |
++		   (rmesa->tcl.aos[i + 1].components << 16) |
++		   (rmesa->tcl.aos[i + 1].stride << 24));
++
++	 voffset =  rmesa->tcl.aos[i + 0].offset +
++	    offset * 4 * rmesa->tcl.aos[i + 0].stride;
++	 OUT_BATCH(voffset);
++	 voffset =  rmesa->tcl.aos[i + 1].offset +
++	    offset * 4 * rmesa->tcl.aos[i + 1].stride;
++	 OUT_BATCH(voffset);
++      }
++
++      if (nr & 1) {
++	 OUT_BATCH((rmesa->tcl.aos[nr - 1].components << 0) |
++		   (rmesa->tcl.aos[nr - 1].stride << 8));
++	 voffset =  rmesa->tcl.aos[nr - 1].offset +
++	    offset * 4 * rmesa->tcl.aos[nr - 1].stride;
++	 OUT_BATCH(voffset);
++      }
++      /* The offsets were already emitted above, so this pass does not
++       * recompute them; it only names each array's buffer object.
++       */
++      for (i = 0; i + 1 < nr; i += 2) {
++	 radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
++			       rmesa->tcl.aos[i+0].bo,
++			       RADEON_GEM_DOMAIN_GTT,
++			       0, 0);
++	 radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
++			       rmesa->tcl.aos[i+1].bo,
++			       RADEON_GEM_DOMAIN_GTT,
++			       0, 0);
++      }
++      if (nr & 1) {
++	 radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
++			       rmesa->tcl.aos[nr-1].bo,
++			       RADEON_GEM_DOMAIN_GTT,
++			       0, 0);
+       }
+    }
+-
+-   if (R200_DEBUG & DEBUG_VERTS) {
+-      fprintf(stderr, "%s:\n", __FUNCTION__);
+-      for (i = 0 ; i < sz ; i++)
+-	 fprintf(stderr, "   %d: %x\n", i, tmp[i]);
+-   }
++   END_BATCH();
+ }
+ 
+-void r200EmitBlit( r200ContextPtr rmesa,
+-		   GLuint color_fmt,
+-		   GLuint src_pitch,
+-		   GLuint src_offset,
+-		   GLuint dst_pitch,
+-		   GLuint dst_offset,
+-		   GLint srcx, GLint srcy,
+-		   GLint dstx, GLint dsty,
+-		   GLuint w, GLuint h )
++void r200FireAOS(r200ContextPtr rmesa, int vertex_count, int type)
+ {
+-   drm_radeon_cmd_header_t *cmd;
++	BATCH_LOCALS(&rmesa->radeon);
+ 
+-   if (R200_DEBUG & DEBUG_IOCTL)
+-      fprintf(stderr, "%s src %x/%x %d,%d dst: %x/%x %d,%d sz: %dx%d\n",
+-	      __FUNCTION__, 
+-	      src_pitch, src_offset, srcx, srcy,
+-	      dst_pitch, dst_offset, dstx, dsty,
+-	      w, h);
+-
+-   assert( (src_pitch & 63) == 0 );
+-   assert( (dst_pitch & 63) == 0 );
+-   assert( (src_offset & 1023) == 0 );
+-   assert( (dst_offset & 1023) == 0 );
+-   assert( w < (1<<16) );
+-   assert( h < (1<<16) );
+-
+-   cmd = (drm_radeon_cmd_header_t *)r200AllocCmdBuf( rmesa, 8 * sizeof(int),
+-						  __FUNCTION__ );
+-
+-
+-   cmd[0].header.cmd_type = RADEON_CMD_PACKET3;
+-   cmd[1].i = R200_CP_CMD_BITBLT_MULTI | (5 << 16);
+-   cmd[2].i = (RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
+-	       RADEON_GMC_DST_PITCH_OFFSET_CNTL |
+-	       RADEON_GMC_BRUSH_NONE |
+-	       (color_fmt << 8) |
+-	       RADEON_GMC_SRC_DATATYPE_COLOR |
+-	       RADEON_ROP3_S |
+-	       RADEON_DP_SRC_SOURCE_MEMORY |
+-	       RADEON_GMC_CLR_CMP_CNTL_DIS |
+-	       RADEON_GMC_WR_MSK_DIS );
+-
+-   cmd[3].i = ((src_pitch/64)<<22) | (src_offset >> 10);
+-   cmd[4].i = ((dst_pitch/64)<<22) | (dst_offset >> 10);
+-   cmd[5].i = (srcx << 16) | srcy;
+-   cmd[6].i = (dstx << 16) | dsty; /* dst */
+-   cmd[7].i = (w << 16) | h;
++	BEGIN_BATCH(3);
++	OUT_BATCH_PACKET3(R200_CP_CMD_3D_DRAW_VBUF_2, 0);
++	OUT_BATCH(R200_VF_PRIM_WALK_LIST | (vertex_count << 16) | type); /* count packed from bit 16 up, OR'ed with caller's type */
++	END_BATCH();
+ }
+ 
+-
+-void r200EmitWait( r200ContextPtr rmesa, GLuint flags )
+-{
+-   drm_radeon_cmd_header_t *cmd;
+-
+-   assert( !(flags & ~(RADEON_WAIT_2D|RADEON_WAIT_3D)) );
+-
+-   cmd = (drm_radeon_cmd_header_t *)r200AllocCmdBuf( rmesa, 1 * sizeof(int),
+-					   __FUNCTION__ );
+-   cmd[0].i = 0;
+-   cmd[0].wait.cmd_type = RADEON_CMD_WAIT;
+-   cmd[0].wait.flags = flags;
+-}
+diff --git a/src/mesa/drivers/dri/r200/r200_context.c b/src/mesa/drivers/dri/r200/r200_context.c
+index c067515..a744469 100644
+--- a/src/mesa/drivers/dri/r200/r200_context.c
++++ b/src/mesa/drivers/dri/r200/r200_context.c
+@@ -52,9 +52,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DE
+ #include "drivers/common/driverfuncs.h"
+ 
+ #include "r200_context.h"
++#include "radeon_span.h"
+ #include "r200_ioctl.h"
+ #include "r200_state.h"
+-#include "r200_span.h"
+ #include "r200_pixel.h"
+ #include "r200_tex.h"
+ #include "r200_swtcl.h"
+@@ -78,9 +79,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "vblank.h"
+ #include "utils.h"
+ #include "xmlpool.h" /* for symbolic values of enum-type options */
+-#ifndef R200_DEBUG
+-int R200_DEBUG = (0);
+-#endif
+ 
+ /* Return various strings for glGetString().
+  */
+@@ -89,8 +87,8 @@ static const GLubyte *r200GetString( GLcontext *ctx, GLenum name )
+    r200ContextPtr rmesa = R200_CONTEXT(ctx);
+    static char buffer[128];
+    unsigned   offset;
+-   GLuint agp_mode = (rmesa->r200Screen->card_type == RADEON_CARD_PCI)? 0 :
+-      rmesa->r200Screen->AGPMode;
++   GLuint agp_mode = (rmesa->radeon.radeonScreen->card_type == RADEON_CARD_PCI)? 0 :
++      rmesa->radeon.radeonScreen->AGPMode;
+ 
+    switch ( name ) {
+    case GL_VENDOR:
+@@ -101,7 +99,7 @@ static const GLubyte *r200GetString( GLcontext *ctx, GLenum name )
+ 				     agp_mode );
+ 
+       sprintf( & buffer[ offset ], " %sTCL",
+-	       !(rmesa->TclFallback & R200_TCL_FALLBACK_TCL_DISABLE)
++	       !(rmesa->radeon.TclFallback & R200_TCL_FALLBACK_TCL_DISABLE)
+ 	       ? "" : "NO-" );
+ 
+       return (GLubyte *)buffer;
+@@ -234,6 +232,40 @@ static const struct dri_debug_control debug_control[] =
+     { NULL,    0 }
+ };
+ 
++/* get_lock hook: mirror the SAREA tiling flag into the ctx atom and record
++ * this context as SAREA owner, calling the legacy texture-age helper if !kernel_mm. */
++static void r200_get_lock(radeonContextPtr radeon)
++{
++   r200ContextPtr rmesa = (r200ContextPtr)radeon;
++   drm_radeon_sarea_t *sarea = radeon->sarea;
++
++   R200_STATECHANGE( rmesa, ctx );
++   if (rmesa->radeon.sarea->tiling_enabled) {
++      rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= R200_COLOR_TILE_ENABLE;
++   }
++   else rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] &= ~R200_COLOR_TILE_ENABLE;
++
++   if ( sarea->ctx_owner != rmesa->radeon.dri.hwContext ) {
++      sarea->ctx_owner = rmesa->radeon.dri.hwContext;
++      if (!radeon->radeonScreen->kernel_mm)
++         radeon_bo_legacy_texture_age(radeon->radeonScreen->bom);
++   }
++}
++
++/* emit_cs_header hook for r200: the body is empty, so nothing is emitted. */
++static void r200_vtbl_emit_cs_header(struct radeon_cs *cs, radeonContextPtr rmesa)
++{
++}
++
++/* Fill radeon->vtbl with the r200 callbacks (lock, viewport, draw buffer, CS header, swtcl flush). */
++static void r200_init_vtbl(radeonContextPtr radeon)
++{
++   radeon->vtbl.get_lock = r200_get_lock;
++   radeon->vtbl.update_viewport_offset = r200UpdateViewportOffset;
++   radeon->vtbl.update_draw_buffer = r200UpdateDrawBuffer;
++   radeon->vtbl.emit_cs_header = r200_vtbl_emit_cs_header;
++   radeon->vtbl.swtcl_flush = r200_swtcl_flush;
++}
+ 
+ /* Create the device specific rendering context.
+  */
+@@ -245,9 +277,9 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual,
+    radeonScreenPtr screen = (radeonScreenPtr)(sPriv->private);
+    struct dd_function_table functions;
+    r200ContextPtr rmesa;
+-   GLcontext *ctx, *shareCtx;
++   GLcontext *ctx;
+    int i;
+-   int tcl_mode, fthrottle_mode;
++   int tcl_mode;
+ 
+    assert(glVisual);
+    assert(driContextPriv);
+@@ -257,7 +289,8 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual,
+    rmesa = (r200ContextPtr) CALLOC( sizeof(*rmesa) );
+    if ( !rmesa )
+       return GL_FALSE;
+-      
++
++   r200_init_vtbl(&rmesa->radeon);
+    /* init exp fog table data */
+    r200InitStaticFogData();
+ 
+@@ -265,12 +298,12 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual,
+     * Do this here so that initialMaxAnisotropy is set before we create
+     * the default textures.
+     */
+-   driParseConfigFiles (&rmesa->optionCache, &screen->optionCache,
++   driParseConfigFiles (&rmesa->radeon.optionCache, &screen->optionCache,
+ 			screen->driScreen->myNum, "r200");
+-   rmesa->initialMaxAnisotropy = driQueryOptionf(&rmesa->optionCache,
+-                                                 "def_max_anisotropy");
++   rmesa->radeon.initialMaxAnisotropy = driQueryOptionf(&rmesa->radeon.optionCache,
++							"def_max_anisotropy");
+ 
+-   if ( driQueryOptionb( &rmesa->optionCache, "hyperz" ) ) {
++   if ( driQueryOptionb( &rmesa->radeon.optionCache, "hyperz" ) ) {
+       if ( sPriv->drm_version.minor < 13 )
+ 	 fprintf( stderr, "DRM version 1.%d too old to support HyperZ, "
+ 			  "disabling.\n", sPriv->drm_version.minor );
+@@ -291,59 +324,21 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual,
+    r200InitTextureFuncs(&functions);
+    r200InitShaderFuncs(&functions); 
+ 
+-   /* Allocate and initialize the Mesa context */
+-   if (sharedContextPrivate)
+-      shareCtx = ((r200ContextPtr) sharedContextPrivate)->glCtx;
+-   else
+-      shareCtx = NULL;
+-   rmesa->glCtx = _mesa_create_context(glVisual, shareCtx,
+-                                       &functions, (void *) rmesa);
+-   if (!rmesa->glCtx) {
+-      FREE(rmesa);
+-      return GL_FALSE;
+-   }
+-   driContextPriv->driverPrivate = rmesa;
+-
+-   /* Init r200 context data */
+-   rmesa->dri.context = driContextPriv;
+-   rmesa->dri.screen = sPriv;
+-   rmesa->dri.drawable = NULL; /* Set by XMesaMakeCurrent */
+-   rmesa->dri.hwContext = driContextPriv->hHWContext;
+-   rmesa->dri.hwLock = &sPriv->pSAREA->lock;
+-   rmesa->dri.fd = sPriv->fd;
+-   rmesa->dri.drmMinor = sPriv->drm_version.minor;
+-
+-   rmesa->r200Screen = screen;
+-   rmesa->sarea = (drm_radeon_sarea_t *)((GLubyte *)sPriv->pSAREA +
+-				       screen->sarea_priv_offset);
+-
+-
+-   rmesa->dma.buf0_address = rmesa->r200Screen->buffers->list[0].address;
+-
+-   (void) memset( rmesa->texture_heaps, 0, sizeof( rmesa->texture_heaps ) );
+-   make_empty_list( & rmesa->swapped );
+-
+-   rmesa->nr_heaps = 1 /* screen->numTexHeaps */ ;
+-   assert(rmesa->nr_heaps < RADEON_NR_TEX_HEAPS);
+-   for ( i = 0 ; i < rmesa->nr_heaps ; i++ ) {
+-      rmesa->texture_heaps[i] = driCreateTextureHeap( i, rmesa,
+-	    screen->texSize[i],
+-	    12,
+-	    RADEON_NR_TEX_REGIONS,
+-	    (drmTextureRegionPtr)rmesa->sarea->tex_list[i],
+-	    & rmesa->sarea->tex_age[i],
+-	    & rmesa->swapped,
+-	    sizeof( r200TexObj ),
+-	    (destroy_texture_object_t *) r200DestroyTexObj );
++   if (!radeonInitContext(&rmesa->radeon, &functions,
++			  glVisual, driContextPriv,
++			  sharedContextPrivate)) {
++     FREE(rmesa);
++     return GL_FALSE;
+    }
+-   rmesa->texture_depth = driQueryOptioni (&rmesa->optionCache,
++
++   rmesa->radeon.texture_depth = driQueryOptioni (&rmesa->radeon.optionCache,
+ 					   "texture_depth");
+-   if (rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_FB)
+-      rmesa->texture_depth = ( screen->cpp == 4 ) ?
++   if (rmesa->radeon.texture_depth == DRI_CONF_TEXTURE_DEPTH_FB)
++      rmesa->radeon.texture_depth = ( screen->cpp == 4 ) ?
+ 	 DRI_CONF_TEXTURE_DEPTH_32 : DRI_CONF_TEXTURE_DEPTH_16;
+ 
+-   rmesa->swtcl.RenderIndex = ~0;
+-   rmesa->hw.all_dirty = 1;
++   rmesa->radeon.swtcl.RenderIndex = ~0;
++   rmesa->radeon.hw.all_dirty = 1;
+ 
+    /* Set the maximum texture size small enough that we can guarentee that
+     * all texture units can bind a maximal texture and have all of them in
+@@ -351,29 +346,13 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual,
+     * setting allow larger textures.
+     */
+ 
+-   ctx = rmesa->glCtx;
+-   ctx->Const.MaxTextureUnits = driQueryOptioni (&rmesa->optionCache,
++   ctx = rmesa->radeon.glCtx;
++   ctx->Const.MaxTextureUnits = driQueryOptioni (&rmesa->radeon.optionCache,
+ 						 "texture_units");
+    ctx->Const.MaxTextureImageUnits = ctx->Const.MaxTextureUnits;
+    ctx->Const.MaxTextureCoordUnits = ctx->Const.MaxTextureUnits;
+ 
+-   i = driQueryOptioni( &rmesa->optionCache, "allow_large_textures");
+-
+-   driCalculateMaxTextureLevels( rmesa->texture_heaps,
+-				 rmesa->nr_heaps,
+-				 & ctx->Const,
+-				 4,
+-				 11, /* max 2D texture size is 2048x2048 */
+-#if ENABLE_HW_3D_TEXTURE
+-				 8,  /* max 3D texture size is 256^3 */
+-#else
+-				 0,  /* 3D textures unsupported */
+-#endif
+-				 11, /* max cube texture size is 2048x2048 */
+-				 11, /* max texture rectangle size is 2048x2048 */
+-				 12,
+-				 GL_FALSE,
+-				 i );
++   i = driQueryOptioni( &rmesa->radeon.optionCache, "allow_large_textures");
+ 
+    ctx->Const.MaxTextureMaxAnisotropy = 16.0;
+ 
+@@ -383,7 +362,7 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual,
+    ctx->Const.MinPointSizeAA = 1.0;
+    ctx->Const.MaxPointSizeAA = 1.0;
+    ctx->Const.PointSizeGranularity = 0.0625;
+-   if (rmesa->r200Screen->drmSupportsPointSprites)
++   if (rmesa->radeon.radeonScreen->drmSupportsPointSprites)
+       ctx->Const.MaxPointSize = 2047.0;
+    else
+       ctx->Const.MaxPointSize = 1.0;
+@@ -439,32 +418,32 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual,
+    _math_matrix_set_identity( &rmesa->tmpmat );
+ 
+    driInitExtensions( ctx, card_extensions, GL_TRUE );
+-   if (!(rmesa->r200Screen->chip_flags & R200_CHIPSET_YCBCR_BROKEN)) {
++   if (!(rmesa->radeon.radeonScreen->chip_flags & R200_CHIPSET_YCBCR_BROKEN)) {
+      /* yuv textures don't work with some chips - R200 / rv280 okay so far
+ 	others get the bit ordering right but don't actually do YUV-RGB conversion */
+       _mesa_enable_extension( ctx, "GL_MESA_ycbcr_texture" );
+    }
+-   if (rmesa->glCtx->Mesa_DXTn) {
++   if (rmesa->radeon.glCtx->Mesa_DXTn) {
+       _mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" );
+       _mesa_enable_extension( ctx, "GL_S3_s3tc" );
+    }
+-   else if (driQueryOptionb (&rmesa->optionCache, "force_s3tc_enable")) {
++   else if (driQueryOptionb (&rmesa->radeon.optionCache, "force_s3tc_enable")) {
+       _mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" );
+    }
+ 
+-   if (rmesa->r200Screen->drmSupportsCubeMapsR200)
++   if (rmesa->radeon.radeonScreen->drmSupportsCubeMapsR200)
+       _mesa_enable_extension( ctx, "GL_ARB_texture_cube_map" );
+-   if (rmesa->r200Screen->drmSupportsBlendColor) {
++   if (rmesa->radeon.radeonScreen->drmSupportsBlendColor) {
+        driInitExtensions( ctx, blend_extensions, GL_FALSE );
+    }
+-   if(rmesa->r200Screen->drmSupportsVertexProgram)
++   if(rmesa->radeon.radeonScreen->drmSupportsVertexProgram)
+       driInitSingleExtension( ctx, ARB_vp_extension );
+-   if(driQueryOptionb(&rmesa->optionCache, "nv_vertex_program"))
++   if(driQueryOptionb(&rmesa->radeon.optionCache, "nv_vertex_program"))
+       driInitSingleExtension( ctx, NV_vp_extension );
+ 
+-   if ((ctx->Const.MaxTextureUnits == 6) && rmesa->r200Screen->drmSupportsFragShader)
++   if ((ctx->Const.MaxTextureUnits == 6) && rmesa->radeon.radeonScreen->drmSupportsFragShader)
+       driInitSingleExtension( ctx, ATI_fs_extension );
+-   if (rmesa->r200Screen->drmSupportsPointSprites)
++   if (rmesa->radeon.radeonScreen->drmSupportsPointSprites)
+       driInitExtensions( ctx, point_extensions, GL_FALSE );
+ #if 0
+    r200InitDriverFuncs( ctx );
+@@ -474,33 +453,15 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual,
+ #endif
+    /* plug in a few more device driver functions */
+    /* XXX these should really go right after _mesa_init_driver_functions() */
++   radeonInitSpanFuncs( ctx );
+    r200InitPixelFuncs( ctx );
+-   r200InitSpanFuncs( ctx );
+    r200InitTnlFuncs( ctx );
+    r200InitState( rmesa );
+    r200InitSwtcl( ctx );
+ 
+-   fthrottle_mode = driQueryOptioni(&rmesa->optionCache, "fthrottle_mode");
+-   rmesa->iw.irq_seq = -1;
+-   rmesa->irqsEmitted = 0;
+-   rmesa->do_irqs = (fthrottle_mode == DRI_CONF_FTHROTTLE_IRQS &&
+-		     rmesa->r200Screen->irq);
+-
+-   rmesa->do_usleeps = (fthrottle_mode == DRI_CONF_FTHROTTLE_USLEEPS);
+-
+-   if (!rmesa->do_irqs)
+-      fprintf(stderr,
+-	      "IRQ's not enabled, falling back to %s: %d %d\n",
+-	      rmesa->do_usleeps ? "usleeps" : "busy waits",
+-	      fthrottle_mode,
+-	      rmesa->r200Screen->irq);
+-
+    rmesa->prefer_gart_client_texturing = 
+       (getenv("R200_GART_CLIENT_TEXTURES") != 0);
+ 
+-   (*sPriv->systemTime->getUST)( & rmesa->swap_ust );
+-
+-
+ #if DO_DEBUG
+    R200_DEBUG  = driParseDebugString( getenv( "R200_DEBUG" ),
+ 				      debug_control );
+@@ -508,18 +469,18 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual,
+ 				      debug_control );
+ #endif
+ 
+-   tcl_mode = driQueryOptioni(&rmesa->optionCache, "tcl_mode");
+-   if (driQueryOptionb(&rmesa->optionCache, "no_rast")) {
++   tcl_mode = driQueryOptioni(&rmesa->radeon.optionCache, "tcl_mode");
++   if (driQueryOptionb(&rmesa->radeon.optionCache, "no_rast")) {
+       fprintf(stderr, "disabling 3D acceleration\n");
+       FALLBACK(rmesa, R200_FALLBACK_DISABLE, 1);
+    }
+    else if (tcl_mode == DRI_CONF_TCL_SW || getenv("R200_NO_TCL") ||
+-	    !(rmesa->r200Screen->chip_flags & RADEON_CHIPSET_TCL)) {
+-      if (rmesa->r200Screen->chip_flags & RADEON_CHIPSET_TCL) {
+-	 rmesa->r200Screen->chip_flags &= ~RADEON_CHIPSET_TCL;
++	    !(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) {
++      if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
++	 rmesa->radeon.radeonScreen->chip_flags &= ~RADEON_CHIPSET_TCL;
+ 	 fprintf(stderr, "Disabling HW TCL support\n");
+       }
+-      TCL_FALLBACK(rmesa->glCtx, R200_TCL_FALLBACK_TCL_DISABLE, 1);
++      TCL_FALLBACK(rmesa->radeon.glCtx, R200_TCL_FALLBACK_TCL_DISABLE, 1);
+    }
+ 
+    return GL_TRUE;
+@@ -538,55 +499,33 @@ void r200DestroyContext( __DRIcontextPrivate *driContextPriv )
+ 
+    /* check if we're deleting the currently bound context */
+    if (rmesa == current) {
+-      R200_FIREVERTICES( rmesa );
++      radeon_firevertices(&rmesa->radeon);
+       _mesa_make_current(NULL, NULL, NULL);
+    }
+ 
+    /* Free r200 context resources */
+    assert(rmesa); /* should never be null */
+    if ( rmesa ) {
+-      GLboolean   release_texture_heaps;
+-
+ 
+-      release_texture_heaps = (rmesa->glCtx->Shared->RefCount == 1);
+-      _swsetup_DestroyContext( rmesa->glCtx );
+-      _tnl_DestroyContext( rmesa->glCtx );
+-      _vbo_DestroyContext( rmesa->glCtx );
+-      _swrast_DestroyContext( rmesa->glCtx );
++      _swsetup_DestroyContext( rmesa->radeon.glCtx );
++      _tnl_DestroyContext( rmesa->radeon.glCtx );
++      _vbo_DestroyContext( rmesa->radeon.glCtx );
++      _swrast_DestroyContext( rmesa->radeon.glCtx );
+ 
+-      r200DestroySwtcl( rmesa->glCtx );
+-      r200ReleaseArrays( rmesa->glCtx, ~0 );
++      r200DestroySwtcl( rmesa->radeon.glCtx );
++      r200ReleaseArrays( rmesa->radeon.glCtx, ~0 );
+ 
+-      if (rmesa->dma.current.buf) {
+-	 r200ReleaseDmaRegion( rmesa, &rmesa->dma.current, __FUNCTION__ );
+-	 r200FlushCmdBuf( rmesa, __FUNCTION__ );
++      if (rmesa->radeon.dma.current) {
++	 radeonReleaseDmaRegion( &rmesa->radeon );
++	 rcommonFlushCmdBuf( &rmesa->radeon, __FUNCTION__ );
+       }
+ 
+-      if (rmesa->state.scissor.pClipRects) {
+-	 FREE(rmesa->state.scissor.pClipRects);
+-	 rmesa->state.scissor.pClipRects = NULL;
+-      }
+-
+-      if ( release_texture_heaps ) {
+-         /* This share group is about to go away, free our private
+-          * texture object data.
+-          */
+-         int i;
+-
+-         for ( i = 0 ; i < rmesa->nr_heaps ; i++ ) {
+-	    driDestroyTextureHeap( rmesa->texture_heaps[ i ] );
+-	    rmesa->texture_heaps[ i ] = NULL;
+-         }
+-
+-	 assert( is_empty_list( & rmesa->swapped ) );
++      if (rmesa->radeon.state.scissor.pClipRects) {
++	 FREE(rmesa->radeon.state.scissor.pClipRects);
++	 rmesa->radeon.state.scissor.pClipRects = NULL;
+       }
+ 
+-      /* free the Mesa context */
+-      rmesa->glCtx->DriverCtx = NULL;
+-      _mesa_destroy_context( rmesa->glCtx );
+-
+-      /* free the option cache */
+-      driDestroyOptionCache (&rmesa->optionCache);
++      radeonCleanupContext(&rmesa->radeon);
+ 
+       FREE( rmesa );
+    }
+@@ -594,107 +533,6 @@ void r200DestroyContext( __DRIcontextPrivate *driContextPriv )
+ 
+ 
+ 
+-
+-void
+-r200SwapBuffers( __DRIdrawablePrivate *dPriv )
+-{
+-   if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) {
+-      r200ContextPtr rmesa;
+-      GLcontext *ctx;
+-      rmesa = (r200ContextPtr) dPriv->driContextPriv->driverPrivate;
+-      ctx = rmesa->glCtx;
+-      if (ctx->Visual.doubleBufferMode) {
+-         _mesa_notifySwapBuffers( ctx );  /* flush pending rendering comands */
+-         if ( rmesa->doPageFlip ) {
+-            r200PageFlip( dPriv );
+-         }
+-         else {
+-	     r200CopyBuffer( dPriv, NULL );
+-         }
+-      }
+-   }
+-   else {
+-      /* XXX this shouldn't be an error but we can't handle it for now */
+-      _mesa_problem(NULL, "%s: drawable has no context!", __FUNCTION__);
+-   }
+-}
+-
+-void
+-r200CopySubBuffer( __DRIdrawablePrivate *dPriv,
+-		   int x, int y, int w, int h )
+-{
+-   if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) {
+-      r200ContextPtr rmesa;
+-      GLcontext *ctx;
+-      rmesa = (r200ContextPtr) dPriv->driContextPriv->driverPrivate;
+-      ctx = rmesa->glCtx;
+-      if (ctx->Visual.doubleBufferMode) {
+-	 drm_clip_rect_t rect;
+-	 rect.x1 = x + dPriv->x;
+-	 rect.y1 = (dPriv->h - y - h) + dPriv->y;
+-	 rect.x2 = rect.x1 + w;
+-	 rect.y2 = rect.y1 + h;
+-         _mesa_notifySwapBuffers( ctx );  /* flush pending rendering comands */
+-	 r200CopyBuffer( dPriv, &rect );
+-      }
+-   }
+-   else {
+-      /* XXX this shouldn't be an error but we can't handle it for now */
+-      _mesa_problem(NULL, "%s: drawable has no context!", __FUNCTION__);
+-   }
+-}
+-
+-/* Force the context `c' to be the current context and associate with it
+- * buffer `b'.
+- */
+-GLboolean
+-r200MakeCurrent( __DRIcontextPrivate *driContextPriv,
+-                   __DRIdrawablePrivate *driDrawPriv,
+-                   __DRIdrawablePrivate *driReadPriv )
+-{
+-   if ( driContextPriv ) {
+-      r200ContextPtr newCtx = 
+-	 (r200ContextPtr) driContextPriv->driverPrivate;
+-
+-      if (R200_DEBUG & DEBUG_DRI)
+-	 fprintf(stderr, "%s ctx %p\n", __FUNCTION__, (void *)newCtx->glCtx);
+-
+-      newCtx->dri.readable = driReadPriv;
+-
+-      if ( newCtx->dri.drawable != driDrawPriv ||
+-           newCtx->lastStamp != driDrawPriv->lastStamp ) {
+-	 if (driDrawPriv->swap_interval == (unsigned)-1) {
+-	    driDrawPriv->vblFlags = (newCtx->r200Screen->irq != 0)
+-	       ? driGetDefaultVBlankFlags(&newCtx->optionCache)
+-	       : VBLANK_FLAG_NO_IRQ;
+-
+-	    driDrawableInitVBlank( driDrawPriv );
+-	 }
+-
+-	 newCtx->dri.drawable = driDrawPriv;
+-
+-	 r200SetCliprects(newCtx);
+-	 r200UpdateViewportOffset( newCtx->glCtx );
+-      }
+-
+-      _mesa_make_current( newCtx->glCtx,
+-			  (GLframebuffer *) driDrawPriv->driverPrivate,
+-			  (GLframebuffer *) driReadPriv->driverPrivate );
+-
+-      _mesa_update_state( newCtx->glCtx );
+-      r200ValidateState( newCtx->glCtx );
+-
+-   } else {
+-      if (R200_DEBUG & DEBUG_DRI)
+-	 fprintf(stderr, "%s ctx is null\n", __FUNCTION__);
+-      _mesa_make_current( NULL, NULL, NULL );
+-   }
+-
+-   if (R200_DEBUG & DEBUG_DRI)
+-      fprintf(stderr, "End %s\n", __FUNCTION__);
+-   return GL_TRUE;
+-}
+-
+ /* Force the context `c' to be unbound from its buffer.
+  */
+ GLboolean
+@@ -703,7 +541,7 @@ r200UnbindContext( __DRIcontextPrivate *driContextPriv )
+    r200ContextPtr rmesa = (r200ContextPtr) driContextPriv->driverPrivate;
+ 
+    if (R200_DEBUG & DEBUG_DRI)
+-      fprintf(stderr, "%s ctx %p\n", __FUNCTION__, (void *)rmesa->glCtx);
++      fprintf(stderr, "%s ctx %p\n", __FUNCTION__, (void *)rmesa->radeon.glCtx);
+ 
+    return GL_TRUE;
+ }
+diff --git a/src/mesa/drivers/dri/r200/r200_context.h b/src/mesa/drivers/dri/r200/r200_context.h
+index 14a1dda..fcbe725 100644
+--- a/src/mesa/drivers/dri/r200/r200_context.h
++++ b/src/mesa/drivers/dri/r200/r200_context.h
+@@ -53,51 +53,17 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #error This driver requires a newer libdrm to compile
+ #endif
+ 
++#include "radeon_screen.h"
++#include "radeon_common.h"
++
++#include "radeon_lock.h"
++
+ struct r200_context;
+ typedef struct r200_context r200ContextRec;
+ typedef struct r200_context *r200ContextPtr;
+ 
+-/* This union is used to avoid warnings/miscompilation
+-   with float to uint32_t casts due to strict-aliasing */
+-typedef union { GLfloat f; uint32_t ui32; } float_ui32_type;
+-
+-#include "r200_lock.h"
+-#include "radeon_screen.h"
+ #include "main/mm.h"
+ 
+-/* Flags for software fallback cases */
+-/* See correponding strings in r200_swtcl.c */
+-#define R200_FALLBACK_TEXTURE           0x01
+-#define R200_FALLBACK_DRAW_BUFFER       0x02
+-#define R200_FALLBACK_STENCIL           0x04
+-#define R200_FALLBACK_RENDER_MODE       0x08
+-#define R200_FALLBACK_DISABLE           0x10
+-#define R200_FALLBACK_BORDER_MODE       0x20
+-
+-/* The blit width for texture uploads
+- */
+-#define BLIT_WIDTH_BYTES 1024
+-
+-/* Use the templated vertex format:
+- */
+-#define COLOR_IS_RGBA
+-#define TAG(x) r200##x
+-#include "tnl_dd/t_dd_vertex.h"
+-#undef TAG
+-
+-typedef void (*r200_tri_func)( r200ContextPtr,
+-				 r200Vertex *,
+-				 r200Vertex *,
+-				 r200Vertex * );
+-
+-typedef void (*r200_line_func)( r200ContextPtr,
+-				  r200Vertex *,
+-				  r200Vertex * );
+-
+-typedef void (*r200_point_func)( r200ContextPtr,
+-				   r200Vertex * );
+-
+-
+ struct r200_vertex_program {
+         struct gl_vertex_program mesa_program; /* Must be first */
+         int translated;
+@@ -112,93 +78,11 @@ struct r200_vertex_program {
+         int fogmode;
+ };
+ 
+-struct r200_colorbuffer_state {
+-   GLuint clear;
+-#if 000
+-   GLint drawOffset, drawPitch;
+-#endif
+-   int roundEnable;
+-};
+-
+-
+-struct r200_depthbuffer_state {
+-   GLuint clear;
+-   GLfloat scale;
+-};
+-
+-#if 000
+-struct r200_pixel_state {
+-   GLint readOffset, readPitch;
+-};
+-#endif
+-
+-struct r200_scissor_state {
+-   drm_clip_rect_t rect;
+-   GLboolean enabled;
+-
+-   GLuint numClipRects;			/* Cliprects active */
+-   GLuint numAllocedClipRects;		/* Cliprects available */
+-   drm_clip_rect_t *pClipRects;
+-};
+-
+-struct r200_stencilbuffer_state {
+-   GLboolean hwBuffer;
+-   GLuint clear;			/* rb3d_stencilrefmask value */
+-};
+-
+-struct r200_stipple_state {
+-   GLuint mask[32];
+-};
+-
+-
+-
+-#define TEX_0   0x1
+-#define TEX_1   0x2
+-#define TEX_2	0x4
+-#define TEX_3	0x8
+-#define TEX_4	0x10
+-#define TEX_5	0x20
+-#define TEX_ALL 0x3f
+-
+-typedef struct r200_tex_obj r200TexObj, *r200TexObjPtr;
+-
+-/* Texture object in locally shared texture space.
+- */
+-struct r200_tex_obj {
+-   driTextureObject   base;
+-
+-   GLuint bufAddr;			/* Offset to start of locally
+-					   shared texture block */
+-
+-   GLuint dirty_state;		        /* Flags (1 per texunit) for
+-					   whether or not this texobj
+-					   has dirty hardware state
+-					   (pp_*) that needs to be
+-					   brought into the
+-					   texunit. */
+-
+-   drm_radeon_tex_image_t image[6][RADEON_MAX_TEXTURE_LEVELS];
+-					/* Six, for the cube faces */
+-   GLboolean image_override;		/* Image overridden by GLX_EXT_tfp */
+-
+-   GLuint pp_txfilter;		        /* hardware register values */
+-   GLuint pp_txformat;
+-   GLuint pp_txformat_x;
+-   GLuint pp_txoffset;		        /* Image location in texmem.
+-					   All cube faces follow. */
+-   GLuint pp_txsize;		        /* npot only */
+-   GLuint pp_txpitch;		        /* npot only */
+-   GLuint pp_border_color;
+-   GLuint pp_cubic_faces;	        /* cube face 1,2,3,4 log2 sizes */
+-
+-   GLboolean  border_fallback;
+-
+-   GLuint tile_bits;			/* hw texture tile bits used on this texture */
+-};
++#define R200_TEX_ALL 0x3f
+ 
+ 
+ struct r200_texture_env_state {
+-   r200TexObjPtr texobj;
++   radeonTexObjPtr texobj;
+    GLuint outputreg;
+    GLuint unitneeded;
+ };
+@@ -210,19 +94,6 @@ struct r200_texture_state {
+ };
+ 
+ 
+-struct r200_state_atom {
+-   struct r200_state_atom *next, *prev;
+-   const char *name;		         /* for debug */
+-   int cmd_size;		         /* size in bytes */
+-   GLuint idx;
+-   int *cmd;			         /* one or more cmd's */
+-   int *lastcmd;			 /* one or more cmd's */
+-   GLboolean dirty;
+-   GLboolean (*check)( GLcontext *, int );    /* is this state active? */
+-};
+-   
+-
+-
+ /* Trying to keep these relatively short as the variables are becoming
+  * extravagently long.  Drop the driver name prefix off the front of
+  * everything - I think we know which driver we're in by now, and keep the
+@@ -597,181 +468,85 @@ struct r200_state_atom {
+ 
+ 
+ struct r200_hw_state {
+-   /* Head of the linked list of state atoms. */
+-   struct r200_state_atom atomlist;
+-
+    /* Hardware state, stored as cmdbuf commands:  
+     *   -- Need to doublebuffer for
+     *           - reviving state after loss of context
+     *           - eliding noop statechange loops? (except line stipple count)
+     */
+-   struct r200_state_atom ctx;
+-   struct r200_state_atom set;
+-   struct r200_state_atom vte;
+-   struct r200_state_atom lin;
+-   struct r200_state_atom msk;
+-   struct r200_state_atom vpt;
+-   struct r200_state_atom vap;
+-   struct r200_state_atom vtx;
+-   struct r200_state_atom tcl;
+-   struct r200_state_atom msl;
+-   struct r200_state_atom tcg;
+-   struct r200_state_atom msc;
+-   struct r200_state_atom cst;
+-   struct r200_state_atom tam;
+-   struct r200_state_atom tf;
+-   struct r200_state_atom tex[6];
+-   struct r200_state_atom cube[6];
+-   struct r200_state_atom zbs;
+-   struct r200_state_atom mtl[2];
+-   struct r200_state_atom mat[9];
+-   struct r200_state_atom lit[8]; /* includes vec, scl commands */
+-   struct r200_state_atom ucp[6];
+-   struct r200_state_atom pix[6]; /* pixshader stages */
+-   struct r200_state_atom eye; /* eye pos */
+-   struct r200_state_atom grd; /* guard band clipping */
+-   struct r200_state_atom fog;
+-   struct r200_state_atom glt;
+-   struct r200_state_atom prf;
+-   struct r200_state_atom afs[2];
+-   struct r200_state_atom pvs;
+-   struct r200_state_atom vpi[2];
+-   struct r200_state_atom vpp[2];
+-   struct r200_state_atom atf;
+-   struct r200_state_atom spr;
+-   struct r200_state_atom ptp;
+-
+-   int max_state_size;	/* Number of bytes necessary for a full state emit. */
+-   GLboolean is_dirty, all_dirty;
++   struct radeon_state_atom ctx;
++   struct radeon_state_atom set;
++   struct radeon_state_atom vte;
++   struct radeon_state_atom lin;
++   struct radeon_state_atom msk;
++   struct radeon_state_atom vpt;
++   struct radeon_state_atom vap;
++   struct radeon_state_atom vtx;
++   struct radeon_state_atom tcl;
++   struct radeon_state_atom msl;
++   struct radeon_state_atom tcg;
++   struct radeon_state_atom msc;
++   struct radeon_state_atom cst;
++   struct radeon_state_atom tam;
++   struct radeon_state_atom tf;
++   struct radeon_state_atom tex[6];
++   struct radeon_state_atom cube[6];
++   struct radeon_state_atom zbs;
++   struct radeon_state_atom mtl[2];
++   struct radeon_state_atom mat[9];
++   struct radeon_state_atom lit[8]; /* includes vec, scl commands */
++   struct radeon_state_atom ucp[6];
++   struct radeon_state_atom pix[6]; /* pixshader stages */
++   struct radeon_state_atom eye; /* eye pos */
++   struct radeon_state_atom grd; /* guard band clipping */
++   struct radeon_state_atom fog;
++   struct radeon_state_atom glt;
++   struct radeon_state_atom prf;
++   struct radeon_state_atom afs[2];
++   struct radeon_state_atom pvs;
++   struct radeon_state_atom vpi[2];
++   struct radeon_state_atom vpp[2];
++   struct radeon_state_atom atf;
++   struct radeon_state_atom spr;
++   struct radeon_state_atom ptp;
+ };
+ 
+ struct r200_state {
+    /* Derived state for internal purposes:
+     */
+-   struct r200_colorbuffer_state color;
+-   struct r200_depthbuffer_state depth;
+-#if 00
+-   struct r200_pixel_state pixel;
+-#endif
+-   struct r200_scissor_state scissor;
+-   struct r200_stencilbuffer_state stencil;
+-   struct r200_stipple_state stipple;
++   struct radeon_stipple_state stipple;
+    struct r200_texture_state texture;
+    GLuint envneeded;
+ };
+ 
+-/* Need refcounting on dma buffers:
+- */
+-struct r200_dma_buffer {
+-   int refcount;		/* the number of retained regions in buf */
+-   drmBufPtr buf;
+-};
+-
+-#define GET_START(rvb) (rmesa->r200Screen->gart_buffer_offset +		\
+-			(rvb)->address - rmesa->dma.buf0_address +	\
+-			(rvb)->start)
+-
+-/* A retained region, eg vertices for indexed vertices.
+- */
+-struct r200_dma_region {
+-   struct r200_dma_buffer *buf;
+-   char *address;		/* == buf->address */
+-   int start, end, ptr;		/* offsets from start of buf */
+-   int aos_start;
+-   int aos_stride;
+-   int aos_size;
+-};
+-
+-
+-struct r200_dma {
+-   /* Active dma region.  Allocations for vertices and retained
+-    * regions come from here.  Also used for emitting random vertices,
+-    * these may be flushed by calling flush_current();
+-    */
+-   struct r200_dma_region current;
+-   
+-   void (*flush)( r200ContextPtr );
+-
+-   char *buf0_address;		/* start of buf[0], for index calcs */
+-   GLuint nr_released_bufs;	/* flush after so many buffers released */
+-};
+-
+-struct r200_dri_mirror {
+-   __DRIcontextPrivate	*context;	/* DRI context */
+-   __DRIscreenPrivate	*screen;	/* DRI screen */
+-   __DRIdrawablePrivate	*drawable;	/* DRI drawable bound to this ctx */
+-   __DRIdrawablePrivate	*readable;	/* DRI readable bound to this ctx */
+-
+-   drm_context_t hwContext;
+-   drm_hw_lock_t *hwLock;
+-   int fd;
+-   int drmMinor;
+-};
+-
+-
+ #define R200_CMD_BUF_SZ  (16*1024) 
+ 
+-struct r200_store {
+-   GLuint statenr;
+-   GLuint primnr;
+-   char cmd_buf[R200_CMD_BUF_SZ];
+-   int cmd_used;   
+-   int elts_start;
+-};
+-
+-
++#define R200_ELT_BUF_SZ  (16*1024) 
+ /* r200_tcl.c
+  */
+ struct r200_tcl_info {
+    GLuint hw_primitive;
+ 
+ /* hw can handle 12 components max */
+-   struct r200_dma_region *aos_components[12];
++  struct radeon_aos aos[12];
+    GLuint nr_aos_components;
+ 
+    GLuint *Elts;
+ 
+-   struct r200_dma_region indexed_verts;
+-   struct r200_dma_region vertex_data[15];
++   struct radeon_bo *elt_dma_bo;
++   int elt_dma_offset; /** Offset into this buffer object, in bytes */
++   int elt_used;
++
+ };
+ 
+ 
+ /* r200_swtcl.c
+  */
+ struct r200_swtcl_info {
+-   GLuint RenderIndex;
+-   
+-   /**
+-    * Size of a hardware vertex.  This is calculated when \c ::vertex_attrs is
+-    * installed in the Mesa state vector.
+-    */
+-   GLuint vertex_size;
+ 
+-   /**
+-    * Attributes instructing the Mesa TCL pipeline where / how to put vertex
+-    * data in the hardware buffer.
+-    */
+-   struct tnl_attr_map vertex_attrs[VERT_ATTRIB_MAX];
+ 
+-   /**
+-    * Number of elements of \c ::vertex_attrs that are actually used.
+-    */
+-   GLuint vertex_attr_count;
+-
+-   /**
+-    * Cached pointer to the buffer where Mesa will store vertex data.
+-    */
+-   GLubyte *verts;
+-
+-   /* Fallback rasterization functions
+-    */
+-   r200_point_func draw_point;
+-   r200_line_func draw_line;
+-   r200_tri_func draw_tri;
+-
+-   GLuint hw_primitive;
+-   GLenum render_primitive;
+-   GLuint numverts;
++   radeon_point_func draw_point;
++   radeon_line_func draw_line;
++   radeon_tri_func draw_tri;
+ 
+    /**
+     * Offset of the 4UB color data within a hardware (swtcl) vertex.
+@@ -787,27 +562,10 @@ struct r200_swtcl_info {
+     * Should Mesa project vertex data or will the hardware do it?
+     */
+    GLboolean needproj;
+-
+-   struct r200_dma_region indexed_verts;
+-};
+-
+-
+-struct r200_ioctl {
+-   GLuint vertex_offset;
+-   GLuint vertex_size;
+ };
+ 
+ 
+ 
+-#define R200_MAX_PRIMS 64
+-
+-
+-
+-struct r200_prim {
+-   GLuint start;
+-   GLuint end;
+-   GLuint prim;
+-};
+ 
+    /* A maximum total of 29 elements per vertex:  3 floats for position, 3
+     * floats for normal, 4 floats for color, 4 bytes for secondary color,
+@@ -822,9 +580,8 @@ struct r200_prim {
+ 
+ #define R200_MAX_VERTEX_SIZE ((3*6)+11)
+ 
+-
+ struct r200_context {
+-   GLcontext *glCtx;			/* Mesa context */
++   struct radeon_context radeon;
+ 
+    /* Driver and hardware state management
+     */
+@@ -832,56 +589,15 @@ struct r200_context {
+    struct r200_state state;
+    struct r200_vertex_program *curr_vp_hw;
+ 
+-   /* Texture object bookkeeping
+-    */
+-   unsigned              nr_heaps;
+-   driTexHeap          * texture_heaps[ RADEON_NR_TEX_HEAPS ];
+-   driTextureObject      swapped;
+-   int                   texture_depth;
+-   float                 initialMaxAnisotropy;
+-
+-   /* Rasterization and vertex state:
+-    */
+-   GLuint TclFallback;
+-   GLuint Fallback;
+-   GLuint NewGLState;
+-   DECLARE_RENDERINPUTS(tnl_index_bitset);	/* index of bits for last tnl_install_attrs */
+-
+    /* Vertex buffers
+     */
+-   struct r200_ioctl ioctl;
+-   struct r200_dma dma;
+-   struct r200_store store;
+-   /* A full state emit as of the first state emit in the main store, in case
+-    * the context is lost.
+-    */
+-   struct r200_store backup_store;
+-
+-   /* Page flipping
+-    */
+-   GLuint doPageFlip;
+-
+-   /* Busy waiting
+-    */
+-   GLuint do_usleeps;
+-   GLuint do_irqs;
+-   GLuint irqsEmitted;
+-   drm_radeon_irq_wait_t iw;
++   struct radeon_ioctl ioctl;
++   struct radeon_store store;
+ 
+    /* Clientdata textures;
+     */
+    GLuint prefer_gart_client_texturing;
+ 
+-   /* Drawable, cliprect and scissor information
+-    */
+-   GLuint numClipRects;			/* Cliprects for the draw buffer */
+-   drm_clip_rect_t *pClipRects;
+-   unsigned int lastStamp;
+-   GLboolean lost_context;
+-   GLboolean save_on_next_emit;
+-   radeonScreenPtr r200Screen;	/* Screen private DRI data */
+-   drm_radeon_sarea_t *sarea;		/* Private SAREA data */
+-
+    /* TCL stuff
+     */
+    GLmatrix TexGenMatrix[R200_MAX_TEXTURE_UNITS];
+@@ -893,15 +609,6 @@ struct r200_context {
+    GLuint TexGenCompSel;
+    GLmatrix tmpmat;
+ 
+-   /* buffer swap
+-    */
+-   int64_t swap_ust;
+-   int64_t swap_missed_ust;
+-
+-   GLuint swap_count;
+-   GLuint swap_missed_count;
+-
+-
+    /* r200_tcl.c
+     */
+    struct r200_tcl_info tcl;
+@@ -910,14 +617,6 @@ struct r200_context {
+     */
+    struct r200_swtcl_info swtcl;
+ 
+-   /* Mirrors of some DRI state
+-    */
+-   struct r200_dri_mirror dri;
+-
+-   /* Configuration cache
+-    */
+-   driOptionCache optionCache;
+-
+    GLboolean using_hyperz;
+    GLboolean texmicrotile;
+ 
+@@ -927,28 +626,10 @@ struct r200_context {
+ #define R200_CONTEXT(ctx)		((r200ContextPtr)(ctx->DriverCtx))
+ 
+ 
+-static INLINE GLuint r200PackColor( GLuint cpp,
+-					GLubyte r, GLubyte g,
+-					GLubyte b, GLubyte a )
+-{
+-   switch ( cpp ) {
+-   case 2:
+-      return PACK_COLOR_565( r, g, b );
+-   case 4:
+-      return PACK_COLOR_8888( a, r, g, b );
+-   default:
+-      return 0;
+-   }
+-}
+-
+-
+ extern void r200DestroyContext( __DRIcontextPrivate *driContextPriv );
+ extern GLboolean r200CreateContext( const __GLcontextModes *glVisual,
+ 				    __DRIcontextPrivate *driContextPriv,
+ 				    void *sharedContextPrivate);
+-extern void r200SwapBuffers( __DRIdrawablePrivate *dPriv );
+-extern void r200CopySubBuffer( __DRIdrawablePrivate * dPriv,
+-			       int x, int y, int w, int h );
+ extern GLboolean r200MakeCurrent( __DRIcontextPrivate *driContextPriv,
+ 				  __DRIdrawablePrivate *driDrawPriv,
+ 				  __DRIdrawablePrivate *driReadPriv );
+@@ -957,28 +638,9 @@ extern GLboolean r200UnbindContext( __DRIcontextPrivate *driContextPriv );
+ /* ================================================================
+  * Debugging:
+  */
+-#define DO_DEBUG		1
+ 
+-#if DO_DEBUG
+-extern int R200_DEBUG;
+-#else
+-#define R200_DEBUG		0
+-#endif
++#define R200_DEBUG RADEON_DEBUG
++
+ 
+-#define DEBUG_TEXTURE	0x001
+-#define DEBUG_STATE	0x002
+-#define DEBUG_IOCTL	0x004
+-#define DEBUG_PRIMS	0x008
+-#define DEBUG_VERTS	0x010
+-#define DEBUG_FALLBACKS	0x020
+-#define DEBUG_VFMT	0x040
+-#define DEBUG_CODEGEN	0x080
+-#define DEBUG_VERBOSE	0x100
+-#define DEBUG_DRI       0x200
+-#define DEBUG_DMA       0x400
+-#define DEBUG_SANITY    0x800
+-#define DEBUG_SYNC      0x1000
+-#define DEBUG_PIXEL     0x2000
+-#define DEBUG_MEMORY    0x4000
+ 
+ #endif /* __R200_CONTEXT_H__ */
+diff --git a/src/mesa/drivers/dri/r200/r200_fragshader.c b/src/mesa/drivers/dri/r200/r200_fragshader.c
+index d514b28..85c1b7b 100644
+--- a/src/mesa/drivers/dri/r200/r200_fragshader.c
++++ b/src/mesa/drivers/dri/r200/r200_fragshader.c
+@@ -522,7 +522,7 @@ static void r200UpdateFSConstants( GLcontext *ctx )
+ 	 CLAMPED_FLOAT_TO_UBYTE(con_byte[2], ctx->ATIFragmentShader.GlobalConstants[i][2]);
+ 	 CLAMPED_FLOAT_TO_UBYTE(con_byte[3], ctx->ATIFragmentShader.GlobalConstants[i][3]);
+       }
+-      rmesa->hw.atf.cmd[ATF_TFACTOR_0 + i] = r200PackColor (
++      rmesa->hw.atf.cmd[ATF_TFACTOR_0 + i] = radeonPackColor (
+ 	 4, con_byte[0], con_byte[1], con_byte[2], con_byte[3] );
+    }
+ }
+diff --git a/src/mesa/drivers/dri/r200/r200_ioctl.c b/src/mesa/drivers/dri/r200/r200_ioctl.c
+index 0741e57..c08968f 100644
+--- a/src/mesa/drivers/dri/r200/r200_ioctl.c
++++ b/src/mesa/drivers/dri/r200/r200_ioctl.c
+@@ -41,6 +41,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "main/context.h"
+ #include "swrast/swrast.h"
+ 
++#include "radeon_common.h"
++#include "radeon_lock.h"
+ #include "r200_context.h"
+ #include "r200_state.h"
+ #include "r200_ioctl.h"
+@@ -54,635 +56,28 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #define R200_TIMEOUT             512
+ #define R200_IDLE_RETRY           16
+ 
+-
+-static void r200WaitForIdle( r200ContextPtr rmesa );
+-
+-
+-/* At this point we were in FlushCmdBufLocked but we had lost our context, so
+- * we need to unwire our current cmdbuf, hook the one with the saved state in
+- * it, flush it, and then put the current one back.  This is so commands at the
+- * start of a cmdbuf can rely on the state being kept from the previous one.
+- */
+-static void r200BackUpAndEmitLostStateLocked( r200ContextPtr rmesa )
+-{
+-   GLuint nr_released_bufs;
+-   struct r200_store saved_store;
+-
+-   if (rmesa->backup_store.cmd_used == 0)
+-      return;
+-
+-   if (R200_DEBUG & DEBUG_STATE)
+-      fprintf(stderr, "Emitting backup state on lost context\n");
+-
+-   rmesa->lost_context = GL_FALSE;
+-
+-   nr_released_bufs = rmesa->dma.nr_released_bufs;
+-   saved_store = rmesa->store;
+-   rmesa->dma.nr_released_bufs = 0;
+-   rmesa->store = rmesa->backup_store;
+-   r200FlushCmdBufLocked( rmesa, __FUNCTION__ );
+-   rmesa->dma.nr_released_bufs = nr_released_bufs;
+-   rmesa->store = saved_store;
+-}
+-
+-int r200FlushCmdBufLocked( r200ContextPtr rmesa, const char * caller )
+-{
+-   int ret, i;
+-   drm_radeon_cmd_buffer_t cmd;
+-
+-   if (rmesa->lost_context)
+-      r200BackUpAndEmitLostStateLocked( rmesa );
+-
+-   if (R200_DEBUG & DEBUG_IOCTL) {
+-      fprintf(stderr, "%s from %s\n", __FUNCTION__, caller); 
+-
+-      if (0 & R200_DEBUG & DEBUG_VERBOSE) 
+-	 for (i = 0 ; i < rmesa->store.cmd_used ; i += 4 )
+-	    fprintf(stderr, "%d: %x\n", i/4, 
+-		    *(int *)(&rmesa->store.cmd_buf[i]));
+-   }
+-
+-   if (R200_DEBUG & DEBUG_DMA)
+-      fprintf(stderr, "%s: Releasing %d buffers\n", __FUNCTION__,
+-	      rmesa->dma.nr_released_bufs);
+-
+-
+-   if (R200_DEBUG & DEBUG_SANITY) {
+-      if (rmesa->state.scissor.enabled) 
+-	 ret = r200SanityCmdBuffer( rmesa, 
+-				    rmesa->state.scissor.numClipRects,
+-				    rmesa->state.scissor.pClipRects);
+-      else
+-	 ret = r200SanityCmdBuffer( rmesa, 
+-				    rmesa->numClipRects,
+-				    rmesa->pClipRects);
+-      if (ret) {
+-	 fprintf(stderr, "drmSanityCommandWrite: %d\n", ret);	 
+-	 goto out;
+-      }
+-   }
+-
+-
+-   if (R200_DEBUG & DEBUG_MEMORY) {
+-      if (! driValidateTextureHeaps( rmesa->texture_heaps, rmesa->nr_heaps,
+-				     & rmesa->swapped ) ) {
+-	 fprintf( stderr, "%s: texture memory is inconsistent - expect "
+-		  "mangled textures\n", __FUNCTION__ );
+-      }
+-   }
+-
+-
+-   cmd.bufsz = rmesa->store.cmd_used;
+-   cmd.buf = rmesa->store.cmd_buf;
+-
+-   if (rmesa->state.scissor.enabled) {
+-      cmd.nbox = rmesa->state.scissor.numClipRects;
+-      cmd.boxes = (drm_clip_rect_t *)rmesa->state.scissor.pClipRects;
+-   } else {
+-      cmd.nbox = rmesa->numClipRects;
+-      cmd.boxes = (drm_clip_rect_t *)rmesa->pClipRects;
+-   }
+-
+-   ret = drmCommandWrite( rmesa->dri.fd,
+-			  DRM_RADEON_CMDBUF,
+-			  &cmd, sizeof(cmd) );
+-
+-   if (ret)
+-      fprintf(stderr, "drmCommandWrite: %d\n", ret);
+-
+-   if (R200_DEBUG & DEBUG_SYNC) {
+-      fprintf(stderr, "\nSyncing in %s\n\n", __FUNCTION__);
+-      r200WaitForIdleLocked( rmesa );
+-   }
+-
+-
+- out:
+-   rmesa->store.primnr = 0;
+-   rmesa->store.statenr = 0;
+-   rmesa->store.cmd_used = 0;
+-   rmesa->dma.nr_released_bufs = 0;
+-   rmesa->save_on_next_emit = 1;
+-
+-   return ret;
+-}
+-
+-
+-/* Note: does not emit any commands to avoid recursion on
+- * r200AllocCmdBuf.
+- */
+-void r200FlushCmdBuf( r200ContextPtr rmesa, const char *caller )
+-{
+-   int ret;
+-
+-   LOCK_HARDWARE( rmesa );
+-
+-   ret = r200FlushCmdBufLocked( rmesa, caller );
+-
+-   UNLOCK_HARDWARE( rmesa );
+-
+-   if (ret) {
+-      fprintf(stderr, "drmRadeonCmdBuffer: %d (exiting)\n", ret);
+-      exit(ret);
+-   }
+-}
+-
+-
+-/* =============================================================
+- * Hardware vertex buffer handling
+- */
+-
+-
+-void r200RefillCurrentDmaRegion( r200ContextPtr rmesa )
+-{
+-   struct r200_dma_buffer *dmabuf;
+-   int fd = rmesa->dri.fd;
+-   int index = 0;
+-   int size = 0;
+-   drmDMAReq dma;
+-   int ret;
+-
+-   if (R200_DEBUG & (DEBUG_IOCTL|DEBUG_DMA))
+-      fprintf(stderr, "%s\n", __FUNCTION__);  
+-
+-   if (rmesa->dma.flush) {
+-      rmesa->dma.flush( rmesa );
+-   }
+-
+-   if (rmesa->dma.current.buf)
+-      r200ReleaseDmaRegion( rmesa, &rmesa->dma.current, __FUNCTION__ );
+-
+-   if (rmesa->dma.nr_released_bufs > 4)
+-      r200FlushCmdBuf( rmesa, __FUNCTION__ );
+-
+-   dma.context = rmesa->dri.hwContext;
+-   dma.send_count = 0;
+-   dma.send_list = NULL;
+-   dma.send_sizes = NULL;
+-   dma.flags = 0;
+-   dma.request_count = 1;
+-   dma.request_size = RADEON_BUFFER_SIZE;
+-   dma.request_list = &index;
+-   dma.request_sizes = &size;
+-   dma.granted_count = 0;
+-
+-   LOCK_HARDWARE(rmesa);	/* no need to validate */
+-
+-   while (1) {
+-      ret = drmDMA( fd, &dma );
+-      if (ret == 0)
+-	 break;
+-   
+-      if (rmesa->dma.nr_released_bufs) {
+-	 r200FlushCmdBufLocked( rmesa, __FUNCTION__ );
+-      }
+-
+-      if (rmesa->do_usleeps) {
+-	 UNLOCK_HARDWARE( rmesa );
+-	 DO_USLEEP( 1 );
+-	 LOCK_HARDWARE( rmesa );
+-      }
+-   }
+-
+-   UNLOCK_HARDWARE(rmesa);
+-
+-   if (R200_DEBUG & DEBUG_DMA)
+-      fprintf(stderr, "Allocated buffer %d\n", index);
+-
+-   dmabuf = CALLOC_STRUCT( r200_dma_buffer );
+-   dmabuf->buf = &rmesa->r200Screen->buffers->list[index];
+-   dmabuf->refcount = 1;
+-
+-   rmesa->dma.current.buf = dmabuf;
+-   rmesa->dma.current.address = dmabuf->buf->address;
+-   rmesa->dma.current.end = dmabuf->buf->total;
+-   rmesa->dma.current.start = 0;
+-   rmesa->dma.current.ptr = 0;
+-}
+-
+-void r200ReleaseDmaRegion( r200ContextPtr rmesa,
+-			     struct r200_dma_region *region,
+-			     const char *caller )
+-{
+-   if (R200_DEBUG & DEBUG_IOCTL)
+-      fprintf(stderr, "%s from %s\n", __FUNCTION__, caller); 
+-   
+-   if (!region->buf)
+-      return;
+-
+-   if (rmesa->dma.flush)
+-      rmesa->dma.flush( rmesa );
+-
+-   if (--region->buf->refcount == 0) {
+-      drm_radeon_cmd_header_t *cmd;
+-
+-      if (R200_DEBUG & (DEBUG_IOCTL|DEBUG_DMA))
+-	 fprintf(stderr, "%s -- DISCARD BUF %d\n", __FUNCTION__,
+-		 region->buf->buf->idx);  
+-      
+-      cmd = (drm_radeon_cmd_header_t *)r200AllocCmdBuf( rmesa, sizeof(*cmd), 
+-						     __FUNCTION__ );
+-      cmd->dma.cmd_type = RADEON_CMD_DMA_DISCARD;
+-      cmd->dma.buf_idx = region->buf->buf->idx;
+-      FREE(region->buf);
+-      rmesa->dma.nr_released_bufs++;
+-   }
+-
+-   region->buf = NULL;
+-   region->start = 0;
+-}
+-
+-/* Allocates a region from rmesa->dma.current.  If there isn't enough
+- * space in current, grab a new buffer (and discard what was left of current)
+- */
+-void r200AllocDmaRegion( r200ContextPtr rmesa, 
+-			   struct r200_dma_region *region,
+-			   int bytes,
+-			   int alignment )
+-{
+-   if (R200_DEBUG & DEBUG_IOCTL)
+-      fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);
+-
+-   if (rmesa->dma.flush)
+-      rmesa->dma.flush( rmesa );
+-
+-   if (region->buf)
+-      r200ReleaseDmaRegion( rmesa, region, __FUNCTION__ );
+-
+-   alignment--;
+-   rmesa->dma.current.start = rmesa->dma.current.ptr = 
+-      (rmesa->dma.current.ptr + alignment) & ~alignment;
+-
+-   if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end ) 
+-      r200RefillCurrentDmaRegion( rmesa );
+-
+-   region->start = rmesa->dma.current.start;
+-   region->ptr = rmesa->dma.current.start;
+-   region->end = rmesa->dma.current.start + bytes;
+-   region->address = rmesa->dma.current.address;
+-   region->buf = rmesa->dma.current.buf;
+-   region->buf->refcount++;
+-
+-   rmesa->dma.current.ptr += bytes; /* bug - if alignment > 7 */
+-   rmesa->dma.current.start = 
+-      rmesa->dma.current.ptr = (rmesa->dma.current.ptr + 0x7) & ~0x7;  
+-
+-   assert( rmesa->dma.current.ptr <= rmesa->dma.current.end );
+-}
+-
+-/* ================================================================
+- * SwapBuffers with client-side throttling
+- */
+-
+-static uint32_t r200GetLastFrame(r200ContextPtr rmesa)
+-{
+-   drm_radeon_getparam_t gp;
+-   int ret;
+-   uint32_t frame;
+-
+-   gp.param = RADEON_PARAM_LAST_FRAME;
+-   gp.value = (int *)&frame;
+-   ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_GETPARAM,
+-			      &gp, sizeof(gp) );
+-   if ( ret ) {
+-      fprintf( stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__, ret );
+-      exit(1);
+-   }
+-
+-   return frame;
+-}
+-
+-static void r200EmitIrqLocked( r200ContextPtr rmesa )
+-{
+-   drm_radeon_irq_emit_t ie;
+-   int ret;
+-
+-   ie.irq_seq = &rmesa->iw.irq_seq;
+-   ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_IRQ_EMIT, 
+-			      &ie, sizeof(ie) );
+-   if ( ret ) {
+-      fprintf( stderr, "%s: drmRadeonIrqEmit: %d\n", __FUNCTION__, ret );
+-      exit(1);
+-   }
+-}
+-
+-
+-static void r200WaitIrq( r200ContextPtr rmesa )
+-{
+-   int ret;
+-
+-   do {
+-      ret = drmCommandWrite( rmesa->dri.fd, DRM_RADEON_IRQ_WAIT,
+-			     &rmesa->iw, sizeof(rmesa->iw) );
+-   } while (ret && (errno == EINTR || errno == EBUSY));
+-
+-   if ( ret ) {
+-      fprintf( stderr, "%s: drmRadeonIrqWait: %d\n", __FUNCTION__, ret );
+-      exit(1);
+-   }
+-}
+-
+-
+-static void r200WaitForFrameCompletion( r200ContextPtr rmesa )
+-{
+-   drm_radeon_sarea_t *sarea = rmesa->sarea;
+-
+-   if (rmesa->do_irqs) {
+-      if (r200GetLastFrame(rmesa) < sarea->last_frame) {
+-	 if (!rmesa->irqsEmitted) {
+-	    while (r200GetLastFrame (rmesa) < sarea->last_frame)
+-	       ;
+-	 }
+-	 else {
+-	    UNLOCK_HARDWARE( rmesa ); 
+-	    r200WaitIrq( rmesa );	
+-	    LOCK_HARDWARE( rmesa ); 
+-	 }
+-	 rmesa->irqsEmitted = 10;
+-      }
+-
+-      if (rmesa->irqsEmitted) {
+-	 r200EmitIrqLocked( rmesa );
+-	 rmesa->irqsEmitted--;
+-      }
+-   } 
+-   else {
+-      while (r200GetLastFrame (rmesa) < sarea->last_frame) {
+-	 UNLOCK_HARDWARE( rmesa ); 
+-	 if (rmesa->do_usleeps) 
+-	    DO_USLEEP( 1 );
+-	 LOCK_HARDWARE( rmesa ); 
+-      }
+-   }
+-}
+-
+-
+-
+-/* Copy the back color buffer to the front color buffer.
+- */
+-void r200CopyBuffer( __DRIdrawablePrivate *dPriv,
+-		      const drm_clip_rect_t	 *rect)
+-{
+-   r200ContextPtr rmesa;
+-   GLint nbox, i, ret;
+-   GLboolean   missed_target;
+-   int64_t ust;
+-   __DRIscreenPrivate *psp = dPriv->driScreenPriv;
+-
+-   assert(dPriv);
+-   assert(dPriv->driContextPriv);
+-   assert(dPriv->driContextPriv->driverPrivate);
+-
+-   rmesa = (r200ContextPtr) dPriv->driContextPriv->driverPrivate;
+-
+-   if ( R200_DEBUG & DEBUG_IOCTL ) {
+-      fprintf( stderr, "\n%s( %p )\n\n", __FUNCTION__, (void *)rmesa->glCtx );
+-   }
+-
+-   R200_FIREVERTICES( rmesa );
+-
+-   LOCK_HARDWARE( rmesa );
+-
+-
+-   /* Throttle the frame rate -- only allow one pending swap buffers
+-    * request at a time.
+-    */
+-   r200WaitForFrameCompletion( rmesa );
+-   if (!rect)
+-   {
+-       UNLOCK_HARDWARE( rmesa );
+-       driWaitForVBlank( dPriv, & missed_target );
+-       LOCK_HARDWARE( rmesa );
+-   }
+-
+-   nbox = dPriv->numClipRects; /* must be in locked region */
+-
+-   for ( i = 0 ; i < nbox ; ) {
+-      GLint nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS , nbox );
+-      drm_clip_rect_t *box = dPriv->pClipRects;
+-      drm_clip_rect_t *b = rmesa->sarea->boxes;
+-      GLint n = 0;
+-
+-      for ( ; i < nr ; i++ ) {
+-
+-	  *b = box[i];
+-
+-	  if (rect)
+-	  {
+-	     if (rect->x1 > b->x1)
+-		 b->x1 = rect->x1;
+-	     if (rect->y1 > b->y1)
+-		 b->y1 = rect->y1;
+-	     if (rect->x2 < b->x2)
+-		 b->x2 = rect->x2;
+-	     if (rect->y2 < b->y2)
+-		 b->y2 = rect->y2;
+-
+-	     if (b->x1 >= b->x2 || b->y1 >= b->y2)
+-		 continue;
+-	  }
+-
+-	  b++;
+-	  n++;
+-      }
+-      rmesa->sarea->nbox = n;
+-
+-      if (!n)
+-	 continue;
+-
+-      ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_SWAP );
+-
+-      if ( ret ) {
+-	 fprintf( stderr, "DRM_R200_SWAP_BUFFERS: return = %d\n", ret );
+-	 UNLOCK_HARDWARE( rmesa );
+-	 exit( 1 );
+-      }
+-   }
+-
+-   UNLOCK_HARDWARE( rmesa );
+-   if (!rect)
+-   {
+-       rmesa->hw.all_dirty = GL_TRUE;
+-
+-       rmesa->swap_count++;
+-       (*psp->systemTime->getUST)( & ust );
+-       if ( missed_target ) {
+-	   rmesa->swap_missed_count++;
+-	   rmesa->swap_missed_ust = ust - rmesa->swap_ust;
+-       }
+-
+-       rmesa->swap_ust = ust;
+-
+-       sched_yield();
+-   }
+-}
+-
+-void r200PageFlip( __DRIdrawablePrivate *dPriv )
++static void r200UserClear(GLcontext *ctx, GLuint flags)
+ {
+-   r200ContextPtr rmesa;
+-   GLint ret;
+-   GLboolean   missed_target;
+-   __DRIscreenPrivate *psp = dPriv->driScreenPriv;
+-
+-   assert(dPriv);
+-   assert(dPriv->driContextPriv);
+-   assert(dPriv->driContextPriv->driverPrivate);
++   if (flags & (RADEON_FRONT | RADEON_BACK)) {
+ 
+-   rmesa = (r200ContextPtr) dPriv->driContextPriv->driverPrivate;
+ 
+-   if ( R200_DEBUG & DEBUG_IOCTL ) {
+-      fprintf(stderr, "%s: pfCurrentPage: %d\n", __FUNCTION__,
+-	      rmesa->sarea->pfCurrentPage);
+-   }
+-
+-   R200_FIREVERTICES( rmesa );
+-   LOCK_HARDWARE( rmesa );
+-
+-   if (!dPriv->numClipRects) {
+-      UNLOCK_HARDWARE( rmesa );
+-      usleep( 10000 );		/* throttle invisible client 10ms */
+-      return;
+    }
++	  
++   if ((flags & (RADEON_DEPTH | RADEON_STENCIL))
++       && (flags & RADEON_CLEAR_FASTZ)) {
+ 
+-   /* Need to do this for the perf box placement:
+-    */
+-   {
+-      drm_clip_rect_t *box = dPriv->pClipRects;
+-      drm_clip_rect_t *b = rmesa->sarea->boxes;
+-      b[0] = box[0];
+-      rmesa->sarea->nbox = 1;
+-   }
+-
+-   /* Throttle the frame rate -- only allow a few pending swap buffers
+-    * request at a time.
+-    */
+-   r200WaitForFrameCompletion( rmesa );
+-   UNLOCK_HARDWARE( rmesa );
+-   driWaitForVBlank( dPriv, & missed_target );
+-   if ( missed_target ) {
+-      rmesa->swap_missed_count++;
+-      (void) (*psp->systemTime->getUST)( & rmesa->swap_missed_ust );
+    }
+-   LOCK_HARDWARE( rmesa );
+ 
+-   ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_FLIP );
+-
+-   UNLOCK_HARDWARE( rmesa );
+-
+-   if ( ret ) {
+-      fprintf( stderr, "DRM_RADEON_FLIP: return = %d\n", ret );
+-      exit( 1 );
+-   }
+-
+-   rmesa->swap_count++;
+-   (void) (*psp->systemTime->getUST)( & rmesa->swap_ust );
+-
+-#if 000
+-   if ( rmesa->sarea->pfCurrentPage == 1 ) {
+-	 rmesa->state.color.drawOffset = rmesa->r200Screen->frontOffset;
+-	 rmesa->state.color.drawPitch  = rmesa->r200Screen->frontPitch;
+-   } else {
+-	 rmesa->state.color.drawOffset = rmesa->r200Screen->backOffset;
+-	 rmesa->state.color.drawPitch  = rmesa->r200Screen->backPitch;
+-   }
+-
+-   R200_STATECHANGE( rmesa, ctx );
+-   rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] = rmesa->state.color.drawOffset
+-					   + rmesa->r200Screen->fbLocation;
+-   rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH]  = rmesa->state.color.drawPitch;
+-   if (rmesa->sarea->tiling_enabled) {
+-      rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= R200_COLOR_TILE_ENABLE;
+-   }
+-#else
+-   /* Get ready for drawing next frame.  Update the renderbuffers'
+-    * flippedOffset/Pitch fields so we draw into the right place.
+-    */
+-   driFlipRenderbuffers(rmesa->glCtx->WinSysDrawBuffer,
+-                        rmesa->sarea->pfCurrentPage);
+-
+-
+-   r200UpdateDrawBuffer(rmesa->glCtx);
+-#endif
+ }
+ 
+-
+-/* ================================================================
+- * Buffer clear
+- */
+-static void r200Clear( GLcontext *ctx, GLbitfield mask )
++static void r200KernelClear(GLcontext *ctx, GLuint flags)
+ {
+    r200ContextPtr rmesa = R200_CONTEXT(ctx);
+-   __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
+-   GLuint flags = 0;
+-   GLuint color_mask = 0;
+-   GLint ret, i;
+-   GLint cx, cy, cw, ch;
++   __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable;
++   GLint cx, cy, cw, ch, ret;
++   GLuint i;
+ 
+-   if ( R200_DEBUG & DEBUG_IOCTL ) {
+-      fprintf( stderr, "r200Clear\n");
+-   }
+-
+-   {
+-      LOCK_HARDWARE( rmesa );
+-      UNLOCK_HARDWARE( rmesa );
+-      if ( dPriv->numClipRects == 0 ) 
+-	 return;
+-   }
+-
+-   r200Flush( ctx );
+-
+-   if ( mask & BUFFER_BIT_FRONT_LEFT ) {
+-      flags |= RADEON_FRONT;
+-      color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
+-      mask &= ~BUFFER_BIT_FRONT_LEFT;
+-   }
+-
+-   if ( mask & BUFFER_BIT_BACK_LEFT ) {
+-      flags |= RADEON_BACK;
+-      color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
+-      mask &= ~BUFFER_BIT_BACK_LEFT;
+-   }
+-
+-   if ( mask & BUFFER_BIT_DEPTH ) {
+-      flags |= RADEON_DEPTH;
+-      mask &= ~BUFFER_BIT_DEPTH;
+-   }
+-
+-   if ( (mask & BUFFER_BIT_STENCIL) && rmesa->state.stencil.hwBuffer ) {
+-      flags |= RADEON_STENCIL;
+-      mask &= ~BUFFER_BIT_STENCIL;
+-   }
+-
+-   if ( mask ) {
+-      if (R200_DEBUG & DEBUG_FALLBACKS)
+-	 fprintf(stderr, "%s: swrast clear, mask: %x\n", __FUNCTION__, mask);
+-      _swrast_Clear( ctx, mask );
+-   }
+-
+-   if ( !flags ) 
+-      return;
+-
+-   if (rmesa->using_hyperz) {
+-      flags |= RADEON_USE_COMP_ZBUF;
+-/*      if (rmesa->r200Screen->chip_family == CHIP_FAMILY_R200)
+-	 flags |= RADEON_USE_HIERZ; */
+-      if (!(rmesa->state.stencil.hwBuffer) ||
+-	 ((flags & RADEON_DEPTH) && (flags & RADEON_STENCIL) &&
+-	    ((rmesa->state.stencil.clear & R200_STENCIL_WRITE_MASK) == R200_STENCIL_WRITE_MASK))) {
+-	  flags |= RADEON_CLEAR_FASTZ;
+-      }
+-   }
+-
+-   LOCK_HARDWARE( rmesa );
+-
+-   /* compute region after locking: */
+-   cx = ctx->DrawBuffer->_Xmin;
+-   cy = ctx->DrawBuffer->_Ymin;
+-   cw = ctx->DrawBuffer->_Xmax - cx;
+-   ch = ctx->DrawBuffer->_Ymax - cy;
+-
+-   /* Flip top to bottom */
+-   cx += dPriv->x;
+-   cy  = dPriv->y + dPriv->h - cy - ch;
++   LOCK_HARDWARE( &rmesa->radeon );
+ 
+    /* Throttle the number of clear ioctls we do.
+     */
+@@ -693,7 +88,7 @@ static void r200Clear( GLcontext *ctx, GLbitfield mask )
+ 
+       gp.param = RADEON_PARAM_LAST_CLEAR;
+       gp.value = (int *)&clear;
+-      ret = drmCommandWriteRead( rmesa->dri.fd,
++      ret = drmCommandWriteRead( rmesa->radeon.dri.fd,
+ 		      DRM_RADEON_GETPARAM, &gp, sizeof(gp) );
+ 
+       if ( ret ) {
+@@ -703,24 +98,34 @@ static void r200Clear( GLcontext *ctx, GLbitfield mask )
+ 
+       /* Clear throttling needs more thought.
+        */
+-      if ( rmesa->sarea->last_clear - clear <= 25 ) {
++      if ( rmesa->radeon.sarea->last_clear - clear <= 25 ) {
+ 	 break;
+       }
+       
+-      if (rmesa->do_usleeps) {
+-	 UNLOCK_HARDWARE( rmesa );
++      if (rmesa->radeon.do_usleeps) {
++	 UNLOCK_HARDWARE( &rmesa->radeon );
+ 	 DO_USLEEP( 1 );
+-	 LOCK_HARDWARE( rmesa );
++	 LOCK_HARDWARE( &rmesa->radeon );
+       }
+    }
+ 
+    /* Send current state to the hardware */
+-   r200FlushCmdBufLocked( rmesa, __FUNCTION__ );
++   rcommonFlushCmdBufLocked( &rmesa->radeon, __FUNCTION__ );
++
++
++  /* compute region after locking: */
++   cx = ctx->DrawBuffer->_Xmin;
++   cy = ctx->DrawBuffer->_Ymin;
++   cw = ctx->DrawBuffer->_Xmax - cx;
++   ch = ctx->DrawBuffer->_Ymax - cy;
+ 
++   /* Flip top to bottom */
++   cx += dPriv->x;
++   cy  = dPriv->y + dPriv->h - cy - ch;
+    for ( i = 0 ; i < dPriv->numClipRects ; ) {
+       GLint nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS, dPriv->numClipRects );
+       drm_clip_rect_t *box = dPriv->pClipRects;
+-      drm_clip_rect_t *b = rmesa->sarea->boxes;
++      drm_clip_rect_t *b = rmesa->radeon.sarea->boxes;
+       drm_radeon_clear_t clear;
+       drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
+       GLint n = 0;
+@@ -755,17 +160,17 @@ static void r200Clear( GLcontext *ctx, GLbitfield mask )
+ 	 }
+       }
+ 
+-      rmesa->sarea->nbox = n;
++      rmesa->radeon.sarea->nbox = n;
+ 
+       clear.flags       = flags;
+-      clear.clear_color = rmesa->state.color.clear;
+-      clear.clear_depth = rmesa->state.depth.clear;	/* needed for hyperz */
++      clear.clear_color = rmesa->radeon.state.color.clear;
++      clear.clear_depth = rmesa->radeon.state.depth.clear;	/* needed for hyperz */
+       clear.color_mask  = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
+-      clear.depth_mask  = rmesa->state.stencil.clear;
++      clear.depth_mask  = rmesa->radeon.state.stencil.clear;
+       clear.depth_boxes = depth_boxes;
+ 
+       n--;
+-      b = rmesa->sarea->boxes;
++      b = rmesa->radeon.sarea->boxes;
+       for ( ; n >= 0 ; n-- ) {
+ 	 depth_boxes[n].f[CLEAR_X1] = (float)b[n].x1;
+ 	 depth_boxes[n].f[CLEAR_Y1] = (float)b[n].y1;
+@@ -774,83 +179,91 @@ static void r200Clear( GLcontext *ctx, GLbitfield mask )
+ 	 depth_boxes[n].f[CLEAR_DEPTH] = ctx->Depth.Clear;
+       }
+ 
+-      ret = drmCommandWrite( rmesa->dri.fd, DRM_RADEON_CLEAR,
++      ret = drmCommandWrite( rmesa->radeon.dri.fd, DRM_RADEON_CLEAR,
+ 			     &clear, sizeof(clear));
+ 
+ 
+       if ( ret ) {
+-	 UNLOCK_HARDWARE( rmesa );
++	 UNLOCK_HARDWARE( &rmesa->radeon );
+ 	 fprintf( stderr, "DRM_RADEON_CLEAR: return = %d\n", ret );
+ 	 exit( 1 );
+       }
+    }
+-
+-   UNLOCK_HARDWARE( rmesa );
+-   rmesa->hw.all_dirty = GL_TRUE;
++   UNLOCK_HARDWARE( &rmesa->radeon );
+ }
+-
+-
+-void r200WaitForIdleLocked( r200ContextPtr rmesa )
++/* ================================================================
++ * Buffer clear
++ */
++static void r200Clear( GLcontext *ctx, GLbitfield mask )
+ {
+-    int ret;
+-    int i = 0;
+-    
+-    do {
+-       ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_CP_IDLE);
+-       if (ret) 
+-	  DO_USLEEP( 1 );
+-    } while (ret && ++i < 100);
+-    
+-    if ( ret < 0 ) {
+-       UNLOCK_HARDWARE( rmesa );
+-       fprintf( stderr, "Error: R200 timed out... exiting\n" );
+-       exit( -1 );
+-    }
+-}
++   r200ContextPtr rmesa = R200_CONTEXT(ctx);
++   __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable;
++   GLuint flags = 0;
++   GLuint color_mask = 0;
++   GLint ret;
+ 
++   if ( R200_DEBUG & DEBUG_IOCTL ) {
++      fprintf( stderr, "r200Clear\n");
++   }
+ 
+-static void r200WaitForIdle( r200ContextPtr rmesa )
+-{
+-   LOCK_HARDWARE(rmesa);
+-   r200WaitForIdleLocked( rmesa );
+-   UNLOCK_HARDWARE(rmesa);
+-}
++   {
++      LOCK_HARDWARE( &rmesa->radeon );
++      UNLOCK_HARDWARE( &rmesa->radeon );
++      if ( dPriv->numClipRects == 0 ) 
++	 return;
++   }
+ 
++   radeonFlush( ctx );
+ 
+-void r200Flush( GLcontext *ctx )
+-{
+-   r200ContextPtr rmesa = R200_CONTEXT( ctx );
++   if ( mask & BUFFER_BIT_FRONT_LEFT ) {
++      flags |= RADEON_FRONT;
++      color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
++      mask &= ~BUFFER_BIT_FRONT_LEFT;
++   }
+ 
+-   if (R200_DEBUG & DEBUG_IOCTL)
+-      fprintf(stderr, "%s\n", __FUNCTION__);
++   if ( mask & BUFFER_BIT_BACK_LEFT ) {
++      flags |= RADEON_BACK;
++      color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
++      mask &= ~BUFFER_BIT_BACK_LEFT;
++   }
+ 
+-   if (rmesa->dma.flush)
+-      rmesa->dma.flush( rmesa );
++   if ( mask & BUFFER_BIT_DEPTH ) {
++      flags |= RADEON_DEPTH;
++      mask &= ~BUFFER_BIT_DEPTH;
++   }
+ 
+-   r200EmitState( rmesa );
+-   
+-   if (rmesa->store.cmd_used)
+-      r200FlushCmdBuf( rmesa, __FUNCTION__ );
+-}
++   if ( (mask & BUFFER_BIT_STENCIL) && rmesa->radeon.state.stencil.hwBuffer ) {
++      flags |= RADEON_STENCIL;
++      mask &= ~BUFFER_BIT_STENCIL;
++   }
+ 
+-/* Make sure all commands have been sent to the hardware and have
+- * completed processing.
+- */
+-void r200Finish( GLcontext *ctx )
+-{
+-   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+-   r200Flush( ctx );
++   if ( mask ) {
++      if (R200_DEBUG & DEBUG_FALLBACKS)
++	 fprintf(stderr, "%s: swrast clear, mask: %x\n", __FUNCTION__, mask);
++      _swrast_Clear( ctx, mask );
++   }
++
++   if ( !flags ) 
++      return;
+ 
+-   if (rmesa->do_irqs) {
+-      LOCK_HARDWARE( rmesa );
+-      r200EmitIrqLocked( rmesa );
+-      UNLOCK_HARDWARE( rmesa );
+-      r200WaitIrq( rmesa );
++   if (rmesa->using_hyperz) {
++      flags |= RADEON_USE_COMP_ZBUF;
++/*      if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200)
++	 flags |= RADEON_USE_HIERZ; */
++      if (!(rmesa->radeon.state.stencil.hwBuffer) ||
++	 ((flags & RADEON_DEPTH) && (flags & RADEON_STENCIL) &&
++	    ((rmesa->radeon.state.stencil.clear & R200_STENCIL_WRITE_MASK) == R200_STENCIL_WRITE_MASK))) {
++	  flags |= RADEON_CLEAR_FASTZ;
++      }
+    }
+-   else 
+-      r200WaitForIdle( rmesa );
+-}
+ 
++   if (rmesa->radeon.radeonScreen->kernel_mm)
++      r200UserClear(ctx, flags);
++   else
++      r200KernelClear(ctx, flags);
++
++   rmesa->radeon.hw.all_dirty = GL_TRUE;
++}
+ 
+ /* This version of AllocateMemoryMESA allocates only GART memory, and
+  * only does so after the point at which the driver has been
+@@ -875,7 +288,7 @@ void *r200AllocateMemoryMESA(__DRIscreen *screen, GLsizei size,
+       fprintf(stderr, "%s sz %d %f/%f/%f\n", __FUNCTION__, size, readfreq, 
+ 	      writefreq, priority);
+ 
+-   if (!ctx || !(rmesa = R200_CONTEXT(ctx)) || !rmesa->r200Screen->gartTextures.map)
++   if (!ctx || !(rmesa = R200_CONTEXT(ctx)) || !rmesa->radeon.radeonScreen->gartTextures.map)
+       return NULL;
+ 
+    if (getenv("R200_NO_ALLOC"))
+@@ -886,7 +299,7 @@ void *r200AllocateMemoryMESA(__DRIscreen *screen, GLsizei size,
+    alloc.size = size;
+    alloc.region_offset = &region_offset;
+ 
+-   ret = drmCommandWriteRead( rmesa->r200Screen->driScreen->fd,
++   ret = drmCommandWriteRead( rmesa->radeon.radeonScreen->driScreen->fd,
+ 			      DRM_RADEON_ALLOC,
+ 			      &alloc, sizeof(alloc));
+    
+@@ -896,7 +309,7 @@ void *r200AllocateMemoryMESA(__DRIscreen *screen, GLsizei size,
+    }
+    
+    {
+-      char *region_start = (char *)rmesa->r200Screen->gartTextures.map;
++      char *region_start = (char *)rmesa->radeon.radeonScreen->gartTextures.map;
+       return (void *)(region_start + region_offset);
+    }
+ }
+@@ -914,24 +327,24 @@ void r200FreeMemoryMESA(__DRIscreen *screen, GLvoid *pointer)
+    if (R200_DEBUG & DEBUG_IOCTL)
+       fprintf(stderr, "%s %p\n", __FUNCTION__, pointer);
+ 
+-   if (!ctx || !(rmesa = R200_CONTEXT(ctx)) || !rmesa->r200Screen->gartTextures.map) {
++   if (!ctx || !(rmesa = R200_CONTEXT(ctx)) || !rmesa->radeon.radeonScreen->gartTextures.map) {
+       fprintf(stderr, "%s: no context\n", __FUNCTION__);
+       return;
+    }
+ 
+-   region_offset = (char *)pointer - (char *)rmesa->r200Screen->gartTextures.map;
++   region_offset = (char *)pointer - (char *)rmesa->radeon.radeonScreen->gartTextures.map;
+ 
+    if (region_offset < 0 || 
+-       region_offset > rmesa->r200Screen->gartTextures.size) {
++       region_offset > rmesa->radeon.radeonScreen->gartTextures.size) {
+       fprintf(stderr, "offset %d outside range 0..%d\n", region_offset,
+-	      rmesa->r200Screen->gartTextures.size);
++	      rmesa->radeon.radeonScreen->gartTextures.size);
+       return;
+    }
+ 
+    memfree.region = RADEON_MEM_REGION_GART;
+    memfree.region_offset = region_offset;
+    
+-   ret = drmCommandWrite( rmesa->r200Screen->driScreen->fd,
++   ret = drmCommandWrite( rmesa->radeon.radeonScreen->driScreen->fd,
+ 			  DRM_RADEON_FREE,
+ 			  &memfree, sizeof(memfree));
+    
+@@ -956,16 +369,16 @@ GLuint r200GetMemoryOffsetMESA(__DRIscreen *screen, const GLvoid *pointer)
+ 
+    card_offset = r200GartOffsetFromVirtual( rmesa, pointer );
+ 
+-   return card_offset - rmesa->r200Screen->gart_base;
++   return card_offset - rmesa->radeon.radeonScreen->gart_base;
+ }
+ 
+ GLboolean r200IsGartMemory( r200ContextPtr rmesa, const GLvoid *pointer,
+ 			   GLint size )
+ {
+-   ptrdiff_t offset = (char *)pointer - (char *)rmesa->r200Screen->gartTextures.map;
++   ptrdiff_t offset = (char *)pointer - (char *)rmesa->radeon.radeonScreen->gartTextures.map;
+    int valid = (size >= 0 &&
+ 		offset >= 0 &&
+-		offset + size < rmesa->r200Screen->gartTextures.size);
++		offset + size < rmesa->radeon.radeonScreen->gartTextures.size);
+ 
+    if (R200_DEBUG & DEBUG_IOCTL)
+       fprintf(stderr, "r200IsGartMemory( %p ) : %d\n", pointer, valid );
+@@ -976,12 +389,12 @@ GLboolean r200IsGartMemory( r200ContextPtr rmesa, const GLvoid *pointer,
+ 
+ GLuint r200GartOffsetFromVirtual( r200ContextPtr rmesa, const GLvoid *pointer )
+ {
+-   ptrdiff_t offset = (char *)pointer - (char *)rmesa->r200Screen->gartTextures.map;
++   ptrdiff_t offset = (char *)pointer - (char *)rmesa->radeon.radeonScreen->gartTextures.map;
+ 
+-   if (offset < 0 || offset > rmesa->r200Screen->gartTextures.size)
++   if (offset < 0 || offset > rmesa->radeon.radeonScreen->gartTextures.size)
+       return ~0;
+    else
+-      return rmesa->r200Screen->gart_texture_offset + offset;
++      return rmesa->radeon.radeonScreen->gart_texture_offset + offset;
+ }
+ 
+ 
+@@ -989,7 +402,7 @@ GLuint r200GartOffsetFromVirtual( r200ContextPtr rmesa, const GLvoid *pointer )
+ void r200InitIoctlFuncs( struct dd_function_table *functions )
+ {
+     functions->Clear = r200Clear;
+-    functions->Finish = r200Finish;
+-    functions->Flush = r200Flush;
++    functions->Finish = radeonFinish;
++    functions->Flush = radeonFlush;
+ }
+ 
+diff --git a/src/mesa/drivers/dri/r200/r200_ioctl.h b/src/mesa/drivers/dri/r200/r200_ioctl.h
+index f7458e4..2a4b8a1 100644
+--- a/src/mesa/drivers/dri/r200/r200_ioctl.h
++++ b/src/mesa/drivers/dri/r200/r200_ioctl.h
+@@ -37,65 +37,30 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ #include "main/simple_list.h"
+ #include "radeon_dri.h"
+-#include "r200_lock.h"
++
++#include "radeon_bocs_wrapper.h"
+ 
+ #include "xf86drm.h"
+ #include "drm.h"
+ #include "radeon_drm.h"
+ 
+-extern void r200EmitState( r200ContextPtr rmesa );
+ extern void r200EmitVertexAOS( r200ContextPtr rmesa,
+-				 GLuint vertex_size,
+-				 GLuint offset );
++			       GLuint vertex_size,
++			       struct radeon_bo *bo,
++			       GLuint offset );
+ 
+ extern void r200EmitVbufPrim( r200ContextPtr rmesa,
+ 				GLuint primitive,
+ 				GLuint vertex_nr );
+ 
+-extern void r200FlushElts( r200ContextPtr rmesa );
++extern void r200FlushElts(GLcontext *ctx);
+ 
+ extern GLushort *r200AllocEltsOpenEnded( r200ContextPtr rmesa,
+ 					   GLuint primitive,
+ 					   GLuint min_nr );
+ 
+-extern void r200EmitAOS( r200ContextPtr rmesa,
+-			   struct r200_dma_region **regions,
+-			   GLuint n,
+-			   GLuint offset );
+-
+-extern void r200EmitBlit( r200ContextPtr rmesa,
+-			  GLuint color_fmt,
+-			  GLuint src_pitch,
+-			  GLuint src_offset,
+-			  GLuint dst_pitch,
+-			  GLuint dst_offset,
+-			  GLint srcx, GLint srcy,
+-			  GLint dstx, GLint dsty,
+-			  GLuint w, GLuint h );
+-
+-extern void r200EmitWait( r200ContextPtr rmesa, GLuint flags );
+-
+-extern void r200FlushCmdBuf( r200ContextPtr rmesa, const char * );
+-extern int r200FlushCmdBufLocked( r200ContextPtr rmesa, const char * caller );
+-
+-extern void r200RefillCurrentDmaRegion( r200ContextPtr rmesa );
+-
+-extern void r200AllocDmaRegion( r200ContextPtr rmesa,
+-				  struct r200_dma_region *region,
+-				  int bytes, 
+-				  int alignment );
+-
+-extern void r200ReleaseDmaRegion( r200ContextPtr rmesa,
+-				    struct r200_dma_region *region,
+-				    const char *caller );
+-
+-extern void r200CopyBuffer( __DRIdrawablePrivate *drawable,
+-			    const drm_clip_rect_t      *rect);
+-extern void r200PageFlip( __DRIdrawablePrivate *drawable );
+-extern void r200Flush( GLcontext *ctx );
+-extern void r200Finish( GLcontext *ctx );
+-extern void r200WaitForIdleLocked( r200ContextPtr rmesa );
+-extern void r200WaitForVBlank( r200ContextPtr rmesa );
++extern void r200EmitAOS(r200ContextPtr rmesa, GLuint nr, GLuint offset);
++
+ extern void r200InitIoctlFuncs( struct dd_function_table *functions );
+ 
+ extern void *r200AllocateMemoryMESA( __DRIscreen *screen, GLsizei size, GLfloat readfreq,
+@@ -119,8 +84,8 @@ void r200SetUpAtomList( r200ContextPtr rmesa );
+  */
+ #define R200_NEWPRIM( rmesa )			\
+ do {						\
+-   if ( rmesa->dma.flush )			\
+-      rmesa->dma.flush( rmesa );	\
++   if ( rmesa->radeon.dma.flush )			\
++      rmesa->radeon.dma.flush( rmesa->radeon.glCtx );	\
+ } while (0)
+ 
+ /* Can accomodate several state changes and primitive changes without
+@@ -130,7 +95,7 @@ do {						\
+ do {								\
+    R200_NEWPRIM( rmesa );					\
+    rmesa->hw.ATOM.dirty = GL_TRUE;				\
+-   rmesa->hw.is_dirty = GL_TRUE;				\
++   rmesa->radeon.hw.is_dirty = GL_TRUE;				\
+ } while (0)
+ 
+ #define R200_DB_STATE( ATOM )			        \
+@@ -139,13 +104,13 @@ do {								\
+ 
+ static INLINE int R200_DB_STATECHANGE( 
+    r200ContextPtr rmesa,
+-   struct r200_state_atom *atom )
++   struct radeon_state_atom *atom )
+ {
+    if (memcmp(atom->cmd, atom->lastcmd, atom->cmd_size*4)) {
+-      int *tmp;
++      GLuint *tmp;
+       R200_NEWPRIM( rmesa );
+       atom->dirty = GL_TRUE;
+-      rmesa->hw.is_dirty = GL_TRUE;
++      rmesa->radeon.hw.is_dirty = GL_TRUE;
+       tmp = atom->cmd; 
+       atom->cmd = atom->lastcmd;
+       atom->lastcmd = tmp;
+@@ -156,15 +121,6 @@ static INLINE int R200_DB_STATECHANGE(
+ }
+ 
+ 
+-/* Fire the buffered vertices no matter what.
+- */
+-#define R200_FIREVERTICES( rmesa )			\
+-do {							\
+-   if ( rmesa->store.cmd_used || rmesa->dma.flush ) {	\
+-      r200Flush( rmesa->glCtx );			\
+-   }							\
+-} while (0)
+-
+ /* Command lengths.  Note that any time you ensure ELTS_BUFSZ or VBUF_BUFSZ
+  * are available, you will also be adding an rmesa->state.max_state_size because
+  * r200EmitState is called from within r200EmitVbufPrim and r200FlushElts.
+@@ -174,36 +130,36 @@ do {							\
+ #define ELTS_BUFSZ(nr)	(12 + nr * 2)
+ #define VBUF_BUFSZ	(3 * sizeof(int))
+ 
+-/* Ensure that a minimum amount of space is available in the command buffer.
+- * This is used to ensure atomicity of state updates with the rendering requests
+- * that rely on them.
+- *
+- * An alternative would be to implement a "soft lock" such that when the buffer
+- * wraps at an inopportune time, we grab the lock, flush the current buffer,
+- * and hang on to the lock until the critical section is finished and we flush
+- * the buffer again and unlock.
+- */
+-static INLINE void r200EnsureCmdBufSpace( r200ContextPtr rmesa, int bytes )
++static inline uint32_t cmdpacket3(int cmd_type)
+ {
+-   if (rmesa->store.cmd_used + bytes > R200_CMD_BUF_SZ)
+-      r200FlushCmdBuf( rmesa, __FUNCTION__ );
+-   assert( bytes <= R200_CMD_BUF_SZ );
+-}
++  drm_radeon_cmd_header_t cmd;
+ 
+-/* Alloc space in the command buffer
+- */
+-static INLINE char *r200AllocCmdBuf( r200ContextPtr rmesa,
+-					 int bytes, const char *where )
+-{
+-   char * head;
++  cmd.i = 0;
++  cmd.header.cmd_type = cmd_type;
+ 
+-   if (rmesa->store.cmd_used + bytes > R200_CMD_BUF_SZ)
+-      r200FlushCmdBuf( rmesa, where );
++  return (uint32_t)cmd.i;
+ 
+-   head = rmesa->store.cmd_buf + rmesa->store.cmd_used;
+-   rmesa->store.cmd_used += bytes;
+-   assert( rmesa->store.cmd_used <= R200_CMD_BUF_SZ );
+-   return head;
+ }
+ 
++#define OUT_BATCH_PACKET3(packet, num_extra) do {	      \
++    if (!b_l_rmesa->radeonScreen->kernel_mm) {		      \
++      OUT_BATCH(cmdpacket3(RADEON_CMD_PACKET3));				      \
++      OUT_BATCH(CP_PACKET3((packet), (num_extra)));	      \
++    } else {						      \
++      OUT_BATCH(CP_PACKET2);				      \
++      OUT_BATCH(CP_PACKET3((packet), (num_extra)));	      \
++    }							      \
++  } while(0)
++
++#define OUT_BATCH_PACKET3_CLIP(packet, num_extra) do {	      \
++    if (!b_l_rmesa->radeonScreen->kernel_mm) {		      \
++      OUT_BATCH(cmdpacket3(RADEON_CMD_PACKET3_CLIP));	      \
++      OUT_BATCH(CP_PACKET3((packet), (num_extra)));	      \
++    } else {						      \
++      OUT_BATCH(CP_PACKET2);				      \
++      OUT_BATCH(CP_PACKET3((packet), (num_extra)));	      \
++    }							      \
++  } while(0)
++
++
+ #endif /* __R200_IOCTL_H__ */
+diff --git a/src/mesa/drivers/dri/r200/r200_lock.c b/src/mesa/drivers/dri/r200/r200_lock.c
+deleted file mode 100644
+index 99661a4..0000000
+--- a/src/mesa/drivers/dri/r200/r200_lock.c
++++ /dev/null
+@@ -1,116 +0,0 @@
+-/*
+-Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+-
+-The Weather Channel (TM) funded Tungsten Graphics to develop the
+-initial release of the Radeon 8500 driver under the XFree86 license.
+-This notice must be preserved.
+-
+-Permission is hereby granted, free of charge, to any person obtaining
+-a copy of this software and associated documentation files (the
+-"Software"), to deal in the Software without restriction, including
+-without limitation the rights to use, copy, modify, merge, publish,
+-distribute, sublicense, and/or sell copies of the Software, and to
+-permit persons to whom the Software is furnished to do so, subject to
+-the following conditions:
+-
+-The above copyright notice and this permission notice (including the
+-next paragraph) shall be included in all copies or substantial
+-portions of the Software.
+-
+-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+-IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+-
+-**************************************************************************/
+-
+-/*
+- * Authors:
+- *   Keith Whitwell <keith@tungstengraphics.com>
+- */
+- 
+-#include "r200_context.h"
+-#include "r200_lock.h"
+-#include "r200_tex.h"
+-#include "r200_state.h"
+-#include "r200_ioctl.h"
+-
+-#include "drirenderbuffer.h"
+-
+-
+-#if DEBUG_LOCKING
+-char *prevLockFile = NULL;
+-int prevLockLine = 0;
+-#endif
+-
+-/* Turn on/off page flipping according to the flags in the sarea:
+- */
+-static void
+-r200UpdatePageFlipping( r200ContextPtr rmesa )
+-{
+-   rmesa->doPageFlip = rmesa->sarea->pfState;
+-   if (rmesa->glCtx->WinSysDrawBuffer) {
+-      driFlipRenderbuffers(rmesa->glCtx->WinSysDrawBuffer,
+-                           rmesa->sarea->pfCurrentPage);
+-   }
+-}
+-
+-
+-
+-/* Update the hardware state.  This is called if another main/context.has
+- * grabbed the hardware lock, which includes the X server.  This
+- * function also updates the driver's window state after the X server
+- * moves, resizes or restacks a window -- the change will be reflected
+- * in the drawable position and clip rects.  Since the X server grabs
+- * the hardware lock when it changes the window state, this routine will
+- * automatically be called after such a change.
+- */
+-void r200GetLock( r200ContextPtr rmesa, GLuint flags )
+-{
+-   __DRIdrawablePrivate *drawable = rmesa->dri.drawable;
+-   __DRIdrawablePrivate *readable = rmesa->dri.readable;
+-   __DRIscreenPrivate *sPriv = rmesa->dri.screen;
+-   drm_radeon_sarea_t *sarea = rmesa->sarea;
+-   int i;
+-
+-   drmGetLock( rmesa->dri.fd, rmesa->dri.hwContext, flags );
+-
+-   /* The window might have moved, so we might need to get new clip
+-    * rects.
+-    *
+-    * NOTE: This releases and regrabs the hw lock to allow the X server
+-    * to respond to the DRI protocol request for new drawable info.
+-    * Since the hardware state depends on having the latest drawable
+-    * clip rects, all state checking must be done _after_ this call.
+-    */
+-   DRI_VALIDATE_DRAWABLE_INFO( sPriv, drawable );
+-   if (drawable != readable) {
+-      DRI_VALIDATE_DRAWABLE_INFO( sPriv, readable );
+-   }
+-
+-   if ( rmesa->lastStamp != drawable->lastStamp ) {
+-      r200UpdatePageFlipping( rmesa );
+-      r200SetCliprects( rmesa );
+-      r200UpdateViewportOffset( rmesa->glCtx );
+-      driUpdateFramebufferSize(rmesa->glCtx, drawable);
+-   }
+-
+-   R200_STATECHANGE( rmesa, ctx );
+-   if (rmesa->sarea->tiling_enabled) {
+-      rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= R200_COLOR_TILE_ENABLE;
+-   }
+-   else rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] &= ~R200_COLOR_TILE_ENABLE;
+-
+-   if ( sarea->ctx_owner != rmesa->dri.hwContext ) {
+-      sarea->ctx_owner = rmesa->dri.hwContext;
+-   }
+-
+-   for ( i = 0 ; i < rmesa->nr_heaps ; i++ ) {
+-      DRI_AGE_TEXTURES( rmesa->texture_heaps[ i ] );
+-   }
+-
+-   rmesa->lost_context = GL_TRUE;
+-}
+diff --git a/src/mesa/drivers/dri/r200/r200_lock.h b/src/mesa/drivers/dri/r200/r200_lock.h
+deleted file mode 100644
+index 4ff9890..0000000
+--- a/src/mesa/drivers/dri/r200/r200_lock.h
++++ /dev/null
+@@ -1,106 +0,0 @@
+-/*
+-Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+-
+-The Weather Channel (TM) funded Tungsten Graphics to develop the
+-initial release of the Radeon 8500 driver under the XFree86 license.
+-This notice must be preserved.
+-
+-Permission is hereby granted, free of charge, to any person obtaining
+-a copy of this software and associated documentation files (the
+-"Software"), to deal in the Software without restriction, including
+-without limitation the rights to use, copy, modify, merge, publish,
+-distribute, sublicense, and/or sell copies of the Software, and to
+-permit persons to whom the Software is furnished to do so, subject to
+-the following conditions:
+-
+-The above copyright notice and this permission notice (including the
+-next paragraph) shall be included in all copies or substantial
+-portions of the Software.
+-
+-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+-IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+-
+-**************************************************************************/
+-
+-/*
+- * Authors:
+- *   Keith Whitwell <keith@tungstengraphics.com>
+- */
+-
+-#ifndef __R200_LOCK_H__
+-#define __R200_LOCK_H__
+-
+-extern void r200GetLock( r200ContextPtr rmesa, GLuint flags );
+-
+-/* Turn DEBUG_LOCKING on to find locking conflicts.
+- */
+-#define DEBUG_LOCKING	0
+-
+-#if DEBUG_LOCKING
+-extern char *prevLockFile;
+-extern int prevLockLine;
+-
+-#define DEBUG_LOCK()							\
+-   do {									\
+-      prevLockFile = (__FILE__);					\
+-      prevLockLine = (__LINE__);					\
+-   } while (0)
+-
+-#define DEBUG_RESET()							\
+-   do {									\
+-      prevLockFile = 0;							\
+-      prevLockLine = 0;							\
+-   } while (0)
+-
+-#define DEBUG_CHECK_LOCK()						\
+-   do {									\
+-      if ( prevLockFile ) {						\
+-	 fprintf( stderr,						\
+-		  "LOCK SET!\n\tPrevious %s:%d\n\tCurrent: %s:%d\n",	\
+-		  prevLockFile, prevLockLine, __FILE__, __LINE__ );	\
+-	 exit( 1 );							\
+-      }									\
+-   } while (0)
+-
+-#else
+-
+-#define DEBUG_LOCK()
+-#define DEBUG_RESET()
+-#define DEBUG_CHECK_LOCK()
+-
+-#endif
+-
+-/*
+- * !!! We may want to separate locks from locks with validation.  This
+- * could be used to improve performance for those things commands that
+- * do not do any drawing !!!
+- */
+-
+-
+-/* Lock the hardware and validate our state.
+- */
+-#define LOCK_HARDWARE( rmesa )					\
+-   do {								\
+-      char __ret = 0;						\
+-      DEBUG_CHECK_LOCK();					\
+-      DRM_CAS( rmesa->dri.hwLock, rmesa->dri.hwContext,		\
+-	       (DRM_LOCK_HELD | rmesa->dri.hwContext), __ret );	\
+-      if ( __ret )						\
+-	 r200GetLock( rmesa, 0 );				\
+-      DEBUG_LOCK();						\
+-   } while (0)
+-
+-#define UNLOCK_HARDWARE( rmesa )					\
+-   do {									\
+-      DRM_UNLOCK( rmesa->dri.fd,					\
+-		  rmesa->dri.hwLock,					\
+-		  rmesa->dri.hwContext );				\
+-      DEBUG_RESET();							\
+-   } while (0)
+-
+-#endif /* __R200_LOCK_H__ */
+diff --git a/src/mesa/drivers/dri/r200/r200_maos_arrays.c b/src/mesa/drivers/dri/r200/r200_maos_arrays.c
+index 8512b9a..5dbc202 100644
+--- a/src/mesa/drivers/dri/r200/r200_maos_arrays.c
++++ b/src/mesa/drivers/dri/r200/r200_maos_arrays.c
+@@ -50,110 +50,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "r200_maos.h"
+ #include "r200_tcl.h"
+ 
+-
+-#if 0
+-/* Usage:
+- *   - from r200_tcl_render
+- *   - call r200EmitArrays to ensure uptodate arrays in dma
+- *   - emit primitives (new type?) which reference the data
+- *       -- need to use elts for lineloop, quads, quadstrip/flat
+- *       -- other primitives are all well-formed (need tristrip-1,fake-poly)
+- *
+- */
+-static void emit_ubyte_rgba3( GLcontext *ctx,
+-		       struct r200_dma_region *rvb,
+-		       char *data,
+-		       int stride,
+-		       int count )
+-{
+-   int i;
+-   r200_color_t *out = (r200_color_t *)(rvb->start + rvb->address);
+-
+-   if (R200_DEBUG & DEBUG_VERTS)
+-      fprintf(stderr, "%s count %d stride %d out %p\n",
+-	      __FUNCTION__, count, stride, (void *)out);
+-
+-   for (i = 0; i < count; i++) {
+-      out->red   = *data;
+-      out->green = *(data+1);
+-      out->blue  = *(data+2);
+-      out->alpha = 0xFF;
+-      out++;
+-      data += stride;
+-   }
+-}
+-
+-static void emit_ubyte_rgba4( GLcontext *ctx,
+-			      struct r200_dma_region *rvb,
+-			      char *data,
+-			      int stride,
+-			      int count )
+-{
+-   int i;
+-   int *out = (int *)(rvb->address + rvb->start);
+-
+-   if (R200_DEBUG & DEBUG_VERTS)
+-      fprintf(stderr, "%s count %d stride %d\n",
+-	      __FUNCTION__, count, stride);
+-
+-   if (stride == 4) {
+-      for (i = 0; i < count; i++)
+-	 ((int *)out)[i] = LE32_TO_CPU(((int *)data)[i]);
+-   } else {
+-      for (i = 0; i < count; i++) {
+-	 *(int *)out++ = LE32_TO_CPU(*(int *)data);
+-	 data += stride;
+-      }
+-   }
+-}
+-
+-
+-static void emit_ubyte_rgba( GLcontext *ctx,
+-			     struct r200_dma_region *rvb,
+-			     char *data,
+-			     int size,
+-			     int stride,
+-			     int count )
+-{
+-   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+-
+-   if (R200_DEBUG & DEBUG_VERTS)
+-      fprintf(stderr, "%s %d/%d\n", __FUNCTION__, count, size);
+-
+-   assert (!rvb->buf);
+-
+-   if (stride == 0) {
+-      r200AllocDmaRegion( rmesa, rvb, 4, 4 );
+-      count = 1;
+-      rvb->aos_start = GET_START(rvb);
+-      rvb->aos_stride = 0;
+-      rvb->aos_size = 1;
+-   }
+-   else {
+-      r200AllocDmaRegion( rmesa, rvb, 4 * count, 4 );	/* alignment? */
+-      rvb->aos_start = GET_START(rvb);
+-      rvb->aos_stride = 1;
+-      rvb->aos_size = 1;
+-   }
+-
+-   /* Emit the data
+-    */
+-   switch (size) {
+-   case 3:
+-      emit_ubyte_rgba3( ctx, rvb, data, stride, count );
+-      break;
+-   case 4:
+-      emit_ubyte_rgba4( ctx, rvb, data, stride, count );
+-      break;
+-   default:
+-      assert(0);
+-      exit(1);
+-      break;
+-   }
+-}
+-#endif
+-
+-
+ #if defined(USE_X86_ASM)
+ #define COPY_DWORDS( dst, src, nr )					\
+ do {									\
+@@ -174,204 +70,34 @@ do {						\
+ } while (0)
+ #endif
+ 
+-
+-static void emit_vecfog( GLcontext *ctx,
+-			 struct r200_dma_region *rvb,
+-			 char *data,
+-			 int stride,
+-			 int count )
++static void r200_emit_vecfog(GLcontext *ctx, struct radeon_aos *aos,
++			     GLvoid *data, int stride, int count)
+ {
+-   int i;
+-   GLfloat *out;
+-
+-   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+-   
+-   if (R200_DEBUG & DEBUG_VERTS)
+-      fprintf(stderr, "%s count %d stride %d\n",
+-	      __FUNCTION__, count, stride);
+-
+-   assert (!rvb->buf);
+-
+-   if (stride == 0) {
+-      r200AllocDmaRegion( rmesa, rvb, 4, 4 );
+-      count = 1;
+-      rvb->aos_start = GET_START(rvb);
+-      rvb->aos_stride = 0;
+-      rvb->aos_size = 1;
+-   }
+-   else {
+-      r200AllocDmaRegion( rmesa, rvb, count * 4, 4 );	/* alignment? */
+-      rvb->aos_start = GET_START(rvb);
+-      rvb->aos_stride = 1;
+-      rvb->aos_size = 1;
+-   }
+-
+-   /* Emit the data
+-    */
+-   out = (GLfloat *)(rvb->address + rvb->start);
+-   for (i = 0; i < count; i++) {
+-      out[0] = r200ComputeFogBlendFactor( ctx, *(GLfloat *)data );
+-      out++;
+-      data += stride;
+-   }
+-
++	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++	GLfloat *out;	/* fog factors are stored as floats, not converted to integers */
++	int i;
++	int size = 1;	/* dwords per vertex: a single fog blend factor */
++
++	if (stride == 0) {	/* stride 0: only a single value is emitted */
++		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
++		count = 1;
++		aos->stride = 0;
++	} else {	/* one value per vertex: reserve room for all 'count' of them */
++		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
++		aos->stride = size;
++	}
++
++	aos->components = size;
++	aos->count = count;
++
++	out = (GLfloat *)((char *)aos->bo->ptr + aos->offset);
++	for (i = 0; i < count; i++) {
++	  out[0] = r200ComputeFogBlendFactor( ctx, *(GLfloat *)data );
++	  out++;
++	  data = (char *)data + stride;	/* byte-wise advance; no arithmetic on void * */
++	}
+ }
+ 
+-
+-static void emit_vec4( GLcontext *ctx,
+-		       struct r200_dma_region *rvb,
+-		       char *data,
+-		       int stride,
+-		       int count )
+-{
+-   int i;
+-   int *out = (int *)(rvb->address + rvb->start);
+-
+-   if (R200_DEBUG & DEBUG_VERTS)
+-      fprintf(stderr, "%s count %d stride %d\n",
+-	      __FUNCTION__, count, stride);
+-
+-   if (stride == 4)
+-      COPY_DWORDS( out, data, count );
+-   else
+-      for (i = 0; i < count; i++) {
+-	 out[0] = *(int *)data;
+-	 out++;
+-	 data += stride;
+-      }
+-}
+-
+-
+-static void emit_vec8( GLcontext *ctx,
+-		       struct r200_dma_region *rvb,
+-		       char *data,
+-		       int stride,
+-		       int count )
+-{
+-   int i;
+-   int *out = (int *)(rvb->address + rvb->start);
+-
+-   if (R200_DEBUG & DEBUG_VERTS)
+-      fprintf(stderr, "%s count %d stride %d\n",
+-	      __FUNCTION__, count, stride);
+-
+-   if (stride == 8)
+-      COPY_DWORDS( out, data, count*2 );
+-   else
+-      for (i = 0; i < count; i++) {
+-	 out[0] = *(int *)data;
+-	 out[1] = *(int *)(data+4);
+-	 out += 2;
+-	 data += stride;
+-      }
+-}
+-
+-static void emit_vec12( GLcontext *ctx,
+-		       struct r200_dma_region *rvb,
+-		       char *data,
+-		       int stride,
+-		       int count )
+-{
+-   int i;
+-   int *out = (int *)(rvb->address + rvb->start);
+-
+-   if (R200_DEBUG & DEBUG_VERTS)
+-      fprintf(stderr, "%s count %d stride %d out %p data %p\n",
+-	      __FUNCTION__, count, stride, (void *)out, (void *)data);
+-
+-   if (stride == 12)
+-      COPY_DWORDS( out, data, count*3 );
+-   else
+-      for (i = 0; i < count; i++) {
+-	 out[0] = *(int *)data;
+-	 out[1] = *(int *)(data+4);
+-	 out[2] = *(int *)(data+8);
+-	 out += 3;
+-	 data += stride;
+-      }
+-}
+-
+-static void emit_vec16( GLcontext *ctx,
+-			struct r200_dma_region *rvb,
+-			char *data,
+-			int stride,
+-			int count )
+-{
+-   int i;
+-   int *out = (int *)(rvb->address + rvb->start);
+-
+-   if (R200_DEBUG & DEBUG_VERTS)
+-      fprintf(stderr, "%s count %d stride %d\n",
+-	      __FUNCTION__, count, stride);
+-
+-   if (stride == 16)
+-      COPY_DWORDS( out, data, count*4 );
+-   else
+-      for (i = 0; i < count; i++) {
+-	 out[0] = *(int *)data;
+-	 out[1] = *(int *)(data+4);
+-	 out[2] = *(int *)(data+8);
+-	 out[3] = *(int *)(data+12);
+-	 out += 4;
+-	 data += stride;
+-      }
+-}
+-
+-
+-static void emit_vector( GLcontext *ctx,
+-			 struct r200_dma_region *rvb,
+-			 char *data,
+-			 int size,
+-			 int stride,
+-			 int count )
+-{
+-   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+-
+-   if (R200_DEBUG & DEBUG_VERTS)
+-      fprintf(stderr, "%s count %d size %d stride %d\n",
+-	      __FUNCTION__, count, size, stride);
+-
+-   assert (!rvb->buf);
+-
+-   if (stride == 0) {
+-      r200AllocDmaRegion( rmesa, rvb, size * 4, 4 );
+-      count = 1;
+-      rvb->aos_start = GET_START(rvb);
+-      rvb->aos_stride = 0;
+-      rvb->aos_size = size;
+-   }
+-   else {
+-      r200AllocDmaRegion( rmesa, rvb, size * count * 4, 4 );	/* alignment? */
+-      rvb->aos_start = GET_START(rvb);
+-      rvb->aos_stride = size;
+-      rvb->aos_size = size;
+-   }
+-
+-   /* Emit the data
+-    */
+-   switch (size) {
+-   case 1:
+-      emit_vec4( ctx, rvb, data, stride, count );
+-      break;
+-   case 2:
+-      emit_vec8( ctx, rvb, data, stride, count );
+-      break;
+-   case 3:
+-      emit_vec12( ctx, rvb, data, stride, count );
+-      break;
+-   case 4:
+-      emit_vec16( ctx, rvb, data, stride, count );
+-      break;
+-   default:
+-      assert(0);
+-      exit(1);
+-      break;
+-   }
+-
+-}
+-
+-
+-
+ /* Emit any changed arrays to new GART memory, re-emit a packet to
+  * update the arrays.  
+  */
+@@ -379,12 +105,12 @@ void r200EmitArrays( GLcontext *ctx, GLubyte *vimap_rev )
+ {
+    r200ContextPtr rmesa = R200_CONTEXT( ctx );
+    struct vertex_buffer *VB = &TNL_CONTEXT( ctx )->vb;
+-   struct r200_dma_region **component = rmesa->tcl.aos_components;
+    GLuint nr = 0;
+    GLuint vfmt0 = 0, vfmt1 = 0;
+    GLuint count = VB->Count;
+    GLuint i, emitsize;
+ 
++   //   fprintf(stderr,"emit arrays\n");
+    for ( i = 0; i < 15; i++ ) {
+       GLubyte attrib = vimap_rev[i];
+       if (attrib != 255) {
+@@ -416,20 +142,20 @@ void r200EmitArrays( GLcontext *ctx, GLubyte *vimap_rev )
+ 	 case 3:
+ 	    /* special handling to fix up fog. Will get us into trouble with vbos...*/
+ 	    assert(attrib == VERT_ATTRIB_FOG);
+-	    if (!rmesa->tcl.vertex_data[i].buf) {
++	    if (!rmesa->tcl.aos[nr].bo) { /* aos[] is packed by nr, not indexed by attribute slot i */
+ 	       if (ctx->VertexProgram._Enabled)
+-		  emit_vector( ctx,
+-			 &(rmesa->tcl.vertex_data[i]),
+-			 (char *)VB->AttribPtr[attrib]->data,
+-			 1,
+-			 VB->AttribPtr[attrib]->stride,
+-			 count);
++		  rcommon_emit_vector( ctx,
++				       &(rmesa->tcl.aos[nr]),
++				       (char *)VB->AttribPtr[attrib]->data,
++				       1,
++				       VB->AttribPtr[attrib]->stride,
++				       count);
+ 	       else
+-		  emit_vecfog( ctx,
+-			 &(rmesa->tcl.vertex_data[i]),
+-			 (char *)VB->AttribPtr[attrib]->data,
+-			 VB->AttribPtr[attrib]->stride,
+-			 count);
++		 r200_emit_vecfog( ctx,
++				   &(rmesa->tcl.aos[nr]),
++				   (char *)VB->AttribPtr[attrib]->data,
++				   VB->AttribPtr[attrib]->stride,
++				   count);
+ 	    }
+ 	    vfmt0 |= R200_VTX_DISCRETE_FOG;
+ 	    goto after_emit;
+@@ -473,17 +199,17 @@ void r200EmitArrays( GLcontext *ctx, GLubyte *vimap_rev )
+ 	 default:
+ 	    assert(0);
+ 	 }
+-	 if (!rmesa->tcl.vertex_data[i].buf) {
+-	    emit_vector( ctx,
+-			 &(rmesa->tcl.vertex_data[i]),
+-			 (char *)VB->AttribPtr[attrib]->data,
+-			 emitsize,
+-			 VB->AttribPtr[attrib]->stride,
+-			 count );
++	 if (!rmesa->tcl.aos[nr].bo) {
++	   rcommon_emit_vector( ctx,
++				&(rmesa->tcl.aos[nr]),
++				(char *)VB->AttribPtr[attrib]->data,
++				emitsize,
++				VB->AttribPtr[attrib]->stride,
++				count );
+ 	 }
+ after_emit:
+ 	 assert(nr < 12);
+-	 component[nr++] = &rmesa->tcl.vertex_data[i];
++	 nr++;
+       }
+    }
+ 
+@@ -501,12 +227,11 @@ after_emit:
+ void r200ReleaseArrays( GLcontext *ctx, GLuint newinputs )
+ {
+    r200ContextPtr rmesa = R200_CONTEXT( ctx );
+-
+-   /* only do it for changed inputs ? */
+    int i;
+-   for (i = 0; i < 15; i++) {
+-      if (newinputs & (1 << i))
+-	 r200ReleaseDmaRegion( rmesa,
+-	    &rmesa->tcl.vertex_data[i], __FUNCTION__ );
++   for (i = 0; i < rmesa->tcl.nr_aos_components; i++) {
++     if (rmesa->tcl.aos[i].bo) {
++       radeon_bo_unref(rmesa->tcl.aos[i].bo);
++       rmesa->tcl.aos[i].bo = NULL;
++     }
+    }
+ }
+diff --git a/src/mesa/drivers/dri/r200/r200_pixel.c b/src/mesa/drivers/dri/r200/r200_pixel.c
+index be68821..a6c6558 100644
+--- a/src/mesa/drivers/dri/r200/r200_pixel.c
++++ b/src/mesa/drivers/dri/r200/r200_pixel.c
+@@ -51,7 +51,7 @@ check_color( const GLcontext *ctx, GLenum type, GLenum format,
+ 	     const void *pixels, GLint sz, GLint pitch )
+ {
+    r200ContextPtr rmesa = R200_CONTEXT(ctx);
+-   GLuint cpp = rmesa->r200Screen->cpp;
++   GLuint cpp = rmesa->radeon.radeonScreen->cpp;
+ 
+    if (R200_DEBUG & DEBUG_PIXEL)
+       fprintf(stderr, "%s\n", __FUNCTION__);
+@@ -137,8 +137,8 @@ clip_pixelrect( const GLcontext *ctx,
+    if (*height <= 0)
+       return GL_FALSE;
+ 
+-   *size = ((*y + *height - 1) * rmesa->r200Screen->frontPitch +
+-	    (*x + *width - 1) * rmesa->r200Screen->cpp);
++   *size = ((*y + *height - 1) * rmesa->radeon.radeonScreen->frontPitch +
++	    (*x + *width - 1) * rmesa->radeon.radeonScreen->cpp);
+ 
+    return GL_TRUE;
+ }
+@@ -153,19 +153,20 @@ r200TryReadPixels( GLcontext *ctx,
+    r200ContextPtr rmesa = R200_CONTEXT(ctx);
+    GLint pitch = pack->RowLength ? pack->RowLength : width;
+    GLint blit_format;
+-   GLuint cpp = rmesa->r200Screen->cpp;
++   GLuint cpp = rmesa->radeon.radeonScreen->cpp;
+    GLint size = width * height * cpp;
+ 
++   return GL_FALSE;
++#if 0
+    if (R200_DEBUG & DEBUG_PIXEL)
+       fprintf(stderr, "%s\n", __FUNCTION__);
+ 
+    /* Only accelerate reading to GART buffers.
+     */
+    if ( !r200IsGartMemory(rmesa, pixels, 
+-			 pitch * height * rmesa->r200Screen->cpp ) ) {
++			 pitch * height * rmesa->radeon.radeonScreen->cpp ) ) {
+       if (R200_DEBUG & DEBUG_PIXEL)
+ 	 fprintf(stderr, "%s: dest not GART\n", __FUNCTION__);
+-      return GL_FALSE;
+    }
+ 
+    /* Need GL_PACK_INVERT_MESA to cope with upsidedown results from
+@@ -180,7 +181,7 @@ r200TryReadPixels( GLcontext *ctx,
+    if (!check_color(ctx, type, format, pack, pixels, size, pitch))
+       return GL_FALSE;
+ 
+-   switch ( rmesa->r200Screen->cpp ) {
++   switch ( rmesa->radeon.radeonScreen->cpp ) {
+    case 4:
+       blit_format = R200_CP_COLOR_FORMAT_ARGB8888;
+       break;
+@@ -197,14 +198,14 @@ r200TryReadPixels( GLcontext *ctx,
+     * a full command buffer expects to be called unlocked.  As a
+     * workaround, immediately flush the buffer on aquiring the lock.
+     */
+-   LOCK_HARDWARE( rmesa );
++   LOCK_HARDWARE( &rmesa->radeon );
+ 
+    if (rmesa->store.cmd_used)
+-      r200FlushCmdBufLocked( rmesa, __FUNCTION__ );
++      rcommonFlushCmdBufLocked( &rmesa->radeon, __FUNCTION__ );
+ 
+    if (!clip_pixelrect(ctx, ctx->ReadBuffer, &x, &y, &width, &height,
+ 		       &size)) {
+-      UNLOCK_HARDWARE( rmesa );
++      UNLOCK_HARDWARE( &rmesa->radeon );
+       if (R200_DEBUG & DEBUG_PIXEL)
+ 	 fprintf(stderr, "%s totally clipped -- nothing to do\n",
+ 		 __FUNCTION__);
+@@ -212,14 +213,14 @@ r200TryReadPixels( GLcontext *ctx,
+    }
+ 
+    {
+-      __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
++      __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable;
+       driRenderbuffer *drb = (driRenderbuffer *) ctx->ReadBuffer->_ColorReadBuffer;
+       int nbox = dPriv->numClipRects;
+       int src_offset = drb->offset
+-		     + rmesa->r200Screen->fbLocation;
++		     + rmesa->radeon.radeonScreen->fbLocation;
+       int src_pitch = drb->pitch * drb->cpp;
+       int dst_offset = r200GartOffsetFromVirtual( rmesa, pixels );
+-      int dst_pitch = pitch * rmesa->r200Screen->cpp;
++      int dst_pitch = pitch * rmesa->radeon.radeonScreen->cpp;
+       drm_clip_rect_t *box = dPriv->pClipRects;
+       int i;
+ 
+@@ -257,12 +258,12 @@ r200TryReadPixels( GLcontext *ctx,
+ 		       bw, bh );
+       }
+ 
+-      r200FlushCmdBufLocked( rmesa, __FUNCTION__ );
++      rcommonFlushCmdBufLocked( &rmesa->radeon, __FUNCTION__ );
+    }
+-   UNLOCK_HARDWARE( rmesa );
+-
+-   r200Finish( ctx ); /* required by GL */
++   UNLOCK_HARDWARE( &rmesa->radeon );
+ 
++   radeonFinish( ctx ); /* required by GL */
++#endif
+    return GL_TRUE;
+ }
+ 
+@@ -292,7 +293,7 @@ static void do_draw_pix( GLcontext *ctx,
+ 			 GLuint planemask)
+ {
+    r200ContextPtr rmesa = R200_CONTEXT(ctx);
+-   __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
++   __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable;
+    drm_clip_rect_t *box = dPriv->pClipRects;
+    struct gl_renderbuffer *rb = ctx->ReadBuffer->_ColorDrawBuffers[0];
+    driRenderbuffer *drb = (driRenderbuffer *) rb;
+@@ -301,12 +302,12 @@ static void do_draw_pix( GLcontext *ctx,
+    int blit_format;
+    int size;
+    int src_offset = r200GartOffsetFromVirtual( rmesa, pixels );
+-   int src_pitch = pitch * rmesa->r200Screen->cpp;
++   int src_pitch = pitch * rmesa->radeon.radeonScreen->cpp;
+ 
+    if (R200_DEBUG & DEBUG_PIXEL)
+       fprintf(stderr, "%s\n", __FUNCTION__);
+-
+-   switch ( rmesa->r200Screen->cpp ) {
++#if 0
++   switch ( rmesa->radeon.radeonScreen->cpp ) {
+    case 2:
+       blit_format = R200_CP_COLOR_FORMAT_RGB565;
+       break;
+@@ -318,17 +319,17 @@ static void do_draw_pix( GLcontext *ctx,
+    }
+ 
+ 
+-   LOCK_HARDWARE( rmesa );
++   LOCK_HARDWARE( &rmesa->radeon );
+ 
+    if (rmesa->store.cmd_used)
+-      r200FlushCmdBufLocked( rmesa, __FUNCTION__ );
++      rcommonFlushCmdBufLocked( &rmesa->radeon, __FUNCTION__ );
+ 
+    y -= height;			/* cope with pixel zoom */
+    
+    if (!clip_pixelrect(ctx, ctx->DrawBuffer,
+ 		       &x, &y, &width, &height,
+ 		       &size)) {
+-      UNLOCK_HARDWARE( rmesa );
++      UNLOCK_HARDWARE( &rmesa->radeon );
+       return;
+    }
+ 
+@@ -357,15 +358,16 @@ static void do_draw_pix( GLcontext *ctx,
+ 		    blit_format,
+ 		    src_pitch, src_offset,
+ 		    drb->pitch * drb->cpp,
+-		    drb->offset + rmesa->r200Screen->fbLocation,
++		    drb->offset + rmesa->radeon.radeonScreen->fbLocation,
+ 		    bx - x, by - y,
+ 		    bx, by,
+ 		    bw, bh );
+    }
+ 
+-   r200FlushCmdBufLocked( rmesa, __FUNCTION__ );
+-   r200WaitForIdleLocked( rmesa ); /* required by GL */
+-   UNLOCK_HARDWARE( rmesa );
++   rcommonFlushCmdBufLocked( &rmesa->radeon, __FUNCTION__ );
++   radeonWaitForIdleLocked( &rmesa->radeon ); /* required by GL */
++   UNLOCK_HARDWARE( &rmesa->radeon );
++#endif
+ }
+ 
+ 
+@@ -381,7 +383,7 @@ r200TryDrawPixels( GLcontext *ctx,
+    r200ContextPtr rmesa = R200_CONTEXT(ctx);
+    GLint pitch = unpack->RowLength ? unpack->RowLength : width;
+    GLuint planemask;
+-   GLuint cpp = rmesa->r200Screen->cpp;
++   GLuint cpp = rmesa->radeon.radeonScreen->cpp;
+    GLint size = height * pitch * cpp;
+ 
+    if (R200_DEBUG & DEBUG_PIXEL)
+@@ -395,7 +397,7 @@ r200TryDrawPixels( GLcontext *ctx,
+    case GL_RGB:
+    case GL_RGBA:
+    case GL_BGRA:
+-      planemask = r200PackColor(cpp,
++      planemask = radeonPackColor(cpp,
+ 				ctx->Color.ColorMask[RCOMP],
+ 				ctx->Color.ColorMask[GCOMP],
+ 				ctx->Color.ColorMask[BCOMP],
+@@ -431,7 +433,7 @@ r200TryDrawPixels( GLcontext *ctx,
+       return GL_FALSE;
+    }
+ 
+-   if ( r200IsGartMemory(rmesa, pixels, size) )
++   if (0)// r200IsGartMemory(rmesa, pixels, size) )
+    {
+       do_draw_pix( ctx, x, y, width, height, pitch, pixels, planemask );
+       return GL_TRUE;
+@@ -471,7 +473,7 @@ r200Bitmap( GLcontext *ctx, GLint px, GLint py,
+ {
+    r200ContextPtr rmesa = R200_CONTEXT(ctx);
+ 
+-   if (rmesa->Fallback)
++   if (rmesa->radeon.Fallback)
+       _swrast_Bitmap( ctx, px, py, width, height, unpack, bitmap );
+    else
+       r200PointsBitmap( ctx, px, py, width, height, unpack, bitmap );
+diff --git a/src/mesa/drivers/dri/r200/r200_reg.h b/src/mesa/drivers/dri/r200/r200_reg.h
+index 5ce287f..526a624 100644
+--- a/src/mesa/drivers/dri/r200/r200_reg.h
++++ b/src/mesa/drivers/dri/r200/r200_reg.h
+@@ -463,8 +463,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #define     R200_VSC_UPDATE_USER_COLOR_1_ENABLE    0x00020000
+ /* gap */
+ #define R200_SE_TCL_VECTOR_INDX_REG                0x2200
++#       define RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT  16
++#       define RADEON_VEC_INDX_DWORD_COUNT_SHIFT     28
+ #define R200_SE_TCL_VECTOR_DATA_REG                0x2204
+ #define R200_SE_TCL_SCALAR_INDX_REG                0x2208
++#       define RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT  16
+ #define R200_SE_TCL_SCALAR_DATA_REG                0x220c
+ /* gap */
+ #define R200_SE_TCL_MATRIX_SEL_0                   0x2230
+@@ -949,6 +952,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #define     R200_LOD_BIAS_MASK                        (0xfff80000)
+ #define     R200_LOD_BIAS_SHIFT                       19
+ #define R200_PP_TXSIZE_0                  0x2c0c /* NPOT only */
++#define R200_PP_TX_WIDTHMASK_SHIFT 0
++#define R200_PP_TX_HEIGHTMASK_SHIFT 16
++
+ #define R200_PP_TXPITCH_0                 0x2c10 /* NPOT only */
+ #define R200_PP_BORDER_COLOR_0            0x2c14
+ #define R200_PP_CUBIC_FACES_0             0x2c18
+diff --git a/src/mesa/drivers/dri/r200/r200_span.c b/src/mesa/drivers/dri/r200/r200_span.c
+deleted file mode 100644
+index 9783678..0000000
+--- a/src/mesa/drivers/dri/r200/r200_span.c
++++ /dev/null
+@@ -1,307 +0,0 @@
+-/*
+-Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+-
+-The Weather Channel (TM) funded Tungsten Graphics to develop the
+-initial release of the Radeon 8500 driver under the XFree86 license.
+-This notice must be preserved.
+-
+-Permission is hereby granted, free of charge, to any person obtaining
+-a copy of this software and associated documentation files (the
+-"Software"), to deal in the Software without restriction, including
+-without limitation the rights to use, copy, modify, merge, publish,
+-distribute, sublicense, and/or sell copies of the Software, and to
+-permit persons to whom the Software is furnished to do so, subject to
+-the following conditions:
+-
+-The above copyright notice and this permission notice (including the
+-next paragraph) shall be included in all copies or substantial
+-portions of the Software.
+-
+-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+-IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+-
+-**************************************************************************/
+-
+-/*
+- * Authors:
+- *   Keith Whitwell <keith@tungstengraphics.com>
+- */
+-
+-#include "main/glheader.h"
+-#include "main/imports.h"
+-#include "main/colormac.h"
+-#include "swrast/swrast.h"
+-
+-#include "r200_context.h"
+-#include "r200_ioctl.h"
+-#include "r200_state.h"
+-#include "r200_span.h"
+-#include "r200_tex.h"
+-
+-#define DBG 0
+-
+-/*
+- * Note that all information needed to access pixels in a renderbuffer
+- * should be obtained through the gl_renderbuffer parameter, not per-context
+- * information.
+- */
+-#define LOCAL_VARS						\
+-   driRenderbuffer *drb = (driRenderbuffer *) rb;		\
+-   const __DRIdrawablePrivate *dPriv = drb->dPriv;		\
+-   const GLuint bottom = dPriv->h - 1;				\
+-   GLubyte *buf = (GLubyte *) drb->flippedData			\
+-      + (dPriv->y * drb->flippedPitch + dPriv->x) * drb->cpp;	\
+-   GLuint p;							\
+-   (void) p;
+-
+-#define LOCAL_DEPTH_VARS				\
+-   driRenderbuffer *drb = (driRenderbuffer *) rb;	\
+-   const __DRIdrawablePrivate *dPriv = drb->dPriv;	\
+-   const GLuint bottom = dPriv->h - 1;			\
+-   GLuint xo = dPriv->x;				\
+-   GLuint yo = dPriv->y;				\
+-   GLubyte *buf = (GLubyte *) drb->Base.Data;
+-
+-#define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS
+-
+-#define Y_FLIP(Y) (bottom - (Y))
+-
+-#define HW_LOCK() 
+-
+-#define HW_UNLOCK()							
+-
+-
+-
+-/* ================================================================
+- * Color buffer
+- */
+-
+-/* 16 bit, RGB565 color spanline and pixel functions
+- */
+-#define SPANTMP_PIXEL_FMT GL_RGB
+-#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5
+-
+-#define TAG(x)    r200##x##_RGB565
+-#define TAG2(x,y) r200##x##_RGB565##y
+-#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 2)
+-#include "spantmp2.h"
+-
+-/* 32 bit, ARGB8888 color spanline and pixel functions
+- */
+-#define SPANTMP_PIXEL_FMT GL_BGRA
+-#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
+-
+-#define TAG(x)    r200##x##_ARGB8888
+-#define TAG2(x,y) r200##x##_ARGB8888##y
+-#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 4)
+-#include "spantmp2.h"
+-
+-
+-/* ================================================================
+- * Depth buffer
+- */
+-
+-/* The Radeon family has depth tiling on all the time, so we have to convert
+- * the x,y coordinates into the memory bus address (mba) in the same
+- * manner as the engine.  In each case, the linear block address (ba)
+- * is calculated, and then wired with x and y to produce the final
+- * memory address.
+- * The chip will do address translation on its own if the surface registers
+- * are set up correctly. It is not quite enough to get it working with hyperz too...
+- */
+-
+-/* extract bit 'b' of x, result is zero or one */
+-#define BIT(x,b) ((x & (1<<b))>>b)
+-
+-static GLuint
+-r200_mba_z32( driRenderbuffer *drb, GLint x, GLint y )
+-{
+-   GLuint pitch = drb->pitch;
+-   if (drb->depthHasSurface) {
+-      return 4 * (x + y * pitch);
+-   }
+-   else {
+-      GLuint b = ((y & 0x7FF) >> 4) * ((pitch & 0xFFF) >> 5) + ((x & 0x7FF) >> 5);
+-      GLuint a = 
+-         (BIT(x,0) << 2) |
+-         (BIT(y,0) << 3) |
+-         (BIT(x,1) << 4) |
+-         (BIT(y,1) << 5) |
+-         (BIT(x,3) << 6) |
+-         (BIT(x,4) << 7) |
+-         (BIT(x,2) << 8) |
+-         (BIT(y,2) << 9) |
+-         (BIT(y,3) << 10) |
+-         (((pitch & 0x20) ? (b & 0x01) : ((b & 0x01) ^ (BIT(y,4)))) << 11) |
+-         ((b >> 1) << 12);
+-      return a;
+-   }
+-}
+-
+-static GLuint
+-r200_mba_z16( driRenderbuffer *drb, GLint x, GLint y )
+-{
+-   GLuint pitch = drb->pitch;
+-   if (drb->depthHasSurface) {
+-      return 2 * (x + y * pitch);
+-   }
+-   else {
+-      GLuint b = ((y & 0x7FF) >> 4) * ((pitch & 0xFFF) >> 6) + ((x & 0x7FF) >> 6);
+-      GLuint a = 
+-         (BIT(x,0) << 1) |
+-         (BIT(y,0) << 2) |
+-         (BIT(x,1) << 3) |
+-         (BIT(y,1) << 4) |
+-         (BIT(x,2) << 5) |
+-         (BIT(x,4) << 6) |
+-         (BIT(x,5) << 7) |
+-         (BIT(x,3) << 8) |
+-         (BIT(y,2) << 9) |
+-         (BIT(y,3) << 10) |
+-         (((pitch & 0x40) ? (b & 0x01) : ((b & 0x01) ^ (BIT(y,4)))) << 11) |
+-         ((b >> 1) << 12);
+-      return a;
+-   }
+-}
+-
+-
+-/* 16-bit depth buffer functions
+- */
+-#define VALUE_TYPE GLushort
+-
+-#define WRITE_DEPTH( _x, _y, d )					\
+-   *(GLushort *)(buf + r200_mba_z16( drb, _x + xo, _y + yo )) = d;
+-
+-#define READ_DEPTH( d, _x, _y )						\
+-   d = *(GLushort *)(buf + r200_mba_z16( drb, _x + xo, _y + yo ));
+-
+-#define TAG(x) r200##x##_z16
+-#include "depthtmp.h"
+-
+-
+-/* 24 bit depth, 8 bit stencil depthbuffer functions
+- */
+-#define VALUE_TYPE GLuint
+-
+-#define WRITE_DEPTH( _x, _y, d )					\
+-do {									\
+-   GLuint offset = r200_mba_z32( drb, _x + xo, _y + yo );		\
+-   GLuint tmp = *(GLuint *)(buf + offset);				\
+-   tmp &= 0xff000000;							\
+-   tmp |= ((d) & 0x00ffffff);						\
+-   *(GLuint *)(buf + offset) = tmp;					\
+-} while (0)
+-
+-#define READ_DEPTH( d, _x, _y )						\
+-   d = *(GLuint *)(buf + r200_mba_z32( drb, _x + xo,			\
+-					 _y + yo )) & 0x00ffffff;
+-
+-#define TAG(x) r200##x##_z24_s8
+-#include "depthtmp.h"
+-
+-
+-/* ================================================================
+- * Stencil buffer
+- */
+-
+-/* 24 bit depth, 8 bit stencil depthbuffer functions
+- */
+-#define WRITE_STENCIL( _x, _y, d )					\
+-do {									\
+-   GLuint offset = r200_mba_z32( drb, _x + xo, _y + yo );		\
+-   GLuint tmp = *(GLuint *)(buf + offset);				\
+-   tmp &= 0x00ffffff;							\
+-   tmp |= (((d) & 0xff) << 24);						\
+-   *(GLuint *)(buf + offset) = tmp;					\
+-} while (0)
+-
+-#define READ_STENCIL( d, _x, _y )					\
+-do {									\
+-   GLuint offset = r200_mba_z32( drb, _x + xo, _y + yo );		\
+-   GLuint tmp = *(GLuint *)(buf + offset);				\
+-   tmp &= 0xff000000;							\
+-   d = tmp >> 24;							\
+-} while (0)
+-
+-#define TAG(x) r200##x##_z24_s8
+-#include "stenciltmp.h"
+-
+-
+-/* Move locking out to get reasonable span performance (10x better
+- * than doing this in HW_LOCK above).  WaitForIdle() is the main
+- * culprit.
+- */
+-
+-static void r200SpanRenderStart( GLcontext *ctx )
+-{
+-   r200ContextPtr rmesa = R200_CONTEXT( ctx );
+-
+-   R200_FIREVERTICES( rmesa );
+-   LOCK_HARDWARE( rmesa );
+-   r200WaitForIdleLocked( rmesa );
+-
+-   /* Read & rewrite the first pixel in the frame buffer.  This should
+-    * be a noop, right?  In fact without this conform fails as reading
+-    * from the framebuffer sometimes produces old results -- the
+-    * on-card read cache gets mixed up and doesn't notice that the
+-    * framebuffer has been updated.
+-    *
+-    * In the worst case this is buggy too as p might get the wrong
+-    * value first time, so really need a hidden pixel somewhere for this.
+-    */
+-   {
+-      int p;
+-      driRenderbuffer *drb =
+-	 (driRenderbuffer *) ctx->WinSysDrawBuffer->_ColorDrawBuffers[0];
+-      volatile int *buf =
+-	 (volatile int *)(rmesa->dri.screen->pFB + drb->offset);
+-      p = *buf;
+-      *buf = p;
+-   }
+-}
+-
+-static void r200SpanRenderFinish( GLcontext *ctx )
+-{
+-   r200ContextPtr rmesa = R200_CONTEXT( ctx );
+-   _swrast_flush( ctx );
+-   UNLOCK_HARDWARE( rmesa );
+-}
+-
+-void r200InitSpanFuncs( GLcontext *ctx )
+-{
+-   struct swrast_device_driver *swdd = _swrast_GetDeviceDriverReference(ctx);
+-   swdd->SpanRenderStart          = r200SpanRenderStart;
+-   swdd->SpanRenderFinish         = r200SpanRenderFinish; 
+-}
+-
+-
+-
+-/**
+- * Plug in the Get/Put routines for the given driRenderbuffer.
+- */
+-void
+-radeonSetSpanFunctions(driRenderbuffer *drb, const GLvisual *vis)
+-{
+-   if (drb->Base.InternalFormat == GL_RGBA) {
+-      if (vis->redBits == 5 && vis->greenBits == 6 && vis->blueBits == 5) {
+-         r200InitPointers_RGB565(&drb->Base);
+-      }
+-      else {
+-         r200InitPointers_ARGB8888(&drb->Base);
+-      }
+-   }
+-   else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT16) {
+-      r200InitDepthPointers_z16(&drb->Base);
+-   }
+-   else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT24) {
+-      r200InitDepthPointers_z24_s8(&drb->Base);
+-   }
+-   else if (drb->Base.InternalFormat == GL_STENCIL_INDEX8_EXT) {
+-      r200InitStencilPointers_z24_s8(&drb->Base);
+-   }
+-}
+diff --git a/src/mesa/drivers/dri/r200/r200_span.h b/src/mesa/drivers/dri/r200/r200_span.h
+deleted file mode 100644
+index bae5644..0000000
+--- a/src/mesa/drivers/dri/r200/r200_span.h
++++ /dev/null
+@@ -1,45 +0,0 @@
+-/*
+-Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+-
+-The Weather Channel (TM) funded Tungsten Graphics to develop the
+-initial release of the Radeon 8500 driver under the XFree86 license.
+-This notice must be preserved.
+-
+-Permission is hereby granted, free of charge, to any person obtaining
+-a copy of this software and associated documentation files (the
+-"Software"), to deal in the Software without restriction, including
+-without limitation the rights to use, copy, modify, merge, publish,
+-distribute, sublicense, and/or sell copies of the Software, and to
+-permit persons to whom the Software is furnished to do so, subject to
+-the following conditions:
+-
+-The above copyright notice and this permission notice (including the
+-next paragraph) shall be included in all copies or substantial
+-portions of the Software.
+-
+-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+-IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+-
+-**************************************************************************/
+-
+-/*
+- * Authors:
+- *   Keith Whitwell <keith@tungstengraphics.com>
+- */
+-
+-#ifndef __R200_SPAN_H__
+-#define __R200_SPAN_H__
+-
+-#include "drirenderbuffer.h"
+-
+-extern void r200InitSpanFuncs( GLcontext *ctx );
+-
+-extern void
+-radeonSetSpanFunctions(driRenderbuffer *rb, const GLvisual *vis);
+-
+-#endif
+diff --git a/src/mesa/drivers/dri/r200/r200_state.c b/src/mesa/drivers/dri/r200/r200_state.c
+index 0eaaaf6..126f78b 100644
+--- a/src/mesa/drivers/dri/r200/r200_state.c
++++ b/src/mesa/drivers/dri/r200/r200_state.c
+@@ -47,6 +47,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "tnl/t_pipeline.h"
+ #include "swrast_setup/swrast_setup.h"
+ 
++#include "radeon_common.h"
++#include "radeon_mipmap_tree.h"
+ #include "r200_context.h"
+ #include "r200_ioctl.h"
+ #include "r200_state.h"
+@@ -114,8 +116,8 @@ static void r200BlendColor( GLcontext *ctx, const GLfloat cf[4] )
+    CLAMPED_FLOAT_TO_UBYTE(color[1], cf[1]);
+    CLAMPED_FLOAT_TO_UBYTE(color[2], cf[2]);
+    CLAMPED_FLOAT_TO_UBYTE(color[3], cf[3]);
+-   if (rmesa->r200Screen->drmSupportsBlendColor)
+-      rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCOLOR] = r200PackColor( 4, color[0], color[1], color[2], color[3] );
++   if (rmesa->radeon.radeonScreen->drmSupportsBlendColor)
++      rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCOLOR] = radeonPackColor( 4, color[0], color[1], color[2], color[3] );
+ }
+ 
+ /**
+@@ -213,7 +215,7 @@ static void r200_set_blend_state( GLcontext * ctx )
+ 
+    R200_STATECHANGE( rmesa, ctx );
+ 
+-   if (rmesa->r200Screen->drmSupportsBlendColor) {
++   if (rmesa->radeon.radeonScreen->drmSupportsBlendColor) {
+       if (ctx->Color.ColorLogicOpEnabled) {
+          rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] =  cntl | R200_ROP_ENABLE;
+          rmesa->hw.ctx.cmd[CTX_RB3D_ABLENDCNTL] = eqn | func;
+@@ -278,7 +280,7 @@ static void r200_set_blend_state( GLcontext * ctx )
+       return;
+    }
+ 
+-   if (!rmesa->r200Screen->drmSupportsBlendColor) {
++   if (!rmesa->radeon.radeonScreen->drmSupportsBlendColor) {
+       rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] = eqn | func;
+       return;
+    }
+@@ -383,10 +385,10 @@ static void r200ClearDepth( GLcontext *ctx, GLclampd d )
+ 
+    switch ( format ) {
+    case R200_DEPTH_FORMAT_16BIT_INT_Z:
+-      rmesa->state.depth.clear = d * 0x0000ffff;
++      rmesa->radeon.state.depth.clear = d * 0x0000ffff;
+       break;
+    case R200_DEPTH_FORMAT_24BIT_INT_Z:
+-      rmesa->state.depth.clear = d * 0x00ffffff;
++      rmesa->radeon.state.depth.clear = d * 0x00ffffff;
+       break;
+    }
+ }
+@@ -480,7 +482,7 @@ static void r200Fogfv( GLcontext *ctx, GLenum pname, const GLfloat *param )
+    case GL_FOG_COLOR: 
+       R200_STATECHANGE( rmesa, ctx );
+       UNCLAMPED_FLOAT_TO_RGB_CHAN( col, ctx->Fog.Color );
+-      i = r200PackColor( 4, col[0], col[1], col[2], 0 );
++      i = radeonPackColor( 4, col[0], col[1], col[2], 0 );
+       rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] &= ~R200_FOG_COLOR_MASK;
+       rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] |= i;
+       break;
+@@ -521,102 +523,6 @@ static void r200Fogfv( GLcontext *ctx, GLenum pname, const GLfloat *param )
+    }
+ }
+ 
+-
+-/* =============================================================
+- * Scissoring
+- */
+-
+-
+-static GLboolean intersect_rect( drm_clip_rect_t *out,
+-				 drm_clip_rect_t *a,
+-				 drm_clip_rect_t *b )
+-{
+-   *out = *a;
+-   if ( b->x1 > out->x1 ) out->x1 = b->x1;
+-   if ( b->y1 > out->y1 ) out->y1 = b->y1;
+-   if ( b->x2 < out->x2 ) out->x2 = b->x2;
+-   if ( b->y2 < out->y2 ) out->y2 = b->y2;
+-   if ( out->x1 >= out->x2 ) return GL_FALSE;
+-   if ( out->y1 >= out->y2 ) return GL_FALSE;
+-   return GL_TRUE;
+-}
+-
+-
+-void r200RecalcScissorRects( r200ContextPtr rmesa )
+-{
+-   drm_clip_rect_t *out;
+-   int i;
+-
+-   /* Grow cliprect store?
+-    */
+-   if (rmesa->state.scissor.numAllocedClipRects < rmesa->numClipRects) {
+-      while (rmesa->state.scissor.numAllocedClipRects < rmesa->numClipRects) {
+-	 rmesa->state.scissor.numAllocedClipRects += 1;	/* zero case */
+-	 rmesa->state.scissor.numAllocedClipRects *= 2;
+-      }
+-
+-      if (rmesa->state.scissor.pClipRects)
+-	 FREE(rmesa->state.scissor.pClipRects);
+-
+-      rmesa->state.scissor.pClipRects = 
+-	 MALLOC( rmesa->state.scissor.numAllocedClipRects * 
+-		 sizeof(drm_clip_rect_t) );
+-
+-      if ( rmesa->state.scissor.pClipRects == NULL ) {
+-	 rmesa->state.scissor.numAllocedClipRects = 0;
+-	 return;
+-      }
+-   }
+-   
+-   out = rmesa->state.scissor.pClipRects;
+-   rmesa->state.scissor.numClipRects = 0;
+-
+-   for ( i = 0 ; i < rmesa->numClipRects ;  i++ ) {
+-      if ( intersect_rect( out, 
+-			   &rmesa->pClipRects[i], 
+-			   &rmesa->state.scissor.rect ) ) {
+-	 rmesa->state.scissor.numClipRects++;
+-	 out++;
+-      }
+-   }
+-}
+-
+-
+-static void r200UpdateScissor( GLcontext *ctx )
+-{
+-   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+-
+-   if ( rmesa->dri.drawable ) {
+-      __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
+-
+-      int x = ctx->Scissor.X;
+-      int y = dPriv->h - ctx->Scissor.Y - ctx->Scissor.Height;
+-      int w = ctx->Scissor.X + ctx->Scissor.Width - 1;
+-      int h = dPriv->h - ctx->Scissor.Y - 1;
+-
+-      rmesa->state.scissor.rect.x1 = x + dPriv->x;
+-      rmesa->state.scissor.rect.y1 = y + dPriv->y;
+-      rmesa->state.scissor.rect.x2 = w + dPriv->x + 1;
+-      rmesa->state.scissor.rect.y2 = h + dPriv->y + 1;
+-
+-      r200RecalcScissorRects( rmesa );
+-   }
+-}
+-
+-
+-static void r200Scissor( GLcontext *ctx,
+-			   GLint x, GLint y, GLsizei w, GLsizei h )
+-{
+-   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+-
+-   if ( ctx->Scissor.Enabled ) {
+-      R200_FIREVERTICES( rmesa );	/* don't pipeline cliprect changes */
+-      r200UpdateScissor( ctx );
+-   }
+-
+-}
+-
+-
+ /* =============================================================
+  * Culling
+  */
+@@ -803,7 +709,7 @@ static void r200ColorMask( GLcontext *ctx,
+ 			   GLboolean b, GLboolean a )
+ {
+    r200ContextPtr rmesa = R200_CONTEXT(ctx);
+-   GLuint mask = r200PackColor( rmesa->r200Screen->cpp,
++   GLuint mask = radeonPackColor( rmesa->radeon.radeonScreen->cpp,
+ 				ctx->Color.ColorMask[RCOMP],
+ 				ctx->Color.ColorMask[GCOMP],
+ 				ctx->Color.ColorMask[BCOMP],
+@@ -834,7 +740,7 @@ static void r200PolygonOffset( GLcontext *ctx,
+ 			       GLfloat factor, GLfloat units )
+ {
+    r200ContextPtr rmesa = R200_CONTEXT(ctx);
+-   float_ui32_type constant =  { units * rmesa->state.depth.scale };
++   float_ui32_type constant =  { units * rmesa->radeon.state.depth.scale };
+    float_ui32_type factoru = { factor };
+ 
+ /*    factor *= 2; */
+@@ -861,15 +767,15 @@ static void r200PolygonStipple( GLcontext *ctx, const GLubyte *mask )
+ 
+    /* TODO: push this into cmd mechanism
+     */
+-   R200_FIREVERTICES( rmesa );
+-   LOCK_HARDWARE( rmesa );
++   radeon_firevertices(&rmesa->radeon);
++   LOCK_HARDWARE( &rmesa->radeon );
+ 
+    /* FIXME: Use window x,y offsets into stipple RAM.
+     */
+    stipple.mask = rmesa->state.stipple.mask;
+-   drmCommandWrite( rmesa->dri.fd, DRM_RADEON_STIPPLE, 
++   drmCommandWrite( rmesa->radeon.dri.fd, DRM_RADEON_STIPPLE, 
+                     &stipple, sizeof(stipple) );
+-   UNLOCK_HARDWARE( rmesa );
++   UNLOCK_HARDWARE( &rmesa->radeon );
+ }
+ 
+ static void r200PolygonMode( GLcontext *ctx, GLenum face, GLenum mode )
+@@ -881,7 +787,7 @@ static void r200PolygonMode( GLcontext *ctx, GLenum face, GLenum mode )
+     * cases work. 
+     */
+    TCL_FALLBACK( ctx, R200_TCL_FALLBACK_UNFILLED, flag);
+-   if (rmesa->TclFallback) {
++   if (rmesa->radeon.TclFallback) {
+       r200ChooseRenderState( ctx );
+       r200ChooseVertexState( ctx );
+    }
+@@ -958,7 +864,7 @@ static void r200UpdateSpecular( GLcontext *ctx )
+ 
+    /* Update vertex/render formats
+     */
+-   if (rmesa->TclFallback) { 
++   if (rmesa->radeon.TclFallback) { 
+       r200ChooseRenderState( ctx );
+       r200ChooseVertexState( ctx );
+    }
+@@ -1430,7 +1336,7 @@ static void r200LightModelfv( GLcontext *ctx, GLenum pname,
+ 	    rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] |= R200_LIGHT_TWOSIDE;
+ 	 else
+ 	    rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] &= ~(R200_LIGHT_TWOSIDE);
+-	 if (rmesa->TclFallback) {
++	 if (rmesa->radeon.TclFallback) {
+ 	    r200ChooseRenderState( ctx );
+ 	    r200ChooseVertexState( ctx );
+ 	 }
+@@ -1675,7 +1581,7 @@ static void r200ClearStencil( GLcontext *ctx, GLint s )
+ {
+    r200ContextPtr rmesa = R200_CONTEXT(ctx);
+ 
+-   rmesa->state.stencil.clear = 
++   rmesa->radeon.state.stencil.clear = 
+       ((GLuint) (ctx->Stencil.Clear & 0xff) |
+        (0xff << R200_STENCIL_MASK_SHIFT) |
+        ((ctx->Stencil.WriteMask[0] & 0xff) << R200_STENCIL_WRITEMASK_SHIFT));
+@@ -1700,19 +1606,19 @@ static void r200ClearStencil( GLcontext *ctx, GLint s )
+ void r200UpdateWindow( GLcontext *ctx )
+ {
+    r200ContextPtr rmesa = R200_CONTEXT(ctx);
+-   __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
+-   GLfloat xoffset = (GLfloat)dPriv->x;
+-   GLfloat yoffset = (GLfloat)dPriv->y + dPriv->h;
++   __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable;
++   GLfloat xoffset = dPriv ? (GLfloat) dPriv->x : 0;
++   GLfloat yoffset = dPriv ? (GLfloat) dPriv->y + dPriv->h : 0;
+    const GLfloat *v = ctx->Viewport._WindowMap.m;
+ 
+    float_ui32_type sx = { v[MAT_SX] };
+    float_ui32_type tx = { v[MAT_TX] + xoffset + SUBPIXEL_X };
+    float_ui32_type sy = { - v[MAT_SY] };
+    float_ui32_type ty = { (- v[MAT_TY]) + yoffset + SUBPIXEL_Y };
+-   float_ui32_type sz = { v[MAT_SZ] * rmesa->state.depth.scale };
+-   float_ui32_type tz = { v[MAT_TZ] * rmesa->state.depth.scale };
++   float_ui32_type sz = { v[MAT_SZ] * rmesa->radeon.state.depth.scale };
++   float_ui32_type tz = { v[MAT_TZ] * rmesa->radeon.state.depth.scale };
+ 
+-   R200_FIREVERTICES( rmesa );
++   radeon_firevertices(&rmesa->radeon);
+    R200_STATECHANGE( rmesa, vpt );
+ 
+    rmesa->hw.vpt.cmd[VPT_SE_VPORT_XSCALE]  = sx.ui32;
+@@ -1744,7 +1650,7 @@ static void r200DepthRange( GLcontext *ctx, GLclampd nearval,
+ void r200UpdateViewportOffset( GLcontext *ctx )
+ {
+    r200ContextPtr rmesa = R200_CONTEXT(ctx);
+-   __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
++   __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable;
+    GLfloat xoffset = (GLfloat)dPriv->x;
+    GLfloat yoffset = (GLfloat)dPriv->y + dPriv->h;
+    const GLfloat *v = ctx->Viewport._WindowMap.m;
+@@ -1774,8 +1680,8 @@ void r200UpdateViewportOffset( GLcontext *ctx )
+                 R200_STIPPLE_Y_OFFSET_MASK);
+ 
+          /* add magic offsets, then invert */
+-         stx = 31 - ((rmesa->dri.drawable->x - 1) & R200_STIPPLE_COORD_MASK);
+-         sty = 31 - ((rmesa->dri.drawable->y + rmesa->dri.drawable->h - 1)
++         stx = 31 - ((rmesa->radeon.dri.drawable->x - 1) & R200_STIPPLE_COORD_MASK);
++         sty = 31 - ((rmesa->radeon.dri.drawable->y + rmesa->radeon.dri.drawable->h - 1)
+                      & R200_STIPPLE_COORD_MASK);
+ 
+          m |= ((stx << R200_STIPPLE_X_OFFSET_SHIFT) |
+@@ -1788,7 +1694,7 @@ void r200UpdateViewportOffset( GLcontext *ctx )
+       }
+    }
+ 
+-   r200UpdateScissor( ctx );
++   radeonUpdateScissor( ctx );
+ }
+ 
+ 
+@@ -1805,7 +1711,7 @@ static void r200ClearColor( GLcontext *ctx, const GLfloat c[4] )
+    CLAMPED_FLOAT_TO_UBYTE(color[1], c[1]);
+    CLAMPED_FLOAT_TO_UBYTE(color[2], c[2]);
+    CLAMPED_FLOAT_TO_UBYTE(color[3], c[3]);
+-   rmesa->state.color.clear = r200PackColor( rmesa->r200Screen->cpp,
++   rmesa->radeon.state.color.clear = radeonPackColor( rmesa->radeon.radeonScreen->cpp,
+                                              color[0], color[1],
+                                              color[2], color[3] );
+ }
+@@ -1849,56 +1755,6 @@ static void r200LogicOpCode( GLcontext *ctx, GLenum opcode )
+ }
+ 
+ 
+-/*
+- * Set up the cliprects for either front or back-buffer drawing.
+- */
+-void r200SetCliprects( r200ContextPtr rmesa )
+-{
+-   __DRIdrawablePrivate *const drawable = rmesa->dri.drawable;
+-   __DRIdrawablePrivate *const readable = rmesa->dri.readable;
+-   GLframebuffer *const draw_fb = (GLframebuffer*) drawable->driverPrivate;
+-   GLframebuffer *const read_fb = (GLframebuffer*) readable->driverPrivate;
+-
+-   if (draw_fb->_ColorDrawBufferIndexes[0] == BUFFER_BIT_BACK_LEFT) {
+-      /* Can't ignore 2d windows if we are page flipping.
+-       */
+-      if ( drawable->numBackClipRects == 0 || rmesa->doPageFlip ) {
+-         rmesa->numClipRects = drawable->numClipRects;
+-         rmesa->pClipRects = drawable->pClipRects;
+-      }
+-      else {
+-         rmesa->numClipRects = drawable->numBackClipRects;
+-         rmesa->pClipRects = drawable->pBackClipRects;
+-      }
+-   }
+-   else {
+-     /* front buffer (or none, or multiple buffers) */
+-     rmesa->numClipRects = drawable->numClipRects;
+-     rmesa->pClipRects = drawable->pClipRects;
+-  }
+-
+-   if ((draw_fb->Width != drawable->w) || (draw_fb->Height != drawable->h)) {
+-      _mesa_resize_framebuffer(rmesa->glCtx, draw_fb,
+-			       drawable->w, drawable->h);
+-      draw_fb->Initialized = GL_TRUE;
+-   }
+-
+-   if (drawable != readable) {
+-      if ((read_fb->Width != readable->w) ||
+-	  (read_fb->Height != readable->h)) {
+-	 _mesa_resize_framebuffer(rmesa->glCtx, read_fb,
+-				  readable->w, readable->h);
+-	 read_fb->Initialized = GL_TRUE;
+-      }
+-   }
+-
+-   if (rmesa->state.scissor.enabled)
+-      r200RecalcScissorRects( rmesa );
+-
+-   rmesa->lastStamp = drawable->lastStamp;
+-}
+-
+-
+ static void r200DrawBuffer( GLcontext *ctx, GLenum mode )
+ {
+    r200ContextPtr rmesa = R200_CONTEXT(ctx);
+@@ -1907,7 +1763,7 @@ static void r200DrawBuffer( GLcontext *ctx, GLenum mode )
+       fprintf(stderr, "%s %s\n", __FUNCTION__,
+ 	      _mesa_lookup_enum_by_nr( mode ));
+ 
+-   R200_FIREVERTICES(rmesa);	/* don't pipeline cliprect changes */
++   radeon_firevertices(&rmesa->radeon);	/* don't pipeline cliprect changes */
+ 
+    if (ctx->DrawBuffer->_NumColorDrawBuffers != 1) {
+       /* 0 (GL_NONE) buffers or multiple color drawing buffers */
+@@ -1925,7 +1781,8 @@ static void r200DrawBuffer( GLcontext *ctx, GLenum mode )
+       return;
+    }
+ 
+-   r200SetCliprects( rmesa );
++   radeonSetCliprects( &rmesa->radeon );
++   radeonUpdatePageFlipping(&rmesa->radeon);
+ 
+    /* We'll set the drawing engine's offset/pitch parameters later
+     * when we update other state.
+@@ -2013,10 +1870,10 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state )
+       R200_STATECHANGE(rmesa, ctx );
+       if ( state ) {
+ 	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  R200_DITHER_ENABLE;
+-	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~rmesa->state.color.roundEnable;
++	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~rmesa->radeon.state.color.roundEnable;
+       } else {
+ 	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~R200_DITHER_ENABLE;
+-	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  rmesa->state.color.roundEnable;
++	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  rmesa->radeon.state.color.roundEnable;
+       }
+       break;
+ 
+@@ -2031,7 +1888,7 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state )
+ 	 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~R200_TCL_FOG_MASK;
+       }
+       r200UpdateSpecular( ctx ); /* for PK_SPEC */
+-      if (rmesa->TclFallback) 
++      if (rmesa->radeon.TclFallback) 
+ 	 r200ChooseVertexState( ctx );
+       _mesa_allow_light_in_model( ctx, !state );
+       break;
+@@ -2068,7 +1925,7 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state )
+    case GL_LIGHTING:
+       r200UpdateSpecular(ctx);
+       /* for reflection map fixup - might set recheck_texgen for all units too */
+-      rmesa->NewGLState |= _NEW_TEXTURE;
++      rmesa->radeon.NewGLState |= _NEW_TEXTURE;
+       break;
+ 
+    case GL_LINE_SMOOTH:
+@@ -2181,13 +2038,13 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state )
+    }
+ 
+    case GL_SCISSOR_TEST:
+-      R200_FIREVERTICES( rmesa );
+-      rmesa->state.scissor.enabled = state;
+-      r200UpdateScissor( ctx );
++      radeon_firevertices(&rmesa->radeon);
++      rmesa->radeon.state.scissor.enabled = state;
++      radeonUpdateScissor( ctx );
+       break;
+ 
+    case GL_STENCIL_TEST:
+-      if ( rmesa->state.stencil.hwBuffer ) {
++      if ( rmesa->radeon.state.stencil.hwBuffer ) {
+ 	 R200_STATECHANGE( rmesa, ctx );
+ 	 if ( state ) {
+ 	    rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  R200_STENCIL_ENABLE;
+@@ -2443,42 +2300,99 @@ r200UpdateDrawBuffer(GLcontext *ctx)
+ {
+    r200ContextPtr rmesa = R200_CONTEXT(ctx);
+    struct gl_framebuffer *fb = ctx->DrawBuffer;
+-   driRenderbuffer *drb;
++   struct radeon_renderbuffer *rrb;
+ 
+    if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) {
+-      /* draw to front */
+-      drb = (driRenderbuffer *) fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
+-   }
+-   else if (fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) {
+-      /* draw to back */
+-      drb = (driRenderbuffer *) fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
+-   }
+-   else {
+-      /* drawing to multiple buffers, or none */
+-      return;
++     /* draw to front */
++     rrb = (void *) fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
++   } else if (fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) {
++     /* draw to back */
++     rrb = (void *) fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
++   } else {
++     /* drawing to multiple buffers, or none */
++     return;
+    }
+ 
+-   assert(drb);
+-   assert(drb->flippedPitch);
++   assert(rrb);
++   assert(rrb->pitch);
+ 
+    R200_STATECHANGE( rmesa, ctx );
+ 
++#if 0
+    /* Note: we used the (possibly) page-flipped values */
+    rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET]
+-     = ((drb->flippedOffset + rmesa->r200Screen->fbLocation)
++     = ((rrb->flippedOffset + rmesa->radeon.radeonScreen->fbLocation)
+ 	& R200_COLOROFFSET_MASK);
+    rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = drb->flippedPitch;
+-   if (rmesa->sarea->tiling_enabled) {
++   if (rmesa->radeon.sarea->tiling_enabled) {
+       rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= R200_COLOR_TILE_ENABLE;
+    }
++#endif
+ }
+ 
++static GLboolean r200ValidateBuffers(GLcontext *ctx)
++{
++   r200ContextPtr rmesa = R200_CONTEXT(ctx);
++   struct radeon_cs_space_check bos[8];
++   struct radeon_renderbuffer *rrb;
++   int num_bo = 0;
++   int i;
++   int flushed = 0, ret;
++again:
++   num_bo = 0;
++   
++   rrb = radeon_get_colorbuffer(&rmesa->radeon);
++   /* color buffer */
++   if (rrb && rrb->bo) {
++      bos[num_bo].bo = rrb->bo;
++      bos[num_bo].read_domains = 0;
++      bos[num_bo].write_domain = RADEON_GEM_DOMAIN_VRAM;
++      bos[num_bo].new_accounted = 0;
++      num_bo++;
++   }
++
++   /* depth buffer */
++   rrb = radeon_get_depthbuffer(&rmesa->radeon);
++   /* only added to the space check when it has a backing bo */
++   if (rrb && rrb->bo) {
++      bos[num_bo].bo = rrb->bo;
++      bos[num_bo].read_domains = 0;
++      bos[num_bo].write_domain = RADEON_GEM_DOMAIN_VRAM;
++      bos[num_bo].new_accounted = 0;
++      num_bo++;
++   }
++
++   for (i = 0; i < ctx->Const.MaxTextureImageUnits; ++i) {
++      radeonTexObj *t;
++      
++      if (!ctx->Texture.Unit[i]._ReallyEnabled)
++	 continue;
++      
++      t = radeon_tex_obj(ctx->Texture.Unit[i]._Current);
++      bos[num_bo].bo = t->mt->bo;
++      bos[num_bo].read_domains = RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM;
++      bos[num_bo].write_domain = 0;
++      bos[num_bo].new_accounted = 0;
++      num_bo++;
++   }
++   
++   ret = radeon_cs_space_check(rmesa->radeon.cmdbuf.cs, bos, num_bo);
++   if (ret == RADEON_CS_SPACE_OP_TO_BIG)
++      return GL_FALSE;
++   if (ret == RADEON_CS_SPACE_FLUSH) {
++      radeonFlush(ctx);
++      if (flushed)
++	 return GL_FALSE;
++      flushed = 1;
++      goto again;
++   }
++   return GL_TRUE;
++}
+ 
+-
+-void r200ValidateState( GLcontext *ctx )
++GLboolean r200ValidateState( GLcontext *ctx )
+ {
+    r200ContextPtr rmesa = R200_CONTEXT(ctx);
+-   GLuint new_state = rmesa->NewGLState;
++   GLuint new_state = rmesa->radeon.NewGLState;
+ 
+    if (new_state & (_NEW_BUFFERS | _NEW_COLOR | _NEW_PIXEL)) {
+      r200UpdateDrawBuffer(ctx);
+@@ -2486,10 +2400,14 @@ void r200ValidateState( GLcontext *ctx )
+ 
+    if (new_state & (_NEW_TEXTURE | _NEW_PROGRAM)) {
+       r200UpdateTextureState( ctx );
+-      new_state |= rmesa->NewGLState; /* may add TEXTURE_MATRIX */
++      new_state |= rmesa->radeon.NewGLState; /* may add TEXTURE_MATRIX */
+       r200UpdateLocalViewer( ctx );
+    }
+ 
++   /* we need to do a space check here */
++   if (!r200ValidateBuffers(ctx))
++     return GL_FALSE;
++
+ /* FIXME: don't really need most of these when vertex progs are enabled */
+ 
+    /* Need an event driven matrix update?
+@@ -2533,7 +2451,8 @@ void r200ValidateState( GLcontext *ctx )
+       else TCL_FALLBACK(ctx, R200_TCL_FALLBACK_VERTEX_PROGRAM, 0);
+    }
+ 
+-   rmesa->NewGLState = 0;
++   rmesa->radeon.NewGLState = 0;
++   return GL_TRUE;
+ }
+ 
+ 
+@@ -2544,7 +2463,7 @@ static void r200InvalidateState( GLcontext *ctx, GLuint new_state )
+    _vbo_InvalidateState( ctx, new_state );
+    _tnl_InvalidateState( ctx, new_state );
+    _ae_invalidate_state( ctx, new_state );
+-   R200_CONTEXT(ctx)->NewGLState |= new_state;
++   R200_CONTEXT(ctx)->radeon.NewGLState |= new_state;
+ }
+ 
+ /* A hack.  The r200 can actually cope just fine with materials
+@@ -2573,12 +2492,13 @@ static void r200WrapRunPipeline( GLcontext *ctx )
+    GLboolean has_material;
+ 
+    if (0)
+-      fprintf(stderr, "%s, newstate: %x\n", __FUNCTION__, rmesa->NewGLState);
++      fprintf(stderr, "%s, newstate: %x\n", __FUNCTION__, rmesa->radeon.NewGLState);
+ 
+    /* Validate state:
+     */
+-   if (rmesa->NewGLState)
+-      r200ValidateState( ctx );
++   if (rmesa->radeon.NewGLState)
++      if (!r200ValidateState( ctx ))
++	 FALLBACK(rmesa, RADEON_FALLBACK_TEXTURE, GL_TRUE);
+ 
+    has_material = !ctx->VertexProgram._Enabled && ctx->Light.Enabled && check_material( ctx );
+ 
+@@ -2636,7 +2556,7 @@ void r200InitStateFuncs( struct dd_function_table *functions )
+    functions->PointParameterfv		= r200PointParameter;
+    functions->PointSize			= r200PointSize;
+    functions->RenderMode		= r200RenderMode;
+-   functions->Scissor			= r200Scissor;
++   functions->Scissor			= radeonScissor;
+    functions->ShadeModel		= r200ShadeModel;
+    functions->StencilFuncSeparate	= r200StencilFuncSeparate;
+    functions->StencilMaskSeparate	= r200StencilMaskSeparate;
+diff --git a/src/mesa/drivers/dri/r200/r200_state.h b/src/mesa/drivers/dri/r200/r200_state.h
+index a917163..1dddbfd 100644
+--- a/src/mesa/drivers/dri/r200/r200_state.h
++++ b/src/mesa/drivers/dri/r200/r200_state.h
+@@ -43,13 +43,11 @@ extern void r200InitTnlFuncs( GLcontext *ctx );
+ 
+ extern void r200UpdateMaterial( GLcontext *ctx );
+ 
+-extern void r200SetCliprects( r200ContextPtr rmesa );
+-extern void r200RecalcScissorRects( r200ContextPtr rmesa );
+ extern void r200UpdateViewportOffset( GLcontext *ctx );
+ extern void r200UpdateWindow( GLcontext *ctx );
+ extern void r200UpdateDrawBuffer(GLcontext *ctx);
+ 
+-extern void r200ValidateState( GLcontext *ctx );
++extern GLboolean r200ValidateState( GLcontext *ctx );
+ 
+ extern void r200PrintDirty( r200ContextPtr rmesa,
+ 			      const char *msg );
+@@ -59,7 +57,7 @@ extern void r200Fallback( GLcontext *ctx, GLuint bit, GLboolean mode );
+ #define FALLBACK( rmesa, bit, mode ) do {				\
+    if ( 0 ) fprintf( stderr, "FALLBACK in %s: #%d=%d\n",		\
+ 		     __FUNCTION__, bit, mode );				\
+-   r200Fallback( rmesa->glCtx, bit, mode );				\
++   r200Fallback( rmesa->radeon.glCtx, bit, mode );				\
+ } while (0)
+ 
+ extern void r200LightingSpaceChange( GLcontext *ctx );
+diff --git a/src/mesa/drivers/dri/r200/r200_state_init.c b/src/mesa/drivers/dri/r200/r200_state_init.c
+index 9e4677e..013064d 100644
+--- a/src/mesa/drivers/dri/r200/r200_state_init.c
++++ b/src/mesa/drivers/dri/r200/r200_state_init.c
+@@ -43,6 +43,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "tnl/t_pipeline.h"
+ #include "swrast_setup/swrast_setup.h"
+ 
++#include "radeon_common.h"
++#include "radeon_mipmap_tree.h"
+ #include "r200_context.h"
+ #include "r200_ioctl.h"
+ #include "r200_state.h"
+@@ -52,31 +54,145 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ #include "xmlpool.h"
+ 
++/* New (1.3) state mechanism.  3 commands (packet, scalar, vector) in
++ * 1.3 cmdbuffers allow all previous state to be updated as well as
++ * the tcl scalar and vector areas.
++ */
++static struct {
++	int start;
++	int len;
++	const char *name;
++} packet[RADEON_MAX_STATE_PACKETS] = {
++	{RADEON_PP_MISC, 7, "RADEON_PP_MISC"},
++	{RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"},
++	{RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"},
++	{RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"},
++	{RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"},
++	{RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"},
++	{RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"},
++	{RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"},
++	{RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"},
++	{RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"},
++	{RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"},
++	{RADEON_RE_MISC, 1, "RADEON_RE_MISC"},
++	{RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"},
++	{RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"},
++	{RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"},
++	{RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"},
++	{RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"},
++	{RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"},
++	{RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"},
++	{RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"},
++	{RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17,
++		    "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"},
++	{R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"},
++	{R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"},
++	{R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"},
++	{R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"},
++	{R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"},
++	{R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"},
++	{R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"},
++	{R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"},
++	{R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"},
++	{R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"},
++	{R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"},
++	{R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"},
++	{R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"},
++	{R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"},
++	{R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"},
++	{R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"},
++	{R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"},
++	{R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"},
++	{R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"},
++	{R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"},
++	{R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"},
++	{R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"},
++	{R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"},
++	{R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"},
++	{R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"},
++	{R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"},
++	{R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"},
++	{R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"},
++	{R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1,
++	 "R200_SE_TCL_OUTPUT_VTX_COMP_SEL"},
++	{R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"},
++	{R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"},
++	{R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"},
++	{R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"},
++	{R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"},
++	{R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"},
++	{R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"},
++	{R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"},
++	{R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"},
++	{R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"},
++	{R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4,
++		    "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"},
++	{R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"},	/* 61 */
++	{R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"}, /* 62 */
++	{R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"},
++	{R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"},
++	{R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"},
++	{R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"},
++	{R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"},
++	{R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"},
++	{R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"},
++	{R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"},
++	{R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"},
++	{R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"},
++	{RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"},
++	{RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"},
++	{RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"},
++	{R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"},
++	{R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
++	{RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"},
++	{RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"},
++	{RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"},
++	{RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"},
++	{RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"},
++	{RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
++	{R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
++	{R200_PP_TXCBLEND_8, 32, "R200_PP_AFS_0"},     /* 85 */
++	{R200_PP_TXCBLEND_0, 32, "R200_PP_AFS_1"},
++	{R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"},
++	{R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"},
++	{R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"},
++	{R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"},
++	{R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"},
++	{R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"},
++	{R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"},
++	{R200_VAP_PVS_CNTL_1, 2, "R200_VAP_PVS_CNTL"},
++};
++
+ /* =============================================================
+  * State initialization
+  */
+ 
+ void r200PrintDirty( r200ContextPtr rmesa, const char *msg )
+ {
+-   struct r200_state_atom *l;
++   struct radeon_state_atom *l;
+ 
+    fprintf(stderr, msg);
+    fprintf(stderr, ": ");
+ 
+-   foreach(l, &rmesa->hw.atomlist) {
+-      if (l->dirty || rmesa->hw.all_dirty)
++   foreach(l, &rmesa->radeon.hw.atomlist) {
++      if (l->dirty || rmesa->radeon.hw.all_dirty)
+ 	 fprintf(stderr, "%s, ", l->name);
+    }
+ 
+    fprintf(stderr, "\n");
+ }
+ 
+-static int cmdpkt( int id ) 
++static int cmdpkt( r200ContextPtr rmesa, int id ) 
+ {
+    drm_radeon_cmd_header_t h;
+-   h.i = 0;
+-   h.packet.cmd_type = RADEON_CMD_PACKET;
+-   h.packet.packet_id = id;
++
++   if (rmesa->radeon.radeonScreen->kernel_mm) {
++     return CP_PACKET0(packet[id].start, packet[id].len - 1);
++   } else {
++     h.i = 0;
++     h.packet.cmd_type = RADEON_CMD_PACKET;
++     h.packet.packet_id = id;
++   }
+    return h.i;
+ }
+ 
+@@ -127,71 +243,353 @@ static int cmdscl2( int offset, int stride, int count )
+ }
+ 
+ #define CHECK( NM, FLAG )				\
+-static GLboolean check_##NM( GLcontext *ctx, int idx )	\
++static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom) \
+ {							\
+    r200ContextPtr rmesa = R200_CONTEXT(ctx);		\
+-   (void) idx;						\
+    (void) rmesa;					\
+-   return FLAG;						\
++   return (FLAG) ? atom->cmd_size : 0;			\
+ }
+ 
+ #define TCL_CHECK( NM, FLAG )				\
+-static GLboolean check_##NM( GLcontext *ctx, int idx )	\
+-{							\
+-   r200ContextPtr rmesa = R200_CONTEXT(ctx);		\
+-   (void) idx;						\
+-   return !rmesa->TclFallback && !ctx->VertexProgram._Enabled && (FLAG);	\
++static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom) \
++{									\
++   r200ContextPtr rmesa = R200_CONTEXT(ctx);				\
++   return (!rmesa->radeon.TclFallback && !ctx->VertexProgram._Enabled && (FLAG)) ? atom->cmd_size : 0; \
+ }
+ 
+ #define TCL_OR_VP_CHECK( NM, FLAG )			\
+-static GLboolean check_##NM( GLcontext *ctx, int idx )	\
++static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom ) \
+ {							\
+    r200ContextPtr rmesa = R200_CONTEXT(ctx);		\
+-   (void) idx;						\
+-   return !rmesa->TclFallback && (FLAG);		\
++   return (!rmesa->radeon.TclFallback && (FLAG)) ? atom->cmd_size : 0;	\
+ }
+ 
+ #define VP_CHECK( NM, FLAG )				\
+-static GLboolean check_##NM( GLcontext *ctx, int idx )	\
+-{							\
+-   r200ContextPtr rmesa = R200_CONTEXT(ctx);		\
+-   (void) idx;						\
+-   return !rmesa->TclFallback && ctx->VertexProgram._Enabled && (FLAG);		\
++static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom ) \
++{									\
++   r200ContextPtr rmesa = R200_CONTEXT(ctx);				\
++   (void) atom;								\
++   return (!rmesa->radeon.TclFallback && ctx->VertexProgram._Enabled && (FLAG)) ? atom->cmd_size : 0; \
+ }
+ 
+-
+ CHECK( always, GL_TRUE )
+ CHECK( never, GL_FALSE )
+ CHECK( tex_any, ctx->Texture._EnabledUnits )
+ CHECK( tf, (ctx->Texture._EnabledUnits && !ctx->ATIFragmentShader._Enabled) );
+-CHECK( tex_pair, (rmesa->state.texture.unit[idx].unitneeded | rmesa->state.texture.unit[idx & ~1].unitneeded) )
+-CHECK( tex, rmesa->state.texture.unit[idx].unitneeded )
++CHECK( tex_pair, (rmesa->state.texture.unit[atom->idx].unitneeded | rmesa->state.texture.unit[atom->idx & ~1].unitneeded) )
++CHECK( tex, rmesa->state.texture.unit[atom->idx].unitneeded )
+ CHECK( pix_zero, !ctx->ATIFragmentShader._Enabled )
+-CHECK( texenv, (rmesa->state.envneeded & (1 << idx) && !ctx->ATIFragmentShader._Enabled) )
++   CHECK( texenv, (rmesa->state.envneeded & (1 << (atom->idx)) && !ctx->ATIFragmentShader._Enabled) )
+ CHECK( afs_pass1, (ctx->ATIFragmentShader._Enabled && (ctx->ATIFragmentShader.Current->NumPasses > 1)) )
+ CHECK( afs, ctx->ATIFragmentShader._Enabled )
+-CHECK( tex_cube, rmesa->state.texture.unit[idx].unitneeded & TEXTURE_CUBE_BIT )
++CHECK( tex_cube, rmesa->state.texture.unit[atom->idx].unitneeded & TEXTURE_CUBE_BIT )
+ TCL_CHECK( tcl_fog, ctx->Fog.Enabled )
+ TCL_CHECK( tcl, GL_TRUE )
+-TCL_CHECK( tcl_tex, rmesa->state.texture.unit[idx].unitneeded )
++TCL_CHECK( tcl_tex, rmesa->state.texture.unit[atom->idx].unitneeded )
+ TCL_CHECK( tcl_lighting, ctx->Light.Enabled )
+-TCL_CHECK( tcl_light, ctx->Light.Enabled && ctx->Light.Light[idx].Enabled )
+-TCL_OR_VP_CHECK( tcl_ucp, (ctx->Transform.ClipPlanesEnabled & (1 << idx)) )
++TCL_CHECK( tcl_light, ctx->Light.Enabled && ctx->Light.Light[atom->idx].Enabled )
++TCL_OR_VP_CHECK( tcl_ucp, (ctx->Transform.ClipPlanesEnabled & (1 << (atom->idx))) )
+ TCL_OR_VP_CHECK( tcl_or_vp, GL_TRUE )
+ VP_CHECK( tcl_vp, GL_TRUE )
+ VP_CHECK( tcl_vp_size, ctx->VertexProgram.Current->Base.NumNativeInstructions > 64 )
+ VP_CHECK( tcl_vpp_size, ctx->VertexProgram.Current->Base.NumNativeParameters > 96 )
+ 
++#define OUT_VEC(hdr, data) do {			\
++    drm_radeon_cmd_header_t h;					\
++    h.i = hdr;								\
++    OUT_BATCH(CP_PACKET0(RADEON_SE_TCL_STATE_FLUSH, 0));		\
++    OUT_BATCH(0);							\
++    OUT_BATCH(CP_PACKET0(R200_SE_TCL_VECTOR_INDX_REG, 0));		\
++    OUT_BATCH(h.vectors.offset | (h.vectors.stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT)); \
++    OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_VECTOR_DATA_REG, h.vectors.count - 1));	\
++    OUT_BATCH_TABLE((data), h.vectors.count);				\
++  } while(0)
++
++#define OUT_VECLINEAR(hdr, data) do {			\
++    drm_radeon_cmd_header_t h;					\
++    uint32_t _start, _sz;						\
++    h.i = (hdr); /* load the header before reading its veclinear fields */ \
++    _start = h.veclinear.addr_lo | (h.veclinear.addr_hi << 8);	\
++    _sz = h.veclinear.count * 4;					\
++    OUT_BATCH(CP_PACKET0(RADEON_SE_TCL_STATE_FLUSH, 0)); OUT_BATCH(0);	\
++    OUT_BATCH(CP_PACKET0(R200_SE_TCL_VECTOR_INDX_REG, 0));		\
++    OUT_BATCH(_start | (1 << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));	\
++    OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_VECTOR_DATA_REG, _sz - 1));	\
++    OUT_BATCH_TABLE((data), _sz);					\
++  } while(0)
++
++#define OUT_SCL(hdr, data) do {					\
++    drm_radeon_cmd_header_t h;						\
++    h.i = hdr;								\
++    OUT_BATCH(CP_PACKET0(R200_SE_TCL_SCALAR_INDX_REG, 0));		\
++    OUT_BATCH((h.scalars.offset) | (h.scalars.stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT)); \
++    OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_SCALAR_DATA_REG, h.scalars.count - 1));	\
++    OUT_BATCH_TABLE((data), h.scalars.count);				\
++  } while(0)
++
++#define OUT_SCL2(hdr, data) do {					\
++    drm_radeon_cmd_header_t h;						\
++    h.i = hdr;								\
++    OUT_BATCH(CP_PACKET0(R200_SE_TCL_SCALAR_INDX_REG, 0));		\
++    OUT_BATCH((h.scalars.offset + 0x100) | (h.scalars.stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT)); \
++    OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_SCALAR_DATA_REG, h.scalars.count - 1));	\
++    OUT_BATCH_TABLE((data), h.scalars.count);				\
++  } while(0)
++
++static void mtl_emit(GLcontext *ctx, struct radeon_state_atom *atom)
++{
++   r200ContextPtr r200 = R200_CONTEXT(ctx);
++   BATCH_LOCALS(&r200->radeon);
++   uint32_t dwords = atom->cmd_size;
++
++   BEGIN_BATCH_NO_AUTOSTATE(dwords);
++   OUT_VEC(atom->cmd[MTL_CMD_0], (atom->cmd+1));
++   OUT_SCL2(atom->cmd[MTL_CMD_1], (atom->cmd + 18));
++   END_BATCH();
++}
++
++static void lit_emit(GLcontext *ctx, struct radeon_state_atom *atom)
++{
++   r200ContextPtr r200 = R200_CONTEXT(ctx);
++   BATCH_LOCALS(&r200->radeon);
++   uint32_t dwords = atom->cmd_size;
++
++   BEGIN_BATCH_NO_AUTOSTATE(dwords);
++   OUT_VEC(atom->cmd[LIT_CMD_0], atom->cmd+1);
++   OUT_VEC(atom->cmd[LIT_CMD_1], atom->cmd+LIT_CMD_1+1);
++   END_BATCH();
++}
++
++static void ptp_emit(GLcontext *ctx, struct radeon_state_atom *atom)
++{
++   r200ContextPtr r200 = R200_CONTEXT(ctx);
++   BATCH_LOCALS(&r200->radeon);
++   uint32_t dwords = atom->cmd_size;
++
++   BEGIN_BATCH_NO_AUTOSTATE(dwords);
++   OUT_VEC(atom->cmd[PTP_CMD_0], atom->cmd+1);
++   OUT_VEC(atom->cmd[PTP_CMD_1], atom->cmd+PTP_CMD_1+1);
++   END_BATCH();
++}
++
++static void veclinear_emit(GLcontext *ctx, struct radeon_state_atom *atom)
++{
++   r200ContextPtr r200 = R200_CONTEXT(ctx);
++   BATCH_LOCALS(&r200->radeon);
++   uint32_t dwords = atom->cmd_size;
++
++   BEGIN_BATCH_NO_AUTOSTATE(dwords);
++   OUT_VECLINEAR(atom->cmd[0], atom->cmd+1);
++   END_BATCH();
++}
++
++static void scl_emit(GLcontext *ctx, struct radeon_state_atom *atom)
++{
++   r200ContextPtr r200 = R200_CONTEXT(ctx);
++   BATCH_LOCALS(&r200->radeon);
++   uint32_t dwords = atom->cmd_size;
++
++   BEGIN_BATCH_NO_AUTOSTATE(dwords);
++   OUT_SCL(atom->cmd[0], atom->cmd+1);
++   END_BATCH();
++}
++
++
++static void vec_emit(GLcontext *ctx, struct radeon_state_atom *atom)
++{
++   r200ContextPtr r200 = R200_CONTEXT(ctx);
++   BATCH_LOCALS(&r200->radeon);
++   uint32_t dwords = atom->cmd_size;
++
++   BEGIN_BATCH_NO_AUTOSTATE(dwords);
++   OUT_VEC(atom->cmd[0], atom->cmd+1);
++   END_BATCH();
++}
++
++static void ctx_emit(GLcontext *ctx, struct radeon_state_atom *atom)
++{
++   r200ContextPtr r200 = R200_CONTEXT(ctx);
++   BATCH_LOCALS(&r200->radeon);
++   struct radeon_renderbuffer *rrb;
++   uint32_t cbpitch;
++   uint32_t zbpitch;
++   uint32_t dwords = atom->cmd_size;
++   GLframebuffer *fb = r200->radeon.dri.drawable->driverPrivate;
++
++   /* output the first 5 entries of atom->cmd; depth offset/pitch follow */
++   BEGIN_BATCH_NO_AUTOSTATE(dwords+2+2);
++   OUT_BATCH_TABLE(atom->cmd, 5);
++
++   rrb = r200->radeon.state.depth.rrb;
++   if (!rrb) {
++     OUT_BATCH(0);
++     OUT_BATCH(0);
++   } else {
++     zbpitch = (rrb->pitch / rrb->cpp);
++     OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
++     OUT_BATCH(zbpitch);
++   }
++     
++   OUT_BATCH(atom->cmd[CTX_RB3D_ZSTENCILCNTL]);
++   OUT_BATCH(atom->cmd[CTX_CMD_1]);
++   OUT_BATCH(atom->cmd[CTX_PP_CNTL]);
++   OUT_BATCH(atom->cmd[CTX_RB3D_CNTL]);
++
++   rrb = r200->radeon.state.color.rrb;
++   if (r200->radeon.radeonScreen->driScreen->dri2.enabled) {
++      rrb = (struct radeon_renderbuffer *)fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
++   }
++   if (!rrb || !rrb->bo) {
++     OUT_BATCH(atom->cmd[CTX_RB3D_COLOROFFSET]);
++   } else {
++     OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
++   }
++
++   OUT_BATCH(atom->cmd[CTX_CMD_2]);
++
++   if (!rrb || !rrb->bo) {
++     OUT_BATCH(atom->cmd[CTX_RB3D_COLORPITCH]);
++   } else {
++     cbpitch = (rrb->pitch / rrb->cpp);
++     if (rrb->cpp == 4)
++       ;
++     else
++       ;
++     if (r200->radeon.sarea->tiling_enabled)
++       cbpitch |= R200_COLOR_TILE_ENABLE;
++     OUT_BATCH(cbpitch);
++   }
++
++   if (atom->cmd_size == CTX_STATE_SIZE_NEWDRM)
++     OUT_BATCH_TABLE((atom->cmd + 14), 4);
++
++   END_BATCH();
++}
++
++static void ctx_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom)
++{
++   r200ContextPtr r200 = R200_CONTEXT(ctx);
++   BATCH_LOCALS(&r200->radeon);
++   struct radeon_renderbuffer *rrb, *drb;
++   uint32_t cbpitch = 0;
++   uint32_t zbpitch = 0;
++   uint32_t dwords = atom->cmd_size;
++   GLframebuffer *fb = r200->radeon.dri.drawable->driverPrivate;
++   /* CS variant of ctx_emit: the context atom is written as separate CP_PACKET0 register writes. */
++   rrb = r200->radeon.state.color.rrb;
++   if (r200->radeon.radeonScreen->driScreen->dri2.enabled) {
++      rrb = (struct radeon_renderbuffer *)fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
++   }
++   if (rrb) {
++     assert(rrb->bo != NULL);
++     cbpitch = (rrb->pitch / rrb->cpp);
++     if (r200->radeon.sarea->tiling_enabled)
++       cbpitch |= R200_COLOR_TILE_ENABLE;
++   }
++
++   drb = r200->radeon.state.depth.rrb;
++   if (drb)
++     zbpitch = (drb->pitch / drb->cpp);
++
++   /* the batch is sized from the atom's cmd_size */
++   BEGIN_BATCH_NO_AUTOSTATE(dwords);
++
++   /* In the CS case we need to split this up */
++   OUT_BATCH(CP_PACKET0(packet[0].start, 3));
++   OUT_BATCH_TABLE((atom->cmd + 1), 4);
++   /* depth offset is relocated against the depth buffer's bo (rrb is the color buffer and may be NULL) */
++   if (drb) {
++     OUT_BATCH(CP_PACKET0(RADEON_RB3D_DEPTHOFFSET, 0));
++     OUT_BATCH_RELOC(0, drb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
++
++     OUT_BATCH(CP_PACKET0(RADEON_RB3D_DEPTHPITCH, 0));
++     OUT_BATCH(zbpitch);
++   }
++
++   OUT_BATCH(CP_PACKET0(RADEON_RB3D_ZSTENCILCNTL, 0));
++   OUT_BATCH(atom->cmd[CTX_RB3D_ZSTENCILCNTL]);
++   OUT_BATCH(CP_PACKET0(RADEON_PP_CNTL, 1));
++   OUT_BATCH(atom->cmd[CTX_PP_CNTL]);
++   OUT_BATCH(atom->cmd[CTX_RB3D_CNTL]);
++
++   /* color offset/pitch are only written when a color renderbuffer exists */
++   if (rrb) {
++     OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLOROFFSET, 0));
++     OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
++   }
++
++   if (rrb) {
++     if (rrb->cpp == 4)
++       ;
++     else
++       ;
++     OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
++     OUT_BATCH(cbpitch);
++   }
++
++   if (atom->cmd_size == CTX_STATE_SIZE_NEWDRM) {
++     OUT_BATCH_TABLE((atom->cmd + 14), 4);
++   }
++
++   END_BATCH();
++}
++
++/* Emit a texture-unit atom; the offset dword is a relocation when a miptree exists. */
++static void tex_emit(GLcontext *ctx, struct radeon_state_atom *atom)
++{
++   r200ContextPtr r200 = R200_CONTEXT(ctx);
++   BATCH_LOCALS(&r200->radeon);
++   uint32_t dwords = atom->cmd_size;
++   int i = atom->idx;
++   radeonTexObj *t = r200->state.texture.unit[i].texobj;
++   int use_reloc = (t && t->mt && !t->image_override); /* sizing and emission must agree */
++
++   if (use_reloc)
++     dwords += 2;
++   BEGIN_BATCH_NO_AUTOSTATE(dwords);
++   OUT_BATCH_TABLE(atom->cmd, 10);
++   if (use_reloc) {
++     OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0, RADEON_GEM_DOMAIN_VRAM, 0, 0);
++   } else if (t && t->image_override) {
++     OUT_BATCH(t->override_offset);
++   } else { /* no texobj or no miptree: default offset (workaround for old CS mechanism) */
++     OUT_BATCH(r200->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP]);
++   }
++   END_BATCH();
++}
++
++/* Emit a cube-map atom: three table dwords, then relocations for faces 1..5. */
++static void cube_emit(GLcontext *ctx, struct radeon_state_atom *atom)
++{
++   r200ContextPtr r200 = R200_CONTEXT(ctx);
++   BATCH_LOCALS(&r200->radeon);
++   uint32_t dwords = atom->cmd_size;
++   int i = atom->idx;
++   radeonTexObj *t = r200->state.texture.unit[i].texobj;
++
++   BEGIN_BATCH_NO_AUTOSTATE(dwords + (2 * 5));
++   OUT_BATCH_TABLE(atom->cmd, 3);
++   /* Require a miptree (as tex_emit does when sizing) before touching t->mt. */
++   if (t && t->mt && !t->image_override) {
++     GLuint size = t->mt->totalsize / 6;
++     OUT_BATCH_RELOC(0, t->mt->bo, size, RADEON_GEM_DOMAIN_VRAM, 0, 0);
++     OUT_BATCH_RELOC(0, t->mt->bo, size * 2, RADEON_GEM_DOMAIN_VRAM, 0, 0);
++     OUT_BATCH_RELOC(0, t->mt->bo, size * 3, RADEON_GEM_DOMAIN_VRAM, 0, 0);
++     OUT_BATCH_RELOC(0, t->mt->bo, size * 4, RADEON_GEM_DOMAIN_VRAM, 0, 0);
++     OUT_BATCH_RELOC(0, t->mt->bo, size * 5, RADEON_GEM_DOMAIN_VRAM, 0, 0);
++   }
++   END_BATCH();
++}
+ 
+ /* Initialize the context's hardware state.
+  */
+ void r200InitState( r200ContextPtr rmesa )
+ {
+-   GLcontext *ctx = rmesa->glCtx;
++   GLcontext *ctx = rmesa->radeon.glCtx;
+    GLuint color_fmt, depth_fmt, i;
+    GLint drawPitch, drawOffset;
+ 
+-   switch ( rmesa->r200Screen->cpp ) {
++   switch ( rmesa->radeon.radeonScreen->cpp ) {
+    case 2:
+       color_fmt = R200_COLOR_FORMAT_RGB565;
+       break;
+@@ -203,20 +601,20 @@ void r200InitState( r200ContextPtr rmesa )
+       exit( -1 );
+    }
+ 
+-   rmesa->state.color.clear = 0x00000000;
++   rmesa->radeon.state.color.clear = 0x00000000;
+ 
+    switch ( ctx->Visual.depthBits ) {
+    case 16:
+-      rmesa->state.depth.clear = 0x0000ffff;
+-      rmesa->state.depth.scale = 1.0 / (GLfloat)0xffff;
++      rmesa->radeon.state.depth.clear = 0x0000ffff;
++      rmesa->radeon.state.depth.scale = 1.0 / (GLfloat)0xffff;
+       depth_fmt = R200_DEPTH_FORMAT_16BIT_INT_Z;
+-      rmesa->state.stencil.clear = 0x00000000;
++      rmesa->radeon.state.stencil.clear = 0x00000000;
+       break;
+    case 24:
+-      rmesa->state.depth.clear = 0x00ffffff;
+-      rmesa->state.depth.scale = 1.0 / (GLfloat)0xffffff;
++      rmesa->radeon.state.depth.clear = 0x00ffffff;
++      rmesa->radeon.state.depth.scale = 1.0 / (GLfloat)0xffffff;
+       depth_fmt = R200_DEPTH_FORMAT_24BIT_INT_Z;
+-      rmesa->state.stencil.clear = 0xffff0000;
++      rmesa->radeon.state.stencil.clear = 0xffff0000;
+       break;
+    default:
+       fprintf( stderr, "Error: Unsupported depth %d... exiting\n",
+@@ -225,52 +623,57 @@ void r200InitState( r200ContextPtr rmesa )
+    }
+ 
+    /* Only have hw stencil when depth buffer is 24 bits deep */
+-   rmesa->state.stencil.hwBuffer = ( ctx->Visual.stencilBits > 0 &&
++   rmesa->radeon.state.stencil.hwBuffer = ( ctx->Visual.stencilBits > 0 &&
+ 				     ctx->Visual.depthBits == 24 );
+ 
+-   rmesa->Fallback = 0;
++   rmesa->radeon.Fallback = 0;
+ 
+-   if ( ctx->Visual.doubleBufferMode && rmesa->sarea->pfCurrentPage == 0 ) {
+-      drawOffset = rmesa->r200Screen->backOffset;
+-      drawPitch  = rmesa->r200Screen->backPitch;
++   if ( ctx->Visual.doubleBufferMode && rmesa->radeon.sarea->pfCurrentPage == 0 ) {
++      drawOffset = rmesa->radeon.radeonScreen->backOffset;
++      drawPitch  = rmesa->radeon.radeonScreen->backPitch;
+    } else {
+-      drawOffset = rmesa->r200Screen->frontOffset;
+-      drawPitch  = rmesa->r200Screen->frontPitch;
++      drawOffset = rmesa->radeon.radeonScreen->frontOffset;
++      drawPitch  = rmesa->radeon.radeonScreen->frontPitch;
+    }
+ #if 000
+    if ( ctx->Visual.doubleBufferMode && rmesa->sarea->pfCurrentPage == 0 ) {
+-      rmesa->state.color.drawOffset = rmesa->r200Screen->backOffset;
+-      rmesa->state.color.drawPitch  = rmesa->r200Screen->backPitch;
++      rmesa->radeon.state.color.drawOffset = rmesa->radeon.radeonScreen->backOffset;
++      rmesa->radeon.state.color.drawPitch  = rmesa->radeon.radeonScreen->backPitch;
+    } else {
+-      rmesa->state.color.drawOffset = rmesa->r200Screen->frontOffset;
+-      rmesa->state.color.drawPitch  = rmesa->r200Screen->frontPitch;
++      rmesa->radeon.state.color.drawOffset = rmesa->radeon.radeonScreen->frontOffset;
++      rmesa->radeon.state.color.drawPitch  = rmesa->radeon.radeonScreen->frontPitch;
+    }
+ 
+-   rmesa->state.pixel.readOffset = rmesa->state.color.drawOffset;
+-   rmesa->state.pixel.readPitch  = rmesa->state.color.drawPitch;
++   rmesa->state.pixel.readOffset = rmesa->radeon.state.color.drawOffset;
++   rmesa->state.pixel.readPitch  = rmesa->radeon.state.color.drawPitch;
+ #endif
+ 
+-   rmesa->hw.max_state_size = 0;
++   rmesa->radeon.hw.max_state_size = 0;
+ 
+ #define ALLOC_STATE( ATOM, CHK, SZ, NM, IDX )				\
+    do {								\
+       rmesa->hw.ATOM.cmd_size = SZ;				\
+-      rmesa->hw.ATOM.cmd = (int *)CALLOC(SZ * sizeof(int));	\
+-      rmesa->hw.ATOM.lastcmd = (int *)CALLOC(SZ * sizeof(int));	\
++      rmesa->hw.ATOM.cmd = (GLuint *)CALLOC(SZ * sizeof(int));	\
++      rmesa->hw.ATOM.lastcmd = (GLuint *)CALLOC(SZ * sizeof(int));	\
+       rmesa->hw.ATOM.name = NM;					\
+       rmesa->hw.ATOM.idx = IDX;					\
+       rmesa->hw.ATOM.check = check_##CHK;			\
+       rmesa->hw.ATOM.dirty = GL_FALSE;				\
+-      rmesa->hw.max_state_size += SZ * sizeof(int);		\
++      rmesa->radeon.hw.max_state_size += SZ * sizeof(int);		\
+    } while (0)
+ 
+ 
+    /* Allocate state buffers:
+     */
+-   if (rmesa->r200Screen->drmSupportsBlendColor)
++   if (rmesa->radeon.radeonScreen->drmSupportsBlendColor)
+       ALLOC_STATE( ctx, always, CTX_STATE_SIZE_NEWDRM, "CTX/context", 0 );
+    else
+       ALLOC_STATE( ctx, always, CTX_STATE_SIZE_OLDDRM, "CTX/context", 0 );
++
++   if (rmesa->radeon.radeonScreen->kernel_mm)
++     rmesa->hw.ctx.emit = ctx_emit_cs;
++   else
++     rmesa->hw.ctx.emit = ctx_emit;
+    ALLOC_STATE( set, always, SET_STATE_SIZE, "SET/setup", 0 );
+    ALLOC_STATE( lin, always, LIN_STATE_SIZE, "LIN/line", 0 );
+    ALLOC_STATE( msk, always, MSK_STATE_SIZE, "MSK/mask", 0 );
+@@ -282,8 +685,8 @@ void r200InitState( r200ContextPtr rmesa )
+    ALLOC_STATE( cst, always, CST_STATE_SIZE, "CST/constant", 0 );
+    ALLOC_STATE( zbs, always, ZBS_STATE_SIZE, "ZBS/zbias", 0 );
+    ALLOC_STATE( tf, tf, TF_STATE_SIZE, "TF/tfactor", 0 );
+-   if (rmesa->r200Screen->drmSupportsFragShader) {
+-      if (rmesa->r200Screen->chip_family == CHIP_FAMILY_R200) {
++   if (rmesa->radeon.radeonScreen->drmSupportsFragShader) {
++      if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200) {
+       /* make sure texture units 0/1 are emitted pair-wise for r200 t0 hang workaround */
+ 	 ALLOC_STATE( tex[0], tex_pair, TEX_STATE_SIZE_NEWDRM, "TEX/tex-0", 0 );
+ 	 ALLOC_STATE( tex[1], tex_pair, TEX_STATE_SIZE_NEWDRM, "TEX/tex-1", 1 );
+@@ -303,7 +706,7 @@ void r200InitState( r200ContextPtr rmesa )
+       ALLOC_STATE( afs[1], afs, AFS_STATE_SIZE, "AFS/afsinst-1", 1 );
+    }
+    else {
+-      if (rmesa->r200Screen->chip_family == CHIP_FAMILY_R200) {
++      if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200) {
+ 	 ALLOC_STATE( tex[0], tex_pair, TEX_STATE_SIZE_OLDDRM, "TEX/tex-0", 0 );
+ 	 ALLOC_STATE( tex[1], tex_pair, TEX_STATE_SIZE_OLDDRM, "TEX/tex-1", 1 );
+ 	 ALLOC_STATE( tam, tex_any, TAM_STATE_SIZE, "TAM/tam", 0 );
+@@ -321,13 +724,18 @@ void r200InitState( r200ContextPtr rmesa )
+       ALLOC_STATE( afs[0], never, AFS_STATE_SIZE, "AFS/afsinst-0", 0 );
+       ALLOC_STATE( afs[1], never, AFS_STATE_SIZE, "AFS/afsinst-1", 1 );
+    }
+-   if (rmesa->r200Screen->drmSupportsCubeMapsR200) {
++
++   for (i = 0; i < 6; i++) /* all six texture units, tex[0]..tex[5] */
++     rmesa->hw.tex[i].emit = tex_emit;
++   if (rmesa->radeon.radeonScreen->drmSupportsCubeMapsR200) {
+       ALLOC_STATE( cube[0], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-0", 0 );
+       ALLOC_STATE( cube[1], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-1", 1 );
+       ALLOC_STATE( cube[2], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-2", 2 );
+       ALLOC_STATE( cube[3], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-3", 3 );
+       ALLOC_STATE( cube[4], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-4", 4 );
+       ALLOC_STATE( cube[5], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-5", 5 );
++      for (i = 0; i < 6; i++) /* all six cube atoms, cube[0]..cube[5] */
++	rmesa->hw.cube[i].emit = cube_emit;
+    }
+    else {
+       ALLOC_STATE( cube[0], never, CUBE_STATE_SIZE, "CUBE/tex-0", 0 );
+@@ -337,7 +745,8 @@ void r200InitState( r200ContextPtr rmesa )
+       ALLOC_STATE( cube[4], never, CUBE_STATE_SIZE, "CUBE/tex-4", 4 );
+       ALLOC_STATE( cube[5], never, CUBE_STATE_SIZE, "CUBE/tex-5", 5 );
+    }
+-   if (rmesa->r200Screen->drmSupportsVertexProgram) {
++
++   if (rmesa->radeon.radeonScreen->drmSupportsVertexProgram) {
+       ALLOC_STATE( pvs, tcl_vp, PVS_STATE_SIZE, "PVS/pvscntl", 0 );
+       ALLOC_STATE( vpi[0], tcl_vp, VPI_STATE_SIZE, "VP/vertexprog-0", 0 );
+       ALLOC_STATE( vpi[1], tcl_vp_size, VPI_STATE_SIZE, "VP/vertexprog-1", 1 );
+@@ -390,13 +799,13 @@ void r200InitState( r200ContextPtr rmesa )
+    ALLOC_STATE( pix[3], texenv, PIX_STATE_SIZE, "PIX/pixstage-3", 3 );
+    ALLOC_STATE( pix[4], texenv, PIX_STATE_SIZE, "PIX/pixstage-4", 4 );
+    ALLOC_STATE( pix[5], texenv, PIX_STATE_SIZE, "PIX/pixstage-5", 5 );
+-   if (rmesa->r200Screen->drmSupportsTriPerf) {
++   if (rmesa->radeon.radeonScreen->drmSupportsTriPerf) {
+       ALLOC_STATE( prf, always, PRF_STATE_SIZE, "PRF/performance-tri", 0 );
+    }
+    else {
+       ALLOC_STATE( prf, never, PRF_STATE_SIZE, "PRF/performance-tri", 0 );
+    }
+-   if (rmesa->r200Screen->drmSupportsPointSprites) {
++   if (rmesa->radeon.radeonScreen->drmSupportsPointSprites) {
+       ALLOC_STATE( spr, always, SPR_STATE_SIZE, "SPR/pointsprite", 0 );
+       ALLOC_STATE( ptp, tcl, PTP_STATE_SIZE, "PTP/pointparams", 0 );
+    }
+@@ -409,87 +818,115 @@ void r200InitState( r200ContextPtr rmesa )
+ 
+    /* Fill in the packet headers:
+     */
+-   rmesa->hw.ctx.cmd[CTX_CMD_0] = cmdpkt(RADEON_EMIT_PP_MISC);
+-   rmesa->hw.ctx.cmd[CTX_CMD_1] = cmdpkt(RADEON_EMIT_PP_CNTL);
+-   rmesa->hw.ctx.cmd[CTX_CMD_2] = cmdpkt(RADEON_EMIT_RB3D_COLORPITCH);
+-   if (rmesa->r200Screen->drmSupportsBlendColor)
+-      rmesa->hw.ctx.cmd[CTX_CMD_3] = cmdpkt(R200_EMIT_RB3D_BLENDCOLOR);
+-   rmesa->hw.lin.cmd[LIN_CMD_0] = cmdpkt(RADEON_EMIT_RE_LINE_PATTERN);
+-   rmesa->hw.lin.cmd[LIN_CMD_1] = cmdpkt(RADEON_EMIT_SE_LINE_WIDTH);
+-   rmesa->hw.msk.cmd[MSK_CMD_0] = cmdpkt(RADEON_EMIT_RB3D_STENCILREFMASK);
+-   rmesa->hw.vpt.cmd[VPT_CMD_0] = cmdpkt(RADEON_EMIT_SE_VPORT_XSCALE);
+-   rmesa->hw.set.cmd[SET_CMD_0] = cmdpkt(RADEON_EMIT_SE_CNTL);
+-   rmesa->hw.msc.cmd[MSC_CMD_0] = cmdpkt(RADEON_EMIT_RE_MISC);
+-   rmesa->hw.cst.cmd[CST_CMD_0] = cmdpkt(R200_EMIT_PP_CNTL_X);
+-   rmesa->hw.cst.cmd[CST_CMD_1] = cmdpkt(R200_EMIT_RB3D_DEPTHXY_OFFSET);
+-   rmesa->hw.cst.cmd[CST_CMD_2] = cmdpkt(R200_EMIT_RE_AUX_SCISSOR_CNTL);
+-   rmesa->hw.cst.cmd[CST_CMD_3] = cmdpkt(R200_EMIT_RE_SCISSOR_TL_0);
+-   rmesa->hw.cst.cmd[CST_CMD_4] = cmdpkt(R200_EMIT_SE_VAP_CNTL_STATUS);
+-   rmesa->hw.cst.cmd[CST_CMD_5] = cmdpkt(R200_EMIT_RE_POINTSIZE);
+-   rmesa->hw.cst.cmd[CST_CMD_6] = cmdpkt(R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0);
+-   rmesa->hw.tam.cmd[TAM_CMD_0] = cmdpkt(R200_EMIT_PP_TAM_DEBUG3);
+-   rmesa->hw.tf.cmd[TF_CMD_0] = cmdpkt(R200_EMIT_TFACTOR_0);
+-   if (rmesa->r200Screen->drmSupportsFragShader) {
+-      rmesa->hw.atf.cmd[ATF_CMD_0] = cmdpkt(R200_EMIT_ATF_TFACTOR);
+-      rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_0);
+-      rmesa->hw.tex[0].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_0);
+-      rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_1);
+-      rmesa->hw.tex[1].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_1);
+-      rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_2);
+-      rmesa->hw.tex[2].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_2);
+-      rmesa->hw.tex[3].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_3);
+-      rmesa->hw.tex[3].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_3);
+-      rmesa->hw.tex[4].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_4);
+-      rmesa->hw.tex[4].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_4);
+-      rmesa->hw.tex[5].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_5);
+-      rmesa->hw.tex[5].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_5);
++   rmesa->hw.ctx.cmd[CTX_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_MISC);
++   rmesa->hw.ctx.cmd[CTX_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_CNTL);
++   rmesa->hw.ctx.cmd[CTX_CMD_2] = cmdpkt(rmesa, RADEON_EMIT_RB3D_COLORPITCH);
++   if (rmesa->radeon.radeonScreen->drmSupportsBlendColor)
++      rmesa->hw.ctx.cmd[CTX_CMD_3] = cmdpkt(rmesa, R200_EMIT_RB3D_BLENDCOLOR);
++   rmesa->hw.lin.cmd[LIN_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RE_LINE_PATTERN);
++   rmesa->hw.lin.cmd[LIN_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_SE_LINE_WIDTH);
++   rmesa->hw.msk.cmd[MSK_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RB3D_STENCILREFMASK);
++   rmesa->hw.vpt.cmd[VPT_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_VPORT_XSCALE);
++   rmesa->hw.set.cmd[SET_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_CNTL);
++   rmesa->hw.msc.cmd[MSC_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RE_MISC);
++   rmesa->hw.cst.cmd[CST_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CNTL_X);
++   rmesa->hw.cst.cmd[CST_CMD_1] = cmdpkt(rmesa, R200_EMIT_RB3D_DEPTHXY_OFFSET);
++   rmesa->hw.cst.cmd[CST_CMD_2] = cmdpkt(rmesa, R200_EMIT_RE_AUX_SCISSOR_CNTL);
++   rmesa->hw.cst.cmd[CST_CMD_3] = cmdpkt(rmesa, R200_EMIT_RE_SCISSOR_TL_0);
++   rmesa->hw.cst.cmd[CST_CMD_4] = cmdpkt(rmesa, R200_EMIT_SE_VAP_CNTL_STATUS);
++   rmesa->hw.cst.cmd[CST_CMD_5] = cmdpkt(rmesa, R200_EMIT_RE_POINTSIZE);
++   rmesa->hw.cst.cmd[CST_CMD_6] = cmdpkt(rmesa, R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0);
++   rmesa->hw.tam.cmd[TAM_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TAM_DEBUG3);
++   rmesa->hw.tf.cmd[TF_CMD_0] = cmdpkt(rmesa, R200_EMIT_TFACTOR_0);
++   if (rmesa->radeon.radeonScreen->drmSupportsFragShader) {
++      rmesa->hw.atf.cmd[ATF_CMD_0] = cmdpkt(rmesa, R200_EMIT_ATF_TFACTOR);
++      rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_0);
++      rmesa->hw.tex[0].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_0);
++      rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_1);
++      rmesa->hw.tex[1].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_1);
++      rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_2);
++      rmesa->hw.tex[2].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_2);
++      rmesa->hw.tex[3].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_3);
++      rmesa->hw.tex[3].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_3);
++      rmesa->hw.tex[4].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_4);
++      rmesa->hw.tex[4].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_4);
++      rmesa->hw.tex[5].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_5);
++      rmesa->hw.tex[5].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_5);
+    } else {
+-      rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_0);
+-      rmesa->hw.tex[0].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_0);
+-      rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_1);
+-      rmesa->hw.tex[1].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_1);
+-      rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_2);
+-      rmesa->hw.tex[2].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_2);
+-      rmesa->hw.tex[3].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_3);
+-      rmesa->hw.tex[3].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_3);
+-      rmesa->hw.tex[4].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_4);
+-      rmesa->hw.tex[4].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_4);
+-      rmesa->hw.tex[5].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_5);
+-      rmesa->hw.tex[5].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_5);
+-   }
+-   rmesa->hw.afs[0].cmd[AFS_CMD_0] = cmdpkt(R200_EMIT_PP_AFS_0);
+-   rmesa->hw.afs[1].cmd[AFS_CMD_0] = cmdpkt(R200_EMIT_PP_AFS_1);
+-   rmesa->hw.pvs.cmd[PVS_CMD_0] = cmdpkt(R200_EMIT_VAP_PVS_CNTL);
+-   rmesa->hw.cube[0].cmd[CUBE_CMD_0] = cmdpkt(R200_EMIT_PP_CUBIC_FACES_0);
+-   rmesa->hw.cube[0].cmd[CUBE_CMD_1] = cmdpkt(R200_EMIT_PP_CUBIC_OFFSETS_0);
+-   rmesa->hw.cube[1].cmd[CUBE_CMD_0] = cmdpkt(R200_EMIT_PP_CUBIC_FACES_1);
+-   rmesa->hw.cube[1].cmd[CUBE_CMD_1] = cmdpkt(R200_EMIT_PP_CUBIC_OFFSETS_1);
+-   rmesa->hw.cube[2].cmd[CUBE_CMD_0] = cmdpkt(R200_EMIT_PP_CUBIC_FACES_2);
+-   rmesa->hw.cube[2].cmd[CUBE_CMD_1] = cmdpkt(R200_EMIT_PP_CUBIC_OFFSETS_2);
+-   rmesa->hw.cube[3].cmd[CUBE_CMD_0] = cmdpkt(R200_EMIT_PP_CUBIC_FACES_3);
+-   rmesa->hw.cube[3].cmd[CUBE_CMD_1] = cmdpkt(R200_EMIT_PP_CUBIC_OFFSETS_3);
+-   rmesa->hw.cube[4].cmd[CUBE_CMD_0] = cmdpkt(R200_EMIT_PP_CUBIC_FACES_4);
+-   rmesa->hw.cube[4].cmd[CUBE_CMD_1] = cmdpkt(R200_EMIT_PP_CUBIC_OFFSETS_4);
+-   rmesa->hw.cube[5].cmd[CUBE_CMD_0] = cmdpkt(R200_EMIT_PP_CUBIC_FACES_5);
+-   rmesa->hw.cube[5].cmd[CUBE_CMD_1] = cmdpkt(R200_EMIT_PP_CUBIC_OFFSETS_5);
+-   rmesa->hw.pix[0].cmd[PIX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCBLEND_0);
+-   rmesa->hw.pix[1].cmd[PIX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCBLEND_1);
+-   rmesa->hw.pix[2].cmd[PIX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCBLEND_2);
+-   rmesa->hw.pix[3].cmd[PIX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCBLEND_3);
+-   rmesa->hw.pix[4].cmd[PIX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCBLEND_4);
+-   rmesa->hw.pix[5].cmd[PIX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCBLEND_5);
+-   rmesa->hw.zbs.cmd[ZBS_CMD_0] = cmdpkt(RADEON_EMIT_SE_ZBIAS_FACTOR);
+-   rmesa->hw.tcl.cmd[TCL_CMD_0] = cmdpkt(R200_EMIT_TCL_LIGHT_MODEL_CTL_0);
+-   rmesa->hw.tcl.cmd[TCL_CMD_1] = cmdpkt(R200_EMIT_TCL_UCP_VERT_BLEND_CTL);
+-   rmesa->hw.tcg.cmd[TCG_CMD_0] = cmdpkt(R200_EMIT_TEX_PROC_CTL_2);
+-   rmesa->hw.msl.cmd[MSL_CMD_0] = cmdpkt(R200_EMIT_MATRIX_SELECT_0);
+-   rmesa->hw.vap.cmd[VAP_CMD_0] = cmdpkt(R200_EMIT_VAP_CTL);
+-   rmesa->hw.vtx.cmd[VTX_CMD_0] = cmdpkt(R200_EMIT_VTX_FMT_0);
+-   rmesa->hw.vtx.cmd[VTX_CMD_1] = cmdpkt(R200_EMIT_OUTPUT_VTX_COMP_SEL);
+-   rmesa->hw.vtx.cmd[VTX_CMD_2] = cmdpkt(R200_EMIT_SE_VTX_STATE_CNTL);
+-   rmesa->hw.vte.cmd[VTE_CMD_0] = cmdpkt(R200_EMIT_VTE_CNTL);
+-   rmesa->hw.prf.cmd[PRF_CMD_0] = cmdpkt(R200_EMIT_PP_TRI_PERF_CNTL);
+-   rmesa->hw.spr.cmd[SPR_CMD_0] = cmdpkt(R200_EMIT_TCL_POINT_SPRITE_CNTL);
++      rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXFILTER_0);
++      rmesa->hw.tex[0].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_0);
++      rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXFILTER_1);
++      rmesa->hw.tex[1].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_1);
++      rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXFILTER_2);
++      rmesa->hw.tex[2].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_2);
++      rmesa->hw.tex[3].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXFILTER_3);
++      rmesa->hw.tex[3].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_3);
++      rmesa->hw.tex[4].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXFILTER_4);
++      rmesa->hw.tex[4].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_4);
++      rmesa->hw.tex[5].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXFILTER_5);
++      rmesa->hw.tex[5].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_5);
++   }
++   rmesa->hw.afs[0].cmd[AFS_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_AFS_0);
++   rmesa->hw.afs[1].cmd[AFS_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_AFS_1);
++   rmesa->hw.pvs.cmd[PVS_CMD_0] = cmdpkt(rmesa, R200_EMIT_VAP_PVS_CNTL);
++   rmesa->hw.cube[0].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_0);
++   rmesa->hw.cube[0].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_0);
++   rmesa->hw.cube[1].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_1);
++   rmesa->hw.cube[1].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_1);
++   rmesa->hw.cube[2].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_2);
++   rmesa->hw.cube[2].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_2);
++   rmesa->hw.cube[3].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_3);
++   rmesa->hw.cube[3].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_3);
++   rmesa->hw.cube[4].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_4);
++   rmesa->hw.cube[4].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_4);
++   rmesa->hw.cube[5].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_5);
++   rmesa->hw.cube[5].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_5);
++   rmesa->hw.pix[0].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_0);
++   rmesa->hw.pix[1].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_1);
++   rmesa->hw.pix[2].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_2);
++   rmesa->hw.pix[3].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_3);
++   rmesa->hw.pix[4].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_4);
++   rmesa->hw.pix[5].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_5);
++   rmesa->hw.zbs.cmd[ZBS_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_ZBIAS_FACTOR);
++   rmesa->hw.tcl.cmd[TCL_CMD_0] = cmdpkt(rmesa, R200_EMIT_TCL_LIGHT_MODEL_CTL_0);
++   rmesa->hw.tcl.cmd[TCL_CMD_1] = cmdpkt(rmesa, R200_EMIT_TCL_UCP_VERT_BLEND_CTL);
++   rmesa->hw.tcg.cmd[TCG_CMD_0] = cmdpkt(rmesa, R200_EMIT_TEX_PROC_CTL_2);
++   rmesa->hw.msl.cmd[MSL_CMD_0] = cmdpkt(rmesa, R200_EMIT_MATRIX_SELECT_0);
++   rmesa->hw.vap.cmd[VAP_CMD_0] = cmdpkt(rmesa, R200_EMIT_VAP_CTL);
++   rmesa->hw.vtx.cmd[VTX_CMD_0] = cmdpkt(rmesa, R200_EMIT_VTX_FMT_0);
++   rmesa->hw.vtx.cmd[VTX_CMD_1] = cmdpkt(rmesa, R200_EMIT_OUTPUT_VTX_COMP_SEL);
++   rmesa->hw.vtx.cmd[VTX_CMD_2] = cmdpkt(rmesa, R200_EMIT_SE_VTX_STATE_CNTL);
++   rmesa->hw.vte.cmd[VTE_CMD_0] = cmdpkt(rmesa, R200_EMIT_VTE_CNTL);
++   rmesa->hw.prf.cmd[PRF_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TRI_PERF_CNTL);
++   rmesa->hw.spr.cmd[SPR_CMD_0] = cmdpkt(rmesa, R200_EMIT_TCL_POINT_SPRITE_CNTL);
++   if (rmesa->radeon.radeonScreen->kernel_mm) {
++	rmesa->hw.mtl[0].emit = mtl_emit;
++	rmesa->hw.mtl[1].emit = mtl_emit;
++
++	rmesa->hw.vpi[0].emit = veclinear_emit;
++	rmesa->hw.vpi[1].emit = veclinear_emit;
++	rmesa->hw.vpp[0].emit = veclinear_emit;
++	rmesa->hw.vpp[1].emit = veclinear_emit;
++
++	rmesa->hw.grd.emit = scl_emit;
++	rmesa->hw.fog.emit = vec_emit;
++	rmesa->hw.glt.emit = vec_emit;
++	rmesa->hw.eye.emit = vec_emit;
++
++	for (i = R200_MTX_MV; i <= R200_MTX_TEX5; i++)
++	  rmesa->hw.mat[i].emit = vec_emit;
++
++	for (i = 0; i < 8; i++)
++	  rmesa->hw.lit[i].emit = lit_emit;
++
++	for (i = 0; i < 6; i++)
++	  rmesa->hw.ucp[i].emit = vec_emit;
++
++	rmesa->hw.ptp.emit = ptp_emit;
++   }
++
++
++   
+    rmesa->hw.mtl[0].cmd[MTL_CMD_0] = 
+       cmdvec( R200_VS_MAT_0_EMISS, 1, 16 );
+    rmesa->hw.mtl[0].cmd[MTL_CMD_1] = 
+@@ -567,7 +1004,7 @@ void r200InitState( r200ContextPtr rmesa )
+ 				(R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) |
+ 				(R200_BLEND_GL_ZERO << R200_DST_BLEND_SHIFT));
+ 
+-   if (rmesa->r200Screen->drmSupportsBlendColor) {
++   if (rmesa->radeon.radeonScreen->drmSupportsBlendColor) {
+       rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCOLOR] = 0x00000000;
+       rmesa->hw.ctx.cmd[CTX_RB3D_ABLENDCNTL] = (R200_COMB_FCN_ADD_CLAMP |
+ 				(R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) |
+@@ -578,10 +1015,10 @@ void r200InitState( r200ContextPtr rmesa )
+    }
+ 
+    rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHOFFSET] =
+-      rmesa->r200Screen->depthOffset + rmesa->r200Screen->fbLocation;
++      rmesa->radeon.radeonScreen->depthOffset + rmesa->radeon.radeonScreen->fbLocation;
+ 
+    rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHPITCH] = 
+-      ((rmesa->r200Screen->depthPitch &
++      ((rmesa->radeon.radeonScreen->depthPitch &
+ 	R200_DEPTHPITCH_MASK) |
+        R200_DEPTH_ENDIAN_NO_SWAP);
+    
+@@ -599,7 +1036,7 @@ void r200InitState( r200ContextPtr rmesa )
+    if (rmesa->using_hyperz) {
+       rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_Z_COMPRESSION_ENABLE |
+ 						  R200_Z_DECOMPRESSION_ENABLE;
+-/*      if (rmesa->r200Screen->chip_family == CHIP_FAMILY_R200)
++/*      if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200)
+ 	 rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_HIERARCHY_ENABLE;*/
+    }
+ 
+@@ -607,7 +1044,7 @@ void r200InitState( r200ContextPtr rmesa )
+  				     | R200_TEX_BLEND_0_ENABLE);
+ 
+    rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] = color_fmt;
+-   switch ( driQueryOptioni( &rmesa->optionCache, "dither_mode" ) ) {
++   switch ( driQueryOptioni( &rmesa->radeon.optionCache, "dither_mode" ) ) {
+    case DRI_CONF_DITHER_XERRORDIFFRESET:
+       rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= R200_DITHER_INIT;
+       break;
+@@ -615,28 +1052,28 @@ void r200InitState( r200ContextPtr rmesa )
+       rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= R200_SCALE_DITHER_ENABLE;
+       break;
+    }
+-   if ( driQueryOptioni( &rmesa->optionCache, "round_mode" ) ==
++   if ( driQueryOptioni( &rmesa->radeon.optionCache, "round_mode" ) ==
+ 	DRI_CONF_ROUND_ROUND )
+-      rmesa->state.color.roundEnable = R200_ROUND_ENABLE;
++      rmesa->radeon.state.color.roundEnable = R200_ROUND_ENABLE;
+    else
+-      rmesa->state.color.roundEnable = 0;
+-   if ( driQueryOptioni (&rmesa->optionCache, "color_reduction" ) ==
++      rmesa->radeon.state.color.roundEnable = 0;
++   if ( driQueryOptioni (&rmesa->radeon.optionCache, "color_reduction" ) ==
+ 	DRI_CONF_COLOR_REDUCTION_DITHER )
+       rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= R200_DITHER_ENABLE;
+    else
+-      rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= rmesa->state.color.roundEnable;
++      rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= rmesa->radeon.state.color.roundEnable;
+ 
+ #if 000
+-   rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] = ((rmesa->state.color.drawOffset +
+-					       rmesa->r200Screen->fbLocation)
++   rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] = ((rmesa->radeon.state.color.drawOffset +
++					       rmesa->radeon.radeonScreen->fbLocation)
+ 					      & R200_COLOROFFSET_MASK);
+ 
+-   rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = ((rmesa->state.color.drawPitch &
++   rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = ((rmesa->radeon.state.color.drawPitch &
+ 					      R200_COLORPITCH_MASK) |
+ 					     R200_COLOR_ENDIAN_NO_SWAP);
+ #else
+    rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] = ((drawOffset +
+-					       rmesa->r200Screen->fbLocation)
++					       rmesa->radeon.radeonScreen->fbLocation)
+ 					      & R200_COLOROFFSET_MASK);
+ 
+    rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = ((drawPitch &
+@@ -644,12 +1081,12 @@ void r200InitState( r200ContextPtr rmesa )
+ 					     R200_COLOR_ENDIAN_NO_SWAP);
+ #endif
+    /* (fixed size) sarea is initialized to zero afaics so can omit version check. Phew! */
+-   if (rmesa->sarea->tiling_enabled) {
++   if (rmesa->radeon.sarea->tiling_enabled) {
+       rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= R200_COLOR_TILE_ENABLE;
+    }
+ 
+    rmesa->hw.prf.cmd[PRF_PP_TRI_PERF] = R200_TRI_CUTOFF_MASK - R200_TRI_CUTOFF_MASK * 
+-			driQueryOptionf (&rmesa->optionCache,"texture_blend_quality");
++			driQueryOptionf (&rmesa->radeon.optionCache,"texture_blend_quality");
+    rmesa->hw.prf.cmd[PRF_PP_PERF_CNTL] = 0;
+ 
+    rmesa->hw.set.cmd[SET_SE_CNTL] = (R200_FFACE_CULL_CCW |
+@@ -704,7 +1141,7 @@ void r200InitState( r200ContextPtr rmesa )
+ 						R200_VC_NO_SWAP;
+ #endif
+ 
+-   if (!(rmesa->r200Screen->chip_flags & RADEON_CHIPSET_TCL)) {
++   if (!(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) {
+       /* Bypass TCL */
+       rmesa->hw.cst.cmd[CST_SE_VAP_CNTL_STATUS] |= (1<<8);
+    }
+@@ -743,28 +1180,28 @@ void r200InitState( r200ContextPtr rmesa )
+       rmesa->hw.tex[i].cmd[TEX_PP_TXFORMAT_X] =
+          (/* R200_TEXCOORD_PROJ | */
+           0x100000);	/* Small default bias */
+-      if (rmesa->r200Screen->drmSupportsFragShader) {
++      if (rmesa->radeon.radeonScreen->drmSupportsFragShader) {
+ 	 rmesa->hw.tex[i].cmd[TEX_PP_TXOFFSET_NEWDRM] =
+-	     rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP];
++	     rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
+ 	 rmesa->hw.tex[i].cmd[TEX_PP_CUBIC_FACES] = 0;
+ 	 rmesa->hw.tex[i].cmd[TEX_PP_TXMULTI_CTL] = 0;
+       }
+       else {
+ 	  rmesa->hw.tex[i].cmd[TEX_PP_TXOFFSET_OLDDRM] =
+-	     rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP];
++	     rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
+      }
+ 
+       rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_FACES] = 0;
+       rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F1] =
+-         rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP];
++         rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
+       rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F2] =
+-         rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP];
++         rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
+       rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F3] =
+-         rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP];
++         rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
+       rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F4] =
+-         rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP];
++         rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
+       rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F5] =
+-         rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP];
++         rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
+ 
+       rmesa->hw.pix[i].cmd[PIX_PP_TXCBLEND] =
+          (R200_TXC_ARG_A_ZERO |
+@@ -967,5 +1404,7 @@ void r200InitState( r200ContextPtr rmesa )
+ 
+    r200LightingSpaceChange( ctx );
+ 
+-   rmesa->hw.all_dirty = GL_TRUE;
++   rmesa->radeon.hw.all_dirty = GL_TRUE;
++
++   rcommonInitCmdBuf(&rmesa->radeon);
+ }
+diff --git a/src/mesa/drivers/dri/r200/r200_swtcl.c b/src/mesa/drivers/dri/r200/r200_swtcl.c
+index b25f028..b006409 100644
+--- a/src/mesa/drivers/dri/r200/r200_swtcl.c
++++ b/src/mesa/drivers/dri/r200/r200_swtcl.c
+@@ -55,27 +55,24 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "r200_tcl.h"
+ 
+ 
+-static void flush_last_swtcl_prim( r200ContextPtr rmesa  );
+-
+-
+ /***********************************************************************
+  *                         Initialization 
+  ***********************************************************************/
+ 
+ #define EMIT_ATTR( ATTR, STYLE, F0 )					\
+ do {									\
+-   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = (ATTR);	\
+-   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = (STYLE);	\
+-   rmesa->swtcl.vertex_attr_count++;					\
++   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = (ATTR);	\
++   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = (STYLE);	\
++   rmesa->radeon.swtcl.vertex_attr_count++;					\
+    fmt_0 |= F0;								\
+ } while (0)
+ 
+ #define EMIT_PAD( N )							\
+ do {									\
+-   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = 0;		\
+-   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = EMIT_PAD;	\
+-   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].offset = (N);		\
+-   rmesa->swtcl.vertex_attr_count++;					\
++   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = 0;		\
++   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = EMIT_PAD;	\
++   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].offset = (N);		\
++   rmesa->radeon.swtcl.vertex_attr_count++;					\
+ } while (0)
+ 
+ static void r200SetVertexFormat( GLcontext *ctx )
+@@ -100,7 +97,7 @@ static void r200SetVertexFormat( GLcontext *ctx )
+    }
+ 
+    assert( VB->AttribPtr[VERT_ATTRIB_POS] != NULL );
+-   rmesa->swtcl.vertex_attr_count = 0;
++   rmesa->radeon.swtcl.vertex_attr_count = 0;
+ 
+    /* EMIT_ATTR's must be in order as they tell t_vertex.c how to
+     * build up a hardware vertex.
+@@ -185,7 +182,7 @@ static void r200SetVertexFormat( GLcontext *ctx )
+       rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] |= R200_FOG_USE_SPEC_ALPHA;
+    }
+ 
+-   if (!RENDERINPUTS_EQUAL( rmesa->tnl_index_bitset, index_bitset ) ||
++   if (!RENDERINPUTS_EQUAL( rmesa->radeon.tnl_index_bitset, index_bitset ) ||
+ 	(rmesa->hw.vtx.cmd[VTX_VTXFMT_0] != fmt_0) ||
+ 	(rmesa->hw.vtx.cmd[VTX_VTXFMT_1] != fmt_1) ) {
+       R200_NEWPRIM(rmesa);
+@@ -193,26 +190,20 @@ static void r200SetVertexFormat( GLcontext *ctx )
+       rmesa->hw.vtx.cmd[VTX_VTXFMT_0] = fmt_0;
+       rmesa->hw.vtx.cmd[VTX_VTXFMT_1] = fmt_1;
+ 
+-      rmesa->swtcl.vertex_size =
++      rmesa->radeon.swtcl.vertex_size =
+ 	  _tnl_install_attrs( ctx,
+-			      rmesa->swtcl.vertex_attrs, 
+-			      rmesa->swtcl.vertex_attr_count,
++			      rmesa->radeon.swtcl.vertex_attrs, 
++			      rmesa->radeon.swtcl.vertex_attr_count,
+ 			      NULL, 0 );
+-      rmesa->swtcl.vertex_size /= 4;
+-      RENDERINPUTS_COPY( rmesa->tnl_index_bitset, index_bitset );
++      rmesa->radeon.swtcl.vertex_size /= 4;
++      RENDERINPUTS_COPY( rmesa->radeon.tnl_index_bitset, index_bitset );
+    }
+ }
+ 
+ 
+ static void r200RenderStart( GLcontext *ctx )
+ {
+-   r200ContextPtr rmesa = R200_CONTEXT( ctx );
+-
+    r200SetVertexFormat( ctx );
+-
+-   if (rmesa->dma.flush != 0 && 
+-       rmesa->dma.flush != flush_last_swtcl_prim)
+-      rmesa->dma.flush( rmesa );
+ }
+ 
+ 
+@@ -232,7 +223,7 @@ void r200ChooseVertexState( GLcontext *ctx )
+     * rasterization fallback.  As this function will be called again when we
+     * leave a rasterization fallback, we can just skip it for now.
+     */
+-   if (rmesa->Fallback != 0)
++   if (rmesa->radeon.Fallback != 0)
+       return;
+ 
+    vte = rmesa->hw.vte.cmd[VTE_SE_VTE_CNTL];
+@@ -273,78 +264,27 @@ void r200ChooseVertexState( GLcontext *ctx )
+    }
+ }
+ 
+-
+-/* Flush vertices in the current dma region.
+- */
+-static void flush_last_swtcl_prim( r200ContextPtr rmesa  )
+-{
+-   if (R200_DEBUG & DEBUG_IOCTL)
+-      fprintf(stderr, "%s\n", __FUNCTION__);
+-
+-   rmesa->dma.flush = NULL;
+-
+-   if (rmesa->dma.current.buf) {
+-      struct r200_dma_region *current = &rmesa->dma.current;
+-      GLuint current_offset = (rmesa->r200Screen->gart_buffer_offset +
+-			       current->buf->buf->idx * RADEON_BUFFER_SIZE + 
+-			       current->start);
+-
+-      assert (!(rmesa->swtcl.hw_primitive & R200_VF_PRIM_WALK_IND));
+-
+-      assert (current->start + 
+-	      rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
+-	      current->ptr);
+-
+-      if (rmesa->dma.current.start != rmesa->dma.current.ptr) {
+-	 r200EnsureCmdBufSpace( rmesa, VERT_AOS_BUFSZ +
+-			        rmesa->hw.max_state_size + VBUF_BUFSZ );
+-	 r200EmitVertexAOS( rmesa,
+-			      rmesa->swtcl.vertex_size,
+-			      current_offset);
+-
+-	 r200EmitVbufPrim( rmesa,
+-			   rmesa->swtcl.hw_primitive,
+-			   rmesa->swtcl.numverts);
+-      }
+-
+-      rmesa->swtcl.numverts = 0;
+-      current->start = current->ptr;
+-   }
+-}
+-
+-
+-/* Alloc space in the current dma region.
+- */
+-static INLINE void *
+-r200AllocDmaLowVerts( r200ContextPtr rmesa, int nverts, int vsize )
++void r200_swtcl_flush(GLcontext *ctx, uint32_t current_offset)
+ {
+-   GLuint bytes = vsize * nverts;
+-
+-   if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end ) 
+-      r200RefillCurrentDmaRegion( rmesa );
+-
+-   if (!rmesa->dma.flush) {
+-      rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
+-      rmesa->dma.flush = flush_last_swtcl_prim;
+-   }
++   r200ContextPtr rmesa = R200_CONTEXT(ctx);
++   rcommonEnsureCmdBufSpace(&rmesa->radeon,
++			    rmesa->radeon.hw.max_state_size + (12*sizeof(int)),
++			    __FUNCTION__);
+ 
+-   ASSERT( vsize == rmesa->swtcl.vertex_size * 4 );
+-   ASSERT( rmesa->dma.flush == flush_last_swtcl_prim );
+-   ASSERT( rmesa->dma.current.start + 
+-	   rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
+-	   rmesa->dma.current.ptr );
+ 
++   radeonEmitState(&rmesa->radeon);
++   r200EmitVertexAOS( rmesa,
++		      rmesa->radeon.swtcl.vertex_size,
++		      rmesa->radeon.dma.current,
++		      current_offset);
+ 
+-   {
+-      GLubyte *head = (GLubyte *) (rmesa->dma.current.address + rmesa->dma.current.ptr);
+-      rmesa->dma.current.ptr += bytes;
+-      rmesa->swtcl.numverts += nverts;
+-      return head;
+-   }
++		      
++   r200EmitVbufPrim( rmesa,
++		     rmesa->radeon.swtcl.hw_primitive,
++		     rmesa->radeon.swtcl.numverts);
+ 
+ }
+ 
+-
+ /**************************************************************************/
+ 
+ 
+@@ -392,13 +332,13 @@ static void r200ResetLineStipple( GLcontext *ctx );
+ #undef LOCAL_VARS
+ #undef ALLOC_VERTS
+ #define CTX_ARG r200ContextPtr rmesa
+-#define GET_VERTEX_DWORDS() rmesa->swtcl.vertex_size
+-#define ALLOC_VERTS( n, size ) r200AllocDmaLowVerts( rmesa, n, size * 4 )
++#define GET_VERTEX_DWORDS() rmesa->radeon.swtcl.vertex_size
++#define ALLOC_VERTS( n, size ) rcommonAllocDmaLowVerts( &rmesa->radeon, n, size * 4 )
+ #define LOCAL_VARS						\
+    r200ContextPtr rmesa = R200_CONTEXT(ctx);		\
+-   const char *r200verts = (char *)rmesa->swtcl.verts;
+-#define VERT(x) (r200Vertex *)(r200verts + ((x) * vertsize * sizeof(int)))
+-#define VERTEX r200Vertex 
++   const char *r200verts = (char *)rmesa->radeon.swtcl.verts;
++#define VERT(x) (radeonVertex *)(r200verts + ((x) * vertsize * sizeof(int)))
++#define VERTEX radeonVertex 
+ #define DO_DEBUG_VERTS (1 && (R200_DEBUG & DEBUG_VERTS))
+ 
+ #undef TAG
+@@ -456,11 +396,11 @@ static struct {
+ #define VERT_Y(_v) _v->v.y
+ #define VERT_Z(_v) _v->v.z
+ #define AREA_IS_CCW( a ) (a < 0)
+-#define GET_VERTEX(e) (rmesa->swtcl.verts + (e*rmesa->swtcl.vertex_size*sizeof(int)))
++#define GET_VERTEX(e) (rmesa->radeon.swtcl.verts + (e*rmesa->radeon.swtcl.vertex_size*sizeof(int)))
+ 
+ #define VERT_SET_RGBA( v, c )  					\
+ do {								\
+-   r200_color_t *color = (r200_color_t *)&((v)->ui[coloroffset]);	\
++   radeon_color_t *color = (radeon_color_t *)&((v)->ui[coloroffset]);	\
+    UNCLAMPED_FLOAT_TO_UBYTE(color->red, (c)[0]);		\
+    UNCLAMPED_FLOAT_TO_UBYTE(color->green, (c)[1]);		\
+    UNCLAMPED_FLOAT_TO_UBYTE(color->blue, (c)[2]);		\
+@@ -472,7 +412,7 @@ do {								\
+ #define VERT_SET_SPEC( v, c )					\
+ do {								\
+    if (specoffset) {						\
+-      r200_color_t *spec = (r200_color_t *)&((v)->ui[specoffset]);	\
++      radeon_color_t *spec = (radeon_color_t *)&((v)->ui[specoffset]);	\
+       UNCLAMPED_FLOAT_TO_UBYTE(spec->red, (c)[0]);	\
+       UNCLAMPED_FLOAT_TO_UBYTE(spec->green, (c)[1]);	\
+       UNCLAMPED_FLOAT_TO_UBYTE(spec->blue, (c)[2]);	\
+@@ -481,8 +421,8 @@ do {								\
+ #define VERT_COPY_SPEC( v0, v1 )			\
+ do {							\
+    if (specoffset) {					\
+-      r200_color_t *spec0 = (r200_color_t *)&((v0)->ui[specoffset]);	\
+-      r200_color_t *spec1 = (r200_color_t *)&((v1)->ui[specoffset]);	\
++      radeon_color_t *spec0 = (radeon_color_t *)&((v0)->ui[specoffset]);	\
++      radeon_color_t *spec1 = (radeon_color_t *)&((v1)->ui[specoffset]);	\
+       spec0->red   = spec1->red;	\
+       spec0->green = spec1->green;	\
+       spec0->blue  = spec1->blue; 	\
+@@ -513,7 +453,7 @@ do {							\
+  ***********************************************************************/
+ 
+ #define RASTERIZE(x) r200RasterPrimitive( ctx, reduced_hw_prim(ctx, x) )
+-#define RENDER_PRIMITIVE rmesa->swtcl.render_primitive
++#define RENDER_PRIMITIVE rmesa->radeon.swtcl.render_primitive
+ #undef TAG
+ #define TAG(x) x
+ #include "tnl_dd/t_dd_unfilled.h"
+@@ -569,8 +509,8 @@ static void init_rast_tab( void )
+ #undef LOCAL_VARS
+ #define LOCAL_VARS						\
+    r200ContextPtr rmesa = R200_CONTEXT(ctx);		\
+-   const GLuint vertsize = rmesa->swtcl.vertex_size;		\
+-   const char *r200verts = (char *)rmesa->swtcl.verts;		\
++   const GLuint vertsize = rmesa->radeon.swtcl.vertex_size;		\
++   const char *r200verts = (char *)rmesa->radeon.swtcl.verts;		\
+    const GLuint * const elt = TNL_CONTEXT(ctx)->vb.Elts;	\
+    const GLboolean stipple = ctx->Line.StippleFlag;		\
+    (void) elt; (void) stipple;
+@@ -599,13 +539,13 @@ void r200ChooseRenderState( GLcontext *ctx )
+    GLuint index = 0;
+    GLuint flags = ctx->_TriangleCaps;
+ 
+-   if (!rmesa->TclFallback || rmesa->Fallback) 
++   if (!rmesa->radeon.TclFallback || rmesa->radeon.Fallback) 
+       return;
+ 
+    if (flags & DD_TRI_LIGHT_TWOSIDE) index |= R200_TWOSIDE_BIT;
+    if (flags & DD_TRI_UNFILLED)      index |= R200_UNFILLED_BIT;
+ 
+-   if (index != rmesa->swtcl.RenderIndex) {
++   if (index != rmesa->radeon.swtcl.RenderIndex) {
+       tnl->Driver.Render.Points = rast_tab[index].points;
+       tnl->Driver.Render.Line = rast_tab[index].line;
+       tnl->Driver.Render.ClippedLine = rast_tab[index].line;
+@@ -622,7 +562,7 @@ void r200ChooseRenderState( GLcontext *ctx )
+ 	 tnl->Driver.Render.ClippedPolygon = _tnl_RenderClippedPolygon;
+       }
+ 
+-      rmesa->swtcl.RenderIndex = index;
++      rmesa->radeon.swtcl.RenderIndex = index;
+    }
+ }
+ 
+@@ -636,7 +576,7 @@ static void r200RasterPrimitive( GLcontext *ctx, GLuint hwprim )
+ {
+    r200ContextPtr rmesa = R200_CONTEXT(ctx);
+ 
+-   if (rmesa->swtcl.hw_primitive != hwprim) {
++   if (rmesa->radeon.swtcl.hw_primitive != hwprim) {
+       /* need to disable perspective-correct texturing for point sprites */
+       if ((hwprim & 0xf) == R200_VF_PRIM_POINT_SPRITES && ctx->Point.PointSprite) {
+ 	 if (rmesa->hw.set.cmd[SET_RE_CNTL] & R200_PERSPECTIVE_ENABLE) {
+@@ -649,14 +589,14 @@ static void r200RasterPrimitive( GLcontext *ctx, GLuint hwprim )
+ 	 rmesa->hw.set.cmd[SET_RE_CNTL] |= R200_PERSPECTIVE_ENABLE;
+       }
+       R200_NEWPRIM( rmesa );
+-      rmesa->swtcl.hw_primitive = hwprim;
++      rmesa->radeon.swtcl.hw_primitive = hwprim;
+    }
+ }
+ 
+ static void r200RenderPrimitive( GLcontext *ctx, GLenum prim )
+ {
+    r200ContextPtr rmesa = R200_CONTEXT(ctx);
+-   rmesa->swtcl.render_primitive = prim;
++   rmesa->radeon.swtcl.render_primitive = prim;
+    if (prim < GL_TRIANGLES || !(ctx->_TriangleCaps & DD_TRI_UNFILLED)) 
+       r200RasterPrimitive( ctx, reduced_hw_prim(ctx, prim) );
+ }
+@@ -701,15 +641,15 @@ void r200Fallback( GLcontext *ctx, GLuint bit, GLboolean mode )
+ {
+    r200ContextPtr rmesa = R200_CONTEXT(ctx);
+    TNLcontext *tnl = TNL_CONTEXT(ctx);
+-   GLuint oldfallback = rmesa->Fallback;
++   GLuint oldfallback = rmesa->radeon.Fallback;
+ 
+    if (mode) {
+-      rmesa->Fallback |= bit;
++      rmesa->radeon.Fallback |= bit;
+       if (oldfallback == 0) {
+-	 R200_FIREVERTICES( rmesa );
++	 radeon_firevertices(&rmesa->radeon);
+ 	 TCL_FALLBACK( ctx, R200_TCL_FALLBACK_RASTER, GL_TRUE );
+ 	 _swsetup_Wakeup( ctx );
+-	 rmesa->swtcl.RenderIndex = ~0;
++	 rmesa->radeon.swtcl.RenderIndex = ~0;
+          if (R200_DEBUG & DEBUG_FALLBACKS) {
+             fprintf(stderr, "R200 begin rasterization fallback: 0x%x %s\n",
+                     bit, getFallbackString(bit));
+@@ -717,7 +657,7 @@ void r200Fallback( GLcontext *ctx, GLuint bit, GLboolean mode )
+       }
+    }
+    else {
+-      rmesa->Fallback &= ~bit;
++      rmesa->radeon.Fallback &= ~bit;
+       if (oldfallback == bit) {
+ 
+ 	 _swrast_flush( ctx );
+@@ -731,14 +671,14 @@ void r200Fallback( GLcontext *ctx, GLuint bit, GLboolean mode )
+ 
+ 	 tnl->Driver.Render.ResetLineStipple = r200ResetLineStipple;
+ 	 TCL_FALLBACK( ctx, R200_TCL_FALLBACK_RASTER, GL_FALSE );
+-	 if (rmesa->TclFallback) {
+-	    /* These are already done if rmesa->TclFallback goes to
++	 if (rmesa->radeon.TclFallback) {
++	    /* These are already done if rmesa->radeon.TclFallback goes to
+ 	     * zero above. But not if it doesn't (R200_NO_TCL for
+ 	     * example?)
+ 	     */
+ 	    _tnl_invalidate_vertex_state( ctx, ~0 );
+ 	    _tnl_invalidate_vertices( ctx, ~0 );
+-	    RENDERINPUTS_ZERO( rmesa->tnl_index_bitset );
++	    RENDERINPUTS_ZERO( rmesa->radeon.tnl_index_bitset );
+ 	    r200ChooseVertexState( ctx );
+ 	    r200ChooseRenderState( ctx );
+ 	 }
+@@ -772,7 +712,7 @@ r200PointsBitmap( GLcontext *ctx, GLint px, GLint py,
+    r200ContextPtr rmesa = R200_CONTEXT(ctx);
+    const GLfloat *rc = ctx->Current.RasterColor; 
+    GLint row, col;
+-   r200Vertex vert;
++   radeonVertex vert;
+    GLuint orig_vte;
+    GLuint h;
+ 
+@@ -794,7 +734,7 @@ r200PointsBitmap( GLcontext *ctx, GLint px, GLint py,
+       vte |= R200_VTX_W0_FMT;
+       vap &= ~R200_VAP_FORCE_W_TO_ONE;
+ 
+-      rmesa->swtcl.vertex_size = 5;
++      rmesa->radeon.swtcl.vertex_size = 5;
+ 
+       if ( (rmesa->hw.vtx.cmd[VTX_VTXFMT_0] != fmt_0)
+ 	   || (rmesa->hw.vtx.cmd[VTX_VTXFMT_1] != fmt_1) ) {
+@@ -871,10 +811,10 @@ r200PointsBitmap( GLcontext *ctx, GLint px, GLint py,
+ 
+    /* Update window height
+     */
+-   LOCK_HARDWARE( rmesa );
+-   UNLOCK_HARDWARE( rmesa );
+-   h = rmesa->dri.drawable->h + rmesa->dri.drawable->y;
+-   px += rmesa->dri.drawable->x;
++   LOCK_HARDWARE( &rmesa->radeon );
++   UNLOCK_HARDWARE( &rmesa->radeon );
++   h = rmesa->radeon.dri.drawable->h + rmesa->radeon.dri.drawable->y;
++   px += rmesa->radeon.dri.drawable->x;
+ 
+    /* Clipping handled by existing mechansims in r200_ioctl.c?
+     */
+@@ -929,7 +869,7 @@ r200PointsBitmap( GLcontext *ctx, GLint px, GLint py,
+ 
+    /* Need to restore vertexformat?
+     */
+-   if (rmesa->TclFallback)
++   if (rmesa->radeon.TclFallback)
+       r200ChooseVertexState( ctx );
+ }
+ 
+@@ -962,17 +902,13 @@ void r200InitSwtcl( GLcontext *ctx )
+    _tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12, 
+ 		       36 * sizeof(GLfloat) );
+    
+-   rmesa->swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf;
+-   rmesa->swtcl.RenderIndex = ~0;
+-   rmesa->swtcl.render_primitive = GL_TRIANGLES;
+-   rmesa->swtcl.hw_primitive = 0;
++   rmesa->radeon.swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf;
++   rmesa->radeon.swtcl.RenderIndex = ~0;
++   rmesa->radeon.swtcl.render_primitive = GL_TRIANGLES;
++   rmesa->radeon.swtcl.hw_primitive = 0;
+ }
+ 
+ 
+ void r200DestroySwtcl( GLcontext *ctx )
+ {
+-   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+-
+-   if (rmesa->swtcl.indexed_verts.buf) 
+-      r200ReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts, __FUNCTION__ );
+ }
+diff --git a/src/mesa/drivers/dri/r200/r200_swtcl.h b/src/mesa/drivers/dri/r200/r200_swtcl.h
+index 8c29fd0..a4051a4 100644
+--- a/src/mesa/drivers/dri/r200/r200_swtcl.h
++++ b/src/mesa/drivers/dri/r200/r200_swtcl.h
+@@ -52,15 +52,11 @@ extern void r200BuildVertices( GLcontext *ctx, GLuint start, GLuint count,
+ extern void r200PrintSetupFlags(char *msg, GLuint flags );
+ 
+ 
+-extern void r200_emit_indexed_verts( GLcontext *ctx,
+-				       GLuint start,
+-				       GLuint count );
+-
+ extern void r200_translate_vertex( GLcontext *ctx, 
+-				     const r200Vertex *src, 
++				     const radeonVertex *src, 
+ 				     SWvertex *dst );
+ 
+-extern void r200_print_vertex( GLcontext *ctx, const r200Vertex *v );
++extern void r200_print_vertex( GLcontext *ctx, const radeonVertex *v );
+ 
+ extern void r200_import_float_colors( GLcontext *ctx );
+ extern void r200_import_float_spec_colors( GLcontext *ctx );
+@@ -70,5 +66,5 @@ extern void r200PointsBitmap( GLcontext *ctx, GLint px, GLint py,
+ 			      const struct gl_pixelstore_attrib *unpack,
+ 			      const GLubyte *bitmap );
+ 
+-
++void r200_swtcl_flush(GLcontext *ctx, uint32_t current_offset);
+ #endif
+diff --git a/src/mesa/drivers/dri/r200/r200_tcl.c b/src/mesa/drivers/dri/r200/r200_tcl.c
+index 99aecfe..8e0fb14 100644
+--- a/src/mesa/drivers/dri/r200/r200_tcl.c
++++ b/src/mesa/drivers/dri/r200/r200_tcl.c
+@@ -123,7 +123,7 @@ static GLboolean discrete_prim[0x10] = {
+ 
+ #define RESET_STIPPLE() do {			\
+    R200_STATECHANGE( rmesa, lin );		\
+-   r200EmitState( rmesa );			\
++   radeonEmitState(&rmesa->radeon);			\
+ } while (0)
+ 
+ #define AUTO_STIPPLE( mode )  do {		\
+@@ -134,7 +134,7 @@ static GLboolean discrete_prim[0x10] = {
+    else						\
+       rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] &=	\
+ 	 ~R200_LINE_PATTERN_AUTO_RESET;	\
+-   r200EmitState( rmesa );			\
++   radeonEmitState(&rmesa->radeon);			\
+ } while (0)
+ 
+ 
+@@ -142,25 +142,23 @@ static GLboolean discrete_prim[0x10] = {
+ 
+ static GLushort *r200AllocElts( r200ContextPtr rmesa, GLuint nr ) 
+ {
+-   if (rmesa->dma.flush == r200FlushElts &&
+-       rmesa->store.cmd_used + nr*2 < R200_CMD_BUF_SZ) {
++   if (rmesa->radeon.dma.flush == r200FlushElts &&
++       rmesa->tcl.elt_used + nr*2 < R200_ELT_BUF_SZ) {
+ 
+-      GLushort *dest = (GLushort *)(rmesa->store.cmd_buf +
+-				    rmesa->store.cmd_used);
++      GLushort *dest = (GLushort *)(rmesa->tcl.elt_dma_bo->ptr +
++				    rmesa->tcl.elt_used);
+ 
+-      rmesa->store.cmd_used += nr*2;
++      rmesa->tcl.elt_used += nr*2;
+ 
+       return dest;
+    }
+    else {
+-      if (rmesa->dma.flush)
+-	 rmesa->dma.flush( rmesa );
++      if (rmesa->radeon.dma.flush)
++	 rmesa->radeon.dma.flush( rmesa->radeon.glCtx );
+ 
+-      r200EnsureCmdBufSpace( rmesa, AOS_BUFSZ(rmesa->tcl.nr_aos_components) +
+-			     rmesa->hw.max_state_size + ELTS_BUFSZ(nr) );
++      rcommonEnsureCmdBufSpace(&rmesa->radeon, AOS_BUFSZ(rmesa->tcl.nr_aos_components), __FUNCTION__);
+ 
+       r200EmitAOS( rmesa,
+-		   rmesa->tcl.aos_components,
+ 		   rmesa->tcl.nr_aos_components, 0 );
+ 
+       return r200AllocEltsOpenEnded( rmesa, rmesa->tcl.hw_primitive, nr );
+@@ -188,13 +186,14 @@ static void r200EmitPrim( GLcontext *ctx,
+    r200ContextPtr rmesa = R200_CONTEXT( ctx );
+    r200TclPrimitive( ctx, prim, hwprim );
+    
+-   r200EnsureCmdBufSpace( rmesa, AOS_BUFSZ(rmesa->tcl.nr_aos_components) +
+-			  rmesa->hw.max_state_size + VBUF_BUFSZ );
++   //   fprintf(stderr,"Emit prim %d\n", rmesa->tcl.nr_aos_components);
++   rcommonEnsureCmdBufSpace( &rmesa->radeon,
++			     AOS_BUFSZ(rmesa->tcl.nr_aos_components) +
++			     rmesa->radeon.hw.max_state_size + VBUF_BUFSZ, __FUNCTION__ );
+ 
+    r200EmitAOS( rmesa,
+-		  rmesa->tcl.aos_components,
+-		  rmesa->tcl.nr_aos_components,
+-		  start );
++		rmesa->tcl.nr_aos_components,
++		start );
+    
+    /* Why couldn't this packet have taken an offset param?
+     */
+@@ -394,7 +393,7 @@ static GLboolean r200_run_tcl_render( GLcontext *ctx,
+ 
+    /* TODO: separate this from the swtnl pipeline 
+     */
+-   if (rmesa->TclFallback)
++   if (rmesa->radeon.TclFallback)
+       return GL_TRUE;	/* fallback to software t&l */
+ 
+    if (R200_DEBUG & DEBUG_PRIMS)
+@@ -405,8 +404,9 @@ static GLboolean r200_run_tcl_render( GLcontext *ctx,
+ 
+    /* Validate state:
+     */
+-   if (rmesa->NewGLState)
+-      r200ValidateState( ctx );
++   if (rmesa->radeon.NewGLState)
++      if (!r200ValidateState( ctx ))
++         return GL_TRUE; /* fallback to sw t&l */
+ 
+    if (!ctx->VertexProgram._Enabled) {
+    /* NOTE: inputs != tnl->render_inputs - these are the untransformed
+@@ -565,15 +565,11 @@ static void transition_to_hwtnl( GLcontext *ctx )
+ 
+    tnl->Driver.NotifyMaterialChange = r200UpdateMaterial;
+ 
+-   if ( rmesa->dma.flush )			
+-      rmesa->dma.flush( rmesa );	
++   if ( rmesa->radeon.dma.flush )			
++      rmesa->radeon.dma.flush( rmesa->radeon.glCtx );	
+ 
+-   rmesa->dma.flush = NULL;
++   rmesa->radeon.dma.flush = NULL;
+    
+-   if (rmesa->swtcl.indexed_verts.buf) 
+-      r200ReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts, 
+-			      __FUNCTION__ );
+-
+    R200_STATECHANGE( rmesa, vap );
+    rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] |= R200_VAP_TCL_ENABLE;
+    rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] &= ~R200_VAP_FORCE_W_TO_ONE;
+@@ -631,10 +627,10 @@ static char *getFallbackString(GLuint bit)
+ void r200TclFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
+ {
+    r200ContextPtr rmesa = R200_CONTEXT(ctx);
+-   GLuint oldfallback = rmesa->TclFallback;
++   GLuint oldfallback = rmesa->radeon.TclFallback;
+ 
+    if (mode) {
+-      rmesa->TclFallback |= bit;
++      rmesa->radeon.TclFallback |= bit;
+       if (oldfallback == 0) {
+ 	 if (R200_DEBUG & DEBUG_FALLBACKS) 
+ 	    fprintf(stderr, "R200 begin tcl fallback %s\n",
+@@ -643,7 +639,7 @@ void r200TclFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
+       }
+    }
+    else {
+-      rmesa->TclFallback &= ~bit;
++      rmesa->radeon.TclFallback &= ~bit;
+       if (oldfallback == bit) {
+ 	 if (R200_DEBUG & DEBUG_FALLBACKS) 
+ 	    fprintf(stderr, "R200 end tcl fallback %s\n",
+diff --git a/src/mesa/drivers/dri/r200/r200_tex.c b/src/mesa/drivers/dri/r200/r200_tex.c
+index 5a4db33..19a6cad 100644
+--- a/src/mesa/drivers/dri/r200/r200_tex.c
++++ b/src/mesa/drivers/dri/r200/r200_tex.c
+@@ -43,8 +43,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "main/teximage.h"
+ #include "main/texobj.h"
+ 
+-#include "texmem.h"
+-
++#include "radeon_mipmap_tree.h"
+ #include "r200_context.h"
+ #include "r200_state.h"
+ #include "r200_ioctl.h"
+@@ -63,10 +62,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+  * \param twrap Wrap mode for the \a t texture coordinate
+  */
+ 
+-static void r200SetTexWrap( r200TexObjPtr t, GLenum swrap, GLenum twrap, GLenum rwrap )
++static void r200SetTexWrap( radeonTexObjPtr t, GLenum swrap, GLenum twrap, GLenum rwrap )
+ {
+    GLboolean  is_clamp = GL_FALSE;
+    GLboolean  is_clamp_to_border = GL_FALSE;
++   struct gl_texture_object *tObj = &t->base;
+ 
+    t->pp_txfilter &= ~(R200_CLAMP_S_MASK | R200_CLAMP_T_MASK | R200_BORDER_MODE_D3D);
+ 
+@@ -103,7 +103,7 @@ static void r200SetTexWrap( r200TexObjPtr t, GLenum swrap, GLenum twrap, GLenum
+       _mesa_problem(NULL, "bad S wrap mode in %s", __FUNCTION__);
+    }
+ 
+-   if (t->base.tObj->Target != GL_TEXTURE_1D) {
++   if (tObj->Target != GL_TEXTURE_1D) {
+       switch ( twrap ) {
+       case GL_REPEAT:
+          t->pp_txfilter |= R200_CLAMP_T_WRAP;
+@@ -180,7 +180,7 @@ static void r200SetTexWrap( r200TexObjPtr t, GLenum swrap, GLenum twrap, GLenum
+    t->border_fallback = (is_clamp && is_clamp_to_border);
+ }
+ 
+-static void r200SetTexMaxAnisotropy( r200TexObjPtr t, GLfloat max )
++static void r200SetTexMaxAnisotropy( radeonTexObjPtr t, GLfloat max )
+ {
+    t->pp_txfilter &= ~R200_MAX_ANISO_MASK;
+ 
+@@ -205,10 +205,13 @@ static void r200SetTexMaxAnisotropy( r200TexObjPtr t, GLfloat max )
+  * \param magf Texture magnification mode
+  */
+ 
+-static void r200SetTexFilter( r200TexObjPtr t, GLenum minf, GLenum magf )
++static void r200SetTexFilter( radeonTexObjPtr t, GLenum minf, GLenum magf )
+ {
+    GLuint anisotropy = (t->pp_txfilter & R200_MAX_ANISO_MASK);
+ 
++   /* Force revalidation to account for switches from/to mipmapping. */
++   t->validated = GL_FALSE;
++
+    t->pp_txfilter &= ~(R200_MIN_FILTER_MASK | R200_MAG_FILTER_MASK);
+    t->pp_txformat_x &= ~R200_VOLUME_FILTER_MASK;
+ 
+@@ -267,693 +270,12 @@ static void r200SetTexFilter( r200TexObjPtr t, GLenum minf, GLenum magf )
+    }
+ }
+ 
+-static void r200SetTexBorderColor( r200TexObjPtr t, GLubyte c[4] )
+-{
+-   t->pp_border_color = r200PackColor( 4, c[0], c[1], c[2], c[3] );
+-}
+-
+-
+-/**
+- * Allocate space for and load the mesa images into the texture memory block.
+- * This will happen before drawing with a new texture, or drawing with a
+- * texture after it was swapped out or teximaged again.
+- */
+-
+-static r200TexObjPtr r200AllocTexObj( struct gl_texture_object *texObj )
+-{
+-   r200TexObjPtr t;
+-
+-   t = CALLOC_STRUCT( r200_tex_obj );
+-   texObj->DriverData = t;
+-   if ( t != NULL ) {
+-      if ( R200_DEBUG & DEBUG_TEXTURE ) {
+-	 fprintf( stderr, "%s( %p, %p )\n", __FUNCTION__, (void *)texObj, 
+-		  (void *)t );
+-      }
+-
+-      /* Initialize non-image-dependent parts of the state:
+-       */
+-      t->base.tObj = texObj;
+-      t->border_fallback = GL_FALSE;
+-
+-      make_empty_list( & t->base );
+-
+-      r200SetTexWrap( t, texObj->WrapS, texObj->WrapT, texObj->WrapR );
+-      r200SetTexMaxAnisotropy( t, texObj->MaxAnisotropy );
+-      r200SetTexFilter( t, texObj->MinFilter, texObj->MagFilter );
+-      r200SetTexBorderColor( t, texObj->_BorderChan );
+-   }
+-
+-   return t;
+-}
+-
+-/* try to find a format which will only need a memcopy */
+-static const struct gl_texture_format *
+-r200Choose8888TexFormat( GLenum srcFormat, GLenum srcType )
+-{
+-   const GLuint ui = 1;
+-   const GLubyte littleEndian = *((const GLubyte *) &ui);
+-
+-   if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8) ||
+-       (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE && !littleEndian) ||
+-       (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) ||
+-       (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE && littleEndian)) {
+-      return &_mesa_texformat_rgba8888;
+-   }
+-   else if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) ||
+-       (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE && littleEndian) ||
+-       (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_INT_8_8_8_8) ||
+-       (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE && !littleEndian)) {
+-      return &_mesa_texformat_rgba8888_rev;
+-   }
+-   else return _dri_texformat_argb8888;
+-}
+-
+-static const struct gl_texture_format *
+-r200ChooseTextureFormat( GLcontext *ctx, GLint internalFormat,
+-                           GLenum format, GLenum type )
+-{
+-   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+-   const GLboolean do32bpt =
+-       ( rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_32 );
+-   const GLboolean force16bpt =
+-       ( rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_FORCE_16 );
+-   (void) format;
+-
+-   switch ( internalFormat ) {
+-   case 4:
+-   case GL_RGBA:
+-   case GL_COMPRESSED_RGBA:
+-      switch ( type ) {
+-      case GL_UNSIGNED_INT_10_10_10_2:
+-      case GL_UNSIGNED_INT_2_10_10_10_REV:
+-	 return do32bpt ? _dri_texformat_argb8888 : _dri_texformat_argb1555;
+-      case GL_UNSIGNED_SHORT_4_4_4_4:
+-      case GL_UNSIGNED_SHORT_4_4_4_4_REV:
+-	 return _dri_texformat_argb4444;
+-      case GL_UNSIGNED_SHORT_5_5_5_1:
+-      case GL_UNSIGNED_SHORT_1_5_5_5_REV:
+-	 return _dri_texformat_argb1555;
+-      default:
+-         return do32bpt ?
+-	    r200Choose8888TexFormat(format, type) : _dri_texformat_argb4444;
+-      }
+-
+-   case 3:
+-   case GL_RGB:
+-   case GL_COMPRESSED_RGB:
+-      switch ( type ) {
+-      case GL_UNSIGNED_SHORT_4_4_4_4:
+-      case GL_UNSIGNED_SHORT_4_4_4_4_REV:
+-	 return _dri_texformat_argb4444;
+-      case GL_UNSIGNED_SHORT_5_5_5_1:
+-      case GL_UNSIGNED_SHORT_1_5_5_5_REV:
+-	 return _dri_texformat_argb1555;
+-      case GL_UNSIGNED_SHORT_5_6_5:
+-      case GL_UNSIGNED_SHORT_5_6_5_REV:
+-	 return _dri_texformat_rgb565;
+-      default:
+-         return do32bpt ? _dri_texformat_argb8888 : _dri_texformat_rgb565;
+-      }
+-
+-   case GL_RGBA8:
+-   case GL_RGB10_A2:
+-   case GL_RGBA12:
+-   case GL_RGBA16:
+-      return !force16bpt ?
+-	  r200Choose8888TexFormat(format, type) : _dri_texformat_argb4444;
+-
+-   case GL_RGBA4:
+-   case GL_RGBA2:
+-      return _dri_texformat_argb4444;
+-
+-   case GL_RGB5_A1:
+-      return _dri_texformat_argb1555;
+-
+-   case GL_RGB8:
+-   case GL_RGB10:
+-   case GL_RGB12:
+-   case GL_RGB16:
+-      return !force16bpt ? _dri_texformat_argb8888 : _dri_texformat_rgb565;
+-
+-   case GL_RGB5:
+-   case GL_RGB4:
+-   case GL_R3_G3_B2:
+-      return _dri_texformat_rgb565;
+-
+-   case GL_ALPHA:
+-   case GL_ALPHA4:
+-   case GL_ALPHA8:
+-   case GL_ALPHA12:
+-   case GL_ALPHA16:
+-   case GL_COMPRESSED_ALPHA:
+-   /* can't use a8 format since interpreting hw I8 as a8 would result
+-      in wrong rgb values (same as alpha value instead of 0). */
+-      return _dri_texformat_al88;
+-
+-   case 1:
+-   case GL_LUMINANCE:
+-   case GL_LUMINANCE4:
+-   case GL_LUMINANCE8:
+-   case GL_LUMINANCE12:
+-   case GL_LUMINANCE16:
+-   case GL_COMPRESSED_LUMINANCE:
+-      return _dri_texformat_l8;
+-
+-   case 2:
+-   case GL_LUMINANCE_ALPHA:
+-   case GL_LUMINANCE4_ALPHA4:
+-   case GL_LUMINANCE6_ALPHA2:
+-   case GL_LUMINANCE8_ALPHA8:
+-   case GL_LUMINANCE12_ALPHA4:
+-   case GL_LUMINANCE12_ALPHA12:
+-   case GL_LUMINANCE16_ALPHA16:
+-   case GL_COMPRESSED_LUMINANCE_ALPHA:
+-      return _dri_texformat_al88;
+-
+-   case GL_INTENSITY:
+-   case GL_INTENSITY4:
+-   case GL_INTENSITY8:
+-   case GL_INTENSITY12:
+-   case GL_INTENSITY16:
+-   case GL_COMPRESSED_INTENSITY:
+-       return _dri_texformat_i8;
+-
+-   case GL_YCBCR_MESA:
+-      if (type == GL_UNSIGNED_SHORT_8_8_APPLE ||
+-          type == GL_UNSIGNED_BYTE)
+-         return &_mesa_texformat_ycbcr;
+-      else
+-         return &_mesa_texformat_ycbcr_rev;
+-
+-   case GL_RGB_S3TC:
+-   case GL_RGB4_S3TC:
+-   case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
+-      return &_mesa_texformat_rgb_dxt1;
+-
+-   case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
+-      return &_mesa_texformat_rgba_dxt1;
+-
+-   case GL_RGBA_S3TC:
+-   case GL_RGBA4_S3TC:
+-   case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
+-      return &_mesa_texformat_rgba_dxt3;
+-
+-   case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
+-      return &_mesa_texformat_rgba_dxt5;
+-
+-   default:
+-      _mesa_problem(ctx,
+-         "unexpected internalFormat 0x%x in r200ChooseTextureFormat",
+-         (int) internalFormat);
+-      return NULL;
+-   }
+-
+-   return NULL; /* never get here */
+-}
+-
+-
+-static GLboolean
+-r200ValidateClientStorage( GLcontext *ctx, GLenum target,
+-			   GLint internalFormat,
+-			   GLint srcWidth, GLint srcHeight, 
+-                           GLenum format, GLenum type,  const void *pixels,
+-			   const struct gl_pixelstore_attrib *packing,
+-			   struct gl_texture_object *texObj,
+-			   struct gl_texture_image *texImage)
+-
+-{
+-   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+-
+-   if ( R200_DEBUG & DEBUG_TEXTURE )
+-      fprintf(stderr, "intformat %s format %s type %s\n",
+-	      _mesa_lookup_enum_by_nr( internalFormat ),
+-	      _mesa_lookup_enum_by_nr( format ),
+-	      _mesa_lookup_enum_by_nr( type ));
+-
+-   if (!ctx->Unpack.ClientStorage)
+-      return 0;
+-
+-   if (ctx->_ImageTransferState ||
+-       texImage->IsCompressed ||
+-       texObj->GenerateMipmap)
+-      return 0;
+-
+-
+-   /* This list is incomplete, may be different on ppc???
+-    */
+-   switch ( internalFormat ) {
+-   case GL_RGBA:
+-      if ( format == GL_BGRA && type == GL_UNSIGNED_INT_8_8_8_8_REV ) {
+-	 texImage->TexFormat = _dri_texformat_argb8888;
+-      }
+-      else
+-	 return 0;
+-      break;
+-
+-   case GL_RGB:
+-      if ( format == GL_RGB && type == GL_UNSIGNED_SHORT_5_6_5 ) {
+-	 texImage->TexFormat = _dri_texformat_rgb565;
+-      }
+-      else
+-	 return 0;
+-      break;
+-
+-   case GL_YCBCR_MESA:
+-      if ( format == GL_YCBCR_MESA && 
+-	   type == GL_UNSIGNED_SHORT_8_8_REV_APPLE ) {
+-	 texImage->TexFormat = &_mesa_texformat_ycbcr_rev;
+-      }
+-      else if ( format == GL_YCBCR_MESA && 
+-		(type == GL_UNSIGNED_SHORT_8_8_APPLE || 
+-		 type == GL_UNSIGNED_BYTE)) {
+-	 texImage->TexFormat = &_mesa_texformat_ycbcr;
+-      }
+-      else
+-	 return 0;
+-      break;
+-
+-   default:
+-      return 0;
+-   }
+-
+-   /* Could deal with these packing issues, but currently don't:
+-    */
+-   if (packing->SkipPixels || 
+-       packing->SkipRows || 
+-       packing->SwapBytes ||
+-       packing->LsbFirst) {
+-      return 0;
+-   }
+-
+-   {      
+-      GLint srcRowStride = _mesa_image_row_stride(packing, srcWidth,
+-						  format, type);
+-
+-      
+-      if ( R200_DEBUG & DEBUG_TEXTURE )
+-	 fprintf(stderr, "%s: srcRowStride %d/%x\n", 
+-		 __FUNCTION__, srcRowStride, srcRowStride);
+-
+-      /* Could check this later in upload, pitch restrictions could be
+-       * relaxed, but would need to store the image pitch somewhere,
+-       * as packing details might change before image is uploaded:
+-       */
+-      if (!r200IsGartMemory( rmesa, pixels, srcHeight * srcRowStride ) ||
+-	  (srcRowStride & 63))
+-	 return 0;
+-
+-
+-      /* Have validated that _mesa_transfer_teximage would be a straight
+-       * memcpy at this point.  NOTE: future calls to TexSubImage will
+-       * overwrite the client data.  This is explicitly mentioned in the
+-       * extension spec.
+-       */
+-      texImage->Data = (void *)pixels;
+-      texImage->IsClientData = GL_TRUE;
+-      texImage->RowStride = srcRowStride / texImage->TexFormat->TexelBytes;
+-
+-      return 1;
+-   }
+-}
+-
+-
+-static void r200TexImage1D( GLcontext *ctx, GLenum target, GLint level,
+-                              GLint internalFormat,
+-                              GLint width, GLint border,
+-                              GLenum format, GLenum type, const GLvoid *pixels,
+-                              const struct gl_pixelstore_attrib *packing,
+-                              struct gl_texture_object *texObj,
+-                              struct gl_texture_image *texImage )
+-{
+-   driTextureObject * t = (driTextureObject *) texObj->DriverData;
+-
+-   if ( t ) {
+-      driSwapOutTextureObject( t );
+-   }
+-   else {
+-      t = (driTextureObject *) r200AllocTexObj( texObj );
+-      if (!t) {
+-         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage1D");
+-         return;
+-      }
+-   }
+-
+-   /* Note, this will call ChooseTextureFormat */
+-   _mesa_store_teximage1d(ctx, target, level, internalFormat,
+-                          width, border, format, type, pixels,
+-                          &ctx->Unpack, texObj, texImage);
+-
+-   t->dirty_images[0] |= (1 << level);
+-}
+-
+-
+-static void r200TexSubImage1D( GLcontext *ctx, GLenum target, GLint level,
+-                                 GLint xoffset,
+-                                 GLsizei width,
+-                                 GLenum format, GLenum type,
+-                                 const GLvoid *pixels,
+-                                 const struct gl_pixelstore_attrib *packing,
+-                                 struct gl_texture_object *texObj,
+-                                 struct gl_texture_image *texImage )
+-{
+-   driTextureObject * t = (driTextureObject *) texObj->DriverData;
+-
+-   assert( t ); /* this _should_ be true */
+-   if ( t ) {
+-      driSwapOutTextureObject( t );
+-   }
+-   else {
+-      t = (driTextureObject *) r200AllocTexObj( texObj );
+-      if (!t) {
+-         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage1D");
+-         return;
+-      }
+-   }
+-
+-   _mesa_store_texsubimage1d(ctx, target, level, xoffset, width,
+-			     format, type, pixels, packing, texObj,
+-			     texImage);
+-
+-   t->dirty_images[0] |= (1 << level);
+-}
+-
+-
+-static void r200TexImage2D( GLcontext *ctx, GLenum target, GLint level,
+-                              GLint internalFormat,
+-                              GLint width, GLint height, GLint border,
+-                              GLenum format, GLenum type, const GLvoid *pixels,
+-                              const struct gl_pixelstore_attrib *packing,
+-                              struct gl_texture_object *texObj,
+-                              struct gl_texture_image *texImage )
+-{
+-   driTextureObject * t = (driTextureObject *) texObj->DriverData;
+-   GLuint face;
+-
+-   /* which cube face or ordinary 2D image */
+-   switch (target) {
+-   case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
+-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
+-   case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
+-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
+-   case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
+-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
+-      face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
+-      ASSERT(face < 6);
+-      break;
+-   default:
+-      face = 0;
+-   }
+-
+-   if ( t != NULL ) {
+-      driSwapOutTextureObject( t );
+-   }
+-   else {
+-      t = (driTextureObject *) r200AllocTexObj( texObj );
+-      if (!t) {
+-         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage2D");
+-         return;
+-      }
+-   }
+-
+-   texImage->IsClientData = GL_FALSE;
+-
+-   if (r200ValidateClientStorage( ctx, target, 
+-				  internalFormat, 
+-				  width, height, 
+-				  format, type, pixels, 
+-				  packing, texObj, texImage)) {
+-      if (R200_DEBUG & DEBUG_TEXTURE)
+-	 fprintf(stderr, "%s: Using client storage\n", __FUNCTION__); 
+-   }
+-   else {
+-      if (R200_DEBUG & DEBUG_TEXTURE)
+-	 fprintf(stderr, "%s: Using normal storage\n", __FUNCTION__); 
+-
+-      /* Normal path: copy (to cached memory) and eventually upload
+-       * via another copy to GART memory and then a blit...  Could
+-       * eliminate one copy by going straight to (permanent) GART.
+-       *
+-       * Note, this will call r200ChooseTextureFormat.
+-       */
+-      _mesa_store_teximage2d(ctx, target, level, internalFormat,
+-			     width, height, border, format, type, pixels,
+-			     &ctx->Unpack, texObj, texImage);
+-      
+-      t->dirty_images[face] |= (1 << level);
+-   }
+-}
+-
+-
+-static void r200TexSubImage2D( GLcontext *ctx, GLenum target, GLint level,
+-                                 GLint xoffset, GLint yoffset,
+-                                 GLsizei width, GLsizei height,
+-                                 GLenum format, GLenum type,
+-                                 const GLvoid *pixels,
+-                                 const struct gl_pixelstore_attrib *packing,
+-                                 struct gl_texture_object *texObj,
+-                                 struct gl_texture_image *texImage )
+-{
+-   driTextureObject * t = (driTextureObject *) texObj->DriverData;
+-   GLuint face;
+-
+-   /* which cube face or ordinary 2D image */
+-   switch (target) {
+-   case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
+-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
+-   case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
+-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
+-   case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
+-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
+-      face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
+-      ASSERT(face < 6);
+-      break;
+-   default:
+-      face = 0;
+-   }
+-
+-   assert( t ); /* this _should_ be true */
+-   if ( t ) {
+-      driSwapOutTextureObject( t );
+-   }
+-   else {
+-      t = (driTextureObject *) r200AllocTexObj( texObj );
+-      if (!t) {
+-         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage2D");
+-         return;
+-      }
+-   }
+-
+-   _mesa_store_texsubimage2d(ctx, target, level, xoffset, yoffset, width,
+-			     height, format, type, pixels, packing, texObj,
+-			     texImage);
+-
+-   t->dirty_images[face] |= (1 << level);
+-}
+-
+-
+-static void r200CompressedTexImage2D( GLcontext *ctx, GLenum target, GLint level,
+-                              GLint internalFormat,
+-                              GLint width, GLint height, GLint border,
+-                              GLsizei imageSize, const GLvoid *data,
+-                              struct gl_texture_object *texObj,
+-                              struct gl_texture_image *texImage )
++static void r200SetTexBorderColor( radeonTexObjPtr t, GLubyte c[4] )
+ {
+-   driTextureObject * t = (driTextureObject *) texObj->DriverData;
+-   GLuint face;
+-
+-   /* which cube face or ordinary 2D image */
+-   switch (target) {
+-   case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
+-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
+-   case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
+-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
+-   case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
+-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
+-      face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
+-      ASSERT(face < 6);
+-      break;
+-   default:
+-      face = 0;
+-   }
+-
+-   if ( t != NULL ) {
+-      driSwapOutTextureObject( t );
+-   }
+-   else {
+-      t = (driTextureObject *) r200AllocTexObj( texObj );
+-      if (!t) {
+-         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCompressedTexImage2D");
+-         return;
+-      }
+-   }
+-
+-   texImage->IsClientData = GL_FALSE;
+-/* can't call this, different parameters. Would never evaluate to true anyway currently
+-   if (r200ValidateClientStorage( ctx, target, 
+-				  internalFormat,
+-				  width, height,
+-				  format, type, pixels,
+-				  packing, texObj, texImage)) {
+-      if (R200_DEBUG & DEBUG_TEXTURE)
+-	 fprintf(stderr, "%s: Using client storage\n", __FUNCTION__);
+-   }
+-   else */{
+-      if (R200_DEBUG & DEBUG_TEXTURE)
+-	 fprintf(stderr, "%s: Using normal storage\n", __FUNCTION__);
+-
+-      /* Normal path: copy (to cached memory) and eventually upload
+-       * via another copy to GART memory and then a blit...  Could
+-       * eliminate one copy by going straight to (permanent) GART.
+-       *
+-       * Note, this will call r200ChooseTextureFormat.
+-       */
+-      _mesa_store_compressed_teximage2d(ctx, target, level, internalFormat, width,
+-                                 height, border, imageSize, data, texObj, texImage);
+-
+-      t->dirty_images[face] |= (1 << level);
+-   }
+-}
+-
+-
+-static void r200CompressedTexSubImage2D( GLcontext *ctx, GLenum target, GLint level,
+-                                 GLint xoffset, GLint yoffset,
+-                                 GLsizei width, GLsizei height,
+-                                 GLenum format,
+-                                 GLsizei imageSize, const GLvoid *data,
+-                                 struct gl_texture_object *texObj,
+-                                 struct gl_texture_image *texImage )
+-{
+-   driTextureObject * t = (driTextureObject *) texObj->DriverData;
+-   GLuint face;
+-
+-
+-   /* which cube face or ordinary 2D image */
+-   switch (target) {
+-   case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
+-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
+-   case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
+-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
+-   case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
+-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
+-      face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
+-      ASSERT(face < 6);
+-      break;
+-   default:
+-      face = 0;
+-   }
+-
+-   assert( t ); /* this _should_ be true */
+-   if ( t ) {
+-      driSwapOutTextureObject( t );
+-   }
+-   else {
+-      t = (driTextureObject *) r200AllocTexObj( texObj );
+-      if (!t) {
+-         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCompressedTexSubImage2D");
+-         return;
+-      }
+-   }
+-
+-   _mesa_store_compressed_texsubimage2d(ctx, target, level, xoffset, yoffset, width,
+-                            height, format, imageSize, data, texObj, texImage);
+-
+-   t->dirty_images[face] |= (1 << level);
+-}
+-
+-
+-#if ENABLE_HW_3D_TEXTURE
+-static void r200TexImage3D( GLcontext *ctx, GLenum target, GLint level,
+-                            GLint internalFormat,
+-                            GLint width, GLint height, GLint depth,
+-                            GLint border,
+-                            GLenum format, GLenum type, const GLvoid *pixels,
+-                            const struct gl_pixelstore_attrib *packing,
+-                            struct gl_texture_object *texObj,
+-                            struct gl_texture_image *texImage )
+-{
+-   driTextureObject * t = (driTextureObject *) texObj->DriverData;
+-
+-   if ( t ) {
+-      driSwapOutTextureObject( t );
+-   }
+-   else {
+-      t = (driTextureObject *) r200AllocTexObj( texObj );
+-      if (!t) {
+-         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage3D");
+-         return;
+-      }
+-   }
+-
+-   texImage->IsClientData = GL_FALSE;
+-
+-#if 0
+-   if (r200ValidateClientStorage( ctx, target, 
+-				  internalFormat, 
+-				  width, height, 
+-				  format, type, pixels, 
+-				  packing, texObj, texImage)) {
+-      if (R200_DEBUG & DEBUG_TEXTURE)
+-	 fprintf(stderr, "%s: Using client storage\n", __FUNCTION__); 
+-   }
+-   else
+-#endif
+-   {
+-      if (R200_DEBUG & DEBUG_TEXTURE)
+-	 fprintf(stderr, "%s: Using normal storage\n", __FUNCTION__); 
+-
+-      /* Normal path: copy (to cached memory) and eventually upload
+-       * via another copy to GART memory and then a blit...  Could
+-       * eliminate one copy by going straight to (permanent) GART.
+-       *
+-       * Note, this will call r200ChooseTextureFormat.
+-       */
+-      _mesa_store_teximage3d(ctx, target, level, internalFormat,
+-			     width, height, depth, border,
+-                             format, type, pixels,
+-			     &ctx->Unpack, texObj, texImage);
+-      
+-      t->dirty_images[0] |= (1 << level);
+-   }
++   t->pp_border_color = radeonPackColor( 4, c[0], c[1], c[2], c[3] );
+ }
+-#endif
+-
+ 
+-#if ENABLE_HW_3D_TEXTURE
+-static void
+-r200TexSubImage3D( GLcontext *ctx, GLenum target, GLint level,
+-                   GLint xoffset, GLint yoffset, GLint zoffset,
+-                   GLsizei width, GLsizei height, GLsizei depth,
+-                   GLenum format, GLenum type,
+-                   const GLvoid *pixels,
+-                   const struct gl_pixelstore_attrib *packing,
+-                   struct gl_texture_object *texObj,
+-                   struct gl_texture_image *texImage )
+-{
+-   driTextureObject * t = (driTextureObject *) texObj->DriverData;
+-
+-/*     fprintf(stderr, "%s\n", __FUNCTION__); */
+-
+-   assert( t ); /* this _should_ be true */
+-   if ( t ) {
+-      driSwapOutTextureObject( t );
+-   }
+-   else {
+-      t = (driTextureObject *) r200AllocTexObj( texObj );
+-      if (!t) {
+-         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage3D");
+-         return;
+-      }
+-      texObj->DriverData = t;
+-   }
+ 
+-   _mesa_store_texsubimage3d(ctx, target, level, xoffset, yoffset, zoffset,
+-                             width, height, depth,
+-                             format, type, pixels, packing, texObj, texImage);
+-
+-   t->dirty_images[0] |= (1 << level);
+-}
+-#endif
+ 
+ 
+ 
+@@ -978,7 +300,7 @@ static void r200TexEnv( GLcontext *ctx, GLenum target,
+       GLubyte c[4];
+       GLuint envColor;
+       UNCLAMPED_FLOAT_TO_RGBA_CHAN( c, texUnit->EnvColor );
+-      envColor = r200PackColor( 4, c[0], c[1], c[2], c[3] );
++      envColor = radeonPackColor( 4, c[0], c[1], c[2], c[3] );
+       if ( rmesa->hw.tf.cmd[TF_TFACTOR_0 + unit] != envColor ) {
+ 	 R200_STATECHANGE( rmesa, tf );
+ 	 rmesa->hw.tf.cmd[TF_TFACTOR_0 + unit] = envColor;
+@@ -997,7 +319,7 @@ static void r200TexEnv( GLcontext *ctx, GLenum target,
+        * NOTE: Add a small bias to the bias for conform mipsel.c test.
+        */
+       bias = *param + .01;
+-      min = driQueryOptionb (&rmesa->optionCache, "no_neg_lod_bias") ?
++      min = driQueryOptionb (&rmesa->radeon.optionCache, "no_neg_lod_bias") ?
+ 	  0.0 : -16.0;
+       bias = CLAMP( bias, min, 16.0 );
+       b = (int)(bias * fixed_one) & R200_LOD_BIAS_MASK;
+@@ -1034,7 +356,7 @@ static void r200TexParameter( GLcontext *ctx, GLenum target,
+ 				struct gl_texture_object *texObj,
+ 				GLenum pname, const GLfloat *params )
+ {
+-   r200TexObjPtr t = (r200TexObjPtr) texObj->DriverData;
++   radeonTexObj* t = radeon_tex_obj(texObj);
+ 
+    if ( R200_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) {
+       fprintf( stderr, "%s( %s )\n", __FUNCTION__,
+@@ -1068,59 +390,46 @@ static void r200TexParameter( GLcontext *ctx, GLenum target,
+        * we just have to rely on loading the right subset of mipmap levels
+        * to simulate a clamped LOD.
+        */
+-      driSwapOutTextureObject( (driTextureObject *) t );
++      if (t->mt) {
++         radeon_miptree_unreference(t->mt);
++	 t->mt = 0;
++	 t->validated = GL_FALSE;
++      }
+       break;
+ 
+    default:
+       return;
+    }
+-
+-   /* Mark this texobj as dirty (one bit per tex unit)
+-    */
+-   t->dirty_state = TEX_ALL;
+ }
+ 
+ 
+-
+-static void r200BindTexture( GLcontext *ctx, GLenum target,
+-			       struct gl_texture_object *texObj )
+-{
+-   if ( R200_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) {
+-      fprintf( stderr, "%s( %p ) unit=%d\n", __FUNCTION__, (void *)texObj,
+-	       ctx->Texture.CurrentUnit );
+-   }
+-
+-   if ( (target == GL_TEXTURE_1D)
+-	|| (target == GL_TEXTURE_2D) 
+-#if ENABLE_HW_3D_TEXTURE
+-	|| (target == GL_TEXTURE_3D)
+-#endif
+-	|| (target == GL_TEXTURE_CUBE_MAP)
+-	|| (target == GL_TEXTURE_RECTANGLE_NV) ) {
+-      assert( texObj->DriverData != NULL );
+-   }
+-}
+-
+-
+-static void r200DeleteTexture( GLcontext *ctx,
+-				 struct gl_texture_object *texObj )
++static void r200DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj)
+ {
+    r200ContextPtr rmesa = R200_CONTEXT(ctx);
+-   driTextureObject * t = (driTextureObject *) texObj->DriverData;
+-
+-   if ( R200_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) {
+-      fprintf( stderr, "%s( %p (target = %s) )\n", __FUNCTION__, (void *)texObj,
+-	       _mesa_lookup_enum_by_nr( texObj->Target ) );
++   radeonTexObj* t = radeon_tex_obj(texObj);
++
++   if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) {
++      fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__,
++	      (void *)texObj,
++	      _mesa_lookup_enum_by_nr(texObj->Target));
++   }
++   
++   if (rmesa) {
++      int i;
++      radeon_firevertices(&rmesa->radeon);
++      for ( i = 0 ; i < rmesa->radeon.glCtx->Const.MaxTextureUnits ; i++ ) {
++	 if ( t == rmesa->state.texture.unit[i].texobj ) {
++	    rmesa->state.texture.unit[i].texobj = NULL;
++	    rmesa->hw.tex[i].dirty = GL_FALSE;
++	    rmesa->hw.cube[i].dirty = GL_FALSE;
++	 }
++      }      
+    }
+-
+-   if ( t != NULL ) {
+-      if ( rmesa ) {
+-         R200_FIREVERTICES( rmesa );
+-      }
+-
+-      driDestroyTextureObject( t );
++   
++   if (t->mt) {
++      radeon_miptree_unreference(t->mt);
++      t->mt = 0;
+    }
+-   /* Free mipmap images and the texture object itself */
+    _mesa_delete_texture_object(ctx, texObj);
+ }
+ 
+@@ -1150,46 +459,59 @@ static void r200TexGen( GLcontext *ctx,
+  * Called via ctx->Driver.NewTextureObject.
+  * Note: this function will be called during context creation to
+  * allocate the default texture objects.
+- * Note: we could use containment here to 'derive' the driver-specific
+- * texture object from the core mesa gl_texture_object.  Not done at this time.
+  * Fixup MaxAnisotropy according to user preference.
+  */
+-static struct gl_texture_object *
+-r200NewTextureObject( GLcontext *ctx, GLuint name, GLenum target )
++static struct gl_texture_object *r200NewTextureObject(GLcontext * ctx,
++						      GLuint name,
++						      GLenum target)
+ {
+    r200ContextPtr rmesa = R200_CONTEXT(ctx);
+-   struct gl_texture_object *obj;
+-   obj = _mesa_new_texture_object(ctx, name, target);
+-   if (!obj)
+-      return NULL;
+-   obj->MaxAnisotropy = rmesa->initialMaxAnisotropy;
+-   r200AllocTexObj( obj );
+-   return obj;
++   radeonTexObj* t = CALLOC_STRUCT(radeon_tex_obj);
++   if (!t)
++      return NULL; /* allocation failed: t must not be dereferenced below */
++   if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) {
++     fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__,
++	     (void *)t, _mesa_lookup_enum_by_nr(target));
++   }
++
++   _mesa_initialize_texture_object(&t->base, name, target);
++   t->base.MaxAnisotropy = rmesa->radeon.initialMaxAnisotropy;
++
++   /* Initialize hardware state */
++   r200SetTexWrap( t, t->base.WrapS, t->base.WrapT, t->base.WrapR );
++   r200SetTexMaxAnisotropy( t, t->base.MaxAnisotropy );
++   r200SetTexFilter(t, t->base.MinFilter, t->base.MagFilter);
++   r200SetTexBorderColor(t, t->base._BorderChan);
++
++   return &t->base;
+ }
+ 
+ 
++
+ void r200InitTextureFuncs( struct dd_function_table *functions )
+ {
+    /* Note: we only plug in the functions we implement in the driver
+     * since _mesa_init_driver_functions() was already called.
+     */
+-   functions->ChooseTextureFormat	= r200ChooseTextureFormat;
+-   functions->TexImage1D		= r200TexImage1D;
+-   functions->TexImage2D		= r200TexImage2D;
++   functions->ChooseTextureFormat	= radeonChooseTextureFormat;
++   functions->TexImage1D		= radeonTexImage1D;
++   functions->TexImage2D		= radeonTexImage2D;
+ #if ENABLE_HW_3D_TEXTURE
+-   functions->TexImage3D		= r200TexImage3D;
++   functions->TexImage3D		= radeonTexImage3D;
+ #else
+    functions->TexImage3D		= _mesa_store_teximage3d;
+ #endif
+-   functions->TexSubImage1D		= r200TexSubImage1D;
+-   functions->TexSubImage2D		= r200TexSubImage2D;
++   functions->TexSubImage1D		= radeonTexSubImage1D;
++   functions->TexSubImage2D		= radeonTexSubImage2D;
+ #if ENABLE_HW_3D_TEXTURE
+-   functions->TexSubImage3D		= r200TexSubImage3D;
++   functions->TexSubImage3D		= radeonTexSubImage3D;
+ #else
+    functions->TexSubImage3D		= _mesa_store_texsubimage3d;
+ #endif
++   functions->GetTexImage               = radeonGetTexImage;
++   functions->GetCompressedTexImage     = radeonGetCompressedTexImage;
+    functions->NewTextureObject		= r200NewTextureObject;
+-   functions->BindTexture		= r200BindTexture;
++   //   functions->BindTexture		= r200BindTexture;
+    functions->DeleteTexture		= r200DeleteTexture;
+    functions->IsTextureResident		= driIsTextureResident;
+ 
+@@ -1197,22 +519,16 @@ void r200InitTextureFuncs( struct dd_function_table *functions )
+    functions->TexParameter		= r200TexParameter;
+    functions->TexGen			= r200TexGen;
+ 
+-   functions->CompressedTexImage2D	= r200CompressedTexImage2D;
+-   functions->CompressedTexSubImage2D	= r200CompressedTexSubImage2D;
++   functions->CompressedTexImage2D	= radeonCompressedTexImage2D;
++   functions->CompressedTexSubImage2D	= radeonCompressedTexSubImage2D;
++
++   functions->GenerateMipmap = radeonGenerateMipmap;
++
++   functions->NewTextureImage = radeonNewTextureImage;
++   functions->FreeTexImageData = radeonFreeTexImageData;
++   functions->MapTexture = radeonMapTexture;
++   functions->UnmapTexture = radeonUnmapTexture;
+ 
+    driInitTextureFormats();
+ 
+-#if 000
+-   /* moved or obsolete code */
+-   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+-   driInitTextureObjects( ctx, & rmesa->swapped,
+-			  DRI_TEXMGR_DO_TEXTURE_1D
+-			  | DRI_TEXMGR_DO_TEXTURE_2D );
+-
+-   /* Hack: r200NewTextureObject is not yet installed when the
+-    * default textures are created. Therefore set MaxAnisotropy of the
+-    * default 2D texture now. */
+-   ctx->Shared->Default2D->MaxAnisotropy = driQueryOptionf (&rmesa->optionCache,
+-							    "def_max_anisotropy");
+-#endif
+ }
+diff --git a/src/mesa/drivers/dri/r200/r200_tex.h b/src/mesa/drivers/dri/r200/r200_tex.h
+index 10ff8e8..55592ed 100644
+--- a/src/mesa/drivers/dri/r200/r200_tex.h
++++ b/src/mesa/drivers/dri/r200/r200_tex.h
+@@ -41,9 +41,9 @@ extern void r200SetTexOffset(__DRIcontext *pDRICtx, GLint texname,
+ 
+ extern void r200UpdateTextureState( GLcontext *ctx );
+ 
+-extern int r200UploadTexImages( r200ContextPtr rmesa, r200TexObjPtr t, GLuint face );
++extern int r200UploadTexImages( r200ContextPtr rmesa, radeonTexObjPtr t, GLuint face );
+ 
+-extern void r200DestroyTexObj( r200ContextPtr rmesa, r200TexObjPtr t );
++extern void r200DestroyTexObj( r200ContextPtr rmesa, radeonTexObjPtr t );
+ 
+ extern void r200InitTextureFuncs( struct dd_function_table *functions );
+ 
+diff --git a/src/mesa/drivers/dri/r200/r200_texmem.c b/src/mesa/drivers/dri/r200/r200_texmem.c
+deleted file mode 100644
+index 3b81ac0..0000000
+--- a/src/mesa/drivers/dri/r200/r200_texmem.c
++++ /dev/null
+@@ -1,530 +0,0 @@
+-/**************************************************************************
+-
+-Copyright (C) Tungsten Graphics 2002.  All Rights Reserved.  
+-The Weather Channel, Inc. funded Tungsten Graphics to develop the
+-initial release of the Radeon 8500 driver under the XFree86
+-license. This notice must be preserved.
+-
+-Permission is hereby granted, free of charge, to any person obtaining
+-a copy of this software and associated documentation files (the
+-"Software"), to deal in the Software without restriction, including
+-without limitation on the rights to use, copy, modify, merge, publish,
+-distribute, sub license, and/or sell copies of the Software, and to
+-permit persons to whom the Software is furnished to do so, subject to
+-the following conditions:
+-
+-The above copyright notice and this permission notice (including the
+-next paragraph) shall be included in all copies or substantial
+-portions of the Software.
+-
+-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+-NON-INFRINGEMENT. IN NO EVENT SHALL ATI, VA LINUX SYSTEMS AND/OR THEIR
+-SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+-IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
+-IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+-SOFTWARE.
+-
+-**************************************************************************/
+-
+-/*
+- * Authors:
+- *   Kevin E. Martin <martin@valinux.com>
+- *   Gareth Hughes <gareth@valinux.com>
+- *
+- */
+- 
+-#include <errno.h>
+-
+-#include "main/glheader.h"
+-#include "main/imports.h"
+-#include "main/context.h"
+-#include "main/colormac.h"
+-#include "main/macros.h"
+-#include "r200_context.h"
+-#include "r200_ioctl.h"
+-#include "r200_tex.h"
+-#include "radeon_reg.h"
+-
+-#include <unistd.h>  /* for usleep() */
+-
+-
+-/**
+- * Destroy any device-dependent state associated with the texture.  This may
+- * include NULLing out hardware state that points to the texture.
+- */
+-void
+-r200DestroyTexObj( r200ContextPtr rmesa, r200TexObjPtr t )
+-{
+-   if ( R200_DEBUG & DEBUG_TEXTURE ) {
+-      fprintf( stderr, "%s( %p, %p )\n", __FUNCTION__, 
+-	       (void *)t, (void *)t->base.tObj );
+-   }
+-
+-   if ( rmesa != NULL ) {
+-      unsigned   i;
+-
+-
+-      for ( i = 0 ; i < rmesa->glCtx->Const.MaxTextureUnits ; i++ ) {
+-	 if ( t == rmesa->state.texture.unit[i].texobj ) {
+-	    rmesa->state.texture.unit[i].texobj = NULL;
+-	    rmesa->hw.tex[i].dirty = GL_FALSE;
+-	    rmesa->hw.cube[i].dirty = GL_FALSE;
+-	 }
+-      }
+-   }
+-}
+-
+-
+-/* ------------------------------------------------------------
+- * Texture image conversions
+- */
+-
+-
+-static void r200UploadGARTClientSubImage( r200ContextPtr rmesa,
+-					  r200TexObjPtr t, 
+-					  struct gl_texture_image *texImage,
+-					  GLint hwlevel,
+-					  GLint x, GLint y, 
+-					  GLint width, GLint height )
+-{
+-   const struct gl_texture_format *texFormat = texImage->TexFormat;
+-   GLuint srcPitch, dstPitch;
+-   int blit_format;
+-   int srcOffset;
+-
+-   /*
+-    * XXX it appears that we always upload the full image, not a subimage.
+-    * I.e. x==0, y==0, width=texWidth, height=texWidth.  If this is ever
+-    * changed, the src pitch will have to change.
+-    */
+-   switch ( texFormat->TexelBytes ) {
+-   case 1:
+-      blit_format = R200_CP_COLOR_FORMAT_CI8;
+-      srcPitch = t->image[0][0].width * texFormat->TexelBytes;
+-      dstPitch = t->image[0][0].width * texFormat->TexelBytes;
+-      break;
+-   case 2:
+-      blit_format = R200_CP_COLOR_FORMAT_RGB565;
+-      srcPitch = t->image[0][0].width * texFormat->TexelBytes;
+-      dstPitch = t->image[0][0].width * texFormat->TexelBytes;
+-      break;
+-   case 4:
+-      blit_format = R200_CP_COLOR_FORMAT_ARGB8888;
+-      srcPitch = t->image[0][0].width * texFormat->TexelBytes;
+-      dstPitch = t->image[0][0].width * texFormat->TexelBytes;
+-      break;
+-   default:
+-      return;
+-   }
+-
+-   t->image[0][hwlevel].data = texImage->Data;
+-   srcOffset = r200GartOffsetFromVirtual( rmesa, texImage->Data );
+-
+-   assert( srcOffset != ~0 );
+-
+-   /* Don't currently need to cope with small pitches?
+-    */
+-   width = texImage->Width;
+-   height = texImage->Height;
+-
+-   r200EmitWait( rmesa, RADEON_WAIT_3D );
+-
+-   r200EmitBlit( rmesa, blit_format, 
+-		 srcPitch,  
+-		 srcOffset,   
+-		 dstPitch,
+-		 t->bufAddr,
+-		 x, 
+-		 y, 
+-		 t->image[0][hwlevel].x + x,
+-		 t->image[0][hwlevel].y + y, 
+-		 width,
+-		 height );
+-
+-   r200EmitWait( rmesa, RADEON_WAIT_2D );
+-}
+-
+-static void r200UploadRectSubImage( r200ContextPtr rmesa,
+-				    r200TexObjPtr t, 
+-				    struct gl_texture_image *texImage,
+-				    GLint x, GLint y, 
+-				    GLint width, GLint height )
+-{
+-   const struct gl_texture_format *texFormat = texImage->TexFormat;
+-   int blit_format, dstPitch, done;
+-
+-   switch ( texFormat->TexelBytes ) {
+-   case 1:
+-      blit_format = R200_CP_COLOR_FORMAT_CI8;
+-      break;
+-   case 2:
+-      blit_format = R200_CP_COLOR_FORMAT_RGB565;
+-      break;
+-   case 4:
+-      blit_format = R200_CP_COLOR_FORMAT_ARGB8888;
+-      break;
+-   default:
+-      return;
+-   }
+-
+-   t->image[0][0].data = texImage->Data;
+-
+-   /* Currently don't need to cope with small pitches.
+-    */
+-   width = texImage->Width;
+-   height = texImage->Height;
+-   dstPitch = t->pp_txpitch + 32;
+-
+-   if (rmesa->prefer_gart_client_texturing && texImage->IsClientData) {
+-      /* In this case, could also use GART texturing.  This is
+-       * currently disabled, but has been tested & works.
+-       */
+-      if ( !t->image_override )
+-         t->pp_txoffset = r200GartOffsetFromVirtual( rmesa, texImage->Data );
+-      t->pp_txpitch = texImage->RowStride * texFormat->TexelBytes - 32;
+-
+-      if (R200_DEBUG & DEBUG_TEXTURE)
+-	 fprintf(stderr, 
+-		 "Using GART texturing for rectangular client texture\n");
+-
+-      /* Release FB memory allocated for this image:
+-       */
+-      /* FIXME This may not be correct as driSwapOutTextureObject sets
+-       * FIXME dirty_images.  It may be fine, though.
+-       */
+-      if ( t->base.memBlock ) {
+-	 driSwapOutTextureObject( (driTextureObject *) t );
+-      }
+-   }
+-   else if (texImage->IsClientData) {
+-      /* Data already in GART memory, with usable pitch.
+-       */
+-      GLuint srcPitch;
+-      srcPitch = texImage->RowStride * texFormat->TexelBytes;
+-      r200EmitBlit( rmesa, 
+-		    blit_format, 
+-		    srcPitch,
+-		    r200GartOffsetFromVirtual( rmesa, texImage->Data ),   
+-		    dstPitch, t->bufAddr,
+-		    0, 0, 
+-		    0, 0, 
+-		    width, height );
+-   }
+-   else {
+-      /* Data not in GART memory, or bad pitch.
+-       */
+-      for (done = 0; done < height ; ) {
+-	 struct r200_dma_region region;
+-	 int lines = MIN2( height - done, RADEON_BUFFER_SIZE / dstPitch );
+-	 int src_pitch;
+-	 char *tex;
+-
+-         src_pitch = texImage->RowStride * texFormat->TexelBytes;
+-
+-	 tex = (char *)texImage->Data + done * src_pitch;
+-
+-	 memset(&region, 0, sizeof(region));
+-	 r200AllocDmaRegion( rmesa, &region, lines * dstPitch, 1024 );
+-
+-	 /* Copy texdata to dma:
+-	  */
+-	 if (0)
+-	    fprintf(stderr, "%s: src_pitch %d dst_pitch %d\n",
+-		    __FUNCTION__, src_pitch, dstPitch);
+-
+-	 if (src_pitch == dstPitch) {
+-	    memcpy( region.address + region.start, tex, lines * src_pitch );
+-	 } 
+-	 else {
+-	    char *buf = region.address + region.start;
+-	    int i;
+-	    for (i = 0 ; i < lines ; i++) {
+-	       memcpy( buf, tex, src_pitch );
+-	       buf += dstPitch;
+-	       tex += src_pitch;
+-	    }
+-	 }
+-
+-	 r200EmitWait( rmesa, RADEON_WAIT_3D );
+-
+-	 /* Blit to framebuffer
+-	  */
+-	 r200EmitBlit( rmesa,
+-		       blit_format,
+-		       dstPitch, GET_START( &region ),
+-		       dstPitch | (t->tile_bits >> 16),
+-		       t->bufAddr,
+-		       0, 0,
+-		       0, done,
+-		       width, lines );
+-	 
+-	 r200EmitWait( rmesa, RADEON_WAIT_2D );
+-
+-	 r200ReleaseDmaRegion( rmesa, &region, __FUNCTION__ );
+-	 done += lines;
+-      }
+-   }
+-}
+-
+-
+-/**
+- * Upload the texture image associated with texture \a t at the specified
+- * level at the address relative to \a start.
+- */
+-static void uploadSubImage( r200ContextPtr rmesa, r200TexObjPtr t, 
+-			    GLint hwlevel,
+-			    GLint x, GLint y, GLint width, GLint height,
+-			    GLuint face )
+-{
+-   struct gl_texture_image *texImage = NULL;
+-   GLuint offset;
+-   GLint imageWidth, imageHeight;
+-   GLint ret;
+-   drm_radeon_texture_t tex;
+-   drm_radeon_tex_image_t tmp;
+-   const int level = hwlevel + t->base.firstLevel;
+-
+-   if ( R200_DEBUG & DEBUG_TEXTURE ) {
+-      fprintf( stderr, "%s( %p, %p ) level/width/height/face = %d/%d/%d/%u\n", 
+-	       __FUNCTION__, (void *)t, (void *)t->base.tObj,
+-	       level, width, height, face );
+-   }
+-
+-   ASSERT(face < 6);
+-
+-   /* Ensure we have a valid texture to upload */
+-   if ( ( hwlevel < 0 ) || ( hwlevel >= RADEON_MAX_TEXTURE_LEVELS ) ) {
+-      _mesa_problem(NULL, "bad texture level in %s", __FUNCTION__);
+-      return;
+-   }
+-
+-   texImage = t->base.tObj->Image[face][level];
+-
+-   if ( !texImage ) {
+-      if ( R200_DEBUG & DEBUG_TEXTURE )
+-	 fprintf( stderr, "%s: texImage %d is NULL!\n", __FUNCTION__, level );
+-      return;
+-   }
+-   if ( !texImage->Data ) {
+-      if ( R200_DEBUG & DEBUG_TEXTURE )
+-	 fprintf( stderr, "%s: image data is NULL!\n", __FUNCTION__ );
+-      return;
+-   }
+-
+-
+-   if (t->base.tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
+-      assert(level == 0);
+-      assert(hwlevel == 0);
+-      if ( R200_DEBUG & DEBUG_TEXTURE )
+-	 fprintf( stderr, "%s: image data is rectangular\n", __FUNCTION__);
+-      r200UploadRectSubImage( rmesa, t, texImage, x, y, width, height );
+-      return;
+-   }
+-   else if (texImage->IsClientData) {
+-      if ( R200_DEBUG & DEBUG_TEXTURE )
+-	 fprintf( stderr, "%s: image data is in GART client storage\n",
+-		  __FUNCTION__);
+-      r200UploadGARTClientSubImage( rmesa, t, texImage, hwlevel,
+-				   x, y, width, height );
+-      return;
+-   }
+-   else if ( R200_DEBUG & DEBUG_TEXTURE )
+-      fprintf( stderr, "%s: image data is in normal memory\n",
+-	       __FUNCTION__);
+-      
+-
+-   imageWidth = texImage->Width;
+-   imageHeight = texImage->Height;
+-
+-   offset = t->bufAddr + t->base.totalSize / 6 * face;
+-
+-   if ( R200_DEBUG & (DEBUG_TEXTURE|DEBUG_IOCTL) ) {
+-      GLint imageX = 0;
+-      GLint imageY = 0;
+-      GLint blitX = t->image[face][hwlevel].x;
+-      GLint blitY = t->image[face][hwlevel].y;
+-      GLint blitWidth = t->image[face][hwlevel].width;
+-      GLint blitHeight = t->image[face][hwlevel].height;
+-      fprintf( stderr, "   upload image: %d,%d at %d,%d\n",
+-	       imageWidth, imageHeight, imageX, imageY );
+-      fprintf( stderr, "   upload  blit: %d,%d at %d,%d\n",
+-	       blitWidth, blitHeight, blitX, blitY );
+-      fprintf( stderr, "       blit ofs: 0x%07x level: %d/%d\n",
+-	       (GLuint)offset, hwlevel, level );
+-   }
+-
+-   t->image[face][hwlevel].data = texImage->Data;
+-
+-   /* Init the DRM_RADEON_TEXTURE command / drm_radeon_texture_t struct.
+-    * NOTE: we're always use a 1KB-wide blit and I8 texture format.
+-    * We used to use 1, 2 and 4-byte texels and used to use the texture
+-    * width to dictate the blit width - but that won't work for compressed
+-    * textures. (Brian)
+-    * NOTE: can't do that with texture tiling. (sroland)
+-    */
+-   tex.offset = offset;
+-   tex.image = &tmp;
+-   /* copy (x,y,width,height,data) */
+-   memcpy( &tmp, &t->image[face][hwlevel], sizeof(tmp) );
+-   
+-   if (texImage->TexFormat->TexelBytes) {
+-      /* use multi-byte upload scheme */
+-      tex.height = imageHeight;
+-      tex.width = imageWidth;
+-      tex.format = t->pp_txformat & R200_TXFORMAT_FORMAT_MASK;
+-      if (tex.format == R200_TXFORMAT_ABGR8888) {
+-	 /* drm will refuse abgr8888 textures. */
+-	 tex.format = R200_TXFORMAT_ARGB8888;
+-      }
+-      tex.pitch = MAX2((texImage->Width * texImage->TexFormat->TexelBytes) / 64, 1);
+-      tex.offset += tmp.x & ~1023;
+-      tmp.x = tmp.x % 1024;
+-      if (t->tile_bits & R200_TXO_MICRO_TILE) {
+-	 /* need something like "tiled coordinates" ? */
+-	 tmp.y = tmp.x / (tex.pitch * 128) * 2;
+-	 tmp.x = tmp.x % (tex.pitch * 128) / 2 / texImage->TexFormat->TexelBytes;
+-	 tex.pitch |= RADEON_DST_TILE_MICRO >> 22;
+-      }
+-      else {
+-	 tmp.x = tmp.x >> (texImage->TexFormat->TexelBytes >> 1);
+-      }
+-      if ((t->tile_bits & R200_TXO_MACRO_TILE) &&
+-	 (texImage->Width * texImage->TexFormat->TexelBytes >= 256) &&
+-	 ((!(t->tile_bits & R200_TXO_MICRO_TILE) && (texImage->Height >= 8)) ||
+-	    (texImage->Height >= 16))) {
+-	 /* weird: R200 disables macro tiling if mip width is smaller than 256 bytes,
+-	    OR if height is smaller than 8 automatically, but if micro tiling is active
+-	    the limit is height 16 instead ? */
+-	 tex.pitch |= RADEON_DST_TILE_MACRO >> 22;
+-      }
+-   }
+-   else {
+-      /* In case of for instance 8x8 texture (2x2 dxt blocks), padding after the first two blocks is
+-         needed (only with dxt1 since 2 dxt3/dxt5 blocks already use 32 Byte). */
+-      /* set tex.height to 1/4 since 1 "macropixel" (dxt-block) has 4 real pixels. Needed
+-         so the kernel module reads the right amount of data. */
+-      tex.format = R200_TXFORMAT_I8; /* any 1-byte texel format */
+-      tex.pitch = (BLIT_WIDTH_BYTES / 64);
+-      tex.height = (imageHeight + 3) / 4;
+-      tex.width = (imageWidth + 3) / 4;
+-      switch (t->pp_txformat & R200_TXFORMAT_FORMAT_MASK) {
+-      case R200_TXFORMAT_DXT1:
+-           tex.width *= 8;
+-           break;
+-      case R200_TXFORMAT_DXT23:
+-      case R200_TXFORMAT_DXT45:
+-           tex.width *= 16;
+-           break;
+-      default:
+-          fprintf(stderr, "unknown compressed tex format in uploadSubImage\n");
+-      }
+-   }
+-
+-   LOCK_HARDWARE( rmesa );
+-   do {
+-      ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_TEXTURE,
+-                                 &tex, sizeof(drm_radeon_texture_t) );
+-      if (ret) {
+-	 if (R200_DEBUG & DEBUG_IOCTL)
+-	    fprintf(stderr, "DRM_RADEON_TEXTURE:  again!\n");
+-	 usleep(1);
+-      }
+-   } while ( ret == -EAGAIN );
+-
+-   UNLOCK_HARDWARE( rmesa );
+-
+-   if ( ret ) {
+-      fprintf( stderr, "DRM_RADEON_TEXTURE: return = %d\n", ret );
+-      fprintf( stderr, "   offset=0x%08x\n",
+-	       offset );
+-      fprintf( stderr, "   image width=%d height=%d\n",
+-	       imageWidth, imageHeight );
+-      fprintf( stderr, "    blit width=%d height=%d data=%p\n",
+-	       t->image[face][hwlevel].width, t->image[face][hwlevel].height,
+-	       t->image[face][hwlevel].data );
+-      exit( 1 );
+-   }
+-}
+-
+-
+-/**
+- * Upload the texture images associated with texture \a t.  This might
+- * require the allocation of texture memory.
+- * 
+- * \param rmesa Context pointer
+- * \param t Texture to be uploaded
+- * \param face Cube map face to be uploaded.  Zero for non-cube maps.
+- */
+-
+-int r200UploadTexImages( r200ContextPtr rmesa, r200TexObjPtr t, GLuint face )
+-{
+-   const int numLevels = t->base.lastLevel - t->base.firstLevel + 1;
+-
+-   if ( R200_DEBUG & (DEBUG_TEXTURE|DEBUG_IOCTL) ) {
+-      fprintf( stderr, "%s( %p, %p ) sz=%d lvls=%d-%d\n", __FUNCTION__,
+-	       (void *)rmesa->glCtx, (void *)t->base.tObj, t->base.totalSize,
+-	       t->base.firstLevel, t->base.lastLevel );
+-   }
+-
+-   if ( !t || t->base.totalSize == 0 || t->image_override )
+-      return 0;
+-
+-   if (R200_DEBUG & DEBUG_SYNC) {
+-      fprintf(stderr, "%s: Syncing\n", __FUNCTION__ );
+-      r200Finish( rmesa->glCtx );
+-   }
+-
+-   LOCK_HARDWARE( rmesa );
+-
+-   if ( t->base.memBlock == NULL ) {
+-      int heap;
+-
+-      heap = driAllocateTexture( rmesa->texture_heaps, rmesa->nr_heaps,
+-				 (driTextureObject *) t );
+-      if ( heap == -1 ) {
+-	 UNLOCK_HARDWARE( rmesa );
+-	 return -1;
+-      }
+-
+-      /* Set the base offset of the texture image */
+-      t->bufAddr = rmesa->r200Screen->texOffset[heap] 
+-	   + t->base.memBlock->ofs;
+-      t->pp_txoffset = t->bufAddr;
+-       
+-      if (!(t->base.tObj->Image[0][0]->IsClientData)) {
+-	 /* hope it's safe to add that here... */
+-	 t->pp_txoffset |= t->tile_bits;
+-      }
+-
+-      /* Mark this texobj as dirty on all units:
+-       */
+-      t->dirty_state = TEX_ALL;
+-   }
+-
+-   /* Let the world know we've used this memory recently.
+-    */
+-   driUpdateTextureLRU( (driTextureObject *) t );
+-   UNLOCK_HARDWARE( rmesa );
+-
+-   /* Upload any images that are new */
+-   if (t->base.dirty_images[face]) {
+-      int i;
+-      for ( i = 0 ; i < numLevels ; i++ ) {
+-         if ( (t->base.dirty_images[face] & (1 << (i+t->base.firstLevel))) != 0 ) {
+-            uploadSubImage( rmesa, t, i, 0, 0, t->image[face][i].width,
+-			    t->image[face][i].height, face );
+-         }
+-      }
+-      t->base.dirty_images[face] = 0;
+-   }
+-
+-
+-   if (R200_DEBUG & DEBUG_SYNC) {
+-      fprintf(stderr, "%s: Syncing\n", __FUNCTION__ );
+-      r200Finish( rmesa->glCtx );
+-   }
+-
+-   return 0;
+-}
+diff --git a/src/mesa/drivers/dri/r200/r200_texstate.c b/src/mesa/drivers/dri/r200/r200_texstate.c
+index 3f9a2f4..6432068 100644
+--- a/src/mesa/drivers/dri/r200/r200_texstate.c
++++ b/src/mesa/drivers/dri/r200/r200_texstate.c
+@@ -40,6 +40,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "main/texobj.h"
+ #include "main/enums.h"
+ 
++#include "radeon_common.h"
++#include "radeon_mipmap_tree.h"
+ #include "r200_context.h"
+ #include "r200_state.h"
+ #include "r200_ioctl.h"
+@@ -139,257 +141,6 @@ static const struct tx_table tx_table_le[] =
+ #undef _ALPHA
+ #undef _INVALID
+ 
+-/**
+- * This function computes the number of bytes of storage needed for
+- * the given texture object (all mipmap levels, all cube faces).
+- * The \c image[face][level].x/y/width/height parameters for upload/blitting
+- * are computed here.  \c pp_txfilter, \c pp_txformat, etc. will be set here
+- * too.
+- * 
+- * \param rmesa Context pointer
+- * \param tObj GL texture object whose images are to be posted to
+- *                 hardware state.
+- */
+-static void r200SetTexImages( r200ContextPtr rmesa,
+-			      struct gl_texture_object *tObj )
+-{
+-   r200TexObjPtr t = (r200TexObjPtr)tObj->DriverData;
+-   const struct gl_texture_image *baseImage = tObj->Image[0][tObj->BaseLevel];
+-   GLint curOffset, blitWidth;
+-   GLint i, texelBytes;
+-   GLint numLevels;
+-   GLint log2Width, log2Height, log2Depth;
+-
+-   /* Set the hardware texture format
+-    */
+-   if ( !t->image_override ) {
+-      if ( VALID_FORMAT( baseImage->TexFormat->MesaFormat ) ) {
+-	 const struct tx_table *table = _mesa_little_endian() ? tx_table_le :
+-								tx_table_be;
+-
+-         t->pp_txformat &= ~(R200_TXFORMAT_FORMAT_MASK |
+-                             R200_TXFORMAT_ALPHA_IN_MAP);
+-         t->pp_txfilter &= ~R200_YUV_TO_RGB;
+-
+-	 t->pp_txformat |= table[ baseImage->TexFormat->MesaFormat ].format;
+-	 t->pp_txfilter |= table[ baseImage->TexFormat->MesaFormat ].filter;
+-      }
+-      else {
+-         _mesa_problem(NULL, "unexpected texture format in %s", __FUNCTION__);
+-         return;
+-      }
+-   }
+-
+-   texelBytes = baseImage->TexFormat->TexelBytes;
+-
+-   /* Compute which mipmap levels we really want to send to the hardware.
+-    */
+-
+-   driCalculateTextureFirstLastLevel( (driTextureObject *) t );
+-   log2Width  = tObj->Image[0][t->base.firstLevel]->WidthLog2;
+-   log2Height = tObj->Image[0][t->base.firstLevel]->HeightLog2;
+-   log2Depth  = tObj->Image[0][t->base.firstLevel]->DepthLog2;
+-
+-   numLevels = t->base.lastLevel - t->base.firstLevel + 1;
+-
+-   assert(numLevels <= RADEON_MAX_TEXTURE_LEVELS);
+-
+-   /* Calculate mipmap offsets and dimensions for blitting (uploading)
+-    * The idea is that we lay out the mipmap levels within a block of
+-    * memory organized as a rectangle of width BLIT_WIDTH_BYTES.
+-    */
+-   curOffset = 0;
+-   blitWidth = BLIT_WIDTH_BYTES;
+-   t->tile_bits = 0;
+-
+-   /* figure out if this texture is suitable for tiling. */
+-   if (texelBytes) {
+-      if (rmesa->texmicrotile  && (tObj->Target != GL_TEXTURE_RECTANGLE_NV) &&
+-      /* texrect might be able to use micro tiling too in theory? */
+-	 (baseImage->Height > 1)) {
+-	 /* allow 32 (bytes) x 1 mip (which will use two times the space
+-	 the non-tiled version would use) max if base texture is large enough */
+-	 if ((numLevels == 1) ||
+-	   (((baseImage->Width * texelBytes / baseImage->Height) <= 32) &&
+-	       (baseImage->Width * texelBytes > 64)) ||
+-	    ((baseImage->Width * texelBytes / baseImage->Height) <= 16)) {
+-	    t->tile_bits |= R200_TXO_MICRO_TILE;
+-	 }
+-      }
+-      if (tObj->Target != GL_TEXTURE_RECTANGLE_NV) {
+-	 /* we can set macro tiling even for small textures, they will be untiled anyway */
+-	 t->tile_bits |= R200_TXO_MACRO_TILE;
+-      }
+-   }
+-
+-   for (i = 0; i < numLevels; i++) {
+-      const struct gl_texture_image *texImage;
+-      GLuint size;
+-
+-      texImage = tObj->Image[0][i + t->base.firstLevel];
+-      if ( !texImage )
+-	 break;
+-
+-      /* find image size in bytes */
+-      if (texImage->IsCompressed) {
+-      /* need to calculate the size AFTER padding even though the texture is
+-         submitted without padding.
+-         Only handle pot textures currently - don't know if npot is even possible,
+-         size calculation would certainly need (trivial) adjustments.
+-         Align (and later pad) to 32byte, not sure what that 64byte blit width is
+-         good for? */
+-         if ((t->pp_txformat & R200_TXFORMAT_FORMAT_MASK) == R200_TXFORMAT_DXT1) {
+-            /* RGB_DXT1/RGBA_DXT1, 8 bytes per block */
+-            if ((texImage->Width + 3) < 8) /* width one block */
+-               size = texImage->CompressedSize * 4;
+-            else if ((texImage->Width + 3) < 16)
+-               size = texImage->CompressedSize * 2;
+-            else size = texImage->CompressedSize;
+-         }
+-         else /* DXT3/5, 16 bytes per block */
+-            if ((texImage->Width + 3) < 8)
+-               size = texImage->CompressedSize * 2;
+-            else size = texImage->CompressedSize;
+-      }
+-      else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
+-	 size = ((texImage->Width * texelBytes + 63) & ~63) * texImage->Height;
+-      }
+-      else if (t->tile_bits & R200_TXO_MICRO_TILE) {
+-	 /* tile pattern is 16 bytes x2. mipmaps stay 32 byte aligned,
+-	    though the actual offset may be different (if texture is less than
+-	    32 bytes width) to the untiled case */
+-	 int w = (texImage->Width * texelBytes * 2 + 31) & ~31;
+-	 size = (w * ((texImage->Height + 1) / 2)) * texImage->Depth;
+-	 blitWidth = MAX2(texImage->Width, 64 / texelBytes);
+-      }
+-      else {
+-	 int w = (texImage->Width * texelBytes + 31) & ~31;
+-	 size = w * texImage->Height * texImage->Depth;
+-	 blitWidth = MAX2(texImage->Width, 64 / texelBytes);
+-      }
+-      assert(size > 0);
+-
+-      /* Align to 32-byte offset.  It is faster to do this unconditionally
+-       * (no branch penalty).
+-       */
+-
+-      curOffset = (curOffset + 0x1f) & ~0x1f;
+-
+-      if (texelBytes) {
+-	 t->image[0][i].x = curOffset; /* fix x and y coords up later together with offset */
+-	 t->image[0][i].y = 0;
+-	 t->image[0][i].width = MIN2(size / texelBytes, blitWidth);
+-	 t->image[0][i].height = (size / texelBytes) / t->image[0][i].width;
+-      }
+-      else {
+-         t->image[0][i].x = curOffset % BLIT_WIDTH_BYTES;
+-         t->image[0][i].y = curOffset / BLIT_WIDTH_BYTES;
+-         t->image[0][i].width  = MIN2(size, BLIT_WIDTH_BYTES);
+-         t->image[0][i].height = size / t->image[0][i].width;     
+-      }
+-
+-#if 0
+-      /* for debugging only and only  applicable to non-rectangle targets */
+-      assert(size % t->image[0][i].width == 0);
+-      assert(t->image[0][i].x == 0
+-             || (size < BLIT_WIDTH_BYTES && t->image[0][i].height == 1));
+-#endif
+-
+-      if (0)
+-         fprintf(stderr,
+-                 "level %d: %dx%d x=%d y=%d w=%d h=%d size=%d at %d\n",
+-                 i, texImage->Width, texImage->Height,
+-                 t->image[0][i].x, t->image[0][i].y,
+-                 t->image[0][i].width, t->image[0][i].height, size, curOffset);
+-
+-      curOffset += size;
+-
+-   }
+-
+-   /* Align the total size of texture memory block.
+-    */
+-   t->base.totalSize = (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK;
+-
+-   /* Setup remaining cube face blits, if needed */
+-   if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
+-      const GLuint faceSize = t->base.totalSize;
+-      GLuint face;
+-      /* reuse face 0 x/y/width/height - just update the offset when uploading */
+-      for (face = 1; face < 6; face++) {
+-         for (i = 0; i < numLevels; i++) {
+-            t->image[face][i].x =  t->image[0][i].x;
+-            t->image[face][i].y =  t->image[0][i].y;
+-            t->image[face][i].width  = t->image[0][i].width;
+-            t->image[face][i].height = t->image[0][i].height;
+-         }
+-      }
+-      t->base.totalSize = 6 * faceSize; /* total texmem needed */
+-   }
+-
+-
+-   /* Hardware state:
+-    */
+-   t->pp_txfilter &= ~R200_MAX_MIP_LEVEL_MASK;
+-   t->pp_txfilter |= (numLevels - 1) << R200_MAX_MIP_LEVEL_SHIFT;
+-
+-   t->pp_txformat &= ~(R200_TXFORMAT_WIDTH_MASK |
+-		       R200_TXFORMAT_HEIGHT_MASK |
+-                       R200_TXFORMAT_CUBIC_MAP_ENABLE |
+-                       R200_TXFORMAT_F5_WIDTH_MASK |
+-                       R200_TXFORMAT_F5_HEIGHT_MASK);
+-   t->pp_txformat |= ((log2Width << R200_TXFORMAT_WIDTH_SHIFT) |
+-		      (log2Height << R200_TXFORMAT_HEIGHT_SHIFT));
+-
+-   t->pp_txformat_x &= ~(R200_DEPTH_LOG2_MASK | R200_TEXCOORD_MASK);
+-   if (tObj->Target == GL_TEXTURE_3D) {
+-      t->pp_txformat_x |= (log2Depth << R200_DEPTH_LOG2_SHIFT);
+-      t->pp_txformat_x |= R200_TEXCOORD_VOLUME;
+-   }
+-   else if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
+-      ASSERT(log2Width == log2Height);
+-      t->pp_txformat |= ((log2Width << R200_TXFORMAT_F5_WIDTH_SHIFT) |
+-                         (log2Height << R200_TXFORMAT_F5_HEIGHT_SHIFT) |
+-/* don't think we need this bit, if it exists at all - fglrx does not set it */
+-                         (R200_TXFORMAT_CUBIC_MAP_ENABLE));
+-      t->pp_txformat_x |= R200_TEXCOORD_CUBIC_ENV;
+-      t->pp_cubic_faces = ((log2Width << R200_FACE_WIDTH_1_SHIFT) |
+-                           (log2Height << R200_FACE_HEIGHT_1_SHIFT) |
+-                           (log2Width << R200_FACE_WIDTH_2_SHIFT) |
+-                           (log2Height << R200_FACE_HEIGHT_2_SHIFT) |
+-                           (log2Width << R200_FACE_WIDTH_3_SHIFT) |
+-                           (log2Height << R200_FACE_HEIGHT_3_SHIFT) |
+-                           (log2Width << R200_FACE_WIDTH_4_SHIFT) |
+-                           (log2Height << R200_FACE_HEIGHT_4_SHIFT));
+-   }
+-   else {
+-      /* If we don't in fact send enough texture coordinates, q will be 1,
+-       * making TEXCOORD_PROJ act like TEXCOORD_NONPROJ (Right?)
+-       */
+-      t->pp_txformat_x |= R200_TEXCOORD_PROJ;
+-   }
+-
+-   t->pp_txsize = (((tObj->Image[0][t->base.firstLevel]->Width - 1) << 0) |
+-                   ((tObj->Image[0][t->base.firstLevel]->Height - 1) << 16));
+-
+-   /* Only need to round to nearest 32 for textures, but the blitter
+-    * requires 64-byte aligned pitches, and we may/may not need the
+-    * blitter.   NPOT only!
+-    */
+-   if ( !t->image_override ) {
+-      if (baseImage->IsCompressed)
+-         t->pp_txpitch = (tObj->Image[0][t->base.firstLevel]->Width + 63) & ~(63);
+-      else
+-         t->pp_txpitch = ((tObj->Image[0][t->base.firstLevel]->Width * texelBytes) + 63) & ~(63);
+-      t->pp_txpitch -= 32;
+-   }
+-
+-   t->dirty_state = TEX_ALL;
+-
+-   /* FYI: r200UploadTexImages( rmesa, t ) used to be called here */
+-}
+-
+-
+-
+ /* ================================================================
+  * Texture combine functions
+  */
+@@ -981,20 +732,19 @@ void r200SetTexOffset(__DRIcontext * pDRICtx, GLint texname,
+ {
+ 	r200ContextPtr rmesa = pDRICtx->driverPrivate;
+ 	struct gl_texture_object *tObj =
+-	    _mesa_lookup_texture(rmesa->glCtx, texname);
+-	r200TexObjPtr t;
++	    _mesa_lookup_texture(rmesa->radeon.glCtx, texname);
++	radeonTexObjPtr t = radeon_tex_obj(tObj);
+ 
+ 	if (!tObj)
+ 		return;
+ 
+-	t = (r200TexObjPtr) tObj->DriverData;
+-
+ 	t->image_override = GL_TRUE;
+ 
+ 	if (!offset)
+ 		return;
+ 
+-	t->pp_txoffset = offset;
++	t->bo = NULL;
++	t->override_offset = offset;
+ 	t->pp_txpitch = pitch - 32;
+ 
+ 	switch (depth) {
+@@ -1207,12 +957,41 @@ static GLboolean r200UpdateAllTexEnv( GLcontext *ctx )
+                                 R200_VOLUME_FILTER_MASK)
+ 
+ 
++static void disable_tex_obj_state( r200ContextPtr rmesa, 
++				   int unit )
++{
++   
++   R200_STATECHANGE( rmesa, vtx );
++   rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] &= ~(7 << (unit * 3));
++
++   if (rmesa->radeon.TclFallback & (R200_TCL_FALLBACK_TEXGEN_0<<unit)) {
++      TCL_FALLBACK( rmesa->radeon.glCtx, (R200_TCL_FALLBACK_TEXGEN_0<<unit), GL_FALSE);
++   }
++
++   /* Actually want to keep all units less than max active texture
++    * enabled, right?  Fix this for >2 texunits.
++    */
++
++   {
++      GLuint tmp = rmesa->TexGenEnabled;
++
++      rmesa->TexGenEnabled &= ~(R200_TEXGEN_TEXMAT_0_ENABLE<<unit);
++      rmesa->TexGenEnabled &= ~(R200_TEXMAT_0_ENABLE<<unit);
++      rmesa->TexGenNeedNormals[unit] = GL_FALSE;
++      rmesa->TexGenCompSel &= ~(R200_OUTPUT_TEX_0 << unit);
++
++      if (tmp != rmesa->TexGenEnabled) {
++	 rmesa->recheck_texgen[unit] = GL_TRUE;
++	 rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX;
++      }
++   }
++}
+ static void import_tex_obj_state( r200ContextPtr rmesa,
+ 				  int unit,
+-				  r200TexObjPtr texobj )
++				  radeonTexObjPtr texobj )
+ {
+ /* do not use RADEON_DB_STATE to avoid stale texture caches */
+-   int *cmd = &rmesa->hw.tex[unit].cmd[TEX_CMD_0];
++   GLuint *cmd = &rmesa->hw.tex[unit].cmd[TEX_CMD_0];
+ 
+    R200_STATECHANGE( rmesa, tex[unit] );
+ 
+@@ -1225,36 +1004,21 @@ static void import_tex_obj_state( r200ContextPtr rmesa,
+    cmd[TEX_PP_TXSIZE] = texobj->pp_txsize; /* NPOT only! */
+    cmd[TEX_PP_TXPITCH] = texobj->pp_txpitch; /* NPOT only! */
+    cmd[TEX_PP_BORDER_COLOR] = texobj->pp_border_color;
+-   if (rmesa->r200Screen->drmSupportsFragShader) {
+-      cmd[TEX_PP_TXOFFSET_NEWDRM] = texobj->pp_txoffset;
+-   }
+-   else {
+-      cmd[TEX_PP_TXOFFSET_OLDDRM] = texobj->pp_txoffset;
+-   }
+ 
+-   if (texobj->base.tObj->Target == GL_TEXTURE_CUBE_MAP) {
+-      int *cube_cmd = &rmesa->hw.cube[unit].cmd[CUBE_CMD_0];
+-      GLuint bytesPerFace = texobj->base.totalSize / 6;
+-      ASSERT(texobj->base.totalSize % 6 == 0);
++   if (texobj->base.Target == GL_TEXTURE_CUBE_MAP) {
++      GLuint *cube_cmd = &rmesa->hw.cube[unit].cmd[CUBE_CMD_0];
+ 
+       R200_STATECHANGE( rmesa, cube[unit] );
+       cube_cmd[CUBE_PP_CUBIC_FACES] = texobj->pp_cubic_faces;
+-      if (rmesa->r200Screen->drmSupportsFragShader) {
++      if (rmesa->radeon.radeonScreen->drmSupportsFragShader) {
+ 	 /* that value is submitted twice. could change cube atom
+ 	    to not include that command when new drm is used */
+ 	 cmd[TEX_PP_CUBIC_FACES] = texobj->pp_cubic_faces;
+       }
+-      cube_cmd[CUBE_PP_CUBIC_OFFSET_F1] = texobj->pp_txoffset + 1 * bytesPerFace;
+-      cube_cmd[CUBE_PP_CUBIC_OFFSET_F2] = texobj->pp_txoffset + 2 * bytesPerFace;
+-      cube_cmd[CUBE_PP_CUBIC_OFFSET_F3] = texobj->pp_txoffset + 3 * bytesPerFace;
+-      cube_cmd[CUBE_PP_CUBIC_OFFSET_F4] = texobj->pp_txoffset + 4 * bytesPerFace;
+-      cube_cmd[CUBE_PP_CUBIC_OFFSET_F5] = texobj->pp_txoffset + 5 * bytesPerFace;
+    }
+ 
+-   texobj->dirty_state &= ~(1<<unit);
+ }
+ 
+-
+ static void set_texgen_matrix( r200ContextPtr rmesa, 
+ 			       GLuint unit,
+ 			       const GLfloat *s_plane,
+@@ -1377,7 +1141,6 @@ static GLboolean r200_validate_texgen( GLcontext *ctx, GLuint unit )
+    } else {
+       tgcm |= R200_TEXGEN_COMP_T << (unit * 4);
+    }
+-
+    if (texUnit->TexGenEnabled & R_BIT) {
+       if (texUnit->GenModeR != mode)
+ 	 mixed_fallback = GL_TRUE;
+@@ -1513,52 +1276,6 @@ static GLboolean r200_validate_texgen( GLcontext *ctx, GLuint unit )
+    return GL_TRUE;
+ }
+ 
+-
+-static void disable_tex( GLcontext *ctx, int unit )
+-{
+-   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+-
+-   if (rmesa->hw.ctx.cmd[CTX_PP_CNTL] & (R200_TEX_0_ENABLE<<unit)) {
+-      /* Texture unit disabled */
+-      if ( rmesa->state.texture.unit[unit].texobj != NULL ) {
+-	 /* The old texture is no longer bound to this texture unit.
+-	  * Mark it as such.
+-	  */
+-
+-	 rmesa->state.texture.unit[unit].texobj->base.bound &= ~(1UL << unit);
+-	 rmesa->state.texture.unit[unit].texobj = NULL;
+-      }
+-
+-      R200_STATECHANGE( rmesa, ctx );
+-      rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~(R200_TEX_0_ENABLE << unit);
+-	 
+-      R200_STATECHANGE( rmesa, vtx );
+-      rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] &= ~(7 << (unit * 3));
+-	 
+-      if (rmesa->TclFallback & (R200_TCL_FALLBACK_TEXGEN_0<<unit)) {
+-	 TCL_FALLBACK( ctx, (R200_TCL_FALLBACK_TEXGEN_0<<unit), GL_FALSE);
+-      }
+-
+-      /* Actually want to keep all units less than max active texture
+-       * enabled, right?  Fix this for >2 texunits.
+-       */
+-
+-      {
+-	 GLuint tmp = rmesa->TexGenEnabled;
+-
+-	 rmesa->TexGenEnabled &= ~(R200_TEXGEN_TEXMAT_0_ENABLE<<unit);
+-	 rmesa->TexGenEnabled &= ~(R200_TEXMAT_0_ENABLE<<unit);
+-	 rmesa->TexGenNeedNormals[unit] = GL_FALSE;
+-	 rmesa->TexGenCompSel &= ~(R200_OUTPUT_TEX_0 << unit);
+-
+-	 if (tmp != rmesa->TexGenEnabled) {
+-	    rmesa->recheck_texgen[unit] = GL_TRUE;
+-	    rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
+-	 }
+-      }
+-   }
+-}
+-
+ void set_re_cntl_d3d( GLcontext *ctx, int unit, GLboolean use_d3d )
+ {
+    r200ContextPtr rmesa = R200_CONTEXT(ctx);
+@@ -1575,237 +1292,165 @@ void set_re_cntl_d3d( GLcontext *ctx, int unit, GLboolean use_d3d )
+    }
+ }
+ 
+-static GLboolean enable_tex_2d( GLcontext *ctx, int unit )
+-{
+-   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+-   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+-   struct gl_texture_object *tObj = texUnit->_Current;
+-   r200TexObjPtr t = (r200TexObjPtr) tObj->DriverData;
+-
+-   /* Need to load the 2d images associated with this unit.
+-    */
+-   if (t->pp_txformat & R200_TXFORMAT_NON_POWER2) {
+-      t->pp_txformat &= ~R200_TXFORMAT_NON_POWER2;
+-      t->base.dirty_images[0] = ~0;
+-   }
+-
+-   ASSERT(tObj->Target == GL_TEXTURE_2D || tObj->Target == GL_TEXTURE_1D);
+-
+-   if ( t->base.dirty_images[0] ) {
+-      R200_FIREVERTICES( rmesa );
+-      r200SetTexImages( rmesa, tObj );
+-      r200UploadTexImages( rmesa, (r200TexObjPtr) tObj->DriverData, 0 );
+-      if ( !t->base.memBlock && !t->image_override ) 
+-	 return GL_FALSE;
+-   }
+-
+-   set_re_cntl_d3d( ctx, unit, GL_FALSE );
+-
+-   return GL_TRUE;
+-}
+-
+-#if ENABLE_HW_3D_TEXTURE
+-static GLboolean enable_tex_3d( GLcontext *ctx, int unit )
++/**
++ * Compute the cached hardware register values for the given texture object.
++ *
++ * \param rmesa Context pointer
++ * \param t the radeon texture object
++ */
++static void setup_hardware_state(r200ContextPtr rmesa, radeonTexObj *t)
+ {
+-   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+-   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+-   struct gl_texture_object *tObj = texUnit->_Current;
+-   r200TexObjPtr t = (r200TexObjPtr) tObj->DriverData;
+-
+-   /* Need to load the 3d images associated with this unit.
+-    */
+-   if (t->pp_txformat & R200_TXFORMAT_NON_POWER2) {
+-      t->pp_txformat &= ~R200_TXFORMAT_NON_POWER2;
+-      t->base.dirty_images[0] = ~0;
++   const struct gl_texture_image *firstImage =
++      t->base.Image[0][t->mt->firstLevel];
++   GLint log2Width, log2Height, log2Depth, texelBytes;
++   
++   log2Width  = firstImage->WidthLog2;
++   log2Height = firstImage->HeightLog2;
++   log2Depth  = firstImage->DepthLog2;
++   texelBytes = firstImage->TexFormat->TexelBytes;
++
++
++   if (!t->image_override) {
++      if (VALID_FORMAT(firstImage->TexFormat->MesaFormat)) {
++	 const struct tx_table *table = _mesa_little_endian() ? tx_table_le :
++	    tx_table_be;
++	 
++	 t->pp_txformat &= ~(R200_TXFORMAT_FORMAT_MASK |
++			     R200_TXFORMAT_ALPHA_IN_MAP);
++	 t->pp_txfilter &= ~R200_YUV_TO_RGB;
++	 
++	 t->pp_txformat |= table[ firstImage->TexFormat->MesaFormat ].format;
++	 t->pp_txfilter |= table[ firstImage->TexFormat->MesaFormat ].filter;
++      } else {
++	 _mesa_problem(NULL, "unexpected texture format in %s",
++		       __FUNCTION__);
++	 return;
++      }
+    }
++   
++   t->pp_txfilter &= ~R200_MAX_MIP_LEVEL_MASK;
++   t->pp_txfilter |= (t->mt->lastLevel - t->mt->firstLevel) << R200_MAX_MIP_LEVEL_SHIFT;
++	
++   t->pp_txformat &= ~(R200_TXFORMAT_WIDTH_MASK |
++		       R200_TXFORMAT_HEIGHT_MASK |
++		       R200_TXFORMAT_CUBIC_MAP_ENABLE |
++		       R200_TXFORMAT_F5_WIDTH_MASK |
++		       R200_TXFORMAT_F5_HEIGHT_MASK);
++   t->pp_txformat |= ((log2Width << R200_TXFORMAT_WIDTH_SHIFT) |
++		      (log2Height << R200_TXFORMAT_HEIGHT_SHIFT));
++   
++   t->tile_bits = 0;
++   
++   t->pp_txformat_x &= ~(R200_DEPTH_LOG2_MASK | R200_TEXCOORD_MASK);
++   if (t->base.Target == GL_TEXTURE_3D) {
++      t->pp_txformat_x |= (log2Depth << R200_DEPTH_LOG2_SHIFT);
++      t->pp_txformat_x |= R200_TEXCOORD_VOLUME;
+ 
+-   ASSERT(tObj->Target == GL_TEXTURE_3D);
+-
+-   /* R100 & R200 do not support mipmaps for 3D textures.
+-    */
+-   if ( (tObj->MinFilter != GL_NEAREST) && (tObj->MinFilter != GL_LINEAR) ) {
+-      return GL_FALSE;
+    }
+-
+-   if ( t->base.dirty_images[0] ) {
+-      R200_FIREVERTICES( rmesa );
+-      r200SetTexImages( rmesa, tObj );
+-      r200UploadTexImages( rmesa, (r200TexObjPtr) tObj->DriverData, 0 );
+-      if ( !t->base.memBlock ) 
+-	 return GL_FALSE;
++   else if (t->base.Target == GL_TEXTURE_CUBE_MAP) {
++      ASSERT(log2Width == log2Height);
++      t->pp_txformat |= ((log2Width << R200_TXFORMAT_F5_WIDTH_SHIFT) |
++			 (log2Height << R200_TXFORMAT_F5_HEIGHT_SHIFT) |
++			 /* don't think we need this bit, if it exists at all - fglrx does not set it */
++			 (R200_TXFORMAT_CUBIC_MAP_ENABLE));
++      t->pp_txformat_x |= R200_TEXCOORD_CUBIC_ENV;
++      t->pp_cubic_faces = ((log2Width << R200_FACE_WIDTH_1_SHIFT) |
++                           (log2Height << R200_FACE_HEIGHT_1_SHIFT) |
++                           (log2Width << R200_FACE_WIDTH_2_SHIFT) |
++                           (log2Height << R200_FACE_HEIGHT_2_SHIFT) |
++                           (log2Width << R200_FACE_WIDTH_3_SHIFT) |
++                           (log2Height << R200_FACE_HEIGHT_3_SHIFT) |
++                           (log2Width << R200_FACE_WIDTH_4_SHIFT) |
++                           (log2Height << R200_FACE_HEIGHT_4_SHIFT));
+    }
+-
+-   set_re_cntl_d3d( ctx, unit, GL_TRUE );
+-
+-   return GL_TRUE;
+-}
+-#endif
+-
+-static GLboolean enable_tex_cube( GLcontext *ctx, int unit )
+-{
+-   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+-   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+-   struct gl_texture_object *tObj = texUnit->_Current;
+-   r200TexObjPtr t = (r200TexObjPtr) tObj->DriverData;
+-   GLuint face;
+-
+-   /* Need to load the 2d images associated with this unit.
+-    */
+-   if (t->pp_txformat & R200_TXFORMAT_NON_POWER2) {
+-      t->pp_txformat &= ~R200_TXFORMAT_NON_POWER2;
+-      for (face = 0; face < 6; face++)
+-         t->base.dirty_images[face] = ~0;
++   else {
++      /* If we don't in fact send enough texture coordinates, q will be 1,
++       * making TEXCOORD_PROJ act like TEXCOORD_NONPROJ (Right?)
++       */
++      t->pp_txformat_x |= R200_TEXCOORD_PROJ;
+    }
+ 
+-   ASSERT(tObj->Target == GL_TEXTURE_CUBE_MAP);
+-
+-   if ( t->base.dirty_images[0] || t->base.dirty_images[1] ||
+-        t->base.dirty_images[2] || t->base.dirty_images[3] ||
+-        t->base.dirty_images[4] || t->base.dirty_images[5] ) {
+-      /* flush */
+-      R200_FIREVERTICES( rmesa );
+-      /* layout memory space, once for all faces */
+-      r200SetTexImages( rmesa, tObj );
+-   }
++   t->pp_txsize = (((firstImage->Width - 1) << R200_PP_TX_WIDTHMASK_SHIFT)
++		   | ((firstImage->Height - 1) << R200_PP_TX_HEIGHTMASK_SHIFT));
+ 
+-   /* upload (per face) */
+-   for (face = 0; face < 6; face++) {
+-      if (t->base.dirty_images[face]) {
+-         r200UploadTexImages( rmesa, (r200TexObjPtr) tObj->DriverData, face );
+-      }
+-   }
+-      
+-   if ( !t->base.memBlock ) {
+-      /* texmem alloc failed, use s/w fallback */
+-      return GL_FALSE;
++   if ( !t->image_override ) {
++      if (firstImage->IsCompressed)
++         t->pp_txpitch = (firstImage->Width + 63) & ~(63);
++      else
++         t->pp_txpitch = ((firstImage->Width * texelBytes) + 63) & ~(63);
++      t->pp_txpitch -= 32;
+    }
+ 
+-   set_re_cntl_d3d( ctx, unit, GL_TRUE );
+-
+-   return GL_TRUE;
+-}
+-
+-static GLboolean enable_tex_rect( GLcontext *ctx, int unit )
+-{
+-   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+-   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+-   struct gl_texture_object *tObj = texUnit->_Current;
+-   r200TexObjPtr t = (r200TexObjPtr) tObj->DriverData;
+-
+-   if (!(t->pp_txformat & R200_TXFORMAT_NON_POWER2)) {
++   if (t->base.Target == GL_TEXTURE_RECTANGLE_NV) {
+       t->pp_txformat |= R200_TXFORMAT_NON_POWER2;
+-      t->base.dirty_images[0] = ~0;
+    }
+ 
+-   ASSERT(tObj->Target == GL_TEXTURE_RECTANGLE_NV);
+-
+-   if ( t->base.dirty_images[0] ) {
+-      R200_FIREVERTICES( rmesa );
+-      r200SetTexImages( rmesa, tObj );
+-      r200UploadTexImages( rmesa, (r200TexObjPtr) tObj->DriverData, 0 );
+-      if ( !t->base.memBlock &&
+-           !t->image_override &&
+-           !rmesa->prefer_gart_client_texturing ) 
+-	 return GL_FALSE;
+-   }
+-
+-   set_re_cntl_d3d( ctx, unit, GL_FALSE );
+-
+-   return GL_TRUE;
+ }
+ 
+-
+-static GLboolean update_tex_common( GLcontext *ctx, int unit )
++static GLboolean r200_validate_texture(GLcontext *ctx, struct gl_texture_object *texObj, int unit)
+ {
+    r200ContextPtr rmesa = R200_CONTEXT(ctx);
+-   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+-   struct gl_texture_object *tObj = texUnit->_Current;
+-   r200TexObjPtr t = (r200TexObjPtr) tObj->DriverData;
+-
+-   /* Fallback if there's a texture border */
+-   if ( tObj->Image[0][tObj->BaseLevel]->Border > 0 )
+-       return GL_FALSE;
+-
+-   /* Update state if this is a different texture object to last
+-    * time.
+-    */
+-   if ( rmesa->state.texture.unit[unit].texobj != t ) {
+-      if ( rmesa->state.texture.unit[unit].texobj != NULL ) {
+-	 /* The old texture is no longer bound to this texture unit.
+-	  * Mark it as such.
+-	  */
+-
+-	 rmesa->state.texture.unit[unit].texobj->base.bound &= 
+-	     ~(1UL << unit);
+-      }
++   radeonTexObj *t = radeon_tex_obj(texObj);
+ 
+-      rmesa->state.texture.unit[unit].texobj = t;
+-      t->base.bound |= (1UL << unit);
+-      t->dirty_state |= 1<<unit;
+-      driUpdateTextureLRU( (driTextureObject *) t ); /* XXX: should be locked! */
+-   }
+-
+-
+-   /* Newly enabled?
+-    */
+-   if ( 1|| !(rmesa->hw.ctx.cmd[CTX_PP_CNTL] & (R200_TEX_0_ENABLE<<unit))) {
+-      R200_STATECHANGE( rmesa, ctx );
+-      rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_TEX_0_ENABLE << unit;
++   if (!radeon_validate_texture_miptree(ctx, texObj))
++      return GL_FALSE;
+ 
+-      R200_STATECHANGE( rmesa, vtx );
+-      rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] &= ~(7 << (unit * 3));
+-      rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] |= 4 << (unit * 3);
++   r200_validate_texgen(ctx, unit);
++   /* Configure the hardware registers (more precisely, the cached version
++    * of the hardware registers). */
++   setup_hardware_state(rmesa, t);
++
++   if (texObj->Target == GL_TEXTURE_RECTANGLE_NV ||
++       texObj->Target == GL_TEXTURE_2D ||
++       texObj->Target == GL_TEXTURE_1D)
++      set_re_cntl_d3d( ctx, unit, GL_FALSE );
++   else
++      set_re_cntl_d3d( ctx, unit, GL_TRUE );
++   R200_STATECHANGE( rmesa, ctx );
++   rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_TEX_0_ENABLE << unit;
++   
++   R200_STATECHANGE( rmesa, vtx );
++   rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] &= ~(7 << (unit * 3));
++   rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] |= 4 << (unit * 3);
+ 
+-      rmesa->recheck_texgen[unit] = GL_TRUE;
+-   }
+-
+-   if (t->dirty_state & (1<<unit)) {
+-      import_tex_obj_state( rmesa, unit, t );
+-   }
++   rmesa->recheck_texgen[unit] = GL_TRUE;
++   import_tex_obj_state( rmesa, unit, t );
+ 
+    if (rmesa->recheck_texgen[unit]) {
+       GLboolean fallback = !r200_validate_texgen( ctx, unit );
+       TCL_FALLBACK( ctx, (R200_TCL_FALLBACK_TEXGEN_0<<unit), fallback);
+       rmesa->recheck_texgen[unit] = 0;
+-      rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
++      rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX;
+    }
+ 
+-   FALLBACK( rmesa, R200_FALLBACK_BORDER_MODE, t->border_fallback );
+-   return !t->border_fallback;
+-}
++   t->validated = GL_TRUE;
+ 
++   FALLBACK( rmesa, RADEON_FALLBACK_BORDER_MODE, t->border_fallback );
+ 
++   return !t->border_fallback;
++}
+ 
+-static GLboolean r200UpdateTextureUnit( GLcontext *ctx, int unit )
++static GLboolean r200UpdateTextureUnit(GLcontext *ctx, int unit)
+ {
+    r200ContextPtr rmesa = R200_CONTEXT(ctx);
+    GLuint unitneeded = rmesa->state.texture.unit[unit].unitneeded;
+ 
+-   if ( unitneeded & (TEXTURE_RECT_BIT) ) {
+-      return (enable_tex_rect( ctx, unit ) &&
+-	      update_tex_common( ctx, unit ));
+-   }
+-   else if ( unitneeded & (TEXTURE_1D_BIT | TEXTURE_2D_BIT) ) {
+-      return (enable_tex_2d( ctx, unit ) &&
+-	      update_tex_common( ctx, unit ));
+-   }
+-#if ENABLE_HW_3D_TEXTURE
+-   else if ( unitneeded & (TEXTURE_3D_BIT) ) {
+-      return (enable_tex_3d( ctx, unit ) &&
+-	      update_tex_common( ctx, unit ));
+-   }
+-#endif
+-   else if ( unitneeded & (TEXTURE_CUBE_BIT) ) {
+-      return (enable_tex_cube( ctx, unit ) &&
+-	      update_tex_common( ctx, unit ));
+-   }
+-   else if ( unitneeded ) {
+-      return GL_FALSE;
+-   }
+-   else {
+-      disable_tex( ctx, unit );
+-      return GL_TRUE;
++   if (!unitneeded) {
++      /* disable the unit */
++     disable_tex_obj_state(rmesa, unit);
++     return GL_TRUE;
+    }
++
++   if (!r200_validate_texture(ctx, ctx->Texture.Unit[unit]._Current, unit)) {
++    _mesa_warning(ctx,
++		  "failed to validate texture for unit %d.\n",
++		  unit);
++    rmesa->state.texture.unit[unit].texobj = NULL;
++    return GL_FALSE;
++  }
++
++   rmesa->state.texture.unit[unit].texobj = radeon_tex_obj(ctx->Texture.Unit[unit]._Current);
++  return GL_TRUE;
+ }
+ 
+ 
+@@ -1846,11 +1491,11 @@ void r200UpdateTextureState( GLcontext *ctx )
+ 
+    FALLBACK( rmesa, R200_FALLBACK_TEXTURE, !ok );
+ 
+-   if (rmesa->TclFallback)
++   if (rmesa->radeon.TclFallback)
+       r200ChooseVertexState( ctx );
+ 
+ 
+-   if (rmesa->r200Screen->chip_family == CHIP_FAMILY_R200) {
++   if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200) {
+ 
+       /*
+        * T0 hang workaround -------------
+@@ -1863,7 +1508,7 @@ void r200UpdateTextureState( GLcontext *ctx )
+ 	 R200_STATECHANGE(rmesa, tex[1]);
+ 	 rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_TEX_1_ENABLE;
+ 	 if (!(rmesa->hw.cst.cmd[CST_PP_CNTL_X] & R200_PPX_TEX_1_ENABLE))
+-	    rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] &= ~TEXOBJ_TXFORMAT_MASK;
++	   rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] &= ~TEXOBJ_TXFORMAT_MASK;
+ 	 rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] |= R200_TXFORMAT_LOOKUP_DISABLE;
+       }
+       else if (!ctx->ATIFragmentShader._Enabled) {
+diff --git a/src/mesa/drivers/dri/r200/r200_vertprog.c b/src/mesa/drivers/dri/r200/r200_vertprog.c
+index 562992f..888f91d 100644
+--- a/src/mesa/drivers/dri/r200/r200_vertprog.c
++++ b/src/mesa/drivers/dri/r200/r200_vertprog.c
+@@ -1110,9 +1110,9 @@ void r200SetupVertexProg( GLcontext *ctx ) {
+    }
+    /* could optimize setting up vertex progs away for non-tcl hw */
+    fallback = !(vp->native && r200VertexProgUpdateParams(ctx, vp) &&
+-      rmesa->r200Screen->drmSupportsVertexProgram);
++      rmesa->radeon.radeonScreen->drmSupportsVertexProgram);
+    TCL_FALLBACK(ctx, R200_TCL_FALLBACK_VERTEX_PROGRAM, fallback);
+-   if (rmesa->TclFallback) return;
++   if (rmesa->radeon.TclFallback) return;
+ 
+    R200_STATECHANGE( rmesa, vap );
+    /* FIXME: fglrx sets R200_VAP_SINGLE_BUF_STATE_ENABLE too. Do we need it?
+diff --git a/src/mesa/drivers/dri/r300/Makefile b/src/mesa/drivers/dri/r300/Makefile
+index 6ca9342..497b1ec 100644
+--- a/src/mesa/drivers/dri/r300/Makefile
++++ b/src/mesa/drivers/dri/r300/Makefile
+@@ -3,6 +3,8 @@
+ TOP = ../../../../..
+ include $(TOP)/configs/current
+ 
++CFLAGS += $(RADEON_CFLAGS)
++
+ LIBNAME = r300_dri.so
+ 
+ MINIGLX_SOURCES = server/radeon_dri.c
+@@ -20,20 +22,24 @@ COMMON_SOURCES = \
+ 	../common/xmlconfig.c \
+ 	../common/dri_util.c
+ 
++RADEON_COMMON_SOURCES = \
++	radeon_texture.c \
++	radeon_common_context.c \
++	radeon_common.c \
++	radeon_dma.c \
++	radeon_lock.c \
++	radeon_bo_legacy.c \
++	radeon_cs_legacy.c \
++	radeon_mipmap_tree.c \
++	radeon_span.c
++
+ DRIVER_SOURCES = \
+ 		 radeon_screen.c \
+-		 radeon_context.c \
+-		 radeon_ioctl.c \
+-		 radeon_lock.c \
+-		 radeon_span.c \
+-		 radeon_state.c \
+-		 r300_mem.c \
+ 		 r300_context.c \
+ 		 r300_ioctl.c \
+ 		 r300_cmdbuf.c \
+ 		 r300_state.c \
+ 		 r300_render.c \
+-		 r300_texmem.c \
+ 		 r300_tex.c \
+ 		 r300_texstate.c \
+ 		 radeon_program.c \
+@@ -49,12 +55,15 @@ DRIVER_SOURCES = \
+ 		 r300_shader.c \
+ 		 r300_emit.c \
+ 		 r300_swtcl.c \
++		 $(RADEON_COMMON_SOURCES) \
+ 		 $(EGL_SOURCES)
+ 
+ C_SOURCES = $(COMMON_SOURCES) $(DRIVER_SOURCES)
+ 
+ DRIVER_DEFINES = -DCOMPILE_R300 -DR200_MERGED=0 \
+-	-DRADEON_COMMON=1 -DRADEON_COMMON_FOR_R300
++	-DRADEON_COMMON=1 -DRADEON_COMMON_FOR_R300 \
++	-Wall
++#	-DRADEON_BO_TRACK  (optional debug define; kept after the list so the comment cannot swallow -Wall)
+ 
+ SYMLINKS = \
+ 	server/radeon_dri.c \
+@@ -68,7 +77,28 @@ COMMON_SYMLINKS = \
+ 	radeon_chipset.h \
+ 	radeon_screen.c \
+ 	radeon_screen.h \
+-	radeon_span.h
++	radeon_span.h \
++	radeon_span.c \
++	radeon_bo_legacy.c \
++	radeon_cs_legacy.c \
++	radeon_bo_legacy.h \
++	radeon_cs_legacy.h \
++	radeon_bocs_wrapper.h \
++	radeon_lock.c \
++	radeon_lock.h \
++	radeon_common.c \
++	radeon_common.h \
++	radeon_common_context.c \
++	radeon_common_context.h \
++	radeon_cmdbuf.h \
++	radeon_dma.c \
++	radeon_dma.h \
++	radeon_mipmap_tree.c \
++	radeon_mipmap_tree.h \
++	radeon_texture.c \
++	radeon_texture.h
++
++DRI_LIB_DEPS += $(RADEON_LDFLAGS)
+ 
+ ##### TARGETS #####
+ 
+diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c
+index c9e1dfe..ac1c128 100644
+--- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c
++++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c
+@@ -44,235 +44,252 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "drm.h"
+ #include "radeon_drm.h"
+ 
+-#include "radeon_ioctl.h"
+ #include "r300_context.h"
+ #include "r300_ioctl.h"
+ #include "radeon_reg.h"
+ #include "r300_reg.h"
+ #include "r300_cmdbuf.h"
+ #include "r300_emit.h"
++#include "radeon_bocs_wrapper.h"
++#include "radeon_mipmap_tree.h"
+ #include "r300_state.h"
++#include "radeon_reg.h"
+ 
+-// Set this to 1 for extremely verbose debugging of command buffers
+-#define DEBUG_CMDBUF		0
++#define R300_VAP_PVS_UPLOAD_ADDRESS 0x2200
++#   define RADEON_ONE_REG_WR        (1 << 15)
+ 
+-/**
+- * Send the current command buffer via ioctl to the hardware.
++/** # of dwords reserved for additional instructions that may need to be written
++ * during flushing.
+  */
+-int r300FlushCmdBufLocked(r300ContextPtr r300, const char *caller)
++#define SPACE_FOR_FLUSHING	4
++
++static unsigned packet0_count(r300ContextPtr r300, uint32_t *pkt)
+ {
+-	int ret;
+-	int i;
+-	drm_radeon_cmd_buffer_t cmd;
+-	int start;
+-
+-	if (r300->radeon.lost_context) {
+-		start = 0;
+-		r300->radeon.lost_context = GL_FALSE;
+-	} else
+-		start = r300->cmdbuf.count_reemit;
+-
+-	if (RADEON_DEBUG & DEBUG_IOCTL) {
+-		fprintf(stderr, "%s from %s - %i cliprects\n",
+-			__FUNCTION__, caller, r300->radeon.numClipRects);
+-
+-		if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_VERBOSE)
+-			for (i = start; i < r300->cmdbuf.count_used; ++i)
+-				fprintf(stderr, "%d: %08x\n", i,
+-					r300->cmdbuf.cmd_buf[i]);
+-	}
++    if (r300->radeon.radeonScreen->kernel_mm) {
++        return ((((*pkt) >> 16) & 0x3FFF) + 1);
++    } else {
++        drm_r300_cmd_header_t *t = (drm_r300_cmd_header_t*)pkt;
++        return t->packet0.count;
++    }
++    return 0;
++}
+ 
+-	cmd.buf = (char *)(r300->cmdbuf.cmd_buf + start);
+-	cmd.bufsz = (r300->cmdbuf.count_used - start) * 4;
++#define vpu_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->vpu.count)
++#define r500fp_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->r500fp.count)
+ 
+-	if (r300->radeon.state.scissor.enabled) {
+-		cmd.nbox = r300->radeon.state.scissor.numClipRects;
+-		cmd.boxes =
+-		    (drm_clip_rect_t *) r300->radeon.state.scissor.pClipRects;
+-	} else {
+-		cmd.nbox = r300->radeon.numClipRects;
+-		cmd.boxes = (drm_clip_rect_t *) r300->radeon.pClipRects;
++void emit_vpu(GLcontext *ctx, struct radeon_state_atom * atom)
++{
++	r300ContextPtr r300 = R300_CONTEXT(ctx);
++	BATCH_LOCALS(&r300->radeon);
++	drm_r300_cmd_header_t cmd;
++	uint32_t addr, ndw, i;
++	
++	if (!r300->radeon.radeonScreen->kernel_mm) {
++		uint32_t dwords;
++		dwords = (*atom->check) (ctx, atom);
++		BEGIN_BATCH_NO_AUTOSTATE(dwords);
++		OUT_BATCH_TABLE(atom->cmd, dwords);
++		END_BATCH();
++		return;
+ 	}
+-
+-	ret = drmCommandWrite(r300->radeon.dri.fd,
+-			      DRM_RADEON_CMDBUF, &cmd, sizeof(cmd));
+-
+-	if (RADEON_DEBUG & DEBUG_SYNC) {
+-		fprintf(stderr, "Syncing in %s (from %s)\n\n",
+-			__FUNCTION__, caller);
+-		radeonWaitForIdleLocked(&r300->radeon);
++	
++	cmd.u = atom->cmd[0];
++	addr = (cmd.vpu.adrhi << 8) | cmd.vpu.adrlo;
++	ndw = cmd.vpu.count * 4;
++	if (ndw) {
++		BEGIN_BATCH_NO_AUTOSTATE(15 + ndw); /* 7 reg/value pairs (14) + 1 upload packet header + ndw payload dwords */
++
++		/* flush processing vertices */
++		OUT_BATCH_REGVAL(R300_SC_SCREENDOOR, 0);
++		OUT_BATCH_REGVAL(R300_RB3D_DSTCACHE_CTLSTAT, R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D);
++		OUT_BATCH_REGVAL(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
++		OUT_BATCH_REGVAL(R300_SC_SCREENDOOR, 0xffffff);
++		OUT_BATCH_REGVAL(R300_VAP_PVS_STATE_FLUSH_REG, 1);
++		OUT_BATCH_REGVAL(R300_VAP_PVS_UPLOAD_ADDRESS, addr);
++		OUT_BATCH(CP_PACKET0(R300_VAP_PVS_UPLOAD_DATA, ndw-1) | RADEON_ONE_REG_WR);
++		for (i = 0; i < ndw; i++) {
++			OUT_BATCH(atom->cmd[i+1]);
++		}
++		OUT_BATCH_REGVAL(R300_VAP_PVS_STATE_FLUSH_REG, 0);
++		END_BATCH();
+ 	}
+-
+-	r300->dma.nr_released_bufs = 0;
+-	r300->cmdbuf.count_used = 0;
+-	r300->cmdbuf.count_reemit = 0;
+-
+-	return ret;
+ }
+ 
+-int r300FlushCmdBuf(r300ContextPtr r300, const char *caller)
++void emit_r500fp(GLcontext *ctx, struct radeon_state_atom * atom)
+ {
+-	int ret;
+-
+-	LOCK_HARDWARE(&r300->radeon);
+-
+-	ret = r300FlushCmdBufLocked(r300, caller);
+-
+-	UNLOCK_HARDWARE(&r300->radeon);
+-
+-	if (ret) {
+-		fprintf(stderr, "drmRadeonCmdBuffer: %d\n", ret);
+-		_mesa_exit(ret);
++	r300ContextPtr r300 = R300_CONTEXT(ctx);
++	BATCH_LOCALS(&r300->radeon);
++	drm_r300_cmd_header_t cmd;
++	uint32_t addr, ndw, i, sz;
++	int type, clamp, stride;
++
++	if (!r300->radeon.radeonScreen->kernel_mm) {
++		uint32_t dwords;
++		dwords = (*atom->check) (ctx, atom);
++		BEGIN_BATCH_NO_AUTOSTATE(dwords);
++		OUT_BATCH_TABLE(atom->cmd, dwords);
++		END_BATCH();
++		return;
+ 	}
+ 
+-	return ret;
+-}
++	cmd.u = atom->cmd[0];
++	sz = cmd.r500fp.count;
++	addr = ((cmd.r500fp.adrhi_flags & 1) << 8) | cmd.r500fp.adrlo;
++	type = !!(cmd.r500fp.adrhi_flags & R500FP_CONSTANT_TYPE);
++	clamp = !!(cmd.r500fp.adrhi_flags & R500FP_CONSTANT_CLAMP);
+ 
+-static void r300PrintStateAtom(r300ContextPtr r300, struct r300_state_atom *state)
+-{
+-	int i;
+-	int dwords = (*state->check) (r300, state);
++	addr |= (type << 16);
++	addr |= (clamp << 17);
+ 
+-	fprintf(stderr, "  emit %s %d/%d\n", state->name, dwords,
+-		state->cmd_size);
++	stride = type ? 4 : 6;
+ 
+-	if (RADEON_DEBUG & DEBUG_VERBOSE) {
+-		for (i = 0; i < dwords; i++) {
+-			fprintf(stderr, "      %s[%d]: %08x\n",
+-				state->name, i, state->cmd[i]);
++	ndw = sz * stride;
++	if (ndw) {
++
++		BEGIN_BATCH_NO_AUTOSTATE(3 + ndw);
++		OUT_BATCH(CP_PACKET0(R500_GA_US_VECTOR_INDEX, 0));
++		OUT_BATCH(addr);
++		OUT_BATCH(CP_PACKET0(R500_GA_US_VECTOR_DATA, ndw-1) | RADEON_ONE_REG_WR);
++		for (i = 0; i < ndw; i++) {
++			OUT_BATCH(atom->cmd[i+1]);
+ 		}
++		END_BATCH();
+ 	}
+ }
+ 
+-/**
+- * Emit all atoms with a dirty field equal to dirty.
+- *
+- * The caller must have ensured that there is enough space in the command
+- * buffer.
+- */
+-static INLINE void r300EmitAtoms(r300ContextPtr r300, GLboolean dirty)
++static void emit_tex_offsets(GLcontext *ctx, struct radeon_state_atom * atom)
+ {
+-	struct r300_state_atom *atom;
+-	uint32_t *dest;
+-	int dwords;
+-
+-	dest = r300->cmdbuf.cmd_buf + r300->cmdbuf.count_used;
+-
+-	/* Emit WAIT */
+-	*dest = cmdwait(R300_WAIT_3D | R300_WAIT_3D_CLEAN);
+-	dest++;
+-	r300->cmdbuf.count_used++;
+-
+-	/* Emit cache flush */
+-	*dest = cmdpacket0(R300_TX_INVALTAGS, 1);
+-	dest++;
+-	r300->cmdbuf.count_used++;
+-
+-	*dest = R300_TX_FLUSH;
+-	dest++;
+-	r300->cmdbuf.count_used++;
+-
+-	/* Emit END3D */
+-	*dest = cmdpacify();
+-	dest++;
+-	r300->cmdbuf.count_used++;
+-
+-	/* Emit actual atoms */
+-
+-	foreach(atom, &r300->hw.atomlist) {
+-		if ((atom->dirty || r300->hw.all_dirty) == dirty) {
+-			dwords = (*atom->check) (r300, atom);
+-			if (dwords) {
+-				if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_STATE) {
+-					r300PrintStateAtom(r300, atom);
+-				}
+-				memcpy(dest, atom->cmd, dwords * 4);
+-				dest += dwords;
+-				r300->cmdbuf.count_used += dwords;
+-				atom->dirty = GL_FALSE;
+-			} else {
+-				if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_STATE) {
+-					fprintf(stderr, "  skip state %s\n",
+-						atom->name);
+-				}
+-			}
++	r300ContextPtr r300 = R300_CONTEXT(ctx);
++	BATCH_LOCALS(&r300->radeon);
++	int numtmus = packet0_count(r300, r300->hw.tex.offset.cmd);
++
++	if (numtmus) {
++		int i;
++
++		for(i = 0; i < numtmus; ++i) {
++		    radeonTexObj *t = r300->hw.textures[i];
++		    if (t && !t->image_override) {
++		            BEGIN_BATCH_NO_AUTOSTATE(4);
++		            OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1);
++			    OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0,
++					    RADEON_GEM_DOMAIN_VRAM, 0, 0);
++		            END_BATCH();
++		    } else if (!t) {
++			    //assert(0);
++		            BEGIN_BATCH_NO_AUTOSTATE(4);
++		            OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1);
++			    OUT_BATCH(r300->radeon.radeonScreen->texOffset[0]);
++			    END_BATCH();
++		    } else {
++			    if (t->bo) {
++		            	    BEGIN_BATCH_NO_AUTOSTATE(4);
++		                    OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1);
++				    OUT_BATCH_RELOC(t->tile_bits, t->bo, 0,
++						    RADEON_GEM_DOMAIN_VRAM, 0, 0);
++		                    END_BATCH();
++			    } else if (!r300->radeon.radeonScreen->kernel_mm) {
++		            	    BEGIN_BATCH_NO_AUTOSTATE(2);
++		                    OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1);
++				    OUT_BATCH(t->override_offset);
++				    END_BATCH();
++			    }
++		    }
+ 		}
+ 	}
+ }
+ 
+-/**
+- * Copy dirty hardware state atoms into the command buffer.
+- *
+- * We also copy out clean state if we're at the start of a buffer. That makes
+- * it easy to recover from lost contexts.
+- */
+-void r300EmitState(r300ContextPtr r300)
++static void emit_cb_offset(GLcontext *ctx, struct radeon_state_atom * atom)
+ {
+-	if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_PRIMS))
+-		fprintf(stderr, "%s\n", __FUNCTION__);
+-
+-	if (r300->cmdbuf.count_used && !r300->hw.is_dirty
+-	    && !r300->hw.all_dirty)
++	r300ContextPtr r300 = R300_CONTEXT(ctx);
++	BATCH_LOCALS(&r300->radeon);
++	struct radeon_renderbuffer *rrb;
++	uint32_t cbpitch;
++
++	rrb = radeon_get_colorbuffer(&r300->radeon);
++	if (!rrb || !rrb->bo) {
++		fprintf(stderr, "no rrb\n");
+ 		return;
+-
+-	/* To avoid going across the entire set of states multiple times, just check
+-	 * for enough space for the case of emitting all state, and inline the
+-	 * r300AllocCmdBuf code here without all the checks.
+-	 */
+-	r300EnsureCmdBufSpace(r300, r300->hw.max_state_size, __FUNCTION__);
+-
+-	if (!r300->cmdbuf.count_used) {
+-		if (RADEON_DEBUG & DEBUG_STATE)
+-			fprintf(stderr, "Begin reemit state\n");
+-
+-		r300EmitAtoms(r300, GL_FALSE);
+-		r300->cmdbuf.count_reemit = r300->cmdbuf.count_used;
+ 	}
+ 
+-	if (RADEON_DEBUG & DEBUG_STATE)
+-		fprintf(stderr, "Begin dirty state\n");
++	cbpitch = (rrb->pitch / rrb->cpp);
++	if (rrb->cpp == 4)
++		cbpitch |= R300_COLOR_FORMAT_ARGB8888;
++	else
++		cbpitch |= R300_COLOR_FORMAT_RGB565;
++
++	if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE)
++		cbpitch |= R300_COLOR_TILE_ENABLE;
++
++	BEGIN_BATCH_NO_AUTOSTATE(6);
++	OUT_BATCH_REGSEQ(R300_RB3D_COLOROFFSET0, 1);
++	OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
++	OUT_BATCH_REGSEQ(R300_RB3D_COLORPITCH0, 1);
++	OUT_BATCH(cbpitch);
++	END_BATCH();
++}
+ 
+-	r300EmitAtoms(r300, GL_TRUE);
++static void emit_zb_offset(GLcontext *ctx, struct radeon_state_atom * atom)
++{
++	r300ContextPtr r300 = R300_CONTEXT(ctx);
++	BATCH_LOCALS(&r300->radeon);
++	struct radeon_renderbuffer *rrb;
++	uint32_t zbpitch;
+ 
+-	assert(r300->cmdbuf.count_used < r300->cmdbuf.size);
++	rrb = radeon_get_depthbuffer(&r300->radeon);
++	if (!rrb || !rrb->bo) /* rrb->bo is dereferenced below; same guard as emit_cb_offset */
++		return;
+ 
+-	r300->hw.is_dirty = GL_FALSE;
+-	r300->hw.all_dirty = GL_FALSE;
++	zbpitch = (rrb->pitch / rrb->cpp);
++	if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
++		zbpitch |= R300_DEPTHMACROTILE_ENABLE;
++	}
++	if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE){
++		zbpitch |= R300_DEPTHMICROTILE_TILED;
++	}
++	
++	BEGIN_BATCH_NO_AUTOSTATE(6);
++	OUT_BATCH_REGSEQ(R300_ZB_DEPTHOFFSET, 1);
++	OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
++	OUT_BATCH_REGVAL(R300_ZB_DEPTHPITCH, zbpitch);
++	END_BATCH();
+ }
+ 
+-#define packet0_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->packet0.count)
+-#define vpu_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->vpu.count)
+-#define r500fp_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->r500fp.count)
+-
+-static int check_always(r300ContextPtr r300, struct r300_state_atom *atom)
++static int check_always(GLcontext *ctx, struct radeon_state_atom *atom)
+ {
+ 	return atom->cmd_size;
+ }
+ 
+-static int check_variable(r300ContextPtr r300, struct r300_state_atom *atom)
++static int check_variable(GLcontext *ctx, struct radeon_state_atom *atom)
+ {
++	r300ContextPtr r300 = R300_CONTEXT(ctx);
+ 	int cnt;
+-	cnt = packet0_count(atom->cmd);
++	if (atom->cmd[0] == CP_PACKET2) {
++		return 0;
++	}
++	cnt = packet0_count(r300, atom->cmd);
+ 	return cnt ? cnt + 1 : 0;
+ }
+ 
+-static int check_vpu(r300ContextPtr r300, struct r300_state_atom *atom)
++int check_vpu(GLcontext *ctx, struct radeon_state_atom *atom)
+ {
+ 	int cnt;
++
+ 	cnt = vpu_count(atom->cmd);
+ 	return cnt ? (cnt * 4) + 1 : 0;
+ }
+ 
+-static int check_r500fp(r300ContextPtr r300, struct r300_state_atom *atom)
++int check_r500fp(GLcontext *ctx, struct radeon_state_atom *atom)
+ {
+ 	int cnt;
++
+ 	cnt = r500fp_count(atom->cmd);
+ 	return cnt ? (cnt * 6) + 1 : 0;
+ }
+ 
+-static int check_r500fp_const(r300ContextPtr r300, struct r300_state_atom *atom)
++int check_r500fp_const(GLcontext *ctx, struct radeon_state_atom *atom)
+ {
+ 	int cnt;
++
+ 	cnt = r500fp_count(atom->cmd);
+ 	return cnt ? (cnt * 4) + 1 : 0;
+ }
+@@ -285,8 +302,8 @@ static int check_r500fp_const(r300ContextPtr r300, struct r300_state_atom *atom)
+       r300->hw.ATOM.idx = (IDX);					\
+       r300->hw.ATOM.check = check_##CHK;				\
+       r300->hw.ATOM.dirty = GL_FALSE;					\
+-      r300->hw.max_state_size += (SZ);					\
+-      insert_at_tail(&r300->hw.atomlist, &r300->hw.ATOM);		\
++      r300->radeon.hw.max_state_size += (SZ);					\
++      insert_at_tail(&r300->radeon.hw.atomlist, &r300->hw.ATOM);		\
+    } while (0)
+ /**
+  * Allocate memory for the command buffer and initialize the state atom
+@@ -294,7 +311,7 @@ static int check_r500fp_const(r300ContextPtr r300, struct r300_state_atom *atom)
+  */
+ void r300InitCmdBuf(r300ContextPtr r300)
+ {
+-	int size, mtu;
++	int mtu;
+ 	int has_tcl = 1;
+ 	int is_r500 = 0;
+ 	int i;
+@@ -305,7 +322,7 @@ void r300InitCmdBuf(r300ContextPtr r300)
+ 	if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
+ 		is_r500 = 1;
+ 
+-	r300->hw.max_state_size = 2 + 2;	/* reserve extra space for WAIT_IDLE and tex cache flush */
++	r300->radeon.hw.max_state_size = 2 + 2;	/* reserve extra space for WAIT_IDLE and tex cache flush */
+ 
+ 	mtu = r300->radeon.glCtx->Const.MaxTextureUnits;
+ 	if (RADEON_DEBUG & DEBUG_TEXTURE) {
+@@ -313,97 +330,97 @@ void r300InitCmdBuf(r300ContextPtr r300)
+ 	}
+ 
+ 	/* Setup the atom linked list */
+-	make_empty_list(&r300->hw.atomlist);
+-	r300->hw.atomlist.name = "atom-list";
++	make_empty_list(&r300->radeon.hw.atomlist);
++	r300->radeon.hw.atomlist.name = "atom-list";
+ 
+ 	/* Initialize state atoms */
+ 	ALLOC_STATE(vpt, always, R300_VPT_CMDSIZE, 0);
+-	r300->hw.vpt.cmd[R300_VPT_CMD_0] = cmdpacket0(R300_SE_VPORT_XSCALE, 6);
++	r300->hw.vpt.cmd[R300_VPT_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_SE_VPORT_XSCALE, 6);
+ 	ALLOC_STATE(vap_cntl, always, R300_VAP_CNTL_SIZE, 0);
+-	r300->hw.vap_cntl.cmd[R300_VAP_CNTL_FLUSH] = cmdpacket0(R300_VAP_PVS_STATE_FLUSH_REG, 1);
++	r300->hw.vap_cntl.cmd[R300_VAP_CNTL_FLUSH] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PVS_STATE_FLUSH_REG, 1);
+ 	r300->hw.vap_cntl.cmd[R300_VAP_CNTL_FLUSH_1] = 0;
+-	r300->hw.vap_cntl.cmd[R300_VAP_CNTL_CMD] = cmdpacket0(R300_VAP_CNTL, 1);
++	r300->hw.vap_cntl.cmd[R300_VAP_CNTL_CMD] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_CNTL, 1);
+ 	if (is_r500) {
+ 	    ALLOC_STATE(vap_index_offset, always, 2, 0);
+-	    r300->hw.vap_index_offset.cmd[0] = cmdpacket0(R500_VAP_INDEX_OFFSET, 1);
++	    r300->hw.vap_index_offset.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R500_VAP_INDEX_OFFSET, 1);
+ 	    r300->hw.vap_index_offset.cmd[1] = 0;
+ 	}
+ 	ALLOC_STATE(vte, always, 3, 0);
+-	r300->hw.vte.cmd[0] = cmdpacket0(R300_SE_VTE_CNTL, 2);
++	r300->hw.vte.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SE_VTE_CNTL, 2);
+ 	ALLOC_STATE(vap_vf_max_vtx_indx, always, 3, 0);
+-	r300->hw.vap_vf_max_vtx_indx.cmd[0] = cmdpacket0(R300_VAP_VF_MAX_VTX_INDX, 2);
++	r300->hw.vap_vf_max_vtx_indx.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_VF_MAX_VTX_INDX, 2);
+ 	ALLOC_STATE(vap_cntl_status, always, 2, 0);
+-	r300->hw.vap_cntl_status.cmd[0] = cmdpacket0(R300_VAP_CNTL_STATUS, 1);
++	r300->hw.vap_cntl_status.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_CNTL_STATUS, 1);
+ 	ALLOC_STATE(vir[0], variable, R300_VIR_CMDSIZE, 0);
+ 	r300->hw.vir[0].cmd[R300_VIR_CMD_0] =
+-	    cmdpacket0(R300_VAP_PROG_STREAM_CNTL_0, 1);
++	    cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PROG_STREAM_CNTL_0, 1);
+ 	ALLOC_STATE(vir[1], variable, R300_VIR_CMDSIZE, 1);
+ 	r300->hw.vir[1].cmd[R300_VIR_CMD_0] =
+-	    cmdpacket0(R300_VAP_PROG_STREAM_CNTL_EXT_0, 1);
++	    cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PROG_STREAM_CNTL_EXT_0, 1);
+ 	ALLOC_STATE(vic, always, R300_VIC_CMDSIZE, 0);
+-	r300->hw.vic.cmd[R300_VIC_CMD_0] = cmdpacket0(R300_VAP_VTX_STATE_CNTL, 2);
++	r300->hw.vic.cmd[R300_VIC_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_VTX_STATE_CNTL, 2);
+ 	ALLOC_STATE(vap_psc_sgn_norm_cntl, always, 2, 0);
+-	r300->hw.vap_psc_sgn_norm_cntl.cmd[0] = cmdpacket0(R300_VAP_PSC_SGN_NORM_CNTL, SGN_NORM_ZERO_CLAMP_MINUS_ONE);
++	r300->hw.vap_psc_sgn_norm_cntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PSC_SGN_NORM_CNTL, SGN_NORM_ZERO_CLAMP_MINUS_ONE);
+ 
+ 	if (has_tcl) {
+ 		ALLOC_STATE(vap_clip_cntl, always, 2, 0);
+-		r300->hw.vap_clip_cntl.cmd[0] = cmdpacket0(R300_VAP_CLIP_CNTL, 1);
++		r300->hw.vap_clip_cntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_CLIP_CNTL, 1);
+ 		ALLOC_STATE(vap_clip, always, 5, 0);
+-		r300->hw.vap_clip.cmd[0] = cmdpacket0(R300_VAP_GB_VERT_CLIP_ADJ, 4);
++		r300->hw.vap_clip.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_GB_VERT_CLIP_ADJ, 4);
+ 		ALLOC_STATE(vap_pvs_vtx_timeout_reg, always, 2, 0);
+-		r300->hw.vap_pvs_vtx_timeout_reg.cmd[0] = cmdpacket0(VAP_PVS_VTX_TIMEOUT_REG, 1);
++		r300->hw.vap_pvs_vtx_timeout_reg.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, VAP_PVS_VTX_TIMEOUT_REG, 1);
+ 	}
+ 
+ 	ALLOC_STATE(vof, always, R300_VOF_CMDSIZE, 0);
+ 	r300->hw.vof.cmd[R300_VOF_CMD_0] =
+-	    cmdpacket0(R300_VAP_OUTPUT_VTX_FMT_0, 2);
++	    cmdpacket0(r300->radeon.radeonScreen, R300_VAP_OUTPUT_VTX_FMT_0, 2);
+ 
+ 	if (has_tcl) {
+ 		ALLOC_STATE(pvs, always, R300_PVS_CMDSIZE, 0);
+ 		r300->hw.pvs.cmd[R300_PVS_CMD_0] =
+-		    cmdpacket0(R300_VAP_PVS_CODE_CNTL_0, 3);
++		    cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PVS_CODE_CNTL_0, 3);
+ 	}
+ 
+ 	ALLOC_STATE(gb_enable, always, 2, 0);
+-	r300->hw.gb_enable.cmd[0] = cmdpacket0(R300_GB_ENABLE, 1);
++	r300->hw.gb_enable.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GB_ENABLE, 1);
+ 	ALLOC_STATE(gb_misc, always, R300_GB_MISC_CMDSIZE, 0);
+-	r300->hw.gb_misc.cmd[0] = cmdpacket0(R300_GB_MSPOS0, 5);
++	r300->hw.gb_misc.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GB_MSPOS0, 5);
+ 	ALLOC_STATE(txe, always, R300_TXE_CMDSIZE, 0);
+-	r300->hw.txe.cmd[R300_TXE_CMD_0] = cmdpacket0(R300_TX_ENABLE, 1);
++	r300->hw.txe.cmd[R300_TXE_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_ENABLE, 1);
+ 	ALLOC_STATE(ga_point_s0, always, 5, 0);
+-	r300->hw.ga_point_s0.cmd[0] = cmdpacket0(R300_GA_POINT_S0, 4);
++	r300->hw.ga_point_s0.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_POINT_S0, 4);
+ 	ALLOC_STATE(ga_triangle_stipple, always, 2, 0);
+-	r300->hw.ga_triangle_stipple.cmd[0] = cmdpacket0(R300_GA_TRIANGLE_STIPPLE, 1);
++	r300->hw.ga_triangle_stipple.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_TRIANGLE_STIPPLE, 1);
+ 	ALLOC_STATE(ps, always, R300_PS_CMDSIZE, 0);
+-	r300->hw.ps.cmd[0] = cmdpacket0(R300_GA_POINT_SIZE, 1);
++	r300->hw.ps.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_POINT_SIZE, 1);
+ 	ALLOC_STATE(ga_point_minmax, always, 4, 0);
+-	r300->hw.ga_point_minmax.cmd[0] = cmdpacket0(R300_GA_POINT_MINMAX, 3);
++	r300->hw.ga_point_minmax.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_POINT_MINMAX, 3);
+ 	ALLOC_STATE(lcntl, always, 2, 0);
+-	r300->hw.lcntl.cmd[0] = cmdpacket0(R300_GA_LINE_CNTL, 1);
++	r300->hw.lcntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_LINE_CNTL, 1);
+ 	ALLOC_STATE(ga_line_stipple, always, 4, 0);
+-	r300->hw.ga_line_stipple.cmd[0] = cmdpacket0(R300_GA_LINE_STIPPLE_VALUE, 3);
++	r300->hw.ga_line_stipple.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_LINE_STIPPLE_VALUE, 3);
+ 	ALLOC_STATE(shade, always, 5, 0);
+-	r300->hw.shade.cmd[0] = cmdpacket0(R300_GA_ENHANCE, 4);
++	r300->hw.shade.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_ENHANCE, 4);
+ 	ALLOC_STATE(polygon_mode, always, 4, 0);
+-	r300->hw.polygon_mode.cmd[0] = cmdpacket0(R300_GA_POLY_MODE, 3);
++	r300->hw.polygon_mode.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_POLY_MODE, 3);
+ 	ALLOC_STATE(fogp, always, 3, 0);
+-	r300->hw.fogp.cmd[0] = cmdpacket0(R300_GA_FOG_SCALE, 2);
++	r300->hw.fogp.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_FOG_SCALE, 2);
+ 	ALLOC_STATE(zbias_cntl, always, 2, 0);
+-	r300->hw.zbias_cntl.cmd[0] = cmdpacket0(R300_SU_TEX_WRAP, 1);
++	r300->hw.zbias_cntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SU_TEX_WRAP, 1);
+ 	ALLOC_STATE(zbs, always, R300_ZBS_CMDSIZE, 0);
+ 	r300->hw.zbs.cmd[R300_ZBS_CMD_0] =
+-	    cmdpacket0(R300_SU_POLY_OFFSET_FRONT_SCALE, 4);
++	    cmdpacket0(r300->radeon.radeonScreen, R300_SU_POLY_OFFSET_FRONT_SCALE, 4);
+ 	ALLOC_STATE(occlusion_cntl, always, 2, 0);
+-	r300->hw.occlusion_cntl.cmd[0] = cmdpacket0(R300_SU_POLY_OFFSET_ENABLE, 1);
++	r300->hw.occlusion_cntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SU_POLY_OFFSET_ENABLE, 1);
+ 	ALLOC_STATE(cul, always, R300_CUL_CMDSIZE, 0);
+-	r300->hw.cul.cmd[R300_CUL_CMD_0] = cmdpacket0(R300_SU_CULL_MODE, 1);
++	r300->hw.cul.cmd[R300_CUL_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_SU_CULL_MODE, 1);
+ 	ALLOC_STATE(su_depth_scale, always, 3, 0);
+-	r300->hw.su_depth_scale.cmd[0] = cmdpacket0(R300_SU_DEPTH_SCALE, 2);
++	r300->hw.su_depth_scale.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SU_DEPTH_SCALE, 2);
+ 	ALLOC_STATE(rc, always, R300_RC_CMDSIZE, 0);
+-	r300->hw.rc.cmd[R300_RC_CMD_0] = cmdpacket0(R300_RS_COUNT, 2);
++	r300->hw.rc.cmd[R300_RC_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_COUNT, 2);
+ 	if (is_r500) {
+ 		ALLOC_STATE(ri, always, R500_RI_CMDSIZE, 0);
+-		r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(R500_RS_IP_0, 16);
++		r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R500_RS_IP_0, 16);
+ 		for (i = 0; i < 8; i++) {
+ 			r300->hw.ri.cmd[R300_RI_CMD_0 + i +1] =
+ 			  (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) |
+@@ -412,133 +429,146 @@ void r300InitCmdBuf(r300ContextPtr r300)
+                           (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT);
+ 		}
+ 		ALLOC_STATE(rr, variable, R300_RR_CMDSIZE, 0);
+-		r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R500_RS_INST_0, 1);
++		r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R500_RS_INST_0, 1);
+ 	} else {
+ 		ALLOC_STATE(ri, always, R300_RI_CMDSIZE, 0);
+-		r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(R300_RS_IP_0, 8);
++		r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_IP_0, 8);
+ 		ALLOC_STATE(rr, variable, R300_RR_CMDSIZE, 0);
+-		r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R300_RS_INST_0, 1);
++		r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_INST_0, 1);
+ 	}
+ 	ALLOC_STATE(sc_hyperz, always, 3, 0);
+-	r300->hw.sc_hyperz.cmd[0] = cmdpacket0(R300_SC_HYPERZ, 2);
++	r300->hw.sc_hyperz.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SC_HYPERZ, 2);
+ 	ALLOC_STATE(sc_screendoor, always, 2, 0);
+-	r300->hw.sc_screendoor.cmd[0] = cmdpacket0(R300_SC_SCREENDOOR, 1);
++	r300->hw.sc_screendoor.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SC_SCREENDOOR, 1);
+ 	ALLOC_STATE(us_out_fmt, always, 6, 0);
+-	r300->hw.us_out_fmt.cmd[0] = cmdpacket0(R300_US_OUT_FMT, 5);
++	r300->hw.us_out_fmt.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_OUT_FMT, 5);
+ 
+ 	if (is_r500) {
+ 		ALLOC_STATE(fp, always, R500_FP_CMDSIZE, 0);
+-		r300->hw.fp.cmd[R500_FP_CMD_0] = cmdpacket0(R500_US_CONFIG, 2);
++		r300->hw.fp.cmd[R500_FP_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R500_US_CONFIG, 2);
+ 		r300->hw.fp.cmd[R500_FP_CNTL] = R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO;
+-		r300->hw.fp.cmd[R500_FP_CMD_1] = cmdpacket0(R500_US_CODE_ADDR, 3);
+-		r300->hw.fp.cmd[R500_FP_CMD_2] = cmdpacket0(R500_US_FC_CTRL, 1);
++		r300->hw.fp.cmd[R500_FP_CMD_1] = cmdpacket0(r300->radeon.radeonScreen, R500_US_CODE_ADDR, 3);
++		r300->hw.fp.cmd[R500_FP_CMD_2] = cmdpacket0(r300->radeon.radeonScreen, R500_US_FC_CTRL, 1);
+ 		r300->hw.fp.cmd[R500_FP_FC_CNTL] = 0; /* FIXME when we add flow control */
+ 
+ 		ALLOC_STATE(r500fp, r500fp, R500_FPI_CMDSIZE, 0);
+-		r300->hw.r500fp.cmd[R300_FPI_CMD_0] = cmdr500fp(0, 0, 0, 0);
++		r300->hw.r500fp.cmd[R300_FPI_CMD_0] =
++			cmdr500fp(r300->radeon.radeonScreen, 0, 0, 0, 0);
++		r300->hw.r500fp.emit = emit_r500fp;
+ 		ALLOC_STATE(r500fp_const, r500fp_const, R500_FPP_CMDSIZE, 0);
+-		r300->hw.r500fp_const.cmd[R300_FPI_CMD_0] = cmdr500fp(0, 0, 1, 0);
++		r300->hw.r500fp_const.cmd[R300_FPI_CMD_0] =
++			cmdr500fp(r300->radeon.radeonScreen, 0, 0, 1, 0);
++		r300->hw.r500fp_const.emit = emit_r500fp;
+ 	} else {
+ 		ALLOC_STATE(fp, always, R300_FP_CMDSIZE, 0);
+-		r300->hw.fp.cmd[R300_FP_CMD_0] = cmdpacket0(R300_US_CONFIG, 3);
+-		r300->hw.fp.cmd[R300_FP_CMD_1] = cmdpacket0(R300_US_CODE_ADDR_0, 4);
++		r300->hw.fp.cmd[R300_FP_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_CONFIG, 3);
++		r300->hw.fp.cmd[R300_FP_CMD_1] = cmdpacket0(r300->radeon.radeonScreen, R300_US_CODE_ADDR_0, 4);
++
+ 		ALLOC_STATE(fpt, variable, R300_FPT_CMDSIZE, 0);
+-		r300->hw.fpt.cmd[R300_FPT_CMD_0] = cmdpacket0(R300_US_TEX_INST_0, 0);
++		r300->hw.fpt.cmd[R300_FPT_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_TEX_INST_0, 0);
+ 
+ 		ALLOC_STATE(fpi[0], variable, R300_FPI_CMDSIZE, 0);
+-		r300->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_RGB_INST_0, 1);
++		r300->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_ALU_RGB_INST_0, 1);
+ 		ALLOC_STATE(fpi[1], variable, R300_FPI_CMDSIZE, 1);
+-		r300->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_RGB_ADDR_0, 1);
++		r300->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_ALU_RGB_ADDR_0, 1);
+ 		ALLOC_STATE(fpi[2], variable, R300_FPI_CMDSIZE, 2);
+-		r300->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_ALPHA_INST_0, 1);
++		r300->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_ALU_ALPHA_INST_0, 1);
+ 		ALLOC_STATE(fpi[3], variable, R300_FPI_CMDSIZE, 3);
+-		r300->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_ALPHA_ADDR_0, 1);
++		r300->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_ALU_ALPHA_ADDR_0, 1);
+ 		ALLOC_STATE(fpp, variable, R300_FPP_CMDSIZE, 0);
+-		r300->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(R300_PFS_PARAM_0_X, 0);
++		r300->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_PFS_PARAM_0_X, 0);
+ 	}
+ 	ALLOC_STATE(fogs, always, R300_FOGS_CMDSIZE, 0);
+-	r300->hw.fogs.cmd[R300_FOGS_CMD_0] = cmdpacket0(R300_FG_FOG_BLEND, 1);
++	r300->hw.fogs.cmd[R300_FOGS_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_FG_FOG_BLEND, 1);
+ 	ALLOC_STATE(fogc, always, R300_FOGC_CMDSIZE, 0);
+-	r300->hw.fogc.cmd[R300_FOGC_CMD_0] = cmdpacket0(R300_FG_FOG_COLOR_R, 3);
++	r300->hw.fogc.cmd[R300_FOGC_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_FG_FOG_COLOR_R, 3);
+ 	ALLOC_STATE(at, always, R300_AT_CMDSIZE, 0);
+-	r300->hw.at.cmd[R300_AT_CMD_0] = cmdpacket0(R300_FG_ALPHA_FUNC, 2);
++	r300->hw.at.cmd[R300_AT_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_FG_ALPHA_FUNC, 2);
+ 	ALLOC_STATE(fg_depth_src, always, 2, 0);
+-	r300->hw.fg_depth_src.cmd[0] = cmdpacket0(R300_FG_DEPTH_SRC, 1);
++	r300->hw.fg_depth_src.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_FG_DEPTH_SRC, 1);
+ 	ALLOC_STATE(rb3d_cctl, always, 2, 0);
+-	r300->hw.rb3d_cctl.cmd[0] = cmdpacket0(R300_RB3D_CCTL, 1);
++	r300->hw.rb3d_cctl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_CCTL, 1);
+ 	ALLOC_STATE(bld, always, R300_BLD_CMDSIZE, 0);
+-	r300->hw.bld.cmd[R300_BLD_CMD_0] = cmdpacket0(R300_RB3D_CBLEND, 2);
++	r300->hw.bld.cmd[R300_BLD_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_CBLEND, 2);
+ 	ALLOC_STATE(cmk, always, R300_CMK_CMDSIZE, 0);
+-	r300->hw.cmk.cmd[R300_CMK_CMD_0] = cmdpacket0(RB3D_COLOR_CHANNEL_MASK, 1);
++	r300->hw.cmk.cmd[R300_CMK_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, RB3D_COLOR_CHANNEL_MASK, 1);
+ 	if (is_r500) {
+ 		ALLOC_STATE(blend_color, always, 3, 0);
+-		r300->hw.blend_color.cmd[0] = cmdpacket0(R500_RB3D_CONSTANT_COLOR_AR, 2);
++		r300->hw.blend_color.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R500_RB3D_CONSTANT_COLOR_AR, 2);
+ 	} else {
+ 		ALLOC_STATE(blend_color, always, 2, 0);
+-		r300->hw.blend_color.cmd[0] = cmdpacket0(R300_RB3D_BLEND_COLOR, 1);
++		r300->hw.blend_color.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_BLEND_COLOR, 1);
+ 	}
+ 	ALLOC_STATE(rop, always, 2, 0);
+-	r300->hw.rop.cmd[0] = cmdpacket0(R300_RB3D_ROPCNTL, 1);
++	r300->hw.rop.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_ROPCNTL, 1);
+ 	ALLOC_STATE(cb, always, R300_CB_CMDSIZE, 0);
+-	r300->hw.cb.cmd[R300_CB_CMD_0] = cmdpacket0(R300_RB3D_COLOROFFSET0, 1);
+-	r300->hw.cb.cmd[R300_CB_CMD_1] = cmdpacket0(R300_RB3D_COLORPITCH0, 1);
++	r300->hw.cb.emit = &emit_cb_offset;
+ 	ALLOC_STATE(rb3d_dither_ctl, always, 10, 0);
+-	r300->hw.rb3d_dither_ctl.cmd[0] = cmdpacket0(R300_RB3D_DITHER_CTL, 9);
++	r300->hw.rb3d_dither_ctl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_DITHER_CTL, 9);
+ 	ALLOC_STATE(rb3d_aaresolve_ctl, always, 2, 0);
+-	r300->hw.rb3d_aaresolve_ctl.cmd[0] = cmdpacket0(R300_RB3D_AARESOLVE_CTL, 1);
++	r300->hw.rb3d_aaresolve_ctl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_AARESOLVE_CTL, 1);
+ 	ALLOC_STATE(rb3d_discard_src_pixel_lte_threshold, always, 3, 0);
+-	r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[0] = cmdpacket0(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 2);
++	r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 2);
+ 	ALLOC_STATE(zs, always, R300_ZS_CMDSIZE, 0);
+ 	r300->hw.zs.cmd[R300_ZS_CMD_0] =
+-	    cmdpacket0(R300_ZB_CNTL, 3);
++	    cmdpacket0(r300->radeon.radeonScreen, R300_ZB_CNTL, 3);
+ 	ALLOC_STATE(zstencil_format, always, 5, 0);
+ 	r300->hw.zstencil_format.cmd[0] =
+-	    cmdpacket0(R300_ZB_FORMAT, 4);
++	    cmdpacket0(r300->radeon.radeonScreen, R300_ZB_FORMAT, 4);
+ 	ALLOC_STATE(zb, always, R300_ZB_CMDSIZE, 0);
+-	r300->hw.zb.cmd[R300_ZB_CMD_0] = cmdpacket0(R300_ZB_DEPTHOFFSET, 2);
++	r300->hw.zb.emit = emit_zb_offset;
+ 	ALLOC_STATE(zb_depthclearvalue, always, 2, 0);
+-	r300->hw.zb_depthclearvalue.cmd[0] = cmdpacket0(R300_ZB_DEPTHCLEARVALUE, 1);
++	r300->hw.zb_depthclearvalue.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_DEPTHCLEARVALUE, 1);
+ 	ALLOC_STATE(unk4F30, always, 3, 0);
+-	r300->hw.unk4F30.cmd[0] = cmdpacket0(0x4F30, 2);
++	r300->hw.unk4F30.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, 0x4F30, 2);
+ 	ALLOC_STATE(zb_hiz_offset, always, 2, 0);
+-	r300->hw.zb_hiz_offset.cmd[0] = cmdpacket0(R300_ZB_HIZ_OFFSET, 1);
++	r300->hw.zb_hiz_offset.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_HIZ_OFFSET, 1);
+ 	ALLOC_STATE(zb_hiz_pitch, always, 2, 0);
+-	r300->hw.zb_hiz_pitch.cmd[0] = cmdpacket0(R300_ZB_HIZ_PITCH, 1);
++	r300->hw.zb_hiz_pitch.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_HIZ_PITCH, 1);
+ 
+ 	/* VPU only on TCL */
+ 	if (has_tcl) {
+    	        int i;
+ 		ALLOC_STATE(vpi, vpu, R300_VPI_CMDSIZE, 0);
+-		r300->hw.vpi.cmd[R300_VPI_CMD_0] =
+-		    cmdvpu(R300_PVS_CODE_START, 0);
++		r300->hw.vpi.cmd[0] =
++		    cmdvpu(r300->radeon.radeonScreen, R300_PVS_CODE_START, 0);
++		r300->hw.vpi.emit = emit_vpu;
+ 
+ 		if (is_r500) {
+ 		    ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0);
+-		    r300->hw.vpp.cmd[R300_VPP_CMD_0] =
+-			cmdvpu(R500_PVS_CONST_START, 0);
++		    r300->hw.vpp.cmd[0] =
++			cmdvpu(r300->radeon.radeonScreen, R500_PVS_CONST_START, 0);
++		    r300->hw.vpp.emit = emit_vpu;
+ 
+ 		    ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0);
+-		    r300->hw.vps.cmd[R300_VPS_CMD_0] =
+-			cmdvpu(R500_POINT_VPORT_SCALE_OFFSET, 1);
++		    r300->hw.vps.cmd[0] =
++			cmdvpu(r300->radeon.radeonScreen, R500_POINT_VPORT_SCALE_OFFSET, 1);
++		    r300->hw.vps.emit = emit_vpu;
+ 
+ 			for (i = 0; i < 6; i++) {
+-				ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0);
+-				r300->hw.vpucp[i].cmd[R300_VPUCP_CMD_0] =
+-					cmdvpu(R500_PVS_UCP_START + i, 1);
++			  ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0);
++			  r300->hw.vpucp[i].cmd[0] =
++				  cmdvpu(r300->radeon.radeonScreen,
++                           R500_PVS_UCP_START + i, 1);
++				r300->hw.vpucp[i].emit = emit_vpu;
+ 			}
+ 		} else {
+ 		    ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0);
+-		    r300->hw.vpp.cmd[R300_VPP_CMD_0] =
+-			cmdvpu(R300_PVS_CONST_START, 0);
++		    r300->hw.vpp.cmd[0] =
++			cmdvpu(r300->radeon.radeonScreen, R300_PVS_CONST_START, 0);
++		    r300->hw.vpp.emit = emit_vpu;
+ 
+ 		    ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0);
+-		    r300->hw.vps.cmd[R300_VPS_CMD_0] =
+-			cmdvpu(R300_POINT_VPORT_SCALE_OFFSET, 1);
++		    r300->hw.vps.cmd[0] =
++			cmdvpu(r300->radeon.radeonScreen, R300_POINT_VPORT_SCALE_OFFSET, 1);
++		    r300->hw.vps.emit = emit_vpu;
+ 
+ 			for (i = 0; i < 6; i++) {
+ 				ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0);
+-				r300->hw.vpucp[i].cmd[R300_VPUCP_CMD_0] =
+-					cmdvpu(R300_PVS_UCP_START + i, 1);
++				r300->hw.vpucp[i].cmd[0] =
++					cmdvpu(r300->radeon.radeonScreen,
++					       R300_PVS_UCP_START + i, 1);
++				r300->hw.vpucp[i].emit = emit_vpu;
+ 			}
+ 		}
+ 	}
+@@ -546,61 +576,39 @@ void r300InitCmdBuf(r300ContextPtr r300)
+ 	/* Textures */
+ 	ALLOC_STATE(tex.filter, variable, mtu + 1, 0);
+ 	r300->hw.tex.filter.cmd[R300_TEX_CMD_0] =
+-	    cmdpacket0(R300_TX_FILTER0_0, 0);
++	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER0_0, 0);
+ 
+ 	ALLOC_STATE(tex.filter_1, variable, mtu + 1, 0);
+ 	r300->hw.tex.filter_1.cmd[R300_TEX_CMD_0] =
+-	    cmdpacket0(R300_TX_FILTER1_0, 0);
++	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER1_0, 0);
+ 
+ 	ALLOC_STATE(tex.size, variable, mtu + 1, 0);
+-	r300->hw.tex.size.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_SIZE_0, 0);
++	r300->hw.tex.size.cmd[R300_TEX_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_SIZE_0, 0);
+ 
+ 	ALLOC_STATE(tex.format, variable, mtu + 1, 0);
+ 	r300->hw.tex.format.cmd[R300_TEX_CMD_0] =
+-	    cmdpacket0(R300_TX_FORMAT_0, 0);
++	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_FORMAT_0, 0);
+ 
+ 	ALLOC_STATE(tex.pitch, variable, mtu + 1, 0);
+-	r300->hw.tex.pitch.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_FORMAT2_0, 0);
++	r300->hw.tex.pitch.cmd[R300_TEX_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_FORMAT2_0, 0);
+ 
+-	ALLOC_STATE(tex.offset, variable, mtu + 1, 0);
++	ALLOC_STATE(tex.offset, variable, 1, 0);
+ 	r300->hw.tex.offset.cmd[R300_TEX_CMD_0] =
+-	    cmdpacket0(R300_TX_OFFSET_0, 0);
++	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_OFFSET_0, 0);
++	r300->hw.tex.offset.emit = &emit_tex_offsets;
+ 
+ 	ALLOC_STATE(tex.chroma_key, variable, mtu + 1, 0);
+ 	r300->hw.tex.chroma_key.cmd[R300_TEX_CMD_0] =
+-	    cmdpacket0(R300_TX_CHROMA_KEY_0, 0);
++	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_CHROMA_KEY_0, 0);
+ 
+ 	ALLOC_STATE(tex.border_color, variable, mtu + 1, 0);
+ 	r300->hw.tex.border_color.cmd[R300_TEX_CMD_0] =
+-	    cmdpacket0(R300_TX_BORDER_COLOR_0, 0);
++	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_BORDER_COLOR_0, 0);
+ 
+-	r300->hw.is_dirty = GL_TRUE;
+-	r300->hw.all_dirty = GL_TRUE;
++	r300->radeon.hw.is_dirty = GL_TRUE;
++	r300->radeon.hw.all_dirty = GL_TRUE;
+ 
+-	/* Initialize command buffer */
+-	size =
+-	    256 * driQueryOptioni(&r300->radeon.optionCache,
+-				  "command_buffer_size");
+-	if (size < 2 * r300->hw.max_state_size) {
+-		size = 2 * r300->hw.max_state_size + 65535;
+-	}
+-	if (size > 64 * 256)
+-		size = 64 * 256;
+-
+-	if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA)) {
+-		fprintf(stderr, "sizeof(drm_r300_cmd_header_t)=%zd\n",
+-			sizeof(drm_r300_cmd_header_t));
+-		fprintf(stderr, "sizeof(drm_radeon_cmd_buffer_t)=%zd\n",
+-			sizeof(drm_radeon_cmd_buffer_t));
+-		fprintf(stderr,
+-			"Allocating %d bytes command buffer (max state is %d bytes)\n",
+-			size * 4, r300->hw.max_state_size * 4);
+-	}
+-
+-	r300->cmdbuf.size = size;
+-	r300->cmdbuf.cmd_buf = (uint32_t *) CALLOC(size * 4);
+-	r300->cmdbuf.count_used = 0;
+-	r300->cmdbuf.count_reemit = 0;
++	rcommonInitCmdBuf(&r300->radeon);
+ }
+ 
+ /**
+@@ -608,68 +616,10 @@ void r300InitCmdBuf(r300ContextPtr r300)
+  */
+ void r300DestroyCmdBuf(r300ContextPtr r300)
+ {
+-	struct r300_state_atom *atom;
+-
+-	FREE(r300->cmdbuf.cmd_buf);
++	struct radeon_state_atom *atom;
+ 
+-	foreach(atom, &r300->hw.atomlist) {
++	foreach(atom, &r300->radeon.hw.atomlist) {
+ 		FREE(atom->cmd);
+ 	}
+-}
+-
+-void r300EmitBlit(r300ContextPtr rmesa,
+-		  GLuint color_fmt,
+-		  GLuint src_pitch,
+-		  GLuint src_offset,
+-		  GLuint dst_pitch,
+-		  GLuint dst_offset,
+-		  GLint srcx, GLint srcy,
+-		  GLint dstx, GLint dsty, GLuint w, GLuint h)
+-{
+-	drm_r300_cmd_header_t *cmd;
+-
+-	if (RADEON_DEBUG & DEBUG_IOCTL)
+-		fprintf(stderr,
+-			"%s src %x/%x %d,%d dst: %x/%x %d,%d sz: %dx%d\n",
+-			__FUNCTION__, src_pitch, src_offset, srcx, srcy,
+-			dst_pitch, dst_offset, dstx, dsty, w, h);
+-
+-	assert((src_pitch & 63) == 0);
+-	assert((dst_pitch & 63) == 0);
+-	assert((src_offset & 1023) == 0);
+-	assert((dst_offset & 1023) == 0);
+-	assert(w < (1 << 16));
+-	assert(h < (1 << 16));
+-
+-	cmd = (drm_r300_cmd_header_t *) r300AllocCmdBuf(rmesa, 8, __FUNCTION__);
+-
+-	cmd[0].header.cmd_type = R300_CMD_PACKET3;
+-	cmd[0].header.pad0 = R300_CMD_PACKET3_RAW;
+-	cmd[1].u = R300_CP_CMD_BITBLT_MULTI | (5 << 16);
+-	cmd[2].u = (RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
+-		    RADEON_GMC_DST_PITCH_OFFSET_CNTL |
+-		    RADEON_GMC_BRUSH_NONE |
+-		    (color_fmt << 8) |
+-		    RADEON_GMC_SRC_DATATYPE_COLOR |
+-		    RADEON_ROP3_S |
+-		    RADEON_DP_SRC_SOURCE_MEMORY |
+-		    RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
+-
+-	cmd[3].u = ((src_pitch / 64) << 22) | (src_offset >> 10);
+-	cmd[4].u = ((dst_pitch / 64) << 22) | (dst_offset >> 10);
+-	cmd[5].u = (srcx << 16) | srcy;
+-	cmd[6].u = (dstx << 16) | dsty;	/* dst */
+-	cmd[7].u = (w << 16) | h;
+-}
+-
+-void r300EmitWait(r300ContextPtr rmesa, GLuint flags)
+-{
+-	drm_r300_cmd_header_t *cmd;
+-
+-	assert(!(flags & ~(R300_WAIT_2D | R300_WAIT_3D)));
+ 
+-	cmd = (drm_r300_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, __FUNCTION__);
+-	cmd[0].u = 0;
+-	cmd[0].wait.cmd_type = R300_CMD_WAIT;
+-	cmd[0].wait.flags = flags;
+ }
+diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.h b/src/mesa/drivers/dri/r300/r300_cmdbuf.h
+index a8eaa58..b7798eb 100644
+--- a/src/mesa/drivers/dri/r300/r300_cmdbuf.h
++++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.h
+@@ -38,79 +38,15 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ #include "r300_context.h"
+ 
+-extern int r300FlushCmdBufLocked(r300ContextPtr r300, const char *caller);
+-extern int r300FlushCmdBuf(r300ContextPtr r300, const char *caller);
+-
+-extern void r300EmitState(r300ContextPtr r300);
+-
+ extern void r300InitCmdBuf(r300ContextPtr r300);
+ extern void r300DestroyCmdBuf(r300ContextPtr r300);
+ 
+-/**
+- * Make sure that enough space is available in the command buffer
+- * by flushing if necessary.
+- *
+- * \param dwords The number of dwords we need to be free on the command buffer
+- */
+-static INLINE void r300EnsureCmdBufSpace(r300ContextPtr r300,
+-					     int dwords, const char *caller)
+-{
+-	assert(dwords < r300->cmdbuf.size);
+-
+-	if (r300->cmdbuf.count_used + dwords > r300->cmdbuf.size)
+-		r300FlushCmdBuf(r300, caller);
+-}
+-
+-/**
+- * Allocate the given number of dwords in the command buffer and return
+- * a pointer to the allocated area.
+- * When necessary, these functions cause a flush. r300AllocCmdBuf() also
+- * causes state reemission after a flush. This is necessary to ensure
+- * correct hardware state after an unlock.
+- */
+-static INLINE uint32_t *r300RawAllocCmdBuf(r300ContextPtr r300,
+-					       int dwords, const char *caller)
+-{
+-	uint32_t *ptr;
+-
+-	r300EnsureCmdBufSpace(r300, dwords, caller);
+-
+-	ptr = &r300->cmdbuf.cmd_buf[r300->cmdbuf.count_used];
+-	r300->cmdbuf.count_used += dwords;
+-	return ptr;
+-}
+-
+-static INLINE uint32_t *r300AllocCmdBuf(r300ContextPtr r300,
+-					    int dwords, const char *caller)
+-{
+-	uint32_t *ptr;
+-
+-	r300EnsureCmdBufSpace(r300, dwords, caller);
+-
+-	if (!r300->cmdbuf.count_used) {
+-		if (RADEON_DEBUG & DEBUG_IOCTL)
+-			fprintf(stderr,
+-				"Reemit state after flush (from %s)\n", caller);
+-		r300EmitState(r300);
+-	}
+-
+-	ptr = &r300->cmdbuf.cmd_buf[r300->cmdbuf.count_used];
+-	r300->cmdbuf.count_used += dwords;
+-	return ptr;
+-}
+ 
+-extern void r300EmitBlit(r300ContextPtr rmesa,
+-			 GLuint color_fmt,
+-			 GLuint src_pitch,
+-			 GLuint src_offset,
+-			 GLuint dst_pitch,
+-			 GLuint dst_offset,
+-			 GLint srcx, GLint srcy,
+-			 GLint dstx, GLint dsty, GLuint w, GLuint h);
++void emit_vpu(GLcontext *ctx, struct radeon_state_atom * atom);
++int check_vpu(GLcontext *ctx, struct radeon_state_atom *atom);
+ 
+-extern void r300EmitWait(r300ContextPtr rmesa, GLuint flags);
+-extern void r300EmitLOAD_VBPNTR(r300ContextPtr rmesa, int start);
+-extern void r300EmitVertexShader(r300ContextPtr rmesa);
+-extern void r300EmitPixelShader(r300ContextPtr rmesa);
++void emit_r500fp(GLcontext *ctx, struct radeon_state_atom * atom);
++int check_r500fp(GLcontext *ctx, struct radeon_state_atom *atom);
++int check_r500fp_const(GLcontext *ctx, struct radeon_state_atom *atom);
+ 
+ #endif				/* __R300_CMDBUF_H__ */
+diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c
+index 4c14c7e..dd63add 100644
+--- a/src/mesa/drivers/dri/r300/r300_context.c
++++ b/src/mesa/drivers/dri/r300/r300_context.c
+@@ -44,6 +44,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "main/extensions.h"
+ #include "main/state.h"
+ #include "main/bufferobj.h"
++#include "main/texobj.h"
+ 
+ #include "swrast/swrast.h"
+ #include "swrast_setup/swrast_setup.h"
+@@ -55,19 +56,17 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ #include "drivers/common/driverfuncs.h"
+ 
+-#include "radeon_ioctl.h"
+-#include "radeon_span.h"
+ #include "r300_context.h"
++#include "radeon_context.h"
++#include "radeon_span.h"
+ #include "r300_cmdbuf.h"
+ #include "r300_state.h"
+ #include "r300_ioctl.h"
+ #include "r300_tex.h"
+ #include "r300_emit.h"
+ #include "r300_swtcl.h"
++#include "radeon_bocs_wrapper.h"
+ 
+-#ifdef USER_BUFFERS
+-#include "r300_mem.h"
+-#endif
+ 
+ #include "vblank.h"
+ #include "utils.h"
+@@ -183,6 +182,78 @@ static const struct tnl_pipeline_stage *r300_pipeline[] = {
+ 	0,
+ };
+ 
++static void r300RunPipeline(GLcontext * ctx)
++{
++    _mesa_lock_context_textures(ctx);
++
++    if (ctx->NewState)
++        _mesa_update_state_locked(ctx);
++    
++    _tnl_run_pipeline(ctx);
++    _mesa_unlock_context_textures(ctx);
++}
++
++static void r300_get_lock(radeonContextPtr rmesa)
++{
++	drm_radeon_sarea_t *sarea = rmesa->sarea;
++
++	if (sarea->ctx_owner != rmesa->dri.hwContext) {
++		sarea->ctx_owner = rmesa->dri.hwContext;
++		if (!rmesa->radeonScreen->kernel_mm)
++			radeon_bo_legacy_texture_age(rmesa->radeonScreen->bom);
++	}
++}		  
++
++static void r300_vtbl_emit_cs_header(struct radeon_cs *cs, radeonContextPtr rmesa)
++{
++    /* Flush the pipe and wait for all pending work to complete. */
++    radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen,
++                                  R300_SC_SCREENDOOR, 1));
++    radeon_cs_write_dword(cs, 0x0);
++    radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen,
++                                  R300_SC_SCREENDOOR, 1));
++    radeon_cs_write_dword(cs, 0x00FFFFFF);
++    radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen,
++                                  R300_SC_HYPERZ, 1));
++    radeon_cs_write_dword(cs, 0x0);
++    radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen,
++                                  R300_US_CONFIG, 1));
++    radeon_cs_write_dword(cs, 0x0);
++    radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen,
++                                  R300_ZB_CNTL, 1));
++    radeon_cs_write_dword(cs, 0x0);
++    radeon_cs_write_dword(cs, cmdwait(rmesa->radeonScreen, R300_WAIT_3D));
++    radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen,
++                                  R300_RB3D_DSTCACHE_CTLSTAT, 1));
++    radeon_cs_write_dword(cs, R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D);
++    radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen,
++                                  R300_ZB_ZCACHE_CTLSTAT, 1));
++    radeon_cs_write_dword(cs, R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE);
++    radeon_cs_write_dword(cs, cmdwait(rmesa->radeonScreen,
++                               R300_WAIT_3D | R300_WAIT_3D_CLEAN));
++}
++
++static void r300_vtbl_pre_emit_atoms(radeonContextPtr radeon)
++{
++   BATCH_LOCALS(radeon);
++   cp_wait(radeon, R300_WAIT_3D | R300_WAIT_3D_CLEAN);
++   BEGIN_BATCH_NO_AUTOSTATE(2);
++   OUT_BATCH_REGVAL(R300_TX_INVALTAGS, R300_TX_FLUSH);
++   END_BATCH();
++   end_3d(radeon);
++}
++
++static void r300_init_vtbl(radeonContextPtr radeon)
++{
++   radeon->vtbl.get_lock = r300_get_lock;
++   radeon->vtbl.update_viewport_offset = r300UpdateViewportOffset;
++   radeon->vtbl.update_draw_buffer = r300UpdateDrawBuffer;
++   radeon->vtbl.emit_cs_header = r300_vtbl_emit_cs_header;
++   radeon->vtbl.swtcl_flush = r300_swtcl_flush;
++   radeon->vtbl.pre_emit_atoms = r300_vtbl_pre_emit_atoms;
++}
++
++
+ /* Create the device specific rendering context.
+  */
+ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
+@@ -194,7 +265,7 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
+ 	struct dd_function_table functions;
+ 	r300ContextPtr r300;
+ 	GLcontext *ctx;
+-	int tcl_mode, i;
++	int tcl_mode;
+ 
+ 	assert(glVisual);
+ 	assert(driContextPriv);
+@@ -208,13 +279,14 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
+ 	if (!(screen->chip_flags & RADEON_CHIPSET_TCL))
+ 		hw_tcl_on = future_hw_tcl_on = 0;
+ 
++	r300_init_vtbl(&r300->radeon);
+ 	/* Parse configuration files.
+ 	 * Do this here so that initialMaxAnisotropy is set before we create
+ 	 * the default textures.
+ 	 */
+ 	driParseConfigFiles(&r300->radeon.optionCache, &screen->optionCache,
+ 			    screen->driScreen->myNum, "r300");
+-	r300->initialMaxAnisotropy = driQueryOptionf(&r300->radeon.optionCache,
++	r300->radeon.initialMaxAnisotropy = driQueryOptionf(&r300->radeon.optionCache,
+ 						     "def_max_anisotropy");
+ 
+ 	/* Init default driver functions then plug in our R300-specific functions
+@@ -226,10 +298,6 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
+ 	r300InitTextureFuncs(&functions);
+ 	r300InitShaderFuncs(&functions);
+ 
+-#ifdef USER_BUFFERS
+-	r300_mem_init(r300);
+-#endif
+-
+ 	if (!radeonInitContext(&r300->radeon, &functions,
+ 			       glVisual, driContextPriv,
+ 			       sharedContextPrivate)) {
+@@ -238,37 +306,10 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
+ 	}
+ 
+ 	/* Init r300 context data */
+-	r300->dma.buf0_address =
+-	    r300->radeon.radeonScreen->buffers->list[0].address;
+-
+-	(void)memset(r300->texture_heaps, 0, sizeof(r300->texture_heaps));
+-	make_empty_list(&r300->swapped);
+-
+-	r300->nr_heaps = 1 /* screen->numTexHeaps */ ;
+-	assert(r300->nr_heaps < RADEON_NR_TEX_HEAPS);
+-	for (i = 0; i < r300->nr_heaps; i++) {
+-		/* *INDENT-OFF* */
+-		r300->texture_heaps[i] = driCreateTextureHeap(i, r300,
+-							       screen->
+-							       texSize[i], 12,
+-							       RADEON_NR_TEX_REGIONS,
+-							       (drmTextureRegionPtr)
+-							       r300->radeon.sarea->
+-							       tex_list[i],
+-							       &r300->radeon.sarea->
+-							       tex_age[i],
+-							       &r300->swapped,
+-							       sizeof
+-							       (r300TexObj),
+-							       (destroy_texture_object_t
+-								*)
+-							       r300DestroyTexObj);
+-		/* *INDENT-ON* */
+-	}
+-	r300->texture_depth = driQueryOptioni(&r300->radeon.optionCache,
++	r300->radeon.texture_depth = driQueryOptioni(&r300->radeon.optionCache,
+ 					      "texture_depth");
+-	if (r300->texture_depth == DRI_CONF_TEXTURE_DEPTH_FB)
+-		r300->texture_depth = (screen->cpp == 4) ?
++	if (r300->radeon.texture_depth == DRI_CONF_TEXTURE_DEPTH_FB)
++		r300->radeon.texture_depth = (screen->cpp == 4) ?
+ 		    DRI_CONF_TEXTURE_DEPTH_32 : DRI_CONF_TEXTURE_DEPTH_16;
+ 
+ 	/* Set the maximum texture size small enough that we can guarentee that
+@@ -303,13 +344,11 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
+ 	ctx->Const.MaxLineWidth = R300_LINESIZE_MAX;
+ 	ctx->Const.MaxLineWidthAA = R300_LINESIZE_MAX;
+ 
+-#ifdef USER_BUFFERS
+ 	/* Needs further modifications */
+ #if 0
+ 	ctx->Const.MaxArrayLockSize =
+ 	    ( /*512 */ RADEON_BUFFER_SIZE * 16 * 1024) / (4 * 4);
+ #endif
+-#endif
+ 
+ 	ctx->Const.MaxDrawBuffers = 1;
+ 
+@@ -384,13 +423,13 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
+ 	    driQueryOptionb(&r300->radeon.optionCache,
+ 			    "disable_lowimpact_fallback");
+ 
+-	radeonInitSpanFuncs(ctx);
++   	radeonInitSpanFuncs( ctx );
+ 	r300InitCmdBuf(r300);
+ 	r300InitState(r300);
+ 	if (!(screen->chip_flags & RADEON_CHIPSET_TCL))
+ 	        r300InitSwtcl(ctx);
+ 
+-	TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
++	TNL_CONTEXT(ctx)->Driver.RunPipeline = r300RunPipeline;
+ 
+ 	tcl_mode = driQueryOptioni(&r300->radeon.optionCache, "tcl_mode");
+ 	if (driQueryOptionb(&r300->radeon.optionCache, "no_rast")) {
+@@ -413,72 +452,6 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
+ 	return GL_TRUE;
+ }
+ 
+-static void r300FreeGartAllocations(r300ContextPtr r300)
+-{
+-	int i, ret, tries = 0, done_age, in_use = 0;
+-	drm_radeon_mem_free_t memfree;
+-
+-	memfree.region = RADEON_MEM_REGION_GART;
+-
+-#ifdef USER_BUFFERS
+-	for (i = r300->rmm->u_last; i > 0; i--) {
+-		if (r300->rmm->u_list[i].ptr == NULL) {
+-			continue;
+-		}
+-
+-		/* check whether this buffer is still in use */
+-		if (r300->rmm->u_list[i].pending) {
+-			in_use++;
+-		}
+-	}
+-	/* Cannot flush/lock if no context exists. */
+-	if (in_use)
+-		r300FlushCmdBuf(r300, __FUNCTION__);
+-
+-	done_age = radeonGetAge((radeonContextPtr) r300);
+-
+-	for (i = r300->rmm->u_last; i > 0; i--) {
+-		if (r300->rmm->u_list[i].ptr == NULL) {
+-			continue;
+-		}
+-
+-		/* check whether this buffer is still in use */
+-		if (!r300->rmm->u_list[i].pending) {
+-			continue;
+-		}
+-
+-		assert(r300->rmm->u_list[i].h_pending == 0);
+-
+-		tries = 0;
+-		while (r300->rmm->u_list[i].age > done_age && tries++ < 1000) {
+-			usleep(10);
+-			done_age = radeonGetAge((radeonContextPtr) r300);
+-		}
+-		if (tries >= 1000) {
+-			WARN_ONCE("Failed to idle region!");
+-		}
+-
+-		memfree.region_offset = (char *)r300->rmm->u_list[i].ptr -
+-		    (char *)r300->radeon.radeonScreen->gartTextures.map;
+-
+-		ret = drmCommandWrite(r300->radeon.radeonScreen->driScreen->fd,
+-				      DRM_RADEON_FREE, &memfree,
+-				      sizeof(memfree));
+-		if (ret) {
+-			fprintf(stderr, "Failed to free at %p\nret = %s\n",
+-				r300->rmm->u_list[i].ptr, strerror(-ret));
+-		} else {
+-			if (i == r300->rmm->u_last)
+-				r300->rmm->u_last--;
+-
+-			r300->rmm->u_list[i].pending = 0;
+-			r300->rmm->u_list[i].ptr = NULL;
+-		}
+-	}
+-	r300->rmm->u_head = i;
+-#endif				/* USER_BUFFERS */
+-}
+-
+ /* Destroy the device specific context.
+  */
+ void r300DestroyContext(__DRIcontextPrivate * driContextPriv)
+@@ -502,55 +475,27 @@ void r300DestroyContext(__DRIcontextPrivate * driContextPriv)
+ 	assert(r300);		/* should never be null */
+ 
+ 	if (r300) {
+-		GLboolean release_texture_heaps;
+-
+-		release_texture_heaps =
+-		    (r300->radeon.glCtx->Shared->RefCount == 1);
+ 		_swsetup_DestroyContext(r300->radeon.glCtx);
+ 		_tnl_DestroyContext(r300->radeon.glCtx);
+ 		_vbo_DestroyContext(r300->radeon.glCtx);
+ 		_swrast_DestroyContext(r300->radeon.glCtx);
+ 
+-		if (r300->dma.current.buf) {
+-			r300ReleaseDmaRegion(r300, &r300->dma.current,
+-					     __FUNCTION__);
+-#ifndef USER_BUFFERS
+-			r300FlushCmdBuf(r300, __FUNCTION__);
+-#endif
+-		}
+-		r300FreeGartAllocations(r300);
+-		r300DestroyCmdBuf(r300);
++		rcommonFlushCmdBuf(&r300->radeon, __FUNCTION__);
+ 
+ 		if (radeon->state.scissor.pClipRects) {
+ 			FREE(radeon->state.scissor.pClipRects);
+ 			radeon->state.scissor.pClipRects = NULL;
+ 		}
+ 
+-		if (release_texture_heaps) {
+-			/* This share group is about to go away, free our private
+-			 * texture object data.
+-			 */
+-			int i;
+-
+-			for (i = 0; i < r300->nr_heaps; i++) {
+-				driDestroyTextureHeap(r300->texture_heaps[i]);
+-				r300->texture_heaps[i] = NULL;
+-			}
+-
+-			assert(is_empty_list(&r300->swapped));
+-		}
++		r300DestroyCmdBuf(r300);
+ 
+ 		radeonCleanupContext(&r300->radeon);
+ 
+-#ifdef USER_BUFFERS
++
+ 		/* the memory manager might be accessed when Mesa frees the shared
+ 		 * state, so don't destroy it earlier
+ 		 */
+-		r300_mem_destroy(r300);
+-#endif
+ 
+-		/* free the option cache */
+-		driDestroyOptionCache(&r300->radeon.optionCache);
+ 
+ 		FREE(r300);
+ 	}
+diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h
+index c15e9fa..6d34727 100644
+--- a/src/mesa/drivers/dri/r300/r300_context.h
++++ b/src/mesa/drivers/dri/r300/r300_context.h
+@@ -42,21 +42,20 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "radeon_drm.h"
+ #include "dri_util.h"
+ #include "texmem.h"
++#include "radeon_common.h"
+ 
+ #include "main/macros.h"
+ #include "main/mtypes.h"
+ #include "main/colormac.h"
+ 
+-#define USER_BUFFERS
+-
+ struct r300_context;
+ typedef struct r300_context r300ContextRec;
+ typedef struct r300_context *r300ContextPtr;
+ 
+-#include "radeon_lock.h"
++
+ #include "main/mm.h"
+ 
+-/* From http://gcc.gnu.org/onlinedocs/gcc-3.2.3/gcc/Variadic-Macros.html .
++/* From http://gcc.gnu.org/onlinedocs/gcc-3.2.3/gcc/Variadic-Macros.html .
+    I suppose we could inline this and use macro to fetch out __LINE__ and stuff in case we run into trouble
+    with other compilers ... GLUE!
+ */
+@@ -75,174 +74,19 @@ typedef struct r300_context *r300ContextPtr;
+ #include "r300_vertprog.h"
+ #include "r500_fragprog.h"
+ 
+-/**
+- * This function takes a float and packs it into a uint32_t
+- */
+-static INLINE uint32_t r300PackFloat32(float fl)
+-{
+-	union {
+-		float fl;
+-		uint32_t u;
+-	} u;
+-
+-	u.fl = fl;
+-	return u.u;
+-}
+-
+-/* This is probably wrong for some values, I need to test this
+- * some more.  Range checking would be a good idea also..
+- *
+- * But it works for most things.  I'll fix it later if someone
+- * else with a better clue doesn't
+- */
+-static INLINE uint32_t r300PackFloat24(float f)
+-{
+-	float mantissa;
+-	int exponent;
+-	uint32_t float24 = 0;
+-
+-	if (f == 0.0)
+-		return 0;
+ 
+-	mantissa = frexpf(f, &exponent);
+-
+-	/* Handle -ve */
+-	if (mantissa < 0) {
+-		float24 |= (1 << 23);
+-		mantissa = mantissa * -1.0;
+-	}
+-	/* Handle exponent, bias of 63 */
+-	exponent += 62;
+-	float24 |= (exponent << 16);
+-	/* Kill 7 LSB of mantissa */
+-	float24 |= (r300PackFloat32(mantissa) & 0x7FFFFF) >> 7;
+-
+-	return float24;
+-}
+ 
+ /************ DMA BUFFERS **************/
+ 
+-/* Need refcounting on dma buffers:
+- */
+-struct r300_dma_buffer {
+-	int refcount;		/**< the number of retained regions in buf */
+-	drmBufPtr buf;
+-	int id;
+-};
+-#undef GET_START
+-#ifdef USER_BUFFERS
+-#define GET_START(rvb) (r300GartOffsetFromVirtual(rmesa, (rvb)->address+(rvb)->start))
+-#else
+-#define GET_START(rvb) (rmesa->radeon.radeonScreen->gart_buffer_offset +		\
+-			(rvb)->address - rmesa->dma.buf0_address +	\
+-			(rvb)->start)
+-#endif
+-/* A retained region, eg vertices for indexed vertices.
+- */
+-struct r300_dma_region {
+-	struct r300_dma_buffer *buf;
+-	char *address;		/* == buf->address */
+-	int start, end, ptr;	/* offsets from start of buf */
+-
+-	int aos_offset;		/* address in GART memory */
+-	int aos_stride;		/* distance between elements, in dwords */
+-	int aos_size;		/* number of components (1-4) */
+-};
+-
+-struct r300_dma {
+-	/* Active dma region.  Allocations for vertices and retained
+-	 * regions come from here.  Also used for emitting random vertices,
+-	 * these may be flushed by calling flush_current();
+-	 */
+-	struct r300_dma_region current;
+-
+-	void (*flush) (r300ContextPtr);
+-
+-	char *buf0_address;	/* start of buf[0], for index calcs */
+-
+-	/* Number of "in-flight" DMA buffers, i.e. the number of buffers
+-	 * for which a DISCARD command is currently queued in the command buffer.
+-	 */
+-	GLuint nr_released_bufs;
+-};
+-
+-       /* Texture related */
+-
+-typedef struct r300_tex_obj r300TexObj, *r300TexObjPtr;
+-
+-/* Texture object in locally shared texture space.
+- */
+-struct r300_tex_obj {
+-	driTextureObject base;
+-
+-	GLuint bufAddr;		/* Offset to start of locally
+-				   shared texture block */
+-
+-	drm_radeon_tex_image_t image[6][RADEON_MAX_TEXTURE_LEVELS];
+-	/* Six, for the cube faces */
+-
+-	GLboolean image_override;	/* Image overridden by GLX_EXT_tfp */
+-
+-	GLuint pitch;		/* this isn't sent to hardware just used in calculations */
+-	/* hardware register values */
+-	/* Note that R200 has 8 registers per texture and R300 only 7 */
+-	GLuint filter;
+-	GLuint filter_1;
+-	GLuint pitch_reg;
+-	GLuint size;		/* npot only */
+-	GLuint format;
+-	GLuint offset;		/* Image location in the card's address space.
+-				   All cube faces follow. */
+-	GLuint unknown4;
+-	GLuint unknown5;
+-	/* end hardware registers */
+-
+-	/* registers computed by r200 code - keep them here to
+-	   compare against what is actually written.
+-
+-	   to be removed later.. */
+-	GLuint pp_border_color;
+-	GLuint pp_cubic_faces;	/* cube face 1,2,3,4 log2 sizes */
+-	GLuint format_x;
+-
+-	GLboolean border_fallback;
+-
+-	GLuint tile_bits;	/* hw texture tile bits used on this texture */
+-};
+-
+-struct r300_texture_env_state {
+-	r300TexObjPtr texobj;
+-	GLenum format;
+-	GLenum envMode;
+-};
+-
+ /* The blit width for texture uploads
+  */
+ #define R300_BLIT_WIDTH_BYTES 1024
+ #define R300_MAX_TEXTURE_UNITS 8
+ 
+ struct r300_texture_state {
+-	struct r300_texture_env_state unit[R300_MAX_TEXTURE_UNITS];
+ 	int tc_count;		/* number of incoming texture coordinates from VAP */
+ };
+ 
+-/**
+- * A block of hardware state.
+- *
+- * When check returns non-zero, the returned number of dwords must be
+- * copied verbatim into the command buffer in order to update a state atom
+- * when it is dirty.
+- */
+-struct r300_state_atom {
+-	struct r300_state_atom *next, *prev;
+-	const char *name;	/* for debug */
+-	int cmd_size;		/* maximum size in dwords */
+-	GLuint idx;		/* index in an array (e.g. textures) */
+-	uint32_t *cmd;
+-	GLboolean dirty;
+-
+-	int (*check) (r300ContextPtr, struct r300_state_atom * atom);
+-};
+ 
+ #define R300_VPT_CMD_0		0
+ #define R300_VPT_XSCALE		1
+@@ -459,124 +303,98 @@ struct r300_state_atom {
+  * Cache for hardware register state.
+  */
+ struct r300_hw_state {
+-	struct r300_state_atom atomlist;
+-
+-	GLboolean is_dirty;
+-	GLboolean all_dirty;
+-	int max_state_size;	/* in dwords */
+-
+-	struct r300_state_atom vpt;	/* viewport (1D98) */
+-	struct r300_state_atom vap_cntl;
+-        struct r300_state_atom vap_index_offset; /* 0x208c r5xx only */
+-	struct r300_state_atom vof;	/* VAP output format register 0x2090 */
+-	struct r300_state_atom vte;	/* (20B0) */
+-	struct r300_state_atom vap_vf_max_vtx_indx;	/* Maximum Vertex Indx Clamp (2134) */
+-	struct r300_state_atom vap_cntl_status;
+-	struct r300_state_atom vir[2];	/* vap input route (2150/21E0) */
+-	struct r300_state_atom vic;	/* vap input control (2180) */
+-	struct r300_state_atom vap_psc_sgn_norm_cntl; /* Programmable Stream Control Signed Normalize Control (21DC) */
+-	struct r300_state_atom vap_clip_cntl;
+-	struct r300_state_atom vap_clip;
+-	struct r300_state_atom vap_pvs_vtx_timeout_reg;	/* Vertex timeout register (2288) */
+-	struct r300_state_atom pvs;	/* pvs_cntl (22D0) */
+-	struct r300_state_atom gb_enable;	/* (4008) */
+-	struct r300_state_atom gb_misc;	/* Multisampling position shifts ? (4010) */
+-	struct r300_state_atom ga_point_s0;	/* S Texture Coordinate of Vertex 0 for Point texture stuffing (LLC) (4200) */
+-	struct r300_state_atom ga_triangle_stipple;	/* (4214) */
+-	struct r300_state_atom ps;	/* pointsize (421C) */
+-	struct r300_state_atom ga_point_minmax;	/* (4230) */
+-	struct r300_state_atom lcntl;	/* line control */
+-	struct r300_state_atom ga_line_stipple;	/* (4260) */
+-	struct r300_state_atom shade;
+-	struct r300_state_atom polygon_mode;
+-	struct r300_state_atom fogp;	/* fog parameters (4294) */
+-	struct r300_state_atom ga_soft_reset;	/* (429C) */
+-	struct r300_state_atom zbias_cntl;
+-	struct r300_state_atom zbs;	/* zbias (42A4) */
+-	struct r300_state_atom occlusion_cntl;
+-	struct r300_state_atom cul;	/* cull cntl (42B8) */
+-	struct r300_state_atom su_depth_scale;	/* (42C0) */
+-	struct r300_state_atom rc;	/* rs control (4300) */
+-	struct r300_state_atom ri;	/* rs interpolators (4310) */
+-	struct r300_state_atom rr;	/* rs route (4330) */
+-	struct r300_state_atom sc_hyperz;	/* (43A4) */
+-	struct r300_state_atom sc_screendoor;	/* (43E8) */
+-	struct r300_state_atom fp;	/* fragment program cntl + nodes (4600) */
+-	struct r300_state_atom fpt;	/* texi - (4620) */
+-	struct r300_state_atom us_out_fmt;	/* (46A4) */
+-	struct r300_state_atom r500fp;	/* r500 fp instructions */
+-	struct r300_state_atom r500fp_const;	/* r500 fp constants */
+-	struct r300_state_atom fpi[4];	/* fp instructions (46C0/47C0/48C0/49C0) */
+-	struct r300_state_atom fogs;	/* fog state (4BC0) */
+-	struct r300_state_atom fogc;	/* fog color (4BC8) */
+-	struct r300_state_atom at;	/* alpha test (4BD4) */
+-	struct r300_state_atom fg_depth_src;	/* (4BD8) */
+-	struct r300_state_atom fpp;	/* 0x4C00 and following */
+-	struct r300_state_atom rb3d_cctl;	/* (4E00) */
+-	struct r300_state_atom bld;	/* blending (4E04) */
+-	struct r300_state_atom cmk;	/* colormask (4E0C) */
+-	struct r300_state_atom blend_color;	/* constant blend color */
+-	struct r300_state_atom rop;	/* ropcntl */
+-	struct r300_state_atom cb;	/* colorbuffer (4E28) */
+-	struct r300_state_atom rb3d_dither_ctl;	/* (4E50) */
+-	struct r300_state_atom rb3d_aaresolve_ctl;	/* (4E88) */
+-	struct r300_state_atom rb3d_discard_src_pixel_lte_threshold;	/* (4E88) I saw it only written on RV350 hardware..  */
+-	struct r300_state_atom zs;	/* zstencil control (4F00) */
+-	struct r300_state_atom zstencil_format;
+-	struct r300_state_atom zb;	/* z buffer (4F20) */
+-	struct r300_state_atom zb_depthclearvalue;	/* (4F28) */
+-	struct r300_state_atom unk4F30;	/* (4F30) */
+-	struct r300_state_atom zb_hiz_offset;	/* (4F44) */
+-	struct r300_state_atom zb_hiz_pitch;	/* (4F54) */
+-
+-	struct r300_state_atom vpi;	/* vp instructions */
+-	struct r300_state_atom vpp;	/* vp parameters */
+-	struct r300_state_atom vps;	/* vertex point size (?) */
+-	struct r300_state_atom vpucp[6];	/* vp user clip plane - 6 */
++	struct radeon_state_atom vpt;	/* viewport (1D98) */
++	struct radeon_state_atom vap_cntl;
++        struct radeon_state_atom vap_index_offset; /* 0x208c r5xx only */
++	struct radeon_state_atom vof;	/* VAP output format register 0x2090 */
++	struct radeon_state_atom vte;	/* (20B0) */
++	struct radeon_state_atom vap_vf_max_vtx_indx;	/* Maximum Vertex Indx Clamp (2134) */
++	struct radeon_state_atom vap_cntl_status;
++	struct radeon_state_atom vir[2];	/* vap input route (2150/21E0) */
++	struct radeon_state_atom vic;	/* vap input control (2180) */
++	struct radeon_state_atom vap_psc_sgn_norm_cntl; /* Programmable Stream Control Signed Normalize Control (21DC) */
++	struct radeon_state_atom vap_clip_cntl;
++	struct radeon_state_atom vap_clip;
++	struct radeon_state_atom vap_pvs_vtx_timeout_reg;	/* Vertex timeout register (2288) */
++	struct radeon_state_atom pvs;	/* pvs_cntl (22D0) */
++	struct radeon_state_atom gb_enable;	/* (4008) */
++	struct radeon_state_atom gb_misc;	/* Multisampling position shifts ? (4010) */
++	struct radeon_state_atom ga_point_s0;	/* S Texture Coordinate of Vertex 0 for Point texture stuffing (LLC) (4200) */
++	struct radeon_state_atom ga_triangle_stipple;	/* (4214) */
++	struct radeon_state_atom ps;	/* pointsize (421C) */
++	struct radeon_state_atom ga_point_minmax;	/* (4230) */
++	struct radeon_state_atom lcntl;	/* line control */
++	struct radeon_state_atom ga_line_stipple;	/* (4260) */
++	struct radeon_state_atom shade;
++	struct radeon_state_atom polygon_mode;
++	struct radeon_state_atom fogp;	/* fog parameters (4294) */
++	struct radeon_state_atom ga_soft_reset;	/* (429C) */
++	struct radeon_state_atom zbias_cntl;
++	struct radeon_state_atom zbs;	/* zbias (42A4) */
++	struct radeon_state_atom occlusion_cntl;
++	struct radeon_state_atom cul;	/* cull cntl (42B8) */
++	struct radeon_state_atom su_depth_scale;	/* (42C0) */
++	struct radeon_state_atom rc;	/* rs control (4300) */
++	struct radeon_state_atom ri;	/* rs interpolators (4310) */
++	struct radeon_state_atom rr;	/* rs route (4330) */
++	struct radeon_state_atom sc_hyperz;	/* (43A4) */
++	struct radeon_state_atom sc_screendoor;	/* (43E8) */
++	struct radeon_state_atom fp;	/* fragment program cntl + nodes (4600) */
++	struct radeon_state_atom fpt;	/* texi - (4620) */
++	struct radeon_state_atom us_out_fmt;	/* (46A4) */
++	struct radeon_state_atom r500fp;	/* r500 fp instructions */
++	struct radeon_state_atom r500fp_const;	/* r500 fp constants */
++	struct radeon_state_atom fpi[4];	/* fp instructions (46C0/47C0/48C0/49C0) */
++	struct radeon_state_atom fogs;	/* fog state (4BC0) */
++	struct radeon_state_atom fogc;	/* fog color (4BC8) */
++	struct radeon_state_atom at;	/* alpha test (4BD4) */
++	struct radeon_state_atom fg_depth_src;	/* (4BD8) */
++	struct radeon_state_atom fpp;	/* 0x4C00 and following */
++	struct radeon_state_atom rb3d_cctl;	/* (4E00) */
++	struct radeon_state_atom bld;	/* blending (4E04) */
++	struct radeon_state_atom cmk;	/* colormask (4E0C) */
++	struct radeon_state_atom blend_color;	/* constant blend color */
++	struct radeon_state_atom rop;	/* ropcntl */
++	struct radeon_state_atom cb;	/* colorbuffer (4E28) */
++	struct radeon_state_atom rb3d_dither_ctl;	/* (4E50) */
++	struct radeon_state_atom rb3d_aaresolve_ctl;	/* (4E88) */
++	struct radeon_state_atom rb3d_discard_src_pixel_lte_threshold;	/* (4E88) I saw it only written on RV350 hardware..  */
++	struct radeon_state_atom zs;	/* zstencil control (4F00) */
++	struct radeon_state_atom zstencil_format;
++	struct radeon_state_atom zb;	/* z buffer (4F20) */
++	struct radeon_state_atom zb_depthclearvalue;	/* (4F28) */
++	struct radeon_state_atom unk4F30;	/* (4F30) */
++	struct radeon_state_atom zb_hiz_offset;	/* (4F44) */
++	struct radeon_state_atom zb_hiz_pitch;	/* (4F54) */
++
++	struct radeon_state_atom vpi;	/* vp instructions */
++	struct radeon_state_atom vpp;	/* vp parameters */
++	struct radeon_state_atom vps;	/* vertex point size (?) */
++	struct radeon_state_atom vpucp[6];	/* vp user clip plane - 6 */
+ 	/* 8 texture units */
+ 	/* the state is grouped by function and not by
+ 	   texture unit. This makes single unit updates
+ 	   really awkward - we are much better off
+ 	   updating the whole thing at once */
+ 	struct {
+-		struct r300_state_atom filter;
+-		struct r300_state_atom filter_1;
+-		struct r300_state_atom size;
+-		struct r300_state_atom format;
+-		struct r300_state_atom pitch;
+-		struct r300_state_atom offset;
+-		struct r300_state_atom chroma_key;
+-		struct r300_state_atom border_color;
++		struct radeon_state_atom filter;
++		struct radeon_state_atom filter_1;
++		struct radeon_state_atom size;
++		struct radeon_state_atom format;
++		struct radeon_state_atom pitch;
++		struct radeon_state_atom offset;
++		struct radeon_state_atom chroma_key;
++		struct radeon_state_atom border_color;
+ 	} tex;
+-	struct r300_state_atom txe;	/* tex enable (4104) */
+-};
++	struct radeon_state_atom txe;	/* tex enable (4104) */
+ 
+-/**
+- * This structure holds the command buffer while it is being constructed.
+- *
+- * The first batch of commands in the buffer is always the state that needs
+- * to be re-emitted when the context is lost. This batch can be skipped
+- * otherwise.
+- */
+-struct r300_cmdbuf {
+-	int size;		/* DWORDs allocated for buffer */
+-	uint32_t *cmd_buf;
+-	int count_used;		/* DWORDs filled so far */
+-	int count_reemit;	/* size of re-emission batch */
++	radeonTexObj *textures[R300_MAX_TEXTURE_UNITS];
+ };
+ 
+ /**
+  * State cache
+  */
+ 
+-struct r300_depthbuffer_state {
+-	GLfloat scale;
+-};
+-
+-struct r300_stencilbuffer_state {
+-	GLboolean hw_stencil;
+-};
+-
+ /* Vertex shader state */
+ 
+ /* Perhaps more if we store programs in vmem? */
+@@ -812,22 +630,18 @@ struct r500_fragment_program {
+ #define REG_TEX0	2
+ 
+ struct r300_state {
+-	struct r300_depthbuffer_state depth;
+ 	struct r300_texture_state texture;
+ 	int sw_tcl_inputs[VERT_ATTRIB_MAX];
+ 	struct r300_vertex_shader_state vertex_shader;
+-	struct r300_dma_region aos[R300_MAX_AOS_ARRAYS];
++	struct radeon_aos aos[R300_MAX_AOS_ARRAYS];
+ 	int aos_count;
+ 
+-	GLuint *Elts;
+-	struct r300_dma_region elt_dma;
++	struct radeon_bo *elt_dma_bo; /**< Buffer object that contains element indices */
++	int elt_dma_offset; /**< Offset into this buffer object, in bytes */
+ 
+-	struct r300_dma_region swtcl_dma;
+ 	DECLARE_RENDERINPUTS(render_inputs_bitset);	/* actual render inputs that R300 was configured for.
+ 							   They are the same as tnl->render_inputs for fixed pipeline */
+ 
+-	struct r300_stencilbuffer_state stencil;
+-
+ };
+ 
+ #define R300_FALLBACK_NONE 0
+@@ -837,41 +651,7 @@ struct r300_state {
+ /* r300_swtcl.c
+  */
+ struct r300_swtcl_info {
+-   GLuint RenderIndex;
+-
+-   /**
+-    * Size of a hardware vertex.  This is calculated when \c ::vertex_attrs is
+-    * installed in the Mesa state vector.
+-    */
+-   GLuint vertex_size;
+-
+-   /**
+-    * Attributes instructing the Mesa TCL pipeline where / how to put vertex
+-    * data in the hardware buffer.
+-    */
+-   struct tnl_attr_map vertex_attrs[VERT_ATTRIB_MAX];
+-
+-   /**
+-    * Number of elements of \c ::vertex_attrs that are actually used.
+-    */
+-   GLuint vertex_attr_count;
+-
+-   /**
+-    * Cached pointer to the buffer where Mesa will store vertex data.
+-    */
+-   GLubyte *verts;
+-
+-   /* Fallback rasterization functions
+-    */
+-  //   r200_point_func draw_point;
+-  //   r200_line_func draw_line;
+-  //   r200_tri_func draw_tri;
+-
+-   GLuint hw_primitive;
+-   GLenum render_primitive;
+-   GLuint numverts;
+-
+-   /**
++   /**
+     * Offset of the 4UB color data within a hardware (swtcl) vertex.
+     */
+    GLuint coloroffset;
+@@ -880,13 +660,6 @@ struct r300_swtcl_info {
+     * Offset of the 3UB specular color data within a hardware (swtcl) vertex.
+     */
+    GLuint specoffset;
+-
+-   /**
+-    * Should Mesa project vertex data or will the hardware do it?
+-    */
+-   GLboolean needproj;
+-
+-   struct r300_dma_region indexed_verts;
+ };
+ 
+ 
+@@ -897,33 +670,13 @@ struct r300_context {
+ 	struct radeon_context radeon;	/* parent class, must be first */
+ 
+ 	struct r300_hw_state hw;
+-	struct r300_cmdbuf cmdbuf;
++
+ 	struct r300_state state;
+ 	struct gl_vertex_program *curr_vp;
+ 	struct r300_vertex_program *selected_vp;
+ 
+ 	/* Vertex buffers
+ 	 */
+-	struct r300_dma dma;
+-	GLboolean save_on_next_unlock;
+-	GLuint NewGLState;
+-
+-	/* Texture object bookkeeping
+-	 */
+-	unsigned nr_heaps;
+-	driTexHeap *texture_heaps[RADEON_NR_TEX_HEAPS];
+-	driTextureObject swapped;
+-	int texture_depth;
+-	float initialMaxAnisotropy;
+-
+-	/* Clientdata textures;
+-	 */
+-	GLuint prefer_gart_client_texturing;
+-
+-#ifdef USER_BUFFERS
+-	struct r300_memory_manager *rmm;
+-#endif
+-
+ 	GLvector4f dummy_attrib[_TNL_ATTRIB_MAX];
+ 	GLvector4f *temp_attrib[_TNL_ATTRIB_MAX];
+ 
+@@ -956,4 +709,7 @@ extern int r300VertexProgUpdateParams(GLcontext * ctx,
+ #define RADEON_D_PLAYBACK_RAW 2
+ #define RADEON_D_T 3
+ 
++#define r300PackFloat32 radeonPackFloat32
++#define r300PackFloat24 radeonPackFloat24
++
+ #endif				/* __R300_CONTEXT_H__ */
+diff --git a/src/mesa/drivers/dri/r300/r300_emit.c b/src/mesa/drivers/dri/r300/r300_emit.c
+index 80bd338..1512e90 100644
+--- a/src/mesa/drivers/dri/r300/r300_emit.c
++++ b/src/mesa/drivers/dri/r300/r300_emit.c
+@@ -46,14 +46,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "tnl/t_context.h"
+ 
+ #include "r300_context.h"
+-#include "radeon_ioctl.h"
+ #include "r300_state.h"
+ #include "r300_emit.h"
+ #include "r300_ioctl.h"
+ 
+-#ifdef USER_BUFFERS
+-#include "r300_mem.h"
+-#endif
+ 
+ #if SWIZZLE_X != R300_INPUT_ROUTE_SELECT_X || \
+     SWIZZLE_Y != R300_INPUT_ROUTE_SELECT_Y || \
+@@ -66,147 +62,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ #define DEBUG_ALL DEBUG_VERTS
+ 
+-#if defined(USE_X86_ASM)
+-#define COPY_DWORDS( dst, src, nr )					\
+-do {									\
+-	int __tmp;							\
+-	__asm__ __volatile__( "rep ; movsl"				\
+-			      : "=%c" (__tmp), "=D" (dst), "=S" (__tmp)	\
+-			      : "0" (nr),				\
+-			        "D" ((long)dst),			\
+-			        "S" ((long)src) );			\
+-} while (0)
+-#else
+-#define COPY_DWORDS( dst, src, nr )		\
+-do {						\
+-   int j;					\
+-   for ( j = 0 ; j < nr ; j++ )			\
+-      dst[j] = ((int *)src)[j];			\
+-   dst += nr;					\
+-} while (0)
+-#endif
+-
+-static void r300EmitVec4(GLcontext * ctx, struct r300_dma_region *rvb,
+-			 GLvoid * data, int stride, int count)
+-{
+-	int i;
+-	int *out = (int *)(rvb->address + rvb->start);
+-
+-	if (RADEON_DEBUG & DEBUG_VERTS)
+-		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
+-			__FUNCTION__, count, stride, (void *)out, (void *)data);
+-
+-	if (stride == 4)
+-		COPY_DWORDS(out, data, count);
+-	else
+-		for (i = 0; i < count; i++) {
+-			out[0] = *(int *)data;
+-			out++;
+-			data += stride;
+-		}
+-}
+-
+-static void r300EmitVec8(GLcontext * ctx, struct r300_dma_region *rvb,
+-			 GLvoid * data, int stride, int count)
+-{
+-	int i;
+-	int *out = (int *)(rvb->address + rvb->start);
+-
+-	if (RADEON_DEBUG & DEBUG_VERTS)
+-		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
+-			__FUNCTION__, count, stride, (void *)out, (void *)data);
+-
+-	if (stride == 8)
+-		COPY_DWORDS(out, data, count * 2);
+-	else
+-		for (i = 0; i < count; i++) {
+-			out[0] = *(int *)data;
+-			out[1] = *(int *)(data + 4);
+-			out += 2;
+-			data += stride;
+-		}
+-}
+-
+-static void r300EmitVec12(GLcontext * ctx, struct r300_dma_region *rvb,
+-			  GLvoid * data, int stride, int count)
+-{
+-	int i;
+-	int *out = (int *)(rvb->address + rvb->start);
+-
+-	if (RADEON_DEBUG & DEBUG_VERTS)
+-		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
+-			__FUNCTION__, count, stride, (void *)out, (void *)data);
+-
+-	if (stride == 12)
+-		COPY_DWORDS(out, data, count * 3);
+-	else
+-		for (i = 0; i < count; i++) {
+-			out[0] = *(int *)data;
+-			out[1] = *(int *)(data + 4);
+-			out[2] = *(int *)(data + 8);
+-			out += 3;
+-			data += stride;
+-		}
+-}
+-
+-static void r300EmitVec16(GLcontext * ctx, struct r300_dma_region *rvb,
+-			  GLvoid * data, int stride, int count)
+-{
+-	int i;
+-	int *out = (int *)(rvb->address + rvb->start);
+-
+-	if (RADEON_DEBUG & DEBUG_VERTS)
+-		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
+-			__FUNCTION__, count, stride, (void *)out, (void *)data);
+-
+-	if (stride == 16)
+-		COPY_DWORDS(out, data, count * 4);
+-	else
+-		for (i = 0; i < count; i++) {
+-			out[0] = *(int *)data;
+-			out[1] = *(int *)(data + 4);
+-			out[2] = *(int *)(data + 8);
+-			out[3] = *(int *)(data + 12);
+-			out += 4;
+-			data += stride;
+-		}
+-}
+-
+-static void r300EmitVec(GLcontext * ctx, struct r300_dma_region *rvb,
+-			GLvoid * data, int size, int stride, int count)
+-{
+-	r300ContextPtr rmesa = R300_CONTEXT(ctx);
+-
+-	if (stride == 0) {
+-		r300AllocDmaRegion(rmesa, rvb, size * 4, 4);
+-		count = 1;
+-		rvb->aos_offset = GET_START(rvb);
+-		rvb->aos_stride = 0;
+-	} else {
+-		r300AllocDmaRegion(rmesa, rvb, size * count * 4, 4);
+-		rvb->aos_offset = GET_START(rvb);
+-		rvb->aos_stride = size;
+-	}
+-
+-	switch (size) {
+-	case 1:
+-		r300EmitVec4(ctx, rvb, data, stride, count);
+-		break;
+-	case 2:
+-		r300EmitVec8(ctx, rvb, data, stride, count);
+-		break;
+-	case 3:
+-		r300EmitVec12(ctx, rvb, data, stride, count);
+-		break;
+-	case 4:
+-		r300EmitVec16(ctx, rvb, data, stride, count);
+-		break;
+-	default:
+-		assert(0);
+-		break;
+-	}
+-}
+-
+ #define DW_SIZE(x) ((inputs[tab[(x)]] << R300_DST_VEC_LOC_SHIFT) |	\
+ 		    (attribptr[tab[(x)]]->size - 1) << R300_DATA_TYPE_0_SHIFT)
+ 
+@@ -314,10 +169,6 @@ GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint OutputsWritten)
+ 		    R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT |
+ 		    R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT;
+ 
+-#if 0
+-	if (OutputsWritten & (1 << VERT_RESULT_FOGC)) ;
+-#endif
+-
+ 	if (OutputsWritten & (1 << VERT_RESULT_PSIZ))
+ 		ret |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT;
+ 
+@@ -371,7 +222,6 @@ int r300EmitArrays(GLcontext * ctx)
+ 
+ 		assert(RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_POS));
+ 		assert(RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_NORMAL) == 0);
+-		//assert(RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_COLOR0));
+ 
+ 		if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_POS)) {
+ 			InputsRead |= 1 << VERT_ATTRIB_POS;
+@@ -433,7 +283,7 @@ int r300EmitArrays(GLcontext * ctx)
+ 	}
+ 
+ 	for (i = 0; i < nr; i++) {
+-		int ci, fix, found = 0;
++		int ci;
+ 
+ 		swizzle[i][0] = SWIZZLE_ZERO;
+ 		swizzle[i][1] = SWIZZLE_ZERO;
+@@ -443,61 +293,35 @@ int r300EmitArrays(GLcontext * ctx)
+ 		for (ci = 0; ci < vb->AttribPtr[tab[i]]->size; ci++) {
+ 			swizzle[i][ci] = ci;
+ 		}
+-
+-		if (r300IsGartMemory(rmesa, vb->AttribPtr[tab[i]]->data, 4)) {
+-			if (vb->AttribPtr[tab[i]]->stride % 4) {
+-				return R300_FALLBACK_TCL;
+-			}
+-			rmesa->state.aos[i].address = (void *)(vb->AttribPtr[tab[i]]->data);
+-			rmesa->state.aos[i].start = 0;
+-			rmesa->state.aos[i].aos_offset = r300GartOffsetFromVirtual(rmesa, vb->AttribPtr[tab[i]]->data);
+-			rmesa->state.aos[i].aos_stride = vb->AttribPtr[tab[i]]->stride / 4;
+-			rmesa->state.aos[i].aos_size = vb->AttribPtr[tab[i]]->size;
+-		} else {
+-			r300EmitVec(ctx, &rmesa->state.aos[i],
++		rcommon_emit_vector(ctx, &rmesa->state.aos[i],
+ 				    vb->AttribPtr[tab[i]]->data,
+ 				    vb->AttribPtr[tab[i]]->size,
+ 				    vb->AttribPtr[tab[i]]->stride, count);
+-		}
+-
+-		rmesa->state.aos[i].aos_size = vb->AttribPtr[tab[i]]->size;
+-
+-		for (fix = 0; fix <= 4 - vb->AttribPtr[tab[i]]->size; fix++) {
+-			if ((rmesa->state.aos[i].aos_offset - _mesa_sizeof_type(GL_FLOAT) * fix) % 4) {
+-				continue;
+-			}
+-			found = 1;
+-			break;
+-		}
+-
+-		if (found) {
+-			if (fix > 0) {
+-				WARN_ONCE("Feeling lucky?\n");
+-			}
+-			rmesa->state.aos[i].aos_offset -= _mesa_sizeof_type(GL_FLOAT) * fix;
+-			for (ci = 0; ci < vb->AttribPtr[tab[i]]->size; ci++) {
+-				swizzle[i][ci] += fix;
+-			}
+-		} else {
+-			WARN_ONCE
+-			    ("Cannot handle offset %x with stride %d, comp %d\n",
+-			     rmesa->state.aos[i].aos_offset,
+-			     rmesa->state.aos[i].aos_stride,
+-			     vb->AttribPtr[tab[i]]->size);
+-			return R300_FALLBACK_TCL;
+-		}
+ 	}
+ 
+ 	/* Setup INPUT_ROUTE. */
+-	R300_STATECHANGE(rmesa, vir[0]);
+-	((drm_r300_cmd_header_t *) rmesa->hw.vir[0].cmd)->packet0.count =
+-	    r300VAPInputRoute0(&rmesa->hw.vir[0].cmd[R300_VIR_CNTL_0],
+-			       vb->AttribPtr, inputs, tab, nr);
+-	R300_STATECHANGE(rmesa, vir[1]);
+-	((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count =
+-	    r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle,
+-			       nr);
+-
++	if (rmesa->radeon.radeonScreen->kernel_mm) {
++		R300_STATECHANGE(rmesa, vir[0]);
++		rmesa->hw.vir[0].cmd[0] &= 0xC000FFFF;
++		rmesa->hw.vir[1].cmd[0] &= 0xC000FFFF;
++		rmesa->hw.vir[0].cmd[0] |=
++			(r300VAPInputRoute0(&rmesa->hw.vir[0].cmd[R300_VIR_CNTL_0],
++					    vb->AttribPtr, inputs, tab, nr) & 0x3FFF) << 16;
++		R300_STATECHANGE(rmesa, vir[1]);
++		rmesa->hw.vir[1].cmd[0] |=
++			(r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle,
++					    nr) & 0x3FFF) << 16;
++	} else {
++		R300_STATECHANGE(rmesa, vir[0]);
++		((drm_r300_cmd_header_t *) rmesa->hw.vir[0].cmd)->packet0.count =
++			r300VAPInputRoute0(&rmesa->hw.vir[0].cmd[R300_VIR_CNTL_0],
++					   vb->AttribPtr, inputs, tab, nr);
++		R300_STATECHANGE(rmesa, vir[1]);
++		((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count =
++			r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle,
++					   nr);
++	}
++	
+ 	/* Setup INPUT_CNTL. */
+ 	R300_STATECHANGE(rmesa, vic);
+ 	rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead);
+@@ -515,45 +339,34 @@ int r300EmitArrays(GLcontext * ctx)
+ 	return R300_FALLBACK_NONE;
+ }
+ 
+-#ifdef USER_BUFFERS
+-void r300UseArrays(GLcontext * ctx)
+-{
+-	r300ContextPtr rmesa = R300_CONTEXT(ctx);
+-	int i;
+-
+-	if (rmesa->state.elt_dma.buf)
+-		r300_mem_use(rmesa, rmesa->state.elt_dma.buf->id);
+-
+-	for (i = 0; i < rmesa->state.aos_count; i++) {
+-		if (rmesa->state.aos[i].buf)
+-			r300_mem_use(rmesa, rmesa->state.aos[i].buf->id);
+-	}
+-}
+-#endif
+-
+ void r300ReleaseArrays(GLcontext * ctx)
+ {
+ 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ 	int i;
+ 
+-	r300ReleaseDmaRegion(rmesa, &rmesa->state.elt_dma, __FUNCTION__);
++	if (rmesa->state.elt_dma_bo) {
++		radeon_bo_unref(rmesa->state.elt_dma_bo);
++		rmesa->state.elt_dma_bo = NULL;
++	}
+ 	for (i = 0; i < rmesa->state.aos_count; i++) {
+-		r300ReleaseDmaRegion(rmesa, &rmesa->state.aos[i], __FUNCTION__);
++		if (rmesa->state.aos[i].bo) {
++			radeon_bo_unref(rmesa->state.aos[i].bo);
++			rmesa->state.aos[i].bo = NULL;
++		}
+ 	}
+ }
+ 
+ void r300EmitCacheFlush(r300ContextPtr rmesa)
+ {
+-	int cmd_reserved = 0;
+-	int cmd_written = 0;
+-
+-	drm_radeon_cmd_header_t *cmd = NULL;
+-
+-	reg_start(R300_RB3D_DSTCACHE_CTLSTAT, 0);
+-	e32(R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS |
+-	    R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D);
+-
+-	reg_start(R300_ZB_ZCACHE_CTLSTAT, 0);
+-	e32(R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE |
+-	    R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE);
++	BATCH_LOCALS(&rmesa->radeon);
++
++	BEGIN_BATCH(4);
++	OUT_BATCH_REGVAL(R300_RB3D_DSTCACHE_CTLSTAT,
++		R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS |
++		R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D);
++	OUT_BATCH_REGVAL(R300_ZB_ZCACHE_CTLSTAT,
++		R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE |
++		R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE);
++	END_BATCH();
++	COMMIT_BATCH();
+ }
+diff --git a/src/mesa/drivers/dri/r300/r300_emit.h b/src/mesa/drivers/dri/r300/r300_emit.h
+index 89d7383..6bc8f8e 100644
+--- a/src/mesa/drivers/dri/r300/r300_emit.h
++++ b/src/mesa/drivers/dri/r300/r300_emit.h
+@@ -44,28 +44,31 @@
+ #include "r300_cmdbuf.h"
+ #include "radeon_reg.h"
+ 
+-/* TODO: move these defines (and the ones from DRM) into r300_reg.h and sync up
+- * with DRM */
+-#define CP_PACKET0(reg, n)	(RADEON_CP_PACKET0 | ((n)<<16) | ((reg)>>2))
+-#define CP_PACKET3( pkt, n )						\
+-	(RADEON_CP_PACKET3 | (pkt) | ((n) << 16))
+-
+-static INLINE uint32_t cmdpacket0(int reg, int count)
++static INLINE uint32_t cmdpacket0(struct radeon_screen *rscrn,
++                                  int reg, int count)
+ {
+-	drm_r300_cmd_header_t cmd;
+-
+-	cmd.packet0.cmd_type = R300_CMD_PACKET0;
+-	cmd.packet0.count = count;
+-	cmd.packet0.reghi = ((unsigned int)reg & 0xFF00) >> 8;
+-	cmd.packet0.reglo = ((unsigned int)reg & 0x00FF);
+-
+-	return cmd.u;
++    if (!rscrn->kernel_mm) {
++	    drm_r300_cmd_header_t cmd;
++
++	cmd.u = 0;
++    	cmd.packet0.cmd_type = R300_CMD_PACKET0;
++	    cmd.packet0.count = count;
++    	cmd.packet0.reghi = ((unsigned int)reg & 0xFF00) >> 8;
++	    cmd.packet0.reglo = ((unsigned int)reg & 0x00FF);
++
++    	return cmd.u;
++    }
++    if (count) {
++        return CP_PACKET0(reg, count - 1);
++    }
++    return CP_PACKET2;
+ }
+ 
+-static INLINE uint32_t cmdvpu(int addr, int count)
++static INLINE uint32_t cmdvpu(struct radeon_screen *rscrn, int addr, int count)
+ {
+ 	drm_r300_cmd_header_t cmd;
+ 
++	cmd.u = 0;
+ 	cmd.vpu.cmd_type = R300_CMD_VPU;
+ 	cmd.vpu.count = count;
+ 	cmd.vpu.adrhi = ((unsigned int)addr & 0xFF00) >> 8;
+@@ -74,10 +77,12 @@ static INLINE uint32_t cmdvpu(int addr, int count)
+ 	return cmd.u;
+ }
+ 
+-static INLINE uint32_t cmdr500fp(int addr, int count, int type, int clamp)
++static INLINE uint32_t cmdr500fp(struct radeon_screen *rscrn,
++                                 int addr, int count, int type, int clamp)
+ {
+ 	drm_r300_cmd_header_t cmd;
+ 
++	cmd.u = 0;
+ 	cmd.r500fp.cmd_type = R300_CMD_R500FP;
+ 	cmd.r500fp.count = count;
+ 	cmd.r500fp.adrhi_flags = ((unsigned int)addr & 0x100) >> 8;
+@@ -88,169 +93,131 @@ static INLINE uint32_t cmdr500fp(int addr, int count, int type, int clamp)
+ 	return cmd.u;
+ }
+ 
+-static INLINE uint32_t cmdpacket3(int packet)
++static INLINE uint32_t cmdpacket3(struct radeon_screen *rscrn, int packet)
+ {
+ 	drm_r300_cmd_header_t cmd;
+ 
++	cmd.u = 0;
+ 	cmd.packet3.cmd_type = R300_CMD_PACKET3;
+ 	cmd.packet3.packet = packet;
+ 
+ 	return cmd.u;
+ }
+ 
+-static INLINE uint32_t cmdcpdelay(unsigned short count)
++static INLINE uint32_t cmdcpdelay(struct radeon_screen *rscrn,  
++                                  unsigned short count)
+ {
+ 	drm_r300_cmd_header_t cmd;
+ 
++	cmd.u = 0;
++
+ 	cmd.delay.cmd_type = R300_CMD_CP_DELAY;
+ 	cmd.delay.count = count;
+ 
+ 	return cmd.u;
+ }
+ 
+-static INLINE uint32_t cmdwait(unsigned char flags)
++static INLINE uint32_t cmdwait(struct radeon_screen *rscrn,
++                               unsigned char flags)
+ {
+ 	drm_r300_cmd_header_t cmd;
+ 
++	cmd.u = 0;
+ 	cmd.wait.cmd_type = R300_CMD_WAIT;
+ 	cmd.wait.flags = flags;
+ 
+ 	return cmd.u;
+ }
+ 
+-static INLINE uint32_t cmdpacify(void)
++static INLINE uint32_t cmdpacify(struct radeon_screen *rscrn)
+ {
+ 	drm_r300_cmd_header_t cmd;
+ 
++	cmd.u = 0;
+ 	cmd.header.cmd_type = R300_CMD_END3D;
+ 
+ 	return cmd.u;
+ }
+ 
+ /**
+- * Prepare to write a register value to register at address reg.
+- * If num_extra > 0 then the following extra values are written
+- * to registers with address +4, +8 and so on..
+- */
+-#define reg_start(reg, num_extra)					\
+-	do {								\
+-		int _n;							\
+-		_n=(num_extra);						\
+-		cmd = (drm_radeon_cmd_header_t*)			\
+-			r300AllocCmdBuf(rmesa,				\
+-					(_n+2),				\
+-					__FUNCTION__);			\
+-		cmd_reserved=_n+2;					\
+-		cmd_written=1;						\
+-		cmd[0].i=cmdpacket0((reg), _n+1);			\
+-	} while (0);
+-
+-/**
+- * Emit GLuint freestyle
++ * Write the header of a packet3; (num_extra+1) payload dwords must follow.
++ * Budget 2 dwords for it: kernel_mm writes only 1 but bumps section_cdw by 1.
+  */
+-#define e32(dword)							\
+-	do {								\
+-		if(cmd_written<cmd_reserved) {				\
+-			cmd[cmd_written].i=(dword);			\
+-			cmd_written++;					\
+-		} else {						\
+-			fprintf(stderr,					\
+-				"e32 but no previous packet "		\
+-				"declaration.\n"			\
+-				"Aborting! in %s::%s at line %d, "	\
+-				"cmd_written=%d cmd_reserved=%d\n",	\
+-				__FILE__, __FUNCTION__, __LINE__,	\
+-				cmd_written, cmd_reserved);		\
+-			_mesa_exit(-1);					\
+-		}							\
++#define OUT_BATCH_PACKET3(packet, num_extra) do {\
++    if (!b_l_rmesa->radeonScreen->kernel_mm) {		\
++    	OUT_BATCH(cmdpacket3(b_l_rmesa->radeonScreen,\
++                  R300_CMD_PACKET3_RAW)); \
++    } else b_l_rmesa->cmdbuf.cs->section_cdw++;\
++	OUT_BATCH(CP_PACKET3((packet), (num_extra))); \
+ 	} while(0)
+ 
+-#define	efloat(f) e32(r300PackFloat32(f))
+-
+-#define vsf_start_fragment(dest, length)				\
+-	do {								\
+-		int _n;							\
+-		_n = (length);						\
+-		cmd = (drm_radeon_cmd_header_t*)			\
+-			r300AllocCmdBuf(rmesa,				\
+-					(_n+1),				\
+-					__FUNCTION__);			\
+-		cmd_reserved = _n+2;					\
+-		cmd_written =1;						\
+-		cmd[0].i = cmdvpu((dest), _n/4);			\
+-	} while (0);
+-
+-#define r500fp_start_fragment(dest, length)				\
+-	do {								\
+-		int _n;							\
+-		_n = (length);						\
+-		cmd = (drm_radeon_cmd_header_t*)			\
+-			r300AllocCmdBuf(rmesa,				\
+-					(_n+1),				\
+-					__FUNCTION__);			\
+-		cmd_reserved = _n+1;					\
+-		cmd_written =1;						\
+-		cmd[0].i = cmdr500fp((dest), _n/6, 0, 0);		\
+-	} while (0);
+-
+-#define start_packet3(packet, count)					\
+-	{								\
+-		int _n;							\
+-		GLuint _p;						\
+-		_n = (count);						\
+-		_p = (packet);						\
+-		cmd = (drm_radeon_cmd_header_t*)			\
+-			r300AllocCmdBuf(rmesa,				\
+-					(_n+3),				\
+-					__FUNCTION__);			\
+-		cmd_reserved = _n+3;					\
+-		cmd_written = 2;					\
+-		if(_n > 0x3fff) {					\
+-			fprintf(stderr,"Too big packet3 %08x: cannot "	\
+-				"store %d dwords\n",			\
+-				_p, _n);				\
+-			_mesa_exit(-1);					\
+-		}							\
+-		cmd[0].i = cmdpacket3(R300_CMD_PACKET3_RAW);		\
+-		cmd[1].i = _p | ((_n & 0x3fff)<<16);			\
+-	}
+-
+ /**
+  * Must be sent to switch to 2d commands
+  */
+-void static INLINE end_3d(r300ContextPtr rmesa)
++void static INLINE end_3d(radeonContextPtr radeon)
+ {
+-	drm_radeon_cmd_header_t *cmd = NULL;
++	BATCH_LOCALS(radeon);
+ 
+-	cmd =
+-	    (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, __FUNCTION__);
+-	cmd[0].header.cmd_type = R300_CMD_END3D;
++	if (!radeon->radeonScreen->kernel_mm) {
++		BEGIN_BATCH_NO_AUTOSTATE(1);
++		OUT_BATCH(cmdpacify(radeon->radeonScreen));
++		END_BATCH();
++	}
+ }
+ 
+ void static INLINE cp_delay(r300ContextPtr rmesa, unsigned short count)
+ {
+-	drm_radeon_cmd_header_t *cmd = NULL;
++	BATCH_LOCALS(&rmesa->radeon);
+ 
+-	cmd =
+-	    (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, __FUNCTION__);
+-	cmd[0].i = cmdcpdelay(count);
++	if (!rmesa->radeon.radeonScreen->kernel_mm) {
++		BEGIN_BATCH_NO_AUTOSTATE(1);
++		OUT_BATCH(cmdcpdelay(rmesa->radeon.radeonScreen, count));
++		END_BATCH();
++	}
+ }
+ 
+-void static INLINE cp_wait(r300ContextPtr rmesa, unsigned char flags)
++void static INLINE cp_wait(radeonContextPtr radeon, unsigned char flags)
+ {
+-	drm_radeon_cmd_header_t *cmd = NULL;
+-
+-	cmd =
+-	    (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, __FUNCTION__);
+-	cmd[0].i = cmdwait(flags);
++	BATCH_LOCALS(radeon);
++	uint32_t wait_until;
++
++	if (!radeon->radeonScreen->kernel_mm) {
++		BEGIN_BATCH_NO_AUTOSTATE(1);
++		OUT_BATCH(cmdwait(radeon->radeonScreen, flags));
++		END_BATCH();
++	} else {
++		switch(flags) {
++		case R300_WAIT_2D:
++			wait_until = (1 << 14);
++			break;
++		case R300_WAIT_3D:
++			wait_until = (1 << 15);
++			break;
++		case R300_NEW_WAIT_2D_3D:
++			wait_until = (1 << 14) | (1 << 15);
++			break;
++		case R300_NEW_WAIT_2D_2D_CLEAN:
++			wait_until = (1 << 14) | (1 << 16) | (1 << 18);
++			break;
++		case R300_NEW_WAIT_3D_3D_CLEAN:
++			wait_until = (1 << 15) | (1 << 17) | (1 << 18);
++			break;
++		case R300_NEW_WAIT_2D_2D_CLEAN_3D_3D_CLEAN:
++			wait_until  = (1 << 14) | (1 << 16) | (1 << 18);
++			wait_until |= (1 << 15) | (1 << 17) | (1 << 18);
++			break;
++		default:
++			return;
++		}
++		BEGIN_BATCH_NO_AUTOSTATE(2);
++		OUT_BATCH(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
++		OUT_BATCH(wait_until);
++		END_BATCH();
++	}
+ }
+ 
+ extern int r300EmitArrays(GLcontext * ctx);
+ 
+-#ifdef USER_BUFFERS
+-void r300UseArrays(GLcontext * ctx);
+-#endif
+-
+ extern void r300ReleaseArrays(GLcontext * ctx);
+ extern int r300PrimitiveType(r300ContextPtr rmesa, int prim);
+ extern int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim);
+diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c
+index 4ef7f2b..8d030c6 100644
+--- a/src/mesa/drivers/dri/r300/r300_fragprog.c
++++ b/src/mesa/drivers/dri/r300/r300_fragprog.c
+@@ -163,6 +163,19 @@ static GLboolean transform_TEX(
+ 		}
+ 	}
+ 
++	if (inst.SrcReg[0].File != PROGRAM_TEMPORARY && inst.SrcReg[0].File != PROGRAM_INPUT) {
++		int tmpreg = radeonFindFreeTemporary(t);
++		tgt = radeonAppendInstructions(t->Program, 1);
++		tgt->Opcode = OPCODE_MOV;
++		tgt->DstReg.File = PROGRAM_TEMPORARY;
++		tgt->DstReg.Index = tmpreg;
++		tgt->SrcReg[0] = inst.SrcReg[0];
++
++		reset_srcreg(&inst.SrcReg[0]);
++		inst.SrcReg[0].File = PROGRAM_TEMPORARY;
++		inst.SrcReg[0].Index = tmpreg;
++	}
++	
+ 	tgt = radeonAppendInstructions(t->Program, 1);
+ 	_mesa_copy_instructions(tgt, &inst, 1);
+ 
+diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c
+index ee85e22..90b85f0 100644
+--- a/src/mesa/drivers/dri/r300/r300_ioctl.c
++++ b/src/mesa/drivers/dri/r300/r300_ioctl.c
+@@ -46,8 +46,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "main/context.h"
+ #include "swrast/swrast.h"
+ 
++#include "radeon_common.h"
++#include "radeon_lock.h"
+ #include "r300_context.h"
+-#include "radeon_ioctl.h"
+ #include "r300_ioctl.h"
+ #include "r300_cmdbuf.h"
+ #include "r300_state.h"
+@@ -55,71 +56,83 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "radeon_reg.h"
+ #include "r300_emit.h"
+ #include "r300_fragprog.h"
++#include "r300_context.h"
+ 
+ #include "vblank.h"
+ 
++#define R200_3D_DRAW_IMMD_2      0xC0003500
++
+ #define CLEARBUFFER_COLOR	0x1
+ #define CLEARBUFFER_DEPTH	0x2
+ #define CLEARBUFFER_STENCIL	0x4
+ 
+-static void r300ClearBuffer(r300ContextPtr r300, int flags, int buffer)
++static void r300ClearBuffer(r300ContextPtr r300, int flags,
++			    struct radeon_renderbuffer *rrb,
++			    struct radeon_renderbuffer *rrbd)
+ {
++	BATCH_LOCALS(&r300->radeon);
+ 	GLcontext *ctx = r300->radeon.glCtx;
+ 	__DRIdrawablePrivate *dPriv = r300->radeon.dri.drawable;
+-	GLuint cboffset, cbpitch;
+-	drm_r300_cmd_header_t *cmd2;
+-	int cmd_reserved = 0;
+-	int cmd_written = 0;
+-	drm_radeon_cmd_header_t *cmd = NULL;
++	GLuint cbpitch = 0;
+ 	r300ContextPtr rmesa = r300;
+ 
+ 	if (RADEON_DEBUG & DEBUG_IOCTL)
+-		fprintf(stderr, "%s: %s buffer (%i,%i %ix%i)\n",
+-			__FUNCTION__, buffer ? "back" : "front",
+-			dPriv->x, dPriv->y, dPriv->w, dPriv->h);
+-
+-	if (buffer) {
+-		cboffset = r300->radeon.radeonScreen->backOffset;
+-		cbpitch = r300->radeon.radeonScreen->backPitch;
+-	} else {
+-		cboffset = r300->radeon.radeonScreen->frontOffset;
+-		cbpitch = r300->radeon.radeonScreen->frontPitch;
++		fprintf(stderr, "%s: buffer %p (%i,%i %ix%i)\n",
++			__FUNCTION__, rrb, dPriv->x, dPriv->y,
++			dPriv->w, dPriv->h);
++
++	if (rrb) {
++		cbpitch = (rrb->pitch / rrb->cpp);
++		if (rrb->cpp == 4)
++			cbpitch |= R300_COLOR_FORMAT_ARGB8888;
++		else
++			cbpitch |= R300_COLOR_FORMAT_RGB565;
++
++		if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE){
++			cbpitch |= R300_COLOR_TILE_ENABLE;
++        }
+ 	}
+ 
+-	cboffset += r300->radeon.radeonScreen->fbLocation;
+-
+-	cp_wait(r300, R300_WAIT_3D | R300_WAIT_3D_CLEAN);
+-	end_3d(rmesa);
+-
+-	R300_STATECHANGE(r300, cb);
+-	reg_start(R300_RB3D_COLOROFFSET0, 0);
+-	e32(cboffset);
+-
+-	if (r300->radeon.radeonScreen->cpp == 4)
+-		cbpitch |= R300_COLOR_FORMAT_ARGB8888;
+-	else
+-		cbpitch |= R300_COLOR_FORMAT_RGB565;
+-
+-	if (r300->radeon.sarea->tiling_enabled)
+-		cbpitch |= R300_COLOR_TILE_ENABLE;
+-
+-	reg_start(R300_RB3D_COLORPITCH0, 0);
+-	e32(cbpitch);
+-
+-	R300_STATECHANGE(r300, cmk);
+-	reg_start(RB3D_COLOR_CHANNEL_MASK, 0);
++	/* TODO in bufmgr */
++	cp_wait(&r300->radeon, R300_WAIT_3D | R300_WAIT_3D_CLEAN);
++	end_3d(&rmesa->radeon);
+ 
+ 	if (flags & CLEARBUFFER_COLOR) {
+-		e32((ctx->Color.ColorMask[BCOMP] ? RB3D_COLOR_CHANNEL_MASK_BLUE_MASK0 : 0) |
+-		    (ctx->Color.ColorMask[GCOMP] ? RB3D_COLOR_CHANNEL_MASK_GREEN_MASK0 : 0) |
+-		    (ctx->Color.ColorMask[RCOMP] ? RB3D_COLOR_CHANNEL_MASK_RED_MASK0 : 0) |
+-		    (ctx->Color.ColorMask[ACOMP] ? RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK0 : 0));
++		assert(rrb != 0);
++		BEGIN_BATCH_NO_AUTOSTATE(6);
++		OUT_BATCH_REGSEQ(R300_RB3D_COLOROFFSET0, 1);
++		OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
++		OUT_BATCH_REGVAL(R300_RB3D_COLORPITCH0, cbpitch);
++		END_BATCH();
++	}
++#if 1
++	if (flags & (CLEARBUFFER_DEPTH | CLEARBUFFER_STENCIL)) {
++		assert(rrbd != 0);
++		cbpitch = (rrbd->pitch / rrbd->cpp);
++		if (rrbd->bo->flags & RADEON_BO_FLAGS_MACRO_TILE){
++			cbpitch |= R300_DEPTHMACROTILE_ENABLE;
++        }
++		if (rrbd->bo->flags & RADEON_BO_FLAGS_MICRO_TILE){
++            cbpitch |= R300_DEPTHMICROTILE_TILED;
++        }
++		BEGIN_BATCH_NO_AUTOSTATE(6);
++		OUT_BATCH_REGSEQ(R300_ZB_DEPTHOFFSET, 1);
++		OUT_BATCH_RELOC(0, rrbd->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
++		OUT_BATCH_REGVAL(R300_ZB_DEPTHPITCH, cbpitch);
++		END_BATCH();
++	}
++#endif
++	BEGIN_BATCH_NO_AUTOSTATE(6);
++	OUT_BATCH_REGSEQ(RB3D_COLOR_CHANNEL_MASK, 1);
++	if (flags & CLEARBUFFER_COLOR) {
++		OUT_BATCH((ctx->Color.ColorMask[BCOMP] ? RB3D_COLOR_CHANNEL_MASK_BLUE_MASK0 : 0) |
++			  (ctx->Color.ColorMask[GCOMP] ? RB3D_COLOR_CHANNEL_MASK_GREEN_MASK0 : 0) |
++			  (ctx->Color.ColorMask[RCOMP] ? RB3D_COLOR_CHANNEL_MASK_RED_MASK0 : 0) |
++			  (ctx->Color.ColorMask[ACOMP] ? RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK0 : 0));
+ 	} else {
+-		e32(0x0);
++		OUT_BATCH(0);
+ 	}
+ 
+-	R300_STATECHANGE(r300, zs);
+-	reg_start(R300_ZB_CNTL, 2);
+ 
+ 	{
+ 		uint32_t t1, t2;
+@@ -146,37 +159,55 @@ static void r300ClearBuffer(r300ContextPtr r300, int flags, int buffer)
+ 			     R300_S_FRONT_ZFAIL_OP_SHIFT);
+ 		}
+ 
+-		e32(t1);
+-		e32(t2);
+-		e32(((ctx->Stencil.WriteMask[0] & R300_STENCILREF_MASK) << R300_STENCILWRITEMASK_SHIFT) |
+-		    (ctx->Stencil.Clear & R300_STENCILREF_MASK));
++		OUT_BATCH_REGSEQ(R300_ZB_CNTL, 3);
++		OUT_BATCH(t1);
++		OUT_BATCH(t2);
++		OUT_BATCH(((ctx->Stencil.WriteMask[0] & R300_STENCILREF_MASK) <<
++                   R300_STENCILWRITEMASK_SHIFT) |
++			  (ctx->Stencil.Clear & R300_STENCILREF_MASK));
++		END_BATCH();
+ 	}
+ 
+-	cmd2 = (drm_r300_cmd_header_t *) r300AllocCmdBuf(r300, 9, __FUNCTION__);
+-	cmd2[0].packet3.cmd_type = R300_CMD_PACKET3;
+-	cmd2[0].packet3.packet = R300_CMD_PACKET3_CLEAR;
+-	cmd2[1].u = r300PackFloat32(dPriv->w / 2.0);
+-	cmd2[2].u = r300PackFloat32(dPriv->h / 2.0);
+-	cmd2[3].u = r300PackFloat32(ctx->Depth.Clear);
+-	cmd2[4].u = r300PackFloat32(1.0);
+-	cmd2[5].u = r300PackFloat32(ctx->Color.ClearColor[0]);
+-	cmd2[6].u = r300PackFloat32(ctx->Color.ClearColor[1]);
+-	cmd2[7].u = r300PackFloat32(ctx->Color.ClearColor[2]);
+-	cmd2[8].u = r300PackFloat32(ctx->Color.ClearColor[3]);
+-
++	if (!rmesa->radeon.radeonScreen->kernel_mm) {
++		BEGIN_BATCH_NO_AUTOSTATE(9);
++		OUT_BATCH(cmdpacket3(r300->radeon.radeonScreen, R300_CMD_PACKET3_CLEAR));
++		OUT_BATCH_FLOAT32(dPriv->w / 2.0);
++		OUT_BATCH_FLOAT32(dPriv->h / 2.0);
++		OUT_BATCH_FLOAT32(ctx->Depth.Clear);
++		OUT_BATCH_FLOAT32(1.0);
++		OUT_BATCH_FLOAT32(ctx->Color.ClearColor[0]);
++		OUT_BATCH_FLOAT32(ctx->Color.ClearColor[1]);
++		OUT_BATCH_FLOAT32(ctx->Color.ClearColor[2]);
++		OUT_BATCH_FLOAT32(ctx->Color.ClearColor[3]);
++		END_BATCH();
++	} else {
++		OUT_BATCH(CP_PACKET3(R200_3D_DRAW_IMMD_2, 8));
++		OUT_BATCH(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING |
++			  (1 << R300_PRIM_NUM_VERTICES_SHIFT));
++		OUT_BATCH_FLOAT32(dPriv->w / 2.0);
++		OUT_BATCH_FLOAT32(dPriv->h / 2.0);
++		OUT_BATCH_FLOAT32(ctx->Depth.Clear);
++		OUT_BATCH_FLOAT32(1.0);
++		OUT_BATCH_FLOAT32(ctx->Color.ClearColor[0]);
++		OUT_BATCH_FLOAT32(ctx->Color.ClearColor[1]);
++		OUT_BATCH_FLOAT32(ctx->Color.ClearColor[2]);
++		OUT_BATCH_FLOAT32(ctx->Color.ClearColor[3]);
++	}
++	
+ 	r300EmitCacheFlush(rmesa);
+-	cp_wait(rmesa, R300_WAIT_3D | R300_WAIT_3D_CLEAN);
++	cp_wait(&r300->radeon, R300_WAIT_3D | R300_WAIT_3D_CLEAN);
++
++	R300_STATECHANGE(r300, cb);
++	R300_STATECHANGE(r300, cmk);
++	R300_STATECHANGE(r300, zs);
+ }
+ 
+ static void r300EmitClearState(GLcontext * ctx)
+ {
+ 	r300ContextPtr r300 = R300_CONTEXT(ctx);
+-	r300ContextPtr rmesa = r300;
++	BATCH_LOCALS(&r300->radeon);
+ 	__DRIdrawablePrivate *dPriv = r300->radeon.dri.drawable;
+ 	int i;
+-	int cmd_reserved = 0;
+-	int cmd_written = 0;
+-	drm_radeon_cmd_header_t *cmd = NULL;
+ 	int has_tcl = 1;
+ 	int is_r500 = 0;
+ 	GLuint vap_cntl;
+@@ -184,35 +215,37 @@ static void r300EmitClearState(GLcontext * ctx)
+ 	if (!(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL))
+ 		has_tcl = 0;
+ 
+-        if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
+-                is_r500 = 1;
++	if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
++		is_r500 = 1;
+ 
+-
+-	/* FIXME: the values written to R300_VAP_INPUT_ROUTE_0_0 and
+-	 * R300_VAP_INPUT_ROUTE_0_1 are in fact known, however, the values are
+-	 * quite complex; see the functions in r300_emit.c.
++	/* State atom dirty tracking is a little subtle here.
++	 *
++	 * On the one hand, we need to make sure base state is emitted
++	 * here if we start with an empty batch buffer, otherwise clear
++	 * works incorrectly with multiple processes. Therefore, the first
++	 * BEGIN_BATCH cannot be a BEGIN_BATCH_NO_AUTOSTATE.
++	 *
++	 * On the other hand, implicit state emission clears the state atom
++	 * dirty bits, so we have to call R300_STATECHANGE later than the
++	 * first BEGIN_BATCH.
+ 	 *
+-	 * I believe it would be a good idea to extend the functions in
+-	 * r300_emit.c so that they can be used to setup the default values for
+-	 * these registers, as well as the actual values used for rendering.
++	 * The final trickiness is that, because we change state, we need
++	 * to ensure that any stored swtcl primitives are flushed properly
++	 * before we start changing state. See the R300_NEWPRIM in r300Clear
++	 * for this.
+ 	 */
+-	R300_STATECHANGE(r300, vir[0]);
+-	reg_start(R300_VAP_PROG_STREAM_CNTL_0, 0);
++	BEGIN_BATCH(31);
++	OUT_BATCH_REGSEQ(R300_VAP_PROG_STREAM_CNTL_0, 1);
+ 	if (!has_tcl)
+-	    e32(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) |
++		OUT_BATCH(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) |
+ 		 ((R300_LAST_VEC | (2 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT)));
+ 	else
+-	    e32(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) |
++		OUT_BATCH(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) |
+ 		 ((R300_LAST_VEC | (1 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT)));
+ 
+-	/* disable fog */
+-	R300_STATECHANGE(r300, fogs);
+-	reg_start(R300_FG_FOG_BLEND, 0);
+-	e32(0x0);
+-
+-	R300_STATECHANGE(r300, vir[1]);
+-	reg_start(R300_VAP_PROG_STREAM_CNTL_EXT_0, 0);
+-	e32(((((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) |
++	OUT_BATCH_REGVAL(R300_FG_FOG_BLEND, 0);
++	OUT_BATCH_REGVAL(R300_VAP_PROG_STREAM_CNTL_EXT_0,
++	   ((((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) |
+ 	       (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) |
+ 	       (R300_SWIZZLE_SELECT_Z << R300_SWIZZLE_SELECT_Z_SHIFT) |
+ 	       (R300_SWIZZLE_SELECT_W << R300_SWIZZLE_SELECT_W_SHIFT) |
+@@ -226,238 +259,276 @@ static void r300EmitClearState(GLcontext * ctx)
+ 	      << R300_SWIZZLE1_SHIFT)));
+ 
+ 	/* R300_VAP_INPUT_CNTL_0, R300_VAP_INPUT_CNTL_1 */
+-	R300_STATECHANGE(r300, vic);
+-	reg_start(R300_VAP_VTX_STATE_CNTL, 1);
+-	e32((R300_SEL_USER_COLOR_0 << R300_COLOR_0_ASSEMBLY_SHIFT));
+-	e32(R300_INPUT_CNTL_POS | R300_INPUT_CNTL_COLOR | R300_INPUT_CNTL_TC0);
++	OUT_BATCH_REGSEQ(R300_VAP_VTX_STATE_CNTL, 2);
++	OUT_BATCH((R300_SEL_USER_COLOR_0 << R300_COLOR_0_ASSEMBLY_SHIFT));
++	OUT_BATCH(R300_INPUT_CNTL_POS | R300_INPUT_CNTL_COLOR | R300_INPUT_CNTL_TC0);
+ 
+-	R300_STATECHANGE(r300, vte);
+ 	/* comes from fglrx startup of clear */
+-	reg_start(R300_SE_VTE_CNTL, 1);
+-	e32(R300_VTX_W0_FMT | R300_VPORT_X_SCALE_ENA |
+-	    R300_VPORT_X_OFFSET_ENA | R300_VPORT_Y_SCALE_ENA |
+-	    R300_VPORT_Y_OFFSET_ENA | R300_VPORT_Z_SCALE_ENA |
+-	    R300_VPORT_Z_OFFSET_ENA);
+-	e32(0x8);
++	OUT_BATCH_REGSEQ(R300_SE_VTE_CNTL, 2);
++	OUT_BATCH(R300_VTX_W0_FMT | R300_VPORT_X_SCALE_ENA |
++		  R300_VPORT_X_OFFSET_ENA | R300_VPORT_Y_SCALE_ENA |
++		  R300_VPORT_Y_OFFSET_ENA | R300_VPORT_Z_SCALE_ENA |
++		  R300_VPORT_Z_OFFSET_ENA);
++	OUT_BATCH(0x8);
+ 
+-	reg_start(R300_VAP_PSC_SGN_NORM_CNTL, 0);
+-	e32(0xaaaaaaaa);
++	OUT_BATCH_REGVAL(R300_VAP_PSC_SGN_NORM_CNTL, 0xaaaaaaaa);
+ 
+-	R300_STATECHANGE(r300, vof);
+-	reg_start(R300_VAP_OUTPUT_VTX_FMT_0, 1);
+-	e32(R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT |
+-	    R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT);
+-	e32(0x0);		/* no textures */
++	OUT_BATCH_REGSEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2);
++	OUT_BATCH(R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT |
++		  R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT);
++	OUT_BATCH(0); /* no textures */
+ 
+-	R300_STATECHANGE(r300, txe);
+-	reg_start(R300_TX_ENABLE, 0);
+-	e32(0x0);
++	OUT_BATCH_REGVAL(R300_TX_ENABLE, 0);
+ 
+-	R300_STATECHANGE(r300, vpt);
+-	reg_start(R300_SE_VPORT_XSCALE, 5);
+-	efloat(1.0);
+-	efloat(dPriv->x);
+-	efloat(1.0);
+-	efloat(dPriv->y);
+-	efloat(1.0);
+-	efloat(0.0);
++	OUT_BATCH_REGSEQ(R300_SE_VPORT_XSCALE, 6);
++	OUT_BATCH_FLOAT32(1.0);
++	OUT_BATCH_FLOAT32(dPriv->x);
++	OUT_BATCH_FLOAT32(1.0);
++	OUT_BATCH_FLOAT32(dPriv->y);
++	OUT_BATCH_FLOAT32(1.0);
++	OUT_BATCH_FLOAT32(0.0);
+ 
+-	R300_STATECHANGE(r300, at);
+-	reg_start(R300_FG_ALPHA_FUNC, 0);
+-	e32(0x0);
++	OUT_BATCH_REGVAL(R300_FG_ALPHA_FUNC, 0);
++
++	OUT_BATCH_REGSEQ(R300_RB3D_CBLEND, 2);
++	OUT_BATCH(0x0);
++	OUT_BATCH(0x0);
++	END_BATCH();
+ 
++	R300_STATECHANGE(r300, vir[0]);
++	R300_STATECHANGE(r300, fogs);
++	R300_STATECHANGE(r300, vir[1]);
++	R300_STATECHANGE(r300, vic);
++	R300_STATECHANGE(r300, vte);
++	R300_STATECHANGE(r300, vof);
++	R300_STATECHANGE(r300, txe);
++	R300_STATECHANGE(r300, vpt);
++	R300_STATECHANGE(r300, at);
+ 	R300_STATECHANGE(r300, bld);
+-	reg_start(R300_RB3D_CBLEND, 1);
+-	e32(0x0);
+-	e32(0x0);
++	R300_STATECHANGE(r300, ps);
+ 
+ 	if (has_tcl) {
+-	    R300_STATECHANGE(r300, vap_clip_cntl);
+-	    reg_start(R300_VAP_CLIP_CNTL, 0);
+-	    e32(R300_PS_UCP_MODE_CLIP_AS_TRIFAN | R300_CLIP_DISABLE);
++		R300_STATECHANGE(r300, vap_clip_cntl);
++
++		BEGIN_BATCH_NO_AUTOSTATE(2);
++		OUT_BATCH_REGVAL(R300_VAP_CLIP_CNTL, R300_PS_UCP_MODE_CLIP_AS_TRIFAN | R300_CLIP_DISABLE);
++		END_BATCH();
+         }
+ 
+-	R300_STATECHANGE(r300, ps);
+-	reg_start(R300_GA_POINT_SIZE, 0);
+-	e32(((dPriv->w * 6) << R300_POINTSIZE_X_SHIFT) |
+-	    ((dPriv->h * 6) << R300_POINTSIZE_Y_SHIFT));
++	BEGIN_BATCH_NO_AUTOSTATE(2);
++	OUT_BATCH_REGVAL(R300_GA_POINT_SIZE,
++		((dPriv->w * 6) << R300_POINTSIZE_X_SHIFT) |
++		((dPriv->h * 6) << R300_POINTSIZE_Y_SHIFT));
++	END_BATCH();
+ 
+ 	if (!is_r500) {
+ 		R300_STATECHANGE(r300, ri);
+-		reg_start(R300_RS_IP_0, 7);
+-		for (i = 0; i < 8; ++i) {
+-			e32(R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3));
+-		}
+-
+ 		R300_STATECHANGE(r300, rc);
+-		/* The second constant is needed to get glxgears display anything .. */
+-		reg_start(R300_RS_COUNT, 1);
+-		e32((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN);
+-		e32(0x0);
+-
+ 		R300_STATECHANGE(r300, rr);
+-		reg_start(R300_RS_INST_0, 0);
+-		e32(R300_RS_INST_COL_CN_WRITE);
++
++		BEGIN_BATCH(14);
++		OUT_BATCH_REGSEQ(R300_RS_IP_0, 8);
++		for (i = 0; i < 8; ++i)
++			OUT_BATCH(R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3));
++
++		OUT_BATCH_REGSEQ(R300_RS_COUNT, 2);
++		OUT_BATCH((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN);
++		OUT_BATCH(0x0);
++
++		OUT_BATCH_REGVAL(R300_RS_INST_0, R300_RS_INST_COL_CN_WRITE);
++		END_BATCH();
+ 	} else {
+ 		R300_STATECHANGE(r300, ri);
+-		reg_start(R500_RS_IP_0, 7);
++		R300_STATECHANGE(r300, rc);
++		R300_STATECHANGE(r300, rr);
++
++		BEGIN_BATCH(14);
++		OUT_BATCH_REGSEQ(R500_RS_IP_0, 8);
+ 		for (i = 0; i < 8; ++i) {
+-			e32((R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) |
+-			    (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) |
+-			    (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) |
+-			    (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT));
++			OUT_BATCH((R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) |
++				  (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) |
++				  (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) |
++				  (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT));
+ 		}
+ 
+-		R300_STATECHANGE(r300, rc);
+-		/* The second constant is needed to get glxgears display anything .. */
+-		reg_start(R300_RS_COUNT, 1);
+-		e32((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN);
+-		e32(0x0);
+-
+-		R300_STATECHANGE(r300, rr);
+-		reg_start(R500_RS_INST_0, 0);
+-		e32(R500_RS_INST_COL_CN_WRITE);
++		OUT_BATCH_REGSEQ(R300_RS_COUNT, 2);
++		OUT_BATCH((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN);
++		OUT_BATCH(0x0);
+ 
++		OUT_BATCH_REGVAL(R500_RS_INST_0, R500_RS_INST_COL_CN_WRITE);
++		END_BATCH();
+ 	}
+ 
+ 	if (!is_r500) {
+ 		R300_STATECHANGE(r300, fp);
+-		reg_start(R300_US_CONFIG, 2);
+-		e32(0x0);
+-		e32(0x0);
+-		e32(0x0);
+-		reg_start(R300_US_CODE_ADDR_0, 3);
+-		e32(0x0);
+-		e32(0x0);
+-		e32(0x0);
+-		e32(R300_RGBA_OUT);
+-
+ 		R300_STATECHANGE(r300, fpi[0]);
+ 		R300_STATECHANGE(r300, fpi[1]);
+ 		R300_STATECHANGE(r300, fpi[2]);
+ 		R300_STATECHANGE(r300, fpi[3]);
+ 
+-		reg_start(R300_US_ALU_RGB_INST_0, 0);
+-		e32(FP_INSTRC(MAD, FP_ARGC(SRC0C_XYZ), FP_ARGC(ONE), FP_ARGC(ZERO)));
+-
+-		reg_start(R300_US_ALU_RGB_ADDR_0, 0);
+-		e32(FP_SELC(0, NO, XYZ, FP_TMP(0), 0, 0));
+-
+-		reg_start(R300_US_ALU_ALPHA_INST_0, 0);
+-		e32(FP_INSTRA(MAD, FP_ARGA(SRC0A), FP_ARGA(ONE), FP_ARGA(ZERO)));
+-
+-		reg_start(R300_US_ALU_ALPHA_ADDR_0, 0);
+-		e32(FP_SELA(0, NO, W, FP_TMP(0), 0, 0));
++		BEGIN_BATCH(17);
++		OUT_BATCH_REGSEQ(R300_US_CONFIG, 3);
++		OUT_BATCH(0x0);
++		OUT_BATCH(0x0);
++		OUT_BATCH(0x0);
++		OUT_BATCH_REGSEQ(R300_US_CODE_ADDR_0, 4);
++		OUT_BATCH(0x0);
++		OUT_BATCH(0x0);
++		OUT_BATCH(0x0);
++		OUT_BATCH(R300_RGBA_OUT);
++
++		OUT_BATCH_REGVAL(R300_US_ALU_RGB_INST_0,
++			FP_INSTRC(MAD, FP_ARGC(SRC0C_XYZ), FP_ARGC(ONE), FP_ARGC(ZERO)));
++		OUT_BATCH_REGVAL(R300_US_ALU_RGB_ADDR_0,
++			FP_SELC(0, NO, XYZ, FP_TMP(0), 0, 0));
++		OUT_BATCH_REGVAL(R300_US_ALU_ALPHA_INST_0,
++			FP_INSTRA(MAD, FP_ARGA(SRC0A), FP_ARGA(ONE), FP_ARGA(ZERO)));
++		OUT_BATCH_REGVAL(R300_US_ALU_ALPHA_ADDR_0,
++			FP_SELA(0, NO, W, FP_TMP(0), 0, 0));
++		END_BATCH();
+ 	} else {
+- 		R300_STATECHANGE(r300, fp);
+- 		reg_start(R500_US_CONFIG, 1);
+- 		e32(R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO);
+- 		e32(0x0);
+- 		reg_start(R500_US_CODE_ADDR, 2);
+- 		e32(R500_US_CODE_START_ADDR(0) | R500_US_CODE_END_ADDR(1));
+- 		e32(R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(1));
+- 		e32(R500_US_CODE_OFFSET_ADDR(0));
++		struct radeon_state_atom r500fp;
++		uint32_t _cmd[10];
+ 
++		R300_STATECHANGE(r300, fp);
+ 		R300_STATECHANGE(r300, r500fp);
+-		r500fp_start_fragment(0, 6);
+-
+-		e32(R500_INST_TYPE_OUT |
+-		    R500_INST_TEX_SEM_WAIT |
+-		    R500_INST_LAST |
+-		    R500_INST_RGB_OMASK_R |
+-		    R500_INST_RGB_OMASK_G |
+-		    R500_INST_RGB_OMASK_B |
+-		    R500_INST_ALPHA_OMASK |
+-		    R500_INST_RGB_CLAMP |
+-		    R500_INST_ALPHA_CLAMP);
+-
+-		e32(R500_RGB_ADDR0(0) |
+-		    R500_RGB_ADDR1(0) |
+-		    R500_RGB_ADDR1_CONST |
+-		    R500_RGB_ADDR2(0) |
+-		    R500_RGB_ADDR2_CONST);
+-
+-		e32(R500_ALPHA_ADDR0(0) |
+-		    R500_ALPHA_ADDR1(0) |
+-		    R500_ALPHA_ADDR1_CONST |
+-		    R500_ALPHA_ADDR2(0) |
+-		    R500_ALPHA_ADDR2_CONST);
+-
+-		e32(R500_ALU_RGB_SEL_A_SRC0 |
+-		    R500_ALU_RGB_R_SWIZ_A_R |
+-		    R500_ALU_RGB_G_SWIZ_A_G |
+-		    R500_ALU_RGB_B_SWIZ_A_B |
+-		    R500_ALU_RGB_SEL_B_SRC0 |
+-		    R500_ALU_RGB_R_SWIZ_B_R |
+-		    R500_ALU_RGB_B_SWIZ_B_G |
+-		    R500_ALU_RGB_G_SWIZ_B_B);
+-
+-		e32(R500_ALPHA_OP_CMP |
+-		    R500_ALPHA_SWIZ_A_A |
+-		    R500_ALPHA_SWIZ_B_A);
+-
+-		e32(R500_ALU_RGBA_OP_CMP |
+-		    R500_ALU_RGBA_R_SWIZ_0 |
+-		    R500_ALU_RGBA_G_SWIZ_0 |
+-		    R500_ALU_RGBA_B_SWIZ_0 |
+-		    R500_ALU_RGBA_A_SWIZ_0);
++
++		BEGIN_BATCH(7);
++		OUT_BATCH_REGSEQ(R500_US_CONFIG, 2);
++		OUT_BATCH(R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO);
++		OUT_BATCH(0x0);
++		OUT_BATCH_REGSEQ(R500_US_CODE_ADDR, 3);
++		OUT_BATCH(R500_US_CODE_START_ADDR(0) | R500_US_CODE_END_ADDR(1));
++		OUT_BATCH(R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(1));
++		OUT_BATCH(R500_US_CODE_OFFSET_ADDR(0));
++		END_BATCH();
++
++		r500fp.check = check_r500fp;
++		r500fp.cmd = _cmd;
++		r500fp.cmd[0] = cmdr500fp(r300->radeon.radeonScreen, 0, 1, 0, 0);
++		r500fp.cmd[1] = R500_INST_TYPE_OUT |
++			R500_INST_TEX_SEM_WAIT |
++			R500_INST_LAST |
++			R500_INST_RGB_OMASK_R |
++			R500_INST_RGB_OMASK_G |
++			R500_INST_RGB_OMASK_B |
++			R500_INST_ALPHA_OMASK |
++			R500_INST_RGB_CLAMP |
++			R500_INST_ALPHA_CLAMP;
++		r500fp.cmd[2] = R500_RGB_ADDR0(0) |
++			R500_RGB_ADDR1(0) |
++			R500_RGB_ADDR1_CONST |
++			R500_RGB_ADDR2(0) |
++			R500_RGB_ADDR2_CONST;
++		r500fp.cmd[3] = R500_ALPHA_ADDR0(0) |
++			R500_ALPHA_ADDR1(0) |
++			R500_ALPHA_ADDR1_CONST |
++			R500_ALPHA_ADDR2(0) |
++			R500_ALPHA_ADDR2_CONST;
++		r500fp.cmd[4] = R500_ALU_RGB_SEL_A_SRC0 |
++			R500_ALU_RGB_R_SWIZ_A_R |
++			R500_ALU_RGB_G_SWIZ_A_G |
++			R500_ALU_RGB_B_SWIZ_A_B |
++			R500_ALU_RGB_SEL_B_SRC0 |
++			R500_ALU_RGB_R_SWIZ_B_R |
++			R500_ALU_RGB_B_SWIZ_B_G |
++			R500_ALU_RGB_G_SWIZ_B_B;
++		r500fp.cmd[5] = R500_ALPHA_OP_CMP |
++			R500_ALPHA_SWIZ_A_A |
++			R500_ALPHA_SWIZ_B_A;
++		r500fp.cmd[6] = R500_ALU_RGBA_OP_CMP |
++			R500_ALU_RGBA_R_SWIZ_0 |
++			R500_ALU_RGBA_G_SWIZ_0 |
++			R500_ALU_RGBA_B_SWIZ_0 |
++			R500_ALU_RGBA_A_SWIZ_0;
++		
++		r500fp.cmd[7] = 0;
++		emit_r500fp(ctx, &r500fp);
+ 	}
+ 
+-	reg_start(R300_VAP_PVS_STATE_FLUSH_REG, 0);
+-	e32(0x00000000);
++	BEGIN_BATCH(2);
++	OUT_BATCH_REGVAL(R300_VAP_PVS_STATE_FLUSH_REG, 0);
++	END_BATCH();
++
+ 	if (has_tcl) {
+-	    vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) |
++		vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) |
+ 			(5 << R300_PVS_NUM_CNTLRS_SHIFT) |
+ 			(12 << R300_VF_MAX_VTX_NUM_SHIFT));
+-	    if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
+-		vap_cntl |= R500_TCL_STATE_OPTIMIZATION;
+-	} else
+-	    vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) |
++		if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
++			vap_cntl |= R500_TCL_STATE_OPTIMIZATION;
++	} else {
++		vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) |
+ 			(5 << R300_PVS_NUM_CNTLRS_SHIFT) |
+ 			(5 << R300_VF_MAX_VTX_NUM_SHIFT));
++	}
+ 
+ 	if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV515)
+-	    vap_cntl |= (2 << R300_PVS_NUM_FPUS_SHIFT);
++		vap_cntl |= (2 << R300_PVS_NUM_FPUS_SHIFT);
+ 	else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV530) ||
+ 		 (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV560) ||
+ 		 (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV570))
+-	    vap_cntl |= (5 << R300_PVS_NUM_FPUS_SHIFT);
++		vap_cntl |= (5 << R300_PVS_NUM_FPUS_SHIFT);
+ 	else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV410) ||
+ 		 (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R420))
+-	    vap_cntl |= (6 << R300_PVS_NUM_FPUS_SHIFT);
++		vap_cntl |= (6 << R300_PVS_NUM_FPUS_SHIFT);
+ 	else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R520) ||
+ 		 (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R580))
+-	    vap_cntl |= (8 << R300_PVS_NUM_FPUS_SHIFT);
++		vap_cntl |= (8 << R300_PVS_NUM_FPUS_SHIFT);
+ 	else
+-	    vap_cntl |= (4 << R300_PVS_NUM_FPUS_SHIFT);
++		vap_cntl |= (4 << R300_PVS_NUM_FPUS_SHIFT);
++
++	R300_STATECHANGE(r300, vap_cntl);
+ 
+-	R300_STATECHANGE(rmesa, vap_cntl);
+-	reg_start(R300_VAP_CNTL, 0);
+-	e32(vap_cntl);
++	BEGIN_BATCH(2);
++	OUT_BATCH_REGVAL(R300_VAP_CNTL, vap_cntl);
++	END_BATCH();
+ 
+ 	if (has_tcl) {
++        struct radeon_state_atom vpu;
++        uint32_t _cmd[10];
+ 		R300_STATECHANGE(r300, pvs);
+-		reg_start(R300_VAP_PVS_CODE_CNTL_0, 2);
+-
+-		e32((0 << R300_PVS_FIRST_INST_SHIFT) |
+-		    (0 << R300_PVS_XYZW_VALID_INST_SHIFT) |
+-		    (1 << R300_PVS_LAST_INST_SHIFT));
+-		e32((0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) |
+-		    (0 << R300_PVS_MAX_CONST_ADDR_SHIFT));
+-		e32(1 << R300_PVS_LAST_VTX_SRC_INST_SHIFT);
+-
+ 		R300_STATECHANGE(r300, vpi);
+-		vsf_start_fragment(0x0, 8);
+-
+-		e32(PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, 0, 0xf, PVS_DST_REG_OUT));
+-		e32(PVS_SRC_OPERAND(0, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, VSF_FLAG_NONE));
+-		e32(PVS_SRC_OPERAND(0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_REG_INPUT, VSF_FLAG_NONE));
+-		e32(0x0);
+ 
+-		e32(PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, 1, 0xf, PVS_DST_REG_OUT));
+-		e32(PVS_SRC_OPERAND(1, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, VSF_FLAG_NONE));
+-		e32(PVS_SRC_OPERAND(1, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_REG_INPUT, VSF_FLAG_NONE));
+-		e32(0x0);
++		BEGIN_BATCH(4);
++		OUT_BATCH_REGSEQ(R300_VAP_PVS_CODE_CNTL_0, 3);
++		OUT_BATCH((0 << R300_PVS_FIRST_INST_SHIFT) |
++			  (0 << R300_PVS_XYZW_VALID_INST_SHIFT) |
++			  (1 << R300_PVS_LAST_INST_SHIFT));
++		OUT_BATCH((0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) |
++			  (0 << R300_PVS_MAX_CONST_ADDR_SHIFT));
++		OUT_BATCH(1 << R300_PVS_LAST_VTX_SRC_INST_SHIFT);
++		END_BATCH();
++
++		vpu.check = check_vpu;
++		vpu.cmd = _cmd;
++		vpu.cmd[0] = cmdvpu(r300->radeon.radeonScreen, 0, 2);
++
++		vpu.cmd[1] = PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE,
++                                         0, 0xf, PVS_DST_REG_OUT);
++		vpu.cmd[2] = PVS_SRC_OPERAND(0, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y,
++                                      PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W,
++                                      PVS_SRC_REG_INPUT, VSF_FLAG_NONE);
++		vpu.cmd[3] = PVS_SRC_OPERAND(0, PVS_SRC_SELECT_FORCE_0,
++                                      PVS_SRC_SELECT_FORCE_0,
++                                      PVS_SRC_SELECT_FORCE_0,
++                                      PVS_SRC_SELECT_FORCE_0,
++                                      PVS_SRC_REG_INPUT, VSF_FLAG_NONE);
++		vpu.cmd[4] = 0x0;
++
++		vpu.cmd[5] = PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, 1, 0xf,
++                                         PVS_DST_REG_OUT);
++		vpu.cmd[6] = PVS_SRC_OPERAND(1, PVS_SRC_SELECT_X,
++                                      PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z,
++                                      PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT,
++
++                                      VSF_FLAG_NONE);
++		vpu.cmd[7] = PVS_SRC_OPERAND(1, PVS_SRC_SELECT_FORCE_0,
++                                      PVS_SRC_SELECT_FORCE_0,
++                                      PVS_SRC_SELECT_FORCE_0,
++                                      PVS_SRC_SELECT_FORCE_0,
++                                      PVS_SRC_REG_INPUT, VSF_FLAG_NONE);
++		vpu.cmd[8] = 0x0;
++		emit_vpu(ctx, &vpu);
+ 	}
+ }
+ 
+@@ -468,9 +539,11 @@ static void r300Clear(GLcontext * ctx, GLbitfield mask)
+ {
+ 	r300ContextPtr r300 = R300_CONTEXT(ctx);
+ 	__DRIdrawablePrivate *dPriv = r300->radeon.dri.drawable;
++	GLframebuffer *fb = dPriv->driverPrivate;
++	struct radeon_renderbuffer *rrb;
++	struct radeon_renderbuffer *rrbd;
+ 	int flags = 0;
+ 	int bits = 0;
+-	int swapped;
+ 
+ 	if (RADEON_DEBUG & DEBUG_IOCTL)
+ 		fprintf(stderr, "r300Clear\n");
+@@ -482,6 +555,12 @@ static void r300Clear(GLcontext * ctx, GLbitfield mask)
+ 			return;
+ 	}
+ 
++	/* Flush swtcl vertices if necessary, because we will change hardware
++	 * state during clear. See also the state-related comment in
++	 * r300EmitClearState.
++	 */
++	R300_NEWPRIM(r300);
++
+ 	if (mask & BUFFER_BIT_FRONT_LEFT) {
+ 		flags |= BUFFER_BIT_FRONT_LEFT;
+ 		mask &= ~BUFFER_BIT_FRONT_LEFT;
+@@ -497,7 +576,7 @@ static void r300Clear(GLcontext * ctx, GLbitfield mask)
+ 		mask &= ~BUFFER_BIT_DEPTH;
+ 	}
+ 
+-	if ((mask & BUFFER_BIT_STENCIL) && r300->state.stencil.hw_stencil) {
++	if ((mask & BUFFER_BIT_STENCIL) && r300->radeon.state.stencil.hwBuffer) {
+ 		bits |= CLEARBUFFER_STENCIL;
+ 		mask &= ~BUFFER_BIT_STENCIL;
+ 	}
+@@ -509,26 +588,28 @@ static void r300Clear(GLcontext * ctx, GLbitfield mask)
+ 		_swrast_Clear(ctx, mask);
+ 	}
+ 
+-	swapped = r300->radeon.sarea->pfCurrentPage == 1;
+-
+ 	/* Make sure it fits there. */
+-	r300EnsureCmdBufSpace(r300, 421 * 3, __FUNCTION__);
++	rcommonEnsureCmdBufSpace(&r300->radeon, 421 * 3, __FUNCTION__);
+ 	if (flags || bits)
+ 		r300EmitClearState(ctx);
++	rrbd = (void *)fb->Attachment[BUFFER_DEPTH].Renderbuffer;
+ 
+ 	if (flags & BUFFER_BIT_FRONT_LEFT) {
+-		r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, swapped);
++		rrb = (void *)fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
++		r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, rrb, rrbd);
+ 		bits = 0;
+ 	}
+ 
+ 	if (flags & BUFFER_BIT_BACK_LEFT) {
+-		r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, swapped ^ 1);
++		rrb = (void *)fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
++		r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, rrb, rrbd);
+ 		bits = 0;
+ 	}
+ 
+ 	if (bits)
+-		r300ClearBuffer(r300, bits, 0);
++		r300ClearBuffer(r300, bits, NULL, rrbd);
+ 
++	COMMIT_BATCH();
+ }
+ 
+ void r300Flush(GLcontext * ctx)
+@@ -538,302 +619,13 @@ void r300Flush(GLcontext * ctx)
+ 	if (RADEON_DEBUG & DEBUG_IOCTL)
+ 		fprintf(stderr, "%s\n", __FUNCTION__);
+ 
+-	if (rmesa->dma.flush)
+-		rmesa->dma.flush( rmesa );
+-
+-	if (rmesa->cmdbuf.count_used > rmesa->cmdbuf.count_reemit)
+-		r300FlushCmdBuf(rmesa, __FUNCTION__);
+-}
+-
+-#ifdef USER_BUFFERS
+-#include "r300_mem.h"
+-
+-void r300RefillCurrentDmaRegion(r300ContextPtr rmesa, int size)
+-{
+-	struct r300_dma_buffer *dmabuf;
+-	size = MAX2(size, RADEON_BUFFER_SIZE * 16);
+-
+-	if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA))
+-		fprintf(stderr, "%s\n", __FUNCTION__);
+-
+-	if (rmesa->dma.flush) {
+-		rmesa->dma.flush(rmesa);
+-	}
+-
+-	if (rmesa->dma.current.buf) {
+-#ifdef USER_BUFFERS
+-		r300_mem_use(rmesa, rmesa->dma.current.buf->id);
+-#endif
+-		r300ReleaseDmaRegion(rmesa, &rmesa->dma.current, __FUNCTION__);
+-	}
+-	if (rmesa->dma.nr_released_bufs > 4)
+-		r300FlushCmdBuf(rmesa, __FUNCTION__);
+-
+-	dmabuf = CALLOC_STRUCT(r300_dma_buffer);
+-	dmabuf->buf = (void *)1;	/* hack */
+-	dmabuf->refcount = 1;
+-
+-	dmabuf->id = r300_mem_alloc(rmesa, 4, size);
+-	if (dmabuf->id == 0) {
+-		LOCK_HARDWARE(&rmesa->radeon);	/* no need to validate */
+-
+-		r300FlushCmdBufLocked(rmesa, __FUNCTION__);
+-		radeonWaitForIdleLocked(&rmesa->radeon);
+-
+-		dmabuf->id = r300_mem_alloc(rmesa, 4, size);
+-
+-		UNLOCK_HARDWARE(&rmesa->radeon);
+-
+-		if (dmabuf->id == 0) {
+-			fprintf(stderr,
+-				"Error: Could not get dma buffer... exiting\n");
+-			_mesa_exit(-1);
+-		}
+-	}
+-
+-	rmesa->dma.current.buf = dmabuf;
+-	rmesa->dma.current.address = r300_mem_ptr(rmesa, dmabuf->id);
+-	rmesa->dma.current.end = size;
+-	rmesa->dma.current.start = 0;
+-	rmesa->dma.current.ptr = 0;
+-}
+-
+-void r300ReleaseDmaRegion(r300ContextPtr rmesa,
+-			  struct r300_dma_region *region, const char *caller)
+-{
+-	if (RADEON_DEBUG & DEBUG_IOCTL)
+-		fprintf(stderr, "%s from %s\n", __FUNCTION__, caller);
+-
+-	if (!region->buf)
+-		return;
+-
+-	if (rmesa->dma.flush)
+-		rmesa->dma.flush(rmesa);
+-
+-	if (--region->buf->refcount == 0) {
+-		r300_mem_free(rmesa, region->buf->id);
+-		FREE(region->buf);
+-		rmesa->dma.nr_released_bufs++;
+-	}
+-
+-	region->buf = 0;
+-	region->start = 0;
+-}
+-
+-/* Allocates a region from rmesa->dma.current.  If there isn't enough
+- * space in current, grab a new buffer (and discard what was left of current)
+- */
+-void r300AllocDmaRegion(r300ContextPtr rmesa,
+-			struct r300_dma_region *region,
+-			int bytes, int alignment)
+-{
+-	if (RADEON_DEBUG & DEBUG_IOCTL)
+-		fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);
+-
+-	if (rmesa->dma.flush)
+-		rmesa->dma.flush(rmesa);
+-
+-	if (region->buf)
+-		r300ReleaseDmaRegion(rmesa, region, __FUNCTION__);
+-
+-	alignment--;
+-	rmesa->dma.current.start = rmesa->dma.current.ptr =
+-	    (rmesa->dma.current.ptr + alignment) & ~alignment;
+-
+-	if (rmesa->dma.current.ptr + bytes > rmesa->dma.current.end)
+-		r300RefillCurrentDmaRegion(rmesa, (bytes + 0x7) & ~0x7);
+-
+-	region->start = rmesa->dma.current.start;
+-	region->ptr = rmesa->dma.current.start;
+-	region->end = rmesa->dma.current.start + bytes;
+-	region->address = rmesa->dma.current.address;
+-	region->buf = rmesa->dma.current.buf;
+-	region->buf->refcount++;
+-
+-	rmesa->dma.current.ptr += bytes;	/* bug - if alignment > 7 */
+-	rmesa->dma.current.start =
+-	    rmesa->dma.current.ptr = (rmesa->dma.current.ptr + 0x7) & ~0x7;
+-
+-	assert(rmesa->dma.current.ptr <= rmesa->dma.current.end);
+-}
+-
+-#else
+-static void r300RefillCurrentDmaRegion(r300ContextPtr rmesa)
+-{
+-	struct r300_dma_buffer *dmabuf;
+-	int fd = rmesa->radeon.dri.fd;
+-	int index = 0;
+-	int size = 0;
+-	drmDMAReq dma;
+-	int ret;
+-
+-	if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA))
+-		fprintf(stderr, "%s\n", __FUNCTION__);
+-
+-	if (rmesa->dma.flush) {
+-		rmesa->dma.flush(rmesa);
++	if (rmesa->radeon.dma.flush) {
++		rmesa->radeon.dma.flush(ctx);
+ 	}
+-
+-	if (rmesa->dma.current.buf)
+-		r300ReleaseDmaRegion(rmesa, &rmesa->dma.current, __FUNCTION__);
+-
+-	if (rmesa->dma.nr_released_bufs > 4)
+-		r300FlushCmdBuf(rmesa, __FUNCTION__);
+-
+-	dma.context = rmesa->radeon.dri.hwContext;
+-	dma.send_count = 0;
+-	dma.send_list = NULL;
+-	dma.send_sizes = NULL;
+-	dma.flags = 0;
+-	dma.request_count = 1;
+-	dma.request_size = RADEON_BUFFER_SIZE;
+-	dma.request_list = &index;
+-	dma.request_sizes = &size;
+-	dma.granted_count = 0;
+-
+-	LOCK_HARDWARE(&rmesa->radeon);	/* no need to validate */
+-
+-	ret = drmDMA(fd, &dma);
+-
+-	if (ret != 0) {
+-		/* Try to release some buffers and wait until we can't get any more */
+-		if (rmesa->dma.nr_released_bufs) {
+-			r300FlushCmdBufLocked(rmesa, __FUNCTION__);
+-		}
+-
+-		if (RADEON_DEBUG & DEBUG_DMA)
+-			fprintf(stderr, "Waiting for buffers\n");
+-
+-		radeonWaitForIdleLocked(&rmesa->radeon);
+-		ret = drmDMA(fd, &dma);
+-
+-		if (ret != 0) {
+-			UNLOCK_HARDWARE(&rmesa->radeon);
+-			fprintf(stderr,
+-				"Error: Could not get dma buffer... exiting\n");
+-			_mesa_exit(-1);
+-		}
+-	}
+-
+-	UNLOCK_HARDWARE(&rmesa->radeon);
+-
+-	if (RADEON_DEBUG & DEBUG_DMA)
+-		fprintf(stderr, "Allocated buffer %d\n", index);
+-
+-	dmabuf = CALLOC_STRUCT(r300_dma_buffer);
+-	dmabuf->buf = &rmesa->radeon.radeonScreen->buffers->list[index];
+-	dmabuf->refcount = 1;
+-
+-	rmesa->dma.current.buf = dmabuf;
+-	rmesa->dma.current.address = dmabuf->buf->address;
+-	rmesa->dma.current.end = dmabuf->buf->total;
+-	rmesa->dma.current.start = 0;
+-	rmesa->dma.current.ptr = 0;
+-}
+-
+-void r300ReleaseDmaRegion(r300ContextPtr rmesa,
+-			  struct r300_dma_region *region, const char *caller)
+-{
+-	if (RADEON_DEBUG & DEBUG_IOCTL)
+-		fprintf(stderr, "%s from %s\n", __FUNCTION__, caller);
+-
+-	if (!region->buf)
+-		return;
+-
+-	if (rmesa->dma.flush)
+-		rmesa->dma.flush(rmesa);
+-
+-	if (--region->buf->refcount == 0) {
+-		drm_radeon_cmd_header_t *cmd;
+-
+-		if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA))
+-			fprintf(stderr, "%s -- DISCARD BUF %d\n",
+-				__FUNCTION__, region->buf->buf->idx);
+-		cmd =
+-		    (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa,
+-								sizeof
+-								(*cmd) / 4,
+-								__FUNCTION__);
+-		cmd->dma.cmd_type = R300_CMD_DMA_DISCARD;
+-		cmd->dma.buf_idx = region->buf->buf->idx;
+-
+-		FREE(region->buf);
+-		rmesa->dma.nr_released_bufs++;
++	
++	if (rmesa->radeon.cmdbuf.cs->cdw) {
++		rcommonFlushCmdBuf(&rmesa->radeon, __FUNCTION__);
+ 	}
+-
+-	region->buf = 0;
+-	region->start = 0;
+-}
+-
+-/* Allocates a region from rmesa->dma.current.  If there isn't enough
+- * space in current, grab a new buffer (and discard what was left of current)
+- */
+-void r300AllocDmaRegion(r300ContextPtr rmesa,
+-			struct r300_dma_region *region,
+-			int bytes, int alignment)
+-{
+-	if (RADEON_DEBUG & DEBUG_IOCTL)
+-		fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);
+-
+-	if (rmesa->dma.flush)
+-		rmesa->dma.flush(rmesa);
+-
+-	if (region->buf)
+-		r300ReleaseDmaRegion(rmesa, region, __FUNCTION__);
+-
+-	alignment--;
+-	rmesa->dma.current.start = rmesa->dma.current.ptr =
+-	    (rmesa->dma.current.ptr + alignment) & ~alignment;
+-
+-	if (rmesa->dma.current.ptr + bytes > rmesa->dma.current.end)
+-		r300RefillCurrentDmaRegion(rmesa);
+-
+-	region->start = rmesa->dma.current.start;
+-	region->ptr = rmesa->dma.current.start;
+-	region->end = rmesa->dma.current.start + bytes;
+-	region->address = rmesa->dma.current.address;
+-	region->buf = rmesa->dma.current.buf;
+-	region->buf->refcount++;
+-
+-	rmesa->dma.current.ptr += bytes;	/* bug - if alignment > 7 */
+-	rmesa->dma.current.start =
+-	    rmesa->dma.current.ptr = (rmesa->dma.current.ptr + 0x7) & ~0x7;
+-
+-	assert(rmesa->dma.current.ptr <= rmesa->dma.current.end);
+-}
+-
+-#endif
+-
+-GLboolean r300IsGartMemory(r300ContextPtr rmesa, const GLvoid * pointer,
+-			   GLint size)
+-{
+-	int offset =
+-	    (char *)pointer -
+-	    (char *)rmesa->radeon.radeonScreen->gartTextures.map;
+-	int valid = (size >= 0 && offset >= 0
+-		     && offset + size <
+-		     rmesa->radeon.radeonScreen->gartTextures.size);
+-
+-	if (RADEON_DEBUG & DEBUG_IOCTL)
+-		fprintf(stderr, "r300IsGartMemory( %p ) : %d\n", pointer,
+-			valid);
+-
+-	return valid;
+-}
+-
+-GLuint r300GartOffsetFromVirtual(r300ContextPtr rmesa, const GLvoid * pointer)
+-{
+-	int offset =
+-	    (char *)pointer -
+-	    (char *)rmesa->radeon.radeonScreen->gartTextures.map;
+-
+-	//fprintf(stderr, "offset=%08x\n", offset);
+-
+-	if (offset < 0
+-	    || offset > rmesa->radeon.radeonScreen->gartTextures.size)
+-		return ~0;
+-	else
+-		return rmesa->radeon.radeonScreen->gart_texture_offset + offset;
+ }
+ 
+ void r300InitIoctlFuncs(struct dd_function_table *functions)
+diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.h b/src/mesa/drivers/dri/r300/r300_ioctl.h
+index e1143fb..5f00264 100644
+--- a/src/mesa/drivers/dri/r300/r300_ioctl.h
++++ b/src/mesa/drivers/dri/r300/r300_ioctl.h
+@@ -39,20 +39,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "r300_context.h"
+ #include "radeon_drm.h"
+ 
+-extern GLboolean r300IsGartMemory(r300ContextPtr rmesa,
+-				  const GLvoid * pointer, GLint size);
+-
+-extern GLuint r300GartOffsetFromVirtual(r300ContextPtr rmesa,
+-					const GLvoid * pointer);
+-
+ extern void r300Flush(GLcontext * ctx);
+ 
+-extern void r300ReleaseDmaRegion(r300ContextPtr rmesa,
+-				 struct r300_dma_region *region,
+-				 const char *caller);
+ extern void r300AllocDmaRegion(r300ContextPtr rmesa,
+-			       struct r300_dma_region *region, int bytes,
+-			       int alignment);
++			       struct radeon_bo **pbo, int *poffset,
++			       int bytes, int alignment);
+ 
+ extern void r300InitIoctlFuncs(struct dd_function_table *functions);
+ 
+diff --git a/src/mesa/drivers/dri/r300/r300_mem.c b/src/mesa/drivers/dri/r300/r300_mem.c
+deleted file mode 100644
+index f8f9d4f..0000000
+--- a/src/mesa/drivers/dri/r300/r300_mem.c
++++ /dev/null
+@@ -1,385 +0,0 @@
+-/*
+- * Copyright (C) 2005 Aapo Tahkola.
+- *
+- * All Rights Reserved.
+- *
+- * Permission is hereby granted, free of charge, to any person obtaining
+- * a copy of this software and associated documentation files (the
+- * "Software"), to deal in the Software without restriction, including
+- * without limitation the rights to use, copy, modify, merge, publish,
+- * distribute, sublicense, and/or sell copies of the Software, and to
+- * permit persons to whom the Software is furnished to do so, subject to
+- * the following conditions:
+- *
+- * The above copyright notice and this permission notice (including the
+- * next paragraph) shall be included in all copies or substantial
+- * portions of the Software.
+- *
+- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+- * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+- * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+- *
+- */
+-
+-/**
+- * \file
+- *
+- * \author Aapo Tahkola <aet@rasterburn.org>
+- */
+-
+-#include <unistd.h>
+-
+-#include "r300_context.h"
+-#include "r300_cmdbuf.h"
+-#include "r300_ioctl.h"
+-#include "r300_mem.h"
+-#include "radeon_ioctl.h"
+-
+-#ifdef USER_BUFFERS
+-
+-static void resize_u_list(r300ContextPtr rmesa)
+-{
+-	void *temp;
+-	int nsize;
+-
+-	temp = rmesa->rmm->u_list;
+-	nsize = rmesa->rmm->u_size * 2;
+-
+-	rmesa->rmm->u_list = _mesa_malloc(nsize * sizeof(*rmesa->rmm->u_list));
+-	_mesa_memset(rmesa->rmm->u_list, 0,
+-		     nsize * sizeof(*rmesa->rmm->u_list));
+-
+-	if (temp) {
+-		r300FlushCmdBuf(rmesa, __FUNCTION__);
+-
+-		_mesa_memcpy(rmesa->rmm->u_list, temp,
+-			     rmesa->rmm->u_size * sizeof(*rmesa->rmm->u_list));
+-		_mesa_free(temp);
+-	}
+-
+-	rmesa->rmm->u_size = nsize;
+-}
+-
+-void r300_mem_init(r300ContextPtr rmesa)
+-{
+-	rmesa->rmm = malloc(sizeof(struct r300_memory_manager));
+-	memset(rmesa->rmm, 0, sizeof(struct r300_memory_manager));
+-
+-	rmesa->rmm->u_size = 128;
+-	resize_u_list(rmesa);
+-}
+-
+-void r300_mem_destroy(r300ContextPtr rmesa)
+-{
+-	_mesa_free(rmesa->rmm->u_list);
+-	rmesa->rmm->u_list = NULL;
+-
+-	_mesa_free(rmesa->rmm);
+-	rmesa->rmm = NULL;
+-}
+-
+-void *r300_mem_ptr(r300ContextPtr rmesa, int id)
+-{
+-	assert(id <= rmesa->rmm->u_last);
+-	return rmesa->rmm->u_list[id].ptr;
+-}
+-
+-int r300_mem_find(r300ContextPtr rmesa, void *ptr)
+-{
+-	int i;
+-
+-	for (i = 1; i < rmesa->rmm->u_size + 1; i++)
+-		if (rmesa->rmm->u_list[i].ptr &&
+-		    ptr >= rmesa->rmm->u_list[i].ptr &&
+-		    ptr <
+-		    rmesa->rmm->u_list[i].ptr + rmesa->rmm->u_list[i].size)
+-			break;
+-
+-	if (i < rmesa->rmm->u_size + 1)
+-		return i;
+-
+-	fprintf(stderr, "%p failed\n", ptr);
+-	return 0;
+-}
+-
+-//#define MM_DEBUG
+-int r300_mem_alloc(r300ContextPtr rmesa, int alignment, int size)
+-{
+-	drm_radeon_mem_alloc_t alloc;
+-	int offset = 0, ret;
+-	int i, free = -1;
+-	int done_age;
+-	drm_radeon_mem_free_t memfree;
+-	int tries = 0;
+-	static int bytes_wasted = 0, allocated = 0;
+-
+-	if (size < 4096)
+-		bytes_wasted += 4096 - size;
+-
+-	allocated += size;
+-
+-#if 0
+-	static int t = 0;
+-	if (t != time(NULL)) {
+-		t = time(NULL);
+-		fprintf(stderr, "slots used %d, wasted %d kb, allocated %d\n",
+-			rmesa->rmm->u_last, bytes_wasted / 1024,
+-			allocated / 1024);
+-	}
+-#endif
+-
+-	memfree.region = RADEON_MEM_REGION_GART;
+-
+-      again:
+-
+-	done_age = radeonGetAge((radeonContextPtr) rmesa);
+-
+-	if (rmesa->rmm->u_last + 1 >= rmesa->rmm->u_size)
+-		resize_u_list(rmesa);
+-
+-	for (i = rmesa->rmm->u_last + 1; i > 0; i--) {
+-		if (rmesa->rmm->u_list[i].ptr == NULL) {
+-			free = i;
+-			continue;
+-		}
+-
+-		if (rmesa->rmm->u_list[i].h_pending == 0 &&
+-		    rmesa->rmm->u_list[i].pending
+-		    && rmesa->rmm->u_list[i].age <= done_age) {
+-			memfree.region_offset =
+-			    (char *)rmesa->rmm->u_list[i].ptr -
+-			    (char *)rmesa->radeon.radeonScreen->gartTextures.
+-			    map;
+-
+-			ret =
+-			    drmCommandWrite(rmesa->radeon.radeonScreen->
+-					    driScreen->fd, DRM_RADEON_FREE,
+-					    &memfree, sizeof(memfree));
+-
+-			if (ret) {
+-				fprintf(stderr, "Failed to free at %p\n",
+-					rmesa->rmm->u_list[i].ptr);
+-				fprintf(stderr, "ret = %s\n", strerror(-ret));
+-				exit(1);
+-			} else {
+-#ifdef MM_DEBUG
+-				fprintf(stderr, "really freed %d at age %x\n",
+-					i,
+-					radeonGetAge((radeonContextPtr) rmesa));
+-#endif
+-				if (i == rmesa->rmm->u_last)
+-					rmesa->rmm->u_last--;
+-
+-				if (rmesa->rmm->u_list[i].size < 4096)
+-					bytes_wasted -=
+-					    4096 - rmesa->rmm->u_list[i].size;
+-
+-				allocated -= rmesa->rmm->u_list[i].size;
+-				rmesa->rmm->u_list[i].pending = 0;
+-				rmesa->rmm->u_list[i].ptr = NULL;
+-				free = i;
+-			}
+-		}
+-	}
+-	rmesa->rmm->u_head = i;
+-
+-	if (free == -1) {
+-		WARN_ONCE("Ran out of slots!\n");
+-		//usleep(100);
+-		r300FlushCmdBuf(rmesa, __FUNCTION__);
+-		tries++;
+-		if (tries > 100) {
+-			WARN_ONCE("Ran out of slots!\n");
+-			exit(1);
+-		}
+-		goto again;
+-	}
+-
+-	alloc.region = RADEON_MEM_REGION_GART;
+-	alloc.alignment = alignment;
+-	alloc.size = size;
+-	alloc.region_offset = &offset;
+-
+-	ret =
+-	    drmCommandWriteRead(rmesa->radeon.dri.fd, DRM_RADEON_ALLOC, &alloc,
+-				sizeof(alloc));
+-	if (ret) {
+-#if 0
+-		WARN_ONCE("Ran out of mem!\n");
+-		r300FlushCmdBuf(rmesa, __FUNCTION__);
+-		//usleep(100);
+-		tries2++;
+-		tries = 0;
+-		if (tries2 > 100) {
+-			WARN_ONCE("Ran out of GART memory!\n");
+-			exit(1);
+-		}
+-		goto again;
+-#else
+-		WARN_ONCE
+-		    ("Ran out of GART memory (for %d)!\nPlease consider adjusting GARTSize option.\n",
+-		     size);
+-		return 0;
+-#endif
+-	}
+-
+-	i = free;
+-
+-	if (i > rmesa->rmm->u_last)
+-		rmesa->rmm->u_last = i;
+-
+-	rmesa->rmm->u_list[i].ptr =
+-	    ((GLubyte *) rmesa->radeon.radeonScreen->gartTextures.map) + offset;
+-	rmesa->rmm->u_list[i].size = size;
+-	rmesa->rmm->u_list[i].age = 0;
+-	//fprintf(stderr, "alloc %p at id %d\n", rmesa->rmm->u_list[i].ptr, i);
+-
+-#ifdef MM_DEBUG
+-	fprintf(stderr, "allocated %d at age %x\n", i,
+-		radeonGetAge((radeonContextPtr) rmesa));
+-#endif
+-
+-	return i;
+-}
+-
+-void r300_mem_use(r300ContextPtr rmesa, int id)
+-{
+-	uint64_t ull;
+-#ifdef MM_DEBUG
+-	fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id,
+-		radeonGetAge((radeonContextPtr) rmesa));
+-#endif
+-	drm_r300_cmd_header_t *cmd;
+-
+-	assert(id <= rmesa->rmm->u_last);
+-
+-	if (id == 0)
+-		return;
+-
+-	cmd =
+-	    (drm_r300_cmd_header_t *) r300AllocCmdBuf(rmesa,
+-						      2 + sizeof(ull) / 4,
+-						      __FUNCTION__);
+-	cmd[0].scratch.cmd_type = R300_CMD_SCRATCH;
+-	cmd[0].scratch.reg = R300_MEM_SCRATCH;
+-	cmd[0].scratch.n_bufs = 1;
+-	cmd[0].scratch.flags = 0;
+-	cmd++;
+-
+-	ull = (uint64_t) (intptr_t) & rmesa->rmm->u_list[id].age;
+-	_mesa_memcpy(cmd, &ull, sizeof(ull));
+-	cmd += sizeof(ull) / 4;
+-
+-	cmd[0].u = /*id */ 0;
+-
+-	LOCK_HARDWARE(&rmesa->radeon);	/* Protect from DRM. */
+-	rmesa->rmm->u_list[id].h_pending++;
+-	UNLOCK_HARDWARE(&rmesa->radeon);
+-}
+-
+-unsigned long r300_mem_offset(r300ContextPtr rmesa, int id)
+-{
+-	unsigned long offset;
+-
+-	assert(id <= rmesa->rmm->u_last);
+-
+-	offset = (char *)rmesa->rmm->u_list[id].ptr -
+-	    (char *)rmesa->radeon.radeonScreen->gartTextures.map;
+-	offset += rmesa->radeon.radeonScreen->gart_texture_offset;
+-
+-	return offset;
+-}
+-
+-void *r300_mem_map(r300ContextPtr rmesa, int id, int access)
+-{
+-#ifdef MM_DEBUG
+-	fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id,
+-		radeonGetAge((radeonContextPtr) rmesa));
+-#endif
+-	void *ptr;
+-	int tries = 0;
+-
+-	assert(id <= rmesa->rmm->u_last);
+-
+-	if (access == R300_MEM_R) {
+-
+-		if (rmesa->rmm->u_list[id].mapped == 1)
+-			WARN_ONCE("buffer %d already mapped\n", id);
+-
+-		rmesa->rmm->u_list[id].mapped = 1;
+-		ptr = r300_mem_ptr(rmesa, id);
+-
+-		return ptr;
+-	}
+-
+-	if (rmesa->rmm->u_list[id].h_pending)
+-		r300FlushCmdBuf(rmesa, __FUNCTION__);
+-
+-	if (rmesa->rmm->u_list[id].h_pending) {
+-		return NULL;
+-	}
+-
+-	while (rmesa->rmm->u_list[id].age >
+-	       radeonGetAge((radeonContextPtr) rmesa) && tries++ < 1000)
+-		usleep(10);
+-
+-	if (tries >= 1000) {
+-		fprintf(stderr, "Idling failed (%x vs %x)\n",
+-			rmesa->rmm->u_list[id].age,
+-			radeonGetAge((radeonContextPtr) rmesa));
+-		return NULL;
+-	}
+-
+-	if (rmesa->rmm->u_list[id].mapped == 1)
+-		WARN_ONCE("buffer %d already mapped\n", id);
+-
+-	rmesa->rmm->u_list[id].mapped = 1;
+-	ptr = r300_mem_ptr(rmesa, id);
+-
+-	return ptr;
+-}
+-
+-void r300_mem_unmap(r300ContextPtr rmesa, int id)
+-{
+-#ifdef MM_DEBUG
+-	fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id,
+-		radeonGetAge((radeonContextPtr) rmesa));
+-#endif
+-
+-	assert(id <= rmesa->rmm->u_last);
+-
+-	if (rmesa->rmm->u_list[id].mapped == 0)
+-		WARN_ONCE("buffer %d not mapped\n", id);
+-
+-	rmesa->rmm->u_list[id].mapped = 0;
+-}
+-
+-void r300_mem_free(r300ContextPtr rmesa, int id)
+-{
+-#ifdef MM_DEBUG
+-	fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id,
+-		radeonGetAge((radeonContextPtr) rmesa));
+-#endif
+-
+-	assert(id <= rmesa->rmm->u_last);
+-
+-	if (id == 0)
+-		return;
+-
+-	if (rmesa->rmm->u_list[id].ptr == NULL) {
+-		WARN_ONCE("Not allocated!\n");
+-		return;
+-	}
+-
+-	if (rmesa->rmm->u_list[id].pending) {
+-		WARN_ONCE("%p already pended!\n", rmesa->rmm->u_list[id].ptr);
+-		return;
+-	}
+-
+-	rmesa->rmm->u_list[id].pending = 1;
+-}
+-#endif
+diff --git a/src/mesa/drivers/dri/r300/r300_mem.h b/src/mesa/drivers/dri/r300/r300_mem.h
+deleted file mode 100644
+index 625a7f6..0000000
+--- a/src/mesa/drivers/dri/r300/r300_mem.h
++++ /dev/null
+@@ -1,37 +0,0 @@
+-#ifndef __R300_MEM_H__
+-#define __R300_MEM_H__
+-
+-//#define R300_MEM_PDL 0
+-#define R300_MEM_UL 1
+-
+-#define R300_MEM_R 1
+-#define R300_MEM_W 2
+-#define R300_MEM_RW (R300_MEM_R | R300_MEM_W)
+-
+-#define R300_MEM_SCRATCH 2
+-
+-struct r300_memory_manager {
+-	struct {
+-		void *ptr;
+-		uint32_t size;
+-		uint32_t age;
+-		uint32_t h_pending;
+-		int pending;
+-		int mapped;
+-	} *u_list;
+-	int u_head, u_size, u_last;
+-
+-};
+-
+-extern void r300_mem_init(r300ContextPtr rmesa);
+-extern void r300_mem_destroy(r300ContextPtr rmesa);
+-extern void *r300_mem_ptr(r300ContextPtr rmesa, int id);
+-extern int r300_mem_find(r300ContextPtr rmesa, void *ptr);
+-extern int r300_mem_alloc(r300ContextPtr rmesa, int alignment, int size);
+-extern void r300_mem_use(r300ContextPtr rmesa, int id);
+-extern unsigned long r300_mem_offset(r300ContextPtr rmesa, int id);
+-extern void *r300_mem_map(r300ContextPtr rmesa, int id, int access);
+-extern void r300_mem_unmap(r300ContextPtr rmesa, int id);
+-extern void r300_mem_free(r300ContextPtr rmesa, int id);
+-
+-#endif
+diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h
+index 872a33e..5f344be 100644
+--- a/src/mesa/drivers/dri/r300/r300_reg.h
++++ b/src/mesa/drivers/dri/r300/r300_reg.h
+@@ -1525,6 +1525,13 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #	define R500_SEL_FILTER4_TC3		 (3 << 18)
+ 
+ #define R300_TX_OFFSET_0                    0x4540
++#define R300_TX_OFFSET_1                    0x4544
++#define R300_TX_OFFSET_2                    0x4548
++#define R300_TX_OFFSET_3                    0x454C
++#define R300_TX_OFFSET_4                    0x4550
++#define R300_TX_OFFSET_5                    0x4554
++#define R300_TX_OFFSET_6                    0x4558
++#define R300_TX_OFFSET_7                    0x455C
+ 	/* BEGIN: Guess from R200 */
+ #       define R300_TXO_ENDIAN_NO_SWAP           (0 << 0)
+ #       define R300_TXO_ENDIAN_BYTE_SWAP         (1 << 0)
+diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c
+index 16ce4a1..f46477f 100644
+--- a/src/mesa/drivers/dri/r300/r300_render.c
++++ b/src/mesa/drivers/dri/r300/r300_render.c
+@@ -66,8 +66,6 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "tnl/t_vp_build.h"
+ #include "radeon_reg.h"
+ #include "radeon_macros.h"
+-#include "radeon_ioctl.h"
+-#include "radeon_state.h"
+ #include "r300_context.h"
+ #include "r300_ioctl.h"
+ #include "r300_state.h"
+@@ -175,85 +173,164 @@ int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim)
+ static void r300EmitElts(GLcontext * ctx, void *elts, unsigned long n_elts)
+ {
+ 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
+-	struct r300_dma_region *rvb = &rmesa->state.elt_dma;
+ 	void *out;
+ 
+-	if (r300IsGartMemory(rmesa, elts, n_elts * 4)) {
+-		rvb->address = rmesa->radeon.radeonScreen->gartTextures.map;
+-		rvb->start = ((char *)elts) - rvb->address;
+-		rvb->aos_offset =
+-		    rmesa->radeon.radeonScreen->gart_texture_offset +
+-		    rvb->start;
+-		return;
+-	} else if (r300IsGartMemory(rmesa, elts, 1)) {
+-		WARN_ONCE("Pointer not within GART memory!\n");
+-		_mesa_exit(-1);
+-	}
+-
+-	r300AllocDmaRegion(rmesa, rvb, n_elts * 4, 4);
+-	rvb->aos_offset = GET_START(rvb);
+-
+-	out = rvb->address + rvb->start;
++	radeonAllocDmaRegion(&rmesa->radeon, &rmesa->state.elt_dma_bo,
++			     &rmesa->state.elt_dma_offset, n_elts * 4, 4);
++	radeon_bo_map(rmesa->state.elt_dma_bo, 1);
++	out = rmesa->state.elt_dma_bo->ptr + rmesa->state.elt_dma_offset;
+ 	memcpy(out, elts, n_elts * 4);
++	radeon_bo_unmap(rmesa->state.elt_dma_bo);
+ }
+ 
+-static void r300FireEB(r300ContextPtr rmesa, unsigned long addr,
+-		       int vertex_count, int type)
++static void r300FireEB(r300ContextPtr rmesa, int vertex_count, int type)
+ {
+-	int cmd_reserved = 0;
+-	int cmd_written = 0;
+-	drm_radeon_cmd_header_t *cmd = NULL;
+-
+-	start_packet3(CP_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0), 0);
+-	e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count << 16) | type | R300_VAP_VF_CNTL__INDEX_SIZE_32bit);
+-
+-	start_packet3(CP_PACKET3(R300_PACKET3_INDX_BUFFER, 2), 2);
+-	e32(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) |
+-	    (R300_VAP_PORT_IDX0 >> 2));
+-	e32(addr);
+-	e32(vertex_count);
++	BATCH_LOCALS(&rmesa->radeon);
++
++	if (vertex_count > 0) {
++		BEGIN_BATCH(10);
++		OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0);
++		OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES |
++			  ((vertex_count + 0) << 16) |
++			  type |
++			  R300_VAP_VF_CNTL__INDEX_SIZE_32bit);
++		
++		if (!rmesa->radeon.radeonScreen->kernel_mm) {
++			OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
++			OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) |
++	    			 (R300_VAP_PORT_IDX0 >> 2));
++			OUT_BATCH_RELOC(rmesa->state.elt_dma_offset,
++					rmesa->state.elt_dma_bo,
++					rmesa->state.elt_dma_offset,
++					RADEON_GEM_DOMAIN_GTT, 0, 0);
++			OUT_BATCH(vertex_count);
++		} else {
++			OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
++			OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) |
++	    			 (R300_VAP_PORT_IDX0 >> 2));
++			OUT_BATCH(rmesa->state.elt_dma_offset);
++			OUT_BATCH(vertex_count);
++			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
++					      rmesa->state.elt_dma_bo,
++					      RADEON_GEM_DOMAIN_GTT, 0, 0);
++		}
++		END_BATCH();
++	}
+ }
+ 
+ static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset)
+ {
++	BATCH_LOCALS(&rmesa->radeon);
++	uint32_t voffset;
+ 	int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2;
+ 	int i;
+-	int cmd_reserved = 0;
+-	int cmd_written = 0;
+-	drm_radeon_cmd_header_t *cmd = NULL;
+-
++	
+ 	if (RADEON_DEBUG & DEBUG_VERTS)
+ 		fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__, nr,
+ 			offset);
+ 
+-	start_packet3(CP_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1), sz - 1);
+-	e32(nr);
+-
+-	for (i = 0; i + 1 < nr; i += 2) {
+-		e32((rmesa->state.aos[i].aos_size << 0) |
+-		    (rmesa->state.aos[i].aos_stride << 8) |
+-		    (rmesa->state.aos[i + 1].aos_size << 16) |
+-		    (rmesa->state.aos[i + 1].aos_stride << 24));
++    
++	if (!rmesa->radeon.radeonScreen->kernel_mm) {
++		BEGIN_BATCH(sz+2+(nr * 2));
++		OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1);
++		OUT_BATCH(nr);
++
++		for (i = 0; i + 1 < nr; i += 2) {
++			OUT_BATCH((rmesa->state.aos[i].components << 0) |
++				  (rmesa->state.aos[i].stride << 8) |
++				  (rmesa->state.aos[i + 1].components << 16) |
++				  (rmesa->state.aos[i + 1].stride << 24));
++			
++			voffset =  rmesa->state.aos[i + 0].offset +
++				offset * 4 * rmesa->state.aos[i + 0].stride;
++			OUT_BATCH_RELOC(voffset,
++					rmesa->state.aos[i].bo,
++					voffset,
++					RADEON_GEM_DOMAIN_GTT,
++					0, 0);
++			voffset =  rmesa->state.aos[i + 1].offset +
++			  offset * 4 * rmesa->state.aos[i + 1].stride;
++			OUT_BATCH_RELOC(voffset,
++					rmesa->state.aos[i+1].bo,
++					voffset,
++					RADEON_GEM_DOMAIN_GTT,
++					0, 0);
++		}
++		
++		if (nr & 1) {
++			OUT_BATCH((rmesa->state.aos[nr - 1].components << 0) |
++				  (rmesa->state.aos[nr - 1].stride << 8));
++			voffset =  rmesa->state.aos[nr - 1].offset +
++				offset * 4 * rmesa->state.aos[nr - 1].stride;
++			OUT_BATCH_RELOC(voffset,
++					rmesa->state.aos[nr - 1].bo,
++					voffset,
++					RADEON_GEM_DOMAIN_GTT,
++					0, 0);
++		}
++		END_BATCH();
++	} else {
+ 
+-		e32(rmesa->state.aos[i].aos_offset + offset * 4 * rmesa->state.aos[i].aos_stride);
+-		e32(rmesa->state.aos[i + 1].aos_offset + offset * 4 * rmesa->state.aos[i + 1].aos_stride);
++		BEGIN_BATCH(sz+2+(nr * 2));
++		OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1);
++		OUT_BATCH(nr);
++
++		for (i = 0; i + 1 < nr; i += 2) {
++			OUT_BATCH((rmesa->state.aos[i].components << 0) |
++				  (rmesa->state.aos[i].stride << 8) |
++				  (rmesa->state.aos[i + 1].components << 16) |
++				  (rmesa->state.aos[i + 1].stride << 24));
++			
++			voffset =  rmesa->state.aos[i + 0].offset +
++				offset * 4 * rmesa->state.aos[i + 0].stride;
++			OUT_BATCH(voffset);
++			voffset =  rmesa->state.aos[i + 1].offset +
++				offset * 4 * rmesa->state.aos[i + 1].stride;
++			OUT_BATCH(voffset);
++		}
++		
++		if (nr & 1) {
++			OUT_BATCH((rmesa->state.aos[nr - 1].components << 0) |
++			  (rmesa->state.aos[nr - 1].stride << 8));
++			voffset =  rmesa->state.aos[nr - 1].offset +
++				offset * 4 * rmesa->state.aos[nr - 1].stride;
++			OUT_BATCH(voffset);
++		}
++		for (i = 0; i + 1 < nr; i += 2) {
++			voffset =  rmesa->state.aos[i + 0].offset +
++				offset * 4 * rmesa->state.aos[i + 0].stride;
++			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
++					      rmesa->state.aos[i+0].bo,
++					      RADEON_GEM_DOMAIN_GTT,
++					      0, 0);
++			voffset =  rmesa->state.aos[i + 1].offset +
++				offset * 4 * rmesa->state.aos[i + 1].stride;
++			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
++					      rmesa->state.aos[i+1].bo,
++					      RADEON_GEM_DOMAIN_GTT,
++					      0, 0);
++		}
++		if (nr & 1) {
++			voffset =  rmesa->state.aos[nr - 1].offset +
++				offset * 4 * rmesa->state.aos[nr - 1].stride;
++			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
++					      rmesa->state.aos[nr-1].bo,
++					      RADEON_GEM_DOMAIN_GTT,
++					      0, 0);
++		}
++		END_BATCH();
+ 	}
+ 
+-	if (nr & 1) {
+-		e32((rmesa->state.aos[nr - 1].aos_size << 0) |
+-		    (rmesa->state.aos[nr - 1].aos_stride << 8));
+-		e32(rmesa->state.aos[nr - 1].aos_offset + offset * 4 * rmesa->state.aos[nr - 1].aos_stride);
+-	}
+ }
+ 
+ static void r300FireAOS(r300ContextPtr rmesa, int vertex_count, int type)
+ {
+-	int cmd_reserved = 0;
+-	int cmd_written = 0;
+-	drm_radeon_cmd_header_t *cmd = NULL;
++	BATCH_LOCALS(&rmesa->radeon);
+ 
+-	start_packet3(CP_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0), 0);
+-	e32(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) | type);
++	BEGIN_BATCH(3);
++	OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
++	OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) | type);
++	END_BATCH();
+ }
+ 
+ static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx,
+@@ -269,6 +346,12 @@ static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx,
+ 	if (type < 0 || num_verts <= 0)
+ 		return;
+ 
++	/* Make space for at least 64 dwords.
++	 * This is supposed to ensure that we can get all rendering
++	 * commands into a single command buffer.
++	 */
++	rcommonEnsureCmdBufSpace(&rmesa->radeon, 64, __FUNCTION__);
++
+ 	if (vb->Elts) {
+ 		if (num_verts > 65535) {
+ 			/* not implemented yet */
+@@ -288,11 +371,12 @@ static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx,
+ 		 */
+ 		r300EmitElts(ctx, vb->Elts, num_verts);
+ 		r300EmitAOS(rmesa, rmesa->state.aos_count, start);
+-		r300FireEB(rmesa, rmesa->state.elt_dma.aos_offset, num_verts, type);
++		r300FireEB(rmesa, num_verts, type);
+ 	} else {
+ 		r300EmitAOS(rmesa, rmesa->state.aos_count, start);
+ 		r300FireAOS(rmesa, num_verts, type);
+ 	}
++	COMMIT_BATCH();
+ }
+ 
+ static GLboolean r300RunRender(GLcontext * ctx,
+@@ -303,7 +387,6 @@ static GLboolean r300RunRender(GLcontext * ctx,
+ 	TNLcontext *tnl = TNL_CONTEXT(ctx);
+ 	struct vertex_buffer *vb = &tnl->vb;
+ 
+-
+ 	if (RADEON_DEBUG & DEBUG_PRIMS)
+ 		fprintf(stderr, "%s\n", __FUNCTION__);
+ 
+@@ -314,7 +397,7 @@ static GLboolean r300RunRender(GLcontext * ctx,
+ 	r300UpdateShaderStates(rmesa);
+ 
+ 	r300EmitCacheFlush(rmesa);
+-	r300EmitState(rmesa);
++	radeonEmitState(&rmesa->radeon);
+ 
+ 	for (i = 0; i < vb->PrimitiveCount; i++) {
+ 		GLuint prim = _tnl_translate_prim(&vb->Primitive[i]);
+@@ -325,10 +408,6 @@ static GLboolean r300RunRender(GLcontext * ctx,
+ 
+ 	r300EmitCacheFlush(rmesa);
+ 
+-#ifdef USER_BUFFERS
+-	r300UseArrays(ctx);
+-#endif
+-
+ 	r300ReleaseArrays(ctx);
+ 
+ 	return GL_FALSE;
+@@ -432,6 +511,9 @@ static GLboolean r300RunTCLRender(GLcontext * ctx,
+ 		return GL_TRUE;
+ 	}
+ 
++	if (!r300ValidateBuffers(ctx))
++	    return GL_TRUE;
++	
+ 	r300UpdateShaders(rmesa);
+ 
+ 	vp = (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx);
+diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c
+index c192fec..93ef06f 100644
+--- a/src/mesa/drivers/dri/r300/r300_state.c
++++ b/src/mesa/drivers/dri/r300/r300_state.c
+@@ -53,8 +53,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "vbo/vbo.h"
+ #include "tnl/tnl.h"
+ 
+-#include "radeon_ioctl.h"
+-#include "radeon_state.h"
+ #include "r300_context.h"
+ #include "r300_ioctl.h"
+ #include "r300_state.h"
+@@ -590,7 +588,7 @@ static void r300SetStencilState(GLcontext * ctx, GLboolean state)
+ {
+ 	r300ContextPtr r300 = R300_CONTEXT(ctx);
+ 
+-	if (r300->state.stencil.hw_stencil) {
++	if (r300->radeon.state.stencil.hwBuffer) {
+ 		R300_STATECHANGE(r300, zs);
+ 		if (state) {
+ 			r300->hw.zs.cmd[R300_ZS_CNTL_0] |=
+@@ -1068,10 +1066,10 @@ static void r300UpdateWindow(GLcontext * ctx)
+ 	GLfloat tx = v[MAT_TX] + xoffset + SUBPIXEL_X;
+ 	GLfloat sy = -v[MAT_SY];
+ 	GLfloat ty = (-v[MAT_TY]) + yoffset + SUBPIXEL_Y;
+-	GLfloat sz = v[MAT_SZ] * rmesa->state.depth.scale;
+-	GLfloat tz = v[MAT_TZ] * rmesa->state.depth.scale;
++	GLfloat sz = v[MAT_SZ] * rmesa->radeon.state.depth.scale;
++	GLfloat tz = v[MAT_TZ] * rmesa->radeon.state.depth.scale;
+ 
+-	R300_FIREVERTICES(rmesa);
++	radeon_firevertices(&rmesa->radeon);
+ 	R300_STATECHANGE(rmesa, vpt);
+ 
+ 	rmesa->hw.vpt.cmd[R300_VPT_XSCALE] = r300PackFloat32(sx);
+@@ -1085,10 +1083,19 @@ static void r300UpdateWindow(GLcontext * ctx)
+ static void r300Viewport(GLcontext * ctx, GLint x, GLint y,
+ 			 GLsizei width, GLsizei height)
+ {
++	r300ContextPtr rmesa = R300_CONTEXT(ctx);
++    __DRIcontext *driContext = rmesa->radeon.dri.context;
+ 	/* Don't pipeline viewport changes, conflict with window offset
+ 	 * setting below.  Could apply deltas to rescue pipelined viewport
+ 	 * values, or keep the originals hanging around.
+ 	 */
++    if (rmesa->radeon.radeonScreen->driScreen->dri2.enabled) {
++        radeon_update_renderbuffers(driContext, driContext->driDrawablePriv);
++        if (driContext->driDrawablePriv != driContext->driReadablePriv) {
++            radeon_update_renderbuffers(driContext,
++                                        driContext->driReadablePriv);
++        }
++    }
+ 	r300UpdateWindow(ctx);
+ }
+ 
+@@ -1129,55 +1136,25 @@ void r300UpdateViewportOffset(GLcontext * ctx)
+ void r300UpdateDrawBuffer(GLcontext * ctx)
+ {
+ 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
+-	r300ContextPtr r300 = rmesa;
+ 	struct gl_framebuffer *fb = ctx->DrawBuffer;
+-	driRenderbuffer *drb;
++	struct radeon_renderbuffer *rrb;
+ 
+ 	if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) {
+ 		/* draw to front */
+-		drb =
+-		    (driRenderbuffer *) fb->Attachment[BUFFER_FRONT_LEFT].
+-		    Renderbuffer;
++		rrb =
++		    (void *) fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
+ 	} else if (fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) {
+ 		/* draw to back */
+-		drb =
+-		    (driRenderbuffer *) fb->Attachment[BUFFER_BACK_LEFT].
+-		    Renderbuffer;
++		rrb = (void *) fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
+ 	} else {
+ 		/* drawing to multiple buffers, or none */
+ 		return;
+ 	}
+ 
+-	assert(drb);
+-	assert(drb->flippedPitch);
++	assert(rrb);
++	assert(rrb->pitch);
+ 
+ 	R300_STATECHANGE(rmesa, cb);
+-
+-	r300->hw.cb.cmd[R300_CB_OFFSET] = drb->flippedOffset +	//r300->radeon.state.color.drawOffset +
+-	    r300->radeon.radeonScreen->fbLocation;
+-	r300->hw.cb.cmd[R300_CB_PITCH] = drb->flippedPitch;	//r300->radeon.state.color.drawPitch;
+-
+-	if (r300->radeon.radeonScreen->cpp == 4)
+-		r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_ARGB8888;
+-	else
+-		r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_RGB565;
+-
+-	if (r300->radeon.sarea->tiling_enabled)
+-		r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_TILE_ENABLE;
+-#if 0
+-	R200_STATECHANGE(rmesa, ctx);
+-
+-	/* Note: we used the (possibly) page-flipped values */
+-	rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET]
+-	    = ((drb->flippedOffset + rmesa->r200Screen->fbLocation)
+-	       & R200_COLOROFFSET_MASK);
+-	rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = drb->flippedPitch;
+-
+-	if (rmesa->sarea->tiling_enabled) {
+-		rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |=
+-		    R200_COLOR_TILE_ENABLE;
+-	}
+-#endif
+ }
+ 
+ static void
+@@ -1397,7 +1374,8 @@ static void r300SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings)
+ 	}
+ 
+ 	r300->hw.fpt.cmd[R300_FPT_CMD_0] =
+-		cmdpacket0(R300_US_TEX_INST_0, code->tex.length);
++		cmdpacket0(r300->radeon.radeonScreen,
++                   R300_US_TEX_INST_0, code->tex.length);
+ }
+ 
+ static void r500SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings)
+@@ -1448,7 +1426,7 @@ static GLuint translate_lod_bias(GLfloat bias)
+ static void r300SetupTextures(GLcontext * ctx)
+ {
+ 	int i, mtu;
+-	struct r300_tex_obj *t;
++	struct radeon_tex_obj *t;
+ 	r300ContextPtr r300 = R300_CONTEXT(ctx);
+ 	int hw_tmu = 0;
+ 	int last_hw_tmu = -1;	/* -1 translates into no setup costs for fields */
+@@ -1482,21 +1460,16 @@ static void r300SetupTextures(GLcontext * ctx)
+ 	/* We cannot let disabled tmu offsets pass DRM */
+ 	for (i = 0; i < mtu; i++) {
+ 		if (ctx->Texture.Unit[i]._ReallyEnabled) {
+-
+-#if 0				/* Enables old behaviour */
+-			hw_tmu = i;
+-#endif
+ 			tmu_mappings[i] = hw_tmu;
+ 
+-			t = r300->state.texture.unit[i].texobj;
+-			/* XXX questionable fix for bug 9170: */
++			t = radeon_tex_obj(ctx->Texture.Unit[i]._Current);
+ 			if (!t)
+ 				continue;
+ 
+-			if ((t->format & 0xffffff00) == 0xffffff00) {
++			if ((t->pp_txformat & 0xffffff00) == 0xffffff00) {
+ 				WARN_ONCE
+ 				    ("unknown texture format (entry %x) encountered. Help me !\n",
+-				     t->format & 0xff);
++				     t->pp_txformat & 0xff);
+ 			}
+ 
+ 			if (RADEON_DEBUG & DEBUG_STATE)
+@@ -1507,29 +1480,28 @@ static void r300SetupTextures(GLcontext * ctx)
+ 
+ 			r300->hw.tex.filter.cmd[R300_TEX_VALUE_0 +
+ 						hw_tmu] =
+-			    gen_fixed_filter(t->filter) | (hw_tmu << 28);
++			    gen_fixed_filter(t->pp_txfilter) | (hw_tmu << 28);
+ 			/* Note: There is a LOD bias per texture unit and a LOD bias
+ 			 * per texture object. We add them here to get the correct behaviour.
+ 			 * (The per-texture object LOD bias was introduced in OpenGL 1.4
+ 			 * and is not present in the EXT_texture_object extension).
+ 			 */
+ 			r300->hw.tex.filter_1.cmd[R300_TEX_VALUE_0 + hw_tmu] =
+-				t->filter_1 |
+-				translate_lod_bias(ctx->Texture.Unit[i].LodBias + t->base.tObj->LodBias);
++				t->pp_txfilter_1 |
++				translate_lod_bias(ctx->Texture.Unit[i].LodBias + t->base.LodBias);
+ 			r300->hw.tex.size.cmd[R300_TEX_VALUE_0 + hw_tmu] =
+-			    t->size;
++			    t->pp_txsize;
+ 			r300->hw.tex.format.cmd[R300_TEX_VALUE_0 +
+-						hw_tmu] = t->format;
++						hw_tmu] = t->pp_txformat;
+ 			r300->hw.tex.pitch.cmd[R300_TEX_VALUE_0 + hw_tmu] =
+-			    t->pitch_reg;
+-			r300->hw.tex.offset.cmd[R300_TEX_VALUE_0 +
+-						hw_tmu] = t->offset;
++			  t->pp_txpitch;
++			r300->hw.textures[hw_tmu] = t;
+ 
+-			if (t->offset & R300_TXO_MACRO_TILE) {
++			if (t->tile_bits & R300_TXO_MACRO_TILE) {
+ 				WARN_ONCE("macro tiling enabled!\n");
+ 			}
+ 
+-			if (t->offset & R300_TXO_MICRO_TILE) {
++			if (t->tile_bits & R300_TXO_MICRO_TILE) {
+ 				WARN_ONCE("micro tiling enabled!\n");
+ 			}
+ 
+@@ -1546,21 +1518,21 @@ static void r300SetupTextures(GLcontext * ctx)
+ 	}
+ 
+ 	r300->hw.tex.filter.cmd[R300_TEX_CMD_0] =
+-	    cmdpacket0(R300_TX_FILTER0_0, last_hw_tmu + 1);
++	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER0_0, last_hw_tmu + 1);
+ 	r300->hw.tex.filter_1.cmd[R300_TEX_CMD_0] =
+-	    cmdpacket0(R300_TX_FILTER1_0, last_hw_tmu + 1);
++	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER1_0, last_hw_tmu + 1);
+ 	r300->hw.tex.size.cmd[R300_TEX_CMD_0] =
+-	    cmdpacket0(R300_TX_SIZE_0, last_hw_tmu + 1);
++	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_SIZE_0, last_hw_tmu + 1);
+ 	r300->hw.tex.format.cmd[R300_TEX_CMD_0] =
+-	    cmdpacket0(R300_TX_FORMAT_0, last_hw_tmu + 1);
++	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_FORMAT_0, last_hw_tmu + 1);
+ 	r300->hw.tex.pitch.cmd[R300_TEX_CMD_0] =
+-	    cmdpacket0(R300_TX_FORMAT2_0, last_hw_tmu + 1);
++	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_FORMAT2_0, last_hw_tmu + 1);
+ 	r300->hw.tex.offset.cmd[R300_TEX_CMD_0] =
+-	    cmdpacket0(R300_TX_OFFSET_0, last_hw_tmu + 1);
++	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_OFFSET_0, last_hw_tmu + 1);
+ 	r300->hw.tex.chroma_key.cmd[R300_TEX_CMD_0] =
+-	    cmdpacket0(R300_TX_CHROMA_KEY_0, last_hw_tmu + 1);
++	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_CHROMA_KEY_0, last_hw_tmu + 1);
+ 	r300->hw.tex.border_color.cmd[R300_TEX_CMD_0] =
+-	    cmdpacket0(R300_TX_BORDER_COLOR_0, last_hw_tmu + 1);
++	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_BORDER_COLOR_0, last_hw_tmu + 1);
+ 
+ 	if (!fp)		/* should only happenen once, just after context is created */
+ 		return;
+@@ -1572,7 +1544,7 @@ static void r300SetupTextures(GLcontext * ctx)
+ 			r300->hw.txe.cmd[R300_TXE_ENABLE] |= 1;
+ 			r300->hw.tex.filter.cmd[R300_TEX_VALUE_0] = 0;
+ 			r300->hw.tex.filter.cmd[R300_TEX_CMD_0] =
+-				cmdpacket0(R300_TX_FILTER0_0, 1);
++				cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER0_0, 1);
+ 		}
+ 		r300SetupFragmentShaderTextures(ctx, tmu_mappings);
+ 	} else
+@@ -1741,7 +1713,7 @@ static void r300SetupRSUnit(GLcontext * ctx)
+ 	  | R300_HIRES_EN;
+ 
+ 	assert(high_rr >= 0);
+-	r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R300_RS_INST_0, high_rr + 1);
++	r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_INST_0, high_rr + 1);
+ 	r300->hw.rc.cmd[2] = high_rr;
+ 
+ 	if (InputsRead)
+@@ -1901,7 +1873,7 @@ static void r500SetupRSUnit(GLcontext * ctx)
+ 	  | R300_HIRES_EN;
+ 
+ 	assert(high_rr >= 0);
+-	r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R500_RS_INST_0, high_rr + 1);
++	r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R500_RS_INST_0, high_rr + 1);
+ 	r300->hw.rc.cmd[2] = 0xC0 | high_rr;
+ 
+ 	if (InputsRead)
+@@ -2099,6 +2071,7 @@ static void r300SetupRealVertexProgram(r300ContextPtr rmesa)
+ 	  (inst_count << R300_PVS_LAST_VTX_SRC_INST_SHIFT);
+ }
+ 
++
+ static void r300SetupVertexProgram(r300ContextPtr rmesa)
+ {
+ 	GLcontext *ctx = rmesa->radeon.glCtx;
+@@ -2128,6 +2101,7 @@ static void r300SetupVertexProgram(r300ContextPtr rmesa)
+  */
+ static void r300Enable(GLcontext * ctx, GLenum cap, GLboolean state)
+ {
++	r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ 	if (RADEON_DEBUG & DEBUG_STATE)
+ 		fprintf(stderr, "%s( %s = %s )\n", __FUNCTION__,
+ 			_mesa_lookup_enum_by_nr(cap),
+@@ -2173,8 +2147,12 @@ static void r300Enable(GLcontext * ctx, GLenum cap, GLboolean state)
+ 	case GL_POLYGON_OFFSET_FILL:
+ 		r300SetPolygonOffsetState(ctx, state);
+ 		break;
++	case GL_SCISSOR_TEST:
++		radeon_firevertices(&rmesa->radeon);
++		rmesa->radeon.state.scissor.enabled = state;
++		radeonUpdateScissor( ctx );
++		break;
+ 	default:
+-		radeonEnable(ctx, cap, state);
+ 		break;
+ 	}
+ }
+@@ -2185,6 +2163,7 @@ static void r300Enable(GLcontext * ctx, GLenum cap, GLboolean state)
+ static void r300ResetHwState(r300ContextPtr r300)
+ {
+ 	GLcontext *ctx = r300->radeon.glCtx;
++	struct radeon_renderbuffer *rrb;
+ 	int has_tcl = 1;
+ 
+ 	if (!(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL))
+@@ -2215,8 +2194,6 @@ static void r300ResetHwState(r300ContextPtr r300)
+ 
+ 	r300UpdateCulling(ctx);
+ 
+-	r300UpdateTextureState(ctx);
+-
+ 	r300SetBlendState(ctx);
+ 	r300SetLogicOpState(ctx);
+ 
+@@ -2363,20 +2340,6 @@ static void r300ResetHwState(r300ContextPtr r300)
+ 
+ 	r300BlendColor(ctx, ctx->Color.BlendColor);
+ 
+-	/* Again, r300ClearBuffer uses this */
+-	r300->hw.cb.cmd[R300_CB_OFFSET] =
+-	    r300->radeon.state.color.drawOffset +
+-	    r300->radeon.radeonScreen->fbLocation;
+-	r300->hw.cb.cmd[R300_CB_PITCH] = r300->radeon.state.color.drawPitch;
+-
+-	if (r300->radeon.radeonScreen->cpp == 4)
+-		r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_ARGB8888;
+-	else
+-		r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_RGB565;
+-
+-	if (r300->radeon.sarea->tiling_enabled)
+-		r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_TILE_ENABLE;
+-
+ 	r300->hw.rb3d_dither_ctl.cmd[1] = 0;
+ 	r300->hw.rb3d_dither_ctl.cmd[2] = 0;
+ 	r300->hw.rb3d_dither_ctl.cmd[3] = 0;
+@@ -2392,12 +2355,8 @@ static void r300ResetHwState(r300ContextPtr r300)
+ 	r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[1] = 0x00000000;
+ 	r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[2] = 0xffffffff;
+ 
+-	r300->hw.zb.cmd[R300_ZB_OFFSET] =
+-	    r300->radeon.radeonScreen->depthOffset +
+-	    r300->radeon.radeonScreen->fbLocation;
+-	r300->hw.zb.cmd[R300_ZB_PITCH] = r300->radeon.radeonScreen->depthPitch;
+-
+-	if (r300->radeon.sarea->tiling_enabled) {
++	rrb = r300->radeon.state.depth.rrb;
++	if (rrb && rrb->bo && (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE)) {
+ 		/* XXX: Turn off when clearing buffers ? */
+ 		r300->hw.zb.cmd[R300_ZB_PITCH] |= R300_DEPTHMACROTILE_ENABLE;
+ 
+@@ -2440,7 +2399,7 @@ static void r300ResetHwState(r300ContextPtr r300)
+ 		r300->hw.vps.cmd[R300_VPS_ZERO_3] = 0;
+ 	}
+ 
+-	r300->hw.all_dirty = GL_TRUE;
++	r300->radeon.hw.all_dirty = GL_TRUE;
+ }
+ 
+ void r300UpdateShaders(r300ContextPtr rmesa)
+@@ -2451,8 +2410,8 @@ void r300UpdateShaders(r300ContextPtr rmesa)
+ 
+ 	ctx = rmesa->radeon.glCtx;
+ 
+-	if (rmesa->NewGLState && hw_tcl_on) {
+-		rmesa->NewGLState = 0;
++	if (rmesa->radeon.NewGLState && hw_tcl_on) {
++		rmesa->radeon.NewGLState = 0;
+ 
+ 		for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) {
+ 			rmesa->temp_attrib[i] =
+@@ -2531,10 +2490,10 @@ static void r300SetupPixelShader(r300ContextPtr rmesa)
+ 	R300_STATECHANGE(rmesa, fpi[1]);
+ 	R300_STATECHANGE(rmesa, fpi[2]);
+ 	R300_STATECHANGE(rmesa, fpi[3]);
+-	rmesa->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_RGB_INST_0, code->alu.length);
+-	rmesa->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_RGB_ADDR_0, code->alu.length);
+-	rmesa->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_ALPHA_INST_0, code->alu.length);
+-	rmesa->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_ALPHA_ADDR_0, code->alu.length);
++	rmesa->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_US_ALU_RGB_INST_0, code->alu.length);
++	rmesa->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_US_ALU_RGB_ADDR_0, code->alu.length);
++	rmesa->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_US_ALU_ALPHA_INST_0, code->alu.length);
++	rmesa->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_US_ALU_ALPHA_ADDR_0, code->alu.length);
+ 	for (i = 0; i < code->alu.length; i++) {
+ 		rmesa->hw.fpi[0].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].inst0;
+ 		rmesa->hw.fpi[1].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].inst1;
+@@ -2565,7 +2524,7 @@ static void r300SetupPixelShader(r300ContextPtr rmesa)
+ 	}
+ 
+ 	R300_STATECHANGE(rmesa, fpp);
+-	rmesa->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(R300_PFS_PARAM_0_X, code->const_nr * 4);
++	rmesa->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_PFS_PARAM_0_X, code->const_nr * 4);
+ 	for (i = 0; i < code->const_nr; i++) {
+ 		const GLfloat *constant = get_fragmentprogram_constant(ctx,
+ 			&fp->mesa_program.Base, code->constant[i]);
+@@ -2667,7 +2626,6 @@ void r300UpdateShaderStates(r300ContextPtr rmesa)
+ 	GLcontext *ctx;
+ 	ctx = rmesa->radeon.glCtx;
+ 
+-	r300UpdateTextureState(ctx);
+ 	r300SetEarlyZState(ctx);
+ 
+ 	GLuint fgdepthsrc = R300_FG_DEPTH_SRC_SCAN;
+@@ -2712,7 +2670,7 @@ static void r300InvalidateState(GLcontext * ctx, GLuint new_state)
+ 
+ 	r300UpdateStateParameters(ctx, new_state);
+ 
+-	r300->NewGLState |= new_state;
++	r300->radeon.NewGLState |= new_state;
+ }
+ 
+ /**
+@@ -2725,15 +2683,13 @@ void r300InitState(r300ContextPtr r300)
+ 	GLcontext *ctx = r300->radeon.glCtx;
+ 	GLuint depth_fmt;
+ 
+-	radeonInitState(&r300->radeon);
+-
+ 	switch (ctx->Visual.depthBits) {
+ 	case 16:
+-		r300->state.depth.scale = 1.0 / (GLfloat) 0xffff;
++		r300->radeon.state.depth.scale = 1.0 / (GLfloat) 0xffff;
+ 		depth_fmt = R300_DEPTHFORMAT_16BIT_INT_Z;
+ 		break;
+ 	case 24:
+-		r300->state.depth.scale = 1.0 / (GLfloat) 0xffffff;
++		r300->radeon.state.depth.scale = 1.0 / (GLfloat) 0xffffff;
+ 		depth_fmt = R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL;
+ 		break;
+ 	default:
+@@ -2743,8 +2699,8 @@ void r300InitState(r300ContextPtr r300)
+ 	}
+ 
+ 	/* Only have hw stencil when depth buffer is 24 bits deep */
+-	r300->state.stencil.hw_stencil = (ctx->Visual.stencilBits > 0 &&
+-					  ctx->Visual.depthBits == 24);
++	r300->radeon.state.stencil.hwBuffer = (ctx->Visual.stencilBits > 0 &&
++					       ctx->Visual.depthBits == 24);
+ 
+ 	memset(&(r300->state.texture), 0, sizeof(r300->state.texture));
+ 
+@@ -2776,12 +2732,32 @@ void r300UpdateClipPlanes( GLcontext *ctx )
+ 	}
+ }
+ 
++static void r300DrawBuffer( GLcontext *ctx, GLenum mode )
++{
++	r300ContextPtr rmesa = R300_CONTEXT(ctx);
++	if (RADEON_DEBUG & DEBUG_DRI)
++		fprintf(stderr, "%s %s\n", __FUNCTION__,
++			_mesa_lookup_enum_by_nr( mode ));
++	/* mode is only used by the debug trace above; flush before touching cliprects. */
++	radeon_firevertices(&rmesa->radeon);	/* don't pipeline cliprect changes */
++	/* Hand off to the shared radeon helpers for cliprects and page flipping. */
++	radeonSetCliprects( &rmesa->radeon );
++	radeonUpdatePageFlipping(&rmesa->radeon);
++}
++
++static void r300ReadBuffer( GLcontext *ctx, GLenum mode )
++{
++	/* Only traces the requested mode under DEBUG_DRI; no state is changed here. */
++	if (RADEON_DEBUG & DEBUG_DRI)
++		fprintf(stderr, "%s %s\n", __FUNCTION__,
++			_mesa_lookup_enum_by_nr( mode ));
++}
++
+ /**
+  * Initialize driver's state callback functions
+  */
+ void r300InitStateFuncs(struct dd_function_table *functions)
+ {
+-	radeonInitStateFuncs(functions);
+ 
+ 	functions->UpdateState = r300InvalidateState;
+ 	functions->AlphaFunc = r300AlphaFunc;
+@@ -2818,4 +2794,8 @@ void r300InitStateFuncs(struct dd_function_table *functions)
+ 	functions->RenderMode = r300RenderMode;
+ 
+ 	functions->ClipPlane = r300ClipPlane;
++	functions->Scissor = radeonScissor;
++
++	functions->DrawBuffer		= r300DrawBuffer;
++	functions->ReadBuffer		= r300ReadBuffer;
+ }
+diff --git a/src/mesa/drivers/dri/r300/r300_state.h b/src/mesa/drivers/dri/r300/r300_state.h
+index 0589ab7..247a20e 100644
+--- a/src/mesa/drivers/dri/r300/r300_state.h
++++ b/src/mesa/drivers/dri/r300/r300_state.h
+@@ -39,32 +39,17 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ #define R300_NEWPRIM( rmesa )			\
+   do {						\
+-    if ( rmesa->dma.flush )			\
+-      rmesa->dma.flush( rmesa );		\
++  if ( rmesa->radeon.dma.flush )			\
++    rmesa->radeon.dma.flush( rmesa->radeon.glCtx );	\
+   } while (0)
+ 
+ #define R300_STATECHANGE(r300, atom) \
+ 	do {						\
+ 	  R300_NEWPRIM(r300);				\
+ 		r300->hw.atom.dirty = GL_TRUE;		\
+-		r300->hw.is_dirty = GL_TRUE;		\
++		r300->radeon.hw.is_dirty = GL_TRUE;		\
+ 	} while(0)
+ 
+-#define R300_PRINT_STATE(r300, atom) \
+-		r300PrintStateAtom(r300, &r300->hw.atom)
+-
+-/* Fire the buffered vertices no matter what.
+-   TODO: This has not been implemented yet
+- */
+-#define R300_FIREVERTICES( r300 )			\
+-do {							\
+-    \
+-   if ( (r300)->cmdbuf.count_used || (r300)->dma.flush ) {	\
+-      r300Flush( (r300)->radeon.glCtx );		\
+-   }							\
+-    \
+-} while (0)
+-
+ // r300_state.c
+ extern int future_hw_tcl_on;
+ void _tnl_UpdateFixedFunctionProgram (GLcontext * ctx);
+diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.c b/src/mesa/drivers/dri/r300/r300_swtcl.c
+index b6e7ce1..d73af86 100644
+--- a/src/mesa/drivers/dri/r300/r300_swtcl.c
++++ b/src/mesa/drivers/dri/r300/r300_swtcl.c
+@@ -56,26 +56,23 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "r300_state.h"
+ #include "r300_ioctl.h"
+ #include "r300_emit.h"
+-#include "r300_mem.h"
++#include "r300_tex.h"
+ 
+-static void flush_last_swtcl_prim( r300ContextPtr rmesa  );
+-
+-
+-void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, GLuint offset);
++void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, struct radeon_bo *bo, GLuint offset);
+ void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vertex_nr);
+ #define EMIT_ATTR( ATTR, STYLE )					\
+ do {									\
+-   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = (ATTR);	\
+-   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = (STYLE);	\
+-   rmesa->swtcl.vertex_attr_count++;					\
++   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = (ATTR);	\
++   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = (STYLE);	\
++   rmesa->radeon.swtcl.vertex_attr_count++;					\
+ } while (0)
+ 
+ #define EMIT_PAD( N )							\
+ do {									\
+-   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = 0;		\
+-   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = EMIT_PAD;	\
+-   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].offset = (N);		\
+-   rmesa->swtcl.vertex_attr_count++;					\
++   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = 0;		\
++   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = EMIT_PAD;	\
++   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].offset = (N);		\
++   rmesa->radeon.swtcl.vertex_attr_count++;					\
+ } while (0)
+ 
+ static void r300SetVertexFormat( GLcontext *ctx )
+@@ -86,7 +83,6 @@ static void r300SetVertexFormat( GLcontext *ctx )
+ 	DECLARE_RENDERINPUTS(index_bitset);
+ 	GLuint InputsRead = 0, OutputsWritten = 0;
+ 	int vap_fmt_0 = 0;
+-	int vap_vte_cntl = 0;
+ 	int offset = 0;
+ 	int vte = 0;
+ 	GLint inputs[VERT_ATTRIB_MAX];
+@@ -114,7 +110,7 @@ static void r300SetVertexFormat( GLcontext *ctx )
+ 	}
+ 
+ 	assert( VB->AttribPtr[VERT_ATTRIB_POS] != NULL );
+-	rmesa->swtcl.vertex_attr_count = 0;
++	rmesa->radeon.swtcl.vertex_attr_count = 0;
+ 
+ 	/* EMIT_ATTR's must be in order as they tell t_vertex.c how to
+ 	 * build up a hardware vertex.
+@@ -175,7 +171,7 @@ static void r300SetVertexFormat( GLcontext *ctx )
+ 			inputs[i] = -1;
+ 		}
+ 	}
+-	
++
+ 	/* Fixed, apply to vir0 only */
+ 	if (InputsRead & (1 << VERT_ATTRIB_POS))
+ 		inputs[VERT_ATTRIB_POS] = 0;
+@@ -186,16 +182,16 @@ static void r300SetVertexFormat( GLcontext *ctx )
+ 	for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX7; i++)
+ 		if (InputsRead & (1 << i))
+ 			inputs[i] = 6 + (i - VERT_ATTRIB_TEX0);
+-	
++
+ 	for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) {
+ 		if (InputsRead & (1 << i)) {
+ 			tab[nr++] = i;
+ 		}
+ 	}
+-	
++
+ 	for (i = 0; i < nr; i++) {
+ 		int ci;
+-		
++
+ 		swizzle[i][0] = SWIZZLE_ZERO;
+ 		swizzle[i][1] = SWIZZLE_ZERO;
+ 		swizzle[i][2] = SWIZZLE_ZERO;
+@@ -215,98 +211,29 @@ static void r300SetVertexFormat( GLcontext *ctx )
+ 	((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count =
+ 		r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle,
+ 				   nr);
+-   
++
+ 	R300_STATECHANGE(rmesa, vic);
+ 	rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead);
+ 	rmesa->hw.vic.cmd[R300_VIC_CNTL_1] = r300VAPInputCntl1(ctx, InputsRead);
+-   
++
+ 	R300_STATECHANGE(rmesa, vof);
+ 	rmesa->hw.vof.cmd[R300_VOF_CNTL_0] = r300VAPOutputCntl0(ctx, OutputsWritten);
+ 	rmesa->hw.vof.cmd[R300_VOF_CNTL_1] = vap_fmt_1;
+-   
+-	rmesa->swtcl.vertex_size =
++
++	rmesa->radeon.swtcl.vertex_size =
+ 		_tnl_install_attrs( ctx,
+-				    rmesa->swtcl.vertex_attrs, 
+-				    rmesa->swtcl.vertex_attr_count,
++				    rmesa->radeon.swtcl.vertex_attrs,
++				    rmesa->radeon.swtcl.vertex_attr_count,
+ 				    NULL, 0 );
+-	
+-	rmesa->swtcl.vertex_size /= 4;
++
++	rmesa->radeon.swtcl.vertex_size /= 4;
+ 
+ 	RENDERINPUTS_COPY( rmesa->tnl_index_bitset, index_bitset );
+ 
+ 
+ 	R300_STATECHANGE(rmesa, vte);
+ 	rmesa->hw.vte.cmd[1] = vte;
+-	rmesa->hw.vte.cmd[2] = rmesa->swtcl.vertex_size;
+-}
+-
+-
+-/* Flush vertices in the current dma region.
+- */
+-static void flush_last_swtcl_prim( r300ContextPtr rmesa  )
+-{
+-	if (RADEON_DEBUG & DEBUG_IOCTL)
+-		fprintf(stderr, "%s\n", __FUNCTION__);
+-	
+-	rmesa->dma.flush = NULL;
+-
+-	if (rmesa->dma.current.buf) {
+-		struct r300_dma_region *current = &rmesa->dma.current;
+-		GLuint current_offset = GET_START(current);
+-
+-		assert (current->start + 
+-			rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
+-			current->ptr);
+-
+-		if (rmesa->dma.current.start != rmesa->dma.current.ptr) {
+-
+-			r300EnsureCmdBufSpace( rmesa, rmesa->hw.max_state_size + (12*sizeof(int)), __FUNCTION__);
+-			
+-			r300EmitState(rmesa);
+-			
+-			r300EmitVertexAOS( rmesa,
+-					   rmesa->swtcl.vertex_size,
+-					   current_offset);
+-			
+-			r300EmitVbufPrim( rmesa,
+-					  rmesa->swtcl.hw_primitive,
+-					  rmesa->swtcl.numverts);
+-			
+-			r300EmitCacheFlush(rmesa);
+-		}
+-		
+-		rmesa->swtcl.numverts = 0;
+-		current->start = current->ptr;
+-	}
+-}
+-
+-/* Alloc space in the current dma region.
+- */
+-static void *
+-r300AllocDmaLowVerts( r300ContextPtr rmesa, int nverts, int vsize )
+-{
+-	GLuint bytes = vsize * nverts;
+-
+-	if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end ) 
+-		r300RefillCurrentDmaRegion( rmesa, bytes);
+-
+-	if (!rmesa->dma.flush) {
+-		rmesa->radeon.glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
+-		rmesa->dma.flush = flush_last_swtcl_prim;
+-	}
+-
+-	ASSERT( vsize == rmesa->swtcl.vertex_size * 4 );
+-	ASSERT( rmesa->dma.flush == flush_last_swtcl_prim );
+-	ASSERT( rmesa->dma.current.start + 
+-		rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
+-		rmesa->dma.current.ptr );
+-
+-	{
+-		GLubyte *head = (GLubyte *) (rmesa->dma.current.address + rmesa->dma.current.ptr);
+-		rmesa->dma.current.ptr += bytes;
+-		rmesa->swtcl.numverts += nverts;
+-		return head;
+-	}
++	rmesa->hw.vte.cmd[2] = rmesa->radeon.swtcl.vertex_size;
+ }
+ 
+ static GLuint reduced_prim[] = {
+@@ -346,13 +273,13 @@ static void r300RenderPrimitive( GLcontext *ctx, GLenum prim );
+ #undef LOCAL_VARS
+ #undef ALLOC_VERTS
+ #define CTX_ARG r300ContextPtr rmesa
+-#define GET_VERTEX_DWORDS() rmesa->swtcl.vertex_size
+-#define ALLOC_VERTS( n, size ) r300AllocDmaLowVerts( rmesa, n, size * 4 )
++#define GET_VERTEX_DWORDS() rmesa->radeon.swtcl.vertex_size
++#define ALLOC_VERTS( n, size ) rcommonAllocDmaLowVerts( &rmesa->radeon, n, size * 4 )
+ #define LOCAL_VARS						\
+    r300ContextPtr rmesa = R300_CONTEXT(ctx);		\
+-   const char *r300verts = (char *)rmesa->swtcl.verts;
++   const char *r300verts = (char *)rmesa->radeon.swtcl.verts;
+ #define VERT(x) (r300Vertex *)(r300verts + ((x) * vertsize * sizeof(int)))
+-#define VERTEX r300Vertex 
++#define VERTEX r300Vertex
+ #define DO_DEBUG_VERTS (1 && (RADEON_DEBUG & DEBUG_VERTS))
+ #define PRINT_VERTEX(x)
+ #undef TAG
+@@ -409,7 +336,7 @@ static struct {
+ #define VERT_Y(_v) _v->v.y
+ #define VERT_Z(_v) _v->v.z
+ #define AREA_IS_CCW( a ) (a < 0)
+-#define GET_VERTEX(e) (rmesa->swtcl.verts + (e*rmesa->swtcl.vertex_size*sizeof(int)))
++#define GET_VERTEX(e) (rmesa->radeon.swtcl.verts + (e*rmesa->radeon.swtcl.vertex_size*sizeof(int)))
+ 
+ /* Only used to pull back colors into vertices (ie, we know color is
+  * floating point).
+@@ -455,7 +382,7 @@ do {							\
+  ***********************************************************************/
+ 
+ #define RASTERIZE(x) r300RasterPrimitive( ctx, reduced_prim[x] )
+-#define RENDER_PRIMITIVE rmesa->swtcl.render_primitive
++#define RENDER_PRIMITIVE rmesa->radeon.swtcl.render_primitive
+ #undef TAG
+ #define TAG(x) x
+ #include "tnl_dd/t_dd_unfilled.h"
+@@ -512,8 +439,8 @@ static void init_rast_tab( void )
+ #undef LOCAL_VARS
+ #define LOCAL_VARS						\
+    r300ContextPtr rmesa = R300_CONTEXT(ctx);		\
+-   const GLuint vertsize = rmesa->swtcl.vertex_size;		\
+-   const char *r300verts = (char *)rmesa->swtcl.verts;		\
++   const GLuint vertsize = rmesa->radeon.swtcl.vertex_size;		\
++   const char *r300verts = (char *)rmesa->radeon.swtcl.verts;		\
+    const GLuint * const elt = TNL_CONTEXT(ctx)->vb.Elts;	\
+    const GLboolean stipple = ctx->Line.StippleFlag;		\
+    (void) elt; (void) stipple;
+@@ -545,7 +472,7 @@ static void r300ChooseRenderState( GLcontext *ctx )
+ 	if (flags & DD_TRI_LIGHT_TWOSIDE) index |= R300_TWOSIDE_BIT;
+ 	if (flags & DD_TRI_UNFILLED)      index |= R300_UNFILLED_BIT;
+ 
+-	if (index != rmesa->swtcl.RenderIndex) {
++	if (index != rmesa->radeon.swtcl.RenderIndex) {
+ 		tnl->Driver.Render.Points = rast_tab[index].points;
+ 		tnl->Driver.Render.Line = rast_tab[index].line;
+ 		tnl->Driver.Render.ClippedLine = rast_tab[index].line;
+@@ -562,7 +489,7 @@ static void r300ChooseRenderState( GLcontext *ctx )
+ 			tnl->Driver.Render.ClippedPolygon = _tnl_RenderClippedPolygon;
+ 		}
+ 
+-		rmesa->swtcl.RenderIndex = index;
++		rmesa->radeon.swtcl.RenderIndex = index;
+ 	}
+ }
+ 
+@@ -572,18 +499,18 @@ static void r300RenderStart(GLcontext *ctx)
+         r300ContextPtr rmesa = R300_CONTEXT( ctx );
+ 	//	fprintf(stderr, "%s\n", __FUNCTION__);
+ 
+-	r300ChooseRenderState(ctx);	
++	r300ChooseRenderState(ctx);
+ 	r300SetVertexFormat(ctx);
+ 
++	r300ValidateBuffers(ctx);
++
+ 	r300UpdateShaders(rmesa);
+ 	r300UpdateShaderStates(rmesa);
+ 
+ 	r300EmitCacheFlush(rmesa);
+-	
+-	if (rmesa->dma.flush != 0 && 
+-	    rmesa->dma.flush != flush_last_swtcl_prim)
+-		rmesa->dma.flush( rmesa );
+-
++	if (rmesa->radeon.dma.flush != NULL) {
++		rmesa->radeon.dma.flush(ctx);
++	}
+ }
+ 
+ static void r300RenderFinish(GLcontext *ctx)
+@@ -593,10 +520,10 @@ static void r300RenderFinish(GLcontext *ctx)
+ static void r300RasterPrimitive( GLcontext *ctx, GLuint hwprim )
+ {
+ 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
+-	
+-	if (rmesa->swtcl.hw_primitive != hwprim) {
++
++	if (rmesa->radeon.swtcl.hw_primitive != hwprim) {
+ 	        R300_NEWPRIM( rmesa );
+-		rmesa->swtcl.hw_primitive = hwprim;
++		rmesa->radeon.swtcl.hw_primitive = hwprim;
+ 	}
+ }
+ 
+@@ -604,14 +531,14 @@ static void r300RenderPrimitive(GLcontext *ctx, GLenum prim)
+ {
+ 
+ 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
+-	rmesa->swtcl.render_primitive = prim;
++	rmesa->radeon.swtcl.render_primitive = prim;
+ 
+ 	if ((prim == GL_TRIANGLES) && (ctx->_TriangleCaps & DD_TRI_UNFILLED))
+ 	  return;
+ 
+ 	r300RasterPrimitive( ctx, reduced_prim[prim] );
+ 	//	fprintf(stderr, "%s\n", __FUNCTION__);
+-	
++
+ }
+ 
+ static void r300ResetLineStipple(GLcontext *ctx)
+@@ -625,12 +552,12 @@ void r300InitSwtcl(GLcontext *ctx)
+ 	TNLcontext *tnl = TNL_CONTEXT(ctx);
+ 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ 	static int firsttime = 1;
+-	
++
+ 	if (firsttime) {
+ 		init_rast_tab();
+ 		firsttime = 0;
+ 	}
+-	
++
+ 	tnl->Driver.Render.Start = r300RenderStart;
+ 	tnl->Driver.Render.Finish = r300RenderFinish;
+ 	tnl->Driver.Render.PrimitiveNotify = r300RenderPrimitive;
+@@ -638,15 +565,15 @@ void r300InitSwtcl(GLcontext *ctx)
+ 	tnl->Driver.Render.BuildVertices = _tnl_build_vertices;
+ 	tnl->Driver.Render.CopyPV = _tnl_copy_pv;
+ 	tnl->Driver.Render.Interp = _tnl_interp;
+-	
++
+ 	/* FIXME: what are these numbers? */
+-	_tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12, 
++	_tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12,
+ 			    48 * sizeof(GLfloat) );
+-	
+-	rmesa->swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf;
+-	rmesa->swtcl.RenderIndex = ~0;
+-	rmesa->swtcl.render_primitive = GL_TRIANGLES;
+-	rmesa->swtcl.hw_primitive = 0;	
++
++	rmesa->radeon.swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf;
++	rmesa->radeon.swtcl.RenderIndex = ~0;
++	rmesa->radeon.swtcl.render_primitive = GL_TRIANGLES;
++	rmesa->radeon.swtcl.hw_primitive = 0;
+ 
+ 	_tnl_invalidate_vertex_state( ctx, ~0 );
+ 	_tnl_invalidate_vertices( ctx, ~0 );
+@@ -655,9 +582,9 @@ void r300InitSwtcl(GLcontext *ctx)
+ 	_tnl_need_projected_coords( ctx, GL_FALSE );
+ 	r300ChooseRenderState(ctx);
+ 
+-	_mesa_validate_all_lighting_tables( ctx ); 
++	_mesa_validate_all_lighting_tables( ctx );
+ 
+-	tnl->Driver.NotifyMaterialChange = 
++	tnl->Driver.NotifyMaterialChange =
+ 	  _mesa_validate_all_lighting_tables;
+ }
+ 
+@@ -665,33 +592,53 @@ void r300DestroySwtcl(GLcontext *ctx)
+ {
+ }
+ 
+-void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, GLuint offset)
++void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, struct radeon_bo *bo, GLuint offset)
+ {
+-	int cmd_reserved = 0;
+-	int cmd_written = 0;
++	BATCH_LOCALS(&rmesa->radeon);
+ 
+-	drm_radeon_cmd_header_t *cmd = NULL;
+ 	if (RADEON_DEBUG & DEBUG_VERTS)
+-	  fprintf(stderr, "%s:  vertex_size %d, offset 0x%x \n",
+-		  __FUNCTION__, vertex_size, offset);
+-
+-	start_packet3(CP_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, 2), 2);
+-	e32(1);
+-	e32(vertex_size | (vertex_size << 8));
+-	e32(offset);
++		fprintf(stderr, "%s:  vertex_size %d, offset 0x%x \n",
++			__FUNCTION__, vertex_size, offset);
++
++	BEGIN_BATCH(5);
++	OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, 2);
++	OUT_BATCH(1);
++	OUT_BATCH(vertex_size | (vertex_size << 8));
++	OUT_BATCH_RELOC(offset, bo, offset, RADEON_GEM_DOMAIN_GTT, 0, 0);
++	END_BATCH();
+ }
+ 
+ void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vertex_nr)
+ {
+-
+-	int cmd_reserved = 0;
+-	int cmd_written = 0;
++	BATCH_LOCALS(&rmesa->radeon);
+ 	int type, num_verts;
+-	drm_radeon_cmd_header_t *cmd = NULL;
+ 
+ 	type = r300PrimitiveType(rmesa, primitive);
+ 	num_verts = r300NumVerts(rmesa, vertex_nr, primitive);
+-	
+-	start_packet3(CP_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0), 0);
+-	e32(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (num_verts << 16) | type);
++
++	BEGIN_BATCH(3);
++	OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
++	OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (num_verts << 16) | type);
++	END_BATCH();
++}
++
++void r300_swtcl_flush(GLcontext *ctx, uint32_t current_offset)
++{
++  r300ContextPtr rmesa = R300_CONTEXT(ctx);
++  /* Emit state, vertex array pointer, draw packet and cache flush for the swtcl vertices in dma.current. */
++  rcommonEnsureCmdBufSpace(&rmesa->radeon,
++			   rmesa->radeon.hw.max_state_size + (12*sizeof(int)),
++			   __FUNCTION__);
++  radeonEmitState(&rmesa->radeon);
++  r300EmitVertexAOS(rmesa,
++		    rmesa->radeon.swtcl.vertex_size,
++		    rmesa->radeon.dma.current,
++		    current_offset);
++  /* Draw swtcl.numverts vertices of the current hw primitive from the array programmed above. */
++  r300EmitVbufPrim(rmesa,
++		   rmesa->radeon.swtcl.hw_primitive,
++		   rmesa->radeon.swtcl.numverts);
++  r300EmitCacheFlush(rmesa);
++  COMMIT_BATCH();
++
++}
+diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.h b/src/mesa/drivers/dri/r300/r300_swtcl.h
+index 55df53c..23b4ce3 100644
+--- a/src/mesa/drivers/dri/r300/r300_swtcl.h
++++ b/src/mesa/drivers/dri/r300/r300_swtcl.h
+@@ -42,4 +42,5 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ extern void r300InitSwtcl( GLcontext *ctx );
+ extern void r300DestroySwtcl( GLcontext *ctx );
+ 
++extern void r300_swtcl_flush(GLcontext *ctx, uint32_t current_offset);
+ #endif
+diff --git a/src/mesa/drivers/dri/r300/r300_tex.c b/src/mesa/drivers/dri/r300/r300_tex.c
+index 8ab382c..27b9070 100644
+--- a/src/mesa/drivers/dri/r300/r300_tex.c
++++ b/src/mesa/drivers/dri/r300/r300_tex.c
+@@ -38,6 +38,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "main/context.h"
+ #include "main/enums.h"
+ #include "main/image.h"
++#include "main/mipmap.h"
+ #include "main/simple_list.h"
+ #include "main/texformat.h"
+ #include "main/texstore.h"
+@@ -49,6 +50,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "r300_context.h"
+ #include "r300_state.h"
+ #include "r300_ioctl.h"
++#include "radeon_mipmap_tree.h"
+ #include "r300_tex.h"
+ 
+ #include "xmlpool.h"
+@@ -77,20 +79,20 @@ static unsigned int translate_wrap_mode(GLenum wrapmode)
+  *
+  * \param t Texture object whose wrap modes are to be set
+  */
+-static void r300UpdateTexWrap(r300TexObjPtr t)
++static void r300UpdateTexWrap(radeonTexObjPtr t)
+ {
+-	struct gl_texture_object *tObj = t->base.tObj;
++	struct gl_texture_object *tObj = &t->base;
+ 
+-	t->filter &=
++	t->pp_txfilter &=
+ 	    ~(R300_TX_WRAP_S_MASK | R300_TX_WRAP_T_MASK | R300_TX_WRAP_R_MASK);
+ 
+-	t->filter |= translate_wrap_mode(tObj->WrapS) << R300_TX_WRAP_S_SHIFT;
++	t->pp_txfilter |= translate_wrap_mode(tObj->WrapS) << R300_TX_WRAP_S_SHIFT;
+ 
+ 	if (tObj->Target != GL_TEXTURE_1D) {
+-		t->filter |= translate_wrap_mode(tObj->WrapT) << R300_TX_WRAP_T_SHIFT;
++		t->pp_txfilter |= translate_wrap_mode(tObj->WrapT) << R300_TX_WRAP_T_SHIFT;
+ 
+ 		if (tObj->Target == GL_TEXTURE_3D)
+-			t->filter |= translate_wrap_mode(tObj->WrapR) << R300_TX_WRAP_R_SHIFT;
++			t->pp_txfilter |= translate_wrap_mode(tObj->WrapR) << R300_TX_WRAP_R_SHIFT;
+ 	}
+ }
+ 
+@@ -117,10 +119,13 @@ static GLuint aniso_filter(GLfloat anisotropy)
+  * \param magf Texture magnification mode
+  * \param anisotropy Maximum anisotropy level
+  */
+-static void r300SetTexFilter(r300TexObjPtr t, GLenum minf, GLenum magf, GLfloat anisotropy)
++static void r300SetTexFilter(radeonTexObjPtr t, GLenum minf, GLenum magf, GLfloat anisotropy)
+ {
+-	t->filter &= ~(R300_TX_MIN_FILTER_MASK | R300_TX_MIN_FILTER_MIP_MASK | R300_TX_MAG_FILTER_MASK | R300_TX_MAX_ANISO_MASK);
+-	t->filter_1 &= ~R300_EDGE_ANISO_EDGE_ONLY;
++	/* Force revalidation to account for switches from/to mipmapping. */
++	t->validated = GL_FALSE;
++
++	t->pp_txfilter &= ~(R300_TX_MIN_FILTER_MASK | R300_TX_MIN_FILTER_MIP_MASK | R300_TX_MAG_FILTER_MASK | R300_TX_MAX_ANISO_MASK);
++	t->pp_txfilter_1 &= ~R300_EDGE_ANISO_EDGE_ONLY;
+ 
+ 	/* Note that EXT_texture_filter_anisotropic is extremely vague about
+ 	 * how anisotropic filtering interacts with the "normal" filter modes.
+@@ -128,7 +133,7 @@ static void r300SetTexFilter(r300TexObjPtr t, GLenum minf, GLenum magf, GLfloat
+ 	 * filter settings completely. This includes driconf's settings.
+ 	 */
+ 	if (anisotropy >= 2.0 && (minf != GL_NEAREST) && (magf != GL_NEAREST)) {
+-		t->filter |= R300_TX_MAG_FILTER_ANISO
++		t->pp_txfilter |= R300_TX_MAG_FILTER_ANISO
+ 			| R300_TX_MIN_FILTER_ANISO
+ 			| R300_TX_MIN_FILTER_MIP_LINEAR
+ 			| aniso_filter(anisotropy);
+@@ -139,22 +144,22 @@ static void r300SetTexFilter(r300TexObjPtr t, GLenum minf, GLenum magf, GLfloat
+ 
+ 	switch (minf) {
+ 	case GL_NEAREST:
+-		t->filter |= R300_TX_MIN_FILTER_NEAREST;
++		t->pp_txfilter |= R300_TX_MIN_FILTER_NEAREST;
+ 		break;
+ 	case GL_LINEAR:
+-		t->filter |= R300_TX_MIN_FILTER_LINEAR;
++		t->pp_txfilter |= R300_TX_MIN_FILTER_LINEAR;
+ 		break;
+ 	case GL_NEAREST_MIPMAP_NEAREST:
+-		t->filter |= R300_TX_MIN_FILTER_NEAREST|R300_TX_MIN_FILTER_MIP_NEAREST;
++		t->pp_txfilter |= R300_TX_MIN_FILTER_NEAREST|R300_TX_MIN_FILTER_MIP_NEAREST;
+ 		break;
+ 	case GL_NEAREST_MIPMAP_LINEAR:
+-		t->filter |= R300_TX_MIN_FILTER_NEAREST|R300_TX_MIN_FILTER_MIP_LINEAR;
++		t->pp_txfilter |= R300_TX_MIN_FILTER_NEAREST|R300_TX_MIN_FILTER_MIP_LINEAR;
+ 		break;
+ 	case GL_LINEAR_MIPMAP_NEAREST:
+-		t->filter |= R300_TX_MIN_FILTER_LINEAR|R300_TX_MIN_FILTER_MIP_NEAREST;
++		t->pp_txfilter |= R300_TX_MIN_FILTER_LINEAR|R300_TX_MIN_FILTER_MIP_NEAREST;
+ 		break;
+ 	case GL_LINEAR_MIPMAP_LINEAR:
+-		t->filter |= R300_TX_MIN_FILTER_LINEAR|R300_TX_MIN_FILTER_MIP_LINEAR;
++		t->pp_txfilter |= R300_TX_MIN_FILTER_LINEAR|R300_TX_MIN_FILTER_MIP_LINEAR;
+ 		break;
+ 	}
+ 
+@@ -163,743 +168,20 @@ static void r300SetTexFilter(r300TexObjPtr t, GLenum minf, GLenum magf, GLfloat
+ 	 */
+ 	switch (magf) {
+ 	case GL_NEAREST:
+-		t->filter |= R300_TX_MAG_FILTER_NEAREST;
++		t->pp_txfilter |= R300_TX_MAG_FILTER_NEAREST;
+ 		break;
+ 	case GL_LINEAR:
+-		t->filter |= R300_TX_MAG_FILTER_LINEAR;
++		t->pp_txfilter |= R300_TX_MAG_FILTER_LINEAR;
+ 		break;
+ 	}
+ }
+ 
+-static void r300SetTexBorderColor(r300TexObjPtr t, GLubyte c[4])
++static void r300SetTexBorderColor(radeonTexObjPtr t, GLubyte c[4])
+ {
+ 	t->pp_border_color = PACK_COLOR_8888(c[3], c[0], c[1], c[2]);
+ }
+ 
+ /**
+- * Allocate space for and load the mesa images into the texture memory block.
+- * This will happen before drawing with a new texture, or drawing with a
+- * texture after it was swapped out or teximaged again.
+- */
+-
+-static r300TexObjPtr r300AllocTexObj(struct gl_texture_object *texObj)
+-{
+-	r300TexObjPtr t;
+-
+-	t = CALLOC_STRUCT(r300_tex_obj);
+-	texObj->DriverData = t;
+-	if (t != NULL) {
+-		if (RADEON_DEBUG & DEBUG_TEXTURE) {
+-			fprintf(stderr, "%s( %p, %p )\n", __FUNCTION__,
+-				(void *)texObj, (void *)t);
+-		}
+-
+-		/* Initialize non-image-dependent parts of the state:
+-		 */
+-		t->base.tObj = texObj;
+-		t->border_fallback = GL_FALSE;
+-
+-		make_empty_list(&t->base);
+-
+-		r300UpdateTexWrap(t);
+-		r300SetTexFilter(t, texObj->MinFilter, texObj->MagFilter, texObj->MaxAnisotropy);
+-		r300SetTexBorderColor(t, texObj->_BorderChan);
+-	}
+-
+-	return t;
+-}
+-
+-/* try to find a format which will only need a memcopy */
+-static const struct gl_texture_format *r300Choose8888TexFormat(GLenum srcFormat,
+-							       GLenum srcType)
+-{
+-	const GLuint ui = 1;
+-	const GLubyte littleEndian = *((const GLubyte *)&ui);
+-
+-	if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8) ||
+-	    (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE && !littleEndian) ||
+-	    (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) ||
+-	    (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE && littleEndian)) {
+-		return &_mesa_texformat_rgba8888;
+-	} else if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) ||
+-		   (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE && littleEndian) ||
+-		   (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_INT_8_8_8_8) ||
+-		   (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE && !littleEndian)) {
+-		return &_mesa_texformat_rgba8888_rev;
+-	} else if (srcFormat == GL_BGRA && ((srcType == GL_UNSIGNED_BYTE && !littleEndian) ||
+-					    srcType == GL_UNSIGNED_INT_8_8_8_8)) {
+-		return &_mesa_texformat_argb8888_rev;
+-	} else if (srcFormat == GL_BGRA && ((srcType == GL_UNSIGNED_BYTE && littleEndian) ||
+-					    srcType == GL_UNSIGNED_INT_8_8_8_8_REV)) {
+-		return &_mesa_texformat_argb8888;
+-	} else
+-		return _dri_texformat_argb8888;
+-}
+-
+-static const struct gl_texture_format *r300ChooseTextureFormat(GLcontext * ctx,
+-							       GLint
+-							       internalFormat,
+-							       GLenum format,
+-							       GLenum type)
+-{
+-	r300ContextPtr rmesa = R300_CONTEXT(ctx);
+-	const GLboolean do32bpt =
+-	    (rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_32);
+-	const GLboolean force16bpt =
+-	    (rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_FORCE_16);
+-	(void)format;
+-
+-#if 0
+-	fprintf(stderr, "InternalFormat=%s(%d) type=%s format=%s\n",
+-		_mesa_lookup_enum_by_nr(internalFormat), internalFormat,
+-		_mesa_lookup_enum_by_nr(type), _mesa_lookup_enum_by_nr(format));
+-	fprintf(stderr, "do32bpt=%d force16bpt=%d\n", do32bpt, force16bpt);
+-#endif
+-
+-	switch (internalFormat) {
+-	case 4:
+-	case GL_RGBA:
+-	case GL_COMPRESSED_RGBA:
+-		switch (type) {
+-		case GL_UNSIGNED_INT_10_10_10_2:
+-		case GL_UNSIGNED_INT_2_10_10_10_REV:
+-			return do32bpt ? _dri_texformat_argb8888 :
+-			    _dri_texformat_argb1555;
+-		case GL_UNSIGNED_SHORT_4_4_4_4:
+-		case GL_UNSIGNED_SHORT_4_4_4_4_REV:
+-			return _dri_texformat_argb4444;
+-		case GL_UNSIGNED_SHORT_5_5_5_1:
+-		case GL_UNSIGNED_SHORT_1_5_5_5_REV:
+-			return _dri_texformat_argb1555;
+-		default:
+-			return do32bpt ? r300Choose8888TexFormat(format, type) :
+-			    _dri_texformat_argb4444;
+-		}
+-
+-	case 3:
+-	case GL_RGB:
+-	case GL_COMPRESSED_RGB:
+-		switch (type) {
+-		case GL_UNSIGNED_SHORT_4_4_4_4:
+-		case GL_UNSIGNED_SHORT_4_4_4_4_REV:
+-			return _dri_texformat_argb4444;
+-		case GL_UNSIGNED_SHORT_5_5_5_1:
+-		case GL_UNSIGNED_SHORT_1_5_5_5_REV:
+-			return _dri_texformat_argb1555;
+-		case GL_UNSIGNED_SHORT_5_6_5:
+-		case GL_UNSIGNED_SHORT_5_6_5_REV:
+-			return _dri_texformat_rgb565;
+-		default:
+-			return do32bpt ? _dri_texformat_argb8888 :
+-			    _dri_texformat_rgb565;
+-		}
+-
+-	case GL_RGBA8:
+-	case GL_RGB10_A2:
+-	case GL_RGBA12:
+-	case GL_RGBA16:
+-		return !force16bpt ?
+-		    r300Choose8888TexFormat(format,
+-					    type) : _dri_texformat_argb4444;
+-
+-	case GL_RGBA4:
+-	case GL_RGBA2:
+-		return _dri_texformat_argb4444;
+-
+-	case GL_RGB5_A1:
+-		return _dri_texformat_argb1555;
+-
+-	case GL_RGB8:
+-	case GL_RGB10:
+-	case GL_RGB12:
+-	case GL_RGB16:
+-		return !force16bpt ? _dri_texformat_argb8888 :
+-		    _dri_texformat_rgb565;
+-
+-	case GL_RGB5:
+-	case GL_RGB4:
+-	case GL_R3_G3_B2:
+-		return _dri_texformat_rgb565;
+-
+-	case GL_ALPHA:
+-	case GL_ALPHA4:
+-	case GL_ALPHA8:
+-	case GL_ALPHA12:
+-	case GL_ALPHA16:
+-	case GL_COMPRESSED_ALPHA:
+-		return _dri_texformat_a8;
+-
+-	case 1:
+-	case GL_LUMINANCE:
+-	case GL_LUMINANCE4:
+-	case GL_LUMINANCE8:
+-	case GL_LUMINANCE12:
+-	case GL_LUMINANCE16:
+-	case GL_COMPRESSED_LUMINANCE:
+-		return _dri_texformat_l8;
+-
+-	case 2:
+-	case GL_LUMINANCE_ALPHA:
+-	case GL_LUMINANCE4_ALPHA4:
+-	case GL_LUMINANCE6_ALPHA2:
+-	case GL_LUMINANCE8_ALPHA8:
+-	case GL_LUMINANCE12_ALPHA4:
+-	case GL_LUMINANCE12_ALPHA12:
+-	case GL_LUMINANCE16_ALPHA16:
+-	case GL_COMPRESSED_LUMINANCE_ALPHA:
+-		return _dri_texformat_al88;
+-
+-	case GL_INTENSITY:
+-	case GL_INTENSITY4:
+-	case GL_INTENSITY8:
+-	case GL_INTENSITY12:
+-	case GL_INTENSITY16:
+-	case GL_COMPRESSED_INTENSITY:
+-		return _dri_texformat_i8;
+-
+-	case GL_YCBCR_MESA:
+-		if (type == GL_UNSIGNED_SHORT_8_8_APPLE ||
+-		    type == GL_UNSIGNED_BYTE)
+-			return &_mesa_texformat_ycbcr;
+-		else
+-			return &_mesa_texformat_ycbcr_rev;
+-
+-	case GL_RGB_S3TC:
+-	case GL_RGB4_S3TC:
+-	case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
+-		return &_mesa_texformat_rgb_dxt1;
+-
+-	case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
+-		return &_mesa_texformat_rgba_dxt1;
+-
+-	case GL_RGBA_S3TC:
+-	case GL_RGBA4_S3TC:
+-	case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
+-		return &_mesa_texformat_rgba_dxt3;
+-
+-	case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
+-		return &_mesa_texformat_rgba_dxt5;
+-
+-	case GL_ALPHA16F_ARB:
+-		return &_mesa_texformat_alpha_float16;
+-	case GL_ALPHA32F_ARB:
+-		return &_mesa_texformat_alpha_float32;
+-	case GL_LUMINANCE16F_ARB:
+-		return &_mesa_texformat_luminance_float16;
+-	case GL_LUMINANCE32F_ARB:
+-		return &_mesa_texformat_luminance_float32;
+-	case GL_LUMINANCE_ALPHA16F_ARB:
+-		return &_mesa_texformat_luminance_alpha_float16;
+-	case GL_LUMINANCE_ALPHA32F_ARB:
+-		return &_mesa_texformat_luminance_alpha_float32;
+-	case GL_INTENSITY16F_ARB:
+-		return &_mesa_texformat_intensity_float16;
+-	case GL_INTENSITY32F_ARB:
+-		return &_mesa_texformat_intensity_float32;
+-	case GL_RGB16F_ARB:
+-		return &_mesa_texformat_rgba_float16;
+-	case GL_RGB32F_ARB:
+-		return &_mesa_texformat_rgba_float32;
+-	case GL_RGBA16F_ARB:
+-		return &_mesa_texformat_rgba_float16;
+-	case GL_RGBA32F_ARB:
+-		return &_mesa_texformat_rgba_float32;
+-
+-	case GL_DEPTH_COMPONENT:
+-	case GL_DEPTH_COMPONENT16:
+-	case GL_DEPTH_COMPONENT24:
+-	case GL_DEPTH_COMPONENT32:
+-#if 0
+-		switch (type) {
+-		case GL_UNSIGNED_BYTE:
+-		case GL_UNSIGNED_SHORT:
+-			return &_mesa_texformat_z16;
+-		case GL_UNSIGNED_INT:
+-			return &_mesa_texformat_z32;
+-		case GL_UNSIGNED_INT_24_8_EXT:
+-		default:
+-			return &_mesa_texformat_z24_s8;
+-		}
+-#else
+-		return &_mesa_texformat_z16;
+-#endif
+-
+-	default:
+-		_mesa_problem(ctx,
+-			      "unexpected internalFormat 0x%x in r300ChooseTextureFormat",
+-			      (int)internalFormat);
+-		return NULL;
+-	}
+-
+-	return NULL;		/* never get here */
+-}
+-
+-static GLboolean
+-r300ValidateClientStorage(GLcontext * ctx, GLenum target,
+-			  GLint internalFormat,
+-			  GLint srcWidth, GLint srcHeight,
+-			  GLenum format, GLenum type, const void *pixels,
+-			  const struct gl_pixelstore_attrib *packing,
+-			  struct gl_texture_object *texObj,
+-			  struct gl_texture_image *texImage)
+-{
+-	r300ContextPtr rmesa = R300_CONTEXT(ctx);
+-
+-	if (RADEON_DEBUG & DEBUG_TEXTURE)
+-		fprintf(stderr, "intformat %s format %s type %s\n",
+-			_mesa_lookup_enum_by_nr(internalFormat),
+-			_mesa_lookup_enum_by_nr(format),
+-			_mesa_lookup_enum_by_nr(type));
+-
+-	if (!ctx->Unpack.ClientStorage)
+-		return 0;
+-
+-	if (ctx->_ImageTransferState ||
+-	    texImage->IsCompressed || texObj->GenerateMipmap)
+-		return 0;
+-
+-	/* This list is incomplete, may be different on ppc???
+-	 */
+-	switch (internalFormat) {
+-	case GL_RGBA:
+-		if (format == GL_BGRA && type == GL_UNSIGNED_INT_8_8_8_8_REV) {
+-			texImage->TexFormat = _dri_texformat_argb8888;
+-		} else
+-			return 0;
+-		break;
+-
+-	case GL_RGB:
+-		if (format == GL_RGB && type == GL_UNSIGNED_SHORT_5_6_5) {
+-			texImage->TexFormat = _dri_texformat_rgb565;
+-		} else
+-			return 0;
+-		break;
+-
+-	case GL_YCBCR_MESA:
+-		if (format == GL_YCBCR_MESA &&
+-		    type == GL_UNSIGNED_SHORT_8_8_REV_APPLE) {
+-			texImage->TexFormat = &_mesa_texformat_ycbcr_rev;
+-		} else if (format == GL_YCBCR_MESA &&
+-			   (type == GL_UNSIGNED_SHORT_8_8_APPLE ||
+-			    type == GL_UNSIGNED_BYTE)) {
+-			texImage->TexFormat = &_mesa_texformat_ycbcr;
+-		} else
+-			return 0;
+-		break;
+-
+-	default:
+-		return 0;
+-	}
+-
+-	/* Could deal with these packing issues, but currently don't:
+-	 */
+-	if (packing->SkipPixels ||
+-	    packing->SkipRows || packing->SwapBytes || packing->LsbFirst) {
+-		return 0;
+-	}
+-
+-	GLint srcRowStride = _mesa_image_row_stride(packing, srcWidth,
+-						    format, type);
+-
+-	if (RADEON_DEBUG & DEBUG_TEXTURE)
+-		fprintf(stderr, "%s: srcRowStride %d/%x\n",
+-			__FUNCTION__, srcRowStride, srcRowStride);
+-
+-	/* Could check this later in upload, pitch restrictions could be
+-	 * relaxed, but would need to store the image pitch somewhere,
+-	 * as packing details might change before image is uploaded:
+-	 */
+-	if (!r300IsGartMemory(rmesa, pixels, srcHeight * srcRowStride)
+-	    || (srcRowStride & 63))
+-		return 0;
+-
+-	/* Have validated that _mesa_transfer_teximage would be a straight
+-	 * memcpy at this point.  NOTE: future calls to TexSubImage will
+-	 * overwrite the client data.  This is explicitly mentioned in the
+-	 * extension spec.
+-	 */
+-	texImage->Data = (void *)pixels;
+-	texImage->IsClientData = GL_TRUE;
+-	texImage->RowStride = srcRowStride / texImage->TexFormat->TexelBytes;
+-
+-	return 1;
+-}
+-
+-static void r300TexImage1D(GLcontext * ctx, GLenum target, GLint level,
+-			   GLint internalFormat,
+-			   GLint width, GLint border,
+-			   GLenum format, GLenum type, const GLvoid * pixels,
+-			   const struct gl_pixelstore_attrib *packing,
+-			   struct gl_texture_object *texObj,
+-			   struct gl_texture_image *texImage)
+-{
+-	driTextureObject *t = (driTextureObject *) texObj->DriverData;
+-
+-	if (t) {
+-		driSwapOutTextureObject(t);
+-	} else {
+-		t = (driTextureObject *) r300AllocTexObj(texObj);
+-		if (!t) {
+-			_mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage1D");
+-			return;
+-		}
+-	}
+-
+-	/* Note, this will call ChooseTextureFormat */
+-	_mesa_store_teximage1d(ctx, target, level, internalFormat,
+-			       width, border, format, type, pixels,
+-			       &ctx->Unpack, texObj, texImage);
+-
+-	t->dirty_images[0] |= (1 << level);
+-}
+-
+-static void r300TexSubImage1D(GLcontext * ctx, GLenum target, GLint level,
+-			      GLint xoffset,
+-			      GLsizei width,
+-			      GLenum format, GLenum type,
+-			      const GLvoid * pixels,
+-			      const struct gl_pixelstore_attrib *packing,
+-			      struct gl_texture_object *texObj,
+-			      struct gl_texture_image *texImage)
+-{
+-	driTextureObject *t = (driTextureObject *) texObj->DriverData;
+-
+-	assert(t);		/* this _should_ be true */
+-	if (t) {
+-		driSwapOutTextureObject(t);
+-	} else {
+-		t = (driTextureObject *) r300AllocTexObj(texObj);
+-		if (!t) {
+-			_mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage1D");
+-			return;
+-		}
+-	}
+-
+-	_mesa_store_texsubimage1d(ctx, target, level, xoffset, width,
+-				  format, type, pixels, packing, texObj,
+-				  texImage);
+-
+-	t->dirty_images[0] |= (1 << level);
+-}
+-
+-static void r300TexImage2D(GLcontext * ctx, GLenum target, GLint level,
+-			   GLint internalFormat,
+-			   GLint width, GLint height, GLint border,
+-			   GLenum format, GLenum type, const GLvoid * pixels,
+-			   const struct gl_pixelstore_attrib *packing,
+-			   struct gl_texture_object *texObj,
+-			   struct gl_texture_image *texImage)
+-{
+-	driTextureObject *t = (driTextureObject *) texObj->DriverData;
+-	GLuint face;
+-
+-	/* which cube face or ordinary 2D image */
+-	switch (target) {
+-	case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
+-	case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
+-	case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
+-	case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
+-	case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
+-	case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
+-		face =
+-		    (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
+-		ASSERT(face < 6);
+-		break;
+-	default:
+-		face = 0;
+-	}
+-
+-	if (t != NULL) {
+-		driSwapOutTextureObject(t);
+-	} else {
+-		t = (driTextureObject *) r300AllocTexObj(texObj);
+-		if (!t) {
+-			_mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage2D");
+-			return;
+-		}
+-	}
+-
+-	texImage->IsClientData = GL_FALSE;
+-
+-	if (r300ValidateClientStorage(ctx, target,
+-				      internalFormat,
+-				      width, height,
+-				      format, type, pixels,
+-				      packing, texObj, texImage)) {
+-		if (RADEON_DEBUG & DEBUG_TEXTURE)
+-			fprintf(stderr, "%s: Using client storage\n",
+-				__FUNCTION__);
+-	} else {
+-		if (RADEON_DEBUG & DEBUG_TEXTURE)
+-			fprintf(stderr, "%s: Using normal storage\n",
+-				__FUNCTION__);
+-
+-		/* Normal path: copy (to cached memory) and eventually upload
+-		 * via another copy to GART memory and then a blit...  Could
+-		 * eliminate one copy by going straight to (permanent) GART.
+-		 *
+-		 * Note, this will call r300ChooseTextureFormat.
+-		 */
+-		_mesa_store_teximage2d(ctx, target, level, internalFormat,
+-				       width, height, border, format, type,
+-				       pixels, &ctx->Unpack, texObj, texImage);
+-
+-		t->dirty_images[face] |= (1 << level);
+-	}
+-}
+-
+-static void r300TexSubImage2D(GLcontext * ctx, GLenum target, GLint level,
+-			      GLint xoffset, GLint yoffset,
+-			      GLsizei width, GLsizei height,
+-			      GLenum format, GLenum type,
+-			      const GLvoid * pixels,
+-			      const struct gl_pixelstore_attrib *packing,
+-			      struct gl_texture_object *texObj,
+-			      struct gl_texture_image *texImage)
+-{
+-	driTextureObject *t = (driTextureObject *) texObj->DriverData;
+-	GLuint face;
+-
+-	/* which cube face or ordinary 2D image */
+-	switch (target) {
+-	case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
+-	case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
+-	case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
+-	case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
+-	case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
+-	case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
+-		face =
+-		    (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
+-		ASSERT(face < 6);
+-		break;
+-	default:
+-		face = 0;
+-	}
+-
+-	assert(t);		/* this _should_ be true */
+-	if (t) {
+-		driSwapOutTextureObject(t);
+-	} else {
+-		t = (driTextureObject *) r300AllocTexObj(texObj);
+-		if (!t) {
+-			_mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage2D");
+-			return;
+-		}
+-	}
+-
+-	_mesa_store_texsubimage2d(ctx, target, level, xoffset, yoffset, width,
+-				  height, format, type, pixels, packing, texObj,
+-				  texImage);
+-
+-	t->dirty_images[face] |= (1 << level);
+-}
+-
+-static void r300CompressedTexImage2D(GLcontext * ctx, GLenum target,
+-				     GLint level, GLint internalFormat,
+-				     GLint width, GLint height, GLint border,
+-				     GLsizei imageSize, const GLvoid * data,
+-				     struct gl_texture_object *texObj,
+-				     struct gl_texture_image *texImage)
+-{
+-	driTextureObject *t = (driTextureObject *) texObj->DriverData;
+-	GLuint face;
+-
+-	/* which cube face or ordinary 2D image */
+-	switch (target) {
+-	case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
+-	case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
+-	case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
+-	case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
+-	case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
+-	case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
+-		face =
+-		    (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
+-		ASSERT(face < 6);
+-		break;
+-	default:
+-		face = 0;
+-	}
+-
+-	if (t != NULL) {
+-		driSwapOutTextureObject(t);
+-	} else {
+-		t = (driTextureObject *) r300AllocTexObj(texObj);
+-		if (!t) {
+-			_mesa_error(ctx, GL_OUT_OF_MEMORY,
+-				    "glCompressedTexImage2D");
+-			return;
+-		}
+-	}
+-
+-	texImage->IsClientData = GL_FALSE;
+-
+-	/* can't call this, different parameters. Would never evaluate to true anyway currently */
+-#if 0
+-	if (r300ValidateClientStorage(ctx, target,
+-				      internalFormat,
+-				      width, height,
+-				      format, type, pixels,
+-				      packing, texObj, texImage)) {
+-		if (RADEON_DEBUG & DEBUG_TEXTURE)
+-			fprintf(stderr, "%s: Using client storage\n",
+-				__FUNCTION__);
+-	} else
+-#endif
+-	{
+-		if (RADEON_DEBUG & DEBUG_TEXTURE)
+-			fprintf(stderr, "%s: Using normal storage\n",
+-				__FUNCTION__);
+-
+-		/* Normal path: copy (to cached memory) and eventually upload
+-		 * via another copy to GART memory and then a blit...  Could
+-		 * eliminate one copy by going straight to (permanent) GART.
+-		 *
+-		 * Note, this will call r300ChooseTextureFormat.
+-		 */
+-		_mesa_store_compressed_teximage2d(ctx, target, level,
+-						  internalFormat, width, height,
+-						  border, imageSize, data,
+-						  texObj, texImage);
+-
+-		t->dirty_images[face] |= (1 << level);
+-	}
+-}
+-
+-static void r300CompressedTexSubImage2D(GLcontext * ctx, GLenum target,
+-					GLint level, GLint xoffset,
+-					GLint yoffset, GLsizei width,
+-					GLsizei height, GLenum format,
+-					GLsizei imageSize, const GLvoid * data,
+-					struct gl_texture_object *texObj,
+-					struct gl_texture_image *texImage)
+-{
+-	driTextureObject *t = (driTextureObject *) texObj->DriverData;
+-	GLuint face;
+-
+-	/* which cube face or ordinary 2D image */
+-	switch (target) {
+-	case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
+-	case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
+-	case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
+-	case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
+-	case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
+-	case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
+-		face =
+-		    (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
+-		ASSERT(face < 6);
+-		break;
+-	default:
+-		face = 0;
+-	}
+-
+-	assert(t);		/* this _should_ be true */
+-	if (t) {
+-		driSwapOutTextureObject(t);
+-	} else {
+-		t = (driTextureObject *) r300AllocTexObj(texObj);
+-		if (!t) {
+-			_mesa_error(ctx, GL_OUT_OF_MEMORY,
+-				    "glCompressedTexSubImage3D");
+-			return;
+-		}
+-	}
+-
+-	_mesa_store_compressed_texsubimage2d(ctx, target, level, xoffset,
+-					     yoffset, width, height, format,
+-					     imageSize, data, texObj, texImage);
+-
+-	t->dirty_images[face] |= (1 << level);
+-}
+-
+-static void r300TexImage3D(GLcontext * ctx, GLenum target, GLint level,
+-			   GLint internalFormat,
+-			   GLint width, GLint height, GLint depth,
+-			   GLint border,
+-			   GLenum format, GLenum type, const GLvoid * pixels,
+-			   const struct gl_pixelstore_attrib *packing,
+-			   struct gl_texture_object *texObj,
+-			   struct gl_texture_image *texImage)
+-{
+-	driTextureObject *t = (driTextureObject *) texObj->DriverData;
+-
+-	if (t) {
+-		driSwapOutTextureObject(t);
+-	} else {
+-		t = (driTextureObject *) r300AllocTexObj(texObj);
+-		if (!t) {
+-			_mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage3D");
+-			return;
+-		}
+-	}
+-
+-	texImage->IsClientData = GL_FALSE;
+-
+-#if 0
+-	if (r300ValidateClientStorage(ctx, target,
+-				      internalFormat,
+-				      width, height,
+-				      format, type, pixels,
+-				      packing, texObj, texImage)) {
+-		if (RADEON_DEBUG & DEBUG_TEXTURE)
+-			fprintf(stderr, "%s: Using client storage\n",
+-				__FUNCTION__);
+-	} else
+-#endif
+-	{
+-		if (RADEON_DEBUG & DEBUG_TEXTURE)
+-			fprintf(stderr, "%s: Using normal storage\n",
+-				__FUNCTION__);
+-
+-		/* Normal path: copy (to cached memory) and eventually upload
+-		 * via another copy to GART memory and then a blit...  Could
+-		 * eliminate one copy by going straight to (permanent) GART.
+-		 *
+-		 * Note, this will call r300ChooseTextureFormat.
+-		 */
+-		_mesa_store_teximage3d(ctx, target, level, internalFormat,
+-				       width, height, depth, border,
+-				       format, type, pixels,
+-				       &ctx->Unpack, texObj, texImage);
+-
+-		t->dirty_images[0] |= (1 << level);
+-	}
+-}
+-
+-static void
+-r300TexSubImage3D(GLcontext * ctx, GLenum target, GLint level,
+-		  GLint xoffset, GLint yoffset, GLint zoffset,
+-		  GLsizei width, GLsizei height, GLsizei depth,
+-		  GLenum format, GLenum type,
+-		  const GLvoid * pixels,
+-		  const struct gl_pixelstore_attrib *packing,
+-		  struct gl_texture_object *texObj,
+-		  struct gl_texture_image *texImage)
+-{
+-	driTextureObject *t = (driTextureObject *) texObj->DriverData;
+-
+-/*     fprintf(stderr, "%s\n", __FUNCTION__); */
+-
+-	assert(t);		/* this _should_ be true */
+-	if (t) {
+-		driSwapOutTextureObject(t);
+-	} else {
+-		t = (driTextureObject *) r300AllocTexObj(texObj);
+-		if (!t) {
+-			_mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage3D");
+-			return;
+-		}
+-		texObj->DriverData = t;
+-	}
+-
+-	_mesa_store_texsubimage3d(ctx, target, level, xoffset, yoffset, zoffset,
+-				  width, height, depth,
+-				  format, type, pixels, packing, texObj,
+-				  texImage);
+-
+-	t->dirty_images[0] |= (1 << level);
+-}
+-
+-/**
+  * Changes variables and flags for a state update, which will happen at the
+  * next UpdateTextureState
+  */
+@@ -908,7 +190,7 @@ static void r300TexParameter(GLcontext * ctx, GLenum target,
+ 			     struct gl_texture_object *texObj,
+ 			     GLenum pname, const GLfloat * params)
+ {
+-	r300TexObjPtr t = (r300TexObjPtr) texObj->DriverData;
++	radeonTexObj* t = radeon_tex_obj(texObj);
+ 
+ 	if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) {
+ 		fprintf(stderr, "%s( %s )\n", __FUNCTION__,
+@@ -941,7 +223,11 @@ static void r300TexParameter(GLcontext * ctx, GLenum target,
+ 		 * we just have to rely on loading the right subset of mipmap levels
+ 		 * to simulate a clamped LOD.
+ 		 */
+-		driSwapOutTextureObject((driTextureObject *) t);
++		if (t->mt) {
++			radeon_miptree_unreference(t->mt);
++			t->mt = 0;
++			t->validated = GL_FALSE;
++		}
+ 		break;
+ 
+ 	case GL_DEPTH_TEXTURE_MODE:
+@@ -964,27 +250,10 @@ static void r300TexParameter(GLcontext * ctx, GLenum target,
+ 	}
+ }
+ 
+-static void r300BindTexture(GLcontext * ctx, GLenum target,
+-			    struct gl_texture_object *texObj)
+-{
+-	if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) {
+-		fprintf(stderr, "%s( %p ) unit=%d\n", __FUNCTION__,
+-			(void *)texObj, ctx->Texture.CurrentUnit);
+-	}
+-
+-	if ((target == GL_TEXTURE_1D)
+-	    || (target == GL_TEXTURE_2D)
+-	    || (target == GL_TEXTURE_3D)
+-	    || (target == GL_TEXTURE_CUBE_MAP)
+-	    || (target == GL_TEXTURE_RECTANGLE_NV)) {
+-		assert(texObj->DriverData != NULL);
+-	}
+-}
+-
+ static void r300DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj)
+ {
+ 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
+-	driTextureObject *t = (driTextureObject *) texObj->DriverData;
++	radeonTexObj* t = radeon_tex_obj(texObj);
+ 
+ 	if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) {
+ 		fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__,
+@@ -992,14 +261,19 @@ static void r300DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj)
+ 			_mesa_lookup_enum_by_nr(texObj->Target));
+ 	}
+ 
+-	if (t != NULL) {
+-		if (rmesa) {
+-			R300_FIREVERTICES(rmesa);
+-		}
++	if (rmesa) {
++		int i;
++		radeon_firevertices(&rmesa->radeon);
+ 
+-		driDestroyTextureObject(t);
++		for(i = 0; i < R300_MAX_TEXTURE_UNITS; ++i)
++			if (rmesa->hw.textures[i] == t)
++				rmesa->hw.textures[i] = 0;
++	}
++
++	if (t->mt) {
++		radeon_miptree_unreference(t->mt);
++		t->mt = 0;
+ 	}
+-	/* Free mipmap images and the texture object itself */
+ 	_mesa_delete_texture_object(ctx, texObj);
+ }
+ 
+@@ -1008,8 +282,6 @@ static void r300DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj)
+  * Called via ctx->Driver.NewTextureObject.
+  * Note: this function will be called during context creation to
+  * allocate the default texture objects.
+- * Note: we could use containment here to 'derive' the driver-specific
+- * texture object from the core mesa gl_texture_object.  Not done at this time.
+  * Fixup MaxAnisotropy according to user preference.
+  */
+ static struct gl_texture_object *r300NewTextureObject(GLcontext * ctx,
+@@ -1017,14 +289,23 @@ static struct gl_texture_object *r300NewTextureObject(GLcontext * ctx,
+ 						      GLenum target)
+ {
+ 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
+-	struct gl_texture_object *obj;
+-	obj = _mesa_new_texture_object(ctx, name, target);
+-	if (!obj)
+-		return NULL;
+-	obj->MaxAnisotropy = rmesa->initialMaxAnisotropy;
++	radeonTexObj* t = CALLOC_STRUCT(radeon_tex_obj);
++	if (!t) return NULL;	/* allocation failed; return NULL as the replaced code did */
+ 
+-	r300AllocTexObj(obj);
+-	return obj;
++	if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) {
++		fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__,
++			t, _mesa_lookup_enum_by_nr(target));
++	}
++
++	_mesa_initialize_texture_object(&t->base, name, target);
++	t->base.MaxAnisotropy = rmesa->radeon.initialMaxAnisotropy;
++
++	/* Initialize hardware state */
++	r300UpdateTexWrap(t);
++	r300SetTexFilter(t, t->base.MinFilter, t->base.MagFilter, t->base.MaxAnisotropy);
++	r300SetTexBorderColor(t, t->base._BorderChan);
++
++	return &t->base;
+ }
+ 
+ void r300InitTextureFuncs(struct dd_function_table *functions)
+@@ -1032,22 +313,30 @@ void r300InitTextureFuncs(struct dd_function_table *functions)
+ 	/* Note: we only plug in the functions we implement in the driver
+ 	 * since _mesa_init_driver_functions() was already called.
+ 	 */
+-	functions->ChooseTextureFormat = r300ChooseTextureFormat;
+-	functions->TexImage1D = r300TexImage1D;
+-	functions->TexImage2D = r300TexImage2D;
+-	functions->TexImage3D = r300TexImage3D;
+-	functions->TexSubImage1D = r300TexSubImage1D;
+-	functions->TexSubImage2D = r300TexSubImage2D;
+-	functions->TexSubImage3D = r300TexSubImage3D;
++	functions->NewTextureImage = radeonNewTextureImage;
++	functions->FreeTexImageData = radeonFreeTexImageData;
++	functions->MapTexture = radeonMapTexture;
++	functions->UnmapTexture = radeonUnmapTexture;
++
++	functions->ChooseTextureFormat = radeonChooseTextureFormat;
++	functions->TexImage1D = radeonTexImage1D;
++	functions->TexImage2D = radeonTexImage2D;
++	functions->TexImage3D = radeonTexImage3D;
++	functions->TexSubImage1D = radeonTexSubImage1D;
++	functions->TexSubImage2D = radeonTexSubImage2D;
++	functions->TexSubImage3D = radeonTexSubImage3D;
++	functions->GetTexImage = radeonGetTexImage;
++	functions->GetCompressedTexImage = radeonGetCompressedTexImage;
+ 	functions->NewTextureObject = r300NewTextureObject;
+-	functions->BindTexture = r300BindTexture;
+ 	functions->DeleteTexture = r300DeleteTexture;
+ 	functions->IsTextureResident = driIsTextureResident;
+ 
+ 	functions->TexParameter = r300TexParameter;
+ 
+-	functions->CompressedTexImage2D = r300CompressedTexImage2D;
+-	functions->CompressedTexSubImage2D = r300CompressedTexSubImage2D;
++	functions->CompressedTexImage2D = radeonCompressedTexImage2D;
++	functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D;
++
++	functions->GenerateMipmap = radeonGenerateMipmap;
+ 
+ 	driInitTextureFormats();
+ }
+diff --git a/src/mesa/drivers/dri/r300/r300_tex.h b/src/mesa/drivers/dri/r300/r300_tex.h
+index b86d45b..baad3fe 100644
+--- a/src/mesa/drivers/dri/r300/r300_tex.h
++++ b/src/mesa/drivers/dri/r300/r300_tex.h
+@@ -37,16 +37,14 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ extern void r300SetDepthTexMode(struct gl_texture_object *tObj);
+ 
++extern void r300SetTexBuffer(__DRIcontext *pDRICtx, GLint target,
++			     __DRIdrawable *dPriv);
++
+ extern void r300SetTexOffset(__DRIcontext *pDRICtx, GLint texname,
+ 			     unsigned long long offset, GLint depth,
+ 			     GLuint pitch);
+ 
+-extern void r300UpdateTextureState(GLcontext * ctx);
+-
+-extern int r300UploadTexImages(r300ContextPtr rmesa, r300TexObjPtr t,
+-			       GLuint face);
+-
+-extern void r300DestroyTexObj(r300ContextPtr rmesa, r300TexObjPtr t);
++extern GLboolean r300ValidateBuffers(GLcontext * ctx);
+ 
+ extern void r300InitTextureFuncs(struct dd_function_table *functions);
+ 
+diff --git a/src/mesa/drivers/dri/r300/r300_texmem.c b/src/mesa/drivers/dri/r300/r300_texmem.c
+deleted file mode 100644
+index b03eefa..0000000
+--- a/src/mesa/drivers/dri/r300/r300_texmem.c
++++ /dev/null
+@@ -1,567 +0,0 @@
+-/**************************************************************************
+-
+-Copyright (C) Tungsten Graphics 2002.  All Rights Reserved.
+-The Weather Channel, Inc. funded Tungsten Graphics to develop the
+-initial release of the Radeon 8500 driver under the XFree86
+-license. This notice must be preserved.
+-
+-Permission is hereby granted, free of charge, to any person obtaining
+-a copy of this software and associated documentation files (the
+-"Software"), to deal in the Software without restriction, including
+-without limitation on the rights to use, copy, modify, merge, publish,
+-distribute, sub license, and/or sell copies of the Software, and to
+-permit persons to whom the Software is furnished to do so, subject to
+-the following conditions:
+-
+-The above copyright notice and this permission notice (including the
+-next paragraph) shall be included in all copies or substantial
+-portions of the Software.
+-
+-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+-NON-INFRINGEMENT. IN NO EVENT SHALL ATI, VA LINUX SYSTEMS AND/OR THEIR
+-SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+-IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
+-IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+-SOFTWARE.
+-
+-**************************************************************************/
+-
+-/**
+- * \file
+- *
+- * \author Gareth Hughes <gareth@valinux.com>
+- *
+- * \author Kevin E. Martin <martin@valinux.com>
+- */
+-
+-#include <errno.h>
+-
+-#include "main/glheader.h"
+-#include "main/imports.h"
+-#include "main/context.h"
+-#include "main/colormac.h"
+-#include "main/macros.h"
+-#include "main/simple_list.h"
+-#include "radeon_reg.h"		/* gets definition for usleep */
+-#include "r300_context.h"
+-#include "r300_state.h"
+-#include "r300_cmdbuf.h"
+-#include "radeon_ioctl.h"
+-#include "r300_tex.h"
+-#include "r300_ioctl.h"
+-#include <unistd.h>		/* for usleep() */
+-
+-#ifdef USER_BUFFERS
+-#include "r300_mem.h"
+-#endif
+-
+-/**
+- * Destroy any device-dependent state associated with the texture.  This may
+- * include NULLing out hardware state that points to the texture.
+- */
+-void r300DestroyTexObj(r300ContextPtr rmesa, r300TexObjPtr t)
+-{
+-	int i;
+-
+-	if (RADEON_DEBUG & DEBUG_TEXTURE) {
+-		fprintf(stderr, "%s( %p, %p )\n", __FUNCTION__,
+-			(void *)t, (void *)t->base.tObj);
+-	}
+-
+-	for (i = 0; i < rmesa->radeon.glCtx->Const.MaxTextureUnits; i++) {
+-		if (rmesa->state.texture.unit[i].texobj == t) {
+-			rmesa->state.texture.unit[i].texobj = NULL;
+-		}
+-	}
+-}
+-
+-/* ------------------------------------------------------------
+- * Texture image conversions
+- */
+-
+-static void r300UploadGARTClientSubImage(r300ContextPtr rmesa,
+-					 r300TexObjPtr t,
+-					 struct gl_texture_image *texImage,
+-					 GLint hwlevel,
+-					 GLint x, GLint y,
+-					 GLint width, GLint height)
+-{
+-	const struct gl_texture_format *texFormat = texImage->TexFormat;
+-	GLuint srcPitch, dstPitch;
+-	int blit_format;
+-	int srcOffset;
+-
+-	/*
+-	 * XXX it appears that we always upload the full image, not a subimage.
+-	 * I.e. x==0, y==0, width=texWidth, height=texWidth.  If this is ever
+-	 * changed, the src pitch will have to change.
+-	 */
+-	switch (texFormat->TexelBytes) {
+-	case 1:
+-		blit_format = R300_CP_COLOR_FORMAT_CI8;
+-		srcPitch = t->image[0][0].width * texFormat->TexelBytes;
+-		dstPitch = t->image[0][0].width * texFormat->TexelBytes;
+-		break;
+-	case 2:
+-		blit_format = R300_CP_COLOR_FORMAT_RGB565;
+-		srcPitch = t->image[0][0].width * texFormat->TexelBytes;
+-		dstPitch = t->image[0][0].width * texFormat->TexelBytes;
+-		break;
+-	case 4:
+-		blit_format = R300_CP_COLOR_FORMAT_ARGB8888;
+-		srcPitch = t->image[0][0].width * texFormat->TexelBytes;
+-		dstPitch = t->image[0][0].width * texFormat->TexelBytes;
+-		break;
+-	case 8:
+-	case 16:
+-		blit_format = R300_CP_COLOR_FORMAT_CI8;
+-		srcPitch = t->image[0][0].width * texFormat->TexelBytes;
+-		dstPitch = t->image[0][0].width * texFormat->TexelBytes;
+-		break;
+-	default:
+-		return;
+-	}
+-
+-	t->image[0][hwlevel].data = texImage->Data;
+-	srcOffset = r300GartOffsetFromVirtual(rmesa, texImage->Data);
+-
+-	assert(srcOffset != ~0);
+-
+-	/* Don't currently need to cope with small pitches?
+-	 */
+-	width = texImage->Width;
+-	height = texImage->Height;
+-
+-	if (texFormat->TexelBytes > 4) {
+-		width *= texFormat->TexelBytes;
+-	}
+-
+-	r300EmitWait(rmesa, R300_WAIT_3D);
+-
+-	r300EmitBlit(rmesa, blit_format,
+-		     srcPitch,
+-		     srcOffset,
+-		     dstPitch,
+-		     t->bufAddr,
+-		     x,
+-		     y,
+-		     t->image[0][hwlevel].x + x,
+-		     t->image[0][hwlevel].y + y, width, height);
+-
+-	r300EmitWait(rmesa, R300_WAIT_2D);
+-}
+-
+-static void r300UploadRectSubImage(r300ContextPtr rmesa,
+-				   r300TexObjPtr t,
+-				   struct gl_texture_image *texImage,
+-				   GLint x, GLint y, GLint width, GLint height)
+-{
+-	const struct gl_texture_format *texFormat = texImage->TexFormat;
+-	int blit_format, dstPitch, done;
+-
+-	switch (texFormat->TexelBytes) {
+-	case 1:
+-		blit_format = R300_CP_COLOR_FORMAT_CI8;
+-		break;
+-	case 2:
+-		blit_format = R300_CP_COLOR_FORMAT_RGB565;
+-		break;
+-	case 4:
+-		blit_format = R300_CP_COLOR_FORMAT_ARGB8888;
+-		break;
+-	case 8:
+-	case 16:
+-		blit_format = R300_CP_COLOR_FORMAT_CI8;
+-		break;
+-	default:
+-		return;
+-	}
+-
+-	t->image[0][0].data = texImage->Data;
+-
+-	/* Currently don't need to cope with small pitches.
+-	 */
+-	width = texImage->Width;
+-	height = texImage->Height;
+-	dstPitch = t->pitch;
+-
+-	if (texFormat->TexelBytes > 4) {
+-		width *= texFormat->TexelBytes;
+-	}
+-
+-	if (rmesa->prefer_gart_client_texturing && texImage->IsClientData) {
+-		/* In this case, could also use GART texturing.  This is
+-		 * currently disabled, but has been tested & works.
+-		 */
+-		t->offset = r300GartOffsetFromVirtual(rmesa, texImage->Data);
+-		t->pitch = texImage->RowStride * texFormat->TexelBytes - 32;
+-
+-		if (RADEON_DEBUG & DEBUG_TEXTURE)
+-			fprintf(stderr,
+-				"Using GART texturing for rectangular client texture\n");
+-
+-		/* Release FB memory allocated for this image:
+-		 */
+-		/* FIXME This may not be correct as driSwapOutTextureObject sets
+-		 * FIXME dirty_images.  It may be fine, though.
+-		 */
+-		if (t->base.memBlock) {
+-			driSwapOutTextureObject((driTextureObject *) t);
+-		}
+-	} else if (texImage->IsClientData) {
+-		/* Data already in GART memory, with usable pitch.
+-		 */
+-		GLuint srcPitch;
+-		srcPitch = texImage->RowStride * texFormat->TexelBytes;
+-		r300EmitBlit(rmesa,
+-			     blit_format,
+-			     srcPitch,
+-			     r300GartOffsetFromVirtual(rmesa, texImage->Data),
+-			     dstPitch, t->bufAddr, 0, 0, 0, 0, width, height);
+-	} else {
+-		/* Data not in GART memory, or bad pitch.
+-		 */
+-		for (done = 0; done < height;) {
+-			struct r300_dma_region region;
+-			int lines =
+-			    MIN2(height - done, RADEON_BUFFER_SIZE / dstPitch);
+-			int src_pitch;
+-			char *tex;
+-
+-			src_pitch = texImage->RowStride * texFormat->TexelBytes;
+-
+-			tex = (char *)texImage->Data + done * src_pitch;
+-
+-			memset(&region, 0, sizeof(region));
+-			r300AllocDmaRegion(rmesa, &region, lines * dstPitch,
+-					   1024);
+-
+-			/* Copy texdata to dma:
+-			 */
+-			if (RADEON_DEBUG & DEBUG_TEXTURE)
+-				fprintf(stderr,
+-					"%s: src_pitch %d dst_pitch %d\n",
+-					__FUNCTION__, src_pitch, dstPitch);
+-
+-			if (src_pitch == dstPitch) {
+-				memcpy(region.address + region.start, tex,
+-				       lines * src_pitch);
+-			} else {
+-				char *buf = region.address + region.start;
+-				int i;
+-				for (i = 0; i < lines; i++) {
+-					memcpy(buf, tex, src_pitch);
+-					buf += dstPitch;
+-					tex += src_pitch;
+-				}
+-			}
+-
+-			r300EmitWait(rmesa, R300_WAIT_3D);
+-
+-			/* Blit to framebuffer
+-			 */
+-			r300EmitBlit(rmesa,
+-				     blit_format,
+-				     dstPitch, GET_START(&region),
+-				     dstPitch | (t->tile_bits >> 16),
+-				     t->bufAddr, 0, 0, 0, done, width, lines);
+-
+-			r300EmitWait(rmesa, R300_WAIT_2D);
+-#ifdef USER_BUFFERS
+-			r300_mem_use(rmesa, region.buf->id);
+-#endif
+-
+-			r300ReleaseDmaRegion(rmesa, &region, __FUNCTION__);
+-			done += lines;
+-		}
+-	}
+-}
+-
+-/**
+- * Upload the texture image associated with texture \a t at the specified
+- * level at the address relative to \a start.
+- */
+-static void r300UploadSubImage(r300ContextPtr rmesa, r300TexObjPtr t,
+-			       GLint hwlevel,
+-			       GLint x, GLint y, GLint width, GLint height,
+-			       GLuint face)
+-{
+-	struct gl_texture_image *texImage = NULL;
+-	GLuint offset;
+-	GLint imageWidth, imageHeight;
+-	GLint ret;
+-	drm_radeon_texture_t tex;
+-	drm_radeon_tex_image_t tmp;
+-	const int level = hwlevel + t->base.firstLevel;
+-
+-	if (RADEON_DEBUG & DEBUG_TEXTURE) {
+-		fprintf(stderr,
+-			"%s( %p, %p ) level/width/height/face = %d/%d/%d/%u\n",
+-			__FUNCTION__, (void *)t, (void *)t->base.tObj, level,
+-			width, height, face);
+-	}
+-
+-	ASSERT(face < 6);
+-
+-	/* Ensure we have a valid texture to upload */
+-	if ((hwlevel < 0) || (hwlevel >= RADEON_MAX_TEXTURE_LEVELS)) {
+-		_mesa_problem(NULL, "bad texture level in %s", __FUNCTION__);
+-		return;
+-	}
+-
+-	texImage = t->base.tObj->Image[face][level];
+-
+-	if (!texImage) {
+-		if (RADEON_DEBUG & DEBUG_TEXTURE)
+-			fprintf(stderr, "%s: texImage %d is NULL!\n",
+-				__FUNCTION__, level);
+-		return;
+-	}
+-	if (!texImage->Data) {
+-		if (RADEON_DEBUG & DEBUG_TEXTURE)
+-			fprintf(stderr, "%s: image data is NULL!\n",
+-				__FUNCTION__);
+-		return;
+-	}
+-
+-	if (t->base.tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
+-		assert(level == 0);
+-		assert(hwlevel == 0);
+-		if (RADEON_DEBUG & DEBUG_TEXTURE)
+-			fprintf(stderr, "%s: image data is rectangular\n",
+-				__FUNCTION__);
+-		r300UploadRectSubImage(rmesa, t, texImage, x, y, width, height);
+-		return;
+-	} else if (texImage->IsClientData) {
+-		if (RADEON_DEBUG & DEBUG_TEXTURE)
+-			fprintf(stderr,
+-				"%s: image data is in GART client storage\n",
+-				__FUNCTION__);
+-		r300UploadGARTClientSubImage(rmesa, t, texImage, hwlevel, x, y,
+-					     width, height);
+-		return;
+-	} else if (RADEON_DEBUG & DEBUG_TEXTURE)
+-		fprintf(stderr, "%s: image data is in normal memory\n",
+-			__FUNCTION__);
+-
+-	imageWidth = texImage->Width;
+-	imageHeight = texImage->Height;
+-
+-	offset = t->bufAddr;
+-
+-	if (RADEON_DEBUG & (DEBUG_TEXTURE | DEBUG_IOCTL)) {
+-		GLint imageX = 0;
+-		GLint imageY = 0;
+-		GLint blitX = t->image[face][hwlevel].x;
+-		GLint blitY = t->image[face][hwlevel].y;
+-		GLint blitWidth = t->image[face][hwlevel].width;
+-		GLint blitHeight = t->image[face][hwlevel].height;
+-		fprintf(stderr, "   upload image: %d,%d at %d,%d\n",
+-			imageWidth, imageHeight, imageX, imageY);
+-		fprintf(stderr, "   upload  blit: %d,%d at %d,%d\n",
+-			blitWidth, blitHeight, blitX, blitY);
+-		fprintf(stderr, "       blit ofs: 0x%07x level: %d/%d\n",
+-			(GLuint) offset, hwlevel, level);
+-	}
+-
+-	t->image[face][hwlevel].data = texImage->Data;
+-
+-	/* Init the DRM_RADEON_TEXTURE command / drm_radeon_texture_t struct.
+-	 * NOTE: we're always use a 1KB-wide blit and I8 texture format.
+-	 * We used to use 1, 2 and 4-byte texels and used to use the texture
+-	 * width to dictate the blit width - but that won't work for compressed
+-	 * textures. (Brian)
+-	 * NOTE: can't do that with texture tiling. (sroland)
+-	 */
+-	tex.offset = offset;
+-	tex.image = &tmp;
+-	/* copy (x,y,width,height,data) */
+-	memcpy(&tmp, &t->image[face][hwlevel], sizeof(tmp));
+-
+-	if (texImage->TexFormat->TexelBytes > 4) {
+-		const int log2TexelBytes =
+-		    (3 + (texImage->TexFormat->TexelBytes >> 4));
+-		tex.format = RADEON_TXFORMAT_I8;	/* any 1-byte texel format */
+-		tex.pitch =
+-		    MAX2((texImage->Width * texImage->TexFormat->TexelBytes) /
+-			 64, 1);
+-		tex.height = imageHeight;
+-		tex.width = imageWidth << log2TexelBytes;
+-		tex.offset += (tmp.x << log2TexelBytes) & ~1023;
+-		tmp.x = tmp.x % (1024 >> log2TexelBytes);
+-		tmp.width = tmp.width << log2TexelBytes;
+-	} else if (texImage->TexFormat->TexelBytes) {
+-		/* use multi-byte upload scheme */
+-		tex.height = imageHeight;
+-		tex.width = imageWidth;
+-		switch (texImage->TexFormat->TexelBytes) {
+-		case 1:
+-			tex.format = RADEON_TXFORMAT_I8;
+-			break;
+-		case 2:
+-			tex.format = RADEON_TXFORMAT_AI88;
+-			break;
+-		case 4:
+-			tex.format = RADEON_TXFORMAT_ARGB8888;
+-			break;
+-		}
+-		tex.pitch =
+-		    MAX2((texImage->Width * texImage->TexFormat->TexelBytes) /
+-			 64, 1);
+-		tex.offset += tmp.x & ~1023;
+-		tmp.x = tmp.x % 1024;
+-
+-		if (t->tile_bits & R300_TXO_MICRO_TILE) {
+-			/* need something like "tiled coordinates" ? */
+-			tmp.y = tmp.x / (tex.pitch * 128) * 2;
+-			tmp.x =
+-			    tmp.x % (tex.pitch * 128) / 2 /
+-			    texImage->TexFormat->TexelBytes;
+-			tex.pitch |= RADEON_DST_TILE_MICRO >> 22;
+-		} else {
+-			tmp.x = tmp.x >> (texImage->TexFormat->TexelBytes >> 1);
+-		}
+-#if 1
+-		if ((t->tile_bits & R300_TXO_MACRO_TILE) &&
+-		    (texImage->Width * texImage->TexFormat->TexelBytes >= 256)
+-		    && ((!(t->tile_bits & R300_TXO_MICRO_TILE)
+-			 && (texImage->Height >= 8))
+-			|| (texImage->Height >= 16))) {
+-			/* weird: R200 disables macro tiling if mip width is smaller than 256 bytes,
+-			   OR if height is smaller than 8 automatically, but if micro tiling is active
+-			   the limit is height 16 instead ? */
+-			tex.pitch |= RADEON_DST_TILE_MACRO >> 22;
+-		}
+-#endif
+-	} else {
+-		/* In case of for instance 8x8 texture (2x2 dxt blocks),
+-		   padding after the first two blocks is needed (only
+-		   with dxt1 since 2 dxt3/dxt5 blocks already use 32 Byte). */
+-		/* set tex.height to 1/4 since 1 "macropixel" (dxt-block)
+-		   has 4 real pixels. Needed so the kernel module reads
+-		   the right amount of data. */
+-		tex.format = RADEON_TXFORMAT_I8;	/* any 1-byte texel format */
+-		tex.pitch = (R300_BLIT_WIDTH_BYTES / 64);
+-		tex.height = (imageHeight + 3) / 4;
+-		tex.width = (imageWidth + 3) / 4;
+-		if ((t->format & R300_TX_FORMAT_DXT1) == R300_TX_FORMAT_DXT1) {
+-			tex.width *= 8;
+-		} else {
+-			tex.width *= 16;
+-		}
+-	}
+-
+-	LOCK_HARDWARE(&rmesa->radeon);
+-	do {
+-		ret =
+-		    drmCommandWriteRead(rmesa->radeon.dri.fd,
+-					DRM_RADEON_TEXTURE, &tex,
+-					sizeof(drm_radeon_texture_t));
+-		if (ret) {
+-			if (RADEON_DEBUG & DEBUG_IOCTL)
+-				fprintf(stderr,
+-					"DRM_RADEON_TEXTURE:  again!\n");
+-			usleep(1);
+-		}
+-	} while (ret == -EAGAIN);
+-
+-	UNLOCK_HARDWARE(&rmesa->radeon);
+-
+-	if (ret) {
+-		fprintf(stderr, "DRM_RADEON_TEXTURE: return = %d\n", ret);
+-		fprintf(stderr, "   offset=0x%08x\n", offset);
+-		fprintf(stderr, "   image width=%d height=%d\n",
+-			imageWidth, imageHeight);
+-		fprintf(stderr, "    blit width=%d height=%d data=%p\n",
+-			t->image[face][hwlevel].width,
+-			t->image[face][hwlevel].height,
+-			t->image[face][hwlevel].data);
+-		_mesa_exit(-1);
+-	}
+-}
+-
+-/**
+- * Upload the texture images associated with texture \a t.  This might
+- * require the allocation of texture memory.
+- *
+- * \param rmesa Context pointer
+- * \param t Texture to be uploaded
+- * \param face Cube map face to be uploaded.  Zero for non-cube maps.
+- */
+-
+-int r300UploadTexImages(r300ContextPtr rmesa, r300TexObjPtr t, GLuint face)
+-{
+-	const int numLevels = t->base.lastLevel - t->base.firstLevel + 1;
+-
+-	if (t->image_override)
+-		return 0;
+-
+-	if (RADEON_DEBUG & (DEBUG_TEXTURE | DEBUG_IOCTL)) {
+-		fprintf(stderr, "%s( %p, %p ) sz=%d lvls=%d-%d\n", __FUNCTION__,
+-			(void *)rmesa->radeon.glCtx, (void *)t->base.tObj,
+-			t->base.totalSize, t->base.firstLevel,
+-			t->base.lastLevel);
+-	}
+-
+-	if (t->base.totalSize == 0)
+-		return 0;
+-
+-	if (RADEON_DEBUG & DEBUG_SYNC) {
+-		fprintf(stderr, "%s: Syncing\n", __FUNCTION__);
+-		radeonFinish(rmesa->radeon.glCtx);
+-	}
+-
+-	LOCK_HARDWARE(&rmesa->radeon);
+-
+-	if (t->base.memBlock == NULL) {
+-		int heap;
+-
+-		heap = driAllocateTexture(rmesa->texture_heaps, rmesa->nr_heaps,
+-					  (driTextureObject *) t);
+-		if (heap == -1) {
+-			UNLOCK_HARDWARE(&rmesa->radeon);
+-			return -1;
+-		}
+-
+-		/* Set the base offset of the texture image */
+-		t->bufAddr = rmesa->radeon.radeonScreen->texOffset[heap]
+-		    + t->base.memBlock->ofs;
+-		t->offset = t->bufAddr;
+-
+-		if (!(t->base.tObj->Image[0][0]->IsClientData)) {
+-			/* hope it's safe to add that here... */
+-			t->offset |= t->tile_bits;
+-		}
+-	}
+-
+-	/* Let the world know we've used this memory recently.
+-	 */
+-	driUpdateTextureLRU((driTextureObject *) t);
+-	UNLOCK_HARDWARE(&rmesa->radeon);
+-
+-	/* Upload any images that are new */
+-	if (t->base.dirty_images[face]) {
+-		int i;
+-		for (i = 0; i < numLevels; i++) {
+-			if ((t->base.
+-			     dirty_images[face] & (1 <<
+-						   (i + t->base.firstLevel))) !=
+-			    0) {
+-				r300UploadSubImage(rmesa, t, i, 0, 0,
+-						   t->image[face][i].width,
+-						   t->image[face][i].height,
+-						   face);
+-			}
+-		}
+-		t->base.dirty_images[face] = 0;
+-	}
+-
+-	if (RADEON_DEBUG & DEBUG_SYNC) {
+-		fprintf(stderr, "%s: Syncing\n", __FUNCTION__);
+-		radeonFinish(rmesa->radeon.glCtx);
+-	}
+-
+-	return 0;
+-}
+diff --git a/src/mesa/drivers/dri/r300/r300_texstate.c b/src/mesa/drivers/dri/r300/r300_texstate.c
+index e2329f0..25cd4d8 100644
+--- a/src/mesa/drivers/dri/r300/r300_texstate.c
++++ b/src/mesa/drivers/dri/r300/r300_texstate.c
+@@ -47,7 +47,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "r300_context.h"
+ #include "r300_state.h"
+ #include "r300_ioctl.h"
+-#include "radeon_ioctl.h"
++#include "radeon_mipmap_tree.h"
+ #include "r300_tex.h"
+ #include "r300_reg.h"
+ 
+@@ -143,13 +143,12 @@ void r300SetDepthTexMode(struct gl_texture_object *tObj)
+ 		},
+ 	};
+ 	const GLuint *format;
+-	r300TexObjPtr t;
++	radeonTexObjPtr t;
+ 
+ 	if (!tObj)
+ 		return;
+ 
+-	t = (r300TexObjPtr) tObj->DriverData;
+-
++	t = radeon_tex_obj(tObj);
+ 
+ 	switch (tObj->Image[0][tObj->BaseLevel]->TexFormat->MesaFormat) {
+ 	case MESA_FORMAT_Z16:
+@@ -171,13 +170,13 @@ void r300SetDepthTexMode(struct gl_texture_object *tObj)
+ 
+ 	switch (tObj->DepthMode) {
+ 	case GL_LUMINANCE:
+-		t->format = format[0];
++		t->pp_txformat = format[0];
+ 		break;
+ 	case GL_INTENSITY:
+-		t->format = format[1];
++		t->pp_txformat = format[1];
+ 		break;
+ 	case GL_ALPHA:
+-		t->format = format[2];
++		t->pp_txformat = format[2];
+ 		break;
+ 	default:
+ 		/* Error...which should have already been caught by higher
+@@ -190,479 +189,296 @@ void r300SetDepthTexMode(struct gl_texture_object *tObj)
+ 
+ 
+ /**
+- * Compute sizes and fill in offset and blit information for the given
+- * image (determined by \p face and \p level).
+- *
+- * \param curOffset points to the offset at which the image is to be stored
+- * and is updated by this function according to the size of the image.
+- */
+-static void compute_tex_image_offset(
+-	struct gl_texture_object *tObj,
+-	GLuint face,
+-	GLint level,
+-	GLint* curOffset)
+-{
+-	r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
+-	const struct gl_texture_image* texImage;
+-	GLuint blitWidth = R300_BLIT_WIDTH_BYTES;
+-	GLuint texelBytes;
+-	GLuint size;
+-
+-	texImage = tObj->Image[0][level + t->base.firstLevel];
+-	if (!texImage)
+-		return;
+-
+-	texelBytes = texImage->TexFormat->TexelBytes;
+-
+-	/* find image size in bytes */
+-	if (texImage->IsCompressed) {
+-		if ((t->format & R300_TX_FORMAT_DXT1) ==
+-			R300_TX_FORMAT_DXT1) {
+-			// fprintf(stderr,"DXT 1 %d %08X\n", texImage->Width, t->format);
+-			if ((texImage->Width + 3) < 8)	/* width one block */
+-				size = texImage->CompressedSize * 4;
+-			else if ((texImage->Width + 3) < 16)
+-				size = texImage->CompressedSize * 2;
+-			else
+-				size = texImage->CompressedSize;
+-		} else {
+-			/* DXT3/5, 16 bytes per block */
+-			WARN_ONCE
+-				("DXT 3/5 suffers from multitexturing problems!\n");
+-			// fprintf(stderr,"DXT 3/5 %d\n", texImage->Width);
+-			if ((texImage->Width + 3) < 8)
+-				size = texImage->CompressedSize * 2;
+-			else
+-				size = texImage->CompressedSize;
+-		}
+-	} else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
+-		size =
+-			((texImage->Width * texelBytes +
+-			63) & ~63) * texImage->Height;
+-		blitWidth = 64 / texelBytes;
+-	} else if (t->tile_bits & R300_TXO_MICRO_TILE) {
+-		/* tile pattern is 16 bytes x2. mipmaps stay 32 byte aligned,
+-			though the actual offset may be different (if texture is less than
+-			32 bytes width) to the untiled case */
+-		int w = (texImage->Width * texelBytes * 2 + 31) & ~31;
+-		size =
+-			(w * ((texImage->Height + 1) / 2)) *
+-			texImage->Depth;
+-		blitWidth = MAX2(texImage->Width, 64 / texelBytes);
+-	} else {
+-		int w = (texImage->Width * texelBytes + 31) & ~31;
+-		size = w * texImage->Height * texImage->Depth;
+-		blitWidth = MAX2(texImage->Width, 64 / texelBytes);
+-	}
+-	assert(size > 0);
+-
+-	if (RADEON_DEBUG & DEBUG_TEXTURE)
+-		fprintf(stderr, "w=%d h=%d d=%d tb=%d intFormat=%d\n",
+-			texImage->Width, texImage->Height,
+-			texImage->Depth,
+-			texImage->TexFormat->TexelBytes,
+-			texImage->InternalFormat);
+-
+-	/* All images are aligned to a 32-byte offset */
+-	*curOffset = (*curOffset + 0x1f) & ~0x1f;
+-
+-	if (texelBytes) {
+-		/* fix x and y coords up later together with offset */
+-		t->image[face][level].x = *curOffset;
+-		t->image[face][level].y = 0;
+-		t->image[face][level].width =
+-			MIN2(size / texelBytes, blitWidth);
+-		t->image[face][level].height =
+-			(size / texelBytes) / t->image[face][level].width;
+-	} else {
+-		t->image[face][level].x = *curOffset % R300_BLIT_WIDTH_BYTES;
+-		t->image[face][level].y = *curOffset / R300_BLIT_WIDTH_BYTES;
+-		t->image[face][level].width =
+-			MIN2(size, R300_BLIT_WIDTH_BYTES);
+-		t->image[face][level].height = size / t->image[face][level].width;
+-	}
+-
+-	if (RADEON_DEBUG & DEBUG_TEXTURE)
+-		fprintf(stderr,
+-			"level %d, face %d: %dx%d x=%d y=%d w=%d h=%d size=%d at %d\n",
+-			level, face, texImage->Width, texImage->Height,
+-			t->image[face][level].x, t->image[face][level].y,
+-			t->image[face][level].width, t->image[face][level].height,
+-			size, *curOffset);
+-
+-	*curOffset += size;
+-}
+-
+-
+-
+-/**
+- * This function computes the number of bytes of storage needed for
+- * the given texture object (all mipmap levels, all cube faces).
+- * The \c image[face][level].x/y/width/height parameters for upload/blitting
+- * are computed here.  \c filter, \c format, etc. will be set here
+- * too.
++ * Compute the cached hardware register values for the given texture object.
+  *
+  * \param rmesa Context pointer
+- * \param tObj GL texture object whose images are to be posted to
+- *                 hardware state.
++ * \param t the r300 texture object
+  */
+-static void r300SetTexImages(r300ContextPtr rmesa,
+-			     struct gl_texture_object *tObj)
++static void setup_hardware_state(r300ContextPtr rmesa, radeonTexObj *t)
+ {
+-	r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
+-	const struct gl_texture_image *baseImage =
+-	    tObj->Image[0][tObj->BaseLevel];
+-	GLint curOffset;
+-	GLint i, texelBytes;
+-	GLint numLevels;
+-	GLint log2Width, log2Height, log2Depth;
+-
+-	/* Set the hardware texture format
+-	 */
++	const struct gl_texture_image *firstImage;
++	int firstlevel = t->mt ? t->mt->firstLevel : 0;
++	    
++	firstImage = t->base.Image[0][firstlevel];
++
+ 	if (!t->image_override
+-	    && VALID_FORMAT(baseImage->TexFormat->MesaFormat)) {
+-		if (baseImage->TexFormat->BaseFormat == GL_DEPTH_COMPONENT) {
+-			r300SetDepthTexMode(tObj);
++	    && VALID_FORMAT(firstImage->TexFormat->MesaFormat)) {
++		if (firstImage->TexFormat->BaseFormat == GL_DEPTH_COMPONENT) {
++			r300SetDepthTexMode(&t->base);
+ 		} else {
+-			t->format = tx_table[baseImage->TexFormat->MesaFormat].format;
++			t->pp_txformat = tx_table[firstImage->TexFormat->MesaFormat].format;
+ 		}
+ 
+-		t->filter |= tx_table[baseImage->TexFormat->MesaFormat].filter;
++		t->pp_txfilter |= tx_table[firstImage->TexFormat->MesaFormat].filter;
+ 	} else if (!t->image_override) {
+ 		_mesa_problem(NULL, "unexpected texture format in %s",
+ 			      __FUNCTION__);
+ 		return;
+ 	}
+ 
+-	texelBytes = baseImage->TexFormat->TexelBytes;
+-
+-	/* Compute which mipmap levels we really want to send to the hardware.
+-	 */
+-	driCalculateTextureFirstLastLevel((driTextureObject *) t);
+-	log2Width = tObj->Image[0][t->base.firstLevel]->WidthLog2;
+-	log2Height = tObj->Image[0][t->base.firstLevel]->HeightLog2;
+-	log2Depth = tObj->Image[0][t->base.firstLevel]->DepthLog2;
+-
+-	numLevels = t->base.lastLevel - t->base.firstLevel + 1;
++	if (t->image_override)
++		return;
+ 
+-	assert(numLevels <= RADEON_MAX_TEXTURE_LEVELS);
++	t->pp_txsize = (((firstImage->Width - 1) << R300_TX_WIDTHMASK_SHIFT)
++			| ((firstImage->Height - 1) << R300_TX_HEIGHTMASK_SHIFT)
++			| ((firstImage->DepthLog2) << R300_TX_DEPTHMASK_SHIFT)
++			| ((t->mt->lastLevel - t->mt->firstLevel) << R300_TX_MAX_MIP_LEVEL_SHIFT));
+ 
+-	/* Calculate mipmap offsets and dimensions for blitting (uploading)
+-	 * The idea is that we lay out the mipmap levels within a block of
+-	 * memory organized as a rectangle of width BLIT_WIDTH_BYTES.
+-	 */
+ 	t->tile_bits = 0;
+ 
+-	/* figure out if this texture is suitable for tiling. */
+-#if 0				/* Disabled for now */
+-	if (texelBytes) {
+-		if ((tObj->Target != GL_TEXTURE_RECTANGLE_NV) &&
+-		    /* texrect might be able to use micro tiling too in theory? */
+-		    (baseImage->Height > 1)) {
+-
+-			/* allow 32 (bytes) x 1 mip (which will use two times the space
+-			   the non-tiled version would use) max if base texture is large enough */
+-			if ((numLevels == 1) ||
+-			    (((baseImage->Width * texelBytes /
+-			       baseImage->Height) <= 32)
+-			     && (baseImage->Width * texelBytes > 64))
+-			    ||
+-			    ((baseImage->Width * texelBytes /
+-			      baseImage->Height) <= 16)) {
+-				t->tile_bits |= R300_TXO_MICRO_TILE;
+-			}
+-		}
+-
+-		if (tObj->Target != GL_TEXTURE_RECTANGLE_NV) {
+-			/* we can set macro tiling even for small textures, they will be untiled anyway */
+-			t->tile_bits |= R300_TXO_MACRO_TILE;
+-		}
+-	}
+-#endif
+-
+-	curOffset = 0;
++	if (t->base.Target == GL_TEXTURE_CUBE_MAP)
++		t->pp_txformat |= R300_TX_FORMAT_CUBIC_MAP;
++	if (t->base.Target == GL_TEXTURE_3D)
++		t->pp_txformat |= R300_TX_FORMAT_3D;
+ 
+-	if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
+-		ASSERT(log2Width == log2Height);
+-		t->format |= R300_TX_FORMAT_CUBIC_MAP;
+ 
+-		for(i = 0; i < numLevels; i++) {
+-			GLuint face;
+-			for(face = 0; face < 6; face++)
+-				compute_tex_image_offset(tObj, face, i, &curOffset);
+-		}
+-	} else {
+-		if (tObj->Target == GL_TEXTURE_3D)
+-                	t->format |= R300_TX_FORMAT_3D;
+-
+-		for (i = 0; i < numLevels; i++)
+-			compute_tex_image_offset(tObj, 0, i, &curOffset);
+-	}
+-
+-	/* Align the total size of texture memory block.
+-	 */
+-	t->base.totalSize =
+-	    (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK;
+-
+-	t->size =
+-	    (((tObj->Image[0][t->base.firstLevel]->Width -
+-	       1) << R300_TX_WIDTHMASK_SHIFT)
+-	     | ((tObj->Image[0][t->base.firstLevel]->Height - 1) <<
+-		R300_TX_HEIGHTMASK_SHIFT)
+-	     | ((tObj->Image[0][t->base.firstLevel]->DepthLog2) <<
+-		R300_TX_DEPTHMASK_SHIFT))
+-	    | ((numLevels - 1) << R300_TX_MAX_MIP_LEVEL_SHIFT);
+-
+-	t->pitch = 0;
+-
+-	/* Only need to round to nearest 32 for textures, but the blitter
+-	 * requires 64-byte aligned pitches, and we may/may not need the
+-	 * blitter.   NPOT only!
+-	 */
+-	if (baseImage->IsCompressed) {
+-		t->pitch |=
+-		    (tObj->Image[0][t->base.firstLevel]->Width + 63) & ~(63);
+-	} else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
+-		unsigned int align = (64 / texelBytes) - 1;
+-		t->pitch |= ((tObj->Image[0][t->base.firstLevel]->Width *
+-			     texelBytes) + 63) & ~(63);
+-		t->size |= R300_TX_SIZE_TXPITCH_EN;
++	if (t->base.Target == GL_TEXTURE_RECTANGLE_NV) {
++		unsigned int align = (64 / t->mt->bpp) - 1;
++		t->pp_txsize |= R300_TX_SIZE_TXPITCH_EN;
+ 		if (!t->image_override)
+-			t->pitch_reg =
+-			    (((tObj->Image[0][t->base.firstLevel]->Width) +
+-			      align) & ~align) - 1;
+-	} else {
+-		t->pitch |=
+-		    ((tObj->Image[0][t->base.firstLevel]->Width *
+-		      texelBytes) + 63) & ~(63);
++			t->pp_txpitch = ((firstImage->Width + align) & ~align) - 1;
+ 	}
+ 
+ 	if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
+-	    if (tObj->Image[0][t->base.firstLevel]->Width > 2048)
+-		t->pitch_reg |= R500_TXWIDTH_BIT11;
+-	    if (tObj->Image[0][t->base.firstLevel]->Height > 2048)
+-		t->pitch_reg |= R500_TXHEIGHT_BIT11;
++	    if (firstImage->Width > 2048)
++		t->pp_txpitch |= R500_TXWIDTH_BIT11;
++	    if (firstImage->Height > 2048)
++		t->pp_txpitch |= R500_TXHEIGHT_BIT11;
+ 	}
+ }
+ 
+-/* ================================================================
+- * Texture unit state management
++/**
++ * Ensure the given texture is ready for rendering.
++ *
++ * Mostly this means populating the texture object's mipmap tree.
+  */
+-
+-static GLboolean r300EnableTexture2D(GLcontext * ctx, int unit)
++static GLboolean r300_validate_texture(GLcontext * ctx, struct gl_texture_object *texObj)
+ {
+ 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
+-	struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+-	struct gl_texture_object *tObj = texUnit->_Current;
+-	r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
++	radeonTexObj *t = radeon_tex_obj(texObj);
+ 
+-	ASSERT(tObj->Target == GL_TEXTURE_2D || tObj->Target == GL_TEXTURE_1D);
+-
+-	if (t->base.dirty_images[0]) {
+-		R300_FIREVERTICES(rmesa);
++	if (!radeon_validate_texture_miptree(ctx, texObj))
++		return GL_FALSE;
+ 
+-		r300SetTexImages(rmesa, tObj);
+-		r300UploadTexImages(rmesa, (r300TexObjPtr) tObj->DriverData, 0);
+-		if (!t->base.memBlock && !t->image_override)
+-			return GL_FALSE;
+-	}
++	/* Configure the hardware registers (more precisely, the cached version
++	 * of the hardware registers). */
++	setup_hardware_state(rmesa, t);
+ 
++	t->validated = GL_TRUE;
+ 	return GL_TRUE;
+ }
+ 
+-static GLboolean r300EnableTexture3D(GLcontext * ctx, int unit)
++
++/**
++ * Ensure all enabled and complete textures are uploaded along with any buffers being used.
++ */
++GLboolean r300ValidateBuffers(GLcontext * ctx)
+ {
+ 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
+-	struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+-	struct gl_texture_object *tObj = texUnit->_Current;
+-	r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
+-
+-	ASSERT(tObj->Target == GL_TEXTURE_3D);
+-
+-	/* r300 does not support mipmaps for 3D textures. */
+-	if ((tObj->MinFilter != GL_NEAREST) && (tObj->MinFilter != GL_LINEAR)) {
+-		return GL_FALSE;
++	struct radeon_cs_space_check bos[16];
++	struct radeon_renderbuffer *rrb;
++	int num_bo = 0;
++	int i;
++	int flushed = 0, ret;
++again:
++	num_bo = 0;
++
++	rrb = radeon_get_colorbuffer(&rmesa->radeon);
++	/* color buffer */
++	if (rrb && rrb->bo) {
++		bos[num_bo].bo = rrb->bo;
++		bos[num_bo].read_domains = 0;
++		bos[num_bo].write_domain = RADEON_GEM_DOMAIN_VRAM;
++		bos[num_bo].new_accounted = 0;
++		num_bo++;
+ 	}
+ 
+-	if (t->base.dirty_images[0]) {
+-		R300_FIREVERTICES(rmesa);
+-		r300SetTexImages(rmesa, tObj);
+-		r300UploadTexImages(rmesa, (r300TexObjPtr) tObj->DriverData, 0);
+-		if (!t->base.memBlock)
+-			return GL_FALSE;
++	/* depth buffer */
++	rrb = radeon_get_depthbuffer(&rmesa->radeon);
++	/* only account for it when it is backed by a BO */
++	if (rrb && rrb->bo) {
++		bos[num_bo].bo = rrb->bo;
++		bos[num_bo].read_domains = 0;
++		bos[num_bo].write_domain = RADEON_GEM_DOMAIN_VRAM;
++		bos[num_bo].new_accounted = 0;
++		num_bo++;
+ 	}
++	
++	for (i = 0; i < ctx->Const.MaxTextureImageUnits; ++i) {
++		radeonTexObj *t;
+ 
+-	return GL_TRUE;
+-}
+-
+-static GLboolean r300EnableTextureCube(GLcontext * ctx, int unit)
+-{
+-	r300ContextPtr rmesa = R300_CONTEXT(ctx);
+-	struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+-	struct gl_texture_object *tObj = texUnit->_Current;
+-	r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
+-	GLuint face;
+-
+-	ASSERT(tObj->Target == GL_TEXTURE_CUBE_MAP);
+-
+-	if (t->base.dirty_images[0] || t->base.dirty_images[1] ||
+-	    t->base.dirty_images[2] || t->base.dirty_images[3] ||
+-	    t->base.dirty_images[4] || t->base.dirty_images[5]) {
+-		/* flush */
+-		R300_FIREVERTICES(rmesa);
+-		/* layout memory space, once for all faces */
+-		r300SetTexImages(rmesa, tObj);
+-	}
++		if (!ctx->Texture.Unit[i]._ReallyEnabled)
++			continue;
+ 
+-	/* upload (per face) */
+-	for (face = 0; face < 6; face++) {
+-		if (t->base.dirty_images[face]) {
+-			r300UploadTexImages(rmesa,
+-					    (r300TexObjPtr) tObj->DriverData,
+-					    face);
++		if (!r300_validate_texture(ctx, ctx->Texture.Unit[i]._Current)) {
++			_mesa_warning(ctx,
++				      "failed to validate texture for unit %d.\n",
++				      i);
+ 		}
++		t = radeon_tex_obj(ctx->Texture.Unit[i]._Current);
++		if (t->image_override && t->bo)
++			bos[num_bo].bo = t->bo;
++		else if (t->mt->bo)
++			bos[num_bo].bo = t->mt->bo;
++		bos[num_bo].read_domains = RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM;
++		bos[num_bo].write_domain = 0;
++		bos[num_bo].new_accounted = 0;
++		num_bo++;
+ 	}
+ 
+-	if (!t->base.memBlock) {
+-		/* texmem alloc failed, use s/w fallback */
++	ret = radeon_cs_space_check(rmesa->radeon.cmdbuf.cs, bos, num_bo);
++	if (ret == RADEON_CS_SPACE_OP_TO_BIG)
+ 		return GL_FALSE;
+-	}
+-
+-	return GL_TRUE;
+-}
+-
+-static GLboolean r300EnableTextureRect(GLcontext * ctx, int unit)
+-{
+-	r300ContextPtr rmesa = R300_CONTEXT(ctx);
+-	struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+-	struct gl_texture_object *tObj = texUnit->_Current;
+-	r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
+-
+-	ASSERT(tObj->Target == GL_TEXTURE_RECTANGLE_NV);
+-
+-	if (t->base.dirty_images[0]) {
+-		R300_FIREVERTICES(rmesa);
+-
+-		r300SetTexImages(rmesa, tObj);
+-		r300UploadTexImages(rmesa, (r300TexObjPtr) tObj->DriverData, 0);
+-		if (!t->base.memBlock && !t->image_override &&
+-		    !rmesa->prefer_gart_client_texturing)
++	if (ret == RADEON_CS_SPACE_FLUSH) {
++		r300Flush(ctx);
++		if (flushed)
+ 			return GL_FALSE;
++		flushed = 1;
++		goto again;
+ 	}
+-
+ 	return GL_TRUE;
+ }
+ 
+-static GLboolean r300UpdateTexture(GLcontext * ctx, int unit)
+-{
+-	r300ContextPtr rmesa = R300_CONTEXT(ctx);
+-	struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+-	struct gl_texture_object *tObj = texUnit->_Current;
+-	r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData;
+-
+-	/* Fallback if there's a texture border */
+-	if (tObj->Image[0][tObj->BaseLevel]->Border > 0)
+-		return GL_FALSE;
+-
+-	/* Update state if this is a different texture object to last
+-	 * time.
+-	 */
+-	if (rmesa->state.texture.unit[unit].texobj != t) {
+-		if (rmesa->state.texture.unit[unit].texobj != NULL) {
+-			/* The old texture is no longer bound to this texture unit.
+-			 * Mark it as such.
+-			 */
+-
+-			rmesa->state.texture.unit[unit].texobj->base.bound &=
+-			    ~(1 << unit);
+-		}
+-
+-		rmesa->state.texture.unit[unit].texobj = t;
+-		t->base.bound |= (1 << unit);
+-		driUpdateTextureLRU((driTextureObject *) t);	/* XXX: should be locked! */
+-	}
+-
+-	return !t->border_fallback;
+-}
+-
+ void r300SetTexOffset(__DRIcontext * pDRICtx, GLint texname,
+ 		      unsigned long long offset, GLint depth, GLuint pitch)
+ {
+ 	r300ContextPtr rmesa = pDRICtx->driverPrivate;
+ 	struct gl_texture_object *tObj =
+ 	    _mesa_lookup_texture(rmesa->radeon.glCtx, texname);
+-	r300TexObjPtr t;
++	radeonTexObjPtr t = radeon_tex_obj(tObj);
+ 	uint32_t pitch_val;
+ 
+ 	if (!tObj)
+ 		return;
+ 
+-	t = (r300TexObjPtr) tObj->DriverData;
+-
+ 	t->image_override = GL_TRUE;
+ 
+ 	if (!offset)
+ 		return;
+ 
+-	t->offset = offset;
+-	t->pitch_reg &= (1 << 13) -1;
++	t->bo = NULL;
++	t->override_offset = offset;
++	t->pp_txpitch &= (1 << 13) -1;
+ 	pitch_val = pitch;
+ 
+ 	switch (depth) {
+ 	case 32:
+-		t->format = R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8);
+-		t->filter |= tx_table[2].filter;
++		t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8);
++		t->pp_txfilter |= tx_table[2].filter;
+ 		pitch_val /= 4;
+ 		break;
+ 	case 24:
+ 	default:
+-		t->format = R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8);
+-		t->filter |= tx_table[4].filter;
++		t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8);
++		t->pp_txfilter |= tx_table[4].filter;
+ 		pitch_val /= 4;
+ 		break;
+ 	case 16:
+-		t->format = R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5);
+-		t->filter |= tx_table[5].filter;
++		t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5);
++		t->pp_txfilter |= tx_table[5].filter;
+ 		pitch_val /= 2;
+ 		break;
+ 	}
+ 	pitch_val--;
+ 
+-	t->pitch_reg |= pitch_val;
++	t->pp_txpitch |= pitch_val;
+ }
+ 
+-static GLboolean r300UpdateTextureUnit(GLcontext * ctx, int unit)
++void r300SetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv)
+ {
+-	struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+-
+-	if (texUnit->_ReallyEnabled & (TEXTURE_RECT_BIT)) {
+-		return (r300EnableTextureRect(ctx, unit) &&
+-			r300UpdateTexture(ctx, unit));
+-	} else if (texUnit->_ReallyEnabled & (TEXTURE_1D_BIT | TEXTURE_2D_BIT)) {
+-		return (r300EnableTexture2D(ctx, unit) &&
+-			r300UpdateTexture(ctx, unit));
+-	} else if (texUnit->_ReallyEnabled & (TEXTURE_3D_BIT)) {
+-		return (r300EnableTexture3D(ctx, unit) &&
+-			r300UpdateTexture(ctx, unit));
+-	} else if (texUnit->_ReallyEnabled & (TEXTURE_CUBE_BIT)) {
+-		return (r300EnableTextureCube(ctx, unit) &&
+-			r300UpdateTexture(ctx, unit));
+-	} else if (texUnit->_ReallyEnabled) {
+-		return GL_FALSE;
+-	} else {
+-		return GL_TRUE;
+-	}
+-}
++	struct gl_texture_unit *texUnit;
++	struct gl_texture_object *texObj;
++	struct gl_texture_image *texImage;
++	struct radeon_renderbuffer *rb;
++	radeon_texture_image *rImage;
++	radeonContextPtr radeon;
++	r300ContextPtr rmesa;
++	GLframebuffer *fb;
++	radeonTexObjPtr t;
++	uint32_t pitch_val;
+ 
+-void r300UpdateTextureState(GLcontext * ctx)
+-{
+-	int i;
++	target = GL_TEXTURE_RECTANGLE_ARB;
+ 
+-	for (i = 0; i < 8; i++) {
+-		if (!r300UpdateTextureUnit(ctx, i)) {
+-			_mesa_warning(ctx,
+-				      "failed to update texture state for unit %d.\n",
+-				      i);
+-		}
++	radeon = pDRICtx->driverPrivate;
++	rmesa = pDRICtx->driverPrivate;
++
++	fb = dPriv->driverPrivate;
++        texUnit = &radeon->glCtx->Texture.Unit[radeon->glCtx->Texture.CurrentUnit];
++	texObj = _mesa_select_tex_object(radeon->glCtx, texUnit, target);
++        texImage = _mesa_get_tex_image(radeon->glCtx, texObj, target, 0);
++
++	rImage = get_radeon_texture_image(texImage);
++	t = radeon_tex_obj(texObj);
++        if (t == NULL) {
++    	    return;
++    	}
++
++	radeon_update_renderbuffers(pDRICtx, dPriv);
++	/* back & depth buffers are useless here; free them right away */
++	rb = (void*)fb->Attachment[BUFFER_DEPTH].Renderbuffer;
++	if (rb && rb->bo) {
++		radeon_bo_unref(rb->bo);
++        rb->bo = NULL;
++	}
++	rb = (void*)fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
++	if (rb && rb->bo) {
++		radeon_bo_unref(rb->bo);
++		rb->bo = NULL;
++	}
++	rb = (void*)fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
++	if (rb->bo == NULL) {
++		/* Failed to get a BO for the front-left buffer */
++		return;
++	}
++	
++	_mesa_lock_texture(radeon->glCtx, texObj);
++	if (t->bo) {
++		t->bo = NULL;
+ 	}
++	if (t->mt) {
++		t->mt = NULL;
++	}
++	if (rImage->mt) {
++		radeon_miptree_unreference(rImage->mt);
++		rImage->mt = NULL;
++	}
++	fprintf(stderr,"settexbuf %dx%d@%d\n", rb->width, rb->height, rb->cpp);
++	_mesa_init_teximage_fields(radeon->glCtx, target, texImage,
++				   rb->width, rb->height, 1, 0, rb->cpp);
++	texImage->TexFormat = &_mesa_texformat_rgba8888_rev;
++	rImage->bo = rb->bo;
++	
++	t->bo = rb->bo;
++	radeon_bo_ref(t->bo);
++	t->tile_bits = 0;
++	t->image_override = GL_TRUE;
++	t->override_offset = 0;
++	t->pp_txpitch &= (1 << 13) -1;
++	pitch_val = rb->pitch;
++	switch (rb->cpp) {
++	case 4:
++		t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8);
++		t->pp_txfilter |= tx_table[2].filter;
++		pitch_val /= 4;
++		break;
++	case 3:
++	default:
++		t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8);
++		t->pp_txfilter |= tx_table[4].filter;
++		pitch_val /= 4;
++		break;
++	case 2:
++		t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5);
++		t->pp_txfilter |= tx_table[5].filter;
++		pitch_val /= 2;
++		break;
++	}
++	pitch_val--;
++	t->pp_txsize = ((rb->width - 1) << R300_TX_WIDTHMASK_SHIFT) |
++              ((rb->height - 1) << R300_TX_HEIGHTMASK_SHIFT);
++	t->pp_txsize |= R300_TX_SIZE_TXPITCH_EN;
++	t->pp_txpitch |= pitch_val;
++	t->validated = GL_TRUE;
++	_mesa_unlock_texture(radeon->glCtx, texObj);
++	return;
+ }
+diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c
+index 75dae86..926ddd5 100644
+--- a/src/mesa/drivers/dri/r300/r500_fragprog.c
++++ b/src/mesa/drivers/dri/r300/r500_fragprog.c
+@@ -31,6 +31,12 @@
+ #include "radeon_program_alu.h"
+ 
+ 
++static void reset_srcreg(struct prog_src_register* reg)
++{
++	_mesa_bzero(reg, sizeof(*reg));
++	reg->Swizzle = SWIZZLE_NOOP;
++}
++
+ static struct prog_src_register shadow_ambient(struct gl_program *program, int tmu)
+ {
+ 	gl_state_index fail_value_tokens[STATE_LENGTH] = {
+@@ -99,6 +105,19 @@ static GLboolean transform_TEX(
+ 		destredirect = GL_TRUE;
+ 	}
+ 
++	if (inst.SrcReg[0].File != PROGRAM_TEMPORARY && inst.SrcReg[0].File != PROGRAM_INPUT) {
++		int tmpreg = radeonFindFreeTemporary(t);
++		tgt = radeonAppendInstructions(t->Program, 1);
++		tgt->Opcode = OPCODE_MOV;
++		tgt->DstReg.File = PROGRAM_TEMPORARY;
++		tgt->DstReg.Index = tmpreg;
++		tgt->SrcReg[0] = inst.SrcReg[0];
++
++		reset_srcreg(&inst.SrcReg[0]);
++		inst.SrcReg[0].File = PROGRAM_TEMPORARY;
++		inst.SrcReg[0].Index = tmpreg;
++	}
++
+ 	tgt = radeonAppendInstructions(t->Program, 1);
+ 	_mesa_copy_instructions(tgt, &inst, 1);
+ 
+diff --git a/src/mesa/drivers/dri/r300/radeon_context.c b/src/mesa/drivers/dri/r300/radeon_context.c
+deleted file mode 100644
+index 5267fe9..0000000
+--- a/src/mesa/drivers/dri/r300/radeon_context.c
++++ /dev/null
+@@ -1,330 +0,0 @@
+-/*
+-Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+-
+-The Weather Channel (TM) funded Tungsten Graphics to develop the
+-initial release of the Radeon 8500 driver under the XFree86 license.
+-This notice must be preserved.
+-
+-Permission is hereby granted, free of charge, to any person obtaining
+-a copy of this software and associated documentation files (the
+-"Software"), to deal in the Software without restriction, including
+-without limitation the rights to use, copy, modify, merge, publish,
+-distribute, sublicense, and/or sell copies of the Software, and to
+-permit persons to whom the Software is furnished to do so, subject to
+-the following conditions:
+-
+-The above copyright notice and this permission notice (including the
+-next paragraph) shall be included in all copies or substantial
+-portions of the Software.
+-
+-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+-IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+-
+-**************************************************************************/
+-
+-/**
+- * \file radeon_context.c
+- * Common context initialization.
+- *
+- * \author Keith Whitwell <keith@tungstengraphics.com>
+- */
+-
+-#include <dlfcn.h>
+-
+-#include "main/glheader.h"
+-#include "main/imports.h"
+-#include "main/context.h"
+-#include "main/state.h"
+-#include "main/matrix.h"
+-#include "main/framebuffer.h"
+-
+-#include "drivers/common/driverfuncs.h"
+-#include "swrast/swrast.h"
+-
+-#include "radeon_screen.h"
+-#include "radeon_ioctl.h"
+-#include "radeon_macros.h"
+-#include "radeon_reg.h"
+-
+-#include "radeon_state.h"
+-#include "r300_state.h"
+-
+-#include "utils.h"
+-#include "vblank.h"
+-#include "xmlpool.h"		/* for symbolic values of enum-type options */
+-
+-#define DRIVER_DATE "20060815"
+-
+-
+-/* Return various strings for glGetString().
+- */
+-static const GLubyte *radeonGetString(GLcontext * ctx, GLenum name)
+-{
+-	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+-	static char buffer[128];
+-
+-	switch (name) {
+-	case GL_VENDOR:
+-		if (IS_R300_CLASS(radeon->radeonScreen))
+-			return (GLubyte *) "DRI R300 Project";
+-		else
+-			return (GLubyte *) "Tungsten Graphics, Inc.";
+-
+-	case GL_RENDERER:
+-	{
+-		unsigned offset;
+-		GLuint agp_mode = (radeon->radeonScreen->card_type==RADEON_CARD_PCI) ? 0 :
+-			radeon->radeonScreen->AGPMode;
+-		const char* chipname;
+-
+-		if (IS_R300_CLASS(radeon->radeonScreen))
+-			chipname = "R300";
+-		else
+-			chipname = "R200";
+-
+-		offset = driGetRendererString(buffer, chipname, DRIVER_DATE,
+-					      agp_mode);
+-
+-		if (IS_R300_CLASS(radeon->radeonScreen)) {
+-		sprintf(&buffer[offset], " %sTCL",
+-			(radeon->radeonScreen->chip_flags & RADEON_CHIPSET_TCL)
+-			? "" : "NO-");
+-		} else {
+-			sprintf(&buffer[offset], " %sTCL",
+-			!(radeon->TclFallback & RADEON_TCL_FALLBACK_TCL_DISABLE)
+-			? "" : "NO-");
+-		}
+-
+-		return (GLubyte *) buffer;
+-	}
+-
+-	default:
+-		return NULL;
+-	}
+-}
+-
+-/* Initialize the driver's misc functions.
+- */
+-static void radeonInitDriverFuncs(struct dd_function_table *functions)
+-{
+-	functions->GetString = radeonGetString;
+-}
+-
+-
+-/**
+- * Create and initialize all common fields of the context,
+- * including the Mesa context itself.
+- */
+-GLboolean radeonInitContext(radeonContextPtr radeon,
+-			    struct dd_function_table* functions,
+-			    const __GLcontextModes * glVisual,
+-			    __DRIcontextPrivate * driContextPriv,
+-			    void *sharedContextPrivate)
+-{
+-	__DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv;
+-	radeonScreenPtr screen = (radeonScreenPtr) (sPriv->private);
+-	GLcontext* ctx;
+-	GLcontext* shareCtx;
+-	int fthrottle_mode;
+-
+-	/* Fill in additional standard functions. */
+-	radeonInitDriverFuncs(functions);
+-
+-	radeon->radeonScreen = screen;
+-	/* Allocate and initialize the Mesa context */
+-	if (sharedContextPrivate)
+-		shareCtx = ((radeonContextPtr)sharedContextPrivate)->glCtx;
+-	else
+-		shareCtx = NULL;
+-	radeon->glCtx = _mesa_create_context(glVisual, shareCtx,
+-					    functions, (void *)radeon);
+-	if (!radeon->glCtx)
+-		return GL_FALSE;
+-
+-	ctx = radeon->glCtx;
+-	driContextPriv->driverPrivate = radeon;
+-
+-	/* DRI fields */
+-	radeon->dri.context = driContextPriv;
+-	radeon->dri.screen = sPriv;
+-	radeon->dri.drawable = NULL;
+-	radeon->dri.readable = NULL;
+-	radeon->dri.hwContext = driContextPriv->hHWContext;
+-	radeon->dri.hwLock = &sPriv->pSAREA->lock;
+-	radeon->dri.fd = sPriv->fd;
+-	radeon->dri.drmMinor = sPriv->drm_version.minor;
+-
+-	radeon->sarea = (drm_radeon_sarea_t *) ((GLubyte *) sPriv->pSAREA +
+-					       screen->sarea_priv_offset);
+-
+-	/* Setup IRQs */
+-	fthrottle_mode = driQueryOptioni(&radeon->optionCache, "fthrottle_mode");
+-	radeon->iw.irq_seq = -1;
+-	radeon->irqsEmitted = 0;
+-	radeon->do_irqs = (fthrottle_mode == DRI_CONF_FTHROTTLE_IRQS &&
+-			  radeon->radeonScreen->irq);
+-
+-	radeon->do_usleeps = (fthrottle_mode == DRI_CONF_FTHROTTLE_USLEEPS);
+-
+-	if (!radeon->do_irqs)
+-		fprintf(stderr,
+-			"IRQ's not enabled, falling back to %s: %d %d\n",
+-			radeon->do_usleeps ? "usleeps" : "busy waits",
+-			fthrottle_mode, radeon->radeonScreen->irq);
+-
+-	(*sPriv->systemTime->getUST) (&radeon->swap_ust);
+-
+-	return GL_TRUE;
+-}
+-
+-
+-/**
+- * Cleanup common context fields.
+- * Called by r200DestroyContext/r300DestroyContext
+- */
+-void radeonCleanupContext(radeonContextPtr radeon)
+-{
+-	/* _mesa_destroy_context() might result in calls to functions that
+-	 * depend on the DriverCtx, so don't set it to NULL before.
+-	 *
+-	 * radeon->glCtx->DriverCtx = NULL;
+-	 */
+-
+-	/* free the Mesa context */
+-	_mesa_destroy_context(radeon->glCtx);
+-
+-	if (radeon->state.scissor.pClipRects) {
+-		FREE(radeon->state.scissor.pClipRects);
+-		radeon->state.scissor.pClipRects = 0;
+-	}
+-}
+-
+-
+-/**
+- * Swap front and back buffer.
+- */
+-void radeonSwapBuffers(__DRIdrawablePrivate * dPriv)
+-{
+-	if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) {
+-		radeonContextPtr radeon;
+-		GLcontext *ctx;
+-
+-		radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
+-		ctx = radeon->glCtx;
+-
+-		if (ctx->Visual.doubleBufferMode) {
+-			_mesa_notifySwapBuffers(ctx);	/* flush pending rendering comands */
+-			if (radeon->doPageFlip) {
+-				radeonPageFlip(dPriv);
+-			} else {
+-			    radeonCopyBuffer(dPriv, NULL);
+-			}
+-		}
+-	} else {
+-		/* XXX this shouldn't be an error but we can't handle it for now */
+-		_mesa_problem(NULL, "%s: drawable has no context!",
+-			      __FUNCTION__);
+-	}
+-}
+-
+-void radeonCopySubBuffer(__DRIdrawablePrivate * dPriv,
+-			 int x, int y, int w, int h )
+-{
+-    if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) {
+-	radeonContextPtr radeon;
+-	GLcontext *ctx;
+-
+-	radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
+-	ctx = radeon->glCtx;
+-
+-	if (ctx->Visual.doubleBufferMode) {
+-	    drm_clip_rect_t rect;
+-	    rect.x1 = x + dPriv->x;
+-	    rect.y1 = (dPriv->h - y - h) + dPriv->y;
+-	    rect.x2 = rect.x1 + w;
+-	    rect.y2 = rect.y1 + h;
+-	    _mesa_notifySwapBuffers(ctx);	/* flush pending rendering comands */
+-	    radeonCopyBuffer(dPriv, &rect);
+-	}
+-    } else {
+-	/* XXX this shouldn't be an error but we can't handle it for now */
+-	_mesa_problem(NULL, "%s: drawable has no context!",
+-		      __FUNCTION__);
+-    }
+-}
+-
+-/* Force the context `c' to be the current context and associate with it
+- * buffer `b'.
+- */
+-GLboolean radeonMakeCurrent(__DRIcontextPrivate * driContextPriv,
+-			    __DRIdrawablePrivate * driDrawPriv,
+-			    __DRIdrawablePrivate * driReadPriv)
+-{
+-	if (driContextPriv) {
+-		radeonContextPtr radeon =
+-			(radeonContextPtr) driContextPriv->driverPrivate;
+-
+-		if (RADEON_DEBUG & DEBUG_DRI)
+-			fprintf(stderr, "%s ctx %p\n", __FUNCTION__,
+-				radeon->glCtx);
+-
+-		if (radeon->dri.drawable != driDrawPriv) {
+-			if (driDrawPriv->swap_interval == (unsigned)-1) {
+-				driDrawPriv->vblFlags =
+-					(radeon->radeonScreen->irq != 0)
+-					? driGetDefaultVBlankFlags(&radeon->
+-								   optionCache)
+-					: VBLANK_FLAG_NO_IRQ;
+-
+-				driDrawableInitVBlank(driDrawPriv);
+-			}
+-		}
+-
+-		radeon->dri.readable = driReadPriv;
+-
+-		if (radeon->dri.drawable != driDrawPriv ||
+-		    radeon->lastStamp != driDrawPriv->lastStamp) {
+-			radeon->dri.drawable = driDrawPriv;
+-
+-			radeonSetCliprects(radeon);
+-			r300UpdateViewportOffset(radeon->glCtx);
+-		}
+-
+-		_mesa_make_current(radeon->glCtx,
+-				    (GLframebuffer *) driDrawPriv->
+-				    driverPrivate,
+-				    (GLframebuffer *) driReadPriv->
+-				    driverPrivate);
+-
+-		_mesa_update_state(radeon->glCtx);		
+-
+-		radeonUpdatePageFlipping(radeon);
+-	} else {
+-		if (RADEON_DEBUG & DEBUG_DRI)
+-			fprintf(stderr, "%s ctx is null\n", __FUNCTION__);
+-		_mesa_make_current(0, 0, 0);
+-	}
+-
+-	if (RADEON_DEBUG & DEBUG_DRI)
+-		fprintf(stderr, "End %s\n", __FUNCTION__);
+-	return GL_TRUE;
+-}
+-
+-/* Force the context `c' to be unbound from its buffer.
+- */
+-GLboolean radeonUnbindContext(__DRIcontextPrivate * driContextPriv)
+-{
+-	radeonContextPtr radeon = (radeonContextPtr) driContextPriv->driverPrivate;
+-
+-	if (RADEON_DEBUG & DEBUG_DRI)
+-		fprintf(stderr, "%s ctx %p\n", __FUNCTION__,
+-			radeon->glCtx);
+-
+-	return GL_TRUE;
+-}
+-
+diff --git a/src/mesa/drivers/dri/r300/radeon_context.h b/src/mesa/drivers/dri/r300/radeon_context.h
+index 47cbc22..250570f 100644
+--- a/src/mesa/drivers/dri/r300/radeon_context.h
++++ b/src/mesa/drivers/dri/r300/radeon_context.h
+@@ -49,20 +49,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "drm.h"
+ #include "dri_util.h"
+ 
+-struct radeon_context;
+-typedef struct radeon_context radeonContextRec;
+-typedef struct radeon_context *radeonContextPtr;
+-
+-/* Rasterizing fallbacks */
+-/* See correponding strings in r200_swtcl.c */
+-#define RADEON_FALLBACK_TEXTURE		0x0001
+-#define RADEON_FALLBACK_DRAW_BUFFER	0x0002
+-#define RADEON_FALLBACK_STENCIL		0x0004
+-#define RADEON_FALLBACK_RENDER_MODE	0x0008
+-#define RADEON_FALLBACK_BLEND_EQ	0x0010
+-#define RADEON_FALLBACK_BLEND_FUNC	0x0020
+-#define RADEON_FALLBACK_DISABLE		0x0040
+-#define RADEON_FALLBACK_BORDER_MODE	0x0080
++#include "radeon_screen.h"
+ 
+ #if R200_MERGED
+ extern void radeonFallback(GLcontext * ctx, GLuint bit, GLboolean mode);
+@@ -79,155 +66,11 @@ extern void radeonFallback(GLcontext * ctx, GLuint bit, GLboolean mode);
+ /* TCL fallbacks */
+ extern void radeonTclFallback(GLcontext * ctx, GLuint bit, GLboolean mode);
+ 
+-#define RADEON_TCL_FALLBACK_RASTER		0x0001	/* rasterization */
+-#define RADEON_TCL_FALLBACK_UNFILLED		0x0002	/* unfilled tris */
+-#define RADEON_TCL_FALLBACK_LIGHT_TWOSIDE	0x0004	/* twoside tris */
+-#define RADEON_TCL_FALLBACK_MATERIAL		0x0008	/* material in vb */
+-#define RADEON_TCL_FALLBACK_TEXGEN_0		0x0010	/* texgen, unit 0 */
+-#define RADEON_TCL_FALLBACK_TEXGEN_1		0x0020	/* texgen, unit 1 */
+-#define RADEON_TCL_FALLBACK_TEXGEN_2		0x0040	/* texgen, unit 2 */
+-#define RADEON_TCL_FALLBACK_TEXGEN_3		0x0080	/* texgen, unit 3 */
+-#define RADEON_TCL_FALLBACK_TEXGEN_4		0x0100	/* texgen, unit 4 */
+-#define RADEON_TCL_FALLBACK_TEXGEN_5		0x0200	/* texgen, unit 5 */
+-#define RADEON_TCL_FALLBACK_TCL_DISABLE		0x0400	/* user disable */
+-#define RADEON_TCL_FALLBACK_BITMAP		0x0800	/* draw bitmap with points */
+-#define RADEON_TCL_FALLBACK_VERTEX_PROGRAM	0x1000	/* vertex program active */
+-
+ #if R200_MERGED
+ #define TCL_FALLBACK( ctx, bit, mode )	radeonTclFallback( ctx, bit, mode )
+ #else
+ #define TCL_FALLBACK( ctx, bit, mode )	;
+ #endif
+ 
+-struct radeon_dri_mirror {
+-	__DRIcontextPrivate *context;	/* DRI context */
+-	__DRIscreenPrivate *screen;	/* DRI screen */
+-	/**
+-	 * DRI drawable bound to this context for drawing.
+-	 */
+-	__DRIdrawablePrivate *drawable;
+-
+-	/**
+-	 * DRI drawable bound to this context for reading.
+-	 */
+-	__DRIdrawablePrivate *readable;
+-
+-	drm_context_t hwContext;
+-	drm_hw_lock_t *hwLock;
+-	int fd;
+-	int drmMinor;
+-};
+-
+-/**
+- * Derived state for internal purposes.
+- */
+-struct radeon_scissor_state {
+-	drm_clip_rect_t rect;
+-	GLboolean enabled;
+-
+-	GLuint numClipRects;	/* Cliprects active */
+-	GLuint numAllocedClipRects;	/* Cliprects available */
+-	drm_clip_rect_t *pClipRects;
+-};
+-
+-struct radeon_colorbuffer_state {
+-	GLuint clear;
+-	GLint drawOffset, drawPitch;
+-};
+-
+-struct radeon_state {
+-	struct radeon_colorbuffer_state color;
+-	struct radeon_scissor_state scissor;
+-};
+-
+-/**
+- * Common per-context variables shared by R200 and R300.
+- * R200- and R300-specific code "derive" their own context from this
+- * structure.
+- */
+-struct radeon_context {
+-	GLcontext *glCtx;	/* Mesa context */
+-	radeonScreenPtr radeonScreen;	/* Screen private DRI data */
+-
+-	/* Fallback state */
+-	GLuint Fallback;
+-	GLuint TclFallback;
+-
+-	/* Page flipping */
+-	GLuint doPageFlip;
+-
+-	/* Drawable, cliprect and scissor information */
+-	GLuint numClipRects;	/* Cliprects for the draw buffer */
+-	drm_clip_rect_t *pClipRects;
+-	unsigned int lastStamp;
+-	GLboolean lost_context;
+-	drm_radeon_sarea_t *sarea;	/* Private SAREA data */
+-
+-	/* Mirrors of some DRI state */
+-	struct radeon_dri_mirror dri;
+-
+-	/* Busy waiting */
+-	GLuint do_usleeps;
+-	GLuint do_irqs;
+-	GLuint irqsEmitted;
+-	drm_radeon_irq_wait_t iw;
+-
+-	/* buffer swap */
+-	int64_t swap_ust;
+-	int64_t swap_missed_ust;
+-
+-	GLuint swap_count;
+-	GLuint swap_missed_count;
+-
+-	/* Derived state */
+-	struct radeon_state state;
+-
+-	/* Configuration cache
+-	 */
+-	driOptionCache optionCache;
+-};
+-
+-#define RADEON_CONTEXT(glctx) ((radeonContextPtr)(ctx->DriverCtx))
+-
+-extern void radeonSwapBuffers(__DRIdrawablePrivate * dPriv);
+-extern void radeonCopySubBuffer(__DRIdrawablePrivate * dPriv,
+-				int x, int y, int w, int h);
+-extern GLboolean radeonInitContext(radeonContextPtr radeon,
+-				   struct dd_function_table *functions,
+-				   const __GLcontextModes * glVisual,
+-				   __DRIcontextPrivate * driContextPriv,
+-				   void *sharedContextPrivate);
+-extern void radeonCleanupContext(radeonContextPtr radeon);
+-extern GLboolean radeonMakeCurrent(__DRIcontextPrivate * driContextPriv,
+-				   __DRIdrawablePrivate * driDrawPriv,
+-				   __DRIdrawablePrivate * driReadPriv);
+-extern GLboolean radeonUnbindContext(__DRIcontextPrivate * driContextPriv);
+-
+-/* ================================================================
+- * Debugging:
+- */
+-#define DO_DEBUG		1
+-
+-#if DO_DEBUG
+-extern int RADEON_DEBUG;
+-#else
+-#define RADEON_DEBUG		0
+-#endif
+-
+-#define DEBUG_TEXTURE	0x0001
+-#define DEBUG_STATE	0x0002
+-#define DEBUG_IOCTL	0x0004
+-#define DEBUG_PRIMS	0x0008
+-#define DEBUG_VERTS	0x0010
+-#define DEBUG_FALLBACKS	0x0020
+-#define DEBUG_VFMT	0x0040
+-#define DEBUG_CODEGEN	0x0080
+-#define DEBUG_VERBOSE	0x0100
+-#define DEBUG_DRI       0x0200
+-#define DEBUG_DMA       0x0400
+-#define DEBUG_SANITY    0x0800
+-#define DEBUG_SYNC      0x1000
+-#define DEBUG_PIXEL     0x2000
+-#define DEBUG_MEMORY    0x4000
+ 
+ #endif				/* __RADEON_CONTEXT_H__ */
+diff --git a/src/mesa/drivers/dri/r300/radeon_ioctl.c b/src/mesa/drivers/dri/r300/radeon_ioctl.c
+deleted file mode 100644
+index 36502eb..0000000
+--- a/src/mesa/drivers/dri/r300/radeon_ioctl.c
++++ /dev/null
+@@ -1,396 +0,0 @@
+-/*
+-Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+-
+-The Weather Channel (TM) funded Tungsten Graphics to develop the
+-initial release of the Radeon 8500 driver under the XFree86 license.
+-This notice must be preserved.
+-
+-Permission is hereby granted, free of charge, to any person obtaining
+-a copy of this software and associated documentation files (the
+-"Software"), to deal in the Software without restriction, including
+-without limitation the rights to use, copy, modify, merge, publish,
+-distribute, sublicense, and/or sell copies of the Software, and to
+-permit persons to whom the Software is furnished to do so, subject to
+-the following conditions:
+-
+-The above copyright notice and this permission notice (including the
+-next paragraph) shall be included in all copies or substantial
+-portions of the Software.
+-
+-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+-IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+-
+-**************************************************************************/
+-
+-/*
+- * Authors:
+- *   Keith Whitwell <keith@tungstengraphics.com>
+- */
+-
+-#include <sched.h>
+-#include <errno.h>
+-
+-#include "main/glheader.h"
+-#include "main/imports.h"
+-#include "main/macros.h"
+-#include "main/context.h"
+-#include "swrast/swrast.h"
+-#include "r300_context.h"
+-#include "radeon_ioctl.h"
+-#include "r300_ioctl.h"
+-#include "r300_state.h"
+-#include "radeon_reg.h"
+-
+-#include "drirenderbuffer.h"
+-#include "vblank.h"
+-
+-static void radeonWaitForIdle(radeonContextPtr radeon);
+-
+-/* ================================================================
+- * SwapBuffers with client-side throttling
+- */
+-
+-static uint32_t radeonGetLastFrame(radeonContextPtr radeon)
+-{
+-	drm_radeon_getparam_t gp;
+-	int ret;
+-	uint32_t frame;
+-
+-	gp.param = RADEON_PARAM_LAST_FRAME;
+-	gp.value = (int *)&frame;
+-	ret = drmCommandWriteRead(radeon->dri.fd, DRM_RADEON_GETPARAM,
+-				  &gp, sizeof(gp));
+-	if (ret) {
+-		fprintf(stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__,
+-			ret);
+-		exit(1);
+-	}
+-
+-	return frame;
+-}
+-
+-uint32_t radeonGetAge(radeonContextPtr radeon)
+-{
+-	drm_radeon_getparam_t gp;
+-	int ret;
+-	uint32_t age;
+-
+-	gp.param = RADEON_PARAM_LAST_CLEAR;
+-	gp.value = (int *)&age;
+-	ret = drmCommandWriteRead(radeon->dri.fd, DRM_RADEON_GETPARAM,
+-				  &gp, sizeof(gp));
+-	if (ret) {
+-		fprintf(stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__,
+-			ret);
+-		exit(1);
+-	}
+-
+-	return age;
+-}
+-
+-static void radeonEmitIrqLocked(radeonContextPtr radeon)
+-{
+-	drm_radeon_irq_emit_t ie;
+-	int ret;
+-
+-	ie.irq_seq = &radeon->iw.irq_seq;
+-	ret = drmCommandWriteRead(radeon->dri.fd, DRM_RADEON_IRQ_EMIT,
+-				  &ie, sizeof(ie));
+-	if (ret) {
+-		fprintf(stderr, "%s: drmRadeonIrqEmit: %d\n", __FUNCTION__,
+-			ret);
+-		exit(1);
+-	}
+-}
+-
+-static void radeonWaitIrq(radeonContextPtr radeon)
+-{
+-	int ret;
+-
+-	do {
+-		ret = drmCommandWrite(radeon->dri.fd, DRM_RADEON_IRQ_WAIT,
+-				      &radeon->iw, sizeof(radeon->iw));
+-	} while (ret && (errno == EINTR || errno == EBUSY));
+-
+-	if (ret) {
+-		fprintf(stderr, "%s: drmRadeonIrqWait: %d\n", __FUNCTION__,
+-			ret);
+-		exit(1);
+-	}
+-}
+-
+-static void radeonWaitForFrameCompletion(radeonContextPtr radeon)
+-{
+-	drm_radeon_sarea_t *sarea = radeon->sarea;
+-
+-	if (radeon->do_irqs) {
+-		if (radeonGetLastFrame(radeon) < sarea->last_frame) {
+-			if (!radeon->irqsEmitted) {
+-				while (radeonGetLastFrame(radeon) <
+-				       sarea->last_frame) ;
+-			} else {
+-				UNLOCK_HARDWARE(radeon);
+-				radeonWaitIrq(radeon);
+-				LOCK_HARDWARE(radeon);
+-			}
+-			radeon->irqsEmitted = 10;
+-		}
+-
+-		if (radeon->irqsEmitted) {
+-			radeonEmitIrqLocked(radeon);
+-			radeon->irqsEmitted--;
+-		}
+-	} else {
+-		while (radeonGetLastFrame(radeon) < sarea->last_frame) {
+-			UNLOCK_HARDWARE(radeon);
+-			if (radeon->do_usleeps)
+-				DO_USLEEP(1);
+-			LOCK_HARDWARE(radeon);
+-		}
+-	}
+-}
+-
+-/* Copy the back color buffer to the front color buffer.
+- */
+-void radeonCopyBuffer(__DRIdrawablePrivate * dPriv,
+-		      const drm_clip_rect_t	 * rect)
+-{
+-	radeonContextPtr radeon;
+-	GLint nbox, i, ret;
+-	GLboolean missed_target;
+-	int64_t ust;
+-	__DRIscreenPrivate *psp = dPriv->driScreenPriv;
+-
+-	assert(dPriv);
+-	assert(dPriv->driContextPriv);
+-	assert(dPriv->driContextPriv->driverPrivate);
+-
+-	radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
+-
+-	if (RADEON_DEBUG & DEBUG_IOCTL) {
+-		fprintf(stderr, "\n%s( %p )\n\n", __FUNCTION__,
+-			(void *)radeon->glCtx);
+-	}
+-
+-	r300Flush(radeon->glCtx);
+-
+-	LOCK_HARDWARE(radeon);
+-
+-	/* Throttle the frame rate -- only allow one pending swap buffers
+-	 * request at a time.
+-	 */
+-	radeonWaitForFrameCompletion(radeon);
+-	if (!rect)
+-	{
+-	    UNLOCK_HARDWARE(radeon);
+-	    driWaitForVBlank(dPriv, &missed_target);
+-	    LOCK_HARDWARE(radeon);
+-	}
+-
+-	nbox = dPriv->numClipRects;	/* must be in locked region */
+-
+-	for (i = 0; i < nbox;) {
+-		GLint nr = MIN2(i + RADEON_NR_SAREA_CLIPRECTS, nbox);
+-		drm_clip_rect_t *box = dPriv->pClipRects;
+-		drm_clip_rect_t *b = radeon->sarea->boxes;
+-		GLint n = 0;
+-
+-		for ( ; i < nr ; i++ ) {
+-
+-		    *b = box[i];
+-
+-		    if (rect)
+-		    {
+-			if (rect->x1 > b->x1)
+-			    b->x1 = rect->x1;
+-			if (rect->y1 > b->y1)
+-			    b->y1 = rect->y1;
+-			if (rect->x2 < b->x2)
+-			    b->x2 = rect->x2;
+-			if (rect->y2 < b->y2)
+-			    b->y2 = rect->y2;
+-
+-			if (b->x1 >= b->x2 || b->y1 >= b->y2)
+-			    continue;
+-		    }
+-
+-		    b++;
+-		    n++;
+-		}
+-		radeon->sarea->nbox = n;
+-
+-		if (!n)
+-		   continue;
+-
+-		ret = drmCommandNone(radeon->dri.fd, DRM_RADEON_SWAP);
+-
+-		if (ret) {
+-			fprintf(stderr, "DRM_RADEON_SWAP: return = %d\n",
+-				ret);
+-			UNLOCK_HARDWARE(radeon);
+-			exit(1);
+-		}
+-	}
+-
+-	UNLOCK_HARDWARE(radeon);
+-	if (!rect)
+-	{
+-	    ((r300ContextPtr)radeon)->hw.all_dirty = GL_TRUE;
+-
+-	    radeon->swap_count++;
+-	    (*psp->systemTime->getUST) (&ust);
+-	    if (missed_target) {
+-		radeon->swap_missed_count++;
+-		radeon->swap_missed_ust = ust - radeon->swap_ust;
+-	    }
+-
+-	    radeon->swap_ust = ust;
+-
+-	    sched_yield();
+-	}
+-}
+-
+-void radeonPageFlip(__DRIdrawablePrivate * dPriv)
+-{
+-	radeonContextPtr radeon;
+-	GLint ret;
+-	GLboolean missed_target;
+-	__DRIscreenPrivate *psp = dPriv->driScreenPriv;
+-
+-	assert(dPriv);
+-	assert(dPriv->driContextPriv);
+-	assert(dPriv->driContextPriv->driverPrivate);
+-
+-	radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
+-
+-	if (RADEON_DEBUG & DEBUG_IOCTL) {
+-		fprintf(stderr, "%s: pfCurrentPage: %d\n", __FUNCTION__,
+-			radeon->sarea->pfCurrentPage);
+-	}
+-
+-	r300Flush(radeon->glCtx);
+-	LOCK_HARDWARE(radeon);
+-
+-	if (!dPriv->numClipRects) {
+-		UNLOCK_HARDWARE(radeon);
+-		usleep(10000);	/* throttle invisible client 10ms */
+-		return;
+-	}
+-
+-	/* Need to do this for the perf box placement:
+-	 */
+-	{
+-		drm_clip_rect_t *box = dPriv->pClipRects;
+-		drm_clip_rect_t *b = radeon->sarea->boxes;
+-		b[0] = box[0];
+-		radeon->sarea->nbox = 1;
+-	}
+-
+-	/* Throttle the frame rate -- only allow a few pending swap buffers
+-	 * request at a time.
+-	 */
+-	radeonWaitForFrameCompletion(radeon);
+-	UNLOCK_HARDWARE(radeon);
+-	driWaitForVBlank(dPriv, &missed_target);
+-	if (missed_target) {
+-		radeon->swap_missed_count++;
+-		(void)(*psp->systemTime->getUST) (&radeon->swap_missed_ust);
+-	}
+-	LOCK_HARDWARE(radeon);
+-
+-	ret = drmCommandNone(radeon->dri.fd, DRM_RADEON_FLIP);
+-
+-	UNLOCK_HARDWARE(radeon);
+-
+-	if (ret) {
+-		fprintf(stderr, "DRM_RADEON_FLIP: return = %d\n", ret);
+-		exit(1);
+-	}
+-
+-	radeon->swap_count++;
+-	(void)(*psp->systemTime->getUST) (&radeon->swap_ust);
+-
+-        driFlipRenderbuffers(radeon->glCtx->WinSysDrawBuffer, 
+-                             radeon->sarea->pfCurrentPage);
+-
+-	if (radeon->sarea->pfCurrentPage == 1) {
+-		radeon->state.color.drawOffset = radeon->radeonScreen->frontOffset;
+-		radeon->state.color.drawPitch = radeon->radeonScreen->frontPitch;
+-	} else {
+-		radeon->state.color.drawOffset = radeon->radeonScreen->backOffset;
+-		radeon->state.color.drawPitch = radeon->radeonScreen->backPitch;
+-	}
+-
+-	if (IS_R300_CLASS(radeon->radeonScreen)) {
+-		r300ContextPtr r300 = (r300ContextPtr)radeon;
+-		R300_STATECHANGE(r300, cb);
+-		r300->hw.cb.cmd[R300_CB_OFFSET] = r300->radeon.state.color.drawOffset + 
+-						r300->radeon.radeonScreen->fbLocation;
+-		r300->hw.cb.cmd[R300_CB_PITCH] = r300->radeon.state.color.drawPitch;
+-		
+-		if (r300->radeon.radeonScreen->cpp == 4)
+-			r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_ARGB8888;
+-		else
+-			r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_RGB565;
+-	
+-		if (r300->radeon.sarea->tiling_enabled)
+-			r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_TILE_ENABLE;
+-	}
+-}
+-
+-void radeonWaitForIdleLocked(radeonContextPtr radeon)
+-{
+-	int ret;
+-	int i = 0;
+-
+-	do {
+-		ret = drmCommandNone(radeon->dri.fd, DRM_RADEON_CP_IDLE);
+-		if (ret)
+-			DO_USLEEP(1);
+-	} while (ret && ++i < 100);
+-
+-	if (ret < 0) {
+-		UNLOCK_HARDWARE(radeon);
+-		fprintf(stderr, "Error: R300 timed out... exiting\n");
+-		exit(-1);
+-	}
+-}
+-
+-static void radeonWaitForIdle(radeonContextPtr radeon)
+-{
+-	LOCK_HARDWARE(radeon);
+-	radeonWaitForIdleLocked(radeon);
+-	UNLOCK_HARDWARE(radeon);
+-}
+-
+-void radeonFlush(GLcontext * ctx)
+-{
+-	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+-
+-	if (IS_R300_CLASS(radeon->radeonScreen))
+-		r300Flush(ctx);
+-}
+-
+-
+-/* Make sure all commands have been sent to the hardware and have
+- * completed processing.
+- */
+-void radeonFinish(GLcontext * ctx)
+-{
+-	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+-
+-	radeonFlush(ctx);
+-
+-	if (radeon->do_irqs) {
+-		LOCK_HARDWARE(radeon);
+-		radeonEmitIrqLocked(radeon);
+-		UNLOCK_HARDWARE(radeon);
+-		radeonWaitIrq(radeon);
+-	} else
+-		radeonWaitForIdle(radeon);
+-}
+diff --git a/src/mesa/drivers/dri/r300/radeon_ioctl.h b/src/mesa/drivers/dri/r300/radeon_ioctl.h
+deleted file mode 100644
+index 3add775..0000000
+--- a/src/mesa/drivers/dri/r300/radeon_ioctl.h
++++ /dev/null
+@@ -1,57 +0,0 @@
+-/*
+-Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+-
+-The Weather Channel (TM) funded Tungsten Graphics to develop the
+-initial release of the Radeon 8500 driver under the XFree86 license.
+-This notice must be preserved.
+-
+-Permission is hereby granted, free of charge, to any person obtaining
+-a copy of this software and associated documentation files (the
+-"Software"), to deal in the Software without restriction, including
+-without limitation the rights to use, copy, modify, merge, publish,
+-distribute, sublicense, and/or sell copies of the Software, and to
+-permit persons to whom the Software is furnished to do so, subject to
+-the following conditions:
+-
+-The above copyright notice and this permission notice (including the
+-next paragraph) shall be included in all copies or substantial
+-portions of the Software.
+-
+-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+-IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+-
+-**************************************************************************/
+-
+-/*
+- * Authors:
+- *   Keith Whitwell <keith@tungstengraphics.com>
+- */
+-
+-#ifndef __RADEON_IOCTL_H__
+-#define __RADEON_IOCTL_H__
+-
+-#include "main/simple_list.h"
+-#include "radeon_dri.h"
+-#include "radeon_lock.h"
+-
+-#include "xf86drm.h"
+-#include "drm.h"
+-#if 0
+-#include "r200context.h"
+-#endif
+-#include "radeon_drm.h"
+-
+-extern void radeonCopyBuffer(__DRIdrawablePrivate * drawable,
+-			     const drm_clip_rect_t	* rect);
+-extern void radeonPageFlip(__DRIdrawablePrivate * drawable);
+-extern void radeonFlush(GLcontext * ctx);
+-extern void radeonFinish(GLcontext * ctx);
+-extern void radeonWaitForIdleLocked(radeonContextPtr radeon);
+-extern uint32_t radeonGetAge(radeonContextPtr radeon);
+-
+-#endif				/* __RADEON_IOCTL_H__ */
+diff --git a/src/mesa/drivers/dri/r300/radeon_lock.c b/src/mesa/drivers/dri/r300/radeon_lock.c
+deleted file mode 100644
+index 4f47afd..0000000
+--- a/src/mesa/drivers/dri/r300/radeon_lock.c
++++ /dev/null
+@@ -1,137 +0,0 @@
+-/**************************************************************************
+-
+-Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+-                     VA Linux Systems Inc., Fremont, California.
+-Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+-
+-The Weather Channel (TM) funded Tungsten Graphics to develop the
+-initial release of the Radeon 8500 driver under the XFree86 license.
+-This notice must be preserved.
+-
+-All Rights Reserved.
+-
+-Permission is hereby granted, free of charge, to any person obtaining
+-a copy of this software and associated documentation files (the
+-"Software"), to deal in the Software without restriction, including
+-without limitation the rights to use, copy, modify, merge, publish,
+-distribute, sublicense, and/or sell copies of the Software, and to
+-permit persons to whom the Software is furnished to do so, subject to
+-the following conditions:
+-
+-The above copyright notice and this permission notice (including the
+-next paragraph) shall be included in all copies or substantial
+-portions of the Software.
+-
+-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+-IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+-
+-**************************************************************************/
+-
+-/*
+- * Authors:
+- *   Gareth Hughes <gareth@valinux.com>
+- *   Keith Whitwell <keith@tungstengraphics.com>
+- *   Kevin E. Martin <martin@valinux.com>
+- */
+-
+-#include "radeon_lock.h"
+-#include "radeon_ioctl.h"
+-#include "radeon_state.h"
+-#include "r300_context.h"
+-#include "r300_state.h"
+-
+-#include "main/framebuffer.h"
+-
+-#include "drirenderbuffer.h"
+-
+-#if DEBUG_LOCKING
+-char *prevLockFile = NULL;
+-int prevLockLine = 0;
+-#endif
+-
+-/* Turn on/off page flipping according to the flags in the sarea:
+- */
+-void radeonUpdatePageFlipping(radeonContextPtr rmesa)
+-{
+-	int use_back;
+-
+-	rmesa->doPageFlip = rmesa->sarea->pfState;
+-	if (rmesa->glCtx->WinSysDrawBuffer) {
+-		driFlipRenderbuffers(rmesa->glCtx->WinSysDrawBuffer,
+-				     rmesa->sarea->pfCurrentPage);
+-		r300UpdateDrawBuffer(rmesa->glCtx);
+-	}
+-
+-	use_back = rmesa->glCtx->DrawBuffer ?
+-	    (rmesa->glCtx->DrawBuffer->_ColorDrawBufferIndexes[0] ==
+-	     BUFFER_BACK_LEFT) : 1;
+-	use_back ^= (rmesa->sarea->pfCurrentPage == 1);
+-
+-	if (use_back) {
+-		rmesa->state.color.drawOffset =
+-		    rmesa->radeonScreen->backOffset;
+-		rmesa->state.color.drawPitch = rmesa->radeonScreen->backPitch;
+-	} else {
+-		rmesa->state.color.drawOffset =
+-		    rmesa->radeonScreen->frontOffset;
+-		rmesa->state.color.drawPitch =
+-		    rmesa->radeonScreen->frontPitch;
+-	}
+-}
+-
+-/* Update the hardware state.  This is called if another context has
+- * grabbed the hardware lock, which includes the X server.  This
+- * function also updates the driver's window state after the X server
+- * moves, resizes or restacks a window -- the change will be reflected
+- * in the drawable position and clip rects.  Since the X server grabs
+- * the hardware lock when it changes the window state, this routine will
+- * automatically be called after such a change.
+- */
+-void radeonGetLock(radeonContextPtr rmesa, GLuint flags)
+-{
+-	__DRIdrawablePrivate *const drawable = rmesa->dri.drawable;
+-	__DRIdrawablePrivate *const readable = rmesa->dri.readable;
+-	__DRIscreenPrivate *sPriv = rmesa->dri.screen;
+-	drm_radeon_sarea_t *sarea = rmesa->sarea;
+-	r300ContextPtr r300 = (r300ContextPtr) rmesa;
+-
+-	assert(drawable != NULL);
+-
+-	drmGetLock(rmesa->dri.fd, rmesa->dri.hwContext, flags);
+-
+-	/* The window might have moved, so we might need to get new clip
+-	 * rects.
+-	 *
+-	 * NOTE: This releases and regrabs the hw lock to allow the X server
+-	 * to respond to the DRI protocol request for new drawable info.
+-	 * Since the hardware state depends on having the latest drawable
+-	 * clip rects, all state checking must be done _after_ this call.
+-	 */
+-	DRI_VALIDATE_DRAWABLE_INFO(sPriv, drawable);
+-	if (drawable != readable) {
+-		DRI_VALIDATE_DRAWABLE_INFO(sPriv, readable);
+-	}
+-
+-	if (rmesa->lastStamp != drawable->lastStamp) {
+-		radeonUpdatePageFlipping(rmesa);
+-		radeonSetCliprects(rmesa);
+-		r300UpdateViewportOffset(rmesa->glCtx);
+-		driUpdateFramebufferSize(rmesa->glCtx, drawable);
+-	}
+-
+-	if (sarea->ctx_owner != rmesa->dri.hwContext) {
+-		int i;
+-
+-		sarea->ctx_owner = rmesa->dri.hwContext;
+-		for (i = 0; i < r300->nr_heaps; i++) {
+-			DRI_AGE_TEXTURES(r300->texture_heaps[i]);
+-		}
+-	}
+-
+-	rmesa->lost_context = GL_TRUE;
+-}
+diff --git a/src/mesa/drivers/dri/r300/radeon_lock.h b/src/mesa/drivers/dri/r300/radeon_lock.h
+deleted file mode 100644
+index a344837..64bdf94
+--- a/src/mesa/drivers/dri/r300/radeon_lock.h
++++ /dev/null
+@@ -1,115 +0,0 @@
+-/**************************************************************************
+-
+-Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+-                     VA Linux Systems Inc., Fremont, California.
+-Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+-
+-The Weather Channel (TM) funded Tungsten Graphics to develop the
+-initial release of the Radeon 8500 driver under the XFree86 license.
+-This notice must be preserved.
+-
+-All Rights Reserved.
+-
+-Permission is hereby granted, free of charge, to any person obtaining
+-a copy of this software and associated documentation files (the
+-"Software"), to deal in the Software without restriction, including
+-without limitation the rights to use, copy, modify, merge, publish,
+-distribute, sublicense, and/or sell copies of the Software, and to
+-permit persons to whom the Software is furnished to do so, subject to
+-the following conditions:
+-
+-The above copyright notice and this permission notice (including the
+-next paragraph) shall be included in all copies or substantial
+-portions of the Software.
+-
+-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+-IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+-
+-**************************************************************************/
+-
+-/*
+- * Authors:
+- *   Gareth Hughes <gareth@valinux.com>
+- *   Keith Whitwell <keith@tungstengraphics.com>
+- *   Kevin E. Martin <martin@valinux.com>
+- */
+-
+-#ifndef __RADEON_LOCK_H__
+-#define __RADEON_LOCK_H__
+-
+-#include "radeon_context.h"
+-
+-extern void radeonGetLock(radeonContextPtr rmesa, GLuint flags);
+-extern void radeonUpdatePageFlipping(radeonContextPtr rmesa);
+-
+-/* Turn DEBUG_LOCKING on to find locking conflicts.
+- */
+-#define DEBUG_LOCKING	0
+-
+-#if DEBUG_LOCKING
+-extern char *prevLockFile;
+-extern int prevLockLine;
+-
+-#define DEBUG_LOCK()							\
+-   do {									\
+-      prevLockFile = (__FILE__);					\
+-      prevLockLine = (__LINE__);					\
+-   } while (0)
+-
+-#define DEBUG_RESET()							\
+-   do {									\
+-      prevLockFile = 0;							\
+-      prevLockLine = 0;							\
+-   } while (0)
+-
+-#define DEBUG_CHECK_LOCK()						\
+-   do {									\
+-      if (prevLockFile) {						\
+-	 fprintf(stderr,						\
+-		  "LOCK SET!\n\tPrevious %s:%d\n\tCurrent: %s:%d\n",	\
+-		  prevLockFile, prevLockLine, __FILE__, __LINE__);	\
+-	 exit(1);							\
+-      }									\
+-   } while (0)
+-
+-#else
+-
+-#define DEBUG_LOCK()
+-#define DEBUG_RESET()
+-#define DEBUG_CHECK_LOCK()
+-
+-#endif
+-
+-/*
+- * !!! We may want to separate locks from locks with validation.  This
+- * could be used to improve performance for those things commands that
+- * do not do any drawing !!!
+- */
+-
+-/* Lock the hardware and validate our state.
+- */
+-#define LOCK_HARDWARE( rmesa )						\
+-	do {								\
+-		char __ret = 0;						\
+-		DEBUG_CHECK_LOCK();					\
+-		DRM_CAS((rmesa)->dri.hwLock, (rmesa)->dri.hwContext,	\
+-			(DRM_LOCK_HELD | (rmesa)->dri.hwContext), __ret); \
+-		if (__ret)						\
+-			radeonGetLock((rmesa), 0);			\
+-		DEBUG_LOCK();						\
+-	} while (0)
+-
+-#define UNLOCK_HARDWARE( rmesa )					\
+-	do {								\
+-		DRM_UNLOCK((rmesa)->dri.fd,				\
+-			(rmesa)->dri.hwLock,				\
+-			(rmesa)->dri.hwContext);			\
+-		DEBUG_RESET();						\
+-	} while (0)
+-
+-#endif				/* __RADEON_LOCK_H__ */
+diff --git a/src/mesa/drivers/dri/r300/radeon_program_pair.c b/src/mesa/drivers/dri/r300/radeon_program_pair.c
+index 58bc0d5..8a945d8 100644
+--- a/src/mesa/drivers/dri/r300/radeon_program_pair.c
++++ b/src/mesa/drivers/dri/r300/radeon_program_pair.c
+@@ -35,7 +35,7 @@
+ 
+ #include "radeon_program_pair.h"
+ 
+-#include "radeon_context.h"
++#include "radeon_common.h"
+ 
+ #include "shader/prog_print.h"
+ 
+diff --git a/src/mesa/drivers/dri/r300/radeon_span.c b/src/mesa/drivers/dri/r300/radeon_span.c
+deleted file mode 100644
+index 16f9fb9..0000000
+--- a/src/mesa/drivers/dri/r300/radeon_span.c
++++ /dev/null
+@@ -1,349 +0,0 @@
+-/**************************************************************************
+-
+-Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+-Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+-                     VA Linux Systems Inc., Fremont, California.
+-
+-The Weather Channel (TM) funded Tungsten Graphics to develop the
+-initial release of the Radeon 8500 driver under the XFree86 license.
+-This notice must be preserved.
+-
+-All Rights Reserved.
+-
+-Permission is hereby granted, free of charge, to any person obtaining
+-a copy of this software and associated documentation files (the
+-"Software"), to deal in the Software without restriction, including
+-without limitation the rights to use, copy, modify, merge, publish,
+-distribute, sublicense, and/or sell copies of the Software, and to
+-permit persons to whom the Software is furnished to do so, subject to
+-the following conditions:
+-
+-The above copyright notice and this permission notice (including the
+-next paragraph) shall be included in all copies or substantial
+-portions of the Software.
+-
+-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+-IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+-
+-**************************************************************************/
+-
+-/*
+- * Authors:
+- *   Kevin E. Martin <martin@valinux.com>
+- *   Gareth Hughes <gareth@valinux.com>
+- *   Keith Whitwell <keith@tungstengraphics.com>
+- *
+- */
+-
+-#include "main/glheader.h"
+-#include "swrast/swrast.h"
+-
+-#include "r300_state.h"
+-#include "radeon_ioctl.h"
+-#include "r300_ioctl.h"
+-#include "radeon_span.h"
+-
+-#include "drirenderbuffer.h"
+-
+-#define DBG 0
+-
+-/*
+- * Note that all information needed to access pixels in a renderbuffer
+- * should be obtained through the gl_renderbuffer parameter, not per-context
+- * information.
+- */
+-#define LOCAL_VARS						\
+-   driRenderbuffer *drb = (driRenderbuffer *) rb;		\
+-   const __DRIdrawablePrivate *dPriv = drb->dPriv;		\
+-   const GLuint bottom = dPriv->h - 1;				\
+-   GLubyte *buf = (GLubyte *) drb->flippedData			\
+-      + (dPriv->y * drb->flippedPitch + dPriv->x) * drb->cpp;	\
+-   GLuint p;							\
+-   (void) p;
+-
+-#define LOCAL_DEPTH_VARS				\
+-   driRenderbuffer *drb = (driRenderbuffer *) rb;	\
+-   const __DRIdrawablePrivate *dPriv = drb->dPriv;	\
+-   const GLuint bottom = dPriv->h - 1;			\
+-   GLuint xo = dPriv->x;				\
+-   GLuint yo = dPriv->y;				\
+-   GLubyte *buf = (GLubyte *) drb->Base.Data;
+-
+-#define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS
+-
+-#define Y_FLIP(Y) (bottom - (Y))
+-
+-#define HW_LOCK()
+-
+-#define HW_UNLOCK()
+-
+-/* ================================================================
+- * Color buffer
+- */
+-
+-/* 16 bit, RGB565 color spanline and pixel functions
+- */
+-#define SPANTMP_PIXEL_FMT GL_RGB
+-#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5
+-
+-#define TAG(x)    radeon##x##_RGB565
+-#define TAG2(x,y) radeon##x##_RGB565##y
+-#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 2)
+-#include "spantmp2.h"
+-
+-/* 32 bit, ARGB8888 color spanline and pixel functions
+- */
+-#define SPANTMP_PIXEL_FMT GL_BGRA
+-#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
+-
+-#define TAG(x)    radeon##x##_ARGB8888
+-#define TAG2(x,y) radeon##x##_ARGB8888##y
+-#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 4)
+-#include "spantmp2.h"
+-
+-/* ================================================================
+- * Depth buffer
+- */
+-
+-/* The Radeon family has depth tiling on all the time, so we have to convert
+- * the x,y coordinates into the memory bus address (mba) in the same
+- * manner as the engine.  In each case, the linear block address (ba)
+- * is calculated, and then wired with x and y to produce the final
+- * memory address.
+- * The chip will do address translation on its own if the surface registers
+- * are set up correctly. It is not quite enough to get it working with hyperz
+- * too...
+- */
+-
+-static GLuint radeon_mba_z32(const driRenderbuffer * drb, GLint x, GLint y)
+-{
+-	GLuint pitch = drb->pitch;
+-	if (drb->depthHasSurface) {
+-		return 4 * (x + y * pitch);
+-	} else {
+-		GLuint ba, address = 0;	/* a[0..1] = 0           */
+-
+-#ifdef COMPILE_R300
+-		ba = (y / 8) * (pitch / 8) + (x / 8);
+-#else
+-		ba = (y / 16) * (pitch / 16) + (x / 16);
+-#endif
+-
+-		address |= (x & 0x7) << 2;	/* a[2..4] = x[0..2]     */
+-		address |= (y & 0x3) << 5;	/* a[5..6] = y[0..1]     */
+-		address |= (((x & 0x10) >> 2) ^ (y & 0x4)) << 5;	/* a[7]    = x[4] ^ y[2] */
+-		address |= (ba & 0x3) << 8;	/* a[8..9] = ba[0..1]    */
+-
+-		address |= (y & 0x8) << 7;	/* a[10]   = y[3]        */
+-		address |= (((x & 0x8) << 1) ^ (y & 0x10)) << 7;	/* a[11]   = x[3] ^ y[4] */
+-		address |= (ba & ~0x3) << 10;	/* a[12..] = ba[2..]     */
+-
+-		return address;
+-	}
+-}
+-
+-static INLINE GLuint
+-radeon_mba_z16(const driRenderbuffer * drb, GLint x, GLint y)
+-{
+-	GLuint pitch = drb->pitch;
+-	if (drb->depthHasSurface) {
+-		return 2 * (x + y * pitch);
+-	} else {
+-		GLuint ba, address = 0;	/* a[0]    = 0           */
+-
+-		ba = (y / 16) * (pitch / 32) + (x / 32);
+-
+-		address |= (x & 0x7) << 1;	/* a[1..3] = x[0..2]     */
+-		address |= (y & 0x7) << 4;	/* a[4..6] = y[0..2]     */
+-		address |= (x & 0x8) << 4;	/* a[7]    = x[3]        */
+-		address |= (ba & 0x3) << 8;	/* a[8..9] = ba[0..1]    */
+-		address |= (y & 0x8) << 7;	/* a[10]   = y[3]        */
+-		address |= ((x & 0x10) ^ (y & 0x10)) << 7;	/* a[11]   = x[4] ^ y[4] */
+-		address |= (ba & ~0x3) << 10;	/* a[12..] = ba[2..]     */
+-
+-		return address;
+-	}
+-}
+-
+-/* 16-bit depth buffer functions
+- */
+-#define VALUE_TYPE GLushort
+-
+-#define WRITE_DEPTH( _x, _y, d )					\
+-   *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo )) = d;
+-
+-#define READ_DEPTH( d, _x, _y )						\
+-   d = *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo ));
+-
+-#define TAG(x) radeon##x##_z16
+-#include "depthtmp.h"
+-
+-/* 24 bit depth, 8 bit stencil depthbuffer functions
+- *
+- * Careful: It looks like the R300 uses ZZZS byte order while the R200
+- * uses SZZZ for 24 bit depth, 8 bit stencil mode.
+- */
+-#define VALUE_TYPE GLuint
+-
+-#ifdef COMPILE_R300
+-#define WRITE_DEPTH( _x, _y, d )					\
+-do {									\
+-   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
+-   GLuint tmp = *(GLuint *)(buf + offset);				\
+-   tmp &= 0x000000ff;							\
+-   tmp |= ((d << 8) & 0xffffff00);					\
+-   *(GLuint *)(buf + offset) = tmp;					\
+-} while (0)
+-#else
+-#define WRITE_DEPTH( _x, _y, d )					\
+-do {									\
+-   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
+-   GLuint tmp = *(GLuint *)(buf + offset);				\
+-   tmp &= 0xff000000;							\
+-   tmp |= ((d) & 0x00ffffff);						\
+-   *(GLuint *)(buf + offset) = tmp;					\
+-} while (0)
+-#endif
+-
+-#ifdef COMPILE_R300
+-#define READ_DEPTH( d, _x, _y )						\
+-  do { \
+-    d = (*(GLuint *)(buf + radeon_mba_z32( drb, _x + xo,		\
+-					 _y + yo )) & 0xffffff00) >> 8; \
+-  }while(0)
+-#else
+-#define READ_DEPTH( d, _x, _y )						\
+-   d = *(GLuint *)(buf + radeon_mba_z32( drb, _x + xo,			\
+-					 _y + yo )) & 0x00ffffff;
+-#endif
+-
+-#define TAG(x) radeon##x##_z24_s8
+-#include "depthtmp.h"
+-
+-/* ================================================================
+- * Stencil buffer
+- */
+-
+-/* 24 bit depth, 8 bit stencil depthbuffer functions
+- */
+-#ifdef COMPILE_R300
+-#define WRITE_STENCIL( _x, _y, d )					\
+-do {									\
+-   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
+-   GLuint tmp = *(GLuint *)(buf + offset);				\
+-   tmp &= 0xffffff00;							\
+-   tmp |= (d) & 0xff;							\
+-   *(GLuint *)(buf + offset) = tmp;					\
+-} while (0)
+-#else
+-#define WRITE_STENCIL( _x, _y, d )					\
+-do {									\
+-   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
+-   GLuint tmp = *(GLuint *)(buf + offset);				\
+-   tmp &= 0x00ffffff;							\
+-   tmp |= (((d) & 0xff) << 24);						\
+-   *(GLuint *)(buf + offset) = tmp;					\
+-} while (0)
+-#endif
+-
+-#ifdef COMPILE_R300
+-#define READ_STENCIL( d, _x, _y )					\
+-do {									\
+-   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
+-   GLuint tmp = *(GLuint *)(buf + offset);				\
+-   d = tmp & 0x000000ff;						\
+-} while (0)
+-#else
+-#define READ_STENCIL( d, _x, _y )					\
+-do {									\
+-   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
+-   GLuint tmp = *(GLuint *)(buf + offset);				\
+-   d = (tmp & 0xff000000) >> 24;					\
+-} while (0)
+-#endif
+-
+-#define TAG(x) radeon##x##_z24_s8
+-#include "stenciltmp.h"
+-
+-/* Move locking out to get reasonable span performance (10x better
+- * than doing this in HW_LOCK above).  WaitForIdle() is the main
+- * culprit.
+- */
+-
+-static void radeonSpanRenderStart(GLcontext * ctx)
+-{
+-	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+-#ifdef COMPILE_R300
+-	r300ContextPtr r300 = (r300ContextPtr) rmesa;
+-	R300_FIREVERTICES(r300);
+-#else
+-	RADEON_FIREVERTICES(rmesa);
+-#endif
+-	LOCK_HARDWARE(rmesa);
+-	radeonWaitForIdleLocked(rmesa);
+-
+-	/* Read the first pixel in the frame buffer.  This should
+-	 * be a noop, right?  In fact without this conform fails as reading
+-	 * from the framebuffer sometimes produces old results -- the
+-	 * on-card read cache gets mixed up and doesn't notice that the
+-	 * framebuffer has been updated.
+-	 *
+-	 * Note that we should probably be reading some otherwise unused
+-	 * region of VRAM, otherwise we might get incorrect results when
+-	 * reading pixels from the top left of the screen.
+-	 *
+-	 * I found this problem on an R420 with glean's texCube test.
+-	 * Note that the R200 span code also *writes* the first pixel in the
+-	 * framebuffer, but I've found this to be unnecessary.
+-	 *  -- Nicolai Hähnle, June 2008
+-	 */
+-	{
+-		int p;
+-		driRenderbuffer *drb =
+-			(driRenderbuffer *) ctx->WinSysDrawBuffer->_ColorDrawBuffers[0];
+-		volatile int *buf =
+-			(volatile int *)(rmesa->dri.screen->pFB + drb->offset);
+-		p = *buf;
+-	}
+-}
+-
+-static void radeonSpanRenderFinish(GLcontext * ctx)
+-{
+-	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+-	_swrast_flush(ctx);
+-	UNLOCK_HARDWARE(rmesa);
+-}
+-
+-void radeonInitSpanFuncs(GLcontext * ctx)
+-{
+-	struct swrast_device_driver *swdd =
+-	    _swrast_GetDeviceDriverReference(ctx);
+-	swdd->SpanRenderStart = radeonSpanRenderStart;
+-	swdd->SpanRenderFinish = radeonSpanRenderFinish;
+-}
+-
+-/**
+- * Plug in the Get/Put routines for the given driRenderbuffer.
+- */
+-void radeonSetSpanFunctions(driRenderbuffer * drb, const GLvisual * vis)
+-{
+-	if (drb->Base.InternalFormat == GL_RGBA) {
+-		if (vis->redBits == 5 && vis->greenBits == 6
+-		    && vis->blueBits == 5) {
+-			radeonInitPointers_RGB565(&drb->Base);
+-		} else {
+-			radeonInitPointers_ARGB8888(&drb->Base);
+-		}
+-	} else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT16) {
+-		radeonInitDepthPointers_z16(&drb->Base);
+-	} else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT24) {
+-		radeonInitDepthPointers_z24_s8(&drb->Base);
+-	} else if (drb->Base.InternalFormat == GL_STENCIL_INDEX8_EXT) {
+-		radeonInitStencilPointers_z24_s8(&drb->Base);
+-	}
+-}
+diff --git a/src/mesa/drivers/dri/r300/radeon_state.c b/src/mesa/drivers/dri/r300/radeon_state.c
+deleted file mode 100644
+index c401da6..0000000
+--- a/src/mesa/drivers/dri/r300/radeon_state.c
++++ /dev/null
+@@ -1,244 +0,0 @@
+-/**************************************************************************
+-
+-Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+-
+-The Weather Channel (TM) funded Tungsten Graphics to develop the
+-initial release of the Radeon 8500 driver under the XFree86 license.
+-This notice must be preserved.
+-
+-Permission is hereby granted, free of charge, to any person obtaining
+-a copy of this software and associated documentation files (the
+-"Software"), to deal in the Software without restriction, including
+-without limitation the rights to use, copy, modify, merge, publish,
+-distribute, sublicense, and/or sell copies of the Software, and to
+-permit persons to whom the Software is furnished to do so, subject to
+-the following conditions:
+-
+-The above copyright notice and this permission notice (including the
+-next paragraph) shall be included in all copies or substantial
+-portions of the Software.
+-
+-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+-IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+-
+-**************************************************************************/
+-
+-/*
+- * Authors:
+- *   Keith Whitwell <keith@tungstengraphics.com>
+- */
+-
+-#include "main/glheader.h"
+-#include "main/imports.h"
+-#include "main/api_arrayelt.h"
+-#include "main/enums.h"
+-#include "main/framebuffer.h"
+-#include "main/colormac.h"
+-#include "main/light.h"
+-
+-#include "swrast/swrast.h"
+-#include "vbo/vbo.h"
+-#include "tnl/tnl.h"
+-#include "tnl/t_pipeline.h"
+-#include "swrast_setup/swrast_setup.h"
+-
+-#include "radeon_ioctl.h"
+-#include "radeon_state.h"
+-#include "r300_ioctl.h"
+-
+-
+-/* =============================================================
+- * Scissoring
+- */
+-
+-static GLboolean intersect_rect(drm_clip_rect_t * out,
+-				drm_clip_rect_t * a, drm_clip_rect_t * b)
+-{
+-	*out = *a;
+-	if (b->x1 > out->x1)
+-		out->x1 = b->x1;
+-	if (b->y1 > out->y1)
+-		out->y1 = b->y1;
+-	if (b->x2 < out->x2)
+-		out->x2 = b->x2;
+-	if (b->y2 < out->y2)
+-		out->y2 = b->y2;
+-	if (out->x1 >= out->x2)
+-		return GL_FALSE;
+-	if (out->y1 >= out->y2)
+-		return GL_FALSE;
+-	return GL_TRUE;
+-}
+-
+-void radeonRecalcScissorRects(radeonContextPtr radeon)
+-{
+-	drm_clip_rect_t *out;
+-	int i;
+-
+-	/* Grow cliprect store?
+-	 */
+-	if (radeon->state.scissor.numAllocedClipRects < radeon->numClipRects) {
+-		while (radeon->state.scissor.numAllocedClipRects <
+-		       radeon->numClipRects) {
+-			radeon->state.scissor.numAllocedClipRects += 1;	/* zero case */
+-			radeon->state.scissor.numAllocedClipRects *= 2;
+-		}
+-
+-		if (radeon->state.scissor.pClipRects)
+-			FREE(radeon->state.scissor.pClipRects);
+-
+-		radeon->state.scissor.pClipRects =
+-		    MALLOC(radeon->state.scissor.numAllocedClipRects *
+-			   sizeof(drm_clip_rect_t));
+-
+-		if (radeon->state.scissor.pClipRects == NULL) {
+-			radeon->state.scissor.numAllocedClipRects = 0;
+-			return;
+-		}
+-	}
+-
+-	out = radeon->state.scissor.pClipRects;
+-	radeon->state.scissor.numClipRects = 0;
+-
+-	for (i = 0; i < radeon->numClipRects; i++) {
+-		if (intersect_rect(out,
+-				   &radeon->pClipRects[i],
+-				   &radeon->state.scissor.rect)) {
+-			radeon->state.scissor.numClipRects++;
+-			out++;
+-		}
+-	}
+-}
+-
+-void radeonUpdateScissor(GLcontext* ctx)
+-{
+-	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+-
+-	if (radeon->dri.drawable) {
+-		__DRIdrawablePrivate *dPriv = radeon->dri.drawable;
+-		int x1 = dPriv->x + ctx->Scissor.X;
+-		int y1 = dPriv->y + dPriv->h - (ctx->Scissor.Y + ctx->Scissor.Height);
+-
+-		radeon->state.scissor.rect.x1 = x1;
+-		radeon->state.scissor.rect.y1 = y1;
+-		radeon->state.scissor.rect.x2 = x1 + ctx->Scissor.Width;
+-		radeon->state.scissor.rect.y2 = y1 + ctx->Scissor.Height;
+-
+-		radeonRecalcScissorRects(radeon);
+-	}
+-}
+-
+-static void radeonScissor(GLcontext* ctx, GLint x, GLint y, GLsizei w, GLsizei h)
+-{
+-	if (ctx->Scissor.Enabled) {
+-		/* We don't pipeline cliprect changes */
+-		r300Flush(ctx);
+-		radeonUpdateScissor(ctx);
+-	}
+-}
+-
+-
+-/**
+- * Update cliprects and scissors.
+- */
+-void radeonSetCliprects(radeonContextPtr radeon)
+-{
+-	__DRIdrawablePrivate *const drawable = radeon->dri.drawable;
+-	__DRIdrawablePrivate *const readable = radeon->dri.readable;
+-	GLframebuffer *const draw_fb = (GLframebuffer*)drawable->driverPrivate;
+-	GLframebuffer *const read_fb = (GLframebuffer*)readable->driverPrivate;
+-
+-	if (draw_fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) {
+-		/* Can't ignore 2d windows if we are page flipping. */
+-		if (drawable->numBackClipRects == 0 || radeon->doPageFlip ||
+-		    radeon->sarea->pfCurrentPage == 1) {
+-			radeon->numClipRects = drawable->numClipRects;
+-			radeon->pClipRects = drawable->pClipRects;
+-		} else {
+-			radeon->numClipRects = drawable->numBackClipRects;
+-			radeon->pClipRects = drawable->pBackClipRects;
+-		}
+-	} else {
+-		/* front buffer (or none, or multiple buffers */
+-		radeon->numClipRects = drawable->numClipRects;
+-		radeon->pClipRects = drawable->pClipRects;
+-	}
+-
+-	if ((draw_fb->Width != drawable->w) ||
+-	    (draw_fb->Height != drawable->h)) {
+-		_mesa_resize_framebuffer(radeon->glCtx, draw_fb,
+-					 drawable->w, drawable->h);
+-		draw_fb->Initialized = GL_TRUE;
+-	}
+-
+-	if (drawable != readable) {
+-		if ((read_fb->Width != readable->w) ||
+-		    (read_fb->Height != readable->h)) {
+-			_mesa_resize_framebuffer(radeon->glCtx, read_fb,
+-						 readable->w, readable->h);
+-			read_fb->Initialized = GL_TRUE;
+-		}
+-	}
+-
+-	if (radeon->state.scissor.enabled)
+-		radeonRecalcScissorRects(radeon);
+-
+-	radeon->lastStamp = drawable->lastStamp;
+-}
+-
+-
+-/**
+- * Handle common enable bits.
+- * Called as a fallback by r200Enable/r300Enable.
+- */
+-void radeonEnable(GLcontext* ctx, GLenum cap, GLboolean state)
+-{
+-	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+-
+-	switch(cap) {
+-	case GL_SCISSOR_TEST:
+-		/* We don't pipeline cliprect & scissor changes */
+-		r300Flush(ctx);
+-
+-		radeon->state.scissor.enabled = state;
+-		radeonUpdateScissor(ctx);
+-		break;
+-
+-	default:
+-		return;
+-	}
+-}
+-
+-
+-/**
+- * Initialize default state.
+- * This function is called once at context init time from
+- * r200InitState/r300InitState
+- */
+-void radeonInitState(radeonContextPtr radeon)
+-{
+-	radeon->Fallback = 0;
+-
+-	if (radeon->glCtx->Visual.doubleBufferMode && radeon->sarea->pfCurrentPage == 0) {
+-		radeon->state.color.drawOffset = radeon->radeonScreen->backOffset;
+-		radeon->state.color.drawPitch = radeon->radeonScreen->backPitch;
+-	} else {
+-		radeon->state.color.drawOffset = radeon->radeonScreen->frontOffset;
+-		radeon->state.color.drawPitch = radeon->radeonScreen->frontPitch;
+-	}
+-}
+-
+-
+-/**
+- * Initialize common state functions.
+- * Called by r200InitStateFuncs/r300InitStateFuncs
+- */
+-void radeonInitStateFuncs(struct dd_function_table *functions)
+-{
+-	functions->Scissor = radeonScissor;
+-}
+diff --git a/src/mesa/drivers/dri/r300/radeon_state.h b/src/mesa/drivers/dri/r300/radeon_state.h
+deleted file mode 100644
+index 821cb40..0000000
+--- a/src/mesa/drivers/dri/r300/radeon_state.h
++++ /dev/null
+@@ -1,43 +0,0 @@
+-/*
+-Copyright (C) 2004 Nicolai Haehnle.  All Rights Reserved.
+-
+-Permission is hereby granted, free of charge, to any person obtaining
+-a copy of this software and associated documentation files (the
+-"Software"), to deal in the Software without restriction, including
+-without limitation the rights to use, copy, modify, merge, publish,
+-distribute, sublicense, and/or sell copies of the Software, and to
+-permit persons to whom the Software is furnished to do so, subject to
+-the following conditions:
+-
+-The above copyright notice and this permission notice (including the
+-next paragraph) shall be included in all copies or substantial
+-portions of the Software.
+-
+-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+-IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+-
+-**************************************************************************/
+-
+-/*
+- * Authors:
+- *   Nicolai Haehnle <prefect_@gmx.net>
+- */
+-
+-#ifndef __RADEON_STATE_H__
+-#define __RADEON_STATE_H__
+-
+-extern void radeonRecalcScissorRects(radeonContextPtr radeon);
+-extern void radeonSetCliprects(radeonContextPtr radeon);
+-extern void radeonUpdateScissor(GLcontext* ctx);
+-
+-extern void radeonEnable(GLcontext* ctx, GLenum cap, GLboolean state);
+-
+-extern void radeonInitState(radeonContextPtr radeon);
+-extern void radeonInitStateFuncs(struct dd_function_table* functions);
+-
+-#endif
+diff --git a/src/mesa/drivers/dri/radeon/Makefile b/src/mesa/drivers/dri/radeon/Makefile
+index f223b2d..f469c6f 100644
+--- a/src/mesa/drivers/dri/radeon/Makefile
++++ b/src/mesa/drivers/dri/radeon/Makefile
+@@ -4,25 +4,36 @@
+ TOP = ../../../../..
+ include $(TOP)/configs/current
+ 
++CFLAGS += $(RADEON_CFLAGS)
++
+ LIBNAME = radeon_dri.so
+ 
+ MINIGLX_SOURCES = server/radeon_dri.c 
+ 
++RADEON_COMMON_SOURCES = \
++	radeon_texture.c \
++	radeon_common_context.c \
++	radeon_common.c \
++	radeon_dma.c \
++	radeon_lock.c \
++	radeon_bo_legacy.c \
++	radeon_cs_legacy.c \
++	radeon_mipmap_tree.c \
++	radeon_span.c
++
+ DRIVER_SOURCES = \
+ 	radeon_context.c \
+ 	radeon_ioctl.c \
+-	radeon_lock.c \
+ 	radeon_screen.c \
+ 	radeon_state.c \
+ 	radeon_state_init.c \
+ 	radeon_tex.c \
+-	radeon_texmem.c \
+ 	radeon_texstate.c \
+ 	radeon_tcl.c \
+ 	radeon_swtcl.c \
+-	radeon_span.c \
+ 	radeon_maos.c \
+-	radeon_sanity.c 
++	radeon_sanity.c \
++	$(RADEON_COMMON_SOURCES)
+ 
+ C_SOURCES = \
+ 	$(COMMON_SOURCES) \
+@@ -30,6 +41,8 @@ C_SOURCES = \
+ 
+ DRIVER_DEFINES = -DRADEON_COMMON=0
+ 
++DRI_LIB_DEPS += $(RADEON_LDFLAGS)
++
+ X86_SOURCES = 
+ 
+ include ../Makefile.template
+diff --git a/src/mesa/drivers/dri/radeon/radeon_bo_drm.h b/src/mesa/drivers/dri/radeon/radeon_bo_drm.h
+new file mode 100644
+index 0000000..1ed13f1
+--- /dev/null
++++ b/src/mesa/drivers/dri/radeon/radeon_bo_drm.h
+@@ -0,0 +1,182 @@
++/* 
++ * Copyright © 2008 Jérôme Glisse
++ * All Rights Reserved.
++ * 
++ * Permission is hereby granted, free of charge, to any person obtaining
++ * a copy of this software and associated documentation files (the
++ * "Software"), to deal in the Software without restriction, including
++ * without limitation the rights to use, copy, modify, merge, publish,
++ * distribute, sub license, and/or sell copies of the Software, and to
++ * permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ * 
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
++ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
++ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
++ * USE OR OTHER DEALINGS IN THE SOFTWARE.
++ *
++ * The above copyright notice and this permission notice (including the
++ * next paragraph) shall be included in all copies or substantial portions
++ * of the Software.
++ */
++/*
++ * Authors:
++ *      Jérôme Glisse <glisse@freedesktop.org>
++ */
++#ifndef RADEON_BO_H
++#define RADEON_BO_H
++
++#include <stdio.h>
++#include <stdint.h>
++//#include "radeon_track.h"
++
++/* bo object */
++#define RADEON_BO_FLAGS_MACRO_TILE  1
++#define RADEON_BO_FLAGS_MICRO_TILE  2
++
++struct radeon_bo_manager;
++
++struct radeon_bo {  /* buffer object operated on by the radeon_bo_* wrappers below */
++    uint32_t                    alignment;  /* NOTE(review): presumably as given to bo_open - confirm */
++    uint32_t                    handle;     /* printed by _radeon_bo_debug; key passed to the tracker */
++    uint32_t                    size;       /* printed by _radeon_bo_debug; NOTE(review): presumably bytes - confirm */
++    uint32_t                    domains;
++    uint32_t                    flags;      /* NOTE(review): presumably RADEON_BO_FLAGS_* bits - confirm */
++    unsigned                    cref;       /* reference count: ++ in _radeon_bo_ref, -- in _radeon_bo_unref */
++#ifdef RADEON_BO_TRACK
++    struct radeon_track         *track;     /* event log, present only in RADEON_BO_TRACK builds */
++#endif
++    void                        *ptr;       /* NOTE(review): presumably the CPU mapping - confirm */
++    struct radeon_bo_manager    *bom;       /* owning manager; wrappers dispatch via bom->funcs */
++    uint32_t                    space_accounted; /* NOTE(review): not touched in this header - confirm use */
++};
++
++/* bo functions */
++struct radeon_bo_funcs {  /* per-manager dispatch table used by the _radeon_bo_* inline wrappers */
++    struct radeon_bo *(*bo_open)(struct radeon_bo_manager *bom,  /* called by _radeon_bo_open */
++                                 uint32_t handle,
++                                 uint32_t size,
++                                 uint32_t alignment,
++                                 uint32_t domains,
++                                 uint32_t flags);
++    void (*bo_ref)(struct radeon_bo *bo);  /* invoked after cref has been incremented */
++    struct radeon_bo *(*bo_unref)(struct radeon_bo *bo);  /* invoked after cref has been decremented; result goes back to the caller */
++    int (*bo_map)(struct radeon_bo *bo, int write);
++    int (*bo_unmap)(struct radeon_bo *bo);
++    int (*bo_wait)(struct radeon_bo *bo);
++};
++
++struct radeon_bo_manager {
++    struct radeon_bo_funcs  *funcs;  /* backend operations the inline wrappers call through */
++    int                     fd;      /* NOTE(review): presumably the DRM fd (radeon_bo_legacy.c hands it to drmCommandWriteRead) - confirm */
++
++#ifdef RADEON_BO_TRACK
++    struct radeon_tracker   tracker;  /* present only in RADEON_BO_TRACK builds */
++#endif
++};
++    
++/* Dump op, bo pointer, handle, size and refcount plus call site to stderr. */
++static inline void _radeon_bo_debug(struct radeon_bo *bo, const char *op,
++                                    const char *file, const char *func,
++                                    int line)
++{
++    /* %p requires void *; %X requires unsigned int, so cast explicitly. */
++    fprintf(stderr, "%s %p 0x%08X 0x%08X 0x%08X [%s %s %d]\n", op, (void *)bo,
++            (unsigned)bo->handle, (unsigned)bo->size, bo->cref, file, func, line);
++}
++
++/*
++ * Open a buffer object through the manager's bo_open hook and return it.
++ * With RADEON_BO_TRACK a non-NULL result gets a track and an "open" event
++ * stamped with the caller's file/func/line; otherwise those are unused.
++ */
++static inline struct radeon_bo *
++_radeon_bo_open(struct radeon_bo_manager *bom, uint32_t handle, uint32_t size,
++                uint32_t alignment, uint32_t domains, uint32_t flags,
++                const char *file, const char *func, int line)
++{
++    struct radeon_bo *opened =
++        bom->funcs->bo_open(bom, handle, size, alignment, domains, flags);
++
++#ifdef RADEON_BO_TRACK
++    if (opened != NULL) {
++        opened->track = radeon_tracker_add_track(&bom->tracker, opened->handle);
++        radeon_track_add_event(opened->track, file, func, "open", line);
++    }
++#endif
++    return opened;
++}
++
++/* Add a reference: bump cref, log a "ref" event if tracking, call bo_ref. */
++static inline void _radeon_bo_ref(struct radeon_bo *bo, const char *file,
++                                  const char *func, int line)
++{
++    ++bo->cref;
++
++#ifdef RADEON_BO_TRACK
++    radeon_track_add_event(bo->track, file, func, "ref", line);
++#endif
++    bo->bom->funcs->bo_ref(bo);
++}
++
++/* Drop a reference (cref--), then return what the manager's bo_unref returns. */
++static inline struct radeon_bo *_radeon_bo_unref(struct radeon_bo *bo,
++        const char *file, const char *func, int line)
++{
++    --bo->cref;
++#ifdef RADEON_BO_TRACK
++    radeon_track_add_event(bo->track, file, func, "unref", line);
++    /* cref is unsigned, so the last reference is exactly the zero case. */
++    if (bo->cref == 0) {
++        radeon_tracker_remove_track(&bo->bom->tracker, bo->track);
++        bo->track = NULL;
++    }
++#endif
++    return bo->bom->funcs->bo_unref(bo);
++}
++
++/* Map bo through its manager's bo_map hook; file/func/line are unused here. */
++static inline int _radeon_bo_map(struct radeon_bo *bo, int write,
++                                 const char *file, const char *func, int line)
++{
++    struct radeon_bo_funcs *ops = bo->bom->funcs;
++
++    return ops->bo_map(bo, write);
++}
++
++/* Unmap bo through its manager's bo_unmap hook; file/func/line are unused. */
++static inline int _radeon_bo_unmap(struct radeon_bo *bo, const char *file,
++                                   const char *func, int line)
++{
++    struct radeon_bo_funcs *ops = bo->bom->funcs;
++    return ops->bo_unmap(bo);
++}
++
++/* Forward to the manager's bo_wait hook; file/func/line are unused here. */
++static inline int _radeon_bo_wait(struct radeon_bo *bo, const char *file,
++                                  const char *func, int line)
++{
++    struct radeon_bo_funcs *ops = bo->bom->funcs;
++    return ops->bo_wait(bo);
++}
++
++#define radeon_bo_open(bom, h, s, a, d, f)\
++    _radeon_bo_open(bom, h, s, a, d, f, __FILE__, __FUNCTION__, __LINE__) /* these macros append the call site (file, function, line) to the _radeon_bo_* helper */
++#define radeon_bo_ref(bo)\
++    _radeon_bo_ref(bo, __FILE__, __FUNCTION__, __LINE__)
++#define radeon_bo_unref(bo)\
++    _radeon_bo_unref(bo, __FILE__, __FUNCTION__, __LINE__) /* evaluates to the pointer returned by the bo_unref hook */
++#define radeon_bo_map(bo, w)\
++    _radeon_bo_map(bo, w, __FILE__, __FUNCTION__, __LINE__)
++#define radeon_bo_unmap(bo)\
++    _radeon_bo_unmap(bo, __FILE__, __FUNCTION__, __LINE__)
++#define radeon_bo_debug(bo, opcode)\
++    _radeon_bo_debug(bo, opcode, __FILE__, __FUNCTION__, __LINE__) /* prints one line to stderr */
++#define radeon_bo_wait(bo) \
++    _radeon_bo_wait(bo, __FILE__, __func__, __LINE__) /* NOTE(review): uses __func__ where the macros above use __FUNCTION__ */
++
++#endif
+diff --git a/src/mesa/drivers/dri/radeon/radeon_bo_legacy.c b/src/mesa/drivers/dri/radeon/radeon_bo_legacy.c
+new file mode 100644
+index 0000000..03a6299
+--- /dev/null
++++ b/src/mesa/drivers/dri/radeon/radeon_bo_legacy.c
+@@ -0,0 +1,825 @@
++/* 
++ * Copyright © 2008 Nicolai Haehnle
++ * Copyright © 2008 Dave Airlie
++ * Copyright © 2008 Jérôme Glisse
++ * All Rights Reserved.
++ * 
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the
++ * "Software"), to deal in the Software without restriction, including
++ * without limitation the rights to use, copy, modify, merge, publish,
++ * distribute, sub license, and/or sell copies of the Software, and to
++ * permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ * 
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
++ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
++ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
++ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
++ * USE OR OTHER DEALINGS IN THE SOFTWARE.
++ *
++ * The above copyright notice and this permission notice (including the
++ * next paragraph) shall be included in all copies or substantial portions
++ * of the Software.
++ */
++/*
++ * Authors:
++ *      Aapo Tahkola <aet@rasterburn.org>
++ *      Nicolai Haehnle <prefect_@gmx.net>
++ *      Dave Airlie
++ *      Jérôme Glisse <glisse@freedesktop.org>
++ */
++#include <stdio.h>
++#include <stddef.h>
++#include <stdint.h>
++#include <stdlib.h>
++#include <string.h>
++#include <errno.h>
++#include <unistd.h>
++#include <sys/mman.h>
++#include <sys/ioctl.h>
++#include "xf86drm.h"
++#include "texmem.h"
++#include "main/simple_list.h"
++
++#include "drm.h"
++#include "radeon_drm.h"
++#include "radeon_common.h"
++#include "radeon_bocs_wrapper.h"
++
++/* no seriously texmem.c is this screwed up */
++struct bo_legacy_texture_object {
++    driTextureObject    base;  /* kept first: bo_legacy_tobj_destroy casts driTextureObject * to this struct */
++    struct bo_legacy *parent;  /* bo this object belongs to; may be NULL (checked before use) */
++};
++
++struct bo_legacy {
++    struct radeon_bo    base;       /* kept first: code casts struct radeon_bo * to struct bo_legacy * */
++    int                 map_count;
++    uint32_t            pending;    /* age compared with the manager's current_age in legacy_is_pending */
++    int                 is_pending; /* > 0 while pending; legacy_is_pending unrefs the bo this many times */
++    int                 static_bo;
++    uint32_t            offset;
++    struct bo_legacy_texture_object *tobj;  /* texture object; set to NULL when it is destroyed */
++    int                 validated;  /* reset to 0 whenever tobj is dropped */
++    int                 dirty;
++    void                *ptr;
++    struct bo_legacy    *next, *prev;    /* links in the manager's bos list */
++    struct bo_legacy    *pnext, *pprev;  /* links in the manager's pending_bos list */
++};
++
++struct bo_manager_legacy {
++    struct radeon_bo_manager    base;           /* kept first: bo->bom is cast to this struct */
++    unsigned                    nhandle;        /* next never-used handle value */
++    unsigned                    nfree_handles;  /* capacity of free_handles[] */
++    unsigned                    cfree_handles;  /* entries in use in free_handles[] */
++    uint32_t                    current_age;    /* refreshed by legacy_get_current_age */
++    struct bo_legacy            bos;            /* list head; legacy_kick_all_buffers walks ->next back to it */
++    struct bo_legacy            pending_bos;    /* head of the pnext/pprev pending list */
++    uint32_t                    fb_location;
++    uint32_t                    texture_offset;
++    unsigned                    dma_alloc_size;
++    uint32_t                    dma_buf_count;
++    unsigned                    cpendings;      /* decremented when legacy_is_pending retires a bo */
++    driTextureObject            texture_swapped;
++    driTexHeap                  *texture_heap;
++    struct radeon_screen        *screen;        /* consulted for IS_R300_CLASS and scratch[3] */
++    unsigned                    *free_handles;  /* stack of released handles; 0 marks a dead slot */
++};
++
++/* Detach a texture object from its parent bo (if any) and mark it unvalidated. */
++static void bo_legacy_tobj_destroy(void *data, driTextureObject *t)
++{
++    struct bo_legacy *owner = ((struct bo_legacy_texture_object *)t)->parent;
++    if (owner == NULL)
++        return;
++    owner->tobj = NULL;
++    owner->validated = 0;
++}
++
++/* Pop zeroed (dead) entries off the top of the free-handle stack. */
++static inline void clean_handles(struct bo_manager_legacy *bom)
++{
++    while (bom->cfree_handles > 0 &&
++           !bom->free_handles[bom->cfree_handles - 1])
++        bom->cfree_handles--;
++}
++/* Reuse the newest freed handle if any, else take the next unused value. */
++static int legacy_new_handle(struct bo_manager_legacy *bom, uint32_t *handle)
++{
++    uint32_t fresh;
++
++    *handle = 0;
++    if (bom->nhandle == 0xFFFFFFFF)
++        return -EINVAL;
++    if (bom->cfree_handles == 0) {
++        /* cfree_handles is unsigned, so "not > 0" already means zero. */
++        fresh = bom->nhandle++;
++    } else {
++        fresh = bom->free_handles[--bom->cfree_handles];
++        clean_handles(bom);
++    }
++    assert(fresh);
++    *handle = fresh;
++    return 0;
++}
++
++/*
++ * Release a handle (0 is ignored).  Freeing the top handle shrinks nhandle and
++ * folds in free-stack entries that match the new top during one top-down pass;
++ * any other handle is pushed on the free stack (-ENOMEM if it cannot grow).
++ */
++static int legacy_free_handle(struct bo_manager_legacy *bom, uint32_t handle)
++{
++    if (!handle)
++        return 0;
++    if (handle == (bom->nhandle - 1)) {
++        int i;
++
++        bom->nhandle--;
++        for (i = bom->cfree_handles - 1; i >= 0; i--) {
++            if (bom->free_handles[i] == (bom->nhandle - 1)) {
++                bom->nhandle--;
++                bom->free_handles[i] = 0;
++            }
++        }
++        clean_handles(bom);
++        return 0;
++    }
++    if (bom->cfree_handles >= bom->nfree_handles) {
++        /* Size by the array's real element type; commit only on success. */
++        unsigned grown = bom->nfree_handles + 0x100;
++        unsigned *handles = realloc(bom->free_handles, grown * sizeof(*handles));
++        if (handles == NULL)
++            return -ENOMEM;
++        bom->free_handles = handles;
++        bom->nfree_handles = grown;
++    }
++    bom->free_handles[bom->cfree_handles++] = handle;
++    return 0;
++}
++
++/* Refresh boml->current_age: GETPARAM ioctl on R300-class, else scratch[3]. */
++static void legacy_get_current_age(struct bo_manager_legacy *boml)
++{
++    if (IS_R300_CLASS(boml->screen)) {
++        drm_radeon_getparam_t req;
++        int err;
++        req.param = RADEON_PARAM_LAST_CLEAR;
++        req.value = (int *)&boml->current_age;
++        err = drmCommandWriteRead(boml->base.fd, DRM_RADEON_GETPARAM, &req, sizeof(req));
++        if (err != 0) {
++            fprintf(stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__, err);
++            exit(1);
++        }
++        return;
++    }
++    boml->current_age = boml->screen->scratch[3];
++}
++
++static int legacy_is_pending(struct radeon_bo *bo)  /* returns 1 while bo is still pending, 0 otherwise (retiring it if its age has passed) */
++{
++    struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom;
++    struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
++
++    if (bo_legacy->is_pending <= 0) {  /* nothing pending: normalise the counter and report idle */
++        bo_legacy->is_pending = 0;
++        return 0;
++    }
++    if (boml->current_age >= bo_legacy->pending) {  /* uses the cached age; callers refresh it via legacy_get_current_age */
++        if (boml->pending_bos.pprev == bo_legacy) {  /* NOTE(review): head's pprev appears to track the list tail - confirm */
++            boml->pending_bos.pprev = bo_legacy->pprev;
++        }
++        bo_legacy->pprev->pnext = bo_legacy->pnext;  /* unlink from the pending list */
++        if (bo_legacy->pnext) {
++            bo_legacy->pnext->pprev = bo_legacy->pprev;
++        }
++	assert(bo_legacy->is_pending <= bo->cref);
++        while (bo_legacy->is_pending--) {  /* drop one reference per pending count */
++	    bo = radeon_bo_unref(bo);
++	    if (!bo)  /* unref returned NULL: stop touching the object */
++	      break;
++        }
++	if (bo)  /* loop ran to completion and left the counter at -1; reset it */
++	  bo_legacy->is_pending = 0;
++        boml->cpendings--;
++        return 0;
++    }
++    return 1;
++}
++
++static int legacy_wait_pending(struct radeon_bo *bo)
++{
++    struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom;
++    struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
++
++    if (!bo_legacy->is_pending) {
++        return 0;
++    }
++    /* FIXME: lockup and userspace busy looping that's all the folks */
++    legacy_get_current_age(boml);
++    while (legacy_is_pending(bo)) {
++        usleep(10);
++        legacy_get_current_age(boml);
++    }
++    return 0;
++}
++
++static void legacy_track_pending(struct bo_manager_legacy *boml, int debug)
++{
++    struct bo_legacy *bo_legacy;
++    struct bo_legacy *next;
++
++    legacy_get_current_age(boml);
++    bo_legacy = boml->pending_bos.pnext;
++    while (bo_legacy) {
++        if (debug)
++	  fprintf(stderr,"pending %p %d %d %d\n", bo_legacy, bo_legacy->base.size,
++		  boml->current_age, bo_legacy->pending);
++        next = bo_legacy->pnext;
++        if (legacy_is_pending(&(bo_legacy->base))) {
++        }
++        bo_legacy = next;
++    } 
++}
++
++static int legacy_wait_any_pending(struct bo_manager_legacy *boml)
++{
++    struct bo_legacy *bo_legacy;
++
++    legacy_get_current_age(boml);
++    bo_legacy = boml->pending_bos.pnext;
++    if (!bo_legacy)
++      return -1;
++    legacy_wait_pending(&bo_legacy->base);
++    return 0;
++}
++
++/*
++ * Drop the texture object of every validated buffer so validation can start
++ * over.  The bos list is NULL-terminated (bo_allocate never links the tail
++ * back to the head), so the walk must stop on NULL, not on &boml->bos. */
++static void legacy_kick_all_buffers(struct bo_manager_legacy *boml)
++{
++    struct bo_legacy *legacy;
++
++    for (legacy = boml->bos.next; legacy != NULL; legacy = legacy->next) {
++        if (legacy->tobj && legacy->validated) {
++            driDestroyTextureObject(&legacy->tobj->base);
++            legacy->tobj = NULL;
++            legacy->validated = 0;
++        }
++    }
++}
++
++static struct bo_legacy *bo_allocate(struct bo_manager_legacy *boml,
++                                     uint32_t size,
++                                     uint32_t alignment,
++                                     uint32_t domains,
++                                     uint32_t flags)
++{
++    struct bo_legacy *bo_legacy;
++    static int pgsize;
++
++    if (pgsize == 0)
++        pgsize = getpagesize() - 1;
++
++    size = (size + pgsize) & ~pgsize;
++
++    bo_legacy = (struct bo_legacy*)calloc(1, sizeof(struct bo_legacy));
++    if (bo_legacy == NULL) {
++        return NULL;
++    }
++    bo_legacy->base.bom = (struct radeon_bo_manager*)boml;
++    bo_legacy->base.handle = 0;
++    bo_legacy->base.size = size;
++    bo_legacy->base.alignment = alignment;
++    bo_legacy->base.domains = domains;
++    bo_legacy->base.flags = flags;
++    bo_legacy->base.ptr = NULL;
++    bo_legacy->map_count = 0;
++    bo_legacy->next = NULL;
++    bo_legacy->prev = NULL;
++    bo_legacy->pnext = NULL;
++    bo_legacy->pprev = NULL;
++    bo_legacy->next = boml->bos.next;
++    bo_legacy->prev = &boml->bos;
++    boml->bos.next = bo_legacy;
++    if (bo_legacy->next) {
++        bo_legacy->next->prev = bo_legacy;
++    }
++    return bo_legacy;
++}
++
++static int bo_dma_alloc(struct radeon_bo *bo)
++{
++    struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom;
++    struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
++    drm_radeon_mem_alloc_t alloc;
++    unsigned size;
++    int base_offset;
++    int r;
++
++    /* align size on 4Kb */
++    size = (((4 * 1024) - 1) + bo->size) & ~((4 * 1024) - 1);
++    alloc.region = RADEON_MEM_REGION_GART;
++    alloc.alignment = bo_legacy->base.alignment;
++    alloc.size = size;
++    alloc.region_offset = &base_offset;
++    r = drmCommandWriteRead(bo->bom->fd,
++                            DRM_RADEON_ALLOC,
++                            &alloc,
++                            sizeof(alloc));
++    if (r) {
++        /* ptr is set to NULL if dma allocation failed */
++        bo_legacy->ptr = NULL;
++        return r;
++    }
++    bo_legacy->ptr = boml->screen->gartTextures.map + base_offset;
++    bo_legacy->offset = boml->screen->gart_texture_offset + base_offset;
++    bo->size = size;
++    boml->dma_alloc_size += size;
++    boml->dma_buf_count++;
++    return 0;
++}
++
++static int bo_dma_free(struct radeon_bo *bo)
++{
++    struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom;
++    struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
++    drm_radeon_mem_free_t memfree;
++    int r;
++
++    if (bo_legacy->ptr == NULL) {
++        /* ptr is set to NULL if dma allocation failed */
++        return 0;
++    }
++    legacy_get_current_age(boml);
++    memfree.region = RADEON_MEM_REGION_GART;
++    memfree.region_offset  = bo_legacy->offset;
++    memfree.region_offset -= boml->screen->gart_texture_offset;
++    r = drmCommandWrite(boml->base.fd,
++                        DRM_RADEON_FREE,
++                        &memfree,
++                        sizeof(memfree));
++    if (r) {
++        fprintf(stderr, "Failed to free bo[%p] at %08x\n",
++                &bo_legacy->base, memfree.region_offset);
++        fprintf(stderr, "ret = %s\n", strerror(-r));
++        return r;
++    }
++    boml->dma_alloc_size -= bo_legacy->base.size;
++    boml->dma_buf_count--;
++    return 0;
++}
++
++static void bo_free(struct bo_legacy *bo_legacy)
++{
++    struct bo_manager_legacy *boml;
++
++    if (bo_legacy == NULL) {
++        return;
++    }
++    boml = (struct bo_manager_legacy *)bo_legacy->base.bom;
++    bo_legacy->prev->next = bo_legacy->next;
++    if (bo_legacy->next) {
++        bo_legacy->next->prev = bo_legacy->prev;
++    }
++    if (!bo_legacy->static_bo) {
++        legacy_free_handle(boml, bo_legacy->base.handle);
++        if (bo_legacy->base.domains & RADEON_GEM_DOMAIN_GTT) {
++            /* dma buffers */
++            bo_dma_free(&bo_legacy->base);
++        } else {
++  	    driDestroyTextureObject(&bo_legacy->tobj->base);
++	    bo_legacy->tobj = NULL;
++            /* free backing store */
++            free(bo_legacy->ptr);
++        }
++    }
++    memset(bo_legacy, 0 , sizeof(struct bo_legacy));
++    free(bo_legacy);
++}
++
++/* Look up a buffer by handle, or create a new one when handle is 0.
++ * Returns the buffer with a reference taken, or NULL on miss or failure. */
++static struct radeon_bo *bo_open(struct radeon_bo_manager *bom,
++                                 uint32_t handle,
++                                 uint32_t size,
++                                 uint32_t alignment,
++                                 uint32_t domains,
++                                 uint32_t flags)
++{
++    struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bom;
++    struct bo_legacy *bo_legacy;
++    int r;
++
++    if (handle) {
++        for (bo_legacy = boml->bos.next; bo_legacy; bo_legacy = bo_legacy->next) {
++            if (bo_legacy->base.handle == handle) {
++                radeon_bo_ref(&(bo_legacy->base));
++                return (struct radeon_bo*)bo_legacy;
++            }
++        }
++        return NULL;
++    }
++
++    bo_legacy = bo_allocate(boml, size, alignment, domains, flags);
++    if (bo_legacy == NULL) {
++        return NULL; /* bo_allocate returns NULL when calloc fails */
++    }
++    bo_legacy->static_bo = 0;
++    r = legacy_new_handle(boml, &bo_legacy->base.handle);
++    if (r) {
++        bo_free(bo_legacy);
++        return NULL;
++    }
++    if (bo_legacy->base.domains & RADEON_GEM_DOMAIN_GTT) {
++    retry:
++        legacy_track_pending(boml, 0); /* release pending buffers whose age has passed */
++        r = bo_dma_alloc(&(bo_legacy->base)); /* dma buffers */
++        if (r) {
++            if (legacy_wait_any_pending(boml) == -1) { /* nothing left to wait for */
++                bo_free(bo_legacy);
++                return NULL;
++            }
++            goto retry; /* a pending buffer was waited on; try the allocation again */
++        }
++    } else {
++        bo_legacy->ptr = malloc(bo_legacy->base.size);
++        if (bo_legacy->ptr == NULL) {
++            bo_free(bo_legacy);
++            return NULL;
++        }
++    }
++    radeon_bo_ref(&(bo_legacy->base));
++    return (struct radeon_bo*)bo_legacy;
++}
++
++static void bo_ref(struct radeon_bo *bo)
++{
++}
++
++static struct radeon_bo *bo_unref(struct radeon_bo *bo)
++{
++    struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
++
++    if (bo->cref <= 0) {
++        bo_legacy->prev->next = bo_legacy->next;
++        if (bo_legacy->next) {
++            bo_legacy->next->prev = bo_legacy->prev;
++        }
++        if (!bo_legacy->is_pending) {
++            bo_free(bo_legacy);
++        }
++        return NULL;
++    }
++    return bo;
++}
++
++static int bo_map(struct radeon_bo *bo, int write)
++{
++    struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom;
++    struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
++    
++    legacy_wait_pending(bo);
++    bo_legacy->validated = 0;
++    bo_legacy->dirty = 1;
++    bo_legacy->map_count++;
++    bo->ptr = bo_legacy->ptr;
++    /* Read the first pixel in the frame buffer.  This should
++     * be a noop, right?  In fact without this conform fails as reading
++     * from the framebuffer sometimes produces old results -- the
++     * on-card read cache gets mixed up and doesn't notice that the
++     * framebuffer has been updated.
++     *
++     * Note that we should probably be reading some otherwise unused
++     * region of VRAM, otherwise we might get incorrect results when
++     * reading pixels from the top left of the screen.
++     *
++     * I found this problem on an R420 with glean's texCube test.
++     * Note that the R200 span code also *writes* the first pixel in the
++     * framebuffer, but I've found this to be unnecessary.
++     *  -- Nicolai Hähnle, June 2008
++     */
++    if (!(bo->domains & RADEON_GEM_DOMAIN_GTT)) {
++        int p;
++        volatile int *buf = (int*)boml->screen->driScreen->pFB;
++        p = *buf;
++    }
++    return 0;
++}
++
++static int bo_unmap(struct radeon_bo *bo)
++{
++    struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
++
++    if (--bo_legacy->map_count > 0) {
++        return 0;
++    }
++    bo->ptr = NULL;
++    return 0;
++}
++
++static struct radeon_bo_funcs bo_legacy_funcs = {
++    bo_open,
++    bo_ref,
++    bo_unref,
++    bo_map,
++    bo_unmap
++};
++
++/* Give a VRAM buffer a block in the texture heap if it has none yet, then
++ * upload its backing store with DRM_RADEON_TEXTURE when it is dirty.
++ * Returns 0 on success, -1 when no heap block could be obtained. */
++static int bo_vram_validate(struct radeon_bo *bo,
++                            uint32_t *soffset,
++                            uint32_t *eoffset)
++{
++    struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom;
++    struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
++    int r, retry_count = 0;
++
++    if (!bo_legacy->tobj) {
++        bo_legacy->tobj = CALLOC(sizeof(struct bo_legacy_texture_object));
++        if (!bo_legacy->tobj)
++            return -1;
++        bo_legacy->tobj->parent = bo_legacy;
++        make_empty_list(&bo_legacy->tobj->base);
++        bo_legacy->tobj->base.totalSize = bo->size;
++    retry:
++        r = driAllocateTexture(&boml->texture_heap, 1,
++                               &bo_legacy->tobj->base);
++        if (r) {
++            /* Retry at most twice, and only while pending buffers may retire. */
++            if (boml->cpendings && retry_count++ < 2) {
++                legacy_track_pending(boml, 0);
++                goto retry;
++            }
++            /* Never continue without a memBlock; let the caller evict. */
++            free(bo_legacy->tobj);
++            bo_legacy->tobj = NULL;
++            fprintf(stderr, "Ouch! vram_validate failed %d\n", r);
++            return -1;
++        }
++        bo_legacy->offset = boml->texture_offset +
++                            bo_legacy->tobj->base.memBlock->ofs;
++        bo_legacy->dirty = 1;
++    }
++
++    assert(bo_legacy->tobj->base.memBlock);
++    driUpdateTextureLRU(&bo_legacy->tobj->base);
++
++    if (bo_legacy->dirty || bo_legacy->tobj->base.dirty_images[0]) {
++        /* Copy to VRAM using a blit.
++         * All memory is 4K aligned. We're using 1024 pixels wide blits.
++         */
++        drm_radeon_texture_t tex;
++        drm_radeon_tex_image_t tmp;
++        int ret;
++
++        tex.offset = bo_legacy->offset;
++        tex.image = &tmp;
++        assert(!(tex.offset & 1023));
++
++        tmp.x = 0;
++        tmp.y = 0;
++        if (bo->size < 4096) {
++            tmp.width = (bo->size + 3) / 4;
++            tmp.height = 1;
++        } else {
++            tmp.width = 1024;
++            tmp.height = (bo->size + 4095) / 4096;
++        }
++        tmp.data = bo_legacy->ptr;
++        tex.format = RADEON_TXFORMAT_ARGB8888;
++        tex.width = tmp.width;
++        tex.height = tmp.height;
++        tex.pitch = MAX2(tmp.width / 16, 1);
++        do {
++            ret = drmCommandWriteRead(bo->bom->fd,
++                                      DRM_RADEON_TEXTURE,
++                                      &tex,
++                                      sizeof(drm_radeon_texture_t));
++            if (ret) {
++                if (RADEON_DEBUG & DEBUG_IOCTL)
++                    fprintf(stderr, "DRM_RADEON_TEXTURE:  again!\n");
++                usleep(1);
++            }
++        } while (ret == -EAGAIN);
++        bo_legacy->dirty = 0;
++	bo_legacy->tobj->base.dirty_images[0] = 0;
++    }
++    return 0;
++}
++
++/* 
++ *  radeon_bo_legacy_validate -
++ *  returns:
++ *  0 - all good
++ *  -EINVAL - mapped buffer can't be validated
++ *  -EAGAIN - restart validation we've kicked all the buffers out
++ */
++int radeon_bo_legacy_validate(struct radeon_bo *bo,
++                              uint32_t *soffset,
++                              uint32_t *eoffset)
++{
++    struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom;
++    struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
++    int r;
++    int retries = 0; /* NOTE(review): function-local, so it restarts at 0 on every call */
++
++    if (bo_legacy->map_count) {
++        fprintf(stderr, "bo(%p, %d) is mapped (%d) can't valide it.\n",
++                bo, bo->size, bo_legacy->map_count);
++        return -EINVAL;
++    }
++    if (bo_legacy->static_bo || bo_legacy->validated) {
++        *soffset = bo_legacy->offset;
++        *eoffset = bo_legacy->offset + bo->size;
++        return 0;
++    }
++    if (!(bo->domains & RADEON_GEM_DOMAIN_GTT)) {
++
++        r = bo_vram_validate(bo, soffset, eoffset);
++        if (r) {
++	    legacy_track_pending(boml, 0);
++	    legacy_kick_all_buffers(boml);
++	    retries++;
++	    if (retries == 2) { /* NOTE(review): unreachable as written, retries is always 1 here */
++		fprintf(stderr,"legacy bo: failed to get relocations into aperture\n");
++		assert(0);
++		exit(-1);
++	    }
++	    return -EAGAIN; /* buffers were kicked out; validation has to be restarted */
++        }
++    }
++    *soffset = bo_legacy->offset;
++    *eoffset = bo_legacy->offset + bo->size;
++    bo_legacy->validated = 1;
++    return 0;
++}
++
++void radeon_bo_legacy_pending(struct radeon_bo *bo, uint32_t pending)
++{
++    struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom;
++    struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
++
++    bo_legacy->pending = pending;
++    bo_legacy->is_pending++;
++    /* add to pending list */
++    radeon_bo_ref(bo);
++    if (bo_legacy->is_pending > 1) {
++        return;    
++    }
++    bo_legacy->pprev = boml->pending_bos.pprev;
++    bo_legacy->pnext = NULL;
++    bo_legacy->pprev->pnext = bo_legacy;
++    boml->pending_bos.pprev = bo_legacy;
++    boml->cpendings++;
++}
++
++void radeon_bo_manager_legacy_dtor(struct radeon_bo_manager *bom)
++{
++    struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bom;
++    struct bo_legacy *bo_legacy;
++
++    if (bom == NULL) {
++        return;
++    }
++    bo_legacy = boml->bos.next;
++    while (bo_legacy) {
++        struct bo_legacy *next;
++
++        next = bo_legacy->next;
++        bo_free(bo_legacy);
++        bo_legacy = next;
++    }
++    driDestroyTextureHeap(boml->texture_heap);
++    free(boml->free_handles);
++    free(boml);
++}
++
++/* Describe a fixed range of the mapped framebuffer as a static, referenced bo. */
++static struct bo_legacy *radeon_legacy_bo_alloc_static(struct bo_manager_legacy *bom,
++						       int size, uint32_t offset)
++{
++    struct bo_legacy *bo = bo_allocate(bom, size, 0, RADEON_GEM_DOMAIN_VRAM, 0);
++    if (bo == NULL)
++	return NULL;
++    bo->static_bo = 1;
++    bo->offset = offset + bom->fb_location;
++    bo->base.handle = bo->offset;
++    bo->ptr = bom->screen->driScreen->pFB + offset;
++    /* legacy_free_handle treats nhandle - 1 as the highest live handle, so a
++     * static handle equal to nhandle has to bump it as well (>=, not >). */
++    if (bo->base.handle >= bom->nhandle) {
++        bom->nhandle = bo->base.handle + 1;
++    }
++    radeon_bo_ref(&(bo->base));
++    return bo;
++}
++
++struct radeon_bo_manager *radeon_bo_manager_legacy_ctor(struct radeon_screen *scrn)
++{
++    struct bo_manager_legacy *bom;
++    struct bo_legacy *bo;
++    unsigned size;
++
++    bom = (struct bo_manager_legacy*)
++          calloc(1, sizeof(struct bo_manager_legacy));
++    if (bom == NULL) {
++        return NULL;
++    }
++
++    make_empty_list(&bom->texture_swapped);
++
++    bom->texture_heap = driCreateTextureHeap(0,
++                                             bom,
++                                             scrn->texSize[0],
++                                             12,
++                                             RADEON_NR_TEX_REGIONS,
++                                             (drmTextureRegionPtr)scrn->sarea->tex_list[0],
++                                             &scrn->sarea->tex_age[0],
++                                             &bom->texture_swapped,
++                                             sizeof(struct bo_legacy_texture_object),
++                                             &bo_legacy_tobj_destroy);
++    bom->texture_offset = scrn->texOffset[0];
++
++    bom->base.funcs = &bo_legacy_funcs;
++    bom->base.fd = scrn->driScreen->fd;
++    bom->bos.next = NULL;
++    bom->bos.prev = NULL;
++    bom->pending_bos.pprev = &bom->pending_bos;
++    bom->pending_bos.pnext = NULL;
++    bom->screen = scrn;
++    bom->fb_location = scrn->fbLocation;
++    bom->nhandle = 1;
++    bom->cfree_handles = 0;
++    bom->nfree_handles = 0x400;
++    bom->free_handles = (uint32_t*)malloc(bom->nfree_handles * 4);
++    if (bom->free_handles == NULL) {
++        radeon_bo_manager_legacy_dtor((struct radeon_bo_manager*)bom);
++        return NULL;
++    }
++
++    /* biggest framebuffer size */
++    size = 4096*4096*4; 
++
++    /* allocate front */
++    bo = radeon_legacy_bo_alloc_static(bom, size, bom->screen->frontOffset);
++    if (!bo) {
++        radeon_bo_manager_legacy_dtor((struct radeon_bo_manager*)bom);
++        return NULL;
++    }
++    if (scrn->sarea->tiling_enabled) {
++        bo->base.flags = RADEON_BO_FLAGS_MACRO_TILE;
++    }
++
++    /* allocate back */
++    bo = radeon_legacy_bo_alloc_static(bom, size, bom->screen->backOffset);
++    if (!bo) {
++        radeon_bo_manager_legacy_dtor((struct radeon_bo_manager*)bom);
++        return NULL;
++    }
++    if (scrn->sarea->tiling_enabled) {
++        bo->base.flags = RADEON_BO_FLAGS_MACRO_TILE;
++    }
++
++    /* allocate depth */
++    bo = radeon_legacy_bo_alloc_static(bom, size, bom->screen->depthOffset);
++    if (!bo) {
++        radeon_bo_manager_legacy_dtor((struct radeon_bo_manager*)bom);
++        return NULL;
++    }
++    bo->base.flags = 0;
++    if (scrn->sarea->tiling_enabled) {
++        bo->base.flags |= RADEON_BO_FLAGS_MACRO_TILE;
++        bo->base.flags |= RADEON_BO_FLAGS_MICRO_TILE;
++    }
++    return (struct radeon_bo_manager*)bom;
++}
++
++void radeon_bo_legacy_texture_age(struct radeon_bo_manager *bom)
++{
++    struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bom;
++    DRI_AGE_TEXTURES(boml->texture_heap);
++}
++
++unsigned radeon_bo_legacy_relocs_size(struct radeon_bo *bo)
++{
++    struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
++
++    if (bo_legacy->static_bo || (bo->domains & RADEON_GEM_DOMAIN_GTT)) {
++        return 0;
++    }
++    return bo->size;
++}
++
++int radeon_legacy_bo_is_static(struct radeon_bo *bo)
++{
++    struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
++    return bo_legacy->static_bo;
++}
++
+diff --git a/src/mesa/drivers/dri/radeon/radeon_bo_legacy.h b/src/mesa/drivers/dri/radeon/radeon_bo_legacy.h
+new file mode 100644
+index 0000000..9187cd7
+--- /dev/null
++++ b/src/mesa/drivers/dri/radeon/radeon_bo_legacy.h
+@@ -0,0 +1,47 @@
++/* 
++ * Copyright © 2008 Nicolai Haehnle
++ * Copyright © 2008 Jérôme Glisse
++ * All Rights Reserved.
++ * 
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the
++ * "Software"), to deal in the Software without restriction, including
++ * without limitation the rights to use, copy, modify, merge, publish,
++ * distribute, sub license, and/or sell copies of the Software, and to
++ * permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ * 
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
++ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
++ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
++ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
++ * USE OR OTHER DEALINGS IN THE SOFTWARE.
++ *
++ * The above copyright notice and this permission notice (including the
++ * next paragraph) shall be included in all copies or substantial portions
++ * of the Software.
++ */
++/*
++ * Authors:
++ *      Aapo Tahkola <aet@rasterburn.org>
++ *      Nicolai Haehnle <prefect_@gmx.net>
++ *      Jérôme Glisse <glisse@freedesktop.org>
++ */
++#ifndef RADEON_BO_LEGACY_H
++#define RADEON_BO_LEGACY_H
++
++#include "radeon_screen.h"
++
++void radeon_bo_legacy_pending(struct radeon_bo *bo, uint32_t pending);
++int radeon_bo_legacy_validate(struct radeon_bo *bo,
++                              uint32_t *soffset,
++                              uint32_t *eoffset);
++struct radeon_bo_manager *radeon_bo_manager_legacy_ctor(struct radeon_screen *scrn);
++void radeon_bo_manager_legacy_dtor(struct radeon_bo_manager *bom);
++void radeon_bo_legacy_texture_age(struct radeon_bo_manager *bom);
++unsigned radeon_bo_legacy_relocs_size(struct radeon_bo *bo);
++
++int radeon_legacy_bo_is_static(struct radeon_bo *bo);
++#endif
+diff --git a/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h b/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h
+new file mode 100644
+index 0000000..f80f0f7
+--- /dev/null
++++ b/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h
+@@ -0,0 +1,67 @@
++#ifndef RADEON_CS_WRAPPER_H
++#define RADEON_CS_WRAPPER_H
++
++#ifndef RADEON_PARAM_DEVICE_ID
++#define RADEON_PARAM_DEVICE_ID 17
++#endif
++
++#ifdef HAVE_LIBDRM_RADEON
++
++#include "radeon_bo.h"
++#include "radeon_bo_gem.h"
++#include "radeon_cs.h"
++#include "radeon_cs_gem.h"
++
++#else
++#include <stdint.h>
++
++#define RADEON_GEM_DOMAIN_CPU 0x1   // Cached CPU domain
++#define RADEON_GEM_DOMAIN_GTT 0x2   // GTT or cache flushed
++#define RADEON_GEM_DOMAIN_VRAM 0x4  // VRAM domain
++
++/* to be used to build locally in mesa with no libdrm bits */
++#include "../radeon/radeon_bo_drm.h"
++#include "../radeon/radeon_cs_drm.h"
++
++#ifndef DRM_RADEON_GEM_INFO
++#define DRM_RADEON_GEM_INFO 0x1c
++
++struct drm_radeon_gem_info {
++        uint64_t gart_start;
++        uint64_t gart_size;
++        uint64_t vram_start;
++        uint64_t vram_size;
++        uint64_t vram_visible;
++};
++#endif
++
++
++
++
++static inline void *radeon_bo_manager_gem_ctor(int fd)
++{
++  return NULL;
++}
++
++static inline void radeon_bo_manager_gem_dtor(void *dummy)
++{
++}
++
++static inline void *radeon_cs_manager_gem_ctor(int fd)
++{
++  return NULL;
++}
++
++static inline void radeon_cs_manager_gem_dtor(void *dummy)
++{
++}
++
++static inline void radeon_tracker_print(void *ptr, int io)
++{
++}
++#endif
++
++#include "radeon_bo_legacy.h"
++#include "radeon_cs_legacy.h"
++
++#endif
+diff --git a/src/mesa/drivers/dri/radeon/radeon_cmdbuf.h b/src/mesa/drivers/dri/radeon/radeon_cmdbuf.h
+new file mode 100644
+index 0000000..4b5116c
+--- /dev/null
++++ b/src/mesa/drivers/dri/radeon/radeon_cmdbuf.h
+@@ -0,0 +1,143 @@
++#ifndef COMMON_CMDBUF_H
++#define COMMON_CMDBUF_H
++
++#include "radeon_bocs_wrapper.h"
++
++void rcommonEnsureCmdBufSpace(radeonContextPtr rmesa, int dwords, const char *caller);
++int rcommonFlushCmdBuf(radeonContextPtr rmesa, const char *caller);
++int rcommonFlushCmdBufLocked(radeonContextPtr rmesa, const char *caller);
++void rcommonInitCmdBuf(radeonContextPtr rmesa);
++void rcommonDestroyCmdBuf(radeonContextPtr rmesa);
++
++void rcommonBeginBatch(radeonContextPtr rmesa,
++		       int n,
++		       int dostate,
++		       const char *file,
++		       const char *function,
++		       int line);
++
++#define RADEON_CP_PACKET3_NOP                       0xC0001000
++#define RADEON_CP_PACKET3_NEXT_CHAR                 0xC0001900
++#define RADEON_CP_PACKET3_PLY_NEXTSCAN              0xC0001D00
++#define RADEON_CP_PACKET3_SET_SCISSORS              0xC0001E00
++#define RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM     0xC0002300
++#define RADEON_CP_PACKET3_LOAD_MICROCODE            0xC0002400
++#define RADEON_CP_PACKET3_WAIT_FOR_IDLE             0xC0002600
++#define RADEON_CP_PACKET3_3D_DRAW_VBUF              0xC0002800
++#define RADEON_CP_PACKET3_3D_DRAW_IMMD              0xC0002900
++#define RADEON_CP_PACKET3_3D_DRAW_INDX              0xC0002A00
++#define RADEON_CP_PACKET3_LOAD_PALETTE              0xC0002C00
++#define RADEON_CP_PACKET3_3D_LOAD_VBPNTR            0xC0002F00
++#define RADEON_CP_PACKET3_CNTL_PAINT                0xC0009100
++#define RADEON_CP_PACKET3_CNTL_BITBLT               0xC0009200
++#define RADEON_CP_PACKET3_CNTL_SMALLTEXT            0xC0009300
++#define RADEON_CP_PACKET3_CNTL_HOSTDATA_BLT         0xC0009400
++#define RADEON_CP_PACKET3_CNTL_POLYLINE             0xC0009500
++#define RADEON_CP_PACKET3_CNTL_POLYSCANLINES        0xC0009800
++#define RADEON_CP_PACKET3_CNTL_PAINT_MULTI          0xC0009A00
++#define RADEON_CP_PACKET3_CNTL_BITBLT_MULTI         0xC0009B00
++#define RADEON_CP_PACKET3_CNTL_TRANS_BITBLT         0xC0009C00
++
++#define CP_PACKET2  (2 << 30)
++#define CP_PACKET0(reg, n)	(RADEON_CP_PACKET0 | ((n)<<16) | ((reg)>>2))
++#define CP_PACKET0_ONE(reg, n)	(RADEON_CP_PACKET0 | RADEON_CP_PACKET0_ONE_REG_WR | ((n)<<16) | ((reg)>>2))
++#define CP_PACKET3( pkt, n )						\
++	(RADEON_CP_PACKET3 | (pkt) | ((n) << 16))
++
++/**
++ * Every function writing to the command buffer needs to declare this
++ * to get the necessary local variables.
++ */
++#define BATCH_LOCALS(rmesa) \
++	const radeonContextPtr b_l_rmesa = rmesa
++
++/**
++ * Prepare writing n dwords to the command buffer,
++ * including producing any necessary state emits on buffer wraparound.
++ */
++#define BEGIN_BATCH(n) rcommonBeginBatch(b_l_rmesa, n, 1, __FILE__, __FUNCTION__, __LINE__)
++
++/**
++ * Same as BEGIN_BATCH, but do not cause automatic state emits.
++ */
++#define BEGIN_BATCH_NO_AUTOSTATE(n) rcommonBeginBatch(b_l_rmesa, n, 0, __FILE__, __FUNCTION__, __LINE__)
++
++/**
++ * Write one dword to the command buffer.
++ */
++#define OUT_BATCH(data) \
++	do { \
++        radeon_cs_write_dword(b_l_rmesa->cmdbuf.cs, data);\
++	} while(0)
++
++/**
++ * Write a relocated dword to the command buffer.
++ */
++#define OUT_BATCH_RELOC(data, bo, offset, rd, wd, flags) 	\
++	do { 							\
++        if (0 && offset) {					\
++            fprintf(stderr, "(%s:%s:%d) offset : %d\n",		\
++            __FILE__, __FUNCTION__, __LINE__, offset);		\
++        }							\
++        radeon_cs_write_dword(b_l_rmesa->cmdbuf.cs, offset);	\
++        radeon_cs_write_reloc(b_l_rmesa->cmdbuf.cs, 		\
++                              bo, rd, wd, flags);		\
++	if (!b_l_rmesa->radeonScreen->kernel_mm) 		\
++		b_l_rmesa->cmdbuf.cs->section_cdw += 2;		\
++	} while(0)
++
++
++/**
++ * Write n dwords from ptr to the command buffer.
++ */
++#define OUT_BATCH_TABLE(ptr,n) \
++	do { /* ptr and n are parenthesized so expression arguments expand safely */ \
++		int _i; \
++        for (_i=0; _i < (n); _i++) {\
++            radeon_cs_write_dword(b_l_rmesa->cmdbuf.cs, (ptr)[_i]);\
++        }\
++	} while(0)
++
++/**
++ * Finish writing dwords to the command buffer.
++ * The number of (direct or indirect) OUT_BATCH calls between the previous
++ * BEGIN_BATCH and END_BATCH must match the number specified at BEGIN_BATCH time.
++ */
++#define END_BATCH() \
++	do { \
++        radeon_cs_end(b_l_rmesa->cmdbuf.cs, __FILE__, __FUNCTION__, __LINE__);\
++	} while(0)
++
++/**
++ * After the last END_BATCH() of rendering, this indicates that flushing
++ * the command buffer now is okay.
++ */
++#define COMMIT_BATCH() \
++	do { \
++	} while(0)
++
++
++/** Single register write to command buffer; requires 2 dwords. Expands to one statement. */
++#define OUT_BATCH_REGVAL(reg, val) do { \
++	OUT_BATCH(cmdpacket0(b_l_rmesa->radeonScreen, (reg), 1)); \
++	OUT_BATCH((val)); } while(0)
++
++/** Continuous register range write to command buffer; requires 1 dword,
++ * expects count dwords afterwards for register contents. */
++#define OUT_BATCH_REGSEQ(reg, count) \
++	OUT_BATCH(cmdpacket0(b_l_rmesa->radeonScreen, (reg), (count)));
++
++/** Write a 32 bit float to the ring; requires 1 dword. */
++#define OUT_BATCH_FLOAT32(f) \
++	OUT_BATCH(radeonPackFloat32((f)));
++
++
++/* Fire the buffered vertices no matter what.
++ */
++static INLINE void radeon_firevertices(radeonContextPtr radeon)
++{
++   if (radeon->cmdbuf.cs->cdw || radeon->dma.flush )
++      radeonFlush(radeon->glCtx);
++}
++
++#endif
+diff --git a/src/mesa/drivers/dri/radeon/radeon_common.c b/src/mesa/drivers/dri/radeon/radeon_common.c
+new file mode 100644
+index 0000000..f7c0d7d
+--- /dev/null
++++ b/src/mesa/drivers/dri/radeon/radeon_common.c
+@@ -0,0 +1,849 @@
++/**************************************************************************
++
++Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
++
++The Weather Channel (TM) funded Tungsten Graphics to develop the
++initial release of the Radeon 8500 driver under the XFree86 license.
++This notice must be preserved.
++
++Permission is hereby granted, free of charge, to any person obtaining
++a copy of this software and associated documentation files (the
++"Software"), to deal in the Software without restriction, including
++without limitation the rights to use, copy, modify, merge, publish,
++distribute, sublicense, and/or sell copies of the Software, and to
++permit persons to whom the Software is furnished to do so, subject to
++the following conditions:
++
++The above copyright notice and this permission notice (including the
++next paragraph) shall be included in all copies or substantial
++portions of the Software.
++
++THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
++IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
++LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
++OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
++WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
++
++**************************************************************************/
++
++/*
++ * Authors:
++ *   Keith Whitwell <keith@tungstengraphics.com>
++ */
++
++/*
++   - Scissor implementation
++   - buffer swap/copy ioctls
++   - finish/flush
++   - state emission
++   - cmdbuffer management
++*/
++
++#include <errno.h>
++#include "main/glheader.h"
++#include "main/imports.h"
++#include "main/context.h"
++#include "main/api_arrayelt.h"
++#include "main/enums.h"
++#include "main/colormac.h"
++#include "main/light.h"
++#include "main/framebuffer.h"
++#include "main/simple_list.h"
++
++#include "swrast/swrast.h"
++#include "vbo/vbo.h"
++#include "tnl/tnl.h"
++#include "tnl/t_pipeline.h"
++#include "swrast_setup/swrast_setup.h"
++
++#include "dri_util.h"
++#include "vblank.h"
++
++#include "radeon_common.h"
++#include "radeon_bocs_wrapper.h"
++#include "radeon_lock.h"
++#include "radeon_drm.h"
++#include "radeon_mipmap_tree.h"
++
++#define DEBUG_CMDBUF         0
++
++/* =============================================================
++ * Scissoring
++ */
++
++static GLboolean intersect_rect(drm_clip_rect_t * out,
++				drm_clip_rect_t * a, drm_clip_rect_t * b)
++{
++	*out = *a;
++	if (b->x1 > out->x1)
++		out->x1 = b->x1;
++	if (b->y1 > out->y1)
++		out->y1 = b->y1;
++	if (b->x2 < out->x2)
++		out->x2 = b->x2;
++	if (b->y2 < out->y2)
++		out->y2 = b->y2;
++	if (out->x1 >= out->x2)
++		return GL_FALSE;
++	if (out->y1 >= out->y2)
++		return GL_FALSE;
++	return GL_TRUE;
++}
++
++void radeonRecalcScissorRects(radeonContextPtr radeon)
++{
++	drm_clip_rect_t *out;
++	int i;
++
++	/* Rebuild the scissored cliprect list: every drawable cliprect clipped
++	 * against state.scissor.rect.  Grow the backing store first if needed. */
++	if (radeon->state.scissor.numAllocedClipRects < radeon->numClipRects) {
++		while (radeon->state.scissor.numAllocedClipRects <
++		       radeon->numClipRects) {
++			radeon->state.scissor.numAllocedClipRects += 1;	/* zero case */
++			radeon->state.scissor.numAllocedClipRects *= 2;
++		}
++
++		if (radeon->state.scissor.pClipRects)
++			FREE(radeon->state.scissor.pClipRects);
++		radeon->state.scissor.pClipRects =
++			MALLOC(radeon->state.scissor.numAllocedClipRects *
++			       sizeof(drm_clip_rect_t));
++
++		if (radeon->state.scissor.pClipRects == NULL) {
++			/* store is gone: drop the stale count along with the capacity */
++			radeon->state.scissor.numAllocedClipRects = 0;
++			radeon->state.scissor.numClipRects = 0;
++			return;
++		}
++	}
++
++	out = radeon->state.scissor.pClipRects;
++	radeon->state.scissor.numClipRects = 0;
++
++	for (i = 0; i < radeon->numClipRects; i++) {
++		if (intersect_rect(out,
++				   &radeon->pClipRects[i],
++				   &radeon->state.scissor.rect)) {
++			radeon->state.scissor.numClipRects++;
++			out++;
++		}
++	}
++}
++
++/**
++ * Update cliprects and scissors.
++ */
++void radeonSetCliprects(radeonContextPtr radeon)
++{
++	__DRIdrawablePrivate *const drawable = radeon->dri.drawable;
++	__DRIdrawablePrivate *const readable = radeon->dri.readable;
++	GLframebuffer *const draw_fb = (GLframebuffer*)drawable->driverPrivate;
++	GLframebuffer *const read_fb = (GLframebuffer*)readable->driverPrivate;
++
++	if (!radeon->radeonScreen->driScreen->dri2.enabled) {
++		if (draw_fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) {
++			/* Can't ignore 2d windows if we are page flipping. */
++			if (drawable->numBackClipRects == 0 || radeon->doPageFlip ||
++			    radeon->sarea->pfCurrentPage == 1) {
++				radeon->numClipRects = drawable->numClipRects;
++				radeon->pClipRects = drawable->pClipRects;
++			} else {
++				radeon->numClipRects = drawable->numBackClipRects;
++				radeon->pClipRects = drawable->pBackClipRects;
++			}
++		} else {
++			/* front buffer (or none, or multiple buffers) */
++			radeon->numClipRects = drawable->numClipRects;
++			radeon->pClipRects = drawable->pClipRects;
++		}
++	}
++	
++	if ((draw_fb->Width != drawable->w) ||
++	    (draw_fb->Height != drawable->h)) {
++		_mesa_resize_framebuffer(radeon->glCtx, draw_fb,
++					 drawable->w, drawable->h);
++		draw_fb->Initialized = GL_TRUE;
++	}
++
++	if (drawable != readable) {
++		if ((read_fb->Width != readable->w) ||
++		    (read_fb->Height != readable->h)) {
++			_mesa_resize_framebuffer(radeon->glCtx, read_fb,
++						 readable->w, readable->h);
++			read_fb->Initialized = GL_TRUE;
++		}
++	}
++
++	if (radeon->state.scissor.enabled)
++		radeonRecalcScissorRects(radeon);
++
++	radeon->lastStamp = drawable->lastStamp;
++}
++
++void radeonUpdateScissor( GLcontext *ctx )
++{
++	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++
++	if ( rmesa->dri.drawable ) {
++		__DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
++      
++		int x = ctx->Scissor.X;
++		int y = dPriv->h - ctx->Scissor.Y - ctx->Scissor.Height;
++		int w = ctx->Scissor.X + ctx->Scissor.Width - 1;
++		int h = dPriv->h - ctx->Scissor.Y - 1;
++
++		rmesa->state.scissor.rect.x1 = x + dPriv->x;
++		rmesa->state.scissor.rect.y1 = y + dPriv->y;
++		rmesa->state.scissor.rect.x2 = w + dPriv->x + 1;
++		rmesa->state.scissor.rect.y2 = h + dPriv->y + 1;
++
++		radeonRecalcScissorRects( rmesa );
++	}
++}
++
++/* =============================================================
++ * Scissoring
++ */
++
++void radeonScissor(GLcontext* ctx, GLint x, GLint y, GLsizei w, GLsizei h)
++{
++	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
++	if (ctx->Scissor.Enabled) {
++		/* We don't pipeline cliprect changes */
++		radeon_firevertices(radeon);
++		radeonUpdateScissor(ctx);
++	}
++}
++
++
++/* ================================================================
++ * SwapBuffers with client-side throttling
++ */
++
++static uint32_t radeonGetLastFrame(radeonContextPtr radeon)
++{
++	drm_radeon_getparam_t gp;
++	int ret;
++	uint32_t frame;
++
++	gp.param = RADEON_PARAM_LAST_FRAME;
++	gp.value = (int *)&frame;
++	ret = drmCommandWriteRead(radeon->dri.fd, DRM_RADEON_GETPARAM,
++				  &gp, sizeof(gp));
++	if (ret) {
++		fprintf(stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__,
++			ret);
++		exit(1);
++	}
++
++	return frame;
++}
++
++uint32_t radeonGetAge(radeonContextPtr radeon)
++{
++	drm_radeon_getparam_t gp;
++	int ret;
++	uint32_t age;
++
++	gp.param = RADEON_PARAM_LAST_CLEAR;
++	gp.value = (int *)&age;
++	ret = drmCommandWriteRead(radeon->dri.fd, DRM_RADEON_GETPARAM,
++				  &gp, sizeof(gp));
++	if (ret) {
++		fprintf(stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__,
++			ret);
++		exit(1);
++	}
++
++	return age;
++}
++
++static void radeonEmitIrqLocked(radeonContextPtr radeon)
++{
++	drm_radeon_irq_emit_t ie;
++	int ret;
++
++	ie.irq_seq = &radeon->iw.irq_seq;
++	ret = drmCommandWriteRead(radeon->dri.fd, DRM_RADEON_IRQ_EMIT,
++				  &ie, sizeof(ie));
++	if (ret) {
++		fprintf(stderr, "%s: drmRadeonIrqEmit: %d\n", __FUNCTION__,
++			ret);
++		exit(1);
++	}
++}
++
++static void radeonWaitIrq(radeonContextPtr radeon)
++{
++	int ret;
++
++	do {
++		ret = drmCommandWrite(radeon->dri.fd, DRM_RADEON_IRQ_WAIT,
++				      &radeon->iw, sizeof(radeon->iw));
++	} while (ret && (errno == EINTR || errno == EBUSY));
++
++	if (ret) {
++		fprintf(stderr, "%s: drmRadeonIrqWait: %d\n", __FUNCTION__,
++			ret);
++		exit(1);
++	}
++}
++
++static void radeonWaitForFrameCompletion(radeonContextPtr radeon)
++{
++	drm_radeon_sarea_t *sarea = radeon->sarea;
++
++	if (radeon->do_irqs) {
++		if (radeonGetLastFrame(radeon) < sarea->last_frame) {
++			if (!radeon->irqsEmitted) {
++				while (radeonGetLastFrame(radeon) <
++				       sarea->last_frame) ;
++			} else {
++				UNLOCK_HARDWARE(radeon);
++				radeonWaitIrq(radeon);
++				LOCK_HARDWARE(radeon);
++			}
++			radeon->irqsEmitted = 10;
++		}
++
++		if (radeon->irqsEmitted) {
++			radeonEmitIrqLocked(radeon);
++			radeon->irqsEmitted--;
++		}
++	} else {
++		while (radeonGetLastFrame(radeon) < sarea->last_frame) {
++			UNLOCK_HARDWARE(radeon);
++			if (radeon->do_usleeps)
++				DO_USLEEP(1);
++			LOCK_HARDWARE(radeon);
++		}
++	}
++}
++
++/* Wait for idle: retry DRM_RADEON_CP_IDLE up to 100 times; exits the process if the last try reports an error. */
++void radeonWaitForIdleLocked(radeonContextPtr radeon)
++{
++	int ret;
++	int i = 0;
++
++	do {
++		ret = drmCommandNone(radeon->dri.fd, DRM_RADEON_CP_IDLE);
++		if (ret)
++			DO_USLEEP(1);	/* back off before the next attempt */
++	} while (ret && ++i < 100);	/* stop on success or after 100 attempts */
++
++	if (ret < 0) {
++		UNLOCK_HARDWARE(radeon);	/* presumably the lock the caller took (see radeonWaitForIdle) */
++		fprintf(stderr, "Error: R300 timed out... exiting\n");
++		exit(-1);
++	}
++}
++
++static void radeonWaitForIdle(radeonContextPtr radeon)
++{
++	LOCK_HARDWARE(radeon);
++	radeonWaitForIdleLocked(radeon);
++	UNLOCK_HARDWARE(radeon);
++}
++
++
++/* Copy the back color buffer to the front color buffer.
++ */
++void radeonCopyBuffer( __DRIdrawablePrivate *dPriv,
++		       const drm_clip_rect_t	  *rect)
++{
++	radeonContextPtr rmesa;
++	GLint nbox, i, ret;
++	GLboolean   missed_target;
++	int64_t ust;
++	__DRIscreenPrivate *psp;
++   
++	assert(dPriv);
++	assert(dPriv->driContextPriv);
++	assert(dPriv->driContextPriv->driverPrivate);
++   
++	rmesa = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
++
++	if ( RADEON_DEBUG & DEBUG_IOCTL ) {
++		fprintf( stderr, "\n%s( %p )\n\n", __FUNCTION__, (void *) rmesa->glCtx );
++	}
++
++	radeon_firevertices(rmesa);
++	LOCK_HARDWARE( rmesa );
++
++	/* Throttle the frame rate -- only allow one pending swap buffers
++	 * request at a time.
++	 */
++	radeonWaitForFrameCompletion( rmesa );
++	if (!rect)
++	{
++		UNLOCK_HARDWARE( rmesa );
++		driWaitForVBlank( dPriv, & missed_target );
++		LOCK_HARDWARE( rmesa );
++	}
++
++	nbox = dPriv->numClipRects; /* must be in locked region */
++
++	for ( i = 0 ; i < nbox ; ) {
++		GLint nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS , nbox );
++		drm_clip_rect_t *box = dPriv->pClipRects;
++		drm_clip_rect_t *b = rmesa->sarea->boxes;
++		GLint n = 0;
++
++		for ( ; i < nr ; i++ ) {
++
++			*b = box[i];
++
++			if (rect)
++			{
++				if (rect->x1 > b->x1)
++					b->x1 = rect->x1;
++				if (rect->y1 > b->y1)
++					b->y1 = rect->y1;
++				if (rect->x2 < b->x2)
++					b->x2 = rect->x2;
++				if (rect->y2 < b->y2)
++					b->y2 = rect->y2;
++
++				if (b->x1 >= b->x2 || b->y1 >= b->y2)
++					continue;
++			}
++
++			b++;
++			n++;
++		}
++		rmesa->sarea->nbox = n;
++
++		if (!n)
++			continue;
++
++		ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_SWAP );
++
++		if ( ret ) {
++			fprintf( stderr, "DRM_RADEON_SWAP_BUFFERS: return = %d\n", ret );
++			UNLOCK_HARDWARE( rmesa );
++			exit( 1 );
++		}
++	}
++
++	UNLOCK_HARDWARE( rmesa );
++	if (!rect)
++	{
++		psp = dPriv->driScreenPriv;
++		rmesa->swap_count++;
++		(*psp->systemTime->getUST)( & ust );
++		if ( missed_target ) {
++			rmesa->swap_missed_count++;
++			rmesa->swap_missed_ust = ust - rmesa->swap_ust;
++		}
++
++		rmesa->swap_ust = ust;
++		rmesa->hw.all_dirty = GL_TRUE;
++
++	}
++}
++
++void radeonPageFlip( __DRIdrawablePrivate *dPriv )
++{
++	radeonContextPtr rmesa;
++	GLint ret;
++	GLboolean   missed_target;
++	__DRIscreenPrivate *psp;
++	struct radeon_renderbuffer *rrb;
++	GLframebuffer *fb = dPriv->driverPrivate;
++	
++	assert(dPriv);
++	assert(dPriv->driContextPriv);
++	assert(dPriv->driContextPriv->driverPrivate);
++
++	rmesa = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
++	rrb = (void *)fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
++
++	psp = dPriv->driScreenPriv;
++
++	if ( RADEON_DEBUG & DEBUG_IOCTL ) {
++		fprintf(stderr, "%s: pfCurrentPage: %d\n", __FUNCTION__,
++			rmesa->sarea->pfCurrentPage);
++	}
++
++	radeon_firevertices(rmesa);
++
++	LOCK_HARDWARE( rmesa );
++
++	if (!dPriv->numClipRects) {
++		UNLOCK_HARDWARE(rmesa);
++		usleep(10000);	/* throttle invisible client 10ms */
++		return;
++	}
++
++	drm_clip_rect_t *box = dPriv->pClipRects;
++	drm_clip_rect_t *b = rmesa->sarea->boxes;
++	b[0] = box[0];
++	rmesa->sarea->nbox = 1;
++
++	/* Throttle the frame rate -- only allow a few pending swap buffers
++	 * request at a time.
++	 */
++	radeonWaitForFrameCompletion( rmesa );
++	UNLOCK_HARDWARE( rmesa );
++	driWaitForVBlank( dPriv, & missed_target );
++	if ( missed_target ) {
++		rmesa->swap_missed_count++;
++		(void) (*psp->systemTime->getUST)( & rmesa->swap_missed_ust );
++	}
++	LOCK_HARDWARE( rmesa );
++
++	ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_FLIP );
++	
++	UNLOCK_HARDWARE( rmesa );
++
++	if ( ret ) {
++		fprintf( stderr, "DRM_RADEON_FLIP: return = %d\n", ret );
++		exit( 1 );
++	}
++
++	rmesa->swap_count++;
++	(void) (*psp->systemTime->getUST)( & rmesa->swap_ust );
++	
++	/* Get ready for drawing next frame.  Update the renderbuffers'
++	 * flippedOffset/Pitch fields so we draw into the right place.
++	 */
++	//	driFlipRenderbuffers(rmesa->glCtx->WinSysDrawBuffer,
++	//			     rmesa->sarea->pfCurrentPage);
++	
++	rmesa->state.color.rrb = rrb;
++
++	if (rmesa->vtbl.update_draw_buffer)
++		rmesa->vtbl.update_draw_buffer(rmesa->glCtx);
++}
++
++
++/**
++ * Swap front and back buffer: page flip when doPageFlip is set, otherwise copy via radeonCopyBuffer(); nothing is done for single-buffered visuals.
++ */
++void radeonSwapBuffers(__DRIdrawablePrivate * dPriv)
++{
++	if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) {
++		radeonContextPtr radeon;
++		GLcontext *ctx;
++
++		radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
++		ctx = radeon->glCtx;
++
++		if (ctx->Visual.doubleBufferMode) {
++			_mesa_notifySwapBuffers(ctx);/* flush pending rendering commands */
++			if (radeon->doPageFlip) {
++				radeonPageFlip(dPriv);
++			} else {
++				radeonCopyBuffer(dPriv, NULL);
++			}
++		}
++	} else {
++		/* XXX this shouldn't be an error but we can't handle it for now */
++		_mesa_problem(NULL, "%s: drawable has no context!",
++			      __FUNCTION__);
++	}
++}
++
++void radeonCopySubBuffer(__DRIdrawablePrivate * dPriv,
++			 int x, int y, int w, int h )
++{	/* Hands the (x, y, w, h) sub-rectangle to radeonCopyBuffer(); only done for double-buffered visuals. */
++	if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) {
++		radeonContextPtr radeon;
++		GLcontext *ctx;
++
++		radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
++		ctx = radeon->glCtx;
++
++		if (ctx->Visual.doubleBufferMode) {
++			drm_clip_rect_t rect;
++			rect.x1 = x + dPriv->x;	/* offset by the drawable's position */
++			rect.y1 = (dPriv->h - y - h) + dPriv->y;	/* y is flipped within the drawable height */
++			rect.x2 = rect.x1 + w;
++			rect.y2 = rect.y1 + h;
++			_mesa_notifySwapBuffers(ctx);	/* flush pending rendering commands */
++			radeonCopyBuffer(dPriv, &rect);
++		}
++	} else {
++		/* XXX this shouldn't be an error but we can't handle it for now */
++		_mesa_problem(NULL, "%s: drawable has no context!",
++			      __FUNCTION__);
++	}
++}
++
++
++static void radeon_print_state_atom(radeonContextPtr radeon, struct radeon_state_atom *state )
++{
++	int i;
++	int dwords = (*state->check)(radeon->glCtx, state);
++
++	fprintf(stderr, "emit %s %d/%d\n", state->name, state->cmd_size, dwords);
++
++	if (RADEON_DEBUG & DEBUG_VERBOSE) 
++		for (i = 0 ; i < dwords; i++) 
++			fprintf(stderr, "\t%s[%d]: %x\n", state->name, i, state->cmd[i]);
++
++}
++
++static INLINE void radeonEmitAtoms(radeonContextPtr radeon, GLboolean dirty)
++{
++	BATCH_LOCALS(radeon);
++	struct radeon_state_atom *atom;
++	int dwords;
++
++	if (radeon->vtbl.pre_emit_atoms)
++		radeon->vtbl.pre_emit_atoms(radeon);
++
++	/* Emit actual atoms */
++	foreach(atom, &radeon->hw.atomlist) {
++		if ((atom->dirty || radeon->hw.all_dirty) == dirty) {
++			dwords = (*atom->check) (radeon->glCtx, atom);
++			if (dwords) {
++				if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_STATE) {
++					radeon_print_state_atom(radeon, atom);
++				}
++				if (atom->emit) {
++					(*atom->emit)(radeon->glCtx, atom);
++				} else {
++					BEGIN_BATCH_NO_AUTOSTATE(dwords);
++					OUT_BATCH_TABLE(atom->cmd, dwords);
++					END_BATCH();
++				}
++				atom->dirty = GL_FALSE;
++			} else {
++				if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_STATE) {
++					fprintf(stderr, "  skip state %s\n",
++						atom->name);
++				}
++			}
++		}
++	}
++   
++	COMMIT_BATCH();
++}
++
++void radeonEmitState(radeonContextPtr radeon)
++{
++	if (RADEON_DEBUG & (DEBUG_STATE|DEBUG_PRIMS))
++		fprintf(stderr, "%s\n", __FUNCTION__);
++
++	if (radeon->vtbl.pre_emit_state)
++		radeon->vtbl.pre_emit_state(radeon);
++
++	/* this code used to return here but now it emits zbs */
++	if (radeon->cmdbuf.cs->cdw && !radeon->hw.is_dirty && !radeon->hw.all_dirty)
++		return;
++
++	/* To avoid going across the entire set of states multiple times, just check
++	 * for enough space for the case of emitting all state, and inline the
++	 * radeonAllocCmdBuf code here without all the checks.
++	 */
++	rcommonEnsureCmdBufSpace(radeon, radeon->hw.max_state_size, __FUNCTION__);
++
++	if (!radeon->cmdbuf.cs->cdw) {
++		if (RADEON_DEBUG & DEBUG_STATE)
++			fprintf(stderr, "Begin reemit state\n");
++		
++		radeonEmitAtoms(radeon, GL_FALSE);
++	}
++
++	if (RADEON_DEBUG & DEBUG_STATE)
++		fprintf(stderr, "Begin dirty state\n");
++
++	radeonEmitAtoms(radeon, GL_TRUE);
++	radeon->hw.is_dirty = GL_FALSE;
++	radeon->hw.all_dirty = GL_FALSE;
++
++}
++
++
++void radeonFlush(GLcontext *ctx)
++{
++	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
++	if (RADEON_DEBUG & DEBUG_IOCTL)
++		fprintf(stderr, "%s %d\n", __FUNCTION__, radeon->cmdbuf.cs->cdw);
++
++	if (radeon->dma.flush)
++		radeon->dma.flush( ctx );
++
++	radeonEmitState(radeon);
++   
++	if (radeon->cmdbuf.cs->cdw)
++		rcommonFlushCmdBuf(radeon, __FUNCTION__);
++}
++
++/* Flush pending commands, then block until the hardware has finished them:
++ * wait on each color draw buffer's bo (kernel_mm), on a freshly emitted IRQ
++ * (do_irqs), or for the CP to report idle. */
++void radeonFinish(GLcontext * ctx)
++{
++	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
++	struct gl_framebuffer *fb = ctx->DrawBuffer;
++	int i;
++
++	radeonFlush(ctx);
++
++	if (radeon->radeonScreen->kernel_mm) {
++		for (i = 0; i < fb->_NumColorDrawBuffers; i++) {
++			struct radeon_renderbuffer *rrb;
++			rrb = (struct radeon_renderbuffer *)fb->_ColorDrawBuffers[i];
++			if (rrb && rrb->bo)	/* a draw-buffer slot may hold no renderbuffer */
++				radeon_bo_wait(rrb->bo);
++		}
++	} else if (radeon->do_irqs) {
++		LOCK_HARDWARE(radeon);
++		radeonEmitIrqLocked(radeon);
++		UNLOCK_HARDWARE(radeon);
++		radeonWaitIrq(radeon);
++	} else {
++		radeonWaitForIdle(radeon);
++	}
++}
++
++/* cmdbuffer */
++/**
++ * Emit the command stream if it holds any dwords (marking all state dirty), then erase it.  Returns the radeon_cs_emit() result, or 0 when nothing was emitted; exits on re-entry.
++ */
++int rcommonFlushCmdBufLocked(radeonContextPtr rmesa, const char *caller)
++{
++	int ret = 0;
++
++	if (rmesa->cmdbuf.flushing) {
++		fprintf(stderr, "Recursive call into %s!\n", __FUNCTION__);
++		exit(-1);
++	}
++	rmesa->cmdbuf.flushing = 1;	/* re-entry guard, cleared before returning */
++
++	if (RADEON_DEBUG & DEBUG_IOCTL) {
++		fprintf(stderr, "%s from %s - %i cliprects\n",
++			__FUNCTION__, caller, rmesa->numClipRects);
++	}
++
++	if (rmesa->cmdbuf.cs->cdw) {
++		ret = radeon_cs_emit(rmesa->cmdbuf.cs);
++		rmesa->hw.all_dirty = GL_TRUE;
++	}
++	radeon_cs_erase(rmesa->cmdbuf.cs);
++	rmesa->cmdbuf.flushing = 0;
++	return ret;
++}
++
++int rcommonFlushCmdBuf(radeonContextPtr rmesa, const char *caller)
++{
++	int ret;
++
++	radeonReleaseDmaRegion(rmesa);
++	
++	LOCK_HARDWARE(rmesa);
++	ret = rcommonFlushCmdBufLocked(rmesa, caller);
++	UNLOCK_HARDWARE(rmesa);
++
++	if (ret) {
++		fprintf(stderr, "drmRadeonCmdBuffer: %d\n", ret);
++		_mesa_exit(ret);
++	}
++
++	return ret;
++}
++
++/**
++ * Make sure that enough space is available in the command buffer
++ * by flushing if necessary.
++ *
++ * \param dwords The number of dwords we need to be free on the command buffer
++ */
++void rcommonEnsureCmdBufSpace(radeonContextPtr rmesa, int dwords, const char *caller)
++{
++	if ((rmesa->cmdbuf.cs->cdw + dwords + 128) > rmesa->cmdbuf.size ||
++	    radeon_cs_need_flush(rmesa->cmdbuf.cs)) {
++		rcommonFlushCmdBuf(rmesa, caller);
++	}
++}
++
++void rcommonInitCmdBuf(radeonContextPtr rmesa)
++{
++	GLuint size;
++	/* Initialize command buffer */
++	size = 256 * driQueryOptioni(&rmesa->optionCache,
++				     "command_buffer_size");
++	if (size < 2 * rmesa->hw.max_state_size) {
++		size = 2 * rmesa->hw.max_state_size + 65535;
++	}
++	if (size > 64 * 256)
++		size = 64 * 256;
++
++	if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA)) {
++		fprintf(stderr, "sizeof(drm_r300_cmd_header_t)=%zd\n",
++			sizeof(drm_r300_cmd_header_t));
++		fprintf(stderr, "sizeof(drm_radeon_cmd_buffer_t)=%zd\n",
++			sizeof(drm_radeon_cmd_buffer_t));
++		fprintf(stderr,
++			"Allocating %d bytes command buffer (max state is %d bytes)\n",
++			size * 4, rmesa->hw.max_state_size * 4);
++	}
++
++	if (rmesa->radeonScreen->kernel_mm) {
++		int fd = rmesa->radeonScreen->driScreen->fd;
++		rmesa->cmdbuf.csm = radeon_cs_manager_gem_ctor(fd);
++	} else {
++		rmesa->cmdbuf.csm = radeon_cs_manager_legacy_ctor(rmesa);
++	}
++	if (rmesa->cmdbuf.csm == NULL) {
++		/* FIXME: fatal error */
++		return;
++	}
++	rmesa->cmdbuf.cs = radeon_cs_create(rmesa->cmdbuf.csm, size);
++	assert(rmesa->cmdbuf.cs != NULL);
++	rmesa->cmdbuf.size = size;
++	
++	if (!rmesa->radeonScreen->kernel_mm) {
++		radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_VRAM, rmesa->radeonScreen->texSize[0]);
++		radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_GTT, rmesa->radeonScreen->gartTextures.size);
++	} else {
++		struct drm_radeon_gem_info mminfo;
++
++		if (!drmCommandWriteRead(rmesa->dri.fd, DRM_RADEON_GEM_INFO, &mminfo, sizeof(mminfo)))
++		{
++			radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_VRAM, mminfo.vram_size);
++			radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_GTT, mminfo.gart_size);
++		}
++	}
++
++}
++/**
++ * Destroy the command stream, then its manager.  The destructor is picked by kernel_mm alone, the same test rcommonInitCmdBuf() uses to pick the constructor.
++ */
++void rcommonDestroyCmdBuf(radeonContextPtr rmesa)
++{
++	radeon_cs_destroy(rmesa->cmdbuf.cs);
++	if (rmesa->radeonScreen->kernel_mm) {
++		radeon_cs_manager_gem_dtor(rmesa->cmdbuf.csm);
++	} else {
++		radeon_cs_manager_legacy_dtor(rmesa->cmdbuf.csm);
++	}
++}
++
++void rcommonBeginBatch(radeonContextPtr rmesa, int n,
++		       int dostate,
++		       const char *file,
++		       const char *function,
++		       int line)
++{
++	rcommonEnsureCmdBufSpace(rmesa, n, function);
++	if (!rmesa->cmdbuf.cs->cdw && dostate) {
++		if (RADEON_DEBUG & DEBUG_IOCTL)
++			fprintf(stderr, "Reemit state after flush (from %s)\n", function);
++		radeonEmitState(rmesa);
++	}
++	radeon_cs_begin(rmesa->cmdbuf.cs, n, file, function, line);
++
++        if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_IOCTL)
++                fprintf(stderr, "BEGIN_BATCH(%d) at %d, from %s:%i\n",
++                        n, rmesa->cmdbuf.cs->cdw, function, line);
++
++}
++
++
++
+diff --git a/src/mesa/drivers/dri/radeon/radeon_common.h b/src/mesa/drivers/dri/radeon/radeon_common.h
+new file mode 100644
+index 0000000..ead0f55
+--- /dev/null
++++ b/src/mesa/drivers/dri/radeon/radeon_common.h
+@@ -0,0 +1,55 @@
++#ifndef COMMON_MISC_H
++#define COMMON_MISC_H
++
++#include "radeon_common_context.h"
++#include "radeon_dma.h"
++#include "radeon_texture.h"
++
++void radeonRecalcScissorRects(radeonContextPtr radeon);
++void radeonSetCliprects(radeonContextPtr radeon);
++void radeonUpdateScissor( GLcontext *ctx );
++void radeonScissor(GLcontext* ctx, GLint x, GLint y, GLsizei w, GLsizei h);
++
++void radeonWaitForIdleLocked(radeonContextPtr radeon);
++extern uint32_t radeonGetAge(radeonContextPtr radeon);
++void radeonCopyBuffer( __DRIdrawablePrivate *dPriv,
++		       const drm_clip_rect_t	  *rect);
++void radeonPageFlip( __DRIdrawablePrivate *dPriv );
++void radeonSwapBuffers(__DRIdrawablePrivate * dPriv);
++void radeonCopySubBuffer(__DRIdrawablePrivate * dPriv,
++			 int x, int y, int w, int h );
++
++void radeonUpdatePageFlipping(radeonContextPtr rmesa);
++
++void radeonFlush(GLcontext *ctx);
++void radeonFinish(GLcontext * ctx);
++void radeonEmitState(radeonContextPtr radeon);
++
++static inline struct radeon_renderbuffer *radeon_get_depthbuffer(radeonContextPtr rmesa)
++{
++	struct radeon_renderbuffer *rrb;
++	rrb = rmesa->state.depth.rrb;
++	if (!rrb)
++		return NULL;
++
++	return rrb;
++}
++
++static inline struct radeon_renderbuffer *radeon_get_colorbuffer(radeonContextPtr rmesa)
++{
++	struct radeon_renderbuffer *rrb = rmesa->state.color.rrb;
++
++	/* Under DRI2 the back-left attachment of the bound drawable wins.  Only
++	 * touch dri.drawable on that path: the legacy path does not need it. */
++	if (rmesa->radeonScreen->driScreen->dri2.enabled) {
++		GLframebuffer *fb = rmesa->dri.drawable->driverPrivate;
++
++		rrb = (struct radeon_renderbuffer *)fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
++	}
++	return rrb;
++}
++
++#include "radeon_cmdbuf.h"
++
++
++#endif
+diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c
+new file mode 100644
+index 0000000..1b8a05d
+--- /dev/null
++++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c
+@@ -0,0 +1,589 @@
++/**************************************************************************
++
++Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
++                     VA Linux Systems Inc., Fremont, California.
++Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
++
++The Weather Channel (TM) funded Tungsten Graphics to develop the
++initial release of the Radeon 8500 driver under the XFree86 license.
++This notice must be preserved.
++
++All Rights Reserved.
++
++Permission is hereby granted, free of charge, to any person obtaining
++a copy of this software and associated documentation files (the
++"Software"), to deal in the Software without restriction, including
++without limitation the rights to use, copy, modify, merge, publish,
++distribute, sublicense, and/or sell copies of the Software, and to
++permit persons to whom the Software is furnished to do so, subject to
++the following conditions:
++
++The above copyright notice and this permission notice (including the
++next paragraph) shall be included in all copies or substantial
++portions of the Software.
++
++THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
++IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
++LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
++OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
++WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
++
++**************************************************************************/
++
++#include "radeon_common.h"
++#include "xmlpool.h"		/* for symbolic values of enum-type options */
++#include "utils.h"
++#include "drirenderbuffer.h"
++#include "vblank.h"
++#include "main/state.h"
++
++#define DRIVER_DATE "20090101"
++
++#ifndef RADEON_DEBUG
++int RADEON_DEBUG = (0);
++#endif
++
++/* Return various strings for glGetString().
++ */
++static const GLubyte *radeonGetString(GLcontext * ctx, GLenum name)
++{
++	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
++	static char buffer[128];
++
++	switch (name) {
++	case GL_VENDOR:
++		if (IS_R300_CLASS(radeon->radeonScreen))
++			return (GLubyte *) "DRI R300 Project";
++		else
++			return (GLubyte *) "Tungsten Graphics, Inc.";
++
++	case GL_RENDERER:
++	{
++		unsigned offset;
++		GLuint agp_mode = (radeon->radeonScreen->card_type==RADEON_CARD_PCI) ? 0 :
++			radeon->radeonScreen->AGPMode;
++		const char* chipname;
++
++		if (IS_R300_CLASS(radeon->radeonScreen))
++			chipname = "R300";
++		else if (IS_R200_CLASS(radeon->radeonScreen))
++			chipname = "R200";
++		else
++			chipname = "R100";
++
++		offset = driGetRendererString(buffer, chipname, DRIVER_DATE,
++					      agp_mode);
++
++		if (IS_R300_CLASS(radeon->radeonScreen)) {
++			sprintf(&buffer[offset], " %sTCL",
++				(radeon->radeonScreen->chip_flags & RADEON_CHIPSET_TCL)
++				? "" : "NO-");
++		} else {
++			sprintf(&buffer[offset], " %sTCL",
++				!(radeon->TclFallback & RADEON_TCL_FALLBACK_TCL_DISABLE)
++				? "" : "NO-");
++		}
++
++		if (radeon->radeonScreen->driScreen->dri2.enabled)
++			strcat(buffer, " DRI2");
++
++		return (GLubyte *) buffer;
++	}
++
++	default:
++		return NULL;
++	}
++}
++
++/* Initialize the driver's misc functions.
++ */
++static void radeonInitDriverFuncs(struct dd_function_table *functions)
++{
++	functions->GetString = radeonGetString;
++}
++
++/**
++ * Create and initialize all common fields of the context,
++ * including the Mesa context itself.
++ */
++GLboolean radeonInitContext(radeonContextPtr radeon,
++			    struct dd_function_table* functions,
++			    const __GLcontextModes * glVisual,
++			    __DRIcontextPrivate * driContextPriv,
++			    void *sharedContextPrivate)
++{
++	__DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv;
++	radeonScreenPtr screen = (radeonScreenPtr) (sPriv->private);
++	GLcontext* ctx;
++	GLcontext* shareCtx;
++	int fthrottle_mode;
++
++	/* Fill in additional standard functions. */
++	radeonInitDriverFuncs(functions);
++
++	radeon->radeonScreen = screen;
++	/* Allocate and initialize the Mesa context */
++	if (sharedContextPrivate)
++		shareCtx = ((radeonContextPtr)sharedContextPrivate)->glCtx;
++	else
++		shareCtx = NULL;
++	radeon->glCtx = _mesa_create_context(glVisual, shareCtx,
++					    functions, (void *)radeon);
++	if (!radeon->glCtx)
++		return GL_FALSE;
++
++	ctx = radeon->glCtx;
++	driContextPriv->driverPrivate = radeon;
++
++	/* DRI fields */
++	radeon->dri.context = driContextPriv;
++	radeon->dri.screen = sPriv;
++	radeon->dri.drawable = NULL;
++	radeon->dri.readable = NULL;
++	radeon->dri.hwContext = driContextPriv->hHWContext;
++	radeon->dri.hwLock = &sPriv->pSAREA->lock;
++	radeon->dri.fd = sPriv->fd;
++	radeon->dri.drmMinor = sPriv->drm_version.minor;
++
++	radeon->sarea = (drm_radeon_sarea_t *) ((GLubyte *) sPriv->pSAREA +
++					       screen->sarea_priv_offset);
++
++	/* Setup IRQs */
++	fthrottle_mode = driQueryOptioni(&radeon->optionCache, "fthrottle_mode");
++	radeon->iw.irq_seq = -1;
++	radeon->irqsEmitted = 0;
++	radeon->do_irqs = (fthrottle_mode == DRI_CONF_FTHROTTLE_IRQS &&
++			  radeon->radeonScreen->irq);
++
++	radeon->do_usleeps = (fthrottle_mode == DRI_CONF_FTHROTTLE_USLEEPS);
++
++	if (!radeon->do_irqs)
++		fprintf(stderr,
++			"IRQ's not enabled, falling back to %s: %d %d\n",
++			radeon->do_usleeps ? "usleeps" : "busy waits",
++			fthrottle_mode, radeon->radeonScreen->irq);
++
++	(*sPriv->systemTime->getUST) (&radeon->swap_ust);
++
++	return GL_TRUE;
++}
++
++/**
++ * Cleanup common context fields.
++ * Called by r200DestroyContext/r300DestroyContext
++ *
++ * Destroys the Mesa context, drops the buffer-object references held by the
++ * front/back/depth renderbuffers of the bound draw and read drawables, then
++ * frees the option cache, the command buffer and the scissor cliprect store.
++ */
++void radeonCleanupContext(radeonContextPtr radeon)
++{
++#ifdef RADEON_BO_TRACK
++	FILE *track;
++#endif
++	static const int attachments[] = {
++		BUFFER_FRONT_LEFT,
++		BUFFER_BACK_LEFT,
++		BUFFER_DEPTH
++	};
++	__DRIdrawablePrivate *drawables[2];
++	struct radeon_renderbuffer *rb;
++	GLframebuffer *fb;
++	unsigned int d, a;
++
++	/* free the Mesa context */
++	_mesa_destroy_context(radeon->glCtx);
++
++	/* dri.drawable/dri.readable start out NULL (see radeonInitContext) and
++	 * stay that way if the context was never bound, so skip missing ones.
++	 * When both point at the same drawable the second pass finds every
++	 * bo already cleared and does nothing.
++	 */
++	drawables[0] = radeon->dri.drawable;
++	drawables[1] = radeon->dri.readable;
++	for (d = 0; d < 2; d++) {
++		if (!drawables[d] || !drawables[d]->driverPrivate)
++			continue;
++
++		fb = (void*)drawables[d]->driverPrivate;
++		for (a = 0; a < sizeof(attachments) / sizeof(attachments[0]); a++) {
++			rb = (void *)fb->Attachment[attachments[a]].Renderbuffer;
++			/* empty attachment slots and buffers without a bo need no work */
++			if (rb && rb->bo) {
++				radeon_bo_unref(rb->bo);
++				rb->bo = NULL;
++			}
++		}
++	}
++
++	/* _mesa_destroy_context() might result in calls to functions that
++	 * depend on the DriverCtx, so don't set it to NULL before.
++	 *
++	 * radeon->glCtx->DriverCtx = NULL;
++	 */
++
++	/* free the option cache */
++	driDestroyOptionCache(&radeon->optionCache);
++
++	/* release the command stream and its manager */
++	rcommonDestroyCmdBuf(radeon);
++
++	/* release the scissor cliprect store */
++	if (radeon->state.scissor.pClipRects) {
++		FREE(radeon->state.scissor.pClipRects);
++		radeon->state.scissor.pClipRects = 0;
++	}
++#ifdef RADEON_BO_TRACK
++	track = fopen("/tmp/tracklog", "w");
++	if (track) {
++		radeon_tracker_print(&radeon->radeonScreen->bom->tracker, track);
++		fclose(track);
++	}
++#endif
++}
++
++/* Force the context `c' to be unbound from its buffer.
++ */
++GLboolean radeonUnbindContext(__DRIcontextPrivate * driContextPriv)
++{
++	radeonContextPtr radeon = (radeonContextPtr) driContextPriv->driverPrivate;
++
++	if (RADEON_DEBUG & DEBUG_DRI)
++		fprintf(stderr, "%s ctx %p\n", __FUNCTION__,
++			radeon->glCtx);
++
++	return GL_TRUE;
++}
++
++
++static void
++radeon_make_kernel_renderbuffer_current(radeonContextPtr radeon,
++					GLframebuffer *draw)
++{
++	/* if radeon->fake */
++	struct radeon_renderbuffer *rb;
++	/* Kernel-mm path: give each attached buffer of draw a VRAM bo if it has none. */
++	if ((rb = (void *)draw->Attachment[BUFFER_FRONT_LEFT].Renderbuffer)) {
++		if (!rb->bo) {	/* opened once; an existing bo is reused */
++			rb->bo = radeon_bo_open(radeon->radeonScreen->bom,
++						radeon->radeonScreen->frontOffset,
++						0,	/* presumably the size -- TODO confirm against radeon_bo_open */
++						0,
++						RADEON_GEM_DOMAIN_VRAM,
++						0);
++		}
++		rb->cpp = radeon->radeonScreen->cpp;	/* cpp and pitch are refreshed on every call */
++		rb->pitch = radeon->radeonScreen->frontPitch * rb->cpp;
++	}
++	if ((rb = (void *)draw->Attachment[BUFFER_BACK_LEFT].Renderbuffer)) {
++		if (!rb->bo) {
++			rb->bo = radeon_bo_open(radeon->radeonScreen->bom,
++						radeon->radeonScreen->backOffset,
++						0,
++						0,
++						RADEON_GEM_DOMAIN_VRAM,
++						0);
++		}
++		rb->cpp = radeon->radeonScreen->cpp;
++		rb->pitch = radeon->radeonScreen->backPitch * rb->cpp;
++	}
++	if ((rb = (void *)draw->Attachment[BUFFER_DEPTH].Renderbuffer)) {
++		if (!rb->bo) {
++			rb->bo = radeon_bo_open(radeon->radeonScreen->bom,
++						radeon->radeonScreen->depthOffset,
++						0,
++						0,
++						RADEON_GEM_DOMAIN_VRAM,
++						0);
++		}
++		rb->cpp = radeon->radeonScreen->cpp;
++		rb->pitch = radeon->radeonScreen->depthPitch * rb->cpp;
++	}
++	if ((rb = (void *)draw->Attachment[BUFFER_STENCIL].Renderbuffer)) {
++		if (!rb->bo) {
++			rb->bo = radeon_bo_open(radeon->radeonScreen->bom,
++						radeon->radeonScreen->depthOffset,	/* stencil uses the depth offset */
++						0,
++						0,
++						RADEON_GEM_DOMAIN_VRAM,
++						0);
++		}
++		rb->cpp = radeon->radeonScreen->cpp;
++		rb->pitch = radeon->radeonScreen->depthPitch * rb->cpp;	/* ...and the depth pitch */
++	}
++}
++
++static void
++radeon_make_renderbuffer_current(radeonContextPtr radeon,
++					GLframebuffer *draw)
++{
++	int size = 4096*4096*4;	/* 64 MiB; passed to every radeon_bo_open() below */
++	/* if radeon->fake */
++	struct radeon_renderbuffer *rb;
++	/* Make sure every attached renderbuffer of draw has a bo, a cpp and a pitch. */
++	if (radeon->radeonScreen->kernel_mm) {
++		radeon_make_kernel_renderbuffer_current(radeon, draw);
++		return;
++	}
++	/* Without kernel mm: bos are opened at the screen offset plus fbLocation. */
++
++	if ((rb = (void *)draw->Attachment[BUFFER_FRONT_LEFT].Renderbuffer)) {
++		if (!rb->bo) {	/* opened once; an existing bo is reused */
++			rb->bo = radeon_bo_open(radeon->radeonScreen->bom,
++						radeon->radeonScreen->frontOffset +
++						radeon->radeonScreen->fbLocation,
++						size,
++						4096,	/* presumably the alignment -- TODO confirm against radeon_bo_open */
++						RADEON_GEM_DOMAIN_VRAM,
++						0);
++		}
++		rb->cpp = radeon->radeonScreen->cpp;	/* cpp and pitch are refreshed on every call */
++		rb->pitch = radeon->radeonScreen->frontPitch * rb->cpp;
++	}
++	if ((rb = (void *)draw->Attachment[BUFFER_BACK_LEFT].Renderbuffer)) {
++		if (!rb->bo) {
++			rb->bo = radeon_bo_open(radeon->radeonScreen->bom,
++						radeon->radeonScreen->backOffset +
++						radeon->radeonScreen->fbLocation,
++						size,
++						4096,
++						RADEON_GEM_DOMAIN_VRAM,
++						0);
++		}
++		rb->cpp = radeon->radeonScreen->cpp;
++		rb->pitch = radeon->radeonScreen->backPitch * rb->cpp;
++	}
++	if ((rb = (void *)draw->Attachment[BUFFER_DEPTH].Renderbuffer)) {
++		if (!rb->bo) {
++			rb->bo = radeon_bo_open(radeon->radeonScreen->bom,
++						radeon->radeonScreen->depthOffset +
++						radeon->radeonScreen->fbLocation,
++						size,
++						4096,
++						RADEON_GEM_DOMAIN_VRAM,
++						0);
++		}
++		rb->cpp = radeon->radeonScreen->cpp;
++		rb->pitch = radeon->radeonScreen->depthPitch * rb->cpp;
++	}
++	if ((rb = (void *)draw->Attachment[BUFFER_STENCIL].Renderbuffer)) {
++		if (!rb->bo) {
++			rb->bo = radeon_bo_open(radeon->radeonScreen->bom,
++						radeon->radeonScreen->depthOffset +	/* stencil uses the depth offset */
++						radeon->radeonScreen->fbLocation,
++						size,
++						4096,
++						RADEON_GEM_DOMAIN_VRAM,
++						0);
++		}
++		rb->cpp = radeon->radeonScreen->cpp;
++		rb->pitch = radeon->radeonScreen->depthPitch * rb->cpp;	/* ...and the depth pitch */
++	}
++}
++
++
++void
++radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
++{
++	unsigned int attachments[10];
++	__DRIbuffer *buffers;
++	__DRIscreen *screen;
++	struct radeon_renderbuffer *rb;
++	int i, count;
++	GLframebuffer *draw;
++	radeonContextPtr radeon;
++	/* Re-attach the drawable's renderbuffers to the buffers returned by the DRI2 loader. */
++	if (RADEON_DEBUG & DEBUG_DRI)
++	    fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
++	/* Request front, back and depth, but only where a renderbuffer is attached. */
++	draw = drawable->driverPrivate;
++	screen = context->driScreenPriv;
++	radeon = (radeonContextPtr) context->driverPrivate;
++	i = 0;
++	if ((rb = (void *)draw->Attachment[BUFFER_FRONT_LEFT].Renderbuffer)) {
++		attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
++	}
++	if ((rb = (void *)draw->Attachment[BUFFER_BACK_LEFT].Renderbuffer)) {
++		attachments[i++] = __DRI_BUFFER_BACK_LEFT;
++	}
++	if ((rb = (void *)draw->Attachment[BUFFER_DEPTH].Renderbuffer)) {
++		attachments[i++] = __DRI_BUFFER_DEPTH;
++	}
++	/* w and h are passed by pointer; presumably the loader fills them in -- TODO confirm */
++	buffers = (*screen->dri2.loader->getBuffers)(drawable,
++						     &drawable->w,
++						     &drawable->h,
++						     attachments, i,
++						     &count,
++						     drawable->loaderPrivate);
++	if (buffers == NULL)
++		return;
++
++	/* set one cliprect to cover the whole drawable */
++	drawable->x = 0;
++	drawable->y = 0;
++	drawable->backX = 0;
++	drawable->backY = 0;
++	drawable->numClipRects = 1;
++	drawable->pClipRects[0].x1 = 0;
++	drawable->pClipRects[0].y1 = 0;
++	drawable->pClipRects[0].x2 = drawable->w;
++	drawable->pClipRects[0].y2 = drawable->h;
++	drawable->numBackClipRects = 1;
++	drawable->pBackClipRects[0].x1 = 0;
++	drawable->pBackClipRects[0].y1 = 0;
++	drawable->pBackClipRects[0].x2 = drawable->w;
++	drawable->pBackClipRects[0].y2 = drawable->h;
++	for (i = 0; i < count; i++) {
++		switch (buffers[i].attachment) {
++		case __DRI_BUFFER_FRONT_LEFT:
++			rb = (void *)draw->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
++			if (rb->bo) {	/* drop the old bo before opening the new one */
++				radeon_bo_unref(rb->bo);
++				rb->bo = NULL;
++			}
++			rb->cpp = buffers[i].cpp;
++			rb->pitch = buffers[i].pitch;
++			rb->width = drawable->w;
++			rb->height = drawable->h;
++			rb->has_surface = 0;
++			rb->bo = radeon_bo_open(radeon->radeonScreen->bom,
++						buffers[i].name,	/* the loader-provided name is the bo handle */
++						0,
++						0,
++						RADEON_GEM_DOMAIN_VRAM,
++						buffers[i].flags);
++			if (rb->bo == NULL) {	/* only the front buffer reports an open failure */
++				fprintf(stderr, "failled to attach front %d\n",
++					buffers[i].name);
++			}
++			break;
++		case __DRI_BUFFER_BACK_LEFT:
++			rb = (void *)draw->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
++			if (rb->bo) {
++				radeon_bo_unref(rb->bo);
++				rb->bo = NULL;
++			}
++			rb->cpp = buffers[i].cpp;
++			rb->pitch = buffers[i].pitch;
++			rb->width = drawable->w;
++			rb->height = drawable->h;
++			rb->has_surface = 0;
++			rb->bo = radeon_bo_open(radeon->radeonScreen->bom,
++						buffers[i].name,
++						0,
++						0,
++						RADEON_GEM_DOMAIN_VRAM,
++						buffers[i].flags);
++			break;
++		case __DRI_BUFFER_DEPTH:
++			rb = (void *)draw->Attachment[BUFFER_DEPTH].Renderbuffer;
++			if (rb->bo) {
++				radeon_bo_unref(rb->bo);
++				rb->bo = NULL;
++			}
++			rb->cpp = buffers[i].cpp;
++			rb->pitch = buffers[i].pitch;
++			rb->width = drawable->w;
++			rb->height = drawable->h;
++			rb->has_surface = 0;
++			rb->bo = radeon_bo_open(radeon->radeonScreen->bom,
++						buffers[i].name,
++						0,
++						0,
++						RADEON_GEM_DOMAIN_VRAM,
++						buffers[i].flags);
++			break;
++		case __DRI_BUFFER_STENCIL:	/* never requested above; ignored if returned */
++			break;
++		case __DRI_BUFFER_ACCUM:
++		default:
++			fprintf(stderr,
++				"unhandled buffer attach event, attacment type %d\n",
++				buffers[i].attachment);
++			return;	/* NOTE(review): also skips driUpdateFramebufferSize() below */
++		}
++	}
++	radeon = (radeonContextPtr) context->driverPrivate;	/* NOTE(review): same value as assigned before the loop */
++	driUpdateFramebufferSize(radeon->glCtx, drawable);
++}
++
++/* Make the context in driContextPriv current, drawing to driDrawPriv and
++ * reading from driReadPriv.  A NULL context just unbinds via
++ * _mesa_make_current(NULL, NULL, NULL).  Returns GL_TRUE on every path. */
++GLboolean radeonMakeCurrent(__DRIcontextPrivate * driContextPriv,
++			    __DRIdrawablePrivate * driDrawPriv,
++			    __DRIdrawablePrivate * driReadPriv)
++{
++	radeonContextPtr radeon;
++	GLframebuffer *dfb, *rfb;
++
++	if (!driContextPriv) {
++		if (RADEON_DEBUG & DEBUG_DRI)
++			fprintf(stderr, "%s ctx is null\n", __FUNCTION__);
++		_mesa_make_current(NULL, NULL, NULL);
++		return GL_TRUE;
++	}
++	radeon = (radeonContextPtr) driContextPriv->driverPrivate;
++	dfb = driDrawPriv->driverPrivate;	/* NOTE(review): draw/read privs are not NULL-checked */
++	rfb = driReadPriv->driverPrivate;
++	/* DRI2: fetch buffers from the loader; otherwise open bos at the screen offsets. */
++	if (driContextPriv->driScreenPriv->dri2.enabled) {    
++		radeon_update_renderbuffers(driContextPriv, driDrawPriv);
++		if (driDrawPriv != driReadPriv)
++			radeon_update_renderbuffers(driContextPriv, driReadPriv);
++		radeon->state.color.rrb =
++			(void *)dfb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
++		radeon->state.depth.rrb =
++			(void *)dfb->Attachment[BUFFER_DEPTH].Renderbuffer;
++	} else {
++		radeon_make_renderbuffer_current(radeon, dfb);
++	}
++
++
++	if (RADEON_DEBUG & DEBUG_DRI)
++	     fprintf(stderr, "%s ctx %p dfb %p rfb %p\n", __FUNCTION__, radeon->glCtx, dfb, rfb);
++
++	driUpdateFramebufferSize(radeon->glCtx, driDrawPriv);
++	if (driReadPriv != driDrawPriv)
++		driUpdateFramebufferSize(radeon->glCtx, driReadPriv);
++
++
++	/* Bind the context together with the draw and read framebuffers. */
++	_mesa_make_current(radeon->glCtx, dfb, rfb);
++
++	if (radeon->dri.drawable != driDrawPriv) {
++		if (driDrawPriv->swap_interval == (unsigned)-1) {	/* presumably "vblank not set up yet" -- TODO confirm */
++			driDrawPriv->vblFlags =
++				(radeon->radeonScreen->irq != 0)
++				? driGetDefaultVBlankFlags(&radeon->
++							   optionCache)
++					: VBLANK_FLAG_NO_IRQ;
++
++			driDrawableInitVBlank(driDrawPriv);
++		}
++	}
++
++	radeon->dri.readable = driReadPriv;
++	/* Redo cliprects and viewport offset on a new drawable or a changed stamp. */
++	if (radeon->dri.drawable != driDrawPriv ||
++	    radeon->lastStamp != driDrawPriv->lastStamp) {
++		radeon->dri.drawable = driDrawPriv;
++
++		radeonSetCliprects(radeon);
++		radeon->vtbl.update_viewport_offset(radeon->glCtx);
++	}
++
++	_mesa_update_state(radeon->glCtx);
++	/* Page flipping is only refreshed on the non-DRI2 path. */
++	if (!driContextPriv->driScreenPriv->dri2.enabled) {    
++		radeonUpdatePageFlipping(radeon);
++	}
++
++	if (RADEON_DEBUG & DEBUG_DRI)
++		fprintf(stderr, "End %s\n", __FUNCTION__);
++	return GL_TRUE;
++}
++
+diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.h b/src/mesa/drivers/dri/radeon/radeon_common_context.h
+new file mode 100644
+index 0000000..a200e90
+--- /dev/null
++++ b/src/mesa/drivers/dri/radeon/radeon_common_context.h
+@@ -0,0 +1,508 @@
++
++#ifndef COMMON_CONTEXT_H
++#define COMMON_CONTEXT_H
++
++#include "main/mm.h"
++#include "math/m_vector.h"
++#include "texmem.h"
++#include "tnl/t_context.h"
++#include "main/colormac.h"
++
++#include "radeon_screen.h"
++#include "radeon_drm.h"
++#include "dri_util.h"
++#include "tnl/t_vertex.h"
++
++/* This union is used to avoid warnings/miscompilation
++   with float to uint32_t casts due to strict-aliasing */
++typedef union { GLfloat f; uint32_t ui32; } float_ui32_type;
++
++struct radeon_context;
++typedef struct radeon_context radeonContextRec;
++typedef struct radeon_context *radeonContextPtr;
++
++
++#define TEX_0   0x1
++#define TEX_1   0x2
++#define TEX_2   0x4
++#define TEX_3	0x8
++#define TEX_4	0x10
++#define TEX_5	0x20
++
++/* Rasterizing fallbacks */
++/* See corresponding strings in r200_swtcl.c */
++#define RADEON_FALLBACK_TEXTURE		0x0001
++#define RADEON_FALLBACK_DRAW_BUFFER	0x0002
++#define RADEON_FALLBACK_STENCIL		0x0004
++#define RADEON_FALLBACK_RENDER_MODE	0x0008
++#define RADEON_FALLBACK_BLEND_EQ	0x0010
++#define RADEON_FALLBACK_BLEND_FUNC	0x0020
++#define RADEON_FALLBACK_DISABLE 	0x0040
++#define RADEON_FALLBACK_BORDER_MODE	0x0080
++
++#define R200_FALLBACK_TEXTURE           0x01
++#define R200_FALLBACK_DRAW_BUFFER       0x02
++#define R200_FALLBACK_STENCIL           0x04
++#define R200_FALLBACK_RENDER_MODE       0x08
++#define R200_FALLBACK_DISABLE           0x10
++#define R200_FALLBACK_BORDER_MODE       0x20
++
++#define RADEON_TCL_FALLBACK_RASTER            0x1 /* rasterization */
++#define RADEON_TCL_FALLBACK_UNFILLED          0x2 /* unfilled tris */
++#define RADEON_TCL_FALLBACK_LIGHT_TWOSIDE     0x4 /* twoside tris */
++#define RADEON_TCL_FALLBACK_MATERIAL          0x8 /* material in vb */
++#define RADEON_TCL_FALLBACK_TEXGEN_0          0x10 /* texgen, unit 0 */
++#define RADEON_TCL_FALLBACK_TEXGEN_1          0x20 /* texgen, unit 1 */
++#define RADEON_TCL_FALLBACK_TEXGEN_2          0x40 /* texgen, unit 2 */
++#define RADEON_TCL_FALLBACK_TCL_DISABLE       0x80 /* user disable */
++#define RADEON_TCL_FALLBACK_FOGCOORDSPEC      0x100 /* fogcoord, sep. spec light */
++
++/* The blit width for texture uploads
++ */
++#define BLIT_WIDTH_BYTES 1024
++
++/* Use the templated vertex format:
++ */
++#define COLOR_IS_RGBA
++#define TAG(x) radeon##x
++#include "tnl_dd/t_dd_vertex.h"
++#undef TAG
++
++struct radeon_renderbuffer
++{
++	struct gl_renderbuffer base;	/* first member: Renderbuffer attachment pointers are cast to this type */
++	struct radeon_bo *bo;	/* backing buffer object; opened on demand while NULL */
++	unsigned int cpp;	/* copied from the screen or the DRI2 buffer; presumably bytes per pixel */
++	/* unsigned int offset; */
++	unsigned int pitch;	/* screen pitch * cpp, or the DRI2 buffer's pitch unchanged */
++	unsigned int width;	/* width and height are set from the drawable on DRI2 buffer updates */
++	unsigned int height;
++
++	/* boo Xorg 6.8.2 compat */
++	int has_surface;	/* reset to 0 on DRI2 buffer updates */
++
++	__DRIdrawablePrivate *dPriv;
++};
++
++struct radeon_colorbuffer_state {
++	GLuint clear;
++	int roundEnable;
++	struct radeon_renderbuffer *rrb;
++};
++
++struct radeon_depthbuffer_state {
++	GLuint clear;
++	GLfloat scale;
++	struct radeon_renderbuffer *rrb;
++};
++
++struct radeon_scissor_state {
++	drm_clip_rect_t rect;
++	GLboolean enabled;
++
++	GLuint numClipRects;	/* Cliprects active */
++	GLuint numAllocedClipRects;	/* Cliprects available */
++	drm_clip_rect_t *pClipRects;
++};
++
++struct radeon_stencilbuffer_state {
++	GLboolean hwBuffer;
++	GLuint clear;		/* rb3d_stencilrefmask value */
++};
++
++struct radeon_stipple_state {
++	GLuint mask[32];
++};
++
++struct radeon_state_atom {
++	struct radeon_state_atom *next, *prev;
++	const char *name;	/* for debug */
++	int cmd_size;		/* size in bytes */
++        GLuint idx;
++	GLuint is_tcl;
++        GLuint *cmd;		/* one or more cmd's */
++	GLuint *lastcmd;		/* one or more cmd's */
++	GLboolean dirty;	/* dirty-mark in emit_state_list */
++        int (*check) (GLcontext *, struct radeon_state_atom *atom); /* is this state active? */
++        void (*emit) (GLcontext *, struct radeon_state_atom *atom);
++};
++
++struct radeon_hw_state {
++  	/* Head of the linked list of state atoms. */
++	struct radeon_state_atom atomlist;
++	int max_state_size;	/* Number of bytes necessary for a full state emit. */
++	GLboolean is_dirty, all_dirty;
++};
++
++
++/* Texture related */
++typedef struct _radeon_texture_image radeon_texture_image;
++
++struct _radeon_texture_image {
++	struct gl_texture_image base;
++
++	/**
++	 * If mt != 0, the image is stored in hardware format in the
++	 * given mipmap tree. In this case, base.Data may point into the
++	 * mapping of the buffer object that contains the mipmap tree.
++	 *
++	 * If mt == 0, the image is stored in normal memory pointed to
++	 * by base.Data.
++	 */
++	struct _radeon_mipmap_tree *mt;
++	struct radeon_bo *bo;
++
++	int mtlevel; /** if mt != 0, this is the image's level in the mipmap tree */
++	int mtface; /** if mt != 0, this is the image's face in the mipmap tree */
++};
++
++
++static INLINE radeon_texture_image *get_radeon_texture_image(struct gl_texture_image *image)
++{
++	return (radeon_texture_image*)image;
++}
++
++
++typedef struct radeon_tex_obj radeonTexObj, *radeonTexObjPtr;
++
++#define RADEON_TXO_MICRO_TILE               (1 << 3)
++
++/* Texture object in locally shared texture space.
++ */
++struct radeon_tex_obj {
++	struct gl_texture_object base;
++	struct _radeon_mipmap_tree *mt;
++
++	/**
++	 * This is true if we've verified that the mipmap tree above is complete
++	 * and so on.
++	 */
++	GLboolean validated;
++
++	GLuint override_offset;
++	GLboolean image_override; /* Image overridden by GLX_EXT_tfp */
++	GLuint tile_bits;	/* hw texture tile bits used on this texture */
++        struct radeon_bo *bo;
++
++	GLuint pp_txfilter;	/* hardware register values */
++	GLuint pp_txformat;
++	GLuint pp_txformat_x;
++	GLuint pp_txsize;	/* npot only */
++	GLuint pp_txpitch;	/* npot only */
++	GLuint pp_border_color;
++	GLuint pp_cubic_faces;	/* cube face 1,2,3,4 log2 sizes */
++
++        GLuint pp_txfilter_1;	/*  r300 */
++
++	GLboolean border_fallback;
++
++
++};
++
++static INLINE radeonTexObj* radeon_tex_obj(struct gl_texture_object *texObj)
++{
++	return (radeonTexObj*)texObj;
++}
++
++/* Need refcounting on dma buffers:
++ */
++struct radeon_dma_buffer {
++	int refcount;		/* the number of retained regions in buf */
++	drmBufPtr buf;
++};
++
++struct radeon_aos {
++	struct radeon_bo *bo; /** Buffer object where vertex data is stored */
++	int offset; /** Offset into buffer object, in bytes */
++	int components; /** Number of components per vertex */
++	int stride; /** Stride in dwords (may be 0 for repeating) */
++	int count; /** Number of vertices */
++};
++
++struct radeon_dma {
++        /* Active dma region.  Allocations for vertices and retained
++         * regions come from here.  Also used for emitting random vertices,
++         * these may be flushed by calling flush_current();
++         */
++        struct radeon_bo *current; /** Buffer that DMA memory is allocated from */
++        int current_used; /** Number of bytes allocated and forgotten about */
++        int current_vertexptr; /** End of active vertex region */
++
++        /**
++         * If current_vertexptr != current_used then flush must be non-zero.
++         * flush must be called before non-active vertex allocations can be
++         * performed.
++         */
++        void (*flush) (GLcontext *);
++
++        /* Number of "in-flight" DMA buffers, i.e. the number of buffers
++         * for which a DISCARD command is currently queued in the command
++         * buffer.
++         */
++        GLuint nr_released_bufs;
++};
++
++/* radeon_swtcl.c
++ */
++struct radeon_swtcl_info {
++
++	GLuint RenderIndex;
++	GLuint vertex_size;
++	GLubyte *verts;
++
++	/* Fallback rasterization functions
++	 */
++	GLuint hw_primitive;
++	GLenum render_primitive;
++	GLuint numverts;
++
++	struct tnl_attr_map vertex_attrs[VERT_ATTRIB_MAX];
++	GLuint vertex_attr_count;
++
++};
++
++struct radeon_ioctl {
++	GLuint vertex_offset;
++        struct radeon_bo *bo;
++	GLuint vertex_size;
++};
++
++#define RADEON_MAX_PRIMS 64
++
++struct radeon_prim {
++	GLuint start;
++	GLuint end;
++	GLuint prim;
++};
++
++static INLINE GLuint radeonPackColor(GLuint cpp,
++                                     GLubyte r, GLubyte g,
++                                     GLubyte b, GLubyte a)
++{
++	/* Pack by pixel size: cpp 2 -> PACK_COLOR_565(r, g, b), cpp 4 ->
++	 * PACK_COLOR_8888(a, r, g, b); any other cpp yields 0.
++	 */
++	if (cpp == 2)
++		return PACK_COLOR_565(r, g, b);
++	if (cpp == 4)
++		return PACK_COLOR_8888(a, r, g, b);
++	return 0;
++}
++
++#define MAX_CMD_BUF_SZ (16*1024)
++
++#define MAX_DMA_BUF_SZ (64*1024)
++
++struct radeon_store {
++	GLuint statenr;
++	GLuint primnr;
++	char cmd_buf[MAX_CMD_BUF_SZ];
++	int cmd_used;
++	int elts_start;
++};
++
++struct radeon_dri_mirror {
++	__DRIcontextPrivate *context;	/* DRI context */
++	__DRIscreenPrivate *screen;	/* DRI screen */
++
++   /**
++    * DRI drawable bound to this context for drawing.
++    */
++	__DRIdrawablePrivate *drawable;
++
++   /**
++    * DRI drawable bound to this context for reading.
++    */
++	__DRIdrawablePrivate *readable;
++
++	drm_context_t hwContext;
++	drm_hw_lock_t *hwLock;
++	int fd;
++	int drmMinor;
++};
++
++#define DEBUG_TEXTURE	0x001
++#define DEBUG_STATE	0x002
++#define DEBUG_IOCTL	0x004
++#define DEBUG_PRIMS	0x008
++#define DEBUG_VERTS	0x010
++#define DEBUG_FALLBACKS	0x020
++#define DEBUG_VFMT	0x040
++#define DEBUG_CODEGEN	0x080
++#define DEBUG_VERBOSE	0x100
++#define DEBUG_DRI       0x200
++#define DEBUG_DMA       0x400
++#define DEBUG_SANITY    0x800
++#define DEBUG_SYNC      0x1000
++#define DEBUG_PIXEL     0x2000
++#define DEBUG_MEMORY    0x4000
++
++
++
++typedef void (*radeon_tri_func) (radeonContextPtr,
++				 radeonVertex *,
++				 radeonVertex *, radeonVertex *);
++
++typedef void (*radeon_line_func) (radeonContextPtr,
++				  radeonVertex *, radeonVertex *);
++
++typedef void (*radeon_point_func) (radeonContextPtr, radeonVertex *);
++
++struct radeon_state {
++	struct radeon_colorbuffer_state color;
++	struct radeon_depthbuffer_state depth;
++	struct radeon_scissor_state scissor;
++	struct radeon_stencilbuffer_state stencil;
++};
++
++/**
++ * This structure holds the command buffer while it is being constructed.
++ *
++ * The first batch of commands in the buffer is always the state that needs
++ * to be re-emitted when the context is lost. This batch can be skipped
++ * otherwise.
++ */
++struct radeon_cmdbuf {
++	struct radeon_cs_manager    *csm;
++	struct radeon_cs            *cs;
++	int size; /** # of dwords total */
++	unsigned int flushing:1; /** whether we're currently in FlushCmdBufLocked */
++};
++
++struct radeon_context {
++   GLcontext *glCtx;
++   radeonScreenPtr radeonScreen;	/* Screen private DRI data */
++  
++   /* Texture object bookkeeping
++    */
++   int                   texture_depth;
++   float                 initialMaxAnisotropy;
++
++  struct radeon_dma dma;
++  struct radeon_hw_state hw;
++   /* Rasterization and vertex state:
++    */
++   GLuint TclFallback;
++   GLuint Fallback;
++   GLuint NewGLState;
++   DECLARE_RENDERINPUTS(tnl_index_bitset);	/* index of bits for last tnl_install_attrs */
++
++   /* Page flipping */
++   GLuint doPageFlip;
++
++   /* Drawable, cliprect and scissor information */
++   GLuint numClipRects;	/* Cliprects for the draw buffer */
++   drm_clip_rect_t *pClipRects;
++   unsigned int lastStamp;
++   GLboolean lost_context;
++   drm_radeon_sarea_t *sarea;	/* Private SAREA data */
++
++   /* Mirrors of some DRI state */
++   struct radeon_dri_mirror dri;
++
++   /* Busy waiting */
++   GLuint do_usleeps;
++   GLuint do_irqs;
++   GLuint irqsEmitted;
++   drm_radeon_irq_wait_t iw;
++
++   /* buffer swap */
++   int64_t swap_ust;
++   int64_t swap_missed_ust;
++
++   GLuint swap_count;
++   GLuint swap_missed_count;
++
++   /* Derived state - for r300 only */
++   struct radeon_state state;
++
++   struct radeon_swtcl_info swtcl;
++   /* Configuration cache
++    */
++   driOptionCache optionCache;
++
++   struct radeon_cmdbuf cmdbuf;
++
++   struct {
++	   void (*get_lock)(radeonContextPtr radeon);
++	   void (*update_viewport_offset)(GLcontext *ctx);
++	   void (*update_draw_buffer)(GLcontext *ctx);
++	   void (*emit_cs_header)(struct radeon_cs *cs, radeonContextPtr rmesa);
++	   void (*swtcl_flush)(GLcontext *ctx, uint32_t offset);
++	   void (*pre_emit_atoms)(radeonContextPtr rmesa);
++	   void (*pre_emit_state)(radeonContextPtr rmesa);
++   } vtbl;
++};
++
++#define RADEON_CONTEXT(glctx) ((radeonContextPtr)((glctx)->DriverCtx)) /* expands its argument, not a caller-local ctx */
++
++/**
++ * Return the bits of a float reinterpreted as a uint32_t (no value conversion).
++ */
++static INLINE uint32_t radeonPackFloat32(float fl)
++{
++	union {
++		uint32_t bits;
++		float value;
++	} pun;
++
++	pun.value = fl;
++	return pun.bits;
++}
++
++/* Pack a float into 24 bits: sign in bit 23, exponent (bias 63) from
++ * bit 16 upwards, top 16 mantissa bits in bits 15:0.  Zero packs to 0.
++ *
++ * Probably wrong for some values and needs more testing.  There is no
++ * range checking: the biased exponent is shifted in unmasked.
++ */
++static INLINE uint32_t radeonPackFloat24(float f)
++{
++	float mantissa;
++	int exponent;
++	uint32_t float24 = 0;
++
++	if (f == 0.0)
++		return 0;
++
++	mantissa = frexpf(f, &exponent);	/* f == mantissa * 2^exponent, |mantissa| in [0.5, 1) */
++
++	/* Handle -ve */
++	if (mantissa < 0) {
++		float24 |= (1 << 23);
++		mantissa = mantissa * -1.0;
++	}
++	/* Handle exponent, bias of 63 (frexpf's exponent is one above the 1.x form, hence 62) */
++	exponent += 62;
++	float24 |= (exponent << 16);
++	/* Keep the top 16 of the 23 stored mantissa bits (kill the 7 LSBs) */
++	float24 |= (radeonPackFloat32(mantissa) & 0x7FFFFF) >> 7;
++
++	return float24;
++}
++
++GLboolean radeonInitContext(radeonContextPtr radeon,
++			    struct dd_function_table* functions,
++			    const __GLcontextModes * glVisual,
++			    __DRIcontextPrivate * driContextPriv,
++			    void *sharedContextPrivate);
++
++void radeonCleanupContext(radeonContextPtr radeon);
++GLboolean radeonUnbindContext(__DRIcontextPrivate * driContextPriv);
++void radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable);
++GLboolean radeonMakeCurrent(__DRIcontextPrivate * driContextPriv,
++			    __DRIdrawablePrivate * driDrawPriv,
++			    __DRIdrawablePrivate * driReadPriv);
++
++/* ================================================================
++ * Debugging:
++ */
++#define DO_DEBUG		1
++
++#if DO_DEBUG
++extern int RADEON_DEBUG;
++#else
++#define RADEON_DEBUG		0
++#endif
++
++#endif
+diff --git a/src/mesa/drivers/dri/radeon/radeon_compat.c b/src/mesa/drivers/dri/radeon/radeon_compat.c
+deleted file mode 100644
+index 46b490d..0000000
+--- a/src/mesa/drivers/dri/radeon/radeon_compat.c
++++ /dev/null
+@@ -1,301 +0,0 @@
+-/**************************************************************************
+-
+-Copyright 2002 ATI Technologies Inc., Ontario, Canada, and
+-               Tungsten Graphics Inc., Austin, Texas.
+-
+-All Rights Reserved.
+-
+-Permission is hereby granted, free of charge, to any person obtaining a
+-copy of this software and associated documentation files (the "Software"),
+-to deal in the Software without restriction, including without limitation
+-on the rights to use, copy, modify, merge, publish, distribute, sub
+-license, and/or sell copies of the Software, and to permit persons to whom
+-the Software is furnished to do so, subject to the following conditions:
+-
+-The above copyright notice and this permission notice (including the next
+-paragraph) shall be included in all copies or substantial portions of the
+-Software.
+-
+-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+-FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+-ATI, TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+-DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+-OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+-USE OR OTHER DEALINGS IN THE SOFTWARE.
+-
+-**************************************************************************/
+-
+-/*
+- * Authors:
+- *   Keith Whitwell <keith@tungstengraphics.com>
+- *
+- */
+-
+-#include "main/glheader.h"
+-#include "main/imports.h"
+-
+-#include "radeon_context.h"
+-#include "radeon_state.h"
+-#include "radeon_ioctl.h"
+-
+-
+-static struct { 
+-	int start; 
+-	int len; 
+-	const char *name;
+-} packet[RADEON_MAX_STATE_PACKETS] = {
+-	{ RADEON_PP_MISC,7,"RADEON_PP_MISC" },
+-	{ RADEON_PP_CNTL,3,"RADEON_PP_CNTL" },
+-	{ RADEON_RB3D_COLORPITCH,1,"RADEON_RB3D_COLORPITCH" },
+-	{ RADEON_RE_LINE_PATTERN,2,"RADEON_RE_LINE_PATTERN" },
+-	{ RADEON_SE_LINE_WIDTH,1,"RADEON_SE_LINE_WIDTH" },
+-	{ RADEON_PP_LUM_MATRIX,1,"RADEON_PP_LUM_MATRIX" },
+-	{ RADEON_PP_ROT_MATRIX_0,2,"RADEON_PP_ROT_MATRIX_0" },
+-	{ RADEON_RB3D_STENCILREFMASK,3,"RADEON_RB3D_STENCILREFMASK" },
+-	{ RADEON_SE_VPORT_XSCALE,6,"RADEON_SE_VPORT_XSCALE" },
+-	{ RADEON_SE_CNTL,2,"RADEON_SE_CNTL" },
+-	{ RADEON_SE_CNTL_STATUS,1,"RADEON_SE_CNTL_STATUS" },
+-	{ RADEON_RE_MISC,1,"RADEON_RE_MISC" },
+-	{ RADEON_PP_TXFILTER_0,6,"RADEON_PP_TXFILTER_0" },
+-	{ RADEON_PP_BORDER_COLOR_0,1,"RADEON_PP_BORDER_COLOR_0" },
+-	{ RADEON_PP_TXFILTER_1,6,"RADEON_PP_TXFILTER_1" },
+-	{ RADEON_PP_BORDER_COLOR_1,1,"RADEON_PP_BORDER_COLOR_1" },
+-	{ RADEON_PP_TXFILTER_2,6,"RADEON_PP_TXFILTER_2" },
+-	{ RADEON_PP_BORDER_COLOR_2,1,"RADEON_PP_BORDER_COLOR_2" },
+-	{ RADEON_SE_ZBIAS_FACTOR,2,"RADEON_SE_ZBIAS_FACTOR" },
+-	{ RADEON_SE_TCL_OUTPUT_VTX_FMT,11,"RADEON_SE_TCL_OUTPUT_VTX_FMT" },
+-	{ RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED,17,"RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED" },
+-};
+-
+-
+-static void radeonCompatEmitPacket( radeonContextPtr rmesa, 
+-				    struct radeon_state_atom *state )
+-{
+-   drm_radeon_sarea_t *sarea = rmesa->sarea;
+-   drm_radeon_context_regs_t *ctx = &sarea->context_state;
+-   drm_radeon_texture_regs_t *tex0 = &sarea->tex_state[0];
+-   drm_radeon_texture_regs_t *tex1 = &sarea->tex_state[1];
+-   int i;
+-   int *buf = state->cmd;
+-
+-   for ( i = 0 ; i < state->cmd_size ; ) {
+-      drm_radeon_cmd_header_t *header = (drm_radeon_cmd_header_t *)&buf[i++];
+-
+-      if (RADEON_DEBUG & DEBUG_STATE)
+-	 fprintf(stderr, "%s %d: %s\n", __FUNCTION__, header->packet.packet_id,
+-		 packet[(int)header->packet.packet_id].name);
+-
+-      switch (header->packet.packet_id) {
+-      case RADEON_EMIT_PP_MISC:
+-	 ctx->pp_misc = buf[i++]; 
+-	 ctx->pp_fog_color = buf[i++];
+-	 ctx->re_solid_color = buf[i++];
+-	 ctx->rb3d_blendcntl = buf[i++];
+-	 ctx->rb3d_depthoffset = buf[i++];
+-	 ctx->rb3d_depthpitch = buf[i++];
+-	 ctx->rb3d_zstencilcntl = buf[i++];
+-	 sarea->dirty |= RADEON_UPLOAD_CONTEXT;
+-	 break;
+-      case RADEON_EMIT_PP_CNTL:
+-	 ctx->pp_cntl = buf[i++];
+-	 ctx->rb3d_cntl = buf[i++];
+-	 ctx->rb3d_coloroffset = buf[i++];
+-	 sarea->dirty |= RADEON_UPLOAD_CONTEXT;
+-	 break;
+-      case RADEON_EMIT_RB3D_COLORPITCH:
+-	 ctx->rb3d_colorpitch = buf[i++];
+-	 sarea->dirty |= RADEON_UPLOAD_CONTEXT;
+-	 break;
+-      case RADEON_EMIT_RE_LINE_PATTERN:
+-	 ctx->re_line_pattern = buf[i++];
+-	 ctx->re_line_state = buf[i++];
+-	 sarea->dirty |= RADEON_UPLOAD_LINE;
+-	 break;
+-      case RADEON_EMIT_SE_LINE_WIDTH:
+-	 ctx->se_line_width = buf[i++];
+-	 sarea->dirty |= RADEON_UPLOAD_LINE;
+-	 break;
+-      case RADEON_EMIT_PP_LUM_MATRIX:
+-	 ctx->pp_lum_matrix = buf[i++];
+-	 sarea->dirty |= RADEON_UPLOAD_BUMPMAP;
+-	 break;
+-      case RADEON_EMIT_PP_ROT_MATRIX_0:
+-	 ctx->pp_rot_matrix_0 = buf[i++];
+-	 ctx->pp_rot_matrix_1 = buf[i++];
+-	 sarea->dirty |= RADEON_UPLOAD_BUMPMAP;
+-	 break;
+-      case RADEON_EMIT_RB3D_STENCILREFMASK:
+-	 ctx->rb3d_stencilrefmask = buf[i++];
+-	 ctx->rb3d_ropcntl = buf[i++];
+-	 ctx->rb3d_planemask = buf[i++];
+-	 sarea->dirty |= RADEON_UPLOAD_MASKS;
+-	 break;
+-      case RADEON_EMIT_SE_VPORT_XSCALE:
+-	 ctx->se_vport_xscale = buf[i++];
+-	 ctx->se_vport_xoffset = buf[i++];
+-	 ctx->se_vport_yscale = buf[i++];
+-	 ctx->se_vport_yoffset = buf[i++];
+-	 ctx->se_vport_zscale = buf[i++];
+-	 ctx->se_vport_zoffset = buf[i++];
+-	 sarea->dirty |= RADEON_UPLOAD_VIEWPORT;
+-	 break;
+-      case RADEON_EMIT_SE_CNTL:
+-	 ctx->se_cntl = buf[i++];
+-	 ctx->se_coord_fmt = buf[i++];
+-	 sarea->dirty |= RADEON_UPLOAD_CONTEXT | RADEON_UPLOAD_VERTFMT;
+-	 break;
+-      case RADEON_EMIT_SE_CNTL_STATUS:
+-	 ctx->se_cntl_status = buf[i++];
+-	 sarea->dirty |= RADEON_UPLOAD_SETUP;
+-	 break;
+-      case RADEON_EMIT_RE_MISC:
+-	 ctx->re_misc = buf[i++];
+-	 sarea->dirty |= RADEON_UPLOAD_MISC;
+-	 break;
+-      case RADEON_EMIT_PP_TXFILTER_0:
+-	 tex0->pp_txfilter = buf[i++];
+-	 tex0->pp_txformat = buf[i++];
+-	 tex0->pp_txoffset = buf[i++];
+-	 tex0->pp_txcblend = buf[i++];
+-	 tex0->pp_txablend = buf[i++];
+-	 tex0->pp_tfactor = buf[i++];
+-	 sarea->dirty |= RADEON_UPLOAD_TEX0;
+-	 break;
+-      case RADEON_EMIT_PP_BORDER_COLOR_0:
+-	 tex0->pp_border_color = buf[i++];
+-	 sarea->dirty |= RADEON_UPLOAD_TEX0;
+-	 break;
+-      case RADEON_EMIT_PP_TXFILTER_1:
+-	 tex1->pp_txfilter = buf[i++];
+-	 tex1->pp_txformat = buf[i++];
+-	 tex1->pp_txoffset = buf[i++];
+-	 tex1->pp_txcblend = buf[i++];
+-	 tex1->pp_txablend = buf[i++];
+-	 tex1->pp_tfactor = buf[i++];
+-	 sarea->dirty |= RADEON_UPLOAD_TEX1;
+-	 break;
+-      case RADEON_EMIT_PP_BORDER_COLOR_1:
+-	 tex1->pp_border_color = buf[i++];
+-	 sarea->dirty |= RADEON_UPLOAD_TEX1;
+-	 break;
+-
+-      case RADEON_EMIT_SE_ZBIAS_FACTOR:
+-	 i++;
+-	 i++;
+-	 break;
+-
+-      case RADEON_EMIT_PP_TXFILTER_2:
+-      case RADEON_EMIT_PP_BORDER_COLOR_2:
+-      case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT:
+-      case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED:
+-      default:
+-	 /* These states aren't understood by radeon drm 1.1 */
+-	 fprintf(stderr, "Tried to emit unsupported state\n");
+-	 return;
+-      }
+-   }
+-}
+-
+-
+-
+-static void radeonCompatEmitStateLocked( radeonContextPtr rmesa )
+-{
+-   struct radeon_state_atom *atom;
+-
+-   if (RADEON_DEBUG & (DEBUG_STATE|DEBUG_PRIMS))
+-      fprintf(stderr, "%s\n", __FUNCTION__);
+-
+-   if (!rmesa->hw.is_dirty && !rmesa->hw.all_dirty)
+-      return;
+-
+-   foreach(atom, &rmesa->hw.atomlist) {
+-      if (rmesa->hw.all_dirty)
+-	 atom->dirty = GL_TRUE;
+-      if (atom->is_tcl)
+-	 atom->dirty = GL_FALSE;
+-      if (atom->dirty)
+-	 radeonCompatEmitPacket(rmesa, atom);
+-   }
+- 
+-   rmesa->hw.is_dirty = GL_FALSE;
+-   rmesa->hw.all_dirty = GL_FALSE;
+-}
+-
+-
+-static void radeonCompatEmitPrimitiveLocked( radeonContextPtr rmesa,
+-					     GLuint hw_primitive,
+-					     GLuint nverts,
+-					     drm_clip_rect_t *pbox,
+-					     GLuint nbox )
+-{
+-   int i;
+-
+-   for ( i = 0 ; i < nbox ; ) {
+-      int nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS, nbox );
+-      drm_clip_rect_t *b = rmesa->sarea->boxes;
+-      drm_radeon_vertex_t vtx;
+-      
+-      rmesa->sarea->dirty |= RADEON_UPLOAD_CLIPRECTS;
+-      rmesa->sarea->nbox = nr - i;
+-
+-      for ( ; i < nr ; i++) 
+-	 *b++ = pbox[i];
+-      
+-      if (RADEON_DEBUG & DEBUG_IOCTL)
+-	 fprintf(stderr, 
+-		 "RadeonFlushVertexBuffer: prim %x buf %d verts %d "
+-		 "disc %d nbox %d\n",
+-		 hw_primitive, 
+-		 rmesa->dma.current.buf->buf->idx, 
+-		 nverts, 
+-		 nr == nbox,
+-		 rmesa->sarea->nbox );
+-
+-      vtx.prim = hw_primitive;
+-      vtx.idx = rmesa->dma.current.buf->buf->idx;
+-      vtx.count = nverts;
+-      vtx.discard = (nr == nbox);      
+-
+-      drmCommandWrite( rmesa->dri.fd, 
+-		       DRM_RADEON_VERTEX,
+-		       &vtx, sizeof(vtx));
+-   }
+-}
+-
+-
+-
+-/* No 'start' for 1.1 vertices ioctl: only one vertex prim/buffer!  
+- */
+-void radeonCompatEmitPrimitive( radeonContextPtr rmesa,
+-				GLuint vertex_format,
+-				GLuint hw_primitive,
+-				GLuint nrverts )
+-{
+-   if (RADEON_DEBUG & DEBUG_IOCTL)
+-      fprintf(stderr, "%s\n", __FUNCTION__);
+-
+-   LOCK_HARDWARE( rmesa );
+-
+-   radeonCompatEmitStateLocked( rmesa );
+-   rmesa->sarea->vc_format = vertex_format;
+-   
+-   if (rmesa->state.scissor.enabled) {
+-      radeonCompatEmitPrimitiveLocked( rmesa, 
+-				       hw_primitive,
+-				       nrverts,
+-				       rmesa->state.scissor.pClipRects,
+-				       rmesa->state.scissor.numClipRects );
+-   }
+-   else {
+-      radeonCompatEmitPrimitiveLocked( rmesa, 
+-				       hw_primitive,
+-				       nrverts,
+-				       rmesa->pClipRects,
+-				       rmesa->numClipRects );
+-   }
+-
+-
+-   UNLOCK_HARDWARE( rmesa );
+-}
+-
+diff --git a/src/mesa/drivers/dri/radeon/radeon_context.c b/src/mesa/drivers/dri/radeon/radeon_context.c
+index ea81a32..e4202c7 100644
+--- a/src/mesa/drivers/dri/radeon/radeon_context.c
++++ b/src/mesa/drivers/dri/radeon/radeon_context.c
+@@ -53,6 +53,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ #include "drivers/common/driverfuncs.h"
+ 
++#include "radeon_common.h"
+ #include "radeon_context.h"
+ #include "radeon_ioctl.h"
+ #include "radeon_state.h"
+@@ -72,40 +73,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "vblank.h"
+ #include "utils.h"
+ #include "xmlpool.h" /* for symbolic values of enum-type options */
+-#ifndef RADEON_DEBUG
+-int RADEON_DEBUG = (0);
+-#endif
+-
+-
+-/* Return various strings for glGetString().
+- */
+-static const GLubyte *radeonGetString( GLcontext *ctx, GLenum name )
+-{
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+-   static char buffer[128];
+-   unsigned   offset;
+-   GLuint agp_mode = (rmesa->radeonScreen->card_type==RADEON_CARD_PCI) ? 0 :
+-      rmesa->radeonScreen->AGPMode;
+-
+-   switch ( name ) {
+-   case GL_VENDOR:
+-      return (GLubyte *)"Tungsten Graphics, Inc.";
+-
+-   case GL_RENDERER:
+-      offset = driGetRendererString( buffer, "Radeon", DRIVER_DATE,
+-				     agp_mode );
+-
+-      sprintf( & buffer[ offset ], " %sTCL",
+-	       !(rmesa->TclFallback & RADEON_TCL_FALLBACK_TCL_DISABLE)
+-	       ? "" : "NO-" );
+-
+-      return (GLubyte *)buffer;
+-
+-   default:
+-      return NULL;
+-   }
+-}
+-
+ 
+ /* Extension strings exported by the R100 driver.
+  */
+@@ -160,15 +127,6 @@ static const struct tnl_pipeline_stage *radeon_pipeline[] = {
+    NULL,
+ };
+ 
+-
+-
+-/* Initialize the driver's misc functions.
+- */
+-static void radeonInitDriverFuncs( struct dd_function_table *functions )
+-{
+-    functions->GetString	= radeonGetString;
+-}
+-
+ static const struct dri_debug_control debug_control[] =
+ {
+     { "fall",  DEBUG_FALLBACKS },
+@@ -188,6 +146,51 @@ static const struct dri_debug_control debug_control[] =
+     { NULL,    0 }
+ };
+ 
++static void r100_get_lock(radeonContextPtr radeon)
++{
++   r100ContextPtr rmesa = (r100ContextPtr)radeon;
++   drm_radeon_sarea_t *sarea = radeon->sarea;
++
++   RADEON_STATECHANGE(rmesa, ctx);
++   if (rmesa->radeon.sarea->tiling_enabled) {
++      rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |=
++	 RADEON_COLOR_TILE_ENABLE;
++   } else {
++      rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] &=
++	 ~RADEON_COLOR_TILE_ENABLE;
++   }
++   
++   if (sarea->ctx_owner != rmesa->radeon.dri.hwContext) {
++      sarea->ctx_owner = rmesa->radeon.dri.hwContext;
++      
++      if (!radeon->radeonScreen->kernel_mm)
++         radeon_bo_legacy_texture_age(radeon->radeonScreen->bom);
++   }
++}
++
++static void r100_vtbl_emit_cs_header(struct radeon_cs *cs, radeonContextPtr rmesa)
++{
++}
++
++static void r100_vtbl_pre_emit_state(radeonContextPtr radeon)
++{
++   r100ContextPtr rmesa = (r100ContextPtr)radeon;
++   
++   /* r100 always needs to emit ZBS to avoid TCL lockups */
++   rmesa->hw.zbs.dirty = 1;
++   radeon->hw.is_dirty = 1;
++}
++
++
++static void r100_init_vtbl(radeonContextPtr radeon)
++{
++   radeon->vtbl.get_lock = r100_get_lock;
++   radeon->vtbl.update_viewport_offset = radeonUpdateViewportOffset;
++   radeon->vtbl.update_draw_buffer = radeonUpdateDrawBuffer;
++   radeon->vtbl.emit_cs_header = r100_vtbl_emit_cs_header;
++   radeon->vtbl.swtcl_flush = r100_swtcl_flush;
++   radeon->vtbl.pre_emit_state = r100_vtbl_pre_emit_state;
++}
+ 
+ /* Create the device specific context.
+  */
+@@ -199,8 +202,8 @@ radeonCreateContext( const __GLcontextModes *glVisual,
+    __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv;
+    radeonScreenPtr screen = (radeonScreenPtr)(sPriv->private);
+    struct dd_function_table functions;
+-   radeonContextPtr rmesa;
+-   GLcontext *ctx, *shareCtx;
++   r100ContextPtr rmesa;
++   GLcontext *ctx;
+    int i;
+    int tcl_mode, fthrottle_mode;
+ 
+@@ -209,10 +212,12 @@ radeonCreateContext( const __GLcontextModes *glVisual,
+    assert(screen);
+ 
+    /* Allocate the Radeon context */
+-   rmesa = (radeonContextPtr) CALLOC( sizeof(*rmesa) );
++   rmesa = (r100ContextPtr) CALLOC( sizeof(*rmesa) );
+    if ( !rmesa )
+       return GL_FALSE;
+ 
++   r100_init_vtbl(&rmesa->radeon);
++
+    /* init exp fog table data */
+    radeonInitStaticFogData();
+    
+@@ -220,12 +225,12 @@ radeonCreateContext( const __GLcontextModes *glVisual,
+     * Do this here so that initialMaxAnisotropy is set before we create
+     * the default textures.
+     */
+-   driParseConfigFiles (&rmesa->optionCache, &screen->optionCache,
++   driParseConfigFiles (&rmesa->radeon.optionCache, &screen->optionCache,
+ 			screen->driScreen->myNum, "radeon");
+-   rmesa->initialMaxAnisotropy = driQueryOptionf(&rmesa->optionCache,
++   rmesa->radeon.initialMaxAnisotropy = driQueryOptionf(&rmesa->radeon.optionCache,
+                                                  "def_max_anisotropy");
+ 
+-   if ( driQueryOptionb( &rmesa->optionCache, "hyperz" ) ) {
++   if ( driQueryOptionb( &rmesa->radeon.optionCache, "hyperz" ) ) {
+       if ( sPriv->drm_version.minor < 13 )
+ 	 fprintf( stderr, "DRM version 1.%d too old to support HyperZ, "
+ 			  "disabling.\n", sPriv->drm_version.minor );
+@@ -240,65 +245,23 @@ radeonCreateContext( const __GLcontextModes *glVisual,
+     * (the texture functions are especially important)
+     */
+    _mesa_init_driver_functions( &functions );
+-   radeonInitDriverFuncs( &functions );
+    radeonInitTextureFuncs( &functions );
+ 
+-   /* Allocate the Mesa context */
+-   if (sharedContextPrivate)
+-      shareCtx = ((radeonContextPtr) sharedContextPrivate)->glCtx;
+-   else
+-      shareCtx = NULL;
+-   rmesa->glCtx = _mesa_create_context(glVisual, shareCtx,
+-                                       &functions, (void *) rmesa);
+-   if (!rmesa->glCtx) {
+-      FREE(rmesa);
+-      return GL_FALSE;
+-   }
+-   driContextPriv->driverPrivate = rmesa;
+-
+-   /* Init radeon context data */
+-   rmesa->dri.context = driContextPriv;
+-   rmesa->dri.screen = sPriv;
+-   rmesa->dri.drawable = NULL;
+-   rmesa->dri.readable = NULL;
+-   rmesa->dri.hwContext = driContextPriv->hHWContext;
+-   rmesa->dri.hwLock = &sPriv->pSAREA->lock;
+-   rmesa->dri.fd = sPriv->fd;
+-   rmesa->dri.drmMinor = sPriv->drm_version.minor;
+-
+-   rmesa->radeonScreen = screen;
+-   rmesa->sarea = (drm_radeon_sarea_t *)((GLubyte *)sPriv->pSAREA +
+-				       screen->sarea_priv_offset);
+-
+-
+-   rmesa->dma.buf0_address = rmesa->radeonScreen->buffers->list[0].address;
+-
+-   (void) memset( rmesa->texture_heaps, 0, sizeof( rmesa->texture_heaps ) );
+-   make_empty_list( & rmesa->swapped );
+-
+-   rmesa->nr_heaps = screen->numTexHeaps;
+-   for ( i = 0 ; i < rmesa->nr_heaps ; i++ ) {
+-      rmesa->texture_heaps[i] = driCreateTextureHeap( i, rmesa,
+-	    screen->texSize[i],
+-	    12,
+-	    RADEON_NR_TEX_REGIONS,
+-	    (drmTextureRegionPtr)rmesa->sarea->tex_list[i],
+-	    & rmesa->sarea->tex_age[i],
+-	    & rmesa->swapped,
+-	    sizeof( radeonTexObj ),
+-	    (destroy_texture_object_t *) radeonDestroyTexObj );
+-
+-      driSetTextureSwapCounterLocation( rmesa->texture_heaps[i],
+-					& rmesa->c_textureSwaps );
++   if (!radeonInitContext(&rmesa->radeon, &functions,
++			  glVisual, driContextPriv,
++			  sharedContextPrivate)) {
++     FREE(rmesa);
++     return GL_FALSE;
+    }
+-   rmesa->texture_depth = driQueryOptioni (&rmesa->optionCache,
++
++   rmesa->radeon.texture_depth = driQueryOptioni (&rmesa->radeon.optionCache,
+ 					   "texture_depth");
+-   if (rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_FB)
+-      rmesa->texture_depth = ( screen->cpp == 4 ) ?
++   if (rmesa->radeon.texture_depth == DRI_CONF_TEXTURE_DEPTH_FB)
++      rmesa->radeon.texture_depth = ( screen->cpp == 4 ) ?
+ 	 DRI_CONF_TEXTURE_DEPTH_32 : DRI_CONF_TEXTURE_DEPTH_16;
+ 
+-   rmesa->swtcl.RenderIndex = ~0;
+-   rmesa->hw.all_dirty = GL_TRUE;
++   rmesa->radeon.swtcl.RenderIndex = ~0;
++   rmesa->radeon.hw.all_dirty = GL_TRUE;
+ 
+    /* Set the maximum texture size small enough that we can guarentee that
+     * all texture units can bind a maximal texture and have all of them in
+@@ -306,26 +269,13 @@ radeonCreateContext( const __GLcontextModes *glVisual,
+     * setting allow larger textures.
+     */
+ 
+-   ctx = rmesa->glCtx;
+-   ctx->Const.MaxTextureUnits = driQueryOptioni (&rmesa->optionCache,
++   ctx = rmesa->radeon.glCtx;
++   ctx->Const.MaxTextureUnits = driQueryOptioni (&rmesa->radeon.optionCache,
+ 						 "texture_units");
+    ctx->Const.MaxTextureImageUnits = ctx->Const.MaxTextureUnits;
+    ctx->Const.MaxTextureCoordUnits = ctx->Const.MaxTextureUnits;
+ 
+-   i = driQueryOptioni( &rmesa->optionCache, "allow_large_textures");
+-
+-   driCalculateMaxTextureLevels( rmesa->texture_heaps,
+-				 rmesa->nr_heaps,
+-				 & ctx->Const,
+-				 4,
+-				 11, /* max 2D texture size is 2048x2048 */
+-				 8,  /* 256^3 */
+-				 9,  /* \todo: max cube texture size seems to be 512x512(x6) */
+-				 11, /* max rect texture size is 2048x2048. */
+-				 12,
+-				 GL_FALSE,
+-				 i );
+-
++   i = driQueryOptioni( &rmesa->radeon.optionCache, "allow_large_textures");
+ 
+    ctx->Const.MaxTextureMaxAnisotropy = 16.0;
+ 
+@@ -388,38 +338,38 @@ radeonCreateContext( const __GLcontextModes *glVisual,
+    }
+ 
+    driInitExtensions( ctx, card_extensions, GL_TRUE );
+-   if (rmesa->radeonScreen->drmSupportsCubeMapsR100)
++   if (rmesa->radeon.radeonScreen->drmSupportsCubeMapsR100)
+       _mesa_enable_extension( ctx, "GL_ARB_texture_cube_map" );
+-   if (rmesa->glCtx->Mesa_DXTn) {
++   if (rmesa->radeon.glCtx->Mesa_DXTn) {
+       _mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" );
+       _mesa_enable_extension( ctx, "GL_S3_s3tc" );
+    }
+-   else if (driQueryOptionb (&rmesa->optionCache, "force_s3tc_enable")) {
++   else if (driQueryOptionb (&rmesa->radeon.optionCache, "force_s3tc_enable")) {
+       _mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" );
+    }
+ 
+-   if (rmesa->dri.drmMinor >= 9)
++   if (rmesa->radeon.dri.drmMinor >= 9)
+       _mesa_enable_extension( ctx, "GL_NV_texture_rectangle");
+ 
+    /* XXX these should really go right after _mesa_init_driver_functions() */
++   radeonInitSpanFuncs( ctx );
+    radeonInitIoctlFuncs( ctx );
+    radeonInitStateFuncs( ctx );
+-   radeonInitSpanFuncs( ctx );
+    radeonInitState( rmesa );
+    radeonInitSwtcl( ctx );
+ 
+    _mesa_vector4f_alloc( &rmesa->tcl.ObjClean, 0, 
+ 			 ctx->Const.MaxArrayLockSize, 32 );
+ 
+-   fthrottle_mode = driQueryOptioni(&rmesa->optionCache, "fthrottle_mode");
+-   rmesa->iw.irq_seq = -1;
+-   rmesa->irqsEmitted = 0;
+-   rmesa->do_irqs = (rmesa->radeonScreen->irq != 0 &&
+-		     fthrottle_mode == DRI_CONF_FTHROTTLE_IRQS);
++   fthrottle_mode = driQueryOptioni(&rmesa->radeon.optionCache, "fthrottle_mode");
++   rmesa->radeon.iw.irq_seq = -1;
++   rmesa->radeon.irqsEmitted = 0;
++   rmesa->radeon.do_irqs = (rmesa->radeon.radeonScreen->irq != 0 &&
++			    fthrottle_mode == DRI_CONF_FTHROTTLE_IRQS);
+ 
+-   rmesa->do_usleeps = (fthrottle_mode == DRI_CONF_FTHROTTLE_USLEEPS);
++   rmesa->radeon.do_usleeps = (fthrottle_mode == DRI_CONF_FTHROTTLE_USLEEPS);
+ 
+-   (*sPriv->systemTime->getUST)( & rmesa->swap_ust );
++   (*sPriv->systemTime->getUST)( & rmesa->radeon.swap_ust );
+ 
+ 
+ #if DO_DEBUG
+@@ -427,20 +377,20 @@ radeonCreateContext( const __GLcontextModes *glVisual,
+ 				       debug_control );
+ #endif
+ 
+-   tcl_mode = driQueryOptioni(&rmesa->optionCache, "tcl_mode");
+-   if (driQueryOptionb(&rmesa->optionCache, "no_rast")) {
++   tcl_mode = driQueryOptioni(&rmesa->radeon.optionCache, "tcl_mode");
++   if (driQueryOptionb(&rmesa->radeon.optionCache, "no_rast")) {
+       fprintf(stderr, "disabling 3D acceleration\n");
+       FALLBACK(rmesa, RADEON_FALLBACK_DISABLE, 1);
+    } else if (tcl_mode == DRI_CONF_TCL_SW ||
+-	      !(rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) {
+-      if (rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
+-	 rmesa->radeonScreen->chip_flags &= ~RADEON_CHIPSET_TCL;
++	      !(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) {
++      if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
++	 rmesa->radeon.radeonScreen->chip_flags &= ~RADEON_CHIPSET_TCL;
+ 	 fprintf(stderr, "Disabling HW TCL support\n");
+       }
+-      TCL_FALLBACK(rmesa->glCtx, RADEON_TCL_FALLBACK_TCL_DISABLE, 1);
++      TCL_FALLBACK(rmesa->radeon.glCtx, RADEON_TCL_FALLBACK_TCL_DISABLE, 1);
+    }
+ 
+-   if (rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
++   if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
+ /*       _tnl_need_dlist_norm_lengths( ctx, GL_FALSE ); */
+    }
+    return GL_TRUE;
+@@ -454,179 +404,41 @@ radeonCreateContext( const __GLcontextModes *glVisual,
+ void radeonDestroyContext( __DRIcontextPrivate *driContextPriv )
+ {
+    GET_CURRENT_CONTEXT(ctx);
+-   radeonContextPtr rmesa = (radeonContextPtr) driContextPriv->driverPrivate;
+-   radeonContextPtr current = ctx ? RADEON_CONTEXT(ctx) : NULL;
++   r100ContextPtr rmesa = (r100ContextPtr) driContextPriv->driverPrivate;
++   r100ContextPtr current = ctx ? R100_CONTEXT(ctx) : NULL;
+ 
+    /* check if we're deleting the currently bound context */
+    if (rmesa == current) {
+-      RADEON_FIREVERTICES( rmesa );
++      radeon_firevertices(&rmesa->radeon);
+       _mesa_make_current(NULL, NULL, NULL);
+    }
+ 
+    /* Free radeon context resources */
+    assert(rmesa); /* should never be null */
+    if ( rmesa ) {
+-      GLboolean   release_texture_heaps;
+ 
++      _swsetup_DestroyContext( rmesa->radeon.glCtx );
++      _tnl_DestroyContext( rmesa->radeon.glCtx );
++      _vbo_DestroyContext( rmesa->radeon.glCtx );
++      _swrast_DestroyContext( rmesa->radeon.glCtx );
+ 
+-      release_texture_heaps = (rmesa->glCtx->Shared->RefCount == 1);
+-      _swsetup_DestroyContext( rmesa->glCtx );
+-      _tnl_DestroyContext( rmesa->glCtx );
+-      _vbo_DestroyContext( rmesa->glCtx );
+-      _swrast_DestroyContext( rmesa->glCtx );
+-
+-      radeonDestroySwtcl( rmesa->glCtx );
+-      radeonReleaseArrays( rmesa->glCtx, ~0 );
+-      if (rmesa->dma.current.buf) {
+-	 radeonReleaseDmaRegion( rmesa, &rmesa->dma.current, __FUNCTION__ );
+-	 radeonFlushCmdBuf( rmesa, __FUNCTION__ );
++      radeonDestroySwtcl( rmesa->radeon.glCtx );
++      radeonReleaseArrays( rmesa->radeon.glCtx, ~0 );
++      if (rmesa->radeon.dma.current) {
++	 radeonReleaseDmaRegion( &rmesa->radeon );
++	 rcommonFlushCmdBuf( &rmesa->radeon, __FUNCTION__ );
+       }
+ 
+       _mesa_vector4f_free( &rmesa->tcl.ObjClean );
+ 
+-      if (rmesa->state.scissor.pClipRects) {
+-	 FREE(rmesa->state.scissor.pClipRects);
+-	 rmesa->state.scissor.pClipRects = NULL;
+-      }
+-
+-      if ( release_texture_heaps ) {
+-         /* This share group is about to go away, free our private
+-          * texture object data.
+-          */
+-         int i;
+-
+-         for ( i = 0 ; i < rmesa->nr_heaps ; i++ ) {
+-	    driDestroyTextureHeap( rmesa->texture_heaps[ i ] );
+-	    rmesa->texture_heaps[ i ] = NULL;
+-         }
+-
+-	 assert( is_empty_list( & rmesa->swapped ) );
++      if (rmesa->radeon.state.scissor.pClipRects) {
++	 FREE(rmesa->radeon.state.scissor.pClipRects);
++	 rmesa->radeon.state.scissor.pClipRects = NULL;
+       }
+ 
+-      /* free the Mesa context */
+-      rmesa->glCtx->DriverCtx = NULL;
+-      _mesa_destroy_context( rmesa->glCtx );
+-
+-      /* free the option cache */
+-      driDestroyOptionCache (&rmesa->optionCache);
++      radeonCleanupContext(&rmesa->radeon);
+ 
+       FREE( rmesa );
+    }
+ }
+ 
+-
+-
+-
+-void
+-radeonSwapBuffers( __DRIdrawablePrivate *dPriv )
+-{
+-
+-   if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) {
+-      radeonContextPtr rmesa;
+-      GLcontext *ctx;
+-      rmesa = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
+-      ctx = rmesa->glCtx;
+-      if (ctx->Visual.doubleBufferMode) {
+-         _mesa_notifySwapBuffers( ctx );  /* flush pending rendering comands */
+-
+-         if ( rmesa->doPageFlip ) {
+-            radeonPageFlip( dPriv );
+-         }
+-         else {
+-	     radeonCopyBuffer( dPriv, NULL );
+-         }
+-      }
+-   }
+-   else {
+-      /* XXX this shouldn't be an error but we can't handle it for now */
+-      _mesa_problem(NULL, "%s: drawable has no context!", __FUNCTION__);
+-   }
+-}
+-
+-void radeonCopySubBuffer(__DRIdrawablePrivate * dPriv,
+-			 int x, int y, int w, int h )
+-{
+-    if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) {
+-	radeonContextPtr radeon;
+-	GLcontext *ctx;
+-
+-	radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
+-	ctx = radeon->glCtx;
+-
+-	if (ctx->Visual.doubleBufferMode) {
+-	    drm_clip_rect_t rect;
+-	    rect.x1 = x + dPriv->x;
+-	    rect.y1 = (dPriv->h - y - h) + dPriv->y;
+-	    rect.x2 = rect.x1 + w;
+-	    rect.y2 = rect.y1 + h;
+-	    _mesa_notifySwapBuffers(ctx);	/* flush pending rendering comands */
+-	    radeonCopyBuffer(dPriv, &rect);
+-	}
+-    } else {
+-	/* XXX this shouldn't be an error but we can't handle it for now */
+-	_mesa_problem(NULL, "%s: drawable has no context!",
+-		      __FUNCTION__);
+-    }
+-}
+-
+-/* Make context `c' the current context and bind it to the given
+- * drawing and reading surfaces.
+- */
+-GLboolean
+-radeonMakeCurrent( __DRIcontextPrivate *driContextPriv,
+-                   __DRIdrawablePrivate *driDrawPriv,
+-                   __DRIdrawablePrivate *driReadPriv )
+-{
+-   if ( driContextPriv ) {
+-      radeonContextPtr newCtx = 
+-	 (radeonContextPtr) driContextPriv->driverPrivate;
+-
+-      if (RADEON_DEBUG & DEBUG_DRI)
+-	 fprintf(stderr, "%s ctx %p\n", __FUNCTION__, (void *) newCtx->glCtx);
+-
+-      newCtx->dri.readable = driReadPriv;
+-
+-      if ( (newCtx->dri.drawable != driDrawPriv) ||
+-           newCtx->lastStamp != driDrawPriv->lastStamp ) {
+-	 if (driDrawPriv->swap_interval == (unsigned)-1) {
+-	    driDrawPriv->vblFlags = (newCtx->radeonScreen->irq != 0)
+-	       ? driGetDefaultVBlankFlags(&newCtx->optionCache)
+-	       : VBLANK_FLAG_NO_IRQ;
+-
+-	    driDrawableInitVBlank( driDrawPriv );
+-	 }
+-
+-	 newCtx->dri.drawable = driDrawPriv;
+-
+-	 radeonSetCliprects(newCtx);
+-	 radeonUpdateViewportOffset( newCtx->glCtx );
+-      }
+-
+-      _mesa_make_current( newCtx->glCtx,
+-			  (GLframebuffer *) driDrawPriv->driverPrivate,
+-			  (GLframebuffer *) driReadPriv->driverPrivate );
+-
+-      _mesa_update_state( newCtx->glCtx );
+-   } else {
+-      if (RADEON_DEBUG & DEBUG_DRI)
+-	 fprintf(stderr, "%s ctx is null\n", __FUNCTION__);
+-      _mesa_make_current( NULL, NULL, NULL );
+-   }
+-
+-   if (RADEON_DEBUG & DEBUG_DRI)
+-      fprintf(stderr, "End %s\n", __FUNCTION__);
+-   return GL_TRUE;
+-}
+-
+-/* Force the context `c' to be unbound from its buffer.
+- */
+-GLboolean
+-radeonUnbindContext( __DRIcontextPrivate *driContextPriv )
+-{
+-   radeonContextPtr rmesa = (radeonContextPtr) driContextPriv->driverPrivate;
+-
+-   if (RADEON_DEBUG & DEBUG_DRI)
+-      fprintf(stderr, "%s ctx %p\n", __FUNCTION__, (void *) rmesa->glCtx);
+-
+-   return GL_TRUE;
+-}
+diff --git a/src/mesa/drivers/dri/radeon/radeon_context.h b/src/mesa/drivers/dri/radeon/radeon_context.h
+index 53df766..2efabd1 100644
+--- a/src/mesa/drivers/dri/radeon/radeon_context.h
++++ b/src/mesa/drivers/dri/radeon/radeon_context.h
+@@ -48,91 +48,23 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "drm.h"
+ #include "radeon_drm.h"
+ #include "texmem.h"
+-
+ #include "main/macros.h"
+ #include "main/mtypes.h"
+ #include "main/colormac.h"
+-
+-struct radeon_context;
+-typedef struct radeon_context radeonContextRec;
+-typedef struct radeon_context *radeonContextPtr;
+-
+-/* This union is used to avoid warnings/miscompilation
+-   with float to uint32_t casts due to strict-aliasing */
+-typedef union {
+-	GLfloat f;
+-	uint32_t ui32;
+-} float_ui32_type;
+-
+-#include "radeon_lock.h"
+ #include "radeon_screen.h"
+-#include "main/mm.h"
+-
+-#include "math/m_vector.h"
+-
+-#define TEX_0   0x1
+-#define TEX_1   0x2
+-#define TEX_2   0x4
+-#define TEX_ALL 0x7
+-
+-/* Rasterizing fallbacks */
+-/* See correponding strings in r200_swtcl.c */
+-#define RADEON_FALLBACK_TEXTURE		0x0001
+-#define RADEON_FALLBACK_DRAW_BUFFER	0x0002
+-#define RADEON_FALLBACK_STENCIL		0x0004
+-#define RADEON_FALLBACK_RENDER_MODE	0x0008
+-#define RADEON_FALLBACK_BLEND_EQ	0x0010
+-#define RADEON_FALLBACK_BLEND_FUNC	0x0020
+-#define RADEON_FALLBACK_DISABLE 	0x0040
+-#define RADEON_FALLBACK_BORDER_MODE	0x0080
+-
+-/* The blit width for texture uploads
+- */
+-#define BLIT_WIDTH_BYTES 1024
+ 
+-/* Use the templated vertex format:
+- */
+-#define COLOR_IS_RGBA
+-#define TAG(x) radeon##x
+-#include "tnl_dd/t_dd_vertex.h"
+-#undef TAG
+-
+-typedef void (*radeon_tri_func) (radeonContextPtr,
+-				 radeonVertex *,
+-				 radeonVertex *, radeonVertex *);
+-
+-typedef void (*radeon_line_func) (radeonContextPtr,
+-				  radeonVertex *, radeonVertex *);
++#include "radeon_common.h"
+ 
+-typedef void (*radeon_point_func) (radeonContextPtr, radeonVertex *);
+-
+-struct radeon_colorbuffer_state {
+-	GLuint clear;
+-	int roundEnable;
+-};
+ 
+-struct radeon_depthbuffer_state {
+-	GLuint clear;
+-	GLfloat scale;
+-};
++struct r100_context;
++typedef struct r100_context r100ContextRec;
++typedef struct r100_context *r100ContextPtr;
+ 
+-struct radeon_scissor_state {
+-	drm_clip_rect_t rect;
+-	GLboolean enabled;
++#include "radeon_lock.h"
+ 
+-	GLuint numClipRects;	/* Cliprects active */
+-	GLuint numAllocedClipRects;	/* Cliprects available */
+-	drm_clip_rect_t *pClipRects;
+-};
+ 
+-struct radeon_stencilbuffer_state {
+-	GLboolean hwBuffer;
+-	GLuint clear;		/* rb3d_stencilrefmask value */
+-};
+ 
+-struct radeon_stipple_state {
+-	GLuint mask[32];
+-};
++#define R100_TEX_ALL 0x7
+ 
+ /* used for both tcl_vtx and vc_frmt tex bits (they are identical) */
+ #define RADEON_ST_BIT(unit) \
+@@ -141,42 +73,6 @@ struct radeon_stipple_state {
+ #define RADEON_Q_BIT(unit) \
+ (unit == 0 ? RADEON_CP_VC_FRMT_Q0 : (RADEON_CP_VC_FRMT_Q1 >> 2) << (2 * unit))
+ 
+-typedef struct radeon_tex_obj radeonTexObj, *radeonTexObjPtr;
+-
+-/* Texture object in locally shared texture space.
+- */
+-struct radeon_tex_obj {
+-	driTextureObject base;
+-
+-	GLuint bufAddr;		/* Offset to start of locally
+-				   shared texture block */
+-
+-	GLuint dirty_state;	/* Flags (1 per texunit) for
+-				   whether or not this texobj
+-				   has dirty hardware state
+-				   (pp_*) that needs to be
+-				   brought into the
+-				   texunit. */
+-
+-	drm_radeon_tex_image_t image[6][RADEON_MAX_TEXTURE_LEVELS];
+-	/* Six, for the cube faces */
+-
+-	GLboolean image_override; /* Image overridden by GLX_EXT_tfp */
+-
+-	GLuint pp_txfilter;	/* hardware register values */
+-	GLuint pp_txformat;
+-	GLuint pp_txoffset;	/* Image location in texmem.
+-				   All cube faces follow. */
+-	GLuint pp_txsize;	/* npot only */
+-	GLuint pp_txpitch;	/* npot only */
+-	GLuint pp_border_color;
+-	GLuint pp_cubic_faces;	/* cube face 1,2,3,4 log2 sizes */
+-
+-	GLboolean border_fallback;
+-
+-	GLuint tile_bits;	/* hw texture tile bits used on this texture */
+-};
+-
+ struct radeon_texture_env_state {
+ 	radeonTexObjPtr texobj;
+ 	GLenum format;
+@@ -187,17 +83,6 @@ struct radeon_texture_state {
+ 	struct radeon_texture_env_state unit[RADEON_MAX_TEXTURE_UNITS];
+ };
+ 
+-struct radeon_state_atom {
+-	struct radeon_state_atom *next, *prev;
+-	const char *name;	/* for debug */
+-	int cmd_size;		/* size in bytes */
+-	GLuint is_tcl;
+-	int *cmd;		/* one or more cmd's */
+-	int *lastcmd;		/* one or more cmd's */
+-	GLboolean dirty;	/* dirty-mark in emit_state_list */
+-	 GLboolean(*check) (GLcontext *);	/* is this state active? */
+-};
+-
+ /* Trying to keep these relatively short as the variables are becoming
+  * extravagently long.  Drop the driver name prefix off the front of
+  * everything - I think we know which driver we're in by now, and keep the
+@@ -410,10 +295,7 @@ struct radeon_state_atom {
+ #define SHN_SHININESS      1
+ #define SHN_STATE_SIZE     2
+ 
+-struct radeon_hw_state {
+-	/* Head of the linked list of state atoms. */
+-	struct radeon_state_atom atomlist;
+-
++struct r100_hw_state {
+ 	/* Hardware state, stored as cmdbuf commands:  
+ 	 *   -- Need to doublebuffer for
+ 	 *           - eliding noop statechange loops? (except line stipple count)
+@@ -438,86 +320,16 @@ struct radeon_hw_state {
+ 	struct radeon_state_atom glt;
+ 	struct radeon_state_atom txr[3];	/* for NPOT */
+ 
+-	int max_state_size;	/* Number of bytes necessary for a full state emit. */
+-	GLboolean is_dirty, all_dirty;
+ };
+ 
+-struct radeon_state {
+-	/* Derived state for internal purposes:
+-	 */
+-	struct radeon_colorbuffer_state color;
+-	struct radeon_depthbuffer_state depth;
+-	struct radeon_scissor_state scissor;
+-	struct radeon_stencilbuffer_state stencil;
++
++struct r100_state {
+ 	struct radeon_stipple_state stipple;
+ 	struct radeon_texture_state texture;
+ };
+ 
+-/* Need refcounting on dma buffers:
+- */
+-struct radeon_dma_buffer {
+-	int refcount;		/* the number of retained regions in buf */
+-	drmBufPtr buf;
+-};
+-
+-#define GET_START(rvb) (rmesa->radeonScreen->gart_buffer_offset +			\
+-			(rvb)->address - rmesa->dma.buf0_address +	\
+-			(rvb)->start)
+-
+-/* A retained region, eg vertices for indexed vertices.
+- */
+-struct radeon_dma_region {
+-	struct radeon_dma_buffer *buf;
+-	char *address;		/* == buf->address */
+-	int start, end, ptr;	/* offsets from start of buf */
+-	int aos_start;
+-	int aos_stride;
+-	int aos_size;
+-};
+-
+-struct radeon_dma {
+-	/* Active dma region.  Allocations for vertices and retained
+-	 * regions come from here.  Also used for emitting random vertices,
+-	 * these may be flushed by calling flush_current();
+-	 */
+-	struct radeon_dma_region current;
+-
+-	void (*flush) (radeonContextPtr);
+-
+-	char *buf0_address;	/* start of buf[0], for index calcs */
+-	GLuint nr_released_bufs;	/* flush after so many buffers released */
+-};
+-
+-struct radeon_dri_mirror {
+-	__DRIcontextPrivate *context;	/* DRI context */
+-	__DRIscreenPrivate *screen;	/* DRI screen */
+-
+-   /**
+-    * DRI drawable bound to this context for drawing.
+-    */
+-	__DRIdrawablePrivate *drawable;
+-
+-   /**
+-    * DRI drawable bound to this context for reading.
+-    */
+-	__DRIdrawablePrivate *readable;
+-
+-	drm_context_t hwContext;
+-	drm_hw_lock_t *hwLock;
+-	int fd;
+-	int drmMinor;
+-};
+-
+ #define RADEON_CMD_BUF_SZ  (8*1024)
+-
+-struct radeon_store {
+-	GLuint statenr;
+-	GLuint primnr;
+-	char cmd_buf[RADEON_CMD_BUF_SZ];
+-	int cmd_used;
+-	int elts_start;
+-};
+-
++#define R200_ELT_BUF_SZ  (8*1024)
+ /* radeon_tcl.c
+  */
+ struct radeon_tcl_info {
+@@ -529,30 +341,23 @@ struct radeon_tcl_info {
+ 	 */
+ 	GLvector4f ObjClean;
+ 
+-	struct radeon_dma_region *aos_components[8];
++        struct radeon_aos aos[8];
+ 	GLuint nr_aos_components;
+ 
+ 	GLuint *Elts;
+ 
+-	struct radeon_dma_region indexed_verts;
+-	struct radeon_dma_region obj;
+-	struct radeon_dma_region rgba;
+-	struct radeon_dma_region spec;
+-	struct radeon_dma_region fog;
+-	struct radeon_dma_region tex[RADEON_MAX_TEXTURE_UNITS];
+-	struct radeon_dma_region norm;
++	struct radeon_bo *indexed_bo;
++
++        int elt_cmd_offset; /** Offset into the cmdbuf */
++	int elt_cmd_start;
++        int elt_used;
+ };
+ 
+ /* radeon_swtcl.c
+  */
+-struct radeon_swtcl_info {
+-	GLuint RenderIndex;
+-	GLuint vertex_size;
++struct r100_swtcl_info {
+ 	GLuint vertex_format;
+ 
+-	struct tnl_attr_map vertex_attrs[VERT_ATTRIB_MAX];
+-	GLuint vertex_attr_count;
+-
+ 	GLubyte *verts;
+ 
+ 	/* Fallback rasterization functions
+@@ -561,10 +366,6 @@ struct radeon_swtcl_info {
+ 	radeon_line_func draw_line;
+ 	radeon_tri_func draw_tri;
+ 
+-	GLuint hw_primitive;
+-	GLenum render_primitive;
+-	GLuint numverts;
+-
+    /**
+     * Offset of the 4UB color data within a hardware (swtcl) vertex.
+     */
+@@ -576,22 +377,9 @@ struct radeon_swtcl_info {
+ 	GLuint specoffset;
+ 
+ 	GLboolean needproj;
+-
+-	struct radeon_dma_region indexed_verts;
+ };
+ 
+-struct radeon_ioctl {
+-	GLuint vertex_offset;
+-	GLuint vertex_size;
+-};
+ 
+-#define RADEON_MAX_PRIMS 64
+-
+-struct radeon_prim {
+-	GLuint start;
+-	GLuint end;
+-	GLuint prim;
+-};
+ 
+ /* A maximum total of 20 elements per vertex:  3 floats for position, 3
+  * floats for normal, 4 floats for color, 4 bytes for secondary color,
+@@ -602,59 +390,18 @@ struct radeon_prim {
+  */
+ #define RADEON_MAX_VERTEX_SIZE 20
+ 
+-struct radeon_context {
+-	GLcontext *glCtx;	/* Mesa context */
++struct r100_context {
++        struct radeon_context radeon;
+ 
+ 	/* Driver and hardware state management
+ 	 */
+-	struct radeon_hw_state hw;
+-	struct radeon_state state;
+-
+-	/* Texture object bookkeeping
+-	 */
+-	unsigned nr_heaps;
+-	driTexHeap *texture_heaps[RADEON_NR_TEX_HEAPS];
+-	driTextureObject swapped;
+-	int texture_depth;
+-	float initialMaxAnisotropy;
+-
+-	/* Rasterization and vertex state:
+-	 */
+-	GLuint TclFallback;
+-	GLuint Fallback;
+-	GLuint NewGLState;
+-	 DECLARE_RENDERINPUTS(tnl_index_bitset);	/* index of bits for last tnl_install_attrs */
++	struct r100_hw_state hw;
++	struct r100_state state;
+ 
+ 	/* Vertex buffers
+ 	 */
+ 	struct radeon_ioctl ioctl;
+-	struct radeon_dma dma;
+ 	struct radeon_store store;
+-	/* A full state emit as of the first state emit in the main store, in case
+-	 * the context is lost.
+-	 */
+-	struct radeon_store backup_store;
+-
+-	/* Page flipping
+-	 */
+-	GLuint doPageFlip;
+-
+-	/* Busy waiting
+-	 */
+-	GLuint do_usleeps;
+-	GLuint do_irqs;
+-	GLuint irqsEmitted;
+-	drm_radeon_irq_wait_t iw;
+-
+-	/* Drawable, cliprect and scissor information
+-	 */
+-	GLuint numClipRects;	/* Cliprects for the draw buffer */
+-	drm_clip_rect_t *pClipRects;
+-	unsigned int lastStamp;
+-	GLboolean lost_context;
+-	GLboolean save_on_next_emit;
+-	radeonScreenPtr radeonScreen;	/* Screen private DRI data */
+-	drm_radeon_sarea_t *sarea;	/* Private SAREA data */
+ 
+ 	/* TCL stuff
+ 	 */
+@@ -667,29 +414,13 @@ struct radeon_context {
+ 	GLmatrix tmpmat[RADEON_MAX_TEXTURE_UNITS];
+ 	GLuint last_ReallyEnabled;
+ 
+-	/* VBI
+-	 */
+-	int64_t swap_ust;
+-	int64_t swap_missed_ust;
+-
+-	GLuint swap_count;
+-	GLuint swap_missed_count;
+-
+ 	/* radeon_tcl.c
+ 	 */
+ 	struct radeon_tcl_info tcl;
+ 
+ 	/* radeon_swtcl.c
+ 	 */
+-	struct radeon_swtcl_info swtcl;
+-
+-	/* Mirrors of some DRI state
+-	 */
+-	struct radeon_dri_mirror dri;
+-
+-	/* Configuration cache
+-	 */
+-	driOptionCache optionCache;
++	struct r100_swtcl_info swtcl;
+ 
+ 	GLboolean using_hyperz;
+ 	GLboolean texmicrotile;
+@@ -703,23 +434,11 @@ struct radeon_context {
+ 	GLuint c_textureSwaps;
+ 	GLuint c_textureBytes;
+ 	GLuint c_vertexBuffers;
++
+ };
+ 
+-#define RADEON_CONTEXT(ctx)		((radeonContextPtr)(ctx->DriverCtx))
+-
+-static INLINE GLuint radeonPackColor(GLuint cpp,
+-                                     GLubyte r, GLubyte g,
+-                                     GLubyte b, GLubyte a)
+-{
+-	switch (cpp) {
+-	case 2:
+-		return PACK_COLOR_565(r, g, b);
+-	case 4:
+-		return PACK_COLOR_8888(a, r, g, b);
+-	default:
+-		return 0;
+-	}
+-}
++#define R100_CONTEXT(ctx)		((r100ContextPtr)(ctx->DriverCtx))
++
+ 
+ #define RADEON_OLD_PACKETS 1
+ 
+@@ -727,37 +446,11 @@ extern void radeonDestroyContext(__DRIcontextPrivate * driContextPriv);
+ extern GLboolean radeonCreateContext(const __GLcontextModes * glVisual,
+ 				     __DRIcontextPrivate * driContextPriv,
+ 				     void *sharedContextPrivate);
+-extern void radeonSwapBuffers(__DRIdrawablePrivate * dPriv);
+-extern void radeonCopySubBuffer(__DRIdrawablePrivate * dPriv,
+-				int x, int y, int w, int h);
+ extern GLboolean radeonMakeCurrent(__DRIcontextPrivate * driContextPriv,
+ 				   __DRIdrawablePrivate * driDrawPriv,
+ 				   __DRIdrawablePrivate * driReadPriv);
+ extern GLboolean radeonUnbindContext(__DRIcontextPrivate * driContextPriv);
+ 
+-/* ================================================================
+- * Debugging:
+- */
+-#define DO_DEBUG		1
+-
+-#if DO_DEBUG
+-extern int RADEON_DEBUG;
+-#else
+-#define RADEON_DEBUG		0
+-#endif
+-
+-#define DEBUG_TEXTURE	0x0001
+-#define DEBUG_STATE	0x0002
+-#define DEBUG_IOCTL	0x0004
+-#define DEBUG_PRIMS	0x0008
+-#define DEBUG_VERTS	0x0010
+-#define DEBUG_FALLBACKS	0x0020
+-#define DEBUG_VFMT	0x0040
+-#define DEBUG_CODEGEN	0x0080
+-#define DEBUG_VERBOSE	0x0100
+-#define DEBUG_DRI       0x0200
+-#define DEBUG_DMA       0x0400
+-#define DEBUG_SANITY    0x0800
+-#define DEBUG_SYNC      0x1000
++
+ 
+ #endif				/* __RADEON_CONTEXT_H__ */
+diff --git a/src/mesa/drivers/dri/radeon/radeon_cs_drm.h b/src/mesa/drivers/dri/radeon/radeon_cs_drm.h
+new file mode 100644
+index 0000000..984725a
+--- /dev/null
++++ b/src/mesa/drivers/dri/radeon/radeon_cs_drm.h
+@@ -0,0 +1,207 @@
++/* 
++ * Copyright © 2008 Nicolai Haehnle
++ * Copyright © 2008 Jérôme Glisse
++ * All Rights Reserved.
++ * 
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the
++ * "Software"), to deal in the Software without restriction, including
++ * without limitation the rights to use, copy, modify, merge, publish,
++ * distribute, sub license, and/or sell copies of the Software, and to
++ * permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ * 
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
++ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
++ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
++ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
++ * USE OR OTHER DEALINGS IN THE SOFTWARE.
++ *
++ * The above copyright notice and this permission notice (including the
++ * next paragraph) shall be included in all copies or substantial portions
++ * of the Software.
++ */
++/*
++ * Authors:
++ *      Aapo Tahkola <aet@rasterburn.org>
++ *      Nicolai Haehnle <prefect_@gmx.net>
++ *      Jérôme Glisse <glisse@freedesktop.org>
++ */
++#ifndef RADEON_CS_H
++#define RADEON_CS_H
++
++#include <stdint.h>
++#include <string.h>
++#include "drm.h"
++#include "radeon_drm.h"
++
++struct radeon_cs_reloc {
++    struct radeon_bo    *bo;
++    uint32_t            read_domain;
++    uint32_t            write_domain;
++    uint32_t            flags;
++};
++
++
++#define RADEON_CS_SPACE_OK 0
++#define RADEON_CS_SPACE_OP_TO_BIG 1
++#define RADEON_CS_SPACE_FLUSH 2
++
++struct radeon_cs_space_check {
++    struct radeon_bo *bo; /* buffer object to account for */
++    uint32_t read_domains; /* requested read domain mask, 0 if the bo is not read */
++    uint32_t write_domain; /* requested write domain, 0 if the bo is not written */
++    uint32_t new_accounted; /* out: scratch value filled in by the cs_space_check backend */
++};
++
++struct radeon_cs_manager;
++
++struct radeon_cs {
++    struct radeon_cs_manager    *csm; /* owning manager; every operation dispatches through csm->funcs */
++    void                        *relocs; /* backend-private relocation array */
++    uint32_t                    *packets; /* command stream dwords */
++    unsigned                    crelocs; /* number of entries in relocs */
++    unsigned                    relocs_total_size; /* running total maintained by the backend */
++    unsigned                    cdw; /* dwords written so far, i.e. next write index into packets */
++    unsigned                    ndw; /* capacity of packets in dwords */
++    int                         section; /* non-zero between cs_begin and cs_end */
++    unsigned                    section_ndw; /* dwords declared when the section was opened */
++    unsigned                    section_cdw; /* dwords written inside the current section */
++    const char                  *section_file; /* call site that opened the section, */
++    const char                  *section_func; /* kept only for the mismatch */
++    int                         section_line; /* diagnostics */
++
++};
++
++/* cs functions */
++struct radeon_cs_funcs {
++    struct radeon_cs *(*cs_create)(struct radeon_cs_manager *csm,
++                                   uint32_t ndw);
++    int (*cs_write_reloc)(struct radeon_cs *cs,
++                          struct radeon_bo *bo,
++                          uint32_t read_domain,
++                          uint32_t write_domain,
++                          uint32_t flags);
++    int (*cs_begin)(struct radeon_cs *cs,
++                    uint32_t ndw,
++                    const char *file,
++                    const char *func,
++                    int line);
++    int (*cs_end)(struct radeon_cs *cs,
++                  const char *file,
++                  const char *func,
++                  int line);
++    int (*cs_emit)(struct radeon_cs *cs);
++    int (*cs_destroy)(struct radeon_cs *cs);
++    int (*cs_erase)(struct radeon_cs *cs);
++    int (*cs_need_flush)(struct radeon_cs *cs);
++    void (*cs_print)(struct radeon_cs *cs, FILE *file);
++    int (*cs_space_check)(struct radeon_cs *cs, struct radeon_cs_space_check *bos,
++			  int num_bo);
++};
++
++struct radeon_cs_manager {
++    struct radeon_cs_funcs  *funcs; /* backend operation table used by the inline wrappers below */
++    int                     fd; /* descriptor the backend submits on (legacy: drmCommandWrite) */
++    uint32_t vram_limit, gart_limit; /* per-domain budgets, set via radeon_cs_set_limit() */
++    uint32_t vram_write_used, gart_write_used; /* write usage accounted so far by cs_space_check */
++    uint32_t read_used; /* read usage accounted so far by cs_space_check */
++};
++
++static inline struct radeon_cs *radeon_cs_create(struct radeon_cs_manager *csm,
++                                                 uint32_t ndw)
++{
++    return csm->funcs->cs_create(csm, ndw);
++}
++
++static inline int radeon_cs_write_reloc(struct radeon_cs *cs,
++                                        struct radeon_bo *bo,
++                                        uint32_t read_domain,
++                                        uint32_t write_domain,
++                                        uint32_t flags)
++{
++    return cs->csm->funcs->cs_write_reloc(cs,
++                                          bo,
++                                          read_domain,
++                                          write_domain,
++                                          flags);
++}
++
++static inline int radeon_cs_begin(struct radeon_cs *cs,
++                                  uint32_t ndw,
++                                  const char *file,
++                                  const char *func,
++                                  int line)
++{
++    return cs->csm->funcs->cs_begin(cs, ndw, file, func, line);
++}
++
++static inline int radeon_cs_end(struct radeon_cs *cs,
++                                const char *file,
++                                const char *func,
++                                int line)
++{
++    return cs->csm->funcs->cs_end(cs, file, func, line);
++}
++
++static inline int radeon_cs_emit(struct radeon_cs *cs)
++{
++    return cs->csm->funcs->cs_emit(cs);
++}
++
++static inline int radeon_cs_destroy(struct radeon_cs *cs)
++{
++    return cs->csm->funcs->cs_destroy(cs);
++}
++
++static inline int radeon_cs_erase(struct radeon_cs *cs)
++{
++    return cs->csm->funcs->cs_erase(cs);
++}
++
++static inline int radeon_cs_need_flush(struct radeon_cs *cs)
++{
++    return cs->csm->funcs->cs_need_flush(cs);
++}
++
++static inline void radeon_cs_print(struct radeon_cs *cs, FILE *file)
++{
++    cs->csm->funcs->cs_print(cs, file);
++}
++
++static inline int radeon_cs_space_check(struct radeon_cs *cs,
++					    struct radeon_cs_space_check *bos,
++					    int num_bo)
++{
++    return cs->csm->funcs->cs_space_check(cs, bos, num_bo);
++}
++
++static inline void radeon_cs_set_limit(struct radeon_cs *cs, uint32_t domain, uint32_t limit)
++{
++    /* Only VRAM is special-cased; any other domain value sets the GART limit. */
++    if (domain == RADEON_GEM_DOMAIN_VRAM)
++	cs->csm->vram_limit = limit;
++    else
++	cs->csm->gart_limit = limit;
++}
++
++static inline void radeon_cs_write_dword(struct radeon_cs *cs, uint32_t dword)
++{
++    cs->packets[cs->cdw++] = dword; /* no bounds check against cs->ndw here */
++    if (cs->section) { /* count the dword against the open section, if any */
++        cs->section_cdw++;
++    }
++}
++
++static inline void radeon_cs_write_qword(struct radeon_cs *cs, uint64_t qword)
++{
++    /* Byte-copy the 64-bit value into the next two dword slots; no bounds check against cs->ndw. */
++    memcpy(cs->packets + cs->cdw, &qword, sizeof(qword));
++    cs->cdw+=2;
++    if (cs->section) {
++        cs->section_cdw+=2;
++    }
++}
++#endif
+diff --git a/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c b/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c
+new file mode 100644
+index 0000000..b47b095
+--- /dev/null
++++ b/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c
+@@ -0,0 +1,504 @@
++/* 
++ * Copyright © 2008 Nicolai Haehnle
++ * Copyright © 2008 Jérôme Glisse
++ * All Rights Reserved.
++ * 
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the
++ * "Software"), to deal in the Software without restriction, including
++ * without limitation the rights to use, copy, modify, merge, publish,
++ * distribute, sub license, and/or sell copies of the Software, and to
++ * permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ * 
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
++ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
++ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
++ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
++ * USE OR OTHER DEALINGS IN THE SOFTWARE.
++ *
++ * The above copyright notice and this permission notice (including the
++ * next paragraph) shall be included in all copies or substantial portions
++ * of the Software.
++ */
++/*
++ * Authors:
++ *      Aapo Tahkola <aet@rasterburn.org>
++ *      Nicolai Haehnle <prefect_@gmx.net>
++ *      Jérôme Glisse <glisse@freedesktop.org>
++ */
++#include <errno.h>
++
++#include "radeon_bocs_wrapper.h"
++
++struct cs_manager_legacy {
++    struct radeon_cs_manager    base;
++    struct radeon_context       *ctx;
++    /* hack for scratch stuff */
++    uint32_t                    pending_age;
++    uint32_t                    pending_count;
++
++
++};
++
++struct cs_reloc_legacy {
++    struct radeon_cs_reloc  base;
++    uint32_t                cindices;
++    uint32_t                *indices;
++};
++
++
++static struct radeon_cs *cs_create(struct radeon_cs_manager *csm,
++                                   uint32_t ndw)
++{
++    struct radeon_cs *cs;
++
++    cs = (struct radeon_cs*)calloc(1, sizeof(struct radeon_cs));
++    if (cs == NULL) {
++        return NULL;
++    }
++    cs->csm = csm;
++    cs->ndw = (ndw + 0x3FF) & (~0x3FF);
++    cs->packets = (uint32_t*)malloc(4*cs->ndw);
++    if (cs->packets == NULL) {
++        free(cs);
++        return NULL;
++    }
++    cs->relocs_total_size = 0;
++    return cs;
++}
++
++static int cs_write_reloc(struct radeon_cs *cs,
++                          struct radeon_bo *bo,
++                          uint32_t read_domain,
++                          uint32_t write_domain,
++                          uint32_t flags)
++{
++    struct cs_reloc_legacy *relocs;
++    int i;
++
++    relocs = (struct cs_reloc_legacy *)cs->relocs;
++    /* check domains: exactly one of read_domain/write_domain must be set */
++    if ((read_domain && write_domain) || (!read_domain && !write_domain)) {
++        /* in one CS a bo can only be in read or write domain but not
++         * in read & write domain at the same time
++         */
++        return -EINVAL;
++    }
++    if (read_domain == RADEON_GEM_DOMAIN_CPU) {
++        return -EINVAL;
++    }
++    if (write_domain == RADEON_GEM_DOMAIN_CPU) {
++        return -EINVAL;
++    }
++    /* check if bo is already referenced (bos are matched by handle) */
++    for(i = 0; i < cs->crelocs; i++) {
++        uint32_t *indices;
++
++        if (relocs[i].base.bo->handle == bo->handle) {
++            /* A bo must stay read-only or write-only within one CS. The
++             * arguments were already checked to have exactly one of
++             * read_domain/write_domain set, so it is enough to verify that
++             * the new relocation uses the same kind of domain as the one
++             * recorded earlier for this bo.
++             */
++            if (relocs[i].base.read_domain && !read_domain) {
++                return -EINVAL;
++            }
++            if (relocs[i].base.write_domain && !write_domain) {
++                return -EINVAL;
++            }
++            relocs[i].base.read_domain |= read_domain;
++            relocs[i].base.write_domain |= write_domain;
++            /* record the index of the last written dword for this bo */
++            relocs[i].cindices++;
++            indices = (uint32_t*)realloc(relocs[i].indices,
++                                         relocs[i].cindices * 4);
++            if (indices == NULL) {
++                relocs[i].cindices -= 1;
++                return -ENOMEM;
++            }
++            relocs[i].indices = indices;
++            relocs[i].indices[relocs[i].cindices - 1] = cs->cdw - 1;
++            return 0;
++        }
++    }
++    /* first reference to this bo: append a new reloc entry */
++    relocs = (struct cs_reloc_legacy*)
++             realloc(cs->relocs,
++                     sizeof(struct cs_reloc_legacy) * (cs->crelocs + 1));
++    if (relocs == NULL) {
++        return -ENOMEM;
++    }
++    cs->relocs = relocs;
++    relocs[cs->crelocs].base.bo = bo;
++    relocs[cs->crelocs].base.read_domain = read_domain;
++    relocs[cs->crelocs].base.write_domain = write_domain;
++    relocs[cs->crelocs].base.flags = flags;
++    relocs[cs->crelocs].indices = (uint32_t*)malloc(4);
++    if (relocs[cs->crelocs].indices == NULL) {
++        return -ENOMEM;
++    }
++    relocs[cs->crelocs].indices[0] = cs->cdw - 1;
++    relocs[cs->crelocs].cindices = 1;
++    cs->relocs_total_size += radeon_bo_legacy_relocs_size(bo);
++    cs->crelocs++;
++    radeon_bo_ref(bo);
++    return 0;
++}
++
++static int cs_begin(struct radeon_cs *cs,
++                    uint32_t ndw,
++                    const char *file,
++                    const char *func,
++                    int line)
++{
++    if (cs->section) {
++        fprintf(stderr, "CS already in a section(%s,%s,%d)\n",
++                cs->section_file, cs->section_func, cs->section_line);
++        fprintf(stderr, "CS can't start section(%s,%s,%d)\n",
++                file, func, line);
++        return -EPIPE;
++    }
++    cs->section = 1;
++    cs->section_ndw = ndw;
++    cs->section_cdw = 0;
++    cs->section_file = file;
++    cs->section_func = func;
++    cs->section_line = line;
++
++    /* Grow packets[] so that the ndw dwords declared for this section fit. */
++    if (cs->cdw + ndw > cs->ndw) {
++        uint32_t tmp, *ptr;
++
++        /* Needed size rounded up to a multiple of 0x400 dwords, as in cs_create. */
++        tmp = (cs->cdw + ndw + 0x3FF) & (~0x3FF);
++        ptr = (uint32_t*)realloc(cs->packets, 4 * tmp);
++        if (ptr == NULL) {
++            return -ENOMEM;
++        }
++        cs->packets = ptr;
++        cs->ndw = tmp;
++    }
++
++    return 0;
++}
++
++static int cs_end(struct radeon_cs *cs,
++                  const char *file,
++                  const char *func,
++                  int line)
++
++{
++    if (!cs->section) {
++        fprintf(stderr, "CS no section to end at (%s,%s,%d)\n",
++                file, func, line);
++        return -EPIPE;
++    }
++    cs->section = 0;
++    if (cs->section_ndw != cs->section_cdw) {
++        fprintf(stderr, "CS section size missmatch start at (%s,%s,%d) %d vs %d\n",
++                cs->section_file, cs->section_func, cs->section_line, cs->section_ndw, cs->section_cdw);
++        fprintf(stderr, "CS section end at (%s,%s,%d)\n",
++                file, func, line);
++        return -EPIPE;
++    }
++    return 0;
++}
++
++static int cs_process_relocs(struct radeon_cs *cs)
++{
++    struct cs_manager_legacy *csm = (struct cs_manager_legacy*)cs->csm;
++    struct cs_reloc_legacy *relocs;
++    int i, j, r;
++    /* Validate each referenced bo and add its start offset to every packet dword recorded for it. */
++    csm = (struct cs_manager_legacy*)cs->csm;
++    relocs = (struct cs_reloc_legacy *)cs->relocs;
++ restart:
++    for (i = 0; i < cs->crelocs; i++) {
++        for (j = 0; j < relocs[i].cindices; j++) {
++            uint32_t soffset, eoffset;
++            /* NOTE(review): -EAGAIN rescans from i = 0, so dwords patched before the restart get soffset added again - confirm this cannot occur. */
++            r = radeon_bo_legacy_validate(relocs[i].base.bo,
++                                           &soffset, &eoffset);
++	    if (r == -EAGAIN)
++	      goto restart;
++            if (r) {
++                fprintf(stderr, "validated %p [0x%08X, 0x%08X]\n",
++                        relocs[i].base.bo, soffset, eoffset);
++                return r;
++            }
++            cs->packets[relocs[i].indices[j]] += soffset;
++            if (cs->packets[relocs[i].indices[j]] >= eoffset) {
++	      /*                radeon_bo_debug(relocs[i].base.bo, 12); */
++                fprintf(stderr, "validated %p [0x%08X, 0x%08X]\n",
++                        relocs[i].base.bo, soffset, eoffset);
++                fprintf(stderr, "above end: %p 0x%08X 0x%08X\n",
++                        relocs[i].base.bo,
++                        cs->packets[relocs[i].indices[j]],
++                        eoffset);
++                exit(0); /* NOTE(review): terminates with status 0 on a fatal error */
++                return -EINVAL; /* unreachable after exit() */
++            }
++        }
++    }
++    return 0;
++}
++
++static int cs_set_age(struct radeon_cs *cs)
++{
++    struct cs_manager_legacy *csm = (struct cs_manager_legacy*)cs->csm;
++    struct cs_reloc_legacy *relocs;
++    int i;
++
++    relocs = (struct cs_reloc_legacy *)cs->relocs;
++    for (i = 0; i < cs->crelocs; i++) {
++        radeon_bo_legacy_pending(relocs[i].base.bo, csm->pending_age);
++        radeon_bo_unref(relocs[i].base.bo);
++    }
++    return 0;
++}
++
++static void dump_cmdbuf(struct radeon_cs *cs)
++{
++  int i;
++  for (i = 0; i < cs->cdw; i++){
++    fprintf(stderr,"%x: %08x\n", i, cs->packets[i]);
++  }
++
++}
++static int cs_emit(struct radeon_cs *cs)
++{
++    struct cs_manager_legacy *csm = (struct cs_manager_legacy*)cs->csm;
++    drm_radeon_cmd_buffer_t cmd;
++    drm_r300_cmd_header_t age;
++    uint64_t ull;
++    int r;
++    /* Patch relocations into the stream, submit it with DRM_RADEON_CMDBUF, then age the bos. */
++    csm->ctx->vtbl.emit_cs_header(cs, csm->ctx);
++
++    /* append buffer age */
++    if (IS_R300_CLASS(csm->ctx->radeonScreen)) {
++      age.scratch.cmd_type = R300_CMD_SCRATCH;
++      /* Scratch register 2 corresponds to what radeonGetAge polls */
++      csm->pending_age = 0;
++      csm->pending_count = 1;
++      ull = (uint64_t) (intptr_t) &csm->pending_age;
++      age.scratch.reg = 2;
++      age.scratch.n_bufs = 1;
++      age.scratch.flags = 0;
++      radeon_cs_write_dword(cs, age.u);
++      radeon_cs_write_qword(cs, ull);
++      radeon_cs_write_dword(cs, 0);
++    }
++
++    r = cs_process_relocs(cs);
++    if (r) {
++        return 0; /* NOTE(review): relocation failure is reported as success and nothing is submitted - confirm intended */
++    }
++
++    cmd.buf = (char *)cs->packets;
++    cmd.bufsz = cs->cdw * 4;
++    if (csm->ctx->state.scissor.enabled) {
++        cmd.nbox = csm->ctx->state.scissor.numClipRects;
++        cmd.boxes = (drm_clip_rect_t *) csm->ctx->state.scissor.pClipRects;
++    } else {
++        cmd.nbox = csm->ctx->numClipRects;
++        cmd.boxes = (drm_clip_rect_t *) csm->ctx->pClipRects;
++    }
++
++    //dump_cmdbuf(cs);
++
++    r = drmCommandWrite(cs->csm->fd, DRM_RADEON_CMDBUF, &cmd, sizeof(cmd));
++    if (r) {
++        return r;
++    }
++    if (!IS_R300_CLASS(csm->ctx->radeonScreen)) {
++	drm_radeon_irq_emit_t emit_cmd;
++	emit_cmd.irq_seq = &csm->pending_age;
++	r = drmCommandWrite(cs->csm->fd, DRM_RADEON_IRQ_EMIT, &emit_cmd, sizeof(emit_cmd));
++	if (r) {
++		return r;
++	}
++    }
++    cs_set_age(cs);
++    /* The space accounting starts over for the next command stream. */
++    cs->csm->read_used = 0;
++    cs->csm->vram_write_used = 0;
++    cs->csm->gart_write_used = 0;
++    return 0;
++}
++
++static inline void cs_free_reloc(void *relocs_p, int crelocs) /* frees each entry's indices[], not the array itself */
++{
++    struct cs_reloc_legacy *relocs = relocs_p;
++    int i;
++    if (!relocs_p) /* no relocation was ever recorded */
++      return;
++    for (i = 0; i < crelocs; i++)
++      free(relocs[i].indices);
++}
++
++static int cs_destroy(struct radeon_cs *cs)
++{
++    cs_free_reloc(cs->relocs, cs->crelocs);
++    free(cs->relocs);
++    free(cs->packets);
++    free(cs);
++    return 0;
++}
++
++static int cs_erase(struct radeon_cs *cs)
++{
++    cs_free_reloc(cs->relocs, cs->crelocs);
++    free(cs->relocs);
++    cs->relocs_total_size = 0;
++    cs->relocs = NULL;
++    cs->crelocs = 0;
++    cs->cdw = 0;
++    cs->section = 0;
++    return 0;
++}
++
++static int cs_need_flush(struct radeon_cs *cs)
++{
++    /* this function used to flush when the BO usage got to
++     * a certain size, now the higher levels handle this better */
++    return 0;
++}
++
++static void cs_print(struct radeon_cs *cs, FILE *file)
++{
++}
++
++static int cs_check_space(struct radeon_cs *cs, struct radeon_cs_space_check *bos, int num_bo)
++{
++    struct radeon_cs_manager *csm = cs->csm;
++    int this_op_read = 0, this_op_gart_write = 0, this_op_vram_write = 0;
++    uint32_t read_domains, write_domain;
++    int i;
++    struct radeon_bo *bo;
++    /* Account bos[] against the manager's VRAM/GART limits. Returns
++     * RADEON_CS_SPACE_OK, RADEON_CS_SPACE_FLUSH (flush, then retry) or
++     * RADEON_CS_SPACE_OP_TO_BIG (this operation alone exceeds a limit). */
++    if (num_bo == 0)
++        return 0;
++
++    /* prepare: total up this operation without modifying any bo yet */
++    for (i = 0; i < num_bo; i++) {
++        bo = bos[i].bo;
++        /* default to the current accounting so the commit below is a no-op for skipped bos */
++        bos[i].new_accounted = bo->space_accounted;
++        read_domains = bos[i].read_domains;
++        write_domain = bos[i].write_domain;
++
++        /* pinned bos don't count */
++        if (radeon_legacy_bo_is_static(bo))
++            continue;
++
++        /* already accounted this bo */
++        if (write_domain && (write_domain == bo->space_accounted))
++            continue;
++
++        if (read_domains && ((read_domains << 16) == bo->space_accounted))
++            continue;
++
++        if (bo->space_accounted == 0) {
++            if (write_domain == RADEON_GEM_DOMAIN_VRAM)
++                this_op_vram_write += bo->size;
++            else if (write_domain == RADEON_GEM_DOMAIN_GTT)
++                this_op_gart_write += bo->size;
++            else
++                this_op_read += bo->size;
++            bos[i].new_accounted = (read_domains << 16) | write_domain;
++        } else {
++            uint16_t old_read, old_write;
++
++            old_read = bo->space_accounted >> 16;
++            old_write = bo->space_accounted & 0xffff;
++
++            if (write_domain && (old_read & write_domain)) {
++                bos[i].new_accounted = write_domain;
++                /* moving from read to a write domain */
++                if (write_domain == RADEON_GEM_DOMAIN_VRAM) {
++                    this_op_read -= bo->size;
++                    this_op_vram_write += bo->size;
++                } else if (write_domain == RADEON_GEM_DOMAIN_GTT) {
++                    this_op_read -= bo->size;
++                    this_op_gart_write += bo->size;
++                }
++            } else if (read_domains & old_write) {
++                bos[i].new_accounted = bo->space_accounted & 0xffff;
++            } else {
++                /* rewrite the domains */
++                if (write_domain != old_write)
++                    fprintf(stderr,"WRITE DOMAIN RELOC FAILURE 0x%x %d %d\n", bo->handle, write_domain, old_write);
++                if (read_domains != old_read)
++                    fprintf(stderr,"READ DOMAIN RELOC FAILURE 0x%x %d %d\n", bo->handle, read_domains, old_read);
++                return RADEON_CS_SPACE_FLUSH;
++            }
++        }
++    }
++
++    if (this_op_read < 0)
++        this_op_read = 0;
++
++    /* check sizes - operation first */
++    if ((this_op_read + this_op_gart_write > csm->gart_limit) ||
++        (this_op_vram_write > csm->vram_limit)) {
++        return RADEON_CS_SPACE_OP_TO_BIG;
++    }
++
++    if (((csm->vram_write_used + this_op_vram_write) > csm->vram_limit) ||
++        ((csm->read_used + csm->gart_write_used + this_op_gart_write + this_op_read) > csm->gart_limit)) {
++        return RADEON_CS_SPACE_FLUSH;
++    }
++
++    csm->gart_write_used += this_op_gart_write;
++    csm->vram_write_used += this_op_vram_write;
++    csm->read_used += this_op_read;
++    /* commit */
++    for (i = 0; i < num_bo; i++) {
++        bo = bos[i].bo;
++        bo->space_accounted = bos[i].new_accounted;
++    }
++
++    return RADEON_CS_SPACE_OK;
++}
++
++static struct radeon_cs_funcs  radeon_cs_legacy_funcs = {
++    cs_create,
++    cs_write_reloc,
++    cs_begin,
++    cs_end,
++    cs_emit,
++    cs_destroy,
++    cs_erase,
++    cs_need_flush,
++    cs_print,
++    cs_check_space
++};
++
++struct radeon_cs_manager *radeon_cs_manager_legacy_ctor(struct radeon_context *ctx)
++{
++    struct cs_manager_legacy *csm;
++
++    csm = (struct cs_manager_legacy*)
++          calloc(1, sizeof(struct cs_manager_legacy));
++    if (csm == NULL) {
++        return NULL;
++    }
++    csm->base.funcs = &radeon_cs_legacy_funcs;
++    csm->base.fd = ctx->dri.fd;
++    csm->ctx = ctx;
++    csm->pending_age = 1;
++    return (struct radeon_cs_manager*)csm;
++}
++
++void radeon_cs_manager_legacy_dtor(struct radeon_cs_manager *csm)
++{
++    free(csm);
++}
++
+diff --git a/src/mesa/drivers/dri/radeon/radeon_cs_legacy.h b/src/mesa/drivers/dri/radeon/radeon_cs_legacy.h
+new file mode 100644
+index 0000000..e177b4b
+--- /dev/null
++++ b/src/mesa/drivers/dri/radeon/radeon_cs_legacy.h
+@@ -0,0 +1,40 @@
++/* 
++ * Copyright © 2008 Nicolai Haehnle
++ * Copyright © 2008 Jérôme Glisse
++ * All Rights Reserved.
++ * 
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the
++ * "Software"), to deal in the Software without restriction, including
++ * without limitation the rights to use, copy, modify, merge, publish,
++ * distribute, sub license, and/or sell copies of the Software, and to
++ * permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ * 
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
++ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
++ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
++ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
++ * USE OR OTHER DEALINGS IN THE SOFTWARE.
++ *
++ * The above copyright notice and this permission notice (including the
++ * next paragraph) shall be included in all copies or substantial portions
++ * of the Software.
++ */
++/*
++ * Authors:
++ *      Aapo Tahkola <aet@rasterburn.org>
++ *      Nicolai Haehnle <prefect_@gmx.net>
++ *      Jérôme Glisse <glisse@freedesktop.org>
++ */
++#ifndef RADEON_CS_LEGACY_H
++#define RADEON_CS_LEGACY_H
++
++#include "radeon_common.h"
++
++struct radeon_cs_manager *radeon_cs_manager_legacy_ctor(struct radeon_context *ctx);
++void radeon_cs_manager_legacy_dtor(struct radeon_cs_manager *csm);
++
++#endif
+diff --git a/src/mesa/drivers/dri/radeon/radeon_dma.c b/src/mesa/drivers/dri/radeon/radeon_dma.c
+new file mode 100644
+index 0000000..393b121
+--- /dev/null
++++ b/src/mesa/drivers/dri/radeon/radeon_dma.c
+@@ -0,0 +1,323 @@
++/**************************************************************************
++
++Copyright (C) 2004 Nicolai Haehnle.
++Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
++
++The Weather Channel (TM) funded Tungsten Graphics to develop the
++initial release of the Radeon 8500 driver under the XFree86 license.
++This notice must be preserved.
++
++All Rights Reserved.
++
++Permission is hereby granted, free of charge, to any person obtaining a
++copy of this software and associated documentation files (the "Software"),
++to deal in the Software without restriction, including without limitation
++on the rights to use, copy, modify, merge, publish, distribute, sub
++license, and/or sell copies of the Software, and to permit persons to whom
++the Software is furnished to do so, subject to the following conditions:
++
++The above copyright notice and this permission notice (including the next
++paragraph) shall be included in all copies or substantial portions of the
++Software.
++
++THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
++ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
++DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
++OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
++USE OR OTHER DEALINGS IN THE SOFTWARE.
++
++**************************************************************************/
++
++#include "radeon_common.h"
++
++#if defined(USE_X86_ASM)
++#define COPY_DWORDS( dst, src, nr )					\
++do {									\
++	int __tmp;							\
++	__asm__ __volatile__( "rep ; movsl"				\
++			      : "=%c" (__tmp), "=D" (dst), "=S" (__tmp)	\
++			      : "0" (nr),				\
++			        "D" ((long)dst),			\
++			        "S" ((long)src) );			\
++} while (0)
++#else
++#define COPY_DWORDS( dst, src, nr )		\
++do {						\
++   int j;					\
++   for ( j = 0 ; j < nr ; j++ )			\
++      dst[j] = ((int *)src)[j];			\
++   dst += nr;					\
++} while (0)
++#endif
++
++static void radeonEmitVec4(uint32_t *out, GLvoid * data, int stride, int count)
++{
++	int i;
++
++	if (RADEON_DEBUG & DEBUG_VERTS)
++		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
++			__FUNCTION__, count, stride, (void *)out, (void *)data);
++
++	if (stride == 4)
++		COPY_DWORDS(out, data, count);
++	else
++		for (i = 0; i < count; i++) {
++			out[0] = *(int *)data;
++			out++;
++			data += stride;
++		}
++}
++
++void radeonEmitVec8(uint32_t *out, GLvoid * data, int stride, int count)
++{
++	int i;
++
++	if (RADEON_DEBUG & DEBUG_VERTS)
++		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
++			__FUNCTION__, count, stride, (void *)out, (void *)data);
++
++	if (stride == 8)
++		COPY_DWORDS(out, data, count * 2);
++	else
++		for (i = 0; i < count; i++) {
++			out[0] = *(int *)data;
++			out[1] = *(int *)(data + 4);
++			out += 2;
++			data += stride;
++		}
++}
++
++void radeonEmitVec12(uint32_t *out, GLvoid * data, int stride, int count)
++{
++	int i;
++
++	if (RADEON_DEBUG & DEBUG_VERTS)
++		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
++			__FUNCTION__, count, stride, (void *)out, (void *)data);
++
++	if (stride == 12) {
++		COPY_DWORDS(out, data, count * 3);
++    }
++	else
++		for (i = 0; i < count; i++) {
++			out[0] = *(int *)data;
++			out[1] = *(int *)(data + 4);
++			out[2] = *(int *)(data + 8);
++			out += 3;
++			data += stride;
++		}
++}
++
++static void radeonEmitVec16(uint32_t *out, GLvoid * data, int stride, int count)
++{
++	int i;
++
++	if (RADEON_DEBUG & DEBUG_VERTS)
++		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
++			__FUNCTION__, count, stride, (void *)out, (void *)data);
++
++	if (stride == 16)
++		COPY_DWORDS(out, data, count * 4);
++	else
++		for (i = 0; i < count; i++) {
++			out[0] = *(int *)data;
++			out[1] = *(int *)(data + 4);
++			out[2] = *(int *)(data + 8);
++			out[3] = *(int *)(data + 12);
++			out += 4;
++			data += stride;
++		}
++}
++
++void rcommon_emit_vector(GLcontext * ctx, struct radeon_aos *aos,
++			 GLvoid * data, int size, int stride, int count)
++{
++	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++	uint32_t *out;
++	/* size is the number of 32-bit components per element (1..4); stride 0 emits one element only. */
++	if (stride == 0) {
++		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
++		count = 1;
++		aos->stride = 0;
++	} else {
++		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
++		aos->stride = size;
++	}
++	/* Record the layout of the freshly allocated region in the aos descriptor. */
++	aos->components = size;
++	aos->count = count;
++	/* Copy the source elements into the region, tightly packed at 'size' dwords each. */
++	out = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
++	switch (size) {
++	case 1: radeonEmitVec4(out, data, stride, count); break;
++	case 2: radeonEmitVec8(out, data, stride, count); break;
++	case 3: radeonEmitVec12(out, data, stride, count); break;
++	case 4: radeonEmitVec16(out, data, stride, count); break;
++	default:
++		assert(0);
++		break;
++	}
++}
++
++/* Replace dma.current with a new, mapped GTT buffer of at least size bytes. */
++void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size)
++{
++	struct radeon_cs_space_check bos[1];
++	int flushed = 0, ret;
++
++	size = MAX2(size, MAX_DMA_BUF_SZ * 16);
++	if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA))
++		fprintf(stderr, "%s\n", __FUNCTION__);
++
++	/* Run any pending flush callback before the old buffer is dropped. */
++	if (rmesa->dma.flush) {
++		rmesa->dma.flush(rmesa->glCtx);
++	}
++
++	if (rmesa->dma.nr_released_bufs > 4) {
++		rcommonFlushCmdBuf(rmesa, __FUNCTION__);
++		rmesa->dma.nr_released_bufs = 0;
++	}
++
++	if (rmesa->dma.current) {
++		radeon_bo_unmap(rmesa->dma.current);
++		radeon_bo_unref(rmesa->dma.current);
++		rmesa->dma.current = 0;
++	}
++
++again_alloc:
++	rmesa->dma.current = radeon_bo_open(rmesa->radeonScreen->bom,
++					    0, size, 4, RADEON_GEM_DOMAIN_GTT,
++					    0);
++	/* Allocation failed: flush the command buffer and try again. */
++	if (!rmesa->dma.current) {
++		rcommonFlushCmdBuf(rmesa, __FUNCTION__);
++		rmesa->dma.nr_released_bufs = 0;
++		goto again_alloc;
++	}
++
++	rmesa->dma.current_used = 0;
++	rmesa->dma.current_vertexptr = 0;
++	/* Presumably asks whether the command stream can take this buffer. */
++	bos[0].bo = rmesa->dma.current;
++	bos[0].read_domains = RADEON_GEM_DOMAIN_GTT;
++	bos[0].write_domain = 0;
++	bos[0].new_accounted = 0;
++	/* NOTE(review): on FLUSH we jump back and overwrite dma.current; confirm rcommonFlushCmdBuf released it. */
++	ret = radeon_cs_space_check(rmesa->cmdbuf.cs, bos, 1);
++	if (ret == RADEON_CS_SPACE_OP_TO_BIG) {
++		fprintf(stderr,"Got OPERATION TOO BIG ILLEGAL - this cannot happen\n");
++		assert(0);
++	} else if (ret == RADEON_CS_SPACE_FLUSH) {
++		rcommonFlushCmdBuf(rmesa, __FUNCTION__);
++		if (flushed) {
++			fprintf(stderr,"flushed but still no space\n");
++			assert(0);
++		}
++		flushed = 1;
++		goto again_alloc;
++	}
++	radeon_bo_map(rmesa->dma.current, 1);
++}
++
++/* Carve bytes out of rmesa->dma.current at an aligned offset and hand back
++ * the buffer (after radeon_bo_ref) and offset.  A buffer that is missing or
++ * too small is replaced; assumes alignment is a power of two. */
++void radeonAllocDmaRegion(radeonContextPtr rmesa,
++			  struct radeon_bo **pbo, int *poffset,
++			  int bytes, int alignment)
++{
++	struct radeon_dma *dma = &rmesa->dma;
++	const int align_mask = alignment - 1;
++
++	if (RADEON_DEBUG & DEBUG_IOCTL)
++		fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);
++
++	if (dma->flush)
++		dma->flush(rmesa->glCtx);
++
++	assert(dma->current_used == dma->current_vertexptr);
++	dma->current_used = (dma->current_used + align_mask) & ~align_mask;
++
++	if (!dma->current || dma->current_used + bytes > dma->current->size)
++		radeonRefillCurrentDmaRegion(rmesa, (bytes + 15) & ~15);
++
++	*poffset = dma->current_used;
++	*pbo = dma->current;
++	radeon_bo_ref(*pbo);
++
++	/* Step past the allocation, rounding the cursor up to 16 bytes. */
++	dma->current_used = (dma->current_used + bytes + 15) & ~15;
++	dma->current_vertexptr = dma->current_used;
++	assert(dma->current_used <= dma->current->size);
++}
++
++/* Unmap and unref the current DMA buffer, if any, and clear dma.current. */
++void radeonReleaseDmaRegion(radeonContextPtr rmesa)
++{
++	if (RADEON_DEBUG & DEBUG_IOCTL)	/* %p takes a void * argument */
++		fprintf(stderr, "%s %p\n", __FUNCTION__, (void *)rmesa->dma.current);
++	if (rmesa->dma.current) {
++		rmesa->dma.nr_released_bufs++;
++		radeon_bo_unmap(rmesa->dma.current);
++		radeon_bo_unref(rmesa->dma.current);
++	}
++	rmesa->dma.current = NULL;
++}
++
++/* dma.flush callback: pass the vertices lying between dma.current_used and
++ * dma.current_vertexptr to the driver's swtcl_flush hook. */
++void rcommon_flush_last_swtcl_prim( GLcontext *ctx  )
++{
++	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++	struct radeon_dma *dma = &rmesa->dma;
++	GLuint first_vertex_offset;
++
++	if (RADEON_DEBUG & DEBUG_IOCTL)
++		fprintf(stderr, "%s\n", __FUNCTION__);
++	/* Unregister the callback before doing any work. */
++	dma->flush = NULL;
++	if (!dma->current)
++		return;
++
++	first_vertex_offset = dma->current_used;
++	assert (dma->current_used +
++		rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
++		dma->current_vertexptr);
++
++	/* Only call into the driver when the two cursors differ. */
++	if (dma->current_used != dma->current_vertexptr) {
++		dma->current_used = dma->current_vertexptr;
++		rmesa->vtbl.swtcl_flush(ctx, first_vertex_offset);
++	}
++	rmesa->swtcl.numverts = 0;
++}
++/* Reserve room for nverts vertices of vsize bytes each in the current dma region,
++ * refilling it first if they do not fit; returns where the caller writes them. */
++void *
++rcommonAllocDmaLowVerts( radeonContextPtr rmesa, int nverts, int vsize )
++{
++	GLuint bytes = vsize * nverts;
++	void *head;
++
++	if (!rmesa->dma.current || rmesa->dma.current_vertexptr + bytes > rmesa->dma.current->size) {
++		radeonRefillCurrentDmaRegion(rmesa, bytes);
++	}
++	/* Register the swtcl flush callback if no callback is pending. */
++	if (!rmesa->dma.flush) {
++		rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
++		rmesa->dma.flush = rcommon_flush_last_swtcl_prim;
++	}
++
++	ASSERT( vsize == rmesa->swtcl.vertex_size * 4 );
++	ASSERT( rmesa->dma.flush == rcommon_flush_last_swtcl_prim );
++	ASSERT( rmesa->dma.current_used +
++		rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
++		rmesa->dma.current_vertexptr );
++	/* Byte offset into the mapping: cast as rcommon_emit_vector does. */
++	head = ((char *)rmesa->dma.current->ptr + rmesa->dma.current_vertexptr);
++	rmesa->dma.current_vertexptr += bytes;
++	rmesa->swtcl.numverts += nverts;
++	return head;
++}
+diff --git a/src/mesa/drivers/dri/radeon/radeon_dma.h b/src/mesa/drivers/dri/radeon/radeon_dma.h
+new file mode 100644
+index 0000000..cee3744
+--- /dev/null
++++ b/src/mesa/drivers/dri/radeon/radeon_dma.h
+@@ -0,0 +1,51 @@
++/**************************************************************************
++
++Copyright (C) 2004 Nicolai Haehnle.
++Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
++
++The Weather Channel (TM) funded Tungsten Graphics to develop the
++initial release of the Radeon 8500 driver under the XFree86 license.
++This notice must be preserved.
++
++All Rights Reserved.
++
++Permission is hereby granted, free of charge, to any person obtaining a
++copy of this software and associated documentation files (the "Software"),
++to deal in the Software without restriction, including without limitation
++on the rights to use, copy, modify, merge, publish, distribute, sub
++license, and/or sell copies of the Software, and to permit persons to whom
++the Software is furnished to do so, subject to the following conditions:
++
++The above copyright notice and this permission notice (including the next
++paragraph) shall be included in all copies or substantial portions of the
++Software.
++
++THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
++ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
++DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
++OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
++USE OR OTHER DEALINGS IN THE SOFTWARE.
++
++**************************************************************************/
++
++#ifndef RADEON_DMA_H
++#define RADEON_DMA_H
++
++void radeonEmitVec8(uint32_t *out, GLvoid * data, int stride, int count);
++void radeonEmitVec12(uint32_t *out, GLvoid * data, int stride, int count);
++
++void rcommon_emit_vector(GLcontext * ctx, struct radeon_aos *aos,
++			 GLvoid * data, int size, int stride, int count);
++
++void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size);
++void radeonAllocDmaRegion(radeonContextPtr rmesa,
++			  struct radeon_bo **pbo, int *poffset,
++			  int bytes, int alignment);
++void radeonReleaseDmaRegion(radeonContextPtr rmesa);
++
++void rcommon_flush_last_swtcl_prim(GLcontext *ctx);
++
++void *rcommonAllocDmaLowVerts(radeonContextPtr rmesa, int nverts, int vsize);
++#endif
+diff --git a/src/mesa/drivers/dri/radeon/radeon_ioctl.c b/src/mesa/drivers/dri/radeon/radeon_ioctl.c
+index 09acf6b..fb3a236 100644
+--- a/src/mesa/drivers/dri/radeon/radeon_ioctl.c
++++ b/src/mesa/drivers/dri/radeon/radeon_ioctl.c
+@@ -43,6 +43,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "swrast/swrast.h"
+ 
+ #include "radeon_context.h"
++#include "radeon_common.h"
+ #include "radeon_state.h"
+ #include "radeon_ioctl.h"
+ #include "radeon_tcl.h"
+@@ -58,75 +59,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #define RADEON_IDLE_RETRY           16
+ 
+ 
+-static void radeonWaitForIdle( radeonContextPtr rmesa );
+-static int radeonFlushCmdBufLocked( radeonContextPtr rmesa, 
+-				    const char * caller );
+-
+-static void print_state_atom( struct radeon_state_atom *state )
+-{
+-   int i;
+-
+-   fprintf(stderr, "emit %s/%d\n", state->name, state->cmd_size);
+-
+-   if (RADEON_DEBUG & DEBUG_VERBOSE) 
+-      for (i = 0 ; i < state->cmd_size ; i++) 
+-	 fprintf(stderr, "\t%s[%d]: %x\n", state->name, i, state->cmd[i]);
+-
+-}
+-
+-static void radeonSaveHwState( radeonContextPtr rmesa )
+-{
+-   struct radeon_state_atom *atom;
+-   char * dest = rmesa->backup_store.cmd_buf;
+-
+-   if (RADEON_DEBUG & DEBUG_STATE)
+-      fprintf(stderr, "%s\n", __FUNCTION__);
+-   
+-   rmesa->backup_store.cmd_used = 0;
+-
+-   foreach( atom, &rmesa->hw.atomlist ) {
+-      if ( atom->check( rmesa->glCtx ) ) {
+-	 int size = atom->cmd_size * 4;
+-	 memcpy( dest, atom->cmd, size);
+-	 dest += size;
+-	 rmesa->backup_store.cmd_used += size;
+-	 if (RADEON_DEBUG & DEBUG_STATE)
+-	    print_state_atom( atom );
+-      }
+-   }
+-
+-   assert( rmesa->backup_store.cmd_used <= RADEON_CMD_BUF_SZ );
+-   if (RADEON_DEBUG & DEBUG_STATE)
+-      fprintf(stderr, "Returning to radeonEmitState\n");
+-}
+-
+-/* At this point we were in FlushCmdBufLocked but we had lost our context, so
+- * we need to unwire our current cmdbuf, hook the one with the saved state in
+- * it, flush it, and then put the current one back.  This is so commands at the
+- * start of a cmdbuf can rely on the state being kept from the previous one.
+- */
+-static void radeonBackUpAndEmitLostStateLocked( radeonContextPtr rmesa )
+-{
+-   GLuint nr_released_bufs;
+-   struct radeon_store saved_store;
+-
+-   if (rmesa->backup_store.cmd_used == 0)
+-      return;
+-
+-   if (RADEON_DEBUG & DEBUG_STATE)
+-      fprintf(stderr, "Emitting backup state on lost context\n");
+-
+-   rmesa->lost_context = GL_FALSE;
+-
+-   nr_released_bufs = rmesa->dma.nr_released_bufs;
+-   saved_store = rmesa->store;
+-   rmesa->dma.nr_released_bufs = 0;
+-   rmesa->store = rmesa->backup_store;
+-   radeonFlushCmdBufLocked( rmesa, __FUNCTION__ );
+-   rmesa->dma.nr_released_bufs = nr_released_bufs;
+-   rmesa->store = saved_store;
+-}
+-
+ /* =============================================================
+  * Kernel command buffer handling
+  */
+@@ -134,893 +66,308 @@ static void radeonBackUpAndEmitLostStateLocked( radeonContextPtr rmesa )
+ /* The state atoms will be emitted in the order they appear in the atom list,
+  * so this step is important.
+  */
+-void radeonSetUpAtomList( radeonContextPtr rmesa )
++void radeonSetUpAtomList( r100ContextPtr rmesa )
+ {
+-   int i, mtu = rmesa->glCtx->Const.MaxTextureUnits;
+-
+-   make_empty_list(&rmesa->hw.atomlist);
+-   rmesa->hw.atomlist.name = "atom-list";
+-
+-   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.ctx);
+-   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.set);
+-   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.lin);
+-   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.msk);
+-   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.vpt);
+-   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.tcl);
+-   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.msc);
++   int i, mtu = rmesa->radeon.glCtx->Const.MaxTextureUnits;
++
++   make_empty_list(&rmesa->radeon.hw.atomlist);
++   rmesa->radeon.hw.atomlist.name = "atom-list";
++
++   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.ctx);
++   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.set);
++   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.lin);
++   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.msk);
++   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.vpt);
++   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.tcl);
++   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.msc);
+    for (i = 0; i < mtu; ++i) {
+-       insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.tex[i]);
+-       insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.txr[i]);
+-       insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.cube[i]);
++       insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.tex[i]);
++       insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.txr[i]);
++       insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.cube[i]);
+    }
+-   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.zbs);
+-   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.mtl);
++   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.zbs);
++   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.mtl);
+    for (i = 0; i < 3 + mtu; ++i)
+-      insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.mat[i]);
++      insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.mat[i]);
+    for (i = 0; i < 8; ++i)
+-      insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.lit[i]);
++      insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.lit[i]);
+    for (i = 0; i < 6; ++i)
+-      insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.ucp[i]);
+-   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.eye);
+-   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.grd);
+-   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.fog);
+-   insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.glt);
+-}
+-
+-void radeonEmitState( radeonContextPtr rmesa )
+-{
+-   struct radeon_state_atom *atom;
+-   char *dest;
+-
+-   if (RADEON_DEBUG & (DEBUG_STATE|DEBUG_PRIMS))
+-      fprintf(stderr, "%s\n", __FUNCTION__);
+-
+-   if (rmesa->save_on_next_emit) {
+-      radeonSaveHwState(rmesa);
+-      rmesa->save_on_next_emit = GL_FALSE;
+-   }
+-
+-   /* this code used to return here but now it emits zbs */
+-
+-   /* To avoid going across the entire set of states multiple times, just check
+-    * for enough space for the case of emitting all state, and inline the
+-    * radeonAllocCmdBuf code here without all the checks.
+-    */
+-   radeonEnsureCmdBufSpace(rmesa, rmesa->hw.max_state_size);
+-   dest = rmesa->store.cmd_buf + rmesa->store.cmd_used;
+-
+-   /* We always always emit zbs, this is due to a bug found by keithw in
+-      the hardware and rediscovered after Erics changes by me.
+-      if you ever touch this code make sure you emit zbs otherwise
+-      you get tcl lockups on at least M7/7500 class of chips - airlied */
+-   rmesa->hw.zbs.dirty=1;
+-
+-   if (RADEON_DEBUG & DEBUG_STATE) {
+-      foreach(atom, &rmesa->hw.atomlist) {
+-	 if (atom->dirty || rmesa->hw.all_dirty) {
+-	    if (atom->check(rmesa->glCtx))
+-	       print_state_atom(atom);
+-	    else
+-	       fprintf(stderr, "skip state %s\n", atom->name);
+-	 }
+-      }
+-   }
+-
+-   foreach(atom, &rmesa->hw.atomlist) {
+-      if (rmesa->hw.all_dirty)
+-	 atom->dirty = GL_TRUE;
+-      if (!(rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_TCL) &&
+-	   atom->is_tcl)
+-	 atom->dirty = GL_FALSE;
+-      if (atom->dirty) {
+-	 if (atom->check(rmesa->glCtx)) {
+-	    int size = atom->cmd_size * 4;
+-	    memcpy(dest, atom->cmd, size);
+-	    dest += size;
+-	    rmesa->store.cmd_used += size;
+-	    atom->dirty = GL_FALSE;
+-	 }
+-      }
+-   }
+-
+-   assert(rmesa->store.cmd_used <= RADEON_CMD_BUF_SZ);
+- 
+-   rmesa->hw.is_dirty = GL_FALSE;
+-   rmesa->hw.all_dirty = GL_FALSE;
++      insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.ucp[i]);
++   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.eye);
++   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.grd);
++   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.fog);
++   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.glt);
+ }
+ 
+ /* Fire a section of the retained (indexed_verts) buffer as a regular
+  * primtive.  
+  */
+-extern void radeonEmitVbufPrim( radeonContextPtr rmesa,
++extern void radeonEmitVbufPrim( r100ContextPtr rmesa,
+ 				GLuint vertex_format,
+ 				GLuint primitive,
+ 				GLuint vertex_nr )
+ {
+-   drm_radeon_cmd_header_t *cmd;
+-
++   BATCH_LOCALS(&rmesa->radeon);
+ 
+    assert(!(primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND));
+    
+-   radeonEmitState( rmesa );
++   radeonEmitState(&rmesa->radeon);
+ 
+-   if (RADEON_DEBUG & DEBUG_IOCTL)
+-      fprintf(stderr, "%s cmd_used/4: %d\n", __FUNCTION__,
+-	      rmesa->store.cmd_used/4);
+-   
+-   cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, VBUF_BUFSZ,
+-						       __FUNCTION__ );
+ #if RADEON_OLD_PACKETS
+-   cmd[0].i = 0;
+-   cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
+-   cmd[1].i = RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM | (3 << 16);
+-   cmd[2].i = rmesa->ioctl.vertex_offset;
+-   cmd[3].i = vertex_nr;
+-   cmd[4].i = vertex_format;
+-   cmd[5].i = (primitive | 
+-	       RADEON_CP_VC_CNTL_PRIM_WALK_LIST |
+-	       RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
+-	       RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
+-	       (vertex_nr << RADEON_CP_VC_CNTL_NUM_SHIFT));
+-
+-   if (RADEON_DEBUG & DEBUG_PRIMS)
+-      fprintf(stderr, "%s: header 0x%x offt 0x%x vfmt 0x%x vfcntl %x \n",
+-	      __FUNCTION__,
+-	      cmd[1].i, cmd[2].i, cmd[4].i, cmd[5].i);
+-#else
+-   cmd[0].i = 0;
+-   cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
+-   cmd[1].i = RADEON_CP_PACKET3_3D_DRAW_VBUF | (1 << 16);
+-   cmd[2].i = vertex_format;
+-   cmd[3].i = (primitive | 
+-	       RADEON_CP_VC_CNTL_PRIM_WALK_LIST |
+-	       RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
+-	       RADEON_CP_VC_CNTL_MAOS_ENABLE |
+-	       RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
+-	       (vertex_nr << RADEON_CP_VC_CNTL_NUM_SHIFT));
+-
+-
+-   if (RADEON_DEBUG & DEBUG_PRIMS)
+-      fprintf(stderr, "%s: header 0x%x vfmt 0x%x vfcntl %x \n",
+-	      __FUNCTION__,
+-	      cmd[1].i, cmd[2].i, cmd[3].i);
++   BEGIN_BATCH(8);
++   OUT_BATCH_PACKET3_CLIP(RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM, 3);
++   OUT_BATCH_RELOC(rmesa->ioctl.vertex_offset, rmesa->ioctl.bo, rmesa->ioctl.vertex_offset, RADEON_GEM_DOMAIN_GTT, 0, 0);
++   OUT_BATCH(vertex_nr);
++   OUT_BATCH(vertex_format);
++   OUT_BATCH(primitive |  RADEON_CP_VC_CNTL_PRIM_WALK_LIST |
++	     RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
++	     RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
++	     (vertex_nr << RADEON_CP_VC_CNTL_NUM_SHIFT));
++   END_BATCH();
++   
++#else   
++   BEGIN_BATCH(4);
++   OUT_BATCH_PACKET3_CLIP(RADEON_CP_PACKET3_3D_DRAW_VBUF, 1);
++   OUT_BATCH(vertex_format);
++   OUT_BATCH(primitive |
++	     RADEON_CP_VC_CNTL_PRIM_WALK_LIST |
++	     RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
++	     RADEON_CP_VC_CNTL_MAOS_ENABLE |
++	     RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
++	     (vertex_nr << RADEON_CP_VC_CNTL_NUM_SHIFT));
++   END_BATCH();
+ #endif
+ }
+ 
+-
+-void radeonFlushElts( radeonContextPtr rmesa )
++void radeonFlushElts( GLcontext *ctx )
+ {
+-   int *cmd = (int *)(rmesa->store.cmd_buf + rmesa->store.elts_start);
+-   int dwords;
+-#if RADEON_OLD_PACKETS
+-   int nr = (rmesa->store.cmd_used - (rmesa->store.elts_start + 24)) / 2;
+-#else
+-   int nr = (rmesa->store.cmd_used - (rmesa->store.elts_start + 16)) / 2;
+-#endif
+-
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
++   BATCH_LOCALS(&rmesa->radeon);
++   int nr;
++   uint32_t *cmd = (uint32_t *)(rmesa->radeon.cmdbuf.cs->packets + rmesa->tcl.elt_cmd_start);
++   int dwords = (rmesa->radeon.cmdbuf.cs->section_ndw - rmesa->radeon.cmdbuf.cs->section_cdw);
++   
+    if (RADEON_DEBUG & DEBUG_IOCTL)
+       fprintf(stderr, "%s\n", __FUNCTION__);
+ 
+-   assert( rmesa->dma.flush == radeonFlushElts );
+-   rmesa->dma.flush = NULL;
++   assert( rmesa->radeon.dma.flush == radeonFlushElts );
++   rmesa->radeon.dma.flush = NULL;
+ 
+-   /* Cope with odd number of elts:
+-    */
+-   rmesa->store.cmd_used = (rmesa->store.cmd_used + 2) & ~2;
+-   dwords = (rmesa->store.cmd_used - rmesa->store.elts_start) / 4;
++   nr = rmesa->tcl.elt_used;
++
++   rmesa->radeon.cmdbuf.cs->cdw += dwords;
+ 
+ #if RADEON_OLD_PACKETS
+-   cmd[1] |= (dwords - 3) << 16;
++   cmd[1] |= (dwords + 3) << 16;
+    cmd[5] |= nr << RADEON_CP_VC_CNTL_NUM_SHIFT;
+ #else
+-   cmd[1] |= (dwords - 3) << 16;
++   cmd[1] |= (dwords + 2) << 16;
+    cmd[3] |= nr << RADEON_CP_VC_CNTL_NUM_SHIFT;
+ #endif
+ 
++   rmesa->radeon.cmdbuf.cs->section_cdw += dwords;
++   END_BATCH();
++
+    if (RADEON_DEBUG & DEBUG_SYNC) {
+       fprintf(stderr, "%s: Syncing\n", __FUNCTION__);
+-      radeonFinish( rmesa->glCtx );
++      radeonFinish( rmesa->radeon.glCtx );
+    }
+-}
+ 
++}
+ 
+-GLushort *radeonAllocEltsOpenEnded( radeonContextPtr rmesa,
++GLushort *radeonAllocEltsOpenEnded( r100ContextPtr rmesa,
+ 				    GLuint vertex_format,
+ 				    GLuint primitive,
+ 				    GLuint min_nr )
+ {
+-   drm_radeon_cmd_header_t *cmd;
+    GLushort *retval;
++   int align_min_nr;
++   BATCH_LOCALS(&rmesa->radeon);
+ 
+    if (RADEON_DEBUG & DEBUG_IOCTL)
+-      fprintf(stderr, "%s %d\n", __FUNCTION__, min_nr);
++      fprintf(stderr, "%s %d prim %x\n", __FUNCTION__, min_nr, primitive);
+ 
+    assert((primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND));
+    
+-   radeonEmitState( rmesa );
++   radeonEmitState(&rmesa->radeon);
+    
+-   cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa,
+-						       ELTS_BUFSZ(min_nr),
+-						       __FUNCTION__ );
++   rmesa->tcl.elt_cmd_start = rmesa->radeon.cmdbuf.cs->cdw;
++
++   /* round up min_nr to align the state */
++   align_min_nr = (min_nr + 1) & ~1;
++
+ #if RADEON_OLD_PACKETS
+-   cmd[0].i = 0;
+-   cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
+-   cmd[1].i = RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM;
+-   cmd[2].i = rmesa->ioctl.vertex_offset;
+-   cmd[3].i = 0xffff;
+-   cmd[4].i = vertex_format;
+-   cmd[5].i = (primitive | 
+-	       RADEON_CP_VC_CNTL_PRIM_WALK_IND |
+-	       RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
+-	       RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE);
+-
+-   retval = (GLushort *)(cmd+6);
+-#else   
+-   cmd[0].i = 0;
+-   cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
+-   cmd[1].i = RADEON_CP_PACKET3_3D_DRAW_INDX;
+-   cmd[2].i = vertex_format;
+-   cmd[3].i = (primitive | 
+-	       RADEON_CP_VC_CNTL_PRIM_WALK_IND |
+-	       RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
+-	       RADEON_CP_VC_CNTL_MAOS_ENABLE |
+-	       RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE);
+-
+-   retval = (GLushort *)(cmd+4);
++   BEGIN_BATCH_NO_AUTOSTATE(2+ELTS_BUFSZ(align_min_nr)/4);
++   OUT_BATCH_PACKET3_CLIP(RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM, 0);
++   OUT_BATCH_RELOC(rmesa->ioctl.vertex_offset, rmesa->ioctl.bo, rmesa->ioctl.vertex_offset, RADEON_GEM_DOMAIN_GTT, 0, 0);
++   OUT_BATCH(0xffff);
++   OUT_BATCH(vertex_format);
++   OUT_BATCH(primitive | 
++	     RADEON_CP_VC_CNTL_PRIM_WALK_IND |
++	     RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
++	     RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE);
++
++#else
++   BEGIN_BATCH_NO_AUTOSTATE(ELTS_BUFSZ(align_min_nr)/4);
++   OUT_BATCH_PACKET3_CLIP(RADEON_CP_PACKET3_DRAW_INDX, 0);
++   OUT_BATCH(vertex_format);
++   OUT_BATCH(primitive | 
++	     RADEON_CP_VC_CNTL_PRIM_WALK_IND |
++	     RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
++	     RADEON_CP_VC_CNTL_MAOS_ENABLE |
++	     RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE);
+ #endif
+ 
+-   if (RADEON_DEBUG & DEBUG_PRIMS)
+-      fprintf(stderr, "%s: header 0x%x vfmt 0x%x prim %x \n",
+-	      __FUNCTION__,
+-	      cmd[1].i, vertex_format, primitive);
+ 
+-   assert(!rmesa->dma.flush);
+-   rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
+-   rmesa->dma.flush = radeonFlushElts;
++   rmesa->tcl.elt_cmd_offset = rmesa->radeon.cmdbuf.cs->cdw;
++   rmesa->tcl.elt_used = min_nr;
+ 
+-   rmesa->store.elts_start = ((char *)cmd) - rmesa->store.cmd_buf;
++   retval = (GLushort *)(rmesa->radeon.cmdbuf.cs->packets + rmesa->tcl.elt_cmd_offset);
++   
++   if (RADEON_DEBUG & DEBUG_PRIMS)
++      fprintf(stderr, "%s: header prim %x \n",
++	      __FUNCTION__, primitive);
++
++   assert(!rmesa->radeon.dma.flush);
++   rmesa->radeon.glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
++   rmesa->radeon.dma.flush = radeonFlushElts;
+ 
+    return retval;
+ }
+ 
+-
+-
+-void radeonEmitVertexAOS( radeonContextPtr rmesa,
++void radeonEmitVertexAOS( r100ContextPtr rmesa,
+ 			  GLuint vertex_size,
++			  struct radeon_bo *bo,
+ 			  GLuint offset )
+ {
+ #if RADEON_OLD_PACKETS
+-   rmesa->ioctl.vertex_size = vertex_size;
+    rmesa->ioctl.vertex_offset = offset;
++   rmesa->ioctl.bo = bo;
+ #else
+-   drm_radeon_cmd_header_t *cmd;
++   BATCH_LOCALS(&rmesa->radeon);
+ 
+    if (RADEON_DEBUG & (DEBUG_PRIMS|DEBUG_IOCTL))
+       fprintf(stderr, "%s:  vertex_size 0x%x offset 0x%x \n",
+ 	      __FUNCTION__, vertex_size, offset);
+ 
+-   cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, VERT_AOS_BUFSZ,
+-						  __FUNCTION__ );
++   BEGIN_BATCH(7);
++   OUT_BATCH_PACKET3(RADEON_CP_PACKET3_3D_LOAD_VBPNTR, 2);
++   OUT_BATCH(1);
++   OUT_BATCH(vertex_size | (vertex_size << 8));
++   OUT_BATCH_RELOC(offset, bo, offset, RADEON_GEM_DOMAIN_GTT, 0, 0);
++   END_BATCH();
+ 
+-   cmd[0].i = 0;
+-   cmd[0].header.cmd_type = RADEON_CMD_PACKET3;
+-   cmd[1].i = RADEON_CP_PACKET3_3D_LOAD_VBPNTR | (2 << 16);
+-   cmd[2].i = 1;
+-   cmd[3].i = vertex_size | (vertex_size << 8);
+-   cmd[4].i = offset;
+ #endif
+ }
+ 		       
+ 
+-void radeonEmitAOS( radeonContextPtr rmesa,
+-		    struct radeon_dma_region **component,
++void radeonEmitAOS( r100ContextPtr rmesa,
+ 		    GLuint nr,
+ 		    GLuint offset )
+ {
+ #if RADEON_OLD_PACKETS
+    assert( nr == 1 );
+-   assert( component[0]->aos_size == component[0]->aos_stride );
+-   rmesa->ioctl.vertex_size = component[0]->aos_size;
++   rmesa->ioctl.bo = rmesa->tcl.aos[0].bo;
+    rmesa->ioctl.vertex_offset = 
+-      (component[0]->aos_start + offset * component[0]->aos_stride * 4);
++     (rmesa->tcl.aos[0].offset + offset * rmesa->tcl.aos[0].stride * 4);
+ #else
+-   drm_radeon_cmd_header_t *cmd;
+-   int sz = AOS_BUFSZ(nr);
++   BATCH_LOCALS(&rmesa->radeon);
++   uint32_t voffset;
++   //   int sz = AOS_BUFSZ(nr);
++   int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2;
+    int i;
+-   int *tmp;
+ 
+    if (RADEON_DEBUG & DEBUG_IOCTL)
+       fprintf(stderr, "%s\n", __FUNCTION__);
+ 
+-
+-   cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, sz,
+-						  __FUNCTION__ );
+-   cmd[0].i = 0;
+-   cmd[0].header.cmd_type = RADEON_CMD_PACKET3;
+-   cmd[1].i = RADEON_CP_PACKET3_3D_LOAD_VBPNTR | (((sz / sizeof(int))-3) << 16);
+-   cmd[2].i = nr;
+-   tmp = &cmd[0].i;
+-   cmd += 3;
+-
+-   for (i = 0 ; i < nr ; i++) {
+-      if (i & 1) {
+-	 cmd[0].i |= ((component[i]->aos_stride << 24) | 
+-		      (component[i]->aos_size << 16));
+-	 cmd[2].i = (component[i]->aos_start + 
+-		     offset * component[i]->aos_stride * 4);
+-	 cmd += 3;
+-      }
+-      else {
+-	 cmd[0].i = ((component[i]->aos_stride << 8) | 
+-		     (component[i]->aos_size << 0));
+-	 cmd[1].i = (component[i]->aos_start + 
+-		     offset * component[i]->aos_stride * 4);
+-      }
+-   }
+-
+-   if (RADEON_DEBUG & DEBUG_VERTS) {
+-      fprintf(stderr, "%s:\n", __FUNCTION__);
+-      for (i = 0 ; i < sz ; i++)
+-	 fprintf(stderr, "   %d: %x\n", i, tmp[i]);
+-   }
+-#endif
+-}
+-
+-/* using already shifted color_fmt! */
+-void radeonEmitBlit( radeonContextPtr rmesa, /* FIXME: which drmMinor is required? */
+-		   GLuint color_fmt,
+-		   GLuint src_pitch,
+-		   GLuint src_offset,
+-		   GLuint dst_pitch,
+-		   GLuint dst_offset,
+-		   GLint srcx, GLint srcy,
+-		   GLint dstx, GLint dsty,
+-		   GLuint w, GLuint h )
+-{
+-   drm_radeon_cmd_header_t *cmd;
+-
+-   if (RADEON_DEBUG & DEBUG_IOCTL)
+-      fprintf(stderr, "%s src %x/%x %d,%d dst: %x/%x %d,%d sz: %dx%d\n",
+-	      __FUNCTION__, 
+-	      src_pitch, src_offset, srcx, srcy,
+-	      dst_pitch, dst_offset, dstx, dsty,
+-	      w, h);
+-
+-   assert( (src_pitch & 63) == 0 );
+-   assert( (dst_pitch & 63) == 0 );
+-   assert( (src_offset & 1023) == 0 ); 
+-   assert( (dst_offset & 1023) == 0 ); 
+-   assert( w < (1<<16) );
+-   assert( h < (1<<16) );
+-
+-   cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, 8 * sizeof(int),
+-						  __FUNCTION__ );
+-
+-
+-   cmd[0].i = 0;
+-   cmd[0].header.cmd_type = RADEON_CMD_PACKET3;
+-   cmd[1].i = RADEON_CP_PACKET3_CNTL_BITBLT_MULTI | (5 << 16);
+-   cmd[2].i = (RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
+-	       RADEON_GMC_DST_PITCH_OFFSET_CNTL |
+-	       RADEON_GMC_BRUSH_NONE |
+-	       color_fmt |
+-	       RADEON_GMC_SRC_DATATYPE_COLOR |
+-	       RADEON_ROP3_S |
+-	       RADEON_DP_SRC_SOURCE_MEMORY |
+-	       RADEON_GMC_CLR_CMP_CNTL_DIS |
+-	       RADEON_GMC_WR_MSK_DIS );
+-
+-   cmd[3].i = ((src_pitch/64)<<22) | (src_offset >> 10);
+-   cmd[4].i = ((dst_pitch/64)<<22) | (dst_offset >> 10);
+-   cmd[5].i = (srcx << 16) | srcy;
+-   cmd[6].i = (dstx << 16) | dsty; /* dst */
+-   cmd[7].i = (w << 16) | h;
+-}
+-
+-
+-void radeonEmitWait( radeonContextPtr rmesa, GLuint flags )
+-{
+-   drm_radeon_cmd_header_t *cmd;
+-
+-   assert( !(flags & ~(RADEON_WAIT_2D|RADEON_WAIT_3D)) );
+-
+-   cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, 1 * sizeof(int),
+-					   __FUNCTION__ );
+-   cmd[0].i = 0;
+-   cmd[0].wait.cmd_type = RADEON_CMD_WAIT;
+-   cmd[0].wait.flags = flags;
+-}
+-
+-
+-static int radeonFlushCmdBufLocked( radeonContextPtr rmesa, 
+-				    const char * caller )
+-{
+-   int ret, i;
+-   drm_radeon_cmd_buffer_t cmd;
+-
+-   if (rmesa->lost_context)
+-      radeonBackUpAndEmitLostStateLocked(rmesa);
+-
+-   if (RADEON_DEBUG & DEBUG_IOCTL) {
+-      fprintf(stderr, "%s from %s\n", __FUNCTION__, caller); 
+-
+-      if (RADEON_DEBUG & DEBUG_VERBOSE) 
+-	 for (i = 0 ; i < rmesa->store.cmd_used ; i += 4 )
+-	    fprintf(stderr, "%d: %x\n", i/4, 
+-		    *(int *)(&rmesa->store.cmd_buf[i]));
+-   }
+-
+-   if (RADEON_DEBUG & DEBUG_DMA)
+-      fprintf(stderr, "%s: Releasing %d buffers\n", __FUNCTION__,
+-	      rmesa->dma.nr_released_bufs);
+-
+-
+-   if (RADEON_DEBUG & DEBUG_SANITY) {
+-      if (rmesa->state.scissor.enabled) 
+-	 ret = radeonSanityCmdBuffer( rmesa, 
+-				      rmesa->state.scissor.numClipRects,
+-				      rmesa->state.scissor.pClipRects);
+-      else
+-	 ret = radeonSanityCmdBuffer( rmesa, 
+-				      rmesa->numClipRects,
+-				      rmesa->pClipRects);
+-      if (ret) {
+-	 fprintf(stderr, "drmSanityCommandWrite: %d\n", ret);	 
+-	 goto out;
++   BEGIN_BATCH(sz+2+(nr * 2));
++   OUT_BATCH_PACKET3(RADEON_CP_PACKET3_3D_LOAD_VBPNTR, sz - 1);
++   OUT_BATCH(nr);
++
++   if (!rmesa->radeon.radeonScreen->kernel_mm) {
++      for (i = 0; i + 1 < nr; i += 2) {
++	 OUT_BATCH((rmesa->tcl.aos[i].components << 0) |
++		   (rmesa->tcl.aos[i].stride << 8) |
++		   (rmesa->tcl.aos[i + 1].components << 16) |
++		   (rmesa->tcl.aos[i + 1].stride << 24));
++			
++	 voffset =  rmesa->tcl.aos[i + 0].offset +
++	    offset * 4 * rmesa->tcl.aos[i + 0].stride;
++	 OUT_BATCH_RELOC(voffset,
++			 rmesa->tcl.aos[i].bo,
++			 voffset,
++			 RADEON_GEM_DOMAIN_GTT,
++			 0, 0);
++	 voffset =  rmesa->tcl.aos[i + 1].offset +
++	    offset * 4 * rmesa->tcl.aos[i + 1].stride;
++	 OUT_BATCH_RELOC(voffset,
++			 rmesa->tcl.aos[i+1].bo,
++			 voffset,
++			 RADEON_GEM_DOMAIN_GTT,
++			 0, 0);
+       }
+-   }
+-
+-
+-   cmd.bufsz = rmesa->store.cmd_used;
+-   cmd.buf = rmesa->store.cmd_buf;
+-
+-   if (rmesa->state.scissor.enabled) {
+-      cmd.nbox = rmesa->state.scissor.numClipRects;
+-      cmd.boxes = rmesa->state.scissor.pClipRects;
+-   } else {
+-      cmd.nbox = rmesa->numClipRects;
+-      cmd.boxes = rmesa->pClipRects;
+-   }
+-
+-   ret = drmCommandWrite( rmesa->dri.fd,
+-			  DRM_RADEON_CMDBUF,
+-			  &cmd, sizeof(cmd) );
+-
+-   if (ret)
+-      fprintf(stderr, "drmCommandWrite: %d\n", ret);
+-
+-   if (RADEON_DEBUG & DEBUG_SYNC) {
+-      fprintf(stderr, "\nSyncing in %s\n\n", __FUNCTION__);
+-      radeonWaitForIdleLocked( rmesa );
+-   }
+-
+- out:
+-   rmesa->store.primnr = 0;
+-   rmesa->store.statenr = 0;
+-   rmesa->store.cmd_used = 0;
+-   rmesa->dma.nr_released_bufs = 0;
+-   rmesa->save_on_next_emit = 1;
+-
+-   return ret;
+-}
+-
+-
+-/* Note: does not emit any commands to avoid recursion on
+- * radeonAllocCmdBuf.
+- */
+-void radeonFlushCmdBuf( radeonContextPtr rmesa, const char *caller )
+-{
+-   int ret;
+-
+-	      
+-   LOCK_HARDWARE( rmesa );
+-
+-   ret = radeonFlushCmdBufLocked( rmesa, caller );
+-
+-   UNLOCK_HARDWARE( rmesa );
+-
+-   if (ret) {
+-      fprintf(stderr, "drm_radeon_cmd_buffer_t: %d (exiting)\n", ret);
+-      exit(ret);
+-   }
+-}
+-
+-/* =============================================================
+- * Hardware vertex buffer handling
+- */
+-
+-
+-void radeonRefillCurrentDmaRegion( radeonContextPtr rmesa )
+-{
+-   struct radeon_dma_buffer *dmabuf;
+-   int fd = rmesa->dri.fd;
+-   int index = 0;
+-   int size = 0;
+-   drmDMAReq dma;
+-   int ret;
+-
+-   if (RADEON_DEBUG & (DEBUG_IOCTL|DEBUG_DMA))
+-      fprintf(stderr, "%s\n", __FUNCTION__);  
+-
+-   if (rmesa->dma.flush) {
+-      rmesa->dma.flush( rmesa );
+-   }
+-
+-   if (rmesa->dma.current.buf)
+-      radeonReleaseDmaRegion( rmesa, &rmesa->dma.current, __FUNCTION__ );
+-
+-   if (rmesa->dma.nr_released_bufs > 4)
+-      radeonFlushCmdBuf( rmesa, __FUNCTION__ );
+-
+-   dma.context = rmesa->dri.hwContext;
+-   dma.send_count = 0;
+-   dma.send_list = NULL;
+-   dma.send_sizes = NULL;
+-   dma.flags = 0;
+-   dma.request_count = 1;
+-   dma.request_size = RADEON_BUFFER_SIZE;
+-   dma.request_list = &index;
+-   dma.request_sizes = &size;
+-   dma.granted_count = 0;
+-
+-   LOCK_HARDWARE(rmesa);	/* no need to validate */
+-
+-   ret = drmDMA( fd, &dma );
+       
+-   if (ret != 0) {
+-      /* Free some up this way?
+-       */
+-      if (rmesa->dma.nr_released_bufs) {
+-	 radeonFlushCmdBufLocked( rmesa, __FUNCTION__ );
++      if (nr & 1) {
++	 OUT_BATCH((rmesa->tcl.aos[nr - 1].components << 0) |
++		   (rmesa->tcl.aos[nr - 1].stride << 8));
++	 voffset =  rmesa->tcl.aos[nr - 1].offset +
++	    offset * 4 * rmesa->tcl.aos[nr - 1].stride;
++	 OUT_BATCH_RELOC(voffset,
++			 rmesa->tcl.aos[nr - 1].bo,
++			 voffset,
++			 RADEON_GEM_DOMAIN_GTT,
++			 0, 0);
+       }
+-      
+-      if (RADEON_DEBUG & DEBUG_DMA)
+-	 fprintf(stderr, "Waiting for buffers\n");
+-
+-      radeonWaitForIdleLocked( rmesa );
+-      ret = drmDMA( fd, &dma );
+-
+-      if ( ret != 0 ) {
+-	 UNLOCK_HARDWARE( rmesa );
+-	 fprintf( stderr, "Error: Could not get dma buffer... exiting\n" );
+-	 exit( -1 );
++   } else {
++      for (i = 0; i + 1 < nr; i += 2) {
++	 OUT_BATCH((rmesa->tcl.aos[i].components << 0) |
++		   (rmesa->tcl.aos[i].stride << 8) |
++		   (rmesa->tcl.aos[i + 1].components << 16) |
++		   (rmesa->tcl.aos[i + 1].stride << 24));
++	 
++	 voffset =  rmesa->tcl.aos[i + 0].offset +
++	    offset * 4 * rmesa->tcl.aos[i + 0].stride;
++	 OUT_BATCH(voffset);
++	 voffset =  rmesa->tcl.aos[i + 1].offset +
++	    offset * 4 * rmesa->tcl.aos[i + 1].stride;
++	 OUT_BATCH(voffset);
+       }
+-   }
+-
+-   UNLOCK_HARDWARE(rmesa);
+-
+-   if (RADEON_DEBUG & DEBUG_DMA)
+-      fprintf(stderr, "Allocated buffer %d\n", index);
+-
+-   dmabuf = CALLOC_STRUCT( radeon_dma_buffer );
+-   dmabuf->buf = &rmesa->radeonScreen->buffers->list[index];
+-   dmabuf->refcount = 1;
+-
+-   rmesa->dma.current.buf = dmabuf;
+-   rmesa->dma.current.address = dmabuf->buf->address;
+-   rmesa->dma.current.end = dmabuf->buf->total;
+-   rmesa->dma.current.start = 0;
+-   rmesa->dma.current.ptr = 0;
+-
+-   rmesa->c_vertexBuffers++;
+-}
+-
+-void radeonReleaseDmaRegion( radeonContextPtr rmesa,
+-			     struct radeon_dma_region *region,
+-			     const char *caller )
+-{
+-   if (RADEON_DEBUG & DEBUG_IOCTL)
+-      fprintf(stderr, "%s from %s\n", __FUNCTION__, caller); 
+-   
+-   if (!region->buf)
+-      return;
+-
+-   if (rmesa->dma.flush)
+-      rmesa->dma.flush( rmesa );
+-
+-   if (--region->buf->refcount == 0) {
+-      drm_radeon_cmd_header_t *cmd;
+-
+-      if (RADEON_DEBUG & (DEBUG_IOCTL|DEBUG_DMA))
+-	 fprintf(stderr, "%s -- DISCARD BUF %d\n", __FUNCTION__,
+-		 region->buf->buf->idx);  
+       
+-      cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, sizeof(*cmd), 
+-						     __FUNCTION__ );
+-      cmd->dma.cmd_type = RADEON_CMD_DMA_DISCARD;
+-      cmd->dma.buf_idx = region->buf->buf->idx;
+-      FREE(region->buf);
+-      rmesa->dma.nr_released_bufs++;
+-   }
+-
+-   region->buf = NULL;
+-   region->start = 0;
+-}
+-
+-/* Allocates a region from rmesa->dma.current.  If there isn't enough
+- * space in current, grab a new buffer (and discard what was left of current)
+- */
+-void radeonAllocDmaRegion( radeonContextPtr rmesa, 
+-			   struct radeon_dma_region *region,
+-			   int bytes,
+-			   int alignment )
+-{
+-   if (RADEON_DEBUG & DEBUG_IOCTL)
+-      fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);
+-
+-   if (rmesa->dma.flush)
+-      rmesa->dma.flush( rmesa );
+-
+-   if (region->buf)
+-      radeonReleaseDmaRegion( rmesa, region, __FUNCTION__ );
+-
+-   alignment--;
+-   rmesa->dma.current.start = rmesa->dma.current.ptr = 
+-      (rmesa->dma.current.ptr + alignment) & ~alignment;
+-
+-   if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end ) 
+-      radeonRefillCurrentDmaRegion( rmesa );
+-
+-   region->start = rmesa->dma.current.start;
+-   region->ptr = rmesa->dma.current.start;
+-   region->end = rmesa->dma.current.start + bytes;
+-   region->address = rmesa->dma.current.address;
+-   region->buf = rmesa->dma.current.buf;
+-   region->buf->refcount++;
+-
+-   rmesa->dma.current.ptr += bytes; /* bug - if alignment > 7 */
+-   rmesa->dma.current.start = 
+-      rmesa->dma.current.ptr = (rmesa->dma.current.ptr + 0x7) & ~0x7;  
+-}
+-
+-/* ================================================================
+- * SwapBuffers with client-side throttling
+- */
+-
+-static uint32_t radeonGetLastFrame (radeonContextPtr rmesa) 
+-{
+-   drm_radeon_getparam_t gp;
+-   int ret;
+-   uint32_t frame;
+-
+-   gp.param = RADEON_PARAM_LAST_FRAME;
+-   gp.value = (int *)&frame;
+-   ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_GETPARAM,
+-			      &gp, sizeof(gp) );
+-
+-   if ( ret ) {
+-      fprintf( stderr, "%s: drm_radeon_getparam_t: %d\n", __FUNCTION__, ret );
+-      exit(1);
+-   }
+-
+-   return frame;
+-}
+-
+-static void radeonEmitIrqLocked( radeonContextPtr rmesa )
+-{
+-   drm_radeon_irq_emit_t ie;
+-   int ret;
+-
+-   ie.irq_seq = &rmesa->iw.irq_seq;
+-   ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_IRQ_EMIT, 
+-			      &ie, sizeof(ie) );
+-   if ( ret ) {
+-      fprintf( stderr, "%s: drm_radeon_irq_emit_t: %d\n", __FUNCTION__, ret );
+-      exit(1);
+-   }
+-}
+-
+-
+-static void radeonWaitIrq( radeonContextPtr rmesa )
+-{
+-   int ret;
+-
+-   do {
+-      ret = drmCommandWrite( rmesa->dri.fd, DRM_RADEON_IRQ_WAIT,
+-			     &rmesa->iw, sizeof(rmesa->iw) );
+-   } while (ret && (errno == EINTR || errno == EBUSY));
+-
+-   if ( ret ) {
+-      fprintf( stderr, "%s: drmRadeonIrqWait: %d\n", __FUNCTION__, ret );
+-      exit(1);
+-   }
+-}
+-
+-
+-static void radeonWaitForFrameCompletion( radeonContextPtr rmesa )
+-{
+-   drm_radeon_sarea_t *sarea = rmesa->sarea;
+-
+-   if (rmesa->do_irqs) {
+-      if (radeonGetLastFrame(rmesa) < sarea->last_frame) {
+-	 if (!rmesa->irqsEmitted) {
+-	    while (radeonGetLastFrame (rmesa) < sarea->last_frame)
+-	       ;
+-	 }
+-	 else {
+-	    UNLOCK_HARDWARE( rmesa ); 
+-	    radeonWaitIrq( rmesa );	
+-	    LOCK_HARDWARE( rmesa ); 
+-	 }
+-	 rmesa->irqsEmitted = 10;
++      if (nr & 1) {
++	 OUT_BATCH((rmesa->tcl.aos[nr - 1].components << 0) |
++		   (rmesa->tcl.aos[nr - 1].stride << 8));
++	 voffset =  rmesa->tcl.aos[nr - 1].offset +
++	    offset * 4 * rmesa->tcl.aos[nr - 1].stride;
++	 OUT_BATCH(voffset);
+       }
+-
+-      if (rmesa->irqsEmitted) {
+-	 radeonEmitIrqLocked( rmesa );
+-	 rmesa->irqsEmitted--;
++      for (i = 0; i + 1 < nr; i += 2) {
++	 voffset =  rmesa->tcl.aos[i + 0].offset +
++	    offset * 4 * rmesa->tcl.aos[i + 0].stride;
++	 radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
++			       rmesa->tcl.aos[i+0].bo,
++			       RADEON_GEM_DOMAIN_GTT,
++			       0, 0);
++	 voffset =  rmesa->tcl.aos[i + 1].offset +
++	    offset * 4 * rmesa->tcl.aos[i + 1].stride;
++	 radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
++			       rmesa->tcl.aos[i+1].bo,
++			       RADEON_GEM_DOMAIN_GTT,
++			       0, 0);
+       }
+-   } 
+-   else {
+-      while (radeonGetLastFrame (rmesa) < sarea->last_frame) {
+-	 UNLOCK_HARDWARE( rmesa ); 
+-	 if (rmesa->do_usleeps) 
+-	    DO_USLEEP( 1 );
+-	 LOCK_HARDWARE( rmesa ); 
++      if (nr & 1) {
++	 voffset =  rmesa->tcl.aos[nr - 1].offset +
++	    offset * 4 * rmesa->tcl.aos[nr - 1].stride;
++	 radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
++			       rmesa->tcl.aos[nr-1].bo,
++			       RADEON_GEM_DOMAIN_GTT,
++			       0, 0);
+       }
+    }
+-}
+-
+-/* Copy the back color buffer to the front color buffer.
+- */
+-void radeonCopyBuffer( __DRIdrawablePrivate *dPriv,
+-		       const drm_clip_rect_t	  *rect)
+-{
+-   radeonContextPtr rmesa;
+-   GLint nbox, i, ret;
+-   GLboolean   missed_target;
+-   int64_t ust;
+-   __DRIscreenPrivate *psp;
+-
+-   assert(dPriv);
+-   assert(dPriv->driContextPriv);
+-   assert(dPriv->driContextPriv->driverPrivate);
+-
+-   rmesa = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
+-
+-   if ( RADEON_DEBUG & DEBUG_IOCTL ) {
+-      fprintf( stderr, "\n%s( %p )\n\n", __FUNCTION__, (void *) rmesa->glCtx );
+-   }
+-
+-   RADEON_FIREVERTICES( rmesa );
+-   LOCK_HARDWARE( rmesa );
+-
+-   /* Throttle the frame rate -- only allow one pending swap buffers
+-    * request at a time.
+-    */
+-   radeonWaitForFrameCompletion( rmesa );
+-   if (!rect)
+-   {
+-       UNLOCK_HARDWARE( rmesa );
+-       driWaitForVBlank( dPriv, & missed_target );
+-       LOCK_HARDWARE( rmesa );
+-   }
++   END_BATCH();
+ 
+-   nbox = dPriv->numClipRects; /* must be in locked region */
+-
+-   for ( i = 0 ; i < nbox ; ) {
+-      GLint nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS , nbox );
+-      drm_clip_rect_t *box = dPriv->pClipRects;
+-      drm_clip_rect_t *b = rmesa->sarea->boxes;
+-      GLint n = 0;
+-
+-      for ( ; i < nr ; i++ ) {
+-
+-	  *b = box[i];
+-
+-	  if (rect)
+-	  {
+-	      if (rect->x1 > b->x1)
+-		  b->x1 = rect->x1;
+-	      if (rect->y1 > b->y1)
+-		  b->y1 = rect->y1;
+-	      if (rect->x2 < b->x2)
+-		  b->x2 = rect->x2;
+-	      if (rect->y2 < b->y2)
+-		  b->y2 = rect->y2;
+-
+-	      if (b->x1 >= b->x2 || b->y1 >= b->y2)
+-		  continue;
+-	  }
+-
+-	  b++;
+-	  n++;
+-      }
+-      rmesa->sarea->nbox = n;
+-
+-      if (!n)
+-	 continue;
+-
+-      ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_SWAP );
+-
+-      if ( ret ) {
+-	 fprintf( stderr, "DRM_RADEON_SWAP_BUFFERS: return = %d\n", ret );
+-	 UNLOCK_HARDWARE( rmesa );
+-	 exit( 1 );
+-      }
+-   }
+-
+-   UNLOCK_HARDWARE( rmesa );
+-   if (!rect)
+-   {
+-       psp = dPriv->driScreenPriv;
+-       rmesa->swap_count++;
+-       (*psp->systemTime->getUST)( & ust );
+-       if ( missed_target ) {
+-	   rmesa->swap_missed_count++;
+-	   rmesa->swap_missed_ust = ust - rmesa->swap_ust;
+-       }
+-
+-       rmesa->swap_ust = ust;
+-       rmesa->hw.all_dirty = GL_TRUE;
+-   }
+-}
+-
+-void radeonPageFlip( __DRIdrawablePrivate *dPriv )
+-{
+-   radeonContextPtr rmesa;
+-   GLint ret;
+-   GLboolean   missed_target;
+-   __DRIscreenPrivate *psp;
+-
+-   assert(dPriv);
+-   assert(dPriv->driContextPriv);
+-   assert(dPriv->driContextPriv->driverPrivate);
+-
+-   rmesa = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
+-   psp = dPriv->driScreenPriv;
+-
+-   if ( RADEON_DEBUG & DEBUG_IOCTL ) {
+-      fprintf(stderr, "%s: pfCurrentPage: %d\n", __FUNCTION__,
+-	      rmesa->sarea->pfCurrentPage);
+-   }
+-
+-   RADEON_FIREVERTICES( rmesa );
+-   LOCK_HARDWARE( rmesa );
+-
+-   /* Need to do this for the perf box placement:
+-    */
+-   if (dPriv->numClipRects)
+-   {
+-      drm_clip_rect_t *box = dPriv->pClipRects;
+-      drm_clip_rect_t *b = rmesa->sarea->boxes;
+-      b[0] = box[0];
+-      rmesa->sarea->nbox = 1;
+-   }
+-
+-   /* Throttle the frame rate -- only allow a few pending swap buffers
+-    * request at a time.
+-    */
+-   radeonWaitForFrameCompletion( rmesa );
+-   UNLOCK_HARDWARE( rmesa );
+-   driWaitForVBlank( dPriv, & missed_target );
+-   if ( missed_target ) {
+-      rmesa->swap_missed_count++;
+-      (void) (*psp->systemTime->getUST)( & rmesa->swap_missed_ust );
+-   }
+-   LOCK_HARDWARE( rmesa );
+-
+-   ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_FLIP );
+-
+-   UNLOCK_HARDWARE( rmesa );
+-
+-   if ( ret ) {
+-      fprintf( stderr, "DRM_RADEON_FLIP: return = %d\n", ret );
+-      exit( 1 );
+-   }
+-
+-   rmesa->swap_count++;
+-   (void) (*psp->systemTime->getUST)( & rmesa->swap_ust );
+-
+-   /* Get ready for drawing next frame.  Update the renderbuffers'
+-    * flippedOffset/Pitch fields so we draw into the right place.
+-    */
+-   driFlipRenderbuffers(rmesa->glCtx->WinSysDrawBuffer,
+-                        rmesa->sarea->pfCurrentPage);
+-
+-   radeonUpdateDrawBuffer(rmesa->glCtx);
++#endif
+ }
+ 
+-
+ /* ================================================================
+  * Buffer clear
+  */
+@@ -1028,9 +375,9 @@ void radeonPageFlip( __DRIdrawablePrivate *dPriv )
+ 
+ static void radeonClear( GLcontext *ctx, GLbitfield mask )
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+-   __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
+-   drm_radeon_sarea_t *sarea = rmesa->sarea;
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
++   __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable;
++   drm_radeon_sarea_t *sarea = rmesa->radeon.sarea;
+    uint32_t clear;
+    GLuint flags = 0;
+    GLuint color_mask = 0;
+@@ -1042,8 +389,8 @@ static void radeonClear( GLcontext *ctx, GLbitfield mask )
+    }
+ 
+    {
+-      LOCK_HARDWARE( rmesa );
+-      UNLOCK_HARDWARE( rmesa );
++      LOCK_HARDWARE( &rmesa->radeon );
++      UNLOCK_HARDWARE( &rmesa->radeon );
+       if ( dPriv->numClipRects == 0 ) 
+ 	 return;
+    }
+@@ -1067,7 +414,7 @@ static void radeonClear( GLcontext *ctx, GLbitfield mask )
+       mask &= ~BUFFER_BIT_DEPTH;
+    }
+ 
+-   if ( (mask & BUFFER_BIT_STENCIL) && rmesa->state.stencil.hwBuffer ) {
++   if ( (mask & BUFFER_BIT_STENCIL) && rmesa->radeon.state.stencil.hwBuffer ) {
+       flags |= RADEON_STENCIL;
+       mask &= ~BUFFER_BIT_STENCIL;
+    }
+@@ -1083,16 +430,16 @@ static void radeonClear( GLcontext *ctx, GLbitfield mask )
+ 
+    if (rmesa->using_hyperz) {
+       flags |= RADEON_USE_COMP_ZBUF;
+-/*      if (rmesa->radeonScreen->chipset & RADEON_CHIPSET_TCL) 
++/*      if (rmesa->radeon.radeonScreen->chipset & RADEON_CHIPSET_TCL) 
+          flags |= RADEON_USE_HIERZ; */
+-      if (!(rmesa->state.stencil.hwBuffer) ||
++      if (!(rmesa->radeon.state.stencil.hwBuffer) ||
+ 	 ((flags & RADEON_DEPTH) && (flags & RADEON_STENCIL) &&
+-	    ((rmesa->state.stencil.clear & RADEON_STENCIL_WRITE_MASK) == RADEON_STENCIL_WRITE_MASK))) {
++	    ((rmesa->radeon.state.stencil.clear & RADEON_STENCIL_WRITE_MASK) == RADEON_STENCIL_WRITE_MASK))) {
+ 	  flags |= RADEON_CLEAR_FASTZ;
+       }
+    }
+ 
+-   LOCK_HARDWARE( rmesa );
++   LOCK_HARDWARE( &rmesa->radeon );
+ 
+    /* compute region after locking: */
+    cx = ctx->DrawBuffer->_Xmin;
+@@ -1112,7 +459,7 @@ static void radeonClear( GLcontext *ctx, GLbitfield mask )
+ 
+       gp.param = RADEON_PARAM_LAST_CLEAR;
+       gp.value = (int *)&clear;
+-      ret = drmCommandWriteRead( rmesa->dri.fd,
++      ret = drmCommandWriteRead( rmesa->radeon.dri.fd,
+ 				 DRM_RADEON_GETPARAM, &gp, sizeof(gp) );
+ 
+       if ( ret ) {
+@@ -1124,20 +471,20 @@ static void radeonClear( GLcontext *ctx, GLbitfield mask )
+ 	 break;
+       }
+ 
+-      if ( rmesa->do_usleeps ) {
+-	 UNLOCK_HARDWARE( rmesa );
++      if ( rmesa->radeon.do_usleeps ) {
++	 UNLOCK_HARDWARE( &rmesa->radeon );
+ 	 DO_USLEEP( 1 );
+-	 LOCK_HARDWARE( rmesa );
++	 LOCK_HARDWARE( &rmesa->radeon );
+       }
+    }
+ 
+    /* Send current state to the hardware */
+-   radeonFlushCmdBufLocked( rmesa, __FUNCTION__ );
++   rcommonFlushCmdBufLocked( &rmesa->radeon, __FUNCTION__ );
+ 
+    for ( i = 0 ; i < dPriv->numClipRects ; ) {
+       GLint nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS, dPriv->numClipRects );
+       drm_clip_rect_t *box = dPriv->pClipRects;
+-      drm_clip_rect_t *b = rmesa->sarea->boxes;
++      drm_clip_rect_t *b = rmesa->radeon.sarea->boxes;
+       drm_radeon_clear_t clear;
+       drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
+       GLint n = 0;
+@@ -1172,106 +519,40 @@ static void radeonClear( GLcontext *ctx, GLbitfield mask )
+ 	 }
+       }
+ 
+-      rmesa->sarea->nbox = n;
++      rmesa->radeon.sarea->nbox = n;
+ 
+       clear.flags       = flags;
+-      clear.clear_color = rmesa->state.color.clear;
+-      clear.clear_depth = rmesa->state.depth.clear;
++      clear.clear_color = rmesa->radeon.state.color.clear;
++      clear.clear_depth = rmesa->radeon.state.depth.clear;
+       clear.color_mask  = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
+-      clear.depth_mask  = rmesa->state.stencil.clear;
++      clear.depth_mask  = rmesa->radeon.state.stencil.clear;
+       clear.depth_boxes = depth_boxes;
+ 
+       n--;
+-      b = rmesa->sarea->boxes;
++      b = rmesa->radeon.sarea->boxes;
+       for ( ; n >= 0 ; n-- ) {
+ 	 depth_boxes[n].f[CLEAR_X1] = (float)b[n].x1;
+ 	 depth_boxes[n].f[CLEAR_Y1] = (float)b[n].y1;
+ 	 depth_boxes[n].f[CLEAR_X2] = (float)b[n].x2;
+ 	 depth_boxes[n].f[CLEAR_Y2] = (float)b[n].y2;
+ 	 depth_boxes[n].f[CLEAR_DEPTH] = 
+-	    (float)rmesa->state.depth.clear;
++	    (float)rmesa->radeon.state.depth.clear;
+       }
+ 
+-      ret = drmCommandWrite( rmesa->dri.fd, DRM_RADEON_CLEAR,
++      ret = drmCommandWrite( rmesa->radeon.dri.fd, DRM_RADEON_CLEAR,
+ 			     &clear, sizeof(drm_radeon_clear_t));
+ 
+       if ( ret ) {
+-	 UNLOCK_HARDWARE( rmesa );
++	 UNLOCK_HARDWARE( &rmesa->radeon );
+ 	 fprintf( stderr, "DRM_RADEON_CLEAR: return = %d\n", ret );
+ 	 exit( 1 );
+       }
+    }
+ 
+-   UNLOCK_HARDWARE( rmesa );
+-   rmesa->hw.all_dirty = GL_TRUE;
++   UNLOCK_HARDWARE( &rmesa->radeon );
++   rmesa->radeon.hw.all_dirty = GL_TRUE;
+ }
+ 
+-
+-void radeonWaitForIdleLocked( radeonContextPtr rmesa )
+-{
+-    int fd = rmesa->dri.fd;
+-    int to = 0;
+-    int ret, i = 0;
+-
+-    rmesa->c_drawWaits++;
+-
+-    do {
+-        do {
+-            ret = drmCommandNone( fd, DRM_RADEON_CP_IDLE);
+-        } while ( ret && errno == EBUSY && i++ < RADEON_IDLE_RETRY );
+-    } while ( ( ret == -EBUSY ) && ( to++ < RADEON_TIMEOUT ) );
+-
+-    if ( ret < 0 ) {
+-	UNLOCK_HARDWARE( rmesa );
+-	fprintf( stderr, "Error: Radeon timed out... exiting\n" );
+-	exit( -1 );
+-    }
+-}
+-
+-
+-static void radeonWaitForIdle( radeonContextPtr rmesa )
+-{
+-   LOCK_HARDWARE(rmesa);
+-   radeonWaitForIdleLocked( rmesa );
+-   UNLOCK_HARDWARE(rmesa);
+-}
+-
+-
+-void radeonFlush( GLcontext *ctx )
+-{
+-   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+-
+-   if (RADEON_DEBUG & DEBUG_IOCTL)
+-      fprintf(stderr, "%s\n", __FUNCTION__);
+-
+-   if (rmesa->dma.flush)
+-      rmesa->dma.flush( rmesa );
+-
+-   radeonEmitState( rmesa );
+-   
+-   if (rmesa->store.cmd_used)
+-      radeonFlushCmdBuf( rmesa, __FUNCTION__ );
+-}
+-
+-/* Make sure all commands have been sent to the hardware and have
+- * completed processing.
+- */
+-void radeonFinish( GLcontext *ctx )
+-{
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+-   radeonFlush( ctx );
+-
+-   if (rmesa->do_irqs) {
+-      LOCK_HARDWARE( rmesa );
+-      radeonEmitIrqLocked( rmesa );
+-      UNLOCK_HARDWARE( rmesa );
+-      radeonWaitIrq( rmesa );
+-   }
+-   else
+-      radeonWaitForIdle( rmesa );
+-}
+-
+-
+ void radeonInitIoctlFuncs( GLcontext *ctx )
+ {
+     ctx->Driver.Clear = radeonClear;
+diff --git a/src/mesa/drivers/dri/radeon/radeon_ioctl.h b/src/mesa/drivers/dri/radeon/radeon_ioctl.h
+index 4e3a44d..18805d4 100644
+--- a/src/mesa/drivers/dri/radeon/radeon_ioctl.h
++++ b/src/mesa/drivers/dri/radeon/radeon_ioctl.h
+@@ -38,31 +38,32 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ #include "main/simple_list.h"
+ #include "radeon_lock.h"
++#include "radeon_bocs_wrapper.h"
+ 
+-
+-extern void radeonEmitState( radeonContextPtr rmesa );
+-extern void radeonEmitVertexAOS( radeonContextPtr rmesa,
++extern void radeonEmitVertexAOS( r100ContextPtr rmesa,
+ 				 GLuint vertex_size,
++				 struct radeon_bo *bo,
+ 				 GLuint offset );
+ 
+-extern void radeonEmitVbufPrim( radeonContextPtr rmesa,
++extern void radeonEmitVbufPrim( r100ContextPtr rmesa,
+ 				GLuint vertex_format,
+ 				GLuint primitive,
+ 				GLuint vertex_nr );
+ 
+-extern void radeonFlushElts( radeonContextPtr rmesa );
++extern void radeonFlushElts( GLcontext *ctx );
++			    
+ 
+-extern GLushort *radeonAllocEltsOpenEnded( radeonContextPtr rmesa,
++extern GLushort *radeonAllocEltsOpenEnded( r100ContextPtr rmesa,
+ 					   GLuint vertex_format,
+ 					   GLuint primitive,
+ 					   GLuint min_nr );
+ 
+-extern void radeonEmitAOS( radeonContextPtr rmesa,
+-			   struct radeon_dma_region **regions,
++
++extern void radeonEmitAOS( r100ContextPtr rmesa,
+ 			   GLuint n,
+ 			   GLuint offset );
+ 
+-extern void radeonEmitBlit( radeonContextPtr rmesa,
++extern void radeonEmitBlit( r100ContextPtr rmesa,
+ 			    GLuint color_fmt,
+ 			    GLuint src_pitch,
+ 			    GLuint src_offset,
+@@ -72,30 +73,15 @@ extern void radeonEmitBlit( radeonContextPtr rmesa,
+ 			    GLint dstx, GLint dsty,
+ 			    GLuint w, GLuint h );
+ 
+-extern void radeonEmitWait( radeonContextPtr rmesa, GLuint flags );
+-
+-extern void radeonFlushCmdBuf( radeonContextPtr rmesa, const char * );
+-extern void radeonRefillCurrentDmaRegion( radeonContextPtr rmesa );
++extern void radeonEmitWait( r100ContextPtr rmesa, GLuint flags );
+ 
+-extern void radeonAllocDmaRegion( radeonContextPtr rmesa,
+-				  struct radeon_dma_region *region,
+-				  int bytes, 
+-				  int alignment );
++extern void radeonFlushCmdBuf( r100ContextPtr rmesa, const char * );
+ 
+-extern void radeonReleaseDmaRegion( radeonContextPtr rmesa,
+-				    struct radeon_dma_region *region,
+-				    const char *caller );
+-
+-extern void radeonCopyBuffer( __DRIdrawablePrivate *drawable,
+-			      const drm_clip_rect_t	 *rect);
+-extern void radeonPageFlip( __DRIdrawablePrivate *drawable );
+ extern void radeonFlush( GLcontext *ctx );
+ extern void radeonFinish( GLcontext *ctx );
+-extern void radeonWaitForIdleLocked( radeonContextPtr rmesa );
+-extern void radeonWaitForVBlank( radeonContextPtr rmesa );
+ extern void radeonInitIoctlFuncs( GLcontext *ctx );
+-extern void radeonGetAllParams( radeonContextPtr rmesa );
+-extern void radeonSetUpAtomList( radeonContextPtr rmesa );
++extern void radeonGetAllParams( r100ContextPtr rmesa );
++extern void radeonSetUpAtomList( r100ContextPtr rmesa );
+ 
+ /* ================================================================
+  * Helper macros:
+@@ -105,33 +91,33 @@ extern void radeonSetUpAtomList( radeonContextPtr rmesa );
+  */
+ #define RADEON_NEWPRIM( rmesa )			\
+ do {						\
+-   if ( rmesa->dma.flush )			\
+-      rmesa->dma.flush( rmesa );	\
++   if ( rmesa->radeon.dma.flush )			\
++      rmesa->radeon.dma.flush( rmesa->radeon.glCtx );	\
+ } while (0)
+ 
+ /* Can accomodate several state changes and primitive changes without
+  * actually firing the buffer.
+  */
++
+ #define RADEON_STATECHANGE( rmesa, ATOM )			\
+ do {								\
+    RADEON_NEWPRIM( rmesa );					\
+    rmesa->hw.ATOM.dirty = GL_TRUE;				\
+-   rmesa->hw.is_dirty = GL_TRUE;				\
++   rmesa->radeon.hw.is_dirty = GL_TRUE;				\
+ } while (0)
+ 
+-#define RADEON_DB_STATE( ATOM )			        \
++#define RADEON_DB_STATE( ATOM )				\
+    memcpy( rmesa->hw.ATOM.lastcmd, rmesa->hw.ATOM.cmd,	\
+ 	   rmesa->hw.ATOM.cmd_size * 4)
+ 
+-static INLINE int RADEON_DB_STATECHANGE( 
+-   radeonContextPtr rmesa,
+-   struct radeon_state_atom *atom )
++static INLINE int RADEON_DB_STATECHANGE(r100ContextPtr rmesa,
++					struct radeon_state_atom *atom )
+ {
+    if (memcmp(atom->cmd, atom->lastcmd, atom->cmd_size*4)) {
+-      int *tmp;
++      GLuint *tmp;
+       RADEON_NEWPRIM( rmesa );
+       atom->dirty = GL_TRUE;
+-      rmesa->hw.is_dirty = GL_TRUE;
++      rmesa->radeon.hw.is_dirty = GL_TRUE;
+       tmp = atom->cmd; 
+       atom->cmd = atom->lastcmd;
+       atom->lastcmd = tmp;
+@@ -141,16 +127,6 @@ static INLINE int RADEON_DB_STATECHANGE(
+       return 0;
+ }
+ 
+-
+-/* Fire the buffered vertices no matter what.
+- */
+-#define RADEON_FIREVERTICES( rmesa )			\
+-do {							\
+-   if ( rmesa->store.cmd_used || rmesa->dma.flush ) {	\
+-      radeonFlush( rmesa->glCtx );			\
+-   }							\
+-} while (0)
+-
+ /* Command lengths.  Note that any time you ensure ELTS_BUFSZ or VBUF_BUFSZ
+  * are available, you will also be adding an rmesa->state.max_state_size because
+  * r200EmitState is called from within r200EmitVbufPrim and r200FlushElts.
+@@ -167,36 +143,37 @@ do {							\
+ #define VBUF_BUFSZ	(4 * sizeof(int))
+ #endif
+ 
+-/* Ensure that a minimum amount of space is available in the command buffer.
+- * This is used to ensure atomicity of state updates with the rendering requests
+- * that rely on them.
+- *
+- * An alternative would be to implement a "soft lock" such that when the buffer
+- * wraps at an inopportune time, we grab the lock, flush the current buffer,
+- * and hang on to the lock until the critical section is finished and we flush
+- * the buffer again and unlock.
+- */
+-static INLINE void radeonEnsureCmdBufSpace( radeonContextPtr rmesa,
+-					      int bytes )
+-{
+-   if (rmesa->store.cmd_used + bytes > RADEON_CMD_BUF_SZ)
+-      radeonFlushCmdBuf( rmesa, __FUNCTION__ );
+-   assert( bytes <= RADEON_CMD_BUF_SZ );
+-}
+ 
+-/* Alloc space in the command buffer
+- */
+-static INLINE char *radeonAllocCmdBuf( radeonContextPtr rmesa,
+-					 int bytes, const char *where )
++static inline uint32_t cmdpacket3(int cmd_type)
+ {
+-   if (rmesa->store.cmd_used + bytes > RADEON_CMD_BUF_SZ)
+-      radeonFlushCmdBuf( rmesa, __FUNCTION__ );
++  drm_radeon_cmd_header_t cmd;
++
++  cmd.i = 0;
++  cmd.header.cmd_type = cmd_type;
++
++  return (uint32_t)cmd.i;
+ 
+-   {
+-      char *head = rmesa->store.cmd_buf + rmesa->store.cmd_used;
+-      rmesa->store.cmd_used += bytes;
+-      return head;
+-   }
+ }
+ 
++#define OUT_BATCH_PACKET3(packet, num_extra) do {	      \
++    if (!b_l_rmesa->radeonScreen->kernel_mm) {		      \
++      OUT_BATCH(cmdpacket3(RADEON_CMD_PACKET3));				      \
++      OUT_BATCH(CP_PACKET3((packet), (num_extra)));	      \
++    } else {						      \
++      OUT_BATCH(CP_PACKET2);				      \
++      OUT_BATCH(CP_PACKET3((packet), (num_extra)));	      \
++    }							      \
++  } while(0)
++
++#define OUT_BATCH_PACKET3_CLIP(packet, num_extra) do {	      \
++    if (!b_l_rmesa->radeonScreen->kernel_mm) {		      \
++      OUT_BATCH(cmdpacket3(RADEON_CMD_PACKET3_CLIP));	      \
++      OUT_BATCH(CP_PACKET3((packet), (num_extra)));	      \
++    } else {						      \
++      OUT_BATCH(CP_PACKET2);				      \
++      OUT_BATCH(CP_PACKET3((packet), (num_extra)));	      \
++    }							      \
++  } while(0)
++
++
+ #endif /* __RADEON_IOCTL_H__ */
+diff --git a/src/mesa/drivers/dri/radeon/radeon_lock.c b/src/mesa/drivers/dri/radeon/radeon_lock.c
+index 64bb3ca..9a7e76b 100644
+--- a/src/mesa/drivers/dri/radeon/radeon_lock.c
++++ b/src/mesa/drivers/dri/radeon/radeon_lock.c
+@@ -41,12 +41,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ #include "main/glheader.h"
+ #include "main/mtypes.h"
+-#include "radeon_context.h"
++#include "main/colormac.h"
++#include "dri_util.h"
++#include "radeon_screen.h"
++#include "radeon_common.h"
+ #include "radeon_lock.h"
+-#include "radeon_tex.h"
+-#include "radeon_state.h"
+-#include "radeon_ioctl.h"
+-
+ #include "drirenderbuffer.h"
+ 
+ #if DEBUG_LOCKING
+@@ -56,13 +55,28 @@ int prevLockLine = 0;
+ 
+ /* Turn on/off page flipping according to the flags in the sarea:
+  */
+-static void radeonUpdatePageFlipping(radeonContextPtr rmesa)
++void radeonUpdatePageFlipping(radeonContextPtr rmesa)
+ {
++	int use_back;
++	__DRIdrawablePrivate *const drawable = rmesa->dri.drawable;
++	GLframebuffer *fb = drawable->driverPrivate;
++
+ 	rmesa->doPageFlip = rmesa->sarea->pfState;
+ 	if (rmesa->glCtx->WinSysDrawBuffer) {
+-		driFlipRenderbuffers(rmesa->glCtx->WinSysDrawBuffer,
+-				     rmesa->sarea->pfCurrentPage);
++		rmesa->vtbl.update_draw_buffer(rmesa->glCtx);
+ 	}
++
++	use_back = rmesa->glCtx->DrawBuffer ?
++	    (rmesa->glCtx->DrawBuffer->_ColorDrawBufferIndexes[0] ==
++	     BUFFER_BACK_LEFT) : 1;
++	use_back ^= (rmesa->sarea->pfCurrentPage == 1);
++
++	if (use_back)
++		rmesa->state.color.rrb = (void *)fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
++	else
++		rmesa->state.color.rrb = (void *)fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
++
++	rmesa->state.depth.rrb = (void *)fb->Attachment[BUFFER_DEPTH].Renderbuffer;
+ }
+ 
+ /* Update the hardware state.  This is called if another context has
+@@ -80,6 +94,8 @@ void radeonGetLock(radeonContextPtr rmesa, GLuint flags)
+ 	__DRIscreenPrivate *sPriv = rmesa->dri.screen;
+ 	drm_radeon_sarea_t *sarea = rmesa->sarea;
+ 
++	assert(drawable != NULL);
++
+ 	drmGetLock(rmesa->dri.fd, rmesa->dri.hwContext, flags);
+ 
+ 	/* The window might have moved, so we might need to get new clip
+@@ -98,27 +114,11 @@ void radeonGetLock(radeonContextPtr rmesa, GLuint flags)
+ 	if (rmesa->lastStamp != drawable->lastStamp) {
+ 		radeonUpdatePageFlipping(rmesa);
+ 		radeonSetCliprects(rmesa);
+-		radeonUpdateViewportOffset(rmesa->glCtx);
++		rmesa->vtbl.update_viewport_offset(rmesa->glCtx);
+ 		driUpdateFramebufferSize(rmesa->glCtx, drawable);
+ 	}
+ 
+-	RADEON_STATECHANGE(rmesa, ctx);
+-	if (rmesa->sarea->tiling_enabled) {
+-		rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |=
+-		    RADEON_COLOR_TILE_ENABLE;
+-	} else {
+-		rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] &=
+-		    ~RADEON_COLOR_TILE_ENABLE;
+-	}
+-
+-	if (sarea->ctx_owner != rmesa->dri.hwContext) {
+-		int i;
+-		sarea->ctx_owner = rmesa->dri.hwContext;
+-
+-		for (i = 0; i < rmesa->nr_heaps; i++) {
+-			DRI_AGE_TEXTURES(rmesa->texture_heaps[i]);
+-		}
+-	}
++	rmesa->vtbl.get_lock(rmesa);
+ 
+ 	rmesa->lost_context = GL_TRUE;
+ }
+diff --git a/src/mesa/drivers/dri/radeon/radeon_lock.h b/src/mesa/drivers/dri/radeon/radeon_lock.h
+index 86e96aa..f5ebb8d 100644
+--- a/src/mesa/drivers/dri/radeon/radeon_lock.h
++++ b/src/mesa/drivers/dri/radeon/radeon_lock.h
+@@ -39,8 +39,12 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+  *   Kevin E. Martin <martin@valinux.com>
+  */
+ 
+-#ifndef __RADEON_LOCK_H__
+-#define __RADEON_LOCK_H__
++#ifndef COMMON_LOCK_H
++#define COMMON_LOCK_H
++
++#include "main/colormac.h"
++#include "radeon_screen.h"
++#include "radeon_common.h"
+ 
+ extern void radeonGetLock(radeonContextPtr rmesa, GLuint flags);
+ 
+@@ -94,19 +98,23 @@ extern int prevLockLine;
+    do {								\
+       char __ret = 0;						\
+       DEBUG_CHECK_LOCK();					\
+-      DRM_CAS( (rmesa)->dri.hwLock, (rmesa)->dri.hwContext,		\
+-	       (DRM_LOCK_HELD | (rmesa)->dri.hwContext), __ret );	\
+-      if ( __ret )						\
+-	 radeonGetLock( (rmesa), 0 );				\
+-      DEBUG_LOCK();						\
++      if (!(rmesa)->radeonScreen->driScreen->dri2.enabled) {		\
++	DRM_CAS( (rmesa)->dri.hwLock, (rmesa)->dri.hwContext,		\
++		 (DRM_LOCK_HELD | (rmesa)->dri.hwContext), __ret );	\
++	if ( __ret )							\
++	  radeonGetLock( (rmesa), 0 );					\
++      }									\
++      DEBUG_LOCK();							\
+    } while (0)
+ 
+ #define UNLOCK_HARDWARE( rmesa )					\
+    do {									\
+-      DRM_UNLOCK( (rmesa)->dri.fd,					\
+-		  (rmesa)->dri.hwLock,					\
+-		  (rmesa)->dri.hwContext );				\
+-      DEBUG_RESET();							\
++     if (!(rmesa)->radeonScreen->driScreen->dri2.enabled) {		\
++       DRM_UNLOCK( (rmesa)->dri.fd,					\
++		   (rmesa)->dri.hwLock,					\
++		   (rmesa)->dri.hwContext );				\
++       DEBUG_RESET();							\
++     }									\
+    } while (0)
+ 
+-#endif				/* __RADEON_LOCK_H__ */
++#endif
+diff --git a/src/mesa/drivers/dri/radeon/radeon_maos_arrays.c b/src/mesa/drivers/dri/radeon/radeon_maos_arrays.c
+index 31eea13..7f5da16 100644
+--- a/src/mesa/drivers/dri/radeon/radeon_maos_arrays.c
++++ b/src/mesa/drivers/dri/radeon/radeon_maos_arrays.c
+@@ -48,160 +48,35 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "radeon_maos.h"
+ #include "radeon_tcl.h"
+ 
+-#if 0
+-/* Usage:
+- *   - from radeon_tcl_render
+- *   - call radeonEmitArrays to ensure uptodate arrays in dma
+- *   - emit primitives (new type?) which reference the data
+- *       -- need to use elts for lineloop, quads, quadstrip/flat
+- *       -- other primitives are all well-formed (need tristrip-1,fake-poly)
+- *
+- */
+-static void emit_ubyte_rgba3( GLcontext *ctx,
+-		       struct radeon_dma_region *rvb,
+-		       char *data,
+-		       int stride,
+-		       int count )
++static void emit_vecfog(GLcontext *ctx, struct radeon_aos *aos,
++			GLvoid *data, int stride, int count)
+ {
+    int i;
+-   radeon_color_t *out = (radeon_color_t *)(rvb->start + rvb->address);
+-
+-   if (RADEON_DEBUG & DEBUG_VERTS)
+-      fprintf(stderr, "%s count %d stride %d out %p\n",
+-	      __FUNCTION__, count, stride, (void *)out);
+-
+-   for (i = 0; i < count; i++) {
+-      out->red   = *data;
+-      out->green = *(data+1);
+-      out->blue  = *(data+2);
+-      out->alpha = 0xFF;
+-      out++;
+-      data += stride;
+-   }
+-}
+-
+-static void emit_ubyte_rgba4( GLcontext *ctx,
+-			      struct radeon_dma_region *rvb,
+-			      char *data,
+-			      int stride,
+-			      int count )
+-{
+-   int i;
+-   int *out = (int *)(rvb->address + rvb->start);
++   uint32_t *out;
++   int size = 1;
++   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ 
+    if (RADEON_DEBUG & DEBUG_VERTS)
+       fprintf(stderr, "%s count %d stride %d\n",
+ 	      __FUNCTION__, count, stride);
+ 
+-   if (stride == 4)
+-       COPY_DWORDS( out, data, count );
+-   else
+-      for (i = 0; i < count; i++) {
+-	 *out++ = LE32_TO_CPU(*(int *)data);
+-	 data += stride;
+-      }
+-}
+-
+-
+-static void emit_ubyte_rgba( GLcontext *ctx,
+-			     struct radeon_dma_region *rvb,
+-			     char *data,
+-			     int size,
+-			     int stride,
+-			     int count )
+-{
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+-
+-   if (RADEON_DEBUG & DEBUG_VERTS)
+-      fprintf(stderr, "%s %d/%d\n", __FUNCTION__, count, size);
+-
+-   assert (!rvb->buf);
+-
+    if (stride == 0) {
+-      radeonAllocDmaRegion( rmesa, rvb, 4, 4 );
++      radeonAllocDmaRegion( rmesa, &aos->bo, &aos->offset, size * 4, 32 );
+       count = 1;
+-      rvb->aos_start = GET_START(rvb);
+-      rvb->aos_stride = 0;
+-      rvb->aos_size = 1;
++      aos->stride = 0;
+    }
+    else {
+-      radeonAllocDmaRegion( rmesa, rvb, 4 * count, 4 );	/* alignment? */
+-      rvb->aos_start = GET_START(rvb);
+-      rvb->aos_stride = 1;
+-      rvb->aos_size = 1;
++      radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
++      aos->stride = size;
+    }
+ 
+-   /* Emit the data
+-    */
+-   switch (size) {
+-   case 3:
+-      emit_ubyte_rgba3( ctx, rvb, data, stride, count );
+-      break;
+-   case 4:
+-      emit_ubyte_rgba4( ctx, rvb, data, stride, count );
+-      break;
+-   default:
+-      assert(0);
+-      exit(1);
+-      break;
+-   }
+-}
+-#endif
+-
+-#if defined(USE_X86_ASM)
+-#define COPY_DWORDS( dst, src, nr )					\
+-do {									\
+-	int __tmp;							\
+-	__asm__ __volatile__( "rep ; movsl"				\
+-			      : "=%c" (__tmp), "=D" (dst), "=S" (__tmp)	\
+-			      : "0" (nr),				\
+-			        "D" ((long)dst),			\
+-			        "S" ((long)src) );			\
+-} while (0)
+-#else
+-#define COPY_DWORDS( dst, src, nr )		\
+-do {						\
+-   int j;					\
+-   for ( j = 0 ; j < nr ; j++ )			\
+-      dst[j] = ((int *)src)[j];			\
+-   dst += nr;					\
+-} while (0)
+-#endif
+-
+-static void emit_vecfog( GLcontext *ctx,
+-			 struct radeon_dma_region *rvb,
+-			 char *data,
+-			 int stride,
+-			 int count )
+-{
+-   int i;
+-   GLfloat *out;
+-
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++   aos->components = size;
++   aos->count = count;
+ 
+-   if (RADEON_DEBUG & DEBUG_VERTS)
+-      fprintf(stderr, "%s count %d stride %d\n",
+-	      __FUNCTION__, count, stride);
+-
+-   assert (!rvb->buf);
+-
+-   if (stride == 0) {
+-      radeonAllocDmaRegion( rmesa, rvb, 4, 4 );
+-      count = 1;
+-      rvb->aos_start = GET_START(rvb);
+-      rvb->aos_stride = 0;
+-      rvb->aos_size = 1;
+-   }
+-   else {
+-      radeonAllocDmaRegion( rmesa, rvb, count * 4, 4 );	/* alignment? */
+-      rvb->aos_start = GET_START(rvb);
+-      rvb->aos_stride = 1;
+-      rvb->aos_size = 1;
+-   }
+ 
+    /* Emit the data
+     */
+-   out = (GLfloat *)(rvb->address + rvb->start);
++   out = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
+    for (i = 0; i < count; i++) {
+       out[0] = radeonComputeFogBlendFactor( ctx, *(GLfloat *)data );
+       out++;
+@@ -209,169 +84,9 @@ static void emit_vecfog( GLcontext *ctx,
+    }
+ }
+ 
+-static void emit_vec4( GLcontext *ctx,
+-		       struct radeon_dma_region *rvb,
+-		       char *data,
+-		       int stride,
+-		       int count )
+-{
+-   int i;
+-   int *out = (int *)(rvb->address + rvb->start);
+-
+-   if (RADEON_DEBUG & DEBUG_VERTS)
+-      fprintf(stderr, "%s count %d stride %d\n",
+-	      __FUNCTION__, count, stride);
+-
+-   if (stride == 4)
+-      COPY_DWORDS( out, data, count );
+-   else
+-      for (i = 0; i < count; i++) {
+-	 out[0] = *(int *)data;
+-	 out++;
+-	 data += stride;
+-      }
+-}
+-
+-
+-static void emit_vec8( GLcontext *ctx,
+-		       struct radeon_dma_region *rvb,
+-		       char *data,
+-		       int stride,
+-		       int count )
+-{
+-   int i;
+-   int *out = (int *)(rvb->address + rvb->start);
+-
+-   if (RADEON_DEBUG & DEBUG_VERTS)
+-      fprintf(stderr, "%s count %d stride %d\n",
+-	      __FUNCTION__, count, stride);
+-
+-   if (stride == 8)
+-      COPY_DWORDS( out, data, count*2 );
+-   else
+-      for (i = 0; i < count; i++) {
+-	 out[0] = *(int *)data;
+-	 out[1] = *(int *)(data+4);
+-	 out += 2;
+-	 data += stride;
+-      }
+-}
+-
+-static void emit_vec12( GLcontext *ctx,
+-		       struct radeon_dma_region *rvb,
+-		       char *data,
+-		       int stride,
+-		       int count )
+-{
+-   int i;
+-   int *out = (int *)(rvb->address + rvb->start);
+-
+-   if (RADEON_DEBUG & DEBUG_VERTS)
+-      fprintf(stderr, "%s count %d stride %d out %p data %p\n",
+-	      __FUNCTION__, count, stride, (void *)out, (void *)data);
+-
+-   if (stride == 12)
+-      COPY_DWORDS( out, data, count*3 );
+-   else
+-      for (i = 0; i < count; i++) {
+-	 out[0] = *(int *)data;
+-	 out[1] = *(int *)(data+4);
+-	 out[2] = *(int *)(data+8);
+-	 out += 3;
+-	 data += stride;
+-      }
+-}
+-
+-static void emit_vec16( GLcontext *ctx,
+-			struct radeon_dma_region *rvb,
+-			char *data,
+-			int stride,
+-			int count )
+-{
+-   int i;
+-   int *out = (int *)(rvb->address + rvb->start);
+-
+-   if (RADEON_DEBUG & DEBUG_VERTS)
+-      fprintf(stderr, "%s count %d stride %d\n",
+-	      __FUNCTION__, count, stride);
+-
+-   if (stride == 16)
+-      COPY_DWORDS( out, data, count*4 );
+-   else
+-      for (i = 0; i < count; i++) {
+-	 out[0] = *(int *)data;
+-	 out[1] = *(int *)(data+4);
+-	 out[2] = *(int *)(data+8);
+-	 out[3] = *(int *)(data+12);
+-	 out += 4;
+-	 data += stride;
+-      }
+-}
+-
+-
+-static void emit_vector( GLcontext *ctx,
+-			 struct radeon_dma_region *rvb,
+-			 char *data,
+-			 int size,
+-			 int stride,
+-			 int count )
+-{
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+-
+-   if (RADEON_DEBUG & DEBUG_VERTS)
+-      fprintf(stderr, "%s count %d size %d stride %d\n",
+-	      __FUNCTION__, count, size, stride);
+-
+-   assert (!rvb->buf);
+-
+-   if (stride == 0) {
+-      radeonAllocDmaRegion( rmesa, rvb, size * 4, 4 );
+-      count = 1;
+-      rvb->aos_start = GET_START(rvb);
+-      rvb->aos_stride = 0;
+-      rvb->aos_size = size;
+-   }
+-   else {
+-      radeonAllocDmaRegion( rmesa, rvb, size * count * 4, 4 );	/* alignment? */
+-      rvb->aos_start = GET_START(rvb);
+-      rvb->aos_stride = size;
+-      rvb->aos_size = size;
+-   }
+-
+-   /* Emit the data
+-    */
+-   switch (size) {
+-   case 1:
+-      emit_vec4( ctx, rvb, data, stride, count );
+-      break;
+-   case 2:
+-      emit_vec8( ctx, rvb, data, stride, count );
+-      break;
+-   case 3:
+-      emit_vec12( ctx, rvb, data, stride, count );
+-      break;
+-   case 4:
+-      emit_vec16( ctx, rvb, data, stride, count );
+-      break;
+-   default:
+-      assert(0);
+-      exit(1);
+-      break;
+-   }
+-
+-}
+-
+-
+-
+-static void emit_s0_vec( GLcontext *ctx,
+-			 struct radeon_dma_region *rvb,
+-			 char *data,
+-			 int stride,
+-			 int count )
++static void emit_s0_vec(uint32_t *out, GLvoid *data, int stride, int count)
+ {
+    int i;
+-   int *out = (int *)(rvb->address + rvb->start);
+-
+    if (RADEON_DEBUG & DEBUG_VERTS)
+       fprintf(stderr, "%s count %d stride %d\n",
+ 	      __FUNCTION__, count, stride);
+@@ -384,14 +99,9 @@ static void emit_s0_vec( GLcontext *ctx,
+    }
+ }
+ 
+-static void emit_stq_vec( GLcontext *ctx,
+-			 struct radeon_dma_region *rvb,
+-			 char *data,
+-			 int stride,
+-			 int count )
++static void emit_stq_vec(uint32_t *out, GLvoid *data, int stride, int count)
+ {
+    int i;
+-   int *out = (int *)(rvb->address + rvb->start);
+ 
+    if (RADEON_DEBUG & DEBUG_VERTS)
+       fprintf(stderr, "%s count %d stride %d\n",
+@@ -409,21 +119,16 @@ static void emit_stq_vec( GLcontext *ctx,
+ 
+ 
+ 
+-static void emit_tex_vector( GLcontext *ctx,
+-			     struct radeon_dma_region *rvb,
+-			     char *data,
+-			     int size,
+-			     int stride,
+-			     int count )
++static void emit_tex_vector(GLcontext *ctx, struct radeon_aos *aos,
++			    GLvoid *data, int size, int stride, int count)
+ {
+    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+    int emitsize;
++   uint32_t *out;
+ 
+    if (RADEON_DEBUG & DEBUG_VERTS)
+       fprintf(stderr, "%s %d/%d\n", __FUNCTION__, count, size);
+ 
+-   assert (!rvb->buf);
+-
+    switch (size) {
+    case 4: emitsize = 3; break;
+    case 3: emitsize = 3; break;
+@@ -432,34 +137,33 @@ static void emit_tex_vector( GLcontext *ctx,
+ 
+ 
+    if (stride == 0) {
+-      radeonAllocDmaRegion( rmesa, rvb, 4 * emitsize, 4 );
++      radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, emitsize * 4, 32);
+       count = 1;
+-      rvb->aos_start = GET_START(rvb);
+-      rvb->aos_stride = 0;
+-      rvb->aos_size = emitsize;
++      aos->stride = 0;
+    }
+    else {
+-      radeonAllocDmaRegion( rmesa, rvb, 4 * emitsize * count, 4 );
+-      rvb->aos_start = GET_START(rvb);
+-      rvb->aos_stride = emitsize;
+-      rvb->aos_size = emitsize;
++      radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, emitsize * count * 4, 32);
++      aos->stride = emitsize;
+    }
+ 
++   aos->components = emitsize;
++   aos->count = count;
+ 
+    /* Emit the data
+     */
++   out = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
+    switch (size) {
+    case 1:
+-      emit_s0_vec( ctx, rvb, data, stride, count ); 
++      emit_s0_vec( out, data, stride, count );
+       break;
+    case 2:
+-      emit_vec8( ctx, rvb, data, stride, count );
++      radeonEmitVec8( out, data, stride, count );
+       break;
+    case 3:
+-      emit_vec12( ctx, rvb, data, stride, count );
++      radeonEmitVec12( out, data, stride, count );
+       break;
+    case 4:
+-      emit_stq_vec( ctx, rvb, data, stride, count );
++      emit_stq_vec( out, data, stride, count );
+       break;
+    default:
+       assert(0);
+@@ -476,9 +180,8 @@ static void emit_tex_vector( GLcontext *ctx,
+  */
+ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
++   r100ContextPtr rmesa = R100_CONTEXT( ctx );
+    struct vertex_buffer *VB = &TNL_CONTEXT( ctx )->vb;
+-   struct radeon_dma_region **component = rmesa->tcl.aos_components;
+    GLuint nr = 0;
+    GLuint vfmt = 0;
+    GLuint count = VB->Count;
+@@ -491,12 +194,12 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
+ 
+    if (1) {
+       if (!rmesa->tcl.obj.buf) 
+-	 emit_vector( ctx, 
+-		      &rmesa->tcl.obj, 
+-		      (char *)VB->ObjPtr->data,
+-		      VB->ObjPtr->size,
+-		      VB->ObjPtr->stride,
+-		      count);
++	rcommon_emit_vector( ctx, 
++			     &(rmesa->tcl.aos[nr]),
++			     (char *)VB->ObjPtr->data,
++			     VB->ObjPtr->size,
++			     VB->ObjPtr->stride,
++			     count);
+ 
+       switch( VB->ObjPtr->size ) {
+       case 4: vfmt |= RADEON_CP_VC_FRMT_W0;
+@@ -505,21 +208,21 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
+       default:
+          break;
+       }
+-      component[nr++] = &rmesa->tcl.obj;
++      nr++;
+    }
+    
+ 
+    if (inputs & VERT_BIT_NORMAL) {
+       if (!rmesa->tcl.norm.buf)
+-	 emit_vector( ctx, 
+-		      &(rmesa->tcl.norm), 
+-		      (char *)VB->NormalPtr->data,
+-		      3,
+-		      VB->NormalPtr->stride,
+-		      count);
++	 rcommon_emit_vector( ctx, 
++			      &(rmesa->tcl.aos[nr]),
++			      (char *)VB->NormalPtr->data,
++			      3,
++			      VB->NormalPtr->stride,
++			      count);
+ 
+       vfmt |= RADEON_CP_VC_FRMT_N0;
+-      component[nr++] = &rmesa->tcl.norm;
++      nr++;
+    }
+ 
+    if (inputs & VERT_BIT_COLOR0) {
+@@ -537,31 +240,30 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
+       }
+ 
+       if (!rmesa->tcl.rgba.buf)
+-	 emit_vector( ctx,
+-		      &(rmesa->tcl.rgba),
+-		      (char *)VB->ColorPtr[0]->data,
+-		      emitsize,
+-		      VB->ColorPtr[0]->stride,
+-		      count);
+-
+-
+-      component[nr++] = &rmesa->tcl.rgba;
++	rcommon_emit_vector( ctx,
++			     &(rmesa->tcl.aos[nr]),
++			     (char *)VB->ColorPtr[0]->data,
++			     emitsize,
++			     VB->ColorPtr[0]->stride,
++			     count);
++
++      nr++;
+    }
+ 
+ 
+    if (inputs & VERT_BIT_COLOR1) {
+       if (!rmesa->tcl.spec.buf) {
+ 
+-	 emit_vector( ctx,
+-		      &rmesa->tcl.spec,
+-		      (char *)VB->SecondaryColorPtr[0]->data,
+-		      3,
+-		      VB->SecondaryColorPtr[0]->stride,
+-		      count);
++	rcommon_emit_vector( ctx,
++			     &(rmesa->tcl.aos[nr]),
++			     (char *)VB->SecondaryColorPtr[0]->data,
++			     3,
++			     VB->SecondaryColorPtr[0]->stride,
++			     count);
+       }
+ 
+       vfmt |= RADEON_CP_VC_FRMT_FPSPEC;
+-      component[nr++] = &rmesa->tcl.spec;
++      nr++;
+    }
+ 
+ /* FIXME: not sure if this is correct. May need to stitch this together with
+@@ -570,13 +272,13 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
+    if (inputs & VERT_BIT_FOG) {
+       if (!rmesa->tcl.fog.buf)
+ 	 emit_vecfog( ctx,
+-		      &(rmesa->tcl.fog),
++		      &(rmesa->tcl.aos[nr]),
+ 		      (char *)VB->FogCoordPtr->data,
+ 		      VB->FogCoordPtr->stride,
+ 		      count);
+ 
+       vfmt |= RADEON_CP_VC_FRMT_FPFOG;
+-      component[nr++] = &rmesa->tcl.fog;
++      nr++;
+    }
+ 
+ 
+@@ -587,11 +289,12 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
+       if (inputs & VERT_BIT_TEX(unit)) {
+ 	 if (!rmesa->tcl.tex[unit].buf)
+ 	    emit_tex_vector( ctx,
+-			     &(rmesa->tcl.tex[unit]),
++			     &(rmesa->tcl.aos[nr]),
+ 			     (char *)VB->TexCoordPtr[unit]->data,
+ 			     VB->TexCoordPtr[unit]->size,
+ 			     VB->TexCoordPtr[unit]->stride,
+ 			     count );
++	 nr++;
+ 
+ 	 vfmt |= RADEON_ST_BIT(unit);
+          /* assume we need the 3rd coord if texgen is active for r/q OR at least
+@@ -609,7 +312,6 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
+ 		 (swaptexmatcol != ((rmesa->TexMatColSwap >> unit) & 1)))
+ 	       radeonUploadTexMatrix( rmesa, unit, swaptexmatcol ) ;
+ 	 }
+-	 component[nr++] = &rmesa->tcl.tex[unit];
+       }
+    }
+ 
+@@ -625,31 +327,13 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
+ 
+ void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs )
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+-   GLuint unit;
+-
+-#if 0
+-   if (RADEON_DEBUG & DEBUG_VERTS) 
+-      _tnl_print_vert_flags( __FUNCTION__, newinputs );
+-#endif
+-
+-   if (newinputs & VERT_BIT_POS) 
+-     radeonReleaseDmaRegion( rmesa, &rmesa->tcl.obj, __FUNCTION__ );
+-
+-   if (newinputs & VERT_BIT_NORMAL) 
+-      radeonReleaseDmaRegion( rmesa, &rmesa->tcl.norm, __FUNCTION__ );
+-
+-   if (newinputs & VERT_BIT_COLOR0) 
+-      radeonReleaseDmaRegion( rmesa, &rmesa->tcl.rgba, __FUNCTION__ );
+-
+-   if (newinputs & VERT_BIT_COLOR1) 
+-      radeonReleaseDmaRegion( rmesa, &rmesa->tcl.spec, __FUNCTION__ );
+-      
+-   if (newinputs & VERT_BIT_FOG)
+-      radeonReleaseDmaRegion( rmesa, &rmesa->tcl.fog, __FUNCTION__ );
++   r100ContextPtr rmesa = R100_CONTEXT( ctx );
++   int i;
+ 
+-   for (unit = 0 ; unit < ctx->Const.MaxTextureUnits; unit++) {
+-      if (newinputs & VERT_BIT_TEX(unit))
+-         radeonReleaseDmaRegion( rmesa, &rmesa->tcl.tex[unit], __FUNCTION__ );
++   for (i = 0; i < rmesa->tcl.nr_aos_components; i++) {
++     if (rmesa->tcl.aos[i].bo) {
++       radeon_bo_unref(rmesa->tcl.aos[i].bo);
++       rmesa->tcl.aos[i].bo = NULL;
++     }
+    }
+ }
+diff --git a/src/mesa/drivers/dri/radeon/radeon_maos_verts.c b/src/mesa/drivers/dri/radeon/radeon_maos_verts.c
+index 126d072..d468a97 100644
+--- a/src/mesa/drivers/dri/radeon/radeon_maos_verts.c
++++ b/src/mesa/drivers/dri/radeon/radeon_maos_verts.c
+@@ -310,7 +310,7 @@ static void init_tcl_verts( void )
+ 
+ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+    struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
+    GLuint req = 0;
+    GLuint unit;
+@@ -374,14 +374,15 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
+ 	 break;
+ 
+    if (rmesa->tcl.vertex_format == setup_tab[i].vertex_format &&
+-       rmesa->tcl.indexed_verts.buf)
++       rmesa->tcl.aos[0].bo)
+       return;
+ 
+-   if (rmesa->tcl.indexed_verts.buf)
++   if (rmesa->tcl.aos[0].bo)
+       radeonReleaseArrays( ctx, ~0 );
+ 
+-   radeonAllocDmaRegion( rmesa,
+-			 &rmesa->tcl.indexed_verts, 
++   radeonAllocDmaRegion( &rmesa->radeon,
++			 &rmesa->tcl.aos[0].bo,
++			 &rmesa->tcl.aos[0].offset,
+ 			 VB->Count * setup_tab[i].vertex_size * 4, 
+ 			 4);
+ 
+@@ -421,15 +422,11 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
+ 
+ 
+    setup_tab[i].emit( ctx, 0, VB->Count, 
+-		      rmesa->tcl.indexed_verts.address + 
+-		      rmesa->tcl.indexed_verts.start );
++		      rmesa->tcl.aos[0].bo->ptr + rmesa->tcl.aos[0].offset);
+ 
++   //   rmesa->tcl.aos[0].size = setup_tab[i].vertex_size;
++   rmesa->tcl.aos[0].stride = setup_tab[i].vertex_size;
+    rmesa->tcl.vertex_format = setup_tab[i].vertex_format;
+-   rmesa->tcl.indexed_verts.aos_start = GET_START( &rmesa->tcl.indexed_verts );
+-   rmesa->tcl.indexed_verts.aos_size = setup_tab[i].vertex_size;
+-   rmesa->tcl.indexed_verts.aos_stride = setup_tab[i].vertex_size;
+-
+-   rmesa->tcl.aos_components[0] = &rmesa->tcl.indexed_verts;
+    rmesa->tcl.nr_aos_components = 1;
+ }
+ 
+@@ -437,13 +434,13 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
+ 
+ void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs )
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+-
+-#if 0
+-   if (RADEON_DEBUG & DEBUG_VERTS) 
+-      _tnl_print_vert_flags( __FUNCTION__, newinputs );
+-#endif
++   r100ContextPtr rmesa = R100_CONTEXT( ctx );
++   int i;
+ 
+-   if (newinputs) 
+-     radeonReleaseDmaRegion( rmesa, &rmesa->tcl.indexed_verts, __FUNCTION__ );
++   for (i = 0; i < rmesa->tcl.nr_aos_components; i++) {
++      if (rmesa->tcl.aos[i].bo) {
++         radeon_bo_unref(rmesa->tcl.aos[i].bo);
++         rmesa->tcl.aos[i].bo = NULL;
++      }
++   }
+ }
+diff --git a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c
+new file mode 100644
+index 0000000..3203ee1
+--- /dev/null
++++ b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c
+@@ -0,0 +1,360 @@
++/*
++ * Copyright (C) 2008 Nicolai Haehnle.
++ *
++ * All Rights Reserved.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining
++ * a copy of this software and associated documentation files (the
++ * "Software"), to deal in the Software without restriction, including
++ * without limitation the rights to use, copy, modify, merge, publish,
++ * distribute, sublicense, and/or sell copies of the Software, and to
++ * permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * The above copyright notice and this permission notice (including the
++ * next paragraph) shall be included in all copies or substantial
++ * portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
++ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
++ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
++ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
++ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
++ *
++ */
++
++#include "radeon_mipmap_tree.h"
++
++#include <errno.h>
++#include <unistd.h>
++
++#include "main/simple_list.h"
++#include "main/texcompress.h"
++#include "main/texformat.h"
++
++/* Size in bytes of one compressed image. Starts from the size Mesa
++ * reports and scales it up for narrow levels: DXT1 by 4 (width + 3 < 8)
++ * or by 2 (width + 3 < 16); every other format by 2 (width + 3 < 8).
++ */
++static GLuint radeon_compressed_texture_size(GLcontext *ctx,
++		GLsizei width, GLsizei height, GLsizei depth,
++		GLuint mesaFormat)
++{
++	const GLboolean isDxt1 = (mesaFormat == MESA_FORMAT_RGB_DXT1 ||
++				  mesaFormat == MESA_FORMAT_RGBA_DXT1);
++	GLuint bytes = _mesa_compressed_texture_size(ctx, width, height, depth, mesaFormat);
++	GLuint scale = 1;
++
++	/* width + 3 < 8: width one block */
++	if (width + 3 < 8)
++		scale = isDxt1 ? 4 : 2;
++	else if (isDxt1 && width + 3 < 16)
++		scale = 2;
++
++	return bytes * scale;
++}
++
++
++/* Per-format value that radeon_miptree_create() stores in mt->bpp for a
++ * compressed tree: 2 for FXT1/DXT1, 4 for DXT3/DXT5, 0 otherwise. */
++static int radeon_compressed_num_bytes(GLuint mesaFormat)
++{
++   switch (mesaFormat) {
++   case MESA_FORMAT_RGB_FXT1:
++   case MESA_FORMAT_RGBA_FXT1:
++   case MESA_FORMAT_RGB_DXT1:
++   case MESA_FORMAT_RGBA_DXT1:
++      return 2;
++
++   case MESA_FORMAT_RGBA_DXT3:
++   case MESA_FORMAT_RGBA_DXT5:
++      return 4;
++
++   default:
++      break;
++   }
++
++   return 0;
++}
++
++/**
++ * Compute rowstride and size for the given level and assign the next
++ * 32-byte aligned offset to the image selected by \p face and \p level.
++ *
++ * \param curOffset points to the offset at which the image is to be stored
++ * and is updated by this function according to the size of the image.
++ */
++static void compute_tex_image_offset(radeon_mipmap_tree *mt,
++	GLuint face, GLuint level, GLuint* curOffset)
++{
++	radeon_mipmap_level *lvl = &mt->levels[level];
++
++	/* Find image size in bytes */
++	if (mt->compressed) {
++		/* TODO: Is this correct? Need test cases for compressed textures!
++		 * The size comes from radeon_compressed_texture_size(), which pads
++		 * narrow levels; rowstride is rounded up to a multiple of 64. */
++		lvl->rowstride = (lvl->width * mt->bpp + 63) & ~63;
++		lvl->size = radeon_compressed_texture_size(mt->radeon->glCtx,
++							   lvl->width, lvl->height, lvl->depth, mt->compressed);
++	} else if (mt->target == GL_TEXTURE_RECTANGLE_NV) {
++		lvl->rowstride = (lvl->width * mt->bpp + 63) & ~63;
++		lvl->size = lvl->rowstride * lvl->height;
++	} else if (mt->tilebits & RADEON_TXO_MICRO_TILE) {
++		/* tile pattern is 16 bytes x2. mipmaps stay 32 byte aligned,
++		 * though the actual offset may be different (if texture is less than
++		 * 32 bytes width) to the untiled case */
++		lvl->rowstride = (lvl->width * mt->bpp * 2 + 31) & ~31;
++		lvl->size = lvl->rowstride * ((lvl->height + 1) / 2) * lvl->depth;
++	} else {
++		lvl->rowstride = (lvl->width * mt->bpp + 31) & ~31;
++		lvl->size = lvl->rowstride * lvl->height * lvl->depth;
++	}
++	assert(lvl->size > 0);
++
++	/* All images are aligned to a 32-byte offset */
++	*curOffset = (*curOffset + 0x1f) & ~0x1f;
++	lvl->faces[face].offset = *curOffset;
++	*curOffset += lvl->size;
++
++	if (RADEON_DEBUG & DEBUG_TEXTURE)
++	  fprintf(stderr,
++		  "level %d, face %d: rs:%d %dx%d at %d\n",
++		  level, face, lvl->rowstride, lvl->width, lvl->height, lvl->faces[face].offset);
++}
++
++static GLuint minify(GLuint size, GLuint levels)
++{
++	/* Halve 'size' once per level, never going below one. */
++	const GLuint shrunk = size >> levels;
++
++	return shrunk < 1 ? 1 : shrunk;
++}
++
++/* Fill in the dimensions of every level and the offset of every face image,
++ * then record the total buffer size the tree needs in mt->totalsize. */
++static void calculate_miptree_layout(radeon_mipmap_tree *mt)
++{
++	const GLuint levelCount = mt->lastLevel - mt->firstLevel + 1;
++	GLuint offset = 0;
++	GLuint lvl, face;
++
++	assert(levelCount <= RADEON_MAX_TEXTURE_LEVELS);
++
++	for (lvl = 0; lvl < levelCount; lvl++) {
++		radeon_mipmap_level *dst = &mt->levels[lvl];
++
++		dst->width = minify(mt->width0, lvl);
++		dst->height = minify(mt->height0, lvl);
++		dst->depth = minify(mt->depth0, lvl);
++
++		for (face = 0; face < mt->faces; face++)
++			compute_tex_image_offset(mt, face, lvl, &offset);
++	}
++
++	/* Round the end offset up using RADEON_OFFSET_MASK */
++	mt->totalsize = (offset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK;
++}
++
++
++/**
++ * Create a new mipmap tree, calculate its layout and allocate memory.
++ * Returns NULL if the tree structure itself cannot be allocated.
++ */
++radeon_mipmap_tree* radeon_miptree_create(radeonContextPtr rmesa, radeonTexObj *t,
++		GLenum target, GLuint firstLevel, GLuint lastLevel,
++		GLuint width0, GLuint height0, GLuint depth0,
++		GLuint bpp, GLuint tilebits, GLuint compressed)
++{
++	radeon_mipmap_tree *mt = CALLOC_STRUCT(_radeon_mipmap_tree);
++	if (!mt)
++		return NULL;
++
++	mt->radeon = rmesa;
++	mt->refcount = 1;
++	mt->t = t;
++	mt->target = target;
++	mt->faces = (target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
++	mt->firstLevel = firstLevel;
++	mt->lastLevel = lastLevel;
++	mt->width0 = width0;
++	mt->height0 = height0;
++	mt->depth0 = depth0;
++	mt->bpp = compressed ? radeon_compressed_num_bytes(compressed) : bpp;
++	mt->tilebits = tilebits;
++	mt->compressed = compressed;
++
++	calculate_miptree_layout(mt);
++
++	mt->bo = radeon_bo_open(rmesa->radeonScreen->bom, 0, mt->totalsize, 1024,
++				RADEON_GEM_DOMAIN_VRAM, 0);
++	return mt;
++}
++
++void radeon_miptree_reference(radeon_mipmap_tree *mt)
++{
++	mt->refcount += 1;	/* take one more reference on the tree */
++	assert(mt->refcount > 0);
++}
++
++/* Drop one reference (NULL is a no-op); the last one releases bo and tree. */
++void radeon_miptree_unreference(radeon_mipmap_tree *mt)
++{
++	if (mt == NULL)
++		return;
++	assert(mt->refcount > 0);
++	if (--mt->refcount != 0)
++		return;
++
++	radeon_bo_unref(mt->bo);
++	free(mt);
++}
++
++
++/**
++ * Calculate first and last mip levels for the given texture object,
++ * where the dimensions are taken from the given texture image at
++ * the given level. Results are stored in *pfirstLevel / *plastLevel.
++ *
++ * Note: level is the OpenGL level number, which is not necessarily the same
++ * as the first level that is actually present.
++ *
++ * tObj->Image[face][level] must be non-null (asserted). Targets without a
++ * dedicated case report level 0 for both outputs.
++ */
++static void calculate_first_last_level(struct gl_texture_object *tObj,
++				       GLuint *pfirstLevel, GLuint *plastLevel,
++				       GLuint face, GLuint level)
++{
++	const struct gl_texture_image * const baseImage =
++		tObj->Image[face][level];
++
++	assert(baseImage);
++	
++	/* These must be signed values.  MinLod and MaxLod can be negative numbers,
++	* and having firstLevel and lastLevel as signed prevents the need for
++	* extra sign checks.
++	*/
++	int   firstLevel;
++	int   lastLevel;
++
++	/* Yes, this looks overly complicated, but it's all needed.
++	*/
++	switch (tObj->Target) {
++	case GL_TEXTURE_1D:
++	case GL_TEXTURE_2D:
++	case GL_TEXTURE_3D:
++	case GL_TEXTURE_CUBE_MAP:
++		if (tObj->MinFilter == GL_NEAREST || tObj->MinFilter == GL_LINEAR) {
++			/* GL_NEAREST and GL_LINEAR only care about GL_TEXTURE_BASE_LEVEL.
++			*/
++			firstLevel = lastLevel = tObj->BaseLevel;
++		} else {
++			firstLevel = tObj->BaseLevel + (GLint)(tObj->MinLod + 0.5);
++			firstLevel = MAX2(firstLevel, tObj->BaseLevel);
++			firstLevel = MIN2(firstLevel, level + baseImage->MaxLog2);
++			lastLevel = tObj->BaseLevel + (GLint)(tObj->MaxLod + 0.5);
++			lastLevel = MAX2(lastLevel, tObj->BaseLevel);
++			lastLevel = MIN2(lastLevel, level + baseImage->MaxLog2);
++			lastLevel = MIN2(lastLevel, tObj->MaxLevel);
++			lastLevel = MAX2(firstLevel, lastLevel); /* need at least one level */
++		}
++		break;
++	case GL_TEXTURE_RECTANGLE_NV:
++	case GL_TEXTURE_4D_SGIS:
++	default:
++		/* callers pass uninitialized locals, so always produce a value */
++		firstLevel = lastLevel = 0;
++		break;
++	}
++
++	/* save these values */
++	*pfirstLevel = firstLevel;
++	*plastLevel = lastLevel;
++}
++
++
++/**
++ * Checks whether the given miptree can hold the given texture image at the
++ * given face and level (level is the GL level, not an index into levels[]).
++ */
++GLboolean radeon_miptree_matches_image(radeon_mipmap_tree *mt,
++		struct gl_texture_image *texImage, GLuint face, GLuint level)
++{
++	radeon_mipmap_level *lvl;
++	/* mt->compressed holds a MESA_FORMAT_* value (0 = uncompressed) */
++	GLuint compressed = texImage->IsCompressed ? texImage->TexFormat->MesaFormat : 0;
++
++	if (face >= mt->faces || level < mt->firstLevel || level > mt->lastLevel)
++		return GL_FALSE;
++
++	if (compressed != mt->compressed)
++		return GL_FALSE;
++
++	if (!compressed && texImage->TexFormat->TexelBytes != mt->bpp)
++		return GL_FALSE;
++
++	lvl = &mt->levels[level - mt->firstLevel];
++	if (lvl->width != texImage->Width ||
++	    lvl->height != texImage->Height ||
++	    lvl->depth != texImage->Depth)
++		return GL_FALSE;
++
++	return GL_TRUE;
++}
++
++
++/**
++ * Checks whether the given miptree has the right format to store the given
++ * texture object. TexelBytes is only compared for uncompressed trees, since
++ * compressed trees store a different per-format value in mt->bpp.
++ */
++GLboolean radeon_miptree_matches_texture(radeon_mipmap_tree *mt, struct gl_texture_object *texObj)
++{
++	struct gl_texture_image *firstImage;
++	GLuint compressed;
++	GLuint firstLevel, lastLevel;
++
++	calculate_first_last_level(texObj, &firstLevel, &lastLevel, 0, texObj->BaseLevel);
++	firstImage = texObj->Image[0][firstLevel];
++	if (!firstImage)
++		return GL_FALSE;
++	compressed = firstImage->IsCompressed ? firstImage->TexFormat->MesaFormat : 0;
++
++	return (mt->firstLevel == firstLevel &&
++	        mt->lastLevel == lastLevel &&
++	        mt->width0 == firstImage->Width &&
++	        mt->height0 == firstImage->Height &&
++	        mt->depth0 == firstImage->Depth &&
++	        mt->compressed == compressed &&
++	        (compressed || mt->bpp == firstImage->TexFormat->TexelBytes));
++}
++
++
++/**
++ * Allocate t->mt for the given texture, sized from the given image, but only
++ * when that image sits at the first level of the tree and on a valid face;
++ * otherwise t->mt is left unset.
++ */
++void radeon_try_alloc_miptree(radeonContextPtr rmesa, radeonTexObj *t,
++		struct gl_texture_image *texImage, GLuint face, GLuint level)
++{
++	/* 0 means "not compressed"; otherwise the Mesa format of the image */
++	GLuint fmt = texImage->IsCompressed ? texImage->TexFormat->MesaFormat : 0;
++	const GLuint faceCount = (t->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
++	GLuint first, last;
++
++	assert(!t->mt);
++
++	calculate_first_last_level(&t->base, &first, &last, face, level);
++
++	if (level != first || face >= faceCount)
++		return;
++
++	t->mt = radeon_miptree_create(rmesa, t, t->base.Target,
++		first, last,
++		texImage->Width, texImage->Height, texImage->Depth,
++		texImage->TexFormat->TexelBytes, t->tile_bits, fmt);
++}
+diff --git a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.h b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.h
+new file mode 100644
+index 0000000..43dfa48
+--- /dev/null
++++ b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.h
+@@ -0,0 +1,97 @@
++/*
++ * Copyright (C) 2008 Nicolai Haehnle.
++ *
++ * All Rights Reserved.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining
++ * a copy of this software and associated documentation files (the
++ * "Software"), to deal in the Software without restriction, including
++ * without limitation the rights to use, copy, modify, merge, publish,
++ * distribute, sublicense, and/or sell copies of the Software, and to
++ * permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * The above copyright notice and this permission notice (including the
++ * next paragraph) shall be included in all copies or substantial
++ * portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
++ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
++ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
++ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
++ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
++ *
++ */
++
++#ifndef __RADEON_MIPMAP_TREE_H_
++#define __RADEON_MIPMAP_TREE_H_
++
++#include "radeon_common.h"
++
++typedef struct _radeon_mipmap_tree radeon_mipmap_tree;
++typedef struct _radeon_mipmap_level radeon_mipmap_level;
++typedef struct _radeon_mipmap_image radeon_mipmap_image;
++
++struct _radeon_mipmap_image {
++	GLuint offset; /**< Offset of this image from the start of mipmap tree buffer, in bytes */
++};
++
++struct _radeon_mipmap_level {
++	GLuint width;
++	GLuint height;
++	GLuint depth;
++	GLuint size; /**< Size of each image, in bytes */
++	GLuint rowstride; /**< in bytes */
++	radeon_mipmap_image faces[6]; /**< One entry per face; 6 is the cube-map face count */
++};
++
++
++/**
++ * A mipmap tree contains texture images in the layout that the hardware
++ * expects.
++ *
++ * The meta-data of mipmap trees is immutable, i.e. you cannot change the
++ * layout on-the-fly; however, the texture contents (i.e. texels) can be
++ * changed.
++ */
++struct _radeon_mipmap_tree {
++	radeonContextPtr radeon;
++	radeonTexObj *t;
++	struct radeon_bo *bo;
++	GLuint refcount;
++
++	GLuint totalsize; /**< total size of the miptree, in bytes */
++
++	GLenum target; /**< GL_TEXTURE_xxx */
++	GLuint faces; /**< # of faces: 6 for cubemaps, 1 otherwise */
++	GLuint firstLevel; /**< First mip level stored in this mipmap tree */
++	GLuint lastLevel; /**< Last mip level stored in this mipmap tree */
++
++	GLuint width0; /**< Width of firstLevel image */
++	GLuint height0; /**< Height of firstLevel image */
++	GLuint depth0; /**< Depth of firstLevel image */
++
++	GLuint bpp; /**< Bytes per texel */
++	GLuint tilebits; /**< RADEON_TXO_xxx_TILE */
++	GLuint compressed; /**< MESA_FORMAT_xxx indicating a compressed format, or 0 if uncompressed */
++
++	radeon_mipmap_level levels[RADEON_MAX_TEXTURE_LEVELS];
++};
++
++radeon_mipmap_tree* radeon_miptree_create(radeonContextPtr rmesa, radeonTexObj *t,
++		GLenum target, GLuint firstLevel, GLuint lastLevel,
++		GLuint width0, GLuint height0, GLuint depth0,
++		GLuint bpp, GLuint tilebits, GLuint compressed);
++void radeon_miptree_reference(radeon_mipmap_tree *mt);
++void radeon_miptree_unreference(radeon_mipmap_tree *mt);
++
++GLboolean radeon_miptree_matches_image(radeon_mipmap_tree *mt,
++		struct gl_texture_image *texImage, GLuint face, GLuint level);
++GLboolean radeon_miptree_matches_texture(radeon_mipmap_tree *mt, struct gl_texture_object *texObj);
++void radeon_try_alloc_miptree(radeonContextPtr rmesa, radeonTexObj *t,
++			      struct gl_texture_image *texImage, GLuint face, GLuint level);
++
++
++#endif /* __RADEON_MIPMAP_TREE_H_ */
+diff --git a/src/mesa/drivers/dri/radeon/radeon_sanity.c b/src/mesa/drivers/dri/radeon/radeon_sanity.c
+index 6613757..bbed838 100644
+--- a/src/mesa/drivers/dri/radeon/radeon_sanity.c
++++ b/src/mesa/drivers/dri/radeon/radeon_sanity.c
+@@ -973,7 +973,7 @@ static int radeon_emit_packet3_cliprect( drm_radeon_cmd_buffer_t *cmdbuf )
+ }
+ 
+ 
+-int radeonSanityCmdBuffer( radeonContextPtr rmesa,
++int radeonSanityCmdBuffer( r100ContextPtr rmesa,
+ 			   int nbox,
+ 			   drm_clip_rect_t *boxes )
+ {
+diff --git a/src/mesa/drivers/dri/radeon/radeon_sanity.h b/src/mesa/drivers/dri/radeon/radeon_sanity.h
+index 1ec06bc..f30eb1c 100644
+--- a/src/mesa/drivers/dri/radeon/radeon_sanity.h
++++ b/src/mesa/drivers/dri/radeon/radeon_sanity.h
+@@ -1,7 +1,7 @@
+ #ifndef RADEON_SANITY_H
+ #define RADEON_SANITY_H
+ 
+-extern int radeonSanityCmdBuffer( radeonContextPtr rmesa,
++extern int radeonSanityCmdBuffer( r100ContextPtr rmesa,
+ 				  int nbox,
+ 				  drm_clip_rect_t *boxes );
+ 
+diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c
+index 56c22fa..086a268 100644
+--- a/src/mesa/drivers/dri/radeon/radeon_screen.c
++++ b/src/mesa/drivers/dri/radeon/radeon_screen.c
+@@ -35,6 +35,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+  * \author  Gareth Hughes <gareth@valinux.com>
+  */
+ 
++#include <errno.h>
+ #include "main/glheader.h"
+ #include "main/imports.h"
+ #include "main/mtypes.h"
+@@ -45,32 +46,39 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "radeon_chipset.h"
+ #include "radeon_macros.h"
+ #include "radeon_screen.h"
++#include "radeon_common.h"
++#include "radeon_span.h"
+ #if !RADEON_COMMON
+ #include "radeon_context.h"
+-#include "radeon_span.h"
+ #include "radeon_tex.h"
+ #elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R200)
+ #include "r200_context.h"
+ #include "r200_ioctl.h"
+-#include "r200_span.h"
+ #include "r200_tex.h"
+ #elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
+ #include "r300_context.h"
+ #include "r300_fragprog.h"
+ #include "r300_tex.h"
+-#include "radeon_span.h"
+ #endif
+ 
+ #include "utils.h"
+ #include "vblank.h"
+ #include "drirenderbuffer.h"
+ 
++#include "radeon_bocs_wrapper.h"
++
+ #include "GL/internal/dri_interface.h"
+ 
+ /* Radeon configuration
+  */
+ #include "xmlpool.h"
+ 
++#define DRI_CONF_COMMAND_BUFFER_SIZE(def,min,max) \
++DRI_CONF_OPT_BEGIN_V(command_buffer_size,int,def, # min ":" # max ) \
++        DRI_CONF_DESC(en,"Size of command buffer (in KB)") \
++        DRI_CONF_DESC(de,"Grösse des Befehlspuffers (in KB)") \
++DRI_CONF_OPT_END
++
+ #if !RADEON_COMMON	/* R100 */
+ PUBLIC const char __driConfigOptions[] =
+ DRI_CONF_BEGIN
+@@ -80,6 +88,7 @@ DRI_CONF_BEGIN
+         DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_0)
+         DRI_CONF_MAX_TEXTURE_UNITS(3,2,3)
+         DRI_CONF_HYPERZ(false)
++        DRI_CONF_COMMAND_BUFFER_SIZE(8, 8, 32)
+     DRI_CONF_SECTION_END
+     DRI_CONF_SECTION_QUALITY
+         DRI_CONF_TEXTURE_DEPTH(DRI_CONF_TEXTURE_DEPTH_FB)
+@@ -95,7 +104,7 @@ DRI_CONF_BEGIN
+         DRI_CONF_NO_RAST(false)
+     DRI_CONF_SECTION_END
+ DRI_CONF_END;
+-static const GLuint __driNConfigOptions = 14;
++static const GLuint __driNConfigOptions = 15;
+ 
+ #elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R200)
+ 
+@@ -107,6 +116,7 @@ DRI_CONF_BEGIN
+         DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_0)
+         DRI_CONF_MAX_TEXTURE_UNITS(6,2,6)
+         DRI_CONF_HYPERZ(false)
++        DRI_CONF_COMMAND_BUFFER_SIZE(8, 8, 32)
+     DRI_CONF_SECTION_END
+     DRI_CONF_SECTION_QUALITY
+         DRI_CONF_TEXTURE_DEPTH(DRI_CONF_TEXTURE_DEPTH_FB)
+@@ -126,7 +136,7 @@ DRI_CONF_BEGIN
+         DRI_CONF_NV_VERTEX_PROGRAM(false)
+     DRI_CONF_SECTION_END
+ DRI_CONF_END;
+-static const GLuint __driNConfigOptions = 16;
++static const GLuint __driNConfigOptions = 17;
+ 
+ extern const struct dri_extension blend_extensions[];
+ extern const struct dri_extension ARB_vp_extension[];
+@@ -149,11 +159,7 @@ DRI_CONF_OPT_BEGIN_V(texture_coord_units,int,def, # min ":" # max ) \
+         DRI_CONF_DESC(de,"Anzahl der Texturkoordinateneinheiten") \
+ DRI_CONF_OPT_END
+ 
+-#define DRI_CONF_COMMAND_BUFFER_SIZE(def,min,max) \
+-DRI_CONF_OPT_BEGIN_V(command_buffer_size,int,def, # min ":" # max ) \
+-        DRI_CONF_DESC(en,"Size of command buffer (in KB)") \
+-        DRI_CONF_DESC(de,"Grösse des Befehlspuffers (in KB)") \
+-DRI_CONF_OPT_END
++
+ 
+ #define DRI_CONF_DISABLE_S3TC(def) \
+ DRI_CONF_OPT_BEGIN(disable_s3tc,bool,def) \
+@@ -209,7 +215,6 @@ static const GLuint __driNConfigOptions = 17;
+ extern const struct dri_extension gl_20_extension[];
+ 
+ #ifndef RADEON_DEBUG
+-int RADEON_DEBUG = 0;
+ 
+ static const struct dri_debug_control debug_control[] = {
+ 	{"fall", DEBUG_FALLBACKS},
+@@ -351,137 +356,17 @@ static const __DRItexOffsetExtension r300texOffsetExtension = {
+     { __DRI_TEX_OFFSET, __DRI_TEX_OFFSET_VERSION },
+    r300SetTexOffset,
+ };
+-#endif
+ 
+-/* Create the device specific screen private data struct.
+- */
+-static radeonScreenPtr
+-radeonCreateScreen( __DRIscreenPrivate *sPriv )
+-{
+-   radeonScreenPtr screen;
+-   RADEONDRIPtr dri_priv = (RADEONDRIPtr)sPriv->pDevPriv;
+-   unsigned char *RADEONMMIO;
+-   int i;
+-   int ret;
+-   uint32_t temp;
+-
+-   if (sPriv->devPrivSize != sizeof(RADEONDRIRec)) {
+-      fprintf(stderr,"\nERROR!  sizeof(RADEONDRIRec) does not match passed size from device driver\n");
+-      return GL_FALSE;
+-   }
+-
+-   /* Allocate the private area */
+-   screen = (radeonScreenPtr) CALLOC( sizeof(*screen) );
+-   if ( !screen ) {
+-      __driUtilMessage("%s: Could not allocate memory for screen structure",
+-		       __FUNCTION__);
+-      return NULL;
+-   }
+-
+-#if DO_DEBUG && RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
+-	RADEON_DEBUG = driParseDebugString(getenv("RADEON_DEBUG"), debug_control);
++static const __DRItexBufferExtension r300TexBufferExtension = {
++    { __DRI_TEX_BUFFER, __DRI_TEX_BUFFER_VERSION },
++   r300SetTexBuffer,
++};
+ #endif
+ 
+-   /* parse information in __driConfigOptions */
+-   driParseOptionInfo (&screen->optionCache,
+-		       __driConfigOptions, __driNConfigOptions);
+-
+-   /* This is first since which regions we map depends on whether or
+-    * not we are using a PCI card.
+-    */
+-   screen->card_type = (dri_priv->IsPCI ? RADEON_CARD_PCI : RADEON_CARD_AGP);
+-   {
+-      int ret;
+-      ret = radeonGetParam( sPriv->fd, RADEON_PARAM_GART_BUFFER_OFFSET,
+-			    &screen->gart_buffer_offset);
+-
+-      if (ret) {
+-	 FREE( screen );
+-	 fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_GART_BUFFER_OFFSET): %d\n", ret);
+-	 return NULL;
+-      }
+-
+-      ret = radeonGetParam( sPriv->fd, RADEON_PARAM_GART_BASE,
+-			    &screen->gart_base);
+-      if (ret) {
+-	 FREE( screen );
+-	 fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_GART_BASE): %d\n", ret);
+-	 return NULL;
+-      }
+-
+-      ret = radeonGetParam( sPriv->fd, RADEON_PARAM_IRQ_NR,
+-			    &screen->irq);
+-      if (ret) {
+-	 FREE( screen );
+-	 fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_IRQ_NR): %d\n", ret);
+-	 return NULL;
+-      }
+-      screen->drmSupportsCubeMapsR200 = (sPriv->drm_version.minor >= 7);
+-      screen->drmSupportsBlendColor = (sPriv->drm_version.minor >= 11);
+-      screen->drmSupportsTriPerf = (sPriv->drm_version.minor >= 16);
+-      screen->drmSupportsFragShader = (sPriv->drm_version.minor >= 18);
+-      screen->drmSupportsPointSprites = (sPriv->drm_version.minor >= 13);
+-      screen->drmSupportsCubeMapsR100 = (sPriv->drm_version.minor >= 15);
+-      screen->drmSupportsVertexProgram = (sPriv->drm_version.minor >= 25);
+-   }
+-
+-   screen->mmio.handle = dri_priv->registerHandle;
+-   screen->mmio.size   = dri_priv->registerSize;
+-   if ( drmMap( sPriv->fd,
+-		screen->mmio.handle,
+-		screen->mmio.size,
+-		&screen->mmio.map ) ) {
+-      FREE( screen );
+-      __driUtilMessage("%s: drmMap failed\n", __FUNCTION__ );
+-      return NULL;
+-   }
+-
+-   RADEONMMIO = screen->mmio.map;
+-
+-   screen->status.handle = dri_priv->statusHandle;
+-   screen->status.size   = dri_priv->statusSize;
+-   if ( drmMap( sPriv->fd,
+-		screen->status.handle,
+-		screen->status.size,
+-		&screen->status.map ) ) {
+-      drmUnmap( screen->mmio.map, screen->mmio.size );
+-      FREE( screen );
+-      __driUtilMessage("%s: drmMap (2) failed\n", __FUNCTION__ );
+-      return NULL;
+-   }
+-   screen->scratch = (__volatile__ uint32_t *)
+-      ((GLubyte *)screen->status.map + RADEON_SCRATCH_REG_OFFSET);
+-
+-   screen->buffers = drmMapBufs( sPriv->fd );
+-   if ( !screen->buffers ) {
+-      drmUnmap( screen->status.map, screen->status.size );
+-      drmUnmap( screen->mmio.map, screen->mmio.size );
+-      FREE( screen );
+-      __driUtilMessage("%s: drmMapBufs failed\n", __FUNCTION__ );
+-      return NULL;
+-   }
+-
+-   if ( dri_priv->gartTexHandle && dri_priv->gartTexMapSize ) {
+-      screen->gartTextures.handle = dri_priv->gartTexHandle;
+-      screen->gartTextures.size   = dri_priv->gartTexMapSize;
+-      if ( drmMap( sPriv->fd,
+-		   screen->gartTextures.handle,
+-		   screen->gartTextures.size,
+-		   (drmAddressPtr)&screen->gartTextures.map ) ) {
+-	 drmUnmapBufs( screen->buffers );
+-	 drmUnmap( screen->status.map, screen->status.size );
+-	 drmUnmap( screen->mmio.map, screen->mmio.size );
+-	 FREE( screen );
+-	 __driUtilMessage("%s: drmMap failed for GART texture area\n", __FUNCTION__);
+-	 return NULL;
+-      }
+-
+-      screen->gart_texture_offset = dri_priv->gartTexOffset + screen->gart_base;
+-   }
+-
++static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id)
++{
+    screen->chip_flags = 0;
+-   /* XXX: add more chipsets */
+-   switch ( dri_priv->deviceID ) {
++   switch ( device_id ) {
+    case PCI_CHIP_RADEON_LY:
+    case PCI_CHIP_RADEON_LZ:
+    case PCI_CHIP_RADEON_QY:
+@@ -813,9 +698,162 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
+ 
+    default:
+       fprintf(stderr, "unknown chip id 0x%x, can't guess.\n",
+-	      dri_priv->deviceID);
++	      device_id);
++      return -1;
++   }
++
++   return 0;
++}
++
++
++/* Create the device specific screen private data struct.  This is the
++ * non-DRI2 entry point; radeonInitDriver picks it when dri2 is not enabled. */
++static radeonScreenPtr
++radeonCreateScreen( __DRIscreenPrivate *sPriv )
++{
++   radeonScreenPtr screen;
++   RADEONDRIPtr dri_priv = (RADEONDRIPtr)sPriv->pDevPriv;
++   unsigned char *RADEONMMIO = NULL;
++   int i;
++   int ret;
++   uint32_t temp;
++
++   if (sPriv->devPrivSize != sizeof(RADEONDRIRec)) {
++      fprintf(stderr,"\nERROR!  sizeof(RADEONDRIRec) does not match passed size from device driver\n");
++      return NULL;
++   }
++
++   /* Allocate the private area */
++   screen = (radeonScreenPtr) CALLOC( sizeof(*screen) );
++   if ( !screen ) {
++      __driUtilMessage("%s: Could not allocate memory for screen structure",
++		       __FUNCTION__);
+       return NULL;
+    }
++
++#if DO_DEBUG && RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
++	RADEON_DEBUG = driParseDebugString(getenv("RADEON_DEBUG"), debug_control);
++#endif
++
++   /* parse information in __driConfigOptions */
++   driParseOptionInfo (&screen->optionCache,
++		       __driConfigOptions, __driNConfigOptions);
++
++   /* This is first since which regions we map depends on whether or
++    * not we are using a PCI card.
++    */
++   screen->card_type = (dri_priv->IsPCI ? RADEON_CARD_PCI : RADEON_CARD_AGP);
++   {
++#ifdef RADEON_PARAM_KERNEL_MM
++     ret = radeonGetParam( sPriv->fd, RADEON_PARAM_KERNEL_MM,
++                            &screen->kernel_mm);
++
++      if (ret && ret != -EINVAL) {
++         FREE( screen );
++         fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_KERNEL_MM): %d\n", ret);
++         return NULL;
++      }
++
++      if (ret == -EINVAL)
++          screen->kernel_mm = 0;
++#endif
++
++      ret = radeonGetParam( sPriv->fd, RADEON_PARAM_GART_BUFFER_OFFSET,
++			    &screen->gart_buffer_offset);
++
++      if (ret) {
++	 FREE( screen );
++	 fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_GART_BUFFER_OFFSET): %d\n", ret);
++	 return NULL;
++      }
++
++      ret = radeonGetParam( sPriv->fd, RADEON_PARAM_GART_BASE,
++			    &screen->gart_base);
++      if (ret) {
++	 FREE( screen );
++	 fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_GART_BASE): %d\n", ret);
++	 return NULL;
++      }
++
++      ret = radeonGetParam( sPriv->fd, RADEON_PARAM_IRQ_NR,
++			    &screen->irq);
++      if (ret) {
++	 FREE( screen );
++	 fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_IRQ_NR): %d\n", ret);
++	 return NULL;
++      }
++      screen->drmSupportsCubeMapsR200 = (sPriv->drm_version.minor >= 7);
++      screen->drmSupportsBlendColor = (sPriv->drm_version.minor >= 11);
++      screen->drmSupportsTriPerf = (sPriv->drm_version.minor >= 16);
++      screen->drmSupportsFragShader = (sPriv->drm_version.minor >= 18);
++      screen->drmSupportsPointSprites = (sPriv->drm_version.minor >= 13);
++      screen->drmSupportsCubeMapsR100 = (sPriv->drm_version.minor >= 15);
++      screen->drmSupportsVertexProgram = (sPriv->drm_version.minor >= 25);
++   }
++
++   /* Identify the chip before any drmMap: failing here leaves no mappings to undo. */
++   ret = radeon_set_screen_flags(screen, dri_priv->deviceID);
++   if (ret == -1) {
++     FREE( screen );
++     return NULL;
++   }
++
++   if (!screen->kernel_mm) {
++     screen->mmio.handle = dri_priv->registerHandle;
++     screen->mmio.size   = dri_priv->registerSize;
++     if ( drmMap( sPriv->fd,
++		  screen->mmio.handle,
++		  screen->mmio.size,
++		  &screen->mmio.map ) ) {
++       FREE( screen );
++       __driUtilMessage("%s: drmMap failed\n", __FUNCTION__ );
++       return NULL;
++     }
++
++     RADEONMMIO = screen->mmio.map;
++
++     screen->status.handle = dri_priv->statusHandle;
++     screen->status.size   = dri_priv->statusSize;
++     if ( drmMap( sPriv->fd,
++		  screen->status.handle,
++		  screen->status.size,
++		  &screen->status.map ) ) {
++       drmUnmap( screen->mmio.map, screen->mmio.size );
++       FREE( screen );
++       __driUtilMessage("%s: drmMap (2) failed\n", __FUNCTION__ );
++       return NULL;
++     }
++     screen->scratch = (__volatile__ uint32_t *)
++       ((GLubyte *)screen->status.map + RADEON_SCRATCH_REG_OFFSET);
++
++     screen->buffers = drmMapBufs( sPriv->fd );
++     if ( !screen->buffers ) {
++       drmUnmap( screen->status.map, screen->status.size );
++       drmUnmap( screen->mmio.map, screen->mmio.size );
++       FREE( screen );
++       __driUtilMessage("%s: drmMapBufs failed\n", __FUNCTION__ );
++       return NULL;
++     }
++
++     if ( dri_priv->gartTexHandle && dri_priv->gartTexMapSize ) {
++       screen->gartTextures.handle = dri_priv->gartTexHandle;
++       screen->gartTextures.size   = dri_priv->gartTexMapSize;
++       if ( drmMap( sPriv->fd,
++		    screen->gartTextures.handle,
++		    screen->gartTextures.size,
++		    (drmAddressPtr)&screen->gartTextures.map ) ) {
++	 drmUnmapBufs( screen->buffers );
++	 drmUnmap( screen->status.map, screen->status.size );
++	 drmUnmap( screen->mmio.map, screen->mmio.size );
++	 FREE( screen );
++	 __driUtilMessage("%s: drmMap failed for GART texture area\n", __FUNCTION__);
++	 return NULL;
++       }
++
++       screen->gart_texture_offset = dri_priv->gartTexOffset + screen->gart_base;
++     }
++   }
++
+    if ((screen->chip_family == CHIP_FAMILY_R350 || screen->chip_family == CHIP_FAMILY_R300) &&
+        sPriv->ddx_version.minor < 2) {
+       fprintf(stderr, "xf86-video-ati-6.6.2 or newer needed for Radeon 9500/9700/9800 cards.\n");
+@@ -843,7 +881,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
+    ret = radeonGetParam( sPriv->fd, RADEON_PARAM_FB_LOCATION,
+                          &temp);
+    if (ret) {
+-       if (screen->chip_family < CHIP_FAMILY_RS690)
++       if (screen->chip_family < CHIP_FAMILY_RS690 && !screen->kernel_mm)
+ 	   screen->fbLocation      = ( INREG( RADEON_MC_FB_LOCATION ) & 0xffff) << 16;
+        else {
+            FREE( screen );
+@@ -957,7 +995,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
+ #endif
+ 
+ #if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
+-   screen->extensions[i++] = &r300texOffsetExtension.base;
++   //screen->extensions[i++] = &r300texOffsetExtension.base;
+ #endif
+ 
+    screen->extensions[i++] = NULL;
+@@ -965,6 +1003,106 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
+ 
+    screen->driScreen = sPriv;
+    screen->sarea_priv_offset = dri_priv->sarea_priv_offset;
++   screen->sarea = (drm_radeon_sarea_t *) ((GLubyte *) sPriv->pSAREA +
++					       screen->sarea_priv_offset);
++
++   if (screen->kernel_mm)
++     screen->bom = radeon_bo_manager_gem_ctor(sPriv->fd);
++   else
++     screen->bom = radeon_bo_manager_legacy_ctor(screen);
++   if (screen->bom == NULL) {
++     free(screen);
++     return NULL;
++   }
++
++   return screen;
++}
++
++/* DRI2 variant of radeonCreateScreen: kernel memory manager only, device id queried via getparam. */
++static radeonScreenPtr
++radeonCreateScreen2(__DRIscreenPrivate *sPriv)
++{
++   radeonScreenPtr screen;
++   int i;
++   int ret;
++   uint32_t device_id;
++
++   /* Allocate the private area */
++   screen = (radeonScreenPtr) CALLOC( sizeof(*screen) );
++   if ( !screen ) {
++      __driUtilMessage("%s: Could not allocate memory for screen structure",
++		       __FUNCTION__);
++      return NULL;
++   }
++
++#if DO_DEBUG && RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
++	RADEON_DEBUG = driParseDebugString(getenv("RADEON_DEBUG"), debug_control);
++#endif
++
++   /* parse information in __driConfigOptions */
++   driParseOptionInfo (&screen->optionCache,
++		       __driConfigOptions, __driNConfigOptions);
++
++   screen->kernel_mm = 1;
++
++   ret = radeonGetParam( sPriv->fd, RADEON_PARAM_IRQ_NR,
++			 &screen->irq); /* result not checked: a zero irq only skips the swap-control extensions below */
++
++   ret = radeonGetParam( sPriv->fd, RADEON_PARAM_DEVICE_ID,
++			 &device_id);
++   if (ret) {
++     FREE( screen );
++     fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_DEVICE_ID): %d\n", ret);
++     return NULL;
++   }
++
++   if (radeon_set_screen_flags(screen, device_id) == -1) {
++     FREE( screen ); /* unknown chip: release the screen instead of leaking it */
++     return NULL;
++   }
++
++   if (screen->chip_family <= CHIP_FAMILY_RS200)
++      screen->chip_flags |= RADEON_CLASS_R100;
++   else if (screen->chip_family <= CHIP_FAMILY_RV280)
++      screen->chip_flags |= RADEON_CLASS_R200;
++   else
++      screen->chip_flags |= RADEON_CLASS_R300;
++
++   i = 0;
++   screen->extensions[i++] = &driCopySubBufferExtension.base;
++   screen->extensions[i++] = &driFrameTrackingExtension.base;
++   screen->extensions[i++] = &driReadDrawableExtension;
++
++   if ( screen->irq != 0 ) {
++       screen->extensions[i++] = &driSwapControlExtension.base;
++       screen->extensions[i++] = &driMediaStreamCounterExtension.base;
++   }
++
++#if !RADEON_COMMON
++   screen->extensions[i++] = &radeonTexOffsetExtension.base;
++#endif
++
++#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200)
++   if (IS_R200_CLASS(screen))
++       screen->extensions[i++] = &r200AllocateExtension.base;
++
++   screen->extensions[i++] = &r200texOffsetExtension.base;
++#endif
++
++#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
++   screen->extensions[i++] = &r300texOffsetExtension.base;
++   screen->extensions[i++] = &r300TexBufferExtension.base;
++#endif
++
++   screen->extensions[i++] = NULL;
++   sPriv->extensions = screen->extensions;
++
++   screen->driScreen = sPriv;
++   screen->bom = radeon_bo_manager_gem_ctor(sPriv->fd);
++   if (screen->bom == NULL) {
++       FREE( screen );
++       return NULL;
++   }
+    return screen;
+ }
+ 
+@@ -973,23 +1111,32 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
+ static void
+ radeonDestroyScreen( __DRIscreenPrivate *sPriv )
+ {
+-   radeonScreenPtr screen = (radeonScreenPtr)sPriv->private;
++    radeonScreenPtr screen = (radeonScreenPtr)sPriv->private;
+ 
+-   if (!screen)
+-      return;
++    if (!screen)
++        return; /* nothing was created, so there is nothing to tear down */
+ 
+-   if ( screen->gartTextures.map ) {
+-      drmUnmap( screen->gartTextures.map, screen->gartTextures.size );
+-   }
+-   drmUnmapBufs( screen->buffers );
+-   drmUnmap( screen->status.map, screen->status.size );
+-   drmUnmap( screen->mmio.map, screen->mmio.size );
++    if (screen->kernel_mm) { /* kernel memory manager: only the GEM buffer manager is torn down */
++#ifdef RADEON_BO_TRACK
++        radeon_tracker_print(&screen->bom->tracker, stderr);
++#endif
++        radeon_bo_manager_gem_dtor(screen->bom);
++    } else { /* legacy path: destroy the legacy manager, then undo the drmMap/drmMapBufs mappings */
++        radeon_bo_manager_legacy_dtor(screen->bom);
++
++        if ( screen->gartTextures.map ) { /* the GART texture area is only unmapped when a mapping is recorded */
++            drmUnmap( screen->gartTextures.map, screen->gartTextures.size );
++        }
++        drmUnmapBufs( screen->buffers );
++        drmUnmap( screen->status.map, screen->status.size );
++        drmUnmap( screen->mmio.map, screen->mmio.size );
++    }
+ 
+-   /* free all option information */
+-   driDestroyOptionInfo (&screen->optionCache);
++    /* free all option information */
++    driDestroyOptionInfo (&screen->optionCache);
+ 
+-   FREE( screen );
+-   sPriv->private = NULL;
++    FREE( screen );
++    sPriv->private = NULL; /* a second call then returns at the !screen check */
+ }
+ 
+ 
+@@ -998,16 +1145,103 @@ radeonDestroyScreen( __DRIscreenPrivate *sPriv )
+ static GLboolean
+ radeonInitDriver( __DRIscreenPrivate *sPriv )
+ {
+-   sPriv->private = (void *) radeonCreateScreen( sPriv );
+-   if ( !sPriv->private ) {
+-      radeonDestroyScreen( sPriv );
+-      return GL_FALSE;
+-   }
++    if (sPriv->dri2.enabled) { /* DRI2 screens take the kernel-memory-manager-only setup */
++        sPriv->private = (void *) radeonCreateScreen2( sPriv );
++    } else {
++        sPriv->private = (void *) radeonCreateScreen( sPriv );
++    }
++    if ( !sPriv->private ) {
++        radeonDestroyScreen( sPriv ); /* returns at once: sPriv->private is NULL here */
++        return GL_FALSE;
++    }
++
++    return GL_TRUE;
++}
+ 
+-   return GL_TRUE;
++static GLboolean
++radeon_alloc_window_storage(GLcontext *ctx, struct gl_renderbuffer *rb,
++			    GLenum intFormat, GLuint w, GLuint h)
++{ /* AllocStorage hook: only records size and format on rb; ctx is unused */
++    rb->Width = w;
++    rb->Height = h;
++    rb->_ActualFormat = intFormat;
++
++    return GL_TRUE; /* always reports success */
+ }
+ 
+ 
++/* Allocate a renderbuffer wrapper of the given format; NULL on allocation failure or unknown format. */
++static struct radeon_renderbuffer *
++radeon_create_renderbuffer(GLenum format, __DRIdrawablePrivate *driDrawPriv)
++{
++    struct radeon_renderbuffer *rrb = CALLOC_STRUCT(radeon_renderbuffer);
++
++    if (rrb == NULL)
++	return NULL;
++
++    _mesa_init_renderbuffer(&rrb->base, 0);
++
++    /* Fill in the channel sizes and GL enums reported for each handled format. */
++    switch (format) {
++    case GL_RGB5:
++	rrb->base.RedBits = 5;
++	rrb->base.GreenBits = 6;
++	rrb->base.BlueBits = 5;
++	rrb->base.DataType = GL_UNSIGNED_BYTE;
++	rrb->base._BaseFormat = GL_RGBA;
++	rrb->base._ActualFormat = GL_RGB5;
++	break;
++    case GL_RGBA8:
++	rrb->base.RedBits = 8;
++	rrb->base.GreenBits = 8;
++	rrb->base.BlueBits = 8;
++	rrb->base.AlphaBits = 8;
++	rrb->base.DataType = GL_UNSIGNED_BYTE;
++	rrb->base._BaseFormat = GL_RGBA;
++	rrb->base._ActualFormat = GL_RGBA8;
++	break;
++    case GL_STENCIL_INDEX8_EXT:
++	rrb->base.StencilBits = 8;
++	rrb->base.DataType = GL_UNSIGNED_BYTE;
++	rrb->base._BaseFormat = GL_STENCIL_INDEX;
++	rrb->base._ActualFormat = GL_STENCIL_INDEX8_EXT;
++	break;
++    case GL_DEPTH_COMPONENT16:
++	rrb->base.DepthBits = 16;
++	rrb->base.DataType = GL_UNSIGNED_SHORT;
++	rrb->base._BaseFormat = GL_DEPTH_COMPONENT;
++	rrb->base._ActualFormat = GL_DEPTH_COMPONENT16;
++	break;
++    case GL_DEPTH_COMPONENT24:
++	rrb->base.DepthBits = 24;
++	rrb->base.DataType = GL_UNSIGNED_INT;
++	rrb->base._BaseFormat = GL_DEPTH_COMPONENT;
++	rrb->base._ActualFormat = GL_DEPTH24_STENCIL8_EXT;
++	break;
++    case GL_DEPTH24_STENCIL8_EXT:
++	rrb->base.DepthBits = 24;
++	rrb->base.StencilBits = 8;
++	rrb->base.DataType = GL_UNSIGNED_INT_24_8_EXT;
++	rrb->base._BaseFormat = GL_DEPTH_STENCIL_EXT;
++	rrb->base._ActualFormat = GL_DEPTH24_STENCIL8_EXT;
++	break;
++    default:
++	fprintf(stderr, "%s: Unknown format 0x%04x\n", __FUNCTION__, format);
++	_mesa_delete_renderbuffer(&rrb->base);
++	return NULL;
++    }
++
++    /* Attach the drawable, the requested format and the storage hook. */
++    rrb->base.AllocStorage = radeon_alloc_window_storage;
++    rrb->base.InternalFormat = format;
++    rrb->dPriv = driDrawPriv;
++
++    radeonSetSpanFunctions(rrb);
++    rrb->bo = NULL;
++
++    return rrb;
++}
++
+ /**
+  * Create the Mesa framebuffer and renderbuffers for a given window/drawable.
+  *
+@@ -1022,95 +1256,86 @@ radeonCreateBuffer( __DRIscreenPrivate *driScrnPriv,
+ {
+    radeonScreenPtr screen = (radeonScreenPtr) driScrnPriv->private;
+ 
+-   if (isPixmap) {
+-      return GL_FALSE; /* not implemented */
+-   }
+-   else {
+-      const GLboolean swDepth = GL_FALSE;
+-      const GLboolean swAlpha = GL_FALSE;
+-      const GLboolean swAccum = mesaVis->accumRedBits > 0;
+-      const GLboolean swStencil = mesaVis->stencilBits > 0 &&
+-         mesaVis->depthBits != 24;
+-      struct gl_framebuffer *fb = _mesa_create_framebuffer(mesaVis);
+-
+-      /* front color renderbuffer */
+-      {
+-         driRenderbuffer *frontRb
+-            = driNewRenderbuffer(GL_RGBA,
+-                                 driScrnPriv->pFB + screen->frontOffset,
+-                                 screen->cpp,
+-                                 screen->frontOffset, screen->frontPitch,
+-                                 driDrawPriv);
+-         radeonSetSpanFunctions(frontRb, mesaVis);
+-         _mesa_add_renderbuffer(fb, BUFFER_FRONT_LEFT, &frontRb->Base);
+-      }
++    const GLboolean swDepth = GL_FALSE;
++    const GLboolean swAlpha = GL_FALSE;
++    const GLboolean swAccum = mesaVis->accumRedBits > 0;
++    const GLboolean swStencil = mesaVis->stencilBits > 0 &&
++	mesaVis->depthBits != 24;
++    GLenum rgbFormat = (mesaVis->redBits == 5 ? GL_RGB5 : GL_RGBA8);
++    GLenum depthFormat = GL_NONE;
++    struct gl_framebuffer *fb = _mesa_create_framebuffer(mesaVis);
++
++    if (mesaVis->depthBits == 16)
++	depthFormat = GL_DEPTH_COMPONENT16;
++    else if (mesaVis->depthBits == 24)
++	depthFormat = GL_DEPTH_COMPONENT24;
++
++    /* front color renderbuffer */
++    {
++	struct radeon_renderbuffer *front =
++	    radeon_create_renderbuffer(rgbFormat, driDrawPriv);
++	_mesa_add_renderbuffer(fb, BUFFER_FRONT_LEFT, &front->base);
++	front->has_surface = 1;
++    }
+ 
+-      /* back color renderbuffer */
+-      if (mesaVis->doubleBufferMode) {
+-         driRenderbuffer *backRb
+-            = driNewRenderbuffer(GL_RGBA,
+-                                 driScrnPriv->pFB + screen->backOffset,
+-                                 screen->cpp,
+-                                 screen->backOffset, screen->backPitch,
+-                                 driDrawPriv);
+-         radeonSetSpanFunctions(backRb, mesaVis);
+-         _mesa_add_renderbuffer(fb, BUFFER_BACK_LEFT, &backRb->Base);
+-      }
++    /* back color renderbuffer */
++    if (mesaVis->doubleBufferMode) {
++	struct radeon_renderbuffer *back =
++	    radeon_create_renderbuffer(rgbFormat, driDrawPriv);
++	_mesa_add_renderbuffer(fb, BUFFER_BACK_LEFT, &back->base);
++	back->has_surface = 1;
++    }
+ 
+-      /* depth renderbuffer */
+-      if (mesaVis->depthBits == 16) {
+-         driRenderbuffer *depthRb
+-            = driNewRenderbuffer(GL_DEPTH_COMPONENT16,
+-                                 driScrnPriv->pFB + screen->depthOffset,
+-                                 screen->cpp,
+-                                 screen->depthOffset, screen->depthPitch,
+-                                 driDrawPriv);
+-         radeonSetSpanFunctions(depthRb, mesaVis);
+-         _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depthRb->Base);
+-	 depthRb->depthHasSurface = screen->depthHasSurface;
+-      }
+-      else if (mesaVis->depthBits == 24) {
+-         driRenderbuffer *depthRb
+-            = driNewRenderbuffer(GL_DEPTH_COMPONENT24,
+-                                 driScrnPriv->pFB + screen->depthOffset,
+-                                 screen->cpp,
+-                                 screen->depthOffset, screen->depthPitch,
+-                                 driDrawPriv);
+-         radeonSetSpanFunctions(depthRb, mesaVis);
+-         _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depthRb->Base);
+-	 depthRb->depthHasSurface = screen->depthHasSurface;
+-      }
++    /* depth renderbuffer */
++    if (depthFormat != GL_NONE) {
++	struct radeon_renderbuffer *depth =
++	    radeon_create_renderbuffer(depthFormat, driDrawPriv);
++	_mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depth->base);
++	depth->has_surface = screen->depthHasSurface;
++    }
+ 
+-      /* stencil renderbuffer */
+-      if (mesaVis->stencilBits > 0 && !swStencil) {
+-         driRenderbuffer *stencilRb
+-            = driNewRenderbuffer(GL_STENCIL_INDEX8_EXT,
+-                                 driScrnPriv->pFB + screen->depthOffset,
+-                                 screen->cpp,
+-                                 screen->depthOffset, screen->depthPitch,
+-                                 driDrawPriv);
+-         radeonSetSpanFunctions(stencilRb, mesaVis);
+-         _mesa_add_renderbuffer(fb, BUFFER_STENCIL, &stencilRb->Base);
+-	 stencilRb->depthHasSurface = screen->depthHasSurface;
+-      }
++    /* stencil renderbuffer */
++    if (mesaVis->stencilBits > 0 && !swStencil) {
++	struct radeon_renderbuffer *stencil =
++	    radeon_create_renderbuffer(GL_STENCIL_INDEX8_EXT, driDrawPriv);
++	_mesa_add_renderbuffer(fb, BUFFER_STENCIL, &stencil->base);
++	stencil->has_surface = screen->depthHasSurface;
++    }
+ 
+-      _mesa_add_soft_renderbuffers(fb,
+-                                   GL_FALSE, /* color */
+-                                   swDepth,
+-                                   swStencil,
+-                                   swAccum,
+-                                   swAlpha,
+-                                   GL_FALSE /* aux */);
+-      driDrawPriv->driverPrivate = (void *) fb;
++    _mesa_add_soft_renderbuffers(fb,
++	    GL_FALSE, /* color */
++	    swDepth,
++	    swStencil,
++	    swAccum,
++	    swAlpha,
++	    GL_FALSE /* aux */);
++    driDrawPriv->driverPrivate = (void *) fb;
+ 
+-      return (driDrawPriv->driverPrivate != NULL);
+-   }
++    return (driDrawPriv->driverPrivate != NULL);
+ }
+ 
+-
+ static void
+ radeonDestroyBuffer(__DRIdrawablePrivate *driDrawPriv)
+ {
++	struct radeon_renderbuffer *rb;
++	GLframebuffer *fb;
++    /* Drop the BO reference held by the front, back and depth renderbuffers, then unreference the framebuffer. */
++    fb = (void*)driDrawPriv->driverPrivate; /* NOTE(review): fb is dereferenced below without a NULL check — confirm driverPrivate is always set here */
++    rb = (void *)fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
++    if (rb && rb->bo) {
++        radeon_bo_unref(rb->bo);
++        rb->bo = NULL; /* cleared so the released BO is not referenced again through rb */
++    }
++    rb = (void *)fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
++    if (rb && rb->bo) {
++        radeon_bo_unref(rb->bo);
++        rb->bo = NULL;
++    }
++    rb = (void *)fb->Attachment[BUFFER_DEPTH].Renderbuffer; /* NOTE(review): BUFFER_STENCIL is not visited — confirm its BO is released elsewhere */
++    if (rb && rb->bo) {
++        radeon_bo_unref(rb->bo);
++        rb->bo = NULL;
++    }
+    _mesa_unreference_framebuffer((GLframebuffer **)(&(driDrawPriv->driverPrivate)));
+ }
+ 
+@@ -1205,13 +1430,48 @@ radeonInitScreen(__DRIscreenPrivate *psp)
+    if (!radeonInitDriver(psp))
+        return NULL;
+ 
++   /* for now fill in all modes */
+    return radeonFillInModes( psp,
+ 			     dri_priv->bpp,
+ 			     (dri_priv->bpp == 16) ? 16 : 24,
+-			     (dri_priv->bpp == 16) ? 0  : 8,
+-			     (dri_priv->backOffset != dri_priv->depthOffset) );
++			     (dri_priv->bpp == 16) ? 0  : 8, 1);
+ }
+ 
++/**
++ * This is the driver specific part of the createNewScreen entry point.
++ * Called when using DRI2.
++ *
++ * \return the __DRIconfig list from radeonFillInModes(), or NULL if radeonInitDriver() fails
++ */
++static const
++__DRIconfig **radeonInitScreen2(__DRIscreenPrivate *psp)
++{
++   /* Calling driInitExtensions here, with a NULL context pointer,
++    * does not actually enable the extensions.  It just makes sure
++    * that all the dispatch offsets for all the extensions that
++    * *might* be enabled are known.  This is needed because the
++    * dispatch offsets need to be known when _mesa_context_create
++    * is called, but we can't enable the extensions until we have a
++    * context pointer.
++    *
++    * Hello chicken.  Hello egg.  How are you two today?
++    */
++   driInitExtensions( NULL, card_extensions, GL_FALSE );
++#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200)
++   driInitExtensions( NULL, blend_extensions, GL_FALSE );
++   driInitSingleExtension( NULL, ARB_vp_extension );
++   driInitSingleExtension( NULL, NV_vp_extension );
++   driInitSingleExtension( NULL, ATI_fs_extension );
++   driInitExtensions( NULL, point_extensions, GL_FALSE );
++#endif
++
++   if (!radeonInitDriver(psp)) {
++       return NULL;
++    }
++
++   /* for now fill in all modes; the arguments are hard-coded rather than read from psp */
++   return radeonFillInModes( psp, 24, 24, 8, 1);
++}
+ 
+ /**
+  * Get information about previous buffer swaps.
+@@ -1219,11 +1479,7 @@ radeonInitScreen(__DRIscreenPrivate *psp)
+ static int
+ getSwapInfo( __DRIdrawablePrivate *dPriv, __DRIswapInfo * sInfo )
+ {
+-#if !RADEON_COMMON || (RADEON_COMMON && defined(RADEON_COMMON_FOR_R300))
+    radeonContextPtr  rmesa;
+-#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R200)
+-   r200ContextPtr  rmesa;
+-#endif
+ 
+    if ( (dPriv == NULL) || (dPriv->driContextPriv == NULL)
+ 	|| (dPriv->driContextPriv->driverPrivate == NULL)
+@@ -1260,6 +1516,8 @@ const struct __DriverAPIRec driDriverAPI = {
+    .WaitForSBC      = NULL,
+    .SwapBuffersMSC  = NULL,
+    .CopySubBuffer   = radeonCopySubBuffer,
++    /* DRI2 */
++   .InitScreen2     = radeonInitScreen2,
+ };
+ #else
+ const struct __DriverAPIRec driDriverAPI = {
+@@ -1269,14 +1527,15 @@ const struct __DriverAPIRec driDriverAPI = {
+    .DestroyContext  = r200DestroyContext,
+    .CreateBuffer    = radeonCreateBuffer,
+    .DestroyBuffer   = radeonDestroyBuffer,
+-   .SwapBuffers     = r200SwapBuffers,
+-   .MakeCurrent     = r200MakeCurrent,
+-   .UnbindContext   = r200UnbindContext,
++   .SwapBuffers     = radeonSwapBuffers,
++   .MakeCurrent     = radeonMakeCurrent,
++   .UnbindContext   = radeonUnbindContext,
+    .GetSwapInfo     = getSwapInfo,
+    .GetDrawableMSC  = driDrawableGetMSC32,
+    .WaitForMSC      = driWaitForMSC32,
+    .WaitForSBC      = NULL,
+    .SwapBuffersMSC  = NULL,
+-   .CopySubBuffer   = r200CopySubBuffer,
++   .CopySubBuffer   = radeonCopySubBuffer,
+ };
+ #endif
++
+diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.h b/src/mesa/drivers/dri/radeon/radeon_screen.h
+index b84c70b..1c0f5bb 100644
+--- a/src/mesa/drivers/dri/radeon/radeon_screen.h
++++ b/src/mesa/drivers/dri/radeon/radeon_screen.h
+@@ -54,7 +54,7 @@ typedef struct {
+    drmAddress map;			/* Mapping of the DRM region */
+ } radeonRegionRec, *radeonRegionPtr;
+ 
+-typedef struct {
++typedef struct radeon_screen {
+    int chip_family;
+    int chip_flags;
+    int cpp;
+@@ -103,9 +103,12 @@ typedef struct {
+    /* Configuration cache with default values for all contexts */
+    driOptionCache optionCache;
+ 
+-   const __DRIextension *extensions[8];
++   const __DRIextension *extensions[16];
+ 
+    int num_gb_pipes;
++   int kernel_mm;
++   drm_radeon_sarea_t *sarea;	/* Private SAREA data */
++   struct radeon_bo_manager *bom;
+ } radeonScreenRec, *radeonScreenPtr;
+ 
+ #define IS_R100_CLASS(screen) \
+diff --git a/src/mesa/drivers/dri/radeon/radeon_span.c b/src/mesa/drivers/dri/radeon/radeon_span.c
+index 12051ff..49ec2c3 100644
+--- a/src/mesa/drivers/dri/radeon/radeon_span.c
++++ b/src/mesa/drivers/dri/radeon/radeon_span.c
+@@ -43,37 +43,168 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "main/glheader.h"
+ #include "swrast/swrast.h"
+ 
+-#include "radeon_context.h"
+-#include "radeon_ioctl.h"
+-#include "radeon_state.h"
++#include "radeon_common.h"
++#include "radeon_lock.h"
+ #include "radeon_span.h"
+-#include "radeon_tex.h"
+-
+-#include "drirenderbuffer.h"
+ 
+ #define DBG 0
+ 
++static GLubyte *radeon_ptr32(const struct radeon_renderbuffer * rrb,
++			     GLint x, GLint y)
++{
++    GLubyte *ptr = rrb->bo->ptr;
++    const __DRIdrawablePrivate *dPriv = rrb->dPriv;
++    uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
++    GLint offset;
++    GLint nmacroblkpl;
++    GLint nmicroblkpl;
++    /* Returns the address, inside rrb->bo->ptr, of the pixel at drawable-relative (x, y). */
++    x += dPriv->x; /* shift by the drawable origin */
++    y += dPriv->y;
++    /* Linear addressing when has_surface is set or the BO has no tiling flag; otherwise the tiled offset is computed by hand. */
++    if (rrb->has_surface || !(rrb->bo->flags & mask)) {
++        offset = x * rrb->cpp + y * rrb->pitch;
++    } else {
++        offset = 0; /* the tiled branches below hard-code 4-byte pixels ("<< 2") instead of using rrb->cpp */
++        if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
++            if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) { /* macro + micro tiled */
++                nmacroblkpl = rrb->pitch >> 5;
++                offset += ((y >> 4) * nmacroblkpl) << 11;
++                offset += ((y & 15) >> 1) << 8;
++                offset += (y & 1) << 4;
++                offset += (x >> 5) << 11;
++                offset += ((x & 31) >> 2) << 5;
++                offset += (x & 3) << 2;
++            } else { /* macro tiled only */
++                nmacroblkpl = rrb->pitch >> 6;
++                offset += ((y >> 3) * nmacroblkpl) << 11;
++                offset += (y & 7) << 8;
++                offset += (x >> 6) << 11;
++                offset += ((x & 63) >> 3) << 5;
++                offset += (x & 7) << 2;
++            }
++        } else { /* micro tiled only */
++            nmicroblkpl = ((rrb->pitch + 31) & ~31) >> 5; /* pitch rounded up to a multiple of 32, then divided by 32 */
++            offset += (y * nmicroblkpl) << 5;
++            offset += (x >> 3) << 5;
++            offset += (x & 7) << 2;
++        }
++    }
++    return &ptr[offset];
++}
++
++static GLubyte *radeon_ptr16(const struct radeon_renderbuffer * rrb,
++			     GLint x, GLint y)
++{
++    GLubyte *ptr = rrb->bo->ptr;
++    const __DRIdrawablePrivate *dPriv = rrb->dPriv;
++    uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
++    GLint offset;
++    GLint nmacroblkpl;
++    GLint nmicroblkpl;
++    /* Returns the address, inside rrb->bo->ptr, of the pixel at drawable-relative (x, y); used as GET_PTR by the RGB565 span functions. */
++    x += dPriv->x; /* shift by the drawable origin */
++    y += dPriv->y;
++    /* Linear addressing when has_surface is set or the BO has no tiling flag; otherwise the tiled offset is computed by hand. */
++    if (rrb->has_surface || !(rrb->bo->flags & mask)) {
++        offset = x * rrb->cpp + y * rrb->pitch;
++    } else {
++        offset = 0;
++        if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
++            if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) { /* macro + micro tiled */
++                nmacroblkpl = rrb->pitch >> 6;
++                offset += ((y >> 4) * nmacroblkpl) << 11;
++                offset += ((y & 15) >> 1) << 8;
++                offset += (y & 1) << 4;
++                offset += (x >> 6) << 11;
++                offset += ((x & 63) >> 3) << 5;
++                offset += (x & 7) << 1; /* 2 bytes per pixel */
++            } else { /* macro tiled only */
++                nmacroblkpl = rrb->pitch >> 7;
++                offset += ((y >> 3) * nmacroblkpl) << 11;
++                offset += (y & 7) << 8;
++                offset += (x >> 7) << 11;
++                offset += ((x & 127) >> 4) << 5;
++                offset += (x & 15) << 2; /* NOTE(review): scales x by 4 bytes, unlike the "<< 1" in the branch above — confirm this is intended for 16-bit pixels */
++            }
++        } else { /* micro tiled only */
++            nmicroblkpl = ((rrb->pitch + 31) & ~31) >> 5; /* pitch rounded up to a multiple of 32, then divided by 32 */
++            offset += (y * nmicroblkpl) << 5;
++            offset += (x >> 4) << 5;
++            offset += (x & 15) << 2; /* NOTE(review): same "<< 2" scaling as above — confirm */
++        }
++    }
++    return &ptr[offset];
++}
++
++static GLubyte *radeon_ptr(const struct radeon_renderbuffer * rrb,
++			   GLint x, GLint y)
++{
++    GLubyte *ptr = rrb->bo->ptr;
++    const __DRIdrawablePrivate *dPriv = rrb->dPriv;
++    uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
++    GLint offset;
++    GLint microblkxs;
++    GLint macroblkxs;
++    GLint nmacroblkpl;
++    GLint nmicroblkpl;
++    /* Returns the address, inside rrb->bo->ptr, of the pixel at drawable-relative (x, y); pixel size is taken from rrb->cpp. */
++    x += dPriv->x; /* shift by the drawable origin */
++    y += dPriv->y;
++    /* Linear addressing when has_surface is set or the BO has no tiling flag; otherwise the tiled offset is computed by hand. */
++    if (rrb->has_surface || !(rrb->bo->flags & mask)) {
++        offset = x * rrb->cpp + y * rrb->pitch;
++    } else {
++        offset = 0; /* the "& (n - 1)" masks below only act as a modulo when n is a power of two */
++        if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
++            if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) { /* macro + micro tiled */
++                microblkxs = 16 / rrb->cpp; /* pixels per 16 bytes */
++                macroblkxs = 128 / rrb->cpp; /* pixels per 128 bytes */
++                nmacroblkpl = rrb->pitch / macroblkxs;
++                offset += ((y >> 4) * nmacroblkpl) << 11;
++                offset += ((y & 15) >> 1) << 8;
++                offset += (y & 1) << 4;
++                offset += (x / macroblkxs) << 11;
++                offset += ((x & (macroblkxs - 1)) / microblkxs) << 5;
++                offset += (x & (microblkxs - 1)) * rrb->cpp;
++            } else { /* macro tiled only */
++                microblkxs = 32 / rrb->cpp; /* pixels per 32 bytes */
++                macroblkxs = 256 / rrb->cpp; /* pixels per 256 bytes */
++                nmacroblkpl = rrb->pitch / macroblkxs;
++                offset += ((y >> 3) * nmacroblkpl) << 11;
++                offset += (y & 7) << 8;
++                offset += (x / macroblkxs) << 11;
++                offset += ((x & (macroblkxs - 1)) / microblkxs) << 5;
++                offset += (x & (microblkxs - 1)) * rrb->cpp;
++            }
++        } else { /* micro tiled only */
++            microblkxs = 32 / rrb->cpp; /* pixels per 32 bytes */
++            nmicroblkpl = ((rrb->pitch + 31) & ~31) >> 5; /* pitch rounded up to a multiple of 32, then divided by 32 */
++            offset += (y * nmicroblkpl) << 5;
++            offset += (x / microblkxs) << 5;
++            offset += (x & (microblkxs - 1)) * rrb->cpp;
++        }
++    }
++    return &ptr[offset];
++}
++
++
+ /*
+  * Note that all information needed to access pixels in a renderbuffer
+  * should be obtained through the gl_renderbuffer parameter, not per-context
+  * information.
+  */
+ #define LOCAL_VARS						\
+-   driRenderbuffer *drb = (driRenderbuffer *) rb;		\
+-   const __DRIdrawablePrivate *dPriv = drb->dPriv;		\
++   struct radeon_renderbuffer *rrb = (void *) rb;		\
++   const __DRIdrawablePrivate *dPriv = rrb->dPriv;		\
+    const GLuint bottom = dPriv->h - 1;				\
+-   GLubyte *buf = (GLubyte *) drb->flippedData			\
+-      + (dPriv->y * drb->flippedPitch + dPriv->x) * drb->cpp;	\
+-   GLuint p;							\
+-   (void) p;
++   GLuint p;						\
++   (void)p;
+ 
+ #define LOCAL_DEPTH_VARS				\
+-   driRenderbuffer *drb = (driRenderbuffer *) rb;	\
+-   const __DRIdrawablePrivate *dPriv = drb->dPriv;	\
+-   const GLuint bottom = dPriv->h - 1;			\
+-   GLuint xo = dPriv->x;				\
+-   GLuint yo = dPriv->y;				\
+-   GLubyte *buf = (GLubyte *) drb->Base.Data;
++   struct radeon_renderbuffer *rrb = (void *) rb;	\
++   const __DRIdrawablePrivate *dPriv = rrb->dPriv;	\
++   const GLuint bottom = dPriv->h - 1;
+ 
+ #define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS
+ 
+@@ -94,7 +225,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ #define TAG(x)    radeon##x##_RGB565
+ #define TAG2(x,y) radeon##x##_RGB565##y
+-#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 2)
++#define GET_PTR(X,Y) radeon_ptr16(rrb, (X), (Y))
+ #include "spantmp2.h"
+ 
+ /* 32 bit, ARGB8888 color spanline and pixel functions
+@@ -104,7 +235,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ #define TAG(x)    radeon##x##_ARGB8888
+ #define TAG2(x,y) radeon##x##_ARGB8888##y
+-#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 4)
++#define GET_PTR(X,Y) radeon_ptr32(rrb, (X), (Y))
+ #include "spantmp2.h"
+ 
+ /* ================================================================
+@@ -121,65 +252,15 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+  * too...
+  */
+ 
+-static GLuint radeon_mba_z32(const driRenderbuffer * drb, GLint x, GLint y)
+-{
+-	GLuint pitch = drb->pitch;
+-	if (drb->depthHasSurface) {
+-		return 4 * (x + y * pitch);
+-	} else {
+-		GLuint ba, address = 0;	/* a[0..1] = 0           */
+-
+-#ifdef COMPILE_R300
+-		ba = (y / 8) * (pitch / 8) + (x / 8);
+-#else
+-		ba = (y / 16) * (pitch / 16) + (x / 16);
+-#endif
+-
+-		address |= (x & 0x7) << 2;	/* a[2..4] = x[0..2]     */
+-		address |= (y & 0x3) << 5;	/* a[5..6] = y[0..1]     */
+-		address |= (((x & 0x10) >> 2) ^ (y & 0x4)) << 5;	/* a[7]    = x[4] ^ y[2] */
+-		address |= (ba & 0x3) << 8;	/* a[8..9] = ba[0..1]    */
+-
+-		address |= (y & 0x8) << 7;	/* a[10]   = y[3]        */
+-		address |= (((x & 0x8) << 1) ^ (y & 0x10)) << 7;	/* a[11]   = x[3] ^ y[4] */
+-		address |= (ba & ~0x3) << 10;	/* a[12..] = ba[2..]     */
+-
+-		return address;
+-	}
+-}
+-
+-static INLINE GLuint
+-radeon_mba_z16(const driRenderbuffer * drb, GLint x, GLint y)
+-{
+-	GLuint pitch = drb->pitch;
+-	if (drb->depthHasSurface) {
+-		return 2 * (x + y * pitch);
+-	} else {
+-		GLuint ba, address = 0;	/* a[0]    = 0           */
+-
+-		ba = (y / 16) * (pitch / 32) + (x / 32);
+-
+-		address |= (x & 0x7) << 1;	/* a[1..3] = x[0..2]     */
+-		address |= (y & 0x7) << 4;	/* a[4..6] = y[0..2]     */
+-		address |= (x & 0x8) << 4;	/* a[7]    = x[3]        */
+-		address |= (ba & 0x3) << 8;	/* a[8..9] = ba[0..1]    */
+-		address |= (y & 0x8) << 7;	/* a[10]   = y[3]        */
+-		address |= ((x & 0x10) ^ (y & 0x10)) << 7;	/* a[11]   = x[4] ^ y[4] */
+-		address |= (ba & ~0x3) << 10;	/* a[12..] = ba[2..]     */
+-
+-		return address;
+-	}
+-}
+-
+ /* 16-bit depth buffer functions
+  */
+ #define VALUE_TYPE GLushort
+ 
+ #define WRITE_DEPTH( _x, _y, d )					\
+-   *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo )) = d;
++   *(GLushort *)radeon_ptr(rrb, _x, _y) = d
+ 
+ #define READ_DEPTH( d, _x, _y )						\
+-   d = *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo ));
++   d = *(GLushort *)radeon_ptr(rrb, _x, _y)
+ 
+ #define TAG(x) radeon##x##_z16
+ #include "depthtmp.h"
+@@ -194,35 +275,36 @@ radeon_mba_z16(const driRenderbuffer * drb, GLint x, GLint y)
+ #ifdef COMPILE_R300
+ #define WRITE_DEPTH( _x, _y, d )					\
+ do {									\
+-   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
+-   GLuint tmp = *(GLuint *)(buf + offset);				\
++   GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x, _y );		\
++   GLuint tmp = *_ptr;				\
+    tmp &= 0x000000ff;							\
+    tmp |= ((d << 8) & 0xffffff00);					\
+-   *(GLuint *)(buf + offset) = tmp;					\
++   *_ptr = tmp;					\
+ } while (0)
+ #else
+ #define WRITE_DEPTH( _x, _y, d )					\
+ do {									\
+-   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
+-   GLuint tmp = *(GLuint *)(buf + offset);				\
++   GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x, _y );		\
++   GLuint tmp = *_ptr;				\
+    tmp &= 0xff000000;							\
+    tmp |= ((d) & 0x00ffffff);						\
+-   *(GLuint *)(buf + offset) = tmp;					\
++   *_ptr = tmp;					\
+ } while (0)
+ #endif
+ 
+ #ifdef COMPILE_R300
+ #define READ_DEPTH( d, _x, _y )						\
+   do { \
+-    d = (*(GLuint *)(buf + radeon_mba_z32( drb, _x + xo,		\
+-					 _y + yo )) & 0xffffff00) >> 8; \
++    d = (*(GLuint*)(radeon_ptr32(rrb, _x, _y)) & 0xffffff00) >> 8; \
+   }while(0)
+ #else
+ #define READ_DEPTH( d, _x, _y )						\
+-   d = *(GLuint *)(buf + radeon_mba_z32( drb, _x + xo,			\
+-					 _y + yo )) & 0x00ffffff;
++   d = *(GLuint*)(radeon_ptr32(rrb, _x,	_y )) & 0x00ffffff;
+ #endif
+-
++/*
++    fprintf(stderr, "dval(%d, %d, %d, %d)=0x%08X\n", _x, xo, _y, yo, d);\
++   d = *(GLuint*)(radeon_ptr(rrb, _x,	_y )) & 0x00ffffff;
++*/
+ #define TAG(x) radeon##x##_z24_s8
+ #include "depthtmp.h"
+ 
+@@ -235,35 +317,35 @@ do {									\
+ #ifdef COMPILE_R300
+ #define WRITE_STENCIL( _x, _y, d )					\
+ do {									\
+-   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
+-   GLuint tmp = *(GLuint *)(buf + offset);				\
++   GLuint *_ptr = (GLuint*)radeon_ptr32(rrb, _x, _y);		\
++   GLuint tmp = *_ptr;				\
+    tmp &= 0xffffff00;							\
+    tmp |= (d) & 0xff;							\
+-   *(GLuint *)(buf + offset) = tmp;					\
++   *_ptr = tmp;					\
+ } while (0)
+ #else
+ #define WRITE_STENCIL( _x, _y, d )					\
+ do {									\
+-   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
+-   GLuint tmp = *(GLuint *)(buf + offset);				\
++   GLuint *_ptr = (GLuint*)radeon_ptr32(rrb, _x, _y);		\
++   GLuint tmp = *_ptr;				\
+    tmp &= 0x00ffffff;							\
+    tmp |= (((d) & 0xff) << 24);						\
+-   *(GLuint *)(buf + offset) = tmp;					\
++   *_ptr = tmp;					\
+ } while (0)
+ #endif
+ 
+ #ifdef COMPILE_R300
+ #define READ_STENCIL( d, _x, _y )					\
+ do {									\
+-   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
+-   GLuint tmp = *(GLuint *)(buf + offset);				\
++   GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x, _y );		\
++   GLuint tmp = *_ptr;				\
+    d = tmp & 0x000000ff;						\
+ } while (0)
+ #else
+ #define READ_STENCIL( d, _x, _y )					\
+ do {									\
+-   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
+-   GLuint tmp = *(GLuint *)(buf + offset);				\
++   GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x, _y );		\
++   GLuint tmp = *_ptr;				\
+    d = (tmp & 0xff000000) >> 24;					\
+ } while (0)
+ #endif
+@@ -271,20 +353,60 @@ do {									\
+ #define TAG(x) radeon##x##_z24_s8
+ #include "stenciltmp.h"
+ 
+-/* Move locking out to get reasonable span performance (10x better
+- * than doing this in HW_LOCK above).  WaitForIdle() is the main
+- * culprit.
+- */
++
++static void map_buffer(struct gl_renderbuffer *rb, GLboolean write)
++{
++	struct radeon_renderbuffer *rrb = (void*)rb;
++	int r;
++	/* Map the renderbuffer's BO if it has one; a failure is only reported on stderr. rb itself is dereferenced unchecked. */
++	if (rrb->bo) {
++		r = radeon_bo_map(rrb->bo, write); /* write is forwarded unchanged to radeon_bo_map() */
++		if (r) {
++			fprintf(stderr, "(%s) error(%d) mapping buffer.\n",
++				__FUNCTION__, r);
++		}
++	}
++}
++
++static void unmap_buffer(struct gl_renderbuffer *rb)
++{
++	struct radeon_renderbuffer *rrb = (void*)rb;
++	/* Unmap the renderbuffer's BO if it has one; rb itself is dereferenced unchecked. */
++	if (rrb->bo) {
++		radeon_bo_unmap(rrb->bo);
++	}
++}
+ 
+ static void radeonSpanRenderStart(GLcontext * ctx)
+ {
+ 	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+-#ifdef COMPILE_R300
+-	r300ContextPtr r300 = (r300ContextPtr) rmesa;
+-	R300_FIREVERTICES(r300);
+-#else
+-	RADEON_FIREVERTICES(rmesa);
+-#endif
++	int i;
++
++	radeon_firevertices(rmesa);
++	/* Map the current texture object of every really-enabled texture unit. */
++	for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
++		if (ctx->Texture.Unit[i]._ReallyEnabled)
++			ctx->Driver.MapTexture(ctx, ctx->Texture.Unit[i]._Current);
++	}
++
++	/* color draw buffers */
++	for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
++		map_buffer(ctx->DrawBuffer->_ColorDrawBuffers[i], GL_TRUE);
++	}
++	/* Read buffer is mapped with write == GL_FALSE. NOTE(review): _ColorReadBuffer is passed on unchecked and map_buffer() dereferences it — confirm it is never NULL here. */
++	map_buffer(ctx->ReadBuffer->_ColorReadBuffer, GL_FALSE);
++
++	if (ctx->DrawBuffer->_DepthBuffer) {
++		map_buffer(ctx->DrawBuffer->_DepthBuffer->Wrapped, GL_TRUE); /* the Wrapped renderbuffer is what gets mapped */
++	}
++	if (ctx->DrawBuffer->_StencilBuffer)
++		map_buffer(ctx->DrawBuffer->_StencilBuffer->Wrapped, GL_TRUE);
++
++	/* The locking and wait for idle should really only be needed in classic mode.
++	 * In a future memory manager based implementation, this should become
++	 * unnecessary due to the fact that mapping our buffers, textures, etc.
++	 * should implicitly wait for any previous rendering commands that must
++	 * be waited on. */
+ 	LOCK_HARDWARE(rmesa);
+ 	radeonWaitForIdleLocked(rmesa);
+ }
+@@ -292,8 +414,25 @@ static void radeonSpanRenderStart(GLcontext * ctx)
+ static void radeonSpanRenderFinish(GLcontext * ctx)
+ {
+ 	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++	int i;
+ 	_swrast_flush(ctx);
+ 	UNLOCK_HARDWARE(rmesa);
++	/* After the hardware lock is released, unmap the textures and renderbuffers mapped in radeonSpanRenderStart(). */
++	for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
++		if (ctx->Texture.Unit[i]._ReallyEnabled)
++			ctx->Driver.UnmapTexture(ctx, ctx->Texture.Unit[i]._Current);
++	}
++
++	/* color draw buffers */
++	for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++)
++		unmap_buffer(ctx->DrawBuffer->_ColorDrawBuffers[i]);
++
++	unmap_buffer(ctx->ReadBuffer->_ColorReadBuffer); /* NOTE(review): passed unchecked, unmap_buffer() dereferences it — confirm it is never NULL here */
++
++	if (ctx->DrawBuffer->_DepthBuffer)
++		unmap_buffer(ctx->DrawBuffer->_DepthBuffer->Wrapped);
++	if (ctx->DrawBuffer->_StencilBuffer)
++		unmap_buffer(ctx->DrawBuffer->_StencilBuffer->Wrapped);
+ }
+ 
+ void radeonInitSpanFuncs(GLcontext * ctx)
+@@ -307,20 +446,17 @@ void radeonInitSpanFuncs(GLcontext * ctx)
+ /**
+  * Plug in the Get/Put routines for the given driRenderbuffer.
+  */
+-void radeonSetSpanFunctions(driRenderbuffer * drb, const GLvisual * vis)
++void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb)
+ {
+-	if (drb->Base.InternalFormat == GL_RGBA) {
+-		if (vis->redBits == 5 && vis->greenBits == 6
+-		    && vis->blueBits == 5) {
+-			radeonInitPointers_RGB565(&drb->Base);
+-		} else {
+-			radeonInitPointers_ARGB8888(&drb->Base);
+-		}
+-	} else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT16) {
+-		radeonInitDepthPointers_z16(&drb->Base);
+-	} else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT24) {
+-		radeonInitDepthPointers_z24_s8(&drb->Base);
+-	} else if (drb->Base.InternalFormat == GL_STENCIL_INDEX8_EXT) {
+-		radeonInitStencilPointers_z24_s8(&drb->Base);
++	if (rrb->base.InternalFormat == GL_RGB5) { /* span functions are chosen from InternalFormat alone; a format not listed here gets none */
++		radeonInitPointers_RGB565(&rrb->base);
++	} else if (rrb->base.InternalFormat == GL_RGBA8) {
++		radeonInitPointers_ARGB8888(&rrb->base);
++	} else if (rrb->base.InternalFormat == GL_DEPTH_COMPONENT16) {
++		radeonInitDepthPointers_z16(&rrb->base);
++	} else if (rrb->base.InternalFormat == GL_DEPTH_COMPONENT24) {
++		radeonInitDepthPointers_z24_s8(&rrb->base);
++	} else if (rrb->base.InternalFormat == GL_STENCIL_INDEX8_EXT) {
++		radeonInitStencilPointers_z24_s8(&rrb->base); /* depth24 and stencil8 both use the z24_s8 accessors */
+ 	}
+ }
+diff --git a/src/mesa/drivers/dri/radeon/radeon_span.h b/src/mesa/drivers/dri/radeon/radeon_span.h
+index 9abe086..dd44ab5 100644
+--- a/src/mesa/drivers/dri/radeon/radeon_span.h
++++ b/src/mesa/drivers/dri/radeon/radeon_span.h
+@@ -42,9 +42,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #ifndef __RADEON_SPAN_H__
+ #define __RADEON_SPAN_H__
+ 
+-#include "drirenderbuffer.h"
+-
+ extern void radeonInitSpanFuncs(GLcontext * ctx);
+-extern void radeonSetSpanFunctions(driRenderbuffer * rb, const GLvisual * vis);
+ 
++extern void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb);
+ #endif
+diff --git a/src/mesa/drivers/dri/radeon/radeon_state.c b/src/mesa/drivers/dri/radeon/radeon_state.c
+index 32bcff3..86d8720 100644
+--- a/src/mesa/drivers/dri/radeon/radeon_state.c
++++ b/src/mesa/drivers/dri/radeon/radeon_state.c
+@@ -62,7 +62,7 @@ static void radeonUpdateSpecular( GLcontext *ctx );
+ 
+ static void radeonAlphaFunc( GLcontext *ctx, GLenum func, GLfloat ref )
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+    int pp_misc = rmesa->hw.ctx.cmd[CTX_PP_MISC];
+    GLubyte refByte;
+ 
+@@ -106,7 +106,7 @@ static void radeonAlphaFunc( GLcontext *ctx, GLenum func, GLfloat ref )
+ static void radeonBlendEquationSeparate( GLcontext *ctx,
+ 					 GLenum modeRGB, GLenum modeA )
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+    GLuint b = rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] & ~RADEON_COMB_FCN_MASK;
+    GLboolean fallback = GL_FALSE;
+ 
+@@ -147,7 +147,7 @@ static void radeonBlendFuncSeparate( GLcontext *ctx,
+ 				     GLenum sfactorRGB, GLenum dfactorRGB,
+ 				     GLenum sfactorA, GLenum dfactorA )
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+    GLuint b = rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] & 
+       ~(RADEON_SRC_BLEND_MASK | RADEON_DST_BLEND_MASK);
+    GLboolean fallback = GL_FALSE;
+@@ -257,7 +257,7 @@ static void radeonBlendFuncSeparate( GLcontext *ctx,
+ 
+ static void radeonDepthFunc( GLcontext *ctx, GLenum func )
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ 
+    RADEON_STATECHANGE( rmesa, ctx );
+    rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &= ~RADEON_Z_TEST_MASK;
+@@ -293,7 +293,7 @@ static void radeonDepthFunc( GLcontext *ctx, GLenum func )
+ 
+ static void radeonDepthMask( GLcontext *ctx, GLboolean flag )
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+    RADEON_STATECHANGE( rmesa, ctx );
+ 
+    if ( ctx->Depth.Mask ) {
+@@ -305,16 +305,16 @@ static void radeonDepthMask( GLcontext *ctx, GLboolean flag )
+ 
+ static void radeonClearDepth( GLcontext *ctx, GLclampd d )
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+    GLuint format = (rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &
+ 		    RADEON_DEPTH_FORMAT_MASK);
+ 
+    switch ( format ) {
+    case RADEON_DEPTH_FORMAT_16BIT_INT_Z:
+-      rmesa->state.depth.clear = d * 0x0000ffff;
++      rmesa->radeon.state.depth.clear = d * 0x0000ffff;
+       break;
+    case RADEON_DEPTH_FORMAT_24BIT_INT_Z:
+-      rmesa->state.depth.clear = d * 0x00ffffff;
++      rmesa->radeon.state.depth.clear = d * 0x00ffffff;
+       break;
+    }
+ }
+@@ -327,7 +327,7 @@ static void radeonClearDepth( GLcontext *ctx, GLclampd d )
+ 
+ static void radeonFogfv( GLcontext *ctx, GLenum pname, const GLfloat *param )
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+    union { int i; float f; } c, d;
+    GLchan col[4];
+ 
+@@ -406,109 +406,13 @@ static void radeonFogfv( GLcontext *ctx, GLenum pname, const GLfloat *param )
+    }
+ }
+ 
+-
+-/* =============================================================
+- * Scissoring
+- */
+-
+-
+-static GLboolean intersect_rect( drm_clip_rect_t *out,
+-				 drm_clip_rect_t *a,
+-				 drm_clip_rect_t *b )
+-{
+-   *out = *a;
+-   if ( b->x1 > out->x1 ) out->x1 = b->x1;
+-   if ( b->y1 > out->y1 ) out->y1 = b->y1;
+-   if ( b->x2 < out->x2 ) out->x2 = b->x2;
+-   if ( b->y2 < out->y2 ) out->y2 = b->y2;
+-   if ( out->x1 >= out->x2 ) return GL_FALSE;
+-   if ( out->y1 >= out->y2 ) return GL_FALSE;
+-   return GL_TRUE;
+-}
+-
+-
+-void radeonRecalcScissorRects( radeonContextPtr rmesa )
+-{
+-   drm_clip_rect_t *out;
+-   int i;
+-
+-   /* Grow cliprect store?
+-    */
+-   if (rmesa->state.scissor.numAllocedClipRects < rmesa->numClipRects) {
+-      while (rmesa->state.scissor.numAllocedClipRects < rmesa->numClipRects) {
+-	 rmesa->state.scissor.numAllocedClipRects += 1;	/* zero case */
+-	 rmesa->state.scissor.numAllocedClipRects *= 2;
+-      }
+-
+-      if (rmesa->state.scissor.pClipRects)
+-	 FREE(rmesa->state.scissor.pClipRects);
+-
+-      rmesa->state.scissor.pClipRects = 
+-	 MALLOC( rmesa->state.scissor.numAllocedClipRects * 
+-		 sizeof(drm_clip_rect_t) );
+-
+-      if ( rmesa->state.scissor.pClipRects == NULL ) {
+-	 rmesa->state.scissor.numAllocedClipRects = 0;
+-	 return;
+-      }
+-   }
+-   
+-   out = rmesa->state.scissor.pClipRects;
+-   rmesa->state.scissor.numClipRects = 0;
+-
+-   for ( i = 0 ; i < rmesa->numClipRects ;  i++ ) {
+-      if ( intersect_rect( out, 
+-			   &rmesa->pClipRects[i], 
+-			   &rmesa->state.scissor.rect ) ) {
+-	 rmesa->state.scissor.numClipRects++;
+-	 out++;
+-      }
+-   }
+-}
+-
+-
+-static void radeonUpdateScissor( GLcontext *ctx )
+-{
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+-
+-   if ( rmesa->dri.drawable ) {
+-      __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
+-
+-      int x = ctx->Scissor.X;
+-      int y = dPriv->h - ctx->Scissor.Y - ctx->Scissor.Height;
+-      int w = ctx->Scissor.X + ctx->Scissor.Width - 1;
+-      int h = dPriv->h - ctx->Scissor.Y - 1;
+-
+-      rmesa->state.scissor.rect.x1 = x + dPriv->x;
+-      rmesa->state.scissor.rect.y1 = y + dPriv->y;
+-      rmesa->state.scissor.rect.x2 = w + dPriv->x + 1;
+-      rmesa->state.scissor.rect.y2 = h + dPriv->y + 1;
+-
+-      radeonRecalcScissorRects( rmesa );
+-   }
+-}
+-
+-
+-static void radeonScissor( GLcontext *ctx,
+-			   GLint x, GLint y, GLsizei w, GLsizei h )
+-{
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+-
+-   if ( ctx->Scissor.Enabled ) {
+-      RADEON_FIREVERTICES( rmesa );	/* don't pipeline cliprect changes */
+-      radeonUpdateScissor( ctx );
+-   }
+-
+-}
+-
+-
+ /* =============================================================
+  * Culling
+  */
+ 
+ static void radeonCullFace( GLcontext *ctx, GLenum unused )
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+    GLuint s = rmesa->hw.set.cmd[SET_SE_CNTL];
+    GLuint t = rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL];
+ 
+@@ -545,7 +449,7 @@ static void radeonCullFace( GLcontext *ctx, GLenum unused )
+ 
+ static void radeonFrontFace( GLcontext *ctx, GLenum mode )
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ 
+    RADEON_STATECHANGE( rmesa, set );
+    rmesa->hw.set.cmd[SET_SE_CNTL] &= ~RADEON_FFACE_CULL_DIR_MASK;
+@@ -570,7 +474,7 @@ static void radeonFrontFace( GLcontext *ctx, GLenum mode )
+  */
+ static void radeonLineWidth( GLcontext *ctx, GLfloat widthf )
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ 
+    RADEON_STATECHANGE( rmesa, lin );
+    RADEON_STATECHANGE( rmesa, set );
+@@ -587,7 +491,7 @@ static void radeonLineWidth( GLcontext *ctx, GLfloat widthf )
+ 
+ static void radeonLineStipple( GLcontext *ctx, GLint factor, GLushort pattern )
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ 
+    RADEON_STATECHANGE( rmesa, lin );
+    rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] = 
+@@ -602,8 +506,8 @@ static void radeonColorMask( GLcontext *ctx,
+ 			     GLboolean r, GLboolean g,
+ 			     GLboolean b, GLboolean a )
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+-   GLuint mask = radeonPackColor( rmesa->radeonScreen->cpp,
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
++   GLuint mask = radeonPackColor( rmesa->radeon.radeonScreen->cpp,
+ 				  ctx->Color.ColorMask[RCOMP],
+ 				  ctx->Color.ColorMask[GCOMP],
+ 				  ctx->Color.ColorMask[BCOMP],
+@@ -623,8 +527,8 @@ static void radeonColorMask( GLcontext *ctx,
+ static void radeonPolygonOffset( GLcontext *ctx,
+ 				 GLfloat factor, GLfloat units )
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+-   float_ui32_type constant =  { units * rmesa->state.depth.scale };
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
++   float_ui32_type constant =  { units * rmesa->radeon.state.depth.scale };
+    float_ui32_type factoru = { factor };
+ 
+    RADEON_STATECHANGE( rmesa, zbs );
+@@ -634,7 +538,7 @@ static void radeonPolygonOffset( GLcontext *ctx,
+ 
+ static void radeonPolygonStipple( GLcontext *ctx, const GLubyte *mask )
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+    GLuint i;
+    drm_radeon_stipple_t stipple;
+ 
+@@ -646,27 +550,27 @@ static void radeonPolygonStipple( GLcontext *ctx, const GLubyte *mask )
+ 
+    /* TODO: push this into cmd mechanism
+     */
+-   RADEON_FIREVERTICES( rmesa );
+-   LOCK_HARDWARE( rmesa );
++   radeon_firevertices(&rmesa->radeon);
++   LOCK_HARDWARE( &rmesa->radeon );
+ 
+    /* FIXME: Use window x,y offsets into stipple RAM.
+     */
+    stipple.mask = rmesa->state.stipple.mask;
+-   drmCommandWrite( rmesa->dri.fd, DRM_RADEON_STIPPLE, 
++   drmCommandWrite( rmesa->radeon.dri.fd, DRM_RADEON_STIPPLE, 
+                     &stipple, sizeof(drm_radeon_stipple_t) );
+-   UNLOCK_HARDWARE( rmesa );
++   UNLOCK_HARDWARE( &rmesa->radeon );
+ }
+ 
+ static void radeonPolygonMode( GLcontext *ctx, GLenum face, GLenum mode )
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+    GLboolean flag = (ctx->_TriangleCaps & DD_TRI_UNFILLED) != 0;
+ 
+    /* Can't generally do unfilled via tcl, but some good special
+     * cases work. 
+     */
+    TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_UNFILLED, flag);
+-   if (rmesa->TclFallback) {
++   if (rmesa->radeon.TclFallback) {
+       radeonChooseRenderState( ctx );
+       radeonChooseVertexState( ctx );
+    }
+@@ -686,7 +590,7 @@ static void radeonPolygonMode( GLcontext *ctx, GLenum face, GLenum mode )
+  */
+ static void radeonUpdateSpecular( GLcontext *ctx )
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+    uint32_t p = rmesa->hw.ctx.cmd[CTX_PP_CNTL];
+    GLuint flag = 0;
+ 
+@@ -757,7 +661,7 @@ static void radeonUpdateSpecular( GLcontext *ctx )
+ 
+    /* Update vertex/render formats
+     */
+-   if (rmesa->TclFallback) { 
++   if (rmesa->radeon.TclFallback) { 
+       radeonChooseRenderState( ctx );
+       radeonChooseVertexState( ctx );
+    }
+@@ -774,7 +678,7 @@ static void radeonUpdateSpecular( GLcontext *ctx )
+  */
+ static void update_global_ambient( GLcontext *ctx )
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+    float *fcmd = (float *)RADEON_DB_STATE( glt );
+ 
+    /* Need to do more if both emmissive & ambient are PREMULT:
+@@ -809,7 +713,7 @@ static void update_light_colors( GLcontext *ctx, GLuint p )
+ /*     fprintf(stderr, "%s\n", __FUNCTION__); */
+ 
+    if (l->Enabled) {
+-      radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++      r100ContextPtr rmesa = R100_CONTEXT(ctx);
+       float *fcmd = (float *)RADEON_DB_STATE( lit[p] );
+ 
+       COPY_4V( &fcmd[LIT_AMBIENT_RED], l->Ambient );	 
+@@ -849,7 +753,7 @@ static void check_twoside_fallback( GLcontext *ctx )
+ 
+ static void radeonColorMaterial( GLcontext *ctx, GLenum face, GLenum mode )
+ {
+-      radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++      r100ContextPtr rmesa = R100_CONTEXT(ctx);
+       GLuint light_model_ctl1 = rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL];
+ 
+       light_model_ctl1 &= ~((3 << RADEON_EMISSIVE_SOURCE_SHIFT) |
+@@ -913,7 +817,7 @@ static void radeonColorMaterial( GLcontext *ctx, GLenum face, GLenum mode )
+ 
+ void radeonUpdateMaterial( GLcontext *ctx )
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+    GLfloat (*mat)[4] = ctx->Light.Material.Attrib;
+    GLfloat *fcmd = (GLfloat *)RADEON_DB_STATE( mtl );
+    GLuint mask = ~0;
+@@ -978,7 +882,7 @@ void radeonUpdateMaterial( GLcontext *ctx )
+  */
+ static void update_light( GLcontext *ctx )
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ 
+    /* Have to check these, or have an automatic shortcircuit mechanism
+     * to remove noop statechanges. (Or just do a better job on the
+@@ -1043,7 +947,7 @@ static void update_light( GLcontext *ctx )
+ static void radeonLightfv( GLcontext *ctx, GLenum light,
+ 			   GLenum pname, const GLfloat *params )
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+    GLint p = light - GL_LIGHT0;
+    struct gl_light *l = &ctx->Light.Light[p];
+    GLfloat *fcmd = (GLfloat *)rmesa->hw.lit[p].cmd;
+@@ -1164,7 +1068,7 @@ static void radeonLightfv( GLcontext *ctx, GLenum light,
+ static void radeonLightModelfv( GLcontext *ctx, GLenum pname,
+ 				const GLfloat *param )
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ 
+    switch (pname) {
+       case GL_LIGHT_MODEL_AMBIENT: 
+@@ -1188,7 +1092,7 @@ static void radeonLightModelfv( GLcontext *ctx, GLenum pname,
+ 
+ 	 check_twoside_fallback( ctx );
+ 
+-	 if (rmesa->TclFallback) {
++	 if (rmesa->radeon.TclFallback) {
+ 	    radeonChooseRenderState( ctx );
+ 	    radeonChooseVertexState( ctx );
+ 	 }
+@@ -1205,7 +1109,7 @@ static void radeonLightModelfv( GLcontext *ctx, GLenum pname,
+ 
+ static void radeonShadeModel( GLcontext *ctx, GLenum mode )
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+    GLuint s = rmesa->hw.set.cmd[SET_SE_CNTL];
+ 
+    s &= ~(RADEON_DIFFUSE_SHADE_MASK |
+@@ -1244,7 +1148,7 @@ static void radeonShadeModel( GLcontext *ctx, GLenum mode )
+ static void radeonClipPlane( GLcontext *ctx, GLenum plane, const GLfloat *eq )
+ {
+    GLint p = (GLint) plane - (GLint) GL_CLIP_PLANE0;
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+    GLint *ip = (GLint *)ctx->Transform._ClipUserPlane[p];
+ 
+    RADEON_STATECHANGE( rmesa, ucp[p] );
+@@ -1256,7 +1160,7 @@ static void radeonClipPlane( GLcontext *ctx, GLenum plane, const GLfloat *eq )
+ 
+ static void radeonUpdateClipPlanes( GLcontext *ctx )
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+    GLuint p;
+ 
+    for (p = 0; p < ctx->Const.MaxClipPlanes; p++) {
+@@ -1281,7 +1185,7 @@ static void
+ radeonStencilFuncSeparate( GLcontext *ctx, GLenum face, GLenum func,
+                            GLint ref, GLuint mask )
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+    GLuint refmask = (((ctx->Stencil.Ref[0] & 0xff) << RADEON_STENCIL_REF_SHIFT) |
+ 		     ((ctx->Stencil.ValueMask[0] & 0xff) << RADEON_STENCIL_MASK_SHIFT));
+ 
+@@ -1325,7 +1229,7 @@ radeonStencilFuncSeparate( GLcontext *ctx, GLenum face, GLenum func,
+ static void
+ radeonStencilMaskSeparate( GLcontext *ctx, GLenum face, GLuint mask )
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ 
+    RADEON_STATECHANGE( rmesa, msk );
+    rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] &= ~RADEON_STENCIL_WRITE_MASK;
+@@ -1336,7 +1240,7 @@ radeonStencilMaskSeparate( GLcontext *ctx, GLenum face, GLuint mask )
+ static void radeonStencilOpSeparate( GLcontext *ctx, GLenum face, GLenum fail,
+                                      GLenum zfail, GLenum zpass )
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ 
+    /* radeon 7200 have stencil bug, DEC and INC_WRAP will actually both do DEC_WRAP,
+       and DEC_WRAP (and INVERT) will do INVERT. No way to get correct INC_WRAP and DEC,
+@@ -1349,7 +1253,7 @@ static void radeonStencilOpSeparate( GLcontext *ctx, GLenum face, GLenum fail,
+    GLuint tempRADEON_STENCIL_ZPASS_DEC_WRAP;
+    GLuint tempRADEON_STENCIL_ZPASS_INC_WRAP;
+    
+-   if (rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_BROKEN_STENCIL) {
++   if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_BROKEN_STENCIL) {
+       tempRADEON_STENCIL_FAIL_DEC_WRAP = RADEON_STENCIL_FAIL_DEC;
+       tempRADEON_STENCIL_FAIL_INC_WRAP = RADEON_STENCIL_FAIL_INC;
+       tempRADEON_STENCIL_ZFAIL_DEC_WRAP = RADEON_STENCIL_ZFAIL_DEC;
+@@ -1455,9 +1359,9 @@ static void radeonStencilOpSeparate( GLcontext *ctx, GLenum face, GLenum fail,
+ 
+ static void radeonClearStencil( GLcontext *ctx, GLint s )
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ 
+-   rmesa->state.stencil.clear = 
++   rmesa->radeon.state.stencil.clear = 
+       ((GLuint) (ctx->Stencil.Clear & 0xff) |
+        (0xff << RADEON_STENCIL_MASK_SHIFT) |
+        ((ctx->Stencil.WriteMask[0] & 0xff) << RADEON_STENCIL_WRITEMASK_SHIFT));
+@@ -1481,20 +1385,20 @@ static void radeonClearStencil( GLcontext *ctx, GLint s )
+  */
+ void radeonUpdateWindow( GLcontext *ctx )
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+-   __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
+-   GLfloat xoffset = (GLfloat)dPriv->x;
+-   GLfloat yoffset = (GLfloat)dPriv->y + dPriv->h;
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
++   __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable;
++   GLfloat xoffset = dPriv ? (GLfloat) dPriv->x : 0;
++   GLfloat yoffset = dPriv ? (GLfloat) dPriv->y + dPriv->h : 0;
+    const GLfloat *v = ctx->Viewport._WindowMap.m;
+ 
+    float_ui32_type sx = { v[MAT_SX] };
+    float_ui32_type tx = { v[MAT_TX] + xoffset + SUBPIXEL_X };
+    float_ui32_type sy = { - v[MAT_SY] };
+    float_ui32_type ty = { (- v[MAT_TY]) + yoffset + SUBPIXEL_Y };
+-   float_ui32_type sz = { v[MAT_SZ] * rmesa->state.depth.scale };
+-   float_ui32_type tz = { v[MAT_TZ] * rmesa->state.depth.scale };
++   float_ui32_type sz = { v[MAT_SZ] * rmesa->radeon.state.depth.scale };
++   float_ui32_type tz = { v[MAT_TZ] * rmesa->radeon.state.depth.scale };
+ 
+-   RADEON_FIREVERTICES( rmesa );
++   radeon_firevertices(&rmesa->radeon);
+    RADEON_STATECHANGE( rmesa, vpt );
+ 
+    rmesa->hw.vpt.cmd[VPT_SE_VPORT_XSCALE]  = sx.ui32;
+@@ -1524,8 +1428,8 @@ static void radeonDepthRange( GLcontext *ctx, GLclampd nearval,
+ 
+ void radeonUpdateViewportOffset( GLcontext *ctx )
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+-   __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
++   __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable;
+    GLfloat xoffset = (GLfloat)dPriv->x;
+    GLfloat yoffset = (GLfloat)dPriv->y + dPriv->h;
+    const GLfloat *v = ctx->Viewport._WindowMap.m;
+@@ -1555,8 +1459,8 @@ void radeonUpdateViewportOffset( GLcontext *ctx )
+                 RADEON_STIPPLE_Y_OFFSET_MASK);
+ 
+          /* add magic offsets, then invert */
+-         stx = 31 - ((rmesa->dri.drawable->x - 1) & RADEON_STIPPLE_COORD_MASK);
+-         sty = 31 - ((rmesa->dri.drawable->y + rmesa->dri.drawable->h - 1)
++         stx = 31 - ((rmesa->radeon.dri.drawable->x - 1) & RADEON_STIPPLE_COORD_MASK);
++         sty = 31 - ((rmesa->radeon.dri.drawable->y + rmesa->radeon.dri.drawable->h - 1)
+                      & RADEON_STIPPLE_COORD_MASK);
+ 
+          m |= ((stx << RADEON_STIPPLE_X_OFFSET_SHIFT) |
+@@ -1580,20 +1484,20 @@ void radeonUpdateViewportOffset( GLcontext *ctx )
+ 
+ static void radeonClearColor( GLcontext *ctx, const GLfloat color[4] )
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+    GLubyte c[4];
+    CLAMPED_FLOAT_TO_UBYTE(c[0], color[0]);
+    CLAMPED_FLOAT_TO_UBYTE(c[1], color[1]);
+    CLAMPED_FLOAT_TO_UBYTE(c[2], color[2]);
+    CLAMPED_FLOAT_TO_UBYTE(c[3], color[3]);
+-   rmesa->state.color.clear = radeonPackColor( rmesa->radeonScreen->cpp,
++   rmesa->radeon.state.color.clear = radeonPackColor( rmesa->radeon.radeonScreen->cpp,
+ 					       c[0], c[1], c[2], c[3] );
+ }
+ 
+ 
+ static void radeonRenderMode( GLcontext *ctx, GLenum mode )
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+    FALLBACK( rmesa, RADEON_FALLBACK_RENDER_MODE, (mode != GL_RENDER) );
+ }
+ 
+@@ -1619,7 +1523,7 @@ static GLuint radeon_rop_tab[] = {
+ 
+ static void radeonLogicOpCode( GLcontext *ctx, GLenum opcode )
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+    GLuint rop = (GLuint)opcode - GL_CLEAR;
+ 
+    ASSERT( rop < 16 );
+@@ -1630,66 +1534,17 @@ static void radeonLogicOpCode( GLcontext *ctx, GLenum opcode )
+ 
+ 
+ /**
+- * Set up the cliprects for either front or back-buffer drawing.
+- */
+-void radeonSetCliprects( radeonContextPtr rmesa )
+-{
+-   __DRIdrawablePrivate *const drawable = rmesa->dri.drawable;
+-   __DRIdrawablePrivate *const readable = rmesa->dri.readable;
+-   GLframebuffer *const draw_fb = (GLframebuffer*) drawable->driverPrivate;
+-   GLframebuffer *const read_fb = (GLframebuffer*) readable->driverPrivate;
+-
+-   if (draw_fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) {
+-      /* Can't ignore 2d windows if we are page flipping.
+-       */
+-      if ( drawable->numBackClipRects == 0 || rmesa->doPageFlip ) {
+-	 rmesa->numClipRects = drawable->numClipRects;
+-	 rmesa->pClipRects = drawable->pClipRects;
+-      }
+-      else {
+-	 rmesa->numClipRects = drawable->numBackClipRects;
+-	 rmesa->pClipRects = drawable->pBackClipRects;
+-      }
+-   }
+-   else {
+-      /* front buffer (or none, or multiple buffers */
+-      rmesa->numClipRects = drawable->numClipRects;
+-      rmesa->pClipRects = drawable->pClipRects;
+-   }
+-
+-   if ((draw_fb->Width != drawable->w) || (draw_fb->Height != drawable->h)) {
+-      _mesa_resize_framebuffer(rmesa->glCtx, draw_fb,
+-			       drawable->w, drawable->h);
+-      draw_fb->Initialized = GL_TRUE;
+-   }
+-
+-   if (drawable != readable) {
+-      if ((read_fb->Width != readable->w) || (read_fb->Height != readable->h)) {
+-	 _mesa_resize_framebuffer(rmesa->glCtx, read_fb,
+-				  readable->w, readable->h);
+-	 read_fb->Initialized = GL_TRUE;
+-      }
+-   }
+-
+-   if (rmesa->state.scissor.enabled)
+-      radeonRecalcScissorRects( rmesa );
+-
+-   rmesa->lastStamp = drawable->lastStamp;
+-}
+-
+-
+-/**
+  * Called via glDrawBuffer.
+  */
+ static void radeonDrawBuffer( GLcontext *ctx, GLenum mode )
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ 
+    if (RADEON_DEBUG & DEBUG_DRI)
+       fprintf(stderr, "%s %s\n", __FUNCTION__,
+ 	      _mesa_lookup_enum_by_nr( mode ));
+ 
+-   RADEON_FIREVERTICES(rmesa);	/* don't pipeline cliprect changes */
++   radeon_firevertices(&rmesa->radeon);	/* don't pipeline cliprect changes */
+ 
+    if (ctx->DrawBuffer->_NumColorDrawBuffers != 1) {
+       /* 0 (GL_NONE) buffers or multiple color drawing buffers */
+@@ -1707,8 +1562,8 @@ static void radeonDrawBuffer( GLcontext *ctx, GLenum mode )
+       return;
+    }
+ 
+-   radeonSetCliprects( rmesa );
+-
++   radeonSetCliprects( &rmesa->radeon );
++   radeonUpdatePageFlipping(&rmesa->radeon);
+    /* We'll set the drawing engine's offset/pitch parameters later
+     * when we update other state.
+     */
+@@ -1726,7 +1581,7 @@ static void radeonReadBuffer( GLcontext *ctx, GLenum mode )
+ 
+ static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state )
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+    GLuint p, flag;
+ 
+    if ( RADEON_DEBUG & DEBUG_STATE )
+@@ -1821,10 +1676,10 @@ static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state )
+       RADEON_STATECHANGE(rmesa, ctx );
+       if ( state ) {
+ 	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  RADEON_DITHER_ENABLE;
+-	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~rmesa->state.color.roundEnable;
++	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~rmesa->radeon.state.color.roundEnable;
+       } else {
+ 	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_DITHER_ENABLE;
+-	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  rmesa->state.color.roundEnable;
++	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  rmesa->radeon.state.color.roundEnable;
+       }
+       break;
+ 
+@@ -1971,13 +1826,13 @@ static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state )
+    }
+ 
+    case GL_SCISSOR_TEST:
+-      RADEON_FIREVERTICES( rmesa );
+-      rmesa->state.scissor.enabled = state;
++      radeon_firevertices(&rmesa->radeon);
++      rmesa->radeon.state.scissor.enabled = state;
+       radeonUpdateScissor( ctx );
+       break;
+ 
+    case GL_STENCIL_TEST:
+-      if ( rmesa->state.stencil.hwBuffer ) {
++      if ( rmesa->radeon.state.stencil.hwBuffer ) {
+ 	 RADEON_STATECHANGE( rmesa, ctx );
+ 	 if ( state ) {
+ 	    rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  RADEON_STENCIL_ENABLE;
+@@ -2010,7 +1865,7 @@ static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state )
+ 
+ static void radeonLightingSpaceChange( GLcontext *ctx )
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+    GLboolean tmp;
+    RADEON_STATECHANGE( rmesa, tcl );
+ 
+@@ -2039,7 +1894,7 @@ static void radeonLightingSpaceChange( GLcontext *ctx )
+  */
+ 
+ 
+-void radeonUploadTexMatrix( radeonContextPtr rmesa,
++void radeonUploadTexMatrix( r100ContextPtr rmesa,
+ 			    int unit, GLboolean swapcols )
+ {
+ /* Here's how this works: on r100, only 3 tex coords can be submitted, so the
+@@ -2065,7 +1920,7 @@ void radeonUploadTexMatrix( radeonContextPtr rmesa,
+    int idx = TEXMAT_0 + unit;
+    float *dest = ((float *)RADEON_DB_STATE( mat[idx] )) + MAT_ELT_0;
+    int i;
+-   struct gl_texture_unit tUnit = rmesa->glCtx->Texture.Unit[unit];
++   struct gl_texture_unit tUnit = rmesa->radeon.glCtx->Texture.Unit[unit];
+    GLfloat *src = rmesa->tmpmat[unit].m;
+ 
+    rmesa->TexMatColSwap &= ~(1 << unit);
+@@ -2119,7 +1974,7 @@ void radeonUploadTexMatrix( radeonContextPtr rmesa,
+ }
+ 
+ 
+-static void upload_matrix( radeonContextPtr rmesa, GLfloat *src, int idx )
++static void upload_matrix( r100ContextPtr rmesa, GLfloat *src, int idx )
+ {
+    float *dest = ((float *)RADEON_DB_STATE( mat[idx] ))+MAT_ELT_0;
+    int i;
+@@ -2135,7 +1990,7 @@ static void upload_matrix( radeonContextPtr rmesa, GLfloat *src, int idx )
+    RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.mat[idx] );
+ }
+ 
+-static void upload_matrix_t( radeonContextPtr rmesa, GLfloat *src, int idx )
++static void upload_matrix_t( r100ContextPtr rmesa, GLfloat *src, int idx )
+ {
+    float *dest = ((float *)RADEON_DB_STATE( mat[idx] ))+MAT_ELT_0;
+    memcpy(dest, src, 16*sizeof(float));
+@@ -2145,7 +2000,7 @@ static void upload_matrix_t( radeonContextPtr rmesa, GLfloat *src, int idx )
+ 
+ static void update_texturematrix( GLcontext *ctx )
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
++   r100ContextPtr rmesa = R100_CONTEXT( ctx );
+    GLuint tpc = rmesa->hw.tcl.cmd[TCL_TEXTURE_PROC_CTL];
+    GLuint vs = rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL];
+    int unit;
+@@ -2217,43 +2072,32 @@ static void update_texturematrix( GLcontext *ctx )
+ void
+ radeonUpdateDrawBuffer(GLcontext *ctx)
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+    struct gl_framebuffer *fb = ctx->DrawBuffer;
+-   driRenderbuffer *drb;
++   struct radeon_renderbuffer *rrb;
+ 
+    if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) {
+-      /* draw to front */
+-      drb = (driRenderbuffer *) fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
+-   }
+-   else if (fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) {
+-      /* draw to back */
+-      drb = (driRenderbuffer *) fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
+-   }
+-   else {
+-      /* drawing to multiple buffers, or none */
+-      return;
++     /* draw to front */
++     rrb = (void *) fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
++   } else if (fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) {
++     /* draw to back */
++     rrb = (void *) fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
++   } else {
++     /* drawing to multiple buffers, or none */
++     return;
+    }
+ 
+-   assert(drb);
+-   assert(drb->flippedPitch);
++   assert(rrb);
++   assert(rrb->pitch);
+ 
+    RADEON_STATECHANGE( rmesa, ctx );
+-
+-   /* Note: we used the (possibly) page-flipped values */
+-   rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET]
+-     = ((drb->flippedOffset + rmesa->radeonScreen->fbLocation)
+-	& RADEON_COLOROFFSET_MASK);
+-   rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = drb->flippedPitch;
+-   if (rmesa->sarea->tiling_enabled) {
+-      rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= RADEON_COLOR_TILE_ENABLE;
+-   }
+ }
+ 
+ 
+ void radeonValidateState( GLcontext *ctx )
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+-   GLuint new_state = rmesa->NewGLState;
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
++   GLuint new_state = rmesa->radeon.NewGLState;
+ 
+    if (new_state & (_NEW_BUFFERS | _NEW_COLOR | _NEW_PIXEL)) {
+      radeonUpdateDrawBuffer(ctx);
+@@ -2261,7 +2105,7 @@ void radeonValidateState( GLcontext *ctx )
+ 
+    if (new_state & _NEW_TEXTURE) {
+       radeonUpdateTextureState( ctx );
+-      new_state |= rmesa->NewGLState; /* may add TEXTURE_MATRIX */
++      new_state |= rmesa->radeon.NewGLState; /* may add TEXTURE_MATRIX */
+    }
+ 
+    /* Need an event driven matrix update?
+@@ -2295,7 +2139,7 @@ void radeonValidateState( GLcontext *ctx )
+    }
+ 
+ 
+-   rmesa->NewGLState = 0;
++   rmesa->radeon.NewGLState = 0;
+ }
+ 
+ 
+@@ -2306,7 +2150,7 @@ static void radeonInvalidateState( GLcontext *ctx, GLuint new_state )
+    _vbo_InvalidateState( ctx, new_state );
+    _tnl_InvalidateState( ctx, new_state );
+    _ae_invalidate_state( ctx, new_state );
+-   RADEON_CONTEXT(ctx)->NewGLState |= new_state;
++   R100_CONTEXT(ctx)->radeon.NewGLState |= new_state;
+ }
+ 
+ 
+@@ -2330,15 +2174,15 @@ static GLboolean check_material( GLcontext *ctx )
+ 
+ static void radeonWrapRunPipeline( GLcontext *ctx )
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+    GLboolean has_material;
+ 
+    if (0)
+-      fprintf(stderr, "%s, newstate: %x\n", __FUNCTION__, rmesa->NewGLState);
++      fprintf(stderr, "%s, newstate: %x\n", __FUNCTION__, rmesa->radeon.NewGLState);
+ 
+    /* Validate state:
+     */
+-   if (rmesa->NewGLState)
++   if (rmesa->radeon.NewGLState)
+       radeonValidateState( ctx );
+ 
+    has_material = (ctx->Light.Enabled && check_material( ctx ));
+diff --git a/src/mesa/drivers/dri/radeon/radeon_state.h b/src/mesa/drivers/dri/radeon/radeon_state.h
+index 2171879..17c2b11 100644
+--- a/src/mesa/drivers/dri/radeon/radeon_state.h
++++ b/src/mesa/drivers/dri/radeon/radeon_state.h
+@@ -39,22 +39,20 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ #include "radeon_context.h"
+ 
+-extern void radeonInitState( radeonContextPtr rmesa );
++extern void radeonInitState( r100ContextPtr rmesa );
+ extern void radeonInitStateFuncs( GLcontext *ctx );
+ 
+ extern void radeonUpdateMaterial( GLcontext *ctx );
+ 
+-extern void radeonSetCliprects( radeonContextPtr rmesa );
+-extern void radeonRecalcScissorRects( radeonContextPtr rmesa );
+ extern void radeonUpdateViewportOffset( GLcontext *ctx );
+ extern void radeonUpdateWindow( GLcontext *ctx );
+ extern void radeonUpdateDrawBuffer( GLcontext *ctx );
+-extern void radeonUploadTexMatrix( radeonContextPtr rmesa,
++extern void radeonUploadTexMatrix( r100ContextPtr rmesa,
+ 				   int unit, GLboolean swapcols );
+ 
+ extern void radeonValidateState( GLcontext *ctx );
+ 
+-extern void radeonPrintDirty( radeonContextPtr rmesa,
++extern void radeonPrintDirty( r100ContextPtr rmesa,
+ 			      const char *msg );
+ 
+ 
+@@ -62,7 +60,7 @@ extern void radeonFallback( GLcontext *ctx, GLuint bit, GLboolean mode );
+ #define FALLBACK( rmesa, bit, mode ) do {				\
+    if ( 0 ) fprintf( stderr, "FALLBACK in %s: #%d=%d\n",		\
+ 		     __FUNCTION__, bit, mode );				\
+-   radeonFallback( rmesa->glCtx, bit, mode );				\
++   radeonFallback( rmesa->radeon.glCtx, bit, mode );				\
+ } while (0)
+ 
+ 
+diff --git a/src/mesa/drivers/dri/radeon/radeon_state_init.c b/src/mesa/drivers/dri/radeon/radeon_state_init.c
+index 57dc380..617e731 100644
+--- a/src/mesa/drivers/dri/radeon/radeon_state_init.c
++++ b/src/mesa/drivers/dri/radeon/radeon_state_init.c
+@@ -38,39 +38,156 @@
+ #include "swrast_setup/swrast_setup.h"
+ 
+ #include "radeon_context.h"
++#include "radeon_mipmap_tree.h"
+ #include "radeon_ioctl.h"
+ #include "radeon_state.h"
+ #include "radeon_tcl.h"
+ #include "radeon_tex.h"
+ #include "radeon_swtcl.h"
+ 
++#include "../r200/r200_reg.h"
++
+ #include "xmlpool.h"
+ 
++/* New (1.3) state mechanism.  3 commands (packet, scalar, vector) in
++ * 1.3 cmdbuffers allow all previous state to be updated as well as
++ * the tcl scalar and vector areas.
++ */
++static struct {
++	int start;
++	int len;
++	const char *name;
++} packet[RADEON_MAX_STATE_PACKETS] = {
++	{RADEON_PP_MISC, 7, "RADEON_PP_MISC"},
++	{RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"},
++	{RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"},
++	{RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"},
++	{RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"},
++	{RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"},
++	{RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"},
++	{RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"},
++	{RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"},
++	{RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"},
++	{RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"},
++	{RADEON_RE_MISC, 1, "RADEON_RE_MISC"},
++	{RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"},
++	{RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"},
++	{RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"},
++	{RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"},
++	{RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"},
++	{RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"},
++	{RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"},
++	{RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"},
++	{RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17,
++		    "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"},
++	{R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"},
++	{R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"},
++	{R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"},
++	{R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"},
++	{R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"},
++	{R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"},
++	{R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"},
++	{R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"},
++	{R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"},
++	{R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"},
++	{R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"},
++	{R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"},
++	{R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"},
++	{R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"},
++	{R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"},
++	{R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"},
++	{R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"},
++	{R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"},
++	{R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"},
++	{R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"},
++	{R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"},
++	{R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"},
++	{R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"},
++	{R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"},
++	{R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"},
++	{R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"},
++	{R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"},
++	{R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"},
++	{R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1,
++	 "R200_SE_TCL_OUTPUT_VTX_COMP_SEL"},
++	{R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"},
++	{R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"},
++	{R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"},
++	{R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"},
++	{R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"},
++	{R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"},
++	{R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"},
++	{R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"},
++	{R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"},
++	{R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"},
++	{R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4,
++		    "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"},
++	{R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"},	/* 61 */
++	{R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"}, /* 62 */
++	{R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"},
++	{R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"},
++	{R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"},
++	{R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"},
++	{R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"},
++	{R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"},
++	{R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"},
++	{R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"},
++	{R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"},
++	{R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"},
++	{RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"},
++	{RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"},
++	{RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"},
++	{R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"},
++	{R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
++	{RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"},
++	{RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"},
++	{RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"},
++	{RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"},
++	{RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"},
++	{RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
++	{R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
++	{R200_PP_TXCBLEND_8, 32, "R200_PP_AFS_0"},     /* 85 */
++	{R200_PP_TXCBLEND_0, 32, "R200_PP_AFS_1"},
++	{R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"},
++	{R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"},
++	{R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"},
++	{R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"},
++	{R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"},
++	{R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"},
++	{R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"},
++	{R200_VAP_PVS_CNTL_1, 2, "R200_VAP_PVS_CNTL"},
++};
++
+ /* =============================================================
+  * State initialization
+  */
+ 
+-void radeonPrintDirty( radeonContextPtr rmesa, const char *msg )
++void radeonPrintDirty( r100ContextPtr rmesa, const char *msg )
+ {
+    struct radeon_state_atom *l;
+ 
+    fprintf(stderr, msg);
+    fprintf(stderr, ": ");
+ 
+-   foreach(l, &rmesa->hw.atomlist) {
+-      if (l->dirty || rmesa->hw.all_dirty)
++   foreach(l, &rmesa->radeon.hw.atomlist) {
++      if (l->dirty || rmesa->radeon.hw.all_dirty)
+ 	 fprintf(stderr, "%s, ", l->name);
+    }
+ 
+    fprintf(stderr, "\n");
+ }
+ 
+-static int cmdpkt( int id ) 
++static int cmdpkt( r100ContextPtr rmesa, int id ) 
+ {
+    drm_radeon_cmd_header_t h;
+-   h.i = 0;
+-   h.packet.cmd_type = RADEON_CMD_PACKET;
+-   h.packet.packet_id = id;
++
++   if (rmesa->radeon.radeonScreen->kernel_mm) {
++     return CP_PACKET0(packet[id].start, packet[id].len - 1);
++   } else {
++     h.i = 0;
++     h.packet.cmd_type = RADEON_CMD_PACKET;
++     h.packet.packet_id = id;
++   }
+    return h.i;
+ }
+ 
+@@ -96,17 +213,17 @@ static int cmdscl( int offset, int stride, int count )
+    return h.i;
+ }
+ 
+-#define CHECK( NM, FLAG )			\
+-static GLboolean check_##NM( GLcontext *ctx )	\
+-{						\
+-   return FLAG;					\
++#define CHECK( NM, FLAG )				\
++static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom )	\
++{							\
++   return FLAG ? atom->cmd_size : 0;			\
+ }
+ 
+ #define TCL_CHECK( NM, FLAG )				\
+-static GLboolean check_##NM( GLcontext *ctx )		\
++static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom )	\
+ {							\
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);	\
+-   return !rmesa->TclFallback && (FLAG);		\
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);	\
++   return (!rmesa->radeon.TclFallback && (FLAG)) ? atom->cmd_size : 0;	\
+ }
+ 
+ 
+@@ -146,17 +263,244 @@ CHECK( txr0, (ctx->Texture.Unit[0]._ReallyEnabled & TEXTURE_RECT_BIT))
+ CHECK( txr1, (ctx->Texture.Unit[1]._ReallyEnabled & TEXTURE_RECT_BIT))
+ CHECK( txr2, (ctx->Texture.Unit[2]._ReallyEnabled & TEXTURE_RECT_BIT))
+ 
++#define OUT_VEC(hdr, data) do { /* hdr: packed vectors header; emits 5 OUT_BATCH entries + h.vectors.count table entries */ \
++    drm_radeon_cmd_header_t h;					\
++    h.i = hdr;								\
++    OUT_BATCH(CP_PACKET0(RADEON_SE_TCL_STATE_FLUSH, 0));		\
++    OUT_BATCH(0);							\
++    OUT_BATCH(CP_PACKET0(R200_SE_TCL_VECTOR_INDX_REG, 0));		\
++    OUT_BATCH(h.vectors.offset | (h.vectors.stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT)); \
++    OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_VECTOR_DATA_REG, h.vectors.count - 1));	\
++    OUT_BATCH_TABLE((data), h.vectors.count);				\
++  } while(0)
++
++#define OUT_SCL(hdr, data) do { /* hdr: packed scalars header; emits 3 OUT_BATCH entries + h.scalars.count table entries */ \
++    drm_radeon_cmd_header_t h;						\
++    h.i = hdr;								\
++    OUT_BATCH(CP_PACKET0(R200_SE_TCL_SCALAR_INDX_REG, 0));		\
++    OUT_BATCH((h.scalars.offset) | (h.scalars.stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT)); \
++    OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_SCALAR_DATA_REG, h.scalars.count - 1));	\
++    OUT_BATCH_TABLE((data), h.scalars.count);				\
++  } while(0)
++
++static void scl_emit(GLcontext *ctx, struct radeon_state_atom *atom)
++{
++   r100ContextPtr r100 = R100_CONTEXT(ctx);
++   BATCH_LOCALS(&r100->radeon);
++   uint32_t dwords = atom->cmd_size;
++   /* NOTE(review): OUT_SCL emits 3 OUT_BATCH entries plus h.scalars.count table entries (cmd[0] is the header, cmd+1 the data); confirm cmd_size reserves enough */
++   BEGIN_BATCH_NO_AUTOSTATE(dwords);
++   OUT_SCL(atom->cmd[0], atom->cmd+1);
++   END_BATCH();
++}
++
++
++static void vec_emit(GLcontext *ctx, struct radeon_state_atom *atom)
++{
++   r100ContextPtr r100 = R100_CONTEXT(ctx);
++   BATCH_LOCALS(&r100->radeon);
++   uint32_t dwords = atom->cmd_size;
++   /* NOTE(review): OUT_VEC emits 5 OUT_BATCH entries plus h.vectors.count table entries (cmd[0] is the header, cmd+1 the data); confirm cmd_size reserves enough */
++   BEGIN_BATCH_NO_AUTOSTATE(dwords);
++   OUT_VEC(atom->cmd[0], atom->cmd+1);
++   END_BATCH();
++}
++
++static void ctx_emit(GLcontext *ctx, struct radeon_state_atom *atom)
++{
++   /* Emit the CTX atom on the non-kernel_mm path (see radeonInitState):
++    * replay atom->cmd, substituting buffer-object relocations for the
++    * depth/colour offsets and the renderbuffer pitch where one is bound.
++    */
++   r100ContextPtr r100 = R100_CONTEXT(ctx);
++   BATCH_LOCALS(&r100->radeon);
++   struct radeon_renderbuffer *rrb;
++   uint32_t cbpitch;
++   uint32_t zbpitch;
++   uint32_t dwords = atom->cmd_size;
++   GLframebuffer *fb = r100->radeon.dri.drawable->driverPrivate;
++
++   /* the leading 5 entries of atom->cmd are copied through unchanged */
++   BEGIN_BATCH_NO_AUTOSTATE(dwords + 4);
++   OUT_BATCH_TABLE(atom->cmd, 5);
++
++   rrb = r100->radeon.state.depth.rrb;
++   if (!rrb) {
++     OUT_BATCH(0);
++     OUT_BATCH(0);
++   } else {
++     zbpitch = (rrb->pitch / rrb->cpp);
++     OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
++     OUT_BATCH(zbpitch);
++   }
++
++   OUT_BATCH(atom->cmd[CTX_RB3D_ZSTENCILCNTL]);
++   OUT_BATCH(atom->cmd[CTX_CMD_1]);
++   OUT_BATCH(atom->cmd[CTX_PP_CNTL]);
++   OUT_BATCH(atom->cmd[CTX_RB3D_CNTL]);
++
++   rrb = r100->radeon.state.color.rrb;
++   if (r100->radeon.radeonScreen->driScreen->dri2.enabled) {
++      rrb = (struct radeon_renderbuffer *)fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
++   }
++   if (!rrb || !rrb->bo) {
++     OUT_BATCH(atom->cmd[CTX_RB3D_COLOROFFSET]);
++   } else {
++     OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
++   }
+ 
++   OUT_BATCH(atom->cmd[CTX_CMD_2]);
++
++   if (!rrb || !rrb->bo) {
++     OUT_BATCH(atom->cmd[CTX_RB3D_COLORPITCH]);
++   } else {
++     cbpitch = (rrb->pitch / rrb->cpp);
++     if (r100->radeon.sarea->tiling_enabled)
++       cbpitch |= R200_COLOR_TILE_ENABLE;
++     OUT_BATCH(cbpitch);
++   }
++
++   END_BATCH();
++}
++
++static void ctx_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom)
++{
++   /* Emit the CTX atom on the kernel_mm path (see radeonInitState): each
++    * register range gets its own CP_PACKET0 header, and the depth/colour
++    * offsets are emitted as relocations against their own buffer objects.
++    */
++   r100ContextPtr r100 = R100_CONTEXT(ctx);
++   BATCH_LOCALS(&r100->radeon);
++   struct radeon_renderbuffer *rrb, *drb;
++   uint32_t cbpitch = 0;
++   uint32_t zbpitch = 0;
++   uint32_t dwords = atom->cmd_size;
++   GLframebuffer *fb = r100->radeon.dri.drawable->driverPrivate;
++
++   rrb = r100->radeon.state.color.rrb;
++   if (r100->radeon.radeonScreen->driScreen->dri2.enabled) {
++      rrb = (struct radeon_renderbuffer *)fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
++   }
++   if (rrb) {
++     assert(rrb->bo != NULL);
++     cbpitch = (rrb->pitch / rrb->cpp);
++     if (r100->radeon.sarea->tiling_enabled)
++       cbpitch |= R200_COLOR_TILE_ENABLE;
++   }
++
++   drb = r100->radeon.state.depth.rrb;
++   if (drb)
++     zbpitch = (drb->pitch / drb->cpp);
++
++   /* open the batch; the atom is emitted piecewise below */
++   BEGIN_BATCH_NO_AUTOSTATE(dwords);
++
++   /* In the CS case we need to split this up */
++   OUT_BATCH(CP_PACKET0(packet[0].start, 3));
++   OUT_BATCH_TABLE((atom->cmd + 1), 4);
++
++   if (drb) {
++     OUT_BATCH(CP_PACKET0(RADEON_RB3D_DEPTHOFFSET, 0));
++     /* relocate against the depth buffer; rrb is the colour buffer and may be NULL here */
++     OUT_BATCH_RELOC(0, drb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
++     OUT_BATCH(CP_PACKET0(RADEON_RB3D_DEPTHPITCH, 0));
++     OUT_BATCH(zbpitch);
++   }
++
++   OUT_BATCH(CP_PACKET0(RADEON_RB3D_ZSTENCILCNTL, 0));
++   OUT_BATCH(atom->cmd[CTX_RB3D_ZSTENCILCNTL]);
++   OUT_BATCH(CP_PACKET0(RADEON_PP_CNTL, 1));
++   OUT_BATCH(atom->cmd[CTX_PP_CNTL]);
++   OUT_BATCH(atom->cmd[CTX_RB3D_CNTL]);
++
++
++   if (rrb) {
++     OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLOROFFSET, 0));
++     OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
++   }
++
++   if (rrb) {
++     OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
++     OUT_BATCH(cbpitch);
++   }
++
++   // if (atom->cmd_size == CTX_STATE_SIZE_NEWDRM) {
++   //   OUT_BATCH_TABLE((atom->cmd + 14), 4);
++   // }
++
++   END_BATCH();
++}
++
++static void cube_emit(GLcontext *ctx, struct radeon_state_atom *atom)
++{
++   r100ContextPtr r100 = R100_CONTEXT(ctx);
++   BATCH_LOCALS(&r100->radeon);
++   uint32_t dwords = atom->cmd_size;
++   int i = atom->idx, j;
++   radeonTexObj *t = r100->state.texture.unit[i].texobj;
++   radeon_mipmap_level *lvl;
++   /* Cube-map atom for texture unit atom->idx: emits nothing unless that unit has a cube texture with a miptree. */
++   if (!(ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_CUBE_BIT))
++	return;
++
++   if (!t)
++	return;
++
++   if (!t->mt)
++	return;
++   /* 3 entries of atom->cmd, then one relocation each for faces[0..4] of miptree level 0 */
++   BEGIN_BATCH_NO_AUTOSTATE(dwords + 10);
++   OUT_BATCH_TABLE(atom->cmd, 3);
++   lvl = &t->mt->levels[0];
++   for (j = 0; j < 5; j++) {
++	OUT_BATCH_RELOC(lvl->faces[j].offset, t->mt->bo, lvl->faces[j].offset,
++			RADEON_GEM_DOMAIN_VRAM, 0, 0);
++   }
++   END_BATCH();
++}
++
++static void tex_emit(GLcontext *ctx, struct radeon_state_atom *atom)
++{
++   r100ContextPtr r100 = R100_CONTEXT(ctx);
++   BATCH_LOCALS(&r100->radeon);
++   uint32_t dwords = atom->cmd_size;
++   int i = atom->idx;
++   radeonTexObj *t = r100->state.texture.unit[i].texobj;
++   radeon_mipmap_level *lvl;
++   /* Texture atom for unit atom->idx: cmd[0..2], one offset entry (relocation or plain value), then cmd[4..8]. */
++   if (t && t->mt && !t->image_override)
++     dwords += 2;
++   BEGIN_BATCH_NO_AUTOSTATE(dwords);
++   OUT_BATCH_TABLE(atom->cmd, 3);
++   if (t && t->mt && !t->image_override) {
++     if ((ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_CUBE_BIT)) {
++   	lvl = &t->mt->levels[0];
++	OUT_BATCH_RELOC(lvl->faces[5].offset, t->mt->bo, lvl->faces[5].offset,
++			RADEON_GEM_DOMAIN_VRAM, 0, 0);
++     } else {
++        OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0,
++		     RADEON_GEM_DOMAIN_VRAM, 0, 0);
++     }
++   } else if (!t) {
++     /* workaround for old CS mechanism */
++     OUT_BATCH(r100->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP]);
++     /* NOTE(review): the remaining case (t set, no mt, no image_override) emits no offset entry at all -- confirm it cannot occur */
++   } else if (t->image_override)
++     OUT_BATCH(t->override_offset);
++
++   OUT_BATCH_TABLE((atom->cmd+4), 5);
++   END_BATCH();
++}
+ 
+ /* Initialize the context's hardware state.
+  */
+-void radeonInitState( radeonContextPtr rmesa )
++void radeonInitState( r100ContextPtr rmesa )
+ {
+-   GLcontext *ctx = rmesa->glCtx;
++   GLcontext *ctx = rmesa->radeon.glCtx;
+    GLuint color_fmt, depth_fmt, i;
+    GLint drawPitch, drawOffset;
+ 
+-   switch ( rmesa->radeonScreen->cpp ) {
++   switch ( rmesa->radeon.radeonScreen->cpp ) {
+    case 2:
+       color_fmt = RADEON_COLOR_FORMAT_RGB565;
+       break;
+@@ -168,20 +512,20 @@ void radeonInitState( radeonContextPtr rmesa )
+       exit( -1 );
+    }
+ 
+-   rmesa->state.color.clear = 0x00000000;
++   rmesa->radeon.state.color.clear = 0x00000000;
+ 
+    switch ( ctx->Visual.depthBits ) {
+    case 16:
+-      rmesa->state.depth.clear = 0x0000ffff;
+-      rmesa->state.depth.scale = 1.0 / (GLfloat)0xffff;
++      rmesa->radeon.state.depth.clear = 0x0000ffff;
++      rmesa->radeon.state.depth.scale = 1.0 / (GLfloat)0xffff;
+       depth_fmt = RADEON_DEPTH_FORMAT_16BIT_INT_Z;
+-      rmesa->state.stencil.clear = 0x00000000;
++      rmesa->radeon.state.stencil.clear = 0x00000000;
+       break;
+    case 24:
+-      rmesa->state.depth.clear = 0x00ffffff;
+-      rmesa->state.depth.scale = 1.0 / (GLfloat)0xffffff;
++      rmesa->radeon.state.depth.clear = 0x00ffffff;
++      rmesa->radeon.state.depth.scale = 1.0 / (GLfloat)0xffffff;
+       depth_fmt = RADEON_DEPTH_FORMAT_24BIT_INT_Z;
+-      rmesa->state.stencil.clear = 0xffff0000;
++      rmesa->radeon.state.stencil.clear = 0xffff0000;
+       break;
+    default:
+       fprintf( stderr, "Error: Unsupported depth %d... exiting\n",
+@@ -190,37 +534,44 @@ void radeonInitState( radeonContextPtr rmesa )
+    }
+ 
+    /* Only have hw stencil when depth buffer is 24 bits deep */
+-   rmesa->state.stencil.hwBuffer = ( ctx->Visual.stencilBits > 0 &&
++   rmesa->radeon.state.stencil.hwBuffer = ( ctx->Visual.stencilBits > 0 &&
+ 				     ctx->Visual.depthBits == 24 );
+ 
+-   rmesa->Fallback = 0;
++   rmesa->radeon.Fallback = 0;
+ 
+-   if ( ctx->Visual.doubleBufferMode && rmesa->sarea->pfCurrentPage == 0 ) {
+-      drawOffset = rmesa->radeonScreen->backOffset;
+-      drawPitch  = rmesa->radeonScreen->backPitch;
++   if ( ctx->Visual.doubleBufferMode && rmesa->radeon.sarea->pfCurrentPage == 0 ) {
++      drawOffset = rmesa->radeon.radeonScreen->backOffset;
++      drawPitch  = rmesa->radeon.radeonScreen->backPitch;
+    } else {
+-      drawOffset = rmesa->radeonScreen->frontOffset;
+-      drawPitch  = rmesa->radeonScreen->frontPitch;
++      drawOffset = rmesa->radeon.radeonScreen->frontOffset;
++      drawPitch  = rmesa->radeon.radeonScreen->frontPitch;
+    }
+ 
+-   rmesa->hw.max_state_size = 0;
++   rmesa->radeon.hw.max_state_size = 0;
+ 
+-#define ALLOC_STATE( ATOM, CHK, SZ, NM, FLAG )				\
++#define ALLOC_STATE_IDX( ATOM, CHK, SZ, NM, FLAG, IDX )		\
+    do {								\
+       rmesa->hw.ATOM.cmd_size = SZ;				\
+-      rmesa->hw.ATOM.cmd = (int *)CALLOC(SZ * sizeof(int));	\
+-      rmesa->hw.ATOM.lastcmd = (int *)CALLOC(SZ * sizeof(int));	\
+-      rmesa->hw.ATOM.name = NM;					\
++      rmesa->hw.ATOM.cmd = (GLuint *)CALLOC(SZ * sizeof(int));	\
++      rmesa->hw.ATOM.lastcmd = (GLuint *)CALLOC(SZ * sizeof(int)); \
++      rmesa->hw.ATOM.name = NM;						\
+       rmesa->hw.ATOM.is_tcl = FLAG;					\
+       rmesa->hw.ATOM.check = check_##CHK;				\
+-      rmesa->hw.ATOM.dirty = GL_TRUE;				\
+-      rmesa->hw.max_state_size += SZ * sizeof(int);		\
++      rmesa->hw.ATOM.dirty = GL_TRUE;					\
++      rmesa->hw.ATOM.idx = IDX;					\
++      rmesa->radeon.hw.max_state_size += SZ * sizeof(int);		\
+    } while (0)
+-      
+-      
++
++#define ALLOC_STATE( ATOM, CHK, SZ, NM, FLAG )		\
++   ALLOC_STATE_IDX(ATOM, CHK, SZ, NM, FLAG, 0)
++
+    /* Allocate state buffers:
+     */
+    ALLOC_STATE( ctx, always, CTX_STATE_SIZE, "CTX/context", 0 );
++   if (rmesa->radeon.radeonScreen->kernel_mm)
++     rmesa->hw.ctx.emit = ctx_emit_cs;
++   else
++     rmesa->hw.ctx.emit = ctx_emit;
+    ALLOC_STATE( lin, always, LIN_STATE_SIZE, "LIN/line", 0 );
+    ALLOC_STATE( msk, always, MSK_STATE_SIZE, "MSK/mask", 0 );
+    ALLOC_STATE( vpt, always, VPT_STATE_SIZE, "VPT/viewport", 0 );
+@@ -233,20 +584,25 @@ void radeonInitState( radeonContextPtr rmesa )
+    ALLOC_STATE( fog, fog, FOG_STATE_SIZE, "FOG/fog", 1 );
+    ALLOC_STATE( glt, tcl_lighting, GLT_STATE_SIZE, "GLT/light-global", 1 );
+    ALLOC_STATE( eye, tcl_lighting, EYE_STATE_SIZE, "EYE/eye-vector", 1 );
+-   ALLOC_STATE( tex[0], tex0, TEX_STATE_SIZE, "TEX/tex-0", 0 );
+-   ALLOC_STATE( tex[1], tex1, TEX_STATE_SIZE, "TEX/tex-1", 0 );
+-   ALLOC_STATE( tex[2], tex2, TEX_STATE_SIZE, "TEX/tex-2", 0 );
+-   if (rmesa->radeonScreen->drmSupportsCubeMapsR100)
++   ALLOC_STATE_IDX( tex[0], tex0, TEX_STATE_SIZE, "TEX/tex-0", 0, 0);
++   ALLOC_STATE_IDX( tex[1], tex1, TEX_STATE_SIZE, "TEX/tex-1", 0, 1);
++   ALLOC_STATE_IDX( tex[2], tex2, TEX_STATE_SIZE, "TEX/tex-2", 0, 2 );
++
++   for (i = 0; i < 3; i++)
++     rmesa->hw.tex[i].emit = tex_emit;
++   if (rmesa->radeon.radeonScreen->drmSupportsCubeMapsR100)
+    {
+-      ALLOC_STATE( cube[0], cube0, CUBE_STATE_SIZE, "CUBE/cube-0", 0 );
+-      ALLOC_STATE( cube[1], cube1, CUBE_STATE_SIZE, "CUBE/cube-1", 0 );
+-      ALLOC_STATE( cube[2], cube2, CUBE_STATE_SIZE, "CUBE/cube-2", 0 );
++      ALLOC_STATE_IDX( cube[0], cube0, CUBE_STATE_SIZE, "CUBE/cube-0", 0, 0 );
++      ALLOC_STATE_IDX( cube[1], cube1, CUBE_STATE_SIZE, "CUBE/cube-1", 0, 1 );
++      ALLOC_STATE_IDX( cube[2], cube2, CUBE_STATE_SIZE, "CUBE/cube-2", 0, 2 );
++      for (i = 0; i < 3; i++)
++         rmesa->hw.cube[i].emit = cube_emit;
+    }
+    else
+    {
+-      ALLOC_STATE( cube[0], never, CUBE_STATE_SIZE, "CUBE/cube-0", 0 );
+-      ALLOC_STATE( cube[1], never, CUBE_STATE_SIZE, "CUBE/cube-1", 0 );
+-      ALLOC_STATE( cube[2], never, CUBE_STATE_SIZE, "CUBE/cube-2", 0 );
++      ALLOC_STATE_IDX( cube[0], never, CUBE_STATE_SIZE, "CUBE/cube-0", 0, 0 );
++      ALLOC_STATE_IDX( cube[1], never, CUBE_STATE_SIZE, "CUBE/cube-1", 0, 1 );
++      ALLOC_STATE_IDX( cube[2], never, CUBE_STATE_SIZE, "CUBE/cube-2", 0, 2 );
+    }
+    ALLOC_STATE( mat[0], tcl, MAT_STATE_SIZE, "MAT/modelproject", 1 );
+    ALLOC_STATE( mat[1], tcl_eyespace_or_fog, MAT_STATE_SIZE, "MAT/modelview", 1 );
+@@ -268,43 +624,43 @@ void radeonInitState( radeonContextPtr rmesa )
+    ALLOC_STATE( lit[5], tcl_lit5, LIT_STATE_SIZE, "LIT/light-5", 1 );
+    ALLOC_STATE( lit[6], tcl_lit6, LIT_STATE_SIZE, "LIT/light-6", 1 );
+    ALLOC_STATE( lit[7], tcl_lit7, LIT_STATE_SIZE, "LIT/light-7", 1 );
+-   ALLOC_STATE( txr[0], txr0, TXR_STATE_SIZE, "TXR/txr-0", 0 );
+-   ALLOC_STATE( txr[1], txr1, TXR_STATE_SIZE, "TXR/txr-1", 0 );
+-   ALLOC_STATE( txr[2], txr2, TXR_STATE_SIZE, "TXR/txr-2", 0 );
++   ALLOC_STATE_IDX( txr[0], txr0, TXR_STATE_SIZE, "TXR/txr-0", 0, 0 );
++   ALLOC_STATE_IDX( txr[1], txr1, TXR_STATE_SIZE, "TXR/txr-1", 0, 1 );
++   ALLOC_STATE_IDX( txr[2], txr2, TXR_STATE_SIZE, "TXR/txr-2", 0, 2 );
+ 
+    radeonSetUpAtomList( rmesa );
+ 
+    /* Fill in the packet headers:
+     */
+-   rmesa->hw.ctx.cmd[CTX_CMD_0] = cmdpkt(RADEON_EMIT_PP_MISC);
+-   rmesa->hw.ctx.cmd[CTX_CMD_1] = cmdpkt(RADEON_EMIT_PP_CNTL);
+-   rmesa->hw.ctx.cmd[CTX_CMD_2] = cmdpkt(RADEON_EMIT_RB3D_COLORPITCH);
+-   rmesa->hw.lin.cmd[LIN_CMD_0] = cmdpkt(RADEON_EMIT_RE_LINE_PATTERN);
+-   rmesa->hw.lin.cmd[LIN_CMD_1] = cmdpkt(RADEON_EMIT_SE_LINE_WIDTH);
+-   rmesa->hw.msk.cmd[MSK_CMD_0] = cmdpkt(RADEON_EMIT_RB3D_STENCILREFMASK);
+-   rmesa->hw.vpt.cmd[VPT_CMD_0] = cmdpkt(RADEON_EMIT_SE_VPORT_XSCALE);
+-   rmesa->hw.set.cmd[SET_CMD_0] = cmdpkt(RADEON_EMIT_SE_CNTL);
+-   rmesa->hw.set.cmd[SET_CMD_1] = cmdpkt(RADEON_EMIT_SE_CNTL_STATUS);
+-   rmesa->hw.msc.cmd[MSC_CMD_0] = cmdpkt(RADEON_EMIT_RE_MISC);
+-   rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(RADEON_EMIT_PP_TXFILTER_0);
+-   rmesa->hw.tex[0].cmd[TEX_CMD_1] = cmdpkt(RADEON_EMIT_PP_BORDER_COLOR_0);
+-   rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(RADEON_EMIT_PP_TXFILTER_1);
+-   rmesa->hw.tex[1].cmd[TEX_CMD_1] = cmdpkt(RADEON_EMIT_PP_BORDER_COLOR_1);
+-   rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(RADEON_EMIT_PP_TXFILTER_2);
+-   rmesa->hw.tex[2].cmd[TEX_CMD_1] = cmdpkt(RADEON_EMIT_PP_BORDER_COLOR_2);
+-   rmesa->hw.cube[0].cmd[CUBE_CMD_0] = cmdpkt(RADEON_EMIT_PP_CUBIC_FACES_0);
+-   rmesa->hw.cube[0].cmd[CUBE_CMD_1] = cmdpkt(RADEON_EMIT_PP_CUBIC_OFFSETS_T0);
+-   rmesa->hw.cube[1].cmd[CUBE_CMD_0] = cmdpkt(RADEON_EMIT_PP_CUBIC_FACES_1);
+-   rmesa->hw.cube[1].cmd[CUBE_CMD_1] = cmdpkt(RADEON_EMIT_PP_CUBIC_OFFSETS_T1);
+-   rmesa->hw.cube[2].cmd[CUBE_CMD_0] = cmdpkt(RADEON_EMIT_PP_CUBIC_FACES_2);
+-   rmesa->hw.cube[2].cmd[CUBE_CMD_1] = cmdpkt(RADEON_EMIT_PP_CUBIC_OFFSETS_T2);
+-   rmesa->hw.zbs.cmd[ZBS_CMD_0] = cmdpkt(RADEON_EMIT_SE_ZBIAS_FACTOR);
+-   rmesa->hw.tcl.cmd[TCL_CMD_0] = cmdpkt(RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT);
++   rmesa->hw.ctx.cmd[CTX_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_MISC);
++   rmesa->hw.ctx.cmd[CTX_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_CNTL);
++   rmesa->hw.ctx.cmd[CTX_CMD_2] = cmdpkt(rmesa, RADEON_EMIT_RB3D_COLORPITCH);
++   rmesa->hw.lin.cmd[LIN_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RE_LINE_PATTERN);
++   rmesa->hw.lin.cmd[LIN_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_SE_LINE_WIDTH);
++   rmesa->hw.msk.cmd[MSK_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RB3D_STENCILREFMASK);
++   rmesa->hw.vpt.cmd[VPT_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_VPORT_XSCALE);
++   rmesa->hw.set.cmd[SET_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_CNTL);
++   rmesa->hw.set.cmd[SET_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_SE_CNTL_STATUS);
++   rmesa->hw.msc.cmd[MSC_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RE_MISC);
++   rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TXFILTER_0);
++   rmesa->hw.tex[0].cmd[TEX_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_BORDER_COLOR_0);
++   rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TXFILTER_1);
++   rmesa->hw.tex[1].cmd[TEX_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_BORDER_COLOR_1);
++   rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TXFILTER_2);
++   rmesa->hw.tex[2].cmd[TEX_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_BORDER_COLOR_2);
++   rmesa->hw.cube[0].cmd[CUBE_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_FACES_0);
++   rmesa->hw.cube[0].cmd[CUBE_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_OFFSETS_T0);
++   rmesa->hw.cube[1].cmd[CUBE_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_FACES_1);
++   rmesa->hw.cube[1].cmd[CUBE_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_OFFSETS_T1);
++   rmesa->hw.cube[2].cmd[CUBE_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_FACES_2);
++   rmesa->hw.cube[2].cmd[CUBE_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_OFFSETS_T2);
++   rmesa->hw.zbs.cmd[ZBS_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_ZBIAS_FACTOR);
++   rmesa->hw.tcl.cmd[TCL_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT);
+    rmesa->hw.mtl.cmd[MTL_CMD_0] = 
+-      cmdpkt(RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED);
+-   rmesa->hw.txr[0].cmd[TXR_CMD_0] = cmdpkt(RADEON_EMIT_PP_TEX_SIZE_0);
+-   rmesa->hw.txr[1].cmd[TXR_CMD_0] = cmdpkt(RADEON_EMIT_PP_TEX_SIZE_1);
+-   rmesa->hw.txr[2].cmd[TXR_CMD_0] = cmdpkt(RADEON_EMIT_PP_TEX_SIZE_2);
++      cmdpkt(rmesa, RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED);
++   rmesa->hw.txr[0].cmd[TXR_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TEX_SIZE_0);
++   rmesa->hw.txr[1].cmd[TXR_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TEX_SIZE_1);
++   rmesa->hw.txr[2].cmd[TXR_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TEX_SIZE_2);
+    rmesa->hw.grd.cmd[GRD_CMD_0] = 
+       cmdscl( RADEON_SS_VERT_GUARD_CLIP_ADJ_ADDR, 1, 4 );
+    rmesa->hw.fog.cmd[FOG_CMD_0] = 
+@@ -353,10 +709,10 @@ void radeonInitState( radeonContextPtr rmesa )
+ 					    RADEON_DST_BLEND_GL_ZERO );
+ 
+    rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHOFFSET] =
+-      rmesa->radeonScreen->depthOffset + rmesa->radeonScreen->fbLocation;
++      rmesa->radeon.radeonScreen->depthOffset + rmesa->radeon.radeonScreen->fbLocation;
+ 
+    rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHPITCH] = 
+-      ((rmesa->radeonScreen->depthPitch &
++      ((rmesa->radeon.radeonScreen->depthPitch &
+ 	RADEON_DEPTHPITCH_MASK) |
+        RADEON_DEPTH_ENDIAN_NO_SWAP);
+        
+@@ -374,7 +730,7 @@ void radeonInitState( radeonContextPtr rmesa )
+    if (rmesa->using_hyperz) {
+        rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_COMPRESSION_ENABLE |
+ 						   RADEON_Z_DECOMPRESSION_ENABLE;
+-      if (rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
++      if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
+ 	 /* works for q3, but slight rendering errors with glxgears ? */
+ /*	 rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_HIERARCHY_ENABLE;*/
+ 	 /* need this otherwise get lots of lockups with q3 ??? */
+@@ -389,7 +745,7 @@ void radeonInitState( radeonContextPtr rmesa )
+ 				       color_fmt |
+ 				       RADEON_ZBLOCK16);
+ 
+-   switch ( driQueryOptioni( &rmesa->optionCache, "dither_mode" ) ) {
++   switch ( driQueryOptioni( &rmesa->radeon.optionCache, "dither_mode" ) ) {
+    case DRI_CONF_DITHER_XERRORDIFFRESET:
+       rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_DITHER_INIT;
+       break;
+@@ -397,19 +753,19 @@ void radeonInitState( radeonContextPtr rmesa )
+       rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_SCALE_DITHER_ENABLE;
+       break;
+    }
+-   if ( driQueryOptioni( &rmesa->optionCache, "round_mode" ) ==
++   if ( driQueryOptioni( &rmesa->radeon.optionCache, "round_mode" ) ==
+ 	DRI_CONF_ROUND_ROUND )
+-      rmesa->state.color.roundEnable = RADEON_ROUND_ENABLE;
++      rmesa->radeon.state.color.roundEnable = RADEON_ROUND_ENABLE;
+    else
+-      rmesa->state.color.roundEnable = 0;
+-   if ( driQueryOptioni (&rmesa->optionCache, "color_reduction" ) ==
++      rmesa->radeon.state.color.roundEnable = 0;
++   if ( driQueryOptioni (&rmesa->radeon.optionCache, "color_reduction" ) ==
+ 	DRI_CONF_COLOR_REDUCTION_DITHER )
+       rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_DITHER_ENABLE;
+    else
+-      rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= rmesa->state.color.roundEnable;
++      rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= rmesa->radeon.state.color.roundEnable;
+ 
+    rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] = ((drawOffset +
+-					       rmesa->radeonScreen->fbLocation)
++					       rmesa->radeon.radeonScreen->fbLocation)
+ 					      & RADEON_COLOROFFSET_MASK);
+ 
+    rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = ((drawPitch &
+@@ -418,7 +774,7 @@ void radeonInitState( radeonContextPtr rmesa )
+ 
+ 
+    /* (fixed size) sarea is initialized to zero afaics so can omit version check. Phew! */
+-   if (rmesa->sarea->tiling_enabled) {
++   if (rmesa->radeon.sarea->tiling_enabled) {
+       rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= RADEON_COLOR_TILE_ENABLE;
+    }
+ 
+@@ -444,7 +800,7 @@ void radeonInitState( radeonContextPtr rmesa )
+   					    RADEON_VC_NO_SWAP;
+ #endif
+ 
+-   if (!(rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) {
++   if (!(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) {
+      rmesa->hw.set.cmd[SET_SE_CNTL_STATUS] |= RADEON_TCL_BYPASS;
+    }
+ 
+@@ -491,8 +847,8 @@ void radeonInitState( radeonContextPtr rmesa )
+ 	   (2 << RADEON_TXFORMAT_HEIGHT_SHIFT));
+ 
+       /* Initialize the texture offset to the start of the card texture heap */
+-      rmesa->hw.tex[i].cmd[TEX_PP_TXOFFSET] =
+-	  rmesa->radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
++      //      rmesa->hw.tex[i].cmd[TEX_PP_TXOFFSET] =
++      //	  rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
+ 
+       rmesa->hw.tex[i].cmd[TEX_PP_BORDER_COLOR] = 0;
+       rmesa->hw.tex[i].cmd[TEX_PP_TXCBLEND] =  
+@@ -513,15 +869,15 @@ void radeonInitState( radeonContextPtr rmesa )
+ 
+       rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_FACES] = 0;
+       rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_0] =
+-	  rmesa->radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
++	  rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
+       rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_1] =
+-	  rmesa->radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
++	  rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
+       rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_2] =
+-	  rmesa->radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
++	  rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
+       rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_3] =
+-	  rmesa->radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
++	  rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
+       rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_4] =
+-	  rmesa->radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
++	  rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
+    }
+ 
+    /* Can only add ST1 at the time of doing some multitex but can keep
+@@ -613,5 +969,7 @@ void radeonInitState( radeonContextPtr rmesa )
+    rmesa->hw.eye.cmd[EYE_Z] = IEEE_ONE;
+    rmesa->hw.eye.cmd[EYE_RESCALE_FACTOR] = IEEE_ONE;
+    
+-   rmesa->hw.all_dirty = GL_TRUE;
++   rmesa->radeon.hw.all_dirty = GL_TRUE;
++
++   rcommonInitCmdBuf(&rmesa->radeon);
+ }
+diff --git a/src/mesa/drivers/dri/radeon/radeon_swtcl.c b/src/mesa/drivers/dri/radeon/radeon_swtcl.c
+index ebea1fe..af933a3 100644
+--- a/src/mesa/drivers/dri/radeon/radeon_swtcl.c
++++ b/src/mesa/drivers/dri/radeon/radeon_swtcl.c
+@@ -52,8 +52,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "radeon_tcl.h"
+ 
+ 
+-static void flush_last_swtcl_prim( radeonContextPtr rmesa  );
+-
+ /* R100: xyzw, c0, c1/fog, stq[0..2]  = 4+1+1+3*3 = 15  right? */
+ /* R200: xyzw, c0, c1/fog, strq[0..5] = 4+1+1+4*6 = 30 */
+ #define RADEON_MAX_TNL_VERTEX_SIZE (15 * sizeof(GLfloat))	/* for mesa _tnl stage */
+@@ -64,18 +62,18 @@ static void flush_last_swtcl_prim( radeonContextPtr rmesa  );
+ 
+ #define EMIT_ATTR( ATTR, STYLE, F0 )					\
+ do {									\
+-   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = (ATTR);	\
+-   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = (STYLE);	\
+-   rmesa->swtcl.vertex_attr_count++;					\
++   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = (ATTR);	\
++   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = (STYLE);	\
++   rmesa->radeon.swtcl.vertex_attr_count++;					\
+    fmt_0 |= F0;								\
+ } while (0)
+ 
+ #define EMIT_PAD( N )							\
+ do {									\
+-   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = 0;		\
+-   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = EMIT_PAD;	\
+-   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].offset = (N);		\
+-   rmesa->swtcl.vertex_attr_count++;					\
++   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = 0;		\
++   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = EMIT_PAD;	\
++   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].offset = (N);		\
++   rmesa->radeon.swtcl.vertex_attr_count++;					\
+ } while (0)
+ 
+ static GLuint radeon_cp_vc_frmts[3][2] =
+@@ -87,7 +85,7 @@ static GLuint radeon_cp_vc_frmts[3][2] =
+ 
+ static void radeonSetVertexFormat( GLcontext *ctx )
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
++   r100ContextPtr rmesa = R100_CONTEXT( ctx );
+    TNLcontext *tnl = TNL_CONTEXT(ctx);
+    struct vertex_buffer *VB = &tnl->vb;
+    DECLARE_RENDERINPUTS(index_bitset);
+@@ -106,7 +104,7 @@ static void radeonSetVertexFormat( GLcontext *ctx )
+    }
+ 
+    assert( VB->AttribPtr[VERT_ATTRIB_POS] != NULL );
+-   rmesa->swtcl.vertex_attr_count = 0;
++   rmesa->radeon.swtcl.vertex_attr_count = 0;
+ 
+    /* EMIT_ATTR's must be in order as they tell t_vertex.c how to
+     * build up a hardware vertex.
+@@ -204,33 +202,33 @@ static void radeonSetVertexFormat( GLcontext *ctx )
+       }
+    }
+ 
+-   if (!RENDERINPUTS_EQUAL( rmesa->tnl_index_bitset, index_bitset ) ||
++   if (!RENDERINPUTS_EQUAL( rmesa->radeon.tnl_index_bitset, index_bitset ) ||
+ 	fmt_0 != rmesa->swtcl.vertex_format) {
+       RADEON_NEWPRIM(rmesa);
+       rmesa->swtcl.vertex_format = fmt_0;
+-      rmesa->swtcl.vertex_size =
++      rmesa->radeon.swtcl.vertex_size =
+ 	  _tnl_install_attrs( ctx,
+-			      rmesa->swtcl.vertex_attrs, 
+-			      rmesa->swtcl.vertex_attr_count,
++			      rmesa->radeon.swtcl.vertex_attrs, 
++			      rmesa->radeon.swtcl.vertex_attr_count,
+ 			      NULL, 0 );
+-      rmesa->swtcl.vertex_size /= 4;
+-      RENDERINPUTS_COPY( rmesa->tnl_index_bitset, index_bitset );
++      rmesa->radeon.swtcl.vertex_size /= 4;
++      RENDERINPUTS_COPY( rmesa->radeon.tnl_index_bitset, index_bitset );
+       if (RADEON_DEBUG & DEBUG_VERTS)
+ 	 fprintf( stderr, "%s: vertex_size= %d floats\n",
+-		  __FUNCTION__, rmesa->swtcl.vertex_size);
++		  __FUNCTION__, rmesa->radeon.swtcl.vertex_size);
+    }
+ }
+ 
+ 
+ static void radeonRenderStart( GLcontext *ctx )
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
++   r100ContextPtr rmesa = R100_CONTEXT( ctx );
+ 
+    radeonSetVertexFormat( ctx );
+    
+-   if (rmesa->dma.flush != 0 && 
+-       rmesa->dma.flush != flush_last_swtcl_prim)
+-      rmesa->dma.flush( rmesa );
++   if (rmesa->radeon.dma.flush != 0 && 
++       rmesa->radeon.dma.flush != rcommon_flush_last_swtcl_prim)
++      rmesa->radeon.dma.flush( ctx );
+ }
+ 
+ 
+@@ -241,7 +239,7 @@ static void radeonRenderStart( GLcontext *ctx )
+  */
+ void radeonChooseVertexState( GLcontext *ctx )
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
++   r100ContextPtr rmesa = R100_CONTEXT( ctx );
+    TNLcontext *tnl = TNL_CONTEXT(ctx);
+ 
+    GLuint se_coord_fmt = rmesa->hw.set.cmd[SET_SE_COORDFMT];
+@@ -254,7 +252,7 @@ void radeonChooseVertexState( GLcontext *ctx )
+     * rasterization fallback.  As this function will be called again when we
+     * leave a rasterization fallback, we can just skip it for now.
+     */
+-   if (rmesa->Fallback != 0)
++   if (rmesa->radeon.Fallback != 0)
+       return;
+ 
+    /* HW perspective divide is a win, but tiny vertex formats are a
+@@ -281,80 +279,29 @@ void radeonChooseVertexState( GLcontext *ctx )
+    }
+ }
+ 
+-
+-/* Flush vertices in the current dma region.
+- */
+-static void flush_last_swtcl_prim( radeonContextPtr rmesa  )
++void r100_swtcl_flush(GLcontext *ctx, uint32_t current_offset)
+ {
+-   if (RADEON_DEBUG & DEBUG_IOCTL)
+-      fprintf(stderr, "%s\n", __FUNCTION__);
+-
+-   rmesa->dma.flush = NULL;
+-
+-   if (rmesa->dma.current.buf) {
+-      struct radeon_dma_region *current = &rmesa->dma.current;
+-      GLuint current_offset = (rmesa->radeonScreen->gart_buffer_offset +
+-			       current->buf->buf->idx * RADEON_BUFFER_SIZE + 
+-			       current->start);
+-
+-      assert (!(rmesa->swtcl.hw_primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND));
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ 
+-      assert (current->start + 
+-	      rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
+-	      current->ptr);
++   rcommonEnsureCmdBufSpace(&rmesa->radeon,
++			    rmesa->radeon.hw.max_state_size + (12*sizeof(int)),
++			    __FUNCTION__);
+ 
+-      if (rmesa->dma.current.start != rmesa->dma.current.ptr) {
+-	 radeonEnsureCmdBufSpace( rmesa, VERT_AOS_BUFSZ +
+-			          rmesa->hw.max_state_size + VBUF_BUFSZ );
+ 
+-	 radeonEmitVertexAOS( rmesa,
+-			      rmesa->swtcl.vertex_size,
+-			      current_offset);
++   radeonEmitState(&rmesa->radeon);
++   radeonEmitVertexAOS( rmesa,
++			rmesa->radeon.swtcl.vertex_size,
++			rmesa->radeon.dma.current,
++			current_offset);
+ 
+-	 radeonEmitVbufPrim( rmesa,
+-			     rmesa->swtcl.vertex_format,
+-			     rmesa->swtcl.hw_primitive,
+-			     rmesa->swtcl.numverts);
+-      }
++		      
++   radeonEmitVbufPrim( rmesa,
++		       rmesa->swtcl.vertex_format,
++		       rmesa->radeon.swtcl.hw_primitive,
++		       rmesa->radeon.swtcl.numverts);
+ 
+-      rmesa->swtcl.numverts = 0;
+-      current->start = current->ptr;
+-   }
+ }
+ 
+-
+-/* Alloc space in the current dma region.
+- */
+-static INLINE void *
+-radeonAllocDmaLowVerts( radeonContextPtr rmesa, int nverts, int vsize )
+-{
+-   GLuint bytes = vsize * nverts;
+-
+-   if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end ) 
+-      radeonRefillCurrentDmaRegion( rmesa );
+-
+-   if (!rmesa->dma.flush) {
+-      rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
+-      rmesa->dma.flush = flush_last_swtcl_prim;
+-   }
+-
+-   assert( vsize == rmesa->swtcl.vertex_size * 4 );
+-   assert( rmesa->dma.flush == flush_last_swtcl_prim );
+-   assert (rmesa->dma.current.start + 
+-	   rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
+-	   rmesa->dma.current.ptr);
+-
+-
+-   {
+-      GLubyte *head = (GLubyte *)(rmesa->dma.current.address + rmesa->dma.current.ptr);
+-      rmesa->dma.current.ptr += bytes;
+-      rmesa->swtcl.numverts += nverts;
+-      return head;
+-   }
+-
+-}
+-
+-
+ /*
+  * Render unclipped vertex buffers by emitting vertices directly to
+  * dma buffers.  Use strip/fan hardware primitives where possible.
+@@ -387,22 +334,22 @@ static const GLuint hw_prim[GL_POLYGON+1] = {
+ };
+ 
+ static INLINE void
+-radeonDmaPrimitive( radeonContextPtr rmesa, GLenum prim )
++radeonDmaPrimitive( r100ContextPtr rmesa, GLenum prim )
+ {
+    RADEON_NEWPRIM( rmesa );
+-   rmesa->swtcl.hw_primitive = hw_prim[prim];
+-   assert(rmesa->dma.current.ptr == rmesa->dma.current.start);
++   rmesa->radeon.swtcl.hw_primitive = hw_prim[prim];
++   //   assert(rmesa->radeon.dma.current.ptr == rmesa->radeon.dma.current.start);
+ }
+ 
+-#define LOCAL_VARS radeonContextPtr rmesa = RADEON_CONTEXT(ctx)
++#define LOCAL_VARS r100ContextPtr rmesa = R100_CONTEXT(ctx)
+ #define INIT( prim ) radeonDmaPrimitive( rmesa, prim )
+ #define FLUSH()  RADEON_NEWPRIM( rmesa )
+-#define GET_CURRENT_VB_MAX_VERTS() \
+-  (((int)rmesa->dma.current.end - (int)rmesa->dma.current.ptr) / (rmesa->swtcl.vertex_size*4))
++#define GET_CURRENT_VB_MAX_VERTS()					10\
++//  (((int)rmesa->radeon.dma.current.end - (int)rmesa->radeon.dma.current.ptr) / (rmesa->radeon.swtcl.vertex_size*4))
+ #define GET_SUBSEQUENT_VB_MAX_VERTS() \
+-  ((RADEON_BUFFER_SIZE) / (rmesa->swtcl.vertex_size*4))
++  ((RADEON_BUFFER_SIZE) / (rmesa->radeon.swtcl.vertex_size*4))
+ #define ALLOC_VERTS( nr ) \
+-  radeonAllocDmaLowVerts( rmesa, nr, rmesa->swtcl.vertex_size * 4 )
++  rcommonAllocDmaLowVerts( &rmesa->radeon, nr, rmesa->radeon.swtcl.vertex_size * 4 )
+ #define EMIT_VERTS( ctx, j, nr, buf ) \
+   _tnl_emit_vertices_to_buffer(ctx, j, (j)+(nr), buf)
+ 
+@@ -418,16 +365,13 @@ radeonDmaPrimitive( radeonContextPtr rmesa, GLenum prim )
+ static GLboolean radeon_run_render( GLcontext *ctx,
+ 				    struct tnl_pipeline_stage *stage )
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+    TNLcontext *tnl = TNL_CONTEXT(ctx);
+    struct vertex_buffer *VB = &tnl->vb;
+    tnl_render_func *tab = TAG(render_tab_verts);
+    GLuint i;
+ 
+-   if (rmesa->swtcl.indexed_verts.buf) 
+-      RELEASE_ELT_VERTS();
+-   	
+-   if (rmesa->swtcl.RenderIndex != 0 ||   
++   if (rmesa->radeon.swtcl.RenderIndex != 0 ||   
+        !radeon_dma_validate_render( ctx, VB ))
+       return GL_TRUE;		
+ 
+@@ -496,13 +440,13 @@ static void radeonResetLineStipple( GLcontext *ctx );
+ 
+ #undef LOCAL_VARS
+ #undef ALLOC_VERTS
+-#define CTX_ARG radeonContextPtr rmesa
+-#define GET_VERTEX_DWORDS() rmesa->swtcl.vertex_size
+-#define ALLOC_VERTS( n, size ) radeonAllocDmaLowVerts( rmesa, n, (size) * 4 )
++#define CTX_ARG r100ContextPtr rmesa
++#define GET_VERTEX_DWORDS() rmesa->radeon.swtcl.vertex_size
++#define ALLOC_VERTS( n, size ) rcommonAllocDmaLowVerts( &rmesa->radeon, n, (size) * 4 )
+ #undef LOCAL_VARS
+ #define LOCAL_VARS						\
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);		\
+-   const char *radeonverts = (char *)rmesa->swtcl.verts;
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);		\
++   const char *radeonverts = (char *)rmesa->radeon.swtcl.verts;
+ #define VERT(x) (radeonVertex *)(radeonverts + ((x) * (vertsize) * sizeof(int)))
+ #define VERTEX radeonVertex 
+ #undef TAG
+@@ -560,7 +504,7 @@ static struct {
+ #define VERT_Y(_v) _v->v.y
+ #define VERT_Z(_v) _v->v.z
+ #define AREA_IS_CCW( a ) (a < 0)
+-#define GET_VERTEX(e) (rmesa->swtcl.verts + ((e) * rmesa->swtcl.vertex_size * sizeof(int)))
++#define GET_VERTEX(e) (rmesa->radeon.swtcl.verts + ((e) * rmesa->radeon.swtcl.vertex_size * sizeof(int)))
+ 
+ #define VERT_SET_RGBA( v, c )  					\
+ do {								\
+@@ -606,7 +550,7 @@ do {							\
+ #undef INIT
+ 
+ #define LOCAL_VARS(n)							\
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);			\
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);			\
+    GLuint color[n], spec[n];						\
+    GLuint coloroffset = rmesa->swtcl.coloroffset;	\
+    GLuint specoffset = rmesa->swtcl.specoffset;			\
+@@ -617,7 +561,7 @@ do {							\
+  ***********************************************************************/
+ 
+ #define RASTERIZE(x) radeonRasterPrimitive( ctx, reduced_hw_prim[x] )
+-#define RENDER_PRIMITIVE rmesa->swtcl.render_primitive
++#define RENDER_PRIMITIVE rmesa->radeon.swtcl.render_primitive
+ #undef TAG
+ #define TAG(x) x
+ #include "tnl_dd/t_dd_unfilled.h"
+@@ -673,9 +617,9 @@ static void init_rast_tab( void )
+ } while (0)
+ #undef LOCAL_VARS
+ #define LOCAL_VARS						\
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);		\
+-   const GLuint vertsize = rmesa->swtcl.vertex_size;		\
+-   const char *radeonverts = (char *)rmesa->swtcl.verts;		\
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);		\
++   const GLuint vertsize = rmesa->radeon.swtcl.vertex_size;		\
++   const char *radeonverts = (char *)rmesa->radeon.swtcl.verts;		\
+    const GLuint * const elt = TNL_CONTEXT(ctx)->vb.Elts;	\
+    const GLboolean stipple = ctx->Line.StippleFlag;		\
+    (void) elt; (void) stipple;
+@@ -700,17 +644,17 @@ static void init_rast_tab( void )
+ void radeonChooseRenderState( GLcontext *ctx )
+ {
+    TNLcontext *tnl = TNL_CONTEXT(ctx);
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+    GLuint index = 0;
+    GLuint flags = ctx->_TriangleCaps;
+ 
+-   if (!rmesa->TclFallback || rmesa->Fallback) 
++   if (!rmesa->radeon.TclFallback || rmesa->radeon.Fallback) 
+       return;
+ 
+    if (flags & DD_TRI_LIGHT_TWOSIDE) index |= RADEON_TWOSIDE_BIT;
+    if (flags & DD_TRI_UNFILLED)      index |= RADEON_UNFILLED_BIT;
+ 
+-   if (index != rmesa->swtcl.RenderIndex) {
++   if (index != rmesa->radeon.swtcl.RenderIndex) {
+       tnl->Driver.Render.Points = rast_tab[index].points;
+       tnl->Driver.Render.Line = rast_tab[index].line;
+       tnl->Driver.Render.ClippedLine = rast_tab[index].line;
+@@ -727,7 +671,7 @@ void radeonChooseRenderState( GLcontext *ctx )
+ 	 tnl->Driver.Render.ClippedPolygon = _tnl_RenderClippedPolygon;
+       }
+ 
+-      rmesa->swtcl.RenderIndex = index;
++      rmesa->radeon.swtcl.RenderIndex = index;
+    }
+ }
+ 
+@@ -739,18 +683,18 @@ void radeonChooseRenderState( GLcontext *ctx )
+ 
+ static void radeonRasterPrimitive( GLcontext *ctx, GLuint hwprim )
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ 
+-   if (rmesa->swtcl.hw_primitive != hwprim) {
++   if (rmesa->radeon.swtcl.hw_primitive != hwprim) {
+       RADEON_NEWPRIM( rmesa );
+-      rmesa->swtcl.hw_primitive = hwprim;
++      rmesa->radeon.swtcl.hw_primitive = hwprim;
+    }
+ }
+ 
+ static void radeonRenderPrimitive( GLcontext *ctx, GLenum prim )
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+-   rmesa->swtcl.render_primitive = prim;
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
++   rmesa->radeon.swtcl.render_primitive = prim;
+    if (prim < GL_TRIANGLES || !(ctx->_TriangleCaps & DD_TRI_UNFILLED)) 
+       radeonRasterPrimitive( ctx, reduced_hw_prim[prim] );
+ }
+@@ -761,7 +705,7 @@ static void radeonRenderFinish( GLcontext *ctx )
+ 
+ static void radeonResetLineStipple( GLcontext *ctx )
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+    RADEON_STATECHANGE( rmesa, lin );
+ }
+ 
+@@ -795,17 +739,17 @@ static const char *getFallbackString(GLuint bit)
+ 
+ void radeonFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+    TNLcontext *tnl = TNL_CONTEXT(ctx);
+-   GLuint oldfallback = rmesa->Fallback;
++   GLuint oldfallback = rmesa->radeon.Fallback;
+ 
+    if (mode) {
+-      rmesa->Fallback |= bit;
++      rmesa->radeon.Fallback |= bit;
+       if (oldfallback == 0) {
+-	 RADEON_FIREVERTICES( rmesa );
++	 radeon_firevertices(&rmesa->radeon);
+ 	 TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_RASTER, GL_TRUE );
+ 	 _swsetup_Wakeup( ctx );
+-	 rmesa->swtcl.RenderIndex = ~0;
++	 rmesa->radeon.swtcl.RenderIndex = ~0;
+          if (RADEON_DEBUG & DEBUG_FALLBACKS) {
+             fprintf(stderr, "Radeon begin rasterization fallback: 0x%x %s\n",
+                     bit, getFallbackString(bit));
+@@ -813,7 +757,7 @@ void radeonFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
+       }
+    }
+    else {
+-      rmesa->Fallback &= ~bit;
++      rmesa->radeon.Fallback &= ~bit;
+       if (oldfallback == bit) {
+ 	 _swrast_flush( ctx );
+ 	 tnl->Driver.Render.Start = radeonRenderStart;
+@@ -826,14 +770,14 @@ void radeonFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
+ 
+ 	 tnl->Driver.Render.ResetLineStipple = radeonResetLineStipple;
+ 	 TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_RASTER, GL_FALSE );
+-	 if (rmesa->TclFallback) {
+-	    /* These are already done if rmesa->TclFallback goes to
++	 if (rmesa->radeon.TclFallback) {
++	    /* These are already done if rmesa->radeon.TclFallback goes to
+ 	     * zero above. But not if it doesn't (RADEON_NO_TCL for
+ 	     * example?)
+ 	     */
+ 	    _tnl_invalidate_vertex_state( ctx, ~0 );
+ 	    _tnl_invalidate_vertices( ctx, ~0 );
+-	    RENDERINPUTS_ZERO( rmesa->tnl_index_bitset );
++	    RENDERINPUTS_ZERO( rmesa->radeon.tnl_index_bitset );
+ 	    radeonChooseVertexState( ctx );
+ 	    radeonChooseRenderState( ctx );
+ 	 }
+@@ -853,7 +797,7 @@ void radeonFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
+ void radeonInitSwtcl( GLcontext *ctx )
+ {
+    TNLcontext *tnl = TNL_CONTEXT(ctx);
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+    static int firsttime = 1;
+ 
+    if (firsttime) {
+@@ -872,18 +816,15 @@ void radeonInitSwtcl( GLcontext *ctx )
+    _tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12, 
+ 		       RADEON_MAX_TNL_VERTEX_SIZE);
+    
+-   rmesa->swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf;
+-   rmesa->swtcl.RenderIndex = ~0;
+-   rmesa->swtcl.render_primitive = GL_TRIANGLES;
+-   rmesa->swtcl.hw_primitive = 0;
++   rmesa->radeon.swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf;
++   rmesa->radeon.swtcl.RenderIndex = ~0;
++   rmesa->radeon.swtcl.render_primitive = GL_TRIANGLES;
++   rmesa->radeon.swtcl.hw_primitive = 0;
+ }
+ 
+ 
+ void radeonDestroySwtcl( GLcontext *ctx )
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+ 
+-   if (rmesa->swtcl.indexed_verts.buf) 
+-      radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts, 
+-			      __FUNCTION__ );
+ }
+diff --git a/src/mesa/drivers/dri/radeon/radeon_swtcl.h b/src/mesa/drivers/dri/radeon/radeon_swtcl.h
+index e485052..3ada989 100644
+--- a/src/mesa/drivers/dri/radeon/radeon_swtcl.h
++++ b/src/mesa/drivers/dri/radeon/radeon_swtcl.h
+@@ -63,5 +63,5 @@ extern void radeon_translate_vertex( GLcontext *ctx,
+ 
+ extern void radeon_print_vertex( GLcontext *ctx, const radeonVertex *v );
+ 
+-
++extern void r100_swtcl_flush(GLcontext *ctx, uint32_t current_offset);
+ #endif
+diff --git a/src/mesa/drivers/dri/radeon/radeon_tcl.c b/src/mesa/drivers/dri/radeon/radeon_tcl.c
+index 779e9ae..5887ab3 100644
+--- a/src/mesa/drivers/dri/radeon/radeon_tcl.c
++++ b/src/mesa/drivers/dri/radeon/radeon_tcl.c
+@@ -42,6 +42,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "tnl/tnl.h"
+ #include "tnl/t_pipeline.h"
+ 
++#include "radeon_common.h"
+ #include "radeon_context.h"
+ #include "radeon_state.h"
+ #include "radeon_ioctl.h"
+@@ -104,7 +105,7 @@ static GLboolean discrete_prim[0x10] = {
+ };
+    
+ 
+-#define LOCAL_VARS radeonContextPtr rmesa = RADEON_CONTEXT(ctx)
++#define LOCAL_VARS r100ContextPtr rmesa = R100_CONTEXT(ctx)
+ #define ELT_TYPE  GLushort
+ 
+ #define ELT_INIT(prim, hw_prim) \
+@@ -125,7 +126,7 @@ static GLboolean discrete_prim[0x10] = {
+ 
+ #define RESET_STIPPLE() do {			\
+    RADEON_STATECHANGE( rmesa, lin );		\
+-   radeonEmitState( rmesa );			\
++   radeonEmitState(&rmesa->radeon);			\
+ } while (0)
+ 
+ #define AUTO_STIPPLE( mode )  do {		\
+@@ -136,31 +137,29 @@ static GLboolean discrete_prim[0x10] = {
+    else						\
+       rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] &=	\
+ 	 ~RADEON_LINE_PATTERN_AUTO_RESET;	\
+-   radeonEmitState( rmesa );			\
++   radeonEmitState(&rmesa->radeon);		\
+ } while (0)
+ 
+ 
+ 
+ #define ALLOC_ELTS(nr)	radeonAllocElts( rmesa, nr )
+ 
+-static GLushort *radeonAllocElts( radeonContextPtr rmesa, GLuint nr ) 
++static GLushort *radeonAllocElts( r100ContextPtr rmesa, GLuint nr ) 
+ {
+-   if (rmesa->dma.flush)
+-      rmesa->dma.flush( rmesa );
++      if (rmesa->radeon.dma.flush)
++	 rmesa->radeon.dma.flush( rmesa->radeon.glCtx );
+ 
+-   radeonEnsureCmdBufSpace(rmesa, AOS_BUFSZ(rmesa->tcl.nr_aos_components) +
+-			   rmesa->hw.max_state_size + ELTS_BUFSZ(nr));
++      rcommonEnsureCmdBufSpace(&rmesa->radeon, rmesa->radeon.hw.max_state_size + ELTS_BUFSZ(nr) + 
++			       AOS_BUFSZ(rmesa->tcl.nr_aos_components), __FUNCTION__);
+ 
+-   radeonEmitAOS( rmesa,
+-		rmesa->tcl.aos_components,
+-		rmesa->tcl.nr_aos_components, 0 );
++      radeonEmitAOS( rmesa,
++		     rmesa->tcl.nr_aos_components, 0 );
+ 
+-   return radeonAllocEltsOpenEnded( rmesa,
+-				    rmesa->tcl.vertex_format, 
+-				    rmesa->tcl.hw_primitive, nr );
++      return radeonAllocEltsOpenEnded( rmesa, rmesa->tcl.vertex_format,
++				       rmesa->tcl.hw_primitive, nr );
+ }
+ 
+-#define CLOSE_ELTS()  RADEON_NEWPRIM( rmesa )
++#define CLOSE_ELTS() if (0)  RADEON_NEWPRIM( rmesa )
+ 
+ 
+ 
+@@ -174,14 +173,14 @@ static void radeonEmitPrim( GLcontext *ctx,
+ 		       GLuint start, 
+ 		       GLuint count)	
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
++   r100ContextPtr rmesa = R100_CONTEXT( ctx );
+    radeonTclPrimitive( ctx, prim, hwprim );
+    
+-   radeonEnsureCmdBufSpace( rmesa, AOS_BUFSZ(rmesa->tcl.nr_aos_components) +
+-			    rmesa->hw.max_state_size + VBUF_BUFSZ );
++   rcommonEnsureCmdBufSpace( &rmesa->radeon,
++			     AOS_BUFSZ(rmesa->tcl.nr_aos_components) +
++			     rmesa->radeon.hw.max_state_size + VBUF_BUFSZ, __FUNCTION__ );
+ 
+    radeonEmitAOS( rmesa,
+-		  rmesa->tcl.aos_components,
+ 		  rmesa->tcl.nr_aos_components,
+ 		  start );
+    
+@@ -254,7 +253,7 @@ void radeonTclPrimitive( GLcontext *ctx,
+ 			 GLenum prim,
+ 			 int hw_prim )
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+    GLuint se_cntl;
+    GLuint newprim = hw_prim | RADEON_CP_VC_CNTL_TCL_ENABLE;
+ 
+@@ -371,7 +370,7 @@ radeonComputeFogBlendFactor( GLcontext *ctx, GLfloat fogcoord )
+ static GLboolean radeon_run_tcl_render( GLcontext *ctx,
+ 					struct tnl_pipeline_stage *stage )
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+    TNLcontext *tnl = TNL_CONTEXT(ctx);
+    struct vertex_buffer *VB = &tnl->vb;
+    GLuint inputs = VERT_BIT_POS | VERT_BIT_COLOR0;
+@@ -379,7 +378,7 @@ static GLboolean radeon_run_tcl_render( GLcontext *ctx,
+ 
+    /* TODO: separate this from the swtnl pipeline 
+     */
+-   if (rmesa->TclFallback)
++   if (rmesa->radeon.TclFallback)
+       return GL_TRUE;	/* fallback to software t&l */
+ 
+    if (VB->Count == 0)
+@@ -461,7 +460,7 @@ const struct tnl_pipeline_stage _radeon_tcl_stage =
+ 
+ static void transition_to_swtnl( GLcontext *ctx )
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+    TNLcontext *tnl = TNL_CONTEXT(ctx);
+    GLuint se_cntl;
+ 
+@@ -490,7 +489,7 @@ static void transition_to_swtnl( GLcontext *ctx )
+ 
+ static void transition_to_hwtnl( GLcontext *ctx )
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+    TNLcontext *tnl = TNL_CONTEXT(ctx);
+    GLuint se_coord_fmt = rmesa->hw.set.cmd[SET_SE_COORDFMT];
+ 
+@@ -509,15 +508,15 @@ static void transition_to_hwtnl( GLcontext *ctx )
+ 
+    tnl->Driver.NotifyMaterialChange = radeonUpdateMaterial;
+ 
+-   if ( rmesa->dma.flush )			
+-      rmesa->dma.flush( rmesa );	
++   if ( rmesa->radeon.dma.flush )			
++      rmesa->radeon.dma.flush( rmesa->radeon.glCtx );	
+ 
+-   rmesa->dma.flush = NULL;
++   rmesa->radeon.dma.flush = NULL;
+    rmesa->swtcl.vertex_format = 0;
+    
+-   if (rmesa->swtcl.indexed_verts.buf) 
+-      radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts, 
+-			      __FUNCTION__ );
++   //   if (rmesa->swtcl.indexed_verts.buf) 
++   //      radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts, 
++   //			      __FUNCTION__ );
+ 
+    if (RADEON_DEBUG & DEBUG_FALLBACKS) 
+       fprintf(stderr, "Radeon end tcl fallback\n");
+@@ -550,11 +549,11 @@ static char *getFallbackString(GLuint bit)
+ 
+ void radeonTclFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+-   GLuint oldfallback = rmesa->TclFallback;
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
++   GLuint oldfallback = rmesa->radeon.TclFallback;
+ 
+    if (mode) {
+-      rmesa->TclFallback |= bit;
++      rmesa->radeon.TclFallback |= bit;
+       if (oldfallback == 0) {
+ 	 if (RADEON_DEBUG & DEBUG_FALLBACKS) 
+ 	    fprintf(stderr, "Radeon begin tcl fallback %s\n",
+@@ -563,7 +562,7 @@ void radeonTclFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
+       }
+    }
+    else {
+-      rmesa->TclFallback &= ~bit;
++      rmesa->radeon.TclFallback &= ~bit;
+       if (oldfallback == bit) {
+ 	 if (RADEON_DEBUG & DEBUG_FALLBACKS) 
+ 	    fprintf(stderr, "Radeon end tcl fallback %s\n",
+diff --git a/src/mesa/drivers/dri/radeon/radeon_tex.c b/src/mesa/drivers/dri/radeon/radeon_tex.c
+index b0aec21..2dfb504 100644
+--- a/src/mesa/drivers/dri/radeon/radeon_tex.c
++++ b/src/mesa/drivers/dri/radeon/radeon_tex.c
+@@ -44,6 +44,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "main/texobj.h"
+ 
+ #include "radeon_context.h"
++#include "radeon_mipmap_tree.h"
+ #include "radeon_state.h"
+ #include "radeon_ioctl.h"
+ #include "radeon_swtcl.h"
+@@ -170,10 +171,13 @@ static void radeonSetTexFilter( radeonTexObjPtr t, GLenum minf, GLenum magf )
+ {
+    GLuint anisotropy = (t->pp_txfilter & RADEON_MAX_ANISO_MASK);
+ 
++   /* Force revalidation to account for switches from/to mipmapping. */
++   t->validated = GL_FALSE;
++
+    t->pp_txfilter &= ~(RADEON_MIN_FILTER_MASK | RADEON_MAG_FILTER_MASK);
+ 
+    /* r100 chips can't handle mipmaps/aniso for cubemap/volume textures */
+-   if ( t->base.tObj->Target == GL_TEXTURE_CUBE_MAP ) {
++   if ( t->base.Target == GL_TEXTURE_CUBE_MAP ) {
+       switch ( minf ) {
+       case GL_NEAREST:
+       case GL_NEAREST_MIPMAP_NEAREST:
+@@ -244,433 +248,13 @@ static void radeonSetTexBorderColor( radeonTexObjPtr t, GLubyte c[4] )
+    t->pp_border_color = radeonPackColor( 4, c[0], c[1], c[2], c[3] );
+ }
+ 
+-
+-/**
+- * Allocate space for and load the mesa images into the texture memory block.
+- * This will happen before drawing with a new texture, or drawing with a
+- * texture after it was swapped out or teximaged again.
+- */
+-
+-static radeonTexObjPtr radeonAllocTexObj( struct gl_texture_object *texObj )
+-{
+-   radeonTexObjPtr t;
+-
+-   t = CALLOC_STRUCT( radeon_tex_obj );
+-   texObj->DriverData = t;
+-   if ( t != NULL ) {
+-      if ( RADEON_DEBUG & DEBUG_TEXTURE ) {
+-	 fprintf( stderr, "%s( %p, %p )\n", __FUNCTION__, (void *)texObj, (void *)t );
+-      }
+-
+-      /* Initialize non-image-dependent parts of the state:
+-       */
+-      t->base.tObj = texObj;
+-      t->border_fallback = GL_FALSE;
+-
+-      t->pp_txfilter = RADEON_BORDER_MODE_OGL;
+-      t->pp_txformat = (RADEON_TXFORMAT_ENDIAN_NO_SWAP |
+-			RADEON_TXFORMAT_PERSPECTIVE_ENABLE);
+-
+-      make_empty_list( & t->base );
+-
+-      radeonSetTexWrap( t, texObj->WrapS, texObj->WrapT );
+-      radeonSetTexMaxAnisotropy( t, texObj->MaxAnisotropy );
+-      radeonSetTexFilter( t, texObj->MinFilter, texObj->MagFilter );
+-      radeonSetTexBorderColor( t, texObj->_BorderChan );
+-   }
+-
+-   return t;
+-}
+-
+-
+-static const struct gl_texture_format *
+-radeonChooseTextureFormat( GLcontext *ctx, GLint internalFormat,
+-                           GLenum format, GLenum type )
+-{
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+-   const GLboolean do32bpt =
+-       ( rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_32 );
+-   const GLboolean force16bpt =
+-       ( rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_FORCE_16 );
+-   (void) format;
+-
+-   switch ( internalFormat ) {
+-   case 4:
+-   case GL_RGBA:
+-   case GL_COMPRESSED_RGBA:
+-      switch ( type ) {
+-      case GL_UNSIGNED_INT_10_10_10_2:
+-      case GL_UNSIGNED_INT_2_10_10_10_REV:
+-	 return do32bpt ? _dri_texformat_argb8888 : _dri_texformat_argb1555;
+-      case GL_UNSIGNED_SHORT_4_4_4_4:
+-      case GL_UNSIGNED_SHORT_4_4_4_4_REV:
+-	 return _dri_texformat_argb4444;
+-      case GL_UNSIGNED_SHORT_5_5_5_1:
+-      case GL_UNSIGNED_SHORT_1_5_5_5_REV:
+-	 return _dri_texformat_argb1555;
+-      default:
+-         return do32bpt ? _dri_texformat_argb8888 : _dri_texformat_argb4444;
+-      }
+-
+-   case 3:
+-   case GL_RGB:
+-   case GL_COMPRESSED_RGB:
+-      switch ( type ) {
+-      case GL_UNSIGNED_SHORT_4_4_4_4:
+-      case GL_UNSIGNED_SHORT_4_4_4_4_REV:
+-	 return _dri_texformat_argb4444;
+-      case GL_UNSIGNED_SHORT_5_5_5_1:
+-      case GL_UNSIGNED_SHORT_1_5_5_5_REV:
+-	 return _dri_texformat_argb1555;
+-      case GL_UNSIGNED_SHORT_5_6_5:
+-      case GL_UNSIGNED_SHORT_5_6_5_REV:
+-	 return _dri_texformat_rgb565;
+-      default:
+-         return do32bpt ? _dri_texformat_argb8888 : _dri_texformat_rgb565;
+-      }
+-
+-   case GL_RGBA8:
+-   case GL_RGB10_A2:
+-   case GL_RGBA12:
+-   case GL_RGBA16:
+-      return !force16bpt ?
+-	  _dri_texformat_argb8888 : _dri_texformat_argb4444;
+-
+-   case GL_RGBA4:
+-   case GL_RGBA2:
+-      return _dri_texformat_argb4444;
+-
+-   case GL_RGB5_A1:
+-      return _dri_texformat_argb1555;
+-
+-   case GL_RGB8:
+-   case GL_RGB10:
+-   case GL_RGB12:
+-   case GL_RGB16:
+-      return !force16bpt ? _dri_texformat_argb8888 : _dri_texformat_rgb565;
+-
+-   case GL_RGB5:
+-   case GL_RGB4:
+-   case GL_R3_G3_B2:
+-      return _dri_texformat_rgb565;
+-
+-   case GL_ALPHA:
+-   case GL_ALPHA4:
+-   case GL_ALPHA8:
+-   case GL_ALPHA12:
+-   case GL_ALPHA16:
+-   case GL_COMPRESSED_ALPHA:
+-      return _dri_texformat_a8;
+-
+-   case 1:
+-   case GL_LUMINANCE:
+-   case GL_LUMINANCE4:
+-   case GL_LUMINANCE8:
+-   case GL_LUMINANCE12:
+-   case GL_LUMINANCE16:
+-   case GL_COMPRESSED_LUMINANCE:
+-      return _dri_texformat_l8;
+-
+-   case 2:
+-   case GL_LUMINANCE_ALPHA:
+-   case GL_LUMINANCE4_ALPHA4:
+-   case GL_LUMINANCE6_ALPHA2:
+-   case GL_LUMINANCE8_ALPHA8:
+-   case GL_LUMINANCE12_ALPHA4:
+-   case GL_LUMINANCE12_ALPHA12:
+-   case GL_LUMINANCE16_ALPHA16:
+-   case GL_COMPRESSED_LUMINANCE_ALPHA:
+-      return _dri_texformat_al88;
+-
+-   case GL_INTENSITY:
+-   case GL_INTENSITY4:
+-   case GL_INTENSITY8:
+-   case GL_INTENSITY12:
+-   case GL_INTENSITY16:
+-   case GL_COMPRESSED_INTENSITY:
+-      return _dri_texformat_i8;
+-
+-   case GL_YCBCR_MESA:
+-      if (type == GL_UNSIGNED_SHORT_8_8_APPLE ||
+-          type == GL_UNSIGNED_BYTE)
+-         return &_mesa_texformat_ycbcr;
+-      else
+-         return &_mesa_texformat_ycbcr_rev;
+-
+-   case GL_RGB_S3TC:
+-   case GL_RGB4_S3TC:
+-   case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
+-      return &_mesa_texformat_rgb_dxt1;
+-
+-   case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
+-      return &_mesa_texformat_rgba_dxt1;
+-
+-   case GL_RGBA_S3TC:
+-   case GL_RGBA4_S3TC:
+-   case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
+-      return &_mesa_texformat_rgba_dxt3;
+-
+-   case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
+-      return &_mesa_texformat_rgba_dxt5;
+-
+-   default:
+-      _mesa_problem(ctx, "unexpected texture format in %s", __FUNCTION__);
+-      return NULL;
+-   }
+-
+-   return NULL; /* never get here */
+-}
+-
+-
+-static void radeonTexImage1D( GLcontext *ctx, GLenum target, GLint level,
+-                              GLint internalFormat,
+-                              GLint width, GLint border,
+-                              GLenum format, GLenum type, const GLvoid *pixels,
+-                              const struct gl_pixelstore_attrib *packing,
+-                              struct gl_texture_object *texObj,
+-                              struct gl_texture_image *texImage )
+-{
+-   driTextureObject * t = (driTextureObject *) texObj->DriverData;
+-
+-   if ( t ) {
+-      driSwapOutTextureObject( t );
+-   }
+-   else {
+-      t = (driTextureObject *) radeonAllocTexObj( texObj );
+-      if (!t) {
+-         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage1D");
+-         return;
+-      }
+-   }
+-
+-   /* Note, this will call ChooseTextureFormat */
+-   _mesa_store_teximage1d(ctx, target, level, internalFormat,
+-                          width, border, format, type, pixels,
+-                          &ctx->Unpack, texObj, texImage);
+-
+-   t->dirty_images[0] |= (1 << level);
+-}
+-
+-
+-static void radeonTexSubImage1D( GLcontext *ctx, GLenum target, GLint level,
+-                                 GLint xoffset,
+-                                 GLsizei width,
+-                                 GLenum format, GLenum type,
+-                                 const GLvoid *pixels,
+-                                 const struct gl_pixelstore_attrib *packing,
+-                                 struct gl_texture_object *texObj,
+-                                 struct gl_texture_image *texImage )
+-{
+-   driTextureObject * t = (driTextureObject *) texObj->DriverData;
+-
+-   assert( t ); /* this _should_ be true */
+-   if ( t ) {
+-      driSwapOutTextureObject( t );
+-   }
+-   else {
+-      t = (driTextureObject *) radeonAllocTexObj( texObj );
+-      if (!t) {
+-         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage1D");
+-         return;
+-      }
+-   }
+-
+-   _mesa_store_texsubimage1d(ctx, target, level, xoffset, width,
+-			     format, type, pixels, packing, texObj,
+-			     texImage);
+-
+-   t->dirty_images[0] |= (1 << level);
+-}
+-
+-
+-static void radeonTexImage2D( GLcontext *ctx, GLenum target, GLint level,
+-                              GLint internalFormat,
+-                              GLint width, GLint height, GLint border,
+-                              GLenum format, GLenum type, const GLvoid *pixels,
+-                              const struct gl_pixelstore_attrib *packing,
+-                              struct gl_texture_object *texObj,
+-                              struct gl_texture_image *texImage )
+-{
+-   driTextureObject * t = (driTextureObject *) texObj->DriverData;
+-   GLuint face;
+-
+-   /* which cube face or ordinary 2D image */
+-   switch (target) {
+-   case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
+-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
+-   case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
+-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
+-   case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
+-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
+-      face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
+-      ASSERT(face < 6);
+-      break;
+-   default:
+-      face = 0;
+-   }
+-
+-   if ( t != NULL ) {
+-      driSwapOutTextureObject( t );
+-   }
+-   else {
+-      t = (driTextureObject *) radeonAllocTexObj( texObj );
+-      if (!t) {
+-         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage2D");
+-         return;
+-      }
+-   }
+-
+-   /* Note, this will call ChooseTextureFormat */
+-   _mesa_store_teximage2d(ctx, target, level, internalFormat,
+-                          width, height, border, format, type, pixels,
+-                          &ctx->Unpack, texObj, texImage);
+-
+-   t->dirty_images[face] |= (1 << level);
+-}
+-
+-
+-static void radeonTexSubImage2D( GLcontext *ctx, GLenum target, GLint level,
+-                                 GLint xoffset, GLint yoffset,
+-                                 GLsizei width, GLsizei height,
+-                                 GLenum format, GLenum type,
+-                                 const GLvoid *pixels,
+-                                 const struct gl_pixelstore_attrib *packing,
+-                                 struct gl_texture_object *texObj,
+-                                 struct gl_texture_image *texImage )
+-{
+-   driTextureObject * t = (driTextureObject *) texObj->DriverData;
+-   GLuint face;
+-
+-   /* which cube face or ordinary 2D image */
+-   switch (target) {
+-   case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
+-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
+-   case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
+-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
+-   case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
+-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
+-      face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
+-      ASSERT(face < 6);
+-      break;
+-   default:
+-      face = 0;
+-   }
+-
+-   assert( t ); /* this _should_ be true */
+-   if ( t ) {
+-      driSwapOutTextureObject( t );
+-   }
+-   else {
+-      t = (driTextureObject *) radeonAllocTexObj( texObj );
+-      if (!t) {
+-         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage2D");
+-         return;
+-      }
+-   }
+-
+-   _mesa_store_texsubimage2d(ctx, target, level, xoffset, yoffset, width,
+-			     height, format, type, pixels, packing, texObj,
+-			     texImage);
+-
+-   t->dirty_images[face] |= (1 << level);
+-}
+-
+-static void radeonCompressedTexImage2D( GLcontext *ctx, GLenum target, GLint level,
+-                              GLint internalFormat,
+-                              GLint width, GLint height, GLint border,
+-                              GLsizei imageSize, const GLvoid *data,
+-                              struct gl_texture_object *texObj,
+-                              struct gl_texture_image *texImage )
+-{
+-   driTextureObject * t = (driTextureObject *) texObj->DriverData;
+-   GLuint face;
+-
+-   /* which cube face or ordinary 2D image */
+-   switch (target) {
+-   case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
+-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
+-   case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
+-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
+-   case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
+-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
+-      face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
+-      ASSERT(face < 6);
+-      break;
+-   default:
+-      face = 0;
+-   }
+-
+-   if ( t != NULL ) {
+-      driSwapOutTextureObject( t );
+-   }
+-   else {
+-      t = (driTextureObject *) radeonAllocTexObj( texObj );
+-      if (!t) {
+-         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCompressedTexImage2D");
+-         return;
+-      }
+-   }
+-
+-   /* Note, this will call ChooseTextureFormat */
+-   _mesa_store_compressed_teximage2d(ctx, target, level, internalFormat, width,
+-                                 height, border, imageSize, data, texObj, texImage);
+-
+-   t->dirty_images[face] |= (1 << level);
+-}
+-
+-
+-static void radeonCompressedTexSubImage2D( GLcontext *ctx, GLenum target, GLint level,
+-                                 GLint xoffset, GLint yoffset,
+-                                 GLsizei width, GLsizei height,
+-                                 GLenum format,
+-                                 GLsizei imageSize, const GLvoid *data,
+-                                 struct gl_texture_object *texObj,
+-                                 struct gl_texture_image *texImage )
+-{
+-   driTextureObject * t = (driTextureObject *) texObj->DriverData;
+-   GLuint face;
+-
+-
+-   /* which cube face or ordinary 2D image */
+-   switch (target) {
+-   case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
+-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
+-   case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
+-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
+-   case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
+-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
+-      face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
+-      ASSERT(face < 6);
+-      break;
+-   default:
+-      face = 0;
+-   }
+-
+-   assert( t ); /* this _should_ be true */
+-   if ( t ) {
+-      driSwapOutTextureObject( t );
+-   }
+-   else {
+-      t = (driTextureObject *) radeonAllocTexObj( texObj );
+-      if (!t) {
+-         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCompressedTexSubImage2D");
+-         return;
+-      }
+-   }
+-
+-   _mesa_store_compressed_texsubimage2d(ctx, target, level, xoffset, yoffset, width,
+-                                 height, format, imageSize, data, texObj, texImage);
+-
+-   t->dirty_images[face] |= (1 << level);
+-}
+-
+ #define SCALED_FLOAT_TO_BYTE( x, scale ) \
+ 		(((GLuint)((255.0F / scale) * (x))) / 2)
+ 
+ static void radeonTexEnv( GLcontext *ctx, GLenum target,
+ 			  GLenum pname, const GLfloat *param )
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+    GLuint unit = ctx->Texture.CurrentUnit;
+    struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+ 
+@@ -701,7 +285,7 @@ static void radeonTexEnv( GLcontext *ctx, GLenum target,
+        * functions, one mapping [-1.0,0.0] to [-128,0] and one mapping
+        * [0.0,4.0] to [0,127].
+        */
+-      min = driQueryOptionb (&rmesa->optionCache, "no_neg_lod_bias") ?
++      min = driQueryOptionb (&rmesa->radeon.optionCache, "no_neg_lod_bias") ?
+ 	  0.0 : -1.0;
+       bias = CLAMP( *param, min, 4.0 );
+       if ( bias == 0 ) {
+@@ -734,7 +318,7 @@ static void radeonTexParameter( GLcontext *ctx, GLenum target,
+ 				struct gl_texture_object *texObj,
+ 				GLenum pname, const GLfloat *params )
+ {
+-   radeonTexObjPtr t = (radeonTexObjPtr) texObj->DriverData;
++   radeonTexObj* t = radeon_tex_obj(texObj);
+ 
+    if ( RADEON_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) {
+       fprintf( stderr, "%s( %s )\n", __FUNCTION__,
+@@ -762,57 +346,51 @@ static void radeonTexParameter( GLcontext *ctx, GLenum target,
+    case GL_TEXTURE_MAX_LEVEL:
+    case GL_TEXTURE_MIN_LOD:
+    case GL_TEXTURE_MAX_LOD:
++
+       /* This isn't the most efficient solution but there doesn't appear to
+        * be a nice alternative.  Since there's no LOD clamping,
+        * we just have to rely on loading the right subset of mipmap levels
+        * to simulate a clamped LOD.
+        */
+-      driSwapOutTextureObject( (driTextureObject *) t );
++      if (t->mt) {
++         radeon_miptree_unreference(t->mt);
++	 t->mt = 0;
++	 t->validated = GL_FALSE;
++      }
+       break;
+ 
+    default:
+       return;
+    }
+-
+-   /* Mark this texobj as dirty (one bit per tex unit)
+-    */
+-   t->dirty_state = TEX_ALL;
+-}
+-
+-
+-static void radeonBindTexture( GLcontext *ctx, GLenum target,
+-			       struct gl_texture_object *texObj )
+-{
+-   if ( RADEON_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) {
+-      fprintf( stderr, "%s( %p ) unit=%d\n", __FUNCTION__, (void *)texObj,
+-	       ctx->Texture.CurrentUnit );
+-   }
+-
+-   assert( (target != GL_TEXTURE_1D && target != GL_TEXTURE_2D &&
+-            target != GL_TEXTURE_RECTANGLE_NV && target != GL_TEXTURE_CUBE_MAP) ||
+-           (texObj->DriverData != NULL) );
+ }
+ 
+-
+ static void radeonDeleteTexture( GLcontext *ctx,
+ 				 struct gl_texture_object *texObj )
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+-   driTextureObject * t = (driTextureObject *) texObj->DriverData;
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
++   radeonTexObj* t = radeon_tex_obj(texObj);
++   int i;
+ 
+    if ( RADEON_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) {
+       fprintf( stderr, "%s( %p (target = %s) )\n", __FUNCTION__, (void *)texObj,
+ 	       _mesa_lookup_enum_by_nr( texObj->Target ) );
+    }
+ 
+-   if ( t != NULL ) {
+-      if ( rmesa ) {
+-         RADEON_FIREVERTICES( rmesa );
+-      }
+-
+-      driDestroyTextureObject( t );
++   if ( rmesa ) {
++     radeon_firevertices(&rmesa->radeon);
++     for ( i = 0 ; i < rmesa->radeon.glCtx->Const.MaxTextureUnits ; i++ ) {
++       if ( t == rmesa->state.texture.unit[i].texobj ) {
++	 rmesa->state.texture.unit[i].texobj = NULL;
++	 rmesa->hw.tex[i].dirty = GL_FALSE;
++	 rmesa->hw.cube[i].dirty = GL_FALSE;
++       }
++     }
+    }
+ 
++   if (t->mt) {
++      radeon_miptree_unreference(t->mt);
++      t->mt = 0;
++   }
+    /* Free mipmap images and the texture object itself */
+    _mesa_delete_texture_object(ctx, texObj);
+ }
+@@ -832,7 +410,7 @@ static void radeonTexGen( GLcontext *ctx,
+ 			  GLenum pname,
+ 			  const GLfloat *params )
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+    GLuint unit = ctx->Texture.CurrentUnit;
+    rmesa->recheck_texgen[unit] = GL_TRUE;
+ }
+@@ -846,17 +424,27 @@ static void radeonTexGen( GLcontext *ctx,
+ static struct gl_texture_object *
+ radeonNewTextureObject( GLcontext *ctx, GLuint name, GLenum target )
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+-   struct gl_texture_object *obj;
+-   obj = _mesa_new_texture_object(ctx, name, target);
+-   if (!obj)
+-      return NULL;
+-   obj->MaxAnisotropy = rmesa->initialMaxAnisotropy;
+-   radeonAllocTexObj( obj );
+-   return obj;
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
++   radeonTexObj* t = CALLOC_STRUCT(radeon_tex_obj);
++
++   _mesa_initialize_texture_object(&t->base, name, target);
++   t->base.MaxAnisotropy = rmesa->radeon.initialMaxAnisotropy;
++
++   t->border_fallback = GL_FALSE;
++
++   t->pp_txfilter = RADEON_BORDER_MODE_OGL;
++   t->pp_txformat = (RADEON_TXFORMAT_ENDIAN_NO_SWAP |
++		     RADEON_TXFORMAT_PERSPECTIVE_ENABLE);
++   
++   radeonSetTexWrap( t, t->base.WrapS, t->base.WrapT );
++   radeonSetTexMaxAnisotropy( t, t->base.MaxAnisotropy );
++   radeonSetTexFilter( t, t->base.MinFilter, t->base.MagFilter );
++   radeonSetTexBorderColor( t, t->base._BorderChan );
++   return &t->base;
+ }
+ 
+ 
++
+ void radeonInitTextureFuncs( struct dd_function_table *functions )
+ {
+    functions->ChooseTextureFormat	= radeonChooseTextureFormat;
+@@ -864,11 +452,12 @@ void radeonInitTextureFuncs( struct dd_function_table *functions )
+    functions->TexImage2D		= radeonTexImage2D;
+    functions->TexSubImage1D		= radeonTexSubImage1D;
+    functions->TexSubImage2D		= radeonTexSubImage2D;
++   functions->GetTexImage               = radeonGetTexImage;
++   functions->GetCompressedTexImage     = radeonGetCompressedTexImage;
+ 
+    functions->NewTextureObject		= radeonNewTextureObject;
+-   functions->BindTexture		= radeonBindTexture;
++   //   functions->BindTexture		= radeonBindTexture;
+    functions->DeleteTexture		= radeonDeleteTexture;
+-   functions->IsTextureResident		= driIsTextureResident;
+ 
+    functions->TexEnv			= radeonTexEnv;
+    functions->TexParameter		= radeonTexParameter;
+@@ -877,5 +466,12 @@ void radeonInitTextureFuncs( struct dd_function_table *functions )
+    functions->CompressedTexImage2D	= radeonCompressedTexImage2D;
+    functions->CompressedTexSubImage2D	= radeonCompressedTexSubImage2D;
+ 
++   functions->GenerateMipmap = radeonGenerateMipmap;
++
++   functions->NewTextureImage = radeonNewTextureImage;
++   functions->FreeTexImageData = radeonFreeTexImageData;
++   functions->MapTexture = radeonMapTexture;
++   functions->UnmapTexture = radeonUnmapTexture;
++
+    driInitTextureFormats();
+ }
+diff --git a/src/mesa/drivers/dri/radeon/radeon_tex.h b/src/mesa/drivers/dri/radeon/radeon_tex.h
+index 8000880..8c2f9be 100644
+--- a/src/mesa/drivers/dri/radeon/radeon_tex.h
++++ b/src/mesa/drivers/dri/radeon/radeon_tex.h
+@@ -43,10 +43,10 @@ extern void radeonSetTexOffset(__DRIcontext *pDRICtx, GLint texname,
+ 
+ extern void radeonUpdateTextureState( GLcontext *ctx );
+ 
+-extern int radeonUploadTexImages( radeonContextPtr rmesa, radeonTexObjPtr t,
++extern int radeonUploadTexImages( r100ContextPtr rmesa, radeonTexObjPtr t,
+ 				  GLuint face );
+ 
+-extern void radeonDestroyTexObj( radeonContextPtr rmesa, radeonTexObjPtr t );
++extern void radeonDestroyTexObj( r100ContextPtr rmesa, radeonTexObjPtr t );
+ 
+ extern void radeonInitTextureFuncs( struct dd_function_table *functions );
+ 
+diff --git a/src/mesa/drivers/dri/radeon/radeon_texmem.c b/src/mesa/drivers/dri/radeon/radeon_texmem.c
+deleted file mode 100644
+index 5f7bbe6..0000000
+--- a/src/mesa/drivers/dri/radeon/radeon_texmem.c
++++ /dev/null
+@@ -1,404 +0,0 @@
+-/**************************************************************************
+-
+-Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+-                     VA Linux Systems Inc., Fremont, California.
+-
+-All Rights Reserved.
+-
+-Permission is hereby granted, free of charge, to any person obtaining
+-a copy of this software and associated documentation files (the
+-"Software"), to deal in the Software without restriction, including
+-without limitation on the rights to use, copy, modify, merge, publish,
+-distribute, sub license, and/or sell copies of the Software, and to
+-permit persons to whom the Software is furnished to do so, subject to
+-the following conditions:
+-
+-The above copyright notice and this permission notice (including the
+-next paragraph) shall be included in all copies or substantial
+-portions of the Software.
+-
+-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+-NON-INFRINGEMENT. IN NO EVENT SHALL ATI, VA LINUX SYSTEMS AND/OR THEIR
+-SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+-IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
+-IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+-SOFTWARE.
+-
+-**************************************************************************/
+-
+-/*
+- * Authors:
+- *   Kevin E. Martin <martin@valinux.com>
+- *   Gareth Hughes <gareth@valinux.com>
+- *
+- */
+-#include <errno.h> 
+-
+-#include "main/glheader.h"
+-#include "main/imports.h"
+-#include "main/context.h"
+-#include "main/macros.h"
+-
+-#include "radeon_context.h"
+-#include "radeon_ioctl.h"
+-#include "radeon_tex.h"
+-
+-#include <unistd.h>  /* for usleep() */
+-
+-
+-/**
+- * Destroy any device-dependent state associated with the texture.  This may
+- * include NULLing out hardware state that points to the texture.
+- */
+-void
+-radeonDestroyTexObj( radeonContextPtr rmesa, radeonTexObjPtr t )
+-{
+-   if ( RADEON_DEBUG & DEBUG_TEXTURE ) {
+-      fprintf( stderr, "%s( %p, %p )\n", __FUNCTION__, (void *)t, (void *)t->base.tObj );
+-   }
+-
+-   if ( rmesa != NULL ) {
+-      unsigned   i;
+-
+-
+-      for ( i = 0 ; i < rmesa->glCtx->Const.MaxTextureUnits ; i++ ) {
+-	 if ( t == rmesa->state.texture.unit[i].texobj ) {
+-	    rmesa->state.texture.unit[i].texobj = NULL;
+-	 }
+-      }
+-   }
+-}
+-
+-
+-/* ------------------------------------------------------------
+- * Texture image conversions
+- */
+-
+-
+-static void radeonUploadRectSubImage( radeonContextPtr rmesa,
+-				      radeonTexObjPtr t, 
+-				      struct gl_texture_image *texImage,
+-				      GLint x, GLint y, 
+-				      GLint width, GLint height )
+-{
+-   const struct gl_texture_format *texFormat = texImage->TexFormat;
+-   int blit_format, dstPitch, done;
+-
+-   switch ( texFormat->TexelBytes ) {
+-   case 1:
+-      blit_format = RADEON_GMC_DST_8BPP_CI;
+-      break;
+-   case 2:
+-      blit_format = RADEON_GMC_DST_16BPP;
+-      break;
+-   case 4:
+-      blit_format = RADEON_GMC_DST_32BPP;
+-      break;
+-   default:
+-      fprintf( stderr, "radeonUploadRectSubImage: unknown blit_format (texelbytes=%d)\n", 
+-      	       texFormat->TexelBytes);
+-      return;
+-   }
+-
+-   t->image[0][0].data = texImage->Data;
+-
+-   /* Currently don't need to cope with small pitches.
+-    */
+-   width = texImage->Width;
+-   height = texImage->Height;
+-   dstPitch = t->pp_txpitch + 32;
+-
+-   {	/* FIXME: prefer GART-texturing if possible */
+-      /* Data not in GART memory, or bad pitch.
+-       */
+-      for (done = 0; done < height ; ) {
+-	 struct radeon_dma_region region;
+-	 int lines = MIN2( height - done, RADEON_BUFFER_SIZE / dstPitch );
+-	 int src_pitch;
+-	 char *tex;
+-
+-         src_pitch = texImage->RowStride * texFormat->TexelBytes;
+-
+-	 tex = (char *)texImage->Data + done * src_pitch;
+-
+-	 memset(&region, 0, sizeof(region));
+-	 radeonAllocDmaRegion( rmesa, &region, lines * dstPitch, 1024 );
+-
+-	 /* Copy texdata to dma:
+-	  */
+-	 if (0)
+-	    fprintf(stderr, "%s: src_pitch %d dst_pitch %d\n",
+-		    __FUNCTION__, src_pitch, dstPitch);
+-
+-	 if (src_pitch == dstPitch) {
+-	    memcpy( region.address + region.start, tex, lines * src_pitch );
+-	 } 
+-	 else {
+-	    char *buf = region.address + region.start;
+-	    int i;
+-	    for (i = 0 ; i < lines ; i++) {
+-	       memcpy( buf, tex, src_pitch );
+-	       buf += dstPitch;
+-	       tex += src_pitch;
+-	    }
+-	 }
+-
+-	 radeonEmitWait( rmesa, RADEON_WAIT_3D );
+-
+-	 
+-
+-	 /* Blit to framebuffer
+-	  */
+-	 radeonEmitBlit( rmesa,
+-		       blit_format,
+-		       dstPitch, GET_START( &region ),
+-		       dstPitch, t->bufAddr,
+-		       0, 0,
+-		       0, done,
+-		       width, lines );
+-	 
+-	 radeonEmitWait( rmesa, RADEON_WAIT_2D );
+-
+-	 radeonReleaseDmaRegion( rmesa, &region, __FUNCTION__ );
+-	 done += lines;
+-      }
+-   }
+-}
+-
+-
+-/**
+- * Upload the texture image associated with texture \a t at the specified
+- * level at the address relative to \a start.
+- */
+-static void uploadSubImage( radeonContextPtr rmesa, radeonTexObjPtr t, 
+-			    GLint hwlevel,
+-			    GLint x, GLint y, GLint width, GLint height,
+-			    GLuint face )
+-{
+-   struct gl_texture_image *texImage = NULL;
+-   GLuint offset;
+-   GLint imageWidth, imageHeight;
+-   GLint ret;
+-   drm_radeon_texture_t tex;
+-   drm_radeon_tex_image_t tmp;
+-   const int level = hwlevel + t->base.firstLevel;
+-
+-   if ( RADEON_DEBUG & DEBUG_TEXTURE ) {
+-      fprintf( stderr, "%s( %p, %p ) level/width/height/face = %d/%d/%d/%u\n", 
+-	       __FUNCTION__, (void *)t, (void *)t->base.tObj, level, width, height, face );
+-   }
+-
+-   ASSERT(face < 6);
+-
+-   /* Ensure we have a valid texture to upload */
+-   if ( ( hwlevel < 0 ) || ( hwlevel >= RADEON_MAX_TEXTURE_LEVELS ) ) {
+-      _mesa_problem(NULL, "bad texture level in %s", __FUNCTION__);
+-      return;
+-   }
+-
+-   texImage = t->base.tObj->Image[face][level];
+-
+-   if ( !texImage ) {
+-      if ( RADEON_DEBUG & DEBUG_TEXTURE )
+-	 fprintf( stderr, "%s: texImage %d is NULL!\n", __FUNCTION__, level );
+-      return;
+-   }
+-   if ( !texImage->Data ) {
+-      if ( RADEON_DEBUG & DEBUG_TEXTURE )
+-	 fprintf( stderr, "%s: image data is NULL!\n", __FUNCTION__ );
+-      return;
+-   }
+-
+-
+-   if (t->base.tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
+-      assert(level == 0);
+-      assert(hwlevel == 0);
+-      if ( RADEON_DEBUG & DEBUG_TEXTURE )
+-	 fprintf( stderr, "%s: image data is rectangular\n", __FUNCTION__);
+-      radeonUploadRectSubImage( rmesa, t, texImage, x, y, width, height );
+-      return;
+-   }
+-
+-   imageWidth = texImage->Width;
+-   imageHeight = texImage->Height;
+-
+-   offset = t->bufAddr + t->base.totalSize * face / 6;
+-
+-   if ( RADEON_DEBUG & (DEBUG_TEXTURE|DEBUG_IOCTL) ) {
+-      GLint imageX = 0;
+-      GLint imageY = 0;
+-      GLint blitX = t->image[face][hwlevel].x;
+-      GLint blitY = t->image[face][hwlevel].y;
+-      GLint blitWidth = t->image[face][hwlevel].width;
+-      GLint blitHeight = t->image[face][hwlevel].height;
+-      fprintf( stderr, "   upload image: %d,%d at %d,%d\n",
+-	       imageWidth, imageHeight, imageX, imageY );
+-      fprintf( stderr, "   upload  blit: %d,%d at %d,%d\n",
+-	       blitWidth, blitHeight, blitX, blitY );
+-      fprintf( stderr, "       blit ofs: 0x%07x level: %d/%d\n",
+-	       (GLuint)offset, hwlevel, level );
+-   }
+-
+-   t->image[face][hwlevel].data = texImage->Data;
+-
+-   /* Init the DRM_RADEON_TEXTURE command / drm_radeon_texture_t struct.
+-    * NOTE: we're always use a 1KB-wide blit and I8 texture format.
+-    * We used to use 1, 2 and 4-byte texels and used to use the texture
+-    * width to dictate the blit width - but that won't work for compressed
+-    * textures. (Brian)
+-    * NOTE: can't do that with texture tiling. (sroland)
+-    */
+-   tex.offset = offset;
+-   tex.image = &tmp;
+-   /* copy (x,y,width,height,data) */
+-   memcpy( &tmp, &t->image[face][hwlevel], sizeof(drm_radeon_tex_image_t) );
+-
+-   if (texImage->TexFormat->TexelBytes) {
+-      /* use multi-byte upload scheme */
+-      tex.height = imageHeight;
+-      tex.width = imageWidth;
+-      tex.format = t->pp_txformat & RADEON_TXFORMAT_FORMAT_MASK;
+-      tex.pitch = MAX2((texImage->Width * texImage->TexFormat->TexelBytes) / 64, 1);
+-      tex.offset += tmp.x & ~1023;
+-      tmp.x = tmp.x % 1024;
+-      if (t->tile_bits & RADEON_TXO_MICRO_TILE_X2) {
+-	 /* need something like "tiled coordinates" ? */
+-	 tmp.y = tmp.x / (tex.pitch * 128) * 2;
+-	 tmp.x = tmp.x % (tex.pitch * 128) / 2 / texImage->TexFormat->TexelBytes;
+-	 tex.pitch |= RADEON_DST_TILE_MICRO >> 22;
+-      }
+-      else {
+-	 tmp.x = tmp.x >> (texImage->TexFormat->TexelBytes >> 1);
+-      }
+-      if ((t->tile_bits & RADEON_TXO_MACRO_TILE) &&
+-	 (texImage->Width * texImage->TexFormat->TexelBytes >= 256)) {
+-	 /* radeon switches off macro tiling for small textures/mipmaps it seems */
+-	 tex.pitch |= RADEON_DST_TILE_MACRO >> 22;
+-      }
+-   }
+-   else {
+-      /* In case of for instance 8x8 texture (2x2 dxt blocks), padding after the first two blocks is
+-         needed (only with dxt1 since 2 dxt3/dxt5 blocks already use 32 Byte). */
+-      /* set tex.height to 1/4 since 1 "macropixel" (dxt-block) has 4 real pixels. Needed
+-         so the kernel module reads the right amount of data. */
+-      tex.format = RADEON_TXFORMAT_I8; /* any 1-byte texel format */
+-      tex.pitch = (BLIT_WIDTH_BYTES / 64);
+-      tex.height = (imageHeight + 3) / 4;
+-      tex.width = (imageWidth + 3) / 4;
+-      switch (t->pp_txformat & RADEON_TXFORMAT_FORMAT_MASK) {
+-      case RADEON_TXFORMAT_DXT1:
+-         tex.width *= 8;
+-         break;
+-      case RADEON_TXFORMAT_DXT23:
+-      case RADEON_TXFORMAT_DXT45:
+-         tex.width *= 16;
+-         break;
+-      }
+-   }
+-
+-   LOCK_HARDWARE( rmesa );
+-   do {
+-      ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_TEXTURE,
+-                                 &tex, sizeof(drm_radeon_texture_t) );
+-   } while ( ret == -EAGAIN );
+-
+-   UNLOCK_HARDWARE( rmesa );
+-
+-   if ( ret ) {
+-      fprintf( stderr, "DRM_RADEON_TEXTURE: return = %d\n", ret );
+-      fprintf( stderr, "   offset=0x%08x\n",
+-	       offset );
+-      fprintf( stderr, "   image width=%d height=%d\n",
+-	       imageWidth, imageHeight );
+-      fprintf( stderr, "    blit width=%d height=%d data=%p\n",
+-	       t->image[face][hwlevel].width, t->image[face][hwlevel].height,
+-	       t->image[face][hwlevel].data );
+-      exit( 1 );
+-   }
+-}
+-
+-
+-/**
+- * Upload the texture images associated with texture \a t.  This might
+- * require the allocation of texture memory.
+- * 
+- * \param rmesa Context pointer
+- * \param t Texture to be uploaded
+- * \param face Cube map face to be uploaded.  Zero for non-cube maps.
+- */
+-
+-int radeonUploadTexImages( radeonContextPtr rmesa, radeonTexObjPtr t, GLuint face )
+-{
+-   int numLevels;
+-
+-   if ( !t || t->base.totalSize == 0 || t->image_override )
+-      return 0;
+-
+-   if ( RADEON_DEBUG & (DEBUG_TEXTURE|DEBUG_IOCTL) ) {
+-      fprintf( stderr, "%s( %p, %p ) sz=%d lvls=%d-%d\n", __FUNCTION__,
+-	       (void *)rmesa->glCtx, (void *)t->base.tObj, t->base.totalSize,
+-	       t->base.firstLevel, t->base.lastLevel );
+-   }
+-
+-   numLevels = t->base.lastLevel - t->base.firstLevel + 1;
+-
+-   if (RADEON_DEBUG & DEBUG_SYNC) {
+-      fprintf(stderr, "%s: Syncing\n", __FUNCTION__ );
+-      radeonFinish( rmesa->glCtx );
+-   }
+-
+-   LOCK_HARDWARE( rmesa );
+-
+-   if ( t->base.memBlock == NULL ) {
+-      int heap;
+-
+-      heap = driAllocateTexture( rmesa->texture_heaps, rmesa->nr_heaps,
+-				 (driTextureObject *) t );
+-      if ( heap == -1 ) {
+-	 UNLOCK_HARDWARE( rmesa );
+-	 return -1;
+-      }
+-
+-      /* Set the base offset of the texture image */
+-      t->bufAddr = rmesa->radeonScreen->texOffset[heap] 
+-	   + t->base.memBlock->ofs;
+-      t->pp_txoffset = t->bufAddr;
+-
+-      if (!(t->base.tObj->Image[0][0]->IsClientData)) {
+-	 /* hope it's safe to add that here... */
+-	 t->pp_txoffset |= t->tile_bits;
+-      }
+-
+-      /* Mark this texobj as dirty on all units:
+-       */
+-      t->dirty_state = TEX_ALL;
+-   }
+-
+-
+-   /* Let the world know we've used this memory recently.
+-    */
+-   driUpdateTextureLRU( (driTextureObject *) t );
+-   UNLOCK_HARDWARE( rmesa );
+-
+-
+-   /* Upload any images that are new */
+-   if (t->base.dirty_images[face]) {
+-      int i;
+-      for ( i = 0 ; i < numLevels ; i++ ) {
+-         if ( (t->base.dirty_images[face] & (1 << (i+t->base.firstLevel))) != 0 ) {
+-            uploadSubImage( rmesa, t, i, 0, 0, t->image[face][i].width,
+-			    t->image[face][i].height, face );
+-         }
+-      }
+-      t->base.dirty_images[face] = 0;
+-   }
+-
+-   if (RADEON_DEBUG & DEBUG_SYNC) {
+-      fprintf(stderr, "%s: Syncing\n", __FUNCTION__ );
+-      radeonFinish( rmesa->glCtx );
+-   }
+-
+-   return 0;
+-}
+diff --git a/src/mesa/drivers/dri/radeon/radeon_texstate.c b/src/mesa/drivers/dri/radeon/radeon_texstate.c
+index 1e2f654..6a34f1e 100644
+--- a/src/mesa/drivers/dri/radeon/radeon_texstate.c
++++ b/src/mesa/drivers/dri/radeon/radeon_texstate.c
+@@ -43,6 +43,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #include "main/enums.h"
+ 
+ #include "radeon_context.h"
++#include "radeon_mipmap_tree.h"
+ #include "radeon_state.h"
+ #include "radeon_ioctl.h"
+ #include "radeon_swtcl.h"
+@@ -75,10 +76,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #define VALID_FORMAT(f) ( ((f) <= MESA_FORMAT_RGBA_DXT5) \
+ 			     && (tx_table[f].format != 0xffffffff) )
+ 
+-static const struct {
++struct tx_table {
+    GLuint format, filter;
+-}
+-tx_table[] =
++};
++
++static const struct tx_table tx_table[] =
+ {
+    _ALPHA(RGBA8888),
+    _ALPHA_REV(RGBA8888),
+@@ -111,252 +113,6 @@ tx_table[] =
+ #undef _ALPHA
+ #undef _INVALID
+ 
+-/**
+- * This function computes the number of bytes of storage needed for
+- * the given texture object (all mipmap levels, all cube faces).
+- * The \c image[face][level].x/y/width/height parameters for upload/blitting
+- * are computed here.  \c pp_txfilter, \c pp_txformat, etc. will be set here
+- * too.
+- * 
+- * \param rmesa Context pointer
+- * \param tObj GL texture object whose images are to be posted to
+- *                 hardware state.
+- */
+-static void radeonSetTexImages( radeonContextPtr rmesa,
+-				struct gl_texture_object *tObj )
+-{
+-   radeonTexObjPtr t = (radeonTexObjPtr)tObj->DriverData;
+-   const struct gl_texture_image *baseImage = tObj->Image[0][tObj->BaseLevel];
+-   GLint curOffset, blitWidth;
+-   GLint i, texelBytes;
+-   GLint numLevels;
+-   GLint log2Width, log2Height, log2Depth;
+-
+-   /* Set the hardware texture format
+-    */
+-   if ( !t->image_override ) {
+-      t->pp_txformat &= ~(RADEON_TXFORMAT_FORMAT_MASK |
+-                          RADEON_TXFORMAT_ALPHA_IN_MAP);
+-      t->pp_txfilter &= ~RADEON_YUV_TO_RGB;
+-
+-      if ( VALID_FORMAT( baseImage->TexFormat->MesaFormat ) ) {
+-         t->pp_txformat |= tx_table[ baseImage->TexFormat->MesaFormat ].format;
+-         t->pp_txfilter |= tx_table[ baseImage->TexFormat->MesaFormat ].filter;
+-      }
+-      else {
+-         _mesa_problem(NULL, "unexpected texture format in %s", __FUNCTION__);
+-         return;
+-      }
+-   }
+-
+-   texelBytes = baseImage->TexFormat->TexelBytes;
+-
+-   /* Compute which mipmap levels we really want to send to the hardware.
+-    */
+-
+-   if (tObj->Target != GL_TEXTURE_CUBE_MAP)
+-      driCalculateTextureFirstLastLevel( (driTextureObject *) t );
+-   else {
+-      /* r100 can't handle mipmaps for cube/3d textures, so don't waste
+-         memory for them */
+-      t->base.firstLevel = t->base.lastLevel = tObj->BaseLevel;
+-   }
+-   log2Width  = tObj->Image[0][t->base.firstLevel]->WidthLog2;
+-   log2Height = tObj->Image[0][t->base.firstLevel]->HeightLog2;
+-   log2Depth  = tObj->Image[0][t->base.firstLevel]->DepthLog2;
+-
+-   numLevels = t->base.lastLevel - t->base.firstLevel + 1;
+-
+-   assert(numLevels <= RADEON_MAX_TEXTURE_LEVELS);
+-
+-   /* Calculate mipmap offsets and dimensions for blitting (uploading)
+-    * The idea is that we lay out the mipmap levels within a block of
+-    * memory organized as a rectangle of width BLIT_WIDTH_BYTES.
+-    */
+-   curOffset = 0;
+-   blitWidth = BLIT_WIDTH_BYTES;
+-   t->tile_bits = 0;
+-
+-   /* figure out if this texture is suitable for tiling. */
+-   if (texelBytes && (tObj->Target != GL_TEXTURE_RECTANGLE_NV)) {
+-      if (rmesa->texmicrotile && (baseImage->Height > 1)) {
+-	 /* allow 32 (bytes) x 1 mip (which will use two times the space
+-	    the non-tiled version would use) max if base texture is large enough */
+-	 if ((numLevels == 1) ||
+-	   (((baseImage->Width * texelBytes / baseImage->Height) <= 32) &&
+-	       (baseImage->Width * texelBytes > 64)) ||
+-	    ((baseImage->Width * texelBytes / baseImage->Height) <= 16)) {
+-	    /* R100 has two microtile bits (only the txoffset reg, not the blitter)
+-	       weird: X2 + OPT: 32bit correct, 16bit completely hosed
+-		      X2: 32bit correct, 16bit correct
+-		      OPT: 32bit large mips correct, small mips hosed, 16bit completely hosed */
+-	    t->tile_bits |= RADEON_TXO_MICRO_TILE_X2 /*| RADEON_TXO_MICRO_TILE_OPT*/;
+-	 }
+-      }
+-      if ((baseImage->Width * texelBytes >= 256) && (baseImage->Height >= 16)) {
+-	 /* R100 disables macro tiling only if mip width is smaller than 256 bytes, and not
+-	    in the case if height is smaller than 16 (not 100% sure), as does the r200,
+-	    so need to disable macro tiling in that case */
+-	 if ((numLevels == 1) || ((baseImage->Width * texelBytes / baseImage->Height) <= 4)) {
+-	    t->tile_bits |= RADEON_TXO_MACRO_TILE;
+-	 }
+-      }
+-   }
+-
+-   for (i = 0; i < numLevels; i++) {
+-      const struct gl_texture_image *texImage;
+-      GLuint size;
+-
+-      texImage = tObj->Image[0][i + t->base.firstLevel];
+-      if ( !texImage )
+-	 break;
+-
+-      /* find image size in bytes */
+-      if (texImage->IsCompressed) {
+-      /* need to calculate the size AFTER padding even though the texture is
+-         submitted without padding.
+-         Only handle pot textures currently - don't know if npot is even possible,
+-         size calculation would certainly need (trivial) adjustments.
+-         Align (and later pad) to 32byte, not sure what that 64byte blit width is
+-         good for? */
+-         if ((t->pp_txformat & RADEON_TXFORMAT_FORMAT_MASK) == RADEON_TXFORMAT_DXT1) {
+-            /* RGB_DXT1/RGBA_DXT1, 8 bytes per block */
+-            if ((texImage->Width + 3) < 8) /* width one block */
+-               size = texImage->CompressedSize * 4;
+-            else if ((texImage->Width + 3) < 16)
+-               size = texImage->CompressedSize * 2;
+-            else size = texImage->CompressedSize;
+-         }
+-         else /* DXT3/5, 16 bytes per block */
+-            if ((texImage->Width + 3) < 8)
+-               size = texImage->CompressedSize * 2;
+-            else size = texImage->CompressedSize;
+-      }
+-      else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
+-	 size = ((texImage->Width * texelBytes + 63) & ~63) * texImage->Height;
+-      }
+-      else if (t->tile_bits & RADEON_TXO_MICRO_TILE_X2) {
+-	 /* tile pattern is 16 bytes x2. mipmaps stay 32 byte aligned,
+-	    though the actual offset may be different (if texture is less than
+-	    32 bytes width) to the untiled case */
+-	 int w = (texImage->Width * texelBytes * 2 + 31) & ~31;
+-	 size = (w * ((texImage->Height + 1) / 2)) * texImage->Depth;
+-	 blitWidth = MAX2(texImage->Width, 64 / texelBytes);
+-      }
+-      else {
+-	 int w = (texImage->Width * texelBytes + 31) & ~31;
+-	 size = w * texImage->Height * texImage->Depth;
+-	 blitWidth = MAX2(texImage->Width, 64 / texelBytes);
+-      }
+-      assert(size > 0);
+-
+-      /* Align to 32-byte offset.  It is faster to do this unconditionally
+-       * (no branch penalty).
+-       */
+-
+-      curOffset = (curOffset + 0x1f) & ~0x1f;
+-
+-      if (texelBytes) {
+-	 t->image[0][i].x = curOffset; /* fix x and y coords up later together with offset */
+-	 t->image[0][i].y = 0;
+-	 t->image[0][i].width = MIN2(size / texelBytes, blitWidth);
+-	 t->image[0][i].height = (size / texelBytes) / t->image[0][i].width;
+-      }
+-      else {
+-         t->image[0][i].x = curOffset % BLIT_WIDTH_BYTES;
+-         t->image[0][i].y = curOffset / BLIT_WIDTH_BYTES;
+-         t->image[0][i].width  = MIN2(size, BLIT_WIDTH_BYTES);
+-         t->image[0][i].height = size / t->image[0][i].width;     
+-      }
+-
+-#if 0
+-      /* for debugging only and only  applicable to non-rectangle targets */
+-      assert(size % t->image[0][i].width == 0);
+-      assert(t->image[0][i].x == 0
+-             || (size < BLIT_WIDTH_BYTES && t->image[0][i].height == 1));
+-#endif
+-
+-      if (0)
+-         fprintf(stderr,
+-                 "level %d: %dx%d x=%d y=%d w=%d h=%d size=%d at %d\n",
+-                 i, texImage->Width, texImage->Height,
+-                 t->image[0][i].x, t->image[0][i].y,
+-                 t->image[0][i].width, t->image[0][i].height, size, curOffset);
+-
+-      curOffset += size;
+-
+-   }
+-
+-   /* Align the total size of texture memory block.
+-    */
+-   t->base.totalSize = (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK;
+-
+-   /* Setup remaining cube face blits, if needed */
+-   if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
+-      const GLuint faceSize = t->base.totalSize;
+-      GLuint face;
+-      /* reuse face 0 x/y/width/height - just update the offset when uploading */
+-      for (face = 1; face < 6; face++) {
+-         for (i = 0; i < numLevels; i++) {
+-            t->image[face][i].x =  t->image[0][i].x;
+-            t->image[face][i].y =  t->image[0][i].y;
+-            t->image[face][i].width  = t->image[0][i].width;
+-            t->image[face][i].height = t->image[0][i].height;
+-         }
+-      }
+-      t->base.totalSize = 6 * faceSize; /* total texmem needed */
+-   }
+-
+-   /* Hardware state:
+-    */
+-   t->pp_txfilter &= ~RADEON_MAX_MIP_LEVEL_MASK;
+-   t->pp_txfilter |= (numLevels - 1) << RADEON_MAX_MIP_LEVEL_SHIFT;
+-
+-   t->pp_txformat &= ~(RADEON_TXFORMAT_WIDTH_MASK |
+-		       RADEON_TXFORMAT_HEIGHT_MASK |
+-                       RADEON_TXFORMAT_CUBIC_MAP_ENABLE |
+-                       RADEON_TXFORMAT_F5_WIDTH_MASK |
+-                       RADEON_TXFORMAT_F5_HEIGHT_MASK);
+-   t->pp_txformat |= ((log2Width << RADEON_TXFORMAT_WIDTH_SHIFT) |
+-		      (log2Height << RADEON_TXFORMAT_HEIGHT_SHIFT));
+-
+-   if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
+-      assert(log2Width == log2Height);
+-      t->pp_txformat |= ((log2Width << RADEON_TXFORMAT_F5_WIDTH_SHIFT) |
+-                         (log2Height << RADEON_TXFORMAT_F5_HEIGHT_SHIFT) |
+-                         (RADEON_TXFORMAT_CUBIC_MAP_ENABLE));
+-      t->pp_cubic_faces = ((log2Width << RADEON_FACE_WIDTH_1_SHIFT) |
+-                           (log2Height << RADEON_FACE_HEIGHT_1_SHIFT) |
+-                           (log2Width << RADEON_FACE_WIDTH_2_SHIFT) |
+-                           (log2Height << RADEON_FACE_HEIGHT_2_SHIFT) |
+-                           (log2Width << RADEON_FACE_WIDTH_3_SHIFT) |
+-                           (log2Height << RADEON_FACE_HEIGHT_3_SHIFT) |
+-                           (log2Width << RADEON_FACE_WIDTH_4_SHIFT) |
+-                           (log2Height << RADEON_FACE_HEIGHT_4_SHIFT));
+-   }
+-
+-   t->pp_txsize = (((tObj->Image[0][t->base.firstLevel]->Width - 1) << 0) |
+-                   ((tObj->Image[0][t->base.firstLevel]->Height - 1) << 16));
+-
+-   /* Only need to round to nearest 32 for textures, but the blitter
+-    * requires 64-byte aligned pitches, and we may/may not need the
+-    * blitter.   NPOT only!
+-    */
+-   if ( !t->image_override ) {
+-      if (baseImage->IsCompressed)
+-         t->pp_txpitch = (tObj->Image[0][t->base.firstLevel]->Width + 63) & ~(63);
+-      else
+-         t->pp_txpitch = ((tObj->Image[0][t->base.firstLevel]->Width * texelBytes) + 63) & ~(63);
+-      t->pp_txpitch -= 32;
+-   }
+-
+-   t->dirty_state = TEX_ALL;
+-
+-   /* FYI: radeonUploadTexImages( rmesa, t ); used to be called here */
+-}
+-
+-
+-
+ /* ================================================================
+  * Texture combine functions
+  */
+@@ -503,7 +259,7 @@ do {							\
+ 
+ static GLboolean radeonUpdateTextureEnv( GLcontext *ctx, int unit )
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+    const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+    GLuint color_combine, alpha_combine;
+    const GLuint color_combine0 = RADEON_COLOR_ARG_A_ZERO | RADEON_COLOR_ARG_B_ZERO
+@@ -846,22 +602,21 @@ static GLboolean radeonUpdateTextureEnv( GLcontext *ctx, int unit )
+ void radeonSetTexOffset(__DRIcontext * pDRICtx, GLint texname,
+                         unsigned long long offset, GLint depth, GLuint pitch)
+ {
+-	radeonContextPtr rmesa = pDRICtx->driverPrivate;
++	r100ContextPtr rmesa = pDRICtx->driverPrivate;
+ 	struct gl_texture_object *tObj =
+-	    _mesa_lookup_texture(rmesa->glCtx, texname);
+-	radeonTexObjPtr t;
++	    _mesa_lookup_texture(rmesa->radeon.glCtx, texname);
++	radeonTexObjPtr t = radeon_tex_obj(tObj);
+ 
+ 	if (tObj == NULL)
+ 		return;
+ 
+-	t = (radeonTexObjPtr) tObj->DriverData;
+-
+ 	t->image_override = GL_TRUE;
+ 
+ 	if (!offset)
+ 		return;
+-
+-	t->pp_txoffset = offset;
++	
++	t->bo = NULL;
++	t->override_offset = offset;
+ 	t->pp_txpitch = pitch - 32;
+ 
+ 	switch (depth) {
+@@ -901,12 +656,58 @@ void radeonSetTexOffset(__DRIcontext * pDRICtx, GLint texname,
+                               RADEON_TXFORMAT_NON_POWER2)
+ 
+ 
+-static void import_tex_obj_state( radeonContextPtr rmesa,
++static void disable_tex_obj_state( r100ContextPtr rmesa, 
++				   int unit )
++{
++   /* Switch off the state of texture unit 'unit'.  Original remark: do not use RADEON_DB_STATE to avoid stale texture caches */
++   uint32_t *cmd = &rmesa->hw.tex[unit].cmd[TEX_CMD_0];
++   GLuint se_coord_fmt = rmesa->hw.set.cmd[SET_SE_COORDFMT];
++   GLuint *txr_cmd = RADEON_DB_STATE( txr[unit] );
++   /* NOTE(review): cmd, se_coord_fmt and txr_cmd are never read below, and txr_cmd invokes RADEON_DB_STATE despite the remark above - confirm intent */
++   RADEON_STATECHANGE( rmesa, tex[unit] );
++
++   RADEON_STATECHANGE( rmesa, tcl );
++   rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &= ~(RADEON_ST_BIT(unit) |
++					     RADEON_Q_BIT(unit));
++   /* If a texgen TCL fallback is flagged for this unit, clear it and request a texgen recheck. */
++   if (rmesa->radeon.TclFallback & (RADEON_TCL_FALLBACK_TEXGEN_0<<unit)) {
++     TCL_FALLBACK( rmesa->radeon.glCtx, (RADEON_TCL_FALLBACK_TEXGEN_0<<unit), GL_FALSE);
++     rmesa->recheck_texgen[unit] = GL_TRUE;
++   }
++
++   if (rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] & RADEON_TXFORMAT_CUBIC_MAP_ENABLE) {
++     /* this seems to be a genuine (r100 only?) hw bug. Need to remove the
++	cubic_map bit on unit 2 when the unit is disabled, otherwise every
++	2nd (2d) mipmap on unit 0 will be broken (may not be needed for other
++	units, better be safe than sorry though).*/
++     RADEON_STATECHANGE( rmesa, tex[unit] );
++     rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] &= ~RADEON_TXFORMAT_CUBIC_MAP_ENABLE;
++   }
++   /* Reset texgen for this unit: clear the texmat/texgen enables and select RADEON_TEXGEN_INPUT_TEXCOORD_0+unit as its input. */
++   {
++      GLuint inputshift = RADEON_TEXGEN_0_INPUT_SHIFT + unit*4;
++      GLuint tmp = rmesa->TexGenEnabled; /* previous value, used to detect a change */
++
++      rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_TEXMAT_0_ENABLE<<unit);
++      rmesa->TexGenEnabled &= ~(RADEON_TEXMAT_0_ENABLE<<unit);
++      rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_INPUT_MASK<<inputshift);
++      rmesa->TexGenNeedNormals[unit] = 0;
++      rmesa->TexGenEnabled |= 
++	(RADEON_TEXGEN_INPUT_TEXCOORD_0+unit) << inputshift;
++
++      if (tmp != rmesa->TexGenEnabled) {
++	rmesa->recheck_texgen[unit] = GL_TRUE;
++	rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX;
++      }
++   }
++}
++
++static void import_tex_obj_state( r100ContextPtr rmesa,
+ 				  int unit,
+ 				  radeonTexObjPtr texobj )
+ {
+ /* do not use RADEON_DB_STATE to avoid stale texture caches */
+-   int *cmd = &rmesa->hw.tex[unit].cmd[TEX_CMD_0];
++   uint32_t *cmd = &rmesa->hw.tex[unit].cmd[TEX_CMD_0];
+    GLuint se_coord_fmt = rmesa->hw.set.cmd[SET_SE_COORDFMT];
+ 
+    RADEON_STATECHANGE( rmesa, tex[unit] );
+@@ -915,10 +716,9 @@ static void import_tex_obj_state( radeonContextPtr rmesa,
+    cmd[TEX_PP_TXFILTER] |= texobj->pp_txfilter & TEXOBJ_TXFILTER_MASK;
+    cmd[TEX_PP_TXFORMAT] &= ~TEXOBJ_TXFORMAT_MASK;
+    cmd[TEX_PP_TXFORMAT] |= texobj->pp_txformat & TEXOBJ_TXFORMAT_MASK;
+-   cmd[TEX_PP_TXOFFSET] = texobj->pp_txoffset;
+    cmd[TEX_PP_BORDER_COLOR] = texobj->pp_border_color;
+ 
+-   if (texobj->base.tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
++   if (texobj->base.Target == GL_TEXTURE_RECTANGLE_NV) {
+       GLuint *txr_cmd = RADEON_DB_STATE( txr[unit] );
+       txr_cmd[TXR_PP_TEX_SIZE] = texobj->pp_txsize; /* NPOT only! */
+       txr_cmd[TXR_PP_TEX_PITCH] = texobj->pp_txpitch; /* NPOT only! */
+@@ -928,22 +728,12 @@ static void import_tex_obj_state( radeonContextPtr rmesa,
+    else {
+       se_coord_fmt &= ~(RADEON_VTX_ST0_NONPARAMETRIC << unit);
+ 
+-      if (texobj->base.tObj->Target == GL_TEXTURE_CUBE_MAP) {
+-	 int *cube_cmd = &rmesa->hw.cube[unit].cmd[CUBE_CMD_0];
+-	 GLuint bytesPerFace = texobj->base.totalSize / 6;
+-	 ASSERT(texobj->base.totalSize % 6 == 0);
++      if (texobj->base.Target == GL_TEXTURE_CUBE_MAP) {
++	 uint32_t *cube_cmd = &rmesa->hw.cube[unit].cmd[CUBE_CMD_0];
+ 
+ 	 RADEON_STATECHANGE( rmesa, cube[unit] );
+ 	 cube_cmd[CUBE_PP_CUBIC_FACES] = texobj->pp_cubic_faces;
+-	 /* dont know if this setup conforms to OpenGL.. 
+-	  * at least it matches the behavior of mesa software renderer
+-	  */
+-	 cube_cmd[CUBE_PP_CUBIC_OFFSET_0] = texobj->pp_txoffset; /* right */
+-	 cube_cmd[CUBE_PP_CUBIC_OFFSET_1] = texobj->pp_txoffset + 1 * bytesPerFace; /* left */
+-	 cube_cmd[CUBE_PP_CUBIC_OFFSET_2] = texobj->pp_txoffset + 2 * bytesPerFace; /* top */
+-	 cube_cmd[CUBE_PP_CUBIC_OFFSET_3] = texobj->pp_txoffset + 3 * bytesPerFace; /* bottom */
+-	 cube_cmd[CUBE_PP_CUBIC_OFFSET_4] = texobj->pp_txoffset + 4 * bytesPerFace; /* front */
+-	 cmd[TEX_PP_TXOFFSET] = texobj->pp_txoffset + 5 * bytesPerFace; /* back */
++	 /* state filled out in the cube_emit */
+       }
+    }
+ 
+@@ -952,13 +742,11 @@ static void import_tex_obj_state( radeonContextPtr rmesa,
+       rmesa->hw.set.cmd[SET_SE_COORDFMT] = se_coord_fmt;
+    }
+ 
+-   texobj->dirty_state &= ~(1<<unit);
++   rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX;
+ }
+ 
+ 
+-
+-
+-static void set_texgen_matrix( radeonContextPtr rmesa, 
++static void set_texgen_matrix( r100ContextPtr rmesa, 
+ 			       GLuint unit,
+ 			       const GLfloat *s_plane,
+ 			       const GLfloat *t_plane,
+@@ -986,14 +774,14 @@ static void set_texgen_matrix( radeonContextPtr rmesa,
+    rmesa->TexGenMatrix[unit].m[15] = q_plane[3];
+ 
+    rmesa->TexGenEnabled |= RADEON_TEXMAT_0_ENABLE << unit;
+-   rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
++   rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX;
+ }
+ 
+ /* Returns GL_FALSE if fallback required.
+  */
+ static GLboolean radeon_validate_texgen( GLcontext *ctx, GLuint unit )
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+    struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+    GLuint inputshift = RADEON_TEXGEN_0_INPUT_SHIFT + unit*4;
+    GLuint tmp = rmesa->TexGenEnabled;
+@@ -1094,283 +882,185 @@ static GLboolean radeon_validate_texgen( GLcontext *ctx, GLuint unit )
+    }
+ 
+    if (tmp != rmesa->TexGenEnabled) {
+-      rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
++      rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX;
+    }
+ 
+    return GL_TRUE;
+ }
+ 
+-
+-static void disable_tex( GLcontext *ctx, int unit )
+-{
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+-
+-   if (rmesa->hw.ctx.cmd[CTX_PP_CNTL] & (RADEON_TEX_0_ENABLE<<unit)) {
+-      /* Texture unit disabled */
+-      if ( rmesa->state.texture.unit[unit].texobj != NULL ) {
+-	 /* The old texture is no longer bound to this texture unit.
+-	  * Mark it as such.
+-	  */
+-
+-	 rmesa->state.texture.unit[unit].texobj->base.bound &= ~(1UL << unit);
+-	 rmesa->state.texture.unit[unit].texobj = NULL;
+-      }
+-
+-      RADEON_STATECHANGE( rmesa, ctx );
+-      rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= 
+-	  ~((RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE) << unit);
+-
+-      RADEON_STATECHANGE( rmesa, tcl );
+-      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &= ~(RADEON_ST_BIT(unit) |
+-						RADEON_Q_BIT(unit));
+-
+-      if (rmesa->TclFallback & (RADEON_TCL_FALLBACK_TEXGEN_0<<unit)) {
+-	 TCL_FALLBACK( ctx, (RADEON_TCL_FALLBACK_TEXGEN_0<<unit), GL_FALSE);
+-	 rmesa->recheck_texgen[unit] = GL_TRUE;
+-      }
+-
+-      if (rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] & RADEON_TXFORMAT_CUBIC_MAP_ENABLE) {
+-      /* this seems to be a genuine (r100 only?) hw bug. Need to remove the
+-         cubic_map bit on unit 2 when the unit is disabled, otherwise every
+-	 2nd (2d) mipmap on unit 0 will be broken (may not be needed for other
+-	 units, better be safe than sorry though).*/
+-	 RADEON_STATECHANGE( rmesa, tex[unit] );
+-	 rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] &= ~RADEON_TXFORMAT_CUBIC_MAP_ENABLE;
+-      }
+-
+-      {
+-	 GLuint inputshift = RADEON_TEXGEN_0_INPUT_SHIFT + unit*4;
+-	 GLuint tmp = rmesa->TexGenEnabled;
+-
+-	 rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_TEXMAT_0_ENABLE<<unit);
+-	 rmesa->TexGenEnabled &= ~(RADEON_TEXMAT_0_ENABLE<<unit);
+-	 rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_INPUT_MASK<<inputshift);
+-	 rmesa->TexGenNeedNormals[unit] = 0;
+-	 rmesa->TexGenEnabled |= 
+-	     (RADEON_TEXGEN_INPUT_TEXCOORD_0+unit) << inputshift;
+-
+-	 if (tmp != rmesa->TexGenEnabled) {
+-	    rmesa->recheck_texgen[unit] = GL_TRUE;
+-	    rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
+-	 }
+-      }
+-   }
+-}
+-
+-static GLboolean enable_tex_2d( GLcontext *ctx, int unit )
+-{
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+-   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+-   struct gl_texture_object *tObj = texUnit->_Current;
+-   radeonTexObjPtr t = (radeonTexObjPtr) tObj->DriverData;
+-
+-   /* Need to load the 2d images associated with this unit.
+-    */
+-   if (t->pp_txformat & RADEON_TXFORMAT_NON_POWER2) {
+-      t->pp_txformat &= ~RADEON_TXFORMAT_NON_POWER2;
+-      t->base.dirty_images[0] = ~0;
+-   }
+-
+-   ASSERT(tObj->Target == GL_TEXTURE_2D || tObj->Target == GL_TEXTURE_1D);
+-
+-   if ( t->base.dirty_images[0] ) {
+-      RADEON_FIREVERTICES( rmesa );
+-      radeonSetTexImages( rmesa, tObj );
+-      radeonUploadTexImages( rmesa, (radeonTexObjPtr) tObj->DriverData, 0 );
+-      if ( !t->base.memBlock && !t->image_override ) 
+-	return GL_FALSE;
+-   }
+-
+-   return GL_TRUE;
+-}
+-
+-static GLboolean enable_tex_cube( GLcontext *ctx, int unit )
++/**
++ * Compute the cached hardware register values for the given texture object.
++ *
++ * \param rmesa Context pointer
++ * \param t the radeon texture object
++ */
++static GLboolean setup_hardware_state(r100ContextPtr rmesa, radeonTexObj *t, int unit)
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+-   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+-   struct gl_texture_object *tObj = texUnit->_Current;
+-   radeonTexObjPtr t = (radeonTexObjPtr) tObj->DriverData;
+-   GLuint face;
+-
+-   /* Need to load the 2d images associated with this unit.
+-    */
+-   if (t->pp_txformat & RADEON_TXFORMAT_NON_POWER2) {
+-      t->pp_txformat &= ~RADEON_TXFORMAT_NON_POWER2;
+-      for (face = 0; face < 6; face++)
+-         t->base.dirty_images[face] = ~0;
+-   }
++   const struct gl_texture_image *firstImage;
++   GLint log2Width, log2Height, log2Depth, texelBytes;
+ 
+-   ASSERT(tObj->Target == GL_TEXTURE_CUBE_MAP);
++   firstImage = t->base.Image[0][t->mt->firstLevel];   
+ 
+-   if ( t->base.dirty_images[0] || t->base.dirty_images[1] ||
+-        t->base.dirty_images[2] || t->base.dirty_images[3] ||
+-        t->base.dirty_images[4] || t->base.dirty_images[5] ) {
+-      /* flush */
+-      RADEON_FIREVERTICES( rmesa );
+-      /* layout memory space, once for all faces */
+-      radeonSetTexImages( rmesa, tObj );
++   if (firstImage->Border > 0) {
++      fprintf(stderr, "%s: border\n", __FUNCTION__);
++      return GL_FALSE;
+    }
+ 
+-   /* upload (per face) */
+-   for (face = 0; face < 6; face++) {
+-      if (t->base.dirty_images[face]) {
+-         radeonUploadTexImages( rmesa, (radeonTexObjPtr) tObj->DriverData, face );
++   log2Width  = firstImage->WidthLog2;
++   log2Height = firstImage->HeightLog2;
++   log2Depth  = firstImage->DepthLog2;
++   texelBytes = firstImage->TexFormat->TexelBytes;
++
++   if (!t->image_override) {
++      if (VALID_FORMAT(firstImage->TexFormat->MesaFormat)) {
++	const struct tx_table *table = tx_table;
++
++	 t->pp_txformat &= ~(RADEON_TXFORMAT_FORMAT_MASK |
++			     RADEON_TXFORMAT_ALPHA_IN_MAP);
++	 t->pp_txfilter &= ~RADEON_YUV_TO_RGB;	 
++	 
++	 t->pp_txformat |= table[ firstImage->TexFormat->MesaFormat ].format;
++	 t->pp_txfilter |= table[ firstImage->TexFormat->MesaFormat ].filter;
++      } else {
++	 _mesa_problem(NULL, "unexpected texture format in %s",
++		       __FUNCTION__);
++	 return GL_FALSE;
+       }
+    }
+-      
+-   if ( !t->base.memBlock ) {
+-      /* texmem alloc failed, use s/w fallback */
+-      return GL_FALSE;
++   
++   t->pp_txfilter &= ~RADEON_MAX_MIP_LEVEL_MASK;
++   t->pp_txfilter |= (t->mt->lastLevel - t->mt->firstLevel) << RADEON_MAX_MIP_LEVEL_SHIFT;
++	
++   t->pp_txformat &= ~(RADEON_TXFORMAT_WIDTH_MASK |
++		       RADEON_TXFORMAT_HEIGHT_MASK |
++		       RADEON_TXFORMAT_CUBIC_MAP_ENABLE |
++		       RADEON_TXFORMAT_F5_WIDTH_MASK |
++		       RADEON_TXFORMAT_F5_HEIGHT_MASK);
++   t->pp_txformat |= ((log2Width << RADEON_TXFORMAT_WIDTH_SHIFT) |
++		      (log2Height << RADEON_TXFORMAT_HEIGHT_SHIFT));
++   
++   t->tile_bits = 0;
++   
++   if (t->base.Target == GL_TEXTURE_CUBE_MAP) {
++      ASSERT(log2Width == log2Height);
++      t->pp_txformat |= ((log2Width << RADEON_TXFORMAT_F5_WIDTH_SHIFT) |
++			 (log2Height << RADEON_TXFORMAT_F5_HEIGHT_SHIFT) |
++			 /* don't think we need this bit, if it exists at all - fglrx does not set it */
++			 (RADEON_TXFORMAT_CUBIC_MAP_ENABLE));
++      t->pp_cubic_faces = ((log2Width << RADEON_FACE_WIDTH_1_SHIFT) |
++                           (log2Height << RADEON_FACE_HEIGHT_1_SHIFT) |
++                           (log2Width << RADEON_FACE_WIDTH_2_SHIFT) |
++                           (log2Height << RADEON_FACE_HEIGHT_2_SHIFT) |
++                           (log2Width << RADEON_FACE_WIDTH_3_SHIFT) |
++                           (log2Height << RADEON_FACE_HEIGHT_3_SHIFT) |
++                           (log2Width << RADEON_FACE_WIDTH_4_SHIFT) |
++                           (log2Height << RADEON_FACE_HEIGHT_4_SHIFT));
+    }
+ 
+-   return GL_TRUE;
+-}
++   t->pp_txsize = (((firstImage->Width - 1) << RADEON_TEX_USIZE_SHIFT)
++		   | ((firstImage->Height - 1) << RADEON_TEX_VSIZE_SHIFT));
+ 
+-static GLboolean enable_tex_rect( GLcontext *ctx, int unit )
+-{
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+-   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+-   struct gl_texture_object *tObj = texUnit->_Current;
+-   radeonTexObjPtr t = (radeonTexObjPtr) tObj->DriverData;
+-
+-   if (!(t->pp_txformat & RADEON_TXFORMAT_NON_POWER2)) {
+-      t->pp_txformat |= RADEON_TXFORMAT_NON_POWER2;
+-      t->base.dirty_images[0] = ~0;
++   if ( !t->image_override ) {
++      if (firstImage->IsCompressed)
++         t->pp_txpitch = (firstImage->Width + 63) & ~(63);
++      else
++         t->pp_txpitch = ((firstImage->Width * texelBytes) + 63) & ~(63);
++      t->pp_txpitch -= 32;
+    }
+ 
+-   ASSERT(tObj->Target == GL_TEXTURE_RECTANGLE_NV);
+-
+-   if ( t->base.dirty_images[0] ) {
+-      RADEON_FIREVERTICES( rmesa );
+-      radeonSetTexImages( rmesa, tObj );
+-      radeonUploadTexImages( rmesa, (radeonTexObjPtr) tObj->DriverData, 0 );
+-      if ( !t->base.memBlock &&
+-           !t->image_override /* && !rmesa->prefer_gart_client_texturing  FIXME */ ) {
+-	 fprintf(stderr, "%s: upload failed\n", __FUNCTION__);
+-	 return GL_FALSE;
+-      }
++   if (t->base.Target == GL_TEXTURE_RECTANGLE_NV) {
++      t->pp_txformat |= RADEON_TXFORMAT_NON_POWER2;
+    }
+ 
+    return GL_TRUE;
+ }
+ 
+-
+-static GLboolean update_tex_common( GLcontext *ctx, int unit )
++static GLboolean radeon_validate_texture(GLcontext *ctx, struct gl_texture_object *texObj, int unit)
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+-   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+-   struct gl_texture_object *tObj = texUnit->_Current;
+-   radeonTexObjPtr t = (radeonTexObjPtr) tObj->DriverData;
+-   GLenum format;
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
++   radeonTexObj *t = radeon_tex_obj(texObj);
++   int ret;
+ 
+-   /* Fallback if there's a texture border */
+-   if ( tObj->Image[0][tObj->BaseLevel]->Border > 0 ) {
+-      fprintf(stderr, "%s: border\n", __FUNCTION__);
++   if (!radeon_validate_texture_miptree(ctx, texObj))
+       return GL_FALSE;
+-   }
++
++   ret = setup_hardware_state(rmesa, t, unit);
++   if (ret == GL_FALSE)
++     return GL_FALSE;
++
+    /* yuv conversion only works in first unit */
+    if (unit != 0 && (t->pp_txfilter & RADEON_YUV_TO_RGB))
+       return GL_FALSE;
+ 
+-   /* Update state if this is a different texture object to last
+-    * time.
+-    */
+-   if ( rmesa->state.texture.unit[unit].texobj != t ) {
+-      if ( rmesa->state.texture.unit[unit].texobj != NULL ) {
+-	 /* The old texture is no longer bound to this texture unit.
+-	  * Mark it as such.
+-	  */
+-
+-	 rmesa->state.texture.unit[unit].texobj->base.bound &= 
+-	     ~(1UL << unit);
+-      }
++   RADEON_STATECHANGE( rmesa, ctx );
++   rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= 
++     (RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE) << unit;
+ 
+-      rmesa->state.texture.unit[unit].texobj = t;
+-      t->base.bound |= (1UL << unit);
+-      t->dirty_state |= 1<<unit;
+-      driUpdateTextureLRU( (driTextureObject *) t ); /* XXX: should be locked! */
+-   }
++   RADEON_STATECHANGE( rmesa, tcl );
++   rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_ST_BIT(unit);
+ 
++   rmesa->recheck_texgen[unit] = GL_TRUE;
+ 
+-   /* Newly enabled?
+-    */
+-   if ( !(rmesa->hw.ctx.cmd[CTX_PP_CNTL] & (RADEON_TEX_0_ENABLE<<unit))) {
+-      RADEON_STATECHANGE( rmesa, ctx );
+-      rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= 
+-	  (RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE) << unit;
+-
+-      RADEON_STATECHANGE( rmesa, tcl );
+-
+-      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_ST_BIT(unit);
+-
+-      rmesa->recheck_texgen[unit] = GL_TRUE;
+-   }
+-
+-   if (t->dirty_state & (1<<unit)) {
+-      import_tex_obj_state( rmesa, unit, t );
+-      /* may need to update texture matrix (for texrect adjustments) */
+-      rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
+-   }
++   import_tex_obj_state( rmesa, unit, t );
+ 
+    if (rmesa->recheck_texgen[unit]) {
+       GLboolean fallback = !radeon_validate_texgen( ctx, unit );
+       TCL_FALLBACK( ctx, (RADEON_TCL_FALLBACK_TEXGEN_0<<unit), fallback);
+       rmesa->recheck_texgen[unit] = 0;
+-      rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
++      rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX;
+    }
+ 
+-   format = tObj->Image[0][tObj->BaseLevel]->_BaseFormat;
+-   if ( rmesa->state.texture.unit[unit].format != format ||
+-	rmesa->state.texture.unit[unit].envMode != texUnit->EnvMode ) {
+-      rmesa->state.texture.unit[unit].format = format;
+-      rmesa->state.texture.unit[unit].envMode = texUnit->EnvMode;
+-      if ( ! radeonUpdateTextureEnv( ctx, unit ) ) {
+-	 return GL_FALSE;
+-      }
++   if ( ! radeonUpdateTextureEnv( ctx, unit ) ) {
++     return GL_FALSE;
+    }
+-
+    FALLBACK( rmesa, RADEON_FALLBACK_BORDER_MODE, t->border_fallback );
++
++   t->validated = GL_TRUE;
+    return !t->border_fallback;
+ }
+ 
+-
+-
+ static GLboolean radeonUpdateTextureUnit( GLcontext *ctx, int unit )
+ {
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+    struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+ 
+-   if ( texUnit->_ReallyEnabled & (TEXTURE_RECT_BIT) ) {
+-      return (enable_tex_rect( ctx, unit ) &&
+-	      update_tex_common( ctx, unit ));
+-   }
+-   else if ( texUnit->_ReallyEnabled & (TEXTURE_1D_BIT | TEXTURE_2D_BIT) ) {
+-      return (enable_tex_2d( ctx, unit ) &&
+-	      update_tex_common( ctx, unit ));
+-   }
+-   else if ( texUnit->_ReallyEnabled & (TEXTURE_CUBE_BIT) ) {
+-      return (enable_tex_cube( ctx, unit ) &&
+-	      update_tex_common( ctx, unit ));
++
++   if (ctx->Texture.Unit[unit]._ReallyEnabled & TEXTURE_3D_BIT) {
++     return GL_FALSE;
+    }
+-   else if ( texUnit->_ReallyEnabled ) {
+-      return GL_FALSE;
++
++   if (!ctx->Texture.Unit[unit]._ReallyEnabled) {
++     /* disable the unit */
++     disable_tex_obj_state(rmesa, unit);
++     return GL_TRUE;
+    }
+-   else {
+-      disable_tex( ctx, unit );
+-      return GL_TRUE;
++
++   if (!radeon_validate_texture(ctx, ctx->Texture.Unit[unit]._Current, unit)) {
++    _mesa_warning(ctx,
++		  "failed to validate texture for unit %d.\n",
++		  unit);
++    rmesa->state.texture.unit[unit].texobj = NULL;
++    return GL_FALSE;
+    }
++   rmesa->state.texture.unit[unit].texobj = radeon_tex_obj(ctx->Texture.Unit[unit]._Current);
++   return GL_TRUE;
+ }
+ 
+ void radeonUpdateTextureState( GLcontext *ctx )
+ {
+-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+    GLboolean ok;
+ 
++   /* set the ctx all textures off */
++   RADEON_STATECHANGE( rmesa, ctx );
++   rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~((RADEON_TEX_ENABLE_MASK) | (RADEON_TEX_BLEND_ENABLE_MASK));
++
+    ok = (radeonUpdateTextureUnit( ctx, 0 ) &&
+ 	 radeonUpdateTextureUnit( ctx, 1 ) &&
+ 	 radeonUpdateTextureUnit( ctx, 2 ));
+ 
+    FALLBACK( rmesa, RADEON_FALLBACK_TEXTURE, !ok );
+ 
+-   if (rmesa->TclFallback)
++   if (rmesa->radeon.TclFallback)
+       radeonChooseVertexState( ctx );
+ }
+diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.c b/src/mesa/drivers/dri/radeon/radeon_texture.c
+new file mode 100644
+index 0000000..63680b4
+--- /dev/null
++++ b/src/mesa/drivers/dri/radeon/radeon_texture.c
+@@ -0,0 +1,966 @@
++/*
++ * Copyright (C) 2008 Nicolai Haehnle.
++ * Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
++ *
++ * The Weather Channel (TM) funded Tungsten Graphics to develop the
++ * initial release of the Radeon 8500 driver under the XFree86 license.
++ * This notice must be preserved.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining
++ * a copy of this software and associated documentation files (the
++ * "Software"), to deal in the Software without restriction, including
++ * without limitation the rights to use, copy, modify, merge, publish,
++ * distribute, sublicense, and/or sell copies of the Software, and to
++ * permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * The above copyright notice and this permission notice (including the
++ * next paragraph) shall be included in all copies or substantial
++ * portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
++ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
++ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
++ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
++ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
++ *
++ */
++
++#include "main/glheader.h"
++#include "main/imports.h"
++#include "main/context.h"
++#include "main/convolve.h"
++#include "main/mipmap.h"
++#include "main/texcompress.h"
++#include "main/texformat.h"
++#include "main/texstore.h"
++#include "main/teximage.h"
++#include "main/texobj.h"
++
++#include "xmlpool.h"		/* for symbolic values of enum-type options */
++
++#include "radeon_common.h"
++
++#include "radeon_mipmap_tree.h"
++
++/* Copy numrows rows of rowsize bytes from src to dst; dststride/srcstride are the byte distances between row starts. */
++static void copy_rows(void* dst, GLuint dststride, const void* src, GLuint srcstride,
++	GLuint numrows, GLuint rowsize)
++{
++	assert(rowsize <= dststride);
++	assert(rowsize <= srcstride);
++	/* Tightly packed on both sides: a single memcpy covers every row. */
++	if (rowsize == srcstride && rowsize == dststride) {
++		memcpy(dst, src, numrows*rowsize);
++	} else {
++		GLuint i;
++		for(i = 0; i < numrows; ++i) {
++			memcpy(dst, src, rowsize);
++			dst = (char *)dst + dststride; /* step in bytes; arithmetic on void* is only a GNU extension */
++			src = (const char *)src + srcstride;
++		}
++	}
++}
++
++/* textures */
++/**
++ * Allocate an empty texture image object: CALLOC storage sized for a radeon_texture_image.
++ */
++struct gl_texture_image *radeonNewTextureImage(GLcontext *ctx)
++{
++	return CALLOC(sizeof(radeon_texture_image)); /* ctx is not used here */
++}
++
++/**
++ * Free memory associated with this texture image: its miptree reference (or Mesa-owned data), its bo and any leftover Data.
++ */
++void radeonFreeTexImageData(GLcontext *ctx, struct gl_texture_image *timage)
++{
++	radeon_texture_image* image = get_radeon_texture_image(timage);
++	/* With a miptree the image must not be mapped (Data unset); otherwise Mesa frees the image data. */
++	if (image->mt) {
++		radeon_miptree_unreference(image->mt);
++		image->mt = 0;
++		assert(!image->base.Data);
++	} else {
++		_mesa_free_texture_image_data(ctx, timage);
++	}
++	if (image->bo) {
++		radeon_bo_unref(image->bo);
++		image->bo = NULL;
++	}
++	if (timage->Data) { /* anything still attached after the steps above */
++		_mesa_free_texmemory(timage->Data);
++		timage->Data = NULL;
++	}
++}
++
++/* Set Data pointer and RowStride for a mapped texture image from its miptree level/face; image->mt must be set and its bo mapped */
++static void teximage_set_map_data(radeon_texture_image *image)
++{
++	radeon_mipmap_level *lvl = &image->mt->levels[image->mtlevel];
++
++	image->base.Data = image->mt->bo->ptr + lvl->faces[image->mtface].offset; /* bo base + offset of this face */
++	image->base.RowStride = lvl->rowstride / image->mt->bpp; /* presumably bytes -> texels; confirm units of rowstride/bpp */
++}
++
++
++/**
++ * Map a single texture image for glTexImage and friends; does nothing when the image has no miptree.
++ */
++void radeon_teximage_map(radeon_texture_image *image, GLboolean write_enable)
++{
++	if (image->mt) {
++		assert(!image->base.Data); /* must not already be mapped */
++
++		radeon_bo_map(image->mt->bo, write_enable);
++		teximage_set_map_data(image); /* publish Data/RowStride for the mapped bo */
++	}
++}
++
++/* Undo radeon_teximage_map: clear Data and unmap the miptree's bo; does nothing when the image has no miptree. */
++void radeon_teximage_unmap(radeon_texture_image *image)
++{
++	if (image->mt) {
++		assert(image->base.Data); /* must currently be mapped */
++
++		image->base.Data = 0;
++		radeon_bo_unmap(image->mt->bo);
++	}
++}
++
++/**
++ * Map a validated texture for reading during software rendering: maps the miptree bo once and sets Data on every face/level image.
++ */
++void radeonMapTexture(GLcontext *ctx, struct gl_texture_object *texObj)
++{
++	radeonTexObj* t = radeon_tex_obj(texObj);
++	int face, level;
++
++	/* for r100 3D sw fallbacks don't have mt */
++	if (!t->mt)
++	  return;
++
++	radeon_bo_map(t->mt->bo, GL_FALSE); /* GL_FALSE: write_enable off */
++	for(face = 0; face < t->mt->faces; ++face) {
++		for(level = t->mt->firstLevel; level <= t->mt->lastLevel; ++level)
++			teximage_set_map_data(get_radeon_texture_image(texObj->Image[face][level]));
++	}
++}
++
++void radeonUnmapTexture(GLcontext *ctx, struct gl_texture_object *texObj)
++{
++	radeonTexObj* t = radeon_tex_obj(texObj);
++	int face, level;
++	/* Counterpart of radeonMapTexture: clear Data on every face/level image, then unmap the miptree bo. */
++	/* for r100 3D sw fallbacks don't have mt */
++	if (!t->mt)
++	  return;
++
++	for(face = 0; face < t->mt->faces; ++face) {
++		for(level = t->mt->firstLevel; level <= t->mt->lastLevel; ++level)
++			texObj->Image[face][level]->Data = 0;
++	}
++	radeon_bo_unmap(t->mt->bo);
++}
++
++GLuint radeon_face_for_target(GLenum target)
++{
++	switch (target) { /* cube-map face targets yield their offset from POSITIVE_X; every other target yields 0 */
++	case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
++	case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
++	case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
++	case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
++	case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
++	case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
++		return (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
++	default:
++		return 0;
++	}
++}
++
++/**
++ * Wraps Mesa's implementation to ensure that the base level image is mapped.
++ *
++ * This relies on internal details of _mesa_generate_mipmap, in particular
++ * the fact that the memory for recreated texture images is always freed.
++ */
++static void radeon_generate_mipmap(GLcontext *ctx, GLenum target,
++				   struct gl_texture_object *texObj)
++{
++	radeonTexObj* t = radeon_tex_obj(texObj);
++	GLuint nr_faces = (t->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
++	int i, face;
++
++	/* Let Mesa build the mip levels, then record level/face on each image above BaseLevel and drop its miptree reference. */
++	_mesa_generate_mipmap(ctx, target, texObj);
++
++	for (face = 0; face < nr_faces; face++) {
++		for (i = texObj->BaseLevel + 1; i < texObj->MaxLevel; i++) { /* NOTE(review): excludes MaxLevel itself; ends early only on a NULL image - confirm */
++			radeon_texture_image *image;
++
++			image = get_radeon_texture_image(texObj->Image[face][i]);
++
++			if (image == NULL)
++				break;
++
++			image->mtlevel = i;
++			image->mtface = face;
++
++			radeon_miptree_unreference(image->mt); /* presumably tolerates a NULL mt; verify in radeon_mipmap_tree.c */
++			image->mt = NULL;
++		}
++	}
++	
++}
++
++void radeonGenerateMipmap(GLcontext* ctx, GLenum target, struct gl_texture_object *texObj)
++{
++	GLuint face = radeon_face_for_target(target);
++	radeon_texture_image *baseimage = get_radeon_texture_image(texObj->Image[face][texObj->BaseLevel]);
++	/* Keep the base level image mapped (write_enable off) while the mipmaps are generated; assumes the base image exists. */
++	radeon_teximage_map(baseimage, GL_FALSE);
++	radeon_generate_mipmap(ctx, target, texObj);
++	radeon_teximage_unmap(baseimage);
++}
++
++
++/* try to find a 32-bit texture format matching srcFormat/srcType and host byte order, so the upload will only need a memcopy */
++static const struct gl_texture_format *radeonChoose8888TexFormat(radeonContextPtr rmesa,
++								 GLenum srcFormat,
++								 GLenum srcType)
++{
++	const GLuint ui = 1;
++	const GLubyte littleEndian = *((const GLubyte *)&ui); /* first byte of 1 is non-zero only on a little-endian host */
++
++	/* r100 can only do this */
++	if (IS_R100_CLASS(rmesa->radeonScreen))
++	  return _dri_texformat_argb8888;
++	/* Each branch pairs a source format/type with the stored format that has the same byte layout on this host. */
++	if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8) ||
++	    (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE && !littleEndian) ||
++	    (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) ||
++	    (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE && littleEndian)) {
++		return &_mesa_texformat_rgba8888;
++	} else if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) ||
++		   (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE && littleEndian) ||
++		   (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_INT_8_8_8_8) ||
++		   (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE && !littleEndian)) {
++		return &_mesa_texformat_rgba8888_rev;
++	} else if (srcFormat == GL_BGRA && ((srcType == GL_UNSIGNED_BYTE && !littleEndian) ||
++					    srcType == GL_UNSIGNED_INT_8_8_8_8)) {
++		return &_mesa_texformat_argb8888_rev;
++	} else if (srcFormat == GL_BGRA && ((srcType == GL_UNSIGNED_BYTE && littleEndian) ||
++					    srcType == GL_UNSIGNED_INT_8_8_8_8_REV)) {
++		return &_mesa_texformat_argb8888;
++	} else /* no direct match: fall back to the driver's default argb8888 format */
++		return _dri_texformat_argb8888;
++}
++
++const struct gl_texture_format *radeonChooseTextureFormat(GLcontext * ctx,
++							  GLint internalFormat,
++							  GLenum format,
++							  GLenum type)
++{	/* Map a GL internalFormat (refined by the client type, and by format for the 8888 cases) to the texture format used for storage; returns NULL for an unknown internalFormat. */
++	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++	const GLboolean do32bpt =
++	    (rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_32);	/* generic RGB/RGBA requests get 32-bit texels only in this mode */
++	const GLboolean force16bpt =
++	    (rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_FORCE_16);	/* even explicitly sized 8+ bit formats are demoted to 16-bit texels */
++	(void)format;
++
++#if 0
++	fprintf(stderr, "InternalFormat=%s(%d) type=%s format=%s\n",
++		_mesa_lookup_enum_by_nr(internalFormat), internalFormat,
++		_mesa_lookup_enum_by_nr(type), _mesa_lookup_enum_by_nr(format));
++	fprintf(stderr, "do32bpt=%d force16bpt=%d\n", do32bpt, force16bpt);
++#endif
++
++	switch (internalFormat) {
++	case 4:
++	case GL_RGBA:
++	case GL_COMPRESSED_RGBA:
++		switch (type) {
++		case GL_UNSIGNED_INT_10_10_10_2:
++		case GL_UNSIGNED_INT_2_10_10_10_REV:
++			return do32bpt ? _dri_texformat_argb8888 :
++			    _dri_texformat_argb1555;
++		case GL_UNSIGNED_SHORT_4_4_4_4:
++		case GL_UNSIGNED_SHORT_4_4_4_4_REV:
++			return _dri_texformat_argb4444;
++		case GL_UNSIGNED_SHORT_5_5_5_1:
++		case GL_UNSIGNED_SHORT_1_5_5_5_REV:
++			return _dri_texformat_argb1555;
++		default:
++			return do32bpt ? radeonChoose8888TexFormat(rmesa, format, type) :
++			    _dri_texformat_argb4444;
++		}
++
++	case 3:
++	case GL_RGB:
++	case GL_COMPRESSED_RGB:
++		switch (type) {
++		case GL_UNSIGNED_SHORT_4_4_4_4:
++		case GL_UNSIGNED_SHORT_4_4_4_4_REV:
++			return _dri_texformat_argb4444;
++		case GL_UNSIGNED_SHORT_5_5_5_1:
++		case GL_UNSIGNED_SHORT_1_5_5_5_REV:
++			return _dri_texformat_argb1555;
++		case GL_UNSIGNED_SHORT_5_6_5:
++		case GL_UNSIGNED_SHORT_5_6_5_REV:
++			return _dri_texformat_rgb565;
++		default:
++			return do32bpt ? _dri_texformat_argb8888 :
++			    _dri_texformat_rgb565;
++		}
++
++	case GL_RGBA8:
++	case GL_RGB10_A2:
++	case GL_RGBA12:
++	case GL_RGBA16:
++		return !force16bpt ?
++			radeonChoose8888TexFormat(rmesa, format,type) :
++			_dri_texformat_argb4444;
++
++	case GL_RGBA4:
++	case GL_RGBA2:
++		return _dri_texformat_argb4444;
++
++	case GL_RGB5_A1:
++		return _dri_texformat_argb1555;
++
++	case GL_RGB8:
++	case GL_RGB10:
++	case GL_RGB12:
++	case GL_RGB16:
++		return !force16bpt ? _dri_texformat_argb8888 :
++		    _dri_texformat_rgb565;
++
++	case GL_RGB5:
++	case GL_RGB4:
++	case GL_R3_G3_B2:
++		return _dri_texformat_rgb565;
++
++	case GL_ALPHA:
++	case GL_ALPHA4:
++	case GL_ALPHA8:
++	case GL_ALPHA12:
++	case GL_ALPHA16:
++	case GL_COMPRESSED_ALPHA:
++		return _dri_texformat_a8;
++
++	case 1:
++	case GL_LUMINANCE:
++	case GL_LUMINANCE4:
++	case GL_LUMINANCE8:
++	case GL_LUMINANCE12:
++	case GL_LUMINANCE16:
++	case GL_COMPRESSED_LUMINANCE:
++		return _dri_texformat_l8;
++
++	case 2:
++	case GL_LUMINANCE_ALPHA:
++	case GL_LUMINANCE4_ALPHA4:
++	case GL_LUMINANCE6_ALPHA2:
++	case GL_LUMINANCE8_ALPHA8:
++	case GL_LUMINANCE12_ALPHA4:
++	case GL_LUMINANCE12_ALPHA12:
++	case GL_LUMINANCE16_ALPHA16:
++	case GL_COMPRESSED_LUMINANCE_ALPHA:
++		return _dri_texformat_al88;
++
++	case GL_INTENSITY:
++	case GL_INTENSITY4:
++	case GL_INTENSITY8:
++	case GL_INTENSITY12:
++	case GL_INTENSITY16:
++	case GL_COMPRESSED_INTENSITY:
++		return _dri_texformat_i8;
++
++	case GL_YCBCR_MESA:
++		if (type == GL_UNSIGNED_SHORT_8_8_APPLE ||
++		    type == GL_UNSIGNED_BYTE)
++			return &_mesa_texformat_ycbcr;
++		else
++			return &_mesa_texformat_ycbcr_rev;
++
++	case GL_RGB_S3TC:
++	case GL_RGB4_S3TC:
++	case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
++		return &_mesa_texformat_rgb_dxt1;
++
++	case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
++		return &_mesa_texformat_rgba_dxt1;
++
++	case GL_RGBA_S3TC:
++	case GL_RGBA4_S3TC:
++	case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
++		return &_mesa_texformat_rgba_dxt3;
++
++	case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
++		return &_mesa_texformat_rgba_dxt5;
++
++	case GL_ALPHA16F_ARB:
++		return &_mesa_texformat_alpha_float16;
++	case GL_ALPHA32F_ARB:
++		return &_mesa_texformat_alpha_float32;
++	case GL_LUMINANCE16F_ARB:
++		return &_mesa_texformat_luminance_float16;
++	case GL_LUMINANCE32F_ARB:
++		return &_mesa_texformat_luminance_float32;
++	case GL_LUMINANCE_ALPHA16F_ARB:
++		return &_mesa_texformat_luminance_alpha_float16;
++	case GL_LUMINANCE_ALPHA32F_ARB:
++		return &_mesa_texformat_luminance_alpha_float32;
++	case GL_INTENSITY16F_ARB:
++		return &_mesa_texformat_intensity_float16;
++	case GL_INTENSITY32F_ARB:
++		return &_mesa_texformat_intensity_float32;
++	case GL_RGB16F_ARB:	/* RGB float requests are stored in the RGBA float formats */
++		return &_mesa_texformat_rgba_float16;
++	case GL_RGB32F_ARB:
++		return &_mesa_texformat_rgba_float32;
++	case GL_RGBA16F_ARB:
++		return &_mesa_texformat_rgba_float16;
++	case GL_RGBA32F_ARB:
++		return &_mesa_texformat_rgba_float32;
++
++	case GL_DEPTH_COMPONENT:
++	case GL_DEPTH_COMPONENT16:
++	case GL_DEPTH_COMPONENT24:
++	case GL_DEPTH_COMPONENT32:
++#if 0
++		switch (type) {
++		case GL_UNSIGNED_BYTE:
++		case GL_UNSIGNED_SHORT:
++			return &_mesa_texformat_z16;
++		case GL_UNSIGNED_INT:
++			return &_mesa_texformat_z32;
++		case GL_UNSIGNED_INT_24_8_EXT:
++		default:
++			return &_mesa_texformat_z24_s8;
++		}
++#else
++		return &_mesa_texformat_z16;	/* per-type selection above is compiled out: every depth request is stored as z16 */
++#endif
++
++	default:
++		_mesa_problem(ctx,
++			      "unexpected internalFormat 0x%x in radeonChooseTextureFormat",
++			      (int)internalFormat);
++		return NULL;
++	}
++
++	return NULL;		/* never get here */
++}
++
++/**
++ * All glTexImage calls go through this function: it chooses the texel format, (re)allocates storage (miptree or plain memory) and uploads pixels when non-NULL.
++ */
++static void radeon_teximage(
++	GLcontext *ctx, int dims,
++	GLint face, GLint level,
++	GLint internalFormat,
++	GLint width, GLint height, GLint depth,
++	GLsizei imageSize,
++	GLenum format, GLenum type, const GLvoid * pixels,
++	const struct gl_pixelstore_attrib *packing,
++	struct gl_texture_object *texObj,
++	struct gl_texture_image *texImage,
++	int compressed)
++{
++	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++	radeonTexObj* t = radeon_tex_obj(texObj);
++	radeon_texture_image* image = get_radeon_texture_image(texImage);
++	GLuint dstRowStride;
++	GLint postConvWidth = width;
++	GLint postConvHeight = height;
++	GLuint texelBytes;
++
++	radeon_firevertices(rmesa);
++
++	t->validated = GL_FALSE;	/* radeon_validate_texture_miptree() returns early while this is set, so clear it */
++
++	if (ctx->_ImageTransferState & IMAGE_CONVOLUTION_BIT) {
++	       _mesa_adjust_image_for_convolution(ctx, dims, &postConvWidth,
++						  &postConvHeight);
++	}
++
++	/* Choose and fill in the texture format for this image */
++	texImage->TexFormat = radeonChooseTextureFormat(ctx, internalFormat, format, type);
++	_mesa_set_fetch_functions(texImage, dims);
++
++	if (texImage->TexFormat->TexelBytes == 0) {	/* TexelBytes == 0 is treated as "compressed format" */
++		texelBytes = 0;
++		texImage->IsCompressed = GL_TRUE;
++		texImage->CompressedSize =
++			ctx->Driver.CompressedTextureSize(ctx, texImage->Width,
++					   texImage->Height, texImage->Depth,
++					   texImage->TexFormat->MesaFormat);
++	} else {
++		texImage->IsCompressed = GL_FALSE;
++		texImage->CompressedSize = 0;
++
++		texelBytes = texImage->TexFormat->TexelBytes;
++		/* Minimum pitch of 32 bytes */
++		if (postConvWidth * texelBytes < 32) {
++		  postConvWidth = 32 / texelBytes;
++		  texImage->RowStride = postConvWidth;
++		}
++		if (!image->mt) {      
++			assert(texImage->RowStride == postConvWidth);
++		}
++	}
++
++	/* Allocate memory for image */
++	radeonFreeTexImageData(ctx, texImage); /* Mesa core only clears texImage->Data but not image->mt */
++
++	/* Drop a single-level, non-cubemap miptree that covers exactly this level but no longer matches the image */
++	if (t->mt &&
++	    t->mt->firstLevel == level &&
++	    t->mt->lastLevel == level &&
++	    t->mt->target != GL_TEXTURE_CUBE_MAP_ARB &&
++	    !radeon_miptree_matches_image(t->mt, texImage, face, level)) {
++	  radeon_miptree_unreference(t->mt);
++	  t->mt = NULL;
++	}
++
++	if (!t->mt)
++		radeon_try_alloc_miptree(rmesa, t, texImage, face, level);
++	if (t->mt && radeon_miptree_matches_image(t->mt, texImage, face, level)) {	/* image is stored in the texture's miptree */
++		radeon_mipmap_level *lvl;
++		image->mt = t->mt;
++		image->mtlevel = level - t->mt->firstLevel;	/* index into mt->levels[], relative to firstLevel */
++		image->mtface = face;
++		radeon_miptree_reference(t->mt);
++		lvl = &image->mt->levels[image->mtlevel];
++		dstRowStride = lvl->rowstride;	/* NOTE(review): this outer dstRowStride is never read; the upload below declares its own */
++	} else {	/* no usable miptree: keep the texels in plain memory */
++		int size;
++		if (texImage->IsCompressed) {
++			size = texImage->CompressedSize;
++		} else {
++			size = texImage->Width * texImage->Height * texImage->Depth * texImage->TexFormat->TexelBytes;
++		}
++		texImage->Data = _mesa_alloc_texmemory(size);	/* NOTE(review): result is not checked before the upload below writes to it */
++	}
++
++	/* Upload texture image; note that the spec allows pixels to be NULL */
++	if (compressed) {
++		pixels = _mesa_validate_pbo_compressed_teximage(
++			ctx, imageSize, pixels, packing, "glCompressedTexImage");
++	} else {
++		pixels = _mesa_validate_pbo_teximage(
++			ctx, dims, width, height, depth,
++			format, type, pixels, packing, "glTexImage");
++	}
++
++	if (pixels) {
++		radeon_teximage_map(image, GL_TRUE);
++
++		if (compressed) {
++			memcpy(texImage->Data, pixels, imageSize);	/* compressed data is copied verbatim, imageSize bytes */
++		} else {
++			GLuint dstRowStride;
++			if (image->mt) {
++				radeon_mipmap_level *lvl = &image->mt->levels[image->mtlevel];
++				dstRowStride = lvl->rowstride;
++			} else {
++				dstRowStride = texImage->Width * texImage->TexFormat->TexelBytes;	/* tightly packed rows */
++			}
++
++			if (!texImage->TexFormat->StoreImage(ctx, dims,
++						texImage->_BaseFormat,
++						texImage->TexFormat,
++						texImage->Data, 0, 0, 0, /* dstX/Y/Zoffset */
++						dstRowStride,
++						texImage->ImageOffsets,
++						width, height, depth,
++						format, type, pixels, packing))
++				_mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage");
++		}
++
++	}
++
++	/* SGIS_generate_mipmap */
++	if (level == texObj->BaseLevel && texObj->GenerateMipmap) {
++		radeon_generate_mipmap(ctx, texObj->Target, texObj);
++	}
++
++	_mesa_unmap_teximage_pbo(ctx, packing);
++
++	if (pixels)	/* only undo the map performed in the upload branch above */
++	  radeon_teximage_unmap(image);
++
++
++}
++
++void radeonTexImage1D(GLcontext * ctx, GLenum target, GLint level,
++		      GLint internalFormat,
++		      GLint width, GLint border,
++		      GLenum format, GLenum type, const GLvoid * pixels,
++		      const struct gl_pixelstore_attrib *packing,
++		      struct gl_texture_object *texObj,
++		      struct gl_texture_image *texImage)
++{	/* 1D upload: forwards to radeon_teximage() with dims 1, face 0, height = depth = 1, uncompressed; target and border are not used here. */
++	radeon_teximage(ctx, 1, 0, level, internalFormat, width, 1, 1,
++		0, format, type, pixels, packing, texObj, texImage, 0);
++}
++
++void radeonTexImage2D(GLcontext * ctx, GLenum target, GLint level,
++			   GLint internalFormat,
++			   GLint width, GLint height, GLint border,
++			   GLenum format, GLenum type, const GLvoid * pixels,
++			   const struct gl_pixelstore_attrib *packing,
++			   struct gl_texture_object *texObj,
++			   struct gl_texture_image *texImage)
++
++{	/* 2D upload: forwards to radeon_teximage() with dims 2, depth 1, uncompressed; border is not used here. */
++	GLuint face = radeon_face_for_target(target);	/* face index derived from the target */
++
++	radeon_teximage(ctx, 2, face, level, internalFormat, width, height, 1,
++		0, format, type, pixels, packing, texObj, texImage, 0);
++}
++
++void radeonCompressedTexImage2D(GLcontext * ctx, GLenum target,
++				     GLint level, GLint internalFormat,
++				     GLint width, GLint height, GLint border,
++				     GLsizei imageSize, const GLvoid * data,
++				     struct gl_texture_object *texObj,
++				     struct gl_texture_image *texImage)
++{	/* Compressed 2D upload: forwards to radeon_teximage() with compressed = 1, format/type 0 and ctx->Unpack as the packing state. */
++	GLuint face = radeon_face_for_target(target);
++
++	radeon_teximage(ctx, 2, face, level, internalFormat, width, height, 1,
++		imageSize, 0, 0, data, &ctx->Unpack, texObj, texImage, 1);
++}
++
++void radeonTexImage3D(GLcontext * ctx, GLenum target, GLint level,
++		      GLint internalFormat,
++		      GLint width, GLint height, GLint depth,
++		      GLint border,
++		      GLenum format, GLenum type, const GLvoid * pixels,
++		      const struct gl_pixelstore_attrib *packing,
++		      struct gl_texture_object *texObj,
++		      struct gl_texture_image *texImage)
++{	/* 3D upload: forwards to radeon_teximage() with dims 3, face 0, uncompressed; target and border are not used here. */
++	radeon_teximage(ctx, 3, 0, level, internalFormat, width, height, depth,
++		0, format, type, pixels, packing, texObj, texImage, 0);
++}
++
++/**
++ * Update a subregion of the given texture image; shared backend of the radeonTexSubImage{1,2,3}D and radeonCompressedTexSubImage2D hooks (compressed != 0 for the latter).
++ */
++static void radeon_texsubimage(GLcontext* ctx, int dims, int level,
++		GLint xoffset, GLint yoffset, GLint zoffset,
++		GLsizei width, GLsizei height, GLsizei depth,
++		GLsizei imageSize,
++		GLenum format, GLenum type,
++		const GLvoid * pixels,
++		const struct gl_pixelstore_attrib *packing,
++		struct gl_texture_object *texObj,
++		struct gl_texture_image *texImage,
++		int compressed)
++{
++	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++	radeonTexObj* t = radeon_tex_obj(texObj);
++	radeon_texture_image* image = get_radeon_texture_image(texImage);
++
++	radeon_firevertices(rmesa);
++
++	t->validated = GL_FALSE;	/* radeon_validate_texture_miptree() returns early while this is set, so clear it */
++	if (compressed) {
++		pixels = _mesa_validate_pbo_compressed_teximage(
++			ctx, imageSize, pixels, packing, "glCompressedTexSubImage");
++	} else {
++		pixels = _mesa_validate_pbo_teximage(ctx, dims,
++			width, height, depth, format, type, pixels, packing, "glTexSubImage");
++	}
++
++	if (pixels) {
++		GLint dstRowStride;
++		radeon_teximage_map(image, GL_TRUE);
++
++		if (image->mt) {
++			radeon_mipmap_level *lvl = &image->mt->levels[image->mtlevel];
++			dstRowStride = lvl->rowstride;
++		} else {
++			dstRowStride = texImage->RowStride * texImage->TexFormat->TexelBytes;
++		}
++
++		if (compressed) {
++			uint32_t srcRowStride, bytesPerRow, rows; 
++			dstRowStride = _mesa_compressed_row_stride(texImage->TexFormat->MesaFormat, texImage->Width);
++			srcRowStride = _mesa_compressed_row_stride(texImage->TexFormat->MesaFormat, width);
++			bytesPerRow = srcRowStride;
++			rows = (height + 3) / 4;	/* rows of 4-texel-high blocks, rounded up so a partial block row is still copied */
++			/* Copy from the caller's data. NOTE(review): xoffset/yoffset are still not applied to the destination here. */
++			copy_rows(texImage->Data, dstRowStride,  pixels, srcRowStride, rows,
++				  bytesPerRow);
++			
++		} else {
++			if (!texImage->TexFormat->StoreImage(ctx, dims, texImage->_BaseFormat,
++							     texImage->TexFormat, texImage->Data,
++							     xoffset, yoffset, zoffset,
++							     dstRowStride,
++							     texImage->ImageOffsets,
++							     width, height, depth,
++							     format, type, pixels, packing))
++				_mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage");
++		}
++
++	}
++
++	/* GL_SGIS_generate_mipmap */
++	if (level == texObj->BaseLevel && texObj->GenerateMipmap) {
++		radeon_generate_mipmap(ctx, texObj->Target, texObj);
++	}
++	if (pixels) radeon_teximage_unmap(image);	/* the image was only mapped when there was data to upload */
++
++	_mesa_unmap_teximage_pbo(ctx, packing);
++
++
++}
++
++void radeonTexSubImage1D(GLcontext * ctx, GLenum target, GLint level,
++			 GLint xoffset,
++			 GLsizei width,
++			 GLenum format, GLenum type,
++			 const GLvoid * pixels,
++			 const struct gl_pixelstore_attrib *packing,
++			 struct gl_texture_object *texObj,
++			 struct gl_texture_image *texImage)
++{	/* 1D sub-image update: forwards to radeon_texsubimage() with y/z offsets 0, height = depth = 1, uncompressed; target is not used. */
++	radeon_texsubimage(ctx, 1, level, xoffset, 0, 0, width, 1, 1, 0,
++		format, type, pixels, packing, texObj, texImage, 0);
++}
++
++void radeonTexSubImage2D(GLcontext * ctx, GLenum target, GLint level,
++			 GLint xoffset, GLint yoffset,
++			 GLsizei width, GLsizei height,
++			 GLenum format, GLenum type,
++			 const GLvoid * pixels,
++			 const struct gl_pixelstore_attrib *packing,
++			 struct gl_texture_object *texObj,
++			 struct gl_texture_image *texImage)
++{	/* 2D sub-image update: forwards to radeon_texsubimage() with z offset 0, depth 1, uncompressed; target is not used. */
++	radeon_texsubimage(ctx, 2, level, xoffset, yoffset, 0, width, height, 1,
++			   0, format, type, pixels, packing, texObj, texImage,
++			   0);
++}
++
++void radeonCompressedTexSubImage2D(GLcontext * ctx, GLenum target,
++				   GLint level, GLint xoffset,
++				   GLint yoffset, GLsizei width,
++				   GLsizei height, GLenum format,
++				   GLsizei imageSize, const GLvoid * data,
++				   struct gl_texture_object *texObj,
++				   struct gl_texture_image *texImage)
++{	/* Compressed 2D sub-image update: forwards to radeon_texsubimage() with compressed = 1, type 0 and ctx->Unpack as the packing state. */
++	radeon_texsubimage(ctx, 2, level, xoffset, yoffset, 0, width, height, 1,
++		imageSize, format, 0, data, &ctx->Unpack, texObj, texImage, 1);
++}
++
++
++void radeonTexSubImage3D(GLcontext * ctx, GLenum target, GLint level,
++			 GLint xoffset, GLint yoffset, GLint zoffset,
++			 GLsizei width, GLsizei height, GLsizei depth,
++			 GLenum format, GLenum type,
++			 const GLvoid * pixels,
++			 const struct gl_pixelstore_attrib *packing,
++			 struct gl_texture_object *texObj,
++			 struct gl_texture_image *texImage)
++{	/* 3D sub-image update: forwards all offsets and sizes to radeon_texsubimage(), uncompressed; target is not used. */
++	radeon_texsubimage(ctx, 3, level, xoffset, yoffset, zoffset, width, height, depth, 0,
++		format, type, pixels, packing, texObj, texImage, 0);
++}
++
++
++
++/**
++ * Ensure that the given image is stored in the given miptree from now on: copy its texels into mt's slot for (face, level), release the old storage and repoint image->mt (taking a reference on mt).
++ */
++static void migrate_image_to_miptree(radeon_mipmap_tree *mt, radeon_texture_image *image, int face, int level)
++{
++	radeon_mipmap_level *dstlvl = &mt->levels[level - mt->firstLevel];	/* levels[] is indexed relative to firstLevel */
++	unsigned char *dest;
++
++	assert(image->mt != mt);
++	assert(dstlvl->width == image->base.Width);
++	assert(dstlvl->height == image->base.Height);
++	assert(dstlvl->depth == image->base.Depth);
++
++
++	radeon_bo_map(mt->bo, GL_TRUE);
++	dest = mt->bo->ptr + dstlvl->faces[face].offset;
++
++	if (image->mt) {
++		/* Format etc. should match, so we really just need a memcpy().
++		 * In fact, that memcpy() could be done by the hardware in many
++		 * cases, provided that we have a proper memory manager.
++		 */
++		radeon_mipmap_level *srclvl = &image->mt->levels[image->mtlevel];
++
++		assert(srclvl->size == dstlvl->size);
++		assert(srclvl->rowstride == dstlvl->rowstride);
++
++		radeon_bo_map(image->mt->bo, GL_FALSE);
++
++		memcpy(dest,
++			image->mt->bo->ptr + srclvl->faces[face].offset,
++			dstlvl->size);
++		radeon_bo_unmap(image->mt->bo);
++
++		radeon_miptree_unreference(image->mt);
++	} else {	/* image currently lives in plain memory (image->base.Data) */
++		uint32_t srcrowstride;
++		uint32_t height;
++		/* need to confirm this value is correct */
++		if (mt->compressed) {
++			height = image->base.Height / 4;
++			srcrowstride = image->base.RowStride * mt->bpp;
++		} else {
++			height = image->base.Height * image->base.Depth;
++			srcrowstride = image->base.Width * image->base.TexFormat->TexelBytes;
++		}
++
++//		if (mt->tilebits)
++//			WARN_ONCE("%s: tiling not supported yet", __FUNCTION__);
++
++		copy_rows(dest, dstlvl->rowstride, image->base.Data, srcrowstride,
++			  height, srcrowstride);
++
++		_mesa_free_texmemory(image->base.Data);
++		image->base.Data = 0;
++	}
++
++	radeon_bo_unmap(mt->bo);
++
++	image->mt = mt;
++	image->mtface = face;
++	image->mtlevel = level - mt->firstLevel;	/* store the levels[] index (as dstlvl/srclvl use it), not the absolute GL level */
++	radeon_miptree_reference(image->mt);
++}
++
++int radeon_validate_texture_miptree(GLcontext * ctx, struct gl_texture_object *texObj)
++{	/* Make sure texObj has one matching miptree holding every face/level image. Returns GL_TRUE on success or when nothing needs doing, GL_FALSE for a bordered base image or a failed miptree allocation. */
++	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
++	radeonTexObj *t = radeon_tex_obj(texObj);
++	radeon_texture_image *baseimage = get_radeon_texture_image(texObj->Image[0][texObj->BaseLevel]);
++	int face, level;
++
++	if (t->validated || t->image_override)
++		return GL_TRUE;
++
++	if (RADEON_DEBUG & DEBUG_TEXTURE)
++		fprintf(stderr, "%s: Validating texture %p now\n", __FUNCTION__, texObj);
++
++	if (baseimage->base.Border > 0)
++		return GL_FALSE;
++
++	/* Ensure a matching miptree exists.
++	 *
++	 * Differing mipmap trees can result when the app uses TexImage to
++	 * change texture dimensions.
++	 *
++	 * Prefer to use base image's miptree if it
++	 * exists, since that most likely contains more valid data (remember
++	 * that the base level is usually significantly larger than the rest
++	 * of the miptree, so cubemaps are the only possible exception).
++	 */
++	if (baseimage->mt &&
++	    baseimage->mt != t->mt &&
++	    radeon_miptree_matches_texture(baseimage->mt, &t->base)) {
++		radeon_miptree_unreference(t->mt);
++		t->mt = baseimage->mt;
++		radeon_miptree_reference(t->mt);
++	} else if (t->mt && !radeon_miptree_matches_texture(t->mt, &t->base)) {
++		radeon_miptree_unreference(t->mt);
++		t->mt = 0;
++	}
++
++	if (!t->mt) {
++		if (RADEON_DEBUG & DEBUG_TEXTURE)
++			fprintf(stderr, " Allocate new miptree\n");
++		radeon_try_alloc_miptree(rmesa, t, &baseimage->base, 0, texObj->BaseLevel);
++		if (!t->mt) {
++			_mesa_problem(ctx, "radeon_validate_texture_miptree failed to alloc miptree");
++			return GL_FALSE;
++		}
++	}
++
++	/* Ensure all images are stored in the single main miptree */
++	for(face = 0; face < t->mt->faces; ++face) {
++		for(level = t->mt->firstLevel; level <= t->mt->lastLevel; ++level) {
++			radeon_texture_image *image = get_radeon_texture_image(texObj->Image[face][level]);
++			if (RADEON_DEBUG & DEBUG_TEXTURE)
++				fprintf(stderr, " face %i, level %i... %p vs %p ", face, level, t->mt, image->mt);
++			if (t->mt == image->mt) {
++				if (RADEON_DEBUG & DEBUG_TEXTURE)
++					fprintf(stderr, "OK\n");
++				continue;
++			}
++
++			if (RADEON_DEBUG & DEBUG_TEXTURE)
++				fprintf(stderr, "migrating\n");
++			migrate_image_to_miptree(t->mt, image, face, level);
++		}
++	}
++
++	return GL_TRUE;
++}
++
++
++/**
++ * Need to map texture image into memory before copying image data,
++ * then unmap it.  Shared by the compressed and uncompressed readback hooks (selected by 'compressed').
++ */
++static void
++radeon_get_tex_image(GLcontext * ctx, GLenum target, GLint level,
++		     GLenum format, GLenum type, GLvoid * pixels,
++		     struct gl_texture_object *texObj,
++		     struct gl_texture_image *texImage, int compressed)
++{
++	radeon_texture_image *image = get_radeon_texture_image(texImage);
++
++	if (image->mt) {
++		/* Map the texture image read-only */
++		radeon_teximage_map(image, GL_FALSE);
++	} else {
++		/* Image hasn't been uploaded to a miptree yet */
++		assert(image->base.Data);
++	}
++
++	if (compressed) {	/* the actual readback is done by the Mesa core helpers */
++		_mesa_get_compressed_teximage(ctx, target, level, pixels,
++					      texObj, texImage);
++	} else {
++		_mesa_get_teximage(ctx, target, level, format, type, pixels,
++				   texObj, texImage);
++	}
++     
++	if (image->mt) {	/* matches the map above; nothing to undo for non-miptree images */
++		radeon_teximage_unmap(image);
++	}
++}
++
++void
++radeonGetTexImage(GLcontext * ctx, GLenum target, GLint level,
++		  GLenum format, GLenum type, GLvoid * pixels,
++		  struct gl_texture_object *texObj,
++		  struct gl_texture_image *texImage)
++{	/* Uncompressed readback: forwards to radeon_get_tex_image() with compressed = 0. */
++	radeon_get_tex_image(ctx, target, level, format, type, pixels,
++			     texObj, texImage, 0);
++}
++
++void
++radeonGetCompressedTexImage(GLcontext *ctx, GLenum target, GLint level,
++			    GLvoid *pixels,
++			    struct gl_texture_object *texObj,
++			    struct gl_texture_image *texImage)
++{	/* Compressed readback: forwards to radeon_get_tex_image() with format/type 0 and compressed = 1. */
++	radeon_get_tex_image(ctx, target, level, 0, 0, pixels,
++			     texObj, texImage, 1);
++}
+diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.h b/src/mesa/drivers/dri/radeon/radeon_texture.h
+new file mode 100644
+index 0000000..d90fda7
+--- /dev/null
++++ b/src/mesa/drivers/dri/radeon/radeon_texture.h
+@@ -0,0 +1,118 @@
++/*
++ * Copyright (C) 2008 Nicolai Haehnle.
++ * Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
++ *
++ * The Weather Channel (TM) funded Tungsten Graphics to develop the
++ * initial release of the Radeon 8500 driver under the XFree86 license.
++ * This notice must be preserved.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining
++ * a copy of this software and associated documentation files (the
++ * "Software"), to deal in the Software without restriction, including
++ * without limitation the rights to use, copy, modify, merge, publish,
++ * distribute, sublicense, and/or sell copies of the Software, and to
++ * permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * The above copyright notice and this permission notice (including the
++ * next paragraph) shall be included in all copies or substantial
++ * portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
++ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
++ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
++ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
++ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
++ *
++ */
++
++#ifndef RADEON_TEXTURE_H
++#define RADEON_TEXTURE_H
++struct gl_texture_image *radeonNewTextureImage(GLcontext *ctx);
++void radeonFreeTexImageData(GLcontext *ctx, struct gl_texture_image *timage);
++
++void radeon_teximage_map(radeon_texture_image *image, GLboolean write_enable);
++void radeon_teximage_unmap(radeon_texture_image *image);
++void radeonMapTexture(GLcontext *ctx, struct gl_texture_object *texObj);
++void radeonUnmapTexture(GLcontext *ctx, struct gl_texture_object *texObj);
++void radeonGenerateMipmap(GLcontext* ctx, GLenum target, struct gl_texture_object *texObj);
++int radeon_validate_texture_miptree(GLcontext * ctx, struct gl_texture_object *texObj);
++GLuint radeon_face_for_target(GLenum target);
++const struct gl_texture_format *radeonChooseTextureFormat(GLcontext * ctx,
++							  GLint internalFormat,
++							  GLenum format,
++							  GLenum type);
++
++void radeonTexImage1D(GLcontext * ctx, GLenum target, GLint level,
++		      GLint internalFormat,
++		      GLint width, GLint border,
++		      GLenum format, GLenum type, const GLvoid * pixels,
++		      const struct gl_pixelstore_attrib *packing,
++		      struct gl_texture_object *texObj,
++		      struct gl_texture_image *texImage);
++void radeonTexImage2D(GLcontext * ctx, GLenum target, GLint level,
++		      GLint internalFormat,
++		      GLint width, GLint height, GLint border,
++		      GLenum format, GLenum type, const GLvoid * pixels,
++		      const struct gl_pixelstore_attrib *packing,
++		      struct gl_texture_object *texObj,
++		      struct gl_texture_image *texImage);
++void radeonCompressedTexImage2D(GLcontext * ctx, GLenum target,
++				GLint level, GLint internalFormat,
++				GLint width, GLint height, GLint border,
++				GLsizei imageSize, const GLvoid * data,
++				struct gl_texture_object *texObj,
++				struct gl_texture_image *texImage);
++void radeonTexImage3D(GLcontext * ctx, GLenum target, GLint level,
++		      GLint internalFormat,
++		      GLint width, GLint height, GLint depth,
++		      GLint border,
++		      GLenum format, GLenum type, const GLvoid * pixels,
++		      const struct gl_pixelstore_attrib *packing,
++		      struct gl_texture_object *texObj,
++		      struct gl_texture_image *texImage);
++void radeonTexSubImage1D(GLcontext * ctx, GLenum target, GLint level,
++			 GLint xoffset,
++			 GLsizei width,
++			 GLenum format, GLenum type,
++			 const GLvoid * pixels,
++			 const struct gl_pixelstore_attrib *packing,
++			 struct gl_texture_object *texObj,
++			 struct gl_texture_image *texImage);
++void radeonTexSubImage2D(GLcontext * ctx, GLenum target, GLint level,
++				GLint xoffset, GLint yoffset,
++				GLsizei width, GLsizei height,
++				GLenum format, GLenum type,
++				const GLvoid * pixels,
++				const struct gl_pixelstore_attrib *packing,
++				struct gl_texture_object *texObj,
++				struct gl_texture_image *texImage);
++void radeonCompressedTexSubImage2D(GLcontext * ctx, GLenum target,
++				   GLint level, GLint xoffset,
++				   GLint yoffset, GLsizei width,
++				   GLsizei height, GLenum format,
++				   GLsizei imageSize, const GLvoid * data,
++				   struct gl_texture_object *texObj,
++				   struct gl_texture_image *texImage);
++
++void radeonTexSubImage3D(GLcontext * ctx, GLenum target, GLint level,
++			 GLint xoffset, GLint yoffset, GLint zoffset,
++			 GLsizei width, GLsizei height, GLsizei depth,
++			 GLenum format, GLenum type,
++			 const GLvoid * pixels,
++			 const struct gl_pixelstore_attrib *packing,
++			 struct gl_texture_object *texObj,
++			 struct gl_texture_image *texImage);
++
++void radeonGetTexImage(GLcontext * ctx, GLenum target, GLint level,
++		       GLenum format, GLenum type, GLvoid * pixels,
++		       struct gl_texture_object *texObj,
++		       struct gl_texture_image *texImage);
++void radeonGetCompressedTexImage(GLcontext *ctx, GLenum target, GLint level,
++				 GLvoid *pixels,
++				 struct gl_texture_object *texObj,
++				 struct gl_texture_image *texImage);
++
++#endif
+diff --git a/src/mesa/drivers/dri/radeon/server/radeon_reg.h b/src/mesa/drivers/dri/radeon/server/radeon_reg.h
+index 596a8aa..0df634b 100644
+--- a/src/mesa/drivers/dri/radeon/server/radeon_reg.h
++++ b/src/mesa/drivers/dri/radeon/server/radeon_reg.h
+@@ -2031,6 +2031,9 @@
+ #define RADEON_CP_PACKET3_3D_DRAW_INDX              0xC0002A00
+ #define RADEON_CP_PACKET3_LOAD_PALETTE              0xC0002C00
+ #define RADEON_CP_PACKET3_3D_LOAD_VBPNTR            0xC0002F00
++#define R200_CP_CMD_3D_DRAW_VBUF_2      0xC0003400
++#define R200_CP_CMD_3D_DRAW_IMMD_2      0xC0003500
++#define R200_CP_CMD_3D_DRAW_INDX_2      0xC0003600
+ #define RADEON_CP_PACKET3_CNTL_PAINT                0xC0009100
+ #define RADEON_CP_PACKET3_CNTL_BITBLT               0xC0009200
+ #define RADEON_CP_PACKET3_CNTL_SMALLTEXT            0xC0009300