/*
 * $Id:$
 *
 * $Date$
 * $Revision$
 *
 * (C) 1999-2005 The MiniGL team
 * All rights reserved
 *
 * This file is part of the MiniGL library project
 * See the file Licence.txt for more details
 *
 */

#include "displaylists.h"
#include "sysinc.h"
#include <stdio.h>
#include <string.h>
#include <alloca.h>
#include <proto/timer.h>
#include "mgl/gl.h"
#include "mgl/mgltypes.h"
#include <math.h>
#include "smartlock.h"
#include <assert.h>
#include "util.h"

#include "mgl_profileitems.h"

/*
 * Conversion table; cgl_GLDrawPixels indexes it with the value returned by
 * SelectInternalFormat() and reads the w3dBpp, w3dFormat and write members.
 */
extern struct InternalToW3D i2w3d[];

/*
 * Helpers that are declared here but not defined in this part of the file.
 * EnsureBitmapBackingStore() is called by cgl_GLDrawPixels before it writes
 * to context->CurrentBitmapBackingStore; a FALSE return aborts the draw.
 */
BOOL EnsureBitmapBackingStore(GLcontext context, uint32 dataSize);
extern GLsizei next_pwr(GLsizei x);
extern void m_CombineMatrices(GLcontext context);
extern void RebindTextures(GLcontext context);
extern void tex_EstablishEnvCombine(GLcontext context);
extern int32 SelectInternalFormat(GLcontext context, GLenum internalformat);
extern float CLAMPF(float a);

/* May return NULL; cgl_GLDrawPixels treats NULL as "no transfer step needed". */
_glTransferFn MGLSelectTransfer(GLenum format, struct GLpixel_state *pixelState);

/* Returns NULL for an unsupported format/type pair (flagged as GL_INVALID_ENUM by the caller). */
extern _glUnpackFn MGLSelectUnpacker(GLenum format, GLenum type, GLuint *pixelStride);
extern GLboolean isPixelStateNeutral(GLcontext context);
/* Used by GLDrawPixelsDepth to obtain the Warp3D compare mode to restore after drawing. */
extern int32 mapMGLZTestToW3D(int32 mglZ, int32 defZ);

/*
 * Vertex layout of the textured quads drawn by cgl_GLDrawPixels.
 * The array is handed to W3D_InterleavedArray() with a stride of
 * sizeof(DPVertex), so the member order must not be changed.
 */
typedef struct
{
	float x,y,z;	/* window position; z is set to the current raster distance */
	float u,v,w;	/* texture coordinates in texels; w is set to 1 - raster distance */
} DPVertex;

/* Vertex format flags passed to W3D_InterleavedArray() together with DPVertex arrays. */
#define DPVERTEX_FORMAT (W3D_VFORMAT_TCOORD_0)

/*
 * GLDrawPixelsDepth - back end of glDrawPixels() for format == GL_DEPTH_COMPONENT.
 *
 * context : current GL context
 * width   : width of the source image in pixels
 * height  : height of the source image in pixels
 * type    : component type of the source (GL_BYTE .. GL_FLOAT); anything else
 *           flags GL_INVALID_ENUM
 * pixels  : source image, bottom row first
 *
 * Only called from cgl_GLDrawPixels, after the raster position and the basic
 * parameters have been checked.
 *
 * The depth values are written by drawing one row of 3D points per destination
 * row with colour writes masked out and the depth test forced to "always".
 * Pixel zoom (including mirroring through negative zoom factors), scissor and
 * window clipping are resolved per destination pixel: every destination column
 * and row is mapped back to the source texel that covers it, so the source is
 * never read outside of width x height.
 */
void GLDrawPixelsDepth(GLcontext context, GLsizei width, GLsizei height, GLenum type, const GLvoid *pixels)
{
	/*
		This is a pretty crude implementation of glDrawPixels(GL_DEPTH_COMPONENT).
		Hubert Maier of ResidualVM told me about a drawing bug with "Grim Fandango" which I found was because of
		MiniGL's lack of support for this functionality here.

		First I did the obvious, namely to base everything on W3D_WriteZSpan.
		Problems with that approach:
		1. W3D_WriteZSpan is not implemented in RadeonHD SI drivers at all.
		2. W3D_WriteZSpan is super-slow on R100 / R200.
		So all in all W3D_WriteZSpan is pretty useless on the most common AOS4 systems right now.

		Therefore I tried the next thing that came to my mind:
		draw lots of 3D-points without updating the color-buffer.
		As it turns out this approach, although pretty brute force, runs rather fast even on Rx00 systems.
		A typical Grim Fandango scene requires this function to render at least 307200 points (most often even more, about 420000 or so)
		and the framerate is still rather smooth.
		Funny enough: I tried to be smart and do some sort of RLE compression for lines of depth-pixels with the same value.
		As it turned out this slows things down on the R200 extremely.

		Note: So far this has only been tested with ResidualVM Grim Fandango.
		There it seems to work flawlessly.
		But this game only uses type=GL_UNSIGNED_SHORT, depth-bias = 0, depth-scale = 1,
		no pixel-zoom, no scissor, input coordinates are always valid, always 640x480 window-size.
		So the zoom / clipping / other-type paths have not been exercised by a real application yet.

		There's room for improvement, for example:
		- probably it's better to do the clamping inside the respective width-loop,
		  following the rule of thumb: do as much calculus as possible on data being in the cache.
		- increase the point-packet-size; row by row is simple but certainly not too efficient...

		Cheers,
		Daytona675x
	*/

	// fetch eventual scale / bias
	float scale_depth=context->pixel.depth_scale;
	float bias_depth=context->pixel.depth_bias;
	// depth produced by the largest possible integer input (scale+bias for signed and unsigned types alike)
	const float depth_at_max_input=scale_depth+bias_depth;

	// validate "type", set the source pixel size and fold the integer normalization into scale / bias.
	// Signed integers are first shifted into an unsigned range (c + 2^(b-1)) and then mapped with
	//   scale * ((c + 2^(b-1)) / (2^(b-1) - 0.5) - 1) + bias  ==  scale * (2c+1)/(2^b-1) + bias
	// which is why scale is subtracted from bias and the divisors end in .5
	unsigned int src_pixel_size_in_bytes=0;
	switch(type) {
		case GL_BYTE:
			src_pixel_size_in_bytes=1;
			bias_depth-=scale_depth;
			scale_depth/=127.5f;
			break;
		case GL_UNSIGNED_BYTE:
			src_pixel_size_in_bytes=1;
			scale_depth/=255.0f;
			break;
		case GL_SHORT:
			src_pixel_size_in_bytes=2;
			bias_depth-=scale_depth;
			scale_depth/=32767.5f;
			break;
		case GL_UNSIGNED_SHORT:
			src_pixel_size_in_bytes=2;
			scale_depth/=65535.0f;
			break;
		case GL_INT:
			src_pixel_size_in_bytes=4;
			bias_depth-=scale_depth;
			scale_depth/=2147483647.5f;
			break;
		case GL_UNSIGNED_INT:
			src_pixel_size_in_bytes=4;
			scale_depth/=4294967295.0f;
			break;
		case GL_FLOAT:
			src_pixel_size_in_bytes=4;
			break;
		default:
			GLFlagError(context,1,GL_INVALID_ENUM);
			return; // explicit, in case GLFlagError does not leave the function itself
	}

	// depth produced by the smallest possible integer input:
	// "bias" for unsigned types, "bias - scale" for signed types (already folded into bias_depth above).
	const float depth_at_min_input=bias_depth;
	// floats can hold anything; integers only need clamping if one end of their output range leaves 0..1
	const GLboolean needs_clamp=(type==GL_FLOAT)
		|| (depth_at_min_input<0.0f || depth_at_min_input>1.0f)
		|| (depth_at_max_input<0.0f || depth_at_max_input>1.0f);

	// determine pixel-zoom.
	// pixel_state_neutral is just an indicator that tells us to take a closer look at pixel.zoom
	float zoom_x=1.0f;
	float zoom_y=1.0f;
	if(!context->pixel.pixel_state_neutral) {
		zoom_x=context->pixel.zoom_x;
		zoom_y=context->pixel.zoom_y;
	}
	const GLboolean zoom_neutral=(zoom_x==1.0f && zoom_y==1.0f);
	int dst_width=width;
	int dst_height=height;
	if(!zoom_neutral) {
		dst_width=(int)(((float)width)*zoom_x);
		dst_height=(int)(((float)height)*zoom_y);
	}

	// compute display boundaries
	int bounds_x1;
	int bounds_y1;
	int bounds_x2;
	int bounds_y2;
	if(context->enable.ScissorTest) {
		bounds_x1=context->w3dScissor.left;
		bounds_y1=context->w3dScissor.top;
		bounds_x2=bounds_x1+context->w3dScissor.width;
		bounds_y2=bounds_y1+context->w3dScissor.height;
	} else {
		bounds_x1=0;
		bounds_y1=0;
		bounds_x2=context->w3dContext->width;
		bounds_y2=context->w3dContext->height;
	}

	// calculate the output rectangle [x1,x2) x [y1,y2).
	// The raster position is the lower-left corner of source pixel (0,0); y grows downwards here,
	// so the image extends from raster_y upwards - the same rectangle cgl_GLDrawPixels uses for its quads.
	const int raster_x=context->current.RasterPos.x;
	const int raster_y=context->current.RasterPos.y;
	int x1=raster_x;
	int x2=raster_x+dst_width;
	int y1=raster_y-dst_height;
	int y2=raster_y;

	// negative zoom factors mirror the rectangle around the raster position
	if(x2<x1) {
		int tmp_int=x1; x1=x2; x2=tmp_int;
	}
	if(y2<y1) {
		int tmp_int=y1; y1=y2; y2=tmp_int;
	}

	// clip the rectangle against the display boundaries
	if(x1<bounds_x1) x1=bounds_x1;
	if(y1<bounds_y1) y1=bounds_y1;
	if(x2>bounds_x2) x2=bounds_x2;
	if(y2>bounds_y2) y2=bounds_y2;

	// nothing visible (also covers width/height 0 and zoom factors that collapse the image).
	// Must be checked before alloca below ever sees the width.
	if(x1>=x2 || y1>=y2) return;
	const int clip_width=x2-x1;

	// prepare the source-pointer, applying the initial eventual skip_pixels and skip_rows settings
	const unsigned int src_row_length_in_bytes=(MAX(context->pixel_store.unpack.row_length,width))*src_pixel_size_in_bytes;
	const unsigned char *src_base=&((const unsigned char *)pixels)[(context->pixel_store.unpack.skip_pixels*src_pixel_size_in_bytes)+(src_row_length_in_bytes*context->pixel_store.unpack.skip_rows)];

	// setup states
	const uint32 old_gouraud=IWarp3D->W3D_GetState(context->w3dContext,W3D_GOURAUD);
	if(old_gouraud!=W3D_DISABLED) IWarp3D->W3D_SetState(context->w3dContext,W3D_GOURAUD,W3D_DISABLE);
	const uint32 old_cullface=IWarp3D->W3D_GetState(context->w3dContext,W3D_CULLFACE);
	if(old_cullface!=W3D_DISABLED) IWarp3D->W3D_SetState(context->w3dContext, W3D_CULLFACE,W3D_DISABLE);
	const uint32 old_texmapping=IWarp3D->W3D_GetState(context->w3dContext,W3D_TEXMAPPING);
	if(old_texmapping!=W3D_DISABLED) IWarp3D->W3D_SetState(context->w3dContext, W3D_TEXMAPPING,W3D_DISABLE);
	const uint32 old_zbuffer=IWarp3D->W3D_GetState(context->w3dContext,W3D_ZBUFFER);
	if(old_zbuffer!=W3D_ENABLED) IWarp3D->W3D_SetState(context->w3dContext,W3D_ZBUFFER,W3D_ENABLE);
	const uint32 old_zbufferupdate=IWarp3D->W3D_GetState(context->w3dContext,W3D_ZBUFFERUPDATE);
	if(old_zbufferupdate!=W3D_ENABLED) IWarp3D->W3D_SetState(context->w3dContext,W3D_ZBUFFERUPDATE,W3D_ENABLE);
	const int32 old_zcomparemode=mapMGLZTestToW3D(context->depth_buffer.DepthFunc, W3D_ILLEGALINPUT);
	if(old_zcomparemode!=W3D_Z_ALWAYS) IWarp3D->W3D_SetZCompareMode(context->w3dContext,W3D_Z_ALWAYS);
	IWarp3D->W3D_SetColorMask(context->w3dContext,0,0,0,0);

	// create and draw z-geometry
	// line by line for now
	typedef struct {
		float coord[3];
	} ZVertex ;
	ZVertex *zvertex_buf=(ZVertex *)alloca(sizeof(ZVertex)*clip_width);
	// for every visible destination column: index of the source column that covers it
	int *src_col=(int *)alloca(sizeof(int)*clip_width);
	IWarp3D->W3D_InterleavedArray(context->w3dContext,zvertex_buf,sizeof(ZVertex),0,0);

	// x-coordinate and source column are static
	for(int i=0;i<clip_width;++i) {
		const int dst_x=x1+i;
		int col;
		if(zoom_neutral) {
			col=dst_x-raster_x;
		} else {
			// source pixel n covers raster_x+zoom_x*n .. raster_x+zoom_x*(n+1); sample at the pixel center.
			// zoom_x cannot be 0 here, that would have produced an empty rectangle above.
			col=(int)((((float)(dst_x-raster_x))+0.5f)/zoom_x);
		}
		// guard against rounding at the edges
		if(col<0) col=0;
		else if(col>=width) col=width-1;
		src_col[i]=col;
		zvertex_buf[i].coord[0]=((float)dst_x)+0.5f;
	}

	// lock
	if(context->LockMode==MGL_LOCK_SMART) smartlock_beginDraw(context->smartLock);
	else if(context->w3dLocked==GL_FALSE) IWarp3D->W3D_LockHardware(context->w3dContext);

// converts one source row of integer type SRC_TYPE; OFFSET shifts signed values into an unsigned range
#define DP_DEPTH_ROW(SRC_TYPE,OFFSET) \
	do { \
		const SRC_TYPE *src=(const SRC_TYPE *)src_row; \
		for(int i=0;i<clip_width;++i) { \
			zvertex_buf[i].coord[1]=y_flt; \
			zvertex_buf[i].coord[2]=(scale_depth*((float)((OFFSET)+src[src_col[i]])))+bias_depth; \
		} \
	} while(0)

	// bottom-up, i.e. in source row order for positive zoom_y
	for(int y=y2;y>y1;) {
		--y;
		const float y_flt=((float)y)+0.5f;

		// source row that covers this destination row (source row 0 sits directly above raster_y)
		int row;
		if(zoom_neutral) {
			row=raster_y-1-y;
		} else {
			row=(int)((((float)(raster_y-y))-0.5f)/zoom_y);
		}
		if(row<0) row=0;
		else if(row>=height) row=height-1;
		const unsigned char *src_row=src_base+((size_t)row)*src_row_length_in_bytes;

		switch(type) {
			case GL_FLOAT: {
				const float *src=(const float *)src_row;
				for(int i=0;i<clip_width;++i) {
					zvertex_buf[i].coord[1]=y_flt;
					zvertex_buf[i].coord[2]=scale_depth*src[src_col[i]]+bias_depth;
				}
				break;
			}
			case GL_UNSIGNED_SHORT:
				DP_DEPTH_ROW(unsigned short,0);
				break;
			case GL_SHORT:
				DP_DEPTH_ROW(short,32768);
				break;
			case GL_UNSIGNED_BYTE:
				DP_DEPTH_ROW(unsigned char,0);
				break;
			case GL_BYTE:
				// explicitly "signed char": plain char is unsigned on some targets
				DP_DEPTH_ROW(signed char,128);
				break;
			case GL_UNSIGNED_INT:
				DP_DEPTH_ROW(unsigned int,0u);
				break;
			case GL_INT:
				// 64 bit offset so that the sum cannot overflow
				DP_DEPTH_ROW(int,2147483648LL);
				break;
			default:
				break; // cannot happen, type has been validated above
		}

		// clamp if necessary. Note: maybe better to do that inside the width-loops, more cache friendly
		if(needs_clamp) {
			for(int i=0;i<clip_width;++i) {
				float v=zvertex_buf[i].coord[2];
				if(v<=0.0f) v=0.0f;
				else if(v>1.0f) v=1.0f;
				zvertex_buf[i].coord[2]=v;
			}
		}

		// draw
		IWarp3D->W3D_DrawArray(context->w3dContext,W3D_PRIMITIVE_POINTS,0,clip_width);
	}

#undef DP_DEPTH_ROW

	// unlock
	if(context->LockMode==MGL_LOCK_SMART) smartlock_endDraw(context->smartLock);
	else if(context->w3dLocked==GL_FALSE) IWarp3D->W3D_UnLockHardware(context->w3dContext);

	// zvertex_buf lives on our stack frame: don't leave Warp3D pointing at it.
	// Same restore as at the end of cgl_GLDrawPixels.
	IWarp3D->W3D_InterleavedArray(context->w3dContext,context->VertexBuffer,sizeof(MGLVertex),context->VertexFormat,W3D_TEXCOORD_NORMALIZED);

	// restore states
	IWarp3D->W3D_SetColorMask(context->w3dContext,context->color_buffer.WriteMaskRed,context->color_buffer.WriteMaskGreen,context->color_buffer.WriteMaskBlue,context->color_buffer.WriteMaskAlpha);
	if(old_zcomparemode!=W3D_Z_ALWAYS) IWarp3D->W3D_SetZCompareMode(context->w3dContext,old_zcomparemode);
	if(old_zbufferupdate!=W3D_ENABLED) IWarp3D->W3D_SetState(context->w3dContext,W3D_ZBUFFERUPDATE,old_zbufferupdate);
	if(old_zbuffer!=W3D_ENABLED) IWarp3D->W3D_SetState(context->w3dContext,W3D_ZBUFFER,old_zbuffer);
	if(old_texmapping!=W3D_DISABLED) IWarp3D->W3D_SetState(context->w3dContext, W3D_TEXMAPPING,old_texmapping);
	if(old_cullface!=W3D_DISABLED) IWarp3D->W3D_SetState(context->w3dContext,W3D_CULLFACE,old_cullface);
	if(old_gouraud!=W3D_DISABLED) IWarp3D->W3D_SetState(context->w3dContext,W3D_GOURAUD,old_gouraud);
}

/*
 * cgl_GLDrawPixels - implementation of glDrawPixels().
 *
 * Self    : GL context interface
 * width   : width of the source image in pixels (negative: GL_INVALID_VALUE)
 * height  : height of the source image in pixels (negative: GL_INVALID_VALUE)
 * format  : pixel format; GL_DEPTH_COMPONENT is forwarded to GLDrawPixelsDepth
 * type    : component type; an unsupported format/type pair flags GL_INVALID_ENUM
 * pixels  : source image (NULL: GL_INVALID_VALUE)
 *
 * Does nothing if the current raster position is invalid. Colour images are
 * converted tile by tile into a temporary texture and drawn as textured quads
 * at the raster position, scaled by the current pixel zoom.
 */
void cgl_GLDrawPixels(struct GLContextIFace *Self, GLsizei width, GLsizei height,
		GLenum format, GLenum type, const GLvoid *pixels)
{
	GLcontext context = GET_INSTANCE(Self);
	DL_CHECK(DrawPixels(Self, width, height, format, type, pixels));

	PROFILE_ENTRY(FID_CGL_GL_DRAW_PIXELS);

	/* Check for valid raster pos. If invalid, just return */
	/* NOTE(review): this and the other early exits skip PROFILE_EXIT - confirm that is intended */
	if (!context->current.RasterPosValid) {
		return;
	}

	/* Check input parameters */
	GLFlagError(context, pixels == NULL, GL_INVALID_VALUE);
	GLFlagError(context, width < 0, GL_INVALID_VALUE);
	GLFlagError(context, height < 0, GL_INVALID_VALUE);

	if (format == GL_DEPTH_COMPONENT) {
		GLDrawPixelsDepth(context, width, height, type, pixels);
		return;
	}

	GLuint src_pixel_stride;
	_glUnpackFn unpacker = MGLSelectUnpacker(format, type, &src_pixel_stride);
	GLFlagError(context, unpacker == NULL,  GL_INVALID_ENUM);

	/* An empty image draws nothing. Leaving here also keeps a zero size away
	 * from next_pwr() and the tile divisions below.
	 */
	if (width == 0 || height == 0) {
		return;
	}

	uint32 maxTextureSize = IWarp3D->W3D_Query(context->w3dContext,	W3D_Q_MAXTEXWIDTH, 0); // should be cached
	GLsizei rw = next_pwr(width);
	GLsizei rh = next_pwr(height);
	uint32 texWidthReal  = MIN(maxTextureSize, rw);
	uint32 texHeightReal = MIN(maxTextureSize, rh);

	int32 internal = SelectInternalFormat(context, GL_RGBA);

	/* Select a transfer function */
	_glTransferFn transfer = MGLSelectTransfer(format, &context->pixel);

	/* Principle of operation:
	 * glDrawPixels transfers pixel from the source into the frame buffer at
	 * the current raster position. The image data is not going through the
	 * pixel pipeline, i.e. these aren't fragments that are produced.
	 *
	 * What we do is the following: We create texture(s) with the data, and draw
	 * one or more quads covering the destination area.
	 *
	 * We need to make sure we do not run into trouble concerning the upper
	 * bounds of the supported texture size.
	 */

	/* Number of tiles per direction, rounded up */
	uint32 tilesWidth = width / texWidthReal;
	if ((texWidthReal * tilesWidth) < width) {
		tilesWidth++;
	}

	uint32 tilesHeight = height / texHeightReal;
	if ((texHeightReal * tilesHeight) < height) {
		tilesHeight++;
	}

	/* Scratch space for one unpacked tile row */
	void *convertBuffer = alloca(4 * texWidthReal);

	if (!EnsureBitmapBackingStore(context, texWidthReal * texHeightReal * i2w3d[internal].w3dBpp)) {
		return;
	}

	/* NOTE(review): the support check looks at W3D_R8G8B8A8 while the format
	 * chosen is W3D_A8R8G8B8 - confirm which one is meant.
	 */
	uint32 w3dFormat = W3D_A8R8G8B8;
	if (!(context->textureSupport[W3D_R8G8B8A8] & W3D_TEXFMT_FAST) || transfer) {
		w3dFormat = i2w3d[internal].w3dFormat;
	}

	uint32 error;
	W3D_Texture *tex = IWarp3D->W3D_AllocTexObjTags(
		context->w3dContext, &error,
		W3D_ATO_IMAGE,  context->CurrentBitmapBackingStore,
		W3D_ATO_FORMAT, w3dFormat,
		W3D_ATO_WIDTH,  texWidthReal,
		W3D_ATO_HEIGHT, texHeightReal,
		TAG_DONE
	);

	if (!tex || error != W3D_SUCCESS) {
		dprintf("Couldn't allocate texture, error %d\n", error);
		if (tex) {
			/* Don't leak an object that came back together with an error */
			IWarp3D->W3D_FreeTexObj(context->w3dContext, tex);
		}
		return;
	}

	/* Set texture parameters */
	IWarp3D->W3D_SetFilter(context->w3dContext, tex, W3D_NEAREST, W3D_NEAREST);
	IWarp3D->W3D_SetTexEnv(context->w3dContext, tex, W3D_MODULATE, NULL);
	IWarp3D->W3D_SetWrapMode(context->w3dContext, tex, W3D_CLAMP, W3D_CLAMP, 0);

	/* Remember the states we are about to override; restored at the end */
	uint32 oldCullState = IWarp3D->W3D_GetState(context->w3dContext, W3D_CULLFACE);
	IWarp3D->W3D_SetState(context->w3dContext, W3D_CULLFACE, W3D_DISABLE);

	uint32 oldTexmappingState = IWarp3D->W3D_GetState(context->w3dContext, W3D_TEXMAPPING);
	IWarp3D->W3D_SetState(context->w3dContext, W3D_TEXMAPPING, W3D_ENABLE);

	uint32 oldGouraudState = IWarp3D->W3D_GetState(context->w3dContext, W3D_GOURAUD);
	IWarp3D->W3D_SetState(context->w3dContext, W3D_GOURAUD, W3D_DISABLE);

	/* Draw with plain white so that the texture colours come through unmodified */
	static W3D_Color color;
	color.r = 1.0;
	color.g = 1.0;
	color.b = 1.0;
	color.a = 1.0;
	IWarp3D->W3D_SetCurrentColor(context->w3dContext, &color);

	IWarp3D->W3D_SetTextureBlendTags(
		context->w3dContext,
		W3D_BLEND_STAGE, 0,
		W3D_ENV_MODE,    W3D_REPLACE,
		W3D_BLEND_STAGE, 1,
		W3D_ENV_MODE,    W3D_OFF,
		TAG_DONE
	);

	if (context->LockMode == MGL_LOCK_SMART) {
		smartlock_beginDraw(context->smartLock);
	}
	else if (context->w3dLocked == GL_FALSE) {
		IWarp3D->W3D_LockHardware(context->w3dContext);
	}

	GLint baseX = context->current.RasterPos.x;
	GLint baseY = context->current.RasterPos.y;

	/* Length of one source row in bytes */
	uint32 lineWidth = (MAX(context->pixel_store.unpack.row_length, width)) * src_pixel_stride;

	/* Setup src image */
	GLimage_info src_image;
	src_image.data = (GLubyte *)pixels;
	src_image.stride = lineWidth;
	src_image.format = format;
	src_image.type = type;
	src_image.pixel_stride = src_pixel_stride;

	for (uint32 x = 0; x < tilesWidth; x++) {
		for (uint32 y = 0; y < tilesHeight; y++) {
			/* Convert the input data to a texture.
			 * The tile starts at source row y * texHeightReal (tiles are
			 * texHeightReal rows tall) and source column x * texWidthReal.
			 */
			src_image.current = (uint8 *)pixels
				+ (lineWidth * context->pixel_store.unpack.skip_rows)
				+ (lineWidth * (y * texHeightReal))
				+ (context->pixel_store.unpack.skip_pixels * src_pixel_stride)
				+ (x * texWidthReal * src_pixel_stride);
			uint8 *baseDst = context->CurrentBitmapBackingStore;

			/* If the remaining pixels do not cover the current tile, clear it to
			 * get black pixels
			 */
			BOOL doClear = FALSE;

			uint32 tileWidth = texWidthReal;
			if ((texWidthReal * x + texWidthReal) > width) {
				tileWidth = width - (texWidthReal * x);
				doClear = TRUE;
			}

			uint32 tileHeight = texHeightReal;
			if ((texHeightReal * y + texHeightReal) > height) {
				tileHeight = height - (texHeightReal * y);
				doClear = TRUE;
			}

			if (doClear) {
				IUtility->ClearMem(baseDst, texWidthReal * texHeightReal * 4);
			}

			/* Actual conversion, one tile row at a time */
			for (uint32 i = 0; i < tileHeight; i++) {
				if (w3dFormat == W3D_A8R8G8B8 && !transfer) {
					/* Unpack straight into the texture memory */
					unpacker(context, tileWidth, &src_image, &context->pixel_store.unpack, baseDst);
				}
				else {
					unpacker(context, tileWidth, &src_image, &context->pixel_store.unpack, convertBuffer);
					if (transfer) {
						/* convertBuffer only holds the tileWidth pixels unpacked above */
						transfer(context, tileWidth, convertBuffer, &context->pixel);
					}
					i2w3d[internal].write(PROFILE_PARAM convertBuffer, baseDst, tileWidth, 4);
				}
				src_image.current += lineWidth;
				baseDst += (4 * texWidthReal);
			}

			/* Draw a quad with the converted texture. The raster position is
			 * the lower left corner; the image grows to the right and upwards
			 * (towards smaller y), scaled by the pixel zoom.
			 */
			const float left   = baseX + x * texWidthReal * context->pixel.zoom_x;
			const float right  = left + tileWidth * context->pixel.zoom_x;
			const float bottom = baseY - y * texHeightReal * context->pixel.zoom_y;
			const float top    = bottom - tileHeight * context->pixel.zoom_y;
			const float depth  = context->current.RasterDistance;
			const float invW   = 1.0 - context->current.RasterDistance;

			DPVertex quad[4];

			/* Lower left corner */
			quad[0].x = left;
			quad[0].y = bottom;
			quad[0].z = depth;
			quad[0].w = invW;
			quad[0].u = 0.0;
			quad[0].v = 0.0;

			/* Lower right corner */
			quad[1].x = right;
			quad[1].y = bottom;
			quad[1].z = depth;
			quad[1].w = invW;
			quad[1].u = (float)tileWidth;
			quad[1].v = 0.0;

			/* Upper right corner */
			quad[2].x = right;
			quad[2].y = top;
			quad[2].z = depth;
			quad[2].w = invW;
			quad[2].u = (float)tileWidth;
			quad[2].v = (float)tileHeight;

			/* Upper left corner */
			quad[3].x = left;
			quad[3].y = top;
			quad[3].z = depth;
			quad[3].w = invW;
			quad[3].u = 0.0;
			quad[3].v = (float)tileHeight;

			IWarp3D->W3D_UpdateTexImage(
				context->w3dContext, tex,
				context->CurrentBitmapBackingStore, 0, NULL
			);

			IWarp3D->W3D_BindTexture(context->w3dContext, 0, tex);

			IWarp3D->W3D_InterleavedArray(
				context->w3dContext,
				quad, sizeof(DPVertex), DPVERTEX_FORMAT, 0
			);

			error = IWarp3D->W3D_DrawArray(
				context->w3dContext, W3D_PRIMITIVE_TRIFAN,
				0, 4
			);
		}
	}

	/* Put the application's current colour back */
	color.r = CLAMPF(context->current.CurrentColor.r);
	color.g = CLAMPF(context->current.CurrentColor.g);
	color.b = CLAMPF(context->current.CurrentColor.b);
	color.a = CLAMPF(context->current.CurrentColor.a);
	IWarp3D->W3D_SetCurrentColor(context->w3dContext, &color);

	if (context->LockMode == MGL_LOCK_SMART) {
		smartlock_endDraw(context->smartLock);
	}
	else if (context->w3dLocked == GL_FALSE) {
		IWarp3D->W3D_UnLockHardware(context->w3dContext);
	}

	// Might not need this, but it doesn't hurt.
	// (quad[] is a local, so the vertex array pointer should not stay on it)
	IWarp3D->W3D_InterleavedArray(
		context->w3dContext,
		context->VertexBuffer, sizeof (MGLVertex), context->VertexFormat,
		W3D_TEXCOORD_NORMALIZED
	);

	IWarp3D->W3D_FreeTexObj(context->w3dContext, tex);
	IWarp3D->W3D_SetState(context->w3dContext, W3D_CULLFACE, oldCullState);
	IWarp3D->W3D_SetState(context->w3dContext, W3D_TEXMAPPING, oldTexmappingState);
	IWarp3D->W3D_SetState(context->w3dContext, W3D_GOURAUD, oldGouraudState);

	RebindTextures(context);
	tex_EstablishEnvCombine(context);

	context->fbDirty = GL_TRUE;

	PROFILE_EXIT(FID_CGL_GL_DRAW_PIXELS)
}

/*
 * cgl_GLPixelZoom - implementation of glPixelZoom().
 *
 * Self  : GL context interface
 * zoomX : horizontal zoom factor stored in the context's pixel state
 * zoomY : vertical zoom factor stored in the context's pixel state
 *
 * After storing both factors the cached pixel_state_neutral flag is
 * re-evaluated via isPixelStateNeutral().
 */
void cgl_GLPixelZoom(struct GLContextIFace *Self, GLfloat zoomX, GLfloat zoomY)
{
	GLcontext context = GET_INSTANCE(Self);

	/* presumably diverts the call into a display list being compiled - confirm against displaylists.h */
	DL_CHECK(PixelZoom(Self, zoomX, zoomY));

	struct GLpixel_state *pixelState = &context->pixel;

	pixelState->zoom_x = zoomX;
	pixelState->zoom_y = zoomY;

	/* the flag depends on the values just written, so compute it last */
	pixelState->pixel_state_neutral = isPixelStateNeutral(context);
}