/* * $Id:$ * * $Date$ * $Revision$ * * (C) 1999-2005 The MiniGL team * All rights reserved * * This file is part of the MiniGL library project * See the file Licence.txt for more details * */ #include "displaylists.h" #include "sysinc.h" #include #include #include #include #include "mgl/gl.h" #include "mgl/mgltypes.h" #include #include "smartlock.h" #include #include "util.h" #include "mgl_profileitems.h" extern struct InternalToW3D i2w3d[]; BOOL EnsureBitmapBackingStore(GLcontext context, uint32 dataSize); extern GLsizei next_pwr(GLsizei x); extern void m_CombineMatrices(GLcontext context); extern void RebindTextures(GLcontext context); extern void tex_EstablishEnvCombine(GLcontext context); extern int32 SelectInternalFormat(GLcontext context, GLenum internalformat); extern float CLAMPF(float a); _glTransferFn MGLSelectTransfer(GLenum format, struct GLpixel_state *pixelState); extern _glUnpackFn MGLSelectUnpacker(GLenum format, GLenum type, GLuint *pixelStride); extern GLboolean isPixelStateNeutral(GLcontext context); extern int32 mapMGLZTestToW3D(int32 mglZ, int32 defZ); typedef struct { float x,y,z; float u,v,w; } DPVertex; #define DPVERTEX_FORMAT (W3D_VFORMAT_TCOORD_0) void GLDrawPixelsDepth(GLcontext context, GLsizei width, GLsizei height, GLenum type, const GLvoid *pixels) { /* This is a pretty crude implementation of glDrawPixels(GL_DEPTH_COMPONENT). Hubert Maier of ResidualVM told me about a drawing bug with "Grim Fandango" which I found was because of MiniGL's lack of support for this functionality here. First I did the obvious, nameley to base everything on W3D_WriteZSpan. Problems with that approach: 1. W3D_WriteZSpan is not implemented in RadeonHD SI drivers at all. 2. W3D_WriteZSpan is super-slow on R100 / R200. So all in all W3D_WriteZSpan is pretty useless on the most common AOS4 systems right now. Therefore I tried the next thing that came to my mind: draw lots of 3D-points without updating the color-buffer. 
As it turns out, this approach, although pretty brute-force, runs rather fast even on Rx00 systems.
A typical Grim Fandango scene requires this function to render at least 307200 points (most often
even more, about 420000 or so) and the framerate is still rather smooth.
Funnily enough: I tried to be smart and do some sort of RLE compression for lines of depth-pixels
with the same value. As it turned out, this slows things down extremely on the R200.

Note:
So far this has only been tested with ResidualVM Grim Fandango. There it seems to work flawlessly.
But this game only uses type=GL_UNSIGNED_SHORT, depth-bias = 0, depth-scale = 1, no pixel-zoom,
no scissor, input coordinates are always valid, always 640x480 window-size.
So this whole thing here has only been "tested" under optimal conditions so far :)

There's room for improvement, for example:
- it's probably better to do all bias/scale/normalize work inside the respective width-loop,
  following the rule of thumb: do as much computation as possible on data that is still in the cache.
- scaling can be applied for free for certain input data types.
- bias can be applied for free for certain input data types.
- normalization is unnecessary for certain input data types / combinations of scaling and bias.
- increase the point-packet-size; row by row is simple but certainly not too efficient...

Cheers,
Daytona675x
*/

    // note: only called from cgl_GLDrawPixels at a point where most parameters have been checked already.
    // raster-pos has been validated too.
// determine pixel-zoom int dst_width=width; int dst_height=height; GLboolean zoom_neutral; float zoom_x,zoom_y; // remains uninitialized if neutral if(!context->pixel.pixel_state_neutral) { zoom_x=context->pixel.zoom_x; zoom_y=context->pixel.zoom_y; zoom_neutral=(zoom_x==1.0f && zoom_y==1.0f); // pixel_state_neutral is just an indicator that tells us to take a closer look at pixel.zoom if(!zoom_neutral) { dst_width*=zoom_x; dst_height*=zoom_y; } } else { zoom_neutral=1; } // compute display boundaries int bounds_x1; int bounds_y1; int bounds_x2; int bounds_y2; if(context->enable.ScissorTest) { bounds_x1=context->w3dScissor.left; bounds_y1=context->w3dScissor.top; bounds_x2=bounds_x1+context->w3dScissor.width; bounds_y2=bounds_y1+context->w3dScissor.height; } else { bounds_x1=0; bounds_y1=0; bounds_x2=context->w3dContext->width; bounds_y2=context->w3dContext->height; } // calculate output rectangle int x1=context->current.RasterPos.x; int y1=context->current.RasterPos.y-height; int x2=x1+dst_width; int y2=y1+dst_height; // if zoom isn't neutral the rectangle may be mirrored if(!zoom_neutral) { if(x2= respectively <= instead? ) if(x1>bounds_x2) return; if(y1>bounds_y2) return; if(x20) clip_x2=0; // no right clipping int clip_y2=bounds_y2-y2; if(clip_y2>0) clip_y2=0; // no right clipping x1+=clip_x1; y1+=clip_y1; y2+=clip_y2; // fetch eventual scale / bias float scale_depth=context->pixel.depth_scale; float bias_depth=context->pixel.depth_bias; // validate "type" parameter and set depth-pixel source size accordingly. // this check can be done pretty late because it's highly unlikely that GL_INVALID_ENUM happens (we support all meanigful types). // And by doing it this late we can already precalculate other stuff here too. // Notes: // - for integers the depth-scale can be applied for free. // - for signed integers the depth-bias can be applied for free too. 
// - for unsigned integers the 0..1 clamping only has to be done if ( scale_depth+bias_depth <0 or >1 ) or ( bias_depth <0 or > 1 ) // - for floats clamping has to be done always (well, unless scale_depth = 0 and bias between 0 and 1, rare stuff like that) GLboolean needs_clamp=(type==GL_FLOAT) || (bias_depth<0.0f || bias_depth>1.0f) || ((scale_depth+bias_depth)<0.0f || (scale_depth+bias_depth)>1.0f); unsigned int src_pixel_size_in_bytes; switch(type) { case GL_BYTE: src_pixel_size_in_bytes=1; bias_depth-=scale_depth; // yes, looks funny, but I guess that's okay. For signed I'll have to subtract 1 at the end to correctly normalize them (see below). scale_depth/=127.5f; // note the .5f This is because we first bring the signed integer into an unsigned range. break; case GL_UNSIGNED_BYTE: src_pixel_size_in_bytes=1; scale_depth/=255.0f; break; case GL_SHORT: src_pixel_size_in_bytes=2; bias_depth-=scale_depth; scale_depth/=32767.5f; break; case GL_UNSIGNED_SHORT: src_pixel_size_in_bytes=2; scale_depth/=65535.0f; break; case GL_INT: src_pixel_size_in_bytes=4; bias_depth-=scale_depth; scale_depth/=2147483647.5f; break; case GL_UNSIGNED_INT: src_pixel_size_in_bytes=4; scale_depth/=4294967295.0f; break; case GL_FLOAT: src_pixel_size_in_bytes=4; break; default: GLFlagError(context,1,GL_INVALID_ENUM); } // prepare the source-pointer (float and ushort only for now) const unsigned int src_row_length_in_bytes=(MAX(context->pixel_store.unpack.row_length,width))*src_pixel_size_in_bytes; // by also applying the initial eventual skip_pixels and skip_rows settings const unsigned char *src_ptr=&((const unsigned char *)pixels)[(context->pixel_store.unpack.skip_pixels*src_pixel_size_in_bytes)+(src_row_length_in_bytes*context->pixel_store.unpack.skip_rows)]; src_ptr+=clip_x1+clip_y1*src_row_length_in_bytes; // adjust src-pointer considering left and top clipping width=x2-x1; width+=clip_x2-clip_x1; // setup states const uint32 
old_gouraud=IWarp3D->W3D_GetState(context->w3dContext,W3D_GOURAUD); if(old_gouraud!=W3D_DISABLED) IWarp3D->W3D_SetState(context->w3dContext,W3D_GOURAUD,W3D_DISABLE); const uint32 old_cullface=IWarp3D->W3D_GetState(context->w3dContext,W3D_CULLFACE); if(old_cullface!=W3D_DISABLED) IWarp3D->W3D_SetState(context->w3dContext, W3D_CULLFACE,W3D_DISABLE); const uint32 old_texmapping=IWarp3D->W3D_GetState(context->w3dContext,W3D_TEXMAPPING); if(old_texmapping!=W3D_DISABLED) IWarp3D->W3D_SetState(context->w3dContext, W3D_TEXMAPPING,W3D_DISABLE); const uint32 old_zbuffer=IWarp3D->W3D_GetState(context->w3dContext,W3D_ZBUFFER); if(old_zbuffer!=W3D_ENABLED) IWarp3D->W3D_SetState(context->w3dContext,W3D_ZBUFFER,W3D_ENABLE); const uint32 old_zbufferupdate=IWarp3D->W3D_GetState(context->w3dContext,W3D_ZBUFFERUPDATE); if(old_zbufferupdate!=W3D_ENABLED) IWarp3D->W3D_SetState(context->w3dContext,W3D_ZBUFFERUPDATE,W3D_ENABLE); const int32 old_zcomparemode=mapMGLZTestToW3D(context->depth_buffer.DepthFunc, W3D_ILLEGALINPUT); if(old_zcomparemode!=W3D_Z_ALWAYS) IWarp3D->W3D_SetZCompareMode(context->w3dContext,W3D_Z_ALWAYS); IWarp3D->W3D_SetColorMask(context->w3dContext,0,0,0,0); // create and draw z-geometry // line by line for now typedef struct { float coord[3]; } ZVertex ; ZVertex *zvertex_buf=(ZVertex *)alloca(sizeof(ZVertex)*width); IWarp3D->W3D_InterleavedArray(context->w3dContext,zvertex_buf,sizeof(ZVertex),0,0); // x-coordinate is static for(int x=0;xLockMode==MGL_LOCK_SMART) smartlock_beginDraw(context->smartLock); else if(context->w3dLocked==GL_FALSE) IWarp3D->W3D_LockHardware(context->w3dContext); for(int y=y2;y>y1;) { --y; const float y_flt=((float)y)+0.5f; switch(type) { case GL_FLOAT: for(int x=0;x1.0f) v=1.0f; zvertex_buf[x].coord[2]=v; } } src_ptr+=src_row_length_in_bytes; // draw IWarp3D->W3D_DrawArray(context->w3dContext,W3D_PRIMITIVE_POINTS,0,width); } // unlock if(context->LockMode==MGL_LOCK_SMART) smartlock_endDraw(context->smartLock); else 
if(context->w3dLocked==GL_FALSE) IWarp3D->W3D_UnLockHardware(context->w3dContext); // restore states IWarp3D->W3D_SetColorMask(context->w3dContext,context->color_buffer.WriteMaskRed,context->color_buffer.WriteMaskGreen,context->color_buffer.WriteMaskBlue,context->color_buffer.WriteMaskAlpha); if(old_zcomparemode!=W3D_Z_ALWAYS) IWarp3D->W3D_SetZCompareMode(context->w3dContext,old_zcomparemode); if(old_zbufferupdate!=W3D_ENABLED) IWarp3D->W3D_SetState(context->w3dContext,W3D_ZBUFFERUPDATE,old_zbufferupdate); if(old_zbuffer!=W3D_ENABLED) IWarp3D->W3D_SetState(context->w3dContext,W3D_ZBUFFER,old_zbuffer); if(old_texmapping!=W3D_DISABLED) IWarp3D->W3D_SetState(context->w3dContext, W3D_TEXMAPPING,old_texmapping); if(old_cullface!=W3D_DISABLED) IWarp3D->W3D_SetState(context->w3dContext,W3D_CULLFACE,old_cullface); if(old_gouraud!=W3D_DISABLED) IWarp3D->W3D_SetState(context->w3dContext,W3D_GOURAUD,old_gouraud); } void cgl_GLDrawPixels(struct GLContextIFace *Self, GLsizei width, GLsizei height, GLenum format, GLenum type, const GLvoid *pixels) { GLcontext context = GET_INSTANCE(Self); DL_CHECK(DrawPixels(Self, width, height, format, type, pixels)); PROFILE_ENTRY(FID_CGL_GL_DRAW_PIXELS); /* Check for valid raster pos. 
If invalid, just return */ if (!context->current.RasterPosValid) { return; } /* Check input parameters */ GLFlagError(context, pixels == NULL, GL_INVALID_VALUE); GLFlagError(context, width < 0, GL_INVALID_VALUE); GLFlagError(context, height < 0, GL_INVALID_VALUE); if(format== GL_DEPTH_COMPONENT) { GLDrawPixelsDepth(context, width, height, type, pixels); return; } GLuint src_pixel_stride; _glUnpackFn unpacker = MGLSelectUnpacker(format, type, &src_pixel_stride); GLFlagError(context, unpacker == NULL, GL_INVALID_ENUM); uint32 maxTextureSize = IWarp3D->W3D_Query(context->w3dContext, W3D_Q_MAXTEXWIDTH, 0); // should be cached GLsizei rw = next_pwr(width); GLsizei rh = next_pwr(height); uint32 texWidthReal = MIN(maxTextureSize, rw); uint32 texHeightReal = MIN(maxTextureSize, rh); int32 internal = SelectInternalFormat(context, GL_RGBA); /* Select a transfer function */ _glTransferFn transfer = MGLSelectTransfer(format, &context->pixel); /* Principle of operation: * glDrawPixels transfers pixel from the source into the frame buffer at * the current raster position. The image data is not going through the * pixel pipeline, i.e. these aren't fragments that are produced. * * What we do is the following: We create texture(s) with the data, and draw * one or more quads covering the destination area. * * We need to make sure we do not run into trouble concerning the upper * bounds of the supported texture size. 
*/ uint32 tilesWidth = width / texWidthReal; uint32 tilesHeight = height / texHeightReal; if ((texWidthReal * tilesWidth) < width) { tilesWidth++; } if ((texHeightReal * tilesHeight) < height) { tilesHeight++; } void *convertBuffer = alloca(4 * texWidthReal); if (!EnsureBitmapBackingStore(context, texWidthReal * texHeightReal * i2w3d[internal].w3dBpp)) { return; } int x, y, i; uint32 w3dFormat = W3D_A8R8G8B8; if (!(context->textureSupport[W3D_R8G8B8A8] & W3D_TEXFMT_FAST) || transfer) { w3dFormat = i2w3d[internal].w3dFormat; } uint32 error; W3D_Texture *tex = IWarp3D->W3D_AllocTexObjTags( context->w3dContext, &error, W3D_ATO_IMAGE, context->CurrentBitmapBackingStore, W3D_ATO_FORMAT, w3dFormat, W3D_ATO_WIDTH, texWidthReal, W3D_ATO_HEIGHT, texHeightReal, TAG_DONE ); if (!tex || error != W3D_SUCCESS) { dprintf("Couldn't allocate texture, error %d\n", error); return; } /* Set texture parameters */ IWarp3D->W3D_SetFilter(context->w3dContext, tex, W3D_NEAREST, W3D_NEAREST); IWarp3D->W3D_SetTexEnv(context->w3dContext, tex, W3D_MODULATE, NULL); IWarp3D->W3D_SetWrapMode(context->w3dContext, tex, W3D_CLAMP, W3D_CLAMP, 0); uint32 oldCullState = IWarp3D->W3D_GetState(context->w3dContext, W3D_CULLFACE); IWarp3D->W3D_SetState(context->w3dContext, W3D_CULLFACE, W3D_DISABLE); uint32 oldTexmappingState = IWarp3D->W3D_GetState(context->w3dContext, W3D_TEXMAPPING); IWarp3D->W3D_SetState(context->w3dContext, W3D_TEXMAPPING, W3D_ENABLE); uint32 oldGouraudState = IWarp3D->W3D_GetState(context->w3dContext, W3D_GOURAUD); IWarp3D->W3D_SetState(context->w3dContext, W3D_GOURAUD, W3D_DISABLE); static W3D_Color color; color.r = 1.0; color.g = 1.0; color.b = 1.0; color.a = 1.0; IWarp3D->W3D_SetCurrentColor(context->w3dContext, &color); IWarp3D->W3D_SetTextureBlendTags( context->w3dContext, W3D_BLEND_STAGE, 0, W3D_ENV_MODE, W3D_REPLACE, W3D_BLEND_STAGE, 1, W3D_ENV_MODE, W3D_OFF, TAG_DONE ); if (context->LockMode == MGL_LOCK_SMART) { smartlock_beginDraw(context->smartLock); } else if 
(context->w3dLocked == GL_FALSE) { IWarp3D->W3D_LockHardware(context->w3dContext); } GLint baseX = context->current.RasterPos.x; GLint baseY = context->current.RasterPos.y; uint32 lineWidth = (MAX(context->pixel_store.unpack.row_length, width)) * src_pixel_stride; /* Setup src image */ GLimage_info src_image; src_image.data = (GLubyte *)pixels; src_image.stride = lineWidth; src_image.format = format; src_image.type = type; src_image.pixel_stride = src_pixel_stride; for (x = 0; x < tilesWidth; x++) { for (y = 0; y < tilesHeight; y++) { /* Convert the input data to a texture */ src_image.current = (uint8 *)pixels + (lineWidth * context->pixel_store.unpack.skip_rows) + (lineWidth * (y * texWidthReal)) + (context->pixel_store.unpack.skip_pixels * src_pixel_stride) + (x * texWidthReal * src_pixel_stride); uint8 *baseDst = context->CurrentBitmapBackingStore; /* If the remaining pixels do not cover the current tile, clear it to * get black pixels */ BOOL doClear = FALSE; uint32 tileWidth = texWidthReal; if ((texWidthReal * x + texWidthReal) > width) { tileWidth = width - (texWidthReal * x); doClear = TRUE; } uint32 tileHeight = texHeightReal; if ((texHeightReal * y + texHeightReal) > height) { tileHeight = height - (texHeightReal * y); doClear = TRUE; } if (doClear) { IUtility->ClearMem(baseDst, texWidthReal * texHeightReal * 4); } /* Actual conversion */ for (i = 0; i < tileHeight; i++) { if (w3dFormat == W3D_A8R8G8B8 && !transfer) { unpacker(context, tileWidth, &src_image, &context->pixel_store.unpack, baseDst); //convertBuffer); } else { unpacker(context, tileWidth, &src_image, &context->pixel_store.unpack, convertBuffer); if (transfer) { transfer(context, width, convertBuffer, &context->pixel); } i2w3d[internal].write(PROFILE_PARAM convertBuffer, baseDst, tileWidth, 4); } src_image.current += lineWidth; baseDst += (4 * texWidthReal); } /* Draw a quad with the converted texture */ DPVertex quad[4]; /* Lower left corner */ quad[0].x = baseX + x * texWidthReal * 
context->pixel.zoom_x; quad[0].y = baseY - y * texHeightReal * context->pixel.zoom_y; quad[0].z = context->current.RasterDistance; quad[0].w = 1.0 - context->current.RasterDistance; quad[0].u = 0.0; quad[0].v = 0.0; /* Lower right corner */ quad[1].x = baseX + x * texWidthReal * context->pixel.zoom_x + tileWidth* context->pixel.zoom_x; quad[1].y = baseY - y * texHeightReal * context->pixel.zoom_y; quad[1].z = context->current.RasterDistance; quad[1].w = 1.0 - context->current.RasterDistance; quad[1].u = (float)tileWidth; quad[1].v = 0.0; /* Upper right corner */ quad[2].x = baseX + x * texWidthReal * context->pixel.zoom_x + tileWidth * context->pixel.zoom_x; quad[2].y = baseY - y * texHeightReal * context->pixel.zoom_y - tileHeight * context->pixel.zoom_y; quad[2].z = context->current.RasterDistance; quad[2].w = 1.0 - context->current.RasterDistance; quad[2].u = (float)tileWidth; quad[2].v = (float)tileHeight; /* Upper left corner */ quad[3].x = baseX + x * texWidthReal * context->pixel.zoom_x; quad[3].y = baseY - y * texHeightReal * context->pixel.zoom_y - tileHeight * context->pixel.zoom_y; quad[3].z = context->current.RasterDistance; quad[3].w = 1.0 - context->current.RasterDistance; quad[3].u = 0.0; quad[3].v = (float)tileHeight; IWarp3D->W3D_UpdateTexImage( context->w3dContext, tex, context->CurrentBitmapBackingStore, 0, NULL ); IWarp3D->W3D_BindTexture(context->w3dContext, 0, tex); IWarp3D->W3D_InterleavedArray( context->w3dContext, quad, sizeof(DPVertex), DPVERTEX_FORMAT, 0 ); error = IWarp3D->W3D_DrawArray( context->w3dContext, W3D_PRIMITIVE_TRIFAN, 0, 4 ); } } color.r = CLAMPF(context->current.CurrentColor.r); color.g = CLAMPF(context->current.CurrentColor.g); color.b = CLAMPF(context->current.CurrentColor.b); color.a = CLAMPF(context->current.CurrentColor.a); IWarp3D->W3D_SetCurrentColor(context->w3dContext, &color); if (context->LockMode == MGL_LOCK_SMART) { smartlock_endDraw(context->smartLock); } else if (context->w3dLocked == GL_FALSE) { 
IWarp3D->W3D_UnLockHardware(context->w3dContext); } // Might not need this, but it doesn't hurt. IWarp3D->W3D_InterleavedArray( context->w3dContext, context->VertexBuffer, sizeof (MGLVertex), context->VertexFormat, W3D_TEXCOORD_NORMALIZED ); IWarp3D->W3D_FreeTexObj(context->w3dContext, tex); IWarp3D->W3D_SetState(context->w3dContext, W3D_CULLFACE, oldCullState); IWarp3D->W3D_SetState(context->w3dContext, W3D_TEXMAPPING, oldTexmappingState); IWarp3D->W3D_SetState(context->w3dContext, W3D_GOURAUD, oldGouraudState); RebindTextures(context); tex_EstablishEnvCombine(context); context->fbDirty = GL_TRUE; PROFILE_EXIT(FID_CGL_GL_DRAW_PIXELS) } void cgl_GLPixelZoom(struct GLContextIFace *Self, GLfloat zoomX, GLfloat zoomY) { GLcontext context = GET_INSTANCE(Self); DL_CHECK(PixelZoom(Self, zoomX, zoomY)); context->pixel.zoom_x = zoomX; context->pixel.zoom_y = zoomY; context->pixel.pixel_state_neutral = isPixelStateNeutral(context); }