diff --git a/console/rgl/src/ps3/include/rgl-inline.h b/console/rgl/src/ps3/include/rgl-inline.h index 151f2dda99..523f766106 100644 --- a/console/rgl/src/ps3/include/rgl-inline.h +++ b/console/rgl/src/ps3/include/rgl-inline.h @@ -85,30 +85,25 @@ static inline void rglGcmFifoGlViewport( GLint x, GLint y, GLsizei width, GLsize clipY0 = y; clipY1 = y + height; } + if ( clipX0 < 0 ) - { clipX0 = 0; - } if ( clipY0 < 0 ) - { clipY0 = 0; - } + if ( clipX1 >= RGLGCM_MAX_RT_DIMENSION ) - { clipX1 = RGLGCM_MAX_RT_DIMENSION; - } + if ( clipY1 >= RGLGCM_MAX_RT_DIMENSION ) - { clipY1 = RGLGCM_MAX_RT_DIMENSION; - } + if (( clipX1 <= clipX0 ) || ( clipY1 <= clipY0 ) ) - { clipX0 = clipY0 = clipX1 = clipY1 = 0; - } // update viewport info vp->xScale = width * 0.5f; vp->xCenter = ( GLfloat )( x + vp->xScale + RGLGCM_SUBPIXEL_ADJUST ); + if ( rt->yInverted ) { vp->yScale = height * -0.5f; @@ -226,7 +221,7 @@ static inline void rglGcmFifoGlDrawArrays( rglGcmEnum mode, GLint first, GLsizei static inline GLuint rglGcmMapMinTextureFilter( GLenum filter ) { - switch ( filter ) + switch (filter) { case GL_NEAREST: return CELL_GCM_TEXTURE_NEAREST; @@ -268,44 +263,6 @@ static inline GLuint rglGcmMapMagTextureFilter( GLenum filter ) return filter; } -static inline GLuint rglGcmMapAniso( GLuint maxAniso ) -{ - - if ( maxAniso >= 16 ) - return CELL_GCM_TEXTURE_MAX_ANISO_16; - if ( maxAniso == 1 ) - return CELL_GCM_TEXTURE_MAX_ANISO_1; - - switch ( maxAniso / 2 ) - { - case 1: - return CELL_GCM_TEXTURE_MAX_ANISO_2; - break; - case 2: - return CELL_GCM_TEXTURE_MAX_ANISO_4; - break; - case 3: - return CELL_GCM_TEXTURE_MAX_ANISO_6; - break; - case 4: - return CELL_GCM_TEXTURE_MAX_ANISO_8; - break; - case 5: - return CELL_GCM_TEXTURE_MAX_ANISO_10; - break; - case 6: - return CELL_GCM_TEXTURE_MAX_ANISO_12; - break; - case 7: - return CELL_GCM_TEXTURE_MAX_ANISO_16; - break; - default: - return 0; - break; - } - return 0; -} - static inline GLuint rglGcmMapWrapMode( GLuint mode ) { switch ( mode ) @@ -568,69 +525,6 @@ static inline void rglGcmFifoGlInvalidateTextureCache( void ) GCM_FUNC( cellGcmSetInvalidateTextureCache, CELL_GCM_INVALIDATE_TEXTURE ); } -/* writes the supplied new semaphore value once the gpu has completed all - ** currently pending work. - ** - ** note: - ** - we do not enforce pairing of Acquire/Release, so you can (ab)use it to - ** write synchronized signal values... - */ - -static inline void rglGcmFifoGlReleaseSemaphore( rglGcmEnum target, GLuint semaphoreId, GLuint newSemphoreValue ) -{ - rglGcmSemaphoreMemory *semaphores = rglGcmState_i.semaphores; - - switch ( target ) - { - case RGLGCM_SEMAPHORE_USING_GPU: - // let the backend(rop/fb) write the release value - // -- guarantees all reads/writes have completed - GCM_FUNC( cellGcmSetWriteBackEndLabel, semaphoreId, newSemphoreValue ); - break; - case RGLGCM_SEMAPHORE_USING_GPU_NO_WRITE_FLUSH: - // write the semaphore value once host/vb/ib/tex are no longer referencing - // any data prior to the method. - // -- does _NOT_ guarantee that read/writes on the render target surfaces - // have completed (iow: cpu read on the color buffer will be undefined) - - GCM_FUNC( cellGcmSetWriteTextureLabel, semaphoreId, newSemphoreValue ); - break; - case RGLGCM_SEMAPHORE_USING_CPU: - semaphores->userSemaphores[semaphoreId].val = newSemphoreValue; - break; - default: - break; - } -} - -/* lets the gpu/cpu wait until the specific semaphore is equal to the requested - ** semaphore value. - ** - ** note: - ** - we do not enforce pairing of Acquire/Release, so you can (ab)use it to - ** - What about aquire timeouts (after a few seconds) ? - */ -void static inline rglGcmFifoGlAcquireSemaphore( rglGcmEnum target, GLuint semaphoreId, GLuint reqSemphoreValue ) -{ - rglGcmSemaphoreMemory *semaphores = rglGcmState_i.semaphores; - - // pick location - switch ( target ) - { - case RGLGCM_SEMAPHORE_USING_GPU: - // let the frontend aquire the semaphore... - GCM_FUNC( cellGcmSetWaitLabel, semaphoreId, reqSemphoreValue ); - break; - case RGLGCM_SEMAPHORE_USING_CPU: - // lame polling for now... - for ( ;semaphores->userSemaphores[semaphoreId].val != reqSemphoreValue; ) - sys_timer_usleep(10); - break; - default: - break; - } -} - // Fast conversion for values between 0.0 and 65535.0 GLuint inline static RGLGCM_QUICK_FLOAT2UINT( const GLfloat f ) { @@ -902,7 +796,7 @@ static inline void rglGcmFifoGlBlendEquation( rglGcmEnum mode, rglGcmEnum modeAl GCM_FUNC( cellGcmSetBlendEquation, mode, modeAlpha ); } - void static inline rglGcmFifoGlVertexAttribPointer +static inline void rglGcmFifoGlVertexAttribPointer ( GLuint index, GLint size, @@ -939,13 +833,9 @@ static inline void rglGcmFifoGlBlendEquation( rglGcmEnum mode, rglGcmEnum modeAl { case RGLGCM_UNSIGNED_BYTE: if (normalized) - { gcmType = CELL_GCM_VERTEX_UB; - } else - { gcmType = CELL_GCM_VERTEX_UB256; - } break; case RGLGCM_SHORT: @@ -969,12 +859,7 @@ static inline void rglGcmFifoGlBlendEquation( rglGcmEnum mode, rglGcmEnum modeAl break; } - uint8_t location = CELL_GCM_LOCATION_LOCAL; - - if ( isMain ) - location = CELL_GCM_LOCATION_MAIN; - - GCM_FUNC( cellGcmSetVertexDataArray, index, frequency, stride, size, gcmType, location, offset ); + GCM_FUNC( cellGcmSetVertexDataArray, index, frequency, stride, size, gcmType, CELL_GCM_LOCATION_LOCAL, offset ); } // set the vertex attribute to the specified value. @@ -1104,10 +989,22 @@ static inline void rglFifoGlProgramParameterfvVP( const _CGprogram *program, con // set 4 consts { GLfloat v2[16]; - v2[0] = value[0];v2[1] = value[4];v2[2] = value[8];v2[3] = value[12]; - v2[4] = value[1];v2[5] = value[5];v2[6] = value[9];v2[7] = value[13]; - v2[8] = value[2];v2[9] = value[6];v2[10] = value[10];v2[11] = value[14]; - v2[12] = value[3];v2[13] = value[7];v2[14] = value[11];v2[15] = value[15]; + v2[0] = value[0]; + v2[1] = value[4]; + v2[2] = value[8]; + v2[3] = value[12]; + v2[4] = value[1]; + v2[5] = value[5]; + v2[6] = value[9]; + v2[7] = value[13]; + v2[8] = value[2]; + v2[9] = value[6]; + v2[10] = value[10]; + v2[11] = value[14]; + v2[12] = value[3]; + v2[13] = value[7]; + v2[14] = value[11]; + v2[15] = value[15]; GCM_FUNC( cellGcmSetVertexProgramParameterBlock, parameterResource->resource, 4, v2 ); // GCM_PORT_TESTED [Cedric] } break; @@ -1130,21 +1027,6 @@ static inline void rglFifoGlProgramParameterfvVP( const _CGprogram *program, con } } -// Push a CG program onto the current command buffer -static inline void rglGcmPushProgramPushBuffer( _CGprogram * cgprog ) -{ - // make sure there is space for the pushbuffer + any nops we need to add for alignment - rglGcmFifoWaitForFreeSpace( &rglGcmState_i.fifo, cgprog->constantPushBufferWordSize + 4 + 32); - // first add nops to get us the next alligned position in the fifo - // [YLIN] Use VMX register to copy - uint32_t padding_in_word = ( ( 0x10-(((uint32_t)rglGcmState_i.fifo.current)&0xf))&0xf )>>2; - uint32_t padded_size = ( ((cgprog->constantPushBufferWordSize)<<2) + 0xf )&~0xf; - GCM_FUNC( cellGcmSetNopCommandUnsafe, padding_in_word ); - memcpy16(rglGcmState_i.fifo.current, cgprog->constantPushBuffer, padded_size); - rglGcmState_i.fifo.current+=cgprog->constantPushBufferWordSize; - -} - // Look up the memory location of a buffer object (VBO, PBO) static inline GLuint rglGcmGetBufferObjectOrigin( GLuint buffer ) { diff --git a/console/rgl/src/ps3/include/rgl-typedefs.h b/console/rgl/src/ps3/include/rgl-typedefs.h index 8b16c9444a..a79845823a 100644 --- a/console/rgl/src/ps3/include/rgl-typedefs.h +++ b/console/rgl/src/ps3/include/rgl-typedefs.h @@ -19,7 +19,7 @@ typedef struct _tagMODESTRUC GLushort wVertSyncStart; GLushort wVertSyncEnd; GLushort wVertBlankEnd; - GLuint dwDotClock; // In 10K Hertz + GLuint dwDotClock; // In 10K Hertz GLushort wHSyncPolarity; GLushort wVSyncPolarity; } MODESTRUC; diff --git a/console/rgl/src/ps3/rgl_ps3.cpp b/console/rgl/src/ps3/rgl_ps3.cpp index f4b4ddff59..dffdcbc716 100644 --- a/console/rgl/src/ps3/rgl_ps3.cpp +++ b/console/rgl/src/ps3/rgl_ps3.cpp @@ -2375,38 +2375,6 @@ void rglGcmDestroyRM( rglGcmResource* gcmResource ) return; } -void rglGcmGraphicsHandler( const uint32_t head ) -{ - // GCM will call this Graphics Handler if there is a channel error which - // can be caused by bad fifo commands, and GPU error, or GPU memory access. - - printf( "========================================\n" ); - printf( " RGL [rglGcmGraphicsHandler] \n" ); - printf( " GCM triggers this because of RSX error \n" ); - printf( " due to invalid Fifo Commands, \n" ); - printf( " invalid GPU state, or invalid memory access\n" ); - printf( "========================================\n" ); - - // print out the previous 10 words from the current position; - rglGcmState_i.fifo.updateLastGetRead(); - - // Dumping current fifo state - printf(" Current RGL FIFO info \n" ); - printf(" Fifo Begin %p End %p Current %p and Get %p \n", - rglGcmState_i.fifo.begin, - rglGcmState_i.fifo.end, - rglGcmState_i.fifo.current, - rglGcmState_i.fifo.lastGetRead ); - - printf(" Last 10 words of the RGL Fifo from the ppu put/current position \n" ); - rglPrintFifoFromPut( 10 ); - - printf(" Last 10 words of the RGL Fifo from the gpu get position \n" ); - rglPrintFifoFromGet( 10 ); -} - -extern GLboolean _psglDisableCompression; - int rglGcmInitRM( rglGcmResource *gcmResource, unsigned int hostMemorySize, int inSysMem, unsigned int dmaPushBufferSize ) { memset( gcmResource, 0, sizeof( rglGcmResource ) ); @@ -2435,10 +2403,6 @@ int rglGcmInitRM( rglGcmResource *gcmResource, unsigned int hostMemorySize, int return GL_FALSE; } - cellGcmSetDebugOutputLevel( CELL_GCM_DEBUG_LEVEL2 ); - // set the rglGcm graphics error callback - cellGcmSetGraphicsHandler( &rglGcmGraphicsHandler ); - // Get Gpu configuration CellGcmConfig config; cellGcmGetConfiguration( &config ); @@ -2751,7 +2715,7 @@ static void rglGcmAllocateTiledSurface( // certain dimension combinations, but this is simple and may conserve // tiled region usage over some alternatives. GLuint padSize = RGLGCM_TILED_BUFFER_ALIGNMENT; // 64KB - + while (( padSize % ( tiledPitch*8 ) ) != 0 ) padSize += RGLGCM_TILED_BUFFER_ALIGNMENT; diff --git a/console/rgl/src/ps3/rgl_ps3_raster.cpp b/console/rgl/src/ps3/rgl_ps3_raster.cpp index 29e7e21322..a1bffa67e5 100644 --- a/console/rgl/src/ps3/rgl_ps3_raster.cpp +++ b/console/rgl/src/ps3/rgl_ps3_raster.cpp @@ -1106,14 +1106,14 @@ void rglSetDefaultValuesVP( _CGprogram *program ) const float *itemDefaultValues = program->defaultValues + program->defaultValuesIndices[i].defaultValueIndex; int registerStride = isMatrix(( CGtype )parameterResource->type ) ? rglGetTypeRowCount(( CGtype )parameterResource->type ) : 1; if ( parameterEntry->flags & CGP_CONTIGUOUS ) - memcpy( rtParameter->pushBufferPointer, itemDefaultValues, arrayCount * registerStride *4*sizeof( float ) ); + __builtin_memcpy( rtParameter->pushBufferPointer, itemDefaultValues, arrayCount * registerStride *4*sizeof( float ) ); else { unsigned int *pushBufferPointer = (( unsigned int * )rtParameter->pushBufferPointer ); for ( int j = 0;j < arrayCount;j++ ) { unsigned int *pushBufferAddress = isArray ? ( *( unsigned int** )pushBufferPointer ) : pushBufferPointer; - memcpy( pushBufferAddress, itemDefaultValues, registerStride*4*sizeof( float ) ); + __builtin_memcpy( pushBufferAddress, itemDefaultValues, registerStride*4*sizeof( float ) ); pushBufferPointer += isArray ? 1 : 3 + registerStride * 4; itemDefaultValues += 4 * registerStride; } @@ -1163,7 +1163,7 @@ void rglSetDefaultValuesFP( _CGprogram *program ) dst[2] = SWAP_IF_BIG_ENDIAN( itemDefaultValues[2] ); dst[3] = SWAP_IF_BIG_ENDIAN( itemDefaultValues[3] ); } - memcpy(( void* )hostMemoryCopy, ( void* )itemDefaultValues, sizeof( float )*4 ); + __builtin_memcpy(( void* )hostMemoryCopy, ( void* )itemDefaultValues, sizeof( float )*4 ); hostMemoryCopy += 4; itemDefaultValues += 4; resource++; //skip the register of the next item @@ -1255,9 +1255,7 @@ void rglPlatformBufferObjectSetData( rglBufferObject* bufferObject, GLintptr off rglGcmBufferObject *rglBuffer = ( rglGcmBufferObject * )bufferObject->platformBufferObject; if ( size == bufferObject->size && tryImmediateCopy ) - { - memcpy( gmmIdToAddress( rglBuffer->bufferId ) + offset, data, size ); - } + __builtin_memcpy( gmmIdToAddress( rglBuffer->bufferId ) + offset, data, size ); else if ( size >= bufferObject->size ) { @@ -1276,16 +1274,14 @@ void rglPlatformBufferObjectSetData( rglBufferObject* bufferObject, GLintptr off rglSetError( GL_OUT_OF_MEMORY ); return; default: - memcpy( gmmIdToAddress( rglBuffer->bufferId ), data, size ); + __builtin_memcpy( gmmIdToAddress( rglBuffer->bufferId ), data, size ); break; } } else { if ( tryImmediateCopy ) - { - memcpy( gmmIdToAddress( rglBuffer->bufferId ) + offset, data, size ); - } + __builtin_memcpy( gmmIdToAddress( rglBuffer->bufferId ) + offset, data, size ); else { // partial buffer write @@ -1440,7 +1436,7 @@ void rglFBClear( GLbitfield mask ) GLuint bufferId = gmmAlloc((CellGcmContextData*)&rglGcmState_i.fifo, CELL_GCM_LOCATION_LOCAL, 0, sizeof(rglClearVertexBuffer)); - memcpy( gmmIdToAddress(bufferId), rglClearVertexBuffer, sizeof( rglClearVertexBuffer ) ); + __builtin_memcpy(gmmIdToAddress(bufferId), rglClearVertexBuffer, sizeof(rglClearVertexBuffer)); rglGcmFifoGlVertexAttribPointer( 0, 3, RGLGCM_FLOAT, RGLGCM_FALSE, 3*sizeof( GLfloat ), 1, 0, gmmIdToOffset(bufferId) ); RGLBIT_TRUE( LContext->attribs->DirtyMask, 0 ); @@ -1867,9 +1863,9 @@ GLuint rglValidateAttributesSlow( rglDrawParams *dparams, GLboolean *isMain ) rglBitfield needsUpdateMask = ( as->DirtyMask | ( as->EnabledMask & ~as->HasVBOMask ) ); // for any remaining attributes that need updating, do it now. - if ( needsUpdateMask ) + if(needsUpdateMask) { - for ( GLuint i = 0; i < RGL_MAX_VERTEX_ATTRIBS; ++i ) + for(GLuint i = 0; i < RGL_MAX_VERTEX_ATTRIBS; ++i) { // skip this attribute if not needing update if ( ! RGLBIT_GET( needsUpdateMask, i ) ) continue; @@ -1888,9 +1884,9 @@ GLuint rglValidateAttributesSlow( rglDrawParams *dparams, GLboolean *isMain ) GLuint offset = ( dparams->firstVertex / freq ) * stride; char * b = ( char * )xferBuffer + dparams->attribXferOffset[i]; - memcpy( b + offset, - ( char * )attrib->clientData + offset, - dparams->attribXferSize[i] - offset ); + __builtin_memcpy(b + offset, + ( char*)attrib->clientData + offset, + dparams->attribXferSize[i] - offset); // draw directly from bounce buffer *isMain = gmmIdIsMain(xferId); @@ -2353,7 +2349,7 @@ void rglPlatformUploadTexture( rglTexture* texture ) // create surface descriptors for image transfer rglGcmSurface src = { -source: RGLGCM_SURFACE_SOURCE_TEMPORARY, + source: RGLGCM_SURFACE_SOURCE_TEMPORARY, width: 0, // replaced per image height: 0, // replaced per image bpp: pixelBytes, @@ -2366,7 +2362,7 @@ source: RGLGCM_SURFACE_SOURCE_TEMPORARY, }; rglGcmSurface dst = { -source: RGLGCM_SURFACE_SOURCE_TEXTURE, + source: RGLGCM_SURFACE_SOURCE_TEXTURE, width: 0, // replaced per image height: 0, // replaced per image bpp: pixelBytes, @@ -2380,74 +2376,70 @@ source: RGLGCM_SURFACE_SOURCE_TEXTURE, // use a bounce buffer to transfer to GPU GLuint bounceBufferId = GMM_ERROR; + + // check if upload is needed for this image + rglImage *image = texture->image; + + if ( image->dataState == RGL_IMAGE_DATASTATE_HOST ) { + // determine image offset from base address + // TODO: compute all offsets at once for efficiency + // This is the offset in bytes for this face/image from the + // texture base address. + const GLuint dataOffset = rglGetGcmImageOffset( layout, 0, 0 ); + + // set source pixel buffer + src.ppuData = image->data; + + // lazy allocation of bounce buffer + if ( bounceBufferId == GMM_ERROR && layout->baseDepth == 1 ) + bounceBufferId = gmmAlloc((CellGcmContextData*)&rglGcmState_i.fifo, + CELL_GCM_LOCATION_LOCAL, 0, gcmTexture->gpuSize); + + if ( bounceBufferId != GMM_ERROR ) { - // check if upload is needed for this image - rglImage *image = texture->image; - if ( image->dataState == RGL_IMAGE_DATASTATE_HOST ) - { - // determine image offset from base address - // TODO: compute all offsets at once for efficiency - // This is the offset in bytes for this face/image from the - // texture base address. - const GLuint dataOffset = rglGetGcmImageOffset( layout, 0, 0 ); + // copy image to bounce buffer + src.dataId = bounceBufferId; + src.dataIdOffset = dataOffset; - // set source pixel buffer - src.ppuData = image->data; + // NPOT DXT + __builtin_memcpy( gmmIdToAddress( src.dataId ) + dataOffset, + image->data, image->storageSize ); + } - // lazy allocation of bounce buffer - if ( bounceBufferId == GMM_ERROR && layout->baseDepth == 1 ) - bounceBufferId = gmmAlloc((CellGcmContextData*)&rglGcmState_i.fifo, - CELL_GCM_LOCATION_LOCAL, 0, gcmTexture->gpuSize); + // use surface copy functions + src.width = image->width; + src.height = image->height; + src.pitch = pixelBytes * src.width; - if ( bounceBufferId != GMM_ERROR ) - { - // copy image to bounce buffer - src.dataId = bounceBufferId; - src.dataIdOffset = dataOffset; + dst.width = src.width; + dst.height = image->height; + dst.dataId = gcmTexture->gpuAddressId; + dst.dataIdOffset = gcmTexture->gpuAddressIdOffset + dataOffset; - // NPOT DXT - memcpy( gmmIdToAddress( src.dataId ) + dataOffset, - image->data, - image->storageSize ); - } + GLuint offsetHeight = 0; - { - // use surface copy functions - src.width = image->width; - src.height = image->height; - src.pitch = pixelBytes * src.width; + if(dst.pitch) + { + // linear (not swizzled) + // The tiled linear format requires that render + // targets be aligned to 8*pitch from the start of + // the tiled region. + offsetHeight = ( dataOffset / dst.pitch ) % 8; + dst.height += offsetHeight; + dst.dataIdOffset -= offsetHeight * dst.pitch; + } - dst.width = src.width; - dst.height = image->height; - dst.dataId = gcmTexture->gpuAddressId; - dst.dataIdOffset = gcmTexture->gpuAddressIdOffset + dataOffset; + rglGcmCopySurface( + &src, 0, 0, + &dst, 0, offsetHeight, + src.width, src.height, + GL_TRUE ); // don't bypass GPU pipeline - GLuint offsetHeight = 0; - if ( dst.pitch ) - { - // linear (not swizzled) - // The tiled linear format requires that render - // targets be aligned to 8*pitch from the start of - // the tiled region. - offsetHeight = ( dataOffset / dst.pitch ) % 8; - dst.height += offsetHeight; - dst.dataIdOffset -= offsetHeight * dst.pitch; - } - - rglGcmCopySurface( - &src, 0, 0, - &dst, 0, offsetHeight, - src.width, src.height, - GL_TRUE ); // don't bypass GPU pipeline - } - - // free CPU copy of data - rglImageFreeCPUStorage( image ); - image->dataState |= RGL_IMAGE_DATASTATE_GPU; - } // newer data on host - } // loop over levels - } // loop over faces + // free CPU copy of data + rglImageFreeCPUStorage( image ); + image->dataState |= RGL_IMAGE_DATASTATE_GPU; + } // newer data on host if ( bounceBufferId != GMM_ERROR ) gmmFree( bounceBufferId ); @@ -2469,10 +2461,8 @@ static inline void rglGcmUpdateGcmTexture( rglTexture * texture, rglGcmTextureLa platformTexture->gcmTexture.format, platformTexture->gcmTexture.remap ); // This is just to cover the conversion from swizzled to linear - if ( layout->pitch ) - { + if(layout->pitch) platformTexture->gcmTexture.format += 0x20; // see class doc definitions for SZ_NR vs LN_NR... - } platformTexture->gcmTexture.width = layout->baseWidth; platformTexture->gcmTexture.height = layout->baseHeight; @@ -2541,7 +2531,7 @@ void rglGcmUpdateMethods( rglTexture * texture ) // ----------------------------------------------------------------------- // set the SET_TEXTURE_CONTROL0 params - platformTexture->gcmMethods.control0.maxAniso = rglGcmMapAniso( maxAniso ); + platformTexture->gcmMethods.control0.maxAniso = CELL_GCM_TEXTURE_MAX_ANISO_1; const GLfloat minLOD = MAX( texture->minLod, texture->baseLevel ); const GLfloat maxLOD = MIN( texture->maxLod, texture->maxLevel ); platformTexture->gcmMethods.control0.minLOD = ( GLuint )( MAX( minLOD, 0 ) * 256.0f ); @@ -3046,8 +3036,21 @@ void rglValidateVertexProgram() void rglValidateVertexConstants() { RGLcontext* LContext = _CurrentContext; + _CGprogram *cgprog = LContext->BoundVertexProgram; - rglGcmPushProgramPushBuffer( LContext->BoundVertexProgram ); + // Push a CG program onto the current command buffer + + // make sure there is space for the pushbuffer + any nops we need to add for alignment + rglGcmFifoWaitForFreeSpace( &rglGcmState_i.fifo, cgprog->constantPushBufferWordSize + 4 + 32); + + // first add nops to get us the next alligned position in the fifo + // [YLIN] Use VMX register to copy + uint32_t padding_in_word = ( ( 0x10-(((uint32_t)rglGcmState_i.fifo.current)&0xf))&0xf )>>2; + uint32_t padded_size = ( ((cgprog->constantPushBufferWordSize)<<2) + 0xf )&~0xf; + + GCM_FUNC( cellGcmSetNopCommandUnsafe, padding_in_word ); + memcpy16(rglGcmState_i.fifo.current, cgprog->constantPushBuffer, padded_size); + rglGcmState_i.fifo.current+=cgprog->constantPushBufferWordSize; } /*============================================================