Improve and fix NonStupidBitField; also get rid of some UB.
Fixes the optimised LTO Clang build.
This commit is contained in:
parent
1112162e99
commit
6256a42e00
18
src/GPU.cpp
18
src/GPU.cpp
|
@ -1187,7 +1187,7 @@ NonStupidBitField<Size/VRAMDirtyGranularity> VRAMTrackingSet<Size, MappingGranul
|
||||||
{
|
{
|
||||||
if (currentMappings[i] != Mapping[i])
|
if (currentMappings[i] != Mapping[i])
|
||||||
{
|
{
|
||||||
result |= NonStupidBitField<Size/VRAMDirtyGranularity>(i*VRAMBitsPerMapping, VRAMBitsPerMapping);
|
result.SetRange(i*VRAMBitsPerMapping, VRAMBitsPerMapping);
|
||||||
banksToBeZeroed |= currentMappings[i];
|
banksToBeZeroed |= currentMappings[i];
|
||||||
Mapping[i] = currentMappings[i];
|
Mapping[i] = currentMappings[i];
|
||||||
}
|
}
|
||||||
|
@ -1209,19 +1209,19 @@ NonStupidBitField<Size/VRAMDirtyGranularity> VRAMTrackingSet<Size, MappingGranul
|
||||||
if (MappingGranularity == 16*1024)
|
if (MappingGranularity == 16*1024)
|
||||||
{
|
{
|
||||||
u32 dirty = ((u32*)VRAMDirty[num].Data)[i & (VRAMMask[num] >> 14)];
|
u32 dirty = ((u32*)VRAMDirty[num].Data)[i & (VRAMMask[num] >> 14)];
|
||||||
((u32*)result.Data)[i] |= dirty;
|
result.Data[i / 2] |= (u64)dirty << ((i&1)*32);
|
||||||
}
|
}
|
||||||
else if (MappingGranularity == 8*1024)
|
else if (MappingGranularity == 8*1024)
|
||||||
{
|
{
|
||||||
u16 dirty = ((u16*)VRAMDirty[num].Data)[i & (VRAMMask[num] >> 13)];
|
u16 dirty = ((u16*)VRAMDirty[num].Data)[i & (VRAMMask[num] >> 13)];
|
||||||
((u16*)result.Data)[i] |= dirty;
|
result.Data[i / 4] |= (u64)dirty << ((i&3)*16);
|
||||||
}
|
}
|
||||||
else if (MappingGranularity == 128*1024)
|
else if (MappingGranularity == 128*1024)
|
||||||
{
|
{
|
||||||
((u64*)result.Data)[i * 4 + 0] |= ((u64*)VRAMDirty[num].Data)[0];
|
result.Data[i * 4 + 0] |= VRAMDirty[num].Data[0];
|
||||||
((u64*)result.Data)[i * 4 + 1] |= ((u64*)VRAMDirty[num].Data)[1];
|
result.Data[i * 4 + 1] |= VRAMDirty[num].Data[1];
|
||||||
((u64*)result.Data)[i * 4 + 2] |= ((u64*)VRAMDirty[num].Data)[2];
|
result.Data[i * 4 + 2] |= VRAMDirty[num].Data[2];
|
||||||
((u64*)result.Data)[i * 4 + 3] |= ((u64*)VRAMDirty[num].Data)[3];
|
result.Data[i * 4 + 3] |= VRAMDirty[num].Data[3];
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -1236,7 +1236,7 @@ NonStupidBitField<Size/VRAMDirtyGranularity> VRAMTrackingSet<Size, MappingGranul
|
||||||
{
|
{
|
||||||
u32 num = __builtin_ctz(banksToBeZeroed);
|
u32 num = __builtin_ctz(banksToBeZeroed);
|
||||||
banksToBeZeroed &= ~(1 << num);
|
banksToBeZeroed &= ~(1 << num);
|
||||||
memset(VRAMDirty[num].Data, 0, sizeof(VRAMDirty[num].Data));
|
VRAMDirty[num].Clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
|
@ -1266,7 +1266,7 @@ void SyncDirtyFlags(u32* mappings, NonStupidBitField<Size>& writtenFlags)
|
||||||
mapping &= ~(1 << num);
|
mapping &= ~(1 << num);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
memset(writtenFlags.Data, 0, sizeof(writtenFlags.Data));
|
writtenFlags.Clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
void SyncDirtyFlags()
|
void SyncDirtyFlags()
|
||||||
|
|
|
@ -117,6 +117,7 @@ void GPU2D::Reset()
|
||||||
BGMosaicYMax = 0;
|
BGMosaicYMax = 0;
|
||||||
OBJMosaicY = 0;
|
OBJMosaicY = 0;
|
||||||
OBJMosaicYMax = 0;
|
OBJMosaicYMax = 0;
|
||||||
|
OBJMosaicYCount = 0;
|
||||||
|
|
||||||
BlendCnt = 0;
|
BlendCnt = 0;
|
||||||
EVA = 16;
|
EVA = 16;
|
||||||
|
@ -130,6 +131,7 @@ void GPU2D::Reset()
|
||||||
memset(DispFIFOBuffer, 0, 256*2);
|
memset(DispFIFOBuffer, 0, 256*2);
|
||||||
|
|
||||||
CaptureCnt = 0;
|
CaptureCnt = 0;
|
||||||
|
CaptureLatch = false;
|
||||||
|
|
||||||
MasterBrightness = 0;
|
MasterBrightness = 0;
|
||||||
|
|
||||||
|
|
|
@ -14,9 +14,8 @@
|
||||||
template <u32 Size>
|
template <u32 Size>
|
||||||
struct NonStupidBitField
|
struct NonStupidBitField
|
||||||
{
|
{
|
||||||
static_assert((Size % 8) == 0, "bitfield size must be a multiple of 8");
|
static constexpr u32 DataLength = (Size + 0x3F) >> 6;
|
||||||
static const u32 DataLength = Size / 8;
|
u64 Data[DataLength];
|
||||||
u8 Data[DataLength];
|
|
||||||
|
|
||||||
struct Ref
|
struct Ref
|
||||||
{
|
{
|
||||||
|
@ -25,13 +24,13 @@ struct NonStupidBitField
|
||||||
|
|
||||||
operator bool()
|
operator bool()
|
||||||
{
|
{
|
||||||
return BitField.Data[Idx >> 3] & (1 << (Idx & 0x7));
|
return BitField.Data[Idx >> 6] & (1ULL << (Idx & 0x3F));
|
||||||
}
|
}
|
||||||
|
|
||||||
Ref& operator=(bool set)
|
Ref& operator=(bool set)
|
||||||
{
|
{
|
||||||
BitField.Data[Idx >> 3] &= ~(1 << (Idx & 0x7));
|
BitField.Data[Idx >> 6] &= ~(1ULL << (Idx & 0x3F));
|
||||||
BitField.Data[Idx >> 3] |= ((u8)set << (Idx & 0x7));
|
BitField.Data[Idx >> 6] |= ((u64)set << (Idx & 0x3F));
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -43,27 +42,40 @@ struct NonStupidBitField
|
||||||
u32 BitIdx;
|
u32 BitIdx;
|
||||||
u64 RemainingBits;
|
u64 RemainingBits;
|
||||||
|
|
||||||
u32 operator*() { return DataIdx * 8 + BitIdx; }
|
u32 operator*() { return DataIdx * 64 + BitIdx; }
|
||||||
|
|
||||||
bool operator==(const Iterator& other) { return other.DataIdx == DataIdx; }
|
bool operator==(const Iterator& other)
|
||||||
bool operator!=(const Iterator& other) { return other.DataIdx != DataIdx; }
|
{
|
||||||
|
return other.DataIdx == DataIdx;
|
||||||
|
}
|
||||||
|
bool operator!=(const Iterator& other)
|
||||||
|
{
|
||||||
|
return other.DataIdx != DataIdx;
|
||||||
|
}
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
void Next()
|
void Next()
|
||||||
{
|
{
|
||||||
if (DataIdx >= DataLength)
|
if (RemainingBits == 0)
|
||||||
return;
|
|
||||||
|
|
||||||
while (RemainingBits == 0)
|
|
||||||
{
|
{
|
||||||
DataIdx += sizeof(T);
|
for (u32 i = DataIdx + 1; i < DataLength; i++)
|
||||||
if (DataIdx >= DataLength)
|
{
|
||||||
|
if (BitField.Data[i])
|
||||||
|
{
|
||||||
|
DataIdx = i;
|
||||||
|
RemainingBits = BitField.Data[i];
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
DataIdx = DataLength;
|
||||||
return;
|
return;
|
||||||
RemainingBits = *(T*)&BitField.Data[DataIdx];
|
done:;
|
||||||
}
|
}
|
||||||
|
|
||||||
BitIdx = __builtin_ctzll(RemainingBits);
|
BitIdx = __builtin_ctzll(RemainingBits);
|
||||||
RemainingBits &= ~(1ULL << BitIdx);
|
RemainingBits &= ~(1ULL << BitIdx);
|
||||||
|
|
||||||
|
if ((Size & 0x3F) && BitIdx >= Size)
|
||||||
|
DataIdx = DataLength;
|
||||||
}
|
}
|
||||||
|
|
||||||
Iterator operator++(int)
|
Iterator operator++(int)
|
||||||
|
@ -75,40 +87,35 @@ struct NonStupidBitField
|
||||||
|
|
||||||
Iterator& operator++()
|
Iterator& operator++()
|
||||||
{
|
{
|
||||||
if ((DataLength % 8) == 0)
|
Next();
|
||||||
Next<u64>();
|
|
||||||
else if ((DataLength % 4) == 0)
|
|
||||||
Next<u32>();
|
|
||||||
else if ((DataLength % 2) == 0)
|
|
||||||
Next<u16>();
|
|
||||||
else
|
|
||||||
Next<u8>();
|
|
||||||
|
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
NonStupidBitField(u32 start, u32 size)
|
NonStupidBitField(u32 startBit, u32 bitsCount)
|
||||||
{
|
{
|
||||||
memset(Data, 0, sizeof(Data));
|
Clear();
|
||||||
|
|
||||||
if (size == 0)
|
if (bitsCount == 0)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
u32 roundedStartBit = (start + 7) & ~7;
|
SetRange(startBit, bitsCount);
|
||||||
u32 roundedEndBit = (start + size) & ~7;
|
/*for (int i = 0; i < Size; i++)
|
||||||
if (roundedStartBit != roundedEndBit)
|
{
|
||||||
memset(Data + roundedStartBit / 8, 0xFF, (roundedEndBit - roundedStartBit) / 8);
|
bool state = (*this)[i];
|
||||||
|
if (state != (i >= startBit && i < startBit + bitsCount))
|
||||||
if (start & 0x7)
|
{
|
||||||
Data[start >> 3] = 0xFF << (start & 0x7);
|
for (u32 j = 0; j < DataLength; j++)
|
||||||
if ((start + size) & 0x7)
|
printf("data %016lx\n", Data[j]);
|
||||||
Data[(start + size) >> 3] = 0xFF >> ((start + size) & 0x7);
|
printf("blarg %d %d %d %d\n", i, startBit, bitsCount, Size);
|
||||||
|
abort();
|
||||||
|
}
|
||||||
|
}*/
|
||||||
}
|
}
|
||||||
|
|
||||||
NonStupidBitField()
|
NonStupidBitField()
|
||||||
{
|
{
|
||||||
memset(Data, 0, sizeof(Data));
|
Clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
Iterator End()
|
Iterator End()
|
||||||
|
@ -117,14 +124,20 @@ struct NonStupidBitField
|
||||||
}
|
}
|
||||||
Iterator Begin()
|
Iterator Begin()
|
||||||
{
|
{
|
||||||
if ((DataLength % 8) == 0)
|
for (u32 i = 0; i < DataLength; i++)
|
||||||
return ++Iterator{*this, 0, 0, *(u64*)Data};
|
{
|
||||||
else if ((DataLength % 4) == 0)
|
u32 idx = __builtin_ctzll(Data[i]);
|
||||||
return ++Iterator{*this, 0, 0, *(u32*)Data};
|
if (Data[i] && idx + i * 64 < Size)
|
||||||
else if ((DataLength % 2) == 0)
|
{
|
||||||
return ++Iterator{*this, 0, 0, *(u16*)Data};
|
return {*this, i, idx, Data[i] & ~(1ULL << idx)};
|
||||||
else
|
}
|
||||||
return ++Iterator{*this, 0, 0, *Data};
|
}
|
||||||
|
return End();
|
||||||
|
}
|
||||||
|
|
||||||
|
void Clear()
|
||||||
|
{
|
||||||
|
memset(Data, 0, sizeof(Data));
|
||||||
}
|
}
|
||||||
|
|
||||||
Ref operator[](u32 idx)
|
Ref operator[](u32 idx)
|
||||||
|
@ -132,6 +145,27 @@ struct NonStupidBitField
|
||||||
return Ref{*this, idx};
|
return Ref{*this, idx};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Sets every bit in the half-open range [startBit, startBit + bitsCount).
//
// startBit  - index of the first bit to set.
// bitsCount - number of consecutive bits to set; 0 is a no-op.
//
// The range is clamped to Size, so a request that starts at or runs past the
// end of the bitfield never touches memory outside Data. Bits outside the
// range are left untouched.
void SetRange(u32 startBit, u32 bitsCount)
{
    if (bitsCount == 0 || startBit >= Size)
        return;

    // Compute the exclusive end in 64 bits so startBit + bitsCount cannot wrap.
    u64 endBit = (u64)startBit + bitsCount;
    if (endBit > Size)
        endBit = Size;

    const u64 lastBit = endBit - 1; // inclusive; endBit > startBit >= 0 here
    const u32 firstEntry = startBit >> 6;
    const u32 lastEntry = (u32)(lastBit >> 6);

    // Both shift amounts are in [0, 63], so neither shift is undefined.
    // (The previous single-word path used (1ULL << bitsCount) - 1, which is
    // undefined for bitsCount == 64.)
    const u64 allOnes = ~0ULL;
    const u64 headMask = allOnes << (startBit & 0x3F);
    const u64 tailMask = allOnes >> (63 - (lastBit & 0x3F));

    if (firstEntry == lastEntry)
    {
        // Range lives inside a single 64-bit word.
        Data[firstEntry] |= headMask & tailMask;
        return;
    }

    Data[firstEntry] |= headMask;
    // Words strictly between the first and last are fully covered.
    for (u32 i = firstEntry + 1; i < lastEntry; i++)
        Data[i] = allOnes;
    Data[lastEntry] |= tailMask;
}
|
||||||
|
|
||||||
NonStupidBitField& operator|=(const NonStupidBitField<Size>& other)
|
NonStupidBitField& operator|=(const NonStupidBitField<Size>& other)
|
||||||
{
|
{
|
||||||
for (u32 i = 0; i < DataLength; i++)
|
for (u32 i = 0; i < DataLength; i++)
|
||||||
|
|
Loading…
Reference in New Issue