diff --git a/rewind.c b/rewind.c index a10624af30..86082aedb4 100644 --- a/rewind.c +++ b/rewind.c @@ -61,10 +61,10 @@ extern char double_check_sizeof_size_t[(sizeof(size_t)==8)?1:-1]; #ifdef USE_64BIT static inline void write_size_t(uint16_t* ptr, size_t val) { - ptr[0]=val>>0; - ptr[1]=val>>16; - ptr[2]=val>>32; - ptr[3]=val>>48; + ptr[0] = val>>0; + ptr[1] = val>>16; + ptr[2] = val>>32; + ptr[3] = val>>48; } static inline size_t read_size_t(uint16_t* ptr) @@ -77,8 +77,8 @@ static inline size_t read_size_t(uint16_t* ptr) #else static inline void write_size_t(uint16_t* ptr, size_t val) { - ptr[0]=val; - ptr[1]=val>>16; + ptr[0] = val; + ptr[1] = val>>16; } static inline size_t read_size_t(uint16_t* ptr) @@ -110,22 +110,22 @@ struct state_manager { state_manager_t *state_manager_new(size_t state_size, size_t buffer_size) { - state_manager_t *state=malloc(sizeof(*state)); + state_manager_t *state = (state_manager_t*)malloc(sizeof(*state)); - state->capacity=0; - state->blocksize=0; + state->capacity = 0; + state->blocksize = 0; - int newblocksize=((state_size-1)|(sizeof(uint16_t)-1))+1; - state->blocksize=newblocksize; + int newblocksize = ((state_size-1)|(sizeof(uint16_t)-1))+1; + state->blocksize = newblocksize; - const int maxcblkcover=UINT16_MAX*sizeof(uint16_t); - const int maxcblks=(state->blocksize+maxcblkcover-1)/maxcblkcover; - state->maxcompsize=state->blocksize + maxcblks*sizeof(uint16_t)*2 + sizeof(uint16_t)+sizeof(uint32_t) + sizeof(size_t)*2; + const int maxcblkcover = UINT16_MAX*sizeof(uint16_t); + const int maxcblks = (state->blocksize+maxcblkcover-1)/maxcblkcover; + state->maxcompsize = state->blocksize + maxcblks*sizeof(uint16_t)*2 + sizeof(uint16_t)+sizeof(uint32_t) + sizeof(size_t)*2; - state->data=malloc(buffer_size); + state->data = (char*)malloc(buffer_size); - state->thisblock=calloc(state->blocksize+sizeof(uint16_t)*8, 1); - state->nextblock=calloc(state->blocksize+sizeof(uint16_t)*8, 1); + state->thisblock = (char*)calloc(state->blocksize+sizeof(uint16_t)*8, 1); + state->nextblock = (char*)calloc(state->blocksize+sizeof(uint16_t)*8, 1); if (!state->data || !state->thisblock || !state->nextblock) { free(state->data); @@ -138,17 +138,17 @@ state_manager_t *state_manager_new(size_t state_size, size_t buffer_size) //There is also a large amount of data that's the same, to stop the other scan //There is also some padding at the end. This is so we don't read outside the buffer end if we're reading in large blocks; // it doesn't make any difference to us, but sacrificing 16 bytes to get Valgrind happy is worth it. - *(uint16_t*)(state->thisblock+state->blocksize+sizeof(uint16_t)*3)=0xFFFF; - *(uint16_t*)(state->nextblock+state->blocksize+sizeof(uint16_t)*3)=0x0000; + *(uint16_t*)(state->thisblock+state->blocksize+sizeof(uint16_t)*3) = 0xFFFF; + *(uint16_t*)(state->nextblock+state->blocksize+sizeof(uint16_t)*3) = 0x0000; state->capacity=buffer_size; - state->head=state->data+sizeof(size_t); - state->tail=state->data+sizeof(size_t); + state->head = state->data+sizeof(size_t); + state->tail = state->data+sizeof(size_t); - state->thisblock_valid=false; + state->thisblock_valid = false; - state->entries=0; + state->entries = 0; return state; } @@ -163,53 +163,53 @@ void state_manager_free(state_manager_t *state) bool state_manager_pop(state_manager_t *state, void **data) { - *data=NULL; + *data = NULL; if (state->thisblock_valid) { - state->thisblock_valid=false; + state->thisblock_valid = false; state->entries--; - *data=state->thisblock; + *data = state->thisblock; return true; } - if (state->head==state->tail) return false; + if (state->head == state->tail) return false; - size_t start=read_size_t((uint16_t*)(state->head - sizeof(size_t))); - state->head=state->data+start; + size_t start = read_size_t((uint16_t*)(state->head - sizeof(size_t))); + state->head = state->data+start; - const char * compressed=state->data+start+sizeof(size_t); - char * out=state->thisblock; + const char *compressed = state->data+start+sizeof(size_t); + char *out = state->thisblock; //Begin decompression code //out is the last pushed (or returned) state - const uint16_t *compressed16=(const uint16_t*)compressed; - uint16_t *out16=(uint16_t*)out; + const uint16_t *compressed16 = (const uint16_t*)compressed; + uint16_t *out16 = (uint16_t*)out; while (true) { - uint16_t numchanged=*(compressed16++); + uint16_t numchanged = *(compressed16++); if (numchanged) { - out16+=*(compressed16++); + out16 += *(compressed16++); //We could do memcpy, but it seems that memcpy has a constant-per-call overhead that actually shows up. //Our average size in here seems to be 8 or something. //Therefore, we do something with lower overhead. - for (int i=0;ientries--; - *data=state->thisblock; + *data = state->thisblock; return true; } @@ -222,7 +222,7 @@ void *state_manager_push_where(state_manager_t *state) void *ignore; if (state_manager_pop(state, &ignore)) { - state->thisblock_valid=true; + state->thisblock_valid = true; } } @@ -236,28 +236,28 @@ bool state_manager_push_do(state_manager_t *state) if (state->capacitymaxcompsize) return false; recheckcapacity:; - size_t headpos=(state->head-state->data); - size_t tailpos=(state->tail-state->data); - size_t remaining=(tailpos+state->capacity-sizeof(size_t)-headpos-1)%state->capacity + 1; - if (remaining<=state->maxcompsize) + size_t headpos = (state->head-state->data); + size_t tailpos = (state->tail-state->data); + size_t remaining = (tailpos+state->capacity-sizeof(size_t)-headpos-1)%state->capacity + 1; + if (remaining <= state->maxcompsize) { - state->tail=state->data + read_size_t((uint16_t*)state->tail); + state->tail = state->data + read_size_t((uint16_t*)state->tail); state->entries--; goto recheckcapacity; } - const char *old=state->thisblock; - const char *new=state->nextblock; - char *compressed=state->head+sizeof(size_t); + const char *oldb = state->thisblock; + const char *newb = state->nextblock; + char *compressed = state->head+sizeof(size_t); //Begin compression code; 'compressed' will point to the end of the compressed data (excluding the prev pointer). - const uint16_t *old16=(const uint16_t*)old; - const uint16_t *new16=(const uint16_t*)new; - uint16_t *compressed16=(uint16_t*)compressed; - size_t num16s=state->blocksize/sizeof(uint16_t); + const uint16_t *old16 = (const uint16_t*)oldb; + const uint16_t *new16 = (const uint16_t*)newb; + uint16_t *compressed16 = (uint16_t*)compressed; + size_t num16s = state->blocksize/sizeof(uint16_t); while (num16s) { - const uint16_t * oldprev=old16; + const uint16_t *oldprev = old16; #ifdef NO_UNALIGNED_MEM while ((uintptr_t)old16 & (sizeof(size_t)-1) && *old16==*new16) { @@ -267,102 +267,104 @@ bool state_manager_push_do(state_manager_t *state) if (*old16==*new16) #endif { - const size_t *olds=(const size_t*)old16; - const size_t *news=(const size_t*)new16; + const size_t *olds = (const size_t*)old16; + const size_t *news = (const size_t*)new16; - while (*olds==*news) + while (*olds == *news) { olds++; news++; } - old16=(const uint16_t*)olds; - new16=(const uint16_t*)news; + old16 = (const uint16_t*)olds; + new16 = (const uint16_t*)news; - while (*old16==*new16) + while (*old16 == *new16) { old16++; new16++; } } - size_t skip=(old16-oldprev); + size_t skip = (old16-oldprev); - if (skip>=num16s) break; - num16s-=skip; + if (skip >= num16s) break; + num16s -= skip; - if (skip>UINT16_MAX) + if (skip > UINT16_MAX) { - if (skip>UINT32_MAX) + if (skip > UINT32_MAX) { - old16-=skip; - new16-=skip; - skip=UINT32_MAX; - old16+=skip; - new16+=skip; + // This will make it scan the entire thing again, but it only hits on 8GB unchanged + // data anyways, and if you're doing that, you've got bigger problems. + old16 -= skip; + new16 -= skip; + skip = UINT32_MAX; + old16 += skip; + new16 += skip; } - *(compressed16++)=0; - *(compressed16++)=skip; - *(compressed16++)=skip>>16; - skip=0; + *(compressed16++) = 0; + *(compressed16++) = skip; + *(compressed16++) = skip>>16; + skip = 0; continue; } size_t changed; - const uint16_t *old16prev=old16; + const uint16_t *old16prev = old16; //Comparing two or three words makes no real difference. //With two, the smaller blocks are less likely to be chopped up elsewhere due to 64KB; // with three, we get larger blocks which should be a minuscle bit faster to decompress, // but probably a little slower to compress. Since compression is more bottleneck than decompression is, we favor that. - while (old16[0]!=new16[0] || old16[1]!=new16[1]) + while (old16[0] != new16[0] || old16[1] != new16[1]) { old16++; new16++; //Optimize this by only checking one at the time for as long as possible. - while (*old16!=*new16) + while (*old16 != *new16) { old16++; new16++; } } - changed=(old16-old16prev); + changed = (old16-old16prev); if (!changed) continue; - if (changed>UINT16_MAX) + if (changed > UINT16_MAX) { - old16-=changed; - new16-=changed; - changed=UINT16_MAX; - old16+=changed; - new16+=changed; + old16 -= changed; + new16 -= changed; + changed = UINT16_MAX; + old16 += changed; + new16 += changed; } - num16s-=changed; - *(compressed16++)=changed; - *(compressed16++)=skip; - for (int i=0;idata+state->maxcompsize > state->capacity) + if (compressed - state->data + state->maxcompsize > state->capacity) { - compressed=state->data; - if (state->tail==state->data+sizeof(size_t)) state->tail=state->data + *(size_t*)state->tail; + compressed = state->data; + if (state->tail == state->data+sizeof(size_t)) state->tail = state->data + *(size_t*)state->tail; } write_size_t((uint16_t*)compressed, state->head-state->data); - compressed+=sizeof(size_t); + compressed += sizeof(size_t); write_size_t((uint16_t*)state->head, compressed-state->data); - state->head=compressed; + state->head = compressed; } else { - state->thisblock_valid=true; + state->thisblock_valid = true; } - char *swap=state->thisblock; - state->thisblock=state->nextblock; - state->nextblock=swap; + char *swap = state->thisblock; + state->thisblock = state->nextblock; + state->nextblock = swap; state->entries++; @@ -371,11 +373,11 @@ bool state_manager_push_do(state_manager_t *state) void state_manager_capacity(state_manager_t *state, unsigned int * entries, size_t * bytes, bool * full) { - size_t headpos=(state->head-state->data); - size_t tailpos=(state->tail-state->data); - size_t remaining=(tailpos+state->capacity-sizeof(size_t)-headpos-1)%state->capacity + 1; + size_t headpos = (state->head-state->data); + size_t tailpos = (state->tail-state->data); + size_t remaining = (tailpos+state->capacity-sizeof(size_t)-headpos-1)%state->capacity + 1; - if (entries) *entries=state->entries; - if (bytes) *bytes=(state->capacity-remaining); - if (full) *full=(remaining<=state->maxcompsize*2); + if (entries) *entries = state->entries; + if (bytes) *bytes = (state->capacity-remaining); + if (full) *full = (remaining<=state->maxcompsize*2); }