112 lines
3.2 KiB
C
112 lines
3.2 KiB
C
/* crc32_braid.c -- compute the CRC-32 of a data stream
|
|
* Copyright (C) 1995-2022 Mark Adler
|
|
* For conditions of distribution and use, see copyright notice in README.md
|
|
*
|
|
* This interleaved implementation of a CRC makes use of pipelined multiple
|
|
* arithmetic-logic units, commonly found in modern CPU cores. It is due to
|
|
* Kadatch and Jenkins (2010). See doc/crc-doc.1.0.pdf in upstream.
|
|
*/
|
|
|
|
#include "common.h"
|
|
#include "crc32_braid_tbl.h"
|
|
|
|
#define DO1 c = crc_table[(c ^ *buf++) & 0xff] ^ (c >> 8)
|
|
#define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1
|
|
|
|
static uint32_t crc_word(uint64_t data) {
|
|
unsigned k;
|
|
for (k = 0; k < sizeof(uint64_t); k++)
|
|
data = (data >> 8) ^ crc_table[data & 0xff];
|
|
return (uint32_t)data;
|
|
}
|
|
|
|
__attribute__((visibility("hidden")))
|
|
uint32_t crc32_braid(uint32_t crc, const uint8_t *buf, uint32_t len) {
|
|
register uint32_t c;
|
|
|
|
c = crc;
|
|
|
|
/* If provided enough bytes, do a braided CRC calculation. */
|
|
if (len >= 5 * sizeof(uint64_t) + sizeof(uint64_t) - 1) {
|
|
uint64_t blks;
|
|
uint64_t const *words;
|
|
unsigned k;
|
|
|
|
/* Compute the CRC up to a uint64_t boundary. */
|
|
while (len && ((uint64_t)buf & (sizeof(uint64_t) - 1)) != 0) {
|
|
len--;
|
|
DO1;
|
|
}
|
|
|
|
/* Compute the CRC on as many 5 uint64_t blocks as are available. */
|
|
blks = len / (5 * sizeof(uint64_t));
|
|
len -= blks * 5 * sizeof(uint64_t);
|
|
words = (uint64_t const *)buf;
|
|
|
|
uint64_t crc0, word0, comb;
|
|
uint64_t crc1, word1;
|
|
uint64_t crc2, word2;
|
|
uint64_t crc3, word3;
|
|
uint64_t crc4, word4;
|
|
|
|
/* Initialize the CRC for each braid. */
|
|
crc0 = c;
|
|
crc1 = 0;
|
|
crc2 = 0;
|
|
crc3 = 0;
|
|
crc4 = 0;
|
|
|
|
/* Process the first blks-1 blocks, computing the CRCs on each braid independently. */
|
|
while (--blks) {
|
|
/* Load the word for each braid into registers. */
|
|
word0 = crc0 ^ words[0];
|
|
word1 = crc1 ^ words[1];
|
|
word2 = crc2 ^ words[2];
|
|
word3 = crc3 ^ words[3];
|
|
word4 = crc4 ^ words[4];
|
|
|
|
words += 5;
|
|
|
|
/* Compute and update the CRC for each word. The loop should get unrolled. */
|
|
crc0 = crc_braid_table[0][word0 & 0xff];
|
|
crc1 = crc_braid_table[0][word1 & 0xff];
|
|
crc2 = crc_braid_table[0][word2 & 0xff];
|
|
crc3 = crc_braid_table[0][word3 & 0xff];
|
|
crc4 = crc_braid_table[0][word4 & 0xff];
|
|
|
|
for (k = 1; k < sizeof(uint64_t); k++) {
|
|
crc0 ^= crc_braid_table[k][(word0 >> (k << 3)) & 0xff];
|
|
crc1 ^= crc_braid_table[k][(word1 >> (k << 3)) & 0xff];
|
|
crc2 ^= crc_braid_table[k][(word2 >> (k << 3)) & 0xff];
|
|
crc3 ^= crc_braid_table[k][(word3 >> (k << 3)) & 0xff];
|
|
crc4 ^= crc_braid_table[k][(word4 >> (k << 3)) & 0xff];
|
|
}
|
|
}
|
|
|
|
/* Process the last block, combining the CRCs of the 5 braids at the same time. */
|
|
comb = crc_word(crc0 ^ words[0]);
|
|
comb = crc_word(crc1 ^ words[1] ^ comb);
|
|
comb = crc_word(crc2 ^ words[2] ^ comb);
|
|
comb = crc_word(crc3 ^ words[3] ^ comb);
|
|
comb = crc_word(crc4 ^ words[4] ^ comb);
|
|
|
|
words += 5;
|
|
c = comb;
|
|
|
|
/* Update the pointer to the remaining bytes to process. */
|
|
buf = (const uint8_t *)words;
|
|
}
|
|
|
|
/* Complete the computation of the CRC on any remaining bytes. */
|
|
while (len >= 8) {
|
|
len -= 8;
|
|
DO8;
|
|
}
|
|
while (len) {
|
|
len--;
|
|
DO1;
|
|
}
|
|
|
|
return c;
|
|
}
|