Util: Text codecs now properly handle null bytes

This commit is contained in:
Jeffrey Pfau 2016-11-01 22:28:26 -07:00
parent a3ee07a6cc
commit dbdf10843e
2 changed files with 96 additions and 17 deletions

View File

@ -824,6 +824,80 @@ M_TEST_DEFINE(controlCodes) {
vf->close(vf); vf->close(vf);
} }
// Verifies that table entries whose key contains 0x00 bytes are decoded
// correctly, both when the null byte is a complete key on its own and when it
// is the first or last byte of a longer key.
M_TEST_DEFINE(nullBytes) {
	// Table under test (hex key = output):
	//   00    -> "A"      01    -> "B"
	//   00 00 -> "a"      01 00 -> "c"
	//   00 01 -> "b"
	static const char file[] =
		"00=A\n"
		"0000=a\n"
		"0001=b\n"
		"01=B\n"
		"0100=c";
	// sizeof - 1 drops the string literal's terminating NUL from the file data.
	struct VFile* vf = VFileFromConstMemory(file, sizeof(file) - 1);
	struct TextCodec codec;
	// Third argument is createReverse; only the forward table is needed here.
	assert_true(TextCodecLoadTBL(&codec, vf, false));
	struct TextCodecIterator iter;
	// {0} rather than {}: an empty initializer list is not valid ISO C before C23.
	// The buffer is not cleared between cases; each case only compares the
	// first `len` bytes it produced, so stale bytes are never inspected.
	uint8_t output[16] = {0};
	size_t len;

	// Input 00: the single null byte is a complete key and yields "A" once
	// the stream is finished.
	len = 0;
	TextCodecStartDecode(&codec, &iter);
	len += TextCodecAdvance(&iter, 0, output + len, sizeof(output) - len);
	len += TextCodecFinish(&iter, output + len, sizeof(output) - len);
	assert_int_equal(len, 1);
	assert_memory_equal(output, "A", 1);

	// Input 00 00: the longer key wins, giving "a" instead of "AA".
	len = 0;
	TextCodecStartDecode(&codec, &iter);
	len += TextCodecAdvance(&iter, 0, output + len, sizeof(output) - len);
	len += TextCodecAdvance(&iter, 0, output + len, sizeof(output) - len);
	len += TextCodecFinish(&iter, output + len, sizeof(output) - len);
	assert_int_equal(len, 1);
	assert_memory_equal(output, "a", 1);

	// Input 00 00 00: "a" for the first pair, then "A" for the trailing byte.
	len = 0;
	TextCodecStartDecode(&codec, &iter);
	len += TextCodecAdvance(&iter, 0, output + len, sizeof(output) - len);
	len += TextCodecAdvance(&iter, 0, output + len, sizeof(output) - len);
	len += TextCodecAdvance(&iter, 0, output + len, sizeof(output) - len);
	len += TextCodecFinish(&iter, output + len, sizeof(output) - len);
	assert_int_equal(len, 2);
	assert_memory_equal(output, "aA", 2);

	// Input 00 01: null byte as the first byte of a two-byte key -> "b".
	len = 0;
	TextCodecStartDecode(&codec, &iter);
	len += TextCodecAdvance(&iter, 0, output + len, sizeof(output) - len);
	len += TextCodecAdvance(&iter, 1, output + len, sizeof(output) - len);
	len += TextCodecFinish(&iter, output + len, sizeof(output) - len);
	assert_int_equal(len, 1);
	assert_memory_equal(output, "b", 1);

	// Input 01: non-null single-byte key that is also a prefix of 01 00 -> "B".
	len = 0;
	TextCodecStartDecode(&codec, &iter);
	len += TextCodecAdvance(&iter, 1, output + len, sizeof(output) - len);
	len += TextCodecFinish(&iter, output + len, sizeof(output) - len);
	assert_int_equal(len, 1);
	assert_memory_equal(output, "B", 1);

	// Input 01 00: null byte as the last byte of a two-byte key -> "c".
	len = 0;
	TextCodecStartDecode(&codec, &iter);
	len += TextCodecAdvance(&iter, 1, output + len, sizeof(output) - len);
	len += TextCodecAdvance(&iter, 0, output + len, sizeof(output) - len);
	len += TextCodecFinish(&iter, output + len, sizeof(output) - len);
	assert_int_equal(len, 1);
	assert_memory_equal(output, "c", 1);

	// Input 01 00 01: "c" for the first pair, then "B" for the trailing byte.
	len = 0;
	TextCodecStartDecode(&codec, &iter);
	len += TextCodecAdvance(&iter, 1, output + len, sizeof(output) - len);
	len += TextCodecAdvance(&iter, 0, output + len, sizeof(output) - len);
	len += TextCodecAdvance(&iter, 1, output + len, sizeof(output) - len);
	len += TextCodecFinish(&iter, output + len, sizeof(output) - len);
	assert_int_equal(len, 2);
	assert_memory_equal(output, "cB", 2);

	TextCodecDeinit(&codec);
	vf->close(vf);
}
M_TEST_SUITE_DEFINE(TextCodec, M_TEST_SUITE_DEFINE(TextCodec,
cmocka_unit_test(emptyCodec), cmocka_unit_test(emptyCodec),
cmocka_unit_test(singleEntry), cmocka_unit_test(singleEntry),
@ -834,4 +908,5 @@ M_TEST_SUITE_DEFINE(TextCodec,
cmocka_unit_test(overlappingEntry), cmocka_unit_test(overlappingEntry),
cmocka_unit_test(overlappingEntryReverse), cmocka_unit_test(overlappingEntryReverse),
cmocka_unit_test(raggedEntry), cmocka_unit_test(raggedEntry),
cmocka_unit_test(controlCodes)) cmocka_unit_test(controlCodes),
cmocka_unit_test(nullBytes))

View File

@ -32,10 +32,11 @@ static struct TextCodecNode* _createNode(void) {
return node; return node;
} }
static void _insertLeafNullTerminated(struct TextCodecNode* node, uint8_t* word, uint8_t* output) { static void _insertLeaf(struct TextCodecNode* node, uint8_t* word, size_t wordLength, uint8_t* output, size_t outputLength) {
if (!word[0]) { if (!wordLength) {
node->leafLength = strlen((char*) output); node->leafLength = outputLength;
node->leaf = (uint8_t*) strdup((char*) output); node->leaf = malloc(outputLength);
memcpy(node->leaf, output, outputLength);
return; return;
} }
struct TextCodecNode* subnode = TableLookup(&node->children, word[0]); struct TextCodecNode* subnode = TableLookup(&node->children, word[0]);
@ -43,7 +44,7 @@ static void _insertLeafNullTerminated(struct TextCodecNode* node, uint8_t* word,
subnode = _createNode(); subnode = _createNode();
TableInsert(&node->children, word[0], subnode); TableInsert(&node->children, word[0], subnode);
} }
_insertLeafNullTerminated(subnode, &word[1], output); _insertLeaf(subnode, &word[1], wordLength - 1, output, outputLength);
} }
bool TextCodecLoadTBL(struct TextCodec* codec, struct VFile* vf, bool createReverse) { bool TextCodecLoadTBL(struct TextCodec* codec, struct VFile* vf, bool createReverse) {
@ -59,17 +60,20 @@ bool TextCodecLoadTBL(struct TextCodec* codec, struct VFile* vf, bool createReve
ssize_t length; ssize_t length;
while ((length = vf->readline(vf, lineBuffer, sizeof(lineBuffer))) > 0) { while ((length = vf->readline(vf, lineBuffer, sizeof(lineBuffer))) > 0) {
memset(wordBuffer, 0, sizeof(wordBuffer)); memset(wordBuffer, 0, sizeof(wordBuffer));
if (lineBuffer[length - 1] == '\n') { if (lineBuffer[length - 1] == '\n' || lineBuffer[length - 1] == '\r') {
lineBuffer[length - 1] = '\0'; --length;
}
if (!length) {
continue;
} }
if (lineBuffer[length - 1] == '\r') { if (lineBuffer[length - 1] == '\r') {
lineBuffer[length - 1] = '\0'; --length;
} }
if (length > 1 && lineBuffer[length - 2] == '\r') { if (!length) {
lineBuffer[length - 2] = '\0'; continue;
} }
size_t i; size_t i;
for (i = 0; i < sizeof(wordBuffer) - 1; ++i) { for (i = 0; i < sizeof(wordBuffer) - 1 && i < (size_t) length; ++i) {
if (!hex8(&lineBuffer[i * 2], &wordBuffer[i])) { if (!hex8(&lineBuffer[i * 2], &wordBuffer[i])) {
break; break;
} }
@ -102,19 +106,19 @@ bool TextCodecLoadTBL(struct TextCodec* codec, struct VFile* vf, bool createReve
if (i == 0) { if (i == 0) {
return false; return false;
} }
lineBuffer[1] = '\0'; _insertLeaf(codec->forwardRoot, wordBuffer, i, (uint8_t*) lineBuffer, 1);
_insertLeafNullTerminated(codec->forwardRoot, wordBuffer, (uint8_t*) lineBuffer);
if (codec->reverseRoot) { if (codec->reverseRoot) {
_insertLeafNullTerminated(codec->reverseRoot, (uint8_t*) lineBuffer, wordBuffer); _insertLeaf(codec->reverseRoot, (uint8_t*) lineBuffer, 1, wordBuffer, i);
} }
} }
} else { } else {
if (lineBuffer[i * 2] != '=') { if (lineBuffer[i * 2] != '=') {
return false; return false;
} }
_insertLeafNullTerminated(codec->forwardRoot, wordBuffer, (uint8_t*) &lineBuffer[i * 2 + 1]); size_t offset = i * 2 + 1;
_insertLeaf(codec->forwardRoot, wordBuffer, i, (uint8_t*) &lineBuffer[offset], length - offset);
if (codec->reverseRoot) { if (codec->reverseRoot) {
_insertLeafNullTerminated(codec->reverseRoot, (uint8_t*) &lineBuffer[i * 2 + 1], wordBuffer); _insertLeaf(codec->reverseRoot, (uint8_t*) &lineBuffer[offset], length - offset, wordBuffer, i);
} }
} }
} }