Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,10 @@ public abstract class DictionaryValuesWriter extends ValuesWriter implements Req
/* will become true if the dictionary becomes too big */
protected boolean dictionaryTooBig;

/* set to true when the dictionary exceeds maxDictionaryByteSize or MAX_DICTIONARY_ENTRIES,
* checked by shouldFallBack() to avoid repeated virtual dispatch to getDictionarySize() on every write */
private boolean dictionarySizeExceeded;

/* current size in bytes the dictionary will take once serialized */
protected long dictionaryByteSize;

Expand Down Expand Up @@ -121,8 +125,20 @@ protected DictionaryPage dictPage(ValuesWriter dictPageWriter) {

@Override
public boolean shouldFallBack() {
// if the dictionary reaches the max byte size or the values can not be encoded on 4 bytes anymore.
return dictionaryByteSize > maxDictionaryByteSize || getDictionarySize() > MAX_DICTIONARY_ENTRIES;
return dictionarySizeExceeded;
}

/**
* Called by subclass write methods after adding a new dictionary entry to check if the dictionary
* has exceeded its size limits. This avoids the per-value virtual dispatch overhead of calling
* getDictionarySize() on every write -- the check only runs when a new entry is actually added.
*
* @param newDictionarySize the current dictionary size after adding the new entry
*/
protected void checkDictionarySizeLimit(int newDictionarySize) {
if (dictionaryByteSize > maxDictionaryByteSize || newDictionarySize > MAX_DICTIONARY_ENTRIES) {
dictionarySizeExceeded = true;
}
}

@Override
Expand Down Expand Up @@ -208,6 +224,7 @@ public void resetDictionary() {
lastUsedDictionaryByteSize = 0;
lastUsedDictionarySize = 0;
dictionaryTooBig = false;
dictionarySizeExceeded = false;
clearDictionaryContent();
}

Expand Down Expand Up @@ -250,6 +267,7 @@ public void writeBytes(Binary v) {
binaryDictionaryContent.put(v.copy(), id);
// length as int (4 bytes) + actual bytes
dictionaryByteSize += 4L + v.length();
checkDictionarySizeLimit(id + 1);
}
encodedValues.add(id);
}
Expand Down Expand Up @@ -320,6 +338,7 @@ public void writeBytes(Binary value) {
id = binaryDictionaryContent.size();
binaryDictionaryContent.put(value.copy(), id);
dictionaryByteSize += length;
checkDictionarySizeLimit(id + 1);
}
encodedValues.add(id);
}
Expand Down Expand Up @@ -364,6 +383,7 @@ public void writeLong(long v) {
id = longDictionaryContent.size();
longDictionaryContent.put(v, id);
dictionaryByteSize += 8;
checkDictionarySizeLimit(id + 1);
}
encodedValues.add(id);
}
Expand Down Expand Up @@ -435,6 +455,7 @@ public void writeDouble(double v) {
id = doubleDictionaryContent.size();
doubleDictionaryContent.put(v, id);
dictionaryByteSize += 8;
checkDictionarySizeLimit(id + 1);
}
encodedValues.add(id);
}
Expand Down Expand Up @@ -506,6 +527,7 @@ public void writeInteger(int v) {
id = intDictionaryContent.size();
intDictionaryContent.put(v, id);
dictionaryByteSize += 4;
checkDictionarySizeLimit(id + 1);
}
encodedValues.add(id);
}
Expand Down Expand Up @@ -578,6 +600,7 @@ public void writeFloat(float v) {
id = floatDictionaryContent.size();
floatDictionaryContent.put(v, id);
dictionaryByteSize += 4;
checkDictionarySizeLimit(id + 1);
}
encodedValues.add(id);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -681,6 +681,29 @@ public void testZeroValues() throws IOException {
}
}

@Test
public void testCheckDictionarySizeLimitExceedsByEntryCount() {
// Use a large maxDictionaryByteSize so only the entry-count limit can trigger
int maxDictionaryByteSize = Integer.MAX_VALUE;
try (PlainIntegerDictionaryValuesWriter writer = new PlainIntegerDictionaryValuesWriter(
maxDictionaryByteSize, PLAIN_DICTIONARY, PLAIN_DICTIONARY, allocator)) {

assertFalse("should not fall back initially", writer.shouldFallBack());

// At the limit (Integer.MAX_VALUE - 1 entries): should NOT trigger
writer.checkDictionarySizeLimit(Integer.MAX_VALUE - 1);
assertFalse("should not fall back when entry count equals MAX_DICTIONARY_ENTRIES", writer.shouldFallBack());

// Exceeding the limit (Integer.MAX_VALUE entries): should trigger
writer.checkDictionarySizeLimit(Integer.MAX_VALUE);
assertTrue("should fall back when entry count exceeds MAX_DICTIONARY_ENTRIES", writer.shouldFallBack());

// resetDictionary clears the flag
writer.resetDictionary();
assertFalse("should not fall back after resetDictionary", writer.shouldFallBack());
}
}

@Test
public void testBooleanDictionary() throws IOException {
// Create a dictionary page with boolean values (false, true)
Expand Down