From 2b4b67ee8af414dd2064e1cf96dd5398b6b09e7b Mon Sep 17 00:00:00 2001 From: liuxiaocs7 Date: Thu, 25 Jun 2026 00:39:25 +0800 Subject: [PATCH 1/3] HBASE-30256 FuzzyRowFilter mishandles the fuzzy-mask encoding on the no-unsafe path --- .../hadoop/hbase/filter/FuzzyRowFilter.java | 156 +++++++--- .../hbase/filter/TestFuzzyRowFilter.java | 37 +++ .../filter/TestFuzzyRowFilterWoUnsafe.java | 267 ++++++++++++++++++ 3 files changed, 425 insertions(+), 35 deletions(-) create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFuzzyRowFilterWoUnsafe.java diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FuzzyRowFilter.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FuzzyRowFilter.java index 3729353f0c0d..a9286c2a6b9a 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FuzzyRowFilter.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FuzzyRowFilter.java @@ -22,7 +22,6 @@ import java.util.Arrays; import java.util.Comparator; import java.util.List; -import java.util.Objects; import java.util.PriorityQueue; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellComparator; @@ -106,9 +105,12 @@ public FuzzyRowFilter(List> fuzzyKeysData) { p.setFirst(Arrays.copyOf(aFuzzyKeysData.getFirst(), aFuzzyKeysData.getFirst().length)); p.setSecond(Arrays.copyOf(aFuzzyKeysData.getSecond(), aFuzzyKeysData.getSecond().length)); - // update mask ( 0 -> -1 (0xff), 1 -> 2) + // normalize the mask into the encoding the active (unsafe/no-unsafe) path expects p.setSecond(preprocessMask(p.getSecond())); - preprocessSearchKey(p); + preprocessSearchKey(p, UNSAFE_UNALIGNED); + // Fix the unsafe mask into the {-1, 0} form satisfies() expects once, here, so the stored + // mask is never mutated during scanning (no-op on the no-unsafe path, which keeps {0, 1}). + preprocessMaskForSatisfies(p.getSecond(), UNSAFE_UNALIGNED); fuzzyKeyDataCopy.add(p); } @@ -116,29 +118,53 @@ public FuzzyRowFilter(List> fuzzyKeysData) { this.tracker = new RowTracker(); } - private void preprocessSearchKey(Pair p) { - if (!UNSAFE_UNALIGNED) { - // do nothing - return; - } + /** + * Zeroes the non-fixed ("don't care") positions of the search key so that the next-cell hint + * built by {@link #getNextForFuzzyRule} is the smallest matching row. This now runs on BOTH paths + * (it used to be a no-op on the no-unsafe path), so {@link #getFuzzyKeys} and + * {@link #toByteArray} now report non-fixed key bytes as 0 on the no-unsafe path too, matching + * the long-standing unsafe-path behavior. The byte value at a non-fixed position is semantically + * dead (never compared), so this changes neither matching nor cross-version deserialization. + */ + static void preprocessSearchKey(Pair p, boolean unsafeUnaligned) { byte[] key = p.getFirst(); byte[] mask = p.getSecond(); for (int i = 0; i < mask.length; i++) { - // set non-fixed part of a search key to 0. - if (mask[i] == 2) { + // set non-fixed part of a search key to 0. The non-fixed positions are encoded as 2 on the + // unsafe path (mask was preprocessed to {-1, 2}) and as 1 on the no-unsafe path ({0, 1}). + if ((unsafeUnaligned && mask[i] == 2) || (!unsafeUnaligned && mask[i] == 1)) { key[i] = 0; } } } /** - * We need to preprocess mask array, as since we treat 2's as unfixed positions and -1 (0xff) as - * fixed positions + * Normalizes the constructor/deserialized mask into the encoding the active code path expects. + * The public constructor format is {0 (fixed), 1 (non-fixed)}, but a mask can also arrive already + * preprocessed into the unsafe {-1 (fixed), 2 (non-fixed)} form - e.g. {@link #parseFrom} of a + * filter that was serialized on an unsafe peer. We accept either form on both paths so a filter + * serialized on one platform deserializes correctly on the other: + *
    + *
  • unsafe path: keep an already-preprocessed {-1, 2} mask, otherwise convert {0, 1} to {-1, + * 2}.
  • + *
  • no-unsafe path: keep a {0, 1} mask, otherwise convert an already-preprocessed {-1, 2} mask + * back to {0, 1} (the form {@link #satisfiesNoUnsafe} and {@link #preprocessMaskForHinting} + * expect).
  • + *
* @return mask array */ private byte[] preprocessMask(byte[] mask) { if (!UNSAFE_UNALIGNED) { - // do nothing + if (isPreprocessedMask(mask)) { + // Mask was serialized by an unsafe peer in {-1, 2} form; restore the {0, 1} form. + for (int i = 0; i < mask.length; i++) { + if (mask[i] == -1) { + mask[i] = 0; // -1 -> 0 + } else if (mask[i] == 2) { + mask[i] = 1; // 2 -> 1 + } + } + } return mask; } if (isPreprocessedMask(mask)) return mask; @@ -161,6 +187,24 @@ private boolean isPreprocessedMask(byte[] mask) { return true; } + /** + * Converts a stored mask back to the public constructor format {0 (fixed), 1 (non-fixed)}, + * independent of the internal representation, so {@link #toByteArray} and {@link #getFuzzyKeys} + * never leak the internal preprocessed form onto the wire. On the no-unsafe path the stored mask + * is already {0, 1}. On the unsafe path the stored mask is {-1 (fixed), 0 (non-fixed)} (fixed up + * front by the constructor); a -1 byte is fixed and any other byte is non-fixed. + * @return a new array in {0 (fixed), 1 (non-fixed)} form + */ + private static byte[] toConstructorMask(byte[] mask, boolean unsafeUnaligned) { + byte[] out = Arrays.copyOf(mask, mask.length); + if (unsafeUnaligned) { + for (int i = 0; i < out.length; i++) { + out[i] = (byte) (out[i] == -1 ? 0 : 1); + } + } + return out; + } + /** * Returns the Fuzzy keys in the format expected by the constructor. * @return the Fuzzy keys in the format expected by the constructor @@ -171,18 +215,7 @@ public List> getFuzzyKeys() { Pair returnKey = new Pair<>(); // This won't revert the original key's don't care values, but we don't care. returnKey.setFirst(Arrays.copyOf(fuzzyKey.getFirst(), fuzzyKey.getFirst().length)); - byte[] returnMask = Arrays.copyOf(fuzzyKey.getSecond(), fuzzyKey.getSecond().length); - if (UNSAFE_UNALIGNED && isPreprocessedMask(returnMask)) { - // Revert the preprocessing. - for (int i = 0; i < returnMask.length; i++) { - if (returnMask[i] == -1) { - returnMask[i] = 0; // -1 >> 0 - } else if (returnMask[i] == 2) { - returnMask[i] = 1;// 2 >> 1 - } - } - } - returnKey.setSecond(returnMask); + returnKey.setSecond(toConstructorMask(fuzzyKey.getSecond(), UNSAFE_UNALIGNED)); returnList.add(returnKey); } return returnList; @@ -205,10 +238,6 @@ public ReturnCode filterCell(final Cell c) { for (int i = startIndex; i < size + startIndex; i++) { final int index = i % size; Pair fuzzyData = fuzzyKeysData.get(index); - // This shift is idempotent - always end up with 0 and -1 as mask values. - for (int j = 0; j < fuzzyData.getSecond().length; j++) { - fuzzyData.getSecond()[j] >>= 2; - } SatisfiesCode satisfiesCode = satisfies(isReversed(), c.getRowArray(), c.getRowOffset(), c.getRowLength(), fuzzyData.getFirst(), fuzzyData.getSecond()); if (satisfiesCode == SatisfiesCode.YES) { @@ -316,9 +345,12 @@ boolean isNextRowSameAs(Cell currentCell) { } byte[] updateWith(Cell currentCell, Pair fuzzyData) { - byte[] nextRowKeyCandidate = - getNextForFuzzyRule(isReversed(), currentCell.getRowArray(), currentCell.getRowOffset(), - currentCell.getRowLength(), fuzzyData.getFirst(), fuzzyData.getSecond()); + // getNextForFuzzyRule needs {-1, 0}. On the no-unsafe path this returns a converted copy; on + // the unsafe path the stored mask is already {-1, 0} (fixed by the constructor), so it is + // returned as is. + byte[] fuzzyKeyMeta = preprocessMaskForHinting(fuzzyData.getSecond(), UNSAFE_UNALIGNED); + byte[] nextRowKeyCandidate = getNextForFuzzyRule(isReversed(), currentCell.getRowArray(), + currentCell.getRowOffset(), currentCell.getRowLength(), fuzzyData.getFirst(), fuzzyKeyMeta); if (nextRowKeyCandidate != null) { nextRows.add(new Pair<>(nextRowKeyCandidate, fuzzyData)); } @@ -339,7 +371,10 @@ public byte[] toByteArray() { for (Pair fuzzyData : fuzzyKeysData) { BytesBytesPair.Builder bbpBuilder = BytesBytesPair.newBuilder(); bbpBuilder.setFirst(UnsafeByteOperations.unsafeWrap(fuzzyData.getFirst())); - bbpBuilder.setSecond(UnsafeByteOperations.unsafeWrap(fuzzyData.getSecond())); + // Serialize the mask in the public constructor {0, 1} form rather than the internal + // preprocessed representation, so the wire bytes are platform- and state-independent. + bbpBuilder.setSecond(UnsafeByteOperations + .unsafeWrap(toConstructorMask(fuzzyData.getSecond(), UNSAFE_UNALIGNED))); builder.addFuzzyKeysData(bbpBuilder); } return builder.build().toByteArray(); @@ -467,6 +502,46 @@ static SatisfiesCode satisfies(boolean reverse, byte[] row, int offset, int leng return SatisfiesCode.YES; } + /** + * Prepares the mask for {@link #satisfies}, mutating {@code mask} in place. Called once from the + * constructor (after {@link #preprocessSearchKey}) so the stored mask is fixed up front and never + * mutated during scanning. On the unsafe path the stored mask uses {-1, 2}; this shifts it to the + * {-1 (fixed), 0 (non-fixed)} form the word-based {@code satisfies} expects (the shift is + * idempotent: -1 stays -1, 2 becomes 0). On the no-unsafe path the mask keeps its original {0 + * (fixed), 1 (non-fixed)} representation, which is what {@link #satisfiesNoUnsafe} expects, so + * this is a no-op. + */ + static void preprocessMaskForSatisfies(byte[] mask, boolean unsafeUnaligned) { + if (!unsafeUnaligned) { + return; + } + for (int i = 0; i < mask.length; i++) { + mask[i] >>= 2; + } + } + + /** + * Prepares the mask for {@link #getNextForFuzzyRule}, which expects {-1 (fixed), 0 (non-fixed)}. + * On the no-unsafe path the stored mask is {0 (fixed), 1 (non-fixed)}, so it is converted into a + * new array (the original is left untouched because {@link #satisfiesNoUnsafe} still needs it). + * On the unsafe path the stored mask is already {-1, 0} (the constructor fixes it up front), so + * it is returned as is. + */ + static byte[] preprocessMaskForHinting(byte[] mask, boolean unsafeUnaligned) { + if (unsafeUnaligned) { + return mask; + } + byte[] converted = Arrays.copyOf(mask, mask.length); + for (int i = 0; i < converted.length; i++) { + if (converted[i] == 0) { + converted[i] = -1; + } else if (converted[i] == 1) { + converted[i] = 0; + } + } + return converted; + } + static SatisfiesCode satisfiesNoUnsafe(boolean reverse, byte[] row, int offset, int length, byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) { if (row == null) { @@ -713,9 +788,12 @@ boolean areSerializedFieldsEqual(Filter o) { for (int i = 0; i < fuzzyKeysData.size(); ++i) { Pair thisData = this.fuzzyKeysData.get(i); Pair otherData = other.fuzzyKeysData.get(i); + // Compare the mask in the normalized constructor {0, 1} form (the form toByteArray emits) so + // equality is independent of the internal mask encoding and consistent with the wire bytes. if ( !(Bytes.equals(thisData.getFirst(), otherData.getFirst()) - && Bytes.equals(thisData.getSecond(), otherData.getSecond())) + && Bytes.equals(toConstructorMask(thisData.getSecond(), UNSAFE_UNALIGNED), + toConstructorMask(otherData.getSecond(), UNSAFE_UNALIGNED))) ) { return false; } @@ -730,6 +808,14 @@ public boolean equals(Object obj) { @Override public int hashCode() { - return Objects.hash(this.fuzzyKeysData); + // Hash the same normalized fields equals() compares (the mask in constructor {0, 1} form) so + // equal filters hash identically (content-based, independent of the internal mask encoding). + int result = 1; + for (Pair fuzzyData : fuzzyKeysData) { + result = 31 * result + Bytes.hashCode(fuzzyData.getFirst()); + result = + 31 * result + Bytes.hashCode(toConstructorMask(fuzzyData.getSecond(), UNSAFE_UNALIGNED)); + } + return result; } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFuzzyRowFilter.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFuzzyRowFilter.java index d65f3a451058..b36853ad03c5 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFuzzyRowFilter.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFuzzyRowFilter.java @@ -391,6 +391,43 @@ public void testReverseFilterListSkipsSameRowFuzzyHint() throws IOException { } } + /** + * Serializing a filter must not leak the internal mask encoding. On the unsafe path the stored + * mask is {-1 (fixed), 0 (non-fixed)}; {@code toByteArray} must emit the public constructor {0, + * 1} form so the round-tripped filter behaves identically (otherwise the leaked {-1, 0} is + * reparsed as an all-fixed mask). Processing a cell first must not change this. + */ + @Test + public void testSerializationAfterFilterCellPreservesBehavior() throws Exception { + FuzzyRowFilter original = + new FuzzyRowFilter(Arrays.asList(new Pair<>(new byte[] { 1, 2, 3 }, new byte[] { 0, 1, 0 }))); + // Process a cell first to prove serialization is unaffected by scanning. + original.filterCell(KeyValueUtil.createFirstOnRow(new byte[] { 1, 50, 3 })); + + FuzzyRowFilter parsed = FuzzyRowFilter.parseFrom(original.toByteArray()); + // A row matching only via the wildcard position must still be INCLUDEEd after the round-trip. + assertEquals(Filter.ReturnCode.INCLUDE, + parsed.filterCell(KeyValueUtil.createFirstOnRow(new byte[] { 1, 99, 3 }))); + } + + /** + * Two filters built from the same rule (distinct array instances) must be equal and hash equally, + * i.e. {@code equals}/{@code hashCode} must be content-based, not identity-based, and consistent + * with each other and with the serialized ({0, 1}) form. This must also hold after one of them + * has processed a cell. + */ + @Test + public void testEqualsConsistentAfterFilterCell() { + FuzzyRowFilter fresh = + new FuzzyRowFilter(Arrays.asList(new Pair<>(new byte[] { 1, 2, 3 }, new byte[] { 0, 1, 0 }))); + FuzzyRowFilter scanned = + new FuzzyRowFilter(Arrays.asList(new Pair<>(new byte[] { 1, 2, 3 }, new byte[] { 0, 1, 0 }))); + scanned.filterCell(KeyValueUtil.createFirstOnRow(new byte[] { 1, 50, 3 })); + + assertEquals(fresh, scanned); + assertEquals(fresh.hashCode(), scanned.hashCode()); + } + private static FuzzyRowFilter newReverseFuzzyRowFilter() { FuzzyRowFilter filter = new FuzzyRowFilter(Arrays.asList(new Pair<>(Bytes.toBytes("aaa"), new byte[] { 0, 1, 0 }))); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFuzzyRowFilterWoUnsafe.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFuzzyRowFilterWoUnsafe.java new file mode 100644 index 000000000000..93f82b4bc516 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFuzzyRowFilterWoUnsafe.java @@ -0,0 +1,267 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.filter; + +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.mockito.Mockito.mockStatic; + +import java.lang.reflect.Field; +import java.util.Arrays; +import java.util.Collections; +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.KeyValueUtil; +import org.apache.hadoop.hbase.testclassification.FilterTests; +import org.apache.hadoop.hbase.testclassification.SmallTests; +import org.apache.hadoop.hbase.unsafe.HBasePlatformDependent; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.Pair; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Tag; +import org.junit.jupiter.api.Test; +import org.mockito.MockedStatic; + +import org.apache.hbase.thirdparty.com.google.protobuf.UnsafeByteOperations; + +import org.apache.hadoop.hbase.shaded.protobuf.generated.FilterProtos; +import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.BytesBytesPair; + +/** + * Exercises {@link FuzzyRowFilter} on the "no-unsafe" code path + * ({@code HBasePlatformDependent.unaligned() == false}) end-to-end, i.e. through the constructor, + * {@link FuzzyRowFilter#filterCell} and {@link FuzzyRowFilter#getNextCellHint}. On this path the + * mask uses the {0 (fixed), 1 (non-fixed)} encoding, which differs from the unsafe {-1, 2} encoding + * that the rest of the scan machinery assumes. + */ +@Tag(FilterTests.TAG) +@Tag(SmallTests.TAG) +public class TestFuzzyRowFilterWoUnsafe { + + @BeforeAll + public static void disableUnsafe() throws Exception { + // Force the no-unsafe path: make HBasePlatformDependent.unaligned() return false and trigger + // FuzzyRowFilter's static initializer while the mock is active, so its private static final + // UNSAFE_UNALIGNED is captured as false for this JVM fork. + try (MockedStatic mocked = mockStatic(HBasePlatformDependent.class)) { + mocked.when(HBasePlatformDependent::isUnsafeAvailable).thenReturn(false); + mocked.when(HBasePlatformDependent::unaligned).thenReturn(false); + Field field = FuzzyRowFilter.class.getDeclaredField("UNSAFE_UNALIGNED"); + field.setAccessible(true); + assertFalse(field.getBoolean(null), "expected FuzzyRowFilter to use the no-unsafe path"); + } + } + + /** + * A row that matches the fuzzy rule only thanks to a wildcard position must be INCLUDEd. On the + * broken no-unsafe path the mask was shifted to all-zeroes (all positions treated as fixed), so + * the wildcard row was wrongly rejected. + */ + @Test + public void testForwardMatchesWildcardRow() { + FuzzyRowFilter filter = new FuzzyRowFilter( + Collections.singletonList(new Pair<>(new byte[] { 1, 2, 3 }, new byte[] { 0, 1, 0 }))); + + KeyValue match = KeyValueUtil.createFirstOnRow(new byte[] { 1, 99, 3 }); + assertEquals(Filter.ReturnCode.INCLUDE, filter.filterCell(match)); + } + + /** + * For a non-matching row the next-cell hint must be the smallest row that can satisfy the rule. + * With fixed positions 5 and 5 and a wildcard in the middle, the smallest row at or after {3,0,0} + * is {5,0,5}. On the broken no-unsafe path the wildcard key byte was never cleared and the mask + * was corrupted, producing a wrong hint. + */ + @Test + public void testForwardHintSkipsToSmallestMatchingRow() { + FuzzyRowFilter filter = new FuzzyRowFilter( + Collections.singletonList(new Pair<>(new byte[] { 5, 100, 5 }, new byte[] { 0, 1, 0 }))); + + KeyValue current = KeyValueUtil.createFirstOnRow(new byte[] { 3, 0, 0 }); + assertEquals(Filter.ReturnCode.SEEK_NEXT_USING_HINT, filter.filterCell(current)); + + Cell hint = filter.getNextCellHint(current); + assertRow(new byte[] { 5, 0, 5 }, hint); + } + + /** + * No-unsafe analogue of {@code TestFuzzyRowFilter#testReverseFilterCellSkipsSameRowHint}. This is + * the most subtle composition point this change touches: the reverse next-cell hint is computed + * from the no-unsafe mask (updateWith -> preprocessMaskForHinting({0,1}->{-1,0}) -> + * getNextForFuzzyRule(reverse)) and must still play with the HBASE-30226 same-row short-circuit. + * A non-matching row seeks back to "abb"; revisiting "abb" must be skipped with NEXT_ROW instead + * of recreating the same hint. + */ + @Test + public void testReverseHintSkipsSameRow() { + FuzzyRowFilter filter = new FuzzyRowFilter( + Collections.singletonList(new Pair<>(Bytes.toBytes("aaa"), new byte[] { 0, 1, 0 }))); + filter.setReversed(true); + + KeyValue abc = KeyValueUtil.createFirstOnRow(Bytes.toBytes("abc")); + assertEquals(Filter.ReturnCode.SEEK_NEXT_USING_HINT, filter.filterCell(abc)); + assertRow(Bytes.toBytes("abb"), filter.getNextCellHint(abc)); + + KeyValue abb = KeyValueUtil.createFirstOnRow(Bytes.toBytes("abb")); + assertEquals(Filter.ReturnCode.NEXT_ROW, filter.filterCell(abb)); + } + + /** + * With multiple fuzzy keys the RowTracker priority queue holds one (separately converted) hint + * mask per key and must return the smallest matching row across all of them. Here key1 {5,*,5} + * hints {5,0,5} and key2 {4,9,9} hints {4,9,9}; the smaller {4,9,9} must win. + */ + @Test + public void testForwardHintWithMultipleKeysReturnsSmallest() { + FuzzyRowFilter filter = + new FuzzyRowFilter(Arrays.asList(new Pair<>(new byte[] { 5, 100, 5 }, new byte[] { 0, 1, 0 }), + new Pair<>(new byte[] { 4, 9, 9 }, new byte[] { 0, 0, 0 }))); + + KeyValue current = KeyValueUtil.createFirstOnRow(new byte[] { 3, 0, 0 }); + assertEquals(Filter.ReturnCode.SEEK_NEXT_USING_HINT, filter.filterCell(current)); + assertRow(new byte[] { 4, 9, 9 }, filter.getNextCellHint(current)); + } + + /** + * A FuzzyRowFilter serialized by an unsafe peer puts the mask on the wire in its preprocessed {-1 + * (fixed), 2 (non-fixed)} form. A no-unsafe server must still interpret it correctly: this is + * exactly the {key, mask} pair {@link FuzzyRowFilter#parseFrom} hands to the constructor for such + * a filter. Before the fix the no-unsafe constructor left {-1, 2} untouched, so + * {@link FuzzyRowFilter#satisfiesNoUnsafe} (which keys off {0, 1}) treated every position as a + * wildcard and stopped enforcing the fixed bytes, wrongly INCLUDEing non-matching rows. + */ + @Test + public void testUnsafeEncodedMaskFromPeerEnforcesFixedPositions() { + // {-1, 2, -1} == fixed, non-fixed, fixed -> equivalent no-unsafe mask {0, 1, 0}. + FuzzyRowFilter match = new FuzzyRowFilter( + Collections.singletonList(new Pair<>(new byte[] { 1, 0, 3 }, new byte[] { -1, 2, -1 }))); + assertEquals(Filter.ReturnCode.INCLUDE, + match.filterCell(KeyValueUtil.createFirstOnRow(new byte[] { 1, 99, 3 }))); + + // A row that differs at a FIXED position (pos 2: 9 != 3) must not be INCLUDEd. + FuzzyRowFilter reject = new FuzzyRowFilter( + Collections.singletonList(new Pair<>(new byte[] { 1, 0, 3 }, new byte[] { -1, 2, -1 }))); + assertEquals(Filter.ReturnCode.SEEK_NEXT_USING_HINT, + reject.filterCell(KeyValueUtil.createFirstOnRow(new byte[] { 1, 99, 9 }))); + } + + /** + * Faithful wire-level companion to {@link #testUnsafeEncodedMaskFromPeerEnforcesFixedPositions}: + * build the exact protobuf bytes an unsafe peer emits (mask in the preprocessed {-1, 2} form) and + * run them through {@link FuzzyRowFilter#parseFrom}. A no-unsafe server must normalize the mask + * back to {0, 1} on deserialization and still enforce the fixed positions. + */ + @Test + public void testParseFromUnsafeEncodedFilterEnforcesFixedPositions() throws Exception { + byte[] wire = FilterProtos.FuzzyRowFilter.newBuilder() + .addFuzzyKeysData(BytesBytesPair.newBuilder() + .setFirst(UnsafeByteOperations.unsafeWrap(new byte[] { 1, 0, 3 })) + .setSecond(UnsafeByteOperations.unsafeWrap(new byte[] { -1, 2, -1 }))) + .build().toByteArray(); + + assertEquals(Filter.ReturnCode.INCLUDE, FuzzyRowFilter.parseFrom(wire) + .filterCell(KeyValueUtil.createFirstOnRow(new byte[] { 1, 99, 3 }))); + // A row that differs at a FIXED position (pos 2: 9 != 3) must not be INCLUDEd. + assertEquals(Filter.ReturnCode.SEEK_NEXT_USING_HINT, FuzzyRowFilter.parseFrom(wire) + .filterCell(KeyValueUtil.createFirstOnRow(new byte[] { 1, 99, 9 }))); + } + + /** + * A filter built on a no-unsafe server must survive its own {@link FuzzyRowFilter#toByteArray} / + * {@link FuzzyRowFilter#parseFrom} round-trip with identical behavior and identical + * {@code equals}/{@code hashCode} (the wire form is the canonical {0, 1}). + */ + @Test + public void testSerializationRoundTripPreservesFilter() throws Exception { + FuzzyRowFilter original = new FuzzyRowFilter( + Collections.singletonList(new Pair<>(new byte[] { 1, 2, 3 }, new byte[] { 0, 1, 0 }))); + FuzzyRowFilter parsed = FuzzyRowFilter.parseFrom(original.toByteArray()); + + assertEquals(original, parsed); + assertEquals(original.hashCode(), parsed.hashCode()); + assertEquals(Filter.ReturnCode.INCLUDE, + parsed.filterCell(KeyValueUtil.createFirstOnRow(new byte[] { 1, 99, 3 }))); + } + + private static void assertRow(byte[] expected, Cell cell) { + byte[] actual = Bytes.copy(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength()); + assertEquals(Bytes.toStringBinary(expected), Bytes.toStringBinary(actual)); + } + + // --------------------------------------------------------------------------- + // Fine-grained unit coverage of the mask/key preprocessing helpers used above. + // --------------------------------------------------------------------------- + + @Test + public void testPreprocessMaskForSatisfiesNoUnsafeKeepsMaskSemantics() { + byte[] row = new byte[] { 1, 2, 1, 3, 3 }; + byte[] fuzzyKey = new byte[] { 1, 2, 0, 3 }; + byte[] fuzzyKeyFiltered = new byte[] { 0, 2, 0, 3 }; + byte[] mask = new byte[] { 0, 0, 1, 0 }; + + assertEquals(FuzzyRowFilter.SatisfiesCode.YES, + FuzzyRowFilter.satisfiesNoUnsafe(false, row, 0, row.length, fuzzyKey, mask)); + assertEquals(FuzzyRowFilter.SatisfiesCode.NO_NEXT, + FuzzyRowFilter.satisfiesNoUnsafe(false, row, 0, row.length, fuzzyKeyFiltered, mask)); + + // On the no-unsafe path the mask must be left untouched (the broken code shifted it to 0s). + FuzzyRowFilter.preprocessMaskForSatisfies(mask, false); + assertArrayEquals(new byte[] { 0, 0, 1, 0 }, mask); + + assertEquals(FuzzyRowFilter.SatisfiesCode.YES, + FuzzyRowFilter.satisfiesNoUnsafe(false, row, 0, row.length, fuzzyKey, mask)); + assertEquals(FuzzyRowFilter.SatisfiesCode.NO_NEXT, + FuzzyRowFilter.satisfiesNoUnsafe(false, row, 0, row.length, fuzzyKeyFiltered, mask)); + } + + @Test + public void testPreprocessMaskForHintingNoUnsafeConvertsToGetNextSemantics() { + byte[] row = new byte[] { 1, 2, 1, 3, 3 }; + byte[] fuzzyKey = new byte[] { 1, 2, 0, 3 }; + byte[] noUnsafeMask = new byte[] { 0, 0, 1, 0 }; + + byte[] convertedMask = FuzzyRowFilter.preprocessMaskForHinting(noUnsafeMask, false); + // The original mask must not be mutated (satisfiesNoUnsafe still needs the {0, 1} form). + assertArrayEquals(new byte[] { 0, 0, 1, 0 }, noUnsafeMask); + assertArrayEquals(new byte[] { -1, -1, 0, -1 }, convertedMask); + + byte[] nextWithConverted = + FuzzyRowFilter.getNextForFuzzyRule(false, row, 0, row.length, fuzzyKey, convertedMask); + byte[] nextWithExpectedMask = FuzzyRowFilter.getNextForFuzzyRule(false, row, 0, row.length, + fuzzyKey, new byte[] { -1, -1, 0, -1 }); + + assertNotNull(nextWithConverted); + assertArrayEquals(nextWithExpectedMask, nextWithConverted); + } + + @Test + public void testNoUnsafePreprocessSearchKeyClearsWildcardBytes() { + Pair fuzzyData = new Pair<>(new byte[] { 1, 100, 3 }, new byte[] { 0, 1, 0 }); + + FuzzyRowFilter.preprocessSearchKey(fuzzyData, false); + byte[] convertedMask = FuzzyRowFilter.preprocessMaskForHinting(fuzzyData.getSecond(), false); + byte[] nextForFuzzyRule = FuzzyRowFilter.getNextForFuzzyRule(false, new byte[] { 0, 0, 0 }, 0, + 3, fuzzyData.getFirst(), convertedMask); + + // The wildcard byte (100) must be cleared so the next hint is the smallest matching row. + assertArrayEquals(new byte[] { 1, 0, 3 }, fuzzyData.getFirst()); + assertArrayEquals(new byte[] { 1, 0, 3 }, nextForFuzzyRule); + } +} From 080bfa1a7a3be3b9265cb015d0dacc166c73f532 Mon Sep 17 00:00:00 2001 From: liuxiaocs7 Date: Thu, 25 Jun 2026 00:52:11 +0800 Subject: [PATCH 2/3] update --- .../hadoop/hbase/filter/FuzzyRowFilter.java | 78 +++++++------------ 1 file changed, 27 insertions(+), 51 deletions(-) diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FuzzyRowFilter.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FuzzyRowFilter.java index a9286c2a6b9a..f370b3f772f6 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FuzzyRowFilter.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FuzzyRowFilter.java @@ -105,11 +105,10 @@ public FuzzyRowFilter(List> fuzzyKeysData) { p.setFirst(Arrays.copyOf(aFuzzyKeysData.getFirst(), aFuzzyKeysData.getFirst().length)); p.setSecond(Arrays.copyOf(aFuzzyKeysData.getSecond(), aFuzzyKeysData.getSecond().length)); - // normalize the mask into the encoding the active (unsafe/no-unsafe) path expects + // Normalize the mask, zero the non-fixed key bytes, then fix the unsafe mask to its final + // {-1, 0} form once here so it is never mutated during scanning. p.setSecond(preprocessMask(p.getSecond())); preprocessSearchKey(p, UNSAFE_UNALIGNED); - // Fix the unsafe mask into the {-1, 0} form satisfies() expects once, here, so the stored - // mask is never mutated during scanning (no-op on the no-unsafe path, which keeps {0, 1}). preprocessMaskForSatisfies(p.getSecond(), UNSAFE_UNALIGNED); fuzzyKeyDataCopy.add(p); @@ -119,19 +118,15 @@ public FuzzyRowFilter(List> fuzzyKeysData) { } /** - * Zeroes the non-fixed ("don't care") positions of the search key so that the next-cell hint - * built by {@link #getNextForFuzzyRule} is the smallest matching row. This now runs on BOTH paths - * (it used to be a no-op on the no-unsafe path), so {@link #getFuzzyKeys} and - * {@link #toByteArray} now report non-fixed key bytes as 0 on the no-unsafe path too, matching - * the long-standing unsafe-path behavior. The byte value at a non-fixed position is semantically - * dead (never compared), so this changes neither matching nor cross-version deserialization. + * Zeroes the non-fixed ("don't care") positions of the search key (on both paths) so the + * next-cell hint from {@link #getNextForFuzzyRule} is the smallest matching row. The byte at a + * non-fixed position is never compared, so this affects neither matching nor deserialization. */ static void preprocessSearchKey(Pair p, boolean unsafeUnaligned) { byte[] key = p.getFirst(); byte[] mask = p.getSecond(); for (int i = 0; i < mask.length; i++) { - // set non-fixed part of a search key to 0. The non-fixed positions are encoded as 2 on the - // unsafe path (mask was preprocessed to {-1, 2}) and as 1 on the no-unsafe path ({0, 1}). + // non-fixed is encoded as 2 on the unsafe path ({-1, 2}) and as 1 on no-unsafe ({0, 1}) if ((unsafeUnaligned && mask[i] == 2) || (!unsafeUnaligned && mask[i] == 1)) { key[i] = 0; } @@ -139,24 +134,16 @@ static void preprocessSearchKey(Pair p, boolean unsafeUnaligned) } /** - * Normalizes the constructor/deserialized mask into the encoding the active code path expects. - * The public constructor format is {0 (fixed), 1 (non-fixed)}, but a mask can also arrive already - * preprocessed into the unsafe {-1 (fixed), 2 (non-fixed)} form - e.g. {@link #parseFrom} of a - * filter that was serialized on an unsafe peer. We accept either form on both paths so a filter - * serialized on one platform deserializes correctly on the other: - *
    - *
  • unsafe path: keep an already-preprocessed {-1, 2} mask, otherwise convert {0, 1} to {-1, - * 2}.
  • - *
  • no-unsafe path: keep a {0, 1} mask, otherwise convert an already-preprocessed {-1, 2} mask - * back to {0, 1} (the form {@link #satisfiesNoUnsafe} and {@link #preprocessMaskForHinting} - * expect).
  • - *
+ * Normalizes the incoming mask to the active path's encoding. Input is the public {0, 1} form, or + * the already-preprocessed unsafe {-1, 2} form when {@link #parseFrom} deserializes a filter from + * an unsafe peer; accepting both lets a filter serialized on one platform work on the other. + * Unsafe keeps/produces {-1, 2}; no-unsafe keeps/produces {0, 1}. * @return mask array */ private byte[] preprocessMask(byte[] mask) { if (!UNSAFE_UNALIGNED) { if (isPreprocessedMask(mask)) { - // Mask was serialized by an unsafe peer in {-1, 2} form; restore the {0, 1} form. + // deserialized {-1, 2} from an unsafe peer -> restore {0, 1} for (int i = 0; i < mask.length; i++) { if (mask[i] == -1) { mask[i] = 0; // -1 -> 0 @@ -188,12 +175,11 @@ private boolean isPreprocessedMask(byte[] mask) { } /** - * Converts a stored mask back to the public constructor format {0 (fixed), 1 (non-fixed)}, - * independent of the internal representation, so {@link #toByteArray} and {@link #getFuzzyKeys} - * never leak the internal preprocessed form onto the wire. On the no-unsafe path the stored mask - * is already {0, 1}. On the unsafe path the stored mask is {-1 (fixed), 0 (non-fixed)} (fixed up - * front by the constructor); a -1 byte is fixed and any other byte is non-fixed. - * @return a new array in {0 (fixed), 1 (non-fixed)} form + * Converts a stored mask back to the public {0 (fixed), 1 (non-fixed)} form as a new array, so + * serialization, {@link #getFuzzyKeys}, equals and hashCode never expose the internal encoding. + * No-unsafe already stores {0, 1}; unsafe stores {-1, 0}, where -1 is fixed and anything else is + * non-fixed. + * @return a new array in {0, 1} form */ private static byte[] toConstructorMask(byte[] mask, boolean unsafeUnaligned) { byte[] out = Arrays.copyOf(mask, mask.length); @@ -345,9 +331,8 @@ boolean isNextRowSameAs(Cell currentCell) { } byte[] updateWith(Cell currentCell, Pair fuzzyData) { - // getNextForFuzzyRule needs {-1, 0}. On the no-unsafe path this returns a converted copy; on - // the unsafe path the stored mask is already {-1, 0} (fixed by the constructor), so it is - // returned as is. + // getNextForFuzzyRule needs {-1, 0}: a converted copy on no-unsafe, the stored mask on + // unsafe. byte[] fuzzyKeyMeta = preprocessMaskForHinting(fuzzyData.getSecond(), UNSAFE_UNALIGNED); byte[] nextRowKeyCandidate = getNextForFuzzyRule(isReversed(), currentCell.getRowArray(), currentCell.getRowOffset(), currentCell.getRowLength(), fuzzyData.getFirst(), fuzzyKeyMeta); @@ -371,8 +356,7 @@ public byte[] toByteArray() { for (Pair fuzzyData : fuzzyKeysData) { BytesBytesPair.Builder bbpBuilder = BytesBytesPair.newBuilder(); bbpBuilder.setFirst(UnsafeByteOperations.unsafeWrap(fuzzyData.getFirst())); - // Serialize the mask in the public constructor {0, 1} form rather than the internal - // preprocessed representation, so the wire bytes are platform- and state-independent. + // Emit the public {0, 1} mask, not the internal form, so the wire is platform-independent. bbpBuilder.setSecond(UnsafeByteOperations .unsafeWrap(toConstructorMask(fuzzyData.getSecond(), UNSAFE_UNALIGNED))); builder.addFuzzyKeysData(bbpBuilder); @@ -503,13 +487,10 @@ static SatisfiesCode satisfies(boolean reverse, byte[] row, int offset, int leng } /** - * Prepares the mask for {@link #satisfies}, mutating {@code mask} in place. Called once from the - * constructor (after {@link #preprocessSearchKey}) so the stored mask is fixed up front and never - * mutated during scanning. On the unsafe path the stored mask uses {-1, 2}; this shifts it to the - * {-1 (fixed), 0 (non-fixed)} form the word-based {@code satisfies} expects (the shift is - * idempotent: -1 stays -1, 2 becomes 0). On the no-unsafe path the mask keeps its original {0 - * (fixed), 1 (non-fixed)} representation, which is what {@link #satisfiesNoUnsafe} expects, so - * this is a no-op. + * Mutates {@code mask} in place into the form {@link #satisfies} expects. Called once from the + * constructor so the stored mask is fixed up front and never mutated during scanning. Unsafe: + * shift {-1, 2} -> {-1, 0} (the word-based satisfies wants non-fixed = 0). No-unsafe: no-op, + * {@link #satisfiesNoUnsafe} already wants {0, 1}. */ static void preprocessMaskForSatisfies(byte[] mask, boolean unsafeUnaligned) { if (!unsafeUnaligned) { @@ -521,11 +502,9 @@ static void preprocessMaskForSatisfies(byte[] mask, boolean unsafeUnaligned) { } /** - * Prepares the mask for {@link #getNextForFuzzyRule}, which expects {-1 (fixed), 0 (non-fixed)}. - * On the no-unsafe path the stored mask is {0 (fixed), 1 (non-fixed)}, so it is converted into a - * new array (the original is left untouched because {@link #satisfiesNoUnsafe} still needs it). - * On the unsafe path the stored mask is already {-1, 0} (the constructor fixes it up front), so - * it is returned as is. + * Returns the mask in the {-1 (fixed), 0 (non-fixed)} form {@link #getNextForFuzzyRule} expects. + * No-unsafe converts {0, 1} into a NEW array (the stored {0, 1} is still needed by + * {@link #satisfiesNoUnsafe}); unsafe is already {-1, 0}, returned as is. */ static byte[] preprocessMaskForHinting(byte[] mask, boolean unsafeUnaligned) { if (unsafeUnaligned) { @@ -788,8 +767,7 @@ boolean areSerializedFieldsEqual(Filter o) { for (int i = 0; i < fuzzyKeysData.size(); ++i) { Pair thisData = this.fuzzyKeysData.get(i); Pair otherData = other.fuzzyKeysData.get(i); - // Compare the mask in the normalized constructor {0, 1} form (the form toByteArray emits) so - // equality is independent of the internal mask encoding and consistent with the wire bytes. + // Compare masks in the normalized {0, 1} form, so equality matches the serialized bytes. if ( !(Bytes.equals(thisData.getFirst(), otherData.getFirst()) && Bytes.equals(toConstructorMask(thisData.getSecond(), UNSAFE_UNALIGNED), @@ -808,8 +786,6 @@ public boolean equals(Object obj) { @Override public int hashCode() { - // Hash the same normalized fields equals() compares (the mask in constructor {0, 1} form) so - // equal filters hash identically (content-based, independent of the internal mask encoding). int result = 1; for (Pair fuzzyData : fuzzyKeysData) { result = 31 * result + Bytes.hashCode(fuzzyData.getFirst()); From b51efbd751b417a44dce5445b72ddd30caa6064b Mon Sep 17 00:00:00 2001 From: Xiao Liu Date: Thu, 25 Jun 2026 20:45:44 +0800 Subject: [PATCH 3/3] Update hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFuzzyRowFilter.java MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Dávid Paksy --- .../java/org/apache/hadoop/hbase/filter/TestFuzzyRowFilter.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFuzzyRowFilter.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFuzzyRowFilter.java index b36853ad03c5..fa61aaa9f217 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFuzzyRowFilter.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFuzzyRowFilter.java @@ -405,7 +405,7 @@ public void testSerializationAfterFilterCellPreservesBehavior() throws Exception original.filterCell(KeyValueUtil.createFirstOnRow(new byte[] { 1, 50, 3 })); FuzzyRowFilter parsed = FuzzyRowFilter.parseFrom(original.toByteArray()); - // A row matching only via the wildcard position must still be INCLUDEEd after the round-trip. + // A row matching only via the wildcard position must still be INCLUDED after the round-trip. assertEquals(Filter.ReturnCode.INCLUDE, parsed.filterCell(KeyValueUtil.createFirstOnRow(new byte[] { 1, 99, 3 }))); }