Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@
import org.apache.phoenix.filter.RowKeyComparisonFilter.RowKeyTuple;
import org.apache.phoenix.jdbc.PhoenixConnection;
import org.apache.phoenix.jdbc.PhoenixPreparedStatement;
import org.apache.phoenix.query.QueryConstants;
import org.apache.phoenix.query.QueryServices;
import org.apache.phoenix.query.QueryServicesOptions;
import org.apache.phoenix.schema.CompiledConditionalTTLExpression;
Expand Down Expand Up @@ -144,13 +145,13 @@ public class CompactionScanner implements InternalScanner {
private final long maxLookbackInMillis;
private int minVersion;
private int maxVersion;
private final boolean emptyCFStore;
private boolean emptyCFStore;
private final boolean localIndex;
private final int familyCount;
private KeepDeletedCells keepDeletedCells;
private long compactionTime;
private final byte[] emptyCF;
private final byte[] emptyCQ;
private byte[] emptyCF;
private byte[] emptyCQ;
private final byte[] storeColumnFamily;
private final String tableName;
private final String columnFamilyName;
Expand Down Expand Up @@ -180,13 +181,13 @@ public CompactionScanner(RegionCoprocessorEnvironment env,
// Empty column family and qualifier are always needed to compute which all empty cells to retain
// even during minor compactions. If required empty cells are not retained during
// minor compactions then we can run into the risk of partial row expiry on next major compaction.
this.emptyCF = SchemaUtil.getEmptyColumnFamily(table);
this.emptyCQ = SchemaUtil.getEmptyColumnQualifier(table);
this.emptyCF = table != null ? SchemaUtil.getEmptyColumnFamily(table) : EMPTY_BYTE_ARRAY;
this.emptyCQ = table != null ? SchemaUtil.getEmptyColumnQualifier(table) : EMPTY_BYTE_ARRAY;
Copy link
Contributor

@virajjasani virajjasani Apr 29, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not keep emptyCF and emptyCQ as null if PTable is null, so that we can also incorporate this logic?

Instead of

                    if (ScanUtil.isEmptyColumn(cell, emptyCF, emptyCQ)) {
                        index = addEmptyColumn(result, currentColumnCell, index, emptyColumn);
                    } else {
                        index = skipColumn(result, currentColumnCell, retainedCells, index);
                    }

this

                    if (emptyCF != null && emptyCQ != null && ScanUtil.isEmptyColumn(cell, emptyCF,
                            emptyCQ)) {
                        index = addEmptyColumn(result, currentColumnCell, index, emptyColumn);
                    } else {
                        index = skipColumn(result, currentColumnCell, retainedCells, index);
                    }

and similarly, if (emptyCQ == EMPTY_BYTE_ARRAY) too will be simple null check.

I don't think EMPTY_BYTE_ARRAY is allowed as CF:CQ, but while debugging, null check will be more readable rather than using incorrect values of emptyCF and emptyCQ for ScanUtil.isEmptyColumn?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

By keeping the emptyCF and emptyCQ values as null, are we trying to optimize the if check? I actually kept it as an empty byte array to avoid null handling, and nothing will match an empty byte array.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think EMPTY_BYTE_ARRAY is allowed as CF:CQ, but while debugging, null check will be more readable rather than using incorrect values of emptyCF and emptyCQ for ScanUtil.isEmptyColumn?

Got it, will change to storing null values. I agree this improves readability. Thanks

compactionTime = EnvironmentEdgeManager.currentTimeMillis();
columnFamilyName = store.getColumnFamilyName();
storeColumnFamily = columnFamilyName.getBytes();
tableName = region.getRegionInfo().getTable().getNameAsString();
String dataTableName = table.getName().toString();
String dataTableName = table != null ? table.getName().toString() : "";
Long overriddenMaxLookback = maxLookbackMap.get(tableName + SEPARATOR + columnFamilyName);
this.maxLookbackInMillis = overriddenMaxLookback == null ?
maxLookbackAgeInMillis : Math.max(maxLookbackAgeInMillis, overriddenMaxLookback);
Expand Down Expand Up @@ -416,11 +417,32 @@ private void postProcessForConditionalTTL(List<Cell> result) {
}
}

/**
 * Infers the empty column family/qualifier from the cells being flushed when no PTable
 * was available at construction time (i.e. during flushes, where emptyCF/emptyCQ could
 * not be resolved from schema). On the first cell whose qualifier matches one of the
 * known empty-column qualifiers, commits the family/qualifier to the instance fields
 * and marks this store as the empty-CF store.
 *
 * @param result cells of the current row produced by the store scanner
 */
private void determineEmptyCfCq(List<Cell> result) {
    for (Cell cell : result) {
        // Take the candidate family from the cell itself, but only commit it to the
        // emptyCF field once a matching empty-column qualifier is found. The original
        // code assigned emptyCF unconditionally, clobbering it with the last cell's
        // family even when no empty column was present, leaving emptyCF/emptyCQ
        // inconsistent with each other.
        byte[] candidateCF = CellUtil.cloneFamily(cell);
        if (ScanUtil.isEmptyColumn(cell, candidateCF, QueryConstants.EMPTY_COLUMN_BYTES)) {
            emptyCF = candidateCF;
            emptyCQ = QueryConstants.EMPTY_COLUMN_BYTES;
            emptyCFStore = true;
            return;
        }
        // The empty column is always encoded in FOUR_BYTE format, since it is a
        // reserved qualifier. See EncodedColumnsUtil#isReservedColumnQualifier.
        if (ScanUtil.isEmptyColumn(cell, candidateCF,
                QueryConstants.ENCODED_EMPTY_COLUMN_BYTES)) {
            emptyCF = candidateCF;
            emptyCQ = QueryConstants.ENCODED_EMPTY_COLUMN_BYTES;
            emptyCFStore = true;
            return;
        }
    }
}

@Override
public boolean next(List<Cell> result) throws IOException {
boolean hasMore = storeScanner.next(result);
inputCellCount += result.size();
if (!result.isEmpty()) {
// This will happen only during flushes as then we don't pass PTable object
// to determine emptyCF and emptyCQ
if (emptyCQ == EMPTY_BYTE_ARRAY) {
determineEmptyCfCq(result);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For a store that is not the emptyCFStore, aren't we doing this check on every row?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, will fix that. Thanks

}
// This is for debugging
// printRow(result, "Input for " + tableName + " " + columnFamilyName, true, false);
phoenixLevelRowCompactor.compact(result, false);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.ipc.RpcControllerFactory;
import org.apache.hadoop.hbase.ipc.controller.InterRegionServerIndexRpcControllerFactory;
import org.apache.hadoop.hbase.regionserver.FlushLifeCycleTracker;
import org.apache.hadoop.hbase.regionserver.InternalScanner;
import org.apache.hadoop.hbase.regionserver.MiniBatchOperationInProgress;
import org.apache.hadoop.hbase.regionserver.Region;
Expand Down Expand Up @@ -611,6 +612,28 @@ private boolean areMutationsInSameTable(Table targetHTable, Region region) {
region.getTableDescriptor().getTableName().getName()) == 0);
}

@Override
public InternalScanner preFlush(ObserverContext<RegionCoprocessorEnvironment> c, Store store,
InternalScanner scanner, FlushLifeCycleTracker tracker) throws IOException {
if (!isPhoenixTableTTLEnabled(c.getEnvironment().getConfiguration())) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We are using the same config to control both flushing and compaction. Will there be a scenario where we want to disable Phoenix compaction on flushing but still continue to use Phoenix compaction for major/minor compaction ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will there be a scenario where we want to disable Phoenix compaction on flushing but still continue to use Phoenix compaction for major/minor compaction ?

I think in general it will be good to have this flexibility. Shall I introduce a new config to enable disable preFlush hook separately?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah I think it will be better because Phoenix compaction has already been running in production and this is a new feature so having this flexibility will be helpful. We can enable it by default.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This flexibility will be helpful. I think we can go ahead with merging the changes after config is added and perf can be done later. The config will anyways be helpful to turn off the feature, WDYT @tkhurana?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we are fine with waiting for a day or 2 more then I can post the perf results. For single column family its done. I am currently, doing perf analysis of multi-CF. Thanks

Copy link
Contributor Author

@sanjeet006py sanjeet006py May 7, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@virajjasani perf analysis for multi-CF will take some time as I got to know that HBase flushTime metrics for multi-CF case sometimes could end up tracking combined time for flushing multiple CFs and other times only 1 CF. So, need to directly track time taken by StoreFlusher.
One idea is that we wait for the perf analysis before merging this PR while the 5.3 release goes on; alternatively, if we want this PR in 5.3, then for now we can disable the preFlush hook via config and only enable it after the perf analysis. I am a bit inclined towards the first approach. WDYT @virajjasani @tkhurana?

Copy link
Contributor

@tkhurana tkhurana May 7, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@sanjeet006py Instead of relying on metrics you could also use the log messages when flush happens. For example 2025-05-07 16:47:34,580 INFO [MemStoreFlusher.1] regionserver.HRegion - Finished flush of dataSize ~255.34 MB/267746710, heapSize ~256.01 MB/268445696, currentSize=10.04 MB/10529365 for 397f5412f43294d01081c54d7253d378 in 1986ms, sequenceid=207416387, compaction requested=false" Does that have the same problem too ?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, we don't need absolute numbers but just a comparison to make sure nothing is regressed.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does that have the same problem too ?

Yes as this log line only tells us how much time all the stores which were to be flushed took to flush data as total. I added a log line in StoreFlusher and will test with that now.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added the config to disable CompactionScanner for flushes.

return scanner;
} else {
return User.runAsLoginUser(new PrivilegedExceptionAction<InternalScanner>() {
@Override public InternalScanner run() throws Exception {
String tableName = c.getEnvironment().getRegion().getRegionInfo().getTable()
.getNameAsString();
Configuration conf = c.getEnvironment().getConfiguration();
long maxLookbackInMillis =
BaseScannerRegionObserverConstants.getMaxLookbackInMillis(conf);
maxLookbackInMillis = CompactionScanner.getMaxLookbackInMillis(tableName,
store.getColumnFamilyName(), maxLookbackInMillis);
return new CompactionScanner(c.getEnvironment(), store, scanner,
maxLookbackInMillis, false, true, null);
}
});
}
}

@Override
public InternalScanner preCompact(ObserverContext<RegionCoprocessorEnvironment> c, Store store,
InternalScanner scanner, ScanType scanType, CompactionLifeCycleTracker tracker,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -610,6 +610,44 @@ public void testRetainingLastRowVersion() throws Exception {
}
}

@Test(timeout=60000)
public void testRetainingLastRowVersionForFlushes() throws Exception {
try(Connection conn = DriverManager.getConnection(getUrl())) {
String tableName = generateUniqueName();
createTable(tableName);
long timeIntervalBetweenTwoUpserts = (ttl / 2) + 1;
injectEdge.setValue(System.currentTimeMillis());
EnvironmentEdgeManager.injectEdge(injectEdge);
TableName dataTableName = TableName.valueOf(tableName);
injectEdge.incrementValue(1);
Statement stmt = conn.createStatement();
stmt.execute("upsert into " + tableName + " values ('a', 'ab', 'abc', 'abcd')");
conn.commit();
injectEdge.incrementValue(timeIntervalBetweenTwoUpserts * 1000);
stmt.execute("upsert into " + tableName + " values ('a', 'ab1')");
conn.commit();
injectEdge.incrementValue(timeIntervalBetweenTwoUpserts * 1000);
stmt.execute("upsert into " + tableName + " values ('a', 'ab2')");
conn.commit();
injectEdge.incrementValue(timeIntervalBetweenTwoUpserts * 1000);
stmt.execute("upsert into " + tableName + " values ('a', 'ab3')");
conn.commit();
injectEdge.incrementValue(MAX_LOOKBACK_AGE * 1000);
TestUtil.dumpTable(conn, dataTableName);
TestUtil.flush(utility, dataTableName);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's use TestUtil.getRawCellCount and verify that extra row versions are removed.

injectEdge.incrementValue(1);
TestUtil.dumpTable(conn, dataTableName);
majorCompact(dataTableName);
injectEdge.incrementValue(1);
TestUtil.dumpTable(conn, dataTableName);
ResultSet rs = stmt.executeQuery("select * from " + dataTableName + " where id = 'a'");
while(rs.next()) {
assertNotNull(rs.getString(3));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's verify the column values are equal to the expected values here.

assertNotNull(rs.getString(4));
}
}
}

private void flush(TableName table) throws IOException {
Admin admin = getUtility().getAdmin();
admin.flush(table);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -273,10 +273,12 @@ public void testMinorCompactionShouldNotRetainCellsWhenMaxLookbackIsDisabled()
Thread.sleep(1);
}
flush(TableName.valueOf(tableName));
// Flushes dump and retain all the cells to HFile.
// Doing MAX_COLUMN_INDEX + 1 to account for empty cells
assertEquals(TestUtil.getRawCellCount(conn, TableName.valueOf(tableName), row),
rowUpdateCounter * (MAX_COLUMN_INDEX + 1));
// At every flush, extra cell versions should be removed.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need to rename this test method to testMinorCompactionAndFlushShouldNotRetainCellsWhenMaxLookbackIsDisabled

// MAX_COLUMN_INDEX table columns and one empty column will be retained for
// each row version.
int rawCellCount = TestUtil.getRawCellCount(
conn, TableName.valueOf(tableName), row);
assertEquals((i + 1) * (MAX_COLUMN_INDEX + 1) * versions, rawCellCount);
}
// Run one minor compaction (in case no minor compaction has happened yet)
TestUtil.minorCompact(utility, TableName.valueOf(tableName));
Expand Down