-
Notifications
You must be signed in to change notification settings - Fork 1k
PHOENIX-7593: Enable CompactionScanner for flushes #2134
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -65,6 +65,7 @@ | |
| import org.apache.phoenix.filter.RowKeyComparisonFilter.RowKeyTuple; | ||
| import org.apache.phoenix.jdbc.PhoenixConnection; | ||
| import org.apache.phoenix.jdbc.PhoenixPreparedStatement; | ||
| import org.apache.phoenix.query.QueryConstants; | ||
| import org.apache.phoenix.query.QueryServices; | ||
| import org.apache.phoenix.query.QueryServicesOptions; | ||
| import org.apache.phoenix.schema.CompiledConditionalTTLExpression; | ||
|
|
@@ -144,13 +145,13 @@ public class CompactionScanner implements InternalScanner { | |
| private final long maxLookbackInMillis; | ||
| private int minVersion; | ||
| private int maxVersion; | ||
| private final boolean emptyCFStore; | ||
| private boolean emptyCFStore; | ||
| private final boolean localIndex; | ||
| private final int familyCount; | ||
| private KeepDeletedCells keepDeletedCells; | ||
| private long compactionTime; | ||
| private final byte[] emptyCF; | ||
| private final byte[] emptyCQ; | ||
| private byte[] emptyCF; | ||
| private byte[] emptyCQ; | ||
| private final byte[] storeColumnFamily; | ||
| private final String tableName; | ||
| private final String columnFamilyName; | ||
|
|
@@ -180,13 +181,13 @@ public CompactionScanner(RegionCoprocessorEnvironment env, | |
| // Empty column family and qualifier are always needed to compute which all empty cells to retain | ||
| // even during minor compactions. If required empty cells are not retained during | ||
| // minor compactions then we can run into the risk of partial row expiry on next major compaction. | ||
| this.emptyCF = SchemaUtil.getEmptyColumnFamily(table); | ||
| this.emptyCQ = SchemaUtil.getEmptyColumnQualifier(table); | ||
| this.emptyCF = table != null ? SchemaUtil.getEmptyColumnFamily(table) : EMPTY_BYTE_ARRAY; | ||
| this.emptyCQ = table != null ? SchemaUtil.getEmptyColumnQualifier(table) : EMPTY_BYTE_ARRAY; | ||
| compactionTime = EnvironmentEdgeManager.currentTimeMillis(); | ||
| columnFamilyName = store.getColumnFamilyName(); | ||
| storeColumnFamily = columnFamilyName.getBytes(); | ||
| tableName = region.getRegionInfo().getTable().getNameAsString(); | ||
| String dataTableName = table.getName().toString(); | ||
| String dataTableName = table != null ? table.getName().toString() : ""; | ||
| Long overriddenMaxLookback = maxLookbackMap.get(tableName + SEPARATOR + columnFamilyName); | ||
| this.maxLookbackInMillis = overriddenMaxLookback == null ? | ||
| maxLookbackAgeInMillis : Math.max(maxLookbackAgeInMillis, overriddenMaxLookback); | ||
|
|
@@ -416,11 +417,32 @@ private void postProcessForConditionalTTL(List<Cell> result) { | |
| } | ||
| } | ||
|
|
||
| /** | ||
|  * Looks for the Phoenix empty column among the cells of a row. When one is found, its family | ||
|  * is stored in emptyCF, the matching qualifier in emptyCQ, and emptyCFStore is set to true. | ||
|  * When no cell of the row is an empty column, all three fields are left unchanged. | ||
|  * | ||
|  * @param result cells of the row to inspect | ||
|  */ | ||
| private void determineEmptyCfCq(List<Cell> result) { | ||
|     for (Cell cell : result) { | ||
|         // Probe with a local copy: writing the emptyCF field for every cell left it holding | ||
|         // the family of an arbitrary cell whenever the row contained no empty column. | ||
|         byte[] family = CellUtil.cloneFamily(cell); | ||
|         byte[] qualifier; | ||
|         if (ScanUtil.isEmptyColumn(cell, family, QueryConstants.EMPTY_COLUMN_BYTES)) { | ||
|             qualifier = QueryConstants.EMPTY_COLUMN_BYTES; | ||
|         } else if (ScanUtil.isEmptyColumn(cell, family, | ||
|                 QueryConstants.ENCODED_EMPTY_COLUMN_BYTES)) { | ||
|             // Empty column is always encoded in FOUR_BYTE format, since it's a reserved | ||
|             // qualifier. See EncodedColumnsUtil#isReservedColumnQualifier | ||
|             qualifier = QueryConstants.ENCODED_EMPTY_COLUMN_BYTES; | ||
|         } else { | ||
|             continue; | ||
|         } | ||
|         // Publish family and qualifier together so the pair is always consistent. | ||
|         emptyCF = family; | ||
|         emptyCQ = qualifier; | ||
|         emptyCFStore = true; | ||
|         return; | ||
|     } | ||
| } | ||
|
|
||
| @Override | ||
| public boolean next(List<Cell> result) throws IOException { | ||
| boolean hasMore = storeScanner.next(result); | ||
| inputCellCount += result.size(); | ||
| if (!result.isEmpty()) { | ||
| // This will happen only during flushes as then we don't pass PTable object | ||
| // to determine emptyCF and emptyCQ | ||
| if (emptyCQ == EMPTY_BYTE_ARRAY) { | ||
| determineEmptyCfCq(result); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. For a store that is not the empty-CF store, aren't we doing this check on every row?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah, will fix that. Thanks. |
||
| } | ||
| // This is for debugging | ||
| // printRow(result, "Input for " + tableName + " " + columnFamilyName, true, false); | ||
| phoenixLevelRowCompactor.compact(result, false); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -60,6 +60,7 @@ | |
| import org.apache.hadoop.hbase.io.ImmutableBytesWritable; | ||
| import org.apache.hadoop.hbase.ipc.RpcControllerFactory; | ||
| import org.apache.hadoop.hbase.ipc.controller.InterRegionServerIndexRpcControllerFactory; | ||
| import org.apache.hadoop.hbase.regionserver.FlushLifeCycleTracker; | ||
| import org.apache.hadoop.hbase.regionserver.InternalScanner; | ||
| import org.apache.hadoop.hbase.regionserver.MiniBatchOperationInProgress; | ||
| import org.apache.hadoop.hbase.regionserver.Region; | ||
|
|
@@ -611,6 +612,28 @@ private boolean areMutationsInSameTable(Table targetHTable, Region region) { | |
| region.getTableDescriptor().getTableName().getName()) == 0); | ||
| } | ||
|
|
||
| @Override | ||
| public InternalScanner preFlush(ObserverContext<RegionCoprocessorEnvironment> c, Store store, | ||
| InternalScanner scanner, FlushLifeCycleTracker tracker) throws IOException { | ||
| if (!isPhoenixTableTTLEnabled(c.getEnvironment().getConfiguration())) { | ||
|
||
| return scanner; | ||
| } else { | ||
| return User.runAsLoginUser(new PrivilegedExceptionAction<InternalScanner>() { | ||
| @Override public InternalScanner run() throws Exception { | ||
| String tableName = c.getEnvironment().getRegion().getRegionInfo().getTable() | ||
| .getNameAsString(); | ||
| Configuration conf = c.getEnvironment().getConfiguration(); | ||
| long maxLookbackInMillis = | ||
| BaseScannerRegionObserverConstants.getMaxLookbackInMillis(conf); | ||
| maxLookbackInMillis = CompactionScanner.getMaxLookbackInMillis(tableName, | ||
| store.getColumnFamilyName(), maxLookbackInMillis); | ||
| return new CompactionScanner(c.getEnvironment(), store, scanner, | ||
| maxLookbackInMillis, false, true, null); | ||
| } | ||
| }); | ||
| } | ||
| } | ||
|
|
||
| @Override | ||
| public InternalScanner preCompact(ObserverContext<RegionCoprocessorEnvironment> c, Store store, | ||
| InternalScanner scanner, ScanType scanType, CompactionLifeCycleTracker tracker, | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -610,6 +610,44 @@ public void testRetainingLastRowVersion() throws Exception { | |
| } | ||
| } | ||
|
|
||
| @Test(timeout=60000) | ||
| public void testRetainingLastRowVersionForFlushes() throws Exception { | ||
| try(Connection conn = DriverManager.getConnection(getUrl())) { | ||
| String tableName = generateUniqueName(); | ||
| createTable(tableName); | ||
| long timeIntervalBetweenTwoUpserts = (ttl / 2) + 1; | ||
| injectEdge.setValue(System.currentTimeMillis()); | ||
| EnvironmentEdgeManager.injectEdge(injectEdge); | ||
| TableName dataTableName = TableName.valueOf(tableName); | ||
| injectEdge.incrementValue(1); | ||
| Statement stmt = conn.createStatement(); | ||
| stmt.execute("upsert into " + tableName + " values ('a', 'ab', 'abc', 'abcd')"); | ||
| conn.commit(); | ||
| injectEdge.incrementValue(timeIntervalBetweenTwoUpserts * 1000); | ||
| stmt.execute("upsert into " + tableName + " values ('a', 'ab1')"); | ||
| conn.commit(); | ||
| injectEdge.incrementValue(timeIntervalBetweenTwoUpserts * 1000); | ||
| stmt.execute("upsert into " + tableName + " values ('a', 'ab2')"); | ||
| conn.commit(); | ||
| injectEdge.incrementValue(timeIntervalBetweenTwoUpserts * 1000); | ||
| stmt.execute("upsert into " + tableName + " values ('a', 'ab3')"); | ||
| conn.commit(); | ||
| injectEdge.incrementValue(MAX_LOOKBACK_AGE * 1000); | ||
| TestUtil.dumpTable(conn, dataTableName); | ||
| TestUtil.flush(utility, dataTableName); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Let's use `TestUtil.getRawCellCount` and verify that extra row versions are removed. |
||
| injectEdge.incrementValue(1); | ||
| TestUtil.dumpTable(conn, dataTableName); | ||
| majorCompact(dataTableName); | ||
| injectEdge.incrementValue(1); | ||
| TestUtil.dumpTable(conn, dataTableName); | ||
| ResultSet rs = stmt.executeQuery("select * from " + dataTableName + " where id = 'a'"); | ||
| while(rs.next()) { | ||
| assertNotNull(rs.getString(3)); | ||
|
||
| assertNotNull(rs.getString(4)); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| private void flush(TableName table) throws IOException { | ||
| Admin admin = getUtility().getAdmin(); | ||
| admin.flush(table); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -273,10 +273,12 @@ public void testMinorCompactionShouldNotRetainCellsWhenMaxLookbackIsDisabled() | |
| Thread.sleep(1); | ||
| } | ||
| flush(TableName.valueOf(tableName)); | ||
| // Flushes dump and retain all the cells to HFile. | ||
| // Doing MAX_COLUMN_INDEX + 1 to account for empty cells | ||
| assertEquals(TestUtil.getRawCellCount(conn, TableName.valueOf(tableName), row), | ||
| rowUpdateCounter * (MAX_COLUMN_INDEX + 1)); | ||
| // At every flush, extra cell versions should be removed. | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We need to rename this test method to `testMinorCompactionAndFlushShouldNotRetainCellsWhenMaxLookbackIsDisabled`.
||
| // MAX_COLUMN_INDEX table columns and one empty column will be retained for | ||
| // each row version. | ||
| int rawCellCount = TestUtil.getRawCellCount( | ||
| conn, TableName.valueOf(tableName), row); | ||
| assertEquals((i + 1) * (MAX_COLUMN_INDEX + 1) * versions, rawCellCount); | ||
| } | ||
| // Run one minor compaction (in case no minor compaction has happened yet) | ||
| TestUtil.minorCompact(utility, TableName.valueOf(tableName)); | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why not keep emptyCF and emptyCQ as null if PTable is null, so that we can also incorporate this logic?
Instead of
this
and similarly,
`if (emptyCQ == EMPTY_BYTE_ARRAY)` too will be a simple null check. I don't think EMPTY_BYTE_ARRAY is allowed as CF:CQ, but while debugging, a null check will be more readable than using incorrect values of `emptyCF` and `emptyCQ` for `ScanUtil.isEmptyColumn`?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
By keeping the `emptyCF` and `emptyCQ` values as `null`, are we trying to optimize the `if` check? I actually kept it as an empty byte array to avoid null handling, and nothing will match an empty byte array.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Got it, will change to storing null values. I agree this improves readability. Thanks