-
Notifications
You must be signed in to change notification settings - Fork 3.8k
refactor: adds StringColumnFormatSpec for string dimension configs #19258
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 5 commits
24a0826
e658c7b
2f4badc
450a968
ca5738c
96b22dc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -23,37 +23,21 @@ | |
| import com.fasterxml.jackson.annotation.JsonIgnore; | ||
| import com.fasterxml.jackson.annotation.JsonInclude; | ||
| import com.fasterxml.jackson.annotation.JsonProperty; | ||
| import org.apache.druid.error.DruidException; | ||
| import org.apache.druid.guice.BuiltInTypesModule; | ||
| import org.apache.druid.segment.DimensionHandler; | ||
| import org.apache.druid.segment.IndexSpec; | ||
| import org.apache.druid.segment.StringColumnFormatSpec; | ||
| import org.apache.druid.segment.StringDimensionHandler; | ||
| import org.apache.druid.segment.column.ColumnType; | ||
|
|
||
| import javax.annotation.Nullable; | ||
| import java.util.Objects; | ||
|
|
||
| public class StringDimensionSchema extends DimensionSchema | ||
| { | ||
| private static final boolean DEFAULT_CREATE_BITMAP_INDEX = true; | ||
|
|
||
| @Nullable | ||
| public static Integer getDefaultMaxStringLength() | ||
| { | ||
| return BuiltInTypesModule.getMaxStringLength(); | ||
| } | ||
|
|
||
| @Nullable | ||
| private static Integer validateMaxStringLength(String name, @Nullable Integer maxStringLength) | ||
| { | ||
| if (maxStringLength != null && maxStringLength < 0) { | ||
| throw DruidException.forPersona(DruidException.Persona.USER) | ||
| .ofCategory(DruidException.Category.INVALID_INPUT) | ||
| .build("maxStringLength for column [%s] must be >= 0, got [%s]", name, maxStringLength); | ||
| } | ||
| return maxStringLength != null ? maxStringLength : getDefaultMaxStringLength(); | ||
| } | ||
|
|
||
| @Nullable | ||
| private final Integer maxStringLength; | ||
| private final StringColumnFormatSpec columnFormatSpec; | ||
|
|
||
| @JsonCreator | ||
| public static StringDimensionSchema create(String name) | ||
|
|
@@ -66,11 +50,11 @@ public StringDimensionSchema( | |
| @JsonProperty("name") String name, | ||
| @JsonProperty("multiValueHandling") MultiValueHandling multiValueHandling, | ||
| @JsonProperty("createBitmapIndex") Boolean createBitmapIndex, | ||
| @JsonProperty("maxStringLength") @Nullable Integer maxStringLength | ||
| @JsonProperty("columnFormatSpec") @Nullable StringColumnFormatSpec columnFormatSpec | ||
| ) | ||
| { | ||
| super(name, multiValueHandling, createBitmapIndex == null ? DEFAULT_CREATE_BITMAP_INDEX : createBitmapIndex); | ||
| this.maxStringLength = validateMaxStringLength(name, maxStringLength); | ||
| this.columnFormatSpec = columnFormatSpec; | ||
| } | ||
|
|
||
| public StringDimensionSchema( | ||
|
|
@@ -87,12 +71,29 @@ public StringDimensionSchema(String name) | |
| this(name, null, DEFAULT_CREATE_BITMAP_INDEX, null); | ||
| } | ||
|
|
||
| @Nullable | ||
| @JsonProperty | ||
| @JsonInclude(JsonInclude.Include.NON_NULL) | ||
| @Nullable | ||
| public Integer getMaxStringLength() | ||
| public StringColumnFormatSpec getColumnFormatSpec() | ||
| { | ||
| return maxStringLength; | ||
| return columnFormatSpec; | ||
| } | ||
|
|
||
| @Override | ||
| public DimensionSchema getEffectiveSchema(IndexSpec indexSpec) | ||
| { | ||
| // If there's no per-column or job-level string format config, nothing to resolve | ||
| if (columnFormatSpec == null && indexSpec.getStringColumnFormatSpec() == null) { | ||
| return this; | ||
| } | ||
| StringColumnFormatSpec effective = | ||
| StringColumnFormatSpec.getEffectiveFormatSpec(columnFormatSpec, indexSpec); | ||
| return new StringDimensionSchema( | ||
| getName(), | ||
| getMultiValueHandling(), | ||
| hasBitmapIndex(), | ||
| effective | ||
| ); | ||
| } | ||
|
|
||
| @Override | ||
|
|
@@ -117,6 +118,35 @@ public boolean canBeMultiValued() | |
| @Override | ||
| public DimensionHandler getDimensionHandler() | ||
| { | ||
| return new StringDimensionHandler(getName(), getMultiValueHandling(), hasBitmapIndex(), false, maxStringLength); | ||
| MultiValueHandling mvh = columnFormatSpec != null && columnFormatSpec.getMultiValueHandling() != null | ||
| ? columnFormatSpec.getMultiValueHandling() | ||
| : getMultiValueHandling(); | ||
| boolean bitmap = columnFormatSpec != null && columnFormatSpec.getIndexType() != null | ||
| ? columnFormatSpec.getIndexType().hasBitmapIndex() | ||
| : hasBitmapIndex(); | ||
| Integer maxStringLength = columnFormatSpec != null ? columnFormatSpec.getMaxStringLength() : null; | ||
|
||
| return new StringDimensionHandler(getName(), mvh, bitmap, false, maxStringLength); | ||
| } | ||
|
|
||
| @Override | ||
| public boolean equals(Object o) | ||
| { | ||
| if (this == o) { | ||
| return true; | ||
| } | ||
| if (o == null || getClass() != o.getClass()) { | ||
| return false; | ||
| } | ||
| if (!super.equals(o)) { | ||
| return false; | ||
| } | ||
| StringDimensionSchema that = (StringDimensionSchema) o; | ||
| return Objects.equals(columnFormatSpec, that.columnFormatSpec); | ||
| } | ||
|
|
||
| @Override | ||
| public int hashCode() | ||
| { | ||
| return Objects.hash(super.hashCode(), columnFormatSpec); | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -75,6 +75,8 @@ public static Builder builder() | |
| @Nullable | ||
| private final NestedCommonFormatColumnFormatSpec autoColumnFormatSpec; | ||
| @Nullable | ||
| private final StringColumnFormatSpec stringColumnFormatSpec; | ||
| @Nullable | ||
| private final CompressionStrategy metadataCompression; | ||
|
|
||
| /** | ||
|
|
@@ -109,6 +111,8 @@ public static Builder builder() | |
| * used to load the written segment | ||
| * @param autoColumnFormatSpec specify the default {@link NestedCommonFormatColumnFormatSpec} to use for json and | ||
| * auto columns. Defaults to null upon calling {@link #getEffectiveSpec()}. | ||
| * @param stringColumnFormatSpec specify the default {@link StringColumnFormatSpec} to use for string columns. | ||
| * Defaults to null upon calling {@link #getEffectiveSpec()}. | ||
| */ | ||
| @JsonCreator | ||
| public IndexSpec( | ||
|
|
@@ -121,7 +125,8 @@ public IndexSpec( | |
| @JsonProperty("complexMetricCompression") @Nullable CompressionStrategy complexMetricCompression, | ||
| @Deprecated @JsonProperty("jsonCompression") @Nullable CompressionStrategy jsonCompression, | ||
| @JsonProperty("segmentLoader") @Nullable SegmentizerFactory segmentLoader, | ||
| @JsonProperty("autoColumnFormatSpec") @Nullable NestedCommonFormatColumnFormatSpec autoColumnFormatSpec | ||
| @JsonProperty("autoColumnFormatSpec") @Nullable NestedCommonFormatColumnFormatSpec autoColumnFormatSpec, | ||
| @JsonProperty("stringColumnFormatSpec") @Nullable StringColumnFormatSpec stringColumnFormatSpec | ||
| ) | ||
| { | ||
| this.bitmapSerdeFactory = bitmapSerdeFactory; | ||
|
|
@@ -134,6 +139,7 @@ public IndexSpec( | |
| this.jsonCompression = jsonCompression; | ||
| this.segmentLoader = segmentLoader; | ||
| this.autoColumnFormatSpec = autoColumnFormatSpec; | ||
| this.stringColumnFormatSpec = stringColumnFormatSpec; | ||
| } | ||
|
|
||
| @JsonProperty("bitmap") | ||
|
|
@@ -212,6 +218,14 @@ public NestedCommonFormatColumnFormatSpec getAutoColumnFormatSpec() | |
| return autoColumnFormatSpec; | ||
| } | ||
|
|
||
| @JsonProperty | ||
| @JsonInclude(JsonInclude.Include.NON_NULL) | ||
| @Nullable | ||
| public StringColumnFormatSpec getStringColumnFormatSpec() | ||
| { | ||
| return stringColumnFormatSpec; | ||
| } | ||
|
|
||
| /** | ||
| * Populate all null fields of {@link IndexSpec}, first from {@link #getDefault()} and finally falling back to hard | ||
| * coded defaults if no overrides are defined. | ||
|
|
@@ -298,6 +312,16 @@ public IndexSpec getEffectiveSpec() | |
| ); | ||
| } | ||
|
|
||
| if (stringColumnFormatSpec != null) { | ||
| bob.withStringColumnFormatSpec( | ||
| StringColumnFormatSpec.getEffectiveFormatSpec(stringColumnFormatSpec, this) | ||
| ); | ||
| } else if (defaultSpec.stringColumnFormatSpec != null) { | ||
| bob.withStringColumnFormatSpec( | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this needed? stringColumnFormatSpec is always null in the defaultSpec
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I see a similar pattern for other properties but I don't think it's actually needed in this case
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. defautlSpec can be set in runtime.properties to set system level default overrides, so we want it to be like this i think |
||
| StringColumnFormatSpec.getEffectiveFormatSpec(defaultSpec.stringColumnFormatSpec, this) | ||
| ); | ||
| } | ||
|
|
||
| return bob.build(); | ||
| } | ||
|
|
||
|
|
@@ -320,7 +344,8 @@ public boolean equals(Object o) | |
| Objects.equals(complexMetricCompression, indexSpec.complexMetricCompression) && | ||
| Objects.equals(jsonCompression, indexSpec.jsonCompression) && | ||
| Objects.equals(segmentLoader, indexSpec.segmentLoader) && | ||
| Objects.equals(autoColumnFormatSpec, indexSpec.autoColumnFormatSpec); | ||
| Objects.equals(autoColumnFormatSpec, indexSpec.autoColumnFormatSpec) && | ||
| Objects.equals(stringColumnFormatSpec, indexSpec.stringColumnFormatSpec); | ||
| } | ||
|
|
||
| @Override | ||
|
|
@@ -336,7 +361,8 @@ public int hashCode() | |
| complexMetricCompression, | ||
| jsonCompression, | ||
| segmentLoader, | ||
| autoColumnFormatSpec | ||
| autoColumnFormatSpec, | ||
| stringColumnFormatSpec | ||
| ); | ||
| } | ||
|
|
||
|
|
@@ -352,6 +378,7 @@ public String toString() | |
| ", longEncoding=" + longEncoding + | ||
| ", complexMetricCompression=" + complexMetricCompression + | ||
| ", autoColumnFormatSpec=" + autoColumnFormatSpec + | ||
| ", stringColumnFormatSpec=" + stringColumnFormatSpec + | ||
| ", jsonCompression=" + jsonCompression + | ||
| ", segmentLoader=" + segmentLoader + | ||
| '}'; | ||
|
|
@@ -379,6 +406,8 @@ public static class Builder | |
| private SegmentizerFactory segmentLoader; | ||
| @Nullable | ||
| private NestedCommonFormatColumnFormatSpec autoColumnFormatSpec; | ||
| @Nullable | ||
| private StringColumnFormatSpec stringColumnFormatSpec; | ||
|
|
||
| public Builder withBitmapSerdeFactory(@Nullable BitmapSerdeFactory bitmapSerdeFactory) | ||
| { | ||
|
|
@@ -441,6 +470,12 @@ public Builder withAutoColumnFormatSpec(@Nullable NestedCommonFormatColumnFormat | |
| return this; | ||
| } | ||
|
|
||
| public Builder withStringColumnFormatSpec(@Nullable StringColumnFormatSpec stringColumnFormatSpec) | ||
| { | ||
| this.stringColumnFormatSpec = stringColumnFormatSpec; | ||
| return this; | ||
| } | ||
|
|
||
| public IndexSpec build() | ||
| { | ||
| return new IndexSpec( | ||
|
|
@@ -453,7 +488,8 @@ public IndexSpec build() | |
| complexMetricCompression, | ||
| jsonCompression, | ||
| segmentLoader, | ||
| autoColumnFormatSpec | ||
| autoColumnFormatSpec, | ||
| stringColumnFormatSpec | ||
| ); | ||
| } | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit: we can do a small refactor to only check if columnFormatSpec is null one time
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated to null check columnFormatSpec once