Skip to content
This repository was archived by the owner on Jun 29, 2021. It is now read-only.
Open
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
90303fa
updated poi-ooxml
Sep 25, 2019
999647c
refactored [tabs] to [spaces]
Sep 26, 2019
5a75486
resolved review comments
Sep 26, 2019
b82e728
resolved review comments part 1
SociopathicPixel Sep 30, 2019
8c21387
+ minor fix which sneaked in when reverting few changes.
SociopathicPixel Sep 30, 2019
8919900
+ minor fix which sneaked in when reverting few changes.
SociopathicPixel Sep 30, 2019
aae088f
removed empty spaces in whitelines
SociopathicPixel Sep 30, 2019
7ae1b26
added a test, however datatypes are not found...
SociopathicPixel Sep 30, 2019
77a1b80
added test case for datatypes
Oct 1, 2019
84c6f06
fixed another test that fel over
Oct 1, 2019
b4d9a20
commit part 1; did some indentation fixes
Oct 2, 2019
1a34285
commit part 1.01; did some indentation fixes
Oct 2, 2019
3785c91
resolving review comments
Oct 3, 2019
66749e9
Merge branch 'dev/add-datatypes-to-excel-columns' of https://github.c…
SociopathicPixel Oct 4, 2019
a8ab4b6
resolving indentation filler
SociopathicPixel Oct 4, 2019
57108bc
revert all code style changes
Oct 7, 2019
138b147
wrote some tests, added update check
Oct 8, 2019
5088744
fixed assert that was set wrong
SociopathicPixel Oct 8, 2019
4bfcc41
resolving review comments 1 of many
Oct 14, 2019
0701bab
resolving review comments
Oct 16, 2019
9c3976e
resolving review comments, still there are a few thingies that could …
Oct 23, 2019
cc85725
still need to pull apache/master into this branch
SociopathicPixel Oct 27, 2019
23a7781
resolving review comments, not finished yet
Oct 29, 2019
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion excel/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ under the License.
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>4.0.1</version>
<version>4.1.0</version>
<exclusions>
<exclusion>
<groupId>commons-logging</groupId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@
import org.apache.metamodel.util.FileHelper;
import org.apache.metamodel.util.Resource;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.CellType;
import org.apache.poi.ss.usermodel.DateUtil;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
Expand All @@ -63,14 +65,13 @@ public DefaultSpreadsheetReaderDelegate(Resource resource, ExcelConfiguration co
_configuration = configuration;
}

@Override
public Schema createSchema(String schemaName) {
final MutableSchema schema = new MutableSchema(schemaName);
final Workbook wb = ExcelUtils.readWorkbook(_resource, true);
try {
for (int i = 0; i < wb.getNumberOfSheets(); i++) {
final Sheet currentSheet = wb.getSheetAt(i);
final MutableTable table = createTable(wb, currentSheet);
final MutableTable table = createTable(wb, currentSheet, _configuration.isValidateColumnTypes());
table.setSchema(schema);
schema.addTable(table);
}
Expand Down Expand Up @@ -103,7 +104,7 @@ public void notifyTablesModified() {
// do nothing
}

private MutableTable createTable(final Workbook wb, final Sheet sheet) {
private MutableTable createTable(final Workbook wb, final Sheet sheet, boolean validateColumnTypes) {
final MutableTable table = new MutableTable(sheet.getSheetName(), TableType.TABLE);

if (sheet.getPhysicalNumberOfRows() <= 0) {
Expand All @@ -124,11 +125,14 @@ private MutableTable createTable(final Workbook wb, final Sheet sheet) {
while (row == null && rowIterator.hasNext()) {
row = rowIterator.next();
}

} else {
row = rowIterator.next();
}

final int columnNameLineNumber = _configuration.getColumnNameLineNumber();
final ColumnType[] columnTypes = getColumnTypes(sheet, row);

if (columnNameLineNumber == ExcelConfiguration.NO_COLUMN_NAME_LINE) {

// get to the first non-empty line (no matter if lines are skipped
Expand All @@ -148,8 +152,17 @@ private MutableTable createTable(final Workbook wb, final Sheet sheet) {

for (int j = offset; j < row.getLastCellNum(); j++) {
final ColumnNamingContext namingContext = new ColumnNamingContextImpl(table, null, j);
final Column column = new MutableColumn(columnNamingSession.getNextColumnName(namingContext),
ColumnType.STRING, table, j, true);
final Column column;
if (validateColumnTypes) {

column =
new MutableColumn(columnNamingSession.getNextColumnName(namingContext), columnTypes[j],
table, j, true);
} else {
column =
new MutableColumn(columnNamingSession.getNextColumnName(namingContext),
ColumnType.STRING, table, j, true);
}
table.addColumn(column);
}
}
Expand All @@ -169,21 +182,84 @@ private MutableTable createTable(final Workbook wb, final Sheet sheet) {
}

if (hasColumns) {
createColumns(table, wb, row);
createColumns(table, wb, row, columnTypes);
}
}

return table;
}

private ColumnType[] getColumnTypes(final Sheet sheet, final Row row) {
final Iterator<Row> data = ExcelUtils.getRowIterator(sheet, _configuration, false);
final int rowLength = row.getLastCellNum();
int eagerness = ExcelConfiguration.EAGER_READ;
final ColumnType[] columnTypes = new ColumnType[rowLength];

while (data.hasNext() && eagerness-- > 0) {
final Row currentRow = data.next();
if (currentRow.getRowNum() < _configuration.getColumnNameLineNumber()) {
continue;
}
for (int index = 0; index < rowLength; index++) {
if (currentRow.getLastCellNum() == 0) {
continue;
}
columnTypes[index] = getColumnTypeFromRow(columnTypes[index], currentRow, index);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would prefer that you don't pass the current value of the columnType at the current index into the `getColumnTypeFromRow` method. I would move the logic from `checkColumnType(ColumnType, ColumnType)` here, so that method can be removed and `getColumnTypeFromRow` just returns the ColumnType it determines for its inspected cell.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I made a comment about this earlier, to which I didn't get an answer. The logic in `checkColumnType(expected, columnType)` gets used multiple times in this method, so why not reuse the code in a separate method, as it is now?

}
}
return columnTypes;
}

/**
 * Inspects one cell of a row and reconciles the type it suggests with the type determined so
 * far for that column.
 *
 * @param columnType the type determined so far for the column, or {@code null} if no type has
 *            been determined yet
 * @param currentRow the row that holds the cell to inspect
 * @param index the index of the cell within {@code currentRow}
 * @return the result of {@code checkColumnType} for the suggested type and {@code columnType}
 */
private ColumnType getColumnTypeFromRow(final ColumnType columnType, final Row currentRow, int index) {
    final Cell cell = currentRow.getCell(index);
    if (cell == null) {
        // a missing cell is handled like a text cell
        return checkColumnType(ColumnType.STRING, columnType);
    }

    final ColumnType detectedType;
    switch (cell.getCellType()) {
    case NUMERIC:
        if (DateUtil.isCellDateFormatted(cell)) {
            detectedType = ColumnType.DATE;
        } else if (cell.getNumericCellValue() % 1 == 0) {
            // no fractional part
            detectedType = ColumnType.INTEGER;
        } else {
            detectedType = ColumnType.DOUBLE;
        }
        break;
    case BOOLEAN:
        detectedType = ColumnType.BOOLEAN;
        break;
    default:
        // ERROR, _NONE, STRING, FORMULA and BLANK cells are all handled as text
        detectedType = ColumnType.STRING;
        break;
    }
    return checkColumnType(detectedType, columnType);
}

/**
 * Reconciles the type suggested by a newly inspected cell with the type determined so far for
 * the column.
 *
 * @param expectedColumnType the type suggested by the cell that was just inspected
 * @param columnType the type determined so far, or {@code null} if none has been determined
 * @return {@code expectedColumnType} if {@code columnType} is {@code null}; {@code columnType}
 *         if it is {@link ColumnType#STRING} or equals {@code expectedColumnType}; otherwise
 *         {@link ColumnType#VARCHAR}
 */
private ColumnType checkColumnType(final ColumnType expectedColumnType, ColumnType columnType) {
    if (columnType == null) {
        // first cell seen for this column: adopt its type
        return expectedColumnType;
    }
    final boolean keepCurrentType =
            columnType.equals(ColumnType.STRING) || columnType.equals(expectedColumnType);
    return keepCurrentType ? columnType : ColumnType.VARCHAR;
}

/**
* Builds columns based on row/cell values.
*
* @param table
* @param wb
* @param row
*/
private void createColumns(MutableTable table, Workbook wb, Row row) {
private void createColumns(final MutableTable table, final Workbook wb, final Row row, final ColumnType[] columTypes) {
if (row == null) {
logger.warn("Cannot create columns based on null row!");
return;
Expand All @@ -201,7 +277,12 @@ private void createColumns(MutableTable table, Workbook wb, Row row) {
final ColumnNamingContext columnNamingContext = new ColumnNamingContextImpl(table, intrinsicColumnName,
j);
final String columnName = columnNamingSession.getNextColumnName(columnNamingContext);
final Column column = new MutableColumn(columnName, ColumnType.VARCHAR, table, j, true);
final Column column;
if (columTypes == null) {
column = new MutableColumn(columnName, ColumnType.VARCHAR, table, j, true);
} else {
column = new MutableColumn(columnName, columTypes[j], table, j, true);
}
table.addColumn(column);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,26 +37,38 @@ public final class ExcelConfiguration extends BaseObject implements

/**
 * Column name line number that the spreadsheet reader compares against to detect that no line
 * holds the column names.
 */
public static final int NO_COLUMN_NAME_LINE = 0;
/** Column name line number used by the no-argument constructor. */
public static final int DEFAULT_COLUMN_NAME_LINE = 1;
/** Upper bound on the number of rows the spreadsheet reader iterates while detecting column types. */
public static final int EAGER_READ = 1000;

// line number that holds the column names
private final int columnNameLineNumber;
// naming strategy for columns; null when a constructor without a strategy argument is used
private final ColumnNamingStrategy columnNamingStrategy;
private final boolean skipEmptyLines;
private final boolean skipEmptyColumns;
// returned by isValidateColumnTypes()
private final boolean validateColumnTypes;

/**
 * Creates a configuration that uses {@link #DEFAULT_COLUMN_NAME_LINE} as the column name line
 * number, with skipEmptyLines set to {@code true} and skipEmptyColumns set to {@code false}.
 */
public ExcelConfiguration() {
this(DEFAULT_COLUMN_NAME_LINE, true, false);
}

public ExcelConfiguration(int columnNameLineNumber, boolean skipEmptyLines, boolean skipEmptyColumns) {
this(columnNameLineNumber, null, skipEmptyLines, skipEmptyColumns);
this(columnNameLineNumber, null, skipEmptyLines, skipEmptyColumns, false);
}

/**
 * Creates a configuration with an explicit column naming strategy and with column type
 * validation disabled.
 *
 * The flags are primitive {@code boolean}s, like in every other constructor of this class, so
 * that a {@code null} argument cannot cause a NullPointerException through unboxing and the
 * constructor keeps the same descriptor it had before column type validation was introduced.
 *
 * @param columnNameLineNumber the line number that holds the column names
 * @param columnNamingStrategy the naming strategy for the columns; may be {@code null}
 * @param skipEmptyLines value for the skipEmptyLines setting
 * @param skipEmptyColumns value for the skipEmptyColumns setting
 */
public ExcelConfiguration(int columnNameLineNumber, ColumnNamingStrategy columnNamingStrategy,
        boolean skipEmptyLines, boolean skipEmptyColumns) {
    this(columnNameLineNumber, columnNamingStrategy, skipEmptyLines, skipEmptyColumns, false);
}

/**
 * Creates a configuration without an explicit column naming strategy ({@code null} is passed
 * on as the strategy).
 *
 * @param columnNameLineNumber the line number that holds the column names
 * @param skipEmptyLines value for the skipEmptyLines setting
 * @param skipEmptyColumns value for the skipEmptyColumns setting
 * @param validateColumnTypes value later returned by {@link #isValidateColumnTypes()}
 */
public ExcelConfiguration(int columnNameLineNumber, boolean skipEmptyLines, boolean skipEmptyColumns, boolean validateColumnTypes) {
this(columnNameLineNumber, null, skipEmptyLines, skipEmptyColumns, validateColumnTypes);
}

public ExcelConfiguration(int columnNameLineNumber, ColumnNamingStrategy columnNamingStrategy,
boolean skipEmptyLines, boolean skipEmptyColumns) {
boolean skipEmptyLines, boolean skipEmptyColumns, boolean validateColumnTypes) {
this.columnNameLineNumber = columnNameLineNumber;
this.skipEmptyLines = skipEmptyLines;
this.skipEmptyColumns = skipEmptyColumns;
this.columnNamingStrategy = columnNamingStrategy;
this.validateColumnTypes = validateColumnTypes;
}

/**
Expand Down Expand Up @@ -102,17 +114,29 @@ public boolean isSkipEmptyColumns() {
return skipEmptyColumns;
}

/**
 * Indicates whether the data types of the columns in the Excel spreadsheet should be
 * validated while the spreadsheet is being read.
 *
 * @return {@code true} if column types should be validated, {@code false} otherwise
 */
public boolean isValidateColumnTypes() {
return validateColumnTypes;
}

/**
 * Appends the values that make up the identity of this configuration to the given list, in
 * the order: column name line number, skipEmptyLines, skipEmptyColumns, validateColumnTypes.
 *
 * @param identifiers the list the identity values are appended to
 */
@Override
protected void decorateIdentity(List<Object> identifiers) {
    final Object[] identityValues =
            { columnNameLineNumber, skipEmptyLines, skipEmptyColumns, validateColumnTypes };
    for (final Object identityValue : identityValues) {
        identifiers.add(identityValue);
    }
}

@Override
public String toString() {
return "ExcelConfiguration[columnNameLineNumber="
+ columnNameLineNumber + ", skipEmptyLines=" + skipEmptyLines
+ ", skipEmptyColumns=" + skipEmptyColumns + "]";
+ ", skipEmptyColumns=" + skipEmptyColumns +", validateColumnTypes="
+ validateColumnTypes + "]";
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ public class ExcelConfigurationTest extends TestCase {
public void testToString() throws Exception {
ExcelConfiguration conf = new ExcelConfiguration(1, true, false);
assertEquals(
"ExcelConfiguration[columnNameLineNumber=1, skipEmptyLines=true, skipEmptyColumns=false]",
"ExcelConfiguration[columnNameLineNumber=1, skipEmptyLines=true, skipEmptyColumns=false, validateColumnTypes=false]",
conf.toString());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -402,10 +402,10 @@ public void testMissingValues() throws Exception {
assertEquals(2, schema.getTableCount());

Table table = schema.getTables().get(0);
assertEquals("[Column[name=a,columnNumber=0,type=VARCHAR,nullable=true,nativeType=null,columnSize=null], "
assertEquals("[Column[name=a,columnNumber=0,type=INTEGER,nullable=true,nativeType=null,columnSize=null], "
+ "Column[name=b,columnNumber=1,type=VARCHAR,nullable=true,nativeType=null,columnSize=null], "
+ "Column[name=c,columnNumber=2,type=VARCHAR,nullable=true,nativeType=null,columnSize=null], "
+ "Column[name=d,columnNumber=3,type=VARCHAR,nullable=true,nativeType=null,columnSize=null]]",
+ "Column[name=c,columnNumber=2,type=INTEGER,nullable=true,nativeType=null,columnSize=null], "
+ "Column[name=d,columnNumber=3,type=STRING,nullable=true,nativeType=null,columnSize=null]]",
Arrays.toString(table.getColumns().toArray()));

Query q = new Query().select(table.getColumns()).from(table);
Expand All @@ -419,17 +419,31 @@ public void testMissingValues() throws Exception {
assertFalse(ds.next());
}

/**
 * Reads different_datatypes.xls with column type validation enabled and checks the column
 * types reported for the first table.
 */
public void testDifferentDataTypes() throws Exception {
    final File spreadsheet = copyOf("src/test/resources/different_datatypes.xls");
    final ExcelConfiguration configuration =
            new ExcelConfiguration(ExcelConfiguration.DEFAULT_COLUMN_NAME_LINE, true, false, true);
    final DataContext dataContext = new ExcelDataContext(spreadsheet, configuration);

    final Schema defaultSchema = dataContext.getDefaultSchema();
    assertEquals(2, defaultSchema.getTableCount());

    final Table firstTable = defaultSchema.getTables().get(0);
    final String expectedColumns =
            "[Column[name=INTEGER,columnNumber=0,type=INTEGER,nullable=true,nativeType=null,columnSize=null], "
                    + "Column[name=TEXT,columnNumber=1,type=STRING,nullable=true,nativeType=null,columnSize=null], "
                    + "Column[name=FORMULA,columnNumber=2,type=STRING,nullable=true,nativeType=null,columnSize=null]]";
    final String actualColumns = Arrays.toString(firstTable.getColumns().toArray());
    assertEquals(expectedColumns, actualColumns);
}

public void testMissingColumnHeader() throws Exception {
File file = copyOf("src/test/resources/xls_missing_column_header.xls");
DataContext dc = new ExcelDataContext(file);
Schema schema = dc.getDefaultSchema();
assertEquals(2, schema.getTableCount());

Table table = schema.getTables().get(0);
assertEquals("[Column[name=a,columnNumber=0,type=VARCHAR,nullable=true,nativeType=null,columnSize=null], "
+ "Column[name=b,columnNumber=1,type=VARCHAR,nullable=true,nativeType=null,columnSize=null], "
+ "Column[name=A,columnNumber=2,type=VARCHAR,nullable=true,nativeType=null,columnSize=null], "
+ "Column[name=d,columnNumber=3,type=VARCHAR,nullable=true,nativeType=null,columnSize=null]]",
assertEquals("[Column[name=a,columnNumber=0,type=INTEGER,nullable=true,nativeType=null,columnSize=null], "
+ "Column[name=b,columnNumber=1,type=INTEGER,nullable=true,nativeType=null,columnSize=null], "
+ "Column[name=A,columnNumber=2,type=INTEGER,nullable=true,nativeType=null,columnSize=null], "
+ "Column[name=d,columnNumber=3,type=INTEGER,nullable=true,nativeType=null,columnSize=null]]",
Arrays.toString(table.getColumns().toArray()));

Query q = new Query().select(table.getColumns()).from(table);
Expand Down Expand Up @@ -532,11 +546,10 @@ public void testTicket99defect() throws Exception {

Table table = schema.getTableByName("Sheet1");
assertEquals(

"[Column[name=Pkg No.,columnNumber=0,type=VARCHAR,nullable=true,nativeType=null,columnSize=null], "
+ "Column[name=Description,columnNumber=1,type=VARCHAR,nullable=true,nativeType=null,columnSize=null], "
+ "Column[name=Room,columnNumber=2,type=VARCHAR,nullable=true,nativeType=null,columnSize=null], "
+ "Column[name=Level,columnNumber=3,type=VARCHAR,nullable=true,nativeType=null,columnSize=null]]",
"[Column[name=Pkg No.,columnNumber=0,type=INTEGER,nullable=true,nativeType=null,columnSize=null], "
+ "Column[name=Description,columnNumber=1,type=STRING,nullable=true,nativeType=null,columnSize=null], "
+ "Column[name=Room,columnNumber=2,type=STRING,nullable=true,nativeType=null,columnSize=null], "
+ "Column[name=Level,columnNumber=3,type=STRING,nullable=true,nativeType=null,columnSize=null]]",
Arrays.toString(table.getColumns().toArray()));
}

Expand Down
Binary file added excel/src/test/resources/different_datatypes.xls
Binary file not shown.