diff --git a/poi-ooxml/src/main/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java b/poi-ooxml/src/main/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java index 0fb903ab441..5acbdd48df1 100644 --- a/poi-ooxml/src/main/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java +++ b/poi-ooxml/src/main/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java @@ -138,10 +138,22 @@ private void buildRunsInOrderFromXml(XmlObject object) { if (o instanceof CTSdtBlock) { XWPFSDT cc = new XWPFSDT((CTSdtBlock) o, part); iruns.add(cc); + CTSdtContentBlock content = ((CTSdtBlock)o).getSdtContent(); + if (content != null) { + for (CTP ctp : content.getPList()) { + processCTRs(ctp.getRList()); + } + } } if (o instanceof CTSdtRun) { - XWPFSDT cc = new XWPFSDT((CTSdtRun) o, part); + XWPFSDT cc = new XWPFSDT((CTSdtRun)o, part); iruns.add(cc); + + CTSdtContentRun sdtContent = ((CTSdtRun)o).getSdtContent(); + if (sdtContent != null) + { + processCTRs(sdtContent.getRList()); + } } if (o instanceof CTRunTrackChange) { final CTRunTrackChange parentRecord = (CTRunTrackChange) o; @@ -164,6 +176,17 @@ private void buildRunsInOrderFromXml(XmlObject object) { } } + private void processCTRs(List<CTR> ctrs) { + if (ctrs == null) { + return; + } + for (CTR ctr : ctrs) { + if (ctr.getRPr() != null) { + runs.add(new XWPFRun(ctr, (IRunBody)this)); + } + } + } + @Internal public CTP getCTP() { return paragraph; diff --git a/poi-ooxml/src/main/java/org/apache/poi/xwpf/usermodel/XWPFTable.java b/poi-ooxml/src/main/java/org/apache/poi/xwpf/usermodel/XWPFTable.java index 33fc2a8d635..4f0e4d1a29c 100644 --- a/poi-ooxml/src/main/java/org/apache/poi/xwpf/usermodel/XWPFTable.java +++ b/poi-ooxml/src/main/java/org/apache/poi/xwpf/usermodel/XWPFTable.java @@ -29,11 +29,15 @@ Licensed to the Apache Software Foundation (ASF) under one or more import org.apache.poi.ooxml.util.POIXMLUnits; import org.apache.poi.util.Internal; import org.apache.poi.util.Units; +import org.apache.xmlbeans.XmlCursor; +import
org.apache.xmlbeans.XmlObject; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBorder; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTDecimalNumber; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTJcTable; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTRow; +import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtContentRow; +import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtRow; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTString; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTbl; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTblBorders; @@ -168,24 +172,59 @@ public XWPFTable(CTTbl table, IBody part, boolean initRow) { createEmptyTable(table); } - for (CTRow row : table.getTrList()) { - StringBuilder rowText = new StringBuilder(); - XWPFTableRow tabRow = new XWPFTableRow(row, this); - tableRows.add(tabRow); - for (CTTc cell : row.getTcList()) { - for (CTP ctp : cell.getPList()) { - XWPFParagraph p = new XWPFParagraph(ctp, part); - if (rowText.length() > 0) { - rowText.append('\t'); + try (XmlCursor cursor = table.newCursor()) { + cursor.selectPath("./*"); + while (cursor.toNextSelection()) { + XmlObject xmlObject = cursor.getObject(); + if (xmlObject instanceof CTRow) { + processCTRow((CTRow)xmlObject); + } + else if (xmlObject instanceof CTSdtRow) { + List<CTRow> rows = new ArrayList<>(); + collectCTRowsInnerSdtRow((CTSdtRow)xmlObject, rows); + for (CTRow row : rows) + { + processCTRow(row); } - rowText.append(p.getText()); } } - if (rowText.length() > 0) { - this.text.append(rowText); - this.text.append('\n'); + } + } + + private void processCTRow(CTRow row) { + StringBuilder rowText = new StringBuilder(); + XWPFTableRow tableRow = new XWPFTableRow(row, this); + tableRows.add(tableRow); + for (CTTc cell : row.getTcList()) { + for (CTP ctp : cell.getPList())
{ + XWPFParagraph p = new XWPFParagraph(ctp, part); + if (rowText.length() > 0) { + rowText.append('\t'); + } + rowText.append(p.getText()); } } + if (rowText.length() > 0) { + this.text.append(rowText); + this.text.append('\n'); + } + } + + private void collectCTRowsInnerSdtRow(CTSdtRow sdtRow, List<CTRow> rows) { + CTSdtContentRow sdtContent = sdtRow.getSdtContent(); + if (sdtContent == null) { + return; + } + + List<CTRow> rowsInnerSdtContent = sdtContent.getTrList(); + if (!rowsInnerSdtContent.isEmpty()) { + rows.addAll(rowsInnerSdtContent); + return; + } + + for (CTSdtRow innerSdt : sdtContent.getSdtList()) { + collectCTRowsInnerSdtRow(innerSdt, rows); + } } private void createEmptyTable(CTTbl table) { diff --git a/poi-ooxml/src/test/java/org/apache/poi/xwpf/usermodel/TestXWPFParagraph.java b/poi-ooxml/src/test/java/org/apache/poi/xwpf/usermodel/TestXWPFParagraph.java index 5ef646a77cc..20cf7942cde 100644 --- a/poi-ooxml/src/test/java/org/apache/poi/xwpf/usermodel/TestXWPFParagraph.java +++ b/poi-ooxml/src/test/java/org/apache/poi/xwpf/usermodel/TestXWPFParagraph.java @@ -44,6 +44,10 @@ Licensed to the Apache Software Foundation (ASF) under one or more import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPBdr; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPPr; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR; +import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtBlock; +import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtContentBlock; +import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtContentRun; +import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtRun; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSpacing; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTextAlignment; import org.openxmlformats.schemas.wordprocessingml.x2006.main.STBorder; @@ -975,4 +979,155 @@ private static void checkSearchText(XWPFParagraph paragraph, String search,
int assertEquals(beginChar, result.getBeginChar(), "beginChar"); assertEquals(endChar, result.getEndChar(), "endChar"); } + + @Test + void testParagraphWithSdtRunAndContent() throws IOException { + try (XWPFDocument doc = new XWPFDocument()) { + XWPFParagraph p = doc.createParagraph(); + + CTP ctp = p.getCTP(); + + CTR normalRun = ctp.addNewR(); + normalRun.addNewT().setStringValue("Before "); + + CTSdtRun sdtRun = ctp.addNewSdt(); + CTSdtContentRun content = sdtRun.addNewSdtContent(); + + CTR innerRun = content.addNewR(); + innerRun.addNewRPr().addNewB().setVal(STOnOff1.ON); + innerRun.addNewT().setStringValue("SDT Run Content"); + + CTR innerRun2 = content.addNewR(); + innerRun2.addNewRPr().addNewI().setVal(STOnOff1.ON); + innerRun2.addNewT().setStringValue(" More"); + + CTR afterRun = ctp.addNewR(); + afterRun.addNewT().setStringValue(" After"); + + XWPFParagraph newParagraph = new XWPFParagraph(ctp, doc); + + // getRuns() includes: normal runs (always) + SDT content runs with RPr + // Before (1) + SDT Run Content (1) + More (1) + After (1) = 4 runs + List<XWPFRun> runs = newParagraph.getRuns(); + assertEquals(4, runs.size(), "Should have 4 runs (2 normal + 2 SDT with RPr)"); + + // getIRuns() includes SDT elements, but SDT runs from processCTRs are NOT in iruns + // Before (1) + SDT (1) + After (1) = 3 + List<IRunElement> iruns = newParagraph.getIRuns(); + assertEquals(3, iruns.size(), "Should have 3 elements (SDT runs are not in iruns)"); + + // Verify text includes SDT content (text is also duplicated) + String text = newParagraph.getText(); + assertTrue(text.contains("Before"), "Text should contain 'Before'"); + assertTrue(text.contains("SDT Run Content"), "Text should contain 'SDT Run Content'"); + assertTrue(text.contains(" More"), "Text should contain ' More'"); + assertTrue(text.contains("After"), "Text should contain 'After'"); + } + } + + @Test + void testParagraphWithMultipleSdtRuns() throws IOException { + try (XWPFDocument doc = new XWPFDocument()) { + XWPFParagraph p =
doc.createParagraph(); + + CTP ctp = p.getCTP(); + + CTSdtRun sdtRun1 = ctp.addNewSdt(); + CTSdtContentRun content1 = sdtRun1.addNewSdtContent(); + CTR innerRun1 = content1.addNewR(); + innerRun1.addNewRPr().addNewB().setVal(STOnOff1.ON); + innerRun1.addNewT().setStringValue("First"); + + CTR normalRun = ctp.addNewR(); + normalRun.addNewT().setStringValue(" Middle "); + + CTSdtRun sdtRun2 = ctp.addNewSdt(); + CTSdtContentRun content2 = sdtRun2.addNewSdtContent(); + CTR innerRun2 = content2.addNewR(); + innerRun2.addNewRPr().addNewI().setVal(STOnOff1.ON); + innerRun2.addNewT().setStringValue("Second"); + + XWPFParagraph newParagraph = new XWPFParagraph(ctp, doc); + + // getRuns() includes: normal run (always) + SDT content runs with RPr + // First (1) + Middle (1) + Second (1) = 3 runs + List<XWPFRun> runs = newParagraph.getRuns(); + assertEquals(3, runs.size(), "Should have 3 runs (1 normal + 2 SDT with RPr)"); + + // getIRuns() includes SDT elements, but SDT runs from processCTRs are NOT in iruns + // SDT1 (1) + Middle (1) + SDT2 (1) = 3 (SDT runs are in runs, not iruns) + List<IRunElement> iruns = newParagraph.getIRuns(); + assertEquals(3, iruns.size(), "Should have 3 elements (SDT runs are in runs, not iruns)"); + + // Verify text includes all content + String text = newParagraph.getText(); + assertTrue(text.contains("First"), "Text should contain 'First' from SDT run"); + assertTrue(text.contains("Middle"), "Text should contain 'Middle' from normal run"); + assertTrue(text.contains("Second"), "Text should contain 'Second' from SDT run"); + } + } + + /** + * Bug 66263 — Test SDT runs in paragraph using sample document. + * Verifies that SDT runs with and without RPr are processed correctly.
+ */ + @Test + void testSdtRunsFromSampleDocument() throws IOException { + try (XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("Bug66263-paragraph.docx")) { + List<XWPFParagraph> paragraphs = doc.getParagraphs(); + + // Paragraph 1: SDT Run with RPr + XWPFParagraph paragraph1 = paragraphs.get(0); + String paragraph1Text = paragraph1.getText(); + assertTrue(paragraph1Text.contains("Before"), "Paragraph 1 should contain 'Before'"); + assertTrue(paragraph1Text.contains("SDT Run with RPr"), "Paragraph 1 should contain SDT run with RPr"); + assertTrue(paragraph1Text.contains("After"), "Paragraph 1 should contain 'After'"); + + // getRuns() contains: 2 normal runs (always added) + 1 SDT run (added because it has RPr) = 3 runs + List<XWPFRun> runs1 = paragraph1.getRuns(); + assertEquals(3, runs1.size(), "Paragraph 1 should have 3 runs (2 normal + 1 SDT with RPr)"); + assertEquals("Before ", runs1.get(0).toString(), "First run should be 'Before'"); + assertEquals("SDT Run with RPr", runs1.get(1).toString(), "Second run should be SDT content"); + assertEquals(" After", runs1.get(2).toString(), "Third run should be 'After'"); + + // getIRuns() should contain all elements including SDT + List<IRunElement> iruns1 = paragraph1.getIRuns(); + assertEquals(3, iruns1.size(), "Paragraph 1 should have 3 elements in getIRuns() (2 normal runs + 1 SDT)"); + + // Paragraph 2: SDT Run without RPr + XWPFParagraph paragraph2 = paragraphs.get(1); + String paragraph2Text = paragraph2.getText(); + assertTrue(paragraph2Text.contains("Before No RPr"), "Paragraph 2 should contain 'Before No RPr'"); + assertTrue(paragraph2Text.contains("SDT Run without RPr"), "Paragraph 2 should contain SDT run without RPr"); + assertTrue(paragraph2Text.contains("After"), "Paragraph 2 should contain 'After'"); + + // getRuns() contains: 2 normal runs (always added) + 0 SDT runs (no RPr) = 2 runs + List<XWPFRun> runs2 = paragraph2.getRuns(); + assertEquals(2, runs2.size(), "Paragraph 2 should have 2 runs (normal runs only, no SDT runs without
RPr)"); + assertEquals("Before No RPr ", runs2.get(0).toString(), "First run should be 'Before No RPr'"); + assertEquals(" After", runs2.get(1).toString(), "Second run should be 'After'"); + + // getIRuns() should still contain all elements + List<IRunElement> iruns2 = paragraph2.getIRuns(); + assertEquals(3, iruns2.size(), "Paragraph 2 should have 3 elements in getIRuns() (2 normal runs + 1 SDT)"); + + // Paragraph 3: Multiple SDT Runs + XWPFParagraph paragraph3 = paragraphs.get(2); + String paragraph3Text = paragraph3.getText(); + assertTrue(paragraph3Text.contains("First"), "Paragraph 3 should contain 'First' from first SDT"); + assertTrue(paragraph3Text.contains("Middle"), "Paragraph 3 should contain 'Middle'"); + assertTrue(paragraph3Text.contains("Second"), "Paragraph 3 should contain 'Second' from second SDT"); + + // getRuns() contains: 1 normal run (always added) + 2 SDT runs (both have RPr) = 3 runs + List<XWPFRun> runs3 = paragraph3.getRuns(); + assertEquals(3, runs3.size(), "Paragraph 3 should have 3 runs (1 normal + 2 SDT runs with RPr)"); + assertTrue(runs3.get(0).toString().contains("First"), "First run should contain 'First'"); + assertEquals(" Middle ", runs3.get(1).toString(), "Second run should be 'Middle'"); + assertTrue(runs3.get(2).toString().contains("Second"), "Third run should contain 'Second'"); + + // getIRuns() should contain 3 elements (2 SDTs + 1 normal run) + List<IRunElement> iruns3 = paragraph3.getIRuns(); + assertEquals(3, iruns3.size(), "Paragraph 3 should have 3 elements in getIRuns() (2 SDTs + 1 normal run)"); + } + } } diff --git a/poi-ooxml/src/test/java/org/apache/poi/xwpf/usermodel/TestXWPFTable.java b/poi-ooxml/src/test/java/org/apache/poi/xwpf/usermodel/TestXWPFTable.java index 8fe1f303c95..dccd5d71e8d 100644 --- a/poi-ooxml/src/test/java/org/apache/poi/xwpf/usermodel/TestXWPFTable.java +++ b/poi-ooxml/src/test/java/org/apache/poi/xwpf/usermodel/TestXWPFTable.java @@ -28,6 +28,8 @@ Licensed to the Apache Software Foundation (ASF) under one or more import
org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTRow; +import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtContentRow; +import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtRow; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTbl; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTblBorders; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTblCellMar; @@ -662,4 +664,155 @@ public void testGetTableWidthIfNotPresent() throws Exception { assertEquals(TableWidthType.AUTO, table1.getWidthType()); } } + + @Test + void testTableWithSdtRow() throws IOException { + try (XWPFDocument doc = new XWPFDocument()) { + CTTbl table = CTTbl.Factory.newInstance(); + + CTRow normalRow = table.addNewTr(); + CTTc cell1 = normalRow.addNewTc(); + CTP p1 = cell1.addNewP(); + CTR r1 = p1.addNewR(); + r1.addNewT().setStringValue("Normal Row Cell 1"); + + CTTc cell2 = normalRow.addNewTc(); + CTP p2 = cell2.addNewP(); + CTR r2 = p2.addNewR(); + r2.addNewT().setStringValue("Normal Row Cell 2"); + + CTSdtRow sdtRow = table.addNewSdt(); + CTSdtContentRow sdtContent = sdtRow.addNewSdtContent(); + CTRow innerRow = sdtContent.addNewTr(); + + CTTc sdtCell1 = innerRow.addNewTc(); + CTP sdtP1 = sdtCell1.addNewP(); + CTR sdtR1 = sdtP1.addNewR(); + sdtR1.addNewT().setStringValue("SDT Row Cell 1"); + + CTTc sdtCell2 = innerRow.addNewTc(); + CTP sdtP2 = sdtCell2.addNewP(); + CTR sdtR2 = sdtP2.addNewR(); + sdtR2.addNewT().setStringValue("SDT Row Cell 2"); + + XWPFTable xtab = new XWPFTable(table, doc); + + assertEquals(1, xtab.getNumberOfRows(), "Table should have 1 row at top level (SDT rows are not counted)"); + + String text = xtab.getText(); + assertTrue(text.contains("Normal Row Cell 1"), "Text should contain normal row cell 1"); + assertTrue(text.contains("Normal Row Cell 2"), "Text should 
contain normal row cell 2"); + assertTrue(text.contains("SDT Row Cell 1"), "Text should contain SDT row cell 1"); + assertTrue(text.contains("SDT Row Cell 2"), "Text should contain SDT row cell 2"); + } + } + + @Test + void testTableWithNestedSdtRows() throws IOException { + try (XWPFDocument doc = new XWPFDocument()) { + CTTbl table = CTTbl.Factory.newInstance(); + + CTSdtRow outerSdtRow = table.addNewSdt(); + CTSdtContentRow outerContent = outerSdtRow.addNewSdtContent(); + + CTSdtRow innerSdtRow = outerContent.addNewSdt(); + CTSdtContentRow innerContent = innerSdtRow.addNewSdtContent(); + CTRow row1 = innerContent.addNewTr(); + + CTTc cell1 = row1.addNewTc(); + CTP p1 = cell1.addNewP(); + CTR r1 = p1.addNewR(); + r1.addNewT().setStringValue("Nested SDT Row"); + + XWPFTable xtab = new XWPFTable(table, doc); + + String text = xtab.getText(); + assertTrue(text.contains("Nested SDT Row"), "Text should contain nested SDT row content"); + } + } + + + @Test + void testTableWithOnlySdtRow() throws IOException { + try (XWPFDocument doc = new XWPFDocument()) { + CTTbl table = CTTbl.Factory.newInstance(); + + CTSdtRow sdtRow = table.addNewSdt(); + CTSdtContentRow sdtContent = sdtRow.addNewSdtContent(); + CTRow innerRow = sdtContent.addNewTr(); + + CTTc cell = innerRow.addNewTc(); + CTP p = cell.addNewP(); + CTR r = p.addNewR(); + r.addNewT().setStringValue("Only SDT Row"); + + XWPFTable xtab = new XWPFTable(table, doc); + + assertEquals(1, xtab.getNumberOfRows(), "Table has 1 row from createEmptyTable"); + assertEquals(2, xtab.getRows().size(), "Table should have 2 rows (1 empty + 1 SDT)"); + + String text = xtab.getText(); + assertTrue(text.contains("Only SDT Row"), "Text should contain SDT row content"); + } + } + + @Test + void testTableWithMultipleSdtRows() throws IOException { + try (XWPFDocument doc = new XWPFDocument()) { + CTTbl table = CTTbl.Factory.newInstance(); + + // First SDT row + CTSdtRow sdtRow1 = table.addNewSdt(); + CTSdtContentRow sdtContent1 = 
sdtRow1.addNewSdtContent(); + CTRow row1 = sdtContent1.addNewTr(); + CTTc cell1 = row1.addNewTc(); + CTP p1 = cell1.addNewP(); + CTR r1 = p1.addNewR(); + r1.addNewT().setStringValue("SDT Row 1"); + + // Second SDT row + CTSdtRow sdtRow2 = table.addNewSdt(); + CTSdtContentRow sdtContent2 = sdtRow2.addNewSdtContent(); + CTRow row2 = sdtContent2.addNewTr(); + CTTc cell2 = row2.addNewTc(); + CTP p2 = cell2.addNewP(); + CTR r2 = p2.addNewR(); + r2.addNewT().setStringValue("SDT Row 2"); + + XWPFTable xtab = new XWPFTable(table, doc); + + assertEquals(1, xtab.getNumberOfRows(), "Table has 1 top-level row (empty + 2 SDTs)"); + assertEquals(3, xtab.getRows().size(), "Table should have 3 rows (empty + 2 SDTs)"); + + String text = xtab.getText(); + assertTrue(text.contains("SDT Row 1"), "Text should contain first SDT row"); + assertTrue(text.contains("SDT Row 2"), "Text should contain second SDT row"); + } + } + + /** + * Bug 66263 — Test SDT row support using sample document. + * Verifies that SDT rows from sample document are correctly processed. 
+ */ + @Test + void testSdtRowFromSampleDocument() throws IOException { + try (XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("Bug66263-table.docx")) { + XWPFTable table = doc.getTables().get(0); + + // Verify SDT row text extraction + String tableText = table.getText(); + assertTrue(tableText.contains("SDT Cell 1"), "Table should contain SDT cell 1"); + assertTrue(tableText.contains("SDT Cell 2"), "Table should contain SDT cell 2"); + + // Verify SDT row is accessible via getRows() + List<XWPFTableRow> rows = table.getRows(); + assertEquals(1, rows.size(), "Table should have 1 SDT row"); + + // Verify paragraph processing in table cells + XWPFParagraph cellPara = rows.get(0).getCell(0).getParagraphs().get(0); + assertNotNull(cellPara, "Cell paragraph should not be null"); + String paraText = cellPara.getText(); + assertTrue(paraText.contains("SDT Cell 1"), "Cell paragraph should contain SDT cell text"); + } + } } diff --git a/test-data/document/Bug66263-paragraph.docx b/test-data/document/Bug66263-paragraph.docx new file mode 100644 index 00000000000..e1b942b9ce0 Binary files /dev/null and b/test-data/document/Bug66263-paragraph.docx differ diff --git a/test-data/document/Bug66263-table.docx b/test-data/document/Bug66263-table.docx new file mode 100644 index 00000000000..4964164911f Binary files /dev/null and b/test-data/document/Bug66263-table.docx differ