From 7bbcb3974e1cd45512cdda29a8ef01ad74d0f106 Mon Sep 17 00:00:00 2001
From: Yicong Huang <17627829+Yicong-Huang@users.noreply.github.com>
Date: Mon, 23 Mar 2026 21:35:00 +0000
Subject: [PATCH] [SPARK-56165][PYTHON][TESTS] Use temporary directory instead of saveAsTable in test_data_source_segfault

### What changes were proposed in this pull request?

Replace `saveAsTable("test_table")` with `.save(tempdir)` in `test_data_source_segfault`'s two write/commit subtests.

### Why are the changes needed?

`saveAsTable()` triggers catalog format validation, which can reject the custom `"test"` data source format. Using `.save(tempdir)` avoids catalog validation while exercising the same Python worker write/commit code paths. This is also consistent with how other write tests in the same file are structured.

### Does this PR introduce _any_ user-facing change?

No. Test-only change.

### How was this patch tested?

By running `python/pyspark/sql/tests/test_python_datasource.py::test_data_source_segfault` (12 subtests passed).

### Was this patch authored or co-authored using generative AI tooling?

No.
--- python/pyspark/sql/tests/test_python_datasource.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/python/pyspark/sql/tests/test_python_datasource.py b/python/pyspark/sql/tests/test_python_datasource.py index 1bdb7a5395e1b..a2d18dd7b8161 100644 --- a/python/pyspark/sql/tests/test_python_datasource.py +++ b/python/pyspark/sql/tests/test_python_datasource.py @@ -919,10 +919,9 @@ def write(self, iterator): self.spark.dataSource.register(TestDataSource) - with self.assertRaisesRegex(Exception, expected): - self.spark.range(10).write.format("test").mode("append").saveAsTable( - "test_table" - ) + with tempfile.TemporaryDirectory(prefix="test_segfault_") as d: + with self.assertRaisesRegex(Exception, expected): + self.spark.range(10).write.format("test").mode("append").save(d) with self.subTest(worker="pyspark.sql.worker.commit_data_source_write"): @@ -943,10 +942,9 @@ def commit(self, messages): self.spark.dataSource.register(TestDataSource) - with self.assertRaisesRegex(Exception, expected): - self.spark.range(10).write.format("test").mode("append").saveAsTable( - "test_table" - ) + with tempfile.TemporaryDirectory(prefix="test_segfault_") as d: + with self.assertRaisesRegex(Exception, expected): + self.spark.range(10).write.format("test").mode("append").save(d) @unittest.skipIf(is_remote_only(), "Requires JVM access") def test_data_source_reader_with_logging(self):