diff --git a/Sources/SkipLib/Skip/String.kt b/Sources/SkipLib/Skip/String.kt index 0cffa95..ee2290b 100644 --- a/Sources/SkipLib/Skip/String.kt +++ b/Sources/SkipLib/Skip/String.kt @@ -2,6 +2,7 @@ // SPDX-License-Identifier: MPL-2.0 package skip.lib +import java.lang.Character import java.util.Random /// Allow Swift code to reference Substring type. @@ -248,33 +249,57 @@ fun String.compactMap(transform: (Char) -> RE?): Array { } fun Substring.compactMap(transform: (Char) -> RE?): Array = stringValue.compactMap(transform) -fun String.split(separator: Char, maxSplits: Int = Int.max, omittingEmptySubsequences: Boolean = true): Array { - if (this.isEmpty()) return if (omittingEmptySubsequences) Array() else Array(listOf("")) +fun String.split(separator: String, maxSplits: Int = Int.max, omittingEmptySubsequences: Boolean = true): Array { + if (!omittingEmptySubsequences) { + val limit = if (maxSplits == Int.max) 0 else maxSplits + 1 + return Array(split(separator, limit = limit)) + } + + if (separator.isEmpty()) { + return splitOnEmptyStringSeparatorIncludingEmptySubsequences(maxSplits) + } val result = mutableListOf() var start = 0 var splits = 0 - for (i in this.indices) { - if (this[i] == separator && splits < maxSplits) { - val part = this.substring(start, i) - if (!omittingEmptySubsequences || part.isNotEmpty()) { - result.add(part) - } - start = i + 1 + while (splits < maxSplits) { + val found = indexOf(separator, start) + if (found < 0) break + val part = substring(start, found) + if (part.isNotEmpty()) { + result.add(part) splits++ - if (splits >= maxSplits) { break } } + start = found + separator.length } - - val part = this.substring(start) - if (!omittingEmptySubsequences || part.isNotEmpty()) { - result.add(part) + val rest = substring(start) + if (rest.isNotEmpty()) { + result.add(rest) } + return Array(result, nocopy = true) +} +// Ideally, this would use Kotlin split, but Kotlin split mangles emojis +private fun String.splitOnEmptyStringSeparatorIncludingEmptySubsequences(maxSplits: Int): Array { + val result = mutableListOf() + var remainder = this + var splits = 0 + while (splits < maxSplits && remainder.isNotEmpty()) { + val cp = Character.codePointAt(remainder, 0) + val len = Character.charCount(cp) + val first = remainder.substring(0, len) + result.add(first) + remainder = remainder.substring(len) + splits++ + } + if (remainder.isNotEmpty()) { + result.add(remainder) + } return Array(result, nocopy = true) } -fun Substring.split(separator: Char, maxSplits: Int = Int.max, omittingEmptySubsequences: Boolean = true): Array = stringValue.split(separator = separator, maxSplits = maxSplits, omittingEmptySubsequences = omittingEmptySubsequences) + +fun Substring.split(separator: String, maxSplits: Int = Int.max, omittingEmptySubsequences: Boolean = true): Array = stringValue.split(separator = separator, maxSplits = maxSplits, omittingEmptySubsequences = omittingEmptySubsequences) fun String.joined(): String = this fun Substring.joined(): String = stringValue diff --git a/Sources/SkipLib/String.swift b/Sources/SkipLib/String.swift index a6489ce..271730d 100644 --- a/Sources/SkipLib/String.swift +++ b/Sources/SkipLib/String.swift @@ -56,7 +56,7 @@ public struct String: RandomAccessCollection { } // Support in String although it is not yet supported in Collection - public func split(separator: Character, maxSplits: Int = Int.max, omittingEmptySubsequences: Bool = true) -> [String] { + public func split(separator: String, maxSplits: Int = Int.max, omittingEmptySubsequences: Bool = true) -> [String] { fatalError() } @@ -109,7 +109,7 @@ public struct Substring: RandomAccessCollection { } // Support in String although it is not yet supported in Collection - public func split(separator: Character, maxSplits: Int = Int.max, omittingEmptySubsequences: Bool = true) -> [String] { + public func split(separator: String, maxSplits: Int = Int.max, omittingEmptySubsequences: Bool = true) -> [String] { fatalError() } } diff --git a/Tests/SkipLibTests/StringTests.swift b/Tests/SkipLibTests/StringTests.swift index 01e839d..177e511 100644 --- a/Tests/SkipLibTests/StringTests.swift +++ b/Tests/SkipLibTests/StringTests.swift @@ -257,6 +257,45 @@ import Testing #expect(str2 == "ab++cd++efg++hi") } + @Test func splitByString() { + // Round-trip: multi-character separator (Kotlin `indexOf` loop) + let joined = "ab++cd++efg++hi" + #expect(["ab", "cd", "efg", "hi"] == joined.split(separator: "++")) + + // No match → single component + #expect(["hello"] == "hello".split(separator: "++")) + + // Empty subject, default omitting empties + #expect(0 == "".split(separator: "::").count) + + // maxSplits with multi-character separator + #expect(["a", "b++c"] == "a++b++c".split(separator: "++", maxSplits: 1)) + #expect(["a++b++c"] == "a++b++c".split(separator: "++", maxSplits: 0)) + + // Consecutive separators → empty pieces when not omitted (Kotlin scans non-overlapping occurrences) + #expect(["a", "", "b"] == "a::::b".split(separator: "::", omittingEmptySubsequences: false)) + #expect(["a", "b"] == "a::::b".split(separator: "::", omittingEmptySubsequences: true)) + #expect(["a"] == "::::a".split(separator: "::", maxSplits: 1, omittingEmptySubsequences: true)) + + // Non-overlapping scan (separator does not skip past shared characters) + #expect(["bar", "baz"] == "foobarfoobaz".split(separator: "foo")) + #expect(["ba"] == "ababa".split(separator: "aba")) + + // Empty separator: Kotlin `splitOnEmptyStringSeparator` (Unicode scalars; matches Swift for ASCII) + #expect(["a", "b", "cd"] == "abcd".split(separator: "", maxSplits: 2)) + #expect(["", "a", "b"] == "ab".split(separator: "", maxSplits: 2, omittingEmptySubsequences: false)) + + // Non-ASCII: multi-scalar separator and content (UTF-16 `indexOf` / Kotlin `String.indexOf`) + #expect(["你好", "世界", "!"] == "你好++世界++!".split(separator: "++")) + #expect(["مرحبا", "بالعالم"] == "مرحبا##بالعالم".split(separator: "##")) + #expect(["a", "b", "c"] == "a∑b∑c".split(separator: "∑")) + + // Supplementary-plane scalars (emoji): delimiter and segments are full scalar substrings + #expect(["😀", "🎉"] == "😀++🎉".split(separator: "++")) + #expect(["a", "b", "c"] == "a🙂b🙂c".split(separator: "🙂")) + #expect(["😀", "😀", "😀"] == "😀😀😀".split(separator: "", maxSplits: 2)) + } + @Test func splitMax() { let str = "ab,cd,efg,,hi" #expect(["ab,cd,efg,,hi"] == str.split(separator: ",", maxSplits: 0))