Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 40 additions & 15 deletions Sources/SkipLib/Skip/String.kt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// SPDX-License-Identifier: MPL-2.0
package skip.lib

import java.lang.Character
import java.util.Random

/// Allow Swift code to reference Substring type.
Expand Down Expand Up @@ -248,33 +249,57 @@ fun <RE> String.compactMap(transform: (Char) -> RE?): Array<RE> {
}
/// Substring flavor of compactMap: hands the transform to `stringValue.compactMap` and returns its result.
fun <RE> Substring.compactMap(transform: (Char) -> RE?): Array<RE> {
    return stringValue.compactMap(transform)
}

/// Splits this string around each occurrence of the character `separator`.
///
/// Forwards to the `String`-separator overload so both overloads share one implementation.
/// This also means empty pieces that are dropped no longer count toward `maxSplits`.
///
/// - `separator`: the character to split on.
/// - `maxSplits`: maximum number of splits to perform; the unsplit tail becomes the final piece.
/// - `omittingEmptySubsequences`: when true, empty pieces are left out of the result.
fun String.split(separator: Char, maxSplits: Int = Int.max, omittingEmptySubsequences: Boolean = true): Array<String> =
    split(separator = separator.toString(), maxSplits = maxSplits, omittingEmptySubsequences = omittingEmptySubsequences)

/// Splits this string around each non-overlapping occurrence of `separator`, scanning left to right.
///
/// - `separator`: the delimiter; an empty delimiter splits between code points when empties are omitted.
/// - `maxSplits`: maximum number of splits to perform; the unsplit tail becomes the final piece.
/// - `omittingEmptySubsequences`: when true, empty pieces are left out and do not count toward `maxSplits`.
fun String.split(separator: String, maxSplits: Int = Int.max, omittingEmptySubsequences: Boolean = true): Array<String> {
    if (!omittingEmptySubsequences) {
        // Kotlin's own split keeps empty pieces. Its `limit` caps the number of pieces
        // (0 = unlimited), so N splits correspond to a limit of N + 1.
        val limit = if (maxSplits == Int.max) 0 else maxSplits + 1
        return Array(split(separator, limit = limit))
    }

    if (separator.isEmpty()) {
        return splitOnEmptyStringSeparatorIncludingEmptySubsequences(maxSplits)
    }

    val result = mutableListOf<String>()
    var start = 0
    var splits = 0

    while (splits < maxSplits) {
        val found = indexOf(separator, start)
        if (found < 0) break
        // Only a non-empty piece is emitted and counted as a split.
        if (found > start) {
            result.add(substring(start, found))
            splits++
        }
        // Always step past the separator before the loop condition is re-checked; leaving the
        // loop without advancing would make the tail below repeat the piece just emitted.
        start = found + separator.length
    }

    // Whatever follows the last consumed separator is the final piece.
    if (start < length) {
        result.add(substring(start))
    }
    return Array(result, nocopy = true)
}

/// Splits this string on the empty separator: each piece is one Unicode code point (a surrogate
/// pair stays together), and after `maxSplits` pieces the unsplit tail is appended as a final piece.
///
/// Despite the name, no empty piece is ever produced: every piece holds at least one code unit.
/// NOTE(review): pieces are code points, not grapheme clusters, so text built from several code
/// points per visible character may split differently than Swift `Character`s — confirm if that matters.
// Ideally, this would use Kotlin split, but Kotlin split mangles emojis
private fun String.splitOnEmptyStringSeparatorIncludingEmptySubsequences(maxSplits: Int): Array<String> {
    val result = mutableListOf<String>()
    // Walk by index instead of re-slicing the remainder, so the tail is not copied on every step.
    var index = 0
    var splits = 0
    while (splits < maxSplits && index < length) {
        val next = index + Character.charCount(Character.codePointAt(this, index))
        result.add(substring(index, next))
        index = next
        splits++
    }
    if (index < length) {
        result.add(substring(index))
    }
    return Array(result, nocopy = true)
}
/// Character-separator split for Substring: passes every argument through to `stringValue.split`.
fun Substring.split(separator: Char, maxSplits: Int = Int.max, omittingEmptySubsequences: Boolean = true): Array<String> {
    return stringValue.split(
        separator = separator,
        maxSplits = maxSplits,
        omittingEmptySubsequences = omittingEmptySubsequences
    )
}

/// String-separator split for Substring: passes every argument through to `stringValue.split`.
fun Substring.split(separator: String, maxSplits: Int = Int.max, omittingEmptySubsequences: Boolean = true): Array<String> {
    return stringValue.split(
        separator = separator,
        maxSplits = maxSplits,
        omittingEmptySubsequences = omittingEmptySubsequences
    )
}

/// Joining a single string yields that same string unchanged.
fun String.joined(): String {
    return this
}
/// Joining a Substring returns its `stringValue`.
fun Substring.joined(): String {
    return stringValue
}
Expand Down
4 changes: 2 additions & 2 deletions Sources/SkipLib/String.swift
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ public struct String: RandomAccessCollection {
}

// Support in String although it is not yet supported in Collection.
// Signature-only stub: the body is just fatalError(), so no splitting is performed by this declaration.
// NOTE(review): presumably the Kotlin `String.split` in Skip/String.kt supplies the real behavior — confirm.
public func split(separator: Character, maxSplits: Int = Int.max, omittingEmptySubsequences: Bool = true) -> [String] {
public func split(separator: String, maxSplits: Int = Int.max, omittingEmptySubsequences: Bool = true) -> [String] {
fatalError()
}

Expand Down Expand Up @@ -109,7 +109,7 @@ public struct Substring: RandomAccessCollection {
}

// Support in String although it is not yet supported in Collection
// Signature-only stub: the body is just fatalError(), so no splitting is performed by this declaration.
// NOTE(review): presumably the Kotlin `Substring.split` in Skip/String.kt supplies the real behavior — confirm.
public func split(separator: Character, maxSplits: Int = Int.max, omittingEmptySubsequences: Bool = true) -> [String] {
public func split(separator: String, maxSplits: Int = Int.max, omittingEmptySubsequences: Bool = true) -> [String] {
fatalError()
}
}
Expand Down
39 changes: 39 additions & 0 deletions Tests/SkipLibTests/StringTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,45 @@ import Testing
#expect(str2 == "ab++cd++efg++hi")
}

// Exercises split(separator:) with a String separator: multi-character and empty separators,
// maxSplits, omittingEmptySubsequences, and non-ASCII / emoji content.
@Test func splitByString() {
// Round-trip: multi-character separator (Kotlin `indexOf` loop)
let joined = "ab++cd++efg++hi"
#expect(["ab", "cd", "efg", "hi"] == joined.split(separator: "++"))

// No match → single component
#expect(["hello"] == "hello".split(separator: "++"))

// Empty subject, default omitting empties
#expect(0 == "".split(separator: "::").count)

// maxSplits with multi-character separator: the unsplit tail is returned as the last piece
#expect(["a", "b++c"] == "a++b++c".split(separator: "++", maxSplits: 1))
#expect(["a++b++c"] == "a++b++c".split(separator: "++", maxSplits: 0))

// Consecutive separators → empty pieces when not omitted (Kotlin scans non-overlapping occurrences)
#expect(["a", "", "b"] == "a::::b".split(separator: "::", omittingEmptySubsequences: false))
#expect(["a", "b"] == "a::::b".split(separator: "::", omittingEmptySubsequences: true))
// Leading separators only yield omitted empty pieces, so they do not use up the single allowed split
#expect(["a"] == "::::a".split(separator: "::", maxSplits: 1, omittingEmptySubsequences: true))

// Matches are consumed left to right without overlapping: once "aba" matches at the start of
// "ababa", scanning resumes after it, so only "ba" remains
#expect(["bar", "baz"] == "foobarfoobaz".split(separator: "foo"))
#expect(["ba"] == "ababa".split(separator: "aba"))

// Empty separator. With empties omitted, Kotlin uses
// `splitOnEmptyStringSeparatorIncludingEmptySubsequences` (one piece per code point);
// with empties kept it goes through Kotlin's own `split`, which yields a leading "" piece.
#expect(["a", "b", "cd"] == "abcd".split(separator: "", maxSplits: 2))
#expect(["", "a", "b"] == "ab".split(separator: "", maxSplits: 2, omittingEmptySubsequences: false))

// Non-ASCII content with ASCII separators ("++", "##") and a non-ASCII separator ("∑")
#expect(["你好", "世界", "!"] == "你好++世界++!".split(separator: "++"))
#expect(["مرحبا", "بالعالم"] == "مرحبا##بالعالم".split(separator: "##"))
#expect(["a", "b", "c"] == "a∑b∑c".split(separator: "∑"))

// Supplementary-plane scalars (emoji): delimiter and segments are full scalar substrings
#expect(["😀", "🎉"] == "😀++🎉".split(separator: "++"))
#expect(["a", "b", "c"] == "a🙂b🙂c".split(separator: "🙂"))
#expect(["😀", "😀", "😀"] == "😀😀😀".split(separator: "", maxSplits: 2))
}

@Test func splitMax() {
let str = "ab,cd,efg,,hi"
#expect(["ab,cd,efg,,hi"] == str.split(separator: ",", maxSplits: 0))
Expand Down