Split text into an array while maintaining punctuation in Swift

I want to split the text into an array, keeping the punctuation marks as separate elements from the words, so a line like:

Hello, I am Albert Einstein.

should turn into such an array:

["Hello", ",", "I", "am", "Albert", "Einstein", "."]

I tried with string.components(separatedBy: CharacterSet.init(charactersIn: " ,;;:")), but this method removes all punctuation and returns an array like this:

["Hello", "I", "am", "Albert", "Einstein"]

So, how can I get an array like the one in my first example?

+4
source share
2 answers

This is not a beautiful solution, but you can try:

/// Splits `text` into words and punctuation marks, keeping every punctuation
/// mark as an element of its own.
///
/// Whitespace (including tabs and newlines) only separates tokens and never
/// appears in the result. A punctuation mark is always emitted on its own,
/// even when no whitespace separates it from the neighbouring word
/// (`"Hello,World"` becomes `["Hello", ",", "World"]`).
///
/// - Parameters:
///   - text: The string to split.
///   - punctuation: Single-character strings that are emitted as standalone
///     tokens. Defaults to `,`, `.` and `;`.
/// - Returns: The words and punctuation marks in order of appearance.
func splitKeepingPunctuation(_ text: String, punctuation: Set<String> = [",", ".", ";"]) -> [String] {
    var list = [String]()
    // Token being built: either a word in progress or one pending punctuation mark.
    var currentSubString = ""

    // Enumerate to get all characters including ".", ",", ";", " ".
    text.enumerateSubstrings(in: text.startIndex..<text.endIndex, options: .byComposedCharacterSequences) { (substring, _, _, _) in
        guard let character = substring else { return }

        let isWhitespace = character.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty
        let startsNewToken = isWhitespace
            || punctuation.contains(character)
            || punctuation.contains(currentSubString) // a pending mark never grows into a word

        // Close the pending token before the current character starts a new one.
        if !currentSubString.isEmpty && startsNewToken {
            list.append(currentSubString)
            currentSubString = ""
        }

        // Whitespace is never stored; everything else joins the pending token.
        if !isWhitespace {
            currentSubString += character
        }
    }

    // Last word (or trailing punctuation mark).
    if !currentSubString.isEmpty {
        list.append(currentSubString)
    }
    return list
}

var str = "Hello, I am Albert Einstein."
var list = splitKeepingPunctuation(str)

In Swift 3:

/// Splits `text` into words and punctuation marks, keeping every punctuation
/// mark as an element of its own.
///
/// Whitespace (including tabs and newlines) only separates tokens and never
/// appears in the result. A punctuation mark is always emitted on its own,
/// even when no whitespace separates it from the neighbouring word
/// (`"Hello,World"` becomes `["Hello", ",", "World"]`).
///
/// - Parameters:
///   - text: The string to split.
///   - punctuation: Single-character strings that are emitted as standalone
///     tokens. Defaults to `,`, `.` and `;`.
/// - Returns: The words and punctuation marks in order of appearance.
func splitKeepingPunctuation(_ text: String, punctuation: Set<String> = [",", ".", ";"]) -> [String] {
    var list = [String]()
    // Token being built: either a word in progress or one pending punctuation mark.
    var currentSubString = ""

    // Enumerate to get all characters including ".", ",", ";", " ".
    text.enumerateSubstrings(in: text.startIndex..<text.endIndex, options: .byComposedCharacterSequences) { (substring, _, _, _) in
        guard let character = substring else { return }

        let isWhitespace = character.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty
        let startsNewToken = isWhitespace
            || punctuation.contains(character)
            || punctuation.contains(currentSubString) // a pending mark never grows into a word

        // Close the pending token before the current character starts a new one.
        if !currentSubString.isEmpty && startsNewToken {
            list.append(currentSubString)
            currentSubString = ""
        }

        // Whitespace is never stored; everything else joins the pending token.
        if !isWhitespace {
            currentSubString += character
        }
    }

    // Last word (or trailing punctuation mark).
    if !currentSubString.isEmpty {
        list.append(currentSubString)
    }
    return list
}

var str = "Hello, I am Albert Einstein."
var list = splitKeepingPunctuation(str)

, . , , ,, . ;. , , , . :

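  • get H (not a separator) -> currentSubString = "H"
  • get e (not a separator) -> currentSubString = "He"
  • get l (not a separator) -> currentSubString = "Hel"
  • get l (not a separator) -> currentSubString = "Hell"
  • get o (not a separator) -> currentSubString = "Hello"
  • get , (a separator)
    • → currentSubString is not empty, so it is appended to list, list = [ "Hello" ]
    • → currentSubString = "," (the separator itself is kept, so it becomes the next element)
  • get " " (a space, also a separator)
    • → currentSubString is not empty, so it is appended to list → list = [ "Hello" , "," ]
    • → currentSubString = "" (the space itself is never stored) ... and so on for the rest of the string.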
+2

... . ( ) ( ) .

( ), : ([,\.\:\"])*([A-Za-z0-9\']*)([,\.\:\"])*

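The pattern has 3 groups. The first matches leading punctuation (for example , or .). The second matches the word itself (it can also match nothing, i.e. ""). The third matches trailing punctuation.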

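Things to note: () denotes a group, and [] denotes a set of characters. So, for example, [A-Z] matches any single character from A to Z. [A-Za-z] matches both uppercase and lowercase letters, and [A-Za-z0-9] additionally matches the digits 0 to 9. Inside a string literal in code, the backslashes and the quote have to be escaped.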

, , , - :

/// Splits `value` into word and punctuation tokens using a regular expression.
///
/// Every match of the pattern is "leading punctuation, word, trailing
/// punctuation", with capture group 2 holding the word. Each punctuation
/// character around the word is returned as a token of its own, so
/// `"Hello,"` yields `"Hello"` and `","`. Characters the pattern does not
/// cover (such as whitespace) are dropped.
///
/// - Parameter value: The text to split.
/// - Returns: The words and punctuation marks in order of appearance.
/// - Throws: The error raised by `NSRegularExpression` if the pattern cannot
///   be compiled.
func find(value: NSString) throws -> [NSString] {
    let regex = try NSRegularExpression(pattern: "([,\\.\\:\\\"])*([A-Za-z0-9\\']*)([,\\.\\:\\\"])*") // Notice you have to escape the values in code
    let results = regex.matches(in: value as String, range: NSRange(location: 0, length: value.length))

    var tokens = [NSString]()
    for match in results {
        let wordRange = match.range(at: 2)
        let matchEnd = match.range.location + match.range.length
        var cursor = match.range.location

        // Walk the match left to right. Zero-length matches (e.g. at a space)
        // never enter the loop and therefore contribute nothing.
        while cursor < matchEnd {
            // Inside a match, everything outside the word group is a single
            // punctuation character; a repeated group only remembers its last
            // iteration, so the positions are used rather than groups 1 and 3.
            let isWordStart = wordRange.length > 0 && cursor == wordRange.location
            let tokenRange = NSRange(location: cursor, length: isWordStart ? wordRange.length : 1)
            tokens.append(NSString(string: value.substring(with: tokenRange)))
            cursor += tokenRange.length
        }
    }
    return tokens
}

- , String, .

, , , Swift .

...

~

0

Source: https://habr.com/ru/post/1656592/


All Articles