Swift正则如何查找分离多行相同头内容文本

问题遇到的现象和发生背景

有如下文本

Thread 0 name: Dispatch queue: com.apple.main-thread
Thread 0 Crashed:
0  libsystem_kernel.dylib    0x000000000123456 0x1abcdef + 12345
1  libsystem_kernel.dylib    0x000000000123452 0x1abcdef + 12345

Thread 1 name: com.apple.thread.kindtwo
Thread 1:
0  CoreFoundation.dylib    0x000000000123456 0x1abcdef + 12345
Thread 2:
0  Foundation   0x000000000123456 0x1abcdef + 12345
1  CFNetwork    0x000000000123456 0x1abcdef + 12345

希望通过正则拆分获取每个Thread所属的内容段

问题相关代码,请勿粘贴截图
// string
let content =
"""
Thread 0 name: Dispatch queue: com.apple.main-thread
Thread 0 Crashed:
0  libsystem_kernel.dylib    0x000000000123456 0x1abcdef + 12345
1  libsystem_kernel.dylib    0x000000000123452 0x1abcdef + 12345

Thread 1 name: com.apple.thread.kindtwo
Thread 1:
0  CoreFoundation.dylib    0x000000000123456 0x1abcdef + 12345
Thread 2:
0  Foundation   0x000000000123456 0x1abcdef + 12345
1  CFNetwork    0x000000000123456 0x1abcdef + 12345
"""
// regex
let pattern = "\\s+?Thread\\s+(\\d+)+(?:\\s+name:\\s+(.*)$)(?:\\sThread\\s+\\d+\\s?(Crashed)?:$)?\\n(.*?)(?:\\n\\s*\\n|$)"

let re = try! Regex(pattern, options: [.anchorsMatchLines, .caseInsensitive])

print(re.findAll(content))


extension String {
    subscript (r: Range<Int>) -> String? {
        get {
            let nsRange = NSRange(r)
            return self.substring(with: nsRange)
        }
    }
    
    func matchGroups(fromResult result: NSTextCheckingResult) -> [String] {
        let number = result.numberOfRanges
        if number == 1 {
            if let range = Range(result.range(at:0)) {
                if let subString = self[range] {
                    return [subString]
                }
            }
            return []
        }
        
        var groups = [String]()
        
        for index in 1..<number {
            if let range = Range(result.range(at: index)) {
                groups.append(self[range] ?? "")
            }
        }
        return groups
    }
    
    var nsRange: NSRange {
        return NSRange(self.startIndex..<self.endIndex, in: self)
    }
    
    func substring(with nsrange: NSRange) -> String? {
        guard let range = Range(nsrange, in: self) else { return nil }
        return String(self[range])
    }
}

public struct Regex {
    let regex: NSRegularExpression
    
    public init(_ pattern: String,
                options: NSRegularExpression.Options = []) throws {
        try regex = NSRegularExpression(pattern: pattern,
                                        options: options)
    }
    
    public func findFirst(_ string: String,
                          options: NSRegularExpression.MatchingOptions = []) -> [String]? {
        guard let ret = regex.firstMatch(in: string,
                                         options: options,
                                         range: string.nsRange) else {
            return nil
        }
        return string.matchGroups(fromResult: ret)
    }
    
    public func findAll(_ string: String,
                        options: NSRegularExpression.MatchingOptions = []) -> [[String]]? {
        let ret = regex.matches(in: string,
                                options: options,
                                range: string.nsRange)
        guard ret.count != 0 else { return nil }
        
        return ret.map { string.matchGroups(fromResult: $0) }
    }
    
    public func match(_ string: String,
                      options: NSRegularExpression.MatchingOptions = []) -> [String]? {
        findFirst(string, options: options)
    }
    
    public func findFirstRange(_ string: String,
                               options: NSRegularExpression.MatchingOptions = []) -> NSRange? {
        let nsRange = string.nsRange
        let range = regex.rangeOfFirstMatch(in: string, options: options, range: nsRange)
        guard range.location < (nsRange.location + nsRange.length) else {
            return nil
        }
        
        return range
    }
    
    public func findAllRange(_ string: String,
                             options: NSRegularExpression.MatchingOptions = []) -> [NSRange]? {
        let matches = regex.matches(in: string, options: options, range: string.nsRange)
        guard matches.count != 0 else { return nil }
        
        return matches.map { $0.range }
    }
}
运行结果及报错内容
Optional([["1", "com.apple.thread.kindtwo", "0  CoreFoundation.dylib    0x000000000123456 0x1abcdef + 12345"]])
我的解答思路和尝试过的方法

我知道([\\w\\W\\s]*)可以匹配多行,但是由于开头都比较类似,所以一旦用了([\\w\\W\\s]*)不论结尾怎么写都无法在下一个Thread之前停下捕获~

我想要达到的结果
[
["0", "Dispatch queue: com.apple.main-thread", "Crashed", "0  libsystem_kernel.dylib    0x000000000123456 0x1abcdef + 12345\n1  libsystem_kernel.dylib    0x000000000123456 0x1abcdef + 12345"],
["1", "com.apple.thread.kindtwo", "", "CoreFoundation.dylib    0x000000000123456 0x1abcdef + 12345"],
["2", "", "", "0  Foundation   0x000000000123456 0x1abcdef + 12345\n1  CFNetwork    0x000000000123456 0x1abcdef + 12345"]
]

js 的正则?


var content = 'Thread 0 name: Dispatch queue: com.apple.main-thread\nThread 0 Crashed:\n0  libsystem_kernel.dylib    0x000000000123456 0x1abcdef + 12345\n1  libsystem_kernel.dylib    0x000000000123452 0x1abcdef + 12345\nThread 1 name: com.apple.thread.kindtwo\nThread 1:\n0  CoreFoundation.dylib    0x000000000123456 0x1abcdef + 12345\nThread 2:\n0  Foundation   0x000000000123456 0x1abcdef + 12345\n1  CFNetwork    0x000000000123456 0x1abcdef + 12345\n';

var m = content.match(/Thread (\d+)[\s\S]+?(?=($|Thread (?!\1(?!\d))\d+))/gi)

console.log(m)

img

先把所有 Thread 分隔开,然后单独处理每一个 Thread 里的数据不就好了?

最终在老顾的帮助下得到答案:(Thread (\\d+)[\\s\\S]+?(?=(?:$|Thread (?!\\2(?!\\d))\\d+)))