RegEx全部匹配,直到两个连续的特殊字符(]])

I tried to figure out one (multiline.pattern) or two (multiline.pattern & exclude_line) regex in order to ship log information from filebeat to logstash. The system which writes the logs has a standardized log format which looks as follows

[2019-08-28 10:38:57 +0200][0000000000][Info][User][OLS][201][Some Logging Information]

To match this I have built up the regex (maybe this needs also some improvements :-))

^\[(\d{4})-(\d{2})-(\d{2})\s(\d{2}):(\d{2}):(\d{2})\s\+(\d{4})\]\[\d{10}\]\[[^\]]*\]\[[^\]]*\]\[[^\]]*\]\[[\d]*\]\[[^\]]*\]$

Unfortunately the log structure changes when the system runs in debug mode

[2019-05-24 09:58:39 +0200][0000000000][Debug][External][RESTLM][HTDOC_REQUEST][Some Debug Loginformation]
[2019-05-24 09:58:39 +0200][0000000000][Debug][External][RESTLM[HTDOC_REQUEST][Some Debug Loginformation]
[2019-05-24 09:58:34 +0200][0000000026][Debug][External][RESTLM][REST_RESPONSE][[45][HTTP/1.0 201 Created
    Server: Test/2019.3
    Pragma: no-cache
    Cache-control: no-cache
    Content-Type: text/xml
    Content-Length: 255

    <?xml version="1.0" encoding="utf-8"?>
    <Status><Repository><Path>D:/repository/tabfiles</Path><Version>4_0</Version><Fingerprint>p12uqocQM0gtaRieBldCix/CSSs=</Fingerprint></Repository><System>Running</System></Status>]]
[2019-05-24 09:58:34 +0200][0000000000][Debug][External][RESTLM][REST_REQUEST][[45][POST / HTTP/1.1
    Content-Type: text/xml; charset=utf-8
    Cache-Control: no-cache
    Pragma: no-cache
    User-Agent: Java/11.0.2
    Host: serverxyz:24821
    Accept: text/html, image/gif, image/jpeg, *; q=.2, */*; q=.2
    Connection: keep-alive
    Content-Length: 10

    <Status />]]

I want to exclude those (multiline) log entries which contain "Debug" in the 3rd field. From my point of view, the main difference between normal and debug logs is that the 6th field is not a number (it does not match [\d]*). And in some cases - I think this is my problem - there is a log inside the log information (the last log field), which looks like [[[45][some text][other text]]

What I am looking for is either a single regex which matches one complete log entry, regardless of whether it is a debug or a normal entry, or two expressions: the first matching normal logs and the second matching debug logs (so that I can exclude them).

Since all you want to do is match the log entries, and not capture any info, use this:

^\[\d{4}-\d{2}-\d{2}[\s\S]+?\]\]?$ /gm

The idea is to match the data lazily (by using ?) until one or two ] are encountered at the end of a line.

Demo

Some of your groups are optional, not sure which ones, yet this expression might be OK to start with:

^\[(\d{4})-(\d{2})-(\d{2})\s(\d{2}):(\d{2}):(\d{2})\s\+(\d{4})\]\[\d{10}\](\[[^\]]*\])?\[[^\]]*\]\[[^\]]*\]\[[^\]]*\]\[[^\]]*\](\[[\s\S]*?\])?$

Demo 1

or maybe,

^\[(\d{4})-(\d{2})-(\d{2})\s(\d{2}):(\d{2}):(\d{2})\s\+(\d{4})\]\[(\d{10})\](\[([^\]]*)\])?\[([^\]]*)\]\[([^\]]*)\]\[([^\]]*)\]\[([^\]]*)\](\[([\s\S]*?)\])?$

Demo 2

if you might want to capture those data in the brackets.


If you wish to simplify/modify/explore the expression, it's been explained on the top right panel of regex101.com. If you'd like, you can also watch in this link, how it would match against some sample inputs.


If you want to parse the data, you can opt for a bufio.Scanner-like interface:

package main

import (
    "bufio"
    "io"
    "log"
    "strings"
)

func main() {

    input := `
[2019-05-24 09:58:39 +0200][0000000000][Debug][External][RESTLM][HTDOC_REQUEST][Some Debug Loginformation]
[2019-05-24 09:58:40 +0200][0000000000][Debug][External][RESTLM][HTDOC_REQUEST][Some Debug Loginformation]
[2019-05-24 09:58:41 +0200][0000000026][Debug][External][RESTLM][REST_RESPONSE][[45][HTTP/1.0 201 Created
    Server: Test/2019.3
    Pragma: no-cache
    Cache-control: no-cache
    Content-Type: text/xml
    Content-Length: 255

    <?xml version="1.0" encoding="utf-8"?>
    <Status><Repository><Path>D:/repository/tabfiles</Path><Version>4_0</Version><Fingerprint>p12uqocQM0gtaRieBldCix/CSSs=</Fingerprint></Repository><System>Running</System></Status>]]
[2019-05-24 09:58:42 +0200][0000000000][Debug][External][RESTLM][REST_REQUEST][[45][POST / HTTP/1.1
    Content-Type: text/xml; charset=utf-8
    Cache-Control: no-cache
    Pragma: no-cache
    User-Agent: Java/11.0.2
    Host: serverxyz:24821
    Accept: text/html, image/gif, image/jpeg, *; q=.2, */*; q=.2
    Connection: keep-alive
    Content-Length: 10\]

    <Status />]]
`
    // input = `[2019-05-24 09:58:39 +0200][0000000000][Debug][External][RESTLM][HTDOC_REQUEST][Some Debug Loginformation]`

    src := strings.NewReader(input)
    parser := newScanner(src)
    for parser.Scan() {
        line := parser.Items()
        if len(line) > 2 && line[2] == "DEBUG" {
            continue
        }
        log.Printf("line %#v
", line)
    }
    log.Println("done")
}

// maxRecordSize bounds how many bytes of a single, not yet terminated record
// the scanner buffers. A longer record makes Scan return false with Err
// reporting bufio.ErrTooLong.
const maxRecordSize = 1 << 20

// scanner reads bracketed log records such as
//
//	[2019-05-24 09:58:39 +0200][0000000000][Debug][External][Some text]
//
// and splits each record into its top-level [...] columns. A record ends at
// the first newline found outside of any bracket, so a column may span several
// lines and may itself contain nested, balanced brackets. A bracket directly
// preceded by a backslash is treated as ordinary text.
type scanner struct {
	*bufio.Scanner
	buf            []byte     // initial read buffer handed to the embedded Scanner
	openedBrackets int        // bracket depth at which the latest parse call stopped
	lineDone       bool       // set by parse when it completed a record; cleared by Scan
	atEOF          bool       // atEOF argument of the latest parse call
	lines          [][]string // holds only the most recently completed record
	currentCols    []string   // scratch: columns of the record being parsed
	currentCol     []byte     // scratch: bytes of the column being parsed
}

// newScanner returns a scanner reading records from r. The embedded
// bufio.Scanner starts with a 500-byte buffer and may grow it up to
// maxRecordSize bytes.
func newScanner(r io.Reader) *scanner {
	b := bufio.NewScanner(r)
	s := &scanner{
		Scanner: b,
		buf:     make([]byte, 500),
	}
	b.Buffer(s.buf, maxRecordSize)
	b.Split(s.parse)
	return s
}

// Scan advances to the next record and reports whether one was found. After it
// returns false, Err (promoted from the embedded bufio.Scanner) tells whether
// scanning stopped because of an error.
func (s *scanner) Scan() bool {
	s.lineDone = false
	return s.Scanner.Scan()
}

// Items returns the columns of the most recently completed record, or nil if
// no record has been completed yet. A blank line outside of brackets yields a
// record with zero columns.
func (s *scanner) Items() []string {
	if len(s.lines) == 0 {
		return nil
	}
	return s.lines[len(s.lines)-1]
}

var (
	buf          = make([]byte, 500) // not referenced by the scanner code in this file
	eol          = byte('\n')
	bracketClose = byte(']')
	bracketOpen  = byte('[')
	backslash    = byte('\\')
)

// parse is the bufio.SplitFunc installed by newScanner.
//
// Each call parses data from the start of a record. If no record terminator (a
// newline at bracket depth 0) is present and atEOF is false, it returns
// (0, nil, nil) so that the bufio.Scanner reads more input and calls again
// with a longer slice. That is why all parsing state is reset on entry.
//
// On success the columns are stored for Items, advance covers the record
// including its terminating newline, and token is the raw record text without
// that newline. At end of input, remaining data that contains at least one
// complete column is delivered as a final record; a column that is still open
// at that point is dropped. err is always nil.
func (s *scanner) parse(data []byte, atEOF bool) (advance int, token []byte, err error) {
	s.atEOF = atEOF
	s.currentCols = s.currentCols[:0]
	s.currentCol = s.currentCol[:0]
	s.openedBrackets = 0

	for i, d := range data {
		// A bracket directly preceded by a backslash is literal text.
		escaped := i > 0 && data[i-1] == backslash
		switch {
		case d == bracketOpen && !escaped:
			// The outermost bracket only delimits the column; nested
			// brackets belong to the column text.
			if s.openedBrackets > 0 {
				s.currentCol = append(s.currentCol, d)
			}
			s.openedBrackets++
		case d == bracketClose && !escaped:
			if s.openedBrackets == 0 {
				// Stray closing bracket outside any column: ignore it
				// instead of letting the depth go negative.
				continue
			}
			s.openedBrackets--
			if s.openedBrackets > 0 {
				s.currentCol = append(s.currentCol, d)
				continue
			}
			s.currentCols = append(s.currentCols, string(s.currentCol))
			s.currentCol = s.currentCol[:0]
		case s.openedBrackets > 0:
			// Any other byte inside a column, newlines included.
			s.currentCol = append(s.currentCol, d)
		case d == eol:
			// Newline outside of all brackets terminates the record.
			s.emit()
			return i + 1, data[:i], nil
		}
	}

	if !atEOF {
		// The record is not complete yet; request more data.
		return 0, nil, nil
	}
	if len(s.currentCols) == 0 {
		// Nothing usable is left; consume it without producing a record.
		return len(data), nil, nil
	}
	// Final record that is not terminated by a newline. data is non-empty
	// here, so the token is non-nil and bufio.Scanner reports it.
	s.emit()
	return len(data), data, nil
}

// emit publishes a copy of the columns collected so far as the current record.
// Only that record is retained, since Items never exposes older ones.
func (s *scanner) emit() {
	line := make([]string, len(s.currentCols))
	copy(line, s.currentCols)
	s.lines = append(s.lines[:0], line)
	s.currentCols = s.currentCols[:0]
	s.lineDone = true
}