I am trying to work out either one regex (multiline.pattern) or two regexes (multiline.pattern and exclude_lines) in order to ship log information from Filebeat to Logstash. The system that writes the logs uses a standardized log format, which looks as follows:
[2019-08-28 10:38:57 +0200][0000000000][Info][User][OLS][201][Some Logging Information]
To match this I have built the following regex (it may well need some improvements :-)):
^\[(\d{4})-(\d{2})-(\d{2})\s(\d{2}):(\d{2}):(\d{2})\s\+(\d{4})\]\[\d{10}\]\[[^\]]*\]\[[^\]]*\]\[[^\]]*\]\[[\d]*\]\[[^\]]*\]$
Unfortunately the log structure changes when the system runs in debug mode
[2019-05-24 09:58:39 +0200][0000000000][Debug][External][RESTLM][HTDOC_REQUEST][Some Debug Loginformation]
[2019-05-24 09:58:39 +0200][0000000000][Debug][External][RESTLM[HTDOC_REQUEST][Some Debug Loginformation]
[2019-05-24 09:58:34 +0200][0000000026][Debug][External][RESTLM][REST_RESPONSE][[45][HTTP/1.0 201 Created
Server: Test/2019.3
Pragma: no-cache
Cache-control: no-cache
Content-Type: text/xml
Content-Length: 255
<?xml version="1.0" encoding="utf-8"?>
<Status><Repository><Path>D:/repository/tabfiles</Path><Version>4_0</Version><Fingerprint>p12uqocQM0gtaRieBldCix/CSSs=</Fingerprint></Repository><System>Running</System></Status>]]
[2019-05-24 09:58:34 +0200][0000000000][Debug][External][RESTLM][REST_REQUEST][[45][POST / HTTP/1.1
Content-Type: text/xml; charset=utf-8
Cache-Control: no-cache
Pragma: no-cache
User-Agent: Java/11.0.2
Host: serverxyz:24821
Accept: text/html, image/gif, image/jpeg, *; q=.2, */*; q=.2
Connection: keep-alive
Content-Length: 10
<Status />]]
I want to exclude the (multi-line) log entries that contain "Debug" in the 3rd field. As far as I can see, the main difference between normal and debug log entries is that in debug entries the 6th field is not numeric, i.e. it does not match \[\d*\]. In some cases - and I think this is my problem - there is a nested log inside the log information (the last log field), which looks like [[[45][some text][other text]]
What I am looking for is either a single regex that matches one complete log entry, regardless of whether it is a debug or a normal entry, or two expressions: the first matching normal log entries and the second matching debug log entries (so that they can be excluded).
Since all you want to do is match the log entries, and not capture any info, use this:
^\[\d{4}-\d{2}-\d{2}[\s\S]+?\]\]?$ /gm
The idea is to match the data lazily (by using the lazy quantifier +?) until one or two ] characters are encountered at the end of a line.
Some of your groups are optional, not sure which ones, yet this expression might be OK to start with:
^\[(\d{4})-(\d{2})-(\d{2})\s(\d{2}):(\d{2}):(\d{2})\s\+(\d{4})\]\[\d{10}\](\[[^\]]*\])?\[[^\]]*\]\[[^\]]*\]\[[^\]]*\]\[[^\]]*\](\[[\s\S]*?\])?$
or maybe,
^\[(\d{4})-(\d{2})-(\d{2})\s(\d{2}):(\d{2}):(\d{2})\s\+(\d{4})\]\[(\d{10})\](\[([^\]]*)\])?\[([^\]]*)\]\[([^\]]*)\]\[([^\]]*)\]\[([^\]]*)\](\[([\s\S]*?)\])?$
if you want to capture the data inside the brackets.
If you wish to simplify/modify/explore the expression, it's been explained on the top right panel of regex101.com. If you'd like, you can also watch in this link, how it would match against some sample inputs.
If you want to parse the data rather than just match it, you can opt for a bufio.Scanner-like interface:
package main
import (
"bufio"
"io"
"log"
"strings"
)
func main() {
input := `
[2019-05-24 09:58:39 +0200][0000000000][Debug][External][RESTLM][HTDOC_REQUEST][Some Debug Loginformation]
[2019-05-24 09:58:40 +0200][0000000000][Debug][External][RESTLM][HTDOC_REQUEST][Some Debug Loginformation]
[2019-05-24 09:58:41 +0200][0000000026][Debug][External][RESTLM][REST_RESPONSE][[45][HTTP/1.0 201 Created
Server: Test/2019.3
Pragma: no-cache
Cache-control: no-cache
Content-Type: text/xml
Content-Length: 255
<?xml version="1.0" encoding="utf-8"?>
<Status><Repository><Path>D:/repository/tabfiles</Path><Version>4_0</Version><Fingerprint>p12uqocQM0gtaRieBldCix/CSSs=</Fingerprint></Repository><System>Running</System></Status>]]
[2019-05-24 09:58:42 +0200][0000000000][Debug][External][RESTLM][REST_REQUEST][[45][POST / HTTP/1.1
Content-Type: text/xml; charset=utf-8
Cache-Control: no-cache
Pragma: no-cache
User-Agent: Java/11.0.2
Host: serverxyz:24821
Accept: text/html, image/gif, image/jpeg, *; q=.2, */*; q=.2
Connection: keep-alive
Content-Length: 10\]
<Status />]]
`
// input = `[2019-05-24 09:58:39 +0200][0000000000][Debug][External][RESTLM][HTDOC_REQUEST][Some Debug Loginformation]`
src := strings.NewReader(input)
parser := newScanner(src)
for parser.Scan() {
line := parser.Items()
if len(line) > 2 && line[2] == "DEBUG" {
continue
}
log.Printf("line %#v
", line)
}
log.Println("done")
}
// maxEntrySize is the largest single log entry (including all of its
// continuation lines) the scanner accepts before bufio.ErrTooLong is reported.
const maxEntrySize = 1 << 20

// Bytes that carry structural meaning in the log format.
const (
	eol          byte = '\n'
	bracketClose byte = ']'
	bracketOpen  byte = '['
	backslash    byte = '\\'
)

// scanner reads log entries of the form "[col1][col2]...[colN]" from an
// io.Reader. An entry ends at the first newline that is not enclosed in
// brackets, so a bracketed column may span several physical lines.
//
// The embedded *bufio.Scanner provides Err, Bytes and Text; Bytes and Text
// return the raw text of the current entry.
type scanner struct {
	*bufio.Scanner
	// items holds the columns of the entry returned by the last Scan call.
	items []string
}

// newScanner returns a scanner that reads bracket-delimited log entries
// from r.
func newScanner(r io.Reader) *scanner {
	b := bufio.NewScanner(r)
	// Multi-line entries (e.g. dumped HTTP bodies) can exceed the default
	// 64 KiB token limit of bufio.Scanner.
	b.Buffer(make([]byte, 0, 4096), maxEntrySize)
	s := &scanner{Scanner: b}
	b.Split(s.parse)
	return s
}

// Scan advances to the next log entry and reports whether one was found.
// It returns false at the end of the input or on error; use Err to tell the
// two apart.
func (s *scanner) Scan() bool {
	if !s.Scanner.Scan() {
		s.items = nil
		return false
	}
	s.items = splitColumns(s.Bytes())
	return true
}

// Items returns the columns of the current entry, without the enclosing
// brackets. It returns nil before the first successful Scan, after Scan has
// returned false, and for entries that contain no complete bracketed column.
func (s *scanner) Items() []string {
	return s.items
}

// parse is the bufio.SplitFunc used by the scanner. It skips blank lines and
// returns the raw bytes of the next entry, which ends at the first newline
// found outside of any bracket pair, or at the end of the input.
//
// parse keeps no state between calls: when data does not yet hold a complete
// entry it asks for more input, and bufio.Scanner calls it again with the
// enlarged buffer. The returned advance therefore never exceeds len(data).
func (s *scanner) parse(data []byte, atEOF bool) (advance int, token []byte, err error) {
	// Drop blank lines in front of the entry.
	start := 0
	for start < len(data) && (data[start] == eol || data[start] == '\r') {
		start++
	}

	depth := 0
	for i := start; i < len(data); i++ {
		switch data[i] {
		case backslash:
			i++ // the escaped byte is literal, whatever it is
		case bracketOpen:
			depth++
		case bracketClose:
			// A stray closing bracket must not push the depth below zero,
			// otherwise the entry would never terminate.
			if depth > 0 {
				depth--
			}
		case eol:
			if depth == 0 {
				return i + 1, data[start:i], nil
			}
		}
	}

	if atEOF && start < len(data) {
		// Final entry without trailing newline (possibly with unbalanced
		// brackets): hand out whatever is left.
		return len(data), data[start:], nil
	}
	// Need more data; only the blank lines seen so far are consumed.
	return start, nil, nil
}

// splitColumns extracts the top-level bracketed columns of a single entry.
// Nested brackets and escape sequences are kept verbatim inside the column
// text, text outside of brackets is ignored, and a column that is never
// closed is dropped.
func splitColumns(entry []byte) []string {
	var cols []string
	depth, open := 0, 0
	for i := 0; i < len(entry); i++ {
		switch entry[i] {
		case backslash:
			i++ // skip the escaped byte
		case bracketOpen:
			if depth == 0 {
				open = i + 1
			}
			depth++
		case bracketClose:
			if depth > 0 {
				depth--
				if depth == 0 {
					cols = append(cols, string(entry[open:i]))
				}
			}
		}
	}
	return cols
}