I am trying to extract lines from a file if a condition is met.
The data in the file looks like this:
Sat 08 Aug 2015
Norwich City
A
League
W 3-1
Zaha 38; Delaney 48; Cabaye 90
27,036
If the pattern of the date is matched, I want to print the following five lines.
My code is,
// main prints every line of test.txt that contains a date such as
// "Sat 08 Aug 2015".
func main() {
	// [A-Za-z] is used instead of [aA-zZ]: the range A-z also covers the
	// punctuation between 'Z' and 'a' ([ \ ] ^ _ `), so "S_t 08 A^g 2015"
	// would otherwise be accepted as a date. MustCompile makes a bad
	// pattern fail loudly instead of leaving a nil *Regexp behind.
	dateLine := regexp.MustCompile(`[A-Za-z]{3}\s[0-9]{2}\s[A-Za-z]{3}\s[0-9]{4}`)

	file, err := os.Open("test.txt")
	if err != nil {
		log.Fatal(err)
	}
	defer file.Close()

	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		line := scanner.Text()
		if dateLine.MatchString(line) {
			fmt.Println(line)
			// Question: how do I capture the following five lines here?
		}
	}
	// Scan returns false both at EOF and on a read error, so the error has
	// to be checked once, after the loop has finished.
	if err := scanner.Err(); err != nil {
		log.Fatal(err)
	}
}
Perhaps, something like this?
package main
import (
"bufio"
"fmt"
"io"
"os"
"strings"
"time"
)
// Match is one fixture record: a date line followed by its detail lines.
type Match struct {
	Date       time.Time // fixture date; printed using fmtMatchDate
	Opponents  string    // opposing team
	Venue      string    // "A" or "H" in the sample data
	Type       string    // competition, e.g. "League"
	Result     string    // e.g. "W 3-1"
	Scorers    string    // optional; empty when the record has no scorers line
	Attendance string    // optional; empty when the record has no attendance line
}

// fmtMatchDate is the time layout of a record's date line,
// e.g. "Sat 08 Aug 2015".
var fmtMatchDate = "Mon 02 Jan 2006"

// String renders the match in the same shape as the input file: one field
// per line, each terminated by "\n". Date, Opponents, Venue, Type and Result
// are always written; Scorers and Attendance are written only when non-empty.
func (m Match) String() string {
	var b strings.Builder

	required := []string{
		m.Date.Format(fmtMatchDate),
		m.Opponents,
		m.Venue,
		m.Type,
		m.Result,
	}
	for _, field := range required {
		b.WriteString(field)
		b.WriteByte('\n')
	}

	// Optional trailing fields: skipping empty ones avoids printing blank
	// lines for records that have no scorers or attendance.
	for _, field := range []string{m.Scorers, m.Attendance} {
		if field == "" {
			continue
		}
		b.WriteString(field)
		b.WriteByte('\n')
	}

	return b.String()
}
// ParseMatch builds a Match from the lines of one record.
//
// lines[0] must be a date in fmtMatchDate layout; if it is not, the zero
// Match and the time.Parse error are returned. lines[1] through lines[6]
// fill Opponents, Venue, Type, Result, Scorers and Attendance in that order.
// Every line is whitespace-trimmed first. Missing trailing lines leave their
// fields empty and lines beyond the seventh are ignored; an empty slice
// yields the zero Match and a nil error.
func ParseMatch(lines []string) (Match, error) {
	// TODO: Implement a better parser.
	var parsed Match

	// textFields[k] receives lines[k+1].
	textFields := []*string{
		&parsed.Opponents,
		&parsed.Venue,
		&parsed.Type,
		&parsed.Result,
		&parsed.Scorers,
		&parsed.Attendance,
	}

	for idx, raw := range lines {
		trimmed := strings.TrimSpace(raw)

		if idx == 0 {
			when, err := time.Parse(fmtMatchDate, trimmed)
			if err != nil {
				return Match{}, err
			}
			parsed.Date = when
			continue
		}

		if idx > len(textFields) {
			// Nothing left to fill; the remaining lines are ignored.
			break
		}
		*textFields[idx-1] = trimmed
	}

	return parsed, nil
}
func main() {
f, err := os.Open("match.txt")
if err != nil {
fmt.Fprintln(os.Stderr, err)
os.Exit(1)
}
var lines []string
snr := bufio.NewScanner(f)
for snr.Scan() {
line := snr.Text()
if _, err = time.Parse(fmtMatchDate, strings.TrimSpace(line)); err == nil {
if len(lines) > 0 {
m, err := ParseMatch(lines)
if err != nil {
fmt.Fprintln(os.Stderr, err)
} else {
fmt.Print(m)
}
}
lines = lines[:0]
}
lines = append(lines, line)
}
if len(lines) > 0 {
m, err := ParseMatch(lines)
if err != nil {
fmt.Fprintln(os.Stderr, err)
} else {
fmt.Print(m)
}
}
if err := snr.Err(); err != nil {
if err != io.EOF {
fmt.Fprintln(os.Stderr, err)
os.Exit(1)
}
}
}
Input:
$ cat match.txt
Sat 08 Aug 2015
Norwich City
A
League
W 3-1
Zaha 38; Delaney 48; Cabaye 90
27,036
Sun 16 Aug 2015
Arsenal
H
League
L 1-2
Sat 29 Aug 2015
Chelsea
A
League
W 2-1
Sako 64; Ward 80
41,581
Output:
$ go run match.go
Sat 08 Aug 2015
Norwich City
A
League
W 3-1
Zaha 38; Delaney 48; Cabaye 90
27,036
Sun 16 Aug 2015
Arsenal
H
League
L 1-2
Sat 29 Aug 2015
Chelsea
A
League
W 2-1
Sako 64; Ward 80
41,581
$
I am not a great fan of regex, as it tends to complicate things when you, or someone else, go back to it in 6 months. I would read the file into a slice of lines, and use an offset as the way of getting the lines to test.
func main() {
var (
dayName string
month string
name string
A string
league string
score string
scorers string
attendance string
day int
year int
err error
)
data, errRead := ioutil.ReadFile(fileName)
if errRead != nil {
return
}
// get the files as a block of text
theText := string(data)
// make the line endings consistent
theText = strings.Replace(theText, "
", "", -1)
theText = strings.Replace(theText, "
", "", -1)
// split it into a set of lines
lines := strings.Split(theText, "")
numLines := len(lines)
i := 0
for i < numLines {
// at this point we should have your test line
theLine := lines[i]
i++
// give each line a consistent spacing, you never know what state it is in
theLine = strings.Replace(theLine, " ", " ", -1)
parts := strings.Split(theLine, " ")
if len(parts) == 4 {
// At least the line has the four date parts
dayName := parts[0]
day, err = strconv.Atoi(parts[1])
if err == nil {
// We have a number for the day
month := parts[2]
year, err = strconv.Atoi(parts[3])
if err == nil {
// We have a number for the year
// the next five lines are your data
name = lines[i]
A = lines[i+1]
league = lines[i+2]
score = lines[i+3]
scorers = lines[i+4]
attendance = lines[i+5]
i += 6
}
}
}
}
}
For the score, etc., you will have to parse the values yourself, but this will be fairly trivial. You also need to remember that data you get from someone else may not always be as consistent as you would wish.
Not sure if I have missed something but would something like this suffice:
package main
import (
"regexp"
"os"
"log"
"bufio"
"fmt"
)
// main prints every date line of /tmp/test.txt (e.g. "Sat 08 Aug 2015")
// together with up to five lines that follow it.
func main() {
	// [A-Za-z] is used instead of [aA-zZ]: the range A-z also covers the
	// punctuation between 'Z' and 'a' ([ \ ] ^ _ `). MustCompile makes a bad
	// pattern fail loudly instead of leaving a nil *Regexp behind.
	dateLine := regexp.MustCompile(`[A-Za-z]{3}\s[0-9]{2}\s[A-Za-z]{3}\s[0-9]{4}`)

	file, err := os.Open("/tmp/test.txt")
	if err != nil {
		log.Fatal(err)
	}
	defer file.Close()

	// Number of lines to print after each date line.
	const followLines = 5

	// remaining counts how many lines of the current record are still to be
	// printed. Using a counter in a single loop (instead of a nested loop
	// calling Scan) means a record with fewer than five detail lines cannot
	// swallow the next date line, and nothing is printed once the input
	// runs out.
	remaining := 0

	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		line := scanner.Text()
		switch {
		case dateLine.MatchString(line):
			fmt.Println(line)
			remaining = followLines
		case remaining > 0:
			fmt.Println(line)
			remaining--
		}
	}
	// Scan returns false both at EOF and on a read error, so the error has
	// to be checked once, after the loop has finished.
	if err := scanner.Err(); err != nil {
		log.Fatal(err)
	}
}