使用golang从文件中提取数据

I am trying to extract lines from a file if a condition is met.

The data in the file look like this :

Sat 08 Aug 2015
Norwich City
A
League
    W 3-1
    Zaha 38; Delaney 48; Cabaye 90
    27,036

If the pattern of the date is matched, I want to print the following five lines.

My code is,

// main scans test.txt line by line and prints every line that contains a
// date such as "Sat 08 Aug 2015".
func main() {
    // [A-Za-z] matches letters only. The range A-z would also admit the
    // punctuation characters that sit between 'Z' and 'a' in ASCII.
    // MustCompile panics on a malformed pattern instead of silently
    // discarding the compile error.
    r := regexp.MustCompile(`[A-Za-z]{3}\s[0-9]{2}\s[A-Za-z]{3}\s[0-9]{4}`)

    file, err := os.Open("test.txt")
    if err != nil {
        log.Fatal(err)
    }
    defer file.Close()

    scanner := bufio.NewScanner(file)
    for scanner.Scan() {
        line := scanner.Text()
        if r.MatchString(line) {
            fmt.Println(line)

            // here how do i capture the following 5 lines
        }
    }

    // Scan returns false when it hits a read error, so Err only needs to be
    // checked once the loop has ended.
    if err := scanner.Err(); err != nil {
        log.Fatal(err)
    }
}

Perhaps, something like this?

package main

import (
    "bufio"
    "fmt"
    "io"
    "os"
    "strings"
    "time"
)

// Match is one record from the input file: the date on the record's first
// line followed by the remaining lines, kept as whitespace-trimmed text.
type Match struct {
    Date       time.Time
    Opponents  string
    Venue      string
    Type       string
    Result     string
    Scorers    string
    Attendance string
}

// fmtMatchDate is the time layout of a record's date line,
// for example "Sat 08 Aug 2015".
const fmtMatchDate = "Mon 02 Jan 2006"

// String renders m one field per line, each line terminated by "\n", in the
// same order as the input file. Scorers and Attendance are omitted when they
// are empty.
func (m Match) String() string {
    s := m.Date.Format(fmtMatchDate) + "\n" +
        m.Opponents + "\n" +
        m.Venue + "\n" +
        m.Type + "\n" +
        m.Result + "\n"
    if len(m.Scorers) > 0 {
        s += m.Scorers + "\n"
    }
    if len(m.Attendance) > 0 {
        s += m.Attendance + "\n"
    }
    return s
}

// ParseMatch builds a Match from the lines of a single record.
//
// lines[0] must be a date in fmtMatchDate layout; lines[1] through lines[6]
// fill Opponents, Venue, Type, Result, Scorers and Attendance in that order.
// Every line is trimmed of surrounding whitespace first. Missing trailing
// lines leave the corresponding fields empty, and lines beyond the seventh
// are ignored. An empty slice yields a zero Match and a nil error.
//
// If the date line does not parse, the error from time.Parse is returned
// together with a zero Match.
func ParseMatch(lines []string) (Match, error) {
    // TODO: Implement a better parser.
    var m Match
    for i, line := range lines {
        line = strings.TrimSpace(line)
        switch i {
        case 0:
            date, err := time.Parse(fmtMatchDate, line)
            if err != nil {
                return Match{}, err
            }
            m.Date = date
        case 1:
            m.Opponents = line
        case 2:
            m.Venue = line
        case 3:
            m.Type = line
        case 4:
            m.Result = line
        case 5:
            m.Scorers = line
        case 6:
            m.Attendance = line
        }
    }
    return m, nil
}

func main() {
    f, err := os.Open("match.txt")
    if err != nil {
        fmt.Fprintln(os.Stderr, err)
        os.Exit(1)
    }
    var lines []string
    snr := bufio.NewScanner(f)
    for snr.Scan() {
        line := snr.Text()
        if _, err = time.Parse(fmtMatchDate, strings.TrimSpace(line)); err == nil {
            if len(lines) > 0 {
                m, err := ParseMatch(lines)
                if err != nil {
                    fmt.Fprintln(os.Stderr, err)
                } else {
                    fmt.Print(m)
                }
            }
            lines = lines[:0]
        }
        lines = append(lines, line)
    }
    if len(lines) > 0 {
        m, err := ParseMatch(lines)
        if err != nil {
            fmt.Fprintln(os.Stderr, err)
        } else {
            fmt.Print(m)
        }
    }
    if err := snr.Err(); err != nil {
        if err != io.EOF {
            fmt.Fprintln(os.Stderr, err)
            os.Exit(1)
        }
    }
}

Input:

$ cat match.txt
Sat 08 Aug 2015
Norwich City
A
League
    W 3-1
    Zaha 38; Delaney 48; Cabaye 90
    27,036
Sun 16 Aug 2015
Arsenal
H
League
    L 1-2
Sat 29 Aug 2015
Chelsea
A
League
    W 2-1
    Sako 64; Ward 80
    41,581

Output:

$ go run match.go
Sat 08 Aug 2015
Norwich City
A
League
W 3-1
Zaha 38; Delaney 48; Cabaye 90
27,036
Sun 16 Aug 2015
Arsenal
H
League
L 1-2
Sat 29 Aug 2015
Chelsea
A
League
W 2-1
Sako 64; Ward 80
41,581
$

I am not a great fan of regex as it tends to complicate things when you, or someone else, go back to it in 6 months. I would read the file into a slice of lines and use an offset to pick out the lines to test.

func main() {
    var (
        dayName    string
        month      string
        name       string
        A          string
        league     string
        score      string
        scorers    string
        attendance string
        day        int
        year       int
        err        error
    )
    data, errRead := ioutil.ReadFile(fileName)
    if errRead != nil {
        return
    }

    //  get the files as a block of text
    theText := string(data)
    //  make the line endings consistent
    theText = strings.Replace(theText, "
", "", -1)
    theText = strings.Replace(theText, "
", "", -1)
    //  split it into a set of lines
    lines := strings.Split(theText, "")
    numLines := len(lines)
    i := 0
    for i < numLines {
        //      at this point we should have your test line
        theLine := lines[i]
        i++
        //      give each line a consistent spacing, you never know what state it is in
        theLine = strings.Replace(theLine, "  ", " ", -1)
        parts := strings.Split(theLine, " ")
        if len(parts) == 4 {
            //         At least the line has the four date parts
            dayName := parts[0]
            day, err = strconv.Atoi(parts[1])
            if err == nil {
                //             We have a number for the day
                month := parts[2]
                year, err = strconv.Atoi(parts[3])
                if err == nil {
                    //                 We have a number for the year
                    //                 the next five lines are your data
                    name = lines[i]
                    A = lines[i+1]
                    league = lines[i+2]
                    score = lines[i+3]
                    scorers = lines[i+4]
                    attendance = lines[i+5]
                    i += 6
                }
            }
        }
    }
}

For the score etc you will have to parse it yourself, but this will be fairly trivial. You also need to remember that when getting data from someone else they may not always be as consistent as you would wish.

Not sure if I have missed something but would something like this suffice:

package main

import (
"regexp"
"os"
"log"
"bufio"
"fmt"
)

// main prints every date line in /tmp/test.txt together with the lines that
// follow it, at most followingLines per date line.
func main() {
    // followingLines is how many lines after a date line belong to it.
    const followingLines = 5

    // [A-Za-z] matches letters only. The range A-z would also admit the
    // punctuation characters that sit between 'Z' and 'a' in ASCII.
    // MustCompile panics on a malformed pattern instead of silently
    // discarding the compile error.
    r := regexp.MustCompile(`[A-Za-z]{3}\s[0-9]{2}\s[A-Za-z]{3}\s[0-9]{4}`)

    file, err := os.Open("/tmp/test.txt")
    if err != nil {
        log.Fatal(err)
    }
    defer file.Close()

    scanner := bufio.NewScanner(file)
    // remaining counts the lines still to print for the current date line.
    // A countdown in the single Scan loop avoids calling Scan without
    // checking its result, and lets a new date line restart the count when a
    // record is shorter than expected.
    remaining := 0
    for scanner.Scan() {
        line := scanner.Text()
        switch {
        case r.MatchString(line):
            fmt.Println(line)
            remaining = followingLines
        case remaining > 0:
            fmt.Println(line)
            remaining--
        }
    }

    // Scan returns false when it hits a read error, so Err only needs to be
    // checked once the loop has ended.
    if err := scanner.Err(); err != nil {
        log.Fatal(err)
    }
}