How to optimize a CSV loader using pprof?

I am trying to optimize a CSV loading process that is basically doing a regex search in a large CSV file (4+ GB, 31,033,993 records in my experiment). I managed to build a multiprocessing logic to read the CSV, but when I analyze the CPU profile with pprof I think my regex search is not optimized. Could you help me improve this code so that it reads the CSV much more quickly?

[screenshot: pprof CPU profile of the loader]

Here is my code so far:

package main

import (
    "bufio"
    "flag"
    "fmt"
    "log"
    "os"
    "regexp"
    "runtime"
    "runtime/pprof"
    "strings"
    "sync"
)

func processFile(path string) [][]string {
    file, err := os.Open(path)
    if err != nil {
        log.Fatalln("Error:", err) // continuing with a nil file would silently read zero lines
    }
    defer file.Close()
    var pattern = regexp.MustCompile(`^.*foo.*$`)
    numCPU := runtime.NumCPU()
    jobs := make(chan string, numCPU+1)

    fmt.Printf("Strategy: Parallel, %d Workers ...\n", numCPU)

    results := make(chan []string)
    wg := new(sync.WaitGroup)
    for w := 1; w <= numCPU; w++ {
        wg.Add(1)
        go parseRecord(jobs, results, wg, pattern)
    }
    go func() {
        scanner := bufio.NewScanner(file)
        for scanner.Scan() {
            jobs <- scanner.Text()
        }
        close(jobs)
    }()

    go func() {
        wg.Wait()
        close(results)
    }()

    lines := [][]string{}
    for line := range results {
        lines = append(lines, line)
    }

    return lines
}

func parseRecord(jobs <-chan string, results chan<- []string, wg *sync.WaitGroup, pattern *regexp.Regexp) {
    defer wg.Done()
    for j := range jobs {
        if pattern.MatchString(j) {
            x := strings.Split(j, "\n")
            results <- x
        }

    }
}

func split(r rune) bool {
    return r == ','
}

func main() {
    f, err := os.Create("perf.data")
    if err != nil {
        log.Fatal(err)
    }
    if err := pprof.StartCPUProfile(f); err != nil {
        log.Fatal(err)
    }
    defer pprof.StopCPUProfile()

    pathFlag := flag.String("file", "", `The CSV file to operate on.`)
    flag.Parse()
    lines := processFile(*pathFlag)
    fmt.Println("loaded", len(lines), "records")
}
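
For completeness, this is how I inspect the profile afterwards (standard go tool pprof usage; loader and data.csv are placeholder names for my binary and input file):

$ go build -o loader .
$ ./loader -file data.csv
$ go tool pprof loader perf.data
(pprof) top10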

When I process the file without any regex constraint I get a reasonable computing time (I simply load the parsed string into the 2D array, without any pattern.MatchString()):

Strategy: Parallel, 8 Workers ... loaded 31033993 records 2018/10/09 11:46:38 readLines took 30.611246035s
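
For reference, the no-regex variant is essentially the worker below, a drop-in replacement for parseRecord in the program above (same imports; parseRecordNoRegex is just my own name for it):

func parseRecordNoRegex(jobs <-chan string, results chan<- []string, wg *sync.WaitGroup) {
    defer wg.Done()
    for j := range jobs {
        // no pattern.MatchString here: every line is forwarded unconditionally
        results <- strings.Split(j, "\n")
    }
}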

Instead, when I run the above code with the regex constraint I get this result:

Strategy: Parallel, 8 Workers ... loaded 143090 records 2018/10/09 12:04:32 readLines took 1m24.029830907s

MatchString looks for a match anywhere in the string, so you can get rid of the anchors and the wildcards. Wildcarding at both ends is usually slow in regexp engines.

Here is an example showing this, on Go 1.10:

package reggie

import (
        "regexp"
        "testing"
)

var pattern = regexp.MustCompile(`^.*foo.*$`)
var pattern2 = regexp.MustCompile(`foo`)

func BenchmarkRegexp(b *testing.B) {
        for i := 0; i < b.N; i++ {
                pattern.MatchString("youfathairyfoobar")
        }
}

func BenchmarkRegexp2(b *testing.B) {
        for i := 0; i < b.N; i++ {
                pattern2.MatchString("youfathairyfoobar")
        }
}

$ go test -bench=.
goos: darwin
goarch: amd64
BenchmarkRegexp-4        3000000           471 ns/op
BenchmarkRegexp2-4      20000000           101 ns/op
PASS
ok      _/Users/jsandrew/wip/src/reg    4.031s
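
Also, since your example pattern is just the fixed literal foo, you could drop regexp entirely; strings.Contains is usually faster still for plain substring search. A sketch of an extra benchmark for the same file (add "strings" to the imports; BenchmarkContains is my own name for it):

func BenchmarkContains(b *testing.B) {
        for i := 0; i < b.N; i++ {
                // plain substring search, no regexp machinery involved
                strings.Contains("youfathairyfoobar", "foo")
        }
}

In your loader that would mean replacing pattern.MatchString(j) with strings.Contains(j, "foo").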