在Go中计算切片中字符的出现

Okay, so I've hit a brick wall.

Edit: Using bytes.IndexByte() in my count() function makes it run almost twice as fast. bytes.IndexByte() is written in assembly instead of Go. Still not C speed, but closer.

I have two programs, one in C and one in Go that both count newlines in a file. Super simple. The C program runs in ~1.5 seconds, the Go in ~4.25 seconds on a 2.4GB file.

Am I hitting Go's speed limit? If so, what, exactly, is causing this? I can read C, but I can't read assembly, so comparing the asm generated for the C program with the asm generated for the Go program doesn't do much for me except show that the Go version has ~400 more lines (ignoring the .ascii section).

While I know Go can't match C step-for-step, I wouldn't assume a 4x slowdown.

Ideas?

Here's the cpuprofile of the Go: enter image description here

Here's the C (compiled w/ gcc -Wall -pedantic -O9)

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>

/* Number of bytes requested from the file per read call; buf in main is declared one byte larger. */
#define BUFFER_SIZE (16 * 1024)

/*
 * Count the bytes and the newline characters in "big.txt" and print the two
 * totals to stdout, bytes first, one per line.
 *
 * Returns 0 on success and EXIT_FAILURE if the file cannot be opened or a
 * read reports an error.
 */
int
main()
{

    const char *file = "big.txt";
    int fd = open (file, O_RDONLY);
    char buf[BUFFER_SIZE + 1];
    /* Both totals are accumulated with += / ++, so they must start at zero. */
    uintmax_t bytes = 0;
    size_t bytes_read;
    size_t lines = 0;

    if (fd < 0)
    {
        perror (file);
        return EXIT_FAILURE;
    }

    /* Return value intentionally unused: the counting below does not depend on it. */
    posix_fadvise (fd, 0, 0, POSIX_FADV_SEQUENTIAL);
    while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
    {
        char *p = buf;

        /*
         * NOTE(review): safe_read is not declared in this file; presumably it
         * is gnulib's, which reports failure as (size_t) -1 -- confirm.
         * Either way, a count larger than the request cannot be a valid
         * length for buf, so stop before handing it to memchr.
         */
        if (bytes_read > BUFFER_SIZE)
        {
            perror (file);
            return EXIT_FAILURE;
        }

        /* Hop from one newline to the next inside the bytes just read. */
        while ((p = memchr (p, '\n', (buf + bytes_read) - p)))
          {
            ++p;
            ++lines;
          }
        bytes += bytes_read;
    }
    /* bytes is a uintmax_t, which takes the j length modifier, not z. */
    printf("%ju\n", bytes);
    printf("%zu\n", lines);
    return 0;
}

And the Go:

package main

import (
    "flag"
    "fmt"
    "io"
    "os"
    "runtime/pprof"
    "syscall"
)

const (
    // POSIX_FADV_SEQUENTIAL is the advice value main passes to fadvise.
    POSIX_FADV_SEQUENTIAL = 2

    // NewLineByte is the byte value that count compares each element against.
    NewLineByte = '\n' // or 10
    // BufferSize is the length, in bytes, of Buffer.
    BufferSize = (16 * 1024) + 1
)

// Buffer is the package-level read buffer, allocated once with BufferSize bytes.
var Buffer = make([]byte, BufferSize)

// fadvise issues the SYS_FADVISE64 system call on file's descriptor, passing
// off, length and advice as its arguments. It returns the resulting errno as
// an error when that errno is non-zero, and nil otherwise.
func fadvise(file *os.File, off, length int, advice uint32) error {
    fd := file.Fd()
    if _, _, code := syscall.Syscall6(
        syscall.SYS_FADVISE64,
        fd,
        uintptr(off),
        uintptr(length),
        uintptr(advice),
        0,
        0,
    ); code != 0 {
        return code
    }
    return nil
}

// count returns how many elements of s are equal to NewLineByte.
// A nil or empty slice yields 0.
func count(s []byte) int64 {
    var hits int64
    for _, b := range s {
        if b == NewLineByte {
            hits++
        }
    }
    return hits
}

func main() {

    file, err := os.Open("big.txt")
    if err != nil {
        panic(err)
    }

    var lines int64
    var bytes int64

    fadvise(file, 0, 0, POSIX_FADV_SEQUENTIAL)
    for {

        n, err := file.Read(Buffer)
        if err != nil && err != io.EOF {
            panic(err)
        }

        lines += count(Buffer[:n])
        bytes += int64(n)

        if err == io.EOF {
            break
        }
    }

    fmt.Printf("%d
", bytes)
    fmt.Printf("%d
", lines)
}

As far as counting '\n' in a file goes, this code runs in ~1.26 sec (and often faster) on a Zorin VM (VMware Player) with 6 GB RAM and 4 cores (with the power plugged in, because power managers sometimes throttle the CPU to keep it from draining the battery too fast); the host OS is Windows 8. I have been using Go in some real-world projects for less than 6 months, and I'm a Linux noob too. But I think the problem is calling C from Go, which is much slower than pure Go - I've experienced this when calling C code, both as a DLL and when compiled with cgo.

package main

import (
    "fmt"
    "io"
    "os"
    "runtime"
    "time"
)

func main() {
    tstart := time.Now()

    file, err := os.Open(filePath)
    if err != nil {
        panic(err)
    }
    defer file.Close()

    done := make(chan bool)
    var cnt int64 = 0
    go func() {
        var Buffer = make([]byte, BufferSize)
        for {
            n, err := file.Read(Buffer)
            if err != nil && err != io.EOF {
                panic(err)
            }

            cnt += count(Buffer[:n])

            if err == io.EOF {
                done <- true
                return
            }
        }
    }()
    <-done
    // should be 5860298 in this case (zorin iso image) & it is.
    fmt.Println(cnt)
    fmt.Printf("%s took %s
", "counting", time.Since(tstart))
}

// count walks s once and reports the number of positions holding NewLineByte.
// It returns 0 for a nil or empty slice.
func count(s []byte) int64 {
    var total int64
    for idx := range s {
        if s[idx] != NewLineByte {
            continue
        }
        total++
    }
    return total
}

const (
    // NewLineByte is the byte value that count compares each element against.
    NewLineByte = '\n' // or 10
    // BufferSize is the length, in bytes, of the read buffer allocated in main.
    BufferSize = 32 * 1024
)

var (
    // filePath is the path of the file that main opens and counts.
    filePath = "/.../zorin-os-9.1-core-64++.iso"
    // maxt holds the CPU count that init stores and passes to runtime.GOMAXPROCS.
    maxt     int
)

// init stores the value of runtime.NumCPU in maxt and passes that same value
// to runtime.GOMAXPROCS.
func init() {
    cpus := runtime.NumCPU()
    maxt = cpus
    runtime.GOMAXPROCS(cpus)
}

Here's a not too hard and not too slow way, using bytes.IndexByte (since you found Go's asm implementation of it helped) and syscall.Mmap:

package main

import (
    "bytes"
    "fmt"
    "log"
    "os"
    "syscall"
)

// main memory-maps the file named by the first command-line argument, counts
// the bytes equal to 10 (newline) in it with bytes.IndexByte, and prints the
// count. It exits through log.Fatal when the argument is missing or when
// opening, stat-ing or mapping the file fails. An empty file prints 0.
func main() {
    if len(os.Args) < 2 {
        log.Fatal("pass filename on command line")
    }
    f, err := os.Open(os.Args[1])
    if err != nil {
        log.Fatal("open: ", err)
    }
    // The file is only read, so the error from Close is not acted on.
    defer f.Close()

    stat, err := f.Stat()
    if err != nil {
        log.Fatal("stat: ", err)
    }
    size := stat.Size()
    if size == 0 {
        // A zero-length mapping is rejected with EINVAL, and an empty file
        // trivially contains no newlines.
        fmt.Println(0)
        return
    }
    if int64(int(size)) != size {
        // Mmap takes an int length; refuse rather than map a truncated size.
        log.Fatal("mmap: file too large to map on this platform")
    }
    data, err := syscall.Mmap(int(f.Fd()), 0, int(size), syscall.PROT_READ, syscall.MAP_SHARED)
    if err != nil {
        log.Fatal("mmap: ", err)
    }
    // Munmap needs the slice exactly as Mmap returned it, so the scan below
    // advances a separate view instead of re-slicing data.
    defer syscall.Munmap(data)

    newlines := 0
    rest := data
    for {
        i := bytes.IndexByte(rest, 10)
        if i == -1 {
            break
        }
        newlines++
        rest = rest[i+1:]
    }
    fmt.Println(newlines)
}

Mmap looks weird, but here it's much as if you'd read the file into a slice, except less resource-intensive thanks to the OS's help.

You can parallelize the counting without too much more work, but I'm not sure that's worth it. (It would not shock me if the gain on amd64 were zero or negative if, for example, single-core counting were limited by memory bandwidth, but that's not quick for me to test.)