读取具有固定宽度和缺失值的表格数据

I'm trying to read a table from disk in Go, with mixed integers and floats, where the width of each field is fixed (every field occupies a fixed number of places, preceded by blanks if too short) and where some values may be missing (and should default to zero).

The file is here: https://celestrak.com/SpaceData/sw20100101.txt

The Fortran format used to read it is written in the header:

FORMAT(I4,I3,I3,I5,I3,8I3,I4,8I4,I4,F4.1,I2,I4,F6.1,I2,5F6.1)

and the lines look like this (some of the last lines, with blanks):

2014 12 29 2475  2 20 30 23 33 37 47 33 47 270   7  15   9  18  22  39  18  39  21 1.1 5  64 127.1 0 150.4 156.0 131.4 153.3 160.9
2014 12 30 2475  3 30 40 37 20 30 27 27 23 233  15  27  22   7  15  12  12   9  15 0.8 4  66 126.0 0 150.3 156.1 130.3 152.7 161.0
2014 12 31 2475  4 13 23 13 17 20 33 13 17 150   5   9   5   6   7  18   5   6   8 0.4 2  65 129.2 0 150.5 156.3 133.6 152.4 161.3
2015 01 01 2475  5 20 10 10 10 10 20 20 30 130   7   4   4   4   4   7   7  15   6       101 138.0 0 150.7 156.6 142.7 152.1 161.7
2015 01 02 2475  6 30 10 20 20 30 20 30 40 200  15   4   7   7  15   7  15  27  12       113 146.0 0 150.9 157.0 151.0 152.2 162.1
2015 01 03 2475  7 50 30 30 30 30 20 20 10 220  48  15  15  15  15   7   7   4  15       122 149.0 0 151.0 157.2 154.1 152.4 162.4

I have been trying a clever format string to use with Sscanf (like "%4d%3d%3d%5d...") but it won't work with blanks, or if the number is not right-aligned to its slot.

I'm looking for a way to read it like in Fortran, where:

  • Mixed field types (integers, floats, strings) are possible.
  • Each column has a fixed size in characters, with the slot padded with blanks if necessary, but different columns may have different sizes.
  • Numeric values may be preceded by zeros.
  • Values may be missing; in that case, the field takes its zero value.
  • Values may be in any position in the slot, not necessarily right-aligned (this does not occur in the example, but it is possible).

Is there a clever method to read something like this or should I split, trim, check and convert manually every field?

package main

import "fmt"
import "reflect"
import "strconv"
import "strings"

// scanner describes the layout of a fixed-width record: a sequence of
// consecutive fields, each with its own width.
type scanner struct {
    // len is the exact input length Scan accepts; newScan sets it to the
    // sum of all field widths.
    len   int
    // parts holds the width of each field, in the order the fields appear
    // in the record. Scan slices the input string by these widths.
    parts []int
}

// Scan splits s into the fixed-width fields described by ss and stores each
// parsed field in the corresponding element of args.
//
// s must be exactly as long as the sum of the field widths, and args must
// hold one destination pointer per field (see scanOne for the supported
// pointer types). Blank fields are stored as zero.
//
// It returns the number of fields scanned successfully. If the length or
// argument count does not match, nothing is scanned and n is 0. If a field
// fails to parse, scanning stops there and the returned error wraps the
// underlying error together with the field index and its byte range.
func (ss *scanner) Scan(s string, args ...interface{}) (n int, err error) {
    if got := len(s); got != ss.len {
        return 0, fmt.Errorf("expected string of size %d, actual %d", ss.len, got)
    }
    if len(args) != len(ss.parts) {
        return 0, fmt.Errorf("expected %d args, actual %d", len(ss.parts), len(args))
    }

    start := 0
    for i, width := range ss.parts {
        end := start + width
        // A negative width (or a layout whose widths do not add up to
        // ss.len) would make the slice expression below panic; report it
        // as an error instead.
        if width < 0 || end > len(s) {
            return i, fmt.Errorf("field %d: invalid width %d at offset %d", i, width, start)
        }
        if err := scanOne(s[start:end], args[i]); err != nil {
            return i, fmt.Errorf("field %d (bytes %d-%d): %w", i, start, end, err)
        }
        start = end
    }
    return len(args), nil
}

// newScan builds a scanner for records made of consecutive fixed-width
// fields. parts lists the width of each field in order; the expected record
// length is the sum of those widths.
func newScan(parts ...int) *scanner {
    // Named total rather than len so the builtin len is not shadowed.
    total := 0
    for _, width := range parts {
        total += width
    }
    // Copy the widths: when the caller passes an existing slice with
    // newScan(w...), the variadic parameter aliases that slice, and later
    // changes to it would silently alter the layout.
    widths := append([]int(nil), parts...)
    return &scanner{len: total, parts: widths}
}

// scanOne parses the text of a single field into the variable arg points to.
//
// Leading and trailing white space is trimmed first, so the value may sit
// anywhere inside its slot. A blank field stores zero and is not an error.
// Supported destinations are *int, *int32, *int64, *float32 and *float64.
// Any other arg, including a nil interface, yields a "can't scan type"
// error. A non-blank field that does not parse returns the strconv error,
// and the destination receives whatever value strconv returned alongside it.
func scanOne(s string, arg interface{}) (err error) {
    s = strings.TrimSpace(s)
    switch v := arg.(type) {
    case *int:
        *v = 0
        if s != "" {
            *v, err = strconv.Atoi(s)
        }
    case *int32:
        *v = 0
        if s != "" {
            var val int64
            val, err = strconv.ParseInt(s, 10, 32)
            *v = int32(val)
        }
    case *int64:
        *v = 0
        if s != "" {
            *v, err = strconv.ParseInt(s, 10, 64)
        }
    case *float32:
        *v = 0
        if s != "" {
            var val float64
            val, err = strconv.ParseFloat(s, 32)
            *v = float32(val)
        }
    case *float64:
        *v = 0
        if s != "" {
            *v, err = strconv.ParseFloat(s, 64)
        }
    default:
        // reflect.TypeOf returns nil for a nil interface and %v prints that
        // as <nil>, so an untyped nil arg is reported as an error rather
        // than panicking.
        err = fmt.Errorf("can't scan type: %v", reflect.TypeOf(arg))
    }
    return err
}

func main() {
    s := newScan(2, 4, 2)
    var a int
    var b float32
    var c int32

    s.Scan("12 2.2 1", &a, &b, &c)
    fmt.Printf("%d %f %d
", a, b, c)

    s.Scan("1      2", &a, &b, &c)
    fmt.Printf("%d %f %d
", a, b, c)

    s.Scan("        ", &a, &b, &c)
    fmt.Printf("%d %f %d
", a, b, c)
}

Output:

12 2.200000 1
1 0.000000 2
0 0.000000 0

Notice that the Scan function returns n, the number of parsed arguments, and err. If a value is missing, the function sets it to 0. The implementation is mostly taken from fmt.Scanf.

You can use the encoding/csv package with the delimiter set to a space. Something like this:

import (
"encoding/csv"
"os"
)
// Open the data file. The error is discarded here, so a failed open goes
// unnoticed until the first read.
file, _:=os.Open("/SpaceData/sw20100101.txt")
// Wrap the file in a CSV reader.
csvreader:=csv.NewReader(file)
// Use a single space as the field delimiter instead of the default comma.
csvreader.Comma=' '
// Expect 33 fields in every record.
csvreader.FieldsPerRecord=33
// Ignore leading white space in each field.
// NOTE(review): with this setup a blank (missing) column presumably cannot
// be told apart from padding, so later fields may shift — verify against
// rows that have missing values.
csvreader.TrimLeadingSpace=true
// Read a single record; the error is discarded here as well.
parsedout, _ := csvreader.Read()

Here is a working example: https://play.golang.org/p/Tsp72D4vsR