使用正则表达式同时读取两个文件

I'm writing a small helper tool that combines two text files into one. Each file stores a large 2D array of float values. Here is a sample of each:

File 1
-0,1296169 -0,1286087 -0,1276232 ...
-0,1288124 -0,1278683 -0,1269373 ...
-0,1280221 -0,1271375 -0,12626  ...
...

File 2
-0,1181779 -0,1200798 -0,1219472 ...
-0,1198357 -0,1216468 -0,1234369 ...
-0,1214746 -0,1232006 -0,1249159 ...
... 
both may have hundreds of rows and columns ...

Values can also be in scientific notation (e.g. 1.234e-003). My goal is to read the two files simultaneously, value by value, and write the output, changing the decimal separator from comma to point and converting from scientific notation to standard form in the process.

The current version of the program only combines prepared files (decimal separator already changed to a point, values in standard form, one value per line), but doing that preparation by hand is unrealistic when a file has more than a million values.

Here is what I have so far:

import (
    "bufio"
    "fmt"
    "io"
    "os"
    "regexp"
)

// main merges two prepared text files (one value per line) into
// d:/out.txt, writing each pair of corresponding lines as "value1;value2".
//
// Processing stops at the first empty line or at the end of either input.
// Read errors other than io.EOF terminate the program with exit status 1.
func main() {
    fileDB, err := os.Open("d:/dB.txt")
    if err != nil {
        fmt.Printf("error opening file: %v\n", err)
        os.Exit(1)
    }
    defer fileDB.Close()
    fileDL, err := os.Open("d:/dL.txt")
    if err != nil {
        fmt.Printf("error opening file: %v\n", err)
        os.Exit(1)
    }
    defer fileDL.Close()
    fileOut, err := os.Create("d:/out.txt") // truncates an existing file
    if err != nil {
        fmt.Printf("error opening file: %v\n", err)
        os.Exit(1)
    }

    dB := bufio.NewReader(fileDB)
    dL := bufio.NewReader(fileDL)

    done := 0 // number of line pairs written so far
    for {
        // ReadLine already strips the line ending, so no manual trimming is
        // needed. NOTE(review): isPrefix is ignored, which is only safe while
        // every line fits in the reader's buffer (true for one value per line).
        line1, _, err1 := dB.ReadLine()
        line2, _, err2 := dL.ReadLine()

        // Check both results separately so that a failure on the first file
        // is not hidden by a successful read from the second one.
        for _, readErr := range []error{err1, err2} {
            if readErr != nil && readErr != io.EOF {
                fmt.Printf("Error while reading files: %v\n", readErr)
                os.Exit(1)
            }
        }
        // At io.EOF ReadLine returns an empty line, so this also covers EOF.
        if len(line1) == 0 || len(line2) == 0 {
            break
        }

        done++
        str := string(line1) + ";" + string(line2) + "\n"
        if _, err := fileOut.WriteString(str); err != nil {
            panic(err)
        }
    }
    fmt.Println("Total lines done: ", done)

    // Close explicitly so that a failure to persist the output is reported.
    if err := fileOut.Close(); err != nil {
        panic(err)
    }
}

How can I use regexp to make this program read unprepared files (first listing) value by value and produce output like:

-0.129617;-0.118178
-0.128609;-0.120080
-0.127623;-0.121947
...

Input files are always formed the same way: the decimal separator is a comma; there is one space after each value (even the last one in a row); each line ends with a newline.

Previously I used an expression like ([-?])([0-9]{1})([,]{1})([0-9]{1,12})( {1}) with the Notepad++ replace function to split a line of values into one value per line (the new values were built with a replacement like $1$2.$4 \), but it becomes a mess when a value in scientific notation occurs.

So is there any way to read files value by value without messing with splitting line into slices/substrings and working over them?

Something like this. Note the limitation: it assumes the same number of values per line in both files. Be careful: it will blow up with an error if this assumption is wrong :)

package main

import (
    "bufio"
    "fmt"
    "os"
    "strconv"
    "strings"
)

// main reads dB.txt and dL.txt row by row, pairs up the whitespace-separated
// values of corresponding rows and writes each pair to out.txt as "v1; v2".
//
// Values may use a decimal comma and/or scientific notation. Rows are read
// with a bufio.Scanner whose buffer may grow, so rows longer than the default
// 4096-byte bufio.Reader buffer are not cut in the middle of a number.
// Rows with a differing number of values are reported instead of panicking.
func main() {
    // Upper bound for the length of a single input row.
    const maxLineBytes = 64 << 20

    fileDB, err := os.Open("dB.txt")
    if err != nil {
        fmt.Printf("error opening file: %v\n", err)
        return
    }
    defer fileDB.Close()

    fileDL, err := os.Open("dL.txt")
    if err != nil {
        fmt.Printf("error opening file: %v\n", err)
        return
    }
    defer fileDL.Close()

    fileOut, err := os.Create("out.txt") // truncates an existing file
    if err != nil {
        fmt.Printf("error opening file: %v\n", err)
        return
    }
    defer fileOut.Close()

    // Buffer the output: one write per value pair would otherwise cost one
    // syscall each. Deferred after Close, so it runs before the file closes.
    out := bufio.NewWriter(fileOut)
    defer func() {
        if err := out.Flush(); err != nil {
            fmt.Println("error writing file:", err)
        }
    }()

    dB := bufio.NewScanner(fileDB)
    dB.Buffer(make([]byte, 0, 64*1024), maxLineBytes)
    dL := bufio.NewScanner(fileDL)
    dL.Buffer(make([]byte, 0, 64*1024), maxLineBytes)

    // parse converts one value with a decimal comma to a float64.
    parse := func(s string) (float64, error) {
        return strconv.ParseFloat(strings.Replace(s, ",", ".", 1), 64)
    }

    for lc := 1; ; lc++ {
        more1, more2 := dB.Scan(), dL.Scan()
        if !more1 || !more2 {
            switch {
            case dB.Err() != nil:
                fmt.Println(lc, dB.Err())
            case dL.Err() != nil:
                fmt.Println(lc, dL.Err())
            case more1 != more2:
                fmt.Println(lc, "files have a different number of lines")
            }
            // Both files ending together is the normal way out.
            return
        }

        // Fields ignores the trailing space that ends every row, so no empty
        // pseudo-value is produced.
        vals1 := strings.Fields(dB.Text())
        vals2 := strings.Fields(dL.Text())
        if len(vals1) != len(vals2) {
            fmt.Printf("%d: rows have a different number of values: %d vs %d\n", lc, len(vals1), len(vals2))
            return
        }

        for i := range vals1 {
            v1, err := parse(vals1[i])
            if err != nil {
                fmt.Println(lc, err)
                continue
            }

            v2, err := parse(vals2[i])
            if err != nil {
                fmt.Println(lc, err)
                continue
            }

            if _, err := fmt.Fprintf(out, "%v; %v\n", v1, v2); err != nil {
                fmt.Println(lc, err)
                return
            }
        }
    }
}

For example,

package main

import (
    "bufio"
    "bytes"
    "fmt"
    "io"
    "os"
    "strconv"
    "strings"
)

// comma and period are the decimal separators swapped while reading.
var comma, period = []byte{','}, []byte{'.'}

// readNext returns an iterator over the whitespace-separated floating-point
// values in r. Every comma is replaced by a period before parsing, so both
// "-0,1296169" and "1,234e-003" are accepted.
//
// Each call of the returned function yields the next value. It returns io.EOF
// once the input is exhausted, the scanner's error if reading fails, or the
// strconv error if a token is not a number (that token is skipped, so a
// following call continues with the next one).
func readNext(r io.Reader) func() (float64, error) {
    // bufio.Scanner rejects lines above 64 KiB by default, which a grid row
    // with several thousand columns exceeds; allow much longer rows.
    const maxLineBytes = 64 << 20

    s := bufio.NewScanner(r)
    s.Buffer(make([]byte, 0, 64*1024), maxLineBytes)
    var fields []string // values of the current line not yet handed out
    return func() (float64, error) {
        // Refill from the next non-blank line when the current one is used up.
        for len(fields) == 0 {
            if !s.Scan() {
                if err := s.Err(); err != nil {
                    return 0, err
                }
                return 0, io.EOF
            }
            line := bytes.Replace(s.Bytes(), comma, period, -1)
            fields = strings.Fields(string(line))
        }
        n, err := strconv.ParseFloat(fields[0], 64)
        fields = fields[1:]
        if err != nil {
            return 0, err
        }
        return n, nil
    }
}

func main() {
    in1Name := `in1.data`
    in2Name := `in2.data`
    outName := `out.data`
    in1, err := os.Open(in1Name)
    if err != nil {
        fmt.Fprint(os.Stderr, err)
        return
    }
    defer in1.Close()
    in2, err := os.Open(in2Name)
    if err != nil {
        fmt.Fprint(os.Stderr, err)
        return
    }
    defer in2.Close()
    out, err := os.Create(outName)
    if err != nil {
        fmt.Fprint(os.Stderr, err)
        return
    }
    defer out.Close()
    outw := bufio.NewWriter(out)
    defer outw.Flush()

    next1 := readNext(in1)
    next2 := readNext(in2)
    for {
        n1, err1 := next1()
        n2, err2 := next2()
        if err1 == io.EOF && err2 == io.EOF {
            break
        }
        if err1 != nil || err2 != nil {
            fmt.Fprint(os.Stderr, err1, err2)
            return
        }
        _, err := fmt.Fprintf(outw, "%g;%g
", n1, n2)
        if err != nil {
            fmt.Fprint(os.Stderr, err)
            return
        }
    }
}

Playground: https://play.golang.org/p/I_sT_EPFI_W

Output:

$ go run  data.go
$ cat in1.data
-0,1296169 -0,1286087 -0,1276232 
-0,1288124 -0,1278683 -0,1269373 
-0,1280221 -0,1271375 -0,12626  
$ cat in2.data
-0,1296169 -0,1286087 -0,1276232 
-0,1288124 -0,1278683 -0,1269373 
-0,1280221 -0,1271375 -0,12626  
$ cat out.data
-0.1296169;-0.1296169
-0.1286087;-0.1286087
-0.1276232;-0.1276232
-0.1288124;-0.1288124
-0.1278683;-0.1278683
-0.1269373;-0.1269373
-0.1280221;-0.1280221
-0.1271375;-0.1271375
-0.12626;-0.12626
$ 

Thanks for the help; with other people's points of view I've found my own solution.

What this tool does? Generally it combines two text files to one.

Where i've used it? Creating "Generic ASCII" text file for "Country specific coordinate system tool". Input text files are ASCII export of GRID files from GIS applications (values in arc degrees expected). Later this file may be used to fix local coordinate shifts when working with precise GPS/GNSS receivers.

Here what i've "developed":

package main

import (
    "bufio"
    "fmt"
    "os"
    "regexp"
    "strconv"
    "strings"
)

// Patterns that extract the value from one header line. They are matched
// against the whole line, so the leading keyword is skipped automatically.
var (
    countRe  = regexp.MustCompile(`[0-9]+`)
    coordRe  = regexp.MustCompile(`-?[0-9]+(?:[.,][0-9]+)?`) // keeps the sign
    nodataRe = regexp.MustCompile(`-?\d+`)
)

// main combines the two grid exports d:/dB.txt and d:/dL.txt into d:/out.txt.
// On any failure it prints the error and exits with status 1.
func main() {
    if err := run("d:/dB.txt", "d:/dL.txt", "d:/out.txt"); err != nil {
        fmt.Printf("error: %v\n", err)
        os.Exit(1)
    }
}

// run reads the six header lines of both inputs (column count, row count,
// x corner, y corner, cell size, nodata value), checks that they agree, writes
// the output header and then writes every pair of grid values as
// "value1;value2" with eight decimals.
//
// The corner coordinates and the cell size are multiplied by 3600 before being
// written (presumably degrees to arc seconds; confirm against the target
// format).
func run(pathDB, pathDL, pathOut string) error {
    fileDB, err := os.Open(pathDB)
    if err != nil {
        return fmt.Errorf("opening file: %w", err)
    }
    defer fileDB.Close()
    fileDL, err := os.Open(pathDL)
    if err != nil {
        return fmt.Errorf("opening file: %w", err)
    }
    defer fileDL.Close()
    fileOut, err := os.Create(pathOut) // truncates an existing file
    if err != nil {
        return fmt.Errorf("opening file: %w", err)
    }
    defer fileOut.Close() // safety net for error paths; closed explicitly below

    dB := bufio.NewReader(fileDB)
    dL := bufio.NewReader(fileDL)
    out := bufio.NewWriter(fileOut)

    // The header lines are consumed strictly in this order.
    ncols, err := headerValue(dB, dL, countRe, "column count")
    if err != nil {
        return err
    }
    nrows, err := headerValue(dB, dL, countRe, "row count")
    if err != nil {
        return err
    }
    xText, err := headerValue(dB, dL, coordRe, "x corner")
    if err != nil {
        return err
    }
    yText, err := headerValue(dB, dL, coordRe, "y corner")
    if err != nil {
        return err
    }
    cellText, err := headerValue(dB, dL, coordRe, "cell size")
    if err != nil {
        return err
    }
    nodata, err := headerValue(dB, dL, nodataRe, "nodata value")
    if err != nil {
        return err
    }
    fmt.Println(nodata)

    xcorn, err := scaled(xText)
    if err != nil {
        return fmt.Errorf("x corner: %w", err)
    }
    ycorn, err := scaled(yText)
    if err != nil {
        return fmt.Errorf("y corner: %w", err)
    }
    cell, err := scaled(cellText)
    if err != nil {
        return fmt.Errorf("cell size: %w", err)
    }

    // NOTE(review): bitSize 32 rounds the header numbers to float32 precision
    // (about seven significant digits). Kept as in the original output format;
    // confirm whether the consumer needs more digits.
    short := func(v float64) string { return strconv.FormatFloat(v, 'f', -1, 32) }
    header := "name\n3;0;2\n1;2;" + nrows + ";" + ncols + "\n" +
        short(xcorn) + ";" + short(ycorn) + ";" + short(cell) + ";" + short(cell) + "\n1\n"
    if _, err := out.WriteString(header); err != nil {
        return err
    }

    // Flush even if copying failed, so the pairs written so far are kept.
    copyErr := copyValues(out, dB, dL)
    if err := out.Flush(); err != nil && copyErr == nil {
        copyErr = err
    }
    if err := fileOut.Close(); err != nil && copyErr == nil {
        copyErr = err
    }
    return copyErr
}

// headerValue reads the next line from both readers, extracts the first match
// of re from each and returns it. It fails if a line cannot be read, if no
// value is found, or if the two files disagree.
func headerValue(dB, dL *bufio.Reader, re *regexp.Regexp, name string) (string, error) {
    lineB, err := dB.ReadString('\n')
    if err != nil {
        return "", fmt.Errorf("reading %s: %w", name, err)
    }
    lineL, err := dL.ReadString('\n')
    if err != nil {
        return "", fmt.Errorf("reading %s: %w", name, err)
    }
    valB, valL := re.FindString(lineB), re.FindString(lineL)
    if valB == "" {
        return "", fmt.Errorf("no %s found in header line %q", name, lineB)
    }
    if valB != valL {
        return "", fmt.Errorf("%s differs between files: %q vs %q", name, valB, valL)
    }
    return valB, nil
}

// scaled parses a number that may use a decimal comma and multiplies it by
// 3600.
func scaled(s string) (float64, error) {
    v, err := strconv.ParseFloat(strings.Replace(s, ",", ".", 1), 64)
    if err != nil {
        return 0, err
    }
    return v * 3600.0, nil
}

// copyValues reads whitespace-separated values from dB and dL in lockstep and
// writes each pair to out as "value1;value2" in fixed notation with eight
// decimals. Values may use a decimal comma and scientific notation.
// It fails on the first unparsable value and when one input holds more values
// than the other.
func copyValues(out *bufio.Writer, dB, dL *bufio.Reader) error {
    // Word-wise scanning needs neither a trailing space after the last value
    // nor a bounded row length.
    wordsB := bufio.NewScanner(dB)
    wordsB.Split(bufio.ScanWords)
    wordsL := bufio.NewScanner(dL)
    wordsL.Split(bufio.ScanWords)

    fixed := func(s string) (string, error) {
        v, err := strconv.ParseFloat(strings.Replace(s, ",", ".", 1), 64)
        if err != nil {
            return "", err
        }
        return strconv.FormatFloat(v, 'f', 8, 64), nil
    }

    for {
        moreB, moreL := wordsB.Scan(), wordsL.Scan()
        if !moreB || !moreL {
            if err := wordsB.Err(); err != nil {
                return err
            }
            if err := wordsL.Err(); err != nil {
                return err
            }
            if moreB != moreL {
                return fmt.Errorf("files hold a different number of values")
            }
            return nil
        }
        v1, err := fixed(wordsB.Text())
        if err != nil {
            return err
        }
        v2, err := fixed(wordsL.Text())
        if err != nil {
            return err
        }
        if _, err := out.WriteString(v1 + ";" + v2 + "\n"); err != nil {
            return err
        }
    }
}

If you have any recommendations, feel free to leave a comment!