I am writing a small helper tool that combines two text files into one. Each file stores a large 2D array of float values. Here are some sample rows:
File 1
-0,1296169 -0,1286087 -0,1276232 ...
-0,1288124 -0,1278683 -0,1269373 ...
-0,1280221 -0,1271375 -0,12626 ...
...
File 2
-0,1181779 -0,1200798 -0,1219472 ...
-0,1198357 -0,1216468 -0,1234369 ...
-0,1214746 -0,1232006 -0,1249159 ...
...
Both files may have hundreds of rows and columns ...
Values can also be in scientific form (e.g. 1.234e-003). My goal is to read the two files simultaneously, value by value, and write the output, while changing the decimal separator from comma to point and converting from scientific form to standard form in the process.
This version of the program combines only prepared files (decimal separator changed to a point, values written in standard form, and one value per line), but doing this preparation by hand is unrealistic if a file has more than a million values.
Here is what I have so far:
import (
"bufio"
"fmt"
"io"
"os"
"regexp"
)
// main merges two already-prepared input files (one value per line) into
// d:/out.txt, writing "<value from dB>;<value from dL>" on each output line.
//
// Processing stops at the first empty line or at the end of either input,
// and the number of merged lines is then printed. Any other read error, or a
// write error, is reported and terminates the program with exit status 1.
func main() {
	fileB, err := os.Open("d:/dB.txt")
	if err != nil {
		fmt.Printf("error opening file: %v\n", err)
		os.Exit(1)
	}
	defer fileB.Close()

	fileL, err := os.Open("d:/dL.txt")
	if err != nil {
		fmt.Printf("error opening file: %v\n", err)
		os.Exit(1)
	}
	defer fileL.Close()

	fileOut, err := os.Create("d:/out.txt") // also rewrites an existing file!
	if err != nil {
		fmt.Printf("error opening file: %v\n", err)
		os.Exit(1)
	}
	defer fileOut.Close()

	dB := bufio.NewReader(fileB)
	dL := bufio.NewReader(fileL)

	// The os.Exit calls below skip the deferred Close calls; that is fine
	// because the process is terminating and the files are unbuffered.
	lines := 0
	for {
		// ReadLine never includes the line terminator, so no trimming is
		// needed. Each reader's error is kept separately so that neither
		// one can overwrite the other.
		line1, _, err1 := dB.ReadLine()
		line2, _, err2 := dL.ReadLine()

		if err1 != nil && err1 != io.EOF {
			fmt.Printf("Error while reading files: %v\n", err1)
			os.Exit(1)
		}
		if err2 != nil && err2 != io.EOF {
			fmt.Printf("Error while reading files: %v\n", err2)
			os.Exit(1)
		}
		if err1 == io.EOF || err2 == io.EOF || len(line1) == 0 || len(line2) == 0 {
			fmt.Println("Total lines done: ", lines)
			break
		}

		lines++
		str := string(line1) + ";" + string(line2) + "\n"
		if _, err := fileOut.WriteString(str); err != nil {
			fmt.Printf("Error while writing file: %v\n", err)
			os.Exit(1)
		}
	}
}
How can I use regexp to make this program read unprepared files (first listing) value by value and format the output like this:
-0.129617;-0.118178
-0.128609;-0.120080
-0.127623;-0.121947
...
Input files are always formed the same way: the decimal separator is a comma; there is one space after each value (even if it is the last one in a row); and each line ends with a newline.
Previously used expression like ([-?])([0-9]{1})([,]{1})([0-9]{1,12})( {1})
and the Notepad++ replace function to split a line of values into one value per line (the replacement expression was something like `$1$2.$4\n`), but it becomes a mess if a value in scientific form occurs.
So is there any way to read files value by value without messing with splitting line into slices/substrings and working over them?
Something like this. Note the limitation: it assumes that both files have the same number of values per line. Be careful: it will blow up with an error if this assumption is wrong :)
package main
import (
"bufio"
"fmt"
"os"
"strconv"
"strings"
)
// main reads dB.txt and dL.txt row by row, pairs the values found at the same
// position in each row, and writes every pair to out.txt as "<v1>;<v2>" in
// plain (non-scientific) decimal notation with a point as decimal separator.
//
// Input values are whitespace-separated and may use a decimal comma and/or
// scientific notation. A pair in which either value fails to parse is
// reported (prefixed with the 1-based row number) and skipped. Processing
// stops with a message if the files disagree on the number of rows or on the
// number of values within a row.
func main() {
	fileB, err := os.Open("dB.txt")
	if err != nil {
		fmt.Printf("error opening file: %v\n", err)
		return
	}
	defer fileB.Close()

	fileL, err := os.Open("dL.txt")
	if err != nil {
		fmt.Printf("error opening file: %v\n", err)
		return
	}
	defer fileL.Close()

	fileOut, err := os.Create("out.txt") // also rewrites an existing file!
	if err != nil {
		fmt.Printf("error opening file: %v\n", err)
		return
	}
	defer fileOut.Close()

	// Rows may hold many values, so raise the scanner's per-line limit well
	// above the 64 KiB default instead of silently splitting long rows.
	const maxLine = 16 * 1024 * 1024
	dB := bufio.NewScanner(fileB)
	dB.Buffer(make([]byte, 0, 64*1024), maxLine)
	dL := bufio.NewScanner(fileL)
	dL.Buffer(make([]byte, 0, 64*1024), maxLine)

	// Buffer the output; the deferred flush runs before fileOut is closed.
	out := bufio.NewWriter(fileOut)
	defer func() {
		if err := out.Flush(); err != nil {
			fmt.Printf("error writing file: %v\n", err)
		}
	}()

	for lc := 1; ; lc++ {
		ok1, ok2 := dB.Scan(), dL.Scan()
		if !ok1 || !ok2 {
			if err := dB.Err(); err != nil {
				fmt.Println(lc, err)
			}
			if err := dL.Err(); err != nil {
				fmt.Println(lc, err)
			}
			if ok1 != ok2 {
				fmt.Println(lc, "files have a different number of lines")
			}
			return
		}

		// Fields drops the empty token that the trailing space at the end
		// of each row would otherwise produce.
		vals1 := strings.Fields(dB.Text())
		vals2 := strings.Fields(dL.Text())
		if len(vals1) != len(vals2) {
			fmt.Println(lc, "lines have a different number of values")
			return
		}

		for i := range vals1 {
			v1, err := strconv.ParseFloat(strings.Replace(vals1[i], ",", ".", 1), 64)
			if err != nil {
				fmt.Println(lc, err)
				continue
			}
			v2, err := strconv.ParseFloat(strings.Replace(vals2[i], ",", ".", 1), 64)
			if err != nil {
				fmt.Println(lc, err)
				continue
			}
			// 'f' with precision -1 prints the shortest exact representation
			// without ever falling back to an exponent.
			pair := strconv.FormatFloat(v1, 'f', -1, 64) + ";" +
				strconv.FormatFloat(v2, 'f', -1, 64) + "\n"
			if _, err := out.WriteString(pair); err != nil {
				fmt.Println(lc, err)
				return
			}
		}
	}
}
For example,
package main
import (
"bufio"
"bytes"
"fmt"
"io"
"os"
"strconv"
"strings"
)
var comma, period = []byte{','}, []byte{'.'}

// readNext returns an iterator over the numbers contained in r.
//
// Every call to the returned function yields the next whitespace-separated
// value, parsed as a float64 after every comma on its line has been replaced
// by a period. Blank lines are skipped. Once the input is exhausted the
// iterator returns (0, io.EOF); a scanner failure or an unparsable token is
// returned as (0, err). A token that fails to parse is still consumed, so
// the following call continues with the next token.
func readNext(r io.Reader) func() (float64, error) {
	scanner := bufio.NewScanner(r)
	var pending []string

	// refill loads the tokens of the next non-blank line into pending when
	// there are none left.
	refill := func() error {
		for len(pending) == 0 {
			if !scanner.Scan() {
				if scanErr := scanner.Err(); scanErr != nil {
					return scanErr
				}
				return io.EOF
			}
			normalized := bytes.Replace(scanner.Bytes(), comma, period, -1)
			pending = strings.Fields(string(normalized))
		}
		return nil
	}

	return func() (float64, error) {
		if err := refill(); err != nil {
			return 0, err
		}
		token := pending[0]
		pending = pending[1:]
		value, err := strconv.ParseFloat(token, 64)
		if err != nil {
			return 0, err
		}
		return value, nil
	}
}
func main() {
in1Name := `in1.data`
in2Name := `in2.data`
outName := `out.data`
in1, err := os.Open(in1Name)
if err != nil {
fmt.Fprint(os.Stderr, err)
return
}
defer in1.Close()
in2, err := os.Open(in2Name)
if err != nil {
fmt.Fprint(os.Stderr, err)
return
}
defer in2.Close()
out, err := os.Create(outName)
if err != nil {
fmt.Fprint(os.Stderr, err)
return
}
defer out.Close()
outw := bufio.NewWriter(out)
defer outw.Flush()
next1 := readNext(in1)
next2 := readNext(in2)
for {
n1, err1 := next1()
n2, err2 := next2()
if err1 == io.EOF && err2 == io.EOF {
break
}
if err1 != nil || err2 != nil {
fmt.Fprint(os.Stderr, err1, err2)
return
}
_, err := fmt.Fprintf(outw, "%g;%g
", n1, n2)
if err != nil {
fmt.Fprint(os.Stderr, err)
return
}
}
}
Playground: https://play.golang.org/p/I_sT_EPFI_W
Output:
$ go run data.go
$ cat in1.data
-0,1296169 -0,1286087 -0,1276232
-0,1288124 -0,1278683 -0,1269373
-0,1280221 -0,1271375 -0,12626
$ cat in2.data
-0,1296169 -0,1286087 -0,1276232
-0,1288124 -0,1278683 -0,1269373
-0,1280221 -0,1271375 -0,12626
$ cat out.data
-0.1296169;-0.1296169
-0.1286087;-0.1286087
-0.1276232;-0.1276232
-0.1288124;-0.1288124
-0.1278683;-0.1278683
-0.1269373;-0.1269373
-0.1280221;-0.1280221
-0.1271375;-0.1271375
-0.12626;-0.12626
$
Thanks for the help; with other people's points of view I have found my own solution.
What does this tool do? In general, it combines two text files into one.
Where have I used it? To create a "Generic ASCII" text file for the "Country specific coordinate system tool". The input text files are ASCII exports of GRID files from GIS applications (values are expected in arc degrees). Later this file may be used to correct local coordinate shifts when working with precise GPS/GNSS receivers.
Here is what I have "developed":
package main
import (
"bufio"
"fmt"
"os"
"regexp"
"strconv"
"strings"
)
// main combines the ASCII grid exports d:/dB.txt and d:/dL.txt into
// d:/out.txt. Any failure is printed and the program exits with status 1.
func main() {
	if err := combineGrids("d:/dB.txt", "d:/dL.txt", "d:/out.txt"); err != nil {
		fmt.Printf("error: %v\n", err)
		os.Exit(1)
	}
}

// combineGrids merges the two grid files pathB and pathL into pathOut.
//
// Both inputs must start with the same six header lines, in this order:
// number of columns, number of rows, X corner, Y corner, cell size and
// no-data value. Each header value is extracted with a regular expression
// and must be identical in both files. Corner and cell-size values use a
// decimal comma and are multiplied by 3600 before being written to the
// output header (the author's note says the inputs are in arc degrees).
//
// The remaining whitespace-separated values are read pairwise, one from each
// file, converted from decimal-comma (optionally scientific) notation and
// written as "<dB>;<dL>" lines with eight decimals.
//
// An error is returned if a file cannot be opened, read or written, a header
// value is missing or differs between the inputs, a value cannot be parsed,
// or one input holds more values than the other.
func combineGrids(pathB, pathL, pathOut string) (err error) {
	fileB, err := os.Open(pathB)
	if err != nil {
		return fmt.Errorf("opening file: %w", err)
	}
	defer fileB.Close()

	fileL, err := os.Open(pathL)
	if err != nil {
		return fmt.Errorf("opening file: %w", err)
	}
	defer fileL.Close()

	fileOut, err := os.Create(pathOut) // also rewrites an existing file!
	if err != nil {
		return fmt.Errorf("opening file: %w", err)
	}
	defer func() {
		// A failed close can mean lost data, so surface it unless an
		// earlier error is already being returned.
		if cerr := fileOut.Close(); err == nil && cerr != nil {
			err = fmt.Errorf("closing %s: %w", pathOut, cerr)
		}
	}()

	dB := bufio.NewReader(fileB)
	dL := bufio.NewReader(fileL)
	out := bufio.NewWriter(fileOut)

	integer := regexp.MustCompile(`[0-9]+`)
	// Optional sign, so negative corner coordinates keep their minus.
	decimal := regexp.MustCompile(`-?[0-9]+(?:[,.][0-9]+)?`)
	signedInt := regexp.MustCompile(`-?\d+`)

	ncols, err := headerField(dB, dL, integer, "ncols")
	if err != nil {
		return err
	}
	nrows, err := headerField(dB, dL, integer, "nrows")
	if err != nil {
		return err
	}
	xText, err := headerField(dB, dL, decimal, "x corner")
	if err != nil {
		return err
	}
	yText, err := headerField(dB, dL, decimal, "y corner")
	if err != nil {
		return err
	}
	cellText, err := headerField(dB, dL, decimal, "cell size")
	if err != nil {
		return err
	}
	nodata, err := headerField(dB, dL, signedInt, "nodata value")
	if err != nil {
		return err
	}
	fmt.Print(nodata)

	xcorn, err := arcSeconds(xText)
	if err != nil {
		return fmt.Errorf("x corner: %w", err)
	}
	ycorn, err := arcSeconds(yText)
	if err != nil {
		return fmt.Errorf("y corner: %w", err)
	}
	cellsize, err := arcSeconds(cellText)
	if err != nil {
		return fmt.Errorf("cell size: %w", err)
	}

	// NOTE(review): bit size 32 is kept from the original; it rounds the
	// header numbers to float32 precision. Confirm that this is intended.
	short := func(v float64) string { return strconv.FormatFloat(v, 'f', -1, 32) }
	header := "name\n3;0;2\n1;2;" + nrows + ";" + ncols + "\n" +
		short(xcorn) + ";" + short(ycorn) + ";" + short(cellsize) + ";" + short(cellsize) + "\n1\n"
	if _, err := out.WriteString(header); err != nil {
		return fmt.Errorf("writing %s: %w", pathOut, err)
	}

	// Scan the rest of each input word by word; spaces and line breaks both
	// separate values, so no per-line splitting is needed.
	scB := bufio.NewScanner(dB)
	scB.Split(bufio.ScanWords)
	scL := bufio.NewScanner(dL)
	scL.Split(bufio.ScanWords)

	for n := 1; ; n++ {
		okB, okL := scB.Scan(), scL.Scan()
		if !okB || !okL {
			if err := scB.Err(); err != nil {
				return fmt.Errorf("reading %s: %w", pathB, err)
			}
			if err := scL.Err(); err != nil {
				return fmt.Errorf("reading %s: %w", pathL, err)
			}
			if okB != okL {
				return fmt.Errorf("inputs differ in length: one ended before value %d", n)
			}
			break
		}

		valB, err := formatValue(scB.Text())
		if err != nil {
			return fmt.Errorf("%s, value %d: %w", pathB, n, err)
		}
		valL, err := formatValue(scL.Text())
		if err != nil {
			return fmt.Errorf("%s, value %d: %w", pathL, n, err)
		}
		if _, err := out.WriteString(valB + ";" + valL + "\n"); err != nil {
			return fmt.Errorf("writing %s: %w", pathOut, err)
		}
	}

	if err := out.Flush(); err != nil {
		return fmt.Errorf("writing %s: %w", pathOut, err)
	}
	return nil
}

// headerField reads the next line from both readers, extracts the first match
// of re from each and returns it, provided both files agree and a value was
// found. name is only used in error messages.
func headerField(dB, dL *bufio.Reader, re *regexp.Regexp, name string) (string, error) {
	lineB, err := dB.ReadString('\n')
	if err != nil {
		return "", fmt.Errorf("reading %s from first input: %w", name, err)
	}
	lineL, err := dL.ReadString('\n')
	if err != nil {
		return "", fmt.Errorf("reading %s from second input: %w", name, err)
	}
	valB, valL := re.FindString(lineB), re.FindString(lineL)
	if valB != valL {
		return "", fmt.Errorf("%s differs between inputs: %q vs %q", name, valB, valL)
	}
	if valB == "" {
		return "", fmt.Errorf("%s not found in header line %q", name, strings.TrimSpace(lineB))
	}
	return valB, nil
}

// arcSeconds parses a number that may use a decimal comma and returns it
// multiplied by 3600.
func arcSeconds(text string) (float64, error) {
	deg, err := strconv.ParseFloat(strings.Replace(text, ",", ".", 1), 64)
	if err != nil {
		return 0, err
	}
	return deg * 3600.0, nil
}

// formatValue converts one input token (decimal comma, optionally in
// scientific notation) to fixed-point text with eight decimals.
func formatValue(token string) (string, error) {
	v, err := strconv.ParseFloat(strings.TrimSpace(strings.Replace(token, ",", ".", 1)), 64)
	if err != nil {
		return "", err
	}
	return strconv.FormatFloat(v, 'f', 8, 64), nil
}
If you have any recommendations, feel free to leave a comment!