I'm learning Golang so I can rewrite some of my shell scripts.
I have URLs that look like this:
https://example-1.example.com/a/c482dfad3573acff324c/list.txt?parm1=value,parm2=value,parm3=https://example.com/a?parm1=value,parm2=value
I want to extract the following part:
https://example-1.example.com/a/c482dfad3573acff324c/list.txt
In a shell script I would do something like this:
# Print the leading part of the URL up to and including ".txt".
# -E enables extended regexps so "s?" accepts both http and https; the dot before "txt" is escaped.
echo "$myString" | grep -oE 'https?://.*\.txt'
What is the best way to do the same thing in Golang, only by using the standard library?
There are a few options:
// The fragments below are alternatives; each one computes s independently,
// so pick one rather than pasting them all into the same scope.

// match regexp as in question
// (.* is greedy, so the match extends to the last ".txt" in the input)
pat := regexp.MustCompile(`https?://.*\.txt`)
s := pat.FindString(myString)

// everything before the query
s := strings.Split(myString, "?")[0]

// same as previous, but avoids []string allocation
s := myString
if i := strings.IndexByte(s, '?'); i >= 0 {
	s = s[:i]
}

// parse and clear query string
u, err := url.Parse(myString)
if err != nil {
	// u is nil when parsing fails, so it must not be touched;
	// replace panic with whatever error handling fits the caller.
	panic(err)
}
u.RawQuery = ""
s := u.String()
The last option is the best because it will handle all possible corner cases.
You may use strings.IndexRune, strings.IndexByte, strings.Split, strings.SplitAfter, strings.FieldsFunc, url.Parse, regexp, or your own function.
First, the simplest way: use i := strings.IndexRune(s, '?') or i := strings.IndexByte(s, '?'), then take s[:i] once you have checked that i is not -1, like this (with the output shown in a comment):
package main
import "fmt"
import "strings"
// main prints the part of the sample URL that precedes its first '?'.
// Nothing is printed when the URL contains no '?'.
func main() {
	sample := `https://example-1.example.com/a/c482dfad3573acff324c/list.txt?parm1=value,parm2=value,parm3=https://example.com/a?parm1=value,parm2=value`
	if cut := strings.IndexByte(sample, '?'); cut >= 0 {
		base := sample[:cut]
		fmt.Println(base) // https://example-1.example.com/a/c482dfad3573acff324c/list.txt
	}
}
Or you may use url.Parse(s) (this is the one I'd use):
package main
import "fmt"
import "net/url"
// main parses the sample URL, clears its query string, and prints what remains.
// A URL that fails to parse produces no output.
func main() {
	sample := `https://example-1.example.com/a/c482dfad3573acff324c/list.txt?parm1=value,parm2=value,parm3=https://example.com/a?parm1=value,parm2=value`
	parsed, err := url.Parse(sample)
	if err != nil {
		return
	}
	parsed.RawQuery = ""
	fmt.Println(parsed.String()) // https://example-1.example.com/a/c482dfad3573acff324c/list.txt
}
Or you may use regexp.MustCompile(".*\\.txt"):
package main
import "fmt"
import "regexp"
// rgx matches any run of characters ending in ".txt"; because .* is greedy,
// the match runs up to the last ".txt" on the line.
var rgx = regexp.MustCompile(`.*\.txt`)

// main prints the leading portion of the sample URL that ends in ".txt".
func main() {
	sample := `https://example-1.example.com/a/c482dfad3573acff324c/list.txt?parm1=value,parm2=value,parm3=https://example.com/a?parm1=value,parm2=value`
	match := rgx.FindString(sample)
	fmt.Println(match) // https://example-1.example.com/a/c482dfad3573acff324c/list.txt
}
Or you may use splits := strings.FieldsFunc(s, func(r rune) bool { return r == '?' }) and then take splits[0]:
package main
import "fmt"
import "strings"
// main splits the sample URL at every '?' and prints the first resulting field.
func main() {
	sample := `https://example-1.example.com/a/c482dfad3573acff324c/list.txt?parm1=value,parm2=value,parm3=https://example.com/a?parm1=value,parm2=value`
	isQuestionMark := func(r rune) bool {
		return r == '?'
	}
	fields := strings.FieldsFunc(sample, isQuestionMark)
	fmt.Println(fields[0]) // https://example-1.example.com/a/c482dfad3573acff324c/list.txt
}
Or you may use splits := strings.Split(s, "?") and then take splits[0]:
package main
import "fmt"
import "strings"
// main splits the sample URL on "?" and prints the piece before the first separator.
func main() {
	sample := `https://example-1.example.com/a/c482dfad3573acff324c/list.txt?parm1=value,parm2=value,parm3=https://example.com/a?parm1=value,parm2=value`
	base := strings.Split(sample, "?")[0]
	fmt.Println(base) // https://example-1.example.com/a/c482dfad3573acff324c/list.txt
}
Or you may use splits := strings.SplitAfter(s, ".txt") and then take splits[0]:
package main
import "fmt"
import "strings"
// main cuts the sample URL just after each ".txt" and prints the first piece,
// which therefore still ends in ".txt".
func main() {
	sample := `https://example-1.example.com/a/c482dfad3573acff324c/list.txt?parm1=value,parm2=value,parm3=https://example.com/a?parm1=value,parm2=value`
	base := strings.SplitAfter(sample, ".txt")[0]
	fmt.Println(base) // https://example-1.example.com/a/c482dfad3573acff324c/list.txt
}
Or you may write your own function (the most independent way):
package main
import "fmt"
// left returns the portion of s that precedes the first '?'.
// When s contains no '?', s is returned unchanged, so a URL without a
// query string is preserved instead of being turned into "".
func left(s string) string {
	for i, r := range s {
		if r == '?' {
			return s[:i]
		}
	}
	// No separator found: there is nothing to strip.
	return s
}
// main passes the sample URL to left and prints the returned prefix.
func main() {
	sample := `https://example-1.example.com/a/c482dfad3573acff324c/list.txt?parm1=value,parm2=value,parm3=https://example.com/a?parm1=value,parm2=value`
	base := left(sample)
	fmt.Println(base) // https://example-1.example.com/a/c482dfad3573acff324c/list.txt
}
If you are processing only URLs, you can use Go's net/url library (https://golang.org/pkg/net/url/) to parse the URL, clear the Query and Fragment parts (the Query would be parm1=value,parm2=value etc.), and keep the remaining scheme://host/path portion, as in the following example (https://play.golang.org/p/Ao0jU22NyA):
package main
import (
"fmt"
"net/url"
)
// main parses a sample URL, clears its query string and fragment, and prints
// the remaining scheme://host/path portion followed by a newline.
func main() {
	u, err := url.Parse("https://example-1.example.com/a/b/c/list.txt?parm1=value,parm2=https%3A%2F%2Fexample.com%2Fa%3Fparm1%3Dvalue%2Cparm2%3Dvalue#somefragment")
	if err != nil {
		// u is nil on failure, so report the problem instead of dereferencing it.
		fmt.Println("parse URL:", err)
		return
	}
	u.RawQuery, u.Fragment = "", ""
	// The newline must be written as the \n escape: an interpreted string
	// literal cannot span source lines.
	fmt.Printf("%s\n", u)
}
Output:
https://example-1.example.com/a/b/c/list.txt