I am reading rows from the GAE Datastore and I want to sort them alphanumerically.
Suppose I have something like this:
key name description sequence
===========================================
ASD.. maths1 it is maths chap21.1
ASD.. maths2 it is maths chap21.10
ASD.. maths3 it is maths chap21.2
I want the result sorted alphanumerically on the sequence field, like this:
key name description sequence
===========================================
ASD.. maths1 it is maths chap21.1
ASD.. maths3 it is maths chap21.2
ASD.. maths2 it is maths chap21.10
Use ISO/IEC 14651:2011 to construct the sequence sort key. For example,
package main
import (
"fmt"
"sort"
)
const maxByte = 1<<8 - 1
func isDigit(d byte) bool {
return '0' <= d && d <= '9'
}
func SequenceKey(key string) string {
sKey := make([]byte, 0, len(key)+8)
j := -1
for i := 0; i < len(key); i++ {
b := key[i]
if !isDigit(b) {
sKey = append(sKey, b)
j = -1
continue
}
if j == -1 {
sKey = append(sKey, 0x00)
j = len(sKey) - 1
}
if sKey[j] == 1 && sKey[j+1] == '0' {
sKey[j+1] = b
continue
}
if sKey[j]+1 > maxByte {
panic("SequenceKey: invalid key")
}
sKey = append(sKey, b)
sKey[j]++
}
return string(sKey)
}
type Chapter struct {
Key string
Name string
Description string
Sequence string
SequenceKey string `datastore:"-"`
}
type Chapters []*Chapter
var chapters = Chapters{
{Key: "ASD..", Name: "maths1", Description: "it is maths", Sequence: "chap21.1"},
{Key: "ASD..", Name: "maths2", Description: "it is maths", Sequence: "chap21.10"},
{Key: "ASD..", Name: "maths3", Description: "it is maths", Sequence: "chap21.2"},
}
func (s Chapters) Len() int {
return len(s)
}
func (s Chapters) Swap(i, j int) {
s[i], s[j] = s[j], s[i]
}
type BySequenceKey struct{ Chapters }
func (s BySequenceKey) Less(i, j int) bool {
return s.Chapters[i].SequenceKey < s.Chapters[j].SequenceKey
}
func main() {
for _, chapter := range chapters {
chapter.SequenceKey = SequenceKey(chapter.Sequence)
}
fmt.Println("Unsorted:")
for _, chapter := range chapters {
fmt.Printf(" sequence: %#v
", chapter.Sequence)
fmt.Printf(" sort key: %#v
", chapter.SequenceKey)
fmt.Printf(" name: %#v
", chapter.Name)
}
fmt.Println("Sorted:")
sort.Sort(BySequenceKey{chapters})
for _, chapter := range chapters {
fmt.Printf(" sequence: %#v
", chapter.Sequence)
fmt.Printf(" sort key: %#v
", chapter.SequenceKey)
fmt.Printf(" name: %#v
", chapter.Name)
}
}
Output:
Unsorted:
sequence: "chap21.1"
sort key: "chap\x0221.\x011"
name: "maths1"
sequence: "chap21.10"
sort key: "chap\x0221.\x0210"
name: "maths2"
sequence: "chap21.2"
sort key: "chap\x0221.\x012"
name: "maths3"
Sorted:
sequence: "chap21.1"
sort key: "chap\x0221.\x011"
name: "maths1"
sequence: "chap21.2"
sort key: "chap\x0221.\x012"
name: "maths3"
sequence: "chap21.10"
sort key: "chap\x0221.\x0210"
name: "maths2"
According to the reference, you can simply sort on the property you require:
From the doc:
// Order alphabetically by last name:
q := datastore.NewQuery("Person").Order("LastName")
So in your example, you could have something along the lines of:
func queryAll(r *http.Request) ([]string, error) {
c := appengine.NewContext(r)
res := make([]string, 0, 0)
t := datastore.NewQuery("YourStructure").Order("Sequence").Run(c)
for {
var s YourStructure
if _, err := t.Next(&s); err == datastore.Done {
// Done iterating
return res, nil
} else if err != nil {
// An error happened
return nil, err
}
res = append(res, s.Name)
}
panic("unreachable")
}
If you do not have too many numbers of rows, you can probably retrieve all rows and store them in a slice. Then you can sort those entries in RAM by implementing the sort.Interface and calling the sort.Sort function. Take a look at the source of sort.IntSlice if you need an example for that.
The tricky part is probably defining the alphanumeric sort order. I don't know the exact definition of it (and I wasn't able to look it up in this short amount of time), but I have tried to implement it anyway. Here is the code that you might use for the less method:
package main
import "log"
func less(a, b string) bool {
i, j := 0, 0
for i < len(a) && j < len(b) {
numeric, numA, numB := false, 0, 0
for i < len(a) && a[i] >= '0' && a[i] <= '9' {
numA = numA*10 + int(a[i]) - '0'
numeric = true
i++
}
for j < len(b) && b[j] >= '0' && b[j] <= '9' {
numB = numB*10 + int(b[j]) - '0'
numeric = true
j++
}
if numeric {
if numA != numB {
return numA < numB
}
continue
}
if a[i] != b[j] {
return a[i] < b[j]
}
i++
j++
}
return i == len(a) && j != len(b)
}
var tests = []struct {
a, b string
r1, r2 bool
}{
{"bar", "foo", true, false},
{"foo100", "foo10", false, true},
{"foo100a", "foo100b", true, false},
{"foo", "foo", false, false},
{"100", "100", false, false},
{"foo5", "foo12", true, false},
{"foo5", "fo3", true, false},
{"foo", "foo8", true, false},
}
func main() {
for i := range tests {
if less(tests[i].a, tests[i].b) != tests[i].r1 {
log.Fatalf("test %d failed", i)
}
if less(tests[i].b, tests[i].a) != tests[i].r2 {
log.Fatalf("reverse test %d failed", i)
}
}
}
I'm not sure if the code is sufficient for you or if you need to handle more complex cases, but it might provide at least a good starting point for your own modifications.
Peter's answer reminded me of the collate package of the go.text repository, a subrepo of the official Go repository that contains some packages that are currently under development. This package offers everything you need and is fully locale and unicode aware.
You could use the CompareString method to sort a slice of rows in-memory, but the better approach would be to store a sort key (a seqence of bytes that can be compared as usual) as an additional column and let GAE do the rest for you.
package main
import (
"code.google.com/p/go.text/collate"
"code.google.com/p/go.text/locale"
"fmt"
)
func main() {
locId := locale.Make("en-US")
col := collate.New(locId)
col.SetOptions(collate.Numeric | collate.IgnoreCase)
keys := []string{"chap21.1", "chap21.10", "chap21.2", "chap21.03.3",
"chap21.3.03", "chap21.03.03"}
buf := new(collate.Buffer)
for i := 0; i < len(keys); i++ {
fmt.Println(keys[i], col.KeyFromString(buf, keys[i]))
}
}
Edit: I have just taken a closer look at the implementation and most of the methods (including SetOptions
and the handling of numeric sorting) are not implemented yet. So this answer was probably a bit too early, but at least you got the picture of how you might sort your rows in the future ;)