Go中的字母数字排序

I am reading rows from the GAE Datastore and I want to sort them alphanumerically.

Suppose I have something like this:

key      name      description    sequence  
===========================================  
ASD..    maths1    it is maths    chap21.1  
ASD..    maths2    it is maths    chap21.10  
ASD..    maths3    it is maths    chap21.2  

I want the result sorted alphanumerically on the sequence field, like this:

key      name      description    sequence  
===========================================  
ASD..    maths1    it is maths    chap21.1  
ASD..    maths3    it is maths    chap21.2 
ASD..    maths2    it is maths    chap21.10   

Use ISO/IEC 14651:2011 to construct the sequence sort key. For example,

package main

import (
    "fmt"
    "sort"
)

const maxByte = 1<<8 - 1

func isDigit(d byte) bool {
    return '0' <= d && d <= '9'
}

func SequenceKey(key string) string {
    sKey := make([]byte, 0, len(key)+8)
    j := -1
    for i := 0; i < len(key); i++ {
        b := key[i]
        if !isDigit(b) {
            sKey = append(sKey, b)
            j = -1
            continue
        }
        if j == -1 {
            sKey = append(sKey, 0x00)
            j = len(sKey) - 1
        }
        if sKey[j] == 1 && sKey[j+1] == '0' {
            sKey[j+1] = b
            continue
        }
        if sKey[j]+1 > maxByte {
            panic("SequenceKey: invalid key")
        }
        sKey = append(sKey, b)
        sKey[j]++
    }
    return string(sKey)
}

type Chapter struct {
    Key         string
    Name        string
    Description string
    Sequence    string
    SequenceKey string `datastore:"-"`
}

type Chapters []*Chapter

var chapters = Chapters{
    {Key: "ASD..", Name: "maths1", Description: "it is maths", Sequence: "chap21.1"},
    {Key: "ASD..", Name: "maths2", Description: "it is maths", Sequence: "chap21.10"},
    {Key: "ASD..", Name: "maths3", Description: "it is maths", Sequence: "chap21.2"},
}

func (s Chapters) Len() int {
    return len(s)
}

func (s Chapters) Swap(i, j int) {
    s[i], s[j] = s[j], s[i]
}

type BySequenceKey struct{ Chapters }

func (s BySequenceKey) Less(i, j int) bool {
    return s.Chapters[i].SequenceKey < s.Chapters[j].SequenceKey
}

func main() {
    for _, chapter := range chapters {
        chapter.SequenceKey = SequenceKey(chapter.Sequence)
    }
    fmt.Println("Unsorted:")
    for _, chapter := range chapters {
        fmt.Printf("  sequence: %#v
", chapter.Sequence)
        fmt.Printf("    sort key: %#v
", chapter.SequenceKey)
        fmt.Printf("      name: %#v
", chapter.Name)
    }
    fmt.Println("Sorted:")
    sort.Sort(BySequenceKey{chapters})
    for _, chapter := range chapters {
        fmt.Printf("  sequence: %#v
", chapter.Sequence)
        fmt.Printf("    sort key: %#v
", chapter.SequenceKey)
        fmt.Printf("      name: %#v
", chapter.Name)
    }
}

Output:

Unsorted:
  sequence: "chap21.1"
    sort key: "chap\x0221.\x011"
      name: "maths1"
  sequence: "chap21.10"
    sort key: "chap\x0221.\x0210"
      name: "maths2"
  sequence: "chap21.2"
    sort key: "chap\x0221.\x012"
      name: "maths3"
Sorted:
  sequence: "chap21.1"
    sort key: "chap\x0221.\x011"
      name: "maths1"
  sequence: "chap21.2"
    sort key: "chap\x0221.\x012"
      name: "maths3"
  sequence: "chap21.10"
    sort key: "chap\x0221.\x0210"
      name: "maths2"

According to the reference, you can simply sort on the property you require:

From the doc:

// Order alphabetically by last name:
q := datastore.NewQuery("Person").Order("LastName")

So in your example, you could have something along the lines of:

func queryAll(r *http.Request) ([]string, error) {
    c := appengine.NewContext(r)
    res := make([]string, 0, 0)
    t := datastore.NewQuery("YourStructure").Order("Sequence").Run(c)
    for {
        var s YourStructure
        if _, err := t.Next(&s); err == datastore.Done {
            // Done iterating
            return res, nil
        } else if err != nil {
            // An error happened
            return nil, err
        }
        res = append(res, s.Name) 
    }
    panic("unreachable")
}

If you do not have too many numbers of rows, you can probably retrieve all rows and store them in a slice. Then you can sort those entries in RAM by implementing the sort.Interface and calling the sort.Sort function. Take a look at the source of sort.IntSlice if you need an example for that.

The tricky part is probably defining the alphanumeric sort order. I don't know the exact definition of it (and I wasn't able to look it up in this short amount of time), but I have tried to implement it anyway. Here is the code that you might use for the less method:

package main

import "log"

func less(a, b string) bool {
    i, j := 0, 0
    for i < len(a) && j < len(b) {
        numeric, numA, numB := false, 0, 0
        for i < len(a) && a[i] >= '0' && a[i] <= '9' {
            numA = numA*10 + int(a[i]) - '0'
            numeric = true
            i++
        }
        for j < len(b) && b[j] >= '0' && b[j] <= '9' {
            numB = numB*10 + int(b[j]) - '0'
            numeric = true
            j++
        }
        if numeric {
            if numA != numB {
                return numA < numB
            }
            continue
        }
        if a[i] != b[j] {
            return a[i] < b[j]
        }
        i++
        j++
    }
    return i == len(a) && j != len(b)
}

var tests = []struct {
    a, b string
    r1, r2 bool
}{
    {"bar", "foo", true, false},
    {"foo100", "foo10", false, true},
    {"foo100a", "foo100b", true, false},
    {"foo", "foo", false, false},
    {"100", "100", false, false},
    {"foo5", "foo12", true, false},
    {"foo5", "fo3", true, false},
    {"foo", "foo8", true, false},
}

func main() {
    for i := range tests {
        if less(tests[i].a, tests[i].b) != tests[i].r1 {
            log.Fatalf("test %d failed", i)
        }
        if less(tests[i].b, tests[i].a) != tests[i].r2 {
            log.Fatalf("reverse test %d failed", i)
        }

    }
}

I'm not sure if the code is sufficient for you or if you need to handle more complex cases, but it might provide at least a good starting point for your own modifications.

Peter's answer reminded me of the collate package of the go.text repository, a subrepo of the official Go repository that contains some packages that are currently under development. This package offers everything you need and is fully locale and unicode aware.

You could use the CompareString method to sort a slice of rows in-memory, but the better approach would be to store a sort key (a seqence of bytes that can be compared as usual) as an additional column and let GAE do the rest for you.

package main

import (
    "code.google.com/p/go.text/collate"
    "code.google.com/p/go.text/locale"
    "fmt"
)

func main() {
    locId := locale.Make("en-US")
    col := collate.New(locId)
    col.SetOptions(collate.Numeric | collate.IgnoreCase)

    keys := []string{"chap21.1", "chap21.10", "chap21.2", "chap21.03.3",
        "chap21.3.03", "chap21.03.03"}

    buf := new(collate.Buffer)
    for i := 0; i < len(keys); i++ {
        fmt.Println(keys[i], col.KeyFromString(buf, keys[i]))
    }
}

Edit: I have just taken a closer look at the implementation and most of the methods (including SetOptions and the handling of numeric sorting) are not implemented yet. So this answer was probably a bit too early, but at least you got the picture of how you might sort your rows in the future ;)