如何使用golang将HTML表转换为数组

I'm having trouble converting an HTML table into a Go array. I've tried to do it with both x/net/html and goquery, without success with either of them.

Let's say we have this HTML table:

<html>
  <body>
    <table>
      <tr>
        <td>Row 1, Content 1</td>
        <td>Row 1, Content 2</td>
        <td>Row 1, Content 3</td>
        <td>Row 1, Content 4</td>
      </tr>
      <tr>
        <td>Row 2, Content 1</td>
        <td>Row 2, Content 2</td>
        <td>Row 2, Content 3</td>
        <td>Row 2, Content 4</td>
      </tr>
    </table>
  </body>
</html>

And I'd like to end up with this array:

------------------------------------
|Row 1, Content 1| Row 1, Content 2|
------------------------------------
|Row 2, Content 1| Row 2, Content 2|
------------------------------------

As you can see, I'm just ignoring Contents 3 and 4.

My extraction code:

// extractValue parses content as HTML and visits every <td> element found
// under a table row, in document order. The per-cell handling is left as a
// placeholder.
func extractValue(content []byte) {
  doc, err := goquery.NewDocumentFromReader(bytes.NewReader(content))
  if err != nil {
    // Without a parsed document there is nothing to walk, and using doc
    // here would dereference nil. The function has no error result, so the
    // failure can only be handled by stopping early.
    return
  }

  doc.Find("table tr td").Each(func(i int, td *goquery.Selection) {
    // ...
  })
}

I've tried adding a counter variable to skip the <td> elements that I don't want to convert, and calling

td.NextAll()

but with no luck. Do you have any idea what I should do to accomplish this?

Thanks.

You can get away with using only the golang.org/x/net/html package.

// body is the sample document: one table with two rows of four cells each.
// Every cell is closed with </td>; closing it with a second <td> would open an
// extra, empty cell after each real one.
var body = strings.NewReader(`
        <html>
        <body>
        <table>
        <tr>
        <td>Row 1, Content 1</td>
        <td>Row 1, Content 2</td>
        <td>Row 1, Content 3</td>
        <td>Row 1, Content 4</td>
        </tr>
        <tr>
        <td>Row 2, Content 1</td>
        <td>Row 2, Content 2</td>
        <td>Row 2, Content 3</td>
        <td>Row 2, Content 4</td>
        </tr>
        </table>
        </body>
        </html>`)

// main tokenizes the HTML held in body and prints the trimmed text of every
// <td> cell, in document order, as one flat slice.
func main() {
    z := html.NewTokenizer(body)
    var content []string

    // Read until the tokenizer reports ErrorToken, which it does at the end
    // of the input as well as on a read error. Comparing the current token's
    // data with "html" instead would stop at a leading <!DOCTYPE html> and
    // would never stop on a document that lacks the closing </html> tag.
    for {
        tt := z.Next()
        if tt == html.ErrorToken {
            break
        }
        if tt != html.StartTagToken {
            continue
        }
        if z.Token().Data != "td" {
            continue
        }
        // The cell's text, when present, is the token right after the
        // opening tag.
        if z.Next() == html.TextToken {
            content = append(content, strings.TrimSpace(string(z.Text())))
        }
    }
    // Print to check the slice's content
    fmt.Println(content)
}

This code is written for this specific HTML pattern only, but refactoring it to be more general wouldn't be hard.

Try an approach like this to make a 2d array and handle variable row sizes:

    // Build a 2-D slice: one inner slice per table row, one string per cell.
    z := html.NewTokenizer(body)
    table := [][]string{}
    row := []string{}

    // Read until the tokenizer reports ErrorToken, which it does at the end
    // of the input as well as on a read error. Comparing the current token's
    // data with "html" instead would stop at a leading <!DOCTYPE html> and
    // would never stop on a document that lacks the closing </html> tag.
    for {
        tt := z.Next()
        if tt == html.ErrorToken {
            break
        }
        if tt != html.StartTagToken {
            continue
        }

        switch z.Token().Data {
        case "tr":
            // A new row begins: store the cells collected for the previous
            // one. Rows without any collected cell are not stored.
            if len(row) > 0 {
                table = append(table, row)
                row = []string{}
            }
        case "td":
            // The cell's text, when present, is the token right after the
            // opening tag.
            if z.Next() == html.TextToken {
                row = append(row, strings.TrimSpace(string(z.Text())))
            }
        }
    }
    // The last row has no following <tr> to flush it, so store it here.
    if len(row) > 0 {
        table = append(table, row)
    }