从HTTP到https代理

I've started working on a cache to put into our data center for S3 objects that get frequently pulled from various S3 servers. Some of them are in the GB size and many servers are requesting the same objects. So to improve performance, this cache is needed.

However, unlike other S3 caches I've seen, I don't need the s3 authentication part. It's already included in the headers of the requests from my client.

So the plan is to parse the request, check if the object exists completely locally, or is currently being fetched. And when complete, return the result. Objects are conveniently hashed. So if it's contents changes it's a new hash and a new object.

The requests already contain everything needed to authenticate with S3. But where I'm slightly stuck is I'm not familiar with HTTP's proxy Connect protocol. I modified some example beginning code I found to not use Hijack so I could interpret what is going on and make decisions on whether I need to in fact get the upstream object. But it's not working. The client spits out:

Get https://example.com: tls: first record does not look like a TLS handshake

hmmm.

Here is the proxy listening on HTTP (it'll be behind a firewall so I have made it deliberately insecure for simplicity).

package main

import (
    "crypto/tls"
    "io"
    "log"
    "net"
    "net/http"
    "time"
)

func handleTunneling(w http.ResponseWriter, r *http.Request) {
    dest_conn, err := net.DialTimeout("tcp", r.Host, 10*time.Second)
    if err != nil {
        http.Error(w, err.Error(), http.StatusServiceUnavailable)
        return
    }
    w.WriteHeader(http.StatusOK)
    hijacker, ok := w.(http.Hijacker)
    if !ok {
        http.Error(w, "Hijacking not supported", http.StatusInternalServerError)
        return
    }
    client_conn, _, err := hijacker.Hijack()
    if err != nil {
        http.Error(w, err.Error(), http.StatusServiceUnavailable)
    }
    go transfer(dest_conn, client_conn)
    go transfer(client_conn, dest_conn)
}

func transfer(destination io.WriteCloser, source io.ReadCloser) {
    defer destination.Close()
    defer source.Close()
    io.Copy(destination, source)
}

func handleHTTP(w http.ResponseWriter, req *http.Request) {
    resp, err := http.DefaultTransport.RoundTrip(req)
    if err != nil {
        http.Error(w, err.Error(), http.StatusServiceUnavailable)
        return
    }
    defer resp.Body.Close()

    log.Println(req.RemoteAddr, " ", resp.Status)

    copyHeader(w.Header(), resp.Header)
    w.WriteHeader(resp.StatusCode)
    io.Copy(w, resp.Body)
}

func copyHeader(dst, src http.Header) {
    for k, vv := range src {
        for _, v := range vv {
            dst.Add(k, v)
        }
    }
}

func main() {
    server := &http.Server{
        Addr: ":8080",
        Handler: http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
            if r.Method == http.MethodConnect {
                handleTunneling(w, r)
            } else {
                handleHTTP(w, r)
            }
        }),
        // Disable HTTP/2.
        TLSNextProto: make(map[string]func(*http.Server, *tls.Conn, http.Handler)),
    }

    log.Fatal(server.ListenAndServe())
}

The client looks like this:

package main

import (
    "net/http"
    "fmt"
    "io/ioutil"
    "os"
    "net/url"
)

func main() {  
    args := os.Args[1:]
    var whereTo string
    if len(args) > 0 {
        whereTo = args[0]
    } else {
        fmt.Println("Usage: htclient [url]")
        os.Exit(1)
    }

    proxyUrl, err := url.Parse("http://localhost:8080")
    client := &http.Client{Transport: &http.Transport{Proxy: http.ProxyURL(proxyUrl)}}

    req, err := http.NewRequest("GET", whereTo, nil)

    response, err := client.Do(req)
    if err != nil {
        fmt.Printf("%s", err)
        os.Exit(1)
    } else {
        defer response.Body.Close()
        contents, err := ioutil.ReadAll(response.Body)
        if err != nil {
            fmt.Printf("%s", err)
            os.Exit(1)
        }
        fmt.Printf("%s
", string(contents))
    }
}

Suggestions?

You are sending the 200 OK response too early; send it after the error checking.

https://tools.ietf.org/html/rfc7231#section-4.3.6 (emphasize mine):

CONNECT is intended only for use in requests to a proxy. An origin server that receives a CONNECT request for itself MAY respond with a 2xx (Successful) status code to indicate that a connection is established.

func handleTunneling(w http.ResponseWriter, r *http.Request) {
    dest_conn, err := net.DialTimeout("tcp", r.Host, 10*time.Second)
    if err != nil {
        http.Error(w, err.Error(), http.StatusServiceUnavailable)
        return
    }

    // too early: w.WriteHeader(http.StatusOK)

    hijacker, ok := w.(http.Hijacker)
    if !ok {
        http.Error(w, "Hijacking not supported", http.StatusInternalServerError)
        return
    }
    client_conn, _, err := hijacker.Hijack()
    if err != nil {
        http.Error(w, err.Error(), http.StatusServiceUnavailable)
        return // missing in your sample code
    }

    w.WriteHeader(http.StatusOK) // respond 2xx here
    go transfer(dest_conn, client_conn)
    go transfer(client_conn, dest_conn)
}