golang sync.WaitGroup永远不会完成

I have the below code that fetches a list of URL's and then conditionally downloads a file and saves it to the filesystem. The files are fetched concurrently and the main goroutine waits for all the files to be fetched. But, the program never exits (and there are no errors) after completing all the requests.

What I think is happening is that somehow the amount of go routines in the WaitGroup is either incremented by too many to begin with (via Add) or not decremented by enough (a Done call is not happening).

Is there something I am obviously doing wrong? How would I inspect how many go routines are presently in the WaitGroup so I can better debug what's happening?

package main

import (
    "fmt"
    "io"
    "io/ioutil"
    "net/http"
    "os"
    "strings"
    "sync"
)

func main() {
    links := parseLinks()

    var wg sync.WaitGroup

    for _, url := range links {
        if isExcelDocument(url) {
            wg.Add(1)
            go downloadFromURL(url, wg)
        } else {
            fmt.Printf("Skipping: %v 
", url)
        }
    }
    wg.Wait()
}

func downloadFromURL(url string, wg sync.WaitGroup) error {
    tokens := strings.Split(url, "/")
    fileName := tokens[len(tokens)-1]
    fmt.Printf("Downloading %v to %v 
", url, fileName)

    content, err := os.Create("temp_docs/" + fileName)
    if err != nil {
        fmt.Printf("Error while creating %v because of %v", fileName, err)
        return err
    }

    resp, err := http.Get(url)
    if err != nil {
        fmt.Printf("Could not fetch %v because %v", url, err)
        return err
    }
    defer resp.Body.Close()

    _, err = io.Copy(content, resp.Body)
    if err != nil {
        fmt.Printf("Error while saving %v from %v", fileName, url)
        return err
    }

    fmt.Printf("Download complete for %v 
", fileName)

    defer wg.Done()
    return nil
}

func isExcelDocument(url string) bool {
    return strings.HasSuffix(url, ".xlsx") || strings.HasSuffix(url, ".xls")
}

func parseLinks() []string {
    linksData, err := ioutil.ReadFile("links.txt")
    if err != nil {
        fmt.Printf("Trouble reading file: %v", err)
    }

    links := strings.Split(string(linksData), ", ")

    return links
}

There are two problems with this code. First, you have to pass a pointer to the WaitGroup to downloadFromURL(), otherwise the object will be copied and Done() will not be visible in main().

See:

func main() {
    ...
    go downloadFromURL(url, &wg)
    ...
}

Second, defer wg.Done() should be one of the first statements in downloadFromURL(), otherwise if you return from the function before that statement, it won't get "registered" and won't get called.

func downloadFromURL(url string, wg *sync.WaitGroup) error {
    defer wg.Done()
    ...
}

Arguments in Go are always passed by value. Use a pointer when an argument may be modified. Also, make sure that you always execute wg.Done().For example,

package main

import (
    "fmt"
    "io"
    "io/ioutil"
    "net/http"
    "os"
    "strings"
    "sync"
)

func main() {
    links := parseLinks()

    wg := new(sync.WaitGroup)

    for _, url := range links {
        if isExcelDocument(url) {
            wg.Add(1)
            go downloadFromURL(url, wg)
        } else {
            fmt.Printf("Skipping: %v 
", url)
        }
    }
    wg.Wait()
}

func downloadFromURL(url string, wg *sync.WaitGroup) error {
    defer wg.Done()
    tokens := strings.Split(url, "/")
    fileName := tokens[len(tokens)-1]
    fmt.Printf("Downloading %v to %v 
", url, fileName)

    content, err := os.Create("temp_docs/" + fileName)
    if err != nil {
        fmt.Printf("Error while creating %v because of %v", fileName, err)
        return err
    }

    resp, err := http.Get(url)
    if err != nil {
        fmt.Printf("Could not fetch %v because %v", url, err)
        return err
    }
    defer resp.Body.Close()

    _, err = io.Copy(content, resp.Body)
    if err != nil {
        fmt.Printf("Error while saving %v from %v", fileName, url)
        return err
    }

    fmt.Printf("Download complete for %v 
", fileName)

    return nil
}

func isExcelDocument(url string) bool {
    return strings.HasSuffix(url, ".xlsx") || strings.HasSuffix(url, ".xls")
}

func parseLinks() []string {
    linksData, err := ioutil.ReadFile("links.txt")
    if err != nil {
        fmt.Printf("Trouble reading file: %v", err)
    }

    links := strings.Split(string(linksData), ", ")

    return links
}

As @Bartosz mentioned, you will need to pass a reference to your WaitGroup object. He did a great job discussing the importance of defer ws.Done()

I like WaitGroup's simplicity. However, I do not like that we need to pass the reference to the goroutine because that would mean that the concurrency logic would be mixed with your business logic.

So I came up with this generic function to solve this problem for me:

// Parallelize parallelizes the function calls
func Parallelize(functions ...func()) {
    var waitGroup sync.WaitGroup
    waitGroup.Add(len(functions))

    defer waitGroup.Wait()

    for _, function := range functions {
        go func(copy func()) {
            defer waitGroup.Done()
            copy()
        }(function)
    }
}

So your example could be solved this way:

func main() {
    links := parseLinks()

    functions := []func(){}
    for _, url := range links {
        if isExcelDocument(url) {
            function := func(url string){
                return func() { downloadFromURL(url) }
            }(url)

            functions = append(functions, function)
        } else {
            fmt.Printf("Skipping: %v 
", url)
        }
    }

    Parallelize(functions...)
}

func downloadFromURL(url string) {
    ...
}

If you would like to use it, you can find it here https://github.com/shomali11/util