为什么用Golang编写的某些函数运行起来比用Java还要慢? [关闭]

I have tested several simple functions in Golang and Java. To my surprise, Java is sometimes faster than Golang (especially for recursive functions and for some standard-library functions such as math/rand.Rand). I wonder why. Here is the code I used for the tests, together with the results.

Golang code:

package main

import (
    "fmt"
    "math/rand"
    "time"
)

// calPi estimates pi with a Monte Carlo simulation. It draws pointCount
// random points (x, y) from the unit square and returns four times the
// fraction of them that fall strictly inside the unit circle (x*x+y*y < 1).
//
// When pointCount is 0 the final division is 0/0, so the result is NaN.
func calPi(pointCount int) float64 {
    hits := 0

    for n := 0; n < pointCount; n++ {
        // x is drawn before y so the random stream is consumed in a fixed order.
        x := rand.Float64()
        y := rand.Float64()

        if x*x+y*y >= 1 {
            continue // on or outside the circle: not a hit
        }
        hits++
    }

    return (4.0 * float64(hits)) / float64(pointCount)
}

// fibonacci returns the c-th Fibonacci number using the naive doubly
// recursive definition (no memoisation), so its running time grows
// exponentially with c. Any c below 2, including negative values, is
// returned unchanged.
func fibonacci(c int64) int64 {
    if c >= 2 {
        twoBack := fibonacci(c - 2)
        oneBack := fibonacci(c - 1)

        return twoBack + oneBack
    }

    return c
}

func main() {
    rand.Seed(time.Now().Unix()) 

    fmt.Printf("Test 1
")

    startTime := time.Now()

    result := 0.0

    for i := 0.0; i < 1000000000; i = i + 1 {
        result += i * i
    }

    endTime := time.Now()

    fmt.Printf("Result: %v
", result)

    fmt.Printf("Duration: %v
", endTime.Sub(startTime))

    fmt.Printf("Test 2
")

    startTime = time.Now()

    resultInt := fibonacci(50)

    endTime = time.Now()

    fmt.Printf("Result: %v
", resultInt)

    fmt.Printf("Duration: %v
", endTime.Sub(startTime))

    fmt.Printf("Test 3
")

    startTime = time.Now()

    result = 0.0

    for i := 0.0; i < 100000000; i = i + 1 {
        result += rand.Float64()
    }

    endTime = time.Now()

    fmt.Printf("Result: %v
", result)

    fmt.Printf("Duration: %v
 s", endTime.Sub(startTime))

    fmt.Printf("Test 4
")

    startTime = time.Now()

    result = calPi(100000000)

    endTime = time.Now()

    fmt.Printf("Result: %v
", result)

    fmt.Printf("Duration: %v s
", endTime.Sub(startTime))

}

the result:

Test 1
Result: 3.333333328333552e+26
Duration: 1.449212507s
Test 2
Result: 12586269025
Duration: 1m31.645050682s
Test 3
Result: 4.999483069673434e+07
Duration: 2.534121566s
 sTest 4
Result: 3.14147056
Duration: 5.036491495s s

Java code:

/**
 * Timing micro-benchmarks: a floating-point accumulation loop, a naive
 * recursive Fibonacci, repeated {@link Math#random()} calls and a Monte
 * Carlo estimate of pi. Each test prints its result and its wall-clock
 * duration in seconds.
 */
public class Performance {

    /**
     * Estimates pi by sampling random points in the unit square.
     *
     * @param pointCount number of random points to draw
     * @return four times the fraction of points with x*x + y*y &lt; 1;
     *         NaN when {@code pointCount} is 0 (0.0 / 0)
     */
    public static double calPi(int pointCount) {
        int hits = 0;

        for (int drawn = 0; drawn < pointCount; drawn++) {
            // x is drawn before y so the random stream is consumed in a fixed order.
            final double x = Math.random();
            final double y = Math.random();

            if (x * x + y * y >= 1) {
                continue; // on or outside the circle: not a hit
            }
            hits++;
        }

        return (4.0 * hits) / pointCount;
    }

    /**
     * Evaluates {@code a * b / (c + 1) + a}.
     *
     * @param a first factor, also added to the quotient
     * @param b second factor
     * @param c the divisor is {@code c + 1}
     * @return the value of the expression
     */
    public static double cal(double a, double b, double c) {
        final double quotient = a * b / (c + 1);
        return quotient + a;
    }

    /**
     * Naive doubly recursive Fibonacci (no memoisation).
     *
     * @param c index of the Fibonacci number
     * @return the c-th Fibonacci number; any {@code c} below 2 is returned unchanged
     */
    public static long fibonacci(long c) {
        return (c < 2) ? c : fibonacci(c - 2) + fibonacci(c - 1);
    }

    /**
     * Prints the elapsed time between two {@link System#currentTimeMillis()}
     * readings as float seconds.
     */
    private static void printDuration(long startMillis, long endMillis) {
        float seconds = (endMillis - startMillis) / 1000f;
        System.out.println("Duration: " + seconds + " s");
    }

    /**
     * Runs the four benchmarks in order and prints their results and durations.
     *
     * @param args unused
     */
    public static void main(String[] args) {

        // Test 1: sum of i*i over one billion iterations of a double counter.
        System.out.println("Test 1");

        long begin = System.currentTimeMillis();

        double sum = 0.0;
        for (double i = 0.0; i < 1000000000; i += 1) {
            sum += i * i;
        }

        long finish = System.currentTimeMillis();

        System.out.println("Result: " + sum);
        printDuration(begin, finish);

        // Test 2: naive recursive Fibonacci.
        System.out.println("Test 2");

        begin = System.currentTimeMillis();

        long fib = fibonacci(50);

        finish = System.currentTimeMillis();

        System.out.println("Result: " + fib);
        printDuration(begin, finish);

        // Test 3: sum of 100 million Math.random() values.
        System.out.println("Test 3");

        begin = System.currentTimeMillis();

        sum = 0.0;
        for (double i = 0; i < 100000000; i += 1) {
            sum += Math.random();
        }

        finish = System.currentTimeMillis();

        System.out.println("Result: " + sum);
        printDuration(begin, finish);

        // Test 4: Monte Carlo estimate of pi with 100 million points.
        System.out.println("Test 4");

        begin = System.currentTimeMillis();

        sum = calPi(100000000);

        finish = System.currentTimeMillis();

        System.out.println("Result: " + sum);
        printDuration(begin, finish);

    }
}

result:

Test 1
Result: 3.333333328333552E26
Duration: 2.948 s
Test 2
Result: 12586269025
Duration: 60.816 s
Test 3
Result: 4.9999087237930864E7
Duration: 2.448 s
Test 4
Result: 3.14147284
Duration: 4.786 s

The difference in the Test 2 results really shocked me! Please help me find the reason, thanks. It would be even better if someone could give me example(s) that show the advantage of Golang (vs. Java).

Both Java and Golang programs are compiled into machine code before being executed—in Java's case, that is the job of the JVM's JIT (just-in-time) compiler. As for the performance comparison, there must be a not-so-subtle difference between the machine code generated by each.

Unfortunately, I don't have access to the machine code generated by the Java JIT compiler, but we can take a look at what has been generated by the Go compiler (v1.11.4-amd64) for the fibonacci function:

        # fibonacci as emitted by the Go compiler (simplified). The argument
        # "c", the result "r" and the scratch value "temp" are stack slots.
        # The frame-setup prologue is not shown in this listing.
        # Do the comparison
        MOVQ    "c", AX
        CMPQ    AX, $2
        JGE     @ELSE
        # Base case (c < 2): save the func result, r = c
        MOVQ    AX, "r"
        # Clean up and return: reload BP from its slot at 24(SP) and
        # release the 32-byte frame
        MOVQ    24(SP), BP
        ADDQ    $32, SP
        RET
@ELSE:
        # Compute fib(c - 2): the outgoing argument is stored at (SP)
        LEAQ    -2(AX), CX
        MOVQ    CX, (SP)
        CALL    fibonacci
        # Save the call result: it is read back from 8(SP)
        MOVQ    8(SP), AX
        MOVQ    AX, "temp"
        # Compute fib(c - 1): "c" is reloaded from the stack first
        MOVQ    "c", CX
        DECQ    CX
        MOVQ    CX, (SP)
        CALL    fibonacci
        # Add previous results together: 8(SP) holds fib(c - 1);
        # 16(SP) is presumably the "temp" slot holding fib(c - 2)
        MOVQ    16(SP), AX
        ADDQ    8(SP), AX
        # Save the func result
        MOVQ    AX, "r"
        # Clean up and return
        MOVQ    24(SP), BP
        ADDQ    $32, SP
        RET

Note that this is not the exact compiler output; I've modified it a little to make it clearer. Quoted variables are stack positions.

My conclusion is that while the Go compiler does employ some optimization techniques to generate more performant code (see Compiler Optimization), it does not do very well at allocating CPU registers (compare it with what a C compiler would generate) and relies too heavily on the stack, especially for return values. I think there has to be a reason for that, probably related to the way the language works (e.g. multiple return values).

Update 1

Just for comparison, this is the machine code generated by GCC (amd64) for the same function:

        # Prologue: set up the frame pointer and save the two registers
        # (%r14, %rbx) that this function overwrites
        pushq %rbp
        movq  %rsp, %rbp
        pushq %r14
        pushq %rbx
        # Do the comparison: the argument "c" arrives in %rdi and is
        # kept in %rbx for the rest of the function
        movq  %rdi, %rbx
        cmpq  $2, %rbx
        jge @ELSE
        # Base case (c < 2): save "c" in "r" (the return register %rax)
        movq  %rbx, %rax
        jmp @RETURN
@ELSE:
        # Compute fib(c - 2)
        leaq  -2(%rbx), %rdi
        callq fibonacci
        # Keep fib(c - 2) in %r14 across the next call, then
        # compute fib(c - 1)
        movq  %rax, %r14
        decq  %rbx
        movq  %rbx, %rdi
        callq fibonacci
        # Add previous results together; the sum stays in %rax
        addq  %r14, %rax
@RETURN:
        # Epilogue: restore the saved registers in reverse order and return
        popq  %rbx
        popq  %r14
        popq  %rbp
        retq

Update 2

That being said, I strongly believe that in real-world projects, the language runtime (e.g. object allocation, garbage collection, call indirection, dynamic loading, concurrency support, etc.) would have a much greater effect on the overall performance of the program, rather than micro-optimizations on the function level.