I'm building a Discord bot that feeds an Opus voice stream to the Google Cloud Speech API. I've tried several approaches: sending the Opus packets up directly, decoding the Opus stream to PCM and encoding it into a byte array, and converting the PCM to a byte array directly. In all cases I get:
Could not recognize: code:11 message:"Audio data is being streamed too slow. Please stream audio data approximately at real time."
I've tried sample rates from 8 kHz to 48 kHz with 20 ms frames. I've also tried encoding the converted PCM at the maximum bitrate. I have run the sample code successfully, so there is no connection issue on my end. Where should I look for a solution?
package main
import (
"fmt"
//"io"
"log"
"os"
"flag"
speech "cloud.google.com/go/speech/apiv1"
"golang.org/x/net/context"
speechpb "google.golang.org/genproto/googleapis/cloud/speech/v1"
"github.com/bwmarrin/discordgo"
"layeh.com/gopus"
)
// OnError is the hook invoked whenever the voice pipeline hits a problem.
// The default implementation writes "dgVoice: <str>" to STDERR, followed by
// ": <err>" when err is non-nil. No trailing newline is written.
var OnError = func(str string, err error) {
	msg := "dgVoice: " + str
	if err != nil {
		msg += ": " + err.Error()
	}
	os.Stderr.WriteString(msg)
}
// stream is the package-level streaming-recognize call shared between the
// goroutines of this program: it is assigned once the streaming call has been
// opened during startup, and Send writes audio content to it.
var stream speechpb.Speech_StreamingRecognizeClient
func main() {
var (
Token = flag.String("t", "", "Discord bot token.")
// Email = flag.String("e", "", "Discord account email.")
// Password = flag.String("p", "", "Discord account password.")
GuildID = flag.String("g", "", "Guild ID")
ChannelID = flag.String("c", "", "Channel ID")
)
flag.Parse()
fmt.Println("Connecting to Discord...")
// Connect to Discord
discord, err := discordgo.New(*Token)
if err != nil {
fmt.Println(err)
return
}
fmt.Println("Opening Socket...")
// Open Websocket
err = discord.Open()
if err != nil {
fmt.Println(err)
return
}
fmt.Println("Joining Channel...")
// Connect to voice channel.
// NOTE: Setting mute to false, deaf to true.
dgv, err := discord.ChannelVoiceJoin(*GuildID, *ChannelID, false, false)
if err != nil {
fmt.Println(err)
return
}
fmt.Println("Connecting to Google Speech Recognition API...")
ctx := context.Background()
// [START speech_streaming_mic_recognize]
client, err := speech.NewClient(ctx)
if err != nil {
log.Fatal(err)
}
stream, err = client.StreamingRecognize(ctx)
if err != nil {
log.Fatal(err)
}
// Send the initial configuration message.
if err := stream.Send(&speechpb.StreamingRecognizeRequest{
StreamingRequest: &speechpb.StreamingRecognizeRequest_StreamingConfig{
StreamingConfig: &speechpb.StreamingRecognitionConfig{
Config: &speechpb.RecognitionConfig{
Encoding: speechpb.RecognitionConfig_LINEAR16,
SampleRateHertz: 16000,
LanguageCode: "en-US",
},
//InterimResults: true,
SingleUtterance: true,
},
},
}); err != nil {
log.Fatal(err)
}
recv := make(chan *discordgo.Packet, 2)
go Receive(dgv, recv)
send := make(chan []int16, 2)
go Send(dgv, send)
// dgv.Speaking(true)
// defer dgv.Speaking(false)
go func() {
for {
p, ok := <-recv
if !ok {
fmt.Println("Not OK")
return
}
send <- p.PCM
}
} ()
for {
resp, err := stream.Recv()
//fmt.Printf("%+v
",resp)
if err != nil {
log.Fatalf("Cannot stream results: %v", err)
}
if err := resp.Error; err != nil {
log.Fatalf("Could not recognize: %v", err)
}
for _, result := range resp.Results {
fmt.Printf("Result: %+v
", result)
}
}
// Close connections
dgv.Close()
discord.Close()
return
}
// Receive reads Opus packets from v.OpusRecv, decodes each one to 16 kHz mono
// PCM, stores the samples in the packet's PCM field and forwards the packet
// on c.
//
// One decoder is kept per SSRC. Receive returns immediately when c is nil,
// and returns when v.OpusRecv is closed. It does not close c. Decoder
// creation and decode failures are reported through OnError and the
// offending packet is dropped.
func Receive(v *discordgo.VoiceConnection, c chan *discordgo.Packet) {
	const (
		sampleRate = 16000 // Hz
		channels   = 1
		frameSize  = 320 // samples per channel: 20 ms at 16 kHz
	)

	if c == nil {
		return
	}

	speakers := make(map[uint32]*gopus.Decoder)
	for {
		p, ok := <-v.OpusRecv
		if !ok {
			return
		}

		dec, ok := speakers[p.SSRC]
		if !ok {
			var err error
			dec, err = gopus.NewDecoder(sampleRate, channels)
			if err != nil {
				OnError("error creating opus decoder", err)
				continue
			}
			// Store the decoder only after it was created successfully, so a
			// failed attempt is retried on the next packet instead of leaving
			// a nil decoder in the map.
			speakers[p.SSRC] = dec
		}

		pcm, err := dec.Decode(p.Opus, frameSize, false)
		if err != nil {
			OnError("Error decoding opus data", err)
			continue
		}
		p.PCM = pcm

		c <- p
	}
}
// Send reads PCM frames from pcm, serialises every sample as a 16-bit
// little-endian value (the byte layout of LINEAR16 audio) and writes the
// resulting bytes to the package-level stream as audio content.
//
// Send returns when pcm is closed or when writing to the stream fails; both
// conditions are reported through OnError. The parameter v is not used by
// the function body.
func Send(v *discordgo.VoiceConnection, pcm <-chan []int16) {
	for {
		// Read PCM from the channel; exit if the channel is closed.
		samples, ok := <-pcm
		if !ok {
			OnError("PCM Channel closed", nil)
			return
		}

		// Two bytes per sample: low byte first, then high byte.
		buf := make([]byte, 2*len(samples))
		for i, s := range samples {
			buf[2*i] = byte(s)
			buf[2*i+1] = byte(s >> 8)
		}

		err := stream.Send(&speechpb.StreamingRecognizeRequest{
			StreamingRequest: &speechpb.StreamingRecognizeRequest_AudioContent{
				AudioContent: buf,
			},
		})
		if err != nil {
			OnError("error sending audio to speech stream", err)
			return
		}
	}
}