网络链接序列号乱序

I'm using the netlink and genetlink Go packages to write a utility to interact with the MAC80211_HWSIM netlink family functions. It's working well ... almost.

I'm finding that if I do a GET_RADIO call, the next call into the family fails to validate because the sequence number of the response doesn't match that of the request. Specifically, the pattern I see is

GET_RADIO request:  seq=655
GET_RADIO response: seq=655
DEL_RADIO request:  seq=656
DEL_RADIO response: seq=655

It doesn't matter which command I issue after GET_RADIO, this same pattern shows up. If GET_RADIO isn't in the mix, then the sequence numbers are exactly as expected.

There's an existing command written in C, using libnl and libgenl, which I modified to run more than one command. As best I can tell, that implementation does a very similar thing, but doesn't have the issue with the sequence numbers, so I'm disinclined to think the problem lies with the kernel module. But strace clearly shows that the kernel is responding with an out-of-sequence number (please forgive the excessive side-scrolling):

socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC) = 3
bind(3, {sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, 12) = 0
getsockname(3, {sa_family=AF_NETLINK, nl_pid=18357, nl_groups=00000000}, [112->12]) = 0
sendmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base={{len=40, type=0x10 /* NLMSG_??? */, flags=NLM_F_REQUEST, seq=2596996163, pid=18357}, "\3\1\0\0\23\0\2\0MAC80211_HWSIM\0\0"}, iov_len=40}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 40
recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=112->12, msg_iov=[{iov_base=[{{len=224, type=0x10 /* NLMSG_??? */, flags=0, seq=2596996163, pid=18357}, "\1\2\0\0\23\0\2\0MAC80211_HWSIM\0\0\6\0\1\0\31\0\0\0"...}, {{len=0, type=0 /* NLMSG_??? */, flags=0, seq=0, pid=0}}], iov_len=4096}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, MSG_PEEK) = 224
recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=112->12, msg_iov=[{iov_base=[{{len=224, type=0x10 /* NLMSG_??? */, flags=0, seq=2596996163, pid=18357}, "\1\2\0\0\23\0\2\0MAC80211_HWSIM\0\0\6\0\1\0\31\0\0\0"...}, {{len=0, type=0 /* NLMSG_??? */, flags=0, seq=0, pid=0}}], iov_len=4096}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 224
sendmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base={{len=28, type=0x19 /* NLMSG_??? */, flags=NLM_F_REQUEST|NLM_F_ACK, seq=2596996164, pid=18357}, "\6\1\0\0\10\0
\0T\0\0\0"}, iov_len=28}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 28
recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=112->12, msg_iov=[{iov_base=[{{len=48, type=0x19 /* NLMSG_??? */, flags=0, seq=2596996164, pid=18357}, "\6\1\0\0\10\0
\0T\0\0\0\10\0\t\0\1\0\0\0\t\0\21\0phy84\0\0\0"}, {{len=0, type=0 /* NLMSG_??? */, flags=0, seq=0, pid=0}}], iov_len=4096}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, MSG_PEEK) = 48
recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=112->12, msg_iov=[{iov_base=[{{len=48, type=0x19 /* NLMSG_??? */, flags=0, seq=2596996164, pid=18357}, "\6\1\0\0\10\0
\0T\0\0\0\10\0\t\0\1\0\0\0\t\0\21\0phy84\0\0\0"}, {{len=0, type=0 /* NLMSG_??? */, flags=0, seq=0, pid=0}}], iov_len=4096}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 48
sendmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base={{len=28, type=0x19 /* NLMSG_??? */, flags=NLM_F_REQUEST|NLM_F_ACK, seq=2596996165, pid=18357}, "\5\1\0\0\10\0
\0T\0\0\0"}, iov_len=28}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 28
recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=112->12, msg_iov=[{iov_base=[{{len=36, type=NLMSG_ERROR, flags=0, seq=2596996164, pid=18357}, "\0\0\0\0\34\0\0\0\31\0\5\0D\4\313\232\265G\0\0"}, {{len=0, type=0 /* NLMSG_??? */, flags=0, seq=0, pid=0}}], iov_len=4096}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, MSG_PEEK) = 36
recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=112->12, msg_iov=[{iov_base=[{{len=36, type=NLMSG_ERROR, flags=0, seq=2596996164, pid=18357}, "\0\0\0\0\34\0\0\0\31\0\5\0D\4\313\232\265G\0\0"}, {{len=0, type=0 /* NLMSG_??? */, flags=0, seq=0, pid=0}}], iov_len=4096}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 36
close(3)                                = 0
+++ exited with 0 +++

Note that there are two recvmsg() calls for each sendmsg() because the first one simply does a MSG_PEEK to see if an answer is available. Of the three sets of requests and responses, the first (sequence ...163) is simply getting the family information. The second (sequence 164) is doing the GET_RADIO (\6) call, and the third is the one with the out-of-sequence response: this DEL_RADIO (\5) call sends a message with a ...165 sequence number, but the response that comes back, otherwise well-formed, has a sequence number matching the previous command: ...164.

Because this is Go, there are multiple threads active at once, but all the calls in the strace output above are from the same thread. The only calls missing from the trace are those to futex().

The C version, because it uses the callback mechanisms provided by libnl and libgenl, does process the response in a different thread than the request came from. The request trace:

socket(AF_NETLINK, SOCK_RAW|SOCK_CLOEXEC, NETLINK_GENERIC) = 3
setsockopt(3, SOL_SOCKET, SO_SNDBUF, [32768], 4) = 0
setsockopt(3, SOL_SOCKET, SO_RCVBUF, [32768], 4) = 0
getpid()                                = 21338
bind(3, {sa_family=AF_NETLINK, nl_pid=-1300212902, nl_groups=00000000}, 12) = 0
getsockname(3, {sa_family=AF_NETLINK, nl_pid=-1300212902, nl_groups=00000000}, [12]) = 0
sendmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base={{len=20, type=0x10 /* NLMSG_??? */, flags=NLM_F_REQUEST|NLM_F_ACK|0x300, seq=1530817156, pid=2994754394}, "\3\1\0\0"}, iov_len=20}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 20
recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[{{len=116, type=0x10 /* NLMSG_??? */, flags=NLM_F_MULTI, seq=1530817156, pid=2994754394}, "\1\2\0\0\v\0\2\0nlctrl\0\0\6\0\1\0\20\0\0\0\10\0\3\0\2\0\0\0"...}, {{len=96, type=0x10 /* NLMSG_??? */, flags=NLM_F_MULTI, seq=1530817156, pid=2994754394}, "\1\2\0\0\16\0\2\0VFS_DQUOT\0\0\0\6\0\1\0\21\0\0\0\10\0\3\0"...}, {{len=104, type=0x10 /* NLMSG_??? */, flags=NLM_F_MULTI, seq=1530817156, pid=2994754394}, "\1\2\0\0\17\0\2\0acpi_event\0\0\6\0\1\0\23\0\0\0\10\0\3\0"...}, {{len=108, type=0x10 /* NLMSG_??? */, flags=NLM_F_MULTI, seq=1530817156, pid=2994754394}, "\1\2\0\0\22\0\2\0thermal_event\0\0\0\6\0\1\0\24\0\0\0"...}, {{len=112, type=0x10 /* NLMSG_??? */, flags=NLM_F_MULTI, seq=1530817156, pid=2994754394}, "\1\2\0\0\20\0\2\0tcp_metrics\0\6\0\1\0\25\0\0\0\10\0\3\0"...}, {{len=112, type=0x10 /* NLMSG_??? */, flags=NLM_F_MULTI, seq=1530817156, pid=2994754394}, "\1\2\0\0\16\0\2\0TASKSTATS\0\0\0\6\0\1\0\26\0\0\0\10\0\3\0"...}, {{len=2076, type=0x10 /* NLMSG_??? */, flags=NLM_F_MULTI, seq=1530817156, pid=2994754394}, "\1\2\0\0\f\0\2\0nl80211\0\6\0\1\0\30\0\0\0\10\0\3\0\1\0\0\0"...}, {{len=224, type=0x10 /* NLMSG_??? */, flags=NLM_F_MULTI, seq=1530817156, pid=2994754394}, "\1\2\0\0\23\0\2\0MAC80211_HWSIM\0\0\6\0\1\0\31\0\0\0"...}, {{len=0, type=0 /* NLMSG_??? */, flags=0, seq=0, pid=0}}], iov_len=16384}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 2948
recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[{{len=20, type=NLMSG_DONE, flags=NLM_F_MULTI, seq=1530817156, pid=2994754394}, "\0\0\0\0"}, {{len=0, type=0 /* NLMSG_??? */, flags=0, seq=0, pid=65542}}], iov_len=16384}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 20
mmap(NULL, 8392704, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_STACK, -1, 0) = 0x7fdb752ce000
mprotect(0x7fdb752ce000, 4096, PROT_NONE) = 0
clone(child_stack=0x7fdb75acdff0, flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, parent_tidptr=0x7fdb75ace9d0, tls=0x7fdb75ace700, child_tidptr=0x7fdb75ace9d0) = 21339
sendmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base={{len=28, type=0x19 /* NLMSG_??? */, flags=NLM_F_REQUEST|NLM_F_ACK, seq=1530817157, pid=2994754394}, "\6\1\0\0\10\0
\0_\0\0\0"}, iov_len=28}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 28
nanosleep({tv_sec=2, tv_nsec=0}, 0x7ffff64ac200) = 0
write(1, "Deleting radio with id '95'...
", 31) = 31
sendmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base={{len=28, type=0x19 /* NLMSG_??? */, flags=NLM_F_REQUEST|NLM_F_ACK, seq=1530817158, pid=2994754394}, "\5\1\0\0\10\0
\0_\0\0\0"}, iov_len=28}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 28
nanosleep({tv_sec=2, tv_nsec=0},  <unfinished ...>) = ?
+++ exited with 0 +++

and the response trace:

recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[{{len=48, type=0x19 /* NLMSG_??? */, flags=0, seq=1530817157, pid=2994754394}, "\6\1\0\0\10\0
\0_\0\0\0\10\0\t\0\1\0\0\0\t\0\21\0phy95\0\0\0"}, {{len=0, type=0 /* NLMSG_??? */, flags=0, seq=0, pid=0}}], iov_len=16384}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 48
epoll_wait(4, [{EPOLLIN, {u32=3, u64=3}}], 32, -1) = 1
recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[{{len=36, type=NLMSG_ERROR, flags=0, seq=1530817157, pid=2994754394}, "\0\0\0\0\34\0\0\0\31\0\5\0\205j>[ZS\200\262"}, {{len=1114121, type=0x6870 /* NLMSG_??? */, flags=NLM_F_REQUEST|NLM_F_ECHO|NLM_F_DUMP_INTR|NLM_F_DUMP_FILTERED|0x3940, seq=53, pid=0}, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"...}], iov_len=16384}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 36
fstat(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 0), ...}) = 0
write(1, "new SSID defined to interface 95"..., 33) = 33
epoll_wait(4, [{EPOLLIN, {u32=3, u64=3}}], 32, -1) = 1
recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[{{len=36, type=NLMSG_ERROR, flags=0, seq=1530817158, pid=2994754394}, "\0\0\0\0\34\0\0\0\31\0\5\0\206j>[ZS\200\262"}, {{len=1114121, type=0x6870 /* NLMSG_??? */, flags=NLM_F_REQUEST|NLM_F_ECHO|NLM_F_DUMP_INTR|NLM_F_DUMP_FILTERED|0x3940, seq=53, pid=0}, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"...}], iov_len=16384}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 36
write(1, "Successfully deleted device with"..., 39) = 39
exit_group(0)                                       = ?
+++ exited with 0 +++

The only thing that seems relevant to me (other than the threading structure) is that it uses different flags (NLM_F_DUMP_INTR|NLM_F_DUMP_FILTERED in the C code, 0 in the Go code).

I'm not sure what other tools I can bring to bear on this, since I don't know how those sequence numbers are constructed. There didn't seem to me to be anything unusually wrong with the code in the kernel module, in the Go packages, or in my Go code, but there could always be a mismatch between the way an interface was supposed to be used and the way it actually was. Does this problem seem familiar to anyone, or does anyone have any thoughts on how to make progress on this?

I still don't understand why this situation occurs, but it appears the TL;DR fix is to read the next packet: it will have the same data with the correct sequence number.

I tried writing a program with a different netlink package, and discovered that while it had the same issue, it repeated the recvmsg() call when it runs into this situation. The code that does that is here:

https://github.com/vishvananda/netlink/blob/a06dabf/nl/nl_linux.go#L425

The commit that introduced that code doesn't explain why this loop is correct, but emulating it in the other package takes care of the problem.