Skip to content

Commit a669977

Browse files
committed
Fix correctness issues in eBPF programs and userland
- bpf/counter.bpf.c: use bpf_xdp_get_buff_len() for accurate XDP packet length instead of data_end - data arithmetic
- bpf/counter.bpf.c: mask PID with 0xFFFFFFFF to extract tgid only from bpf_get_current_pid_tgid() result
- bpf/counter.bpf.c: convert process_tcp/process_udp_recv return type to bool so callers can short-circuit on unsupported address families
- bpf/counter.bpf.c: fix IPv4-in-IPv6 address construction to use the ip4in6 prefix constant via memcpy instead of partial s6_addr16 writes
- bpf/counter.bpf.c: fix sport byte order in process_tcp using bpf_ntohs(inet_sport) and read iphdr->protocol for UDP kprobe filter
- bpf/counter.bpf.c: add src/dst swap for UDP receive direction and fix UDP length with bpf_ntohs(); guard skb_consume_udp on len <= 0
- bpf/counter.bpf.c: fix ICMP payload length with underflow guard and set type/code directly from __icmp_send kprobe arguments
- bpf/cgroup.bpf.c: set perf_cgroup_event max_entries to 0 for dynamic per-CPU sizing
- cgroup.go: add 10ms sleep on perf read errors to avoid busy-looping; refresh cgroup cache on lost perf samples
- probe.go: move dead error check for RawTracepoint support to correct location before AttachRawTracepoint call
- output.go: fix misleading comments in srcIPSort/dstIPSort (ascending, not descending)
1 parent 4a0507c commit a669977

File tree

9 files changed

+77
-31
lines changed

9 files changed

+77
-31
lines changed

bpf/cgroup.bpf.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ struct {
4545
struct {
4646
__uint(type,
4747
BPF_MAP_TYPE_PERF_EVENT_ARRAY); // perf event array requires 4.3 kernel
48-
__uint(max_entries, MAX_ENTRIES);
48+
__uint(max_entries, 0);
4949
__type(key, int);
5050
__type(value, __u32);
5151
} perf_cgroup_event SEC(".maps");

bpf/counter.bpf.c

Lines changed: 58 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -382,7 +382,7 @@ xdp_process_packet(struct xdp_md *xdp) {
382382
void *data = (void *)(long)xdp->data;
383383
void *data_end = (void *)(long)xdp->data_end;
384384

385-
process_eth(data, data_end, data_end - data);
385+
process_eth(data, data_end, bpf_xdp_get_buff_len(xdp));
386386
}
387387

388388
/**
@@ -441,7 +441,7 @@ SEC("cgroup/sock_create")
441441
int cgroup_sock_create(struct bpf_sock *sk) {
442442
__u64 cookie = bpf_get_socket_cookie(sk);
443443
sockinfo ski = {
444-
.pid = bpf_get_current_pid_tgid(),
444+
.pid = bpf_get_current_pid_tgid() & 0xFFFFFFFF,
445445
.comm = {0},
446446
};
447447

@@ -511,21 +511,21 @@ int cgroup_skb_egress(struct __sk_buff *skb) {
511511
*
512512
* @throws none
513513
*/
514-
static inline __attribute__((always_inline)) void
514+
static inline __attribute__((always_inline)) bool
515515
process_tcp(bool receive, struct sock *sk, statkey *key, pid_t pid) {
516516
__u16 family = BPF_CORE_READ(sk, __sk_common.skc_family);
517517

518518
switch (family) {
519519
case AF_INET: {
520520
// convert to V4MAPPED address
521521
__be32 ip4_src = BPF_CORE_READ(sk, __sk_common.skc_rcv_saddr);
522-
key->srcip.s6_addr16[5] = bpf_htons(0xffff);
523-
__builtin_memcpy(&key->srcip.s6_addr32[3], &ip4_src, sizeof(ip4_src));
522+
__builtin_memcpy(key->srcip.s6_addr, ip4in6, sizeof(ip4in6));
523+
__builtin_memcpy(key->srcip.s6_addr + sizeof(ip4in6), &ip4_src, sizeof(ip4_src));
524524

525525
// convert to V4MAPPED address
526526
__be32 ip4_dst = BPF_CORE_READ(sk, __sk_common.skc_daddr);
527-
key->dstip.s6_addr16[5] = bpf_htons(0xffff);
528-
__builtin_memcpy(&key->dstip.s6_addr32[3], &ip4_dst, sizeof(ip4_dst));
527+
__builtin_memcpy(key->dstip.s6_addr, ip4in6, sizeof(ip4in6));
528+
__builtin_memcpy(key->dstip.s6_addr + sizeof(ip4in6), &ip4_dst, sizeof(ip4_dst));
529529

530530
break;
531531
}
@@ -536,14 +536,14 @@ process_tcp(bool receive, struct sock *sk, statkey *key, pid_t pid) {
536536
break;
537537
}
538538
default: {
539-
return;
539+
return false;
540540
}
541541
}
542542

543543
__u16 sport = BPF_CORE_READ(sk, __sk_common.skc_num);
544544
if (sport == 0) {
545545
struct inet_sock *isk = (struct inet_sock *)sk;
546-
BPF_CORE_READ_INTO(&sport, isk, inet_sport);
546+
sport = bpf_ntohs(BPF_CORE_READ(isk, inet_sport));
547547
}
548548
key->src_port = sport;
549549
key->dst_port = bpf_ntohs(BPF_CORE_READ(sk, __sk_common.skc_dport));
@@ -561,6 +561,8 @@ process_tcp(bool receive, struct sock *sk, statkey *key, pid_t pid) {
561561
key->src_port = key->dst_port;
562562
key->dst_port = tmp_port;
563563
}
564+
565+
return true;
564566
}
565567

566568
/**
@@ -579,7 +581,7 @@ process_tcp(bool receive, struct sock *sk, statkey *key, pid_t pid) {
579581
*
580582
* @throws none
581583
*/
582-
static inline __attribute__((always_inline)) void
584+
static inline __attribute__((always_inline)) bool
583585
process_udp_recv(bool receive, struct sk_buff *skb, statkey *key, pid_t pid) {
584586
struct udphdr *udphdr =
585587
(struct udphdr *)(BPF_CORE_READ(skb, head) +
@@ -594,13 +596,13 @@ process_udp_recv(bool receive, struct sk_buff *skb, statkey *key, pid_t pid) {
594596

595597
// convert to V4MAPPED address
596598
__be32 ip4_src = BPF_CORE_READ(iphdr, saddr);
597-
key->srcip.s6_addr16[5] = bpf_htons(0xffff);
598-
__builtin_memcpy(&key->srcip.s6_addr32[3], &ip4_src, sizeof(ip4_src));
599+
__builtin_memcpy(key->srcip.s6_addr, ip4in6, sizeof(ip4in6));
600+
__builtin_memcpy(key->srcip.s6_addr + sizeof(ip4in6), &ip4_src, sizeof(ip4_src));
599601

600602
// convert to V4MAPPED address
601603
__be32 ip4_dst = BPF_CORE_READ(iphdr, daddr);
602-
key->dstip.s6_addr16[5] = bpf_htons(0xffff);
603-
__builtin_memcpy(&key->dstip.s6_addr32[3], &ip4_dst, sizeof(ip4_dst));
604+
__builtin_memcpy(key->dstip.s6_addr, ip4in6, sizeof(ip4in6));
605+
__builtin_memcpy(key->dstip.s6_addr + sizeof(ip4in6), &ip4_dst, sizeof(ip4_dst));
604606
break;
605607
}
606608
case ETH_P_IPV6: {
@@ -614,14 +616,27 @@ process_udp_recv(bool receive, struct sk_buff *skb, statkey *key, pid_t pid) {
614616
break;
615617
}
616618
default:
617-
return;
619+
return false;
618620
}
619621

620622
key->src_port = bpf_ntohs(BPF_CORE_READ(udphdr, source));
621623
key->dst_port = bpf_ntohs(BPF_CORE_READ(udphdr, dest));
622624

623625
key->proto = IPPROTO_UDP;
624626
key->pid = pid;
627+
628+
/* we need to swap the source and destination IP addresses and ports */
629+
if (receive) {
630+
struct in6_addr tmp_ip = key->srcip;
631+
key->srcip = key->dstip;
632+
key->dstip = tmp_ip;
633+
634+
__u16 tmp_port = key->src_port;
635+
key->src_port = key->dst_port;
636+
key->dst_port = tmp_port;
637+
}
638+
639+
return true;
625640
}
626641

627642
/**
@@ -648,13 +663,13 @@ process_icmp4(struct sk_buff *skb, statkey *key, pid_t pid) {
648663

649664
// convert to V4MAPPED address
650665
__be32 ip4_src = BPF_CORE_READ(iphdr, saddr);
651-
key->srcip.s6_addr16[5] = bpf_htons(0xffff);
652-
__builtin_memcpy(&key->srcip.s6_addr32[3], &ip4_src, sizeof(ip4_src));
666+
__builtin_memcpy(key->srcip.s6_addr, ip4in6, sizeof(ip4in6));
667+
__builtin_memcpy(key->srcip.s6_addr + sizeof(ip4in6), &ip4_src, sizeof(ip4_src));
653668

654669
// convert to V4MAPPED address
655670
__be32 ip4_dst = BPF_CORE_READ(iphdr, daddr);
656-
key->dstip.s6_addr16[5] = bpf_htons(0xffff);
657-
__builtin_memcpy(&key->dstip.s6_addr32[3], &ip4_dst, sizeof(ip4_dst));
671+
__builtin_memcpy(key->dstip.s6_addr, ip4in6, sizeof(ip4in6));
672+
__builtin_memcpy(key->dstip.s6_addr + sizeof(ip4in6), &ip4_dst, sizeof(ip4_dst));
658673

659674
// store ICMP type in src port
660675
key->src_port = BPF_CORE_READ(icmphdr, type);
@@ -664,8 +679,9 @@ process_icmp4(struct sk_buff *skb, statkey *key, pid_t pid) {
664679
key->proto = IPPROTO_ICMP;
665680
key->pid = pid;
666681

667-
size_t msglen = bpf_ntohs(BPF_CORE_READ(iphdr, tot_len)) -
668-
BPF_CORE_READ_BITFIELD_PROBED(iphdr, ihl) * 4;
682+
__u16 tot_len = bpf_ntohs(BPF_CORE_READ(iphdr, tot_len));
683+
__u16 ihl_bytes = (__u16)(BPF_CORE_READ_BITFIELD_PROBED(iphdr, ihl) * 4);
684+
size_t msglen = (ihl_bytes <= tot_len) ? (tot_len - ihl_bytes) : 0;
669685

670686
return msglen;
671687
}
@@ -735,8 +751,9 @@ process_udp_send(struct sk_buff *skb, statkey *key, pid_t pid) {
735751
(struct udphdr *)(BPF_CORE_READ(skb, head) +
736752
BPF_CORE_READ(skb, transport_header));
737753

738-
process_udp_recv(false, skb, key, pid);
739-
size_t msglen = BPF_CORE_READ(udphdr, len);
754+
if (!process_udp_recv(false, skb, key, pid))
755+
return 0;
756+
size_t msglen = bpf_ntohs(BPF_CORE_READ(udphdr, len));
740757

741758
return msglen;
742759
}
@@ -891,7 +908,8 @@ int BPF_KPROBE(tcp_sendmsg, struct sock *sk, struct msghdr *msg, size_t size) {
891908

892909
key.cgroupid = bpf_get_current_cgroup_id();
893910

894-
process_tcp(false, sk, &key, pid);
911+
if (!process_tcp(false, sk, &key, pid))
912+
return 0;
895913
update_val(&key, size);
896914

897915
return 0;
@@ -927,7 +945,8 @@ int BPF_KPROBE(tcp_cleanup_rbuf, struct sock *sk, int copied) {
927945

928946
key.cgroupid = bpf_get_current_cgroup_id();
929947

930-
process_tcp(true, sk, &key, pid);
948+
if (!process_tcp(true, sk, &key, pid))
949+
return 0;
931950
update_val(&key, copied);
932951

933952
return 0;
@@ -949,8 +968,9 @@ int BPF_KPROBE(tcp_cleanup_rbuf, struct sock *sk, int copied) {
949968
*/
950969
SEC("kprobe/ip_send_skb")
951970
int BPF_KPROBE(ip_send_skb, struct net *net, struct sk_buff *skb) {
952-
__u16 protocol = BPF_CORE_READ(skb, protocol);
953-
if (protocol != IPPROTO_UDP) {
971+
struct iphdr *iphdr = (struct iphdr *)(BPF_CORE_READ(skb, head) +
972+
BPF_CORE_READ(skb, network_header));
973+
if (BPF_CORE_READ(iphdr, protocol) != IPPROTO_UDP) {
954974
return 0;
955975
}
956976

@@ -987,6 +1007,10 @@ int BPF_KPROBE(ip_send_skb, struct net *net, struct sk_buff *skb) {
9871007
*/
9881008
SEC("kprobe/skb_consume_udp")
9891009
int BPF_KPROBE(skb_consume_udp, struct sock *sk, struct sk_buff *skb, int len) {
1010+
if (len <= 0) {
1011+
return 0;
1012+
}
1013+
9901014
statkey key;
9911015
__builtin_memset(&key, 0, sizeof(key));
9921016

@@ -997,7 +1021,8 @@ int BPF_KPROBE(skb_consume_udp, struct sock *sk, struct sk_buff *skb, int len) {
9971021

9981022
key.cgroupid = bpf_get_current_cgroup_id();
9991023

1000-
process_udp_recv(true, skb, &key, pid);
1024+
if (!process_udp_recv(true, skb, &key, pid))
1025+
return 0;
10011026
update_val(&key, len);
10021027

10031028
return 0;
@@ -1034,6 +1059,11 @@ int BPF_KPROBE(__icmp_send, struct sk_buff *skb, __u8 type, __u8 code,
10341059
key.cgroupid = bpf_get_current_cgroup_id();
10351060

10361061
size_t msglen = process_icmp4(skb, &key, pid);
1062+
/* __icmp_send is called with the triggering packet's skb; its
1063+
* transport_header points to that packet's transport layer (e.g. TCP/UDP),
1064+
* not to an ICMP header. Use the kprobe arguments directly for type/code. */
1065+
key.src_port = type;
1066+
key.dst_port = code;
10371067
update_val(&key, msglen);
10381068

10391069
return 0;

cgroup.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ import (
3131
"strings"
3232
"sync"
3333
"syscall"
34+
"time"
3435

3536
"github.com/cilium/ebpf/perf"
3637
)
@@ -197,6 +198,17 @@ func cGroupWatcher(objs cgroupObjects) (*perf.Reader, error) {
197198
return
198199
}
199200

201+
// avoid tight-spinning on persistent errors
202+
time.Sleep(10 * time.Millisecond)
203+
204+
continue
205+
}
206+
207+
// Perf buffer overflowed: lost samples mean we may have missed
208+
// cgroup mkdir events, so rebuild the cache from the filesystem.
209+
if r.LostSamples > 0 {
210+
cgroupCacheRefresh(CGroupRootPath)
211+
200212
continue
201213
}
202214

cgroup_arm64_bpfel.o

0 Bytes
Binary file not shown.

cgroup_x86_bpfel.o

0 Bytes
Binary file not shown.

counter_arm64_bpfel.o

3.66 KB
Binary file not shown.

counter_x86_bpfel.o

3.67 KB
Binary file not shown.

output.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ func bytesSort(stats []statEntry) {
9696
})
9797
}
9898

99-
// srcIPSort sorts a slice of statEntry objects by their SrcIP field in descending order.
99+
// srcIPSort sorts a slice of statEntry objects by their SrcIP field in ascending order.
100100
//
101101
// Parameters:
102102
//
@@ -107,7 +107,7 @@ func srcIPSort(stats []statEntry) {
107107
})
108108
}
109109

110-
// dstIPSort sorts a slice of statEntry objects by their DstIP field in descending order.
110+
// dstIPSort sorts a slice of statEntry objects by their DstIP field in ascending order.
111111
//
112112
// Parameters:
113113
//

probe.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,10 @@ func startCGroupTrace(objs cgroupObjects, links []link.Link) []link.Link {
292292
return links
293293
}
294294

295+
if err != nil {
296+
log.Fatalf("Error checking RawTracepoint support: %v", err)
297+
}
298+
295299
l, err = link.AttachRawTracepoint(link.RawTracepointOptions{
296300
Program: objs.TraceCgroupMkdir,
297301
Name: "cgroup_mkdir",

0 commit comments

Comments
 (0)