@@ -182,14 +182,42 @@ process_ip6(struct ipv6hdr *ip6, void *data_end, statkey *key) {
182182 return NOK ;
183183 }
184184
185- // IPv6 copy of source IP, destination IP and transport protocol
185+ // IPv6 copy of source IP and destination IP
186186 key -> srcip = ip6 -> saddr ;
187187 key -> dstip = ip6 -> daddr ;
188- key -> proto = ip6 -> nexthdr ;
189188
190- switch (ip6 -> nexthdr ) {
189+ // Walk optional extension headers to find the actual transport protocol.
190+ // Variable-length extension headers (HopByHop, Routing, Destination):
191+ // total size = (hdr_ext_len + 1) * 8 bytes; next_header at offset 0.
192+ // Fragment header: fixed 8 bytes; next_header at offset 0.
193+ __u8 nexthdr = ip6 -> nexthdr ;
194+ void * transport = (void * )ip6 + sizeof (* ip6 );
195+
196+ #pragma unroll
197+ for (int i = 0 ; i < 6 ; i ++ ) {
198+ if (nexthdr != IPPROTO_HOPOPTS && nexthdr != IPPROTO_ROUTING &&
199+ nexthdr != IPPROTO_FRAGMENT && nexthdr != IPPROTO_DSTOPTS ) {
200+ break ;
201+ }
202+ if (nexthdr == IPPROTO_FRAGMENT ) {
203+ if (transport + 8 > data_end )
204+ return NOK ;
205+ nexthdr = ((__u8 * )transport )[0 ];
206+ transport += 8 ;
207+ } else {
208+ if (transport + 2 > data_end )
209+ return NOK ;
210+ __u8 hdrlen = ((__u8 * )transport )[1 ];
211+ nexthdr = ((__u8 * )transport )[0 ];
212+ transport += ((__u32 )(hdrlen + 1 ) * 8 );
213+ }
214+ }
215+
216+ key -> proto = nexthdr ;
217+
218+ switch (nexthdr ) {
191219 case IPPROTO_TCP : {
192- struct tcphdr * tcp = ( void * ) ip6 + sizeof ( * ip6 ) ;
220+ struct tcphdr * tcp = transport ;
193221
194222 // validate TCP size
195223 if ((void * )tcp + sizeof (* tcp ) > data_end ) {
@@ -202,7 +230,7 @@ process_ip6(struct ipv6hdr *ip6, void *data_end, statkey *key) {
202230 break ;
203231 }
204232 case IPPROTO_UDP : {
205- struct udphdr * udp = ( void * ) ip6 + sizeof ( * ip6 ) ;
233+ struct udphdr * udp = transport ;
206234
207235 // validate UDP size
208236 if ((void * )udp + sizeof (* udp ) > data_end ) {
@@ -215,7 +243,7 @@ process_ip6(struct ipv6hdr *ip6, void *data_end, statkey *key) {
215243 break ;
216244 }
217245 case IPPROTO_ICMPV6 : {
218- struct icmp6hdr * icmp = ( void * ) ip6 + sizeof ( * ip6 ) ;
246+ struct icmp6hdr * icmp = transport ;
219247
220248 // validate ICMPv6 size
221249 if ((void * )icmp + sizeof (* icmp ) > data_end ) {
@@ -355,6 +383,8 @@ process_cgroup_skb(struct __sk_buff *skb) {
355383 __builtin_memcpy (key .comm , ski -> comm , sizeof (key .comm ));
356384 }
357385
386+ key .cgroupid = bpf_get_current_cgroup_id ();
387+
358388 statvalue * val = (statvalue * )bpf_map_lookup_elem (& pkt_count , & key );
359389 if (val ) {
360390 // atomic XADD, doesn't need bpf_spin_lock()
@@ -769,12 +799,12 @@ process_icmp6(struct sk_buff *skb, statkey *key, pid_t pid) {
769799 */
770800static inline __attribute__((always_inline )) size_t
771801process_udp_send (struct sk_buff * skb , statkey * key , pid_t pid ) {
802+ if (!process_udp_recv (false, skb , key , pid ))
803+ return 0 ;
804+
772805 struct udphdr * udphdr =
773806 (struct udphdr * )(BPF_CORE_READ (skb , head ) +
774807 BPF_CORE_READ (skb , transport_header ));
775-
776- if (!process_udp_recv (false, skb , key , pid ))
777- return 0 ;
778808 size_t msglen = bpf_ntohs (BPF_CORE_READ (udphdr , len ));
779809
780810 return msglen ;
@@ -1028,10 +1058,11 @@ int BPF_KPROBE(ip_send_skb, struct net *net, struct sk_buff *skb) {
10281058 */
10291059SEC ("kprobe/ip6_send_skb" )
10301060int BPF_KPROBE (ip6_send_skb , struct sk_buff * skb ) {
1031- struct ipv6hdr * ip6hdr =
1032- (struct ipv6hdr * )(BPF_CORE_READ (skb , head ) +
1033- BPF_CORE_READ (skb , network_header ));
1034- if (BPF_CORE_READ (ip6hdr , nexthdr ) != IPPROTO_UDP ) {
1061+ // Use sk_protocol from the socket rather than ip6hdr->nexthdr so that
1062+ // IPv6 packets with extension headers (where nexthdr != IPPROTO_UDP) are
1063+ // still accounted for correctly.
1064+ struct sock * sk = BPF_CORE_READ (skb , sk );
1065+ if (!sk || BPF_CORE_READ (sk , sk_protocol ) != IPPROTO_UDP ) {
10351066 return 0 ;
10361067 }
10371068
@@ -1120,7 +1151,10 @@ int BPF_KPROBE(__icmp_send, struct sk_buff *skb, __u8 type, __u8 code,
11201151
11211152 key .cgroupid = bpf_get_current_cgroup_id ();
11221153
1123- size_t msglen = process_icmp4 (skb , & key , pid );
1154+ /* process_icmp4 populates src/dst IPs from the triggering packet's IP
1155+ * header; its returned msglen (triggering payload) is not used here. */
1156+ process_icmp4 (skb , & key , pid );
1157+
11241158 /* __icmp_send is called with the triggering packet's skb, so src/dst IPs
11251159 * come from the original packet (src=sender, dst=us). Swap them to reflect
11261160 * the actual ICMP error direction (src=us, dst=original sender).
@@ -1131,6 +1165,20 @@ int BPF_KPROBE(__icmp_send, struct sk_buff *skb, __u8 type, __u8 code,
11311165 key .dstip = tmp_ip ;
11321166 key .src_port = type ;
11331167 key .dst_port = code ;
1168+
1169+ /* Compute the actual ICMP error response size per RFC 792 / RFC 1812:
1170+ * 8-byte ICMP header + original IP header + first 8 bytes of original data.
1171+ * Using the triggering packet's tot_len (as process_icmp4 did) over-counts
1172+ * by including the full original payload instead of just the 8-byte excerpt.
1173+ */
1174+ struct iphdr * iphdr = (struct iphdr * )(BPF_CORE_READ (skb , head ) +
1175+ BPF_CORE_READ (skb , network_header ));
1176+ __u16 tot_len = bpf_ntohs (BPF_CORE_READ (iphdr , tot_len ));
1177+ __u16 ihl_bytes = (__u16 )(BPF_CORE_READ_BITFIELD_PROBED (iphdr , ihl ) * 4 );
1178+ __u16 payload = (ihl_bytes <= tot_len ) ? (tot_len - ihl_bytes ) : 0 ;
1179+ size_t msglen =
1180+ sizeof (struct icmphdr ) + ihl_bytes + (payload > 8 ? 8 : payload );
1181+
11341182 update_val (& key , msglen );
11351183
11361184 return 0 ;
@@ -1165,7 +1213,10 @@ int BPF_KPROBE(icmp6_send, struct sk_buff *skb, __u8 type, __u8 code,
11651213
11661214 key .cgroupid = bpf_get_current_cgroup_id ();
11671215
1168- size_t msglen = process_icmp6 (skb , & key , pid );
1216+ /* process_icmp6 populates src/dst IPs from the triggering packet's IPv6
1217+ * header; its returned msglen (triggering payload_len) is not used here. */
1218+ process_icmp6 (skb , & key , pid );
1219+
11691220 /* icmp6_send is called with the triggering packet's skb, so src/dst IPs
11701221 * come from the original packet (src=sender, dst=us). Swap them to reflect
11711222 * the actual ICMPv6 error direction (src=us, dst=original sender).
@@ -1176,6 +1227,13 @@ int BPF_KPROBE(icmp6_send, struct sk_buff *skb, __u8 type, __u8 code,
11761227 key .dstip = tmp_ip ;
11771228 key .src_port = type ;
11781229 key .dst_port = code ;
1230+
1231+ /* Compute the actual ICMPv6 error response size per RFC 4443:
1232+ * 8-byte ICMPv6 header + fixed 40-byte IPv6 header + first 8 bytes of
1233+ * original data. The triggering payload_len (as process_icmp6 returned)
1234+ * over-counts by including the full original payload. */
1235+ size_t msglen = sizeof (struct icmp6hdr ) + sizeof (struct ipv6hdr ) + 8 ;
1236+
11791237 update_val (& key , msglen );
11801238
11811239 return 0 ;
@@ -1207,7 +1265,8 @@ int BPF_KPROBE(icmp_rcv, struct sk_buff *skb) {
12071265 key .cgroupid = bpf_get_current_cgroup_id ();
12081266
12091267 size_t msglen = process_icmp4 (skb , & key , pid );
1210- update_val (& key , msglen );
1268+ if (msglen > 0 )
1269+ update_val (& key , msglen );
12111270
12121271 return 0 ;
12131272}
0 commit comments