diff --git a/pkg/tcpip/transport/tcp/connect.go b/pkg/tcpip/transport/tcp/connect.go index b15a230bb4..62a0a2cc53 100644 --- a/pkg/tcpip/transport/tcp/connect.go +++ b/pkg/tcpip/transport/tcp/connect.go @@ -1178,46 +1178,61 @@ func (e *Endpoint) drainClosingSegmentQueue() { } } +// handleReset processes an inbound segment carrying the RST flag. +// +// Acceptance follows RFC 5961 section 3.2: +// - If the segment sequence number is out of window, the segment is +// silently dropped. +// - If the segment sequence number is in window but not exactly equal +// to RCV.NXT, the implementation sends a challenge ACK and drops +// the segment. +// - Only an exact match against RCV.NXT causes the connection to be +// reset. +// +// This is stricter than RFC 793 page 37, which accepted any in-window RST. +// The strict-match rule defends against off-path blind RST injection. +// Linux has implemented it since version 3.6 (2012); see +// net/ipv4/tcp_input.c tcp_validate_incoming(). +// // +checklocks:e.mu func (e *Endpoint) handleReset(s *segment) (ok bool, err tcpip.Error) { - if e.rcv.acceptable(s.sequenceNumber, 0) { - // RFC 793, page 37 states that "in all states - // except SYN-SENT, all reset (RST) segments are - // validated by checking their SEQ-fields." So - // we only process it if it's acceptable. - switch e.EndpointState() { - // In case of a RST in CLOSE-WAIT linux moves - // the socket to closed state with an error set - // to indicate EPIPE. - // - // Technically this seems to be at odds w/ RFC. - // As per https://tools.ietf.org/html/rfc793#section-2.7 - // page 69 the behavior for a segment arriving - // w/ RST bit set in CLOSE-WAIT is inlined below. - // - // ESTABLISHED - // FIN-WAIT-1 - // FIN-WAIT-2 - // CLOSE-WAIT - - // If the RST bit is set then, any outstanding RECEIVEs and - // SEND should receive "reset" responses. All segment queues - // should be flushed. Users should also receive an unsolicited - // general "connection reset" signal. Enter the CLOSED state, - // delete the TCB, and return. - case StateCloseWait: - e.transitionToStateCloseLocked() - e.hardError = &tcpip.ErrAborted{} - return false, nil - default: - // RFC 793, page 37 states that "in all states - // except SYN-SENT, all reset (RST) segments are - // validated by checking their SEQ-fields." So - // we only process it if it's acceptable. - return false, &tcpip.ErrConnectionReset{} - } + if !e.rcv.acceptable(s.sequenceNumber, 0) { + // Out of window. Silent drop. + return true, nil + } + + if s.sequenceNumber != e.rcv.RcvNxt { + // In window but not an exact match. Send a challenge ACK and drop the + // segment per RFC 5961 section 3.2. The challenge ACK helper rate-limits + // challenge transmission per RFC 5961 section 7. + e.snd.maybeSendOutOfWindowAck(s) + return true, nil + } + + switch e.EndpointState() { + // In case of a RST in CLOSE-WAIT linux moves the socket to closed state + // with an error set to indicate EPIPE. + // + // As per https://tools.ietf.org/html/rfc793#section-2.7 page 69 the + // behavior for a segment arriving w/ RST bit set in CLOSE-WAIT is + // inlined below. + // + // ESTABLISHED + // FIN-WAIT-1 + // FIN-WAIT-2 + // CLOSE-WAIT + // + // If the RST bit is set then, any outstanding RECEIVEs and SEND should + // receive "reset" responses. All segment queues should be flushed. + // Users should also receive an unsolicited general "connection reset" + // signal. Enter the CLOSED state, delete the TCB, and return. + case StateCloseWait: + e.transitionToStateCloseLocked() + e.hardError = &tcpip.ErrAborted{} + return false, nil + default: + return false, &tcpip.ErrConnectionReset{} } - return true, nil } // handleSegments processes all inbound segments. diff --git a/pkg/tcpip/transport/tcp/test/e2e/tcp_test.go b/pkg/tcpip/transport/tcp/test/e2e/tcp_test.go index f61332bc4e..8616905b82 100644 --- a/pkg/tcpip/transport/tcp/test/e2e/tcp_test.go +++ b/pkg/tcpip/transport/tcp/test/e2e/tcp_test.go @@ -9665,6 +9665,131 @@ func TestCloseInSynRecvWithLinger(t *testing.T) { } } +// TestRSTOutOfWindowIsDropped verifies that an RST whose sequence +// number falls outside the advertised receive window is silently dropped. +// Pre-RFC-5961 behavior (RFC 793 page 37) already silently dropped these, +// so this asserts the unchanged path. +func TestRSTOutOfWindowIsDropped(t *testing.T) { + c := context.New(t, e2e.DefaultMTU) + defer c.Cleanup() + + c.CreateConnected(context.TestInitialSequenceNumber, 30000, -1 /* epRcvBuf */) + + // Send a RST far outside the receive window. + c.SendPacket(nil, &context.Headers{ + SrcPort: context.TestPort, + DstPort: c.Port, + Flags: header.TCPFlagRst, + SeqNum: seqnum.Value(context.TestInitialSequenceNumber).Add(1 << 30), + RcvWnd: 30000, + }) + + // Allow the stack to process the packet, then assert no abort. + time.Sleep(50 * time.Millisecond) + + // Read must NOT return ErrConnectionReset. + _, err := c.EP.Read(io.Discard, tcpip.ReadOptions{}) + if _, ok := err.(*tcpip.ErrConnectionReset); ok { + t.Fatalf("connection aborted on out-of-window RST") + } + + if got, want := tcp.EndpointState(c.EP.State()), tcp.StateEstablished; got != want { + t.Errorf("endpoint state = %v, want %v", got, want) + } +} + +// TestRSTInWindowNotExactSendsChallengeAck verifies that an in-window RST +// whose sequence number does not exactly match RCV.NXT does NOT abort the +// connection and triggers a challenge ACK per RFC 5961 section 3.2. This +// is the security-relevant behavior change introduced by the patch: +// pre-patch, such a RST would have aborted the connection (RFC 793 +// window-test). +func TestRSTInWindowNotExactSendsChallengeAck(t *testing.T) { + c := context.New(t, e2e.DefaultMTU) + defer c.Cleanup() + + c.CreateConnected(context.TestInitialSequenceNumber, 30000, -1 /* epRcvBuf */) + + rcvNxt := seqnum.Value(context.TestInitialSequenceNumber).Add(1) + // Pick a sequence number inside the advertised window but not equal to + // RCV.NXT. + offSeq := rcvNxt.Add(1024) + + c.SendPacket(nil, &context.Headers{ + SrcPort: context.TestPort, + DstPort: c.Port, + Flags: header.TCPFlagRst, + SeqNum: offSeq, + RcvWnd: 30000, + }) + + // We expect a challenge ACK to be emitted in response. + v := c.GetPacket() + defer v.Release() + checker.IPv4(t, v, + checker.TCP( + checker.DstPort(context.TestPort), + checker.TCPFlags(header.TCPFlagAck), + checker.TCPSeqNum(uint32(c.IRS)+1), + checker.TCPAckNum(uint32(rcvNxt)), + ), + ) + + // Read must NOT return ErrConnectionReset. + _, err := c.EP.Read(io.Discard, tcpip.ReadOptions{}) + if _, ok := err.(*tcpip.ErrConnectionReset); ok { + t.Fatalf("connection aborted on in-window non-exact RST (RFC 5961 strict-match rule violated)") + } + + if got, want := tcp.EndpointState(c.EP.State()), tcp.StateEstablished; got != want { + t.Errorf("endpoint state = %v, want %v", got, want) + } +} + +// TestRSTExactMatchAbortsConnection verifies that an RST whose sequence +// number equals RCV.NXT does abort the connection per RFC 5961 section 3.2. +// This preserves the existing behavior for legitimate RSTs. +func TestRSTExactMatchAbortsConnection(t *testing.T) { + c := context.New(t, e2e.DefaultMTU) + defer c.Cleanup() + + c.CreateConnected(context.TestInitialSequenceNumber, 30000, -1 /* epRcvBuf */) + + rcvNxt := seqnum.Value(context.TestInitialSequenceNumber).Add(1) + + c.SendPacket(nil, &context.Headers{ + SrcPort: context.TestPort, + DstPort: c.Port, + Flags: header.TCPFlagRst, + SeqNum: rcvNxt, + RcvWnd: 30000, + }) + + // Wait for the readable event signaling the connection went into the + // error state. + we, ch := waiter.NewChannelEntry(waiter.ReadableEvents) + c.WQ.EventRegister(&we) + defer c.WQ.EventUnregister(&we) + + for { + _, err := c.EP.Read(io.Discard, tcpip.ReadOptions{}) + switch err.(type) { + case *tcpip.ErrWouldBlock: + select { + case <-ch: + // Loop back and Read again to surface the hard error. + continue + case <-time.After(2 * time.Second): + t.Fatalf("connection did not abort on exact-match RST within 2s") + } + case *tcpip.ErrConnectionReset: + return + default: + t.Fatalf("c.EP.Read after exact-match RST: err = %v, want ErrConnectionReset", err) + } + } +} + func TestMain(m *testing.M) { refs.SetLeakMode(refs.LeaksPanic) code := m.Run()