/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Mark Evans, <evansmp@uhura.aston.ac.uk>
 *		Corey Minyard <wf-rch!minyard@relay.EU.net>
 *		Florian La Roche, <flla@stud.uni-sb.de>
 *		Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
 *		Linus Torvalds, <torvalds@cs.helsinki.fi>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *		Matthew Dillon, <dillon@apollo.west.oic.com>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Jorge Cwik, <jorge@laser.satlink.net>
 */

/*
 * Changes:	Pedro Roque	:	Retransmit queue handled by TCP.
 *				:	Fragmentation on mtu decrease
 *				:	Segment collapse on retransmit
 *				:	AF independence
 *
 *		Linus Torvalds	:	send_delayed_ack
 *		David S. Miller	:	Charge memory using the right skb
 *					during syn/ack processing.
 *		David S. Miller :	Output engine completely rewritten.
 *		Andrea Arcangeli:	SYNACK carry ts_recent in tsecr.
 *		Cacophonix Gaul :	draft-minshall-nagle-01
 *		J Hadi Salim	:	ECN support
 *
 */

#define pr_fmt(fmt) "TCP: " fmt

#include <net/tcp.h>

#include <linux/compiler.h>
#include <linux/gfp.h>
#include <linux/module.h>

/* People can turn this off for buggy TCPs found in printers etc. */
int sysctl_tcp_retrans_collapse __read_mostly = 1;

/* People can turn this on to work with those rare, broken TCPs that
 * interpret the window field as a signed quantity.
 */
int sysctl_tcp_workaround_signed_windows __read_mostly = 0;

/* Default TSQ limit of four TSO segments */
int sysctl_tcp_limit_output_bytes __read_mostly = 262144;

/* This limits the percentage of the congestion window which we
 * will allow a single TSO frame to consume.  Building TSO frames
 * which are too large can cause TCP streams to be bursty.
 */
int sysctl_tcp_tso_win_divisor __read_mostly = 3;

/* By default, RFC2861 behavior.  */
int sysctl_tcp_slow_start_after_idle __read_mostly = 1;

static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
			   int push_one, gfp_t gfp);

/* Account for new data that has been sent to the network. */
static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	unsigned int prior_packets = tp->packets_out;

	tcp_advance_send_head(sk, skb);
	tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;

	tp->packets_out += tcp_skb_pcount(skb);
	if (!prior_packets || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
		tcp_rearm_rto(sk);

	NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT,
		      tcp_skb_pcount(skb));
}

/* SND.NXT, if window was not shrunk or the amount shrunk was less than one
 * window scaling factor due to loss of precision.
 * If window has been shrunk, what should we do? It is not clear at all.
 * Using SND.UNA we will fail to open window, SND.NXT is out of window. :-(
 * Anything in between SND.UNA...SND.UNA+SND.WND also can be already
 * invalid. OK, let's make this for now:
 */
static inline __u32 tcp_acceptable_seq(const struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);

	if (!before(tcp_wnd_end(tp), tp->snd_nxt) ||
	    (tp->rx_opt.wscale_ok &&
	     ((tp->snd_nxt - tcp_wnd_end(tp)) < (1 << tp->rx_opt.rcv_wscale))))
		return tp->snd_nxt;
	else
		return tcp_wnd_end(tp);
}

/* Calculate mss to advertise in SYN segment.
 * RFC1122, RFC1063, draft-ietf-tcpimpl-pmtud-01 state that:
 *
 * 1. It is independent of path mtu.
 * 2. Ideally, it is maximal possible segment size i.e. 65535-40.
 * 3. For IPv4 it is reasonable to calculate it from maximal MTU of
 *    attached devices, because some buggy hosts are confused by
 *    large MSS.
 * 4. We do not do 3; we advertise the MSS calculated from the first
 *    hop device mtu, but allow it to be raised to ip_rt_min_advmss.
 *    This may be overridden via information stored in routing table.
 * 5. Value 65535 for MSS is valid in IPv6 and means "as large as possible,
 *    probably even Jumbo".
 */
static __u16 tcp_advertise_mss(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	const struct dst_entry *dst = __sk_dst_get(sk);
	int mss = tp->advmss;

	if (dst) {
		unsigned int metric = dst_metric_advmss(dst);

		if (metric < mss) {
			mss = metric;
			tp->advmss = mss;
		}
	}

	return (__u16)mss;
}
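
/* Worked example (illustrative): for IPv4 over a standard Ethernet path the
 * first hop MTU is 1500, so tp->advmss is typically 1500 - 20 (IP header)
 * - 20 (TCP header) = 1460, and that value goes out in the SYN unless the
 * route's advmss metric is smaller.
 */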

/* RFC2861. Reset CWND after idle period longer than RTO to "restart window".
 * This is the first part of cwnd validation mechanism.
 */
void tcp_cwnd_restart(struct sock *sk, s32 delta)
{
	struct tcp_sock *tp = tcp_sk(sk);
	u32 restart_cwnd = tcp_init_cwnd(tp, __sk_dst_get(sk));
	u32 cwnd = tp->snd_cwnd;

	tcp_ca_event(sk, CA_EVENT_CWND_RESTART);

	tp->snd_ssthresh = tcp_current_ssthresh(sk);
	restart_cwnd = min(restart_cwnd, cwnd);

	while ((delta -= inet_csk(sk)->icsk_rto) > 0 && cwnd > restart_cwnd)
		cwnd >>= 1;
	tp->snd_cwnd = max(cwnd, restart_cwnd);
	tp->snd_cwnd_stamp = tcp_time_stamp;
	tp->snd_cwnd_used = 0;
}
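
/* Worked example (illustrative): suppose snd_cwnd is 40, tcp_init_cwnd()
 * yields 10 and the connection sat idle for three RTOs.  The loop above
 * halves cwnd once per elapsed RTO: 40 -> 20 -> 10, then stops because cwnd
 * is no longer above restart_cwnd, so sending restarts with snd_cwnd = 10.
 */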

/* Congestion state accounting after a packet has been sent. */
static void tcp_event_data_sent(struct tcp_sock *tp,
				struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	const u32 now = tcp_time_stamp;

	if (tcp_packets_in_flight(tp) == 0)
		tcp_ca_event(sk, CA_EVENT_TX_START);

	tp->lsndtime = now;

	/* If it is a reply for ato after last received
	 * packet, enter pingpong mode.
	 */
	if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
		icsk->icsk_ack.pingpong = 1;
}

/* Account for an ACK we sent. */
static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
{
	tcp_dec_quickack_mode(sk, pkts);
	inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
}


u32 tcp_default_init_rwnd(u32 mss)
{
	/* Initial receive window should be twice of TCP_INIT_CWND to
	 * enable proper sending of new unsent data during fast recovery
	 * (RFC 3517, Section 4, NextSeg() rule (2)). Further place a
	 * limit when mss is larger than 1460.
	 */
	u32 init_rwnd = TCP_INIT_CWND * 2;

	if (mss > 1460)
		init_rwnd = max((1460 * init_rwnd) / mss, 2U);
	return init_rwnd;
}
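
/* Worked example (illustrative): with a jumbo-frame mss of 9000 the default
 * of TCP_INIT_CWND * 2 = 20 segments would be oversized in bytes, so the
 * window is rescaled to max((1460 * 20) / 9000, 2) = 3 segments, roughly the
 * same byte count as 20 segments of 1460.
 */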

/* Determine a window scaling and initial window to offer.
 * Based on the assumption that the given amount of space
 * will be offered. Store the results in the tp structure.
 * NOTE: for smooth operation initial space offering should
 * be a multiple of mss if possible. We assume here that mss >= 1.
 * This MUST be enforced by all callers.
 */
void tcp_select_initial_window(int __space, __u32 mss,
			       __u32 *rcv_wnd, __u32 *window_clamp,
			       int wscale_ok, __u8 *rcv_wscale,
			       __u32 init_rcv_wnd)
{
	unsigned int space = (__space < 0 ? 0 : __space);

	/* If no clamp set the clamp to the max possible scaled window */
	if (*window_clamp == 0)
		(*window_clamp) = (65535 << 14);
	space = min(*window_clamp, space);

	/* Quantize space offering to a multiple of mss if possible. */
	if (space > mss)
		space = (space / mss) * mss;

	/* NOTE: offering an initial window larger than 32767
	 * will break some buggy TCP stacks. If the admin tells us
	 * it is likely we could be speaking with such a buggy stack
	 * we will truncate our initial window offering to 32K-1
	 * unless the remote has sent us a window scaling option,
	 * which we interpret as a sign the remote TCP is not
	 * misinterpreting the window field as a signed quantity.
	 */
	if (sysctl_tcp_workaround_signed_windows)
		(*rcv_wnd) = min(space, MAX_TCP_WINDOW);
	else
		(*rcv_wnd) = space;

	(*rcv_wscale) = 0;
	if (wscale_ok) {
		/* Set window scaling on max possible window
		 * See RFC1323 for an explanation of the limit to 14
		 */
		space = max_t(u32, space, sysctl_tcp_rmem[2]);
		space = max_t(u32, space, sysctl_rmem_max);
		space = min_t(u32, space, *window_clamp);
		while (space > 65535 && (*rcv_wscale) < 14) {
			space >>= 1;
			(*rcv_wscale)++;
		}
	}

	if (mss > (1 << *rcv_wscale)) {
		if (!init_rcv_wnd) /* Use default unless specified otherwise */
			init_rcv_wnd = tcp_default_init_rwnd(mss);
		*rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss);
	}

	/* Set the clamp no higher than max representable value */
	(*window_clamp) = min(65535U << (*rcv_wscale), *window_clamp);
}
EXPORT_SYMBOL(tcp_select_initial_window);
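
/* Worked example (illustrative): if the clamped space works out to 4 MB, the
 * loop above shifts 4194304 down to 32768 in seven steps, so rcv_wscale ends
 * up as 7 and the 16-bit window field can then represent up to 65535 << 7,
 * roughly 8 MB.
 */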

/* Choose a new window to advertise, update state in tcp_sock for the
 * socket, and return result with RFC1323 scaling applied.  The return
 * value can be stuffed directly into th->window for an outgoing
 * frame.
 */
static u16 tcp_select_window(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	u32 old_win = tp->rcv_wnd;
	u32 cur_win = tcp_receive_window(tp);
	u32 new_win = __tcp_select_window(sk);

	/* Never shrink the offered window */
	if (new_win < cur_win) {
		/* Danger Will Robinson!
		 * Don't update rcv_wup/rcv_wnd here or else
		 * we will not be able to advertise a zero
		 * window in time.  --DaveM
		 *
		 * Relax Will Robinson.
		 */
		if (new_win == 0)
			NET_INC_STATS(sock_net(sk),
				      LINUX_MIB_TCPWANTZEROWINDOWADV);
		new_win = ALIGN(cur_win, 1 << tp->rx_opt.rcv_wscale);
	}
	tp->rcv_wnd = new_win;
	tp->rcv_wup = tp->rcv_nxt;

	/* Make sure we do not exceed the maximum possible
	 * scaled window.
	 */
	if (!tp->rx_opt.rcv_wscale && sysctl_tcp_workaround_signed_windows)
		new_win = min(new_win, MAX_TCP_WINDOW);
	else
		new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale));

	/* RFC1323 scaling applied */
	new_win >>= tp->rx_opt.rcv_wscale;

	/* If we advertise zero window, disable fast path. */
	if (new_win == 0) {
		tp->pred_flags = 0;
		if (old_win)
			NET_INC_STATS(sock_net(sk),
				      LINUX_MIB_TCPTOZEROWINDOWADV);
	} else if (old_win == 0) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFROMZEROWINDOWADV);
	}

	return new_win;
}
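
/* Worked example (illustrative): with rcv_wscale = 7 the advertised window is
 * a multiple of 128 bytes.  If cur_win is 1000 and the freshly computed
 * window would be smaller, ALIGN(1000, 128) = 1024 is used instead, so the
 * value on the wire (1024 >> 7 = 8) never shrinks the offered window.
 */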

/* Packet ECN state for a SYN-ACK */
static void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb)
{
	const struct tcp_sock *tp = tcp_sk(sk);

	TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR;
	if (!(tp->ecn_flags & TCP_ECN_OK))
		TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE;
	else if (tcp_ca_needs_ecn(sk))
		INET_ECN_xmit(sk);
}

/* Packet ECN state for a SYN.  */
static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);
	bool use_ecn = sock_net(sk)->ipv4.sysctl_tcp_ecn == 1 ||
		       tcp_ca_needs_ecn(sk);

	if (!use_ecn) {
		const struct dst_entry *dst = __sk_dst_get(sk);

		if (dst && dst_feature(dst, RTAX_FEATURE_ECN))
			use_ecn = true;
	}

	tp->ecn_flags = 0;

	if (use_ecn) {
		TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR;
		tp->ecn_flags = TCP_ECN_OK;
		if (tcp_ca_needs_ecn(sk))
			INET_ECN_xmit(sk);
	}
}

static void tcp_ecn_clear_syn(struct sock *sk, struct sk_buff *skb)
{
	if (sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback)
		/* tp->ecn_flags are cleared at a later point in time when
		 * SYN ACK is ultimately being received.
		 */
		TCP_SKB_CB(skb)->tcp_flags &= ~(TCPHDR_ECE | TCPHDR_CWR);
}

static void
tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th)
{
	if (inet_rsk(req)->ecn_ok)
		th->ece = 1;
}

/* Set up ECN state for a packet on an ESTABLISHED socket that is about to
 * be sent.
 */
static void tcp_ecn_send(struct sock *sk, struct sk_buff *skb,
			 struct tcphdr *th, int tcp_header_len)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (tp->ecn_flags & TCP_ECN_OK) {
		/* Not-retransmitted data segment: set ECT and inject CWR. */
		if (skb->len != tcp_header_len &&
		    !before(TCP_SKB_CB(skb)->seq, tp->snd_nxt)) {
			INET_ECN_xmit(sk);
			if (tp->ecn_flags & TCP_ECN_QUEUE_CWR) {
				tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR;
				th->cwr = 1;
				skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
			}
		} else if (!tcp_ca_needs_ecn(sk)) {
			/* ACK or retransmitted segment: clear ECT|CE */
			INET_ECN_dontxmit(sk);
		}
		if (tp->ecn_flags & TCP_ECN_DEMAND_CWR)
			th->ece = 1;
	}
}

/* Constructs common control bits of non-data skb. If SYN/FIN is present,
 * auto increment end seqno.
 */
static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
{
	skb->ip_summed = CHECKSUM_PARTIAL;
	skb->csum = 0;

	TCP_SKB_CB(skb)->tcp_flags = flags;
	TCP_SKB_CB(skb)->sacked = 0;

	tcp_skb_pcount_set(skb, 1);

	TCP_SKB_CB(skb)->seq = seq;
	if (flags & (TCPHDR_SYN | TCPHDR_FIN))
		seq++;
	TCP_SKB_CB(skb)->end_seq = seq;
}

static inline bool tcp_urg_mode(const struct tcp_sock *tp)
{
	return tp->snd_una != tp->snd_up;
}

#define OPTION_SACK_ADVERTISE	(1 << 0)
#define OPTION_TS		(1 << 1)
#define OPTION_MD5		(1 << 2)
#define OPTION_WSCALE		(1 << 3)
#define OPTION_FAST_OPEN_COOKIE	(1 << 8)

struct tcp_out_options {
	u16 options;		/* bit field of OPTION_* */
	u16 mss;		/* 0 to disable */
	u8 ws;			/* window scale, 0 to disable */
	u8 num_sack_blocks;	/* number of SACK blocks to include */
	u8 hash_size;		/* bytes in hash_location */
	__u8 *hash_location;	/* temporary pointer, overloaded */
	__u32 tsval, tsecr;	/* need to include OPTION_TS */
	struct tcp_fastopen_cookie *fastopen_cookie;	/* Fast open cookie */
};

/* Write previously computed TCP options to the packet.
 *
 * Beware: Something in the Internet is very sensitive to the ordering of
 * TCP options, we learned this the hard way, so be careful here.
 * Luckily we can at least blame others for their non-compliance but from
 * inter-operability perspective it seems that we're somewhat stuck with
 * the ordering which we have been using if we want to keep working with
 * those broken things (not that it currently hurts anybody as there isn't
 * particular reason why the ordering would need to be changed).
 *
 * At least SACK_PERM as the first option is known to lead to a disaster
 * (but it may well be that other scenarios fail similarly).
 */
static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
			      struct tcp_out_options *opts)
{
	u16 options = opts->options;	/* mungable copy */

	if (unlikely(OPTION_MD5 & options)) {
		*ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
			       (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
		/* overload cookie hash location */
		opts->hash_location = (__u8 *)ptr;
		ptr += 4;
	}

	if (unlikely(opts->mss)) {
		*ptr++ = htonl((TCPOPT_MSS << 24) |
			       (TCPOLEN_MSS << 16) |
			       opts->mss);
	}

	if (likely(OPTION_TS & options)) {
		if (unlikely(OPTION_SACK_ADVERTISE & options)) {
			*ptr++ = htonl((TCPOPT_SACK_PERM << 24) |
				       (TCPOLEN_SACK_PERM << 16) |
				       (TCPOPT_TIMESTAMP << 8) |
				       TCPOLEN_TIMESTAMP);
			options &= ~OPTION_SACK_ADVERTISE;
		} else {
			*ptr++ = htonl((TCPOPT_NOP << 24) |
				       (TCPOPT_NOP << 16) |
				       (TCPOPT_TIMESTAMP << 8) |
				       TCPOLEN_TIMESTAMP);
		}
		*ptr++ = htonl(opts->tsval);
		*ptr++ = htonl(opts->tsecr);
	}

	if (unlikely(OPTION_SACK_ADVERTISE & options)) {
		*ptr++ = htonl((TCPOPT_NOP << 24) |
			       (TCPOPT_NOP << 16) |
			       (TCPOPT_SACK_PERM << 8) |
			       TCPOLEN_SACK_PERM);
	}

	if (unlikely(OPTION_WSCALE & options)) {
		*ptr++ = htonl((TCPOPT_NOP << 24) |
			       (TCPOPT_WINDOW << 16) |
			       (TCPOLEN_WINDOW << 8) |
			       opts->ws);
	}

	if (unlikely(opts->num_sack_blocks)) {
		struct tcp_sack_block *sp = tp->rx_opt.dsack ?
			tp->duplicate_sack : tp->selective_acks;
		int this_sack;

		*ptr++ = htonl((TCPOPT_NOP  << 24) |
			       (TCPOPT_NOP  << 16) |
			       (TCPOPT_SACK <<  8) |
			       (TCPOLEN_SACK_BASE + (opts->num_sack_blocks *
						     TCPOLEN_SACK_PERBLOCK)));

		for (this_sack = 0; this_sack < opts->num_sack_blocks;
		     ++this_sack) {
			*ptr++ = htonl(sp[this_sack].start_seq);
			*ptr++ = htonl(sp[this_sack].end_seq);
		}

		tp->rx_opt.dsack = 0;
	}

	if (unlikely(OPTION_FAST_OPEN_COOKIE & options)) {
		struct tcp_fastopen_cookie *foc = opts->fastopen_cookie;
		u8 *p = (u8 *)ptr;
		u32 len; /* Fast Open option length */

		if (foc->exp) {
			len = TCPOLEN_EXP_FASTOPEN_BASE + foc->len;
			*ptr = htonl((TCPOPT_EXP << 24) | (len << 16) |
				     TCPOPT_FASTOPEN_MAGIC);
			p += TCPOLEN_EXP_FASTOPEN_BASE;
		} else {
			len = TCPOLEN_FASTOPEN_BASE + foc->len;
			*p++ = TCPOPT_FASTOPEN;
			*p++ = len;
		}

		memcpy(p, foc->val, foc->len);
		if ((len & 3) == 2) {
			p[foc->len] = TCPOPT_NOP;
			p[foc->len + 1] = TCPOPT_NOP;
		}
		ptr += (len + 3) >> 2;
	}
}
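
/* Worked example (illustrative): a non-experimental Fast Open option carrying
 * an 8-byte cookie has len = TCPOLEN_FASTOPEN_BASE + 8 = 10.  Since
 * 10 & 3 == 2, two NOP bytes pad the option, and ptr advances by
 * (10 + 3) >> 2 = 3 words, keeping the option area 32-bit aligned.
 */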

/* Compute TCP options for SYN packets. This is not the final
 * network wire format yet.
 */
static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
				struct tcp_out_options *opts,
				struct tcp_md5sig_key **md5)
{
	struct tcp_sock *tp = tcp_sk(sk);
	unsigned int remaining = MAX_TCP_OPTION_SPACE;
	struct tcp_fastopen_request *fastopen = tp->fastopen_req;

#ifdef CONFIG_TCP_MD5SIG
	*md5 = tp->af_specific->md5_lookup(sk, sk);
	if (*md5) {
		opts->options |= OPTION_MD5;
		remaining -= TCPOLEN_MD5SIG_ALIGNED;
	}
#else
	*md5 = NULL;
#endif

	/* We always get an MSS option.  The option bytes which will be seen in
	 * normal data packets should timestamps be used, must be in the MSS
	 * advertised.  But we subtract them from tp->mss_cache so that
	 * calculations in tcp_sendmsg are simpler etc.  So account for this
	 * fact here if necessary.  If we don't do this correctly, as a
	 * receiver we won't recognize data packets as being full sized when we
	 * should, and thus we won't abide by the delayed ACK rules correctly.
	 * SACKs don't matter, we never delay an ACK when we have any of those
	 * going out.  */
	opts->mss = tcp_advertise_mss(sk);
	remaining -= TCPOLEN_MSS_ALIGNED;

	if (likely(sysctl_tcp_timestamps && !*md5)) {
		opts->options |= OPTION_TS;
		opts->tsval = tcp_skb_timestamp(skb) + tp->tsoffset;
		opts->tsecr = tp->rx_opt.ts_recent;
		remaining -= TCPOLEN_TSTAMP_ALIGNED;
	}
	if (likely(sysctl_tcp_window_scaling)) {
		opts->ws = tp->rx_opt.rcv_wscale;
		opts->options |= OPTION_WSCALE;
		remaining -= TCPOLEN_WSCALE_ALIGNED;
	}
	if (likely(sysctl_tcp_sack)) {
		opts->options |= OPTION_SACK_ADVERTISE;
		if (unlikely(!(OPTION_TS & opts->options)))
			remaining -= TCPOLEN_SACKPERM_ALIGNED;
	}

	if (fastopen && fastopen->cookie.len >= 0) {
		u32 need = fastopen->cookie.len;

		need += fastopen->cookie.exp ? TCPOLEN_EXP_FASTOPEN_BASE :
					       TCPOLEN_FASTOPEN_BASE;
		need = (need + 3) & ~3U;  /* Align to 32 bits */
		if (remaining >= need) {
			opts->options |= OPTION_FAST_OPEN_COOKIE;
			opts->fastopen_cookie = &fastopen->cookie;
			remaining -= need;
			tp->syn_fastopen = 1;
			tp->syn_fastopen_exp = fastopen->cookie.exp ? 1 : 0;
		}
	}

	return MAX_TCP_OPTION_SPACE - remaining;
}
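
/* Worked example (illustrative): with timestamps, window scaling and SACK all
 * enabled and no MD5 key, a SYN consumes 4 (MSS) + 12 (timestamps) + 4
 * (window scale) = 20 of the 40 option bytes; SACK-permitted shares the
 * timestamp word, which leaves 20 bytes, enough for even a maximum 16-byte
 * Fast Open cookie.
 */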

/* Set up TCP options for SYN-ACKs. */
static unsigned int tcp_synack_options(struct request_sock *req,
				       unsigned int mss, struct sk_buff *skb,
				       struct tcp_out_options *opts,
				       const struct tcp_md5sig_key *md5,
				       struct tcp_fastopen_cookie *foc)
{
	struct inet_request_sock *ireq = inet_rsk(req);
	unsigned int remaining = MAX_TCP_OPTION_SPACE;

#ifdef CONFIG_TCP_MD5SIG
	if (md5) {
		opts->options |= OPTION_MD5;
		remaining -= TCPOLEN_MD5SIG_ALIGNED;

		/* We can't fit any SACK blocks in a packet with MD5 + TS
		 * options. There was discussion about disabling SACK
		 * rather than TS in order to fit in better with old,
		 * buggy kernels, but that was deemed to be unnecessary.
		 */
		ireq->tstamp_ok &= !ireq->sack_ok;
	}
#endif

	/* We always send an MSS option. */
	opts->mss = mss;
	remaining -= TCPOLEN_MSS_ALIGNED;

	if (likely(ireq->wscale_ok)) {
		opts->ws = ireq->rcv_wscale;
		opts->options |= OPTION_WSCALE;
		remaining -= TCPOLEN_WSCALE_ALIGNED;
	}
	if (likely(ireq->tstamp_ok)) {
		opts->options |= OPTION_TS;
		opts->tsval = tcp_skb_timestamp(skb) + tcp_rsk(req)->ts_off;
		opts->tsecr = req->ts_recent;
		remaining -= TCPOLEN_TSTAMP_ALIGNED;
	}
	if (likely(ireq->sack_ok)) {
		opts->options |= OPTION_SACK_ADVERTISE;
		if (unlikely(!ireq->tstamp_ok))
			remaining -= TCPOLEN_SACKPERM_ALIGNED;
	}
	if (foc != NULL && foc->len >= 0) {
		u32 need = foc->len;

		need += foc->exp ? TCPOLEN_EXP_FASTOPEN_BASE :
				   TCPOLEN_FASTOPEN_BASE;
		need = (need + 3) & ~3U;  /* Align to 32 bits */
		if (remaining >= need) {
			opts->options |= OPTION_FAST_OPEN_COOKIE;
			opts->fastopen_cookie = foc;
			remaining -= need;
		}
	}

	return MAX_TCP_OPTION_SPACE - remaining;
}
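
/* Worked example (illustrative): when an MD5 key is in use its option takes
 * 20 aligned bytes.  Together with timestamps (12 bytes) that leaves only 8
 * of the 40 option bytes in later segments, less than the 12 needed for even
 * one SACK block (4-byte base + 8 per block), which is why tstamp_ok is
 * cleared above whenever sack_ok is set.
 */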

/* Compute TCP options for ESTABLISHED sockets. This is not the
 * final wire format yet.
 */
static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb,
					struct tcp_out_options *opts,
					struct tcp_md5sig_key **md5)
{
	struct tcp_sock *tp = tcp_sk(sk);
	unsigned int size = 0;
	unsigned int eff_sacks;

	opts->options = 0;

#ifdef CONFIG_TCP_MD5SIG
	*md5 = tp->af_specific->md5_lookup(sk, sk);
	if (unlikely(*md5)) {
		opts->options |= OPTION_MD5;
		size += TCPOLEN_MD5SIG_ALIGNED;
	}
#else
	*md5 = NULL;
#endif

	if (likely(tp->rx_opt.tstamp_ok)) {
		opts->options |= OPTION_TS;
		opts->tsval = skb ? tcp_skb_timestamp(skb) + tp->tsoffset : 0;
		opts->tsecr = tp->rx_opt.ts_recent;
		size += TCPOLEN_TSTAMP_ALIGNED;
	}

	eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack;
	if (unlikely(eff_sacks)) {
		const unsigned int remaining = MAX_TCP_OPTION_SPACE - size;
		opts->num_sack_blocks =
			min_t(unsigned int, eff_sacks,
			      (remaining - TCPOLEN_SACK_BASE_ALIGNED) /
			      TCPOLEN_SACK_PERBLOCK);
		size += TCPOLEN_SACK_BASE_ALIGNED +
			opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK;
	}

	return size;
}
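
/* Worked example (illustrative): on an established flow with timestamps on,
 * size starts at 12, remaining is 40 - 12 = 28, and at most (28 - 4) / 8 = 3
 * SACK blocks fit; a full complement of 3 blocks brings the options to
 * 12 + 4 + 3 * 8 = 40 bytes, exactly MAX_TCP_OPTION_SPACE.
 */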


/* TCP SMALL QUEUES (TSQ)
 *
 * TSQ's goal is to keep a small number of skbs per tcp flow in tx queues (qdisc+dev)
 * to reduce RTT and bufferbloat.
 * We do this using a special skb destructor (tcp_wfree).
 *
 * It's important that tcp_wfree() can be replaced by sock_wfree() in the event skb
 * needs to be reallocated in a driver.
 * The invariant is that skb->truesize is subtracted from sk->sk_wmem_alloc.
 *
 * Since transmit from skb destructor is forbidden, we use a tasklet
 * to process all sockets that eventually need to send more skbs.
 * We use one tasklet per cpu, with its own queue of sockets.
 */
struct tsq_tasklet {
	struct tasklet_struct	tasklet;
	struct list_head	head; /* queue of tcp sockets */
};
static DEFINE_PER_CPU(struct tsq_tasklet, tsq_tasklet);
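
/* In outline (a summary of the code below): when tcp_wfree() sees
 * TSQF_THROTTLED it sets TSQF_QUEUED and TCPF_TSQ_DEFERRED, links the socket
 * onto this cpu's tsq->head and schedules the tasklet.  tcp_tasklet_func()
 * then clears TSQ_QUEUED and calls tcp_tsq_handler() directly if it can take
 * the socket lock; otherwise the deferred flag is left for tcp_release_cb()
 * to act on when the socket is released.
 */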

static void tcp_tsq_handler(struct sock *sk)
{
	if ((1 << sk->sk_state) &
	    (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_CLOSING |
	     TCPF_CLOSE_WAIT  | TCPF_LAST_ACK)) {
		struct tcp_sock *tp = tcp_sk(sk);

		if (tp->lost_out > tp->retrans_out &&
		    tp->snd_cwnd > tcp_packets_in_flight(tp))
			tcp_xmit_retransmit_queue(sk);

		tcp_write_xmit(sk, tcp_current_mss(sk), tp->nonagle,
			       0, GFP_ATOMIC);
	}
}
/*
 * One tasklet per cpu tries to send more skbs.
 * We run in tasklet context but need to disable irqs when
 * transferring tsq->head because tcp_wfree() might
 * interrupt us (non NAPI drivers)
 */
static void tcp_tasklet_func(unsigned long data)
{
	struct tsq_tasklet *tsq = (struct tsq_tasklet *)data;
	LIST_HEAD(list);
	unsigned long flags;
	struct list_head *q, *n;
	struct tcp_sock *tp;
	struct sock *sk;

	local_irq_save(flags);
	list_splice_init(&tsq->head, &list);
	local_irq_restore(flags);

	list_for_each_safe(q, n, &list) {
		tp = list_entry(q, struct tcp_sock, tsq_node);
		list_del(&tp->tsq_node);

		sk = (struct sock *)tp;
		smp_mb__before_atomic();
		clear_bit(TSQ_QUEUED, &sk->sk_tsq_flags);

		if (!sk->sk_lock.owned &&
		    test_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags)) {
			bh_lock_sock(sk);
			if (!sock_owned_by_user(sk)) {
				clear_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags);
				tcp_tsq_handler(sk);
			}
			bh_unlock_sock(sk);
		}

		sk_free(sk);
	}
}

#define TCP_DEFERRED_ALL (TCPF_TSQ_DEFERRED |		\
			  TCPF_WRITE_TIMER_DEFERRED |	\
			  TCPF_DELACK_TIMER_DEFERRED |	\
			  TCPF_MTU_REDUCED_DEFERRED)
/**
 * tcp_release_cb - tcp release_sock() callback
 * @sk: socket
 *
 * called from release_sock() to perform protocol dependent
 * actions before socket release.
 */
void tcp_release_cb(struct sock *sk)
{
	unsigned long flags, nflags;

	/* perform an atomic operation only if at least one flag is set */
	do {
		flags = sk->sk_tsq_flags;
		if (!(flags & TCP_DEFERRED_ALL))
			return;
		nflags = flags & ~TCP_DEFERRED_ALL;
	} while (cmpxchg(&sk->sk_tsq_flags, flags, nflags) != flags);

	if (flags & TCPF_TSQ_DEFERRED)
		tcp_tsq_handler(sk);

	/* Here begins the tricky part :
	 * We are called from release_sock() with :
	 * 1) BH disabled
	 * 2) sk_lock.slock spinlock held
	 * 3) socket owned by us (sk->sk_lock.owned == 1)
	 *
	 * But the following code is meant to be called from BH handlers,
	 * so we should keep BH disabled, but release socket ownership early.
	 */
	sock_release_ownership(sk);

	if (flags & TCPF_WRITE_TIMER_DEFERRED) {
		tcp_write_timer_handler(sk);
		__sock_put(sk);
	}
	if (flags & TCPF_DELACK_TIMER_DEFERRED) {
		tcp_delack_timer_handler(sk);
		__sock_put(sk);
	}
	if (flags & TCPF_MTU_REDUCED_DEFERRED) {
		inet_csk(sk)->icsk_af_ops->mtu_reduced(sk);
		__sock_put(sk);
	}
}
EXPORT_SYMBOL(tcp_release_cb);

void __init tcp_tasklet_init(void)
{
	int i;

	for_each_possible_cpu(i) {
		struct tsq_tasklet *tsq = &per_cpu(tsq_tasklet, i);

		INIT_LIST_HEAD(&tsq->head);
		tasklet_init(&tsq->tasklet,
			     tcp_tasklet_func,
			     (unsigned long)tsq);
	}
}

/*
 * Write buffer destructor automatically called from kfree_skb.
 * We can't xmit new skbs from this context, as we might already
 * hold qdisc lock.
 */
void tcp_wfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;
	struct tcp_sock *tp = tcp_sk(sk);
	unsigned long flags, nval, oval;
	int wmem;

	/* Keep one reference on sk_wmem_alloc.
	 * Will be released by sk_free() from here or tcp_tasklet_func()
	 */
	wmem = atomic_sub_return(skb->truesize - 1, &sk->sk_wmem_alloc);

	/* If this softirq is serviced by ksoftirqd, we are likely under stress.
	 * Wait until our queues (qdisc + devices) are drained.
	 * This gives :
	 * - less callbacks to tcp_write_xmit(), reducing stress (batches)
	 * - chance for incoming ACK (processed by another cpu maybe)
	 *   to migrate this flow (skb->ooo_okay will be eventually set)
	 */
	if (wmem >= SKB_TRUESIZE(1) && this_cpu_ksoftirqd() == current)
		goto out;

	for (oval = READ_ONCE(sk->sk_tsq_flags);; oval = nval) {
		struct tsq_tasklet *tsq;
		bool empty;

		if (!(oval & TSQF_THROTTLED) || (oval & TSQF_QUEUED))
			goto out;

		nval = (oval & ~TSQF_THROTTLED) | TSQF_QUEUED | TCPF_TSQ_DEFERRED;
		nval = cmpxchg(&sk->sk_tsq_flags, oval, nval);
		if (nval != oval)
			continue;

		/* queue this socket to tasklet queue */
		local_irq_save(flags);
		tsq = this_cpu_ptr(&tsq_tasklet);
		empty = list_empty(&tsq->head);
		list_add(&tp->tsq_node, &tsq->head);
		if (empty)
			tasklet_schedule(&tsq->tasklet);
		local_irq_restore(flags);
		return;
	}
out:
	sk_free(sk);
}

/* This routine actually transmits TCP packets queued in by
 * tcp_do_sendmsg().  This is used by both the initial
 * transmission and possible later retransmissions.
 * All SKB's seen here are completely headerless.  It is our
 * job to build the TCP header, and pass the packet down to
 * IP so it can do the same plus pass the packet off to the
 * device.
 *
 * We are working here with either a clone of the original
 * SKB, or a fresh unique copy made by the retransmit engine.
 */
static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
			    gfp_t gfp_mask)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet;
	struct tcp_sock *tp;
	struct tcp_skb_cb *tcb;
	struct tcp_out_options opts;
	unsigned int tcp_options_size, tcp_header_size;
	struct tcp_md5sig_key *md5;
	struct tcphdr *th;
	int err;

	BUG_ON(!skb || !tcp_skb_pcount(skb));
	tp = tcp_sk(sk);