/*
 * This file is part of the Nice GLib ICE library.
 *
 * (C) 2010, 2014 Collabora Ltd.
 *  Contact: Philip Withnall
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is the Nice GLib ICE library.
 *
 * The Initial Developers of the Original Code are Collabora Ltd and Nokia
 * Corporation. All Rights Reserved.
 *
 * Contributors:
 *   Youness Alaoui, Collabora Ltd.
 *   Philip Withnall, Collabora Ltd.
 *
 * Alternatively, the contents of this file may be used under the terms of the
 * the GNU Lesser General Public License Version 2.1 (the "LGPL"), in which
 * case the provisions of LGPL are applicable instead of those above. If you
 * wish to allow use of your version of this file only under the terms of the
 * LGPL and not to allow others to use your version of this file under the
 * MPL, indicate your decision by deleting the provisions above and replace
 * them with the notice and other provisions required by the LGPL. If you do
 * not delete the provisions above, a recipient may use your version of this
 * file under either the MPL or the LGPL.
 */

/* Reproducing license from libjingle for copied code */

/*
 * libjingle
 * Copyright 2004--2005, Google Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  1. Redistributions of source code must retain the above copyright notice,
 *     this list of conditions and the following disclaimer.
 *  2. Redistributions in binary form must reproduce the above copyright notice,
 *     this list of conditions and the following disclaimer in the documentation
 *     and/or other materials provided with the distribution.
 *  3. The name of the author may not be used to endorse or promote products
 *     derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
 * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <stdlib.h>
#include <errno.h>
#include <string.h>

#include <glib.h>
72 73

#ifndef G_OS_WIN32
74 75
#  include <arpa/inet.h>
#endif
76 77

#include "pseudotcp.h"
78
#include "agent-priv.h"
79

80
struct _PseudoTcpSocketClass {
81
    GObjectClass parent_class;
82 83 84 85
};

typedef struct _PseudoTcpSocketPrivate PseudoTcpSocketPrivate;

86

87
struct _PseudoTcpSocket {
88
    GObject parent;
89 90 91 92
    PseudoTcpSocketPrivate *priv;
};

G_DEFINE_TYPE (PseudoTcpSocket, pseudo_tcp_socket, G_TYPE_OBJECT);
93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120

//////////////////////////////////////////////////////////////////////
// Network Constants
//////////////////////////////////////////////////////////////////////

// Standard MTUs
// Candidate packet sizes listed in descending order; the final 0 entry
// terminates the list. Commented-out entries are sizes that are deliberately
// not part of the table.
const guint16 PACKET_MAXIMUMS[] = {
  65535,    // Theoretical maximum, Hyperchannel
  32000,    // Nothing
  17914,    // 16Mb IBM Token Ring
  8166,   // IEEE 802.4
  //4464,   // IEEE 802.5 (4Mb max)
  4352,   // FDDI
  //2048,   // Wideband Network
  2002,   // IEEE 802.5 (4Mb recommended)
  //1536,   // Experimental Ethernet Networks
  //1500,   // Ethernet, Point-to-Point (default)
  1492,   // IEEE 802.3
  1006,   // SLIP, ARPANET
  //576,    // X.25 Networks
  //544,    // DEC IP Portal
  //512,    // NETBIOS
  508,    // IEEE 802/Source-Rt Bridge, ARCNET
  296,    // Point-to-Point (low delay)
  //68,     // Official minimum
  0,      // End of list marker
};

121 122 123
// FIXME: This is a reasonable MTU, but we should get it from the lower layer
#define DEF_MTU 1400
#define MAX_PACKET 65532
// Note: we removed lowest level because packet overhead was larger!
#define MIN_PACKET 296

// Sizes in bytes of the lower-layer headers that precede the payload.
// (+ up to 40 bytes of options?)
#define IP_HEADER_SIZE 20
#define ICMP_HEADER_SIZE 8
#define UDP_HEADER_SIZE 8
// TODO: Make JINGLE_HEADER_SIZE transparent to this code?
// when relay framing is in use
#define JINGLE_HEADER_SIZE 64

//////////////////////////////////////////////////////////////////////
// Global Constants and Functions
//////////////////////////////////////////////////////////////////////
//
//    0                   1                   2                   3
//    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
//    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
//  0 |                      Conversation Number                      |
//    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
//  4 |                        Sequence Number                        |
//    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
//  8 |                     Acknowledgment Number                     |
//    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
//    |               |   |U|A|P|R|S|F|                               |
// 12 |    Control    |   |R|C|S|S|Y|I|            Window             |
//    |               |   |G|K|H|T|N|N|                               |
//    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// 16 |                       Timestamp sending                       |
//    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// 20 |                      Timestamp receiving                      |
//    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// 24 |                             data                              |
//    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
//
//////////////////////////////////////////////////////////////////////

// Largest value representable in 32 bits.
// NOTE(review): presumably the maximum sequence number — confirm at use sites.
#define MAX_SEQ 0xFFFFFFFF
// Size in bytes of the header drawn in the diagram above (data starts at
// offset 24).
#define HEADER_SIZE 24

// Bytes of each packet that are not payload: this protocol's header plus the
// UDP, IP and Jingle header sizes.
#define PACKET_OVERHEAD (HEADER_SIZE + UDP_HEADER_SIZE + \
      IP_HEADER_SIZE + JINGLE_HEADER_SIZE)

167 168
// Retransmission timeout bounds and default, in milliseconds.
// MIN_RTO = 1 second (RFC6298, Sec 2.4)
#define MIN_RTO     1000
#define DEF_RTO     1000 /* 1 second (RFC 6298 sect 2.1) */
#define MAX_RTO    60000 /* 60 seconds */
#define DEFAULT_ACK_DELAY    100 /* 100 milliseconds */
#define DEFAULT_NO_DELAY     FALSE

// Default receive and send buffer sizes, in bytes.
#define DEFAULT_RCV_BUF_SIZE (60 * 1024)
#define DEFAULT_SND_BUF_SIZE (90 * 1024)

177 178
/* Option kinds carried in control segments.
 * NOTE: This must fit in 8 bits. This is used on the wire. */
typedef enum {
  /* Google-provided options: */
  TCP_OPT_EOL = 0,  /* end of list */
  TCP_OPT_NOOP = 1,  /* no-op */
  TCP_OPT_MSS = 2,  /* maximum segment size */
  TCP_OPT_WND_SCALE = 3,  /* window scale factor */
  /* libnice extensions: */
  TCP_OPT_FIN_ACK = 254,  /* FIN-ACK support */
} TcpOption;
187 188


189 190 191 192 193
/*
#define FLAG_SYN 0x02
#define FLAG_ACK 0x10
*/

194 195 196
/* Segment flag bits; values may be OR-ed together.
 * NOTE: This must fit in 5 bits. This is used on the wire. */
typedef enum {
  FLAG_NONE = 0,
  FLAG_FIN = 1 << 0,
  FLAG_CTL = 1 << 1,
  FLAG_RST = 1 << 2,
} TcpFlags;
201 202 203 204 205 206 207 208

/* First payload byte of the connect control message (see
 * queue_connect_message()). */
#define CTL_CONNECT  0
//#define CTL_REDIRECT  1
/* NOTE(review): usage is not visible in this part of the file; presumably
 * another control-message type — confirm. */
#define CTL_EXTRA 255


/* NOTE(review): usage is not visible in this part of the file — confirm what
 * this bound applies to. */
#define CTRL_BOUND 0x80000000

209 210 211 212 213
/* Maximum segment lifetime (1 minute), in milliseconds.
 * RFC 793, §3.3 specifies 2 minutes; but Linux uses 1 minute, so let’s go with
 * that. */
#define TCP_MSL (60 * 1000)

// If there are no pending clocks, wake up every 4 seconds
#define DEFAULT_TIMEOUT 4000
// If the connection is closed, once per minute
#define CLOSED_TIMEOUT (60 * 1000)
/* Timeout after reaching the TIME_WAIT state, in milliseconds.
 * See: RFC 1122, §4.2.2.13.
 *
 * XXX: Since we can control the underlying layer’s channel ID, we can guarantee
 * delayed segments won’t affect subsequent connections, so can radically
 * shorten the TIME-WAIT timeout (to the extent that it basically doesn’t
 * exist). It would normally be (2 * TCP_MSL). */
#define TIME_WAIT_TIMEOUT 1
226 227 228 229

//////////////////////////////////////////////////////////////////////
// Helper Functions
//////////////////////////////////////////////////////////////////////
230 231 232 233
/* Local min()/max() helpers, defined only when not building for Windows.
 * NOTE(review): presumably the Windows headers already supply min/max —
 * confirm. As macros they may evaluate each argument twice, so arguments
 * must be free of side effects. */
#ifndef G_OS_WIN32
#  define min(first, second) ((first) < (second) ? (first) : (second))
#  define max(first, second) ((first) > (second) ? (first) : (second))
#endif
234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270

/* Clamps @middle: raises it to @lower if it is smaller, then caps the result
 * at @upper. */
static guint32
bound(guint32 lower, guint32 middle, guint32 upper)
{
  guint32 clamped = middle;

  if (lower > clamped)
    clamped = lower;
  if (upper <= clamped)
    clamped = upper;

  return clamped;
}

/* Returns TRUE if @middle lies in the closed interval running from @earlier
 * to @later, where the interval may wrap around the end of the guint32
 * range (i.e. @later may be numerically smaller than @earlier). */
static gboolean
time_is_between(guint32 later, guint32 middle, guint32 earlier)
{
  /* Plain interval: no wrap-around between the two endpoints. */
  if (earlier <= later)
    return (middle >= earlier) && (middle <= later);

  /* Wrapped interval: middle is inside unless it falls strictly in the gap
   * between later and earlier. */
  return (middle <= later) || (middle >= earlier);
}

/* Signed difference between two wrapping 32-bit timestamps: positive when
 * @later is at most half the 32-bit range ahead of @earlier, negative
 * otherwise. */
static gint32
time_diff(guint32 later, guint32 earlier)
{
  const guint32 half_range = 0x80000000;

  /* guint32 subtraction wraps modulo 2^32, so a single subtraction yields
   * the forward (or backward) distance whether or not the counter wrapped
   * between the two samples. */
  if (time_is_between(earlier + half_range, later, earlier)) {
    guint32 ahead = later - earlier;
    return (long) ahead;
  } else {
    guint32 behind = earlier - later;
    return -(long) behind;
  }
}

271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414
////////////////////////////////////////////////////////
// PseudoTcpFifo works exactly like FifoBuffer in libjingle
////////////////////////////////////////////////////////


/* Circular byte buffer. Buffered data starts at read_position and wraps
 * around the end of the storage back to index 0. */
typedef struct {
  guint8 *buffer;        /* backing storage of buffer_length bytes */
  gsize buffer_length;   /* capacity of the storage, in bytes */
  gsize data_length;     /* number of bytes currently buffered */
  gsize read_position;   /* index in buffer of the oldest buffered byte */
} PseudoTcpFifo;


/* Initialises @b as an empty FIFO with @size bytes of storage.
 *
 * Every field is set explicitly so that the FIFO is valid even when @b does
 * not live in zero-initialised memory. */
static void
pseudo_tcp_fifo_init (PseudoTcpFifo *b, gsize size)
{
  b->buffer = g_slice_alloc (size);
  b->buffer_length = size;
  b->data_length = 0;
  b->read_position = 0;
}

/* Releases the storage owned by @b (if any) and records that the FIFO no
 * longer has a buffer. data_length and read_position are left untouched. */
static void
pseudo_tcp_fifo_clear (PseudoTcpFifo *b)
{
  if (b->buffer != NULL) {
    g_slice_free1 (b->buffer_length, b->buffer);
    b->buffer = NULL;
  }

  b->buffer_length = 0;
}

/* Returns the number of bytes currently stored in @b. */
static gsize
pseudo_tcp_fifo_get_buffered (PseudoTcpFifo *b)
{
  gsize buffered = b->data_length;

  return buffered;
}

/* Changes the capacity of @b to @size bytes, preserving buffered data.
 *
 * Returns FALSE (leaving @b untouched) if the buffered data would not fit in
 * @size bytes; TRUE otherwise. After a successful resize the buffered data
 * is linearised at the start of the new storage. */
static gboolean
pseudo_tcp_fifo_set_capacity (PseudoTcpFifo *b, gsize size)
{
  guint8 *resized;
  gsize buffered, head_len;

  if (b->data_length > size)
    return FALSE;

  /* Compare against the current capacity, not the amount of buffered data:
   * otherwise a request to resize to exactly data_length bytes would be
   * silently ignored while still reporting success. */
  if (size == b->buffer_length)
    return TRUE;

  resized = g_slice_alloc (size);
  buffered = b->data_length;
  /* Bytes from read_position up to the end of the old storage... */
  head_len = min (buffered, b->buffer_length - b->read_position);

  memcpy (resized, &b->buffer[b->read_position], head_len);
  /* ...followed by whatever wrapped around to the start of the old storage. */
  memcpy (resized + head_len, &b->buffer[0], buffered - head_len);

  g_slice_free1 (b->buffer_length, b->buffer);
  b->buffer = resized;
  b->buffer_length = size;
  b->read_position = 0;

  return TRUE;
}

/* Discards the oldest @size buffered bytes from @b. Asserts that at least
 * @size bytes are buffered. */
static void
pseudo_tcp_fifo_consume_read_data (PseudoTcpFifo *b, gsize size)
{
  gsize advanced;

  g_assert (size <= b->data_length);

  advanced = b->read_position + size;
  b->data_length -= size;
  /* Wrap the read cursor around the end of the circular storage. */
  b->read_position = advanced % b->buffer_length;
}

/* Marks @size bytes following the currently buffered data as buffered.
 * Asserts that @size does not exceed the free space in @b. */
static void
pseudo_tcp_fifo_consume_write_buffer (PseudoTcpFifo *b, gsize size)
{
  g_assert (size <= b->buffer_length - b->data_length);

  b->data_length = b->data_length + size;
}

/* Returns the number of bytes of free space left in @b. */
static gsize
pseudo_tcp_fifo_get_write_remaining (PseudoTcpFifo *b)
{
  gsize free_space = b->buffer_length - b->data_length;

  return free_space;
}

/* Copies up to @bytes buffered bytes into @buffer, starting @offset bytes
 * past the oldest buffered byte, without consuming them.
 *
 * Returns the number of bytes copied, or 0 if @offset is at or beyond the
 * end of the buffered data. */
static gsize
pseudo_tcp_fifo_read_offset (PseudoTcpFifo *b, guint8 *buffer, gsize bytes,
    gsize offset)
{
  gsize start, count, first_chunk;

  /* EOS */
  if (offset >= b->data_length)
    return 0;

  start = (b->read_position + offset) % b->buffer_length;

  /* Never hand out more than what is buffered after the offset. */
  count = b->data_length - offset;
  if (bytes < count)
    count = bytes;

  /* The data may wrap: copy up to the end of the storage first, then the
   * remainder from its beginning. */
  first_chunk = b->buffer_length - start;
  if (count < first_chunk)
    first_chunk = count;

  memcpy (buffer, &b->buffer[start], first_chunk);
  memcpy (buffer + first_chunk, &b->buffer[0], count - first_chunk);

  return count;
}

/* Copies up to @bytes bytes from @buffer into the free space of @b, starting
 * @offset bytes past the end of the buffered data. data_length is not
 * changed by this function.
 *
 * Returns the number of bytes copied, or 0 if the write position lies at or
 * beyond the end of the storage. */
static gsize
pseudo_tcp_fifo_write_offset (PseudoTcpFifo *b, const guint8 *buffer,
    gsize bytes, gsize offset)
{
  gsize room, start, count, first_chunk;

  if (b->data_length + offset >= b->buffer_length)
    return 0;

  room = b->buffer_length - b->data_length - offset;
  start = (b->read_position + b->data_length + offset) % b->buffer_length;

  /* Never write more than the free space after the offset. */
  count = (bytes < room) ? bytes : room;

  /* The free region may wrap: fill up to the end of the storage first, then
   * continue from its beginning. */
  first_chunk = b->buffer_length - start;
  if (count < first_chunk)
    first_chunk = count;

  memcpy (&b->buffer[start], buffer, first_chunk);
  memcpy (&b->buffer[0], buffer + first_chunk, count - first_chunk);

  return count;
}

/* Reads and consumes up to @bytes of the oldest buffered data from @b into
 * @buffer. Returns the number of bytes read. */
static gsize
pseudo_tcp_fifo_read (PseudoTcpFifo *b, guint8 *buffer, gsize bytes)
{
  gsize taken = pseudo_tcp_fifo_read_offset (b, buffer, bytes, 0);

  /* Advance the read cursor past what was just copied out. */
  pseudo_tcp_fifo_consume_read_data (b, taken);

  return taken;
}

/* Appends up to @bytes bytes from @buffer to the data buffered in @b.
 * Returns the number of bytes actually stored. */
static gsize
pseudo_tcp_fifo_write (PseudoTcpFifo *b, const guint8 *buffer, gsize bytes)
{
  gsize stored = pseudo_tcp_fifo_write_offset (b, buffer, bytes, 0);

  /* Account for the bytes that were just copied in. */
  pseudo_tcp_fifo_consume_write_buffer (b, stored);

  return stored;
}


415 416 417 418
//////////////////////////////////////////////////////////////////////
// PseudoTcp
//////////////////////////////////////////////////////////////////////

419
/* Shutdown mode of a socket.
 * Only used if FIN-ACK support is disabled. */
typedef enum {
  SD_NONE,
  SD_GRACEFUL,
  SD_FORCEFUL
} Shutdown;

/* Hint passed to attempt_send() describing what triggered the send. */
typedef enum {
  sfNone,
  sfDelayedAck,
  sfImmediateAck,
  sfFin,
  sfRst,
  sfDuplicateAck,
} SendFlags;

typedef struct {
  guint32 conv, seq, ack;
437
  TcpFlags flags;
438 439 440 441 442 443 444 445 446
  guint16 wnd;
  const gchar * data;
  guint32 len;
  guint32 tsval, tsecr;
} Segment;

typedef struct {
  guint32 seq, len;
  guint8 xmit;
447
  TcpFlags flags;
448 449 450 451 452 453
} SSegment;

/* Bookkeeping record for a received range (entries of the receive list
 * freed in pseudo_tcp_socket_finalize()): starting sequence number and
 * length. */
typedef struct {
  guint32 seq, len;
} RSegment;

454 455 456 457 458 459 460 461
/**
 * ClosedownSource:
 * @CLOSEDOWN_LOCAL: Error detected locally, or connection forcefully closed
 * locally.
 * @CLOSEDOWN_REMOTE: RST segment received from the peer.
 *
 * Reasons for calling closedown().
 *
 * Since: 0.1.8
 */
typedef enum {
  CLOSEDOWN_LOCAL,
  CLOSEDOWN_REMOTE,
} ClosedownSource;

469 470 471 472

struct _PseudoTcpSocketPrivate {
  PseudoTcpCallbacks callbacks;

473
  Shutdown shutdown;  /* only used if !support_fin_ack */
474
  gboolean shutdown_reads;
475 476 477 478 479 480 481 482 483 484
  gint error;

  // TCB data
  PseudoTcpState state;
  guint32 conv;
  gboolean bReadEnable, bWriteEnable, bOutgoing;
  guint32 last_traffic;

  // Incoming data
  GList *rlist;
485 486 487
  guint32 rbuf_len, rcv_nxt, rcv_wnd, lastrecv;
  guint8 rwnd_scale; // Window scale factor
  PseudoTcpFifo rbuf;
488
  guint32 rcv_fin;  /* sequence number of the received FIN octet, or 0 */
489 490

  // Outgoing data
491
  GQueue slist;
492
  GQueue unsent_slist;
493 494
  guint32 sbuf_len, snd_nxt, snd_wnd, lastsend;
  guint32 snd_una;  /* oldest unacknowledged sequence number */
495 496 497
  guint8 swnd_scale; // Window scale factor
  PseudoTcpFifo sbuf;

498 499 500 501 502 503 504 505 506 507 508 509 510 511 512
  // Maximum segment size, estimated protocol level, largest segment sent
  guint32 mss, msslevel, largest, mtu_advise;
  // Retransmit timer
  guint32 rto_base;

  // Timestamp tracking
  guint32 ts_recent, ts_lastack;

  // Round-trip calculation
  guint32 rx_rttvar, rx_srtt, rx_rto;

  // Congestion avoidance, Fast retransmit/recovery, Delayed ACKs
  guint32 ssthresh, cwnd;
  guint8 dup_acks;
  guint32 recover;
513
  gboolean fast_recovery;
514
  guint32 t_ack;  /* time a delayed ack was scheduled; 0 if no acks scheduled */
515
  guint32 last_acked_ts;
516

517 518
  gboolean use_nagling;
  guint32 ack_delay;
519 520 521 522

  // This is used by unit tests to test backward compatibility of
  // PseudoTcp implementations that don't support window scaling.
  gboolean support_wnd_scale;
523 524 525 526

  /* Current time. Typically only used for testing, when non-zero. When zero,
   * the system monotonic clock is used. Units: monotonic milliseconds. */
  guint32 current_time;
527 528 529 530 531

  /* This is used by compatible implementations (with the TCP_OPT_FIN_ACK
   * option) to enable correct FIN-ACK connection termination. Defaults to
   * TRUE unless no compatible option is received. */
  gboolean support_fin_ack;
532 533
};

534 535 536 537
/* Ordering comparisons that tolerate wrap-around: a is considered larger
 * than b when (a - b), taken as an unsigned value, is non-zero and at most
 * (G_MAXUINT32 >> 1).
 * NOTE(review): assumes both operands are guint32 so that the subtraction
 * wraps modulo 2^32 — confirm at call sites. */
#define LARGER(a,b) (((a) - (b) - 1) < (G_MAXUINT32 >> 1))
#define LARGER_OR_EQUAL(a,b) (((a) - (b)) < (G_MAXUINT32 >> 1))
#define SMALLER(a,b) LARGER ((b),(a))
#define SMALLER_OR_EQUAL(a,b) LARGER_OR_EQUAL ((b),(a))
538 539 540 541 542 543 544

/* GObject property IDs; the order fixes the numeric IDs passed to
 * g_object_class_install_property(). */
enum
{
  PROP_CONVERSATION = 1,
  PROP_CALLBACKS,
  PROP_STATE,
  PROP_ACK_DELAY,
  PROP_NO_DELAY,
  PROP_RCV_BUF,
  PROP_SND_BUF,
  PROP_SUPPORT_FIN_ACK,
  LAST_PROPERTY
};


static void pseudo_tcp_socket_get_property (GObject *object, guint property_id,
    GValue *value,  GParamSpec *pspec);
static void pseudo_tcp_socket_set_property (GObject *object, guint property_id,
    const GValue *value, GParamSpec *pspec);
558
static void pseudo_tcp_socket_finalize (GObject *object);
559 560


561
static void queue_connect_message (PseudoTcpSocket *self);
562
static guint32 queue (PseudoTcpSocket *self, const gchar *data,
563
    guint32 len, TcpFlags flags);
564
static PseudoTcpWriteResult packet(PseudoTcpSocket *self, guint32 seq,
565
    TcpFlags flags, guint32 offset, guint32 len, guint32 now);
566 567 568
static gboolean parse (PseudoTcpSocket *self,
    const guint8 *_header_buf, gsize header_buf_len,
    const guint8 *data_buf, gsize data_buf_len);
569
static gboolean process(PseudoTcpSocket *self, Segment *seg);
570
static int transmit(PseudoTcpSocket *self, SSegment *sseg, guint32 now);
571
static void attempt_send(PseudoTcpSocket *self, SendFlags sflags);
572 573
static void closedown (PseudoTcpSocket *self, guint32 err,
    ClosedownSource source);
574
static void adjustMTU(PseudoTcpSocket *self);
575 576 577 578
static void parse_options (PseudoTcpSocket *self, const guint8 *data,
    guint32 len);
static void resize_send_buffer (PseudoTcpSocket *self, guint32 new_size);
static void resize_receive_buffer (PseudoTcpSocket *self, guint32 new_size);
579
static void set_state (PseudoTcpSocket *self, PseudoTcpState new_state);
580 581
static void set_state_established (PseudoTcpSocket *self);
static void set_state_closed (PseudoTcpSocket *self, guint32 err);
582

583
static const gchar *pseudo_tcp_state_get_name (PseudoTcpState state);
584 585
static gboolean pseudo_tcp_state_has_sent_fin (PseudoTcpState state);
static gboolean pseudo_tcp_state_has_received_fin (PseudoTcpState state);
586
static gboolean pseudo_tcp_state_has_received_fin_ack (PseudoTcpState state);
587

588 589 590
// The following logging is for detailed (packet-level) pseudotcp analysis only.
static PseudoTcpDebugLevel debug_level = PSEUDO_TCP_DEBUG_NONE;

/* Logs a debug message when the global debug level is at least @level.
 * Expects a PseudoTcpSocket pointer named `self` to be in scope at the call
 * site. Wrapped in G_STMT_START/G_STMT_END so that the expansion is a single
 * statement and cannot capture an `else` that follows the call. */
#define DEBUG(level, fmt, ...)                                          \
  G_STMT_START {                                                        \
    if (debug_level >= (level))                                         \
      g_log ((level) == PSEUDO_TCP_DEBUG_NORMAL ? "libnice-pseudotcp" : "libnice-pseudotcp-verbose", G_LOG_LEVEL_DEBUG, "PseudoTcpSocket %p %s: " fmt, \
          self, pseudo_tcp_state_get_name (self->priv->state), ## __VA_ARGS__); \
  } G_STMT_END
595 596 597 598 599 600

/* Sets the file-wide debug level that the DEBUG() macro compares against. */
void
pseudo_tcp_set_debug_level (PseudoTcpDebugLevel level)
{
  debug_level = level;
}
601

602 603 604 605 606 607 608 609 610 611 612 613 614 615 616
/* Returns the current time in milliseconds: the override stored by
 * pseudo_tcp_socket_set_time() when it is non-zero, otherwise the GLib
 * monotonic clock (microseconds divided by 1000, truncated to 32 bits). */
static guint32
get_current_time (PseudoTcpSocket *socket)
{
  guint32 override = socket->priv->current_time;

  if (G_LIKELY (override == 0))
    return g_get_monotonic_time () / 1000;

  return override;
}

/* Stores @current_time as the socket's time override. get_current_time()
 * returns this value while it is non-zero; setting it to zero makes the
 * socket use the monotonic clock again. */
void
pseudo_tcp_socket_set_time (PseudoTcpSocket *self, guint32 current_time)
{
  self->priv->current_time = current_time;
}

617 618 619 620 621 622 623
/* Class initialiser: hooks up the property accessors and finalizer, and
 * installs the socket's GObject properties. */
static void
pseudo_tcp_socket_class_init (PseudoTcpSocketClass *cls)
{
  GObjectClass *object_class = G_OBJECT_CLASS (cls);

  object_class->get_property = pseudo_tcp_socket_get_property;
  object_class->set_property = pseudo_tcp_socket_set_property;
  object_class->finalize = pseudo_tcp_socket_finalize;

  g_object_class_install_property (object_class, PROP_CONVERSATION,
      g_param_spec_uint ("conversation", "TCP Conversation ID",
          "The TCP Conversation ID",
          0, G_MAXUINT32, 0,
          G_PARAM_CONSTRUCT_ONLY | G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS));

  g_object_class_install_property (object_class, PROP_CALLBACKS,
      g_param_spec_pointer ("callbacks", "PseudoTcp socket callbacks",
          "Structure with the callbacks to call when PseudoTcp events happen",
          G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS));

  g_object_class_install_property (object_class, PROP_STATE,
      g_param_spec_uint ("state", "PseudoTcp State",
          "The current state (enum PseudoTcpState) of the PseudoTcp socket",
          PSEUDO_TCP_LISTEN, PSEUDO_TCP_CLOSED, PSEUDO_TCP_LISTEN,
          G_PARAM_READABLE | G_PARAM_STATIC_STRINGS));

  g_object_class_install_property (object_class, PROP_ACK_DELAY,
      g_param_spec_uint ("ack-delay", "ACK Delay",
          "Delayed ACK timeout (in milliseconds)",
          0, G_MAXUINT, DEFAULT_ACK_DELAY,
          G_PARAM_READWRITE| G_PARAM_STATIC_STRINGS));

  g_object_class_install_property (object_class, PROP_NO_DELAY,
      g_param_spec_boolean ("no-delay", "No Delay",
          "Disable the Nagle algorithm (like the TCP_NODELAY option)",
          DEFAULT_NO_DELAY,
          G_PARAM_READWRITE| G_PARAM_STATIC_STRINGS));

  g_object_class_install_property (object_class, PROP_RCV_BUF,
      g_param_spec_uint ("rcv-buf", "Receive Buffer",
          "Receive Buffer size",
          1, G_MAXUINT, DEFAULT_RCV_BUF_SIZE,
          G_PARAM_READWRITE| G_PARAM_STATIC_STRINGS));

  g_object_class_install_property (object_class, PROP_SND_BUF,
      g_param_spec_uint ("snd-buf", "Send Buffer",
          "Send Buffer size",
          1, G_MAXUINT, DEFAULT_SND_BUF_SIZE,
          G_PARAM_READWRITE| G_PARAM_STATIC_STRINGS));

  /**
   * PseudoTcpSocket:support-fin-ack:
   *
   * Whether to support the FIN–ACK extension to the pseudo-TCP protocol for
   * this socket. The extension is only compatible with other libnice pseudo-TCP
   * stacks, and not with Jingle pseudo-TCP stacks. If enabled, support is
   * negotiated on connection setup, so it is safe for a #PseudoTcpSocket with
   * support enabled to be used with one with it disabled, or with a Jingle
   * pseudo-TCP socket which doesn’t support it at all.
   *
   * Support is enabled by default.
   *
   * Since: 0.1.8
   */
  g_object_class_install_property (object_class, PROP_SUPPORT_FIN_ACK,
      g_param_spec_boolean ("support-fin-ack", "Support FIN–ACK",
          "Whether to enable the optional FIN–ACK support.",
          TRUE,
          G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY | G_PARAM_STATIC_STRINGS));
}


/* GObject get_property implementation: copies the requested property from
 * the socket's private state into @value. Unknown IDs produce the standard
 * GObject warning. */
static void
pseudo_tcp_socket_get_property (GObject *object,
                                  guint property_id,
                                  GValue *value,
                                  GParamSpec *pspec)
{
  PseudoTcpSocket *self = PSEUDO_TCP_SOCKET (object);

  switch (property_id) {
    case PROP_CONVERSATION:
      g_value_set_uint (value, self->priv->conv);
      break;
    case PROP_CALLBACKS:
      /* Hands out a pointer to the socket's own copy of the callbacks. */
      g_value_set_pointer (value, (gpointer) &self->priv->callbacks);
      break;
    case PROP_STATE:
      g_value_set_uint (value, self->priv->state);
      break;
    case PROP_ACK_DELAY:
      g_value_set_uint (value, self->priv->ack_delay);
      break;
    case PROP_NO_DELAY:
      /* "no-delay" is the inverse of the internal Nagle flag. */
      g_value_set_boolean (value, !self->priv->use_nagling);
      break;
    case PROP_RCV_BUF:
      g_value_set_uint (value, self->priv->rbuf_len);
      break;
    case PROP_SND_BUF:
      g_value_set_uint (value, self->priv->sbuf_len);
      break;
    case PROP_SUPPORT_FIN_ACK:
      g_value_set_boolean (value, self->priv->support_fin_ack);
      break;
    default:
      G_OBJECT_WARN_INVALID_PROPERTY_ID (object, property_id, pspec);
      break;
  }
}

/* GObject set_property implementation: stores @value into the socket's
 * private state. The buffer-size properties may only be changed while the
 * socket is still in the LISTEN state. Unknown IDs produce the standard
 * GObject warning. */
static void
pseudo_tcp_socket_set_property (GObject *object,
                                  guint property_id,
                                  const GValue *value,
                                  GParamSpec *pspec)
{
  PseudoTcpSocket *self = PSEUDO_TCP_SOCKET (object);

  switch (property_id) {
    case PROP_CONVERSATION:
      self->priv->conv = g_value_get_uint (value);
      break;
    case PROP_CALLBACKS:
      {
        PseudoTcpCallbacks *c = g_value_get_pointer (value);
        /* A pointer property may legitimately carry NULL; keep the current
         * callbacks in that case rather than dereferencing it. The structure
         * is copied, so the caller keeps ownership of *c. */
        if (c != NULL)
          self->priv->callbacks = *c;
      }
      break;
    case PROP_ACK_DELAY:
      self->priv->ack_delay = g_value_get_uint (value);
      break;
    case PROP_NO_DELAY:
      /* "no-delay" is the inverse of the internal Nagle flag. */
      self->priv->use_nagling = !g_value_get_boolean (value);
      break;
    case PROP_RCV_BUF:
      g_return_if_fail (self->priv->state == PSEUDO_TCP_LISTEN);
      resize_receive_buffer (self, g_value_get_uint (value));
      break;
    case PROP_SND_BUF:
      g_return_if_fail (self->priv->state == PSEUDO_TCP_LISTEN);
      resize_send_buffer (self, g_value_get_uint (value));
      break;
    case PROP_SUPPORT_FIN_ACK:
      self->priv->support_fin_ack = g_value_get_boolean (value);
      break;
    default:
      G_OBJECT_WARN_INVALID_PROPERTY_ID (object, property_id, pspec);
      break;
  }
}

static void
770
pseudo_tcp_socket_finalize (GObject *object)
771 772 773
{
  PseudoTcpSocket *self = PSEUDO_TCP_SOCKET (object);
  PseudoTcpSocketPrivate *priv = self->priv;
774
  GList *i;
775
  SSegment *sseg;
776 777 778 779

  if (priv == NULL)
    return;

780
  while ((sseg = g_queue_pop_head (&priv->slist)))
781
    g_slice_free (SSegment, sseg);
782
  g_queue_clear (&priv->unsent_slist);
783 784 785 786 787 788
  for (i = priv->rlist; i; i = i->next) {
    RSegment *rseg = i->data;
    g_slice_free (RSegment, rseg);
  }
  g_list_free (priv->rlist);
  priv->rlist = NULL;
789

790 791 792
  pseudo_tcp_fifo_clear (&priv->rbuf);
  pseudo_tcp_fifo_clear (&priv->sbuf);

793 794 795
  g_free (priv);
  self->priv = NULL;

796 797
  if (G_OBJECT_CLASS (pseudo_tcp_socket_parent_class)->finalize)
    G_OBJECT_CLASS (pseudo_tcp_socket_parent_class)->finalize (object);
798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813
}


/* Instance initialiser: allocates the private structure and sets every
 * field to its initial value. The socket starts in the LISTEN state. */
static void
pseudo_tcp_socket_init (PseudoTcpSocket *obj)
{
  /* Use g_new0, and do not use the GObject instance-private mechanism,
   * because the size of our private data is too big (150KB+) and
   * g_slice_alloc cannot allocate it. So we handle the private ourselves */
  PseudoTcpSocketPrivate *priv = g_new0 (PseudoTcpSocketPrivate, 1);

  obj->priv = priv;

  priv->shutdown = SD_NONE;
  priv->error = 0;

  priv->rbuf_len = DEFAULT_RCV_BUF_SIZE;
  pseudo_tcp_fifo_init (&priv->rbuf, priv->rbuf_len);
  priv->sbuf_len = DEFAULT_SND_BUF_SIZE;
  pseudo_tcp_fifo_init (&priv->sbuf, priv->sbuf_len);

  priv->state = PSEUDO_TCP_LISTEN;
  priv->conv = 0;
  g_queue_init (&priv->slist);
  g_queue_init (&priv->unsent_slist);
  priv->rcv_wnd = priv->rbuf_len;
  priv->rwnd_scale = priv->swnd_scale = 0;
  priv->snd_nxt = 0;
  priv->snd_wnd = 1;
  priv->snd_una = priv->rcv_nxt = 0;
  priv->bReadEnable = TRUE;
  priv->bWriteEnable = FALSE;
  priv->rcv_fin = 0;

  priv->t_ack = 0;

  priv->msslevel = 0;
  priv->largest = 0;
  /* Start from the smallest supported packet size. */
  priv->mss = MIN_PACKET - PACKET_OVERHEAD;
  priv->mtu_advise = DEF_MTU;

  priv->rto_base = 0;

  priv->cwnd = 2 * priv->mss;
  priv->ssthresh = priv->rbuf_len;
  priv->lastrecv = priv->lastsend = priv->last_traffic = 0;
  priv->bOutgoing = FALSE;

  priv->dup_acks = 0;
  priv->recover = 0;
  priv->last_acked_ts = 0;

  priv->ts_recent = priv->ts_lastack = 0;

  priv->rx_rto = DEF_RTO;
  priv->rx_srtt = priv->rx_rttvar = 0;

  priv->ack_delay = DEFAULT_ACK_DELAY;
  priv->use_nagling = !DEFAULT_NO_DELAY;

  priv->support_wnd_scale = TRUE;
  priv->support_fin_ack = TRUE;
}

862 863 864 865 866 867 868 869 870 871
/* Creates a new PseudoTcpSocket, passing @conversation and @callbacks as the
 * "conversation" and "callbacks" construction properties. */
PseudoTcpSocket *pseudo_tcp_socket_new (guint32 conversation,
    PseudoTcpCallbacks *callbacks)
{
  PseudoTcpSocket *sock;

  sock = g_object_new (PSEUDO_TCP_SOCKET_TYPE,
      "conversation", conversation,
      "callbacks", callbacks,
      NULL);

  return sock;
}

872 873 874 875
/* Builds the connect control message and queues it with FLAG_CTL.
 *
 * The payload is the CTL_CONNECT byte followed by up to two options, each
 * encoded as kind, length (1) and one value byte: the window-scale option
 * when support_wnd_scale is set, and the FIN-ACK option when support_fin_ack
 * is set. At most 7 of the 8 buffer bytes are used. */
static void
queue_connect_message (PseudoTcpSocket *self)
{
  PseudoTcpSocketPrivate *priv = self->priv;
  guint8 buf[8];
  gsize size = 0;

  buf[size++] = CTL_CONNECT;

  if (priv->support_wnd_scale) {
    buf[size++] = TCP_OPT_WND_SCALE;
    buf[size++] = 1;
    buf[size++] = priv->rwnd_scale;
  }

  if (priv->support_fin_ack) {
    buf[size++] = TCP_OPT_FIN_ACK;
    buf[size++] = 1;  /* option length; zero is invalid (RFC 1122, §4.2.2.5) */
    buf[size++] = 0;  /* currently unused */
  }

  /* The send window is set to exactly the size of the connect message. */
  priv->snd_wnd = size;

  queue (self, (char *) buf, size, FLAG_CTL);
}

898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915
/* Queues a zero-length segment carrying FLAG_FIN. Asserts that FIN-ACK
 * support is enabled on the socket. */
static void
queue_fin_message (PseudoTcpSocket *self)
{
  g_assert (self->priv->support_fin_ack);

  /* FIN segments are always zero-length. */
  queue (self, "", 0, FLAG_FIN);
}

/* Queue a zero-length segment flagged FLAG_RST on @self.
 * Asserts that FIN-ACK support is enabled on the socket; the caller is
 * responsible for triggering the actual send afterwards. */
static void
queue_rst_message (PseudoTcpSocket *self)
{
  g_assert (self->priv->support_fin_ack);

  /* RST segments are always zero-length. */
  queue (self, "", 0, FLAG_RST);
}

916
gboolean
917 918 919 920
pseudo_tcp_socket_connect(PseudoTcpSocket *self)
{
  PseudoTcpSocketPrivate *priv = self->priv;

921
  if (priv->state != PSEUDO_TCP_LISTEN) {
922
    priv->error = EINVAL;
923
    return FALSE;
924 925
  }

926
  set_state (self, PSEUDO_TCP_SYN_SENT);
927

928
  queue_connect_message (self);
929 930
  attempt_send(self, sfNone);

931
  return TRUE;
932 933 934 935 936 937 938
}

/* Record the advised @mtu on @self. If the connection is already
 * ESTABLISHED the MTU-dependent state is re-adjusted immediately; otherwise
 * only the advised value is stored. */
void
pseudo_tcp_socket_notify_mtu(PseudoTcpSocket *self, guint16 mtu)
{
  PseudoTcpSocketPrivate *priv = self->priv;

  priv->mtu_advise = mtu;

  if (priv->state != PSEUDO_TCP_ESTABLISHED) {
    return;
  }

  adjustMTU (self);
}

/* Service the socket's timers at the current time.
 *
 * In order, this:
 *  - closes a socket sitting in TIME-WAIT, and resends the FIN of a socket
 *    sitting in LAST-ACK (both only when FIN-ACK support is enabled);
 *  - retransmits the oldest queued segment if the retransmission timer has
 *    expired, shrinking the congestion window and backing off the timer;
 *  - probes a closed send window, giving up if nothing has been received
 *    for 15 seconds;
 *  - sends any delayed ACK which is now due.
 *
 * Does nothing if the socket is already CLOSED. */
void
pseudo_tcp_socket_notify_clock(PseudoTcpSocket *self)
{
  PseudoTcpSocketPrivate *priv = self->priv;
  guint32 now = get_current_time (self);

  if (priv->state == PSEUDO_TCP_CLOSED) {
    return;
  }

  /* TIME-WAIT: any delayed segments are assumed to have passed, so the
   * connection can be considered closed from both ends.
   * FIXME: This should probably actually compare a timestamp before
   * operating. */
  if (priv->state == PSEUDO_TCP_TIME_WAIT && priv->support_fin_ack) {
    DEBUG (PSEUDO_TCP_DEBUG_NORMAL,
        "Notified clock in TIME-WAIT state; closing connection.");
    set_state_closed (self, 0);
  }

  /* LAST-ACK: our FIN has not been ACKed yet, so send it again.
   * FIXME: This should probably actually compare a timestamp before
   * operating. */
  if (priv->state == PSEUDO_TCP_LAST_ACK && priv->support_fin_ack) {
    DEBUG (PSEUDO_TCP_DEBUG_NORMAL,
        "Notified clock in LAST-ACK state; resending FIN segment.");
    queue_fin_message (self);
    attempt_send (self, sfFin);
  }

  /* Retransmission timer. */
  if (priv->rto_base != 0 &&
      time_diff (priv->rto_base + priv->rx_rto, now) <= 0) {
    if (g_queue_get_length (&priv->slist) != 0) {
      guint32 in_flight;
      guint32 backoff_cap;
      int status;

      DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "timeout retransmit (rto: %u) "
          "(rto_base: %u) (now: %u) (dup_acks: %u)",
          priv->rx_rto, priv->rto_base, now, (guint) priv->dup_acks);

      /* Resend the segment at the head of the send list. */
      status = transmit (self, g_queue_peek_head (&priv->slist), now);
      if (status != 0) {
        DEBUG (PSEUDO_TCP_DEBUG_NORMAL,
            "Error transmitting segment. Closing down.");
        closedown (self, status, CLOSEDOWN_LOCAL);
        return;
      }

      /* Halve the slow-start threshold (but keep it at two segments or
       * more) and drop the congestion window to a single segment. */
      in_flight = priv->snd_nxt - priv->snd_una;
      priv->ssthresh = max(in_flight / 2, 2 * priv->mss);
      DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "ssthresh: %u = (nInFlight: %u / 2) + "
          "2 * mss: %u", priv->ssthresh, in_flight, priv->mss);
      priv->cwnd = priv->mss;

      /* Double the retransmit timeout; the cap is lower while the
       * connection is still being set up. */
      if (priv->state < PSEUDO_TCP_ESTABLISHED) {
        backoff_cap = DEF_RTO;
      } else {
        backoff_cap = MAX_RTO;
      }
      priv->rx_rto = min(backoff_cap, priv->rx_rto * 2);
      priv->rto_base = now;

      priv->recover = priv->snd_nxt;
      if (priv->dup_acks >= 3) {
        priv->dup_acks = 0;
        priv->fast_recovery = FALSE;
        DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "exit recovery on timeout");
      }
    } else {
      /* The retransmission timer must not be armed with nothing queued. */
      g_assert_not_reached ();
    }
  }

  /* Closed send window: probe it once the retransmit interval has elapsed
   * since the last send. */
  if ((priv->snd_wnd == 0)
        && (time_diff (priv->lastsend + priv->rx_rto, now) <= 0)) {
    if (time_diff (now, priv->lastrecv) >= 15000) {
      DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Receive window closed. Closing down.");
      closedown (self, ECONNABORTED, CLOSEDOWN_LOCAL);
      return;
    }

    /* Window probe. */
    packet (self, priv->snd_nxt - 1, 0, 0, 0, now);
    priv->lastsend = now;

    /* Back off the retransmit timer. */
    priv->rx_rto = min(MAX_RTO, priv->rx_rto * 2);
  }

  /* Delayed ACK which has become due. */
  if (priv->t_ack && (time_diff (priv->t_ack + priv->ack_delay, now) <= 0)) {
    packet (self, priv->snd_nxt, 0, 0, 0, now);
  }
}

/* Feed a received packet, held in one contiguous @buffer of @len bytes, to
 * the socket.
 *
 * The first HEADER_SIZE bytes are treated as the header and the remainder
 * as the payload.
 *
 * Returns: the result of parsing the packet; FALSE with the socket error
 * set to EMSGSIZE if @len exceeds MAX_PACKET, or to EINVAL if @len is
 * smaller than HEADER_SIZE. */
gboolean
pseudo_tcp_socket_notify_packet(PseudoTcpSocket *self,
    const gchar * buffer, guint32 len)
{
  guint8 *bytes = (guint8 *) buffer;
  gboolean parsed;

  if (len > MAX_PACKET) {
    /* Packet too large. */
    self->priv->error = EMSGSIZE;
    return FALSE;
  }

  if (len < HEADER_SIZE) {
    /* Packet too small to hold a header. */
    self->priv->error = EINVAL;
    return FALSE;
  }

  /* Parsing may invoke callbacks which close the socket, so keep it alive
   * with an extra reference until parsing has finished. */
  g_object_ref (self);
  parsed = parse (self, bytes, HEADER_SIZE,
      bytes + HEADER_SIZE, len - HEADER_SIZE);
  g_object_unref (self);

  return parsed;
}

/* Feed a received packet, held in a #NiceInputMessage, to the socket.
 *
 * A single-buffer message is handed to pseudo_tcp_socket_notify_packet().
 * Otherwise the message is assumed to have exactly two buffers: a
 * HEADER_SIZE-byte (24-byte) one containing the header, and a bigger one for
 * the data.
 *
 * Returns: the result of parsing the packet; FALSE with the socket error set
 * to EMSGSIZE if the message length exceeds MAX_PACKET, or to EINVAL if it is
 * smaller than HEADER_SIZE. These are the same error codes that
 * pseudo_tcp_socket_notify_packet() reports for the same conditions. */
gboolean
pseudo_tcp_socket_notify_message (PseudoTcpSocket *self,
    NiceInputMessage *message)
{
  gboolean retval;

  g_assert_cmpuint (message->n_buffers, >, 0);

  if (message->n_buffers == 1)
    return pseudo_tcp_socket_notify_packet (self, message->buffers[0].buffer,
        message->buffers[0].size);

  g_assert_cmpuint (message->n_buffers, ==, 2);
  g_assert_cmpuint (message->buffers[0].size, ==, HEADER_SIZE);

  if (message->length > MAX_PACKET) {
    /* Packet too large. */
    self->priv->error = EMSGSIZE;
    return FALSE;
  } else if (message->length < HEADER_SIZE) {
    /* Packet too small to hold a header. */
    self->priv->error = EINVAL;
    return FALSE;
  }

  /* Hold a reference to the PseudoTcpSocket during parsing, since it may be
   * closed from within a callback. */
  g_object_ref (self);
  retval = parse (self, message->buffers[0].buffer, message->buffers[0].size,
      message->buffers[1].buffer, message->length - message->buffers[0].size);
  g_object_unref (self);

  return retval;
}

gboolean
1104
pseudo_tcp_socket_get_next_clock(PseudoTcpSocket *self, guint64 *timeout)
1105 1106
{
  PseudoTcpSocketPrivate *priv = self->priv;
1107
  guint32 now = get_current_time (self);
1108
  gsize snd_buffered;
1109 1110 1111 1112 1113 1114 1115
  guint32 closed_timeout;

  if (priv->shutdown == SD_FORCEFUL) {
    if (priv->support_fin_ack) {
      DEBUG (PSEUDO_TCP_DEBUG_NORMAL,
          "‘Forceful’ shutdown used when FIN-ACK support is enabled");
    }
1116

1117 1118 1119
    /* Transition to the CLOSED state. */
    closedown (self, 0, CLOSEDOWN_REMOTE);

1120
    return FALSE;
1121
  }
1122

1123
  snd_buffered = pseudo_tcp_fifo_get_buffered (&priv->sbuf);
1124
  if ((priv->shutdown == SD_GRACEFUL)
1125
      && ((priv->state != PSEUDO_TCP_ESTABLISHED)
1126
          || ((snd_buffered == 0) && (priv->t_ack == 0)))) {
1127 1128 1129 1130 1131
    if (priv->support_fin_ack) {
      DEBUG (PSEUDO_TCP_DEBUG_NORMAL,
          "‘Graceful’ shutdown used when FIN-ACK support is enabled");
    }

1132 1133 1134
    /* Transition to the CLOSED state. */
    closedown (self, 0, CLOSEDOWN_REMOTE);

1135 1136 1137 1138 1139 1140 1141 1142 1143 1144
    return FALSE;
  }

  /* FIN-ACK support. The timeout for closing the socket if nothing is received
   * varies depending on whether the socket is waiting in the TIME-WAIT state
   * for delayed segments to pass.
   *
   * See: http://vincent.bernat.im/en/blog/2014-tcp-time-wait-state-linux.html
   */
  closed_timeout = CLOSED_TIMEOUT;
1145
  if (priv->support_fin_ack && priv->state == PSEUDO_TCP_TIME_WAIT)
1146 1147
    closed_timeout = TIME_WAIT_TIMEOUT;

1148
  if (priv->support_fin_ack && priv->state == PSEUDO_TCP_CLOSED) {
1149 1150 1151
    return FALSE;
  }

1152
  if (*timeout == 0 || *timeout < now)
1153
    *timeout = now + closed_timeout;
1154

1155
  if (priv->support_fin_ack && priv->state == PSEUDO_TCP_TIME_WAIT) {
1156 1157 1158 1159
    *timeout = min (*timeout, now + TIME_WAIT_TIMEOUT);
    return TRUE;
  }

1160
  if (priv->state == PSEUDO_TCP_CLOSED && !priv->support_fin_ack) {
1161
    *timeout = min (*timeout, now + CLOSED_TIMEOUT);
1162 1163 1164
    return TRUE;
  }

1165
  *timeout = min (*timeout, now + DEFAULT_TIMEOUT);
1166 1167

  if (priv->t_ack) {
1168
    *timeout = min(*timeout, priv->t_ack + priv->ack_delay);
1169 1170
  }
  if (priv->rto_base) {
1171
    *timeout = min(*timeout, priv->rto_base + priv->rx_rto);
1172 1173
  }
  if (priv->snd_wnd == 0) {
1174
    *timeout = min(*timeout, priv->lastsend + priv->rx_rto);
1175 1176 1177 1178 1179 1180 1181 1182 1183 1184
  }

  return TRUE;
}


/* Read up to @len bytes of received data into @buffer.
 *
 * Returns: the number of bytes read; 0 if reads have been shut down
 * (FIN-ACK enabled), if the socket is closed (FIN-ACK disabled) or if @len
 * is 0; -1 with the socket error set to ENOTCONN if FIN-ACK is disabled and
 * the connection is not ESTABLISHED, or to EWOULDBLOCK if no data is
 * available and the peer has not finished sending. */
gint
pseudo_tcp_socket_recv(PseudoTcpSocket *self, char * buffer, size_t len)
{
  PseudoTcpSocketPrivate *priv = self->priv;
  gsize n_read;
  gsize free_space;

  if (priv->support_fin_ack) {
    /* Received a FIN from the peer, so return 0. RFC 793, §3.5, Case 2. */
    if (priv->shutdown_reads) {
      return 0;
    }
  } else {
    /* Without FIN-ACK support, a closed socket simply reads as EOF... */
    if (pseudo_tcp_socket_is_closed (self)) {
      return 0;
    }

    /* ...and any other state than ESTABLISHED is an error. */
    if (priv->state != PSEUDO_TCP_ESTABLISHED) {
      priv->error = ENOTCONN;
      return -1;
    }
  }

  if (len == 0) {
    return 0;
  }

  n_read = pseudo_tcp_fifo_read (&priv->rbuf, (guint8 *) buffer, len);

  /* The receive buffer was empty and more data may still arrive from the
   * peer: ask to be notified and report that the read would block. */
  if (n_read == 0 &&
      !pseudo_tcp_state_has_received_fin (priv->state) &&
      !pseudo_tcp_state_has_received_fin_ack (priv->state)) {
    priv->bReadEnable = TRUE;
    priv->error = EWOULDBLOCK;
    return -1;
  }

  free_space = pseudo_tcp_fifo_get_write_remaining (&priv->rbuf);

  /* Only grow the advertised receive window once enough space has been
   * freed up. */
  if (free_space - priv->rcv_wnd >=
      min (priv->rbuf_len / 2, priv->mss)) {
    /* !?! Not sure about this was closed business */
    gboolean window_was_closed = (priv->rcv_wnd == 0);

    priv->rcv_wnd = free_space;

    /* Tell the peer straight away that a closed window has reopened. */
    if (window_was_closed) {
      attempt_send (self, sfImmediateAck);
    }
  }

  return n_read;
}

/* Queue up to @len bytes from @buffer for sending and attempt to send.
 *
 * Returns: the number of bytes queued, or -1 with the socket error set to
 * EPIPE (a FIN has already been sent), ENOTCONN (otherwise not ESTABLISHED)
 * or EWOULDBLOCK (no space left in the send buffer). */
gint
pseudo_tcp_socket_send(PseudoTcpSocket *self, const char * buffer, guint32 len)
{
  PseudoTcpSocketPrivate *priv = self->priv;
  gint queued;
  gsize free_space;

  if (priv->state != PSEUDO_TCP_ESTABLISHED) {
    if (pseudo_tcp_state_has_sent_fin (priv->state)) {
      priv->error = EPIPE;
    } else {
      priv->error = ENOTCONN;
    }
    return -1;
  }

  free_space = pseudo_tcp_fifo_get_write_remaining (&priv->sbuf);

  if (free_space == 0) {
    /* Send buffer full: ask to be notified when writing is possible. */
    priv->bWriteEnable = TRUE;
    priv->error = EWOULDBLOCK;
    return -1;
  }

  queued = queue (self, buffer, len, FLAG_NONE);
  attempt_send (self, sfNone);

  /* Partial write: the caller will want to know when there is more room. */
  if (queued > 0 && (guint32) queued < len) {
    priv->bWriteEnable = TRUE;
  }

  return queued;
}

/* Close @self. With @force set and the socket not already CLOSED, the
 * connection is torn down immediately with ECONNABORTED; in every other
 * case this is equivalent to shutting down both directions. */
void
pseudo_tcp_socket_close(PseudoTcpSocket *self, gboolean force)
{
  PseudoTcpSocketPrivate *priv = self->priv;
  const char *mode = force ? "forcefully" : "gracefully";

  DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Closing socket %p %s", self, mode);

  if (!force || priv->state == PSEUDO_TCP_CLOSED) {
    /* Fall back to shutdown(). */
    pseudo_tcp_socket_shutdown (self, PSEUDO_TCP_SHUTDOWN_RDWR);
    return;
  }

  /* Forced closure by sending an RST segment. RFC 1122, §4.2.2.13. */
  closedown (self, ECONNABORTED, CLOSEDOWN_LOCAL);
}

/* Shut down the read side, the write side or both sides of @self, as
 * selected by @how.
 *
 * Without FIN-ACK support this only records a pending graceful shutdown (if
 * no shutdown is pending yet). With FIN-ACK support, the read side is marked
 * as shut down when requested, and shutting down the write side sends a FIN
 * (or aborts the connection) depending on the current state. */
void
pseudo_tcp_socket_shutdown (PseudoTcpSocket *self, PseudoTcpShutdown how)
{
  PseudoTcpSocketPrivate *priv = self->priv;

  DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Shutting down socket %p: %u", self, how);

  /* Everything past this check is FIN-ACK--only. */
  if (!priv->support_fin_ack) {
    if (priv->shutdown == SD_NONE) {
      priv->shutdown = SD_GRACEFUL;
    }
    return;
  }

  /* What needs shutting down? The write side is handled further down. */
  if (how == PSEUDO_TCP_SHUTDOWN_RD || how == PSEUDO_TCP_SHUTDOWN_RDWR) {
    priv->shutdown_reads = TRUE;
  } else if (how != PSEUDO_TCP_SHUTDOWN_WR) {
    DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Invalid shutdown method: %u.", how);
  }

  if (how == PSEUDO_TCP_SHUTDOWN_RD) {
    return;
  }

  /* Unforced write closure. */
  switch (priv->state) {
  case PSEUDO_TCP_LISTEN:
  case PSEUDO_TCP_SYN_SENT:
    /* Just abort the connection without completing the handshake. */
    set_state_closed (self, 0);
    break;

  case PSEUDO_TCP_SYN_RECEIVED:
  case PSEUDO_TCP_ESTABLISHED:
    /* Local user initiating the close: RFC 793, §3.5, Cases 1 and 3.
     * If there is pending receive data, send RST instead of FIN;
     * see RFC 1122, §4.2.2.13. */
    if (pseudo_tcp_socket_get_available_bytes (self) > 0) {
      closedown (self, ECONNABORTED, CLOSEDOWN_LOCAL);
    } else {
      queue_fin_message (self);
      attempt_send (self, sfFin);
      set_state (self, PSEUDO_TCP_FIN_WAIT_1);
    }
    break;

  case PSEUDO_TCP_CLOSE_WAIT:
    /* Remote user initiating the close: RFC 793, §3.5, Case 2.
     * A FIN was previously received from the peer; now the user is closing
     * the local end of the connection. */
    queue_fin_message (self);
    attempt_send (self, sfFin);
    set_state (self, PSEUDO_TCP_LAST_ACK);
    break;

  /* Already closed on both sides. */
  case PSEUDO_TCP_CLOSING:
  case PSEUDO_TCP_CLOSED:
  /* Already closed locally. */
  case PSEUDO_TCP_FIN_WAIT_1:
  case PSEUDO_TCP_FIN_WAIT_2:
  case PSEUDO_TCP_TIME_WAIT:
  case PSEUDO_TCP_LAST_ACK:
  default:
    /* Nothing to do. */
    break;
  }
}

/* Return the error code currently stored on @self. */
int
pseudo_tcp_socket_get_error(PseudoTcpSocket *self)
{
  return self->priv->error;
}

//
// Internal Implementation
//

static guint32
1372
queue (PseudoTcpSocket *self, const gchar * data, guint32 len, TcpFlags flags)
1373 1374
{
  PseudoTcpSocketPrivate *priv = self->priv;
1375
  gsize available_space;
1376

1377 1378
  available_space = pseudo_tcp_fifo_get_write_remaining (&priv->sbuf);
  if (len > available_space) {
1379
    g_assert (flags == FLAG_NONE);
1380
    len = available_space;
1381 1382 1383 1384
  }

  // We can concatenate data if the last segment is the same type
  // (control v. regular data), and has not been transmitted yet
1385
  if (g_queue_get_length (&priv->slist) &&
1386
      (((SSegment *)g_queue_peek_tail (&priv->slist))->flags == flags) &&
1387 1388
      (((SSegment *)g_queue_peek_tail (&priv->slist))->xmit == 0)) {
    ((SSegment *)g_queue_peek_tail (&priv->slist))->len += len;
1389 1390
  } else {
    SSegment *sseg = g_slice_new0 (SSegment);
1391 1392 1393
    gsize snd_buffered = pseudo_tcp_fifo_get_buffered (&priv->sbuf);

    sseg->seq = priv->snd_una + snd_buffered;
1394
    sseg->len = len;
1395
    sseg->flags = flags;
1396
    g_queue_push_tail (&priv->slist, sseg);
1397
    g_queue_push_tail (&priv->unsent_slist, sseg);
1398 1399 1400
  }

  //LOG(LS_INFO) << "PseudoTcp::queue - priv->slen = " << priv->slen;
1401
  return pseudo_tcp_fifo_write (&priv->sbuf, (guint8*) data, len);;
1402 1403
}

1404 1405 1406 1407 1408 1409 1410 1411 1412
// Creates a packet and submits it to the network. This method can either
// send payload or just an ACK packet.
//
// |seq| is the sequence number of this packet.
// |flags| is the flags for sending this packet.
// |offset| is the offset to read from |m_sbuf|.
// |len| is the number of bytes to read from |m_sbuf| as payload. If this
// value is 0 then this is an ACK packet, otherwise this packet has payload.

1413
static PseudoTcpWriteResult
1414
packet(PseudoTcpSocket *self, guint32 seq, TcpFlags flags,
1415
    guint32 offset, guint32 len, guint32 now)
1416 1417
{
  PseudoTcpSocketPrivate *priv = self->priv;
1418 1419 1420 1421 1422
  union {
    guint8 u8[MAX_PACKET];
    guint16 u16[MAX_PACKET / 2];
    guint32 u32[MAX_PACKET / 4];
  } buffer;
1423 1424 1425 1426
  PseudoTcpWriteResult wres = WR_SUCCESS;

  g_assert(HEADER_SIZE + len <= MAX_PACKET);

1427 1428 1429 1430 1431
  *buffer.u32 = htonl(priv->conv);
  *(buffer.u32 + 1) = htonl(seq);
  *(buffer.u32 + 2) = htonl(priv->rcv_nxt);
  buffer.u8[12] = 0;
  buffer.u8[13] = flags;
1432
  *(buffer.u16 + 7) = htons((guint16)(priv->rcv_wnd >> priv->rwnd_scale));
1433 1434

  // Timestamp computations
1435 1436
  *(buffer.u32 + 4) = htonl(now);
  *(buffer.u32 + 5) = htonl(priv->ts_recent);
1437 1438
  priv->ts_lastack = priv->rcv_nxt;

1439 1440 1441 1442 1443 1444 1445
  if (len) {
    gsize bytes_read;

    bytes_read = pseudo_tcp_fifo_read_offset (&priv->sbuf, buffer.u8 + HEADER_SIZE,
        len, offset);
    g_assert (bytes_read == len);
  }
1446

1447
  DEBUG (PSEUDO_TCP_DEBUG_VERBOSE, "Sending <CONV=%u><FLG=%u><SEQ=%u:%u><ACK=%u>"
1448
      "<WND=%u><TS=%u><TSR=%u><LEN=%u>",
1449 1450 1451
      priv->conv, (unsigned)flags, seq, seq + len, priv->rcv_nxt, priv->rcv_wnd,
      now % 10000, priv->ts_recent % 10000, len);

1452
  wres = priv->callbacks.WritePacket(self, (gchar *) buffer.u8, len + HEADER_SIZE,
1453
                                     priv->callbacks.user_data);
1454
  /* Note: When len is 0, this is an ACK packet.  We don't read the
1455 1456 1457
     return value for those, and thus we won't retry.  So go ahead and treat
     the packet as a success (basically simulate as if it were dropped),
     which will prevent our timers from being messed up. */
1458
  if ((wres != WR_SUCCESS) && (0 != len))
1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471
    return wres;

  priv->t_ack = 0;
  if (len > 0) {
    priv->lastsend = now;
  }
  priv->last_traffic = now;
  priv->bOutgoing = TRUE;

  return WR_SUCCESS;
}

static gboolean
1472 1473
parse (PseudoTcpSocket *self, const guint8 *_header_buf, gsize header_buf_len,
    const guint8 *data_buf, gsize data_buf_len)
1474 1475 1476
{
  Segment seg;

1477 1478 1479 1480
  union {
    const guint8 *u8;
    const guint16 *u16;
    const guint32 *u32;
1481
  } header_buf;
1482

1483
  header_buf.u8 = _header_buf;
1484

1485
  if (header_buf_len != 24)
1486 1487
    return FALSE;

1488 1489 1490 1491 1492
  seg.conv = ntohl(*header_buf.u32);
  seg.seq = ntohl(*(header_buf.u32 + 1));
  seg.ack = ntohl(*(header_buf.u32 + 2));
  seg.flags = header_buf.u8[13];
  seg.wnd = ntohs(*(header_buf.u16 + 7));
1493

1494 1495
  seg.tsval = ntohl(*(header_buf.u32 + 4));
  seg.tsecr = ntohl(*(header_buf.u32 + 5));
1496

1497 1498
  seg.data = (const gchar *) data_buf;
  seg.len = data_buf_len;
1499

1500 1501
  DEBUG (PSEUDO_TCP_DEBUG_VERBOSE,
      "Received <CONV=%u><FLG=%u><SEQ=%u:%u><ACK=%u>"
1502
      "<WND=%u><TS=%u><TSR=%u><LEN=%u>",
1503 1504 1505 1506 1507 1508
      seg.conv, (unsigned)seg.flags, seg.seq, seg.seq + seg.len, seg.ack,
      seg.wnd, seg.tsval % 10000, seg.tsecr % 10000, seg.len);

  return process(self, &seg);
}

1509 1510 1511 1512 1513 1514
/* Whether being in @state implies that this host has already sent a FIN.
 * Unknown state values yield FALSE. */
static gboolean
pseudo_tcp_state_has_sent_fin (PseudoTcpState state)
{
  gboolean sent_fin = FALSE;

  switch (state) {
  case PSEUDO_TCP_CLOSED:
  case PSEUDO_TCP_FIN_WAIT_1:
  case PSEUDO_TCP_FIN_WAIT_2:
  case PSEUDO_TCP_CLOSING:
  case PSEUDO_TCP_TIME_WAIT:
  case PSEUDO_TCP_LAST_ACK:
    sent_fin = TRUE;
    break;
  case PSEUDO_TCP_LISTEN:
  case PSEUDO_TCP_SYN_SENT:
  case PSEUDO_TCP_SYN_RECEIVED:
  case PSEUDO_TCP_ESTABLISHED:
  case PSEUDO_TCP_CLOSE_WAIT:
  default:
    break;
  }

  return sent_fin;
}

/* Whether being in @state implies that a FIN has already been received from
 * the peer. Unknown state values yield FALSE. */
static gboolean
pseudo_tcp_state_has_received_fin (PseudoTcpState state)
{
  gboolean received_fin = FALSE;

  switch (state) {
  case PSEUDO_TCP_CLOSED:
  case PSEUDO_TCP_CLOSING:
  case PSEUDO_TCP_TIME_WAIT:
  case PSEUDO_TCP_CLOSE_WAIT:
  case PSEUDO_TCP_LAST_ACK:
    received_fin = TRUE;
    break;
  case PSEUDO_TCP_LISTEN:
  case PSEUDO_TCP_SYN_SENT:
  case PSEUDO_TCP_SYN_RECEIVED:
  case PSEUDO_TCP_ESTABLISHED:
  case PSEUDO_TCP_FIN_WAIT_1:
  case PSEUDO_TCP_FIN_WAIT_2:
  default:
    break;
  }

  return received_fin;
}
1556

1557 1558 1559 1560 1561 1562
/* Whether being in @state implies that a FIN-ACK has already been received
 * from the peer. Unknown state values yield FALSE. */
static gboolean
pseudo_tcp_state_has_received_fin_ack (PseudoTcpState state)
{
  gboolean received_fin_ack = FALSE;

  switch (state) {
  case PSEUDO_TCP_CLOSED:
  case PSEUDO_TCP_TIME_WAIT:
    received_fin_ack = TRUE;
    break;
  case PSEUDO_TCP_LISTEN:
  case PSEUDO_TCP_SYN_SENT:
  case PSEUDO_TCP_SYN_RECEIVED:
  case PSEUDO_TCP_ESTABLISHED:
  case PSEUDO_TCP_FIN_WAIT_1:
  case PSEUDO_TCP_FIN_WAIT_2:
  case PSEUDO_TCP_CLOSING:
  case PSEUDO_TCP_CLOSE_WAIT:
  case PSEUDO_TCP_LAST_ACK:
  default:
    break;
  }

  return received_fin_ack;
}

1581 1582 1583 1584 1585 1586 1587 1588 1589
static gboolean
process(PseudoTcpSocket *self, Segment *seg)
{
  PseudoTcpSocketPrivate *priv = self->priv;
  guint32 now;
  SendFlags sflags = sfNone;
  gboolean bIgnoreData;
  gboolean bNewData;
  gboolean bConnect = FALSE;
1590 1591 1592
  gsize snd_buffered;
  gsize available_space;
  guint32 kIdealRefillSize;
1593
  gboolean is_valuable_ack, is_duplicate_ack, is_fin_ack = FALSE;
1594
  gboolean received_fin = FALSE;
1595 1596 1597 1598 1599 1600 1601

  /* If this is the wrong conversation, send a reset!?!
     (with the correct conversation?) */
  if (seg->conv != priv->conv) {
    //if ((seg->flags & FLAG_RST) == 0) {
    //  packet(sock, tcb, seg->ack, 0, FLAG_RST, 0, 0);
    //}
1602
    DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "wrong conversation");
1603 1604 1605
    return FALSE;
  }

1606
  now = get_current_time (self);
1607 1608 1609
  priv->last_traffic = priv->lastrecv = now;
  priv->bOutgoing = FALSE;

1610
  if (priv->state == PSEUDO_TCP_CLOSED ||
1611 1612 1613 1614 1615
      (pseudo_tcp_state_has_received_fin_ack (priv->state) && seg->len > 0)) {
    /* Send an RST segment. See: RFC 1122, §4.2.2.13; RFC 793, §3.4, point 3,
     * page 37. We can only send RST if we know the peer knows we’re closed;
     * otherwise this could be a timeout retransmit from them, due to our
     * packets from data through to FIN being dropped. */
1616 1617
    DEBUG (PSEUDO_TCP_DEBUG_NORMAL,
        "Segment received while closed; sending RST.");
1618 1619 1620
    if ((seg->flags & FLAG_RST) == 0) {
      closedown (self, 0, CLOSEDOWN_LOCAL);
    }
1621

1622 1623 1624 1625 1626
    return FALSE;
  }

  // Check if this is a reset segment
  if (seg->flags & FLAG_RST) {
1627
    DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Received RST segment; closing down.");
1628
    closedown (self, ECONNRESET, CLOSEDOWN_REMOTE);
1629 1630 1631 1632 1633 1634 1635
    return FALSE;
  }

  // Check for control data
  bConnect = FALSE;
  if (seg->flags & FLAG_CTL) {
    if (seg->len == 0) {
1636
      DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Missing control code");
1637 1638 1639
      return FALSE;
    } else if (seg->data[0] == CTL_CONNECT) {
      bConnect = TRUE;