/*
 * Subversion Repositories planix.SVN
 *
 * Rev
 *
 * Blame | Last modification | View Log | RSS feed
 */

#include <unistd.h>
#include <ctype.h>
#include <sys/types.h>
#include <sys/param.h>
#include <sys/time.h>
#include <sys/systm.h>
#include <vm/vm_zone.h>

#include <sys/malloc.h>
#include <machine/param.h>
#include <sys/mbuf.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/proc.h>
#include <net/if.h>
#include <net/route.h>
#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/in_pcb.h>
#include <errno.h>

#include <netinet/il.h>
#include <netinet/il_var.h>

/*
 * Forward declarations: both routines are defined near the end of this
 * file but are called from code that appears before their definitions.
 */
struct ilpcb * il_drop(struct ilpcb *ilpcb, int errno0);
static struct ilpcb * il_close(struct ilpcb *ilpcb);

/* kernel protocol states needed */
static struct inpcbhead ilb;            /* head of the IL PCB list; walked by il_slowtimo()/il_fasttimo() */
static struct inpcbinfo ilbinfo;        /* list head, hash tables and zone; filled in by il_init() */

/* Buffer sizes handed to soreserve() by il_usr_attach() when none are set yet. */
u_long il_sendspace = 1024*64;
u_long il_recvspace = 1024*64;

/*
 * Target size of IL PCB hash tables. Must be a power of two.
 *
 * il_init() passes this value to hashinit() for both the hashbase and
 * the porthashbase table.
 *
 * NOTE(review): this comment used to say the size can be overridden by
 * the kernel environment variable net.inet.tcp.tcbhashsize, but nothing
 * in this file reads that variable; the only override visible here is
 * defining ILBHASHSIZE at build time.  Confirm before relying on it.
 */
#ifndef ILBHASHSIZE
#define ILBHASHSIZE     512
#endif

/*
 * Connection state of an IL endpoint.  The constants are listed in the
 * same order as the names in ilstates[].
 */
enum
{
        ILS_CLOSED      = 0,
        ILS_SYNCER      = 1,
        ILS_SYNCEE      = 2,
        ILS_ESTABLISHED = 3,
        ILS_LISTENING   = 4,
        ILS_CLOSING     = 5,
        ILS_OPENING     = 6,    /* only for file server */
};

/* Printable names of the connection states, one per ILS_* constant, in enum order. */
char    *ilstates[] = {
        "Closed",               /* ILS_CLOSED */
        "Syncer",               /* ILS_SYNCER */
        "Syncee",               /* ILS_SYNCEE */
        "Established",          /* ILS_ESTABLISHED */
        "Listening",            /* ILS_LISTENING */
        "Closing",              /* ILS_CLOSING */
        "Opening",              /* ILS_OPENING: only for file server */
};

/*
 * Packet types carried in the iltype field of the IL header.  The
 * constants are listed in the same order as the names in iltype[].
 */
enum
{
        ILT_SYNC        = 0,
        ILT_DATA        = 1,
        ILT_DATAQUERY   = 2,
        ILT_ACK         = 3,
        ILT_QUERY       = 4,
        ILT_STATE       = 5,
        ILT_CLOSE       = 6
};

/* Printable names of the packet types, one per ILT_* constant, in enum order. */
char    *iltype[] = {
        "sync",                 /* ILT_SYNC */
        "data",                 /* ILT_DATA */
        "dataquery",            /* ILT_DATAQUERY */
        "ack",                  /* ILT_ACK */
        "query",                /* ILT_QUERY */
        "state",                /* ILT_STATE */
        "close",                /* ILT_CLOSE */
};

/*
 * This is the actual shape of what we allocate using the zone
 * allocator: il_init() sizes the zone with sizeof(struct inp_ilpcb),
 * and il_newilpcb() casts the inpcb pointer back to this type to reach
 * the embedded ilpcb.  Keeping the inpcb and the ilpcb in one object
 * means the ilpcb never has to be allocated separately.  By hiding the
 * structure here, we avoid changing most of the rest of the code
 * (although it needs to be changed, eventually, for greater efficiency).
 *
 * NOTE(review): the original text (which spoke of tcpcbs) also said
 * that this layout lets both structures be protected by the same
 * generation count; that is not visible in this file -- confirm.
 *
 * The union pads the inpcb up to a multiple of ALIGNMENT bytes so that
 * the ilpcb member starts on that boundary relative to the start of
 * the object.
 */
#define ALIGNMENT       32
#define ALIGNM1         (ALIGNMENT - 1)
struct  inp_ilpcb {
        union {
                struct  inpcb inp;
                /* sizeof(struct inpcb) rounded up to a multiple of ALIGNMENT */
                char    align[(sizeof(struct inpcb) + ALIGNM1) & ~ALIGNM1];
        } inp_tp_u;
        struct  ilpcb ilpcb;
};
#undef ALIGNMENT
#undef ALIGNM1

/* Return the first packet on the out-of-order segment queue (0 when the queue is empty). */
static __inline struct mbuf * il_segq_top(struct ilpcb * ilpcb)
{
  struct mbuf * head = ilpcb->segq;

  return head;
}

/*
 * Unlink the first packet from the segment queue.  The queue must not
 * be empty: the head is dereferenced unconditionally.  The packet itself
 * is not freed; the caller is expected to already hold a pointer to it.
 */
static __inline void il_segq_dequeue(struct ilpcb * ilpcb)
{
  struct mbuf * head = ilpcb->segq;
  struct mbuf * rest = head->m_nextpkt;

  head->m_nextpkt = 0;
  ilpcb->segq = rest;
}

/*
 * Insert packet m (sequence number seq, IL header il) into the segment
 * queue, which is kept sorted by ascending sequence number.  The header
 * pointer is remembered in m->m_pkthdr.header.  If a packet with the
 * same sequence number is already queued, m is freed instead.
 */
static __inline void il_segq_insert(struct ilpcb * ilpcb, struct mbuf * m, u_long seq, struct ilhdr * il)
{
  struct mbuf * prev = 0;
  struct mbuf * cur;

  m->m_pkthdr.header = il;

  /* find the first queued packet with a larger sequence number */
  for( cur = ilpcb->segq; cur != 0; prev = cur, cur = cur->m_nextpkt ) {
    struct ilhdr * qil = cur->m_pkthdr.header;
    u_long qseq = ntohl(*(u_long *)qil->ilid);

    if( qseq == seq ) { /* we already got this packet */
      m_freem(m);
      return;
    }
    if( qseq > seq )
      break;
  }

  /* splice m in between prev and cur */
  m->m_nextpkt = cur;
  if( prev == 0 )
    ilpcb->segq = m;
  else
    prev->m_nextpkt = m;
}

/*
 * Protocol initialisation: set up the global PCB list, the two PCB hash
 * tables and the zone from which combined inpcb/ilpcb objects are
 * allocated.
 */
void il_init()
{
  /* empty PCB list, published through the pcbinfo */
  LIST_INIT(&ilb);
  ilbinfo.listhead = &ilb;

  /* both hash tables use the same target size */
  ilbinfo.hashbase =
    hashinit(ILBHASHSIZE, M_PCB, &ilbinfo.hashmask);
  ilbinfo.porthashbase =
    hashinit(ILBHASHSIZE, M_PCB, &ilbinfo.porthashmask);

  /* one zone item holds an inpcb together with its ilpcb */
  ilbinfo.ipi_zone =
    zinit("ilpcb", sizeof(struct inp_ilpcb), maxsockets, ZONE_INTERRUPT, 0);
}

/*
 * Build the IP and IL headers in front of payload m, compute the IL
 * checksum and hand the packet to ip_output().
 *
 * ilpcb: connection the packet belongs to (supplies addresses, ports,
 *        ttl/tos, the ack value and the timers).
 * m:     payload packet, consumed by this call.  If 0, a copy of the
 *        first packet in the socket's send buffer is sent instead
 *        (this is how callers retransmit).
 * type:  ILT_* packet type.
 * seq:   value for the id field; ignored for ILT_SYNC, which always
 *        carries ilpcb->start.
 * spec:  value for the ilspec field.
 *
 * Returns the result of ip_output(), ENOBUFS if an mbuf could not be
 * obtained, or 0 if a retransmission was requested while the send
 * buffer is empty (there is nothing to send in that case).
 */
static int il_output(struct ilpcb * ilpcb, struct mbuf *m, int type, u_long seq, u_char spec)
{
  struct ilhdr * il;
  struct ip * ip;
  int illen;
  struct inpcb * inp;
  struct socket * so;

  /* XXX: check total size is less than IP_MAXPACKET */

  if( m == 0 ) {
    inp = ilpcb->inpcb;
    so = inp->inp_socket;
    /* nothing queued: do not hand a null packet to m_copypacket() */
    if( so->so_snd.sb_mb == 0 )
      return 0;
    m = m_copypacket(so->so_snd.sb_mb, M_DONTWAIT);
    if( m == 0 )
      return ENOBUFS;
  } 

  /*
   * Calculate data length and get a mbuf
   * for IL and IP headers.
   */
  illen = m->m_pkthdr.len; /* size of il payload */
  M_PREPEND(m, sizeof(struct ip) + sizeof(struct ilhdr), M_DONTWAIT);
  if( m == 0 )
    return ENOBUFS;

  ip = mtod(m, struct ip *);
  il = (struct ilhdr *) (ip+1);
  bzero(ip, sizeof(*ip));

  ip->ip_p = IPPROTO_IL;
  ip->ip_src = ilpcb->inpcb->inp_laddr;
  ip->ip_dst = ilpcb->inpcb->inp_faddr;
  ip->ip_len = m->m_pkthdr.len;
  ip->ip_ttl = ilpcb->inpcb->inp_ip_ttl;        /* XXX */
  ip->ip_tos = ilpcb->inpcb->inp_ip_tos;        /* XXX */

  *(u_short *)il->illen = htons(illen + sizeof(struct ilhdr));
  il->iltype = type;
  il->ilspec = spec;
  /* ports are copied as stored in the inpcb, without byte swapping */
  *(u_short *)il->ilsrc = ilpcb->inpcb->inp_lport;
  *(u_short *)il->ildst = ilpcb->inpcb->inp_fport;
  if ( type != ILT_SYNC )
    *(u_long *)il->ilid = htonl(seq);
  else
    *(u_long *)il->ilid = htonl(ilpcb->start);

  /* everything except ACK and STATE arms the timers if they are idle */
  if( type != ILT_ACK && type != ILT_STATE) {
    if( ilpcb->rxt_timer == 0 )
      ilpcb->rxt_timer = ilpcb->rxt_timer_cur;
    if( ilpcb->death_timer == 0 )
      ilpcb->death_timer = ilpcb->death_timer_cur;
  }

  *(u_long *)il->ilack = htonl(ilpcb->recvd);
  il->ilsum[0] = il->ilsum[1] = 0;

  /* IL checksum does not cover IP header */
  m->m_data += sizeof(struct ip);
  m->m_len  -= sizeof(struct ip);
  *(u_short *)il->ilsum = in_cksum(m, illen + sizeof(struct ilhdr));
  m->m_data -= sizeof(struct ip);
  m->m_len  += sizeof(struct ip);

  return ip_output(m, ilpcb->inpcb->inp_options, &ilpcb->inpcb->inp_route, 
                   ilpcb->inpcb->inp_socket->so_options & SO_DONTROUTE ,0);
}

/*
 * Send a packet of the given type with no payload on connection ilpcb,
 * using ilpcb->next as the sequence number.
 *
 * Returns the result of il_output(), or ENOBUFS if no mbuf is
 * available (MGETHDR is called with M_DONTWAIT and can therefore fail).
 */
static int il_send_empty(struct ilpcb * ilpcb, int type, u_char spec)
{
  struct mbuf * m0;

  MGETHDR(m0, M_DONTWAIT, MT_DATA);
  if( m0 == 0 )
    return ENOBUFS;
  m0->m_len = 0;
  m0->m_pkthdr.len = 0;
  MH_ALIGN(m0, 0); /* leave space for the packet header */

  return il_output(ilpcb, m0, type, ilpcb->next, spec);
}

/*
 * Send an empty IL packet of the given type in reply to a received
 * packet, taking addresses and ports from that packet (swapped) rather
 * than from a connection.  The reply carries id 0 and acknowledges the
 * id of the received packet.
 *
 * ilpcb: connection whose cached route should be used, or 0 to route
 *        through a zeroed temporary route.
 * ip/il: IP and IL headers of the packet being answered.
 *
 * Returns the result of ip_output(), or ENOBUFS if no mbuf is
 * available.
 */
static int il_respond(struct ilpcb * ilpcb, struct ip * ip, struct ilhdr *il, int type, u_char spec)
{
  struct mbuf * m;
  int illen;
  struct ip * ip0;
  struct ilhdr *il0;
  struct route * ro;
  struct route sro;

  if( ilpcb ) {
    ro = & ilpcb->inpcb->inp_route;
  } else {
    ro = &sro;
    bzero(ro, sizeof *ro);
  }
              
  MGETHDR(m, M_DONTWAIT, MT_DATA);
  if( m == 0 )          /* M_DONTWAIT allocation can fail */
    return ENOBUFS;
  m->m_len = 0;
  m->m_pkthdr.len = 0;
  MH_ALIGN(m, 0); /* leave space for the packet header */
  illen = m->m_pkthdr.len; /* size of il payload */
  M_PREPEND(m, sizeof(struct ip) + sizeof(struct ilhdr), M_DONTWAIT);
  if( m == 0 )
    return ENOBUFS;

  ip0 = mtod(m, struct ip *);
  il0 = (struct ilhdr *) (ip0+1);
  bzero(ip0, sizeof(*ip0));

  ip0->ip_p = IPPROTO_IL;
  ip0->ip_src = ip->ip_dst;
  ip0->ip_dst = ip->ip_src;
  ip0->ip_ttl = ip_defttl;
  ip0->ip_len = sizeof(struct ip) + sizeof(struct ilhdr);
  *(u_short *)il0->illen = htons(illen + sizeof(struct ilhdr));
  il0->iltype = type;
  il0->ilspec = spec;
  bcopy(il->ilsrc, il0->ildst, 2);
  bcopy(il->ildst, il0->ilsrc, 2);
  *(u_long *)il0->ilid = 0;
  bcopy(il->ilid, il0->ilack, 4);
  il0->ilsum[0] = il0->ilsum[1] = 0;

  /* IL checksum does not cover IP header */
  m->m_data += sizeof(struct ip);
  m->m_len  -= sizeof(struct ip);
  *(u_short *)il0->ilsum = in_cksum(m, illen + sizeof(struct ilhdr));
  m->m_data -= sizeof(struct ip);
  m->m_len  += sizeof(struct ip);

  /*
   * NOTE(review): when the temporary route sro is used, ip_output()
   * may leave a route reference in ro->ro_rt that is never released
   * here -- confirm and release it if so.
   */
  return ip_output(m, 0, ro, 0 ,0);
}

/*
 * Create the socket and PCB for an incoming connection on the listening
 * endpoint ilpcb.
 *
 * ti_dst/ti_dport become the local address and port of the new PCB and
 * ti_src/ti_sport the foreign ones.  If the listen queue is full, one
 * droppable pending connection is dropped and the allocation is retried
 * once.
 *
 * Returns the new ilpcb (state ILS_LISTENING), or 0 on any failure.
 */
static struct ilpcb *
il_newconn(struct ilpcb * ilpcb, struct in_addr ti_dst, u_short ti_dport,
           struct in_addr ti_src, u_short ti_sport)
{
  struct socket * lso = ilpcb->inpcb->inp_socket;
  struct socket * nso;
  struct inpcb * ninp;
  struct ilpcb * ncb;
  struct sockaddr_in peer;

  nso = sonewconn(lso, 0);
  if (nso == 0) {
    /* queue full: sacrifice a pending connection and try again */
    struct socket * victim = sodropablereq(lso);

    if (victim == 0)
      return 0;
    il_drop(sotoilpcb(victim), ETIMEDOUT);
    nso = sonewconn(lso, 0);
    if (nso == 0)
      return 0;
  }

  ninp = (struct inpcb *)nso->so_pcb;
  ninp->inp_laddr = ti_dst;
  ninp->inp_lport = ti_dport;
  if (in_pcbinshash(ninp) != 0) {
    /*
     * Undo the assignments above if we failed to put
     * the PCB on the hash lists.
     */
    ninp->inp_laddr.s_addr = INADDR_ANY;
    ninp->inp_lport = 0;

    soabort(nso);
    return 0;
  }

  /* describe the peer and connect the new PCB to it */
  bzero((char *)&peer, sizeof(peer));
  peer.sin_family = AF_INET;
  peer.sin_len = sizeof(peer);
  peer.sin_addr = ti_src;
  peer.sin_port = ti_sport;
  if (in_pcbconnect(ninp, (struct sockaddr *)&peer, &proc0)) {
    ninp->inp_laddr.s_addr = INADDR_ANY;
    soabort(nso);
    return 0;
  }

  ncb = intoilpcb(ninp);
  ncb->state = ILS_LISTENING;

  return ncb;
}

/*
 * Ack processing: release every send-buffer record covered by ack and
 * update the retransmit/death timers.  Acks below ilpcb->unacked are
 * ignored.
 */
static void il_proc_ack(struct ilpcb * ilpcb, struct socket * so, u_long ack)
{
  u_long last;

  if( ack < ilpcb->unacked )
    return;

  ilpcb->rxt_timer = 0;
  ilpcb->death_timer = 0;

  /* the rxt timer is not prop. to RTT */
  /* reset it so that the first rxt is always 1 second */
  ilpcb->rxt_timer_cur = 2;

  /* never release more than has actually been sent */
  last = ( ack >= ilpcb->next ) ? ilpcb->next - 1 : ack;
  for( ; ilpcb->unacked <= last; ilpcb->unacked++ )
    sbdroprecord(&so->so_snd);

  /* data still outstanding: keep the timers running */
  if( ilpcb->unacked != ilpcb->next ) {
    ilpcb->rxt_timer = ilpcb->rxt_timer_cur;
    ilpcb->death_timer = ilpcb->death_timer_cur; /* do we need this here? */
  }
  sowwakeup(so);
}

/*
 * Handle a received data packet m with sequence number seq.  The packet
 * is always consumed: appended to the receive buffer, queued on the
 * segment queue, or freed.
 *
 * If seq is the next expected one (ilpcb->recvd + 1) the packet is
 * delivered, followed by any packets on the segment queue that have
 * become in-order, and the reader is woken up.  A packet from further
 * ahead is queued; anything else (seq <= recvd) is freed.
 *
 * Returns 1 if in-order data was delivered, otherwise 0.  The spec
 * argument is not used.
 */
static int il_proc_data(struct ilpcb * ilpcb, struct socket * so, struct mbuf * m, u_long seq, int spec)
{
  struct mbuf * m0;
  struct ip * ip;
  int hlen = sizeof(struct ip) + sizeof(struct ilhdr);
  struct ilhdr * il;
  int needack = 0;

  ip = mtod(m, struct ip *);
  il = (struct ilhdr *)(ip+1);
  if( seq == ilpcb->recvd + 1 ) {
    needack = 1;
    while(1) {
      ilpcb->recvd = seq;
      
      /* strip the IP and IL headers before handing the data to the socket */
      m->m_len -= hlen;
      m->m_pkthdr.len -= hlen;
      m->m_data += hlen;
      sbappendrecord(&so->so_rcv, m);

      /* see whether the head of the segment queue is now in order */
      if( (m0 = il_segq_top(ilpcb)) == 0 )
        break;
      ip = mtod(m0, struct ip *);
      il = (struct ilhdr *)(ip+1);
      seq = ntohl(*(u_long *)il->ilid);
      if( seq != ilpcb->recvd + 1 )
        break;
      il_segq_dequeue(ilpcb);
      m = m0;
    };      
    sorwakeup(so);
  } else {
    if( seq > ilpcb->recvd ) 
      il_segq_insert(ilpcb, m, seq, il);        /* out of order: hold for later */
    else
      m_freem(m);                               /* already received */
  }

  return needack;
}

/* assume we only have one connection */
void il_input(struct mbuf * m, int iphlen)
{
  struct ilhdr * il;
  struct ilpcb * ilpcb = 0;
  int len, type;
  u_long seq, ack;
  struct ip * ip;
  struct inpcb * inp;
  u_short sport, dport;
  struct socket * so;
  u_char spec;

  /*
   * Strip IP options, if any; should skip this,
   * make available to user, and use on returned packets,
   * but we don't yet have a way to check the checksum
   * with options still present.
   */
  if (iphlen > sizeof (struct ip)) {
    ip_stripoptions(m, (struct mbuf *)0);
    iphlen = sizeof(struct ip);
  }

  /*
   * Get IP and IL header together in first mbuf.
   */
  ip = mtod(m, struct ip *);
  if (m->m_len < iphlen + sizeof(struct ilhdr)) {
    if ((m = m_pullup(m, iphlen + sizeof(struct ilhdr))) == 0) {
      return;
    }
    ip = mtod(m, struct ip *);
  }
  il = (struct ilhdr *)((caddr_t)ip + iphlen);

  len = ntohs(*(u_short *)il->illen);
  seq = ntohl(*(u_long *)il->ilid);
  ack = ntohl(*(u_long *)il->ilack);
  sport = *(u_short *)il->ilsrc;
  dport = *(u_short *)il->ildst;  
  type = il->iltype;
  spec = il->ilspec;

  inp = in_pcblookup_hash(&ilbinfo, ip->ip_src, sport, ip->ip_dst, dport, 1);
  if ( inp == 0 && type == ILT_SYNC )
    goto dropwithrest;
  if( inp == 0 )
    goto drop;

  ilpcb = intoilpcb(inp);
  if( ilpcb == 0 )
    goto drop;

  so = inp->inp_socket;
  if( type == ILT_QUERY ) { /* XXX: can we use the same mbuf to send? */
    il_send_empty(ilpcb, ILT_STATE, il->ilspec);
    goto drop;
  }  

 again:
  /* FSM transition */
  switch( ilpcb->state ) {
  case ILS_SYNCER:
    if( ack != ilpcb->start )
      goto drop;
    switch( type ) {
    case ILT_SYNC:
      ilpcb->unacked++;
      ilpcb->recvd = seq;
      il_send_empty(ilpcb, ILT_ACK, 0);
      ilpcb->state = ILS_ESTABLISHED;
      ilpcb->rxt_timer = 0;
      ilpcb->death_timer = 0;
      soisconnected(inp->inp_socket);
      break;
    case ILT_CLOSE:
      il_drop(ilpcb, ECONNREFUSED);
      break;
    }
    break;

  case ILS_LISTENING:
    if( type == ILT_SYNC && ack == 0 && so->so_options & SO_ACCEPTCONN ) {
      ilpcb = il_newconn(ilpcb, ip->ip_dst, dport, ip->ip_src, sport);

      ilpcb->next = ilpcb->start = random();
      ilpcb->unacked = ilpcb->next;
      ilpcb->rstart = ilpcb->recvd = seq;
      ilpcb->state = ILS_SYNCEE;
      il_send_empty(ilpcb, ILT_SYNC, 0);
      ilpcb->next++;
    } else
      il_respond(ilpcb, ip, il, ILT_CLOSE, 0);
    break;

  case ILS_SYNCEE:
    if( ack == ilpcb->start ) {      
      ilpcb->rxt_timer = 0;
      ilpcb->unacked++;
      ilpcb->state = ILS_ESTABLISHED;
      soisconnected(so);
      goto again;
      break;
    }
    if( type == ILT_SYNC && seq == ilpcb->recvd && ack == 0 )
      il_send_empty(ilpcb, ILT_SYNC, 0);
    break;

  case ILS_ESTABLISHED:
    il_proc_ack(ilpcb, so, ack);
    switch( type ) {
    case ILT_DATA:
      if( il_proc_data(ilpcb, so, m, seq, spec) ) 
        ilpcb->flags |= ILF_NEEDACK;
      goto done;
      break;
    case ILT_DATAQUERY:
      il_proc_data(ilpcb, so, m, seq, spec);
      il_send_empty(ilpcb, ILT_STATE, spec);
      goto done;
      break;
    case ILT_CLOSE:
      if( ack < ilpcb->next && ack >= ilpcb->start ) {
        if( ilpcb->recvd+1 == seq )
          ilpcb->recvd = seq;
        il_send_empty(ilpcb, ILT_CLOSE, 0);
        ilpcb->state = ILS_CLOSING;
      }
      break;
    case ILT_STATE:
      if( ack < ilpcb->rxt_max ) {
        ilpcb->rxt_max = ilpcb->next;
        il_output(ilpcb, 0, ILT_DATAQUERY, ilpcb->unacked, 1);
      }
      break;
    case ILT_SYNC:
      il_send_empty(ilpcb, ILT_ACK, 0);
      break;
    }
    break;

  case  ILS_CLOSED:
    goto drop;
    break;

  case ILS_CLOSING:
    if( type == ILT_CLOSE ) {
      if( ilpcb->recvd+1 == seq )
        ilpcb->recvd = seq;
      il_send_empty(ilpcb, ILT_CLOSE, 0);
      ilpcb->state = ILS_CLOSED;
      il_close(ilpcb);
    }
    break;
  }

  m_freem(m);
 done:
  return;

 dropwithrest:
  il_respond(ilpcb, ip, il, ILT_CLOSE, 0);
 drop:
  m_freem(m);
}

/*
 * Pick a random initial send sequence number for an outgoing
 * connection and enter the ILS_SYNCER state.  start and unacked are
 * left at the initial value; next is advanced past it.
 */
static void il_sendseqinit(struct ilpcb * ilpcb)
{
  ilpcb->next = random();
  ilpcb->start = ilpcb->next;
  ilpcb->unacked = ilpcb->next;
  ilpcb->next++;
  ilpcb->state = ILS_SYNCER;
}

/*
 * Retransmit timer expiry: resend whatever the current state is
 * waiting on, then re-arm the timer with the current interval.
 */
static void il_rxt_timeout(struct ilpcb * ilpcb)
{
  if( ilpcb->state == ILS_ESTABLISHED ) {
    /* resend the oldest unacknowledged record as a query */
    il_output(ilpcb, 0, ILT_DATAQUERY, ilpcb->unacked, 1);
    ilpcb->rxtot++;
  } else if( ilpcb->state == ILS_SYNCER || ilpcb->state == ILS_SYNCEE ) {
    il_send_empty(ilpcb, ILT_SYNC, 0);
  } else if( ilpcb->state == ILS_CLOSING ) {
    il_send_empty(ilpcb, ILT_CLOSE, 0);
  }

  ilpcb->rxt_timer = ilpcb->rxt_timer_cur;
}

/* Control-input hook: intentionally does nothing. */
void il_ctlinput(int cmd, struct sockaddr *sa, void *vip)
{
  /* all arguments are ignored */
}

/* Socket-option hook: no options are handled; always reports success (0). */
int  il_ctloutput(struct socket *so, struct sockopt *sopt)
{
  return 0;
}

/* Drain hook: intentionally does nothing. */
void il_drain()
{
  /* nothing to release */
}

/*
 * Slow timer: walk every IL PCB and count down its death and
 * retransmit timers.  A connection whose death timer reaches zero is
 * dropped with ETIMEDOUT; one whose retransmit timer reaches zero has
 * its retransmit interval doubled and il_rxt_timeout() run.
 */
void il_slowtimo()
{
  struct ilpcb * ilpcb;
  struct inpcb * inp, * next;
  int s;

  s = splnet();
  for(inp = ilb.lh_first; inp; inp = next) {
    /*
     * Fetch the successor first: il_drop() ends in il_close(), which
     * detaches the inpcb, so neither inp nor ilpcb may be touched
     * once the connection has been dropped.
     */
    next = inp->inp_list.le_next;
    ilpcb = intoilpcb(inp);
    if(ilpcb->death_timer &&  --ilpcb->death_timer == 0 ) {
      il_drop(ilpcb, ETIMEDOUT);
      continue;
    }
    
    if(ilpcb->rxt_timer &&  --ilpcb->rxt_timer == 0 ) {
      ilpcb->rxt_timer_cur <<= 1;
      il_rxt_timeout(ilpcb);
    }
  }
  splx(s);
}

void il_fasttimo()
{
  struct ilpcb * ilpcb;
  struct inpcb * inp;
  int s;

  s = splnet();
  for(inp = ilb.lh_first; inp; inp = inp->inp_list.le_next) {
    ilpcb = intoilpcb(inp);
    if(ilpcb->flags & ILF_NEEDACK) {
      ilpcb->flags &= ~ILF_NEEDACK;
      il_send_empty(ilpcb, ILT_ACK, 0);
    }
  }
  splx(s);
}

static struct ilpcb * il_newilpcb(struct inpcb * inp)
{
  struct inp_ilpcb *it;
  register struct ilpcb *ilpcb;
  
  it = (struct inp_ilpcb *)inp;
  ilpcb = &it->ilpcb;
  bzero((char *) ilpcb, sizeof(struct ilpcb));
  
  ilpcb->state = ILS_CLOSED;
  ilpcb->inpcb = inp;
  ilpcb->rxt_timer_cur = 2;
  ilpcb->death_timer_cur = 20;
  
  ilpcb->inpcb = inp;   /* XXX */
  inp->inp_ip_ttl = ip_defttl;
  inp->inp_ppcb = (caddr_t)ilpcb;
  return (ilpcb);               /* XXX */
}

/*
 * Common subroutine to open a TCP connection to remote host specified
 * by struct sockaddr_in in mbuf *nam.  Call in_pcbbind to assign a local
 * port number if needed.  Call in_pcbladdr to do the routing and to choose
 * a local host address (interface).  If there is an existing incarnation
 * of the same connection in TIME-WAIT state and if the remote host was
 * sending CC options and if the connection duration was < MSL, then
 * truncate the previous TIME-WAIT state and proceed.
 * Initialize connection parameters and enter SYN-SENT state.
 */
static int
il_connect(struct ilpcb *ilpcb, struct sockaddr *nam, struct proc *p)
{
        struct inpcb *inp = ilpcb->inpcb, *oinp;
        struct socket *so = inp->inp_socket;
        struct sockaddr_in *sin = (struct sockaddr_in *)nam;
        struct sockaddr_in *ifaddr;
        int error;

        if (inp->inp_lport == 0) {
                error = in_pcbbind(inp, (struct sockaddr *)0, p);
                if (error)
                        return error;
        }

        /*
         * Cannot simply call in_pcbconnect, because there might be an
         * earlier incarnation of this same connection still in
         * TIME_WAIT state, creating an ADDRINUSE error.
         */
        error = in_pcbladdr(inp, nam, &ifaddr);
        if (error)
                return error;
        oinp = in_pcblookup_hash(inp->inp_pcbinfo,
            sin->sin_addr, sin->sin_port,
            inp->inp_laddr.s_addr != INADDR_ANY ? inp->inp_laddr
                                                : ifaddr->sin_addr,
            inp->inp_lport,  0);
        if (oinp) {
                        return EADDRINUSE;
        }
        if (inp->inp_laddr.s_addr == INADDR_ANY)
                inp->inp_laddr = ifaddr->sin_addr;
        inp->inp_faddr = sin->sin_addr;
        inp->inp_fport = sin->sin_port;
        in_pcbrehash(inp);

#if 0
        ilpcb->t_template = tcp_template(tp);
        if (ilpcb->t_template == 0) {
                in_pcbdisconnect(inp);
                return ENOBUFS;
        }
#endif

        soisconnecting(so);
        il_sendseqinit(ilpcb);

        return 0;
}

/*
 * pru_send: queue record m on the send buffer (kept there until it is
 * acknowledged) and transmit a copy of it as an ILT_DATA packet with
 * the next sequence number.
 *
 * m is always consumed.  flags, addr, control and p are not used.
 * Returns EINVAL if the socket has no PCB, ENOBUFS if the send buffer
 * is more than 512 bytes over its limit or no mbuf is available for
 * the copy, otherwise the result of il_output().
 *
 * NOTE(review): control is never freed here; confirm whether the
 * caller expects this routine to release it.
 */
static int il_usr_send(struct socket *so, int flags, struct mbuf * m, struct sockaddr *addr, struct mbuf *control, struct proc *p)
{
  struct ilpcb * ilpcb;
  struct inpcb * inp = sotoinpcb(so);
  int error;
  struct mbuf * m0;

  if (inp == 0) {
    m_freem(m);
    return EINVAL;
  }
  ilpcb = intoilpcb(inp);

  if (sbspace(&so->so_snd) < -512) {
    m_freem(m);
    error = ENOBUFS;
    goto out;
  }

  /*
   * Make the transmit copy before queueing anything.  il_output()
   * treats a null packet as "retransmit the head of the send buffer",
   * so a failed copy must never be passed on, and failing here leaves
   * both the send buffer and the sequence numbers untouched.
   */
  m0 = m_copypacket(m, M_DONTWAIT);
  if (m0 == 0) {
    m_freem(m);
    error = ENOBUFS;
    goto out;
  }

  sbappendrecord(&so->so_snd, m);
  error = il_output(ilpcb, m0, ILT_DATA, ilpcb->next++, 0); 

 out:
  return error;
}

/*
 * pru_attach: give a new socket its IL protocol state.  Reserves
 * default buffer space if the socket has none, allocates the inpcb from
 * ilbinfo and initialises the embedded ilpcb in the closed state.
 *
 * Returns 0 on success, EISCONN if the socket already has a PCB, or
 * the error from soreserve()/in_pcballoc().  proto is not used.
 */
static int il_usr_attach(struct socket *so, int proto, struct proc *p)
{
  struct inpcb *inp;
  struct ilpcb *ilpcb = 0;
  int error = 0;
  int s;

  s = splnet();
  inp = sotoinpcb(so);

  if (inp != 0) {
    error = EISCONN;
    goto done;
  }

  if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
    error = soreserve(so, il_sendspace, il_recvspace);
    if (error != 0)
      goto done;
  }

  error = in_pcballoc(so, &ilbinfo, p);
  if (error != 0)
    goto done;

  inp = sotoinpcb(so);
  ilpcb = il_newilpcb(inp);
  if (ilpcb == 0) {
    /* undo the PCB allocation without letting the socket be freed */
    int nofd = so->so_state & SS_NOFDREF;       /* XXX */

    so->so_state &= ~SS_NOFDREF;        /* don't free the socket yet */
    in_pcbdetach(inp);
    so->so_state |= nofd;
    error = ENOBUFS;
    goto done;
  }

  ilpcb->state = ILS_CLOSED;
  ilpcb->segq = 0;

 done:
  splx(s);
  return error;
}

/*
 * pru_bind: bind the socket to the local address in nam.
 *
 * Returns EINVAL if the socket has no PCB, EAFNOSUPPORT for an AF_INET
 * multicast address, otherwise the result of in_pcbbind().
 */
static int il_usr_bind(struct socket *so, struct sockaddr *nam, struct proc *p)
{
  struct inpcb *inp;
  struct ilpcb *ilpcb;
  struct sockaddr_in *sinp;
  int error;
  int s;

  s = splnet();
  inp = sotoinpcb(so);
  if (inp == 0) {
    splx(s);
    return EINVAL;
  }
  ilpcb = intoilpcb(inp);

  /*
   * Must check for multicast addresses and disallow binding
   * to them.
   */
  sinp = (struct sockaddr_in *)nam;
  if (sinp->sin_family == AF_INET &&
      IN_MULTICAST(ntohl(sinp->sin_addr.s_addr)))
    error = EAFNOSUPPORT;
  else
    error = in_pcbbind(inp, nam, p);

  splx(s);
  return error;
}

/*
 * Initiate connection to peer.
 * Create a template for use in transmissions on this connection.
 * Enter SYN_SENT state, and mark socket as connecting.
 * Start keep-alive timer, and seed output sequence space.
 * Send initial segment on connection.
 */
static int
il_usr_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
{
  int s = splnet();
  int error = 0;
  struct inpcb *inp = sotoinpcb(so);
  struct ilpcb *ilpcb;
  struct sockaddr_in *sinp;
  
  if (inp == 0) {
    splx(s);
    return EINVAL;
  }
  ilpcb = intoilpcb(inp);
    
  /*
   * Must disallow TCP ``connections'' to multicast addresses.
   */
  sinp = (struct sockaddr_in *)nam;
  if (sinp->sin_family == AF_INET
      && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
    error = EAFNOSUPPORT;
    goto out;
  }
  
  if ((error = il_connect(ilpcb, nam, p)) != 0)
    goto out;

  error = il_send_empty(ilpcb, ILT_SYNC, 0);

 out: splx(s); 
  return error; 
}

/*
 * Close an IL control block:
 *      free every packet held on the segment queue
 *      unhook the ilpcb from its inpcb
 *      mark the socket disconnected
 *      detach the inpcb
 * Always returns a null ilpcb pointer.
 */
static struct ilpcb *
il_close(struct ilpcb *ilpcb)
{
        struct inpcb *inp = ilpcb->inpcb;
        struct socket *so = inp->inp_socket;
        struct mbuf *pkt;

        /* free the reassembly queue, if any */
        while ((pkt = ilpcb->segq) != 0) {
                ilpcb->segq = pkt->m_nextpkt;
                m_freem(pkt);
        }

        inp->inp_ppcb = NULL;
        soisdisconnected(so);
        in_pcbdetach(inp);
        return ((struct ilpcb *)0);
}

/*
 * User issued close.  A connection that is closed or only listening is
 * torn down at once with il_close().  One that is syncing or
 * established sends an ILT_CLOSE packet and moves to ILS_CLOSING.  In
 * any other state nothing is done.
 *
 * Returns the ilpcb, or a null pointer if it was closed here.
 */
static struct ilpcb *
il_usrclosed(struct ilpcb *ilpcb)
{
        switch (ilpcb->state) {
        case ILS_SYNCER:
        case ILS_SYNCEE:
        case ILS_ESTABLISHED:
                /* tell the peer and wait for the close handshake */
                il_send_empty(ilpcb, ILT_CLOSE, 0);
                ilpcb->state = ILS_CLOSING;
                return (ilpcb);

        case ILS_CLOSED:
        case ILS_LISTENING:
                /* no peer to notify */
                ilpcb->state = ILS_CLOSED;
                return (il_close(ilpcb));

        default:
                /* ILS_CLOSING and anything else: leave untouched */
                break;
        }
        return (ilpcb);
}

/*
 * Drop an IL connection, reporting the specified error on the socket.
 * If the connection is in the ILS_SYNCEE, ILS_ESTABLISHED or
 * ILS_CLOSING state, an ILT_CLOSE packet is sent to the peer first.
 * The connection is then closed with il_close().
 *
 * Returns the result of il_close() (a null pointer).  Because
 * il_close() detaches the inpcb, callers must not touch the PCB after
 * this call.
 *
 * NOTE(review): the unconditional panic() below makes the rest of this
 * routine unreachable, so every caller currently brings the system
 * down.  It looks like a debugging trap; before removing it, audit the
 * callers -- in particular any that walk the PCB list -- for uses of
 * the PCB after the drop.
 */
struct ilpcb *
il_drop(struct ilpcb *ilpcb, int errno0)
{
  struct socket *so = ilpcb->inpcb->inp_socket;

  panic("il_drop");
  
  switch(ilpcb->state) {
  case ILS_SYNCEE:
  case ILS_ESTABLISHED:
  case ILS_CLOSING:
    il_send_empty(ilpcb, ILT_CLOSE, 0);
    /* FALLTHROUGH */
  default:
    break;
  }
  ilpcb->state = ILS_CLOSED;
  so->so_error = errno0;
  return (il_close(ilpcb));
}

/*
 * Initiate (or continue) disconnect: mark the socket as disconnecting,
 * discard any unread input and let il_usrclosed() pick the next step
 * for the current state.
 *
 * Returns whatever il_usrclosed() returns (a null pointer if the
 * control block was closed).
 */
static struct ilpcb *
il_disconnect(struct ilpcb *ilpcb)
{
  struct socket *sock = ilpcb->inpcb->inp_socket;

  soisdisconnecting(sock);
  sbflush(&sock->so_rcv);

  return (il_usrclosed(ilpcb));
}


/*
 * pru_detach() detaches the IL protocol from the socket.
 * If the protocol state is non-embryonic, then can't
 * do this directly: have to initiate a pru_disconnect(),
 * which may finish later; embryonic TCB's can just
 * be discarded here.
 */
static int
il_usr_detach(struct socket *so)
{
        int s = splnet();
        int error = 0;
        struct inpcb *inp = sotoinpcb(so);
        struct ilpcb *ilpcb;

        if (inp == 0) {
                splx(s);
                return EINVAL;  /* XXX */
        }
        ilpcb = intoilpcb(inp);
        ilpcb = il_disconnect(ilpcb);
        splx(s);
        return error;
}

/*
 * pru_shutdown: mark the connection as being incapable of further
 * output and start the user-close processing.
 *
 * Returns EINVAL if the socket has no PCB, otherwise 0.
 */
static int
il_usr_shutdown(struct socket *so)
{
        struct inpcb *inp;
        struct ilpcb *ilpcb;
        int s;

        s = splnet();
        inp = sotoinpcb(so);
        if (inp == 0) {
                splx(s);
                return EINVAL;
        }
        ilpcb = intoilpcb(inp);

        socantsendmore(so);
        il_usrclosed(ilpcb);

        splx(s);
        return 0;
}

/*
 * Initiate disconnect from peer.
 * If connection never passed embryonic stage, just drop;
 * else if don't need to let data drain, then can just drop anyways,
 * else have to begin TCP shutdown process: mark socket disconnecting,
 * drain unread data, state switch to reflect user close, and
 * send segment (e.g. FIN) to peer.  Socket will be really disconnected
 * when peer sends FIN and acks ours.
 *
 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
 */
static int
il_usr_disconnect(struct socket *so)
{
  int s = splnet();
  int error = 0;
  struct inpcb *inp = sotoinpcb(so);
  struct ilpcb * ilpcb;

  if (inp == 0) {
    splx(s);
    return EINVAL;
  }
  ilpcb = intoilpcb(inp);
    
  il_disconnect(ilpcb);
  splx(s); 
  return error;
}

/*
 * Abort the TCP.
 */
static int
il_usr_abort(struct socket *so)
{
        int s = splnet();
        int error = 0;
        struct inpcb *inp = sotoinpcb(so);
        struct ilpcb * ilpcb;

  if (inp == 0) {
    splx(s);
    return EINVAL;
  }
  ilpcb = intoilpcb(inp);
    
  ilpcb = il_drop(ilpcb, ECONNABORTED);
  splx(s); 
  return error;

}

/*
 * Prepare to accept connections.
 */
static int
il_usr_listen(struct socket *so, struct proc *p)
{
  int s = splnet();
  int error = 0;
  struct inpcb *inp = sotoinpcb(so);
  struct ilpcb *ilpcb;
  
  if (inp == 0) {
    splx(s);
    return EINVAL;
  }
  ilpcb = intoilpcb(inp);
  
  if (inp->inp_lport == 0)
    error = in_pcbbind(inp, (struct sockaddr *)0, p);
  if (error == 0)
    ilpcb->state = ILS_LISTENING;

  splx(s); 
  return error;
}

/*
 * pru_accept: accept a connection.  The only work done here is to
 * return the address of the peer through nam, via in_setpeeraddr().
 *
 * Returns EINVAL if the socket has no PCB, otherwise 0.
 */
static int
il_usr_accept(struct socket *so, struct sockaddr **nam)
{
  struct inpcb *inp;
  struct ilpcb *ilpcb;
  int s;

  s = splnet();
  inp = sotoinpcb(so);
  if (inp == 0) {
    splx(s);
    return EINVAL;
  }
  ilpcb = intoilpcb(inp);

  in_setpeeraddr(so, nam);

  splx(s);
  return 0;
}

/*
 * User-request switch for IL sockets.  The initializer is positional,
 * so the entries must stay in the member order of struct pr_usrreqs
 * (declared outside this file).  Requests IL does not implement use the
 * generic pru_*_notsupp / pru_sense_null handlers; address queries and
 * interface control go to the shared in_* routines, and data transfer
 * uses the generic sosend/soreceive/sopoll.
 */
/* xxx - should be const */
struct pr_usrreqs il_usrreqs = {
        il_usr_abort, il_usr_accept, il_usr_attach, il_usr_bind,
        il_usr_connect, pru_connect2_notsupp, in_control, il_usr_detach,
        il_usr_disconnect, il_usr_listen, in_setpeeraddr, pru_rcvd_notsupp,
        pru_rcvoob_notsupp, il_usr_send, pru_sense_null, il_usr_shutdown,
        in_setsockaddr, sosend, soreceive, sopoll
};