/* net/core/sock.c */
/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
* interface as the means of communication with the user level.
*
* Generic socket support routines. Memory allocators, socket lock/release
* handler for protocols to use and generic option handler.
*
*
* Version: @(#)sock.c 1.0.17 06/02/93
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
* Florian La Roche, <flla@stud.uni-sb.de>
* Alan Cox, <A.Cox@swansea.ac.uk>
*
* Fixes:
* Alan Cox : Numerous verify_area() problems
* Alan Cox : Connecting on a connecting socket
* now returns an error for tcp.
* Alan Cox : sock->protocol is set correctly.
* and is not sometimes left as 0.
* Alan Cox : connect handles icmp errors on a
* connect properly. Unfortunately there
* is a restart syscall nasty there. I
* can't match BSD without hacking the C
* library. Ideas urgently sought!
* Alan Cox : Disallow bind() to addresses that are
* not ours - especially broadcast ones!!
* Alan Cox : Socket 1024 _IS_ ok for users. (fencepost)
* Alan Cox : sock_wfree/sock_rfree don't destroy sockets,
* instead they leave that for the DESTROY timer.
* Alan Cox : Clean up error flag in accept
* Alan Cox : TCP ack handling is buggy, the DESTROY timer
* was buggy. Put a remove_sock() in the handler
* for memory when we hit 0. Also altered the timer
* code. The ACK stuff can wait and needs major
* TCP layer surgery.
* Alan Cox : Fixed TCP ack bug, removed remove sock
* and fixed timer/inet_bh race.
* Alan Cox : Added zapped flag for TCP
* Alan Cox : Move kfree_skb into skbuff.c and tidied up surplus code
* Alan Cox : for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
* Alan Cox : kfree_s calls now are kfree_skbmem so we can track skb resources
* Alan Cox : Supports socket option broadcast now as does udp. Packet and raw need fixing.
* Alan Cox : Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
* Rick Sladkey : Relaxed UDP rules for matching packets.
* C.E.Hawkins : IFF_PROMISC/SIOCGHWADDR support
* Pauline Middelink : identd support
* Alan Cox : Fixed connect() taking signals I think.
* Alan Cox : SO_LINGER supported
* Alan Cox : Error reporting fixes
* Anonymous : inet_create tidied up (sk->reuse setting)
* Alan Cox : inet sockets don't set sk->type!
* Alan Cox : Split socket option code
* Alan Cox : Callbacks
* Alan Cox : Nagle flag for Charles & Johannes stuff
* Alex : Removed restriction on inet fioctl
* Alan Cox : Splitting INET from NET core
* Alan Cox : Fixed bogus SO_TYPE handling in getsockopt()
* Adam Caldwell : Missing return in SO_DONTROUTE/SO_DEBUG code
* Alan Cox : Split IP from generic code
* Alan Cox : New kfree_skbmem()
* Alan Cox : Make SO_DEBUG superuser only.
* Alan Cox : Allow anyone to clear SO_DEBUG
* (compatibility fix)
* Alan Cox : Added optimistic memory grabbing for AF_UNIX throughput.
* Alan Cox : Allocator for a socket is settable.
* Alan Cox : SO_ERROR includes soft errors.
* Alan Cox : Allow NULL arguments on some SO_ opts
* Alan Cox : Generic socket allocation to make hooks
* easier (suggested by Craig Metz).
*
* To Fix:
*
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/kernel.h>
#include <linux/major.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/string.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/fcntl.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <asm/segment.h>
#include <asm/system.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/arp.h>
#include <net/rarp.h>
#include <net/route.h>
#include <net/tcp.h>
#include <net/udp.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/raw.h>
#include <net/icmp.h>
/*
 * Smaller of two values. This is a function-like macro: the argument that
 * ends up being selected is evaluated twice, so arguments must be free of
 * side effects.
 */
#define min(a,b) ((a)<(b)?(a):(b))
/*
* This is meant for all protocols to use and covers goings on
* at the socket level. Everything here is generic.
*/
/*
 *	Set a generic, socket-level option.
 *
 *	sk	- socket whose option is being changed
 *	level	- option level; not examined by this routine
 *	optname	- the SO_* option to set
 *	optval	- user-space pointer to the new value (an int for every
 *		  option except SO_LINGER, which takes a struct linger)
 *	optlen	- length supplied by the caller; not examined by this routine
 *
 *	Returns 0 on success. Fails with -EINVAL when optval is NULL or the
 *	SO_PRIORITY value is out of range, -EPERM when a non-superuser tries
 *	to switch SO_DEBUG on, -ENOPROTOOPT for SO_TYPE, SO_ERROR and any
 *	unrecognised option, or whatever error verify_area() reports for the
 *	user buffer.
 */
int sock_setsockopt(struct sock *sk, int level, int optname,
		char *optval, int optlen)
{
	int val;
	int valbool;
	int err;
	struct linger ling;

	/*
	 *	Options without arguments
	 */

#ifdef SO_DONTLINGER		/* Compatibility item... */
	switch(optname)
	{
		case SO_DONTLINGER:
			sk->linger=0;
			return 0;
	}
#endif

	/* Everything below needs at least an int from user space. */
	if (optval == NULL)
		return(-EINVAL);

	err=verify_area(VERIFY_READ, optval, sizeof(int));
	if(err)
		return err;

	val = get_user((int *)optval);
	valbool = val?1:0;

	switch(optname)
	{
		case SO_DEBUG:
			/* Anyone may clear the flag; only the superuser may set it. */
			if(val && !suser())
				return(-EPERM);
			sk->debug=valbool;
			return 0;

		case SO_REUSEADDR:
			sk->reuse = valbool;
			return(0);

		case SO_TYPE:
		case SO_ERROR:
			/* Not settable through this routine. */
			return(-ENOPROTOOPT);

		case SO_DONTROUTE:
			sk->localroute=valbool;
			return 0;

		case SO_BROADCAST:
			sk->broadcast=valbool;
			return 0;

		case SO_SNDBUF:
			if(val > SK_WMEM_MAX*2)
				val = SK_WMEM_MAX*2;
			if(val < 256)
				val = 256;
			if(val > 65535)
				val = 65535;
			sk->sndbuf = val;
			return 0;

		case SO_RCVBUF:
			if(val > SK_RMEM_MAX*2)
				val = SK_RMEM_MAX*2;
			if(val < 256)
				val = 256;
			/*
			 * Apply the same 65535 ceiling that SO_SNDBUF uses so
			 * both buffer limits are clamped identically.
			 * NOTE(review): presumably the buffer-size fields in
			 * struct sock are 16 bits wide, which is why the
			 * ceiling exists - confirm against the declaration.
			 */
			if(val > 65535)
				val = 65535;
			sk->rcvbuf = val;
			return(0);

		case SO_KEEPALIVE:
			sk->keepopen = valbool;
			return(0);

		case SO_OOBINLINE:
			sk->urginline = valbool;
			return(0);

		case SO_NO_CHECK:
			sk->no_check = valbool;
			return(0);

		case SO_PRIORITY:
			/* Valid priorities are 0 .. DEV_NUMBUFFS-1. */
			if (val < 0 || val >= DEV_NUMBUFFS)
				return(-EINVAL);
			sk->priority = val;
			return(0);

		case SO_LINGER:
			/* This option carries a whole struct linger, not an int. */
			err=verify_area(VERIFY_READ,optval,sizeof(ling));
			if(err)
				return err;
			memcpy_fromfs(&ling,optval,sizeof(ling));
			if(ling.l_onoff==0)
				sk->linger=0;
			else
			{
				sk->lingertime=ling.l_linger;
				sk->linger=1;
			}
			return 0;

		case SO_BSDCOMPAT:
			sk->bsdism = valbool;
			return 0;

		default:
			return(-ENOPROTOOPT);
	}
}
int sock_getsockopt(struct sock *sk, int level, int optname,
char *optval, int *optlen)
{
int val;
int err;
struct linger ling;
switch(optname)
{
case SO_DEBUG:
val = sk->debug;
break;
case SO_DONTROUTE:
val = sk->localroute;
break;
case SO_BROADCAST:
val= sk->broadcast;
break;
case SO_SNDBUF:
val=sk->sndbuf;
break;
case SO_RCVBUF:
val =sk->rcvbuf;
break;
case SO_REUSEADDR:
val = sk->reuse;
break;
case SO_KEEPALIVE:
val = sk->keepopen;
break;
case SO_TYPE:
val = sk->type;
break;
case SO_ERROR:
val = sock_error(sk);
if(val==0)
val=xchg(&sk->err_soft,0);
break;
case SO_OOBINLINE:
val = sk->urginline;
break;
case SO_NO_CHECK:
val = sk->no_check;
break;
case SO_PRIORITY:
val = sk->priority;
break;
case SO_LINGER:
err=verify_area(VERIFY_WRITE,optval,sizeof(ling));
if(err)
return err;
err=verify_area(VERIFY_WRITE,optlen,sizeof(int));
if(err)
return err;
put_fs_long(sizeof(ling),(unsigned long *)optlen);
ling.l_onoff=sk->linger;
ling.l_linger=sk->lingertime;
memcpy_tofs(optval,&ling,sizeof(ling));
return 0;
case SO_BSDCOMPAT:
val = sk->bsdism;
break;
default:
return(-ENOPROTOOPT);
}
err=verify_area(VERIFY_WRITE, optlen, sizeof(int));
if(err)
return err;
put_fs_long(sizeof(int),(unsigned long *) optlen);
err=verify_area(VERIFY_WRITE, optval, sizeof(int));
if(err)
return err;
put_fs_long(val,(unsigned long *)optval);
return(0);
}
/*
 *	Allocate a zero-filled struct sock.
 *
 *	priority is handed straight to kmalloc(). Returns the new socket,
 *	or NULL when kmalloc() fails.
 */
struct sock *sk_alloc(int priority)
{
	struct sock *fresh;

	fresh = (struct sock *)kmalloc(sizeof(*fresh), priority);
	if (fresh != NULL)
		memset(fresh, 0, sizeof(*fresh));

	return fresh;
}
/*
 *	Hand a struct sock back to the allocator via kfree_s(), passing the
 *	size of the structure along with the pointer.
 */
void sk_free(struct sock *sk)
{
	kfree_s(sk, sizeof(struct sock));
}
/*
 *	Allocate a buffer charged against a socket's write allocation.
 *
 *	sk	 - socket to charge, or NULL for an unaccounted allocation
 *	size	 - requested size, passed to alloc_skb()
 *	force	 - non-zero skips the sndbuf limit check
 *	priority - allocation priority, passed to alloc_skb()
 *
 *	Without force the request is refused (NULL) unless
 *	wmem_alloc + size is strictly below sndbuf. On success the buffer's
 *	truesize is added to sk->wmem_alloc. Returns the buffer, or NULL.
 */
struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, int priority)
{
	struct sk_buff *buf;

	/* No owner: nothing to account, just allocate. */
	if (sk == NULL)
		return alloc_skb(size, priority);

	if (!force && sk->wmem_alloc + size >= sk->sndbuf)
		return NULL;

	buf = alloc_skb(size, priority);
	if (buf != NULL)
		atomic_add(buf->truesize, &sk->wmem_alloc);

	return buf;
}
/*
 *	Allocate a buffer charged against a socket's read allocation.
 *
 *	sk	 - socket to charge, or NULL for an unaccounted allocation
 *	size	 - requested size, passed to alloc_skb()
 *	force	 - non-zero skips the rcvbuf limit check
 *	priority - allocation priority, passed to alloc_skb()
 *
 *	Without force the request is refused (NULL) unless
 *	rmem_alloc + size is strictly below rcvbuf. On success the buffer's
 *	truesize is added to sk->rmem_alloc. Returns the buffer, or NULL.
 */
struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, int priority)
{
	struct sk_buff *buf;

	/* No owner: nothing to account, just allocate. */
	if (sk == NULL)
		return alloc_skb(size, priority);

	if (!force && sk->rmem_alloc + size >= sk->rcvbuf)
		return NULL;

	buf = alloc_skb(size, priority);
	if (buf != NULL)
		atomic_add(buf->truesize, &sk->rmem_alloc);

	return buf;
}
/*
 *	Report how much receive space a socket is willing to offer.
 *
 *	Returns 0 for a NULL socket, or once rmem_alloc has reached
 *	rcvbuf - 2*MIN_WINDOW. Otherwise the result is half of the unused
 *	receive buffer less MIN_WINDOW, capped at MAX_WINDOW and never
 *	negative.
 */
unsigned long sock_rspace(struct sock *sk)
{
	int room;

	if (sk == NULL)
		return 0;

	if (sk->rmem_alloc >= sk->rcvbuf-2*MIN_WINDOW)
		return 0;

	room = min((sk->rcvbuf-sk->rmem_alloc)/2-MIN_WINDOW, MAX_WINDOW);

	return (room < 0) ? 0 : room;
}
/*
 *	Report how much send space remains on a socket.
 *
 *	Returns 0 for a NULL socket, for a socket with SEND_SHUTDOWN set,
 *	or when wmem_alloc has reached sndbuf; otherwise the difference
 *	sndbuf - wmem_alloc.
 */
unsigned long sock_wspace(struct sock *sk)
{
	if (sk == NULL || (sk->shutdown & SEND_SHUTDOWN))
		return 0;

	if (sk->wmem_alloc >= sk->sndbuf)
		return 0;

	return sk->sndbuf - sk->wmem_alloc;
}
void sock_wfree(struct sock *sk, struct sk_buff *skb)
{
int s=skb->truesize;
#if CONFIG_SKB_CHECK
IS_SKB(skb);
#endif
kfree_skbmem(skb);
if (sk)
{
/* In case it might be waiting for more memory. */
sk->write_space(sk);
atomic_sub(s, &sk->wmem_alloc);
}
}
void sock_rfree(struct sock *sk, struct sk_buff *skb)
{
int s=skb->truesize;
#if CONFIG_SKB_CHECK
IS_SKB(skb);
#endif
kfree_skbmem(skb);
if (sk)
{
atomic_sub(s, &sk->rmem_alloc);
}
}
/*
* Generic send/receive buffer handlers
*/
/*
 * Obtain a send buffer for a socket, sleeping for space when permitted.
 *
 * sk       - socket the buffer is charged to (via sock_wmalloc)
 * size     - preferred buffer size
 * fallback - if non-zero, a second size to try with GFP_KERNEL after an
 *            attempt at 'size' with GFP_BUFFER has failed; if zero, a
 *            single attempt at 'size' is made with sk->allocation
 * noblock  - non-zero: fail with -EAGAIN instead of sleeping
 * errcode  - receives the negative error code whenever NULL is returned
 *
 * Loops until a buffer is obtained. Returns the buffer, or NULL with
 * *errcode set to: the negated pending sk->err (which is then cleared),
 * -EPIPE when SEND_SHUTDOWN is set, -EAGAIN when noblock is set and no
 * buffer was available, or -ERESTARTSYS when an unblocked signal is
 * pending after the sleep.
 */
struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, unsigned long fallback, int noblock, int *errcode)
{
struct sk_buff *skb;
int err;
do
{
/* A pending socket error is reported once and then cleared. */
if(sk->err!=0)
{
cli();
err= -sk->err;
sk->err=0;
sti();
*errcode=err;
return NULL;
}
if(sk->shutdown&SEND_SHUTDOWN)
{
*errcode=-EPIPE;
return NULL;
}
if(!fallback)
skb = sock_wmalloc(sk, size, 0, sk->allocation);
else
{
/* The buffer get won't block, or use the atomic queue. It does
produce annoying no free page messages still.... */
skb = sock_wmalloc(sk, size, 0 , GFP_BUFFER);
if(!skb)
skb=sock_wmalloc(sk, fallback, 0, GFP_KERNEL);
}
/*
* This means we have too many buffers for this socket already.
*/
if(skb==NULL)
{
unsigned long tmp;
/* Flag the shortage on the owning socket before deciding whether to wait. */
sk->socket->flags |= SO_NOSPACE;
if(noblock)
{
*errcode=-EAGAIN;
return NULL;
}
if(sk->shutdown&SEND_SHUTDOWN)
{
*errcode=-EPIPE;
return NULL;
}
/* Sample the write allocation, then re-check shutdown inside the cli()/sti() bracket. */
tmp = sk->wmem_alloc;
cli();
if(sk->shutdown&SEND_SHUTDOWN)
{
sti();
*errcode=-EPIPE;
return NULL;
}
#if 1
/* Sleep only if the write allocation has not dropped since it was sampled. */
if( tmp <= sk->wmem_alloc)
#else
/* ANK: The line above seems either incorrect
* or useless. sk->wmem_alloc has a tiny chance to change
* between tmp = sk->w... and cli(),
* but it might(?) change earlier. In real life
* it does not (I have never seen the message).
* In any case I'd delete this check altogether, or
* change it to:
*/
if (sk->wmem_alloc + size >= sk->sndbuf)
#endif
{
sk->socket->flags &= ~SO_NOSPACE;
interruptible_sleep_on(sk->sleep);
/* Woken with an unblocked signal pending: give up so the call can be restarted. */
if (current->signal & ~current->blocked)
{
sti();
*errcode = -ERESTARTSYS;
return NULL;
}
}
sti();
}
}
while(skb==NULL);
return skb;
}
/*
 *	Drain a socket's back_log queue into its protocol receive handler.
 *
 *	Compiled to a no-op without CONFIG_INET. Does nothing when the socket
 *	has no protocol or the protocol has no rcv handler. Otherwise each
 *	queued buffer is unlinked and passed to sk->prot->rcv(), with the
 *	whole drain bracketed by start_bh_atomic()/end_bh_atomic().
 */
void __release_sock(struct sock *sk)
{
#ifdef CONFIG_INET
	struct sk_buff *pending;

	if (sk->prot == NULL || sk->prot->rcv == NULL)
		return;

	/* See if we have any packets built up. */
	start_bh_atomic();
	while (!skb_queue_empty(&sk->back_log)) {
		pending = sk->back_log.next;
		__skb_unlink(pending, &sk->back_log);
		sk->prot->rcv(pending, pending->dev,
			      (struct options *)pending->proto_priv,
			      pending->saddr, pending->len, pending->daddr, 1,
			      /* Only used for/by raw sockets. */
			      (struct inet_protocol *)sk->pair);
	}
	end_bh_atomic();
#endif
}