diff options
author | tuexen <tuexen@9df1edf5-d72c-5b5f-11c0-5f5209eb73f7> | 2011-11-01 23:02:02 +0000 |
---|---|---|
committer | tuexen <tuexen@9df1edf5-d72c-5b5f-11c0-5f5209eb73f7> | 2011-11-01 23:02:02 +0000 |
commit | af04b47a7f4dc9983eff3a9a195dbb0a03a47ff9 (patch) | |
tree | 7fa91719670419014e4b81a17094fd62833563c9 | |
parent | f45025ce8d63ddb09b11d6adde9be6cb083a2611 (diff) | |
download | usrsctp-af04b47a7f4dc9983eff3a9a195dbb0a03a47ff9.tar.gz |
Add files.
-rw-r--r-- | usrsctplib/Makefile.am | 51 | ||||
-rwxr-xr-x | usrsctplib/opt_compat.h | 3 | ||||
-rwxr-xr-x | usrsctplib/opt_inet.h | 1 | ||||
-rwxr-xr-x | usrsctplib/opt_inet6.h | 1 | ||||
-rwxr-xr-x | usrsctplib/opt_ipsec.h | 0 | ||||
-rwxr-xr-x | usrsctplib/opt_sctp.h | 23 | ||||
-rwxr-xr-x | usrsctplib/user_atomic.h | 245 | ||||
-rwxr-xr-x | usrsctplib/user_environment.c | 64 | ||||
-rwxr-xr-x | usrsctplib/user_environment.h | 84 | ||||
-rwxr-xr-x | usrsctplib/user_inpcb.h | 407 | ||||
-rwxr-xr-x | usrsctplib/user_ip6_var.h | 18 | ||||
-rwxr-xr-x | usrsctplib/user_malloc.h | 260 | ||||
-rwxr-xr-x | usrsctplib/user_mbuf.c | 1245 | ||||
-rwxr-xr-x | usrsctplib/user_mbuf.h | 444 | ||||
-rwxr-xr-x | usrsctplib/user_radix.h | 167 | ||||
-rwxr-xr-x | usrsctplib/user_recv_thread.c | 382 | ||||
-rwxr-xr-x | usrsctplib/user_recv_thread.h | 17 | ||||
-rwxr-xr-x | usrsctplib/user_resourcevar.h | 8 | ||||
-rwxr-xr-x | usrsctplib/user_route.h | 366 | ||||
-rwxr-xr-x | usrsctplib/user_sctp_callout.c | 78 | ||||
-rwxr-xr-x | usrsctplib/user_sctp_callout.h | 65 | ||||
-rwxr-xr-x | usrsctplib/user_sctp_timer_iterate.c | 111 | ||||
-rwxr-xr-x | usrsctplib/user_socket.c | 2527 | ||||
-rwxr-xr-x | usrsctplib/user_socketvar.h | 834 | ||||
-rwxr-xr-x | usrsctplib/user_uma.h | 67 |
25 files changed, 7468 insertions, 0 deletions
diff --git a/usrsctplib/Makefile.am b/usrsctplib/Makefile.am new file mode 100644 index 00000000..4fb3b0f4 --- /dev/null +++ b/usrsctplib/Makefile.am @@ -0,0 +1,51 @@ +lib_LTLIBRARIES = libusrsctp.la +libusrsctp_la_SOURCES = opt_compat.h \ + opt_inet6.h \ + opt_ipsec.h \ + opt_sctp.h \ + user_atomic.h \ + user_environment.c user_environment.h \ + user_inpcb.h \ + user_ip6_var.h \ + user_malloc.h \ + user_mbuf.c \ + user_mbuf.h \ + user_radix.h \ + user_recv_thread.c user_recv_thread.h \ + user_resourcevar.h \ + user_route.h \ + user_sctp_callout.c user_sctp_callout.h \ + user_sctp_timer_iterate.c \ + user_socket.c \ + user_socketvar.h \ + user_uma.h \ + netinet/sctp.h \ + netinet/sctp_asconf.c netinet/sctp_asconf.h \ + netinet/sctp_auth.c netinet/sctp_auth.h \ + netinet/sctp_bsd_addr.c netinet/sctp_bsd_addr.h \ + netinet/sctp_callout.c netinet/sctp_callout.h \ + netinet/sctp_cc_functions.c \ + netinet/sctp_constants.h \ + netinet/sctp_crc32.c netinet/sctp_crc32.h \ + netinet/sctp_dtrace_declare.h \ + netinet/sctp_dtrace_define.h \ + netinet/sctp_hashdriver.c netinet/sctp_hashdriver.h \ + netinet/sctp_header.h \ + netinet/sctp_indata.c netinet/sctp_indata.h \ + netinet/sctp_input.c netinet/sctp_input.h \ + netinet/sctp_lock_userspace.h \ + netinet/sctp_os.h \ + netinet/sctp_os_userspace.h \ + netinet/sctp_output.c netinet/sctp_output.h \ + netinet/sctp_pcb.c netinet/sctp_pcb.h \ + netinet/sctp_peeloff.c netinet/sctp_peeloff.h \ + netinet/sctp_process_lock.h \ + netinet/sctp_sha1.c netinet/sctp_sha1.h \ + netinet/sctp_ss_functions.c \ + netinet/sctp_structs.h \ + netinet/sctp_sysctl.c netinet/sctp_sysctl.h \ + netinet/sctp_timer.c netinet/sctp_timer.h \ + netinet/sctp_uio.h \ + netinet/sctp_usrreq.c \ + netinet/sctp_var.h \ + netinet/sctputil.c netinet/sctputil.h diff --git a/usrsctplib/opt_compat.h b/usrsctplib/opt_compat.h new file mode 100755 index 00000000..e6910362 --- /dev/null +++ b/usrsctplib/opt_compat.h @@ -0,0 +1,3 @@ +#define COMPAT_43 1 +#define 
COMPAT_FREEBSD5 1 +#define COMPAT_FREEBSD4 1 diff --git a/usrsctplib/opt_inet.h b/usrsctplib/opt_inet.h new file mode 100755 index 00000000..fdf70095 --- /dev/null +++ b/usrsctplib/opt_inet.h @@ -0,0 +1 @@ +#define INET 1 diff --git a/usrsctplib/opt_inet6.h b/usrsctplib/opt_inet6.h new file mode 100755 index 00000000..d2485592 --- /dev/null +++ b/usrsctplib/opt_inet6.h @@ -0,0 +1 @@ +/*#define INET6 0*/ diff --git a/usrsctplib/opt_ipsec.h b/usrsctplib/opt_ipsec.h new file mode 100755 index 00000000..e69de29b --- /dev/null +++ b/usrsctplib/opt_ipsec.h diff --git a/usrsctplib/opt_sctp.h b/usrsctplib/opt_sctp.h new file mode 100755 index 00000000..02ca0b7c --- /dev/null +++ b/usrsctplib/opt_sctp.h @@ -0,0 +1,23 @@ +#define SCTP 1 +#define SCTP_DEBUG 1 + +/* SCTP_PROCESS_LEVEL_LOCKS uses sctp_process_lock.h within sctp_pcb.h + * otherwise if undefined (i.e. below is commented out), we will use + * sctp_lock_userspace.h . + */ +#define SCTP_PROCESS_LEVEL_LOCKS 1 +//#define SCTP_PER_SOCKET_LOCKING 1 + +/* uncomment the below in order to make the CRC32c disabled */ +/*#define SCTP_WITH_NO_CSUM 1*/ + +/* forces routes to have MTU 1500. user mbuf implementation doesn't have + * efficient jumbo support yet. + */ +#define SCTP_USERSPACE_ROUTE_USE_MTU_1500 1 + + +/* makes use of the send callback only at a threshold if 1, and whenever the callback + * is not NULL if 0. + */ +#define SCTP_USERSPACE_SEND_CALLBACK_USE_THRESHOLD 0 diff --git a/usrsctplib/user_atomic.h b/usrsctplib/user_atomic.h new file mode 100755 index 00000000..6e6ef08a --- /dev/null +++ b/usrsctplib/user_atomic.h @@ -0,0 +1,245 @@ +#ifndef _USER_ATOMIC_H_ +#define _USER_ATOMIC_H_ + +/* __Userspace__ version of sys/i386/include/atomic.h goes here */ + +/* TODO In the future, might want to not use i386 specific assembly. + * The options include: + * - implement them generically (but maybe not truly atomic?) in userspace + * - have ifdef's for __Userspace_arch_ perhaps (OS isn't enough...) 
+ */ + +#include <stdio.h> +#include <sys/types.h> + +#if defined(__Userspace_os_Darwin) +#include <libkern/OSAtomic.h> +#define atomic_add_int(addr, val) OSAtomicAdd32Barrier(val, (int32_t *)addr) +#define atomic_fetchadd_int(addr, val) OSAtomicAdd32Barrier(val, (int32_t *)addr) +#define atomic_subtract_int(addr, val) OSAtomicAdd32Barrier(-val, (int32_t *)addr) +#define atomic_cmpset_int(dst, exp, src) OSAtomicCompareAndSwapIntBarrier(exp, src, (int *)dst) + +#define SCTP_DECREMENT_AND_CHECK_REFCOUNT(addr) (atomic_fetchadd_int(addr, -1) == 0) +#if defined(INVARIANTS) +#define SCTP_SAVE_ATOMIC_DECREMENT(addr, val) \ +{ \ + int32_t newval; \ + newval = atomic_fetchadd_int(addr, -val); \ + if (newval < 0) { \ + panic("Counter goes negative"); \ + } \ +} +#else +#define SCTP_SAVE_ATOMIC_DECREMENT(addr, val) \ +{ \ + int32_t newval; \ + newval = atomic_fetchadd_int(addr, -val); \ + if (newval < 0) { \ + *addr = 0; \ + } \ +} +static inline void atomic_init() {} /* empty when we are not using atomic_mtx */ +#endif + +#else +/* Using gcc built-in functions for atomic memory operations + Reference: http://gcc.gnu.org/onlinedocs/gcc-4.1.0/gcc/Atomic-Builtins.html + Requires gcc version 4.1.0 + compile with -march=i486 + */ + +/*Atomically add V to *P.*/ +#define atomic_add_int(P, V) (void) __sync_fetch_and_add(P, V) + +/*Atomically subtrace V from *P.*/ +#define atomic_subtract_int(P, V) (void) __sync_fetch_and_sub(P, V) + +/* + * Atomically add the value of v to the integer pointed to by p and return + * the previous value of *p. 
+ */ +#define atomic_fetchadd_int(p, v) __sync_fetch_and_add(p, v) + +/* Following explanation from src/sys/i386/include/atomic.h, + * for atomic compare and set + * + * if (*dst == exp) *dst = src (all 32 bit words) + * + * Returns 0 on failure, non-zero on success + */ + +#define atomic_cmpset_int(dst, exp, src) __sync_bool_compare_and_swap(dst, exp, src) + +#define SCTP_DECREMENT_AND_CHECK_REFCOUNT(addr) (atomic_fetchadd_int(addr, -1) == 1) +#if defined(INVARIANTS) +#define SCTP_SAVE_ATOMIC_DECREMENT(addr, val) \ +{ \ + int32_t oldval; \ + oldval = atomic_fetchadd_int(addr, -val); \ + if (oldval < val) { \ + panic("Counter goes negative"); \ + } \ +} +#else +#define SCTP_SAVE_ATOMIC_DECREMENT(addr, val) \ +{ \ + int32_t oldval; \ + oldval = atomic_fetchadd_int(addr, -val); \ + if (oldval < val) { \ + *addr = 0; \ + } \ +} +#endif +static inline void atomic_init() {} /* empty when we are not using atomic_mtx */ +#endif + +#if 0 /* using libatomic_ops */ +#include "user_include/atomic_ops.h" + +/*Atomically add incr to *P, and return the original value of *P.*/ +#define atomic_add_int(P, V) AO_fetch_and_add((AO_t*)P, V) + +#define atomic_subtract_int(P, V) AO_fetch_and_add((AO_t*)P, -(V)) + +/* + * Atomically add the value of v to the integer pointed to by p and return + * the previous value of *p. + */ +#define atomic_fetchadd_int(p, v) AO_fetch_and_add((AO_t*)p, v) + +/* Atomically compare *addr to old_val, and replace *addr by new_val + if the first comparison succeeds. Returns nonzero if the comparison + succeeded and *addr was updated. +*/ +/* Following Explanation from src/sys/i386/include/atomic.h, which + matches that of AO_compare_and_swap above. 
+ * Atomic compare and set, used by the mutex functions + * + * if (*dst == exp) *dst = src (all 32 bit words) + * + * Returns 0 on failure, non-zero on success + */ + +#define atomic_cmpset_int(dst, exp, src) AO_compare_and_swap((AO_t*)dst, exp, src) + +static inline void atomic_init() {} /* empty when we are not using atomic_mtx */ +#endif /* closing #if for libatomic */ + +#if 0 /* using atomic_mtx */ + +#include <pthread.h> + + +extern pthread_mutex_t atomic_mtx; +static inline void atomic_init() { + (void)pthread_mutex_init(&atomic_mtx, NULL); +} +static inline void atomic_destroy() { + (void)pthread_mutex_destroy(&atomic_mtx); +} +static inline void atomic_lock() { + (void)pthread_mutex_lock(&atomic_mtx); +} +static inline void atomic_unlock() { + (void)pthread_mutex_unlock(&atomic_mtx); +} + +/* + * For userland, always use lock prefixes so that the binaries will run + * on both SMP and !SMP systems. + */ + +#define MPLOCKED "lock ; " + + +/* + * Atomically add the value of v to the integer pointed to by p and return + * the previous value of *p. 
+ */ +static __inline u_int +atomic_fetchadd_int(volatile void *n, u_int v) +{ + int *p = (int *) n; + atomic_lock(); + __asm __volatile( + " " MPLOCKED " " + " xaddl %0, %1 ; " + "# atomic_fetchadd_int" + : "+r" (v), /* 0 (result) */ + "=m" (*p) /* 1 */ + : "m" (*p)); /* 2 */ + atomic_unlock(); + + return (v); +} + + +#ifdef CPU_DISABLE_CMPXCHG + +static __inline int +atomic_cmpset_int(volatile u_int *dst, u_int exp, u_int src) +{ + u_char res; + + atomic_lock(); + __asm __volatile( + " pushfl ; " + " cli ; " + " cmpl %3,%4 ; " + " jne 1f ; " + " movl %2,%1 ; " + "1: " + " sete %0 ; " + " popfl ; " + "# atomic_cmpset_int" + : "=q" (res), /* 0 */ + "=m" (*dst) /* 1 */ + : "r" (src), /* 2 */ + "r" (exp), /* 3 */ + "m" (*dst) /* 4 */ + : "memory"); + atomic_unlock(); + + return (res); +} + +#else /* !CPU_DISABLE_CMPXCHG */ + +static __inline int +atomic_cmpset_int(volatile u_int *dst, u_int exp, u_int src) +{ + atomic_lock(); + u_char res; + + __asm __volatile( + " " MPLOCKED " " + " cmpxchgl %2,%1 ; " + " sete %0 ; " + "1: " + "# atomic_cmpset_int" + : "=a" (res), /* 0 */ + "=m" (*dst) /* 1 */ + : "r" (src), /* 2 */ + "a" (exp), /* 3 */ + "m" (*dst) /* 4 */ + : "memory"); + atomic_unlock(); + + return (res); +} + +#endif /* CPU_DISABLE_CMPXCHG */ + +#define atomic_add_int(P, V) do { \ + atomic_lock(); \ + (*(u_int *)(P) += (V)); \ + atomic_unlock(); \ +} while(0) +#define atomic_subtract_int(P, V) do { \ + atomic_lock(); \ + (*(u_int *)(P) -= (V)); \ + atomic_unlock(); \ +} while(0) + +#endif +#endif diff --git a/usrsctplib/user_environment.c b/usrsctplib/user_environment.c new file mode 100755 index 00000000..f7202ef2 --- /dev/null +++ b/usrsctplib/user_environment.c @@ -0,0 +1,64 @@ +/* __Userspace__ */ + +#include <stdlib.h> +#include <stdint.h> +#include <user_environment.h> +#include <sys/types.h> +#include <sys/sysctl.h> +/* #include <sys/param.h> defines MIN */ +#if !defined(MIN) +#define MIN(arg1,arg2) ((arg1) < (arg2) ? 
(arg1) : (arg2)) +#endif +#include <string.h> + +#define uHZ 1000 + +/* See user_include/user_environment.h for comments about these variables */ +int maxsockets = 25600; +int hz = uHZ; +int ip_defttl = 64; +int ipport_firstauto = 49152, ipport_lastauto = 65535; +int nmbclusters = 65536; + +/* Source ip_output.c. extern'd in ip_var.h */ +u_short ip_id = 0; /*__Userspace__ TODO Should it be initialized to zero? */ + +/* used in user_include/user_atomic.h in order to make the operations + * defined there truly atomic + */ +pthread_mutex_t atomic_mtx; + +/* Source: /usr/src/sys/dev/random/harvest.c */ +static int read_random_phony(void *, int); + +static int (*read_func)(void *, int) = read_random_phony; + +/* Userland-visible version of read_random */ +int +read_random(void *buf, int count) +{ + return ((*read_func)(buf, count)); +} + +/* If the entropy device is not loaded, make a token effort to + * provide _some_ kind of randomness. This should only be used + * inside other RNG's, like arc4random(9). + */ +static int +read_random_phony(void *buf, int count) +{ + uint32_t randval; + int size, i; + + /* srandom() is called in kern/init_main.c:proc0_post() */ + + /* Fill buf[] with random(9) output */ + for (i = 0; i < count; i+= (int)sizeof(uint32_t)) { + randval = random(); + size = MIN(count - i, sizeof(uint32_t)); + memcpy(&((char *)buf)[i], &randval, (size_t)size); + } + + return (count); +} + diff --git a/usrsctplib/user_environment.h b/usrsctplib/user_environment.h new file mode 100755 index 00000000..33cb01d6 --- /dev/null +++ b/usrsctplib/user_environment.h @@ -0,0 +1,84 @@ +#ifndef _USER_ENVIRONMENT_H_ +#define _USER_ENVIRONMENT_H_ +/* __Userspace__ */ +#include <sys/types.h> + +#ifdef __Userspace_os_FreeBSD +#ifndef _SYS_MUTEX_H_ +#include <sys/mutex.h> +#endif +#endif + +/* maxsockets is used in SCTP_ZONE_INIT call. It refers to + * kern.ipc.maxsockets kernel environment variable. 
+ */ +extern int maxsockets; + +/* int hz; is declared in sys/kern/subr_param.c and refers to kernel timer frequency. + * See http://ivoras.sharanet.org/freebsd/vmware.html for additional info about kern.hz + * hz is initialized in void init_param1(void) in that file. + */ +extern int hz; + + +/* The following two ints define a range of available ephermal ports. */ +extern int ipport_firstauto, ipport_lastauto; + +/* nmbclusters is used in sctp_usrreq.c (e.g., sctp_init). In the FreeBSD kernel, + * this is 1024 + maxusers * 64. + */ +extern int nmbclusters; + + +/* __Userspace__ Are min, max defined in some header file? */ +#define min(a,b) ((a)>(b)?(b):(a)) +#define max(a,b) ((a)>(b)?(a):(b)) + + +extern int read_random(void *buf, int count); + +/* errno's may differ per OS. errno.h now included in sctp_os_userspace.h */ +/* Source: /usr/src/sys/sys/errno.h */ +/* #define ENOSPC 28 */ /* No space left on device */ +/* #define ENOBUFS 55 */ /* No buffer space available */ +/* #define ENOMEM 12 */ /* Cannot allocate memory */ +/* #define EACCES 13 */ /* Permission denied */ +/* #define EFAULT 14 */ /* Bad address */ +/* #define EHOSTDOWN 64 */ /* Host is down */ +/* #define EHOSTUNREACH 65 */ /* No route to host */ + +/* Source ip_output.c. extern'd in ip_var.h */ +extern u_short ip_id; + +#if defined(__Userspace_os_Linux) +#define IPV6_VERSION 0x60 +#endif + + +/* kernel stuff */ +#include <assert.h> +#define KASSERT(exp,msg) assert(exp) + +#define panic(arg1) do { \ + perror(arg1); \ + exit(1); \ +} while (0) +/* necessary for sctp_pcb.c */ +extern int ip_defttl; + + +/* dummy definitions used (temporarily?) 
for inpcb userspace port */ +#define mtx_lock(arg1) +#define mtx_unlock(arg1) +#define mtx_assert(arg1,arg2) +#define MA_OWNED 7 /* sys/mutex.h typically on FreeBSD */ +#if !defined(__Userspace_os_FreeBSD) +struct mtx {}; +struct selinfo {}; +struct sx {}; +#endif + +/* called in sctp_usrreq.c */ +#define in6_sin_2_v4mapsin6(arg1, arg2) /* STUB */ + +#endif diff --git a/usrsctplib/user_inpcb.h b/usrsctplib/user_inpcb.h new file mode 100755 index 00000000..c1f3018e --- /dev/null +++ b/usrsctplib/user_inpcb.h @@ -0,0 +1,407 @@ +/*- + * Copyright (c) 1982, 1986, 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)in_pcb.h 8.1 (Berkeley) 6/10/93 + * $FreeBSD: src/sys/netinet/in_pcb.h,v 1.100.2.1 2007/12/07 05:46:08 kmacy Exp $ + */ + +#ifndef _NETINET_IN_PCB_H_ +#define _NETINET_IN_PCB_H_ + +#include <sys/queue.h> +/* #include <sys/_lock.h> was a 0 byte file */ +/* #include <sys/_mutex.h> was a 0 byte file */ + +#include <user_route.h> /* was <net/route.h> */ + +#define in6pcb inpcb /* for KAME src sync over BSD*'s */ +#define in6p_sp inp_sp /* for KAME src sync over BSD*'s */ +struct inpcbpolicy; + +/* + * Struct inpcb is the ommon structure pcb for the Internet Protocol + * implementation. + * + * Pointers to local and foreign host table entries, local and foreign socket + * numbers, and pointers up (to a socket structure) and down (to a + * protocol-specific control block) are stored here. + */ +LIST_HEAD(inpcbhead, inpcb); +LIST_HEAD(inpcbporthead, inpcbport); +typedef u_quad_t inp_gen_t; + +/* + * PCB with AF_INET6 null bind'ed laddr can receive AF_INET input packet. + * So, AF_INET6 null laddr is also used as AF_INET null laddr, by utilizing + * the following structure. + */ +struct in_addr_4in6 { + u_int32_t ia46_pad32[3]; + struct in_addr ia46_addr4; +}; + +/* + * NOTE: ipv6 addrs should be 64-bit aligned, per RFC 2553. in_conninfo has + * some extra padding to accomplish this. 
+ */ +struct in_endpoints { + u_int16_t ie_fport; /* foreign port */ + u_int16_t ie_lport; /* local port */ + /* protocol dependent part, local and foreign addr */ + union { + /* foreign host table entry */ + struct in_addr_4in6 ie46_foreign; + struct in6_addr ie6_foreign; + } ie_dependfaddr; + union { + /* local host table entry */ + struct in_addr_4in6 ie46_local; + struct in6_addr ie6_local; + } ie_dependladdr; +#define ie_faddr ie_dependfaddr.ie46_foreign.ia46_addr4 +#define ie_laddr ie_dependladdr.ie46_local.ia46_addr4 +#define ie6_faddr ie_dependfaddr.ie6_foreign +#define ie6_laddr ie_dependladdr.ie6_local +}; + +/* + * XXX The defines for inc_* are hacks and should be changed to direct + * references. + */ +struct in_conninfo { + u_int8_t inc_flags; + u_int8_t inc_len; + u_int16_t inc_pad; /* XXX alignment for in_endpoints */ + /* protocol dependent part */ + struct in_endpoints inc_ie; +}; +#define inc_isipv6 inc_flags /* temp compatability */ +#define inc_fport inc_ie.ie_fport +#define inc_lport inc_ie.ie_lport +#define inc_faddr inc_ie.ie_faddr +#define inc_laddr inc_ie.ie_laddr +#define inc6_faddr inc_ie.ie6_faddr +#define inc6_laddr inc_ie.ie6_laddr + +struct icmp6_filter; + +struct inpcb { + LIST_ENTRY(inpcb) inp_hash; /* hash list */ + LIST_ENTRY(inpcb) inp_list; /* list for all PCBs of this proto */ + void *inp_ppcb; /* pointer to per-protocol pcb */ + struct inpcbinfo *inp_pcbinfo; /* PCB list info */ + struct socket *inp_socket; /* back pointer to socket */ + + u_int32_t inp_flow; + int inp_flags; /* generic IP/datagram flags */ + + u_char inp_vflag; /* IP version flag (v4/v6) */ +#define INP_IPV4 0x1 +#define INP_IPV6 0x2 +#define INP_IPV6PROTO 0x4 /* opened under IPv6 protocol */ +#define INP_TIMEWAIT 0x8 /* .. 
probably doesn't go here */ +#define INP_ONESBCAST 0x10 /* send all-ones broadcast */ +#define INP_DROPPED 0x20 /* protocol drop flag */ +#define INP_SOCKREF 0x40 /* strong socket reference */ + u_char inp_ip_ttl; /* time to live proto */ + u_char inp_ip_p; /* protocol proto */ + u_char inp_ip_minttl; /* minimum TTL or drop */ + uint32_t inp_ispare1; /* connection id / queue id */ + void *inp_pspare[2]; /* rtentry / general use */ + + /* Local and foreign ports, local and foreign addr. */ + struct in_conninfo inp_inc; + + /* list for this PCB's local port */ + struct label *inp_label; /* MAC label */ + struct inpcbpolicy *inp_sp; /* for IPSEC */ + + /* Protocol-dependent part; options. */ + struct { + u_char inp4_ip_tos; /* type of service proto */ + struct mbuf *inp4_options; /* IP options */ + struct ip_moptions *inp4_moptions; /* IP multicast options */ + } inp_depend4; +#define inp_fport inp_inc.inc_fport +#define inp_lport inp_inc.inc_lport +#define inp_faddr inp_inc.inc_faddr +#define inp_laddr inp_inc.inc_laddr +#define inp_ip_tos inp_depend4.inp4_ip_tos +#define inp_options inp_depend4.inp4_options +#define inp_moptions inp_depend4.inp4_moptions + struct { + /* IP options */ + struct mbuf *inp6_options; + /* IP6 options for outgoing packets */ + struct ip6_pktopts *inp6_outputopts; + /* IP multicast options */ + struct ip6_moptions *inp6_moptions; + /* ICMPv6 code type filter */ + struct icmp6_filter *inp6_icmp6filt; + /* IPV6_CHECKSUM setsockopt */ + int inp6_cksum; + short inp6_hops; + } inp_depend6; + LIST_ENTRY(inpcb) inp_portlist; + struct inpcbport *inp_phd; /* head of this list */ +#define inp_zero_size offsetof(struct inpcb, inp_gencnt) + inp_gen_t inp_gencnt; /* generation count of this instance */ + struct mtx inp_mtx; + +#define in6p_faddr inp_inc.inc6_faddr +#define in6p_laddr inp_inc.inc6_laddr +#define in6p_hops inp_depend6.inp6_hops /* default hop limit */ +#define in6p_ip6_nxt inp_ip_p +#define in6p_flowinfo inp_flow +#define in6p_vflag 
inp_vflag +#define in6p_options inp_depend6.inp6_options +#define in6p_outputopts inp_depend6.inp6_outputopts +#define in6p_moptions inp_depend6.inp6_moptions +#define in6p_icmp6filt inp_depend6.inp6_icmp6filt +#define in6p_cksum inp_depend6.inp6_cksum +#define in6p_flags inp_flags /* for KAME src sync over BSD*'s */ +#define in6p_socket inp_socket /* for KAME src sync over BSD*'s */ +#define in6p_lport inp_lport /* for KAME src sync over BSD*'s */ +#define in6p_fport inp_fport /* for KAME src sync over BSD*'s */ +#define in6p_ppcb inp_ppcb /* for KAME src sync over BSD*'s */ +}; +/* + * The range of the generation count, as used in this implementation, is 9e19. + * We would have to create 300 billion connections per second for this number + * to roll over in a year. This seems sufficiently unlikely that we simply + * don't concern ourselves with that possibility. + */ + +/* + * Interface exported to userland by various protocols which use inpcbs. Hack + * alert -- only define if struct xsocket is in scope. + */ +#ifdef _SYS_SOCKETVAR_H_ +struct xinpcb { + size_t xi_len; /* length of this structure */ + struct inpcb xi_inp; + struct xsocket xi_socket; + u_quad_t xi_alignment_hack; +}; + +struct xinpgen { + size_t xig_len; /* length of this structure */ + u_int xig_count; /* number of PCBs at this time */ + inp_gen_t xig_gen; /* generation count at this time */ + so_gen_t xig_sogen; /* socket generation count at this time */ +}; +#endif /* _SYS_SOCKETVAR_H_ */ + +struct inpcbport { + LIST_ENTRY(inpcbport) phd_hash; + struct inpcbhead phd_pcblist; + u_short phd_port; +}; + +/* + * Global data structure for each high-level protocol (UDP, TCP, ...) in both + * IPv4 and IPv6. Holds inpcb lists and information for managing them. + */ +struct inpcbinfo { + /* + * Global list of inpcbs on the protocol. + */ + struct inpcbhead *ipi_listhead; + u_int ipi_count; + + /* + * Global hash of inpcbs, hashed by local and foreign addresses and + * port numbers. 
+ */ + struct inpcbhead *ipi_hashbase; + u_long ipi_hashmask; + + /* + * Global hash of inpcbs, hashed by only local port number. + */ + struct inpcbporthead *ipi_porthashbase; + u_long ipi_porthashmask; + + /* + * Fields associated with port lookup and allocation. + */ + u_short ipi_lastport; + u_short ipi_lastlow; + u_short ipi_lasthi; + + /* + * UMA zone from which inpcbs are allocated for this protocol. + */ + struct uma_zone *ipi_zone; + + /* + * Generation count--incremented each time a connection is allocated + * or freed. + */ + u_quad_t ipi_gencnt; + struct mtx ipi_mtx; + + /* + * vimage 1 + * general use 1 + */ + void *ipi_pspare[2]; +}; + +#define INP_LOCK_INIT(inp, d, t) \ + mtx_init(&(inp)->inp_mtx, (d), (t), MTX_DEF | MTX_RECURSE | MTX_DUPOK) +#define INP_LOCK_DESTROY(inp) mtx_destroy(&(inp)->inp_mtx) +#define INP_LOCK(inp) mtx_lock(&(inp)->inp_mtx) +#define INP_UNLOCK(inp) mtx_unlock(&(inp)->inp_mtx) +#define INP_LOCK_ASSERT(inp) mtx_assert(&(inp)->inp_mtx, MA_OWNED) +#define INP_UNLOCK_ASSERT(inp) mtx_assert(&(inp)->inp_mtx, MA_NOTOWNED) + +#define INP_INFO_LOCK_INIT(ipi, d) \ + mtx_init(&(ipi)->ipi_mtx, (d), NULL, MTX_DEF | MTX_RECURSE) +#define INP_INFO_LOCK_DESTROY(ipi) mtx_destroy(&(ipi)->ipi_mtx) +#define INP_INFO_RLOCK(ipi) mtx_lock(&(ipi)->ipi_mtx) +#define INP_INFO_WLOCK(ipi) mtx_lock(&(ipi)->ipi_mtx) +#define INP_INFO_RUNLOCK(ipi) mtx_unlock(&(ipi)->ipi_mtx) +#define INP_INFO_WUNLOCK(ipi) mtx_unlock(&(ipi)->ipi_mtx) +#define INP_INFO_RLOCK_ASSERT(ipi) mtx_assert(&(ipi)->ipi_mtx, MA_OWNED) +#define INP_INFO_WLOCK_ASSERT(ipi) mtx_assert(&(ipi)->ipi_mtx, MA_OWNED) +#define INP_INFO_UNLOCK_ASSERT(ipi) mtx_assert(&(ipi)->ipi_mtx, MA_NOTOWNED) + +#define INP_PCBHASH(faddr, lport, fport, mask) \ + (((faddr) ^ ((faddr) >> 16) ^ ntohs((lport) ^ (fport))) & (mask)) +#define INP_PCBPORTHASH(lport, mask) \ + (ntohs((lport)) & (mask)) + +/* flags in inp_flags: */ +#define INP_RECVOPTS 0x01 /* receive incoming IP options */ +#define INP_RECVRETOPTS 0x02 
/* receive IP options for reply */ +#define INP_RECVDSTADDR 0x04 /* receive IP dst address */ +#define INP_HDRINCL 0x08 /* user supplies entire IP header */ +#define INP_HIGHPORT 0x10 /* user wants "high" port binding */ +#define INP_LOWPORT 0x20 /* user wants "low" port binding */ +#define INP_ANONPORT 0x40 /* port chosen for user */ +#define INP_RECVIF 0x80 /* receive incoming interface */ +#define INP_MTUDISC 0x100 /* user can do MTU discovery */ +#define INP_FAITH 0x200 /* accept FAITH'ed connections */ +#define INP_RECVTTL 0x400 /* receive incoming IP TTL */ +#define INP_DONTFRAG 0x800 /* don't fragment packet */ + +#define IN6P_IPV6_V6ONLY 0x008000 /* restrict AF_INET6 socket for v6 */ + +#define IN6P_PKTINFO 0x010000 /* receive IP6 dst and I/F */ +#define IN6P_HOPLIMIT 0x020000 /* receive hoplimit */ +#define IN6P_HOPOPTS 0x040000 /* receive hop-by-hop options */ +#define IN6P_DSTOPTS 0x080000 /* receive dst options after rthdr */ +#define IN6P_RTHDR 0x100000 /* receive routing header */ +#define IN6P_RTHDRDSTOPTS 0x200000 /* receive dstoptions before rthdr */ +#define IN6P_TCLASS 0x400000 /* receive traffic class value */ +#define IN6P_AUTOFLOWLABEL 0x800000 /* attach flowlabel automatically */ +#define IN6P_RFC2292 0x40000000 /* used RFC2292 API on the socket */ +#define IN6P_MTU 0x80000000 /* receive path MTU */ + +#define INP_CONTROLOPTS (INP_RECVOPTS|INP_RECVRETOPTS|INP_RECVDSTADDR|\ + INP_RECVIF|INP_RECVTTL|\ + IN6P_PKTINFO|IN6P_HOPLIMIT|IN6P_HOPOPTS|\ + IN6P_DSTOPTS|IN6P_RTHDR|IN6P_RTHDRDSTOPTS|\ + IN6P_TCLASS|IN6P_AUTOFLOWLABEL|IN6P_RFC2292|\ + IN6P_MTU) +#define INP_UNMAPPABLEOPTS (IN6P_HOPOPTS|IN6P_DSTOPTS|IN6P_RTHDR|\ + IN6P_TCLASS|IN6P_AUTOFLOWLABEL) + + /* for KAME src sync over BSD*'s */ +#define IN6P_HIGHPORT INP_HIGHPORT +#define IN6P_LOWPORT INP_LOWPORT +#define IN6P_ANONPORT INP_ANONPORT +#define IN6P_RECVIF INP_RECVIF +#define IN6P_MTUDISC INP_MTUDISC +#define IN6P_FAITH INP_FAITH +#define IN6P_CONTROLOPTS INP_CONTROLOPTS + /* + * socket 
AF version is {newer than,or include} + * actual datagram AF version + */ + +#define INPLOOKUP_WILDCARD 1 +#define sotoinpcb(so) ((struct inpcb *)(so)->so_pcb) +#define sotoin6pcb(so) sotoinpcb(so) /* for KAME src sync over BSD*'s */ + +#define INP_SOCKAF(so) so->so_proto->pr_domain->dom_family + +#define INP_CHECK_SOCKAF(so, af) (INP_SOCKAF(so) == af) + +/* #ifdef _KERNEL */ +extern int ipport_reservedhigh; +extern int ipport_reservedlow; +extern int ipport_lowfirstauto; +extern int ipport_lowlastauto; +extern int ipport_firstauto; +extern int ipport_lastauto; +extern int ipport_hifirstauto; +extern int ipport_hilastauto; +extern struct callout ipport_tick_callout; + +void in_pcbpurgeif0(struct inpcbinfo *, struct ifnet *); +int in_pcballoc(struct socket *, struct inpcbinfo *); +int in_pcbbind(struct inpcb *, struct sockaddr *, struct ucred *); +int in_pcbbind_setup(struct inpcb *, struct sockaddr *, in_addr_t *, + u_short *, struct ucred *); +int in_pcbconnect(struct inpcb *, struct sockaddr *, struct ucred *); +int in_pcbconnect_setup(struct inpcb *, struct sockaddr *, in_addr_t *, + u_short *, in_addr_t *, u_short *, struct inpcb **, + struct ucred *); +void in_pcbdetach(struct inpcb *); +void in_pcbdisconnect(struct inpcb *); +void in_pcbdrop(struct inpcb *); +void in_pcbfree(struct inpcb *); +int in_pcbinshash(struct inpcb *); +struct inpcb * + in_pcblookup_local(struct inpcbinfo *, + struct in_addr, u_int, int); +struct inpcb * + in_pcblookup_hash(struct inpcbinfo *, struct in_addr, u_int, + struct in_addr, u_int, int, struct ifnet *); +void in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr, + int, struct inpcb *(*)(struct inpcb *, int)); +void in_pcbrehash(struct inpcb *); +void in_pcbsetsolabel(struct socket *so); +int in_getpeeraddr(struct socket *so, struct sockaddr **nam); +int in_getsockaddr(struct socket *so, struct sockaddr **nam); +struct sockaddr * + in_sockaddr(in_port_t port, struct in_addr *addr); +void in_pcbsosetlabel(struct socket 
*so); +void in_pcbremlists(struct inpcb *inp); +void ipport_tick(void *xtp); + +/* + * Debugging routines compiled in when DDB is present. + */ +void db_print_inpcb(struct inpcb *inp, const char *name, int indent); + +/* #endif _KERNEL */ + +#endif /* !_NETINET_IN_PCB_H_ */ diff --git a/usrsctplib/user_ip6_var.h b/usrsctplib/user_ip6_var.h new file mode 100755 index 00000000..c8d35b4f --- /dev/null +++ b/usrsctplib/user_ip6_var.h @@ -0,0 +1,18 @@ +/* __Userspace__ version of ip6_var.h */ + +#define IN6_IFF_ANYCAST 0x01 /* anycast address */ +#define IN6_IFF_TENTATIVE 0x02 /* tentative address */ +#define IN6_IFF_DUPLICATED 0x04 /* DAD detected duplicate */ +#define IN6_IFF_DETACHED 0x08 /* may be detached from the link */ +#define IN6_IFF_DEPRECATED 0x10 /* deprecated address */ +#define IN6_IFF_NODAD 0x20 /* don't perform DAD on this address + * (used only at first SIOC* call) + */ +#define IN6_IFF_AUTOCONF 0x40 /* autoconfigurable address. */ +#define IN6_IFF_TEMPORARY 0x80 /* temporary (anonymous) address. */ +#define IN6_IFF_NOPFX 0x8000 /* skip kernel prefix management. + * XXX: this should be temporary. + */ + +/* do not input/output */ +#define IN6_IFF_NOTREADY (IN6_IFF_TENTATIVE|IN6_IFF_DUPLICATED) diff --git a/usrsctplib/user_malloc.h b/usrsctplib/user_malloc.h new file mode 100755 index 00000000..a15cc7f9 --- /dev/null +++ b/usrsctplib/user_malloc.h @@ -0,0 +1,260 @@ +/*- + * Copyright (c) 1987, 1993 + * The Regents of the University of California. + * Copyright (c) 2005 Robert N. M. Watson + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)malloc.h 8.5 (Berkeley) 5/3/95 + * $FreeBSD: src/sys/sys/malloc.h,v 1.80.2.1 2005/07/23 17:11:33 rwatson Exp $ + */ + +/* This file has been renamed user_malloc.h for Userspace */ +#ifndef _SYS_MALLOC_H_ +#define _SYS_MALLOC_H_ + +/*__Userspace__*/ +#include <stdlib.h> +#include <strings.h> +#include <sys/types.h> +#include <stdint.h> /* needed on Linux (at least) */ + +/* #include <sys/param.h> on FreeBSD, sets MSIZE to 256 */ +/* #include <sys/queue.h> */ +/* #include <sys/_lock.h> */ +/* #include <sys/_mutex.h> */ + +#define MINALLOCSIZE UMA_SMALLEST_UNIT + +/* + * flags to malloc. 
+ */ +#define M_NOWAIT 0x0001 /* do not block */ +#define M_WAITOK 0x0002 /* ok to block */ +#define M_ZERO 0x0100 /* bzero the allocation */ +#define M_NOVM 0x0200 /* don't ask VM for pages */ +#define M_USE_RESERVE 0x0400 /* can alloc out of reserve memory */ + +#define M_MAGIC 877983977 /* time when first defined :-) */ + +/* + * Two malloc type structures are present: malloc_type, which is used by a + * type owner to declare the type, and malloc_type_internal, which holds + * malloc-owned statistics and other ABI-sensitive fields, such as the set of + * malloc statistics indexed by the compile-time MAXCPU constant. + * Applications should avoid introducing dependence on the allocator private + * data layout and size. + * + * The malloc_type ks_next field is protected by malloc_mtx. Other fields in + * malloc_type are static after initialization so unsynchronized. + * + * Statistics in malloc_type_stats are written only when holding a critical + * section and running on the CPU associated with the index into the stat + * array, but read lock-free resulting in possible (minor) races, which the + * monitoring app should take into account. + */ +struct malloc_type_stats { + uint64_t mts_memalloced; /* Bytes allocated on CPU. */ + uint64_t mts_memfreed; /* Bytes freed on CPU. */ + uint64_t mts_numallocs; /* Number of allocates on CPU. */ + uint64_t mts_numfrees; /* number of frees on CPU. */ + uint64_t mts_size; /* Bitmask of sizes allocated on CPU. */ + uint64_t _mts_reserved1; /* Reserved field. */ + uint64_t _mts_reserved2; /* Reserved field. */ + uint64_t _mts_reserved3; /* Reserved field. */ +}; + +#ifndef MAXCPU /* necessary on Linux */ +#define MAXCPU 4 /* arbitrary? */ +#endif + +struct malloc_type_internal { + struct malloc_type_stats mti_stats[MAXCPU]; +}; + +/* + * ABI-compatible version of the old 'struct malloc_type', only all stats are + * now malloc-managed in malloc-owned memory rather than in caller memory, so + * as to avoid ABI issues. 
The ks_next pointer is reused as a pointer to the + * internal data handle. + */ +struct malloc_type { + struct malloc_type *ks_next; /* Next in global chain. */ + u_long _ks_memuse; /* No longer used. */ + u_long _ks_size; /* No longer used. */ + u_long _ks_inuse; /* No longer used. */ + uint64_t _ks_calls; /* No longer used. */ + u_long _ks_maxused; /* No longer used. */ + u_long ks_magic; /* Detect programmer error. */ + const char *ks_shortdesc; /* Printable type name. */ + + /* + * struct malloc_type was terminated with a struct mtx, which is no + * longer required. For ABI reasons, continue to flesh out the full + * size of the old structure, but reuse the _lo_class field for our + * internal data handle. + */ + void *ks_handle; /* Priv. data, was lo_class. */ + const char *_lo_name; + const char *_lo_type; + u_int _lo_flags; + void *_lo_list_next; + struct witness *_lo_witness; + uintptr_t _mtx_lock; + u_int _mtx_recurse; +}; + +/* + * Statistics structure headers for user space. The kern.malloc sysctl + * exposes a structure stream consisting of a stream header, then a series of + * malloc type headers and statistics structures (quantity maxcpus). For + * convenience, the kernel will provide the current value of maxcpus at the + * head of the stream. + */ +#define MALLOC_TYPE_STREAM_VERSION 0x00000001 +struct malloc_type_stream_header { + uint32_t mtsh_version; /* Stream format version. */ + uint32_t mtsh_maxcpus; /* Value of MAXCPU for stream. */ + uint32_t mtsh_count; /* Number of records. */ + uint32_t _mtsh_pad; /* Pad/reserved field. */ +}; + +#define MALLOC_MAX_NAME 32 +struct malloc_type_header { + char mth_name[MALLOC_MAX_NAME]; +}; + +/* __Userspace__ +Notice that at places it uses ifdef _KERNEL. That line cannot be +removed because it causes conflicts with malloc definition in +/usr/include/malloc.h, which essentially says that malloc.h has +been overridden by stdlib.h. 
confusing. All userspace header files are being placed in ./user_include.
Todo: M_WAITOK and M_NOWAIT */ +#define MALLOC(space, cast, size, type, flags) \ + ((space) = (cast)malloc((u_long)(size))); \ + do { \ + if(flags & M_ZERO) { \ + memset(space,0,size); \ + } \ + } while (0); + + +/* End copy: Copied code for __Userspace__ */ + + +#ifdef _KERNEL +#define MALLOC_DEFINE(type, shortdesc, longdesc) \ + struct malloc_type type[1] = { \ + { NULL, 0, 0, 0, 0, 0, M_MAGIC, shortdesc, NULL, NULL, \ + NULL, 0, NULL, NULL, 0, 0 } \ + }; \ + SYSINIT(type##_init, SI_SUB_KMEM, SI_ORDER_SECOND, malloc_init, \ + type); \ + SYSUNINIT(type##_uninit, SI_SUB_KMEM, SI_ORDER_ANY, \ + malloc_uninit, type); + + +#define MALLOC_DECLARE(type) \ + extern struct malloc_type type[1] + +MALLOC_DECLARE(M_CACHE); +MALLOC_DECLARE(M_DEVBUF); +MALLOC_DECLARE(M_TEMP); + +MALLOC_DECLARE(M_IP6OPT); /* for INET6 */ +MALLOC_DECLARE(M_IP6NDP); /* for INET6 */ + +/* + * Deprecated macro versions of not-quite-malloc() and free(). + */ +#define MALLOC(space, cast, size, type, flags) \ + ((space) = (cast)malloc((u_long)(size), (type), (flags))) +#define FREE(addr, type) free((addr), (type)) + +/* + * XXX this should be declared in <sys/uio.h>, but that tends to fail + * because <sys/uio.h> is included in a header before the source file + * has a chance to include <sys/malloc.h> to get MALLOC_DECLARE() defined. + */ +MALLOC_DECLARE(M_IOV); + +extern struct mtx malloc_mtx; + +/* XXX struct malloc_type is unused for contig*(). 
#define KIPC_MAX_LINKHDR 4 /* int: max length of link header (see sys/sysctl.h) */
#define KIPC_MAX_PROTOHDR 5 /* int: max length of network header (see sys/sysctl.h)*/
+ */ +sctp_zone_t zone_mbuf; +sctp_zone_t zone_clust; +sctp_zone_t zone_ext_refcnt; + +/*__Userspace__ + * constructor callback_data + * mbuf_mb_args will be passed as callback data to umem_cache_create. + * umem_cache_alloc will then be able to use this callback data when the constructor + * function mb_ctor_mbuf is called. See user_mbuf.c + * This is important because mbuf_mb_args would specify flags like M_PKTHDR + * and type like MT_DATA or MT_HEADER. This information is needed in mb_ctor_mbuf + * to properly initialize the mbuf being allocated. + * + * Argument structure passed to UMA routines during mbuf and packet + * allocations. + */ +struct mb_args mbuf_mb_args; + +/* __Userspace__ clust_mb_args will be passed as callback data to mb_ctor_clust + * and mb_dtor_clust. + * Note: I had to use struct clust_args as an encapsulation for an mbuf pointer. + * struct mbuf * clust_mb_args; does not work. + */ +struct clust_args clust_mb_args; + + +/* __Userspace__ + * Local prototypes. + */ +static int mb_ctor_mbuf(void *, void *, int); +static int mb_ctor_clust(void *, void *, int); +static void mb_dtor_mbuf(void *, void *); +static void mb_dtor_clust(void *, void *); + + +/***************** Functions taken from user_mbuf.h *************/ + +/* __Userspace__ Setter function for mbuf_mb_args */ +static void set_mbuf_mb_args(int flags, short type) { + mbuf_mb_args.flags = flags; + mbuf_mb_args.type = type; +} +#if USING_MBUF_CONSTRUCTOR +/* __Userspace__ Setter function for clust_mb_args */ +static void set_clust_mb_args(struct mbuf * mb) { + clust_mb_args.parent_mbuf = mb; +} +#endif + +static int mbuf_constructor_dup(struct mbuf *m, int pkthdr, short type) +{ + int flags = pkthdr; + if (type == MT_NOINIT) + return (0); + + m->m_next = NULL; + m->m_nextpkt = NULL; + m->m_len = 0; + m->m_flags = flags; + m->m_type = type; + if (flags & M_PKTHDR) { + m->m_data = m->m_pktdat; + m->m_pkthdr.rcvif = NULL; + m->m_pkthdr.len = 0; + m->m_pkthdr.header = NULL; + 
m->m_pkthdr.csum_flags = 0; + m->m_pkthdr.csum_data = 0; + m->m_pkthdr.tso_segsz = 0; + m->m_pkthdr.ether_vtag = 0; + SLIST_INIT(&m->m_pkthdr.tags); + } else + m->m_data = m->m_dat; + + return (0); +} + +/* __Userspace__ */ +struct mbuf * +m_get(int how, short type) +{ + struct mbuf *mret; + /* The following setter function is not yet being enclosed within + * #if USING_MBUF_CONSTRUCTOR - #endif, until I have thoroughly tested + * mb_dtor_mbuf. See comment there + */ + set_mbuf_mb_args(0, type); + + /* Mbuf master zone, zone_mbuf, has already been + * created in mbuf_init() */ + mret = SCTP_ZONE_GET(zone_mbuf, struct mbuf); +#if defined(SCTP_SIMPLE_ALLOCATOR) + mb_ctor_mbuf(mret, &mbuf_mb_args, 0); +#endif + /*mret = ((struct mbuf *)umem_cache_alloc(zone_mbuf, UMEM_DEFAULT));*/ + + /* There are cases when an object available in the current CPU's + * loaded magazine and in those cases the object's constructor is not applied. + * If that is the case, then we are duplicating constructor initialization here, + * so that the mbuf is properly constructed before returning it. + */ + if (mret) { +#if USING_MBUF_CONSTRUCTOR + if (! (mret->m_type == type) ) { + mbuf_constructor_dup(mret, 0, type); + } +#else + mbuf_constructor_dup(mret, 0, type); +#endif + + } + return mret; +} + + +/* __Userspace__ */ +struct mbuf * +m_gethdr(int how, short type) +{ + struct mbuf *mret; + /* The following setter function is not yet being enclosed within + * #if USING_MBUF_CONSTRUCTOR - #endif, until I have thoroughly tested + * mb_dtor_mbuf. See comment there + */ + set_mbuf_mb_args(M_PKTHDR, type); + + mret = SCTP_ZONE_GET(zone_mbuf, struct mbuf); +#if defined(SCTP_SIMPLE_ALLOCATOR) + mb_ctor_mbuf(mret, &mbuf_mb_args, 0); +#endif + /*mret = ((struct mbuf *)umem_cache_alloc(zone_mbuf, UMEM_DEFAULT));*/ + /* There are cases when an object available in the current CPU's + * loaded magazine and in those cases the object's constructor is not applied. 
+ * If that is the case, then we are duplicating constructor initialization here, + * so that the mbuf is properly constructed before returning it. + */ + if (mret) { +#if USING_MBUF_CONSTRUCTOR + if (! ((mret->m_flags & M_PKTHDR) && (mret->m_type == type)) ) { + mbuf_constructor_dup(mret, M_PKTHDR, type); + } +#else + mbuf_constructor_dup(mret, M_PKTHDR, type); +#endif + } + return mret; +} + +/* __Userspace__ */ +struct mbuf * +m_free(struct mbuf *m) +{ + + struct mbuf *n = m->m_next; + + if (m->m_flags & M_EXT) + mb_free_ext(m); + else if ((m->m_flags & M_NOFREE) == 0) { +#if defined(SCTP_SIMPLE_ALLOCATOR) + mb_dtor_mbuf(m, &mbuf_mb_args); +#endif + SCTP_ZONE_FREE(zone_mbuf, m); + } + /*umem_cache_free(zone_mbuf, m);*/ + return (n); +} + + +static int clust_constructor_dup(caddr_t m_clust, struct mbuf* m) +{ + u_int *refcnt; + int type, size; + sctp_zone_t zone; + + /* Assigning cluster of MCLBYTES. TODO: Add jumbo frame functionality */ + type = EXT_CLUSTER; + zone = zone_clust; + size = MCLBYTES; + + refcnt = SCTP_ZONE_GET(zone_ext_refcnt, u_int); + /*refcnt = (u_int *)umem_cache_alloc(zone_ext_refcnt, UMEM_DEFAULT);*/ + if (refcnt == NULL) { + printf("calling reap in %s\n", __func__); +#if !defined(SCTP_SIMPLE_ALLOCATOR) + umem_reap(); +#endif + refcnt = SCTP_ZONE_GET(zone_ext_refcnt, u_int); + /*refcnt = (u_int *)umem_cache_alloc(zone_ext_refcnt, UMEM_DEFAULT);*/ + assert(refcnt != NULL); + } + *refcnt = 1; + if (m != NULL) { + m->m_ext.ext_buf = (caddr_t)m_clust; + m->m_data = m->m_ext.ext_buf; + m->m_flags |= M_EXT; + m->m_ext.ext_free = NULL; + m->m_ext.ext_args = NULL; + m->m_ext.ext_size = size; + m->m_ext.ext_type = type; + m->m_ext.ref_cnt = refcnt; + } + + return (0); +} + + + +/* __Userspace__ */ +void +m_clget(struct mbuf *m, int how) +{ + caddr_t mclust_ret; + if (m->m_flags & M_EXT) + printf("%s: %p mbuf already has cluster\n", __func__, m); + m->m_ext.ext_buf = (char *)NULL; +#if USING_MBUF_CONSTRUCTOR + set_clust_mb_args(m); +#endif + 
mclust_ret = SCTP_ZONE_GET(zone_clust, char); +#if defined(SCTP_SIMPLE_ALLOCATOR) + mb_ctor_clust(mclust_ret, &clust_mb_args, 0); +#endif + /*mclust_ret = umem_cache_alloc(zone_clust, UMEM_DEFAULT);*/ + /* + On a cluster allocation failure, call umem_reap() and retry. + */ + + if ((mclust_ret == NULL)) { + printf("calling reap in %s\n", __func__); + +#if !defined(SCTP_SIMPLE_ALLOCATOR) + /* mclust_ret = SCTP_ZONE_GET(zone_clust, char); + mb_ctor_clust(mclust_ret, &clust_mb_args, 0); +#else*/ + umem_reap(); + mclust_ret = SCTP_ZONE_GET(zone_clust, char); +#endif + /*mclust_ret = umem_cache_alloc(zone_clust, UMEM_DEFAULT);*/ + if(NULL == mclust_ret) + { + printf("Memory allocation failure in %s\n", __func__); + exit(1); + } + } + +#if USING_MBUF_CONSTRUCTOR + if ((m->m_ext.ext_buf == NULL)) { + clust_constructor_dup(mclust_ret, m); + } +#else + clust_constructor_dup(mclust_ret, m); +#endif +} + +/* + * Unlink a tag from the list of tags associated with an mbuf. + */ +static __inline void +m_tag_unlink(struct mbuf *m, struct m_tag *t) +{ + + SLIST_REMOVE(&m->m_pkthdr.tags, t, m_tag, m_tag_link); +} + +/* + * Reclaim resources associated with a tag. + */ +static __inline void +m_tag_free(struct m_tag *t) +{ + + (*t->m_tag_free)(t); +} + +/* + * Set up the contents of a tag. Note that this does not fill in the free + * method; the caller is expected to do that. + * + * XXX probably should be called m_tag_init, but that was already taken. + */ +static __inline void +m_tag_setup(struct m_tag *t, u_int32_t cookie, int type, int len) +{ + + t->m_tag_id = type; + t->m_tag_len = len; + t->m_tag_cookie = cookie; +} + +/************ End functions from user_mbuf.h ******************/ + + + +/************ End functions to substitute umem_cache_alloc and umem_cache_free **************/ + +/* __Userspace__ + * TODO: mbuf_init must be called in the initialization routines + * of userspace stack. 
+ */ +void +mbuf_init(void *dummy) +{ + + /* + * __Userspace__Configure UMA zones for Mbufs and Clusters. + * (TODO: m_getcl() - using packet secondary zone). + * There is no provision for trash_init and trash_fini in umem. + * + */ + /* zone_mbuf = umem_cache_create(MBUF_MEM_NAME, MSIZE, 0, + mb_ctor_mbuf, mb_dtor_mbuf, NULL, + &mbuf_mb_args, + NULL, 0); + zone_mbuf = umem_cache_create(MBUF_MEM_NAME, MSIZE, 0, NULL, NULL, NULL, NULL, NULL, 0);*/ +#if defined(SCTP_SIMPLE_ALLOCATOR) + SCTP_ZONE_INIT(zone_mbuf, MBUF_MEM_NAME, MSIZE, 0); +#else + zone_mbuf = umem_cache_create(MBUF_MEM_NAME, MSIZE, 0, + mb_ctor_mbuf, mb_dtor_mbuf, NULL, + &mbuf_mb_args, + NULL, 0); +#endif + /*zone_ext_refcnt = umem_cache_create(MBUF_EXTREFCNT_MEM_NAME, sizeof(u_int), 0, + NULL, NULL, NULL, + NULL, + NULL, 0);*/ + SCTP_ZONE_INIT(zone_ext_refcnt, MBUF_EXTREFCNT_MEM_NAME, sizeof(u_int), 0); + + /*zone_clust = umem_cache_create(MBUF_CLUSTER_MEM_NAME, MCLBYTES, 0, + mb_ctor_clust, mb_dtor_clust, NULL, + &clust_mb_args, + NULL, 0); + zone_clust = umem_cache_create(MBUF_CLUSTER_MEM_NAME, MCLBYTES, 0, NULL, NULL, NULL, NULL, NULL,0);*/ +#if defined(SCTP_SIMPLE_ALLOCATOR) + SCTP_ZONE_INIT(zone_clust, MBUF_CLUSTER_MEM_NAME, MCLBYTES, 0); +#else + zone_clust = umem_cache_create(MBUF_CLUSTER_MEM_NAME, MCLBYTES, 0, + mb_ctor_clust, mb_dtor_clust, NULL, + &clust_mb_args, + NULL, 0); +#endif + + /* uma_prealloc() goes here... */ + + /* __Userspace__ Add umem_reap here for low memory situation? + * + */ + + + /* + * [Re]set counters and local statistics knobs. + * + */ + + mbstat.m_mbufs = 0; + mbstat.m_mclusts = 0; + mbstat.m_drain = 0; + mbstat.m_msize = MSIZE; + mbstat.m_mclbytes = MCLBYTES; + mbstat.m_minclsize = MINCLSIZE; + mbstat.m_mlen = MLEN; + mbstat.m_mhlen = MHLEN; + mbstat.m_numtypes = MT_NTYPES; + + mbstat.m_mcfail = mbstat.m_mpfail = 0; + mbstat.sf_iocnt = 0; + mbstat.sf_allocwait = mbstat.sf_allocfail = 0; + +} + + + +/* + * __Userspace__ + * + * Constructor for Mbuf master zone. 
We have a different constructor + * for allocating the cluster. + * + * The 'arg' pointer points to a mb_args structure which + * contains call-specific information required to support the + * mbuf allocation API. See user_mbuf.h. + * + * The flgs parameter below can be UMEM_DEFAULT or UMEM_NOFAIL depending on what + * was passed when umem_cache_alloc was called. + * TODO: Use UMEM_NOFAIL in umem_cache_alloc and also define a failure handler + * and call umem_nofail_callback(my_failure_handler) in the stack initialization routines + * The advantage of using UMEM_NOFAIL is that we don't have to check if umem_cache_alloc + * was successful or not. The failure handler would take care of it, if we use the UMEM_NOFAIL + * flag. + * + * NOTE Ref: http://docs.sun.com/app/docs/doc/819-2243/6n4i099p2?l=en&a=view&q=umem_zalloc) + * The umem_nofail_callback() function sets the **process-wide** UMEM_NOFAIL callback. + * It also mentions that umem_nofail_callback is Evolving. + * + */ +static int +mb_ctor_mbuf(void *mem, void *arg, int flgs) +{ +#if USING_MBUF_CONSTRUCTOR + struct mbuf *m; + struct mb_args *args; + + int flags; + short type; + + m = (struct mbuf *)mem; + args = (struct mb_args *)arg; + flags = args->flags; + type = args->type; + + /* + * The mbuf is initialized later. + * + */ + if (type == MT_NOINIT) + return (0); + + m->m_next = NULL; + m->m_nextpkt = NULL; + m->m_len = 0; + m->m_flags = flags; + m->m_type = type; + if (flags & M_PKTHDR) { + m->m_data = m->m_pktdat; + m->m_pkthdr.rcvif = NULL; + m->m_pkthdr.len = 0; + m->m_pkthdr.header = NULL; + m->m_pkthdr.csum_flags = 0; + m->m_pkthdr.csum_data = 0; + m->m_pkthdr.tso_segsz = 0; + m->m_pkthdr.ether_vtag = 0; + SLIST_INIT(&m->m_pkthdr.tags); + } else + m->m_data = m->m_dat; +#endif + return (0); +} + + +/* + * __Userspace__ + * The Mbuf master zone destructor. + * This would be called in response to umem_cache_destroy + * TODO: Recheck if this is what we want to do in this destructor. 
+ * (Note: the number of times mb_dtor_mbuf is called is equal to the + * number of individual mbufs allocated from zone_mbuf. + */ +static void +mb_dtor_mbuf(void *mem, void *arg) +{ + + struct mbuf *m; + struct mb_args *args; + int flags; + + m = (struct mbuf *)mem; + args = (struct mb_args *)arg; + flags = args->flags; + + if ((flags & MB_NOTAGS) == 0 && (m->m_flags & M_PKTHDR) != 0) + { + m_tag_delete_chain(m, NULL); + } + assert((m->m_flags & M_EXT) == 0); + assert((m->m_flags & M_NOFREE) == 0); + +} + + +/* __Userspace__ + * The Cluster zone constructor. + * + * Here the 'arg' pointer points to the Mbuf which we + * are configuring cluster storage for. If 'arg' is + * empty we allocate just the cluster without setting + * the mbuf to it. See mbuf.h. + */ +static int +mb_ctor_clust(void *mem, void *arg, int flgs) +{ + +#if USING_MBUF_CONSTRUCTOR + struct mbuf *m; + struct clust_args * cla; + u_int *refcnt; + int type, size; + sctp_zone_t zone; + + /* Assigning cluster of MCLBYTES. TODO: Add jumbo frame functionality */ + type = EXT_CLUSTER; + zone = zone_clust; + size = MCLBYTES; + + cla = (struct clust_args *)arg; + m = cla->parent_mbuf; + + refcnt = SCTP_ZONE_GET(zone_ext_refcnt, u_int); + /*refcnt = (u_int *)umem_cache_alloc(zone_ext_refcnt, UMEM_DEFAULT);*/ + *refcnt = 1; + + if (m != NULL) { + m->m_ext.ext_buf = (caddr_t)mem; + m->m_data = m->m_ext.ext_buf; + m->m_flags |= M_EXT; + m->m_ext.ext_free = NULL; + m->m_ext.ext_args = NULL; + m->m_ext.ext_size = size; + m->m_ext.ext_type = type; + m->m_ext.ref_cnt = refcnt; + } +#endif + return (0); +} + +/* __Userspace__ */ +static void +mb_dtor_clust(void *mem, void *arg) +{ + + /* mem is of type caddr_t. In sys/types.h we have typedef char * caddr_t; */ + /* mb_dtor_clust is called at time of umem_cache_destroy() (the number of times + * mb_dtor_clust is called is equal to the number of individual mbufs allocated + * from zone_clust. Similarly for mb_dtor_mbuf). 
+ * At this point the following: + * struct mbuf *m; + * m = (struct mbuf *)arg; + * assert (*(m->m_ext.ref_cnt) == 0); is not meaningful since m->m_ext.ref_cnt = NULL; + * has been done in mb_free_ext(). + */ + +} + + + + +/* Unlink and free a packet tag. */ +void +m_tag_delete(struct mbuf *m, struct m_tag *t) +{ + + assert(m && t); + m_tag_unlink(m, t); + m_tag_free(t); +} + + +/* Unlink and free a packet tag chain, starting from given tag. */ +void +m_tag_delete_chain(struct mbuf *m, struct m_tag *t) +{ + + struct m_tag *p, *q; + + assert(m); + if (t != NULL) + p = t; + else + p = SLIST_FIRST(&m->m_pkthdr.tags); + if (p == NULL) + return; + while ((q = SLIST_NEXT(p, m_tag_link)) != NULL) + m_tag_delete(m, q); + m_tag_delete(m, p); +} + +#if 0 +static void +sctp_print_mbuf_chain(struct mbuf *m) +{ + printf("Printing mbuf chain %p.\n", m); + for(; m; m=m->m_next) { + printf("%p: m_len = %ld, m_type = %x, m_next = %p.\n", m, m->m_len, m->m_type, m->m_next); + if (m->m_flags & M_EXT) + printf("%p: extend_size = %d, extend_buffer = %p, ref_cnt = %d.\n", m, m->m_ext.ext_size, m->m_ext.ext_buf, *(m->m_ext.ref_cnt)); + } +} +#endif + +/* + * Free an entire chain of mbufs and associated external buffers, if + * applicable. + */ +void +m_freem(struct mbuf *mb) +{ + while (mb != NULL) + mb = m_free(mb); +} + +/* + * __Userspace__ + * clean mbufs with M_EXT storage attached to them + * if the reference count hits 1. + */ +void +mb_free_ext(struct mbuf *m) +{ + + int skipmbuf; + + assert((m->m_flags & M_EXT) == M_EXT); + assert(m->m_ext.ref_cnt != NULL); + + /* + * check if the header is embedded in the cluster + */ + skipmbuf = (m->m_flags & M_NOFREE); + + /* Free the external attached storage if this + * mbuf is the only reference to it. + *__Userspace__ TODO: jumbo frames + * + */ + /* NOTE: We had the same code that SCTP_DECREMENT_AND_CHECK_REFCOUNT + reduces to here before but the IPHONE malloc commit had changed + this to compare to 0 instead of 1 (see next line). Why? 
+ + ... this caused a huge memory leak in Linux. + */ +#ifdef IPHONE + if (atomic_fetchadd_int(m->m_ext.ref_cnt, -1) == 0) +#else + if (SCTP_DECREMENT_AND_CHECK_REFCOUNT(m->m_ext.ref_cnt)) +#endif + { + if (m->m_ext.ext_type == EXT_CLUSTER){ +#if defined(SCTP_SIMPLE_ALLOCATOR) + mb_dtor_clust(m->m_ext.ext_buf, &clust_mb_args); +#endif + SCTP_ZONE_FREE(zone_clust, m->m_ext.ext_buf); + SCTP_ZONE_FREE(zone_ext_refcnt, (u_int*)m->m_ext.ref_cnt); + m->m_ext.ref_cnt = NULL; + } + } + + if (skipmbuf) + return; + + + /* __Userspace__ Also freeing the storage for ref_cnt + * Free this mbuf back to the mbuf zone with all m_ext + * information purged. + */ + m->m_ext.ext_buf = NULL; + m->m_ext.ext_free = NULL; + m->m_ext.ext_args = NULL; + m->m_ext.ref_cnt = NULL; + m->m_ext.ext_size = 0; + m->m_ext.ext_type = 0; + m->m_flags &= ~M_EXT; +#if defined(SCTP_SIMPLE_ALLOCATOR) + mb_dtor_mbuf(m, &mbuf_mb_args); +#endif + SCTP_ZONE_FREE(zone_mbuf, m); + + /*umem_cache_free(zone_mbuf, m);*/ +} + +/* + * "Move" mbuf pkthdr from "from" to "to". + * "from" must have M_PKTHDR set, and "to" must be empty. + */ +void +m_move_pkthdr(struct mbuf *to, struct mbuf *from) +{ + + to->m_flags = (from->m_flags & M_COPYFLAGS) | (to->m_flags & M_EXT); + if ((to->m_flags & M_EXT) == 0) + to->m_data = to->m_pktdat; + to->m_pkthdr = from->m_pkthdr; /* especially tags */ + SLIST_INIT(&from->m_pkthdr.tags); /* purge tags from src */ + from->m_flags &= ~M_PKTHDR; +} + + +/* + * Rearange an mbuf chain so that len bytes are contiguous + * and in the data area of an mbuf (so that mtod and dtom + * will work for a structure of size len). Returns the resulting + * mbuf chain on success, frees it and returns null on failure. + * If there is room, it will add up to max_protohdr-len extra bytes to the + * contiguous region in an attempt to avoid being called next time. 
+ */ +struct mbuf * +m_pullup(struct mbuf *n, int len) +{ + struct mbuf *m; + int count; + int space; + + /* + * If first mbuf has no cluster, and has room for len bytes + * without shifting current data, pullup into it, + * otherwise allocate a new mbuf to prepend to the chain. + */ + if ((n->m_flags & M_EXT) == 0 && + n->m_data + len < &n->m_dat[MLEN] && n->m_next) { + if (n->m_len >= len) + return (n); + m = n; + n = n->m_next; + len -= m->m_len; + } else { + if (len > MHLEN) + goto bad; + MGET(m, M_DONTWAIT, n->m_type); + if (m == NULL) + goto bad; + m->m_len = 0; + if (n->m_flags & M_PKTHDR) + M_MOVE_PKTHDR(m, n); + } + space = &m->m_dat[MLEN] - (m->m_data + m->m_len); + do { + count = min(min(max(len, max_protohdr), space), n->m_len); + bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len, + (u_int)count); + len -= count; + m->m_len += count; + n->m_len -= count; + space -= count; + if (n->m_len) + n->m_data += count; + else + n = m_free(n); + } while (len > 0 && n); + if (len > 0) { + (void) m_free(m); + goto bad; + } + m->m_next = n; + return (m); +bad: + m_freem(n); + mbstat.m_mpfail++; /* XXX: No consistency. */ + return (NULL); +} + + +/* + * Attach the the cluster from *m to *n, set up m_ext in *n + * and bump the refcount of the cluster. + */ +static void +mb_dupcl(struct mbuf *n, struct mbuf *m) +{ + assert((m->m_flags & M_EXT) == M_EXT); + assert(m->m_ext.ref_cnt != NULL); + assert((n->m_flags & M_EXT) == 0); + + if (*(m->m_ext.ref_cnt) == 1) + *(m->m_ext.ref_cnt) += 1; + else + atomic_add_int(m->m_ext.ref_cnt, 1); + n->m_ext.ext_buf = m->m_ext.ext_buf; + n->m_ext.ext_free = m->m_ext.ext_free; + n->m_ext.ext_args = m->m_ext.ext_args; + n->m_ext.ext_size = m->m_ext.ext_size; + n->m_ext.ref_cnt = m->m_ext.ref_cnt; + n->m_ext.ext_type = m->m_ext.ext_type; + n->m_flags |= M_EXT; +} + + +/* + * Make a copy of an mbuf chain starting "off0" bytes from the beginning, + * continuing for "len" bytes. If len is M_COPYALL, copy to end of mbuf. 
+ * The wait parameter is a choice of M_TRYWAIT/M_DONTWAIT from caller. + * Note that the copy is read-only, because clusters are not copied, + * only their reference counts are incremented. + */ + +struct mbuf * +m_copym(struct mbuf *m, int off0, int len, int wait) +{ + struct mbuf *n, **np; + int off = off0; + struct mbuf *top; + int copyhdr = 0; + + assert(off >= 0); + assert(len >= 0); + + if (off == 0 && m->m_flags & M_PKTHDR) + copyhdr = 1; + while (off > 0) { + assert(m != NULL); + if (off < m->m_len) + break; + off -= m->m_len; + m = m->m_next; + } + np = ⊤ + top = 0; + while (len > 0) { + if (m == NULL) { + assert(len == M_COPYALL); + break; + } + if (copyhdr) + MGETHDR(n, wait, m->m_type); + else + MGET(n, wait, m->m_type); + *np = n; + if (n == NULL) + goto nospace; + if (copyhdr) { + if (!m_dup_pkthdr(n, m, wait)) + goto nospace; + if (len == M_COPYALL) + n->m_pkthdr.len -= off0; + else + n->m_pkthdr.len = len; + copyhdr = 0; + } + n->m_len = min(len, m->m_len - off); + if (m->m_flags & M_EXT) { + n->m_data = m->m_data + off; + mb_dupcl(n, m); + } else + bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t), + (u_int)n->m_len); + if (len != M_COPYALL) + len -= n->m_len; + off = 0; + m = m->m_next; + np = &n->m_next; + } + if (top == NULL) + mbstat.m_mcfail++; /* XXX: No consistency. */ + + return (top); +nospace: + m_freem(top); + mbstat.m_mcfail++; /* XXX: No consistency. */ + return (NULL); +} + + +int +m_tag_copy_chain(struct mbuf *to, struct mbuf *from, int how) +{ + struct m_tag *p, *t, *tprev = NULL; + + assert(to && from); + m_tag_delete_chain(to, NULL); + SLIST_FOREACH(p, &from->m_pkthdr.tags, m_tag_link) { + t = m_tag_copy(p, how); + if (t == NULL) { + m_tag_delete_chain(to, NULL); + return 0; + } + if (tprev == NULL) + SLIST_INSERT_HEAD(&to->m_pkthdr.tags, t, m_tag_link); + else + SLIST_INSERT_AFTER(tprev, t, m_tag_link); + tprev = t; + } + return 1; +} + +/* + * Duplicate "from"'s mbuf pkthdr in "to". 
+ * "from" must have M_PKTHDR set, and "to" must be empty. + * In particular, this does a deep copy of the packet tags. + */ +int +m_dup_pkthdr(struct mbuf *to, struct mbuf *from, int how) +{ + + to->m_flags = (from->m_flags & M_COPYFLAGS) | (to->m_flags & M_EXT); + if ((to->m_flags & M_EXT) == 0) + to->m_data = to->m_pktdat; + to->m_pkthdr = from->m_pkthdr; + SLIST_INIT(&to->m_pkthdr.tags); + return (m_tag_copy_chain(to, from, MBTOM(how))); +} + +/* Copy a single tag. */ +struct m_tag * +m_tag_copy(struct m_tag *t, int how) +{ + struct m_tag *p; + + assert(t); + p = m_tag_alloc(t->m_tag_cookie, t->m_tag_id, t->m_tag_len, how); + if (p == NULL) + return (NULL); + bcopy(t + 1, p + 1, t->m_tag_len); /* Copy the data */ + return p; +} + +/* Get a packet tag structure along with specified data following. */ +struct m_tag * +m_tag_alloc(u_int32_t cookie, int type, int len, int wait) +{ + struct m_tag *t; + + if (len < 0) + return NULL; + t = malloc(len + sizeof(struct m_tag)); + if (t == NULL) + return NULL; + m_tag_setup(t, cookie, type, len); + t->m_tag_free = m_tag_free_default; + return t; +} + +/* Free a packet tag. */ +void +m_tag_free_default(struct m_tag *t) +{ + free(t); +} + +/* + * Copy data from a buffer back into the indicated mbuf chain, + * starting "off" bytes from the beginning, extending the mbuf + * chain if necessary. 
+ */ +void +m_copyback(struct mbuf *m0, int off, int len, c_caddr_t cp) +{ + int mlen; + struct mbuf *m = m0, *n; + int totlen = 0; + + if (m0 == NULL) + return; + while (off > (mlen = m->m_len)) { + off -= mlen; + totlen += mlen; + if (m->m_next == NULL) { + n = m_get(M_DONTWAIT, m->m_type); + if (n == NULL) + goto out; + bzero(mtod(n, caddr_t), MLEN); + n->m_len = min(MLEN, len + off); + m->m_next = n; + } + m = m->m_next; + } + while (len > 0) { + mlen = min (m->m_len - off, len); + bcopy(cp, off + mtod(m, caddr_t), (u_int)mlen); + cp += mlen; + len -= mlen; + mlen += off; + off = 0; + totlen += mlen; + if (len == 0) + break; + if (m->m_next == NULL) { + n = m_get(M_DONTWAIT, m->m_type); + if (n == NULL) + break; + n->m_len = min(MLEN, len); + m->m_next = n; + } + m = m->m_next; + } +out: if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen)) + m->m_pkthdr.len = totlen; +} + + +/* + * Lesser-used path for M_PREPEND: + * allocate new mbuf to prepend to chain, + * copy junk along. + */ +struct mbuf * +m_prepend(struct mbuf *m, int len, int how) +{ + struct mbuf *mn; + + if (m->m_flags & M_PKTHDR) + MGETHDR(mn, how, m->m_type); + else + MGET(mn, how, m->m_type); + if (mn == NULL) { + m_freem(m); + return (NULL); + } + if (m->m_flags & M_PKTHDR) + M_MOVE_PKTHDR(mn, m); + mn->m_next = m; + m = mn; + if(m->m_flags & M_PKTHDR) { + if (len < MHLEN) + MH_ALIGN(m, len); + } else { + if (len < MLEN) + M_ALIGN(m, len); + } + m->m_len = len; + return (m); +} + +/* + * Copy data from an mbuf chain starting "off" bytes from the beginning, + * continuing for "len" bytes, into the indicated buffer. 
 */
void
m_copydata(const struct mbuf *m, int off, int len, caddr_t cp)
{
	u_int count;

	assert(off >= 0);
	assert(len >= 0);
	/* Advance to the mbuf that contains byte offset "off". */
	while (off > 0) {
		assert(m != NULL);
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	/* Copy out len bytes; asserts the chain is long enough. */
	while (len > 0) {
		assert(m != NULL);
		count = min(m->m_len - off, len);
		bcopy(mtod(m, caddr_t) + off, cp, count);
		len -= count;
		cp += count;
		off = 0;
		m = m->m_next;
	}
}


/*
 * Concatenate mbuf chain n to m.
 * Both chains must be of the same type (e.g. MT_DATA).
 * Any m_pkthdr is not updated.
 */
void
m_cat(struct mbuf *m, struct mbuf *n)
{
	while (m->m_next)
		m = m->m_next;
	while (n) {
		if (m->m_flags & M_EXT ||
		    m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) {
			/* just join the two chains */
			m->m_next = n;
			return;
		}
		/* splat the data from one into the other */
		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
		    (u_int)n->m_len);
		m->m_len += n->m_len;
		n = m_free(n);
	}
}


/*
 * Trim req_len bytes from the chain: from the head if req_len >= 0,
 * from the tail if req_len < 0.  The packet header length (if any)
 * is kept consistent.  Trimming more than the chain holds clamps to
 * an empty chain.
 */
void
m_adj(struct mbuf *mp, int req_len)
{
	int len = req_len;
	struct mbuf *m;
	int count;

	if ((m = mp) == NULL)
		return;
	if (len >= 0) {
		/*
		 * Trim from head.
		 */
		while (m != NULL && len > 0) {
			if (m->m_len <= len) {
				len -= m->m_len;
				m->m_len = 0;	/* mbuf kept, just emptied */
				m = m->m_next;
			} else {
				m->m_len -= len;
				m->m_data += len;
				len = 0;
			}
		}
		m = mp;
		/* len now holds the untrimmed remainder (nonzero only if
		 * the chain was shorter than req_len). */
		if (mp->m_flags & M_PKTHDR)
			m->m_pkthdr.len -= (req_len - len);
	} else {
		/*
		 * Trim from tail.  Scan the mbuf chain,
		 * calculating its length and finding the last mbuf.
		 * If the adjustment only affects this mbuf, then just
		 * adjust and return.  Otherwise, rescan and truncate
		 * after the remaining size.
		 */
		len = -len;
		count = 0;
		for (;;) {
			count += m->m_len;
			if (m->m_next == (struct mbuf *)0)
				break;
			m = m->m_next;
		}
		if (m->m_len >= len) {
			/* Entire trim fits in the final mbuf. */
			m->m_len -= len;
			if (mp->m_flags & M_PKTHDR)
				mp->m_pkthdr.len -= len;
			return;
		}
		count -= len;
		if (count < 0)
			count = 0;
		/*
		 * Correct length for chain is "count".
		 * Find the mbuf with last data, adjust its length,
		 * and toss data from remaining mbufs on chain.
		 */
		m = mp;
		if (m->m_flags & M_PKTHDR)
			m->m_pkthdr.len = count;
		for (; m; m = m->m_next) {
			if (m->m_len >= count) {
				m->m_len = count;
				if (m->m_next != NULL) {
					m_freem(m->m_next);
					m->m_next = NULL;
				}
				break;
			}
			count -= m->m_len;
		}
	}
}


/* m_split is used within sctp_handle_cookie_echo. */

/*
 * Partition an mbuf chain in two pieces, returning the tail --
 * all but the first len0 bytes.  In case of failure, it returns NULL and
 * attempts to restore the chain to its original state.
 *
 * Note that the resulting mbufs might be read-only, because the new
 * mbuf can end up sharing an mbuf cluster with the original mbuf if
 * the "breaking point" happens to lie within a cluster mbuf. Use the
 * M_WRITABLE() macro to check for this case.
 */
struct mbuf *
m_split(struct mbuf *m0, int len0, int wait)
{
	struct mbuf *m, *n;
	u_int len = len0, remain;

	/* MBUF_CHECKSLEEP(wait); */
	/* Find the mbuf containing the split point; len becomes the
	 * offset of the split within that mbuf. */
	for (m = m0; m && len > m->m_len; m = m->m_next)
		len -= m->m_len;
	if (m == NULL)
		return (NULL);
	remain = m->m_len - len;	/* bytes that move to the tail chain */
	if (m0->m_flags & M_PKTHDR) {
		MGETHDR(n, wait, m0->m_type);
		if (n == NULL)
			return (NULL);
		n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
		n->m_pkthdr.len = m0->m_pkthdr.len - len0;
		m0->m_pkthdr.len = len0;
		if (m->m_flags & M_EXT)
			goto extpacket;
		if (remain > MHLEN) {
			/* m can't be the lead packet */
			MH_ALIGN(n, 0);
			/* Recurse without the pkthdr case to move the data. */
			n->m_next = m_split(m, len, wait);
			if (n->m_next == NULL) {
				(void) m_free(n);
				return (NULL);
			} else {
				n->m_len = 0;
				return (n);
			}
		} else
			MH_ALIGN(n, remain);
	} else if (remain == 0) {
		/* Split falls exactly on an mbuf boundary: just unlink. */
		n = m->m_next;
		m->m_next = NULL;
		return (n);
	} else {
		MGET(n, wait, m->m_type);
		if (n == NULL)
			return (NULL);
		M_ALIGN(n, remain);
	}
extpacket:
	if (m->m_flags & M_EXT) {
		/* Share the cluster; both halves may become read-only. */
		n->m_data = m->m_data + len;
		mb_dupcl(n, m);
	} else {
		bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain);
	}
	n->m_len = remain;
	m->m_len = len;
	n->m_next = m->m_next;
	m->m_next = NULL;
	return (n);
}




/*
 * Flatten the chain "mb" into the caller-supplied buffer and return the
 * number of bytes copied.
 * NOTE(review): no capacity argument — assumes "buffer" is at least as
 * large as the chain's total data length; confirm at every call site.
 * Also assumes mb != NULL (the do/while dereferences it first).
 */
int pack_send_buffer(caddr_t buffer, struct mbuf* mb){

	int count_to_copy;
	int total_count_copied = 0;
	int offset = 0;
	do{
		count_to_copy = mb->m_len;
		bcopy(mtod(mb, caddr_t), buffer+offset, count_to_copy);
		offset += count_to_copy;
		total_count_copied += count_to_copy;
		mb = mb->m_next;
	}while(mb);

	return (total_count_copied);
}
diff --git a/usrsctplib/user_mbuf.h b/usrsctplib/user_mbuf.h
new file mode 100755
index 00000000..6f4134c2
--- /dev/null
+++ b/usrsctplib/user_mbuf.h
@@ -0,0 +1,444 @@
#ifndef _USER_MBUF_H_
#define _USER_MBUF_H_

/* __Userspace__ header file for mbufs */
#include <stdio.h>
#if !defined(SCTP_SIMPLE_ALLOCATOR)
#include "umem.h"
#endif
#include "user_malloc.h"
#include
"netinet/sctp_os_userspace.h" +#include <sys/queue.h> +/* #include <sys/param.h> This defines MSIZE 256 */ + +#ifdef IPHONE +#include <sys/types.h> +typedef __const char * c_caddr_t; +#else +#if defined(__Userspace_os_Linux) || defined(__Userspace_os_Darwin) +typedef char * caddr_t; +typedef __const char * c_caddr_t; +#else +#include <sys/types.h> +#endif +#endif + +#define USING_MBUF_CONSTRUCTOR 0 + +/* For Linux */ +#ifndef MSIZE +#define MSIZE 256 +/* #define MSIZE 1024 */ +#endif +#ifndef MCLBYTES +#define MCLBYTES 2048 +#endif + +struct mbuf * m_gethdr(int how, short type); +struct mbuf * m_get(int how, short type); +struct mbuf * m_free(struct mbuf *m); +void m_clget(struct mbuf *m, int how); + + +/* mbuf initialization function */ +void mbuf_init(void *); + +#define M_MOVE_PKTHDR(to, from) m_move_pkthdr((to), (from)) +#define MGET(m, how, type) ((m) = m_get((how), (type))) +#define MGETHDR(m, how, type) ((m) = m_gethdr((how), (type))) +#define MCLGET(m, how) m_clget((m), (how)) + + +#define M_HDR_PAD ((sizeof(intptr_t)==4) ? 2 : 6) /* modified for __Userspace__ */ + +/* Length to m_copy to copy all. */ +#define M_COPYALL 1000000000 + +/* umem_cache_t is defined in user_include/umem.h as + * typedef struct umem_cache umem_cache_t; + * Note:umem_zone_t is a pointer. + */ +#if defined(SCTP_SIMPLE_ALLOCATOR) +typedef size_t sctp_zone_t; +#else +typedef umem_cache_t *sctp_zone_t; +#endif + +extern sctp_zone_t zone_mbuf; +extern sctp_zone_t zone_clust; +extern sctp_zone_t zone_ext_refcnt; + +/*- + * Macros for type conversion: + * mtod(m, t) -- Convert mbuf pointer to data pointer of correct type. + * dtom(x) -- Convert data pointer within mbuf to mbuf pointer (XXX). 
+ */ +#define mtod(m, t) ((t)((m)->m_data)) +#define dtom(x) ((struct mbuf *)((intptr_t)(x) & ~(MSIZE-1))) + + + +struct mb_args { + int flags; /* Flags for mbuf being allocated */ + short type; /* Type of mbuf being allocated */ +}; + +struct clust_args { + struct mbuf * parent_mbuf; +}; + +/*__Userspace__ + * mbuf_mb_args will be passed as callback data to umem_cache_create. + * umem_cache_alloc will then be able to use this callback data when the constructor + * function mb_ctor_mbuf is called. See user_mbuf.c + * This is important because mbuf_mb_args would specify flags like M_PKTHDR + * and type like MT_DATA or MT_HEADER. This information is needed in mb_ctor_mbuf + * to properly initialize the mbuf being allocated. + * + * Argument structure passed to UMA routines during mbuf and packet + * allocations. + */ +extern struct mb_args mbuf_mb_args; +/* __Userspace__ clust_mb_args will be passed as callback data to mb_ctor_clust + * and mb_dtor_clust. + */ +extern struct clust_args clust_mb_args; + +struct mbuf * m_split(struct mbuf *, int, int); +void m_cat(struct mbuf *m, struct mbuf *n); +void m_adj(struct mbuf *, int); +void mb_free_ext(struct mbuf *); +void m_freem(struct mbuf *); +struct m_tag *m_tag_alloc(u_int32_t, int, int, int); +struct mbuf *m_copym(struct mbuf *, int, int, int); +void m_copyback(struct mbuf *, int, int, c_caddr_t); +struct mbuf *m_pullup(struct mbuf *, int); +int m_dup_pkthdr(struct mbuf *, struct mbuf *, int); +struct m_tag *m_tag_copy(struct m_tag *, int); +int m_tag_copy_chain(struct mbuf *, struct mbuf *, int); +struct mbuf *m_prepend(struct mbuf *, int, int); +void m_copydata(const struct mbuf *, int, int, caddr_t); + +#define MBUF_MEM_NAME "mbuf" +#define MBUF_CLUSTER_MEM_NAME "mbuf_cluster" +#define MBUF_EXTREFCNT_MEM_NAME "mbuf_ext_refcnt" + +#define MT_NOINIT 255 /* Not a type but a flag to allocate + a non-initialized mbuf */ +#define MB_NOTAGS 0x1UL /* no tags attached to mbuf */ + +/* + * General mbuf allocator statistics 
structure. + * __Userspace__ mbstat may be useful for gathering statistics. + * In the kernel many of these statistics are no longer used as + * they track allocator statistics through kernel UMA's built in statistics mechanism. + */ +struct mbstat { + u_long m_mbufs; /* XXX */ + u_long m_mclusts; /* XXX */ + + u_long m_drain; /* times drained protocols for space */ + u_long m_mcfail; /* XXX: times m_copym failed */ + u_long m_mpfail; /* XXX: times m_pullup failed */ + u_long m_msize; /* length of an mbuf */ + u_long m_mclbytes; /* length of an mbuf cluster */ + u_long m_minclsize; /* min length of data to allocate a cluster */ + u_long m_mlen; /* length of data in an mbuf */ + u_long m_mhlen; /* length of data in a header mbuf */ + + /* Number of mbtypes (gives # elems in mbtypes[] array: */ + short m_numtypes; + + /* XXX: Sendfile stats should eventually move to their own struct */ + u_long sf_iocnt; /* times sendfile had to do disk I/O */ + u_long sf_allocfail; /* times sfbuf allocation failed */ + u_long sf_allocwait; /* times sfbuf allocation had to wait */ +}; + + +/* + * Mbufs are of a single size, MSIZE (sys/param.h), which includes overhead. + * An mbuf may add a single "mbuf cluster" of size MCLBYTES (also in + * sys/param.h), which has no additional overhead and is used instead of the + * internal data area; this is done when at least MINCLSIZE of data must be + * stored. Additionally, it is possible to allocate a separate buffer + * externally and attach it to the mbuf in a way similar to that of mbuf + * clusters. + */ +#define MLEN (MSIZE - sizeof(struct m_hdr)) /* normal data len */ +#define MHLEN (MLEN - sizeof(struct pkthdr)) /* data len w/pkthdr */ +#define MINCLSIZE (MHLEN + 1) /* smallest amount to put in cluster */ +#define M_MAXCOMPRESS (MHLEN / 2) /* max amount to copy for compression */ + + +/* + * Header present at the beginning of every mbuf. 
+ */ +struct m_hdr { + struct mbuf *mh_next; /* next buffer in chain */ + struct mbuf *mh_nextpkt; /* next chain in queue/record */ + caddr_t mh_data; /* location of data */ + int mh_len; /* amount of data in this mbuf */ + int mh_flags; /* flags; see below */ + short mh_type; /* type of data in this mbuf */ + uint8_t pad[M_HDR_PAD];/* word align */ +}; + +/* + * Packet tag structure (see below for details). + */ +struct m_tag { + SLIST_ENTRY(m_tag) m_tag_link; /* List of packet tags */ + u_int16_t m_tag_id; /* Tag ID */ + u_int16_t m_tag_len; /* Length of data */ + u_int32_t m_tag_cookie; /* ABI/Module ID */ + void (*m_tag_free)(struct m_tag *); +}; + +/* + * Record/packet header in first mbuf of chain; valid only if M_PKTHDR is set. + */ +struct pkthdr { + struct ifnet *rcvif; /* rcv interface */ + /* variables for ip and tcp reassembly */ + void *header; /* pointer to packet header */ + int len; /* total packet length */ + /* variables for hardware checksum */ + int csum_flags; /* flags regarding checksum */ + int csum_data; /* data field used by csum routines */ + u_int16_t tso_segsz; /* TSO segment size */ + u_int16_t ether_vtag; /* Ethernet 802.1p+q vlan tag */ + SLIST_HEAD(packet_tags, m_tag) tags; /* list of packet tags */ +}; + +/* + * Description of external storage mapped into mbuf; valid only if M_EXT is + * set. + */ +struct m_ext { + caddr_t ext_buf; /* start of buffer */ + void (*ext_free) /* free routine if not the usual */ + (void *, void *); + void *ext_args; /* optional argument pointer */ + u_int ext_size; /* size of buffer, for ext_free */ + volatile u_int *ref_cnt; /* pointer to ref count info */ + int ext_type; /* type of external storage */ +}; + + +/* + * The core of the mbuf object along with some shortcut defined for practical + * purposes. 
+ */ +struct mbuf { + struct m_hdr m_hdr; + union { + struct { + struct pkthdr MH_pkthdr; /* M_PKTHDR set */ + union { + struct m_ext MH_ext; /* M_EXT set */ + char MH_databuf[MHLEN]; + } MH_dat; + } MH; + char M_databuf[MLEN]; /* !M_PKTHDR, !M_EXT */ + } M_dat; +}; + +#define m_next m_hdr.mh_next +#define m_len m_hdr.mh_len +#define m_data m_hdr.mh_data +#define m_type m_hdr.mh_type +#define m_flags m_hdr.mh_flags +#define m_nextpkt m_hdr.mh_nextpkt +#define m_act m_nextpkt +#define m_pkthdr M_dat.MH.MH_pkthdr +#define m_ext M_dat.MH.MH_dat.MH_ext +#define m_pktdat M_dat.MH.MH_dat.MH_databuf +#define m_dat M_dat.M_databuf + + +/* + * mbuf flags. + */ +#define M_EXT 0x0001 /* has associated external storage */ +#define M_PKTHDR 0x0002 /* start of record */ +#define M_EOR 0x0004 /* end of record */ +#define M_RDONLY 0x0008 /* associated data is marked read-only */ +#define M_PROTO1 0x0010 /* protocol-specific */ +#define M_PROTO2 0x0020 /* protocol-specific */ +#define M_PROTO3 0x0040 /* protocol-specific */ +#define M_PROTO4 0x0080 /* protocol-specific */ +#define M_PROTO5 0x0100 /* protocol-specific */ +#define M_NOTIFICATION M_PROTO5/* SCTP notification */ +#define M_SKIP_FIREWALL 0x4000 /* skip firewall processing */ +#define M_FREELIST 0x8000 /* mbuf is on the free list */ + + +/* + * Flags copied when copying m_pkthdr. + */ +#define M_COPYFLAGS (M_PKTHDR|M_EOR|M_RDONLY|M_PROTO1|M_PROTO1|M_PROTO2|\ + M_PROTO3|M_PROTO4|M_PROTO5|M_SKIP_FIREWALL|\ + M_BCAST|M_MCAST|M_FRAG|M_FIRSTFRAG|M_LASTFRAG|\ + M_VLANTAG|M_PROMISC) + + +/* + * mbuf pkthdr flags (also stored in m_flags). 
+ */ +#define M_BCAST 0x0200 /* send/received as link-level broadcast */ +#define M_MCAST 0x0400 /* send/received as link-level multicast */ +#define M_FRAG 0x0800 /* packet is a fragment of a larger packet */ +#define M_FIRSTFRAG 0x1000 /* packet is first fragment */ +#define M_LASTFRAG 0x2000 /* packet is last fragment */ +#define M_VLANTAG 0x10000 /* ether_vtag is valid */ +#define M_PROMISC 0x20000 /* packet was not for us */ +#define M_NOFREE 0x40000 /* do not free mbuf - it is embedded in the cluster */ + + +/* + * External buffer types: identify ext_buf type. + */ +#define EXT_CLUSTER 1 /* mbuf cluster */ +#define EXT_SFBUF 2 /* sendfile(2)'s sf_bufs */ +#define EXT_JUMBOP 3 /* jumbo cluster 4096 bytes */ +#define EXT_JUMBO9 4 /* jumbo cluster 9216 bytes */ +#define EXT_JUMBO16 5 /* jumbo cluster 16184 bytes */ +#define EXT_PACKET 6 /* mbuf+cluster from packet zone */ +#define EXT_MBUF 7 /* external mbuf reference (M_IOVEC) */ +#define EXT_NET_DRV 100 /* custom ext_buf provided by net driver(s) */ +#define EXT_MOD_TYPE 200 /* custom module's ext_buf type */ +#define EXT_DISPOSABLE 300 /* can throw this buffer away w/page flipping */ +#define EXT_EXTREF 400 /* has externally maintained ref_cnt ptr */ + + +/* + * mbuf types. + */ +#define MT_NOTMBUF 0 /* USED INTERNALLY ONLY! 
Object is not mbuf */ +#define MT_DATA 1 /* dynamic (data) allocation */ +#define MT_HEADER MT_DATA /* packet header, use M_PKTHDR instead */ +#define MT_SONAME 8 /* socket name */ +#define MT_CONTROL 14 /* extra-data protocol message */ +#define MT_OOBDATA 15 /* expedited data */ +#define MT_NTYPES 16 /* number of mbuf types for mbtypes[] */ + +#define MT_NOINIT 255 /* Not a type but a flag to allocate + a non-initialized mbuf */ + +#define MB_NOTAGS 0x1UL /* no tags attached to mbuf */ + + + +/* + * __Userspace__ flags like M_NOWAIT are defined in malloc.h + * Flags like these are used in functions like uma_zalloc() + * but don't have an equivalent in userland umem + * Flags specifying how an allocation should be made. + * + * The flag to use is as follows: + * - M_DONTWAIT or M_NOWAIT from an interrupt handler to not block allocation. + * - M_WAIT or M_WAITOK or M_TRYWAIT from wherever it is safe to block. + * + * M_DONTWAIT/M_NOWAIT means that we will not block the thread explicitly and + * if we cannot allocate immediately we may return NULL, whereas + * M_WAIT/M_WAITOK/M_TRYWAIT means that if we cannot allocate resources we + * will block until they are available, and thus never return NULL. + * + * XXX Eventually just phase this out to use M_WAITOK/M_NOWAIT. + */ +#define MBTOM(how) (how) +#define M_DONTWAIT M_NOWAIT +#define M_TRYWAIT M_WAITOK +#define M_WAIT M_WAITOK + +void m_tag_delete(struct mbuf *, struct m_tag *); +void m_tag_delete_chain(struct mbuf *, struct m_tag *); +void m_move_pkthdr(struct mbuf *, struct mbuf *); +void m_tag_free_default(struct m_tag *); + +extern int max_linkhdr; /* Largest link-level header */ +extern int max_protohdr; /* Size of largest protocol layer header. 
See user_mbuf.c */ + +extern struct mbstat mbstat; /* General mbuf stats/infos */ + + +/* + * Evaluate TRUE if it's safe to write to the mbuf m's data region (this can + * be both the local data payload, or an external buffer area, depending on + * whether M_EXT is set). + */ +#define M_WRITABLE(m) (!((m)->m_flags & M_RDONLY) && \ + (!(((m)->m_flags & M_EXT)) || \ + (*((m)->m_ext.ref_cnt) == 1)) ) \ + + +/* + * Compute the amount of space available before the current start of data in + * an mbuf. + * + * The M_WRITABLE() is a temporary, conservative safety measure: the burden + * of checking writability of the mbuf data area rests solely with the caller. + */ +#define M_LEADINGSPACE(m) \ + ((m)->m_flags & M_EXT ? \ + (M_WRITABLE(m) ? (m)->m_data - (m)->m_ext.ext_buf : 0): \ + (m)->m_flags & M_PKTHDR ? (m)->m_data - (m)->m_pktdat : \ + (m)->m_data - (m)->m_dat) + +/* + * Compute the amount of space available after the end of data in an mbuf. + * + * The M_WRITABLE() is a temporary, conservative safety measure: the burden + * of checking writability of the mbuf data area rests solely with the caller. + */ +#define M_TRAILINGSPACE(m) \ + ((m)->m_flags & M_EXT ? \ + (M_WRITABLE(m) ? (m)->m_ext.ext_buf + (m)->m_ext.ext_size \ + - ((m)->m_data + (m)->m_len) : 0) : \ + &(m)->m_dat[MLEN] - ((m)->m_data + (m)->m_len)) + + + +/* + * Arrange to prepend space of size plen to mbuf m. If a new mbuf must be + * allocated, how specifies whether to wait. If the allocation fails, the + * original mbuf chain is freed and m is set to NULL. 
+ */ +#define M_PREPEND(m, plen, how) do { \ + struct mbuf **_mmp = &(m); \ + struct mbuf *_mm = *_mmp; \ + int _mplen = (plen); \ + int __mhow = (how); \ + \ + if (M_LEADINGSPACE(_mm) >= _mplen) { \ + _mm->m_data -= _mplen; \ + _mm->m_len += _mplen; \ + } else \ + _mm = m_prepend(_mm, _mplen, __mhow); \ + if (_mm != NULL && _mm->m_flags & M_PKTHDR) \ + _mm->m_pkthdr.len += _mplen; \ + *_mmp = _mm; \ +} while (0) + +/* + * Set the m_data pointer of a newly-allocated mbuf (m_get/MGET) to place an + * object of the specified size at the end of the mbuf, longword aligned. + */ +#define M_ALIGN(m, len) do { \ + assert(!((m)->m_flags & (M_PKTHDR|M_EXT))); \ + assert((m)->m_data == (m)->m_dat); \ + (m)->m_data += (MLEN - (len)) & ~(sizeof(long) - 1); \ +} while (0) + +/* + * As above, for mbufs allocated with m_gethdr/MGETHDR or initialized by + * M_DUP/MOVE_PKTHDR. + */ +#define MH_ALIGN(m, len) do { \ + assert((m)->m_flags & M_PKTHDR && !((m)->m_flags & M_EXT)); \ + assert((m)->m_data == (m)->m_pktdat); \ + (m)->m_data += (MHLEN - (len)) & ~(sizeof(long) - 1); \ +} while (0) + +#endif diff --git a/usrsctplib/user_radix.h b/usrsctplib/user_radix.h new file mode 100755 index 00000000..772f7c13 --- /dev/null +++ b/usrsctplib/user_radix.h @@ -0,0 +1,167 @@ +/*- + * Copyright (c) 1988, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)radix.h 8.2 (Berkeley) 10/31/94 + * $FreeBSD: src/sys/net/radix.h,v 1.26 2005/01/07 01:45:35 imp Exp $ + */ + +#ifndef _RADIX_H_ +#define _RADIX_H_ + +#ifdef _KERNEL +#include <sys/_lock.h> +#include <sys/_mutex.h> +#endif + +#ifdef MALLOC_DECLARE +MALLOC_DECLARE(M_RTABLE); +#endif + +/* + * Radix search tree node layout. 
+ */ + +struct radix_node { + struct radix_mask *rn_mklist; /* list of masks contained in subtree */ + struct radix_node *rn_parent; /* parent */ + short rn_bit; /* bit offset; -1-index(netmask) */ + char rn_bmask; /* node: mask for bit test*/ + u_char rn_flags; /* enumerated next */ +#define RNF_NORMAL 1 /* leaf contains normal route */ +#define RNF_ROOT 2 /* leaf is root leaf for tree */ +#define RNF_ACTIVE 4 /* This node is alive (for rtfree) */ + union { + struct { /* leaf only data: */ + caddr_t rn_Key; /* object of search */ + caddr_t rn_Mask; /* netmask, if present */ + struct radix_node *rn_Dupedkey; + } rn_leaf; + struct { /* node only data: */ + int rn_Off; /* where to start compare */ + struct radix_node *rn_L;/* progeny */ + struct radix_node *rn_R;/* progeny */ + } rn_node; + } rn_u; +#ifdef RN_DEBUG + int rn_info; + struct radix_node *rn_twin; + struct radix_node *rn_ybro; +#endif +}; + +#define rn_dupedkey rn_u.rn_leaf.rn_Dupedkey +#define rn_key rn_u.rn_leaf.rn_Key +#define rn_mask rn_u.rn_leaf.rn_Mask +#define rn_offset rn_u.rn_node.rn_Off +#define rn_left rn_u.rn_node.rn_L +#define rn_right rn_u.rn_node.rn_R + +/* + * Annotations to tree concerning potential routes applying to subtrees. + */ + +struct radix_mask { + short rm_bit; /* bit offset; -1-index(netmask) */ + char rm_unused; /* cf. rn_bmask */ + u_char rm_flags; /* cf. 
rn_flags */ + struct radix_mask *rm_mklist; /* more masks to try */ + union { + caddr_t rmu_mask; /* the mask */ + struct radix_node *rmu_leaf; /* for normal routes */ + } rm_rmu; + int rm_refs; /* # of references to this struct */ +}; + +#define rm_mask rm_rmu.rmu_mask +#define rm_leaf rm_rmu.rmu_leaf /* extra field would make 32 bytes */ + +typedef int walktree_f_t(struct radix_node *, void *); + +struct radix_node_head { + struct radix_node *rnh_treetop; + int rnh_addrsize; /* permit, but not require fixed keys */ + int rnh_pktsize; /* permit, but not require fixed keys */ + struct radix_node *(*rnh_addaddr) /* add based on sockaddr */ + (void *v, void *mask, + struct radix_node_head *head, struct radix_node nodes[]); + struct radix_node *(*rnh_addpkt) /* add based on packet hdr */ + (void *v, void *mask, + struct radix_node_head *head, struct radix_node nodes[]); + struct radix_node *(*rnh_deladdr) /* remove based on sockaddr */ + (void *v, void *mask, struct radix_node_head *head); + struct radix_node *(*rnh_delpkt) /* remove based on packet hdr */ + (void *v, void *mask, struct radix_node_head *head); + struct radix_node *(*rnh_matchaddr) /* locate based on sockaddr */ + (void *v, struct radix_node_head *head); + struct radix_node *(*rnh_lookup) /* locate based on sockaddr */ + (void *v, void *mask, struct radix_node_head *head); + struct radix_node *(*rnh_matchpkt) /* locate based on packet hdr */ + (void *v, struct radix_node_head *head); + int (*rnh_walktree) /* traverse tree */ + (struct radix_node_head *head, walktree_f_t *f, void *w); + int (*rnh_walktree_from) /* traverse tree below a */ + (struct radix_node_head *head, void *a, void *m, + walktree_f_t *f, void *w); + void (*rnh_close) /* do something when the last ref drops */ + (struct radix_node *rn, struct radix_node_head *head); + struct radix_node rnh_nodes[3]; /* empty tree for common case */ +#ifdef _KERNEL + struct mtx rnh_mtx; /* locks entire radix tree */ +#endif +}; + +#ifndef _KERNEL 
+#define R_Malloc(p, t, n) (p = (t) malloc((unsigned int)(n))) +#define R_Zalloc(p, t, n) (p = (t) calloc(1,(unsigned int)(n))) +#define Free(p) free((char *)p); +#else +#define R_Malloc(p, t, n) (p = (t) malloc((unsigned long)(n), M_RTABLE, M_NOWAIT)) +#define R_Zalloc(p, t, n) (p = (t) malloc((unsigned long)(n), M_RTABLE, M_NOWAIT | M_ZERO)) +#define Free(p) free((caddr_t)p, M_RTABLE); + +#define RADIX_NODE_HEAD_LOCK_INIT(rnh) \ + mtx_init(&(rnh)->rnh_mtx, "radix node head", NULL, MTX_DEF | MTX_RECURSE) +#define RADIX_NODE_HEAD_LOCK(rnh) mtx_lock(&(rnh)->rnh_mtx) +#define RADIX_NODE_HEAD_UNLOCK(rnh) mtx_unlock(&(rnh)->rnh_mtx) +#define RADIX_NODE_HEAD_DESTROY(rnh) mtx_destroy(&(rnh)->rnh_mtx) +#define RADIX_NODE_HEAD_LOCK_ASSERT(rnh) mtx_assert(&(rnh)->rnh_mtx, MA_OWNED) +#endif /* _KERNEL */ + +void rn_init(void); +int rn_inithead(void **, int); +int rn_refines(void *, void *); +struct radix_node + *rn_addmask(void *, int, int), + *rn_addroute (void *, void *, struct radix_node_head *, + struct radix_node [2]), + *rn_delete(void *, void *, struct radix_node_head *), + *rn_lookup (void *v_arg, void *m_arg, + struct radix_node_head *head), + *rn_match(void *, struct radix_node_head *); + +#endif /* _RADIX_H_ */ diff --git a/usrsctplib/user_recv_thread.c b/usrsctplib/user_recv_thread.c new file mode 100755 index 00000000..2dc88efa --- /dev/null +++ b/usrsctplib/user_recv_thread.c @@ -0,0 +1,382 @@ +#include <sys/types.h> +#include <sys/socket.h> +#if 0 +#include <sys/uio.h> +#endif +#include <unistd.h> +#include <pthread.h> +#include <netinet/sctp_os.h> +#include <netinet/sctp_var.h> +#include <netinet/sctp_pcb.h> + + +/* extern __Userspace__ variable in user_recv_thread.h */ +int userspace_rawsctp = -1; /* needs to be declared = -1 */ +int userspace_udpsctp = -1; +int userspace_route = -1; + +/* local macros and datatypes used to get IP addresses system independently */ +#if defined IP_RECVDSTADDR +# define DSTADDR_SOCKOPT IP_RECVDSTADDR +# define DSTADDR_DATASIZE 
(CMSG_SPACE(sizeof(struct in_addr))) +# define dstaddr(x) (CMSG_DATA(x)) +#elif defined IP_PKTINFO +# define DSTADDR_SOCKOPT IP_PKTINFO +# define DSTADDR_DATASIZE (CMSG_SPACE(sizeof(struct in_pktinfo))) +# define dstaddr(x) (&(((struct in_pktinfo *)(CMSG_DATA(x)))->ipi_addr)) +#else +# error "can't determine socket option to use to get UDP IP" +#endif + +void recv_thread_destroy_udp(void *); +void recv_thread_destroy_raw(void *); +const int MAXLEN_MBUF_CHAIN = 32; /* What should this value be? */ + +/* need ref to this for destroy... */ +struct mbuf **recvmbuf; + +static void * +recv_function_raw(void *arg) +{ + struct iovec recv_iovec[MAXLEN_MBUF_CHAIN]; + int iovcnt = MAXLEN_MBUF_CHAIN; + /*Initially the entire set of mbufs is to be allocated. + to_fill indicates this amount. */ + int to_fill = MAXLEN_MBUF_CHAIN; + /* iovlen is the size of each mbuf in the chain */ + int i, n, ncounter; + int iovlen = MCLBYTES; + int want_ext = (iovlen > MLEN)? 1 : 0; + int want_header = 0; + + recvmbuf = malloc(sizeof(struct mbuf *) * MAXLEN_MBUF_CHAIN); + /* why can't I compile with this? */ +#if 0 + pthread_cleanup_push(recv_thread_destroy_raw, NULL); +#endif + + while (1) { + for (i = 0; i < to_fill; i++) { + /* Not getting the packet header. Tests with chain of one run + as usual without having the packet header. 
+ Have tried both sending and receiving + */ + recvmbuf[i] = sctp_get_mbuf_for_msg(iovlen, want_header, M_DONTWAIT, want_ext, MT_DATA); + recv_iovec[i].iov_base = (caddr_t)recvmbuf[i]->m_data; + recv_iovec[i].iov_len = iovlen; + } + to_fill = 0; + + ncounter = n = readv(userspace_rawsctp, recv_iovec, iovcnt); + assert (n <= (MAXLEN_MBUF_CHAIN * iovlen)); + SCTP_HEADER_LEN(recvmbuf[0]) = n; /* length of total packet */ + + if (n <= iovlen) { + SCTP_BUF_LEN(recvmbuf[0]) = n; + (to_fill)++; + } else { +/* printf("%s: n=%d > iovlen=%d\n", __func__, n, iovlen); */ + i = 0; + SCTP_BUF_LEN(recvmbuf[0]) = iovlen; + + ncounter -= iovlen; + (to_fill)++; + do { + recvmbuf[i]->m_next = recvmbuf[i+1]; + SCTP_BUF_LEN(recvmbuf[i]->m_next) = min(ncounter, iovlen); + i++; + ncounter -= iovlen; + (to_fill)++; + } while (ncounter > 0); + } + assert(to_fill <= MAXLEN_MBUF_CHAIN); + SCTPDBG(SCTP_DEBUG_INPUT1, "%s: Received %d bytes.", __func__, n); + SCTPDBG(SCTP_DEBUG_INPUT1, " - calling sctp_input with off=%d\n", (int)sizeof(struct ip)); + + /* process incoming data */ + /* sctp_input frees this mbuf. */ + sctp_input_with_port(recvmbuf[0], sizeof(struct ip), 0); + } + return NULL; +} + + +/* need ref to this for destroy... */ +struct mbuf **udprecvmbuf; + +static void * +recv_function_udp(void *arg) +{ + struct iovec iov[MAXLEN_MBUF_CHAIN]; + /*Initially the entire set of mbufs is to be allocated. + to_fill indicates this amount. */ + int to_fill = MAXLEN_MBUF_CHAIN; + /* iovlen is the size of each mbuf in the chain */ + int i, n, ncounter; + int iovlen = MCLBYTES; + int want_ext = (iovlen > MLEN)? 1 : 0; + int want_header = 0; + struct ip *ip; + struct mbuf *ip_m; + struct msghdr msg; + struct sockaddr_in src, dst; + char cmsgbuf[DSTADDR_DATASIZE]; + struct cmsghdr *cmsgptr; + + udprecvmbuf = malloc(sizeof(struct mbuf *) * MAXLEN_MBUF_CHAIN); + /* why can't I compile with this? 
*/ +#if 0 + pthread_cleanup_push(recv_thread_destroy_udp, NULL); +#endif + + while (1) { + for (i = 0; i < to_fill; i++) { + /* Not getting the packet header. Tests with chain of one run + as usual without having the packet header. + Have tried both sending and receiving + */ + udprecvmbuf[i] = sctp_get_mbuf_for_msg(iovlen, want_header, M_DONTWAIT, want_ext, MT_DATA); + iov[i].iov_base = (caddr_t)udprecvmbuf[i]->m_data; + iov[i].iov_len = iovlen; + } + to_fill = 0; + bzero((void *)&msg, sizeof(struct msghdr)); + bzero((void *)&src, sizeof(struct sockaddr_in)); + bzero((void *)&dst, sizeof(struct sockaddr_in)); + bzero((void *)cmsgbuf, DSTADDR_DATASIZE); + + msg.msg_name = (void *)&src; + msg.msg_namelen = sizeof(struct sockaddr_in); + msg.msg_iov = iov; + msg.msg_iovlen = MAXLEN_MBUF_CHAIN; + msg.msg_control = (void *)cmsgbuf; + msg.msg_controllen = DSTADDR_DATASIZE; + msg.msg_flags = 0; + + ncounter = n = recvmsg(userspace_udpsctp, &msg, 0); + + assert (n <= (MAXLEN_MBUF_CHAIN * iovlen)); + SCTP_HEADER_LEN(udprecvmbuf[0]) = n; /* length of total packet */ + + if (n <= iovlen) { + SCTP_BUF_LEN(udprecvmbuf[0]) = n; + (to_fill)++; + } else { + printf("%s: n=%d > iovlen=%d\n", __func__, n, iovlen); + i = 0; + SCTP_BUF_LEN(udprecvmbuf[0]) = iovlen; + + ncounter -= iovlen; + (to_fill)++; + do { + udprecvmbuf[i]->m_next = udprecvmbuf[i+1]; + SCTP_BUF_LEN(udprecvmbuf[i]->m_next) = min(ncounter, iovlen); + i++; + ncounter -= iovlen; + (to_fill)++; + } while (ncounter > 0); + } + assert(to_fill <= MAXLEN_MBUF_CHAIN); + + for (cmsgptr = CMSG_FIRSTHDR(&msg); cmsgptr != NULL; cmsgptr = CMSG_NXTHDR(&msg, cmsgptr)) { + if ((cmsgptr->cmsg_level == IPPROTO_IP) && (cmsgptr->cmsg_type == DSTADDR_SOCKOPT)) { + dst.sin_family = AF_INET; +#ifdef HAVE_SIN_LEN + dst.sin_len = sizeof(struct sockaddr_in); +#endif + dst.sin_port = htons(SCTP_BASE_SYSCTL(sctp_udp_tunneling_port)); + memcpy((void *)&dst.sin_addr, (const void *) dstaddr(cmsgptr), sizeof(struct in_addr)); + } + } + + ip_m = 
sctp_get_mbuf_for_msg(sizeof(struct ip), 1, M_DONTWAIT, 1, MT_DATA); + + ip = mtod(ip_m, struct ip *); + bzero((void *)ip, sizeof(struct ip)); + ip->ip_v = IPVERSION; + ip->ip_p = IPPROTO_UDP; /* tells me over UDP */ + ip->ip_len = n; + ip->ip_src = src.sin_addr; + ip->ip_dst = dst.sin_addr; + SCTP_HEADER_LEN(ip_m) = sizeof(struct ip) + n; + SCTP_BUF_LEN(ip_m) = sizeof(struct ip); + SCTP_BUF_NEXT(ip_m) = udprecvmbuf[0]; + + SCTPDBG(SCTP_DEBUG_INPUT1, "%s: Received %d bytes.", __func__, n); + SCTPDBG(SCTP_DEBUG_INPUT1, " - calling sctp_input with off=%d\n", (int)sizeof(struct ip)); + + /* process incoming data */ + /* sctp_input frees this mbuf. */ + sctp_input_with_port(ip_m, sizeof(struct ip), src.sin_port); + } + return NULL; +} + +#if 0 +static int +getReceiveBufferSize(int sfd) +{ + int actualbufsize; + socklen_t intlen = sizeof(int); + + if (getsockopt(sfd, SOL_SOCKET, SO_RCVBUF, &actualbufsize, (socklen_t *)&intlen) < 0) { + perror("setsockopt: rcvbuf"); + exit(1); + } else { + fprintf(stdout,"Receive buffer size: %d.\n", actualbufsize); + } + return 0; +} + +static int +getSendBufferSize(int sfd) +{ + int actualbufsize; + socklen_t intlen = sizeof(int); + + if (getsockopt(sfd, SOL_SOCKET, SO_SNDBUF, &actualbufsize, (socklen_t *)&intlen) < 0) { + perror("setsockopt: sendbuf"); + exit(1); + } else { + fprintf(stdout,"Send buffer size: %d.\n", actualbufsize); + } + return 0; +} +#endif + +static int +setReceiveBufferSize(int sfd, int new_size) +{ + int ch = new_size; + if (setsockopt (sfd, SOL_SOCKET, SO_RCVBUF, (void*)&ch, sizeof(ch)) < 0) { + perror("setReceiveBufferSize setsockopt: SO_RCVBUF failed !\n"); + exit(1); + } + /*printf("setReceiveBufferSize set receive buffer size to : %d bytes\n",ch);*/ + return 0; +} + +static int +setSendBufferSize(int sfd, int new_size) +{ + int ch = new_size; + if (setsockopt (sfd, SOL_SOCKET, SO_SNDBUF, (void*)&ch, sizeof(ch)) < 0) { + perror("setSendBufferSize setsockopt: SO_RCVBUF failed !\n"); + exit(1); + } + 
/*printf("setSendBufferSize set send buffer size to : %d bytes\n",ch);*/ + return 0; +} + +void +recv_thread_init() +{ + pthread_t recvthreadraw , recvthreadudp; + int rc; + const int hdrincl = 1; + const int on = 1; + struct sockaddr_in addr_ipv4; + + /* use raw socket, create if not initialized */ + if (userspace_rawsctp == -1) { + if ((userspace_rawsctp = socket(AF_INET, SOCK_RAW, IPPROTO_SCTP)) < 0) { + perror("raw socket failure. continue with only UDP socket...\n"); + } else { + /* complete setting up the raw SCTP socket */ + if (setsockopt(userspace_rawsctp, IPPROTO_IP, IP_HDRINCL, &hdrincl, sizeof(int)) < 0) { + perror("raw setsockopt failure\n"); + exit(1); + } + setReceiveBufferSize(userspace_rawsctp, SB_RAW); /* 128K */ + setSendBufferSize(userspace_rawsctp, SB_RAW); /* 128K Is this setting net.inet.raw.maxdgram value? Should it be set to 64K? */ + } + } + + /* use UDP socket, create if not initialized */ + if (userspace_udpsctp == -1) { + if ((userspace_udpsctp = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP)) < 0) { + perror("UDP socket failure"); + exit(1); + } + if (setsockopt(userspace_udpsctp, IPPROTO_IP, DSTADDR_SOCKOPT, (const void *)&on, (int)sizeof(int)) < 0) { + perror("setsockopt: DSTADDR_SOCKOPT"); + exit(1); + } + memset((void *)&addr_ipv4, 0, sizeof(struct sockaddr_in)); +#ifdef HAVE_SIN_LEN + addr_ipv4.sin_len = sizeof(struct sockaddr_in); +#endif + addr_ipv4.sin_family = AF_INET; + addr_ipv4.sin_port = htons(SCTP_BASE_SYSCTL(sctp_udp_tunneling_port)); + addr_ipv4.sin_addr.s_addr = htonl(INADDR_ANY); + if (bind(userspace_udpsctp, (const struct sockaddr *)&addr_ipv4, sizeof(struct sockaddr_in)) < 0) { + perror("bind"); + exit(1); + } + if (userspace_rawsctp == -1) { + SCTP_BASE_SYSCTL(sctp_udp_tunneling_for_client_enable) = 1; + } + setReceiveBufferSize(userspace_udpsctp, SB_RAW); /* 128K */ + setSendBufferSize(userspace_udpsctp, SB_RAW); /* 128K Is this setting net.inet.raw.maxdgram value? Should it be set to 64K? 
*/ + } + + /* start threads here for receiving incoming messages */ + if (userspace_rawsctp != -1) { + if ((rc = pthread_create(&recvthreadraw, NULL, &recv_function_raw, (void *)NULL))) { + printf("ERROR; return code from recvthread pthread_create() is %d\n", rc); + exit(1); + } + } + if (userspace_udpsctp != -1) { + if ((rc = pthread_create(&recvthreadudp, NULL, &recv_function_udp, (void *)NULL))) { + printf("ERROR; return code from recvthread pthread_create() is %d\n", rc); + exit(1); + } + } +} + + +void +recv_thread_destroy_raw(void *parm) { + + int i; + + /* close sockets if they are open */ + if (userspace_route != -1) + close(userspace_route); + if (userspace_rawsctp != -1) + close(userspace_rawsctp); + + /* + * call m_free on contents of recvmbuf array + */ + for(i=0; i < MAXLEN_MBUF_CHAIN; i++) { + m_free(recvmbuf[i]); + } + + /* free the array itself */ + free(recvmbuf); + + +} + +void +recv_thread_destroy_udp(void *parm) { + + int i; + + /* socket closed in + void sctp_over_udp_stop(void) + */ + + /* + * call m_free on contents of udprecvmbuf array + */ + for(i=0; i < MAXLEN_MBUF_CHAIN; i++) { + m_free(udprecvmbuf[i]); + } + + /* free the array itself */ + free(udprecvmbuf); + +} diff --git a/usrsctplib/user_recv_thread.h b/usrsctplib/user_recv_thread.h new file mode 100755 index 00000000..08aceeb3 --- /dev/null +++ b/usrsctplib/user_recv_thread.h @@ -0,0 +1,17 @@ +/* __Userspace__ user_recv_thread.h header file for creating recv thread */ + + + +/* raw socket to be used for both incoming and outgoing packets */ +extern int userspace_rawsctp; /* needs to be declared = -1 */ + +/* udp socket to be used for both incoming and outgoing packets */ +extern int userspace_udpsctp; /* needs to be declared = -1 */ + +/* routing socket used for route lookups */ +extern int userspace_route; + +void recv_thread_init(); + +#define RECV_THREAD_INIT recv_thread_init + diff --git a/usrsctplib/user_resourcevar.h b/usrsctplib/user_resourcevar.h new file mode 100755 index 
00000000..b80d628b --- /dev/null +++ b/usrsctplib/user_resourcevar.h @@ -0,0 +1,8 @@ +/* __Userspace__ +In sctp_os_userspace.h +#define SCTP_PROCESS_STRUCT struct proc * + +struct proc moved to user_socketvar,h. This file should be removed +after all references to user_resourcevar.h have been removed. +*/ + diff --git a/usrsctplib/user_route.h b/usrsctplib/user_route.h new file mode 100755 index 00000000..45bbee87 --- /dev/null +++ b/usrsctplib/user_route.h @@ -0,0 +1,366 @@ +/*- + * Copyright (c) 1980, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)route.h 8.4 (Berkeley) 1/9/95 + * $FreeBSD: src/sys/net/route.h,v 1.65 2006/03/15 19:39:09 andre Exp $ + */ + +#ifndef _NET_ROUTE_H_ +#define _NET_ROUTE_H_ + +/* + * Kernel resident routing tables. + * + * The routing tables are initialized when interface addresses + * are set by making entries for all directly connected interfaces. + */ + +/* + * A route consists of a destination address and a reference + * to a routing entry. These are often held by protocols + * in their control blocks, e.g. inpcb. + */ +struct route { + struct rtentry *ro_rt; + struct sockaddr ro_dst; +}; + +/* + * These numbers are used by reliable protocols for determining + * retransmission behavior and are included in the routing structure. + */ +struct rt_metrics_lite { + u_long rmx_mtu; /* MTU for this path */ + u_long rmx_expire; /* lifetime for route, e.g. redirect */ + u_long rmx_pksent; /* packets sent using this route */ +}; + +struct rt_metrics { + u_long rmx_locks; /* Kernel must leave these values alone */ + u_long rmx_mtu; /* MTU for this path */ + u_long rmx_hopcount; /* max hops expected */ + u_long rmx_expire; /* lifetime for route, e.g. 
redirect */ + u_long rmx_recvpipe; /* inbound delay-bandwidth product */ + u_long rmx_sendpipe; /* outbound delay-bandwidth product */ + u_long rmx_ssthresh; /* outbound gateway buffer limit */ + u_long rmx_rtt; /* estimated round trip time */ + u_long rmx_rttvar; /* estimated rtt variance */ + u_long rmx_pksent; /* packets sent using this route */ + u_long rmx_filler[4]; /* will be used for T/TCP later */ +}; + +/* + * rmx_rtt and rmx_rttvar are stored as microseconds; + * RTTTOPRHZ(rtt) converts to a value suitable for use + * by a protocol slowtimo counter. + */ +#define RTM_RTTUNIT 1000000 /* units for rtt, rttvar, as units per sec */ +#define RTTTOPRHZ(r) ((r) / (RTM_RTTUNIT / PR_SLOWHZ)) + +/* + * XXX kernel function pointer `rt_output' is visible to applications. + */ +struct mbuf; + +/* + * We distinguish between routes to hosts and routes to networks, + * preferring the former if available. For each route we infer + * the interface to use from the gateway address supplied when + * the route was entered. Routes that forward packets through + * gateways are marked so that the output routines know to address the + * gateway rather than the ultimate destination. + */ +#ifndef RNF_NORMAL +#include <user_radix.h> /* was <net/radix.h> */ +#endif +struct rtentry { + struct radix_node rt_nodes[2]; /* tree glue, and other values */ + /* + * XXX struct rtentry must begin with a struct radix_node (or two!) 
+ * because the code does some casts of a 'struct radix_node *' + * to a 'struct rtentry *' + */ +#define rt_key(r) (*((struct sockaddr **)(&(r)->rt_nodes->rn_key))) +#define rt_mask(r) (*((struct sockaddr **)(&(r)->rt_nodes->rn_mask))) + struct sockaddr *rt_gateway; /* value */ + u_long rt_flags; /* up/down?, host/net */ + struct ifnet *rt_ifp; /* the answer: interface to use */ + struct ifaddr *rt_ifa; /* the answer: interface address to use */ + struct rt_metrics_lite rt_rmx; /* metrics used by rx'ing protocols */ + long rt_refcnt; /* # held references */ + struct sockaddr *rt_genmask; /* for generation of cloned routes */ + caddr_t rt_llinfo; /* pointer to link level info cache */ + struct rtentry *rt_gwroute; /* implied entry for gatewayed routes */ + struct rtentry *rt_parent; /* cloning parent of this route */ + /* need for __Userspace__ (?) */ + /* #ifdef _KERNEL */ + /* XXX ugly, user apps use this definition but don't have a mtx def */ + struct mtx rt_mtx; /* mutex for routing entry */ + /* #endif */ +}; + +/* + * Following structure necessary for 4.3 compatibility; + * We should eventually move it to a compat file. 
+ */ +struct ortentry { + u_long rt_hash; /* to speed lookups */ + struct sockaddr rt_dst; /* key */ + struct sockaddr rt_gateway; /* value */ + short rt_flags; /* up/down?, host/net */ + short rt_refcnt; /* # held references */ + u_long rt_use; /* raw # packets forwarded */ + struct ifnet *rt_ifp; /* the answer: interface to use */ +}; + +#define rt_use rt_rmx.rmx_pksent + +#define RTF_UP 0x1 /* route usable */ +#define RTF_GATEWAY 0x2 /* destination is a gateway */ +#define RTF_HOST 0x4 /* host entry (net otherwise) */ +#define RTF_REJECT 0x8 /* host or net unreachable */ +#define RTF_DYNAMIC 0x10 /* created dynamically (by redirect) */ +#define RTF_MODIFIED 0x20 /* modified dynamically (by redirect) */ +#define RTF_DONE 0x40 /* message confirmed */ +/* 0x80 unused, was RTF_DELCLONE */ +#define RTF_CLONING 0x100 /* generate new routes on use */ +#define RTF_XRESOLVE 0x200 /* external daemon resolves name */ +#define RTF_LLINFO 0x400 /* generated by link layer (e.g. ARP) */ +#define RTF_STATIC 0x800 /* manually added */ +#define RTF_BLACKHOLE 0x1000 /* just discard pkts (during updates) */ +#define RTF_PROTO2 0x4000 /* protocol specific routing flag */ +#define RTF_PROTO1 0x8000 /* protocol specific routing flag */ + +/* XXX: temporary to stay API/ABI compatible with userland */ +#ifndef _KERNEL +#define RTF_PRCLONING 0x10000 /* unused, for compatibility */ +#endif + +#define RTF_WASCLONED 0x20000 /* route generated through cloning */ +#define RTF_PROTO3 0x40000 /* protocol specific routing flag */ +/* 0x80000 unused */ +#define RTF_PINNED 0x100000 /* future use */ +#define RTF_LOCAL 0x200000 /* route represents a local address */ +#define RTF_BROADCAST 0x400000 /* route represents a bcast address */ +#define RTF_MULTICAST 0x800000 /* route represents a mcast address */ + /* 0x1000000 and up unassigned */ + +/* Mask of RTF flags that are allowed to be modified by RTM_CHANGE. 
*/ +#define RTF_FMASK \ + (RTF_PROTO1 | RTF_PROTO2 | RTF_PROTO3 | RTF_BLACKHOLE | \ + RTF_REJECT | RTF_STATIC) + +/* + * Routing statistics. + */ +struct rtstat { + short rts_badredirect; /* bogus redirect calls */ + short rts_dynamic; /* routes created by redirects */ + short rts_newgateway; /* routes modified by redirects */ + short rts_unreach; /* lookups which failed */ + short rts_wildcard; /* lookups satisfied by a wildcard */ +}; +/* + * Structures for routing messages. + */ +struct rt_msghdr { + u_short rtm_msglen; /* to skip over non-understood messages */ + u_char rtm_version; /* future binary compatibility */ + u_char rtm_type; /* message type */ + u_short rtm_index; /* index for associated ifp */ + int rtm_flags; /* flags, incl. kern & message, e.g. DONE */ + int rtm_addrs; /* bitmask identifying sockaddrs in msg */ + pid_t rtm_pid; /* identify sender */ + int rtm_seq; /* for sender to identify action */ + int rtm_errno; /* why failed */ + int rtm_fmask; /* bitmask used in RTM_CHANGE message */ +#define rtm_use rtm_fmask /* deprecated, use rtm_rmx->rmx_pksent */ + u_long rtm_inits; /* which metrics we are initializing */ + struct rt_metrics rtm_rmx; /* metrics themselves */ +}; + +#define RTM_VERSION 5 /* Up the ante and ignore older versions */ + +/* + * Message types. 
+ */ +#define RTM_ADD 0x1 /* Add Route */ +#define RTM_DELETE 0x2 /* Delete Route */ +#define RTM_CHANGE 0x3 /* Change Metrics or flags */ +#define RTM_GET 0x4 /* Report Metrics */ +#define RTM_LOSING 0x5 /* Kernel Suspects Partitioning */ +#define RTM_REDIRECT 0x6 /* Told to use different route */ +#define RTM_MISS 0x7 /* Lookup failed on this address */ +#define RTM_LOCK 0x8 /* fix specified metrics */ +#define RTM_OLDADD 0x9 /* caused by SIOCADDRT */ +#define RTM_OLDDEL 0xa /* caused by SIOCDELRT */ +#define RTM_RESOLVE 0xb /* req to resolve dst to LL addr */ +#define RTM_NEWADDR 0xc /* address being added to iface */ +#define RTM_DELADDR 0xd /* address being removed from iface */ +#define RTM_IFINFO 0xe /* iface going up/down etc. */ +#define RTM_NEWMADDR 0xf /* mcast group membership being added to if */ +#define RTM_DELMADDR 0x10 /* mcast group membership being deleted */ +#define RTM_IFANNOUNCE 0x11 /* iface arrival/departure */ +#define RTM_IEEE80211 0x12 /* IEEE80211 wireless event */ + +/* + * Bitmask values for rtm_inits and rmx_locks. + */ +#define RTV_MTU 0x1 /* init or lock _mtu */ +#define RTV_HOPCOUNT 0x2 /* init or lock _hopcount */ +#define RTV_EXPIRE 0x4 /* init or lock _expire */ +#define RTV_RPIPE 0x8 /* init or lock _recvpipe */ +#define RTV_SPIPE 0x10 /* init or lock _sendpipe */ +#define RTV_SSTHRESH 0x20 /* init or lock _ssthresh */ +#define RTV_RTT 0x40 /* init or lock _rtt */ +#define RTV_RTTVAR 0x80 /* init or lock _rttvar */ + +/* + * Bitmask values for rtm_addrs. 
+ */ +#define RTA_DST 0x1 /* destination sockaddr present */ +#define RTA_GATEWAY 0x2 /* gateway sockaddr present */ +#define RTA_NETMASK 0x4 /* netmask sockaddr present */ +#define RTA_GENMASK 0x8 /* cloning mask sockaddr present */ +#define RTA_IFP 0x10 /* interface name sockaddr present */ +#define RTA_IFA 0x20 /* interface addr sockaddr present */ +#define RTA_AUTHOR 0x40 /* sockaddr for author of redirect */ +#define RTA_BRD 0x80 /* for NEWADDR, broadcast or p-p dest addr */ + +/* + * Index offsets for sockaddr array for alternate internal encoding. + */ +#define RTAX_DST 0 /* destination sockaddr present */ +#define RTAX_GATEWAY 1 /* gateway sockaddr present */ +#define RTAX_NETMASK 2 /* netmask sockaddr present */ +#define RTAX_GENMASK 3 /* cloning mask sockaddr present */ +#define RTAX_IFP 4 /* interface name sockaddr present */ +#define RTAX_IFA 5 /* interface addr sockaddr present */ +#define RTAX_AUTHOR 6 /* sockaddr for author of redirect */ +#define RTAX_BRD 7 /* for NEWADDR, broadcast or p-p dest addr */ +#define RTAX_MAX 8 /* size of array to allocate */ + +struct rt_addrinfo { + int rti_addrs; + struct sockaddr *rti_info[RTAX_MAX]; + int rti_flags; + struct ifaddr *rti_ifa; + struct ifnet *rti_ifp; +}; + +/* + * This macro returns the size of a struct sockaddr when passed + * through a routing socket. Basically we round up sa_len to + * a multiple of sizeof(long), with a minimum of sizeof(long). + * The check for a NULL pointer is just a convenience, probably never used. + * The case sa_len == 0 should only apply to empty structures. + */ +#define SA_SIZE(sa) \ + ( (!(sa) || ((struct sockaddr *)(sa))->sa_len == 0) ? 
\ + sizeof(long) : \ + 1 + ( (((struct sockaddr *)(sa))->sa_len - 1) | (sizeof(long) - 1) ) ) + +/* need for __Userspace__ */ +/* #ifdef _KERNEL */ + +#define RT_LOCK_INIT(_rt) \ + mtx_init(&(_rt)->rt_mtx, "rtentry", NULL, MTX_DEF | MTX_DUPOK) +#define RT_LOCK(_rt) mtx_lock(&(_rt)->rt_mtx) +#define RT_UNLOCK(_rt) mtx_unlock(&(_rt)->rt_mtx) +#define RT_LOCK_DESTROY(_rt) mtx_destroy(&(_rt)->rt_mtx) +#define RT_LOCK_ASSERT(_rt) mtx_assert(&(_rt)->rt_mtx, MA_OWNED) + +#define RT_ADDREF(_rt) do { \ + RT_LOCK_ASSERT(_rt); \ + KASSERT((_rt)->rt_refcnt >= 0, \ + ("negative refcnt %ld", (_rt)->rt_refcnt)); \ + (_rt)->rt_refcnt++; \ +} while (0) +#define RT_REMREF(_rt) do { \ + RT_LOCK_ASSERT(_rt); \ + KASSERT((_rt)->rt_refcnt > 0, \ + ("bogus refcnt %ld", (_rt)->rt_refcnt)); \ + (_rt)->rt_refcnt--; \ +} while (0) + +#define RTFREE_LOCKED(_rt) do { \ + if ((_rt)->rt_refcnt <= 1) \ + rtfree(_rt); \ + else { \ + RT_REMREF(_rt); \ + RT_UNLOCK(_rt); \ + } \ + /* guard against invalid refs */ \ + _rt = 0; \ + } while (0) +#define RTFREE(_rt) do { \ + RT_LOCK(_rt); \ + RTFREE_LOCKED(_rt); \ + } while (0) + +extern struct radix_node_head *rt_tables[AF_MAX+1]; + +struct ifmultiaddr; + +int rt_getifa(struct rt_addrinfo *); +void rt_ieee80211msg(struct ifnet *, int, void *, size_t); +void rt_ifannouncemsg(struct ifnet *, int); +void rt_ifmsg(struct ifnet *); +void rt_missmsg(int, struct rt_addrinfo *, int, int); +void rt_newaddrmsg(int, struct ifaddr *, int, struct rtentry *); +void rt_newmaddrmsg(int, struct ifmultiaddr *); +int rt_setgate(struct rtentry *, struct sockaddr *, struct sockaddr *); + +/* + * Note the following locking behavior: + * + * rtalloc_ign() and rtalloc() return ro->ro_rt unlocked + * + * rtalloc1() returns a locked rtentry + * + * rtfree() and RTFREE_LOCKED() require a locked rtentry + * + * RTFREE() uses an unlocked entry. 
+ */ + +void rtalloc_ign(struct route *ro, u_long ignflags); +void rtalloc(struct route *ro); /* XXX deprecated, use rtalloc_ign(ro, 0) */ +struct rtentry *rtalloc1(struct sockaddr *, int, u_long); +int rtexpunge(struct rtentry *); +void rtfree(struct rtentry *); +int rtinit(struct ifaddr *, int, int); +int rtioctl(u_long, caddr_t); +void rtredirect(struct sockaddr *, struct sockaddr *, + struct sockaddr *, int, struct sockaddr *); +int rtrequest(int, struct sockaddr *, + struct sockaddr *, struct sockaddr *, int, struct rtentry **); +int rtrequest1(int, struct rt_addrinfo *, struct rtentry **); +int rt_check(struct rtentry **, struct rtentry **, struct sockaddr *); +/* #endif */ + +#endif diff --git a/usrsctplib/user_sctp_callout.c b/usrsctplib/user_sctp_callout.c new file mode 100755 index 00000000..e17dd77a --- /dev/null +++ b/usrsctplib/user_sctp_callout.c @@ -0,0 +1,78 @@ +/* __Userspace__ version of sctp_callout.c file */ + + +#include <netinet/sctp_os.h> +#include "user_sctp_callout.h" +#include <netinet/sctp_pcb.h> + +static int onetime_timer_initialization = 0; + + +void +sctp_os_timer_init(sctp_os_timer_t *c) +{ + bzero(c, sizeof(*c)); +} + +void +sctp_os_timer_start(sctp_os_timer_t *c, int to_ticks, void (*ftn) (void *), + void *arg) +{ + /* if timer_init() not called previously, then call it */ + if (!onetime_timer_initialization) + { + onetime_timer_initialization = 1; + timer_init(); + } + + /* paranoia */ + if ((c == NULL) || (ftn == NULL)) + return; + + SCTP_TIMERQ_LOCK(); + /* check to see if we're rescheduling a timer */ + if (c->c_flags & SCTP_CALLOUT_PENDING) { + TAILQ_REMOVE(&SCTP_BASE_INFO(callqueue), c, tqe); + /* + * part of the normal "stop a pending callout" process + * is to clear the CALLOUT_ACTIVE and CALLOUT_PENDING + * flags. We don't bother since we are setting these + * below and we still hold the lock. 
+ */ + } + + /* + * We could unlock here and lock at the TAILQ_INSERT_TAIL, + * but there's no point since doing this setup doesn't take much time. + */ + if (to_ticks <= 0) + to_ticks = 1; + + c->c_arg = arg; + c->c_flags = (SCTP_CALLOUT_ACTIVE | SCTP_CALLOUT_PENDING); + c->c_func = ftn; + c->c_time = uticks + to_ticks; + TAILQ_INSERT_TAIL(&SCTP_BASE_INFO(callqueue), c, tqe); + SCTP_TIMERQ_UNLOCK(); + +} + +int +sctp_os_timer_stop(sctp_os_timer_t *c) +{ + + SCTP_TIMERQ_LOCK(); + /* + * Don't attempt to delete a callout that's not on the queue. + */ + if (!(c->c_flags & SCTP_CALLOUT_PENDING)) { + c->c_flags &= ~SCTP_CALLOUT_ACTIVE; + SCTP_TIMERQ_UNLOCK(); + return (0); + } + c->c_flags &= ~(SCTP_CALLOUT_ACTIVE | SCTP_CALLOUT_PENDING); + + TAILQ_REMOVE(&SCTP_BASE_INFO(callqueue), c, tqe); + SCTP_TIMERQ_UNLOCK(); + return (1); +} diff --git a/usrsctplib/user_sctp_callout.h b/usrsctplib/user_sctp_callout.h new file mode 100755 index 00000000..c901eaf0 --- /dev/null +++ b/usrsctplib/user_sctp_callout.h @@ -0,0 +1,65 @@ +/* __Userspace__ version of sctp_callout.h file */ +#include <sys/queue.h> +#include <stdlib.h> +#include <sys/types.h> + + +#ifndef __USER_SCTP_CALLOUT__ +#define __USER_SCTP_CALLOUT__ + + +/* + * __Userspace__ + * NOTE: the following MACROS are required for locking the callout + * queue along with a lock/mutex in the OS specific headers and + * implementation files:: + * - SCTP_TIMERQ_LOCK() + * - SCTP_TIMERQ_UNLOCK() + * - SCTP_TIMERQ_LOCK_INIT() + * - SCTP_TIMERQ_LOCK_DESTROY() + * + * SCTP_TIMERQ_LOCK protects: + * - sctppcbinfo.callqueue + */ + + +#define SCTP_TIMERQ_LOCK() (void)pthread_mutex_lock(&timer_mtx) +#define SCTP_TIMERQ_UNLOCK() (void)pthread_mutex_unlock(&timer_mtx) +#define SCTP_TIMERQ_LOCK_INIT() (void)pthread_mutex_init(&timer_mtx, NULL) +#define SCTP_TIMERQ_LOCK_DESTROY() (void)pthread_mutex_destroy(&timer_mtx) + +#define _USER_SCTP_NEEDS_CALLOUT_ 1 + +extern int uticks; +extern void timer_init(); +extern pthread_mutex_t timer_mtx; 
+ +TAILQ_HEAD(calloutlist, sctp_callout); + +struct sctp_callout { + TAILQ_ENTRY(sctp_callout) tqe; + int c_time; /* ticks to the event */ + void *c_arg; /* function argument */ + void (*c_func)(void *); /* function to call */ + int c_flags; /* state of this entry */ +}; +typedef struct sctp_callout sctp_os_timer_t; + +#define SCTP_CALLOUT_ACTIVE 0x0002 /* callout is currently active */ +#define SCTP_CALLOUT_PENDING 0x0004 /* callout is waiting for timeout */ + +void sctp_os_timer_init(sctp_os_timer_t *tmr); +void sctp_os_timer_start(sctp_os_timer_t *, int, void (*)(void *), void *); +int sctp_os_timer_stop(sctp_os_timer_t *); + +#define SCTP_OS_TIMER_INIT sctp_os_timer_init +#define SCTP_OS_TIMER_START sctp_os_timer_start +#define SCTP_OS_TIMER_STOP sctp_os_timer_stop +/* MT FIXME: Is the following correct? */ +#define SCTP_OS_TIMER_STOP_DRAIN SCTP_OS_TIMER_STOP +#define SCTP_OS_TIMER_PENDING(tmr) ((tmr)->c_flags & SCTP_CALLOUT_PENDING) +#define SCTP_OS_TIMER_ACTIVE(tmr) ((tmr)->c_flags & SCTP_CALLOUT_ACTIVE) +#define SCTP_OS_TIMER_DEACTIVATE(tmr) ((tmr)->c_flags &= ~SCTP_CALLOUT_ACTIVE) + + +#endif diff --git a/usrsctplib/user_sctp_timer_iterate.c b/usrsctplib/user_sctp_timer_iterate.c new file mode 100755 index 00000000..e25edcbe --- /dev/null +++ b/usrsctplib/user_sctp_timer_iterate.c @@ -0,0 +1,111 @@ +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include <unistd.h> +#include <pthread.h> +#include <errno.h> +#include <netinet/sctp_pcb.h> +#include <netinet/sctp_sysctl.h> +#include "user_sctp_callout.h" + +#define FD_SIZE 1 +/* This is the polling time of callqueue in milliseconds + * 10ms seems to work well. 1ms was giving erratic behavior + */ +#define TIMEOUT_INTERVAL 10 + +void *user_sctp_timer_iterate(void * threadname); + +void * (*timerFunction)(void *) = {&user_sctp_timer_iterate}; + +int uticks=0; /* does the value in uticks overflow after some time has elapsed? 
*/ +pthread_mutex_t timer_mtx; + +#if defined(__Userspace_os_Darwin) +/* This isn't defined on Darwin Kernel Version 8.11.1, so use FreeBSD def */ +typedef long __suseconds_t; +#endif + +void timer_init(){ + + + pthread_t ithread; + int rc; + char* tn={"iterator"}; + + /* No need to do SCTP_TIMERQ_LOCK_INIT(); here, it is being done in sctp_pcb_init() */ + + /* start one thread here */ + + + rc = pthread_create(&ithread, NULL, timerFunction, (void *)tn); + if (rc){ + printf("ERROR; return code from pthread_create() is %d\n", rc); + exit(1); + + } + +} + + +void *user_sctp_timer_iterate(void * threadname) +{ + sctp_os_timer_t *c; + void (*c_func)(void *); + void *c_arg; + sctp_os_timer_t *sctp_os_timer_next = NULL; + /* + * The MSEC_TO_TICKS conversion depends on hz. The to_ticks in + * sctp_os_timer_start also depends on hz. E.g. if hz=1000 then + * for multiple INIT the to_ticks is 2000, 4000, 8000, 16000, 32000, 60000 + * and further to_ticks level off at 60000 i.e. 60 seconds. + * If hz=100 then for multiple INIT the to_ticks are 200, 400, 800 and so-on. + */ + int time_to_ticks = MSEC_TO_TICKS(TIMEOUT_INTERVAL); + __suseconds_t timeout_interval = TIMEOUT_INTERVAL * 1000; /* in microseconds */ + + struct timeval timeout; + struct timeval *timeout_ptr; + fd_set read_fds; + int fd = 23; /* what should this value be? 
*/ + FD_ZERO(&read_fds); + FD_SET(fd, &read_fds); + + while(1) { + + timeout.tv_sec = 0; + timeout.tv_usec = timeout_interval; + timeout_ptr = &timeout; + + select(FD_SIZE, &read_fds, NULL, NULL, timeout_ptr); + + + /* update our tick count */ + uticks += time_to_ticks; + SCTP_TIMERQ_LOCK(); + c = TAILQ_FIRST(&SCTP_BASE_INFO(callqueue)); + while (c) { + if (c->c_time <= uticks) { + sctp_os_timer_next = TAILQ_NEXT(c, tqe); + TAILQ_REMOVE(&SCTP_BASE_INFO(callqueue), c, tqe); + c_func = c->c_func; + c_arg = c->c_arg; + c->c_flags &= ~SCTP_CALLOUT_PENDING; + SCTP_TIMERQ_UNLOCK(); + c_func(c_arg); + SCTP_TIMERQ_LOCK(); + c = sctp_os_timer_next; + } else { + c = TAILQ_NEXT(c, tqe); + } + } + + SCTP_TIMERQ_UNLOCK(); + } + return NULL; +} + + diff --git a/usrsctplib/user_socket.c b/usrsctplib/user_socket.c new file mode 100755 index 00000000..f2629f26 --- /dev/null +++ b/usrsctplib/user_socket.c @@ -0,0 +1,2527 @@ +#include <netinet/sctp_os.h> +#include <netinet/sctp_pcb.h> +#include <netinet/sctputil.h> +#if defined(__Userspace_os_Linux) +#define __FAVOR_BSD /* (on Ubuntu at least) enables UDP header field names like BSD in RFC 768 */ +#endif +#include <netinet/udp.h> + +/* Statically initializing accept_mtx and accept_cond since there is no call for ACCEPT_LOCK_INIT() */ +pthread_mutex_t accept_mtx = PTHREAD_MUTEX_INITIALIZER; +pthread_cond_t accept_cond = PTHREAD_COND_INITIALIZER; + +/* Prototypes */ +extern int sctp_sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, + struct mbuf *top, struct mbuf *control, int flags, + /* proc is a dummy in __Userspace__ and will not be passed to sctp_lower_sosend */ struct proc *p); + +extern int sctp_attach(struct socket *so, int proto, uint32_t vrf_id); + + + +/* Taken from usr/src/sys/kern/uipc_sockbuf.c and modified for __Userspace__*/ +/* + * Socantsendmore indicates that no more data will be sent on the socket; it + * would normally be applied to a socket when the user informs the system + * that no more data is 
to be sent, by the protocol code (in case + * PRU_SHUTDOWN). Socantrcvmore indicates that no more data will be + * received, and will normally be applied to the socket by a protocol when it + * detects that the peer will send no more data. Data queued for reading in + * the socket may yet be read. + */ + +void socantrcvmore_locked(struct socket *so) +{ + SOCKBUF_LOCK_ASSERT(&so->so_rcv); + + so->so_rcv.sb_state |= SBS_CANTRCVMORE; + sorwakeup_locked(so); + +} + +void socantrcvmore(struct socket *so) +{ + SOCKBUF_LOCK(&so->so_rcv); + socantrcvmore_locked(so); + +} + +void +socantsendmore_locked(struct socket *so) +{ + + SOCKBUF_LOCK_ASSERT(&so->so_snd); + + so->so_snd.sb_state |= SBS_CANTSENDMORE; + sowwakeup_locked(so); + +} + +void +socantsendmore(struct socket *so) +{ + + SOCKBUF_LOCK(&so->so_snd); + socantsendmore_locked(so); + +} + + + +/* Taken from usr/src/sys/kern/uipc_sockbuf.c and called within sctp_lower_sosend. + */ +int +sbwait(struct sockbuf *sb) +{ +#if defined(__Userspace__) /* __Userspace__ */ + + SOCKBUF_LOCK_ASSERT(sb); + + sb->sb_flags |= SB_WAIT; + return (pthread_cond_wait(&(sb->sb_cond), &(sb->sb_mtx))); + +#else + SOCKBUF_LOCK_ASSERT(sb); + + sb->sb_flags |= SB_WAIT; + return (msleep(&sb->sb_cc, &sb->sb_mtx, + (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait", + sb->sb_timeo)); +#endif +} + + + + +/* Taken from /src/sys/kern/uipc_socket.c + * and modified for __Userspace__ + */ +static struct socket * +soalloc(void) +{ +#if defined(__Userspace__) + struct socket *so; + + /* + * soalloc() sets of socket layer state for a socket, + * called only by socreate() and sonewconn(). + * + * sodealloc() tears down socket layer state for a socket, + * called only by sofree() and sonewconn(). + * __Userspace__ TODO : Make sure so is properly deallocated + * when tearing down the connection. 
+ */ + so = malloc(sizeof(struct socket)); + if (so == NULL) + return (NULL); + bzero(so, sizeof(struct socket)); + + /* __Userspace__ Initializing the socket locks here */ + SOCKBUF_LOCK_INIT(&so->so_snd, "so_snd"); + SOCKBUF_LOCK_INIT(&so->so_rcv, "so_rcv"); + SOCKBUF_COND_INIT(&so->so_snd); + SOCKBUF_COND_INIT(&so->so_rcv); + SOCK_COND_INIT(so); /* timeo_cond */ + /* __Userspace__ Any ref counting required here? Will we have any use for aiojobq? + What about gencnt and numopensockets?*/ + TAILQ_INIT(&so->so_aiojobq); + return (so); + +#else + /* Putting the kernel version for reference. The #else + should be removed once the __Userspace__ + version is tested. + */ + struct socket *so; + + so = uma_zalloc(socket_zone, M_NOWAIT | M_ZERO); + if (so == NULL) + return (NULL); +#ifdef MAC + if (mac_init_socket(so, M_NOWAIT) != 0) { + uma_zfree(socket_zone, so); + return (NULL); + } +#endif + SOCKBUF_LOCK_INIT(&so->so_snd, "so_snd"); + SOCKBUF_LOCK_INIT(&so->so_rcv, "so_rcv"); + sx_init(&so->so_snd.sb_sx, "so_snd_sx"); + sx_init(&so->so_rcv.sb_sx, "so_rcv_sx"); + TAILQ_INIT(&so->so_aiojobq); + mtx_lock(&so_global_mtx); + so->so_gencnt = ++so_gencnt; + ++numopensockets; + mtx_unlock(&so_global_mtx); + return (so); +#endif +} + +#if defined(__Userspace__) +/* + * Free the storage associated with a socket at the socket layer. + */ +static void +sodealloc(struct socket *so) +{ + + assert(so->so_count == 0); + assert(so->so_pcb == NULL); + + SOCKBUF_LOCK_DESTROY(&so->so_snd); + SOCKBUF_LOCK_DESTROY(&so->so_rcv); + + SOCKBUF_COND_DESTROY(&so->so_snd); + SOCKBUF_COND_DESTROY(&so->so_rcv); + + SOCK_COND_DESTROY(so); + + free(so); +} + +#else /* kernel version for reference. */ +/* + * Free the storage associated with a socket at the socket layer, tear down + * locks, labels, etc. All protocol state is assumed already to have been + * torn down (and possibly never set up) by the caller. 
+ */ +static void +sodealloc(struct socket *so) +{ + + KASSERT(so->so_count == 0, ("sodealloc(): so_count %d", so->so_count)); + KASSERT(so->so_pcb == NULL, ("sodealloc(): so_pcb != NULL")); + + mtx_lock(&so_global_mtx); + so->so_gencnt = ++so_gencnt; + --numopensockets; /* Could be below, but faster here. */ + mtx_unlock(&so_global_mtx); + if (so->so_rcv.sb_hiwat) + (void)chgsbsize(so->so_cred->cr_uidinfo, + &so->so_rcv.sb_hiwat, 0, RLIM_INFINITY); + if (so->so_snd.sb_hiwat) + (void)chgsbsize(so->so_cred->cr_uidinfo, + &so->so_snd.sb_hiwat, 0, RLIM_INFINITY); +#ifdef INET + /* remove acccept filter if one is present. */ + if (so->so_accf != NULL) + do_setopt_accept_filter(so, NULL); +#endif +#ifdef MAC + mac_destroy_socket(so); +#endif + crfree(so->so_cred); + sx_destroy(&so->so_snd.sb_sx); + sx_destroy(&so->so_rcv.sb_sx); + SOCKBUF_LOCK_DESTROY(&so->so_snd); + SOCKBUF_LOCK_DESTROY(&so->so_rcv); + uma_zfree(socket_zone, so); +} +#endif + +/* Taken from /src/sys/kern/uipc_socket.c + * and modified for __Userspace__ + */ +void +sofree(struct socket *so) +{ + /* struct protosw *pr = so->so_proto; */ + struct socket *head; + /*printf("%s::%s:%d\n", __FILE__, __FUNCTION__, __LINE__);*/ + ACCEPT_LOCK_ASSERT(); + /*printf("%s::%s:%d\n", __FILE__, __FUNCTION__, __LINE__);*/ + SOCK_LOCK_ASSERT(so); + /*printf("%s::%s:%d\n", __FILE__, __FUNCTION__, __LINE__);*/ + /* SS_NOFDREF unset in accept call. this condition seems irrelevent + * for __Userspace__... 
+ */ + if (/* (so->so_state & SS_NOFDREF) == 0 || */ so->so_count != 0 || + (so->so_state & SS_PROTOREF) || (so->so_qstate & SQ_COMP)) { + /*printf("%s::%s:%d\n", __FILE__, __FUNCTION__, __LINE__);*/ + SOCK_UNLOCK(so); + /*printf("%s::%s:%d\n", __FILE__, __FUNCTION__, __LINE__);*/ + ACCEPT_UNLOCK(); + /*printf("%s::%s:%d\n", __FILE__, __FUNCTION__, __LINE__);*/ + return; + } + head = so->so_head; + /*printf("%s::%s:%d\n", __FILE__, __FUNCTION__, __LINE__);*/ + if (head != NULL) { + /*printf("%s::%s:%d\n", __FILE__, __FUNCTION__, __LINE__);*/ + KASSERT((so->so_qstate & SQ_COMP) != 0 || + (so->so_qstate & SQ_INCOMP) != 0, + ("sofree: so_head != NULL, but neither SQ_COMP nor " + "SQ_INCOMP")); + KASSERT((so->so_qstate & SQ_COMP) == 0 || + (so->so_qstate & SQ_INCOMP) == 0, + ("sofree: so->so_qstate is SQ_COMP and also SQ_INCOMP")); + TAILQ_REMOVE(&head->so_incomp, so, so_list); + head->so_incqlen--; + so->so_qstate &= ~SQ_INCOMP; + so->so_head = NULL; + } + /*printf("%s::%s:%d\n", __FILE__, __FUNCTION__, __LINE__);*/ + KASSERT((so->so_qstate & SQ_COMP) == 0 && + (so->so_qstate & SQ_INCOMP) == 0, + ("sofree: so_head == NULL, but still SQ_COMP(%d) or SQ_INCOMP(%d)", + so->so_qstate & SQ_COMP, so->so_qstate & SQ_INCOMP)); + if (so->so_options & SO_ACCEPTCONN) { + KASSERT((TAILQ_EMPTY(&so->so_comp)), ("sofree: so_comp populated")); + KASSERT((TAILQ_EMPTY(&so->so_incomp)), ("sofree: so_comp populated")); + } + SOCK_UNLOCK(so); + ACCEPT_UNLOCK(); + /*printf("%s::%s:%d\n", __FILE__, __FUNCTION__, __LINE__);*/ + /* if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose != NULL) + (*pr->pr_domain->dom_dispose)(so->so_rcv.sb_mb); + if (pr->pr_usrreqs->pru_detach != NULL) + (*pr->pr_usrreqs->pru_detach)(so); + */ + sctp_close(so); /* was... sctp_detach(so); */ + /*printf("%s::%s:%d\n", __FILE__, __FUNCTION__, __LINE__);*/ + /* + * From this point on, we assume that no other references to this + * socket exist anywhere else in the stack. 
Therefore, no locks need + * to be acquired or held. + * + * We used to do a lot of socket buffer and socket locking here, as + * well as invoke sorflush() and perform wakeups. The direct call to + * dom_dispose() and sbrelease_internal() are an inlining of what was + * necessary from sorflush(). + * + * Notice that the socket buffer and kqueue state are torn down + * before calling pru_detach. This means that protocols shold not + * assume they can perform socket wakeups, etc, in their detach code. + */ + /* sbdestroy(&so->so_snd, so); + sbdestroy(&so->so_rcv, so); + knlist_destroy(&so->so_rcv.sb_sel.si_note); + knlist_destroy(&so->so_snd.sb_sel.si_note); */ + /*printf("%s::%s:%d\n", __FILE__, __FUNCTION__, __LINE__);*/ + sodealloc(so); + /*printf("%s::%s:%d\n", __FILE__, __FUNCTION__, __LINE__);*/ +} + + + +/* Taken from /src/sys/kern/uipc_socket.c */ +int +soabort(so) + struct socket *so; +{ + int error; + + error = sctp_abort(so); + if (error) { + sofree(so); + return error; + } + return (0); +} + + +/* Taken from usr/src/sys/kern/uipc_socket.c and called within sctp_connect (sctp_usrreq.c). + * We use sctp_connect for send_one_init_real in ms1. + */ +void +soisconnecting(struct socket *so) +{ + + SOCK_LOCK(so); + so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING); + so->so_state |= SS_ISCONNECTING; + SOCK_UNLOCK(so); +} + +/* Taken from usr/src/sys/kern/uipc_socket.c and called within sctp_disconnect (sctp_usrreq.c). + * TODO Do we use sctp_disconnect? + */ +void +soisdisconnecting(struct socket *so) +{ + + /* + * Note: This code assumes that SOCK_LOCK(so) and + * SOCKBUF_LOCK(&so->so_rcv) are the same. 
+ */ + SOCKBUF_LOCK(&so->so_rcv); + so->so_state &= ~SS_ISCONNECTING; + so->so_state |= SS_ISDISCONNECTING; + so->so_rcv.sb_state |= SBS_CANTRCVMORE; + sorwakeup_locked(so); + SOCKBUF_LOCK(&so->so_snd); + so->so_snd.sb_state |= SBS_CANTSENDMORE; + sowwakeup_locked(so); + wakeup("dummy",so); + // requires 2 args but this was in orig wakeup(&so->so_timeo); +} + + +/* Taken from sys/kern/kern_synch.c and + modified for __Userspace__ +*/ + +/* + * Make all threads sleeping on the specified identifier runnable. + * Associating wakeup with so_timeo identifier and timeo_cond + * condition variable. TODO. If we use iterator thread then we need to + * modify wakeup so it can distinguish between iterator identifier and + * timeo identifier. + */ +void +wakeup(ident, so) + void *ident; + struct socket *so; +{ + SOCK_LOCK(so); + pthread_cond_broadcast(&(so)->timeo_cond); + SOCK_UNLOCK(so); +} + + +/* + * Make a thread sleeping on the specified identifier runnable. + * May wake more than one thread if a target thread is currently + * swapped out. + */ +void +wakeup_one(ident) + void *ident; +{ + /* __Userspace__ Check: We are using accept_cond for wakeup_one. + It seems that wakeup_one is only called within + soisconnected() and sonewconn() with ident &head->so_timeo + head is so->so_head, which is back pointer to listen socket + This seems to indicate that the use of accept_cond is correct + since socket where accepts occur is so_head in all + subsidiary sockets. 
+ */ + ACCEPT_LOCK(); + pthread_cond_signal(&accept_cond); + ACCEPT_UNLOCK(); +} + + +/* Called within sctp_process_cookie_[existing/new] */ +void +soisconnected(struct socket *so) +{ + struct socket *head; + + ACCEPT_LOCK(); + SOCK_LOCK(so); + so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING); + so->so_state |= SS_ISCONNECTED; + head = so->so_head; + if (head != NULL && (so->so_qstate & SQ_INCOMP)) { + if ((so->so_options & SO_ACCEPTFILTER) == 0) { + SOCK_UNLOCK(so); + TAILQ_REMOVE(&head->so_incomp, so, so_list); + head->so_incqlen--; + so->so_qstate &= ~SQ_INCOMP; + TAILQ_INSERT_TAIL(&head->so_comp, so, so_list); + head->so_qlen++; + so->so_qstate |= SQ_COMP; + ACCEPT_UNLOCK(); + sorwakeup(head); + wakeup_one(&head->so_timeo); + } else { + ACCEPT_UNLOCK(); + /* so->so_upcall = + head->so_accf->so_accept_filter->accf_callback; + so->so_upcallarg = head->so_accf->so_accept_filter_arg; */ + so->so_rcv.sb_flags |= SB_UPCALL; + so->so_options &= ~SO_ACCEPTFILTER; + SOCK_UNLOCK(so); + /* so->so_upcall(so, so->so_upcallarg, M_DONTWAIT); */ + } + + return; + } + SOCK_UNLOCK(so); + ACCEPT_UNLOCK(); + wakeup(&so->so_timeo, so); + sorwakeup(so); + sowwakeup(so); + +} + +/* called within sctp_handle_cookie_echo */ + +struct socket * +sonewconn(struct socket *head, int connstatus) +{ + struct socket *so; + int over; + + ACCEPT_LOCK(); + over = (head->so_qlen > 3 * head->so_qlimit / 2); + ACCEPT_UNLOCK(); +#ifdef REGRESSION + if (regression_sonewconn_earlytest && over) +#else + if (over) +#endif + return (NULL); + so = soalloc(); + if (so == NULL) + return (NULL); + if ((head->so_options & SO_ACCEPTFILTER) != 0) + connstatus = 0; + so->so_head = head; + so->so_type = head->so_type; + so->so_options = head->so_options &~ SO_ACCEPTCONN; + so->so_linger = head->so_linger; + so->so_state = head->so_state | SS_NOFDREF; + so->so_proto = head->so_proto; + /* so->so_cred = crhold(head->so_cred); */ +#ifdef MAC + SOCK_LOCK(head); + 
mac_create_socket_from_socket(head, so); + SOCK_UNLOCK(head); +#endif + /* knlist_init(&so->so_rcv.sb_sel.si_note, SOCKBUF_MTX(&so->so_rcv), + NULL, NULL, NULL); + knlist_init(&so->so_snd.sb_sel.si_note, SOCKBUF_MTX(&so->so_snd), + NULL, NULL, NULL); */ + if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat) || + sctp_attach(so, IPPROTO_SCTP, SCTP_DEFAULT_VRFID) ) { + /* (*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL)) { */ + sodealloc(so); + return (NULL); + } + so->so_rcv.sb_lowat = head->so_rcv.sb_lowat; + so->so_snd.sb_lowat = head->so_snd.sb_lowat; + so->so_rcv.sb_timeo = head->so_rcv.sb_timeo; + so->so_snd.sb_timeo = head->so_snd.sb_timeo; + so->so_rcv.sb_flags |= head->so_rcv.sb_flags & SB_AUTOSIZE; + so->so_snd.sb_flags |= head->so_snd.sb_flags & SB_AUTOSIZE; + so->so_state |= connstatus; + ACCEPT_LOCK(); + if (connstatus) { + TAILQ_INSERT_TAIL(&head->so_comp, so, so_list); + so->so_qstate |= SQ_COMP; + head->so_qlen++; + } else { + /* + * Keep removing sockets from the head until there's room for + * us to insert on the tail. In pre-locking revisions, this + * was a simple if(), but as we could be racing with other + * threads and soabort() requires dropping locks, we must + * loop waiting for the condition to be true. 
+ */ + while (head->so_incqlen > head->so_qlimit) { + struct socket *sp; + sp = TAILQ_FIRST(&head->so_incomp); + TAILQ_REMOVE(&head->so_incomp, sp, so_list); + head->so_incqlen--; + sp->so_qstate &= ~SQ_INCOMP; + sp->so_head = NULL; + ACCEPT_UNLOCK(); + soabort(sp); + ACCEPT_LOCK(); + } + TAILQ_INSERT_TAIL(&head->so_incomp, so, so_list); + so->so_qstate |= SQ_INCOMP; + head->so_incqlen++; + } + ACCEPT_UNLOCK(); + if (connstatus) { + sorwakeup(head); + wakeup_one(&head->so_timeo); + } + return (so); + +} + +/* From /src/sys/sys/sysproto.h */ +struct sctp_generic_sendmsg_args { + int sd; + caddr_t msg; + int mlen; + caddr_t to; + socklen_t tolen; /* was __socklen_t */ + struct sctp_sndrcvinfo * sinfo; + int flags; +}; + +struct sctp_generic_recvmsg_args { + int sd; + struct iovec *iov; + int iovlen; + struct sockaddr *from; + socklen_t *fromlenaddr; /* was __socklen_t */ + struct sctp_sndrcvinfo *sinfo; + int *msg_flags; +}; + + + /* + Source: /src/sys/gnu/fs/xfs/FreeBSD/xfs_ioctl.c + */ + static __inline__ int +copy_to_user(void *dst, void *src, int len) { + memcpy(dst,src,len); + return 0; +} + + static __inline__ int +copy_from_user(void *dst, void *src, int len) { + memcpy(dst,src,len); + return 0; +} + +/* + References: + src/sys/dev/lmc/if_lmc.h: + src/sys/powerpc/powerpc/copyinout.c + src/sys/sys/systm.h +*/ +# define copyin(u, k, len) copy_from_user(k, u, len) + +/* References: + src/sys/powerpc/powerpc/copyinout.c + src/sys/sys/systm.h +*/ +# define copyout(k, u, len) copy_to_user(u, k, len) + + +/* copyiniov definition copied/modified from src/sys/kern/kern_subr.c */ +int +copyiniov(struct iovec *iovp, u_int iovcnt, struct iovec **iov, int error) +{ + u_int iovlen; + + *iov = NULL; + if (iovcnt > UIO_MAXIOV) + return (error); + iovlen = iovcnt * sizeof (struct iovec); + *iov = malloc(iovlen); /*, M_IOV, M_WAITOK); */ + error = copyin(iovp, *iov, iovlen); + if (error) { + free(*iov); /*, M_IOV); */ + *iov = NULL; + } + return (error); +} + +/* 
(__Userspace__) version of uiomove */ +int +uiomove(void *cp, int n, struct uio *uio) +{ + struct iovec *iov; + u_int cnt; + int error = 0; + + assert(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE); + + while (n > 0 && uio->uio_resid) { + iov = uio->uio_iov; + cnt = iov->iov_len; + if (cnt == 0) { + uio->uio_iov++; + uio->uio_iovcnt--; + continue; + } + if (cnt > n) + cnt = n; + + switch (uio->uio_segflg) { + + case UIO_USERSPACE: + if (uio->uio_rw == UIO_READ) + error = copyout(cp, iov->iov_base, cnt); + else + error = copyin(iov->iov_base, cp, cnt); + if (error) + goto out; + break; + + case UIO_SYSSPACE: + if (uio->uio_rw == UIO_READ) + bcopy(cp, iov->iov_base, cnt); + else + bcopy(iov->iov_base, cp, cnt); + break; + case UIO_NOCOPY: + break; + } + iov->iov_base = (char *)iov->iov_base + cnt; + iov->iov_len -= cnt; + uio->uio_resid -= cnt; + uio->uio_offset += cnt; + cp = (char *)cp + cnt; + n -= cnt; + } +out: + return (error); +} + + +/* Source: src/sys/kern/uipc_syscalls.c */ +int +getsockaddr(namp, uaddr, len) + struct sockaddr **namp; + caddr_t uaddr; + size_t len; +{ + struct sockaddr *sa; + int error; + + if (len > SOCK_MAXADDRLEN) + return (ENAMETOOLONG); + if (len < offsetof(struct sockaddr, sa_data[0])) + return (EINVAL); + MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK); + error = copyin(uaddr, sa, len); + if (error) { + FREE(sa, M_SONAME); + } else { +#if !defined(__Userspace_os_Linux) + sa->sa_len = len; +#endif + *namp = sa; + } + return (error); +} + + + +/* The original implementation of sctp_generic_sendmsg is in /src/sys/kern/uipc_syscalls.c + * Modifying it for __Userspace__ + */ +#if 0 +static int +sctp_generic_sendmsg (so, uap, retval) + struct socket *so; + struct sctp_generic_sendmsg_args /* { + int sd, + caddr_t msg, + int mlen, + caddr_t to, + socklen_t tolen, + struct sctp_sndrcvinfo *sinfo, + int flags + } */ *uap; + int *retval; +{ + + struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL; + int error = 0, len; + struct sockaddr 
*to = NULL; + + struct uio auio; + struct iovec iov[1]; + + if (uap->sinfo) { + error = copyin(uap->sinfo, &sinfo, sizeof (sinfo)); + if (error) + return (error); + u_sinfo = &sinfo; + } + if (uap->tolen) { + error = getsockaddr(&to, uap->to, uap->tolen); + if (error) { + to = NULL; + goto sctp_bad2; + } + } + + + iov[0].iov_base = uap->msg; + iov[0].iov_len = uap->mlen; + + + auio.uio_iov = iov; + auio.uio_iovcnt = 1; + auio.uio_segflg = UIO_USERSPACE; + auio.uio_rw = UIO_WRITE; + auio.uio_offset = 0; /* XXX */ + auio.uio_resid = 0; + len = auio.uio_resid = uap->mlen; + error = sctp_lower_sosend(so, to, &auio, + (struct mbuf *)NULL, (struct mbuf *)NULL, + uap->flags, u_sinfo); + if (error == 0) + *retval = len - auio.uio_resid; + else + *retval = (-1); +sctp_bad2: + if (to) + FREE(to, M_SONAME); + return (error); +} +#endif + +/* Taken from /src/lib/libc/net/sctp_sys_calls.c + * and modified for __Userspace__ + * calling sctp_generic_sendmsg from this function + */ +ssize_t +userspace_sctp_sendmsg(struct socket *so, + const void *data, + size_t len, + struct sockaddr *to, + socklen_t tolen, + u_int32_t ppid, + u_int32_t flags, + u_int16_t stream_no, + u_int32_t timetolive, + u_int32_t context) +{ + +#if 1 /* def SYS_sctp_generic_sendmsg __Userspace__ */ + + struct sctp_sndrcvinfo sndrcvinfo, *sinfo = &sndrcvinfo; + struct uio auio; + struct iovec iov[1]; + int error = 0; + int uflags = 0; + int retvalsendmsg; + + sinfo->sinfo_ppid = ppid; + sinfo->sinfo_flags = flags; + sinfo->sinfo_stream = stream_no; + sinfo->sinfo_timetolive = timetolive; + sinfo->sinfo_context = context; + sinfo->sinfo_assoc_id = 0; + + + /* Perform error checks on destination (to) */ + if (tolen > SOCK_MAXADDRLEN){ + error = (ENAMETOOLONG); + goto sendmsg_return; + } + if (tolen < offsetof(struct sockaddr, sa_data[0])){ + error = (EINVAL); + goto sendmsg_return; + } + /* Adding the following as part of defensive programming, in case the application + does not do it when preparing the 
destination address.*/ +#if !defined(__Userspace_os_Linux) + to->sa_len = tolen; +#endif + + + iov[0].iov_base = (caddr_t)data; + iov[0].iov_len = len; + + auio.uio_iov = iov; + auio.uio_iovcnt = 1; + auio.uio_segflg = UIO_USERSPACE; + auio.uio_rw = UIO_WRITE; + auio.uio_offset = 0; /* XXX */ + auio.uio_resid = len; + error = sctp_lower_sosend(so, to, &auio, + (struct mbuf *)NULL, (struct mbuf *)NULL, + uflags, sinfo); +sendmsg_return: + if (0 == error) + retvalsendmsg = len - auio.uio_resid; + else if(error == EWOULDBLOCK) { + errno = EWOULDBLOCK; + retvalsendmsg = (-1); + } else { + printf("%s: error = %d\n", __func__, error); + retvalsendmsg = (-1); + } + + return retvalsendmsg; + + +#if 0 /* Removed: Old implementation that does unnecessary copying of sinfo and sockaddr */ + struct sctp_sndrcvinfo sndrcvinfo, *sinfo = &sndrcvinfo; + struct sctp_generic_sendmsg_args ua, *uap = &ua; + int retval; + ssize_t sz; + + sinfo->sinfo_ppid = ppid; + sinfo->sinfo_flags = flags; + sinfo->sinfo_stream = stream_no; + sinfo->sinfo_timetolive = timetolive; + sinfo->sinfo_context = context; + sinfo->sinfo_assoc_id = 0; + /*__Userspace__ sd field is being arbitrarily set to 0 + * it appears not to be used later and not passes to + * sctp_lower_sosend + */ + uap->sd = 0; + uap->msg = (caddr_t)data; + uap->mlen = len; + uap->to = (caddr_t)to; + uap->tolen = tolen; + uap->sinfo = sinfo; + uap->flags = 0; + + sz = sctp_generic_sendmsg(so, uap, &retval); + + if(sz) /*error*/ + printf("%s: errno = sz = %d\n", __func__, sz); /* should we exit here in case of error? 
*/ + + return retval; +#endif + +#else + + ssize_t sz; + struct msghdr msg; + struct sctp_sndrcvinfo *s_info; + struct iovec iov[SCTP_SMALL_IOVEC_SIZE]; + char controlVector[SCTP_CONTROL_VEC_SIZE_RCV]; + struct cmsghdr *cmsg; + struct sockaddr *who = NULL; + union { + struct sockaddr_in in; + struct sockaddr_in6 in6; + } addr; + +/* + fprintf(io, "sctp_sendmsg(sd:%d, data:%x, len:%d, to:%x, tolen:%d, ppid:%x, flags:%x str:%d ttl:%d ctx:%x\n", + s, + (u_int)data, + (int)len, + (u_int)to, + (int)tolen, + ppid, flags, + (int)stream_no, + (int)timetolive, + (u_int)context); + fflush(io); +*/ + if ((tolen > 0) && ((to == NULL) || (tolen < sizeof(struct sockaddr)))) { + errno = EINVAL; + return -1; + } + if (to && (tolen > 0)) { + if (to->sa_family == AF_INET) { + if (tolen != sizeof(struct sockaddr_in)) { + errno = EINVAL; + return -1; + } + if ((to->sa_len > 0) && (to->sa_len != sizeof(struct sockaddr_in))) { + errno = EINVAL; + return -1; + } + memcpy(&addr, to, sizeof(struct sockaddr_in)); + addr.in.sin_len = sizeof(struct sockaddr_in); + } else if (to->sa_family == AF_INET6) { + if (tolen != sizeof(struct sockaddr_in6)) { + errno = EINVAL; + return -1; + } + if ((to->sa_len > 0) && (to->sa_len != sizeof(struct sockaddr_in6))) { + errno = EINVAL; + return -1; + } + memcpy(&addr, to, sizeof(struct sockaddr_in6)); + addr.in6.sin6_len = sizeof(struct sockaddr_in6); + } else { + errno = EAFNOSUPPORT; + return -1; + } + who = (struct sockaddr *)&addr; + } + iov[0].iov_base = (char *)data; + iov[0].iov_len = len; + iov[1].iov_base = NULL; + iov[1].iov_len = 0; + + if (who) { + msg.msg_name = (caddr_t)who; + msg.msg_namelen = who->sa_len; + } else { + msg.msg_name = (caddr_t)NULL; + msg.msg_namelen = 0; + } + msg.msg_iov = iov; + msg.msg_iovlen = 1; + msg.msg_control = (caddr_t)controlVector; + + cmsg = (struct cmsghdr *)controlVector; + + cmsg->cmsg_level = IPPROTO_SCTP; + cmsg->cmsg_type = SCTP_SNDRCV; + cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo)); + s_info 
= (struct sctp_sndrcvinfo *)CMSG_DATA(cmsg); + + s_info->sinfo_stream = stream_no; + s_info->sinfo_ssn = 0; + s_info->sinfo_flags = flags; + s_info->sinfo_ppid = ppid; + s_info->sinfo_context = context; + s_info->sinfo_assoc_id = 0; + s_info->sinfo_timetolive = timetolive; + errno = 0; + msg.msg_controllen = cmsg->cmsg_len; + sz = sendmsg(s, &msg, 0); + return (sz); +#endif +} + + +struct mbuf* mbufalloc(size_t size, void* data, unsigned char fill) +{ + size_t left; + int resv_upfront = sizeof(struct sctp_data_chunk); + int cancpy, willcpy; + struct mbuf *m, *head; + int cpsz=0; + + /* First one gets a header equal to sizeof(struct sctp_data_chunk) */ + left = size; + head = m = sctp_get_mbuf_for_msg((left + resv_upfront), 1, M_WAIT, 0, MT_DATA); + if (m == NULL) { + printf("%s: ENOMEN: Memory allocation failure\n", __func__); + return (NULL); + } + /*- + * Skipping space for chunk header. __Userspace__ Is this required? + */ + SCTP_BUF_RESV_UF(m, resv_upfront); + cancpy = M_TRAILINGSPACE(m); + willcpy = min(cancpy, left); + + while (left > 0) { + + if (data != NULL){ + /* fill in user data */ + memcpy(mtod(m, caddr_t), data+cpsz, willcpy); + }else if (fill != '\0'){ + memset(mtod(m, caddr_t), fill, willcpy); + } + + SCTP_BUF_LEN(m) = willcpy; + left -= willcpy; + cpsz += willcpy; + if (left > 0) { + SCTP_BUF_NEXT(m) = sctp_get_mbuf_for_msg(left, 0, M_WAIT, 0, MT_DATA); + if (SCTP_BUF_NEXT(m) == NULL) { + /* + * the head goes back to caller, he can free + * the rest + */ + sctp_m_freem(head); + SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM); + printf("%s: ENOMEN: Memory allocation failure\n", __func__); + return (NULL); + } + m = SCTP_BUF_NEXT(m); + cancpy = M_TRAILINGSPACE(m); + willcpy = min(cancpy, left); + } else { + SCTP_BUF_NEXT(m) = NULL; + } + } + + /* The following overwrites data in head->m_hdr.mh_data , if M_PKTHDR isn't set */ + SCTP_HEADER_LEN(head) = cpsz; + + return (head); +} + + + +struct mbuf* mbufallocfromiov(int iovlen, 
struct iovec *srciov) +{ + size_t left = 0,total; + int resv_upfront = sizeof(struct sctp_data_chunk); + int cancpy, willcpy; + struct mbuf *m, *head; + int cpsz=0,i, cur=-1, currdsz=0, mbuffillsz; + char *data; + + /* Get the total length */ + for(i=0; i < iovlen; i++) { + left += srciov[i].iov_len; + if(cur == -1 && srciov[i].iov_len > 0) { + /* set the first field where there's data */ + cur = i; + data = srciov[cur].iov_base; + } + } + total = left; + + /* First one gets a header equal to sizeof(struct sctp_data_chunk) */ + head = m = sctp_get_mbuf_for_msg((left + resv_upfront), 1, M_WAIT, 0, MT_DATA); + if (m == NULL) { + printf("%s: ENOMEN: Memory allocation failure\n", __func__); + return (NULL); + } + /*- + * Skipping space for chunk header. __Userspace__ Is this required? + */ + SCTP_BUF_RESV_UF(m, resv_upfront); + cancpy = M_TRAILINGSPACE(m); + willcpy = min(cancpy, left); + + while (left > 0) { + /* fill in user data */ + mbuffillsz = 0; + while (mbuffillsz < willcpy) { + + if(cancpy < srciov[cur].iov_len - currdsz) { + /* will fill mbuf before srciov[cur] is completely read */ + memcpy(SCTP_BUF_AT(m,mbuffillsz), data, cancpy); + data += cancpy; + currdsz += cancpy; + break; + } else { + /* will completely read srciov[cur] */ + if(srciov[cur].iov_len != currdsz) { + memcpy(SCTP_BUF_AT(m,mbuffillsz), data, srciov[cur].iov_len - currdsz); + mbuffillsz += (srciov[cur].iov_len - currdsz); + cancpy -= (srciov[cur].iov_len - currdsz); + } + currdsz = 0; + /* find next field with data */ + data = NULL; + while(++cur < iovlen) { + if(srciov[cur].iov_len > 0) { + data = srciov[cur].iov_base; + break; + } + } + } + } + + SCTP_BUF_LEN(m) = willcpy; + left -= willcpy; + cpsz += willcpy; + if (left > 0) { + SCTP_BUF_NEXT(m) = sctp_get_mbuf_for_msg(left, 0, M_WAIT, 0, MT_DATA); + if (SCTP_BUF_NEXT(m) == NULL) { + /* + * the head goes back to caller, he can free + * the rest + */ + sctp_m_freem(head); + SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, 
ENOMEM); + printf("%s: ENOMEN: Memory allocation failure\n", __func__); + return (NULL); + } + m = SCTP_BUF_NEXT(m); + cancpy = M_TRAILINGSPACE(m); + willcpy = min(cancpy, left); + } else { + SCTP_BUF_NEXT(m) = NULL; + } + } + + /* The following overwrites data in head->m_hdr.mh_data , if M_PKTHDR isn't set */ + assert(cpsz == total); + SCTP_HEADER_LEN(head) = total; + + return (head); +} + + + + +ssize_t +userspace_sctp_sendmbuf(struct socket *so, + struct mbuf* mbufdata, + size_t len, + struct sockaddr *to, + socklen_t tolen, + u_int32_t ppid, + u_int32_t flags, + u_int16_t stream_no, + u_int32_t timetolive, + u_int32_t context) +{ + + struct sctp_sndrcvinfo sndrcvinfo, *sinfo = &sndrcvinfo; + /* struct uio auio; + struct iovec iov[1]; */ + int error = 0; + int uflags = 0; + int retvalsendmsg; + + sinfo->sinfo_ppid = ppid; + sinfo->sinfo_flags = flags; + sinfo->sinfo_stream = stream_no; + sinfo->sinfo_timetolive = timetolive; + sinfo->sinfo_context = context; + sinfo->sinfo_assoc_id = 0; + + /* Perform error checks on destination (to) */ + if (tolen > SOCK_MAXADDRLEN){ + error = (ENAMETOOLONG); + goto sendmsg_return; + } + if (tolen < offsetof(struct sockaddr, sa_data[0])){ + error = (EINVAL); + goto sendmsg_return; + } + /* Adding the following as part of defensive programming, in case the application + does not do it when preparing the destination address.*/ +#if !defined(__Userspace_os_Linux) + to->sa_len = tolen; +#endif + + error = sctp_lower_sosend(so, to, NULL/*uio*/, + (struct mbuf *)mbufdata, (struct mbuf *)NULL, + uflags, sinfo); +sendmsg_return: + /* TODO: Needs a condition for non-blocking when error is EWOULDBLOCK */ + if (0 == error) + retvalsendmsg = len; + else if(error == EWOULDBLOCK) { + errno = EWOULDBLOCK; + retvalsendmsg = (-1); + } else { + printf("%s: error = %d\n", __func__, error); + errno = error; + retvalsendmsg = (-1); + } + return retvalsendmsg; + +} + +#if 0 /* Old version: To be removed */ +/* This is purely experimental for now. 
It can't handle message sizes larger than MHLEN */ +ssize_t userspace_sctp_sendmbuf(struct socket *so, const void *message, size_t length, + int flags, const struct sockaddr *dest_addr, socklen_t dest_len) +{ + struct mbuf * m; + struct mbuf * control = NULL; + struct proc *p = NULL; + + if (so == NULL) { + perror("sockset so passed to userspace_sctp_sendmbuf is NULL\n"); + exit(1); + } + + /* Just getting a single mbuf for now for a short message. + * Not appending any packet headers because that would be done + * in sctp_med_chunk_output, which prepends common sctp header and + * in sctp_lowlevel_chunk_output which attaches ip header + */ + + m = sctp_get_mbuf_for_msg(MHLEN, 0, M_DONTWAIT, 0, MT_DATA); + + if (m == NULL){ + perror("out of memory in userspace_sctp_sendmbuf\n"); + exit(1); + } + + bcopy((caddr_t)message, m->m_data, length); + SCTP_HEADER_LEN(m) = SCTP_BUF_LEN(m) = length; + + return (sctp_sosend(so, + (struct sockaddr *) dest_addr, + (struct uio *)NULL, + m, + control, + flags, + p + )); + +} +#endif + + +/* The original implementation of sctp_generic_recvmsg is in /src/sys/kern/uipc_syscalls.c + * Modifying it for __Userspace__ + */ +#if 0 +static int +sctp_generic_recvmsg(so, uap, retval) + struct socket *so; + struct sctp_generic_recvmsg_args /* { + int sd, + struct iovec *iov, + int iovlen, + struct sockaddr *from, + socklen_t *fromlenaddr, + struct sctp_sndrcvinfo *sinfo, + int *msg_flags + } */ *uap; + int *retval; +{ + u_int8_t sockbufstore[256]; + struct uio auio; + struct iovec *iov, *tiov; + struct sctp_sndrcvinfo sinfo; + struct sockaddr *fromsa; + int fromlen; + int len, i, msg_flags; + int error = 0; + error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE); + if (error) { + return (error); + } + + if (uap->fromlenaddr) { + error = copyin(uap->fromlenaddr, + &fromlen, sizeof (fromlen)); + if (error) { + goto out; + } + } else { + fromlen = 0; + } + if(uap->msg_flags) { + error = copyin(uap->msg_flags, &msg_flags, sizeof (int)); + if 
(error) { + goto out; + } + } else { + msg_flags = 0; + } + auio.uio_iov = iov; + auio.uio_iovcnt = uap->iovlen; + auio.uio_segflg = UIO_USERSPACE; + auio.uio_rw = UIO_READ; + auio.uio_offset = 0; /* XXX */ + auio.uio_resid = 0; + tiov = iov; + for (i = 0; i <uap->iovlen; i++, tiov++) { + if ((auio.uio_resid += tiov->iov_len) < 0) { + error = EINVAL; + goto out; + } + } + len = auio.uio_resid; + fromsa = (struct sockaddr *)sockbufstore; + + error = sctp_sorecvmsg(so, &auio, (struct mbuf **)NULL, + fromsa, fromlen, &msg_flags, + (struct sctp_sndrcvinfo *)&sinfo, 1); + if (error) { + if (auio.uio_resid != (int)len && (error == ERESTART || + error == EINTR || error == EWOULDBLOCK)) + error = 0; + } else { + if (uap->sinfo) + error = copyout(&sinfo, uap->sinfo, sizeof (sinfo)); + } + if (error) + goto out; + + /* ready return value */ + /* td->td_retval[0] = (int)len - auio.uio_resid; original */ + *retval = (int)len - auio.uio_resid; + + if (fromlen && uap->from) { + len = fromlen; + if (len <= 0 || fromsa == 0) + len = 0; + else { +#if !defined(__Userspace_os_Linux) + len = min(len, fromsa->sa_len); +#else + len = min(len, sizeof(*fromsa)); +#endif + error = copyout(fromsa, uap->from, (unsigned)len); + if (error) + goto out; + } + error = copyout(&len, uap->fromlenaddr, sizeof (socklen_t)); + if (error) { + goto out; + } + } + if (uap->msg_flags) { + error = copyout(&msg_flags, uap->msg_flags, sizeof (int)); + if (error) { + goto out; + } + } +out: + free(iov); /* , M_IOV); */ + + return (error); +} +#endif + +/* taken from usr.lib/sctp_sys_calls.c and needed here */ +#define SCTP_SMALL_IOVEC_SIZE 2 + +/* Taken from /src/lib/libc/net/sctp_sys_calls.c + * and modified for __Userspace__ + * calling sctp_generic_recvmsg from this function + */ +ssize_t +userspace_sctp_recvmsg(struct socket *so, + void *dbuf, + size_t len, + struct sockaddr *from, + socklen_t * fromlen, + struct sctp_sndrcvinfo *sinfo, + int *msg_flags) +{ +#if 1 /* def SYS_sctp_generic_recvmsg 
__Userspace__ */ + + struct uio auio; + struct iovec iov[SCTP_SMALL_IOVEC_SIZE]; + struct iovec *tiov; + int iovlen = 1; + int error = 0; + int ulen, i, retval; + + iov[0].iov_base = dbuf; + iov[0].iov_len = len; + + auio.uio_iov = iov; + auio.uio_iovcnt = iovlen; + auio.uio_segflg = UIO_USERSPACE; + auio.uio_rw = UIO_READ; + auio.uio_offset = 0; /* XXX */ + auio.uio_resid = 0; + tiov = iov; + for (i = 0; i <iovlen; i++, tiov++) { + if ((auio.uio_resid += tiov->iov_len) < 0) { + error = EINVAL; + printf("%s: error = %d\n", __func__, error); + return (-1); + } + } + ulen = auio.uio_resid; + error = sctp_sorecvmsg(so, &auio, (struct mbuf **)NULL, + from, *fromlen, msg_flags, + (struct sctp_sndrcvinfo *)sinfo, 1); + + if (error) { + if (auio.uio_resid != (int)ulen && (error == ERESTART || + error == EINTR || error == EWOULDBLOCK)) + error = 0; + } + + if (0 == error){ + /* ready return value */ + retval = (int)ulen - auio.uio_resid; + return retval; + }else{ + printf("%s: error = %d\n", __func__, error); + return (-1); + } + + +#if 0 /* Removed: Old implementation that does unnecessary copying of sinfo and sockaddr */ + + struct iovec iov[SCTP_SMALL_IOVEC_SIZE]; + struct sctp_generic_recvmsg_args ua, *uap = &ua; + ssize_t sz; + int retval; + + iov[0].iov_base = dbuf; + iov[0].iov_len = len; + + uap->sd = 0; + uap->iov = iov; + uap->iovlen = 1; + uap->from = from; + uap->fromlenaddr = fromlen; + uap->sinfo = sinfo; + uap->msg_flags = msg_flags; + + + sz = sctp_generic_recvmsg(so, uap, &retval); + if(sz == 0) { + /* success */ + return retval; + } + + /* error */ + errno = sz; + return (-1); +#endif + +#else + + struct sctp_sndrcvinfo *s_info; + ssize_t sz; + int sinfo_found = 0; + struct msghdr msg; + struct iovec iov[SCTP_SMALL_IOVEC_SIZE]; + char controlVector[SCTP_CONTROL_VEC_SIZE_RCV]; + struct cmsghdr *cmsg; + + if (msg_flags == NULL) { + errno = EINVAL; + return (-1); + } + msg.msg_flags = 0; + iov[0].iov_base = dbuf; + iov[0].iov_len = len; + iov[1].iov_base = 
NULL; + iov[1].iov_len = 0; + msg.msg_name = (caddr_t)from; + if (fromlen == NULL) + msg.msg_namelen = 0; + else + msg.msg_namelen = *fromlen; + msg.msg_iov = iov; + msg.msg_iovlen = 1; + msg.msg_control = (caddr_t)controlVector; + msg.msg_controllen = sizeof(controlVector); + errno = 0; + sz = recvmsg(s, &msg, *msg_flags); + if (sz <= 0) + return (sz); + + s_info = NULL; + len = sz; + *msg_flags = msg.msg_flags; + if (sinfo) + sinfo->sinfo_assoc_id = 0; + + if ((msg.msg_controllen) && sinfo) { + /* + * parse through and see if we find the sctp_sndrcvinfo (if + * the user wants it). + */ + cmsg = (struct cmsghdr *)controlVector; + while (cmsg) { + if ((cmsg->cmsg_len == 0) || (cmsg->cmsg_len > msg.msg_controllen)) { + break; + } + if (cmsg->cmsg_level == IPPROTO_SCTP) { + if (cmsg->cmsg_type == SCTP_SNDRCV) { + /* Got it */ + s_info = (struct sctp_sndrcvinfo *)CMSG_DATA(cmsg); + /* Copy it to the user */ + if (sinfo) + *sinfo = *s_info; + sinfo_found = 1; + break; + } else if (cmsg->cmsg_type == SCTP_EXTRCV) { + /* + * Got it, presumably the user has + * asked for this extra info, so the + * structure holds more room :-D + */ + s_info = (struct sctp_sndrcvinfo *)CMSG_DATA(cmsg); + /* Copy it to the user */ + if (sinfo) { + memcpy(sinfo, s_info, sizeof(struct sctp_extrcvinfo)); + } + sinfo_found = 1; + break; + + } + } + cmsg = CMSG_NXTHDR(&msg, cmsg); + } + } + return (sz); +#endif +} + + + +#if defined(__Userspace__) +/* Taken from /src/sys/kern/uipc_socket.c + * and modified for __Userspace__ + * socreate returns a socket. The socket should be + * closed with soclose(). 
+ */ +int +socreate(int dom, struct socket **aso, int type, int proto) +{ + struct socket *so; + int error; + + assert((AF_INET == dom) || (AF_LOCAL == dom)); + assert((SOCK_STREAM == type) || (SOCK_SEQPACKET == type)); + assert(IPPROTO_SCTP == proto); + + so = soalloc(); + if (so == NULL) + return (ENOBUFS); + + /* + * so_incomp represents a queue of connections that + * must be completed at protocol level before being + * returned. so_comp field heads a list of sockets + * that are ready to be returned to the listening process + *__Userspace__ These queues are being used at a number of places like accept etc. + */ + TAILQ_INIT(&so->so_incomp); + TAILQ_INIT(&so->so_comp); + so->so_type = type; + so->so_count = 1; + /* + * Auto-sizing of socket buffers is managed by the protocols and + * the appropriate flags must be set in the pru_attach function. + * For __Userspace__ The pru_attach function in this case is sctp_attach. + */ + error = sctp_attach(so, proto, SCTP_DEFAULT_VRFID); + if (error) { + assert(so->so_count == 1); + so->so_count = 0; + sodealloc(so); + return (error); + } + *aso = so; + return (0); +} +#else +/* The kernel version for reference is below. The #else + should be removed once the __Userspace__ + version is tested. + * socreate returns a socket with a ref count of 1. The socket should be + * closed with soclose(). 
+ */ +int +socreate(int dom, struct socket **aso, int type, int proto, + struct ucred *cred, struct thread *td) +{ + struct protosw *prp; + struct socket *so; + int error; + + if (proto) + prp = pffindproto(dom, proto, type); + else + prp = pffindtype(dom, type); + + if (prp == NULL || prp->pr_usrreqs->pru_attach == NULL || + prp->pr_usrreqs->pru_attach == pru_attach_notsupp) + return (EPROTONOSUPPORT); + + if (jailed(cred) && jail_socket_unixiproute_only && + prp->pr_domain->dom_family != PF_LOCAL && + prp->pr_domain->dom_family != PF_INET && + prp->pr_domain->dom_family != PF_ROUTE) { + return (EPROTONOSUPPORT); + } + + if (prp->pr_type != type) + return (EPROTOTYPE); + so = soalloc(); + if (so == NULL) + return (ENOBUFS); + + TAILQ_INIT(&so->so_incomp); + TAILQ_INIT(&so->so_comp); + so->so_type = type; + so->so_cred = crhold(cred); + so->so_proto = prp; +#ifdef MAC + mac_create_socket(cred, so); +#endif + knlist_init(&so->so_rcv.sb_sel.si_note, SOCKBUF_MTX(&so->so_rcv), + NULL, NULL, NULL); + knlist_init(&so->so_snd.sb_sel.si_note, SOCKBUF_MTX(&so->so_snd), + NULL, NULL, NULL); + so->so_count = 1; + /* + * Auto-sizing of socket buffers is managed by the protocols and + * the appropriate flags must be set in the pru_attach function. + */ + error = (*prp->pr_usrreqs->pru_attach)(so, proto, td); + if (error) { + KASSERT(so->so_count == 1, ("socreate: so_count %d", + so->so_count)); + so->so_count = 0; + sodealloc(so); + return (error); + } + *aso = so; + return (0); +} +#endif + + + + +/* Taken from /src/sys/kern/uipc_syscalls.c + * and modified for __Userspace__ + * Removing struct thread td. + */ +struct socket * +userspace_socket(int domain, int type, int protocol) +{ + struct socket *so = NULL; + int error; + + error = socreate(domain, &so, type, protocol); + if (error) { + perror("In user_socket(): socreate failed\n"); + exit(1); + } + /* + * The original socket call returns the file descriptor fd. + * td->td_retval[0] = fd. 
+ * We are returning struct socket *so. + */ + return (so); +} + + +u_long sb_max = SB_MAX; +u_long sb_max_adj = + SB_MAX * MCLBYTES / (MSIZE + MCLBYTES); /* adjusted sb_max */ + +static u_long sb_efficiency = 8; /* parameter for sbreserve() */ + +#if defined (__Userspace__) +/* + * Allot mbufs to a sockbuf. Attempt to scale mbmax so that mbcnt doesn't + * become limiting if buffering efficiency is near the normal case. + */ +int +sbreserve_locked(struct sockbuf *sb, u_long cc, struct socket *so) +{ + SOCKBUF_LOCK_ASSERT(sb); + sb->sb_mbmax = min(cc * sb_efficiency, sb_max); + if (sb->sb_lowat > sb->sb_hiwat) + sb->sb_lowat = sb->sb_hiwat; + return (1); +} +#else /* kernel version for reference */ +/* + * Allot mbufs to a sockbuf. Attempt to scale mbmax so that mbcnt doesn't + * become limiting if buffering efficiency is near the normal case. + */ +int +sbreserve_locked(struct sockbuf *sb, u_long cc, struct socket *so, + struct thread *td) +{ + rlim_t sbsize_limit; + + SOCKBUF_LOCK_ASSERT(sb); + + /* + * td will only be NULL when we're in an interrupt (e.g. in + * tcp_input()). + * + * XXXRW: This comment needs updating, as might the code. 
+ */ + if (cc > sb_max_adj) + return (0); + if (td != NULL) { + PROC_LOCK(td->td_proc); + sbsize_limit = lim_cur(td->td_proc, RLIMIT_SBSIZE); + PROC_UNLOCK(td->td_proc); + } else + sbsize_limit = RLIM_INFINITY; + if (!chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, cc, + sbsize_limit)) + return (0); + sb->sb_mbmax = min(cc * sb_efficiency, sb_max); + if (sb->sb_lowat > sb->sb_hiwat) + sb->sb_lowat = sb->sb_hiwat; + return (1); +} +#endif + + + +#if defined(__Userspace__) +int +soreserve(struct socket *so, u_long sndcc, u_long rcvcc) +{ + + SOCKBUF_LOCK(&so->so_snd); + SOCKBUF_LOCK(&so->so_rcv); + so->so_snd.sb_hiwat = sndcc; + so->so_rcv.sb_hiwat = rcvcc; + + if (sbreserve_locked(&so->so_snd, sndcc, so) == 0) + goto bad; + if (sbreserve_locked(&so->so_rcv, rcvcc, so) == 0) + goto bad; + if (so->so_rcv.sb_lowat == 0) + so->so_rcv.sb_lowat = 1; + if (so->so_snd.sb_lowat == 0) + so->so_snd.sb_lowat = MCLBYTES; + if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat) + so->so_snd.sb_lowat = so->so_snd.sb_hiwat; + SOCKBUF_UNLOCK(&so->so_rcv); + SOCKBUF_UNLOCK(&so->so_snd); + return (0); + + bad: + SOCKBUF_UNLOCK(&so->so_rcv); + SOCKBUF_UNLOCK(&so->so_snd); + return (ENOBUFS); +} +#else /* kernel version for reference */ +int +soreserve(struct socket *so, u_long sndcc, u_long rcvcc) +{ + struct thread *td = curthread; + + SOCKBUF_LOCK(&so->so_snd); + SOCKBUF_LOCK(&so->so_rcv); + if (sbreserve_locked(&so->so_snd, sndcc, so, td) == 0) + goto bad; + if (sbreserve_locked(&so->so_rcv, rcvcc, so, td) == 0) + goto bad2; + if (so->so_rcv.sb_lowat == 0) + so->so_rcv.sb_lowat = 1; + if (so->so_snd.sb_lowat == 0) + so->so_snd.sb_lowat = MCLBYTES; + if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat) + so->so_snd.sb_lowat = so->so_snd.sb_hiwat; + SOCKBUF_UNLOCK(&so->so_rcv); + SOCKBUF_UNLOCK(&so->so_snd); + return (0); +bad2: + sbrelease_locked(&so->so_snd, so); +bad: + SOCKBUF_UNLOCK(&so->so_rcv); + SOCKBUF_UNLOCK(&so->so_snd); + return (ENOBUFS); +} +#endif + + + + + +/* Taken from 
/src/sys/kern/uipc_sockbuf.c + * and modified for __Userspace__ + */ + +#if defined(__Userspace__) +void +sowakeup(struct socket *so, struct sockbuf *sb) +{ + + SOCKBUF_LOCK_ASSERT(sb); + + sb->sb_flags &= ~SB_SEL; + if (sb->sb_flags & SB_WAIT) { + sb->sb_flags &= ~SB_WAIT; + pthread_cond_signal(&(sb)->sb_cond); + } + SOCKBUF_UNLOCK(sb); + /*__Userspace__ what todo about so_upcall?*/ + +} +#else /* kernel version for reference */ +/* + * Wakeup processes waiting on a socket buffer. Do asynchronous notification + * via SIGIO if the socket has the SS_ASYNC flag set. + * + * Called with the socket buffer lock held; will release the lock by the end + * of the function. This allows the caller to acquire the socket buffer lock + * while testing for the need for various sorts of wakeup and hold it through + * to the point where it's no longer required. We currently hold the lock + * through calls out to other subsystems (with the exception of kqueue), and + * then release it to avoid lock order issues. It's not clear that's + * correct. 
+ */ +void +sowakeup(struct socket *so, struct sockbuf *sb) +{ + + SOCKBUF_LOCK_ASSERT(sb); + + selwakeuppri(&sb->sb_sel, PSOCK); + sb->sb_flags &= ~SB_SEL; + if (sb->sb_flags & SB_WAIT) { + sb->sb_flags &= ~SB_WAIT; + wakeup(&sb->sb_cc); + } + KNOTE_LOCKED(&sb->sb_sel.si_note, 0); + SOCKBUF_UNLOCK(sb); + if ((so->so_state & SS_ASYNC) && so->so_sigio != NULL) + pgsigio(&so->so_sigio, SIGIO, 0); + if (sb->sb_flags & SB_UPCALL) + (*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT); + if (sb->sb_flags & SB_AIO) + aio_swake(so, sb); + mtx_assert(SOCKBUF_MTX(sb), MA_NOTOWNED); +} +#endif + + + +/* Taken from /src/sys/kern/uipc_socket.c + * and modified for __Userspace__ + */ + +int +sobind(struct socket *so, struct sockaddr *nam) +{ + + return (sctp_bind(so, nam)); +} + + +/* Taken from /src/sys/kern/uipc_syscalls.c + * kern_bind modified for __Userspace__ + */ + +int +user_bind(so, sa) + struct socket *so; + struct sockaddr *sa; +{ + int error; + error = sobind(so, sa); + return (error); +} + +/* Taken from /src/sys/kern/uipc_syscalls.c + * and modified for __Userspace__ + */ + +int +userspace_bind(so, name, namelen) + struct socket *so; + struct sockaddr *name; + int namelen; + +{ + struct sockaddr *sa; + int error; + + if ((error = getsockaddr(&sa, (caddr_t)name, namelen)) != 0) + return (error); + + error = user_bind(so, sa); + FREE(sa, M_SONAME); + return (error); +} + + + +/* Taken from /src/sys/kern/uipc_socket.c + * and modified for __Userspace__ + */ + +int +solisten(struct socket *so, int backlog) +{ + + return (sctp_listen(so, backlog, NULL)); +} + + +int +solisten_proto_check(struct socket *so) +{ + + SOCK_LOCK_ASSERT(so); + + if (so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING | + SS_ISDISCONNECTING)) + return (EINVAL); + return (0); +} + +static int somaxconn = SOMAXCONN; + +void +solisten_proto(struct socket *so, int backlog) +{ + + SOCK_LOCK_ASSERT(so); + + if (backlog < 0 || backlog > somaxconn) + backlog = somaxconn; + so->so_qlimit = backlog; + 
so->so_options |= SO_ACCEPTCONN; +} + + + + +/* Taken from /src/sys/kern/uipc_syscalls.c + * and modified for __Userspace__ + */ + +int +userspace_listen(so, backlog) + struct socket *so; + int backlog; + +{ + int error; + + error = solisten(so, backlog); + + return(error); +} + + +/* Taken from /src/sys/kern/uipc_socket.c + * and modified for __Userspace__ + */ + +int +soaccept(struct socket *so, struct sockaddr **nam) +{ + int error; + + SOCK_LOCK(so); + KASSERT((so->so_state & SS_NOFDREF) != 0, ("soaccept: !NOFDREF")); + so->so_state &= ~SS_NOFDREF; + SOCK_UNLOCK(so); + error = sctp_accept(so, nam); + return (error); +} + + + +/* Taken from /src/sys/kern/uipc_syscalls.c + * kern_accept modified for __Userspace__ + */ +int +user_accept(struct socket *aso, struct sockaddr **name, socklen_t *namelen, struct socket **ptr_accept_ret_sock) +{ + struct sockaddr *sa = NULL; + int error; + struct socket *head = aso; + struct socket *so; + + + if (name) { + *name = NULL; + if (*namelen < 0) + return (EINVAL); + } + + if ((head->so_options & SO_ACCEPTCONN) == 0) { + error = EINVAL; + goto done; + } + + ACCEPT_LOCK(); + if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) { + ACCEPT_UNLOCK(); + error = EWOULDBLOCK; + goto noconnection; + } + while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) { + if (head->so_rcv.sb_state & SBS_CANTRCVMORE) { + head->so_error = ECONNABORTED; + break; + } + error = pthread_cond_wait(&accept_cond, &accept_mtx); + if (error) { + ACCEPT_UNLOCK(); + goto noconnection; + } + } + if (head->so_error) { + error = head->so_error; + head->so_error = 0; + ACCEPT_UNLOCK(); + goto noconnection; + } + so = TAILQ_FIRST(&head->so_comp); + KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP")); + KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP")); + + /* + * Before changing the flags on the socket, we have to bump the + * reference count. 
Otherwise, if the protocol calls sofree(), + * the socket will be released due to a zero refcount. + */ + SOCK_LOCK(so); /* soref() and so_state update */ + soref(so); /* file descriptor reference */ + + TAILQ_REMOVE(&head->so_comp, so, so_list); + head->so_qlen--; + so->so_state |= (head->so_state & SS_NBIO); + so->so_qstate &= ~SQ_COMP; + so->so_head = NULL; + + SOCK_UNLOCK(so); + ACCEPT_UNLOCK(); + + + /* + * The original accept returns fd value via td->td_retval[0] = fd; + * we will return the socket for accepted connection. + */ + + sa = 0; + error = soaccept(so, &sa); + if (error) { + /* + * return a namelen of zero for older code which might + * ignore the return value from accept. + */ + if (name) + *namelen = 0; + goto noconnection; + } + if (sa == NULL) { + if (name) + *namelen = 0; + goto done; + } + if (name) { +#if !defined(__Userspace_os_Linux) + /* check sa_len before it is destroyed */ + if (*namelen > sa->sa_len) + *namelen = sa->sa_len; +#endif + *name = sa; + sa = NULL; + } +noconnection: + if (sa) + FREE(sa, M_SONAME); + + +done: + *ptr_accept_ret_sock = so; + return (error); +} + + + +/* Taken from /src/sys/kern/uipc_syscalls.c + * and modified for __Userspace__ + */ +/* + * accept1() + */ +static int +accept1(so, aname, anamelen, ptr_accept_ret_sock) + struct socket *so; + struct sockaddr * aname; + socklen_t * anamelen; + struct socket **ptr_accept_ret_sock; +{ + struct sockaddr *name; + socklen_t namelen; + int error; + + if (aname == NULL) + return (user_accept(so, NULL, NULL, ptr_accept_ret_sock)); + + error = copyin(anamelen, &namelen, sizeof (namelen)); + if (error) + return (error); + + error = user_accept(so, &name, &namelen, ptr_accept_ret_sock); + + /* + * return a namelen of zero for older code which might + * ignore the return value from accept. 
+ */ + if (error) { + (void) copyout(&namelen, + anamelen, sizeof(*anamelen)); + return (error); + } + + if (error == 0 && name != NULL) { + + error = copyout(name, aname, namelen); + } + if (error == 0) + error = copyout(&namelen, anamelen, + sizeof(namelen)); + + if(name) + FREE(name, M_SONAME); + return (error); +} + + + +struct socket * +userspace_accept(so, aname, anamelen) + struct socket *so; + struct sockaddr * aname; + socklen_t * anamelen; +{ + int error; + struct socket *accept_return_sock; + error = accept1(so, aname, anamelen, &accept_return_sock); + if(error) + printf("%s: error=%d\n",__func__, error); /* should we exit here in case of error? */ + return (accept_return_sock); +} + + + +int +sodisconnect(struct socket *so) +{ + int error; + + if ((so->so_state & SS_ISCONNECTED) == 0) + return (ENOTCONN); + if (so->so_state & SS_ISDISCONNECTING) + return (EALREADY); + error = sctp_disconnect(so); + return (error); +} + + +int +soconnect(struct socket *so, struct sockaddr *nam) +{ + int error; + + if (so->so_options & SO_ACCEPTCONN) + return (EOPNOTSUPP); + /* + * If protocol is connection-based, can only connect once. + * Otherwise, if connected, try to disconnect first. This allows + * user to disconnect by connecting to, e.g., a null address. + */ + if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) && + (error = sodisconnect(so))) { + error = EISCONN; + } else { + /* + * Prevent accumulated error from previous connection from + * biting us. 
+ */ + so->so_error = 0; + error = sctp_connect(so, nam); + } + + return (error); +} + + + +int user_connect(so, sa) + struct socket *so; + struct sockaddr *sa; +{ + int error; + int interrupted = 0; + + if (so->so_state & SS_ISCONNECTING) { + error = EALREADY; + goto done1; + } + + error = soconnect(so, sa); + if (error) + goto bad; + if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { + error = EINPROGRESS; + goto done1; + } + + SOCK_LOCK(so); + while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { + error = pthread_cond_wait(SOCK_COND(so), SOCK_MTX(so)); + if (error) { + if (error == EINTR || error == ERESTART) + interrupted = 1; + break; + } + } + if (error == 0) { + error = so->so_error; + so->so_error = 0; + } + SOCK_UNLOCK(so); + +bad: + if (!interrupted) + so->so_state &= ~SS_ISCONNECTING; + if (error == ERESTART) + error = EINTR; +done1: + return (error); +} + + + +int userspace_connect(so, name, namelen) + struct socket *so; + struct sockaddr *name; + int namelen; +{ + + struct sockaddr *sa; + int error; + + error = getsockaddr(&sa, (caddr_t)name, namelen); + if (error) + return (error); + + error = user_connect(so, sa); + FREE(sa, M_SONAME); + return (error); + +} + +void userspace_close(struct socket *so) { + ACCEPT_LOCK(); + SOCK_LOCK(so); + /*printf("%s::%s:%d\n", __FILE__, __FUNCTION__, __LINE__);*/ + sorele(so); + /*printf("%s::%s:%d\n", __FILE__, __FUNCTION__, __LINE__);*/ +} + +/* needed from sctp_usrreq.c */ +int +sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize, void *p); +int +userspace_setsockopt(struct socket *so, int level, int option_name, + const void *option_value, socklen_t option_len) +{ + return (sctp_setopt(so, option_name, (void *) option_value, option_len, NULL)); +} + +/* needed from sctp_usrreq.c */ +int +sctp_getopt(struct socket *so, int optname, void *optval, size_t *optsize, + void *p); +int +userspace_getsockopt(struct socket *so, int level, int option_name, + void 
*option_value, socklen_t option_len) +{ + return (sctp_getopt(so, option_name, option_value, (size_t*)&option_len, NULL)); +} + +#if 1 /* using iovec to sendmsg */ +/* "result" is mapped to an errno if something bad occurs */ +void sctp_userspace_ip_output(int *result, struct mbuf *o_pak, + struct route *ro, void *stcb, + uint32_t vrf_id) +{ + const int MAXLEN_MBUF_CHAIN = 32; /* What should this value be? */ + struct iovec send_iovec[MAXLEN_MBUF_CHAIN]; + struct mbuf *m; + struct mbuf *m_orig; + int iovcnt; + int send_len; + int len; + int send_count; + int i; + struct ip *ip; + struct udphdr *udp; + int res; + struct sockaddr_in dst; + struct msghdr msg_hdr; + int use_udp_tunneling; + + *result = 0; + i=0; + iovcnt = 0; + send_count = 0; + + m = SCTP_HEADER_TO_CHAIN(o_pak); + m_orig = m; + + len = sizeof(struct ip); + if (SCTP_BUF_LEN(m) < len) { + if ((m = m_pullup(m, len)) == 0) { + printf("Can not get the IP header in the first mbuf.\n"); + return; + } + } + ip = mtod(m, struct ip *); + use_udp_tunneling = (ip->ip_p == IPPROTO_UDP); + + if (use_udp_tunneling) { + len = sizeof(struct ip) + sizeof(struct udphdr); + if (SCTP_BUF_LEN(m) < len) { + if ((m = m_pullup(m, len)) == 0) { + printf("Can not get the UDP/IP header in the first mbuf.\n"); + return; + } + ip = mtod(m, struct ip *); + } + udp = (struct udphdr *)(ip + 1); + } + + if (!use_udp_tunneling) { + if (ip->ip_src.s_addr == INADDR_ANY) { + /* TODO get addr of outgoing interface */ + printf("Why did the SCTP implementation did not choose a source address?\n"); + } + /* TODO need to worry about ro->ro_dst as in ip_output? 
*/ +#if defined(__Userspace_os_Linux) + /* need to put certain fields into network order for Linux */ + ip->ip_len = htons(ip->ip_len); + ip->ip_tos = htons(ip->ip_tos); + ip->ip_off = 0; +#endif + } + + memset((void *)&dst, 0, sizeof(struct sockaddr_in)); + dst.sin_family = AF_INET; + dst.sin_addr.s_addr = ip->ip_dst.s_addr; +#if !defined(__Userspace_os_Linux) + dst.sin_len = sizeof(struct sockaddr_in); +#endif + if (use_udp_tunneling) { + dst.sin_port = udp->uh_dport; + } else { + dst.sin_port = 0; + } + + /*tweak the mbuf chain */ + if (use_udp_tunneling) { + m_adj(m, sizeof(struct ip) + sizeof(struct udphdr)); + } + + send_len = SCTP_HEADER_LEN(m); /* length of entire packet */ + send_count = 0; + do { + send_iovec[i].iov_base = (caddr_t)m->m_data; + send_iovec[i].iov_len = SCTP_BUF_LEN(m); + send_count += send_iovec[i].iov_len; + iovcnt++; + i++; + m = m->m_next; + } while(m); + assert(send_count == send_len); + msg_hdr.msg_name = (struct sockaddr *) &dst; + msg_hdr.msg_namelen = sizeof(struct sockaddr_in); + msg_hdr.msg_iov = send_iovec; + msg_hdr.msg_iovlen = iovcnt; + msg_hdr.msg_control = NULL; + msg_hdr.msg_controllen = 0; + msg_hdr.msg_flags = 0; + + if ((!use_udp_tunneling) && (userspace_rawsctp > -1)) { + if ((res = sendmsg(userspace_rawsctp, &msg_hdr, MSG_DONTWAIT)) != send_len) { + *result = errno; + } + } + if ((use_udp_tunneling) && (userspace_udpsctp > -1)) { + if ((res = sendmsg(userspace_udpsctp, &msg_hdr, MSG_DONTWAIT)) != send_len) { + *result = errno; + } + } + sctp_m_freem(m_orig); +} + +#else /* old version of sctp_userspace_ip_output that makes a copy using pack_send_buffer */ +/*extern int pack_send_buffer(caddr_t buffer, struct mbuf* mb, int len); */ +extern int pack_send_buffer(caddr_t buffer, struct mbuf* mb); + +/* "result" is mapped to an errno if something bad occurs */ +void sctp_userspace_ip_output(int *result, struct mbuf *o_pak, + struct route *ro, void *stcb, + uint32_t vrf_id) +{ + struct mbuf; + struct ip *ip; + struct 
sctphdr *sh; + struct udphdr *udp; + struct sockaddr_in dst; + int o_flgs = 0, res; + /* const int hdrincl = 1; comment when using separate receive thread */ + char *ptr; + char *send_buf = NULL, *psend_buf = NULL; + send_buf = (char*) malloc(SCTP_DEFAULT_MAXSEGMENT); + if(NULL == send_buf){ + perror("malloc failure"); + exit(1); + } + psend_buf = mtod(o_pak, char *); + int send_len = SCTP_BUF_LEN(o_pak); + printf("%s:%d send_len=%d\n", __FUNCTION__, __LINE__, send_len); + int count_copied; + + *result = 0; + +#if 0 /* raw socket is being created in recv_thread_init() */ + /* use raw socket, create if not initialized */ + if (userspace_rawsctp == -1) { + if ((userspace_rawsctp = socket(AF_INET, SOCK_RAW, IPPROTO_SCTP)) < 0) { + *result = errno; + } + if (setsockopt(userspace_rawsctp, IPPROTO_IP, IP_HDRINCL, &hdrincl, sizeof(int)) < 0) { + *result = errno; + } + } +#endif + + if (userspace_rawsctp > -1 || userspace_udpsctp > -1) { + + /* may not all be in head mbuf */ + if (o_pak->m_flags & M_PKTHDR && + o_pak->m_next && + MHLEN > send_len) { + /* m_pullup currently causes assertion failure on second iteration + * of sending an INIT, so for now, copy chain into a contiguous + * buffer manually. + */ + /*o_pak = m_pullup(o_pak, SCTP_BUF_LEN(o_pak) + SCTP_BUF_LEN(o_pak->m_next)); */ + + send_len = SCTP_HEADER_LEN(o_pak); /* need the full chain len... 
*/ + + /* count_copied = pack_send_buffer(send_buf, o_pak, send_len); + assert(count_copied == send_len); */ + send_len = count_copied = pack_send_buffer(send_buf, o_pak); + printf("%s:%d send_len=%d\n", __FUNCTION__, __LINE__, send_len); + psend_buf = send_buf; + } + + /* TODO if m_next or M_EXT still exists, use iovec */ + + ip = (struct ip *) psend_buf; + ptr = (char *) ip; + ptr += sizeof(struct ip); + bzero(&dst, sizeof(dst)); + dst.sin_family = AF_INET; + dst.sin_addr.s_addr = ip->ip_dst.s_addr; +#if !defined(__Userspace_os_Linux) + dst.sin_len = sizeof(dst); +#endif + + if(ip->ip_p == IPPROTO_UDP) { + /* if the upper layer protocol is UDP then take away the IP and UDP header + * and send the remaining stuff over the UDP socket. + */ + udp = (struct udphdr *) ptr; + ptr += sizeof(struct udphdr); + + dst.sin_port = udp->uh_dport; + + /* sendto call to UDP socket and do error handling */ + if((res = sendto (userspace_udpsctp, ptr, send_len - (sizeof(struct udphdr) + sizeof(struct ip)), + o_flgs,(struct sockaddr *) &dst, + sizeof(struct sockaddr_in))) + != send_len - (sizeof(struct udphdr) + sizeof(struct ip))) { + *result = errno; + } + + + } else { + /* doing SCTP over IP so use the raw socket... */ + + sh = (struct sctphdr *) ptr; + + if(sh->dest_port == 0) { + SCTPDBG(SCTP_DEBUG_OUTPUT1, "Sending %d bytes to port 0! Assigning port...\n", send_len); + dst.sin_port = htons(8898); /* OOTB only - arbitrary TODO assign available port */ + } else { + SCTPDBG(SCTP_DEBUG_OUTPUT1, "Sending %d bytes to supplied non-zero port!\n", send_len); + dst.sin_port = sh->dest_port; + } + + if (ip->ip_src.s_addr == INADDR_ANY) { + /* TODO get addr of outgoing interface */ + } + + /* TODO IP handles fragmentation? */ + + /* TODO need to worry about ro->ro_dst as in ip_output? 
*/ + +#if defined(__Userspace_os_Linux) + /* need to put certain fields into network order for Linux */ + struct ip *iphdr; + iphdr = (struct ip *) psend_buf; + iphdr->ip_len = htons(iphdr->ip_len); + iphdr->ip_tos = htons(iphdr->ip_tos); + iphdr->ip_off = 0; /* when is this non-zero!?? TODO - FIX THIS HACK... */ +#endif + + if((res = sendto (userspace_rawsctp, psend_buf, send_len, + o_flgs,(struct sockaddr *) &dst, + sizeof(struct sockaddr_in))) + != send_len) { + *result = errno; + } + } + } + + if(psend_buf) + free(psend_buf); +} +#endif + diff --git a/usrsctplib/user_socketvar.h b/usrsctplib/user_socketvar.h new file mode 100755 index 00000000..9e30bd25 --- /dev/null +++ b/usrsctplib/user_socketvar.h @@ -0,0 +1,834 @@ +/*- + * Copyright (c) 1982, 1986, 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)socketvar.h 8.3 (Berkeley) 2/19/95 + * $FreeBSD: src/sys/sys/socketvar.h,v 1.158 2007/05/03 14:42:42 rwatson Exp $ + */ + +/* __Userspace__ version of <sys/socketvar.h> goes here.*/ + +#ifndef _SYS_SOCKETVAR_H_ +#define _SYS_SOCKETVAR_H_ + +#include <sys/queue.h> /* for TAILQ macros */ +/* #include <sys/selinfo.h> */ /*__Userspace__ alternative?*/ /* for struct selinfo */ +/* #include <sys/_lock.h> was 0 byte file */ +/* #include <sys/_mutex.h> was 0 byte file */ +/* #include <sys/_sx.h> */ /*__Userspace__ alternative?*/ + + +/* Source: /src/sys/sys/socket.h */ +#define SOCK_MAXADDRLEN 255 +#if !defined(MSG_NOTIFICATION) +#define MSG_NOTIFICATION 0x2000 /* SCTP notification */ +#endif +#define SO_ACCEPTFILTER 0x1000 /* there is an accept filter */ +#define SS_CANTRCVMORE 0x020 +#define SS_CANTSENDMORE 0x010 + +/* Needed for FreeBSD */ +#if defined (__Userspace_os_FreeBSD) || defined(__Userspace_os_Darwin) +#define UIO_MAXIOV 1024 +#define ERESTART (-1) +#endif + +enum uio_rw { UIO_READ, UIO_WRITE }; + +/* Segment flag values. */ +enum uio_seg { + UIO_USERSPACE, /* from user data space */ + UIO_SYSSPACE, /* from system space */ + UIO_NOCOPY /* don't copy, already in object */ +}; + +struct proc { + int stub; /* struct proc is a dummy for __Userspace__ */ +}; + + +struct socket_args { + int domain; + int type; + int protocol; +}; + + + +/* __Userspace__ Are these all the fields we need? 
+ * Removing struct thread *uio_td; owner field +*/ +struct uio { + struct iovec *uio_iov; /* scatter/gather list */ + int uio_iovcnt; /* length of scatter/gather list */ + off_t uio_offset; /* offset in target object */ + int uio_resid; /* remaining bytes to process */ + enum uio_seg uio_segflg; /* address space */ + enum uio_rw uio_rw; /* operation */ +}; + + +/* __Userspace__ */ + +MALLOC_DECLARE(M_PCB); +MALLOC_DECLARE(M_SONAME); +/* + * Kernel structure per socket. + * Contains send and receive buffer queues, + * handle on protocol and pointer to protocol + * private data and error information. + */ +typedef u_quad_t so_gen_t; + +/*- + * Locking key to struct socket: + * (a) constant after allocation, no locking required. + * (b) locked by SOCK_LOCK(so). + * (c) locked by SOCKBUF_LOCK(&so->so_rcv). + * (d) locked by SOCKBUF_LOCK(&so->so_snd). + * (e) locked by ACCEPT_LOCK(). + * (f) not locked since integer reads/writes are atomic. + * (g) used only as a sleep/wakeup address, no value. + * (h) locked by global mutex so_global_mtx. + */ +struct socket { + int so_count; /* (b) reference count */ + short so_type; /* (a) generic type, see socket.h */ + short so_options; /* from socket call, see socket.h */ + short so_linger; /* time to linger while closing */ + short so_state; /* (b) internal state flags SS_* */ + int so_qstate; /* (e) internal state flags SQ_* */ + void *so_pcb; /* protocol control block */ + struct protosw *so_proto; /* (a) protocol handle */ +/* + * Variables for connection queuing. + * Socket where accepts occur is so_head in all subsidiary sockets. + * If so_head is 0, socket is not related to an accept. + * For head socket so_incomp queues partially completed connections, + * while so_comp is a queue of connections ready to be accepted. + * If a connection is aborted and it has so_head set, then + * it has to be pulled out of either so_incomp or so_comp. 
+ * We allow connections to queue up based on current queue lengths + * and limit on number of queued connections for this socket. + */ + struct socket *so_head; /* (e) back pointer to listen socket */ + TAILQ_HEAD(, socket) so_incomp; /* (e) queue of partial unaccepted connections */ + TAILQ_HEAD(, socket) so_comp; /* (e) queue of complete unaccepted connections */ + TAILQ_ENTRY(socket) so_list; /* (e) list of unaccepted connections */ + u_short so_qlen; /* (e) number of unaccepted connections */ + u_short so_incqlen; /* (e) number of unaccepted incomplete + connections */ + u_short so_qlimit; /* (e) max number queued connections */ + short so_timeo; /* (g) connection timeout */ + pthread_cond_t timeo_cond; /* timeo_cond condition variable being used in wakeup */ + + u_short so_error; /* (f) error affecting connection */ + struct sigio *so_sigio; /* [sg] information for async I/O or + out of band data (SIGURG) */ + u_long so_oobmark; /* (c) chars to oob mark */ + TAILQ_HEAD(, aiocblist) so_aiojobq; /* AIO ops waiting on socket */ +/* + * Variables for socket buffering. + */ + struct sockbuf { + /* __Userspace__ Many of these fields may + * not be required for the sctp stack. + * Commenting out the following. + * Including pthread mutex and condition variable to be + * used by sbwait, sorwakeup and sowwakeup. 
+ */ + /* struct selinfo sb_sel;*/ /* process selecting read/write */ + /* struct mtx sb_mtx;*/ /* sockbuf lock */ + /* struct sx sb_sx;*/ /* prevent I/O interlacing */ + pthread_cond_t sb_cond; /* sockbuf condition variable */ + pthread_mutex_t sb_mtx; /* sockbuf lock associated with sb_cond */ + short sb_state; /* (c/d) socket state on sockbuf */ +#define sb_startzero sb_mb + struct mbuf *sb_mb; /* (c/d) the mbuf chain */ + struct mbuf *sb_mbtail; /* (c/d) the last mbuf in the chain */ + struct mbuf *sb_lastrecord; /* (c/d) first mbuf of last + * record in socket buffer */ + struct mbuf *sb_sndptr; /* (c/d) pointer into mbuf chain */ + u_int sb_sndptroff; /* (c/d) byte offset of ptr into chain */ + u_int sb_cc; /* (c/d) actual chars in buffer */ + u_int sb_hiwat; /* (c/d) max actual char count */ + u_int sb_mbcnt; /* (c/d) chars of mbufs used */ + u_int sb_mbmax; /* (c/d) max chars of mbufs to use */ + u_int sb_ctl; /* (c/d) non-data chars in buffer */ + int sb_lowat; /* (c/d) low water mark */ + int sb_timeo; /* (c/d) timeout for read/write */ + short sb_flags; /* (c/d) flags, see below */ + } so_rcv, so_snd; +/* + * Constants for sb_flags field of struct sockbuf. + */ +#define SB_MAX (256*1024) /* default for max chars in sockbuf */ +#define SB_RAW (64*1024*2) /*Aligning so->so_rcv.sb_hiwat with the receive buffer size of raw socket*/ +/* + * Constants for sb_flags field of struct sockbuf. 
+ */ +#define SB_WAIT 0x04 /* someone is waiting for data/space */ +#define SB_SEL 0x08 /* someone is selecting */ +#define SB_ASYNC 0x10 /* ASYNC I/O, need signals */ +#define SB_UPCALL 0x20 /* someone wants an upcall */ +#define SB_NOINTR 0x40 /* operations not interruptible */ +#define SB_AIO 0x80 /* AIO operations queued */ +#define SB_KNOTE 0x100 /* kernel note attached */ +#define SB_AUTOSIZE 0x800 /* automatically size socket buffer */ + + void (*so_upcall)(struct socket *, void *, int); + void *so_upcallarg; + struct ucred *so_cred; /* (a) user credentials */ + struct label *so_label; /* (b) MAC label for socket */ + struct label *so_peerlabel; /* (b) cached MAC label for peer */ + /* NB: generation count must not be first. */ + so_gen_t so_gencnt; /* (h) generation count */ + void *so_emuldata; /* (b) private data for emulators */ + struct so_accf { + struct accept_filter *so_accept_filter; + void *so_accept_filter_arg; /* saved filter args */ + char *so_accept_filter_str; /* saved user args */ + } *so_accf; +}; + +#define SB_EMPTY_FIXUP(sb) do { \ + if ((sb)->sb_mb == NULL) { \ + (sb)->sb_mbtail = NULL; \ + (sb)->sb_lastrecord = NULL; \ + } \ +} while (/*CONSTCOND*/0) + +/* + * Global accept mutex to serialize access to accept queues and + * fields associated with multiple sockets. This allows us to + * avoid defining a lock order between listen and accept sockets + * until such time as it proves to be a good idea. + */ +extern pthread_mutex_t accept_mtx; +#define ACCEPT_LOCK_ASSERT() assert(pthread_mutex_trylock(&accept_mtx) == EBUSY) +#define ACCEPT_UNLOCK_ASSERT() do{ \ + assert(pthread_mutex_trylock(&accept_mtx) == 0); \ + (void)pthread_mutex_unlock(&accept_mtx); \ +}while (0) +#define ACCEPT_LOCK() (void)pthread_mutex_lock(&accept_mtx) +#define ACCEPT_UNLOCK() (void)pthread_mutex_unlock(&accept_mtx) + +/* + * Per-socket buffer mutex used to protect most fields in the socket + * buffer. 
+ */ +#define SOCKBUF_MTX(_sb) (&(_sb)->sb_mtx) +#define SOCKBUF_LOCK_INIT(_sb, _name) \ + pthread_mutex_init(SOCKBUF_MTX(_sb), NULL) +#define SOCKBUF_LOCK_DESTROY(_sb) pthread_mutex_destroy(SOCKBUF_MTX(_sb)) +#define SOCKBUF_COND_INIT(_sb) pthread_cond_init((&(_sb)->sb_cond), NULL) +#define SOCKBUF_COND_DESTROY(_sb) pthread_cond_destroy((&(_sb)->sb_cond)) +#define SOCK_COND_INIT(_so) pthread_cond_init((&(_so)->timeo_cond), NULL) +#define SOCK_COND_DESTROY(_so) pthread_cond_destroy((&(_so)->timeo_cond)) +#define SOCK_COND(_so) (&(_so)->timeo_cond) +/*__Userspace__ SOCKBUF_LOCK(_sb) is now defined in netinet/sctp_process_lock.h */ + +/* #define SOCKBUF_OWNED(_sb) mtx_owned(SOCKBUF_MTX(_sb)) unused */ +/*__Userspace__ SOCKBUF_UNLOCK(_sb) is now defined in netinet/sctp_process_lock.h */ + +/*__Userspace__ SOCKBUF_LOCK_ASSERT(_sb) is now defined in netinet/sctp_process_lock.h */ + +/* #define SOCKBUF_UNLOCK_ASSERT(_sb) mtx_assert(SOCKBUF_MTX(_sb), MA_NOTOWNED) unused */ + +/* + * Per-socket mutex: we reuse the receive socket buffer mutex for space + * efficiency. This decision should probably be revisited as we optimize + * locking for the socket code. + */ +#define SOCK_MTX(_so) SOCKBUF_MTX(&(_so)->so_rcv) +/*__Userspace__ SOCK_LOCK(_so) is now defined in netinet/sctp_process_lock.h */ + +/* #define SOCK_OWNED(_so) SOCKBUF_OWNED(&(_so)->so_rcv) unused */ +/*__Userspace__ SOCK_UNLOCK(_so) is now defined in netinet/sctp_process_lock.h */ + +#define SOCK_LOCK_ASSERT(_so) SOCKBUF_LOCK_ASSERT(&(_so)->so_rcv) + +/* + * Socket state bits. + * + * Historically, this bits were all kept in the so_state field. For + * locking reasons, they are now in multiple fields, as they are + * locked differently. so_state maintains basic socket state protected + * by the socket lock. so_qstate holds information about the socket + * accept queues. Each socket buffer also has a state field holding + * information relevant to that socket buffer (can't send, rcv). 
Many + * fields will be read without locks to improve performance and avoid + * lock order issues. However, this approach must be used with caution. + */ +#define SS_NOFDREF 0x0001 /* no file table ref any more */ +#define SS_ISCONNECTED 0x0002 /* socket connected to a peer */ +#define SS_ISCONNECTING 0x0004 /* in process of connecting to peer */ +#define SS_ISDISCONNECTING 0x0008 /* in process of disconnecting */ +#define SS_NBIO 0x0100 /* non-blocking ops */ +#define SS_ASYNC 0x0200 /* async i/o notify */ +#define SS_ISCONFIRMING 0x0400 /* deciding to accept connection req */ +#define SS_ISDISCONNECTED 0x2000 /* socket disconnected from peer */ +/* + * Protocols can mark a socket as SS_PROTOREF to indicate that, following + * pru_detach, they still want the socket to persist, and will free it + * themselves when they are done. Protocols should only ever call sofree() + * following setting this flag in pru_detach(), and never otherwise, as + * sofree() bypasses socket reference counting. + */ +#define SS_PROTOREF 0x4000 /* strong protocol reference */ + +/* + * Socket state bits now stored in the socket buffer state field. + */ +#define SBS_CANTSENDMORE 0x0010 /* can't send more data to peer */ +#define SBS_CANTRCVMORE 0x0020 /* can't receive more data from peer */ +#define SBS_RCVATMARK 0x0040 /* at mark on input */ + +/* + * Socket state bits stored in so_qstate. + */ +#define SQ_INCOMP 0x0800 /* unaccepted, incomplete connection */ +#define SQ_COMP 0x1000 /* unaccepted, complete connection */ + +/* + * Externalized form of struct socket used by the sysctl(3) interface. 
+ */ +struct xsocket { + size_t xso_len; /* length of this structure */ + struct socket *xso_so; /* makes a convenient handle sometimes */ + short so_type; + short so_options; + short so_linger; + short so_state; + caddr_t so_pcb; /* another convenient handle */ + int xso_protocol; + int xso_family; + u_short so_qlen; + u_short so_incqlen; + u_short so_qlimit; + short so_timeo; + u_short so_error; + pid_t so_pgid; + u_long so_oobmark; + struct xsockbuf { + u_int sb_cc; + u_int sb_hiwat; + u_int sb_mbcnt; + u_int sb_mbmax; + int sb_lowat; + int sb_timeo; + short sb_flags; + } so_rcv, so_snd; + uid_t so_uid; /* XXX */ +}; + +#if defined(_KERNEL) + + +/* + * Macros for sockets and socket buffering. + */ + +/* + * Do we need to notify the other side when I/O is possible? + */ +#define sb_notify(sb) (((sb)->sb_flags & (SB_WAIT | SB_SEL | SB_ASYNC | \ + SB_UPCALL | SB_AIO | SB_KNOTE)) != 0) + +/* + * How much space is there in a socket buffer (so->so_snd or so->so_rcv)? + * This is problematical if the fields are unsigned, as the space might + * still be negative (cc > hiwat or mbcnt > mbmax). Should detect + * overflow and return 0. Should use "lmin" but it doesn't exist now. + */ +#define sbspace(sb) \ + ((long) imin((int)((sb)->sb_hiwat - (sb)->sb_cc), \ + (int)((sb)->sb_mbmax - (sb)->sb_mbcnt))) + +/* do we have to send all at once on a socket? */ +#define sosendallatonce(so) \ + ((so)->so_proto->pr_flags & PR_ATOMIC) + +/* can we read something from so? */ +#define soreadable(so) \ + ((so)->so_rcv.sb_cc >= (so)->so_rcv.sb_lowat || \ + ((so)->so_rcv.sb_state & SBS_CANTRCVMORE) || \ + !TAILQ_EMPTY(&(so)->so_comp) || (so)->so_error) + +/* can we write something to so? 
*/ +#define sowriteable(so) \ + ((sbspace(&(so)->so_snd) >= (so)->so_snd.sb_lowat && \ + (((so)->so_state&SS_ISCONNECTED) || \ + ((so)->so_proto->pr_flags&PR_CONNREQUIRED)==0)) || \ + ((so)->so_snd.sb_state & SBS_CANTSENDMORE) || \ + (so)->so_error) + +/* adjust counters in sb reflecting allocation of m */ +#define sballoc(sb, m) { \ + (sb)->sb_cc += (m)->m_len; \ + if ((m)->m_type != MT_DATA && (m)->m_type != MT_OOBDATA) \ + (sb)->sb_ctl += (m)->m_len; \ + (sb)->sb_mbcnt += MSIZE; \ + if ((m)->m_flags & M_EXT) \ + (sb)->sb_mbcnt += (m)->m_ext.ext_size; \ +} + +/* adjust counters in sb reflecting freeing of m */ +#define sbfree(sb, m) { \ + (sb)->sb_cc -= (m)->m_len; \ + if ((m)->m_type != MT_DATA && (m)->m_type != MT_OOBDATA) \ + (sb)->sb_ctl -= (m)->m_len; \ + (sb)->sb_mbcnt -= MSIZE; \ + if ((m)->m_flags & M_EXT) \ + (sb)->sb_mbcnt -= (m)->m_ext.ext_size; \ + if ((sb)->sb_sndptr == (m)) { \ + (sb)->sb_sndptr = NULL; \ + (sb)->sb_sndptroff = 0; \ + } \ + if ((sb)->sb_sndptroff != 0) \ + (sb)->sb_sndptroff -= (m)->m_len; \ +} + +/* + * soref()/sorele() ref-count the socket structure. Note that you must + * still explicitly close the socket, but the last ref count will free + * the structure. + */ +#define soref(so) do { \ + SOCK_LOCK_ASSERT(so); \ + ++(so)->so_count; \ +} while (0) + +#define sorele(so) do { \ + ACCEPT_LOCK_ASSERT(); \ + SOCK_LOCK_ASSERT(so); \ + if ((so)->so_count <= 0) \ + panic("sorele"); \ + if (--(so)->so_count == 0) \ + sofree(so); \ + else { \ + SOCK_UNLOCK(so); \ + ACCEPT_UNLOCK(); \ + } \ +} while (0) + +#define sotryfree(so) do { \ + ACCEPT_LOCK_ASSERT(); \ + SOCK_LOCK_ASSERT(so); \ + if ((so)->so_count == 0) \ + sofree(so); \ + else { \ + SOCK_UNLOCK(so); \ + ACCEPT_UNLOCK(); \ + } \ +} while(0) + +/* + * In sorwakeup() and sowwakeup(), acquire the socket buffer lock to + * avoid a non-atomic test-and-wakeup. However, sowakeup is + * responsible for releasing the lock if it is called. We unlock only + * if we don't call into sowakeup. 
If any code is introduced that + * directly invokes the underlying sowakeup() primitives, it must + * maintain the same semantics. + */ +#define sorwakeup_locked(so) do { \ + SOCKBUF_LOCK_ASSERT(&(so)->so_rcv); \ + if (sb_notify(&(so)->so_rcv)) \ + sowakeup((so), &(so)->so_rcv); \ + else \ + SOCKBUF_UNLOCK(&(so)->so_rcv); \ +} while (0) + +#define sorwakeup(so) do { \ + SOCKBUF_LOCK(&(so)->so_rcv); \ + sorwakeup_locked(so); \ +} while (0) + +#define sowwakeup_locked(so) do { \ + SOCKBUF_LOCK_ASSERT(&(so)->so_snd); \ + if (sb_notify(&(so)->so_snd)) \ + sowakeup((so), &(so)->so_snd); \ + else \ + SOCKBUF_UNLOCK(&(so)->so_snd); \ +} while (0) + +#define sowwakeup(so) do { \ + SOCKBUF_LOCK(&(so)->so_snd); \ + sowwakeup_locked(so); \ +} while (0) + +/* + * Argument structure for sosetopt et seq. This is in the KERNEL + * section because it will never be visible to user code. + */ +enum sopt_dir { SOPT_GET, SOPT_SET }; +struct sockopt { + enum sopt_dir sopt_dir; /* is this a get or a set? */ + int sopt_level; /* second arg of [gs]etsockopt */ + int sopt_name; /* third arg of [gs]etsockopt */ + void *sopt_val; /* fourth arg of [gs]etsockopt */ + size_t sopt_valsize; /* (almost) fifth arg of [gs]etsockopt */ + struct thread *sopt_td; /* calling thread or null if kernel */ +}; + +struct accept_filter { + char accf_name[16]; + void (*accf_callback) + (struct socket *so, void *arg, int waitflag); + void * (*accf_create) + (struct socket *so, char *arg); + void (*accf_destroy) + (struct socket *so); + SLIST_ENTRY(accept_filter) accf_next; +}; + +#ifdef MALLOC_DECLARE +MALLOC_DECLARE(M_ACCF); +MALLOC_DECLARE(M_PCB); +MALLOC_DECLARE(M_SONAME); +#endif + +extern int maxsockets; +extern u_long sb_max; +extern struct uma_zone *socket_zone; +extern so_gen_t so_gencnt; + +struct mbuf; +struct sockaddr; +struct ucred; +struct uio; + +/* + * From uipc_socket and friends + */ +int do_getopt_accept_filter(struct socket *so, struct sockopt *sopt); +int do_setopt_accept_filter(struct 
socket *so, struct sockopt *sopt); +int so_setsockopt(struct socket *so, int level, int optname, + void *optval, size_t optlen); +int sockargs(struct mbuf **mp, caddr_t buf, int buflen, int type); +int getsockaddr(struct sockaddr **namp, caddr_t uaddr, size_t len); +void sbappend(struct sockbuf *sb, struct mbuf *m); +void sbappend_locked(struct sockbuf *sb, struct mbuf *m); +void sbappendstream(struct sockbuf *sb, struct mbuf *m); +void sbappendstream_locked(struct sockbuf *sb, struct mbuf *m); +int sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa, + struct mbuf *m0, struct mbuf *control); +int sbappendaddr_locked(struct sockbuf *sb, const struct sockaddr *asa, + struct mbuf *m0, struct mbuf *control); +int sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, + struct mbuf *control); +int sbappendcontrol_locked(struct sockbuf *sb, struct mbuf *m0, + struct mbuf *control); +void sbappendrecord(struct sockbuf *sb, struct mbuf *m0); +void sbappendrecord_locked(struct sockbuf *sb, struct mbuf *m0); +void sbcheck(struct sockbuf *sb); +void sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n); +struct mbuf * + sbcreatecontrol(caddr_t p, int size, int type, int level); +void sbdestroy(struct sockbuf *sb, struct socket *so); +void sbdrop(struct sockbuf *sb, int len); +void sbdrop_locked(struct sockbuf *sb, int len); +void sbdroprecord(struct sockbuf *sb); +void sbdroprecord_locked(struct sockbuf *sb); +void sbflush(struct sockbuf *sb); +void sbflush_locked(struct sockbuf *sb); +void sbrelease(struct sockbuf *sb, struct socket *so); +void sbrelease_locked(struct sockbuf *sb, struct socket *so); +int sbreserve(struct sockbuf *sb, u_long cc, struct socket *so, + struct thread *td); +int sbreserve_locked(struct sockbuf *sb, u_long cc, struct socket *so, + struct thread *td); +struct mbuf * + sbsndptr(struct sockbuf *sb, u_int off, u_int len, u_int *moff); +void sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb); +int sbwait(struct sockbuf *sb); +int 
sblock(struct sockbuf *sb, int flags); +void sbunlock(struct sockbuf *sb); +void soabort(struct socket *so); +int soaccept(struct socket *so, struct sockaddr **nam); +int socheckuid(struct socket *so, uid_t uid); +int sobind(struct socket *so, struct sockaddr *nam, struct thread *td); +void socantrcvmore(struct socket *so); +void socantrcvmore_locked(struct socket *so); +void socantsendmore(struct socket *so); +void socantsendmore_locked(struct socket *so); +int soclose(struct socket *so); +int soconnect(struct socket *so, struct sockaddr *nam, struct thread *td); +int soconnect2(struct socket *so1, struct socket *so2); +int socow_setup(struct mbuf *m0, struct uio *uio); +int socreate(int dom, struct socket **aso, int type, int proto, + struct ucred *cred, struct thread *td); +int sodisconnect(struct socket *so); +struct sockaddr *sodupsockaddr(const struct sockaddr *sa, int mflags); +void sofree(struct socket *so); +int sogetopt(struct socket *so, struct sockopt *sopt); +void sohasoutofband(struct socket *so); +void soisconnected(struct socket *so); +void soisconnecting(struct socket *so); +void soisdisconnected(struct socket *so); +void soisdisconnecting(struct socket *so); +int solisten(struct socket *so, int backlog, struct thread *td); +void solisten_proto(struct socket *so, int backlog); +int solisten_proto_check(struct socket *so); +struct socket * + sonewconn(struct socket *head, int connstatus); +int sooptcopyin(struct sockopt *sopt, void *buf, size_t len, size_t minlen); +int sooptcopyout(struct sockopt *sopt, const void *buf, size_t len); + +/* XXX; prepare mbuf for (__FreeBSD__ < 3) routines. 
*/ +int soopt_getm(struct sockopt *sopt, struct mbuf **mp); +int soopt_mcopyin(struct sockopt *sopt, struct mbuf *m); +int soopt_mcopyout(struct sockopt *sopt, struct mbuf *m); + +int sopoll(struct socket *so, int events, struct ucred *active_cred, + struct thread *td); +int sopoll_generic(struct socket *so, int events, + struct ucred *active_cred, struct thread *td); +int soreceive(struct socket *so, struct sockaddr **paddr, struct uio *uio, + struct mbuf **mp0, struct mbuf **controlp, int *flagsp); +int soreceive_generic(struct socket *so, struct sockaddr **paddr, + struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, + int *flagsp); +int soreserve(struct socket *so, u_long sndcc, u_long rcvcc); +void sorflush(struct socket *so); +int sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, + struct mbuf *top, struct mbuf *control, int flags, + struct thread *td); +int sosend_dgram(struct socket *so, struct sockaddr *addr, + struct uio *uio, struct mbuf *top, struct mbuf *control, + int flags, struct thread *td); +int sosend_generic(struct socket *so, struct sockaddr *addr, + struct uio *uio, struct mbuf *top, struct mbuf *control, + int flags, struct thread *td); +int sosetopt(struct socket *so, struct sockopt *sopt); +int soshutdown(struct socket *so, int how); +void sotoxsocket(struct socket *so, struct xsocket *xso); +void sowakeup(struct socket *so, struct sockbuf *sb); + +#ifdef SOCKBUF_DEBUG +void sblastrecordchk(struct sockbuf *, const char *, int); +#define SBLASTRECORDCHK(sb) sblastrecordchk((sb), __FILE__, __LINE__) + +void sblastmbufchk(struct sockbuf *, const char *, int); +#define SBLASTMBUFCHK(sb) sblastmbufchk((sb), __FILE__, __LINE__) +#else +#define SBLASTRECORDCHK(sb) /* nothing */ +#define SBLASTMBUFCHK(sb) /* nothing */ +#endif /* SOCKBUF_DEBUG */ + +/* + * Accept filter functions (duh). 
+ */ +int accept_filt_add(struct accept_filter *filt); +int accept_filt_del(char *name); +struct accept_filter *accept_filt_get(char *name); +#ifdef ACCEPT_FILTER_MOD +#ifdef SYSCTL_DECL +SYSCTL_DECL(_net_inet_accf); +#endif +int accept_filt_generic_mod_event(module_t mod, int event, void *data); +#endif + +#endif /* _KERNEL */ + + +/*-------------------------------------------------------------*/ +/*-------------------------------------------------------------*/ +/* __Userspace__ */ +/*-------------------------------------------------------------*/ +/*-------------------------------------------------------------*/ +/* this new __Userspace__ section is to copy portions of the _KERNEL block + * above into, avoiding having to port the entire thing at once... + * For function prototypes, the full bodies are in user_socket.c . + */ +#if defined(__Userspace__) + +/* ---------------------------------------------------------- */ +/* --- function prototypes (implemented in user_socket.c) --- */ +/* ---------------------------------------------------------- */ +void soisconnecting(struct socket *so); +void soisdisconnecting(struct socket *so); +void soisconnected(struct socket *so); +struct socket * sonewconn(struct socket *head, int connstatus); +void socantrcvmore(struct socket *so); +void socantsendmore(struct socket *so); + + + +/* -------------- */ +/* --- macros --- */ +/* -------------- */ + +#define soref(so) do { \ + SOCK_LOCK_ASSERT(so); \ + ++(so)->so_count; \ +} while (0) + +#define sorele(so) do { \ + ACCEPT_LOCK_ASSERT(); \ + SOCK_LOCK_ASSERT(so); \ + if ((so)->so_count <= 0) \ + panic("sorele"); \ + if (--(so)->so_count == 0) \ + sofree(so); \ + else { \ + SOCK_UNLOCK(so); \ + ACCEPT_UNLOCK(); \ + } \ +} while (0) + + +/* replacing imin with min (user_environment.h) */ +#define sbspace(sb) \ + ((long) min((int)((sb)->sb_hiwat - (sb)->sb_cc), \ + (int)((sb)->sb_mbmax - (sb)->sb_mbcnt))) + +/* do we have to send all at once on a socket? 
*/ +#define sosendallatonce(so) \ + ((so)->so_proto->pr_flags & PR_ATOMIC) + +/* can we read something from so? */ +#define soreadable(so) \ + ((so)->so_rcv.sb_cc >= (so)->so_rcv.sb_lowat || \ + ((so)->so_rcv.sb_state & SBS_CANTRCVMORE) || \ + !TAILQ_EMPTY(&(so)->so_comp) || (so)->so_error) + +#if 0 /* original */ +#define PR_CONNREQUIRED 0x04 /* from sys/protosw.h "needed" for sowriteable */ +#define sowriteable(so) \ + ((sbspace(&(so)->so_snd) >= (so)->so_snd.sb_lowat && \ + (((so)->so_state&SS_ISCONNECTED) || \ + ((so)->so_proto->pr_flags&PR_CONNREQUIRED)==0)) || \ + ((so)->so_snd.sb_state & SBS_CANTSENDMORE) || \ + (so)->so_error) +#else /* line with PR_CONNREQUIRED removed */ +/* can we write something to so? */ +#define sowriteable(so) \ + ((sbspace(&(so)->so_snd) >= (so)->so_snd.sb_lowat && \ + (((so)->so_state&SS_ISCONNECTED))) || \ + ((so)->so_snd.sb_state & SBS_CANTSENDMORE) || \ + (so)->so_error) +#endif + +extern void solisten_proto(struct socket *so, int backlog); +extern int solisten_proto_check(struct socket *so); +extern int sctp_listen(struct socket *so, int backlog, struct proc *p); +extern int sctp_bind(struct socket *so, struct sockaddr *addr); +extern int sctp_accept(struct socket *so, struct sockaddr **addr); +extern int sctp_attach(struct socket *so, int proto, uint32_t vrf_id); +extern int sctp_abort(struct socket *so); +extern void sctp_close(struct socket *so); +extern int soaccept(struct socket *so, struct sockaddr **nam); +extern int solisten(struct socket *so, int backlog); +extern int soreserve(struct socket *so, u_long sndcc, u_long rcvcc); +extern void sowakeup(struct socket *so, struct sockbuf *sb); +extern void wakeup(void *ident, struct socket *so); /*__Userspace__ */ +extern int uiomove(void *cp, int n, struct uio *uio); +extern int sbwait(struct sockbuf *sb); +extern ssize_t userspace_sctp_sendmsg(struct socket *so, + const void *data, + size_t len, + struct sockaddr *to, + socklen_t tolen, + u_int32_t ppid, + u_int32_t flags, + 
u_int16_t stream_no, + u_int32_t timetolive, + u_int32_t context); + +extern ssize_t userspace_sctp_sendmbuf(struct socket *so, + struct mbuf* mbufdata, + size_t len, + struct sockaddr *to, + socklen_t tolen, + u_int32_t ppid, + u_int32_t flags, + u_int16_t stream_no, + u_int32_t timetolive, + u_int32_t context); + +extern struct socket * userspace_socket(int domain, int type, int protocol); +extern int userspace_connect(struct socket *so, struct sockaddr *name, int namelen); +extern int sodisconnect(struct socket *so); +extern int soconnect(struct socket *so, struct sockaddr *nam); +extern int sctp_disconnect(struct socket *so); +extern int sctp_connect(struct socket *so, struct sockaddr *addr); +extern struct mbuf* mbufalloc(size_t size, void* data, unsigned char fill); +extern struct mbuf* mbufallocfromiov(int iovlen, struct iovec *srciov); +extern void userspace_close(struct socket *so); +extern void sctp_finish(void); + +/* ------------------------------------------------ */ +/* ----- macros copied from above ---- */ +/* ------------------------------------------------ */ + +/* + * Do we need to notify the other side when I/O is possible? + */ +#define sb_notify(sb) (((sb)->sb_flags & (SB_WAIT | SB_SEL | SB_ASYNC | \ + SB_UPCALL | SB_AIO | SB_KNOTE)) != 0) + + +/* + * In sorwakeup() and sowwakeup(), acquire the socket buffer lock to + * avoid a non-atomic test-and-wakeup. However, sowakeup is + * responsible for releasing the lock if it is called. We unlock only + * if we don't call into sowakeup. If any code is introduced that + * directly invokes the underlying sowakeup() primitives, it must + * maintain the same semantics. 
+ */ +#define sorwakeup_locked(so) do { \ + SOCKBUF_LOCK_ASSERT(&(so)->so_rcv); \ + if (sb_notify(&(so)->so_rcv)) \ + sowakeup((so), &(so)->so_rcv); \ + else \ + SOCKBUF_UNLOCK(&(so)->so_rcv); \ +} while (0) + +#define sorwakeup(so) do { \ + SOCKBUF_LOCK(&(so)->so_rcv); \ + sorwakeup_locked(so); \ +} while (0) + +#define sowwakeup_locked(so) do { \ + SOCKBUF_LOCK_ASSERT(&(so)->so_snd); \ + if (sb_notify(&(so)->so_snd)) \ + sowakeup((so), &(so)->so_snd); \ + else \ + SOCKBUF_UNLOCK(&(so)->so_snd); \ +} while (0) + +#define sowwakeup(so) do { \ + SOCKBUF_LOCK(&(so)->so_snd); \ + sowwakeup_locked(so); \ +} while (0) + + + +#endif /* __Userspace__ */ + +#endif /* !_SYS_SOCKETVAR_H_ */ diff --git a/usrsctplib/user_uma.h b/usrsctplib/user_uma.h new file mode 100755 index 00000000..91986922 --- /dev/null +++ b/usrsctplib/user_uma.h @@ -0,0 +1,67 @@ +/* __Userspace__ */ +#include <sys/queue.h> + +#define UMA_ZFLAG_FULL 0x40000000 /* Reached uz_maxpages */ +#define UMA_ALIGN_PTR (sizeof(void *) - 1) /* Alignment fit for ptr */ + +/* __Userspace__ All these definitions will change for +userspace Universal Memory Allocator (UMA). These are included +for reference purposes and to avoid compile errors for the time being. 
+*/ +typedef int (*uma_ctor)(void *mem, int size, void *arg, int flags); +typedef void (*uma_dtor)(void *mem, int size, void *arg); +typedef int (*uma_init)(void *mem, int size, int flags); +typedef void (*uma_fini)(void *mem, int size); +typedef struct uma_zone * uma_zone_t; +typedef struct uma_keg * uma_keg_t; + +struct uma_cache { + int stub; /* TODO __Userspace__ */ +}; + +struct uma_keg { + int stub; /* TODO __Userspace__ */ +}; + +struct uma_zone { + char *uz_name; /* Text name of the zone */ + struct mtx *uz_lock; /* Lock for the zone (keg's lock) */ + uma_keg_t uz_keg; /* Our underlying Keg */ + + LIST_ENTRY(uma_zone) uz_link; /* List of all zones in keg */ + LIST_HEAD(,uma_bucket) uz_full_bucket; /* full buckets */ + LIST_HEAD(,uma_bucket) uz_free_bucket; /* Buckets for frees */ + + uma_ctor uz_ctor; /* Constructor for each allocation */ + uma_dtor uz_dtor; /* Destructor */ + uma_init uz_init; /* Initializer for each item */ + uma_fini uz_fini; /* Discards memory */ + + u_int64_t uz_allocs; /* Total number of allocations */ + u_int64_t uz_frees; /* Total number of frees */ + u_int64_t uz_fails; /* Total number of alloc failures */ + uint16_t uz_fills; /* Outstanding bucket fills */ + uint16_t uz_count; /* Highest value ub_ptr can have */ + + /* + * This HAS to be the last item because we adjust the zone size + * based on NCPU and then allocate the space for the zones. + */ + struct uma_cache uz_cpu[1]; /* Per cpu caches */ +}; + +/* Prototype */ +uma_zone_t +uma_zcreate(char *name, size_t size, uma_ctor ctor, uma_dtor dtor, + uma_init uminit, uma_fini fini, int align, u_int32_t flags); + + +#define uma_zone_set_max(zone, number) /* stub TODO __Userspace__ */ + +uma_zone_t +uma_zcreate(char *name, size_t size, uma_ctor ctor, uma_dtor dtor, + uma_init uminit, uma_fini fini, int align, u_int32_t flags) +{ + return NULL; /* stub TODO __Userspace__. Also place implementation in a separate .c file */ + +} |