uipc_syscalls.c (90c35c1939db21edd0301d25ca81f28d9d9ecd9d) uipc_syscalls.c (ca04d21d5fdff6e58af745766024088091fe3d90)
1/*-
2 * Copyright (c) 1982, 1986, 1989, 1990, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * sendfile(2) and related extensions:
6 * Copyright (c) 1998, David Greenman. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without

--- 143 unchanged lines hidden (view full) ---

152
153 COUNTER_ARRAY_COPY(sfstat, &s, sizeof(s) / sizeof(uint64_t));
154 if (req->newptr)
155 COUNTER_ARRAY_ZERO(sfstat, sizeof(s) / sizeof(uint64_t));
156 return (SYSCTL_OUT(req, &s, sizeof(s)));
157}
158SYSCTL_PROC(_kern_ipc, OID_AUTO, sfstat, CTLTYPE_OPAQUE | CTLFLAG_RW,
159 NULL, 0, sfstat_sysctl, "I", "sendfile statistics");
1/*-
2 * Copyright (c) 1982, 1986, 1989, 1990, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * sendfile(2) and related extensions:
6 * Copyright (c) 1998, David Greenman. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without

--- 143 unchanged lines hidden (view full) ---

152
153 COUNTER_ARRAY_COPY(sfstat, &s, sizeof(s) / sizeof(uint64_t));
154 if (req->newptr)
155 COUNTER_ARRAY_ZERO(sfstat, sizeof(s) / sizeof(uint64_t));
156 return (SYSCTL_OUT(req, &s, sizeof(s)));
157}
158SYSCTL_PROC(_kern_ipc, OID_AUTO, sfstat, CTLTYPE_OPAQUE | CTLFLAG_RW,
159 NULL, 0, sfstat_sysctl, "I", "sendfile statistics");
160
161fo_sendfile_t vn_sendfile;
162
160/*
161 * Convert a user file descriptor to a kernel file entry and check if required
162 * capability rights are present.
163 * A reference on the file entry is held upon returning.
164 */
165static int
166getsock_cap(struct filedesc *fdp, int fd, cap_rights_t rights,
167 struct file **fpp, u_int *fflagp)

--- 1731 unchanged lines hidden (view full) ---

1899 return (do_sendfile(td, uap, 0));
1900}
1901
1902static int
1903do_sendfile(struct thread *td, struct sendfile_args *uap, int compat)
1904{
1905 struct sf_hdtr hdtr;
1906 struct uio *hdr_uio, *trl_uio;
163/*
164 * Convert a user file descriptor to a kernel file entry and check if required
165 * capability rights are present.
166 * A reference on the file entry is held upon returning.
167 */
168static int
169getsock_cap(struct filedesc *fdp, int fd, cap_rights_t rights,
170 struct file **fpp, u_int *fflagp)

--- 1731 unchanged lines hidden (view full) ---

1902 return (do_sendfile(td, uap, 0));
1903}
1904
1905static int
1906do_sendfile(struct thread *td, struct sendfile_args *uap, int compat)
1907{
1908 struct sf_hdtr hdtr;
1909 struct uio *hdr_uio, *trl_uio;
1910 struct file *fp;
1907 int error;
1908
1911 int error;
1912
1913 if (uap->offset < 0)
1914 return (EINVAL);
1915
1909 hdr_uio = trl_uio = NULL;
1910
1911 if (uap->hdtr != NULL) {
1912 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
1913 if (error)
1914 goto out;
1915 if (hdtr.headers != NULL) {
1916 error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio);
1917 if (error)
1918 goto out;
1919 }
1920 if (hdtr.trailers != NULL) {
1921 error = copyinuio(hdtr.trailers, hdtr.trl_cnt, &trl_uio);
1922 if (error)
1923 goto out;
1924
1925 }
1926 }
1927
1916 hdr_uio = trl_uio = NULL;
1917
1918 if (uap->hdtr != NULL) {
1919 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
1920 if (error)
1921 goto out;
1922 if (hdtr.headers != NULL) {
1923 error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio);
1924 if (error)
1925 goto out;
1926 }
1927 if (hdtr.trailers != NULL) {
1928 error = copyinuio(hdtr.trailers, hdtr.trl_cnt, &trl_uio);
1929 if (error)
1930 goto out;
1931
1932 }
1933 }
1934
1928 error = kern_sendfile(td, uap, hdr_uio, trl_uio, compat);
1935 AUDIT_ARG_FD(uap->fd);
1936
1937 /*
1938 * sendfile(2) can start at any offset within a file so we require
1939 * CAP_READ+CAP_SEEK = CAP_PREAD.
1940 */
1941 if ((error = fget_read(td, uap->fd, CAP_PREAD, &fp)) != 0)
1942 goto out;
1943
1944 error = fo_sendfile(fp, uap->s, hdr_uio, trl_uio, uap->offset,
1945 uap->nbytes, uap->sbytes, uap->flags, compat ? SFK_COMPAT : 0, td);
1946 fdrop(fp, td);
1947
1929out:
1930 if (hdr_uio)
1931 free(hdr_uio, M_IOV);
1932 if (trl_uio)
1933 free(trl_uio, M_IOV);
1934 return (error);
1935}
1936

--- 11 unchanged lines hidden (view full) ---

1948 args.sbytes = uap->sbytes;
1949 args.flags = uap->flags;
1950
1951 return (do_sendfile(td, &args, 1));
1952}
1953#endif /* COMPAT_FREEBSD4 */
1954
1955int
1948out:
1949 if (hdr_uio)
1950 free(hdr_uio, M_IOV);
1951 if (trl_uio)
1952 free(trl_uio, M_IOV);
1953 return (error);
1954}
1955

--- 11 unchanged lines hidden (view full) ---

1967 args.sbytes = uap->sbytes;
1968 args.flags = uap->flags;
1969
1970 return (do_sendfile(td, &args, 1));
1971}
1972#endif /* COMPAT_FREEBSD4 */
1973
1974int
1956kern_sendfile(struct thread *td, struct sendfile_args *uap,
1957 struct uio *hdr_uio, struct uio *trl_uio, int compat)
1975vn_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio,
1976 struct uio *trl_uio, off_t offset, size_t nbytes, off_t *sent, int flags,
1977 int kflags, struct thread *td)
1958{
1978{
1979 struct vnode *vp = fp->f_vnode;
1959 struct file *sock_fp;
1980 struct file *sock_fp;
1960 struct vnode *vp;
1961 struct vm_object *obj = NULL;
1962 struct socket *so = NULL;
1963 struct mbuf *m = NULL;
1964 struct sf_buf *sf;
1965 struct vm_page *pg;
1966 struct vattr va;
1967 off_t off, xfsize, fsbytes = 0, sbytes = 0, rem = 0;
1968 int error, hdrlen = 0, mnw = 0;
1969 int bsize;
1970 struct sendfile_sync *sfs = NULL;
1971
1981 struct vm_object *obj = NULL;
1982 struct socket *so = NULL;
1983 struct mbuf *m = NULL;
1984 struct sf_buf *sf;
1985 struct vm_page *pg;
1986 struct vattr va;
1987 off_t off, xfsize, fsbytes = 0, sbytes = 0, rem = 0;
1988 int error, hdrlen = 0, mnw = 0;
1989 int bsize;
1990 struct sendfile_sync *sfs = NULL;
1991
1972 /*
1973 * The file descriptor must be a regular file and have a
1974 * backing VM object.
1975 * File offset must be positive. If it goes beyond EOF
1976 * we send only the header/trailer and no payload data.
1977 */
1978 AUDIT_ARG_FD(uap->fd);
1979 /*
1980 * sendfile(2) can start at any offset within a file so we require
1981 * CAP_READ+CAP_SEEK = CAP_PREAD.
1982 */
1983 if ((error = fgetvp_read(td, uap->fd, CAP_PREAD, &vp)) != 0)
1984 goto out;
1985 vn_lock(vp, LK_SHARED | LK_RETRY);
1986 if (vp->v_type == VREG) {
1987 bsize = vp->v_mount->mnt_stat.f_iosize;
1992 vn_lock(vp, LK_SHARED | LK_RETRY);
1993 if (vp->v_type == VREG) {
1994 bsize = vp->v_mount->mnt_stat.f_iosize;
1988 if (uap->nbytes == 0) {
1995 if (nbytes == 0) {
1989 error = VOP_GETATTR(vp, &va, td->td_ucred);
1990 if (error != 0) {
1991 VOP_UNLOCK(vp, 0);
1992 obj = NULL;
1993 goto out;
1994 }
1995 rem = va.va_size;
1996 } else
1996 error = VOP_GETATTR(vp, &va, td->td_ucred);
1997 if (error != 0) {
1998 VOP_UNLOCK(vp, 0);
1999 obj = NULL;
2000 goto out;
2001 }
2002 rem = va.va_size;
2003 } else
1997 rem = uap->nbytes;
2004 rem = nbytes;
1998 obj = vp->v_object;
1999 if (obj != NULL) {
2000 /*
2001 * Temporarily increase the backing VM
2002 * object's reference count so that a forced
2003 * reclamation of its vnode does not
2004 * immediately destroy it.
2005 */

--- 8 unchanged lines hidden (view full) ---

2014 }
2015 } else
2016 bsize = 0; /* silence gcc */
2017 VOP_UNLOCK(vp, 0);
2018 if (obj == NULL) {
2019 error = EINVAL;
2020 goto out;
2021 }
2005 obj = vp->v_object;
2006 if (obj != NULL) {
2007 /*
2008 * Temporarily increase the backing VM
2009 * object's reference count so that a forced
2010 * reclamation of its vnode does not
2011 * immediately destroy it.
2012 */

--- 8 unchanged lines hidden (view full) ---

2021 }
2022 } else
2023 bsize = 0; /* silence gcc */
2024 VOP_UNLOCK(vp, 0);
2025 if (obj == NULL) {
2026 error = EINVAL;
2027 goto out;
2028 }
2022 if (uap->offset < 0) {
2023 error = EINVAL;
2024 goto out;
2025 }
2026
2027 /*
2028 * The socket must be a stream socket and connected.
2029 * Remember if it is a blocking or non-blocking socket.
2030 */
2029
2030 /*
2031 * The socket must be a stream socket and connected.
2032 * Remember if it is a blocking or non-blocking socket.
2033 */
2031 if ((error = getsock_cap(td->td_proc->p_fd, uap->s, CAP_SEND,
2034 if ((error = getsock_cap(td->td_proc->p_fd, sockfd, CAP_SEND,
2032 &sock_fp, NULL)) != 0)
2033 goto out;
2034 so = sock_fp->f_data;
2035 if (so->so_type != SOCK_STREAM) {
2036 error = EINVAL;
2037 goto out;
2038 }
2039 if ((so->so_state & SS_ISCONNECTED) == 0) {
2040 error = ENOTCONN;
2041 goto out;
2042 }
2043 /*
2044 * Do not wait on memory allocations but return ENOMEM for
2045 * caller to retry later.
2046 * XXX: Experimental.
2047 */
2035 &sock_fp, NULL)) != 0)
2036 goto out;
2037 so = sock_fp->f_data;
2038 if (so->so_type != SOCK_STREAM) {
2039 error = EINVAL;
2040 goto out;
2041 }
2042 if ((so->so_state & SS_ISCONNECTED) == 0) {
2043 error = ENOTCONN;
2044 goto out;
2045 }
2046 /*
2047 * Do not wait on memory allocations but return ENOMEM for
2048 * caller to retry later.
2049 * XXX: Experimental.
2050 */
2048 if (uap->flags & SF_MNOWAIT)
2051 if (flags & SF_MNOWAIT)
2049 mnw = 1;
2050
2052 mnw = 1;
2053
2051 if (uap->flags & SF_SYNC) {
2054 if (flags & SF_SYNC) {
2052 sfs = malloc(sizeof *sfs, M_TEMP, M_WAITOK | M_ZERO);
2053 mtx_init(&sfs->mtx, "sendfile", NULL, MTX_DEF);
2054 cv_init(&sfs->cv, "sendfile");
2055 }
2056
2057#ifdef MAC
2058 error = mac_socket_check_send(td->td_ucred, so);
2059 if (error)

--- 5 unchanged lines hidden (view full) ---

2065 hdr_uio->uio_td = td;
2066 hdr_uio->uio_rw = UIO_WRITE;
2067 if (hdr_uio->uio_resid > 0) {
2068 /*
2069 * In FBSD < 5.0 the nbytes to send also included
2070 * the header. If compat is specified subtract the
2071 * header size from nbytes.
2072 */
2055 sfs = malloc(sizeof *sfs, M_TEMP, M_WAITOK | M_ZERO);
2056 mtx_init(&sfs->mtx, "sendfile", NULL, MTX_DEF);
2057 cv_init(&sfs->cv, "sendfile");
2058 }
2059
2060#ifdef MAC
2061 error = mac_socket_check_send(td->td_ucred, so);
2062 if (error)

--- 5 unchanged lines hidden (view full) ---

2068 hdr_uio->uio_td = td;
2069 hdr_uio->uio_rw = UIO_WRITE;
2070 if (hdr_uio->uio_resid > 0) {
2071 /*
2072 * In FBSD < 5.0 the nbytes to send also included
2073 * the header. If compat is specified subtract the
2074 * header size from nbytes.
2075 */
2073 if (compat) {
2074 if (uap->nbytes > hdr_uio->uio_resid)
2075 uap->nbytes -= hdr_uio->uio_resid;
2076 if (kflags & SFK_COMPAT) {
2077 if (nbytes > hdr_uio->uio_resid)
2078 nbytes -= hdr_uio->uio_resid;
2076 else
2079 else
2077 uap->nbytes = 0;
2080 nbytes = 0;
2078 }
2079 m = m_uiotombuf(hdr_uio, (mnw ? M_NOWAIT : M_WAITOK),
2080 0, 0, 0);
2081 if (m == NULL) {
2082 error = mnw ? EAGAIN : ENOBUFS;
2083 goto out;
2084 }
2085 hdrlen = m_length(m, NULL);

--- 14 unchanged lines hidden (view full) ---

2100 * into an sf_buf, attach an mbuf header to the sf_buf, and queue
2101 * it on the socket.
2102 * This is done in two loops. The inner loop turns as many pages
2103 * as it can, up to available socket buffer space, without blocking
2104 * into mbufs to have it bulk delivered into the socket send buffer.
2105 * The outer loop checks the state and available space of the socket
2106 * and takes care of the overall progress.
2107 */
2081 }
2082 m = m_uiotombuf(hdr_uio, (mnw ? M_NOWAIT : M_WAITOK),
2083 0, 0, 0);
2084 if (m == NULL) {
2085 error = mnw ? EAGAIN : ENOBUFS;
2086 goto out;
2087 }
2088 hdrlen = m_length(m, NULL);

--- 14 unchanged lines hidden (view full) ---

2103 * into an sf_buf, attach an mbuf header to the sf_buf, and queue
2104 * it on the socket.
2105 * This is done in two loops. The inner loop turns as many pages
2106 * as it can, up to available socket buffer space, without blocking
2107 * into mbufs to have it bulk delivered into the socket send buffer.
2108 * The outer loop checks the state and available space of the socket
2109 * and takes care of the overall progress.
2110 */
2108 for (off = uap->offset; ; ) {
2111 for (off = offset; ; ) {
2109 struct mbuf *mtail;
2110 int loopbytes;
2111 int space;
2112 int done;
2113
2112 struct mbuf *mtail;
2113 int loopbytes;
2114 int space;
2115 int done;
2116
2114 if ((uap->nbytes != 0 && uap->nbytes == fsbytes) ||
2115 (uap->nbytes == 0 && va.va_size == fsbytes))
2117 if ((nbytes != 0 && nbytes == fsbytes) ||
2118 (nbytes == 0 && va.va_size == fsbytes))
2116 break;
2117
2118 mtail = NULL;
2119 loopbytes = 0;
2120 space = 0;
2121 done = 0;
2122
2123 /*

--- 81 unchanged lines hidden (view full) ---

2205 struct mbuf *m0;
2206
2207 /*
2208 * Calculate the amount to transfer.
2209 * Not to exceed a page, the EOF,
2210 * or the passed in nbytes.
2211 */
2212 pgoff = (vm_offset_t)(off & PAGE_MASK);
2119 break;
2120
2121 mtail = NULL;
2122 loopbytes = 0;
2123 space = 0;
2124 done = 0;
2125
2126 /*

--- 81 unchanged lines hidden (view full) ---

2208 struct mbuf *m0;
2209
2210 /*
2211 * Calculate the amount to transfer.
2212 * Not to exceed a page, the EOF,
2213 * or the passed in nbytes.
2214 */
2215 pgoff = (vm_offset_t)(off & PAGE_MASK);
2213 if (uap->nbytes)
2214 rem = (uap->nbytes - fsbytes - loopbytes);
2216 if (nbytes)
2217 rem = (nbytes - fsbytes - loopbytes);
2215 else
2216 rem = va.va_size -
2218 else
2219 rem = va.va_size -
2217 uap->offset - fsbytes - loopbytes;
2220 offset - fsbytes - loopbytes;
2218 xfsize = omin(PAGE_SIZE - pgoff, rem);
2219 xfsize = omin(space - loopbytes, xfsize);
2220 if (xfsize <= 0) {
2221 done = 1; /* all data sent */
2222 break;
2223 }
2224
2225 /*

--- 11 unchanged lines hidden (view full) ---

2237 * If we already turned some pages into mbufs,
2238 * send them off before we come here again and
2239 * block.
2240 */
2241 if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize))
2242 VM_OBJECT_WUNLOCK(obj);
2243 else if (m != NULL)
2244 error = EAGAIN; /* send what we already got */
2221 xfsize = omin(PAGE_SIZE - pgoff, rem);
2222 xfsize = omin(space - loopbytes, xfsize);
2223 if (xfsize <= 0) {
2224 done = 1; /* all data sent */
2225 break;
2226 }
2227
2228 /*

--- 11 unchanged lines hidden (view full) ---

2240 * If we already turned some pages into mbufs,
2241 * send them off before we come here again and
2242 * block.
2243 */
2244 if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize))
2245 VM_OBJECT_WUNLOCK(obj);
2246 else if (m != NULL)
2247 error = EAGAIN; /* send what we already got */
2245 else if (uap->flags & SF_NODISKIO)
2248 else if (flags & SF_NODISKIO)
2246 error = EBUSY;
2247 else {
2248 ssize_t resid;
2249 int readahead = sfreadahead * MAXBSIZE;
2250
2251 VM_OBJECT_WUNLOCK(obj);
2252
2253 /*

--- 40 unchanged lines hidden (view full) ---

2294 */
2295 sf = sf_buf_alloc(pg, (mnw || m != NULL) ? SFB_NOWAIT :
2296 SFB_CATCH);
2297 if (sf == NULL) {
2298 SFSTAT_INC(sf_allocfail);
2299 vm_page_lock(pg);
2300 vm_page_unwire(pg, 0);
2301 KASSERT(pg->object != NULL,
2249 error = EBUSY;
2250 else {
2251 ssize_t resid;
2252 int readahead = sfreadahead * MAXBSIZE;
2253
2254 VM_OBJECT_WUNLOCK(obj);
2255
2256 /*

--- 40 unchanged lines hidden (view full) ---

2297 */
2298 sf = sf_buf_alloc(pg, (mnw || m != NULL) ? SFB_NOWAIT :
2299 SFB_CATCH);
2300 if (sf == NULL) {
2301 SFSTAT_INC(sf_allocfail);
2302 vm_page_lock(pg);
2303 vm_page_unwire(pg, 0);
2304 KASSERT(pg->object != NULL,
2302 ("kern_sendfile: object disappeared"));
2305 ("%s: object disappeared", __func__));
2303 vm_page_unlock(pg);
2304 if (m == NULL)
2305 error = (mnw ? EAGAIN : EINTR);
2306 break;
2307 }
2308
2309 /*
2310 * Get an mbuf and set it up as having

--- 83 unchanged lines hidden (view full) ---

2394 goto done;
2395 }
2396
2397 /*
2398 * Send trailers. Wimp out and use writev(2).
2399 */
2400 if (trl_uio != NULL) {
2401 sbunlock(&so->so_snd);
2306 vm_page_unlock(pg);
2307 if (m == NULL)
2308 error = (mnw ? EAGAIN : EINTR);
2309 break;
2310 }
2311
2312 /*
2313 * Get an mbuf and set it up as having

--- 83 unchanged lines hidden (view full) ---

2397 goto done;
2398 }
2399
2400 /*
2401 * Send trailers. Wimp out and use writev(2).
2402 */
2403 if (trl_uio != NULL) {
2404 sbunlock(&so->so_snd);
2402 error = kern_writev(td, uap->s, trl_uio);
2405 error = kern_writev(td, sockfd, trl_uio);
2403 if (error == 0)
2404 sbytes += td->td_retval[0];
2405 goto out;
2406 }
2407
2408done:
2409 sbunlock(&so->so_snd);
2410out:
2411 /*
2412 * If there was no error we have to clear td->td_retval[0]
2413 * because it may have been set by writev.
2414 */
2415 if (error == 0) {
2416 td->td_retval[0] = 0;
2417 }
2406 if (error == 0)
2407 sbytes += td->td_retval[0];
2408 goto out;
2409 }
2410
2411done:
2412 sbunlock(&so->so_snd);
2413out:
2414 /*
2415 * If there was no error we have to clear td->td_retval[0]
2416 * because it may have been set by writev.
2417 */
2418 if (error == 0) {
2419 td->td_retval[0] = 0;
2420 }
2418 if (uap->sbytes != NULL) {
2419 copyout(&sbytes, uap->sbytes, sizeof(off_t));
2421 if (sent != NULL) {
2422 copyout(&sbytes, sent, sizeof(off_t));
2420 }
2421 if (obj != NULL)
2422 vm_object_deallocate(obj);
2423 }
2424 if (obj != NULL)
2425 vm_object_deallocate(obj);
2423 if (vp != NULL)
2424 vrele(vp);
2425 if (so)
2426 fdrop(sock_fp, td);
2427 if (m)
2428 m_freem(m);
2429
2430 if (sfs != NULL) {
2431 mtx_lock(&sfs->mtx);
2432 if (sfs->count != 0)

--- 502 unchanged lines hidden ---
2426 if (so)
2427 fdrop(sock_fp, td);
2428 if (m)
2429 m_freem(m);
2430
2431 if (sfs != NULL) {
2432 mtx_lock(&sfs->mtx);
2433 if (sfs->count != 0)

--- 502 unchanged lines hidden ---