xref: /freebsd/lib/libfetch/fetch.3 (revision a85978584cc37b468a8f24e79fd1bd5bc0edf478)
1.\" Copyright (c) 1998 Dag-Erling Coïdan Smørgrav
2.\" All rights reserved.
3.\"
4.\" Redistribution and use in source and binary forms, with or without
5.\" modification, are permitted provided that the following conditions
6.\" are met:
7.\" 1. Redistributions of source code must retain the above copyright
8.\"    notice, this list of conditions and the following disclaimer.
9.\" 2. Redistributions in binary form must reproduce the above copyright
10.\"    notice, this list of conditions and the following disclaimer in the
11.\"    documentation and/or other materials provided with the distribution.
12.\"
13.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23.\" SUCH DAMAGE.
24.\"
25.\" $FreeBSD$
26.\"
27.Dd July 1, 1998
28.Dt FETCH 3
29.Os
30.Sh NAME
31.Nm fetchMakeURL ,
32.Nm fetchParseURL ,
33.Nm fetchFreeURL ,
34.Nm fetchGetURL ,
35.Nm fetchPutURL ,
36.Nm fetchStatURL ,
37.Nm fetchListURL ,
38.Nm fetchGet ,
39.Nm fetchPut ,
40.Nm fetchStat ,
41.Nm fetchList ,
42.Nm fetchGetFile ,
43.Nm fetchPutFile ,
44.Nm fetchStatFile ,
45.Nm fetchListFile ,
46.Nm fetchGetHTTP ,
47.Nm fetchPutHTTP ,
48.Nm fetchStatHTTP ,
49.Nm fetchListHTTP ,
50.Nm fetchGetFTP ,
51.Nm fetchPutFTP ,
52.Nm fetchStatFTP ,
53.Nm fetchListFTP
54.Nd file transfer functions
55.Sh LIBRARY
56.Lb libfetch
57.Sh SYNOPSIS
58.Fd #include <sys/param.h>
59.Fd #include <stdio.h>
60.Fd #include <fetch.h>
61.Ft struct url *
62.Fn fetchMakeURL "char *scheme" "char *host" "int port" "char *doc" "char *user" "char *pwd"
63.Ft struct url *
64.Fn fetchParseURL "char *URL"
65.Ft void
66.Fn fetchFreeURL "struct url *URL"
67.Ft FILE *
68.Fn fetchGetURL "char *URL" "char *flags"
69.Ft FILE *
70.Fn fetchPutURL "char *URL" "char *flags"
71.Ft int
72.Fn fetchStatURL "char *URL" "struct url_stat *us" "char *flags"
73.Ft struct url_ent *
74.Fn fetchListURL "char *URL" "char *flags"
75.Ft FILE *
76.Fn fetchGet "struct url *URL" "char *flags"
77.Ft FILE *
78.Fn fetchPut "struct url *URL" "char *flags"
79.Ft int
80.Fn fetchStat "struct url *URL" "struct url_stat *us" "char *flags"
81.Ft struct url_ent *
82.Fn fetchList "struct url *" "char *flags"
83.Ft FILE *
84.Fn fetchGetFile "struct url *u" "char *flags"
85.Ft FILE *
86.Fn fetchPutFile "struct url *u" "char *flags"
87.Ft int
88.Fn fetchStatFile "struct url *URL" "struct url_stat *us" "char *flags"
89.Ft struct url_ent *
90.Fn fetchListFile "struct url *" "char *flags"
91.Ft FILE *
92.Fn fetchGetHTTP "struct url *u" "char *flags"
93.Ft FILE *
94.Fn fetchPutHTTP "struct url *u" "char *flags"
95.Ft int
96.Fn fetchStatHTTP "struct url *URL" "struct url_stat *us" "char *flags"
97.Ft struct url_ent *
98.Fn fetchListHTTP "struct url *" "char *flags"
99.Ft FILE *
100.Fn fetchGetFTP "struct url *u" "char *flags"
101.Ft FILE *
102.Fn fetchPutFTP "struct url *u" "char *flags"
103.Ft int
104.Fn fetchStatFTP "struct url *URL" "struct url_stat *us" "char *flags"
105.Ft struct url_ent *
106.Fn fetchListFTP "struct url *" "char *flags"
107.Sh DESCRIPTION
108.Pp
109These functions implement a high-level library for retrieving and
110uploading files using Uniform Resource Locators (URLs).
111.Pp
112.Fn fetchParseURL
113takes a URL in the form of a null-terminated string and splits it into
114its components according to the Common Internet Scheme Syntax
115detailed in RFC1738.
116A regular expression which produces this syntax is:
117.Bd -literal
118    <scheme>:(//(<user>(:<pwd>)?@)?<host>(:<port>)?)?/(<document>)?
119.Ed
120.Pp
121Note that some components of the URL are not necessarily relevant to
122all URL schemes.
123For instance, the file scheme only needs the <scheme>
124and <document> components.
125.Pp
126.Fn fetchMakeURL
127and
128.Fn fetchParseURL
129return a pointer to a
130.Fa url
131structure, which is defined as follows in
132.Aq Pa fetch.h :
133.Bd -literal
134#define URL_SCHEMELEN 16
135#define URL_USERLEN 256
136#define URL_PWDLEN 256
137
138struct url {
139    char	 scheme[URL_SCHEMELEN+1];
140    char	 user[URL_USERLEN+1];
141    char	 pwd[URL_PWDLEN+1];
142    char	 host[MAXHOSTNAMELEN+1];
143    int		 port;
144    char	*doc;
145    off_t	 offset;
146    size_t	 length;
147};
148.Ed
149.Pp
150The pointer returned by
151.Fn fetchMakeURL
152or
153.Fn fetchParseURL
154should be freed using
155.Fn fetchFreeURL .
156.Pp
157.Fn fetchGetURL
158and
159.Fn fetchPutURL
160constitute the recommended interface to the
161.Nm fetch
162library.
163They examine the URL passed to them to determine the transfer
164method, and call the appropriate lower-level functions to perform the
165actual transfer.
166The
167.Fa flags
168argument is a string of characters which specify transfer options.
169The
170meaning of the individual flags is scheme-dependent, and is detailed
171in the appropriate section below.
172.Pp
173.Fn fetchStatURL
174attempts to obtain the requested document's metadata and fill in the
175structure pointed to by its second argument.
176The
177.Fa url_stat
178structure is defined as follows in
179.Aq Pa fetch.h :
180.Bd -literal
181struct url_stat {
182    off_t	 size;
183    time_t	 atime;
184    time_t	 mtime;
185};
186.Ed
187.Pp
188If the size could not be obtained from the server, the
189.Fa size
190field is set to -1.
191If the modification time could not be obtained from the server, the
192.Fa mtime
193field is set to the epoch.
194If the access time could not be obtained from the server, the
195.Fa atime
196field is set to the modification time.
197.Pp
198.Fn fetchListURL
199attempts to list the contents of the directory pointed to by the URL
200provided.
201If successful, it returns a malloced array of
202.Fa url_ent
203structures.
204The
205.Fa url_ent
206structure is defined as follows in
207.Aq Pa fetch.h :
208.Bd -literal
209struct url_ent {
210    char         name[MAXPATHLEN];
211    struct url_stat stat;
212};
213.Ed
214.Pp
215The list is terminated by an entry with an empty name.
216.Pp
217The pointer returned by
218.Fn fetchListURL
219should be freed using
220.Fn free .
221.Pp
222.Fn fetchGet ,
223.Fn fetchPut
224and
225.Fn fetchStat
226are similar to
227.Fn fetchGetURL ,
228.Fn fetchPutURL
229and
230.Fn fetchStatURL ,
231except that they expect a pre-parsed URL in the form of a pointer to
232a
233.Fa struct url
234rather than a string.
235.Pp
236All of the
237.Fn fetchGetXXX
238and
239.Fn fetchPutXXX
240functions return a pointer to a stream which can be used to read or
241write data from or to the requested document, respectively.
242Note that
243although the implementation details of the individual access methods
244vary, it can generally be assumed that a stream returned by one of the
245.Fn fetchGetXXX
246functions is read-only, and that a stream returned by one of the
247.Fn fetchPutXXX
248functions is write-only.
249.Sh FILE SCHEME
250.Fn fetchGetFile
251and
252.Fn fetchPutFile
253provide access to documents which are files in a locally mounted file
254system.
255Only the <document> component of the URL is used.
256.Pp
257.Fn fetchGetFile
258does not accept any flags.
259.Pp
260.Fn fetchPutFile
261accepts the
262.Fa a
263(append to file) flag.
264If that flag is specified, the data written to
265the stream returned by
266.Fn fetchPutFile
267will be appended to the previous contents of the file, instead of
268replacing them.
269.Sh FTP SCHEME
270.Fn fetchGetFTP
271and
272.Fn fetchPutFTP
273implement the FTP protocol as described in RFC959.
274.Pp
275If the
276.Fa p
277(passive) flag is specified, a passive (rather than active) connection
278will be attempted.
279.Pp
280If the
281.Fa h
282(high) flag is specified, data sockets will be allocated in the high
283port range (see
284.Xr ip 4 ).
285.Pp
286If the
287.Fa d
288(direct) flag is specified,
289.Fn fetchGetFTP
290and
291.Fn fetchPutFTP
292will use a direct connection even if a proxy server is defined.
293.Pp
294If no user name or password is given, the
295.Nm fetch
296library will attempt an anonymous login, with user name "ftp" and
297password "ftp".
298.Sh HTTP SCHEME
299The
300.Fn fetchGetHTTP
301and
302.Fn fetchPutHTTP
303functions implement the HTTP/1.1 protocol.
304With a little luck, there's
305even a chance that they comply with RFC2068.
306.Pp
307If the
308.Fa d
309(direct) flag is specified,
310.Fn fetchGetHTTP
311and
312.Fn fetchPutHTTP
313will use a direct connection even if a proxy server is defined.
314.Pp
315Since there seems to be no good way of implementing the HTTP PUT
316method in a manner consistent with the rest of the
317.Nm fetch
318library,
319.Fn fetchPutHTTP
320is currently unimplemented.
321.Sh RETURN VALUES
322.Fn fetchParseURL
323returns a pointer to a
324.Fa struct url
325containing the individual components of the URL.
326If it is
327unable to allocate memory, or the URL is syntactically incorrect,
328.Fn fetchParseURL
329returns a NULL pointer.
330.Pp
331The
332.Fn fetchStat
333functions return 0 on success and -1 on failure.
334.Pp
335All other functions return a stream pointer which may be used to
336access the requested document, or NULL if an error occurred.
337.Pp
338The following error codes are defined in
339.Aq Pa fetch.h :
340.Bl -tag -width 18n
341.It Bq Er FETCH_ABORT
342Operation aborted
343.It Bq Er FETCH_AUTH
344Authentication failed
345.It Bq Er FETCH_DOWN
346Service unavailable
347.It Bq Er FETCH_EXISTS
348File exists
349.It Bq Er FETCH_FULL
350File system full
351.It Bq Er FETCH_INFO
352Informational response
353.It Bq Er FETCH_MEMORY
354Insufficient memory
355.It Bq Er FETCH_MOVED
356File has moved
357.It Bq Er FETCH_NETWORK
358Network error
359.It Bq Er FETCH_OK
360No error
361.It Bq Er FETCH_PROTO
362Protocol error
363.It Bq Er FETCH_RESOLV
364Resolver error
365.It Bq Er FETCH_SERVER
366Server error
367.It Bq Er FETCH_TEMP
368Temporary error
369.It Bq Er FETCH_TIMEOUT
370Operation timed out
371.It Bq Er FETCH_UNAVAIL
372File is not available
373.It Bq Er FETCH_UNKNOWN
374Unknown error
375.It Bq Er FETCH_URL
376Invalid URL
377.El
378.Pp
379The accompanying error message includes a protocol-specific error code
380and message, e.g. "File is not available (404 Not Found)".
381.Sh ENVIRONMENT
382.Bl -tag -width HTTP_PROXY_AUTH
383.It Ev FTP_PROXY
384Host name of the FTP proxy to use, optionally followed by a port
385number separated from the host name by a colon.
386.It Ev HTTP_AUTH
387Specifies HTTP authorization parameters, used only if the server
388requires authorization and no user name or password was specified in
389the URL.
390The first and second items are the authorization scheme and realm
391respectively; further items are scheme-dependent.
392Currently, only basic authorization is supported.
393Basic authorization requires two parameters: the user name and
394password, in that order.
395.It Ev HTTP_PROXY
396Host name of the HTTP proxy to use, optionally followed by a port
397number separated from the host name by a colon.
398If no port number is specified, the default is 3128.
399.It Ev HTTP_PROXY_AUTH
400Specifies authorization parameters for the HTTP proxy in the same
401format as
402.Ev HTTP_AUTH .
403The value of this variable is used if and only if connected to an HTTP
404proxy.
405.El
406.Sh SEE ALSO
407.Xr fetch 1 ,
408.Xr ftpio 3 ,
409.Xr ip 4 .
410.Rs
411.%A T. Berners-Lee
412.%A L. Masinter
413.%A M. McCahill
414.%D December 1994
415.%T Uniform Resource Locators (URL)
416.%O RFC1738
417.Re
418.Rs
419.%A R. Fielding
420.%A J. Gettys
421.%A J. Mogul
422.%A H. Frystyk
423.%A T. Berners-Lee
424.%D January 1997
425.%B Hypertext Transfer Protocol -- HTTP/1.1
426.%O RFC2068
427.Re
428.Rs
429.%A J. Postel
430.%A J. K. Reynolds
431.%D October 1985
432.%B File Transfer Protocol
433.%O RFC959
434.Re
435.Sh HISTORY
436The
437.Nm fetch
438library first appeared in
439.Fx 3.0 .
440.Sh AUTHORS
441The
442.Nm fetch
443library was mostly written by
444.An Dag-Erling Coïdan Smørgrav Aq des@FreeBSD.org
445with numerous suggestions from
446.An Jordan K. Hubbard Aq jkh@FreeBSD.org ,
447.An Eugene Skepner Aq eu@qub.com
448and other FreeBSD developers.
449It replaces the older
450.Nm ftpio
451library written by
452.An Poul-Henning Kamp Aq pkh@FreeBSD.org
453and
454.An Jordan K. Hubbard Aq jkh@FreeBSD.org .
455.Pp
456This manual page was written by
457.An Dag-Erling Coïdan Smørgrav Aq des@FreeBSD.org .
458.Sh BUGS
459Some parts of the library are not yet implemented.
460The most notable
461examples of this are
462.Fn fetchPutHTTP ,
463.Fn fetchListHTTP ,
464.Fn fetchListFTP
465and FTP proxy support.
466.Pp
467There's no way to select a proxy at run-time other than setting the
468.Ev HTTP_PROXY
469or
470.Ev FTP_PROXY
471environment variables as appropriate.
472.Pp
473.Nm libfetch
474does not attempt to interpret and respond to authentication requests
475from the HTTP server or proxy (code 401 and 407 respectively).
476.Pp
477.Nm libfetch
478does not understand or obey 305 (Use Proxy) replies.
479.Pp
480No attempt is made to encode spaces etc. within URLs.
481Spaces in the
482document part of a URL should be replaced with "%20" in HTTP URLs and
483"\\ " in FTP URLs.
484.Pp
485Error numbers are unique only within a certain context; the error
486codes used for FTP and HTTP overlap, as do those used for resolver and
487system errors.
488For instance, error code 202 means "Command not
489implemented, superfluous at this site" in an FTP context and
490"Accepted" in an HTTP context.
491.Pp
492.Fn fetchStatFTP
493does not check that the result of an MDTM command is a valid date.
494.Pp
495The HTTP code needs a complete rewrite, or at least a serious cleanup.
496.Pp
497The man page is poorly written and produces badly formatted text.
498.Pp
499The error reporting mechanism is unsatisfactory.
500.Pp
501Some parts of the code are not fully reentrant.
502.Pp
503Tons of other stuff.
504