fetch.c revision 1.1 1 1.1 agc /*-
2 1.1 agc * Copyright (c) 1998-2004 Dag-Erling Coïdan Smørgrav
3 1.1 agc * All rights reserved.
4 1.1 agc *
5 1.1 agc * Redistribution and use in source and binary forms, with or without
6 1.1 agc * modification, are permitted provided that the following conditions
7 1.1 agc * are met:
8 1.1 agc * 1. Redistributions of source code must retain the above copyright
9 1.1 agc * notice, this list of conditions and the following disclaimer
10 1.1 agc * in this position and unchanged.
11 1.1 agc * 2. Redistributions in binary form must reproduce the above copyright
12 1.1 agc * notice, this list of conditions and the following disclaimer in the
13 1.1 agc * documentation and/or other materials provided with the distribution.
14 1.1 agc * 3. The name of the author may not be used to endorse or promote products
15 1.1 agc * derived from this software without specific prior written permission
16 1.1 agc *
17 1.1 agc * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 1.1 agc * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 1.1 agc * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 1.1 agc * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 1.1 agc * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 1.1 agc * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 1.1 agc * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 1.1 agc * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 1.1 agc * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 1.1 agc * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 1.1 agc */
28 1.1 agc
29 1.1 agc #include "free2net.h"
30 1.1 agc
31 1.1 agc #include <sys/cdefs.h>
32 1.1 agc __FBSDID("$FreeBSD: src/lib/libfetch/fetch.c,v 1.37.6.1 2006/11/11 00:16:07 des Exp $");
33 1.1 agc
34 1.1 agc #include <sys/param.h>
35 1.1 agc #include <sys/errno.h>
36 1.1 agc
37 1.1 agc #include <ctype.h>
38 1.1 agc #include <stdio.h>
39 1.1 agc #include <stdlib.h>
40 1.1 agc #include <string.h>
41 1.1 agc
42 1.1 agc #include "fetch.h"
43 1.1 agc #include "common.h"
44 1.1 agc
45 1.1 agc auth_t fetchAuthMethod;
46 1.1 agc int fetchLastErrCode;
47 1.1 agc char fetchLastErrString[MAXERRSTRING];
48 1.1 agc int fetchTimeout;
49 1.1 agc int fetchRestartCalls = 1;
50 1.1 agc int fetchDebug;
51 1.1 agc
52 1.1 agc
53 1.1 agc /*** Local data **************************************************************/
54 1.1 agc
55 1.1 agc /*
56 1.1 agc * Error messages for parser errors
57 1.1 agc */
58 1.1 agc #define URL_MALFORMED 1
59 1.1 agc #define URL_BAD_SCHEME 2
60 1.1 agc #define URL_BAD_PORT 3
61 1.1 agc static struct fetcherr _url_errlist[] = {
62 1.1 agc { URL_MALFORMED, FETCH_URL, "Malformed URL" },
63 1.1 agc { URL_BAD_SCHEME, FETCH_URL, "Invalid URL scheme" },
64 1.1 agc { URL_BAD_PORT, FETCH_URL, "Invalid server port" },
65 1.1 agc { -1, FETCH_UNKNOWN, "Unknown parser error" }
66 1.1 agc };
67 1.1 agc
68 1.1 agc
69 1.1 agc /*** Public API **************************************************************/
70 1.1 agc
71 1.1 agc /*
72 1.1 agc * Select the appropriate protocol for the URL scheme, and return a
73 1.1 agc * read-only stream connected to the document referenced by the URL.
74 1.1 agc * Also fill out the struct url_stat.
75 1.1 agc */
76 1.1 agc FILE *
77 1.1 agc fetchXGet(struct url *URL, struct url_stat *us, const char *flags)
78 1.1 agc {
79 1.1 agc if (us != NULL) {
80 1.1 agc us->size = -1;
81 1.1 agc us->atime = us->mtime = 0;
82 1.1 agc }
83 1.1 agc if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
84 1.1 agc return (fetchXGetFile(URL, us, flags));
85 1.1 agc else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
86 1.1 agc return (fetchXGetFTP(URL, us, flags));
87 1.1 agc else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
88 1.1 agc return (fetchXGetHTTP(URL, us, flags));
89 1.1 agc else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
90 1.1 agc return (fetchXGetHTTP(URL, us, flags));
91 1.1 agc _url_seterr(URL_BAD_SCHEME);
92 1.1 agc return (NULL);
93 1.1 agc }
94 1.1 agc
/*
 * Convenience form of fetchXGet() for callers that do not want a
 * struct url_stat filled in: passes NULL for the stat argument and
 * returns the resulting stream unchanged.
 */
FILE *
fetchGet(struct url *URL, const char *flags)
{
	FILE *stream;

	stream = fetchXGet(URL, NULL, flags);
	return (stream);
}
104 1.1 agc
105 1.1 agc /*
106 1.1 agc * Select the appropriate protocol for the URL scheme, and return a
107 1.1 agc * write-only stream connected to the document referenced by the URL.
108 1.1 agc */
109 1.1 agc FILE *
110 1.1 agc fetchPut(struct url *URL, const char *flags)
111 1.1 agc {
112 1.1 agc if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
113 1.1 agc return (fetchPutFile(URL, flags));
114 1.1 agc else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
115 1.1 agc return (fetchPutFTP(URL, flags));
116 1.1 agc else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
117 1.1 agc return (fetchPutHTTP(URL, flags));
118 1.1 agc else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
119 1.1 agc return (fetchPutHTTP(URL, flags));
120 1.1 agc _url_seterr(URL_BAD_SCHEME);
121 1.1 agc return (NULL);
122 1.1 agc }
123 1.1 agc
124 1.1 agc /*
125 1.1 agc * Select the appropriate protocol for the URL scheme, and return the
126 1.1 agc * size of the document referenced by the URL if it exists.
127 1.1 agc */
128 1.1 agc int
129 1.1 agc fetchStat(struct url *URL, struct url_stat *us, const char *flags)
130 1.1 agc {
131 1.1 agc if (us != NULL) {
132 1.1 agc us->size = -1;
133 1.1 agc us->atime = us->mtime = 0;
134 1.1 agc }
135 1.1 agc if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
136 1.1 agc return (fetchStatFile(URL, us, flags));
137 1.1 agc else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
138 1.1 agc return (fetchStatFTP(URL, us, flags));
139 1.1 agc else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
140 1.1 agc return (fetchStatHTTP(URL, us, flags));
141 1.1 agc else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
142 1.1 agc return (fetchStatHTTP(URL, us, flags));
143 1.1 agc _url_seterr(URL_BAD_SCHEME);
144 1.1 agc return (-1);
145 1.1 agc }
146 1.1 agc
147 1.1 agc /*
148 1.1 agc * Select the appropriate protocol for the URL scheme, and return a
149 1.1 agc * list of files in the directory pointed to by the URL.
150 1.1 agc */
151 1.1 agc struct url_ent *
152 1.1 agc fetchList(struct url *URL, const char *flags)
153 1.1 agc {
154 1.1 agc if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
155 1.1 agc return (fetchListFile(URL, flags));
156 1.1 agc else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
157 1.1 agc return (fetchListFTP(URL, flags));
158 1.1 agc else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
159 1.1 agc return (fetchListHTTP(URL, flags));
160 1.1 agc else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
161 1.1 agc return (fetchListHTTP(URL, flags));
162 1.1 agc _url_seterr(URL_BAD_SCHEME);
163 1.1 agc return (NULL);
164 1.1 agc }
165 1.1 agc
/*
 * String front end for fetchXGet(): parse URL with fetchParseURL(), run
 * fetchXGet() on the result, then release the parsed URL.  Returns NULL
 * without calling fetchXGet() when parsing fails; otherwise returns
 * whatever fetchXGet() returned.
 */
FILE *
fetchXGetURL(const char *URL, struct url_stat *us, const char *flags)
{
	struct url *parsed;
	FILE *stream = NULL;

	parsed = fetchParseURL(URL);
	if (parsed != NULL) {
		stream = fetchXGet(parsed, us, flags);
		/* The parsed URL is only needed for the duration of the call. */
		fetchFreeURL(parsed);
	}
	return (stream);
}
183 1.1 agc
/*
 * Convenience form of fetchXGetURL() for callers that do not want a
 * struct url_stat filled in: passes NULL for the stat argument and
 * returns the resulting stream unchanged.
 */
FILE *
fetchGetURL(const char *URL, const char *flags)
{
	FILE *stream;

	stream = fetchXGetURL(URL, NULL, flags);
	return (stream);
}
192 1.1 agc
/*
 * String front end for fetchPut(): parse URL with fetchParseURL(), run
 * fetchPut() on the result, then release the parsed URL.  Returns NULL
 * without calling fetchPut() when parsing fails; otherwise returns
 * whatever fetchPut() returned.
 */
FILE *
fetchPutURL(const char *URL, const char *flags)
{
	struct url *parsed;
	FILE *stream = NULL;

	parsed = fetchParseURL(URL);
	if (parsed != NULL) {
		stream = fetchPut(parsed, flags);
		/* The parsed URL is only needed for the duration of the call. */
		fetchFreeURL(parsed);
	}
	return (stream);
}
210 1.1 agc
211 1.1 agc /*
212 1.1 agc * Attempt to parse the given URL; if successful, call fetchStat().
213 1.1 agc */
214 1.1 agc int
215 1.1 agc fetchStatURL(const char *URL, struct url_stat *us, const char *flags)
216 1.1 agc {
217 1.1 agc struct url *u;
218 1.1 agc int s;
219 1.1 agc
220 1.1 agc if ((u = fetchParseURL(URL)) == NULL)
221 1.1 agc return (-1);
222 1.1 agc
223 1.1 agc s = fetchStat(u, us, flags);
224 1.1 agc
225 1.1 agc fetchFreeURL(u);
226 1.1 agc return (s);
227 1.1 agc }
228 1.1 agc
/*
 * String front end for fetchList(): parse URL with fetchParseURL(), run
 * fetchList() on the result, then release the parsed URL.  Returns NULL
 * without calling fetchList() when parsing fails; otherwise returns
 * whatever fetchList() returned.
 */
struct url_ent *
fetchListURL(const char *URL, const char *flags)
{
	struct url *parsed;
	struct url_ent *entries = NULL;

	parsed = fetchParseURL(URL);
	if (parsed != NULL) {
		entries = fetchList(parsed, flags);
		/* The parsed URL is only needed for the duration of the call. */
		fetchFreeURL(parsed);
	}
	return (entries);
}
246 1.1 agc
247 1.1 agc /*
248 1.1 agc * Make a URL
249 1.1 agc */
250 1.1 agc struct url *
251 1.1 agc fetchMakeURL(const char *scheme, const char *host, int port, const char *doc,
252 1.1 agc const char *user, const char *pwd)
253 1.1 agc {
254 1.1 agc struct url *u;
255 1.1 agc
256 1.1 agc if (!scheme || (!host && !doc)) {
257 1.1 agc _url_seterr(URL_MALFORMED);
258 1.1 agc return (NULL);
259 1.1 agc }
260 1.1 agc
261 1.1 agc if (port < 0 || port > 65535) {
262 1.1 agc _url_seterr(URL_BAD_PORT);
263 1.1 agc return (NULL);
264 1.1 agc }
265 1.1 agc
266 1.1 agc /* allocate struct url */
267 1.1 agc if ((u = calloc(1, sizeof(*u))) == NULL) {
268 1.1 agc _fetch_syserr();
269 1.1 agc return (NULL);
270 1.1 agc }
271 1.1 agc
272 1.1 agc if ((u->doc = strdup(doc ? doc : "/")) == NULL) {
273 1.1 agc _fetch_syserr();
274 1.1 agc free(u);
275 1.1 agc return (NULL);
276 1.1 agc }
277 1.1 agc
278 1.1 agc #define seturl(x) snprintf(u->x, sizeof(u->x), "%s", x)
279 1.1 agc seturl(scheme);
280 1.1 agc seturl(host);
281 1.1 agc seturl(user);
282 1.1 agc seturl(pwd);
283 1.1 agc #undef seturl
284 1.1 agc u->port = port;
285 1.1 agc
286 1.1 agc return (u);
287 1.1 agc }
288 1.1 agc
289 1.1 agc /*
290 1.1 agc * Split an URL into components. URL syntax is:
291 1.1 agc * [method:/][/[user[:pwd]@]host[:port]/][document]
292 1.1 agc * This almost, but not quite, RFC1738 URL syntax.
293 1.1 agc */
294 1.1 agc struct url *
295 1.1 agc fetchParseURL(const char *URL)
296 1.1 agc {
297 1.1 agc char *doc;
298 1.1 agc const char *p, *q;
299 1.1 agc struct url *u;
300 1.1 agc int i;
301 1.1 agc
302 1.1 agc /* allocate struct url */
303 1.1 agc if ((u = calloc(1, sizeof(*u))) == NULL) {
304 1.1 agc _fetch_syserr();
305 1.1 agc return (NULL);
306 1.1 agc }
307 1.1 agc
308 1.1 agc /* scheme name */
309 1.1 agc if ((p = strstr(URL, ":/")) != NULL) {
310 1.1 agc snprintf(u->scheme, URL_SCHEMELEN+1,
311 1.1 agc "%.*s", (int)(p - URL), URL);
312 1.1 agc URL = ++p;
313 1.1 agc /*
314 1.1 agc * Only one slash: no host, leave slash as part of document
315 1.1 agc * Two slashes: host follows, strip slashes
316 1.1 agc */
317 1.1 agc if (URL[1] == '/')
318 1.1 agc URL = (p += 2);
319 1.1 agc } else {
320 1.1 agc p = URL;
321 1.1 agc }
322 1.1 agc if (!*URL || *URL == '/' || *URL == '.' ||
323 1.1 agc (u->scheme[0] == '\0' &&
324 1.1 agc strchr(URL, '/') == NULL && strchr(URL, ':') == NULL))
325 1.1 agc goto nohost;
326 1.1 agc
327 1.1 agc p = strpbrk(URL, "/@");
328 1.1 agc if (p && *p == '@') {
329 1.1 agc /* username */
330 1.1 agc for (q = URL, i = 0; (*q != ':') && (*q != '@'); q++)
331 1.1 agc if (i < URL_USERLEN)
332 1.1 agc u->user[i++] = *q;
333 1.1 agc
334 1.1 agc /* password */
335 1.1 agc if (*q == ':')
336 1.1 agc for (q++, i = 0; (*q != ':') && (*q != '@'); q++)
337 1.1 agc if (i < URL_PWDLEN)
338 1.1 agc u->pwd[i++] = *q;
339 1.1 agc
340 1.1 agc p++;
341 1.1 agc } else {
342 1.1 agc p = URL;
343 1.1 agc }
344 1.1 agc
345 1.1 agc /* hostname */
346 1.1 agc #ifdef INET6
347 1.1 agc if (*p == '[' && (q = strchr(p + 1, ']')) != NULL &&
348 1.1 agc (*++q == '\0' || *q == '/' || *q == ':')) {
349 1.1 agc if ((i = q - p - 2) > MAXHOSTNAMELEN)
350 1.1 agc i = MAXHOSTNAMELEN;
351 1.1 agc strncpy(u->host, ++p, i);
352 1.1 agc p = q;
353 1.1 agc } else
354 1.1 agc #endif
355 1.1 agc for (i = 0; *p && (*p != '/') && (*p != ':'); p++)
356 1.1 agc if (i < MAXHOSTNAMELEN)
357 1.1 agc u->host[i++] = *p;
358 1.1 agc
359 1.1 agc /* port */
360 1.1 agc if (*p == ':') {
361 1.1 agc for (q = ++p; *q && (*q != '/'); q++)
362 1.1 agc if (isdigit((unsigned)*q))
363 1.1 agc u->port = u->port * 10 + (*q - '0');
364 1.1 agc else {
365 1.1 agc /* invalid port */
366 1.1 agc _url_seterr(URL_BAD_PORT);
367 1.1 agc goto ouch;
368 1.1 agc }
369 1.1 agc p = q;
370 1.1 agc }
371 1.1 agc
372 1.1 agc nohost:
373 1.1 agc /* document */
374 1.1 agc if (!*p)
375 1.1 agc p = "/";
376 1.1 agc
377 1.1 agc if (strcasecmp(u->scheme, SCHEME_HTTP) == 0 ||
378 1.1 agc strcasecmp(u->scheme, SCHEME_HTTPS) == 0) {
379 1.1 agc const char hexnums[] = "0123456789abcdef";
380 1.1 agc
381 1.1 agc /* percent-escape whitespace. */
382 1.1 agc if ((doc = malloc(strlen(p) * 3 + 1)) == NULL) {
383 1.1 agc _fetch_syserr();
384 1.1 agc goto ouch;
385 1.1 agc }
386 1.1 agc u->doc = doc;
387 1.1 agc while (*p != '\0') {
388 1.1 agc if (!isspace((unsigned)*p)) {
389 1.1 agc *doc++ = *p++;
390 1.1 agc } else {
391 1.1 agc *doc++ = '%';
392 1.1 agc *doc++ = hexnums[((unsigned int)*p) >> 4];
393 1.1 agc *doc++ = hexnums[((unsigned int)*p) & 0xf];
394 1.1 agc p++;
395 1.1 agc }
396 1.1 agc }
397 1.1 agc *doc = '\0';
398 1.1 agc } else if ((u->doc = strdup(p)) == NULL) {
399 1.1 agc _fetch_syserr();
400 1.1 agc goto ouch;
401 1.1 agc }
402 1.1 agc
403 1.1 agc DEBUG(fprintf(stderr,
404 1.1 agc "scheme: [%s]\n"
405 1.1 agc "user: [%s]\n"
406 1.1 agc "password: [%s]\n"
407 1.1 agc "host: [%s]\n"
408 1.1 agc "port: [%d]\n"
409 1.1 agc "document: [%s]\n",
410 1.1 agc u->scheme, u->user, u->pwd,
411 1.1 agc u->host, u->port, u->doc));
412 1.1 agc
413 1.1 agc return (u);
414 1.1 agc
415 1.1 agc ouch:
416 1.1 agc free(u);
417 1.1 agc return (NULL);
418 1.1 agc }
419 1.1 agc
420 1.1 agc /*
421 1.1 agc * Free a URL
422 1.1 agc */
423 1.1 agc void
424 1.1 agc fetchFreeURL(struct url *u)
425 1.1 agc {
426 1.1 agc free(u->doc);
427 1.1 agc free(u);
428 1.1 agc }
429