Home | History | Annotate | Line # | Download | only in libfetch
      1  1.1  agc /*-
      2  1.1  agc  * Copyright (c) 1998-2004 Dag-Erling Coïdan Smørgrav
      3  1.1  agc  * All rights reserved.
      4  1.1  agc  *
      5  1.1  agc  * Redistribution and use in source and binary forms, with or without
      6  1.1  agc  * modification, are permitted provided that the following conditions
      7  1.1  agc  * are met:
      8  1.1  agc  * 1. Redistributions of source code must retain the above copyright
      9  1.1  agc  *    notice, this list of conditions and the following disclaimer
     10  1.1  agc  *    in this position and unchanged.
     11  1.1  agc  * 2. Redistributions in binary form must reproduce the above copyright
     12  1.1  agc  *    notice, this list of conditions and the following disclaimer in the
     13  1.1  agc  *    documentation and/or other materials provided with the distribution.
     14  1.1  agc  * 3. The name of the author may not be used to endorse or promote products
     15  1.1  agc  *    derived from this software without specific prior written permission
     16  1.1  agc  *
     17  1.1  agc  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     18  1.1  agc  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     19  1.1  agc  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     20  1.1  agc  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     21  1.1  agc  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     22  1.1  agc  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     23  1.1  agc  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     24  1.1  agc  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     25  1.1  agc  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     26  1.1  agc  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     27  1.1  agc  */
     28  1.1  agc 
     29  1.1  agc #include "free2net.h"
     30  1.1  agc 
     31  1.1  agc #include <sys/cdefs.h>
     32  1.1  agc __FBSDID("$FreeBSD: src/lib/libfetch/fetch.c,v 1.37.6.1 2006/11/11 00:16:07 des Exp $");
     33  1.1  agc 
     34  1.1  agc #include <sys/param.h>
     35  1.1  agc #include <sys/errno.h>
     36  1.1  agc 
     37  1.1  agc #include <ctype.h>
     38  1.1  agc #include <stdio.h>
     39  1.1  agc #include <stdlib.h>
     40  1.1  agc #include <string.h>
     41  1.1  agc 
     42  1.1  agc #include "fetch.h"
     43  1.1  agc #include "common.h"
     44  1.1  agc 
/*
 * Library-wide globals with external linkage.  None of them is referenced
 * by name in the functions visible in this file, so the per-variable notes
 * below are inferred from the identifiers only and should be confirmed
 * against fetch.h and the code that uses them.  As file-scope objects they
 * all start out zeroed, except fetchRestartCalls, which starts at 1.
 */
auth_t	 fetchAuthMethod;	/* presumably an authentication hook -- TODO confirm */
int	 fetchLastErrCode;	/* presumably the code of the most recent error */
char	 fetchLastErrString[MAXERRSTRING];	/* presumably the matching message text */
int	 fetchTimeout;		/* presumably a timeout; units not visible here */
int	 fetchRestartCalls = 1;	/* presumably "restart interrupted calls"; on by default */
int	 fetchDebug;		/* presumably enables debug output -- TODO confirm */
     51  1.1  agc 
     52  1.1  agc 
     53  1.1  agc /*** Local data **************************************************************/
     54  1.1  agc 
/*
 * Error codes raised by the URL handling code in this file, followed by
 * the table that pairs each code with an error category and a message.
 *
 * The codes are passed to _url_seterr(), which is defined elsewhere;
 * presumably it looks the code up in this table -- confirm in common.h.
 * The last entry (code -1, FETCH_UNKNOWN) looks like the terminator /
 * fallback for codes that are not listed -- TODO confirm.
 */
#define URL_MALFORMED		1
#define URL_BAD_SCHEME		2
#define URL_BAD_PORT		3
static struct fetcherr _url_errlist[] = {
	{ URL_MALFORMED,	FETCH_URL,	"Malformed URL" },
	{ URL_BAD_SCHEME,	FETCH_URL,	"Invalid URL scheme" },
	{ URL_BAD_PORT,		FETCH_URL,	"Invalid server port" },
	{ -1,			FETCH_UNKNOWN,	"Unknown parser error" }
};
     67  1.1  agc 
     68  1.1  agc 
     69  1.1  agc /*** Public API **************************************************************/
     70  1.1  agc 
     71  1.1  agc /*
     72  1.1  agc  * Select the appropriate protocol for the URL scheme, and return a
     73  1.1  agc  * read-only stream connected to the document referenced by the URL.
     74  1.1  agc  * Also fill out the struct url_stat.
     75  1.1  agc  */
     76  1.1  agc FILE *
     77  1.1  agc fetchXGet(struct url *URL, struct url_stat *us, const char *flags)
     78  1.1  agc {
     79  1.1  agc 	if (us != NULL) {
     80  1.1  agc 		us->size = -1;
     81  1.1  agc 		us->atime = us->mtime = 0;
     82  1.1  agc 	}
     83  1.1  agc 	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
     84  1.1  agc 		return (fetchXGetFile(URL, us, flags));
     85  1.1  agc 	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
     86  1.1  agc 		return (fetchXGetFTP(URL, us, flags));
     87  1.1  agc 	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
     88  1.1  agc 		return (fetchXGetHTTP(URL, us, flags));
     89  1.1  agc 	else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
     90  1.1  agc 		return (fetchXGetHTTP(URL, us, flags));
     91  1.1  agc 	_url_seterr(URL_BAD_SCHEME);
     92  1.1  agc 	return (NULL);
     93  1.1  agc }
     94  1.1  agc 
/*
 * Convenience form of fetchXGet() for callers that do not want the
 * document's stat information: passes NULL for the url_stat argument
 * and returns fetchXGet()'s result unchanged.
 */
FILE *
fetchGet(struct url *URL, const char *flags)
{
	struct url_stat *no_stat = NULL;

	return (fetchXGet(URL, no_stat, flags));
}
    104  1.1  agc 
    105  1.1  agc /*
    106  1.1  agc  * Select the appropriate protocol for the URL scheme, and return a
    107  1.1  agc  * write-only stream connected to the document referenced by the URL.
    108  1.1  agc  */
    109  1.1  agc FILE *
    110  1.1  agc fetchPut(struct url *URL, const char *flags)
    111  1.1  agc {
    112  1.1  agc 	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
    113  1.1  agc 		return (fetchPutFile(URL, flags));
    114  1.1  agc 	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
    115  1.1  agc 		return (fetchPutFTP(URL, flags));
    116  1.1  agc 	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
    117  1.1  agc 		return (fetchPutHTTP(URL, flags));
    118  1.1  agc 	else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
    119  1.1  agc 		return (fetchPutHTTP(URL, flags));
    120  1.1  agc 	_url_seterr(URL_BAD_SCHEME);
    121  1.1  agc 	return (NULL);
    122  1.1  agc }
    123  1.1  agc 
    124  1.1  agc /*
    125  1.1  agc  * Select the appropriate protocol for the URL scheme, and return the
    126  1.1  agc  * size of the document referenced by the URL if it exists.
    127  1.1  agc  */
    128  1.1  agc int
    129  1.1  agc fetchStat(struct url *URL, struct url_stat *us, const char *flags)
    130  1.1  agc {
    131  1.1  agc 	if (us != NULL) {
    132  1.1  agc 		us->size = -1;
    133  1.1  agc 		us->atime = us->mtime = 0;
    134  1.1  agc 	}
    135  1.1  agc 	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
    136  1.1  agc 		return (fetchStatFile(URL, us, flags));
    137  1.1  agc 	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
    138  1.1  agc 		return (fetchStatFTP(URL, us, flags));
    139  1.1  agc 	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
    140  1.1  agc 		return (fetchStatHTTP(URL, us, flags));
    141  1.1  agc 	else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
    142  1.1  agc 		return (fetchStatHTTP(URL, us, flags));
    143  1.1  agc 	_url_seterr(URL_BAD_SCHEME);
    144  1.1  agc 	return (-1);
    145  1.1  agc }
    146  1.1  agc 
    147  1.1  agc /*
    148  1.1  agc  * Select the appropriate protocol for the URL scheme, and return a
    149  1.1  agc  * list of files in the directory pointed to by the URL.
    150  1.1  agc  */
    151  1.1  agc struct url_ent *
    152  1.1  agc fetchList(struct url *URL, const char *flags)
    153  1.1  agc {
    154  1.1  agc 	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
    155  1.1  agc 		return (fetchListFile(URL, flags));
    156  1.1  agc 	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
    157  1.1  agc 		return (fetchListFTP(URL, flags));
    158  1.1  agc 	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
    159  1.1  agc 		return (fetchListHTTP(URL, flags));
    160  1.1  agc 	else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
    161  1.1  agc 		return (fetchListHTTP(URL, flags));
    162  1.1  agc 	_url_seterr(URL_BAD_SCHEME);
    163  1.1  agc 	return (NULL);
    164  1.1  agc }
    165  1.1  agc 
/*
 * String front end to fetchXGet(): parse URL, hand the parsed form to
 * fetchXGet() together with us and flags, then release the parsed URL.
 *
 * Returns NULL if the string cannot be parsed; otherwise returns
 * fetchXGet()'s result.
 */
FILE *
fetchXGetURL(const char *URL, struct url_stat *us, const char *flags)
{
	struct url *parsed;
	FILE *stream;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return (NULL);

	stream = fetchXGet(parsed, us, flags);

	/* The parsed URL is only needed for the duration of the call. */
	fetchFreeURL(parsed);

	return (stream);
}
    183  1.1  agc 
/*
 * Convenience form of fetchXGetURL() for callers that do not want the
 * document's stat information: passes NULL for the url_stat argument
 * and returns fetchXGetURL()'s result unchanged.
 */
FILE *
fetchGetURL(const char *URL, const char *flags)
{
	struct url_stat *no_stat = NULL;

	return (fetchXGetURL(URL, no_stat, flags));
}
    192  1.1  agc 
/*
 * String front end to fetchPut(): parse URL, hand the parsed form to
 * fetchPut() together with flags, then release the parsed URL.
 *
 * Returns NULL if the string cannot be parsed; otherwise returns
 * fetchPut()'s result.
 */
FILE *
fetchPutURL(const char *URL, const char *flags)
{
	struct url *parsed;
	FILE *stream;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return (NULL);

	stream = fetchPut(parsed, flags);

	/* The parsed URL is only needed for the duration of the call. */
	fetchFreeURL(parsed);

	return (stream);
}
    210  1.1  agc 
/*
 * String front end to fetchStat(): parse URL, hand the parsed form to
 * fetchStat() together with us and flags, then release the parsed URL.
 *
 * Returns -1 if the string cannot be parsed; otherwise returns
 * fetchStat()'s result.
 */
int
fetchStatURL(const char *URL, struct url_stat *us, const char *flags)
{
	struct url *parsed;
	int status;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return (-1);

	status = fetchStat(parsed, us, flags);

	/* The parsed URL is only needed for the duration of the call. */
	fetchFreeURL(parsed);

	return (status);
}
    228  1.1  agc 
/*
 * String front end to fetchList(): parse URL, hand the parsed form to
 * fetchList() together with flags, then release the parsed URL.
 *
 * Returns NULL if the string cannot be parsed; otherwise returns
 * fetchList()'s result.
 */
struct url_ent *
fetchListURL(const char *URL, const char *flags)
{
	struct url *parsed;
	struct url_ent *entries;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return (NULL);

	entries = fetchList(parsed, flags);

	/* The parsed URL is only needed for the duration of the call. */
	fetchFreeURL(parsed);

	return (entries);
}
    246  1.1  agc 
    247  1.1  agc /*
    248  1.1  agc  * Make a URL
    249  1.1  agc  */
    250  1.1  agc struct url *
    251  1.1  agc fetchMakeURL(const char *scheme, const char *host, int port, const char *doc,
    252  1.1  agc     const char *user, const char *pwd)
    253  1.1  agc {
    254  1.1  agc 	struct url *u;
    255  1.1  agc 
    256  1.1  agc 	if (!scheme || (!host && !doc)) {
    257  1.1  agc 		_url_seterr(URL_MALFORMED);
    258  1.1  agc 		return (NULL);
    259  1.1  agc 	}
    260  1.1  agc 
    261  1.1  agc 	if (port < 0 || port > 65535) {
    262  1.1  agc 		_url_seterr(URL_BAD_PORT);
    263  1.1  agc 		return (NULL);
    264  1.1  agc 	}
    265  1.1  agc 
    266  1.1  agc 	/* allocate struct url */
    267  1.1  agc 	if ((u = calloc(1, sizeof(*u))) == NULL) {
    268  1.1  agc 		_fetch_syserr();
    269  1.1  agc 		return (NULL);
    270  1.1  agc 	}
    271  1.1  agc 
    272  1.1  agc 	if ((u->doc = strdup(doc ? doc : "/")) == NULL) {
    273  1.1  agc 		_fetch_syserr();
    274  1.1  agc 		free(u);
    275  1.1  agc 		return (NULL);
    276  1.1  agc 	}
    277  1.1  agc 
    278  1.1  agc #define seturl(x) snprintf(u->x, sizeof(u->x), "%s", x)
    279  1.1  agc 	seturl(scheme);
    280  1.1  agc 	seturl(host);
    281  1.1  agc 	seturl(user);
    282  1.1  agc 	seturl(pwd);
    283  1.1  agc #undef seturl
    284  1.1  agc 	u->port = port;
    285  1.1  agc 
    286  1.1  agc 	return (u);
    287  1.1  agc }
    288  1.1  agc 
    289  1.1  agc /*
    290  1.1  agc  * Split an URL into components. URL syntax is:
    291  1.1  agc  * [method:/][/[user[:pwd]@]host[:port]/][document]
    292  1.1  agc  * This almost, but not quite, RFC1738 URL syntax.
    293  1.1  agc  */
    294  1.1  agc struct url *
    295  1.1  agc fetchParseURL(const char *URL)
    296  1.1  agc {
    297  1.1  agc 	char *doc;
    298  1.1  agc 	const char *p, *q;
    299  1.1  agc 	struct url *u;
    300  1.1  agc 	int i;
    301  1.1  agc 
    302  1.1  agc 	/* allocate struct url */
    303  1.1  agc 	if ((u = calloc(1, sizeof(*u))) == NULL) {
    304  1.1  agc 		_fetch_syserr();
    305  1.1  agc 		return (NULL);
    306  1.1  agc 	}
    307  1.1  agc 
    308  1.1  agc 	/* scheme name */
    309  1.1  agc 	if ((p = strstr(URL, ":/")) != NULL) {
    310  1.1  agc 		snprintf(u->scheme, URL_SCHEMELEN+1,
    311  1.1  agc 		    "%.*s", (int)(p - URL), URL);
    312  1.1  agc 		URL = ++p;
    313  1.1  agc 		/*
    314  1.1  agc 		 * Only one slash: no host, leave slash as part of document
    315  1.1  agc 		 * Two slashes: host follows, strip slashes
    316  1.1  agc 		 */
    317  1.1  agc 		if (URL[1] == '/')
    318  1.1  agc 			URL = (p += 2);
    319  1.1  agc 	} else {
    320  1.1  agc 		p = URL;
    321  1.1  agc 	}
    322  1.1  agc 	if (!*URL || *URL == '/' || *URL == '.' ||
    323  1.1  agc 	    (u->scheme[0] == '\0' &&
    324  1.1  agc 		strchr(URL, '/') == NULL && strchr(URL, ':') == NULL))
    325  1.1  agc 		goto nohost;
    326  1.1  agc 
    327  1.1  agc 	p = strpbrk(URL, "/@");
    328  1.1  agc 	if (p && *p == '@') {
    329  1.1  agc 		/* username */
    330  1.1  agc 		for (q = URL, i = 0; (*q != ':') && (*q != '@'); q++)
    331  1.1  agc 			if (i < URL_USERLEN)
    332  1.1  agc 				u->user[i++] = *q;
    333  1.1  agc 
    334  1.1  agc 		/* password */
    335  1.1  agc 		if (*q == ':')
    336  1.1  agc 			for (q++, i = 0; (*q != ':') && (*q != '@'); q++)
    337  1.1  agc 				if (i < URL_PWDLEN)
    338  1.1  agc 					u->pwd[i++] = *q;
    339  1.1  agc 
    340  1.1  agc 		p++;
    341  1.1  agc 	} else {
    342  1.1  agc 		p = URL;
    343  1.1  agc 	}
    344  1.1  agc 
    345  1.1  agc 	/* hostname */
    346  1.1  agc #ifdef INET6
    347  1.1  agc 	if (*p == '[' && (q = strchr(p + 1, ']')) != NULL &&
    348  1.1  agc 	    (*++q == '\0' || *q == '/' || *q == ':')) {
    349  1.1  agc 		if ((i = q - p - 2) > MAXHOSTNAMELEN)
    350  1.1  agc 			i = MAXHOSTNAMELEN;
    351  1.1  agc 		strncpy(u->host, ++p, i);
    352  1.1  agc 		p = q;
    353  1.1  agc 	} else
    354  1.1  agc #endif
    355  1.1  agc 		for (i = 0; *p && (*p != '/') && (*p != ':'); p++)
    356  1.1  agc 			if (i < MAXHOSTNAMELEN)
    357  1.1  agc 				u->host[i++] = *p;
    358  1.1  agc 
    359  1.1  agc 	/* port */
    360  1.1  agc 	if (*p == ':') {
    361  1.1  agc 		for (q = ++p; *q && (*q != '/'); q++)
    362  1.1  agc 			if (isdigit((unsigned)*q))
    363  1.1  agc 				u->port = u->port * 10 + (*q - '0');
    364  1.1  agc 			else {
    365  1.1  agc 				/* invalid port */
    366  1.1  agc 				_url_seterr(URL_BAD_PORT);
    367  1.1  agc 				goto ouch;
    368  1.1  agc 			}
    369  1.1  agc 		p = q;
    370  1.1  agc 	}
    371  1.1  agc 
    372  1.1  agc nohost:
    373  1.1  agc 	/* document */
    374  1.1  agc 	if (!*p)
    375  1.1  agc 		p = "/";
    376  1.1  agc 
    377  1.1  agc 	if (strcasecmp(u->scheme, SCHEME_HTTP) == 0 ||
    378  1.1  agc 	    strcasecmp(u->scheme, SCHEME_HTTPS) == 0) {
    379  1.1  agc 		const char hexnums[] = "0123456789abcdef";
    380  1.1  agc 
    381  1.1  agc 		/* percent-escape whitespace. */
    382  1.1  agc 		if ((doc = malloc(strlen(p) * 3 + 1)) == NULL) {
    383  1.1  agc 			_fetch_syserr();
    384  1.1  agc 			goto ouch;
    385  1.1  agc 		}
    386  1.1  agc 		u->doc = doc;
    387  1.1  agc 		while (*p != '\0') {
    388  1.1  agc 			if (!isspace((unsigned)*p)) {
    389  1.1  agc 				*doc++ = *p++;
    390  1.1  agc 			} else {
    391  1.1  agc 				*doc++ = '%';
    392  1.1  agc 				*doc++ = hexnums[((unsigned int)*p) >> 4];
    393  1.1  agc 				*doc++ = hexnums[((unsigned int)*p) & 0xf];
    394  1.1  agc 				p++;
    395  1.1  agc 			}
    396  1.1  agc 		}
    397  1.1  agc 		*doc = '\0';
    398  1.1  agc 	} else if ((u->doc = strdup(p)) == NULL) {
    399  1.1  agc 		_fetch_syserr();
    400  1.1  agc 		goto ouch;
    401  1.1  agc 	}
    402  1.1  agc 
    403  1.1  agc 	DEBUG(fprintf(stderr,
    404  1.1  agc 		  "scheme:   [%s]\n"
    405  1.1  agc 		  "user:     [%s]\n"
    406  1.1  agc 		  "password: [%s]\n"
    407  1.1  agc 		  "host:     [%s]\n"
    408  1.1  agc 		  "port:     [%d]\n"
    409  1.1  agc 		  "document: [%s]\n",
    410  1.1  agc 		  u->scheme, u->user, u->pwd,
    411  1.1  agc 		  u->host, u->port, u->doc));
    412  1.1  agc 
    413  1.1  agc 	return (u);
    414  1.1  agc 
    415  1.1  agc ouch:
    416  1.1  agc 	free(u);
    417  1.1  agc 	return (NULL);
    418  1.1  agc }
    419  1.1  agc 
    420  1.1  agc /*
    421  1.1  agc  * Free a URL
    422  1.1  agc  */
    423  1.1  agc void
    424  1.1  agc fetchFreeURL(struct url *u)
    425  1.1  agc {
    426  1.1  agc 	free(u->doc);
    427  1.1  agc 	free(u);
    428  1.1  agc }
    429