Home | History | Annotate | Line # | Download | only in isc
      1  1.6  christos /*	$NetBSD: url.c,v 1.6 2025/01/26 16:25:39 christos Exp $	*/
      2  1.1  christos 
      3  1.1  christos /*
      4  1.1  christos  * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
      5  1.1  christos  *
      6  1.3  christos  * SPDX-License-Identifier: MPL-2.0 and MIT
      7  1.3  christos  *
      8  1.1  christos  * This Source Code Form is subject to the terms of the Mozilla Public
      9  1.1  christos  * License, v. 2.0. If a copy of the MPL was not distributed with this
     10  1.1  christos  * file, you can obtain one at https://mozilla.org/MPL/2.0/.
     11  1.1  christos  *
     12  1.1  christos  * See the COPYRIGHT file distributed with this work for additional
     13  1.1  christos  * information regarding copyright ownership.
     14  1.1  christos  */
     15  1.1  christos 
     16  1.1  christos /*
     17  1.1  christos  * Copyright Joyent, Inc. and other Node contributors. All rights reserved.
     18  1.1  christos  *
     19  1.1  christos  * Permission is hereby granted, free of charge, to any person obtaining a copy
     20  1.1  christos  * of this software and associated documentation files (the "Software"), to
     21  1.1  christos  * deal in the Software without restriction, including without limitation the
     22  1.1  christos  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
     23  1.1  christos  * sell copies of the Software, and to permit persons to whom the Software is
     24  1.1  christos  * furnished to do so, subject to the following conditions:
     25  1.1  christos  *
     26  1.1  christos  * The above copyright notice and this permission notice shall be included in
     27  1.1  christos  * all copies or substantial portions of the Software.
     28  1.1  christos  *
     29  1.1  christos  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     30  1.1  christos  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     31  1.1  christos  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
     32  1.1  christos  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     33  1.1  christos  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     34  1.1  christos  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     35  1.1  christos  * IN THE SOFTWARE.
     36  1.1  christos  */
     37  1.1  christos 
     38  1.1  christos #include <ctype.h>
     39  1.1  christos #include <limits.h>
     40  1.1  christos #include <stddef.h>
     41  1.1  christos #include <string.h>
     42  1.1  christos 
     43  1.1  christos #include <isc/url.h>
     44  1.1  christos #include <isc/util.h>
     45  1.1  christos 
#ifndef BIT_AT
/*
 * Evaluate to 1 when bit number 'i' of the byte array 'a' is set, and to 0
 * otherwise.  Bit 0 is the least significant bit of a[0], bit 8 the least
 * significant bit of a[1], and so on.  Note that 'i' is expanded twice.
 */
#define BIT_AT(a, i)                                    \
	((((unsigned int)(a)[(unsigned int)(i) >> 3] >> \
	   ((unsigned int)(i) & 7)) &                   \
	  1U) != 0)
#endif
     51  1.1  christos 
/*
 * T(v) contributes 'v' to a table entry in lenient mode and nothing when
 * HTTP_PARSER_STRICT is set.  It is only needed while the table below is
 * being defined.
 */
#if HTTP_PARSER_STRICT
#define T(v) 0
#else
#define T(v) (v)
#endif

/*
 * Bitmap indexed by character code 0..127 (one bit per character, eight
 * characters per byte, least significant bit first); meant to be queried
 * with BIT_AT().  A set bit marks the character as an ordinary URL
 * character.
 */
static const uint8_t normal_url_char[32] = {
	/* 0x00-0x07: nul soh stx etx eot enq ack bel -- none */
	0x00,
	/* 0x08-0x0f: bs ht nl vt np cr so si -- ht and np, lenient mode only */
	T(0x02) | T(0x10),
	/* 0x10-0x17: dle dc1 dc2 dc3 dc4 nak syn etb -- none */
	0x00,
	/* 0x18-0x1f: can em sub esc fs gs rs us -- none */
	0x00,
	/* 0x20-0x27: sp ! " # $ % & ' -- all but sp and '#' */
	0xf6,
	/* 0x28-0x2f: ( ) * + , - . / -- all */
	0xff,
	/* 0x30-0x37: 0 1 2 3 4 5 6 7 -- all */
	0xff,
	/* 0x38-0x3f: 8 9 : ; < = > ? -- all but '?' */
	0x7f,
	/* 0x40-0x47: @ A B C D E F G -- all */
	0xff,
	/* 0x48-0x4f: H I J K L M N O -- all */
	0xff,
	/* 0x50-0x57: P Q R S T U V W -- all */
	0xff,
	/* 0x58-0x5f: X Y Z [ \ ] ^ _ -- all */
	0xff,
	/* 0x60-0x67: ` a b c d e f g -- all */
	0xff,
	/* 0x68-0x6f: h i j k l m n o -- all */
	0xff,
	/* 0x70-0x77: p q r s t u v w -- all */
	0xff,
	/* 0x78-0x7f: x y z { | } ~ del -- all but del */
	0x7f,
};

#undef T
     94  1.1  christos 
/*
 * Parser states.  Of these, parse_url_char() and isc_url_parse() in this
 * file only use s_dead and the URL-related s_req_* states (from
 * s_req_spaces_before_url through s_req_fragment).  The enumerator order
 * fixes the numeric values, so do not reorder.
 */
typedef enum {
	/* Error state; important that this is > 0. */
	s_dead = 1,

	/* Start / response-line group. */
	s_start_req_or_res,
	s_res_or_resp_H,
	s_start_res,
	s_res_H,
	s_res_HT,
	s_res_HTT,
	s_res_HTTP,
	s_res_http_major,
	s_res_http_dot,
	s_res_http_minor,
	s_res_http_end,
	s_res_first_status_code,
	s_res_status_code,
	s_res_status_start,
	s_res_status,
	s_res_line_almost_done,

	/* Request-line group; the URL states live here. */
	s_start_req,

	s_req_method,
	s_req_spaces_before_url,
	s_req_schema,
	s_req_schema_slash,
	s_req_schema_slash_slash,
	s_req_server_start,
	s_req_server,
	s_req_server_with_at,
	s_req_path,
	s_req_query_string_start,
	s_req_query_string,
	s_req_fragment_start,
	s_req_fragment,
	s_req_http_start,
	s_req_http_H,
	s_req_http_HT,
	s_req_http_HTT,
	s_req_http_HTTP,
	s_req_http_I,
	s_req_http_IC,
	s_req_http_major,
	s_req_http_dot,
	s_req_http_minor,
	s_req_http_end,
	s_req_line_almost_done,

	/* Header group. */
	s_header_field_start,
	s_header_field,
	s_header_value_discard_ws,
	s_header_value_discard_ws_almost_done,
	s_header_value_discard_lws,
	s_header_value_start,
	s_header_value,
	s_header_value_lws,

	s_header_almost_done,

	/* Chunk-size group. */
	s_chunk_size_start,
	s_chunk_size,
	s_chunk_parameters,
	s_chunk_size_almost_done,

	s_headers_almost_done,
	s_headers_done,

	/*
	 * Important: 's_headers_done' must be the last 'header' state. All
	 * states after it must be 'body' states; the ordering is relied on
	 * for overflow checking (see the PARSING_HEADER() macro, which is
	 * not defined in this file).
	 */

	/* Body group. */
	s_chunk_data,
	s_chunk_data_almost_done,
	s_chunk_data_done,

	s_body_identity,
	s_body_identity_eof,

	s_message_done
} state_t;
    177  1.1  christos 
/*
 * States of the authority ("userinfo@host:port") sub-parser implemented by
 * http_parse_host_char() and driven by http_parse_host().
 */
typedef enum {
	/* Error state: the character was not acceptable. */
	s_http_host_dead = 1,

	/* Optional "userinfo@" prefix. */
	s_http_userinfo_start,
	s_http_userinfo,

	/* Host name, or bracketed IPv6 literal with optional zone. */
	s_http_host_start,
	s_http_host_v6_start,
	s_http_host,
	s_http_host_v6,
	s_http_host_v6_end,
	s_http_host_v6_zone_start,
	s_http_host_v6_zone,

	/* Optional ":port" suffix. */
	s_http_host_port_start,
	s_http_host_port
} host_state_t;
    192  1.1  christos 
/*
 * Macros for character classes; IS_URL_CHAR and IS_HOST_CHAR depend on
 * strict mode (HTTP_PARSER_STRICT).
 *
 * Every macro expands its argument more than once, so the argument must be
 * free of side effects.  The argument is fully parenthesized (including
 * inside the casts to unsigned char required by the <ctype.h> functions) so
 * that compound expressions are classified as a whole.
 */

/* The punctuation characters '-', '_', '.', '!', '~', '*', ''', '(', ')'. */
#define IS_MARK(c)                                                             \
	((c) == '-' || (c) == '_' || (c) == '.' || (c) == '!' || (c) == '~' || \
	 (c) == '*' || (c) == '\'' || (c) == '(' || (c) == ')')

/* Characters accepted in the userinfo part (and, loosely, the authority). */
#define IS_USERINFO_CHAR(c)                                                    \
	(isalnum((unsigned char)(c)) || IS_MARK(c) || (c) == '%' ||            \
	 (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
	 (c) == '$' || (c) == ',')

#if HTTP_PARSER_STRICT
/* Strict mode: only characters flagged in normal_url_char. */
#define IS_URL_CHAR(c) (BIT_AT(normal_url_char, (unsigned char)(c)))
/* Strict mode: alphanumerics, '.' and '-'. */
#define IS_HOST_CHAR(c) \
	(isalnum((unsigned char)(c)) || (c) == '.' || (c) == '-')
#else
/* Lenient mode: additionally accept any byte with the high bit set. */
#define IS_URL_CHAR(c) \
	(BIT_AT(normal_url_char, (unsigned char)(c)) || ((c) & 0x80))
/* Lenient mode: additionally accept '_' in host names. */
#define IS_HOST_CHAR(c)                                              \
	(isalnum((unsigned char)(c)) || (c) == '.' || (c) == '-' || \
	 (c) == '_')
#endif
    211  1.1  christos 
    212  1.1  christos /*
    213  1.1  christos  * Our URL parser.
    214  1.1  christos  *
    215  1.1  christos  * This is designed to be shared by http_parser_execute() for URL validation,
    216  1.1  christos  * hence it has a state transition + byte-for-byte interface. In addition, it
    217  1.1  christos  * is meant to be embedded in http_parser_parse_url(), which does the dirty
    218  1.1  christos  * work of turning state transitions URL components for its API.
    219  1.1  christos  *
    220  1.1  christos  * This function should only be invoked with non-space characters. It is
    221  1.1  christos  * assumed that the caller cares about (and can detect) the transition between
    222  1.1  christos  * URL and non-URL states by looking for these.
    223  1.1  christos  */
    224  1.1  christos static state_t
    225  1.1  christos parse_url_char(state_t s, const char ch) {
    226  1.1  christos 	if (ch == ' ' || ch == '\r' || ch == '\n') {
    227  1.6  christos 		return s_dead;
    228  1.1  christos 	}
    229  1.1  christos 
    230  1.1  christos #if HTTP_PARSER_STRICT
    231  1.1  christos 	if (ch == '\t' || ch == '\f') {
    232  1.6  christos 		return s_dead;
    233  1.1  christos 	}
    234  1.1  christos #endif
    235  1.1  christos 
    236  1.1  christos 	switch (s) {
    237  1.1  christos 	case s_req_spaces_before_url:
    238  1.1  christos 		/* Proxied requests are followed by scheme of an absolute URI
    239  1.1  christos 		 * (alpha). All methods except CONNECT are followed by '/' or
    240  1.1  christos 		 * '*'.
    241  1.1  christos 		 */
    242  1.1  christos 
    243  1.1  christos 		if (ch == '/' || ch == '*') {
    244  1.6  christos 			return s_req_path;
    245  1.1  christos 		}
    246  1.1  christos 
    247  1.1  christos 		if (isalpha((unsigned char)ch)) {
    248  1.6  christos 			return s_req_schema;
    249  1.1  christos 		}
    250  1.1  christos 
    251  1.1  christos 		break;
    252  1.1  christos 
    253  1.1  christos 	case s_req_schema:
    254  1.1  christos 		if (isalpha((unsigned char)ch)) {
    255  1.6  christos 			return s;
    256  1.1  christos 		}
    257  1.1  christos 
    258  1.1  christos 		if (ch == ':') {
    259  1.6  christos 			return s_req_schema_slash;
    260  1.1  christos 		}
    261  1.1  christos 
    262  1.1  christos 		break;
    263  1.1  christos 
    264  1.1  christos 	case s_req_schema_slash:
    265  1.1  christos 		if (ch == '/') {
    266  1.6  christos 			return s_req_schema_slash_slash;
    267  1.1  christos 		}
    268  1.1  christos 
    269  1.1  christos 		break;
    270  1.1  christos 
    271  1.1  christos 	case s_req_schema_slash_slash:
    272  1.1  christos 		if (ch == '/') {
    273  1.6  christos 			return s_req_server_start;
    274  1.1  christos 		}
    275  1.1  christos 
    276  1.1  christos 		break;
    277  1.1  christos 
    278  1.1  christos 	case s_req_server_with_at:
    279  1.1  christos 		if (ch == '@') {
    280  1.6  christos 			return s_dead;
    281  1.1  christos 		}
    282  1.1  christos 
    283  1.3  christos 		FALLTHROUGH;
    284  1.1  christos 	case s_req_server_start:
    285  1.1  christos 	case s_req_server:
    286  1.1  christos 		if (ch == '/') {
    287  1.6  christos 			return s_req_path;
    288  1.1  christos 		}
    289  1.1  christos 
    290  1.1  christos 		if (ch == '?') {
    291  1.6  christos 			return s_req_query_string_start;
    292  1.1  christos 		}
    293  1.1  christos 
    294  1.1  christos 		if (ch == '@') {
    295  1.6  christos 			return s_req_server_with_at;
    296  1.1  christos 		}
    297  1.1  christos 
    298  1.1  christos 		if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') {
    299  1.6  christos 			return s_req_server;
    300  1.1  christos 		}
    301  1.1  christos 
    302  1.1  christos 		break;
    303  1.1  christos 
    304  1.1  christos 	case s_req_path:
    305  1.1  christos 		if (IS_URL_CHAR(ch)) {
    306  1.6  christos 			return s;
    307  1.1  christos 		}
    308  1.1  christos 
    309  1.1  christos 		switch (ch) {
    310  1.1  christos 		case '?':
    311  1.6  christos 			return s_req_query_string_start;
    312  1.1  christos 
    313  1.1  christos 		case '#':
    314  1.6  christos 			return s_req_fragment_start;
    315  1.1  christos 		}
    316  1.1  christos 
    317  1.1  christos 		break;
    318  1.1  christos 
    319  1.1  christos 	case s_req_query_string_start:
    320  1.1  christos 	case s_req_query_string:
    321  1.1  christos 		if (IS_URL_CHAR(ch)) {
    322  1.6  christos 			return s_req_query_string;
    323  1.1  christos 		}
    324  1.1  christos 
    325  1.1  christos 		switch (ch) {
    326  1.1  christos 		case '?':
    327  1.1  christos 			/* allow extra '?' in query string */
    328  1.6  christos 			return s_req_query_string;
    329  1.1  christos 
    330  1.1  christos 		case '#':
    331  1.6  christos 			return s_req_fragment_start;
    332  1.1  christos 		}
    333  1.1  christos 
    334  1.1  christos 		break;
    335  1.1  christos 
    336  1.1  christos 	case s_req_fragment_start:
    337  1.1  christos 		if (IS_URL_CHAR(ch)) {
    338  1.6  christos 			return s_req_fragment;
    339  1.1  christos 		}
    340  1.1  christos 
    341  1.1  christos 		switch (ch) {
    342  1.1  christos 		case '?':
    343  1.6  christos 			return s_req_fragment;
    344  1.1  christos 
    345  1.1  christos 		case '#':
    346  1.6  christos 			return s;
    347  1.1  christos 		}
    348  1.1  christos 
    349  1.1  christos 		break;
    350  1.1  christos 
    351  1.1  christos 	case s_req_fragment:
    352  1.1  christos 		if (IS_URL_CHAR(ch)) {
    353  1.6  christos 			return s;
    354  1.1  christos 		}
    355  1.1  christos 
    356  1.1  christos 		switch (ch) {
    357  1.1  christos 		case '?':
    358  1.1  christos 		case '#':
    359  1.6  christos 			return s;
    360  1.1  christos 		}
    361  1.1  christos 
    362  1.1  christos 		break;
    363  1.1  christos 
    364  1.1  christos 	default:
    365  1.1  christos 		break;
    366  1.1  christos 	}
    367  1.1  christos 
    368  1.1  christos 	/*
    369  1.1  christos 	 * We should never fall out of the switch above unless there's an
    370  1.1  christos 	 * error.
    371  1.1  christos 	 */
    372  1.6  christos 	return s_dead;
    373  1.1  christos }
    374  1.1  christos 
    375  1.1  christos static host_state_t
    376  1.1  christos http_parse_host_char(host_state_t s, const char ch) {
    377  1.1  christos 	switch (s) {
    378  1.1  christos 	case s_http_userinfo:
    379  1.1  christos 	case s_http_userinfo_start:
    380  1.1  christos 		if (ch == '@') {
    381  1.6  christos 			return s_http_host_start;
    382  1.1  christos 		}
    383  1.1  christos 
    384  1.1  christos 		if (IS_USERINFO_CHAR(ch)) {
    385  1.6  christos 			return s_http_userinfo;
    386  1.1  christos 		}
    387  1.1  christos 		break;
    388  1.1  christos 
    389  1.1  christos 	case s_http_host_start:
    390  1.1  christos 		if (ch == '[') {
    391  1.6  christos 			return s_http_host_v6_start;
    392  1.1  christos 		}
    393  1.1  christos 
    394  1.1  christos 		if (IS_HOST_CHAR(ch)) {
    395  1.6  christos 			return s_http_host;
    396  1.1  christos 		}
    397  1.1  christos 
    398  1.1  christos 		break;
    399  1.1  christos 
    400  1.1  christos 	case s_http_host:
    401  1.1  christos 		if (IS_HOST_CHAR(ch)) {
    402  1.6  christos 			return s_http_host;
    403  1.1  christos 		}
    404  1.1  christos 
    405  1.3  christos 		FALLTHROUGH;
    406  1.1  christos 	case s_http_host_v6_end:
    407  1.1  christos 		if (ch == ':') {
    408  1.6  christos 			return s_http_host_port_start;
    409  1.1  christos 		}
    410  1.1  christos 
    411  1.1  christos 		break;
    412  1.1  christos 
    413  1.1  christos 	case s_http_host_v6:
    414  1.1  christos 		if (ch == ']') {
    415  1.6  christos 			return s_http_host_v6_end;
    416  1.1  christos 		}
    417  1.1  christos 
    418  1.3  christos 		FALLTHROUGH;
    419  1.1  christos 	case s_http_host_v6_start:
    420  1.1  christos 		if (isxdigit((unsigned char)ch) || ch == ':' || ch == '.') {
    421  1.6  christos 			return s_http_host_v6;
    422  1.1  christos 		}
    423  1.1  christos 
    424  1.1  christos 		if (s == s_http_host_v6 && ch == '%') {
    425  1.6  christos 			return s_http_host_v6_zone_start;
    426  1.1  christos 		}
    427  1.1  christos 		break;
    428  1.1  christos 
    429  1.1  christos 	case s_http_host_v6_zone:
    430  1.1  christos 		if (ch == ']') {
    431  1.6  christos 			return s_http_host_v6_end;
    432  1.1  christos 		}
    433  1.1  christos 
    434  1.3  christos 		FALLTHROUGH;
    435  1.1  christos 	case s_http_host_v6_zone_start:
    436  1.1  christos 		/* RFC 6874 Zone ID consists of 1*( unreserved / pct-encoded) */
    437  1.1  christos 		if (isalnum((unsigned char)ch) || ch == '%' || ch == '.' ||
    438  1.1  christos 		    ch == '-' || ch == '_' || ch == '~')
    439  1.1  christos 		{
    440  1.6  christos 			return s_http_host_v6_zone;
    441  1.1  christos 		}
    442  1.1  christos 		break;
    443  1.1  christos 
    444  1.1  christos 	case s_http_host_port:
    445  1.1  christos 	case s_http_host_port_start:
    446  1.1  christos 		if (isdigit((unsigned char)ch)) {
    447  1.6  christos 			return s_http_host_port;
    448  1.1  christos 		}
    449  1.1  christos 
    450  1.1  christos 		break;
    451  1.1  christos 
    452  1.1  christos 	default:
    453  1.1  christos 		break;
    454  1.1  christos 	}
    455  1.1  christos 
    456  1.6  christos 	return s_http_host_dead;
    457  1.1  christos }
    458  1.1  christos 
    459  1.1  christos static isc_result_t
    460  1.1  christos http_parse_host(const char *buf, isc_url_parser_t *up, int found_at) {
    461  1.1  christos 	host_state_t s;
    462  1.1  christos 	const char *p = NULL;
    463  1.1  christos 	size_t buflen = up->field_data[ISC_UF_HOST].off +
    464  1.1  christos 			up->field_data[ISC_UF_HOST].len;
    465  1.1  christos 
    466  1.1  christos 	REQUIRE((up->field_set & (1 << ISC_UF_HOST)) != 0);
    467  1.1  christos 
    468  1.1  christos 	up->field_data[ISC_UF_HOST].len = 0;
    469  1.1  christos 
    470  1.1  christos 	s = found_at ? s_http_userinfo_start : s_http_host_start;
    471  1.1  christos 
    472  1.1  christos 	for (p = buf + up->field_data[ISC_UF_HOST].off; p < buf + buflen; p++) {
    473  1.1  christos 		host_state_t new_s = http_parse_host_char(s, *p);
    474  1.1  christos 
    475  1.1  christos 		if (new_s == s_http_host_dead) {
    476  1.6  christos 			return ISC_R_FAILURE;
    477  1.1  christos 		}
    478  1.1  christos 
    479  1.1  christos 		switch (new_s) {
    480  1.1  christos 		case s_http_host:
    481  1.1  christos 			if (s != s_http_host) {
    482  1.1  christos 				up->field_data[ISC_UF_HOST].off =
    483  1.1  christos 					(uint16_t)(p - buf);
    484  1.1  christos 			}
    485  1.1  christos 			up->field_data[ISC_UF_HOST].len++;
    486  1.1  christos 			break;
    487  1.1  christos 
    488  1.1  christos 		case s_http_host_v6:
    489  1.1  christos 			if (s != s_http_host_v6) {
    490  1.1  christos 				up->field_data[ISC_UF_HOST].off =
    491  1.1  christos 					(uint16_t)(p - buf);
    492  1.1  christos 			}
    493  1.1  christos 			up->field_data[ISC_UF_HOST].len++;
    494  1.1  christos 			break;
    495  1.1  christos 
    496  1.1  christos 		case s_http_host_v6_zone_start:
    497  1.1  christos 		case s_http_host_v6_zone:
    498  1.1  christos 			up->field_data[ISC_UF_HOST].len++;
    499  1.1  christos 			break;
    500  1.1  christos 
    501  1.1  christos 		case s_http_host_port:
    502  1.1  christos 			if (s != s_http_host_port) {
    503  1.1  christos 				up->field_data[ISC_UF_PORT].off =
    504  1.1  christos 					(uint16_t)(p - buf);
    505  1.1  christos 				up->field_data[ISC_UF_PORT].len = 0;
    506  1.1  christos 				up->field_set |= (1 << ISC_UF_PORT);
    507  1.1  christos 			}
    508  1.1  christos 			up->field_data[ISC_UF_PORT].len++;
    509  1.1  christos 			break;
    510  1.1  christos 
    511  1.1  christos 		case s_http_userinfo:
    512  1.1  christos 			if (s != s_http_userinfo) {
    513  1.1  christos 				up->field_data[ISC_UF_USERINFO].off =
    514  1.1  christos 					(uint16_t)(p - buf);
    515  1.1  christos 				up->field_data[ISC_UF_USERINFO].len = 0;
    516  1.1  christos 				up->field_set |= (1 << ISC_UF_USERINFO);
    517  1.1  christos 			}
    518  1.1  christos 			up->field_data[ISC_UF_USERINFO].len++;
    519  1.1  christos 			break;
    520  1.1  christos 
    521  1.1  christos 		default:
    522  1.1  christos 			break;
    523  1.1  christos 		}
    524  1.1  christos 
    525  1.1  christos 		s = new_s;
    526  1.1  christos 	}
    527  1.1  christos 
    528  1.1  christos 	/* Make sure we don't end somewhere unexpected */
    529  1.1  christos 	switch (s) {
    530  1.1  christos 	case s_http_host_start:
    531  1.1  christos 	case s_http_host_v6_start:
    532  1.1  christos 	case s_http_host_v6:
    533  1.1  christos 	case s_http_host_v6_zone_start:
    534  1.1  christos 	case s_http_host_v6_zone:
    535  1.1  christos 	case s_http_host_port_start:
    536  1.1  christos 	case s_http_userinfo:
    537  1.1  christos 	case s_http_userinfo_start:
    538  1.6  christos 		return ISC_R_FAILURE;
    539  1.1  christos 	default:
    540  1.1  christos 		break;
    541  1.1  christos 	}
    542  1.1  christos 
    543  1.6  christos 	return ISC_R_SUCCESS;
    544  1.1  christos }
    545  1.1  christos 
    546  1.1  christos isc_result_t
    547  1.1  christos isc_url_parse(const char *buf, size_t buflen, bool is_connect,
    548  1.1  christos 	      isc_url_parser_t *up) {
    549  1.1  christos 	state_t s;
    550  1.1  christos 	isc_url_field_t uf, old_uf;
    551  1.1  christos 	int found_at = 0;
    552  1.1  christos 	const char *p = NULL;
    553  1.1  christos 
    554  1.1  christos 	if (buflen == 0) {
    555  1.6  christos 		return ISC_R_FAILURE;
    556  1.1  christos 	}
    557  1.1  christos 
    558  1.1  christos 	up->port = up->field_set = 0;
    559  1.1  christos 	s = is_connect ? s_req_server_start : s_req_spaces_before_url;
    560  1.1  christos 	old_uf = ISC_UF_MAX;
    561  1.1  christos 
    562  1.1  christos 	for (p = buf; p < buf + buflen; p++) {
    563  1.1  christos 		s = parse_url_char(s, *p);
    564  1.1  christos 
    565  1.1  christos 		/* Figure out the next field that we're operating on */
    566  1.1  christos 		switch (s) {
    567  1.1  christos 		case s_dead:
    568  1.6  christos 			return ISC_R_FAILURE;
    569  1.1  christos 
    570  1.1  christos 		/* Skip delimiters */
    571  1.1  christos 		case s_req_schema_slash:
    572  1.1  christos 		case s_req_schema_slash_slash:
    573  1.1  christos 		case s_req_server_start:
    574  1.1  christos 		case s_req_query_string_start:
    575  1.1  christos 		case s_req_fragment_start:
    576  1.1  christos 			continue;
    577  1.1  christos 
    578  1.1  christos 		case s_req_schema:
    579  1.1  christos 			uf = ISC_UF_SCHEMA;
    580  1.1  christos 			break;
    581  1.1  christos 
    582  1.1  christos 		case s_req_server_with_at:
    583  1.1  christos 			found_at = 1;
    584  1.3  christos 			FALLTHROUGH;
    585  1.1  christos 		case s_req_server:
    586  1.1  christos 			uf = ISC_UF_HOST;
    587  1.1  christos 			break;
    588  1.1  christos 
    589  1.1  christos 		case s_req_path:
    590  1.1  christos 			uf = ISC_UF_PATH;
    591  1.1  christos 			break;
    592  1.1  christos 
    593  1.1  christos 		case s_req_query_string:
    594  1.1  christos 			uf = ISC_UF_QUERY;
    595  1.1  christos 			break;
    596  1.1  christos 
    597  1.1  christos 		case s_req_fragment:
    598  1.1  christos 			uf = ISC_UF_FRAGMENT;
    599  1.1  christos 			break;
    600  1.1  christos 
    601  1.1  christos 		default:
    602  1.3  christos 			UNREACHABLE();
    603  1.1  christos 		}
    604  1.1  christos 
    605  1.1  christos 		/* Nothing's changed; soldier on */
    606  1.1  christos 		if (uf == old_uf) {
    607  1.1  christos 			up->field_data[uf].len++;
    608  1.1  christos 			continue;
    609  1.1  christos 		}
    610  1.1  christos 
    611  1.1  christos 		up->field_data[uf].off = (uint16_t)(p - buf);
    612  1.1  christos 		up->field_data[uf].len = 1;
    613  1.1  christos 
    614  1.1  christos 		up->field_set |= (1 << uf);
    615  1.1  christos 		old_uf = uf;
    616  1.1  christos 	}
    617  1.1  christos 
    618  1.1  christos 	/* host must be present if there is a schema */
    619  1.1  christos 	/* parsing http:///toto will fail */
    620  1.1  christos 	if ((up->field_set & (1 << ISC_UF_SCHEMA)) &&
    621  1.1  christos 	    (up->field_set & (1 << ISC_UF_HOST)) == 0)
    622  1.1  christos 	{
    623  1.6  christos 		return ISC_R_FAILURE;
    624  1.1  christos 	}
    625  1.1  christos 
    626  1.1  christos 	if (up->field_set & (1 << ISC_UF_HOST)) {
    627  1.1  christos 		isc_result_t result;
    628  1.1  christos 
    629  1.1  christos 		result = http_parse_host(buf, up, found_at);
    630  1.1  christos 		if (result != ISC_R_SUCCESS) {
    631  1.6  christos 			return result;
    632  1.1  christos 		}
    633  1.1  christos 	}
    634  1.1  christos 
    635  1.1  christos 	/* CONNECT requests can only contain "hostname:port" */
    636  1.1  christos 	if (is_connect &&
    637  1.4  christos 	    up->field_set != ((1 << ISC_UF_HOST) | (1 << ISC_UF_PORT)))
    638  1.4  christos 	{
    639  1.6  christos 		return ISC_R_FAILURE;
    640  1.1  christos 	}
    641  1.1  christos 
    642  1.1  christos 	if (up->field_set & (1 << ISC_UF_PORT)) {
    643  1.1  christos 		uint16_t off;
    644  1.1  christos 		uint16_t len;
    645  1.1  christos 		const char *pp = NULL;
    646  1.1  christos 		const char *end = NULL;
    647  1.1  christos 		unsigned long v;
    648  1.1  christos 
    649  1.1  christos 		off = up->field_data[ISC_UF_PORT].off;
    650  1.1  christos 		len = up->field_data[ISC_UF_PORT].len;
    651  1.1  christos 		end = buf + off + len;
    652  1.1  christos 
    653  1.1  christos 		/*
    654  1.1  christos 		 * NOTE: The characters are already validated and are in the
    655  1.1  christos 		 * [0-9] range
    656  1.1  christos 		 */
    657  1.1  christos 		INSIST(off + len <= buflen);
    658  1.1  christos 
    659  1.1  christos 		v = 0;
    660  1.1  christos 		for (pp = buf + off; pp < end; pp++) {
    661  1.1  christos 			v *= 10;
    662  1.1  christos 			v += *pp - '0';
    663  1.1  christos 
    664  1.1  christos 			/* Ports have a max value of 2^16 */
    665  1.1  christos 			if (v > 0xffff) {
    666  1.6  christos 				return ISC_R_RANGE;
    667  1.1  christos 			}
    668  1.1  christos 		}
    669  1.1  christos 
    670  1.1  christos 		up->port = (uint16_t)v;
    671  1.1  christos 	}
    672  1.1  christos 
    673  1.6  christos 	return ISC_R_SUCCESS;
    674  1.1  christos }
    675