1 1.6 christos /* $NetBSD: url.c,v 1.6 2025/01/26 16:25:39 christos Exp $ */ 2 1.1 christos 3 1.1 christos /* 4 1.1 christos * Copyright (C) Internet Systems Consortium, Inc. ("ISC") 5 1.1 christos * 6 1.3 christos * SPDX-License-Identifier: MPL-2.0 and MIT 7 1.3 christos * 8 1.1 christos * This Source Code Form is subject to the terms of the Mozilla Public 9 1.1 christos * License, v. 2.0. If a copy of the MPL was not distributed with this 10 1.1 christos * file, you can obtain one at https://mozilla.org/MPL/2.0/. 11 1.1 christos * 12 1.1 christos * See the COPYRIGHT file distributed with this work for additional 13 1.1 christos * information regarding copyright ownership. 14 1.1 christos */ 15 1.1 christos 16 1.1 christos /* 17 1.1 christos * Copyright Joyent, Inc. and other Node contributors. All rights reserved. 18 1.1 christos * 19 1.1 christos * Permission is hereby granted, free of charge, to any person obtaining a copy 20 1.1 christos * of this software and associated documentation files (the "Software"), to 21 1.1 christos * deal in the Software without restriction, including without limitation the 22 1.1 christos * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 23 1.1 christos * sell copies of the Software, and to permit persons to whom the Software is 24 1.1 christos * furnished to do so, subject to the following conditions: 25 1.1 christos * 26 1.1 christos * The above copyright notice and this permission notice shall be included in 27 1.1 christos * all copies or substantial portions of the Software. 28 1.1 christos * 29 1.1 christos * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 30 1.1 christos * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 31 1.1 christos * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 32 1.1 christos * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 33 1.1 christos * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 34 1.1 christos * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 35 1.1 christos * IN THE SOFTWARE. 36 1.1 christos */ 37 1.1 christos 38 1.1 christos #include <ctype.h> 39 1.1 christos #include <limits.h> 40 1.1 christos #include <stddef.h> 41 1.1 christos #include <string.h> 42 1.1 christos 43 1.1 christos #include <isc/url.h> 44 1.1 christos #include <isc/util.h> 45 1.1 christos 46 1.1 christos #ifndef BIT_AT 47 1.1 christos #define BIT_AT(a, i) \ 48 1.1 christos (!!((unsigned int)(a)[(unsigned int)(i) >> 3] & \ 49 1.5 christos (1 << ((unsigned int)(i) & 7)))) 50 1.1 christos #endif 51 1.1 christos 52 1.1 christos #if HTTP_PARSER_STRICT 53 1.1 christos #define T(v) 0 54 1.1 christos #else 55 1.1 christos #define T(v) v 56 1.1 christos #endif 57 1.1 christos 58 1.1 christos static const uint8_t normal_url_char[32] = { 59 1.1 christos /* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */ 60 1.1 christos 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0, 61 1.1 christos /* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */ 62 1.1 christos 0 | T(2) | 0 | 0 | T(16) | 0 | 0 | 0, 63 1.1 christos /* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */ 64 1.1 christos 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0, 65 1.1 christos /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */ 66 1.1 christos 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0, 67 1.1 christos /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */ 68 1.1 christos 0 | 2 | 4 | 0 | 16 | 32 | 64 | 128, 69 1.1 christos /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */ 70 1.1 christos 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, 71 1.1 christos /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */ 72 1.1 christos 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, 73 1.1 christos /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */ 74 1.1 christos 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0, 75 1.1 christos /* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */ 76 1.1 christos 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, 77 1.1 christos /* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */ 78 1.1 christos 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, 79 1.1 christos /* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */ 80 1.1 christos 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, 81 1.1 christos /* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */ 82 1.1 christos 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, 83 1.1 christos /* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */ 84 1.1 christos 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, 85 1.1 christos /* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */ 86 1.1 christos 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, 87 1.1 christos /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */ 88 1.1 christos 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, 89 1.1 christos /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */ 90 1.1 christos 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0, 91 1.1 christos }; 92 1.1 christos 93 1.1 christos #undef T 94 1.1 christos 95 1.1 christos typedef enum { 96 1.1 christos s_dead = 1, /* important that this is > 0 */ 97 1.1 christos 98 1.1 christos s_start_req_or_res, 99 1.1 christos s_res_or_resp_H, 100 1.1 christos s_start_res, 101 1.1 christos s_res_H, 102 1.1 christos s_res_HT, 103 1.1 christos s_res_HTT, 104 1.1 christos s_res_HTTP, 105 1.1 christos s_res_http_major, 106 1.1 christos s_res_http_dot, 107 1.1 christos s_res_http_minor, 108 1.1 christos s_res_http_end, 109 1.1 christos s_res_first_status_code, 110 1.1 christos s_res_status_code, 111 1.1 christos s_res_status_start, 112 1.1 christos s_res_status, 113 1.1 christos s_res_line_almost_done, 114 1.1 christos 115 1.1 christos s_start_req, 116 1.1 christos 117 1.1 christos s_req_method, 118 1.1 christos s_req_spaces_before_url, 119 1.1 christos s_req_schema, 120 1.1 christos s_req_schema_slash, 121 1.1 christos s_req_schema_slash_slash, 122 1.1 christos s_req_server_start, 123 1.1 christos s_req_server, 124 1.1 christos s_req_server_with_at, 125 1.1 christos s_req_path, 126 1.1 christos s_req_query_string_start, 127 1.1 christos s_req_query_string, 128 1.1 christos s_req_fragment_start, 129 1.1 christos s_req_fragment, 130 1.1 christos s_req_http_start, 131 1.1 christos s_req_http_H, 132 1.1 christos s_req_http_HT, 133 1.1 christos s_req_http_HTT, 134 1.1 christos s_req_http_HTTP, 135 1.1 christos s_req_http_I, 136 1.1 christos s_req_http_IC, 137 1.1 christos s_req_http_major, 138 1.1 christos s_req_http_dot, 139 1.1 christos s_req_http_minor, 140 1.1 christos s_req_http_end, 141 1.1 christos s_req_line_almost_done, 142 1.1 christos 143 1.1 christos s_header_field_start, 144 1.1 christos s_header_field, 145 1.1 christos s_header_value_discard_ws, 146 1.1 christos s_header_value_discard_ws_almost_done, 147 1.1 christos s_header_value_discard_lws, 148 1.1 christos s_header_value_start, 149 1.1 christos s_header_value, 150 1.1 christos s_header_value_lws, 151 1.1 christos 152 1.1 christos s_header_almost_done, 153 1.1 christos 154 1.1 christos s_chunk_size_start, 155 1.1 christos s_chunk_size, 156 1.1 christos s_chunk_parameters, 157 1.1 christos s_chunk_size_almost_done, 158 1.1 christos 159 1.1 christos s_headers_almost_done, 160 1.1 christos s_headers_done, 161 1.1 christos 162 1.1 christos /* 163 1.1 christos * Important: 's_headers_done' must be the last 'header' state. All 164 1.1 christos * states beyond this must be 'body' states. It is used for overflow 165 1.1 christos * checking. See the PARSING_HEADER() macro. 166 1.1 christos */ 167 1.1 christos 168 1.1 christos s_chunk_data, 169 1.1 christos s_chunk_data_almost_done, 170 1.1 christos s_chunk_data_done, 171 1.1 christos 172 1.1 christos s_body_identity, 173 1.1 christos s_body_identity_eof, 174 1.1 christos 175 1.1 christos s_message_done 176 1.1 christos } state_t; 177 1.1 christos 178 1.1 christos typedef enum { 179 1.1 christos s_http_host_dead = 1, 180 1.1 christos s_http_userinfo_start, 181 1.1 christos s_http_userinfo, 182 1.1 christos s_http_host_start, 183 1.1 christos s_http_host_v6_start, 184 1.1 christos s_http_host, 185 1.1 christos s_http_host_v6, 186 1.1 christos s_http_host_v6_end, 187 1.1 christos s_http_host_v6_zone_start, 188 1.1 christos s_http_host_v6_zone, 189 1.1 christos s_http_host_port_start, 190 1.1 christos s_http_host_port 191 1.1 christos } host_state_t; 192 1.1 christos 193 1.1 christos /* Macros for character classes; depends on strict-mode */ 194 1.1 christos #define IS_MARK(c) \ 195 1.1 christos ((c) == '-' || (c) == '_' || (c) == '.' || (c) == '!' || (c) == '~' || \ 196 1.1 christos (c) == '*' || (c) == '\'' || (c) == '(' || (c) == ')') 197 1.1 christos #define IS_USERINFO_CHAR(c) \ 198 1.1 christos (isalnum((unsigned char)c) || IS_MARK(c) || (c) == '%' || \ 199 1.1 christos (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \ 200 1.1 christos (c) == '$' || (c) == ',') 201 1.1 christos 202 1.1 christos #if HTTP_PARSER_STRICT 203 1.1 christos #define IS_URL_CHAR(c) (BIT_AT(normal_url_char, (unsigned char)c)) 204 1.1 christos #define IS_HOST_CHAR(c) (isalnum((unsigned char)c) || (c) == '.' || (c) == '-') 205 1.1 christos #else 206 1.5 christos #define IS_URL_CHAR(c) \ 207 1.5 christos (BIT_AT(normal_url_char, (unsigned char)c) || ((c) & 0x80)) 208 1.1 christos #define IS_HOST_CHAR(c) \ 209 1.1 christos (isalnum((unsigned char)c) || (c) == '.' || (c) == '-' || (c) == '_') 210 1.1 christos #endif 211 1.1 christos 212 1.1 christos /* 213 1.1 christos * Our URL parser. 214 1.1 christos * 215 1.1 christos * This is designed to be shared by http_parser_execute() for URL validation, 216 1.1 christos * hence it has a state transition + byte-for-byte interface. In addition, it 217 1.1 christos * is meant to be embedded in http_parser_parse_url(), which does the dirty 218 1.1 christos * work of turning state transitions URL components for its API. 219 1.1 christos * 220 1.1 christos * This function should only be invoked with non-space characters. It is 221 1.1 christos * assumed that the caller cares about (and can detect) the transition between 222 1.1 christos * URL and non-URL states by looking for these. 223 1.1 christos */ 224 1.1 christos static state_t 225 1.1 christos parse_url_char(state_t s, const char ch) { 226 1.1 christos if (ch == ' ' || ch == '\r' || ch == '\n') { 227 1.6 christos return s_dead; 228 1.1 christos } 229 1.1 christos 230 1.1 christos #if HTTP_PARSER_STRICT 231 1.1 christos if (ch == '\t' || ch == '\f') { 232 1.6 christos return s_dead; 233 1.1 christos } 234 1.1 christos #endif 235 1.1 christos 236 1.1 christos switch (s) { 237 1.1 christos case s_req_spaces_before_url: 238 1.1 christos /* Proxied requests are followed by scheme of an absolute URI 239 1.1 christos * (alpha). All methods except CONNECT are followed by '/' or 240 1.1 christos * '*'. 241 1.1 christos */ 242 1.1 christos 243 1.1 christos if (ch == '/' || ch == '*') { 244 1.6 christos return s_req_path; 245 1.1 christos } 246 1.1 christos 247 1.1 christos if (isalpha((unsigned char)ch)) { 248 1.6 christos return s_req_schema; 249 1.1 christos } 250 1.1 christos 251 1.1 christos break; 252 1.1 christos 253 1.1 christos case s_req_schema: 254 1.1 christos if (isalpha((unsigned char)ch)) { 255 1.6 christos return s; 256 1.1 christos } 257 1.1 christos 258 1.1 christos if (ch == ':') { 259 1.6 christos return s_req_schema_slash; 260 1.1 christos } 261 1.1 christos 262 1.1 christos break; 263 1.1 christos 264 1.1 christos case s_req_schema_slash: 265 1.1 christos if (ch == '/') { 266 1.6 christos return s_req_schema_slash_slash; 267 1.1 christos } 268 1.1 christos 269 1.1 christos break; 270 1.1 christos 271 1.1 christos case s_req_schema_slash_slash: 272 1.1 christos if (ch == '/') { 273 1.6 christos return s_req_server_start; 274 1.1 christos } 275 1.1 christos 276 1.1 christos break; 277 1.1 christos 278 1.1 christos case s_req_server_with_at: 279 1.1 christos if (ch == '@') { 280 1.6 christos return s_dead; 281 1.1 christos } 282 1.1 christos 283 1.3 christos FALLTHROUGH; 284 1.1 christos case s_req_server_start: 285 1.1 christos case s_req_server: 286 1.1 christos if (ch == '/') { 287 1.6 christos return s_req_path; 288 1.1 christos } 289 1.1 christos 290 1.1 christos if (ch == '?') { 291 1.6 christos return s_req_query_string_start; 292 1.1 christos } 293 1.1 christos 294 1.1 christos if (ch == '@') { 295 1.6 christos return s_req_server_with_at; 296 1.1 christos } 297 1.1 christos 298 1.1 christos if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') { 299 1.6 christos return s_req_server; 300 1.1 christos } 301 1.1 christos 302 1.1 christos break; 303 1.1 christos 304 1.1 christos case s_req_path: 305 1.1 christos if (IS_URL_CHAR(ch)) { 306 1.6 christos return s; 307 1.1 christos } 308 1.1 christos 309 1.1 christos switch (ch) { 310 1.1 christos case '?': 311 1.6 christos return s_req_query_string_start; 312 1.1 christos 313 1.1 christos case '#': 314 1.6 christos return s_req_fragment_start; 315 1.1 christos } 316 1.1 christos 317 1.1 christos break; 318 1.1 christos 319 1.1 christos case s_req_query_string_start: 320 1.1 christos case s_req_query_string: 321 1.1 christos if (IS_URL_CHAR(ch)) { 322 1.6 christos return s_req_query_string; 323 1.1 christos } 324 1.1 christos 325 1.1 christos switch (ch) { 326 1.1 christos case '?': 327 1.1 christos /* allow extra '?' in query string */ 328 1.6 christos return s_req_query_string; 329 1.1 christos 330 1.1 christos case '#': 331 1.6 christos return s_req_fragment_start; 332 1.1 christos } 333 1.1 christos 334 1.1 christos break; 335 1.1 christos 336 1.1 christos case s_req_fragment_start: 337 1.1 christos if (IS_URL_CHAR(ch)) { 338 1.6 christos return s_req_fragment; 339 1.1 christos } 340 1.1 christos 341 1.1 christos switch (ch) { 342 1.1 christos case '?': 343 1.6 christos return s_req_fragment; 344 1.1 christos 345 1.1 christos case '#': 346 1.6 christos return s; 347 1.1 christos } 348 1.1 christos 349 1.1 christos break; 350 1.1 christos 351 1.1 christos case s_req_fragment: 352 1.1 christos if (IS_URL_CHAR(ch)) { 353 1.6 christos return s; 354 1.1 christos } 355 1.1 christos 356 1.1 christos switch (ch) { 357 1.1 christos case '?': 358 1.1 christos case '#': 359 1.6 christos return s; 360 1.1 christos } 361 1.1 christos 362 1.1 christos break; 363 1.1 christos 364 1.1 christos default: 365 1.1 christos break; 366 1.1 christos } 367 1.1 christos 368 1.1 christos /* 369 1.1 christos * We should never fall out of the switch above unless there's an 370 1.1 christos * error. 371 1.1 christos */ 372 1.6 christos return s_dead; 373 1.1 christos } 374 1.1 christos 375 1.1 christos static host_state_t 376 1.1 christos http_parse_host_char(host_state_t s, const char ch) { 377 1.1 christos switch (s) { 378 1.1 christos case s_http_userinfo: 379 1.1 christos case s_http_userinfo_start: 380 1.1 christos if (ch == '@') { 381 1.6 christos return s_http_host_start; 382 1.1 christos } 383 1.1 christos 384 1.1 christos if (IS_USERINFO_CHAR(ch)) { 385 1.6 christos return s_http_userinfo; 386 1.1 christos } 387 1.1 christos break; 388 1.1 christos 389 1.1 christos case s_http_host_start: 390 1.1 christos if (ch == '[') { 391 1.6 christos return s_http_host_v6_start; 392 1.1 christos } 393 1.1 christos 394 1.1 christos if (IS_HOST_CHAR(ch)) { 395 1.6 christos return s_http_host; 396 1.1 christos } 397 1.1 christos 398 1.1 christos break; 399 1.1 christos 400 1.1 christos case s_http_host: 401 1.1 christos if (IS_HOST_CHAR(ch)) { 402 1.6 christos return s_http_host; 403 1.1 christos } 404 1.1 christos 405 1.3 christos FALLTHROUGH; 406 1.1 christos case s_http_host_v6_end: 407 1.1 christos if (ch == ':') { 408 1.6 christos return s_http_host_port_start; 409 1.1 christos } 410 1.1 christos 411 1.1 christos break; 412 1.1 christos 413 1.1 christos case s_http_host_v6: 414 1.1 christos if (ch == ']') { 415 1.6 christos return s_http_host_v6_end; 416 1.1 christos } 417 1.1 christos 418 1.3 christos FALLTHROUGH; 419 1.1 christos case s_http_host_v6_start: 420 1.1 christos if (isxdigit((unsigned char)ch) || ch == ':' || ch == '.') { 421 1.6 christos return s_http_host_v6; 422 1.1 christos } 423 1.1 christos 424 1.1 christos if (s == s_http_host_v6 && ch == '%') { 425 1.6 christos return s_http_host_v6_zone_start; 426 1.1 christos } 427 1.1 christos break; 428 1.1 christos 429 1.1 christos case s_http_host_v6_zone: 430 1.1 christos if (ch == ']') { 431 1.6 christos return s_http_host_v6_end; 432 1.1 christos } 433 1.1 christos 434 1.3 christos FALLTHROUGH; 435 1.1 christos case s_http_host_v6_zone_start: 436 1.1 christos /* RFC 6874 Zone ID consists of 1*( unreserved / pct-encoded) */ 437 1.1 christos if (isalnum((unsigned char)ch) || ch == '%' || ch == '.' || 438 1.1 christos ch == '-' || ch == '_' || ch == '~') 439 1.1 christos { 440 1.6 christos return s_http_host_v6_zone; 441 1.1 christos } 442 1.1 christos break; 443 1.1 christos 444 1.1 christos case s_http_host_port: 445 1.1 christos case s_http_host_port_start: 446 1.1 christos if (isdigit((unsigned char)ch)) { 447 1.6 christos return s_http_host_port; 448 1.1 christos } 449 1.1 christos 450 1.1 christos break; 451 1.1 christos 452 1.1 christos default: 453 1.1 christos break; 454 1.1 christos } 455 1.1 christos 456 1.6 christos return s_http_host_dead; 457 1.1 christos } 458 1.1 christos 459 1.1 christos static isc_result_t 460 1.1 christos http_parse_host(const char *buf, isc_url_parser_t *up, int found_at) { 461 1.1 christos host_state_t s; 462 1.1 christos const char *p = NULL; 463 1.1 christos size_t buflen = up->field_data[ISC_UF_HOST].off + 464 1.1 christos up->field_data[ISC_UF_HOST].len; 465 1.1 christos 466 1.1 christos REQUIRE((up->field_set & (1 << ISC_UF_HOST)) != 0); 467 1.1 christos 468 1.1 christos up->field_data[ISC_UF_HOST].len = 0; 469 1.1 christos 470 1.1 christos s = found_at ? s_http_userinfo_start : s_http_host_start; 471 1.1 christos 472 1.1 christos for (p = buf + up->field_data[ISC_UF_HOST].off; p < buf + buflen; p++) { 473 1.1 christos host_state_t new_s = http_parse_host_char(s, *p); 474 1.1 christos 475 1.1 christos if (new_s == s_http_host_dead) { 476 1.6 christos return ISC_R_FAILURE; 477 1.1 christos } 478 1.1 christos 479 1.1 christos switch (new_s) { 480 1.1 christos case s_http_host: 481 1.1 christos if (s != s_http_host) { 482 1.1 christos up->field_data[ISC_UF_HOST].off = 483 1.1 christos (uint16_t)(p - buf); 484 1.1 christos } 485 1.1 christos up->field_data[ISC_UF_HOST].len++; 486 1.1 christos break; 487 1.1 christos 488 1.1 christos case s_http_host_v6: 489 1.1 christos if (s != s_http_host_v6) { 490 1.1 christos up->field_data[ISC_UF_HOST].off = 491 1.1 christos (uint16_t)(p - buf); 492 1.1 christos } 493 1.1 christos up->field_data[ISC_UF_HOST].len++; 494 1.1 christos break; 495 1.1 christos 496 1.1 christos case s_http_host_v6_zone_start: 497 1.1 christos case s_http_host_v6_zone: 498 1.1 christos up->field_data[ISC_UF_HOST].len++; 499 1.1 christos break; 500 1.1 christos 501 1.1 christos case s_http_host_port: 502 1.1 christos if (s != s_http_host_port) { 503 1.1 christos up->field_data[ISC_UF_PORT].off = 504 1.1 christos (uint16_t)(p - buf); 505 1.1 christos up->field_data[ISC_UF_PORT].len = 0; 506 1.1 christos up->field_set |= (1 << ISC_UF_PORT); 507 1.1 christos } 508 1.1 christos up->field_data[ISC_UF_PORT].len++; 509 1.1 christos break; 510 1.1 christos 511 1.1 christos case s_http_userinfo: 512 1.1 christos if (s != s_http_userinfo) { 513 1.1 christos up->field_data[ISC_UF_USERINFO].off = 514 1.1 christos (uint16_t)(p - buf); 515 1.1 christos up->field_data[ISC_UF_USERINFO].len = 0; 516 1.1 christos up->field_set |= (1 << ISC_UF_USERINFO); 517 1.1 christos } 518 1.1 christos up->field_data[ISC_UF_USERINFO].len++; 519 1.1 christos break; 520 1.1 christos 521 1.1 christos default: 522 1.1 christos break; 523 1.1 christos } 524 1.1 christos 525 1.1 christos s = new_s; 526 1.1 christos } 527 1.1 christos 528 1.1 christos /* Make sure we don't end somewhere unexpected */ 529 1.1 christos switch (s) { 530 1.1 christos case s_http_host_start: 531 1.1 christos case s_http_host_v6_start: 532 1.1 christos case s_http_host_v6: 533 1.1 christos case s_http_host_v6_zone_start: 534 1.1 christos case s_http_host_v6_zone: 535 1.1 christos case s_http_host_port_start: 536 1.1 christos case s_http_userinfo: 537 1.1 christos case s_http_userinfo_start: 538 1.6 christos return ISC_R_FAILURE; 539 1.1 christos default: 540 1.1 christos break; 541 1.1 christos } 542 1.1 christos 543 1.6 christos return ISC_R_SUCCESS; 544 1.1 christos } 545 1.1 christos 546 1.1 christos isc_result_t 547 1.1 christos isc_url_parse(const char *buf, size_t buflen, bool is_connect, 548 1.1 christos isc_url_parser_t *up) { 549 1.1 christos state_t s; 550 1.1 christos isc_url_field_t uf, old_uf; 551 1.1 christos int found_at = 0; 552 1.1 christos const char *p = NULL; 553 1.1 christos 554 1.1 christos if (buflen == 0) { 555 1.6 christos return ISC_R_FAILURE; 556 1.1 christos } 557 1.1 christos 558 1.1 christos up->port = up->field_set = 0; 559 1.1 christos s = is_connect ? s_req_server_start : s_req_spaces_before_url; 560 1.1 christos old_uf = ISC_UF_MAX; 561 1.1 christos 562 1.1 christos for (p = buf; p < buf + buflen; p++) { 563 1.1 christos s = parse_url_char(s, *p); 564 1.1 christos 565 1.1 christos /* Figure out the next field that we're operating on */ 566 1.1 christos switch (s) { 567 1.1 christos case s_dead: 568 1.6 christos return ISC_R_FAILURE; 569 1.1 christos 570 1.1 christos /* Skip delimiters */ 571 1.1 christos case s_req_schema_slash: 572 1.1 christos case s_req_schema_slash_slash: 573 1.1 christos case s_req_server_start: 574 1.1 christos case s_req_query_string_start: 575 1.1 christos case s_req_fragment_start: 576 1.1 christos continue; 577 1.1 christos 578 1.1 christos case s_req_schema: 579 1.1 christos uf = ISC_UF_SCHEMA; 580 1.1 christos break; 581 1.1 christos 582 1.1 christos case s_req_server_with_at: 583 1.1 christos found_at = 1; 584 1.3 christos FALLTHROUGH; 585 1.1 christos case s_req_server: 586 1.1 christos uf = ISC_UF_HOST; 587 1.1 christos break; 588 1.1 christos 589 1.1 christos case s_req_path: 590 1.1 christos uf = ISC_UF_PATH; 591 1.1 christos break; 592 1.1 christos 593 1.1 christos case s_req_query_string: 594 1.1 christos uf = ISC_UF_QUERY; 595 1.1 christos break; 596 1.1 christos 597 1.1 christos case s_req_fragment: 598 1.1 christos uf = ISC_UF_FRAGMENT; 599 1.1 christos break; 600 1.1 christos 601 1.1 christos default: 602 1.3 christos UNREACHABLE(); 603 1.1 christos } 604 1.1 christos 605 1.1 christos /* Nothing's changed; soldier on */ 606 1.1 christos if (uf == old_uf) { 607 1.1 christos up->field_data[uf].len++; 608 1.1 christos continue; 609 1.1 christos } 610 1.1 christos 611 1.1 christos up->field_data[uf].off = (uint16_t)(p - buf); 612 1.1 christos up->field_data[uf].len = 1; 613 1.1 christos 614 1.1 christos up->field_set |= (1 << uf); 615 1.1 christos old_uf = uf; 616 1.1 christos } 617 1.1 christos 618 1.1 christos /* host must be present if there is a schema */ 619 1.1 christos /* parsing http:///toto will fail */ 620 1.1 christos if ((up->field_set & (1 << ISC_UF_SCHEMA)) && 621 1.1 christos (up->field_set & (1 << ISC_UF_HOST)) == 0) 622 1.1 christos { 623 1.6 christos return ISC_R_FAILURE; 624 1.1 christos } 625 1.1 christos 626 1.1 christos if (up->field_set & (1 << ISC_UF_HOST)) { 627 1.1 christos isc_result_t result; 628 1.1 christos 629 1.1 christos result = http_parse_host(buf, up, found_at); 630 1.1 christos if (result != ISC_R_SUCCESS) { 631 1.6 christos return result; 632 1.1 christos } 633 1.1 christos } 634 1.1 christos 635 1.1 christos /* CONNECT requests can only contain "hostname:port" */ 636 1.1 christos if (is_connect && 637 1.4 christos up->field_set != ((1 << ISC_UF_HOST) | (1 << ISC_UF_PORT))) 638 1.4 christos { 639 1.6 christos return ISC_R_FAILURE; 640 1.1 christos } 641 1.1 christos 642 1.1 christos if (up->field_set & (1 << ISC_UF_PORT)) { 643 1.1 christos uint16_t off; 644 1.1 christos uint16_t len; 645 1.1 christos const char *pp = NULL; 646 1.1 christos const char *end = NULL; 647 1.1 christos unsigned long v; 648 1.1 christos 649 1.1 christos off = up->field_data[ISC_UF_PORT].off; 650 1.1 christos len = up->field_data[ISC_UF_PORT].len; 651 1.1 christos end = buf + off + len; 652 1.1 christos 653 1.1 christos /* 654 1.1 christos * NOTE: The characters are already validated and are in the 655 1.1 christos * [0-9] range 656 1.1 christos */ 657 1.1 christos INSIST(off + len <= buflen); 658 1.1 christos 659 1.1 christos v = 0; 660 1.1 christos for (pp = buf + off; pp < end; pp++) { 661 1.1 christos v *= 10; 662 1.1 christos v += *pp - '0'; 663 1.1 christos 664 1.1 christos /* Ports have a max value of 2^16 */ 665 1.1 christos if (v > 0xffff) { 666 1.6 christos return ISC_R_RANGE; 667 1.1 christos } 668 1.1 christos } 669 1.1 christos 670 1.1 christos up->port = (uint16_t)v; 671 1.1 christos } 672 1.1 christos 673 1.6 christos return ISC_R_SUCCESS; 674 1.1 christos } 675