diff options
author | Rob Landley <rob@landley.net> | 2022-04-08 03:59:13 -0500 |
---|---|---|
committer | Rob Landley <rob@landley.net> | 2022-04-08 03:59:13 -0500 |
commit | 418405e22175580f07cbad1298d3a7321cd8543c (patch) | |
tree | 7a7aec54448a761a6e55cc662bfc733067aabb47 | |
parent | 965fabbe2c3d24972301426f9f63919ef87ffb12 (diff) | |
download | toybox-418405e22175580f07cbad1298d3a7321cd8543c.tar.gz |
Cleanup pass on wget.
TT.postdata -> TT.p, replace both WGET_IS_HTTP macros with a test in
wget_info() setting TT.https (although still want an HTTP macro to include
the WGET_SSL test because I don't trust the compiler to do dead code
elimination based on a global in a union), inline wget_redirect() and
wget_filename() at their only caller, use xmprintf() for request rather
than copying user-defined strings into toybuf without bounds checking,
try not to assume other side sent us good data.
-rw-r--r-- | toys/pending/wget.c | 194 |
1 files changed, 79 insertions, 115 deletions
diff --git a/toys/pending/wget.c b/toys/pending/wget.c index 03f98754..01291f66 100644 --- a/toys/pending/wget.c +++ b/toys/pending/wget.c @@ -71,17 +71,14 @@ config WGET_OPENSSL #else #define WGET_SSL 0 #endif +#define HTTPS (WGET_SSL && TT.https) -#define WGET_IS_HTTP (strncmp(TT.url, "http://", 7) == 0) -#define WGET_IS_HTTPS (WGET_SSL && (strncmp(TT.url, "https://", 8) == 0)) - GLOBALS( - char *postdata; - char *O; + char *p, *O; long max_redirect; - int sock; + int sock, https; char *url; #if CFG_WGET_LIBTLS struct tls *tls; @@ -94,37 +91,33 @@ GLOBALS( // get http info in URL static void wget_info(char *url, char **host, char **port, char **path) { - *host = strafter(url, "://"); - *path = strchr(*host, '/'); - - if ((*path = strchr(*host, '/'))) { - **path = '\0'; - *path = *path + 1; - } else { - *path = ""; + char *ss = url; + + // Must start with case insensitive http:// or https:// + if (strncmp(url, "http", 4)) url = 0; + else { + url += 4; + if ((TT.https = WGET_SSL && toupper(*url=='s'))) url++; + if (!strstart(&url, "://")) url = 0; } + if (!url) error_exit("unsupported protocol: %s", ss); - if ( *host[0] == '[' && strchr(*host, ']') ) { // IPv6 - *port = strafter(*host, "]:"); - *host = *host + 1; - strchr(*host, ']')[0] = '\0'; - } else { // IPv4 - if ((*port = strchr(*host, ':'))) { - **port = '\0'; - *port = *port + 1; - } - } + if ((*path = strchr(*host = url, '/'))) *(*path++) = 0; + else *path = ""; - if (!*port && WGET_IS_HTTP) *port = "80"; - else if (!*port && WGET_IS_HTTPS) *port = "443"; - else if (!*port) error_exit("unsupported protocol"); + // Get port number and trim literal IPv6 addresses + if (**host=='[' && (ss = strchr(++*host, ']'))) { + *ss++ = 0; + *port = (*ss==':') ? ++ss : 0; + } else if ((*port = strchr(*host, ':'))) *(*port++) = 0; + if (!*port) *port = HTTPS ? "443" : "80"; } static void wget_connect(char *host, char *port) { - if (WGET_IS_HTTP) + if (!HTTPS) TT.sock = xconnectany(xgetaddrinfo(host, port, AF_UNSPEC, SOCK_STREAM, 0, 0)); - else if (WGET_IS_HTTPS) { + else { #if CFG_WGET_LIBTLS struct tls_config *cfg = NULL; uint32_t protocols; @@ -166,41 +159,43 @@ static void wget_connect(char *host, char *port) error_exit("SSL_set_fd: %s", ERR_error_string(ERR_get_error(), NULL)); if (FLAG(d)) printf("TLS: %s\n", SSL_get_cipher(TT.ssl)); -#else - error_exit("unsupported protocol"); #endif - } else error_exit("unsupported protocol"); + } } static size_t wget_read(void *buf, size_t len) { - if (WGET_IS_HTTP) return xread(TT.sock, buf, len); - else if (WGET_IS_HTTPS) { + if (!HTTPS) return xread(TT.sock, buf, len); + else { + char *err = 0; + int ret; + #if CFG_WGET_LIBTLS - ssize_t ret = tls_read(TT.tls, buf, len); - if (ret < 0) error_exit("tls_read: %s", tls_error(TT.tls)); - return ret; + if ((ret = tls_read(TT.tls, buf, len))<0) err = tls_error(TT.tls); #elif CFG_WGET_OPENSSL - int ret = SSL_read(TT.ssl, buf, (int) len); - if (ret < 0) - error_exit("SSL_read: %s", ERR_error_string(ERR_get_error(), NULL)); - return ret; + if ((ret = SSL_read(TT.ssl, buf, len))<0) + err = ERR_error_string(ERR_get_error(), 0); #endif - } else error_exit("unsupported protocol"); + if (err) error_exit("https read: %s", err); + + return ret; + } } static void wget_write(void *buf, size_t len) { - if (WGET_IS_HTTP) xwrite(TT.sock, buf, len); - else if (WGET_IS_HTTPS) { + if (!HTTPS) xwrite(TT.sock, buf, len); + else { + char *err = 0; + #if CFG_WGET_LIBTLS - if (len != tls_write(TT.tls, buf, len)) - error_exit("tls_write: %s", tls_error(TT.tls)); + if (len != tls_write(TT.tls, buf, len)) err = tls_error(TT.tls); #elif CFG_WGET_OPENSSL - if (len != SSL_write(TT.ssl, buf, (int) len)) - error_exit("SSL_write: %s", ERR_error_string(ERR_get_error(), NULL)); + if (len != SSL_write(TT.ssl, buf, len)) + err = ERR_error_string(ERR_get_error(), 0); #endif - } else error_exit("unsupported protocol"); + if (err) error_exit("https write: %s", err); + } } static void wget_close() @@ -214,18 +209,18 @@ static void wget_close() if (TT.tls) { tls_close(TT.tls); tls_free(TT.tls); - TT.tls = NULL; + TT.tls = 0; } #elif CFG_WGET_OPENSSL if (TT.ssl) { SSL_shutdown(TT.ssl); SSL_free(TT.ssl); - TT.ssl = NULL; + TT.ssl = 0; } if (TT.ctx) { SSL_CTX_free(TT.ctx); - TT.ctx = NULL; + TT.ctx = 0; } #endif } @@ -237,32 +232,12 @@ static char *wget_find_header(char *header, char *val) return result ? result + strlen(val) : 0; } -static char *wget_redirect(char *header) -{ - char *redir = wget_find_header(header, "Location: "); - - if (!redir) error_exit("could not parse redirect URL"); - - return xstrdup(redir); -} - -static char *wget_filename(char *header, char *path) -{ - char *f = wget_find_header(header, - "Content-Disposition: attachment; filename="); - - if (!f && strchr(path, '/')) f = getbasename(path); - if (!f || !*f ) f = "index.html"; - - return f; -} - void wget_main(void) { long status = 0; size_t len, c_len = 0; int fd; - char *body, *index, *host, *port, *path, *chunked; + char *body, *index, *host, *port, *path, *chunked, *ss; char agent[] = "toybox wget/" TOYBOX_VERSION; TT.url = xstrdup(toys.optargs[0]); @@ -270,67 +245,56 @@ void wget_main(void) while (status != 200) { if (!TT.max_redirect--) error_exit("Too many redirects"); + // Connect and write request wget_info(TT.url, &host, &port, &path); - if (!FLAG(p)) { - sprintf(toybuf, "GET /%s HTTP/1.1\r\nHost: %s\r\n" - "User-Agent: %s\r\nConnection: close\r\n\r\n", - path, host, agent); - } else { - sprintf(toybuf, "POST /%s HTTP/1.1\r\nHost: %s\r\n" - "User-Agent: %s\r\nConnection: close\r\n" - "Content-Length: %ld\r\n\r\n" - "%s", - path, host, agent, strlen(TT.postdata), TT.postdata); - } - if (FLAG(d)) printf("--- Request\n%s", toybuf); - + if (TT.p) sprintf(toybuf, "Content-Length: %ld\r\n", strlen(TT.p)); + ss = xmprintf("%s /%s HTTP/1.1\r\nHost: %s\r\nUser-Agent: %s\r\n" + "Connection: close\r\n%s\r\n%s", FLAG(p) ? "POST" : "GET", + path, host, agent, FLAG(p) ? toybuf : "", FLAG(p)?TT.p:""); + if (FLAG(d)) printf("--- Request\n%s", ss); wget_connect(host, port); - wget_write(toybuf, strlen(toybuf)); - - // Greedily read the HTTP response until either complete or toybuf is full - index = toybuf; - while ((len = wget_read(index, sizeof(toybuf) - (index - toybuf))) > 0) - index += len; - - //Process the response such that - // Valid ranges toybuf[0...index) valid length is (index - toybuf) - // Header ranges toybuf[0...body) header length strlen(toybuf) - // Remnant Body toybuf[body...index) valid remnant body length is len - // - // Per RFC7230 the header cannot contain a NUL octet so we NUL terminate at - // the footer of the header. This allows for normal string functions to be - // used when processing the header. - body = memmem(toybuf, index - toybuf, "\r\n\r\n", 4); - if (!body) error_exit("response header too large"); - body[0] = '\0'; // NUL terminate the headers - body += 4; // Skip to the head of body - len = index - body; // Adjust len to be body length + wget_write(ss, strlen(ss)); + free(ss); + + // Read HTTP response until either complete or toybuf is full + for (index = toybuf; + (len = wget_read(index, sizeof(toybuf)-(index-toybuf)))>0; index += len); + + // Split response into header and body, and null terminate header. + // (RFC7230 says header cannot contain NUL.) + if (!(body = memmem(toybuf, index-toybuf, "\r\n\r\n", 4))) + error_exit("response header too large"); + *body = 0; + body += 4; + len = index-body; if (FLAG(d)) printf("--- Response\n%s\n\n", toybuf); status = strtol(strafter(toybuf, " "), NULL, 10); if ((status == 301) || (status == 302)) { + if (!(ss = wget_find_header(toybuf, "Location: "))) + error_exit("bad redirect"); free(TT.url); - TT.url = wget_redirect(toybuf); + TT.url = xstrdup(ss); wget_close(); } else if (status != 200) error_exit("response: %ld", status); } if (!FLAG(O)) { - TT.O = wget_filename(toybuf, path); - if (!access(TT.O, F_OK)) error_exit("%s already exists", TT.O); + ss = wget_find_header(toybuf, "Content-Disposition: attachment; filename="); + if (!ss && strchr(path, '/')) ss = getbasename(path); + if (!ss || !*ss ) ss = "index.html"; + if (!access((TT.O = ss), F_OK)) error_exit("%s already exists", TT.O); } + // TODO: don't allow header/basename to write to stdout fd = !strcmp(TT.O, "-") ? 1 : xcreate(TT.O, (O_WRONLY|O_CREAT|O_TRUNC), 0644); - chunked = wget_find_header(toybuf, "transfer-encoding: chunked"); - // If chunked we offset the first buffer by 2 character, meaning it is // pointing at half of the header boundary, aka '\r\n'. This simplifies // parsing of the first c_len length by allowing the do while loop to fall // through on the first iteration and parse the first c_len size. - if (chunked) { - len = len + 2; - memmove(toybuf, body - 2, len); - } else memmove(toybuf, body, len); + chunked = wget_find_header(toybuf, "transfer-encoding: chunked"); + if (chunked) memmove(toybuf, body-2, len += 2); + else memmove(toybuf, body, len); // len is the size remaining in toybuf // c_len is the size of the remaining bytes in the current chunk |