aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRob Landley <rob@landley.net>2022-04-08 03:59:13 -0500
committerRob Landley <rob@landley.net>2022-04-08 03:59:13 -0500
commit418405e22175580f07cbad1298d3a7321cd8543c (patch)
tree7a7aec54448a761a6e55cc662bfc733067aabb47
parent965fabbe2c3d24972301426f9f63919ef87ffb12 (diff)
downloadtoybox-418405e22175580f07cbad1298d3a7321cd8543c.tar.gz
Cleanup pass on wget.
TT.postdata -> TT.p, replace both WGET_IS_HTTP macros with a test in wget_info() setting TT.https (although still want an HTTP macro to include the WGET_SSL test because I don't trust the compiler to do dead code elimination based on a global in a union), inline wget_redirect() and wget_filename() at their only caller, use xmprintf() for request rather than copying user-defined strings into toybuf without bounds checking, try not to assume other side sent us good data.
-rw-r--r--toys/pending/wget.c194
1 files changed, 79 insertions, 115 deletions
diff --git a/toys/pending/wget.c b/toys/pending/wget.c
index 03f98754..01291f66 100644
--- a/toys/pending/wget.c
+++ b/toys/pending/wget.c
@@ -71,17 +71,14 @@ config WGET_OPENSSL
#else
#define WGET_SSL 0
#endif
+#define HTTPS (WGET_SSL && TT.https)
-#define WGET_IS_HTTP (strncmp(TT.url, "http://", 7) == 0)
-#define WGET_IS_HTTPS (WGET_SSL && (strncmp(TT.url, "https://", 8) == 0))
-
GLOBALS(
- char *postdata;
- char *O;
+ char *p, *O;
long max_redirect;
- int sock;
+ int sock, https;
char *url;
#if CFG_WGET_LIBTLS
struct tls *tls;
@@ -94,37 +91,33 @@ GLOBALS(
// get http info in URL
static void wget_info(char *url, char **host, char **port, char **path)
{
- *host = strafter(url, "://");
- *path = strchr(*host, '/');
-
- if ((*path = strchr(*host, '/'))) {
- **path = '\0';
- *path = *path + 1;
- } else {
- *path = "";
+ char *ss = url;
+
+ // Must start with case insensitive http:// or https://
+ if (strncmp(url, "http", 4)) url = 0;
+ else {
+ url += 4;
+ if ((TT.https = WGET_SSL && toupper(*url=='s'))) url++;
+ if (!strstart(&url, "://")) url = 0;
}
+ if (!url) error_exit("unsupported protocol: %s", ss);
- if ( *host[0] == '[' && strchr(*host, ']') ) { // IPv6
- *port = strafter(*host, "]:");
- *host = *host + 1;
- strchr(*host, ']')[0] = '\0';
- } else { // IPv4
- if ((*port = strchr(*host, ':'))) {
- **port = '\0';
- *port = *port + 1;
- }
- }
+ if ((*path = strchr(*host = url, '/'))) *(*path++) = 0;
+ else *path = "";
- if (!*port && WGET_IS_HTTP) *port = "80";
- else if (!*port && WGET_IS_HTTPS) *port = "443";
- else if (!*port) error_exit("unsupported protocol");
+ // Get port number and trim literal IPv6 addresses
+ if (**host=='[' && (ss = strchr(++*host, ']'))) {
+ *ss++ = 0;
+ *port = (*ss==':') ? ++ss : 0;
+ } else if ((*port = strchr(*host, ':'))) *(*port++) = 0;
+ if (!*port) *port = HTTPS ? "443" : "80";
}
static void wget_connect(char *host, char *port)
{
- if (WGET_IS_HTTP)
+ if (!HTTPS)
TT.sock = xconnectany(xgetaddrinfo(host, port, AF_UNSPEC, SOCK_STREAM, 0, 0));
- else if (WGET_IS_HTTPS) {
+ else {
#if CFG_WGET_LIBTLS
struct tls_config *cfg = NULL;
uint32_t protocols;
@@ -166,41 +159,43 @@ static void wget_connect(char *host, char *port)
error_exit("SSL_set_fd: %s", ERR_error_string(ERR_get_error(), NULL));
if (FLAG(d)) printf("TLS: %s\n", SSL_get_cipher(TT.ssl));
-#else
- error_exit("unsupported protocol");
#endif
- } else error_exit("unsupported protocol");
+ }
}
static size_t wget_read(void *buf, size_t len)
{
- if (WGET_IS_HTTP) return xread(TT.sock, buf, len);
- else if (WGET_IS_HTTPS) {
+ if (!HTTPS) return xread(TT.sock, buf, len);
+ else {
+ char *err = 0;
+ int ret;
+
#if CFG_WGET_LIBTLS
- ssize_t ret = tls_read(TT.tls, buf, len);
- if (ret < 0) error_exit("tls_read: %s", tls_error(TT.tls));
- return ret;
+ if ((ret = tls_read(TT.tls, buf, len))<0) err = tls_error(TT.tls);
#elif CFG_WGET_OPENSSL
- int ret = SSL_read(TT.ssl, buf, (int) len);
- if (ret < 0)
- error_exit("SSL_read: %s", ERR_error_string(ERR_get_error(), NULL));
- return ret;
+ if ((ret = SSL_read(TT.ssl, buf, len))<0)
+ err = ERR_error_string(ERR_get_error(), 0);
#endif
- } else error_exit("unsupported protocol");
+ if (err) error_exit("https read: %s", err);
+
+ return ret;
+ }
}
static void wget_write(void *buf, size_t len)
{
- if (WGET_IS_HTTP) xwrite(TT.sock, buf, len);
- else if (WGET_IS_HTTPS) {
+ if (!HTTPS) xwrite(TT.sock, buf, len);
+ else {
+ char *err = 0;
+
#if CFG_WGET_LIBTLS
- if (len != tls_write(TT.tls, buf, len))
- error_exit("tls_write: %s", tls_error(TT.tls));
+ if (len != tls_write(TT.tls, buf, len)) err = tls_error(TT.tls);
#elif CFG_WGET_OPENSSL
- if (len != SSL_write(TT.ssl, buf, (int) len))
- error_exit("SSL_write: %s", ERR_error_string(ERR_get_error(), NULL));
+ if (len != SSL_write(TT.ssl, buf, len))
+ err = ERR_error_string(ERR_get_error(), 0);
#endif
- } else error_exit("unsupported protocol");
+ if (err) error_exit("https write: %s", err);
+ }
}
static void wget_close()
@@ -214,18 +209,18 @@ static void wget_close()
if (TT.tls) {
tls_close(TT.tls);
tls_free(TT.tls);
- TT.tls = NULL;
+ TT.tls = 0;
}
#elif CFG_WGET_OPENSSL
if (TT.ssl) {
SSL_shutdown(TT.ssl);
SSL_free(TT.ssl);
- TT.ssl = NULL;
+ TT.ssl = 0;
}
if (TT.ctx) {
SSL_CTX_free(TT.ctx);
- TT.ctx = NULL;
+ TT.ctx = 0;
}
#endif
}
@@ -237,32 +232,12 @@ static char *wget_find_header(char *header, char *val)
return result ? result + strlen(val) : 0;
}
-static char *wget_redirect(char *header)
-{
- char *redir = wget_find_header(header, "Location: ");
-
- if (!redir) error_exit("could not parse redirect URL");
-
- return xstrdup(redir);
-}
-
-static char *wget_filename(char *header, char *path)
-{
- char *f = wget_find_header(header,
- "Content-Disposition: attachment; filename=");
-
- if (!f && strchr(path, '/')) f = getbasename(path);
- if (!f || !*f ) f = "index.html";
-
- return f;
-}
-
void wget_main(void)
{
long status = 0;
size_t len, c_len = 0;
int fd;
- char *body, *index, *host, *port, *path, *chunked;
+ char *body, *index, *host, *port, *path, *chunked, *ss;
char agent[] = "toybox wget/" TOYBOX_VERSION;
TT.url = xstrdup(toys.optargs[0]);
@@ -270,67 +245,56 @@ void wget_main(void)
while (status != 200) {
if (!TT.max_redirect--) error_exit("Too many redirects");
+ // Connect and write request
wget_info(TT.url, &host, &port, &path);
- if (!FLAG(p)) {
- sprintf(toybuf, "GET /%s HTTP/1.1\r\nHost: %s\r\n"
- "User-Agent: %s\r\nConnection: close\r\n\r\n",
- path, host, agent);
- } else {
- sprintf(toybuf, "POST /%s HTTP/1.1\r\nHost: %s\r\n"
- "User-Agent: %s\r\nConnection: close\r\n"
- "Content-Length: %ld\r\n\r\n"
- "%s",
- path, host, agent, strlen(TT.postdata), TT.postdata);
- }
- if (FLAG(d)) printf("--- Request\n%s", toybuf);
-
+ if (TT.p) sprintf(toybuf, "Content-Length: %ld\r\n", strlen(TT.p));
+ ss = xmprintf("%s /%s HTTP/1.1\r\nHost: %s\r\nUser-Agent: %s\r\n"
+ "Connection: close\r\n%s\r\n%s", FLAG(p) ? "POST" : "GET",
+ path, host, agent, FLAG(p) ? toybuf : "", FLAG(p)?TT.p:"");
+ if (FLAG(d)) printf("--- Request\n%s", ss);
wget_connect(host, port);
- wget_write(toybuf, strlen(toybuf));
-
- // Greedily read the HTTP response until either complete or toybuf is full
- index = toybuf;
- while ((len = wget_read(index, sizeof(toybuf) - (index - toybuf))) > 0)
- index += len;
-
- //Process the response such that
- // Valid ranges toybuf[0...index) valid length is (index - toybuf)
- // Header ranges toybuf[0...body) header length strlen(toybuf)
- // Remnant Body toybuf[body...index) valid remnant body length is len
- //
- // Per RFC7230 the header cannot contain a NUL octet so we NUL terminate at
- // the footer of the header. This allows for normal string functions to be
- // used when processing the header.
- body = memmem(toybuf, index - toybuf, "\r\n\r\n", 4);
- if (!body) error_exit("response header too large");
- body[0] = '\0'; // NUL terminate the headers
- body += 4; // Skip to the head of body
- len = index - body; // Adjust len to be body length
+ wget_write(ss, strlen(ss));
+ free(ss);
+
+ // Read HTTP response until either complete or toybuf is full
+ for (index = toybuf;
+ (len = wget_read(index, sizeof(toybuf)-(index-toybuf)))>0; index += len);
+
+ // Split response into header and body, and null terminate header.
+ // (RFC7230 says header cannot contain NUL.)
+ if (!(body = memmem(toybuf, index-toybuf, "\r\n\r\n", 4)))
+ error_exit("response header too large");
+ *body = 0;
+ body += 4;
+ len = index-body;
if (FLAG(d)) printf("--- Response\n%s\n\n", toybuf);
status = strtol(strafter(toybuf, " "), NULL, 10);
if ((status == 301) || (status == 302)) {
+ if (!(ss = wget_find_header(toybuf, "Location: ")))
+ error_exit("bad redirect");
free(TT.url);
- TT.url = wget_redirect(toybuf);
+ TT.url = xstrdup(ss);
wget_close();
} else if (status != 200) error_exit("response: %ld", status);
}
if (!FLAG(O)) {
- TT.O = wget_filename(toybuf, path);
- if (!access(TT.O, F_OK)) error_exit("%s already exists", TT.O);
+ ss = wget_find_header(toybuf, "Content-Disposition: attachment; filename=");
+ if (!ss && strchr(path, '/')) ss = getbasename(path);
+ if (!ss || !*ss ) ss = "index.html";
+ if (!access((TT.O = ss), F_OK)) error_exit("%s already exists", TT.O);
}
+ // TODO: don't allow header/basename to write to stdout
fd = !strcmp(TT.O, "-") ? 1 : xcreate(TT.O, (O_WRONLY|O_CREAT|O_TRUNC), 0644);
- chunked = wget_find_header(toybuf, "transfer-encoding: chunked");
-
// If chunked we offset the first buffer by 2 character, meaning it is
// pointing at half of the header boundary, aka '\r\n'. This simplifies
// parsing of the first c_len length by allowing the do while loop to fall
// through on the first iteration and parse the first c_len size.
- if (chunked) {
- len = len + 2;
- memmove(toybuf, body - 2, len);
- } else memmove(toybuf, body, len);
+ chunked = wget_find_header(toybuf, "transfer-encoding: chunked");
+ if (chunked) memmove(toybuf, body-2, len += 2);
+ else memmove(toybuf, body, len);
// len is the size remaining in toybuf
// c_len is the size of the remaining bytes in the current chunk