Cleanup pass on wget.

TT.postdata -> TT.p. Replace both WGET_IS_HTTP/WGET_IS_HTTPS macros with a test in wget_info() that sets TT.https (although we still want an HTTPS macro that includes the WGET_SSL test, because I don't trust the compiler to do dead code elimination based on a global in a union). Inline wget_redirect() and wget_filename() at their only caller. Use xmprintf() for the request rather than copying user-defined strings into toybuf without bounds checking. Try not to assume the other side sent us good data.
author: Rob Landley <rob@landley.net> 2022-04-08 03:59:13 -0500
committer: Rob Landley <rob@landley.net> 2022-04-08 03:59:13 -0500
commit: 418405e22175580f07cbad1298d3a7321cd8543c (patch)
tree: 7a7aec54448a761a6e55cc662bfc733067aabb47
parent: 965fabbe2c3d24972301426f9f63919ef87ffb12 (diff)
download: toybox-418405e22175580f07cbad1298d3a7321cd8543c.tar.gz
1 files changed, 79 insertions, 115 deletions
diff --git a/toys/pending/wget.c b/toys/pending/wget.c
index 03f98754..01291f66 100644
--- a/toys/pending/wget.c
+++ b/toys/pending/wget.c
@@ -71,17 +71,14 @@ config WGET_OPENSSL
 #else
 #define WGET_SSL 0
 #endif
+#define HTTPS (WGET_SSL && TT.https)
 
 
-#define WGET_IS_HTTP  (strncmp(TT.url, "http://", 7) == 0)
-#define WGET_IS_HTTPS (WGET_SSL && (strncmp(TT.url, "https://", 8) == 0))
-
 GLOBALS(
-  char *postdata;
-  char *O;
+  char *p, *O;
   long max_redirect;
 
-  int sock;
+  int sock, https;
   char *url;
 #if CFG_WGET_LIBTLS
   struct tls *tls;
@@ -94,37 +91,33 @@ GLOBALS(
 // get http info in URL
 static void wget_info(char *url, char **host, char **port, char **path)
 {
-  *host = strafter(url, "://");
-  *path = strchr(*host, '/');
-
-  if ((*path = strchr(*host, '/'))) {
-    **path = '\0';
-    *path = *path + 1;
-  } else {
-    *path = "";
+  char *ss = url; // original string, kept for the error message
+
+  // Must start with case insensitive http:// or https:// (sets TT.https)
+  if (strncasecmp(url, "http", 4)) url = 0;
+  else {
+    url += 4;
+    if ((TT.https = WGET_SSL && toupper(*url)=='S')) url++;
+    if (!strstart(&url, "://")) url = 0;
   }
+  if (!url) error_exit("unsupported protocol: %s", ss);
 
-  if ( *host[0] == '[' && strchr(*host, ']') ) { // IPv6
-    *port = strafter(*host, "]:");
-    *host = *host + 1;
-    strchr(*host, ']')[0] = '\0';
-  } else { // IPv4
-    if ((*port = strchr(*host, ':'))) {
-      **port = '\0';
-      *port = *port + 1;
-    }
-  }
+  if ((*path = strchr(*host = url, '/'))) *(*path)++ = 0; // cut at first '/'
+  else *path = "";
 
-  if (!*port && WGET_IS_HTTP) *port = "80";
-  else if (!*port && WGET_IS_HTTPS) *port = "443";
-  else if (!*port) error_exit("unsupported protocol");
+  // Split off port (0 if absent) and trim [] from literal IPv6 addresses
+  if (**host=='[' && (ss = strchr(++*host, ']'))) {
+    *ss++ = 0;
+    *port = (*ss==':') ? ++ss : 0;
+  } else if ((*port = strchr(*host, ':'))) *(*port)++ = 0; // advance *port
+  if (!*port) *port = HTTPS ? "443" : "80";
 }
 
 static void wget_connect(char *host, char *port)
 {
-  if (WGET_IS_HTTP)
+  if (!HTTPS)
     TT.sock = xconnectany(xgetaddrinfo(host, port, AF_UNSPEC, SOCK_STREAM, 0, 0));
-  else if (WGET_IS_HTTPS) {
+  else {
 #if CFG_WGET_LIBTLS
     struct tls_config *cfg = NULL;
     uint32_t protocols;
@@ -166,41 +159,43 @@ static void wget_connect(char *host, char *port)
       error_exit("SSL_set_fd: %s", ERR_error_string(ERR_get_error(), NULL));
 
     if (FLAG(d)) printf("TLS: %s\n", SSL_get_cipher(TT.ssl));
-#else
-    error_exit("unsupported protocol");
 #endif
-  } else error_exit("unsupported protocol");
+  }
 }
 
 static size_t wget_read(void *buf, size_t len)
 {
-  if (WGET_IS_HTTP) return xread(TT.sock, buf, len);
-  else if (WGET_IS_HTTPS) {
+  if (!HTTPS) return xread(TT.sock, buf, len); // not https: use TT.sock
+  else {
+    char *err = 0; // error text from the TLS backend, 0 if read succeeded
+    int ret; // assigned by whichever backend is compiled in below
+
 #if CFG_WGET_LIBTLS
-   ssize_t ret = tls_read(TT.tls, buf, len);
-   if (ret < 0) error_exit("tls_read: %s", tls_error(TT.tls));
-   return ret;
+    if ((ret = tls_read(TT.tls, buf, len))<0) err = tls_error(TT.tls);
 #elif CFG_WGET_OPENSSL
-   int ret = SSL_read(TT.ssl, buf, (int) len);
-   if (ret < 0)
-     error_exit("SSL_read: %s", ERR_error_string(ERR_get_error(), NULL));
-   return ret;
+    if ((ret = SSL_read(TT.ssl, buf, len))<0)
+      err = ERR_error_string(ERR_get_error(), 0);
 #endif
-  } else error_exit("unsupported protocol");
+    if (err) error_exit("https read: %s", err); // one exit for both backends
+
+    return ret;
+  }
 }
 
 static void wget_write(void *buf, size_t len)
 {
-  if (WGET_IS_HTTP) xwrite(TT.sock, buf, len);
-  else if (WGET_IS_HTTPS) {
+  if (!HTTPS) xwrite(TT.sock, buf, len); // not https: use TT.sock
+  else {
+    char *err = 0; // error text from the TLS backend, 0 if write succeeded
+
 #if CFG_WGET_LIBTLS
-    if (len != tls_write(TT.tls, buf, len))
-      error_exit("tls_write: %s", tls_error(TT.tls));
+    if (len != tls_write(TT.tls, buf, len)) err = tls_error(TT.tls);
 #elif CFG_WGET_OPENSSL
-    if (len != SSL_write(TT.ssl, buf, (int) len))
-      error_exit("SSL_write: %s", ERR_error_string(ERR_get_error(), NULL));
+    if (len != SSL_write(TT.ssl, buf, len)) // anything but len is an error
+      err = ERR_error_string(ERR_get_error(), 0);
 #endif
-  } else error_exit("unsupported protocol");
+    if (err) error_exit("https write: %s", err); // one exit for both backends
+  }
 }
 
 static void wget_close()
@@ -214,18 +209,18 @@ static void wget_close()
   if (TT.tls) {
     tls_close(TT.tls);
     tls_free(TT.tls);
-    TT.tls = NULL;
+    TT.tls = 0;
   }
 #elif CFG_WGET_OPENSSL
   if (TT.ssl) {
     SSL_shutdown(TT.ssl);
     SSL_free(TT.ssl);
-    TT.ssl = NULL;
+    TT.ssl = 0;
   }
 
   if (TT.ctx) {
     SSL_CTX_free(TT.ctx);
-    TT.ctx = NULL;
+    TT.ctx = 0;
   }
 #endif
 }
@@ -237,32 +232,12 @@ static char *wget_find_header(char *header, char *val)
   return result ? result + strlen(val) : 0;
 }
 
-static char *wget_redirect(char *header)
-{
-  char *redir = wget_find_header(header, "Location: ");
-
-  if (!redir) error_exit("could not parse redirect URL");
-
-  return xstrdup(redir);
-}
-
-static char *wget_filename(char *header, char *path)
-{
-  char *f = wget_find_header(header,
-    "Content-Disposition: attachment; filename=");
-
-  if (!f && strchr(path, '/')) f = getbasename(path);
-  if (!f || !*f ) f = "index.html";
-
-  return f;
-}
-
 void wget_main(void)
 {
   long status = 0;
   size_t len, c_len = 0;
   int fd;
-  char *body, *index, *host, *port, *path, *chunked;
+  char *body, *index, *host, *port, *path, *chunked, *ss;
   char agent[] = "toybox wget/" TOYBOX_VERSION;
 
   TT.url = xstrdup(toys.optargs[0]);
@@ -270,67 +245,56 @@ void wget_main(void)
   while (status != 200) {
     if (!TT.max_redirect--) error_exit("Too many redirects");
 
+    // Connect and write request
     wget_info(TT.url, &host, &port, &path);
-    if (!FLAG(p)) {
-      sprintf(toybuf, "GET /%s HTTP/1.1\r\nHost: %s\r\n"
-                      "User-Agent: %s\r\nConnection: close\r\n\r\n",
-                      path, host, agent);
-    } else {
-      sprintf(toybuf, "POST /%s HTTP/1.1\r\nHost: %s\r\n"
-                      "User-Agent: %s\r\nConnection: close\r\n"
-                      "Content-Length: %ld\r\n\r\n"
-                      "%s",
-                      path, host, agent, strlen(TT.postdata), TT.postdata);
-    }
-    if (FLAG(d)) printf("--- Request\n%s", toybuf);
-
+    if (TT.p) sprintf(toybuf, "Content-Length: %ld\r\n", strlen(TT.p));
+    ss = xmprintf("%s /%s HTTP/1.1\r\nHost: %s\r\nUser-Agent: %s\r\n"
+                  "Connection: close\r\n%s\r\n%s", FLAG(p) ? "POST" : "GET",
+                  path, host, agent, FLAG(p) ? toybuf : "", FLAG(p)?TT.p:"");
+    if (FLAG(d)) printf("--- Request\n%s", ss);
     wget_connect(host, port);
-    wget_write(toybuf, strlen(toybuf));
-
-    // Greedily read the HTTP response until either complete or toybuf is full
-    index = toybuf;
-    while ((len = wget_read(index, sizeof(toybuf) - (index - toybuf))) > 0)
-      index += len;
-
-    //Process the response such that
-    //  Valid ranges  toybuf[0...index)      valid length is (index - toybuf)
-    //  Header ranges toybuf[0...body)       header length strlen(toybuf)
-    //  Remnant Body  toybuf[body...index)   valid remnant body length is len
-    //
-    // Per RFC7230 the header cannot contain a NUL octet so we NUL terminate at
-    // the footer of the header. This allows for normal string functions to be
-    // used when processing the header.
-    body = memmem(toybuf, index - toybuf, "\r\n\r\n", 4);
-    if (!body) error_exit("response header too large");
-    body[0] = '\0'; // NUL terminate the headers
-    body += 4; // Skip to the head of body
-    len = index - body; // Adjust len to be body length
+    wget_write(ss, strlen(ss));
+    free(ss);
+
+    // Read HTTP response until either complete or toybuf is full
+    for (index = toybuf;
+      (len = wget_read(index, sizeof(toybuf)-(index-toybuf)))>0; index += len);
+
+    // Split response into header and body, and null terminate header.
+    // (RFC7230 says header cannot contain NUL.)
+    if (!(body = memmem(toybuf, index-toybuf, "\r\n\r\n", 4)))
+      error_exit("response header too large");
+    *body = 0;
+    body += 4;
+    len = index-body;
     if (FLAG(d)) printf("--- Response\n%s\n\n", toybuf);
 
     status = strtol(strafter(toybuf, " "), NULL, 10);
     if ((status == 301) || (status == 302)) {
+      if (!(ss = wget_find_header(toybuf, "Location: ")))
+        error_exit("bad redirect");
       free(TT.url);
-      TT.url = wget_redirect(toybuf);
+      TT.url = xstrdup(ss);
       wget_close();
     } else if (status != 200) error_exit("response: %ld", status);
   }
 
   if (!FLAG(O)) {
-    TT.O = wget_filename(toybuf, path);
-    if (!access(TT.O, F_OK)) error_exit("%s already exists", TT.O);
+    ss = wget_find_header(toybuf, "Content-Disposition: attachment; filename=");
+    if (!ss && strchr(path, '/')) ss = getbasename(path);
+    if (!ss || !*ss ) ss = "index.html";
+    if (!access((TT.O = ss), F_OK)) error_exit("%s already exists", TT.O);
   }
+  // TODO: don't allow header/basename to write to stdout
   fd = !strcmp(TT.O, "-") ? 1 : xcreate(TT.O, (O_WRONLY|O_CREAT|O_TRUNC), 0644);
 
-  chunked = wget_find_header(toybuf, "transfer-encoding: chunked");
-
   // If chunked we offset the first buffer by 2 character, meaning it is
   // pointing at half of the header boundary, aka '\r\n'. This simplifies
   // parsing of the first c_len length by allowing the do while loop to fall
   // through on the first iteration and parse the first c_len size.
-  if (chunked) {
-    len = len + 2;
-    memmove(toybuf, body - 2, len);
-  } else memmove(toybuf, body, len);
+  chunked = wget_find_header(toybuf, "transfer-encoding: chunked");
+  if (chunked) memmove(toybuf, body-2, len += 2);
+  else memmove(toybuf, body, len);
 
   // len is the size remaining in toybuf
   // c_len is the size of the remaining bytes in the current chunk
author	Rob Landley <rob@landley.net>	2022-04-08 03:59:13 -0500
committer	Rob Landley <rob@landley.net>	2022-04-08 03:59:13 -0500
commit	418405e22175580f07cbad1298d3a7321cd8543c (patch)
tree	7a7aec54448a761a6e55cc662bfc733067aabb47
parent	965fabbe2c3d24972301426f9f63919ef87ffb12 (diff)
download	toybox-418405e22175580f07cbad1298d3a7321cd8543c.tar.gz