Merge remote-tracking branch 'toybox/master' into HEAD

am: 19ef8b8a7d * commit '19ef8b8a7d596e032eb5b2449120d6d3d3031f0e': Implement "insmod -". Add test_COMMAND for each command, update "make help" to describe TEST_HOST and VERBOSE=fail, and fix a quote mismatch. Help text update from Isaac Dunham, tweaked slightly. Add xxd -p and -r. RLIMIT_RTTIME was added to the kernel in 2008, you can't expect uClibc to have noticed yet. The perl build's attempt to escape spaces and such in LD_LIBRARY_PATH is _SAD_. It uses a sed expression that assumes you can escape - to use it as a literal (you can't, it has to be first or last char of the range), and assumes you have to escape delimiters in sed [] context (you don't), and/or that non-printf escapes become the literal character (they don't, the backslash is preserved as a literal), meaning it winds up doing "s/[\-\]//" which is a length 1 range, which is officially undefined behavior according to posix, and regcomp errors out. Add support for getprop -Z Fix remounting /system on Android. Testfile with the 3 different types of "not utf8 output" escaped chars in it. Add dynamic/static checking to file (printing dynamic linker if found). Add the sed invocations to convert tabs/spaces and back, plus some tweaks. Cleanup pass on file.c, and add detection of different ELF architectures. Add "make list", rename make working->list_working and pending->list_pending, and filter out commands that aren't nofork but aren't installed either (toyflags 0, I.E. recognized aliases like "-sh" called from login). Implement file(1). Fix wc -c optimization. removed unread assignment in sed.c
author: Elliott Hughes <enh@google.com> 2016-02-20 05:43:54 +0000
committer: android-build-merger <android-build-merger@google.com> 2016-02-20 05:43:54 +0000
commit: b1a47859ffa0f1839a8190d8c879724f25d80109 (patch)
tree: fc21ef8bc00518064722b53ee3f3437d62a5aeca
parent: e201948901e6bec5a0657867ca519203e6e7e0a0 (diff)
parent: 19ef8b8a7d596e032eb5b2449120d6d3d3031f0e (diff)
download: toybox-b1a47859ffa0f1839a8190d8c879724f25d80109.tar.gz
16 files changed, 506 insertions, 74 deletions
diff --git a/Makefile b/Makefile
index 39d44fcf..677bbb06 100644
--- a/Makefile
+++ b/Makefile
@@ -20,7 +20,7 @@ toybox toybox_unstripped: toybox_stuff
 
 .PHONY: clean distclean baseline bloatcheck install install_flat \
 	uinstall uninstall_flat test tests help toybox_stuff change \
-	working pending
+	list list_working list_pending
 
 include kconfig/Makefile
 -include .singlemake
@@ -66,12 +66,14 @@ tests:
 help::
 	@echo  '  toybox          - Build toybox.'
 	@echo  '  COMMANDNAME     - Build individual toybox command as a standalone binary.'
-	@echo  '  working         - List working COMMANDNAMEs.'
-	@echo  '  pending         - List pending (unfinished) COMMANDNAMEs.'
+	@echo  '  list            - List COMMANDNAMEs (also list_working and list_pending).'
 	@echo  '  change          - Build each command standalone under change/.'
 	@echo  '  baseline        - Create toybox_old for use by bloatcheck.'
 	@echo  '  bloatcheck      - Report size differences between old and current versions'
-	@echo  '  test            - Run test suite against compiled commands.'
+	@echo  '  test_COMMAND    - Run tests for COMMAND (test_ps, test_cat, etc.)'
+	@echo  '  test            - Run test suite against all compiled commands.'
+	@echo  '                    export TEST_HOST=1 to test host command, VERBOSE=1'
+	@echo  '                    to show diff, VERBOSE=fail to stop after first failure.'
 	@echo  '  clean           - Delete temporary files.'
 	@echo  "  distclean       - Delete everything that isn't shipped."
 	@echo  '  install_flat    - Install toybox into $$PREFIX directory.'
diff --git a/lib/lib.c b/lib/lib.c
index 681d4d23..43db2e37 100644
--- a/lib/lib.c
+++ b/lib/lib.c
@@ -475,12 +475,12 @@ char *readfileat(int dirfd, char *name, char *ibuf, off_t *plen)
     rbuf = buf+rlen;
     len -= rlen;
   }
-  *plen = len = rlen+(buf-ibuf);
+  *plen = len = rlen+(rbuf-buf);
   close(fd);
 
   if (rlen<0) {
     if (ibuf != buf) free(buf);
-    buf =  0;
+    buf = 0;
   } else buf[len] = 0;
 
   return buf;
@@ -508,8 +508,7 @@ int64_t peek_le(void *ptr, unsigned size)
   char *c = ptr;
   int i;
 
-  for (i=0; i<size; i++) ret |= ((int64_t)c[i])<<i;
-
+  for (i=0; i<size; i++) ret |= ((int64_t)c[i])<<(i*8);
   return ret;
 }
 
@@ -517,9 +516,9 @@ int64_t peek_be(void *ptr, unsigned size)
 {
   int64_t ret = 0;
   char *c = ptr;
+  int i;
 
-  while (size--) ret = (ret<<8)|c[size];
-
+  for (i=0; i<size; i++) ret = (ret<<8)|(c[i]&0xff);
   return ret;
 }
 
diff --git a/lib/portability.h b/lib/portability.h
index 53b08704..d0d0bd90 100644
--- a/lib/portability.h
+++ b/lib/portability.h
@@ -41,6 +41,10 @@
 #define AT_REMOVEDIR 0x200
 #endif
 
+#ifndef RLIMIT_RTTIME
+#define RLIMIT_RTTIME 15
+#endif
+
 // We don't define GNU_dammit because we're not part of the gnu project, and
 // don't want to get any FSF on us. Unfortunately glibc (gnu libc)
 // won't give us Linux syscall wrappers without claiming to be part of the
diff --git a/scripts/genconfig.sh b/scripts/genconfig.sh
index e53d3607..5b0715f4 100755
--- a/scripts/genconfig.sh
+++ b/scripts/genconfig.sh
@@ -123,7 +123,7 @@ genconfig > generated/Config.in || rm generated/Config.in
 # Find names of commands that can be built standalone in these C files
 toys()
 {
-  grep 'TOY(.*)' "$@" | grep -v TOYFLAG_NOFORK | \
+  grep 'TOY(.*)' "$@" | grep -v TOYFLAG_NOFORK | grep -v "0))" | \
     sed -rn 's/([^:]*):.*(OLD|NEW)TOY\( *([a-zA-Z][^,]*) *,.*/\1:\3/p'
 }
 
@@ -135,13 +135,16 @@ do
   [ "$NAME" == help ] && continue
   [ "$NAME" == install ] && continue
   echo -e "$NAME: $FILE *.[ch] lib/*.[ch]\n\tscripts/single.sh $NAME\n"
+  echo -e "test_$NAME:\n\tscripts/test.sh $NAME\n"
   [ "${FILE/pending//}" != "$FILE" ] &&
     PENDING="$PENDING $NAME" ||
     WORKING="$WORKING $NAME"
 done > .singlemake &&
 echo -e "clean::\n\trm -f $WORKING $PENDING" >> .singlemake &&
-echo -e "working:\n\t@echo $(echo $WORKING | tr ' ' '\n' | sort | xargs)" \
+echo -e "list:\n\t@echo $(echo $WORKING $PENDING | tr ' ' '\n' | sort | xargs)"\
   >> .singlemake &&
-echo -e "pending:\n\t@echo $(echo $PENDING | tr ' ' '\n' | sort | xargs)" \
+echo -e "list_working:\n\t@echo $(echo $WORKING | tr ' ' '\n' | sort | xargs)" \
+  >> .singlemake &&
+echo -e "list_pending:\n\t@echo $(echo $PENDING | tr ' ' '\n' | sort | xargs)" \
   >> .singlemake
 )
diff --git a/tests/files/utf8/bad.txt b/tests/files/utf8/bad.txt
new file mode 100644
index 00000000..1a91e8ae
--- /dev/null
+++ b/tests/files/utf8/bad.txt
@@ -0,0 +1 @@
+�
diff --git a/tests/sed.test b/tests/sed.test
index eff2306a..c62f9c4d 100755
--- a/tests/sed.test
+++ b/tests/sed.test
@@ -131,10 +131,13 @@ hello'" "merp\nhello\n" "" "merp"
 
 testing "" "sed -e '/x/c\' -e 'y'" 'y\n' '' 'x\n'
 testing "" "sed -e 's/a[([]*b/X/'" 'X' '' 'a[(b'
+testing "" "sed 'y/a\\bc/de\f/'" "db\f" "" "abc"
+testing "sed [a-a] (for perl)" "sed '"'s/\([^a-zA-Z0-9.:_\-\/]\)/\\\1/g'"'" \
+  'he\ llo' "" "he llo"
 
 # You have to match the first line of a range in order to activate
 # the range, numeric and ascii work the same way
-testing "skip start of range" "sed -e n -e '1,2s/b/c/'" "a\nb\n" "" "a\nb\n"
+testing "sed skip start of range" "sed -e n -e '1,2s/b/c/'" "a\nb\n" "" "a\nb\n"
 
 #echo meep | sed/sed -e '1a\' -e 'huh'
 #echo blah | sed/sed -f <(echo -e "1a\\\\\nboom")
@@ -146,4 +149,5 @@ testing "sed bonus backslashes" \
   "hello\nl x\nab\nc\n" "" "hello\n"
 # -i with $ last line test
 
+
 exit $FAILCOUNT
diff --git a/tests/xxd.test b/tests/xxd.test
index e036865a..68f52be4 100644
--- a/tests/xxd.test
+++ b/tests/xxd.test
@@ -25,4 +25,23 @@ testing "xxd -c 8 -g 4 file1" "xxd -c 8 -g 4 file1" \
 testing "xxd -c 8 -g 3 file1" "xxd -c 8 -g 3 file1" \
     "00000000: 746869 732069 7320 this is \n00000008: 736f6d 652074 6578 some tex\n00000010: 740a               t.\n" "" ""
 
+testing "xxd -p" "xxd -p file1" "7468697320697320736f6d6520746578740a\n" "" ""
+
+testing "xxd -r" "xxd file1 | xxd -r" "this is some text\n" "" ""
+testing "xxd -r -p" "xxd -p file1 | xxd -r -p" "this is some text\n" "" ""
+
+testing "xxd -r garbage" "echo '0000: 68 65 6c6c 6fxxxx' | xxd -r -" "hello" "" ""
+
+# -r will only read -c bytes (default 16) before skipping to the next line,
+# ignoring the rest.
+testing "xxd -r long" \
+    "echo '0000: 40404040404040404040404040404040404040404040404040404040404040404040404040404040' | xxd -r -" \
+    "@@@@@@@@@@@@@@@@" "" ""
+
+# -r -p ignores the usual -p 30-byte/line limit (or any limit set by -c) and
+# will take as many bytes as you give it.
+testing "xxd -r -p long" \
+    "echo '40404040404040404040404040404040404040404040404040404040404040404040404040404040' | xxd -r -p -" \
+    "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" "" ""
+
 rm file1 file2
diff --git a/toys/android/getprop.c b/toys/android/getprop.c
index 4afac1a5..efb1e447 100644
--- a/toys/android/getprop.c
+++ b/toys/android/getprop.c
@@ -2,7 +2,7 @@
  *
  * Copyright 2015 The Android Open Source Project
 
-USE_GETPROP(NEWTOY(getprop, ">2", TOYFLAG_USR|TOYFLAG_SBIN))
+USE_GETPROP(NEWTOY(getprop, ">2Z", TOYFLAG_USR|TOYFLAG_SBIN))
 
 config GETPROP
   bool "getprop"
@@ -21,32 +21,85 @@ config GETPROP
 
 #include <cutils/properties.h>
 
+#include <selinux/android.h>
+#include <selinux/label.h>
+#include <selinux/selinux.h>
+
 GLOBALS(
   size_t size;
   char **nv; // name/value pairs: even=name, odd=value
+  struct selabel_handle *handle;
 )
 
+static char *get_property_context(char *property)
+{
+  char *context = NULL;
+
+  if (selabel_lookup(TT.handle, &context, property, 1)) {
+    perror_exit("unable to lookup label for \"%s\"", property);
+  }
+  return context;
+}
+
 static void add_property(char *name, char *value, void *unused)
 {
   if (!(TT.size&31)) TT.nv = xrealloc(TT.nv, (TT.size+32)*2*sizeof(char *));
 
   TT.nv[2*TT.size] = xstrdup(name);
-  TT.nv[1+2*TT.size++] = xstrdup(value);
+  if (toys.optflags & FLAG_Z) {
+    TT.nv[1+2*TT.size++] = get_property_context(name);
+  } else {
+    TT.nv[1+2*TT.size++] = xstrdup(value);
+  }
+}
+
+// Needed to suppress extraneous "Loaded property_contexts from" message
+int selinux_log_callback(int type, const char *fmt, ...) {
+  va_list ap;
+
+  if (type == SELINUX_INFO) return 0;
+  va_start(ap, fmt);
+  verror_msg(fmt, 0, ap);
+  va_end(ap);
+  return 0;
 }
 
 void getprop_main(void)
 {
+  if (toys.optflags & FLAG_Z) {
+    union selinux_callback cb;
+
+    cb.func_log = selinux_log_callback;
+    selinux_set_callback(SELINUX_CB_LOG, cb);
+    TT.handle = selinux_android_prop_context_handle();
+    if (!TT.handle) error_exit("unable to get selinux property context handle");
+  }
+
   if (*toys.optargs) {
-    property_get(*toys.optargs, toybuf, toys.optargs[1] ? toys.optargs[1] : "");
-    puts(toybuf);
+    if (toys.optflags & FLAG_Z) {
+      char *context = get_property_context(*toys.optargs);
+
+      puts(context);
+      if (CFG_TOYBOX_FREE) free(context);
+    } else {
+      property_get(*toys.optargs, toybuf, toys.optargs[1] ? toys.optargs[1] : "");
+      puts(toybuf);
+    }
   } else {
     size_t i;
 
     if (property_list((void *)add_property, 0)) error_exit("property_list");
     qsort(TT.nv, TT.size, 2*sizeof(char *), qstrcmp);
     for (i = 0; i<TT.size; i++) printf("[%s]: [%s]\n", TT.nv[i*2],TT.nv[1+i*2]);
-    if (CFG_TOYBOX_FREE) free(TT.nv);
+    if (CFG_TOYBOX_FREE) {
+      for (i = 0; i<TT.size; i++) {
+        free(TT.nv[i*2]);
+        free(TT.nv[1+i*2]);
+      }
+      free(TT.nv);
+    }
   }
+  if (CFG_TOYBOX_FREE && (toys.optflags & FLAG_Z)) selabel_close(TT.handle);
 }
 
 #else
diff --git a/toys/lsb/mount.c b/toys/lsb/mount.c
index 789d9a53..9510c5e9 100644
--- a/toys/lsb/mount.c
+++ b/toys/lsb/mount.c
@@ -91,6 +91,7 @@ static long flag_opts(char *new, long flags, char **more)
     {"noexec", MS_NOEXEC}, {"exec", ~MS_NOEXEC},
     {"sync", MS_SYNCHRONOUS}, {"async", ~MS_SYNCHRONOUS},
     {"noatime", MS_NOATIME}, {"atime", ~MS_NOATIME},
+    {"norelatime", ~MS_RELATIME}, {"relatime", MS_RELATIME},
     {"nodiratime", MS_NODIRATIME}, {"diratime", ~MS_NODIRATIME},
     {"loud", ~MS_SILENT},
     {"shared", MS_SHARED}, {"rshared", MS_SHARED|MS_REC},
@@ -195,16 +196,21 @@ static void mount_filesystem(char *dev, char *dir, char *type,
       printf("try '%s' type '%s' on '%s'\n", dev, type, dir);
     for (;;) {
       rc = mount(dev, dir, type, flags, opts);
-      if ((rc != EACCES && rc != EROFS) || (flags & MS_RDONLY)) break;
-      if (rc == EROFS && fd == -1) {
+      // Did we succeed, fail unrecoverably, or already try read-only?
+      if (rc == 0 || (errno != EACCES && errno != EROFS) || (flags&MS_RDONLY))
+        break;
+      // If we haven't already tried it, use the BLKROSET ioctl to ensure
+      // that the underlying device isn't read-only.
+      if (fd == -1) {
+        if (toys.optflags & FLAG_v)
+          printf("trying BLKROSET ioctl on '%s'\n", dev);
         if (-1 != (fd = open(dev, O_RDONLY))) {
-          ioctl(fd, BLKROSET, &ro);
+          rc = ioctl(fd, BLKROSET, &ro);
           close(fd);
-
-          continue;
+          if (rc == 0) continue;
         }
       }
-      fprintf(stderr, "'%s' is read-only", dev);
+      fprintf(stderr, "'%s' is read-only\n", dev);
       flags |= MS_RDONLY;
     }
 
diff --git a/toys/other/hexedit.c b/toys/other/hexedit.c
index ff13b513..a9d97aa2 100644
--- a/toys/other/hexedit.c
+++ b/toys/other/hexedit.c
@@ -12,9 +12,16 @@ config HEXEDIT
   help
     usage: hexedit FILENAME
 
-    Hexadecimal file editor.
+    Hexadecimal file editor. All changes are written to disk immediately.
 
     -r	Read only (display but don't edit)
+
+    Keys:
+    Arrows        Move left/right/up/down by one line/column
+    Pg Up/Pg Dn   Move up/down by one page
+    0-9, a-f      Change current half-byte to hexadecimal value
+    u             Undo
+    q/^c/^d/<esc> Quit
 */
 
 #define FOR_hexedit
diff --git a/toys/other/insmod.c b/toys/other/insmod.c
index cb222a54..098d2cfa 100644
--- a/toys/other/insmod.c
+++ b/toys/other/insmod.c
@@ -16,31 +16,35 @@ config INSMOD
 #include "toys.h"
 
 #include <sys/syscall.h>
-#define init_module(mod, len, opts) syscall(__NR_init_module, mod, len, opts)
+#define finit_module(fd, opts, flags) syscall(SYS_finit_module, fd, opts, flags)
+#define init_module(mod, len, opts) syscall(SYS_init_module, mod, len, opts)
 
 void insmod_main(void)
 {
-  char * buf = NULL;
-  int len, res, i;
-  int fd = xopen(*toys.optargs, O_RDONLY);
-
-  len = fdlength(fd);
-  buf = xmalloc(len);
-  xreadall(fd, buf, len);
+  int fd = !strcmp(*toys.optargs, "-") ? 0 : xopen(*toys.optargs, O_RDONLY);
+  int i, rc;
 
   i = 1;
-  while(toys.optargs[i] &&
+  while (toys.optargs[i] &&
     strlen(toybuf) + strlen(toys.optargs[i]) + 2 < sizeof(toybuf))
   {
     strcat(toybuf, toys.optargs[i++]);
     strcat(toybuf, " ");
   }
 
-  res = init_module(buf, len, toybuf);
-  if (CFG_TOYBOX_FREE) {
-    if (buf != toybuf) free(buf);
-    close(fd);
+  // finit_module was new in Linux 3.8, and doesn't work on stdin,
+  // so we fall back to init_module if necessary.
+  rc = finit_module(fd, toybuf, 0);
+  if (rc && (fd == 0 || errno == ENOSYS)) {
+    off_t len = 0;
+    char *path = !strcmp(*toys.optargs, "-") ? "/dev/stdin" : *toys.optargs;
+    char *buf = readfileat(AT_FDCWD, path, NULL, &len);
+
+    rc = init_module(buf, len, toybuf);
+    if (CFG_TOYBOX_FREE) free(buf);
   }
 
-  if (res) perror_exit("failed to load %s", toys.optargs[0]);
+  if (rc) perror_exit("failed to load %s", toys.optargs[0]);
+
+  if (CFG_TOYBOX_FREE) close(fd);
 }
diff --git a/toys/other/xxd.c b/toys/other/xxd.c
index e9ad8393..0250f8f8 100644
--- a/toys/other/xxd.c
+++ b/toys/other/xxd.c
@@ -8,13 +8,13 @@
  * TODO: support for reversing a hexdump back into the original data.
  * TODO: -s seek
 
-USE_XXD(NEWTOY(xxd, ">1c#<1>4096=16l#g#<1=2", TOYFLAG_USR|TOYFLAG_BIN))
+USE_XXD(NEWTOY(xxd, ">1c#<1>4096=16l#g#<1=2pr", TOYFLAG_USR|TOYFLAG_BIN))
 
 config XXD
   bool "xxd"
   default y
   help
-    usage: xxd [-c n] [-g n] [-l n] [file]
+    usage: xxd [-c n] [-g n] [-l n] [-p] [-r] [file]
 
     Hexdump a file to stdout.  If no file is listed, copy from stdin.
     Filename "-" is a synonym for stdin.
@@ -22,6 +22,8 @@ config XXD
     -c n	Show n bytes per line (default 16).
     -g n	Group bytes by adding a ' ' every n bytes (default 2).
     -l n	Limit of n bytes before stopping (default is no limit).
+    -p	Plain hexdump (30 bytes/line, no grouping).
+    -r	Reverse operation: turn a hexdump into a binary file.
 */
 
 #define FOR_xxd
@@ -39,7 +41,7 @@ static void do_xxd(int fd, char *name)
   int i, len, space;
 
   while (0<(len = readall(fd, toybuf, (TT.l && TT.l-pos<TT.c)?TT.l-pos:TT.c))) {
-    printf("%08llx: ", pos);
+    if (!(toys.optflags&FLAG_p)) printf("%08llx: ", pos);
     pos += len;
     space = 2*TT.c+TT.c/TT.g+1;
 
@@ -51,15 +53,79 @@ static void do_xxd(int fd, char *name)
       }
     }
 
-    printf("%*s", space, "");
-    for (i=0; i<len; i++)
-      putchar((toybuf[i]>=' ' && toybuf[i]<='~') ? toybuf[i] : '.');
+    if (!(toys.optflags&FLAG_p)) {
+      printf("%*s", space, "");
+      for (i=0; i<len; i++)
+        putchar((toybuf[i]>=' ' && toybuf[i]<='~') ? toybuf[i] : '.');
+    }
     putchar('\n');
   }
   if (len<0) perror_exit("read");
 }
 
+static int dehex(char ch)
+{
+  if (ch >= '0' && ch <= '9') return ch - '0';
+  if (ch >= 'a' && ch <= 'f') return ch - 'a' + 10;
+  if (ch >= 'A' && ch <= 'F') return ch - 'A' + 10;
+  return (ch == '\n') ? -2 : -1;
+}
+
+static void do_xxd_reverse(int fd, char *name)
+{
+  FILE *fp = xfdopen(fd, "r");
+
+  while (!feof(fp)) {
+    int col = 0;
+    int tmp;
+
+    // Each line of a non-plain hexdump starts with an offset/address.
+    if (!(toys.optflags&FLAG_p)) {
+      long long pos;
+
+      if (fscanf(fp, "%llx: ", &pos) == 1) {
+        if (fseek(stdout, pos, SEEK_SET) != 0) {
+          // TODO: just write out zeros if non-seekable?
+          perror_exit("%s: seek failed", name);
+        }
+      }
+    }
+
+    // A plain hexdump can have as many bytes per line as you like,
+    // but a non-plain hexdump assumes garbage after it's seen the
+    // specified number of bytes.
+    while (toys.optflags&FLAG_p || col < TT.c) {
+      int n1, n2;
+
+      // If we're at EOF or EOL or we read some non-hex...
+      if ((n1 = n2 = dehex(fgetc(fp))) < 0 || (n2 = dehex(fgetc(fp))) < 0) {
+        // If we're at EOL, start on that line.
+        if (n1 == -2 || n2 == -2) continue;
+        // Otherwise, skip to the next line.
+        break;
+      }
+
+      fputc((n1 << 4) | (n2 & 0xf), stdout);
+      col++;
+
+      // Is there any grouping going on? Ignore a single space.
+      tmp = fgetc(fp);
+      if (tmp != ' ') ungetc(tmp, fp);
+    }
+
+    // Skip anything else on this line (such as the ASCII dump).
+    while ((tmp = fgetc(fp)) != EOF && tmp != '\n')
+      ;
+  }
+  if (ferror(fp)) perror_msg_raw(name);
+
+  fclose(fp);
+}
+
 void xxd_main(void)
 {
-  loopfiles(toys.optargs, do_xxd);
+  // Plain style is 30 bytes/line, no grouping.
+  if (toys.optflags&FLAG_p) TT.c = TT.g = 30;
+
+  loopfiles(toys.optargs, toys.optflags&FLAG_r ? do_xxd_reverse : do_xxd);
 }
diff --git a/toys/pending/file.c b/toys/pending/file.c
new file mode 100644
index 00000000..1d09471d
--- /dev/null
+++ b/toys/pending/file.c
@@ -0,0 +1,232 @@
+/* file.c - describe file type
+ *
+ * Copyright 2016 The Android Open Source Project
+ *
+ * See http://pubs.opengroup.org/onlinepubs/9699919799/utilities/file.html
+ *
+ * TODO: ar
+
+USE_FILE(NEWTOY(file, "<1", TOYFLAG_USR|TOYFLAG_BIN))
+
+config FILE
+  bool "file"
+  default n
+  help
+    usage: file [file...]
+
+    Examine the given files and describe their content types.
+*/
+
+#define FOR_file
+#include "toys.h"
+
+GLOBALS(
+  int max_name_len;
+)
+
+// We don't trust elf.h to be there, and two codepaths for 32/64 is awkward
+// anyway, so calculate struct offsets manually. (It's a fixed ABI.)
+static void do_elf_file(int fd)
+{
+  int endian = toybuf[5], bits = toybuf[4], i, j;
+  int64_t (*elf_int)(void *ptr, unsigned size) = peek_le;
+  // Values from include/linux/elf-em.h (plus arch/*/include/asm/elf.h)
+  // Names are linux/arch/ directory name
+  struct {int val; char *name;} type[] = {{0x9026, "alpha"},
+    {40, "arm"}, {183, "arm"}, {0x18ad, "avr32"}, {106, "blackfin"},
+    {76, "cris"}, {0x5441, "frv"}, {46, "h8300"}, {50, "ia64"},//ia intel ftaghn
+    {88, "m32r"}, {4, "m68k"}, {0xbaab, "microblaze"}, {8, "mips"},
+    {10, "mips"}, {89, "mn10300"}, {15, "parisc"}, {22, "s390"},
+    {135, "score"}, {42, "sh"}, {2, "sparc"}, {18, "sparc"}, {43, "sparc"},
+    {187, "tile"}, {188, "tile"}, {191, "tile"}, {3, "x86"}, {6, "x86"},
+    {62, "x86"}, {94, "xtensa"}, {0xabc7, "xtensa"}};
+
+  xprintf("ELF ");
+
+  // "64-bit"
+  if (bits == 1) xprintf("32-bit ");
+  else if (bits == 2) xprintf("64-bit ");
+  else {
+    xprintf("(bad class %d) ", bits);
+    bits = 0;
+  }
+
+  // e_machine, ala "x86", from big table above
+  j = elf_int(toybuf+18, 2);
+  for (i = 0; i<ARRAY_LEN(type); i++) if (j==type[i].val) break;
+  if (i<ARRAY_LEN(type)) xprintf("%s ", type[i].name);
+  else xprintf("(unknown arch %d) ", j);
+
+  // "LSB"
+  if (endian == 1) xprintf("LSB ");
+  else if (endian == 2) {
+    xprintf("MSB ");
+    elf_int = peek_be;
+  } else {
+    xprintf("(bad endian %d)\n", endian);
+    endian = 0;
+  }
+
+  // ", executable"
+  i = elf_int(toybuf+16, 2);
+  if (i == 1) xprintf("relocatable");
+  else if (i == 2) xprintf("executable");
+  else if (i == 3) xprintf("shared object");
+  else if (i == 4) xprintf("core dump");
+  else xprintf("(bad type %d)", i);
+
+  bits--;
+  // If we know our bits and endianness and phentsize agrees show dynamic linker
+  if ((bits&1)==bits && endian &&
+      (i = elf_int(toybuf+42+12*bits, 2)) == 32+24*bits)
+  {
+    char *map, *phdr;
+    int phsize = i, phnum = elf_int(toybuf+44+12*bits, 2),
+        psz = sysconf(_SC_PAGE_SIZE), lib = 0;
+    off_t phoff = elf_int(toybuf+28+4*bits, 4+4*bits),
+          mapoff = phoff^(phoff&(psz-1));
+
+    // map e_phentsize*e_phnum bytes at e_phoff
+    map = mmap(0, phsize*phnum, PROT_READ, MAP_SHARED, fd, mapoff);
+    if (map != MAP_FAILED) {
+      // Find PT_INTERP entry. (Note: fields got reordered for 64 bit)
+      for (i = 0; i<phnum; i++) {
+        long long dlpos, dllen;
+
+        // skip non-PT_INTERP entries
+        j = elf_int(phdr = map+(phoff-mapoff)+i*phsize, 4);
+        if (j==2) lib++;
+        if (j!=3) continue;
+
+        // Read p_offset and p_filesz
+        j = bits+1;
+        dlpos = elf_int(phdr+4*j, 4*j);
+        dllen = elf_int(phdr+16*j, 4*j);
+        if (dllen<0 || dllen>sizeof(toybuf)-128
+            || dlpos!=lseek(fd, dlpos, SEEK_SET)
+            || dllen!=readall(fd, toybuf+128, dllen)) break;
+        printf(", dynamic (%.*s)", (int)dllen, toybuf+128);
+      }
+      if (!lib) printf(", static");
+      else printf(", needs %d lib%s", lib, lib>1 ? "s" : "");
+      munmap(map, phsize*phnum);
+    }
+  }
+
+  // TODO: we'd need to actually parse the ELF file to report the rest...
+  // ", dynamically linked"
+  // " (uses shared libs)"
+  // ", for Linux 2.6.24"
+  // ", BuildID[sha1]=SHA"
+  // ", stripped"
+  xputc('\n');
+}
+
+static void do_regular_file(int fd, char *name)
+{
+  char *s;
+  int len = read(fd, s = toybuf, sizeof(toybuf)-256);
+
+  if (len<0) perror_msg("%s", name);
+
+  if (len>40 && strstart(&s, "\177ELF")) do_elf_file(fd);
+  else if (len>28 && strstart(&s, "\x89PNG\x0d\x0a\x1a\x0a")) {
+    // PNG is big-endian: https://www.w3.org/TR/PNG/#7Integers-and-byte-order
+    int chunk_length = peek_be(s, 4);
+
+    xprintf("PNG image data");
+
+    // The IHDR chunk comes first: https://www.w3.org/TR/PNG/#11IHDR
+    s += 4;
+    if (chunk_length == 13 && strstart(&s, "IHDR")) {
+      // https://www.w3.org/TR/PNG/#6Colour-values
+      char *c = 0, *colors[] = {"grayscale", 0, "color RGB", "indexed color",
+                                "grayscale with alpha", 0, "color RGBA"};
+
+      if (s[9]<ARRAY_LEN(colors)) c = colors[s[9]];
+      if (!c) c = "unknown";
+
+      xprintf(", %d x %d, %d-bit/%s, %sinterlaced", (int)peek_be(s, 4),
+        (int)peek_be(s+4, 4), s[8], c, s[12] ? "" : "non-");
+    }
+
+    xputc('\n');
+
+  // https://www.w3.org/Graphics/GIF/spec-gif89a.txt
+  } else if (len>16 && (strstart(&s, "GIF87a") || strstart(&s, "GIF89a")))
+    xprintf("GIF image data, %d x %d\n",
+      (int)peek_le(s, 2), (int)peek_le(s+8, 2));
+
+  // TODO: parsing JPEG for width/height is harder than GIF or PNG.
+  else if (len>32 && memcmp(toybuf, "\xff\xd8", 2) == 0)
+    xprintf("JPEG image data\n");
+
+  // https://docs.oracle.com/javase/specs/jvms/se7/html/jvms-4.html
+  else if (len>8 && strstart(&s, "\xca\xfe\xba\xbe"))
+    xprintf("Java class file, version %d.%d\n",
+      (int)peek_be(s+6, 2), (int)peek_be(s, 2));
+
+    // TODO: cpio archive.
+    // TODO: tar archive.
+    // TODO: zip/jar/apk archive.
+  else {
+    char *what = 0;
+    int i, bytes;
+
+    // If shell script, report which interpreter
+    if (len>3 && strstart(&s, "#!")) {
+      for (what = s; (s-toybuf)<len && !isspace(*s); s++);
+      strcpy(s, " script");
+
+    // Distinguish ASCII text, UTF-8 text, or data
+    } else for (i = 0; i<len; ++i) {
+      if (!(isprint(toybuf[i]) || isspace(toybuf[i]))) {
+        wchar_t wc;
+        if ((bytes = mbrtowc(&wc, s+i, len-i, 0))>0 && wcwidth(wc)>=0) {
+          i += bytes-1;
+          if (!what) what = "UTF-8 text";
+        } else {
+          what = "data";
+          break;
+        }
+      }
+    }
+    xputs(what ? what : "ASCII text");
+  }
+}
+
+static void do_file(int fd, char *name)
+{
+  struct stat sb;
+  char *what = "unknown";
+
+  xprintf("%s: %*s", name, (int)(TT.max_name_len - strlen(name)), "");
+
+  if (fstat(fd, &sb)) what = "cannot open";
+  if (S_ISREG(sb.st_mode)) {
+    if (sb.st_size == 0) what = "empty";
+    else {
+      do_regular_file(fd, name);
+      return;
+    }
+  } else if (S_ISBLK(sb.st_mode)) what = "block special";
+  else if (S_ISCHR(sb.st_mode)) what = "character special";
+  else if (S_ISDIR(sb.st_mode)) what = "directory";
+  else if (S_ISFIFO(sb.st_mode)) what = "fifo";
+  else if (S_ISSOCK(sb.st_mode)) what = "socket";
+  else if (S_ISLNK(sb.st_mode)) what = "symbolic link";
+  xputs(what);
+}
+
+void file_main(void)
+{
+  char **name;
+
+  for (name = toys.optargs; *name; ++name) {
+    int name_len = strlen(*name);
+
+    if (name_len > TT.max_name_len) TT.max_name_len = name_len;
+  }
+
+  loopfiles(toys.optargs, do_file);
+}
diff --git a/toys/posix/sed.c b/toys/posix/sed.c
index 30d8a154..9b5e6664 100644
--- a/toys/posix/sed.c
+++ b/toys/posix/sed.c
@@ -701,11 +701,11 @@ static void do_sed(int fd, char *name)
 // returns processed copy of string (0 if error), *pstr advances to next
 // unused char. if delim (or *delim) is 0 uses/saves starting char as delimiter
 // if regxex, ignore delimiter in [ranges]
-static char *unescape_delimited_string(char **pstr, char *delim, int regex)
+static char *unescape_delimited_string(char **pstr, char *delim)
 {
   char *to, *from, mode = 0, d;
 
-  to = from = *pstr;
+  from = *pstr;
   if (!delim || !*delim) {
     if (!(d = *(from++))) return 0;
     if (d == '\\') d = *(from++);
@@ -720,9 +720,14 @@ static char *unescape_delimited_string(char **pstr, char *delim, int regex)
     // delimiter in regex character range doesn't count
     if (!mode && *from == '[') {
       mode = '[';
-      if (from[1] == ']') *(to++) = *(from++);
+      if (from[1]=='-' || from[1]==']') *(to++) = *(from++);
     } else if (mode && *from == ']') mode = 0;
-    else if (*from == '\\') {
+    // Length 1 range (X-X with same X) is "undefined" and makes regcomp err,
+    // but the perl build does it, so we need to filter it out.
+    else if (mode && *from == '-' && from[-1] == from[1]) {
+      from+=2;
+      continue;
+    } else if (*from == '\\') {
       if (!from[1]) return 0;
 
       // Check escaped end delimiter before printf style escapes.
@@ -735,7 +740,7 @@ static char *unescape_delimited_string(char **pstr, char *delim, int regex)
           *(to++) = c;
           from+=2;
           continue;
-        } else *(to++) = *(from++);
+        } else if (!mode) *(to++) = *(from++);
       }
     }
     *(to++) = *(from++);
@@ -802,7 +807,7 @@ static void jewel_of_judgement(char **pline, long len)
       } else if (*line == '/' || *line == '\\') {
         char *s = line;
 
-        if (!(s = unescape_delimited_string(&line, 0, 1))) goto brand;
+        if (!(s = unescape_delimited_string(&line, 0))) goto brand;
         if (!*s) corwin->rmatch[i] = 0;
         else {
           xregcomp((void *)reg, s, (toys.optflags & FLAG_r)*REG_EXTENDED);
@@ -844,7 +849,7 @@ static void jewel_of_judgement(char **pline, long len)
 
       // get pattern (just record, we parse it later)
       corwin->arg2 = reg - (char *)corwin;
-      if (!(TT.remember = unescape_delimited_string(&line, &delim, 1)))
+      if (!(TT.remember = unescape_delimited_string(&line, &delim)))
         goto brand;
 
       reg += sizeof(regex_t);
@@ -940,13 +945,13 @@ writenow:
       char *s, delim = 0;
       int len;
 
-      if (!(s = unescape_delimited_string(&line, &delim, 0))) goto brand;
+      if (!(s = unescape_delimited_string(&line, &delim))) goto brand;
       corwin->arg1 = reg-(char *)corwin;
       len = strlen(s);
       reg = extend_string((void *)&corwin, s, reg-(char *)corwin, len);
       free(s);
       corwin->arg2 = reg-(char *)corwin;
-      if (!(s = unescape_delimited_string(&line, &delim, 0))) goto brand;
+      if (!(s = unescape_delimited_string(&line, &delim))) goto brand;
       if (len != strlen(s)) goto brand;
       reg = extend_string((void *)&corwin, s, reg-(char*)corwin, len);
       free(s);
diff --git a/toys/posix/wc.c b/toys/posix/wc.c
index 62d1f7a0..e7afc813 100644
--- a/toys/posix/wc.c
+++ b/toys/posix/wc.c
@@ -53,8 +53,8 @@ static void do_wc(int fd, char *name)
   if (toys.optflags == FLAG_c) {
     struct stat st;
 
-    fstat(fd, &st);
-    if (S_ISREG(st.st_mode)) {
+    // On Linux, files in /proc often report their size as 0.
+    if (!fstat(fd, &st) && S_ISREG(st.st_mode) && st.st_size > 0) {
       lengths[2] = st.st_size;
       goto show;
     }
diff --git a/www/design.html b/www/design.html
index 4a42753c..050c953f 100755
--- a/www/design.html
+++ b/www/design.html
@@ -294,24 +294,40 @@ feeding the compiler -funsigned-char.</p>
 <p>The reason to pick "unsigned" is that way we're 8-bit clean by default.</p>
 
 <p><h3>Error messages and internationalization:</h3></p>
+
 <p>Error messages are extremely terse not just to save bytes, but because we
-don't use any sort of _("string") translation infrastructure.</p>
+don't use any sort of _("string") translation infrastructure. (We're not
+translating the command names themselves, so we must expect a minimum amount of
+english knowledge from our users, but let's keep it to a minimum.)</p>
 
 <p>Thus "bad -A '%c'" is
 preferable to "Unrecognized address base '%c'", because a non-english speaker
-can see that -A was the problem, and with a ~20 word english vocabulary is
-more likely to know (or guess) "bad" than the longer message.</p>
-
-<p>The help text might someday have translated versions, and strerror()
-messages produced by perror_exit() and friends can be expected to be
-localized by libc. Our error functions also prepend the command name,
-which non-english speakers can presumably recognize already.</p>
-
-<p>An enventual goal is <a href=http://yarchive.net/comp/linux/utf8.html>UTF-8</a> support, although it isn't a priority for the
-first pass of each command. (All commands should at least be 8-bit clean.)</p>
-
-<p>Locale support isn't currently a goal; that's a presentation layer issue,
-X11 or Dalvik's problem.</p>
+can see that -A was the problem (giving back the command line argument they
+supplied). A user with a ~20 word english vocabulary is
+more likely to know (or guess) "bad" than the longer message, and you can
+use "bad" in place of "invalid", "inappropriate", "unrecognized"...
+Similarly when atolx_range() complains about range constraints with
+"4 < 17" or "12 > 5", it's intentional: those don't need to be translated.</p>
+
+<p>The strerror() messages produced by perror_exit() and friends should be
+localized by libc, and our error functions also prepend the command name
+(which non-english speakers can presumably recognize already). Keep the
+explanation in between to a minimum, and where possible feed back the values
+they passed in to identify _what_ we couldn't process.
+If you say perror_exit("setsockopt"), you've identified the action you
+were trying to take, and the perror gives a translated error message (from libc)
+explaining _why_ it couldn't do it, so you probably don't need to add english
+words like "failed" or "couldn't assign".</p>
+
+<p>All commands should be 8-bit clean, with explicit
+<a href=http://yarchive.net/comp/linux/utf8.html>UTF-8</a> support where
+necessary. Assume all input data might be utf8, and at least preserve
+it and pass it through. (For this reason, our build is -funsigned-char on
+all architectures; "char" is unsigned unless you stick "signed" in front
+of it.)</p>
+
+<p>Locale support isn't currently a goal; that's a presentation layer issue
+(I.E. a GUI problem).</p>
 
 <a name="codestyle" />
 <h2>Coding style</h2>
@@ -327,6 +343,17 @@ columns. (Indentation of continuation lines is awkward no matter what
 you do, sometimes two spaces looks better, sometimes indenting to the
 contents of a parentheses looks better.)</p>
 
+<p>I'm aware this indentation style creeps some people out, so here's
+the sed invocation to convert groups of two leading spaces to tabs:</p>
+<blockquote><pre>
+sed -i ':loop;s/^\( *\)  /\1\t/;t loop' filename
+</pre></blockquote>
+
+<p>And here's the sed invocation to convert leading tabs to two spaces each:</p>
+<blockquote><pre>
+sed -i ':loop;s/^\( *\)\t/\1  /;t loop' filename
+</pre></blockquote>
+
 <p>There's a space after C flow control statements that look like functions, so
 "if (blah)" instead of "if(blah)". (Note that sizeof is actually an
 operator, so we don't give it a space for the same reason ++ doesn't get
@@ -336,8 +363,8 @@ to read.) We also put a space around assignment operators (on both sides),
 so "int x = 0;".</p>
 
 <p>Blank lines (vertical whitespace) go between thoughts. "We were doing that,
-now we're doing this. (Not a hard and fast rule about _where_ it goes,
-but there should be some.)"</p>
+now we're doing this." (Not a hard and fast rule about _where_ it goes,
+but there should be some for the same reason writing has paragraph breaks.)</p>
 
 <p>Variable declarations go at the start of blocks, with a blank line between
 them and other code. Yes, c99 allows you to put them anywhere, but they're
author	Elliott Hughes <enh@google.com>	2016-02-20 05:43:54 +0000
committer	android-build-merger <android-build-merger@google.com>	2016-02-20 05:43:54 +0000
commit	b1a47859ffa0f1839a8190d8c879724f25d80109 (patch)
tree	fc21ef8bc00518064722b53ee3f3437d62a5aeca
parent	e201948901e6bec5a0657867ca519203e6e7e0a0 (diff)
parent	19ef8b8a7d596e032eb5b2449120d6d3d3031f0e (diff)
download	toybox-b1a47859ffa0f1839a8190d8c879724f25d80109.tar.gz