aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorElliott Hughes <enh@google.com>2016-02-20 05:43:54 +0000
committerandroid-build-merger <android-build-merger@google.com>2016-02-20 05:43:54 +0000
commitb1a47859ffa0f1839a8190d8c879724f25d80109 (patch)
treefc21ef8bc00518064722b53ee3f3437d62a5aeca
parente201948901e6bec5a0657867ca519203e6e7e0a0 (diff)
parent19ef8b8a7d596e032eb5b2449120d6d3d3031f0e (diff)
downloadtoybox-b1a47859ffa0f1839a8190d8c879724f25d80109.tar.gz
Merge remote-tracking branch \'toybox/master\' into HEAD
am: 19ef8b8a7d * commit '19ef8b8a7d596e032eb5b2449120d6d3d3031f0e': Implement "insmod -". Add test_COMMAND for each command, update "make help" to describe TEST_HOST and VERBOSE=fail, and fix a quote mismatch. Help text update from Isaac Dunham, tweaked slightly. Add xxd -p and -r. RLIMIT_RTTIME was added to the kernel in 2008, you can't expect uClibc to have noticed yet. The perl build's attempt to escape spaces and such in LD_LIBRARY_PATH is _SAD_. It uses a sed expression that assumes you can escape - to use it as a literal (you can't, it has to be first or last char of the range), and assumes you have to escape delimiters in sed [] context (you don't), and/or that non-printf escapes become the literal character (they don't, the backslash is preserved as a literal), meaning it winds up doing "s/[\-\]//" which is a length 1 range, which is officially undefined behavior according to posix, and regcomp errors out. Add support for getprop -Z Fix remounting /system on Android. Testfile with the 3 different types of "not utf8 output" escaped chars in it. Add dynamic/static checking to file (printing dynamic linker if found). Add the sed invocations to convert tabs/spaces and back, plus some tweaks. Cleanup pass on file.c, and add detection of different ELF architectures. Add "make list", rename make working->list_working and pending->list_pending, and filter out commands that aren't nofork but aren't installed either (toyflags 0, I.E. recognized aliases like "-sh" called from login). Implement file(1). Fix wc -c optimization. removed unread assignment in sed.c
-rw-r--r--Makefile10
-rw-r--r--lib/lib.c11
-rw-r--r--lib/portability.h4
-rwxr-xr-xscripts/genconfig.sh9
-rw-r--r--tests/files/utf8/bad.txt1
-rwxr-xr-xtests/sed.test6
-rw-r--r--tests/xxd.test19
-rw-r--r--toys/android/getprop.c63
-rw-r--r--toys/lsb/mount.c18
-rw-r--r--toys/other/hexedit.c9
-rw-r--r--toys/other/insmod.c32
-rw-r--r--toys/other/xxd.c80
-rw-r--r--toys/pending/file.c232
-rw-r--r--toys/posix/sed.c23
-rw-r--r--toys/posix/wc.c4
-rwxr-xr-xwww/design.html59
16 files changed, 506 insertions, 74 deletions
diff --git a/Makefile b/Makefile
index 39d44fcf..677bbb06 100644
--- a/Makefile
+++ b/Makefile
@@ -20,7 +20,7 @@ toybox toybox_unstripped: toybox_stuff
.PHONY: clean distclean baseline bloatcheck install install_flat \
uinstall uninstall_flat test tests help toybox_stuff change \
- working pending
+ list list_working list_pending
include kconfig/Makefile
-include .singlemake
@@ -66,12 +66,14 @@ tests:
help::
@echo ' toybox - Build toybox.'
@echo ' COMMANDNAME - Build individual toybox command as a standalone binary.'
- @echo ' working - List working COMMANDNAMEs.'
- @echo ' pending - List pending (unfinished) COMMANDNAMEs.'
+ @echo ' list - List COMMANDNAMEs (also list_working and list_pending).'
@echo ' change - Build each command standalone under change/.'
@echo ' baseline - Create toybox_old for use by bloatcheck.'
@echo ' bloatcheck - Report size differences between old and current versions'
- @echo ' test - Run test suite against compiled commands.'
+ @echo ' test_COMMAND - Run tests for COMMAND (test_ps, test_cat, etc.)'
+ @echo ' test - Run test suite against all compiled commands.'
+ @echo ' export TEST_HOST=1 to test host command, VERBOSE=1'
+ @echo ' to show diff, VERBOSE=fail to stop after first failure.'
@echo ' clean - Delete temporary files.'
@echo " distclean - Delete everything that isn't shipped."
@echo ' install_flat - Install toybox into $$PREFIX directory.'
diff --git a/lib/lib.c b/lib/lib.c
index 681d4d23..43db2e37 100644
--- a/lib/lib.c
+++ b/lib/lib.c
@@ -475,12 +475,12 @@ char *readfileat(int dirfd, char *name, char *ibuf, off_t *plen)
rbuf = buf+rlen;
len -= rlen;
}
- *plen = len = rlen+(buf-ibuf);
+ *plen = len = rlen+(rbuf-buf);
close(fd);
if (rlen<0) {
if (ibuf != buf) free(buf);
- buf = 0;
+ buf = 0;
} else buf[len] = 0;
return buf;
@@ -508,8 +508,7 @@ int64_t peek_le(void *ptr, unsigned size)
char *c = ptr;
int i;
- for (i=0; i<size; i++) ret |= ((int64_t)c[i])<<i;
-
+ for (i=0; i<size; i++) ret |= ((int64_t)c[i])<<(i*8);
return ret;
}
@@ -517,9 +516,9 @@ int64_t peek_be(void *ptr, unsigned size)
{
int64_t ret = 0;
char *c = ptr;
+ int i;
- while (size--) ret = (ret<<8)|c[size];
-
+ for (i=0; i<size; i++) ret = (ret<<8)|(c[i]&0xff);
return ret;
}
diff --git a/lib/portability.h b/lib/portability.h
index 53b08704..d0d0bd90 100644
--- a/lib/portability.h
+++ b/lib/portability.h
@@ -41,6 +41,10 @@
#define AT_REMOVEDIR 0x200
#endif
+#ifndef RLIMIT_RTTIME
+#define RLIMIT_RTTIME 15
+#endif
+
// We don't define GNU_dammit because we're not part of the gnu project, and
// don't want to get any FSF on us. Unfortunately glibc (gnu libc)
// won't give us Linux syscall wrappers without claiming to be part of the
diff --git a/scripts/genconfig.sh b/scripts/genconfig.sh
index e53d3607..5b0715f4 100755
--- a/scripts/genconfig.sh
+++ b/scripts/genconfig.sh
@@ -123,7 +123,7 @@ genconfig > generated/Config.in || rm generated/Config.in
# Find names of commands that can be built standalone in these C files
toys()
{
- grep 'TOY(.*)' "$@" | grep -v TOYFLAG_NOFORK | \
+ grep 'TOY(.*)' "$@" | grep -v TOYFLAG_NOFORK | grep -v "0))" | \
sed -rn 's/([^:]*):.*(OLD|NEW)TOY\( *([a-zA-Z][^,]*) *,.*/\1:\3/p'
}
@@ -135,13 +135,16 @@ do
[ "$NAME" == help ] && continue
[ "$NAME" == install ] && continue
echo -e "$NAME: $FILE *.[ch] lib/*.[ch]\n\tscripts/single.sh $NAME\n"
+ echo -e "test_$NAME:\n\tscripts/test.sh $NAME\n"
[ "${FILE/pending//}" != "$FILE" ] &&
PENDING="$PENDING $NAME" ||
WORKING="$WORKING $NAME"
done > .singlemake &&
echo -e "clean::\n\trm -f $WORKING $PENDING" >> .singlemake &&
-echo -e "working:\n\t@echo $(echo $WORKING | tr ' ' '\n' | sort | xargs)" \
+echo -e "list:\n\t@echo $(echo $WORKING $PENDING | tr ' ' '\n' | sort | xargs)"\
>> .singlemake &&
-echo -e "pending:\n\t@echo $(echo $PENDING | tr ' ' '\n' | sort | xargs)" \
+echo -e "list_working:\n\t@echo $(echo $WORKING | tr ' ' '\n' | sort | xargs)" \
+ >> .singlemake &&
+echo -e "list_pending:\n\t@echo $(echo $PENDING | tr ' ' '\n' | sort | xargs)" \
>> .singlemake
)
diff --git a/tests/files/utf8/bad.txt b/tests/files/utf8/bad.txt
new file mode 100644
index 00000000..1a91e8ae
--- /dev/null
+++ b/tests/files/utf8/bad.txt
@@ -0,0 +1 @@
+Áï¿¿
diff --git a/tests/sed.test b/tests/sed.test
index eff2306a..c62f9c4d 100755
--- a/tests/sed.test
+++ b/tests/sed.test
@@ -131,10 +131,13 @@ hello'" "merp\nhello\n" "" "merp"
testing "" "sed -e '/x/c\' -e 'y'" 'y\n' '' 'x\n'
testing "" "sed -e 's/a[([]*b/X/'" 'X' '' 'a[(b'
+testing "" "sed 'y/a\\bc/de\f/'" "db\f" "" "abc"
+testing "sed [a-a] (for perl)" "sed '"'s/\([^a-zA-Z0-9.:_\-\/]\)/\\\1/g'"'" \
+ 'he\ llo' "" "he llo"
# You have to match the first line of a range in order to activate
# the range, numeric and ascii work the same way
-testing "skip start of range" "sed -e n -e '1,2s/b/c/'" "a\nb\n" "" "a\nb\n"
+testing "sed skip start of range" "sed -e n -e '1,2s/b/c/'" "a\nb\n" "" "a\nb\n"
#echo meep | sed/sed -e '1a\' -e 'huh'
#echo blah | sed/sed -f <(echo -e "1a\\\\\nboom")
@@ -146,4 +149,5 @@ testing "sed bonus backslashes" \
"hello\nl x\nab\nc\n" "" "hello\n"
# -i with $ last line test
+
exit $FAILCOUNT
diff --git a/tests/xxd.test b/tests/xxd.test
index e036865a..68f52be4 100644
--- a/tests/xxd.test
+++ b/tests/xxd.test
@@ -25,4 +25,23 @@ testing "xxd -c 8 -g 4 file1" "xxd -c 8 -g 4 file1" \
testing "xxd -c 8 -g 3 file1" "xxd -c 8 -g 3 file1" \
"00000000: 746869 732069 7320 this is \n00000008: 736f6d 652074 6578 some tex\n00000010: 740a t.\n" "" ""
+testing "xxd -p" "xxd -p file1" "7468697320697320736f6d6520746578740a\n" "" ""
+
+testing "xxd -r" "xxd file1 | xxd -r" "this is some text\n" "" ""
+testing "xxd -r -p" "xxd -p file1 | xxd -r -p" "this is some text\n" "" ""
+
+testing "xxd -r garbage" "echo '0000: 68 65 6c6c 6fxxxx' | xxd -r -" "hello" "" ""
+
+# -r will only read -c bytes (default 16) before skipping to the next line,
+# ignoring the rest.
+testing "xxd -r long" \
+ "echo '0000: 40404040404040404040404040404040404040404040404040404040404040404040404040404040' | xxd -r -" \
+ "@@@@@@@@@@@@@@@@" "" ""
+
+# -r -p ignores the usual -p 30-byte/line limit (or any limit set by -c) and
+# will take as many bytes as you give it.
+testing "xxd -r -p long" \
+ "echo '40404040404040404040404040404040404040404040404040404040404040404040404040404040' | xxd -r -p -" \
+ "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" "" ""
+
rm file1 file2
diff --git a/toys/android/getprop.c b/toys/android/getprop.c
index 4afac1a5..efb1e447 100644
--- a/toys/android/getprop.c
+++ b/toys/android/getprop.c
@@ -2,7 +2,7 @@
*
* Copyright 2015 The Android Open Source Project
-USE_GETPROP(NEWTOY(getprop, ">2", TOYFLAG_USR|TOYFLAG_SBIN))
+USE_GETPROP(NEWTOY(getprop, ">2Z", TOYFLAG_USR|TOYFLAG_SBIN))
config GETPROP
bool "getprop"
@@ -21,32 +21,85 @@ config GETPROP
#include <cutils/properties.h>
+#include <selinux/android.h>
+#include <selinux/label.h>
+#include <selinux/selinux.h>
+
GLOBALS(
size_t size;
char **nv; // name/value pairs: even=name, odd=value
+ struct selabel_handle *handle;
)
+// Look up the SELinux context for a property name through TT.handle.
+// Calls perror_exit() if selabel_lookup() reports failure; otherwise returns
+// the string selabel_lookup() stored in context (callers in this file
+// free() it).
+static char *get_property_context(char *property)
+{
+ char *context = NULL;
+
+ if (selabel_lookup(TT.handle, &context, property, 1)) {
+ perror_exit("unable to lookup label for \"%s\"", property);
+ }
+ return context;
+}
+
static void add_property(char *name, char *value, void *unused)
{
if (!(TT.size&31)) TT.nv = xrealloc(TT.nv, (TT.size+32)*2*sizeof(char *));
TT.nv[2*TT.size] = xstrdup(name);
- TT.nv[1+2*TT.size++] = xstrdup(value);
+ if (toys.optflags & FLAG_Z) {
+ TT.nv[1+2*TT.size++] = get_property_context(name);
+ } else {
+ TT.nv[1+2*TT.size++] = xstrdup(value);
+ }
+}
+
+// Needed to suppress extraneous "Loaded property_contexts from" message:
+// SELINUX_INFO messages are dropped, everything else goes to verror_msg().
+int selinux_log_callback(int type, const char *fmt, ...) {
+ va_list ap;
+
+ if (type == SELINUX_INFO) return 0;
+ va_start(ap, fmt);
+ verror_msg(fmt, 0, ap);
+ va_end(ap);
+ return 0;
+}
void getprop_main(void)
{
+ if (toys.optflags & FLAG_Z) {
+ union selinux_callback cb;
+
+ cb.func_log = selinux_log_callback;
+ selinux_set_callback(SELINUX_CB_LOG, cb);
+ TT.handle = selinux_android_prop_context_handle();
+ if (!TT.handle) error_exit("unable to get selinux property context handle");
+ }
+
if (*toys.optargs) {
- property_get(*toys.optargs, toybuf, toys.optargs[1] ? toys.optargs[1] : "");
- puts(toybuf);
+ if (toys.optflags & FLAG_Z) {
+ char *context = get_property_context(*toys.optargs);
+
+ puts(context);
+ if (CFG_TOYBOX_FREE) free(context);
+ } else {
+ property_get(*toys.optargs, toybuf, toys.optargs[1] ? toys.optargs[1] : "");
+ puts(toybuf);
+ }
} else {
size_t i;
if (property_list((void *)add_property, 0)) error_exit("property_list");
qsort(TT.nv, TT.size, 2*sizeof(char *), qstrcmp);
for (i = 0; i<TT.size; i++) printf("[%s]: [%s]\n", TT.nv[i*2],TT.nv[1+i*2]);
- if (CFG_TOYBOX_FREE) free(TT.nv);
+ if (CFG_TOYBOX_FREE) {
+ for (i = 0; i<TT.size; i++) {
+ free(TT.nv[i*2]);
+ free(TT.nv[1+i*2]);
+ }
+ free(TT.nv);
+ }
}
+ if (CFG_TOYBOX_FREE && (toys.optflags & FLAG_Z)) selabel_close(TT.handle);
}
#else
diff --git a/toys/lsb/mount.c b/toys/lsb/mount.c
index 789d9a53..9510c5e9 100644
--- a/toys/lsb/mount.c
+++ b/toys/lsb/mount.c
@@ -91,6 +91,7 @@ static long flag_opts(char *new, long flags, char **more)
{"noexec", MS_NOEXEC}, {"exec", ~MS_NOEXEC},
{"sync", MS_SYNCHRONOUS}, {"async", ~MS_SYNCHRONOUS},
{"noatime", MS_NOATIME}, {"atime", ~MS_NOATIME},
+ {"norelatime", ~MS_RELATIME}, {"relatime", MS_RELATIME},
{"nodiratime", MS_NODIRATIME}, {"diratime", ~MS_NODIRATIME},
{"loud", ~MS_SILENT},
{"shared", MS_SHARED}, {"rshared", MS_SHARED|MS_REC},
@@ -195,16 +196,21 @@ static void mount_filesystem(char *dev, char *dir, char *type,
printf("try '%s' type '%s' on '%s'\n", dev, type, dir);
for (;;) {
rc = mount(dev, dir, type, flags, opts);
- if ((rc != EACCES && rc != EROFS) || (flags & MS_RDONLY)) break;
- if (rc == EROFS && fd == -1) {
+ // Did we succeed, fail unrecoverably, or already try read-only?
+ if (rc == 0 || (errno != EACCES && errno != EROFS) || (flags&MS_RDONLY))
+ break;
+ // If we haven't already tried it, use the BLKROSET ioctl to ensure
+ // that the underlying device isn't read-only.
+ if (fd == -1) {
+ if (toys.optflags & FLAG_v)
+ printf("trying BLKROSET ioctl on '%s'\n", dev);
if (-1 != (fd = open(dev, O_RDONLY))) {
- ioctl(fd, BLKROSET, &ro);
+ rc = ioctl(fd, BLKROSET, &ro);
close(fd);
-
- continue;
+ if (rc == 0) continue;
}
}
- fprintf(stderr, "'%s' is read-only", dev);
+ fprintf(stderr, "'%s' is read-only\n", dev);
flags |= MS_RDONLY;
}
diff --git a/toys/other/hexedit.c b/toys/other/hexedit.c
index ff13b513..a9d97aa2 100644
--- a/toys/other/hexedit.c
+++ b/toys/other/hexedit.c
@@ -12,9 +12,16 @@ config HEXEDIT
help
usage: hexedit FILENAME
- Hexadecimal file editor.
+ Hexadecimal file editor. All changes are written to disk immediately.
-r Read only (display but don't edit)
+
+ Keys:
+ Arrows Move left/right/up/down by one line/column
+ Pg Up/Pg Dn Move up/down by one page
+ 0-9, a-f Change current half-byte to hexadecimal value
+ u Undo
+ q/^c/^d/<esc> Quit
*/
#define FOR_hexedit
diff --git a/toys/other/insmod.c b/toys/other/insmod.c
index cb222a54..098d2cfa 100644
--- a/toys/other/insmod.c
+++ b/toys/other/insmod.c
@@ -16,31 +16,35 @@ config INSMOD
#include "toys.h"
#include <sys/syscall.h>
-#define init_module(mod, len, opts) syscall(__NR_init_module, mod, len, opts)
+#define finit_module(fd, opts, flags) syscall(SYS_finit_module, fd, opts, flags)
+#define init_module(mod, len, opts) syscall(SYS_init_module, mod, len, opts)
void insmod_main(void)
{
- char * buf = NULL;
- int len, res, i;
- int fd = xopen(*toys.optargs, O_RDONLY);
-
- len = fdlength(fd);
- buf = xmalloc(len);
- xreadall(fd, buf, len);
+ int fd = !strcmp(*toys.optargs, "-") ? 0 : xopen(*toys.optargs, O_RDONLY);
+ int i, rc;
i = 1;
- while(toys.optargs[i] &&
+ while (toys.optargs[i] &&
strlen(toybuf) + strlen(toys.optargs[i]) + 2 < sizeof(toybuf))
{
strcat(toybuf, toys.optargs[i++]);
strcat(toybuf, " ");
}
- res = init_module(buf, len, toybuf);
- if (CFG_TOYBOX_FREE) {
- if (buf != toybuf) free(buf);
- close(fd);
+ // finit_module was new in Linux 3.8, and doesn't work on stdin,
+ // so we fall back to init_module if necessary.
+ rc = finit_module(fd, toybuf, 0);
+ if (rc && (fd == 0 || errno == ENOSYS)) {
+ off_t len = 0;
+ char *path = !strcmp(*toys.optargs, "-") ? "/dev/stdin" : *toys.optargs;
+ char *buf = readfileat(AT_FDCWD, path, NULL, &len);
+
+ rc = init_module(buf, len, toybuf);
+ if (CFG_TOYBOX_FREE) free(buf);
}
- if (res) perror_exit("failed to load %s", toys.optargs[0]);
+ if (rc) perror_exit("failed to load %s", toys.optargs[0]);
+
+ if (CFG_TOYBOX_FREE) close(fd);
}
diff --git a/toys/other/xxd.c b/toys/other/xxd.c
index e9ad8393..0250f8f8 100644
--- a/toys/other/xxd.c
+++ b/toys/other/xxd.c
@@ -8,13 +8,13 @@
* TODO: support for reversing a hexdump back into the original data.
* TODO: -s seek
-USE_XXD(NEWTOY(xxd, ">1c#<1>4096=16l#g#<1=2", TOYFLAG_USR|TOYFLAG_BIN))
+USE_XXD(NEWTOY(xxd, ">1c#<1>4096=16l#g#<1=2pr", TOYFLAG_USR|TOYFLAG_BIN))
config XXD
bool "xxd"
default y
help
- usage: xxd [-c n] [-g n] [-l n] [file]
+ usage: xxd [-c n] [-g n] [-l n] [-p] [-r] [file]
Hexdump a file to stdout. If no file is listed, copy from stdin.
Filename "-" is a synonym for stdin.
@@ -22,6 +22,8 @@ config XXD
-c n Show n bytes per line (default 16).
-g n Group bytes by adding a ' ' every n bytes (default 2).
-l n Limit of n bytes before stopping (default is no limit).
+ -p Plain hexdump (30 bytes/line, no grouping).
+ -r Reverse operation: turn a hexdump into a binary file.
*/
#define FOR_xxd
@@ -39,7 +41,7 @@ static void do_xxd(int fd, char *name)
int i, len, space;
while (0<(len = readall(fd, toybuf, (TT.l && TT.l-pos<TT.c)?TT.l-pos:TT.c))) {
- printf("%08llx: ", pos);
+ if (!(toys.optflags&FLAG_p)) printf("%08llx: ", pos);
pos += len;
space = 2*TT.c+TT.c/TT.g+1;
@@ -51,15 +53,79 @@ static void do_xxd(int fd, char *name)
}
}
- printf("%*s", space, "");
- for (i=0; i<len; i++)
- putchar((toybuf[i]>=' ' && toybuf[i]<='~') ? toybuf[i] : '.');
+ if (!(toys.optflags&FLAG_p)) {
+ printf("%*s", space, "");
+ for (i=0; i<len; i++)
+ putchar((toybuf[i]>=' ' && toybuf[i]<='~') ? toybuf[i] : '.');
+ }
putchar('\n');
}
if (len<0) perror_exit("read");
}
+// Convert one hex digit to its value (0-15). Returns -2 for a newline and
+// -1 for any other character that is not a hex digit.
+static int dehex(char ch)
+{
+ if (ch >= '0' && ch <= '9') return ch - '0';
+ if (ch >= 'a' && ch <= 'f') return ch - 'a' + 10;
+ // Uppercase digits are offset from 'A', not 'a'.
+ if (ch >= 'A' && ch <= 'F') return ch - 'A' + 10;
+ return (ch == '\n') ? -2 : -1;
+}
+
+// loopfiles() callback for -r: parse a hexdump from fd and write the decoded
+// bytes to stdout. Without -p each line may start with an "offset: " field,
+// which is used to fseek() stdout before the line's bytes are written.
+static void do_xxd_reverse(int fd, char *name)
+{
+ FILE *fp = xfdopen(fd, "r");
+
+ while (!feof(fp)) {
+ int col = 0;
+ int tmp;
+
+ // Each line of a non-plain hexdump starts with an offset/address.
+ if (!(toys.optflags&FLAG_p)) {
+ long long pos;
+
+ if (fscanf(fp, "%llx: ", &pos) == 1) {
+ if (fseek(stdout, pos, SEEK_SET) != 0) {
+ // TODO: just write out zeros if non-seekable?
+ perror_exit("%s: seek failed", name);
+ }
+ }
+ }
+
+ // A plain hexdump can have as many bytes per line as you like,
+ // but a non-plain hexdump assumes garbage after it's seen the
+ // specified number of bytes.
+ while (toys.optflags&FLAG_p || col < TT.c) {
+ int n1, n2;
+
+ // If we're at EOF or EOL or we read some non-hex...
+ if ((n1 = n2 = dehex(fgetc(fp))) < 0 || (n2 = dehex(fgetc(fp))) < 0) {
+ // If we hit EOL, keep reading hex digits from the next line.
+ if (n1 == -2 || n2 == -2) continue;
+ // Otherwise, skip to the next line.
+ break;
+ }
+
+ fputc((n1 << 4) | (n2 & 0xf), stdout);
+ col++;
+
+ // Is there any grouping going on? Ignore a single space.
+ tmp = fgetc(fp);
+ if (tmp != ' ') ungetc(tmp, fp);
+ }
+
+ // Skip anything else on this line (such as the ASCII dump).
+ while ((tmp = fgetc(fp)) != EOF && tmp != '\n')
+ ;
+ }
+ if (ferror(fp)) perror_msg_raw(name);
+
+ fclose(fp);
+}
+
void xxd_main(void)
{
- loopfiles(toys.optargs, do_xxd);
+ // Plain style is 30 bytes/line, no grouping.
+ if (toys.optflags&FLAG_p) TT.c = TT.g = 30;
+
+ loopfiles(toys.optargs, toys.optflags&FLAG_r ? do_xxd_reverse : do_xxd);
}
diff --git a/toys/pending/file.c b/toys/pending/file.c
new file mode 100644
index 00000000..1d09471d
--- /dev/null
+++ b/toys/pending/file.c
@@ -0,0 +1,232 @@
+/* file.c - describe file type
+ *
+ * Copyright 2016 The Android Open Source Project
+ *
+ * See http://pubs.opengroup.org/onlinepubs/9699919799/utilities/file.html
+ *
+ * TODO: ar
+
+USE_FILE(NEWTOY(file, "<1", TOYFLAG_USR|TOYFLAG_BIN))
+
+config FILE
+ bool "file"
+ default n
+ help
+ usage: file [file...]
+
+ Examine the given files and describe their content types.
+*/
+
+#define FOR_file
+#include "toys.h"
+
+GLOBALS(
+ int max_name_len;
+)
+
+// We don't trust elf.h to be there, and two codepaths for 32/64 is awkward
+// anyway, so calculate struct offsets manually. (It's a fixed ABI.)
+//
+// Prints a one-line description of the ELF header already read into toybuf.
+// fd is the open file, used to map the program headers and read PT_INTERP.
+static void do_elf_file(int fd)
+{
+ int endian = toybuf[5], bits = toybuf[4], i, j;
+ // Pick the integer decoder up front so every multi-byte field, including
+ // e_machine below, is read in the file's own byte order.
+ int64_t (*elf_int)(void *ptr, unsigned size) = (endian == 2) ? peek_be
+ : peek_le;
+ // Values from include/linux/elf-em.h (plus arch/*/include/asm/elf.h)
+ // Names are linux/arch/ directory name
+ struct {int val; char *name;} type[] = {{0x9026, "alpha"},
+ {40, "arm"}, {183, "arm"}, {0x18ad, "avr32"}, {106, "blackfin"},
+ {76, "cris"}, {0x5441, "frv"}, {46, "h8300"}, {50, "ia64"},//ia intel ftaghn
+ {88, "m32r"}, {4, "m68k"}, {0xbaab, "microblaze"}, {8, "mips"},
+ {10, "mips"}, {89, "mn10300"}, {15, "parisc"}, {22, "s390"},
+ {135, "score"}, {42, "sh"}, {2, "sparc"}, {18, "sparc"}, {43, "sparc"},
+ {187, "tile"}, {188, "tile"}, {191, "tile"}, {3, "x86"}, {6, "x86"},
+ {62, "x86"}, {94, "xtensa"}, {0xabc7, "xtensa"}};
+
+ xprintf("ELF ");
+
+ // "64-bit"
+ if (bits == 1) xprintf("32-bit ");
+ else if (bits == 2) xprintf("64-bit ");
+ else {
+ xprintf("(bad class %d) ", bits);
+ bits = 0;
+ }
+
+ // e_machine, ala "x86", from big table above
+ j = elf_int(toybuf+18, 2);
+ for (i = 0; i<ARRAY_LEN(type); i++) if (j==type[i].val) break;
+ if (i<ARRAY_LEN(type)) xprintf("%s ", type[i].name);
+ else xprintf("(unknown arch %d) ", j);
+
+ // "LSB" (the matching decoder was already selected above)
+ if (endian == 1) xprintf("LSB ");
+ else if (endian == 2) xprintf("MSB ");
+ else {
+ xprintf("(bad endian %d)\n", endian);
+ endian = 0;
+ }
+
+ // ", executable"
+ i = elf_int(toybuf+16, 2);
+ if (i == 1) xprintf("relocatable");
+ else if (i == 2) xprintf("executable");
+ else if (i == 3) xprintf("shared object");
+ else if (i == 4) xprintf("core dump");
+ else xprintf("(bad type %d)", i);
+
+ bits--;
+ // If we know our bits and endianness and phentsize agrees show dynamic linker
+ if ((bits&1)==bits && endian &&
+ (i = elf_int(toybuf+42+12*bits, 2)) == 32+24*bits)
+ {
+ char *map, *phdr;
+ int phsize = i, phnum = elf_int(toybuf+44+12*bits, 2),
+ psz = sysconf(_SC_PAGE_SIZE), lib = 0;
+ off_t phoff = elf_int(toybuf+28+4*bits, 4+4*bits),
+ mapoff = phoff^(phoff&(psz-1));
+ // The table starts phoff-mapoff bytes into the page-aligned mapping, so
+ // the mapping has to be that much longer than the table itself.
+ size_t maplen = (phoff-mapoff)+(size_t)phsize*phnum;
+
+ // map e_phentsize*e_phnum bytes at e_phoff
+ map = mmap(0, maplen, PROT_READ, MAP_SHARED, fd, mapoff);
+ // mmap() reports failure with MAP_FAILED, not a null pointer.
+ if (map != MAP_FAILED) {
+ // Find PT_INTERP entry. (Note: fields got reordered for 64 bit)
+ for (i = 0; i<phnum; i++) {
+ long long dlpos, dllen;
+
+ // skip non-PT_INTERP entries
+ j = elf_int(phdr = map+(phoff-mapoff)+i*phsize, 4);
+ if (j==2) lib++;
+ if (j!=3) continue;
+
+ // Read p_offset and p_filesz
+ j = bits+1;
+ dlpos = elf_int(phdr+4*j, 4*j);
+ dllen = elf_int(phdr+16*j, 4*j);
+ if (dllen<0 || dllen>sizeof(toybuf)-128
+ || dlpos!=lseek(fd, dlpos, SEEK_SET)
+ || dllen!=readall(fd, toybuf+128, dllen)) break;
+ printf(", dynamic (%.*s)", (int)dllen, toybuf+128);
+ }
+ if (!lib) printf(", static");
+ else printf(", needs %d lib%s", lib, lib>1 ? "s" : "");
+ munmap(map, maplen);
+ }
+ }
+
+ // TODO: we'd need to actually parse the ELF file to report the rest...
+ // ", dynamically linked"
+ // " (uses shared libs)"
+ // ", for Linux 2.6.24"
+ // ", BuildID[sha1]=SHA"
+ // ", stripped"
+ xputc('\n');
+}
+
+// Describe a regular file by sniffing the first bytes of its contents
+// (read into toybuf) and printing one line to stdout.
+static void do_regular_file(int fd, char *name)
+{
+ char *s;
+ int len = read(fd, s = toybuf, sizeof(toybuf)-256);
+
+ // On a read error there is nothing to sniff: report it and end the line
+ // instead of falling through and claiming "ASCII text".
+ if (len<0) {
+ perror_msg("%s", name);
+ xputc('\n');
+ return;
+ }
+
+ if (len>40 && strstart(&s, "\177ELF")) do_elf_file(fd);
+ else if (len>28 && strstart(&s, "\x89PNG\x0d\x0a\x1a\x0a")) {
+ // PNG is big-endian: https://www.w3.org/TR/PNG/#7Integers-and-byte-order
+ int chunk_length = peek_be(s, 4);
+
+ xprintf("PNG image data");
+
+ // The IHDR chunk comes first: https://www.w3.org/TR/PNG/#11IHDR
+ s += 4;
+ if (chunk_length == 13 && strstart(&s, "IHDR")) {
+ // https://www.w3.org/TR/PNG/#6Colour-values
+ char *c = 0, *colors[] = {"grayscale", 0, "color RGB", "indexed color",
+ "grayscale with alpha", 0, "color RGBA"};
+
+ if (s[9]<ARRAY_LEN(colors)) c = colors[s[9]];
+ if (!c) c = "unknown";
+
+ xprintf(", %d x %d, %d-bit/%s, %sinterlaced", (int)peek_be(s, 4),
+ (int)peek_be(s+4, 4), s[8], c, s[12] ? "" : "non-");
+ }
+
+ xputc('\n');
+
+ // https://www.w3.org/Graphics/GIF/spec-gif89a.txt
+ // Width and height are adjacent 16-bit little-endian fields right after
+ // the 6-byte signature that strstart() just skipped.
+ } else if (len>16 && (strstart(&s, "GIF87a") || strstart(&s, "GIF89a")))
+ xprintf("GIF image data, %d x %d\n",
+ (int)peek_le(s, 2), (int)peek_le(s+2, 2));
+
+ // TODO: parsing JPEG for width/height is harder than GIF or PNG.
+ else if (len>32 && memcmp(toybuf, "\xff\xd8", 2) == 0)
+ xprintf("JPEG image data\n");
+
+ // https://docs.oracle.com/javase/specs/jvms/se7/html/jvms-4.html
+ // minor_version follows the magic, then major_version; print major.minor.
+ else if (len>8 && strstart(&s, "\xca\xfe\xba\xbe"))
+ xprintf("Java class file, version %d.%d\n",
+ (int)peek_be(s+2, 2), (int)peek_be(s, 2));
+
+ // TODO: cpio archive.
+ // TODO: tar archive.
+ // TODO: zip/jar/apk archive.
+ else {
+ char *what = 0;
+ int i, bytes;
+
+ // If shell script, report which interpreter
+ if (len>3 && strstart(&s, "#!")) {
+ for (what = s; (s-toybuf)<len && !isspace(*s); s++);
+ strcpy(s, " script");
+
+ // Distinguish ASCII text, UTF-8 text, or data
+ } else for (i = 0; i<len; ++i) {
+ if (!(isprint(toybuf[i]) || isspace(toybuf[i]))) {
+ wchar_t wc;
+ if ((bytes = mbrtowc(&wc, s+i, len-i, 0))>0 && wcwidth(wc)>=0) {
+ i += bytes-1;
+ if (!what) what = "UTF-8 text";
+ } else {
+ what = "data";
+ break;
+ }
+ }
+ }
+ xputs(what ? what : "ASCII text");
+ }
+}
+
+// loopfiles() callback: print "name: " padded out to TT.max_name_len, then a
+// description of what fd refers to.
+static void do_file(int fd, char *name)
+{
+ struct stat sb;
+ char *what = "unknown";
+
+ xprintf("%s: %*s", name, (int)(TT.max_name_len - strlen(name)), "");
+
+ // fstat() returns nonzero on failure; only inspect sb when it succeeded.
+ if (fstat(fd, &sb)) what = "cannot open";
+ else if (S_ISREG(sb.st_mode)) {
+ if (sb.st_size == 0) what = "empty";
+ else {
+ do_regular_file(fd, name);
+ return;
+ }
+ } else if (S_ISBLK(sb.st_mode)) what = "block special";
+ else if (S_ISCHR(sb.st_mode)) what = "character special";
+ else if (S_ISDIR(sb.st_mode)) what = "directory";
+ else if (S_ISFIFO(sb.st_mode)) what = "fifo";
+ else if (S_ISSOCK(sb.st_mode)) what = "socket";
+ else if (S_ISLNK(sb.st_mode)) what = "symbolic link";
+ xputs(what);
+}
+
+// Record the longest argument in TT.max_name_len (do_file() pads its
+// "name: " prefix to that width), then run do_file() on every argument via
+// loopfiles().
+void file_main(void)
+{
+ char **name;
+
+ for (name = toys.optargs; *name; ++name) {
+ int name_len = strlen(*name);
+
+ if (name_len > TT.max_name_len) TT.max_name_len = name_len;
+ }
+
+ loopfiles(toys.optargs, do_file);
+}
diff --git a/toys/posix/sed.c b/toys/posix/sed.c
index 30d8a154..9b5e6664 100644
--- a/toys/posix/sed.c
+++ b/toys/posix/sed.c
@@ -701,11 +701,11 @@ static void do_sed(int fd, char *name)
// returns processed copy of string (0 if error), *pstr advances to next
// unused char. if delim (or *delim) is 0 uses/saves starting char as delimiter
// if regex, ignore delimiter in [ranges]
-static char *unescape_delimited_string(char **pstr, char *delim, int regex)
+static char *unescape_delimited_string(char **pstr, char *delim)
{
char *to, *from, mode = 0, d;
- to = from = *pstr;
+ from = *pstr;
if (!delim || !*delim) {
if (!(d = *(from++))) return 0;
if (d == '\\') d = *(from++);
@@ -720,9 +720,14 @@ static char *unescape_delimited_string(char **pstr, char *delim, int regex)
// delimiter in regex character range doesn't count
if (!mode && *from == '[') {
mode = '[';
- if (from[1] == ']') *(to++) = *(from++);
+ if (from[1]=='-' || from[1]==']') *(to++) = *(from++);
} else if (mode && *from == ']') mode = 0;
- else if (*from == '\\') {
+ // Length 1 range (X-X with same X) is "undefined" and makes regcomp err,
+ // but the perl build does it, so we need to filter it out.
+ else if (mode && *from == '-' && from[-1] == from[1]) {
+ from+=2;
+ continue;
+ } else if (*from == '\\') {
if (!from[1]) return 0;
// Check escaped end delimiter before printf style escapes.
@@ -735,7 +740,7 @@ static char *unescape_delimited_string(char **pstr, char *delim, int regex)
*(to++) = c;
from+=2;
continue;
- } else *(to++) = *(from++);
+ } else if (!mode) *(to++) = *(from++);
}
}
*(to++) = *(from++);
@@ -802,7 +807,7 @@ static void jewel_of_judgement(char **pline, long len)
} else if (*line == '/' || *line == '\\') {
char *s = line;
- if (!(s = unescape_delimited_string(&line, 0, 1))) goto brand;
+ if (!(s = unescape_delimited_string(&line, 0))) goto brand;
if (!*s) corwin->rmatch[i] = 0;
else {
xregcomp((void *)reg, s, (toys.optflags & FLAG_r)*REG_EXTENDED);
@@ -844,7 +849,7 @@ static void jewel_of_judgement(char **pline, long len)
// get pattern (just record, we parse it later)
corwin->arg2 = reg - (char *)corwin;
- if (!(TT.remember = unescape_delimited_string(&line, &delim, 1)))
+ if (!(TT.remember = unescape_delimited_string(&line, &delim)))
goto brand;
reg += sizeof(regex_t);
@@ -940,13 +945,13 @@ writenow:
char *s, delim = 0;
int len;
- if (!(s = unescape_delimited_string(&line, &delim, 0))) goto brand;
+ if (!(s = unescape_delimited_string(&line, &delim))) goto brand;
corwin->arg1 = reg-(char *)corwin;
len = strlen(s);
reg = extend_string((void *)&corwin, s, reg-(char *)corwin, len);
free(s);
corwin->arg2 = reg-(char *)corwin;
- if (!(s = unescape_delimited_string(&line, &delim, 0))) goto brand;
+ if (!(s = unescape_delimited_string(&line, &delim))) goto brand;
if (len != strlen(s)) goto brand;
reg = extend_string((void *)&corwin, s, reg-(char*)corwin, len);
free(s);
diff --git a/toys/posix/wc.c b/toys/posix/wc.c
index 62d1f7a0..e7afc813 100644
--- a/toys/posix/wc.c
+++ b/toys/posix/wc.c
@@ -53,8 +53,8 @@ static void do_wc(int fd, char *name)
if (toys.optflags == FLAG_c) {
struct stat st;
- fstat(fd, &st);
- if (S_ISREG(st.st_mode)) {
+ // On Linux, files in /proc often report their size as 0.
+ if (!fstat(fd, &st) && S_ISREG(st.st_mode) && st.st_size > 0) {
lengths[2] = st.st_size;
goto show;
}
diff --git a/www/design.html b/www/design.html
index 4a42753c..050c953f 100755
--- a/www/design.html
+++ b/www/design.html
@@ -294,24 +294,40 @@ feeding the compiler -funsigned-char.</p>
<p>The reason to pick "unsigned" is that way we're 8-bit clean by default.</p>
<p><h3>Error messages and internationalization:</h3></p>
+
<p>Error messages are extremely terse not just to save bytes, but because we
-don't use any sort of _("string") translation infrastructure.</p>
+don't use any sort of _("string") translation infrastructure. (We're not
+translating the command names themselves, so we must expect a minimum amount of
+english knowledge from our users, but let's keep it to a minimum.)</p>
<p>Thus "bad -A '%c'" is
preferable to "Unrecognized address base '%c'", because a non-english speaker
-can see that -A was the problem, and with a ~20 word english vocabulary is
-more likely to know (or guess) "bad" than the longer message.</p>
-
-<p>The help text might someday have translated versions, and strerror()
-messages produced by perror_exit() and friends can be expected to be
-localized by libc. Our error functions also prepend the command name,
-which non-english speakers can presumably recognize already.</p>
-
-<p>An enventual goal is <a href=http://yarchive.net/comp/linux/utf8.html>UTF-8</a> support, although it isn't a priority for the
-first pass of each command. (All commands should at least be 8-bit clean.)</p>
-
-<p>Locale support isn't currently a goal; that's a presentation layer issue,
-X11 or Dalvik's problem.</p>
+can see that -A was the problem (giving back the command line argument they
+supplied). A user with a ~20 word english vocabulary is
+more likely to know (or guess) "bad" than the longer message, and you can
+use "bad" in place of "invalid", "inappropriate", "unrecognized"...
+Similarly when atolx_range() complains about range constraints with
+"4 < 17" or "12 > 5", it's intentional: those don't need to be translated.</p>
+
+<p>The strerror() messages produced by perror_exit() and friends should be
+localized by libc, and our error functions also prepend the command name
+(which non-english speakers can presumably recognize already). Keep the
+explanation in between to a minimum, and where possible feed back the values
+they passed in to identify _what_ we couldn't process.
+If you say perror_exit("setsockopt"), you've identified the action you
+were trying to take, and the perror gives a translated error message (from libc)
+explaining _why_ it couldn't do it, so you probably don't need to add english
+words like "failed" or "couldn't assign".</p>
+
+<p>All commands should be 8-bit clean, with explicit
+<a href=http://yarchive.net/comp/linux/utf8.html>UTF-8</a> support where
+necessary. Assume all input data might be utf8, and at least preserve
+it and pass it through. (For this reason, our build is -funsigned-char on
+all architectures; "char" is unsigned unless you stick "signed" in front
+of it.)</p>
+
+<p>Locale support isn't currently a goal; that's a presentation layer issue
+(I.E. a GUI problem).</p>
<a name="codestyle" />
<h2>Coding style</h2>
@@ -327,6 +343,17 @@ columns. (Indentation of continuation lines is awkward no matter what
you do, sometimes two spaces looks better, sometimes indenting to the
contents of a parentheses looks better.)</p>
+<p>I'm aware this indentation style creeps some people out, so here's
+the sed invocation to convert groups of two leading spaces to tabs:</p>
+<blockquote><pre>
+sed -i ':loop;s/^\( *\) /\1\t/;t loop' filename
+</pre></blockquote>
+
+<p>And here's the sed invocation to convert leading tabs to two spaces each:</p>
+<blockquote><pre>
+sed -i ':loop;s/^\( *\)\t/\1 /;t loop' filename
+</pre></blockquote>
+
<p>There's a space after C flow control statements that look like functions, so
"if (blah)" instead of "if(blah)". (Note that sizeof is actually an
operator, so we don't give it a space for the same reason ++ doesn't get
@@ -336,8 +363,8 @@ to read.) We also put a space around assignment operators (on both sides),
so "int x = 0;".</p>
<p>Blank lines (vertical whitespace) go between thoughts. "We were doing that,
-now we're doing this. (Not a hard and fast rule about _where_ it goes,
-but there should be some.)"</p>
+now we're doing this." (Not a hard and fast rule about _where_ it goes,
+but there should be some for the same reason writing has paragraph breaks.)</p>
<p>Variable declarations go at the start of blocks, with a blank line between
them and other code. Yes, c99 allows you to put them anywhere, but they're