aboutsummaryrefslogtreecommitdiff
path: root/toys/posix/tsort.c
blob: e21196b70609b438aea173443b61d1b5927e0e69 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
/* tsort.c - topological sort dependency resolver
 *
 * Copyright 2023 Rob Landley <rob@landley.net>
 *
 * See http://opengroup.org/onlinepubs/9699919799/utilities/tsort.html

USE_TSORT(NEWTOY(tsort, ">1", TOYFLAG_USR|TOYFLAG_BIN))

config TSORT
  bool "tsort"
  default y
  help
    usage: tsort [FILE]

    Topological sort dependency resolver.

    Read pairs of input strings indicating before/after dependency relationships
    and find an ordering that respects all dependencies. On success output each
    string once to stdout, on failure print error and output cycle pairs.
*/

#include "toys.h"

// Comparison callback for qsort and bsearch: sort by second element
static int sbse(char **a, char **b)
{
  return strcmp(a[1], b[1]);
}

// Read pairs of "A must go before B" input strings into pair list. Sort pair
// list by second element. Loop over list to find pairs where the first string
// is not any pair's second string (I.E. nothing depends on this) and remove
// each pair from list. For each removed pair, add first string to output
// list, and also add second string to output if after removing it no other
// pair has it as the second string. Suppress duplicates by checking each
// string added to output against the strings added in the last 2 passes
// through the pair list. (Because "a b c a" removes "c a" pair after checking
// "a b" pair, so removing "a b" next pass would try to output "a" again.)
// If a pass through the pair list finds no pairs to remove, what's left is
// all circular dependencies.

// TODO: this treats NUL in input as EOF
static void do_tsort(int fd, char *name)
{
  off_t plen = 0;
  char *ss, **pair, *keep[2];
  long count,    // remaining unprocessed pairs
       len,      // total strings in pair list
       out,      // most recently added output (counts down from len)
       otop,     // out at start of this loop over pair[]
       otop2,    // out at start of previous loop over pair[]
       ii, jj, kk;
  unsigned long sigh;

  // bsearch()'s first argument is the element to search for,
  // and sbse() compares the second element of each pair, so to find
  // which FIRST element matches a second element, we need to feed bsearch
  // keep-1 but the compiler clutches its pearls about this even when
  // typecast to (void *), so do the usual LP64 trick to MAKE IT SHUT UP.
  // (The search function adds 1 to each argument so we never access
  // memory outside the pair.)
  sigh = ((unsigned long)keep)-sizeof(*keep);

  // Count input entries in data block read from fd
  if (!(ss = readfd(fd, 0, &plen))) return;
  for (ii = len = 0; ii<plen; len++) {
    while (isspace(ss[ii])) ii++;
    if (ii==plen) break;
    while (ii<plen && !isspace(ss[ii])) ii++;
  }
  if (len&1) error_exit("bad input (not pairs)");

  // get dependency pair list, null terminate strings, mark depends-on-self
  pair = xmalloc(len*sizeof(char *));
  for (ii = len = 0;;) {
    while (isspace(ss[ii])) ii++;
    if (ii>=plen) break;
    pair[len] = ss+ii;
    while (ii<plen && !isspace(ss[ii])) ii++;
    if (ii<plen) ss[ii++] = 0;
    if ((len&1) && !strcmp(pair[len], pair[len-1])) pair[len] = pair[len-1];
    len++;
  }

  // sort pair list by 2nd element to binary search "does anyone depend on this"
  count = (out = otop = otop2 = len)/2;
  qsort(pair, count, sizeof(keep), (void *)sbse);

  // repeat until pair list empty or nothing added to output list.
  while (count) {
    // find/remove/process pairs no other pair depends on
    for (ii = 0; ii<count; ii++) {
      // Find pair that depends on itself or no other pair depends on first str
      memcpy(keep, pair+2*ii, sizeof(keep));
      // The compiler won't shut up about keep-1 no matter how we typecast it.
      if (keep[0]!=keep[1] && bsearch((void *)sigh, pair, count, sizeof(keep),
          (void *)sbse)) continue;

      // Remove from pair list
      memmove(pair+2*ii, pair+2*(ii+1), (--count-ii)*sizeof(keep));
      ii--;

      // Drop depends-on-self pairs that any other pair depends on
      if (keep[0]==keep[1] &&
          bsearch(keep, pair, count, sizeof(keep),(void *)sbse)) continue;

      // Process removed pair: add unique strings to output list in reverse
      // order. Output is stored in space at end of list freed up by memmove(),
      // defers output until we know there are no cycles, and zaps duplicates.
      for (kk = 0;; kk++) {
        // duplicate killer checks through previous TWO passes, because
        // "a b c a" removes "c a" pair after checking "a b" pair, so removing
        // "a b" next pass tries to output "a" again.

        for (jj = out; jj<otop2; jj++) if (!strcmp(keep[kk], pair[jj])) break;
        if (jj==otop2) pair[--out] = keep[kk];

        // Print second string too if no other pair depends on it
        if (kk || bsearch(keep, pair, count, sizeof(keep), (void *)sbse)) break;
      }
    }
    if (out == otop) break;
    otop2 = otop;
    otop = out;
  }

  // If we couldn't empty the list there's a cycle
  if (count) {
    error_msg("cycle pairs");
    while (count--) xprintf("%s %s\n", pair[count*2], pair[count*2+1]);
  } else while (len>out) xprintf("%s\n", pair[--len]);
}

void tsort_main(void)
{
  loopfiles(toys.optargs, do_tsort);
}