aboutsummaryrefslogtreecommitdiff
path: root/gcc_plugin/afl-gcc-pass.so.cc
blob: 633dedcb1b1ac58050749acf91522228b721e81c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
//
// There are some TODOs in this file:
//   - fix instrumentation via external call
//   - fix inline instrumentation
//   - implement whitelist feature
//   - dont instrument blocks that are uninteresting
//   - implement neverZero
//

/*
   american fuzzy lop - GCC instrumentation pass
   ---------------------------------------------

   Written by Austin Seipp <aseipp@pobox.com> with bits from
              Emese Revfy <re.emese@gmail.com>

   Fixed by Heiko Eißfeldt 2019 for AFL++

   GCC integration design is based on the LLVM design, which comes
   from Laszlo Szekeres. Some of the boilerplate code below for
   afl_pass to adapt to different GCC versions was taken from Emese
   Revfy's Size Overflow plugin for GCC, licensed under the GPLv2/v3.

   (NOTE: this plugin code is under GPLv3, in order to comply with the
   GCC runtime library exception, which states that you may distribute
   "Target Code" from the compiler under a license of your choice, as
   long as the "Compilation Process" is "Eligible", and contains no
   GPL-incompatible software in GCC "during the process of
   transforming high level code to target code". In this case, the
   plugin will be used to generate "Target Code" during the
   "Compilation Process", and thus it must be GPLv3 to be "eligible".)

   Copyright (C) 2015 Austin Seipp

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.

 */

#define BUILD_INLINE_INST

#include "../config.h"
#include "../include/debug.h"

/* clear helper AFL types pulls in, which intervene with gcc-plugin geaders from
 * GCC-8 */
#ifdef likely
#undef likely
#endif
#ifdef unlikely
#undef unlikely
#endif

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#include <list>
#include <string>
#include <fstream>

#include <gcc-plugin.h>
#include <plugin-version.h>
#include <diagnostic.h>
#include <tree.h>
#include <tree-ssa.h>
#include <tree-pass.h>
#include <tree-ssa-alias.h>
#include <basic-block.h>
#include <gimple-expr.h>
#include <gimple.h>
#include <gimple-iterator.h>
#include <gimple-ssa.h>
#include <version.h>
#include <toplev.h>
#include <intl.h>
#include <context.h>
#include <stringpool.h>
#include <cgraph.h>
#include <cfgloop.h>

/* -------------------------------------------------------------------------- */
/* -- AFL instrumentation pass ---------------------------------------------- */

static int                    be_quiet = 0;
static unsigned int           inst_ratio = 100;
static bool                   inst_ext = true;
static std::list<std::string> myWhitelist;

static unsigned int ext_call_instrument(function *fun) {

  /* Instrument all the things! */
  basic_block bb;
  unsigned    finst_blocks = 0;
  unsigned    fcnt_blocks = 0;

  tree fntype = build_function_type_list(void_type_node,          /* return */
                                         uint32_type_node,          /* args */
                                         NULL_TREE);                /* done */
  tree fndecl = build_fn_decl("__afl_trace", fntype);
  TREE_STATIC(fndecl) = 1;                             /* Defined elsewhere */
  TREE_PUBLIC(fndecl) = 1;                                        /* Public */
  DECL_EXTERNAL(fndecl) = 1;                            /* External linkage */
  DECL_ARTIFICIAL(fndecl) = 1;                      /* Injected by compiler */

  FOR_EACH_BB_FN(bb, fun) {

    gimple_seq           fcall;
    gimple_seq           seq = NULL;
    gimple_stmt_iterator bentry;
    ++fcnt_blocks;

    // only instrument if this basic block is the destination of a previous
    // basic block that has multiple successors
    // this gets rid of ~5-10% of instrumentations that are unnecessary
    // result: a little more speed and less map pollution

    int           more_than_one = -1;
    edge          ep;
    edge_iterator eip;
    FOR_EACH_EDGE(ep, eip, bb->preds) {

      int count = 0;
      if (more_than_one == -1) more_than_one = 0;

      basic_block   Pred = ep->src;
      edge          es;
      edge_iterator eis;
      FOR_EACH_EDGE(es, eis, Pred->succs) {

        basic_block Succ = es->dest;
        if (Succ != NULL) count++;

      }

      if (count > 1) more_than_one = 1;

    }

    if (more_than_one != 1) continue;

    /* Bail on this block if we trip the specified ratio */
    if (R(100) >= inst_ratio) continue;

    /* Make up cur_loc */
    unsigned int rand_loc = R(MAP_SIZE);
    tree         cur_loc = build_int_cst(uint32_type_node, rand_loc);

    /* Update bitmap via external call */
    /* to quote:
     * /+ Trace a basic block with some ID +/
     * void __afl_trace(u32 x);
     */

    fcall = gimple_build_call(
        fndecl, 1,
        cur_loc); /* generate the function _call_ to above built reference, with
                     *1* parameter -> the random const for the location */
    gimple_seq_add_stmt(&seq, fcall);         /* and insert into a sequence */

    /* Done - grab the entry to the block and insert sequence */
    bentry = gsi_after_labels(bb);
    gsi_insert_seq_before(&bentry, seq, GSI_SAME_STMT);

    ++finst_blocks;

  }

  /* Say something nice. */
  if (!be_quiet) {

    if (!finst_blocks)
      WARNF(G_("No instrumentation targets found in " cBRI "%s" cRST),
            function_name(fun));
    else if (finst_blocks < fcnt_blocks)
      OKF(G_("Instrumented %2u /%2u locations in " cBRI "%s" cRST),
          finst_blocks, fcnt_blocks, function_name(fun));
    else
      OKF(G_("Instrumented %2u locations in " cBRI "%s" cRST), finst_blocks,
          function_name(fun));

  }

  return 0;

}

static unsigned int inline_instrument(function *fun) {

  /* Instrument all the things! */
  basic_block bb;
  unsigned    finst_blocks = 0;
  unsigned    fcnt_blocks = 0;

  /* Set up global type declarations */
  tree map_type = build_pointer_type(unsigned_char_type_node);
  tree map_ptr_g =
      build_decl(UNKNOWN_LOCATION, VAR_DECL,
                 get_identifier_with_length("__afl_area_ptr", 14), map_type);
  TREE_USED(map_ptr_g) = 1;
  TREE_STATIC(map_ptr_g) = 1;                          /* Defined elsewhere */
  DECL_EXTERNAL(map_ptr_g) = 1;                         /* External linkage */
  DECL_PRESERVE_P(map_ptr_g) = 1;
  DECL_ARTIFICIAL(map_ptr_g) = 1;                   /* Injected by compiler */
  rest_of_decl_compilation(map_ptr_g, 1, 0);

  tree prev_loc_g = build_decl(UNKNOWN_LOCATION, VAR_DECL,
                               get_identifier_with_length("__afl_prev_loc", 14),
                               uint32_type_node);
  TREE_USED(prev_loc_g) = 1;
  TREE_STATIC(prev_loc_g) = 1;                         /* Defined elsewhere */
  DECL_EXTERNAL(prev_loc_g) = 1;                        /* External linkage */
  DECL_PRESERVE_P(prev_loc_g) = 1;
  DECL_ARTIFICIAL(prev_loc_g) = 1;                  /* Injected by compiler */
  rest_of_decl_compilation(prev_loc_g, 1, 0);

  FOR_EACH_BB_FN(bb, fun) {

    gimple_seq           seq = NULL;
    gimple_stmt_iterator bentry;
    ++fcnt_blocks;

    // only instrument if this basic block is the destination of a previous
    // basic block that has multiple successors
    // this gets rid of ~5-10% of instrumentations that are unnecessary
    // result: a little more speed and less map pollution

    int           more_than_one = -1;
    edge          ep;
    edge_iterator eip;
    FOR_EACH_EDGE(ep, eip, bb->preds) {

      int count = 0;
      if (more_than_one == -1) more_than_one = 0;

      basic_block   Pred = ep->src;
      edge          es;
      edge_iterator eis;
      FOR_EACH_EDGE(es, eis, Pred->succs) {

        basic_block Succ = es->dest;
        if (Succ != NULL) count++;

      }

      if (count > 1) more_than_one = 1;

    }

    if (more_than_one != 1) continue;

    /* Bail on this block if we trip the specified ratio */
    if (R(100) >= inst_ratio) continue;

    /* Make up cur_loc */

    unsigned int rand_loc = R(MAP_SIZE);
    tree         cur_loc = build_int_cst(uint32_type_node, rand_loc);

    /* Load prev_loc, xor with cur_loc */
    // gimple_assign <var_decl, prev_loc.0_1, prev_loc, NULL, NULL>
    tree     prev_loc = create_tmp_var_raw(uint32_type_node, "prev_loc");
    gassign *g = gimple_build_assign(prev_loc, VAR_DECL, prev_loc_g);
    gimple_seq_add_stmt(&seq, g);  // load prev_loc
    update_stmt(g);

    // gimple_assign <bit_xor_expr, _2, prev_loc.0_1, 47231, NULL>
    tree area_off = create_tmp_var_raw(uint32_type_node, "area_off");
    g = gimple_build_assign(area_off, BIT_XOR_EXPR, prev_loc, cur_loc);
    gimple_seq_add_stmt(&seq, g);  // area_off = prev_loc ^ cur_loc
    update_stmt(g);

    /* Update bitmap */

    tree one = build_int_cst(unsigned_char_type_node, 1);
    //		tree zero = build_int_cst(unsigned_char_type_node, 0);

    // gimple_assign <addr_expr, p_6, &map[_2], NULL, NULL>
    tree map_ptr = create_tmp_var(map_type, "map_ptr");
    tree map_ptr2 = create_tmp_var(map_type, "map_ptr2");

    g = gimple_build_assign(map_ptr, map_ptr_g);
    gimple_seq_add_stmt(&seq, g);  // map_ptr = __afl_area_ptr
    update_stmt(g);

#if 0
		tree addr = build2(ADDR_EXPR, map_type, map_ptr, area_off);
		g = gimple_build_assign(map_ptr2, MODIFY_EXPR, addr);
		gimple_seq_add_stmt(&seq, g); // map_ptr2 = map_ptr + area_off
		update_stmt(g);
#else
    g = gimple_build_assign(map_ptr2, PLUS_EXPR, map_ptr, area_off);
    gimple_seq_add_stmt(&seq, g);  // map_ptr2 = map_ptr + area_off
    update_stmt(g);
#endif
    // gimple_assign <mem_ref, _3, *p_6, NULL, NULL>
    tree tmp1 = create_tmp_var_raw(unsigned_char_type_node, "tmp1");
    g = gimple_build_assign(tmp1, MEM_REF, map_ptr2);
    gimple_seq_add_stmt(&seq, g);  // tmp1 = *map_ptr2
    update_stmt(g);

    // gimple_assign <plus_expr, _4, _3, 1, NULL>
    tree tmp2 = create_tmp_var_raw(unsigned_char_type_node, "tmp2");
    g = gimple_build_assign(tmp2, PLUS_EXPR, tmp1, one);
    gimple_seq_add_stmt(&seq, g);  // tmp2 = tmp1 + 1
    update_stmt(g);

    // TODO: neverZero: here we have to check if tmp3 == 0
    //                  and add 1 if so

    // gimple_assign <ssa_name, *p_6, _4, NULL, NULL>
    //		tree map_ptr3 = create_tmp_var_raw(map_type, "map_ptr3");
    g = gimple_build_assign(map_ptr_g, INDIRECT_REF, tmp2);
    gimple_seq_add_stmt(&seq, g);  // *map_ptr3 = tmp2
    update_stmt(g);

    /* Set prev_loc to cur_loc >> 1 */

    // gimple_assign <integer_cst, prev_loc, 23615, NULL, NULL>
    tree shifted_loc = build_int_cst(TREE_TYPE(prev_loc_g), rand_loc >> 1);
    tree prev_loc2 = create_tmp_var_raw(uint32_type_node, "prev_loc2");
    g = gimple_build_assign(prev_loc2, shifted_loc);
    gimple_seq_add_stmt(&seq, g);  // __afl_prev_loc = cur_loc >> 1
    update_stmt(g);
    g = gimple_build_assign(prev_loc_g, prev_loc2);
    gimple_seq_add_stmt(&seq, g);  // __afl_prev_loc = cur_loc >> 1
    update_stmt(g);

    /* Done - grab the entry to the block and insert sequence */

    bentry = gsi_after_labels(bb);
    gsi_insert_seq_before(&bentry, seq, GSI_NEW_STMT);

    ++finst_blocks;

  }

  /* Say something nice. */
  if (!be_quiet) {

    if (!finst_blocks)
      WARNF(G_("No instrumentation targets found in " cBRI "%s" cRST),
            function_name(fun));
    else if (finst_blocks < fcnt_blocks)
      OKF(G_("Instrumented %2u /%2u locations in " cBRI "%s" cRST),
          finst_blocks, fcnt_blocks, function_name(fun));
    else
      OKF(G_("Instrumented   %2u   locations in " cBRI "%s" cRST), finst_blocks,
          function_name(fun));

  }

  return 0;

}

/* -------------------------------------------------------------------------- */
/* -- Boilerplate and initialization ---------------------------------------- */

static const struct pass_data afl_pass_data = {

    .type = GIMPLE_PASS,
    .name = "afl-inst",
    .optinfo_flags = OPTGROUP_NONE,

    .tv_id = TV_NONE,
    .properties_required = 0,
    .properties_provided = 0,
    .properties_destroyed = 0,
    .todo_flags_start = 0,
    // NOTE(aseipp): it's very, very important to include
    // at least 'TODO_update_ssa' here so that GCC will
    // properly update the resulting SSA form, e.g., to
    // include new PHI nodes for newly added symbols or
    // names. Do not remove this. Do not taunt Happy Fun
    // Ball.
    .todo_flags_finish = TODO_update_ssa | TODO_verify_il | TODO_cleanup_cfg,

};

namespace {

class afl_pass : public gimple_opt_pass {

 private:
  bool do_ext_call;

 public:
  afl_pass(bool ext_call, gcc::context *g)
      : gimple_opt_pass(afl_pass_data, g), do_ext_call(ext_call) {

  }

  virtual unsigned int execute(function *fun) {

    if (!myWhitelist.empty()) {

      bool instrumentBlock = false;

      /* EXPR_FILENAME
      This macro returns the name of the file in which the entity was declared,
      as a char*. For an entity declared implicitly by the compiler (like
      __builtin_ memcpy), this will be the string "<internal>".
      */
      const char *fname = DECL_SOURCE_FILE(fun->decl);

      if (0 != strncmp("<internal>", fname, 10) &&
          0 != strncmp("<built-in>", fname, 10)) {

        std::string instFilename(fname);

        /* Continue only if we know where we actually are */
        if (!instFilename.empty()) {

          for (std::list<std::string>::iterator it = myWhitelist.begin();
               it != myWhitelist.end(); ++it) {

            /* We don't check for filename equality here because
             * filenames might actually be full paths. Instead we
             * check that the actual filename ends in the filename
             * specified in the list. */
            if (instFilename.length() >= it->length()) {

              if (instFilename.compare(instFilename.length() - it->length(),
                                       it->length(), *it) == 0) {

                instrumentBlock = true;
                break;

              }

            }

          }

        }

      }

      /* Either we couldn't figure out our location or the location is
       * not whitelisted, so we skip instrumentation. */
      if (!instrumentBlock) return 0;

    }

    return do_ext_call ? ext_call_instrument(fun) : inline_instrument(fun);

  }

};                                                        /* class afl_pass */

}  // namespace

static struct opt_pass *make_afl_pass(bool ext_call, gcc::context *ctxt) {

  return new afl_pass(ext_call, ctxt);

}

/* -------------------------------------------------------------------------- */
/* -- Initialization -------------------------------------------------------- */

int plugin_is_GPL_compatible = 1;

static struct plugin_info afl_plugin_info = {

    .version = "20191015",
    .help = "AFL++ gcc plugin\n",

};

int plugin_init(struct plugin_name_args *  plugin_info,
                struct plugin_gcc_version *version) {

  struct register_pass_info afl_pass_info;
  struct timeval            tv;
  struct timezone           tz;
  u32                       rand_seed;

  /* Setup random() so we get Actually Random(TM) outputs from R() */
  gettimeofday(&tv, &tz);
  rand_seed = tv.tv_sec ^ tv.tv_usec ^ getpid();
  srandom(rand_seed);

  /* Pass information */
  afl_pass_info.pass = make_afl_pass(inst_ext, g);
  afl_pass_info.reference_pass_name = "ssa";
  afl_pass_info.ref_pass_instance_number = 1;
  afl_pass_info.pos_op = PASS_POS_INSERT_AFTER;

  if (!plugin_default_version_check(version, &gcc_version)) {

    FATAL(G_("Incompatible gcc/plugin versions!"));

  }

  /* Show a banner */
  if (isatty(2) && !getenv("AFL_QUIET")) {

    SAYF(G_(cCYA "afl-gcc-pass" VERSION cRST
                 " initially by <aseipp@pobox.com>, maintainer: hexcoder-\n"));

  } else

    be_quiet = 1;

  /* Decide instrumentation ratio */
  char *inst_ratio_str = getenv("AFL_INST_RATIO");

  if (inst_ratio_str) {

    if (sscanf(inst_ratio_str, "%u", &inst_ratio) != 1 || !inst_ratio ||
        inst_ratio > 100)
      FATAL(G_("Bad value of AFL_INST_RATIO (must be between 1 and 100)"));
    else {

      if (!be_quiet)
        ACTF(G_("%s instrumentation at ratio of %u%% in %s mode."),
             inst_ext ? G_("Call-based") : G_("Inline"), inst_ratio,
             getenv("AFL_HARDEN") ? G_("hardened") : G_("non-hardened"));

    }

  }

  char *instWhiteListFilename = getenv("AFL_GCC_WHITELIST");
  if (instWhiteListFilename) {

    std::string   line;
    std::ifstream fileStream;
    fileStream.open(instWhiteListFilename);
    if (!fileStream) fatal_error(0, "Unable to open AFL_GCC_WHITELIST");
    getline(fileStream, line);
    while (fileStream) {

      myWhitelist.push_back(line);
      getline(fileStream, line);

    }

  } else if (!be_quiet && getenv("AFL_LLVM_WHITELIST"))

    SAYF(cYEL "[-] " cRST
              "AFL_LLVM_WHITELIST environment variable detected - did you mean "
              "AFL_GCC_WHITELIST?\n");

  /* Go go gadget */
  register_callback(plugin_info->base_name, PLUGIN_INFO, NULL,
                    &afl_plugin_info);
  register_callback(plugin_info->base_name, PLUGIN_PASS_MANAGER_SETUP, NULL,
                    &afl_pass_info);
  return 0;

}