summaryrefslogtreecommitdiff
path: root/MagickWand/script-token.c
blob: 4397cbba6c6593a6878928e938362791360f34fd (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
/*
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%                                                                             %
%                                                                             %
%    SSS    CCC  RRRR   III  PPPP  TTTTT    TTTTT  OOO   K  K  EEEE  N   N    %
%   S      C     R   R   I   P   P   T        T   O   O  K K   E     NN  N    %
%    SSS   C     RRRR    I   PPPP    T        T   O   O  KK    EEE   N N N    %
%       S  C     R R     I   P       T        T   O   O  K K   E     N  NN    %
%   SSSS    CCC  R  RR  III  P       T        T    OOO   K  K  EEEE  N   N    %
%                                                                             %
%                    Tokenize Magick Script into Options                      %
%                                                                             %
%                             Dragon Computing                                %
%                             Anthony Thyssen                                 %
%                               January 2012                                  %
%                                                                             %
%                                                                             %
%  Copyright 1999-2021 ImageMagick Studio LLC, a non-profit organization      %
%  dedicated to making software imaging solutions freely available.           %
%                                                                             %
%  You may not use this file except in compliance with the License.  You may  %
%  obtain a copy of the License at                                            %
%                                                                             %
%    https://imagemagick.org/script/license.php                               %
%                                                                             %
%  Unless required by applicable law or agreed to in writing, software        %
%  distributed under the License is distributed on an "AS IS" BASIS,          %
%  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.   %
%  See the License for the specific language governing permissions and        %
%  limitations under the License.                                             %
%                                                                             %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%  Read a stream of characters and return tokens one at a time.
%
%  The input stream is divided into individual 'tokens' (representing 'words'
%  or 'options'), in a way that is as close to a UNIX shell, as is feasable.
%  Only shell variable, and command substitutions will not be performed.
%  Tokens can be any length.
%
%  The main function call is GetScriptToken() (see below) whcih returns one
%  and only one token at a time.  The other functions provide support to this
%  function, opening scripts, and seting up the required structures.
%
%  More specifically...
%
%  Tokens are white space separated, and may be quoted, or even partially
%  quoted by either single or double quotes, or the use of backslashes,
%  or any mix of the three.
%
%  For example:    This\ is' a 'single" token"
%
%  A token is returned immediatally the end of token is found. That is as soon
%  as a unquoted white-space or EOF condition has been found.  That is to say
%  the file stream is parsed purely character-by-character, regardless any
%  buffering constraints set by the system.  It is not parsed line-by-line.
%
%  The function will return 'MagickTrue' if a valid token was found, while
%  the token status will be set accordingally to 'OK' or 'EOF', according to
%  the cause of the end of token.  The token may be an empty string if the
%  input was a quoted empty string.  Other error conditions return a value of
%  MagickFalse, indicating any token found but was incomplete due to some
%  error condition.
%
%  Single quotes will preserve all characters including backslashes. Double
%  quotes will also preserve backslashes unless escaping a double quote,
%  or another backslashes.  Other shell meta-characters are not treated as
%  special by this tokenizer.
%
%  For example Quoting the quote chars:
%              \'  "'"       \"  '"'  "\""      \\  '\'  "\\"
%
%  Outside quotes, backslash characters will make spaces, tabs and quotes part
%  of a token returned. However a backslash at the end of a line (and outside
%  quotes) will cause the newline to be completely ignored (as per the shell
%  line continuation).
%
%  Comments start with a '#' character at the start of a new token, will be
%  completely ignored upto the end of line, regardless of any backslash at the
%  end of the line.  You can escape a comment '#', using quotes or backlsashes
%  just as you can in a shell.
%
%  The parser will accept both newlines, returns, or return-newlines to mark
%  the EOL. Though this is technically breaking (or perhaps adding to) the
%  'BASH' syntax that is being followed.
%
%
%  UNIX script Launcher...
%
%  The use of '#' comments allow normal UNIX 'scripting' to be used to call on
%  the "magick" command to parse the tokens from a file
%
%    #!/path/to/command/magick -script
%
%
%  UNIX 'env' command launcher...
%
%  If "magick" is renamed "magick-script" you can use a 'env' UNIX launcher
%
%    #!/usr/bin/env magick-script
%
%
%  Shell script launcher...
%
%  As a special case a ':' at the start of a line is also treated as a comment
%  This allows a magick script to ignore a line that can be parsed by the shell
%  and not by the magick script (tokenizer).  This allows for an alternative
%  script 'launcher' to be used for magick scripts.
%
%    #!/bin/sh
%    :; exec magick -script "$0" "$@"; exit 10
%    #
%    # The rest of the file is magick script
%    -read label:"This is a Magick Script!"
%    -write show: -exit
%
% Or with some shell pre/post processing...
%
%    #!/bin/sh
%    :; echo "This part is run in the shell, but ignored by Magick"
%    :; magick -script "$0" "$@"
%    :; echo "This is run after the "magick" script is finished!"
%    :; exit 10
%    #
%    # The rest of the file is magick script
%    -read label:"This is a Magick Script!"
%    -write show: -exit
%
%
%  DOS script launcher...
%
%  Similarly any '@' at the start of the line (outside of quotes) will also be
%  treated as comment. This allow you to create a DOS script launcher, to
%  allow a ".bat" DOS scripts to run as "magick" scripts instead.
%
%    @echo This line is DOS executed but ignored by Magick
%    @magick -script %~dpnx0 %*
%    @echo This line is processed after the Magick script is finished
%    @GOTO :EOF
%    #
%    # The rest of the file is magick script
%    -read label:"This is a Magick Script!"
%    -write show: -exit
%
% But this can also be used as a shell script launcher as well!
% Though is more restrictive and less free-form than using ':'.
%
%    #!/bin/sh
%    @() { exec magick -script "$@"; }
%    @ "$0" "$@"; exit
%    #
%    # The rest of the file is magick script
%    -read label:"This is a Magick Script!"
%    -write show: -exit
%
% Or even like this...
%
%    #!/bin/sh
%    @() { }
%    @; exec magick -script "$0" "$@"; exit
%    #
%    # The rest of the file is magick script
%    -read label:"This is a Magick Script!"
%    -write show: -exit
%
*/

/*
  Include declarations.

  NOTE: Do not include if being compiled into the "test/script-token-test.c"
  module, for low level token testing.
*/
#ifndef SCRIPT_TOKEN_TESTING
#  include "MagickWand/studio.h"
#  include "MagickWand/MagickWand.h"
#  include "MagickWand/script-token.h"
#  include "MagickCore/string-private.h"
#  include "MagickCore/utility-private.h"
#endif

/*
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%                                                                             %
%                                                                             %
%                                                                             %
%   A c q u i r e S c r i p t T o k e n I n f o                               %
%                                                                             %
%                                                                             %
%                                                                             %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%  AcquireScriptTokenInfo() allocated, initializes and opens the given
%  file stream from which tokens are to be extracted.
%
%  The format of the AcquireScriptTokenInfo method is:
%
%     ScriptTokenInfo *AcquireScriptTokenInfo(char *filename)
%
%  A description of each parameter follows:
%
%    o filename   the filename to open  ("-" means stdin)
%
*/
WandExport ScriptTokenInfo *AcquireScriptTokenInfo(const char *filename)
{
  ScriptTokenInfo
    *token_info;

  token_info=(ScriptTokenInfo *) AcquireMagickMemory(sizeof(*token_info));
  if (token_info == (ScriptTokenInfo *) NULL)
    return token_info;
  (void) memset(token_info,0,sizeof(*token_info));

  token_info->opened=MagickFalse;
  if ( LocaleCompare(filename,"-") == 0 ) {
    token_info->stream=stdin;
    token_info->opened=MagickFalse;
  }
  else if ( LocaleNCompare(filename,"fd:",3) == 0 ) {
    token_info->stream=fdopen(StringToLong(filename+3),"r");
    token_info->opened=MagickFalse;
  }
  else {
    token_info->stream=fopen_utf8(filename, "r");
  }
  if ( token_info->stream == (FILE *) NULL ) {
    token_info=(ScriptTokenInfo *) RelinquishMagickMemory(token_info);
    return(token_info);
  }

  token_info->curr_line=1;
  token_info->length=INITAL_TOKEN_LENGTH;
  token_info->token=(char *) AcquireQuantumMemory(1,token_info->length);

  token_info->status=(token_info->token != (char *) NULL)
                      ? TokenStatusOK : TokenStatusMemoryFailed;
  token_info->signature=MagickWandSignature;

  return token_info;
}

/*
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%                                                                             %
%                                                                             %
%                                                                             %
%   D e s t r o y S c r i p t T o k e n I n f o                               %
%                                                                             %
%                                                                             %
%                                                                             %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%  DestroyScriptTokenInfo() allocated, initializes and opens the given
%  file stream from which tokens are to be extracted.
%
%  The format of the DestroyScriptTokenInfo method is:
%
%     ScriptTokenInfo *DestroyScriptTokenInfo(ScriptTokenInfo *token_info)
%
%  A description of each parameter follows:
%
%    o token_info   The ScriptTokenInfo structure to be destroyed
%
*/
WandExport ScriptTokenInfo * DestroyScriptTokenInfo(ScriptTokenInfo *token_info)
{
  assert(token_info != (ScriptTokenInfo *) NULL);
  assert(token_info->signature == MagickWandSignature);

  if ( token_info->opened != MagickFalse )
    fclose(token_info->stream);

  if (token_info->token != (char *) NULL )
    token_info->token=(char *) RelinquishMagickMemory(token_info->token);
  token_info=(ScriptTokenInfo *) RelinquishMagickMemory(token_info);
  return(token_info);
}

/*
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%                                                                             %
%                                                                             %
%                                                                             %
%   G e t S c r i p t T o k e n                                               %
%                                                                             %
%                                                                             %
%                                                                             %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%  GetScriptToken() a fairly general, finite state token parser. That returns
%  tokens one at a time, as soon as posible.
%
%
%  The format of the GetScriptToken method is:
%
%     MagickBooleanType GetScriptToken(ScriptTokenInfo *token_info)
%
%  A description of each parameter follows:
%
%    o token_info    pointer to a structure holding token details
%
*/
/* States of the parser */
#define IN_WHITE 0
#define IN_TOKEN 1
#define IN_QUOTE 2
#define IN_COMMENT 3

/* Macro to read character from stream

   This also keeps track of the line and column counts.
   The EOL is defined as either '\r\n', or '\r', or '\n'.
   A '\r' on its own is converted into a '\n' to correctly handle
   raw input, typically due to 'copy-n-paste' of text files.
   But a '\r\n' sequence is left ASIS for string handling
*/
#define GetChar(c) \
{ \
  c=fgetc(token_info->stream); \
  token_info->curr_column++; \
  if ( c == '\r' ) { \
    c=fgetc(token_info->stream); \
    ungetc(c,token_info->stream); \
    c = (c!='\n')?'\n':'\r'; \
  } \
  if ( c == '\n' ) \
    token_info->curr_line++, token_info->curr_column=0; \
  if (c == EOF ) \
    break; \
  if ( (c>='\0' && c<'\a') || (c>'\r' && c<' ' && c!='\033') ) { \
    token_info->status=TokenStatusBinary; \
    break; \
  } \
}
/* macro to collect the token characters */
#define SaveChar(c) \
{ \
  if ((size_t) offset >= (token_info->length-1)) { \
    if ( token_info->length >= MagickPathExtent ) \
      token_info->length += MagickPathExtent; \
    else \
      token_info->length *= 4; \
    token_info->token=(char *) ResizeQuantumMemory(token_info->token, \
      token_info->length,sizeof(*token_info->token)); \
    if ( token_info->token == (char *) NULL ) { \
      token_info->status=TokenStatusMemoryFailed; \
      break; \
    } \
  } \
  token_info->token[offset++]=(char) (c); \
}

WandExport MagickBooleanType GetScriptToken(ScriptTokenInfo *token_info)
{
  int
    quote,
    c;

  int
    state;

  ssize_t
    offset;

  /* EOF - no more tokens! */
  if (token_info == (ScriptTokenInfo *) NULL)
    return(MagickFalse);
  if (token_info->status != TokenStatusOK)
    {
      token_info->token[0]='\0';
      return(MagickFalse);
    }
  state=IN_WHITE;
  quote='\0';
  offset=0;
DisableMSCWarning(4127)
  while(1)
RestoreMSCWarning
  {
    /* get character */
    GetChar(c);

    /* hash comment handling */
    if ( state == IN_COMMENT ) {
      if ( c == '\n' )
        state=IN_WHITE;
      continue;
    }
    /* comment lines start with '#' anywhere, or ':' or '@' at start of line */
    if ( state == IN_WHITE )
      if ( ( c == '#' ) ||
           ( token_info->curr_column==1 && (c == ':' || c == '@' ) ) )
        state=IN_COMMENT;
    /* whitespace token separator character */
    if (strchr(" \n\r\t",c) != (char *) NULL) {
      switch (state) {
        case IN_TOKEN:
          token_info->token[offset]='\0';
          return(MagickTrue);
        case IN_QUOTE:
          SaveChar(c);
          break;
      }
      continue;
    }
    /* quote character */
    if ( c=='\'' || c =='"' ) {
      switch (state) {
        case IN_WHITE:
          token_info->token_line=token_info->curr_line;
          token_info->token_column=token_info->curr_column;
        case IN_TOKEN:
          state=IN_QUOTE;
          quote=c;
          break;
        case IN_QUOTE:
          if (c == quote)
            {
              state=IN_TOKEN;
              quote='\0';
            }
          else
            SaveChar(c);
          break;
      }
      continue;
    }
    /* escape char (preserve in quotes - unless escaping the same quote) */
    if (c == '\\')
      {
        if ( state==IN_QUOTE && quote == '\'' ) {
            SaveChar('\\');
            continue;
          }
        GetChar(c);
        if (c == '\n')
          switch (state) {
            case IN_COMMENT:
              state=IN_WHITE;  /* end comment */
            case IN_QUOTE:
              if (quote != '"')
                break;         /* in double quotes only */
            case IN_WHITE:
            case IN_TOKEN:
              continue;        /* line continuation - remove line feed */
          }
        switch (state) {
          case IN_WHITE:
            token_info->token_line=token_info->curr_line;
            token_info->token_column=token_info->curr_column;
            state=IN_TOKEN;
            break;
          case IN_QUOTE:
            if (c != quote && c != '\\')
              SaveChar('\\');
            break;
        }
        SaveChar(c);
        continue;
      }
    /* ordinary character */
    switch (state) {
      case IN_WHITE:
        token_info->token_line=token_info->curr_line;
        token_info->token_column=token_info->curr_column;
        state=IN_TOKEN;
      case IN_TOKEN:
      case IN_QUOTE:
        SaveChar(c);
        break;
      case IN_COMMENT:
        break;
    }
  }
  /* input stream has EOF or produced a fatal error */
  token_info->token[offset]='\0';
  if ( token_info->status != TokenStatusOK )
    return(MagickFalse);  /* fatal condition - no valid token */
  token_info->status = TokenStatusEOF;
  if ( state == IN_QUOTE)
    token_info->status = TokenStatusBadQuotes;
  if ( state == IN_TOKEN)
    return(MagickTrue);   /* token with EOF at end - no problem */
  return(MagickFalse);    /* in white space or in quotes - invalid token */
}