Source/Doxygen/doxyparser.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377

/* -----------------------------------------------------------------------------
 * This file is part of SWIG, which is licensed as a whole under version 3
 * (or any later version) of the GNU General Public License. Some additional
 * terms also apply to certain portions of SWIG. The full details of the SWIG
 * license and copyrights can be found in the LICENSE and COPYRIGHT files
 * included with the SWIG source code as distributed by the SWIG developers
 * and at https://www.swig.org/legal.html.
 *
 * doxyparser.h
 * ----------------------------------------------------------------------------- */

#ifndef SWIG_DOXYPARSER_H
#define SWIG_DOXYPARSER_H
#include <string>
#include <list>
#include <map>
#include <vector>
#include <set>

#include "swig.h"

#include "doxyentity.h"

// Utility function returning the base part of a command name.
// A command may carry options after its name; the options are dropped,
// e.g. param[in] -> param
std::string getBaseCommand(const std::string &cmd);


class DoxygenParser {
private:

  /*
   * Classification values with two users visible in this class:
   * commandBelongs() returns one to tell which parsing-rule group a command
   * belongs to, and Token::m_tokenType stores one to tag each token.
   */
  enum DoxyCommandEnum {
    NONE = -1,
    SIMPLECOMMAND,
    COMMANDWORD,
    COMMANDLINE,
    COMMANDPARAGRAPH,
    COMMANDENDCOMMAND,
    COMMANDWORDPARAGRAPH,
    COMMANDWORDLINE,
    COMMANDWORDOWORDWORD,
    COMMANDOWORD,
    COMMANDERRORTHROW,
    COMMANDUNIQUE,
    COMMAND_HTML,
    COMMAND_HTML_ENTITY,
    COMMAND_ALIAS,
    COMMAND_IGNORE,
    END_LINE,
    PARAGRAPH_END,
    PLAINSTRING,
    COMMAND
  };


  /** This class contains parts of Doxygen comment as a token. */
  class Token {
  public:
    DoxyCommandEnum m_tokenType;
    std::string m_tokenString; /* the data, such as param for @param */

    /*
     * The string is taken by const reference so that it is copied only once,
     * directly into m_tokenString.
     */
    Token(DoxyCommandEnum tType, const std::string &tString) : m_tokenType(tType), m_tokenString(tString) {
    }

    /*
     * Returns a printable form of the token for the END_LINE, PARAGRAPH_END,
     * PLAINSTRING and COMMAND types, and an empty string for any other type.
     */
    std::string toString() const {
      switch (m_tokenType) {
      case END_LINE:
        return "{END OF LINE}";
      case PARAGRAPH_END:
        return "{END OF PARAGRAPH}";
      case PLAINSTRING:
        return "{PLAINSTRING :" + m_tokenString + "}";
      case COMMAND:
        return "{COMMAND : " + m_tokenString + "}";
      default:
        return "";
      }
    }
  };


  typedef std::vector<Token> TokenList;
  typedef TokenList::const_iterator TokenListCIt;
  typedef TokenList::iterator TokenListIt;

  /*
   * Token storage and a const iterator into it.
   * NOTE(review): presumably the iterator is the read position advanced by
   * the getNext...() helpers below - confirm against the implementation.
   */
  TokenList m_tokenList;
  TokenListCIt m_tokenListIt;

  typedef std::map<std::string, DoxyCommandEnum> DoxyCommandsMap;
  typedef DoxyCommandsMap::iterator DoxyCommandsMapIt;

  /*
   * Map of Doxygen commands to determine if a string is a
   * command and how it needs to be parsed.
   * Both static containers are filled by fillTables().
   */
  static DoxyCommandsMap doxygenCommands;
  static std::set<std::string> doxygenSectionIndicators;

  bool m_isVerbatimText; // used to handle \htmlonly and \verbatim commands
  bool m_isInQuotedString;

  // NOTE(review): presumably the node and the source location of the comment
  // being parsed - confirm against the implementation.
  Node *m_node;
  std::string m_fileName;
  int m_fileLineNo;

  /*
   * Return the end command for a command appearing in "ignore" feature or empty
   * string if this is a simple command and not a block one.
   */
  std::string getIgnoreFeatureEndCommand(const std::string &theCommand) const;

  /*
   * Helper for getting the value of doxygen:ignore feature or its argument.
   */
  String *getIgnoreFeature(const std::string &theCommand, const char *argument = NULL) const;

  /*
   * Whether to print lots of debug info during parsing
   */
  bool noisy;

  /*
   * Changes a std::string to all lower case
   */
  std::string stringToLower(const std::string &stringToConvert);

  /*
   * isSectionIndicator returns true if the command is a section indicator.
   * This is a helper method for finding the end of a paragraph
   * by Doxygen's terms
   */
  bool isSectionIndicator(const std::string &smallString);
  /*
   * Determines how a command should be handled (what group it belongs to
   * for parsing rules)
   */
  DoxyCommandEnum commandBelongs(const std::string &theCommand);

  /*
   * Prints the parse tree
   */
  void printTree(const std::list<DoxygenEntity> &rootList);

  /**
   * Returns true if the next token is end of line token. This is important
   * when single word commands like \c are at the end of line.
   */
  bool isEndOfLine();

  /**
   * Skips spaces, tabs, and end of line tokens.
   */
  void skipWhitespaceTokens();

  /**
   * Removes all spaces and tabs from beginning and end of string.
   */
  std::string trim(const std::string &text);

  /*
   * Returns string of the next token if the next token is PLAINSTRING. Returns
   * empty string otherwise.
   */
  std::string getNextToken();

  /*
   * Returns the next word ON THE CURRENT LINE ONLY
   * if a new line is encountered, returns a blank std::string.
   * Updates the iterator if successful.
   */
  std::string getNextWord();

  /*
   * Returns the next word, which is not necessarily on the same line.
   * Updates the iterator if successful.
   */
  std::string getNextWordInComment();

  /*
   * Returns the location of the end of the line as
   * an iterator.
   */
  TokenListCIt getOneLine(const TokenList &tokList);

  /*
   * Returns a properly formatted std::string
   * up til ANY command or end of line is encountered.
   */
  std::string getStringTilCommand(const TokenList &tokList);

  /*
   * Returns a properly formatted std::string
   * up til the command specified is encountered
   */
  //TODO check that this behaves properly for formulas
  std::string getStringTilEndCommand(const std::string &theCommand, const TokenList &tokList);

  /*
   * Returns the end of a Paragraph as an iterator -
   * Paragraph is defined in Doxygen to be a paragraph of text
   * separated by either a structural command or a blank line
   */
  TokenListCIt getEndOfParagraph(const TokenList &tokList);

  /*
   * Returns the end of a section, defined as the first blank line OR first
   * encounter of the same command. Example of this behaviour is \arg.
   * If no end is encountered, returns the last token of the token list.
   */
  TokenListCIt getEndOfSection(const std::string &theCommand, const TokenList &tokList);

  /*
   * This method is for returning the end of a specific form of doxygen command
   * that begins with a \command and ends in \endcommand
   * such as \code and \endcode. The proper usage is
   * getEndCommand("endcode", tokenList);
   * If the end is never encountered, it returns the end of the token list.
   */
  TokenListCIt getEndCommand(const std::string &theCommand, const TokenList &tokList);
  /*
   * A special method for commands such as \arg that end at the end of a
   * paragraph OR when another \arg is encountered
   * TODO getTilAnyCommand - the declaration is currently commented out:
   * TokenListCIt getTilAnyCommand(const std::string &theCommand, const TokenList &tokList);
   */

  /**
   * This method skips the end of line token, if it is the next token to be
   * processed. It is called with comment commands which have args till the
   * end of line, such as 'addtogroup' or 'addindex'.
   * It is up to the translator to a specific language to decide whether
   * to insert eol or not. For example, if a command is ignored in target
   * language, new lines may make formatting ugly (Python).
   */
  void skipEndOfLine();

  /*
   * Method for Adding a Simple Command
   * Format: @command
   * Plain commands, such as newline etc, they contain no other data
   *  \n \\ \@ \& \$ \# \< \> \% \{ \}
   */
  void addSimpleCommand(const std::string &theCommand, DoxygenEntityList &doxyList);
  /*
   * CommandWord
   * Format: @command <word>
   * Commands with a single WORD after them such as @b
   * "a", "b", "c", "e", "em", "p", "def", "enum", "example", "package",
   * "relates", "namespace", "relatesalso","anchor", "dontinclude", "include",
   * "includelineno"
   */
  void addCommandWord(const std::string &theCommand, const TokenList &tokList, DoxygenEntityList &doxyList);
  /*
   * CommandLine
   * Format: @command (line)
   * Commands with a single LINE after them such as @var
   * "addindex", "fn", "name", "line", "var", "skipline", "typedef", "skip",
   * "until", "property"
   */
  void addCommandLine(const std::string &theCommand, const TokenList &tokList, DoxygenEntityList &doxyList);
  /*
   * CommandParagraph
   * Format: @command {paragraph}
   * Commands with a single paragraph after them such as @return
   * "return", "remarks", "since", "test", "sa", "see", "pre", "post",
   * "details", "invariant", "deprecated", "date", "note", "warning",
   * "version", "todo", "bug", "attention", "brief", "arg", "author"
   */
  void addCommandParagraph(const std::string &theCommand, const TokenList &tokList, DoxygenEntityList &doxyList);
  /*
   * Command EndCommand
   * Format: @command and ends at @endcommand
   * Commands that take in a block of text such as @code:
   * "code", "dot", "msc", "f$", "f[", "f{environment}{", "htmlonly",
   * "latexonly", "manonly", "verbatim", "xmlonly", "cond", "if", "ifnot",
   * "link"
   * Declared void: no success/failure status is returned to the caller.
   */
  void addCommandEndCommand(const std::string &theCommand, const TokenList &tokList, DoxygenEntityList &doxyList);
  /*
   * CommandWordParagraph
   * Format: @command <word> {paragraph}
   * Commands such as param
   * "param", "tparam", "throw", "throws", "retval", "exception"
   */
  void addCommandWordParagraph(const std::string &theCommand, const TokenList &tokList, DoxygenEntityList &doxyList);
  /*
   * CommandWordLine
   * Format: @command <word> (line)
   * Commands such as section
   * "page", "subsection", "subsubsection", "section", "paragraph", "defgroup"
   */
  void addCommandWordLine(const std::string &theCommand, const TokenList &tokList, DoxygenEntityList &doxyList);
  /*
   * Command Word Optional Word Optional Word
   * Format: @command <word> [<header-file>] [<header-name>]
   * Commands such as class
   * "category", "class", "protocol", "interface", "struct", "union"
   */
  void addCommandWordOWordOWord(const std::string &theCommand, const TokenList &tokList, DoxygenEntityList &doxyList);
  /*
   * Command Optional Word
   * Format: @command [<word>]
   * Commands such as dir
   * "dir", "file", "cond"
   */
  void addCommandOWord(const std::string &theCommand, const TokenList &tokList, DoxygenEntityList &doxyList);

  /*
   * Commands that should not be encountered (such as PHP only)
   * goes til the end of line then returns
   */
  void addCommandErrorThrow(const std::string &theCommand, const TokenList &tokList, DoxygenEntityList &doxyList);

  // NOTE(review): presumably the handlers for the COMMAND_HTML and
  // COMMAND_HTML_ENTITY groups - confirm against the implementation.
  void addCommandHtml(const std::string &theCommand, const TokenList &tokList, DoxygenEntityList &doxyList);

  void addCommandHtmlEntity(const std::string &theCommand, const TokenList &tokList, DoxygenEntityList &doxyList);

  /*
   * Adds the unique commands - different process for each unique command
   */
  void addCommandUnique(const std::string &theCommand, const TokenList &tokList, DoxygenEntityList &doxyList);

  /*
   * Replace the given command with its predefined alias expansion.
   */
  void aliasCommand(const std::string &theCommand, const TokenList &tokList, DoxygenEntityList &doxyList);

  /*
   * Simply ignore the given command, possibly with the word following it or
   * until the matching end command.
   */
  void ignoreCommand(const std::string &theCommand, const TokenList &tokList, DoxygenEntityList &doxyList);

  /*
   * The actual "meat" of the doxygen parser. Calls the correct addCommand...()
   * function.
   */
  void addCommand(const std::string &commandString, const TokenList &tokList, DoxygenEntityList &doxyList);

  // NOTE(review): presumably parses the tokens up to endParsingIndex into a
  // list of entities; the meaning of `root` is not visible here - confirm.
  DoxygenEntityList parse(TokenListCIt endParsingIndex, const TokenList &tokList, bool root = false);

  /*
   * Fill static doxygenCommands and doxygenSectionIndicators containers
   */
  void fillTables();

  /** Processes comment when \htmlonly and \verbatim commands are encountered. */
  size_t processVerbatimText(size_t pos, const std::string &line);

  // Helpers taking the current position in `line` by reference.
  // NOTE(review): presumably used while processing normal (non-verbatim)
  // comment text, advancing `pos` - confirm against the implementation.
  bool processEscapedChars(size_t &pos, const std::string &line);
  void processWordCommands(size_t &pos, const std::string &line);
  void processHtmlTags(size_t &pos, const std::string &line);
  void processHtmlEntities(size_t &pos, const std::string &line);


  /** Processes comment outside \htmlonly and \verbatim commands. */
  size_t processNormalComment(size_t pos, const std::string &line);

  // NOTE(review): presumably splits doxygenComment into m_tokenList, with
  // fileName/fileLine giving the comment's location - confirm.
  void tokenizeDoxygenComment(const std::string &doxygenComment, const std::string &fileName, int fileLine);
  void printList();
  void printListError(int warningType, const std::string &message);

  typedef std::vector<std::string> StringVector;
  typedef StringVector::const_iterator StringVectorCIt;

  StringVector split(const std::string &text, char separator);
  bool isStartOfDoxyCommentChar(char c);
  bool addDoxyCommand(DoxygenParser::TokenList &tokList, const std::string &cmd);

public:
  /*
   * noisy: NOTE(review): presumably initializes the `noisy` member, i.e.
   * whether to print lots of debug info during parsing - confirm.
   */
  DoxygenParser(bool noisy = false);
  virtual ~DoxygenParser();
  /*
   * NOTE(review): presumably parses the Doxygen comment text in
   * `documentation`, attached to `node`, and returns the resulting list of
   * entities - confirm against the implementation.
   */
  DoxygenEntityList createTree(Node *node, String *documentation);
};

#endif