summaryrefslogtreecommitdiff
path: root/src/regex.h
blob: 7b74f32ae61665db9dddaffe38e64b0a9e359ba5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
#ifndef SRC_REGEX_H_
#define SRC_REGEX_H_

#include <stdio.h>

#ifdef USE_PCRE2
#include <pcre2.h>
#else
#include <pcre.h>
#endif

/*
 * Result codes reported by regex_match().
 *
 * The numeric values are spelled out so the mapping is visible at a glance:
 * the error code is negative, every non-error outcome is non-negative.
 */
enum {
	REGEX_ERROR = -1,
	REGEX_MATCH = 0,
	REGEX_MATCH_PARTIAL = 1,
	REGEX_NO_MATCH = 2
};

#ifdef USE_PCRE2
/* Compiled-pattern state used when building against PCRE2 (USE_PCRE2). */
struct regex_data {
	pcre2_code * regex; /* compiled regular expression */
	pcre2_match_data * match_data; /* match data block required for the compiled
	 pattern in regex */
};

/*
 * Error details for the PCRE2 backend. A pointer to this structure is passed
 * to regex_prepare_data and can afterwards be handed to regex_format_error
 * to obtain a human readable message.
 */
struct regex_error_data {
	int error_code; /* presumably the PCRE2 error number from compilation - TODO confirm */
	PCRE2_SIZE error_offset; /* presumably the offset into the pattern where
				    the error was detected - TODO confirm */
};

/* ^^^^^^ USE_PCRE2  ^^^^^^ */
#else
/* vvvvvv USE_PCRE vvvvvv */

/*
 * Prior to version 8.20, libpcre did not have pcre_free_study().
 * On those older versions the name is aliased to pcre_free so that the
 * rest of the code can refer to pcre_free_study unconditionally.
 */
#if (PCRE_MAJOR < 8 || (PCRE_MAJOR == 8 && PCRE_MINOR < 20))
#define pcre_free_study  pcre_free
#endif

/* Compiled-pattern state used when building against PCRE (no USE_PCRE2). */
struct regex_data {
	pcre *regex; /* compiled regular expression */
	int extra_owned; /* consulted by regex_data_free, which frees the
			    pcre_extra data only conditionally; presumably
			    nonzero means sd is owned here - TODO confirm */
	union {
		pcre_extra *sd; /* pointer to extra compiled stuff */
		pcre_extra lsd; /* used to hold the mmap'd version */
	};
};

/*
 * Error details for the PCRE backend. A pointer to this structure is passed
 * to regex_prepare_data and can afterwards be handed to regex_format_error
 * to obtain a human readable message.
 */
struct regex_error_data {
	char const * error_buffer; /* presumably the error text reported by the
				      PCRE compile step - TODO confirm */
	int error_offset; /* presumably the offset into the pattern where the
			     error was detected - TODO confirm */
};

#endif /* USE_PCRE2 */

/*
 * Forward declaration only: this header refers to struct mmap_area solely
 * through a pointer (see regex_load_mmap), so the full definition is not
 * needed here.
 */
struct mmap_area;

/**
 * regex_version returns the version string of the underlying regular
 * expressions library. In the case of PCRE it just returns the
 * result of pcre_version(). In the case of PCRE2, the very first time this
 * function is called it allocates a buffer large enough to hold the version
 * string and reads the PCRE2_CONFIG_VERSION option to fill the buffer.
 * The allocated buffer will linger in memory until the calling process is being
 * reaped, so the caller must not free the returned string.
 *
 * The function takes no arguments; it is declared with an explicit (void)
 * parameter list so that the compiler checks calls against a real prototype.
 *
 * @retval Pointer to the version string, or NULL on error.
 */
char const * regex_version(void);
/**
 * This constructor function allocates a buffer for a regex_data structure.
 * The buffer is being initialized with zeroes.
 *
 * The returned buffer must be released with regex_data_free.
 * The function takes no arguments; it is declared with an explicit (void)
 * parameter list so that the compiler checks calls against a real prototype.
 *
 * @retval Pointer to the newly allocated structure
 *         (presumably NULL if the allocation fails - TODO confirm).
 */
struct regex_data * regex_data_create(void);
/**
 * This complementary destructor function frees a given regex_data buffer.
 * It also frees any non-NULL member pointers with the appropriate pcreX_X_free
 * function. For PCRE this function respects the extra_owned field and frees
 * the pcre_extra data conditionally. Calling this function on a NULL pointer is
 * safe.
 *
 * @arg regex The regex_data buffer to release; may be NULL.
 */
void regex_data_free(struct regex_data * regex);
/**
 * This function compiles the regular expression. Additionally, it prepares
 * data structures required by the different underlying engines. For PCRE
 * it calls pcre_study to generate optional data required for optimized
 * execution of the compiled pattern. In the case of PCRE2, it allocates
 * a pcre2_match_data structure of appropriate size to hold all possible
 * matches created by the pattern.
 *
 * @arg regex Output parameter. If successful, the structure returned through
 *            *regex was allocated with regex_data_create and must be freed
 *            with regex_data_free.
 *            NOTE(review): the state of *regex after a failure is not
 *            specified here - confirm against the implementation.
 * @arg pattern_string The pattern string that is to be compiled.
 * @arg errordata A pointer to a regex_error_data structure must be passed
 *                to this function. This structure depends on the underlying
 *                implementation. It can be passed to regex_format_error
 *                to generate a human readable error message.
 * @retval 0 on success
 * @retval -1 on error
 */
int regex_prepare_data(struct regex_data ** regex, char const * pattern_string,
			struct regex_error_data * errordata);
/**
 * This function loads a serialized precompiled pattern from a contiguous
 * data region given by map_area.
 *
 * @arg map_area Description of the memory region holding a serialized
 *               representation of the precompiled pattern.
 * @arg regex Output parameter. If successful, the structure returned through
 *            *regex was allocated with regex_data_create and must be freed
 *            with regex_data_free.
 *
 * @retval 0 on success
 * @retval -1 on error
 */
int regex_load_mmap(struct mmap_area * map_area, struct regex_data ** regex);
/**
 * This function stores a precompiled regular expression to a file.
 * In the case of PCRE, it just dumps the binary representation of the
 * precompiled pattern into a file. In the case of PCRE2, it uses the
 * serialization function provided by the library.
 *
 * @arg regex The precompiled regular expression data.
 * @arg fp A file stream specifying the output file.
 *
 * NOTE(review): the meaning of the int return value is not documented here;
 * presumably 0 on success and -1 on error like the other functions in this
 * header - confirm against the implementation.
 */
int regex_writef(struct regex_data * regex, FILE * fp);
/**
 * This function applies a precompiled pattern to a subject string and
 * returns whether or not a match was found.
 *
 * @arg regex The precompiled pattern.
 * @arg subject The subject string.
 * @arg partial Boolean indicating if partial matches are wanted. A nonzero
 *              value is equivalent to specifying PCRE[2]_PARTIAL_SOFT as
 *              option to pcre_exec or pcre2_match.
 * @retval REGEX_MATCH if a match was found
 * @retval REGEX_MATCH_PARTIAL if a partial match was found
 * @retval REGEX_NO_MATCH if no match was found
 * @retval REGEX_ERROR if an error was encountered during the execution of the
 *                     regular expression
 */
int regex_match(struct regex_data * regex, char const * subject, int partial);
/**
 * This function compares two compiled regular expressions (regex1 and regex2).
 * It compares the binary representations of the compiled patterns. It is a very
 * crude approximation because the binary representation holds data, like
 * reference counters, that has nothing to do with the actual state machine.
 *
 * @arg regex1 The first compiled pattern.
 * @arg regex2 The second compiled pattern.
 *
 * @retval SELABEL_EQUAL if the pattern's binary representations are exactly
 *                       the same
 * @retval SELABEL_INCOMPARABLE otherwise
 *
 * NOTE(review): the SELABEL_* constants are not defined in this header;
 * presumably they come from the public selinux label header - confirm.
 */
int regex_cmp(struct regex_data * regex1, struct regex_data * regex2);
/**
 * This function takes the error data returned by regex_prepare_data and turns
 * it into a human readable error message.
 * If the buffer given to hold the error message is too small it truncates the
 * message and indicates the truncation with an ellipsis ("...") at the end of
 * the buffer.
 *
 * @arg error_data Error data as returned by regex_prepare_data.
 * @arg buffer String buffer to hold the formatted error string.
 * @arg buf_size Total size of the given buffer in bytes.
 */
void regex_format_error(struct regex_error_data const * error_data,
			char * buffer, size_t buf_size);
#endif  /* SRC_REGEX_H_ */