aboutsummaryrefslogtreecommitdiff
path: root/libdb/odb.h
blob: 1e777da5333f6f44cb50c2252249ea07f0f3b44c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
/**
 * @file odb.h
 * This file contains various definitions and interface for management
 * of in-memory, through mmaped file, growable hash table, that stores
 * sample files.
 *
 * @remark Copyright 2002 OProfile authors
 * @remark Read the file COPYING
 *
 * @author Philippe Elie
 */

#ifndef ODB_HASH_H
#define ODB_HASH_H

#include <stddef.h>
#include <stdint.h>

#include "op_list.h"

/** the type of key. 64-bit because CG needs 32-bit pair {from,to} */
typedef uint64_t odb_key_t;
/** the type of an information in the database */
typedef unsigned int odb_value_t;
/** the type of index (node number), list are implemented through index */
typedef unsigned int odb_index_t;
/** the type store node number */
typedef odb_index_t odb_node_nr_t;
/** store the hash mask, hash table size are always power of two */
typedef odb_index_t odb_hash_mask_t;

/* there is (bucket factor * nr node) entry in hash table, this can seem
 * excessive but hash coding eip don't give a good distributions and our
 * goal is to get a O(1) amortized insert time. bucket factor must be a
 * power of two. FIXME: see big comment in odb_hash_add_node, you must
 * re-enable zeroing hash table if BUCKET_FACTOR > 2 (roughly exact, you
 * want to read the comment in odb_hash_add_node() if you tune this define)
 */
#define BUCKET_FACTOR 1

/** a db hash node */
typedef struct {
	odb_key_t key;			/**< eip */
	odb_value_t value;		/**< samples count */
	odb_index_t next;		/**< next entry for this bucket */
} odb_node_t;

/** the minimal information which must be stored in the file to reload
 * properly the data base, following this header is the node array then
 * the hash table (when growing we avoid to copy node array)
 */
typedef struct {
	odb_node_nr_t size;		/**< in node nr (power of two) */
	odb_node_nr_t current_size;	/**< nr used node + 1, node 0 unused */
	int padding[6];			/**< for padding and future use */
} odb_descr_t;

/** a "database". this is an in memory only description.
 *
 * We allow to manage a database inside a mapped file with an "header" of
 * unknown size so odb_open get a parameter to specify the size of this header.
 * A typical use is:
 *
 * struct header { int etc; ... };
 * odb_open(&hash, filename, ODB_RW, sizeof(header));
 * so on this library have no dependency on the header type.
 *
 * the internal memory layout from base_memory is:
 *  the unknown header (sizeof_header)
 *  odb_descr_t
 *  the node array: (descr->size * sizeof(odb_node_t) entries
 *  the hash table: array of odb_index_t indexing the node array 
 *    (descr->size * BUCKET_FACTOR) entries
 */
typedef struct odb_data {
	odb_node_t * node_base;		/**< base memory area of the page */
	odb_index_t * hash_base;	/**< base memory of hash table */
	odb_descr_t * descr;		/**< the current state of database */
	odb_hash_mask_t hash_mask;	/**< == descr->size - 1 */
	unsigned int sizeof_header;	/**< from base_memory to odb header */
	unsigned int offset_node;	/**< from base_memory to node array */
	void * base_memory;		/**< base memory of the maped memory */
	int fd;				/**< mmaped memory file descriptor */
	char * filename;                /**< full path name of sample file */
	int ref_count;                  /**< reference count */
	struct list_head list;          /**< hash bucket list */
} odb_data_t;

typedef struct {
	odb_data_t * data;
} odb_t;

#ifdef __cplusplus
extern "C" {
#endif

/* db_manage.c */

/** how to open the DB file */
enum odb_rw {
	ODB_RDONLY = 0,	/**< open for read only */
	ODB_RDWR = 1	/**< open for read and/or write */
};

/**
 * odb_init - initialize a DB file
 * @param odb the DB file to init
 */
void odb_init(odb_t * odb);

/**
 * odb_open - open a DB file
 * @param odb the data base object to setup
 * @param filename the filename where go the maped memory
 * @param rw \enum ODB_RW if opening for writing, else \enum ODB_RDONLY
 * @param sizeof_header size of the file header if any
 *
 * The sizeof_header parameter allows the data file to have a header
 * at the start of the file which is skipped.
 * odb_open() always preallocate a few number of pages.
 * returns 0 on success, errno on failure
 */
int odb_open(odb_t * odb, char const * filename,
             enum odb_rw rw, size_t sizeof_header);

/** Close the given ODB file */
void odb_close(odb_t * odb);

/** return the number of times this sample file is open */
int odb_open_count(odb_t const * odb);

/** return the start of the mapped data */
void * odb_get_data(odb_t * odb);

/** issue a msync on the used size of the mmaped file */
void odb_sync(odb_t const * odb);

/**
 * grow the hashtable in such way current_size is the index of the first free
 * node. Take care all node pointer can be invalidated by this call.
 *
 * Node allocation is done in a two step way 1st) ensure a free node exist
 * eventually, caller can setup it, 2nd) commit the node allocation with
 * odb_commit_reservation().
 * This is done in this way to ensure node setup is visible from another
 * process like pp tools in an atomic way.
 *
 * returns 0 on success, non zero on failure in this case this function do
 * nothing and errno is set by the first libc call failure allowing to retry
 * after cleanup some program resource.
 */
int odb_grow_hashtable(odb_data_t * data);
/**
 * commit a previously successfull node reservation. This can't fail.
 */
static __inline void odb_commit_reservation(odb_data_t * data)
{
	++data->descr->current_size;
}

/** "immpossible" node number to indicate an error from odb_hash_add_node() */
#define ODB_NODE_NR_INVALID ((odb_node_nr_t)-1)

/* db_debug.c */
/** check that the hash is well built */
int odb_check_hash(odb_t const * odb);

/* db_stat.c */
typedef struct odb_hash_stat_t odb_hash_stat_t;
odb_hash_stat_t * odb_hash_stat(odb_t const * odb);
void odb_hash_display_stat(odb_hash_stat_t const * stats);
void odb_hash_free_stat(odb_hash_stat_t * stats);

/* db_insert.c */
/** update info at key by incrementing its associated value by one, 
 * if the key does not exist a new node is created and the value associated
 * is set to one.
 *
 * returns EXIT_SUCCESS on success, EXIT_FAILURE on failure
 */
int odb_update_node(odb_t * odb, odb_key_t key);

/**
 * odb_update_node_with_offset
 * @param odb the data base object to setup
 * @param key the hash key
 * @param offset the offset to be added
 *
 * update info at key by adding the specified offset to its associated value,
 * if the key does not exist a new node is created and the value associated
 * is set to offset.
 *
 * returns EXIT_SUCCESS on success, EXIT_FAILURE on failure
 */
int odb_update_node_with_offset(odb_t * odb, 
				odb_key_t key, 
				unsigned long int offset);

/** Add a new node w/o regarding if a node with the same key already exists
 *
 * returns EXIT_SUCCESS on success, EXIT_FAILURE on failure
 */
int odb_add_node(odb_t * odb, odb_key_t key, odb_value_t value);

/* db_travel.c */
/**
 * return a base pointer to the node array and number of node in this array
 * caller then will iterate through:
 *
 * odb_node_nr_t node_nr, pos;
 * odb_node_t * node = odb_get_iterator(odb, &node_nr);
 *	for ( pos = 0 ; pos < node_nr ; ++pos)
 *		// do something
 *
 *  note than caller does not need to filter nil key as it's a valid key,
 * The returned range is all valid (i.e. should never contain zero value).
 */
odb_node_t * odb_get_iterator(odb_t const * odb, odb_node_nr_t * nr);

static __inline unsigned int
odb_do_hash(odb_data_t const * data, odb_key_t value)
{
	/* FIXME: better hash for eip value, needs to instrument code
	 * and do a lot of tests ... */
	/* trying to combine high order bits his a no-op: inside a binary image
	 * high order bits don't vary a lot, hash table start with 7 bits mask
	 * so this hash coding use bits 0-7, 8-15. Hash table is stored in
	 * files avoiding to rebuilding them at profiling re-start so
	 * on changing do_hash() change the file format!
	 */
	uint32_t temp = value & 0xffffffff;
	return ((temp << 0) ^ (temp >> 8)) & data->hash_mask;
}

#ifdef __cplusplus
}
#endif

#endif /* !ODB_H */