MDB
|
Modules | |
ID List Management | |
Windows Compatibility Macros | |
Debug Macros | |
Reader Lock Table | |
Page Flags | |
Node Flags | |
Transaction DB Flags | |
Transaction Flags | |
Cursor Flags | |
Data Structures | |
struct | MDB_page |
struct | MDB_node |
struct | MDB_db |
struct | MDB_meta |
union | MDB_pagebuf |
struct | MDB_dbx |
struct | MDB_txn |
Opaque structure for a transaction handle. More... | |
struct | MDB_cursor |
Opaque structure for navigating through a database. More... | |
struct | MDB_xcursor |
struct | MDB_pgstate |
struct | MDB_env |
Opaque structure for a database environment. More... | |
struct | MDB_ntxn |
Macros | |
#define | MDB_DSYNC O_DSYNC |
#define | MDB_FDATASYNC fdatasync |
#define | MDB_MSYNC(addr, len, flags) msync(addr,len,flags) |
#define | MS_SYNC 1 |
#define | MS_ASYNC 0 |
#define | MDB_PAGESIZE 4096 |
#define | MDB_MINKEYS 2 |
#define | MDB_MAGIC 0xBEEFC0DE |
#define | MDB_DATA_VERSION 1 |
#define | MDB_LOCK_VERSION 1 |
#define | MDB_MAXKEYSIZE 511 |
The maximum size of a key in the database. More... | |
#define | MAXDATASIZE 0xffffffffUL |
The maximum size of a data item. More... | |
#define | DKBUF |
#define | DKEY(x) 0 |
#define | P_INVALID (~(pgno_t)0) |
#define | F_ISSET(w, f) (((w) & (f)) == (f)) |
#define | DEFAULT_MAPSIZE 1048576 |
#define | PAGEHDRSZ ((unsigned) offsetof(MDB_page, mp_ptrs)) |
#define | METADATA(p) ((void *)((char *)(p) + PAGEHDRSZ)) |
#define | NUMKEYS(p) (((p)->mp_lower - PAGEHDRSZ) >> 1) |
#define | SIZELEFT(p) (indx_t)((p)->mp_upper - (p)->mp_lower) |
#define | PAGEFILL(env, p) |
#define | FILL_THRESHOLD 250 |
#define | IS_LEAF(p) F_ISSET((p)->mp_flags, P_LEAF) |
#define | IS_LEAF2(p) F_ISSET((p)->mp_flags, P_LEAF2) |
#define | IS_BRANCH(p) F_ISSET((p)->mp_flags, P_BRANCH) |
#define | IS_OVERFLOW(p) F_ISSET((p)->mp_flags, P_OVERFLOW) |
#define | IS_SUBP(p) F_ISSET((p)->mp_flags, P_SUBP) |
#define | OVPAGES(size, psize) ((PAGEHDRSZ-1 + (size)) / (psize) + 1) |
#define | NODESIZE offsetof(MDB_node, mn_data) |
#define | PGNO_TOPWORD ((pgno_t)-1 > 0xffffffffu ? 32 : 0) |
#define | INDXSIZE(k) (NODESIZE + ((k) == NULL ? 0 : (k)->mv_size)) |
#define | LEAFSIZE(k, d) (NODESIZE + (k)->mv_size + (d)->mv_size) |
#define | NODEPTR(p, i) ((MDB_node *)((char *)(p) + (p)->mp_ptrs[i])) |
#define | NODEKEY(node) (void *)((node)->mn_data) |
#define | NODEDATA(node) (void *)((char *)(node)->mn_data + (node)->mn_ksize) |
#define | NODEPGNO(node) |
#define | SETPGNO(node, pgno) |
#define | NODEDSZ(node) ((node)->mn_lo | ((unsigned)(node)->mn_hi << 16)) |
#define | SETDSZ(node, size) |
#define | NODEKSZ(node) ((node)->mn_ksize) |
#define | COPY_PGNO(dst, src) |
#define | LEAF2KEY(p, i, ks) ((char *)(p) + PAGEHDRSZ + ((i)*(ks))) |
#define | MDB_GET_KEY(node, keyptr) |
#define | MDB_GET_KEY2(node, key) { key.mv_size = NODEKSZ(node); key.mv_data = NODEKEY(node); } |
#define | MDB_VALID 0x8000 |
#define | PERSISTENT_FLAGS (0xffff & ~(MDB_VALID)) |
#define | VALID_FLAGS |
#define | FREE_DBI 0 |
#define | MAIN_DBI 1 |
#define | CURSOR_STACK 32 |
#define | MDB_COMMIT_PAGES 64 |
#define | MAX_WRITE (0x80000000U >> (sizeof(ssize_t) == 4)) |
#define | MDB_PS_MODIFY 1 |
#define | MDB_PS_ROOTONLY 2 |
#define | MDB_SPLIT_REPLACE MDB_APPENDDUP |
#define | mdb_env_close0(env, excl) mdb_env_close1(env) |
#define | mdb_txn_reset0(txn, act) mdb_txn_reset0(txn) |
#define | LOCKNAME "/lock.mdb" |
#define | DATANAME "/data.mdb" |
#define | LOCKSUFF "-lock" |
#define | CHANGEABLE (MDB_NOSYNC|MDB_NOMETASYNC|MDB_MAPASYNC) |
#define | CHANGELESS (MDB_FIXEDMAP|MDB_NOSUBDIR|MDB_RDONLY|MDB_WRITEMAP|MDB_NOTLS) |
#define | MDB_NOSPILL 0x8000 |
Typedefs | |
typedef MDB_ID | pgno_t |
typedef MDB_ID | txnid_t |
typedef uint16_t | indx_t |
Enumerations | |
enum | Pidlock_op { Pidset = F_SETLK, Pidcheck = F_GETLK } |
Functions | |
static int | mdb_page_alloc (MDB_cursor *mc, int num, MDB_page **mp) |
static int | mdb_page_new (MDB_cursor *mc, uint32_t flags, int num, MDB_page **mp) |
static int | mdb_page_touch (MDB_cursor *mc) |
static int | mdb_page_get (MDB_txn *txn, pgno_t pgno, MDB_page **mp, int *lvl) |
static int | mdb_page_search_root (MDB_cursor *mc, MDB_val *key, int modify) |
static int | mdb_page_search (MDB_cursor *mc, MDB_val *key, int flags) |
static int | mdb_page_merge (MDB_cursor *csrc, MDB_cursor *cdst) |
static int | mdb_page_split (MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno, unsigned int nflags) |
static int | mdb_env_read_header (MDB_env *env, MDB_meta *meta) |
static int | mdb_env_pick_meta (const MDB_env *env) |
static int | mdb_env_write_meta (MDB_txn *txn) |
static void | mdb_env_close0 (MDB_env *env, int excl) |
static MDB_node * | mdb_node_search (MDB_cursor *mc, MDB_val *key, int *exactp) |
static int | mdb_node_add (MDB_cursor *mc, indx_t indx, MDB_val *key, MDB_val *data, pgno_t pgno, unsigned int flags) |
static void | mdb_node_del (MDB_page *mp, indx_t indx, int ksize) |
static void | mdb_node_shrink (MDB_page *mp, indx_t indx) |
static int | mdb_node_move (MDB_cursor *csrc, MDB_cursor *cdst) |
static int | mdb_node_read (MDB_txn *txn, MDB_node *leaf, MDB_val *data) |
static size_t | mdb_leaf_size (MDB_env *env, MDB_val *key, MDB_val *data) |
static size_t | mdb_branch_size (MDB_env *env, MDB_val *key) |
static int | mdb_rebalance (MDB_cursor *mc) |
static int | mdb_update_key (MDB_cursor *mc, MDB_val *key) |
static void | mdb_cursor_pop (MDB_cursor *mc) |
static int | mdb_cursor_push (MDB_cursor *mc, MDB_page *mp) |
static int | mdb_cursor_del0 (MDB_cursor *mc, MDB_node *leaf) |
static int | mdb_cursor_sibling (MDB_cursor *mc, int move_right) |
static int | mdb_cursor_next (MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op) |
static int | mdb_cursor_prev (MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op) |
static int | mdb_cursor_set (MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op, int *exactp) |
static int | mdb_cursor_first (MDB_cursor *mc, MDB_val *key, MDB_val *data) |
static int | mdb_cursor_last (MDB_cursor *mc, MDB_val *key, MDB_val *data) |
static void | mdb_cursor_init (MDB_cursor *mc, MDB_txn *txn, MDB_dbi dbi, MDB_xcursor *mx) |
static void | mdb_xcursor_init0 (MDB_cursor *mc) |
static void | mdb_xcursor_init1 (MDB_cursor *mc, MDB_node *node) |
static int | mdb_drop0 (MDB_cursor *mc, int subs) |
static void | mdb_default_cmp (MDB_txn *txn, MDB_dbi dbi) |
char * | mdb_version (int *major, int *minor, int *patch) |
Return the mdb library version information. More... | |
char * | mdb_strerror (int err) |
Return a string describing a given error code. More... | |
int | mdb_cmp (MDB_txn *txn, MDB_dbi dbi, const MDB_val *a, const MDB_val *b) |
Compare two data items according to a particular database. More... | |
int | mdb_dcmp (MDB_txn *txn, MDB_dbi dbi, const MDB_val *a, const MDB_val *b) |
Compare two data items according to a particular database. More... | |
static MDB_page * | mdb_page_malloc (MDB_txn *txn, unsigned num) |
static void | mdb_page_free (MDB_env *env, MDB_page *mp) |
static void | mdb_dpage_free (MDB_env *env, MDB_page *dp) |
static void | mdb_dlist_free (MDB_txn *txn) |
static void | mdb_cursorpages_mark (MDB_cursor *mc, unsigned pflags) |
static int | mdb_page_flush (MDB_txn *txn) |
static int | mdb_page_spill (MDB_cursor *m0, MDB_val *key, MDB_val *data) |
static txnid_t | mdb_find_oldest (MDB_txn *txn) |
static void | mdb_page_dirty (MDB_txn *txn, MDB_page *mp) |
static void | mdb_page_copy (MDB_page *dst, MDB_page *src, unsigned int psize) |
static int | mdb_page_unspill (MDB_txn *tx0, MDB_page *mp, MDB_page **ret) |
int | mdb_env_sync (MDB_env *env, int force) |
Flush the data buffers to disk. More... | |
static int | mdb_cursor_shadow (MDB_txn *src, MDB_txn *dst) |
static void | mdb_cursors_close (MDB_txn *txn, unsigned merge) |
static void | mdb_txn_reset0 (MDB_txn *txn, const char *act) |
static int | mdb_reader_pid (MDB_env *env, enum Pidlock_op op, pid_t pid) |
static int | mdb_txn_renew0 (MDB_txn *txn) |
int | mdb_txn_renew (MDB_txn *txn) |
Renew a read-only transaction. More... | |
int | mdb_txn_begin (MDB_env *env, MDB_txn *parent, unsigned int flags, MDB_txn **ret) |
Create a transaction for use with the environment. More... | |
MDB_env * | mdb_txn_env (MDB_txn *txn) |
Returns the transaction's MDB_env. More... | |
static void | mdb_dbis_update (MDB_txn *txn, int keep) |
void | mdb_txn_reset (MDB_txn *txn) |
Reset a read-only transaction. More... | |
void | mdb_txn_abort (MDB_txn *txn) |
Abandon all the operations of the transaction instead of saving them. More... | |
static int | mdb_freelist_save (MDB_txn *txn) |
int | mdb_txn_commit (MDB_txn *txn) |
Commit all the operations of a transaction into the database. More... | |
static int | mdb_env_init_meta (MDB_env *env, MDB_meta *meta) |
int | mdb_env_create (MDB_env **env) |
Create an MDB environment handle. More... | |
int | mdb_env_set_mapsize (MDB_env *env, size_t size) |
Set the size of the memory map to use for this environment. More... | |
int | mdb_env_set_maxdbs (MDB_env *env, MDB_dbi dbs) |
Set the maximum number of named databases for the environment. More... | |
int | mdb_env_set_maxreaders (MDB_env *env, unsigned int readers) |
Set the maximum number of threads/reader slots for the environment. More... | |
int | mdb_env_get_maxreaders (MDB_env *env, unsigned int *readers) |
Get the maximum number of threads/reader slots for the environment. More... | |
static int | mdb_env_open2 (MDB_env *env) |
static void | mdb_env_reader_dest (void *ptr) |
static int | mdb_env_share_locks (MDB_env *env, int *excl) |
static int | mdb_env_excl_lock (MDB_env *env, int *excl) |
static int | mdb_env_setup_locks (MDB_env *env, char *lpath, int mode, int *excl) |
int | mdb_env_open (MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode) |
Open an environment handle. More... | |
int | mdb_env_copyfd (MDB_env *env, HANDLE fd) |
int | mdb_env_copy (MDB_env *env, const char *path) |
Copy an MDB environment to the specified path. More... | |
void | mdb_env_close (MDB_env *env) |
Close the environment and release the memory map. More... | |
static int | mdb_cmp_long (const MDB_val *a, const MDB_val *b) |
static int | mdb_cmp_int (const MDB_val *a, const MDB_val *b) |
static int | mdb_cmp_cint (const MDB_val *a, const MDB_val *b) |
static int | mdb_cmp_memn (const MDB_val *a, const MDB_val *b) |
static int | mdb_cmp_memnr (const MDB_val *a, const MDB_val *b) |
static int | mdb_page_search_lowest (MDB_cursor *mc) |
static int | mdb_ovpage_free (MDB_cursor *mc, MDB_page *mp) |
int | mdb_get (MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data) |
Get items from a database. More... | |
int | mdb_cursor_get (MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op) |
Retrieve by cursor. More... | |
static int | mdb_cursor_touch (MDB_cursor *mc) |
int | mdb_cursor_put (MDB_cursor *mc, MDB_val *key, MDB_val *data, unsigned int flags) |
Store by cursor. More... | |
int | mdb_cursor_del (MDB_cursor *mc, unsigned int flags) |
Delete current key/data pair. More... | |
int | mdb_cursor_open (MDB_txn *txn, MDB_dbi dbi, MDB_cursor **ret) |
Create a cursor handle. More... | |
int | mdb_cursor_renew (MDB_txn *txn, MDB_cursor *mc) |
Renew a cursor handle. More... | |
int | mdb_cursor_count (MDB_cursor *mc, size_t *countp) |
Return count of duplicates for current key. More... | |
void | mdb_cursor_close (MDB_cursor *mc) |
Close a cursor handle. More... | |
MDB_txn * | mdb_cursor_txn (MDB_cursor *mc) |
Return the cursor's transaction handle. More... | |
MDB_dbi | mdb_cursor_dbi (MDB_cursor *mc) |
Return the cursor's database handle. More... | |
static void | mdb_cursor_copy (const MDB_cursor *csrc, MDB_cursor *cdst) |
int | mdb_del (MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data) |
Delete items from a database. More... | |
int | mdb_put (MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data, unsigned int flags) |
Store items into a database. More... | |
int | mdb_env_set_flags (MDB_env *env, unsigned int flag, int onoff) |
Set environment flags. More... | |
int | mdb_env_get_flags (MDB_env *env, unsigned int *arg) |
Get environment flags. More... | |
int | mdb_env_get_path (MDB_env *env, const char **arg) |
Return the path that was used in mdb_env_open(). More... | |
static int | mdb_stat0 (MDB_env *env, MDB_db *db, MDB_stat *arg) |
int | mdb_env_stat (MDB_env *env, MDB_stat *arg) |
Return statistics about the MDB environment. More... | |
int | mdb_env_info (MDB_env *env, MDB_envinfo *arg) |
Return information about the MDB environment. More... | |
int | mdb_dbi_open (MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *dbi) |
Open a database in the environment. More... | |
int | mdb_stat (MDB_txn *txn, MDB_dbi dbi, MDB_stat *arg) |
Retrieve statistics for a database. More... | |
void | mdb_dbi_close (MDB_env *env, MDB_dbi dbi) |
Close a database handle. More... | |
int | mdb_dbi_flags (MDB_env *env, MDB_dbi dbi, unsigned int *flags) |
Retrieve the DB flags for a database handle. More... | |
int | mdb_drop (MDB_txn *txn, MDB_dbi dbi, int del) |
Empty or delete+close a database. More... | |
int | mdb_set_compare (MDB_txn *txn, MDB_dbi dbi, MDB_cmp_func *cmp) |
Set a custom key comparison function for a database. More... | |
int | mdb_set_dupsort (MDB_txn *txn, MDB_dbi dbi, MDB_cmp_func *cmp) |
Set a custom data comparison function for a MDB_DUPSORT database. More... | |
int | mdb_set_relfunc (MDB_txn *txn, MDB_dbi dbi, MDB_rel_func *rel) |
Set a relocation function for a MDB_FIXEDMAP database. More... | |
int | mdb_set_relctx (MDB_txn *txn, MDB_dbi dbi, void *ctx) |
Set a context pointer for a MDB_FIXEDMAP database's relocation function. More... | |
int | mdb_env_get_maxkeysize (MDB_env *env) |
Get the maximum size of a key for the environment. More... | |
int | mdb_reader_list (MDB_env *env, MDB_msg_func *func, void *ctx) |
Dump the entries in the reader lock table. More... | |
static int | mdb_pid_insert (pid_t *ids, pid_t pid) |
int | mdb_reader_check (MDB_env *env, int *dead) |
Check for stale entries in the reader lock table. More... | |
Variables | |
static char *const | mdb_errstr [] |
struct MDB_page |
Common header for all page types. Overflow records occupy a number of contiguous pages with no headers on any page after the first.
Data Fields | |
union { | |
pgno_t p_pgno | |
void * p_next | |
} | mp_p |
uint16_t | mp_pad |
uint16_t | mp_flags |
union { | |
struct { | |
indx_t pb_lower | |
indx_t pb_upper | |
} pb | |
uint32_t pb_pages | |
} | mp_pb |
indx_t | mp_ptrs [1] |
pgno_t MDB_page::p_pgno |
page number
void* MDB_page::p_next |
for in-memory list of freed structs
uint16_t MDB_page::mp_flags |
indx_t MDB_page::pb_lower |
lower bound of free space
indx_t MDB_page::pb_upper |
upper bound of free space
uint32_t MDB_page::pb_pages |
number of overflow pages
indx_t MDB_page::mp_ptrs[1] |
dynamic size
struct MDB_node |
Header for a single key/data pair within a page. We guarantee 2-byte alignment for nodes.
Data Fields | |
unsigned short | mn_offset [2] |
unsigned short | mn_flags |
unsigned short | mn_ksize |
char | mn_data [1] |
unsigned short MDB_node::mn_flags |
unsigned short MDB_node::mn_ksize |
key size
char MDB_node::mn_data[1] |
key and data are appended here
struct MDB_db |
Information about a single database in the environment.
Data Fields | |
uint32_t | md_pad |
uint16_t | md_flags |
uint16_t | md_depth |
pgno_t | md_branch_pages |
pgno_t | md_leaf_pages |
pgno_t | md_overflow_pages |
size_t | md_entries |
pgno_t | md_root |
uint32_t MDB_db::md_pad |
also ksize for LEAF2 pages
uint16_t MDB_db::md_flags |
uint16_t MDB_db::md_depth |
depth of this tree
pgno_t MDB_db::md_branch_pages |
number of internal pages
pgno_t MDB_db::md_leaf_pages |
number of leaf pages
pgno_t MDB_db::md_overflow_pages |
number of overflow pages
size_t MDB_db::md_entries |
number of data items
pgno_t MDB_db::md_root |
the root page of this tree
struct MDB_meta |
Meta page content.
Data Fields | |
uint32_t | mm_magic |
uint32_t | mm_version |
void * | mm_address |
size_t | mm_mapsize |
MDB_db | mm_dbs [2] |
pgno_t | mm_last_pg |
txnid_t | mm_txnid |
uint32_t MDB_meta::mm_magic |
Stamp identifying this as an MDB file. It must be set to MDB_MAGIC.
uint32_t MDB_meta::mm_version |
Version number of this data file. Must be set to MDB_DATA_VERSION.
void* MDB_meta::mm_address |
address for fixed mapping
size_t MDB_meta::mm_mapsize |
size of mmap region
MDB_db MDB_meta::mm_dbs[2] |
first is free space, 2nd is main db
pgno_t MDB_meta::mm_last_pg |
last used page in file
txnid_t MDB_meta::mm_txnid |
txnid that committed this page
union MDB_pagebuf |
Buffer for a stack-allocated dirty page. The members define size and alignment, and silence type aliasing warnings. They are not used directly; that could mean incorrectly using several union members in parallel.
Data Fields | |
char | mb_raw [MDB_PAGESIZE] |
MDB_page | mb_page |
struct { | |
char mm_pad [PAGEHDRSZ] | |
MDB_meta mm_meta | |
} | mb_metabuf |
struct MDB_dbx |
Auxiliary DB info. The information here is mostly static/read-only. There is only a single copy of this record in the environment.
Data Fields | |
MDB_val | md_name |
MDB_cmp_func * | md_cmp |
MDB_cmp_func * | md_dcmp |
MDB_rel_func * | md_rel |
void * | md_relctx |
MDB_val MDB_dbx::md_name |
name of the database
MDB_cmp_func* MDB_dbx::md_cmp |
function for comparing keys
MDB_cmp_func* MDB_dbx::md_dcmp |
function for comparing data items
MDB_rel_func* MDB_dbx::md_rel |
user relocate function
void* MDB_dbx::md_relctx |
user-provided context for md_rel
struct MDB_txn |
Opaque structure for a transaction handle.
A database transaction. Every operation requires a transaction handle.
All database operations require a transaction handle. Transactions may be read-only or read-write.
Data Fields | |
MDB_txn * | mt_parent |
MDB_txn * | mt_child |
pgno_t | mt_next_pgno |
txnid_t | mt_txnid |
MDB_env * | mt_env |
MDB_IDL | mt_free_pgs |
MDB_IDL | mt_spill_pgs |
union { | |
MDB_ID2L dirty_list | |
MDB_reader * reader | |
} | mt_u |
MDB_dbx * | mt_dbxs |
MDB_db * | mt_dbs |
MDB_cursor ** | mt_cursors |
unsigned char * | mt_dbflags |
MDB_dbi | mt_numdbs |
unsigned int | mt_flags |
unsigned int | mt_dirty_room |
unsigned int | mt_toggle |
MDB_txn* MDB_txn::mt_parent |
parent of a nested txn
MDB_txn* MDB_txn::mt_child |
nested txn under this txn
pgno_t MDB_txn::mt_next_pgno |
next unallocated page
txnid_t MDB_txn::mt_txnid |
The ID of this transaction. IDs are integers incrementing from 1. Only committed write transactions increment the ID. If a transaction aborts, the ID may be re-used by the next writer.
MDB_env* MDB_txn::mt_env |
the DB environment
MDB_IDL MDB_txn::mt_free_pgs |
The list of pages that became unused during this transaction.
MDB_IDL MDB_txn::mt_spill_pgs |
The sorted list of dirty pages we temporarily wrote to disk because the dirty list was full.
MDB_ID2L MDB_txn::dirty_list |
For write txns: Modified pages. Sorted when not MDB_WRITEMAP.
MDB_reader* MDB_txn::reader |
For read txns: This thread/txn's reader table slot, or NULL.
MDB_dbx* MDB_txn::mt_dbxs |
Array of records for each DB known in the environment.
MDB_cursor** MDB_txn::mt_cursors |
In write txns, array of cursors for each DB
unsigned char* MDB_txn::mt_dbflags |
Array of flags for each DB
MDB_dbi MDB_txn::mt_numdbs |
Number of DB records in use. This number only ever increments; we don't decrement it when individual DB handles are closed.
unsigned int MDB_txn::mt_flags |
unsigned int MDB_txn::mt_dirty_room |
dirty_list maxsize - # of allocated pages allowed, including in parent txns
unsigned int MDB_txn::mt_toggle |
Tracks which of the two meta pages was used at the start of this transaction.
struct MDB_cursor |
Opaque structure for navigating through a database.
Cursors are used for all DB operations
Data Fields | |
MDB_cursor * | mc_next |
MDB_cursor * | mc_backup |
struct MDB_xcursor * | mc_xcursor |
MDB_txn * | mc_txn |
MDB_dbi | mc_dbi |
MDB_db * | mc_db |
MDB_dbx * | mc_dbx |
unsigned char * | mc_dbflag |
unsigned short | mc_snum |
unsigned short | mc_top |
unsigned int | mc_flags |
MDB_page * | mc_pg [CURSOR_STACK] |
indx_t | mc_ki [CURSOR_STACK] |
MDB_cursor* MDB_cursor::mc_next |
Next cursor on this DB in this txn
MDB_cursor* MDB_cursor::mc_backup |
Backup of the original cursor if this cursor is a shadow
struct MDB_xcursor* MDB_cursor::mc_xcursor |
Context used for databases with MDB_DUPSORT, otherwise NULL
MDB_txn* MDB_cursor::mc_txn |
The transaction that owns this cursor
MDB_dbi MDB_cursor::mc_dbi |
The database handle this cursor operates on
MDB_db* MDB_cursor::mc_db |
The database record for this cursor
MDB_dbx* MDB_cursor::mc_dbx |
The database auxiliary record for this cursor
unsigned char* MDB_cursor::mc_dbflag |
The Transaction DB Flags for this database
unsigned short MDB_cursor::mc_snum |
number of pushed pages
unsigned short MDB_cursor::mc_top |
index of top page, normally mc_snum-1
unsigned int MDB_cursor::mc_flags |
MDB_page* MDB_cursor::mc_pg[CURSOR_STACK] |
stack of pushed pages
indx_t MDB_cursor::mc_ki[CURSOR_STACK] |
stack of page indices
struct MDB_xcursor |
Context for sorted-dup records. We could have gone to a fully recursive design, with arbitrarily deep nesting of sub-databases. But for now we only handle these levels - main DB, optional sub-DB, sorted-duplicate DB.
Data Fields | |
MDB_cursor | mx_cursor |
MDB_db | mx_db |
MDB_dbx | mx_dbx |
unsigned char | mx_dbflag |
MDB_cursor MDB_xcursor::mx_cursor |
A sub-cursor for traversing the Dup DB
MDB_db MDB_xcursor::mx_db |
The database record for this Dup DB
MDB_dbx MDB_xcursor::mx_dbx |
The auxiliary DB record for this Dup DB
unsigned char MDB_xcursor::mx_dbflag |
The Transaction DB Flags for this Dup DB
struct MDB_pgstate |
struct MDB_env |
Opaque structure for a database environment.
The database environment.
A DB environment supports multiple databases, all residing in the same shared-memory map.
Data Fields | |
HANDLE | me_fd |
HANDLE | me_lfd |
HANDLE | me_mfd |
uint32_t | me_flags |
unsigned int | me_psize |
unsigned int | me_maxreaders |
unsigned int | me_numreaders |
MDB_dbi | me_numdbs |
MDB_dbi | me_maxdbs |
pid_t | me_pid |
char * | me_path |
char * | me_map |
MDB_txninfo * | me_txns |
MDB_meta * | me_metas [2] |
MDB_txn * | me_txn |
size_t | me_mapsize |
off_t | me_size |
pgno_t | me_maxpg |
MDB_dbx * | me_dbxs |
uint16_t * | me_dbflags |
pthread_key_t | me_txkey |
MDB_pgstate | me_pgstate |
MDB_page * | me_dpages |
MDB_IDL | me_free_pgs |
MDB_ID2L | me_dirty_list |
int | me_maxfree_1pg |
unsigned int | me_nodemax |
HANDLE MDB_env::me_fd |
The main data file
HANDLE MDB_env::me_lfd |
The lock file
HANDLE MDB_env::me_mfd |
just for writing the meta pages
uint32_t MDB_env::me_flags |
unsigned int MDB_env::me_psize |
size of a page, from GET_PAGESIZE
unsigned int MDB_env::me_maxreaders |
size of the reader table
unsigned int MDB_env::me_numreaders |
max numreaders set by this env
MDB_dbi MDB_env::me_numdbs |
number of DBs opened
MDB_dbi MDB_env::me_maxdbs |
size of the DB table
pid_t MDB_env::me_pid |
process ID of this env
char* MDB_env::me_path |
path to the DB files
char* MDB_env::me_map |
the memory map of the data file
MDB_txninfo* MDB_env::me_txns |
the memory map of the lock file or NULL
MDB_meta* MDB_env::me_metas[2] |
pointers to the two meta pages
MDB_txn* MDB_env::me_txn |
current write transaction
size_t MDB_env::me_mapsize |
size of the data memory map
off_t MDB_env::me_size |
current file size
pgno_t MDB_env::me_maxpg |
me_mapsize / me_psize
MDB_dbx* MDB_env::me_dbxs |
array of static DB info
uint16_t* MDB_env::me_dbflags |
array of flags from MDB_db.md_flags
pthread_key_t MDB_env::me_txkey |
thread-key for readers
MDB_pgstate MDB_env::me_pgstate |
state of old pages from freeDB
MDB_page* MDB_env::me_dpages |
list of malloc'd blocks for re-use
MDB_IDL MDB_env::me_free_pgs |
IDL of pages that became unused in a write txn
MDB_ID2L MDB_env::me_dirty_list |
ID2L of pages written during a write txn. Length MDB_IDL_UM_SIZE.
int MDB_env::me_maxfree_1pg |
Max number of freelist items that can fit in a single overflow page
unsigned int MDB_env::me_nodemax |
Max size of a node on a page
struct MDB_ntxn |
Nested transaction
Data Fields | |
MDB_txn | mnt_txn |
MDB_pgstate | mnt_pgstate |
#define MDB_DSYNC O_DSYNC |
A flag for opening a file and requesting synchronous data writes.
This is only used when writing a meta page. It's not strictly needed; we could just do a normal write and then immediately perform a flush. But if this flag is available it saves us an extra system call.
#define MDB_FDATASYNC fdatasync |
Function for flushing the data of a file. Define this to fsync if fdatasync() is not supported.
#define MDB_PAGESIZE 4096 |
A default memory page size. The actual size is platform-dependent, but we use this for boot-strapping. We probably should not be using this any more. The GET_PAGESIZE() macro is used to get the actual size.
Note that we don't currently support Huge pages. On Linux, regular data files cannot use Huge pages, and in general Huge pages aren't actually pageable. We rely on the OS demand-pager to read our data and page it out when memory pressure from other processes is high. So until OSs have actual paging support for Huge pages, they're not viable.
#define MDB_MINKEYS 2 |
The minimum number of keys required in a database page. Setting this to a larger value will place a smaller bound on the maximum size of a data item. Data items larger than this size will be pushed into overflow pages instead of being stored directly in the B-tree node. This value used to default to 4. With a page size of 4096 bytes that meant that any item larger than 1024 bytes would go into an overflow page. That also meant that on average 2-3KB of each overflow page was wasted space. The value cannot be lower than 2 because then there would no longer be a tree structure. With this value, items larger than 2KB will go into overflow pages, and on average only 1KB will be wasted.
#define MDB_MAGIC 0xBEEFC0DE |
A stamp that identifies a file as an MDB file. There's nothing special about this value other than that it is easily recognizable, and it will reflect any byte order mismatches.
#define MDB_DATA_VERSION 1 |
The version number for a database's datafile format.
#define MDB_LOCK_VERSION 1 |
The version number for a database's lockfile format.
#define MDB_MAXKEYSIZE 511 |
The maximum size of a key in the database.
The library rejects bigger keys, and cannot deal with records with bigger keys stored by a library with bigger max keysize.
We require that keys all fit onto a regular page. This limit could be raised a bit further if needed, to something just under MDB_PAGESIZE / MDB_MINKEYS.
Note that data items in an MDB_DUPSORT database are actually keys of a subDB, so they're also limited to this size.
#define MAXDATASIZE 0xffffffffUL |
The maximum size of a data item.
We only store a 32 bit value for node sizes.
#define P_INVALID (~(pgno_t)0) |
An invalid page number. Mainly used to denote an empty tree.
#define F_ISSET | ( | w, | |
f | |||
) | (((w) & (f)) == (f)) |
Test if the flags f are set in a flag word w.
#define DEFAULT_MAPSIZE 1048576 |
Default size of memory map. This is certainly too small for any actual applications. Apps should always set the size explicitly using mdb_env_set_mapsize().
#define PAGEHDRSZ ((unsigned) offsetof(MDB_page, mp_ptrs)) |
Size of the page header, excluding dynamic data at the end
#define METADATA | ( | p) | ((void *)((char *)(p) + PAGEHDRSZ)) |
Address of first usable data byte in a page, after the header
#define NUMKEYS | ( | p) | (((p)->mp_lower - PAGEHDRSZ) >> 1) |
Number of nodes on a page
#define SIZELEFT | ( | p) | (indx_t)((p)->mp_upper - (p)->mp_lower) |
The amount of space remaining in the page
#define PAGEFILL | ( | env, | |
p | |||
) |
#define FILL_THRESHOLD 250 |
The minimum page fill factor, in tenths of a percent. Pages emptier than this are candidates for merging.
#define IS_OVERFLOW | ( | p) | F_ISSET((p)->mp_flags, P_OVERFLOW) |
Test if a page is an overflow page
#define OVPAGES | ( | size, | |
psize | |||
) | ((PAGEHDRSZ-1 + (size)) / (psize) + 1) |
The number of overflow pages needed to store the given size.
#define NODESIZE offsetof(MDB_node, mn_data) |
Size of the node header, excluding dynamic data at the end
#define PGNO_TOPWORD ((pgno_t)-1 > 0xffffffffu ? 32 : 0) |
Bit position of top word in page number, for shifting mn_flags
#define INDXSIZE | ( | k) | (NODESIZE + ((k) == NULL ? 0 : (k)->mv_size)) |
Size of a node in a branch page with a given key. This is just the node header plus the key, there is no data.
#define LEAFSIZE | ( | k, | |
d | |||
) | (NODESIZE + (k)->mv_size + (d)->mv_size) |
Size of a node in a leaf page with a given key and data. This is node header plus key plus data size.
#define NODEPTR | ( | p, | |
i | |||
) | ((MDB_node *)((char *)(p) + (p)->mp_ptrs[i])) |
Address of node i in page p
#define NODEKEY | ( | node) | (void *)((node)->mn_data) |
Address of the key for the node
#define NODEDATA | ( | node) | (void *)((char *)(node)->mn_data + (node)->mn_ksize) |
Address of the data for a node
#define NODEPGNO | ( | node) |
Get the page number pointed to by a branch node
#define SETPGNO | ( | node, | |
pgno | |||
) |
Set the page number in a branch node
#define NODEDSZ | ( | node) | ((node)->mn_lo | ((unsigned)(node)->mn_hi << 16)) |
Get the size of the data in a leaf node
#define SETDSZ | ( | node, | |
size | |||
) |
#define NODEKSZ | ( | node) | ((node)->mn_ksize) |
The size of a key in a node
#define COPY_PGNO | ( | dst, | |
src | |||
) |
Copy a page number from src to dst
#define LEAF2KEY | ( | p, | |
i, | |||
ks | |||
) | ((char *)(p) + PAGEHDRSZ + ((i)*(ks))) |
The address of a key in a LEAF2 page. LEAF2 pages are used for MDB_DUPFIXED sorted-duplicate sub-DBs. There are no node headers, keys are stored contiguously.
#define MDB_GET_KEY | ( | node, | |
keyptr | |||
) |
Set the node's key into keyptr.
#define MDB_VALID 0x8000 |
DB handle is valid, for me_dbflags. (Documented alongside the mdb_dbi_open flags.)
#define VALID_FLAGS |
#define FREE_DBI 0 |
Handle for the DB used to track free pages.
#define MAIN_DBI 1 |
Handle for the default DB.
#define CURSOR_STACK 32 |
Enough space for 2^32 nodes with minimum of 2 keys per node. I.e., plenty. At 4 keys per node, enough for 2^64 nodes, so there's probably no need to raise this on a 64 bit machine.
#define MDB_COMMIT_PAGES 64 |
max number of pages to commit in one writev() call
#define MDB_SPLIT_REPLACE MDB_APPENDDUP |
newkey is not new
#define LOCKNAME "/lock.mdb" |
The name of the lock file in the DB environment
#define DATANAME "/data.mdb" |
The name of the data file in the DB environment
#define LOCKSUFF "-lock" |
The suffix of the lock file when no subdir is used
#define CHANGEABLE (MDB_NOSYNC|MDB_NOMETASYNC|MDB_MAPASYNC) |
Only a subset of the Environment Flags can be changed at runtime. Changing other flags requires closing the environment and re-opening it with the new flags.
#define MDB_NOSPILL 0x8000 |
Do not spill pages to disk if txn is getting full, may fail instead
typedef MDB_ID pgno_t |
A page number in the database. Note that 64 bit page numbers are overkill, since pages themselves already represent 12-13 bits of addressable memory, and the OS will always limit applications to a maximum of 63 bits of address space.
typedef MDB_ID txnid_t |
A transaction ID. See struct MDB_txn.mt_txnid for details.
typedef uint16_t indx_t |
Used for offsets within a single page. Since memory pages are typically 4 or 8KB in size, 12-13 bits, this is plenty.
|
static |
Allocate page numbers and memory for writing. Maintain me_pglast, me_pghead and mt_next_pgno.
If there are free pages available from older transactions, they are re-used first. Otherwise allocate a new page at mt_next_pgno. Do not modify the freeDB, just merge freeDB records into me_pghead[] and move me_pglast to say which records were consumed. Only this function can create me_pghead and move me_pglast/mt_next_pgno.
[in] | mc | A cursor handle identifying the transaction and database for which we are allocating. |
[in] | num | the number of pages to allocate. |
[out] | mp | Address of the allocated page(s). Requests for multiple pages will always be satisfied by a single contiguous chunk of memory. |
|
static |
Allocate and initialize new pages for a database.
[in] | mc | a cursor on the database being added to. |
[in] | flags | flags defining what type of page is being allocated. |
[in] | num | the number of pages to allocate. This is usually 1, unless allocating overflow pages for a large record. |
[out] | mp | Address of a page, or NULL on failure. |
|
static |
Touch a page: make it dirty and re-insert into tree with updated pgno.
[in] | mc | cursor pointing to the page to be touched |
Find the address of the page corresponding to a given page number.
[in] | txn | the transaction for this access. |
[in] | pgno | the page number for the page to retrieve. |
[out] | ret | address of a pointer where the page's address will be stored. |
[out] | lvl | dirty_list inheritance level of found page. 1=current txn, 0=mapped page. |
|
static |
Search for the page a given key should be in. Pushes parent pages on the cursor stack. This function continues a search on a cursor that has already been initialized. (Usually by mdb_page_search() but also by mdb_node_move().)
[in,out] | mc | the cursor for this operation. |
[in] | key | the key to search for. If NULL, search for the lowest page. (This is used by mdb_cursor_first().) |
[in] | modify | If true, visited pages are updated with new page numbers. |
|
static |
Search for the page a given key should be in. Pushes parent pages on the cursor stack. This function just sets up the search; it finds the root page for mc's database and sets this as the root of the cursor's stack. Then mdb_page_search_root() is called to complete the search.
[in,out] | mc | the cursor for this operation. |
[in] | key | the key to search for. If NULL, search for the lowest page. (This is used by mdb_cursor_first().) |
[in] | flags | If MDB_PS_MODIFY set, visited pages are updated with new page numbers. If MDB_PS_ROOTONLY set, just fetch root node, no further lookups. |
|
static |
Merge one page into another. The nodes from the page pointed to by csrc will be copied to the page pointed to by cdst and then the csrc page will be freed.
[in] | csrc | Cursor pointing to the source page. |
[in] | cdst | Cursor pointing to the destination page. |
|
static |
Split a page and insert a new node.
[in,out] | mc | Cursor pointing to the page and desired insertion index. The cursor will be updated to point to the actual page and index where the node got inserted after the split. |
[in] | newkey | The key for the newly inserted node. |
[in] | newdata | The data for the newly inserted node. |
[in] | newpgno | The page number, if the new node is a branch node. |
[in] | nflags | The NODE_ADD_FLAGS for the new node. |
Read the environment parameters of a DB environment before mapping it into memory.
[in] | env | the environment handle |
[out] | meta | address of where to store the meta information |
|
static |
Check both meta pages to see which one is newer.
[in] | env | the environment handle |
|
static |
Update the environment info to commit a transaction.
[in] | txn | the transaction that's being committed |
|
static |
Destroy resources from mdb_env_open(), clear our readers & DBIs
|
static |
Search for key within a page, using binary search. Returns the smallest entry greater than or equal to the key. If exactp is non-null, stores whether the found entry was an exact match in *exactp (1 or 0). Updates the cursor index with the index of the found entry. If no entry greater than or equal to the key is found, returns NULL.
|
static |
Add a node to the page pointed to by the cursor.
[in] | mc | The cursor for this operation. |
[in] | indx | The index on the page where the new node should be added. |
[in] | key | The key for the new node. |
[in] | data | The data for the new node, if any. |
[in] | pgno | The page number, if adding a branch node. |
[in] | flags | Flags for the node. |
Delete the specified node from a page.
[in] | mp | The page to operate on. |
[in] | indx | The index of the node to delete. |
[in] | ksize | The size of a node. Only used if the page is part of a MDB_DUPFIXED database. |
Compact the main page after deleting a node on a subpage.
[in] | mp | The main page to operate on. |
[in] | indx | The index of the subpage on the main page. |
|
static |
Move a node from csrc to cdst.
Return the data associated with a given node.
[in] | txn | The transaction for this operation. |
[in] | leaf | The node being read. |
[out] | data | Updated to point to the node's data. |
Calculate the size of a leaf node. The size depends on the environment's page size; if a data item is too large it will be put onto an overflow page and the node size will only include the key and not the data. Sizes are always rounded up to an even number of bytes, to guarantee 2-byte alignment of the MDB_node headers.
[in] | env | The environment handle. |
[in] | key | The key for the node. |
[in] | data | The data for the node. |
Calculate the size of a branch node. The size should depend on the environment's page size but since we currently don't support spilling large keys onto overflow pages, it's simply the size of the MDB_node header plus the size of the key. Sizes are always rounded up to an even number of bytes, to guarantee 2-byte alignment of the MDB_node headers.
[in] | env | The environment handle. |
[in] | key | The key for the node. |
|
static |
Rebalance the tree after a delete operation.
[in] | mc | Cursor pointing to the page where rebalancing should begin. |
|
static |
Replace the key for a node with a new key.
[in] | mc | Cursor pointing to the node to operate on. |
[in] | key | The new key to use. |
|
static |
Pop a page off the top of the cursor's stack.
|
static |
Push a page onto the top of the cursor's stack.
|
static |
Complete a delete operation started by mdb_cursor_del().
|
static |
Find a sibling for a page. Replaces the page at the top of the cursor's stack with the specified sibling, if one exists.
[in] | mc | The cursor for this operation. |
[in] | move_right | Non-zero if the right sibling is requested, otherwise the left sibling. |
|
static |
Move the cursor to the next data item.
|
static |
Move the cursor to the previous data item.
|
static |
Set the cursor on a specific data item.
|
static |
Move the cursor to the first item in the database.
|
static |
Move the cursor to the last item in the database.
|
static |
Initialize a cursor for a given transaction and database.
|
static |
Initial setup of a sorted-dups cursor. Sorted duplicates are implemented as a sub-database for the given key. The duplicate data items are actually keys of the sub-database. Operations on the duplicate data items are performed using a sub-cursor initialized when the sub-database is first accessed. This function does the preliminary setup of the sub-cursor, filling in the fields that depend only on the parent DB.
[in] | mc | The main cursor whose sorted-dups cursor is to be initialized. |
|
static |
Final setup of a sorted-dups cursor. Sets up the fields that depend on the data from the main cursor.
[in] | mc | The main cursor whose sorted-dups cursor is to be initialized. |
[in] | node | The data containing the MDB_db record for the sorted-dup database. |
|
static |
Add all the DB's pages to the free list.
[in] | mc | Cursor on the DB to free. |
[in] | subs | non-zero to check for sub-DBs in this DB. |
Set the default comparison functions for a database. Called immediately after a database is opened to set the defaults. The user can then override them with mdb_set_compare() or mdb_set_dupsort().
[in] | txn | A transaction handle returned by mdb_txn_begin() |
[in] | dbi | A database handle returned by mdb_dbi_open() |
char* mdb_version | ( | int * | major, |
int * | minor, | ||
int * | patch | ||
) |
Return the mdb library version information.
Return the library version info.
char* mdb_strerror | ( | int | err) |
Return a string describing a given error code.
This function is a superset of the ANSI C X3.159-1989 (ANSI C) strerror(3) function. If the error code is greater than or equal to 0, then the string returned by the system function strerror(3) is returned. If the error code is less than 0, an error string corresponding to the MDB library error is returned. See Return Codes for a list of MDB-specific error codes.
[in] | err | The error code |
error message | The description of the error |
Compare two data items according to a particular database.
This returns a comparison as if the two data items were keys in the specified database.
[in] | txn | A transaction handle returned by mdb_txn_begin() |
[in] | dbi | A database handle returned by mdb_dbi_open() |
[in] | a | The first item to compare |
[in] | b | The second item to compare |
Compare two data items according to a particular database.
This returns a comparison as if the two items were data items of the specified database. The database must have the MDB_DUPSORT flag.
[in] | txn | A transaction handle returned by mdb_txn_begin() |
[in] | dbi | A database handle returned by mdb_dbi_open() |
[in] | a | The first item to compare |
[in] | b | The second item to compare |
Allocate memory for a page. Re-use old malloc'd pages first for singletons, otherwise just malloc.
Free a single page. Saves single pages to a list, for future reuse. (This is not used for multi-page overflow pages.)
|
static |
Return all dirty pages to dpage list
|
static |
Flush dirty pages to the map, after clearing their dirty flag.
|
static |
Spill pages from the dirty list back to disk.
This is intended to prevent running into MDB_TXN_FULL situations, but note that they may still occur in a few cases: 1) pages in MDB_DUPSORT sub-DBs are never spilled, so if there are too many of these dirtied in one txn, the txn may still get too full. 2) child txns may run out of space if their parents dirtied a lot of pages and never spilled them. TODO: we probably should do a preemptive spill during mdb_txn_begin() of a child txn, if the parent's dirty_room is below a given threshold. 3) our estimate of the txn size could be too small. At the moment this seems unlikely.
Otherwise, if not using nested txns, it is expected that apps will not run into MDB_TXN_FULL any more. The pages are flushed to disk the same way as for a txn commit, e.g. their P_DIRTY flag is cleared. If the txn never references them again, they can be left alone. If the txn only reads them, they can be used without any fuss. If the txn writes them again, they can be dirtied immediately without going thru all of the work of mdb_page_touch(). Such references are handled by mdb_page_unspill().
Also note, we never spill DB root pages, nor pages of active cursors, because we'll need these back again soon anyway. And in nested txns, we can't spill a page in a child txn if it was already spilled in a parent txn. That would alter the parent txns' data even though the child hasn't committed yet, and we'd have no way to undo it if the child aborted.
[in] | m0 | A cursor handle identifying the transaction and database for which we are checking space. |
[in] | key | For a put operation, the key being stored. |
[in] | data | For a put operation, the data being stored. |
Find oldest txnid still referenced. Expects txn->mt_txnid > 0.
Add a page to the txn's dirty list
Copy the used portions of a non-overflow page.
[in] | dst | page to copy into |
[in] | src | page to copy from |
[in] | psize | size of a page |
Pull a page off the txn's spill list, if present. If a page being referenced was spilled to disk in this txn, bring it back and make it dirty/writable again.
[in] | tx0 | the transaction handle. |
[in] | mp | the page being referenced. |
[out] | ret | the writable page, if any. ret is unchanged if mp wasn't spilled. |
int mdb_env_sync | ( | MDB_env * | env, |
int | force | ||
) |
Flush the data buffers to disk.
Data is always written to disk when mdb_txn_commit() is called, but the operating system may keep it buffered. MDB always flushes the OS buffers upon commit as well, unless the environment was opened with MDB_NOSYNC or in part MDB_NOMETASYNC.
[in] | env | An environment handle returned by mdb_env_create() |
[in] | force | If non-zero, force a synchronous flush. Otherwise if the environment has the MDB_NOSYNC flag set the flushes will be omitted, and with MDB_MAPASYNC they will be asynchronous. |
Back up parent txn's cursors, then grab the originals for tracking
|
static |
Close this write txn's cursors, give parent txn's cursors back to parent.
[in] | txn | the transaction handle. |
[in] | merge | true to keep changes to parent cursors, false to revert. |
|
static |
Common code for mdb_txn_reset() and mdb_txn_abort(). May be called twice for readonly txns: First reset it, then abort.
[in] | txn | the transaction handle to reset |
[in] | act | why the transaction is being reset |
|
static |
Set or check a pid lock. Set returns 0 on success. Check returns 0 if the process is certainly dead, nonzero if it may be alive (the lock exists or an error happened so we do not know).
On Windows Pidset is a no-op, we merely check for the existence of the process with the given pid. On POSIX we use a single byte lock on the lockfile, set at an offset equal to the pid.
|
static |
Common code for mdb_txn_begin() and mdb_txn_renew().
[in] | txn | the transaction handle to initialize |
int mdb_txn_renew | ( | MDB_txn * | txn) |
Renew a read-only transaction.
This acquires a new reader lock for a transaction handle that had been released by mdb_txn_reset(). It must be called before a reset transaction may be used again.
[in] | txn | A transaction handle returned by mdb_txn_begin() |
Create a transaction for use with the environment.
The transaction handle may be discarded using mdb_txn_abort() or mdb_txn_commit().
[in] | env | An environment handle returned by mdb_env_create() |
[in] | parent | If this parameter is non-NULL, the new transaction will be a nested transaction, with the transaction indicated by parent as its parent. Transactions may be nested to any level. A parent transaction and its cursors may not issue any other operations than mdb_txn_commit and mdb_txn_abort while it has active child transactions. |
[in] | flags | Special options for this transaction. This parameter must be set to 0 or by bitwise OR'ing together one or more of the values described here.
|
[out] | txn | Address where the new MDB_txn handle will be stored |
Returns the transaction's MDB_env.
[in] | txn | A transaction handle returned by mdb_txn_begin() |
|
static |
Export or close DBI handles opened in this txn.
void mdb_txn_reset | ( | MDB_txn * | txn) |
Reset a read-only transaction.
Abort the transaction like mdb_txn_abort(), but keep the transaction handle. mdb_txn_renew() may reuse the handle. This saves allocation overhead if the process will start a new read-only transaction soon, and also locking overhead if MDB_NOTLS is in use. The reader table lock is released, but the table slot stays tied to its thread or MDB_txn. Use mdb_txn_abort() to discard a reset handle, and to free its lock table slot if MDB_NOTLS is in use. Cursors opened within the transaction must not be used again after this call, except with mdb_cursor_renew(). Reader locks generally don't interfere with writers, but they keep old versions of database pages allocated. Thus they prevent the old pages from being reused when writers commit new data, and so under heavy load the database size may grow much more rapidly than otherwise.
[in] | txn | A transaction handle returned by mdb_txn_begin() |
void mdb_txn_abort | ( | MDB_txn * | txn) |
Abandon all the operations of the transaction instead of saving them.
The transaction handle is freed. It and its cursors must not be used again after this call, except with mdb_cursor_renew().
[in] | txn | A transaction handle returned by mdb_txn_begin() |
|
static |
Save the freelist as of this transaction to the freeDB. This changes the freelist. Keep trying until it stabilizes.
int mdb_txn_commit | ( | MDB_txn * | txn) |
Commit all the operations of a transaction into the database.
The transaction handle is freed. It and its cursors must not be used again after this call, except with mdb_cursor_renew().
[in] | txn | A transaction handle returned by mdb_txn_begin() |
Write the environment parameters of a freshly created DB environment.
[in] | env | the environment handle |
[out] | meta | address of where to store the meta information |
int mdb_env_create | ( | MDB_env ** | env) |
Create an MDB environment handle.
This function allocates memory for a MDB_env structure. To release the allocated memory and discard the handle, call mdb_env_close(). Before the handle may be used, it must be opened using mdb_env_open(). Various other options may also need to be set before opening the handle, e.g. mdb_env_set_mapsize(), mdb_env_set_maxreaders(), mdb_env_set_maxdbs(), depending on usage requirements.
[out] | env | The address where the new handle will be stored |
int mdb_env_set_mapsize | ( | MDB_env * | env, |
size_t | size | ||
) |
Set the size of the memory map to use for this environment.
The size should be a multiple of the OS page size. The default is 1048576 bytes (DEFAULT_MAPSIZE). The size of the memory map is also the maximum size of the database. The value should be chosen as large as possible, to accommodate future growth of the database. This function may only be called after mdb_env_create() and before mdb_env_open(). The size may be changed by closing and reopening the environment. Any attempt to set a size smaller than the space already consumed by the environment will be silently changed to the current size of the used space.
[in] | env | An environment handle returned by mdb_env_create() |
[in] | size | The size in bytes |
Set the maximum number of named databases for the environment.
This function is only needed if multiple databases will be used in the environment. Simpler applications that use the environment as a single unnamed database can ignore this option. This function may only be called after mdb_env_create() and before mdb_env_open().
[in] | env | An environment handle returned by mdb_env_create() |
[in] | dbs | The maximum number of databases |
int mdb_env_set_maxreaders | ( | MDB_env * | env, |
unsigned int | readers | ||
) |
Set the maximum number of threads/reader slots for the environment.
This defines the number of slots in the lock table that is used to track readers in the environment. The default is 126. Starting a read-only transaction normally ties a lock table slot to the current thread until the environment closes or the thread exits. If MDB_NOTLS is in use, mdb_txn_begin() instead ties the slot to the MDB_txn object until it or the MDB_env object is destroyed. This function may only be called after mdb_env_create() and before mdb_env_open().
[in] | env | An environment handle returned by mdb_env_create() |
[in] | readers | The maximum number of reader lock table slots |
int mdb_env_get_maxreaders | ( | MDB_env * | env, |
unsigned int * | readers | ||
) |
Get the maximum number of threads/reader slots for the environment.
[in] | env | An environment handle returned by mdb_env_create() |
[out] | readers | Address of an integer to store the number of readers |
|
static |
Further setup required for opening an MDB environment
|
static |
Release a reader thread's slot in the reader lock table. This function is called automatically when a thread exits.
[in] | ptr | This points to the slot in the reader lock table. |
|
static |
Downgrade the exclusive lock on the region back to shared
|
static |
Try to get exclusive lock, otherwise shared. Maintain *excl = -1: no/unknown lock, 0: shared, 1: exclusive.
|
static |
Open and/or initialize the lock region for the environment.
[in] | env | The MDB environment. |
[in] | lpath | The pathname of the file used for the lock region. |
[in] | mode | The Unix permissions for the file, if we create it. |
[out] | excl | Resulting file lock type: -1 none, 0 shared, 1 exclusive |
[in,out] | excl | In -1, out lock type: -1 none, 0 shared, 1 exclusive |
int mdb_env_open | ( | MDB_env * | env, |
const char * | path, | ||
unsigned int | flags, | ||
mdb_mode_t | mode | ||
) |
Open an environment handle.
If this function fails, mdb_env_close() must be called to discard the MDB_env handle.
[in] | env | An environment handle returned by mdb_env_create() |
[in] | path | The directory in which the database files reside. This directory must already exist and be writable. |
[in] | flags | Special options for this environment. This parameter must be set to 0 or by bitwise OR'ing together one or more of the values described here. Flags set by mdb_env_set_flags() are also used.
|
[in] | mode | The UNIX permissions to set on created files. This parameter is ignored on Windows. |
int mdb_env_copy | ( | MDB_env * | env, |
const char * | path | ||
) |
Copy an MDB environment to the specified path.
This function may be used to make a backup of an existing environment. No lockfile is created, since it gets recreated at need.
[in] | env | An environment handle returned by mdb_env_create(). It must have already been opened successfully. |
[in] | path | The directory in which the copy will reside. This directory must already exist and be writable but must otherwise be empty. |
void mdb_env_close | ( | MDB_env * | env) |
Close the environment and release the memory map.
Only a single thread may call this function. All transactions, databases, and cursors must already be closed before calling this function. Attempts to use any such handles after calling this function will cause a SIGSEGV. The environment handle will be freed and must not be used again after this call.
[in] | env | An environment handle returned by mdb_env_create() |
Compare two items pointing at aligned size_t's
Compare two items pointing at aligned int's
Compare two items pointing at ints of unknown alignment. Nodes and keys are guaranteed to be 2-byte aligned.
Compare two items in reverse byte order
|
static |
Search for the lowest key under the current branch page. This just bypasses a NUMKEYS check in the current page before calling mdb_page_search_root(), because the callers are all in situations where the current page is known to be underfilled.
Get items from a database.
This function retrieves key/data pairs from the database. The address and length of the data associated with the specified key are returned in the structure to which data refers. If the database supports duplicate keys (MDB_DUPSORT) then the first data item for the key will be returned. Retrieval of other items requires the use of mdb_cursor_get().
[in] | txn | A transaction handle returned by mdb_txn_begin() |
[in] | dbi | A database handle returned by mdb_dbi_open() |
[in] | key | The key to search for in the database |
[out] | data | The data corresponding to the key |
int mdb_cursor_get | ( | MDB_cursor * | cursor, |
MDB_val * | key, | ||
MDB_val * | data, | ||
MDB_cursor_op | op | ||
) |
Retrieve by cursor.
This function retrieves key/data pairs from the database. The address and length of the key are returned in the object to which key refers (except for the case of the MDB_SET option, in which the key object is unchanged), and the address and length of the data are returned in the object to which data refers. See mdb_get() for restrictions on using the output values.
[in] | cursor | A cursor handle returned by mdb_cursor_open() |
[in,out] | key | The key for a retrieved item |
[in,out] | data | The data of a retrieved item |
[in] | op | A cursor operation MDB_cursor_op |
|
static |
Touch all the pages in the cursor stack. Makes sure all the pages are writable, before attempting a write operation.
[in] | mc | The cursor to operate on. |
int mdb_cursor_put | ( | MDB_cursor * | cursor, |
MDB_val * | key, | ||
MDB_val * | data, | ||
unsigned int | flags | ||
) |
Store by cursor.
This function stores key/data pairs into the database. If the function fails for any reason, the state of the cursor will be unchanged. If the function succeeds and an item is inserted into the database, the cursor is always positioned to refer to the newly inserted item.
[in] | cursor | A cursor handle returned by mdb_cursor_open() |
[in] | key | The key operated on. |
[in] | data | The data operated on. |
[in] | flags | Options for this operation. This parameter must be set to 0 or one of the values described here.
|
int mdb_cursor_del | ( | MDB_cursor * | cursor, |
unsigned int | flags | ||
) |
Delete current key/data pair.
This function deletes the key/data pair to which the cursor refers.
[in] | cursor | A cursor handle returned by mdb_cursor_open() |
[in] | flags | Options for this operation. This parameter must be set to 0 or one of the values described here.
|
int mdb_cursor_open | ( | MDB_txn * | txn, |
MDB_dbi | dbi, | ||
MDB_cursor ** | cursor | ||
) |
Create a cursor handle.
A cursor is associated with a specific transaction and database. A cursor cannot be used when its database handle is closed, nor when its transaction has ended, except with mdb_cursor_renew(). It can be discarded with mdb_cursor_close(). A cursor in a write-transaction can be closed before its transaction ends, and will otherwise be closed when its transaction ends. A cursor in a read-only transaction must be closed explicitly, before or after its transaction ends. It can be reused with mdb_cursor_renew() before finally closing it.
[in] | txn | A transaction handle returned by mdb_txn_begin() |
[in] | dbi | A database handle returned by mdb_dbi_open() |
[out] | cursor | Address where the new MDB_cursor handle will be stored |
int mdb_cursor_renew | ( | MDB_txn * | txn, |
MDB_cursor * | cursor | ||
) |
Renew a cursor handle.
A cursor is associated with a specific transaction and database. Cursors that are only used in read-only transactions may be re-used, to avoid unnecessary malloc/free overhead. The cursor may be associated with a new read-only transaction, while still referencing the same database handle as it was created with. This may be done whether the previous transaction is live or dead.
[in] | txn | A transaction handle returned by mdb_txn_begin() |
[in] | cursor | A cursor handle returned by mdb_cursor_open() |
int mdb_cursor_count | ( | MDB_cursor * | cursor, |
size_t * | countp | ||
) |
Return count of duplicates for current key.
This call is only valid on databases that support sorted duplicate data items MDB_DUPSORT.
[in] | cursor | A cursor handle returned by mdb_cursor_open() |
[out] | countp | Address where the count will be stored |
void mdb_cursor_close | ( | MDB_cursor * | cursor) |
Close a cursor handle.
The cursor handle will be freed and must not be used again after this call. Its transaction must still be live if it is a write-transaction.
[in] | cursor | A cursor handle returned by mdb_cursor_open() |
MDB_txn* mdb_cursor_txn | ( | MDB_cursor * | cursor) |
Return the cursor's transaction handle.
[in] | cursor | A cursor handle returned by mdb_cursor_open() |
MDB_dbi mdb_cursor_dbi | ( | MDB_cursor * | cursor) |
Return the cursor's database handle.
[in] | cursor | A cursor handle returned by mdb_cursor_open() |
|
static |
Copy the contents of a cursor.
[in] | csrc | The cursor to copy from. |
[out] | cdst | The cursor to copy to. |
Delete items from a database.
This function removes key/data pairs from the database. If the database does not support sorted duplicate data items (MDB_DUPSORT) the data parameter is ignored. If the database supports sorted duplicates and the data parameter is NULL, all of the duplicate data items for the key will be deleted. Otherwise, if the data parameter is non-NULL only the matching data item will be deleted. This function will return MDB_NOTFOUND if the specified key/data pair is not in the database.
[in] | txn | A transaction handle returned by mdb_txn_begin() |
[in] | dbi | A database handle returned by mdb_dbi_open() |
[in] | key | The key to delete from the database |
[in] | data | The data to delete |
Store items into a database.
This function stores key/data pairs in the database. The default behavior is to enter the new key/data pair, replacing any previously existing key if duplicates are disallowed, or adding a duplicate data item if duplicates are allowed (MDB_DUPSORT).
[in] | txn | A transaction handle returned by mdb_txn_begin() |
[in] | dbi | A database handle returned by mdb_dbi_open() |
[in] | key | The key to store in the database |
[in,out] | data | The data to store |
[in] | flags | Special options for this operation. This parameter must be set to 0 or by bitwise OR'ing together one or more of the values described here.
|
int mdb_env_set_flags | ( | MDB_env * | env, |
unsigned int | flags, | ||
int | onoff | ||
) |
Set environment flags.
This may be used to set some flags in addition to those from mdb_env_open(), or to unset these flags.
[in] | env | An environment handle returned by mdb_env_create() |
[in] | flags | The flags to change, bitwise OR'ed together |
[in] | onoff | A non-zero value sets the flags, zero clears them. |
int mdb_env_get_flags | ( | MDB_env * | env, |
unsigned int * | flags | ||
) |
Get environment flags.
[in] | env | An environment handle returned by mdb_env_create() |
[out] | flags | The address of an integer to store the flags |
int mdb_env_get_path | ( | MDB_env * | env, |
const char ** | path | ||
) |
Return the path that was used in mdb_env_open().
[in] | env | An environment handle returned by mdb_env_create() |
[out] | path | Address of a string pointer to contain the path. This is the actual string in the environment, not a copy. It should not be altered in any way. |
Common code for mdb_stat() and mdb_env_stat().
[in] | env | the environment to operate in. |
[in] | db | the MDB_db record containing the stats to return. |
[out] | arg | the address of an MDB_stat structure to receive the stats. |
Return statistics about the MDB environment.
[in] | env | An environment handle returned by mdb_env_create() |
[out] | stat | The address of an MDB_stat structure where the statistics will be copied |
int mdb_env_info | ( | MDB_env * | env, |
MDB_envinfo * | stat | ||
) |
Return information about the MDB environment.
[in] | env | An environment handle returned by mdb_env_create() |
[out] | stat | The address of an MDB_envinfo structure where the information will be copied |
Open a database in the environment.
A database handle denotes the name and parameters of a database, independently of whether such a database exists. The database handle may be discarded by calling mdb_dbi_close(). The old database handle is returned if the database was already open. The handle must only be closed once. The database handle will be private to the current transaction until the transaction is successfully committed. If the transaction is aborted the handle will be closed automatically. After a successful commit the handle will reside in the shared environment, and may be used by other transactions. This function must not be called from multiple concurrent transactions. A transaction that uses this function must finish (either commit or abort) before any other transaction may use this function.
To use named databases (with name != NULL), mdb_env_set_maxdbs() must be called before opening the environment.
[in] | txn | A transaction handle returned by mdb_txn_begin() |
[in] | name | The name of the database to open. If only a single database is needed in the environment, this value may be NULL. |
[in] | flags | Special options for this database. This parameter must be set to 0 or by bitwise OR'ing together one or more of the values described here.
|
[out] | dbi | Address where the new MDB_dbi handle will be stored |
Retrieve statistics for a database.
[in] | txn | A transaction handle returned by mdb_txn_begin() |
[in] | dbi | A database handle returned by mdb_dbi_open() |
[out] | stat | The address of an MDB_stat structure where the statistics will be copied |
Close a database handle.
This call is not mutex protected. Handles should only be closed by a single thread, and only if no other threads are going to reference the database handle or one of its cursors any further. Do not close a handle if an existing transaction has modified its database.
[in] | env | An environment handle returned by mdb_env_create() |
[in] | dbi | A database handle returned by mdb_dbi_open() |
Retrieve the DB flags for a database handle.
[in] | env | An environment handle returned by mdb_env_create() |
[in] | dbi | A database handle returned by mdb_dbi_open() |
[out] | flags | Address where the flags will be returned. |
Empty or delete+close a database.
[in] | txn | A transaction handle returned by mdb_txn_begin() |
[in] | dbi | A database handle returned by mdb_dbi_open() |
[in] | del | 0 to empty the DB, 1 to delete it from the environment and close the DB handle. |
int mdb_set_compare | ( | MDB_txn * | txn, |
MDB_dbi | dbi, | ||
MDB_cmp_func * | cmp | ||
) |
Set a custom key comparison function for a database.
The comparison function is called whenever it is necessary to compare a key specified by the application with a key currently stored in the database. If no comparison function is specified, and no special key flags were specified with mdb_dbi_open(), the keys are compared lexically, with shorter keys collating before longer keys.
[in] | txn | A transaction handle returned by mdb_txn_begin() |
[in] | dbi | A database handle returned by mdb_dbi_open() |
[in] | cmp | An MDB_cmp_func function |
int mdb_set_dupsort | ( | MDB_txn * | txn, |
MDB_dbi | dbi, | ||
MDB_cmp_func * | cmp | ||
) |
Set a custom data comparison function for an MDB_DUPSORT database.
This comparison function is called whenever it is necessary to compare a data item specified by the application with a data item currently stored in the database. This function only takes effect if the database was opened with the MDB_DUPSORT flag. If no comparison function is specified, and no special key flags were specified with mdb_dbi_open(), the data items are compared lexically, with shorter items collating before longer items.
[in] | txn | A transaction handle returned by mdb_txn_begin() |
[in] | dbi | A database handle returned by mdb_dbi_open() |
[in] | cmp | An MDB_cmp_func function |
int mdb_set_relfunc | ( | MDB_txn * | txn, |
MDB_dbi | dbi, | ||
MDB_rel_func * | rel | ||
) |
Set a relocation function for an MDB_FIXEDMAP database.
[in] | txn | A transaction handle returned by mdb_txn_begin() |
[in] | dbi | A database handle returned by mdb_dbi_open() |
[in] | rel | An MDB_rel_func function |
Set a context pointer for an MDB_FIXEDMAP database's relocation function.
See mdb_set_relfunc and MDB_rel_func for more details.
[in] | txn | A transaction handle returned by mdb_txn_begin() |
[in] | dbi | A database handle returned by mdb_dbi_open() |
[in] | ctx | An arbitrary pointer for whatever the application needs. It will be passed to the callback function set by mdb_set_relfunc as its relctx parameter whenever the callback is invoked. |
int mdb_env_get_maxkeysize | ( | MDB_env * | env) |
Get the maximum size of a key for the environment.
[in] | env | An environment handle returned by mdb_env_create() |
int mdb_reader_list | ( | MDB_env * | env, |
MDB_msg_func * | func, | ||
void * | ctx | ||
) |
Dump the entries in the reader lock table.
[in] | env | An environment handle returned by mdb_env_create() |
[in] | func | An MDB_msg_func function |
[in] | ctx | Anything the message function needs |
int mdb_reader_check | ( | MDB_env * | env, |
int * | dead | ||
) |
Check for stale entries in the reader lock table.
[in] | env | An environment handle returned by mdb_env_create() |
[out] | dead | Number of stale slots that were cleared |
|
static |
Table of descriptions for MDB Return Codes