libgpac
Documentation of the core library of GPAC
libregexp.c File Reference
#include <stdlib.h>
#include <stdio.h>
#include <stdarg.h>
#include <inttypes.h>
#include <string.h>
#include <assert.h>
#include "cutils.h"
#include "libregexp.h"
#include "libregexp-opcode.h"
+ Include dependency graph for libregexp.c:

Data Structures

struct  REParseState
 
struct  REOpCode
 
struct  REExecState
 
struct  REExecContext
 
union  REParseState.u
 

Macros

#define DEF(id, size)   REOP_ ## id,
 
#define CAPTURE_COUNT_MAX   255
 
#define STACK_SIZE_MAX   255
 
#define CP_LS   0x2028
 
#define CP_PS   0x2029
 
#define TMP_BUF_SIZE   128
 
#define DEF(id, size)   { size },
 
#define RE_HEADER_FLAGS   0
 
#define RE_HEADER_CAPTURE_COUNT   1
 
#define RE_HEADER_STACK_SIZE   2
 
#define RE_HEADER_LEN   7
 
#define CLASS_RANGE_BASE   0x40000000
 
#define GET_CHAR(c, cptr, cbuf_end)
 
#define PEEK_CHAR(c, cptr, cbuf_end)
 
#define PEEK_PREV_CHAR(c, cptr, cbuf_start)
 
#define GET_PREV_CHAR(c, cptr, cbuf_start)
 
#define PREV_CHAR(cptr, cbuf_start)
 

Typedefs

typedef uintptr_t StackInt
 

Enumerations

enum  REOPCodeEnum { REOP_COUNT }
 
enum  CharRangeEnum {
  CHAR_RANGE_d , CHAR_RANGE_D , CHAR_RANGE_s , CHAR_RANGE_S ,
  CHAR_RANGE_w , CHAR_RANGE_W
}
 
enum  REExecStateEnum { RE_EXEC_STATE_SPLIT , RE_EXEC_STATE_LOOKAHEAD , RE_EXEC_STATE_NEGATIVE_LOOKAHEAD , RE_EXEC_STATE_GREEDY_QUANT }
 

Functions

static int is_digit (int c)
 
static int dbuf_insert (DynBuf *s, int pos, int len)
 
static uint32_t lre_canonicalize (uint32_t c, BOOL is_utf16)
 
BOOL lre_is_space (int c)
 
static int cr_init_char_range (REParseState *s, CharRange *cr, uint32_t c)
 
static int cr_canonicalize (CharRange *cr)
 
static void re_emit_op (REParseState *s, int op)
 
static int re_emit_op_u32 (REParseState *s, int op, uint32_t val)
 
static int re_emit_goto (REParseState *s, int op, uint32_t val)
 
static void re_emit_op_u8 (REParseState *s, int op, uint32_t val)
 
static void re_emit_op_u16 (REParseState *s, int op, uint32_t val)
 
static int __attribute__ ((format(printf, 2, 3)))
 
static int re_parse_out_of_memory (REParseState *s)
 
static int parse_digits (const uint8_t **pp, BOOL allow_overflow)
 
static int re_parse_expect (REParseState *s, const uint8_t **pp, int c)
 
int lre_parse_escape (const uint8_t **pp, int allow_utf16)
 
static BOOL is_unicode_char (int c)
 
static int parse_unicode_property (REParseState *s, CharRange *cr, const uint8_t **pp, BOOL is_inv)
 
static int get_class_atom (REParseState *s, CharRange *cr, const uint8_t **pp, BOOL inclass)
 
static int re_emit_range (REParseState *s, const CharRange *cr)
 
static int re_parse_char_class (REParseState *s, const uint8_t **pp)
 
static int re_check_advance (const uint8_t *bc_buf, int bc_buf_len)
 
static int re_is_simple_quantifier (const uint8_t *bc_buf, int bc_buf_len)
 
static int re_parse_group_name (char *buf, int buf_size, const uint8_t **pp, BOOL is_utf16)
 
static int re_parse_captures (REParseState *s, int *phas_named_captures, const char *capture_name)
 
static int re_count_captures (REParseState *s)
 
static BOOL re_has_named_captures (REParseState *s)
 
static int find_group_name (REParseState *s, const char *name)
 
static int re_parse_disjunction (REParseState *s, BOOL is_backward_dir)
 
static int re_parse_term (REParseState *s, BOOL is_backward_dir)
 
static int re_parse_alternative (REParseState *s, BOOL is_backward_dir)
 
static int compute_stack_size (const uint8_t *bc_buf, int bc_buf_len)
 
uint8_t * lre_compile (int *plen, char *error_msg, int error_msg_size, const char *buf, size_t buf_len, int re_flags, void *opaque)
 
static BOOL is_line_terminator (uint32_t c)
 
static BOOL is_word_char (uint32_t c)
 
static int push_state (REExecContext *s, uint8_t **capture, StackInt *stack, size_t stack_len, const uint8_t *pc, const uint8_t *cptr, REExecStateEnum type, size_t count)
 
static intptr_t lre_exec_backtrack (REExecContext *s, uint8_t **capture, StackInt *stack, int stack_len, const uint8_t *pc, const uint8_t *cptr, BOOL no_recurse)
 
int lre_exec (uint8_t **capture, const uint8_t *bc_buf, const uint8_t *cbuf, int cindex, int clen, int cbuf_type, void *opaque)
 
int lre_get_capture_count (const uint8_t *bc_buf)
 
int lre_get_flags (const uint8_t *bc_buf)
 
const char * lre_get_groupnames (const uint8_t *bc_buf)
 

Variables

static const REOpCode reopcode_info [REOP_COUNT]
 
static const uint16_t char_range_d []
 
static const uint16_t char_range_s []
 
uint32_t const lre_id_start_table_ascii [4]
 
uint32_t const lre_id_continue_table_ascii [4]
 
static const uint16_t char_range_w []
 
static const uint16_t * char_range_table []
 

Data Structure Documentation

◆ REParseState

struct REParseState
+ Collaboration diagram for REParseState:
Data Fields
DynBuf byte_code
const uint8_t * buf_ptr
const uint8_t * buf_end
const uint8_t * buf_start
int re_flags
BOOL is_utf16
BOOL ignore_case
BOOL dotall
int capture_count
int total_capture_count
int has_named_captures
void * opaque
DynBuf group_names
union REParseState u

◆ REOpCode

struct REOpCode
Data Fields
uint8_t size

◆ REExecState

struct REExecState
Data Fields
REExecStateEnum type: 8
uint8_t stack_len
size_t count
const uint8_t * cptr
const uint8_t * pc
void * buf[0]

◆ REExecContext

struct REExecContext
Data Fields
const uint8_t * cbuf
const uint8_t * cbuf_end
int cbuf_type
int capture_count
int stack_size_max
BOOL multi_line
BOOL ignore_case
BOOL is_utf16
void * opaque
size_t state_size
uint8_t * state_stack
size_t state_stack_size
size_t state_stack_len

◆ REParseState.u

union REParseState.u
Data Fields
char error_msg[TMP_BUF_SIZE]
char tmp_buf[TMP_BUF_SIZE]

Macro Definition Documentation

◆ DEF [1/2]

#define DEF (   id,
  size 
)    REOP_ ## id,

◆ CAPTURE_COUNT_MAX

#define CAPTURE_COUNT_MAX   255

◆ STACK_SIZE_MAX

#define STACK_SIZE_MAX   255

◆ CP_LS

#define CP_LS   0x2028

◆ CP_PS

#define CP_PS   0x2029

◆ TMP_BUF_SIZE

#define TMP_BUF_SIZE   128

◆ DEF [2/2]

#define DEF (   id,
  size 
)    { size },

◆ RE_HEADER_FLAGS

#define RE_HEADER_FLAGS   0

◆ RE_HEADER_CAPTURE_COUNT

#define RE_HEADER_CAPTURE_COUNT   1

◆ RE_HEADER_STACK_SIZE

#define RE_HEADER_STACK_SIZE   2

◆ RE_HEADER_LEN

#define RE_HEADER_LEN   7

◆ CLASS_RANGE_BASE

#define CLASS_RANGE_BASE   0x40000000

◆ GET_CHAR

#define GET_CHAR (   c,
  cptr,
  cbuf_end 
)
Value:
do { \
if (cbuf_type == 0) { \
c = *cptr++; \
} else { \
uint32_t __c1; \
c = *(uint16_t *)cptr; \
cptr += 2; \
if (c >= 0xd800 && c < 0xdc00 && \
cbuf_type == 2 && cptr < cbuf_end) { \
__c1 = *(uint16_t *)cptr; \
if (__c1 >= 0xdc00 && __c1 < 0xe000) { \
c = (((c & 0x3ff) << 10) | (__c1 & 0x3ff)) + 0x10000; \
cptr += 2; \
} \
} \
} \
} while (0)
unsigned short uint16_t
Definition: inttypes.h:14

◆ PEEK_CHAR

#define PEEK_CHAR (   c,
  cptr,
  cbuf_end 
)
Value:
do { \
if (cbuf_type == 0) { \
c = cptr[0]; \
} else { \
uint32_t __c1; \
c = ((uint16_t *)cptr)[0]; \
if (c >= 0xd800 && c < 0xdc00 && \
cbuf_type == 2 && (cptr + 2) < cbuf_end) { \
__c1 = ((uint16_t *)cptr)[1]; \
if (__c1 >= 0xdc00 && __c1 < 0xe000) { \
c = (((c & 0x3ff) << 10) | (__c1 & 0x3ff)) + 0x10000; \
} \
} \
} \
} while (0)

◆ PEEK_PREV_CHAR

#define PEEK_PREV_CHAR (   c,
  cptr,
  cbuf_start 
)
Value:
do { \
if (cbuf_type == 0) { \
c = cptr[-1]; \
} else { \
uint32_t __c1; \
c = ((uint16_t *)cptr)[-1]; \
if (c >= 0xdc00 && c < 0xe000 && \
cbuf_type == 2 && (cptr - 4) >= cbuf_start) { \
__c1 = ((uint16_t *)cptr)[-2]; \
if (__c1 >= 0xd800 && __c1 < 0xdc00 ) { \
c = (((__c1 & 0x3ff) << 10) | (c & 0x3ff)) + 0x10000; \
} \
} \
} \
} while (0)

◆ GET_PREV_CHAR

#define GET_PREV_CHAR (   c,
  cptr,
  cbuf_start 
)
Value:
do { \
if (cbuf_type == 0) { \
cptr--; \
c = cptr[0]; \
} else { \
uint32_t __c1; \
cptr -= 2; \
c = ((uint16_t *)cptr)[0]; \
if (c >= 0xdc00 && c < 0xe000 && \
cbuf_type == 2 && cptr > cbuf_start) { \
__c1 = ((uint16_t *)cptr)[-1]; \
if (__c1 >= 0xd800 && __c1 < 0xdc00 ) { \
cptr -= 2; \
c = (((__c1 & 0x3ff) << 10) | (c & 0x3ff)) + 0x10000; \
} \
} \
} \
} while (0)

◆ PREV_CHAR

#define PREV_CHAR (   cptr,
  cbuf_start 
)
Value:
do { \
if (cbuf_type == 0) { \
cptr--; \
} else { \
cptr -= 2; \
if (cbuf_type == 2) { \
c = ((uint16_t *)cptr)[0]; \
if (c >= 0xdc00 && c < 0xe000 && cptr > cbuf_start) { \
c = ((uint16_t *)cptr)[-1]; \
if (c >= 0xd800 && c < 0xdc00) \
cptr -= 2; \
} \
} \
} \
} while (0)

Typedef Documentation

◆ StackInt

typedef uintptr_t StackInt

Enumeration Type Documentation

◆ REOPCodeEnum

Enumerator
REOP_COUNT 

◆ CharRangeEnum

Enumerator
CHAR_RANGE_d 
CHAR_RANGE_D 
CHAR_RANGE_s 
CHAR_RANGE_S 
CHAR_RANGE_w 
CHAR_RANGE_W 

◆ REExecStateEnum

Enumerator
RE_EXEC_STATE_SPLIT 
RE_EXEC_STATE_LOOKAHEAD 
RE_EXEC_STATE_NEGATIVE_LOOKAHEAD 
RE_EXEC_STATE_GREEDY_QUANT 

Function Documentation

◆ is_digit()

static int is_digit ( int  c)
static
+ Here is the caller graph for this function:

◆ dbuf_insert()

static int dbuf_insert ( DynBuf *  s,
int  pos,
int  len 
)
static
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ lre_canonicalize()

static uint32_t lre_canonicalize ( uint32_t  c,
BOOL  is_utf16 
)
static
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ lre_is_space()

BOOL lre_is_space ( int  c)
+ Here is the caller graph for this function:

◆ cr_init_char_range()

static int cr_init_char_range ( REParseState *  s,
CharRange *  cr,
uint32_t  c 
)
static
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ cr_canonicalize()

static int cr_canonicalize ( CharRange *  cr)
static
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ re_emit_op()

static void re_emit_op ( REParseState *  s,
int  op 
)
static
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ re_emit_op_u32()

static int re_emit_op_u32 ( REParseState *  s,
int  op,
uint32_t  val 
)
static
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ re_emit_goto()

static int re_emit_goto ( REParseState *  s,
int  op,
uint32_t  val 
)
static
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ re_emit_op_u8()

static void re_emit_op_u8 ( REParseState *  s,
int  op,
uint32_t  val 
)
static
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ re_emit_op_u16()

static void re_emit_op_u16 ( REParseState *  s,
int  op,
uint32_t  val 
)
static
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ __attribute__()

static int __attribute__ ( (format(printf, 2, 3))  )
static

◆ re_parse_out_of_memory()

static int re_parse_out_of_memory ( REParseState *  s)
static
+ Here is the caller graph for this function:

◆ parse_digits()

static int parse_digits ( const uint8_t **  pp,
BOOL  allow_overflow 
)
static
+ Here is the caller graph for this function:

◆ re_parse_expect()

static int re_parse_expect ( REParseState *  s,
const uint8_t **  pp,
int  c 
)
static
+ Here is the caller graph for this function:

◆ lre_parse_escape()

int lre_parse_escape ( const uint8_t **  pp,
int  allow_utf16 
)
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ is_unicode_char()

static BOOL is_unicode_char ( int  c)
static
+ Here is the caller graph for this function:

◆ parse_unicode_property()

static int parse_unicode_property ( REParseState *  s,
CharRange *  cr,
const uint8_t **  pp,
BOOL  is_inv 
)
static
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ get_class_atom()

static int get_class_atom ( REParseState *  s,
CharRange *  cr,
const uint8_t **  pp,
BOOL  inclass 
)
static
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ re_emit_range()

static int re_emit_range ( REParseState *  s,
const CharRange *  cr 
)
static
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ re_parse_char_class()

static int re_parse_char_class ( REParseState *  s,
const uint8_t **  pp 
)
static
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ re_check_advance()

static int re_check_advance ( const uint8_t *  bc_buf,
int  bc_buf_len 
)
static
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ re_is_simple_quantifier()

static int re_is_simple_quantifier ( const uint8_t *  bc_buf,
int  bc_buf_len 
)
static
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ re_parse_group_name()

static int re_parse_group_name ( char *  buf,
int  buf_size,
const uint8_t **  pp,
BOOL  is_utf16 
)
static
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ re_parse_captures()

static int re_parse_captures ( REParseState *  s,
int *  phas_named_captures,
const char *  capture_name 
)
static
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ re_count_captures()

static int re_count_captures ( REParseState *  s)
static
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ re_has_named_captures()

static BOOL re_has_named_captures ( REParseState *  s)
static
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ find_group_name()

static int find_group_name ( REParseState *  s,
const char *  name 
)
static
+ Here is the caller graph for this function:

◆ re_parse_disjunction()

static int re_parse_disjunction ( REParseState *  s,
BOOL  is_backward_dir 
)
static
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ re_parse_term()

static int re_parse_term ( REParseState *  s,
BOOL  is_backward_dir 
)
static
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ re_parse_alternative()

static int re_parse_alternative ( REParseState *  s,
BOOL  is_backward_dir 
)
static
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ compute_stack_size()

static int compute_stack_size ( const uint8_t *  bc_buf,
int  bc_buf_len 
)
static
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ lre_compile()

uint8_t* lre_compile ( int *  plen,
char *  error_msg,
int  error_msg_size,
const char *  buf,
size_t  buf_len,
int  re_flags,
void *  opaque 
)
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ is_line_terminator()

static BOOL is_line_terminator ( uint32_t  c)
static
+ Here is the caller graph for this function:

◆ is_word_char()

static BOOL is_word_char ( uint32_t  c)
static
+ Here is the caller graph for this function:

◆ push_state()

static int push_state ( REExecContext *  s,
uint8_t **  capture,
StackInt *  stack,
size_t  stack_len,
const uint8_t *  pc,
const uint8_t *  cptr,
REExecStateEnum  type,
size_t  count 
)
static
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ lre_exec_backtrack()

static intptr_t lre_exec_backtrack ( REExecContext *  s,
uint8_t **  capture,
StackInt *  stack,
int  stack_len,
const uint8_t *  pc,
const uint8_t *  cptr,
BOOL  no_recurse 
)
static
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ lre_exec()

int lre_exec ( uint8_t **  capture,
const uint8_t *  bc_buf,
const uint8_t *  cbuf,
int  cindex,
int  clen,
int  cbuf_type,
void *  opaque 
)
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ lre_get_capture_count()

int lre_get_capture_count ( const uint8_t *  bc_buf)
+ Here is the caller graph for this function:

◆ lre_get_flags()

int lre_get_flags ( const uint8_t *  bc_buf)
+ Here is the caller graph for this function:

◆ lre_get_groupnames()

const char* lre_get_groupnames ( const uint8_t *  bc_buf)
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

Variable Documentation

◆ reopcode_info

const REOpCode reopcode_info[REOP_COUNT]
static
Initial value:
= {
#define DEF(id, size)
}

◆ char_range_d

const uint16_t char_range_d[]
static
Initial value:
= {
1,
0x0030, 0x0039 + 1,
}

◆ char_range_s

const uint16_t char_range_s[]
static
Initial value:
= {
10,
0x0009, 0x000D + 1,
0x0020, 0x0020 + 1,
0x00A0, 0x00A0 + 1,
0x1680, 0x1680 + 1,
0x2000, 0x200A + 1,
0x2028, 0x2029 + 1,
0x202F, 0x202F + 1,
0x205F, 0x205F + 1,
0x3000, 0x3000 + 1,
0xFEFF, 0xFEFF + 1,
}

◆ lre_id_start_table_ascii

uint32_t const lre_id_start_table_ascii[4]
Initial value:
= {
0x00000000, 0x00000010, 0x87FFFFFE, 0x07FFFFFE
}

◆ lre_id_continue_table_ascii

uint32_t const lre_id_continue_table_ascii[4]
Initial value:
= {
0x00000000, 0x03FF0010, 0x87FFFFFE, 0x07FFFFFE
}

◆ char_range_w

const uint16_t char_range_w[]
static
Initial value:
= {
4,
0x0030, 0x0039 + 1,
0x0041, 0x005A + 1,
0x005F, 0x005F + 1,
0x0061, 0x007A + 1,
}

◆ char_range_table

const uint16_t* char_range_table[]
static
Initial value:
= {
char_range_d,
char_range_s,
char_range_w,
}
static const uint16_t char_range_w[]
Definition: libregexp.c:199
static const uint16_t char_range_s[]
Definition: libregexp.c:156
static const uint16_t char_range_d[]
Definition: libregexp.c:150