mirror of
git://git.psyced.org/git/psyclpc
synced 2024-08-15 03:20:16 +00:00
removed historic pcre bundling
This commit is contained in:
parent
27f21a3bf0
commit
8bd51f2a48
34 changed files with 26 additions and 13115 deletions
2
.gitignore
vendored
2
.gitignore
vendored
|
@ -22,7 +22,6 @@ src/autoconf/autom4te.cache
|
|||
src/config.h
|
||||
src/config.status
|
||||
src/configure-do
|
||||
src/dftables
|
||||
src/efun_defs.c
|
||||
src/instrs.h
|
||||
src/lang.c
|
||||
|
@ -33,7 +32,6 @@ src/machine.h.in
|
|||
src/make_func.c
|
||||
src/mkfunc
|
||||
src/patchlevel.h
|
||||
src/pcre/chartables.c
|
||||
src/psyclpc
|
||||
src/settings/nedko_psyced
|
||||
src/settings/psyced-current
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
[see git log for recent changes]
|
||||
|
||||
2010-06-21 (nedko/lynX) (4.0.14)
|
||||
- removed traditional configure script from distribution
|
||||
new ./configure uses autotools to generate the configure script
|
||||
|
|
|
@ -62,11 +62,6 @@ Inc.
|
|||
The Apache-compatible MD5 password encryption is Copyright (C) 2000 Apache
|
||||
Software Foundation.
|
||||
|
||||
The PCRE (Perl Compatible Regular Expression) is Copyright (C) 1997-2001 by
|
||||
Philip Hazel. LDMud contains just the files required for the driver (with
|
||||
small modifications); see pcre/LICENCE for the licence terms and the location
|
||||
of the full package, and pcre/README.LDMUD for an explanation of the changes.
|
||||
|
||||
The lpc-mode.el for EMACS is Copyright (C) 2002 Vivek Dasmohapatra
|
||||
<vivek@etla.org>, and distributed under the GPL.
|
||||
|
||||
|
|
1
TODO
1
TODO
|
@ -10,7 +10,6 @@ BUGS
|
|||
- configure should warn more vehemently when libidn is missing
|
||||
- libpsyc isn't recognized even if properly installed
|
||||
- x86_64 seems to require -ldl explicitly at the end of libs
|
||||
- sometimes -lpsyc and -lpcre are added twice to $LIBS !?
|
||||
- should autoconf to sysmalloc also for osol (OpenSolaris)
|
||||
- #define USE_EXPAT und JSON werden trotzdem gesetzt in config.h
|
||||
auch wenn configure keine libs gefunden hat (egal, wir verwenden beide nicht)
|
||||
|
|
|
@ -105,8 +105,8 @@ SRC = access_check.c actions.c array.c backend.c bitstrings.c call_out.c \
|
|||
interpret.c \
|
||||
lex.c main.c mapping.c md5.c mempools.c mregex.c mstrings.c object.c \
|
||||
otable.c\
|
||||
parser.c parse.c pkg-alists.c pgk-iksemel.c pkg-idna.c pkg-expat.c \
|
||||
pkg-psyc.c pkg-mccp.c pkg-mysql.c pkg-pcre.c \
|
||||
parser.c parse.c pkg-alists.c pkg-iksemel.c pkg-idna.c pkg-expat.c \
|
||||
pkg-psyc.c pkg-mccp.c pkg-mysql.c \
|
||||
pkg-pgsql.c pkg-sqlite.c pkg-tls.c pkg-gnunet.c \
|
||||
ptmalloc.c port.c ptrtable.c \
|
||||
random.c regexp.c sha1.c simulate.c simul_efun.c stdstrings.c \
|
||||
|
@ -118,7 +118,7 @@ OBJ = access_check.o actions.o array.o backend.o bitstrings.o call_out.o \
|
|||
lex.o main.o mapping.o md5.o mempools.o mregex.o mstrings.o object.o \
|
||||
otable.o \
|
||||
parser.o parse.o pkg-alists.o pkg-iksemel.o pkg-idna.o pkg-expat.o \
|
||||
pkg-psyc.o pkg-mccp.o pkg-mysql.o pkg-pcre.o \
|
||||
pkg-psyc.o pkg-mccp.o pkg-mysql.o \
|
||||
pkg-pgsql.o pkg-sqlite.o pkg-tls.o pkg-gnunet.o \
|
||||
ptmalloc.o port.o ptrtable.o \
|
||||
random.o regexp.o sha1.o simulate.o simul_efun.o stdstrings.o \
|
||||
|
@ -162,7 +162,6 @@ lint: *.c
|
|||
|
||||
clean:
|
||||
$(RM) $(YACCTAB)h $(YACCTAB)c make_func.c *.o mkfunc@EXEEXT@
|
||||
$(RM) dftables@EXEEXT@ pcre/chartables.c
|
||||
$(RM) stdstrings.c stdstrings.h
|
||||
$(RM) efun_defs.c instrs.h lang.y lang.h lang.c y.output tags TAGS
|
||||
$(RM) @PROGNAME@@EXEEXT@ core mudlib/core mudlib/debug.log
|
||||
|
@ -195,12 +194,6 @@ make_func.c: make_func.y
|
|||
mkfunc@EXEEXT@: mkfunc.o hash.o exec.h
|
||||
$(CC) @OPTIMIZE_LINKING@ $(LDFLAGS) mkfunc.o hash.o -o mkfunc@EXEEXT@
|
||||
|
||||
dftables.o : pcre/dftables.c pcre/maketables.c pcre/config.h
|
||||
$(CC) $(CFLAGS) -c pcre/dftables.c -o dftables.o
|
||||
|
||||
dftables@EXEEXT@ : dftables.o
|
||||
$(CC) @OPTIMIZE_LINKING@ $(LDFLAGS) dftables.o -o dftables@EXEEXT@
|
||||
|
||||
lang.y: mkfunc@EXEEXT@ prolang.y config.h
|
||||
$(RM) lang.y
|
||||
./mkfunc@EXEEXT@ lang
|
||||
|
@ -221,9 +214,6 @@ lang.c lang.h: lang.y
|
|||
@CLEAN_YACC_TAB@
|
||||
$(MV) $(YACCTAB)h lang.h
|
||||
|
||||
pcre/chartables.c : dftables@EXEEXT@
|
||||
./dftables@EXEEXT@ pcre/chartables.c
|
||||
|
||||
random.o : random.c config.h driver.h
|
||||
$(CC) $(CFLAGS) $(SFMT_FLAGS) -c random.c -o random.o
|
||||
|
||||
|
@ -232,10 +222,10 @@ random.o : random.c config.h driver.h
|
|||
|
||||
# Generated source files (overlaps with SRC) which need to be
|
||||
# present for mkdepend to work.
|
||||
GENSRC = make_func.c stdstrings.c lang.c instrs.h pcre/chartables.c
|
||||
GENSRC = make_func.c stdstrings.c lang.c instrs.h
|
||||
|
||||
# Macros for MkDepend:
|
||||
SKELETON = $(SRC) mkfunc.c pcre/dftables.c
|
||||
SKELETON = $(SRC) mkfunc.c
|
||||
EXCEPT = -x efun_defs.c
|
||||
SELECT = -S instrs.h -S stdstrings.h -S make_func.c -S lang.c -S lang.h -S efun_defs.c
|
||||
|
||||
|
@ -407,7 +397,7 @@ mkfunc.o : make_func.c ../mudlib/sys/driver_hook.h hash.h exec.h \
|
|||
mregex.o : ../mudlib/sys/regexp.h ../mudlib/sys/driver_hook.h \
|
||||
../mudlib/sys/debug_info.h xalloc.h svalue.h strfuns.h simulate.h \
|
||||
regexp.h pkg-pcre.h mstrings.h main.h interpret.h hash.h gcollect.h \
|
||||
comm.h mregex.h driver.h typedefs.h sent.h bytecode.h pcre/pcre.h \
|
||||
comm.h mregex.h driver.h typedefs.h sent.h bytecode.h \
|
||||
backend.h pkg-tls.h port.h config.h hosts/unix.h hosts/be/be.h \
|
||||
machine.h
|
||||
|
||||
|
@ -443,8 +433,6 @@ parser.o : lang.c ../mudlib/sys/driver_hook.h i-eval_cost.h xalloc.h \
|
|||
my-alloca.h typedefs.h driver.h strfuns.h hash.h ptrtable.h sent.h \
|
||||
bytecode.h port.h config.h hosts/unix.h hosts/be/be.h machine.h
|
||||
|
||||
dftables.o : pcre/maketables.c pcre/internal.h pcre/pcre.h pcre/config.h
|
||||
|
||||
pkg-alists.o : i-svalue_cmp.h xalloc.h svalue.h simulate.h mstrings.h \
|
||||
main.h interpret.h array.h my-alloca.h pkg-alists.h typedefs.h driver.h \
|
||||
closure.h strfuns.h sent.h bytecode.h hash.h backend.h port.h config.h \
|
||||
|
@ -465,12 +453,6 @@ pkg-mysql.o : xalloc.h svalue.h stdstrings.h simulate.h mstrings.h main.h \
|
|||
driver.h strfuns.h sent.h bytecode.h hash.h exec.h backend.h port.h \
|
||||
config.h hosts/unix.h hosts/be/be.h machine.h
|
||||
|
||||
pkg-pcre.o : pcre/study.c pcre/maketables.c pcre/get.c pcre/pcre.c \
|
||||
simulate.h interpret.h pkg-pcre.h driver.h pcre/internal.h \
|
||||
pcre/chartables.c svalue.h strfuns.h sent.h bytecode.h typedefs.h \
|
||||
backend.h pcre/pcre.h port.h config.h pcre/config.h main.h hosts/unix.h \
|
||||
hosts/be/be.h machine.h
|
||||
|
||||
pkg-pgsql.o : ../mudlib/sys/pgsql.h xalloc.h stdstrings.h simulate.h \
|
||||
mstrings.h mapping.h main.h interpret.h instrs.h gcollect.h array.h \
|
||||
actions.h pkg-pgsql.h my-alloca.h typedefs.h driver.h svalue.h \
|
||||
|
@ -507,8 +489,7 @@ random.o : random.h driver.h port.h config.h hosts/unix.h hosts/be/be.h \
|
|||
|
||||
regexp.o : i-eval_cost.h main.h xalloc.h simulate.h regexp.h driver.h \
|
||||
interpret.h typedefs.h svalue.h strfuns.h sent.h bytecode.h pkg-pcre.h \
|
||||
port.h config.h backend.h pcre/pcre.h hosts/unix.h hosts/be/be.h \
|
||||
machine.h
|
||||
port.h config.h backend.h hosts/unix.h hosts/be/be.h machine.h
|
||||
|
||||
sha1.o : sha1.h my-stdint.h driver.h port.h config.h hosts/unix.h \
|
||||
hosts/be/be.h machine.h
|
||||
|
|
|
@ -143,7 +143,7 @@ AC_MY_ARG_ENABLE(use-swap,no,,[Enables support for LPC memory swapping])
|
|||
AC_MY_ARG_ENABLE(use-ldmud-compatibility,yes,,[Makes psyclpc behave more like LDMUD than you will want])
|
||||
|
||||
AC_MY_ARG_ENABLE(use-pthreads,no,,[Enables using of threads for socket writes])
|
||||
AC_MY_ARG_ENABLE(use-pcre,yes,,[Enables PCRE: no/yes/builtin/no-builtin])
|
||||
AC_MY_ARG_ENABLE(use-pcre,yes,,[Enables PCRE: yes/no])
|
||||
AC_MY_ARG_ENABLE(use-iksemel,no,,[Enables use of iksemel for XML parsing])
|
||||
AC_MY_ARG_ENABLE(use-deprecated,yes,,[Enables obsolete and deprecated efuns])
|
||||
AC_MY_ARG_ENABLE(use-structs,yes,,[Enables structs])
|
||||
|
@ -289,16 +289,6 @@ if test "x$enable_use_pcre" = "x" || test "x$enable_use_pcre" = "xyes"; then
|
|||
cdef_use_pcre="#define"
|
||||
cdef_use_builtin_pcre="#undef"
|
||||
enable_use_builtin_pcre="no"
|
||||
elif test "x$enable_use_pcre" = "xbuiltin"; then
|
||||
cdef_use_pcre="#define"
|
||||
cdef_use_builtin_pcre="#define"
|
||||
enable_use_pcre="yes"
|
||||
enable_use_builtin_pcre="yes"
|
||||
elif test "x$enable_use_pcre" = "xno-builtin"; then
|
||||
cdef_use_pcre="#undef"
|
||||
cdef_use_builtin_pcre="#define"
|
||||
enable_use_pcre="no"
|
||||
enable_use_builtin_pcre="yes"
|
||||
else
|
||||
cdef_use_pcre="#undef"
|
||||
cdef_use_builtin_pcre="#undef"
|
||||
|
@ -1320,11 +1310,7 @@ int main(void)
|
|||
AC_DEFINE(HAS_PCRE, 1, [Does the machine offer PCRE?])
|
||||
PKGLIBS="$PKGLIBS -lpcre"
|
||||
else
|
||||
if test $enable_use_builtin_pcre = no ; then
|
||||
echo "PCRE not available in the system - using builtin version."
|
||||
enable_use_builtin_pcre="yes"
|
||||
cdef_use_builtin_pcre="#define"
|
||||
fi
|
||||
echo "PCRE not available in the system."
|
||||
fi
|
||||
fi
|
||||
|
||||
|
@ -2990,7 +2976,6 @@ AC_SUBST(cdef_use_alists)
|
|||
AC_SUBST(cdef_use_psyc)
|
||||
AC_SUBST(cdef_use_mccp)
|
||||
AC_SUBST(cdef_use_pcre)
|
||||
AC_SUBST(cdef_use_builtin_pcre)
|
||||
AC_SUBST(cdef_use_deprecated)
|
||||
AC_SUBST(cdef_use_structs)
|
||||
AC_SUBST(cdef_use_tls)
|
||||
|
|
|
@ -383,11 +383,6 @@
|
|||
*/
|
||||
@cdef_use_pcre@ USE_PCRE
|
||||
|
||||
/* Define this if you want to use builtin PCRE (ignored when PCRE
|
||||
* is disabled as a whole).
|
||||
*/
|
||||
@cdef_use_builtin_pcre@ USE_BUILTIN_PCRE
|
||||
|
||||
/* Define this if you want iksemel library support.
|
||||
*/
|
||||
@cdef_use_iksemel@ USE_IKSEMEL
|
||||
|
|
|
@ -337,14 +337,8 @@
|
|||
|
||||
/* Define this if you want PCRE instead of traditional regexps.
|
||||
*/
|
||||
|
||||
#define USE_PCRE
|
||||
|
||||
/* Define this if you want to use builtin PCRE (ignored when PCRE
|
||||
* is disabled as a whole).
|
||||
*/
|
||||
#undef USE_BUILTIN_PCRE
|
||||
|
||||
/* Define this if you want MCCP (Mud Control Compression Protocol).
|
||||
*/
|
||||
#define USE_MCCP
|
||||
|
|
|
@ -103,17 +103,17 @@ endif
|
|||
interpret.c lex.c main.c mapping.c md5.c mempools.c mregex.c \
|
||||
mstrings.c \
|
||||
object.c otable.c parser.c parse.c \
|
||||
pkg-alists.c pkg-mccp.c pkg-mysql.c pkg-pcre.c pkg-pgsql.c \
|
||||
pkg-alists.c pkg-mccp.c pkg-mysql.c pkg-pgsql.c \
|
||||
pkg-sqlite.c pkg-tls.c ptmalloc.c port.c ptrtable.c random.c \
|
||||
regexp.c sha1.c simulate.c simul_efun.c stdstrings.c \
|
||||
strfuns.c structs.c sprintf.c swap.c wiz_list.c xalloc.c
|
||||
|
||||
# Generated source files (overlaps with SRCS) which need to be
|
||||
# present for mkdepend to work.
|
||||
GENSRCS = make_func.c stdstrings.c lang.c instrs.h pcre/chartables.c
|
||||
GENSRCS = make_func.c stdstrings.c lang.c instrs.h
|
||||
|
||||
# Macros for MkDepend:
|
||||
SKELETON = $(SRCS) mkfunc.c pcre/dftables.c
|
||||
SKELETON = $(SRCS) mkfunc.c
|
||||
EXCEPT = -x efun_defs.c
|
||||
SELECT = -S instrs.h -S stdstrings.h -S make_func.c -S lang.c -S lang.h -S efun_defs.c
|
||||
|
||||
|
@ -279,11 +279,10 @@ endif
|
|||
|
||||
clean :: FORCE
|
||||
-rm -f $(YACCTAB)h $(YACCTAB)c make_func.c $(OBJ)/mkfunc
|
||||
-rm -f $(OBJ)/dftables pcre/chartables.c
|
||||
-rm -f *~ efun_defs.c instrs.h lang.y lang.h lang.c y.output tags TAGS
|
||||
-rm -f stdstrings.c stdstrings.h
|
||||
-rm -f hosts/*~ hosts/*/*~ bugs/*~ done/*~ $(OBJ)/*.o
|
||||
-rm -f pcre/*~ wk/*~ settings/*~
|
||||
-rm -f wk/*~ settings/*~
|
||||
|
||||
cleanall :: clean
|
||||
-rm -f $(OBJ_CROSS)/*.o
|
||||
|
@ -318,7 +317,7 @@ depend-generic: $(SRCS) $(GENSRCS) $(OBJ)
|
|||
|
||||
AMIGASRCS:=
|
||||
|
||||
# Special rules for making mkfunc and dftables, depending on whether we're
|
||||
# Special rules for making mkfunc, depending on whether we're
|
||||
# crosscompiling or not.
|
||||
|
||||
make_func.c : make_func.y
|
||||
|
@ -338,19 +337,6 @@ $(OBJ_NATIVE)/mkfunc :
|
|||
make CPU=$(NATIVE) $@
|
||||
endif
|
||||
|
||||
$(OBJ)/dftables.o : pcre/dftables.c pcre/maketables.c pcre/config.h
|
||||
$(CC) $(INCLUDES) $(CFLAGS) -c $< -o $@
|
||||
|
||||
$(OBJ)/dftables : $(OBJ)/dftables.o
|
||||
$(LD) -o $@ $^ $(LDFLAGS)
|
||||
$(MIMESET) -f $@
|
||||
|
||||
ifneq ($(CPU), $(NATIVE))
|
||||
$(OBJ_NATIVE)/dftables :
|
||||
@[ -d $(OBJ_NATIVE) ] || mkdir $(OBJ_NATIVE) > /dev/null 2>&1
|
||||
make CPU=$(NATIVE) $@
|
||||
endif
|
||||
|
||||
# The making of the compiler and associated files.
|
||||
|
||||
efun_defs.c instrs.h : func_spec config.h $(OBJ_NATIVE)/mkfunc
|
||||
|
@ -370,9 +356,6 @@ lang.c lang.h : lang.y
|
|||
mv $(YACCTAB)c lang.c
|
||||
mv $(YACCTAB)h lang.h
|
||||
|
||||
pcre/chartables.c : $(OBJ_NATIVE)/dftables
|
||||
$(OBJ_NATIVE)/dftables > pcre/chartables.c
|
||||
|
||||
# Be Resource Mangling
|
||||
|
||||
$(RSRC_FULL) : hosts/be/driver.r hosts/be/icon-32x32.raw hosts/be/icon-16x16.raw
|
||||
|
@ -544,7 +527,7 @@ $(OBJ)/mkfunc.o : make_func.c ../mudlib/sys/driver_hook.h hash.h exec.h \
|
|||
$(OBJ)/mregex.o : ../mudlib/sys/regexp.h ../mudlib/sys/driver_hook.h \
|
||||
../mudlib/sys/debug_info.h xalloc.h svalue.h strfuns.h simulate.h \
|
||||
regexp.h pkg-pcre.h mstrings.h main.h interpret.h hash.h gcollect.h \
|
||||
comm.h mregex.h driver.h typedefs.h sent.h bytecode.h pcre/pcre.h \
|
||||
comm.h mregex.h driver.h typedefs.h sent.h bytecode.h \
|
||||
backend.h pkg-tls.h port.h config.h hosts/unix.h hosts/be/be.h \
|
||||
machine.h
|
||||
|
||||
|
@ -580,9 +563,6 @@ $(OBJ)/parser.o : lang.c ../mudlib/sys/driver_hook.h xalloc.h wiz_list.h \
|
|||
typedefs.h driver.h strfuns.h hash.h ptrtable.h sent.h bytecode.h \
|
||||
port.h config.h hosts/unix.h hosts/be/be.h machine.h
|
||||
|
||||
$(OBJ)/dftables.o : pcre/maketables.c pcre/internal.h pcre/pcre.h \
|
||||
pcre/config.h
|
||||
|
||||
$(OBJ)/pkg-alists.o : i-svalue_cmp.h xalloc.h svalue.h simulate.h \
|
||||
mstrings.h main.h interpret.h array.h my-alloca.h pkg-alists.h \
|
||||
typedefs.h driver.h closure.h strfuns.h sent.h bytecode.h hash.h \
|
||||
|
@ -603,12 +583,6 @@ $(OBJ)/pkg-mysql.o : xalloc.h svalue.h stdstrings.h simulate.h mstrings.h \
|
|||
driver.h strfuns.h sent.h bytecode.h hash.h exec.h backend.h port.h \
|
||||
config.h hosts/unix.h hosts/be/be.h machine.h
|
||||
|
||||
$(OBJ)/pkg-pcre.o : pcre/study.c pcre/maketables.c pcre/get.c pcre/pcre.c \
|
||||
simulate.h interpret.h pkg-pcre.h driver.h pcre/internal.h \
|
||||
pcre/chartables.c svalue.h strfuns.h sent.h bytecode.h typedefs.h \
|
||||
backend.h pcre/pcre.h port.h config.h pcre/config.h main.h hosts/unix.h \
|
||||
hosts/be/be.h machine.h
|
||||
|
||||
$(OBJ)/pkg-pgsql.o : ../mudlib/sys/pgsql.h xalloc.h stdstrings.h simulate.h \
|
||||
mstrings.h mapping.h main.h interpret.h instrs.h gcollect.h array.h \
|
||||
actions.h pkg-pgsql.h my-alloca.h typedefs.h driver.h svalue.h \
|
||||
|
@ -642,7 +616,7 @@ $(OBJ)/random.o : random.h driver.h port.h config.h hosts/unix.h \
|
|||
|
||||
$(OBJ)/regexp.o : main.h xalloc.h simulate.h interpret.h regexp.h driver.h \
|
||||
typedefs.h svalue.h strfuns.h sent.h bytecode.h backend.h pkg-pcre.h \
|
||||
port.h config.h pcre/pcre.h hosts/unix.h hosts/be/be.h machine.h
|
||||
port.h config.h hosts/unix.h hosts/be/be.h machine.h
|
||||
|
||||
$(OBJ)/sha1.o : sha1.h my-stdint.h driver.h port.h config.h hosts/unix.h \
|
||||
hosts/be/be.h machine.h
|
||||
|
|
|
@ -325,11 +325,6 @@
|
|||
*/
|
||||
#define USE_PCRE
|
||||
|
||||
/* Define this if you want to use builtin PCRE (ignored when PCRE
|
||||
* is disabled as a whole).
|
||||
*/
|
||||
#undef USE_BUILTIN_PCRE
|
||||
|
||||
/* Define this if you want MCCP (Mud Control Compression Protocol).
|
||||
*/
|
||||
#define USE_MCCP
|
||||
|
|
|
@ -72,14 +72,14 @@ MFLAGS = "BINDIR=$(BINDIR)" "MUD_LIB=$(MUD_LIB)"
|
|||
SRC = access_check.c actions.c array.c backend.c call_out.c closure.c comm.c \
|
||||
dumpstat.c ed.c efuns.c gcollect.c hash.c heartbeat.c interpret.c \
|
||||
parser.c lex.c main.c mapping.c mempools.c object.c otable.c parse.c \
|
||||
pkg-alists.c pkg-mccp.c pkg-mysql.c pkg-pcre.c pkg-pgsql.c \
|
||||
pkg-alists.c pkg-mccp.c pkg-mysql.c pkg-pgsql.c \
|
||||
pkg-sqlite.c pkg-tls.c ptmalloc.c port.c ptrtable.c md5.c \
|
||||
random.c regexp.c mregex.c sha1.c simulate.c simul_efun.c stdstrings.c \
|
||||
stralloc.c strfuns.c structs.c sprintf.c swap.c wiz_list.c xalloc.c
|
||||
OBJ = access_check.o actions.o array.o backend.o call_out.o closure.o comm.o \
|
||||
dumpstat.o ed.o efuns.o gcollect.o hash.o heartbeat.o interpret.o \
|
||||
parser.o lex.o main.o mapping.o mempools.o object.o otable.o parse.o \
|
||||
pkg-alists.o pkg-mccp.o pkg-mysql.o pkg-pcre.o pkg-pgsql.o \
|
||||
pkg-alists.o pkg-mccp.o pkg-mysql.o pkg-pgsql.o \
|
||||
pkg-sqlite.o pkg-tls.o ptmalloc.o port.o ptrtable.o md5.o\
|
||||
random.o regexp.o mregex.o sha1.o simulate.o simul_efun.o stdstrings.o \
|
||||
stralloc.o strfuns.o structs.o sprintf.o swap.o wiz_list.o xalloc.o
|
||||
|
@ -106,7 +106,6 @@ lint: *.c
|
|||
|
||||
clean:
|
||||
$(RM) $(YACCTAB)h $(YACCTAB)c make_func.c *.o mkfunc.exe
|
||||
$(RM) dftables.exe pcre/chartables.c
|
||||
$(RM) stdstrings.c stdstrings.h
|
||||
$(RM) efun_defs.c instrs.h lang.y lang.h lang.c y.output tags TAGS
|
||||
$(RM) parse core mudlib/core mudlib/debug.log lpmud.log ldmud
|
||||
|
@ -134,12 +133,6 @@ mkfunc.o : mkfunc.c make_func.c driver.h config.h machine.h port.h
|
|||
mkfunc: mkfunc.o hash.o exec.h
|
||||
$(CC) $(OPTIMIZE) $(LDFLAGS) mkfunc.o hash.o -o mkfunc
|
||||
|
||||
dftables.o : pcre/dftables.c pcre/maketables.c pcre/config.h
|
||||
$(CC) $(CFLAGS) -c pcre/dftables.c -o dftables.o
|
||||
|
||||
dftables : dftables.o
|
||||
$(CC) $(OPTIMIZE) (LDFLAGS) dftables.o -o dftables
|
||||
|
||||
lang.y: mkfunc prolang.y config.h
|
||||
$(RM) lang.y
|
||||
./mkfunc lang
|
||||
|
@ -160,9 +153,6 @@ lang.c lang.h: lang.y
|
|||
|
||||
$(MV) $(YACCTAB)h lang.h
|
||||
|
||||
pcre/chartables.c : dftables
|
||||
./dftables > pcre/chartables.c
|
||||
|
||||
#--------------------------------------------------------
|
||||
# Dependencies, manual and automatic.
|
||||
|
||||
|
@ -271,7 +261,7 @@ lex.o : efun_defs.c ../mudlib/sys/driver_hook.h xalloc.h wiz_list.h \
|
|||
config.h hosts/unix.h hosts/be/be.h machine.h
|
||||
|
||||
main.o : xalloc.h wiz_list.h swap.h svalue.h stdstrings.h simul_efun.h \
|
||||
simulate.h rxcache.h random.h pcre/pcre.h patchlevel.h otable.h \
|
||||
simulate.h rxcache.h random.h patchlevel.h otable.h \
|
||||
object.h mstrings.h mapping.h lex.h interpret.h gcollect.h filestat.h \
|
||||
comm.h array.h backend.h main.h my-alloca.h typedefs.h driver.h \
|
||||
ptrtable.h exec.h strfuns.h sent.h regexp.h instrs.h port.h config.h \
|
||||
|
@ -323,20 +313,6 @@ parser.o : lang.c pkg-alists.h ../mudlib/sys/driver_hook.h xalloc.h \
|
|||
my-alloca.h typedefs.h driver.h ptrtable.h strfuns.h sent.h port.h \
|
||||
config.h hosts/unix.h hosts/be/be.h machine.h
|
||||
|
||||
pcre-get.o : pcre/get.c driver.h pcre/internal.h port.h config.h \
|
||||
pcre/pcre.h pcre/config.h hosts/unix.h hosts/be/be.h machine.h
|
||||
|
||||
pcre-maketables.o : pcre/maketables.c driver.h pcre/internal.h port.h \
|
||||
config.h pcre/pcre.h pcre/config.h hosts/unix.h hosts/be/be.h machine.h
|
||||
|
||||
pcre-pcre.o : pcre/pcre.c driver.h pcre/chartables.c pcre/internal.h port.h \
|
||||
config.h pcre/pcre.h pcre/config.h hosts/unix.h hosts/be/be.h machine.h
|
||||
|
||||
pcre-study.o : pcre/study.c driver.h pcre/internal.h port.h config.h \
|
||||
pcre/pcre.h pcre/config.h hosts/unix.h hosts/be/be.h machine.h
|
||||
|
||||
dftables.o : pcre/maketables.c pcre/internal.h pcre/pcre.h pcre/config.h
|
||||
|
||||
pkg-alists.o : xalloc.h svalue.h simulate.h mstrings.h main.h interpret.h \
|
||||
array.h my-alloca.h pkg-alists.h typedefs.h driver.h strfuns.h sent.h \
|
||||
exec.h instrs.h port.h config.h hosts/unix.h hosts/be/be.h machine.h
|
||||
|
|
|
@ -328,14 +328,8 @@
|
|||
|
||||
/* Define this if you want PCRE instead of traditional regexps.
|
||||
*/
|
||||
|
||||
#define USE_PCRE
|
||||
|
||||
/* Define this if you want to use builtin PCRE (ignored when PCRE
|
||||
* is disabled as a whole).
|
||||
*/
|
||||
#undef USE_BUILTIN_PCRE
|
||||
|
||||
/* Define this if you want MCCP (Mud Control Compression Protocol).
|
||||
*/
|
||||
#define USE_MCCP
|
||||
|
|
|
@ -209,9 +209,6 @@ rx_pcre_version (void)
|
|||
{
|
||||
static char buf[40];
|
||||
sprintf(buf, "%d.%d", PCRE_MAJOR, PCRE_MINOR);
|
||||
# ifdef USE_BUILTIN_PCRE
|
||||
strcat(buf, " (builtin)");
|
||||
# endif
|
||||
return buf;
|
||||
} /* rx_pcre_version() */
|
||||
|
||||
|
|
|
@ -1,6 +0,0 @@
|
|||
Written by: Philip Hazel <ph10@cam.ac.uk>
|
||||
|
||||
University of Cambridge Computing Service,
|
||||
Cambridge, England. Phone: +44 1223 334714.
|
||||
|
||||
Copyright (c) 1997-2003 University of Cambridge
|
|
@ -1,54 +0,0 @@
|
|||
PCRE LICENCE
|
||||
------------
|
||||
|
||||
PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by: Philip Hazel <ph10@cam.ac.uk>
|
||||
|
||||
University of Cambridge Computing Service,
|
||||
Cambridge, England. Phone: +44 1223 334714.
|
||||
|
||||
Copyright (c) 1997-2003 University of Cambridge
|
||||
|
||||
Permission is granted to anyone to use this software for any purpose on any
|
||||
computer system, and to redistribute it freely, subject to the following
|
||||
restrictions:
|
||||
|
||||
1. This software is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
|
||||
2. The origin of this software must not be misrepresented, either by
|
||||
explicit claim or by omission. In practice, this means that if you use
|
||||
PCRE in software that you distribute to others, commercially or
|
||||
otherwise, you must put a sentence like this
|
||||
|
||||
Regular expression support is provided by the PCRE library package,
|
||||
which is open source software, written by Philip Hazel, and copyright
|
||||
by the University of Cambridge, England.
|
||||
|
||||
somewhere reasonably visible in your documentation and in any relevant
|
||||
files or online help data or similar. A reference to the ftp site for
|
||||
the source, that is, to
|
||||
|
||||
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/
|
||||
|
||||
should also be given in the documentation. However, this condition is not
|
||||
intended to apply to whole chains of software. If package A includes PCRE,
|
||||
it must acknowledge it, but if package B is software that includes package
|
||||
A, the condition is not imposed on package B (unless it uses PCRE
|
||||
independently).
|
||||
|
||||
3. Altered versions must be plainly marked as such, and must not be
|
||||
misrepresented as being the original software.
|
||||
|
||||
4. If PCRE is embedded in any software that is released under the GNU
|
||||
General Purpose Licence (GPL), or Lesser General Purpose Licence (LGPL),
|
||||
then the terms of that licence shall supersede any condition above with
|
||||
which it is incompatible.
|
||||
|
||||
The documentation for PCRE, supplied in the "doc" directory, is distributed
|
||||
under the same terms as the software itself.
|
||||
|
||||
End
|
1475
src/pcre/ChangeLog
1475
src/pcre/ChangeLog
File diff suppressed because it is too large
Load diff
|
@ -1,54 +0,0 @@
|
|||
PCRE LICENCE
|
||||
------------
|
||||
|
||||
PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by: Philip Hazel <ph10@cam.ac.uk>
|
||||
|
||||
University of Cambridge Computing Service,
|
||||
Cambridge, England. Phone: +44 1223 334714.
|
||||
|
||||
Copyright (c) 1997-2003 University of Cambridge
|
||||
|
||||
Permission is granted to anyone to use this software for any purpose on any
|
||||
computer system, and to redistribute it freely, subject to the following
|
||||
restrictions:
|
||||
|
||||
1. This software is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
|
||||
2. The origin of this software must not be misrepresented, either by
|
||||
explicit claim or by omission. In practice, this means that if you use
|
||||
PCRE in software that you distribute to others, commercially or
|
||||
otherwise, you must put a sentence like this
|
||||
|
||||
Regular expression support is provided by the PCRE library package,
|
||||
which is open source software, written by Philip Hazel, and copyright
|
||||
by the University of Cambridge, England.
|
||||
|
||||
somewhere reasonably visible in your documentation and in any relevant
|
||||
files or online help data or similar. A reference to the ftp site for
|
||||
the source, that is, to
|
||||
|
||||
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/
|
||||
|
||||
should also be given in the documentation. However, this condition is not
|
||||
intended to apply to whole chains of software. If package A includes PCRE,
|
||||
it must acknowledge it, but if package B is software that includes package
|
||||
A, the condition is not imposed on package B (unless it uses PCRE
|
||||
independently).
|
||||
|
||||
3. Altered versions must be plainly marked as such, and must not be
|
||||
misrepresented as being the original software.
|
||||
|
||||
4. If PCRE is embedded in any software that is released under the GNU
|
||||
General Purpose Licence (GPL), or Lesser General Purpose Licence (LGPL),
|
||||
then the terms of that licence shall supersede any condition above with
|
||||
which it is incompatible.
|
||||
|
||||
The documentation for PCRE, supplied in the "doc" directory, is distributed
|
||||
under the same terms as the software itself.
|
||||
|
||||
End
|
154
src/pcre/NEWS
154
src/pcre/NEWS
|
@ -1,154 +0,0 @@
|
|||
News about PCRE releases
|
||||
------------------------
|
||||
|
||||
Release 4.5 01-Dec-03
|
||||
---------------------
|
||||
|
||||
Again mainly a bug-fix and tidying release, with only a couple of new features:
|
||||
|
||||
1. It's possible now to compile PCRE so that it does not use recursive
|
||||
function calls when matching. Instead it gets memory from the heap. This slows
|
||||
things down, but may be necessary on systems with limited stacks.
|
||||
|
||||
2. UTF-8 string checking has been tightened to reject overlong sequences and to
|
||||
check that a starting offset points to the start of a character. Failure of the
|
||||
latter returns a new error code: PCRE_ERROR_BADUTF8_OFFSET.
|
||||
|
||||
3. PCRE can now be compiled for systems that use EBCDIC code.
|
||||
|
||||
|
||||
Release 4.4 21-Aug-03
|
||||
---------------------
|
||||
|
||||
This is mainly a bug-fix and tidying release. The only new feature is that PCRE
|
||||
checks UTF-8 strings for validity by default. There is an option to suppress
|
||||
this, just in case anybody wants that teeny extra bit of performance.
|
||||
|
||||
|
||||
Releases 4.1 - 4.3
|
||||
------------------
|
||||
|
||||
Sorry, I forgot about updating the NEWS file for these releases. Please take a
|
||||
look at ChangeLog.
|
||||
|
||||
|
||||
Release 4.0 17-Feb-03
|
||||
---------------------
|
||||
|
||||
There have been a lot of changes for the 4.0 release, adding additional
|
||||
functionality and mending bugs. Below is a list of the highlights of the new
|
||||
functionality. For full details of these features, please consult the
|
||||
documentation. For a complete list of changes, see the ChangeLog file.
|
||||
|
||||
1. Support for Perl's \Q...\E escapes.
|
||||
|
||||
2. "Possessive quantifiers" ?+, *+, ++, and {,}+ which come from Sun's Java
|
||||
package. They provide some syntactic sugar for simple cases of "atomic
|
||||
grouping".
|
||||
|
||||
3. Support for the \G assertion. It is true when the current matching position
|
||||
is at the start point of the match.
|
||||
|
||||
4. A new feature that provides some of the functionality that Perl provides
|
||||
with (?{...}). The facility is termed a "callout". The way it is done in PCRE
|
||||
is for the caller to provide an optional function, by setting pcre_callout to
|
||||
its entry point. To get the function called, the regex must include (?C) at
|
||||
appropriate points.
|
||||
|
||||
5. Support for recursive calls to individual subpatterns. This makes it really
|
||||
easy to get totally confused.
|
||||
|
||||
6. Support for named subpatterns. The Python syntax (?P<name>...) is used to
|
||||
name a group.
|
||||
|
||||
7. Several extensions to UTF-8 support; it is now fairly complete. There is an
|
||||
option for pcregrep to make it operate in UTF-8 mode.
|
||||
|
||||
8. The single man page has been split into a number of separate man pages.
|
||||
These also give rise to individual HTML pages which are put in a separate
|
||||
directory. There is an index.html page that lists them all. Some hyperlinking
|
||||
between the pages has been installed.
|
||||
|
||||
|
||||
Release 3.5 15-Aug-01
|
||||
---------------------
|
||||
|
||||
1. The configuring system has been upgraded to use later versions of autoconf
|
||||
and libtool. By default it builds both a shared and a static library if the OS
|
||||
supports it. You can use --disable-shared or --disable-static on the configure
|
||||
command if you want only one of them.
|
||||
|
||||
2. The pcretest utility is now installed along with pcregrep because it is
|
||||
useful for users (to test regexs) and by doing this, it automatically gets
|
||||
relinked by libtool. The documentation has been turned into a man page, so
|
||||
there are now .1, .txt, and .html versions in /doc.
|
||||
|
||||
3. Upgrades to pcregrep:
|
||||
(i) Added long-form option names like gnu grep.
|
||||
(ii) Added --help to list all options with an explanatory phrase.
|
||||
(iii) Added -r, --recursive to recurse into sub-directories.
|
||||
(iv) Added -f, --file to read patterns from a file.
|
||||
|
||||
4. Added --enable-newline-is-cr and --enable-newline-is-lf to the configure
|
||||
script, to force use of CR or LF instead of \n in the source. On non-Unix
|
||||
systems, the value can be set in config.h.
|
||||
|
||||
5. The limit of 200 on non-capturing parentheses is a _nesting_ limit, not an
|
||||
absolute limit. Changed the text of the error message to make this clear, and
|
||||
likewise updated the man page.
|
||||
|
||||
6. The limit of 99 on the number of capturing subpatterns has been removed.
|
||||
The new limit is 65535, which I hope will not be a "real" limit.
|
||||
|
||||
|
||||
Release 3.3 01-Aug-00
|
||||
---------------------
|
||||
|
||||
There is some support for UTF-8 character strings. This is incomplete and
|
||||
experimental. The documentation describes what is and what is not implemented.
|
||||
Otherwise, this is just a bug-fixing release.
|
||||
|
||||
|
||||
Release 3.0 01-Feb-00
|
||||
---------------------
|
||||
|
||||
1. A "configure" script is now used to configure PCRE for Unix systems. It
|
||||
builds a Makefile, a config.h file, and the pcre-config script.
|
||||
|
||||
2. PCRE is built as a shared library by default.
|
||||
|
||||
3. There is support for POSIX classes such as [:alpha:].
|
||||
|
||||
5. There is an experimental recursion feature.
|
||||
|
||||
----------------------------------------------------------------------------
|
||||
IMPORTANT FOR THOSE UPGRADING FROM VERSIONS BEFORE 2.00
|
||||
|
||||
Please note that there has been a change in the API such that a larger
|
||||
ovector is required at matching time, to provide some additional workspace.
|
||||
The new man page has details. This change was necessary in order to support
|
||||
some of the new functionality in Perl 5.005.
|
||||
|
||||
IMPORTANT FOR THOSE UPGRADING FROM VERSION 2.00
|
||||
|
||||
Another (I hope this is the last!) change has been made to the API for the
|
||||
pcre_compile() function. An additional argument has been added to make it
|
||||
possible to pass over a pointer to character tables built in the current
|
||||
locale by pcre_maketables(). To use the default tables, this new argument
|
||||
should be passed as NULL.
|
||||
|
||||
IMPORTANT FOR THOSE UPGRADING FROM VERSION 2.05
|
||||
|
||||
Yet another (and again I hope this really is the last) change has been made
|
||||
to the API for the pcre_exec() function. An additional argument has been
|
||||
added to make it possible to start the match other than at the start of the
|
||||
subject string. This is important if there are lookbehinds. The new man
|
||||
page has the details, but if you just want to convert existing programs, all
|
||||
you need to do is to stick in a new fifth argument to pcre_exec(), with a
|
||||
value of zero. For example, change
|
||||
|
||||
pcre_exec(pattern, extra, subject, length, options, ovec, ovecsize)
|
||||
to
|
||||
pcre_exec(pattern, extra, subject, length, 0, options, ovec, ovecsize)
|
||||
|
||||
****
|
365
src/pcre/README
365
src/pcre/README
|
@ -1,365 +0,0 @@
|
|||
README file for PCRE (Perl-compatible regular expression library)
|
||||
-----------------------------------------------------------------
|
||||
|
||||
The latest release of PCRE is always available from
|
||||
|
||||
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-xxx.tar.gz
|
||||
|
||||
Please read the NEWS file if you are upgrading from a previous release.
|
||||
|
||||
PCRE has its own native API, but a set of "wrapper" functions that are based on
|
||||
the POSIX API are also supplied in the library libpcreposix. Note that this
|
||||
just provides a POSIX calling interface to PCRE: the regular expressions
|
||||
themselves still follow Perl syntax and semantics. The header file
|
||||
for the POSIX-style functions is called pcreposix.h. The official POSIX name is
|
||||
regex.h, but I didn't want to risk possible problems with existing files of
|
||||
that name by distributing it that way. To use it with an existing program that
|
||||
uses the POSIX API, it will have to be renamed or pointed at by a link.
|
||||
|
||||
If you are using the POSIX interface to PCRE and there is already a POSIX regex
|
||||
library installed on your system, you must take care when linking programs to
|
||||
ensure that they link with PCRE's libpcreposix library. Otherwise they may pick
|
||||
up the "real" POSIX functions of the same name.
|
||||
|
||||
|
||||
Contributions by users of PCRE
|
||||
------------------------------
|
||||
|
||||
You can find contributions from PCRE users in the directory
|
||||
|
||||
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/Contrib
|
||||
|
||||
where there is also a README file giving brief descriptions of what they are.
|
||||
Several of them provide support for compiling PCRE on various flavours of
|
||||
Windows systems (I myself do not use Windows). Some are complete in themselves;
|
||||
others are pointers to URLs containing relevant files.
|
||||
|
||||
|
||||
Building PCRE on a Unix-like system
|
||||
-----------------------------------
|
||||
|
||||
To build PCRE on a Unix-like system, first run the "configure" command from the
|
||||
PCRE distribution directory, with your current directory set to the directory
|
||||
where you want the files to be created. This command is a standard GNU
|
||||
"autoconf" configuration script, for which generic instructions are supplied in
|
||||
INSTALL.
|
||||
|
||||
Most commonly, people build PCRE within its own distribution directory, and in
|
||||
this case, on many systems, just running "./configure" is sufficient, but the
|
||||
usual methods of changing standard defaults are available. For example,
|
||||
|
||||
CFLAGS='-O2 -Wall' ./configure --prefix=/opt/local
|
||||
|
||||
specifies that the C compiler should be run with the flags '-O2 -Wall' instead
|
||||
of the default, and that "make install" should install PCRE under /opt/local
|
||||
instead of the default /usr/local.
|
||||
|
||||
If you want to build in a different directory, just run "configure" with that
|
||||
directory as current. For example, suppose you have unpacked the PCRE source
|
||||
into /source/pcre/pcre-xxx, but you want to build it in /build/pcre/pcre-xxx:
|
||||
|
||||
cd /build/pcre/pcre-xxx
|
||||
/source/pcre/pcre-xxx/configure
|
||||
|
||||
There are some optional features that can be included or omitted from the PCRE
|
||||
library. You can read more about them in the pcrebuild man page.
|
||||
|
||||
. If you want to make use of the support for UTF-8 character strings in PCRE,
|
||||
you must add --enable-utf8 to the "configure" command. Without it, the code
|
||||
for handling UTF-8 is not included in the library. (Even when included, it
|
||||
still has to be enabled by an option at run time.)
|
||||
|
||||
. You can build PCRE to recognize CR or NL as the newline character, instead
|
||||
of whatever your compiler uses for "\n", by adding --newline-is-cr or
|
||||
--newline-is-nl to the "configure" command, respectively. Only do this if you
|
||||
really understand what you are doing. On traditional Unix-like systems, the
|
||||
newline character is NL.
|
||||
|
||||
. When called via the POSIX interface, PCRE uses malloc() to get additional
|
||||
storage for processing capturing parentheses if there are more than 10 of
|
||||
them. You can increase this threshold by setting, for example,
|
||||
|
||||
--with-posix-malloc-threshold=20
|
||||
|
||||
on the "configure" command.
|
||||
|
||||
. PCRE has a counter which can be set to limit the amount of resources it uses.
|
||||
If the limit is exceeded during a match, the match fails. The default is ten
|
||||
million. You can change the default by setting, for example,
|
||||
|
||||
--with-match-limit=500000
|
||||
|
||||
on the "configure" command. This is just the default; individual calls to
|
||||
pcre_exec() can supply their own value. There is discussion on the pcreapi
|
||||
man page.
|
||||
|
||||
. The default maximum compiled pattern size is around 64K. You can increase
|
||||
this by adding --with-link-size=3 to the "configure" command. You can
|
||||
increase it even more by setting --with-link-size=4, but this is unlikely
|
||||
ever to be necessary. If you build PCRE with an increased link size, test 2
|
||||
(and 5 if you are using UTF-8) will fail. Part of the output of these tests
|
||||
is a representation of the compiled pattern, and this changes with the link
|
||||
size.
|
||||
|
||||
. You can build PCRE so that its match() function does not call itself
|
||||
recursively. Instead, it uses blocks of data from the heap via special
|
||||
functions pcre_stack_malloc() and pcre_stack_free() to save data that would
|
||||
otherwise be saved on the stack. To build PCRE like this, use
|
||||
|
||||
--disable-stack-for-recursion
|
||||
|
||||
on the "configure" command. PCRE runs more slowly in this mode, but it may be
|
||||
necessary in environments with limited stack sizes.
|
||||
|
||||
The "configure" script builds five files:
|
||||
|
||||
. libtool is a script that builds shared and/or static libraries
|
||||
. Makefile is built by copying Makefile.in and making substitutions.
|
||||
. config.h is built by copying config.in and making substitutions.
|
||||
. pcre-config is built by copying pcre-config.in and making substitutions.
|
||||
. RunTest is a script for running tests
|
||||
|
||||
Once "configure" has run, you can run "make". It builds two libraries called
|
||||
libpcre and libpcreposix, a test program called pcretest, and the pcregrep
|
||||
command. You can use "make install" to copy these, the public header files
|
||||
pcre.h and pcreposix.h, and the man pages to appropriate live directories on
|
||||
your system, in the normal way.
|
||||
|
||||
Running "make install" also installs the command pcre-config, which can be used
|
||||
to recall information about the PCRE configuration and installation. For
|
||||
example,
|
||||
|
||||
pcre-config --version
|
||||
|
||||
prints the version number, and
|
||||
|
||||
pcre-config --libs
|
||||
|
||||
outputs information about where the library is installed. This command can be
|
||||
included in makefiles for programs that use PCRE, saving the programmer from
|
||||
having to remember too many details.
|
||||
|
||||
|
||||
Shared libraries on Unix-like systems
|
||||
-------------------------------------
|
||||
|
||||
The default distribution builds PCRE as two shared libraries and two static
|
||||
libraries, as long as the operating system supports shared libraries. Shared
|
||||
library support relies on the "libtool" script which is built as part of the
|
||||
"configure" process.
|
||||
|
||||
The libtool script is used to compile and link both shared and static
|
||||
libraries. They are placed in a subdirectory called .libs when they are newly
|
||||
built. The programs pcretest and pcregrep are built to use these uninstalled
|
||||
libraries (by means of wrapper scripts in the case of shared libraries). When
|
||||
you use "make install" to install shared libraries, pcregrep and pcretest are
|
||||
automatically re-built to use the newly installed shared libraries before being
|
||||
installed themselves. However, the versions left in the source directory still
|
||||
use the uninstalled libraries.
|
||||
|
||||
To build PCRE using static libraries only you must use --disable-shared when
|
||||
configuring it. For example
|
||||
|
||||
./configure --prefix=/usr/gnu --disable-shared
|
||||
|
||||
Then run "make" in the usual way. Similarly, you can use --disable-static to
|
||||
build only shared libraries.
|
||||
|
||||
|
||||
Cross-compiling on a Unix-like system
|
||||
-------------------------------------
|
||||
|
||||
You can specify CC and CFLAGS in the normal way to the "configure" command, in
|
||||
order to cross-compile PCRE for some other host. However, during the building
|
||||
process, the dftables.c source file is compiled *and run* on the local host, in
|
||||
order to generate the default character tables (the chartables.c file). It
|
||||
therefore needs to be compiled with the local compiler, not the cross compiler.
|
||||
You can do this by specifying CC_FOR_BUILD (and if necessary CFLAGS_FOR_BUILD)
|
||||
when calling the "configure" command. If they are not specified, they default
|
||||
to the values of CC and CFLAGS.
|
||||
|
||||
|
||||
Building on non-Unix systems
|
||||
----------------------------
|
||||
|
||||
For a non-Unix system, read the comments in the file NON-UNIX-USE, though if
|
||||
the system supports the use of "configure" and "make" you may be able to build
|
||||
PCRE in the same way as for Unix systems.
|
||||
|
||||
PCRE has been compiled on Windows systems and on Macintoshes, but I don't know
|
||||
the details because I don't use those systems. It should be straightforward to
|
||||
build PCRE on any system that has a Standard C compiler, because it uses only
|
||||
Standard C functions.
|
||||
|
||||
|
||||
Testing PCRE
|
||||
------------
|
||||
|
||||
To test PCRE on a Unix system, run the RunTest script that is created by the
|
||||
configuring process. (This can also be run by "make runtest", "make check", or
|
||||
"make test".) For other systems, see the instructions in NON-UNIX-USE.
|
||||
|
||||
The script runs the pcretest test program (which is documented in its own man
|
||||
page) on each of the testinput files (in the testdata directory) in turn,
|
||||
and compares the output with the contents of the corresponding testoutput file.
|
||||
A file called testtry is used to hold the output from pcretest. To run pcretest
|
||||
on just one of the test files, give its number as an argument to RunTest, for
|
||||
example:
|
||||
|
||||
RunTest 2
|
||||
|
||||
The first file can also be fed directly into the perltest script to check that
|
||||
Perl gives the same results. The only difference you should see is in the first
|
||||
few lines, where the Perl version is given instead of the PCRE version.
|
||||
|
||||
The second set of tests checks pcre_fullinfo(), pcre_info(), pcre_study(),
|
||||
pcre_copy_substring(), pcre_get_substring(), pcre_get_substring_list(), error
|
||||
detection, and run-time flags that are specific to PCRE, as well as the POSIX
|
||||
wrapper API. It also uses the debugging flag to check some of the internals of
|
||||
pcre_compile().
|
||||
|
||||
If you build PCRE with a locale setting that is not the standard C locale, the
|
||||
character tables may be different (see next paragraph). In some cases, this may
|
||||
cause failures in the second set of tests. For example, in a locale where the
|
||||
isprint() function yields TRUE for characters in the range 128-255, the use of
|
||||
[:isascii:] inside a character class defines a different set of characters, and
|
||||
this shows up in this test as a difference in the compiled code, which is being
|
||||
listed for checking. Where the comparison test output contains [\x00-\x7f] the
|
||||
test will contain [\x00-\xff], and similarly in some other cases. This is not a
|
||||
bug in PCRE.
|
||||
|
||||
The third set of tests checks pcre_maketables(), the facility for building a
|
||||
set of character tables for a specific locale and using them instead of the
|
||||
default tables. The tests make use of the "fr_FR" (French) locale. Before
|
||||
running the test, the script checks for the presence of this locale by running
|
||||
the "locale" command. If that command fails, or if it doesn't include "fr_FR"
|
||||
in the list of available locales, the third test cannot be run, and a comment
|
||||
is output to say why. If running this test produces instances of the error
|
||||
|
||||
** Failed to set locale "fr_FR"
|
||||
|
||||
in the comparison output, it means that locale is not available on your system,
|
||||
despite being listed by "locale". This does not mean that PCRE is broken.
|
||||
|
||||
The fourth test checks the UTF-8 support. It is not run automatically unless
|
||||
PCRE is built with UTF-8 support. To do this you must set --enable-utf8 when
|
||||
running "configure". This file can be also fed directly to the perltest script,
|
||||
provided you are running Perl 5.8 or higher. (For Perl 5.6, a small patch,
|
||||
commented in the script, can be used.)
|
||||
|
||||
The fifth and final file tests error handling with UTF-8 encoding, and internal
|
||||
UTF-8 features of PCRE that are not relevant to Perl.
|
||||
|
||||
|
||||
Character tables
|
||||
----------------
|
||||
|
||||
PCRE uses four tables for manipulating and identifying characters. The final
|
||||
argument of the pcre_compile() function is a pointer to a block of memory
|
||||
containing the concatenated tables. A call to pcre_maketables() can be used to
|
||||
generate a set of tables in the current locale. If the final argument for
|
||||
pcre_compile() is passed as NULL, a set of default tables that is built into
|
||||
the binary is used.
|
||||
|
||||
The source file called chartables.c contains the default set of tables. This is
|
||||
not supplied in the distribution, but is built by the program dftables
|
||||
(compiled from dftables.c), which uses the ANSI C character handling functions
|
||||
such as isalnum(), isalpha(), isupper(), islower(), etc. to build the table
|
||||
sources. This means that the default C locale which is set for your system will
|
||||
control the contents of these default tables. You can change the default tables
|
||||
by editing chartables.c and then re-building PCRE. If you do this, you should
|
||||
probably also edit Makefile to ensure that the file doesn't ever get
|
||||
re-generated.
|
||||
|
||||
The first two 256-byte tables provide lower casing and case flipping functions,
|
||||
respectively. The next table consists of three 32-byte bit maps which identify
|
||||
digits, "word" characters, and white space, respectively. These are used when
|
||||
building 32-byte bit maps that represent character classes.
|
||||
|
||||
The final 256-byte table has bits indicating various character types, as
|
||||
follows:
|
||||
|
||||
1 white space character
|
||||
2 letter
|
||||
4 decimal digit
|
||||
8 hexadecimal digit
|
||||
16 alphanumeric or '_'
|
||||
128 regular expression metacharacter or binary zero
|
||||
|
||||
You should not alter the set of characters that contain the 128 bit, as that
|
||||
will cause PCRE to malfunction.
|
||||
|
||||
|
||||
Manifest
|
||||
--------
|
||||
|
||||
The distribution should contain the following files:
|
||||
|
||||
(A) The actual source files of the PCRE library functions and their
|
||||
headers:
|
||||
|
||||
dftables.c auxiliary program for building chartables.c
|
||||
get.c )
|
||||
maketables.c )
|
||||
study.c ) source of
|
||||
pcre.c ) the functions
|
||||
pcreposix.c )
|
||||
printint.c )
|
||||
pcre.in "source" for the header for the external API; pcre.h
|
||||
is built from this by "configure"
|
||||
pcreposix.h header for the external POSIX wrapper API
|
||||
internal.h header for internal use
|
||||
config.in template for config.h, which is built by configure
|
||||
|
||||
(B) Auxiliary files:
|
||||
|
||||
AUTHORS information about the author of PCRE
|
||||
ChangeLog log of changes to the code
|
||||
INSTALL generic installation instructions
|
||||
LICENCE conditions for the use of PCRE
|
||||
COPYING the same, using GNU's standard name
|
||||
Makefile.in template for Unix Makefile, which is built by configure
|
||||
NEWS important changes in this release
|
||||
NON-UNIX-USE notes on building PCRE on non-Unix systems
|
||||
README this file
|
||||
RunTest.in template for a Unix shell script for running tests
|
||||
config.guess ) files used by libtool,
|
||||
config.sub ) used only when building a shared library
|
||||
configure a configuring shell script (built by autoconf)
|
||||
configure.in the autoconf input used to build configure
|
||||
doc/Tech.Notes notes on the encoding
|
||||
doc/*.3 man page sources for the PCRE functions
|
||||
doc/*.1 man page sources for pcregrep and pcretest
|
||||
doc/html/* HTML documentation
|
||||
doc/pcre.txt plain text version of the man pages
|
||||
doc/pcretest.txt plain text documentation of test program
|
||||
doc/perltest.txt plain text documentation of Perl test program
|
||||
install-sh a shell script for installing files
|
||||
ltmain.sh file used to build a libtool script
|
||||
pcretest.c comprehensive test program
|
||||
pcredemo.c simple demonstration of coding calls to PCRE
|
||||
perltest Perl test program
|
||||
pcregrep.c source of a grep utility that uses PCRE
|
||||
pcre-config.in source of script which retains PCRE information
|
||||
testdata/testinput1 test data, compatible with Perl
|
||||
testdata/testinput2 test data for error messages and non-Perl things
|
||||
testdata/testinput3 test data for locale-specific tests
|
||||
testdata/testinput4 test data for UTF-8 tests compatible with Perl
|
||||
testdata/testinput5 test data for other UTF-8 tests
|
||||
testdata/testoutput1 test results corresponding to testinput1
|
||||
testdata/testoutput2 test results corresponding to testinput2
|
||||
testdata/testoutput3 test results corresponding to testinput3
|
||||
testdata/testoutput4 test results corresponding to testinput4
|
||||
testdata/testoutput5 test results corresponding to testinput5
|
||||
|
||||
(C) Auxiliary files for Win32 DLL
|
||||
|
||||
dll.mk
|
||||
pcre.def
|
||||
|
||||
(D) Auxiliary file for VPASCAL
|
||||
|
||||
makevp.bat
|
||||
|
||||
Philip Hazel <ph10@cam.ac.uk>
|
||||
December 2003
|
|
@ -1,6 +0,0 @@
|
|||
This is PCRE, stripped down to what is required by the LDMud gamedriver.
|
||||
The files README and COPYING tell you where to get the complete package.
|
||||
|
||||
The file internal.h was modified so that it is protected against multiple
|
||||
inclusion.
|
||||
|
|
@ -1,107 +0,0 @@
|
|||
|
||||
/* On Unix systems config.in is converted by configure into config.h. PCRE is
|
||||
written in Standard C, but there are a few non-standard things it can cope
|
||||
with, allowing it to run on SunOS4 and other "close to standard" systems.
|
||||
|
||||
On a non-Unix system you should just copy this file into config.h, and set up
|
||||
the macros the way you need them. You should normally change the definitions of
|
||||
HAVE_STRERROR and HAVE_MEMMOVE to 1. Unfortunately, because of the way autoconf
|
||||
works, these cannot be made the defaults. If your system has bcopy() and not
|
||||
memmove(), change the definition of HAVE_BCOPY instead of HAVE_MEMMOVE. If your
|
||||
system has neither bcopy() nor memmove(), leave them both as 0; an emulation
|
||||
function will be used. */
|
||||
|
||||
/* If you are compiling for a system that uses EBCDIC instead of ASCII
|
||||
character codes, define this macro as 1. On systems that can use "configure",
|
||||
this can be done via --enable-ebcdic. */
|
||||
|
||||
#ifndef EBCDIC
|
||||
#define EBCDIC 0
|
||||
#endif
|
||||
|
||||
/* If you are compiling for a system that needs some magic to be inserted
|
||||
before the definition of an exported function, define this macro to contain the
|
||||
relevant magic. It appears at the start of every exported function. */
|
||||
|
||||
#define EXPORT
|
||||
|
||||
/* Define to empty if the "const" keyword does not work. */
|
||||
|
||||
/* #undef const */
|
||||
|
||||
/* Define to "unsigned" if <stddef.h> doesn't define size_t. */
|
||||
|
||||
/* #undef size_t */
|
||||
|
||||
/* The following two definitions are mainly for the benefit of SunOS4, which
|
||||
doesn't have the strerror() or memmove() functions that should be present in
|
||||
all Standard C libraries. The macros HAVE_STRERROR and HAVE_MEMMOVE should
|
||||
normally be defined with the value 1 for other systems, but unfortunately we
|
||||
can't make this the default because "configure" files generated by autoconf
|
||||
will only change 0 to 1; they won't change 1 to 0 if the functions are not
|
||||
found. */
|
||||
|
||||
#define HAVE_STRERROR 1
|
||||
#define HAVE_MEMMOVE 1
|
||||
|
||||
/* There are some non-Unix systems that don't even have bcopy(). If this macro
|
||||
is false, an emulation is used. If HAVE_MEMMOVE is set to 1, the value of
|
||||
HAVE_BCOPY is not relevant. */
|
||||
|
||||
#define HAVE_BCOPY 1
|
||||
|
||||
/* The value of NEWLINE determines the newline character. The default is to
|
||||
leave it up to the compiler, but some sites want to force a particular value.
|
||||
On Unix systems, "configure" can be used to override this default. */
|
||||
|
||||
#ifndef NEWLINE
|
||||
#define NEWLINE '\n'
|
||||
#endif
|
||||
|
||||
/* The value of LINK_SIZE determines the number of bytes used to store
|
||||
links as offsets within the compiled regex. The default is 2, which allows for
|
||||
compiled patterns up to 64K long. This covers the vast majority of cases.
|
||||
However, PCRE can also be compiled to use 3 or 4 bytes instead. This allows for
|
||||
longer patterns in extreme cases. On Unix systems, "configure" can be used to
|
||||
override this default. */
|
||||
|
||||
#ifndef LINK_SIZE
|
||||
#define LINK_SIZE 2
|
||||
#endif
|
||||
|
||||
/* The value of MATCH_LIMIT determines the default number of times the match()
|
||||
function can be called during a single execution of pcre_exec(). (There is a
|
||||
runtime method of setting a different limit.) The limit exists in order to
|
||||
catch runaway regular expressions that take for ever to determine that they do
|
||||
not match. The default is set very large so that it does not accidentally catch
|
||||
legitimate cases. On Unix systems, "configure" can be used to override this
|
||||
default. */
|
||||
|
||||
#ifndef MATCH_LIMIT
|
||||
#define MATCH_LIMIT 10000000
|
||||
#endif
|
||||
|
||||
/* When calling PCRE via the POSIX interface, additional working storage is
|
||||
required for holding the pointers to capturing substrings because PCRE requires
|
||||
three integers per substring, whereas the POSIX interface provides only two. If
|
||||
the number of expected substrings is small, the wrapper function uses space on
|
||||
the stack, because this is faster than using malloc() for each call. The
|
||||
threshold above which the stack is no longer used is defined by POSIX_MALLOC_
|
||||
THRESHOLD. On Unix systems, "configure" can be used to override this default.
|
||||
*/
|
||||
|
||||
#ifndef POSIX_MALLOC_THRESHOLD
|
||||
#define POSIX_MALLOC_THRESHOLD 10
|
||||
#endif
|
||||
|
||||
/* PCRE uses recursive function calls to handle backtracking while matching.
|
||||
This can sometimes be a problem on systems that have stacks of limited size.
|
||||
Define NO_RECURSE to get a version that doesn't use recursion in the match()
|
||||
function; instead it creates its own stack by steam using pcre_recurse_malloc
|
||||
to get memory. For more detail, see comments and other stuff just above the
|
||||
match() function. On Unix systems, "configure" can be used to set this in the
|
||||
Makefile (use --disable-recursion). */
|
||||
|
||||
/* #define NO_RECURSE */
|
||||
|
||||
/* End */
|
|
@ -1,167 +0,0 @@
|
|||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by: Philip Hazel <ph10@cam.ac.uk>
|
||||
|
||||
Copyright (c) 1997-2003 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Permission is granted to anyone to use this software for any purpose on any
|
||||
computer system, and to redistribute it freely, subject to the following
|
||||
restrictions:
|
||||
|
||||
1. This software is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
|
||||
2. The origin of this software must not be misrepresented, either by
|
||||
explicit claim or by omission.
|
||||
|
||||
3. Altered versions must be plainly marked as such, and must not be
|
||||
misrepresented as being the original software.
|
||||
|
||||
4. If PCRE is embedded in any software that is released under the GNU
|
||||
General Purpose Licence (GPL), then the terms of that licence shall
|
||||
supersede any condition above with which it is incompatible.
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
See the file Tech.Notes for some information on the internals.
|
||||
*/
|
||||
|
||||
|
||||
/* This is a support program to generate the file chartables.c, containing
|
||||
character tables of various kinds. They are built according to the default C
|
||||
locale and used as the default tables by PCRE. Now that pcre_maketables is
|
||||
a function visible to the outside world, we make use of its code from here in
|
||||
order to be consistent. */
|
||||
|
||||
#include <ctype.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "internal.h"
|
||||
|
||||
#define DFTABLES /* maketables.c notices this */
|
||||
#include "maketables.c"
|
||||
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
int i;
|
||||
FILE *f;
|
||||
const unsigned char *tables = pcre_maketables();
|
||||
|
||||
if (argc != 2)
|
||||
{
|
||||
fprintf(stderr, "dftables: one filename argument is required\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
f = fopen(argv[1], "w");
|
||||
if (f == NULL)
|
||||
{
|
||||
fprintf(stderr, "dftables: failed to open %s for writing\n", argv[1]);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* There are two fprintf() calls here, because gcc in pedantic mode complains
|
||||
about the very long string otherwise. */
|
||||
|
||||
fprintf(f,
|
||||
"/*************************************************\n"
|
||||
"* Perl-Compatible Regular Expressions *\n"
|
||||
"*************************************************/\n\n"
|
||||
"/* This file is automatically written by the dftables auxiliary \n"
|
||||
"program. If you edit it by hand, you might like to edit the Makefile to \n"
|
||||
"prevent its ever being regenerated.\n\n");
|
||||
fprintf(f,
|
||||
"This file is #included in the compilation of pcre.c to build the default\n"
|
||||
"character tables which are used when no tables are passed to the compile\n"
|
||||
"function. */\n\n"
|
||||
"static unsigned char pcre_default_tables[] = {\n\n"
|
||||
"/* This table is a lower casing table. */\n\n");
|
||||
|
||||
fprintf(f, " ");
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
if ((i & 7) == 0 && i != 0) fprintf(f, "\n ");
|
||||
fprintf(f, "%3d", *tables++);
|
||||
if (i != 255) fprintf(f, ",");
|
||||
}
|
||||
fprintf(f, ",\n\n");
|
||||
|
||||
fprintf(f, "/* This table is a case flipping table. */\n\n");
|
||||
|
||||
fprintf(f, " ");
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
if ((i & 7) == 0 && i != 0) fprintf(f, "\n ");
|
||||
fprintf(f, "%3d", *tables++);
|
||||
if (i != 255) fprintf(f, ",");
|
||||
}
|
||||
fprintf(f, ",\n\n");
|
||||
|
||||
fprintf(f,
|
||||
"/* This table contains bit maps for various character classes.\n"
|
||||
"Each map is 32 bytes long and the bits run from the least\n"
|
||||
"significant end of each byte. The classes that have their own\n"
|
||||
"maps are: space, xdigit, digit, upper, lower, word, graph\n"
|
||||
"print, punct, and cntrl. Other classes are built from combinations. */\n\n");
|
||||
|
||||
fprintf(f, " ");
|
||||
for (i = 0; i < cbit_length; i++)
|
||||
{
|
||||
if ((i & 7) == 0 && i != 0)
|
||||
{
|
||||
if ((i & 31) == 0) fprintf(f, "\n");
|
||||
fprintf(f, "\n ");
|
||||
}
|
||||
fprintf(f, "0x%02x", *tables++);
|
||||
if (i != cbit_length - 1) fprintf(f, ",");
|
||||
}
|
||||
fprintf(f, ",\n\n");
|
||||
|
||||
fprintf(f,
|
||||
"/* This table identifies various classes of character by individual bits:\n"
|
||||
" 0x%02x white space character\n"
|
||||
" 0x%02x letter\n"
|
||||
" 0x%02x decimal digit\n"
|
||||
" 0x%02x hexadecimal digit\n"
|
||||
" 0x%02x alphanumeric or '_'\n"
|
||||
" 0x%02x regular expression metacharacter or binary zero\n*/\n\n",
|
||||
ctype_space, ctype_letter, ctype_digit, ctype_xdigit, ctype_word,
|
||||
ctype_meta);
|
||||
|
||||
fprintf(f, " ");
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
if ((i & 7) == 0 && i != 0)
|
||||
{
|
||||
fprintf(f, " /* ");
|
||||
if (isprint(i-8)) fprintf(f, " %c -", i-8);
|
||||
else fprintf(f, "%3d-", i-8);
|
||||
if (isprint(i-1)) fprintf(f, " %c ", i-1);
|
||||
else fprintf(f, "%3d", i-1);
|
||||
fprintf(f, " */\n ");
|
||||
}
|
||||
fprintf(f, "0x%02x", *tables++);
|
||||
if (i != 255) fprintf(f, ",");
|
||||
}
|
||||
|
||||
fprintf(f, "};/* ");
|
||||
if (isprint(i-8)) fprintf(f, " %c -", i-8);
|
||||
else fprintf(f, "%3d-", i-8);
|
||||
if (isprint(i-1)) fprintf(f, " %c ", i-1);
|
||||
else fprintf(f, "%3d", i-1);
|
||||
fprintf(f, " */\n\n/* End of chartables.c */\n");
|
||||
|
||||
fclose(f);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* End of dftables.c */
|
349
src/pcre/get.c
349
src/pcre/get.c
|
@ -1,349 +0,0 @@
|
|||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
This is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language. See
|
||||
the file Tech.Notes for some information on the internals.
|
||||
|
||||
Written by: Philip Hazel <ph10@cam.ac.uk>
|
||||
|
||||
Copyright (c) 1997-2003 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Permission is granted to anyone to use this software for any purpose on any
|
||||
computer system, and to redistribute it freely, subject to the following
|
||||
restrictions:
|
||||
|
||||
1. This software is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
|
||||
2. The origin of this software must not be misrepresented, either by
|
||||
explicit claim or by omission.
|
||||
|
||||
3. Altered versions must be plainly marked as such, and must not be
|
||||
misrepresented as being the original software.
|
||||
|
||||
4. If PCRE is embedded in any software that is released under the GNU
|
||||
General Purpose Licence (GPL), then the terms of that licence shall
|
||||
supersede any condition above with which it is incompatible.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/* This module contains some convenience functions for extracting substrings
|
||||
from the subject string after a regex match has succeeded. The original idea
|
||||
for these functions came from Scott Wimer <scottw@cgibuilder.com>. */
|
||||
|
||||
|
||||
/* Include the internals header, which itself includes Standard C headers plus
|
||||
the external pcre header. */
|
||||
|
||||
#include "internal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Find number for named string *
|
||||
*************************************************/
|
||||
|
||||
/* This function is used by the two extraction functions below, as well
|
||||
as being generally available.
|
||||
|
||||
Arguments:
|
||||
code the compiled regex
|
||||
stringname the name whose number is required
|
||||
|
||||
Returns: the number of the named parentheses, or a negative number
|
||||
(PCRE_ERROR_NOSUBSTRING) if not found
|
||||
*/
|
||||
|
||||
int
|
||||
pcre_get_stringnumber(const pcre *code, const char *stringname)
|
||||
{
|
||||
int rc;
|
||||
int entrysize;
|
||||
int top, bot;
|
||||
uschar *nametable;
|
||||
|
||||
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
|
||||
return rc;
|
||||
if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
|
||||
|
||||
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
|
||||
return rc;
|
||||
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
|
||||
return rc;
|
||||
|
||||
bot = 0;
|
||||
while (top > bot)
|
||||
{
|
||||
int mid = (top + bot) / 2;
|
||||
uschar *entry = nametable + entrysize*mid;
|
||||
int c = strcmp(stringname, (char *)(entry + 2));
|
||||
if (c == 0) return (entry[0] << 8) + entry[1];
|
||||
if (c > 0) bot = mid + 1; else top = mid;
|
||||
}
|
||||
|
||||
return PCRE_ERROR_NOSUBSTRING;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Copy captured string to given buffer *
|
||||
*************************************************/
|
||||
|
||||
/* This function copies a single captured substring into a given buffer.
|
||||
Note that we use memcpy() rather than strncpy() in case there are binary zeros
|
||||
in the string.
|
||||
|
||||
Arguments:
|
||||
subject the subject string that was matched
|
||||
ovector pointer to the offsets table
|
||||
stringcount the number of substrings that were captured
|
||||
(i.e. the yield of the pcre_exec call, unless
|
||||
that was zero, in which case it should be 1/3
|
||||
of the offset table size)
|
||||
stringnumber the number of the required substring
|
||||
buffer where to put the substring
|
||||
size the size of the buffer
|
||||
|
||||
Returns: if successful:
|
||||
the length of the copied string, not including the zero
|
||||
that is put on the end; can be zero
|
||||
if not successful:
|
||||
PCRE_ERROR_NOMEMORY (-6) buffer too small
|
||||
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
|
||||
*/
|
||||
|
||||
int
|
||||
pcre_copy_substring(const char *subject, int *ovector, int stringcount,
|
||||
int stringnumber, char *buffer, int size)
|
||||
{
|
||||
int yield;
|
||||
if (stringnumber < 0 || stringnumber >= stringcount)
|
||||
return PCRE_ERROR_NOSUBSTRING;
|
||||
stringnumber *= 2;
|
||||
yield = ovector[stringnumber+1] - ovector[stringnumber];
|
||||
if (size < yield + 1) return PCRE_ERROR_NOMEMORY;
|
||||
memcpy(buffer, subject + ovector[stringnumber], yield);
|
||||
buffer[yield] = 0;
|
||||
return yield;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Copy named captured string to given buffer *
|
||||
*************************************************/
|
||||
|
||||
/* This function copies a single captured substring into a given buffer,
|
||||
identifying it by name.
|
||||
|
||||
Arguments:
|
||||
code the compiled regex
|
||||
subject the subject string that was matched
|
||||
ovector pointer to the offsets table
|
||||
stringcount the number of substrings that were captured
|
||||
(i.e. the yield of the pcre_exec call, unless
|
||||
that was zero, in which case it should be 1/3
|
||||
of the offset table size)
|
||||
stringname the name of the required substring
|
||||
buffer where to put the substring
|
||||
size the size of the buffer
|
||||
|
||||
Returns: if successful:
|
||||
the length of the copied string, not including the zero
|
||||
that is put on the end; can be zero
|
||||
if not successful:
|
||||
PCRE_ERROR_NOMEMORY (-6) buffer too small
|
||||
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
|
||||
*/
|
||||
|
||||
int
|
||||
pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector,
|
||||
int stringcount, const char *stringname, char *buffer, int size)
|
||||
{
|
||||
int n = pcre_get_stringnumber(code, stringname);
|
||||
if (n <= 0) return n;
|
||||
return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Copy all captured strings to new store *
|
||||
*************************************************/
|
||||
|
||||
/* This function gets one chunk of store and builds a list of pointers and all
|
||||
of the captured substrings in it. A NULL pointer is put on the end of the list.
|
||||
|
||||
Arguments:
|
||||
subject the subject string that was matched
|
||||
ovector pointer to the offsets table
|
||||
stringcount the number of substrings that were captured
|
||||
(i.e. the yield of the pcre_exec call, unless
|
||||
that was zero, in which case it should be 1/3
|
||||
of the offset table size)
|
||||
listptr set to point to the list of pointers
|
||||
|
||||
Returns: if successful: 0
|
||||
if not successful:
|
||||
PCRE_ERROR_NOMEMORY (-6) failed to get store
|
||||
*/
|
||||
|
||||
int
|
||||
pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
|
||||
const char ***listptr)
|
||||
{
|
||||
int i;
|
||||
int size = sizeof(char *);
|
||||
int double_count = stringcount * 2;
|
||||
char **stringlist;
|
||||
char *p;
|
||||
|
||||
for (i = 0; i < double_count; i += 2)
|
||||
size += sizeof(char *) + ovector[i+1] - ovector[i] + 1;
|
||||
|
||||
stringlist = (char **)(pcre_malloc)(size);
|
||||
if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
|
||||
|
||||
*listptr = (const char **)stringlist;
|
||||
p = (char *)(stringlist + stringcount + 1);
|
||||
|
||||
for (i = 0; i < double_count; i += 2)
|
||||
{
|
||||
int len = ovector[i+1] - ovector[i];
|
||||
memcpy(p, subject + ovector[i], len);
|
||||
*stringlist++ = p;
|
||||
p += len;
|
||||
*p++ = 0;
|
||||
}
|
||||
|
||||
*stringlist = NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Free store obtained by get_substring_list *
|
||||
*************************************************/
|
||||
|
||||
/* This function exists for the benefit of people calling PCRE from non-C
|
||||
programs that can call its functions, but not free() or (pcre_free)() directly.
|
||||
|
||||
Argument: the result of a previous pcre_get_substring_list()
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
void
|
||||
pcre_free_substring_list(const char **pointer)
|
||||
{
|
||||
(pcre_free)((void *)pointer);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Copy captured string to new store *
|
||||
*************************************************/
|
||||
|
||||
/* This function copies a single captured substring into a piece of new
|
||||
store
|
||||
|
||||
Arguments:
|
||||
subject the subject string that was matched
|
||||
ovector pointer to the offsets table
|
||||
stringcount the number of substrings that were captured
|
||||
(i.e. the yield of the pcre_exec call, unless
|
||||
that was zero, in which case it should be 1/3
|
||||
of the offset table size)
|
||||
stringnumber the number of the required substring
|
||||
stringptr where to put a pointer to the substring
|
||||
|
||||
Returns: if successful:
|
||||
the length of the string, not including the zero that
|
||||
is put on the end; can be zero
|
||||
if not successful:
|
||||
PCRE_ERROR_NOMEMORY (-6) failed to get store
|
||||
PCRE_ERROR_NOSUBSTRING (-7) substring not present
|
||||
*/
|
||||
|
||||
int
|
||||
pcre_get_substring(const char *subject, int *ovector, int stringcount,
|
||||
int stringnumber, const char **stringptr)
|
||||
{
|
||||
int yield;
|
||||
char *substring;
|
||||
if (stringnumber < 0 || stringnumber >= stringcount)
|
||||
return PCRE_ERROR_NOSUBSTRING;
|
||||
stringnumber *= 2;
|
||||
yield = ovector[stringnumber+1] - ovector[stringnumber];
|
||||
substring = (char *)(pcre_malloc)(yield + 1);
|
||||
if (substring == NULL) return PCRE_ERROR_NOMEMORY;
|
||||
memcpy(substring, subject + ovector[stringnumber], yield);
|
||||
substring[yield] = 0;
|
||||
*stringptr = substring;
|
||||
return yield;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Copy named captured string to new store *
|
||||
*************************************************/
|
||||
|
||||
/* This function copies a single captured substring, identified by name, into
|
||||
new store.
|
||||
|
||||
Arguments:
|
||||
code the compiled regex
|
||||
subject the subject string that was matched
|
||||
ovector pointer to the offsets table
|
||||
stringcount the number of substrings that were captured
|
||||
(i.e. the yield of the pcre_exec call, unless
|
||||
that was zero, in which case it should be 1/3
|
||||
of the offset table size)
|
||||
stringname the name of the required substring
|
||||
stringptr where to put the pointer
|
||||
|
||||
Returns: if successful:
|
||||
the length of the copied string, not including the zero
|
||||
that is put on the end; can be zero
|
||||
if not successful:
|
||||
PCRE_ERROR_NOMEMORY (-6) couldn't get memory
|
||||
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
|
||||
*/
|
||||
|
||||
int
|
||||
pcre_get_named_substring(const pcre *code, const char *subject, int *ovector,
|
||||
int stringcount, const char *stringname, const char **stringptr)
|
||||
{
|
||||
int n = pcre_get_stringnumber(code, stringname);
|
||||
if (n <= 0) return n;
|
||||
return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Free store obtained by get_substring *
|
||||
*************************************************/
|
||||
|
||||
/* This function exists for the benefit of people calling PCRE from non-C
|
||||
programs that can call its functions, but not free() or (pcre_free)() directly.
|
||||
|
||||
Argument: the result of a previous pcre_get_substring()
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
void
|
||||
pcre_free_substring(const char *pointer)
|
||||
{
|
||||
(pcre_free)((void *)pointer);
|
||||
}
|
||||
|
||||
/* End of get.c */
|
|
@ -1,682 +0,0 @@
|
|||
#ifndef PCRE_INTERNAL
|
||||
#define PCRE_INTERNAL
|
||||
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
|
||||
/* This is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language. See
|
||||
the file Tech.Notes for some information on the internals.
|
||||
|
||||
Written by: Philip Hazel <ph10@cam.ac.uk>
|
||||
|
||||
Copyright (c) 1997-2003 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Permission is granted to anyone to use this software for any purpose on any
|
||||
computer system, and to redistribute it freely, subject to the following
|
||||
restrictions:
|
||||
|
||||
1. This software is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
|
||||
2. The origin of this software must not be misrepresented, either by
|
||||
explicit claim or by omission.
|
||||
|
||||
3. Altered versions must be plainly marked as such, and must not be
|
||||
misrepresented as being the original software.
|
||||
|
||||
4. If PCRE is embedded in any software that is released under the GNU
|
||||
General Purpose Licence (GPL), then the terms of that licence shall
|
||||
supersede any condition above with which it is incompatible.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/* This header contains definitions that are shared between the different
|
||||
modules, but which are not relevant to the outside. */
|
||||
|
||||
/* Get the definitions provided by running "configure" */
|
||||
|
||||
#include "config.h"
|
||||
|
||||
/* Standard C headers plus the external interface definition. The only time
|
||||
setjmp and stdarg are used is when NO_RECURSE is set. */
|
||||
|
||||
#include <ctype.h>
|
||||
#include <limits.h>
|
||||
#include <setjmp.h>
|
||||
#include <stdarg.h>
|
||||
#include <stddef.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#ifndef PCRE_SPY
|
||||
#define PCRE_DEFINITION /* Win32 __declspec(export) trigger for .dll */
|
||||
#endif
|
||||
|
||||
#include "pcre.h"
|
||||
|
||||
/* When compiling for use with the Virtual Pascal compiler, these functions
|
||||
need to have their names changed. PCRE must be compiled with the -DVPCOMPAT
|
||||
option on the command line. */
|
||||
|
||||
#ifdef VPCOMPAT
|
||||
#define strncmp(s1,s2,m) _strncmp(s1,s2,m)
|
||||
#define memcpy(d,s,n) _memcpy(d,s,n)
|
||||
#define memmove(d,s,n) _memmove(d,s,n)
|
||||
#define memset(s,c,n) _memset(s,c,n)
|
||||
#else /* VPCOMPAT */
|
||||
|
||||
/* To cope with SunOS4 and other systems that lack memmove() but have bcopy(),
|
||||
define a macro for memmove() if HAVE_MEMMOVE is false, provided that HAVE_BCOPY
|
||||
is set. Otherwise, include an emulating function for those systems that have
|
||||
neither (there are some non-Unix environments where this is the case). This assumes
|
||||
that all calls to memmove are moving strings upwards in store, which is the
|
||||
case in PCRE. */
|
||||
|
||||
#if ! HAVE_MEMMOVE
|
||||
#undef memmove /* some systems may have a macro */
|
||||
#if HAVE_BCOPY
|
||||
#define memmove(a, b, c) bcopy(b, a, c)
|
||||
#else /* HAVE_BCOPY */
|
||||
/* Fallback for systems that provide neither memmove() nor bcopy(). The bytes
are copied starting with the last one and working downwards, so an overlapping
move is only safe when dest is at or above src; the comment above records that
every memmove() call in PCRE moves data upwards in store.

Arguments:
  dest        where to copy to
  src         where to copy from
  n           number of bytes to copy

Returns:      dest, as the standard memmove() does
*/

void *
pcre_memmove(unsigned char *dest, const unsigned char *src, size_t n)
{
size_t i;              /* same type as n: no signed/unsigned comparison */
dest += n;
src += n;
for (i = 0; i < n; ++i) *(--dest) = *(--src);
return (void *)dest;   /* n decrements have brought dest back to its start */
}
|
||||
#define memmove(a, b, c) pcre_memmove(a, b, c)
|
||||
#endif /* not HAVE_BCOPY */
|
||||
#endif /* not HAVE_MEMMOVE */
|
||||
#endif /* not VPCOMPAT */
|
||||
|
||||
|
||||
/* PCRE keeps offsets in its compiled code as 2-byte quantities by default.
|
||||
These are used, for example, to link from the start of a subpattern to its
|
||||
alternatives and its end. The use of 2 bytes per offset limits the size of the
|
||||
compiled regex to around 64K, which is big enough for almost everybody.
|
||||
However, I received a request for an even bigger limit. For this reason, and
|
||||
also to make the code easier to maintain, the storing and loading of offsets
|
||||
from the byte string is now handled by the macros that are defined here.
|
||||
|
||||
The macros are controlled by the value of LINK_SIZE. This defaults to 2 in
|
||||
the config.h file, but can be overridden by using -D on the command line. This
|
||||
is automated on Unix systems via the "configure" command. */
|
||||
|
||||
#if LINK_SIZE == 2
|
||||
|
||||
#define PUT(a,n,d) \
|
||||
(a[n] = (d) >> 8), \
|
||||
(a[(n)+1] = (d) & 255)
|
||||
|
||||
#define GET(a,n) \
|
||||
(((a)[n] << 8) | (a)[(n)+1])
|
||||
|
||||
#define MAX_PATTERN_SIZE (1 << 16)
|
||||
|
||||
|
||||
#elif LINK_SIZE == 3
|
||||
|
||||
#define PUT(a,n,d) \
|
||||
(a[n] = (d) >> 16), \
|
||||
(a[(n)+1] = (d) >> 8), \
|
||||
(a[(n)+2] = (d) & 255)
|
||||
|
||||
#define GET(a,n) \
|
||||
(((a)[n] << 16) | ((a)[(n)+1] << 8) | (a)[(n)+2])
|
||||
|
||||
#define MAX_PATTERN_SIZE (1 << 24)
|
||||
|
||||
|
||||
#elif LINK_SIZE == 4
|
||||
|
||||
#define PUT(a,n,d) \
|
||||
(a[n] = (d) >> 24), \
|
||||
(a[(n)+1] = (d) >> 16), \
|
||||
(a[(n)+2] = (d) >> 8), \
|
||||
(a[(n)+3] = (d) & 255)
|
||||
|
||||
#define GET(a,n) \
|
||||
(((a)[n] << 24) | ((a)[(n)+1] << 16) | ((a)[(n)+2] << 8) | (a)[(n)+3])
|
||||
|
||||
#define MAX_PATTERN_SIZE (1 << 30) /* Keep it positive */
|
||||
|
||||
|
||||
#else
|
||||
#error LINK_SIZE must be either 2, 3, or 4
|
||||
#endif
|
||||
|
||||
|
||||
/* Convenience macro defined in terms of the others */
|
||||
|
||||
#define PUTINC(a,n,d) PUT(a,n,d), a += LINK_SIZE
|
||||
|
||||
|
||||
/* PCRE uses some other 2-byte quantities that do not change when the size of
|
||||
offsets changes. They are used for repeat counts and for other things such as
|
||||
capturing parenthesis numbers in back references. */
|
||||
|
||||
/* Store the 16-bit value d at a[n] and a[n+1], most significant byte first.
This is a single parenthesised comma expression, in the same way as PUT, so
that the macro behaves as one statement: both stores stay under an unbraced
if/else, and PUT2INC (which appends ", a += 2") remains one expression. */

#define PUT2(a,n,d) \
  ((a)[n] = (d) >> 8, \
  (a)[(n)+1] = (d) & 255)
|
||||
|
||||
#define GET2(a,n) \
|
||||
(((a)[n] << 8) | (a)[(n)+1])
|
||||
|
||||
#define PUT2INC(a,n,d) PUT2(a,n,d), a += 2
|
||||
|
||||
|
||||
/* In case there is no definition of offsetof() provided - though any proper
|
||||
Standard C system should have one. */
|
||||
|
||||
#ifndef offsetof
|
||||
#define offsetof(p_type,field) ((size_t)&(((p_type *)0)->field))
|
||||
#endif
|
||||
|
||||
/* These are the public options that can change during matching. */
|
||||
|
||||
#define PCRE_IMS (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL)
|
||||
|
||||
/* Private options flags start at the most significant end of the four bytes,
|
||||
but skip the top bit so we can use ints for convenience without getting tangled
|
||||
with negative values. The public options defined in pcre.h start at the least
|
||||
significant end. Make sure they don't overlap, though now that we have expanded
|
||||
to four bytes there is plenty of space. */
|
||||
|
||||
#define PCRE_FIRSTSET 0x40000000 /* first_byte is set */
|
||||
#define PCRE_REQCHSET 0x20000000 /* req_byte is set */
|
||||
#define PCRE_STARTLINE 0x10000000 /* start after \n for multiline */
|
||||
#define PCRE_ICHANGED 0x08000000 /* i option changes within regex */
|
||||
|
||||
/* Options for the "extra" block produced by pcre_study(). */
|
||||
|
||||
#define PCRE_STUDY_MAPPED 0x01 /* a map of starting chars exists */
|
||||
|
||||
/* Masks for identifying the public options which are permitted at compile
|
||||
time, run time or study time, respectively. */
|
||||
|
||||
#define PUBLIC_OPTIONS \
|
||||
(PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \
|
||||
PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8| \
|
||||
PCRE_NO_AUTO_CAPTURE|PCRE_NO_UTF8_CHECK)
|
||||
|
||||
#define PUBLIC_EXEC_OPTIONS \
|
||||
(PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NO_UTF8_CHECK)
|
||||
|
||||
#define PUBLIC_STUDY_OPTIONS 0 /* None defined */
|
||||
|
||||
/* Magic number to provide a small check against being handed junk. */
|
||||
|
||||
#define MAGIC_NUMBER 0x50435245UL /* 'PCRE' */
|
||||
|
||||
/* Negative values for the firstchar and reqchar variables */
|
||||
|
||||
#define REQ_UNSET (-2)
|
||||
#define REQ_NONE (-1)
|
||||
|
||||
/* Flags added to firstbyte or reqbyte; a "non-literal" item is either a
|
||||
variable-length repeat, or anything other than literal characters. */
|
||||
|
||||
#define REQ_CASELESS 0x0100 /* indicates caselessness */
|
||||
#define REQ_VARY 0x0200 /* reqbyte followed non-literal item */
|
||||
|
||||
/* Miscellaneous definitions */
|
||||
|
||||
typedef int BOOL;
|
||||
|
||||
#define FALSE 0
|
||||
#define TRUE 1
|
||||
|
||||
/* Escape items that are just an encoding of a particular data value. Note that
|
||||
ESC_n is defined as yet another macro, which is set in config.h to either \n
|
||||
(the default) or \r (which some people want). */
|
||||
|
||||
#ifndef ESC_e
|
||||
#define ESC_e 27
|
||||
#endif
|
||||
|
||||
#ifndef ESC_f
|
||||
#define ESC_f '\f'
|
||||
#endif
|
||||
|
||||
#ifndef ESC_n
|
||||
#define ESC_n NEWLINE
|
||||
#endif
|
||||
|
||||
#ifndef ESC_r
|
||||
#define ESC_r '\r'
|
||||
#endif
|
||||
|
||||
/* We can't officially use ESC_t because it is a POSIX reserved identifier
|
||||
(presumably because of all the others like size_t). */
|
||||
|
||||
#ifndef ESC_tee
|
||||
#define ESC_tee '\t'
|
||||
#endif
|
||||
|
||||
/* These are escaped items that aren't just an encoding of a particular data
|
||||
value such as \n. They must have non-zero values, as check_escape() returns
|
||||
their negation. Also, they must appear in the same order as in the opcode
|
||||
definitions below, up to ESC_z. There's a dummy for OP_ANY because it
|
||||
corresponds to "." rather than an escape sequence. The final one must be
|
||||
ESC_REF as subsequent values are used for \1, \2, \3, etc. There are two
|
||||
tests in the code for an escape greater than ESC_b and less than ESC_Z to
|
||||
detect the types that may be repeated. These are the types that consume a
|
||||
character. If any new escapes are put in between that don't consume a
|
||||
character, that code will have to change. */
|
||||
|
||||
enum { ESC_A = 1, ESC_G, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s, ESC_W,
|
||||
ESC_w, ESC_dum1, ESC_C, ESC_Z, ESC_z, ESC_E, ESC_Q, ESC_REF };
|
||||
|
||||
/* Flag bits and data types for the extended class (OP_XCLASS) for classes that
|
||||
contain UTF-8 characters with values greater than 255. */
|
||||
|
||||
#define XCL_NOT 0x01 /* Flag: this is a negative class */
|
||||
#define XCL_MAP 0x02 /* Flag: a 32-byte map is present */
|
||||
|
||||
#define XCL_END 0 /* Marks end of individual items */
|
||||
#define XCL_SINGLE 1 /* Single item (one multibyte char) follows */
|
||||
#define XCL_RANGE 2 /* A range (two multibyte chars) follows */
|
||||
|
||||
|
||||
/* Opcode table: OP_BRA must be last, as all values >= it are used for brackets
|
||||
that extract substrings. Starting from 1 (i.e. after OP_END), the values up to
|
||||
OP_EOD must correspond in order to the list of escapes immediately above.
|
||||
Note that whenever this list is updated, the two macro definitions that follow
|
||||
must also be updated to match. */
|
||||
|
||||
enum {
|
||||
OP_END, /* 0 End of pattern */
|
||||
|
||||
/* Values corresponding to backslashed metacharacters */
|
||||
|
||||
OP_SOD, /* 1 Start of data: \A */
|
||||
OP_SOM, /* 2 Start of match (subject + offset): \G */
|
||||
OP_NOT_WORD_BOUNDARY, /* 3 \B */
|
||||
OP_WORD_BOUNDARY, /* 4 \b */
|
||||
OP_NOT_DIGIT, /* 5 \D */
|
||||
OP_DIGIT, /* 6 \d */
|
||||
OP_NOT_WHITESPACE, /* 7 \S */
|
||||
OP_WHITESPACE, /* 8 \s */
|
||||
OP_NOT_WORDCHAR, /* 9 \W */
|
||||
OP_WORDCHAR, /* 10 \w */
|
||||
OP_ANY, /* 11 Match any character */
|
||||
OP_ANYBYTE, /* 12 Match any byte (\C); different to OP_ANY for UTF-8 */
|
||||
OP_EODN, /* 13 End of data or \n at end of data: \Z. */
|
||||
OP_EOD, /* 14 End of data: \z */
|
||||
|
||||
OP_OPT, /* 15 Set runtime options */
|
||||
OP_CIRC, /* 16 Start of line - varies with multiline switch */
|
||||
OP_DOLL, /* 17 End of line - varies with multiline switch */
|
||||
OP_CHARS, /* 18 Match string of characters */
|
||||
OP_NOT, /* 19 Match anything but the following char */
|
||||
|
||||
OP_STAR, /* 20 The maximizing and minimizing versions of */
|
||||
OP_MINSTAR, /* 21 all these opcodes must come in pairs, with */
|
||||
OP_PLUS, /* 22 the minimizing one second. */
|
||||
OP_MINPLUS, /* 23 This first set applies to single characters */
|
||||
OP_QUERY, /* 24 */
|
||||
OP_MINQUERY, /* 25 */
|
||||
OP_UPTO, /* 26 From 0 to n matches */
|
||||
OP_MINUPTO, /* 27 */
|
||||
OP_EXACT, /* 28 Exactly n matches */
|
||||
|
||||
OP_NOTSTAR, /* 29 The maximizing and minimizing versions of */
|
||||
OP_NOTMINSTAR, /* 30 all these opcodes must come in pairs, with */
|
||||
OP_NOTPLUS, /* 31 the minimizing one second. */
|
||||
OP_NOTMINPLUS, /* 32 This set applies to "not" single characters */
|
||||
OP_NOTQUERY, /* 33 */
|
||||
OP_NOTMINQUERY, /* 34 */
|
||||
OP_NOTUPTO, /* 35 From 0 to n matches */
|
||||
OP_NOTMINUPTO, /* 36 */
|
||||
OP_NOTEXACT, /* 37 Exactly n matches */
|
||||
|
||||
OP_TYPESTAR, /* 38 The maximizing and minimizing versions of */
|
||||
OP_TYPEMINSTAR, /* 39 all these opcodes must come in pairs, with */
|
||||
OP_TYPEPLUS, /* 40 the minimizing one second. These codes must */
|
||||
OP_TYPEMINPLUS, /* 41 be in exactly the same order as those above. */
|
||||
OP_TYPEQUERY, /* 42 This set applies to character types such as \d */
|
||||
OP_TYPEMINQUERY, /* 43 */
|
||||
OP_TYPEUPTO, /* 44 From 0 to n matches */
|
||||
OP_TYPEMINUPTO, /* 45 */
|
||||
OP_TYPEEXACT, /* 46 Exactly n matches */
|
||||
|
||||
OP_CRSTAR, /* 47 The maximizing and minimizing versions of */
|
||||
OP_CRMINSTAR, /* 48 all these opcodes must come in pairs, with */
|
||||
OP_CRPLUS, /* 49 the minimizing one second. These codes must */
|
||||
OP_CRMINPLUS, /* 50 be in exactly the same order as those above. */
|
||||
OP_CRQUERY, /* 51 These are for character classes and back refs */
|
||||
OP_CRMINQUERY, /* 52 */
|
||||
OP_CRRANGE, /* 53 These are different to the three sets above. */
|
||||
OP_CRMINRANGE, /* 54 */
|
||||
|
||||
OP_CLASS, /* 55 Match a character class, chars < 256 only */
|
||||
OP_NCLASS, /* 56 Same, but the bitmap was created from a negative
|
||||
class - the difference is relevant only when a UTF-8
|
||||
character > 255 is encountered. */
|
||||
|
||||
OP_XCLASS, /* 57 Extended class for handling UTF-8 chars within the
|
||||
class. This does both positive and negative. */
|
||||
|
||||
OP_REF, /* 58 Match a back reference */
|
||||
OP_RECURSE, /* 59 Match a numbered subpattern (possibly recursive) */
|
||||
OP_CALLOUT, /* 60 Call out to external function if provided */
|
||||
|
||||
OP_ALT, /* 61 Start of alternation */
|
||||
OP_KET, /* 62 End of group that doesn't have an unbounded repeat */
|
||||
OP_KETRMAX, /* 63 These two must remain together and in this */
|
||||
OP_KETRMIN, /* 64 order. They are for groups that repeat for ever. */
|
||||
|
||||
/* The assertions must come before ONCE and COND */
|
||||
|
||||
OP_ASSERT, /* 65 Positive lookahead */
|
||||
OP_ASSERT_NOT, /* 66 Negative lookahead */
|
||||
OP_ASSERTBACK, /* 67 Positive lookbehind */
|
||||
OP_ASSERTBACK_NOT, /* 68 Negative lookbehind */
|
||||
OP_REVERSE, /* 69 Move pointer back - used in lookbehind assertions */
|
||||
|
||||
/* ONCE and COND must come after the assertions, with ONCE first, as there's
|
||||
a test for >= ONCE for a subpattern that isn't an assertion. */
|
||||
|
||||
OP_ONCE, /* 70 Once matched, don't back up into the subpattern */
|
||||
OP_COND, /* 71 Conditional group */
|
||||
OP_CREF, /* 72 Used to hold an extraction string number (cond ref) */
|
||||
|
||||
OP_BRAZERO, /* 73 These two must remain together and in this */
|
||||
OP_BRAMINZERO, /* 74 order. */
|
||||
|
||||
OP_BRANUMBER, /* 75 Used for extracting brackets whose number is greater
|
||||
than can fit into an opcode. */
|
||||
|
||||
OP_BRA /* 76 This and greater values are used for brackets that
|
||||
extract substrings up to a basic limit. After that,
|
||||
use is made of OP_BRANUMBER. */
|
||||
};
|
||||
|
||||
/* WARNING: There is an implicit assumption in study.c that all opcodes are
|
||||
less than 128 in value. This makes handling UTF-8 character sequences easier.
|
||||
*/
|
||||
|
||||
|
||||
/* This macro defines textual names for all the opcodes. They are used only
|
||||
for debugging, in pcre.c when DEBUG is defined, and also in pcretest.c. The
|
||||
macro is referenced only in printint.c. */
|
||||
|
||||
#define OP_NAME_LIST \
|
||||
"End", "\\A", "\\G", "\\B", "\\b", "\\D", "\\d", \
|
||||
"\\S", "\\s", "\\W", "\\w", "Any", "Anybyte", "\\Z", "\\z", \
|
||||
"Opt", "^", "$", "chars", "not", \
|
||||
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", \
|
||||
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", \
|
||||
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", \
|
||||
"*", "*?", "+", "+?", "?", "??", "{", "{", \
|
||||
"class", "nclass", "xclass", "Ref", "Recurse", "Callout", \
|
||||
"Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", \
|
||||
"AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cond ref",\
|
||||
"Brazero", "Braminzero", "Branumber", "Bra"
|
||||
|
||||
|
||||
/* This macro defines the length of fixed length operations in the compiled
|
||||
regex. The lengths are used when searching for specific things, and also in the
|
||||
debugging printing of a compiled regex. We use a macro so that it can be
|
||||
incorporated both into pcre.c and pcretest.c without being publicly exposed.
|
||||
|
||||
As things have been extended, some of these are no longer fixed lengths, but are
|
||||
minima instead. For example, the length of a single-character repeat may vary
|
||||
in UTF-8 mode. The code that uses this table must know about such things. */
|
||||
|
||||
#define OP_LENGTHS \
|
||||
1, /* End */ \
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* \A, \G, \B, \b, \D, \d, \S, \s, \W, \w */ \
|
||||
1, 1, 1, 1, 2, 1, 1, /* Any, Anybyte, \Z, \z, Opt, ^, $ */ \
|
||||
2, /* Chars - the minimum length */ \
|
||||
2, /* not */ \
|
||||
/* Positive single-char repeats ** These are */ \
|
||||
2, 2, 2, 2, 2, 2, /* *, *?, +, +?, ?, ?? ** minima in */ \
|
||||
4, 4, 4, /* upto, minupto, exact ** UTF-8 mode */ \
|
||||
/* Negative single-char repeats - only for chars < 256 */ \
|
||||
2, 2, 2, 2, 2, 2, /* NOT *, *?, +, +?, ?, ?? */ \
|
||||
4, 4, 4, /* NOT upto, minupto, exact */ \
|
||||
/* Positive type repeats */ \
|
||||
2, 2, 2, 2, 2, 2, /* Type *, *?, +, +?, ?, ?? */ \
|
||||
4, 4, 4, /* Type upto, minupto, exact */ \
|
||||
/* Character class & ref repeats */ \
|
||||
1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */ \
|
||||
5, 5, /* CRRANGE, CRMINRANGE */ \
|
||||
33, /* CLASS */ \
|
||||
33, /* NCLASS */ \
|
||||
0, /* XCLASS - variable length */ \
|
||||
3, /* REF */ \
|
||||
1+LINK_SIZE, /* RECURSE */ \
|
||||
2, /* CALLOUT */ \
|
||||
1+LINK_SIZE, /* Alt */ \
|
||||
1+LINK_SIZE, /* Ket */ \
|
||||
1+LINK_SIZE, /* KetRmax */ \
|
||||
1+LINK_SIZE, /* KetRmin */ \
|
||||
1+LINK_SIZE, /* Assert */ \
|
||||
1+LINK_SIZE, /* Assert not */ \
|
||||
1+LINK_SIZE, /* Assert behind */ \
|
||||
1+LINK_SIZE, /* Assert behind not */ \
|
||||
1+LINK_SIZE, /* Reverse */ \
|
||||
1+LINK_SIZE, /* Once */ \
|
||||
1+LINK_SIZE, /* COND */ \
|
||||
3, /* CREF */ \
|
||||
1, 1, /* BRAZERO, BRAMINZERO */ \
|
||||
3, /* BRANUMBER */ \
|
||||
1+LINK_SIZE /* BRA */ \
|
||||
|
||||
|
||||
/* The highest extraction number before we have to start using additional
|
||||
bytes. (Originally PCRE didn't have support for extraction counts higher than
|
||||
this number.) The value is limited by the number of opcodes left after OP_BRA,
|
||||
i.e. 255 - OP_BRA. We actually set it a bit lower to leave room for additional
|
||||
opcodes. */
|
||||
|
||||
#define EXTRACT_BASIC_MAX 150
|
||||
|
||||
/* A magic value for OP_CREF to indicate the "in recursion" condition. */
|
||||
|
||||
#define CREF_RECURSE 0xffff
|
||||
|
||||
/* The texts of compile-time error messages are defined as macros here so that
|
||||
they can be accessed by the POSIX wrapper and converted into error codes. Yes,
|
||||
I could have used error codes in the first place, but didn't feel like changing
|
||||
just to accommodate the POSIX wrapper. */
|
||||
|
||||
#define ERR1 "\\ at end of pattern"
|
||||
#define ERR2 "\\c at end of pattern"
|
||||
#define ERR3 "unrecognized character follows \\"
|
||||
#define ERR4 "numbers out of order in {} quantifier"
|
||||
#define ERR5 "number too big in {} quantifier"
|
||||
#define ERR6 "missing terminating ] for character class"
|
||||
#define ERR7 "invalid escape sequence in character class"
|
||||
#define ERR8 "range out of order in character class"
|
||||
#define ERR9 "nothing to repeat"
|
||||
#define ERR10 "operand of unlimited repeat could match the empty string"
|
||||
#define ERR11 "internal error: unexpected repeat"
|
||||
#define ERR12 "unrecognized character after (?"
|
||||
#define ERR13 "POSIX named classes are supported only within a class"
|
||||
#define ERR14 "missing )"
|
||||
#define ERR15 "reference to non-existent subpattern"
|
||||
#define ERR16 "erroffset passed as NULL"
|
||||
#define ERR17 "unknown option bit(s) set"
|
||||
#define ERR18 "missing ) after comment"
|
||||
#define ERR19 "parentheses nested too deeply"
|
||||
#define ERR20 "regular expression too large"
|
||||
#define ERR21 "failed to get memory"
|
||||
#define ERR22 "unmatched parentheses"
|
||||
#define ERR23 "internal error: code overflow"
|
||||
#define ERR24 "unrecognized character after (?<"
|
||||
#define ERR25 "lookbehind assertion is not fixed length"
|
||||
#define ERR26 "malformed number after (?("
|
||||
#define ERR27 "conditional group contains more than two branches"
|
||||
#define ERR28 "assertion expected after (?("
|
||||
#define ERR29 "(?R or (?digits must be followed by )"
|
||||
#define ERR30 "unknown POSIX class name"
|
||||
#define ERR31 "POSIX collating elements are not supported"
|
||||
#define ERR32 "this version of PCRE is not compiled with PCRE_UTF8 support"
|
||||
#define ERR33 "spare error"
|
||||
#define ERR34 "character value in \\x{...} sequence is too large"
|
||||
#define ERR35 "invalid condition (?(0)"
|
||||
#define ERR36 "\\C not allowed in lookbehind assertion"
|
||||
#define ERR37 "PCRE does not support \\L, \\l, \\N, \\P, \\p, \\U, \\u, or \\X"
|
||||
#define ERR38 "number after (?C is > 255"
|
||||
#define ERR39 "closing ) for (?C expected"
|
||||
#define ERR40 "recursive call could loop indefinitely"
|
||||
#define ERR41 "unrecognized character after (?P"
|
||||
#define ERR42 "syntax error after (?P"
|
||||
#define ERR43 "two named groups have the same name"
|
||||
#define ERR44 "invalid UTF-8 string"
|
||||
|
||||
/* All character handling must be done as unsigned characters. Otherwise there
|
||||
are problems with top-bit-set characters and functions such as isspace().
|
||||
However, we leave the interface to the outside world as char *, because that
|
||||
should make things easier for callers. We define a short type for unsigned char
|
||||
to save lots of typing. I tried "uchar", but it causes problems on Digital
|
||||
Unix, where it is defined in sys/types, so use "uschar" instead. */
|
||||
|
||||
typedef unsigned char uschar;
|
||||
|
||||
/* The real format of the start of the pcre block; the index of names and the
|
||||
code vector run on as long as necessary after the end. */
|
||||
|
||||
typedef struct real_pcre {
|
||||
unsigned long int magic_number;
|
||||
size_t size; /* Total that was malloced */
|
||||
const unsigned char *tables; /* Pointer to tables */
|
||||
unsigned long int options;
|
||||
unsigned short int top_bracket;
|
||||
unsigned short int top_backref;
|
||||
unsigned short int first_byte;
|
||||
unsigned short int req_byte;
|
||||
unsigned short int name_entry_size; /* Size of any name items; 0 => none */
|
||||
unsigned short int name_count; /* Number of name items */
|
||||
} real_pcre;
|
||||
|
||||
/* The format of the block used to store data from pcre_study(). */
|
||||
|
||||
typedef struct pcre_study_data {
|
||||
size_t size; /* Total that was malloced */
|
||||
uschar options; /* NOTE(review): flag bits; their meanings are not defined in this header */
|
||||
uschar start_bits[32]; /* 32 bytes = 256 bits; presumably one bit per possible starting byte value - TODO confirm against pcre_study() */
|
||||
} pcre_study_data;
|
||||
|
||||
/* Structure for passing "static" information around between the functions
|
||||
doing the compiling, so that they are thread-safe. */
|
||||
|
||||
typedef struct compile_data {
|
||||
const uschar *lcc; /* Points to lower casing table */
|
||||
const uschar *fcc; /* Points to case-flipping table */
|
||||
const uschar *cbits; /* Points to character type table */
|
||||
const uschar *ctypes; /* Points to table of type maps */
|
||||
const uschar *start_code; /* The start of the compiled code */
|
||||
uschar *name_table; /* The name/number table */
|
||||
int names_found; /* Number of entries so far */
|
||||
int name_entry_size; /* Size of each entry */
|
||||
int top_backref; /* Maximum back reference */
|
||||
unsigned int backref_map; /* Bitmap of low back refs */
|
||||
int req_varyopt; /* "After variable item" flag for reqbyte */
|
||||
} compile_data;
|
||||
|
||||
/* Structure for maintaining a chain of pointers to the currently incomplete
|
||||
branches, for testing for left recursion. */
|
||||
|
||||
typedef struct branch_chain {
|
||||
struct branch_chain *outer; /* Next link in the chain; presumably the enclosing incomplete branch - TODO confirm in pcre.c */
|
||||
uschar *current; /* NOTE(review): presumably points at the code of the branch currently being compiled - confirm in pcre.c */
|
||||
} branch_chain;
|
||||
|
||||
/* Structure for items in a linked list that represents an explicit recursive
|
||||
call within the pattern. */
|
||||
|
||||
typedef struct recursion_info {
|
||||
struct recursion_info *prevrec; /* Previous recursion record (or NULL) */
|
||||
int group_num; /* Number of group that was called */
|
||||
const uschar *after_call; /* "Return value": points after the call in the expr */
|
||||
const uschar *save_start; /* Old value of md->start_match */
|
||||
int *offset_save; /* Pointer to start of saved offsets */
|
||||
int saved_max; /* Number of saved offsets */
|
||||
} recursion_info;
|
||||
|
||||
/* When compiling in a mode that doesn't use recursive calls to match(),
|
||||
a structure is used to remember local variables on the heap. It is defined in
|
||||
pcre.c, close to the match() function, so that it is easy to keep it in step
|
||||
with any changes of local variable. However, the pointer to the current frame
|
||||
must be saved in some "static" place over a longjmp(). We declare the
|
||||
structure here so that we can put a pointer in the match_data structure.
|
||||
NOTE: This isn't used for a "normal" compilation of pcre. */
|
||||
|
||||
struct heapframe;
|
||||
|
||||
/* Structure for passing "static" information around between the functions
|
||||
doing the matching, so that they are thread-safe. */
|
||||
|
||||
typedef struct match_data {
|
||||
unsigned long int match_call_count; /* As it says */
|
||||
unsigned long int match_limit;/* As it says */
|
||||
int *offset_vector; /* Offset vector */
|
||||
int offset_end; /* One past the end */
|
||||
int offset_max; /* The maximum usable for return data */
|
||||
const uschar *lcc; /* Points to lower casing table */
|
||||
const uschar *ctypes; /* Points to table of type maps */
|
||||
BOOL offset_overflow; /* Set if too many extractions */
|
||||
BOOL notbol; /* NOTBOL flag */
|
||||
BOOL noteol; /* NOTEOL flag */
|
||||
BOOL utf8; /* UTF8 flag */
|
||||
BOOL endonly; /* Dollar not before final \n */
|
||||
BOOL notempty; /* Empty string match not wanted */
|
||||
const uschar *start_code; /* For use when recursing */
|
||||
const uschar *start_subject; /* Start of the subject string */
|
||||
const uschar *end_subject; /* End of the subject string */
|
||||
const uschar *start_match; /* Start of this match attempt */
|
||||
const uschar *end_match_ptr; /* Subject position at end match */
|
||||
int end_offset_top; /* Highwater mark at end of match */
|
||||
int capture_last; /* Most recent capture number */
|
||||
int start_offset; /* The start offset value */
|
||||
recursion_info *recursive; /* Linked list of recursion data */
|
||||
void *callout_data; /* To pass back to callouts */
|
||||
struct heapframe *thisframe; /* Used only when compiling for no recursion */
|
||||
} match_data;
|
||||
|
||||
/* Bit definitions for entries in the pcre_ctypes table. */
|
||||
|
||||
#define ctype_space 0x01
|
||||
#define ctype_letter 0x02
|
||||
#define ctype_digit 0x04
|
||||
#define ctype_xdigit 0x08
|
||||
#define ctype_word 0x10 /* alphameric or '_' */
|
||||
#define ctype_meta 0x80 /* regexp meta char or zero (end pattern) */
|
||||
|
||||
/* Offsets for the bitmap tables in pcre_cbits. Each table contains a set
|
||||
of bits for a class map. Some classes are built by combining these tables. */
|
||||
|
||||
#define cbit_space 0 /* [:space:] or \s */
|
||||
#define cbit_xdigit 32 /* [:xdigit:] */
|
||||
#define cbit_digit 64 /* [:digit:] or \d */
|
||||
#define cbit_upper 96 /* [:upper:] */
|
||||
#define cbit_lower 128 /* [:lower:] */
|
||||
#define cbit_word 160 /* [:word:] or \w */
|
||||
#define cbit_graph 192 /* [:graph:] */
|
||||
#define cbit_print 224 /* [:print:] */
|
||||
#define cbit_punct 256 /* [:punct:] */
|
||||
#define cbit_cntrl 288 /* [:cntrl:] */
|
||||
#define cbit_length 320 /* Length of the cbits table */
|
||||
|
||||
/* Offsets of the various tables from the base tables pointer, and
|
||||
total length. */
|
||||
|
||||
#define lcc_offset 0
|
||||
#define fcc_offset 256
|
||||
#define cbits_offset 512
|
||||
#define ctypes_offset (cbits_offset + cbit_length)
|
||||
#define tables_length (ctypes_offset + 256)
|
||||
|
||||
/* End of internal.h */
|
||||
|
||||
#endif /* PCRE_INTERNAL */
|
|
@ -1,140 +0,0 @@
|
|||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by: Philip Hazel <ph10@cam.ac.uk>
|
||||
|
||||
Copyright (c) 1997-2003 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Permission is granted to anyone to use this software for any purpose on any
|
||||
computer system, and to redistribute it freely, subject to the following
|
||||
restrictions:
|
||||
|
||||
1. This software is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
|
||||
2. The origin of this software must not be misrepresented, either by
|
||||
explicit claim or by omission.
|
||||
|
||||
3. Altered versions must be plainly marked as such, and must not be
|
||||
misrepresented as being the original software.
|
||||
|
||||
4. If PCRE is embedded in any software that is released under the GNU
|
||||
General Purpose Licence (GPL), then the terms of that licence shall
|
||||
supersede any condition above with which it is incompatible.
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
See the file Tech.Notes for some information on the internals.
|
||||
*/
|
||||
|
||||
|
||||
/* This file is compiled on its own as part of the PCRE library. However,
|
||||
it is also included in the compilation of dftables.c, in which case the macro
|
||||
DFTABLES is defined. */
|
||||
|
||||
#ifndef DFTABLES
|
||||
#include "internal.h"
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Create PCRE character tables *
|
||||
*************************************************/
|
||||
|
||||
/* This function builds a set of character tables for use by PCRE and returns
|
||||
a pointer to them. They are built using the ctype functions, and consequently
|
||||
their contents will depend upon the current locale setting. When compiled as
|
||||
part of the library, the store is obtained via pcre_malloc(), but when compiled
|
||||
inside dftables, use malloc().
|
||||
|
||||
Arguments: none
|
||||
Returns: pointer to the contiguous block of data
|
||||
*/
|
||||
|
||||
const unsigned char *
|
||||
pcre_maketables(void)
|
||||
{
|
||||
unsigned char *yield, *p;
|
||||
int i;
|
||||
|
||||
#ifndef DFTABLES
|
||||
yield = (unsigned char*)(pcre_malloc)(tables_length);
|
||||
#else
|
||||
yield = (unsigned char*)malloc(tables_length);
|
||||
#endif
|
||||
|
||||
if (yield == NULL) return NULL;
|
||||
p = yield;
|
||||
|
||||
/* First comes the lower casing table */
|
||||
|
||||
for (i = 0; i < 256; i++) *p++ = tolower(i);
|
||||
|
||||
/* Next the case-flipping table */
|
||||
|
||||
for (i = 0; i < 256; i++) *p++ = islower(i)? toupper(i) : tolower(i);
|
||||
|
||||
/* Then the character class tables. Don't try to be clever and save effort
|
||||
on exclusive ones - in some locales things may be different. Note that the
|
||||
table for "space" includes everything "isspace" gives, including VT in the
|
||||
default locale. This makes it work for the POSIX class [:space:]. */
|
||||
|
||||
memset(p, 0, cbit_length);
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
if (isdigit(i))
|
||||
{
|
||||
p[cbit_digit + i/8] |= 1 << (i&7);
|
||||
p[cbit_word + i/8] |= 1 << (i&7);
|
||||
}
|
||||
if (isupper(i))
|
||||
{
|
||||
p[cbit_upper + i/8] |= 1 << (i&7);
|
||||
p[cbit_word + i/8] |= 1 << (i&7);
|
||||
}
|
||||
if (islower(i))
|
||||
{
|
||||
p[cbit_lower + i/8] |= 1 << (i&7);
|
||||
p[cbit_word + i/8] |= 1 << (i&7);
|
||||
}
|
||||
if (i == '_') p[cbit_word + i/8] |= 1 << (i&7);
|
||||
if (isspace(i)) p[cbit_space + i/8] |= 1 << (i&7);
|
||||
if (isxdigit(i))p[cbit_xdigit + i/8] |= 1 << (i&7);
|
||||
if (isgraph(i)) p[cbit_graph + i/8] |= 1 << (i&7);
|
||||
if (isprint(i)) p[cbit_print + i/8] |= 1 << (i&7);
|
||||
if (ispunct(i)) p[cbit_punct + i/8] |= 1 << (i&7);
|
||||
if (iscntrl(i)) p[cbit_cntrl + i/8] |= 1 << (i&7);
|
||||
}
|
||||
p += cbit_length;
|
||||
|
||||
/* Finally, the character type table. In this, we exclude VT from the white
|
||||
space chars, because Perl doesn't recognize it as such for \s and for comments
|
||||
within regexes. */
|
||||
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
int x = 0;
|
||||
if (i != 0x0b && isspace(i)) x += ctype_space;
|
||||
if (isalpha(i)) x += ctype_letter;
|
||||
if (isdigit(i)) x += ctype_digit;
|
||||
if (isxdigit(i)) x += ctype_xdigit;
|
||||
if (isalnum(i) || i == '_') x += ctype_word;
|
||||
|
||||
/* Note: strchr includes the terminating zero in the characters it considers.
|
||||
In this instance, that is ok because we want binary zero to be flagged as a
|
||||
meta-character, which in this sense is any character that terminates a run
|
||||
of data characters. */
|
||||
|
||||
if (strchr("*+?{^.$|()[", i) != 0) x += ctype_meta; *p++ = x; }
|
||||
|
||||
return yield;
|
||||
}
|
||||
|
||||
/* End of maketables.c */
|
8306
src/pcre/pcre.c
8306
src/pcre/pcre.c
File diff suppressed because it is too large
Load diff
193
src/pcre/pcre.h
193
src/pcre/pcre.h
|
@ -1,193 +0,0 @@
|
|||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* Copyright (c) 1997-2003 University of Cambridge */
|
||||
|
||||
#ifndef _PCRE_H
|
||||
#define _PCRE_H
|
||||
|
||||
/* The file pcre.h is built by "configure". Do not edit it; instead
|
||||
make changes to pcre.in. */
|
||||
|
||||
#define PCRE_MAJOR 4
|
||||
#define PCRE_MINOR 5
|
||||
#define PCRE_DATE 01-December-2003
|
||||
|
||||
/* Win32 uses DLL by default */
|
||||
|
||||
#ifdef _WIN32
|
||||
# ifdef PCRE_DEFINITION
|
||||
# ifdef DLL_EXPORT
|
||||
# define PCRE_DATA_SCOPE __declspec(dllexport)
|
||||
# endif
|
||||
# else
|
||||
# ifndef PCRE_STATIC
|
||||
# define PCRE_DATA_SCOPE extern __declspec(dllimport)
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
#ifndef PCRE_DATA_SCOPE
|
||||
# define PCRE_DATA_SCOPE extern
|
||||
#endif
|
||||
|
||||
/* Have to include stdlib.h in order to ensure that size_t is defined;
|
||||
it is needed here for malloc. */
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
/* Allow for C++ users */
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Options */
|
||||
|
||||
#define PCRE_CASELESS 0x0001
|
||||
#define PCRE_MULTILINE 0x0002
|
||||
#define PCRE_DOTALL 0x0004
|
||||
#define PCRE_EXTENDED 0x0008
|
||||
#define PCRE_ANCHORED 0x0010
|
||||
#define PCRE_DOLLAR_ENDONLY 0x0020
|
||||
#define PCRE_EXTRA 0x0040
|
||||
#define PCRE_NOTBOL 0x0080
|
||||
#define PCRE_NOTEOL 0x0100
|
||||
#define PCRE_UNGREEDY 0x0200
|
||||
#define PCRE_NOTEMPTY 0x0400
|
||||
#define PCRE_UTF8 0x0800
|
||||
#define PCRE_NO_AUTO_CAPTURE 0x1000
|
||||
#define PCRE_NO_UTF8_CHECK 0x2000
|
||||
|
||||
/* Exec-time and get/set-time error codes */
|
||||
|
||||
#define PCRE_ERROR_NOMATCH (-1)
|
||||
#define PCRE_ERROR_NULL (-2)
|
||||
#define PCRE_ERROR_BADOPTION (-3)
|
||||
#define PCRE_ERROR_BADMAGIC (-4)
|
||||
#define PCRE_ERROR_UNKNOWN_NODE (-5)
|
||||
#define PCRE_ERROR_NOMEMORY (-6)
|
||||
#define PCRE_ERROR_NOSUBSTRING (-7)
|
||||
#define PCRE_ERROR_MATCHLIMIT (-8)
|
||||
#define PCRE_ERROR_CALLOUT (-9) /* Never used by PCRE itself */
|
||||
#define PCRE_ERROR_BADUTF8 (-10)
|
||||
#define PCRE_ERROR_BADUTF8_OFFSET (-11)
|
||||
|
||||
/* Request types for pcre_fullinfo() */
|
||||
|
||||
#define PCRE_INFO_OPTIONS 0
|
||||
#define PCRE_INFO_SIZE 1
|
||||
#define PCRE_INFO_CAPTURECOUNT 2
|
||||
#define PCRE_INFO_BACKREFMAX 3
|
||||
#define PCRE_INFO_FIRSTBYTE 4
|
||||
#define PCRE_INFO_FIRSTCHAR 4 /* For backwards compatibility */
|
||||
#define PCRE_INFO_FIRSTTABLE 5
|
||||
#define PCRE_INFO_LASTLITERAL 6
|
||||
#define PCRE_INFO_NAMEENTRYSIZE 7
|
||||
#define PCRE_INFO_NAMECOUNT 8
|
||||
#define PCRE_INFO_NAMETABLE 9
|
||||
#define PCRE_INFO_STUDYSIZE 10
|
||||
|
||||
/* Request types for pcre_config() */
|
||||
|
||||
#define PCRE_CONFIG_UTF8 0
|
||||
#define PCRE_CONFIG_NEWLINE 1
|
||||
#define PCRE_CONFIG_LINK_SIZE 2
|
||||
#define PCRE_CONFIG_POSIX_MALLOC_THRESHOLD 3
|
||||
#define PCRE_CONFIG_MATCH_LIMIT 4
|
||||
#define PCRE_CONFIG_STACKRECURSE 5
|
||||
|
||||
/* Bit flags for the pcre_extra structure */
|
||||
|
||||
#define PCRE_EXTRA_STUDY_DATA 0x0001
|
||||
#define PCRE_EXTRA_MATCH_LIMIT 0x0002
|
||||
#define PCRE_EXTRA_CALLOUT_DATA 0x0004
|
||||
|
||||
/* Types */
|
||||
|
||||
struct real_pcre; /* declaration; the definition is private */
|
||||
typedef struct real_pcre pcre;
|
||||
|
||||
/* The structure for passing additional data to pcre_exec(). This is defined in
|
||||
such a way as to be extensible. */
|
||||
|
||||
typedef struct pcre_extra {
|
||||
unsigned long int flags; /* Bits for which fields are set */
|
||||
void *study_data; /* Opaque data from pcre_study() */
|
||||
unsigned long int match_limit; /* Maximum number of calls to match() */
|
||||
void *callout_data; /* Data passed back in callouts */
|
||||
} pcre_extra;
|
||||
|
||||
/* The structure for passing out data via the pcre_callout_function. We use a
|
||||
structure so that new fields can be added on the end in future versions,
|
||||
without changing the API of the function, thereby allowing old clients to work
|
||||
without modification. */
|
||||
|
||||
typedef struct pcre_callout_block {
|
||||
int version; /* Identifies version of block */
|
||||
/* ------------------------ Version 0 ------------------------------- */
|
||||
int callout_number; /* Number compiled into pattern */
|
||||
int *offset_vector; /* The offset vector */
|
||||
const char *subject; /* The subject being matched */
|
||||
int subject_length; /* The length of the subject */
|
||||
int start_match; /* Offset to start of this match attempt */
|
||||
int current_position; /* Where we currently are */
|
||||
int capture_top; /* Max current capture */
|
||||
int capture_last; /* Most recently closed capture */
|
||||
void *callout_data; /* Data passed in with the call */
|
||||
/* ------------------------------------------------------------------ */
|
||||
} pcre_callout_block;
|
||||
|
||||
/* Indirection for store get and free functions. These can be set to
|
||||
alternative malloc/free functions if required. Special ones are used in the
|
||||
non-recursive case for "frames". There is also an optional callout function
|
||||
that is triggered by the (?C) regex item. Some magic is required for Win32 DLL;
|
||||
it is null on other OS. For Virtual Pascal, these have to be different again.
|
||||
*/
|
||||
|
||||
#ifndef VPCOMPAT
|
||||
PCRE_DATA_SCOPE void *(*pcre_malloc)(size_t);
|
||||
PCRE_DATA_SCOPE void (*pcre_free)(void *);
|
||||
PCRE_DATA_SCOPE void *(*pcre_stack_malloc)(size_t);
|
||||
PCRE_DATA_SCOPE void (*pcre_stack_free)(void *);
|
||||
PCRE_DATA_SCOPE int (*pcre_callout)(pcre_callout_block *);
|
||||
#else /* VPCOMPAT */
|
||||
extern void *pcre_malloc(size_t);
|
||||
extern void pcre_free(void *);
|
||||
extern void *pcre_stack_malloc(size_t);
|
||||
extern void pcre_stack_free(void *);
|
||||
extern int pcre_callout(pcre_callout_block *);
|
||||
#endif /* VPCOMPAT */
|
||||
|
||||
/* Exported PCRE functions */
|
||||
|
||||
extern pcre *pcre_compile(const char *, int, const char **,
|
||||
int *, const unsigned char *);
|
||||
extern int pcre_config(int, void *);
|
||||
extern int pcre_copy_named_substring(const pcre *, const char *,
|
||||
int *, int, const char *, char *, int);
|
||||
extern int pcre_copy_substring(const char *, int *, int, int,
|
||||
char *, int);
|
||||
extern int pcre_exec(const pcre *, const pcre_extra *,
|
||||
const char *, int, int, int, int *, int);
|
||||
extern void pcre_free_substring(const char *);
|
||||
extern void pcre_free_substring_list(const char **);
|
||||
extern int pcre_fullinfo(const pcre *, const pcre_extra *, int,
|
||||
void *);
|
||||
extern int pcre_get_named_substring(const pcre *, const char *,
|
||||
int *, int, const char *, const char **);
|
||||
extern int pcre_get_stringnumber(const pcre *, const char *);
|
||||
extern int pcre_get_substring(const char *, int *, int, int,
|
||||
const char **);
|
||||
extern int pcre_get_substring_list(const char *, int *, int,
|
||||
const char ***);
|
||||
extern int pcre_info(const pcre *, int *, int *);
|
||||
extern const unsigned char *pcre_maketables(void);
|
||||
extern pcre_extra *pcre_study(const pcre *, int, const char **);
|
||||
extern const char *pcre_version(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* End of pcre.h */
|
|
@ -1,305 +0,0 @@
|
|||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
This is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language. See
|
||||
the file Tech.Notes for some information on the internals.
|
||||
|
||||
This module is a wrapper that provides a POSIX API to the underlying PCRE
|
||||
functions.
|
||||
|
||||
Written by: Philip Hazel <ph10@cam.ac.uk>
|
||||
|
||||
Copyright (c) 1997-2003 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Permission is granted to anyone to use this software for any purpose on any
|
||||
computer system, and to redistribute it freely, subject to the following
|
||||
restrictions:
|
||||
|
||||
1. This software is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
|
||||
2. The origin of this software must not be misrepresented, either by
|
||||
explicit claim or by omission.
|
||||
|
||||
3. Altered versions must be plainly marked as such, and must not be
|
||||
misrepresented as being the original software.
|
||||
|
||||
4. If PCRE is embedded in any software that is released under the GNU
|
||||
General Purpose Licence (GPL), then the terms of that licence shall
|
||||
supersede any condition above with which it is incompatible.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#include "internal.h"
|
||||
#include "pcreposix.h"
|
||||
#include "stdlib.h"
|
||||
|
||||
|
||||
|
||||
/* Corresponding tables of PCRE error messages and POSIX error codes. */
|
||||
|
||||
static const char *const estring[] = {
|
||||
ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9, ERR10,
|
||||
ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17, ERR18, ERR19, ERR20,
|
||||
ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR29, ERR29, ERR30,
|
||||
ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39, ERR40,
|
||||
ERR41, ERR42, ERR43, ERR44 };
|
||||
|
||||
/* POSIX error code for each PCRE compile-time error text. Entry i here is the
code returned for the text in entry i of estring[] (pcre_posix_error_code()
indexes both tables with the same value), so the two tables must be kept in
the same order. The quoted text beside each entry is the corresponding ERRn
message. */
static const int eint[] = {
|
||||
REG_EESCAPE, /* "\\ at end of pattern" */
|
||||
REG_EESCAPE, /* "\\c at end of pattern" */
|
||||
REG_EESCAPE, /* "unrecognized character follows \\" */
|
||||
REG_BADBR, /* "numbers out of order in {} quantifier" */
|
||||
REG_BADBR, /* "number too big in {} quantifier" */
|
||||
REG_EBRACK, /* "missing terminating ] for character class" */
|
||||
REG_ECTYPE, /* "invalid escape sequence in character class" */
|
||||
REG_ERANGE, /* "range out of order in character class" */
|
||||
REG_BADRPT, /* "nothing to repeat" */
|
||||
REG_BADRPT, /* "operand of unlimited repeat could match the empty string" */
|
||||
REG_ASSERT, /* "internal error: unexpected repeat" */
|
||||
REG_BADPAT, /* "unrecognized character after (?" */
|
||||
REG_BADPAT, /* "POSIX named classes are supported only within a class" */
|
||||
REG_EPAREN, /* "missing )" */
|
||||
REG_ESUBREG, /* "reference to non-existent subpattern" */
|
||||
REG_INVARG, /* "erroffset passed as NULL" */
|
||||
REG_INVARG, /* "unknown option bit(s) set" */
|
||||
REG_EPAREN, /* "missing ) after comment" */
|
||||
REG_ESIZE, /* "parentheses nested too deeply" */
|
||||
REG_ESIZE, /* "regular expression too large" */
|
||||
REG_ESPACE, /* "failed to get memory" */
|
||||
REG_EPAREN, /* "unmatched parentheses" */
|
||||
REG_ASSERT, /* "internal error: code overflow" */
|
||||
REG_BADPAT, /* "unrecognized character after (?<" */
|
||||
REG_BADPAT, /* "lookbehind assertion is not fixed length" */
|
||||
REG_BADPAT, /* "malformed number after (?(" */
|
||||
REG_BADPAT, /* "conditional group contains more than two branches" */
|
||||
REG_BADPAT, /* "assertion expected after (?(" */
|
||||
REG_BADPAT, /* "(?R or (?digits must be followed by )" */
|
||||
REG_ECTYPE, /* "unknown POSIX class name" */
|
||||
REG_BADPAT, /* "POSIX collating elements are not supported" */
|
||||
REG_INVARG, /* "this version of PCRE is not compiled with PCRE_UTF8 support" */
|
||||
REG_BADPAT, /* "spare error" */
|
||||
REG_BADPAT, /* "character value in \\x{...} sequence is too large" */
|
||||
REG_BADPAT, /* "invalid condition (?(0)" */
|
||||
REG_BADPAT, /* "\\C not allowed in lookbehind assertion" */
|
||||
REG_EESCAPE, /* "PCRE does not support \\L, \\l, \\N, \\P, \\p, \\U, \\u, or \\X" */
|
||||
REG_BADPAT, /* "number after (?C is > 255" */
|
||||
REG_BADPAT, /* "closing ) for (?C expected" */
|
||||
REG_BADPAT, /* "recursive call could loop indefinitely" */
|
||||
REG_BADPAT, /* "unrecognized character after (?P" */
|
||||
REG_BADPAT, /* "syntax error after (?P" */
|
||||
REG_BADPAT, /* "two named groups have the same name" */
|
||||
REG_BADPAT /* "invalid UTF-8 string" */
|
||||
};
|
||||
|
||||
/* Table of texts corresponding to POSIX error codes */
|
||||
|
||||
static const char *const pstring[] = {
|
||||
"", /* Dummy for value 0 */
|
||||
"internal error", /* REG_ASSERT */
|
||||
"invalid repeat counts in {}", /* BADBR */
|
||||
"pattern error", /* BADPAT */
|
||||
"? * + invalid", /* BADRPT */
|
||||
"unbalanced {}", /* EBRACE */
|
||||
"unbalanced []", /* EBRACK */
|
||||
"collation error - not relevant", /* ECOLLATE */
|
||||
"bad class", /* ECTYPE */
|
||||
"bad escape sequence", /* EESCAPE */
|
||||
"empty expression", /* EMPTY */
|
||||
"unbalanced ()", /* EPAREN */
|
||||
"bad range inside []", /* ERANGE */
|
||||
"expression too big", /* ESIZE */
|
||||
"failed to get memory", /* ESPACE */
|
||||
"bad back reference", /* ESUBREG */
|
||||
"bad argument", /* INVARG */
|
||||
"match failed" /* NOMATCH */
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Translate PCRE text code to int *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE compile-time errors are given as strings defined as macros. We can just
|
||||
look them up in a table to turn them into POSIX-style error codes. */
|
||||
|
||||
static int
|
||||
pcre_posix_error_code(const char *s)
|
||||
{
|
||||
size_t i;
|
||||
for (i = 0; i < sizeof(estring)/sizeof(char *); i++)
|
||||
if (strcmp(s, estring[i]) == 0) return eint[i];
|
||||
return REG_ASSERT;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Translate error code to string *
|
||||
*************************************************/
|
||||
|
||||
EXPORT size_t
|
||||
regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
|
||||
{
|
||||
const char *message, *addmessage;
|
||||
size_t length, addlength;
|
||||
|
||||
message = (errcode >= (int)(sizeof(pstring)/sizeof(char *)))?
|
||||
"unknown error code" : pstring[errcode];
|
||||
length = strlen(message) + 1;
|
||||
|
||||
addmessage = " at offset ";
|
||||
addlength = (preg != NULL && (int)preg->re_erroffset != -1)?
|
||||
strlen(addmessage) + 6 : 0;
|
||||
|
||||
if (errbuf_size > 0)
|
||||
{
|
||||
if (addlength > 0 && errbuf_size >= length + addlength)
|
||||
sprintf(errbuf, "%s%s%-6d", message, addmessage, (int)preg->re_erroffset);
|
||||
else
|
||||
{
|
||||
strncpy(errbuf, message, errbuf_size - 1);
|
||||
errbuf[errbuf_size-1] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
return length + addlength;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Free store held by a regex *
|
||||
*************************************************/
|
||||
|
||||
/* Release the compiled pattern held in preg->re_pcre by handing it to the
pcre_free hook. The regex_t structure itself is not freed, and re_pcre is
left holding the old pointer.

Argument:  preg   the regex whose compiled pattern is to be released
Returns:          nothing
*/
EXPORT void
|
||||
regfree(regex_t *preg)
|
||||
{
|
||||
(pcre_free)(preg->re_pcre); /* Parenthesized name as in the original; pcre_free is declared as a function pointer in pcre.h unless VPCOMPAT is set */
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Compile a regular expression *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
Arguments:
|
||||
preg points to a structure for recording the compiled expression
|
||||
pattern the pattern to compile
|
||||
cflags compilation flags
|
||||
|
||||
Returns: 0 on success
|
||||
various non-zero codes on failure
|
||||
*/
|
||||
|
||||
EXPORT int
|
||||
regcomp(regex_t *preg, const char *pattern, int cflags)
|
||||
{
|
||||
const char *errorptr;
|
||||
int erroffset;
|
||||
int options = 0;
|
||||
|
||||
if ((cflags & REG_ICASE) != 0) options |= PCRE_CASELESS;
|
||||
if ((cflags & REG_NEWLINE) != 0) options |= PCRE_MULTILINE;
|
||||
|
||||
preg->re_pcre = pcre_compile(pattern, options, &errorptr, &erroffset, NULL);
|
||||
preg->re_erroffset = erroffset;
|
||||
|
||||
if (preg->re_pcre == NULL) return pcre_posix_error_code(errorptr);
|
||||
|
||||
preg->re_nsub = pcre_info((const pcre *)preg->re_pcre, NULL, NULL);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Match a regular expression *
|
||||
*************************************************/
|
||||
|
||||
/* Unfortunately, PCRE requires 3 ints of working space for each captured
|
||||
substring, so we have to get and release working store instead of just using
|
||||
the POSIX structures as was done in earlier releases when PCRE needed only 2
|
||||
ints. However, if the number of possible capturing brackets is small, use a
|
||||
block of store on the stack, to reduce the use of malloc/free. The threshold is
|
||||
in a macro that can be changed at configure time. */
|
||||
|
||||
EXPORT int
|
||||
regexec(const regex_t *preg, const char *string, size_t nmatch,
|
||||
regmatch_t pmatch[], int eflags)
|
||||
{
|
||||
int rc;
|
||||
int options = 0;
|
||||
int *ovector = NULL;
|
||||
int small_ovector[POSIX_MALLOC_THRESHOLD * 3];
|
||||
BOOL allocated_ovector = FALSE;
|
||||
|
||||
if ((eflags & REG_NOTBOL) != 0) options |= PCRE_NOTBOL;
|
||||
if ((eflags & REG_NOTEOL) != 0) options |= PCRE_NOTEOL;
|
||||
|
||||
((regex_t *)preg)->re_erroffset = (size_t)(-1); /* Only has meaning after compile */
|
||||
|
||||
if (nmatch > 0)
|
||||
{
|
||||
if (nmatch <= POSIX_MALLOC_THRESHOLD)
|
||||
{
|
||||
ovector = &(small_ovector[0]);
|
||||
}
|
||||
else
|
||||
{
|
||||
ovector = (int *)malloc(sizeof(int) * nmatch * 3);
|
||||
if (ovector == NULL) return REG_ESPACE;
|
||||
allocated_ovector = TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
rc = pcre_exec((const pcre *)preg->re_pcre, NULL, string, (int)strlen(string),
|
||||
0, options, ovector, nmatch * 3);
|
||||
|
||||
if (rc == 0) rc = nmatch; /* All captured slots were filled in */
|
||||
|
||||
if (rc >= 0)
|
||||
{
|
||||
size_t i;
|
||||
for (i = 0; i < (size_t)rc; i++)
|
||||
{
|
||||
pmatch[i].rm_so = ovector[i*2];
|
||||
pmatch[i].rm_eo = ovector[i*2+1];
|
||||
}
|
||||
if (allocated_ovector) free(ovector);
|
||||
for (; i < nmatch; i++) pmatch[i].rm_so = pmatch[i].rm_eo = -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
if (allocated_ovector) free(ovector);
|
||||
switch(rc)
|
||||
{
|
||||
case PCRE_ERROR_NOMATCH: return REG_NOMATCH;
|
||||
case PCRE_ERROR_NULL: return REG_INVARG;
|
||||
case PCRE_ERROR_BADOPTION: return REG_INVARG;
|
||||
case PCRE_ERROR_BADMAGIC: return REG_INVARG;
|
||||
case PCRE_ERROR_UNKNOWN_NODE: return REG_ASSERT;
|
||||
case PCRE_ERROR_NOMEMORY: return REG_ESPACE;
|
||||
case PCRE_ERROR_MATCHLIMIT: return REG_ESPACE;
|
||||
case PCRE_ERROR_BADUTF8: return REG_INVARG;
|
||||
case PCRE_ERROR_BADUTF8_OFFSET: return REG_INVARG;
|
||||
default: return REG_ASSERT;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* End of pcreposix.c */
|
|
@ -1,88 +0,0 @@
|
|||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* Copyright (c) 1997-2003 University of Cambridge */
|
||||
|
||||
#ifndef _PCREPOSIX_H
|
||||
#define _PCREPOSIX_H
|
||||
|
||||
/* This is the header for the POSIX wrapper interface to the PCRE Perl-
|
||||
Compatible Regular Expression library. It defines the things POSIX says should
|
||||
be there. I hope. */
|
||||
|
||||
/* Have to include stdlib.h in order to ensure that size_t is defined. */
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
/* Allow for C++ users */
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Options defined by POSIX. */
|
||||
|
||||
#define REG_ICASE 0x01
|
||||
#define REG_NEWLINE 0x02
|
||||
#define REG_NOTBOL 0x04
|
||||
#define REG_NOTEOL 0x08
|
||||
|
||||
/* These are not used by PCRE, but by defining them we make it easier
|
||||
to slot PCRE into existing programs that make POSIX calls. */
|
||||
|
||||
#define REG_EXTENDED 0
|
||||
#define REG_NOSUB 0
|
||||
|
||||
/* Error values. Not all these are relevant or used by the wrapper. */
|
||||
|
||||
enum {
|
||||
REG_ASSERT = 1, /* internal error ? */
|
||||
REG_BADBR, /* invalid repeat counts in {} */
|
||||
REG_BADPAT, /* pattern error */
|
||||
REG_BADRPT, /* ? * + invalid */
|
||||
REG_EBRACE, /* unbalanced {} */
|
||||
REG_EBRACK, /* unbalanced [] */
|
||||
REG_ECOLLATE, /* collation error - not relevant */
|
||||
REG_ECTYPE, /* bad class */
|
||||
REG_EESCAPE, /* bad escape sequence */
|
||||
REG_EMPTY, /* empty expression */
|
||||
REG_EPAREN, /* unbalanced () */
|
||||
REG_ERANGE, /* bad range inside [] */
|
||||
REG_ESIZE, /* expression too big */
|
||||
REG_ESPACE, /* failed to get memory */
|
||||
REG_ESUBREG, /* bad back reference */
|
||||
REG_INVARG, /* bad argument */
|
||||
REG_NOMATCH /* match failed */
|
||||
};
|
||||
|
||||
|
||||
/* The structure representing a compiled regular expression. */
|
||||
|
||||
typedef struct {
|
||||
void *re_pcre;
|
||||
size_t re_nsub;
|
||||
size_t re_erroffset;
|
||||
} regex_t;
|
||||
|
||||
/* The structure in which a captured offset is returned. */
|
||||
|
||||
typedef int regoff_t;
|
||||
|
||||
typedef struct {
|
||||
regoff_t rm_so;
|
||||
regoff_t rm_eo;
|
||||
} regmatch_t;
|
||||
|
||||
/* The functions */
|
||||
|
||||
extern int regcomp(regex_t *, const char *, int);
|
||||
extern int regexec(const regex_t *, const char *, size_t, regmatch_t *, int);
|
||||
extern size_t regerror(int, const regex_t *, char *, size_t);
|
||||
extern void regfree(regex_t *);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* End of pcreposix.h */
|
472
src/pcre/study.c
472
src/pcre/study.c
|
@ -1,472 +0,0 @@
|
|||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
This is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language. See
|
||||
the file Tech.Notes for some information on the internals.
|
||||
|
||||
Written by: Philip Hazel <ph10@cam.ac.uk>
|
||||
|
||||
Copyright (c) 1997-2003 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Permission is granted to anyone to use this software for any purpose on any
|
||||
computer system, and to redistribute it freely, subject to the following
|
||||
restrictions:
|
||||
|
||||
1. This software is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
|
||||
2. The origin of this software must not be misrepresented, either by
|
||||
explicit claim or by omission.
|
||||
|
||||
3. Altered versions must be plainly marked as such, and must not be
|
||||
misrepresented as being the original software.
|
||||
|
||||
4. If PCRE is embedded in any software that is released under the GNU
|
||||
General Purpose Licence (GPL), then the terms of that licence shall
|
||||
supersede any condition above with which it is incompatible.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* Include the internals header, which itself includes Standard C headers plus
|
||||
the external pcre header. */
|
||||
|
||||
#include "internal.h"
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Set a bit and maybe its alternate case *
|
||||
*************************************************/
|
||||
|
||||
/* Given a character, set its bit in the table, and also the bit for the other
|
||||
version of a letter if we are caseless.
|
||||
|
||||
Arguments:
|
||||
start_bits points to the bit map
|
||||
c is the character
|
||||
caseless the caseless flag
|
||||
cd the block with char table pointers
|
||||
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
static void
|
||||
set_bit(uschar *start_bits, int c, BOOL caseless, compile_data *cd)
|
||||
{
|
||||
start_bits[c/8] |= (1 << (c&7));
|
||||
if (caseless && (cd->ctypes[c] & ctype_letter) != 0)
|
||||
start_bits[cd->fcc[c]/8] |= (1 << (cd->fcc[c]&7));
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Create bitmap of starting chars *
|
||||
*************************************************/
|
||||
|
||||
/* This function scans a compiled unanchored expression and attempts to build a
|
||||
bitmap of the set of initial characters. If it can't, it returns FALSE. As time
|
||||
goes by, we may be able to get more clever at doing this.
|
||||
|
||||
Arguments:
|
||||
code points to an expression
|
||||
start_bits points to a 32-byte table, initialized to 0
|
||||
caseless the current state of the caseless flag
|
||||
utf8 TRUE if in UTF-8 mode
|
||||
cd the block with char table pointers
|
||||
|
||||
Returns: TRUE if table built, FALSE otherwise
|
||||
*/
|
||||
|
||||
static BOOL
|
||||
set_start_bits(const uschar *code, uschar *start_bits, BOOL caseless,
|
||||
BOOL utf8, compile_data *cd)
|
||||
{
|
||||
register int c;
|
||||
|
||||
/* This next statement and the later reference to dummy are here in order to
|
||||
trick the optimizer of the IBM C compiler for OS/2 into generating correct
|
||||
code. Apparently IBM isn't going to fix the problem, and we would rather not
|
||||
disable optimization (in this module it actually makes a big difference, and
|
||||
the pcre module can use all the optimization it can get).
|
||||
|
||||
Breaking OS/2 in favor of gcc's paranoia. --lynX 2016
|
||||
*/
|
||||
|
||||
do
|
||||
{
|
||||
const uschar *tcode = code + 1 + LINK_SIZE;
|
||||
BOOL try_next = TRUE;
|
||||
|
||||
while (try_next)
|
||||
{
|
||||
/* If a branch starts with a bracket or a positive lookahead assertion,
|
||||
recurse to set bits from within them. That's all for this branch. */
|
||||
|
||||
if ((int)*tcode >= OP_BRA || *tcode == OP_ASSERT)
|
||||
{
|
||||
if (!set_start_bits(tcode, start_bits, caseless, utf8, cd))
|
||||
return FALSE;
|
||||
try_next = FALSE;
|
||||
}
|
||||
|
||||
else switch(*tcode)
|
||||
{
|
||||
default:
|
||||
return FALSE;
|
||||
|
||||
/* Skip over callout */
|
||||
|
||||
case OP_CALLOUT:
|
||||
tcode += 2;
|
||||
break;
|
||||
|
||||
/* Skip over extended extraction bracket number */
|
||||
|
||||
case OP_BRANUMBER:
|
||||
tcode += 3;
|
||||
break;
|
||||
|
||||
/* Skip over lookbehind and negative lookahead assertions */
|
||||
|
||||
case OP_ASSERT_NOT:
|
||||
case OP_ASSERTBACK:
|
||||
case OP_ASSERTBACK_NOT:
|
||||
do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
|
||||
tcode += 1+LINK_SIZE;
|
||||
break;
|
||||
|
||||
/* Skip over an option setting, changing the caseless flag */
|
||||
|
||||
case OP_OPT:
|
||||
caseless = (tcode[1] & PCRE_CASELESS) != 0;
|
||||
tcode += 2;
|
||||
break;
|
||||
|
||||
/* BRAZERO does the bracket, but carries on. */
|
||||
|
||||
case OP_BRAZERO:
|
||||
case OP_BRAMINZERO:
|
||||
if (!set_start_bits(++tcode, start_bits, caseless, utf8, cd))
|
||||
return FALSE;
|
||||
do tcode += GET(tcode,1); while (*tcode == OP_ALT);
|
||||
tcode += 1+LINK_SIZE;
|
||||
break;
|
||||
|
||||
/* Single-char * or ? sets the bit and tries the next item */
|
||||
|
||||
case OP_STAR:
|
||||
case OP_MINSTAR:
|
||||
case OP_QUERY:
|
||||
case OP_MINQUERY:
|
||||
set_bit(start_bits, tcode[1], caseless, cd);
|
||||
tcode += 2;
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8) while ((*tcode & 0xc0) == 0x80) tcode++;
|
||||
#endif
|
||||
break;
|
||||
|
||||
/* Single-char upto sets the bit and tries the next */
|
||||
|
||||
case OP_UPTO:
|
||||
case OP_MINUPTO:
|
||||
set_bit(start_bits, tcode[3], caseless, cd);
|
||||
tcode += 4;
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8) while ((*tcode & 0xc0) == 0x80) tcode++;
|
||||
#endif
|
||||
break;
|
||||
|
||||
/* At least one single char sets the bit and stops */
|
||||
|
||||
case OP_EXACT: /* Fall through */
|
||||
tcode++;
|
||||
|
||||
case OP_CHARS: /* Fall through */
|
||||
tcode++;
|
||||
|
||||
case OP_PLUS:
|
||||
case OP_MINPLUS:
|
||||
set_bit(start_bits, tcode[1], caseless, cd);
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
/* Single character type sets the bits and stops */
|
||||
|
||||
case OP_NOT_DIGIT:
|
||||
for (c = 0; c < 32; c++)
|
||||
start_bits[c] |= ~cd->cbits[c+cbit_digit];
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
case OP_DIGIT:
|
||||
for (c = 0; c < 32; c++)
|
||||
start_bits[c] |= cd->cbits[c+cbit_digit];
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
case OP_NOT_WHITESPACE:
|
||||
for (c = 0; c < 32; c++)
|
||||
start_bits[c] |= ~cd->cbits[c+cbit_space];
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
case OP_WHITESPACE:
|
||||
for (c = 0; c < 32; c++)
|
||||
start_bits[c] |= cd->cbits[c+cbit_space];
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
case OP_NOT_WORDCHAR:
|
||||
for (c = 0; c < 32; c++)
|
||||
start_bits[c] |= ~cd->cbits[c+cbit_word];
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
case OP_WORDCHAR:
|
||||
for (c = 0; c < 32; c++)
|
||||
start_bits[c] |= cd->cbits[c+cbit_word];
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
/* One or more character type fudges the pointer and restarts, knowing
|
||||
it will hit a single character type and stop there. */
|
||||
|
||||
case OP_TYPEPLUS:
|
||||
case OP_TYPEMINPLUS:
|
||||
tcode++;
|
||||
break;
|
||||
|
||||
case OP_TYPEEXACT:
|
||||
tcode += 3;
|
||||
break;
|
||||
|
||||
/* Zero or more repeats of character types set the bits and then
|
||||
try again. */
|
||||
|
||||
case OP_TYPEUPTO:
|
||||
case OP_TYPEMINUPTO:
|
||||
tcode += 2; /* Fall through */
|
||||
|
||||
case OP_TYPESTAR:
|
||||
case OP_TYPEMINSTAR:
|
||||
case OP_TYPEQUERY:
|
||||
case OP_TYPEMINQUERY:
|
||||
switch(tcode[1])
|
||||
{
|
||||
case OP_ANY:
|
||||
return FALSE;
|
||||
|
||||
case OP_NOT_DIGIT:
|
||||
for (c = 0; c < 32; c++)
|
||||
start_bits[c] |= ~cd->cbits[c+cbit_digit];
|
||||
break;
|
||||
|
||||
case OP_DIGIT:
|
||||
for (c = 0; c < 32; c++)
|
||||
start_bits[c] |= cd->cbits[c+cbit_digit];
|
||||
break;
|
||||
|
||||
case OP_NOT_WHITESPACE:
|
||||
for (c = 0; c < 32; c++)
|
||||
start_bits[c] |= ~cd->cbits[c+cbit_space];
|
||||
break;
|
||||
|
||||
case OP_WHITESPACE:
|
||||
for (c = 0; c < 32; c++)
|
||||
start_bits[c] |= cd->cbits[c+cbit_space];
|
||||
break;
|
||||
|
||||
case OP_NOT_WORDCHAR:
|
||||
for (c = 0; c < 32; c++)
|
||||
start_bits[c] |= ~cd->cbits[c+cbit_word];
|
||||
break;
|
||||
|
||||
case OP_WORDCHAR:
|
||||
for (c = 0; c < 32; c++)
|
||||
start_bits[c] |= cd->cbits[c+cbit_word];
|
||||
break;
|
||||
}
|
||||
|
||||
tcode += 2;
|
||||
break;
|
||||
|
||||
/* Character class where all the information is in a bit map: set the
|
||||
bits and either carry on or not, according to the repeat count. If it was
|
||||
a negative class, and we are operating with UTF-8 characters, any byte
|
||||
with a value >= 0xc4 is a potentially valid starter because it starts a
|
||||
character with a value > 255. */
|
||||
|
||||
case OP_NCLASS:
|
||||
if (utf8)
|
||||
{
|
||||
start_bits[24] |= 0xf0; /* Bits for 0xc4 - 0xc8 */
|
||||
memset(start_bits+25, 0xff, 7); /* Bits for 0xc9 - 0xff */
|
||||
}
|
||||
/* Fall through */
|
||||
|
||||
case OP_CLASS:
|
||||
{
|
||||
tcode++;
|
||||
|
||||
/* In UTF-8 mode, the bits in a bit map correspond to character
|
||||
values, not to byte values. However, the bit map we are constructing is
|
||||
for byte values. So we have to do a conversion for characters whose
|
||||
value is > 127. In fact, there are only two possible starting bytes for
|
||||
characters in the range 128 - 255. */
|
||||
|
||||
if (utf8)
|
||||
{
|
||||
for (c = 0; c < 16; c++) start_bits[c] |= tcode[c];
|
||||
for (c = 128; c < 256; c++)
|
||||
{
|
||||
if ((tcode[c/8] && (1 << (c&7))) != 0)
|
||||
{
|
||||
int d = (c >> 6) | 0xc0; /* Set bit for this starter */
|
||||
start_bits[d/8] |= (1 << (d&7)); /* and then skip on to the */
|
||||
c = (c & 0xc0) + 0x40 - 1; /* next relevant character. */
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* In non-UTF-8 mode, the two bit maps are completely compatible. */
|
||||
|
||||
else
|
||||
{
|
||||
for (c = 0; c < 32; c++) start_bits[c] |= tcode[c];
|
||||
}
|
||||
|
||||
/* Advance past the bit map, and act on what follows */
|
||||
|
||||
tcode += 32;
|
||||
switch (*tcode)
|
||||
{
|
||||
case OP_CRSTAR:
|
||||
case OP_CRMINSTAR:
|
||||
case OP_CRQUERY:
|
||||
case OP_CRMINQUERY:
|
||||
tcode++;
|
||||
break;
|
||||
|
||||
case OP_CRRANGE:
|
||||
case OP_CRMINRANGE:
|
||||
if (((tcode[1] << 8) + tcode[2]) == 0) tcode += 5;
|
||||
else try_next = FALSE;
|
||||
break;
|
||||
|
||||
default:
|
||||
try_next = FALSE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
break; /* End of bitmap class handling */
|
||||
|
||||
} /* End of switch */
|
||||
} /* End of try_next loop */
|
||||
|
||||
code += GET(code, 1); /* Advance to next branch */
|
||||
}
|
||||
while (*code == OP_ALT);
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Study a compiled expression *
|
||||
*************************************************/
|
||||
|
||||
/* This function is handed a compiled expression that it must study to produce
|
||||
information that will speed up the matching. It returns a pcre_extra block
|
||||
which then gets handed back to pcre_exec().
|
||||
|
||||
Arguments:
|
||||
re points to the compiled expression
|
||||
options contains option bits
|
||||
errorptr points to where to place error messages;
|
||||
set NULL unless error
|
||||
|
||||
Returns: pointer to a pcre_extra block, with study_data filled in and the
|
||||
appropriate flag set;
|
||||
NULL on error or if no optimization possible
|
||||
*/
|
||||
|
||||
/* Study a compiled expression to produce information that speeds up matching.
The result is a pcre_extra block which the caller hands back to pcre_exec().

Arguments:
  external_re  points to the compiled expression
  options      contains option bits; any bit outside PUBLIC_STUDY_OPTIONS is
               rejected
  errorptr     points to where to place error messages;
               set NULL unless error (must itself be non-NULL)

Returns:       pointer to a pcre_extra block, with study_data filled in and the
               appropriate flag set;
               NULL on error or if no optimization possible
*/
EXPORT pcre_extra *
pcre_study(const pcre *external_re, int options, const char **errorptr)
{
uschar start_bits[32];
pcre_extra *extra;
pcre_study_data *study;
const real_pcre *re = (const real_pcre *)external_re;
const uschar *code;
compile_data compile_block;

*errorptr = NULL;

/* Validate the argument before touching any of its fields. */

if (re == NULL || re->magic_number != MAGIC_NUMBER)
  {
  *errorptr = "argument is not a compiled regular expression";
  return NULL;
  }

if ((options & ~PUBLIC_STUDY_OPTIONS) != 0)
  {
  *errorptr = "unknown or incorrect option bit(s) set";
  return NULL;
  }

/* For an anchored pattern, or an unanchored pattern that has a first char, or
a multiline pattern that matches only at "line starts", no further processing
at present. */

if ((re->options & (PCRE_ANCHORED|PCRE_FIRSTSET|PCRE_STARTLINE)) != 0)
  return NULL;

/* The compiled code follows the real_pcre header and the name table. This is
computed only now, because it reads fields of re and so must not run before
the NULL check above. */

code = (const uschar *)re + sizeof(real_pcre) +
  (re->name_count * re->name_entry_size);

/* Set the character tables in the block which is passed around */

compile_block.lcc = re->tables + lcc_offset;
compile_block.fcc = re->tables + fcc_offset;
compile_block.cbits = re->tables + cbits_offset;
compile_block.ctypes = re->tables + ctypes_offset;

/* See if we can find a fixed set of initial characters for the pattern. */

memset(start_bits, 0, 32 * sizeof(uschar));
if (!set_start_bits(code, start_bits, (re->options & PCRE_CASELESS) != 0,
  (re->options & PCRE_UTF8) != 0, &compile_block)) return NULL;

/* Get a pcre_extra block and a pcre_study_data block. The study data is put in
the latter, which is pointed to by the former, which may also get additional
data set later by the calling program. At the moment, the size of
pcre_study_data is fixed. We nevertheless save it in a field for returning via
the pcre_fullinfo() function so that if it becomes variable in the future, we
don't have to change that code. */

extra = (pcre_extra *)(pcre_malloc)
  (sizeof(pcre_extra) + sizeof(pcre_study_data));

if (extra == NULL)
  {
  *errorptr = "failed to get memory";
  return NULL;
  }

/* Both blocks come from the single allocation above; the study data sits
directly after the pcre_extra header. */

study = (pcre_study_data *)((char *)extra + sizeof(pcre_extra));
extra->flags = PCRE_EXTRA_STUDY_DATA;
extra->study_data = study;

study->size = sizeof(pcre_study_data);
study->options = PCRE_STUDY_MAPPED;
memcpy(study->start_bits, start_bits, sizeof(start_bits));

return extra;
}
|
||||
|
||||
/* End of study.c */
|
|
@ -1,35 +0,0 @@
|
|||
/*------------------------------------------------------------------
|
||||
* Wrapper for the pcre modules.
|
||||
*
|
||||
* Compile the pcre modules into one file.
|
||||
* To make this possible the pcre/internal.h had to be augmented with
|
||||
* protection against multiple inclusion.
|
||||
*------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#include "driver.h"
|
||||
|
||||
#include "pkg-pcre.h"
|
||||
|
||||
#if defined(USE_BUILTIN_PCRE)
|
||||
|
||||
#include "interpret.h"
|
||||
#include "simulate.h"
|
||||
|
||||
/* Provide a definition for NEWLINE */
|
||||
#define NEWLINE '\n'
|
||||
|
||||
/* DEBUG has a different meaning for pcre than for us */
|
||||
#ifdef DEBUG
|
||||
# undef DEBUG
|
||||
#endif
|
||||
|
||||
/* activated UTF8 support --lynX 2008 */
|
||||
#define SUPPORT_UTF8
|
||||
|
||||
#include "pcre/pcre.c"
|
||||
#include "pcre/get.c"
|
||||
#include "pcre/maketables.c"
|
||||
#include "pcre/study.c"
|
||||
|
||||
#endif /* USE_BUILTIN_PCRE */
|
|
@ -11,13 +11,8 @@
|
|||
|
||||
#include "driver.h"
|
||||
|
||||
#if defined(USE_BUILTIN_PCRE) || !defined(HAS_PCRE)
|
||||
# include "pcre/pcre.h"
|
||||
# if !defined(USE_BUILTIN_PCRE)
|
||||
# define USE_BUILTIN_PCRE
|
||||
# endif
|
||||
#else
|
||||
# include <pcre.h>
|
||||
#ifdef HAS_PCRE
|
||||
#include <pcre.h>
|
||||
#endif
|
||||
|
||||
/* Error code to be returned if too many backtracks are detected.
|
||||
|
@ -25,7 +20,7 @@
|
|||
#ifdef PCRE_ERROR_RECURSIONLIMIT
|
||||
#define RE_ERROR_BACKTRACK PCRE_ERROR_RECURSIONLIMIT
|
||||
#else
|
||||
#define RE_ERROR_BACKTRACK PCRE_ERROR_MATCHLIMIT
|
||||
#define RE_ERROR_BACKTRACK (-8) // PCRE_ERROR_MATCHLIMIT from PCRE
|
||||
#endif
|
||||
|
||||
#endif /* PKG_PCRE_H_ */
|
||||
|
|
|
@ -293,18 +293,7 @@ enable_lpc_array_calls=yes
|
|||
enable_use_deprecated=no
|
||||
|
||||
# Enable PCRE instead of traditional regexps
|
||||
# 'no': use traditional regexps by default
|
||||
# 'no-builtin': use traditional regexps by default, or the builtin PCRE
|
||||
# package if PCRE is requested
|
||||
# 'builtin': use PCRE package by default, using the builtin package
|
||||
# 'yes': use the system's PCRE package if available, otherwise the
|
||||
# builtin package
|
||||
#
|
||||
# yes is nicer to your system, but currently many linuces are distributed
|
||||
# with old non-utf8 pcre variants, so let's use our own copy by default
|
||||
# until this is settled. if you think your pcre installation is smarter,
|
||||
# say "yes" here instead of "builtin"
|
||||
enable_use_pcre=builtin
|
||||
enable_use_pcre=yes
|
||||
|
||||
#-- COMPILATION
|
||||
|
||||
|
|
|
@ -17,7 +17,7 @@ version_longtype="stable"
|
|||
# A timestamp, to be used by bumpversion and other scripts.
|
||||
# It can be used, for example, to 'touch' this file on every build, thus
|
||||
# forcing revision control systems to add it on every checkin automatically.
|
||||
version_stamp="Sun Aug 14 19:57:43 CEST 2016"
|
||||
version_stamp="Thu Sep 29 11:49:32 CEST 2016"
|
||||
|
||||
# Okay, LDMUD is using 3.x.x so to avoid conflicts let's just use 4.x.x
|
||||
version_major=4
|
||||
|
|
Loading…
Reference in a new issue