mirror of
git://git.psyced.org/git/psyclpc
synced 2024-08-15 03:20:16 +00:00
removed historic pcre bundling
This commit is contained in:
parent
27f21a3bf0
commit
8bd51f2a48
34 changed files with 26 additions and 13115 deletions
2
.gitignore
vendored
2
.gitignore
vendored
|
@ -22,7 +22,6 @@ src/autoconf/autom4te.cache
|
||||||
src/config.h
|
src/config.h
|
||||||
src/config.status
|
src/config.status
|
||||||
src/configure-do
|
src/configure-do
|
||||||
src/dftables
|
|
||||||
src/efun_defs.c
|
src/efun_defs.c
|
||||||
src/instrs.h
|
src/instrs.h
|
||||||
src/lang.c
|
src/lang.c
|
||||||
|
@ -33,7 +32,6 @@ src/machine.h.in
|
||||||
src/make_func.c
|
src/make_func.c
|
||||||
src/mkfunc
|
src/mkfunc
|
||||||
src/patchlevel.h
|
src/patchlevel.h
|
||||||
src/pcre/chartables.c
|
|
||||||
src/psyclpc
|
src/psyclpc
|
||||||
src/settings/nedko_psyced
|
src/settings/nedko_psyced
|
||||||
src/settings/psyced-current
|
src/settings/psyced-current
|
||||||
|
|
|
@ -1,3 +1,5 @@
|
||||||
|
[see git log for recent changes]
|
||||||
|
|
||||||
2010-06-21 (nedko/lynX) (4.0.14)
|
2010-06-21 (nedko/lynX) (4.0.14)
|
||||||
- removed traditional configure script from distribution
|
- removed traditional configure script from distribution
|
||||||
new ./configure uses autotools to generate the configure script
|
new ./configure uses autotools to generate the configure script
|
||||||
|
|
|
@ -62,11 +62,6 @@ Inc.
|
||||||
The Apache-compatible MD5 password encryption is Copyright (C) 2000 Apache
|
The Apache-compatible MD5 password encryption is Copyright (C) 2000 Apache
|
||||||
Software Foundation.
|
Software Foundation.
|
||||||
|
|
||||||
The PCRE (Perl Compatible Regular Expression) is Copyright (C) 1997-2001 by
|
|
||||||
Philip Hazel. LDMud contains just the files required for the driver (with
|
|
||||||
small modifications); see pcre/LICENCE for the licence terms and the location
|
|
||||||
of the full package, and pcre/README.LDMUD for an explanation of the changes.
|
|
||||||
|
|
||||||
The lpc-mode.el for EMACS is Copyright (C) 2002 Vivek Dasmohapatra
|
The lpc-mode.el for EMACS is Copyright (C) 2002 Vivek Dasmohapatra
|
||||||
<vivek@etla.org>, and distributed under the GPL.
|
<vivek@etla.org>, and distributed under the GPL.
|
||||||
|
|
||||||
|
|
1
TODO
1
TODO
|
@ -10,7 +10,6 @@ BUGS
|
||||||
- configure should warn more vehemently when libidn is missing
|
- configure should warn more vehemently when libidn is missing
|
||||||
- libpsyc isnt recognized even if properly installed
|
- libpsyc isnt recognized even if properly installed
|
||||||
- x86_64 seems to require -ldl explicitly at the end of libs
|
- x86_64 seems to require -ldl explicitly at the end of libs
|
||||||
- sometimes -lpsyc and -lpcre are added twice to $LIBS !?
|
|
||||||
- should autoconf to sysmalloc also for osol (OpenSolaris)
|
- should autoconf to sysmalloc also for osol (OpenSolaris)
|
||||||
- #define USE_EXPAT und JSON werden trotzdem gesetzt in config.h
|
- #define USE_EXPAT und JSON werden trotzdem gesetzt in config.h
|
||||||
auch wenn configure keine libs gefunden hat (egal, wir verwenden beide nicht)
|
auch wenn configure keine libs gefunden hat (egal, wir verwenden beide nicht)
|
||||||
|
|
|
@ -105,8 +105,8 @@ SRC = access_check.c actions.c array.c backend.c bitstrings.c call_out.c \
|
||||||
interpret.c \
|
interpret.c \
|
||||||
lex.c main.c mapping.c md5.c mempools.c mregex.c mstrings.c object.c \
|
lex.c main.c mapping.c md5.c mempools.c mregex.c mstrings.c object.c \
|
||||||
otable.c\
|
otable.c\
|
||||||
parser.c parse.c pkg-alists.c pgk-iksemel.c pkg-idna.c pkg-expat.c \
|
parser.c parse.c pkg-alists.c pkg-iksemel.c pkg-idna.c pkg-expat.c \
|
||||||
pkg-psyc.c pkg-mccp.c pkg-mysql.c pkg-pcre.c \
|
pkg-psyc.c pkg-mccp.c pkg-mysql.c \
|
||||||
pkg-pgsql.c pkg-sqlite.c pkg-tls.c pkg-gnunet.c \
|
pkg-pgsql.c pkg-sqlite.c pkg-tls.c pkg-gnunet.c \
|
||||||
ptmalloc.c port.c ptrtable.c \
|
ptmalloc.c port.c ptrtable.c \
|
||||||
random.c regexp.c sha1.c simulate.c simul_efun.c stdstrings.c \
|
random.c regexp.c sha1.c simulate.c simul_efun.c stdstrings.c \
|
||||||
|
@ -118,7 +118,7 @@ OBJ = access_check.o actions.o array.o backend.o bitstrings.o call_out.o \
|
||||||
lex.o main.o mapping.o md5.o mempools.o mregex.o mstrings.o object.o \
|
lex.o main.o mapping.o md5.o mempools.o mregex.o mstrings.o object.o \
|
||||||
otable.o \
|
otable.o \
|
||||||
parser.o parse.o pkg-alists.o pkg-iksemel.o pkg-idna.o pkg-expat.o \
|
parser.o parse.o pkg-alists.o pkg-iksemel.o pkg-idna.o pkg-expat.o \
|
||||||
pkg-psyc.o pkg-mccp.o pkg-mysql.o pkg-pcre.o \
|
pkg-psyc.o pkg-mccp.o pkg-mysql.o \
|
||||||
pkg-pgsql.o pkg-sqlite.o pkg-tls.o pkg-gnunet.o \
|
pkg-pgsql.o pkg-sqlite.o pkg-tls.o pkg-gnunet.o \
|
||||||
ptmalloc.o port.o ptrtable.o \
|
ptmalloc.o port.o ptrtable.o \
|
||||||
random.o regexp.o sha1.o simulate.o simul_efun.o stdstrings.o \
|
random.o regexp.o sha1.o simulate.o simul_efun.o stdstrings.o \
|
||||||
|
@ -162,7 +162,6 @@ lint: *.c
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
$(RM) $(YACCTAB)h $(YACCTAB)c make_func.c *.o mkfunc@EXEEXT@
|
$(RM) $(YACCTAB)h $(YACCTAB)c make_func.c *.o mkfunc@EXEEXT@
|
||||||
$(RM) dftables@EXEEXT@ pcre/chartables.c
|
|
||||||
$(RM) stdstrings.c stdstrings.h
|
$(RM) stdstrings.c stdstrings.h
|
||||||
$(RM) efun_defs.c instrs.h lang.y lang.h lang.c y.output tags TAGS
|
$(RM) efun_defs.c instrs.h lang.y lang.h lang.c y.output tags TAGS
|
||||||
$(RM) @PROGNAME@@EXEEXT@ core mudlib/core mudlib/debug.log
|
$(RM) @PROGNAME@@EXEEXT@ core mudlib/core mudlib/debug.log
|
||||||
|
@ -195,12 +194,6 @@ make_func.c: make_func.y
|
||||||
mkfunc@EXEEXT@: mkfunc.o hash.o exec.h
|
mkfunc@EXEEXT@: mkfunc.o hash.o exec.h
|
||||||
$(CC) @OPTIMIZE_LINKING@ $(LDFLAGS) mkfunc.o hash.o -o mkfunc@EXEEXT@
|
$(CC) @OPTIMIZE_LINKING@ $(LDFLAGS) mkfunc.o hash.o -o mkfunc@EXEEXT@
|
||||||
|
|
||||||
dftables.o : pcre/dftables.c pcre/maketables.c pcre/config.h
|
|
||||||
$(CC) $(CFLAGS) -c pcre/dftables.c -o dftables.o
|
|
||||||
|
|
||||||
dftables@EXEEXT@ : dftables.o
|
|
||||||
$(CC) @OPTIMIZE_LINKING@ $(LDFLAGS) dftables.o -o dftables@EXEEXT@
|
|
||||||
|
|
||||||
lang.y: mkfunc@EXEEXT@ prolang.y config.h
|
lang.y: mkfunc@EXEEXT@ prolang.y config.h
|
||||||
$(RM) lang.y
|
$(RM) lang.y
|
||||||
./mkfunc@EXEEXT@ lang
|
./mkfunc@EXEEXT@ lang
|
||||||
|
@ -221,9 +214,6 @@ lang.c lang.h: lang.y
|
||||||
@CLEAN_YACC_TAB@
|
@CLEAN_YACC_TAB@
|
||||||
$(MV) $(YACCTAB)h lang.h
|
$(MV) $(YACCTAB)h lang.h
|
||||||
|
|
||||||
pcre/chartables.c : dftables@EXEEXT@
|
|
||||||
./dftables@EXEEXT@ pcre/chartables.c
|
|
||||||
|
|
||||||
random.o : random.c config.h driver.h
|
random.o : random.c config.h driver.h
|
||||||
$(CC) $(CFLAGS) $(SFMT_FLAGS) -c random.c -o random.o
|
$(CC) $(CFLAGS) $(SFMT_FLAGS) -c random.c -o random.o
|
||||||
|
|
||||||
|
@ -232,10 +222,10 @@ random.o : random.c config.h driver.h
|
||||||
|
|
||||||
# Generated source files (overlaps with of SRC) which need to be
|
# Generated source files (overlaps with of SRC) which need to be
|
||||||
# present for mkdepend to work.
|
# present for mkdepend to work.
|
||||||
GENSRC = make_func.c stdstrings.c lang.c instrs.h pcre/chartables.c
|
GENSRC = make_func.c stdstrings.c lang.c instrs.h
|
||||||
|
|
||||||
# Macros for MkDepend:
|
# Macros for MkDepend:
|
||||||
SKELETON = $(SRC) mkfunc.c pcre/dftables.c
|
SKELETON = $(SRC) mkfunc.c
|
||||||
EXCEPT = -x efun_defs.c
|
EXCEPT = -x efun_defs.c
|
||||||
SELECT = -S instrs.h -S stdstrings.h -S make_func.c -S lang.c -S lang.h -S efun_defs.c
|
SELECT = -S instrs.h -S stdstrings.h -S make_func.c -S lang.c -S lang.h -S efun_defs.c
|
||||||
|
|
||||||
|
@ -407,7 +397,7 @@ mkfunc.o : make_func.c ../mudlib/sys/driver_hook.h hash.h exec.h \
|
||||||
mregex.o : ../mudlib/sys/regexp.h ../mudlib/sys/driver_hook.h \
|
mregex.o : ../mudlib/sys/regexp.h ../mudlib/sys/driver_hook.h \
|
||||||
../mudlib/sys/debug_info.h xalloc.h svalue.h strfuns.h simulate.h \
|
../mudlib/sys/debug_info.h xalloc.h svalue.h strfuns.h simulate.h \
|
||||||
regexp.h pkg-pcre.h mstrings.h main.h interpret.h hash.h gcollect.h \
|
regexp.h pkg-pcre.h mstrings.h main.h interpret.h hash.h gcollect.h \
|
||||||
comm.h mregex.h driver.h typedefs.h sent.h bytecode.h pcre/pcre.h \
|
comm.h mregex.h driver.h typedefs.h sent.h bytecode.h \
|
||||||
backend.h pkg-tls.h port.h config.h hosts/unix.h hosts/be/be.h \
|
backend.h pkg-tls.h port.h config.h hosts/unix.h hosts/be/be.h \
|
||||||
machine.h
|
machine.h
|
||||||
|
|
||||||
|
@ -443,8 +433,6 @@ parser.o : lang.c ../mudlib/sys/driver_hook.h i-eval_cost.h xalloc.h \
|
||||||
my-alloca.h typedefs.h driver.h strfuns.h hash.h ptrtable.h sent.h \
|
my-alloca.h typedefs.h driver.h strfuns.h hash.h ptrtable.h sent.h \
|
||||||
bytecode.h port.h config.h hosts/unix.h hosts/be/be.h machine.h
|
bytecode.h port.h config.h hosts/unix.h hosts/be/be.h machine.h
|
||||||
|
|
||||||
dftables.o : pcre/maketables.c pcre/internal.h pcre/pcre.h pcre/config.h
|
|
||||||
|
|
||||||
pkg-alists.o : i-svalue_cmp.h xalloc.h svalue.h simulate.h mstrings.h \
|
pkg-alists.o : i-svalue_cmp.h xalloc.h svalue.h simulate.h mstrings.h \
|
||||||
main.h interpret.h array.h my-alloca.h pkg-alists.h typedefs.h driver.h \
|
main.h interpret.h array.h my-alloca.h pkg-alists.h typedefs.h driver.h \
|
||||||
closure.h strfuns.h sent.h bytecode.h hash.h backend.h port.h config.h \
|
closure.h strfuns.h sent.h bytecode.h hash.h backend.h port.h config.h \
|
||||||
|
@ -465,12 +453,6 @@ pkg-mysql.o : xalloc.h svalue.h stdstrings.h simulate.h mstrings.h main.h \
|
||||||
driver.h strfuns.h sent.h bytecode.h hash.h exec.h backend.h port.h \
|
driver.h strfuns.h sent.h bytecode.h hash.h exec.h backend.h port.h \
|
||||||
config.h hosts/unix.h hosts/be/be.h machine.h
|
config.h hosts/unix.h hosts/be/be.h machine.h
|
||||||
|
|
||||||
pkg-pcre.o : pcre/study.c pcre/maketables.c pcre/get.c pcre/pcre.c \
|
|
||||||
simulate.h interpret.h pkg-pcre.h driver.h pcre/internal.h \
|
|
||||||
pcre/chartables.c svalue.h strfuns.h sent.h bytecode.h typedefs.h \
|
|
||||||
backend.h pcre/pcre.h port.h config.h pcre/config.h main.h hosts/unix.h \
|
|
||||||
hosts/be/be.h machine.h
|
|
||||||
|
|
||||||
pkg-pgsql.o : ../mudlib/sys/pgsql.h xalloc.h stdstrings.h simulate.h \
|
pkg-pgsql.o : ../mudlib/sys/pgsql.h xalloc.h stdstrings.h simulate.h \
|
||||||
mstrings.h mapping.h main.h interpret.h instrs.h gcollect.h array.h \
|
mstrings.h mapping.h main.h interpret.h instrs.h gcollect.h array.h \
|
||||||
actions.h pkg-pgsql.h my-alloca.h typedefs.h driver.h svalue.h \
|
actions.h pkg-pgsql.h my-alloca.h typedefs.h driver.h svalue.h \
|
||||||
|
@ -507,8 +489,7 @@ random.o : random.h driver.h port.h config.h hosts/unix.h hosts/be/be.h \
|
||||||
|
|
||||||
regexp.o : i-eval_cost.h main.h xalloc.h simulate.h regexp.h driver.h \
|
regexp.o : i-eval_cost.h main.h xalloc.h simulate.h regexp.h driver.h \
|
||||||
interpret.h typedefs.h svalue.h strfuns.h sent.h bytecode.h pkg-pcre.h \
|
interpret.h typedefs.h svalue.h strfuns.h sent.h bytecode.h pkg-pcre.h \
|
||||||
port.h config.h backend.h pcre/pcre.h hosts/unix.h hosts/be/be.h \
|
port.h config.h backend.h hosts/unix.h hosts/be/be.h machine.h
|
||||||
machine.h
|
|
||||||
|
|
||||||
sha1.o : sha1.h my-stdint.h driver.h port.h config.h hosts/unix.h \
|
sha1.o : sha1.h my-stdint.h driver.h port.h config.h hosts/unix.h \
|
||||||
hosts/be/be.h machine.h
|
hosts/be/be.h machine.h
|
||||||
|
|
|
@ -143,7 +143,7 @@ AC_MY_ARG_ENABLE(use-swap,no,,[Enables support for LPC memory swapping])
|
||||||
AC_MY_ARG_ENABLE(use-ldmud-compatibility,yes,,[Makes psyclpc behave more like LDMUD than you will want])
|
AC_MY_ARG_ENABLE(use-ldmud-compatibility,yes,,[Makes psyclpc behave more like LDMUD than you will want])
|
||||||
|
|
||||||
AC_MY_ARG_ENABLE(use-pthreads,no,,[Enables using of threads for socket writes])
|
AC_MY_ARG_ENABLE(use-pthreads,no,,[Enables using of threads for socket writes])
|
||||||
AC_MY_ARG_ENABLE(use-pcre,yes,,[Enables PCRE: no/yes/builtin/no-builtin])
|
AC_MY_ARG_ENABLE(use-pcre,yes,,[Enables PCRE: yes/no])
|
||||||
AC_MY_ARG_ENABLE(use-iksemel,no,,[Enables use of iksemel for XML parsing])
|
AC_MY_ARG_ENABLE(use-iksemel,no,,[Enables use of iksemel for XML parsing])
|
||||||
AC_MY_ARG_ENABLE(use-deprecated,yes,,[Enables obsolete and deprecated efuns])
|
AC_MY_ARG_ENABLE(use-deprecated,yes,,[Enables obsolete and deprecated efuns])
|
||||||
AC_MY_ARG_ENABLE(use-structs,yes,,[Enables structs])
|
AC_MY_ARG_ENABLE(use-structs,yes,,[Enables structs])
|
||||||
|
@ -289,16 +289,6 @@ if test "x$enable_use_pcre" = "x" || test "x$enable_use_pcre" = "xyes"; then
|
||||||
cdef_use_pcre="#define"
|
cdef_use_pcre="#define"
|
||||||
cdef_use_builtin_pcre="#undef"
|
cdef_use_builtin_pcre="#undef"
|
||||||
enable_use_builtin_pcre="no"
|
enable_use_builtin_pcre="no"
|
||||||
elif test "x$enable_use_pcre" = "xbuiltin"; then
|
|
||||||
cdef_use_pcre="#define"
|
|
||||||
cdef_use_builtin_pcre="#define"
|
|
||||||
enable_use_pcre="yes"
|
|
||||||
enable_use_builtin_pcre="yes"
|
|
||||||
elif test "x$enable_use_pcre" = "xno-builtin"; then
|
|
||||||
cdef_use_pcre="#undef"
|
|
||||||
cdef_use_builtin_pcre="#define"
|
|
||||||
enable_use_pcre="no"
|
|
||||||
enable_use_builtin_pcre="yes"
|
|
||||||
else
|
else
|
||||||
cdef_use_pcre="#undef"
|
cdef_use_pcre="#undef"
|
||||||
cdef_use_builtin_pcre="#undef"
|
cdef_use_builtin_pcre="#undef"
|
||||||
|
@ -1320,11 +1310,7 @@ int main(void)
|
||||||
AC_DEFINE(HAS_PCRE, 1, [Does the machine offer PCRE?])
|
AC_DEFINE(HAS_PCRE, 1, [Does the machine offer PCRE?])
|
||||||
PKGLIBS="$PKGLIBS -lpcre"
|
PKGLIBS="$PKGLIBS -lpcre"
|
||||||
else
|
else
|
||||||
if test $enable_use_builtin_pcre = no ; then
|
echo "PCRE not available in the system."
|
||||||
echo "PCRE not available in the system - using builtin version."
|
|
||||||
enable_use_builtin_pcre="yes"
|
|
||||||
cdef_use_builtin_pcre="#define"
|
|
||||||
fi
|
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
@ -2990,7 +2976,6 @@ AC_SUBST(cdef_use_alists)
|
||||||
AC_SUBST(cdef_use_psyc)
|
AC_SUBST(cdef_use_psyc)
|
||||||
AC_SUBST(cdef_use_mccp)
|
AC_SUBST(cdef_use_mccp)
|
||||||
AC_SUBST(cdef_use_pcre)
|
AC_SUBST(cdef_use_pcre)
|
||||||
AC_SUBST(cdef_use_builtin_pcre)
|
|
||||||
AC_SUBST(cdef_use_deprecated)
|
AC_SUBST(cdef_use_deprecated)
|
||||||
AC_SUBST(cdef_use_structs)
|
AC_SUBST(cdef_use_structs)
|
||||||
AC_SUBST(cdef_use_tls)
|
AC_SUBST(cdef_use_tls)
|
||||||
|
|
|
@ -383,11 +383,6 @@
|
||||||
*/
|
*/
|
||||||
@cdef_use_pcre@ USE_PCRE
|
@cdef_use_pcre@ USE_PCRE
|
||||||
|
|
||||||
/* Define this if you want to use builtin PCRE (ignored when PCRE
|
|
||||||
* is disabled as a whole).
|
|
||||||
*/
|
|
||||||
@cdef_use_builtin_pcre@ USE_BUILTIN_PCRE
|
|
||||||
|
|
||||||
/* Define this if you want iksemel library support.
|
/* Define this if you want iksemel library support.
|
||||||
*/
|
*/
|
||||||
@cdef_use_iksemel@ USE_IKSEMEL
|
@cdef_use_iksemel@ USE_IKSEMEL
|
||||||
|
|
|
@ -337,14 +337,8 @@
|
||||||
|
|
||||||
/* Define this if you want PCRE instead of traditional regexps.
|
/* Define this if you want PCRE instead of traditional regexps.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#define USE_PCRE
|
#define USE_PCRE
|
||||||
|
|
||||||
/* Define this if you want to use builtin PCRE (ignored when PCRE
|
|
||||||
* is disabled as a whole).
|
|
||||||
*/
|
|
||||||
#undef USE_BUILTIN_PCRE
|
|
||||||
|
|
||||||
/* Define this if you want MCCP (Mud Control Compression Protocol).
|
/* Define this if you want MCCP (Mud Control Compression Protocol).
|
||||||
*/
|
*/
|
||||||
#define USE_MCCP
|
#define USE_MCCP
|
||||||
|
|
|
@ -103,17 +103,17 @@ endif
|
||||||
interpret.c lex.c main.c mapping.c md5.c mempools.c mregex.c \
|
interpret.c lex.c main.c mapping.c md5.c mempools.c mregex.c \
|
||||||
mstrings.c \
|
mstrings.c \
|
||||||
object.c otable.c parser.c parse.c \
|
object.c otable.c parser.c parse.c \
|
||||||
pkg-alists.c pkg-mccp.c pkg-mysql.c pkg-pcre.c pkg-pgsql.c \
|
pkg-alists.c pkg-mccp.c pkg-mysql.c pkg-pgsql.c \
|
||||||
pkg-sqlite.c pkg-tls.c ptmalloc.c port.c ptrtable.c random.c \
|
pkg-sqlite.c pkg-tls.c ptmalloc.c port.c ptrtable.c random.c \
|
||||||
regexp.c sha1.c simulate.c simul_efun.c stdstrings.c \
|
regexp.c sha1.c simulate.c simul_efun.c stdstrings.c \
|
||||||
strfuns.c structs.c sprintf.c swap.c wiz_list.c xalloc.c
|
strfuns.c structs.c sprintf.c swap.c wiz_list.c xalloc.c
|
||||||
|
|
||||||
# Generated source files (overlaps with of SRCS) which need to be
|
# Generated source files (overlaps with of SRCS) which need to be
|
||||||
# present for mkdepend to work.
|
# present for mkdepend to work.
|
||||||
GENSRCS = make_func.c stdstrings.c lang.c instrs.h pcre/chartables.c
|
GENSRCS = make_func.c stdstrings.c lang.c instrs.h
|
||||||
|
|
||||||
# Macros for MkDepend:
|
# Macros for MkDepend:
|
||||||
SKELETON = $(SRCS) mkfunc.c pcre/dftables.c
|
SKELETON = $(SRCS) mkfunc.c
|
||||||
EXCEPT = -x efun_defs.c
|
EXCEPT = -x efun_defs.c
|
||||||
SELECT = -S instrs.h -S stdstrings.h -S make_func.c -S lang.c -S lang.h -S efun_defs.c
|
SELECT = -S instrs.h -S stdstrings.h -S make_func.c -S lang.c -S lang.h -S efun_defs.c
|
||||||
|
|
||||||
|
@ -279,11 +279,10 @@ endif
|
||||||
|
|
||||||
clean :: FORCE
|
clean :: FORCE
|
||||||
-rm -f $(YACCTAB)h $(YACCTAB)c make_func.c $(OBJ)/mkfunc
|
-rm -f $(YACCTAB)h $(YACCTAB)c make_func.c $(OBJ)/mkfunc
|
||||||
-rm -f $(OBJ)/dftables pcre/chartables.c
|
|
||||||
-rm -f *~ efun_defs.c instrs.h lang.y lang.h lang.c y.output tags TAGS
|
-rm -f *~ efun_defs.c instrs.h lang.y lang.h lang.c y.output tags TAGS
|
||||||
-rm -f stdstrings.c stdstrings.h
|
-rm -f stdstrings.c stdstrings.h
|
||||||
-rm -f hosts/*~ hosts/*/*~ bugs/*~ done/*~ $(OBJ)/*.o
|
-rm -f hosts/*~ hosts/*/*~ bugs/*~ done/*~ $(OBJ)/*.o
|
||||||
-rm -f pcre/*~ wk/*~ settings/*~
|
-rm -f wk/*~ settings/*~
|
||||||
|
|
||||||
cleanall :: clean
|
cleanall :: clean
|
||||||
-rm -f $(OBJ_CROSS)/*.o
|
-rm -f $(OBJ_CROSS)/*.o
|
||||||
|
@ -318,7 +317,7 @@ depend-generic: $(SRCS) $(GENSRCS) $(OBJ)
|
||||||
|
|
||||||
AMIGASRCS:=
|
AMIGASRCS:=
|
||||||
|
|
||||||
# Special rules for making mkfunc and dftables, depending on whether we're
|
# Special rules for making mkfunc, depending on whether we're
|
||||||
# crosscompiling or not.
|
# crosscompiling or not.
|
||||||
|
|
||||||
make_func.c : make_func.y
|
make_func.c : make_func.y
|
||||||
|
@ -338,19 +337,6 @@ $(OBJ_NATIVE)/mkfunc :
|
||||||
make CPU=$(NATIVE) $@
|
make CPU=$(NATIVE) $@
|
||||||
endif
|
endif
|
||||||
|
|
||||||
$(OBJ)/dftables.o : pcre/dftables.c pcre/maketables.c pcre/config.h
|
|
||||||
$(CC) $(INCLUDES) $(CFLAGS) -c $< -o $@
|
|
||||||
|
|
||||||
$(OBJ)/dftables : $(OBJ)/dftables.o
|
|
||||||
$(LD) -o $@ $^ $(LDFLAGS)
|
|
||||||
$(MIMESET) -f $@
|
|
||||||
|
|
||||||
ifneq ($(CPU), $(NATIVE))
|
|
||||||
$(OBJ_NATIVE)/dftables :
|
|
||||||
@[ -d $(OBJ_NATIVE) ] || mkdir $(OBJ_NATIVE) > /dev/null 2>&1
|
|
||||||
make CPU=$(NATIVE) $@
|
|
||||||
endif
|
|
||||||
|
|
||||||
# The making of the compiler and associated files.
|
# The making of the compiler and associated files.
|
||||||
|
|
||||||
efun_defs.c instrs.h : func_spec config.h $(OBJ_NATIVE)/mkfunc
|
efun_defs.c instrs.h : func_spec config.h $(OBJ_NATIVE)/mkfunc
|
||||||
|
@ -370,9 +356,6 @@ lang.c lang.h : lang.y
|
||||||
mv $(YACCTAB)c lang.c
|
mv $(YACCTAB)c lang.c
|
||||||
mv $(YACCTAB)h lang.h
|
mv $(YACCTAB)h lang.h
|
||||||
|
|
||||||
pcre/chartables.c : $(OBJ_NATIVE)/dftables
|
|
||||||
$(OBJ_NATIVE)/dftables > pcre/chartables.c
|
|
||||||
|
|
||||||
# Be Resource Mangling
|
# Be Resource Mangling
|
||||||
|
|
||||||
$(RSRC_FULL) : hosts/be/driver.r hosts/be/icon-32x32.raw hosts/be/icon-16x16.raw
|
$(RSRC_FULL) : hosts/be/driver.r hosts/be/icon-32x32.raw hosts/be/icon-16x16.raw
|
||||||
|
@ -544,7 +527,7 @@ $(OBJ)/mkfunc.o : make_func.c ../mudlib/sys/driver_hook.h hash.h exec.h \
|
||||||
$(OBJ)/mregex.o : ../mudlib/sys/regexp.h ../mudlib/sys/driver_hook.h \
|
$(OBJ)/mregex.o : ../mudlib/sys/regexp.h ../mudlib/sys/driver_hook.h \
|
||||||
../mudlib/sys/debug_info.h xalloc.h svalue.h strfuns.h simulate.h \
|
../mudlib/sys/debug_info.h xalloc.h svalue.h strfuns.h simulate.h \
|
||||||
regexp.h pkg-pcre.h mstrings.h main.h interpret.h hash.h gcollect.h \
|
regexp.h pkg-pcre.h mstrings.h main.h interpret.h hash.h gcollect.h \
|
||||||
comm.h mregex.h driver.h typedefs.h sent.h bytecode.h pcre/pcre.h \
|
comm.h mregex.h driver.h typedefs.h sent.h bytecode.h \
|
||||||
backend.h pkg-tls.h port.h config.h hosts/unix.h hosts/be/be.h \
|
backend.h pkg-tls.h port.h config.h hosts/unix.h hosts/be/be.h \
|
||||||
machine.h
|
machine.h
|
||||||
|
|
||||||
|
@ -580,9 +563,6 @@ $(OBJ)/parser.o : lang.c ../mudlib/sys/driver_hook.h xalloc.h wiz_list.h \
|
||||||
typedefs.h driver.h strfuns.h hash.h ptrtable.h sent.h bytecode.h \
|
typedefs.h driver.h strfuns.h hash.h ptrtable.h sent.h bytecode.h \
|
||||||
port.h config.h hosts/unix.h hosts/be/be.h machine.h
|
port.h config.h hosts/unix.h hosts/be/be.h machine.h
|
||||||
|
|
||||||
$(OBJ)/dftables.o : pcre/maketables.c pcre/internal.h pcre/pcre.h \
|
|
||||||
pcre/config.h
|
|
||||||
|
|
||||||
$(OBJ)/pkg-alists.o : i-svalue_cmp.h xalloc.h svalue.h simulate.h \
|
$(OBJ)/pkg-alists.o : i-svalue_cmp.h xalloc.h svalue.h simulate.h \
|
||||||
mstrings.h main.h interpret.h array.h my-alloca.h pkg-alists.h \
|
mstrings.h main.h interpret.h array.h my-alloca.h pkg-alists.h \
|
||||||
typedefs.h driver.h closure.h strfuns.h sent.h bytecode.h hash.h \
|
typedefs.h driver.h closure.h strfuns.h sent.h bytecode.h hash.h \
|
||||||
|
@ -603,12 +583,6 @@ $(OBJ)/pkg-mysql.o : xalloc.h svalue.h stdstrings.h simulate.h mstrings.h \
|
||||||
driver.h strfuns.h sent.h bytecode.h hash.h exec.h backend.h port.h \
|
driver.h strfuns.h sent.h bytecode.h hash.h exec.h backend.h port.h \
|
||||||
config.h hosts/unix.h hosts/be/be.h machine.h
|
config.h hosts/unix.h hosts/be/be.h machine.h
|
||||||
|
|
||||||
$(OBJ)/pkg-pcre.o : pcre/study.c pcre/maketables.c pcre/get.c pcre/pcre.c \
|
|
||||||
simulate.h interpret.h pkg-pcre.h driver.h pcre/internal.h \
|
|
||||||
pcre/chartables.c svalue.h strfuns.h sent.h bytecode.h typedefs.h \
|
|
||||||
backend.h pcre/pcre.h port.h config.h pcre/config.h main.h hosts/unix.h \
|
|
||||||
hosts/be/be.h machine.h
|
|
||||||
|
|
||||||
$(OBJ)/pkg-pgsql.o : ../mudlib/sys/pgsql.h xalloc.h stdstrings.h simulate.h \
|
$(OBJ)/pkg-pgsql.o : ../mudlib/sys/pgsql.h xalloc.h stdstrings.h simulate.h \
|
||||||
mstrings.h mapping.h main.h interpret.h instrs.h gcollect.h array.h \
|
mstrings.h mapping.h main.h interpret.h instrs.h gcollect.h array.h \
|
||||||
actions.h pkg-pgsql.h my-alloca.h typedefs.h driver.h svalue.h \
|
actions.h pkg-pgsql.h my-alloca.h typedefs.h driver.h svalue.h \
|
||||||
|
@ -642,7 +616,7 @@ $(OBJ)/random.o : random.h driver.h port.h config.h hosts/unix.h \
|
||||||
|
|
||||||
$(OBJ)/regexp.o : main.h xalloc.h simulate.h interpret.h regexp.h driver.h \
|
$(OBJ)/regexp.o : main.h xalloc.h simulate.h interpret.h regexp.h driver.h \
|
||||||
typedefs.h svalue.h strfuns.h sent.h bytecode.h backend.h pkg-pcre.h \
|
typedefs.h svalue.h strfuns.h sent.h bytecode.h backend.h pkg-pcre.h \
|
||||||
port.h config.h pcre/pcre.h hosts/unix.h hosts/be/be.h machine.h
|
port.h config.h hosts/unix.h hosts/be/be.h machine.h
|
||||||
|
|
||||||
$(OBJ)/sha1.o : sha1.h my-stdint.h driver.h port.h config.h hosts/unix.h \
|
$(OBJ)/sha1.o : sha1.h my-stdint.h driver.h port.h config.h hosts/unix.h \
|
||||||
hosts/be/be.h machine.h
|
hosts/be/be.h machine.h
|
||||||
|
|
|
@ -325,11 +325,6 @@
|
||||||
*/
|
*/
|
||||||
#define USE_PCRE
|
#define USE_PCRE
|
||||||
|
|
||||||
/* Define this if you want to use builtin PCRE (ignored when PCRE
|
|
||||||
* is disabled as a whole).
|
|
||||||
*/
|
|
||||||
#undef USE_BUILTIN_PCRE
|
|
||||||
|
|
||||||
/* Define this if you want MCCP (Mud Control Compression Protocol).
|
/* Define this if you want MCCP (Mud Control Compression Protocol).
|
||||||
*/
|
*/
|
||||||
#define USE_MCCP
|
#define USE_MCCP
|
||||||
|
|
|
@ -72,14 +72,14 @@ MFLAGS = "BINDIR=$(BINDIR)" "MUD_LIB=$(MUD_LIB)"
|
||||||
SRC = access_check.c actions.c array.c backend.c call_out.c closure.c comm.c \
|
SRC = access_check.c actions.c array.c backend.c call_out.c closure.c comm.c \
|
||||||
dumpstat.c ed.c efuns.c gcollect.c hash.c heartbeat.c interpret.c \
|
dumpstat.c ed.c efuns.c gcollect.c hash.c heartbeat.c interpret.c \
|
||||||
parser.c lex.c main.c mapping.c mempools.c object.c otable.c parse.c \
|
parser.c lex.c main.c mapping.c mempools.c object.c otable.c parse.c \
|
||||||
pkg-alists.c pkg-mccp.c pkg-mysql.c pkg-pcre.c pkg-pgsql.c \
|
pkg-alists.c pkg-mccp.c pkg-mysql.c pkg-pgsql.c \
|
||||||
pkg-sqlite.c pkg-tls.c ptmalloc.c port.c ptrtable.c md5.c \
|
pkg-sqlite.c pkg-tls.c ptmalloc.c port.c ptrtable.c md5.c \
|
||||||
random.c regexp.c mregex.c sha1.c simulate.c simul_efun.c stdstrings.c \
|
random.c regexp.c mregex.c sha1.c simulate.c simul_efun.c stdstrings.c \
|
||||||
stralloc.c strfuns.c structs.c sprintf.c swap.c wiz_list.c xalloc.c
|
stralloc.c strfuns.c structs.c sprintf.c swap.c wiz_list.c xalloc.c
|
||||||
OBJ = access_check.o actions.o array.o backend.o call_out.o closure.o comm.o \
|
OBJ = access_check.o actions.o array.o backend.o call_out.o closure.o comm.o \
|
||||||
dumpstat.o ed.o efuns.o gcollect.o hash.o heartbeat.o interpret.o \
|
dumpstat.o ed.o efuns.o gcollect.o hash.o heartbeat.o interpret.o \
|
||||||
parser.o lex.o main.o mapping.o mempools.o object.o otable.o parse.o \
|
parser.o lex.o main.o mapping.o mempools.o object.o otable.o parse.o \
|
||||||
pkg-alists.o pkg-mccp.o pkg-mysql.o pkg-pcre.o pkg-pgsql.o \
|
pkg-alists.o pkg-mccp.o pkg-mysql.o pkg-pgsql.o \
|
||||||
pkg-sqlite.o pkg-tls.o ptmalloc.o port.o ptrtable.o md5.o\
|
pkg-sqlite.o pkg-tls.o ptmalloc.o port.o ptrtable.o md5.o\
|
||||||
random.o regexp.o mregex.o sha1.o simulate.o simul_efun.o stdstrings.o \
|
random.o regexp.o mregex.o sha1.o simulate.o simul_efun.o stdstrings.o \
|
||||||
stralloc.o strfuns.o structs.o sprintf.o swap.o wiz_list.o xalloc.o
|
stralloc.o strfuns.o structs.o sprintf.o swap.o wiz_list.o xalloc.o
|
||||||
|
@ -106,7 +106,6 @@ lint: *.c
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
$(RM) $(YACCTAB)h $(YACCTAB)c make_func.c *.o mkfunc.exe
|
$(RM) $(YACCTAB)h $(YACCTAB)c make_func.c *.o mkfunc.exe
|
||||||
$(RM) dftables.exe pcre/chartables.c
|
|
||||||
$(RM) stdstrings.c stdstrings.h
|
$(RM) stdstrings.c stdstrings.h
|
||||||
$(RM) efun_defs.c instrs.h lang.y lang.h lang.c y.output tags TAGS
|
$(RM) efun_defs.c instrs.h lang.y lang.h lang.c y.output tags TAGS
|
||||||
$(RM) parse core mudlib/core mudlib/debug.log lpmud.log ldmud
|
$(RM) parse core mudlib/core mudlib/debug.log lpmud.log ldmud
|
||||||
|
@ -134,12 +133,6 @@ mkfunc.o : mkfunc.c make_func.c driver.h config.h machine.h port.h
|
||||||
mkfunc: mkfunc.o hash.o exec.h
|
mkfunc: mkfunc.o hash.o exec.h
|
||||||
$(CC) $(OPTIMIZE) $(LDFLAGS) mkfunc.o hash.o -o mkfunc
|
$(CC) $(OPTIMIZE) $(LDFLAGS) mkfunc.o hash.o -o mkfunc
|
||||||
|
|
||||||
dftables.o : pcre/dftables.c pcre/maketables.c pcre/config.h
|
|
||||||
$(CC) $(CFLAGS) -c pcre/dftables.c -o dftables.o
|
|
||||||
|
|
||||||
dftables : dftables.o
|
|
||||||
$(CC) $(OPTIMIZE) (LDFLAGS) dftables.o -o dftables
|
|
||||||
|
|
||||||
lang.y: mkfunc prolang.y config.h
|
lang.y: mkfunc prolang.y config.h
|
||||||
$(RM) lang.y
|
$(RM) lang.y
|
||||||
./mkfunc lang
|
./mkfunc lang
|
||||||
|
@ -160,9 +153,6 @@ lang.c lang.h: lang.y
|
||||||
|
|
||||||
$(MV) $(YACCTAB)h lang.h
|
$(MV) $(YACCTAB)h lang.h
|
||||||
|
|
||||||
pcre/chartables.c : dftables
|
|
||||||
./dftables > pcre/chartables.c
|
|
||||||
|
|
||||||
#--------------------------------------------------------
|
#--------------------------------------------------------
|
||||||
# Dependencies, manual and automatic.
|
# Dependencies, manual and automatic.
|
||||||
|
|
||||||
|
@ -271,7 +261,7 @@ lex.o : efun_defs.c ../mudlib/sys/driver_hook.h xalloc.h wiz_list.h \
|
||||||
config.h hosts/unix.h hosts/be/be.h machine.h
|
config.h hosts/unix.h hosts/be/be.h machine.h
|
||||||
|
|
||||||
main.o : xalloc.h wiz_list.h swap.h svalue.h stdstrings.h simul_efun.h \
|
main.o : xalloc.h wiz_list.h swap.h svalue.h stdstrings.h simul_efun.h \
|
||||||
simulate.h rxcache.h random.h pcre/pcre.h patchlevel.h otable.h \
|
simulate.h rxcache.h random.h patchlevel.h otable.h \
|
||||||
object.h mstrings.h mapping.h lex.h interpret.h gcollect.h filestat.h \
|
object.h mstrings.h mapping.h lex.h interpret.h gcollect.h filestat.h \
|
||||||
comm.h array.h backend.h main.h my-alloca.h typedefs.h driver.h \
|
comm.h array.h backend.h main.h my-alloca.h typedefs.h driver.h \
|
||||||
ptrtable.h exec.h strfuns.h sent.h regexp.h instrs.h port.h config.h \
|
ptrtable.h exec.h strfuns.h sent.h regexp.h instrs.h port.h config.h \
|
||||||
|
@ -323,20 +313,6 @@ parser.o : lang.c pkg-alists.h ../mudlib/sys/driver_hook.h xalloc.h \
|
||||||
my-alloca.h typedefs.h driver.h ptrtable.h strfuns.h sent.h port.h \
|
my-alloca.h typedefs.h driver.h ptrtable.h strfuns.h sent.h port.h \
|
||||||
config.h hosts/unix.h hosts/be/be.h machine.h
|
config.h hosts/unix.h hosts/be/be.h machine.h
|
||||||
|
|
||||||
pcre-get.o : pcre/get.c driver.h pcre/internal.h port.h config.h \
|
|
||||||
pcre/pcre.h pcre/config.h hosts/unix.h hosts/be/be.h machine.h
|
|
||||||
|
|
||||||
pcre-maketables.o : pcre/maketables.c driver.h pcre/internal.h port.h \
|
|
||||||
config.h pcre/pcre.h pcre/config.h hosts/unix.h hosts/be/be.h machine.h
|
|
||||||
|
|
||||||
pcre-pcre.o : pcre/pcre.c driver.h pcre/chartables.c pcre/internal.h port.h \
|
|
||||||
config.h pcre/pcre.h pcre/config.h hosts/unix.h hosts/be/be.h machine.h
|
|
||||||
|
|
||||||
pcre-study.o : pcre/study.c driver.h pcre/internal.h port.h config.h \
|
|
||||||
pcre/pcre.h pcre/config.h hosts/unix.h hosts/be/be.h machine.h
|
|
||||||
|
|
||||||
dftables.o : pcre/maketables.c pcre/internal.h pcre/pcre.h pcre/config.h
|
|
||||||
|
|
||||||
pkg-alists.o : xalloc.h svalue.h simulate.h mstrings.h main.h interpret.h \
|
pkg-alists.o : xalloc.h svalue.h simulate.h mstrings.h main.h interpret.h \
|
||||||
array.h my-alloca.h pkg-alists.h typedefs.h driver.h strfuns.h sent.h \
|
array.h my-alloca.h pkg-alists.h typedefs.h driver.h strfuns.h sent.h \
|
||||||
exec.h instrs.h port.h config.h hosts/unix.h hosts/be/be.h machine.h
|
exec.h instrs.h port.h config.h hosts/unix.h hosts/be/be.h machine.h
|
||||||
|
|
|
@ -328,14 +328,8 @@
|
||||||
|
|
||||||
/* Define this if you want PCRE instead of traditional regexps.
|
/* Define this if you want PCRE instead of traditional regexps.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#define USE_PCRE
|
#define USE_PCRE
|
||||||
|
|
||||||
/* Define this if you want to use builtin PCRE (ignored when PCRE
|
|
||||||
* is disabled as a whole).
|
|
||||||
*/
|
|
||||||
#undef USE_BUILTIN_PCRE
|
|
||||||
|
|
||||||
/* Define this if you want MCCP (Mud Control Compression Protocol).
|
/* Define this if you want MCCP (Mud Control Compression Protocol).
|
||||||
*/
|
*/
|
||||||
#define USE_MCCP
|
#define USE_MCCP
|
||||||
|
|
|
@ -209,9 +209,6 @@ rx_pcre_version (void)
|
||||||
{
|
{
|
||||||
static char buf[40];
|
static char buf[40];
|
||||||
sprintf(buf, "%d.%d", PCRE_MAJOR, PCRE_MINOR);
|
sprintf(buf, "%d.%d", PCRE_MAJOR, PCRE_MINOR);
|
||||||
# ifdef USE_BUILTIN_PCRE
|
|
||||||
strcat(buf, " (builtin)");
|
|
||||||
# endif
|
|
||||||
return buf;
|
return buf;
|
||||||
} /* rx_pcre_version() */
|
} /* rx_pcre_version() */
|
||||||
|
|
||||||
|
|
|
@ -1,6 +0,0 @@
|
||||||
Written by: Philip Hazel <ph10@cam.ac.uk>
|
|
||||||
|
|
||||||
University of Cambridge Computing Service,
|
|
||||||
Cambridge, England. Phone: +44 1223 334714.
|
|
||||||
|
|
||||||
Copyright (c) 1997-2003 University of Cambridge
|
|
|
@ -1,54 +0,0 @@
|
||||||
PCRE LICENCE
|
|
||||||
------------
|
|
||||||
|
|
||||||
PCRE is a library of functions to support regular expressions whose syntax
|
|
||||||
and semantics are as close as possible to those of the Perl 5 language.
|
|
||||||
|
|
||||||
Written by: Philip Hazel <ph10@cam.ac.uk>
|
|
||||||
|
|
||||||
University of Cambridge Computing Service,
|
|
||||||
Cambridge, England. Phone: +44 1223 334714.
|
|
||||||
|
|
||||||
Copyright (c) 1997-2003 University of Cambridge
|
|
||||||
|
|
||||||
Permission is granted to anyone to use this software for any purpose on any
|
|
||||||
computer system, and to redistribute it freely, subject to the following
|
|
||||||
restrictions:
|
|
||||||
|
|
||||||
1. This software is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
||||||
|
|
||||||
2. The origin of this software must not be misrepresented, either by
|
|
||||||
explicit claim or by omission. In practice, this means that if you use
|
|
||||||
PCRE in software that you distribute to others, commercially or
|
|
||||||
otherwise, you must put a sentence like this
|
|
||||||
|
|
||||||
Regular expression support is provided by the PCRE library package,
|
|
||||||
which is open source software, written by Philip Hazel, and copyright
|
|
||||||
by the University of Cambridge, England.
|
|
||||||
|
|
||||||
somewhere reasonably visible in your documentation and in any relevant
|
|
||||||
files or online help data or similar. A reference to the ftp site for
|
|
||||||
the source, that is, to
|
|
||||||
|
|
||||||
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/
|
|
||||||
|
|
||||||
should also be given in the documentation. However, this condition is not
|
|
||||||
intended to apply to whole chains of software. If package A includes PCRE,
|
|
||||||
it must acknowledge it, but if package B is software that includes package
|
|
||||||
A, the condition is not imposed on package B (unless it uses PCRE
|
|
||||||
independently).
|
|
||||||
|
|
||||||
3. Altered versions must be plainly marked as such, and must not be
|
|
||||||
misrepresented as being the original software.
|
|
||||||
|
|
||||||
4. If PCRE is embedded in any software that is released under the GNU
|
|
||||||
General Purpose Licence (GPL), or Lesser General Purpose Licence (LGPL),
|
|
||||||
then the terms of that licence shall supersede any condition above with
|
|
||||||
which it is incompatible.
|
|
||||||
|
|
||||||
The documentation for PCRE, supplied in the "doc" directory, is distributed
|
|
||||||
under the same terms as the software itself.
|
|
||||||
|
|
||||||
End
|
|
1475
src/pcre/ChangeLog
1475
src/pcre/ChangeLog
File diff suppressed because it is too large
Load diff
|
@ -1,54 +0,0 @@
|
||||||
PCRE LICENCE
|
|
||||||
------------
|
|
||||||
|
|
||||||
PCRE is a library of functions to support regular expressions whose syntax
|
|
||||||
and semantics are as close as possible to those of the Perl 5 language.
|
|
||||||
|
|
||||||
Written by: Philip Hazel <ph10@cam.ac.uk>
|
|
||||||
|
|
||||||
University of Cambridge Computing Service,
|
|
||||||
Cambridge, England. Phone: +44 1223 334714.
|
|
||||||
|
|
||||||
Copyright (c) 1997-2003 University of Cambridge
|
|
||||||
|
|
||||||
Permission is granted to anyone to use this software for any purpose on any
|
|
||||||
computer system, and to redistribute it freely, subject to the following
|
|
||||||
restrictions:
|
|
||||||
|
|
||||||
1. This software is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
||||||
|
|
||||||
2. The origin of this software must not be misrepresented, either by
|
|
||||||
explicit claim or by omission. In practice, this means that if you use
|
|
||||||
PCRE in software that you distribute to others, commercially or
|
|
||||||
otherwise, you must put a sentence like this
|
|
||||||
|
|
||||||
Regular expression support is provided by the PCRE library package,
|
|
||||||
which is open source software, written by Philip Hazel, and copyright
|
|
||||||
by the University of Cambridge, England.
|
|
||||||
|
|
||||||
somewhere reasonably visible in your documentation and in any relevant
|
|
||||||
files or online help data or similar. A reference to the ftp site for
|
|
||||||
the source, that is, to
|
|
||||||
|
|
||||||
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/
|
|
||||||
|
|
||||||
should also be given in the documentation. However, this condition is not
|
|
||||||
intended to apply to whole chains of software. If package A includes PCRE,
|
|
||||||
it must acknowledge it, but if package B is software that includes package
|
|
||||||
A, the condition is not imposed on package B (unless it uses PCRE
|
|
||||||
independently).
|
|
||||||
|
|
||||||
3. Altered versions must be plainly marked as such, and must not be
|
|
||||||
misrepresented as being the original software.
|
|
||||||
|
|
||||||
4. If PCRE is embedded in any software that is released under the GNU
|
|
||||||
General Purpose Licence (GPL), or Lesser General Purpose Licence (LGPL),
|
|
||||||
then the terms of that licence shall supersede any condition above with
|
|
||||||
which it is incompatible.
|
|
||||||
|
|
||||||
The documentation for PCRE, supplied in the "doc" directory, is distributed
|
|
||||||
under the same terms as the software itself.
|
|
||||||
|
|
||||||
End
|
|
154
src/pcre/NEWS
154
src/pcre/NEWS
|
@ -1,154 +0,0 @@
|
||||||
News about PCRE releases
|
|
||||||
------------------------
|
|
||||||
|
|
||||||
Release 4.5 01-Dec-03
|
|
||||||
---------------------
|
|
||||||
|
|
||||||
Again mainly a bug-fix and tidying release, with only a couple of new features:
|
|
||||||
|
|
||||||
1. It's possible now to compile PCRE so that it does not use recursive
|
|
||||||
function calls when matching. Instead it gets memory from the heap. This slows
|
|
||||||
things down, but may be necessary on systems with limited stacks.
|
|
||||||
|
|
||||||
2. UTF-8 string checking has been tightened to reject overlong sequences and to
|
|
||||||
check that a starting offset points to the start of a character. Failure of the
|
|
||||||
latter returns a new error code: PCRE_ERROR_BADUTF8_OFFSET.
|
|
||||||
|
|
||||||
3. PCRE can now be compiled for systems that use EBCDIC code.
|
|
||||||
|
|
||||||
|
|
||||||
Release 4.4 21-Aug-03
|
|
||||||
---------------------
|
|
||||||
|
|
||||||
This is mainly a bug-fix and tidying release. The only new feature is that PCRE
|
|
||||||
checks UTF-8 strings for validity by default. There is an option to suppress
|
|
||||||
this, just in case anybody wants that teeny extra bit of performance.
|
|
||||||
|
|
||||||
|
|
||||||
Releases 4.1 - 4.3
|
|
||||||
------------------
|
|
||||||
|
|
||||||
Sorry, I forgot about updating the NEWS file for these releases. Please take a
|
|
||||||
look at ChangeLog.
|
|
||||||
|
|
||||||
|
|
||||||
Release 4.0 17-Feb-03
|
|
||||||
---------------------
|
|
||||||
|
|
||||||
There have been a lot of changes for the 4.0 release, adding additional
|
|
||||||
functionality and mending bugs. Below is a list of the highlights of the new
|
|
||||||
functionality. For full details of these features, please consult the
|
|
||||||
documentation. For a complete list of changes, see the ChangeLog file.
|
|
||||||
|
|
||||||
1. Support for Perl's \Q...\E escapes.
|
|
||||||
|
|
||||||
2. "Possessive quantifiers" ?+, *+, ++, and {,}+ which come from Sun's Java
|
|
||||||
package. They provide some syntactic sugar for simple cases of "atomic
|
|
||||||
grouping".
|
|
||||||
|
|
||||||
3. Support for the \G assertion. It is true when the current matching position
|
|
||||||
is at the start point of the match.
|
|
||||||
|
|
||||||
4. A new feature that provides some of the functionality that Perl provides
|
|
||||||
with (?{...}). The facility is termed a "callout". The way it is done in PCRE
|
|
||||||
is for the caller to provide an optional function, by setting pcre_callout to
|
|
||||||
its entry point. To get the function called, the regex must include (?C) at
|
|
||||||
appropriate points.
|
|
||||||
|
|
||||||
5. Support for recursive calls to individual subpatterns. This makes it really
|
|
||||||
easy to get totally confused.
|
|
||||||
|
|
||||||
6. Support for named subpatterns. The Python syntax (?P<name>...) is used to
|
|
||||||
name a group.
|
|
||||||
|
|
||||||
7. Several extensions to UTF-8 support; it is now fairly complete. There is an
|
|
||||||
option for pcregrep to make it operate in UTF-8 mode.
|
|
||||||
|
|
||||||
8. The single man page has been split into a number of separate man pages.
|
|
||||||
These also give rise to individual HTML pages which are put in a separate
|
|
||||||
directory. There is an index.html page that lists them all. Some hyperlinking
|
|
||||||
between the pages has been installed.
|
|
||||||
|
|
||||||
|
|
||||||
Release 3.5 15-Aug-01
|
|
||||||
---------------------
|
|
||||||
|
|
||||||
1. The configuring system has been upgraded to use later versions of autoconf
|
|
||||||
and libtool. By default it builds both a shared and a static library if the OS
|
|
||||||
supports it. You can use --disable-shared or --disable-static on the configure
|
|
||||||
command if you want only one of them.
|
|
||||||
|
|
||||||
2. The pcretest utility is now installed along with pcregrep because it is
|
|
||||||
useful for users (to test regexs) and by doing this, it automatically gets
|
|
||||||
relinked by libtool. The documentation has been turned into a man page, so
|
|
||||||
there are now .1, .txt, and .html versions in /doc.
|
|
||||||
|
|
||||||
3. Upgrades to pcregrep:
|
|
||||||
(i) Added long-form option names like gnu grep.
|
|
||||||
(ii) Added --help to list all options with an explanatory phrase.
|
|
||||||
(iii) Added -r, --recursive to recurse into sub-directories.
|
|
||||||
(iv) Added -f, --file to read patterns from a file.
|
|
||||||
|
|
||||||
4. Added --enable-newline-is-cr and --enable-newline-is-lf to the configure
|
|
||||||
script, to force use of CR or LF instead of \n in the source. On non-Unix
|
|
||||||
systems, the value can be set in config.h.
|
|
||||||
|
|
||||||
5. The limit of 200 on non-capturing parentheses is a _nesting_ limit, not an
|
|
||||||
absolute limit. Changed the text of the error message to make this clear, and
|
|
||||||
likewise updated the man page.
|
|
||||||
|
|
||||||
6. The limit of 99 on the number of capturing subpatterns has been removed.
|
|
||||||
The new limit is 65535, which I hope will not be a "real" limit.
|
|
||||||
|
|
||||||
|
|
||||||
Release 3.3 01-Aug-00
|
|
||||||
---------------------
|
|
||||||
|
|
||||||
There is some support for UTF-8 character strings. This is incomplete and
|
|
||||||
experimental. The documentation describes what is and what is not implemented.
|
|
||||||
Otherwise, this is just a bug-fixing release.
|
|
||||||
|
|
||||||
|
|
||||||
Release 3.0 01-Feb-00
|
|
||||||
---------------------
|
|
||||||
|
|
||||||
1. A "configure" script is now used to configure PCRE for Unix systems. It
|
|
||||||
builds a Makefile, a config.h file, and the pcre-config script.
|
|
||||||
|
|
||||||
2. PCRE is built as a shared library by default.
|
|
||||||
|
|
||||||
3. There is support for POSIX classes such as [:alpha:].
|
|
||||||
|
|
||||||
5. There is an experimental recursion feature.
|
|
||||||
|
|
||||||
----------------------------------------------------------------------------
|
|
||||||
IMPORTANT FOR THOSE UPGRADING FROM VERSIONS BEFORE 2.00
|
|
||||||
|
|
||||||
Please note that there has been a change in the API such that a larger
|
|
||||||
ovector is required at matching time, to provide some additional workspace.
|
|
||||||
The new man page has details. This change was necessary in order to support
|
|
||||||
some of the new functionality in Perl 5.005.
|
|
||||||
|
|
||||||
IMPORTANT FOR THOSE UPGRADING FROM VERSION 2.00
|
|
||||||
|
|
||||||
Another (I hope this is the last!) change has been made to the API for the
|
|
||||||
pcre_compile() function. An additional argument has been added to make it
|
|
||||||
possible to pass over a pointer to character tables built in the current
|
|
||||||
locale by pcre_maketables(). To use the default tables, this new argument
|
|
||||||
should be passed as NULL.
|
|
||||||
|
|
||||||
IMPORTANT FOR THOSE UPGRADING FROM VERSION 2.05
|
|
||||||
|
|
||||||
Yet another (and again I hope this really is the last) change has been made
|
|
||||||
to the API for the pcre_exec() function. An additional argument has been
|
|
||||||
added to make it possible to start the match other than at the start of the
|
|
||||||
subject string. This is important if there are lookbehinds. The new man
|
|
||||||
page has the details, but if you just want to convert existing programs, all
|
|
||||||
you need to do is to stick in a new fifth argument to pcre_exec(), with a
|
|
||||||
value of zero. For example, change
|
|
||||||
|
|
||||||
pcre_exec(pattern, extra, subject, length, options, ovec, ovecsize)
|
|
||||||
to
|
|
||||||
pcre_exec(pattern, extra, subject, length, 0, options, ovec, ovecsize)
|
|
||||||
|
|
||||||
****
|
|
365
src/pcre/README
365
src/pcre/README
|
@ -1,365 +0,0 @@
|
||||||
README file for PCRE (Perl-compatible regular expression library)
|
|
||||||
-----------------------------------------------------------------
|
|
||||||
|
|
||||||
The latest release of PCRE is always available from
|
|
||||||
|
|
||||||
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-xxx.tar.gz
|
|
||||||
|
|
||||||
Please read the NEWS file if you are upgrading from a previous release.
|
|
||||||
|
|
||||||
PCRE has its own native API, but a set of "wrapper" functions that are based on
|
|
||||||
the POSIX API are also supplied in the library libpcreposix. Note that this
|
|
||||||
just provides a POSIX calling interface to PCRE: the regular expressions
|
|
||||||
themselves still follow Perl syntax and semantics. The header file
|
|
||||||
for the POSIX-style functions is called pcreposix.h. The official POSIX name is
|
|
||||||
regex.h, but I didn't want to risk possible problems with existing files of
|
|
||||||
that name by distributing it that way. To use it with an existing program that
|
|
||||||
uses the POSIX API, it will have to be renamed or pointed at by a link.
|
|
||||||
|
|
||||||
If you are using the POSIX interface to PCRE and there is already a POSIX regex
|
|
||||||
library installed on your system, you must take care when linking programs to
|
|
||||||
ensure that they link with PCRE's libpcreposix library. Otherwise they may pick
|
|
||||||
up the "real" POSIX functions of the same name.
|
|
||||||
|
|
||||||
|
|
||||||
Contributions by users of PCRE
|
|
||||||
------------------------------
|
|
||||||
|
|
||||||
You can find contributions from PCRE users in the directory
|
|
||||||
|
|
||||||
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/Contrib
|
|
||||||
|
|
||||||
where there is also a README file giving brief descriptions of what they are.
|
|
||||||
Several of them provide support for compiling PCRE on various flavours of
|
|
||||||
Windows systems (I myself do not use Windows). Some are complete in themselves;
|
|
||||||
others are pointers to URLs containing relevant files.
|
|
||||||
|
|
||||||
|
|
||||||
Building PCRE on a Unix-like system
|
|
||||||
-----------------------------------
|
|
||||||
|
|
||||||
To build PCRE on a Unix-like system, first run the "configure" command from the
|
|
||||||
PCRE distribution directory, with your current directory set to the directory
|
|
||||||
where you want the files to be created. This command is a standard GNU
|
|
||||||
"autoconf" configuration script, for which generic instructions are supplied in
|
|
||||||
INSTALL.
|
|
||||||
|
|
||||||
Most commonly, people build PCRE within its own distribution directory, and in
|
|
||||||
this case, on many systems, just running "./configure" is sufficient, but the
|
|
||||||
usual methods of changing standard defaults are available. For example,
|
|
||||||
|
|
||||||
CFLAGS='-O2 -Wall' ./configure --prefix=/opt/local
|
|
||||||
|
|
||||||
specifies that the C compiler should be run with the flags '-O2 -Wall' instead
|
|
||||||
of the default, and that "make install" should install PCRE under /opt/local
|
|
||||||
instead of the default /usr/local.
|
|
||||||
|
|
||||||
If you want to build in a different directory, just run "configure" with that
|
|
||||||
directory as current. For example, suppose you have unpacked the PCRE source
|
|
||||||
into /source/pcre/pcre-xxx, but you want to build it in /build/pcre/pcre-xxx:
|
|
||||||
|
|
||||||
cd /build/pcre/pcre-xxx
|
|
||||||
/source/pcre/pcre-xxx/configure
|
|
||||||
|
|
||||||
There are some optional features that can be included or omitted from the PCRE
|
|
||||||
library. You can read more about them in the pcrebuild man page.
|
|
||||||
|
|
||||||
. If you want to make use of the support for UTF-8 character strings in PCRE,
|
|
||||||
you must add --enable-utf8 to the "configure" command. Without it, the code
|
|
||||||
for handling UTF-8 is not included in the library. (Even when included, it
|
|
||||||
still has to be enabled by an option at run time.)
|
|
||||||
|
|
||||||
. You can build PCRE to recognize CR or NL as the newline character, instead
|
|
||||||
of whatever your compiler uses for "\n", by adding --newline-is-cr or
|
|
||||||
--newline-is-nl to the "configure" command, respectively. Only do this if you
|
|
||||||
really understand what you are doing. On traditional Unix-like systems, the
|
|
||||||
newline character is NL.
|
|
||||||
|
|
||||||
. When called via the POSIX interface, PCRE uses malloc() to get additional
|
|
||||||
storage for processing capturing parentheses if there are more than 10 of
|
|
||||||
them. You can increase this threshold by setting, for example,
|
|
||||||
|
|
||||||
--with-posix-malloc-threshold=20
|
|
||||||
|
|
||||||
on the "configure" command.
|
|
||||||
|
|
||||||
. PCRE has a counter which can be set to limit the amount of resources it uses.
|
|
||||||
If the limit is exceeded during a match, the match fails. The default is ten
|
|
||||||
million. You can change the default by setting, for example,
|
|
||||||
|
|
||||||
--with-match-limit=500000
|
|
||||||
|
|
||||||
on the "configure" command. This is just the default; individual calls to
|
|
||||||
pcre_exec() can supply their own value. There is discussion on the pcreapi
|
|
||||||
man page.
|
|
||||||
|
|
||||||
. The default maximum compiled pattern size is around 64K. You can increase
|
|
||||||
this by adding --with-link-size=3 to the "configure" command. You can
|
|
||||||
increase it even more by setting --with-link-size=4, but this is unlikely
|
|
||||||
ever to be necessary. If you build PCRE with an increased link size, test 2
|
|
||||||
(and 5 if you are using UTF-8) will fail. Part of the output of these tests
|
|
||||||
is a representation of the compiled pattern, and this changes with the link
|
|
||||||
size.
|
|
||||||
|
|
||||||
. You can build PCRE so that its match() function does not call itself
|
|
||||||
recursively. Instead, it uses blocks of data from the heap via special
|
|
||||||
functions pcre_stack_malloc() and pcre_stack_free() to save data that would
|
|
||||||
otherwise be saved on the stack. To build PCRE like this, use
|
|
||||||
|
|
||||||
--disable-stack-for-recursion
|
|
||||||
|
|
||||||
on the "configure" command. PCRE runs more slowly in this mode, but it may be
|
|
||||||
necessary in environments with limited stack sizes.
|
|
||||||
|
|
||||||
The "configure" script builds five files:
|
|
||||||
|
|
||||||
. libtool is a script that builds shared and/or static libraries
|
|
||||||
. Makefile is built by copying Makefile.in and making substitutions.
|
|
||||||
. config.h is built by copying config.in and making substitutions.
|
|
||||||
. pcre-config is built by copying pcre-config.in and making substitutions.
|
|
||||||
. RunTest is a script for running tests
|
|
||||||
|
|
||||||
Once "configure" has run, you can run "make". It builds two libraries called
|
|
||||||
libpcre and libpcreposix, a test program called pcretest, and the pcregrep
|
|
||||||
command. You can use "make install" to copy these, the public header files
|
|
||||||
pcre.h and pcreposix.h, and the man pages to appropriate live directories on
|
|
||||||
your system, in the normal way.
|
|
||||||
|
|
||||||
Running "make install" also installs the command pcre-config, which can be used
|
|
||||||
to recall information about the PCRE configuration and installation. For
|
|
||||||
example,
|
|
||||||
|
|
||||||
pcre-config --version
|
|
||||||
|
|
||||||
prints the version number, and
|
|
||||||
|
|
||||||
pcre-config --libs
|
|
||||||
|
|
||||||
outputs information about where the library is installed. This command can be
|
|
||||||
included in makefiles for programs that use PCRE, saving the programmer from
|
|
||||||
having to remember too many details.
|
|
||||||
|
|
||||||
|
|
||||||
Shared libraries on Unix-like systems
|
|
||||||
-------------------------------------
|
|
||||||
|
|
||||||
The default distribution builds PCRE as two shared libraries and two static
|
|
||||||
libraries, as long as the operating system supports shared libraries. Shared
|
|
||||||
library support relies on the "libtool" script which is built as part of the
|
|
||||||
"configure" process.
|
|
||||||
|
|
||||||
The libtool script is used to compile and link both shared and static
|
|
||||||
libraries. They are placed in a subdirectory called .libs when they are newly
|
|
||||||
built. The programs pcretest and pcregrep are built to use these uninstalled
|
|
||||||
libraries (by means of wrapper scripts in the case of shared libraries). When
|
|
||||||
you use "make install" to install shared libraries, pcregrep and pcretest are
|
|
||||||
automatically re-built to use the newly installed shared libraries before being
|
|
||||||
installed themselves. However, the versions left in the source directory still
|
|
||||||
use the uninstalled libraries.
|
|
||||||
|
|
||||||
To build PCRE using static libraries only you must use --disable-shared when
|
|
||||||
configuring it. For example
|
|
||||||
|
|
||||||
./configure --prefix=/usr/gnu --disable-shared
|
|
||||||
|
|
||||||
Then run "make" in the usual way. Similarly, you can use --disable-static to
|
|
||||||
build only shared libraries.
|
|
||||||
|
|
||||||
|
|
||||||
Cross-compiling on a Unix-like system
|
|
||||||
-------------------------------------
|
|
||||||
|
|
||||||
You can specify CC and CFLAGS in the normal way to the "configure" command, in
|
|
||||||
order to cross-compile PCRE for some other host. However, during the building
|
|
||||||
process, the dftables.c source file is compiled *and run* on the local host, in
|
|
||||||
order to generate the default character tables (the chartables.c file). It
|
|
||||||
therefore needs to be compiled with the local compiler, not the cross compiler.
|
|
||||||
You can do this by specifying CC_FOR_BUILD (and if necessary CFLAGS_FOR_BUILD)
|
|
||||||
when calling the "configure" command. If they are not specified, they default
|
|
||||||
to the values of CC and CFLAGS.
|
|
||||||
|
|
||||||
|
|
||||||
Building on non-Unix systems
|
|
||||||
----------------------------
|
|
||||||
|
|
||||||
For a non-Unix system, read the comments in the file NON-UNIX-USE, though if
|
|
||||||
the system supports the use of "configure" and "make" you may be able to build
|
|
||||||
PCRE in the same way as for Unix systems.
|
|
||||||
|
|
||||||
PCRE has been compiled on Windows systems and on Macintoshes, but I don't know
|
|
||||||
the details because I don't use those systems. It should be straightforward to
|
|
||||||
build PCRE on any system that has a Standard C compiler, because it uses only
|
|
||||||
Standard C functions.
|
|
||||||
|
|
||||||
|
|
||||||
Testing PCRE
|
|
||||||
------------
|
|
||||||
|
|
||||||
To test PCRE on a Unix system, run the RunTest script that is created by the
|
|
||||||
configuring process. (This can also be run by "make runtest", "make check", or
|
|
||||||
"make test".) For other systems, see the instructions in NON-UNIX-USE.
|
|
||||||
|
|
||||||
The script runs the pcretest test program (which is documented in its own man
|
|
||||||
page) on each of the testinput files (in the testdata directory) in turn,
|
|
||||||
and compares the output with the contents of the corresponding testoutput file.
|
|
||||||
A file called testtry is used to hold the output from pcretest. To run pcretest
|
|
||||||
on just one of the test files, give its number as an argument to RunTest, for
|
|
||||||
example:
|
|
||||||
|
|
||||||
RunTest 2
|
|
||||||
|
|
||||||
The first file can also be fed directly into the perltest script to check that
|
|
||||||
Perl gives the same results. The only difference you should see is in the first
|
|
||||||
few lines, where the Perl version is given instead of the PCRE version.
|
|
||||||
|
|
||||||
The second set of tests check pcre_fullinfo(), pcre_info(), pcre_study(),
|
|
||||||
pcre_copy_substring(), pcre_get_substring(), pcre_get_substring_list(), error
|
|
||||||
detection, and run-time flags that are specific to PCRE, as well as the POSIX
|
|
||||||
wrapper API. It also uses the debugging flag to check some of the internals of
|
|
||||||
pcre_compile().
|
|
||||||
|
|
||||||
If you build PCRE with a locale setting that is not the standard C locale, the
|
|
||||||
character tables may be different (see next paragraph). In some cases, this may
|
|
||||||
cause failures in the second set of tests. For example, in a locale where the
|
|
||||||
isprint() function yields TRUE for characters in the range 128-255, the use of
|
|
||||||
[:isascii:] inside a character class defines a different set of characters, and
|
|
||||||
this shows up in this test as a difference in the compiled code, which is being
|
|
||||||
listed for checking. Where the comparison test output contains [\x00-\x7f] the
|
|
||||||
test will contain [\x00-\xff], and similarly in some other cases. This is not a
|
|
||||||
bug in PCRE.
|
|
||||||
|
|
||||||
The third set of tests checks pcre_maketables(), the facility for building a
|
|
||||||
set of character tables for a specific locale and using them instead of the
|
|
||||||
default tables. The tests make use of the "fr_FR" (French) locale. Before
|
|
||||||
running the test, the script checks for the presence of this locale by running
|
|
||||||
the "locale" command. If that command fails, or if it doesn't include "fr_FR"
|
|
||||||
in the list of available locales, the third test cannot be run, and a comment
|
|
||||||
is output to say why. If running this test produces instances of the error
|
|
||||||
|
|
||||||
** Failed to set locale "fr_FR"
|
|
||||||
|
|
||||||
in the comparison output, it means that locale is not available on your system,
|
|
||||||
despite being listed by "locale". This does not mean that PCRE is broken.
|
|
||||||
|
|
||||||
The fourth test checks the UTF-8 support. It is not run automatically unless
|
|
||||||
PCRE is built with UTF-8 support. To do this you must set --enable-utf8 when
|
|
||||||
running "configure". This file can be also fed directly to the perltest script,
|
|
||||||
provided you are running Perl 5.8 or higher. (For Perl 5.6, a small patch,
|
|
||||||
commented in the script, can be used.)
|
|
||||||
|
|
||||||
The fifth and final file tests error handling with UTF-8 encoding, and internal
|
|
||||||
UTF-8 features of PCRE that are not relevant to Perl.
|
|
||||||
|
|
||||||
|
|
||||||
Character tables
|
|
||||||
----------------
|
|
||||||
|
|
||||||
PCRE uses four tables for manipulating and identifying characters. The final
|
|
||||||
argument of the pcre_compile() function is a pointer to a block of memory
|
|
||||||
containing the concatenated tables. A call to pcre_maketables() can be used to
|
|
||||||
generate a set of tables in the current locale. If the final argument for
|
|
||||||
pcre_compile() is passed as NULL, a set of default tables that is built into
|
|
||||||
the binary is used.
|
|
||||||
|
|
||||||
The source file called chartables.c contains the default set of tables. This is
|
|
||||||
not supplied in the distribution, but is built by the program dftables
|
|
||||||
(compiled from dftables.c), which uses the ANSI C character handling functions
|
|
||||||
such as isalnum(), isalpha(), isupper(), islower(), etc. to build the table
|
|
||||||
sources. This means that the default C locale which is set for your system will
|
|
||||||
control the contents of these default tables. You can change the default tables
|
|
||||||
by editing chartables.c and then re-building PCRE. If you do this, you should
|
|
||||||
probably also edit Makefile to ensure that the file doesn't ever get
|
|
||||||
re-generated.
|
|
||||||
|
|
||||||
The first two 256-byte tables provide lower casing and case flipping functions,
|
|
||||||
respectively. The next table consists of three 32-byte bit maps which identify
|
|
||||||
digits, "word" characters, and white space, respectively. These are used when
|
|
||||||
building 32-byte bit maps that represent character classes.
|
|
||||||
|
|
||||||
The final 256-byte table has bits indicating various character types, as
|
|
||||||
follows:
|
|
||||||
|
|
||||||
1 white space character
|
|
||||||
2 letter
|
|
||||||
4 decimal digit
|
|
||||||
8 hexadecimal digit
|
|
||||||
16 alphanumeric or '_'
|
|
||||||
128 regular expression metacharacter or binary zero
|
|
||||||
|
|
||||||
You should not alter the set of characters that contain the 128 bit, as that
|
|
||||||
will cause PCRE to malfunction.
|
|
||||||
|
|
||||||
|
|
||||||
Manifest
|
|
||||||
--------
|
|
||||||
|
|
||||||
The distribution should contain the following files:
|
|
||||||
|
|
||||||
(A) The actual source files of the PCRE library functions and their
|
|
||||||
headers:
|
|
||||||
|
|
||||||
dftables.c auxiliary program for building chartables.c
|
|
||||||
get.c )
|
|
||||||
maketables.c )
|
|
||||||
study.c ) source of
|
|
||||||
pcre.c ) the functions
|
|
||||||
pcreposix.c )
|
|
||||||
printint.c )
|
|
||||||
pcre.in "source" for the header for the external API; pcre.h
|
|
||||||
is built from this by "configure"
|
|
||||||
pcreposix.h header for the external POSIX wrapper API
|
|
||||||
internal.h header for internal use
|
|
||||||
config.in template for config.h, which is built by configure
|
|
||||||
|
|
||||||
(B) Auxiliary files:
|
|
||||||
|
|
||||||
AUTHORS information about the author of PCRE
|
|
||||||
ChangeLog log of changes to the code
|
|
||||||
INSTALL generic installation instructions
|
|
||||||
LICENCE conditions for the use of PCRE
|
|
||||||
COPYING the same, using GNU's standard name
|
|
||||||
Makefile.in template for Unix Makefile, which is built by configure
|
|
||||||
NEWS important changes in this release
|
|
||||||
NON-UNIX-USE notes on building PCRE on non-Unix systems
|
|
||||||
README this file
|
|
||||||
RunTest.in template for a Unix shell script for running tests
|
|
||||||
config.guess ) files used by libtool,
|
|
||||||
config.sub ) used only when building a shared library
|
|
||||||
configure a configuring shell script (built by autoconf)
|
|
||||||
configure.in the autoconf input used to build configure
|
|
||||||
doc/Tech.Notes notes on the encoding
|
|
||||||
doc/*.3 man page sources for the PCRE functions
|
|
||||||
doc/*.1 man page sources for pcregrep and pcretest
|
|
||||||
doc/html/* HTML documentation
|
|
||||||
doc/pcre.txt plain text version of the man pages
|
|
||||||
doc/pcretest.txt plain text documentation of test program
|
|
||||||
doc/perltest.txt plain text documentation of Perl test program
|
|
||||||
install-sh a shell script for installing files
|
|
||||||
ltmain.sh file used to build a libtool script
|
|
||||||
pcretest.c comprehensive test program
|
|
||||||
pcredemo.c simple demonstration of coding calls to PCRE
|
|
||||||
perltest Perl test program
|
|
||||||
pcregrep.c source of a grep utility that uses PCRE
|
|
||||||
pcre-config.in source of script which retains PCRE information
|
|
||||||
testdata/testinput1 test data, compatible with Perl
|
|
||||||
testdata/testinput2 test data for error messages and non-Perl things
|
|
||||||
testdata/testinput3 test data for locale-specific tests
|
|
||||||
testdata/testinput4 test data for UTF-8 tests compatible with Perl
|
|
||||||
testdata/testinput5 test data for other UTF-8 tests
|
|
||||||
testdata/testoutput1 test results corresponding to testinput1
|
|
||||||
testdata/testoutput2 test results corresponding to testinput2
|
|
||||||
testdata/testoutput3 test results corresponding to testinput3
|
|
||||||
testdata/testoutput4 test results corresponding to testinput4
|
|
||||||
testdata/testoutput5 test results corresponding to testinput5
|
|
||||||
|
|
||||||
(C) Auxiliary files for Win32 DLL
|
|
||||||
|
|
||||||
dll.mk
|
|
||||||
pcre.def
|
|
||||||
|
|
||||||
(D) Auxiliary file for VPASCAL
|
|
||||||
|
|
||||||
makevp.bat
|
|
||||||
|
|
||||||
Philip Hazel <ph10@cam.ac.uk>
|
|
||||||
December 2003
|
|
|
@ -1,6 +0,0 @@
|
||||||
This is PCRE, stripped down to what is required by the LDMud gamedriver.
|
|
||||||
The files README and COPYING tell you where to get the complete package.
|
|
||||||
|
|
||||||
The file internal.h was modified so that it is protected against multiple
|
|
||||||
inclusion.
|
|
||||||
|
|
|
@ -1,107 +0,0 @@
|
||||||
|
|
||||||
/* On Unix systems config.in is converted by configure into config.h. PCRE is
|
|
||||||
written in Standard C, but there are a few non-standard things it can cope
|
|
||||||
with, allowing it to run on SunOS4 and other "close to standard" systems.
|
|
||||||
|
|
||||||
On a non-Unix system you should just copy this file into config.h, and set up
|
|
||||||
the macros the way you need them. You should normally change the definitions of
|
|
||||||
HAVE_STRERROR and HAVE_MEMMOVE to 1. Unfortunately, because of the way autoconf
|
|
||||||
works, these cannot be made the defaults. If your system has bcopy() and not
|
|
||||||
memmove(), change the definition of HAVE_BCOPY instead of HAVE_MEMMOVE. If your
|
|
||||||
system has neither bcopy() nor memmove(), leave them both as 0; an emulation
|
|
||||||
function will be used. */
|
|
||||||
|
|
||||||
/* If you are compiling for a system that uses EBCDIC instead of ASCII
|
|
||||||
character codes, define this macro as 1. On systems that can use "configure",
|
|
||||||
this can be done via --enable-ebcdic. */
|
|
||||||
|
|
||||||
#ifndef EBCDIC
|
|
||||||
#define EBCDIC 0
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* If you are compiling for a system that needs some magic to be inserted
|
|
||||||
before the definition of an exported function, define this macro to contain the
|
|
||||||
relevant magic. It appears at the start of every exported function. */
|
|
||||||
|
|
||||||
#define EXPORT
|
|
||||||
|
|
||||||
/* Define to empty if the "const" keyword does not work. */
|
|
||||||
|
|
||||||
/* #undef const */
|
|
||||||
|
|
||||||
/* Define to "unsigned" if <stddef.h> doesn't define size_t. */
|
|
||||||
|
|
||||||
/* #undef size_t */
|
|
||||||
|
|
||||||
/* The following two definitions are mainly for the benefit of SunOS4, which
|
|
||||||
doesn't have the strerror() or memmove() functions that should be present in
|
|
||||||
all Standard C libraries. The macros HAVE_STRERROR and HAVE_MEMMOVE should
|
|
||||||
normally be defined with the value 1 for other systems, but unfortunately we
|
|
||||||
can't make this the default because "configure" files generated by autoconf
|
|
||||||
will only change 0 to 1; they won't change 1 to 0 if the functions are not
|
|
||||||
found. */
|
|
||||||
|
|
||||||
#define HAVE_STRERROR 1
|
|
||||||
#define HAVE_MEMMOVE 1
|
|
||||||
|
|
||||||
/* There are some non-Unix systems that don't even have bcopy(). If this macro
|
|
||||||
is false, an emulation is used. If HAVE_MEMMOVE is set to 1, the value of
|
|
||||||
HAVE_BCOPY is not relevant. */
|
|
||||||
|
|
||||||
#define HAVE_BCOPY 1
|
|
||||||
|
|
||||||
/* The value of NEWLINE determines the newline character. The default is to
|
|
||||||
leave it up to the compiler, but some sites want to force a particular value.
|
|
||||||
On Unix systems, "configure" can be used to override this default. */
|
|
||||||
|
|
||||||
#ifndef NEWLINE
|
|
||||||
#define NEWLINE '\n'
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* The value of LINK_SIZE determines the number of bytes used to store
|
|
||||||
links as offsets within the compiled regex. The default is 2, which allows for
|
|
||||||
compiled patterns up to 64K long. This covers the vast majority of cases.
|
|
||||||
However, PCRE can also be compiled to use 3 or 4 bytes instead. This allows for
|
|
||||||
longer patterns in extreme cases. On Unix systems, "configure" can be used to
|
|
||||||
override this default. */
|
|
||||||
|
|
||||||
#ifndef LINK_SIZE
|
|
||||||
#define LINK_SIZE 2
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* The value of MATCH_LIMIT determines the default number of times the match()
|
|
||||||
function can be called during a single execution of pcre_exec(). (There is a
|
|
||||||
runtime method of setting a different limit.) The limit exists in order to
|
|
||||||
catch runaway regular expressions that take for ever to determine that they do
|
|
||||||
not match. The default is set very large so that it does not accidentally catch
|
|
||||||
legitimate cases. On Unix systems, "configure" can be used to override this
|
|
||||||
default. */
|
|
||||||
|
|
||||||
#ifndef MATCH_LIMIT
|
|
||||||
#define MATCH_LIMIT 10000000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* When calling PCRE via the POSIX interface, additional working storage is
|
|
||||||
required for holding the pointers to capturing substrings because PCRE requires
|
|
||||||
three integers per substring, whereas the POSIX interface provides only two. If
|
|
||||||
the number of expected substrings is small, the wrapper function uses space on
|
|
||||||
the stack, because this is faster than using malloc() for each call. The
|
|
||||||
threshold above which the stack is no longer used is defined by POSIX_MALLOC_
|
|
||||||
THRESHOLD. On Unix systems, "configure" can be used to override this default.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef POSIX_MALLOC_THRESHOLD
|
|
||||||
#define POSIX_MALLOC_THRESHOLD 10
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* PCRE uses recursive function calls to handle backtracking while matching.
|
|
||||||
This can sometimes be a problem on systems that have stacks of limited size.
|
|
||||||
Define NO_RECURSE to get a version that doesn't use recursion in the match()
|
|
||||||
function; instead it creates its own stack by steam using pcre_recurse_malloc
|
|
||||||
to get memory. For more detail, see comments and other stuff just above the
|
|
||||||
match() function. On Unix systems, "configure" can be used to set this in the
|
|
||||||
Makefile (use --disable-recursion). */
|
|
||||||
|
|
||||||
/* #define NO_RECURSE */
|
|
||||||
|
|
||||||
/* End */
|
|
|
@ -1,167 +0,0 @@
|
||||||
/*************************************************
|
|
||||||
* Perl-Compatible Regular Expressions *
|
|
||||||
*************************************************/
|
|
||||||
|
|
||||||
/*
|
|
||||||
PCRE is a library of functions to support regular expressions whose syntax
|
|
||||||
and semantics are as close as possible to those of the Perl 5 language.
|
|
||||||
|
|
||||||
Written by: Philip Hazel <ph10@cam.ac.uk>
|
|
||||||
|
|
||||||
Copyright (c) 1997-2003 University of Cambridge
|
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
|
||||||
Permission is granted to anyone to use this software for any purpose on any
|
|
||||||
computer system, and to redistribute it freely, subject to the following
|
|
||||||
restrictions:
|
|
||||||
|
|
||||||
1. This software is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
||||||
|
|
||||||
2. The origin of this software must not be misrepresented, either by
|
|
||||||
explicit claim or by omission.
|
|
||||||
|
|
||||||
3. Altered versions must be plainly marked as such, and must not be
|
|
||||||
misrepresented as being the original software.
|
|
||||||
|
|
||||||
4. If PCRE is embedded in any software that is released under the GNU
|
|
||||||
General Purpose Licence (GPL), then the terms of that licence shall
|
|
||||||
supersede any condition above with which it is incompatible.
|
|
||||||
-----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
See the file Tech.Notes for some information on the internals.
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
/* This is a support program to generate the file chartables.c, containing
|
|
||||||
character tables of various kinds. They are built according to the default C
|
|
||||||
locale and used as the default tables by PCRE. Now that pcre_maketables is
|
|
||||||
a function visible to the outside world, we make use of its code from here in
|
|
||||||
order to be consistent. */
|
|
||||||
|
|
||||||
#include <ctype.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <string.h>
|
|
||||||
|
|
||||||
#include "internal.h"
|
|
||||||
|
|
||||||
#define DFTABLES /* maketables.c notices this */
|
|
||||||
#include "maketables.c"
|
|
||||||
|
|
||||||
|
|
||||||
int main(int argc, char **argv)
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
FILE *f;
|
|
||||||
const unsigned char *tables = pcre_maketables();
|
|
||||||
|
|
||||||
if (argc != 2)
|
|
||||||
{
|
|
||||||
fprintf(stderr, "dftables: one filename argument is required\n");
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
f = fopen(argv[1], "w");
|
|
||||||
if (f == NULL)
|
|
||||||
{
|
|
||||||
fprintf(stderr, "dftables: failed to open %s for writing\n", argv[1]);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* There are two fprintf() calls here, because gcc in pedantic mode complains
|
|
||||||
about the very long string otherwise. */
|
|
||||||
|
|
||||||
fprintf(f,
|
|
||||||
"/*************************************************\n"
|
|
||||||
"* Perl-Compatible Regular Expressions *\n"
|
|
||||||
"*************************************************/\n\n"
|
|
||||||
"/* This file is automatically written by the dftables auxiliary \n"
|
|
||||||
"program. If you edit it by hand, you might like to edit the Makefile to \n"
|
|
||||||
"prevent its ever being regenerated.\n\n");
|
|
||||||
fprintf(f,
|
|
||||||
"This file is #included in the compilation of pcre.c to build the default\n"
|
|
||||||
"character tables which are used when no tables are passed to the compile\n"
|
|
||||||
"function. */\n\n"
|
|
||||||
"static unsigned char pcre_default_tables[] = {\n\n"
|
|
||||||
"/* This table is a lower casing table. */\n\n");
|
|
||||||
|
|
||||||
fprintf(f, " ");
|
|
||||||
for (i = 0; i < 256; i++)
|
|
||||||
{
|
|
||||||
if ((i & 7) == 0 && i != 0) fprintf(f, "\n ");
|
|
||||||
fprintf(f, "%3d", *tables++);
|
|
||||||
if (i != 255) fprintf(f, ",");
|
|
||||||
}
|
|
||||||
fprintf(f, ",\n\n");
|
|
||||||
|
|
||||||
fprintf(f, "/* This table is a case flipping table. */\n\n");
|
|
||||||
|
|
||||||
fprintf(f, " ");
|
|
||||||
for (i = 0; i < 256; i++)
|
|
||||||
{
|
|
||||||
if ((i & 7) == 0 && i != 0) fprintf(f, "\n ");
|
|
||||||
fprintf(f, "%3d", *tables++);
|
|
||||||
if (i != 255) fprintf(f, ",");
|
|
||||||
}
|
|
||||||
fprintf(f, ",\n\n");
|
|
||||||
|
|
||||||
fprintf(f,
|
|
||||||
"/* This table contains bit maps for various character classes.\n"
|
|
||||||
"Each map is 32 bytes long and the bits run from the least\n"
|
|
||||||
"significant end of each byte. The classes that have their own\n"
|
|
||||||
"maps are: space, xdigit, digit, upper, lower, word, graph\n"
|
|
||||||
"print, punct, and cntrl. Other classes are built from combinations. */\n\n");
|
|
||||||
|
|
||||||
fprintf(f, " ");
|
|
||||||
for (i = 0; i < cbit_length; i++)
|
|
||||||
{
|
|
||||||
if ((i & 7) == 0 && i != 0)
|
|
||||||
{
|
|
||||||
if ((i & 31) == 0) fprintf(f, "\n");
|
|
||||||
fprintf(f, "\n ");
|
|
||||||
}
|
|
||||||
fprintf(f, "0x%02x", *tables++);
|
|
||||||
if (i != cbit_length - 1) fprintf(f, ",");
|
|
||||||
}
|
|
||||||
fprintf(f, ",\n\n");
|
|
||||||
|
|
||||||
fprintf(f,
|
|
||||||
"/* This table identifies various classes of character by individual bits:\n"
|
|
||||||
" 0x%02x white space character\n"
|
|
||||||
" 0x%02x letter\n"
|
|
||||||
" 0x%02x decimal digit\n"
|
|
||||||
" 0x%02x hexadecimal digit\n"
|
|
||||||
" 0x%02x alphanumeric or '_'\n"
|
|
||||||
" 0x%02x regular expression metacharacter or binary zero\n*/\n\n",
|
|
||||||
ctype_space, ctype_letter, ctype_digit, ctype_xdigit, ctype_word,
|
|
||||||
ctype_meta);
|
|
||||||
|
|
||||||
fprintf(f, " ");
|
|
||||||
for (i = 0; i < 256; i++)
|
|
||||||
{
|
|
||||||
if ((i & 7) == 0 && i != 0)
|
|
||||||
{
|
|
||||||
fprintf(f, " /* ");
|
|
||||||
if (isprint(i-8)) fprintf(f, " %c -", i-8);
|
|
||||||
else fprintf(f, "%3d-", i-8);
|
|
||||||
if (isprint(i-1)) fprintf(f, " %c ", i-1);
|
|
||||||
else fprintf(f, "%3d", i-1);
|
|
||||||
fprintf(f, " */\n ");
|
|
||||||
}
|
|
||||||
fprintf(f, "0x%02x", *tables++);
|
|
||||||
if (i != 255) fprintf(f, ",");
|
|
||||||
}
|
|
||||||
|
|
||||||
fprintf(f, "};/* ");
|
|
||||||
if (isprint(i-8)) fprintf(f, " %c -", i-8);
|
|
||||||
else fprintf(f, "%3d-", i-8);
|
|
||||||
if (isprint(i-1)) fprintf(f, " %c ", i-1);
|
|
||||||
else fprintf(f, "%3d", i-1);
|
|
||||||
fprintf(f, " */\n\n/* End of chartables.c */\n");
|
|
||||||
|
|
||||||
fclose(f);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* End of dftables.c */
|
|
349
src/pcre/get.c
349
src/pcre/get.c
|
@ -1,349 +0,0 @@
|
||||||
/*************************************************
|
|
||||||
* Perl-Compatible Regular Expressions *
|
|
||||||
*************************************************/
|
|
||||||
|
|
||||||
/*
|
|
||||||
This is a library of functions to support regular expressions whose syntax
|
|
||||||
and semantics are as close as possible to those of the Perl 5 language. See
|
|
||||||
the file Tech.Notes for some information on the internals.
|
|
||||||
|
|
||||||
Written by: Philip Hazel <ph10@cam.ac.uk>
|
|
||||||
|
|
||||||
Copyright (c) 1997-2003 University of Cambridge
|
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
|
||||||
Permission is granted to anyone to use this software for any purpose on any
|
|
||||||
computer system, and to redistribute it freely, subject to the following
|
|
||||||
restrictions:
|
|
||||||
|
|
||||||
1. This software is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
||||||
|
|
||||||
2. The origin of this software must not be misrepresented, either by
|
|
||||||
explicit claim or by omission.
|
|
||||||
|
|
||||||
3. Altered versions must be plainly marked as such, and must not be
|
|
||||||
misrepresented as being the original software.
|
|
||||||
|
|
||||||
4. If PCRE is embedded in any software that is released under the GNU
|
|
||||||
General Purpose Licence (GPL), then the terms of that licence shall
|
|
||||||
supersede any condition above with which it is incompatible.
|
|
||||||
-----------------------------------------------------------------------------
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* This module contains some convenience functions for extracting substrings
|
|
||||||
from the subject string after a regex match has succeeded. The original idea
|
|
||||||
for these functions came from Scott Wimer <scottw@cgibuilder.com>. */
|
|
||||||
|
|
||||||
|
|
||||||
/* Include the internals header, which itself includes Standard C headers plus
|
|
||||||
the external pcre header. */
|
|
||||||
|
|
||||||
#include "internal.h"
|
|
||||||
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Find number for named string *
|
|
||||||
*************************************************/
|
|
||||||
|
|
||||||
/* This function is used by the two extraction functions below, as well
|
|
||||||
as being generally available.
|
|
||||||
|
|
||||||
Arguments:
|
|
||||||
code the compiled regex
|
|
||||||
stringname the name whose number is required
|
|
||||||
|
|
||||||
Returns: the number of the named parentheses, or a negative number
|
|
||||||
(PCRE_ERROR_NOSUBSTRING) if not found
|
|
||||||
*/
|
|
||||||
|
|
||||||
int
|
|
||||||
pcre_get_stringnumber(const pcre *code, const char *stringname)
|
|
||||||
{
|
|
||||||
int rc;
|
|
||||||
int entrysize;
|
|
||||||
int top, bot;
|
|
||||||
uschar *nametable;
|
|
||||||
|
|
||||||
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
|
|
||||||
return rc;
|
|
||||||
if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
|
|
||||||
|
|
||||||
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
|
|
||||||
return rc;
|
|
||||||
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
|
|
||||||
return rc;
|
|
||||||
|
|
||||||
bot = 0;
|
|
||||||
while (top > bot)
|
|
||||||
{
|
|
||||||
int mid = (top + bot) / 2;
|
|
||||||
uschar *entry = nametable + entrysize*mid;
|
|
||||||
int c = strcmp(stringname, (char *)(entry + 2));
|
|
||||||
if (c == 0) return (entry[0] << 8) + entry[1];
|
|
||||||
if (c > 0) bot = mid + 1; else top = mid;
|
|
||||||
}
|
|
||||||
|
|
||||||
return PCRE_ERROR_NOSUBSTRING;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Copy captured string to given buffer *
|
|
||||||
*************************************************/
|
|
||||||
|
|
||||||
/* This function copies a single captured substring into a given buffer.
|
|
||||||
Note that we use memcpy() rather than strncpy() in case there are binary zeros
|
|
||||||
in the string.
|
|
||||||
|
|
||||||
Arguments:
|
|
||||||
subject the subject string that was matched
|
|
||||||
ovector pointer to the offsets table
|
|
||||||
stringcount the number of substrings that were captured
|
|
||||||
(i.e. the yield of the pcre_exec call, unless
|
|
||||||
that was zero, in which case it should be 1/3
|
|
||||||
of the offset table size)
|
|
||||||
stringnumber the number of the required substring
|
|
||||||
buffer where to put the substring
|
|
||||||
size the size of the buffer
|
|
||||||
|
|
||||||
Returns: if successful:
|
|
||||||
the length of the copied string, not including the zero
|
|
||||||
that is put on the end; can be zero
|
|
||||||
if not successful:
|
|
||||||
PCRE_ERROR_NOMEMORY (-6) buffer too small
|
|
||||||
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
|
|
||||||
*/
|
|
||||||
|
|
||||||
int
|
|
||||||
pcre_copy_substring(const char *subject, int *ovector, int stringcount,
|
|
||||||
int stringnumber, char *buffer, int size)
|
|
||||||
{
|
|
||||||
int yield;
|
|
||||||
if (stringnumber < 0 || stringnumber >= stringcount)
|
|
||||||
return PCRE_ERROR_NOSUBSTRING;
|
|
||||||
stringnumber *= 2;
|
|
||||||
yield = ovector[stringnumber+1] - ovector[stringnumber];
|
|
||||||
if (size < yield + 1) return PCRE_ERROR_NOMEMORY;
|
|
||||||
memcpy(buffer, subject + ovector[stringnumber], yield);
|
|
||||||
buffer[yield] = 0;
|
|
||||||
return yield;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Copy named captured string to given buffer *
|
|
||||||
*************************************************/
|
|
||||||
|
|
||||||
/* This function copies a single captured substring into a given buffer,
|
|
||||||
identifying it by name.
|
|
||||||
|
|
||||||
Arguments:
|
|
||||||
code the compiled regex
|
|
||||||
subject the subject string that was matched
|
|
||||||
ovector pointer to the offsets table
|
|
||||||
stringcount the number of substrings that were captured
|
|
||||||
(i.e. the yield of the pcre_exec call, unless
|
|
||||||
that was zero, in which case it should be 1/3
|
|
||||||
of the offset table size)
|
|
||||||
stringname the name of the required substring
|
|
||||||
buffer where to put the substring
|
|
||||||
size the size of the buffer
|
|
||||||
|
|
||||||
Returns: if successful:
|
|
||||||
the length of the copied string, not including the zero
|
|
||||||
that is put on the end; can be zero
|
|
||||||
if not successful:
|
|
||||||
PCRE_ERROR_NOMEMORY (-6) buffer too small
|
|
||||||
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
|
|
||||||
*/
|
|
||||||
|
|
||||||
int
|
|
||||||
pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector,
|
|
||||||
int stringcount, const char *stringname, char *buffer, int size)
|
|
||||||
{
|
|
||||||
int n = pcre_get_stringnumber(code, stringname);
|
|
||||||
if (n <= 0) return n;
|
|
||||||
return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Copy all captured strings to new store *
|
|
||||||
*************************************************/
|
|
||||||
|
|
||||||
/* This function gets one chunk of store and builds a list of pointers and all
|
|
||||||
of the captured substrings in it. A NULL pointer is put on the end of the list.
|
|
||||||
|
|
||||||
Arguments:
|
|
||||||
subject the subject string that was matched
|
|
||||||
ovector pointer to the offsets table
|
|
||||||
stringcount the number of substrings that were captured
|
|
||||||
(i.e. the yield of the pcre_exec call, unless
|
|
||||||
that was zero, in which case it should be 1/3
|
|
||||||
of the offset table size)
|
|
||||||
listptr set to point to the list of pointers
|
|
||||||
|
|
||||||
Returns: if successful: 0
|
|
||||||
if not successful:
|
|
||||||
PCRE_ERROR_NOMEMORY (-6) failed to get store
|
|
||||||
*/
|
|
||||||
|
|
||||||
int
|
|
||||||
pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
|
|
||||||
const char ***listptr)
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
int size = sizeof(char *);
|
|
||||||
int double_count = stringcount * 2;
|
|
||||||
char **stringlist;
|
|
||||||
char *p;
|
|
||||||
|
|
||||||
for (i = 0; i < double_count; i += 2)
|
|
||||||
size += sizeof(char *) + ovector[i+1] - ovector[i] + 1;
|
|
||||||
|
|
||||||
stringlist = (char **)(pcre_malloc)(size);
|
|
||||||
if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
|
|
||||||
|
|
||||||
*listptr = (const char **)stringlist;
|
|
||||||
p = (char *)(stringlist + stringcount + 1);
|
|
||||||
|
|
||||||
for (i = 0; i < double_count; i += 2)
|
|
||||||
{
|
|
||||||
int len = ovector[i+1] - ovector[i];
|
|
||||||
memcpy(p, subject + ovector[i], len);
|
|
||||||
*stringlist++ = p;
|
|
||||||
p += len;
|
|
||||||
*p++ = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
*stringlist = NULL;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Free store obtained by get_substring_list *
|
|
||||||
*************************************************/
|
|
||||||
|
|
||||||
/* This function exists for the benefit of people calling PCRE from non-C
|
|
||||||
programs that can call its functions, but not free() or (pcre_free)() directly.
|
|
||||||
|
|
||||||
Argument: the result of a previous pcre_get_substring_list()
|
|
||||||
Returns: nothing
|
|
||||||
*/
|
|
||||||
|
|
||||||
void
|
|
||||||
pcre_free_substring_list(const char **pointer)
|
|
||||||
{
|
|
||||||
(pcre_free)((void *)pointer);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Copy captured string to new store *
|
|
||||||
*************************************************/
|
|
||||||
|
|
||||||
/* This function copies a single captured substring into a piece of new
|
|
||||||
store
|
|
||||||
|
|
||||||
Arguments:
|
|
||||||
subject the subject string that was matched
|
|
||||||
ovector pointer to the offsets table
|
|
||||||
stringcount the number of substrings that were captured
|
|
||||||
(i.e. the yield of the pcre_exec call, unless
|
|
||||||
that was zero, in which case it should be 1/3
|
|
||||||
of the offset table size)
|
|
||||||
stringnumber the number of the required substring
|
|
||||||
stringptr where to put a pointer to the substring
|
|
||||||
|
|
||||||
Returns: if successful:
|
|
||||||
the length of the string, not including the zero that
|
|
||||||
is put on the end; can be zero
|
|
||||||
if not successful:
|
|
||||||
PCRE_ERROR_NOMEMORY (-6) failed to get store
|
|
||||||
PCRE_ERROR_NOSUBSTRING (-7) substring not present
|
|
||||||
*/
|
|
||||||
|
|
||||||
int
|
|
||||||
pcre_get_substring(const char *subject, int *ovector, int stringcount,
|
|
||||||
int stringnumber, const char **stringptr)
|
|
||||||
{
|
|
||||||
int yield;
|
|
||||||
char *substring;
|
|
||||||
if (stringnumber < 0 || stringnumber >= stringcount)
|
|
||||||
return PCRE_ERROR_NOSUBSTRING;
|
|
||||||
stringnumber *= 2;
|
|
||||||
yield = ovector[stringnumber+1] - ovector[stringnumber];
|
|
||||||
substring = (char *)(pcre_malloc)(yield + 1);
|
|
||||||
if (substring == NULL) return PCRE_ERROR_NOMEMORY;
|
|
||||||
memcpy(substring, subject + ovector[stringnumber], yield);
|
|
||||||
substring[yield] = 0;
|
|
||||||
*stringptr = substring;
|
|
||||||
return yield;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Copy named captured string to new store *
|
|
||||||
*************************************************/
|
|
||||||
|
|
||||||
/* This function copies a single captured substring, identified by name, into
|
|
||||||
new store.
|
|
||||||
|
|
||||||
Arguments:
|
|
||||||
code the compiled regex
|
|
||||||
subject the subject string that was matched
|
|
||||||
ovector pointer to the offsets table
|
|
||||||
stringcount the number of substrings that were captured
|
|
||||||
(i.e. the yield of the pcre_exec call, unless
|
|
||||||
that was zero, in which case it should be 1/3
|
|
||||||
of the offset table size)
|
|
||||||
stringname the name of the required substring
|
|
||||||
stringptr where to put the pointer
|
|
||||||
|
|
||||||
Returns: if successful:
|
|
||||||
the length of the copied string, not including the zero
|
|
||||||
that is put on the end; can be zero
|
|
||||||
if not successful:
|
|
||||||
PCRE_ERROR_NOMEMORY (-6) couldn't get memory
|
|
||||||
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
|
|
||||||
*/
|
|
||||||
|
|
||||||
int
|
|
||||||
pcre_get_named_substring(const pcre *code, const char *subject, int *ovector,
|
|
||||||
int stringcount, const char *stringname, const char **stringptr)
|
|
||||||
{
|
|
||||||
int n = pcre_get_stringnumber(code, stringname);
|
|
||||||
if (n <= 0) return n;
|
|
||||||
return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Free store obtained by get_substring *
|
|
||||||
*************************************************/
|
|
||||||
|
|
||||||
/* This function exists for the benefit of people calling PCRE from non-C
|
|
||||||
programs that can call its functions, but not free() or (pcre_free)() directly.
|
|
||||||
|
|
||||||
Argument: the result of a previous pcre_get_substring()
|
|
||||||
Returns: nothing
|
|
||||||
*/
|
|
||||||
|
|
||||||
void
|
|
||||||
pcre_free_substring(const char *pointer)
|
|
||||||
{
|
|
||||||
(pcre_free)((void *)pointer);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* End of get.c */
|
|
|
@ -1,682 +0,0 @@
|
||||||
#ifndef PCRE_INTERNAL
|
|
||||||
#define PCRE_INTERNAL
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Perl-Compatible Regular Expressions *
|
|
||||||
*************************************************/
|
|
||||||
|
|
||||||
|
|
||||||
/* This is a library of functions to support regular expressions whose syntax
|
|
||||||
and semantics are as close as possible to those of the Perl 5 language. See
|
|
||||||
the file Tech.Notes for some information on the internals.
|
|
||||||
|
|
||||||
Written by: Philip Hazel <ph10@cam.ac.uk>
|
|
||||||
|
|
||||||
Copyright (c) 1997-2003 University of Cambridge
|
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
|
||||||
Permission is granted to anyone to use this software for any purpose on any
|
|
||||||
computer system, and to redistribute it freely, subject to the following
|
|
||||||
restrictions:
|
|
||||||
|
|
||||||
1. This software is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
||||||
|
|
||||||
2. The origin of this software must not be misrepresented, either by
|
|
||||||
explicit claim or by omission.
|
|
||||||
|
|
||||||
3. Altered versions must be plainly marked as such, and must not be
|
|
||||||
misrepresented as being the original software.
|
|
||||||
|
|
||||||
4. If PCRE is embedded in any software that is released under the GNU
|
|
||||||
General Purpose Licence (GPL), then the terms of that licence shall
|
|
||||||
supersede any condition above with which it is incompatible.
|
|
||||||
-----------------------------------------------------------------------------
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* This header contains definitions that are shared between the different
|
|
||||||
modules, but which are not relevant to the outside. */
|
|
||||||
|
|
||||||
/* Get the definitions provided by running "configure" */
|
|
||||||
|
|
||||||
#include "config.h"
|
|
||||||
|
|
||||||
/* Standard C headers plus the external interface definition. The only time
|
|
||||||
setjmp and stdarg are used is when NO_RECURSE is set. */
|
|
||||||
|
|
||||||
#include <ctype.h>
|
|
||||||
#include <limits.h>
|
|
||||||
#include <setjmp.h>
|
|
||||||
#include <stdarg.h>
|
|
||||||
#include <stddef.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <string.h>
|
|
||||||
|
|
||||||
#ifndef PCRE_SPY
|
|
||||||
#define PCRE_DEFINITION /* Win32 __declspec(export) trigger for .dll */
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include "pcre.h"
|
|
||||||
|
|
||||||
/* When compiling for use with the Virtual Pascal compiler, these functions
|
|
||||||
need to have their names changed. PCRE must be compiled with the -DVPCOMPAT
|
|
||||||
option on the command line. */
|
|
||||||
|
|
||||||
#ifdef VPCOMPAT
|
|
||||||
#define strncmp(s1,s2,m) _strncmp(s1,s2,m)
|
|
||||||
#define memcpy(d,s,n) _memcpy(d,s,n)
|
|
||||||
#define memmove(d,s,n) _memmove(d,s,n)
|
|
||||||
#define memset(s,c,n) _memset(s,c,n)
|
|
||||||
#else /* VPCOMPAT */
|
|
||||||
|
|
||||||
/* To cope with SunOS4 and other systems that lack memmove() but have bcopy(),
|
|
||||||
define a macro for memmove() if HAVE_MEMMOVE is false, provided that HAVE_BCOPY
|
|
||||||
is set. Otherwise, include an emulating function for those systems that have
|
|
||||||
neither (there are some non-Unix environments where this is the case). This assumes
|
|
||||||
that all calls to memmove are moving strings upwards in store, which is the
|
|
||||||
case in PCRE. */
|
|
||||||
|
|
||||||
#if ! HAVE_MEMMOVE
|
|
||||||
#undef memmove /* some systems may have a macro */
|
|
||||||
#if HAVE_BCOPY
|
|
||||||
#define memmove(a, b, c) bcopy(b, a, c)
|
|
||||||
#else /* HAVE_BCOPY */
|
|
||||||
/* Fallback used when neither memmove() nor bcopy() is available. Copies n
bytes from src to dest starting with the last byte and working backwards, which
is the safe direction when an overlapping region is moved upwards in store.

Arguments:
  dest   where to copy to
  src    where to copy from
  n      number of bytes to copy

Returns: dest, matching the standard memmove() contract
*/

void *
pcre_memmove(unsigned char *dest, const unsigned char *src, size_t n)
{
size_t i;                        /* same type as n: no signed/unsigned mix */
dest += n;
src += n;
for (i = 0; i < n; ++i) *(--dest) = *(--src);
return dest;                     /* n decrements have brought dest back to its start */
}
|
|
||||||
#define memmove(a, b, c) pcre_memmove(a, b, c)
|
|
||||||
#endif /* not HAVE_BCOPY */
|
|
||||||
#endif /* not HAVE_MEMMOVE */
|
|
||||||
#endif /* not VPCOMPAT */
|
|
||||||
|
|
||||||
|
|
||||||
/* PCRE keeps offsets in its compiled code as 2-byte quantities by default.
|
|
||||||
These are used, for example, to link from the start of a subpattern to its
|
|
||||||
alternatives and its end. The use of 2 bytes per offset limits the size of the
|
|
||||||
compiled regex to around 64K, which is big enough for almost everybody.
|
|
||||||
However, I received a request for an even bigger limit. For this reason, and
|
|
||||||
also to make the code easier to maintain, the storing and loading of offsets
|
|
||||||
from the byte string is now handled by the macros that are defined here.
|
|
||||||
|
|
||||||
The macros are controlled by the value of LINK_SIZE. This defaults to 2 in
|
|
||||||
the config.h file, but can be overridden by using -D on the command line. This
|
|
||||||
is automated on Unix systems via the "configure" command. */
|
|
||||||
|
|
||||||
#if LINK_SIZE == 2
|
|
||||||
|
|
||||||
#define PUT(a,n,d) \
|
|
||||||
(a[n] = (d) >> 8), \
|
|
||||||
(a[(n)+1] = (d) & 255)
|
|
||||||
|
|
||||||
#define GET(a,n) \
|
|
||||||
(((a)[n] << 8) | (a)[(n)+1])
|
|
||||||
|
|
||||||
#define MAX_PATTERN_SIZE (1 << 16)
|
|
||||||
|
|
||||||
|
|
||||||
#elif LINK_SIZE == 3
|
|
||||||
|
|
||||||
#define PUT(a,n,d) \
|
|
||||||
(a[n] = (d) >> 16), \
|
|
||||||
(a[(n)+1] = (d) >> 8), \
|
|
||||||
(a[(n)+2] = (d) & 255)
|
|
||||||
|
|
||||||
#define GET(a,n) \
|
|
||||||
(((a)[n] << 16) | ((a)[(n)+1] << 8) | (a)[(n)+2])
|
|
||||||
|
|
||||||
#define MAX_PATTERN_SIZE (1 << 24)
|
|
||||||
|
|
||||||
|
|
||||||
#elif LINK_SIZE == 4
|
|
||||||
|
|
||||||
#define PUT(a,n,d) \
|
|
||||||
(a[n] = (d) >> 24), \
|
|
||||||
(a[(n)+1] = (d) >> 16), \
|
|
||||||
(a[(n)+2] = (d) >> 8), \
|
|
||||||
(a[(n)+3] = (d) & 255)
|
|
||||||
|
|
||||||
#define GET(a,n) \
|
|
||||||
(((a)[n] << 24) | ((a)[(n)+1] << 16) | ((a)[(n)+2] << 8) | (a)[(n)+3])
|
|
||||||
|
|
||||||
#define MAX_PATTERN_SIZE (1 << 30) /* Keep it positive */
|
|
||||||
|
|
||||||
|
|
||||||
#else
|
|
||||||
#error LINK_SIZE must be either 2, 3, or 4
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
/* Convenience macro defined in terms of the others */
|
|
||||||
|
|
||||||
#define PUTINC(a,n,d) PUT(a,n,d), a += LINK_SIZE
|
|
||||||
|
|
||||||
|
|
||||||
/* PCRE uses some other 2-byte quantities that do not change when the size of
|
|
||||||
offsets changes. These are used for repeat counts and for other things such as
|
|
||||||
capturing parenthesis numbers in back references. */
|
|
||||||
|
|
||||||
/* Store the 16-bit value d, most significant byte first, at a[n] and a[n+1].
This is a single comma expression (like PUT above) rather than two separate
statements, so that it remains one statement when used as the body of an
unbraced if/else or inside another comma expression such as PUT2INC. */

#define PUT2(a,n,d) \
  (((a)[n] = (d) >> 8), \
  ((a)[(n)+1] = (d) & 255))
|
|
||||||
|
|
||||||
#define GET2(a,n) \
|
|
||||||
(((a)[n] << 8) | (a)[(n)+1])
|
|
||||||
|
|
||||||
#define PUT2INC(a,n,d) PUT2(a,n,d), a += 2
|
|
||||||
|
|
||||||
|
|
||||||
/* In case there is no definition of offsetof() provided - though any proper
|
|
||||||
Standard C system should have one. */
|
|
||||||
|
|
||||||
#ifndef offsetof
|
|
||||||
#define offsetof(p_type,field) ((size_t)&(((p_type *)0)->field))
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* These are the public options that can change during matching. */
|
|
||||||
|
|
||||||
#define PCRE_IMS (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL)
|
|
||||||
|
|
||||||
/* Private options flags start at the most significant end of the four bytes,
|
|
||||||
but skip the top bit so we can use ints for convenience without getting tangled
|
|
||||||
with negative values. The public options defined in pcre.h start at the least
|
|
||||||
significant end. Make sure they don't overlap, though now that we have expanded
|
|
||||||
to four bytes there is plenty of space. */
|
|
||||||
|
|
||||||
#define PCRE_FIRSTSET 0x40000000 /* first_byte is set */
|
|
||||||
#define PCRE_REQCHSET 0x20000000 /* req_byte is set */
|
|
||||||
#define PCRE_STARTLINE 0x10000000 /* start after \n for multiline */
|
|
||||||
#define PCRE_ICHANGED 0x08000000 /* i option changes within regex */
|
|
||||||
|
|
||||||
/* Options for the "extra" block produced by pcre_study(). */
|
|
||||||
|
|
||||||
#define PCRE_STUDY_MAPPED 0x01 /* a map of starting chars exists */
|
|
||||||
|
|
||||||
/* Masks for identifying the public options which are permitted at compile
|
|
||||||
time, run time or study time, respectively. */
|
|
||||||
|
|
||||||
#define PUBLIC_OPTIONS \
|
|
||||||
(PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \
|
|
||||||
PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8| \
|
|
||||||
PCRE_NO_AUTO_CAPTURE|PCRE_NO_UTF8_CHECK)
|
|
||||||
|
|
||||||
#define PUBLIC_EXEC_OPTIONS \
|
|
||||||
(PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NO_UTF8_CHECK)
|
|
||||||
|
|
||||||
#define PUBLIC_STUDY_OPTIONS 0 /* None defined */
|
|
||||||
|
|
||||||
/* Magic number to provide a small check against being handed junk. */
|
|
||||||
|
|
||||||
#define MAGIC_NUMBER 0x50435245UL /* 'PCRE' */
|
|
||||||
|
|
||||||
/* Negative values for the firstchar and reqchar variables */
|
|
||||||
|
|
||||||
#define REQ_UNSET (-2)
|
|
||||||
#define REQ_NONE (-1)
|
|
||||||
|
|
||||||
/* Flags added to firstbyte or reqbyte; a "non-literal" item is either a
|
|
||||||
variable-length repeat, or anything other than literal characters. */
|
|
||||||
|
|
||||||
#define REQ_CASELESS 0x0100 /* indicates caselessness */
|
|
||||||
#define REQ_VARY 0x0200 /* reqbyte followed non-literal item */
|
|
||||||
|
|
||||||
/* Miscellaneous definitions */
|
|
||||||
|
|
||||||
typedef int BOOL;
|
|
||||||
|
|
||||||
#define FALSE 0
|
|
||||||
#define TRUE 1
|
|
||||||
|
|
||||||
/* Escape items that are just an encoding of a particular data value. Note that
|
|
||||||
ESC_n is defined as yet another macro, which is set in config.h to either \n
|
|
||||||
(the default) or \r (which some people want). */
|
|
||||||
|
|
||||||
#ifndef ESC_e
|
|
||||||
#define ESC_e 27
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef ESC_f
|
|
||||||
#define ESC_f '\f'
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef ESC_n
|
|
||||||
#define ESC_n NEWLINE
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef ESC_r
|
|
||||||
#define ESC_r '\r'
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* We can't officially use ESC_t because it is a POSIX reserved identifier
|
|
||||||
(presumably because of all the others like size_t). */
|
|
||||||
|
|
||||||
#ifndef ESC_tee
|
|
||||||
#define ESC_tee '\t'
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* These are escaped items that aren't just an encoding of a particular data
|
|
||||||
value such as \n. They must have non-zero values, as check_escape() returns
|
|
||||||
their negation. Also, they must appear in the same order as in the opcode
|
|
||||||
definitions below, up to ESC_z. There's a dummy for OP_ANY because it
|
|
||||||
corresponds to "." rather than an escape sequence. The final one must be
|
|
||||||
ESC_REF as subsequent values are used for \1, \2, \3, etc. There are two
|
|
||||||
tests in the code for an escape greater than ESC_b and less than ESC_Z to
|
|
||||||
detect the types that may be repeated. These are the types that consume a
|
|
||||||
character. If any new escapes are put in between that don't consume a
|
|
||||||
character, that code will have to change. */
|
|
||||||
|
|
||||||
enum { ESC_A = 1, ESC_G, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s, ESC_W,
|
|
||||||
ESC_w, ESC_dum1, ESC_C, ESC_Z, ESC_z, ESC_E, ESC_Q, ESC_REF };
|
|
||||||
|
|
||||||
/* Flag bits and data types for the extended class (OP_XCLASS) for classes that
|
|
||||||
contain UTF-8 characters with values greater than 255. */
|
|
||||||
|
|
||||||
#define XCL_NOT 0x01 /* Flag: this is a negative class */
|
|
||||||
#define XCL_MAP 0x02 /* Flag: a 32-byte map is present */
|
|
||||||
|
|
||||||
#define XCL_END 0 /* Marks end of individual items */
|
|
||||||
#define XCL_SINGLE 1 /* Single item (one multibyte char) follows */
|
|
||||||
#define XCL_RANGE 2 /* A range (two multibyte chars) follows */
|
|
||||||
|
|
||||||
|
|
||||||
/* Opcode table: OP_BRA must be last, as all values >= it are used for brackets
|
|
||||||
that extract substrings. Starting from 1 (i.e. after OP_END), the values up to
|
|
||||||
OP_EOD must correspond in order to the list of escapes immediately above.
|
|
||||||
Note that whenever this list is updated, the two macro definitions that follow
|
|
||||||
must also be updated to match. */
|
|
||||||
|
|
||||||
enum {
|
|
||||||
OP_END, /* 0 End of pattern */
|
|
||||||
|
|
||||||
/* Values corresponding to backslashed metacharacters */
|
|
||||||
|
|
||||||
OP_SOD, /* 1 Start of data: \A */
|
|
||||||
OP_SOM, /* 2 Start of match (subject + offset): \G */
|
|
||||||
OP_NOT_WORD_BOUNDARY, /* 3 \B */
|
|
||||||
OP_WORD_BOUNDARY, /* 4 \b */
|
|
||||||
OP_NOT_DIGIT, /* 5 \D */
|
|
||||||
OP_DIGIT, /* 6 \d */
|
|
||||||
OP_NOT_WHITESPACE, /* 7 \S */
|
|
||||||
OP_WHITESPACE, /* 8 \s */
|
|
||||||
OP_NOT_WORDCHAR, /* 9 \W */
|
|
||||||
OP_WORDCHAR, /* 10 \w */
|
|
||||||
OP_ANY, /* 11 Match any character */
|
|
||||||
OP_ANYBYTE, /* 12 Match any byte (\C); different to OP_ANY for UTF-8 */
|
|
||||||
OP_EODN, /* 13 End of data or \n at end of data: \Z. */
|
|
||||||
OP_EOD, /* 14 End of data: \z */
|
|
||||||
|
|
||||||
OP_OPT, /* 15 Set runtime options */
|
|
||||||
OP_CIRC, /* 16 Start of line - varies with multiline switch */
|
|
||||||
OP_DOLL, /* 17 End of line - varies with multiline switch */
|
|
||||||
OP_CHARS, /* 18 Match string of characters */
|
|
||||||
OP_NOT, /* 19 Match anything but the following char */
|
|
||||||
|
|
||||||
OP_STAR, /* 20 The maximizing and minimizing versions of */
|
|
||||||
OP_MINSTAR, /* 21 all these opcodes must come in pairs, with */
|
|
||||||
OP_PLUS, /* 22 the minimizing one second. */
|
|
||||||
OP_MINPLUS, /* 23 This first set applies to single characters */
|
|
||||||
OP_QUERY, /* 24 */
|
|
||||||
OP_MINQUERY, /* 25 */
|
|
||||||
OP_UPTO, /* 26 From 0 to n matches */
|
|
||||||
OP_MINUPTO, /* 27 */
|
|
||||||
OP_EXACT, /* 28 Exactly n matches */
|
|
||||||
|
|
||||||
OP_NOTSTAR, /* 29 The maximizing and minimizing versions of */
|
|
||||||
OP_NOTMINSTAR, /* 30 all these opcodes must come in pairs, with */
|
|
||||||
OP_NOTPLUS, /* 31 the minimizing one second. */
|
|
||||||
OP_NOTMINPLUS, /* 32 This set applies to "not" single characters */
|
|
||||||
OP_NOTQUERY, /* 33 */
|
|
||||||
OP_NOTMINQUERY, /* 34 */
|
|
||||||
OP_NOTUPTO, /* 35 From 0 to n matches */
|
|
||||||
OP_NOTMINUPTO, /* 36 */
|
|
||||||
OP_NOTEXACT, /* 37 Exactly n matches */
|
|
||||||
|
|
||||||
OP_TYPESTAR, /* 38 The maximizing and minimizing versions of */
|
|
||||||
OP_TYPEMINSTAR, /* 39 all these opcodes must come in pairs, with */
|
|
||||||
OP_TYPEPLUS, /* 40 the minimizing one second. These codes must */
|
|
||||||
OP_TYPEMINPLUS, /* 41 be in exactly the same order as those above. */
|
|
||||||
OP_TYPEQUERY, /* 42 This set applies to character types such as \d */
|
|
||||||
OP_TYPEMINQUERY, /* 43 */
|
|
||||||
OP_TYPEUPTO, /* 44 From 0 to n matches */
|
|
||||||
OP_TYPEMINUPTO, /* 45 */
|
|
||||||
OP_TYPEEXACT, /* 46 Exactly n matches */
|
|
||||||
|
|
||||||
OP_CRSTAR, /* 47 The maximizing and minimizing versions of */
|
|
||||||
OP_CRMINSTAR, /* 48 all these opcodes must come in pairs, with */
|
|
||||||
OP_CRPLUS, /* 49 the minimizing one second. These codes must */
|
|
||||||
OP_CRMINPLUS, /* 50 be in exactly the same order as those above. */
|
|
||||||
OP_CRQUERY, /* 51 These are for character classes and back refs */
|
|
||||||
OP_CRMINQUERY, /* 52 */
|
|
||||||
OP_CRRANGE, /* 53 These are different to the three seta above. */
|
|
||||||
OP_CRMINRANGE, /* 54 */
|
|
||||||
|
|
||||||
OP_CLASS, /* 55 Match a character class, chars < 256 only */
|
|
||||||
OP_NCLASS, /* 56 Same, but the bitmap was created from a negative
|
|
||||||
class - the difference is relevant only when a UTF-8
|
|
||||||
character > 255 is encountered. */
|
|
||||||
|
|
||||||
OP_XCLASS, /* 57 Extended class for handling UTF-8 chars within the
|
|
||||||
class. This does both positive and negative. */
|
|
||||||
|
|
||||||
OP_REF, /* 58 Match a back reference */
|
|
||||||
OP_RECURSE, /* 59 Match a numbered subpattern (possibly recursive) */
|
|
||||||
OP_CALLOUT, /* 60 Call out to external function if provided */
|
|
||||||
|
|
||||||
OP_ALT, /* 61 Start of alternation */
|
|
||||||
OP_KET, /* 62 End of group that doesn't have an unbounded repeat */
|
|
||||||
OP_KETRMAX, /* 63 These two must remain together and in this */
|
|
||||||
OP_KETRMIN, /* 64 order. They are for groups the repeat for ever. */
|
|
||||||
|
|
||||||
/* The assertions must come before ONCE and COND */
|
|
||||||
|
|
||||||
OP_ASSERT, /* 65 Positive lookahead */
|
|
||||||
OP_ASSERT_NOT, /* 66 Negative lookahead */
|
|
||||||
OP_ASSERTBACK, /* 67 Positive lookbehind */
|
|
||||||
OP_ASSERTBACK_NOT, /* 68 Negative lookbehind */
|
|
||||||
OP_REVERSE, /* 69 Move pointer back - used in lookbehind assertions */
|
|
||||||
|
|
||||||
/* ONCE and COND must come after the assertions, with ONCE first, as there's
|
|
||||||
a test for >= ONCE for a subpattern that isn't an assertion. */
|
|
||||||
|
|
||||||
OP_ONCE, /* 70 Once matched, don't back up into the subpattern */
|
|
||||||
OP_COND, /* 71 Conditional group */
|
|
||||||
OP_CREF, /* 72 Used to hold an extraction string number (cond ref) */
|
|
||||||
|
|
||||||
OP_BRAZERO, /* 73 These two must remain together and in this */
|
|
||||||
OP_BRAMINZERO, /* 74 order. */
|
|
||||||
|
|
||||||
OP_BRANUMBER, /* 75 Used for extracting brackets whose number is greater
|
|
||||||
than can fit into an opcode. */
|
|
||||||
|
|
||||||
OP_BRA /* 76 This and greater values are used for brackets that
|
|
||||||
extract substrings up to a basic limit. After that,
|
|
||||||
use is made of OP_BRANUMBER. */
|
|
||||||
};
|
|
||||||
|
|
||||||
/* WARNING: There is an implicit assumption in study.c that all opcodes are
|
|
||||||
less than 128 in value. This makes handling UTF-8 character sequences easier.
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
/* This macro defines textual names for all the opcodes. These are used only
|
|
||||||
for debugging, in pcre.c when DEBUG is defined, and also in pcretest.c. The
|
|
||||||
macro is referenced only in printint.c. */
|
|
||||||
|
|
||||||
#define OP_NAME_LIST \
|
|
||||||
"End", "\\A", "\\G", "\\B", "\\b", "\\D", "\\d", \
|
|
||||||
"\\S", "\\s", "\\W", "\\w", "Any", "Anybyte", "\\Z", "\\z", \
|
|
||||||
"Opt", "^", "$", "chars", "not", \
|
|
||||||
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", \
|
|
||||||
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", \
|
|
||||||
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", \
|
|
||||||
"*", "*?", "+", "+?", "?", "??", "{", "{", \
|
|
||||||
"class", "nclass", "xclass", "Ref", "Recurse", "Callout", \
|
|
||||||
"Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", \
|
|
||||||
"AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cond ref",\
|
|
||||||
"Brazero", "Braminzero", "Branumber", "Bra"
|
|
||||||
|
|
||||||
|
|
||||||
/* This macro defines the length of fixed length operations in the compiled
|
|
||||||
regex. The lengths are used when searching for specific things, and also in the
|
|
||||||
debugging printing of a compiled regex. We use a macro so that it can be
|
|
||||||
incorporated both into pcre.c and pcretest.c without being publicly exposed.
|
|
||||||
|
|
||||||
As things have been extended, some of these are no longer fixed lengths, but are
|
|
||||||
minima instead. For example, the length of a single-character repeat may vary
|
|
||||||
in UTF-8 mode. The code that uses this table must know about such things. */
|
|
||||||
|
|
||||||
#define OP_LENGTHS \
|
|
||||||
1, /* End */ \
|
|
||||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* \A, \G, \B, \B, \D, \d, \S, \s, \W, \w */ \
|
|
||||||
1, 1, 1, 1, 2, 1, 1, /* Any, Anybyte, \Z, \z, Opt, ^, $ */ \
|
|
||||||
2, /* Chars - the minimum length */ \
|
|
||||||
2, /* not */ \
|
|
||||||
/* Positive single-char repeats ** These are */ \
|
|
||||||
2, 2, 2, 2, 2, 2, /* *, *?, +, +?, ?, ?? ** minima in */ \
|
|
||||||
4, 4, 4, /* upto, minupto, exact ** UTF-8 mode */ \
|
|
||||||
/* Negative single-char repeats - only for chars < 256 */ \
|
|
||||||
2, 2, 2, 2, 2, 2, /* NOT *, *?, +, +?, ?, ?? */ \
|
|
||||||
4, 4, 4, /* NOT upto, minupto, exact */ \
|
|
||||||
/* Positive type repeats */ \
|
|
||||||
2, 2, 2, 2, 2, 2, /* Type *, *?, +, +?, ?, ?? */ \
|
|
||||||
4, 4, 4, /* Type upto, minupto, exact */ \
|
|
||||||
/* Character class & ref repeats */ \
|
|
||||||
1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */ \
|
|
||||||
5, 5, /* CRRANGE, CRMINRANGE */ \
|
|
||||||
33, /* CLASS */ \
|
|
||||||
33, /* NCLASS */ \
|
|
||||||
0, /* XCLASS - variable length */ \
|
|
||||||
3, /* REF */ \
|
|
||||||
1+LINK_SIZE, /* RECURSE */ \
|
|
||||||
2, /* CALLOUT */ \
|
|
||||||
1+LINK_SIZE, /* Alt */ \
|
|
||||||
1+LINK_SIZE, /* Ket */ \
|
|
||||||
1+LINK_SIZE, /* KetRmax */ \
|
|
||||||
1+LINK_SIZE, /* KetRmin */ \
|
|
||||||
1+LINK_SIZE, /* Assert */ \
|
|
||||||
1+LINK_SIZE, /* Assert not */ \
|
|
||||||
1+LINK_SIZE, /* Assert behind */ \
|
|
||||||
1+LINK_SIZE, /* Assert behind not */ \
|
|
||||||
1+LINK_SIZE, /* Reverse */ \
|
|
||||||
1+LINK_SIZE, /* Once */ \
|
|
||||||
1+LINK_SIZE, /* COND */ \
|
|
||||||
3, /* CREF */ \
|
|
||||||
1, 1, /* BRAZERO, BRAMINZERO */ \
|
|
||||||
3, /* BRANUMBER */ \
|
|
||||||
1+LINK_SIZE /* BRA */ \
|
|
||||||
|
|
||||||
|
|
||||||
/* The highest extraction number before we have to start using additional
|
|
||||||
bytes. (Originally PCRE didn't have support for extraction counts higher than
|
|
||||||
this number.) The value is limited by the number of opcodes left after OP_BRA,
|
|
||||||
i.e. 255 - OP_BRA. We actually set it a bit lower to leave room for additional
|
|
||||||
opcodes. */
|
|
||||||
|
|
||||||
#define EXTRACT_BASIC_MAX 150
|
|
||||||
|
|
||||||
/* A magic value for OP_CREF to indicate the "in recursion" condition. */
|
|
||||||
|
|
||||||
#define CREF_RECURSE 0xffff
|
|
||||||
|
|
||||||
/* The texts of compile-time error messages are defined as macros here so that
|
|
||||||
they can be accessed by the POSIX wrapper and converted into error codes. Yes,
|
|
||||||
I could have used error codes in the first place, but didn't feel like changing
|
|
||||||
just to accommodate the POSIX wrapper. */
|
|
||||||
|
|
||||||
#define ERR1 "\\ at end of pattern"
|
|
||||||
#define ERR2 "\\c at end of pattern"
|
|
||||||
#define ERR3 "unrecognized character follows \\"
|
|
||||||
#define ERR4 "numbers out of order in {} quantifier"
|
|
||||||
#define ERR5 "number too big in {} quantifier"
|
|
||||||
#define ERR6 "missing terminating ] for character class"
|
|
||||||
#define ERR7 "invalid escape sequence in character class"
|
|
||||||
#define ERR8 "range out of order in character class"
|
|
||||||
#define ERR9 "nothing to repeat"
|
|
||||||
#define ERR10 "operand of unlimited repeat could match the empty string"
|
|
||||||
#define ERR11 "internal error: unexpected repeat"
|
|
||||||
#define ERR12 "unrecognized character after (?"
|
|
||||||
#define ERR13 "POSIX named classes are supported only within a class"
|
|
||||||
#define ERR14 "missing )"
|
|
||||||
#define ERR15 "reference to non-existent subpattern"
|
|
||||||
#define ERR16 "erroffset passed as NULL"
|
|
||||||
#define ERR17 "unknown option bit(s) set"
|
|
||||||
#define ERR18 "missing ) after comment"
|
|
||||||
#define ERR19 "parentheses nested too deeply"
|
|
||||||
#define ERR20 "regular expression too large"
|
|
||||||
#define ERR21 "failed to get memory"
|
|
||||||
#define ERR22 "unmatched parentheses"
|
|
||||||
#define ERR23 "internal error: code overflow"
|
|
||||||
#define ERR24 "unrecognized character after (?<"
|
|
||||||
#define ERR25 "lookbehind assertion is not fixed length"
|
|
||||||
#define ERR26 "malformed number after (?("
|
|
||||||
#define ERR27 "conditional group contains more than two branches"
|
|
||||||
#define ERR28 "assertion expected after (?("
|
|
||||||
#define ERR29 "(?R or (?digits must be followed by )"
|
|
||||||
#define ERR30 "unknown POSIX class name"
|
|
||||||
#define ERR31 "POSIX collating elements are not supported"
|
|
||||||
#define ERR32 "this version of PCRE is not compiled with PCRE_UTF8 support"
|
|
||||||
#define ERR33 "spare error"
|
|
||||||
#define ERR34 "character value in \\x{...} sequence is too large"
|
|
||||||
#define ERR35 "invalid condition (?(0)"
|
|
||||||
#define ERR36 "\\C not allowed in lookbehind assertion"
|
|
||||||
#define ERR37 "PCRE does not support \\L, \\l, \\N, \\P, \\p, \\U, \\u, or \\X"
|
|
||||||
#define ERR38 "number after (?C is > 255"
|
|
||||||
#define ERR39 "closing ) for (?C expected"
|
|
||||||
#define ERR40 "recursive call could loop indefinitely"
|
|
||||||
#define ERR41 "unrecognized character after (?P"
|
|
||||||
#define ERR42 "syntax error after (?P"
|
|
||||||
#define ERR43 "two named groups have the same name"
|
|
||||||
#define ERR44 "invalid UTF-8 string"
|
|
||||||
|
|
||||||
/* All character handling must be done as unsigned characters. Otherwise there
|
|
||||||
are problems with top-bit-set characters and functions such as isspace().
|
|
||||||
However, we leave the interface to the outside world as char *, because that
|
|
||||||
should make things easier for callers. We define a short type for unsigned char
|
|
||||||
to save lots of typing. I tried "uchar", but it causes problems on Digital
|
|
||||||
Unix, where it is defined in sys/types, so use "uschar" instead. */
|
|
||||||
|
|
||||||
typedef unsigned char uschar;
|
|
||||||
|
|
||||||
/* The real format of the start of the pcre block; the index of names and the
|
|
||||||
code vector run on as long as necessary after the end. */
|
|
||||||
|
|
||||||
typedef struct real_pcre {
|
|
||||||
unsigned long int magic_number;
|
|
||||||
size_t size; /* Total that was malloced */
|
|
||||||
const unsigned char *tables; /* Pointer to tables */
|
|
||||||
unsigned long int options;
|
|
||||||
unsigned short int top_bracket;
|
|
||||||
unsigned short int top_backref;
|
|
||||||
unsigned short int first_byte;
|
|
||||||
unsigned short int req_byte;
|
|
||||||
unsigned short int name_entry_size; /* Size of any name items; 0 => none */
|
|
||||||
unsigned short int name_count; /* Number of name items */
|
|
||||||
} real_pcre;
|
|
||||||
|
|
||||||
/* The format of the block used to store data from pcre_study(). */
|
|
||||||
|
|
||||||
typedef struct pcre_study_data {
|
|
||||||
size_t size; /* Total that was malloced */
|
|
||||||
uschar options;
|
|
||||||
uschar start_bits[32];
|
|
||||||
} pcre_study_data;
|
|
||||||
|
|
||||||
/* Structure for passing "static" information around between the functions
|
|
||||||
doing the compiling, so that they are thread-safe. */
|
|
||||||
|
|
||||||
typedef struct compile_data {
|
|
||||||
const uschar *lcc; /* Points to lower casing table */
|
|
||||||
const uschar *fcc; /* Points to case-flipping table */
|
|
||||||
const uschar *cbits; /* Points to character type table */
|
|
||||||
const uschar *ctypes; /* Points to table of type maps */
|
|
||||||
const uschar *start_code; /* The start of the compiled code */
|
|
||||||
uschar *name_table; /* The name/number table */
|
|
||||||
int names_found; /* Number of entries so far */
|
|
||||||
int name_entry_size; /* Size of each entry */
|
|
||||||
int top_backref; /* Maximum back reference */
|
|
||||||
unsigned int backref_map; /* Bitmap of low back refs */
|
|
||||||
int req_varyopt; /* "After variable item" flag for reqbyte */
|
|
||||||
} compile_data;
|
|
||||||
|
|
||||||
/* Structure for maintaining a chain of pointers to the currently incomplete
|
|
||||||
branches, for testing for left recursion. */
|
|
||||||
|
|
||||||
typedef struct branch_chain {
|
|
||||||
struct branch_chain *outer;
|
|
||||||
uschar *current;
|
|
||||||
} branch_chain;
|
|
||||||
|
|
||||||
/* Structure for items in a linked list that represents an explicit recursive
|
|
||||||
call within the pattern. */
|
|
||||||
|
|
||||||
typedef struct recursion_info {
|
|
||||||
struct recursion_info *prevrec; /* Previous recursion record (or NULL) */
|
|
||||||
int group_num; /* Number of group that was called */
|
|
||||||
const uschar *after_call; /* "Return value": points after the call in the expr */
|
|
||||||
const uschar *save_start; /* Old value of md->start_match */
|
|
||||||
int *offset_save; /* Pointer to start of saved offsets */
|
|
||||||
int saved_max; /* Number of saved offsets */
|
|
||||||
} recursion_info;
|
|
||||||
|
|
||||||
/* When compiling in a mode that doesn't use recursive calls to match(),
|
|
||||||
a structure is used to remember local variables on the heap. It is defined in
|
|
||||||
pcre.c, close to the match() function, so that it is easy to keep it in step
|
|
||||||
with any changes of local variable. However, the pointer to the current frame
|
|
||||||
must be saved in some "static" place over a longjmp(). We declare the
|
|
||||||
structure here so that we can put a pointer in the match_data structure.
|
|
||||||
NOTE: This isn't used for a "normal" compilation of pcre. */
|
|
||||||
|
|
||||||
struct heapframe;
|
|
||||||
|
|
||||||
/* Structure for passing "static" information around between the functions
|
|
||||||
doing the matching, so that they are thread-safe. */
|
|
||||||
|
|
||||||
typedef struct match_data {
|
|
||||||
unsigned long int match_call_count; /* As it says */
|
|
||||||
unsigned long int match_limit;/* As it says */
|
|
||||||
int *offset_vector; /* Offset vector */
|
|
||||||
int offset_end; /* One past the end */
|
|
||||||
int offset_max; /* The maximum usable for return data */
|
|
||||||
const uschar *lcc; /* Points to lower casing table */
|
|
||||||
const uschar *ctypes; /* Points to table of type maps */
|
|
||||||
BOOL offset_overflow; /* Set if too many extractions */
|
|
||||||
BOOL notbol; /* NOTBOL flag */
|
|
||||||
BOOL noteol; /* NOTEOL flag */
|
|
||||||
BOOL utf8; /* UTF8 flag */
|
|
||||||
BOOL endonly; /* Dollar not before final \n */
|
|
||||||
BOOL notempty; /* Empty string match not wanted */
|
|
||||||
const uschar *start_code; /* For use when recursing */
|
|
||||||
const uschar *start_subject; /* Start of the subject string */
|
|
||||||
const uschar *end_subject; /* End of the subject string */
|
|
||||||
const uschar *start_match; /* Start of this match attempt */
|
|
||||||
const uschar *end_match_ptr; /* Subject position at end match */
|
|
||||||
int end_offset_top; /* Highwater mark at end of match */
|
|
||||||
int capture_last; /* Most recent capture number */
|
|
||||||
int start_offset; /* The start offset value */
|
|
||||||
recursion_info *recursive; /* Linked list of recursion data */
|
|
||||||
void *callout_data; /* To pass back to callouts */
|
|
||||||
struct heapframe *thisframe; /* Used only when compiling for no recursion */
|
|
||||||
} match_data;
|
|
||||||
|
|
||||||
/* Bit definitions for entries in the pcre_ctypes table. */
|
|
||||||
|
|
||||||
#define ctype_space 0x01
|
|
||||||
#define ctype_letter 0x02
|
|
||||||
#define ctype_digit 0x04
|
|
||||||
#define ctype_xdigit 0x08
|
|
||||||
#define ctype_word 0x10 /* alphameric or '_' */
|
|
||||||
#define ctype_meta 0x80 /* regexp meta char or zero (end pattern) */
|
|
||||||
|
|
||||||
/* Offsets for the bitmap tables in pcre_cbits. Each table contains a set
|
|
||||||
of bits for a class map. Some classes are built by combining these tables. */
|
|
||||||
|
|
||||||
#define cbit_space 0 /* [:space:] or \s */
|
|
||||||
#define cbit_xdigit 32 /* [:xdigit:] */
|
|
||||||
#define cbit_digit 64 /* [:digit:] or \d */
|
|
||||||
#define cbit_upper 96 /* [:upper:] */
|
|
||||||
#define cbit_lower 128 /* [:lower:] */
|
|
||||||
#define cbit_word 160 /* [:word:] or \w */
|
|
||||||
#define cbit_graph 192 /* [:graph:] */
|
|
||||||
#define cbit_print 224 /* [:print:] */
|
|
||||||
#define cbit_punct 256 /* [:punct:] */
|
|
||||||
#define cbit_cntrl 288 /* [:cntrl:] */
|
|
||||||
#define cbit_length 320 /* Length of the cbits table */
|
|
||||||
|
|
||||||
/* Offsets of the various tables from the base tables pointer, and
|
|
||||||
total length. */
|
|
||||||
|
|
||||||
#define lcc_offset 0
|
|
||||||
#define fcc_offset 256
|
|
||||||
#define cbits_offset 512
|
|
||||||
#define ctypes_offset (cbits_offset + cbit_length)
|
|
||||||
#define tables_length (ctypes_offset + 256)
|
|
||||||
|
|
||||||
/* End of internal.h */
|
|
||||||
|
|
||||||
#endif /* PCRE_INTERNAL */
|
|
|
@ -1,140 +0,0 @@
|
||||||
/*************************************************
|
|
||||||
* Perl-Compatible Regular Expressions *
|
|
||||||
*************************************************/
|
|
||||||
|
|
||||||
/*
|
|
||||||
PCRE is a library of functions to support regular expressions whose syntax
|
|
||||||
and semantics are as close as possible to those of the Perl 5 language.
|
|
||||||
|
|
||||||
Written by: Philip Hazel <ph10@cam.ac.uk>
|
|
||||||
|
|
||||||
Copyright (c) 1997-2003 University of Cambridge
|
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
|
||||||
Permission is granted to anyone to use this software for any purpose on any
|
|
||||||
computer system, and to redistribute it freely, subject to the following
|
|
||||||
restrictions:
|
|
||||||
|
|
||||||
1. This software is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
||||||
|
|
||||||
2. The origin of this software must not be misrepresented, either by
|
|
||||||
explicit claim or by omission.
|
|
||||||
|
|
||||||
3. Altered versions must be plainly marked as such, and must not be
|
|
||||||
misrepresented as being the original software.
|
|
||||||
|
|
||||||
4. If PCRE is embedded in any software that is released under the GNU
|
|
||||||
General Purpose Licence (GPL), then the terms of that licence shall
|
|
||||||
supersede any condition above with which it is incompatible.
|
|
||||||
-----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
See the file Tech.Notes for some information on the internals.
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
/* This file is compiled on its own as part of the PCRE library. However,
|
|
||||||
it is also included in the compilation of dftables.c, in which case the macro
|
|
||||||
DFTABLES is defined. */
|
|
||||||
|
|
||||||
#ifndef DFTABLES
|
|
||||||
#include "internal.h"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Create PCRE character tables *
|
|
||||||
*************************************************/
|
|
||||||
|
|
||||||
/* This function builds a set of character tables for use by PCRE and returns
|
|
||||||
a pointer to them. They are built using the ctype functions, and consequently
|
|
||||||
their contents will depend upon the current locale setting. When compiled as
|
|
||||||
part of the library, the store is obtained via pcre_malloc(), but when compiled
|
|
||||||
inside dftables, use malloc().
|
|
||||||
|
|
||||||
Arguments: none
|
|
||||||
Returns: pointer to the contiguous block of data
|
|
||||||
*/
|
|
||||||
|
|
||||||
const unsigned char *
|
|
||||||
pcre_maketables(void)
|
|
||||||
{
|
|
||||||
unsigned char *yield, *p;
|
|
||||||
int i;
|
|
||||||
|
|
||||||
#ifndef DFTABLES
|
|
||||||
yield = (unsigned char*)(pcre_malloc)(tables_length);
|
|
||||||
#else
|
|
||||||
yield = (unsigned char*)malloc(tables_length);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if (yield == NULL) return NULL;
|
|
||||||
p = yield;
|
|
||||||
|
|
||||||
/* First comes the lower casing table */
|
|
||||||
|
|
||||||
for (i = 0; i < 256; i++) *p++ = tolower(i);
|
|
||||||
|
|
||||||
/* Next the case-flipping table */
|
|
||||||
|
|
||||||
for (i = 0; i < 256; i++) *p++ = islower(i)? toupper(i) : tolower(i);
|
|
||||||
|
|
||||||
/* Then the character class tables. Don't try to be clever and save effort
|
|
||||||
on exclusive ones - in some locales things may be different. Note that the
|
|
||||||
table for "space" includes everything "isspace" gives, including VT in the
|
|
||||||
default locale. This makes it work for the POSIX class [:space:]. */
|
|
||||||
|
|
||||||
memset(p, 0, cbit_length);
|
|
||||||
for (i = 0; i < 256; i++)
|
|
||||||
{
|
|
||||||
if (isdigit(i))
|
|
||||||
{
|
|
||||||
p[cbit_digit + i/8] |= 1 << (i&7);
|
|
||||||
p[cbit_word + i/8] |= 1 << (i&7);
|
|
||||||
}
|
|
||||||
if (isupper(i))
|
|
||||||
{
|
|
||||||
p[cbit_upper + i/8] |= 1 << (i&7);
|
|
||||||
p[cbit_word + i/8] |= 1 << (i&7);
|
|
||||||
}
|
|
||||||
if (islower(i))
|
|
||||||
{
|
|
||||||
p[cbit_lower + i/8] |= 1 << (i&7);
|
|
||||||
p[cbit_word + i/8] |= 1 << (i&7);
|
|
||||||
}
|
|
||||||
if (i == '_') p[cbit_word + i/8] |= 1 << (i&7);
|
|
||||||
if (isspace(i)) p[cbit_space + i/8] |= 1 << (i&7);
|
|
||||||
if (isxdigit(i))p[cbit_xdigit + i/8] |= 1 << (i&7);
|
|
||||||
if (isgraph(i)) p[cbit_graph + i/8] |= 1 << (i&7);
|
|
||||||
if (isprint(i)) p[cbit_print + i/8] |= 1 << (i&7);
|
|
||||||
if (ispunct(i)) p[cbit_punct + i/8] |= 1 << (i&7);
|
|
||||||
if (iscntrl(i)) p[cbit_cntrl + i/8] |= 1 << (i&7);
|
|
||||||
}
|
|
||||||
p += cbit_length;
|
|
||||||
|
|
||||||
/* Finally, the character type table. In this, we exclude VT from the white
|
|
||||||
space chars, because Perl doesn't recognize it as such for \s and for comments
|
|
||||||
within regexes. */
|
|
||||||
|
|
||||||
for (i = 0; i < 256; i++)
|
|
||||||
{
|
|
||||||
int x = 0;
|
|
||||||
if (i != 0x0b && isspace(i)) x += ctype_space;
|
|
||||||
if (isalpha(i)) x += ctype_letter;
|
|
||||||
if (isdigit(i)) x += ctype_digit;
|
|
||||||
if (isxdigit(i)) x += ctype_xdigit;
|
|
||||||
if (isalnum(i) || i == '_') x += ctype_word;
|
|
||||||
|
|
||||||
/* Note: strchr includes the terminating zero in the characters it considers.
|
|
||||||
In this instance, that is ok because we want binary zero to be flagged as a
|
|
||||||
meta-character, which in this sense is any character that terminates a run
|
|
||||||
of data characters. */
|
|
||||||
|
|
||||||
if (strchr("*+?{^.$|()[", i) != 0) x += ctype_meta; *p++ = x; }
|
|
||||||
|
|
||||||
return yield;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* End of maketables.c */
|
|
8306
src/pcre/pcre.c
8306
src/pcre/pcre.c
File diff suppressed because it is too large
Load diff
193
src/pcre/pcre.h
193
src/pcre/pcre.h
|
@ -1,193 +0,0 @@
|
||||||
/*************************************************
|
|
||||||
* Perl-Compatible Regular Expressions *
|
|
||||||
*************************************************/
|
|
||||||
|
|
||||||
/* Copyright (c) 1997-2003 University of Cambridge */
|
|
||||||
|
|
||||||
#ifndef _PCRE_H
|
|
||||||
#define _PCRE_H
|
|
||||||
|
|
||||||
/* The file pcre.h is built by "configure". Do not edit it; instead
|
|
||||||
make changes to pcre.in. */
|
|
||||||
|
|
||||||
#define PCRE_MAJOR 4
|
|
||||||
#define PCRE_MINOR 5
|
|
||||||
#define PCRE_DATE 01-December-2003
|
|
||||||
|
|
||||||
/* Win32 uses DLL by default */
|
|
||||||
|
|
||||||
#ifdef _WIN32
|
|
||||||
# ifdef PCRE_DEFINITION
|
|
||||||
# ifdef DLL_EXPORT
|
|
||||||
# define PCRE_DATA_SCOPE __declspec(dllexport)
|
|
||||||
# endif
|
|
||||||
# else
|
|
||||||
# ifndef PCRE_STATIC
|
|
||||||
# define PCRE_DATA_SCOPE extern __declspec(dllimport)
|
|
||||||
# endif
|
|
||||||
# endif
|
|
||||||
#endif
|
|
||||||
#ifndef PCRE_DATA_SCOPE
|
|
||||||
# define PCRE_DATA_SCOPE extern
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Have to include stdlib.h in order to ensure that size_t is defined;
|
|
||||||
it is needed here for malloc. */
|
|
||||||
|
|
||||||
#include <stdlib.h>
|
|
||||||
|
|
||||||
/* Allow for C++ users */
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Options */
|
|
||||||
|
|
||||||
#define PCRE_CASELESS 0x0001
|
|
||||||
#define PCRE_MULTILINE 0x0002
|
|
||||||
#define PCRE_DOTALL 0x0004
|
|
||||||
#define PCRE_EXTENDED 0x0008
|
|
||||||
#define PCRE_ANCHORED 0x0010
|
|
||||||
#define PCRE_DOLLAR_ENDONLY 0x0020
|
|
||||||
#define PCRE_EXTRA 0x0040
|
|
||||||
#define PCRE_NOTBOL 0x0080
|
|
||||||
#define PCRE_NOTEOL 0x0100
|
|
||||||
#define PCRE_UNGREEDY 0x0200
|
|
||||||
#define PCRE_NOTEMPTY 0x0400
|
|
||||||
#define PCRE_UTF8 0x0800
|
|
||||||
#define PCRE_NO_AUTO_CAPTURE 0x1000
|
|
||||||
#define PCRE_NO_UTF8_CHECK 0x2000
|
|
||||||
|
|
||||||
/* Exec-time and get/set-time error codes */
|
|
||||||
|
|
||||||
#define PCRE_ERROR_NOMATCH (-1)
|
|
||||||
#define PCRE_ERROR_NULL (-2)
|
|
||||||
#define PCRE_ERROR_BADOPTION (-3)
|
|
||||||
#define PCRE_ERROR_BADMAGIC (-4)
|
|
||||||
#define PCRE_ERROR_UNKNOWN_NODE (-5)
|
|
||||||
#define PCRE_ERROR_NOMEMORY (-6)
|
|
||||||
#define PCRE_ERROR_NOSUBSTRING (-7)
|
|
||||||
#define PCRE_ERROR_MATCHLIMIT (-8)
|
|
||||||
#define PCRE_ERROR_CALLOUT (-9) /* Never used by PCRE itself */
|
|
||||||
#define PCRE_ERROR_BADUTF8 (-10)
|
|
||||||
#define PCRE_ERROR_BADUTF8_OFFSET (-11)
|
|
||||||
|
|
||||||
/* Request types for pcre_fullinfo() */
|
|
||||||
|
|
||||||
#define PCRE_INFO_OPTIONS 0
|
|
||||||
#define PCRE_INFO_SIZE 1
|
|
||||||
#define PCRE_INFO_CAPTURECOUNT 2
|
|
||||||
#define PCRE_INFO_BACKREFMAX 3
|
|
||||||
#define PCRE_INFO_FIRSTBYTE 4
|
|
||||||
#define PCRE_INFO_FIRSTCHAR 4 /* For backwards compatibility */
|
|
||||||
#define PCRE_INFO_FIRSTTABLE 5
|
|
||||||
#define PCRE_INFO_LASTLITERAL 6
|
|
||||||
#define PCRE_INFO_NAMEENTRYSIZE 7
|
|
||||||
#define PCRE_INFO_NAMECOUNT 8
|
|
||||||
#define PCRE_INFO_NAMETABLE 9
|
|
||||||
#define PCRE_INFO_STUDYSIZE 10
|
|
||||||
|
|
||||||
/* Request types for pcre_config() */
|
|
||||||
|
|
||||||
#define PCRE_CONFIG_UTF8 0
|
|
||||||
#define PCRE_CONFIG_NEWLINE 1
|
|
||||||
#define PCRE_CONFIG_LINK_SIZE 2
|
|
||||||
#define PCRE_CONFIG_POSIX_MALLOC_THRESHOLD 3
|
|
||||||
#define PCRE_CONFIG_MATCH_LIMIT 4
|
|
||||||
#define PCRE_CONFIG_STACKRECURSE 5
|
|
||||||
|
|
||||||
/* Bit flags for the pcre_extra structure */
|
|
||||||
|
|
||||||
#define PCRE_EXTRA_STUDY_DATA 0x0001
|
|
||||||
#define PCRE_EXTRA_MATCH_LIMIT 0x0002
|
|
||||||
#define PCRE_EXTRA_CALLOUT_DATA 0x0004
|
|
||||||
|
|
||||||
/* Types */
|
|
||||||
|
|
||||||
/* Opaque handle type for a compiled pattern: only the struct tag is declared
here; the structure's definition is private to the library. */
typedef struct real_pcre pcre;
|
|
||||||
|
|
||||||
/* The structure for passing additional data to pcre_exec(). This is defined in
|
|
||||||
such a way as to be extensible. */
|
|
||||||
|
|
||||||
typedef struct pcre_extra {
  /* Bits saying which of the following fields are set */
  unsigned long flags;
  /* Opaque data from pcre_study() */
  void *study_data;
  /* Maximum number of calls to match() */
  unsigned long match_limit;
  /* Data passed back in callouts */
  void *callout_data;
} pcre_extra;
|
|
||||||
|
|
||||||
/* The structure for passing out data via the pcre_callout_function. We use a
|
|
||||||
structure so that new fields can be added on the end in future versions,
|
|
||||||
without changing the API of the function, thereby allowing old clients to work
|
|
||||||
without modification. */
|
|
||||||
|
|
||||||
typedef struct pcre_callout_block {
  /* Identifies the version of this block layout */
  int version;

  /* ---- Fields present in version 0 ---- */

  /* Number compiled into the pattern */
  int callout_number;
  /* The offset vector */
  int *offset_vector;
  /* The subject being matched */
  const char *subject;
  /* The length of the subject */
  int subject_length;
  /* Offset to the start of this match attempt */
  int start_match;
  /* Where we currently are */
  int current_position;
  /* Max current capture */
  int capture_top;
  /* Most recently closed capture */
  int capture_last;
  /* Data passed in with the call */
  void *callout_data;

  /* ---- End of version 0 fields ---- */
} pcre_callout_block;
|
|
||||||
|
|
||||||
/* Indirection for store get and free functions. These can be set to
|
|
||||||
alternative malloc/free functions if required. Special ones are used in the
|
|
||||||
non-recursive case for "frames". There is also an optional callout function
|
|
||||||
that is triggered by the (?C) regex item. Some magic is required for Win32 DLL;
|
|
||||||
it is null on other OS. For Virtual Pascal, these have to be different again.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef VPCOMPAT
|
|
||||||
PCRE_DATA_SCOPE void *(*pcre_malloc)(size_t);
|
|
||||||
PCRE_DATA_SCOPE void (*pcre_free)(void *);
|
|
||||||
PCRE_DATA_SCOPE void *(*pcre_stack_malloc)(size_t);
|
|
||||||
PCRE_DATA_SCOPE void (*pcre_stack_free)(void *);
|
|
||||||
PCRE_DATA_SCOPE int (*pcre_callout)(pcre_callout_block *);
|
|
||||||
#else /* VPCOMPAT */
|
|
||||||
extern void *pcre_malloc(size_t);
|
|
||||||
extern void pcre_free(void *);
|
|
||||||
extern void *pcre_stack_malloc(size_t);
|
|
||||||
extern void pcre_stack_free(void *);
|
|
||||||
extern int pcre_callout(pcre_callout_block *);
|
|
||||||
#endif /* VPCOMPAT */
|
|
||||||
|
|
||||||
/* Exported PCRE functions */
|
|
||||||
|
|
||||||
extern pcre *pcre_compile(const char *, int, const char **,
|
|
||||||
int *, const unsigned char *);
|
|
||||||
extern int pcre_config(int, void *);
|
|
||||||
extern int pcre_copy_named_substring(const pcre *, const char *,
|
|
||||||
int *, int, const char *, char *, int);
|
|
||||||
extern int pcre_copy_substring(const char *, int *, int, int,
|
|
||||||
char *, int);
|
|
||||||
extern int pcre_exec(const pcre *, const pcre_extra *,
|
|
||||||
const char *, int, int, int, int *, int);
|
|
||||||
extern void pcre_free_substring(const char *);
|
|
||||||
extern void pcre_free_substring_list(const char **);
|
|
||||||
extern int pcre_fullinfo(const pcre *, const pcre_extra *, int,
|
|
||||||
void *);
|
|
||||||
extern int pcre_get_named_substring(const pcre *, const char *,
|
|
||||||
int *, int, const char *, const char **);
|
|
||||||
extern int pcre_get_stringnumber(const pcre *, const char *);
|
|
||||||
extern int pcre_get_substring(const char *, int *, int, int,
|
|
||||||
const char **);
|
|
||||||
extern int pcre_get_substring_list(const char *, int *, int,
|
|
||||||
const char ***);
|
|
||||||
extern int pcre_info(const pcre *, int *, int *);
|
|
||||||
extern const unsigned char *pcre_maketables(void);
|
|
||||||
extern pcre_extra *pcre_study(const pcre *, int, const char **);
|
|
||||||
extern const char *pcre_version(void);
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
} /* extern "C" */
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif /* End of pcre.h */
|
|
|
@ -1,305 +0,0 @@
|
||||||
/*************************************************
|
|
||||||
* Perl-Compatible Regular Expressions *
|
|
||||||
*************************************************/
|
|
||||||
|
|
||||||
/*
|
|
||||||
This is a library of functions to support regular expressions whose syntax
|
|
||||||
and semantics are as close as possible to those of the Perl 5 language. See
|
|
||||||
the file Tech.Notes for some information on the internals.
|
|
||||||
|
|
||||||
This module is a wrapper that provides a POSIX API to the underlying PCRE
|
|
||||||
functions.
|
|
||||||
|
|
||||||
Written by: Philip Hazel <ph10@cam.ac.uk>
|
|
||||||
|
|
||||||
Copyright (c) 1997-2003 University of Cambridge
|
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
|
||||||
Permission is granted to anyone to use this software for any purpose on any
|
|
||||||
computer system, and to redistribute it freely, subject to the following
|
|
||||||
restrictions:
|
|
||||||
|
|
||||||
1. This software is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
||||||
|
|
||||||
2. The origin of this software must not be misrepresented, either by
|
|
||||||
explicit claim or by omission.
|
|
||||||
|
|
||||||
3. Altered versions must be plainly marked as such, and must not be
|
|
||||||
misrepresented as being the original software.
|
|
||||||
|
|
||||||
4. If PCRE is embedded in any software that is released under the GNU
|
|
||||||
General Purpose Licence (GPL), then the terms of that licence shall
|
|
||||||
supersede any condition above with which it is incompatible.
|
|
||||||
-----------------------------------------------------------------------------
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "internal.h"
|
|
||||||
#include "pcreposix.h"
|
|
||||||
#include "stdlib.h"
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/* Corresponding tables of PCRE error messages and POSIX error codes. */
|
|
||||||
|
|
||||||
static const char *const estring[] = {
|
|
||||||
ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9, ERR10,
|
|
||||||
ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17, ERR18, ERR19, ERR20,
|
|
||||||
ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR29, ERR29, ERR30,
|
|
||||||
ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39, ERR40,
|
|
||||||
ERR41, ERR42, ERR43, ERR44 };
|
|
||||||
|
|
||||||
static const int eint[] = {
|
|
||||||
REG_EESCAPE, /* "\\ at end of pattern" */
|
|
||||||
REG_EESCAPE, /* "\\c at end of pattern" */
|
|
||||||
REG_EESCAPE, /* "unrecognized character follows \\" */
|
|
||||||
REG_BADBR, /* "numbers out of order in {} quantifier" */
|
|
||||||
REG_BADBR, /* "number too big in {} quantifier" */
|
|
||||||
REG_EBRACK, /* "missing terminating ] for character class" */
|
|
||||||
REG_ECTYPE, /* "invalid escape sequence in character class" */
|
|
||||||
REG_ERANGE, /* "range out of order in character class" */
|
|
||||||
REG_BADRPT, /* "nothing to repeat" */
|
|
||||||
REG_BADRPT, /* "operand of unlimited repeat could match the empty string" */
|
|
||||||
REG_ASSERT, /* "internal error: unexpected repeat" */
|
|
||||||
REG_BADPAT, /* "unrecognized character after (?" */
|
|
||||||
REG_BADPAT, /* "POSIX named classes are supported only within a class" */
|
|
||||||
REG_EPAREN, /* "missing )" */
|
|
||||||
REG_ESUBREG, /* "reference to non-existent subpattern" */
|
|
||||||
REG_INVARG, /* "erroffset passed as NULL" */
|
|
||||||
REG_INVARG, /* "unknown option bit(s) set" */
|
|
||||||
REG_EPAREN, /* "missing ) after comment" */
|
|
||||||
REG_ESIZE, /* "parentheses nested too deeply" */
|
|
||||||
REG_ESIZE, /* "regular expression too large" */
|
|
||||||
REG_ESPACE, /* "failed to get memory" */
|
|
||||||
REG_EPAREN, /* "unmatched brackets" */
|
|
||||||
REG_ASSERT, /* "internal error: code overflow" */
|
|
||||||
REG_BADPAT, /* "unrecognized character after (?<" */
|
|
||||||
REG_BADPAT, /* "lookbehind assertion is not fixed length" */
|
|
||||||
REG_BADPAT, /* "malformed number after (?(" */
|
|
||||||
REG_BADPAT, /* "conditional group containe more than two branches" */
|
|
||||||
REG_BADPAT, /* "assertion expected after (?(" */
|
|
||||||
REG_BADPAT, /* "(?R or (?digits must be followed by )" */
|
|
||||||
REG_ECTYPE, /* "unknown POSIX class name" */
|
|
||||||
REG_BADPAT, /* "POSIX collating elements are not supported" */
|
|
||||||
REG_INVARG, /* "this version of PCRE is not compiled with PCRE_UTF8 support" */
|
|
||||||
REG_BADPAT, /* "spare error" */
|
|
||||||
REG_BADPAT, /* "character value in \x{...} sequence is too large" */
|
|
||||||
REG_BADPAT, /* "invalid condition (?(0)" */
|
|
||||||
REG_BADPAT, /* "\\C not allowed in lookbehind assertion" */
|
|
||||||
REG_EESCAPE, /* "PCRE does not support \\L, \\l, \\N, \\P, \\p, \\U, \\u, or \\X" */
|
|
||||||
REG_BADPAT, /* "number after (?C is > 255" */
|
|
||||||
REG_BADPAT, /* "closing ) for (?C expected" */
|
|
||||||
REG_BADPAT, /* "recursive call could loop indefinitely" */
|
|
||||||
REG_BADPAT, /* "unrecognized character after (?P" */
|
|
||||||
REG_BADPAT, /* "syntax error after (?P" */
|
|
||||||
REG_BADPAT, /* "two named groups have the same name" */
|
|
||||||
REG_BADPAT /* "invalid UTF-8 string" */
|
|
||||||
};
|
|
||||||
|
|
||||||
/* Table of texts corresponding to POSIX error codes */
|
|
||||||
|
|
||||||
/* Message text for each POSIX error code, indexed by the code's numeric value.
Index 0 is a placeholder because the codes start at 1. */

static const char *const pstring[] = {
  /*  0  (unused)     */  "",
  /*  1  REG_ASSERT   */  "internal error",
  /*  2  REG_BADBR    */  "invalid repeat counts in {}",
  /*  3  REG_BADPAT   */  "pattern error",
  /*  4  REG_BADRPT   */  "? * + invalid",
  /*  5  REG_EBRACE   */  "unbalanced {}",
  /*  6  REG_EBRACK   */  "unbalanced []",
  /*  7  REG_ECOLLATE */  "collation error - not relevant",
  /*  8  REG_ECTYPE   */  "bad class",
  /*  9  REG_EESCAPE  */  "bad escape sequence",
  /* 10  REG_EMPTY    */  "empty expression",
  /* 11  REG_EPAREN   */  "unbalanced ()",
  /* 12  REG_ERANGE   */  "bad range inside []",
  /* 13  REG_ESIZE    */  "expression too big",
  /* 14  REG_ESPACE   */  "failed to get memory",
  /* 15  REG_ESUBREG  */  "bad back reference",
  /* 16  REG_INVARG   */  "bad argument",
  /* 17  REG_NOMATCH  */  "match failed"
};
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Translate PCRE text code to int *
|
|
||||||
*************************************************/
|
|
||||||
|
|
||||||
/* PCRE compile-time errors are given as strings defined as macros. We can just
|
|
||||||
look them up in a table to turn them into POSIX-style error codes. */
|
|
||||||
|
|
||||||
static int
|
|
||||||
pcre_posix_error_code(const char *s)
|
|
||||||
{
|
|
||||||
size_t i;
|
|
||||||
for (i = 0; i < sizeof(estring)/sizeof(char *); i++)
|
|
||||||
if (strcmp(s, estring[i]) == 0) return eint[i];
|
|
||||||
return REG_ASSERT;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Translate error code to string *
|
|
||||||
*************************************************/
|
|
||||||
|
|
||||||
EXPORT size_t
|
|
||||||
regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
|
|
||||||
{
|
|
||||||
const char *message, *addmessage;
|
|
||||||
size_t length, addlength;
|
|
||||||
|
|
||||||
message = (errcode >= (int)(sizeof(pstring)/sizeof(char *)))?
|
|
||||||
"unknown error code" : pstring[errcode];
|
|
||||||
length = strlen(message) + 1;
|
|
||||||
|
|
||||||
addmessage = " at offset ";
|
|
||||||
addlength = (preg != NULL && (int)preg->re_erroffset != -1)?
|
|
||||||
strlen(addmessage) + 6 : 0;
|
|
||||||
|
|
||||||
if (errbuf_size > 0)
|
|
||||||
{
|
|
||||||
if (addlength > 0 && errbuf_size >= length + addlength)
|
|
||||||
sprintf(errbuf, "%s%s%-6d", message, addmessage, (int)preg->re_erroffset);
|
|
||||||
else
|
|
||||||
{
|
|
||||||
strncpy(errbuf, message, errbuf_size - 1);
|
|
||||||
errbuf[errbuf_size-1] = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return length + addlength;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Free store held by a regex *
|
|
||||||
*************************************************/
|
|
||||||
|
|
||||||
EXPORT void
|
|
||||||
regfree(regex_t *preg)
|
|
||||||
{
|
|
||||||
(pcre_free)(preg->re_pcre);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Compile a regular expression *
|
|
||||||
*************************************************/
|
|
||||||
|
|
||||||
/*
|
|
||||||
Arguments:
|
|
||||||
preg points to a structure for recording the compiled expression
|
|
||||||
pattern the pattern to compile
|
|
||||||
cflags compilation flags
|
|
||||||
|
|
||||||
Returns: 0 on success
|
|
||||||
various non-zero codes on failure
|
|
||||||
*/
|
|
||||||
|
|
||||||
EXPORT int
|
|
||||||
regcomp(regex_t *preg, const char *pattern, int cflags)
|
|
||||||
{
|
|
||||||
const char *errorptr;
|
|
||||||
int erroffset;
|
|
||||||
int options = 0;
|
|
||||||
|
|
||||||
if ((cflags & REG_ICASE) != 0) options |= PCRE_CASELESS;
|
|
||||||
if ((cflags & REG_NEWLINE) != 0) options |= PCRE_MULTILINE;
|
|
||||||
|
|
||||||
preg->re_pcre = pcre_compile(pattern, options, &errorptr, &erroffset, NULL);
|
|
||||||
preg->re_erroffset = erroffset;
|
|
||||||
|
|
||||||
if (preg->re_pcre == NULL) return pcre_posix_error_code(errorptr);
|
|
||||||
|
|
||||||
preg->re_nsub = pcre_info((const pcre *)preg->re_pcre, NULL, NULL);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Match a regular expression *
|
|
||||||
*************************************************/
|
|
||||||
|
|
||||||
/* Unfortunately, PCRE requires 3 ints of working space for each captured
|
|
||||||
substring, so we have to get and release working store instead of just using
|
|
||||||
the POSIX structures as was done in earlier releases when PCRE needed only 2
|
|
||||||
ints. However, if the number of possible capturing brackets is small, use a
|
|
||||||
block of store on the stack, to reduce the use of malloc/free. The threshold is
|
|
||||||
in a macro that can be changed at configure time. */
|
|
||||||
|
|
||||||
EXPORT int
|
|
||||||
regexec(const regex_t *preg, const char *string, size_t nmatch,
|
|
||||||
regmatch_t pmatch[], int eflags)
|
|
||||||
{
|
|
||||||
int rc;
|
|
||||||
int options = 0;
|
|
||||||
int *ovector = NULL;
|
|
||||||
int small_ovector[POSIX_MALLOC_THRESHOLD * 3];
|
|
||||||
BOOL allocated_ovector = FALSE;
|
|
||||||
|
|
||||||
if ((eflags & REG_NOTBOL) != 0) options |= PCRE_NOTBOL;
|
|
||||||
if ((eflags & REG_NOTEOL) != 0) options |= PCRE_NOTEOL;
|
|
||||||
|
|
||||||
((regex_t *)preg)->re_erroffset = (size_t)(-1); /* Only has meaning after compile */
|
|
||||||
|
|
||||||
if (nmatch > 0)
|
|
||||||
{
|
|
||||||
if (nmatch <= POSIX_MALLOC_THRESHOLD)
|
|
||||||
{
|
|
||||||
ovector = &(small_ovector[0]);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
ovector = (int *)malloc(sizeof(int) * nmatch * 3);
|
|
||||||
if (ovector == NULL) return REG_ESPACE;
|
|
||||||
allocated_ovector = TRUE;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
rc = pcre_exec((const pcre *)preg->re_pcre, NULL, string, (int)strlen(string),
|
|
||||||
0, options, ovector, nmatch * 3);
|
|
||||||
|
|
||||||
if (rc == 0) rc = nmatch; /* All captured slots were filled in */
|
|
||||||
|
|
||||||
if (rc >= 0)
|
|
||||||
{
|
|
||||||
size_t i;
|
|
||||||
for (i = 0; i < (size_t)rc; i++)
|
|
||||||
{
|
|
||||||
pmatch[i].rm_so = ovector[i*2];
|
|
||||||
pmatch[i].rm_eo = ovector[i*2+1];
|
|
||||||
}
|
|
||||||
if (allocated_ovector) free(ovector);
|
|
||||||
for (; i < nmatch; i++) pmatch[i].rm_so = pmatch[i].rm_eo = -1;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if (allocated_ovector) free(ovector);
|
|
||||||
switch(rc)
|
|
||||||
{
|
|
||||||
case PCRE_ERROR_NOMATCH: return REG_NOMATCH;
|
|
||||||
case PCRE_ERROR_NULL: return REG_INVARG;
|
|
||||||
case PCRE_ERROR_BADOPTION: return REG_INVARG;
|
|
||||||
case PCRE_ERROR_BADMAGIC: return REG_INVARG;
|
|
||||||
case PCRE_ERROR_UNKNOWN_NODE: return REG_ASSERT;
|
|
||||||
case PCRE_ERROR_NOMEMORY: return REG_ESPACE;
|
|
||||||
case PCRE_ERROR_MATCHLIMIT: return REG_ESPACE;
|
|
||||||
case PCRE_ERROR_BADUTF8: return REG_INVARG;
|
|
||||||
case PCRE_ERROR_BADUTF8_OFFSET: return REG_INVARG;
|
|
||||||
default: return REG_ASSERT;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* End of pcreposix.c */
|
|
|
@ -1,88 +0,0 @@
|
||||||
/*************************************************
|
|
||||||
* Perl-Compatible Regular Expressions *
|
|
||||||
*************************************************/
|
|
||||||
|
|
||||||
/* Copyright (c) 1997-2003 University of Cambridge */
|
|
||||||
|
|
||||||
#ifndef _PCREPOSIX_H
|
|
||||||
#define _PCREPOSIX_H
|
|
||||||
|
|
||||||
/* This is the header for the POSIX wrapper interface to the PCRE Perl-
|
|
||||||
Compatible Regular Expression library. It defines the things POSIX says should
|
|
||||||
be there. I hope. */
|
|
||||||
|
|
||||||
/* Have to include stdlib.h in order to ensure that size_t is defined. */
|
|
||||||
|
|
||||||
#include <stdlib.h>
|
|
||||||
|
|
||||||
/* Allow for C++ users */
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Options defined by POSIX. */
|
|
||||||
|
|
||||||
#define REG_ICASE 0x01
|
|
||||||
#define REG_NEWLINE 0x02
|
|
||||||
#define REG_NOTBOL 0x04
|
|
||||||
#define REG_NOTEOL 0x08
|
|
||||||
|
|
||||||
/* These are not used by PCRE, but by defining them we make it easier
|
|
||||||
to slot PCRE into existing programs that make POSIX calls. */
|
|
||||||
|
|
||||||
#define REG_EXTENDED 0
|
|
||||||
#define REG_NOSUB 0
|
|
||||||
|
|
||||||
/* Error values. Not all these are relevant or used by the wrapper. */
|
|
||||||
|
|
||||||
/* POSIX error codes. The values are consecutive from 1 and are spelled out
explicitly here. */

enum {
  REG_ASSERT   = 1,   /* internal error ? */
  REG_BADBR    = 2,   /* invalid repeat counts in {} */
  REG_BADPAT   = 3,   /* pattern error */
  REG_BADRPT   = 4,   /* ? * + invalid */
  REG_EBRACE   = 5,   /* unbalanced {} */
  REG_EBRACK   = 6,   /* unbalanced [] */
  REG_ECOLLATE = 7,   /* collation error - not relevant */
  REG_ECTYPE   = 8,   /* bad class */
  REG_EESCAPE  = 9,   /* bad escape sequence */
  REG_EMPTY    = 10,  /* empty expression */
  REG_EPAREN   = 11,  /* unbalanced () */
  REG_ERANGE   = 12,  /* bad range inside [] */
  REG_ESIZE    = 13,  /* expression too big */
  REG_ESPACE   = 14,  /* failed to get memory */
  REG_ESUBREG  = 15,  /* bad back reference */
  REG_INVARG   = 16,  /* bad argument */
  REG_NOMATCH  = 17   /* match failed */
};
|
|
||||||
|
|
||||||
|
|
||||||
/* The structure representing a compiled regular expression. */
|
|
||||||
|
|
||||||
typedef struct {
  /* The compiled pattern; set by regcomp() and released by regfree() */
  void *re_pcre;
  /* Set by regcomp() from the value pcre_info() returns */
  size_t re_nsub;
  /* Error offset from the last compile; regexec() resets it to (size_t)(-1) */
  size_t re_erroffset;
} regex_t;
|
|
||||||
|
|
||||||
/* The structure in which a captured offset is returned. */
|
|
||||||
|
|
||||||
/* Offsets are plain ints; -1 in both fields marks an unused entry. */
typedef int regoff_t;

typedef struct {
  regoff_t rm_so;   /* Start offset of the captured substring */
  regoff_t rm_eo;   /* End offset of the captured substring */
} regmatch_t;
|
|
||||||
|
|
||||||
/* The functions */
|
|
||||||
|
|
||||||
extern int regcomp(regex_t *, const char *, int);
|
|
||||||
extern int regexec(const regex_t *, const char *, size_t, regmatch_t *, int);
|
|
||||||
extern size_t regerror(int, const regex_t *, char *, size_t);
|
|
||||||
extern void regfree(regex_t *);
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
} /* extern "C" */
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif /* End of pcreposix.h */
|
|
472
src/pcre/study.c
472
src/pcre/study.c
|
@ -1,472 +0,0 @@
|
||||||
/*************************************************
|
|
||||||
* Perl-Compatible Regular Expressions *
|
|
||||||
*************************************************/
|
|
||||||
|
|
||||||
/*
|
|
||||||
This is a library of functions to support regular expressions whose syntax
|
|
||||||
and semantics are as close as possible to those of the Perl 5 language. See
|
|
||||||
the file Tech.Notes for some information on the internals.
|
|
||||||
|
|
||||||
Written by: Philip Hazel <ph10@cam.ac.uk>
|
|
||||||
|
|
||||||
Copyright (c) 1997-2003 University of Cambridge
|
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
|
||||||
Permission is granted to anyone to use this software for any purpose on any
|
|
||||||
computer system, and to redistribute it freely, subject to the following
|
|
||||||
restrictions:
|
|
||||||
|
|
||||||
1. This software is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
||||||
|
|
||||||
2. The origin of this software must not be misrepresented, either by
|
|
||||||
explicit claim or by omission.
|
|
||||||
|
|
||||||
3. Altered versions must be plainly marked as such, and must not be
|
|
||||||
misrepresented as being the original software.
|
|
||||||
|
|
||||||
4. If PCRE is embedded in any software that is released under the GNU
|
|
||||||
General Purpose Licence (GPL), then the terms of that licence shall
|
|
||||||
supersede any condition above with which it is incompatible.
|
|
||||||
-----------------------------------------------------------------------------
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
/* Include the internals header, which itself includes Standard C headers plus
|
|
||||||
the external pcre header. */
|
|
||||||
|
|
||||||
#include "internal.h"
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Set a bit and maybe its alternate case *
|
|
||||||
*************************************************/
|
|
||||||
|
|
||||||
/* Given a character, set its bit in the table, and also the bit for the other
|
|
||||||
version of a letter if we are caseless.
|
|
||||||
|
|
||||||
Arguments:
|
|
||||||
start_bits points to the bit map
|
|
||||||
c is the character
|
|
||||||
caseless the caseless flag
|
|
||||||
cd the block with char table pointers
|
|
||||||
|
|
||||||
Returns: nothing
|
|
||||||
*/
|
|
||||||
|
|
||||||
static void
|
|
||||||
set_bit(uschar *start_bits, int c, BOOL caseless, compile_data *cd)
|
|
||||||
{
|
|
||||||
start_bits[c/8] |= (1 << (c&7));
|
|
||||||
if (caseless && (cd->ctypes[c] & ctype_letter) != 0)
|
|
||||||
start_bits[cd->fcc[c]/8] |= (1 << (cd->fcc[c]&7));
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Create bitmap of starting chars *
|
|
||||||
*************************************************/
|
|
||||||
|
|
||||||
/* This function scans a compiled unanchored expression and attempts to build a
|
|
||||||
bitmap of the set of initial characters. If it can't, it returns FALSE. As time
|
|
||||||
goes by, we may be able to get more clever at doing this.
|
|
||||||
|
|
||||||
Arguments:
|
|
||||||
code points to an expression
|
|
||||||
start_bits points to a 32-byte table, initialized to 0
|
|
||||||
caseless the current state of the caseless flag
|
|
||||||
utf8 TRUE if in UTF-8 mode
|
|
||||||
cd the block with char table pointers
|
|
||||||
|
|
||||||
Returns: TRUE if table built, FALSE otherwise
|
|
||||||
*/
|
|
||||||
|
|
||||||
static BOOL
|
|
||||||
set_start_bits(const uschar *code, uschar *start_bits, BOOL caseless,
|
|
||||||
BOOL utf8, compile_data *cd)
|
|
||||||
{
|
|
||||||
register int c;
|
|
||||||
|
|
||||||
/* This next statement and the later reference to dummy are here in order to
|
|
||||||
trick the optimizer of the IBM C compiler for OS/2 into generating correct
|
|
||||||
code. Apparently IBM isn't going to fix the problem, and we would rather not
|
|
||||||
disable optimization (in this module it actually makes a big difference, and
|
|
||||||
the pcre module can use all the optimization it can get).
|
|
||||||
|
|
||||||
Breaking OS/2 in favor of gcc's paranoia. --lynX 2016
|
|
||||||
*/
|
|
||||||
|
|
||||||
do
|
|
||||||
{
|
|
||||||
const uschar *tcode = code + 1 + LINK_SIZE;
|
|
||||||
BOOL try_next = TRUE;
|
|
||||||
|
|
||||||
while (try_next)
|
|
||||||
{
|
|
||||||
/* If a branch starts with a bracket or a positive lookahead assertion,
|
|
||||||
recurse to set bits from within them. That's all for this branch. */
|
|
||||||
|
|
||||||
if ((int)*tcode >= OP_BRA || *tcode == OP_ASSERT)
|
|
||||||
{
|
|
||||||
if (!set_start_bits(tcode, start_bits, caseless, utf8, cd))
|
|
||||||
return FALSE;
|
|
||||||
try_next = FALSE;
|
|
||||||
}
|
|
||||||
|
|
||||||
else switch(*tcode)
|
|
||||||
{
|
|
||||||
default:
|
|
||||||
return FALSE;
|
|
||||||
|
|
||||||
/* Skip over callout */
|
|
||||||
|
|
||||||
case OP_CALLOUT:
|
|
||||||
tcode += 2;
|
|
||||||
break;
|
|
||||||
|
|
||||||
/* Skip over extended extraction bracket number */
|
|
||||||
|
|
||||||
case OP_BRANUMBER:
|
|
||||||
tcode += 3;
|
|
||||||
break;
|
|
||||||
|
|
||||||
/* Skip over lookbehind and negative lookahead assertions */
|
|
||||||
|
|
||||||
case OP_ASSERT_NOT:
|
|
||||||
case OP_ASSERTBACK:
|
|
||||||
case OP_ASSERTBACK_NOT:
|
|
||||||
do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
|
|
||||||
tcode += 1+LINK_SIZE;
|
|
||||||
break;
|
|
||||||
|
|
||||||
/* Skip over an option setting, changing the caseless flag */
|
|
||||||
|
|
||||||
case OP_OPT:
|
|
||||||
caseless = (tcode[1] & PCRE_CASELESS) != 0;
|
|
||||||
tcode += 2;
|
|
||||||
break;
|
|
||||||
|
|
||||||
/* BRAZERO does the bracket, but carries on. */
|
|
||||||
|
|
||||||
case OP_BRAZERO:
|
|
||||||
case OP_BRAMINZERO:
|
|
||||||
if (!set_start_bits(++tcode, start_bits, caseless, utf8, cd))
|
|
||||||
return FALSE;
|
|
||||||
do tcode += GET(tcode,1); while (*tcode == OP_ALT);
|
|
||||||
tcode += 1+LINK_SIZE;
|
|
||||||
break;
|
|
||||||
|
|
||||||
/* Single-char * or ? sets the bit and tries the next item */
|
|
||||||
|
|
||||||
case OP_STAR:
|
|
||||||
case OP_MINSTAR:
|
|
||||||
case OP_QUERY:
|
|
||||||
case OP_MINQUERY:
|
|
||||||
set_bit(start_bits, tcode[1], caseless, cd);
|
|
||||||
tcode += 2;
|
|
||||||
#ifdef SUPPORT_UTF8
|
|
||||||
if (utf8) while ((*tcode & 0xc0) == 0x80) tcode++;
|
|
||||||
#endif
|
|
||||||
break;
|
|
||||||
|
|
||||||
/* Single-char upto sets the bit and tries the next */
|
|
||||||
|
|
||||||
case OP_UPTO:
|
|
||||||
case OP_MINUPTO:
|
|
||||||
set_bit(start_bits, tcode[3], caseless, cd);
|
|
||||||
tcode += 4;
|
|
||||||
#ifdef SUPPORT_UTF8
|
|
||||||
if (utf8) while ((*tcode & 0xc0) == 0x80) tcode++;
|
|
||||||
#endif
|
|
||||||
break;
|
|
||||||
|
|
||||||
/* At least one single char sets the bit and stops */
|
|
||||||
|
|
||||||
case OP_EXACT: /* Fall through */
|
|
||||||
tcode++;
|
|
||||||
|
|
||||||
case OP_CHARS: /* Fall through */
|
|
||||||
tcode++;
|
|
||||||
|
|
||||||
case OP_PLUS:
|
|
||||||
case OP_MINPLUS:
|
|
||||||
set_bit(start_bits, tcode[1], caseless, cd);
|
|
||||||
try_next = FALSE;
|
|
||||||
break;
|
|
||||||
|
|
||||||
/* Single character type sets the bits and stops */
|
|
||||||
|
|
||||||
case OP_NOT_DIGIT:
|
|
||||||
for (c = 0; c < 32; c++)
|
|
||||||
start_bits[c] |= ~cd->cbits[c+cbit_digit];
|
|
||||||
try_next = FALSE;
|
|
||||||
break;
|
|
||||||
|
|
||||||
case OP_DIGIT:
|
|
||||||
for (c = 0; c < 32; c++)
|
|
||||||
start_bits[c] |= cd->cbits[c+cbit_digit];
|
|
||||||
try_next = FALSE;
|
|
||||||
break;
|
|
||||||
|
|
||||||
case OP_NOT_WHITESPACE:
|
|
||||||
for (c = 0; c < 32; c++)
|
|
||||||
start_bits[c] |= ~cd->cbits[c+cbit_space];
|
|
||||||
try_next = FALSE;
|
|
||||||
break;
|
|
||||||
|
|
||||||
case OP_WHITESPACE:
|
|
||||||
for (c = 0; c < 32; c++)
|
|
||||||
start_bits[c] |= cd->cbits[c+cbit_space];
|
|
||||||
try_next = FALSE;
|
|
||||||
break;
|
|
||||||
|
|
||||||
case OP_NOT_WORDCHAR:
|
|
||||||
for (c = 0; c < 32; c++)
|
|
||||||
start_bits[c] |= ~cd->cbits[c+cbit_word];
|
|
||||||
try_next = FALSE;
|
|
||||||
break;
|
|
||||||
|
|
||||||
case OP_WORDCHAR:
|
|
||||||
for (c = 0; c < 32; c++)
|
|
||||||
start_bits[c] |= cd->cbits[c+cbit_word];
|
|
||||||
try_next = FALSE;
|
|
||||||
break;
|
|
||||||
|
|
||||||
/* One or more character type fudges the pointer and restarts, knowing
|
|
||||||
it will hit a single character type and stop there. */
|
|
||||||
|
|
||||||
case OP_TYPEPLUS:
|
|
||||||
case OP_TYPEMINPLUS:
|
|
||||||
tcode++;
|
|
||||||
break;
|
|
||||||
|
|
||||||
case OP_TYPEEXACT:
|
|
||||||
tcode += 3;
|
|
||||||
break;
|
|
||||||
|
|
||||||
/* Zero or more repeats of character types set the bits and then
|
|
||||||
try again. */
|
|
||||||
|
|
||||||
case OP_TYPEUPTO:
|
|
||||||
case OP_TYPEMINUPTO:
|
|
||||||
tcode += 2; /* Fall through */
|
|
||||||
|
|
||||||
case OP_TYPESTAR:
|
|
||||||
case OP_TYPEMINSTAR:
|
|
||||||
case OP_TYPEQUERY:
|
|
||||||
case OP_TYPEMINQUERY:
|
|
||||||
switch(tcode[1])
|
|
||||||
{
|
|
||||||
case OP_ANY:
|
|
||||||
return FALSE;
|
|
||||||
|
|
||||||
case OP_NOT_DIGIT:
|
|
||||||
for (c = 0; c < 32; c++)
|
|
||||||
start_bits[c] |= ~cd->cbits[c+cbit_digit];
|
|
||||||
break;
|
|
||||||
|
|
||||||
case OP_DIGIT:
|
|
||||||
for (c = 0; c < 32; c++)
|
|
||||||
start_bits[c] |= cd->cbits[c+cbit_digit];
|
|
||||||
break;
|
|
||||||
|
|
||||||
case OP_NOT_WHITESPACE:
|
|
||||||
for (c = 0; c < 32; c++)
|
|
||||||
start_bits[c] |= ~cd->cbits[c+cbit_space];
|
|
||||||
break;
|
|
||||||
|
|
||||||
case OP_WHITESPACE:
|
|
||||||
for (c = 0; c < 32; c++)
|
|
||||||
start_bits[c] |= cd->cbits[c+cbit_space];
|
|
||||||
break;
|
|
||||||
|
|
||||||
case OP_NOT_WORDCHAR:
|
|
||||||
for (c = 0; c < 32; c++)
|
|
||||||
start_bits[c] |= ~cd->cbits[c+cbit_word];
|
|
||||||
break;
|
|
||||||
|
|
||||||
case OP_WORDCHAR:
|
|
||||||
for (c = 0; c < 32; c++)
|
|
||||||
start_bits[c] |= cd->cbits[c+cbit_word];
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
tcode += 2;
|
|
||||||
break;
|
|
||||||
|
|
||||||
/* Character class where all the information is in a bit map: set the
|
|
||||||
bits and either carry on or not, according to the repeat count. If it was
|
|
||||||
a negative class, and we are operating with UTF-8 characters, any byte
|
|
||||||
with a value >= 0xc4 is a potentially valid starter because it starts a
|
|
||||||
character with a value > 255. */
|
|
||||||
|
|
||||||
case OP_NCLASS:
|
|
||||||
if (utf8)
|
|
||||||
{
|
|
||||||
start_bits[24] |= 0xf0; /* Bits for 0xc4 - 0xc8 */
|
|
||||||
memset(start_bits+25, 0xff, 7); /* Bits for 0xc9 - 0xff */
|
|
||||||
}
|
|
||||||
/* Fall through */
|
|
||||||
|
|
||||||
case OP_CLASS:
|
|
||||||
{
|
|
||||||
tcode++;
|
|
||||||
|
|
||||||
/* In UTF-8 mode, the bits in a bit map correspond to character
|
|
||||||
values, not to byte values. However, the bit map we are constructing is
|
|
||||||
for byte values. So we have to do a conversion for characters whose
|
|
||||||
value is > 127. In fact, there are only two possible starting bytes for
|
|
||||||
characters in the range 128 - 255. */
|
|
||||||
|
|
||||||
if (utf8)
|
|
||||||
{
|
|
||||||
for (c = 0; c < 16; c++) start_bits[c] |= tcode[c];
|
|
||||||
for (c = 128; c < 256; c++)
|
|
||||||
{
|
|
||||||
if ((tcode[c/8] && (1 << (c&7))) != 0)
|
|
||||||
{
|
|
||||||
int d = (c >> 6) | 0xc0; /* Set bit for this starter */
|
|
||||||
start_bits[d/8] |= (1 << (d&7)); /* and then skip on to the */
|
|
||||||
c = (c & 0xc0) + 0x40 - 1; /* next relevant character. */
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* In non-UTF-8 mode, the two bit maps are completely compatible. */
|
|
||||||
|
|
||||||
else
|
|
||||||
{
|
|
||||||
for (c = 0; c < 32; c++) start_bits[c] |= tcode[c];
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Advance past the bit map, and act on what follows */
|
|
||||||
|
|
||||||
tcode += 32;
|
|
||||||
switch (*tcode)
|
|
||||||
{
|
|
||||||
case OP_CRSTAR:
|
|
||||||
case OP_CRMINSTAR:
|
|
||||||
case OP_CRQUERY:
|
|
||||||
case OP_CRMINQUERY:
|
|
||||||
tcode++;
|
|
||||||
break;
|
|
||||||
|
|
||||||
case OP_CRRANGE:
|
|
||||||
case OP_CRMINRANGE:
|
|
||||||
if (((tcode[1] << 8) + tcode[2]) == 0) tcode += 5;
|
|
||||||
else try_next = FALSE;
|
|
||||||
break;
|
|
||||||
|
|
||||||
default:
|
|
||||||
try_next = FALSE;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
break; /* End of bitmap class handling */
|
|
||||||
|
|
||||||
} /* End of switch */
|
|
||||||
} /* End of try_next loop */
|
|
||||||
|
|
||||||
code += GET(code, 1); /* Advance to next branch */
|
|
||||||
}
|
|
||||||
while (*code == OP_ALT);
|
|
||||||
return TRUE;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Study a compiled expression *
|
|
||||||
*************************************************/
|
|
||||||
|
|
||||||
/* This function is handed a compiled expression that it must study to produce
|
|
||||||
information that will speed up the matching. It returns a pcre_extra block
|
|
||||||
which then gets handed back to pcre_exec().
|
|
||||||
|
|
||||||
Arguments:
|
|
||||||
re points to the compiled expression
|
|
||||||
options contains option bits
|
|
||||||
errorptr points to where to place error messages;
|
|
||||||
set NULL unless error
|
|
||||||
|
|
||||||
Returns: pointer to a pcre_extra block, with study_data filled in and the
|
|
||||||
appropriate flag set;
|
|
||||||
NULL on error or if no optimization possible
|
|
||||||
*/
|
|
||||||
|
|
||||||
EXPORT pcre_extra *
|
|
||||||
pcre_study(const pcre *external_re, int options, const char **errorptr)
|
|
||||||
{
|
|
||||||
uschar start_bits[32];
|
|
||||||
pcre_extra *extra;
|
|
||||||
pcre_study_data *study;
|
|
||||||
const real_pcre *re = (const real_pcre *)external_re;
|
|
||||||
uschar *code = (uschar *)re + sizeof(real_pcre) +
|
|
||||||
(re->name_count * re->name_entry_size);
|
|
||||||
compile_data compile_block;
|
|
||||||
|
|
||||||
*errorptr = NULL;
|
|
||||||
|
|
||||||
if (re == NULL || re->magic_number != MAGIC_NUMBER)
|
|
||||||
{
|
|
||||||
*errorptr = "argument is not a compiled regular expression";
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ((options & ~PUBLIC_STUDY_OPTIONS) != 0)
|
|
||||||
{
|
|
||||||
*errorptr = "unknown or incorrect option bit(s) set";
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* For an anchored pattern, or an unanchored pattern that has a first char, or
|
|
||||||
a multiline pattern that matches only at "line starts", no further processing
|
|
||||||
at present. */
|
|
||||||
|
|
||||||
if ((re->options & (PCRE_ANCHORED|PCRE_FIRSTSET|PCRE_STARTLINE)) != 0)
|
|
||||||
return NULL;
|
|
||||||
|
|
||||||
/* Set the character tables in the block which is passed around */
|
|
||||||
|
|
||||||
compile_block.lcc = re->tables + lcc_offset;
|
|
||||||
compile_block.fcc = re->tables + fcc_offset;
|
|
||||||
compile_block.cbits = re->tables + cbits_offset;
|
|
||||||
compile_block.ctypes = re->tables + ctypes_offset;
|
|
||||||
|
|
||||||
/* See if we can find a fixed set of initial characters for the pattern. */
|
|
||||||
|
|
||||||
memset(start_bits, 0, 32 * sizeof(uschar));
|
|
||||||
if (!set_start_bits(code, start_bits, (re->options & PCRE_CASELESS) != 0,
|
|
||||||
(re->options & PCRE_UTF8) != 0, &compile_block)) return NULL;
|
|
||||||
|
|
||||||
/* Get a pcre_extra block and a pcre_study_data block. The study data is put in
|
|
||||||
the latter, which is pointed to by the former, which may also get additional
|
|
||||||
data set later by the calling program. At the moment, the size of
|
|
||||||
pcre_study_data is fixed. We nevertheless save it in a field for returning via
|
|
||||||
the pcre_fullinfo() function so that if it becomes variable in the future, we
|
|
||||||
don't have to change that code. */
|
|
||||||
|
|
||||||
extra = (pcre_extra *)(pcre_malloc)
|
|
||||||
(sizeof(pcre_extra) + sizeof(pcre_study_data));
|
|
||||||
|
|
||||||
if (extra == NULL)
|
|
||||||
{
|
|
||||||
*errorptr = "failed to get memory";
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
study = (pcre_study_data *)((char *)extra + sizeof(pcre_extra));
|
|
||||||
extra->flags = PCRE_EXTRA_STUDY_DATA;
|
|
||||||
extra->study_data = study;
|
|
||||||
|
|
||||||
study->size = sizeof(pcre_study_data);
|
|
||||||
study->options = PCRE_STUDY_MAPPED;
|
|
||||||
memcpy(study->start_bits, start_bits, sizeof(start_bits));
|
|
||||||
|
|
||||||
return extra;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* End of study.c */
|
|
|
@ -1,35 +0,0 @@
|
||||||
/*------------------------------------------------------------------
 * Wrapper for the pcre modules.
 *
 * Builds the bundled pcre sources as one file by including the
 * individual .c files directly. For this to work, pcre/internal.h
 * had to be given protection against multiple inclusion.
 *------------------------------------------------------------------
 */

#include "driver.h"

#include "pkg-pcre.h"

#ifdef USE_BUILTIN_PCRE

#include "interpret.h"
#include "simulate.h"

/* Definition of NEWLINE, made available to the sources included below. */
#define NEWLINE '\n'

/* pcre gives DEBUG a different meaning than the driver does, so it must
 * not be defined while the pcre sources are compiled. Undefining a name
 * that is not currently defined is a no-op, so no guard is needed.
 */
#undef DEBUG

/* UTF8 support activated --lynX 2008 */
#define SUPPORT_UTF8

#include "pcre/pcre.c"
#include "pcre/get.c"
#include "pcre/maketables.c"
#include "pcre/study.c"

#endif /* USE_BUILTIN_PCRE */
|
|
|
@ -11,13 +11,8 @@
|
||||||
|
|
||||||
#include "driver.h"
|
#include "driver.h"
|
||||||
|
|
||||||
#if defined(USE_BUILTIN_PCRE) || !defined(HAS_PCRE)
|
#ifdef HAS_PCRE
|
||||||
# include "pcre/pcre.h"
|
#include <pcre.h>
|
||||||
# if !defined(USE_BUILTIN_PCRE)
|
|
||||||
# define USE_BUILTIN_PCRE
|
|
||||||
# endif
|
|
||||||
#else
|
|
||||||
# include <pcre.h>
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Error code to be returned if too many backtracks are detected.
|
/* Error code to be returned if too many backtracks are detected.
|
||||||
|
@ -25,7 +20,7 @@
|
||||||
#ifdef PCRE_ERROR_RECURSIONLIMIT
|
#ifdef PCRE_ERROR_RECURSIONLIMIT
|
||||||
#define RE_ERROR_BACKTRACK PCRE_ERROR_RECURSIONLIMIT
|
#define RE_ERROR_BACKTRACK PCRE_ERROR_RECURSIONLIMIT
|
||||||
#else
|
#else
|
||||||
#define RE_ERROR_BACKTRACK PCRE_ERROR_MATCHLIMIT
|
#define RE_ERROR_BACKTRACK (-8) // PCRE_ERROR_MATCHLIMIT from PCRE
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif /* PKG_PCRE_H_ */
|
#endif /* PKG_PCRE_H_ */
|
||||||
|
|
|
@ -293,18 +293,7 @@ enable_lpc_array_calls=yes
|
||||||
enable_use_deprecated=no
|
enable_use_deprecated=no
|
||||||
|
|
||||||
# Enable PCRE instead of traditional regexps
|
# Enable PCRE instead of traditional regexps
|
||||||
# 'no': use traditional regexps by default
|
enable_use_pcre=yes
|
||||||
# 'no-builtin': use traditional regexps by default, or the builtin PCRE
|
|
||||||
# package if PCRE is requested
|
|
||||||
# 'builtin': use PCRE package by default, using the builtin package
|
|
||||||
# 'yes': use the system's PCRE package if available, otherwise the
|
|
||||||
# builtin package
|
|
||||||
#
|
|
||||||
# yes is nicer to your system, but currently many linuces are distributed
|
|
||||||
# with old non-utf8 pcre variants, so let's use our own copy by default
|
|
||||||
# until this is settled. if you think your pcre installation is smarter,
|
|
||||||
# say "yes" here instead of "builtin"
|
|
||||||
enable_use_pcre=builtin
|
|
||||||
|
|
||||||
#-- COMPILATION
|
#-- COMPILATION
|
||||||
|
|
||||||
|
|
|
@ -17,7 +17,7 @@ version_longtype="stable"
|
||||||
# A timestamp, to be used by bumpversion and other scripts.
|
# A timestamp, to be used by bumpversion and other scripts.
|
||||||
# It can be used, for example, to 'touch' this file on every build, thus
|
# It can be used, for example, to 'touch' this file on every build, thus
|
||||||
# forcing revision control systems to add it on every checkin automatically.
|
# forcing revision control systems to add it on every checkin automatically.
|
||||||
version_stamp="Sun Aug 14 19:57:43 CEST 2016"
|
version_stamp="Thu Sep 29 11:49:32 CEST 2016"
|
||||||
|
|
||||||
# Okay, LDMUD is using 3.x.x so to avoid conflicts let's just use 4.x.x
|
# Okay, LDMUD is using 3.x.x so to avoid conflicts let's just use 4.x.x
|
||||||
version_major=4
|
version_major=4
|
||||||
|
|
Loading…
Reference in a new issue