From f5b5e292795b65df1847b90ab5b23889828ee28b Mon Sep 17 00:00:00 2001 From: Philip Paeps Date: Wed, 1 Apr 2026 16:49:18 +0800 Subject: [PATCH] Vendor import of expat 2.7.5 --- Changes | 159 +- Makefile.am | 3 +- Makefile.in | 6 +- README.md | 13 +- configure.ac | 66 +- doc/Makefile.in | 3 + doc/reference.html | 4677 +++++++++++++++++++++-------------- doc/xmlwf.1 | 15 +- doc/xmlwf.xml | 488 ++-- examples/Makefile.in | 3 + expat_config.h.in | 3 - fix-xmltest-log.sh | 5 +- lib/Makefile.am | 6 +- lib/Makefile.in | 21 +- lib/expat.h | 4 +- lib/expat_external.h | 5 +- lib/internal.h | 2 +- lib/libexpat.map.in | 119 + lib/xmlparse.c | 173 +- lib/xmlrole.c | 4 +- lib/xmltok.c | 4 +- lib/xmltok_ns.c | 7 +- tests/Makefile.in | 3 + tests/basic_tests.c | 74 +- tests/benchmark/Makefile.in | 3 + tests/handlers.c | 12 +- tests/handlers.h | 5 +- tests/misc_tests.c | 35 +- tests/nsalloc_tests.c | 27 + xmlwf/Makefile.in | 3 + xmlwf/xmlfile.c | 4 +- xmlwf/xmlwf.c | 13 +- xmlwf/xmlwf_helpgen.py | 186 +- 33 files changed, 3817 insertions(+), 2334 deletions(-) create mode 100644 lib/libexpat.map.in diff --git a/Changes b/Changes index 01e54b67641..2b3704a69b7 100644 --- a/Changes +++ b/Changes @@ -10,37 +10,160 @@ !! ~~~~~~~~~~~~ !! !! The following topics need *additional skilled C developers* to progress !! !! in a timely manner or at all (loosely ordered by descending priority): !! +!! _______________________ !! +!! - teaming up on fixing the UNFIXED SECURITY ISSUES listed at: !! +!! """"""""""""""""""""""" !! +!! https://github.com/libexpat/libexpat/issues/1160 !! !! !! !! - teaming up on researching and fixing future security reports and !! !! ClusterFuzz findings with few-days-max response times in communication !! !! in order to (1) have a sound fix ready before the end of a 90 days !! !! grace period and (2) in a sustainable manner, !! -!! - helping CPython Expat bindings with supporting Expat's amplification !! -!! 
attack protection API (https://github.com/python/cpython/issues/90949): !! -!! - XML_SetAllocTrackerActivationThreshold !! -!! - XML_SetAllocTrackerMaximumAmplification !! -!! - XML_SetBillionLaughsAttackProtectionActivationThreshold !! -!! - XML_SetBillionLaughsAttackProtectionMaximumAmplification !! -!! - helping Perl's XML::Parser Expat bindings with supporting Expat's !! -!! security API (https://github.com/cpan-authors/XML-Parser/issues/102): !! -!! - XML_SetAllocTrackerActivationThreshold !! -!! - XML_SetAllocTrackerMaximumAmplification !! -!! - XML_SetBillionLaughsAttackProtectionActivationThreshold !! -!! - XML_SetBillionLaughsAttackProtectionMaximumAmplification !! -!! - XML_SetReparseDeferralEnabled !! +!! !! !! - implementing and auto-testing XML 1.0r5 support !! !! (needs discussion before pull requests), !! -!! - smart ideas on fixing the Autotools CMake files generation issue !! -!! without breaking CI (needs discussion before pull requests), !! -!! - pushing migration from `int` to `size_t` further !! -!! including edge-cases test coverage (needs discussion before anything). !! !! !! !! For details, please reach out via e-mail to sebastian@pipping.org so we !! !! can schedule a voice call on the topic, in English or German. !! !! !! -!! THANK YOU! Sebastian Pipping -- Berlin, 2024-03-09 !! +!! THANK YOU! Sebastian Pipping -- Berlin, 2026-03-17 !! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +Release 2.7.5 Tue March 17 2026 + Security fixes: + #1158 CVE-2026-32776 -- Fix NULL function pointer dereference for + empty external parameter entities; it takes use of both + functions XML_ExternalEntityParserCreate and + XML_SetParamEntityParsing for an application to be + vulnerable. 
+ #1161 #1162 CVE-2026-32777 -- Protect from XML_TOK_INSTANCE_START + infinite loop in function entityValueProcessor; it takes + use of both functions XML_ExternalEntityParserCreate and + XML_SetParamEntityParsing for an application to be + vulnerable. + #1163 CVE-2026-32778 -- Fix NULL dereference in function setContext + on retry after an earlier out-of-memory condition; it takes + use of function XML_ParserCreateNS or XML_ParserCreate_MM + for an application to be vulnerable. + #1160 Three more unfixed vulnerabilities left + + Other changes: + #1146 #1147 Autotools: Fix condition for symbol versioning check, in + particular when compiling with slibtool (not libtool) + #1156 Address Cppcheck >=2.20.0 warnings + #1153 tests: Make test_buffer_can_grow_to_max work for MinGW on + Ubuntu 24.04 + #1157 #1159 Version info bumped from 12:2:11 (libexpat*.so.1.11.2) + to 12:3:11 (libexpat*.so.1.11.3); see https://verbump.de/ + for what these numbers do + + Infrastructure: + #1148 CI: Fix FreeBSD and Solaris CI + #1149 CI: Bump to WASI SDK 30 + #1153 CI: Adapt to breaking changes with Ubuntu 22.04 + #1156 CI: Adapt to breaking changes in Cppcheck + + Special thanks to: + Berkay Eren Ürün + Christian Ng + Fabio Scaccabarozzi + Francesco Bertolaccini + Mark Brand + Rhodri James + and + AddressSanitizer + Buttercup + OSS-Fuzz / ClusterFuzz + Trail of Bits + +Release 2.7.4 Sat January 31 2026 + Security fixes: + #1131 CVE-2026-24515 -- Function XML_ExternalEntityParserCreate + failed to copy the encoding handler data passed to + XML_SetUnknownEncodingHandler from the parent to the new + subparser. This can cause a NULL dereference (CWE-476) from + external entities that declare use of an unknown encoding. + The expected impact is denial of service. It takes use of + both functions XML_ExternalEntityParserCreate and + XML_SetUnknownEncodingHandler for an application to be + vulnerable. 
+ #1075 CVE-2026-25210 -- Add missing check for integer overflow + related to buffer size determination in function doContent + + Bug fixes: + #1073 lib: Fix missing undoing of group size expansion in doProlog + failure cases + #1107 xmlwf: Fix a memory leak + #1104 WASI: Fix format specifiers for 32bit WASI SDK + + Other changes: + #1105 lib: Fix strict aliasing + #1106 lib: Leverage feature "flexible array member" of C99 + #1051 lib: Swap (size_t)(-1) for C99 equivalent SIZE_MAX + #1109 lib|xmlwf: Return NULL instead of 0 for pointers + #1068 lib|Windows: Clean up use of macro _MSC_EXTENSIONS with MSVC + #1112 lib: Remove unused import + #1110 xmlwf: Warn about XXE in --help output (and man page) + #1102 #1103 WASI: Stop using getpid + #1113 #1130 Autotools: Drop file expat.m4 that provided obsolete Autoconf + macro AM_WITH_EXPAT + #1123 Autotools: Limit -Wno-pedantic-ms-format to MinGW + #1129 #1134 .. + #1087 Autotools|macOS: Sync CMake templates with CMake 4.0 + #1139 #1140 Autotools|CMake: Introduce off-by-default symbol versioning + The related build system flags are: + - For Autotools, configure with --enable-symbol-versioning + - For CMake, configure with -DEXPAT_SYMBOL_VERSIONING=ON + Please double-check for consequences before activating + this inside distro packaging. Bug reports welcome! + #1117 Autotools|CMake: Remove libbsd support + #1105 Autotools|CMake: Stop using -fno-strict-aliasing, and use + -Wstrict-aliasing=3 instead + #1124 Autotools|CMake: Prefer command gsed (GNU sed) over sed + (e.g. for Solaris) inside fix-xmltest-log.sh + #1067 CMake: Detect and warn about unusable check_c_compiler_flag + #1137 CMake: Drop support for CMake <3.17 + #1138 CMake|Windows: Fix libexpat.def.cmake version comments + + #1086 #1110 docs: Add warning about external reference handlers and XXE + #1066 docs: Be explicit that parent parsers need to outlive + subparsers + #1089 .. + #1090 #1091 .. + #1092 #1093 .. + #1094 #1098 .. 
+ #1115 #1116 docs: Misc non-content improvements to doc/reference.html + #1132 #1133 Version info bumped from 12:1:11 (libexpat*.so.1.11.1) + to 12:2:11 (libexpat*.so.1.11.2); see https://verbump.de/ + for what these numbers do + + Infrastructure: + #1119 #1121 Document guidelines for contributing to Expat + #1120 Introduce a pull request template + #1074 CI: Stop using about-to-be-removed image "macos-13" + #1083 #1088 CI: Mitigate random Wine crashes + #1104 CI: Cover compilation with WASI SDK + #1116 CI: Enforce clean doc XML formatting + #1124 .. + #1135 #1136 CI: Cover Solaris 11.4 + #1125 CI: Extend CI coverage of FreeBSD + #1139 #1140 CI: Cover symbol versioning + #1114 xmlwf: Reformat helpgen code (using Black 25.12.0) + #1071 .gitignore: Add files CPackConfig.cmake and + CPackSourceConfig.cmake + + Special thanks to: + Alfonso Gregory + Bénédikt Tran + Gordon Messmer + Hanno Böck + Jakub Kulík + Matthew Fernandez + Neil Pang + Rosen Penev + and + Artiphishell Inc. + Release 2.7.3 Wed September 24 2025 Security fixes: #1046 #1048 Fix alignment of internal allocations for some non-amd64 diff --git a/Makefile.am b/Makefile.am index d612d432bec..72f2fca59d6 100644 --- a/Makefile.am +++ b/Makefile.am @@ -6,7 +6,7 @@ # \___/_/\_\ .__/ \__,_|\__| # |_| XML parser # -# Copyright (c) 2017-2025 Sebastian Pipping +# Copyright (c) 2017-2026 Sebastian Pipping # Copyright (c) 2018 KangLin # Copyright (c) 2022 Johnny Jazeix # Copyright (c) 2023 Sony Corporation / Snild Dolkow @@ -94,7 +94,6 @@ EXTRA_DIST = \ $(_EXTRA_DIST_CMAKE) \ $(_EXTRA_DIST_WINDOWS) \ \ - conftools/expat.m4 \ conftools/get-version.sh \ \ fuzz/xml_lpm_fuzzer.cpp \ diff --git a/Makefile.in b/Makefile.in index b799591f2fc..aa41b152525 100644 --- a/Makefile.in +++ b/Makefile.in @@ -22,7 +22,7 @@ # \___/_/\_\ .__/ \__,_|\__| # |_| XML parser # -# Copyright (c) 2017-2025 Sebastian Pipping +# Copyright (c) 2017-2026 Sebastian Pipping # Copyright (c) 2018 KangLin # Copyright (c) 2022 Johnny Jazeix # 
Copyright (c) 2023 Sony Corporation / Snild Dolkow @@ -395,6 +395,9 @@ SO_MINOR = @SO_MINOR@ SO_PATCH = @SO_PATCH@ STRIP = @STRIP@ VERSION = @VERSION@ +VSCRIPT_LDFLAGS = @VSCRIPT_LDFLAGS@ +_EXPAT_COMMENT_ATTR_INFO = @_EXPAT_COMMENT_ATTR_INFO@ +_EXPAT_COMMENT_DTD_OR_GE = @_EXPAT_COMMENT_DTD_OR_GE@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ @@ -497,7 +500,6 @@ EXTRA_DIST = \ $(_EXTRA_DIST_CMAKE) \ $(_EXTRA_DIST_WINDOWS) \ \ - conftools/expat.m4 \ conftools/get-version.sh \ \ fuzz/xml_lpm_fuzzer.cpp \ diff --git a/README.md b/README.md index c2f288ca124..a67548be7fc 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ > at the top of the `Changes` file. -# Expat, Release 2.7.3 +# Expat, Release 2.7.5 This is Expat, a C99 library for parsing [XML 1.0 Fourth Edition](https://www.w3.org/TR/2006/REC-xml-20060816/), started by @@ -234,11 +234,6 @@ overrides the in-makefile set `DESTDIR`, because variable-setting priority is Note: This only applies to the Expat library itself, building UTF-16 versions of xmlwf and the tests is currently not supported. -When using Expat with a project using autoconf for configuration, you -can use the probing macro in `conftools/expat.m4` to determine how to -include Expat. See the comments at the top of that file for more -information. - A reference manual is available in the file `doc/reference.html` in this distribution. 
@@ -297,15 +292,15 @@ EXPAT_OSSFUZZ_BUILD:BOOL=OFF // Build a shared expat library EXPAT_SHARED_LIBS:BOOL=ON +// Define to provide symbol versioning for dependency generation +EXPAT_SYMBOL_VERSIONING:BOOL=OFF + // Treat all compiler warnings as errors EXPAT_WARNINGS_AS_ERRORS:BOOL=OFF // Make use of getrandom function (ON|OFF|AUTO) [default=AUTO] EXPAT_WITH_GETRANDOM:STRING=AUTO -// Utilize libbsd (for arc4random_buf) -EXPAT_WITH_LIBBSD:BOOL=OFF - // Make use of syscall SYS_getrandom (ON|OFF|AUTO) [default=AUTO] EXPAT_WITH_SYS_GETRANDOM:STRING=AUTO ``` diff --git a/configure.ac b/configure.ac index 072fea41ee8..6d028b5f665 100644 --- a/configure.ac +++ b/configure.ac @@ -11,7 +11,7 @@ dnl Copyright (c) 2000 Clark Cooper dnl Copyright (c) 2000-2005 Fred L. Drake, Jr. dnl Copyright (c) 2001-2003 Greg Stein dnl Copyright (c) 2006-2012 Karl Waclawek -dnl Copyright (c) 2016-2025 Sebastian Pipping +dnl Copyright (c) 2016-2026 Sebastian Pipping dnl Copyright (c) 2017 S. P. Zeidler dnl Copyright (c) 2017 Stephen Groat dnl Copyright (c) 2017-2020 Joe Orton @@ -25,6 +25,10 @@ dnl Copyright (c) 2020 Jeffrey Walton dnl Copyright (c) 2024 Ferenc Géczi dnl Copyright (c) 2024 Dag-Erling Smørgrav dnl Copyright (c) 2025 Matthew Fernandez +dnl Copyright (c) 2025 Alfonso Gregory +dnl Copyright (c) 2026 Rosen Penev +dnl Copyright (c) 2026 Gordon Messmer +dnl Copyright (c) 2026 Fabio Scaccabarozzi dnl Licensed under the MIT license: dnl dnl Permission is hereby granted, free of charge, to any person obtaining @@ -86,7 +90,7 @@ dnl If the API changes incompatibly set LIBAGE back to 0 dnl LIBCURRENT=12 # sync -LIBREVISION=1 # with +LIBREVISION=3 # with LIBAGE=11 # CMakeLists.txt! AC_CONFIG_HEADERS([expat_config.h]) @@ -117,10 +121,12 @@ AS_IF([test "$GCC" = yes], dnl GCC don't support it and it causes extra warnings that are only dnl distracting; avoid. 
AX_APPEND_COMPILE_FLAGS([-fexceptions], [AM_CFLAGS]) - AX_APPEND_COMPILE_FLAGS([-fno-strict-aliasing -Wmissing-prototypes -Wstrict-prototypes], [AM_CFLAGS]) + AX_APPEND_COMPILE_FLAGS([-Wstrict-aliasing=3 -Wmissing-prototypes -Wstrict-prototypes], [AM_CFLAGS]) AX_APPEND_COMPILE_FLAGS([-pedantic -Wduplicated-cond -Wduplicated-branches -Wlogical-op], [AM_CFLAGS]) AX_APPEND_COMPILE_FLAGS([-Wrestrict -Wnull-dereference -Wjump-misses-init -Wdouble-promotion], [AM_CFLAGS]) - AX_APPEND_COMPILE_FLAGS([-Wshadow -Wformat=2 -Wno-pedantic-ms-format -Wmisleading-indentation], [AM_CFLAGS])]) + AX_APPEND_COMPILE_FLAGS([-Wshadow -Wformat=2 -Wmisleading-indentation], [AM_CFLAGS]) + AS_CASE(["${host_os}"], [mingw*], [AX_APPEND_COMPILE_FLAGS([-Wno-pedantic-ms-format], [AM_CFLAGS])]) + ]) AC_LANG_PUSH([C++]) AC_PROG_CXX @@ -131,11 +137,23 @@ AS_IF([test "$GCC" = yes], dnl GCC don't support it and it causes extra warnings that are only dnl distracting; avoid. AX_APPEND_COMPILE_FLAGS([-fexceptions], [AM_CXXFLAGS]) - AX_APPEND_COMPILE_FLAGS([-fno-strict-aliasing], [AM_CXXFLAGS])]) + AX_APPEND_COMPILE_FLAGS([-Wstrict-aliasing=3], [AM_CXXFLAGS])]) AC_LANG_POP([C++]) AS_IF([test "$GCC" = yes], - [AX_APPEND_LINK_FLAGS([-fno-strict-aliasing],[AM_LDFLAGS])]) + [AX_APPEND_LINK_FLAGS([-Wstrict-aliasing=3],[AM_LDFLAGS])]) + +AC_ARG_ENABLE([symbol-versioning], + [AS_HELP_STRING([--enable-symbol-versioning], + [provide symbol versioning for dependency generation @<:@default=no@:>@])], + [enable_symbol_versioning=$enableval], + [enable_symbol_versioning=no]) +AS_IF([test "x$enable_symbol_versioning" != xno], + [VSCRIPT_LDFLAGS="-Wl,--version-script" + AC_SUBST([VSCRIPT_LDFLAGS]) + ]) +AM_CONDITIONAL([HAVE_VSCRIPT], + [test "x$enable_symbol_versioning" != xno]) dnl patching ${archive_cmds} to affect generation of file "libtool" to fix linking with clang (issue #312) AS_CASE(["$LD"],[*clang*], @@ -199,23 +217,9 @@ AM_CONDITIONAL([_INTERNAL_LARGE_SIZE], [echo -- "${CPPFLAGS}${CFLAGS}" | ${FGREP LT_LIB_M 
-AC_ARG_WITH([libbsd], - [AS_HELP_STRING([--with-libbsd], [utilize libbsd (for arc4random_buf)])], - [], - [with_libbsd=no]) -AS_IF([test "x${with_libbsd}" != xno], - [AC_CHECK_LIB([bsd], - [arc4random_buf], - [], - [AS_IF([test "x${with_libbsd}" = xyes], - [AC_MSG_ERROR([Enforced use of libbsd cannot be satisfied.])])])]) -AC_MSG_CHECKING([for arc4random_buf (BSD, libbsd or glibc 2.36+)]) +AC_MSG_CHECKING([for arc4random_buf (BSD or glibc 2.36+)]) AC_LINK_IFELSE([AC_LANG_SOURCE([ - #if defined(HAVE_LIBBSD) - # include - #else - # include /* for arc4random_buf on BSD */ - #endif + #include int main(void) { char dummy[[123]]; // double brackets for m4 arc4random_buf(dummy, 0U); @@ -226,13 +230,9 @@ AC_LINK_IFELSE([AC_LANG_SOURCE([ AC_MSG_RESULT([yes])], [AC_MSG_RESULT([no]) - AC_MSG_CHECKING([for arc4random (BSD, macOS, libbsd or glibc 2.36+)]) + AC_MSG_CHECKING([for arc4random (BSD, macOS, or glibc 2.36+)]) AC_LINK_IFELSE([AC_LANG_SOURCE([ - #if defined(HAVE_LIBBSD) - # include - #else - # include - #endif + #include int main(void) { arc4random(); return 0; @@ -381,9 +381,14 @@ dnl NOTE: The *_TRUE variables read here are Automake conditionals dnl that are either set to "" when enabled or to "#" when disabled dnl (because they are used to dynamically comment out certain things) AS_IF([test "x${enable_xml_attr_info}" = xyes], - [EXPAT_ATTR_INFO=ON], - [EXPAT_ATTR_INFO=OFF]) + [EXPAT_ATTR_INFO=ON + _EXPAT_COMMENT_ATTR_INFO=" "], + [EXPAT_ATTR_INFO=OFF + _EXPAT_COMMENT_ATTR_INFO="#"]) +AC_SUBST([_EXPAT_COMMENT_ATTR_INFO]) EXPAT_DTD=ON +_EXPAT_COMMENT_DTD_OR_GE=" " +AC_SUBST([_EXPAT_COMMENT_DTD_OR_GE]) AS_IF([test "x${_INTERNAL_LARGE_SIZE_TRUE}" = x], [EXPAT_LARGE_SIZE=ON], [EXPAT_LARGE_SIZE=OFF]) @@ -461,6 +466,7 @@ AC_CONFIG_FILES([Makefile] [doc/Makefile] [examples/Makefile] [lib/Makefile] + [lib/libexpat.map] [tests/Makefile] [tests/benchmark/Makefile] [xmlwf/Makefile]) diff --git a/doc/Makefile.in b/doc/Makefile.in index 13be5107f89..0bda758420f 100644 --- 
a/doc/Makefile.in +++ b/doc/Makefile.in @@ -293,6 +293,9 @@ SO_MINOR = @SO_MINOR@ SO_PATCH = @SO_PATCH@ STRIP = @STRIP@ VERSION = @VERSION@ +VSCRIPT_LDFLAGS = @VSCRIPT_LDFLAGS@ +_EXPAT_COMMENT_ATTR_INFO = @_EXPAT_COMMENT_ATTR_INFO@ +_EXPAT_COMMENT_DTD_OR_GE = @_EXPAT_COMMENT_DTD_OR_GE@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ diff --git a/doc/reference.html b/doc/reference.html index d2dded49943..5faa8d6515a 100644 --- a/doc/reference.html +++ b/doc/reference.html @@ -1,9 +1,9 @@ - + - - - - Expat XML Parser - - - - - - -
-

- The Expat XML Parser - Release 2.7.3 -

-
-
-

Expat is a library, written in C, for parsing XML documents. It's -the underlying XML parser for the open source Mozilla project, Perl's -XML::Parser, Python's xml.parsers.expat, and -other open-source XML parsers.

+ + Expat XML Parser + + + + + + +
+

+ The Expat XML Parser Release 2.7.5 +

+
-

This library is the creation of James Clark, who's also given us -groff (an nroff look-alike), Jade (an implementation of ISO's DSSSL -stylesheet language for SGML), XP (a Java XML parser package), XT (a -Java XSL engine). James was also the technical lead on the XML -Working Group at W3C that produced the XML specification.

+
+

+ Expat is a library, written in C, for parsing XML documents. It's the underlying + XML parser for the open source Mozilla project, Perl's XML::Parser, + Python's xml.parsers.expat, and other open-source XML parsers. +

-

This is free software, licensed under the MIT/X Consortium license. You may download it -from the Expat home page. -

+

+ This library is the creation of James Clark, who's also given us groff (an nroff + look-alike), Jade (an implementation of ISO's DSSSL stylesheet language for + SGML), XP (a Java XML parser package), XT (a Java XSL engine). James was also the + technical lead on the XML Working Group at W3C that produced the XML + specification. +

-

The bulk of this document was originally commissioned as an article -by XML.com. They graciously allowed -Clark Cooper to retain copyright and to distribute it with Expat. -This version has been substantially extended to include documentation -on features which have been added since the original article was -published, and additional information on using the original -interface.

+

+ This is free software, licensed under the MIT/X Consortium + license. You may download it from the + Expat home page. +

+ +

+ The bulk of this document was originally commissioned as an article by XML.com. They graciously allowed Clark Cooper to + retain copyright and to distribute it with Expat. This version has been + substantially extended to include documentation on features which have been added + since the original article was published, and additional information on using the + original interface. +

+ +
+ +

+ Table of Contents +

-
-

Table of Contents

- -
-

Overview

+
-

Expat is a stream-oriented parser. You register callback (or -handler) functions with the parser and then start feeding it the -document. As the parser recognizes parts of the document, it will -call the appropriate handler for that part (if you've registered one.) -The document is fed to the parser in pieces, so you can start parsing -before you have all the document. This also allows you to parse really -huge documents that won't fit into memory.

+

+ Overview +

-

Expat can be intimidating due to the many kinds of handlers and -options you can set. But you only need to learn four functions in -order to do 90% of what you'll want to do with it:

+

+ Expat is a stream-oriented parser. You register callback (or handler) functions + with the parser and then start feeding it the document. As the parser recognizes + parts of the document, it will call the appropriate handler for that part (if + you've registered one.) The document is fed to the parser in pieces, so you can + start parsing before you have all the document. This also allows you to parse + really huge documents that won't fit into memory. +

-
+

+ Expat can be intimidating due to the many kinds of handlers and options you can + set. But you only need to learn four functions in order to do 90% of what you'll + want to do with it: +

-
XML_ParserCreate
-
Create a new parser object.
+
+
+ XML_ParserCreate +
-
XML_SetElementHandler
-
Set handlers for start and end tags.
+
+ Create a new parser object. +
-
XML_SetCharacterDataHandler
-
Set handler for text.
+
+ XML_SetElementHandler +
-
XML_Parse
-
Pass a buffer full of document to the parser
-
+
+ Set handlers for start and end tags. +
-

These functions and others are described in the reference part of this document. The reference -section also describes in detail the parameters passed to the -different types of handlers.

+
+ XML_SetCharacterDataHandler +
-

Let's look at a very simple example program that only uses 3 of the -above functions (it doesn't need to set a character handler.) The -program outline.c prints an -element outline, indenting child elements to distinguish them from the -parent element that contains them. The start handler does all the -work. It prints two indenting spaces for every level of ancestor -elements, then it prints the element and attribute -information. Finally it increments the global Depth -variable.

+
+ Set handler for text. +
-
+        
+ XML_Parse +
+ +
+ Pass a buffer full of document to the parser +
+
+ +

+ These functions and others are described in the reference part of this document. The reference section also + describes in detail the parameters passed to the different types of handlers. +

+ +

+ Let's look at a very simple example program that only uses 3 of the above + functions (it doesn't need to set a character handler.) The program outline.c prints an element outline, indenting child + elements to distinguish them from the parent element that contains them. The + start handler does all the work. It prints two indenting spaces for every level + of ancestor elements, then it prints the element and attribute information. + Finally it increments the global Depth variable. +

+ +
 int Depth;
 
 void XMLCALL
@@ -260,39 +544,41 @@ start(void *data, const char *el, const char **attr) {
   Depth++;
 }  /* End of start handler */
 
+

+ The end tag simply does the bookkeeping work of decrementing Depth. +

-

The end tag simply does the bookkeeping work of decrementing -Depth.

-
+      
 void XMLCALL
 end(void *data, const char *el) {
   Depth--;
 }  /* End of end handler */
 
+

+ Note the XMLCALL annotation used for the callbacks. This is used to + ensure that the Expat and the callbacks are using the same calling convention in + case the compiler options used for Expat itself and the client code are + different. Expat tries not to care what the default calling convention is, though + it may require that it be compiled with a default convention of "cdecl" on some + platforms. For code which uses Expat, however, the calling convention is + specified by the XMLCALL annotation on most platforms; callbacks + should be defined using this annotation. +

-

Note the XMLCALL annotation used for the callbacks. -This is used to ensure that the Expat and the callbacks are using the -same calling convention in case the compiler options used for Expat -itself and the client code are different. Expat tries not to care -what the default calling convention is, though it may require that it -be compiled with a default convention of "cdecl" on some platforms. -For code which uses Expat, however, the calling convention is -specified by the XMLCALL annotation on most platforms; -callbacks should be defined using this annotation.

+

+ The XMLCALL annotation was added in Expat 1.95.7, but existing + working Expat applications don't need to add it (since they are already using the + "cdecl" calling convention, or they wouldn't be working). The annotation is only + needed if the default calling convention may be something other than "cdecl". To + use the annotation safely with older versions of Expat, you can conditionally + define it after including Expat's header file: +

-

The XMLCALL annotation was added in Expat 1.95.7, but -existing working Expat applications don't need to add it (since they -are already using the "cdecl" calling convention, or they wouldn't be -working). The annotation is only needed if the default calling -convention may be something other than "cdecl". To use the annotation -safely with older versions of Expat, you can conditionally define it -after including Expat's header file:

- -
+      
 #include <expat.h>
 
 #ifndef XMLCALL
-#if defined(_MSC_EXTENSIONS) && !defined(__BEOS__) && !defined(__CYGWIN__)
+#if defined(_MSC_VER) && !defined(__BEOS__) && !defined(__CYGWIN__)
 #define XMLCALL __cdecl
 #elif defined(__GNUC__)
 #define XMLCALL __attribute__((cdecl))
@@ -301,186 +587,256 @@ safely with older versions of Expat, you can conditionally define it
 #endif
 #endif
 
+

+ After creating the parser, the main program just has the job of shoveling the + document to the parser so that it can do its work. +

-

After creating the parser, the main program just has the job of -shoveling the document to the parser so that it can do its work.

+
-
-

Building and Installing Expat

+

+ Building and Installing Expat +

-

The Expat distribution comes as a compressed (with GNU gzip) tar -file. You may download the latest version from Source Forge. After -unpacking this, cd into the directory. Then follow either the Win32 -directions or Unix directions below.

+

+ The Expat distribution comes as a compressed (with GNU gzip) tar file. You may + download the latest version from Source Forge. After unpacking this, + cd into the directory. Then follow either the Win32 directions or Unix directions + below. +

-

Building under Win32

+

+ Building under Win32 +

-

If you're using the GNU compiler under cygwin, follow the Unix -directions in the next section. Otherwise if you have Microsoft's -Developer Studio installed, -you can use CMake to generate a .sln file, e.g. - -cmake -G"Visual Studio 17 2022" -DCMAKE_BUILD_TYPE=RelWithDebInfo . -, and build Expat using msbuild /m expat.sln after.

+

+ If you're using the GNU compiler under cygwin, follow the Unix directions in the + next section. Otherwise if you have Microsoft's Developer Studio installed, you + can use CMake to generate a .sln file, e.g. cmake -G"Visual + Studio 17 2022" -DCMAKE_BUILD_TYPE=RelWithDebInfo . , and build Expat + using msbuild /m expat.sln after. +

-

Alternatively, you may download the Win32 binary package that -contains the "expat.h" include file and a pre-built DLL.

+

+ Alternatively, you may download the Win32 binary package that contains the + "expat.h" include file and a pre-built DLL. +

-

Building under Unix (or GNU)

+

+ Building under Unix (or GNU) +

-

First you'll need to run the configure shell script in order to -configure the Makefiles and headers for your system.

+

+ First you'll need to run the configure shell script in order to configure the + Makefiles and headers for your system. +

-

If you're happy with all the defaults that configure picks for you, -and you have permission on your system to install into /usr/local, you -can install Expat with this sequence of commands:

+

+ If you're happy with all the defaults that configure picks for you, and you have + permission on your system to install into /usr/local, you can install Expat with + this sequence of commands: +

-
+      
 ./configure
 make
 make install
 
+

+ There are some options that you can provide to this script, but the only one + we'll mention here is the --prefix option. You can find out all the + options available by running configure with just the --help option. +

-

There are some options that you can provide to this script, but the -only one we'll mention here is the --prefix option. You -can find out all the options available by running configure with just -the --help option.

+

+ By default, the configure script sets things up so that the library gets + installed in /usr/local/lib and the associated header file in + /usr/local/include. But if you were to give the option, + --prefix=/home/me/mystuff, then the library and header would get + installed in /home/me/mystuff/lib and + /home/me/mystuff/include respectively. +

-

By default, the configure script sets things up so that the library -gets installed in /usr/local/lib and the associated -header file in /usr/local/include. But if you were to -give the option, --prefix=/home/me/mystuff, then the -library and header would get installed in -/home/me/mystuff/lib and -/home/me/mystuff/include respectively.

+

+ Configuring Expat Using the Pre-Processor +

-

Configuring Expat Using the Pre-Processor

+

+ Expat's feature set can be configured using a small number of pre-processor + definitions. The symbols are: +

-

Expat's feature set can be configured using a small number of -pre-processor definitions. The symbols are:

+
+
+ XML_GE +
-
-
XML_GE
-
-Added in Expat 2.6.0. -Include support for -general entities -(syntax &e1; to reference and -syntax <!ENTITY e1 'value1'> (an internal general entity) or -<!ENTITY e2 SYSTEM 'file2'> (an external general entity) to declare). -With XML_GE enabled, general entities will be replaced by their declared replacement text; -for this to work for external general entities, in addition an -XML_ExternalEntityRefHandler must be set using -XML_SetExternalEntityRefHandler. -Also, enabling XML_GE makes -the functions -XML_SetBillionLaughsAttackProtectionMaximumAmplification and - -XML_SetBillionLaughsAttackProtectionActivationThreshold available. -
-With XML_GE disabled, Expat has a smaller memory footprint and can be faster, but will -not load external general entities and will replace all general entities -(except the predefined five: -amp, apos, gt, lt, quot) -with a self-reference: -for example, referencing an entity e1 via &e1; will be replaced -by text &e1;. -
+
+ Added in Expat 2.6.0. Include support for general + entities (syntax &e1; to reference and syntax + <!ENTITY e1 'value1'> (an internal general entity) or + <!ENTITY e2 SYSTEM 'file2'> (an external general entity) to + declare). With XML_GE enabled, general entities will be replaced + by their declared replacement text; for this to work for external + general entities, in addition an XML_ExternalEntityRefHandler must + be set using XML_SetExternalEntityRefHandler. + Also, enabling XML_GE makes the functions XML_SetBillionLaughsAttackProtectionMaximumAmplification + and XML_SetBillionLaughsAttackProtectionActivationThreshold + available.
+ With XML_GE disabled, Expat has a smaller memory footprint and can + be faster, but will not load external general entities and will replace all + general entities (except the predefined + five: amp, apos, gt, + lt, quot) with a self-reference: for example, + referencing an entity e1 via &e1; will be + replaced by text &e1;. +
-
XML_DTD
-
Include support for using and reporting DTD-based content. If -this is defined, default attribute values from an external DTD subset -are reported and attribute value normalization occurs based on the -type of attributes defined in the external subset. Without -this, Expat has a smaller memory footprint and can be faster, but will -not load external parameter entities or process conditional sections. If defined, makes -the functions -XML_SetBillionLaughsAttackProtectionMaximumAmplification and - -XML_SetBillionLaughsAttackProtectionActivationThreshold available.
+
+ XML_DTD +
-
XML_NS
-
When defined, support for the Namespaces in XML -specification is included.
+
+ Include support for using and reporting DTD-based content. If this is defined, + default attribute values from an external DTD subset are reported and attribute + value normalization occurs based on the type of attributes defined in the + external subset. Without this, Expat has a smaller memory footprint and can be + faster, but will not load external parameter entities or process conditional + sections. If defined, makes the functions XML_SetBillionLaughsAttackProtectionMaximumAmplification + and XML_SetBillionLaughsAttackProtectionActivationThreshold + available. +
-
XML_UNICODE
-
When defined, character data reported to the application is -encoded in UTF-16 using wide characters of the type -XML_Char. This is implied if -XML_UNICODE_WCHAR_T is defined.
+
+ XML_NS +
-
XML_UNICODE_WCHAR_T
-
If defined, causes the XML_Char character type to be -defined using the wchar_t type; otherwise, unsigned -short is used. Defining this implies -XML_UNICODE.
+
+ When defined, support for the Namespaces in XML + specification is included. +
-
XML_LARGE_SIZE
-
If defined, causes the XML_Size and XML_Index -integer types to be at least 64 bits in size. This is intended to support -processing of very large input streams, where the return values of -XML_GetCurrentByteIndex, -XML_GetCurrentLineNumber and -XML_GetCurrentColumnNumber -could overflow. It may not be supported by all compilers, and is turned -off by default.
+
+ XML_UNICODE +
-
XML_CONTEXT_BYTES
-
The number of input bytes of markup context which the parser will -ensure are available for reporting via XML_GetInputContext. This is -normally set to 1024, and must be set to a positive integer to enable. -If this is set to zero, the input context will not be available and XML_GetInputContext will -always report NULL. Without this, Expat has a smaller memory -footprint and can be faster.
+
+ When defined, character data reported to the application is encoded in UTF-16 + using wide characters of the type XML_Char. This is implied if + XML_UNICODE_WCHAR_T is defined. +
-
XML_STATIC
-
On Windows, this should be set if Expat is going to be linked -statically with the code that calls it; this is required to get all -the right MSVC magic annotations correct. This is ignored on other -platforms.
+
+ XML_UNICODE_WCHAR_T +
-
XML_ATTR_INFO
-
If defined, makes the additional function XML_GetAttributeInfo available -for reporting attribute byte offsets.
-
+
+ If defined, causes the XML_Char character type to be defined using + the wchar_t type; otherwise, unsigned short is used. + Defining this implies XML_UNICODE. +
-
-

Using Expat

+
+ XML_LARGE_SIZE +
-

Compiling and Linking Against Expat

+
+ If defined, causes the XML_Size and XML_Index integer + types to be at least 64 bits in size. This is intended to support processing of + very large input streams, where the return values of XML_GetCurrentByteIndex, XML_GetCurrentLineNumber and + XML_GetCurrentColumnNumber could + overflow. It may not be supported by all compilers, and is turned off by + default. +
-

Unless you installed Expat in a location not expected by your -compiler and linker, all you have to do to use Expat in your programs -is to include the Expat header (#include <expat.h>) -in your files that make calls to it and to tell the linker that it -needs to link against the Expat library. On Unix systems, this would -usually be done with the -lexpat argument. Otherwise, -you'll need to tell the compiler where to look for the Expat header -and the linker where to find the Expat library. You may also need to -take steps to tell the operating system where to find this library at -run time.

+
+ XML_CONTEXT_BYTES +
-

On a Unix-based system, here's what a Makefile might look like when -Expat is installed in a standard location:

+
+ The number of input bytes of markup context which the parser will ensure are + available for reporting via XML_GetInputContext. This is normally set to + 1024, and must be set to a positive integer to enable. If this is set to zero, + the input context will not be available and XML_GetInputContext will always report + NULL. Without this, Expat has a smaller memory footprint and can + be faster. +
-
+        
+ XML_STATIC +
+ +
+ On Windows, this should be set if Expat is going to be linked statically with + the code that calls it; this is required to get all the right MSVC magic + annotations correct. This is ignored on other platforms. +
+ +
+ XML_ATTR_INFO +
+ +
+ If defined, makes the additional function XML_GetAttributeInfo available for reporting + attribute byte offsets. +
+
+ +
+ +

+ Using Expat +

+ +

+ Compiling and Linking Against Expat +

+ +

+ Unless you installed Expat in a location not expected by your compiler and + linker, all you have to do to use Expat in your programs is to include the Expat + header (#include <expat.h>) in your files that make calls to + it and to tell the linker that it needs to link against the Expat library. On + Unix systems, this would usually be done with the -lexpat argument. + Otherwise, you'll need to tell the compiler where to look for the Expat header + and the linker where to find the Expat library. You may also need to take steps + to tell the operating system where to find this library at run time. +

+ +

+ On a Unix-based system, here's what a Makefile might look like when Expat is + installed in a standard location: +

+ +
 CC=cc
 LDFLAGS=
 LIBS= -lexpat
 xmlapp: xmlapp.o
         $(CC) $(LDFLAGS) -o xmlapp xmlapp.o $(LIBS)
 
+

+ If you installed Expat in, say, /home/me/mystuff, then the Makefile + would look like this: +

-

If you installed Expat in, say, /home/me/mystuff, then -the Makefile would look like this:

- -
+      
 CC=cc
 CFLAGS= -I/home/me/mystuff/include
 LDFLAGS=
@@ -488,65 +844,71 @@ LIBS= -L/home/me/mystuff/lib -lexpat
 xmlapp: xmlapp.o
         $(CC) $(LDFLAGS) -o xmlapp xmlapp.o $(LIBS)
 
+

+ You'd also have to set the environment variable LD_LIBRARY_PATH to + /home/me/mystuff/lib (or to + ${LD_LIBRARY_PATH}:/home/me/mystuff/lib if LD_LIBRARY_PATH already + has some directories in it) in order to run your application. +

-

You'd also have to set the environment variable -LD_LIBRARY_PATH to /home/me/mystuff/lib (or -to ${LD_LIBRARY_PATH}:/home/me/mystuff/lib if -LD_LIBRARY_PATH already has some directories in it) in order to run -your application.

+

+ Expat Basics +

-

Expat Basics

+

+ As we saw in the example in the overview, the first step in parsing an XML + document with Expat is to create a parser object. There are three functions in the Expat API for creating a parser object. + However, only two of these (XML_ParserCreate and XML_ParserCreateNS) can be used for constructing + a parser for a top-level document. The object returned by these functions is an + opaque pointer (i.e. "expat.h" declares it as void *) to data with further + internal structure. In order to free the memory associated with this object you + must call XML_ParserFree. Note that if + you have provided any user data that gets stored in the + parser, then your application is responsible for freeing it prior to calling + XML_ParserFree. +

-

As we saw in the example in the overview, the first step in parsing -an XML document with Expat is to create a parser object. There are three functions in the Expat API for creating a -parser object. However, only two of these (XML_ParserCreate and XML_ParserCreateNS) can be used for -constructing a parser for a top-level document. The object returned -by these functions is an opaque pointer (i.e. "expat.h" declares it as -void *) to data with further internal structure. In order to free the -memory associated with this object you must call XML_ParserFree. Note that if you have -provided any user data that gets stored in the -parser, then your application is responsible for freeing it prior to -calling XML_ParserFree.

+

+ The objects returned by the parser creation functions are good for parsing only + one XML document or external parsed entity. If your application needs to parse + many XML documents, then it needs to create a parser object for each one. The + best way to deal with this is to create a higher level object that contains all + the default initialization you want for your parser objects. +

-

The objects returned by the parser creation functions are good for -parsing only one XML document or external parsed entity. If your -application needs to parse many XML documents, then it needs to create -a parser object for each one. The best way to deal with this is to -create a higher level object that contains all the default -initialization you want for your parser objects.

+

+ Walking through a document hierarchy with a stream oriented parser will require a + good stack mechanism in order to keep track of current context. For instance, to + answer the simple question, "What element does this text belong to?" requires a + stack, since the parser may have descended into other elements that are children + of the current one and has encountered this text on the way out. +

-

Walking through a document hierarchy with a stream oriented parser -will require a good stack mechanism in order to keep track of current -context. For instance, to answer the simple question, "What element -does this text belong to?" requires a stack, since the parser may have -descended into other elements that are children of the current one and -has encountered this text on the way out.

+

+ The things you're likely to want to keep on a stack are the currently opened + element and its attributes. You push this information onto the stack in the + start handler and you pop it off in the end handler. +

-

The things you're likely to want to keep on a stack are the -currently opened element and it's attributes. You push this -information onto the stack in the start handler and you pop it off in -the end handler.

+

+ For some tasks, it is sufficient to just keep information on what the depth of + the stack is (or would be if you had one.) The outline program shown above + presents one example. Another such task would be skipping over a complete + element. When you see the start tag for the element you want to skip, you set a + skip flag and record the depth at which the element started. When the end tag + handler encounters the same depth, the skipped element has ended and the flag may + be cleared. If you follow the convention that the root element starts at 1, then + you can use the same variable for skip flag and skip depth. +

-

For some tasks, it is sufficient to just keep information on what -the depth of the stack is (or would be if you had one.) The outline -program shown above presents one example. Another such task would be -skipping over a complete element. When you see the start tag for the -element you want to skip, you set a skip flag and record the depth at -which the element started. When the end tag handler encounters the -same depth, the skipped element has ended and the flag may be -cleared. If you follow the convention that the root element starts at -1, then you can use the same variable for skip flag and skip -depth.

- -
+      
 void
 init_info(Parseinfo *info) {
-  info->skip = 0;
-  info->depth = 1;
+  info->skip = 0;
+  info->depth = 1;
   /* Other initializations here */
 }  /* End of init_info */
 
@@ -554,87 +916,91 @@ void XMLCALL
 rawstart(void *data, const char *el, const char **attr) {
   Parseinfo *inf = (Parseinfo *) data;
 
-  if (! inf->skip) {
+  if (! inf->skip) {
     if (should_skip(inf, el, attr)) {
-      inf->skip = inf->depth;
+      inf->skip = inf->depth;
     }
     else
       start(inf, el, attr);     /* This does rest of start handling */
   }
 
-  inf->depth++;
+  inf->depth++;
 }  /* End of rawstart */
 
 void XMLCALL
 rawend(void *data, const char *el) {
   Parseinfo *inf = (Parseinfo *) data;
 
-  inf->depth--;
+  inf->depth--;
 
-  if (! inf->skip)
+  if (! inf->skip)
     end(inf, el);              /* This does rest of end handling */
 
-  if (inf->skip == inf->depth)
-    inf->skip = 0;
+  if (inf->skip == inf->depth)
+    inf->skip = 0;
 }  /* End rawend */
 
+

+ Notice in the above example the difference in how depth is manipulated in the + start and end handlers. The end tag handler should be the mirror image of the + start tag handler. This is necessary to properly model containment. Since, in the + start tag handler, we incremented depth after the main body of start tag + code, then in the end handler, we need to manipulate it before the main + body. If we'd decided to increment it first thing in the start handler, then we'd + have had to decrement it last thing in the end handler. +

-

Notice in the above example the difference in how depth is -manipulated in the start and end handlers. The end tag handler should -be the mirror image of the start tag handler. This is necessary to -properly model containment. Since, in the start tag handler, we -incremented depth after the main body of start tag code, then -in the end handler, we need to manipulate it before the main -body. If we'd decided to increment it first thing in the start -handler, then we'd have had to decrement it last thing in the end -handler.

+

+ Communicating between handlers +

-

Communicating between handlers

+

+ In order to be able to pass information between different handlers without using + globals, you'll need to define a data structure to hold the shared variables. You + can then tell Expat (with the XML_SetUserData function) to pass a pointer to this + structure to the handlers. This is the first argument received by most handlers. + In the reference section, an argument to a callback + function is named userData and has type void * if the + user data is passed; it will have the type XML_Parser if the parser + itself is passed. When the parser is passed, the user data may be retrieved using + XML_GetUserData. +

-

In order to be able to pass information between different handlers -without using globals, you'll need to define a data structure to hold -the shared variables. You can then tell Expat (with the XML_SetUserData function) to pass a -pointer to this structure to the handlers. This is the first -argument received by most handlers. In the reference section, an argument to a callback function is named -userData and have type void * if the user -data is passed; it will have the type XML_Parser if the -parser itself is passed. When the parser is passed, the user data may -be retrieved using XML_GetUserData.

+

+ One common case where multiple calls to a single handler may need to communicate + using an application data structure is the case when content passed to the + character data handler (set by XML_SetCharacterDataHandler) needs to + be accumulated. A common first-time mistake with any of the event-oriented + interfaces to an XML parser is to expect all the text contained in an element to + be reported by a single call to the character data handler. Expat, like many + other XML parsers, reports such data as a sequence of calls; there's no way to + know when the end of the sequence is reached until a different callback is made. + A buffer referenced by the user data structure proves both an effective and + convenient place to accumulate character data. +

+ -

One common case where multiple calls to a single handler may need -to communicate using an application data structure is the case when -content passed to the character data handler (set by XML_SetCharacterDataHandler) needs to be accumulated. A -common first-time mistake with any of the event-oriented interfaces to -an XML parser is to expect all the text contained in an element to be -reported by a single call to the character data handler. Expat, like -many other XML parsers, reports such data as a sequence of calls; -there's no way to know when the end of the sequence is reached until a -different callback is made. A buffer referenced by the user data -structure proves both an effective and convenient place to accumulate -character data.

+

+ XML Version +

- +

+ Expat is an XML 1.0 parser, and as such never complains based on the value of the + version pseudo-attribute in the XML declaration, if present. +

+

+ If an application needs to check the version number (to support alternate + processing), it should use the XML_SetXmlDeclHandler function to set a + handler that uses the information in the XML declaration to determine what to do. + This example shows how to check that only a version number of "1.0" + is accepted: +

-

XML Version

- -

Expat is an XML 1.0 parser, and as such never complains based on -the value of the version pseudo-attribute in the XML -declaration, if present.

- -

If an application needs to check the version number (to support -alternate processing), it should use the XML_SetXmlDeclHandler function to -set a handler that uses the information in the XML declaration to -determine what to do. This example shows how to check that only a -version number of "1.0" is accepted:

- -
+      
 static int wrong_version;
 static XML_Parser parser;
 
@@ -660,201 +1026,272 @@ xmldecl_handler(void            *userData,
   ...
 }
 
+

+ Namespace Processing +

-

Namespace Processing

+

+ When the parser is created using the XML_ParserCreateNS function, Expat performs + namespace processing. Under namespace processing, Expat consumes + xmlns and xmlns:... attributes, which declare + namespaces for the scope of the element in which they occur. This means that your + start handler will not see these attributes. Your application can still be + informed of these declarations by setting namespace declaration handlers with + XML_SetNamespaceDeclHandler. +

-

When the parser is created using the XML_ParserCreateNS, function, Expat -performs namespace processing. Under namespace processing, Expat -consumes xmlns and xmlns:... attributes, -which declare namespaces for the scope of the element in which they -occur. This means that your start handler will not see these -attributes. Your application can still be informed of these -declarations by setting namespace declaration handlers with XML_SetNamespaceDeclHandler.

+

+ Element type and attribute names that belong to a given namespace are passed to + the appropriate handler in expanded form. By default this expanded form is a + concatenation of the namespace URI, the separator character (which is the 2nd + argument to XML_ParserCreateNS), + and the local name (i.e. the part after the colon). Names with undeclared + prefixes are not well-formed when namespace processing is enabled, and will + trigger an error. Unprefixed attribute names are never expanded, and unprefixed + element names are only expanded when they are in the scope of a default + namespace. +

-

Element type and attribute names that belong to a given namespace -are passed to the appropriate handler in expanded form. By default -this expanded form is a concatenation of the namespace URI, the -separator character (which is the 2nd argument to XML_ParserCreateNS), and the local -name (i.e. the part after the colon). Names with undeclared prefixes -are not well-formed when namespace processing is enabled, and will -trigger an error. Unprefixed attribute names are never expanded, -and unprefixed element names are only expanded when they are in the -scope of a default namespace.

+

+ However if XML_SetReturnNSTriplet has been called with + a non-zero do_nst parameter, then the expanded form for names with + an explicit prefix is a concatenation of: URI, separator, local name, separator, + prefix. +

-

However if XML_SetReturnNSTriplet has been called with a non-zero -do_nst parameter, then the expanded form for names with -an explicit prefix is a concatenation of: URI, separator, local name, -separator, prefix.

+

+ You can set handlers for the start of a namespace declaration and for the end of + a scope of a declaration with the XML_SetNamespaceDeclHandler function. + The StartNamespaceDeclHandler is called prior to the start tag handler and the + EndNamespaceDeclHandler is called after the corresponding end tag that ends the + namespace's scope. The namespace start handler gets passed the prefix and URI for + the namespace. For a default namespace declaration (xmlns='...'), the prefix will + be NULL. The URI will be NULL for the case where the + default namespace is being unset. The namespace end handler just gets the prefix + for the closing scope. +

-

You can set handlers for the start of a namespace declaration and -for the end of a scope of a declaration with the XML_SetNamespaceDeclHandler -function. The StartNamespaceDeclHandler is called prior to the start -tag handler and the EndNamespaceDeclHandler is called after the -corresponding end tag that ends the namespace's scope. The namespace -start handler gets passed the prefix and URI for the namespace. For a -default namespace declaration (xmlns='...'), the prefix will be -NULL. -The URI will be NULL for the case where the default namespace is being -unset. The namespace end handler just gets the prefix for the closing -scope.

+

+ These handlers are called for each declaration. So if, for instance, a start tag + had three namespace declarations, then the StartNamespaceDeclHandler would be + called three times before the start tag handler is called, once for each + declaration. +

-

These handlers are called for each declaration. So if, for -instance, a start tag had three namespace declarations, then the -StartNamespaceDeclHandler would be called three times before the start -tag handler is called, once for each declaration.

+

+ Character Encodings +

-

Character Encodings

+

+ While XML is based on Unicode, and every XML processor is required to recognize + UTF-8 and UTF-16 (1 and 2 byte encodings of Unicode), other encodings may be + declared in XML documents or entities. For the main document, an XML declaration + may contain an encoding declaration: +

-

While XML is based on Unicode, and every XML processor is required -to recognized UTF-8 and UTF-16 (1 and 2 byte encodings of Unicode), -other encodings may be declared in XML documents or entities. For the -main document, an XML declaration may contain an encoding -declaration:

-
+      
 <?xml version="1.0" encoding="ISO-8859-2"?>
 
+

+ External parsed entities may begin with a text declaration, which looks like an + XML declaration with just an encoding declaration: +

-

External parsed entities may begin with a text declaration, which -looks like an XML declaration with just an encoding declaration:

-
+      
 <?xml encoding="Big5"?>
 
+

+ With Expat, you may also specify an encoding at the time of creating a parser. + This is useful when the encoding information may come from a source outside the + document itself (like a higher level protocol.) +

-

With Expat, you may also specify an encoding at the time of -creating a parser. This is useful when the encoding information may -come from a source outside the document itself (like a higher level -protocol.)

+

+ There are four built-in + encodings in Expat: +

-

There are four built-in encodings -in Expat:

-
    -
  • UTF-8
  • -
  • UTF-16
  • -
  • ISO-8859-1
  • -
  • US-ASCII
  • -
+
    +
  • UTF-8 +
  • -

    Anything else discovered in an encoding declaration or in the -protocol encoding specified in the parser constructor, triggers a call -to the UnknownEncodingHandler. This handler gets passed -the encoding name and a pointer to an XML_Encoding data -structure. Your handler must fill in this structure and return -XML_STATUS_OK if it knows how to deal with the -encoding. Otherwise the handler should return -XML_STATUS_ERROR. The handler also gets passed a pointer -to an optional application data structure that you may indicate when -you set the handler.

    +
  • UTF-16 +
  • -

    Expat places restrictions on character encodings that it can -support by filling in the XML_Encoding structure. -include file:

    -
      -
    1. Every ASCII character that can appear in a well-formed XML document -must be represented by a single byte, and that byte must correspond to -it's ASCII encoding (except for the characters $@\^'{}~)
    2. -
    3. Characters must be encoded in 4 bytes or less.
    4. -
    5. All characters encoded must have Unicode scalar values less than or -equal to 65535 (0xFFFF)This does not apply to the built-in support -for UTF-16 and UTF-8
    6. -
    7. No character may be encoded by more that one distinct sequence of -bytes
    8. -
    +
  • ISO-8859-1 +
  • -

    XML_Encoding contains an array of integers that -correspond to the 1st byte of an encoding sequence. If the value in -the array for a byte is zero or positive, then the byte is a single -byte encoding that encodes the Unicode scalar value contained in the -array. A -1 in this array indicates a malformed byte. If the value is --2, -3, or -4, then the byte is the beginning of a 2, 3, or 4 byte -sequence respectively. Multi-byte sequences are sent to the convert -function pointed at in the XML_Encoding structure. This -function should return the Unicode scalar value for the sequence or -1 -if the sequence is malformed.

    +
  • US-ASCII +
  • +
-

One pitfall that novice Expat users are likely to fall into is that -although Expat may accept input in various encodings, the strings that -it passes to the handlers are always encoded in UTF-8 or UTF-16 -(depending on how Expat was compiled). Your application is responsible -for any translation of these strings into other encodings.

+

+ Anything else discovered in an encoding declaration or in the protocol encoding + specified in the parser constructor, triggers a call to the + UnknownEncodingHandler. This handler gets passed the encoding name + and a pointer to an XML_Encoding data structure. Your handler must + fill in this structure and return XML_STATUS_OK if it knows how to + deal with the encoding. Otherwise the handler should return + XML_STATUS_ERROR. The handler also gets passed a pointer to an + optional application data structure that you may indicate when you set the + handler. +

-

Handling External Entity References

+

+ Expat places restrictions on character encodings that it can support by filling + in the XML_Encoding structure: +

-

Expat does not read or parse external entities directly. Note that -any external DTD is a special case of an external entity. If you've -set no ExternalEntityRefHandler, then external entity -references are silently ignored. Otherwise, it calls your handler with -the information needed to read and parse the external entity.

+
    +
  1. Every ASCII character that can appear in a well-formed XML document must be + represented by a single byte, and that byte must correspond to its ASCII + encoding (except for the characters $@\^'{}~) +
  2. -

    Your handler isn't actually responsible for parsing the entity, but -it is responsible for creating a subsidiary parser with XML_ExternalEntityParserCreate that will do the job. This -returns an instance of XML_Parser that has handlers and -other data structures initialized from the parent parser. You may then -use XML_Parse or XML_ParseBuffer calls against this -parser. Since external entities my refer to other external entities, -your handler should be prepared to be called recursively.

    +
  3. Characters must be encoded in 4 bytes or less. +
  4. -

    Parsing DTDs

    +
  5. All characters encoded must have Unicode scalar values less than or equal to + 65535 (0xFFFF). (This does not apply to the built-in support for UTF-16 and + UTF-8.) +
  6. -

    In order to parse parameter entities, before starting the parse, -you must call XML_SetParamEntityParsing with one of the following -arguments:

    -
    -
    XML_PARAM_ENTITY_PARSING_NEVER
    -
    Don't parse parameter entities or the external subset
    -
    XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE
    -
    Parse parameter entities and the external subset unless -standalone was set to "yes" in the XML declaration.
    -
    XML_PARAM_ENTITY_PARSING_ALWAYS
    -
    Always parse parameter entities and the external subset
    -
    +
  7. No character may be encoded by more than one distinct sequence of bytes +
  8. +
-

In order to read an external DTD, you also have to set an external -entity reference handler as described above.

+

+ XML_Encoding contains an array of integers that correspond to the + 1st byte of an encoding sequence. If the value in the array for a byte is zero or + positive, then the byte is a single byte encoding that encodes the Unicode scalar + value contained in the array. A -1 in this array indicates a malformed byte. If + the value is -2, -3, or -4, then the byte is the beginning of a 2, 3, or 4 byte + sequence respectively. Multi-byte sequences are sent to the convert function + pointed at in the XML_Encoding structure. This function should + return the Unicode scalar value for the sequence or -1 if the sequence is + malformed. +

-

Temporarily Stopping Parsing

+

+ One pitfall that novice Expat users are likely to fall into is that although + Expat may accept input in various encodings, the strings that it passes to the + handlers are always encoded in UTF-8 or UTF-16 (depending on how Expat was + compiled). Your application is responsible for any translation of these strings + into other encodings. +

-

Expat 1.95.8 introduces a new feature: its now possible to stop -parsing temporarily from within a handler function, even if more data -has already been passed into the parser. Applications for this -include

+

+ Handling External Entity References +

-
    -
  • Supporting the XInclude specification.
  • +

    + Expat does not read or parse external entities directly. Note that any external + DTD is a special case of an external entity. If you've set no + ExternalEntityRefHandler, then external entity references are + silently ignored. Otherwise, it calls your handler with the information needed to + read and parse the external entity. +

    -
  • Delaying further processing until additional information is - available from some other source.
  • +

+ Your handler isn't actually responsible for parsing the entity, but it is + responsible for creating a subsidiary parser with XML_ExternalEntityParserCreate that + will do the job. This returns an instance of XML_Parser that has + handlers and other data structures initialized from the parent parser. You may + then use XML_Parse or XML_ParseBuffer calls against this parser. Since + external entities may refer to other external entities, your handler should be + prepared to be called recursively. +

    -
  • Adjusting processor load as task priorities shift within an - application.
  • +

    + Parsing DTDs +

    -
  • Stopping parsing completely (simply free or reset the parser - instead of resuming in the outer parsing loop). This can be useful - if an application-domain error is found in the XML being parsed or if - the result of the parse is determined not to be useful after - all.
  • -
+

+ In order to parse parameter entities, before starting the parse, you must call + XML_SetParamEntityParsing + with one of the following arguments: +

-

To take advantage of this feature, the main parsing loop of an -application needs to support this specifically. It cannot be -supported with a parsing loop compatible with Expat 1.95.7 or -earlier (though existing loops will continue to work without -supporting the stop/resume feature).

+
+
+ XML_PARAM_ENTITY_PARSING_NEVER +
-

An application that uses this feature for a single parser will have -the rough structure (in pseudo-code):

+
+ Don't parse parameter entities or the external subset +
-
+        
+ XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE +
+ +
+ Parse parameter entities and the external subset unless standalone + was set to "yes" in the XML declaration. +
+ +
+ XML_PARAM_ENTITY_PARSING_ALWAYS +
+ +
+ Always parse parameter entities and the external subset +
+
+ +

+ In order to read an external DTD, you also have to set an external entity + reference handler as described above. +

+ +

+ Temporarily Stopping Parsing +

+ +

+ Expat 1.95.8 introduces a new feature: it's now possible to stop parsing + temporarily from within a handler function, even if more data has already been + passed into the parser. Applications for this include +

+ +
    +
  • Supporting the XInclude + specification. +
  • + +
  • Delaying further processing until additional information is available from + some other source. +
  • + +
  • Adjusting processor load as task priorities shift within an application. +
  • + +
  • Stopping parsing completely (simply free or reset the parser instead of + resuming in the outer parsing loop). This can be useful if an application-domain + error is found in the XML being parsed or if the result of the parse is + determined not to be useful after all. +
  • +
+ +

+ To take advantage of this feature, the main parsing loop of an application needs + to support this specifically. It cannot be supported with a parsing loop + compatible with Expat 1.95.7 or earlier (though existing loops will continue to + work without supporting the stop/resume feature). +

+ +

+ An application that uses this feature for a single parser will have the rough + structure (in pseudo-code): +

+ +
 fd = open_input()
 p = create_parser()
 
@@ -871,15 +1308,18 @@ if parse_xml(p, fd) {
   }
 }
 
+

+ An application that may resume any of several parsers based on input (either from + the XML being parsed or some other source) will certainly have more interesting + control structures. +

-

An application that may resume any of several parsers based on -input (either from the XML being parsed or some other source) will -certainly have more interesting control structures.

+

+ This C function could be used for the parse_xml function mentioned + in the pseudo-code above: +

-

This C function could be used for the parse_xml -function mentioned in the pseudo-code above:

- -
+      
 #define BUFF_SIZE 10240
 
 /* Parse a document from the open file descriptor 'fd' until the parse
@@ -918,14 +1358,14 @@ parse_xml(XML_Parser p, int fd)
   }
 }
 
+

+ The corresponding continue_parsing function is somewhat simpler, + since it only need deal with the return code from XML_ResumeParser; it can delegate the input + handling to the parse_xml function: +

-

The corresponding continue_parsing function is -somewhat simpler, since it only need deal with the return code from -XML_ResumeParser; it can -delegate the input handling to the parse_xml -function:

- -
+      
 /* Continue parsing a document which had been suspended.  The 'p' and
    'fd' arguments are the same as passed to parse_xml().  Return
    non-zero when the parse is suspended.
@@ -947,274 +1387,343 @@ continue_parsing(XML_Parser p, int fd)
   return parse_xml(p, fd);
 }
 
+

+ Now that we've seen what a mess the top-level parsing loop can become, what have + we gained? Very simply, we can now use the XML_StopParser function to stop parsing, without + having to go to great lengths to avoid additional processing that we're expecting + to ignore. As a bonus, we get to stop parsing temporarily, and come back + to it when we're ready. +

-

Now that we've seen what a mess the top-level parsing loop can -become, what have we gained? Very simply, we can now use the XML_StopParser function to stop -parsing, without having to go to great lengths to avoid additional -processing that we're expecting to ignore. As a bonus, we get to stop -parsing temporarily, and come back to it when we're -ready.

+

+ To stop parsing from a handler function, use the XML_StopParser function. This function takes two + arguments: the parser being stopped and a flag indicating whether the parse can + be resumed in the future. +

+ -

To stop parsing from a handler function, use the XML_StopParser function. This function -takes two arguments; the parser being stopped and a flag indicating -whether the parse can be resumed in the future.

+
+ - +

+ Expat Reference +

+

+ Parser Creation +

-
- +

+ XML_ParserCreate +

-

Expat Reference

- -

Parser Creation

- -

XML_ParserCreate

-
+      
 XML_Parser XMLCALL
 XML_ParserCreate(const XML_Char *encoding);
 
-
-

-Construct a new parser. If encoding is non-NULL, it specifies a -character encoding to use for the document. This overrides the document -encoding declaration. There are four built-in encodings: -

-
    -
  • US-ASCII
  • -
  • UTF-8
  • -
  • UTF-16
  • -
  • ISO-8859-1
  • -
-

-Any other value will invoke a call to the UnknownEncodingHandler. -

-
+
+

+ Construct a new parser. If encoding is non-NULL, it specifies a + character encoding to use for the document. This overrides the document + encoding declaration. There are four built-in encodings: +

-

XML_ParserCreateNS

-
+        
    +
  • US-ASCII +
  • + +
  • UTF-8 +
  • + +
  • UTF-16 +
  • + +
  • ISO-8859-1 +
  • +
+ +

+ Any other value will invoke a call to the UnknownEncodingHandler. +

+
+ +

+ XML_ParserCreateNS +

+ +
 XML_Parser XMLCALL
 XML_ParserCreateNS(const XML_Char *encoding,
                    XML_Char sep);
 
-
-Constructs a new parser that has namespace processing in effect. Namespace -expanded element names and attribute names are returned as a concatenation -of the namespace URI, sep, and the local part of the name. This -means that you should pick a character for sep that can't be part -of an URI. Since Expat does not check namespace URIs for conformance, the -only safe choice for a namespace separator is a character that is illegal -in XML. For instance, '\xFF' is not legal in UTF-8, and -'\xFFFF' is not legal in UTF-16. There is a special case when -sep is the null character '\0': the namespace URI and -the local part will be concatenated without any separator - this is intended -to support RDF processors. It is a programming error to use the null separator -with namespace triplets.
+
+ Constructs a new parser that has namespace processing in effect. Namespace + expanded element names and attribute names are returned as a concatenation of the + namespace URI, sep, and the local part of the name. This means that you + should pick a character for sep that can't be part of a URI. Since + Expat does not check namespace URIs for conformance, the only safe choice for a + namespace separator is a character that is illegal in XML. For instance, + '\xFF' is not legal in UTF-8, and '\xFFFF' is not legal + in UTF-16. There is a special case when sep is the null character + '\0': the namespace URI and the local part will be concatenated + without any separator - this is intended to support RDF processors. It is a + programming error to use the null separator with namespace triplets. +
-

Note: -Expat does not validate namespace URIs (beyond encoding) -against RFC 3986 today (and is not required to do so with regard to -the XML 1.0 namespaces specification) but it may start doing that -in future releases. Before that, an application using Expat must -be ready to receive namespace URIs containing non-URI characters. -

+

+ Note: Expat does not validate namespace URIs (beyond encoding) + against RFC 3986 today (and is not required to do so with regard to the XML 1.0 + namespaces specification) but it may start doing that in future releases. Before + that, an application using Expat must be ready to receive namespace URIs + containing non-URI characters. +

-

XML_ParserCreate_MM

-
+      

+ XML_ParserCreate_MM +

+ +
 XML_Parser XMLCALL
 XML_ParserCreate_MM(const XML_Char *encoding,
                     const XML_Memory_Handling_Suite *ms,
-		    const XML_Char *sep);
+                    const XML_Char *sep);
 
-
+
+      
 typedef struct {
   void *(XMLCALL *malloc_fcn)(size_t size);
   void *(XMLCALL *realloc_fcn)(void *ptr, size_t size);
   void (XMLCALL *free_fcn)(void *ptr);
 } XML_Memory_Handling_Suite;
 
-
-

Construct a new parser using the suite of memory handling functions -specified in ms. If ms is NULL, then use the -standard set of memory management functions. If sep is -non-NULL, then namespace processing is enabled in the created parser -and the character pointed at by sep is used as the separator between -the namespace URI and the local part of the name.

-
+
+

+ Construct a new parser using the suite of memory handling functions specified + in ms. If ms is NULL, then use the + standard set of memory management functions. If sep is + non-NULL, then namespace processing is enabled in the created + parser and the character pointed at by sep is used as the separator between the + namespace URI and the local part of the name. +

+
-

XML_ExternalEntityParserCreate

-
+      

+ XML_ExternalEntityParserCreate +

+ +
 XML_Parser XMLCALL
 XML_ExternalEntityParserCreate(XML_Parser p,
                                const XML_Char *context,
                                const XML_Char *encoding);
 
-
-Construct a new XML_Parser object for parsing an external -general entity. Context is the context argument passed in a call to a -ExternalEntityRefHandler. Other state information such as handlers, -user data, namespace processing is inherited from the parser passed as -the 1st argument. So you shouldn't need to call any of the behavior -changing functions on this parser (unless you want it to act -differently than the parent parser). -
+
+

+ Construct a new XML_Parser object for parsing an external general + entity. Context is the context argument passed in a call to an + ExternalEntityRefHandler. Other state information such as handlers, user data, + namespace processing is inherited from the parser passed as the 1st argument. + So you shouldn't need to call any of the behavior changing functions on this + parser (unless you want it to act differently than the parent parser). +

-

XML_ParserFree

-
+        

+ Note: Please be sure to free subparsers created by + XML_ExternalEntityParserCreate + prior to freeing their related parent parser, as subparsers reference + and use parts of their respective parent parser, internally. Parent parsers + must outlive subparsers. +

+
+ +

+ XML_ParserFree +

+ +
 void XMLCALL
 XML_ParserFree(XML_Parser p);
 
-
-Free memory used by the parser. Your application is responsible for -freeing any memory associated with user data. -
+
+

+ Free memory used by the parser. +

-

XML_ParserReset

-
+        

+ Note: Your application is responsible for freeing any memory + associated with user data. +

+ +

+ Note: Please be sure to free subparsers created by + XML_ExternalEntityParserCreate + prior to freeing their related parent parser, as subparsers reference + and use parts of their respective parent parser, internally. Parent parsers + must outlive subparsers. +

+
+ +

+ XML_ParserReset +

+ +
 XML_Bool XMLCALL
 XML_ParserReset(XML_Parser p,
                 const XML_Char *encoding);
 
-
-Clean up the memory structures maintained by the parser so that it may -be used again. After this has been called, parser is -ready to start parsing a new document. All handlers are cleared from -the parser, except for the unknownEncodingHandler. The parser's external -state is re-initialized except for the values of ns and ns_triplets. -This function may not be used on a parser created using XML_ExternalEntityParserCreate; it will return XML_FALSE in that case. Returns -XML_TRUE on success. Your application is responsible for -dealing with any memory associated with user data. -
+
+ Clean up the memory structures maintained by the parser so that it may be used + again. After this has been called, parser is ready to start parsing + a new document. All handlers are cleared from the parser, except for the + unknownEncodingHandler. The parser's external state is re-initialized except for + the values of ns and ns_triplets. This function may not be used on a parser + created using XML_ExternalEntityParserCreate; it + will return XML_FALSE in that case. Returns XML_TRUE on + success. Your application is responsible for dealing with any memory associated + with user data. +
-

Parsing

+

+ Parsing +

-

To state the obvious: the three parsing functions XML_Parse, -XML_ParseBuffer and -XML_GetBuffer must not be called from within a handler -unless they operate on a separate parser instance, that is, one that -did not call the handler. For example, it is OK to call the parsing -functions from within an XML_ExternalEntityRefHandler, -if they apply to the parser created by -XML_ExternalEntityParserCreate.

+

+ To state the obvious: the three parsing functions XML_Parse, XML_ParseBuffer and XML_GetBuffer must not be called from within a + handler unless they operate on a separate parser instance, that is, one that did + not call the handler. For example, it is OK to call the parsing functions from + within an XML_ExternalEntityRefHandler, if they apply to the parser + created by XML_ExternalEntityParserCreate. +

-

Note: The len argument passed to these functions -should be considerably less than the maximum value for an integer, -as it could create an integer overflow situation if the added -lengths of a buffer and the unprocessed portion of the previous buffer -exceed the maximum integer value. Input data at the end of a buffer -will remain unprocessed if it is part of an XML token for which the -end is not part of that buffer.

+

+ Note: The len argument passed to these functions should be + considerably less than the maximum value for an integer, as it could create an + integer overflow situation if the added lengths of a buffer and the unprocessed + portion of the previous buffer exceed the maximum integer value. Input data at + the end of a buffer will remain unprocessed if it is part of an XML token for + which the end is not part of that buffer. +

-

The application must make a concluding -XML_Parse or -XML_ParseBuffer call -with isFinal set to XML_TRUE.

+

+ The application must make a + concluding XML_Parse or XML_ParseBuffer call with isFinal set + to XML_TRUE. +

-

XML_Parse

-
+      

+ XML_Parse +

+ +
 enum XML_Status XMLCALL
 XML_Parse(XML_Parser p,
           const char *s,
           int len,
           int isFinal);
 
-
+
+      
 enum XML_Status {
   XML_STATUS_ERROR = 0,
   XML_STATUS_OK = 1
 };
 
-
-

-Parse some more of the document. The string s is a buffer -containing part (or perhaps all) of the document. The number of bytes of s -that are part of the document is indicated by len. This means -that s doesn't have to be null-terminated. It also means that -if len is larger than the number of bytes in the block of -memory that s points at, then a memory fault is likely. -Negative values for len are rejected since Expat 2.2.1. -The -isFinal parameter informs the parser that this is the last -piece of the document. Frequently, the last piece is empty (i.e. -len is zero.) -

+
+

+ Parse some more of the document. The string s is a buffer + containing part (or perhaps all) of the document. The number of bytes of s that + are part of the document is indicated by len. This means that + s doesn't have to be null-terminated. It also means that if + len is larger than the number of bytes in the block of memory that + s points at, then a memory fault is likely. Negative values for + len are rejected since Expat 2.2.1. The isFinal + parameter informs the parser that this is the last piece of the document. + Frequently, the last piece is empty (i.e. len is zero.) +

-

-If a parse error occurred, it returns XML_STATUS_ERROR. -Otherwise it returns XML_STATUS_OK value. -Note that regardless of the return value, there is no guarantee that all -provided input has been parsed; only after the -concluding call will all handler callbacks and parsing errors have -happened. -

+

+ If a parse error occurred, it returns XML_STATUS_ERROR. Otherwise + it returns XML_STATUS_OK. Note that regardless of the return + value, there is no guarantee that all provided input has been parsed; only + after the concluding call will all handler callbacks and + parsing errors have happened. +

-

-Simplified, XML_Parse can be considered a convenience wrapper -that is pairing calls -to XML_GetBuffer -and XML_ParseBuffer -(when Expat is built with macro XML_CONTEXT_BYTES -defined to a positive value, which is both common and default). -XML_Parse is then functionally equivalent to calling -XML_GetBuffer, -memcpy, and -XML_ParseBuffer. -

+

+ Simplified, XML_Parse can be considered a convenience wrapper that + is pairing calls to XML_GetBuffer and + XML_ParseBuffer (when Expat is + built with macro XML_CONTEXT_BYTES defined to a positive value, + which is both common and default). XML_Parse is then functionally + equivalent to calling XML_GetBuffer, + memcpy, and XML_ParseBuffer. +

-

-To avoid double copying of the input, direct use of functions -XML_GetBuffer and -XML_ParseBuffer is advised -for most production use, e.g. -if you're using read or similar functionality to fill your -buffers, fill directly into the buffer from -XML_GetBuffer, -then parse with XML_ParseBuffer. -

-
+

+ To avoid double copying of the input, direct use of functions XML_GetBuffer and XML_ParseBuffer is advised for most production + use, e.g. if you're using read or similar functionality to fill + your buffers, fill directly into the buffer from XML_GetBuffer, then parse with XML_ParseBuffer. +

+
-

XML_ParseBuffer

-
+      

+ XML_ParseBuffer +

+ +
 enum XML_Status XMLCALL
 XML_ParseBuffer(XML_Parser p,
                 int len,
                 int isFinal);
 
-
-

-This is just like XML_Parse, -except in this case Expat provides the buffer. By obtaining the -buffer from Expat with the XML_GetBuffer function, the application can avoid double -copying of the input. -

+
+

+ This is just like XML_Parse, except in + this case Expat provides the buffer. By obtaining the buffer from Expat with + the XML_GetBuffer function, the + application can avoid double copying of the input. +

-

-Negative values for len are rejected since Expat 2.6.3. -

-
+

+ Negative values for len are rejected since Expat 2.6.3. +

+
-

XML_GetBuffer

-
+      

+ XML_GetBuffer +

+ +
 void * XMLCALL
 XML_GetBuffer(XML_Parser p,
               int len);
 
-
-Obtain a buffer of size len to read a piece of the document -into. A NULL value is returned if Expat can't allocate enough memory for -this buffer. A NULL value may also be returned if len is zero. -This has to be called prior to every call to -XML_ParseBuffer. A -typical use would look like this: +
+ Obtain a buffer of size len to read a piece of the document into. A + NULL value is returned if Expat can't allocate enough memory for + this buffer. A NULL value may also be returned if len + is zero. This has to be called prior to every call to XML_ParseBuffer. A typical use would look like + this: -
+        
 for (;;) {
   int bytes_read;
   void *buff = XML_GetBuffer(p, BUFF_SIZE);
@@ -1235,115 +1744,168 @@ for (;;) {
     break;
 }
 
-
+
-

XML_StopParser

-
+      

+ XML_StopParser +

+ +
 enum XML_Status XMLCALL
 XML_StopParser(XML_Parser p,
                XML_Bool resumable);
 
-
+
+

+ Stops parsing, causing XML_Parse or + XML_ParseBuffer to return. Must be + called from within a call-back handler, except when aborting (when + resumable is XML_FALSE) an already suspended parser. + Some call-backs may still follow because they would otherwise get lost, + including +

-

Stops parsing, causing XML_Parse or XML_ParseBuffer to return. Must be called from within a -call-back handler, except when aborting (when resumable -is XML_FALSE) an already suspended parser. Some -call-backs may still follow because they would otherwise get -lost, including

-
    -
  • the end element handler for empty elements when stopped in the - start element handler,
  • -
  • the end namespace declaration handler when stopped in the end - element handler,
  • -
  • the character data handler when stopped in the character data handler - while making multiple call-backs on a contiguous chunk of characters,
  • -
-

and possibly others.

+
    +
  • the end element handler for empty elements when stopped in the start + element handler, +
  • -

    This can be called from most handlers, including DTD related -call-backs, except when parsing an external parameter entity and -resumable is XML_TRUE. Returns -XML_STATUS_OK when successful, -XML_STATUS_ERROR otherwise. The possible error codes -are:

    -
    -
    XML_ERROR_NOT_STARTED
    -
    - when stopping or suspending a parser before it has started, - added in Expat 2.6.4. -
    -
    XML_ERROR_SUSPENDED
    -
    when suspending an already suspended parser.
    -
    XML_ERROR_FINISHED
    -
    when the parser has already finished.
    -
    XML_ERROR_SUSPEND_PE
    -
    when suspending while parsing an external PE.
    -
    +
  • the end namespace declaration handler when stopped in the end element + handler, +
  • -

    Since the stop/resume feature requires application support in the -outer parsing loop, it is an error to call this function for a parser -not being handled appropriately; see Temporarily Stopping Parsing for more information.

    +
  • the character data handler when stopped in the character data handler while + making multiple call-backs on a contiguous chunk of characters, +
  • +
-

When resumable is XML_TRUE then parsing -is suspended, that is, XML_Parse and XML_ParseBuffer return XML_STATUS_SUSPENDED. -Otherwise, parsing is aborted, that is, XML_Parse and XML_ParseBuffer return -XML_STATUS_ERROR with error code -XML_ERROR_ABORTED.

+

+ and possibly others. +

-

Note: -This will be applied to the current parser instance only, that is, if -there is a parent parser then it will continue parsing when the -external entity reference handler returns. It is up to the -implementation of that handler to call XML_StopParser on the parent parser -(recursively), if one wants to stop parsing altogether.

+

+ This can be called from most handlers, including DTD related call-backs, except + when parsing an external parameter entity and resumable is + XML_TRUE. Returns XML_STATUS_OK when successful, + XML_STATUS_ERROR otherwise. The possible error codes are: +

-

When suspended, parsing can be resumed by calling XML_ResumeParser.

+
+
+ XML_ERROR_NOT_STARTED +
-

New in Expat 1.95.8.

-
+
+ when stopping or suspending a parser before it has started, added in Expat + 2.6.4. +
-

XML_ResumeParser

-
+          
+ XML_ERROR_SUSPENDED +
+ +
+ when suspending an already suspended parser. +
+ +
+ XML_ERROR_FINISHED +
+ +
+ when the parser has already finished. +
+ +
+ XML_ERROR_SUSPEND_PE +
+ +
+ when suspending while parsing an external PE. +
+ + +

+ Since the stop/resume feature requires application support in the outer parsing + loop, it is an error to call this function for a parser not being handled + appropriately; see Temporarily Stopping Parsing for + more information. +

+ +

+ When resumable is XML_TRUE then parsing is + suspended, that is, XML_Parse + and XML_ParseBuffer return + XML_STATUS_SUSPENDED. Otherwise, parsing is aborted, that + is, XML_Parse and XML_ParseBuffer return + XML_STATUS_ERROR with error code XML_ERROR_ABORTED. +

+ +

+ Note: This will be applied to the current parser instance + only, that is, if there is a parent parser then it will continue parsing when + the external entity reference handler returns. It is up to the implementation + of that handler to call XML_StopParser on the parent parser (recursively), + if one wants to stop parsing altogether. +

+ +

+ When suspended, parsing can be resumed by calling XML_ResumeParser. +

+ +

+ New in Expat 1.95.8. +

+
+ +

+ XML_ResumeParser +

+ +
 enum XML_Status XMLCALL
 XML_ResumeParser(XML_Parser p);
 
-
-

Resumes parsing after it has been suspended with XML_StopParser. Must not be called from -within a handler call-back. Returns same status codes as XML_Parse or XML_ParseBuffer. An additional error -code, XML_ERROR_NOT_SUSPENDED, will be returned if the -parser was not currently suspended.

+
+

+ Resumes parsing after it has been suspended with XML_StopParser. Must not be called from within a + handler call-back. Returns the same status codes as XML_Parse or XML_ParseBuffer. An additional error code, + XML_ERROR_NOT_SUSPENDED, will be returned if the parser was not + currently suspended. +

-

Note: -This must be called on the most deeply nested child parser instance -first, and on its parent parser only after the child parser has -finished, to be applied recursively until the document entity's parser -is restarted. That is, the parent parser will not resume by itself -and it is up to the application to call XML_ResumeParser on it at the -appropriate moment.

+

+ Note: This must be called on the most deeply nested child + parser instance first, and on its parent parser only after the child parser has + finished, to be applied recursively until the document entity's parser is + restarted. That is, the parent parser will not resume by itself and it is up to + the application to call XML_ResumeParser on it at the appropriate + moment. +

-

New in Expat 1.95.8.

-
+

+ New in Expat 1.95.8. +

+
-

XML_GetParsingStatus

-
+      

+ XML_GetParsingStatus +

+ +
 void XMLCALL
 XML_GetParsingStatus(XML_Parser p,
                      XML_ParsingStatus *status);
 
-
+
+      
 enum XML_Parsing {
   XML_INITIALIZED,
   XML_PARSING,
@@ -1356,244 +1918,322 @@ typedef struct {
   XML_Bool finalBuffer;
 } XML_ParsingStatus;
 
-
-

Returns status of parser with respect to being initialized, -parsing, finished, or suspended, and whether the final buffer is being -processed. The status parameter must not be -NULL.

+
+

+ Returns status of parser with respect to being initialized, parsing, finished, + or suspended, and whether the final buffer is being processed. The + status parameter must not be NULL. +

-

New in Expat 1.95.8.

-
+

+ New in Expat 1.95.8. +

+
+

+ Handler Setting +

-

Handler Setting

+

+ Although handlers are typically set prior to parsing and left alone, an + application may choose to set or change the handler for a parsing event while the + parse is in progress. For instance, your application may choose to ignore all + text not descended from a para element. One way it could do this is + to set the character handler when a para start tag is seen, and unset it for the + corresponding end tag. +

-

Although handlers are typically set prior to parsing and left alone, an -application may choose to set or change the handler for a parsing event -while the parse is in progress. For instance, your application may choose -to ignore all text not descended from a para element. One -way it could do this is to set the character handler when a para start tag -is seen, and unset it for the corresponding end tag.

+

+ A handler may be unset by providing a NULL pointer to the + appropriate handler setter. None of the handler setting functions have a return + value. +

-

A handler may be unset by providing a NULL pointer to the -appropriate handler setter. None of the handler setting functions have -a return value.

+

+ Your handlers will be receiving strings in arrays of type XML_Char. + This type is conditionally defined in expat.h as either char, + wchar_t or unsigned short. The former implies UTF-8 + encoding, the latter two imply UTF-16 encoding. Note that you'll receive them in + this form independent of the original encoding of the document. +

-

Your handlers will be receiving strings in arrays of type -XML_Char. This type is conditionally defined in expat.h as -either char, wchar_t or unsigned short. -The former implies UTF-8 encoding, the latter two imply UTF-16 encoding. -Note that you'll receive them in this form independent of the original -encoding of the document.

+
+

+ XML_SetStartElementHandler +

-
-

XML_SetStartElementHandler

-
+        
 void XMLCALL
 XML_SetStartElementHandler(XML_Parser p,
                            XML_StartElementHandler start);
 
-
+
+        
 typedef void
 (XMLCALL *XML_StartElementHandler)(void *userData,
                                    const XML_Char *name,
                                    const XML_Char **atts);
 
-

Set handler for start (and empty) tags. Attributes are passed to the start -handler as a pointer to a vector of char pointers. Each attribute seen in -a start (or empty) tag occupies 2 consecutive places in this vector: the -attribute name followed by the attribute value. These pairs are terminated -by a NULL pointer.

-

Note that an empty tag generates a call to both start and end handlers -(in that order).

-
+

+ Set handler for start (and empty) tags. Attributes are passed to the start + handler as a pointer to a vector of char pointers. Each attribute seen in a + start (or empty) tag occupies 2 consecutive places in this vector: the + attribute name followed by the attribute value. These pairs are terminated by a + NULL pointer. +

-
-

XML_SetEndElementHandler

-
+        

+ Note that an empty tag generates a call to both start and end handlers (in that + order). +

+
+ +
+

+ XML_SetEndElementHandler +

+ +
 void XMLCALL
 XML_SetEndElementHandler(XML_Parser p,
                          XML_EndElementHandler);
 
-
+
+        
 typedef void
 (XMLCALL *XML_EndElementHandler)(void *userData,
                                  const XML_Char *name);
 
-

Set handler for end (and empty) tags. As noted above, an empty tag -generates a call to both start and end handlers.

-
+

+ Set handler for end (and empty) tags. As noted above, an empty tag generates a + call to both start and end handlers. +

+
-
-

XML_SetElementHandler

-
+      
+

+ XML_SetElementHandler +

+ +
 void XMLCALL
 XML_SetElementHandler(XML_Parser p,
                       XML_StartElementHandler start,
                       XML_EndElementHandler end);
 
-

Set handlers for start and end tags with one call.

-
+

+ Set handlers for start and end tags with one call. +

+
-
-

XML_SetCharacterDataHandler

-
+      
+

+ XML_SetCharacterDataHandler +

+ +
 void XMLCALL
 XML_SetCharacterDataHandler(XML_Parser p,
                             XML_CharacterDataHandler charhndl)
 
-
+
+        
 typedef void
 (XMLCALL *XML_CharacterDataHandler)(void *userData,
                                     const XML_Char *s,
                                     int len);
 
-

Set a text handler. The string your handler receives -is NOT null-terminated. You have to use the length argument -to deal with the end of the string. A single block of contiguous text -free of markup may still result in a sequence of calls to this handler. -In other words, if you're searching for a pattern in the text, it may -be split across calls to this handler. Note: Setting this handler to NULL -may NOT immediately terminate call-backs if the parser is currently -processing such a single block of contiguous markup-free text, as the parser -will continue calling back until the end of the block is reached.

-
+

+ Set a text handler. The string your handler receives is NOT + null-terminated. You have to use the length argument to deal with the end + of the string. A single block of contiguous text free of markup may still + result in a sequence of calls to this handler. In other words, if you're + searching for a pattern in the text, it may be split across calls to this + handler. Note: Setting this handler to NULL may NOT + immediately terminate call-backs if the parser is currently processing + such a single block of contiguous markup-free text, as the parser will continue + calling back until the end of the block is reached. +

+
-
-

XML_SetProcessingInstructionHandler

-
+      
+

+ XML_SetProcessingInstructionHandler +

+ +
 void XMLCALL
 XML_SetProcessingInstructionHandler(XML_Parser p,
                                     XML_ProcessingInstructionHandler proc)
 
-
+
+        
 typedef void
 (XMLCALL *XML_ProcessingInstructionHandler)(void *userData,
                                             const XML_Char *target,
                                             const XML_Char *data);
 
 
-

Set a handler for processing instructions. The target is the first word -in the processing instruction. The data is the rest of the characters in -it after skipping all whitespace after the initial word.

-
+

+ Set a handler for processing instructions. The target is the first word in the + processing instruction. The data is the rest of the characters in it after + skipping all whitespace after the initial word. +

+
-
-

XML_SetCommentHandler

-
+      
+

+ XML_SetCommentHandler +

+ +
 void XMLCALL
 XML_SetCommentHandler(XML_Parser p,
                       XML_CommentHandler cmnt)
 
-
+
+        
 typedef void
 (XMLCALL *XML_CommentHandler)(void *userData,
                               const XML_Char *data);
 
-

Set a handler for comments. The data is all text inside the comment -delimiters.

-
+

+ Set a handler for comments. The data is all text inside the comment delimiters. +

+
-
-

XML_SetStartCdataSectionHandler

-
+      
+

+ XML_SetStartCdataSectionHandler +

+ +
 void XMLCALL
 XML_SetStartCdataSectionHandler(XML_Parser p,
                                 XML_StartCdataSectionHandler start);
 
-
+
+        
 typedef void
 (XMLCALL *XML_StartCdataSectionHandler)(void *userData);
 
-

Set a handler that gets called at the beginning of a CDATA section.

-
+

+ Set a handler that gets called at the beginning of a CDATA section. +

+
-
-

XML_SetEndCdataSectionHandler

-
+      
+

+ XML_SetEndCdataSectionHandler +

+ +
 void XMLCALL
 XML_SetEndCdataSectionHandler(XML_Parser p,
                               XML_EndCdataSectionHandler end);
 
-
+
+        
 typedef void
 (XMLCALL *XML_EndCdataSectionHandler)(void *userData);
 
-

Set a handler that gets called at the end of a CDATA section.

-
+

+ Set a handler that gets called at the end of a CDATA section. +

+
-
-

XML_SetCdataSectionHandler

-
+      
+

+ XML_SetCdataSectionHandler +

+ +
 void XMLCALL
 XML_SetCdataSectionHandler(XML_Parser p,
                            XML_StartCdataSectionHandler start,
                            XML_EndCdataSectionHandler end)
 
-

Sets both CDATA section handlers with one call.

-
+

+ Sets both CDATA section handlers with one call. +

+
-
-

XML_SetDefaultHandler

-
+      
+

+ XML_SetDefaultHandler +

+ +
 void XMLCALL
 XML_SetDefaultHandler(XML_Parser p,
                       XML_DefaultHandler hndl)
 
-
+
+        
 typedef void
 (XMLCALL *XML_DefaultHandler)(void *userData,
                               const XML_Char *s,
                               int len);
 
+

+ Sets a handler for any characters in the document which wouldn't otherwise be + handled. This includes both data for which no handlers can be set (like some + kinds of DTD declarations) and data which could be reported but which currently + has no handler set. The characters are passed exactly as they were present in + the XML document except that they will be encoded in UTF-8 or UTF-16. Line + boundaries are not normalized. Note that a byte order mark character is not + passed to the default handler. There are no guarantees about how characters are + divided between calls to the default handler: for example, a comment might be + split between multiple calls. Setting the handler with this call has the side + effect of turning off expansion of references to internally defined general + entities. Instead these references are passed to the default handler. +

-

Sets a handler for any characters in the document which wouldn't -otherwise be handled. This includes both data for which no handlers -can be set (like some kinds of DTD declarations) and data which could -be reported but which currently has no handler set. The characters -are passed exactly as they were present in the XML document except -that they will be encoded in UTF-8 or UTF-16. Line boundaries are not -normalized. Note that a byte order mark character is not passed to the -default handler. There are no guarantees about how characters are -divided between calls to the default handler: for example, a comment -might be split between multiple calls. Setting the handler with -this call has the side effect of turning off expansion of references -to internally defined general entities. Instead these references are -passed to the default handler.

+

+ See also XML_DefaultCurrent. +

+
-

See also XML_DefaultCurrent.

-
+
+

+ XML_SetDefaultHandlerExpand +

-
-

XML_SetDefaultHandlerExpand

-
+        
 void XMLCALL
 XML_SetDefaultHandlerExpand(XML_Parser p,
                             XML_DefaultHandler hndl)
 
-
+
+        
 typedef void
 (XMLCALL *XML_DefaultHandler)(void *userData,
                               const XML_Char *s,
                               int len);
 
-

This sets a default handler, but doesn't inhibit the expansion of -internal entity references. The entity reference will not be passed -to the default handler.

+

+ This sets a default handler, but doesn't inhibit the expansion of internal + entity references. The entity reference will not be passed to the default + handler. +

-

See also XML_DefaultCurrent.

-
+

+ See also XML_DefaultCurrent. +

+
-
-

XML_SetExternalEntityRefHandler

-
+      
+

+ XML_SetExternalEntityRefHandler +

+ +
 void XMLCALL
 XML_SetExternalEntityRefHandler(XML_Parser p,
                                 XML_ExternalEntityRefHandler hndl)
 
-
+
+        
 typedef int
 (XMLCALL *XML_ExternalEntityRefHandler)(XML_Parser p,
                                         const XML_Char *context,
@@ -1601,109 +2241,151 @@ typedef int
                                         const XML_Char *systemId,
                                         const XML_Char *publicId);
 
-

Set an external entity reference handler. This handler is also -called for processing an external DTD subset if parameter entity parsing -is in effect. (See -XML_SetParamEntityParsing.)

+

+ Set an external entity reference handler. This handler is also called for + processing an external DTD subset if parameter entity parsing is in effect. + (See XML_SetParamEntityParsing.) +

-

The context parameter specifies the parsing context in -the format expected by the context argument to XML_ExternalEntityParserCreate. code is -valid only until the handler returns, so if the referenced entity is -to be parsed later, it must be copied. context is NULL -only when the entity is a parameter entity, which is how one can -differentiate between general and parameter entities.

+

+ Warning: Using an external entity reference handler can lead + to XXE + vulnerabilities. It should only be used in applications that do not parse + untrusted XML input. +

-

The base parameter is the base to use for relative -system identifiers. It is set by XML_SetBase and may be NULL. The -publicId parameter is the public id given in the entity -declaration and may be NULL. systemId is the system -identifier specified in the entity declaration and is never NULL.

+

+ The context parameter specifies the parsing context in the format + expected by the context argument to XML_ExternalEntityParserCreate. + context is valid only until the handler returns, so if the referenced + entity is to be parsed later, it must be copied. context is + NULL only when the entity is a parameter entity, which is how one + can differentiate between general and parameter entities. +

-

There are a couple of ways in which this handler differs from -others. First, this handler returns a status indicator (an -integer). XML_STATUS_OK should be returned for successful -handling of the external entity reference. Returning -XML_STATUS_ERROR indicates failure, and causes the -calling parser to return an -XML_ERROR_EXTERNAL_ENTITY_HANDLING error.

+

+ The base parameter is the base to use for relative system + identifiers. It is set by XML_SetBase + and may be NULL. The publicId parameter is the public + id given in the entity declaration and may be NULL. + systemId is the system identifier specified in the entity + declaration and is never NULL. +

-

Second, instead of having the user data as its first argument, it -receives the parser that encountered the entity reference. This, along -with the context parameter, may be used as arguments to a call to -XML_ExternalEntityParserCreate. Using the returned -parser, the body of the external entity can be recursively parsed.

+

+ There are a couple of ways in which this handler differs from others. First, + this handler returns a status indicator (an integer). + XML_STATUS_OK should be returned for successful handling of the + external entity reference. Returning XML_STATUS_ERROR indicates + failure, and causes the calling parser to return an + XML_ERROR_EXTERNAL_ENTITY_HANDLING error. +

-

Since this handler may be called recursively, it should not be saving -information into global or static variables.

-
+

+ Second, instead of having the user data as its first argument, it receives the + parser that encountered the entity reference. This, along with the context + parameter, may be used as arguments to a call to XML_ExternalEntityParserCreate. + Using the returned parser, the body of the external entity can be recursively + parsed. +

-

XML_SetExternalEntityRefHandlerArg

-
+        

+ Since this handler may be called recursively, it should not be saving + information into global or static variables. +

+
+ +

+ XML_SetExternalEntityRefHandlerArg +

+ +
 void XMLCALL
 XML_SetExternalEntityRefHandlerArg(XML_Parser p,
                                    void *arg)
 
-
-

Set the argument passed to the ExternalEntityRefHandler. If -arg is not NULL, it is the new value passed to the -handler set using XML_SetExternalEntityRefHandler; if arg is -NULL, the argument passed to the handler function will be the parser -object itself.

+
+

+ Set the argument passed to the ExternalEntityRefHandler. If arg is + not NULL, it is the new value passed to the handler set using + XML_SetExternalEntityRefHandler; + if arg is NULL, the argument passed to the handler + function will be the parser object itself. +

-

Note: -The type of arg and the type of the first argument to the -ExternalEntityRefHandler do not match. This function takes a -void * to be passed to the handler, while the handler -accepts an XML_Parser. This is a historical accident, -but will not be corrected before Expat 2.0 (at the earliest) to avoid -causing compiler warnings for code that's known to work with this -API. It is the responsibility of the application code to know the -actual type of the argument passed to the handler and to manage it -properly.

-
+

+ Note: The type of arg and the type of the first + argument to the ExternalEntityRefHandler do not match. This function takes a + void * to be passed to the handler, while the handler accepts an + XML_Parser. This is a historical accident, but will not be + corrected before Expat 2.0 (at the earliest) to avoid causing compiler warnings + for code that's known to work with this API. It is the responsibility of the + application code to know the actual type of the argument passed to the handler + and to manage it properly. +

+
-
-

XML_SetSkippedEntityHandler

-
+      
+

+ XML_SetSkippedEntityHandler +

+ +
 void XMLCALL
 XML_SetSkippedEntityHandler(XML_Parser p,
                             XML_SkippedEntityHandler handler)
 
-
+
+        
 typedef void
 (XMLCALL *XML_SkippedEntityHandler)(void *userData,
                                     const XML_Char *entityName,
                                     int is_parameter_entity);
 
-

Set a skipped entity handler. This is called in two situations:

-
    -
  1. An entity reference is encountered for which no declaration - has been read and this is not an error.
  2. -
  3. An internal entity reference is read, but not expanded, because - XML_SetDefaultHandler - has been called.
  4. -
-

The is_parameter_entity argument will be non-zero for -a parameter entity and zero for a general entity.

Note: Skipped -parameter entities in declarations and skipped general entities in -attribute values cannot be reported, because the event would be out of -sync with the reporting of the declarations or attribute values

-
+

+ Set a skipped entity handler. This is called in two situations: +

-
-

XML_SetUnknownEncodingHandler

-
+        
    +
  1. An entity reference is encountered for which no declaration has been read + and this is not an error. +
  2. + +
  3. An internal entity reference is read, but not expanded, because XML_SetDefaultHandler has been + called. +
  4. +
+ +

+ The is_parameter_entity argument will be non-zero for a parameter + entity and zero for a general entity. +

+ +

+ Note: Skipped parameter entities in declarations and skipped general entities + in attribute values cannot be reported, because the event would be out of sync + with the reporting of the declarations or attribute values. +

+
+ +
+

+ XML_SetUnknownEncodingHandler +

+ +
 void XMLCALL
 XML_SetUnknownEncodingHandler(XML_Parser p,
                               XML_UnknownEncodingHandler enchandler,
-			      void *encodingHandlerData)
+                              void *encodingHandlerData)
 
-
+
+        
 typedef int
 (XMLCALL *XML_UnknownEncodingHandler)(void *encodingHandlerData,
                                       const XML_Char *name,
@@ -1716,115 +2398,147 @@ typedef struct {
   void (XMLCALL *release)(void *data);
 } XML_Encoding;
 
-

Set a handler to deal with encodings other than the built in set. This should be done before -XML_Parse or XML_ParseBuffer have been called on the -given parser.

If the handler knows how to deal with an encoding -with the given name, it should fill in the info data -structure and return XML_STATUS_OK. Otherwise it -should return XML_STATUS_ERROR. The handler will be called -at most once per parsed (external) entity. The optional application -data pointer encodingHandlerData will be passed back to -the handler.

+

+ Set a handler to deal with encodings other than the built-in set. This should be done before + XML_Parse or XML_ParseBuffer has been called on the given + parser. +

-

The map array contains information for every possible leading -byte in a byte sequence. If the corresponding value is >= 0, then it's -a single byte sequence and the byte encodes that Unicode value. If the -value is -1, then that byte is invalid as the initial byte in a sequence. -If the value is -n, where n is an integer > 1, then n is the number of -bytes in the sequence and the actual conversion is accomplished by a -call to the function pointed at by convert. This function may return -1 -if the sequence itself is invalid. The convert pointer may be NULL if -there are only single byte codes. The data parameter passed to the convert -function is the data pointer from XML_Encoding. The -string s is NOT null-terminated and points at the sequence of -bytes to be converted.

+

+ If the handler knows how to deal with an encoding with the given name, it + should fill in the info data structure and return + XML_STATUS_OK. Otherwise it should return + XML_STATUS_ERROR. The handler will be called at most once per + parsed (external) entity. The optional application data pointer + encodingHandlerData will be passed back to the handler. +

-

The function pointed at by release is called by the -parser when it is finished with the encoding. It may be NULL.

-
+

+ The map array contains information for every possible leading byte in a byte + sequence. If the corresponding value is >= 0, then it's a single byte + sequence and the byte encodes that Unicode value. If the value is -1, then that + byte is invalid as the initial byte in a sequence. If the value is -n, where n + is an integer > 1, then n is the number of bytes in the sequence and the + actual conversion is accomplished by a call to the function pointed at by + convert. This function may return -1 if the sequence itself is invalid. The + convert pointer may be NULL if there are only single byte codes. + The data parameter passed to the convert function is the data pointer from + XML_Encoding. The string s is NOT null-terminated and + points at the sequence of bytes to be converted. +

-
-

XML_SetStartNamespaceDeclHandler

-
+        

+ The function pointed at by release is called by the parser when it + is finished with the encoding. It may be NULL. +

+
+ +
+

+ XML_SetStartNamespaceDeclHandler +

+ +
 void XMLCALL
 XML_SetStartNamespaceDeclHandler(XML_Parser p,
-			         XML_StartNamespaceDeclHandler start);
+                                 XML_StartNamespaceDeclHandler start);
 
-
+
+        
 typedef void
 (XMLCALL *XML_StartNamespaceDeclHandler)(void *userData,
                                          const XML_Char *prefix,
                                          const XML_Char *uri);
 
-

Set a handler to be called when a namespace is declared. Namespace -declarations occur inside start tags. But the namespace declaration start -handler is called before the start tag handler for each namespace declared -in that start tag.

-
+

+ Set a handler to be called when a namespace is declared. Namespace declarations + occur inside start tags. But the namespace declaration start handler is called + before the start tag handler for each namespace declared in that start tag. +

+
-
-

XML_SetEndNamespaceDeclHandler

-
+      
+

+ XML_SetEndNamespaceDeclHandler +

+ +
 void XMLCALL
 XML_SetEndNamespaceDeclHandler(XML_Parser p,
-			       XML_EndNamespaceDeclHandler end);
+                               XML_EndNamespaceDeclHandler end);
 
-
+
+        
 typedef void
 (XMLCALL *XML_EndNamespaceDeclHandler)(void *userData,
                                        const XML_Char *prefix);
 
-

Set a handler to be called when leaving the scope of a namespace -declaration. This will be called, for each namespace declaration, -after the handler for the end tag of the element in which the -namespace was declared.

-
+

+ Set a handler to be called when leaving the scope of a namespace declaration. + This will be called, for each namespace declaration, after the handler for the + end tag of the element in which the namespace was declared. +

+
-
-

XML_SetNamespaceDeclHandler

-
+      
+

+ XML_SetNamespaceDeclHandler +

+ +
 void XMLCALL
 XML_SetNamespaceDeclHandler(XML_Parser p,
                             XML_StartNamespaceDeclHandler start,
                             XML_EndNamespaceDeclHandler end)
 
-

Sets both namespace declaration handlers with a single call.

-
+

+ Sets both namespace declaration handlers with a single call. +

+
-
-

XML_SetXmlDeclHandler

-
+      
+

+ XML_SetXmlDeclHandler +

+ +
 void XMLCALL
 XML_SetXmlDeclHandler(XML_Parser p,
-		      XML_XmlDeclHandler xmldecl);
+                      XML_XmlDeclHandler xmldecl);
 
-
+
+        
 typedef void
 (XMLCALL *XML_XmlDeclHandler)(void            *userData,
                               const XML_Char  *version,
                               const XML_Char  *encoding,
                               int             standalone);
 
-

Sets a handler that is called for XML declarations and also for -text declarations discovered in external entities. The way to -distinguish is that the version parameter will be NULL -for text declarations. The encoding parameter may be NULL -for an XML declaration. The standalone argument will -contain -1, 0, or 1 indicating respectively that there was no -standalone parameter in the declaration, that it was given as no, or -that it was given as yes.

-
+

+ Sets a handler that is called for XML declarations and also for text + declarations discovered in external entities. The way to distinguish is that + the version parameter will be NULL for text + declarations. The encoding parameter may be NULL for + an XML declaration. The standalone argument will contain -1, 0, or + 1 indicating respectively that there was no standalone parameter in the + declaration, that it was given as no, or that it was given as yes. +

+
-
-

XML_SetStartDoctypeDeclHandler

-
+      
+

+ XML_SetStartDoctypeDeclHandler +

+ +
 void XMLCALL
 XML_SetStartDoctypeDeclHandler(XML_Parser p,
-			       XML_StartDoctypeDeclHandler start);
+                               XML_StartDoctypeDeclHandler start);
 
-
+
+        
 typedef void
 (XMLCALL *XML_StartDoctypeDeclHandler)(void           *userData,
                                        const XML_Char *doctypeName,
@@ -1832,52 +2546,71 @@ typedef void
                                        const XML_Char *pubid,
                                        int            has_internal_subset);
 
-

Set a handler that is called at the start of a DOCTYPE declaration, -before any external or internal subset is parsed. Both sysid -and pubid may be NULL. The has_internal_subset -will be non-zero if the DOCTYPE declaration has an internal subset.

-
+

+ Set a handler that is called at the start of a DOCTYPE declaration, before any + external or internal subset is parsed. Both sysid and + pubid may be NULL. The + has_internal_subset will be non-zero if the DOCTYPE declaration + has an internal subset. +

+
-
-

XML_SetEndDoctypeDeclHandler

-
+      
+

+ XML_SetEndDoctypeDeclHandler +

+ +
 void XMLCALL
 XML_SetEndDoctypeDeclHandler(XML_Parser p,
-			     XML_EndDoctypeDeclHandler end);
+                             XML_EndDoctypeDeclHandler end);
 
-
+
+        
 typedef void
 (XMLCALL *XML_EndDoctypeDeclHandler)(void *userData);
 
-

Set a handler that is called at the end of a DOCTYPE declaration, -after parsing any external subset.

-
+

+ Set a handler that is called at the end of a DOCTYPE declaration, after parsing + any external subset. +

+
-
-

XML_SetDoctypeDeclHandler

-
+      
+

+ XML_SetDoctypeDeclHandler +

+ +
 void XMLCALL
 XML_SetDoctypeDeclHandler(XML_Parser p,
-			  XML_StartDoctypeDeclHandler start,
-			  XML_EndDoctypeDeclHandler end);
+                          XML_StartDoctypeDeclHandler start,
+                          XML_EndDoctypeDeclHandler end);
 
-

Set both doctype handlers with one call.

-
+

+ Set both doctype handlers with one call. +

+
-
-

XML_SetElementDeclHandler

-
+      
+

+ XML_SetElementDeclHandler +

+ +
 void XMLCALL
 XML_SetElementDeclHandler(XML_Parser p,
-			  XML_ElementDeclHandler eldecl);
+                          XML_ElementDeclHandler eldecl);
 
-
+
+        
 typedef void
 (XMLCALL *XML_ElementDeclHandler)(void *userData,
                                   const XML_Char *name,
                                   XML_Content *model);
 
-
+
+        
 enum XML_Content_Type {
   XML_CTYPE_EMPTY = 1,
   XML_CTYPE_ANY,
@@ -1897,55 +2630,65 @@ enum XML_Content_Quant {
 typedef struct XML_cp XML_Content;
 
 struct XML_cp {
-  enum XML_Content_Type		type;
-  enum XML_Content_Quant	quant;
-  const XML_Char *		name;
-  unsigned int			numchildren;
-  XML_Content *			children;
+  enum XML_Content_Type         type;
+  enum XML_Content_Quant        quant;
+  const XML_Char *              name;
+  unsigned int                  numchildren;
+  XML_Content *                 children;
 };
 
-

Sets a handler for element declarations in a DTD. The handler gets -called with the name of the element in the declaration and a pointer -to a structure that contains the element model. It's the user code's -responsibility to free model when finished with via a call to -XML_FreeContentModel. -There is no need to free the model from the handler, it can be kept -around and freed at a later stage.

+

+ Sets a handler for element declarations in a DTD. The handler gets called with + the name of the element in the declaration and a pointer to a structure that + contains the element model. It's the user code's responsibility to free model + when finished with it via a call to XML_FreeContentModel. There is no need to + free the model from the handler; it can be kept around and freed at a later + stage. +

-

The model argument is the root of a tree of -XML_Content nodes. If type equals -XML_CTYPE_EMPTY or XML_CTYPE_ANY, then -quant will be XML_CQUANT_NONE, and the other -fields will be zero or NULL. If type is -XML_CTYPE_MIXED, then quant will be -XML_CQUANT_NONE or XML_CQUANT_REP and -numchildren will contain the number of elements that are -allowed to be mixed in and children points to an array of -XML_Content structures that will all have type -XML_CTYPE_NAME with no quantification. Only the root node can be type -XML_CTYPE_EMPTY, XML_CTYPE_ANY, or -XML_CTYPE_MIXED.

+

+ The model argument is the root of a tree of + XML_Content nodes. If type equals + XML_CTYPE_EMPTY or XML_CTYPE_ANY, then + quant will be XML_CQUANT_NONE, and the other fields + will be zero or NULL. If type is + XML_CTYPE_MIXED, then quant will be + XML_CQUANT_NONE or XML_CQUANT_REP and + numchildren will contain the number of elements that are allowed + to be mixed in and children points to an array of + XML_Content structures that will all have type XML_CTYPE_NAME with + no quantification. Only the root node can be type XML_CTYPE_EMPTY, + XML_CTYPE_ANY, or XML_CTYPE_MIXED. +

-

For type XML_CTYPE_NAME, the name field -points to the name and the numchildren and -children fields will be zero and NULL. The -quant field will indicate any quantifiers placed on the -name.

+

+ For type XML_CTYPE_NAME, the name field points to the + name and the numchildren and children fields will be + zero and NULL. The quant field will indicate any + quantifiers placed on the name. +

-

Types XML_CTYPE_CHOICE and XML_CTYPE_SEQ -indicate a choice or sequence respectively. The -numchildren field indicates how many nodes in the choice -or sequence and children points to the nodes.

-
+

+ Types XML_CTYPE_CHOICE and XML_CTYPE_SEQ indicate a + choice or sequence respectively. The numchildren field indicates + how many nodes are in the choice or sequence, and children points to + the nodes. +

+
-
-

XML_SetAttlistDeclHandler

-
+      
+

+ XML_SetAttlistDeclHandler +

+ +
 void XMLCALL
 XML_SetAttlistDeclHandler(XML_Parser p,
                           XML_AttlistDeclHandler attdecl);
 
-
+
+        
 typedef void
 (XMLCALL *XML_AttlistDeclHandler)(void           *userData,
                                   const XML_Char *elname,
@@ -1954,563 +2697,693 @@ typedef void
                                   const XML_Char *dflt,
                                   int            isrequired);
 
-

Set a handler for attlist declarations in the DTD. This handler is -called for each attribute. So a single attlist declaration -with multiple attributes declared will generate multiple calls to this -handler. The elname parameter returns the name of the -element for which the attribute is being declared. The attribute name -is in the attname parameter. The attribute type is in the -att_type parameter. It is the string representing the -type in the declaration with whitespace removed.

+

+ Set a handler for attlist declarations in the DTD. This handler is called for + each attribute. So a single attlist declaration with multiple + attributes declared will generate multiple calls to this handler. The + elname parameter returns the name of the element for which the + attribute is being declared. The attribute name is in the attname + parameter. The attribute type is in the att_type parameter. It is + the string representing the type in the declaration with whitespace removed. +

-

The dflt parameter holds the default value. It will be -NULL in the case of "#IMPLIED" or "#REQUIRED" attributes. You can -distinguish these two cases by checking the isrequired -parameter, which will be true in the case of "#REQUIRED" attributes. -Attributes which are "#FIXED" will have also have a true -isrequired, but they will have the non-NULL fixed value -in the dflt parameter.

-
+

+ The dflt parameter holds the default value. It will be + NULL in the case of "#IMPLIED" or "#REQUIRED" attributes. You can + distinguish these two cases by checking the isrequired parameter, + which will be true in the case of "#REQUIRED" attributes. Attributes which are + "#FIXED" will also have a true isrequired, but they will have + the non-NULL fixed value in the dflt parameter. +

+
-
-

XML_SetEntityDeclHandler

-
+      
+

+ XML_SetEntityDeclHandler +

+ +
 void XMLCALL
 XML_SetEntityDeclHandler(XML_Parser p,
-			 XML_EntityDeclHandler handler);
+                         XML_EntityDeclHandler handler);
 
-
+
+        
 typedef void
 (XMLCALL *XML_EntityDeclHandler)(void           *userData,
                                  const XML_Char *entityName,
                                  int            is_parameter_entity,
                                  const XML_Char *value,
-                                 int            value_length, 
+                                 int            value_length,
                                  const XML_Char *base,
                                  const XML_Char *systemId,
                                  const XML_Char *publicId,
                                  const XML_Char *notationName);
 
-

Sets a handler that will be called for all entity declarations. -The is_parameter_entity argument will be non-zero in the -case of parameter entities and zero otherwise.

+

+ Sets a handler that will be called for all entity declarations. The + is_parameter_entity argument will be non-zero in the case of + parameter entities and zero otherwise. +

-

For internal entities (<!ENTITY foo "bar">), -value will be non-NULL and systemId, -publicId, and notationName will all be NULL. -The value string is not null-terminated; the length is -provided in the value_length parameter. Do not use -value_length to test for internal entities, since it is -legal to have zero-length values. Instead check for whether or not -value is NULL.

The notationName -argument will have a non-NULL value only for unparsed entity -declarations.

-
+

+ For internal entities (<!ENTITY foo "bar">), + value will be non-NULL and systemId, + publicId, and notationName will all be + NULL. The value string is not null-terminated; the length + is provided in the value_length parameter. Do not use + value_length to test for internal entities, since it is legal to + have zero-length values. Instead check for whether or not value is + NULL. +

-
-

XML_SetUnparsedEntityDeclHandler

-
+        

+ The notationName argument will have a non-NULL value + only for unparsed entity declarations. +

+
+ +
+

+ XML_SetUnparsedEntityDeclHandler +

+ +
 void XMLCALL
 XML_SetUnparsedEntityDeclHandler(XML_Parser p,
                                  XML_UnparsedEntityDeclHandler h)
 
-
+
+        
 typedef void
 (XMLCALL *XML_UnparsedEntityDeclHandler)(void *userData,
-                                         const XML_Char *entityName, 
+                                         const XML_Char *entityName,
                                          const XML_Char *base,
                                          const XML_Char *systemId,
                                          const XML_Char *publicId,
                                          const XML_Char *notationName);
 
-

Set a handler that receives declarations of unparsed entities. These -are entity declarations that have a notation (NDATA) field:

+

+ Set a handler that receives declarations of unparsed entities. These are entity + declarations that have a notation (NDATA) field: +

-
+        
+
 <!ENTITY logo SYSTEM "images/logo.gif" NDATA gif>
-
-

This handler is obsolete and is provided for backwards -compatibility. Use instead XML_SetEntityDeclHandler.

-
+
+
-
-

XML_SetNotationDeclHandler

-
+        

+ This handler is obsolete and is provided for backwards compatibility. Use + instead XML_SetEntityDeclHandler. +

+
+ +
+

+ XML_SetNotationDeclHandler +

+ +
 void XMLCALL
 XML_SetNotationDeclHandler(XML_Parser p,
                            XML_NotationDeclHandler h)
 
-
+
+        
 typedef void
-(XMLCALL *XML_NotationDeclHandler)(void *userData, 
+(XMLCALL *XML_NotationDeclHandler)(void *userData,
                                    const XML_Char *notationName,
                                    const XML_Char *base,
                                    const XML_Char *systemId,
                                    const XML_Char *publicId);
 
-

Set a handler that receives notation declarations.

-
+

+ Set a handler that receives notation declarations. +

+
-
-

XML_SetNotStandaloneHandler

-
+      
+

+ XML_SetNotStandaloneHandler +

+ +
 void XMLCALL
 XML_SetNotStandaloneHandler(XML_Parser p,
                             XML_NotStandaloneHandler h)
 
-
-typedef int 
+
+        
+typedef int
 (XMLCALL *XML_NotStandaloneHandler)(void *userData);
 
-

Set a handler that is called if the document is not "standalone". -This happens when there is an external subset or a reference to a -parameter entity, but does not have standalone set to "yes" in an XML -declaration. If this handler returns XML_STATUS_ERROR, -then the parser will throw an XML_ERROR_NOT_STANDALONE -error.

-
+

+ Set a handler that is called if the document is not "standalone". This happens + when there is an external subset or a reference to a parameter entity, but does + not have standalone set to "yes" in an XML declaration. If this handler returns + XML_STATUS_ERROR, then the parser will throw an + XML_ERROR_NOT_STANDALONE error. +

+
-

Parse position and error reporting functions

+

+ Parse position and error reporting functions +

-

These are the functions you'll want to call when the parse -functions return XML_STATUS_ERROR (a parse error has -occurred), although the position reporting functions are useful outside -of errors. The position reported is the byte position (in the original -document or entity encoding) of the first of the sequence of -characters that generated the current event (or the error that caused -the parse functions to return XML_STATUS_ERROR.) The -exceptions are callbacks triggered by declarations in the document -prologue, in which case they exact position reported is somewhere in the -relevant markup, but not necessarily as meaningful as for other -events.

+

+ These are the functions you'll want to call when the parse functions return + XML_STATUS_ERROR (a parse error has occurred), although the position + reporting functions are useful outside of errors. The position reported is the + byte position (in the original document or entity encoding) of the first of the + sequence of characters that generated the current event (or the error that caused + the parse functions to return XML_STATUS_ERROR.) The exceptions are + callbacks triggered by declarations in the document prologue, in which case the + exact position reported is somewhere in the relevant markup, but not necessarily + as meaningful as for other events. +

-

The position reporting functions are accurate only outside of the -DTD. In other words, they usually return bogus information when -called from within a DTD declaration handler.

+

+ The position reporting functions are accurate only outside of the DTD. In other + words, they usually return bogus information when called from within a DTD + declaration handler. +

-

XML_GetErrorCode

-
+      

+ XML_GetErrorCode +

+ +
 enum XML_Error XMLCALL
 XML_GetErrorCode(XML_Parser p);
 
-
-Return what type of error has occurred. -
+
+ Return what type of error has occurred. +
-

XML_ErrorString

-
+      

+ XML_ErrorString +

+ +
 const XML_LChar * XMLCALL
 XML_ErrorString(enum XML_Error code);
 
-
-Return a string describing the error corresponding to code. -The code should be one of the enums that can be returned from -XML_GetErrorCode. -
+
+ Return a string describing the error corresponding to code. The code should be + one of the enums that can be returned from XML_GetErrorCode. +
-

XML_GetCurrentByteIndex

-
+      

+ XML_GetCurrentByteIndex +

+ +
 XML_Index XMLCALL
 XML_GetCurrentByteIndex(XML_Parser p);
 
-
-Return the byte offset of the position. This always corresponds to -the values returned by XML_GetCurrentLineNumber and XML_GetCurrentColumnNumber. -
+
+ Return the byte offset of the position. This always corresponds to the values + returned by XML_GetCurrentLineNumber and + XML_GetCurrentColumnNumber. +
-

XML_GetCurrentLineNumber

-
+      

+ XML_GetCurrentLineNumber +

+ +
 XML_Size XMLCALL
 XML_GetCurrentLineNumber(XML_Parser p);
 
-
-Return the line number of the position. The first line is reported as -1. -
+
+ Return the line number of the position. The first line is reported as + 1. +
-

XML_GetCurrentColumnNumber

-
+      

+ XML_GetCurrentColumnNumber +

+ +
 XML_Size XMLCALL
 XML_GetCurrentColumnNumber(XML_Parser p);
 
-
-Return the offset, from the beginning of the current line, of -the position. The first column is reported as 0. -
+
+ Return the offset, from the beginning of the current line, of the + position. The first column is reported as 0. +
-

XML_GetCurrentByteCount

-
+      

+ XML_GetCurrentByteCount +

+ +
 int XMLCALL
 XML_GetCurrentByteCount(XML_Parser p);
 
-
-Return the number of bytes in the current event. Returns -0 if the event is inside a reference to an internal -entity and for the end-tag event for empty element tags (the later can -be used to distinguish empty-element tags from empty elements using -separate start and end tags). -
+
+ Return the number of bytes in the current event. Returns 0 if the + event is inside a reference to an internal entity and for the end-tag event for + empty element tags (the latter can be used to distinguish empty-element tags from + empty elements using separate start and end tags). +
-

XML_GetInputContext

-
+      

+ XML_GetInputContext +

+ +
 const char * XMLCALL
 XML_GetInputContext(XML_Parser p,
                     int *offset,
                     int *size);
 
-
+
+

+ Returns the parser's input buffer, sets the integer pointed at by + offset to the offset within this buffer of the current parse + position, and sets the integer pointed at by size to the size of + the returned buffer. +

-

Returns the parser's input buffer, sets the integer pointed at by -offset to the offset within this buffer of the current -parse position, and set the integer pointed at by size to -the size of the returned buffer.

+

+ This should only be called from within a handler during an active parse and the + returned buffer should only be referred to from within the handler that made + the call. This input buffer contains the untranslated bytes of the input. +

-

This should only be called from within a handler during an active -parse and the returned buffer should only be referred to from within -the handler that made the call. This input buffer contains the -untranslated bytes of the input.

+

+ Only a limited amount of context is kept, so if the event triggering a call + spans over a very large amount of input, the actual parse position may be + before the beginning of the buffer. +

-

Only a limited amount of context is kept, so if the event -triggering a call spans over a very large amount of input, the actual -parse position may be before the beginning of the buffer.

+

+ If XML_CONTEXT_BYTES is zero, this will always return + NULL. +

+
-

If XML_CONTEXT_BYTES is zero, this will always -return NULL.

-
+

+ Attack Protection +

-

Attack Protection

+

+ XML_SetBillionLaughsAttackProtectionMaximumAmplification +

-

XML_SetBillionLaughsAttackProtectionMaximumAmplification

-
+      
 /* Added in Expat 2.4.0. */
 XML_Bool XMLCALL
 XML_SetBillionLaughsAttackProtectionMaximumAmplification(XML_Parser p,
                                                          float maximumAmplificationFactor);
 
-
-

- Sets the maximum tolerated amplification factor - for protection against - billion laughs attacks - (default: 100.0) - of parser p to maximumAmplificationFactor, and - returns XML_TRUE upon success and XML_FALSE upon error. -

+
+

+ Sets the maximum tolerated amplification factor for protection against billion laughs + attacks (default: 100.0) of parser p to + maximumAmplificationFactor, and returns XML_TRUE upon + success and XML_FALSE upon error. +

-

- Once the threshold for activation is reached, - the amplification factor is calculated as .. -

-
amplification := (direct + indirect) / direct
-

- .. while parsing, whereas - direct is the number of bytes read from the primary document in parsing and - indirect is the number of bytes added by expanding entities and reading of external DTD files, combined. -

+

+ Once the threshold for + activation is reached, the amplification factor is calculated as .. +

-

For a call to XML_SetBillionLaughsAttackProtectionMaximumAmplification to succeed:

-
    -
  • parser p must be a non-NULL root parser (without any parent parsers) and
  • -
  • maximumAmplificationFactor must be non-NaN and greater than or equal to 1.0.
  • -
+
amplification := (direct + indirect) / direct
+

+ .. while parsing, whereas direct is the number of bytes read from + the primary document in parsing and indirect is the number of + bytes added by expanding entities and reading of external DTD files, combined. +

-

- Note: - If you ever need to increase this value for non-attack payload, - please file a bug report. -

+

+ For a call to + XML_SetBillionLaughsAttackProtectionMaximumAmplification to + succeed: +

-

- Note: - Peak amplifications - of factor 15,000 for the entire payload and - of factor 30,000 in the middle of parsing - have been observed with small benign files in practice. +

    +
  • parser p must be a non-NULL root parser (without + any parent parsers) and +
  • - So if you do reduce the maximum allowed amplification, - please make sure that the activation threshold is still big enough - to not end up with undesired false positives (i.e. benign files being rejected). -

    -
+
  • + maximumAmplificationFactor must be non-NaN and + greater than or equal to 1.0. +
  • + -

    XML_SetBillionLaughsAttackProtectionActivationThreshold

    -
    +        

    + Note: If you ever need to increase this value for non-attack + payload, please file a + bug report. +

    + +

    + Note: Peak amplifications of factor 15,000 for the entire + payload and of factor 30,000 in the middle of parsing have been observed with + small benign files in practice. So if you do reduce the maximum allowed + amplification, please make sure that the activation threshold is still big + enough to not end up with undesired false positives (i.e. benign files being + rejected). +

    +
    + +

    + XML_SetBillionLaughsAttackProtectionActivationThreshold +

    + +
     /* Added in Expat 2.4.0. */
     XML_Bool XMLCALL
     XML_SetBillionLaughsAttackProtectionActivationThreshold(XML_Parser p,
                                                             unsigned long long activationThresholdBytes);
     
    -
    -

    - Sets number of output bytes (including amplification from entity expansion and reading DTD files) - needed to activate protection against - billion laughs attacks - (default: 8 MiB) - of parser p to activationThresholdBytes, and - returns XML_TRUE upon success and XML_FALSE upon error. -

    +
    +

    + Sets number of output bytes (including amplification from entity expansion and + reading DTD files) needed to activate protection against billion laughs + attacks (default: 8 MiB) of parser p to + activationThresholdBytes, and returns XML_TRUE upon + success and XML_FALSE upon error. +

    -

    For a call to XML_SetBillionLaughsAttackProtectionActivationThreshold to succeed:

    -
      -
    • parser p must be a non-NULL root parser (without any parent parsers).
    • -
    +

    + For a call to + XML_SetBillionLaughsAttackProtectionActivationThreshold to + succeed: +

    -

    - Note: - If you ever need to increase this value for non-attack payload, - please file a bug report. -

    +
      +
    • parser p must be a non-NULL root parser (without + any parent parsers). +
    • +
    -

    - Note: - Activation thresholds below 4 MiB are known to break support for - DITA 1.3 payload - and are hence not recommended. -

    -
    +

    + Note: If you ever need to increase this value for non-attack + payload, please file a + bug report. +

    -

    XML_SetAllocTrackerMaximumAmplification

    -
    +        

    + Note: Activation thresholds below 4 MiB are known to break + support for DITA + 1.3 payload and are hence not recommended. +

    +
    + +

    + XML_SetAllocTrackerMaximumAmplification +

    + +
     /* Added in Expat 2.7.2. */
     XML_Bool
     XML_SetAllocTrackerMaximumAmplification(XML_Parser p,
                                             float maximumAmplificationFactor);
     
    -
    -

    - Sets the maximum tolerated amplification factor - between direct input and bytes of dynamic memory allocated - (default: 100.0) - of parser p to maximumAmplificationFactor, and - returns XML_TRUE upon success and XML_FALSE upon error. -

    +
    +

    + Sets the maximum tolerated amplification factor between direct input and bytes + of dynamic memory allocated (default: 100.0) of parser + p to maximumAmplificationFactor, and returns + XML_TRUE upon success and XML_FALSE upon error. +

    -

    - Note: - There are three types of allocations that intentionally bypass tracking and limiting: -

    - +

    + Note: There are three types of allocations that intentionally + bypass tracking and limiting: +

    -

    - Once the threshold for activation is reached, - the amplification factor is calculated as .. -

    -
    amplification := allocated / direct
    -

    - .. while parsing, whereas - direct is the number of bytes read from the primary document in parsing and - allocated is the number of bytes of dynamic memory allocated in the parser hierarchy. -

    + -

    - Note: - Amplifications factors greater than 100.0 can been observed near the start of parsing - even with benign files in practice. +

    + Once the threshold for + activation is reached, the amplification factor is calculated as .. +

    - So if you do reduce the maximum allowed amplification, - please make sure that the activation threshold is still big enough - to not end up with undesired false positives (i.e. benign files being rejected). -

    -
    +
    amplification := allocated / direct
    +

    + .. while parsing, whereas direct is the number of bytes read from + the primary document in parsing and allocated is the number of + bytes of dynamic memory allocated in the parser hierarchy. +

    -

    XML_SetAllocTrackerActivationThreshold

    -
    +        

    + For a call to XML_SetAllocTrackerMaximumAmplification to succeed: +

    + +
      +
    • parser p must be a non-NULL root parser (without + any parent parsers) and +
    • + +
    • + maximumAmplificationFactor must be non-NaN and + greater than or equal to 1.0. +
    • +
    + +

    + Note: If you ever need to increase this value for non-attack + payload, please file a + bug report. +

    + +

+ Note: Amplification factors greater than 100.0 + can be observed near the start of parsing even with benign files in practice. + So if you do reduce the maximum allowed amplification, please make sure that + the activation threshold is still big enough to not end up with undesired false + positives (i.e. benign files being rejected). +

    +
    + +

    + XML_SetAllocTrackerActivationThreshold +

    + +
     /* Added in Expat 2.7.2. */
     XML_Bool
     XML_SetAllocTrackerActivationThreshold(XML_Parser p,
                                            unsigned long long activationThresholdBytes);
     
    -
    -

    - Sets number of allocated bytes of dynamic memory - needed to activate protection against disproportionate use of RAM - (default: 64 MiB) - of parser p to activationThresholdBytes, and - returns XML_TRUE upon success and XML_FALSE upon error. -

    +
    +

    + Sets number of allocated bytes of dynamic memory needed to activate protection + against disproportionate use of RAM (default: 64 MiB) of parser + p to activationThresholdBytes, and returns + XML_TRUE upon success and XML_FALSE upon error. +

    -

    - Note: - For types of allocations that intentionally bypass tracking and limiting, please see - XML_SetAllocTrackerMaximumAmplification - above. -

    +

    + Note: For types of allocations that intentionally bypass + tracking and limiting, please see XML_SetAllocTrackerMaximumAmplification + above. +

    -

    For a call to XML_SetAllocTrackerActivationThreshold to succeed:

    -
      -
    • parser p must be a non-NULL root parser (without any parent parsers).
    • -
    +

    + For a call to XML_SetAllocTrackerActivationThreshold to succeed: +

    -

    - Note: - If you ever need to increase this value for non-attack payload, - please file a bug report. -

    -
    +
      +
    • parser p must be a non-NULL root parser (without + any parent parsers). +
    • +
    -

    XML_SetReparseDeferralEnabled

    -
    +        

    + Note: If you ever need to increase this value for non-attack + payload, please file a + bug report. +

    +
    + +

    + XML_SetReparseDeferralEnabled +

    + +
     /* Added in Expat 2.6.0. */
     XML_Bool XMLCALL
     XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled);
     
    -
    -

    - Large tokens may require many parse calls before enough data is available for Expat to parse it in full. - If Expat retried parsing the token on every parse call, parsing could take quadratic time. - To avoid this, Expat only retries once a significant amount of new data is available. - This function allows disabling this behavior. -

    -

    - The enabled argument should be XML_TRUE or XML_FALSE. -

    -

    - Returns XML_TRUE on success, and XML_FALSE on error. -

    -
    +
    +

    + Large tokens may require many parse calls before enough data is available for + Expat to parse it in full. If Expat retried parsing the token on every parse + call, parsing could take quadratic time. To avoid this, Expat only retries once + a significant amount of new data is available. This function allows disabling + this behavior. +

    -

    Miscellaneous functions

    +

    + The enabled argument should be XML_TRUE or + XML_FALSE. +

    -

    The functions in this section either obtain state information from -the parser or can be used to dynamically set parser options.

    +

    + Returns XML_TRUE on success, and XML_FALSE on error. +

    +
    -

    XML_SetUserData

    -
    +      

    + Miscellaneous functions +

    + +

    + The functions in this section either obtain state information from the parser or + can be used to dynamically set parser options. +

    + +

    + XML_SetUserData +

    + +
     void XMLCALL
     XML_SetUserData(XML_Parser p,
                     void *userData);
     
    -
    -This sets the user data pointer that gets passed to handlers. It -overwrites any previous value for this pointer. Note that the -application is responsible for freeing the memory associated with -userData when it is finished with the parser. So if you -call this when there's already a pointer there, and you haven't freed -the memory associated with it, then you've probably just leaked -memory. -
    +
    + This sets the user data pointer that gets passed to handlers. It overwrites any + previous value for this pointer. Note that the application is responsible for + freeing the memory associated with userData when it is finished with + the parser. So if you call this when there's already a pointer there, and you + haven't freed the memory associated with it, then you've probably just leaked + memory. +
    -

    XML_GetUserData

    -
    +      

    + XML_GetUserData +

    + +
     void * XMLCALL
     XML_GetUserData(XML_Parser p);
     
    -
    -This returns the user data pointer that gets passed to handlers. -It is actually implemented as a macro. -
    +
    + This returns the user data pointer that gets passed to handlers. It is actually + implemented as a macro. +
    -

    XML_UseParserAsHandlerArg

    -
    +      

    + XML_UseParserAsHandlerArg +

    + +
     void XMLCALL
     XML_UseParserAsHandlerArg(XML_Parser p);
     
    -
    -After this is called, handlers receive the parser in their -userData arguments. The user data can still be obtained -using the XML_GetUserData function. -
    +
    + After this is called, handlers receive the parser in their userData + arguments. The user data can still be obtained using the XML_GetUserData function. +
    -

    XML_SetBase

    -
    +      

    + XML_SetBase +

    + +
     enum XML_Status XMLCALL
     XML_SetBase(XML_Parser p,
                 const XML_Char *base);
     
    -
    -Set the base to be used for resolving relative URIs in system -identifiers. The return value is XML_STATUS_ERROR if -there's no memory to store base, otherwise it's -XML_STATUS_OK. -
    +
    + Set the base to be used for resolving relative URIs in system identifiers. The + return value is XML_STATUS_ERROR if there's no memory to store base, + otherwise it's XML_STATUS_OK. +
    -

    XML_GetBase

    -
    +      

    + XML_GetBase +

    + +
     const XML_Char * XMLCALL
     XML_GetBase(XML_Parser p);
     
    -
    -Return the base for resolving relative URIs. -
    +
    + Return the base for resolving relative URIs. +
    -

    XML_GetSpecifiedAttributeCount

    -
    +      

    + XML_GetSpecifiedAttributeCount +

    + +
     int XMLCALL
     XML_GetSpecifiedAttributeCount(XML_Parser p);
     
    -
    -When attributes are reported to the start handler in the atts vector, -attributes that were explicitly set in the element occur before any -attributes that receive their value from default information in an -ATTLIST declaration. This function returns the number of attributes -that were explicitly set times two, thus giving the offset in the -atts array passed to the start tag handler of the first -attribute set due to defaults. It supplies information for the last -call to a start handler. If called inside a start handler, then that -means the current call. -
    +
    + When attributes are reported to the start handler in the atts vector, attributes + that were explicitly set in the element occur before any attributes that receive + their value from default information in an ATTLIST declaration. This function + returns the number of attributes that were explicitly set times two, thus giving + the offset in the atts array passed to the start tag handler of the + first attribute set due to defaults. It supplies information for the last call to + a start handler. If called inside a start handler, then that means the current + call. +
    -

    XML_GetIdAttributeIndex

    -
    +      

    + XML_GetIdAttributeIndex +

    + +
     int XMLCALL
     XML_GetIdAttributeIndex(XML_Parser p);
     
    -
    -Returns the index of the ID attribute passed in the atts array in the -last call to XML_StartElementHandler, or -1 if there is no ID -attribute. If called inside a start handler, then that means the -current call. -
    +
    + Returns the index of the ID attribute passed in the atts array in the last call + to XML_StartElementHandler, + or -1 if there is no ID attribute. If called inside a start handler, then that + means the current call. +
    -

    XML_GetAttributeInfo

    -
    +      

    + XML_GetAttributeInfo +

    + +
     const XML_AttrInfo * XMLCALL
     XML_GetAttributeInfo(XML_Parser parser);
     
    -
    +
    +      
     typedef struct {
       XML_Index  nameStart;  /* Offset to beginning of the attribute name. */
       XML_Index  nameEnd;    /* Offset after the attribute name's last byte. */
    @@ -2518,188 +3391,240 @@ typedef struct {
       XML_Index  valueEnd;   /* Offset after the attribute value's last byte. */
     } XML_AttrInfo;
     
    -
    -Returns an array of XML_AttrInfo structures for the -attribute/value pairs passed in the last call to the -XML_StartElementHandler that were specified -in the start-tag rather than defaulted. Each attribute/value pair counts -as 1; thus the number of entries in the array is -XML_GetSpecifiedAttributeCount(parser) / 2. -
    +
    + Returns an array of XML_AttrInfo structures for the attribute/value + pairs passed in the last call to the XML_StartElementHandler that + were specified in the start-tag rather than defaulted. Each attribute/value pair + counts as 1; thus the number of entries in the array is + XML_GetSpecifiedAttributeCount(parser) / 2. +
    -

    XML_SetEncoding

    -
    +      

    + XML_SetEncoding +

    + +
     enum XML_Status XMLCALL
     XML_SetEncoding(XML_Parser p,
                     const XML_Char *encoding);
     
    -
    -Set the encoding to be used by the parser. It is equivalent to -passing a non-NULL encoding argument to the parser creation functions. -It must not be called after XML_Parse or XML_ParseBuffer have been called on the given parser. -Returns XML_STATUS_OK on success or -XML_STATUS_ERROR on error. -
    +
    + Set the encoding to be used by the parser. It is equivalent to passing a + non-NULL encoding argument to the parser creation functions. It must + not be called after XML_Parse or + XML_ParseBuffer have been called on + the given parser. Returns XML_STATUS_OK on success or + XML_STATUS_ERROR on error. +
    -

    XML_SetParamEntityParsing

    -
    +      

    + XML_SetParamEntityParsing +

    + +
     int XMLCALL
     XML_SetParamEntityParsing(XML_Parser p,
                               enum XML_ParamEntityParsing code);
     
    -
    -This enables parsing of parameter entities, including the external -parameter entity that is the external DTD subset, according to -code. -The choices for code are: -
      -
    • XML_PARAM_ENTITY_PARSING_NEVER
    • -
    • XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE
    • -
    • XML_PARAM_ENTITY_PARSING_ALWAYS
    • -
    -Note: If XML_SetParamEntityParsing is called after -XML_Parse or XML_ParseBuffer, then it has -no effect and will always return 0. -
    +
    + This enables parsing of parameter entities, including the external parameter + entity that is the external DTD subset, according to code. The + choices for code are: +
      +
    • + XML_PARAM_ENTITY_PARSING_NEVER +
    • -

      XML_SetHashSalt

      -
      +          
    • + XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE +
    • + +
    • + XML_PARAM_ENTITY_PARSING_ALWAYS +
    • +
    + Note: If XML_SetParamEntityParsing is called after + XML_Parse or XML_ParseBuffer, then it has no effect and + will always return 0. +
    + +

    + XML_SetHashSalt +

    + +
     int XMLCALL
     XML_SetHashSalt(XML_Parser p,
                     unsigned long hash_salt);
     
    -
    -Sets the hash salt to use for internal hash calculations. -Helps in preventing DoS attacks based on predicting hash -function behavior. In order to have an effect this must be called -before parsing has started. Returns 1 if successful, 0 when called -after XML_Parse or XML_ParseBuffer. -

    Note: This call is optional, as the parser will auto-generate -a new random salt value if no value has been set at the start of parsing.

    -

    Note: One should not call XML_SetHashSalt with a -hash salt value of 0, as this value is used as sentinel value to indicate -that XML_SetHashSalt has not been called. Consequently -such a call will have no effect, even if it returns 1.

    -
    +
    + Sets the hash salt to use for internal hash calculations. Helps in preventing DoS + attacks based on predicting hash function behavior. In order to have an effect + this must be called before parsing has started. Returns 1 if successful, 0 when + called after XML_Parse or XML_ParseBuffer. +

    + Note: This call is optional, as the parser will auto-generate a new + random salt value if no value has been set at the start of parsing. +

    -

    XML_UseForeignDTD

    -
    +        

    + Note: One should not call XML_SetHashSalt with a hash salt + value of 0, as this value is used as sentinel value to indicate that + XML_SetHashSalt has not been called. Consequently such a + call will have no effect, even if it returns 1. +

    +
    + +

    + XML_UseForeignDTD +

    + +
     enum XML_Error XMLCALL
     XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD);
     
    -
    -

    This function allows an application to provide an external subset -for the document type declaration for documents which do not specify -an external subset of their own. For documents which specify an -external subset in their DOCTYPE declaration, the application-provided -subset will be ignored. If the document does not contain a DOCTYPE -declaration at all and useDTD is true, the -application-provided subset will be parsed, but the -startDoctypeDeclHandler and -endDoctypeDeclHandler functions, if set, will not be -called. The setting of parameter entity parsing, controlled using -XML_SetParamEntityParsing, will be honored.

    +
    +

    + This function allows an application to provide an external subset for the + document type declaration for documents which do not specify an external subset + of their own. For documents which specify an external subset in their DOCTYPE + declaration, the application-provided subset will be ignored. If the document + does not contain a DOCTYPE declaration at all and useDTD is true, + the application-provided subset will be parsed, but the + startDoctypeDeclHandler and endDoctypeDeclHandler + functions, if set, will not be called. The setting of parameter entity parsing, + controlled using XML_SetParamEntityParsing, will be + honored. +

    -

    The application-provided external subset is read by calling the -external entity reference handler set via XML_SetExternalEntityRefHandler with both -publicId and systemId set to NULL.

    +

    + The application-provided external subset is read by calling the external entity + reference handler set via XML_SetExternalEntityRefHandler + with both publicId and systemId set to + NULL. +

    -

    If this function is called after parsing has begun, it returns -XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING and ignores -useDTD. If called when Expat has been compiled without -DTD support, it returns -XML_ERROR_FEATURE_REQUIRES_XML_DTD. Otherwise, it -returns XML_ERROR_NONE.

    +

    + If this function is called after parsing has begun, it returns + XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING and ignores + useDTD. If called when Expat has been compiled without DTD + support, it returns XML_ERROR_FEATURE_REQUIRES_XML_DTD. Otherwise, + it returns XML_ERROR_NONE. +

    -

    Note: For the purpose of checking WFC: Entity Declared, passing -useDTD == XML_TRUE will make the parser behave as if -the document had a DTD with an external subset. This holds true even if -the external entity reference handler returns without action.

    -
    +

    + Note: For the purpose of checking WFC: Entity Declared, passing + useDTD == XML_TRUE will make the parser behave as if the document + had a DTD with an external subset. This holds true even if the external entity + reference handler returns without action. +

    +
    -

    XML_SetReturnNSTriplet

    -
    +      

    + XML_SetReturnNSTriplet +

    + +
     void XMLCALL
     XML_SetReturnNSTriplet(XML_Parser parser,
                            int        do_nst);
     
    -
    -

    -This function only has an effect when using a parser created with -XML_ParserCreateNS, -i.e. when namespace processing is in effect. The do_nst -sets whether or not prefixes are returned with names qualified with a -namespace prefix. If this function is called with do_nst -non-zero, then afterwards namespace qualified names (that is qualified -with a prefix as opposed to belonging to a default namespace) are -returned as a triplet with the three parts separated by the namespace -separator specified when the parser was created. The order of -returned parts is URI, local name, and prefix.

    If -do_nst is zero, then namespaces are reported in the -default manner, URI then local_name separated by the namespace -separator.

    -
    +
    +

    + This function only has an effect when using a parser created with + XML_ParserCreateNS, i.e. when + namespace processing is in effect. The do_nst sets whether or not + prefixes are returned with names qualified with a namespace prefix. If this + function is called with do_nst non-zero, then afterwards namespace + qualified names (that is qualified with a prefix as opposed to belonging to a + default namespace) are returned as a triplet with the three parts separated by + the namespace separator specified when the parser was created. The order of + returned parts is URI, local name, and prefix. +

    -

    XML_DefaultCurrent

    -
    +        

    + If do_nst is zero, then namespaces are reported in the default + manner, URI then local_name separated by the namespace separator. +

    +
    + +

    + XML_DefaultCurrent +

    + +
     void XMLCALL
     XML_DefaultCurrent(XML_Parser parser);
     
    -
    -This can be called within a handler for a start element, end element, -processing instruction or character data. It causes the corresponding -markup to be passed to the default handler set by XML_SetDefaultHandler or -XML_SetDefaultHandlerExpand. It does nothing if there is -not a default handler. -
    +
    + This can be called within a handler for a start element, end element, processing + instruction or character data. It causes the corresponding markup to be passed to + the default handler set by XML_SetDefaultHandler or XML_SetDefaultHandlerExpand. It does + nothing if there is not a default handler. +
    -

    XML_ExpatVersion

    -
    +      

    + XML_ExpatVersion +

    + +
     XML_LChar * XMLCALL
     XML_ExpatVersion();
     
    -
    -Return the library version as a string (e.g. "expat_1.95.1"). -
    +
    + Return the library version as a string (e.g. "expat_1.95.1"). +
    -

    XML_ExpatVersionInfo

    -
    +      

    + XML_ExpatVersionInfo +

    + +
     struct XML_Expat_Version XMLCALL
     XML_ExpatVersionInfo();
     
    -
    +
    +      
     typedef struct {
       int major;
       int minor;
       int micro;
     } XML_Expat_Version;
     
    -
    -Return the library version information as a structure. -Some macros are also defined that support compile-time tests of the -library version: -
      -
    • XML_MAJOR_VERSION
    • -
    • XML_MINOR_VERSION
    • -
    • XML_MICRO_VERSION
    • -
    -Testing these constants is currently the best way to determine if -particular parts of the Expat API are available. -
    +
    + Return the library version information as a structure. Some macros are also + defined that support compile-time tests of the library version: +
      +
    • + XML_MAJOR_VERSION +
    • -

      XML_GetFeatureList

      -
      +          
    • + XML_MINOR_VERSION +
    • + +
    • + XML_MICRO_VERSION +
    • +
    + Testing these constants is currently the best way to determine if particular + parts of the Expat API are available. +
    + +

    + XML_GetFeatureList +

    + +
     const XML_Feature * XMLCALL
     XML_GetFeatureList();
     
    -
    +
    +      
     enum XML_FeatureEnum {
       XML_FEATURE_END = 0,
       XML_FEATURE_UNICODE,
    @@ -2719,114 +3644,140 @@ typedef struct {
       long int              value;
     } XML_Feature;
     
    -
    -

    Returns a list of "feature" records, providing details on how -Expat was configured at compile time. Most applications should not -need to worry about this, but this information is otherwise not -available from Expat. This function allows code that does need to -check these features to do so at runtime.

    +
    +

    + Returns a list of "feature" records, providing details on how Expat was + configured at compile time. Most applications should not need to worry about + this, but this information is otherwise not available from Expat. This function + allows code that does need to check these features to do so at runtime. +

    -

    The return value is an array of XML_Feature, -terminated by a record with a feature of -XML_FEATURE_END and name of NULL, -identifying the feature-test macros Expat was compiled with. Since an -application that requires this kind of information needs to determine -the type of character the name points to, records for the -XML_FEATURE_SIZEOF_XML_CHAR and -XML_FEATURE_SIZEOF_XML_LCHAR will be located at the -beginning of the list, followed by XML_FEATURE_UNICODE -and XML_FEATURE_UNICODE_WCHAR_T, if they are present at -all.

    +

    + The return value is an array of XML_Feature, terminated by a + record with a feature of XML_FEATURE_END and + name of NULL, identifying the feature-test macros + Expat was compiled with. Since an application that requires this kind of + information needs to determine the type of character the name + points to, records for the XML_FEATURE_SIZEOF_XML_CHAR and + XML_FEATURE_SIZEOF_XML_LCHAR will be located at the beginning of + the list, followed by XML_FEATURE_UNICODE and + XML_FEATURE_UNICODE_WCHAR_T, if they are present at all. +

    -

    Some features have an associated value. If there isn't an -associated value, the value field is set to 0. At this -time, the following features have been defined to have values:

    +

    + Some features have an associated value. If there isn't an associated value, the + value field is set to 0. At this time, the following features have + been defined to have values: +

    -
    -
    XML_FEATURE_SIZEOF_XML_CHAR
    -
    The number of bytes occupied by one XML_Char - character.
    -
    XML_FEATURE_SIZEOF_XML_LCHAR
    -
    The number of bytes occupied by one XML_LChar - character.
    -
    XML_FEATURE_CONTEXT_BYTES
    -
    The maximum number of characters of context which can be - reported by XML_GetInputContext.
    -
    -
    +
    +
    + XML_FEATURE_SIZEOF_XML_CHAR +
    -

    XML_FreeContentModel

    -
    +          
    + The number of bytes occupied by one XML_Char character. +
    + +
    + XML_FEATURE_SIZEOF_XML_LCHAR +
    + +
    + The number of bytes occupied by one XML_LChar character. +
    + +
    + XML_FEATURE_CONTEXT_BYTES +
    + +
    + The maximum number of characters of context which can be reported by + XML_GetInputContext. +
    +
    +
    + +

    + XML_FreeContentModel +

    + +
     void XMLCALL
     XML_FreeContentModel(XML_Parser parser, XML_Content *model);
     
    -
    -Function to deallocate the model argument passed to the -XML_ElementDeclHandler callback set using XML_ElementDeclHandler. -This function should not be used for any other purpose. -
    +
    + Function to deallocate the model argument passed to the + XML_ElementDeclHandler callback set using XML_SetElementDeclHandler. This function + should not be used for any other purpose. +
    -

    The following functions allow external code to share the memory -allocator an XML_Parser has been configured to use. This -is especially useful for third-party libraries that interact with a -parser object created by application code, or heavily layered -applications. This can be essential when using dynamically loaded -libraries which use different C standard libraries (this can happen on -Windows, at least).

    +

    + The following functions allow external code to share the memory allocator an + XML_Parser has been configured to use. This is especially useful for + third-party libraries that interact with a parser object created by application + code, or heavily layered applications. This can be essential when using + dynamically loaded libraries which use different C standard libraries (this can + happen on Windows, at least). +

    -

    XML_MemMalloc

    -
    +      

    + XML_MemMalloc +

    + +
     void * XMLCALL
     XML_MemMalloc(XML_Parser parser, size_t size);
     
    -
    -Allocate size bytes of memory using the allocator the -parser object has been configured to use. Returns a -pointer to the memory or NULL on failure. Memory allocated in this -way must be freed using XML_MemFree. -
    +
    + Allocate size bytes of memory using the allocator the + parser object has been configured to use. Returns a pointer to the + memory or NULL on failure. Memory allocated in this way must be + freed using XML_MemFree. +
    -

    XML_MemRealloc

    -
    +      

    + XML_MemRealloc +

    + +
     void * XMLCALL
     XML_MemRealloc(XML_Parser parser, void *ptr, size_t size);
     
    -
    -Allocate size bytes of memory using the allocator the -parser object has been configured to use. -ptr must point to a block of memory allocated by XML_MemMalloc or -XML_MemRealloc, or be NULL. This function tries to -expand the block pointed to by ptr if possible. Returns -a pointer to the memory or NULL on failure. On success, the original -block has either been expanded or freed. On failure, the original -block has not been freed; the caller is responsible for freeing the -original block. Memory allocated in this way must be freed using -XML_MemFree. -
    +
    + Allocate size bytes of memory using the allocator the + parser object has been configured to use. ptr must + point to a block of memory allocated by XML_MemMalloc or XML_MemRealloc, or be + NULL. This function tries to expand the block pointed to by + ptr if possible. Returns a pointer to the memory or + NULL on failure. On success, the original block has either been + expanded or freed. On failure, the original block has not been freed; the caller + is responsible for freeing the original block. Memory allocated in this way must + be freed using XML_MemFree. +
    -

    XML_MemFree

    -
    +      

    + XML_MemFree +

    + +
     void XMLCALL
     XML_MemFree(XML_Parser parser, void *ptr);
     
    -
    -Free a block of memory pointed to by ptr. The block must -have been allocated by XML_MemMalloc or XML_MemRealloc, or be NULL. -
    +
    + Free a block of memory pointed to by ptr. The block must have been + allocated by XML_MemMalloc or + XML_MemRealloc, or be NULL. +
    -
    +
    - - -
    - + +
    + diff --git a/doc/xmlwf.1 b/doc/xmlwf.1 index aa2e9c21800..75318fccc85 100644 --- a/doc/xmlwf.1 +++ b/doc/xmlwf.1 @@ -5,7 +5,7 @@ \\$2 \(la\\$1\(ra\\$3 .. .if \n(.g .mso www.tmac -.TH XMLWF 1 "September 24, 2025" "" "" +.TH XMLWF 1 "March 17, 2026" "" "" .SH NAME xmlwf \- Determines if an XML document is well-formed .SH SYNOPSIS @@ -97,7 +97,7 @@ The amplification factor is calculated as .. .nf - amplification := (direct + indirect) / direct + amplification := (direct + indirect) / direct .fi @@ -105,7 +105,7 @@ The amplification factor is calculated as .. .nf - amplification := allocated / direct + amplification := allocated / direct .fi @@ -235,7 +235,7 @@ the operating system reporting memory in a strange way; there is not a leak in \fBxmlwf\fR. .TP \*(T<\fB\-s\fR\*(T> -Prints an error if the document is not standalone. +Prints an error if the document is not standalone. A document is standalone if it has no external subset and no references to parameter entities. .TP @@ -261,6 +261,7 @@ page. See also \*(T<\fB\-e\fR\*(T>. .TP \*(T<\fB\-x\fR\*(T> Turns on parsing external entities. +(CAREFUL! This makes xmlwf vulnerable to external entity attacks (XXE).) Non-validating parsers are not required to resolve external entities, or even expand entities at all. @@ -275,6 +276,7 @@ This is an example of an internal entity: .nf + .fi And here are some examples of external entities: @@ -283,6 +285,7 @@ And here are some examples of external entities: (parsed) (unparsed) + .fi .TP \*(T<\fB\-\-\fR\*(T> @@ -293,6 +296,7 @@ starts with a hyphen. For example: .nf xmlwf \-\- \-myfile.xml + .fi will run \fBxmlwf\fR on the file @@ -307,7 +311,7 @@ input file cannot be opened, \fBxmlwf\fR prints a single line describing the problem to standard output. .PP If the \*(T<\fB\-k\fR\*(T> option is not provided, \fBxmlwf\fR -halts upon encountering a well-formedness or output-file error. +halts upon encountering a well-formedness or output-file error. 
If \*(T<\fB\-k\fR\*(T> is provided, \fBxmlwf\fR continues processing the remaining input files, describing problems found with any of them. .SH "EXIT STATUS" @@ -344,6 +348,7 @@ me, I'd like to add this information to this manpage. The Expat home page: https://libexpat.github.io/ The W3 XML 1.0 specification (fourth edition): https://www.w3.org/TR/2006/REC\-xml\-20060816/ Billion laughs attack: https://en.wikipedia.org/wiki/Billion_laughs_attack + .fi .SH AUTHOR This manual page was originally written by Scott Bronson <\*(T> diff --git a/doc/xmlwf.xml b/doc/xmlwf.xml index 01316bb1662..c4fe92d44fb 100644 --- a/doc/xmlwf.xml +++ b/doc/xmlwf.xml @@ -9,7 +9,7 @@ Copyright (c) 2001 Scott Bronson Copyright (c) 2002-2003 Fred L. Drake, Jr. Copyright (c) 2009 Karl Waclawek - Copyright (c) 2016-2025 Sebastian Pipping + Copyright (c) 2016-2026 Sebastian Pipping Copyright (c) 2016 Ardo van Rangelrooij Copyright (c) 2017 Rhodri James Copyright (c) 2020 Joe Orton @@ -21,7 +21,7 @@ "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd" [ Scott"> Bronson"> - September 24, 2025"> + March 17, 2026"> 1"> bronson@rinspin.com"> @@ -29,8 +29,8 @@ XMLWF"> - Debian GNU/Linux"> GNU"> + Debian &gnu;/Linux"> ]> @@ -84,73 +84,77 @@ DESCRIPTION - &dhpackage; uses the Expat library to - determine if an XML document is well-formed. It is - non-validating. - - - - If you do not specify any files on the command-line, and you - have a recent version of &dhpackage;, the - input file will be read from standard input. - + &dhpackage; uses the Expat library to + determine if an XML document is well-formed. It is + non-validating. + + + If you do not specify any files on the command-line, and you + have a recent version of &dhpackage;, the + input file will be read from standard input. + WELL-FORMED DOCUMENTS - - - A well-formed document must adhere to the - following rules: - - - - - The file begins with an XML declaration. For instance, - <?xml version="1.0" standalone="yes"?>. 
- NOTE: - &dhpackage; does not currently - check for a valid XML declaration. - - - Every start tag is either empty (<tag/>) - or has a corresponding end tag. - - - There is exactly one root element. This element must contain - all other elements in the document. Only comments, white - space, and processing instructions may come after the close - of the root element. - - - All elements nest properly. - - - All attribute values are enclosed in quotes (either single - or double). - + + A well-formed document must adhere to the + following rules: + + + + + The file begins with an XML declaration. For instance, + <?xml version="1.0" standalone="yes"?>. + NOTE: + &dhpackage; does not currently + check for a valid XML declaration. + + + + + Every start tag is either empty (<tag/>) + or has a corresponding end tag. + + + + + There is exactly one root element. This element must contain + all other elements in the document. Only comments, white + space, and processing instructions may come after the close + of the root element. + + + + + All elements nest properly. + + + + + All attribute values are enclosed in quotes (either single + or double). + + - - - If the document has a DTD, and it strictly complies with that - DTD, then the document is also considered valid. - &dhpackage; is a non-validating parser -- - it does not check the DTD. However, it does support - external entities (see the option). - + + If the document has a DTD, and it strictly complies with that + DTD, then the document is also considered valid. + &dhpackage; is a non-validating parser -- + it does not check the DTD. However, it does support + external entities (see the option). + OPTIONS - - -When an option includes an argument, you may specify the argument either -separately (" output") or concatenated with the -option ("output"). &dhpackage; -supports both. 
- - + + When an option includes an argument, you may specify the argument either + separately (" output") or concatenated with the + option ("output"). &dhpackage; + supports both. + @@ -166,13 +170,13 @@ supports both. The amplification factor is calculated as .. - amplification := (direct + indirect) / direct + amplification := (direct + indirect) / direct .. with regard to use of entities and .. - amplification := allocated / direct + amplification := allocated / direct .. with regard to dynamic memory while parsing. @@ -214,60 +218,60 @@ supports both. - - If the input file is well-formed and &dhpackage; - doesn't encounter any errors, the input file is simply copied to - the output directory unchanged. - This implies no namespaces (turns off ) and - requires to specify an output directory. - + + If the input file is well-formed and &dhpackage; + doesn't encounter any errors, the input file is simply copied to + the output directory unchanged. + This implies no namespaces (turns off ) and + requires to specify an output directory. + output-dir - - Specifies a directory to contain transformed - representations of the input files. - By default, outputs a canonical representation - (described below). - You can select different output formats using , - and . - - - The output filenames will - be exactly the same as the input filenames or "STDIN" if the input is - coming from standard input. Therefore, you must be careful that the - output file does not go into the same directory as the input - file. Otherwise, &dhpackage; will delete the - input file before it generates the output file (just like running - cat < file > file in most shells). - - - Two structurally equivalent XML documents have a byte-for-byte - identical canonical XML representation. - Note that ignorable white space is considered significant and - is treated equivalently to data. - More on canonical XML can be found at - http://www.jclark.com/xml/canonxml.html . 
- + + Specifies a directory to contain transformed + representations of the input files. + By default, outputs a canonical representation + (described below). + You can select different output formats using , + and . + + + The output filenames will + be exactly the same as the input filenames or "STDIN" if the input is + coming from standard input. Therefore, you must be careful that the + output file does not go into the same directory as the input + file. Otherwise, &dhpackage; will delete the + input file before it generates the output file (just like running + cat < file > file in most shells). + + + Two structurally equivalent XML documents have a byte-for-byte + identical canonical XML representation. + Note that ignorable white space is considered significant and + is treated equivalently to data. + More on canonical XML can be found at + http://www.jclark.com/xml/canonxml.html . + encoding - - Specifies the character encoding for the document, overriding - any document encoding declaration. &dhpackage; - supports four built-in encodings: - US-ASCII, - UTF-8, - UTF-16, and - ISO-8859-1. - Also see the option. - + + Specifies the character encoding for the document, overriding + any document encoding declaration. &dhpackage; + supports four built-in encodings: + US-ASCII, + UTF-8, + UTF-16, and + ISO-8859-1. + Also see the option. + @@ -312,21 +316,21 @@ supports both. - - Outputs some strange sort of XML file that completely - describes the input file, including character positions. - Requires to specify an output file. - + + Outputs some strange sort of XML file that completely + describes the input file, including character positions. + Requires to specify an output file. + - - Turns on namespace processing. (describe namespaces) - disables namespaces. - + + Turns on namespace processing. (describe namespaces) + disables namespaces. + @@ -334,9 +338,9 @@ supports both. - Adds a doctype and notation declarations to canonical XML output. 
- This matches the example output used by the formal XML test cases. - Requires to specify an output file. + Adds a doctype and notation declarations to canonical XML output. + This matches the example output used by the formal XML test cases. + Requires to specify an output file. @@ -344,15 +348,15 @@ supports both. - - Tells &dhpackage; to process external DTDs and parameter - entities. - - - Normally &dhpackage; never parses parameter - entities. tells it to always parse them. - implies . - + + Tells &dhpackage; to process external DTDs and parameter + entities. + + + Normally &dhpackage; never parses parameter + entities. tells it to always parse them. + implies . + @@ -369,47 +373,47 @@ supports both. - - Normally &dhpackage; memory-maps the XML file - before parsing; this can result in faster parsing on many - platforms. - turns off memory-mapping and uses normal file - IO calls instead. - Of course, memory-mapping is automatically turned off - when reading from standard input. - - - Use of memory-mapping can cause some platforms to report - substantially higher memory usage for - &dhpackage;, but this appears to be a matter of - the operating system reporting memory in a strange way; there is - not a leak in &dhpackage;. - + + Normally &dhpackage; memory-maps the XML file + before parsing; this can result in faster parsing on many + platforms. + turns off memory-mapping and uses normal file + IO calls instead. + Of course, memory-mapping is automatically turned off + when reading from standard input. + + + Use of memory-mapping can cause some platforms to report + substantially higher memory usage for + &dhpackage;, but this appears to be a matter of + the operating system reporting memory in a strange way; there is + not a leak in &dhpackage;. + - - Prints an error if the document is not standalone. - A document is standalone if it has no external subset and no - references to parameter entities. - + + Prints an error if the document is not standalone. 
+ A document is standalone if it has no external subset and no + references to parameter entities. + - - Turns on timings. This tells Expat to parse the entire file, - but not perform any processing. - This gives a fairly accurate idea of the raw speed of Expat itself - without client overhead. - turns off most of the output options - (, , , ...). - + + Turns on timings. This tells Expat to parse the entire file, + but not perform any processing. + This gives a fairly accurate idea of the raw speed of Expat itself + without client overhead. + turns off most of the output options + (, , , ...). + @@ -417,104 +421,102 @@ supports both. - - Prints the version of the Expat library being used, including some - information on the compile-time configuration of the library, and - then exits. - + + Prints the version of the Expat library being used, including some + information on the compile-time configuration of the library, and + then exits. + - - Enables support for Windows code pages. - Normally, &dhpackage; will throw an error if it - runs across an encoding that it is not equipped to handle itself. With - , &dhpackage; will try to use a Windows code - page. See also . - + + Enables support for Windows code pages. + Normally, &dhpackage; will throw an error if it + runs across an encoding that it is not equipped to handle itself. With + , &dhpackage; will try to use a Windows code + page. See also . + - - Turns on parsing external entities. - - - Non-validating parsers are not required to resolve external - entities, or even expand entities at all. - Expat always expands internal entities (?), - but external entity parsing must be enabled explicitly. - - - External entities are simply entities that obtain their - data from outside the XML file currently being parsed. - - - This is an example of an internal entity: - + + Turns on parsing external entities. + (CAREFUL! This makes xmlwf vulnerable to external entity attacks (XXE).) 
+ + + Non-validating parsers are not required to resolve external + entities, or even expand entities at all. + Expat always expands internal entities (?), + but external entity parsing must be enabled explicitly. + + + External entities are simply entities that obtain their + data from outside the XML file currently being parsed. + + + This is an example of an internal entity: + <!ENTITY vers '1.0.2'> - - - - And here are some examples of external entities: + + + + And here are some examples of external entities: - + <!ENTITY header SYSTEM "header-&vers;.xml"> (parsed) <!ENTITY logo SYSTEM "logo.png" PNG> (unparsed) - - - + + - - (Two hyphens.) - Terminates the list of options. This is only needed if a filename - starts with a hyphen. For example: - - + + (Two hyphens.) + Terminates the list of options. This is only needed if a filename + starts with a hyphen. For example: + + &dhpackage; -- -myfile.xml - - - will run &dhpackage; on the file - -myfile.xml. - + + + will run &dhpackage; on the file + -myfile.xml. + - - - Older versions of &dhpackage; do not support - reading from standard input. - - - - - OUTPUT - &dhpackage; outputs nothing for files which are problem-free. - If any input file is not well-formed, or if the output for any - input file cannot be opened, &dhpackage; prints a single - line describing the problem to standard output. - - - If the option is not provided, &dhpackage; - halts upon encountering a well-formedness or output-file error. - If is provided, &dhpackage; continues - processing the remaining input files, describing problems found with any of them. + Older versions of &dhpackage; do not support + reading from standard input. - EXIT STATUS + OUTPUT + &dhpackage; outputs nothing for files which are problem-free. + If any input file is not well-formed, or if the output for any + input file cannot be opened, &dhpackage; prints a single + line describing the problem to standard output. 
+ + + If the option is not provided, &dhpackage; + halts upon encountering a well-formedness or output-file error. + If is provided, &dhpackage; continues + processing the remaining input files, describing problems found with any of them. + + + + + EXIT STATUS For options | or |, &dhpackage; always exits with status code 0. For other cases, the following exit status codes are returned: @@ -543,39 +545,37 @@ supports both. - + BUGS - - The errors should go to standard error, not standard output. - - - There should be a way to get to send its - output to standard output rather than forcing the user to send - it to a file. - - - I have no idea why anyone would want to use the - , , and - options. If someone could explain it to - me, I'd like to add this information to this manpage. - + + The errors should go to standard error, not standard output. + + + There should be a way to get to send its + output to standard output rather than forcing the user to send + it to a file. + + + I have no idea why anyone would want to use the + , , and + options. If someone could explain it to + me, I'd like to add this information to this manpage. + SEE ALSO - - - + + The Expat home page: https://libexpat.github.io/ The W3 XML 1.0 specification (fourth edition): https://www.w3.org/TR/2006/REC-xml-20060816/ Billion laughs attack: https://en.wikipedia.org/wiki/Billion_laughs_attack - - - + + @@ -585,8 +585,8 @@ Billion laughs attack: https://en.wikipedia.org/wiki/Bi in December 2001 for the &debian; system (but may be used by others). Permission is granted to copy, distribute and/or modify this document under - the terms of the GNU Free Documentation + the terms of the &gnu; Free Documentation License, Version 1.1. 
- + diff --git a/examples/Makefile.in b/examples/Makefile.in index 0e55052ce6e..56a6f69c07b 100644 --- a/examples/Makefile.in +++ b/examples/Makefile.in @@ -321,6 +321,9 @@ SO_MINOR = @SO_MINOR@ SO_PATCH = @SO_PATCH@ STRIP = @STRIP@ VERSION = @VERSION@ +VSCRIPT_LDFLAGS = @VSCRIPT_LDFLAGS@ +_EXPAT_COMMENT_ATTR_INFO = @_EXPAT_COMMENT_ATTR_INFO@ +_EXPAT_COMMENT_DTD_OR_GE = @_EXPAT_COMMENT_DTD_OR_GE@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ diff --git a/expat_config.h.in b/expat_config.h.in index 543db825244..7541bf6005e 100644 --- a/expat_config.h.in +++ b/expat_config.h.in @@ -33,9 +33,6 @@ /* Define to 1 if you have the header file. */ #undef HAVE_INTTYPES_H -/* Define to 1 if you have the 'bsd' library (-lbsd). */ -#undef HAVE_LIBBSD - /* Define to 1 if you have a working 'mmap' system call. */ #undef HAVE_MMAP diff --git a/fix-xmltest-log.sh b/fix-xmltest-log.sh index 4739acab6b0..4deafe53a7a 100755 --- a/fix-xmltest-log.sh +++ b/fix-xmltest-log.sh @@ -6,7 +6,7 @@ # \___/_/\_\ .__/ \__,_|\__| # |_| XML parser # -# Copyright (c) 2019-2022 Sebastian Pipping +# Copyright (c) 2019-2026 Sebastian Pipping # Copyright (c) 2024 Dag-Erling Smørgrav # Licensed under the MIT license: # @@ -31,9 +31,10 @@ set -e +sed="$(type -P gsed sed false | head -n 1)" # e.g. 
for Solaris filename="${1:-tests/xmltest.log}" -sed -i.bak \ +exec "${sed}" -i.bak \ -e '# convert DOS line endings to Unix without resorting to dos2unix' \ -e $'s/\r//' \ \ diff --git a/lib/Makefile.am b/lib/Makefile.am index 1958f322f31..493077231c5 100644 --- a/lib/Makefile.am +++ b/lib/Makefile.am @@ -6,9 +6,10 @@ # \___/_/\_\ .__/ \__,_|\__| # |_| XML parser # -# Copyright (c) 2017-2024 Sebastian Pipping +# Copyright (c) 2017-2026 Sebastian Pipping # Copyright (c) 2017 Tomasz Kłoczko # Copyright (c) 2019 David Loffredo +# Copyright (c) 2026 Gordon Messmer # Licensed under the MIT license: # # Permission is hereby granted, free of charge, to any person obtaining @@ -45,6 +46,9 @@ libexpat_la_LDFLAGS = \ @LIBM@ \ -no-undefined \ -version-info @LIBCURRENT@:@LIBREVISION@:@LIBAGE@ +if HAVE_VSCRIPT +libexpat_la_LDFLAGS += $(VSCRIPT_LDFLAGS),@builddir@/libexpat.map +endif libexpat_la_SOURCES = \ xmlparse.c \ diff --git a/lib/Makefile.in b/lib/Makefile.in index d85f80dbdbb..d8e4fd59e11 100644 --- a/lib/Makefile.in +++ b/lib/Makefile.in @@ -22,9 +22,10 @@ # \___/_/\_\ .__/ \__,_|\__| # |_| XML parser # -# Copyright (c) 2017-2024 Sebastian Pipping +# Copyright (c) 2017-2026 Sebastian Pipping # Copyright (c) 2017 Tomasz Kłoczko # Copyright (c) 2019 David Loffredo +# Copyright (c) 2026 Gordon Messmer # Licensed under the MIT license: # # Permission is hereby granted, free of charge, to any person obtaining @@ -124,6 +125,7 @@ PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ +@HAVE_VSCRIPT_TRUE@am__append_1 = $(VSCRIPT_LDFLAGS),@builddir@/libexpat.map subdir = lib ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/libtool.m4 \ @@ -146,7 +148,7 @@ DIST_COMMON = $(srcdir)/Makefile.am $(include_HEADERS) \ $(am__DIST_COMMON) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/expat_config.h -CONFIG_CLEAN_FILES = +CONFIG_CLEAN_FILES = libexpat.map CONFIG_CLEAN_VPATH_FILES = am__vpath_adj_setup = 
srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ @@ -259,7 +261,7 @@ am__define_uniq_tagged_files = \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | $(am__uniquify_input)` -am__DIST_COMMON = $(srcdir)/Makefile.in \ +am__DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/libexpat.map.in \ $(top_srcdir)/conftools/depcomp DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) ACLOCAL = @ACLOCAL@ @@ -358,6 +360,9 @@ SO_MINOR = @SO_MINOR@ SO_PATCH = @SO_PATCH@ STRIP = @STRIP@ VERSION = @VERSION@ +VSCRIPT_LDFLAGS = @VSCRIPT_LDFLAGS@ +_EXPAT_COMMENT_ATTR_INFO = @_EXPAT_COMMENT_ATTR_INFO@ +_EXPAT_COMMENT_DTD_OR_GE = @_EXPAT_COMMENT_DTD_OR_GE@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ @@ -421,12 +426,8 @@ include_HEADERS = \ lib_LTLIBRARIES = libexpat.la @WITH_TESTS_TRUE@noinst_LTLIBRARIES = libtestpat.la -libexpat_la_LDFLAGS = \ - @AM_LDFLAGS@ \ - @LIBM@ \ - -no-undefined \ - -version-info @LIBCURRENT@:@LIBREVISION@:@LIBAGE@ - +libexpat_la_LDFLAGS = @AM_LDFLAGS@ @LIBM@ -no-undefined -version-info \ + @LIBCURRENT@:@LIBREVISION@:@LIBAGE@ $(am__append_1) libexpat_la_SOURCES = \ xmlparse.c \ xmltok.c \ @@ -490,6 +491,8 @@ $(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) $(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): +libexpat.map: $(top_builddir)/config.status $(srcdir)/libexpat.map.in + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ install-libLTLIBRARIES: $(lib_LTLIBRARIES) @$(NORMAL_INSTALL) diff --git a/lib/expat.h b/lib/expat.h index 290dfeb0f6d..18dbaebde29 100644 --- a/lib/expat.h +++ b/lib/expat.h @@ -11,7 +11,7 @@ Copyright (c) 2000-2005 Fred L. Drake, Jr. 
Copyright (c) 2001-2002 Greg Stein Copyright (c) 2002-2016 Karl Waclawek - Copyright (c) 2016-2025 Sebastian Pipping + Copyright (c) 2016-2026 Sebastian Pipping Copyright (c) 2016 Cristian Rodríguez Copyright (c) 2016 Thomas Beutlich Copyright (c) 2017 Rhodri James @@ -1082,7 +1082,7 @@ XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled); */ # define XML_MAJOR_VERSION 2 # define XML_MINOR_VERSION 7 -# define XML_MICRO_VERSION 3 +# define XML_MICRO_VERSION 5 # ifdef __cplusplus } diff --git a/lib/expat_external.h b/lib/expat_external.h index 96f955eefb6..d9ddeb612f6 100644 --- a/lib/expat_external.h +++ b/lib/expat_external.h @@ -12,7 +12,7 @@ Copyright (c) 2001-2002 Greg Stein Copyright (c) 2002-2006 Karl Waclawek Copyright (c) 2016 Cristian Rodríguez - Copyright (c) 2016-2019 Sebastian Pipping + Copyright (c) 2016-2026 Sebastian Pipping Copyright (c) 2017 Rhodri James Copyright (c) 2018 Yury Gribov Licensed under the MIT license: @@ -88,8 +88,7 @@ # ifndef XML_BUILDING_EXPAT /* using Expat from an application */ -# if defined(_MSC_EXTENSIONS) && ! defined(__BEOS__) \ - && ! defined(__CYGWIN__) +# if defined(_MSC_VER) && ! defined(__BEOS__) && ! 
defined(__CYGWIN__) # define XMLIMPORT __declspec(dllimport) # endif diff --git a/lib/internal.h b/lib/internal.h index 8f5edf48ef7..61266ebb772 100644 --- a/lib/internal.h +++ b/lib/internal.h @@ -128,7 +128,7 @@ # elif ULONG_MAX == 18446744073709551615u // 2^64-1 # define EXPAT_FMT_PTRDIFF_T(midpart) "%" midpart "ld" # define EXPAT_FMT_SIZE_T(midpart) "%" midpart "lu" -# elif defined(EMSCRIPTEN) // 32bit mode Emscripten +# elif defined(__wasm32__) // 32bit mode Emscripten or WASI SDK # define EXPAT_FMT_PTRDIFF_T(midpart) "%" midpart "ld" # define EXPAT_FMT_SIZE_T(midpart) "%" midpart "zu" # else diff --git a/lib/libexpat.map.in b/lib/libexpat.map.in new file mode 100644 index 00000000000..52e59ed3d93 --- /dev/null +++ b/lib/libexpat.map.in @@ -0,0 +1,119 @@ +LIBEXPAT_1.0.0 { + global: + XML_DefaultCurrent; + XML_ErrorString; + XML_ExternalEntityParserCreate; + XML_GetBase; + XML_GetBuffer; + XML_GetCurrentByteIndex; + XML_GetCurrentColumnNumber; + XML_GetCurrentLineNumber; + XML_GetErrorCode; + XML_Parse; + XML_ParseBuffer; + XML_ParserCreate; + XML_ParserFree; + XML_SetBase; + XML_SetCharacterDataHandler; + XML_SetDefaultHandler; + XML_SetElementHandler; + XML_SetExternalEntityRefHandler; + XML_SetNotationDeclHandler; + XML_SetProcessingInstructionHandler; + XML_SetUnknownEncodingHandler; + XML_SetUnparsedEntityDeclHandler; + XML_SetUserData; + XML_UseParserAsHandlerArg; +}; + +LIBEXPAT_1.1.0 { + global: + XML_GetCurrentByteCount; + XML_GetSpecifiedAttributeCount; + XML_ParserCreateNS; + XML_SetCdataSectionHandler; + XML_SetCommentHandler; + XML_SetDefaultHandlerExpand; + XML_SetEncoding; + XML_SetExternalEntityRefHandlerArg; + XML_SetNamespaceDeclHandler; + XML_SetNotStandaloneHandler; +} LIBEXPAT_1.0.0; + +LIBEXPAT_1.95.0 { + global: + XML_ExpatVersion; + XML_GetIdAttributeIndex; + XML_GetInputContext; + XML_ParserCreate_MM; + XML_SetAttlistDeclHandler; + XML_SetDoctypeDeclHandler; + XML_SetElementDeclHandler; + XML_SetEndCdataSectionHandler; + 
XML_SetEndDoctypeDeclHandler; + XML_SetEndElementHandler; + XML_SetEndNamespaceDeclHandler; + XML_SetEntityDeclHandler; + XML_SetParamEntityParsing; + XML_SetReturnNSTriplet; + XML_SetStartCdataSectionHandler; + XML_SetStartDoctypeDeclHandler; + XML_SetStartElementHandler; + XML_SetStartNamespaceDeclHandler; + XML_SetXmlDeclHandler; +} LIBEXPAT_1.1.0; + +LIBEXPAT_1.95.3 { + global: + XML_ExpatVersionInfo; + XML_ParserReset; +} LIBEXPAT_1.95.0; + +LIBEXPAT_1.95.4 { + global: + XML_SetSkippedEntityHandler; +} LIBEXPAT_1.95.3; + +LIBEXPAT_1.95.5 { + global: + XML_GetFeatureList; + XML_UseForeignDTD; +} LIBEXPAT_1.95.4; + +LIBEXPAT_1.95.6 { + global: + XML_FreeContentModel; + XML_MemFree; + XML_MemMalloc; + XML_MemRealloc; +} LIBEXPAT_1.95.5; + +LIBEXPAT_1.95.8 { + global: + XML_GetParsingStatus; + XML_ResumeParser; + XML_StopParser; +} LIBEXPAT_1.95.6; + +LIBEXPAT_2.1.0 { + global: +@_EXPAT_COMMENT_ATTR_INFO@ XML_GetAttributeInfo; + XML_SetHashSalt; +} LIBEXPAT_1.95.8; + +LIBEXPAT_2.4.0 { + global: +@_EXPAT_COMMENT_DTD_OR_GE@ XML_SetBillionLaughsAttackProtectionActivationThreshold; +@_EXPAT_COMMENT_DTD_OR_GE@ XML_SetBillionLaughsAttackProtectionMaximumAmplification; +} LIBEXPAT_2.1.0; + +LIBEXPAT_2.6.0 { + global: + XML_SetReparseDeferralEnabled; +} LIBEXPAT_2.4.0; + +LIBEXPAT_2.7.2 { + global: +@_EXPAT_COMMENT_DTD_OR_GE@ XML_SetAllocTrackerActivationThreshold; +@_EXPAT_COMMENT_DTD_OR_GE@ XML_SetAllocTrackerMaximumAmplification; +} LIBEXPAT_2.6.0; diff --git a/lib/xmlparse.c b/lib/xmlparse.c index a187a3a18f1..0248b6651ff 100644 --- a/lib/xmlparse.c +++ b/lib/xmlparse.c @@ -1,4 +1,4 @@ -/* 28bcd8b1ba7eb595d82822908257fd9c3589b4243e3c922d0369f35bfcd7b506 (2.7.3+) +/* 93c1caa66e2b0310459482516af05505b57c5cb7b96df777105308fc585c85d1 (2.7.5+) __ __ _ ___\ \/ /_ __ __ _| |_ / _ \\ /| '_ \ / _` | __| @@ -13,7 +13,7 @@ Copyright (c) 2002-2016 Karl Waclawek Copyright (c) 2005-2009 Steven Solie Copyright (c) 2016 Eric Rahm - Copyright (c) 2016-2025 Sebastian Pipping + 
Copyright (c) 2016-2026 Sebastian Pipping Copyright (c) 2016 Gaurav Copyright (c) 2016 Thomas Beutlich Copyright (c) 2016 Gustavo Grieco @@ -42,6 +42,9 @@ Copyright (c) 2024-2025 Berkay Eren Ürün Copyright (c) 2024 Hanno Böck Copyright (c) 2025 Matthew Fernandez + Copyright (c) 2025 Atrem Borovik + Copyright (c) 2025 Alfonso Gregory + Copyright (c) 2026 Rosen Penev Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining @@ -101,7 +104,7 @@ #include /* INT_MAX, UINT_MAX */ #include /* fprintf */ #include /* getenv, rand_s */ -#include /* uintptr_t */ +#include /* SIZE_MAX, uintptr_t */ #include /* isnan */ #ifdef _WIN32 @@ -134,11 +137,6 @@ # endif /* defined(GRND_NONBLOCK) */ #endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */ -#if defined(HAVE_LIBBSD) \ - && (defined(HAVE_ARC4RANDOM_BUF) || defined(HAVE_ARC4RANDOM)) -# include -#endif - #if defined(_WIN32) && ! defined(LOAD_LIBRARY_SEARCH_SYSTEM32) # define LOAD_LIBRARY_SEARCH_SYSTEM32 0x00000800 #endif @@ -155,8 +153,6 @@ * Linux >=3.17 + glibc (including <2.25) (syscall SYS_getrandom): HAVE_SYSCALL_GETRANDOM, \ * BSD / macOS >=10.7 / glibc >=2.36 (arc4random_buf): HAVE_ARC4RANDOM_BUF, \ * BSD / macOS (including <10.7) / glibc >=2.36 (arc4random): HAVE_ARC4RANDOM, \ - * libbsd (arc4random_buf): HAVE_ARC4RANDOM_BUF + HAVE_LIBBSD, \ - * libbsd (arc4random): HAVE_ARC4RANDOM + HAVE_LIBBSD, \ * Linux (including <3.17) / BSD / macOS (including <10.7) / Solaris >=8 (/dev/urandom): XML_DEV_URANDOM, \ * Windows >=Vista (rand_s): _WIN32. 
\ \ @@ -311,8 +307,11 @@ typedef struct tag { const char *rawName; /* tagName in the original encoding */ int rawNameLength; TAG_NAME name; /* tagName in the API encoding */ - char *buf; /* buffer for name components */ - char *bufEnd; /* end of the buffer */ + union { + char *raw; /* for byte-level access (rawName storage) */ + XML_Char *str; /* for character-level access (converted name) */ + } buf; /* buffer for name components */ + char *bufEnd; /* end of the buffer */ BINDING *bindings; } TAG; @@ -349,7 +348,7 @@ typedef struct { typedef struct block { struct block *next; int size; - XML_Char s[1]; + XML_Char s[]; } BLOCK; typedef struct { @@ -591,6 +590,8 @@ static XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc, static XML_Bool FASTCALL poolGrow(STRING_POOL *pool); static const XML_Char *FASTCALL poolCopyString(STRING_POOL *pool, const XML_Char *s); +static const XML_Char *FASTCALL poolCopyStringNoFinish(STRING_POOL *pool, + const XML_Char *s); static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n); static const XML_Char *FASTCALL poolAppendString(STRING_POOL *pool, @@ -1230,8 +1231,11 @@ generate_hash_secret_salt(XML_Parser parser) { # endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */ /* .. and self-made low quality for backup: */ + entropy = gather_time_entropy(); +# if ! 
defined(__wasi__) /* Process ID is 0 bits entropy if attacker has local access */ - entropy = gather_time_entropy() ^ getpid(); + entropy ^= getpid(); +# endif /* Factors are 2^31-1 and 2^61-1 (Mersenne primes M31 and M61) */ if (sizeof(unsigned long) == 4) { @@ -1754,6 +1758,7 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context, XML_ExternalEntityRefHandler oldExternalEntityRefHandler; XML_SkippedEntityHandler oldSkippedEntityHandler; XML_UnknownEncodingHandler oldUnknownEncodingHandler; + void *oldUnknownEncodingHandlerData; XML_ElementDeclHandler oldElementDeclHandler; XML_AttlistDeclHandler oldAttlistDeclHandler; XML_EntityDeclHandler oldEntityDeclHandler; @@ -1799,6 +1804,7 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context, oldExternalEntityRefHandler = parser->m_externalEntityRefHandler; oldSkippedEntityHandler = parser->m_skippedEntityHandler; oldUnknownEncodingHandler = parser->m_unknownEncodingHandler; + oldUnknownEncodingHandlerData = parser->m_unknownEncodingHandlerData; oldElementDeclHandler = parser->m_elementDeclHandler; oldAttlistDeclHandler = parser->m_attlistDeclHandler; oldEntityDeclHandler = parser->m_entityDeclHandler; @@ -1859,6 +1865,7 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context, parser->m_externalEntityRefHandler = oldExternalEntityRefHandler; parser->m_skippedEntityHandler = oldSkippedEntityHandler; parser->m_unknownEncodingHandler = oldUnknownEncodingHandler; + parser->m_unknownEncodingHandlerData = oldUnknownEncodingHandlerData; parser->m_elementDeclHandler = oldElementDeclHandler; parser->m_attlistDeclHandler = oldAttlistDeclHandler; parser->m_entityDeclHandler = oldEntityDeclHandler; @@ -1934,7 +1941,7 @@ XML_ParserFree(XML_Parser parser) { } p = tagList; tagList = tagList->parent; - FREE(parser, p->buf); + FREE(parser, p->buf.raw); destroyBindings(p->bindings, parser); FREE(parser, p); } @@ -2599,7 +2606,7 @@ XML_GetBuffer(XML_Parser parser, 
int len) { // NOTE: We are avoiding MALLOC(..) here to leave limiting // the input size to the application using Expat. newBuf = parser->m_mem.malloc_fcn(bufferSize); - if (newBuf == 0) { + if (newBuf == NULL) { parser->m_errorCode = XML_ERROR_NO_MEMORY; return NULL; } @@ -3126,7 +3133,7 @@ storeRawNames(XML_Parser parser) { size_t bufSize; size_t nameLen = sizeof(XML_Char) * (tag->name.strLen + 1); size_t rawNameLen; - char *rawNameBuf = tag->buf + nameLen; + char *rawNameBuf = tag->buf.raw + nameLen; /* Stop if already stored. Since m_tagStack is a stack, we can stop at the first entry that has already been copied; everything below it in the stack is already been accounted for in a @@ -3142,22 +3149,22 @@ storeRawNames(XML_Parser parser) { if (rawNameLen > (size_t)INT_MAX - nameLen) return XML_FALSE; bufSize = nameLen + rawNameLen; - if (bufSize > (size_t)(tag->bufEnd - tag->buf)) { - char *temp = REALLOC(parser, tag->buf, bufSize); + if (bufSize > (size_t)(tag->bufEnd - tag->buf.raw)) { + char *temp = REALLOC(parser, tag->buf.raw, bufSize); if (temp == NULL) return XML_FALSE; - /* if tag->name.str points to tag->buf (only when namespace + /* if tag->name.str points to tag->buf.str (only when namespace processing is off) then we have to update it */ - if (tag->name.str == (XML_Char *)tag->buf) + if (tag->name.str == tag->buf.str) tag->name.str = (XML_Char *)temp; /* if tag->name.localPart is set (when namespace processing is on) then update it as well, since it will always point into tag->buf */ if (tag->name.localPart) tag->name.localPart - = (XML_Char *)temp + (tag->name.localPart - (XML_Char *)tag->buf); - tag->buf = temp; + = (XML_Char *)temp + (tag->name.localPart - tag->buf.str); + tag->buf.raw = temp; tag->bufEnd = temp + bufSize; rawNameBuf = temp + nameLen; } @@ -3472,12 +3479,12 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, tag = MALLOC(parser, sizeof(TAG)); if (! 
tag) return XML_ERROR_NO_MEMORY; - tag->buf = MALLOC(parser, INIT_TAG_BUF_SIZE); - if (! tag->buf) { + tag->buf.raw = MALLOC(parser, INIT_TAG_BUF_SIZE); + if (! tag->buf.raw) { FREE(parser, tag); return XML_ERROR_NO_MEMORY; } - tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE; + tag->bufEnd = tag->buf.raw + INIT_TAG_BUF_SIZE; } tag->bindings = NULL; tag->parent = parser->m_tagStack; @@ -3490,31 +3497,32 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, { const char *rawNameEnd = tag->rawName + tag->rawNameLength; const char *fromPtr = tag->rawName; - toPtr = (XML_Char *)tag->buf; + toPtr = tag->buf.str; for (;;) { - int bufSize; int convLen; const enum XML_Convert_Result convert_res = XmlConvert(enc, &fromPtr, rawNameEnd, (ICHAR **)&toPtr, (ICHAR *)tag->bufEnd - 1); - convLen = (int)(toPtr - (XML_Char *)tag->buf); + convLen = (int)(toPtr - tag->buf.str); if ((fromPtr >= rawNameEnd) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) { tag->name.strLen = convLen; break; } - bufSize = (int)(tag->bufEnd - tag->buf) << 1; + if (SIZE_MAX / 2 < (size_t)(tag->bufEnd - tag->buf.raw)) + return XML_ERROR_NO_MEMORY; + const size_t bufSize = (size_t)(tag->bufEnd - tag->buf.raw) * 2; { - char *temp = REALLOC(parser, tag->buf, bufSize); + char *temp = REALLOC(parser, tag->buf.raw, bufSize); if (temp == NULL) return XML_ERROR_NO_MEMORY; - tag->buf = temp; + tag->buf.raw = temp; tag->bufEnd = temp + bufSize; toPtr = (XML_Char *)temp + convLen; } } } - tag->name.str = (XML_Char *)tag->buf; + tag->name.str = tag->buf.str; *toPtr = XML_T('\0'); result = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings), account); @@ -3878,7 +3886,7 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr, * from -Wtype-limits on platforms where * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. 
*/ #if UINT_MAX >= SIZE_MAX - if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(ATTRIBUTE)) { + if ((unsigned)parser->m_attsSize > SIZE_MAX / sizeof(ATTRIBUTE)) { parser->m_attsSize = oldAttsSize; return XML_ERROR_NO_MEMORY; } @@ -3897,7 +3905,7 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr, * from -Wtype-limits on platforms where * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ # if UINT_MAX >= SIZE_MAX - if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(XML_AttrInfo)) { + if ((unsigned)parser->m_attsSize > SIZE_MAX / sizeof(XML_AttrInfo)) { parser->m_attsSize = oldAttsSize; return XML_ERROR_NO_MEMORY; } @@ -4073,7 +4081,7 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr, * from -Wtype-limits on platforms where * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ #if UINT_MAX >= SIZE_MAX - if (nsAttsSize > (size_t)(-1) / sizeof(NS_ATT)) { + if (nsAttsSize > SIZE_MAX / sizeof(NS_ATT)) { /* Restore actual size of memory in m_nsAtts */ parser->m_nsAttsPower = oldNsAttsPower; return XML_ERROR_NO_MEMORY; @@ -4256,7 +4264,7 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr, * from -Wtype-limits on platforms where * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ #if UINT_MAX >= SIZE_MAX - if ((unsigned)(n + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) { + if ((unsigned)(n + EXPAND_SPARE) > SIZE_MAX / sizeof(XML_Char)) { return XML_ERROR_NO_MEMORY; } #endif @@ -4502,7 +4510,7 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, * from -Wtype-limits on platforms where * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. 
*/ #if UINT_MAX >= SIZE_MAX - if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) { + if ((unsigned)(len + EXPAND_SPARE) > SIZE_MAX / sizeof(XML_Char)) { return XML_ERROR_NO_MEMORY; } #endif @@ -4529,7 +4537,7 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, * from -Wtype-limits on platforms where * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ #if UINT_MAX >= SIZE_MAX - if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) { + if ((unsigned)(len + EXPAND_SPARE) > SIZE_MAX / sizeof(XML_Char)) { return XML_ERROR_NO_MEMORY; } #endif @@ -5080,7 +5088,7 @@ entityValueInitProcessor(XML_Parser parser, const char *s, const char *end, } /* If we get this token, we have the start of what might be a normal tag, but not a declaration (i.e. it doesn't begin with - "= SIZE_MAX - if (parser->m_groupSize > (size_t)(-1) / sizeof(int)) { + if (parser->m_groupSize > SIZE_MAX / sizeof(int)) { + parser->m_groupSize /= 2; return XML_ERROR_NO_MEMORY; } #endif int *const new_scaff_index = REALLOC( parser, dtd->scaffIndex, parser->m_groupSize * sizeof(int)); - if (new_scaff_index == NULL) + if (new_scaff_index == NULL) { + parser->m_groupSize /= 2; return XML_ERROR_NO_MEMORY; + } dtd->scaffIndex = new_scaff_index; } } else { @@ -6780,7 +6800,14 @@ storeEntityValue(XML_Parser parser, const ENCODING *enc, return XML_ERROR_NO_MEMORY; } - const char *next; + const char *next = entityTextPtr; + + /* Nothing to tokenize. */ + if (entityTextPtr >= entityTextEnd) { + result = XML_ERROR_NONE; + goto endEntityValue; + } + for (;;) { next = entityTextPtr; /* XmlEntityValueTok doesn't always set the last arg */ @@ -7190,7 +7217,7 @@ defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata, * from -Wtype-limits on platforms where * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. 
*/ #if UINT_MAX >= SIZE_MAX - if ((unsigned)count > (size_t)(-1) / sizeof(DEFAULT_ATTRIBUTE)) { + if ((unsigned)count > SIZE_MAX / sizeof(DEFAULT_ATTRIBUTE)) { return 0; } #endif @@ -7430,16 +7457,24 @@ setContext(XML_Parser parser, const XML_Char *context) { else { if (! poolAppendChar(&parser->m_tempPool, XML_T('\0'))) return XML_FALSE; - prefix - = (PREFIX *)lookup(parser, &dtd->prefixes, - poolStart(&parser->m_tempPool), sizeof(PREFIX)); + const XML_Char *const prefixName = poolCopyStringNoFinish( + &dtd->pool, poolStart(&parser->m_tempPool)); + if (! prefixName) { + return XML_FALSE; + } + + prefix = (PREFIX *)lookup(parser, &dtd->prefixes, prefixName, + sizeof(PREFIX)); + + const bool prefixNameUsed = prefix && prefix->name == prefixName; + if (prefixNameUsed) + poolFinish(&dtd->pool); + else + poolDiscard(&dtd->pool); + if (! prefix) return XML_FALSE; - if (prefix->name == poolStart(&parser->m_tempPool)) { - prefix->name = poolCopyString(&dtd->pool, prefix->name); - if (! prefix->name) - return XML_FALSE; - } + poolDiscard(&parser->m_tempPool); } for (context = s + 1; *context != CONTEXT_SEP && *context != XML_T('\0'); @@ -7666,8 +7701,7 @@ dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, * from -Wtype-limits on platforms where * sizeof(int) < sizeof(size_t), e.g. on x86_64. 
*/ #if UINT_MAX >= SIZE_MAX - if ((size_t)oldE->nDefaultAtts - > ((size_t)(-1) / sizeof(DEFAULT_ATTRIBUTE))) { + if ((size_t)oldE->nDefaultAtts > SIZE_MAX / sizeof(DEFAULT_ATTRIBUTE)) { return 0; } #endif @@ -7869,7 +7903,7 @@ lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) { unsigned long newMask = (unsigned long)newSize - 1; /* Detect and prevent integer overflow */ - if (newSize > (size_t)(-1) / sizeof(NAMED *)) { + if (newSize > SIZE_MAX / sizeof(NAMED *)) { return NULL; } @@ -8028,6 +8062,23 @@ poolCopyString(STRING_POOL *pool, const XML_Char *s) { return s; } +// A version of `poolCopyString` that does not call `poolFinish` +// and reverts any partial advancement upon failure. +static const XML_Char *FASTCALL +poolCopyStringNoFinish(STRING_POOL *pool, const XML_Char *s) { + const XML_Char *const original = s; + do { + if (! poolAppendChar(pool, *s)) { + // Revert any previously successful advancement + const ptrdiff_t advancedBy = s - original; + if (advancedBy > 0) + pool->ptr -= advancedBy; + return NULL; + } + } while (*s++); + return pool->start; +} + static const XML_Char * poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n) { if (! pool->ptr && ! poolGrow(pool)) { @@ -8105,7 +8156,7 @@ poolBytesToAllocateFor(int blockSize) { static XML_Bool FASTCALL poolGrow(STRING_POOL *pool) { if (pool->freeBlocks) { - if (pool->start == 0) { + if (pool->start == NULL) { pool->blocks = pool->freeBlocks; pool->freeBlocks = pool->freeBlocks->next; pool->blocks->next = NULL; @@ -8217,7 +8268,7 @@ nextScaffoldPart(XML_Parser parser) { * from -Wtype-limits on platforms where * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ #if UINT_MAX >= SIZE_MAX - if (parser->m_groupSize > ((size_t)(-1) / sizeof(int))) { + if (parser->m_groupSize > SIZE_MAX / sizeof(int)) { return -1; } #endif @@ -8244,7 +8295,7 @@ nextScaffoldPart(XML_Parser parser) { * from -Wtype-limits on platforms where * sizeof(unsigned int) < sizeof(size_t), e.g. 
on x86_64. */ #if UINT_MAX >= SIZE_MAX - if (dtd->scaffSize > (size_t)(-1) / 2u / sizeof(CONTENT_SCAFFOLD)) { + if (dtd->scaffSize > SIZE_MAX / 2u / sizeof(CONTENT_SCAFFOLD)) { return -1; } #endif @@ -8294,15 +8345,15 @@ build_model(XML_Parser parser) { * from -Wtype-limits on platforms where * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ #if UINT_MAX >= SIZE_MAX - if (dtd->scaffCount > (size_t)(-1) / sizeof(XML_Content)) { + if (dtd->scaffCount > SIZE_MAX / sizeof(XML_Content)) { return NULL; } - if (dtd->contentStringLen > (size_t)(-1) / sizeof(XML_Char)) { + if (dtd->contentStringLen > SIZE_MAX / sizeof(XML_Char)) { return NULL; } #endif if (dtd->scaffCount * sizeof(XML_Content) - > (size_t)(-1) - dtd->contentStringLen * sizeof(XML_Char)) { + > SIZE_MAX - dtd->contentStringLen * sizeof(XML_Char)) { return NULL; } diff --git a/lib/xmlrole.c b/lib/xmlrole.c index 2c48bf40867..b1dfb456e5d 100644 --- a/lib/xmlrole.c +++ b/lib/xmlrole.c @@ -12,10 +12,11 @@ Copyright (c) 2002-2006 Karl Waclawek Copyright (c) 2002-2003 Fred L. Drake, Jr. 
Copyright (c) 2005-2009 Steven Solie - Copyright (c) 2016-2023 Sebastian Pipping + Copyright (c) 2016-2026 Sebastian Pipping Copyright (c) 2017 Rhodri James Copyright (c) 2019 David Loffredo Copyright (c) 2021 Donghee Na + Copyright (c) 2025 Alfonso Gregory Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining @@ -46,7 +47,6 @@ # include "winconfig.h" #endif -#include "expat_external.h" #include "internal.h" #include "xmlrole.h" #include "ascii.h" diff --git a/lib/xmltok.c b/lib/xmltok.c index 95d5e84b67f..f6e5f742c92 100644 --- a/lib/xmltok.c +++ b/lib/xmltok.c @@ -12,7 +12,7 @@ Copyright (c) 2002 Greg Stein Copyright (c) 2002-2016 Karl Waclawek Copyright (c) 2005-2009 Steven Solie - Copyright (c) 2016-2024 Sebastian Pipping + Copyright (c) 2016-2026 Sebastian Pipping Copyright (c) 2016 Pascal Cuoq Copyright (c) 2016 Don Lewis Copyright (c) 2017 Rhodri James @@ -24,6 +24,7 @@ Copyright (c) 2022 Martin Ettl Copyright (c) 2022 Sean McBride Copyright (c) 2023 Hanno Böck + Copyright (c) 2025 Alfonso Gregory Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining @@ -56,7 +57,6 @@ # include "winconfig.h" #endif -#include "expat_external.h" #include "internal.h" #include "xmltok.h" #include "nametab.h" diff --git a/lib/xmltok_ns.c b/lib/xmltok_ns.c index fbdd3e3c7b7..1cd60de1e4f 100644 --- a/lib/xmltok_ns.c +++ b/lib/xmltok_ns.c @@ -11,7 +11,8 @@ Copyright (c) 2002 Greg Stein Copyright (c) 2002 Fred L. Drake, Jr. 
Copyright (c) 2002-2006 Karl Waclawek - Copyright (c) 2017-2021 Sebastian Pipping + Copyright (c) 2017-2026 Sebastian Pipping + Copyright (c) 2025 Alfonso Gregory Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining @@ -98,13 +99,13 @@ NS(findEncoding)(const ENCODING *enc, const char *ptr, const char *end) { int i; XmlUtf8Convert(enc, &ptr, end, &p, p + ENCODING_MAX - 1); if (ptr != end) - return 0; + return NULL; *p = 0; if (streqci(buf, KW_UTF_16) && enc->minBytesPerChar == 2) return enc; i = getEncodingIndex(buf); if (i == UNKNOWN_ENC) - return 0; + return NULL; return NS(encodings)[i]; } diff --git a/tests/Makefile.in b/tests/Makefile.in index 830560e2dab..9ffb46a09c7 100644 --- a/tests/Makefile.in +++ b/tests/Makefile.in @@ -616,6 +616,9 @@ SO_MINOR = @SO_MINOR@ SO_PATCH = @SO_PATCH@ STRIP = @STRIP@ VERSION = @VERSION@ +VSCRIPT_LDFLAGS = @VSCRIPT_LDFLAGS@ +_EXPAT_COMMENT_ATTR_INFO = @_EXPAT_COMMENT_ATTR_INFO@ +_EXPAT_COMMENT_DTD_OR_GE = @_EXPAT_COMMENT_DTD_OR_GE@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ diff --git a/tests/basic_tests.c b/tests/basic_tests.c index 0231e0949ee..02d1d5fd3c1 100644 --- a/tests/basic_tests.c +++ b/tests/basic_tests.c @@ -10,7 +10,7 @@ Copyright (c) 2003 Greg Stein Copyright (c) 2005-2007 Steven Solie Copyright (c) 2005-2012 Karl Waclawek - Copyright (c) 2016-2025 Sebastian Pipping + Copyright (c) 2016-2026 Sebastian Pipping Copyright (c) 2017-2022 Rhodri James Copyright (c) 2017 Joe Orton Copyright (c) 2017 José Gutiérrez de la Concha @@ -3112,12 +3112,16 @@ START_TEST(test_buffer_can_grow_to_max) { #if defined(__MINGW32__) && ! defined(__MINGW64__) // workaround for mingw/wine32 on GitHub CI not being able to reach 1GiB // Can we make a big allocation? - void *big = malloc(maxbuf); - if (! 
big) { + for (int i = 1; i <= 2; i++) { + void *const big = malloc(maxbuf); + if (big != NULL) { + free(big); + break; + } // The big allocation failed. Let's be a little lenient. maxbuf = maxbuf / 2; + fprintf(stderr, "Reducing maxbuf to %d...\n", maxbuf); } - free(big); #endif for (int i = 0; i < num_prefixes; ++i) { @@ -4570,6 +4574,46 @@ START_TEST(test_unknown_encoding_invalid_attr_value) { } END_TEST +START_TEST(test_unknown_encoding_user_data_primary) { + // This test is based on ideas contributed by Artiphishell Inc. + const char *const text = "\n" + "\n"; + XML_Parser parser = XML_ParserCreate(NULL); + XML_SetUnknownEncodingHandler(parser, + user_data_checking_unknown_encoding_handler, + (void *)(intptr_t)0xC0FFEE); + + assert_true(_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) + == XML_STATUS_OK); + + XML_ParserFree(parser); +} +END_TEST + +START_TEST(test_unknown_encoding_user_data_secondary) { + // This test is based on ideas contributed by Artiphishell Inc. + const char *const text_main = "\n" + "]>\n" + "&ext;\n"; + const char *const text_external = "\n" + "data"; + ExtTest2 test_data = {text_external, (int)strlen(text_external), NULL, NULL}; + XML_Parser parser = XML_ParserCreate(NULL); + XML_SetExternalEntityRefHandler(parser, external_entity_loader2); + XML_SetUnknownEncodingHandler(parser, + user_data_checking_unknown_encoding_handler, + (void *)(intptr_t)0xC0FFEE); + XML_SetUserData(parser, &test_data); + + assert_true(_XML_Parse_SINGLE_BYTES(parser, text_main, (int)strlen(text_main), + XML_TRUE) + == XML_STATUS_OK); + + XML_ParserFree(parser); +} +END_TEST + /* Test an external entity parser set to use latin-1 detects UTF-16 * BOMs correctly. 
*/ @@ -6001,6 +6045,7 @@ START_TEST(test_bypass_heuristic_when_close_to_bufsize) { const int document_length = 65536; char *const document = (char *)malloc(document_length); + assert_true(document != NULL); const XML_Memory_Handling_Suite memfuncs = { counting_malloc, @@ -6213,6 +6258,24 @@ START_TEST(test_varying_buffer_fills) { } END_TEST +START_TEST(test_empty_ext_param_entity_in_value) { + const char *text = ""; + ExtOption options[] = { + {XCS("ext.dtd"), "" + ""}, + {XCS("empty"), ""}, + {NULL, NULL}, + }; + + XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(g_parser, external_entity_optioner); + XML_SetUserData(g_parser, options); + if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) + == XML_STATUS_ERROR) + xml_failure(g_parser); +} +END_TEST + void make_basic_test_case(Suite *s) { TCase *tc_basic = tcase_create("basic tests"); @@ -6416,6 +6479,8 @@ make_basic_test_case(Suite *s) { tcase_add_test(tc_basic, test_unknown_encoding_invalid_surrogate); tcase_add_test(tc_basic, test_unknown_encoding_invalid_high); tcase_add_test(tc_basic, test_unknown_encoding_invalid_attr_value); + tcase_add_test(tc_basic, test_unknown_encoding_user_data_primary); + tcase_add_test(tc_basic, test_unknown_encoding_user_data_secondary); tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16le_bom); tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16be_bom); tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16le_bom2); @@ -6458,6 +6523,7 @@ make_basic_test_case(Suite *s) { tcase_add_test(tc_basic, test_empty_element_abort); tcase_add_test__ifdef_xml_dtd(tc_basic, test_pool_integrity_with_unfinished_attr); + tcase_add_test__ifdef_xml_dtd(tc_basic, test_empty_ext_param_entity_in_value); tcase_add_test__if_xml_ge(tc_basic, test_entity_ref_no_elements); tcase_add_test__if_xml_ge(tc_basic, test_deep_nested_entity); tcase_add_test__if_xml_ge(tc_basic, 
test_deep_nested_attribute_entity); diff --git a/tests/benchmark/Makefile.in b/tests/benchmark/Makefile.in index e72e901a39a..89a29cef053 100644 --- a/tests/benchmark/Makefile.in +++ b/tests/benchmark/Makefile.in @@ -311,6 +311,9 @@ SO_MINOR = @SO_MINOR@ SO_PATCH = @SO_PATCH@ STRIP = @STRIP@ VERSION = @VERSION@ +VSCRIPT_LDFLAGS = @VSCRIPT_LDFLAGS@ +_EXPAT_COMMENT_ATTR_INFO = @_EXPAT_COMMENT_ATTR_INFO@ +_EXPAT_COMMENT_DTD_OR_GE = @_EXPAT_COMMENT_DTD_OR_GE@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ diff --git a/tests/handlers.c b/tests/handlers.c index 5bca2b1f551..e456df21c77 100644 --- a/tests/handlers.c +++ b/tests/handlers.c @@ -10,7 +10,7 @@ Copyright (c) 2003 Greg Stein Copyright (c) 2005-2007 Steven Solie Copyright (c) 2005-2012 Karl Waclawek - Copyright (c) 2016-2025 Sebastian Pipping + Copyright (c) 2016-2026 Sebastian Pipping Copyright (c) 2017-2022 Rhodri James Copyright (c) 2017 Joe Orton Copyright (c) 2017 José Gutiérrez de la Concha @@ -45,6 +45,7 @@ # undef NDEBUG /* because test suite relies on assert(...) 
at the moment */ #endif +#include #include #include #include @@ -407,6 +408,15 @@ long_encoding_handler(void *userData, const XML_Char *encoding, return XML_STATUS_OK; } +int XMLCALL +user_data_checking_unknown_encoding_handler(void *userData, + const XML_Char *encoding, + XML_Encoding *info) { + const intptr_t number = (intptr_t)userData; + assert_true(number == 0xC0FFEE); + return long_encoding_handler(userData, encoding, info); +} + /* External Entity Handlers */ int XMLCALL diff --git a/tests/handlers.h b/tests/handlers.h index fa6267fbbd0..fcde27ae494 100644 --- a/tests/handlers.h +++ b/tests/handlers.h @@ -10,7 +10,7 @@ Copyright (c) 2003 Greg Stein Copyright (c) 2005-2007 Steven Solie Copyright (c) 2005-2012 Karl Waclawek - Copyright (c) 2016-2024 Sebastian Pipping + Copyright (c) 2016-2026 Sebastian Pipping Copyright (c) 2017-2022 Rhodri James Copyright (c) 2017 Joe Orton Copyright (c) 2017 José Gutiérrez de la Concha @@ -159,6 +159,9 @@ extern int XMLCALL long_encoding_handler(void *userData, const XML_Char *encoding, XML_Encoding *info); +extern int XMLCALL user_data_checking_unknown_encoding_handler( + void *userData, const XML_Char *encoding, XML_Encoding *info); + /* External Entity Handlers */ typedef struct ExtOption { diff --git a/tests/misc_tests.c b/tests/misc_tests.c index 2a8054546a1..1c508bd1046 100644 --- a/tests/misc_tests.c +++ b/tests/misc_tests.c @@ -10,7 +10,7 @@ Copyright (c) 2003 Greg Stein Copyright (c) 2005-2007 Steven Solie Copyright (c) 2005-2012 Karl Waclawek - Copyright (c) 2016-2025 Sebastian Pipping + Copyright (c) 2016-2026 Sebastian Pipping Copyright (c) 2017-2022 Rhodri James Copyright (c) 2017 Joe Orton Copyright (c) 2017 José Gutiérrez de la Concha @@ -19,6 +19,7 @@ Copyright (c) 2020 Tim Gates Copyright (c) 2021 Donghee Na Copyright (c) 2023 Sony Corporation / Snild Dolkow + Copyright (c) 2025 Berkay Eren Ürün Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining @@ -211,7 
+212,7 @@ START_TEST(test_misc_version) { if (! versions_equal(&read_version, &parsed_version)) fail("Version mismatch"); - if (xcstrcmp(version_text, XCS("expat_2.7.3")) + if (xcstrcmp(version_text, XCS("expat_2.7.5")) != 0) /* needs bump on releases */ fail("XML_*_VERSION in expat.h out of sync?\n"); } @@ -771,6 +772,35 @@ START_TEST(test_misc_async_entity_rejected) { } END_TEST +START_TEST(test_misc_no_infinite_loop_issue_1161) { + XML_Parser parser = XML_ParserCreate(NULL); + + const char *text = ""; + + struct ExtOption options[] = { + {XCS("secondary.txt"), + ""}, + {XCS("tertiary.txt"), " Copyright (c) 2004-2006 Karl Waclawek Copyright (c) 2005-2007 Steven Solie - Copyright (c) 2016-2025 Sebastian Pipping + Copyright (c) 2016-2026 Sebastian Pipping Copyright (c) 2017 Rhodri James Copyright (c) 2019 David Loffredo Copyright (c) 2021 Donghee Na Copyright (c) 2024 Hanno Böck + Copyright (c) 2025 Alfonso Gregory Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining @@ -225,7 +226,6 @@ processStream(const XML_Char *filename, XML_Parser parser) { if (filename != NULL) close(fd); break; - ; } } return 1; diff --git a/xmlwf/xmlwf.c b/xmlwf/xmlwf.c index 534f3217059..2d0c4f8efd1 100644 --- a/xmlwf/xmlwf.c +++ b/xmlwf/xmlwf.c @@ -11,7 +11,7 @@ Copyright (c) 2001-2003 Fred L. Drake, Jr. 
Copyright (c) 2004-2009 Karl Waclawek Copyright (c) 2005-2007 Steven Solie - Copyright (c) 2016-2025 Sebastian Pipping + Copyright (c) 2016-2026 Sebastian Pipping Copyright (c) 2017 Rhodri James Copyright (c) 2019 David Loffredo Copyright (c) 2020 Joe Orton @@ -19,6 +19,7 @@ Copyright (c) 2021 Tim Bray Copyright (c) 2022 Martin Ettl Copyright (c) 2022 Sean McBride + Copyright (c) 2025 Alfonso Gregory Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining @@ -390,16 +391,13 @@ endDoctypeDecl(void *userData) { notationCount++; if (notationCount == 0) { /* Nothing to report */ - free((void *)data->currentDoctypeName); - data->currentDoctypeName = NULL; - return; + goto cleanUp; } notations = malloc(notationCount * sizeof(NotationList *)); if (notations == NULL) { fprintf(stderr, "Unable to sort notations"); - freeNotations(data); - return; + goto cleanUp; } for (p = data->notationListHead, i = 0; i < notationCount; p = p->next, i++) { @@ -439,6 +437,8 @@ endDoctypeDecl(void *userData) { fputts(T("]>\n"), data->fp); free(notations); + +cleanUp: freeNotations(data); free((void *)data->currentDoctypeName); data->currentDoctypeName = NULL; @@ -900,6 +900,7 @@ usage(const XML_Char *prog, int rc) { T(" -n enable [n]amespace processing\n") T(" -p enable processing of external DTDs and [p]arameter entities\n") T(" -x enable processing of e[x]ternal entities\n") + T(" (CAREFUL! 
This makes xmlwf vulnerable to external entity attacks (XXE).)\n") T(" -e ENCODING override any in-document [e]ncoding declaration\n") T(" -w enable support for [W]indows code pages\n") T(" -r disable memory-mapping and use [r]ead calls instead\n") diff --git a/xmlwf/xmlwf_helpgen.py b/xmlwf/xmlwf_helpgen.py index 71f7baa4339..be41d59f0a8 100755 --- a/xmlwf/xmlwf_helpgen.py +++ b/xmlwf/xmlwf_helpgen.py @@ -6,7 +6,7 @@ # \___/_/\_\ .__/ \__,_|\__| # |_| XML parser # -# Copyright (c) 2019-2025 Sebastian Pipping +# Copyright (c) 2019-2026 Sebastian Pipping # Copyright (c) 2021 Tim Bray # Licensed under the MIT license: # @@ -30,28 +30,31 @@ # USE OR OTHER DEALINGS IN THE SOFTWARE. import argparse +from textwrap import dedent -epilog = """ -environment variables: - EXPAT_ACCOUNTING_DEBUG=(0|1|2|3) - Control verbosity of accounting debugging (default: 0) - EXPAT_ENTITY_DEBUG=(0|1) - Control verbosity of entity debugging (default: 0) - EXPAT_ENTROPY_DEBUG=(0|1) - Control verbosity of entropy debugging (default: 0) - EXPAT_MALLOC_DEBUG=(0|1|2) - Control verbosity of allocation tracker (default: 0) +epilog = dedent( + """ + environment variables: + EXPAT_ACCOUNTING_DEBUG=(0|1|2|3) + Control verbosity of accounting debugging (default: 0) + EXPAT_ENTITY_DEBUG=(0|1) + Control verbosity of entity debugging (default: 0) + EXPAT_ENTROPY_DEBUG=(0|1) + Control verbosity of entropy debugging (default: 0) + EXPAT_MALLOC_DEBUG=(0|1|2) + Control verbosity of allocation tracker (default: 0) -exit status: - 0 the input files are well-formed and the output (if requested) was written successfully - 1 could not allocate data structures, signals a serious problem with execution environment - 2 one or more input files were not well-formed - 3 could not create an output file - 4 command-line argument error + exit status: + 0 the input files are well-formed and the output (if requested) was written successfully + 1 could not allocate data structures, signals a serious problem with execution 
environment + 2 one or more input files were not well-formed + 3 could not create an output file + 4 command-line argument error -xmlwf of libexpat is software libre, licensed under the MIT license. -Please report bugs at https://github.com/libexpat/libexpat/issues -- thank you! -""" + xmlwf of libexpat is software libre, licensed under the MIT license. + Please report bugs at https://github.com/libexpat/libexpat/issues -- thank you! + """ +) usage = """ %(prog)s [OPTIONS] [FILE ...] @@ -59,50 +62,121 @@ %(prog)s -v|--version """ -parser = argparse.ArgumentParser(prog='xmlwf', add_help=False, - usage=usage, - description='xmlwf - Determines if an XML document is well-formed', - formatter_class=argparse.RawTextHelpFormatter, - epilog=epilog) +parser = argparse.ArgumentParser( + prog="xmlwf", + add_help=False, + usage=usage, + description="xmlwf - Determines if an XML document is well-formed", + formatter_class=argparse.RawTextHelpFormatter, + epilog=epilog, +) -input_related = parser.add_argument_group('input control arguments') -input_related.add_argument('-s', action='store_true', help='print an error if the document is not [s]tandalone') -input_related.add_argument('-n', action='store_true', help='enable [n]amespace processing') -input_related.add_argument('-p', action='store_true', help='enable processing of external DTDs and [p]arameter entities') -input_related.add_argument('-x', action='store_true', help='enable processing of e[x]ternal entities') -input_related.add_argument('-e', action='store', metavar='ENCODING', help='override any in-document [e]ncoding declaration') -input_related.add_argument('-w', action='store_true', help='enable support for [W]indows code pages') -input_related.add_argument('-r', action='store_true', help='disable memory-mapping and use [r]ead calls instead') -input_related.add_argument('-g', metavar='BYTES', help='buffer size to request per call pair to XML_[G]etBuffer and read (default: 8 KiB)') -input_related.add_argument('-k', 
action='store_true', help='when processing multiple files, [k]eep processing after first file with error') +input_related = parser.add_argument_group("input control arguments") +input_related.add_argument( + "-s", action="store_true", help="print an error if the document is not [s]tandalone" +) +input_related.add_argument( + "-n", action="store_true", help="enable [n]amespace processing" +) +input_related.add_argument( + "-p", + action="store_true", + help="enable processing of external DTDs and [p]arameter entities", +) +input_related.add_argument( + "-x", + action="store_true", + help=( + "enable processing of e[x]ternal entities" + "\n" + "(CAREFUL! This makes xmlwf vulnerable to external entity attacks (XXE).)" + ), +) +input_related.add_argument( + "-e", + action="store", + metavar="ENCODING", + help="override any in-document [e]ncoding declaration", +) +input_related.add_argument( + "-w", action="store_true", help="enable support for [W]indows code pages" +) +input_related.add_argument( + "-r", + action="store_true", + help="disable memory-mapping and use [r]ead calls instead", +) +input_related.add_argument( + "-g", + metavar="BYTES", + help="buffer size to request per call pair to XML_[G]etBuffer and read (default: 8 KiB)", +) +input_related.add_argument( + "-k", + action="store_true", + help="when processing multiple files, [k]eep processing after first file with error", +) -output_related = parser.add_argument_group('output control arguments') -output_related.add_argument('-d', action='store', metavar='DIRECTORY', help='output [d]estination directory') +output_related = parser.add_argument_group("output control arguments") +output_related.add_argument( + "-d", action="store", metavar="DIRECTORY", help="output [d]estination directory" +) output_mode = output_related.add_mutually_exclusive_group() -output_mode.add_argument('-c', action='store_true', help='write a [c]opy of input XML, not canonical XML') -output_mode.add_argument('-m', action='store_true', 
help='write [m]eta XML, not canonical XML') -output_mode.add_argument('-t', action='store_true', help='write no XML output for [t]iming of plain parsing') -output_related.add_argument('-N', action='store_true', help='enable adding doctype and [n]otation declarations') +output_mode.add_argument( + "-c", action="store_true", help="write a [c]opy of input XML, not canonical XML" +) +output_mode.add_argument( + "-m", action="store_true", help="write [m]eta XML, not canonical XML" +) +output_mode.add_argument( + "-t", action="store_true", help="write no XML output for [t]iming of plain parsing" +) +output_related.add_argument( + "-N", action="store_true", help="enable adding doctype and [n]otation declarations" +) -billion_laughs = parser.add_argument_group('amplification attack protection (e.g. billion laughs)', - description='NOTE: ' - 'If you ever need to increase these values ' - 'for non-attack payload, please file a bug report.') -billion_laughs.add_argument('-a', metavar='FACTOR', - help='set maximum tolerated [a]mplification factor (default: 100.0)') -billion_laughs.add_argument('-b', metavar='BYTES', help='set number of output [b]ytes needed to activate (default: 8 MiB/64 MiB)') +billion_laughs = parser.add_argument_group( + "amplification attack protection (e.g. billion laughs)", + description=( + "NOTE: " + "If you ever need to increase these values " + "for non-attack payload, please file a bug report." 
+ ), +) +billion_laughs.add_argument( + "-a", + metavar="FACTOR", + help="set maximum tolerated [a]mplification factor (default: 100.0)", +) +billion_laughs.add_argument( + "-b", + metavar="BYTES", + help="set number of output [b]ytes needed to activate (default: 8 MiB/64 MiB)", +) -reparse_deferral = parser.add_argument_group('reparse deferral') -reparse_deferral.add_argument('-q', action='store_true', - help='disable reparse deferral, and allow [q]uadratic parse runtime with large tokens') +reparse_deferral = parser.add_argument_group("reparse deferral") +reparse_deferral.add_argument( + "-q", + action="store_true", + help="disable reparse deferral, and allow [q]uadratic parse runtime with large tokens", +) -parser.add_argument('files', metavar='FILE', nargs='*', help='file to process (default: STDIN)') +parser.add_argument( + "files", metavar="FILE", nargs="*", help="file to process (default: STDIN)" +) -info = parser.add_argument_group('info arguments') +info = parser.add_argument_group("info arguments") info = info.add_mutually_exclusive_group() -info.add_argument('-h', '--help', action='store_true', help='show this [h]elp message and exit') -info.add_argument('-v', '--version', action='store_true', help='show program\'s [v]ersion number and exit') +info.add_argument( + "-h", "--help", action="store_true", help="show this [h]elp message and exit" +) +info.add_argument( + "-v", + "--version", + action="store_true", + help="show program's [v]ersion number and exit", +) -if __name__ == '__main__': +if __name__ == "__main__": parser.print_help()