Avoid regression in the size of XML input that we will accept.
author Tom Lane <tgl@sss.pgh.pa.us>
Mon, 28 Jul 2025 20:50:42 +0000 (16:50 -0400)
committer Tom Lane <tgl@sss.pgh.pa.us>
Mon, 28 Jul 2025 20:50:42 +0000 (16:50 -0400)
This mostly reverts commit 6082b3d5d, "Use xmlParseInNodeContext
not xmlParseBalancedChunkMemory".  It turns out that
xmlParseInNodeContext will reject text chunks exceeding 10MB, while
(in most libxml2 versions) xmlParseBalancedChunkMemory will not.
The bleeding-edge libxml2 bug that we needed to work around a year
ago is presumably no longer a factor, and the argument that
xmlParseBalancedChunkMemory is semi-deprecated is not enough to
justify a functionality regression.  Hence, go back to doing it
the old way.

Reported-by: Michael Paquier <michael@paquier.xyz>
Author: Michael Paquier <michael@paquier.xyz>
Co-authored-by: Erik Wienhold <ewie@ewie.name>
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://postgr.es/m/aIGknLuc8b8ega2X@paquier.xyz
Backpatch-through: 13

src/backend/utils/adt/xml.c

index 630b1b84e1c5ba5f4ad8e8370bf98fe8473d156a..86b6425267f187367197d093a9d95c761d917c1d 100644 (file)
@@ -1529,6 +1529,7 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
    PgXmlErrorContext *xmlerrcxt;
    volatile xmlParserCtxtPtr ctxt = NULL;
    volatile xmlDocPtr doc = NULL;
+   volatile int save_keep_blanks = -1;
 
    len = VARSIZE_ANY_EXHDR(data);  /* will be useful later */
    string = xml_text2xmlChar(data);
@@ -1545,7 +1546,6 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
    PG_TRY();
    {
        bool        parse_as_document = false;
-       int         options;
        int         res_code;
        size_t      count = 0;
        xmlChar    *version = NULL;
@@ -1571,25 +1571,28 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
                parse_as_document = true;
        }
 
-       /*
-        * Select parse options.
-        *
-        * Note that here we try to apply DTD defaults (XML_PARSE_DTDATTR)
-        * according to SQL/XML:2008 GR 10.16.7.d: 'Default values defined by
-        * internal DTD are applied'.  As for external DTDs, we try to support
-        * them too (see SQL/XML:2008 GR 10.16.7.e), but that doesn't really
-        * happen because xmlPgEntityLoader prevents it.
-        */
-       options = XML_PARSE_NOENT | XML_PARSE_DTDATTR
-           | (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS);
-
        if (parse_as_document)
        {
+           int         options;
+
+           /* set up parser context used by xmlCtxtReadDoc */
            ctxt = xmlNewParserCtxt();
            if (ctxt == NULL || xmlerrcxt->err_occurred)
                xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
                            "could not allocate parser context");
 
+           /*
+            * Select parse options.
+            *
+            * Note that here we try to apply DTD defaults (XML_PARSE_DTDATTR)
+            * according to SQL/XML:2008 GR 10.16.7.d: 'Default values defined
+            * by internal DTD are applied'.  As for external DTDs, we try to
+            * support them too (see SQL/XML:2008 GR 10.16.7.e), but that
+            * doesn't really happen because xmlPgEntityLoader prevents it.
+            */
+           options = XML_PARSE_NOENT | XML_PARSE_DTDATTR
+               | (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS);
+
            doc = xmlCtxtReadDoc(ctxt, utf8string,
                                 NULL,  /* no URL */
                                 "UTF-8",
@@ -1608,36 +1611,27 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
        }
        else
        {
-           xmlNodePtr  root;
-
-           /* set up document with empty root node to be the context node */
+           /* set up document that xmlParseBalancedChunkMemory will add to */
            doc = xmlNewDoc(version);
            Assert(doc->encoding == NULL);
            doc->encoding = xmlStrdup((const xmlChar *) "UTF-8");
            doc->standalone = standalone;
 
-           root = xmlNewNode(NULL, (const xmlChar *) "content-root");
-           if (root == NULL || xmlerrcxt->err_occurred)
-               xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
-                           "could not allocate xml node");
-           /* This attaches root to doc, so we need not free it separately. */
-           xmlDocSetRootElement(doc, root);
+           /* set parse options --- have to do this the ugly way */
+           save_keep_blanks = xmlKeepBlanksDefault(preserve_whitespace ? 1 : 0);
 
            /* allow empty content */
            if (*(utf8string + count))
            {
                xmlNodePtr  node_list = NULL;
-               xmlParserErrors res;
 
-               res = xmlParseInNodeContext(root,
-                                           (char *) utf8string + count,
-                                           strlen((char *) utf8string + count),
-                                           options,
-                                           &node_list);
+               res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0,
+                                                      utf8string + count,
+                                                      &node_list);
 
                xmlFreeNodeList(node_list);
 
-               if (res != XML_ERR_OK || xmlerrcxt->err_occurred)
+               if (res_code != 0 || xmlerrcxt->err_occurred)
                    xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_CONTENT,
                                "invalid XML content");
            }
@@ -1645,6 +1639,8 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
    }
    PG_CATCH();
    {
+       if (save_keep_blanks != -1)
+           xmlKeepBlanksDefault(save_keep_blanks);
        if (doc != NULL)
            xmlFreeDoc(doc);
        if (ctxt != NULL)
@@ -1656,6 +1652,9 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
    }
    PG_END_TRY();
 
+   if (save_keep_blanks != -1)
+       xmlKeepBlanksDefault(save_keep_blanks);
+
    if (ctxt != NULL)
        xmlFreeParserCtxt(ctxt);