* gnu/packages/patches/libxml2-parent-pointers.patch, gnu/packages/patches/libxml2-terminating-newline.patch, gnu/packages/patches/libxml2-xpath-recursion-limit.patch: New files. * gnu/packages/patches/python-libxml2-python39-compat.patch: Delete file. * gnu/local.mk (dist_patch_DATA): Adjust accordingly. * gnu/packages/xml.scm (libxml2): Update to 2.9.12. (python-lxml)[source](modules, snippet): New fields.
		
			
				
	
	
		
			228 lines
		
	
	
	
		
			9 KiB
		
	
	
	
		
			Diff
		
	
	
	
	
	
			
		
		
	
	
			228 lines
		
	
	
	
		
			9 KiB
		
	
	
	
		
			Diff
		
	
	
	
	
	
| Fix a regression in libxml2 2.9.12 where nodes with a corrupted tree structure
 | |
| (mismatched parent pointers, as passed by e.g. lxml) were serialized incorrectly:
 | |
| 
 | |
|   https://gitlab.gnome.org/GNOME/libxml2/-/issues/255
 | |
| 
 | |
| This is an amalgamation of these upstream commits:
 | |
| 
 | |
|   https://gitlab.gnome.org/GNOME/libxml2/-/commit/85b1792e37b131e7a51af98a37f92472e8de5f3f
 | |
|   https://gitlab.gnome.org/GNOME/libxml2/-/commit/13ad8736d294536da4cbcd70a96b0a2fbf47070c
 | |
| 
 | |
| diff --git a/HTMLtree.c b/HTMLtree.c
 | |
| --- a/HTMLtree.c
 | |
| +++ b/HTMLtree.c
 | |
| @@ -744,7 +744,7 @@ void
 | |
|  htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
 | |
|  	                 xmlNodePtr cur, const char *encoding ATTRIBUTE_UNUSED,
 | |
|                           int format) {
 | |
| -    xmlNodePtr root;
 | |
| +    xmlNodePtr root, parent;
 | |
|      xmlAttrPtr attr;
 | |
|      const htmlElemDesc * info;
 | |
|  
 | |
| @@ -755,6 +755,7 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
 | |
|      }
 | |
|  
 | |
|      root = cur;
 | |
| +    parent = cur->parent;
 | |
|      while (1) {
 | |
|          switch (cur->type) {
 | |
|          case XML_HTML_DOCUMENT_NODE:
 | |
| @@ -762,13 +763,25 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
 | |
|              if (((xmlDocPtr) cur)->intSubset != NULL) {
 | |
|                  htmlDtdDumpOutput(buf, (xmlDocPtr) cur, NULL);
 | |
|              }
 | |
| -            if (cur->children != NULL) {
 | |
| +            /* Always validate cur->parent when descending. */
 | |
| +            if ((cur->parent == parent) && (cur->children != NULL)) {
 | |
| +                parent = cur;
 | |
|                  cur = cur->children;
 | |
|                  continue;
 | |
|              }
 | |
|              break;
 | |
|  
 | |
|          case XML_ELEMENT_NODE:
 | |
| +            /*
 | |
| +             * Some users like lxml are known to pass nodes with a corrupted
 | |
| +             * tree structure. Fall back to a recursive call to handle this
 | |
| +             * case.
 | |
| +             */
 | |
| +            if ((cur->parent != parent) && (cur->children != NULL)) {
 | |
| +                htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
 | |
| +                break;
 | |
| +            }
 | |
| +
 | |
|              /*
 | |
|               * Get specific HTML info for that node.
 | |
|               */
 | |
| @@ -817,6 +830,7 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
 | |
|                      (cur->name != NULL) &&
 | |
|                      (cur->name[0] != 'p')) /* p, pre, param */
 | |
|                      xmlOutputBufferWriteString(buf, "\n");
 | |
| +                parent = cur;
 | |
|                  cur = cur->children;
 | |
|                  continue;
 | |
|              }
 | |
| @@ -825,9 +839,9 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
 | |
|                  (info != NULL) && (!info->isinline)) {
 | |
|                  if ((cur->next->type != HTML_TEXT_NODE) &&
 | |
|                      (cur->next->type != HTML_ENTITY_REF_NODE) &&
 | |
| -                    (cur->parent != NULL) &&
 | |
| -                    (cur->parent->name != NULL) &&
 | |
| -                    (cur->parent->name[0] != 'p')) /* p, pre, param */
 | |
| +                    (parent != NULL) &&
 | |
| +                    (parent->name != NULL) &&
 | |
| +                    (parent->name[0] != 'p')) /* p, pre, param */
 | |
|                      xmlOutputBufferWriteString(buf, "\n");
 | |
|              }
 | |
|  
 | |
| @@ -842,9 +856,9 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
 | |
|                  break;
 | |
|              if (((cur->name == (const xmlChar *)xmlStringText) ||
 | |
|                   (cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
 | |
| -                ((cur->parent == NULL) ||
 | |
| -                 ((xmlStrcasecmp(cur->parent->name, BAD_CAST "script")) &&
 | |
| -                  (xmlStrcasecmp(cur->parent->name, BAD_CAST "style"))))) {
 | |
| +                ((parent == NULL) ||
 | |
| +                 ((xmlStrcasecmp(parent->name, BAD_CAST "script")) &&
 | |
| +                  (xmlStrcasecmp(parent->name, BAD_CAST "style"))))) {
 | |
|                  xmlChar *buffer;
 | |
|  
 | |
|                  buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
 | |
| @@ -902,13 +916,9 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
 | |
|                  break;
 | |
|              }
 | |
|  
 | |
| -            /*
 | |
| -             * The parent should never be NULL here but we want to handle
 | |
| -             * corrupted documents gracefully.
 | |
| -             */
 | |
| -            if (cur->parent == NULL)
 | |
| -                return;
 | |
| -            cur = cur->parent;
 | |
| +            cur = parent;
 | |
| +            /* cur->parent was validated when descending. */
 | |
| +            parent = cur->parent;
 | |
|  
 | |
|              if ((cur->type == XML_HTML_DOCUMENT_NODE) ||
 | |
|                  (cur->type == XML_DOCUMENT_NODE)) {
 | |
| @@ -939,9 +949,9 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
 | |
|                      (cur->next != NULL)) {
 | |
|                      if ((cur->next->type != HTML_TEXT_NODE) &&
 | |
|                          (cur->next->type != HTML_ENTITY_REF_NODE) &&
 | |
| -                        (cur->parent != NULL) &&
 | |
| -                        (cur->parent->name != NULL) &&
 | |
| -                        (cur->parent->name[0] != 'p')) /* p, pre, param */
 | |
| +                        (parent != NULL) &&
 | |
| +                        (parent->name != NULL) &&
 | |
| +                        (parent->name[0] != 'p')) /* p, pre, param */
 | |
|                          xmlOutputBufferWriteString(buf, "\n");
 | |
|                  }
 | |
|              }
 | |
| diff --git a/xmlsave.c b/xmlsave.c
 | |
| --- a/xmlsave.c
 | |
| +++ b/xmlsave.c
 | |
| @@ -847,7 +847,7 @@ htmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
 | |
|  static void
 | |
|  xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
 | |
|      int format = ctxt->format;
 | |
| -    xmlNodePtr tmp, root, unformattedNode = NULL;
 | |
| +    xmlNodePtr tmp, root, unformattedNode = NULL, parent;
 | |
|      xmlAttrPtr attr;
 | |
|      xmlChar *start, *end;
 | |
|      xmlOutputBufferPtr buf;
 | |
| @@ -856,6 +856,7 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
 | |
|      buf = ctxt->buf;
 | |
|  
 | |
|      root = cur;
 | |
| +    parent = cur->parent;
 | |
|      while (1) {
 | |
|          switch (cur->type) {
 | |
|          case XML_DOCUMENT_NODE:
 | |
| @@ -868,7 +869,9 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
 | |
|              break;
 | |
|  
 | |
|          case XML_DOCUMENT_FRAG_NODE:
 | |
| -            if (cur->children != NULL) {
 | |
| +            /* Always validate cur->parent when descending. */
 | |
| +            if ((cur->parent == parent) && (cur->children != NULL)) {
 | |
| +                parent = cur;
 | |
|                  cur = cur->children;
 | |
|                  continue;
 | |
|              }
 | |
| @@ -887,7 +890,18 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
 | |
|              break;
 | |
|  
 | |
|          case XML_ELEMENT_NODE:
 | |
| -	    if ((cur != root) && (ctxt->format == 1) && (xmlIndentTreeOutput))
 | |
| +            /*
 | |
| +             * Some users like lxml are known to pass nodes with a corrupted
 | |
| +             * tree structure. Fall back to a recursive call to handle this
 | |
| +             * case.
 | |
| +             */
 | |
| +            if ((cur->parent != parent) && (cur->children != NULL)) {
 | |
| +                xmlNodeDumpOutputInternal(ctxt, cur);
 | |
| +                break;
 | |
| +            }
 | |
| +
 | |
| +	    if ((ctxt->level > 0) && (ctxt->format == 1) &&
 | |
| +                (xmlIndentTreeOutput))
 | |
|  		xmlOutputBufferWrite(buf, ctxt->indent_size *
 | |
|  				     (ctxt->level > ctxt->indent_nr ?
 | |
|  				      ctxt->indent_nr : ctxt->level),
 | |
| @@ -942,6 +956,7 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
 | |
|                  xmlOutputBufferWrite(buf, 1, ">");
 | |
|                  if (ctxt->format == 1) xmlOutputBufferWrite(buf, 1, "\n");
 | |
|                  if (ctxt->level >= 0) ctxt->level++;
 | |
| +                parent = cur;
 | |
|                  cur = cur->children;
 | |
|                  continue;
 | |
|              }
 | |
| @@ -1058,13 +1073,9 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
 | |
|                  break;
 | |
|              }
 | |
|  
 | |
| -            /*
 | |
| -             * The parent should never be NULL here but we want to handle
 | |
| -             * corrupted documents gracefully.
 | |
| -             */
 | |
| -            if (cur->parent == NULL)
 | |
| -                return;
 | |
| -            cur = cur->parent;
 | |
| +            cur = parent;
 | |
| +            /* cur->parent was validated when descending. */
 | |
| +            parent = cur->parent;
 | |
|  
 | |
|              if (cur->type == XML_ELEMENT_NODE) {
 | |
|                  if (ctxt->level > 0) ctxt->level--;
 | |
| diff --git a/xmlsave.c b/xmlsave.c
 | |
| --- a/xmlsave.c
 | |
| +++ b/xmlsave.c
 | |
| @@ -890,6 +890,13 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
 | |
|              break;
 | |
|  
 | |
|          case XML_ELEMENT_NODE:
 | |
| +	    if ((cur != root) && (ctxt->format == 1) &&
 | |
| +                (xmlIndentTreeOutput))
 | |
| +		xmlOutputBufferWrite(buf, ctxt->indent_size *
 | |
| +				     (ctxt->level > ctxt->indent_nr ?
 | |
| +				      ctxt->indent_nr : ctxt->level),
 | |
| +				     ctxt->indent);
 | |
| +
 | |
|              /*
 | |
|               * Some users like lxml are known to pass nodes with a corrupted
 | |
|               * tree structure. Fall back to a recursive call to handle this
 | |
| @@ -900,13 +907,6 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
 | |
|                  break;
 | |
|              }
 | |
|  
 | |
| -	    if ((ctxt->level > 0) && (ctxt->format == 1) &&
 | |
| -                (xmlIndentTreeOutput))
 | |
| -		xmlOutputBufferWrite(buf, ctxt->indent_size *
 | |
| -				     (ctxt->level > ctxt->indent_nr ?
 | |
| -				      ctxt->indent_nr : ctxt->level),
 | |
| -				     ctxt->indent);
 | |
| -
 | |
|              xmlOutputBufferWrite(buf, 1, "<");
 | |
|              if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
 | |
|                  xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
 |