222 lines
8.7 KiB
Diff
222 lines
8.7 KiB
Diff
|
Restores compatibility with Python >=3.10,
|
|||
|
which removed the _markupbase.ParserBase.error() hook in
|
|||
|
https://github.com/python/cpython/commit/e34bbfd61f405eef89e8aa50672b0b25022de320
|
|||
|
|
|||
|
Despite the big diff, the only changes are a try…except clause wrapped around the body of goahead() and the removal of the error() method.
|
|||
|
|
|||
|
--- source/sgmllib.py 2023-03-18 08:57:58.726240606 +0100
|
|||
|
+++ source/sgmllib.py 2023-03-18 09:02:01.667568916 +0100
|
|||
|
@@ -101,113 +101,116 @@
|
|||
|
"""Handle the remaining data."""
|
|||
|
self.goahead(1)
|
|||
|
|
|||
|
- def error(self, message):
|
|||
|
- raise SGMLParseError(message)
|
|||
|
-
|
|||
|
# Internal -- handle data as far as reasonable. May leave state
|
|||
|
# and data to be processed by a subsequent call. If 'end' is
|
|||
|
# true, force handling all data as if followed by EOF marker.
|
|||
|
def goahead(self, end):
|
|||
|
- rawdata = self.rawdata
|
|||
|
- i = 0
|
|||
|
- n = len(rawdata)
|
|||
|
- while i < n:
|
|||
|
- if self.nomoretags:
|
|||
|
- self.handle_data(rawdata[i:n])
|
|||
|
- i = n
|
|||
|
- break
|
|||
|
- match = interesting.search(rawdata, i)
|
|||
|
- if match: j = match.start()
|
|||
|
- else: j = n
|
|||
|
- if i < j:
|
|||
|
- self.handle_data(rawdata[i:j])
|
|||
|
- i = j
|
|||
|
- if i == n: break
|
|||
|
- if rawdata[i] == '<':
|
|||
|
- if starttagopen.match(rawdata, i):
|
|||
|
+ try:
|
|||
|
+ rawdata = self.rawdata
|
|||
|
+ i = 0
|
|||
|
+ n = len(rawdata)
|
|||
|
+ while i < n:
|
|||
|
+ if self.nomoretags:
|
|||
|
+ self.handle_data(rawdata[i:n])
|
|||
|
+ i = n
|
|||
|
+ break
|
|||
|
+ match = interesting.search(rawdata, i)
|
|||
|
+ if match: j = match.start()
|
|||
|
+ else: j = n
|
|||
|
+ if i < j:
|
|||
|
+ self.handle_data(rawdata[i:j])
|
|||
|
+ i = j
|
|||
|
+ if i == n: break
|
|||
|
+ if rawdata[i] == '<':
|
|||
|
+ if starttagopen.match(rawdata, i):
|
|||
|
+ if self.literal:
|
|||
|
+ self.handle_data(rawdata[i])
|
|||
|
+ i = i+1
|
|||
|
+ continue
|
|||
|
+ k = self.parse_starttag(i)
|
|||
|
+ if k < 0: break
|
|||
|
+ i = k
|
|||
|
+ continue
|
|||
|
+ if rawdata.startswith("</", i):
|
|||
|
+ k = self.parse_endtag(i)
|
|||
|
+ if k < 0: break
|
|||
|
+ i = k
|
|||
|
+ self.literal = 0
|
|||
|
+ continue
|
|||
|
+ if self.literal:
|
|||
|
+ if n > (i + 1):
|
|||
|
+ self.handle_data("<")
|
|||
|
+ i = i+1
|
|||
|
+ else:
|
|||
|
+ # incomplete
|
|||
|
+ break
|
|||
|
+ continue
|
|||
|
+ if rawdata.startswith("<!--", i):
|
|||
|
+ # Strictly speaking, a comment is --.*--
|
|||
|
+ # within a declaration tag <!...>.
|
|||
|
+ # This should be removed,
|
|||
|
+ # and comments handled only in parse_declaration.
|
|||
|
+ k = self.parse_comment(i)
|
|||
|
+ if k < 0: break
|
|||
|
+ i = k
|
|||
|
+ continue
|
|||
|
+ if rawdata.startswith("<?", i):
|
|||
|
+ k = self.parse_pi(i)
|
|||
|
+ if k < 0: break
|
|||
|
+ i = i+k
|
|||
|
+ continue
|
|||
|
+ if rawdata.startswith("<!", i):
|
|||
|
+ # This is some sort of declaration; in "HTML as
|
|||
|
+ # deployed," this should only be the document type
|
|||
|
+ # declaration ("<!DOCTYPE html...>").
|
|||
|
+ k = self.parse_declaration(i)
|
|||
|
+ if k < 0: break
|
|||
|
+ i = k
|
|||
|
+ continue
|
|||
|
+ elif rawdata[i] == '&':
|
|||
|
if self.literal:
|
|||
|
self.handle_data(rawdata[i])
|
|||
|
i = i+1
|
|||
|
continue
|
|||
|
- k = self.parse_starttag(i)
|
|||
|
- if k < 0: break
|
|||
|
- i = k
|
|||
|
- continue
|
|||
|
- if rawdata.startswith("</", i):
|
|||
|
- k = self.parse_endtag(i)
|
|||
|
- if k < 0: break
|
|||
|
- i = k
|
|||
|
- self.literal = 0
|
|||
|
- continue
|
|||
|
- if self.literal:
|
|||
|
- if n > (i + 1):
|
|||
|
- self.handle_data("<")
|
|||
|
- i = i+1
|
|||
|
- else:
|
|||
|
- # incomplete
|
|||
|
- break
|
|||
|
- continue
|
|||
|
- if rawdata.startswith("<!--", i):
|
|||
|
- # Strictly speaking, a comment is --.*--
|
|||
|
- # within a declaration tag <!...>.
|
|||
|
- # This should be removed,
|
|||
|
- # and comments handled only in parse_declaration.
|
|||
|
- k = self.parse_comment(i)
|
|||
|
- if k < 0: break
|
|||
|
- i = k
|
|||
|
- continue
|
|||
|
- if rawdata.startswith("<?", i):
|
|||
|
- k = self.parse_pi(i)
|
|||
|
- if k < 0: break
|
|||
|
- i = i+k
|
|||
|
- continue
|
|||
|
- if rawdata.startswith("<!", i):
|
|||
|
- # This is some sort of declaration; in "HTML as
|
|||
|
- # deployed," this should only be the document type
|
|||
|
- # declaration ("<!DOCTYPE html...>").
|
|||
|
- k = self.parse_declaration(i)
|
|||
|
- if k < 0: break
|
|||
|
- i = k
|
|||
|
- continue
|
|||
|
- elif rawdata[i] == '&':
|
|||
|
- if self.literal:
|
|||
|
+ match = charref.match(rawdata, i)
|
|||
|
+ if match:
|
|||
|
+ name = match.group(1)
|
|||
|
+ self.handle_charref(name)
|
|||
|
+ i = match.end(0)
|
|||
|
+ if rawdata[i-1] != ';': i = i-1
|
|||
|
+ continue
|
|||
|
+ match = entityref.match(rawdata, i)
|
|||
|
+ if match:
|
|||
|
+ name = match.group(1)
|
|||
|
+ self.handle_entityref(name)
|
|||
|
+ i = match.end(0)
|
|||
|
+ if rawdata[i-1] != ';': i = i-1
|
|||
|
+ continue
|
|||
|
+ else:
|
|||
|
+ self.error('neither < nor & ??')
|
|||
|
+ # We get here only if incomplete matches but
|
|||
|
+ # nothing else
|
|||
|
+ match = incomplete.match(rawdata, i)
|
|||
|
+ if not match:
|
|||
|
self.handle_data(rawdata[i])
|
|||
|
i = i+1
|
|||
|
continue
|
|||
|
- match = charref.match(rawdata, i)
|
|||
|
- if match:
|
|||
|
- name = match.group(1)
|
|||
|
- self.handle_charref(name)
|
|||
|
- i = match.end(0)
|
|||
|
- if rawdata[i-1] != ';': i = i-1
|
|||
|
- continue
|
|||
|
- match = entityref.match(rawdata, i)
|
|||
|
- if match:
|
|||
|
- name = match.group(1)
|
|||
|
- self.handle_entityref(name)
|
|||
|
- i = match.end(0)
|
|||
|
- if rawdata[i-1] != ';': i = i-1
|
|||
|
- continue
|
|||
|
- else:
|
|||
|
- self.error('neither < nor & ??')
|
|||
|
- # We get here only if incomplete matches but
|
|||
|
- # nothing else
|
|||
|
- match = incomplete.match(rawdata, i)
|
|||
|
- if not match:
|
|||
|
- self.handle_data(rawdata[i])
|
|||
|
- i = i+1
|
|||
|
- continue
|
|||
|
- j = match.end(0)
|
|||
|
- if j == n:
|
|||
|
- break # Really incomplete
|
|||
|
- self.handle_data(rawdata[i:j])
|
|||
|
- i = j
|
|||
|
- # end while
|
|||
|
- if end and i < n:
|
|||
|
- self.handle_data(rawdata[i:n])
|
|||
|
- i = n
|
|||
|
- self.rawdata = rawdata[i:]
|
|||
|
- # XXX if end: check for empty stack
|
|||
|
+ j = match.end(0)
|
|||
|
+ if j == n:
|
|||
|
+ break # Really incomplete
|
|||
|
+ self.handle_data(rawdata[i:j])
|
|||
|
+ i = j
|
|||
|
+ # end while
|
|||
|
+ if end and i < n:
|
|||
|
+ self.handle_data(rawdata[i:n])
|
|||
|
+ i = n
|
|||
|
+ self.rawdata = rawdata[i:]
|
|||
|
+ # XXX if end: check for empty stack
|
|||
|
+ except AssertionError as e:
|
|||
|
+ # The .error() method, which raised the custom SGMLParseError, was removed
|
|||
|
+ # by https://github.com/python/cpython/issues/76025. So we have to catch
|
|||
|
+ # _markupbase’s AssertionError and translate it into the old one.
|
|||
|
+ raise SGMLParseError (e.args[0]) from e
|
|||
|
|
|||
|
# Extensions for the DOCTYPE scanner:
|
|||
|
_decl_otherchars = '='
|