+ /**
+  * Load HTML5 markup via loadHTML(), reporting libxml parse problems as PHP warnings.
+  *
+  * An XML declaration is prepended when the source does not already start with one,
+  * to force DOMDocument into UTF-8 mode. libxml errors raised during loading are
+  * collected and re-emitted through trigger_error() as E_USER_WARNING, except for
+  * "unknown tag" errors (libxml code 801), which are dropped silently.
+  *
+  * Note: reported line numbers refer to the string handed to loadHTML(), which is
+  * one line longer than $source whenever the XML declaration was prepended.
+  *
+  * @param string $source HTML markup to parse.
+  * @return mixed The value returned by loadHTML() for the (possibly prefixed) source.
+  */
+ public function loadHTML5($source) {
+     // Do our own handling of DOMDocument error reporting so we can ignore "unknown tags",
+     // which are usually fine in HTML5. Remember the previous setting so it can be restored.
+     $previous = libxml_use_internal_errors(true);
+     // The "?" must be escaped: unescaped it only makes "<" optional and never matches "<?xml".
+     if (!preg_match('/^\s*<\?xml\s/', $source)) {
+         // Add an XML declaration to force DOMDocument into UTF-8 mode.
+         $source = '<?xml version="1.0" encoding="utf-8"?>'."\n".$source;
+     }
+     $result = $this->loadHTML($source);
+     // Snapshot the errors and restore libxml state *before* reporting, so cleanup still
+     // happens if a user-defined error handler turns a warning into an exception.
+     $errors = libxml_get_errors();
+     libxml_clear_errors();
+     libxml_use_internal_errors($previous);
+     // Handle DOMDocument loading errors, throw away warnings on unknown tags as HTML5 allows all kinds.
+     $errseverity = array(LIBXML_ERR_WARNING => 'Warning', LIBXML_ERR_ERROR => 'Error', LIBXML_ERR_FATAL => 'Fatal');
+     foreach ($errors as $error) {
+         // $error is a libXMLError, see https://www.php.net/manual/en/class.libxmlerror.php
+         // See http://www.xmlsoft.org/html/libxml-xmlerror.html#xmlParserErrors for error numbers
+         if ($error->code == 801) { // XML_HTML_UNKNOWN_TAG gets no output, should not exist for HTML5.
+             continue;
+         }
+         // Fall back to a generic label rather than raising an undefined-index notice.
+         $severity = isset($errseverity[$error->level]) ? $errseverity[$error->level] : 'Error';
+         trigger_error($severity.' loading HTML5: '.$error->message.' (code '.$error->code.'), line: '.$error->line, E_USER_WARNING);
+     }
+     return $result;
+ }
+