diff --git a/admin/include/functions_upload.inc.php b/admin/include/functions_upload.inc.php
index 780bab0ab..b71783a40 100644
--- a/admin/include/functions_upload.inc.php
+++ b/admin/include/functions_upload.inc.php
@@ -8,6 +8,7 @@
 include_once(PHPWG_ROOT_PATH.'admin/include/functions.php');
 include_once(PHPWG_ROOT_PATH.'admin/include/image.class.php');
+include_once(PHPWG_ROOT_PATH.'include/svg-sanitizer.php');
 // add default event handler for image and thumbnail resize
 add_event_handler('upload_image_resize', 'pwg_image_resize');
@@ -253,6 +254,24 @@ SELECT
   pwg_check_real_extension($source_filepath, $original_filename, true);
+  if ('svg' == $original_extension)
+  {
+    // Check for malicious code inside the svg; the returned message can quote text taken from the uploaded file, so it is escaped like the filename before being output
+    $issues = validate_svg(file_get_contents($source_filepath));
+    if ($issues != '')
+    {
+      $error_msg = 'Invalid SVG "'.htmlspecialchars($original_filename).'" '.htmlspecialchars($issues, ENT_NOQUOTES | ENT_SUBSTITUTE);
+      unlink($source_filepath);
+      if (defined('IN_WS'))
+      {
+        global $service;
+        $service->sendResponse(new PwgError(415, $error_msg));
+        exit;
+      }
+      die($error_msg);
+    }
+  }
+
   $file_extension_replace_by = array(
     'jpeg' => 'jpg',
     );
diff --git a/include/svg-sanitizer.php b/include/svg-sanitizer.php
new file mode 100644
index 000000000..1c374553e
--- /dev/null
+++ b/include/svg-sanitizer.php
@@ -0,0 +1,35 @@
+sanitize($svg_content);
+    $issues = $sanitizer->getXmlIssues();
+    return ($issues) ?
$issues[0]['message'] : ''; + } + catch(Exception $e) + { + return 'Exception during scan: '.$e->getMessage(); + } +} \ No newline at end of file diff --git a/include/svg-sanitizer/ElementReference/Resolver.php b/include/svg-sanitizer/ElementReference/Resolver.php new file mode 100644 index 000000000..cd7a84061 --- /dev/null +++ b/include/svg-sanitizer/ElementReference/Resolver.php @@ -0,0 +1,169 @@ +xPath = $xPath; + $this->useNestingLimit = $useNestingLimit; + } + + public function collect() + { + $this->collectIdentifiedElements(); + $this->processReferences(); + $this->determineInvalidSubjects(); + } + + /** + * Resolves one subject by element. + * + * @param \DOMElement $element + * @param bool $considerChildren Whether to search in Subject's children as well + * @return Subject|null + */ + public function findByElement(\DOMElement $element, $considerChildren = false) + { + foreach ($this->subjects as $subject) { + if ( + $element === $subject->getElement() + || $considerChildren && Helper::isElementContainedIn($element, $subject->getElement()) + ) { + return $subject; + } + } + return null; + } + + /** + * Resolves subjects (plural!) by element id - in theory malformed + * DOM might have same ids assigned to different elements and leaving + * it to client/browser implementation which element to actually use. + * + * @param string $elementId + * @return Subject[] + */ + public function findByElementId($elementId) + { + return array_filter( + $this->subjects, + function (Subject $subject) use ($elementId) { + return $elementId === $subject->getElementId(); + } + ); + } + + /** + * Collects elements having `id` attribute (those that can be referenced). 
+ */ + protected function collectIdentifiedElements() + { + /** @var \DOMNodeList|\DOMElement[] $elements */ + $elements = $this->xPath->query('//*[@id]'); + foreach ($elements as $element) { + $this->subjects[$element->getAttribute('id')] = new Subject($element, $this->useNestingLimit); + } + } + + /** + * Processes references from and to elements having `id` attribute concerning + * their occurrence in `<use>` statements. + */ + protected function processReferences() + { + $useNodeName = $this->xPath->createNodeName('use'); + foreach ($this->subjects as $subject) { + $useElements = $this->xPath->query( + $useNodeName . '[@href or @xlink:href]', + $subject->getElement() + ); + + /** @var \DOMElement $useElement */ + foreach ($useElements as $useElement) { + $useId = Helper::extractIdReferenceFromHref( + Helper::getElementHref($useElement) + ); + if ($useId === null || !isset($this->subjects[$useId])) { + continue; + } + $subject->addUse($this->subjects[$useId]); + $this->subjects[$useId]->addUsedIn($subject); + } + } + } + + /** + * Determines and tags invalid subjects (an href pointing at the subject's own id, infinite `<use>` loops, nesting-limit violations). + */ + protected function determineInvalidSubjects() + { + foreach ($this->subjects as $subject) { + + if (in_array($subject->getElement(), $this->elementsToRemove)) { + continue; + } + + $useId = Helper::extractIdReferenceFromHref( + Helper::getElementHref($subject->getElement()) + ); + + try { + if ($useId === $subject->getElementId()) { + $this->markSubjectAsInvalid($subject); + } elseif ($subject->hasInfiniteLoop()) { + $this->markSubjectAsInvalid($subject); + } + } catch (NestingException $e) { + $this->elementsToRemove[] = $e->getElement(); + $this->markSubjectAsInvalid($subject); + } + } + } + + /** + * Get all the elements collected for removal (elements that caused a nesting exception and the usages of subjects marked invalid). + * + * @return array + */ + public function getElementsToRemove() { + return $this->elementsToRemove; + } + + /** + * The Subject is invalid for some reason, therefore we should + * remove it and all its child usages. 
+ * + * @param Subject $subject + */ + protected function markSubjectAsInvalid(Subject $subject) { + $this->elementsToRemove = array_merge( + $this->elementsToRemove, + $subject->clearInternalAndGetAffectedElements() + ); + } +} \ No newline at end of file diff --git a/include/svg-sanitizer/ElementReference/Subject.php b/include/svg-sanitizer/ElementReference/Subject.php new file mode 100644 index 000000000..3610f0f9b --- /dev/null +++ b/include/svg-sanitizer/ElementReference/Subject.php @@ -0,0 +1,153 @@ +element = $element; + $this->useNestingLimit = $useNestingLimit; + } + + /** + * @return \DOMElement + */ + public function getElement() + { + return $this->element; + } + + /** + * @return string + */ + public function getElementId() + { + return $this->element->getAttribute('id'); + } + + /** + * @param array $subjects Previously processed subjects + * @param int $level The current level of nesting. + * @return bool + * @throws \enshrined\svgSanitize\Exceptions\NestingException + */ + public function hasInfiniteLoop(array $subjects = [], $level = 1) + { + if ($level > $this->useNestingLimit) { + throw new \enshrined\svgSanitize\Exceptions\NestingException('Nesting level too high, aborting', 1570713498, null, $this->getElement()); + } + + if (in_array($this, $subjects, true)) { + return true; + } + $subjects[] = $this; + foreach ($this->useCollection as $usage) { + if ($usage->getSubject()->hasInfiniteLoop($subjects, $level + 1)) { + return true; + } + } + return false; + } + + /** + * @param Subject $subject + */ + public function addUse(Subject $subject) + { + if ($subject === $this) { + throw new \LogicException('Cannot add self usage', 1570713416); + } + $identifier = $subject->getElementId(); + if (isset($this->useCollection[$identifier])) { + $this->useCollection[$identifier]->increment(); + return; + } + $this->useCollection[$identifier] = new Usage($subject); + } + + /** + * @param Subject $subject + */ + public function addUsedIn(Subject $subject) + { + 
if ($subject === $this) { + throw new \LogicException('Cannot add self as usage', 1570713417); + } + $identifier = $subject->getElementId(); + if (isset($this->usedInCollection[$identifier])) { + $this->usedInCollection[$identifier]->increment(); + return; + } + $this->usedInCollection[$identifier] = new Usage($subject); + } + + /** + * @param bool $accumulated + * @return int + */ + public function countUse($accumulated = false) + { + $count = 0; + foreach ($this->useCollection as $use) { + $useCount = $use->getSubject()->countUse(); + $count += $use->getCount() * ($accumulated ? 1 + $useCount : max(1, $useCount)); + } + return $count; + } + + /** + * @return int + */ + public function countUsedIn() + { + $count = 0; + foreach ($this->usedInCollection as $usedIn) { + $count += $usedIn->getCount() * max(1, $usedIn->getSubject()->countUsedIn()); + } + return $count; + } + + /** + * Clear the internal arrays (to free up memory as they can get big) + * and return all the child usages DOMElement's + * + * @return array + */ + public function clearInternalAndGetAffectedElements() + { + $elements = array_map(function(Usage $usage) { + return $usage->getSubject()->getElement(); + }, $this->useCollection); + + $this->usedInCollection = []; + $this->useCollection = []; + + return $elements; + } +} \ No newline at end of file diff --git a/include/svg-sanitizer/ElementReference/Usage.php b/include/svg-sanitizer/ElementReference/Usage.php new file mode 100644 index 000000000..d0ba62d72 --- /dev/null +++ b/include/svg-sanitizer/ElementReference/Usage.php @@ -0,0 +1,49 @@ +subject = $subject; + $this->count = (int)$count; + } + + /** + * @param int $by + */ + public function increment($by = 1) + { + $this->count += (int)$by; + } + + /** + * @return Subject + */ + public function getSubject() + { + return $this->subject; + } + + /** + * @return int + */ + public function getCount() + { + return $this->count; + } +} \ No newline at end of file diff --git 
a/include/svg-sanitizer/Exceptions/NestingException.php b/include/svg-sanitizer/Exceptions/NestingException.php new file mode 100644 index 000000000..7acc842ad --- /dev/null +++ b/include/svg-sanitizer/Exceptions/NestingException.php @@ -0,0 +1,36 @@ +element = $element; + parent::__construct($message, $code, $previous); + } + + /** + * Get the element that caused the exception. + * + * @return \DOMElement + */ + public function getElement() + { + return $this->element; + } +} diff --git a/include/svg-sanitizer/Helper.php b/include/svg-sanitizer/Helper.php new file mode 100644 index 000000000..6e25003cc --- /dev/null +++ b/include/svg-sanitizer/Helper.php @@ -0,0 +1,53 @@ +hasAttribute('href')) { + return $element->getAttribute('href'); + } + if ($element->hasAttributeNS('http://www.w3.org/1999/xlink', 'href')) { + return $element->getAttributeNS('http://www.w3.org/1999/xlink', 'href'); + } + return null; + } + + /** + * @param string $href + * @return string|null + */ + public static function extractIdReferenceFromHref($href) + { + if (!is_string($href) || strpos($href, '#') !== 0) { + return null; + } + return substr($href, 1); + } + + /** + * @param \DOMElement $needle + * @param \DOMElement $haystack + * @return bool + */ + public static function isElementContainedIn(\DOMElement $needle, \DOMElement $haystack) + { + if ($needle === $haystack) { + return true; + } + foreach ($haystack->childNodes as $childNode) { + if (!$childNode instanceof \DOMElement) { + continue; + } + if (self::isElementContainedIn($needle, $childNode)) { + return true; + } + } + return false; + } +} diff --git a/include/svg-sanitizer/Sanitizer.php b/include/svg-sanitizer/Sanitizer.php new file mode 100644 index 000000000..8f027e314 --- /dev/null +++ b/include/svg-sanitizer/Sanitizer.php @@ -0,0 +1,747 @@ +allowedAttrs = array_map('strtolower', AllowedAttributes::getAttributes()); + $this->allowedTags = array_map('strtolower', AllowedTags::getTags()); + } + + /** + * Set up the DOMDocument 
+ */ + protected function resetInternal() + { + $this->xmlDocument = new \DOMDocument(); + $this->xmlDocument->preserveWhiteSpace = false; + $this->xmlDocument->strictErrorChecking = false; + $this->xmlDocument->formatOutput = !$this->minifyXML; + } + + /** + * Set XML options to use when saving XML + * See: DOMDocument::saveXML + * + * @param int $xmlOptions + */ + public function setXMLOptions($xmlOptions) + { + $this->xmlOptions = $xmlOptions; + } + + /** + * Get XML options to use when saving XML + * See: DOMDocument::saveXML + * + * @return int + */ + public function getXMLOptions() + { + return $this->xmlOptions; + } + + /** + * Get the array of allowed tags + * + * @return array + */ + public function getAllowedTags() + { + return $this->allowedTags; + } + + /** + * Set custom allowed tags + * + * @param TagInterface $allowedTags + */ + public function setAllowedTags(TagInterface $allowedTags) + { + $this->allowedTags = array_map('strtolower', $allowedTags::getTags()); + } + + /** + * Get the array of allowed attributes + * + * @return array + */ + public function getAllowedAttrs() + { + return $this->allowedAttrs; + } + + /** + * Set custom allowed attributes + * + * @param AttributeInterface $allowedAttrs + */ + public function setAllowedAttrs(AttributeInterface $allowedAttrs) + { + $this->allowedAttrs = array_map('strtolower', $allowedAttrs::getAttributes()); + } + + /** + * Should we remove references to remote files? + * + * @param bool $removeRemoteRefs + */ + public function removeRemoteReferences($removeRemoteRefs = false) + { + $this->removeRemoteReferences = $removeRemoteRefs; + } + + /** + * Get XML issues. + * + * @return array + */ + public function getXmlIssues() { + return $this->xmlIssues; + } + + /** + * Can we allow huge files? + * + * @return bool + */ + public function getAllowHugeFiles() { + return $this->allowHugeFiles; + } + + /** + * Set whether we can allow huge files. 
+ * + * @param bool $allowHugeFiles + */ + public function setAllowHugeFiles( $allowHugeFiles ) { + $this->allowHugeFiles = $allowHugeFiles; + } + + + /** + * Sanitize the passed string + * + * @param string $dirty + * @return string|false + */ + public function sanitize($dirty) + { + // Don't run on an empty string + if (empty($dirty)) { + return ''; + } + + do { + /* + * recursively remove php tags because they can be hidden inside tags + * i.e. hp echo . ' danger! ';?> + */ + $dirty = preg_replace('/<\?(=|php)(.+?)\?>/i', '', $dirty); + } while (preg_match('/<\?(=|php)(.+?)\?>/i', $dirty) != 0); + + $this->resetInternal(); + $this->setUpBefore(); + + $loaded = $this->xmlDocument->loadXML($dirty, $this->getAllowHugeFiles() ? LIBXML_PARSEHUGE : 0); + + // If we couldn't parse the XML then we go no further. Reset and return false + if (!$loaded) { + $this->xmlIssues = self::getXmlErrors(); + $this->resetAfter(); + return false; + } + + // Pre-process all identified elements + $xPath = new XPath($this->xmlDocument); + $this->elementReferenceResolver = new Resolver($xPath, $this->useNestingLimit); + $this->elementReferenceResolver->collect(); + $elementsToRemove = $this->elementReferenceResolver->getElementsToRemove(); + + // Start the cleaning process + $this->startClean($this->xmlDocument->childNodes, $elementsToRemove); + + // Save cleaned XML to a variable + if ($this->removeXMLTag) { + $clean = $this->xmlDocument->saveXML($this->xmlDocument->documentElement, $this->xmlOptions); + } else { + $clean = $this->xmlDocument->saveXML($this->xmlDocument, $this->xmlOptions); + } + + $this->resetAfter(); + + // Remove any extra whitespaces when minifying + if ($this->minifyXML) { + $clean = preg_replace('/\s+/', ' ', $clean); + } + + // Return result + return $clean; + } + + /** + * Set up libXML before we start + */ + protected function setUpBefore() + { + // This function has been deprecated in PHP 8.0 because in libxml 2.9.0, external entity loading is + // disabled by 
default, so this function is no longer needed to protect against XXE attacks. + if (\LIBXML_VERSION < 20900) { + // Turn off the entity loader + $this->xmlLoaderValue = libxml_disable_entity_loader(true); + } + + // Suppress the errors because we don't really have to worry about formation before cleansing. + // See reset in resetAfter(). + $this->xmlErrorHandlerPreviousValue = libxml_use_internal_errors(true); + + // Reset array of altered XML + $this->xmlIssues = array(); + } + + /** + * Reset the class after use + */ + protected function resetAfter() + { + // This function has been deprecated in PHP 8.0 because in libxml 2.9.0, external entity loading is + // disabled by default, so this function is no longer needed to protect against XXE attacks. + if (\LIBXML_VERSION < 20900) { + // Reset the entity loader + libxml_disable_entity_loader($this->xmlLoaderValue); + } + + libxml_clear_errors(); + libxml_use_internal_errors($this->xmlErrorHandlerPreviousValue); + } + + /** + * Start the cleaning with tags, then we move onto attributes and hrefs later + * + * @param \DOMNodeList $elements + * @param array $elementsToRemove + */ + protected function startClean(\DOMNodeList $elements, array $elementsToRemove) + { + // loop through all elements + // we do this backwards so we don't skip anything if we delete a node + // see comments at: http://php.net/manual/en/class.domnamednodemap.php + for ($i = $elements->length - 1; $i >= 0; $i--) { + /** @var \DOMElement $currentElement */ + $currentElement = $elements->item($i); + + /** + * If the element has exceeded the nesting limit, we should remove it. + * + * As it's only elements that cause us issues with nesting DOS attacks + * we should check what the element is before removing it. For now we'll only + * remove elements. 
+ */ + if (in_array($currentElement, $elementsToRemove) && 'use' === $currentElement->nodeName) { + $currentElement->parentNode->removeChild($currentElement); + $this->xmlIssues[] = array( + 'message' => 'Invalid \'' . $currentElement->tagName . '\'', + 'line' => $currentElement->getLineNo(), + ); + continue; + } + + if ($currentElement instanceof \DOMElement) { + // If the tag isn't in the whitelist, remove it and continue with next iteration + if (!in_array(strtolower($currentElement->tagName), $this->allowedTags)) { + $currentElement->parentNode->removeChild($currentElement); + $this->xmlIssues[] = array( + 'message' => 'Suspicious tag \'' . $currentElement->tagName . '\'', + 'line' => $currentElement->getLineNo(), + ); + continue; + } + + $this->cleanHrefs( $currentElement ); + + $this->cleanXlinkHrefs( $currentElement ); + + $this->cleanAttributesOnWhitelist($currentElement); + + if (strtolower($currentElement->tagName) === 'use') { + if ($this->isUseTagDirty($currentElement) + || $this->isUseTagExceedingThreshold($currentElement) + ) { + $currentElement->parentNode->removeChild($currentElement); + $this->xmlIssues[] = array( + 'message' => 'Suspicious \'' . $currentElement->tagName . '\'', + 'line' => $currentElement->getLineNo(), + ); + continue; + } + } + + // Strip out font elements that will break out of foreign content. + if (strtolower($currentElement->tagName) === 'font') { + $breaksOutOfForeignContent = false; + for ($x = $currentElement->attributes->length - 1; $x >= 0; $x--) { + // get attribute name + $attrName = $currentElement->attributes->item( $x )->nodeName; + + if (in_array(strtolower($attrName), ['face', 'color', 'size'])) { + $breaksOutOfForeignContent = true; + } + } + + if ($breaksOutOfForeignContent) { + $currentElement->parentNode->removeChild($currentElement); + $this->xmlIssues[] = array( + 'message' => 'Suspicious tag \'' . $currentElement->tagName . 
'\'', + 'line' => $currentElement->getLineNo(), + ); + continue; + } + } + } + + $this->cleanUnsafeNodes($currentElement); + + if ($currentElement->hasChildNodes()) { + $this->startClean($currentElement->childNodes, $elementsToRemove); + } + } + } + + /** + * Only allow attributes that are on the whitelist + * + * @param \DOMElement $element + */ + protected function cleanAttributesOnWhitelist(\DOMElement $element) + { + for ($x = $element->attributes->length - 1; $x >= 0; $x--) { + // get attribute name + $attrName = $element->attributes->item($x)->nodeName; + + // Remove attribute if not in whitelist + if (!in_array(strtolower($attrName), $this->allowedAttrs) && !$this->isAriaAttribute(strtolower($attrName)) && !$this->isDataAttribute(strtolower($attrName))) { + + $element->removeAttribute($attrName); + $this->xmlIssues[] = array( + 'message' => 'Suspicious attribute \'' . $attrName . '\'', + 'line' => $element->getLineNo(), + ); + } + + /** + * This is used for when a namespace isn't imported properly. + * Such as xlink:href when the xlink namespace isn't imported. + * We have to do this as the link is still ran in this case. + */ + if (false !== stripos($attrName, 'href')) { + $href = $element->getAttribute($attrName); + if (false === $this->isHrefSafeValue($href)) { + $element->removeAttribute($attrName); + $this->xmlIssues[] = array( + 'message' => 'Suspicious attribute \'href\'', + 'line' => $element->getLineNo(), + ); + } + } + + // Do we want to strip remote references? + if($this->removeRemoteReferences) { + // Remove attribute if it has a remote reference + if (isset($element->attributes->item($x)->value) && $this->hasRemoteReference($element->attributes->item($x)->value)) { + $element->removeAttribute($attrName); + $this->xmlIssues[] = array( + 'message' => 'Suspicious attribute \'' . $attrName . 
'\'', + 'line' => $element->getLineNo(), + ); + } + } + } + } + + /** + * Clean the xlink:hrefs of script and data embeds + * + * @param \DOMElement $element + */ + protected function cleanXlinkHrefs(\DOMElement $element) + { + foreach ($element->attributes as $attribute) { + // remove attributes with unexpected namespace prefix, e.g. `XLinK:href` (instead of `xlink:href`) + if ($attribute->prefix === '' && strtolower($attribute->nodeName) === 'xlink:href') { + $element->removeAttribute($attribute->nodeName); + $this->xmlIssues[] = array( + 'message' => sprintf('Unexpected attribute \'%s\'', $attribute->nodeName), + 'line' => $element->getLineNo(), + ); + } + } + $this->cleanHrefAttributes($element, 'xlink'); + } + + /** + * Clean the hrefs of script and data embeds + * + * @param \DOMElement $element + */ + protected function cleanHrefs(\DOMElement $element) + { + $this->cleanHrefAttributes($element); + } + + protected function cleanHrefAttributes(\DOMElement $element, string $prefix = ''): void + { + $relevantAttributes = array_filter( + iterator_to_array($element->attributes), + static function (\DOMAttr $attr) use ($prefix) { + return strtolower($attr->name) === 'href' && strtolower($attr->prefix) === $prefix; + } + ); + foreach ($relevantAttributes as $attribute) { + if (!$this->isHrefSafeValue($attribute->value)) { + $element->removeAttribute($attribute->nodeName); + $this->xmlIssues[] = array( + 'message' => sprintf('Suspicious attribute \'%s\'', $attribute->nodeName), + 'line' => $element->getLineNo(), + ); + continue; + } + // in case the attribute name is `HrEf`/`xlink:HrEf`, adjust it to `href`/`xlink:href` + if (!in_array($attribute->nodeName, $this->allowedAttrs, true) + && in_array(strtolower($attribute->nodeName), $this->allowedAttrs, true) + ) { + $element->removeAttribute($attribute->nodeName); + $element->setAttribute(strtolower($attribute->nodeName), $attribute->value); + } + } + } + + /** + * Only allow whitelisted starts to be within the href. 
+     *
+     * This will stop scripts etc from being passed through, with or without attempting to hide bypasses.
+     * This stops the need for us to use a complicated script regex.
+     *
+     * @param $value
+     * @return bool
+     */
+    protected function isHrefSafeValue($value) {
+
+        // Allow empty values
+        if (empty($value)) {
+            return true;
+        }
+
+        // Allow fragment identifiers.
+        if ('#' === substr($value, 0, 1)) {
+            return true;
+        }
+
+        // Allow relative URIs.
+        if ('/' === substr($value, 0, 1)) {
+            return true;
+        }
+
+        // Allow HTTPS domains.
+        if ('https://' === substr($value, 0, 8)) {
+            return true;
+        }
+
+        // Allow HTTP domains.
+        if ('http://' === substr($value, 0, 7)) {
+            return true;
+        }
+
+        // Allow known data URIs.
+        if (in_array(substr($value, 0, 14), array(
+            'data:image/png', // PNG
+            'data:image/gif', // GIF
+            'data:image/jpg', // JPG
+            'data:image/jpe', // JPEG
+            'data:image/pjp', // PJPEG
+        ))) {
+            return true;
+        }
+
+        // Allow known short data URIs.
+        if (in_array(substr($value, 0, 12), array(
+            'data:img/png', // PNG
+            'data:img/gif', // GIF
+            'data:img/jpg', // JPG
+            'data:img/jpe', // JPEG
+            'data:img/pjp', // PJPEG
+        ))) {
+            return true;
+        }
+
+        return false;
+    }
+
+    /**
+     * Removes every character outside the printable ASCII range (space to `~`) from the string & trims it
+     *
+     * @param string $value
+     * @return string The filtered, trimmed value ('' when the replacement itself fails)
+     */
+    protected function removeNonPrintableCharacters($value)
+    {
+        return trim((string) preg_replace('/[^ -~]/xu','',$value)); // preg_replace() yields null on error; cast so trim() always gets a string
+    }
+
+    /**
+     * Does this attribute value have a remote reference, i.e. is it a `url(...)` wrapper (quoted or unquoted) around an http(s)/ftp/file or protocol-relative address?
+     *
+     * @param $value
+     * @return bool
+     */
+    protected function hasRemoteReference($value)
+    {
+        $value = $this->removeNonPrintableCharacters($value);
+
+        $wrapped_in_url = preg_match('~^url\(\s*[\'"]?\s*(.*?)\s*[\'"]?\s*\)$~xi', $value, $match); // quotes are optional: url(http://...) is as valid as url("http://...")
+        if (1 !== $wrapped_in_url) {
+            return false;
+        }
+
+        $value = trim($match[1], '\'"');
+
+        return 1 === preg_match('~^((https?|ftp|file):)?//~xi', $value);
+    }
+
+    /**
+     * Should we minify the output?
+ * + * @param bool $shouldMinify + */ + public function minify($shouldMinify = false) + { + $this->minifyXML = (bool) $shouldMinify; + } + + /** + * Should we remove the XML tag in the header? + * + * @param bool $removeXMLTag + */ + public function removeXMLTag($removeXMLTag = false) + { + $this->removeXMLTag = (bool) $removeXMLTag; + } + + /** + * Whether `<use>` elements shall be + * removed in case expansion would exceed this threshold. + * + * @param int $useThreshold + */ + public function useThreshold($useThreshold = 1000) + { + $this->useThreshold = (int)$useThreshold; + } + + /** + * Check to see if an attribute is an aria attribute or not + * + * @param $attributeName + * + * @return bool + */ + protected function isAriaAttribute($attributeName) + { + return strpos($attributeName, 'aria-') === 0; + } + + /** + * Check to see if an attribute is a data attribute or not + * + * @param $attributeName + * + * @return bool + */ + protected function isDataAttribute($attributeName) + { + return strpos($attributeName, 'data-') === 0; + } + + /** + * Make sure our use tag is only referencing internal resources + * + * @param \DOMElement $element + * @return bool True when the element's href is truthy and does not start with `#` + */ + protected function isUseTagDirty(\DOMElement $element) + { + $href = Helper::getElementHref($element); + return $href && strpos($href, '#') !== 0; + } + + /** + * Determines whether `<use>` is expanded + * recursively in order to create DoS scenarios. The amount of an actually + * used element needs to be below `$this->useThreshold`. 
+ * + * @param \DOMElement $element + * @return bool + */ + protected function isUseTagExceedingThreshold(\DOMElement $element) + { + if ($this->useThreshold <= 0) { + return false; + } + $useId = Helper::extractIdReferenceFromHref( + Helper::getElementHref($element) + ); + if ($useId === null) { + return false; + } + foreach ($this->elementReferenceResolver->findByElementId($useId) as $subject) { + if ($subject->countUse() >= $this->useThreshold) { + return true; + } + } + return false; + } + + /** + * Set the nesting limit for tags. + * + * @param $limit + */ + public function setUseNestingLimit($limit) + { + $this->useNestingLimit = (int) $limit; + } + + /** + * Remove nodes that are either invalid or malformed. + * + * @param \DOMNode $currentElement The current element. + */ + protected function cleanUnsafeNodes(\DOMNode $currentElement) { + // Replace CDATA node with encoded text node + if ($currentElement instanceof \DOMCdataSection) { + $textNode = $currentElement->ownerDocument->createTextNode($currentElement->nodeValue); + $currentElement->parentNode->replaceChild($textNode, $currentElement); + // If the element doesn't have a tagname, remove it and continue with next iteration + } elseif (!$currentElement instanceof \DOMElement && !$currentElement instanceof \DOMText) { + $currentElement->parentNode->removeChild($currentElement); + $this->xmlIssues[] = array( + 'message' => 'Suspicious node \'' . $currentElement->nodeName . 
'\'', + 'line' => $currentElement->getLineNo(), + ); + return; + } + + if ( $currentElement->childNodes && $currentElement->childNodes->length > 0 ) { + for ($j = $currentElement->childNodes->length - 1; $j >= 0; $j--) { + /** @var \DOMElement $childElement */ + $childElement = $currentElement->childNodes->item($j); + $this->cleanUnsafeNodes($childElement); + } + } + } + + /** + * Retrieve array of errors + * @return array + */ + private static function getXmlErrors() + { + $errors = []; + foreach (libxml_get_errors() as $error) { + $errors[] = [ + 'message' => trim($error->message), + 'line' => $error->line, + ]; + } + + return $errors; + } +} diff --git a/include/svg-sanitizer/data/AllowedAttributes.php b/include/svg-sanitizer/data/AllowedAttributes.php new file mode 100644 index 000000000..71216706d --- /dev/null +++ b/include/svg-sanitizer/data/AllowedAttributes.php @@ -0,0 +1,357 @@ +handleDefaultNamespace(); + } + + /** + * @param string $nodeName + * @return string + */ + public function createNodeName($nodeName) + { + if (empty($this->defaultNamespaceURI)) { + return $nodeName; + } + return self::DEFAULT_NAMESPACE_PREFIX . ':' . 
$nodeName; + } + + protected function handleDefaultNamespace() + { + $rootElements = $this->getRootElements(); + + if (count($rootElements) !== 1) { + throw new \LogicException( + sprintf('Got %d svg elements, expected exactly one', count($rootElements)), + 1570870568 + ); + } + $this->defaultNamespaceURI = (string)$rootElements[0]->namespaceURI; + + if ($this->defaultNamespaceURI !== '') { + $this->registerNamespace(self::DEFAULT_NAMESPACE_PREFIX, $this->defaultNamespaceURI); + } + } + + /** + * @return \DOMElement[] + */ + protected function getRootElements() + { + $rootElements = []; + $elements = $this->document->getElementsByTagName('svg'); + /** @var \DOMElement $element */ + foreach ($elements as $element) { + if ($element->parentNode !== $this->document) { + continue; + } + $rootElements[] = $element; + } + return $rootElements; + } +} diff --git a/include/svg-sanitizer/svg-scanner.php b/include/svg-sanitizer/svg-scanner.php new file mode 100644 index 000000000..e5007713c --- /dev/null +++ b/include/svg-sanitizer/svg-scanner.php @@ -0,0 +1,192 @@ +#!/usr/bin/env php + array( + 'errors' => 0, + ), + + 'files' => array( + ), +); + + +/* + * Catch files to scan from $argv. + */ + +$files_to_scan = $argv; +unset( $files_to_scan[0] ); + +$files_to_scan = array_values( + $files_to_scan +); + +/* + * Catch no file specified. + */ + +if ( empty( $files_to_scan ) ) { + $results['totals']['errors']++; + $results['messages'] = array( + array( 'No files to scan specified' ), + ); + + sysexit( + $results, + 1 + ); +} + +/* + * Initialize the SVG scanner. + * + * Make sure to allow custom attributes, + * and to remove remote references. + */ +$sanitizer = new enshrined\svgSanitize\Sanitizer(); + +$sanitizer->removeRemoteReferences( true ); + +/* + * Scan each file specified to be scanned. + */ + +foreach( $files_to_scan as $file_name ) { + /* + * Read SVG file. + */ + $svg_file = @file_get_contents( $file_name ); + + /* + * If not found, report that and continue. 
+     */
+    if ( false === $svg_file ) {
+        $results['totals']['errors']++;
+
+        $results['files'][ $file_name ] = array( // assigned directly (not appended) so the record has the same shape as in every other branch
+            'errors' => 1,
+            'messages' => array(
+                array(
+                    'message' => 'File specified could not be read (' . $file_name . ')',
+                    'line' => null,
+                ),
+            ),
+        );
+
+        continue;
+    }
+
+    /*
+     * Sanitize file and get issues found.
+     */
+    $sanitize_status = $sanitizer->sanitize( $svg_file );
+
+    $xml_issues = $sanitizer->getXmlIssues();
+
+    /*
+     * If we find no issues, simply note that.
+     */
+    if ( empty( $xml_issues ) && ( false !== $sanitize_status ) ) {
+        $results['files'][ $file_name ] = array(
+            'errors' => 0,
+            'messages' => array()
+        );
+    }
+
+    /*
+     * Could not sanitize the file.
+     */
+    else if (
+        ( '' === $sanitize_status ) ||
+        ( false === $sanitize_status )
+    ) {
+        $results['totals']['errors']++;
+
+        $results['files'][ $file_name ] = array(
+            'errors' => 1,
+            'messages' => array(
+                array(
+                    'message' => 'Unable to sanitize file \'' . $file_name . '\'' ,
+                    'line' => null,
+                )
+            ),
+        );
+    }
+
+    /*
+     * If we find issues, note it and update statistics.
+     */
+
+    else {
+        $results['totals']['errors'] += count( $xml_issues );
+
+        $results['files'][ $file_name ] = array(
+            'errors' => count( $xml_issues ),
+            'messages' => $xml_issues,
+        );
+    }
+
+    unset( $svg_file );
+    unset( $xml_issues );
+    unset( $sanitize_status );
+}
+
+
+/*
+ * Exit with a status
+ * that reflects what issues
+ * we found.
+ */
+sysexit(
+    $results,
+    ( $results['totals']['errors'] === 0 ? 0 : 1 )
+);