OwlCyberSecurity - MANAGER

Edit File: ValidationHandler.php

<?php

namespace AmpProject\Validator;

use AmpProject\Format;
use AmpProject\Html\Parser\DocLocator;
use AmpProject\Html\Parser\HtmlSaxHandlerWithLocation;
use AmpProject\Html\Parser\ParsedAttribute;
use AmpProject\Html\Parser\ParsedTag;
use AmpProject\Html\UpperCaseTag as Tag;
use AmpProject\Str;
use AmpProject\Validator\Spec\Error\DisallowedManufacturedBody;
use AmpProject\Validator\Spec\Error\DuplicateAttribute;
use AmpProject\Validator\Spec\Error\DuplicateUniqueTag;
use AmpProject\Validator\Spec\Error\InvalidDoctypeHtml;

/**
 * Validation Handler which accepts callbacks from HTML Parser.
 *
 * @package ampproject/amp-toolbox
 */
final class ValidationHandler implements HtmlSaxHandlerWithLocation
{
    /**
     * AMP HTML format to validate against.
     *
     * @var string
     */
    private $htmlFormat;

/**
     * Selection of validation rules to use.
     *
     * @var ValidatorRules
     */
    private $rules;

/**
     * Validator specification to use.
     *
     * @var Spec
     */
    private $spec;

/**
     * Validation context.
     *
     * @var Context
     */
    private $context;

/**
     * Result of the validation.
     *
     * @var ValidationResult
     */
    private $validationResult;

public function __construct($htmlFormat = Format::AMP, ?Spec $spec = null)
    {
        $this->htmlFormat       = $htmlFormat;
        $this->spec             = $spec instanceof Spec ? $spec : new Spec();
        $this->validationResult = new ValidationResult();
        $this->rules            = new ValidatorRules($htmlFormat, $spec);
        $this->context          = new Context($this->rules);
    }

/**
     * Get the validation result.
     *
     * @return ValidationResult Validation result.
     */
    public function getResult()
    {
        return $this->validationResult;
    }

/**
     * Handler called when the parser found a new tag.
     *
     * @param ParsedTag $tag New tag that was found.
     * @return void
     */
    public function startTag(ParsedTag $tag)
    {
        if ($tag->upperName() === Tag::HTML) {
            $this->context->getRules()->validateHtmlTag($tag, $this->context, $this->validationResult);
        }
        if ($tag->upperName() === Tag::_DOCTYPE) {
            $this->validateDocType($tag);
            // Even though validateDocType emits all necessary errors about the tag, we continue to process it further
            // (validateTag and such) so that we can record the tag was present and record it as the root pseudo element
            // for the document.
        }
        $maybeDuplicateAttributeName = $tag->hasDuplicateAttributes();
        if ($maybeDuplicateAttributeName !== null) {
            $this->context->addWarning(
                DuplicateAttribute::CODE,
                $this->context->getFilePosition(),
                [$tag->lowerName(), $maybeDuplicateAttributeName],
                '',
                $this->validationResult
            );
            $tag->dedupeAttributes();
        }

if ($tag->upperName() === Tag::BODY) {
            $this->context->recordBodyTag($tag->attributes());
            $this->emitMissingExtensionErrors();
        }

/*
        /** @type {ValidateTagResult} * /
        let resultForReferencePoint = {
        bestMatchTagSpec: null,
        validationResult: new generated.ValidationResult(),
        devModeSuppress: false,
        inlineStyleCssBytes: 0,
        };
        resultForReferencePoint.validationResult->status =
        generated.ValidationResult.Status.UNKNOWN;
        const referencePointMatcher =
        $this->context->getTagStack()->parentReferencePointMatcher();
        // We must match the reference point before the TagSpec, as otherwise we
        // will end up with "unexplained" attributes during tagspec matching
        // which the reference point takes care of.
        if (referencePointMatcher !== null) {
        resultForReferencePoint =
        referencePointMatcher.validateTag($tag, $this->context);
        }

const resultForTag = validateTag(
        $tag, resultForReferencePoint.bestMatchTagSpec,
        $this->context);
        resultForTag.devModeSuppress =
        ShouldSuppressDevModeErrors($tag, $this->context);
        // Only merge in the reference point errors into the final result if the
        // tag otherwise passes one of the TagSpecs. Otherwise, we end up with
        // unnecessarily verbose errors.
        if (referencePointMatcher !== null &&
        resultForTag.validationResult->status ===
        generated.ValidationResult.Status.PASS &&
        !resultForTag.devModeSuppress) {
        $this->validationResult->mergeFrom(
        resultForReferencePoint.validationResult);
        }

checkForReferencePointCollision(
        resultForReferencePoint.bestMatchTagSpec, resultForTag.bestMatchTagSpec,
        $this->context, resultForTag.validationResult);
        if (!resultForTag.devModeSuppress)
        $this->validationResult->mergeFrom(resultForTag.validationResult);

$this->context->updateFromTagResults(
        $tag, resultForReferencePoint, resultForTag);
        */
    }

/**
     * Handler called when the parser found a closing tag.
     *
     * @param ParsedTag $tag Closing tag that was found.
     * @return void
     */
    public function endTag(ParsedTag $tag)
    {
        // TODO: Implement endTag() method.
    }

/**
     * Handler called when PCDATA is found.
     *
     * @param string $text The PCDATA that was found.
     * @return void
     */
    public function pcdata($text)
    {
        // TODO: Implement pcdata() method.
    }

/**
     * Handler called when RCDATA is found.
     *
     * @param string $text The RCDATA that was found.
     * @return void
     */
    public function rcdata($text)
    {
        // TODO: Implement rcdata() method.
    }

/**
     * Handler called when CDATA is found.
     *
     * @param string $text The CDATA that was found.
     * @return void
     */
    public function cdata($text)
    {
        // TODO: Implement cdata() method.
    }

/**
     * Handler called when the parser is starting to parse the document.
     *
     * @return void
     */
    public function startDoc()
    {
        $this->validationResult = new ValidationResult();
    }

/**
     * Handler called when the parsing is done.
     *
     * @return void
     */
    public function endDoc()
    {
        $this->rules->maybeEmitGlobalTagValidationErrors($this->context, $this->validationResult);

if ($this->validationResult->getStatus()->equals(ValidationStatus::UNKNOWN())) {
            $this->validationResult->setStatus(ValidationStatus::PASS());
        }

// As some errors can be inserted out of order, sort errors at the end based on their line / column numbers.
        $this->validationResult->getErrors()->sortByPosition();
    }

/**
     * Callback for informing that the parser is manufacturing a <body> tag not actually found on the page. This will be
     * followed by a startTag() with the actual body tag in question.
     *
     * @return void
     */
    public function markManufacturedBody()
    {
        $this->context->addError(
            DisallowedManufacturedBody::CODE,
            $this->context->getFilePosition(),
            [],
            '',
            $this->validationResult
        );
    }

/**
     * HTML5 defines how parsers treat documents with multiple body tags: they merge the attributes from the later ones
     * into the first one. Therefore, just before the parser sends the endDoc event, it will also send this event which
     * will provide the attributes from the effective body tag to the client (the handler).
     *
     * @param array<ParsedAttribute> $attributes Array of parsed attributes.
     * @return void
     */
    public function effectiveBodyTag($attributes)
    {
        $encounteredAttributes = $this->context->getEncounteredBodyAttributes();

// If we never recorded a body tag with attributes, it was manufactured, in which case we've already logged an
        // error for that. Doing more here would be confusing.
        if ($encounteredAttributes === null) {
            return;
        }

// So now we compare the attributes from the tag that we encountered (HtmlParser sent us a startTag() event for
        // it earlier) with the attributes from the effective body tag that we're just receiving now, which contains all
        // attributes on body tags within the doc. It's correct to think of this synthetic tag simply as a concatenation
        // - there is in general no elimination of duplicate attributes or overriding behavior. Thus, if the second body
        // tag has any attribute this will result in an error.
        $differenceSeen = count($attributes) !== count($encounteredAttributes);
        if (! $differenceSeen) {
            $attributesCount = count($attributes);
            for ($index = 0; $index < $attributesCount; $index++) {
                if ($attributes[$index] !== $encounteredAttributes[$index]) {
                    $differenceSeen = true;
                    break;
                }
            }
        }

if (! $differenceSeen) {
            return;
        }

$this->context->addError(
            DuplicateUniqueTag::CODE,
            $this->context->getEncounteredBodyFilePosition(),
            [Tag::BODY],
            '',
            $this->validationResult
        );
    }

/**
     * Called prior to parsing a document, that is, before startTag().
     *
     * @param DocLocator $locator A locator instance which provides access to the line/column information while SAX
     *                            events are being received by the handler.
     * @return void
     */
    public function setDocLocator(DocLocator $locator)
    {
        $this->context->setDocLocator($locator);
    }

/**
     * While parsing the document HEAD, we may accumulate errors which depend on seeing later extension <script> tags.
     */
    private function emitMissingExtensionErrors()
    {
        foreach ($this->context->getExtensionsContext()->getMissingExtensionErrors() as $missingExtensionError) {
            $this->context->recordError($missingExtensionError, $this->validationResult);
        }
    }

/**
     * Validate the <!doctype> tag.
     *
     * Currently, the HTML parser considers Doctype to be another HTML tag, which is not technically accurate. There is
     * special handling for doctype in Javascript which applies to all AMP formats, as this is strict handling for all
     * HTML in general. Specifically "attributes" are not allowed, even things like `data-foo`.
     *
     * @param ParsedTag $tag The <!doctype> tag to validate.
     */
    private function validateDocType(ParsedTag $tag)
    {
        $attributes = $tag->attributes();

// <!doctype html> - OK
        if (count($attributes) === 1 && $attributes[0]->name() === 'html') {
            return;
        }

// <!doctype html lang=...> OK
        // This is technically invalid. The 'correct' way to do this is to emit the
        // lang attribute on the `<html>` tag. However, we observe a number of
        // websites incorrectly emitting `lang` as part of doctype, so this specific
        // attribute is allowed to avoid breaking existing pages.
        if (
            count($attributes) === 2
            &&
            (
                ($attributes[0]->name() === 'html' && $attributes[1]->name() === 'lang')
                ||
                ($attributes[0]->name() === 'lang' && $attributes[1]->name() === 'html')
            )
        ) {
            return;
        }

if (count($attributes) !== 1 || $attributes[0]->name() !== 'html') {
            $this->context->addError(
                InvalidDoctypeHtml::CODE,
                $this->context->getFilePosition(),
                [],
                'https://amp.dev/documentation/guides-and-tutorials/start/create/basic_markup/',
                $this->validationResult
            );
        }
    }

/**
     * Validates the provided `ParsedHtmlTag` with respect to the tag specifications in the validator's rules, returning
     * a `ValidationResult` with errors for this tag and a PASS or FAIL status. At least one specification must
     * validate, or the result will have status `FAIL`.
     * Also passes back a reference to the tag spec which matched, if a match was found.
     * Context is not mutated; instead, pending mutations are stored in the return value, and are merged only if the tag
     * spec is applied (pending some reference point stuff).
     *
     * @param ParsedTag          $encounteredTag          Tag that was encountered.
     * @param ParsedTagSpec|null $bestMatchReferencePoint Reference point for the best match.
     * @param Context            $context                 Validation context.
     * @return ValidateTagResult
     */
    private function validateTag($encounteredTag, $bestMatchReferencePoint, $context)
    {
        $tagSpecDispatch = $context->getRules()->dispatchForTagName($encounteredTag->upperName());
        // Filter TagSpecDispatch.AllTagSpecs by type identifiers.
        $filteredTagSpecs = [];
        if ($tagSpecDispatch !== null) {
            foreach ($tagSpecDispatch->allTagSpecs() as $tagSpecId) {
                $parsedTagSpec = $context->getRules()->getByTagSpecId($tagSpecId);
                // Keep TagSpecs that are used for these type identifiers.
                if ($parsedTagSpec->isUsedForTypeIdentifiers($context->getTypeIdentifiers())) {
                    $filteredTagSpecs[] = $parsedTagSpec;
                }
            }
        }

// If there are no dispatch keys matching the tag name, ex: tag name is "foo", set a disallowed tag error.
        if (
            $tagSpecDispatch === null
            ||
            (! $tagSpecDispatch->hasDispatchKeys() && count($filteredTagSpecs) === 0)
        ) {
            $result  = new ValidationResult();
            $specUrl = '';
            // Special case the spec_url for font tags to be slightly more useful.
            if ($encounteredTag->upperName() === Tag::FONT) {
                $specUrl = $context->getRules()->getStylesSpecUrl();
            }
            $context->addError(
                ErrorCode::DISALLOWED_TAG,
                $context->getFilePosition(),
                [$encounteredTag->lowerName()],
                $specUrl,
                $result
            );

return new ValidateTagResult($result);
        }

// At this point, we have dispatch keys, tag specs, or both.
        // The strategy is to look for a matching dispatch key first. A matching dispatch key does not guarantee that
        // the dispatched tag spec will also match. If we find a matching dispatch key, we immediately return the result
        // for that tag spec, success or fail.
        // If we don't find a matching dispatch key, we must try all the tag specs to see if any of them match. If there
        // are no tag specs, we want to return a `GENERAL_DISALLOWED_TAG` error.
        // Calling `hasDispatchKeys()` here is only an optimization to skip the loop over encountered attributes in the
        // case where we have no dispatches.
        if ($tagSpecDispatch->hasDispatchKeys()) {
            foreach ($encounteredTag->attributes() as $attribute) {
                $tagSpecIds  = $tagSpecDispatch->matchingDispatchKey(
                    $attribute->name(),
                    // Attribute values are case-sensitive by default, but we match dispatch keys in a case-insensitive
                    // manner and then validate using whatever the tag spec requests.
                    Str::toLowerCase($attribute->value()),
                    $context->getTagStack()->parentTagName()
                );
                $bestAttempt = new ValidateTagResult(new ValidationResult());
                $bestAttempt->getValidationResult()->setStatus(ValidationStatus::UNKNOWN());
                foreach ($tagSpecIds as $tagSpecId) {
                    $parsedTagSpec = $context->getRules()->getByTagSpecId($tagSpecId);
                    // Skip tag specs that aren't used for these type identifiers.
                    if (! $parsedTagSpec->isUsedForTypeIdentifiers($context->getTypeIdentifiers())) {
                        continue;
                    }
                    $attempt = $this->validateTagAgainstSpec(
                        $parsedTagSpec,
                        $bestMatchReferencePoint,
                        $context,
                        $encounteredTag
                    );
                    if (
                        $context->getRules()->betterValidationResultThan(
                            $attempt->getValidationResult(),
                            $bestAttempt->getValidationResult()
                        )
                    ) {
                        $bestAttempt                   = $attempt;
                        $bestAttempt->bestMatchTagSpec = $parsedTagSpec;
                        // Exit early on success.
                        if ($bestAttempt->getValidationResult()->getStatus()->equals(ValidationStatus::PASS())) {
                            return $bestAttempt;
                        }
                    }
                }
                if (! $bestAttempt->getValidationResult()->getStatus()->equals(ValidationStatus::UNKNOWN())) {
                    return $bestAttempt;
                }
            }
        }

// None of the dispatch tag specs matched and passed. If there are no non-dispatch tag specs, consider this a
        // 'generally' disallowed tag, which gives an error that reads "tag foo is disallowed except in specific forms".
        if (count($filteredTagSpecs) === 0) {
            $result = new ValidationResult();
            if ($encounteredTag->upperName() === Tag::SCRIPT) {
                // Special case for `<script>` tags to produce better error messages.
                $context->addError(
                    ErrorCode::DISALLOWED_SCRIPT_TAG,
                    $context->getFilePosition(),
                    [],
                    $context->getRules()->getScriptSpecUrl(),
                    $result
                );
            } else {
                $context->addError(
                    ErrorCode::GENERAL_DISALLOWED_TAG,
                    $context->getFilePosition(),
                    [$encounteredTag->lowerName()],
                    '',
                    $result
                );
            }

return new ValidateTagResult($result);
        }

// Validate against all remaining tag specs. Each tag spec will produce a different set of errors. Even if none
        // of them match, we only want to return errors from a single tag spec, not all of them. We keep around the
        // 'best' attempt until we have found a matching tag spec or have tried them all.
        $bestAttempt = new ValidateTagResult(new ValidationResult());
        $bestAttempt->getValidationResult()->setStatus(ValidationStatus::UNKNOWN());
        foreach ($filteredTagSpecs as $parsedTagSpec) {
            $attempt = $this->validateTagAgainstSpec(
                $parsedTagSpec,
                $bestMatchReferencePoint,
                $context,
                $encounteredTag
            );
            if (
                $context->getRules()->betterValidationResultThan(
                    $attempt->getValidationResult(),
                    $bestAttempt->getValidationResult()
                )
            ) {
                $bestAttempt                   = $attempt;
                $bestAttempt->bestMatchTagSpec = $parsedTagSpec;
                // Exit early.
                if ($bestAttempt->getValidationResult()->getStatus()->equals(ValidationStatus::PASS())) {
                    return $bestAttempt;
                }
            }
        }

return $bestAttempt;
    }
}