package com.atlassian.confluence.search.lucene.extractor;

import com.atlassian.confluence.content.render.xhtml.storage.embed.StorageEmbeddedImageUnmarshaller;
import com.atlassian.confluence.content.render.xhtml.storage.link.StorageLinkConstants;
import com.atlassian.confluence.content.render.xhtml.storage.resource.identifiers.StorageResourceIdentifierConstants;
import com.atlassian.confluence.setup.actions.SetupEmbeddedDatabaseAction;
import com.atlassian.confluence.user.ConfluenceUser;
import com.atlassian.confluence.user.persistence.dao.compatibility.FindUserHelper;
import com.atlassian.sal.api.user.UserKey;
import com.google.common.collect.Lists;
import java.io.IOException;
import java.io.StringReader;
import java.util.Arrays;
import java.util.Deque;
import org.apache.commons.lang3.StringUtils;
import org.cyberneko.html.parsers.SAXParser;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/* loaded from: input_file:com/atlassian/confluence/search/lucene/extractor/HTMLSearchableTextExtractor.class */
public final class HTMLSearchableTextExtractor {

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:com/atlassian/confluence/search/lucene/extractor/HTMLSearchableTextExtractor$TagStripperSAXHandler.class */
    /**
     * SAX content handler that collects the searchable plain text of a parsed document.
     *
     * <p>Behaviour, as implemented below:
     * <ul>
     *   <li>character data is appended to the output unless an ignored element is currently open;</li>
     *   <li>{@code br} and the elements in {@link #BLOCK_LEVEL_ELEMENTS} contribute a newline;</li>
     *   <li>a storage-format link with no visible body text contributes the title of the resource it
     *       points at, or the page title when no resource identifier was seen;</li>
     *   <li>a storage-format image contributes the title of its resource identifier;</li>
     *   <li>an {@code a} element contributes its {@code href}, prefixed by a space, unless the value is
     *       blank or starts with {@code "/"}.</li>
     * </ul>
     *
     * <p>Instances hold mutable parse state and are meant to be used for a single parse.
     */
    public static final class TagStripperSAXHandler extends DefaultHandler {
        private static final String LINE_BREAK_ELEMENT = "br";
        private static final String A_HREF_ELEMENT = "a";
        private static final String HREF_ATTRIBUTE = "href";

        /** Local names that produce a newline. Must stay sorted: it is looked up with binary search. */
        private static final String[] BLOCK_LEVEL_ELEMENTS = {"address", "blockquote", "button", "dd", "div", "dl", "dt", "fieldset", "form", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "li", "map", "noscript", "object", "ol", "p", "pre", "script", "table", "tbody", "td", "tfoot", "th", "thead", "tr", "ul"};

        /** Qualified names whose character data is skipped. Must stay sorted: binary-searched. */
        private static final String[] QNAMES_TO_IGNORE = {"ac:default-parameter", "ac:parameter", "ac:property"};

        /** Sorted, null-free copy of the caller-supplied local names to ignore. */
        private final String[] elementsToIgnore;
        private final String pageTitle;
        private final StringBuilder textContent = new StringBuilder();
        /** Qualified names of the ignored elements that are currently open, innermost first. */
        private final Deque<String> qnameIgnoreStack = Lists.newLinkedList();

        private boolean inLink;
        private boolean isImage = false;
        private boolean isHref = false;
        private StringBuilder linkTextContent;
        private String resourceTitle;
        private String hrefLink;

        /**
         * Creates a handler.
         *
         * @param pageTitle text emitted for a link that has neither body text nor a resource title;
         *                  may be {@code null}, in which case nothing is emitted for such links
         * @param elementsToIgnore local names of elements whose character data must be skipped, in any
         *                  order; may be {@code null} (treated as empty). The array is copied.
         */
        public TagStripperSAXHandler(String pageTitle, String[] elementsToIgnore) {
            this.pageTitle = pageTitle;
            this.elementsToIgnore = sortedCopy(elementsToIgnore);
        }

        /** Returns a sorted copy of {@code names} without null entries, so binary search is valid on it. */
        private static String[] sortedCopy(String[] names) {
            if (names == null) {
                return new String[0];
            }
            String[] kept = new String[names.length];
            int count = 0;
            for (String name : names) {
                if (name != null) {
                    kept[count++] = name;
                }
            }
            String[] sorted = Arrays.copyOf(kept, count);
            Arrays.sort(sorted);
            return sorted;
        }

        /**
         * Appends character data to the output (and to the current link body, when inside a link)
         * unless an ignored element is open.
         */
        @Override // org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
        public void characters(char[] ch, int start, int length) throws SAXException {
            if (!this.qnameIgnoreStack.isEmpty()) {
                return;
            }
            this.textContent.append(ch, start, length);
            if (this.inLink) {
                this.linkTextContent.append(ch, start, length);
            }
        }

        /**
         * Closes whatever state the matching {@link #startElement} opened: pops an ignored element,
         * emits the fallback text of a link, the title of an image, or the href of an anchor; any
         * other element goes through block-level newline handling.
         */
        @Override // org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
        public void endElement(String uri, String localName, String qName) throws SAXException {
            if (!this.qnameIgnoreStack.isEmpty() && StringUtils.isNotBlank(qName) && this.qnameIgnoreStack.peek().equals(qName)) {
                this.qnameIgnoreStack.pop();
                return;
            }
            if (this.inLink && isLinkElement(localName)) {
                // Only links without visible body text need a substitute.
                if (StringUtils.isBlank(this.linkTextContent)) {
                    if (StringUtils.isNotBlank(this.resourceTitle)) {
                        this.textContent.append(this.resourceTitle);
                    } else if (this.pageTitle != null) {
                        // Guard against StringBuilder rendering a null title as the text "null".
                        this.textContent.append(this.pageTitle);
                    }
                }
                this.inLink = false;
                this.resourceTitle = null;
                return;
            }
            if (this.isImage && isImageElement(localName)) {
                if (StringUtils.isNotBlank(this.resourceTitle)) {
                    this.textContent.append(this.resourceTitle);
                }
                // Always leave image state, even without a title; otherwise every later "ri:*"
                // element in the document would still be treated as belonging to this image.
                this.isImage = false;
                this.resourceTitle = null;
                return;
            }
            if (this.isHref && isHrefElement(localName)) {
                if (StringUtils.isNotBlank(this.hrefLink)) {
                    this.textContent.append(' ').append(this.hrefLink);
                }
                this.isHref = false;
                this.hrefLink = null;
                return;
            }
            blockElementHandling(localName);
        }

        /**
         * Classifies the opening element and updates the parse state accordingly. Checks are made in
         * this order: ignored element, resource identifier inside a link/image, storage-format link,
         * {@code br}, {@code a}, storage-format image, block-level element.
         */
        @Override // org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
        public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
            if (StringUtils.isNotBlank(qName) && isIgnored(localName, qName)) {
                this.qnameIgnoreStack.push(qName);
                return;
            }
            // Only the first resource identifier of a link/image supplies the title.
            if (isResourceIdentifierInLink(qName) && StringUtils.isBlank(this.resourceTitle)) {
                processResourceIdentifier(qName, attributes);
                return;
            }
            if (isLinkElement(localName)) {
                this.inLink = true;
                this.linkTextContent = new StringBuilder();
                return;
            }
            if (LINE_BREAK_ELEMENT.equals(localName)) {
                appendNewLine();
                return;
            }
            // Note: the anchor is recognised by qualified name here and by local name in endElement.
            if (A_HREF_ELEMENT.equals(qName)) {
                this.isHref = true;
                String href = attributes.getValue(HREF_ATTRIBUTE);
                // Values starting with "/" are not emitted.
                if (StringUtils.isNotBlank(href) && !href.startsWith("/")) {
                    this.hrefLink = href;
                }
                return;
            }
            if (isImageElement(localName)) {
                this.isImage = true;
                return;
            }
            blockElementHandling(localName);
        }

        /** True when the element is ignored by qualified name or by caller-supplied local name. */
        private boolean isIgnored(String localName, String qName) {
            if (Arrays.binarySearch(QNAMES_TO_IGNORE, qName) >= 0) {
                return true;
            }
            return localName != null && Arrays.binarySearch(this.elementsToIgnore, localName) >= 0;
        }

        /** True for an {@code ri:}-prefixed element seen while a link or image is open. */
        private boolean isResourceIdentifierInLink(String qName) {
            return (this.inLink || this.isImage) && qName != null && qName.startsWith("ri:");
        }

        /**
         * Derives {@link #resourceTitle} from a resource identifier element. The kind of resource is
         * determined by substring match on the qualified name, checked in the order written below.
         */
        private void processResourceIdentifier(String qName, Attributes attributes) {
            if (qName.contains("page") || qName.contains(StorageResourceIdentifierConstants.BLOG_POST_ELEMENT_NAME)) {
                this.resourceTitle = attributes.getValue("ri:content-title");
            } else if (qName.contains("attachment")) {
                this.resourceTitle = attributes.getValue("ri:filename");
            } else if (qName.contains("user")) {
                String userKey = attributes.getValue("ri:userkey");
                if (StringUtils.isNotBlank(userKey)) {
                    ConfluenceUser user = FindUserHelper.getUserByUserKey(new UserKey(userKey));
                    // Unknown users leave the title unset.
                    if (user != null) {
                        this.resourceTitle = user.getName();
                    }
                }
            } else if (qName.contains("space")) {
                this.resourceTitle = attributes.getValue("ri:space-key");
            } else if (qName.contains("shortcut")) {
                this.resourceTitle = attributes.getValue("ri:parameter") + "@" + attributes.getValue("ri:key");
            } else if (qName.contains(StorageResourceIdentifierConstants.URL_ELEMENT_NAME)) {
                this.resourceTitle = attributes.getValue("ri:value");
            } else if (qName.contains(StorageResourceIdentifierConstants.CONTENT_ENTITY_ELEMENT_NAME)) {
                this.resourceTitle = attributes.getValue("ri:content-id");
            }
        }

        /** True when {@code localName} is the local part of {@code StorageLinkConstants.LINK_ELEMENT}. */
        private boolean isLinkElement(String localName) {
            return StorageLinkConstants.LINK_ELEMENT.getLocalPart().equals(localName);
        }

        /** True when {@code localName} is the local part of the storage-format image element. */
        private boolean isImageElement(String localName) {
            return StorageEmbeddedImageUnmarshaller.IMAGE_ELEMENT.getLocalPart().equals(localName);
        }

        /** True when {@code localName} is {@code "a"}. */
        private boolean isHrefElement(String localName) {
            return A_HREF_ELEMENT.equals(localName);
        }

        /** Appends a newline when {@code localName} is one of {@link #BLOCK_LEVEL_ELEMENTS}. */
        private void blockElementHandling(String localName) {
            if (localName != null && Arrays.binarySearch(BLOCK_LEVEL_ELEMENTS, localName) >= 0) {
                appendNewLine();
            }
        }

        private void appendNewLine() {
            this.textContent.append('\n');
        }

        /**
         * Returns the text collected so far.
         *
         * @return the accumulated output as a string; never {@code null}
         */
        public String getTextContent() {
            return this.textContent.toString();
        }
    }

    /**
     * Extracts the searchable text of {@code str} using an empty page title and no additional
     * ignored elements.
     *
     * @param str the markup to process
     * @return whatever {@link #stripTags(String, String, String[])} returns for these arguments
     * @throws SAXException propagated from the three-argument overload
     */
    public static String stripTags(String str) throws SAXException {
        final String[] noExtraIgnoredElements = new String[0];
        return stripTags("", str, noExtraIgnoredElements);
    }

    /**
     * Extracts the searchable text of {@code str} using an empty page title.
     *
     * @param str the markup to process
     * @param strArr local names of elements to ignore, passed through unchanged
     * @return whatever {@link #stripTags(String, String, String[])} returns for these arguments
     * @throws SAXException propagated from the three-argument overload
     */
    public static String stripTags(String str, String[] strArr) throws SAXException {
        final String emptyPageTitle = "";
        return stripTags(emptyPageTitle, str, strArr);
    }

    /**
     * Extracts the searchable text of {@code str2} with no additional ignored elements.
     *
     * @param str the page title handed to the handler
     * @param str2 the markup to process
     * @return whatever {@link #stripTags(String, String, String[])} returns for these arguments
     * @throws SAXException propagated from the three-argument overload
     */
    public static String stripTags(String str, String str2) throws SAXException {
        final String[] noExtraIgnoredElements = new String[0];
        return stripTags(str, str2, noExtraIgnoredElements);
    }

    /**
     * Parses {@code str2} with a NekoHTML SAX parser and returns the text gathered by a
     * {@link TagStripperSAXHandler}.
     *
     * @param str the page title handed to the handler
     * @param str2 the markup to process; {@code null} yields {@code null}
     * @param strArr local names of elements to ignore, handed to the handler
     * @return the handler's collected text, or {@code null} when {@code str2} is {@code null}
     * @throws SAXException if configuring the parser or parsing fails; an {@link IOException} raised
     *                      while parsing is wrapped in a {@code SAXException}
     */
    public static String stripTags(String str, String str2, String[] strArr) throws SAXException {
        if (str2 == null) {
            return null;
        }
        final SAXParser htmlParser = new SAXParser();
        final TagStripperSAXHandler textCollector = new TagStripperSAXHandler(str, strArr);
        htmlParser.setContentHandler(textCollector);
        final InputSource markup = new InputSource(new StringReader(str2));
        applyParserSettings(htmlParser);
        try {
            htmlParser.parse(markup);
        } catch (IOException e) {
            throw new SAXException("IOException while parsing the HTML source", e);
        }
        return textCollector.getTextContent();
    }

    /** Applies the NekoHTML features and the element-name property used for text extraction. */
    private static void applyParserSettings(SAXParser htmlParser) throws SAXException {
        htmlParser.setFeature("http://cyberneko.org/html/features/augmentations", true);
        htmlParser.setFeature("http://cyberneko.org/html/features/balance-tags", true);
        htmlParser.setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
        htmlParser.setFeature("http://cyberneko.org/html/features/balance-tags/document-fragment", true);
        htmlParser.setFeature("http://cyberneko.org/html/features/scanner/normalize-attrs", true);
        htmlParser.setFeature("http://cyberneko.org/html/features/scanner/cdata-sections", true);
    }
}
