/*
 * Decompiled with CFR 0.152.
 */
package org.noear.solon.ai.rag.loader;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import org.commonmark.node.AbstractVisitor;
import org.commonmark.node.BlockQuote;
import org.commonmark.node.Code;
import org.commonmark.node.FencedCodeBlock;
import org.commonmark.node.HardLineBreak;
import org.commonmark.node.Heading;
import org.commonmark.node.Image;
import org.commonmark.node.ListItem;
import org.commonmark.node.Node;
import org.commonmark.node.SoftLineBreak;
import org.commonmark.node.Text;
import org.commonmark.node.ThematicBreak;
import org.commonmark.node.Visitor;
import org.commonmark.parser.Parser;
import org.noear.solon.Utils;
import org.noear.solon.ai.rag.Document;
import org.noear.solon.ai.rag.loader.AbstractOptionsDocumentLoader;
import org.noear.solon.core.util.SupplierEx;
import org.noear.solon.lang.Preview;

@Preview(value="3.1")
public class MarkdownLoader
extends AbstractOptionsDocumentLoader<Options, MarkdownLoader> {
    private final SupplierEx<InputStream> source;
    private final Parser parser;

    public MarkdownLoader(byte[] source) {
        this((SupplierEx<InputStream>)((SupplierEx)() -> new ByteArrayInputStream(source)));
    }

    public MarkdownLoader(File source) {
        this((SupplierEx<InputStream>)((SupplierEx)() -> new FileInputStream(source)));
    }

    public MarkdownLoader(URL source) {
        this((SupplierEx<InputStream>)((SupplierEx)() -> source.openStream()));
    }

    public MarkdownLoader(SupplierEx<InputStream> source) {
        if (source == null) {
            throw new IllegalArgumentException("Source cannot be null");
        }
        this.source = source;
        this.parser = Parser.builder().build();
        this.options = new Options();
        this.additionalMetadata.put("type", "markdown");
    }

    /*
     * Enabled aggressive block sorting
     * Enabled unnecessary exception pruning
     * Enabled aggressive exception aggregation
     */
    public List<Document> load() throws IOException {
        try (InputStream input = (InputStream)this.source.get();){
            Node md = this.parser.parseReader((Reader)new InputStreamReader(input));
            SplitVisitor splitVisitor = new SplitVisitor(this);
            md.accept((Visitor)splitVisitor);
            List<Document> list = splitVisitor.extract();
            return list;
        }
        catch (IOException e) {
            throw e;
        }
        catch (RuntimeException e) {
            throw e;
        }
        catch (Throwable e) {
            throw new RuntimeException(e);
        }
    }

    public static class Options {
        private boolean horizontalLineAsNew;
        private boolean codeBlockAsNew;
        private boolean blockquoteAsNew;

        public Options horizontalLineAsNew(boolean horizontalLineAsNew) {
            this.horizontalLineAsNew = horizontalLineAsNew;
            return this;
        }

        public Options codeBlockAsNew(boolean codeBlockAsNew) {
            this.codeBlockAsNew = codeBlockAsNew;
            return this;
        }

        public Options blockquoteAsNew(boolean blockquoteAsNew) {
            this.blockquoteAsNew = blockquoteAsNew;
            return this;
        }
    }

    static class SplitVisitor
    extends AbstractVisitor {
        private final List<Document> documents = new ArrayList<Document>();
        private final List<String> currentParagraphs = new ArrayList<String>();
        private final MarkdownLoader loader;
        private Document currentDocument;

        SplitVisitor(MarkdownLoader loader) {
            this.loader = loader;
        }

        public void visit(org.commonmark.node.Document document) {
            this.currentDocument = new Document();
            super.visit(document);
        }

        public void visit(Heading heading) {
            this.doneAndNew();
            super.visit(heading);
        }

        public void visit(Image image) {
            String destination = image.getDestination();
            if (destination != null) {
                this.currentParagraphs.add(" ![" + Optional.ofNullable(image.getTitle()).orElse("") + "](" + destination + ") ");
            }
            super.visit(image);
        }

        public void visit(ThematicBreak thematicBreak) {
            if (((Options)this.loader.options).horizontalLineAsNew) {
                this.doneAndNew();
            }
            super.visit(thematicBreak);
        }

        public void visit(SoftLineBreak softLineBreak) {
            this.translateLineBreakToSpace();
            super.visit(softLineBreak);
        }

        public void visit(HardLineBreak hardLineBreak) {
            this.translateLineBreakToSpace();
            super.visit(hardLineBreak);
        }

        public void visit(ListItem listItem) {
            this.translateLineBreakToSpace();
            super.visit(listItem);
        }

        public void visit(BlockQuote blockQuote) {
            if (((Options)this.loader.options).blockquoteAsNew) {
                this.doneAndNew();
            }
            this.translateLineBreakToSpace();
            this.currentDocument.metadata("category", (Object)"blockquote");
            super.visit(blockQuote);
        }

        public void visit(Code code) {
            this.currentParagraphs.add(code.getLiteral());
            this.currentDocument.metadata("category", (Object)"code_inline");
            super.visit(code);
        }

        public void visit(FencedCodeBlock fencedCodeBlock) {
            if (((Options)this.loader.options).codeBlockAsNew) {
                this.doneAndNew();
            }
            this.translateLineBreakToSpace();
            this.currentParagraphs.add(fencedCodeBlock.getLiteral());
            this.currentDocument.metadata("category", (Object)"code_block");
            this.currentDocument.metadata("lang", (Object)fencedCodeBlock.getInfo());
            this.doneAndNew();
            super.visit(fencedCodeBlock);
        }

        public void visit(Text text) {
            Node tmp = text.getParent();
            if (tmp instanceof Heading) {
                Heading heading = (Heading)tmp;
                this.currentDocument.metadata("category", (Object)String.format("header_%d", heading.getLevel()));
                this.currentDocument.metadata("title", (Object)text.getLiteral());
            } else {
                this.currentParagraphs.add(text.getLiteral());
            }
            super.visit(text);
        }

        public List<Document> extract() {
            this.doneAndNew();
            return this.documents;
        }

        private void doneAndNew() {
            if (!this.currentParagraphs.isEmpty()) {
                String content = String.join((CharSequence)"", this.currentParagraphs);
                this.currentDocument.content(content);
                this.currentDocument.metadata(this.loader.additionalMetadata);
                this.documents.add(this.currentDocument);
                this.currentParagraphs.clear();
            }
            this.currentDocument = new Document();
        }

        private void translateLineBreakToSpace() {
            if (Utils.isNotEmpty(this.currentParagraphs)) {
                this.currentParagraphs.add(" ");
            }
        }
    }
}

