# Ingesting Dynamically Generated Content from Confluence

## Dynamically Generated Content

Confluence offers Macros or add-ons to enhance functionality and display additional content. This content is not ingested by the standard Confluence connector. To ingest content generated by Macros, you will need to use a Custom Script with a custom field mapping. The process is outlined below.

### Script

The following script is used to ingest all content from the XML Schema of your Confluence pages.

{% hint style="warning" %}
On line 9 you will need to replace \<INSTANCE\_ID> with your Confluence instance ID. You can find it by opening your Confluence site and noting the subdomain in the URL (for example, `mycompany` in `https://mycompany.atlassian.net`).
{% endhint %}

{% code lineNumbers="true" expandable="true" %}

```javascript
var transform = function (jsobject) {
    // Rewrites jsobject.body.storage.value from Confluence storage-format XML to plain
    // HTML (macros, links, images, emoticons) and returns the same jsobject. Objects
    // without body.storage.value are returned untouched instead of throwing.
    var storage = jsobject && jsobject["body"] && jsobject["body"]["storage"];
    if (!storage || storage["value"] == null) { return jsobject; }

    // Attachment base URL. Keep this statement on line 9: the setup steps refer to it.
    var BASE_ATTACHMENT_URL = "https://<INSTANCE_ID>.atlassian.net/wiki/download/attachments/" + jsobject["id"] + "/";

    var Jsoup = Java.type('org.jsoup.Jsoup');

    // Parse with default HTML parser
    var doc = Jsoup.parse(storage["value"]);

    // ---------------------------------------------------------------------
    // Helpers (kept inside transform so the script stays a single definition)
    // ---------------------------------------------------------------------

    // Coerces a host (Java) or JS value to a JS string; null/undefined become "".
    function str(value) {
        return value == null ? "" : String(value);
    }

    // Percent-encodes a page title or filename for use in a URL. Spaces still
    // become %20, and characters such as #, ?, &, % and + no longer break the URL.
    function encodeSegment(raw) {
        try {
            return encodeURIComponent(raw);
        } catch (err) {
            // encodeURIComponent throws URIError on malformed surrogate pairs;
            // fall back to the space-only encoding rather than aborting the page.
            return raw.replace(/ /g, "%20");
        }
    }

    // Builds <pre><code>text</code></pre>; the text is escaped, not parsed.
    function toCodeBlock(text) {
        var pre  = doc.createElement("pre");
        var code = doc.createElement("code");
        code.text(text);
        pre.appendChild(code);
        return pre;
    }

    // Calls fn for every element with the given tag, last match first, so that
    // replacing or removing an element never disturbs the ones still to visit.
    function eachReversed(tagName, fn) {
        var found = doc.getElementsByTag(tagName);
        for (var i = found.size() - 1; i >= 0; i--) {
            fn(found.get(i));
        }
    }

    // True when the element has an ac:structured-macro ancestor.
    function insideMacro(element) {
        var ancestor = element.parent();
        while (ancestor != null) {
            if (str(ancestor.tagName()) === "ac:structured-macro") {
                return true;
            }
            ancestor = ancestor.parent();
        }
        return false;
    }

    // ---------------------------------------------------------------------
    // 1) HANDLE ALL ac:structured-macro WITH ac:plain-text-body (CDATA)
    //    - hmpcost-html-macro-pro        -> treat CDATA as HTML
    //    - code / code-block / anything else -> <pre><code>...</code></pre>
    // ---------------------------------------------------------------------
    eachReversed("ac:structured-macro", function (macro) {
        var plainBody = macro.getElementsByTag("ac:plain-text-body").first();
        if (plainBody == null) {
            // macro with no plain-text-body; rich-text-body macros are handled in step 3
            return;
        }

        var cdata = str(plainBody.wholeText());

        if (str(macro.attr("ac:name")) === "hmpcost-html-macro-pro") {
            // HTML macro: interpret CDATA as HTML (iframe, headings, etc.)
            // NOTE(review): this inserts author-supplied markup unsanitized; confirm the
            // downstream consumer sanitizes it if page authors are not trusted.
            var wrapper = doc.createElement("div");
            wrapper.html(cdata);
            macro.replaceWith(wrapper);
            return;
        }

        // Code macros and every other plain-text macro are shown as escaped code.
        macro.replaceWith(toCodeBlock(cdata));
    });

    // ---------------------------------------------------------------------
    // 2) ANY ac:plain-text-body OUTSIDE MACROS → <pre><code>...</code></pre>
    // ---------------------------------------------------------------------
    eachReversed("ac:plain-text-body", function (body) {
        if (insideMacro(body)) {
            return;
        }

        var rawText = str(body.wholeText());
        if (rawText.trim().length === 0) {
            // blank bodies are left for the cleanup step to unwrap
            return;
        }

        body.replaceWith(toCodeBlock(rawText));
    });

    // ---------------------------------------------------------------------
    // 3) OTHER MACROS (no plain-text-body): unwrap rich-text body, remove others
    // ---------------------------------------------------------------------
    eachReversed("ac:structured-macro", function (macro) {
        var richBody = macro.getElementsByTag("ac:rich-text-body").first();
        if (richBody != null) {
            // the rich-text-body wrapper itself is unwrapped in step 7
            macro.replaceWith(richBody);
        } else {
            macro.remove();
        }
    });

    // ---------------------------------------------------------------------
    // 4) LINKS: <ac:link> → <a>
    //    When several references are present the last match wins:
    //    ri:page, then ri:attachment, then ri:url.
    // ---------------------------------------------------------------------
    eachReversed("ac:link", function (acLink) {
        var href = "";
        // Text used when the link has no body of its own: the human-readable
        // target (title / filename / URL) rather than the encoded href.
        var fallbackLabel = "";

        // <ac:link><ri:page ri:content-title="My page"/></ac:link>
        var pageRef = acLink.getElementsByTag("ri:page").first();
        if (pageRef != null) {
            var title = str(pageRef.attr("ri:content-title"));
            if (title.length > 0) {
                href = "/wiki/pages/view?title=" + encodeSegment(title);
                fallbackLabel = title;
            }
        }

        // <ac:link><ri:attachment ri:filename="file name.png"/></ac:link>
        var attachmentRef = acLink.getElementsByTag("ri:attachment").first();
        if (attachmentRef != null) {
            var filename = str(attachmentRef.attr("ri:filename"));
            if (filename.length > 0) {
                href = BASE_ATTACHMENT_URL + encodeSegment(filename);
                fallbackLabel = filename;
            }
        }

        // <ac:link><ri:url ri:value="https://..."/></ac:link>
        var urlRef = acLink.getElementsByTag("ri:url").first();
        if (urlRef != null) {
            var value = str(urlRef.attr("ri:value"));
            if (value.length > 0) {
                href = value;
                fallbackLabel = value;
            }
        }

        var anchor = doc.createElement("a");
        if (href.length > 0) {
            anchor.attr("href", href);
        }

        var linkText = str(acLink.text());
        if (linkText.length > 0) {
            anchor.text(linkText);
        } else if (fallbackLabel.length > 0) {
            anchor.text(fallbackLabel);
        }

        acLink.replaceWith(anchor);
    });

    // ---------------------------------------------------------------------
    // 5) IMAGES: <ac:image> → <img>
    //    Attached images use the attachment download URL; external images
    //    (<ri:url ri:value="..."/>) use their URL as-is. Images with neither
    //    are removed.
    // ---------------------------------------------------------------------
    eachReversed("ac:image", function (acImg) {
        var src = "";

        var attachment = acImg.getElementsByTag("ri:attachment").first();
        if (attachment != null) {
            var fileName = str(attachment.attr("ri:filename"));
            if (fileName.length > 0) {
                src = BASE_ATTACHMENT_URL + encodeSegment(fileName);
            }
        }

        if (src.length === 0) {
            var external = acImg.getElementsByTag("ri:url").first();
            if (external != null) {
                src = str(external.attr("ri:value"));
            }
        }

        if (src.length === 0) {
            acImg.remove();
            return;
        }

        // If user resized -> only width matters; otherwise use the original size
        var width  = str(acImg.attr("ac:width"));
        var height = "";
        if (width.length === 0) {
            width  = str(acImg.attr("ac:original-width"));
            height = str(acImg.attr("ac:original-height"));
        }

        var img = doc.createElement("img");
        img.attr("src", src);

        // alignment: ac:align="center|left|right"
        var align = str(acImg.attr("ac:align")).toLowerCase();
        if (align === "center") {
            img.attr("style", "display:block;margin-left:auto;margin-right:auto;");
        } else if (align === "right") {
            img.attr("style", "float:right;");
        } else if (align === "left") {
            img.attr("style", "float:left;");
        }

        // strip stray quotes/backslashes from the size values
        if (width.length > 0) {
            img.attr("width", width.replace(/["\\]/g, ""));
        }
        if (height.length > 0) {
            img.attr("height", height.replace(/["\\]/g, ""));
        }

        acImg.replaceWith(img);
    });

    // ---------------------------------------------------------------------
    // 6) EMOTICONS: <ac:emoticon ... ac:emoji-fallback="😛" /> → <span>😛</span>
    // ---------------------------------------------------------------------
    eachReversed("ac:emoticon", function (emo) {
        var emoji = str(emo.attr("ac:emoji-fallback"));
        if (emoji.length === 0) {
            emoji = str(emo.attr("ac:emoji-shortname")); // e.g. :stuck_out_tongue:
        }
        if (emoji.length === 0) {
            var emoName = str(emo.attr("ac:name"));
            // e.g. :cheeky: — an emoticon with no name at all renders as nothing
            emoji = emoName.length > 0 ? ":" + emoName + ":" : "";
        }

        // Rename in place; the ac:* attributes are left on the span.
        emo.tagName("span");
        emo.text(emoji);
    });

    // ---------------------------------------------------------------------
    // 7) CLEANUP: unwrap rich-text, remove params, unwrap leftover plain-text
    // ---------------------------------------------------------------------
    eachReversed("ac:rich-text-body", function (richBody) {
        richBody.unwrap();
    });
    eachReversed("ac:parameter", function (param) {
        param.remove();
    });
    eachReversed("ac:plain-text-body", function (leftover) {
        leftover.unwrap();
    });

    // Write back
    jsobject["body"]["storage"]["value"] = doc.outerHtml();
    return jsobject;
};

```

{% endcode %}

### Configure the Data Source

1. In the Aisera Admin UI navigate to **Settings > Data Source.**
2. Select the Confluence Data Source for which you want to enable ingestion of dynamically generated content.
3. On the **Data Source Details** page, click the **Pencil** icon in the top right of the screen to edit the data source.
4. Select **Configurations.**
5. Modify the script above to replace the \<INSTANCE\_ID> placeholder on line 9 with your Confluence instance ID.
6. Insert the *modified script* into the **Custom Script** field.

   <div align="left"><figure><img src="https://3281977978-files.gitbook.io/~/files/v0/b/gitbook-x-prod.appspot.com/o/spaces%2FvBFXjH9S1CAy9f5hzg5Q%2Fuploads%2FagP8CDW5ayKd2tHydhCK%2Fimage.png?alt=media&#x26;token=66010d22-4883-4507-b048-2792a16000ab" alt=""><figcaption></figcaption></figure></div>
7. Add `content.body.storage` to the **Additional Fields to Expand** field.
8. Click **OK** to save the new configurations.
9. At the bottom of the **Data Source Details** page, click the **New Field Mapping** button.
10. Select **Body** for the **Field**, and enter `body.storage.value` for the **Confluence Field.**
11. Click **OK** to save the new field mapping.

    <div align="left"><figure><img src="https://3281977978-files.gitbook.io/~/files/v0/b/gitbook-x-prod.appspot.com/o/spaces%2FvBFXjH9S1CAy9f5hzg5Q%2Fuploads%2FxvDiLvrnIBlw8AHhwpIw%2Fimage.png?alt=media&#x26;token=ce131402-47ee-4408-bff4-20caba48898c" alt="" width="540"><figcaption></figcaption></figure></div>
