A => .gitignore +18 -0
@@ 1,18 @@
+# Binary
+mdcx
+
+# Example/test Confluence documents (not part of the project)
+0-root.xml
+rfc-111.xml
+rfc-111.md
+
+# IDE
+.idea/
+.vscode/
+*.swp
+*.swo
+*~
+
+# OS
+.DS_Store
+Thumbs.db
A => README.md +128 -0
@@ 1,128 @@
+# mdcx
+
+Markdown to Confluence XML converter with bidirectional sync support for self-hosted Confluence Server/Data Center.
+
+Converts Markdown to [Confluence storage format](https://confluence.atlassian.com/doc/confluence-storage-format-790796544.html) XML and back. Supports pulling pages from Confluence, editing locally as Markdown, and pushing changes back — with template-aware embedding that preserves metadata tables, changelogs, and inline comment markers.
+
+## Install
+
+```bash
+go install sourcecraft.dev/bigbes/markdown-to-confluence-xml@latest
+```
+
+The binary is named `mdcx`.
+
+## Commands
+
+### `convert` — Markdown to Confluence XML
+
+```bash
+mdcx convert input.md -o output.xml
+cat input.md | mdcx convert > output.xml
+```
+
+### `embed` — Embed Markdown into a Confluence XML template
+
+Converts Markdown and inserts it between marker comments in an existing Confluence document, preserving everything outside the markers (metadata table, TOC, changelog, etc.).
+
+```bash
+mdcx embed input.md --template template.xml -o output.xml
+```
+
+The template must contain marker comments:
+
+```xml
+<!-- MD_CONTENT_START -->
+<!-- MD_CONTENT_END -->
+```
+
+### `extract` — Extract Markdown from Confluence XML
+
+Extracts content between markers and converts back to Markdown.
+
+```bash
+mdcx extract input.xml -o output.md
+mdcx extract input.xml --raw # output raw Confluence XML
+```
+
+### `pull` — Pull a page from Confluence
+
+```bash
+mdcx pull "https://confluence.example.com/pages/viewpage.action?pageId=12345" -o page.md
+mdcx pull "https://confluence.example.com/display/TEAM/Page+Title" -o page.md
+mdcx pull "https://confluence.example.com/display/TEAM/Page+Title" --raw -o page.xml
+```
+
+### `push` — Push Markdown to a Confluence page
+
+```bash
+# Replace entire page body
+mdcx push "https://confluence.example.com/display/TEAM/Page+Title" page.md
+
+# Template mode: replace only content between markers
+mdcx push "https://confluence.example.com/display/TEAM/Page+Title" page.md --template
+
+# With version message
+mdcx push "https://confluence.example.com/display/TEAM/Page+Title" page.md -m "Updated intro section"
+```
+
+## Authentication
+
+For `pull` and `push`, provide a [Personal Access Token](https://confluence.atlassian.com/enterprise/using-personal-access-tokens-1026032365.html) via:
+
+- `--token` flag, or
+- `CONFLUENCE_TOKEN` environment variable
+
+```bash
+export CONFLUENCE_TOKEN="your-token-here"
+mdcx pull "https://confluence.example.com/display/TEAM/RFC-42" -o rfc.md
+```
+
+## Typical workflow
+
+```bash
+export CONFLUENCE_TOKEN="..."
+
+# Pull page to local Markdown
+mdcx pull "https://confluence.example.com/display/TEAM/RFC-42" -o rfc.md
+
+# Edit locally
+vim rfc.md
+
+# Push back, preserving template structure
+mdcx push "https://confluence.example.com/display/TEAM/RFC-42" rfc.md --template -m "Updated requirements"
+```
+
+## Supported elements
+
+| Markdown | Confluence XML |
+|---|---|
+| `# Heading` | `<h1>` ... `<h6>` |
+| `**bold**` | `<strong>` |
+| `*italic*` | `<em>` |
+| `~~strike~~` | `<del>` |
+| `` `code` `` | `<code>` |
+| Fenced code blocks | `<ac:structured-macro ac:name="code">` with CDATA |
+| `- item` / `1. item` | `<ul>/<ol>` with `<li>` |
+| Nested lists | Nested `<ul>/<ol>` inside `<li>` |
+| `- [x] task` | `<ac:task-list>` / `<ac:task>` |
+| `[text](url)` | `<a href="...">` |
+| `![alt](url)` | `<ac:image><ri:url .../>` |
+| `> blockquote` | `<ac:structured-macro ac:name="info">` (info panel) |
+| `---` | `<hr/>` |
+| GFM tables | `<table>` with `<th>/<td>` wrapped in `<p>` |
+| Inline comment markers | Preserved via `<span data-inline-comment="ref">` in Markdown |
+
+## Inline comment preservation
+
+Confluence inline comments (`<ac:inline-comment-marker ac:ref="UUID">`) are preserved through round-trips. In Markdown they appear as:
+
+```html
+<span data-inline-comment="b2f6ce98-4dc9-...">commented text</span>
+```
+
+This is converted back to proper `<ac:inline-comment-marker>` tags when pushing to Confluence.
+
+## License
+
+MIT
A => api/client.go +35 -0
@@ 1,35 @@
+package api
+
+import (
+ "net/http"
+ "time"
+)
+
+// Client is a Confluence REST API client for Server/Data Center.
+type Client struct {
+	BaseURL    string       // prepended verbatim to every request path (see newRequest)
+	Token      string       // sent as "Authorization: Bearer <Token>"
+	HTTPClient *http.Client // performs the requests; NewClient installs one with a 30s timeout
+}
+
+// NewClient returns a Client for the Confluence instance at baseURL that
+// authenticates with token. Its HTTP client gives up after 30 seconds.
+func NewClient(baseURL, token string) *Client {
+	httpClient := &http.Client{Timeout: 30 * time.Second}
+
+	c := new(Client)
+	c.BaseURL = baseURL
+	c.Token = token
+	c.HTTPClient = httpClient
+	return c
+}
+
+func (c *Client) newRequest(method, path string) (*http.Request, error) {
+ req, err := http.NewRequest(method, c.BaseURL+path, nil)
+ if err != nil {
+ return nil, err
+ }
+ req.Header.Set("Authorization", "Bearer "+c.Token)
+ req.Header.Set("Content-Type", "application/json")
+ req.Header.Set("Accept", "application/json")
+ return req, nil
+}
A => api/client_test.go +183 -0
@@ 1,183 @@
+package api
+
+import (
+ "encoding/json"
+ "net/http"
+ "net/http/httptest"
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+)
+
+func newTestPage() ContentResponse {
+ return ContentResponse{
+ ID: "12345",
+ Type: "page",
+ Title: "Test Page",
+ Version: Version{
+ Number: 5,
+ },
+ Body: Body{
+ Storage: StorageBody{
+ Value: "<h1>Hello</h1><p>World</p>",
+ Representation: "storage",
+ },
+ },
+ }
+}
+
+// TestGetContent checks the request GetContent sends (path, expand parameter,
+// bearer header) and that the JSON reply is decoded into a ContentResponse.
+func TestGetContent(t *testing.T) {
+	want := newTestPage()
+	handler := func(w http.ResponseWriter, r *http.Request) {
+		assert.Equal(t, "/rest/api/content/12345", r.URL.Path)
+		assert.Equal(t, "body.storage,version", r.URL.Query().Get("expand"))
+		assert.Equal(t, "Bearer test-token", r.Header.Get("Authorization"))
+
+		w.Header().Set("Content-Type", "application/json")
+		json.NewEncoder(w).Encode(want)
+	}
+	srv := httptest.NewServer(http.HandlerFunc(handler))
+	defer srv.Close()
+
+	got, err := NewClient(srv.URL, "test-token").GetContent("12345")
+	require.NoError(t, err)
+	assert.Equal(t, "12345", got.ID)
+	assert.Equal(t, "Test Page", got.Title)
+	assert.Equal(t, 5, got.Version.Number)
+	assert.Equal(t, "<h1>Hello</h1><p>World</p>", got.Body.Storage.Value)
+}
+
+// TestFindContent checks that FindContent queries /rest/api/content with the
+// space key and title, and returns the first search hit.
+func TestFindContent(t *testing.T) {
+	hit := newTestPage()
+	handler := func(w http.ResponseWriter, r *http.Request) {
+		query := r.URL.Query()
+		assert.Equal(t, "/rest/api/content", r.URL.Path)
+		assert.Equal(t, "TEAM", query.Get("spaceKey"))
+		assert.Equal(t, "Test Page", query.Get("title"))
+
+		w.Header().Set("Content-Type", "application/json")
+		reply := searchResults{Results: []ContentResponse{hit}, Size: 1}
+		json.NewEncoder(w).Encode(reply)
+	}
+	srv := httptest.NewServer(http.HandlerFunc(handler))
+	defer srv.Close()
+
+	got, err := NewClient(srv.URL, "test-token").FindContent("TEAM", "Test Page")
+	require.NoError(t, err)
+	assert.Equal(t, "12345", got.ID)
+}
+
+// TestFindContent_NotFound checks that an empty search result is reported as
+// a "not found" error.
+func TestFindContent_NotFound(t *testing.T) {
+	handler := func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "application/json")
+		json.NewEncoder(w).Encode(searchResults{Results: nil, Size: 0})
+	}
+	srv := httptest.NewServer(http.HandlerFunc(handler))
+	defer srv.Close()
+
+	_, err := NewClient(srv.URL, "test-token").FindContent("TEAM", "Nonexistent")
+	require.Error(t, err)
+	assert.Contains(t, err.Error(), "not found")
+}
+
+// TestUpdateContent checks the PUT that UpdateContent sends: method, path,
+// bearer header, and a payload carrying version+1, the unchanged title/type,
+// the new storage body and the version message.
+func TestUpdateContent(t *testing.T) {
+	page := newTestPage()
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		assert.Equal(t, http.MethodPut, r.Method)
+		assert.Equal(t, "/rest/api/content/12345", r.URL.Path)
+		assert.Equal(t, "Bearer test-token", r.Header.Get("Authorization"))
+
+		var req updateRequest
+		// The handler runs on the server's goroutine, where require/FailNow
+		// must not be called; record the failure with assert and bail out.
+		if err := json.NewDecoder(r.Body).Decode(&req); !assert.NoError(t, err) {
+			w.WriteHeader(http.StatusBadRequest)
+			return
+		}
+
+		assert.Equal(t, 6, req.Version.Number, "version should be incremented")
+		assert.Equal(t, "Test Page", req.Title)
+		assert.Equal(t, "page", req.Type)
+		assert.Equal(t, "<h1>Updated</h1>", req.Body.Storage.Value)
+		assert.Equal(t, "storage", req.Body.Storage.Representation)
+		assert.Equal(t, "Updated via mdcx", req.Version.Message)
+
+		w.WriteHeader(http.StatusOK)
+		json.NewEncoder(w).Encode(page)
+	}))
+	defer server.Close()
+
+	client := NewClient(server.URL, "test-token")
+	err := client.UpdateContent("12345", &page, "<h1>Updated</h1>", "Updated via mdcx")
+	require.NoError(t, err)
+}
+
+// TestGetContent_Unauthorized checks that a 401 reply surfaces as an
+// "authentication failed" error.
+func TestGetContent_Unauthorized(t *testing.T) {
+	reject := func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusUnauthorized)
+	}
+	srv := httptest.NewServer(http.HandlerFunc(reject))
+	defer srv.Close()
+
+	_, err := NewClient(srv.URL, "bad-token").GetContent("12345")
+	require.Error(t, err)
+	assert.Contains(t, err.Error(), "authentication failed")
+}
+
+// TestGetContent_NotFound checks that a 404 reply surfaces as a "not found"
+// error.
+func TestGetContent_NotFound(t *testing.T) {
+	missing := func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusNotFound)
+	}
+	srv := httptest.NewServer(http.HandlerFunc(missing))
+	defer srv.Close()
+
+	_, err := NewClient(srv.URL, "test-token").GetContent("99999")
+	require.Error(t, err)
+	assert.Contains(t, err.Error(), "not found")
+}
+
+// TestUpdateContent_VersionConflict checks that a 409 reply surfaces as a
+// "version conflict" error.
+func TestUpdateContent_VersionConflict(t *testing.T) {
+	current := newTestPage()
+	conflict := func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusConflict)
+	}
+	srv := httptest.NewServer(http.HandlerFunc(conflict))
+	defer srv.Close()
+
+	err := NewClient(srv.URL, "test-token").UpdateContent("12345", &current, "<h1>Updated</h1>", "")
+	require.Error(t, err)
+	assert.Contains(t, err.Error(), "version conflict")
+}
+
+// TestGetPage_ByID checks that a PageRef carrying a PageID is resolved through
+// the by-ID endpoint.
+func TestGetPage_ByID(t *testing.T) {
+	want := newTestPage()
+	handler := func(w http.ResponseWriter, r *http.Request) {
+		assert.Equal(t, "/rest/api/content/12345", r.URL.Path)
+		w.Header().Set("Content-Type", "application/json")
+		json.NewEncoder(w).Encode(want)
+	}
+	srv := httptest.NewServer(http.HandlerFunc(handler))
+	defer srv.Close()
+
+	pageRef := &PageRef{BaseURL: srv.URL, PageID: "12345"}
+	got, err := NewClient(srv.URL, "test-token").GetPage(pageRef)
+	require.NoError(t, err)
+	assert.Equal(t, "Test Page", got.Title)
+}
+
+// TestGetPage_BySpaceAndTitle checks that a PageRef without a PageID is
+// resolved through the search endpoint.
+func TestGetPage_BySpaceAndTitle(t *testing.T) {
+	hit := newTestPage()
+	handler := func(w http.ResponseWriter, r *http.Request) {
+		assert.Equal(t, "/rest/api/content", r.URL.Path)
+		w.Header().Set("Content-Type", "application/json")
+		reply := searchResults{Results: []ContentResponse{hit}, Size: 1}
+		json.NewEncoder(w).Encode(reply)
+	}
+	srv := httptest.NewServer(http.HandlerFunc(handler))
+	defer srv.Close()
+
+	pageRef := &PageRef{BaseURL: srv.URL, SpaceKey: "TEAM", Title: "Test Page"}
+	got, err := NewClient(srv.URL, "test-token").GetPage(pageRef)
+	require.NoError(t, err)
+	assert.Equal(t, "Test Page", got.Title)
+}
A => api/content.go +179 -0
@@ 1,179 @@
+package api
+
+import (
+ "bytes"
+ "encoding/json"
+ "fmt"
+ "io"
+ "net/http"
+ "net/url"
+)
+
+// ContentResponse represents a Confluence page from the REST API.
+// It is decoded from both the by-ID response (GetContent) and each entry of
+// the search response (FindContent).
+type ContentResponse struct {
+	ID      string  `json:"id"`
+	Type    string  `json:"type"`  // echoed back unchanged by UpdateContent
+	Title   string  `json:"title"` // echoed back unchanged by UpdateContent
+	Version Version `json:"version"`
+	Body    Body    `json:"body"`
+}
+
+// Version holds page version info.
+type Version struct {
+	Number  int    `json:"number"`            // UpdateContent sends the current number plus one
+	Message string `json:"message,omitempty"` // left out of the JSON when empty
+}
+
+// Body holds page body content.
+// Only the "storage" representation is modelled.
+type Body struct {
+	Storage StorageBody `json:"storage"`
+}
+
+// StorageBody holds the storage format representation.
+type StorageBody struct {
+	Value          string `json:"value"`          // the page markup itself
+	Representation string `json:"representation"` // UpdateContent always sends "storage"
+}
+
+// updateRequest is the JSON payload for updating a page.
+// UpdateContent fills it from the current page plus the new body.
+type updateRequest struct {
+	Version Version `json:"version"`
+	Title   string  `json:"title"`
+	Type    string  `json:"type"`
+	Body    Body    `json:"body"`
+}
+
+// searchResults wraps the /rest/api/content search response.
+type searchResults struct {
+	Results []ContentResponse `json:"results"`
+	Size    int               `json:"size"` // count reported by the server
+}
+
+// GetContent fetches a page by ID with storage body and version info.
+//
+// pageID is percent-escaped before it is placed in the request path, so an ID
+// containing "/", "?" or "#" cannot change which endpoint is called.
+// Non-2xx replies are turned into errors by checkResponse; transport and JSON
+// decoding failures are wrapped with context.
+func (c *Client) GetContent(pageID string) (*ContentResponse, error) {
+	path := "/rest/api/content/" + url.PathEscape(pageID) + "?expand=body.storage,version"
+	req, err := c.newRequest(http.MethodGet, path)
+	if err != nil {
+		return nil, err
+	}
+
+	resp, err := c.HTTPClient.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("fetching page %s: %w", pageID, err)
+	}
+	defer resp.Body.Close()
+
+	if err := checkResponse(resp); err != nil {
+		return nil, err
+	}
+
+	var content ContentResponse
+	if err := json.NewDecoder(resp.Body).Decode(&content); err != nil {
+		return nil, fmt.Errorf("decoding response: %w", err)
+	}
+	return &content, nil
+}
+
+// FindContent searches for a page by space key and title and returns the
+// first match, expanded with storage body and version info.
+//
+// It returns a "not found" error when the result list is empty. The check is
+// made on the decoded slice rather than the server-reported size, so a reply
+// whose size and results disagree cannot cause an out-of-range panic.
+func (c *Client) FindContent(spaceKey, title string) (*ContentResponse, error) {
+	params := url.Values{
+		"spaceKey": {spaceKey},
+		"title":    {title},
+		"expand":   {"body.storage,version"},
+	}
+	path := "/rest/api/content?" + params.Encode()
+	req, err := c.newRequest(http.MethodGet, path)
+	if err != nil {
+		return nil, err
+	}
+
+	resp, err := c.HTTPClient.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("searching for page %q in space %q: %w", title, spaceKey, err)
+	}
+	defer resp.Body.Close()
+
+	if err := checkResponse(resp); err != nil {
+		return nil, err
+	}
+
+	var results searchResults
+	if err := json.NewDecoder(resp.Body).Decode(&results); err != nil {
+		return nil, fmt.Errorf("decoding search results: %w", err)
+	}
+
+	if len(results.Results) == 0 {
+		return nil, fmt.Errorf("page %q not found in space %q", title, spaceKey)
+	}
+
+	return &results.Results[0], nil
+}
+
+// GetPage loads the page that ref points at: by ID when ref.PageID is set,
+// otherwise by searching for ref.Title inside ref.SpaceKey.
+func (c *Client) GetPage(ref *PageRef) (*ContentResponse, error) {
+	if id := ref.PageID; id != "" {
+		return c.GetContent(id)
+	}
+	return c.FindContent(ref.SpaceKey, ref.Title)
+}
+
+// UpdateContent updates a page's storage body, incrementing the version.
+//
+// current supplies the title, type and version number to build on; the
+// request carries current.Version.Number+1 together with message. newBody is
+// sent as the "storage" representation. pageID is percent-escaped before it
+// is placed in the request path.
+//
+// It returns an error when current is nil, when the request cannot be built
+// or sent, or when the server replies with a non-2xx status (see
+// checkResponse; a 409 is reported as a version conflict).
+func (c *Client) UpdateContent(pageID string, current *ContentResponse, newBody string, message string) error {
+	if current == nil {
+		return fmt.Errorf("updating page %s: current page is nil", pageID)
+	}
+
+	payload := updateRequest{
+		Version: Version{
+			Number:  current.Version.Number + 1,
+			Message: message,
+		},
+		Title: current.Title,
+		Type:  current.Type,
+		Body: Body{
+			Storage: StorageBody{
+				Value:          newBody,
+				Representation: "storage",
+			},
+		},
+	}
+
+	data, err := json.Marshal(payload)
+	if err != nil {
+		return fmt.Errorf("marshaling update: %w", err)
+	}
+
+	path := "/rest/api/content/" + url.PathEscape(pageID)
+	req, err := c.newRequest(http.MethodPut, path)
+	if err != nil {
+		return err
+	}
+	// newRequest creates the request without a body, so attach it by hand.
+	// GetBody lets the transport re-send the payload after a 307/308 redirect.
+	req.Body = io.NopCloser(bytes.NewReader(data))
+	req.GetBody = func() (io.ReadCloser, error) {
+		return io.NopCloser(bytes.NewReader(data)), nil
+	}
+	req.ContentLength = int64(len(data))
+
+	resp, err := c.HTTPClient.Do(req)
+	if err != nil {
+		return fmt.Errorf("updating page %s: %w", pageID, err)
+	}
+	defer resp.Body.Close()
+
+	return checkResponse(resp)
+}
+
+// checkResponse maps an HTTP response to an error: nil for any 2xx status, a
+// fixed message for 401/403/404/409, and a generic message that includes the
+// response body for everything else.
+func checkResponse(resp *http.Response) error {
+	code := resp.StatusCode
+	if code < 200 || code >= 300 {
+		// Best effort: a failed read just leaves the body text empty.
+		raw, _ := io.ReadAll(resp.Body)
+
+		if code == http.StatusUnauthorized {
+			return fmt.Errorf("authentication failed (401): invalid or expired token")
+		}
+		if code == http.StatusForbidden {
+			return fmt.Errorf("access denied (403): insufficient permissions")
+		}
+		if code == http.StatusNotFound {
+			return fmt.Errorf("page not found (404)")
+		}
+		if code == http.StatusConflict {
+			return fmt.Errorf("version conflict (409): page was modified since last fetch, re-pull and try again")
+		}
+		return fmt.Errorf("API error (HTTP %d): %s", code, string(raw))
+	}
+	return nil
+}
A => api/url.go +76 -0
@@ 1,76 @@
+package api
+
+import (
+ "fmt"
+ "net/url"
+ "strings"
+)
+
+// PageRef holds parsed Confluence page reference from a URL.
+// Either PageID is set, or SpaceKey and Title are.
+type PageRef struct {
+	BaseURL  string // scheme, host and any context path, e.g. "https://example.com/confluence"
+	PageID   string // numeric ID if available
+	SpaceKey string // from /display/SPACE/... format
+	Title    string // URL-decoded page title
+}
+
+// ParsePageURL parses a Confluence Server/Data Center page URL into a PageRef.
+//
+// Supported formats:
+//   - https://confluence.example.com/pages/viewpage.action?pageId=12345
+//   - https://confluence.example.com/display/SPACE/Page+Title
+//   - https://confluence.example.com/display/SPACE/Page+Title/Sub+Page
+//
+// Anything in the path before "/pages/viewpage.action" or "/display/" is
+// treated as the instance's context path and kept in BaseURL, so that REST
+// calls built from BaseURL reach the same installation.
+//
+// In the display format everything after the space key becomes the title,
+// slashes included. "+" stands for a space and percent-escapes are decoded
+// exactly once, so "%2B" yields a literal "+" and "%25" a literal "%".
+//
+// An error is returned when the URL cannot be parsed, lacks a scheme or host,
+// has viewpage.action without a pageId, lacks a space key or title in the
+// display format, or matches neither format.
+func ParsePageURL(rawURL string) (*PageRef, error) {
+	u, err := url.Parse(rawURL)
+	if err != nil {
+		return nil, fmt.Errorf("parsing URL: %w", err)
+	}
+
+	if u.Scheme == "" || u.Host == "" {
+		return nil, fmt.Errorf("URL must include scheme and host: %s", rawURL)
+	}
+
+	// u.Host already includes an explicit port, if any.
+	origin := u.Scheme + "://" + u.Host
+
+	// Use the still-encoded path: u.Path is already unescaped, and decoding it
+	// a second time would corrupt titles containing "%" or an escaped "+".
+	path := strings.TrimRight(u.EscapedPath(), "/")
+
+	// Format 1: [/context]/pages/viewpage.action?pageId=12345
+	const viewPage = "/pages/viewpage.action"
+	if strings.HasSuffix(path, viewPage) {
+		pageID := u.Query().Get("pageId")
+		if pageID == "" {
+			return nil, fmt.Errorf("URL has viewpage.action but no pageId parameter: %s", rawURL)
+		}
+		return &PageRef{
+			BaseURL: origin + strings.TrimSuffix(path, viewPage),
+			PageID:  pageID,
+		}, nil
+	}
+
+	// Format 2: [/context]/display/SPACE/Page+Title
+	const display = "/display/"
+	if idx := strings.Index(path, display); idx != -1 {
+		rest := path[idx+len(display):]
+		parts := strings.SplitN(rest, "/", 2)
+		if len(parts) < 2 || parts[0] == "" || parts[1] == "" {
+			return nil, fmt.Errorf("URL display format requires /display/SPACE/Title: %s", rawURL)
+		}
+
+		spaceKey, err := url.PathUnescape(parts[0])
+		if err != nil {
+			return nil, fmt.Errorf("decoding space key: %w", err)
+		}
+
+		// Confluence writes spaces as "+" in display URLs. Replace them before
+		// unescaping so that an escaped "%2B" survives as a literal plus.
+		title, err := url.PathUnescape(strings.ReplaceAll(parts[1], "+", " "))
+		if err != nil {
+			return nil, fmt.Errorf("decoding page title: %w", err)
+		}
+
+		return &PageRef{
+			BaseURL:  origin + path[:idx],
+			SpaceKey: spaceKey,
+			Title:    title,
+		}, nil
+	}
+
+	return nil, fmt.Errorf("unrecognized Confluence URL format: %s", rawURL)
+}
A => api/url_test.go +78 -0
@@ 1,78 @@
+package api
+
+import (
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+)
+
+func TestParsePageURL_ViewPageAction(t *testing.T) {
+ ref, err := ParsePageURL("https://confluence.example.com/pages/viewpage.action?pageId=12345")
+ require.NoError(t, err)
+ assert.Equal(t, "https://confluence.example.com", ref.BaseURL)
+ assert.Equal(t, "12345", ref.PageID)
+ assert.Empty(t, ref.SpaceKey)
+ assert.Empty(t, ref.Title)
+}
+
+func TestParsePageURL_ViewPageActionWithPort(t *testing.T) {
+ ref, err := ParsePageURL("https://confluence.example.com:8443/pages/viewpage.action?pageId=99")
+ require.NoError(t, err)
+ assert.Equal(t, "https://confluence.example.com:8443", ref.BaseURL)
+ assert.Equal(t, "99", ref.PageID)
+}
+
+func TestParsePageURL_DisplayFormat(t *testing.T) {
+ ref, err := ParsePageURL("https://confluence.example.com/display/TEAM/My+Page+Title")
+ require.NoError(t, err)
+ assert.Equal(t, "https://confluence.example.com", ref.BaseURL)
+ assert.Empty(t, ref.PageID)
+ assert.Equal(t, "TEAM", ref.SpaceKey)
+ assert.Equal(t, "My Page Title", ref.Title)
+}
+
+func TestParsePageURL_DisplayFormatEncodedTitle(t *testing.T) {
+ ref, err := ParsePageURL("https://confluence.example.com/display/DEV/API+%26+SDK+Guide")
+ require.NoError(t, err)
+ assert.Equal(t, "DEV", ref.SpaceKey)
+ assert.Equal(t, "API & SDK Guide", ref.Title)
+}
+
+func TestParsePageURL_DisplayFormatTrailingSlash(t *testing.T) {
+ ref, err := ParsePageURL("https://confluence.example.com/display/TEAM/Page/")
+ require.NoError(t, err)
+ assert.Equal(t, "TEAM", ref.SpaceKey)
+ assert.Equal(t, "Page", ref.Title)
+}
+
+func TestParsePageURL_ViewPageNoPageID(t *testing.T) {
+ _, err := ParsePageURL("https://confluence.example.com/pages/viewpage.action")
+ require.Error(t, err)
+ assert.Contains(t, err.Error(), "pageId")
+}
+
+func TestParsePageURL_DisplayFormatNoTitle(t *testing.T) {
+ _, err := ParsePageURL("https://confluence.example.com/display/SPACE")
+ require.Error(t, err)
+ assert.Contains(t, err.Error(), "Title")
+}
+
+func TestParsePageURL_NoScheme(t *testing.T) {
+ _, err := ParsePageURL("confluence.example.com/display/SPACE/Page")
+ require.Error(t, err)
+}
+
+func TestParsePageURL_UnrecognizedFormat(t *testing.T) {
+ _, err := ParsePageURL("https://confluence.example.com/some/other/path")
+ require.Error(t, err)
+ assert.Contains(t, err.Error(), "unrecognized")
+}
+
+func TestParsePageURL_SubPage(t *testing.T) {
+ ref, err := ParsePageURL("https://confluence.example.com/display/TEAM/Parent/Child+Page")
+ require.NoError(t, err)
+ assert.Equal(t, "TEAM", ref.SpaceKey)
+ // Sub-pages: the full path after space key is the title
+ assert.Equal(t, "Parent/Child Page", ref.Title)
+}
A => cmd/convert.go +49 -0
@@ 1,49 @@
+package cmd
+
+import (
+ "fmt"
+ "io"
+ "os"
+
+ "github.com/spf13/cobra"
+
+ "sourcecraft.dev/bigbes/markdown-to-confluence-xml/converter"
+)
+
+// convertOutput holds the --output/-o flag of "convert"; empty means stdout.
+var convertOutput string
+
+// convertCmd implements "mdcx convert": it reads Markdown from the file
+// argument (or stdin when none is given), converts it to Confluence storage
+// XML, and writes the result to --output or stdout.
+var convertCmd = &cobra.Command{
+	Use:   "convert [input.md]",
+	Short: "Convert Markdown to Confluence XML",
+	Long:  "Convert a Markdown file to Confluence storage format XML. Reads from stdin if no file is specified.",
+	Args:  cobra.MaximumNArgs(1),
+	RunE: func(cmd *cobra.Command, args []string) error {
+		var input []byte
+		var err error
+
+		if len(args) > 0 {
+			input, err = os.ReadFile(args[0])
+		} else {
+			input, err = io.ReadAll(os.Stdin)
+		}
+		if err != nil {
+			return fmt.Errorf("reading input: %w", err)
+		}
+
+		result, err := converter.MarkdownToConfluence(input)
+		if err != nil {
+			return fmt.Errorf("converting: %w", err)
+		}
+
+		if convertOutput != "" {
+			return os.WriteFile(convertOutput, []byte(result), 0644)
+		}
+		// Surface a failed stdout write (closed pipe, full disk behind a
+		// redirect) instead of exiting 0 with truncated output.
+		if _, err := fmt.Fprint(os.Stdout, result); err != nil {
+			return fmt.Errorf("writing output: %w", err)
+		}
+		return nil
+	},
+}
+
+// init declares the flags of "convert" and attaches the command to the root.
+func init() {
+	flags := convertCmd.Flags()
+	flags.StringVarP(&convertOutput, "output", "o", "", "Output file (default: stdout)")
+
+	rootCmd.AddCommand(convertCmd)
+}
A => cmd/embed.go +81 -0
@@ 1,81 @@
+package cmd
+
+import (
+ "fmt"
+ "io"
+ "os"
+
+ "github.com/spf13/cobra"
+
+ "sourcecraft.dev/bigbes/markdown-to-confluence-xml/converter"
+ "sourcecraft.dev/bigbes/markdown-to-confluence-xml/template"
+)
+
+// Flag values for the "embed" command, bound in init.
+var (
+	embedTemplate    string // --template/-t: template XML file; the command refuses to run without it
+	embedOutput      string // --output/-o: destination file; empty means stdout
+	embedMarkerStart string // --marker-start: defaults to template.DefaultMarkerStart
+	embedMarkerEnd   string // --marker-end: defaults to template.DefaultMarkerEnd
+)
+
+// embedCmd implements "mdcx embed": it converts Markdown (file argument or
+// stdin) to Confluence XML and passes it, with the template file's content
+// and the two marker strings, to template.Embed. The result goes to --output
+// or stdout.
+var embedCmd = &cobra.Command{
+	Use:   "embed [input.md]",
+	Short: "Embed Markdown into a Confluence XML template",
+	Long: `Convert Markdown to Confluence XML and embed it into a template document.
+The template must contain marker comments to indicate where content should be inserted:
+
+ <!-- MD_CONTENT_START -->
+ <!-- MD_CONTENT_END -->
+
+Reads Markdown from stdin if no file is specified.`,
+	Args: cobra.MaximumNArgs(1),
+	RunE: func(cmd *cobra.Command, args []string) error {
+		if embedTemplate == "" {
+			return fmt.Errorf("--template flag is required")
+		}
+
+		// Read markdown input
+		var input []byte
+		var err error
+		if len(args) > 0 {
+			input, err = os.ReadFile(args[0])
+		} else {
+			input, err = io.ReadAll(os.Stdin)
+		}
+		if err != nil {
+			return fmt.Errorf("reading input: %w", err)
+		}
+
+		// Read template
+		tmpl, err := os.ReadFile(embedTemplate)
+		if err != nil {
+			return fmt.Errorf("reading template: %w", err)
+		}
+
+		// Convert markdown to confluence XML
+		xmlContent, err := converter.MarkdownToConfluence(input)
+		if err != nil {
+			return fmt.Errorf("converting markdown: %w", err)
+		}
+
+		// Embed into template
+		result, err := template.Embed(string(tmpl), xmlContent, embedMarkerStart, embedMarkerEnd)
+		if err != nil {
+			return fmt.Errorf("embedding: %w", err)
+		}
+
+		if embedOutput != "" {
+			return os.WriteFile(embedOutput, []byte(result), 0644)
+		}
+		// Surface a failed stdout write (closed pipe, full disk behind a
+		// redirect) instead of exiting 0 with truncated output.
+		if _, err := fmt.Fprint(os.Stdout, result); err != nil {
+			return fmt.Errorf("writing output: %w", err)
+		}
+		return nil
+	},
+}
+
+// init declares the flags of "embed" and attaches the command to the root.
+func init() {
+	flags := embedCmd.Flags()
+	flags.StringVarP(&embedTemplate, "template", "t", "", "Template XML file (required)")
+	flags.StringVarP(&embedOutput, "output", "o", "", "Output file (default: stdout)")
+	flags.StringVar(&embedMarkerStart, "marker-start", template.DefaultMarkerStart, "Start marker comment")
+	flags.StringVar(&embedMarkerEnd, "marker-end", template.DefaultMarkerEnd, "End marker comment")
+
+	rootCmd.AddCommand(embedCmd)
+}
A => cmd/extract.go +77 -0
@@ 1,77 @@
package cmd
import (
"fmt"
"io"
"os"
"github.com/spf13/cobra"
"sourcecraft.dev/bigbes/markdown-to-confluence-xml/converter"
"sourcecraft.dev/bigbes/markdown-to-confluence-xml/template"
)
// Flag values for the "extract" command, bound in init.
var (
	extractOutput      string // --output/-o: destination file; empty means stdout
	extractMarkerStart string // --marker-start: defaults to template.DefaultMarkerStart
	extractMarkerEnd   string // --marker-end: defaults to template.DefaultMarkerEnd
	extractRaw         bool   // --raw: emit the extracted XML without converting to Markdown
)
// extractCmd implements "mdcx extract": it reads a Confluence XML document
// (file argument or stdin), pulls out the content between the two markers via
// template.Extract, and writes it to --output or stdout, either converted to
// Markdown or, with --raw, unchanged.
var extractCmd = &cobra.Command{
	Use:   "extract [input.xml]",
	Short: "Extract Markdown from a Confluence XML document",
	Long: `Extract content between marker comments from a Confluence XML document
and convert it back to Markdown.
The document must contain marker comments:
<!-- MD_CONTENT_START -->
<!-- MD_CONTENT_END -->
Use --raw to get the Confluence XML without converting to Markdown.
Reads from stdin if no file is specified.`,
	Args: cobra.MaximumNArgs(1),
	RunE: func(cmd *cobra.Command, args []string) error {
		// Read input
		var input []byte
		var err error
		if len(args) > 0 {
			input, err = os.ReadFile(args[0])
		} else {
			input, err = io.ReadAll(os.Stdin)
		}
		if err != nil {
			return fmt.Errorf("reading input: %w", err)
		}

		// Extract content between markers
		xmlContent, err := template.Extract(string(input), extractMarkerStart, extractMarkerEnd)
		if err != nil {
			return fmt.Errorf("extracting: %w", err)
		}

		var result string
		if extractRaw {
			result = xmlContent
		} else {
			result, err = converter.ConfluenceToMarkdown(xmlContent)
			if err != nil {
				return fmt.Errorf("converting to markdown: %w", err)
			}
		}

		if extractOutput != "" {
			return os.WriteFile(extractOutput, []byte(result), 0644)
		}
		// Surface a failed stdout write (closed pipe, full disk behind a
		// redirect) instead of exiting 0 with truncated output.
		if _, err := fmt.Fprint(os.Stdout, result); err != nil {
			return fmt.Errorf("writing output: %w", err)
		}
		return nil
	},
}
// init declares the flags of "extract" and attaches the command to the root.
func init() {
	flags := extractCmd.Flags()
	flags.StringVarP(&extractOutput, "output", "o", "", "Output file (default: stdout)")
	flags.StringVar(&extractMarkerStart, "marker-start", template.DefaultMarkerStart, "Start marker comment")
	flags.StringVar(&extractMarkerEnd, "marker-end", template.DefaultMarkerEnd, "End marker comment")
	flags.BoolVar(&extractRaw, "raw", false, "Output raw Confluence XML instead of Markdown")

	rootCmd.AddCommand(extractCmd)
}
A => cmd/fmt.go +57 -0
@@ 1,57 @@
+package cmd
+
+import (
+ "fmt"
+ "io"
+ "os"
+
+ "github.com/spf13/cobra"
+
+ "sourcecraft.dev/bigbes/markdown-to-confluence-xml/format"
+)
+
+// Flag values for the "fmt" command, bound in init.
+var (
+	fmtOutput string // --output/-o: destination file; empty means stdout
+	fmtIndent string // --indent: indentation string handed to format.PrettyXML
+)
+
+// fmtCmd implements "mdcx fmt": it reads Confluence storage XML (file
+// argument or stdin), runs it through format.PrettyXML with the --indent
+// string, and writes the result to --output or stdout.
+var fmtCmd = &cobra.Command{
+	Use:   "fmt [input.xml]",
+	Short: "Pretty-print Confluence storage XML",
+	Long: `Format Confluence storage XML with sensible indentation.
+
+Block elements (p, h1-h6, ul, ol, li, table, tr, td, th, macros, layout)
+get their own lines with indentation. Inline elements (strong, em, code,
+a, ac:link, ac:image) stay on the same line. CDATA content inside code
+blocks is preserved as-is.
+
+Reads from stdin if no file is specified.`,
+	Args: cobra.MaximumNArgs(1),
+	RunE: func(cmd *cobra.Command, args []string) error {
+		var input []byte
+		var err error
+
+		if len(args) > 0 {
+			input, err = os.ReadFile(args[0])
+		} else {
+			input, err = io.ReadAll(os.Stdin)
+		}
+		if err != nil {
+			return fmt.Errorf("reading input: %w", err)
+		}
+
+		result := format.PrettyXML(string(input), fmtIndent)
+
+		if fmtOutput != "" {
+			return os.WriteFile(fmtOutput, []byte(result), 0644)
+		}
+		// Surface a failed stdout write (closed pipe, full disk behind a
+		// redirect) instead of exiting 0 with truncated output.
+		if _, err := fmt.Fprint(os.Stdout, result); err != nil {
+			return fmt.Errorf("writing output: %w", err)
+		}
+		return nil
+	},
+}
+
+// init declares the flags of "fmt" and attaches the command to the root.
+// The default indent is two spaces, matching the flag's help text.
+func init() {
+	fmtCmd.Flags().StringVarP(&fmtOutput, "output", "o", "", "Output file (default: stdout)")
+	fmtCmd.Flags().StringVar(&fmtIndent, "indent", "  ", "Indentation string (default: 2 spaces)")
+	rootCmd.AddCommand(fmtCmd)
+}
A => cmd/pull.go +78 -0
@@ 1,78 @@
+package cmd
+
+import (
+ "fmt"
+ "os"
+
+ "github.com/spf13/cobra"
+
+ "sourcecraft.dev/bigbes/markdown-to-confluence-xml/api"
+ "sourcecraft.dev/bigbes/markdown-to-confluence-xml/converter"
+)
+
+// Flag values for the "pull" command, bound in init.
+var (
+	pullOutput string // --output/-o: destination file; empty means stdout
+	pullRaw    bool   // --raw: emit the storage XML without converting to Markdown
+)
+
+// pullCmd implements "mdcx pull": it resolves the page URL, fetches the page
+// with the configured token, and writes its storage body to --output or
+// stdout, either converted to Markdown or, with --raw, unchanged. Progress
+// messages go to stderr so that stdout carries only the page content.
+var pullCmd = &cobra.Command{
+	Use:   "pull <confluence-url>",
+	Short: "Pull a page from Confluence and convert to Markdown",
+	Long: `Fetch a Confluence page by URL, extract its storage format body,
+and convert it to Markdown.
+
+Supported URL formats:
+ https://confluence.example.com/pages/viewpage.action?pageId=12345
+ https://confluence.example.com/display/SPACE/Page+Title
+
+Use --raw to get the Confluence storage XML without converting to Markdown.
+
+Authentication via --token flag or CONFLUENCE_TOKEN environment variable.`,
+	Args: cobra.ExactArgs(1),
+	RunE: func(cmd *cobra.Command, args []string) error {
+		token := resolveToken()
+		if token == "" {
+			return fmt.Errorf("Confluence token required: use --token flag or set CONFLUENCE_TOKEN env var")
+		}
+
+		ref, err := api.ParsePageURL(args[0])
+		if err != nil {
+			return err
+		}
+
+		client := api.NewClient(ref.BaseURL, token)
+		page, err := client.GetPage(ref)
+		if err != nil {
+			return err
+		}
+
+		xmlBody := page.Body.Storage.Value
+		fmt.Fprintf(os.Stderr, "Pulled page: %s (id=%s, version=%d)\n", page.Title, page.ID, page.Version.Number)
+
+		var result string
+		if pullRaw {
+			result = xmlBody
+		} else {
+			result, err = converter.ConfluenceToMarkdown(xmlBody)
+			if err != nil {
+				return fmt.Errorf("converting to markdown: %w", err)
+			}
+		}
+
+		if pullOutput != "" {
+			if err := os.WriteFile(pullOutput, []byte(result), 0644); err != nil {
+				return err
+			}
+			fmt.Fprintf(os.Stderr, "Written to %s\n", pullOutput)
+			return nil
+		}
+		// Surface a failed stdout write (closed pipe, full disk behind a
+		// redirect) instead of exiting 0 with truncated output.
+		if _, err := fmt.Fprint(os.Stdout, result); err != nil {
+			return fmt.Errorf("writing output: %w", err)
+		}
+		return nil
+	},
+}
+
+// init declares the flags of "pull" and attaches the command to the root.
+func init() {
+	flags := pullCmd.Flags()
+	flags.StringVarP(&pullOutput, "output", "o", "", "Output file (default: stdout)")
+	flags.BoolVar(&pullRaw, "raw", false, "Output raw Confluence storage XML instead of Markdown")
+
+	rootCmd.AddCommand(pullCmd)
+}
A => cmd/push.go +110 -0
@@ 1,110 @@
+package cmd
+
+import (
+ "fmt"
+ "io"
+ "os"
+
+ "github.com/spf13/cobra"
+
+ "sourcecraft.dev/bigbes/markdown-to-confluence-xml/api"
+ "sourcecraft.dev/bigbes/markdown-to-confluence-xml/converter"
+ "sourcecraft.dev/bigbes/markdown-to-confluence-xml/template"
+)
+
+// Flag values for the "push" command, bound in init.
+var (
+	pushMessage     string // --message/-m: version message sent with the update
+	pushRaw         bool   // --raw: input is already storage XML, skip conversion
+	pushTemplate    bool   // --template: embed into the current page body between markers
+	pushMarkerStart string // --marker-start: defaults to template.DefaultMarkerStart
+	pushMarkerEnd   string // --marker-end: defaults to template.DefaultMarkerEnd
+)
+
+// pushCmd implements "mdcx push". It reads the input (second argument or
+// stdin), converts it to storage XML unless --raw is set, fetches the current
+// page for its version (and, with --template, its body), and then sends the
+// update. Progress messages are written to stderr.
+var pushCmd = &cobra.Command{
+	Use:   "push <confluence-url> [input.md]",
+	Short: "Push local Markdown to a Confluence page",
+	Long: `Convert a local Markdown file to Confluence storage format and update
+the page at the given URL.
+
+By default, the entire page body is replaced with the converted content.
+
+With --template, the current page body is preserved as a template:
+the content between marker comments is replaced with the new content,
+keeping everything else (metadata table, changelog, etc.) intact.
+
+With --raw, the input is treated as Confluence storage XML (no conversion).
+
+Reads from stdin if no input file is specified.
+
+Authentication via --token flag or CONFLUENCE_TOKEN environment variable.`,
+	Args: cobra.RangeArgs(1, 2),
+	RunE: func(cmd *cobra.Command, args []string) error {
+		token := resolveToken()
+		if token == "" {
+			return fmt.Errorf("Confluence token required: use --token flag or set CONFLUENCE_TOKEN env var")
+		}
+
+		ref, err := api.ParsePageURL(args[0])
+		if err != nil {
+			return err
+		}
+
+		// Input comes from the optional second argument, else from stdin.
+		var raw []byte
+		if len(args) < 2 {
+			raw, err = io.ReadAll(os.Stdin)
+		} else {
+			raw, err = os.ReadFile(args[1])
+		}
+		if err != nil {
+			return fmt.Errorf("reading input: %w", err)
+		}
+
+		// With --raw the input already is storage XML.
+		converted := string(raw)
+		if !pushRaw {
+			converted, err = converter.MarkdownToConfluence(raw)
+			if err != nil {
+				return fmt.Errorf("converting markdown: %w", err)
+			}
+		}
+
+		client := api.NewClient(ref.BaseURL, token)
+
+		// The current page supplies the version number and, in template
+		// mode, the body to embed into.
+		page, err := client.GetPage(ref)
+		if err != nil {
+			return err
+		}
+
+		current := page.Version.Number
+		fmt.Fprintf(os.Stderr, "Updating page: %s (id=%s, version=%d -> %d)\n",
+			page.Title, page.ID, current, current+1)
+
+		payload := converted
+		if pushTemplate {
+			payload, err = template.Embed(page.Body.Storage.Value, converted, pushMarkerStart, pushMarkerEnd)
+			if err != nil {
+				return fmt.Errorf("embedding into template: %w", err)
+			}
+		}
+
+		if err := client.UpdateContent(page.ID, page, payload, pushMessage); err != nil {
+			return err
+		}
+
+		fmt.Fprintf(os.Stderr, "Page updated successfully\n")
+		return nil
+	},
+}
+
+// init declares the flags of "push" and attaches the command to the root.
+func init() {
+	flags := pushCmd.Flags()
+	flags.StringVarP(&pushMessage, "message", "m", "", "Version message for the update")
+	flags.BoolVar(&pushRaw, "raw", false, "Input is raw Confluence storage XML (skip conversion)")
+	flags.BoolVar(&pushTemplate, "template", false, "Embed content into existing page body between markers")
+	flags.StringVar(&pushMarkerStart, "marker-start", template.DefaultMarkerStart, "Start marker comment (with --template)")
+	flags.StringVar(&pushMarkerEnd, "marker-end", template.DefaultMarkerEnd, "End marker comment (with --template)")
+
+	rootCmd.AddCommand(pushCmd)
+}
A => cmd/root.go +35 -0
@@ 1,35 @@
+package cmd
+
+import (
+ "fmt"
+ "os"
+
+ "github.com/spf13/cobra"
+)
+
+// confluenceToken receives the value of the persistent --token flag.
+var confluenceToken string
+
+// rootCmd is the top-level mdcx command that subcommands are added to.
+var rootCmd = &cobra.Command{
+	Use:   "mdcx",
+	Short: "Markdown to Confluence XML converter",
+	Long:  "Convert Markdown to Confluence storage format XML, embed into templates, and extract back.",
+	// Execute already writes the returned error to stderr; without this
+	// cobra would print it as well and every failure would show up twice.
+	SilenceErrors: true,
+}
+
+// Execute runs the root command. On failure it writes the error to stderr and
+// terminates the process with exit status 1.
+func Execute() {
+	err := rootCmd.Execute()
+	if err == nil {
+		return
+	}
+	fmt.Fprintln(os.Stderr, err)
+	os.Exit(1)
+}
+
+// init declares --token as a persistent flag on the root command.
+func init() {
+	persistent := rootCmd.PersistentFlags()
+	persistent.StringVar(&confluenceToken, "token", "", "Confluence Personal Access Token (or set CONFLUENCE_TOKEN)")
+}
+
+// resolveToken picks the Confluence token: a non-empty --token flag value wins,
+// otherwise the CONFLUENCE_TOKEN environment variable is returned (which may
+// itself be empty).
+func resolveToken() string {
+	if confluenceToken == "" {
+		return os.Getenv("CONFLUENCE_TOKEN")
+	}
+	return confluenceToken
+}
A => confluence/elements.go +50 -0
@@ 1,50 @@
+package confluence
+
+// Confluence storage format macro helpers.
+
+// CodeMacro builds a Confluence "code" structured macro whose plain-text body
+// is the given source wrapped in a CDATA section. The language parameter is
+// emitted only when language is non-empty; it is inserted verbatim, so the
+// caller must pass an XML-safe value.
+func CodeMacro(language string, body string) string {
+	const (
+		macroOpen  = `<ac:structured-macro ac:name="code" ac:schema-version="1">`
+		macroClose = `</ac:structured-macro>`
+	)
+	languageParam := ""
+	if language != "" {
+		languageParam = `<ac:parameter ac:name="language">` + language + `</ac:parameter>`
+	}
+	plainBody := `<ac:plain-text-body><![CDATA[` + escapeCDATA(body) + `]]></ac:plain-text-body>`
+	return macroOpen + languageParam + plainBody + macroClose
+}
+
+func InfoPanel(body string) string {
+ return `<ac:structured-macro ac:name="info" ac:schema-version="1">` +
+ `<ac:rich-text-body>` + body + `</ac:rich-text-body>` +
+ `</ac:structured-macro>`
+}
+
+func NotePanel(body string) string {
+ return `<ac:structured-macro ac:name="note" ac:schema-version="1">` +
+ `<ac:rich-text-body>` + body + `</ac:rich-text-body>` +
+ `</ac:structured-macro>`
+}
+
+func WarningPanel(body string) string {
+ return `<ac:structured-macro ac:name="warning" ac:schema-version="1">` +
+ `<ac:rich-text-body>` + body + `</ac:rich-text-body>` +
+ `</ac:structured-macro>`
+}
+
+func ImageExternal(url string) string {
+ return `<ac:image><ri:url ri:value="` + url + `"/></ac:image>`
+}
+
+// escapeCDATA makes s safe to place inside a CDATA section. A literal "]]>"
+// would end the section early, so each occurrence is split across two
+// sections: "]]" stays in the current one, the section is closed and reopened,
+// and ">" starts the next ("]]]]><![CDATA[>"). A parser that concatenates the
+// adjacent sections reads back the original text unchanged.
+func escapeCDATA(s string) string {
+	result := make([]byte, 0, len(s))
+	for i := 0; i < len(s); i++ {
+		if i+2 < len(s) && s[i] == ']' && s[i+1] == ']' && s[i+2] == '>' {
+			// Emitting only "]]>" + "<![CDATA[" here would silently drop the
+			// three characters from the content; keep them on both sides of
+			// the section boundary instead.
+			result = append(result, "]]]]><![CDATA[>"...)
+			i += 2
+			continue
+		}
+		result = append(result, s[i])
+	}
+	return string(result)
+}
A => confluence/renderer.go +477 -0
@@ 1,477 @@
+package confluence
+
+import (
+ "bytes"
+ "fmt"
+ "html"
+ "strings"
+
+ "github.com/yuin/goldmark/ast"
+ east "github.com/yuin/goldmark/extension/ast"
+ "github.com/yuin/goldmark/renderer"
+ "github.com/yuin/goldmark/util"
+)
+
+// Renderer renders goldmark AST nodes into Confluence storage format XML.
+type Renderer struct {
+	// taskIDCounter is the last ID handed out by nextTaskID.
+	taskIDCounter int
+	// inTaskBody is set once a task checkbox has opened <ac:task-body> and is
+	// cleared by the paragraph/text-block handler that closes it.
+	inTaskBody bool
+	// inlineCommentDepth counts inline-comment spans opened by convertRawSpan
+	// that have not yet been matched by a closing </span>.
+	inlineCommentDepth int
+}
+
+// NewRenderer returns a zero-valued Renderer as a goldmark node renderer.
+func NewRenderer() renderer.NodeRenderer {
+	return new(Renderer)
+}
+
+// nextTaskID advances the task counter and returns the new value, so the first
+// ID handed out by a fresh Renderer is 1.
+func (r *Renderer) nextTaskID() int {
+	id := r.taskIDCounter + 1
+	r.taskIDCounter = id
+	return id
+}
+
+// RegisterFuncs implements renderer.NodeRenderer by registering one handler
+// per supported node kind.
+func (r *Renderer) RegisterFuncs(reg renderer.NodeRendererFuncRegisterer) {
+	handlers := []struct {
+		kind ast.NodeKind
+		fn   renderer.NodeRendererFunc
+	}{
+		// Block nodes
+		{ast.KindDocument, r.renderDocument},
+		{ast.KindHeading, r.renderHeading},
+		{ast.KindParagraph, r.renderParagraph},
+		{ast.KindTextBlock, r.renderTextBlock},
+		{ast.KindFencedCodeBlock, r.renderFencedCodeBlock},
+		{ast.KindCodeBlock, r.renderCodeBlock},
+		{ast.KindThematicBreak, r.renderThematicBreak},
+		{ast.KindBlockquote, r.renderBlockquote},
+		{ast.KindList, r.renderList},
+		{ast.KindListItem, r.renderListItem},
+		{ast.KindHTMLBlock, r.renderHTMLBlock},
+
+		// Inline nodes
+		{ast.KindText, r.renderText},
+		{ast.KindString, r.renderString},
+		{ast.KindEmphasis, r.renderEmphasis},
+		{ast.KindCodeSpan, r.renderCodeSpan},
+		{ast.KindLink, r.renderLink},
+		{ast.KindAutoLink, r.renderAutoLink},
+		{ast.KindImage, r.renderImage},
+		{ast.KindRawHTML, r.renderRawHTML},
+
+		// GFM extensions (goldmark GFM has no KindTableBody)
+		{east.KindTable, r.renderTable},
+		{east.KindTableHeader, r.renderTableHeader},
+		{east.KindTableRow, r.renderTableRow},
+		{east.KindTableCell, r.renderTableCell},
+		{east.KindStrikethrough, r.renderStrikethrough},
+		{east.KindTaskCheckBox, r.renderTaskCheckBox},
+	}
+	for _, h := range handlers {
+		reg.Register(h.kind, h.fn)
+	}
+}
+
+// renderDocument writes nothing for the document node itself and lets the
+// walk continue into its children.
+func (r *Renderer) renderDocument(_ util.BufWriter, _ []byte, _ ast.Node, _ bool) (ast.WalkStatus, error) {
+	return ast.WalkContinue, nil
+}
+
+// renderHeading writes <hN> when entering a heading of level N and </hN>
+// followed by a newline when leaving it.
+func (r *Renderer) renderHeading(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
+	level := node.(*ast.Heading).Level
+	if !entering {
+		fmt.Fprintf(w, "</h%d>\n", level)
+		return ast.WalkContinue, nil
+	}
+	fmt.Fprintf(w, "<h%d>", level)
+	return ast.WalkContinue, nil
+}
+
+// renderParagraph wraps ordinary paragraphs in <p>…</p>.
+//
+// A paragraph that carries a task checkbox is the body of a Confluence task:
+// renderTaskCheckBox opens <ac:task-body> for it and this handler closes it on
+// exit, so no <p> tags are written for such a paragraph.
+func (r *Renderer) renderParagraph(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
+	if entering {
+		// In a loose task list the item content is a Paragraph and the
+		// checkbox is its first inline child, which has not been rendered yet
+		// at this point, so inTaskBody is still false. Look ahead so that no
+		// "<p>" is emitted that would later be "closed" by </ac:task-body>.
+		first := node.FirstChild()
+		startsTask := first != nil && first.Kind() == east.KindTaskCheckBox
+		if r.inTaskBody || startsTask {
+			return ast.WalkContinue, nil
+		}
+		w.WriteString("<p>")
+		return ast.WalkContinue, nil
+	}
+
+	if r.inTaskBody {
+		w.WriteString("</ac:task-body>\n")
+		r.inTaskBody = false
+		return ast.WalkContinue, nil
+	}
+	w.WriteString("</p>\n")
+	return ast.WalkContinue, nil
+}
+
+// renderTextBlock writes nothing on entry. On exit it either closes an open
+// task body or, outside a task, terminates the block with a newline.
+func (r *Renderer) renderTextBlock(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
+	if entering {
+		return ast.WalkContinue, nil
+	}
+	if !r.inTaskBody {
+		w.WriteString("\n")
+		return ast.WalkContinue, nil
+	}
+	w.WriteString("</ac:task-body>\n")
+	r.inTaskBody = false
+	return ast.WalkContinue, nil
+}
+
+// renderFencedCodeBlock emits a fenced code block as a Confluence code macro.
+// The first word of the fence info string becomes the macro's language
+// parameter; the block's lines, minus trailing newlines, become its body.
+func (r *Renderer) renderFencedCodeBlock(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
+	if !entering {
+		return ast.WalkContinue, nil
+	}
+	n := node.(*ast.FencedCodeBlock)
+	language := ""
+	if n.Info != nil {
+		lang := n.Info.Segment.Value(source)
+		// Take first word only (e.g., "go title=foo" -> "go")
+		if idx := bytes.IndexByte(lang, ' '); idx > 0 {
+			lang = lang[:idx]
+		}
+		// The info string is free-form user text and CodeMacro inserts the
+		// language verbatim into an XML element, so characters such as '&'
+		// or '<' must be escaped to keep the output well-formed.
+		language = html.EscapeString(string(lang))
+	}
+
+	var buf bytes.Buffer
+	lines := n.Lines()
+	for i := 0; i < lines.Len(); i++ {
+		line := lines.At(i)
+		buf.Write(line.Value(source))
+	}
+	// Remove trailing newline from code content
+	code := strings.TrimRight(buf.String(), "\n")
+
+	w.WriteString(CodeMacro(language, code))
+	w.WriteString("\n")
+	return ast.WalkSkipChildren, nil
+}
+
+// renderCodeBlock emits an indented code block as a Confluence code macro
+// without a language parameter.
+func (r *Renderer) renderCodeBlock(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
+	if !entering {
+		return ast.WalkContinue, nil
+	}
+	lines := node.(*ast.CodeBlock).Lines()
+	var content bytes.Buffer
+	for idx, total := 0, lines.Len(); idx < total; idx++ {
+		segment := lines.At(idx)
+		content.Write(segment.Value(source))
+	}
+	// Trailing newlines are dropped so the macro body ends with the last line of code.
+	w.WriteString(CodeMacro("", strings.TrimRight(content.String(), "\n")))
+	w.WriteString("\n")
+	return ast.WalkSkipChildren, nil
+}
+
+// renderThematicBreak writes <hr/> once, when the node is entered.
+func (r *Renderer) renderThematicBreak(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
+	if !entering {
+		return ast.WalkContinue, nil
+	}
+	w.WriteString("<hr/>\n")
+	return ast.WalkContinue, nil
+}
+
+// renderBlockquote maps a Markdown blockquote onto a Confluence "info" macro:
+// the macro and its rich-text body are opened on entry and closed on exit.
+func (r *Renderer) renderBlockquote(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
+	if !entering {
+		w.WriteString("</ac:rich-text-body></ac:structured-macro>\n")
+		return ast.WalkContinue, nil
+	}
+	w.WriteString(`<ac:structured-macro ac:name="info" ac:schema-version="1"><ac:rich-text-body>`)
+	return ast.WalkContinue, nil
+}
+
+// renderList writes the opening or closing tag of a list. A list in which any
+// item carries a task checkbox becomes <ac:task-list>; otherwise ordered lists
+// become <ol> and all others <ul>.
+func (r *Renderer) renderList(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
+	list := node.(*ast.List)
+	name := "ul"
+	switch {
+	case isTaskList(list):
+		name = "ac:task-list"
+	case list.IsOrdered():
+		name = "ol"
+	}
+	if !entering {
+		fmt.Fprintf(w, "</%s>\n", name)
+		return ast.WalkContinue, nil
+	}
+	fmt.Fprintf(w, "<%s>\n", name)
+	return ast.WalkContinue, nil
+}
+
+// isTaskList reports whether at least one item of the list has a block child
+// that directly contains a task checkbox.
+func isTaskList(n *ast.List) bool {
+	for item := n.FirstChild(); item != nil; item = item.NextSibling() {
+		listItem, ok := item.(*ast.ListItem)
+		if !ok {
+			continue
+		}
+		for block := listItem.FirstChild(); block != nil; block = block.NextSibling() {
+			if hasCheckbox(block) {
+				return true
+			}
+		}
+	}
+	return false
+}
+
+// hasCheckbox reports whether any direct child of n is a task checkbox node.
+func hasCheckbox(n ast.Node) bool {
+	child := n.FirstChild()
+	for child != nil && child.Kind() != east.KindTaskCheckBox {
+		child = child.NextSibling()
+	}
+	return child != nil
+}
+
+// renderListItem writes <li>…</li> for ordinary list items. Items whose parent
+// is a task list are written as <ac:task> elements, each opened together with
+// a freshly allocated <ac:task-id>.
+func (r *Renderer) renderListItem(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
+	parent, parentIsList := node.Parent().(*ast.List)
+	inTaskList := parentIsList && isTaskList(parent)
+
+	switch {
+	case inTaskList && entering:
+		w.WriteString("<ac:task>\n")
+		fmt.Fprintf(w, "<ac:task-id>%d</ac:task-id>\n", r.nextTaskID())
+	case inTaskList:
+		w.WriteString("</ac:task>\n")
+	case entering:
+		w.WriteString("<li>")
+	default:
+		w.WriteString("</li>\n")
+	}
+	return ast.WalkContinue, nil
+}
+
+// renderHTMLBlock copies an HTML block to the output line by line, passing
+// each line through convertRawSpan so round-trip <span> markers are turned
+// back into Confluence elements.
+func (r *Renderer) renderHTMLBlock(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
+	if !entering {
+		return ast.WalkContinue, nil
+	}
+	n := node.(*ast.HTMLBlock)
+	lines := n.Lines()
+	for i := 0; i < lines.Len(); i++ {
+		line := lines.At(i)
+		raw := strings.TrimRight(string(line.Value(source)), "\n")
+		w.WriteString(r.convertRawSpan(raw))
+		w.WriteString("\n")
+	}
+	// goldmark keeps the terminating line of a multi-line HTML block (for
+	// example the "-->" of a comment) in ClosureLine rather than in Lines().
+	// Dropping it would leave the construct unterminated in the output.
+	if n.HasClosure() {
+		closure := n.ClosureLine
+		raw := strings.TrimRight(string(closure.Value(source)), "\n")
+		w.WriteString(r.convertRawSpan(raw))
+		w.WriteString("\n")
+	}
+	return ast.WalkSkipChildren, nil
+}
+
+// renderText writes the HTML-escaped text of the node, followed by <br/> for
+// a hard line break or a newline for a soft one.
+func (r *Renderer) renderText(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
+	if !entering {
+		return ast.WalkContinue, nil
+	}
+	text := node.(*ast.Text)
+	w.WriteString(html.EscapeString(string(text.Segment.Value(source))))
+	switch {
+	case text.HardLineBreak():
+		w.WriteString("<br/>")
+	case text.SoftLineBreak():
+		w.WriteString("\n")
+	}
+	return ast.WalkContinue, nil
+}
+
+// renderString writes the value of a String node as-is, without escaping.
+func (r *Renderer) renderString(w util.BufWriter, _ []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
+	if entering {
+		str := node.(*ast.String)
+		w.Write(str.Value)
+	}
+	return ast.WalkContinue, nil
+}
+
+// renderEmphasis writes <strong> tags for level-2 emphasis and <em> tags for
+// every other level.
+func (r *Renderer) renderEmphasis(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
+	name := "em"
+	if node.(*ast.Emphasis).Level == 2 {
+		name = "strong"
+	}
+	if !entering {
+		fmt.Fprintf(w, "</%s>", name)
+		return ast.WalkContinue, nil
+	}
+	fmt.Fprintf(w, "<%s>", name)
+	return ast.WalkContinue, nil
+}
+
+// renderCodeSpan surrounds the span's children with <code>…</code>.
+func (r *Renderer) renderCodeSpan(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
+	if !entering {
+		w.WriteString("</code>")
+		return ast.WalkContinue, nil
+	}
+	w.WriteString("<code>")
+	return ast.WalkContinue, nil
+}
+
+// renderLink writes an <a> element whose href is the HTML-escaped link
+// destination; the link text is produced by the node's children.
+func (r *Renderer) renderLink(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
+	link := node.(*ast.Link)
+	if !entering {
+		w.WriteString("</a>")
+		return ast.WalkContinue, nil
+	}
+	href := html.EscapeString(string(link.Destination))
+	fmt.Fprintf(w, `<a href="%s">`, href)
+	return ast.WalkContinue, nil
+}
+
+// renderAutoLink writes an autolink as a complete <a> element whose visible
+// text is the link itself. For e-mail autolinks the href is given a "mailto:"
+// scheme unless the address already carries one, so that the resulting link
+// is usable.
+func (r *Renderer) renderAutoLink(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
+	if !entering {
+		return ast.WalkContinue, nil
+	}
+	n := node.(*ast.AutoLink)
+	url := string(n.URL(source))
+	href := url
+	// A bare address such as "user@example.com" is not a valid href on its own.
+	if n.AutoLinkType == ast.AutoLinkEmail && !strings.HasPrefix(strings.ToLower(href), "mailto:") {
+		href = "mailto:" + href
+	}
+	fmt.Fprintf(w, `<a href="%s">%s</a>`, html.EscapeString(href), html.EscapeString(url))
+	return ast.WalkContinue, nil
+}
+
+// renderImage writes an external-URL <ac:image> element. An ac:alt attribute
+// is included only when the image has non-empty alt text. Children are
+// skipped because the alt text has already been consumed here.
+func (r *Renderer) renderImage(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
+	if !entering {
+		return ast.WalkContinue, nil
+	}
+	img := node.(*ast.Image)
+	src := html.EscapeString(string(img.Destination))
+	altText := nodeText(img, source)
+	if altText == "" {
+		fmt.Fprintf(w, `<ac:image><ri:url ri:value="%s"/></ac:image>`, src)
+		return ast.WalkSkipChildren, nil
+	}
+	fmt.Fprintf(w, `<ac:image ac:alt="%s"><ri:url ri:value="%s"/></ac:image>`,
+		html.EscapeString(altText), src)
+	return ast.WalkSkipChildren, nil
+}
+
+// renderRawHTML writes each segment of an inline raw-HTML node after passing
+// it through convertRawSpan.
+func (r *Renderer) renderRawHTML(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
+	if !entering {
+		return ast.WalkContinue, nil
+	}
+	segments := node.(*ast.RawHTML).Segments
+	for idx, count := 0, segments.Len(); idx < count; idx++ {
+		segment := segments.At(idx)
+		w.WriteString(r.convertRawSpan(string(segment.Value(source))))
+	}
+	return ast.WalkContinue, nil
+}
+
+// convertRawSpan translates the round-trip <span> markers back into Confluence
+// XML. Recognised forms, checked in this order:
+//
+//   - <span data-inline-comment="ref"> opens an inline comment marker; a later
+//     "</span>" closes it while at least one marker is open.
+//   - <span data-user-key="key"/> becomes a user link.
+//   - <span data-attachment="file"/> becomes an attachment image, with ac:alt
+//     taken from data-alt when that attribute is non-empty.
+//
+// Anything else, including a span whose attribute value is empty, is returned
+// unchanged.
+func (r *Renderer) convertRawSpan(raw string) string {
+	if !strings.HasPrefix(raw, "<span ") {
+		// Only a closing tag seen while an inline comment is open is rewritten.
+		if raw != "</span>" || r.inlineCommentDepth <= 0 {
+			return raw
+		}
+		r.inlineCommentDepth--
+		return "</ac:inline-comment-marker>"
+	}
+
+	if strings.Contains(raw, "data-inline-comment=") {
+		if ref := extractAttrValue(raw, "data-inline-comment"); ref != "" {
+			r.inlineCommentDepth++
+			return `<ac:inline-comment-marker ac:ref="` + ref + `">`
+		}
+	}
+
+	if strings.Contains(raw, "data-user-key=") {
+		if key := extractAttrValue(raw, "data-user-key"); key != "" {
+			return `<ac:link><ri:user ri:userkey="` + key + `"/></ac:link>`
+		}
+	}
+
+	if strings.Contains(raw, "data-attachment=") {
+		if filename := extractAttrValue(raw, "data-attachment"); filename != "" {
+			attachment := `<ri:attachment ri:filename="` + filename + `"/></ac:image>`
+			if alt := extractAttrValue(raw, "data-alt"); alt != "" {
+				return `<ac:image ac:alt="` + alt + `">` + attachment
+			}
+			return `<ac:image>` + attachment
+		}
+	}
+
+	return raw
+}
+
+// extractAttrValue returns the text between `attr="` and the next double quote
+// in tag. It returns "" when the attribute is absent or its value is not
+// terminated by a quote.
+func extractAttrValue(tag, attr string) string {
+	marker := attr + `="`
+	pos := strings.Index(tag, marker)
+	if pos < 0 {
+		return ""
+	}
+	rest := tag[pos+len(marker):]
+	closing := strings.Index(rest, `"`)
+	if closing < 0 {
+		return ""
+	}
+	return rest[:closing]
+}
+
+// GFM Table support
+
+// renderTable opens <table><tbody> on entry and closes both on exit.
+func (r *Renderer) renderTable(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
+	if !entering {
+		w.WriteString("</tbody>\n</table>\n")
+		return ast.WalkContinue, nil
+	}
+	w.WriteString("<table>\n<tbody>\n")
+	return ast.WalkContinue, nil
+}
+
+// renderTableHeader writes the header as an ordinary <tr> row; its cells are
+// distinguished as <th> by renderTableCell.
+func (r *Renderer) renderTableHeader(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
+	if !entering {
+		w.WriteString("</tr>\n")
+		return ast.WalkContinue, nil
+	}
+	w.WriteString("<tr>\n")
+	return ast.WalkContinue, nil
+}
+
+// renderTableRow wraps a body row's cells in <tr>…</tr>.
+func (r *Renderer) renderTableRow(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
+	if !entering {
+		w.WriteString("</tr>\n")
+		return ast.WalkContinue, nil
+	}
+	w.WriteString("<tr>\n")
+	return ast.WalkContinue, nil
+}
+
+// renderTableCell writes a cell as <th> when its parent is the table header
+// and as <td> otherwise, wrapping the cell content in <p>.
+func (r *Renderer) renderTableCell(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
+	cell := node.(*east.TableCell)
+	name := "td"
+	if cell.Parent().Kind() == east.KindTableHeader {
+		name = "th"
+	}
+	if !entering {
+		fmt.Fprintf(w, "</p></%s>\n", name)
+		return ast.WalkContinue, nil
+	}
+	fmt.Fprintf(w, "<%s><p>", name)
+	return ast.WalkContinue, nil
+}
+
+// renderStrikethrough surrounds struck-through content with <del>…</del>.
+func (r *Renderer) renderStrikethrough(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
+	if !entering {
+		w.WriteString("</del>")
+		return ast.WalkContinue, nil
+	}
+	w.WriteString("<del>")
+	return ast.WalkContinue, nil
+}
+
+// renderTaskCheckBox writes the task's status ("complete" when the box is
+// checked, "incomplete" otherwise) and opens <ac:task-body>. It sets
+// inTaskBody so that the enclosing block handler closes the body on exit.
+func (r *Renderer) renderTaskCheckBox(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
+	if !entering {
+		return ast.WalkContinue, nil
+	}
+	status := "incomplete"
+	if node.(*east.TaskCheckBox).IsChecked {
+		status = "complete"
+	}
+	w.WriteString("<ac:task-status>" + status + "</ac:task-status>\n")
+	w.WriteString("<ac:task-body>")
+	r.inTaskBody = true
+	return ast.WalkContinue, nil
+}
+
+// nodeText returns the concatenated plain text found anywhere below n, in
+// document order. Text and String nodes contribute their content; every other
+// node is descended into, so text nested inside emphasis, code spans or links
+// (for example in image alt text such as "![*my* image](…)") is included.
+func nodeText(n ast.Node, source []byte) string {
+	var buf bytes.Buffer
+	var collect func(parent ast.Node)
+	collect = func(parent ast.Node) {
+		for c := parent.FirstChild(); c != nil; c = c.NextSibling() {
+			switch v := c.(type) {
+			case *ast.Text:
+				buf.Write(v.Segment.Value(source))
+			case *ast.String:
+				buf.Write(v.Value)
+			default:
+				collect(c)
+			}
+		}
+	}
+	collect(n)
+	return buf.String()
+}
A => converter/md2xml.go +35 -0
@@ 1,35 @@
+package converter
+
+import (
+ "bytes"
+
+ "github.com/yuin/goldmark"
+ "github.com/yuin/goldmark/extension"
+ "github.com/yuin/goldmark/renderer"
+ "github.com/yuin/goldmark/util"
+
+ "sourcecraft.dev/bigbes/markdown-to-confluence-xml/confluence"
+)
+
+// MarkdownToConfluence converts Markdown source to Confluence storage format
+// XML. A fresh Confluence node renderer is created for every call.
+func MarkdownToConfluence(source []byte) (string, error) {
+	extensions := goldmark.WithExtensions(
+		extension.GFM,
+		extension.TaskList,
+	)
+	nodeRenderers := renderer.WithNodeRenderers(
+		util.Prioritized(confluence.NewRenderer(), 100),
+	)
+	md := goldmark.New(
+		extensions,
+		goldmark.WithRenderer(renderer.NewRenderer(nodeRenderers)),
+	)
+
+	var out bytes.Buffer
+	err := md.Convert(source, &out)
+	if err != nil {
+		return "", err
+	}
+	return out.String(), nil
+}
A => converter/md2xml_test.go +436 -0
@@ 1,436 @@
+package converter
+
+import (
+ "strings"
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+)
+
+// === Markdown to Confluence XML tests ===
+
+func TestMarkdownToConfluence_Heading(t *testing.T) {
+ result, err := MarkdownToConfluence([]byte("# Hello World"))
+ require.NoError(t, err)
+ assert.Contains(t, result, "<h1>Hello World</h1>")
+}
+
+// TestMarkdownToConfluence_AllHeadingLevels checks that one to six leading '#'
+// characters map to <h1> through <h6>.
+func TestMarkdownToConfluence_AllHeadingLevels(t *testing.T) {
+	tags := []string{"h1", "h2", "h3", "h4", "h5", "h6"}
+	for idx := range tags {
+		level := idx + 1
+		markdown := strings.Repeat("#", level) + " Heading"
+		out, err := MarkdownToConfluence([]byte(markdown))
+		require.NoError(t, err)
+		want := "<" + tags[idx] + ">Heading</" + tags[idx] + ">"
+		assert.Contains(t, out, want, "level %d", level)
+	}
+}
+
+func TestMarkdownToConfluence_Bold(t *testing.T) {
+ result, err := MarkdownToConfluence([]byte("**bold text**"))
+ require.NoError(t, err)
+ assert.Contains(t, result, "<strong>bold text</strong>")
+}
+
+func TestMarkdownToConfluence_Italic(t *testing.T) {
+ result, err := MarkdownToConfluence([]byte("*italic text*"))
+ require.NoError(t, err)
+ assert.Contains(t, result, "<em>italic text</em>")
+}
+
+func TestMarkdownToConfluence_Strikethrough(t *testing.T) {
+ result, err := MarkdownToConfluence([]byte("~~deleted~~"))
+ require.NoError(t, err)
+ assert.Contains(t, result, "<del>deleted</del>")
+}
+
+func TestMarkdownToConfluence_InlineCode(t *testing.T) {
+ result, err := MarkdownToConfluence([]byte("`code`"))
+ require.NoError(t, err)
+ assert.Contains(t, result, "<code>code</code>")
+}
+
+func TestMarkdownToConfluence_CodeBlock(t *testing.T) {
+ input := "```go\nfmt.Println(\"hello\")\n```"
+ result, err := MarkdownToConfluence([]byte(input))
+ require.NoError(t, err)
+ assert.Contains(t, result, `ac:name="code"`)
+ assert.Contains(t, result, `ac:name="language">go`)
+ assert.Contains(t, result, `<![CDATA[fmt.Println("hello")]]>`)
+}
+
+func TestMarkdownToConfluence_CodeBlockNoLanguage(t *testing.T) {
+ input := "```\nsome code\n```"
+ result, err := MarkdownToConfluence([]byte(input))
+ require.NoError(t, err)
+ assert.Contains(t, result, `ac:name="code"`)
+ assert.NotContains(t, result, `ac:name="language"`)
+}
+
+func TestMarkdownToConfluence_Table(t *testing.T) {
+ input := "| A | B |\n|---|---|\n| 1 | 2 |"
+ result, err := MarkdownToConfluence([]byte(input))
+ require.NoError(t, err)
+ assert.Contains(t, result, "<table>")
+ assert.Contains(t, result, "<th><p>A</p></th>")
+ assert.Contains(t, result, "<td><p>1</p></td>")
+}
+
+func TestMarkdownToConfluence_Blockquote(t *testing.T) {
+ result, err := MarkdownToConfluence([]byte("> Important note"))
+ require.NoError(t, err)
+ assert.Contains(t, result, `ac:name="info"`)
+ assert.Contains(t, result, "Important note")
+}
+
+// TestMarkdownToConfluence_Image checks that a Markdown image with alt text
+// becomes an <ac:image> element carrying the URL and the alt attribute.
+func TestMarkdownToConfluence_Image(t *testing.T) {
+	// The input must actually contain an image; an empty document cannot
+	// produce any of the output asserted below.
+	result, err := MarkdownToConfluence([]byte("![alt text](https://example.com/img.png)"))
+	require.NoError(t, err)
+	assert.Contains(t, result, "<ac:image")
+	assert.Contains(t, result, `ri:value="https://example.com/img.png"`)
+	assert.Contains(t, result, `ac:alt="alt text"`)
+}
+
+func TestMarkdownToConfluence_Link(t *testing.T) {
+ result, err := MarkdownToConfluence([]byte("[Click here](https://example.com)"))
+ require.NoError(t, err)
+ assert.Contains(t, result, `<a href="https://example.com">Click here</a>`)
+}
+
+func TestMarkdownToConfluence_UnorderedList(t *testing.T) {
+ input := "- One\n- Two\n- Three"
+ result, err := MarkdownToConfluence([]byte(input))
+ require.NoError(t, err)
+ assert.Contains(t, result, "<ul>")
+ assert.Contains(t, result, "<li>")
+}
+
+func TestMarkdownToConfluence_OrderedList(t *testing.T) {
+ input := "1. First\n2. Second\n3. Third"
+ result, err := MarkdownToConfluence([]byte(input))
+ require.NoError(t, err)
+ assert.Contains(t, result, "<ol>")
+}
+
+// TestMarkdownToConfluence_NestedList checks that an indented child item
+// produces a second, nested <ul>.
+func TestMarkdownToConfluence_NestedList(t *testing.T) {
+	// The child must be indented to the parent's content column (two spaces
+	// after "- "); with a single space it is parsed as a sibling item and only
+	// one <ul> is produced.
+	input := "- Parent\n  - Child"
+	result, err := MarkdownToConfluence([]byte(input))
+	require.NoError(t, err)
+	assert.Equal(t, 2, strings.Count(result, "<ul>"), "expected 2 ul tags for nested list")
+}
+
+func TestMarkdownToConfluence_HorizontalRule(t *testing.T) {
+ result, err := MarkdownToConfluence([]byte("---"))
+ require.NoError(t, err)
+ assert.Contains(t, result, "<hr/>")
+}
+
+func TestMarkdownToConfluence_TaskList(t *testing.T) {
+ input := "- [x] Done\n- [ ] Todo"
+ result, err := MarkdownToConfluence([]byte(input))
+ require.NoError(t, err)
+ assert.Contains(t, result, "<ac:task-list>")
+ assert.Contains(t, result, "<ac:task>")
+ assert.Contains(t, result, "<ac:task-id>")
+ assert.Contains(t, result, "<ac:task-status>complete</ac:task-status>")
+ assert.Contains(t, result, "<ac:task-status>incomplete</ac:task-status>")
+ assert.Contains(t, result, "<ac:task-body>")
+ assert.Contains(t, result, "</ac:task-body>")
+}
+
+func TestMarkdownToConfluence_Paragraph(t *testing.T) {
+ result, err := MarkdownToConfluence([]byte("Hello world"))
+ require.NoError(t, err)
+ assert.Contains(t, result, "<p>Hello world</p>")
+}
+
+// TestMarkdownToConfluence_HardLineBreak checks that a Markdown hard line
+// break is rendered as <br/>.
+func TestMarkdownToConfluence_HardLineBreak(t *testing.T) {
+	// A hard break needs at least two spaces before the newline; a single
+	// trailing space is only a soft break and produces no <br/>.
+	result, err := MarkdownToConfluence([]byte("Line one  \nLine two"))
+	require.NoError(t, err)
+	assert.Contains(t, result, "<br/>")
+}
+
+func TestMarkdownToConfluence_InlineCommentMarker(t *testing.T) {
+ input := `Before <span data-inline-comment="abc-123">commented text</span> after`
+ result, err := MarkdownToConfluence([]byte(input))
+ require.NoError(t, err)
+ assert.Contains(t, result, `<ac:inline-comment-marker ac:ref="abc-123">`)
+ assert.Contains(t, result, "</ac:inline-comment-marker>")
+ assert.Contains(t, result, "commented text")
+}
+
+// === Confluence XML to Markdown tests ===
+
+func TestConfluenceToMarkdown_Heading(t *testing.T) {
+ result, err := ConfluenceToMarkdown("<h1>Hello World</h1>")
+ require.NoError(t, err)
+ assert.Contains(t, result, "# Hello World")
+}
+
+func TestConfluenceToMarkdown_Bold(t *testing.T) {
+ result, err := ConfluenceToMarkdown("<p><strong>bold</strong></p>")
+ require.NoError(t, err)
+ assert.Contains(t, result, "**bold**")
+}
+
+func TestConfluenceToMarkdown_Italic(t *testing.T) {
+ result, err := ConfluenceToMarkdown("<p><em>italic</em></p>")
+ require.NoError(t, err)
+ assert.Contains(t, result, "*italic*")
+}
+
+func TestConfluenceToMarkdown_Strikethrough(t *testing.T) {
+ result, err := ConfluenceToMarkdown("<p><del>deleted</del></p>")
+ require.NoError(t, err)
+ assert.Contains(t, result, "~~deleted~~")
+}
+
+func TestConfluenceToMarkdown_InlineCode(t *testing.T) {
+ result, err := ConfluenceToMarkdown("<p><code>code</code></p>")
+ require.NoError(t, err)
+ assert.Contains(t, result, "`code`")
+}
+
+func TestConfluenceToMarkdown_CodeBlock(t *testing.T) {
+ input := `<ac:structured-macro ac:name="code" ac:schema-version="1"><ac:parameter ac:name="language">python</ac:parameter><ac:plain-text-body><![CDATA[print("hello")]]></ac:plain-text-body></ac:structured-macro>`
+ result, err := ConfluenceToMarkdown(input)
+ require.NoError(t, err)
+ assert.Contains(t, result, "```python")
+ assert.Contains(t, result, `print("hello")`)
+}
+
+func TestConfluenceToMarkdown_Link(t *testing.T) {
+ result, err := ConfluenceToMarkdown(`<p><a href="https://example.com">Click</a></p>`)
+ require.NoError(t, err)
+ assert.Contains(t, result, "[Click](https://example.com)")
+}
+
+func TestConfluenceToMarkdown_UnorderedList(t *testing.T) {
+ result, err := ConfluenceToMarkdown("<ul>\n<li>One</li>\n<li>Two</li>\n</ul>")
+ require.NoError(t, err)
+ assert.Contains(t, result, "- One")
+ assert.Contains(t, result, "- Two")
+}
+
+func TestConfluenceToMarkdown_OrderedList(t *testing.T) {
+ result, err := ConfluenceToMarkdown("<ol>\n<li>First</li>\n<li>Second</li>\n</ol>")
+ require.NoError(t, err)
+ assert.Contains(t, result, "1. First")
+ assert.Contains(t, result, "2. Second")
+}
+
+func TestConfluenceToMarkdown_Table(t *testing.T) {
+ input := `<table><tbody><tr><th><p>Name</p></th><th><p>Age</p></th></tr><tr><td><p>Alice</p></td><td><p>30</p></td></tr></tbody></table>`
+ result, err := ConfluenceToMarkdown(input)
+ require.NoError(t, err)
+ assert.Contains(t, result, "| Name |")
+ assert.Contains(t, result, "| Alice |")
+ assert.Contains(t, result, "|---|---|")
+}
+
+func TestConfluenceToMarkdown_InfoPanel(t *testing.T) {
+ input := `<ac:structured-macro ac:name="info" ac:schema-version="1"><ac:rich-text-body><p>Important note</p></ac:rich-text-body></ac:structured-macro>`
+ result, err := ConfluenceToMarkdown(input)
+ require.NoError(t, err)
+ assert.Contains(t, result, "> Important note")
+}
+
+func TestConfluenceToMarkdown_HorizontalRule(t *testing.T) {
+ result, err := ConfluenceToMarkdown("<hr/>")
+ require.NoError(t, err)
+ assert.Contains(t, result, "---")
+}
+
+func TestConfluenceToMarkdown_Emoticons(t *testing.T) {
+ tests := []struct {
+ name string
+ input string
+ expected string
+ }{
+ {"plus", `<ac:emoticon ac:name="plus"/>`, "(+)"},
+ {"minus", `<ac:emoticon ac:name="minus"/>`, "(-)"},
+ {"question", `<ac:emoticon ac:name="question"/>`, "(?)"},
+ {"tick", `<ac:emoticon ac:name="tick"/>`, "(v)"},
+ {"cross", `<ac:emoticon ac:name="cross"/>`, "(x)"},
+ }
+ for _, tc := range tests {
+ t.Run(tc.name, func(t *testing.T) {
+ result, err := ConfluenceToMarkdown(tc.input)
+ require.NoError(t, err)
+ assert.Contains(t, result, tc.expected)
+ })
+ }
+}
+
+func TestConfluenceToMarkdown_InlineCommentMarker(t *testing.T) {
+ input := `<p>Before <ac:inline-comment-marker ac:ref="abc-123">commented text</ac:inline-comment-marker> after</p>`
+ result, err := ConfluenceToMarkdown(input)
+ require.NoError(t, err)
+ assert.Contains(t, result, `<span data-inline-comment="abc-123">`)
+ assert.Contains(t, result, "commented text")
+ assert.Contains(t, result, "</span>")
+}
+
+// === User reference tests ===
+
+func TestConfluenceToMarkdown_UserReference(t *testing.T) {
+ input := `<p>Author: <ac:link><ri:user ri:userkey="3cddbcec40cb91700140cb9345ed0b5c"/></ac:link></p>`
+ result, err := ConfluenceToMarkdown(input)
+ require.NoError(t, err)
+ assert.Contains(t, result, `<span data-user-key="3cddbcec40cb91700140cb9345ed0b5c"/>`)
+}
+
+func TestMarkdownToConfluence_UserReference(t *testing.T) {
+ input := `Author: <span data-user-key="3cddbcec40cb91700140cb9345ed0b5c"/>`
+ result, err := MarkdownToConfluence([]byte(input))
+ require.NoError(t, err)
+ assert.Contains(t, result, `<ac:link><ri:user ri:userkey="3cddbcec40cb91700140cb9345ed0b5c"/></ac:link>`)
+}
+
+func TestRoundTrip_UserReference(t *testing.T) {
+ xmlInput := `<p>Author: <ac:link><ri:user ri:userkey="abc123"/></ac:link> wrote this</p>`
+ md, err := ConfluenceToMarkdown(xmlInput)
+ require.NoError(t, err)
+ require.Contains(t, md, `data-user-key="abc123"`)
+
+ xmlOutput, err := MarkdownToConfluence([]byte(md))
+ require.NoError(t, err)
+ assert.Contains(t, xmlOutput, `ri:userkey="abc123"`)
+}
+
+// === Attachment image tests ===
+
+func TestConfluenceToMarkdown_AttachmentImage(t *testing.T) {
+ input := `<p><ac:image><ri:attachment ri:filename="screenshot.png"/></ac:image></p>`
+ result, err := ConfluenceToMarkdown(input)
+ require.NoError(t, err)
+ assert.Contains(t, result, `<span data-attachment="screenshot.png"/>`)
+}
+
+func TestConfluenceToMarkdown_AttachmentImageWithAlt(t *testing.T) {
+ input := `<p><ac:image ac:alt="My Screenshot"><ri:attachment ri:filename="screenshot.png"/></ac:image></p>`
+ result, err := ConfluenceToMarkdown(input)
+ require.NoError(t, err)
+ assert.Contains(t, result, `data-attachment="screenshot.png"`)
+ assert.Contains(t, result, `data-alt="My Screenshot"`)
+}
+
+func TestMarkdownToConfluence_AttachmentImage(t *testing.T) {
+ input := `Image: <span data-attachment="screenshot.png"/>`
+ result, err := MarkdownToConfluence([]byte(input))
+ require.NoError(t, err)
+ assert.Contains(t, result, `<ac:image><ri:attachment ri:filename="screenshot.png"/></ac:image>`)
+}
+
+func TestMarkdownToConfluence_AttachmentImageWithAlt(t *testing.T) {
+ input := `<span data-attachment="screenshot.png" data-alt="My Shot"/>`
+ result, err := MarkdownToConfluence([]byte(input))
+ require.NoError(t, err)
+ assert.Contains(t, result, `ac:alt="My Shot"`)
+ assert.Contains(t, result, `ri:filename="screenshot.png"`)
+}
+
+func TestRoundTrip_AttachmentImage(t *testing.T) {
+ xmlInput := `<p><ac:image><ri:attachment ri:filename="Pasted image 20260325004147.png"/></ac:image></p>`
+ md, err := ConfluenceToMarkdown(xmlInput)
+ require.NoError(t, err)
+ require.Contains(t, md, `data-attachment="Pasted image 20260325004147.png"`)
+
+ xmlOutput, err := MarkdownToConfluence([]byte(md))
+ require.NoError(t, err)
+ assert.Contains(t, xmlOutput, `ri:filename="Pasted image 20260325004147.png"`)
+}
+
+// === Inline spacing tests ===
+
+func TestConfluenceToMarkdown_SpaceAfterBold(t *testing.T) {
+ input := `<ul><li><strong>ATE:</strong> Запуск плейбуков</li></ul>`
+ result, err := ConfluenceToMarkdown(input)
+ require.NoError(t, err)
+ assert.Contains(t, result, "**ATE:** Запуск плейбуков")
+}
+
+func TestConfluenceToMarkdown_SpaceBetweenInlineElements(t *testing.T) {
+ input := `<ul><li><strong>Bold</strong> then <em>italic</em> text</li></ul>`
+ result, err := ConfluenceToMarkdown(input)
+ require.NoError(t, err)
+ assert.Contains(t, result, "**Bold** then *italic* text")
+}
+
+// === Round-trip tests ===
+
+// TestRoundTrip_Basic converts a small document to storage format and back,
+// then checks that the key constructs are still present.
+func TestRoundTrip_Basic(t *testing.T) {
+	original := `# Test
+
+Some **bold** and *italic* text.
+
+## Code
+
+` + "```go\nfunc main() {}\n```" + `
+
+## List
+
+- One
+- Two
+- Three
+
+| H1 | H2 |
+|----|-----|
+| A | B |
+`
+
+	storage, err := MarkdownToConfluence([]byte(original))
+	require.NoError(t, err)
+
+	regenerated, err := ConfluenceToMarkdown(storage)
+	require.NoError(t, err)
+
+	preserved := []string{"# Test", "**bold**", "*italic*", "```go", "func main() {}", "- One", "| H1", "| A"}
+	for _, fragment := range preserved {
+		assert.Contains(t, regenerated, fragment, "round-trip should preserve %q", fragment)
+	}
+}
+
+func TestRoundTrip_Blockquote(t *testing.T) {
+ input := "> This is a blockquote\n"
+ xml, err := MarkdownToConfluence([]byte(input))
+ require.NoError(t, err)
+
+ md, err := ConfluenceToMarkdown(xml)
+ require.NoError(t, err)
+ assert.Contains(t, md, "> This is a blockquote")
+}
+
+func TestRoundTrip_Link(t *testing.T) {
+ input := "[Example](https://example.com)\n"
+ xml, err := MarkdownToConfluence([]byte(input))
+ require.NoError(t, err)
+
+ md, err := ConfluenceToMarkdown(xml)
+ require.NoError(t, err)
+ assert.Contains(t, md, "[Example](https://example.com)")
+}
+
+func TestRoundTrip_InlineCommentMarker(t *testing.T) {
+ xmlInput := `<p>Hello <ac:inline-comment-marker ac:ref="b2f6ce98-4dc9-45e0-a9b6-b4a5109657ca">important text</ac:inline-comment-marker> world</p>`
+
+ md, err := ConfluenceToMarkdown(xmlInput)
+ require.NoError(t, err)
+ require.Contains(t, md, `data-inline-comment="b2f6ce98-4dc9-45e0-a9b6-b4a5109657ca"`)
+
+ xmlOutput, err := MarkdownToConfluence([]byte(md))
+ require.NoError(t, err)
+ assert.Contains(t, xmlOutput, `<ac:inline-comment-marker ac:ref="b2f6ce98-4dc9-45e0-a9b6-b4a5109657ca">`)
+ assert.Contains(t, xmlOutput, "</ac:inline-comment-marker>")
+ assert.Contains(t, xmlOutput, "important text")
+}
+
+func TestRoundTrip_InlineCommentFromRealXML(t *testing.T) {
+ xmlInput := `<p>Товарищ! <ac:inline-comment-marker ac:ref="b2f6ce98-4dc9-45e0-a9b6-b4a5109657ca">Не майся дурью, копируй этот шаблон и редактируй его!</ac:inline-comment-marker> Ускоришь написание RFC!</p>`
+
+ md, err := ConfluenceToMarkdown(xmlInput)
+ require.NoError(t, err)
+
+ xmlOutput, err := MarkdownToConfluence([]byte(md))
+ require.NoError(t, err)
+ assert.Contains(t, xmlOutput, `ac:ref="b2f6ce98-4dc9-45e0-a9b6-b4a5109657ca"`)
+ assert.Contains(t, xmlOutput, "Не майся дурью")
+}
A => converter/xml2md.go +746 -0
@@ 1,746 @@
+package converter
+
+import (
+ "bytes"
+ "fmt"
+ htmlpkg "html"
+ "strings"
+
+ "golang.org/x/net/html"
+)
+
+// ConfluenceToMarkdown converts a Confluence storage-format XML fragment to
+// Markdown.
+//
+// The fragment is parsed with the HTML parser after its CDATA sections have
+// been rewritten by preprocessCDATA. The rendered Markdown has runs of three
+// or more newlines reduced to two, is trimmed of surrounding whitespace, and
+// always ends with exactly one newline. An error is returned when parsing
+// fails or the parsed document has no <body> element.
+func ConfluenceToMarkdown(source string) (string, error) {
+	// x/net/html does not understand CDATA, so it is rewritten first. The
+	// single <div> wrapper gives the fragment one root under <body>.
+	root, err := html.Parse(strings.NewReader("<div>" + preprocessCDATA(source) + "</div>"))
+	if err != nil {
+		return "", fmt.Errorf("parsing confluence xml: %w", err)
+	}
+
+	// The parser builds html > head/body; the wrapper is body's first child.
+	body := findNode(root, "body")
+	if body == nil {
+		return "", fmt.Errorf("unexpected parse structure")
+	}
+
+	var out bytes.Buffer
+	if wrapper := body.FirstChild; wrapper != nil {
+		conv := &xmlConverter{buf: &out}
+		conv.walkChildren(wrapper, 0)
+	}
+
+	// Block renderers each add their own blank lines; squeeze the surplus.
+	markdown := out.String()
+	for strings.Contains(markdown, "\n\n\n") {
+		markdown = strings.ReplaceAll(markdown, "\n\n\n", "\n\n")
+	}
+	return strings.TrimSpace(markdown) + "\n", nil
+}
+
+// preprocessCDATA rewrites every <![CDATA[...]]> section in s as a
+// <cdatacontent> element whose text is the HTML-escaped CDATA payload, because
+// x/net/html does not parse CDATA sections. Text outside CDATA sections is
+// copied unchanged. If a section is never closed, the opening marker is
+// dropped and the remainder of the input is copied as-is.
+func preprocessCDATA(s string) string {
+	const (
+		cdataOpen  = "<![CDATA["
+		cdataClose = "]]>"
+	)
+
+	var out strings.Builder
+	rest := s
+	for {
+		before, after, found := strings.Cut(rest, cdataOpen)
+		out.WriteString(before)
+		if !found {
+			break
+		}
+
+		payload, tail, closed := strings.Cut(after, cdataClose)
+		if !closed {
+			out.WriteString(after)
+			break
+		}
+
+		// Wrap the payload in a marker element that can be found after parsing.
+		out.WriteString("<cdatacontent>")
+		out.WriteString(htmlpkg.EscapeString(payload))
+		out.WriteString("</cdatacontent>")
+		rest = tail
+	}
+	return out.String()
+}
+
+// xmlConverter holds the state used while walking a parsed Confluence document
+// and writing Markdown.
+type xmlConverter struct {
+ // buf receives the generated Markdown. renderPanelAsBlockquote temporarily
+ // points it at a scratch buffer while it collects a panel body.
+ buf *bytes.Buffer
+ // listDepth is the current list nesting level; it is incremented while
+ // inside <ul>, <ol> and Confluence task-list elements and drives the
+ // indentation of list items.
+ listDepth int
+ // inListItem is true while the children of an <li> are being walked; <p>
+ // then omits its trailing blank line.
+ inListItem bool
+}
+
+// walkChildren renders each direct child of n, in document order, by calling
+// walk with the same depth value.
+func (c *xmlConverter) walkChildren(n *html.Node, depth int) {
+	child := n.FirstChild
+	for child != nil {
+		c.walk(child, depth)
+		child = child.NextSibling
+	}
+}
+
+// walk renders a single node as Markdown into c.buf.
+//
+// Text nodes are written with runs of whitespace collapsed to one space;
+// whitespace-only text is dropped inside lists and written unchanged
+// elsewhere. Non-element, non-text nodes only have their children walked.
+// Standard HTML elements are mapped to their Markdown form here; any other
+// element is handed to handleConfluenceElement. depth is passed through to
+// nested calls unchanged.
+func (c *xmlConverter) walk(n *html.Node, depth int) {
+	if n.Type == html.TextNode {
+		text := n.Data
+		blank := strings.TrimSpace(text) == ""
+		if blank && c.listDepth > 0 {
+			// Indentation between list elements is not content.
+			return
+		}
+		if !blank {
+			// XML pretty-printing leaves newlines/indentation inside text.
+			text = collapseWhitespace(text)
+		}
+		c.buf.WriteString(text)
+		return
+	}
+
+	if n.Type != html.ElementNode {
+		c.walkChildren(n, depth)
+		return
+	}
+
+	// wrap writes before, the rendered children of n, then after.
+	wrap := func(before, after string) {
+		c.buf.WriteString(before)
+		c.walkChildren(n, depth)
+		c.buf.WriteString(after)
+	}
+
+	tag := strings.ToLower(n.Data)
+	switch tag {
+	// Headings: the digit in the tag name is the number of '#' characters.
+	case "h1", "h2", "h3", "h4", "h5", "h6":
+		level := int(tag[1] - '0')
+		wrap("\n"+strings.Repeat("#", level)+" ", "\n\n")
+
+	// Paragraphs end with a blank line, except inside a list item where the
+	// item itself terminates the line.
+	case "p":
+		c.walkChildren(n, depth)
+		if !c.inListItem {
+			c.buf.WriteString("\n\n")
+		}
+
+	// Inline formatting
+	case "strong", "b":
+		wrap("**", "**")
+	case "em", "i":
+		wrap("*", "*")
+	case "del", "s":
+		wrap("~~", "~~")
+	case "code":
+		wrap("`", "`")
+
+	// Links: bracketed text followed by the parenthesised href.
+	case "a":
+		href := getAttr(n, "href")
+		wrap("[", "]"+"("+href+")")
+
+	// Line break. A Markdown hard break needs at least TWO trailing spaces
+	// before the newline; a single space would only be a soft break and the
+	// <br> would be lost on the way back.
+	case "br":
+		c.buf.WriteString("  \n")
+
+	// Horizontal rule
+	case "hr":
+		c.buf.WriteString("\n---\n\n")
+
+	// Lists: only the outermost list is surrounded by blank lines.
+	case "ul", "ol":
+		c.listDepth++
+		if c.listDepth == 1 {
+			c.buf.WriteString("\n")
+		}
+		if tag == "ol" {
+			c.walkOL(n, depth)
+		} else {
+			c.walkChildren(n, depth)
+		}
+		c.listDepth--
+		if c.listDepth == 0 {
+			c.buf.WriteString("\n")
+		}
+	case "li":
+		prev := c.inListItem
+		c.inListItem = true
+		// For task items the nested task-status element writes the
+		// "- [ ]"/"- [x]" prefix itself, so no bullet is written here.
+		if !hasTaskStatus(n) {
+			c.buf.WriteString(strings.Repeat(" ", max(0, c.listDepth-1)))
+			c.buf.WriteString("- ")
+		}
+		c.walkChildrenInline(n, depth)
+		c.buf.WriteString("\n")
+		c.inListItem = prev
+
+	// Tables are converted to GFM tables.
+	case "table":
+		c.renderTable(n, depth)
+
+	// Layout/structural wrappers contribute nothing themselves.
+	case "div", "span", "tbody", "thead", "colgroup", "col", "content-wrapper":
+		c.walkChildren(n, depth)
+
+	// Everything else (Confluence ac:*/ri:* elements, <time>, unknown tags).
+	default:
+		c.handleConfluenceElement(n, tag, depth)
+	}
+}
+
+func (c *xmlConverter) handleConfluenceElement(n *html.Node, tag string, depth int) {
+ switch {
+ // Confluence structured macros (code blocks, panels, etc.)
+ case strings.Contains(tag, "structured-macro") || strings.Contains(tag, "ac:structured-macro"):
+ macroName := getAttr(n, "ac:name")
+ if macroName == "" {
+ macroName = getAttr(n, "name")
+ }
+ switch macroName {
+ case "code":
+ c.renderCodeMacro(n)
+ case "info":
+ c.renderPanelAsBlockquote(n, depth)
+ case "note":
+ c.renderPanelAsBlockquote(n, depth)
+ case "warning":
+ c.renderPanelAsBlockquote(n, depth)
+ case "toc":
+ // Skip TOC macros
+ default:
+ c.walkChildren(n, depth)
+ }
+
+ // Confluence images
+ case strings.Contains(tag, "image") || strings.Contains(tag, "ac:image"):
+ alt := getAttr(n, "ac:alt")
+ if alt == "" {
+ alt = getAttr(n, "alt")
+ }
+ imgRef := c.findImageRef(n)
+ if imgRef.isAttachment {
+ // Preserve attachment reference as round-trippable HTML
+ fmt.Fprintf(c.buf, `<span data-attachment="%s"`, imgRef.filename)
+ if alt != "" {
+ fmt.Fprintf(c.buf, ` data-alt="%s"`, alt)
+ }
+ c.buf.WriteString("/>")
+ } else {
+ c.buf.WriteString("
+ c.buf.WriteString(imgRef.url)
+ c.buf.WriteString(")")
+ }
+
+ // Confluence links (user mentions, page links)
+ case strings.Contains(tag, "ac:link"):
+ if c.hasUserChild(n) {
+ c.walkChildren(n, depth)
+ } else {
+ c.walkChildren(n, depth)
+ }
+
+ // Confluence emoticons
+ case strings.Contains(tag, "emoticon") || strings.Contains(tag, "ac:emoticon"):
+ name := getAttr(n, "ac:name")
+ if name == "" {
+ name = getAttr(n, "name")
+ }
+ switch name {
+ case "plus":
+ c.buf.WriteString("(+)")
+ case "minus":
+ c.buf.WriteString("(-)")
+ case "question":
+ c.buf.WriteString("(?)")
+ case "tick":
+ c.buf.WriteString("(v)")
+ case "cross":
+ c.buf.WriteString("(x)")
+ }
+
+ // Confluence task lists
+ case strings.Contains(tag, "task-list"):
+ c.listDepth++
+ c.walkChildren(n, depth)
+ c.listDepth--
+ case strings.Contains(tag, "task-body"):
+ c.walkChildren(n, depth)
+ c.buf.WriteString("\n")
+ case strings.Contains(tag, "task-status"):
+ status := strings.TrimSpace(getTextContent(n))
+ indent := strings.Repeat(" ", max(0, c.listDepth-1))
+ if status == "complete" {
+ c.buf.WriteString(indent + "- [x] ")
+ } else {
+ c.buf.WriteString(indent + "- [ ] ")
+ }
+ case strings.Contains(tag, "task-id"):
+ // Skip task IDs
+ case strings.Contains(tag, "task") && !strings.Contains(tag, "task-"):
+ c.walkChildren(n, depth)
+
+ // Confluence inline comment markers — preserve as span with data attribute
+ case strings.Contains(tag, "inline-comment-marker"):
+ ref := getAttr(n, "ac:ref")
+ if ref == "" {
+ ref = getAttr(n, "ref")
+ }
+ if ref != "" {
+ fmt.Fprintf(c.buf, `<span data-inline-comment="%s">`, ref)
+ c.walkChildren(n, depth)
+ c.buf.WriteString("</span>")
+ } else {
+ c.walkChildren(n, depth)
+ }
+
+ // User references — preserve as round-trippable HTML span
+ case strings.Contains(tag, "ri:user"):
+ userKey := getAttr(n, "ri:userkey")
+ if userKey == "" {
+ userKey = getAttr(n, "userkey")
+ }
+ if userKey != "" {
+ fmt.Fprintf(c.buf, `<span data-user-key="%s"/>`, userKey)
+ }
+
+ // Time elements
+ case tag == "time":
+ datetime := getAttr(n, "datetime")
+ if datetime != "" {
+ c.buf.WriteString(datetime)
+ }
+
+ // Fallback: just walk children
+ default:
+ c.walkChildren(n, depth)
+ }
+}
+
+// renderCodeMacro writes a Confluence "code" macro as a fenced code block.
+// The fence language is the text of the macro parameter named "language" and
+// the body is the content of the plain-text-body element; when several match,
+// the last one in document order wins. A newline is added after the code if
+// it does not already end with one.
+func (c *xmlConverter) renderCodeMacro(n *html.Node) {
+	var language, code string
+
+	// Search the whole macro subtree for the parameter and the body.
+	var visit func(*html.Node)
+	visit = func(cur *html.Node) {
+		if cur.Type == html.ElementNode {
+			name := strings.ToLower(cur.Data)
+			if strings.Contains(name, "parameter") {
+				param := getAttr(cur, "ac:name")
+				if param == "" {
+					param = getAttr(cur, "name")
+				}
+				if param == "language" {
+					language = getTextContent(cur)
+				}
+			}
+			if strings.Contains(name, "plain-text-body") {
+				code = getCDATAContent(cur)
+			}
+		}
+		for kid := cur.FirstChild; kid != nil; kid = kid.NextSibling {
+			visit(kid)
+		}
+	}
+	visit(n)
+
+	c.buf.WriteString("\n```" + language + "\n" + code)
+	if !strings.HasSuffix(code, "\n") {
+		c.buf.WriteString("\n")
+	}
+	c.buf.WriteString("```\n\n")
+}
+
+// renderPanelAsBlockquote writes the rich-text-body of a panel macro as a
+// Markdown blockquote. The body is first rendered into a scratch buffer,
+// trimmed, and then every line is prefixed with "> ". A blank line follows
+// the quote.
+func (c *xmlConverter) renderPanelAsBlockquote(n *html.Node, depth int) {
+	// Redirect output while the panel body is rendered.
+	outer := c.buf
+	var body bytes.Buffer
+	c.buf = &body
+
+	var search func(*html.Node)
+	search = func(cur *html.Node) {
+		if cur.Type == html.ElementNode && strings.Contains(strings.ToLower(cur.Data), "rich-text-body") {
+			c.walkChildren(cur, depth)
+			return
+		}
+		for kid := cur.FirstChild; kid != nil; kid = kid.NextSibling {
+			search(kid)
+		}
+	}
+	search(n)
+
+	c.buf = outer
+	for _, line := range strings.Split(strings.TrimSpace(body.String()), "\n") {
+		c.buf.WriteString("> ")
+		c.buf.WriteString(line)
+		c.buf.WriteString("\n")
+	}
+	c.buf.WriteString("\n")
+}
+
+// renderTable writes an HTML table as a GFM table surrounded by blank lines.
+// The column count is that of the widest row. If the first row is a header
+// row it becomes the GFM header; otherwise an empty header row is written so
+// the table is still valid. Tables without any cells produce no output.
+func (c *xmlConverter) renderTable(n *html.Node, depth int) {
+	rows := collectTableRows(n)
+
+	width := 0
+	for i := range rows {
+		width = max(width, len(rows[i].cells))
+	}
+	if width == 0 {
+		// Covers both "no rows" and "rows without cells".
+		return
+	}
+
+	c.buf.WriteString("\n")
+
+	body := rows
+	if rows[0].isHeader {
+		c.writeTableRow(rows[0].cells, width)
+		body = rows[1:]
+	} else {
+		c.writeTableRow(make([]string, width), width)
+	}
+	c.writeTableSep(width)
+
+	for _, row := range body {
+		c.writeTableRow(row.cells, width)
+	}
+	c.buf.WriteString("\n")
+}
+
+// writeTableRow writes one GFM table row with exactly cols columns. Missing
+// cells are written empty; cells beyond cols are ignored.
+//
+// Cell text is made safe for a single-line GFM row: line breaks are replaced
+// with a space (a row cannot span lines) and literal '|' characters are
+// backslash-escaped so they are not read as column separators.
+func (c *xmlConverter) writeTableRow(cells []string, cols int) {
+	sanitize := strings.NewReplacer(
+		"\r\n", " ",
+		"\n", " ",
+		"\r", " ",
+		"|", `\|`,
+	)
+
+	c.buf.WriteString("|")
+	for i := range cols {
+		cell := ""
+		if i < len(cells) {
+			cell = sanitize.Replace(cells[i])
+		}
+		c.buf.WriteString(" ")
+		c.buf.WriteString(cell)
+		c.buf.WriteString(" |")
+	}
+	c.buf.WriteString("\n")
+}
+
+// writeTableSep writes the GFM header separator row ("|---|---|...") for a
+// table with cols columns, followed by a newline.
+func (c *xmlConverter) writeTableSep(cols int) {
+	c.buf.WriteString("|" + strings.Repeat("---|", max(cols, 0)) + "\n")
+}
+
+// walkOL renders the <li> children of an ordered list as "1. ", "2. ", ...
+// items, numbered from 1 in document order and indented according to the
+// current list depth. Children that are not <li> elements are skipped.
+func (c *xmlConverter) walkOL(n *html.Node, depth int) {
+	number := 0
+	for item := n.FirstChild; item != nil; item = item.NextSibling {
+		if item.Type != html.ElementNode || strings.ToLower(item.Data) != "li" {
+			continue
+		}
+		number++
+		indent := strings.Repeat(" ", max(0, c.listDepth-1))
+		fmt.Fprintf(c.buf, "%s%d. ", indent, number)
+		c.walkChildrenInline(item, depth)
+		c.buf.WriteString("\n")
+	}
+}
+
+// walkChildrenInline renders the children of n (a list item or a paragraph
+// inside one) on a single line. Text has its whitespace collapsed, with
+// leading space removed from the first child and trailing space removed from
+// the last child. <p> children are flattened recursively, nested lists are
+// started on a new line, and all other elements are rendered by walk.
+func (c *xmlConverter) walkChildrenInline(n *html.Node, depth int) {
+	for child := n.FirstChild; child != nil; child = child.NextSibling {
+		switch child.Type {
+		case html.TextNode:
+			text := collapseWhitespace(child.Data)
+			if child == n.FirstChild {
+				text = strings.TrimLeft(text, " ")
+			}
+			if child.NextSibling == nil {
+				text = strings.TrimRight(text, " ")
+			}
+			if text != "" {
+				c.buf.WriteString(text)
+			}
+
+		case html.ElementNode:
+			tag := strings.ToLower(child.Data)
+			if tag == "p" {
+				c.walkChildrenInline(child, depth)
+				continue
+			}
+			if tag == "ul" || tag == "ol" {
+				// A nested list starts on its own line below the item text.
+				c.buf.WriteString("\n")
+			}
+			c.walk(child, depth)
+		}
+	}
+}
+
+// tableRow is one <tr> of an HTML table as collected by collectTableRows.
+type tableRow struct {
+ // isHeader is true when the row is inside <thead> or contains a <th> cell.
+ isHeader bool
+ // cells holds the trimmed text content of each <th>/<td>, in order.
+ cells []string
+}
+
+// collectTableRows returns the rows of table in document order. Each row
+// holds the trimmed text of its <th>/<td> children. A row is marked as a
+// header when it lies inside <thead> or contains at least one <th>. The
+// search does not descend into a <tr>, so tables nested inside cells do not
+// contribute rows.
+func collectTableRows(table *html.Node) []tableRow {
+	var rows []tableRow
+
+	var visit func(node *html.Node, inHeader bool)
+	visit = func(node *html.Node, inHeader bool) {
+		if node.Type == html.ElementNode {
+			switch strings.ToLower(node.Data) {
+			case "thead":
+				inHeader = true
+			case "tbody":
+				inHeader = false
+			case "tr":
+				row := tableRow{isHeader: inHeader}
+				for cell := node.FirstChild; cell != nil; cell = cell.NextSibling {
+					if cell.Type != html.ElementNode {
+						continue
+					}
+					switch strings.ToLower(cell.Data) {
+					case "th":
+						row.isHeader = true
+						fallthrough
+					case "td":
+						row.cells = append(row.cells, strings.TrimSpace(getTextContent(cell)))
+					}
+				}
+				rows = append(rows, row)
+				return
+			}
+		}
+		for child := node.FirstChild; child != nil; child = child.NextSibling {
+			visit(child, inHeader)
+		}
+	}
+
+	visit(table, false)
+	return rows
+}
+
+// imageRef describes the target of a Confluence image element as found by
+// findImageRef.
+type imageRef struct {
+ // url is the value of a nested url element (ri:value / value attribute).
+ url string
+ // filename is the value of a nested attachment element (ri:filename /
+ // filename attribute).
+ filename string
+ // isAttachment is set when an attachment filename was found.
+ isAttachment bool
+}
+
+// findImageRef searches the subtree of n for the image target: an element
+// whose name contains "url" with a non-empty ri:value/value attribute, or one
+// whose name contains "attachment" with a non-empty ri:filename/filename
+// attribute. The subtree below a matching element is not searched further.
+func (c *xmlConverter) findImageRef(n *html.Node) imageRef {
+	var found imageRef
+
+	// firstAttr returns the first non-empty attribute among keys.
+	firstAttr := func(node *html.Node, keys ...string) string {
+		for _, key := range keys {
+			if val := getAttr(node, key); val != "" {
+				return val
+			}
+		}
+		return ""
+	}
+
+	var visit func(*html.Node)
+	visit = func(node *html.Node) {
+		if node.Type == html.ElementNode {
+			name := strings.ToLower(node.Data)
+			// <ri:url ri:value="..."/>
+			if strings.Contains(name, "url") {
+				if val := firstAttr(node, "ri:value", "value"); val != "" {
+					found.url = val
+					return
+				}
+			}
+			// <ri:attachment ri:filename="..."/>
+			if strings.Contains(name, "attachment") {
+				if file := firstAttr(node, "ri:filename", "filename"); file != "" {
+					found.filename = file
+					found.isAttachment = true
+					return
+				}
+			}
+		}
+		for kid := node.FirstChild; kid != nil; kid = kid.NextSibling {
+			visit(kid)
+		}
+	}
+
+	visit(n)
+	return found
+}
+
+// hasUserChild reports whether n has a direct element child whose lower-cased
+// name contains "user".
+func (c *xmlConverter) hasUserChild(n *html.Node) bool {
+	for kid := n.FirstChild; kid != nil; kid = kid.NextSibling {
+		if kid.Type != html.ElementNode {
+			continue
+		}
+		if strings.Contains(strings.ToLower(kid.Data), "user") {
+			return true
+		}
+	}
+	return false
+}
+
+// Helper functions
+
+// findNode returns the first element named tag in a depth-first, pre-order
+// search of the tree rooted at n (n itself included), or nil if there is none.
+// The name comparison is exact.
+func findNode(n *html.Node, tag string) *html.Node {
+	isMatch := n.Type == html.ElementNode && n.Data == tag
+	if isMatch {
+		return n
+	}
+	for kid := n.FirstChild; kid != nil; kid = kid.NextSibling {
+		hit := findNode(kid, tag)
+		if hit != nil {
+			return hit
+		}
+	}
+	return nil
+}
+
+// getAttr returns the value of the first attribute of n whose name equals
+// key, or "" if there is none. For attributes that carry a namespace the name
+// compared is "namespace:key".
+func getAttr(n *html.Node, key string) string {
+	for i := range n.Attr {
+		attr := &n.Attr[i]
+		name := attr.Key
+		if attr.Namespace != "" {
+			name = attr.Namespace + ":" + attr.Key
+		}
+		if name == key {
+			return attr.Val
+		}
+	}
+	return ""
+}
+
+// collapseWhitespace replaces every run of spaces, tabs, newlines and
+// carriage returns in s with a single space. Leading and trailing runs are
+// reduced to one space as well, not removed.
+func collapseWhitespace(s string) string {
+	var out strings.Builder
+	prevSpace := false
+	for _, r := range s {
+		switch r {
+		case ' ', '\t', '\n', '\r':
+			if !prevSpace {
+				out.WriteByte(' ')
+			}
+			prevSpace = true
+		default:
+			out.WriteRune(r)
+			prevSpace = false
+		}
+	}
+	return out.String()
+}
+
+// hasTaskStatus reports whether n has a direct element child whose
+// lower-cased name contains "task-status".
+func hasTaskStatus(n *html.Node) bool {
+	for kid := n.FirstChild; kid != nil; kid = kid.NextSibling {
+		if kid.Type != html.ElementNode {
+			continue
+		}
+		if strings.Contains(strings.ToLower(kid.Data), "task-status") {
+			return true
+		}
+	}
+	return false
+}
+
+// getCDATAContent returns the original CDATA payload found below n.
+//
+// It concatenates the text of every <cdatacontent> element produced by
+// preprocessCDATA, plus any text nodes that sit outside such elements.
+//
+// The text is returned exactly as stored in the parsed tree: the HTML parser
+// has already decoded the escaping that preprocessCDATA applied, so decoding
+// it a second time would corrupt payloads that contain literal entity text
+// (for example code containing "&lt;" or "&amp;").
+func getCDATAContent(n *html.Node) string {
+	var buf bytes.Buffer
+	var walk func(*html.Node)
+	walk = func(node *html.Node) {
+		if node.Type == html.ElementNode && node.Data == "cdatacontent" {
+			buf.WriteString(getTextContent(node))
+			return
+		}
+		if node.Type == html.TextNode {
+			buf.WriteString(node.Data)
+		}
+		for child := node.FirstChild; child != nil; child = child.NextSibling {
+			walk(child)
+		}
+	}
+	walk(n)
+	return buf.String()
+}
+
+// getTextContent returns the concatenation, in document order, of the data of
+// all text nodes in the tree rooted at n (n itself included).
+func getTextContent(n *html.Node) string {
+	var text bytes.Buffer
+
+	var collect func(*html.Node)
+	collect = func(cur *html.Node) {
+		if cur.Type == html.TextNode {
+			text.WriteString(cur.Data)
+		}
+		kid := cur.FirstChild
+		for kid != nil {
+			collect(kid)
+			kid = kid.NextSibling
+		}
+	}
+
+	collect(n)
+	return text.String()
+}
A => format/pretty.go +493 -0
@@ 1,493 @@
+package format
+
+import (
+ "strings"
+ "unicode/utf8"
+)
+
+// defaultMaxLineWidth is the maximum line width, counted in runes. PrettyXML
+// wraps output lines longer than this, and tryInlineBlock only keeps a block
+// on one line when the result does not exceed it.
+const defaultMaxLineWidth = 120
+
+// blockTags is the set of (lower-cased) tag names that PrettyXML treats as
+// block elements: their open and close tags are placed on their own lines and
+// the content between them is indented one level deeper.
+var blockTags = map[string]bool{
+ // Layout
+ "ac:layout": true,
+ "ac:layout-section": true,
+ "ac:layout-cell": true,
+ // Block content
+ "p": true,
+ "h1": true, "h2": true, "h3": true, "h4": true, "h5": true, "h6": true,
+ "div": true,
+ // Lists
+ "ul": true, "ol": true, "li": true,
+ // Tables
+ "table": true, "thead": true, "tbody": true, "colgroup": true,
+ "tr": true, "th": true, "td": true,
+ // Macros
+ "ac:structured-macro": true,
+ "ac:rich-text-body": true,
+ "ac:plain-text-body": true,
+ // Task lists
+ "ac:task-list": true,
+ "ac:task": true,
+ "ac:task-body": true,
+}
+
+// inlineableBlocks is the set of tags that PrettyXML tries to keep on a single
+// line (open tag, content and close tag together) when tryInlineBlock finds
+// the result short enough. tryInlineBlock also allows these tags to appear
+// nested inside a block that is being inlined.
+// NOTE(review): PrettyXML only consults this set for tags that are also in
+// blockTags; "ac:task-id" and "ac:task-status" are not in blockTags, so they
+// appear to have no effect there — confirm whether that is intended.
+var inlineableBlocks = map[string]bool{
+ "li": true, "th": true, "td": true,
+ "h1": true, "h2": true, "h3": true, "h4": true, "h5": true, "h6": true,
+ "ac:task-id": true, "ac:task-status": true,
+}
+
+// preTags is the set of tags whose content PrettyXML copies through verbatim:
+// while inside one, every token is written as its raw text without
+// indentation or whitespace collapsing.
+var preTags = map[string]bool{
+ "ac:plain-text-body": true,
+}
+
+// PrettyXML formats Confluence storage XML with sensible indentation.
+//
+// input is tokenized with tokenize and re-emitted token by token. Tags listed
+// in blockTags go on their own lines and indent their content by one more
+// copy of indent; short blocks listed in inlineableBlocks are kept on one
+// line; other tags and text stay inline with collapsed whitespace; content
+// inside preTags is copied verbatim. Afterwards each line has trailing
+// spaces/tabs removed and lines wider than defaultMaxLineWidth runes are
+// wrapped with wrapLine. The result is trimmed and ends with one newline.
+//
+// NOTE(review): the post-processing loop runs over every output line,
+// including lines that came from pre content (e.g. CDATA code), so such lines
+// also get trailing whitespace trimmed and may be wrapped — confirm whether
+// that is acceptable.
+func PrettyXML(input string, indent string) string {
+ tokens := tokenize(input)
+ var buf strings.Builder
+ // level is the current indentation depth (number of indent copies).
+ level := 0
+ // inPre counts how many preTags elements are currently open.
+ inPre := 0
+ // atLineStart is true when nothing has been written on the current line.
+ atLineStart := true
+
+ i := 0
+ for i < len(tokens) {
+ tok := tokens[i]
+
+ switch tok.kind {
+ case tokenOpen:
+ tagName := tok.tagName()
+ if inPre > 0 {
+ // Inside pre content: copy verbatim, only track nesting.
+ buf.WriteString(tok.raw)
+ if preTags[tagName] {
+ inPre++
+ }
+ i++
+ continue
+ }
+ if preTags[tagName] {
+ // Entering pre content: the open tag starts an indented line and
+ // everything after it is copied verbatim.
+ inPre++
+ ensureIndentedLine(&buf, level, indent, &atLineStart)
+ buf.WriteString(tok.raw)
+ i++
+ continue
+ }
+ if blockTags[tagName] {
+ // Try to inline short blocks like <li>text</li>, <h1>Title</h1>
+ if inlineableBlocks[tagName] {
+ if inlined, skip := tryInlineBlock(tokens[i:], tagName); skip > 0 {
+ ensureIndentedLine(&buf, level, indent, &atLineStart)
+ buf.WriteString(inlined)
+ buf.WriteString("\n")
+ atLineStart = true
+ // Skip all tokens that were consumed by the inlined block.
+ i += skip
+ continue
+ }
+ }
+ ensureIndentedLine(&buf, level, indent, &atLineStart)
+ buf.WriteString(tok.raw)
+ buf.WriteString("\n")
+ level++
+ atLineStart = true
+ } else {
+ // Inline tag: stays on the current line.
+ if atLineStart {
+ writeIndentPrefix(&buf, level, indent)
+ atLineStart = false
+ }
+ buf.WriteString(tok.raw)
+ }
+
+ case tokenClose:
+ tagName := tok.tagName()
+ if inPre > 0 {
+ buf.WriteString(tok.raw)
+ if preTags[tagName] {
+ inPre--
+ }
+ i++
+ continue
+ }
+ if blockTags[tagName] {
+ level--
+ // Unbalanced close tags must not produce a negative indent.
+ if level < 0 {
+ level = 0
+ }
+ if !atLineStart {
+ buf.WriteString("\n")
+ }
+ writeIndentPrefix(&buf, level, indent)
+ buf.WriteString(tok.raw)
+ buf.WriteString("\n")
+ atLineStart = true
+ } else {
+ buf.WriteString(tok.raw)
+ }
+
+ case tokenSelfClose:
+ tagName := tok.tagName()
+ if inPre > 0 {
+ buf.WriteString(tok.raw)
+ i++
+ continue
+ }
+ // Self-closing block tags, plus <hr/> and <col/>, get their own line.
+ if blockTags[tagName] || tagName == "hr" || tagName == "col" {
+ ensureIndentedLine(&buf, level, indent, &atLineStart)
+ buf.WriteString(tok.raw)
+ buf.WriteString("\n")
+ atLineStart = true
+ } else {
+ if atLineStart {
+ writeIndentPrefix(&buf, level, indent)
+ atLineStart = false
+ }
+ buf.WriteString(tok.raw)
+ }
+
+ case tokenText:
+ if inPre > 0 {
+ buf.WriteString(tok.raw)
+ i++
+ continue
+ }
+ text := collapseWS(tok.raw)
+ // Whitespace-only text between tags is dropped.
+ if text == "" || text == " " {
+ i++
+ continue
+ }
+ if atLineStart {
+ // Leading space would fight with the indentation prefix.
+ text = strings.TrimLeft(text, " ")
+ if text == "" {
+ i++
+ continue
+ }
+ writeIndentPrefix(&buf, level, indent)
+ atLineStart = false
+ }
+ buf.WriteString(text)
+
+ case tokenCDATA, tokenComment:
+ if inPre > 0 {
+ buf.WriteString(tok.raw)
+ i++
+ continue
+ }
+ if atLineStart {
+ writeIndentPrefix(&buf, level, indent)
+ atLineStart = false
+ }
+ buf.WriteString(tok.raw)
+ }
+
+ i++
+ }
+
+ result := buf.String()
+ // Post-process: clean up lines and wrap long ones
+ lines := strings.Split(result, "\n")
+ var final []string
+ for _, line := range lines {
+ line = strings.TrimRight(line, " \t")
+ if runeWidth(line) > defaultMaxLineWidth {
+ final = append(final, wrapLine(line, defaultMaxLineWidth)...)
+ } else {
+ final = append(final, line)
+ }
+ }
+ return strings.TrimSpace(strings.Join(final, "\n")) + "\n"
+}
+
+// tryInlineBlock tries to render the block that starts at tokens[0] (an open
+// tag named tagName) on a single line.
+//
+// It succeeds when the matching close tag is found, no nested open tag is a
+// block tag that is not itself inlineable, no CDATA token spans several
+// lines, and the joined text is at most defaultMaxLineWidth runes wide. Text
+// tokens are whitespace-collapsed; the text token directly after the open
+// tag also loses its leading space.
+//
+// On success it returns the one-line text and the number of tokens consumed
+// (including the close tag); otherwise it returns "" and 0.
+func tryInlineBlock(tokens []token, tagName string) (string, int) {
+	if len(tokens) < 2 {
+		return "", 0
+	}
+
+	var line strings.Builder
+	line.WriteString(tokens[0].raw)
+	nesting := 1 // open tags named tagName that are not closed yet
+
+	for idx := 1; idx < len(tokens); idx++ {
+		cur := tokens[idx]
+		switch cur.kind {
+		case tokenOpen:
+			name := cur.tagName()
+			if blockTags[name] && !inlineableBlocks[name] {
+				// A real block child forces the multi-line layout.
+				return "", 0
+			}
+			if name == tagName {
+				nesting++
+			}
+
+		case tokenClose:
+			if cur.tagName() == tagName {
+				nesting--
+				if nesting == 0 {
+					line.WriteString(cur.raw)
+					if out := line.String(); runeWidth(out) <= defaultMaxLineWidth {
+						return out, idx + 1
+					}
+					return "", 0
+				}
+			}
+
+		case tokenText:
+			text := collapseWS(cur.raw)
+			if text == "" {
+				continue
+			}
+			if idx == 1 {
+				text = strings.TrimLeft(text, " ")
+			}
+			line.WriteString(text)
+			continue
+
+		case tokenCDATA:
+			if strings.Contains(cur.raw, "\n") {
+				return "", 0
+			}
+		}
+		// Every token kind except text is copied through unchanged.
+		line.WriteString(cur.raw)
+	}
+
+	// Ran out of tokens without finding the matching close tag.
+	return "", 0
+}
+
+// wrapLine splits a long line into several lines of at most maxWidth runes
+// where possible, preserving the leading indentation; continuation lines get
+// one extra level of indent. It is XML-aware: tags (< ... >) are never split,
+// so a single tag or word wider than maxWidth still produces an over-long
+// line.
+//
+// Whitespace between words and tags is significant inline content and is
+// preserved: each run of whitespace in the input becomes either one space or
+// a line break in the output, never nothing. If wrapping produces no output
+// the original line is returned unchanged.
+func wrapLine(line string, maxWidth int) []string {
+	// Extract leading indentation.
+	trimmed := strings.TrimLeft(line, " \t")
+	indentStr := line[:len(line)-len(trimmed)]
+	contIndent := indentStr + " " // continuation lines get extra indent
+	indentW := runeWidth(indentStr)
+	contW := runeWidth(contIndent)
+
+	var lines []string
+	var cur strings.Builder
+	cur.WriteString(indentStr)
+	curWidth := indentW
+
+	// pendingSpace records that whitespace was seen since the last piece was
+	// written. It is turned into a single space before the next piece, or
+	// absorbed by a line break.
+	pendingSpace := false
+
+	isBlank := func(b byte) bool { return b == ' ' || b == '\t' }
+
+	// breakLine finishes the current output line and starts a continuation.
+	breakLine := func() {
+		lines = append(lines, strings.TrimRight(cur.String(), " "))
+		cur.Reset()
+		cur.WriteString(contIndent)
+		curWidth = contW
+		pendingSpace = false
+	}
+
+	// place appends an unbreakable piece, wrapping first when it would not
+	// fit and the current line already holds more than floor runes.
+	place := func(piece string, floor int) {
+		pieceW := runeWidth(piece)
+		needed := pieceW
+		if pendingSpace {
+			needed++
+		}
+		if curWidth+needed > maxWidth && curWidth > floor {
+			breakLine()
+		}
+		if pendingSpace {
+			cur.WriteByte(' ')
+			curWidth++
+			pendingSpace = false
+		}
+		cur.WriteString(piece)
+		curWidth += pieceW
+	}
+
+	// Segments alternate between tags (unsplittable) and text (splittable at
+	// whitespace).
+	for _, seg := range splitSegments(trimmed) {
+		if seg == "" {
+			continue
+		}
+
+		if strings.HasPrefix(seg, "<") {
+			place(seg, indentW)
+			continue
+		}
+
+		if isBlank(seg[0]) {
+			pendingSpace = true
+		}
+		for _, word := range strings.Fields(seg) {
+			place(word, contW)
+			pendingSpace = true
+		}
+		// Only carry a space over to the next segment if this one really
+		// ended with whitespace (this also covers whitespace-only segments).
+		pendingSpace = isBlank(seg[len(seg)-1])
+	}
+
+	if final := strings.TrimRight(cur.String(), " "); final != "" {
+		lines = append(lines, final)
+	}
+
+	if len(lines) == 0 {
+		return []string{line}
+	}
+	return lines
+}
+
+// splitSegments breaks s into tag segments ("<" up to and including the next
+// ">") and the text segments between them, in order. Text segments keep their
+// whitespace. An unterminated "<..." at the end is returned as one segment.
+// E.g. "Hello <strong>world</strong> end" ->
+// ["Hello ", "<strong>", "world", "</strong>", " end"]
+func splitSegments(s string) []string {
+	var out []string
+	rest := s
+	for rest != "" {
+		open := strings.IndexByte(rest, '<')
+		if open < 0 {
+			return append(out, rest)
+		}
+		if open > 0 {
+			out = append(out, rest[:open])
+		}
+
+		tag := rest[open:]
+		end := strings.IndexByte(tag, '>')
+		if end < 0 {
+			return append(out, tag)
+		}
+		out = append(out, tag[:end+1])
+		rest = tag[end+1:]
+	}
+	return out
+}
+
+// runeWidth returns the width of s measured as its number of runes (not
+// bytes), so multi-byte characters count once.
+func runeWidth(s string) int {
+	width := utf8.RuneCountInString(s)
+	return width
+}
+
+// ensureIndentedLine makes sure the next write lands on a fresh, indented
+// line: if the current line already has content a newline is written first,
+// then the indentation prefix for level is written. *atLineStart is false
+// afterwards because the line now holds its prefix.
+func ensureIndentedLine(buf *strings.Builder, level int, indent string, atLineStart *bool) {
+	if *atLineStart {
+		*atLineStart = false
+	} else {
+		buf.WriteString("\n")
+	}
+	writeIndentPrefix(buf, level, indent)
+}
+
+// writeIndentPrefix writes level copies of indent to buf. Nothing is written
+// when level is zero or negative.
+func writeIndentPrefix(buf *strings.Builder, level int, indent string) {
+	if level <= 0 {
+		return
+	}
+	buf.WriteString(strings.Repeat(indent, level))
+}
+
+// collapseWS replaces each run of spaces, tabs, newlines and carriage returns
+// in s with a single space. Leading and trailing runs become one space too;
+// they are not removed.
+func collapseWS(s string) string {
+	isWS := func(r rune) bool {
+		return r == ' ' || r == '\t' || r == '\n' || r == '\r'
+	}
+
+	var out strings.Builder
+	inRun := false
+	for _, r := range s {
+		if !isWS(r) {
+			out.WriteRune(r)
+			inRun = false
+			continue
+		}
+		if !inRun {
+			out.WriteByte(' ')
+			inRun = true
+		}
+	}
+	return out.String()
+}
+
+// tokenKind identifies the kind of lexical unit produced by tokenize.
+type tokenKind int
+
+// Token kinds. The trailing comment on each constant shows the raw form the
+// tokenizer classifies as that kind.
+const (
+ tokenOpen tokenKind = iota // <tag ...>
+ tokenClose // </tag>
+ tokenSelfClose // <tag .../>
+ tokenText // plain text
+ tokenCDATA // <![CDATA[...]]>
+ tokenComment // <!-- ... -->
+)
+
+// token is one lexical unit of the input as produced by tokenize.
+type token struct {
+ // kind classifies the token (tag, text, CDATA or comment).
+ kind tokenKind
+ // raw is the exact source text of the token, including any angle brackets.
+ raw string
+}
+
+func (t token) tagName() string {
+ s := t.raw
+ switch t.kind {
+ case tokenOpen, tokenSelfClose:
+ s = s[1:]
+ if strings.HasSuffix(s, "/>") {
+ s = s[:len(s)-2]
+ } else {
+ s = strings.TrimSuffix(s, ">")
+ }
+ if idx := strings.IndexAny(s, " \t\n"); idx > 0 {
+ s = s[:idx]
+ }
+ return strings.ToLower(s)
+ case tokenClose:
+ s = s[2:]
+ s = strings.TrimSuffix(s, ">")
+ return strings.ToLower(strings.TrimSpace(s))
+ }
+ return ""
+}
+
+// tokenize splits input into a flat sequence of tokens: CDATA sections,
+// comments, open/close/self-closing tags and the text between them. The raw
+// text of all tokens concatenated equals input. An unterminated CDATA section
+// or comment extends to the end of the input; an unterminated "<..." is
+// returned as a text token.
+func tokenize(input string) []token {
+	var out []token
+	rest := input
+
+	for len(rest) > 0 {
+		// Text runs up to the next '<'.
+		if rest[0] != '<' {
+			next := strings.Index(rest, "<")
+			if next == -1 {
+				return append(out, token{tokenText, rest})
+			}
+			out = append(out, token{tokenText, rest[:next]})
+			rest = rest[next:]
+			continue
+		}
+
+		// Markup: decide the token family and the terminator to look for.
+		kind, terminator := tokenOpen, ">"
+		switch {
+		case strings.HasPrefix(rest, "<![CDATA["):
+			kind, terminator = tokenCDATA, "]]>"
+		case strings.HasPrefix(rest, "<!--"):
+			kind, terminator = tokenComment, "-->"
+		}
+
+		end := strings.Index(rest, terminator)
+		if end == -1 {
+			if terminator == ">" {
+				// A lone '<' without '>' is treated as text.
+				kind = tokenText
+			}
+			return append(out, token{kind, rest})
+		}
+
+		raw := rest[:end+len(terminator)]
+		if terminator == ">" {
+			switch {
+			case strings.HasPrefix(raw, "</"):
+				kind = tokenClose
+			case strings.HasSuffix(raw, "/>"):
+				kind = tokenSelfClose
+			}
+		}
+		out = append(out, token{kind, raw})
+		rest = rest[len(raw):]
+	}
+	return out
+}
A => format/pretty_test.go +161 -0
@@ 1,161 @@
+package format
+
+import (
+ "strings"
+ "testing"
+ "unicode/utf8"
+
+ "github.com/stretchr/testify/assert"
+)
+
+func TestPrettyXML_Paragraph(t *testing.T) {
+ input := `<p>Hello <strong>world</strong></p>`
+ result := PrettyXML(input, " ")
+ assert.Equal(t, "<p>\n Hello <strong>world</strong>\n</p>\n", result)
+}
+
+func TestPrettyXML_NestedBlocks(t *testing.T) {
+ input := `<ul><li>One</li><li>Two</li></ul>`
+ result := PrettyXML(input, " ")
+ // li should be inlined since content is short
+ assert.Contains(t, result, " <li>One</li>\n")
+ assert.Contains(t, result, " <li>Two</li>\n")
+}
+
+func TestPrettyXML_InlineStaysInline(t *testing.T) {
+ input := `<p>Text with <strong>bold</strong> and <em>italic</em></p>`
+ result := PrettyXML(input, " ")
+ assert.Contains(t, result, "Text with <strong>bold</strong> and <em>italic</em>")
+}
+
+func TestPrettyXML_CodeBlockCDATAPreserved(t *testing.T) {
+ input := `<ac:structured-macro ac:name="code"><ac:parameter ac:name="language">go</ac:parameter><ac:plain-text-body><![CDATA[func main() {
+ fmt.Println("hello")
+}]]></ac:plain-text-body></ac:structured-macro>`
+ result := PrettyXML(input, " ")
+ assert.Contains(t, result, `<![CDATA[func main() {
+ fmt.Println("hello")
+}]]>`)
+}
+
+func TestPrettyXML_HeadingsInlined(t *testing.T) {
+ input := `<h1>Title</h1><h2>Subtitle</h2>`
+ result := PrettyXML(input, " ")
+ assert.Contains(t, result, "<h1>Title</h1>\n")
+ assert.Contains(t, result, "<h2>Subtitle</h2>\n")
+}
+
+func TestPrettyXML_HeadingsWithInlineMarkup(t *testing.T) {
+ input := `<h2>Section <strong>Important</strong></h2>`
+ result := PrettyXML(input, " ")
+ assert.Contains(t, result, "<h2>Section <strong>Important</strong></h2>\n")
+}
+
+func TestPrettyXML_SelfClosingBlock(t *testing.T) {
+ input := `<p>Before</p><hr/><p>After</p>`
+ result := PrettyXML(input, " ")
+ assert.Contains(t, result, "<hr/>\n")
+}
+
+func TestPrettyXML_Table(t *testing.T) {
+ input := `<table><tbody><tr><th><p>Name</p></th><td><p>Value</p></td></tr></tbody></table>`
+ result := PrettyXML(input, " ")
+ assert.Contains(t, result, "<table>\n")
+ assert.Contains(t, result, " <tbody>\n")
+ assert.Contains(t, result, " <tr>\n")
+}
+
+func TestPrettyXML_Layout(t *testing.T) {
+ input := `<ac:layout><ac:layout-section ac:type="single"><ac:layout-cell><p>Content</p></ac:layout-cell></ac:layout-section></ac:layout>`
+ result := PrettyXML(input, " ")
+ assert.Contains(t, result, "<ac:layout>\n")
+ assert.Contains(t, result, " <ac:layout-section ac:type=\"single\">\n")
+ assert.Contains(t, result, " <ac:layout-cell>\n")
+}
+
+func TestPrettyXML_TaskList(t *testing.T) {
+ input := `<ac:task-list><ac:task><ac:task-id>1</ac:task-id><ac:task-status>complete</ac:task-status><ac:task-body>Done</ac:task-body></ac:task></ac:task-list>`
+ result := PrettyXML(input, " ")
+ assert.Contains(t, result, "<ac:task-list>\n")
+ assert.Contains(t, result, " <ac:task>\n")
+ // task-id and task-status should be inlined
+ assert.Contains(t, result, "<ac:task-id>1</ac:task-id>")
+ assert.Contains(t, result, "<ac:task-status>complete</ac:task-status>")
+}
+
+func TestPrettyXML_CommentsPreserved(t *testing.T) {
+ input := `<!-- MD_CONTENT_START --><p>Content</p><!-- MD_CONTENT_END -->`
+ result := PrettyXML(input, " ")
+ assert.Contains(t, result, "<!-- MD_CONTENT_START -->")
+ assert.Contains(t, result, "<!-- MD_CONTENT_END -->")
+}
+
+func TestPrettyXML_InlineElements(t *testing.T) {
+ input := `<p>Link: <a href="https://example.com">click</a> and <code>code</code></p>`
+ result := PrettyXML(input, " ")
+ assert.Contains(t, result, `Link: <a href="https://example.com">click</a> and <code>code</code>`)
+}
+
+func TestPrettyXML_EmptyInput(t *testing.T) {
+ result := PrettyXML("", " ")
+ assert.Equal(t, "\n", result)
+}
+
+func TestPrettyXML_UserAndAttachmentInline(t *testing.T) {
+ input := `<p>By <ac:link><ri:user ri:userkey="abc123"/></ac:link> see <ac:image><ri:attachment ri:filename="img.png"/></ac:image></p>`
+ result := PrettyXML(input, " ")
+ assert.Contains(t, result, `<ac:link><ri:user ri:userkey="abc123"/></ac:link>`)
+}
+
+func TestPrettyXML_CustomIndent(t *testing.T) {
+ input := `<ul><li>Item</li></ul>`
+ result := PrettyXML(input, "\t")
+ assert.Contains(t, result, "\t<li>Item</li>")
+}
+
+func TestPrettyXML_LiInlinedShort(t *testing.T) {
+ input := `<ul><li>Short item</li></ul>`
+ result := PrettyXML(input, " ")
+ assert.Contains(t, result, " <li>Short item</li>\n")
+}
+
+func TestPrettyXML_LiExpandedLong(t *testing.T) {
+ longText := strings.Repeat("слово ", 30) // ~180 chars in Cyrillic
+ input := `<ul><li>` + longText + `</li></ul>`
+ result := PrettyXML(input, " ")
+ // Should NOT be inlined — too long
+ assert.Contains(t, result, " <li>\n")
+}
+
+func TestPrettyXML_LongLineWrapped(t *testing.T) {
+ longText := strings.Repeat("word ", 30) // 150 chars
+ input := `<p>` + longText + `</p>`
+ result := PrettyXML(input, " ")
+ for _, line := range strings.Split(result, "\n") {
+ w := utf8.RuneCountInString(line)
+ if w > 125 { // allow small overshoot for tags
+ t.Errorf("line too long (%d runes): %s", w, line)
+ }
+ }
+}
+
+func TestPrettyXML_LongLineUTF8(t *testing.T) {
+ // Russian text: each Cyrillic char is 1 rune but 2 bytes
+ longText := strings.Repeat("Привет мир ", 15) // ~165 runes
+ input := `<p>` + longText + `</p>`
+ result := PrettyXML(input, " ")
+ for _, line := range strings.Split(result, "\n") {
+ w := utf8.RuneCountInString(line)
+ if w > 125 {
+ t.Errorf("line too long (%d runes): %s", w, line[:80])
+ }
+ }
+}
+
+func TestPrettyXML_LongLinePreservesTagIntegrity(t *testing.T) {
+ input := `<p>Text <strong>bold text here</strong> more text and <a href="https://example.com/very/long/path">some link</a> even more text to make line long enough to wrap around the boundary limit</p>`
+ result := PrettyXML(input, " ")
+ // Tags should not be split across lines
+ assert.NotContains(t, result, "<strong\n")
+ assert.NotContains(t, result, "</strong\n")
+}
A => go.mod +18 -0
@@ 1,18 @@
+module sourcecraft.dev/bigbes/markdown-to-confluence-xml
+
+go 1.26.1
+
+require (
+ github.com/spf13/cobra v1.10.2
+ github.com/stretchr/testify v1.11.1
+ github.com/yuin/goldmark v1.8.2
+ golang.org/x/net v0.52.0
+)
+
+require (
+ github.com/davecgh/go-spew v1.1.1 // indirect
+ github.com/inconshreveable/mousetrap v1.1.0 // indirect
+ github.com/pmezard/go-difflib v1.0.0 // indirect
+ github.com/spf13/pflag v1.0.9 // indirect
+ gopkg.in/yaml.v3 v3.0.1 // indirect
+)
A => go.sum +23 -0
@@ 1,23 @@
+github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g=
+github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
+github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
+github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
+github.com/spf13/cobra v1.10.2 h1:DMTTonx5m65Ic0GOoRY2c16WCbHxOOw6xxezuLaBpcU=
+github.com/spf13/cobra v1.10.2/go.mod h1:7C1pvHqHw5A4vrJfjNwvOdzYu0Gml16OCs2GRiTUUS4=
+github.com/spf13/pflag v1.0.9 h1:9exaQaMOCwffKiiiYk6/BndUBv+iRViNW+4lEMi0PvY=
+github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
+github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
+github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
+github.com/yuin/goldmark v1.8.2 h1:kEGpgqJXdgbkhcOgBxkC0X0PmoPG1ZyoZ117rDVp4zE=
+github.com/yuin/goldmark v1.8.2/go.mod h1:ip/1k0VRfGynBgxOz0yCqHrbZXhcjxyuS66Brc7iBKg=
+go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg=
+golang.org/x/net v0.52.0 h1:He/TN1l0e4mmR3QqHMT2Xab3Aj3L9qjbhRm78/6jrW0=
+golang.org/x/net v0.52.0/go.mod h1:R1MAz7uMZxVMualyPXb+VaqGSa3LIaUqk0eEt3w36Sw=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
A => main.go +7 -0
@@ 1,7 @@
+package main
+
+import "sourcecraft.dev/bigbes/markdown-to-confluence-xml/cmd"
+
+func main() { // program entry point; all work is delegated to cmd.Execute
+ cmd.Execute()
+}
A => template/embed.go +31 -0
@@ 1,31 @@
+package template
+
+import (
+ "fmt"
+ "strings"
+)
+
// Embed inserts content between marker comments in a template.
// The markers should be XML comments like <!-- MD_CONTENT_START --> and <!-- MD_CONTENT_END -->.
//
// Everything up to and including the first markerStart, and everything from
// the first markerEnd that follows it, is kept verbatim; whatever sat between
// the two markers is replaced by content surrounded by newlines.
//
// An error is returned when markerStart is missing, when markerEnd is missing,
// or when markerEnd occurs only before markerStart.
func Embed(templateXML, content, markerStart, markerEnd string) (string, error) {
	startIdx := strings.Index(templateXML, markerStart)
	if startIdx == -1 {
		return "", fmt.Errorf("start marker %q not found in template", markerStart)
	}
	bodyStart := startIdx + len(markerStart)

	// Search for the end marker only after the start marker, so an earlier
	// occurrence of the same text does not hide a valid end marker.
	rel := strings.Index(templateXML[bodyStart:], markerEnd)
	if rel == -1 {
		if strings.Contains(templateXML[:startIdx], markerEnd) {
			return "", fmt.Errorf("end marker appears before start marker")
		}
		return "", fmt.Errorf("end marker %q not found in template", markerEnd)
	}
	endIdx := bodyStart + rel

	return templateXML[:bodyStart] + "\n" + content + "\n" + templateXML[endIdx:], nil
}
A => template/embed_test.go +64 -0
@@ 1,64 @@
+package template
+
+import (
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+)
+
+func TestEmbed_Basic(t *testing.T) {
+ tmpl := `<root>
+<!-- MD_CONTENT_START -->
+<p>Old content</p>
+<!-- MD_CONTENT_END -->
+</root>`
+
+ result, err := Embed(tmpl, "<h1>New</h1>", DefaultMarkerStart, DefaultMarkerEnd)
+ require.NoError(t, err)
+ assert.Contains(t, result, "<h1>New</h1>")
+ assert.NotContains(t, result, "Old content")
+ assert.Contains(t, result, DefaultMarkerStart)
+ assert.Contains(t, result, DefaultMarkerEnd)
+}
+
+func TestEmbed_CustomMarkers(t *testing.T) {
+ tmpl := `<root><!-- BEGIN --><p>old</p><!-- END --></root>`
+ result, err := Embed(tmpl, "<h1>New</h1>", "<!-- BEGIN -->", "<!-- END -->")
+ require.NoError(t, err)
+ assert.Contains(t, result, "<h1>New</h1>")
+}
+
+func TestEmbed_MissingStartMarker(t *testing.T) {
+ tmpl := `<root><!-- MD_CONTENT_END --></root>`
+ _, err := Embed(tmpl, "content", DefaultMarkerStart, DefaultMarkerEnd)
+ require.Error(t, err)
+ assert.Contains(t, err.Error(), "start marker")
+}
+
+func TestEmbed_MissingEndMarker(t *testing.T) {
+ tmpl := `<root><!-- MD_CONTENT_START --></root>`
+ _, err := Embed(tmpl, "content", DefaultMarkerStart, DefaultMarkerEnd)
+ require.Error(t, err)
+ assert.Contains(t, err.Error(), "end marker")
+}
+
+func TestEmbed_EndBeforeStart(t *testing.T) {
+ tmpl := `<!-- MD_CONTENT_END --> <!-- MD_CONTENT_START -->`
+ _, err := Embed(tmpl, "content", DefaultMarkerStart, DefaultMarkerEnd)
+ require.Error(t, err)
+ assert.Contains(t, err.Error(), "before start")
+}
+
+func TestEmbed_PreserveSurroundingContent(t *testing.T) {
+ tmpl := `<header>title</header>
+<!-- MD_CONTENT_START -->
+old
+<!-- MD_CONTENT_END -->
+<footer>end</footer>`
+
+ result, err := Embed(tmpl, "<p>new</p>", DefaultMarkerStart, DefaultMarkerEnd)
+ require.NoError(t, err)
+ assert.Contains(t, result, "<header>title</header>")
+ assert.Contains(t, result, "<footer>end</footer>")
+}
A => +24 -0
@@ 1,24 @@
package template
import (
"fmt"
"strings"
)
// Extract returns the content between marker comments in a Confluence XML document.
//
// The result is the text between the first markerStart and the first
// markerEnd that follows it, with leading and trailing whitespace trimmed.
//
// An error is returned when markerStart is missing, when markerEnd is missing,
// or when markerEnd occurs only before markerStart.
func Extract(xmlDoc, markerStart, markerEnd string) (string, error) {
	startIdx := strings.Index(xmlDoc, markerStart)
	if startIdx == -1 {
		return "", fmt.Errorf("start marker %q not found in document", markerStart)
	}
	bodyStart := startIdx + len(markerStart)

	// Search for the end marker only after the start marker, so an earlier
	// occurrence of the same text does not hide a valid end marker.
	rel := strings.Index(xmlDoc[bodyStart:], markerEnd)
	if rel == -1 {
		if strings.Contains(xmlDoc[:startIdx], markerEnd) {
			return "", fmt.Errorf("end marker appears before start marker")
		}
		return "", fmt.Errorf("end marker %q not found in document", markerEnd)
	}

	return strings.TrimSpace(xmlDoc[bodyStart : bodyStart+rel]), nil
}
A => +49 -0
@@ 1,49 @@
package template
import (
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestExtract_Basic pulls the region between the default markers out of a
// small document and pins the trimmed result.
func TestExtract_Basic(t *testing.T) {
	const document = `<root>
<!-- MD_CONTENT_START -->
<h1>Title</h1>
<p>Content here</p>
<!-- MD_CONTENT_END -->
</root>`
	const want = "<h1>Title</h1>\n<p>Content here</p>"

	got, err := Extract(document, DefaultMarkerStart, DefaultMarkerEnd)
	require.NoError(t, err)
	assert.Equal(t, want, got)
}
// TestExtract_CustomMarkers checks that Extract works with caller-supplied
// marker strings.
func TestExtract_CustomMarkers(t *testing.T) {
	const begin, end = "<!-- BEGIN -->", "<!-- END -->"
	got, err := Extract(`<!-- BEGIN -->Hello<!-- END -->`, begin, end)
	require.NoError(t, err)
	assert.Equal(t, "Hello", got)
}
// TestExtract_MissingStartMarker expects an error mentioning the start
// marker when the document lacks it.
func TestExtract_MissingStartMarker(t *testing.T) {
	_, err := Extract(`<root><!-- MD_CONTENT_END --></root>`, DefaultMarkerStart, DefaultMarkerEnd)
	require.Error(t, err)
	assert.Contains(t, err.Error(), "start marker")
}
// TestExtract_MissingEndMarker expects an error mentioning the end marker
// when the document lacks it.
func TestExtract_MissingEndMarker(t *testing.T) {
	_, err := Extract(`<root><!-- MD_CONTENT_START --></root>`, DefaultMarkerStart, DefaultMarkerEnd)
	require.Error(t, err)
	assert.Contains(t, err.Error(), "end marker")
}
// TestExtract_EmptyContent expects an empty result, and no error, when the
// markers are adjacent.
func TestExtract_EmptyContent(t *testing.T) {
	got, err := Extract(`<!-- MD_CONTENT_START --><!-- MD_CONTENT_END -->`, DefaultMarkerStart, DefaultMarkerEnd)
	require.NoError(t, err)
	assert.Empty(t, got)
}
A => template/markers.go +6 -0
@@ 1,6 @@
+package template
+
+const (
+ DefaultMarkerStart = "<!-- MD_CONTENT_START -->" // default XML comment opening the region handled by Embed and Extract
+ DefaultMarkerEnd = "<!-- MD_CONTENT_END -->" // default XML comment closing that region
+)
A => testdata/sample.md +48 -0
@@ 1,48 @@
+# Introduction
+
+This is a **bold** and *italic* text with `inline code` and ~~strikethrough~~.
+
+## Code Example
+
+```go
+func main() {
+ fmt.Println("Hello, World!")
+}
+```
+
+## Lists
+
+- Item one
+- Item two
+ - Nested item
+- Item three
+
+1. First
+2. Second
+3. Third
+
+## Table
+
+| Name | Age | Role |
+|-------|-----|----------|
+| Alice | 30 | Engineer |
+| Bob | 25 | Designer |
+
+## Links and Images
+
+Visit [Confluence](https://confluence.atlassian.com) for more info.
+
+
+
+## Blockquote
+
+> This is an important note that should be displayed as an info panel in Confluence.
+
+---
+
+## Task List
+
+- [x] Completed task
+- [ ] Pending task
+
+That's all!