package format

import (
	"strings"
	"unicode/utf8"
)

// defaultMaxLineWidth is the width (in runes) above which an output line is
// wrapped and above which a block is no longer inlined.
const defaultMaxLineWidth = 120

// Block elements get their own line and increase indentation for children.
var blockTags = map[string]bool{
	// Layout
	"ac:layout":         true,
	"ac:layout-section": true,
	"ac:layout-cell":    true,
	// Block content
	"p":   true,
	"h1":  true,
	"h2":  true,
	"h3":  true,
	"h4":  true,
	"h5":  true,
	"h6":  true,
	"div": true,
	// Lists
	"ul": true,
	"ol": true,
	"li": true,
	// Tables
	"table":    true,
	"thead":    true,
	"tbody":    true,
	"colgroup": true,
	"tr":       true,
	"th":       true,
	"td":       true,
	// Macros
	"ac:structured-macro": true,
	"ac:rich-text-body":   true,
	"ac:plain-text-body":  true,
	// Task lists
	"ac:task-list": true,
	"ac:task":      true,
	"ac:task-body": true,
}

// inlineableBlocks: block tags that prefer to stay on one line if short enough.
var inlineableBlocks = map[string]bool{
	"li": true,
	"th": true,
	"td": true,
	"h1": true,
	"h2": true,
	"h3": true,
	"h4": true,
	"h5": true,
	"h6": true,
	// NOTE(review): these two are not listed in blockTags, and this map is only
	// consulted for tags that are, so the entries currently have no effect.
	"ac:task-id":     true,
	"ac:task-status": true,
}

// Pre elements: content inside is not reformatted.
var preTags = map[string]bool{
	"ac:plain-text-body": true,
}

// PrettyXML formats Confluence storage XML with sensible indentation.
//
// input is the raw markup; indent is the string written once per nesting
// level. Block tags (see blockTags) are placed on their own lines and indent
// their children, short inlineable blocks are kept on a single line, and
// everything inside a pre tag (see preTags) is copied through untouched by the
// token loop. Runs of whitespace in ordinary text are collapsed to one space.
// The result always ends with exactly one newline.
func PrettyXML(input string, indent string) string {
	tokens := tokenize(input)

	var buf strings.Builder
	// level is the current block nesting depth; inPre counts how many pre
	// elements we are inside; atLineStart is true while nothing has been
	// written on the current output line yet.
	level := 0
	inPre := 0
	atLineStart := true

	for i := 0; i < len(tokens); i++ {
		tok := tokens[i]

		// Inside a pre element every token is emitted verbatim; only the
		// nesting counter is maintained so we know when the element ends.
		if inPre > 0 {
			buf.WriteString(tok.raw)
			switch tok.kind {
			case tokenOpen:
				if preTags[tok.tagName()] {
					inPre++
				}
			case tokenClose:
				if preTags[tok.tagName()] {
					inPre--
				}
			}
			continue
		}

		switch tok.kind {
		case tokenOpen:
			tagName := tok.tagName()
			switch {
			case preTags[tagName]:
				inPre++
				ensureIndentedLine(&buf, level, indent, &atLineStart)
				buf.WriteString(tok.raw)
			case blockTags[tagName]:
				// Try to inline short blocks like <li>text</li> or <h1>Title</h1>.
				if inlineableBlocks[tagName] {
					if inlined, skip := tryInlineBlock(tokens[i:], tagName); skip > 0 {
						ensureIndentedLine(&buf, level, indent, &atLineStart)
						buf.WriteString(inlined)
						buf.WriteString("\n")
						atLineStart = true
						// The loop's own i++ accounts for the last consumed token.
						i += skip - 1
						continue
					}
				}
				ensureIndentedLine(&buf, level, indent, &atLineStart)
				buf.WriteString(tok.raw)
				buf.WriteString("\n")
				level++
				atLineStart = true
			default:
				if atLineStart {
					writeIndentPrefix(&buf, level, indent)
					atLineStart = false
				}
				buf.WriteString(tok.raw)
			}

		case tokenClose:
			if !blockTags[tok.tagName()] {
				buf.WriteString(tok.raw)
				break
			}
			// Clamp so that stray closing tags cannot drive the level negative.
			level = max(level-1, 0)
			if !atLineStart {
				buf.WriteString("\n")
			}
			writeIndentPrefix(&buf, level, indent)
			buf.WriteString(tok.raw)
			buf.WriteString("\n")
			atLineStart = true

		case tokenSelfClose:
			tagName := tok.tagName()
			if blockTags[tagName] || tagName == "hr" || tagName == "col" {
				ensureIndentedLine(&buf, level, indent, &atLineStart)
				buf.WriteString(tok.raw)
				buf.WriteString("\n")
				atLineStart = true
				break
			}
			if atLineStart {
				writeIndentPrefix(&buf, level, indent)
				atLineStart = false
			}
			buf.WriteString(tok.raw)

		case tokenText:
			text := collapseWS(tok.raw)
			// Whitespace is only insignificant at the start of an output line.
			// In the middle of a line it separates inline content (for example
			// "<b>a</b> <i>b</i>") and must survive; any space that ends up at
			// the end of a line is trimmed by the post-processing pass below.
			if atLineStart {
				text = strings.TrimLeft(text, " ")
			}
			if text == "" {
				continue
			}
			if atLineStart {
				writeIndentPrefix(&buf, level, indent)
				atLineStart = false
			}
			buf.WriteString(text)

		case tokenCDATA, tokenComment:
			if atLineStart {
				writeIndentPrefix(&buf, level, indent)
				atLineStart = false
			}
			buf.WriteString(tok.raw)
		}
	}

	// Post-process: clean up lines and wrap long ones.
	// NOTE(review): this pass also trims and wraps lines that originate from
	// pre content — confirm whether that is intended.
	lines := strings.Split(buf.String(), "\n")
	final := make([]string, 0, len(lines))
	for _, line := range lines {
		line = strings.TrimRight(line, " \t")
		if runeWidth(line) > defaultMaxLineWidth {
			final = append(final, wrapLine(line, defaultMaxLineWidth)...)
			continue
		}
		final = append(final, line)
	}
	return strings.TrimSpace(strings.Join(final, "\n")) + "\n"
}

// tryInlineBlock checks if the block starting at tokens[0] (an open tag) has
// only inline/text children and a matching close tag, and the total is short
// enough to fit on one line. Returns the inlined string and number of tokens consumed.
//
// A zero token count means "do not inline": the block contains a block child
// that is not itself inlineable, contains multi-line CDATA, is wider than
// defaultMaxLineWidth, or has no matching close tag in tokens.
func tryInlineBlock(tokens []token, tagName string) (string, int) {
	if len(tokens) < 2 {
		return "", 0
	}

	var inner strings.Builder
	inner.WriteString(tokens[0].raw)
	// depth counts open elements named tagName so that a nested element of the
	// same name does not end the scan early.
	depth := 1

	for j := 1; j < len(tokens); j++ {
		tok := tokens[j]
		switch tok.kind {
		case tokenOpen:
			tn := tok.tagName()
			if blockTags[tn] && !inlineableBlocks[tn] {
				// Contains a non-inlineable block child — can't inline.
				return "", 0
			}
			if tn == tagName {
				depth++
			}
			inner.WriteString(tok.raw)

		case tokenClose:
			if tok.tagName() == tagName {
				depth--
				if depth == 0 {
					inner.WriteString(tok.raw)
					result := inner.String()
					if runeWidth(result) <= defaultMaxLineWidth {
						return result, j + 1
					}
					return "", 0
				}
			}
			inner.WriteString(tok.raw)

		case tokenText:
			text := collapseWS(tok.raw)
			if text == "" {
				continue
			}
			// Trim leading space only for the first text token after open tag.
			if j == 1 {
				text = strings.TrimLeft(text, " ")
			}
			inner.WriteString(text)

		case tokenCDATA:
			// CDATA in an inlineable block — don't inline if multiline.
			if strings.Contains(tok.raw, "\n") {
				return "", 0
			}
			inner.WriteString(tok.raw)

		default:
			inner.WriteString(tok.raw)
		}
	}
	return "", 0
}

// wrapLine splits a long line at word boundaries, preserving leading indentation.
// It is XML-aware: it won't break inside tags (< ... >).
func wrapLine(line string, maxWidth int) []string {
	// Extract leading indentation.
	trimmed := strings.TrimLeft(line, " \t")
	indentStr := line[:len(line)-len(trimmed)]
	contIndent := indentStr + " " // continuation lines get extra indent
	indentW := runeWidth(indentStr)
	contW := runeWidth(contIndent)

	var lines []string
	var cur strings.Builder
	cur.WriteString(indentStr)
	curWidth := indentW

	// breakLine finishes the current output line and starts a continuation line.
	breakLine := func() {
		lines = append(lines, strings.TrimRight(cur.String(), " "))
		cur.Reset()
		cur.WriteString(contIndent)
		curWidth = contW
	}

	// needSpace records that the source had whitespace at this point which has
	// not been emitted yet. It is carried across segment boundaries so that
	// the space in "word <b>" or "</b> <i>" is not lost; a line break taken at
	// that position replaces the space.
	needSpace := false

	// Segments are tags (unsplittable) and text (splittable at spaces).
	for _, seg := range splitSegments(trimmed) {
		if seg == "" {
			continue
		}

		// Tags: never break inside.
		if strings.HasPrefix(seg, "<") {
			segW := runeWidth(seg)
			spaceW := 0
			if needSpace {
				spaceW = 1
			}
			switch {
			case curWidth+spaceW+segW > maxWidth && curWidth > indentW:
				// The tag does not fit and the line already has content: wrap.
				breakLine()
			case needSpace:
				cur.WriteByte(' ')
				curWidth++
			}
			needSpace = false
			cur.WriteString(seg)
			curWidth += segW
			continue
		}

		// Text segment: split at word boundaries, remembering whether the
		// original text started with whitespace.
		needSpace = seg[0] == ' ' || seg[0] == '\t'
		for _, word := range strings.Fields(seg) {
			wordW := runeWidth(word)
			spaceW := 0
			if needSpace {
				spaceW = 1
			}
			if curWidth+spaceW+wordW > maxWidth && curWidth > contW {
				breakLine()
				needSpace = false
			}
			if needSpace {
				cur.WriteByte(' ')
				curWidth++
			}
			cur.WriteString(word)
			curWidth += wordW
			needSpace = true
		}
		// Whether a space is owed to the next segment depends only on how
		// this text ended (a whitespace-only segment counts as ending in one).
		last := seg[len(seg)-1]
		needSpace = last == ' ' || last == '\t'
	}

	if rest := strings.TrimRight(cur.String(), " "); rest != "" {
		lines = append(lines, rest)
	}
	if len(lines) == 0 {
		return []string{line}
	}
	return lines
}

// splitSegments breaks text into alternating tag and text segments.
// E.g. "Hello <b>world</b> end" -> ["Hello ", "<b>", "world", "</b>", " end"]
//
// A tag runs from '<' to the next '>'; an unterminated '<' makes the rest of
// the string a single segment.
func splitSegments(s string) []string {
	var segs []string
	for len(s) > 0 {
		lt := strings.Index(s, "<")
		if lt == -1 {
			segs = append(segs, s)
			break
		}
		if lt > 0 {
			segs = append(segs, s[:lt])
		}
		gt := strings.Index(s[lt:], ">")
		if gt == -1 {
			segs = append(segs, s[lt:])
			break
		}
		segs = append(segs, s[lt:lt+gt+1])
		s = s[lt+gt+1:]
	}
	return segs
}

// runeWidth returns the number of runes in s, which is what this package uses
// as the display width of a string.
func runeWidth(s string) int {
	return utf8.RuneCountInString(s)
}

// ensureIndentedLine makes sure output continues on a fresh, indented line:
// it ends the current line if something was already written on it, writes the
// indentation for level, and clears *atLineStart because the line is no
// longer empty.
func ensureIndentedLine(buf *strings.Builder, level int, indent string, atLineStart *bool) {
	if !*atLineStart {
		buf.WriteString("\n")
	}
	writeIndentPrefix(buf, level, indent)
	*atLineStart = false
}

// writeIndentPrefix writes indent to buf level times. A level of zero or less
// writes nothing.
func writeIndentPrefix(buf *strings.Builder, level int, indent string) {
	// strings.Repeat panics on a negative count, hence the guard.
	if level > 0 {
		buf.WriteString(strings.Repeat(indent, level))
	}
}

// collapseWS replaces every run of spaces, tabs, newlines and carriage returns
// in s with a single space. Leading and trailing runs are collapsed, not
// removed.
func collapseWS(s string) string {
	var buf strings.Builder
	buf.Grow(len(s))
	inWS := false
	for _, r := range s {
		switch r {
		case ' ', '\t', '\n', '\r':
			if !inWS {
				buf.WriteByte(' ')
				inWS = true
			}
		default:
			buf.WriteRune(r)
			inWS = false
		}
	}
	return buf.String()
}

// Token types for the XML tokenizer.
type tokenKind int

const (
	tokenOpen      tokenKind = iota // <tag ...>
	tokenClose                      // </tag>
	tokenSelfClose                  // <tag ... />
	tokenText                       // plain text
	tokenCDATA                      // <![CDATA[ ... ]]>
	tokenComment                    // <!-- ... -->
)

// token is one lexical unit of the input; raw is the exact source text,
// including the angle brackets for tag-like tokens.
type token struct {
	kind tokenKind
	raw  string
}

// tagName returns the lower-cased element name of an open, self-closing or
// close tag, without angle brackets, slashes or attributes. It returns "" for
// every other token kind.
func (t token) tagName() string {
	s := t.raw
	switch t.kind {
	case tokenOpen, tokenSelfClose:
		s = s[1:]
		if strings.HasSuffix(s, "/>") {
			s = s[:len(s)-2]
		} else {
			s = strings.TrimSuffix(s, ">")
		}
		// The name ends at the first whitespace; "\r" is included so that
		// CRLF-formatted input does not leave a carriage return in the name.
		if idx := strings.IndexAny(s, " \t\r\n"); idx > 0 {
			s = s[:idx]
		}
		return strings.ToLower(s)
	case tokenClose:
		s = s[2:]
		s = strings.TrimSuffix(s, ">")
		return strings.ToLower(strings.TrimSpace(s))
	}
	return ""
}

// tokenize splits input into tags, text, CDATA sections and comments. The raw
// fields of the returned tokens concatenate back to input.
//
// An unterminated CDATA section or comment extends to the end of the input; a
// '<' with no following '>' turns the remainder into a text token. A tag ends
// at the first '>' after its '<'.
func tokenize(input string) []token {
	var tokens []token
	i := 0
	for i < len(input) {
		rest := input[i:]

		// Plain text runs up to the next '<'.
		if rest[0] != '<' {
			end := strings.IndexByte(rest, '<')
			if end == -1 {
				tokens = append(tokens, token{kind: tokenText, raw: rest})
				break
			}
			tokens = append(tokens, token{kind: tokenText, raw: rest[:end]})
			i += end
			continue
		}

		// CDATA must be recognised before ordinary tags because its content
		// may contain '<' and '>'.
		if strings.HasPrefix(rest, "<![CDATA[") {
			end := strings.Index(rest, "]]>")
			if end == -1 {
				tokens = append(tokens, token{kind: tokenCDATA, raw: rest})
				break
			}
			tokens = append(tokens, token{kind: tokenCDATA, raw: rest[:end+3]})
			i += end + 3
			continue
		}

		if strings.HasPrefix(rest, "<!--") {
			end := strings.Index(rest, "-->")
			if end == -1 {
				tokens = append(tokens, token{kind: tokenComment, raw: rest})
				break
			}
			tokens = append(tokens, token{kind: tokenComment, raw: rest[:end+3]})
			i += end + 3
			continue
		}

		end := strings.IndexByte(rest, '>')
		if end == -1 {
			tokens = append(tokens, token{kind: tokenText, raw: rest})
			break
		}
		tagStr := rest[:end+1]
		switch {
		case strings.HasPrefix(tagStr, "</"):
			tokens = append(tokens, token{kind: tokenClose, raw: tagStr})
		case strings.HasSuffix(tagStr, "/>"):
			tokens = append(tokens, token{kind: tokenSelfClose, raw: tagStr})
		default:
			tokens = append(tokens, token{kind: tokenOpen, raw: tagStr})
		}
		i += end + 1
	}
	return tokens
}