package format
import (
"strings"
"unicode/utf8"
)
// defaultMaxLineWidth is the line-width limit, counted in runes, used both
// when deciding whether a block can be inlined and when wrapping long output
// lines.
const defaultMaxLineWidth = 120
// blockTags is the set of element names treated as block-level: each such tag
// is placed on its own output line and its children are indented one level
// deeper.
var blockTags = map[string]bool{
	// Page layout containers.
	"ac:layout":         true,
	"ac:layout-section": true,
	"ac:layout-cell":    true,

	// Generic block content.
	"p":   true,
	"div": true,
	"h1":  true,
	"h2":  true,
	"h3":  true,
	"h4":  true,
	"h5":  true,
	"h6":  true,

	// Lists.
	"ul": true,
	"ol": true,
	"li": true,

	// Tables.
	"table":    true,
	"colgroup": true,
	"thead":    true,
	"tbody":    true,
	"tr":       true,
	"th":       true,
	"td":       true,

	// Macros.
	"ac:structured-macro": true,
	"ac:rich-text-body":   true,
	"ac:plain-text-body":  true,

	// Task lists.
	"ac:task-list": true,
	"ac:task":      true,
	"ac:task-body": true,
}
// inlineableBlocks is the set of block tags that are kept on a single line
// (open tag, content, close tag) when the whole element is short enough.
//
// NOTE(review): "ac:task-id" and "ac:task-status" are listed here but are not
// present in blockTags, and this set is only consulted for block tags, so
// those two entries currently have no effect — confirm whether they were
// meant to be block tags as well.
var inlineableBlocks = map[string]bool{
	// List items and table cells.
	"li": true,
	"th": true,
	"td": true,

	// Headings.
	"h1": true,
	"h2": true,
	"h3": true,
	"h4": true,
	"h5": true,
	"h6": true,

	// Task metadata.
	"ac:task-id":     true,
	"ac:task-status": true,
}
// preTags is the set of elements whose content is copied to the output
// verbatim instead of being reformatted.
var preTags = map[string]bool{
	"ac:plain-text-body": true,
}
// PrettyXML formats Confluence storage XML with sensible indentation.
func PrettyXML(input string, indent string) string {
tokens := tokenize(input)
var buf strings.Builder
level := 0
inPre := 0
atLineStart := true
i := 0
for i < len(tokens) {
tok := tokens[i]
switch tok.kind {
case tokenOpen:
tagName := tok.tagName()
if inPre > 0 {
buf.WriteString(tok.raw)
if preTags[tagName] {
inPre++
}
i++
continue
}
if preTags[tagName] {
inPre++
ensureIndentedLine(&buf, level, indent, &atLineStart)
buf.WriteString(tok.raw)
i++
continue
}
if blockTags[tagName] {
// Try to inline short blocks like
text, Title
if inlineableBlocks[tagName] {
if inlined, skip := tryInlineBlock(tokens[i:], tagName); skip > 0 {
ensureIndentedLine(&buf, level, indent, &atLineStart)
buf.WriteString(inlined)
buf.WriteString("\n")
atLineStart = true
i += skip
continue
}
}
ensureIndentedLine(&buf, level, indent, &atLineStart)
buf.WriteString(tok.raw)
buf.WriteString("\n")
level++
atLineStart = true
} else {
if atLineStart {
writeIndentPrefix(&buf, level, indent)
atLineStart = false
}
buf.WriteString(tok.raw)
}
case tokenClose:
tagName := tok.tagName()
if inPre > 0 {
buf.WriteString(tok.raw)
if preTags[tagName] {
inPre--
}
i++
continue
}
if blockTags[tagName] {
level--
if level < 0 {
level = 0
}
if !atLineStart {
buf.WriteString("\n")
}
writeIndentPrefix(&buf, level, indent)
buf.WriteString(tok.raw)
buf.WriteString("\n")
atLineStart = true
} else {
buf.WriteString(tok.raw)
}
case tokenSelfClose:
tagName := tok.tagName()
if inPre > 0 {
buf.WriteString(tok.raw)
i++
continue
}
if blockTags[tagName] || tagName == "hr" || tagName == "col" {
ensureIndentedLine(&buf, level, indent, &atLineStart)
buf.WriteString(tok.raw)
buf.WriteString("\n")
atLineStart = true
} else {
if atLineStart {
writeIndentPrefix(&buf, level, indent)
atLineStart = false
}
buf.WriteString(tok.raw)
}
case tokenText:
if inPre > 0 {
buf.WriteString(tok.raw)
i++
continue
}
text := collapseWS(tok.raw)
if text == "" || text == " " {
i++
continue
}
if atLineStart {
text = strings.TrimLeft(text, " ")
if text == "" {
i++
continue
}
writeIndentPrefix(&buf, level, indent)
atLineStart = false
}
buf.WriteString(text)
case tokenCDATA, tokenComment:
if inPre > 0 {
buf.WriteString(tok.raw)
i++
continue
}
if atLineStart {
writeIndentPrefix(&buf, level, indent)
atLineStart = false
}
buf.WriteString(tok.raw)
}
i++
}
result := buf.String()
// Post-process: clean up lines and wrap long ones
lines := strings.Split(result, "\n")
var final []string
for _, line := range lines {
line = strings.TrimRight(line, " \t")
if runeWidth(line) > defaultMaxLineWidth {
final = append(final, wrapLine(line, defaultMaxLineWidth)...)
} else {
final = append(final, line)
}
}
return strings.TrimSpace(strings.Join(final, "\n")) + "\n"
}
// tryInlineBlock checks whether the block starting at tokens[0] (an open tag
// named tagName) has only inline/text children and a matching close tag, and
// whether the whole element fits within defaultMaxLineWidth runes.
//
// On success it returns the single-line rendering and the number of tokens
// consumed (open tag through matching close tag). It returns ("", 0) when the
// element cannot be inlined: it contains a block tag that is not itself
// inlineable, it contains a multi-line CDATA section, no matching close tag is
// found, or the rendering is too wide.
//
// The width is tracked incrementally and the scan stops as soon as the limit
// is exceeded, so an unclosed or very long element does not force a scan (and
// copy) of the rest of the document.
func tryInlineBlock(tokens []token, tagName string) (string, int) {
	if len(tokens) < 2 {
		return "", 0
	}

	var inner strings.Builder
	width := 0
	// add appends s and reports whether the rendering still fits on one line.
	// The rendering only ever grows, so once it is too wide it stays too wide.
	add := func(s string) bool {
		inner.WriteString(s)
		width += runeWidth(s)
		return width <= defaultMaxLineWidth
	}

	if !add(tokens[0].raw) {
		return "", 0
	}
	depth := 1 // nesting depth of elements named tagName

	for j := 1; j < len(tokens); j++ {
		tok := tokens[j]
		piece := tok.raw

		switch tok.kind {
		case tokenOpen:
			tn := tok.tagName()
			if blockTags[tn] && !inlineableBlocks[tn] {
				// Contains a non-inlineable block child — can't inline.
				return "", 0
			}
			if tn == tagName {
				depth++
			}

		case tokenClose:
			if tok.tagName() == tagName {
				depth--
				if depth == 0 {
					if !add(piece) {
						return "", 0
					}
					return inner.String(), j + 1
				}
			}

		case tokenText:
			piece = collapseWS(tok.raw)
			// Trim leading space only for the first text token after the open tag.
			if j == 1 {
				piece = strings.TrimLeft(piece, " ")
			}

		case tokenCDATA:
			// CDATA in an inlineable block — don't inline if multiline.
			if strings.Contains(tok.raw, "\n") {
				return "", 0
			}
		}

		if !add(piece) {
			return "", 0
		}
	}

	// Ran out of tokens without finding the matching close tag.
	return "", 0
}
// wrapLine splits a long line at word boundaries, preserving leading indentation.
// It is XML-aware: it won't break inside tags (< ... >).
func wrapLine(line string, maxWidth int) []string {
// Extract leading indentation
trimmed := strings.TrimLeft(line, " \t")
indentStr := line[:len(line)-len(trimmed)]
contIndent := indentStr + " " // continuation lines get extra indent
// Split into segments: tags (unsplittable) and text (splittable at spaces)
segments := splitSegments(trimmed)
var lines []string
var cur strings.Builder
cur.WriteString(indentStr)
curWidth := runeWidth(indentStr)
for _, seg := range segments {
segW := runeWidth(seg)
if seg == "" {
continue
}
// Tags and non-space text: never break inside
if strings.HasPrefix(seg, "<") {
// If adding this tag exceeds limit and we have content, wrap
if curWidth+segW > maxWidth && curWidth > runeWidth(indentStr) {
lines = append(lines, strings.TrimRight(cur.String(), " "))
cur.Reset()
cur.WriteString(contIndent)
curWidth = runeWidth(contIndent)
}
cur.WriteString(seg)
curWidth += segW
continue
}
// Text segment: split at word boundaries
words := strings.Fields(seg)
// Preserve leading space if original had one
needSpace := len(seg) > 0 && seg[0] == ' '
for _, word := range words {
wordW := runeWidth(word)
spaceW := 0
if needSpace {
spaceW = 1
}
if curWidth+spaceW+wordW > maxWidth && curWidth > runeWidth(contIndent) {
lines = append(lines, strings.TrimRight(cur.String(), " "))
cur.Reset()
cur.WriteString(contIndent)
curWidth = runeWidth(contIndent)
needSpace = false
}
if needSpace {
cur.WriteByte(' ')
curWidth++
}
cur.WriteString(word)
curWidth += wordW
needSpace = true
}
}
if cur.Len() > 0 {
final := strings.TrimRight(cur.String(), " ")
if final != "" {
lines = append(lines, final)
}
}
if len(lines) == 0 {
return []string{line}
}
return lines
}
// splitSegments cuts s into consecutive tag and text pieces, in order.
// A tag piece runs from a '<' through the next '>' (inclusive); everything
// between tags is a text piece. Empty text pieces are not emitted. If a '<'
// has no following '>', the remainder of s is returned as one final piece.
//
// E.g. "Hello <b>world</b> end" -> ["Hello ", "<b>", "world", "</b>", " end"]
func splitSegments(s string) []string {
	var out []string
	rest := s
	for rest != "" {
		open := strings.IndexByte(rest, '<')
		switch {
		case open < 0:
			// No more tags: the remainder is plain text.
			return append(out, rest)
		case open > 0:
			out = append(out, rest[:open])
			rest = rest[open:]
		}

		// rest now begins with '<'.
		end := strings.IndexByte(rest, '>')
		if end < 0 {
			// Unterminated tag: keep it as a single trailing piece.
			return append(out, rest)
		}
		out = append(out, rest[:end+1])
		rest = rest[end+1:]
	}
	return out
}
// runeWidth returns the number of runes in s. This is the width measure used
// for all line-length decisions in this file; it counts runes, not bytes.
func runeWidth(s string) int {
return utf8.RuneCountInString(s)
}
// ensureIndentedLine positions buf at the start of an indented line: if the
// current line already has content (*atLineStart is false) a newline is
// written first, then the indentation for level is written. On return
// *atLineStart is false, because the indentation prefix has been emitted and
// the caller is expected to write content next.
func ensureIndentedLine(buf *strings.Builder, level int, indent string, atLineStart *bool) {
	midLine := !*atLineStart
	*atLineStart = false
	if midLine {
		buf.WriteString("\n")
	}
	writeIndentPrefix(buf, level, indent)
}
// writeIndentPrefix appends indent to buf level times. A level of zero or
// less writes nothing.
func writeIndentPrefix(buf *strings.Builder, level int, indent string) {
	for n := 0; n < level; n++ {
		buf.WriteString(indent)
	}
}
// collapseWS returns s with every run of spaces, tabs, newlines and carriage
// returns replaced by a single space. Leading and trailing runs are collapsed
// too (to one space each), not removed.
func collapseWS(s string) string {
	var out strings.Builder
	prevWS := false
	for _, r := range s {
		switch r {
		case ' ', '\t', '\n', '\r':
			if prevWS {
				// Already emitted the space for this run.
				continue
			}
			out.WriteByte(' ')
			prevWS = true
		default:
			out.WriteRune(r)
			prevWS = false
		}
	}
	return out.String()
}
// Token types for the XML tokenizer.
// tokenKind identifies which kind of markup a token holds.
type tokenKind int
const (
tokenOpen tokenKind = iota // opening tag, e.g. <p> (any tag that is neither closing nor self-closing)
tokenClose // closing tag, e.g. </p>
tokenSelfClose // self-closing tag, e.g. <br/>
tokenText // plain text
tokenCDATA // CDATA section: <![CDATA[ ... ]]>
tokenComment // comment: <!-- ... -->
)
// token is one lexical unit produced by tokenize.
type token struct {
// kind says which kind of markup this token is.
kind tokenKind
// raw is the token's source text exactly as it appeared in the input,
// including the surrounding angle brackets for tags.
raw string
}
// tagName returns the lower-cased element name of a tag token: the text
// between the opening "<" (or "</") and the first whitespace character, "/>"
// or ">". It returns "" for tokens that are not tags (text, CDATA, comments).
//
// Carriage returns are treated as whitespace, matching collapseWS, so a tag
// whose attributes start on a new CRLF-terminated line still yields a clean
// name.
func (t token) tagName() string {
	switch t.kind {
	case tokenOpen, tokenSelfClose:
		name := strings.TrimPrefix(t.raw, "<")
		if strings.HasSuffix(name, "/>") {
			name = name[:len(name)-2]
		} else {
			name = strings.TrimSuffix(name, ">")
		}
		// Cut at the first whitespace, which separates the name from any
		// attributes.
		if idx := strings.IndexAny(name, " \t\r\n"); idx > 0 {
			name = name[:idx]
		}
		return strings.ToLower(name)

	case tokenClose:
		name := strings.TrimPrefix(t.raw, "</")
		name = strings.TrimSuffix(name, ">")
		return strings.ToLower(strings.TrimSpace(name))
	}
	return ""
}
func tokenize(input string) []token {
var tokens []token
i := 0
for i < len(input) {
if input[i] == '<' {
if strings.HasPrefix(input[i:], "")
if end == -1 {
tokens = append(tokens, token{tokenCDATA, input[i:]})
break
}
tokens = append(tokens, token{tokenCDATA, input[i : i+end+3]})
i += end + 3
continue
}
if strings.HasPrefix(input[i:], "")
if end == -1 {
tokens = append(tokens, token{tokenComment, input[i:]})
break
}
tokens = append(tokens, token{tokenComment, input[i : i+end+3]})
i += end + 3
continue
}
end := strings.Index(input[i:], ">")
if end == -1 {
tokens = append(tokens, token{tokenText, input[i:]})
break
}
tagStr := input[i : i+end+1]
if strings.HasPrefix(tagStr, "") {
tokens = append(tokens, token{tokenClose, tagStr})
} else if strings.HasSuffix(tagStr, "/>") {
tokens = append(tokens, token{tokenSelfClose, tagStr})
} else {
tokens = append(tokens, token{tokenOpen, tagStr})
}
i += end + 1
} else {
end := strings.Index(input[i:], "<")
if end == -1 {
tokens = append(tokens, token{tokenText, input[i:]})
break
}
tokens = append(tokens, token{tokenText, input[i : i+end]})
i += end
}
}
return tokens
}