~bigbes/confluence-md-utilities

5fabfe018459f627441085dd7f87f1f9e4e97af8 — Eugene Blikh 2 months ago 0d19310
feat: add verify command, improve round-trip fidelity

Add verify command for XML validation. Merge adjacent <code> elements
during XML→MD conversion. Preserve macro attribute order for round-trip
fidelity. Fix list item trailing whitespace and normalize <hr /> format.

false
M cmd/mdcx/completions.go => cmd/mdcx/completions.go +1 -0
@@ 11,6 11,7 @@ func init() {
	embedCmd.ValidArgsFunction = completeMarkdownFiles
	fmtCmd.ValidArgsFunction = completeXMLFiles
	extractCmd.ValidArgsFunction = completeXMLFiles
	verifyCmd.ValidArgsFunction = completeXMLFiles
	pullCmd.ValidArgsFunction = cobra.NoFileCompletions
	pushCmd.ValidArgsFunction = completePushArgs


A cmd/mdcx/verify.go => cmd/mdcx/verify.go +347 -0
@@ 0,0 1,347 @@
package main

import (
	"fmt"
	"io"
	"os"
	"regexp"
	"strings"

	"github.com/spf13/cobra"

	"sourcecraft.dev/bigbes/confluence-md-utilities/converter"
	"sourcecraft.dev/bigbes/confluence-md-utilities/format"
)

var (
	// verifyIndent is the indentation unit handed to format.PrettyXML for
	// both sides of the comparison.
	verifyIndent string
)

// verifyCmd implements the "verify" subcommand. It reads Confluence XML from
// the file given as the single optional argument (or from stdin), converts it
// to Markdown and back, pretty-prints both the original and the round-tripped
// XML, and compares the two.
//
// Output streams:
//   - status messages ("OK: ..." / "MISMATCH: ...") go to stderr;
//   - the diff itself (file headers and hunks) goes to stdout, so that
//     redirecting stdout captures a complete diff.
//
// On a mismatch the process exits with status 1.
var verifyCmd = &cobra.Command{
	Use:   "verify [input.xml]",
	Short: "Verify round-trip fidelity of XML → Markdown → XML conversion",
	Long: `Check that Confluence XML survives a round-trip through Markdown and back.

Compares:
  A = fmt(input XML)
  B = fmt(xml2md(input XML) → md2xml → XML)

If A and B match, the round-trip is lossless. Otherwise, prints a diff.

Reads from stdin if no file is specified.`,
	Args: cobra.MaximumNArgs(1),
	RunE: func(cmd *cobra.Command, args []string) error {
		var input []byte
		var err error

		if len(args) > 0 {
			input, err = os.ReadFile(args[0])
		} else {
			input, err = io.ReadAll(os.Stdin)
		}
		if err != nil {
			return fmt.Errorf("reading input: %w", err)
		}

		// Normalize input: remove elements that cannot survive round-trip.
		xmlInput := normalizeForVerify(string(input))

		// A: format the (normalized) original XML.
		formatted := format.PrettyXML(xmlInput, verifyIndent)

		// B: XML → Markdown → XML → format.
		md, err := converter.ConfluenceToMarkdown(xmlInput)
		if err != nil {
			return fmt.Errorf("xml→markdown: %w", err)
		}

		xmlRoundTrip, err := converter.MarkdownToConfluence([]byte(md))
		if err != nil {
			return fmt.Errorf("markdown→xml: %w", err)
		}

		formattedRoundTrip := format.PrettyXML(xmlRoundTrip, verifyIndent)

		if formatted == formattedRoundTrip {
			fmt.Fprintln(os.Stderr, "OK: round-trip is lossless")
			return nil
		}

		// Print unified diff with colored inline highlights.
		linesA := strings.Split(formatted, "\n")
		linesB := strings.Split(formattedRoundTrip, "\n")

		fmt.Fprintln(os.Stderr, "MISMATCH: round-trip produced different output")
		fmt.Fprintln(os.Stderr, "")

		// The file headers are part of the diff, so they are written to the
		// same stream as the hunks (printHunk writes to stdout). Keeping them
		// on stderr would leave a redirected diff without its headers.
		fmt.Fprintf(os.Stdout, "%s--- original (formatted)%s\n", ansiRed, ansiReset)
		fmt.Fprintf(os.Stdout, "%s+++ round-trip (formatted)%s\n", ansiGreen, ansiReset)

		ops := computeDiffOps(linesA, linesB)
		hunks := buildHunks(ops, 3)
		for _, h := range hunks {
			printHunk(h)
		}

		// Exit directly rather than returning an error: the mismatch has
		// already been reported above and only the exit status is left to set.
		os.Exit(1)
		return nil
	},
}

// init declares the --indent flag of the verify command and attaches the
// command to the root command.
func init() {
	flags := verifyCmd.Flags()
	flags.StringVar(&verifyIndent, "indent", "  ", "Indentation string (default: 2 spaces)")

	rootCmd.AddCommand(verifyCmd)
}

var (
	// reEmptyParagraph matches a paragraph whose only content is a single
	// <br> tag (self-closed or not), e.g. <p><br /></p> or <p><br/></p>,
	// with optional whitespace around the tag.
	reEmptyParagraph = regexp.MustCompile(`<p>\s*<br\s*/?>\s*</p>`)
	// reSpanInCode matches a text-only <span ...>...</span> that follows the
	// plain text right after an opening <code>; group 1 is everything up to
	// the span, group 2 is the span's text.
	reSpanInCode = regexp.MustCompile(`(<code>[^<]*)<span[^>]*>([^<]*)</span>`)
	// reAdjacentCode matches a closing </code> directly followed by an
	// opening <code>, with nothing in between.
	reAdjacentCode = regexp.MustCompile(`</code><code>`)
)

// normalizeForVerify rewrites XML patterns that are not expected to survive a
// round-trip through Markdown, so that verify only compares what the
// converter can preserve. In order, it:
//
//  1. drops paragraphs that contain nothing but a <br>;
//  2. unwraps text-only <span> elements that follow the leading text of a
//     <code> element;
//  3. merges directly adjacent <code> elements into one.
func normalizeForVerify(xml string) string {
	result := reEmptyParagraph.ReplaceAllString(xml, "")

	// One pass unwraps at most one span per <code>, because the pattern is
	// anchored at the opening tag. Repeat until nothing changes so that
	// several consecutive spans in the same <code> are all unwrapped.
	for {
		unwrapped := reSpanInCode.ReplaceAllString(result, "${1}${2}")
		if unwrapped == result {
			break
		}
		result = unwrapped
	}

	// Removing the "</code><code>" seam joins the two neighbouring elements.
	return reAdjacentCode.ReplaceAllString(result, "")
}

// ANSI SGR escape sequences used to colorize the diff output.
const (
	ansiReset = "\033[0m"     // reset all attributes
	ansiRed   = "\033[31m"    // red foreground
	ansiGreen = "\033[32m"    // green foreground
	ansiCyan  = "\033[36m"    // cyan foreground
	ansiBold  = "\033[1m"     // bold
	ansiRedBg = "\033[41;37m" // red background, white text
	ansiGrnBg = "\033[42;30m" // green background, black text
)

// diffOp represents a line-level diff operation.
type diffOp int

const (
	opEqual  diffOp = iota
	opRemove        // line only in A
	opAdd           // line only in B
)

// diffLine is a single line in the diff with its operation and source positions.
type diffLine struct {
	op    diffOp
	text  string
	lineA int // 1-based line number in A (-1 if not applicable)
	lineB int // 1-based line number in B (-1 if not applicable)
}

// hunk is a group of diff lines with surrounding context.
//
// startA/startB are the 1-based numbers of the first line the hunk covers on
// each side. When a side contributes no lines (count 0), its start is the
// number of the line preceding the hunk on that side (0 at the very top),
// following the unified-diff convention.
type hunk struct {
	startA, countA int // start and line count for A
	startB, countB int // start and line count for B
	lines          []diffLine
}

// computeDiffOps produces the sequence of line-level operations that turns a
// into b, based on a longest-common-subsequence table.
//
// Every line of a appears exactly once as opEqual or opRemove and every line
// of b exactly once as opEqual or opAdd, in their original order. Within a
// run of changes, removals are emitted before additions. Time and memory are
// both proportional to len(a)*len(b).
func computeDiffOps(a, b []string) []diffLine {
	m, n := len(a), len(b)

	// lcs[i][j] is the LCS length of a[:i] and b[:j].
	lcs := make([][]int, m+1)
	for i := range lcs {
		lcs[i] = make([]int, n+1)
	}
	for i := 1; i <= m; i++ {
		for j := 1; j <= n; j++ {
			switch {
			case a[i-1] == b[j-1]:
				lcs[i][j] = lcs[i-1][j-1] + 1
			case lcs[i-1][j] >= lcs[i][j-1]:
				lcs[i][j] = lcs[i-1][j]
			default:
				lcs[i][j] = lcs[i][j-1]
			}
		}
	}

	// Backtrack from the bottom-right corner; operations come out in
	// reverse order and are flipped afterwards.
	var ops []diffLine
	i, j := m, n
	for i > 0 || j > 0 {
		switch {
		case i > 0 && j > 0 && a[i-1] == b[j-1]:
			ops = append(ops, diffLine{op: opEqual, text: a[i-1], lineA: i, lineB: j})
			i--
			j--
		case j > 0 && (i == 0 || lcs[i][j-1] >= lcs[i-1][j]):
			ops = append(ops, diffLine{op: opAdd, text: b[j-1], lineA: -1, lineB: j})
			j--
		default:
			ops = append(ops, diffLine{op: opRemove, text: a[i-1], lineA: i, lineB: -1})
			i--
		}
	}
	for l, r := 0, len(ops)-1; l < r; l, r = l+1, r-1 {
		ops[l], ops[r] = ops[r], ops[l]
	}
	return ops
}

// buildHunks groups diff operations into unified-diff hunks with up to ctx
// lines of unchanged context on each side of every change. Changes separated
// by at most 2*ctx unchanged lines share one hunk. A negative ctx is treated
// as 0. It returns nil when ops contains no changes.
//
// ops must be a complete, ordered diff as produced by computeDiffOps: hunk
// start positions are derived by counting the A and B lines that precede the
// hunk. The returned hunks alias ops; they do not copy the lines.
func buildHunks(ops []diffLine, ctx int) []hunk {
	if ctx < 0 {
		ctx = 0
	}

	// Collect half-open index ranges [start, end) of changed ops, merging
	// ranges whose gap is small enough for their contexts to touch.
	type span struct{ start, end int }
	var changed []span
	for i, op := range ops {
		if op.op == opEqual {
			continue
		}
		if k := len(changed); k > 0 && i-changed[k-1].end <= 2*ctx {
			changed[k-1].end = i + 1
			continue
		}
		changed = append(changed, span{i, i + 1})
	}

	var hunks []hunk
	// pos is the index of the next op not yet accounted for; seenA/seenB are
	// the numbers of A and B lines in ops[:pos].
	pos, seenA, seenB := 0, 0, 0
	for _, ch := range changed {
		lo := ch.start - ctx
		if lo < 0 {
			lo = 0
		}
		hi := ch.end + ctx
		if hi > len(ops) {
			hi = len(ops)
		}

		// Account for the lines between the previous hunk and this one.
		for ; pos < lo; pos++ {
			if ops[pos].op != opAdd {
				seenA++
			}
			if ops[pos].op != opRemove {
				seenB++
			}
		}

		h := hunk{lines: ops[lo:hi]}
		for _, dl := range h.lines {
			if dl.op == opEqual || dl.op == opRemove {
				h.countA++
			}
			if dl.op == opEqual || dl.op == opAdd {
				h.countB++
			}
		}

		// A non-empty side starts at the line after those already seen; an
		// empty side is reported at the line that precedes the hunk.
		h.startA, h.startB = seenA, seenB
		if h.countA > 0 {
			h.startA++
		}
		if h.countB > 0 {
			h.startB++
		}

		seenA += h.countA
		seenB += h.countB
		if hi > pos {
			pos = hi
		}
		hunks = append(hunks, h)
	}
	return hunks
}

// printHunk writes one unified-diff hunk to stdout: a cyan "@@ ... @@" header
// followed by the hunk's lines. Context lines are printed plain, removals in
// red with a "-" prefix and additions in green with a "+" prefix.
//
// A run of removals directly followed by a run of additions is paired up
// line by line; each pair is printed as "-old" then "+new" with the differing
// part highlighted by inlineHighlight. Lines left over after pairing are
// printed without inline highlights.
func printHunk(h hunk) {
	out := os.Stdout
	fmt.Fprintf(out, "%s@@ -%d,%d +%d,%d @@%s\n",
		ansiCyan, h.startA, h.countA, h.startB, h.countB, ansiReset)

	body := h.lines
	for idx := 0; idx < len(body); {
		switch body[idx].op {
		case opEqual:
			fmt.Fprintf(out, " %s\n", body[idx].text)
			idx++

		case opAdd:
			// An addition that does not follow a removal run.
			fmt.Fprintf(out, "%s+%s%s\n", ansiGreen, body[idx].text, ansiReset)
			idx++

		case opRemove:
			// Find the end of the removal run and of the addition run
			// that immediately follows it (possibly empty).
			remEnd := idx
			for remEnd < len(body) && body[remEnd].op == opRemove {
				remEnd++
			}
			addEnd := remEnd
			for addEnd < len(body) && body[addEnd].op == opAdd {
				addEnd++
			}
			removed, added := body[idx:remEnd], body[remEnd:addEnd]

			paired := min(len(removed), len(added))
			for p := 0; p < paired; p++ {
				oldText, newText := inlineHighlight(removed[p].text, added[p].text)
				fmt.Fprintf(out, "%s-%s%s\n", ansiRed, oldText, ansiReset)
				fmt.Fprintf(out, "%s+%s%s\n", ansiGreen, newText, ansiReset)
			}
			for _, dl := range removed[paired:] {
				fmt.Fprintf(out, "%s-%s%s\n", ansiRed, dl.text, ansiReset)
			}
			for _, dl := range added[paired:] {
				fmt.Fprintf(out, "%s+%s%s\n", ansiGreen, dl.text, ansiReset)
			}

			// Both runs have been consumed.
			idx = addEnd

		default:
			// Unknown operation: print nothing, just move on.
			idx++
		}
	}
}

// inlineHighlight compares a removed line a with an added line b and returns
// both with the differing middle part wrapped in ANSI highlighting (bold on a
// red background for a, bold on a green background for b).
//
// The differing part is what remains after stripping the longest common
// prefix and then the longest common suffix that does not overlap it,
// compared rune by rune. After the highlighted part the foreground color is
// switched back to red/green; the caller is expected to emit the final reset.
// Identical inputs are returned unchanged.
func inlineHighlight(a, b string) (string, string) {
	left, right := []rune(a), []rune(b)

	shorter := len(left)
	if len(right) < shorter {
		shorter = len(right)
	}

	// Length of the shared prefix.
	head := 0
	for head < shorter && left[head] == right[head] {
		head++
	}
	// Length of the shared suffix, limited to what the prefix left over.
	tail := 0
	for tail < shorter-head && left[len(left)-1-tail] == right[len(right)-1-tail] {
		tail++
	}

	changedA := left[head : len(left)-tail]
	changedB := right[head : len(right)-tail]
	if len(changedA)+len(changedB) == 0 {
		// Nothing differs, so there is nothing to highlight.
		return a, b
	}

	leading := string(left[:head])
	trailing := string(left[len(left)-tail:])
	paint := func(background, foreground string, changed []rune) string {
		return leading + ansiBold + background + string(changed) + ansiReset + foreground + trailing
	}

	return paint(ansiRedBg, ansiRed, changedA), paint(ansiGrnBg, ansiGreen, changedB)
}

A cmd/mdcx/verify_test.go => cmd/mdcx/verify_test.go +421 -0
@@ 0,0 1,421 @@
package main

import (
	"strings"
	"testing"

	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

// --- normalizeForVerify ---

func TestNormalizeForVerify_RemovesEmptyBrParagraph(t *testing.T) {
	tests := []struct {
		name  string
		input string
		want  string
	}{
		{
			name:  "br with space before slash",
			input: `<p><br /></p><p>text</p>`,
			want:  `<p>text</p>`,
		},
		{
			name:  "br without space",
			input: `<p><br/></p><p>text</p>`,
			want:  `<p>text</p>`,
		},
		{
			name:  "br with whitespace around",
			input: "<p> \n <br /> \n </p><p>text</p>",
			want:  "<p>text</p>",
		},
		{
			name:  "no empty paragraphs",
			input: `<p>hello</p>`,
			want:  `<p>hello</p>`,
		},
		{
			name:  "multiple empty paragraphs",
			input: `<p><br /></p><p><br/></p><p>text</p>`,
			want:  `<p>text</p>`,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			assert.Equal(t, tt.want, normalizeForVerify(tt.input))
		})
	}
}

func TestNormalizeForVerify_SpanInsideCode(t *testing.T) {
	tests := []struct {
		name  string
		input string
		want  string
	}{
		{
			name:  "span inside code unwrapped",
			input: `<code>hello<span> world</span></code>`,
			want:  `<code>hello world</code>`,
		},
		{
			name:  "span with attributes inside code",
			input: `<code>a<span class="x"> : </span></code>`,
			want:  `<code>a : </code>`,
		},
		{
			name:  "no span inside code",
			input: `<code>plain</code>`,
			want:  `<code>plain</code>`,
		},
		{
			name:  "span outside code untouched",
			input: `<p><span>text</span></p>`,
			want:  `<p><span>text</span></p>`,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			assert.Equal(t, tt.want, normalizeForVerify(tt.input))
		})
	}
}

func TestNormalizeForVerify_AdjacentCodeMerged(t *testing.T) {
	tests := []struct {
		name  string
		input string
		want  string
	}{
		{
			name:  "directly adjacent",
			input: `<code>hello</code><code>world</code>`,
			want:  `<code>helloworld</code>`,
		},
		{
			name:  "with whitespace between",
			input: `<code>hello</code> <code>world</code>`,
			want:  `<code>hello</code> <code>world</code>`,
		},
		{
			name:  "single code element untouched",
			input: `<code>hello</code>`,
			want:  `<code>hello</code>`,
		},
		{
			name:  "combined: span inside + adjacent merge",
			input: `<code>plan<span> : </span></code><code>vclock</code>`,
			want:  `<code>plan : vclock</code>`,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			assert.Equal(t, tt.want, normalizeForVerify(tt.input))
		})
	}
}

// --- computeDiffOps ---

// TestComputeDiffOps_IdenticalInputs checks that diffing a slice against
// itself yields one opEqual per line and nothing else.
func TestComputeDiffOps_IdenticalInputs(t *testing.T) {
	input := []string{"a", "b", "c"}

	got := computeDiffOps(input, input)

	require.Len(t, got, 3)
	for i := range got {
		assert.Equal(t, opEqual, got[i].op)
	}
}

// TestComputeDiffOps_CompletelyDifferent checks that inputs with no line in
// common produce one removal per A line and one addition per B line.
func TestComputeDiffOps_CompletelyDifferent(t *testing.T) {
	got := computeDiffOps([]string{"a", "b"}, []string{"x", "y"})

	// Tally the operations by kind.
	tally := make(map[diffOp]int)
	for _, line := range got {
		tally[line.op]++
	}

	assert.Equal(t, 2, tally[opRemove])
	assert.Equal(t, 2, tally[opAdd])
}

func TestComputeDiffOps_EmptyInputs(t *testing.T) {
	assert.Empty(t, computeDiffOps(nil, nil))
	assert.Empty(t, computeDiffOps([]string{}, []string{}))
}

// TestComputeDiffOps_OneEmpty checks that diffing against an empty side
// yields only removals (empty B) or only additions (empty A).
func TestComputeDiffOps_OneEmpty(t *testing.T) {
	onlyA := computeDiffOps([]string{"a", "b"}, nil)
	require.Len(t, onlyA, 2)
	for i := range onlyA {
		assert.Equal(t, opRemove, onlyA[i].op)
	}

	onlyB := computeDiffOps(nil, []string{"x", "y"})
	require.Len(t, onlyB, 2)
	for i := range onlyB {
		assert.Equal(t, opAdd, onlyB[i].op)
	}
}

// TestComputeDiffOps_SingleLineChange checks the exact operation sequence for
// a one-line replacement: the old line is removed before the new one is added.
func TestComputeDiffOps_SingleLineChange(t *testing.T) {
	got := computeDiffOps(
		[]string{"aaa", "bbb", "ccc"},
		[]string{"aaa", "BBB", "ccc"},
	)

	// Expected: equal(aaa), remove(bbb), add(BBB), equal(ccc).
	expected := []struct {
		op   diffOp
		text string
	}{
		{opEqual, "aaa"},
		{opRemove, "bbb"},
		{opAdd, "BBB"},
		{opEqual, "ccc"},
	}
	require.Len(t, got, len(expected))
	for i, exp := range expected {
		assert.Equal(t, exp.op, got[i].op)
		assert.Equal(t, exp.text, got[i].text)
	}
}

// TestComputeDiffOps_LineNumbers checks the 1-based line numbers recorded on
// each operation: both sides for equal lines, and -1 on the side an added or
// removed line does not belong to.
func TestComputeDiffOps_LineNumbers(t *testing.T) {
	a := []string{"same", "old"}
	b := []string{"same", "new"}
	ops := computeDiffOps(a, b)

	// Guard the indexing below so a regression fails the test instead of
	// panicking with an out-of-range index.
	require.Len(t, ops, 3)

	// equal: lineA=1, lineB=1
	assert.Equal(t, 1, ops[0].lineA)
	assert.Equal(t, 1, ops[0].lineB)
	// remove: lineA=2, lineB=-1
	assert.Equal(t, 2, ops[1].lineA)
	assert.Equal(t, -1, ops[1].lineB)
	// add: lineA=-1, lineB=2
	assert.Equal(t, -1, ops[2].lineA)
	assert.Equal(t, 2, ops[2].lineB)
}

// TestComputeDiffOps_Insertion checks that a line present only in B shows up
// as a single opAdd between the surrounding equal lines.
func TestComputeDiffOps_Insertion(t *testing.T) {
	got := computeDiffOps([]string{"a", "c"}, []string{"a", "b", "c"})

	require.Len(t, got, 3)
	first, middle, last := got[0], got[1], got[2]
	assert.Equal(t, opEqual, first.op)
	assert.Equal(t, opAdd, middle.op)
	assert.Equal(t, "b", middle.text)
	assert.Equal(t, opEqual, last.op)
}

// TestComputeDiffOps_Deletion checks that a line present only in A shows up
// as a single opRemove between the surrounding equal lines.
func TestComputeDiffOps_Deletion(t *testing.T) {
	got := computeDiffOps([]string{"a", "b", "c"}, []string{"a", "c"})

	require.Len(t, got, 3)
	first, middle, last := got[0], got[1], got[2]
	assert.Equal(t, opEqual, first.op)
	assert.Equal(t, opRemove, middle.op)
	assert.Equal(t, "b", middle.text)
	assert.Equal(t, opEqual, last.op)
}

// --- buildHunks ---

// TestBuildHunks_NoChanges checks that a diff without changes produces no
// hunks at all.
func TestBuildHunks_NoChanges(t *testing.T) {
	same := []string{"a", "b", "c"}
	unchanged := computeDiffOps(same, []string{"a", "b", "c"})

	assert.Empty(t, buildHunks(unchanged, 3))
}

func TestBuildHunks_SingleChange(t *testing.T) {
	a := []string{"1", "2", "3", "4", "5"}
	b := []string{"1", "2", "X", "4", "5"}
	ops := computeDiffOps(a, b)
	hunks := buildHunks(ops, 1)

	require.Len(t, hunks, 1)
	h := hunks[0]

	// Context=1: line 2 (before) + remove(3)/add(X) + line 4 (after) = 3 each side
	assert.Equal(t, 3, h.countA) // 2, remove(3), 4
	assert.Equal(t, 3, h.countB) // 2, add(X), 4
}

func TestBuildHunks_TwoSeparateChanges(t *testing.T) {
	// Changes far enough apart to be separate hunks
	a := []string{"1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12"}
	b := []string{"1", "X", "3", "4", "5", "6", "7", "8", "9", "10", "Y", "12"}
	ops := computeDiffOps(a, b)
	hunks := buildHunks(ops, 1)

	assert.Len(t, hunks, 2)
}

func TestBuildHunks_MergesNearbyChanges(t *testing.T) {
	// Two changes only 2 lines apart with ctx=3 should merge
	a := []string{"1", "2", "3", "4", "5", "6", "7"}
	b := []string{"1", "X", "3", "4", "Y", "6", "7"}
	ops := computeDiffOps(a, b)
	hunks := buildHunks(ops, 3)

	assert.Len(t, hunks, 1, "nearby changes should merge into one hunk")
}

func TestBuildHunks_ContextClampedToFileEdge(t *testing.T) {
	// Change at line 1 — context shouldn't go negative
	a := []string{"old", "same"}
	b := []string{"new", "same"}
	ops := computeDiffOps(a, b)
	hunks := buildHunks(ops, 3)

	require.Len(t, hunks, 1)
	assert.Equal(t, 1, hunks[0].startA)
	assert.Equal(t, 1, hunks[0].startB)
}

func TestBuildHunks_Counts(t *testing.T) {
	a := []string{"ctx", "old1", "old2", "ctx"}
	b := []string{"ctx", "new1", "ctx"}
	ops := computeDiffOps(a, b)
	hunks := buildHunks(ops, 1)

	require.Len(t, hunks, 1)
	h := hunks[0]
	// countA = context lines + removed lines
	// countB = context lines + added lines
	aLines := 0
	bLines := 0
	for _, dl := range h.lines {
		if dl.op == opEqual || dl.op == opRemove {
			aLines++
		}
		if dl.op == opEqual || dl.op == opAdd {
			bLines++
		}
	}
	assert.Equal(t, aLines, h.countA)
	assert.Equal(t, bLines, h.countB)
}

// --- inlineHighlight ---

// TestInlineHighlight_IdenticalLines checks that identical lines come back
// unchanged, without any ANSI escapes.
func TestInlineHighlight_IdenticalLines(t *testing.T) {
	const line = "same text"

	gotA, gotB := inlineHighlight(line, line)

	assert.Equal(t, "same text", gotA)
	assert.Equal(t, "same text", gotB)
}

// TestInlineHighlight_SingleWordDiff checks a change in the last word: the
// shared prefix "hello " is kept and the changed part is marked bold with the
// side-specific background. There is no shared suffix here.
func TestInlineHighlight_SingleWordDiff(t *testing.T) {
	removed, added := inlineHighlight("hello world", "hello Earth")

	// The common prefix survives on both sides.
	assert.Contains(t, removed, "hello ")
	assert.Contains(t, added, "hello ")

	// Both sides carry the bold marker.
	assert.Contains(t, removed, ansiBold)
	assert.Contains(t, added, ansiBold)

	// Each side uses its own background color.
	assert.Contains(t, removed, ansiRedBg)
	assert.Contains(t, added, ansiGrnBg)
}

// TestInlineHighlight_MiddleChange checks a change surrounded by the shared
// prefix "abc-" and the shared suffix "-xyz": both sides are marked bold and
// keep their changed word and the prefix.
func TestInlineHighlight_MiddleChange(t *testing.T) {
	removed, added := inlineHighlight("abc-OLD-xyz", "abc-NEW-xyz")

	// Highlight markers on both sides.
	assert.Contains(t, removed, ansiBold)
	assert.Contains(t, added, ansiBold)

	// The changed words themselves.
	assert.Contains(t, removed, "OLD")
	assert.Contains(t, added, "NEW")

	// The shared prefix is present on both sides.
	assertPlainContains(t, removed, "abc-")
	assertPlainContains(t, added, "abc-")
}

// TestInlineHighlight_PrefixOnlyDifference checks lines that differ only at
// the start and share the suffix "-same".
func TestInlineHighlight_PrefixOnlyDifference(t *testing.T) {
	removed, added := inlineHighlight("XXX-same", "YYY-same")

	assert.Contains(t, removed, "XXX")
	assert.Contains(t, added, "YYY")
	assert.Contains(t, removed, ansiBold)
}

// TestInlineHighlight_SuffixOnlyDifference checks lines that differ only at
// the end and share the prefix "same-".
func TestInlineHighlight_SuffixOnlyDifference(t *testing.T) {
	removed, added := inlineHighlight("same-XXX", "same-YYY")

	assert.Contains(t, removed, "XXX")
	assert.Contains(t, added, "YYY")
	assert.Contains(t, removed, ansiBold)
}

// TestInlineHighlight_EmptyVsNonEmpty checks that pairing an empty removed
// line with a non-empty added line highlights the added text.
func TestInlineHighlight_EmptyVsNonEmpty(t *testing.T) {
	_, added := inlineHighlight("", "added")

	for _, fragment := range []string{"added", ansiBold} {
		assert.Contains(t, added, fragment)
	}
}

// TestInlineHighlight_Unicode checks that multi-byte text is compared rune by
// rune: the lines share the prefix "привет м" and the suffix "р", and only
// the letter in between differs.
func TestInlineHighlight_Unicode(t *testing.T) {
	removed, added := inlineHighlight("привет мир", "привет мор")

	assert.Contains(t, removed, ansiBold)
	assert.Contains(t, added, ansiBold)

	// The shared prefix is present on both sides.
	assertPlainContains(t, removed, "привет м")
	assertPlainContains(t, added, "привет м")
}

// assertPlainContains asserts that s contains substr at least once at a
// position that is not immediately preceded by an ANSI SGR escape sequence
// (ESC '[' followed by digits or ';' and a final 'm'). It first reports a
// plain containment failure, then a separate failure when every occurrence
// directly follows an escape sequence.
func assertPlainContains(t *testing.T, s, substr string) {
	t.Helper()
	if !assert.Contains(t, s, substr, "string should contain %q", substr) {
		return
	}

	// Scan every occurrence; one that does not directly follow an escape
	// sequence is enough.
	for from := 0; from <= len(s); {
		rel := strings.Index(s[from:], substr)
		if rel < 0 {
			break
		}
		at := from + rel
		if !endsWithANSIEscape(s[:at]) {
			return
		}
		from = at + 1
	}
	assert.Fail(t, "substring only occurs right after an ANSI escape",
		"%q in %q", substr, s)
}

// endsWithANSIEscape reports whether prefix ends with an ANSI SGR escape
// sequence of the form ESC '[' <digits and ';'> 'm'.
func endsWithANSIEscape(prefix string) bool {
	if !strings.HasSuffix(prefix, "m") {
		return false
	}
	start := strings.LastIndex(prefix, "\033[")
	if start < 0 {
		return false
	}
	// Everything between "ESC[" and the final 'm' must be a parameter byte.
	params := prefix[start+2 : len(prefix)-1]
	return strings.Trim(params, "0123456789;") == ""
}

// --- integration: computeDiffOps + buildHunks round-trip consistency ---

// TestDiffOps_AllOpsPreserveText checks that both inputs can be rebuilt from
// the operation list: equal and removed lines give back A, equal and added
// lines give back B.
func TestDiffOps_AllOpsPreserveText(t *testing.T) {
	origA := []string{"line1", "line2", "line3", "line4"}
	origB := []string{"line1", "changed", "line3", "added", "line4"}

	var rebuiltA, rebuiltB []string
	for _, line := range computeDiffOps(origA, origB) {
		if line.op == opEqual || line.op == opRemove {
			rebuiltA = append(rebuiltA, line.text)
		}
		if line.op == opEqual || line.op == opAdd {
			rebuiltB = append(rebuiltB, line.text)
		}
	}

	assert.Equal(t, origA, rebuiltA, "reconstructed A must match original")
	assert.Equal(t, origB, rebuiltB, "reconstructed B must match original")
}

func TestBuildHunks_AllChangedLinesPresent(t *testing.T) {
	a := strings.Split("a\nb\nc\nd\ne\nf\ng\nh\ni\nj", "\n")
	b := strings.Split("a\nB\nc\nd\ne\nf\ng\nH\ni\nj", "\n")
	ops := computeDiffOps(a, b)
	hunks := buildHunks(ops, 1)

	// Collect all changed texts from hunks
	var removed, added []string
	for _, h := range hunks {
		for _, dl := range h.lines {
			switch dl.op {
			case opRemove:
				removed = append(removed, dl.text)
			case opAdd:
				added = append(added, dl.text)
			}
		}
	}
	assert.Equal(t, []string{"b", "h"}, removed)
	assert.Equal(t, []string{"B", "H"}, added)
}

M confluence/elements.go => confluence/elements.go +43 -6
@@ 1,26 1,63 @@
package confluence

import "strings"

// Confluence storage format macro helpers.

func CodeMacro(language string, body string) string {
	return CodeMacroWithID(language, body, "")
	return CodeMacroWithID(language, body, "", "")
}

func CodeMacroWithID(language string, body string, macroID string) string {
func CodeMacroWithID(language string, body string, macroID string, attrOrder string) string {
	var lang string
	if language != "" {
		lang = `<ac:parameter ac:name="language">` + language + `</ac:parameter>`
	}
	tag := `<ac:structured-macro ac:name="code" ac:schema-version="1">`
	if macroID != "" {
		tag = `<ac:structured-macro ac:macro-id="` + macroID + `" ac:name="code" ac:schema-version="1">`
	}
	tag := buildStructuredMacroTag("code", macroID, attrOrder)
	return tag +
		lang +
		`<ac:plain-text-body><![CDATA[` + escapeCDATA(body) + `]]></ac:plain-text-body>` +
		`</ac:structured-macro>`
}

// buildStructuredMacroTag returns the opening <ac:structured-macro> tag for
// the macro called name.
//
// Three attributes are known: "name" (the macro name), "schema-version"
// (always "1") and "macro-id" (macroID, only when it is non-empty). attrOrder
// is a comma-separated list of these short names, e.g.
// "name,schema-version,macro-id"; attributes are written in exactly that
// order, each with an "ac:" prefix. Names in the list that are not known are
// skipped, and known attributes missing from the list are not written. With
// an empty attrOrder the order is name, schema-version, then macro-id if set.
func buildStructuredMacroTag(name string, macroID string, attrOrder string) string {
	// valueOf resolves a short attribute name to its value, if it has one.
	valueOf := func(attr string) (string, bool) {
		switch attr {
		case "name":
			return name, true
		case "schema-version":
			return "1", true
		case "macro-id":
			return macroID, macroID != ""
		}
		return "", false
	}

	// Fall back to the default order when no original order is known.
	sequence := []string{"name", "schema-version"}
	switch {
	case attrOrder != "":
		sequence = strings.Split(attrOrder, ",")
	case macroID != "":
		sequence = append(sequence, "macro-id")
	}

	pieces := []string{"<ac:structured-macro"}
	for _, attr := range sequence {
		value, known := valueOf(attr)
		if !known {
			continue
		}
		pieces = append(pieces, ` ac:`+attr+`="`+value+`"`)
	}
	pieces = append(pieces, ">")
	return strings.Join(pieces, "")
}

func InfoPanel(body string) string {
	return `<ac:structured-macro ac:name="info" ac:schema-version="1">` +
		`<ac:rich-text-body>` + body + `</ac:rich-text-body>` +

M confluence/renderer.go => confluence/renderer.go +8 -4
@@ 18,7 18,8 @@ type Renderer struct {
	inTaskBody         bool
	inlineCommentDepth int
	pendingTableAttrs  string // stored from <!-- table-attrs: ... --> comment
	pendingCodeMacroID string // stored from <!-- ac:code macro-id="..." --> comment
	pendingCodeMacroID    string // stored from <!-- ac:code macro-id="..." --> comment
	pendingCodeAttrOrder string // stored from <!-- ac:code ... attr-order="..." --> comment
}

// NewRenderer creates a new Confluence storage format renderer.


@@ 103,7 104,7 @@ func (r *Renderer) renderTextBlock(w util.BufWriter, source []byte, node ast.Nod
		if r.inTaskBody {
			w.WriteString("</ac:task-body>\n")
			r.inTaskBody = false
		} else {
		} else if _, ok := node.Parent().(*ast.ListItem); !ok {
			w.WriteString("\n")
		}
	}


@@ 133,8 134,9 @@ func (r *Renderer) renderFencedCodeBlock(w util.BufWriter, source []byte, node a
	// Remove trailing newline from code content
	code := strings.TrimRight(buf.String(), "\n")

	w.WriteString(CodeMacroWithID(language, code, r.pendingCodeMacroID))
	w.WriteString(CodeMacroWithID(language, code, r.pendingCodeMacroID, r.pendingCodeAttrOrder))
	r.pendingCodeMacroID = ""
	r.pendingCodeAttrOrder = ""
	w.WriteString("\n")
	return ast.WalkSkipChildren, nil
}


@@ 157,7 159,7 @@ func (r *Renderer) renderCodeBlock(w util.BufWriter, source []byte, node ast.Nod

func (r *Renderer) renderThematicBreak(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
	if entering {
		w.WriteString("<hr/>\n")
		w.WriteString("<hr />\n")
	}
	return ast.WalkContinue, nil
}


@@ 294,9 296,11 @@ func (r *Renderer) convertComment(raw string) (string, bool) {
	// Code macro-id — store for next code block
	case strings.HasPrefix(trimmed, "<!-- ac:code"):
		macroID := extractCommentAttr(trimmed, "macro-id")
		attrOrder := extractCommentAttr(trimmed, "attr-order")
		if macroID != "" {
			r.pendingCodeMacroID = macroID
		}
		r.pendingCodeAttrOrder = attrOrder
		return "", true
	}


M converter/md2xml_test.go => converter/md2xml_test.go +53 -1
@@ 121,7 121,7 @@ func TestMarkdownToConfluence_NestedList(t *testing.T) {
func TestMarkdownToConfluence_HorizontalRule(t *testing.T) {
	result, err := MarkdownToConfluence([]byte("---"))
	require.NoError(t, err)
	assert.Contains(t, result, "<hr/>")
	assert.Contains(t, result, "<hr />")
}

func TestMarkdownToConfluence_TaskList(t *testing.T) {


@@ 489,6 489,58 @@ func TestRoundTrip_InlineCommentMarker(t *testing.T) {
	assert.Contains(t, xmlOutput, "important text")
}

// TestConfluenceToMarkdown_AdjacentCodeMerge checks how <code> elements are
// rendered to Markdown: directly adjacent elements become one code span,
// while single elements and elements separated by text stay as they are.
func TestConfluenceToMarkdown_AdjacentCodeMerge(t *testing.T) {
	type testCase struct {
		desc string
		xml  string
		want string
	}
	cases := []testCase{
		{desc: "directly adjacent code elements merged", xml: `<p><code>hello</code><code>world</code></p>`, want: "`helloworld`"},
		{desc: "adjacent code with span inside", xml: `<p><code>plan<span> : </span></code><code>vclock</code></p>`, want: "`plan : vclock`"},
		{desc: "single code element unchanged", xml: `<p><code>single</code></p>`, want: "`single`"},
		{desc: "non-adjacent code elements stay separate", xml: `<p><code>a</code> text <code>b</code></p>`, want: "`a` text `b`"},
	}
	for _, tc := range cases {
		t.Run(tc.desc, func(t *testing.T) {
			md, err := ConfluenceToMarkdown(tc.xml)
			require.NoError(t, err)
			assert.Contains(t, md, tc.want)
		})
	}
}

// TestRoundTrip_ListNoTrailingWhitespace checks that list items survive the
// XML → Markdown → XML round-trip without a newline or space being inserted
// before the closing </li>.
func TestRoundTrip_ListNoTrailingWhitespace(t *testing.T) {
	const source = `<ul><li>item 1</li><li>item 2</li><li>item 3</li></ul>`

	markdown, err := ConfluenceToMarkdown(source)
	require.NoError(t, err)

	roundTripped, err := MarkdownToConfluence([]byte(markdown))
	require.NoError(t, err)

	// Every item comes back as a tight <li>...</li>.
	for _, item := range []string{"<li>item 1</li>", "<li>item 2</li>", "<li>item 3</li>"} {
		assert.Contains(t, roundTripped, item)
	}
	// No whitespace crept in before the closing tag.
	for _, bad := range []string{"item 1\n</li>", "item 1 </li>"} {
		assert.NotContains(t, roundTripped, bad)
	}
}

func TestRoundTrip_InlineCommentFromRealXML(t *testing.T) {
	xmlInput := `<p>Товарищ! <ac:inline-comment-marker ac:ref="b2f6ce98-4dc9-45e0-a9b6-b4a5109657ca">Не майся дурью, копируй этот шаблон и редактируй его!</ac:inline-comment-marker> Ускоришь написание RFC!</p>`


M converter/xml2md.go => converter/xml2md.go +52 -3
@@ 154,9 154,13 @@ func (c *xmlConverter) walk(n *html.Node, depth int) {
		c.walkChildren(n, depth)
		c.buf.WriteString("~~")
	case tag == "code":
		c.buf.WriteString("`")
		if !isPrevSiblingCode(n) {
			c.buf.WriteString("`")
		}
		c.walkChildren(n, depth)
		c.buf.WriteString("`")
		if !isNextSiblingCode(n) {
			c.buf.WriteString("`")
		}

	// Links
	case tag == "a":


@@ 418,8 422,15 @@ func (c *xmlConverter) renderCodeMacro(n *html.Node, macroID string) {
	}
	walkMacro(n)

	// Extract original attribute order for round-trip fidelity
	attrOrder := extractAttrOrder(n)

	if macroID != "" {
		fmt.Fprintf(c.buf, "\n<!-- ac:code macro-id=%q -->\n", macroID)
		if attrOrder != "" {
			fmt.Fprintf(c.buf, "\n<!-- ac:code macro-id=%q attr-order=%q -->\n", macroID, attrOrder)
		} else {
			fmt.Fprintf(c.buf, "\n<!-- ac:code macro-id=%q -->\n", macroID)
		}
	} else {
		c.buf.WriteString("\n")
	}


@@ 953,6 964,44 @@ func getCDATAContent(n *html.Node) string {
	return buf.String()
}

// extractAttrOrder lists the attribute names of n in their original order as
// a comma-separated string, e.g. "name,schema-version,macro-id". A namespaced
// attribute is written as "namespace:key", and a leading "ac:" is stripped
// from every name for brevity. A node without attributes yields "".
func extractAttrOrder(n *html.Node) string {
	shortNames := make([]string, 0, len(n.Attr))
	for i := range n.Attr {
		attr := &n.Attr[i]

		qualified := attr.Key
		if attr.Namespace != "" {
			qualified = attr.Namespace + ":" + attr.Key
		}
		shortNames = append(shortNames, strings.TrimPrefix(qualified, "ac:"))
	}
	return strings.Join(shortNames, ",")
}

// isNextSiblingCode reports whether the node immediately following n is a
// <code> element.
//
// Only the direct sibling counts. Whitespace between two <code> elements is
// content: skipping it would merge "<code>a</code> <code>b</code>" into a
// single code span, which changes the text and disagrees with the verify
// normalization, where only directly adjacent "</code><code>" is merged.
func isNextSiblingCode(n *html.Node) bool {
	return isCodeElement(n.NextSibling)
}

// isPrevSiblingCode reports whether the node immediately preceding n is a
// <code> element. It must use the same adjacency rule as isNextSiblingCode,
// otherwise opening and closing backticks would get out of balance.
func isPrevSiblingCode(n *html.Node) bool {
	return isCodeElement(n.PrevSibling)
}

// isCodeElement reports whether n is a non-nil element node whose tag name is
// "code", compared case-insensitively.
func isCodeElement(n *html.Node) bool {
	return n != nil && n.Type == html.ElementNode && strings.EqualFold(n.Data, "code")
}

func getTextContent(n *html.Node) string {
	var buf bytes.Buffer
	var walk func(*html.Node)