~bigbes/confluence-md-utilities: feat: add verify command, improve round-trip fidelity

7 files changed, 925 insertions(+), 14 deletions(-)

M cmd/mdcx/completions.go
A cmd/mdcx/verify.go
A cmd/mdcx/verify_test.go
M confluence/elements.go
M confluence/renderer.go
M converter/md2xml_test.go
M converter/xml2md.go

M cmd/mdcx/completions.go => cmd/mdcx/completions.go +1 -0

@@ 11,6 11,7 @@ func init() {
 	embedCmd.ValidArgsFunction = completeMarkdownFiles
 	fmtCmd.ValidArgsFunction = completeXMLFiles
 	extractCmd.ValidArgsFunction = completeXMLFiles
+	verifyCmd.ValidArgsFunction = completeXMLFiles
 	pullCmd.ValidArgsFunction = cobra.NoFileCompletions
 	pushCmd.ValidArgsFunction = completePushArgs

A cmd/mdcx/verify.go => cmd/mdcx/verify.go +347 -0

@@ 0,0 1,347 @@
+package main
+
+import (
+	"fmt"
+	"io"
+	"os"
+	"regexp"
+	"strings"
+
+	"github.com/spf13/cobra"
+
+	"sourcecraft.dev/bigbes/confluence-md-utilities/converter"
+	"sourcecraft.dev/bigbes/confluence-md-utilities/format"
+)
+
+var (
+	verifyIndent string
+)
+
+// verifyCmd implements "mdcx verify". It normalizes the input XML, formats it
+// (A), converts it to Markdown and back and formats the result (B), then
+// compares A and B. On a match it prints an OK line to stderr. Otherwise it
+// prints a status line to stderr, a colored unified diff to stdout, and exits
+// with status 1.
+var verifyCmd = &cobra.Command{
+	Use:   "verify [input.xml]",
+	Short: "Verify round-trip fidelity of XML → Markdown → XML conversion",
+	Long: `Check that Confluence XML survives a round-trip through Markdown and back.
+
+Compares:
+  A = fmt(input XML)
+  B = fmt(xml2md(input XML) → md2xml → XML)
+
+If A and B match, the round-trip is lossless. Otherwise, prints a diff.
+
+Reads from stdin if no file is specified.`,
+	Args: cobra.MaximumNArgs(1),
+	RunE: func(cmd *cobra.Command, args []string) error {
+		var input []byte
+		var err error
+
+		// A single optional argument names the input file; without it the
+		// document is read from stdin.
+		if len(args) > 0 {
+			input, err = os.ReadFile(args[0])
+		} else {
+			input, err = io.ReadAll(os.Stdin)
+		}
+		if err != nil {
+			return fmt.Errorf("reading input: %w", err)
+		}
+
+		xmlInput := string(input)
+
+		// Normalize input: remove elements that cannot survive round-trip
+		xmlInput = normalizeForVerify(xmlInput)
+
+		// A: format the original XML
+		formatted := format.PrettyXML(xmlInput, verifyIndent)
+
+		// B: XML → Markdown → XML → format
+		md, err := converter.ConfluenceToMarkdown(xmlInput)
+		if err != nil {
+			return fmt.Errorf("xml→markdown: %w", err)
+		}
+
+		xmlRoundTrip, err := converter.MarkdownToConfluence([]byte(md))
+		if err != nil {
+			return fmt.Errorf("markdown→xml: %w", err)
+		}
+
+		formattedRoundTrip := format.PrettyXML(xmlRoundTrip, verifyIndent)
+
+		if formatted == formattedRoundTrip {
+			fmt.Fprintln(os.Stderr, "OK: round-trip is lossless")
+			return nil
+		}
+
+		// Print unified diff with colored inline highlights
+		linesA := strings.Split(formatted, "\n")
+		linesB := strings.Split(formattedRoundTrip, "\n")
+
+		// The status line goes to stderr. The "---"/"+++" file headers are part
+		// of the diff, so they go to stdout together with the hunks written by
+		// printHunk; redirecting stdout then captures a complete diff.
+		fmt.Fprintln(os.Stderr, "MISMATCH: round-trip produced different output")
+		fmt.Fprintln(os.Stderr, "")
+		fmt.Fprintf(os.Stdout, "%s--- original (formatted)%s\n", ansiRed, ansiReset)
+		fmt.Fprintf(os.Stdout, "%s+++ round-trip (formatted)%s\n", ansiGreen, ansiReset)
+
+		ops := computeDiffOps(linesA, linesB)
+		hunks := buildHunks(ops, 3)
+		for _, h := range hunks {
+			printHunk(h)
+		}
+
+		// NOTE(review): os.Exit terminates the process right here, so the
+		// return below is never reached and nothing scheduled to run after
+		// RunE gets a chance to run. Presumably chosen to signal the mismatch
+		// through the exit status without an additional error message —
+		// confirm against how main handles errors.
+		os.Exit(1)
+		return nil
+	},
+}
+
+// init declares the --indent flag, stored in verifyIndent with a default of
+// two spaces, and attaches verifyCmd to rootCmd.
+func init() {
+	verifyCmd.Flags().StringVar(&verifyIndent, "indent", "  ", "Indentation string (default: 2 spaces)")
+	rootCmd.AddCommand(verifyCmd)
+}
+
+var (
+	// reEmptyParagraph matches empty paragraphs like <p><br /></p>, <p><br/></p>, etc.
+	// Whitespace around the <br> is accepted; a <p> tag carrying attributes is
+	// not matched because the pattern spells the opening tag literally.
+	reEmptyParagraph = regexp.MustCompile(`<p>\s*<br\s*/?>\s*</p>`)
+	// reSpanInCode matches <span...>...</span> inside <code>, unwrapping to just the text.
+	// It matches only when nothing but text sits between "<code>" and the span
+	// and the span itself contains only text. Group 1 is everything up to the
+	// span (kept as is), group 2 is the text inside the span.
+	reSpanInCode = regexp.MustCompile(`(<code>[^<]*)<span[^>]*>([^<]*)</span>`)
+	// reAdjacentCode matches </code><code> (directly adjacent), merging into one span.
+	// Any character between the two tags, including whitespace, prevents a match.
+	reAdjacentCode = regexp.MustCompile(`</code><code>`)
+)
+
+// normalizeForVerify rewrites the input so that constructs Markdown cannot
+// represent do not show up as round-trip differences. In order, it drops
+// <p> elements that hold only a <br>, unwraps <span> elements found inside
+// <code>, and joins directly adjacent <code> elements.
+func normalizeForVerify(xml string) string {
+	out := reEmptyParagraph.ReplaceAllString(xml, "")
+
+	// One pass unwraps at most one <span> per <code> element (the pattern is
+	// anchored at "<code>"), so repeat until a pass finds nothing left.
+	for {
+		if !reSpanInCode.MatchString(out) {
+			break
+		}
+		out = reSpanInCode.ReplaceAllString(out, "${1}${2}")
+	}
+
+	// Deleting the "</code><code>" seam fuses the two neighbours into one.
+	return reAdjacentCode.ReplaceAllString(out, "")
+}
+
+// ANSI escape codes for diff output.
+const (
+	ansiReset = "\033[0m"
+	ansiRed   = "\033[31m"
+	ansiGreen = "\033[32m"
+	ansiCyan  = "\033[36m"
+	ansiBold  = "\033[1m"
+	ansiRedBg = "\033[41;37m" // red background, white text
+	ansiGrnBg = "\033[42;30m" // green background, black text
+)
+
+// diffOp represents a line-level diff operation.
+type diffOp int
+
+const (
+	// opEqual marks a line present in both A and B. Being the first iota
+	// value, it is also the zero value of diffOp.
+	opEqual  diffOp = iota
+	opRemove        // line only in A
+	opAdd           // line only in B
+)
+
+// diffLine is a single line in the diff with its operation and source positions.
+// computeDiffOps sets both positions for opEqual lines, and sets the side a
+// line does not exist on to -1 for opRemove (lineB) and opAdd (lineA).
+type diffLine struct {
+	op    diffOp
+	text  string
+	lineA int // 1-based line number in A (-1 if not applicable)
+	lineB int // 1-based line number in B (-1 if not applicable)
+}
+
+// hunk is a group of diff lines with surrounding context.
+// The start/count pairs are the values printHunk puts into the
+// "@@ -startA,countA +startB,countB @@" header. buildHunks counts equal and
+// removed lines towards countA, and equal and added lines towards countB.
+type hunk struct {
+	startA, countA int // 1-based start and count for A
+	startB, countB int // 1-based start and count for B
+	lines          []diffLine
+}
+
+// computeDiffOps diffs two line slices and returns the edit script as a
+// sequence of equal/remove/add lines in document order. It fills a
+// longest-common-subsequence table for a and b and then walks that table back
+// from its end. The result is nil when both inputs are empty.
+func computeDiffOps(a, b []string) []diffLine {
+	rows, cols := len(a), len(b)
+
+	// lcs[r][c] holds the LCS length of a[:r] and b[:c].
+	lcs := make([][]int, rows+1)
+	for r := 0; r <= rows; r++ {
+		lcs[r] = make([]int, cols+1)
+	}
+	for r := 1; r <= rows; r++ {
+		for c := 1; c <= cols; c++ {
+			if a[r-1] == b[c-1] {
+				lcs[r][c] = lcs[r-1][c-1] + 1
+				continue
+			}
+			lcs[r][c] = max(lcs[r-1][c], lcs[r][c-1])
+		}
+	}
+
+	// Walk the table from the bottom-right corner. On a tie an add is taken
+	// before a remove, so after the reversal below removes precede adds.
+	var script []diffLine
+	r, c := rows, cols
+	for r > 0 || c > 0 {
+		switch {
+		case r > 0 && c > 0 && a[r-1] == b[c-1]:
+			script = append(script, diffLine{op: opEqual, text: a[r-1], lineA: r, lineB: c})
+			r--
+			c--
+		case c > 0 && (r == 0 || lcs[r][c-1] >= lcs[r-1][c]):
+			script = append(script, diffLine{op: opAdd, text: b[c-1], lineA: -1, lineB: c})
+			c--
+		default:
+			script = append(script, diffLine{op: opRemove, text: a[r-1], lineA: r, lineB: -1})
+			r--
+		}
+	}
+
+	// The walk emitted lines last-to-first; flip them into document order.
+	for lo, hi := 0, len(script)-1; lo < hi; {
+		script[lo], script[hi] = script[hi], script[lo]
+		lo++
+		hi--
+	}
+	return script
+}
+
+// buildHunks groups diff operations into unified-diff hunks with `ctx` context lines.
+//
+// Changed lines separated by at most 2*ctx unchanged lines end up in the same
+// hunk, so the context of two hunks never overlaps. Each hunk's lines are a
+// sub-slice of ops (no copy is made). A negative ctx is treated as 0. The
+// result is nil when ops contains no changes.
+func buildHunks(ops []diffLine, ctx int) []hunk {
+	if ctx < 0 {
+		// A negative context would push lo past hi and break the slicing below.
+		ctx = 0
+	}
+
+	// Find ranges of changed lines; end is exclusive.
+	type span struct{ start, end int } // indices into ops
+	var changed []span
+	for i, op := range ops {
+		if op.op == opEqual {
+			continue
+		}
+		if last := len(changed) - 1; last >= 0 && i-changed[last].end <= 2*ctx {
+			// Close enough to the previous change: extend that span.
+			changed[last].end = i + 1
+		} else {
+			changed = append(changed, span{i, i + 1})
+		}
+	}
+
+	// startLine returns the 1-based line at which ops[lo:hi] begins on one
+	// side of the diff; num picks that side's line number from a diffLine.
+	// Each side is resolved on its own: the first hunk line that exists on
+	// that side wins. If the hunk has no line on that side, the position
+	// right after the closest preceding line of that side is used, or 1 when
+	// nothing precedes the hunk.
+	startLine := func(lo, hi int, num func(diffLine) int) int {
+		for _, dl := range ops[lo:hi] {
+			if n := num(dl); n > 0 {
+				return n
+			}
+		}
+		for i := lo - 1; i >= 0; i-- {
+			if n := num(ops[i]); n > 0 {
+				return n + 1
+			}
+		}
+		return 1
+	}
+
+	var hunks []hunk
+	for _, ch := range changed {
+		// Widen the changed range by the context, clamped to the op list.
+		lo := max(ch.start-ctx, 0)
+		hi := min(ch.end+ctx, len(ops))
+
+		h := hunk{
+			lines:  ops[lo:hi],
+			startA: startLine(lo, hi, func(dl diffLine) int { return dl.lineA }),
+			startB: startLine(lo, hi, func(dl diffLine) int { return dl.lineB }),
+		}
+		for _, dl := range h.lines {
+			switch dl.op {
+			case opEqual:
+				h.countA++
+				h.countB++
+			case opRemove:
+				h.countA++
+			case opAdd:
+				h.countB++
+			}
+		}
+		hunks = append(hunks, h)
+	}
+	return hunks
+}
+
+// printHunk writes one hunk to stdout in unified-diff form: a cyan "@@" header,
+// unchanged lines prefixed with a space, removals in red and additions in
+// green. A run of removals directly followed by a run of additions is paired
+// up line by line, and each pair is printed with inline highlighting of the
+// part that differs; leftovers of the longer run are printed plainly.
+func printHunk(h hunk) {
+	out := os.Stdout
+	fmt.Fprintf(out, "%s@@ -%d,%d +%d,%d @@%s\n",
+		ansiCyan, h.startA, h.countA, h.startB, h.countB, ansiReset)
+
+	all := h.lines
+	for pos := 0; pos < len(all); {
+		switch all[pos].op {
+		case opEqual:
+			fmt.Fprintf(out, " %s\n", all[pos].text)
+			pos++
+
+		case opAdd:
+			// An add that does not follow a remove has no partner line.
+			fmt.Fprintf(out, "%s+%s%s\n", ansiGreen, all[pos].text, ansiReset)
+			pos++
+
+		case opRemove:
+			// Collect the run of removals and the run of additions behind it.
+			remEnd := pos
+			for remEnd < len(all) && all[remEnd].op == opRemove {
+				remEnd++
+			}
+			addEnd := remEnd
+			for addEnd < len(all) && all[addEnd].op == opAdd {
+				addEnd++
+			}
+			removed, added := all[pos:remEnd], all[remEnd:addEnd]
+
+			// Matching positions of both runs are shown as old/new pairs.
+			paired := min(len(removed), len(added))
+			for k := 0; k < paired; k++ {
+				oldText, newText := inlineHighlight(removed[k].text, added[k].text)
+				fmt.Fprintf(out, "%s-%s%s\n", ansiRed, oldText, ansiReset)
+				fmt.Fprintf(out, "%s+%s%s\n", ansiGreen, newText, ansiReset)
+			}
+			for _, dl := range removed[paired:] {
+				fmt.Fprintf(out, "%s-%s%s\n", ansiRed, dl.text, ansiReset)
+			}
+			for _, dl := range added[paired:] {
+				fmt.Fprintf(out, "%s+%s%s\n", ansiGreen, dl.text, ansiReset)
+			}
+
+			// Continue right after the additions that were just printed.
+			pos = addEnd
+
+		default:
+			// Unknown operations print nothing and are stepped over.
+			pos++
+		}
+	}
+}
+
+// inlineHighlight compares a removed line (a) with the added line replacing it
+// (b) and returns both with the differing middle part wrapped in bold plus a
+// colored background: red for a, green for b. The shared prefix and suffix are
+// found rune by rune. After the highlighted part the line's own foreground
+// color (red or green) is switched back on. Identical inputs are returned
+// unchanged.
+func inlineHighlight(a, b string) (string, string) {
+	oldRunes, newRunes := []rune(a), []rune(b)
+	shorter := min(len(oldRunes), len(newRunes))
+
+	// Length of the shared prefix.
+	head := 0
+	for head < shorter && oldRunes[head] == newRunes[head] {
+		head++
+	}
+
+	// Length of the shared suffix, limited so it never reaches into the prefix.
+	tail := 0
+	for tail < shorter-head && oldRunes[len(oldRunes)-1-tail] == newRunes[len(newRunes)-1-tail] {
+		tail++
+	}
+
+	oldMid := oldRunes[head : len(oldRunes)-tail]
+	newMid := newRunes[head : len(newRunes)-tail]
+	if len(oldMid)+len(newMid) == 0 {
+		// Nothing differs, so there is nothing to mark.
+		return a, b
+	}
+
+	shared := string(oldRunes[:head])
+	rest := string(oldRunes[len(oldRunes)-tail:])
+
+	// mark assembles prefix + highlighted middle + suffix for one side.
+	mark := func(background, middle, foreground string) string {
+		return shared + ansiBold + background + middle + ansiReset + foreground + rest
+	}
+	return mark(ansiRedBg, string(oldMid), ansiRed), mark(ansiGrnBg, string(newMid), ansiGreen)
+}

A cmd/mdcx/verify_test.go => cmd/mdcx/verify_test.go +421 -0

@@ 0,0 1,421 @@
+package main
+
+import (
+	"strings"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+// --- normalizeForVerify ---
+
+func TestNormalizeForVerify_RemovesEmptyBrParagraph(t *testing.T) {
+	tests := []struct {
+		name  string
+		input string
+		want  string
+	}{
+		{
+			name:  "br with space before slash",
+			input: `<p><br /></p><p>text</p>`,
+			want:  `<p>text</p>`,
+		},
+		{
+			name:  "br without space",
+			input: `<p><br/></p><p>text</p>`,
+			want:  `<p>text</p>`,
+		},
+		{
+			name:  "br with whitespace around",
+			input: "<p> \n <br /> \n </p><p>text</p>",
+			want:  "<p>text</p>",
+		},
+		{
+			name:  "no empty paragraphs",
+			input: `<p>hello</p>`,
+			want:  `<p>hello</p>`,
+		},
+		{
+			name:  "multiple empty paragraphs",
+			input: `<p><br /></p><p><br/></p><p>text</p>`,
+			want:  `<p>text</p>`,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			assert.Equal(t, tt.want, normalizeForVerify(tt.input))
+		})
+	}
+}
+
+func TestNormalizeForVerify_SpanInsideCode(t *testing.T) {
+	tests := []struct {
+		name  string
+		input string
+		want  string
+	}{
+		{
+			name:  "span inside code unwrapped",
+			input: `<code>hello<span> world</span></code>`,
+			want:  `<code>hello world</code>`,
+		},
+		{
+			name:  "span with attributes inside code",
+			input: `<code>a<span class="x"> : </span></code>`,
+			want:  `<code>a : </code>`,
+		},
+		{
+			name:  "no span inside code",
+			input: `<code>plain</code>`,
+			want:  `<code>plain</code>`,
+		},
+		{
+			name:  "span outside code untouched",
+			input: `<p><span>text</span></p>`,
+			want:  `<p><span>text</span></p>`,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			assert.Equal(t, tt.want, normalizeForVerify(tt.input))
+		})
+	}
+}
+
+func TestNormalizeForVerify_AdjacentCodeMerged(t *testing.T) {
+	tests := []struct {
+		name  string
+		input string
+		want  string
+	}{
+		{
+			name:  "directly adjacent",
+			input: `<code>hello</code><code>world</code>`,
+			want:  `<code>helloworld</code>`,
+		},
+		{
+			name:  "with whitespace between",
+			input: `<code>hello</code> <code>world</code>`,
+			want:  `<code>hello</code> <code>world</code>`,
+		},
+		{
+			name:  "single code element untouched",
+			input: `<code>hello</code>`,
+			want:  `<code>hello</code>`,
+		},
+		{
+			name:  "combined: span inside + adjacent merge",
+			input: `<code>plan<span> : </span></code><code>vclock</code>`,
+			want:  `<code>plan : vclock</code>`,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			assert.Equal(t, tt.want, normalizeForVerify(tt.input))
+		})
+	}
+}
+
+// --- computeDiffOps ---
+
+func TestComputeDiffOps_IdenticalInputs(t *testing.T) {
+	lines := []string{"a", "b", "c"}
+	ops := computeDiffOps(lines, lines)
+
+	require.Len(t, ops, 3)
+	for _, op := range ops {
+		assert.Equal(t, opEqual, op.op)
+	}
+}
+
+func TestComputeDiffOps_CompletelyDifferent(t *testing.T) {
+	a := []string{"a", "b"}
+	b := []string{"x", "y"}
+	ops := computeDiffOps(a, b)
+
+	var removes, adds int
+	for _, op := range ops {
+		switch op.op {
+		case opRemove:
+			removes++
+		case opAdd:
+			adds++
+		}
+	}
+	assert.Equal(t, 2, removes)
+	assert.Equal(t, 2, adds)
+}
+
+func TestComputeDiffOps_EmptyInputs(t *testing.T) {
+	assert.Empty(t, computeDiffOps(nil, nil))
+	assert.Empty(t, computeDiffOps([]string{}, []string{}))
+}
+
+func TestComputeDiffOps_OneEmpty(t *testing.T) {
+	ops := computeDiffOps([]string{"a", "b"}, nil)
+	require.Len(t, ops, 2)
+	for _, op := range ops {
+		assert.Equal(t, opRemove, op.op)
+	}
+
+	ops = computeDiffOps(nil, []string{"x", "y"})
+	require.Len(t, ops, 2)
+	for _, op := range ops {
+		assert.Equal(t, opAdd, op.op)
+	}
+}
+
+func TestComputeDiffOps_SingleLineChange(t *testing.T) {
+	a := []string{"aaa", "bbb", "ccc"}
+	b := []string{"aaa", "BBB", "ccc"}
+	ops := computeDiffOps(a, b)
+
+	// Should be: equal(aaa), remove(bbb), add(BBB), equal(ccc)
+	require.Len(t, ops, 4)
+	assert.Equal(t, opEqual, ops[0].op)
+	assert.Equal(t, "aaa", ops[0].text)
+	assert.Equal(t, opRemove, ops[1].op)
+	assert.Equal(t, "bbb", ops[1].text)
+	assert.Equal(t, opAdd, ops[2].op)
+	assert.Equal(t, "BBB", ops[2].text)
+	assert.Equal(t, opEqual, ops[3].op)
+	assert.Equal(t, "ccc", ops[3].text)
+}
+
+func TestComputeDiffOps_LineNumbers(t *testing.T) {
+	a := []string{"same", "old"}
+	b := []string{"same", "new"}
+	ops := computeDiffOps(a, b)
+
+	// equal: lineA=1, lineB=1
+	assert.Equal(t, 1, ops[0].lineA)
+	assert.Equal(t, 1, ops[0].lineB)
+	// remove: lineA=2, lineB=-1
+	assert.Equal(t, 2, ops[1].lineA)
+	assert.Equal(t, -1, ops[1].lineB)
+	// add: lineA=-1, lineB=2
+	assert.Equal(t, -1, ops[2].lineA)
+	assert.Equal(t, 2, ops[2].lineB)
+}
+
+func TestComputeDiffOps_Insertion(t *testing.T) {
+	a := []string{"a", "c"}
+	b := []string{"a", "b", "c"}
+	ops := computeDiffOps(a, b)
+
+	require.Len(t, ops, 3)
+	assert.Equal(t, opEqual, ops[0].op)
+	assert.Equal(t, opAdd, ops[1].op)
+	assert.Equal(t, "b", ops[1].text)
+	assert.Equal(t, opEqual, ops[2].op)
+}
+
+func TestComputeDiffOps_Deletion(t *testing.T) {
+	a := []string{"a", "b", "c"}
+	b := []string{"a", "c"}
+	ops := computeDiffOps(a, b)
+
+	require.Len(t, ops, 3)
+	assert.Equal(t, opEqual, ops[0].op)
+	assert.Equal(t, opRemove, ops[1].op)
+	assert.Equal(t, "b", ops[1].text)
+	assert.Equal(t, opEqual, ops[2].op)
+}
+
+// --- buildHunks ---
+
+func TestBuildHunks_NoChanges(t *testing.T) {
+	ops := computeDiffOps([]string{"a", "b", "c"}, []string{"a", "b", "c"})
+	hunks := buildHunks(ops, 3)
+	assert.Empty(t, hunks)
+}
+
+func TestBuildHunks_SingleChange(t *testing.T) {
+	a := []string{"1", "2", "3", "4", "5"}
+	b := []string{"1", "2", "X", "4", "5"}
+	ops := computeDiffOps(a, b)
+	hunks := buildHunks(ops, 1)
+
+	require.Len(t, hunks, 1)
+	h := hunks[0]
+
+	// Context=1: line 2 (before) + remove(3)/add(X) + line 4 (after) = 3 each side
+	assert.Equal(t, 3, h.countA) // 2, remove(3), 4
+	assert.Equal(t, 3, h.countB) // 2, add(X), 4
+}
+
+func TestBuildHunks_TwoSeparateChanges(t *testing.T) {
+	// Changes far enough apart to be separate hunks
+	a := []string{"1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12"}
+	b := []string{"1", "X", "3", "4", "5", "6", "7", "8", "9", "10", "Y", "12"}
+	ops := computeDiffOps(a, b)
+	hunks := buildHunks(ops, 1)
+
+	assert.Len(t, hunks, 2)
+}
+
+func TestBuildHunks_MergesNearbyChanges(t *testing.T) {
+	// Two changes only 2 lines apart with ctx=3 should merge
+	a := []string{"1", "2", "3", "4", "5", "6", "7"}
+	b := []string{"1", "X", "3", "4", "Y", "6", "7"}
+	ops := computeDiffOps(a, b)
+	hunks := buildHunks(ops, 3)
+
+	assert.Len(t, hunks, 1, "nearby changes should merge into one hunk")
+}
+
+func TestBuildHunks_ContextClampedToFileEdge(t *testing.T) {
+	// Change at line 1 — context shouldn't go negative
+	a := []string{"old", "same"}
+	b := []string{"new", "same"}
+	ops := computeDiffOps(a, b)
+	hunks := buildHunks(ops, 3)
+
+	require.Len(t, hunks, 1)
+	assert.Equal(t, 1, hunks[0].startA)
+	assert.Equal(t, 1, hunks[0].startB)
+}
+
+func TestBuildHunks_Counts(t *testing.T) {
+	a := []string{"ctx", "old1", "old2", "ctx"}
+	b := []string{"ctx", "new1", "ctx"}
+	ops := computeDiffOps(a, b)
+	hunks := buildHunks(ops, 1)
+
+	require.Len(t, hunks, 1)
+	h := hunks[0]
+	// countA = context lines + removed lines
+	// countB = context lines + added lines
+	aLines := 0
+	bLines := 0
+	for _, dl := range h.lines {
+		if dl.op == opEqual || dl.op == opRemove {
+			aLines++
+		}
+		if dl.op == opEqual || dl.op == opAdd {
+			bLines++
+		}
+	}
+	assert.Equal(t, aLines, h.countA)
+	assert.Equal(t, bLines, h.countB)
+}
+
+// --- inlineHighlight ---
+
+func TestInlineHighlight_IdenticalLines(t *testing.T) {
+	a, b := inlineHighlight("same text", "same text")
+	// No ANSI escapes added when lines are identical
+	assert.Equal(t, "same text", a)
+	assert.Equal(t, "same text", b)
+}
+
+func TestInlineHighlight_SingleWordDiff(t *testing.T) {
+	a, b := inlineHighlight("hello world", "hello Earth")
+	// "hello " is common prefix, no common suffix
+	assert.Contains(t, a, "hello ")
+	assert.Contains(t, b, "hello ")
+	// Changed part should have bold marker
+	assert.Contains(t, a, ansiBold)
+	assert.Contains(t, b, ansiBold)
+	// Changed part should have appropriate background
+	assert.Contains(t, a, ansiRedBg)
+	assert.Contains(t, b, ansiGrnBg)
+}
+
+func TestInlineHighlight_MiddleChange(t *testing.T) {
+	a, b := inlineHighlight("abc-OLD-xyz", "abc-NEW-xyz")
+	// Common prefix "abc-", common suffix "-xyz"
+	// Both lines should highlight "OLD" / "NEW" in bold
+	assert.Contains(t, a, ansiBold)
+	assert.Contains(t, b, ansiBold)
+	assert.Contains(t, a, "OLD")
+	assert.Contains(t, b, "NEW")
+	// Prefix and suffix present without bold
+	assertPlainContains(t, a, "abc-")
+	assertPlainContains(t, b, "abc-")
+}
+
+func TestInlineHighlight_PrefixOnlyDifference(t *testing.T) {
+	a, b := inlineHighlight("XXX-same", "YYY-same")
+	// "-same" is common suffix
+	assert.Contains(t, a, "XXX")
+	assert.Contains(t, b, "YYY")
+	assert.Contains(t, a, ansiBold)
+}
+
+func TestInlineHighlight_SuffixOnlyDifference(t *testing.T) {
+	a, b := inlineHighlight("same-XXX", "same-YYY")
+	// "same-" is common prefix
+	assert.Contains(t, a, "XXX")
+	assert.Contains(t, b, "YYY")
+	assert.Contains(t, a, ansiBold)
+}
+
+func TestInlineHighlight_EmptyVsNonEmpty(t *testing.T) {
+	_, b := inlineHighlight("", "added")
+	assert.Contains(t, b, "added")
+	assert.Contains(t, b, ansiBold)
+}
+
+func TestInlineHighlight_Unicode(t *testing.T) {
+	a, b := inlineHighlight("привет мир", "привет мор")
+	assert.Contains(t, a, ansiBold)
+	assert.Contains(t, b, ansiBold)
+	// Common prefix "привет м" + common suffix "р" should be plain
+	assertPlainContains(t, a, "привет м")
+	assertPlainContains(t, b, "привет м")
+}
+
+// assertPlainContains asserts that s contains substr.
+// NOTE(review): despite its name, this helper does not look at the ANSI
+// escapes around the match. It is a plain substring check and cannot tell a
+// highlighted occurrence from an unhighlighted one.
+func assertPlainContains(t *testing.T, s, substr string) {
+	t.Helper()
+	assert.Contains(t, s, substr, "string should contain %q", substr)
+}
+
+// --- integration: computeDiffOps + buildHunks round-trip consistency ---
+
+func TestDiffOps_AllOpsPreserveText(t *testing.T) {
+	a := []string{"line1", "line2", "line3", "line4"}
+	b := []string{"line1", "changed", "line3", "added", "line4"}
+	ops := computeDiffOps(a, b)
+
+	// Reconstruct A and B from ops
+	var gotA, gotB []string
+	for _, op := range ops {
+		switch op.op {
+		case opEqual:
+			gotA = append(gotA, op.text)
+			gotB = append(gotB, op.text)
+		case opRemove:
+			gotA = append(gotA, op.text)
+		case opAdd:
+			gotB = append(gotB, op.text)
+		}
+	}
+	assert.Equal(t, a, gotA, "reconstructed A must match original")
+	assert.Equal(t, b, gotB, "reconstructed B must match original")
+}
+
+func TestBuildHunks_AllChangedLinesPresent(t *testing.T) {
+	a := strings.Split("a\nb\nc\nd\ne\nf\ng\nh\ni\nj", "\n")
+	b := strings.Split("a\nB\nc\nd\ne\nf\ng\nH\ni\nj", "\n")
+	ops := computeDiffOps(a, b)
+	hunks := buildHunks(ops, 1)
+
+	// Collect all changed texts from hunks
+	var removed, added []string
+	for _, h := range hunks {
+		for _, dl := range h.lines {
+			switch dl.op {
+			case opRemove:
+				removed = append(removed, dl.text)
+			case opAdd:
+				added = append(added, dl.text)
+			}
+		}
+	}
+	assert.Equal(t, []string{"b", "h"}, removed)
+	assert.Equal(t, []string{"B", "H"}, added)
+}

M confluence/elements.go => confluence/elements.go +43 -6

@@ 1,26 1,63 @@
 package confluence
 
+import "strings"
+
 // Confluence storage format macro helpers.
 
 func CodeMacro(language string, body string) string {
-	return CodeMacroWithID(language, body, "")
+	return CodeMacroWithID(language, body, "", "")
 }
 
-func CodeMacroWithID(language string, body string, macroID string) string {
+func CodeMacroWithID(language string, body string, macroID string, attrOrder string) string {
 	var lang string
 	if language != "" {
 		lang = `<ac:parameter ac:name="language">` + language + `</ac:parameter>`
 	}
-	tag := `<ac:structured-macro ac:name="code" ac:schema-version="1">`
-	if macroID != "" {
-		tag = `<ac:structured-macro ac:macro-id="` + macroID + `" ac:name="code" ac:schema-version="1">`
-	}
+	tag := buildStructuredMacroTag("code", macroID, attrOrder)
 	return tag +
 		lang +
 		`<ac:plain-text-body><![CDATA[` + escapeCDATA(body) + `]]></ac:plain-text-body>` +
 		`</ac:structured-macro>`
 }
 
+// buildStructuredMacroTag builds an opening <ac:structured-macro> tag
+// with attributes in the specified order. attrOrder is a comma-separated
+// list of short attribute names (e.g. "name,schema-version,macro-id").
+//
+// The tag always carries ac:name and ac:schema-version="1", plus ac:macro-id
+// when macroID is non-empty. Names in attrOrder are trimmed of surrounding
+// whitespace; unknown names and repeated names are ignored. Attributes that
+// attrOrder does not mention are appended in the default order (name,
+// schema-version, macro-id), so an incomplete attrOrder never loses one.
+// Values are written as given, without XML escaping.
+func buildStructuredMacroTag(name string, macroID string, attrOrder string) string {
+	attrValues := map[string]string{
+		"name":           name,
+		"schema-version": "1",
+	}
+	// Default order when no original order is known; also the order in which
+	// attributes missing from attrOrder are appended.
+	defaultOrder := []string{"name", "schema-version"}
+	if macroID != "" {
+		attrValues["macro-id"] = macroID
+		defaultOrder = append(defaultOrder, "macro-id")
+	}
+
+	var buf strings.Builder
+	buf.WriteString("<ac:structured-macro")
+
+	// emit writes one attribute, at most once, and only if it has a value.
+	written := make(map[string]bool, len(attrValues))
+	emit := func(attr string) {
+		val, ok := attrValues[attr]
+		if !ok || written[attr] {
+			return
+		}
+		written[attr] = true
+		buf.WriteString(` ac:`)
+		buf.WriteString(attr)
+		buf.WriteString(`="`)
+		buf.WriteString(val)
+		buf.WriteString(`"`)
+	}
+
+	if attrOrder != "" {
+		for _, attr := range strings.Split(attrOrder, ",") {
+			emit(strings.TrimSpace(attr))
+		}
+	}
+	// Whatever the requested order left out still has to be part of the tag.
+	for _, attr := range defaultOrder {
+		emit(attr)
+	}
+
+	buf.WriteString(">")
+	return buf.String()
+}
+
 func InfoPanel(body string) string {
 	return `<ac:structured-macro ac:name="info" ac:schema-version="1">` +
 		`<ac:rich-text-body>` + body + `</ac:rich-text-body>` +

M confluence/renderer.go => confluence/renderer.go +8 -4

@@ 18,7 18,8 @@ type Renderer struct {
 	inTaskBody         bool
 	inlineCommentDepth int
 	pendingTableAttrs  string // stored from <!-- table-attrs: ... --> comment
-	pendingCodeMacroID string // stored from <!-- ac:code macro-id="..." --> comment
+	pendingCodeMacroID    string // stored from <!-- ac:code macro-id="..." --> comment
+	pendingCodeAttrOrder string // stored from <!-- ac:code ... attr-order="..." --> comment
 }
 
 // NewRenderer creates a new Confluence storage format renderer.


@@ 103,7 104,7 @@ func (r *Renderer) renderTextBlock(w util.BufWriter, source []byte, node ast.Nod
 		if r.inTaskBody {
 			w.WriteString("</ac:task-body>\n")
 			r.inTaskBody = false
-		} else {
+		} else if _, ok := node.Parent().(*ast.ListItem); !ok {
 			w.WriteString("\n")
 		}
 	}


@@ 133,8 134,9 @@ func (r *Renderer) renderFencedCodeBlock(w util.BufWriter, source []byte, node a
 	// Remove trailing newline from code content
 	code := strings.TrimRight(buf.String(), "\n")
 
-	w.WriteString(CodeMacroWithID(language, code, r.pendingCodeMacroID))
+	w.WriteString(CodeMacroWithID(language, code, r.pendingCodeMacroID, r.pendingCodeAttrOrder))
 	r.pendingCodeMacroID = ""
+	r.pendingCodeAttrOrder = ""
 	w.WriteString("\n")
 	return ast.WalkSkipChildren, nil
 }


@@ 157,7 159,7 @@ func (r *Renderer) renderCodeBlock(w util.BufWriter, source []byte, node ast.Nod
 
 func (r *Renderer) renderThematicBreak(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
 	if entering {
-		w.WriteString("<hr/>\n")
+		w.WriteString("<hr />\n")
 	}
 	return ast.WalkContinue, nil
 }


@@ 294,9 296,11 @@ func (r *Renderer) convertComment(raw string) (string, bool) {
 	// Code macro-id — store for next code block
 	case strings.HasPrefix(trimmed, "<!-- ac:code"):
 		macroID := extractCommentAttr(trimmed, "macro-id")
+		attrOrder := extractCommentAttr(trimmed, "attr-order")
 		if macroID != "" {
 			r.pendingCodeMacroID = macroID
 		}
+		r.pendingCodeAttrOrder = attrOrder
 		return "", true
 	}

M converter/md2xml_test.go => converter/md2xml_test.go +53 -1

@@ 121,7 121,7 @@ func TestMarkdownToConfluence_NestedList(t *testing.T) {
 func TestMarkdownToConfluence_HorizontalRule(t *testing.T) {
 	result, err := MarkdownToConfluence([]byte("---"))
 	require.NoError(t, err)
-	assert.Contains(t, result, "<hr/>")
+	assert.Contains(t, result, "<hr />")
 }
 
 func TestMarkdownToConfluence_TaskList(t *testing.T) {


@@ 489,6 489,58 @@ func TestRoundTrip_InlineCommentMarker(t *testing.T) {
 	assert.Contains(t, xmlOutput, "important text")
 }
 
+func TestConfluenceToMarkdown_AdjacentCodeMerge(t *testing.T) {
+	tests := []struct {
+		name     string
+		input    string
+		expected string
+	}{
+		{
+			name:     "directly adjacent code elements merged",
+			input:    `<p><code>hello</code><code>world</code></p>`,
+			expected: "`helloworld`",
+		},
+		{
+			name:     "adjacent code with span inside",
+			input:    `<p><code>plan<span> : </span></code><code>vclock</code></p>`,
+			expected: "`plan : vclock`",
+		},
+		{
+			name:     "single code element unchanged",
+			input:    `<p><code>single</code></p>`,
+			expected: "`single`",
+		},
+		{
+			name:     "non-adjacent code elements stay separate",
+			input:    `<p><code>a</code> text <code>b</code></p>`,
+			expected: "`a` text `b`",
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result, err := ConfluenceToMarkdown(tt.input)
+			require.NoError(t, err)
+			assert.Contains(t, result, tt.expected)
+		})
+	}
+}
+
+func TestRoundTrip_ListNoTrailingWhitespace(t *testing.T) {
+	xmlInput := `<ul><li>item 1</li><li>item 2</li><li>item 3</li></ul>`
+
+	md, err := ConfluenceToMarkdown(xmlInput)
+	require.NoError(t, err)
+
+	xmlOutput, err := MarkdownToConfluence([]byte(md))
+	require.NoError(t, err)
+
+	assert.Contains(t, xmlOutput, "<li>item 1</li>")
+	assert.Contains(t, xmlOutput, "<li>item 2</li>")
+	assert.Contains(t, xmlOutput, "<li>item 3</li>")
+	assert.NotContains(t, xmlOutput, "item 1\n</li>")
+	assert.NotContains(t, xmlOutput, "item 1 </li>")
+}
+
 func TestRoundTrip_InlineCommentFromRealXML(t *testing.T) {
 	xmlInput := `<p>Товарищ! <ac:inline-comment-marker ac:ref="b2f6ce98-4dc9-45e0-a9b6-b4a5109657ca">Не майся дурью, копируй этот шаблон и редактируй его!</ac:inline-comment-marker> Ускоришь написание RFC!</p>`

M converter/xml2md.go => converter/xml2md.go +52 -3

@@ 154,9 154,13 @@ func (c *xmlConverter) walk(n *html.Node, depth int) {
 		c.walkChildren(n, depth)
 		c.buf.WriteString("~~")
 	case tag == "code":
-		c.buf.WriteString("`")
+		if !isPrevSiblingCode(n) {
+			c.buf.WriteString("`")
+		}
 		c.walkChildren(n, depth)
-		c.buf.WriteString("`")
+		if !isNextSiblingCode(n) {
+			c.buf.WriteString("`")
+		}
 
 	// Links
 	case tag == "a":


@@ 418,8 422,15 @@ func (c *xmlConverter) renderCodeMacro(n *html.Node, macroID string) {
 	}
 	walkMacro(n)
 
+	// Extract original attribute order for round-trip fidelity
+	attrOrder := extractAttrOrder(n)
+
 	if macroID != "" {
-		fmt.Fprintf(c.buf, "\n<!-- ac:code macro-id=%q -->\n", macroID)
+		if attrOrder != "" {
+			fmt.Fprintf(c.buf, "\n<!-- ac:code macro-id=%q attr-order=%q -->\n", macroID, attrOrder)
+		} else {
+			fmt.Fprintf(c.buf, "\n<!-- ac:code macro-id=%q -->\n", macroID)
+		}
 	} else {
 		c.buf.WriteString("\n")
 	}


@@ 953,6 964,44 @@ func getCDATAContent(n *html.Node) string {
 	return buf.String()
 }
 
+// extractAttrOrder lists the attribute names of n in the order they are stored
+// on the node, joined with commas (e.g. "name,schema-version,macro-id").
+// An attribute with a namespace is spelled "namespace:key", and a leading
+// "ac:" is removed from every name. A node without attributes yields "".
+func extractAttrOrder(n *html.Node) string {
+	var order strings.Builder
+	for i, attr := range n.Attr {
+		if i > 0 {
+			order.WriteByte(',')
+		}
+		full := attr.Key
+		if attr.Namespace != "" {
+			full = attr.Namespace + ":" + full
+		}
+		order.WriteString(strings.TrimPrefix(full, "ac:"))
+	}
+	return order.String()
+}
+
+// isNextSiblingCode reports whether the node immediately following n is a
+// <code> element.
+//
+// Only the direct sibling counts. The <code> case in walk leaves out the
+// closing backtick when this returns true, so looking past a whitespace text
+// node would turn `<code>a</code> <code>b</code>` into the single span "a b",
+// which cannot be converted back into two elements.
+func isNextSiblingCode(n *html.Node) bool {
+	next := n.NextSibling
+	return next != nil && next.Type == html.ElementNode && strings.EqualFold(next.Data, "code")
+}
+
+// isPrevSiblingCode reports whether the node immediately preceding n is a
+// <code> element.
+//
+// It mirrors isNextSiblingCode and must use the same adjacency rule: walk
+// leaves out the opening backtick when this returns true, so the two checks
+// have to agree for the backticks to stay balanced.
+func isPrevSiblingCode(n *html.Node) bool {
+	prev := n.PrevSibling
+	return prev != nil && prev.Type == html.ElementNode && strings.EqualFold(prev.Data, "code")
+}
+
 func getTextContent(n *html.Node) string {
 	var buf bytes.Buffer
 	var walk func(*html.Node)