// Source: ~bigbes/ci-cacher — ci-cacher/internal/hash/hash.go (-rw-r--r--, 4.5 KiB)
// Ref: 519ce411ce7e77bc46ba4eb12c4619c838b3f059
// Last commit: 519ce411 — Eugene Blikh, "Bump VERSION to 0.1.1 for tag" (2 days ago)

// Package hash derives cache keys from file/directory contents.
//
// For a single file path, Derive returns the same hex digest as
// `sha256sum <path> | cut -c1-<length>` (default length 16). This matches
// the convention used by the existing .builds/lib/ci-lib.sh shell helpers
// it replaces.
package hash

import (
	"crypto/sha256"
	"fmt"
	"hash"
	"io"
	"io/fs"
	"os"
	"path/filepath"
	"sort"
	"strings"
)

// DefaultLength is the number of hex characters kept after truncation.
// 16 hex chars = 64 bits, which matches the existing `cut -c1-16` shell
// convention. Derive falls back to this value when the requested length is
// outside the range 1..64.
const DefaultLength = 16

// Derive computes a deterministic hex digest over the contents of paths and
// returns its first length characters.
//
// A length outside 1..64 is replaced by DefaultLength. An empty paths slice
// is an error. Any error from hashing an individual path is returned as-is.
//
// With exactly one path the per-path digest is returned directly, so a
// single regular file yields the same value as `sha256sum <file>` (or a
// prefix of it when length < 64). With several paths, each per-path digest
// followed by a NUL byte is fed, in the order given, into an outer sha256;
// path order is therefore significant, which lets callers express inputs
// such as "Dockerfile + context dir" with stable ordering. Directories are
// hashed recursively.
func Derive(paths []string, length int) (string, error) {
	if len(paths) == 0 {
		return "", fmt.Errorf("no --hash-from paths given")
	}
	if length <= 0 || length > 64 {
		length = DefaultLength
	}

	// Single input: no outer wrapping, so the result lines up with sha256sum.
	if len(paths) == 1 {
		digest, err := hashOne(paths[0])
		if err != nil {
			return "", err
		}
		return digest[:length], nil
	}

	// Multiple inputs: fold "<digest>\0" for every path into one outer hash.
	combined := sha256.New()
	for _, p := range paths {
		digest, err := hashOne(p)
		if err != nil {
			return "", err
		}
		combined.Write(append([]byte(digest), 0))
	}
	return hex(combined)[:length], nil
}

// hashOne returns the hex digest for a single path: directories are handed
// to hashDir, everything else to hashFile. The path is inspected with
// os.Stat, so a symlink is classified by what it points at. A failing stat
// is reported as "stat <path>: <cause>".
func hashOne(p string) (string, error) {
	info, err := os.Stat(p)
	switch {
	case err != nil:
		return "", fmt.Errorf("stat %s: %w", p, err)
	case info.IsDir():
		return hashDir(p)
	default:
		return hashFile(p)
	}
}

// hashFile streams the file at p through sha256 and returns the digest as
// lowercase hex. Failures are wrapped as "open <path>: ..." or
// "read <path>: ..." depending on the step that failed.
func hashFile(p string) (string, error) {
	src, err := os.Open(p)
	if err != nil {
		return "", fmt.Errorf("open %s: %w", p, err)
	}
	defer src.Close()

	digest := sha256.New()
	_, err = io.Copy(digest, src)
	if err != nil {
		return "", fmt.Errorf("read %s: %w", p, err)
	}
	return hex(digest), nil
}

// hashDir hashes every regular file beneath root into a single hex sha256
// digest. For each file, in sorted slash-separated relative-path order, it
// writes "<relpath>\0<file-sha256>\n" into a rolling sha256.
//
// Non-regular entries below root (symlinks, devices, sockets) are skipped —
// their content is not portable and would make the hash brittle. The root
// itself is resolved through symlinks before walking.
//
// Errors from resolving or walking the tree are wrapped as
// "walk <root>: ..."; errors from hashing an individual file are returned
// as produced by hashFile.
func hashDir(root string) (string, error) {
	// filepath.WalkDir Lstats its root and never descends through a symlink,
	// while hashOne picks hashDir based on os.Stat (which follows links).
	// Resolve the root up front so a symlinked directory hashes its real
	// contents instead of silently producing the digest of an empty tree.
	resolved, err := filepath.EvalSymlinks(root)
	if err != nil {
		return "", fmt.Errorf("walk %s: %w", root, err)
	}

	var entries []string
	err = filepath.WalkDir(resolved, func(path string, d fs.DirEntry, err error) error {
		if err != nil {
			return err
		}
		if !d.Type().IsRegular() {
			return nil
		}
		rel, err := filepath.Rel(resolved, path)
		if err != nil {
			return err
		}
		// Normalise to forward slashes BEFORE sorting: '\\' and '/' compare
		// differently against other bytes (e.g. uppercase letters), so
		// sorting OS-native paths would order entries differently on
		// Windows and change the digest.
		entries = append(entries, filepath.ToSlash(rel))
		return nil
	})
	if err != nil {
		return "", fmt.Errorf("walk %s: %w", root, err)
	}
	sort.Strings(entries)

	h := sha256.New()
	for _, rel := range entries {
		// Convert back to the OS-native form only for file access.
		fh, err := hashFile(filepath.Join(resolved, filepath.FromSlash(rel)))
		if err != nil {
			return "", err
		}
		h.Write([]byte(rel))
		h.Write([]byte{0})
		h.Write([]byte(fh))
		h.Write([]byte{'\n'})
	}
	return hex(h), nil
}

// hex returns the current sum of h encoded as lowercase hexadecimal, two
// characters per byte. It calls h.Sum(nil) and does not write to h.
func hex(h hash.Hash) string {
	const alphabet = "0123456789abcdef"
	digest := h.Sum(nil)
	out := make([]byte, 0, 2*len(digest))
	for _, b := range digest {
		// High nibble first, then low nibble.
		out = append(out, alphabet[b>>4], alphabet[b&0x0f])
	}
	return string(out)
}

// ApplyTemplate builds a cache key from keyTemplate.
//
// When derived is non-empty, every "{hash}" placeholder in keyTemplate is
// replaced by it; if the template contains no placeholder, "-<derived>" is
// inserted into the final path component just before its extension. When
// derived is empty the template is left untouched at this step.
//
// When archSuffix is true, "-<goos>-<goarch>" is then inserted into the
// final path component, again before its extension.
func ApplyTemplate(keyTemplate, derived, goos, goarch string, archSuffix bool) string {
	const placeholder = "{hash}"

	out := keyTemplate
	switch {
	case derived == "":
		// Nothing to substitute or append.
	case strings.Contains(out, placeholder):
		out = strings.ReplaceAll(out, placeholder, derived)
	default:
		out = insertBeforeExt(out, "-"+derived)
	}

	if !archSuffix {
		return out
	}
	return insertBeforeExt(out, "-"+goos+"-"+goarch)
}

// insertBeforeExt inserts suffix into the last path component of key,
// directly in front of that component's extension. A compression extension
// (.zst, .gz, .bz2, .xz) that follows ".tar" is treated together with it as
// one compound extension, so "foo.tar.zst" becomes "foo<suffix>.tar.zst".
// A component without an extension simply gets suffix appended.
func insertBeforeExt(key, suffix string) string {
	prefix, name := filepath.Split(key)
	tail := filepath.Ext(name)

	switch tail {
	case ".zst", ".gz", ".bz2", ".xz":
		// Widen the extension to cover ".tar" when it precedes the
		// compression suffix.
		if filepath.Ext(strings.TrimSuffix(name, tail)) == ".tar" {
			tail = ".tar" + tail
		}
	}

	return prefix + strings.TrimSuffix(name, tail) + suffix + tail
}